diff --git a/EXTERNAL_HEADERS/AssertMacros.h b/EXTERNAL_HEADERS/AssertMacros.h new file mode 100644 index 000000000..2deea1201 --- /dev/null +++ b/EXTERNAL_HEADERS/AssertMacros.h @@ -0,0 +1,1165 @@ +/* + File: AssertMacros.h + + Contains: This file defines structured error handling and assertion macros for + programming in C. Originally used in QuickDraw GX and later enhanced. + These macros are used throughout Apple's software. + + See "Living In an Exceptional World" by Sean Parent + (develop, The Apple Technical Journal, Issue 11, August/September 1992) + + for the methodology behind these error handling and assertion macros. + + Copyright: © 2002-2007 by Apple Inc., all rights reserved. + + Bugs?: For bug reports, consult the following page on + the World Wide Web: + + http://developer.apple.com/bugreporter/ +*/ +#ifndef __ASSERTMACROS__ +#define __ASSERTMACROS__ + + +/* + * Macro overview: + * + * check(assertion) + * In production builds, pre-processed away + * In debug builds, if assertion evaluates to false, calls DEBUG_ASSERT_MESSAGE + * + * verify(assertion) + * In production builds, evaluates assertion and does nothing + * In debug builds, if assertion evaluates to false, calls DEBUG_ASSERT_MESSAGE + * + * require(assertion, exceptionLabel) + * In production builds, if the assertion expression evaluates to false, goto exceptionLabel + * In debug builds, if the assertion expression evaluates to false, calls DEBUG_ASSERT_MESSAGE + * and jumps to exceptionLabel + * + * In addition the following suffixes are available: + * + * _noerr Adds "== 0" to assertion. 
Useful for asserting that an OSStatus or OSErr is noErr (zero) + * _action Adds statement to be executed if assertion fails + * _quiet Suppress call to DEBUG_ASSERT_MESSAGE + * _string Allows you to add an explanatory message to DEBUG_ASSERT_MESSAGE + * + * For instance, require_noerr_string(resultCode, label, msg) will do nothing if + * resultCode is zero, otherwise it will call DEBUG_ASSERT_MESSAGE with msg + * and jump to label. + * + * Configuration: + * + * By default all macros generate "production code" (i.e. non-debug). If + * DEBUG_ASSERT_PRODUCTION_CODE is defined to zero or DEBUG is defined to non-zero + * while this header is included, the macros will generate debug code. + * + * If DEBUG_ASSERT_COMPONENT_NAME_STRING is defined, all debug messages will + * be prefixed with it. + * + * By default, all messages write to stderr. If you would like to write a custom + * error message formatter, define DEBUG_ASSERT_MESSAGE to your function name. + * + */ + + +/* + * Before including this file, #define DEBUG_ASSERT_COMPONENT_NAME_STRING to + * a C-string containing the name of your client. This string will be passed to + * the DEBUG_ASSERT_MESSAGE macro for inclusion in any assertion messages. + * + * If you do not define DEBUG_ASSERT_COMPONENT_NAME_STRING, the default + * DEBUG_ASSERT_COMPONENT_NAME_STRING value, an empty string, will be used by + * the assertion macros. + */ +#ifndef DEBUG_ASSERT_COMPONENT_NAME_STRING + #define DEBUG_ASSERT_COMPONENT_NAME_STRING "" +#endif + + +/* + * To activate the additional assertion code and messages for non-production builds, + * #define DEBUG_ASSERT_PRODUCTION_CODE to zero before including this file. + * + * If you do not define DEBUG_ASSERT_PRODUCTION_CODE, the default value !DEBUG will be used + * (1 unless DEBUG is defined to non-zero; production code = no assertion code and no messages). 
+ */ +#ifndef DEBUG_ASSERT_PRODUCTION_CODE + #define DEBUG_ASSERT_PRODUCTION_CODE !DEBUG +#endif + + +/* + * DEBUG_ASSERT_MESSAGE(component, assertion, label, error, file, line, errorCode) + * + * Summary: + * All assertion messages are routed through this macro. If you wish to use your + * own routine to display assertion messages, you can override DEBUG_ASSERT_MESSAGE + * by #defining DEBUG_ASSERT_MESSAGE before including this file. + * + * Parameters: + * + * componentNameString: + * A pointer to a string constant containing the name of the + * component this code is part of. This must be a string constant + * (and not a string variable or NULL) because the preprocessor + * concatenates it with other string constants. + * + * assertionString: + * A pointer to a string constant containing the assertion. + * This must be a string constant (and not a string variable or + * NULL) because the Preprocessor concatenates it with other + * string constants. + * + * exceptionLabelString: + * A pointer to a string containing the exceptionLabel, or NULL. + * + * errorString: + * A pointer to the error string, or NULL. DEBUG_ASSERT_MESSAGE macros + * must not attempt to concatenate this string with constant + * character strings. + * + * fileName: + * A pointer to the fileName or pathname (generated by the + * preprocessor __FILE__ identifier), or NULL. + * + * lineNumber: + * The line number in the file (generated by the preprocessor + * __LINE__ identifier), or 0 (zero). + * + * errorCode: + * A value associated with the assertion, or 0. 
+ * + * Here is an example of a DEBUG_ASSERT_MESSAGE macro and a routine which displays + * assertion messages: + * + * #define DEBUG_ASSERT_COMPONENT_NAME_STRING "MyCoolProgram" + * + * #define DEBUG_ASSERT_MESSAGE(componentNameString, assertionString, \ + * exceptionLabelString, errorString, fileName, lineNumber, errorCode) \ + * MyProgramDebugAssert(componentNameString, assertionString, \ + * exceptionLabelString, errorString, fileName, lineNumber, errorCode) + * + * static void + * MyProgramDebugAssert(const char *componentNameString, const char *assertionString, + * const char *exceptionLabelString, const char *errorString, + * const char *fileName, long lineNumber, int errorCode) + * { + * if ( (assertionString != NULL) && (*assertionString != '\0') ) + * fprintf(stderr, "Assertion failed: %s: %s\n", componentNameString, assertionString); + * else + * fprintf(stderr, "Check failed: %s:\n", componentNameString); + * if ( exceptionLabelString != NULL ) + * fprintf(stderr, " %s\n", exceptionLabelString); + * if ( errorString != NULL ) + * fprintf(stderr, " %s\n", errorString); + * if ( fileName != NULL ) + * fprintf(stderr, " file: %s\n", fileName); + * if ( lineNumber != 0 ) + * fprintf(stderr, " line: %ld\n", lineNumber); + * if ( errorCode != 0 ) + * fprintf(stderr, " error: %d\n", errorCode); + * } + * + * If you do not define DEBUG_ASSERT_MESSAGE, a simple printf to stderr will be used. + */ +#ifndef DEBUG_ASSERT_MESSAGE + #ifdef KERNEL + #include <libkern/libkern.h> + #define DEBUG_ASSERT_MESSAGE(name, assertion, label, message, file, line, value) \ + printf( "AssertMacros: %s, %s file: %s, line: %d\n", assertion, (message!=0) ? message : "", file, line); + #else + #include <stdio.h> + #define DEBUG_ASSERT_MESSAGE(name, assertion, label, message, file, line, value) \ + fprintf(stderr, "AssertMacros: %s, %s file: %s, line: %d\n", assertion, (message!=0) ? 
message : "", file, line); + #endif +#endif + + + + + +/* + * debug_string(message) + * + * Summary: + * Production builds: does nothing and produces no code. + * + * Non-production builds: call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * message: + * The C string to display. + * + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define debug_string(message) +#else + #define debug_string(message) \ + do \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + "", \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + } while ( 0 ) +#endif + + +/* + * check(assertion) + * + * Summary: + * Production builds: does nothing and produces no code. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * assertion: + * The assertion expression. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define check(assertion) +#else + #define check(assertion) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + 0, \ + 0, \ + __FILE__, \ + __LINE__, \ + 0); \ + } \ + } while ( 0 ) +#endif + +#define ncheck(assertion) \ + check(!(assertion)) + + +/* + * check_string(assertion, message) + * + * Summary: + * Production builds: does nothing and produces no code. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * message: + * The C string to display. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define check_string(assertion, message) +#else + #define check_string(assertion, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + } \ + } while ( 0 ) +#endif + +#define ncheck_string(assertion, message) \ + check_string(!(assertion), message) + + +/* + * check_noerr(errorCode) + * + * Summary: + * Production builds: does nothing and produces no code. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * errorCode: + * The errorCode expression to compare with 0. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define check_noerr(errorCode) +#else + #define check_noerr(errorCode) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + 0, \ + 0, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + } \ + } while ( 0 ) +#endif + + +/* + * check_noerr_string(errorCode, message) + * + * Summary: + * Production builds: check_noerr_string() does nothing and produces + * no code. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * errorCode: + * The errorCode expression to compare to 0. + * + * message: + * The C string to display. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define check_noerr_string(errorCode, message) +#else + #define check_noerr_string(errorCode, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + } \ + } while ( 0 ) +#endif + + +/* + * verify(assertion) + * + * Summary: + * Production builds: evaluate the assertion expression, but ignore + * the result. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * assertion: + * The assertion expression. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define verify(assertion) \ + do \ + { \ + if ( !(assertion) ) \ + { \ + } \ + } while ( 0 ) +#else + #define verify(assertion) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + 0, \ + 0, \ + __FILE__, \ + __LINE__, \ + 0); \ + } \ + } while ( 0 ) +#endif + +#define nverify(assertion) \ + verify(!(assertion)) + + +/* + * verify_string(assertion, message) + * + * Summary: + * Production builds: evaluate the assertion expression, but ignore + * the result. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * message: + * The C string to display. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define verify_string(assertion, message) \ + do \ + { \ + if ( !(assertion) ) \ + { \ + } \ + } while ( 0 ) +#else + #define verify_string(assertion, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + } \ + } while ( 0 ) +#endif + +#define nverify_string(assertion, message) \ + verify_string(!(assertion), message) + + +/* + * verify_noerr(errorCode) + * + * Summary: + * Production builds: evaluate the errorCode expression, but ignore + * the result. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define verify_noerr(errorCode) \ + do \ + { \ + if ( 0 != (errorCode) ) \ + { \ + } \ + } while ( 0 ) +#else + #define verify_noerr(errorCode) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + 0, \ + 0, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + } \ + } while ( 0 ) +#endif + + +/* + * verify_noerr_string(errorCode, message) + * + * Summary: + * Production builds: evaluate the errorCode expression, but ignore + * the result. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * message: + * The C string to display. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define verify_noerr_string(errorCode, message) \ + do \ + { \ + if ( 0 != (errorCode) ) \ + { \ + } \ + } while ( 0 ) +#else + #define verify_noerr_string(errorCode, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + } \ + } while ( 0 ) +#endif + + +/* + * verify_action(assertion, action) + * + * Summary: + * Production builds: if the assertion expression evaluates to false, + * then execute the action statement or compound statement (block). + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE and then execute the action statement or compound + * statement (block). + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * action: + * The statement or compound statement (block). + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define verify_action(assertion, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + action; \ + } \ + } while ( 0 ) +#else + #define verify_action(assertion, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + 0, \ + 0, \ + __FILE__, \ + __LINE__, \ + 0); \ + { action; } \ + } \ + } while ( 0 ) +#endif + + +/* + * require(assertion, exceptionLabel) + * + * Summary: + * Production builds: if the assertion expression evaluates to false, + * goto exceptionLabel. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE and then goto exceptionLabel. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + #exceptionLabel, \ + 0, \ + __FILE__, \ + __LINE__, \ + 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + +#define nrequire(assertion, exceptionLabel) \ + require(!(assertion), exceptionLabel) + + +/* + * require_action(assertion, exceptionLabel, action) + * + * Summary: + * Production builds: if the assertion expression evaluates to false, + * execute the action statement or compound statement (block) and then + * goto exceptionLabel. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE, execute the action statement or compound + * statement (block), and then goto exceptionLabel. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + #exceptionLabel, \ + 0, \ + __FILE__, \ + __LINE__, \ + 0); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + +#define nrequire_action(assertion, exceptionLabel, action) \ + require_action(!(assertion), exceptionLabel, action) + + +/* + * require_quiet(assertion, exceptionLabel) + * + * Summary: + * If the assertion expression evaluates to false, goto exceptionLabel. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. + */ +#define require_quiet(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + +#define nrequire_quiet(assertion, exceptionLabel) \ + require_quiet(!(assertion), exceptionLabel) + + +/* + * require_action_quiet(assertion, exceptionLabel, action) + * + * Summary: + * If the assertion expression evaluates to false, execute the action + * statement or compound statement (block), and goto exceptionLabel. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). 
+ */ +#define require_action_quiet(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + +#define nrequire_action_quiet(assertion, exceptionLabel, action) \ + require_action_quiet(!(assertion), exceptionLabel, action) + + +/* + * require_string(assertion, exceptionLabel, message) + * + * Summary: + * Production builds: if the assertion expression evaluates to false, + * goto exceptionLabel. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE, and then goto exceptionLabel. + * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. + * + * message: + * The C string to display. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_string(assertion, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_string(assertion, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + #exceptionLabel, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + +#define nrequire_string(assertion, exceptionLabel, string) \ + require_string(!(assertion), exceptionLabel, string) + + +/* + * require_action_string(assertion, exceptionLabel, action, message) + * + * Summary: + * Production builds: if the assertion expression evaluates to false, + * execute the action statement or compound statement (block), and then + * goto exceptionLabel. + * + * Non-production builds: if the assertion expression evaluates to false, + * call DEBUG_ASSERT_MESSAGE, execute the action statement or compound + * statement (block), and then goto exceptionLabel. 
+ * + * Parameters: + * + * assertion: + * The assertion expression. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). + * + * message: + * The C string to display. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_action_string(assertion, exceptionLabel, action, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_action_string(assertion, exceptionLabel, action, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, \ + #exceptionLabel, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + +#define nrequire_action_string(assertion, exceptionLabel, action, message) \ + require_action_string(!(assertion), exceptionLabel, action, message) + + +/* + * require_noerr(errorCode, exceptionLabel) + * + * Summary: + * Production builds: if the errorCode expression does not equal 0 (noErr), + * goto exceptionLabel. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE and then goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_noerr(errorCode, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_noerr(errorCode, exceptionLabel) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + #exceptionLabel, \ + 0, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + +/* + * require_noerr_action(errorCode, exceptionLabel, action) + * + * Summary: + * Production builds: if the errorCode expression does not equal 0 (noErr), + * execute the action statement or compound statement (block) and + * goto exceptionLabel. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE, execute the action statement or + * compound statement (block), and then goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_noerr_action(errorCode, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_noerr_action(errorCode, exceptionLabel, action) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + #exceptionLabel, \ + 0, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + + +/* + * require_noerr_quiet(errorCode, exceptionLabel) + * + * Summary: + * If the errorCode expression does not equal 0 (noErr), + * goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. + */ +#define require_noerr_quiet(errorCode, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + + +/* + * require_noerr_action_quiet(errorCode, exceptionLabel, action) + * + * Summary: + * If the errorCode expression does not equal 0 (noErr), + * execute the action statement or compound statement (block) and + * goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). + */ +#define require_noerr_action_quiet(errorCode, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + + +/* + * require_noerr_string(errorCode, exceptionLabel, message) + * + * Summary: + * Production builds: if the errorCode expression does not equal 0 (noErr), + * goto exceptionLabel. 
+ * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE, and then goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. + * + * message: + * The C string to display. + */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_noerr_string(errorCode, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_noerr_string(errorCode, exceptionLabel, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + #exceptionLabel, \ + message, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + + +/* + * require_noerr_action_string(errorCode, exceptionLabel, action, message) + * + * Summary: + * Production builds: if the errorCode expression does not equal 0 (noErr), + * execute the action statement or compound statement (block) and + * goto exceptionLabel. + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE, execute the action statement or compound + * statement (block), and then goto exceptionLabel. + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * exceptionLabel: + * The label. + * + * action: + * The statement or compound statement (block). + * + * message: + * The C string to display. 
+ */ +#if DEBUG_ASSERT_PRODUCTION_CODE + #define require_noerr_action_string(errorCode, exceptionLabel, action, message)\ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#else + #define require_noerr_action_string(errorCode, exceptionLabel, action, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", \ + #exceptionLabel, \ + message, \ + __FILE__, \ + __LINE__, \ + evalOnceErrorCode); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif + + +#endif /* __ASSERTMACROS__ */ + diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile index f90e7e8dc..9f8e3535b 100644 --- a/EXTERNAL_HEADERS/Makefile +++ b/EXTERNAL_HEADERS/Makefile @@ -9,20 +9,19 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ architecture \ - machine \ mach-o INSTINC_SUBDIRS_PPC = \ - architecture \ - ppc + architecture INSTINC_SUBDIRS_I386 = \ - architecture \ - i386 + architecture + +INSTINC_SUBDIRS_X86_64 = \ + architecture INSTINC_SUBDIRS_ARM = \ - architecture \ - arm + architecture EXPORT_FILES = \ AppleSecureBootEpoch.h \ diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile index ed5ca391b..8c929ba14 100644 --- a/EXTERNAL_HEADERS/architecture/Makefile +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -15,6 +15,9 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + i386 + INSTINC_SUBDIRS_ARM = \ arm @@ -24,6 +27,8 @@ INSTALL_MI_LIST = INSTALL_MI_DIR = architecture +INSTALL_KF_MI_LIST = byte_order.h + EXPORT_MI_LIST = ${EXPORT_FILES} EXPORT_MI_DIR = architecture @@ -31,7 +36,3 @@ EXPORT_MI_DIR = architecture include $(MakeInc_rule) include $(MakeInc_dir) - -build_installhdrs_mi:: - $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) - $(LN) 
../../../../../../../../usr/include/architecture/byte_order.h $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/byte_order.h diff --git a/EXTERNAL_HEADERS/architecture/byte_order.h b/EXTERNAL_HEADERS/architecture/byte_order.h new file mode 100644 index 000000000..72507854a --- /dev/null +++ b/EXTERNAL_HEADERS/architecture/byte_order.h @@ -0,0 +1,371 @@ +/* + * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * "Portions Copyright (c) 1999 Apple Computer, Inc. All Rights + * Reserved. This file contains Original Code and/or Modifications of + * Original Code as defined in and that are subject to the Apple Public + * Source License Version 1.0 (the 'License'). You may not use this file + * except in compliance with the License. Please obtain a copy of the + * License at http://www.apple.com/publicsource and read it before using + * this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License." + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Copyright (c) 1992 NeXT Computer, Inc. + * + * Byte ordering conversion. 
+ * + */ + +#ifndef _ARCHITECTURE_BYTE_ORDER_H_ +#define _ARCHITECTURE_BYTE_ORDER_H_ + +#include <libkern/OSByteOrder.h> + +typedef unsigned long NXSwappedFloat; +typedef unsigned long long NXSwappedDouble; + +static __inline__ +unsigned short +NXSwapShort( + unsigned short inv +) +{ + return (unsigned short)OSSwapInt16((uint16_t)inv); +} + +static __inline__ +unsigned int +NXSwapInt( + unsigned int inv +) +{ + return (unsigned int)OSSwapInt32((uint32_t)inv); +} + +static __inline__ +unsigned long +NXSwapLong( + unsigned long inv +) +{ + return (unsigned long)OSSwapInt32((uint32_t)inv); +} + +static __inline__ +unsigned long long +NXSwapLongLong( + unsigned long long inv +) +{ + return (unsigned long long)OSSwapInt64((uint64_t)inv); +} + +static __inline__ NXSwappedFloat +NXConvertHostFloatToSwapped(float x) +{ + union fconv { + float number; + NXSwappedFloat sf; + } u; + u.number = x; + return u.sf; +} + +static __inline__ float +NXConvertSwappedFloatToHost(NXSwappedFloat x) +{ + union fconv { + float number; + NXSwappedFloat sf; + } u; + u.sf = x; + return u.number; +} + +static __inline__ NXSwappedDouble +NXConvertHostDoubleToSwapped(double x) +{ + union dconv { + double number; + NXSwappedDouble sd; + } u; + u.number = x; + return u.sd; +} + +static __inline__ double +NXConvertSwappedDoubleToHost(NXSwappedDouble x) +{ + union dconv { + double number; + NXSwappedDouble sd; + } u; + u.sd = x; + return u.number; +} + +static __inline__ NXSwappedFloat +NXSwapFloat(NXSwappedFloat x) +{ + return (NXSwappedFloat)OSSwapInt32((uint32_t)x); +} + +static __inline__ NXSwappedDouble +NXSwapDouble(NXSwappedDouble x) +{ + return (NXSwappedDouble)OSSwapInt64((uint64_t)x); +} + +/* + * Identify the byte order + * of the current host. 
+ */ + +enum NXByteOrder { + NX_UnknownByteOrder, + NX_LittleEndian, + NX_BigEndian +}; + +static __inline__ +enum NXByteOrder +NXHostByteOrder(void) +{ +#if defined(__LITTLE_ENDIAN__) + return NX_LittleEndian; +#elif defined(__BIG_ENDIAN__) + return NX_BigEndian; +#else + return NX_UnknownByteOrder; +#endif +} + +static __inline__ +unsigned short +NXSwapBigShortToHost( + unsigned short x +) +{ + return (unsigned short)OSSwapBigToHostInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapBigIntToHost( + unsigned int x +) +{ + return (unsigned int)OSSwapBigToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapBigLongToHost( + unsigned long x +) +{ + return (unsigned long)OSSwapBigToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapBigLongLongToHost( + unsigned long long x +) +{ + return (unsigned long long)OSSwapBigToHostInt64((uint64_t)x); +} + +static __inline__ +double +NXSwapBigDoubleToHost( + NXSwappedDouble x +) +{ + return NXConvertSwappedDoubleToHost((NXSwappedDouble)OSSwapBigToHostInt64((uint64_t)x)); +} + +static __inline__ +float +NXSwapBigFloatToHost( + NXSwappedFloat x +) +{ + return NXConvertSwappedFloatToHost((NXSwappedFloat)OSSwapBigToHostInt32((uint32_t)x)); +} + +static __inline__ +unsigned short +NXSwapHostShortToBig( + unsigned short x +) +{ + return (unsigned short)OSSwapHostToBigInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapHostIntToBig( + unsigned int x +) +{ + return (unsigned int)OSSwapHostToBigInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapHostLongToBig( + unsigned long x +) +{ + return (unsigned long)OSSwapHostToBigInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapHostLongLongToBig( + unsigned long long x +) +{ + return (unsigned long long)OSSwapHostToBigInt64((uint64_t)x); +} + +static __inline__ +NXSwappedDouble +NXSwapHostDoubleToBig( + double x +) +{ + return 
(NXSwappedDouble)OSSwapHostToBigInt64((uint64_t)NXConvertHostDoubleToSwapped(x)); +} + +static __inline__ +NXSwappedFloat +NXSwapHostFloatToBig( + float x +) +{ + return (NXSwappedFloat)OSSwapHostToBigInt32((uint32_t)NXConvertHostFloatToSwapped(x)); +} + +static __inline__ +unsigned short +NXSwapLittleShortToHost( + unsigned short x +) +{ + return (unsigned short)OSSwapLittleToHostInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapLittleIntToHost( + unsigned int x +) +{ + return (unsigned int)OSSwapLittleToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapLittleLongToHost( + unsigned long x +) +{ + return (unsigned long)OSSwapLittleToHostInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapLittleLongLongToHost( + unsigned long long x +) +{ + return (unsigned long long)OSSwapLittleToHostInt64((uint64_t)x); +} + +static __inline__ +double +NXSwapLittleDoubleToHost( + NXSwappedDouble x +) +{ + return NXConvertSwappedDoubleToHost((NXSwappedDouble)OSSwapLittleToHostInt64((uint64_t)x)); +} + +static __inline__ +float +NXSwapLittleFloatToHost( + NXSwappedFloat x +) +{ + return NXConvertSwappedFloatToHost((NXSwappedFloat)OSSwapLittleToHostInt32((uint32_t)x)); +} + +static __inline__ +unsigned short +NXSwapHostShortToLittle( + unsigned short x +) +{ + return (unsigned short)OSSwapHostToLittleInt16((uint16_t)x); +} + +static __inline__ +unsigned int +NXSwapHostIntToLittle( + unsigned int x +) +{ + return (unsigned int)OSSwapHostToLittleInt32((uint32_t)x); +} + +static __inline__ +unsigned long +NXSwapHostLongToLittle( + unsigned long x +) +{ + return (unsigned long)OSSwapHostToLittleInt32((uint32_t)x); +} + +static __inline__ +unsigned long long +NXSwapHostLongLongToLittle( + unsigned long long x +) +{ + return (unsigned long long)OSSwapHostToLittleInt64((uint64_t)x); +} + +static __inline__ +NXSwappedDouble +NXSwapHostDoubleToLittle( + double x +) +{ + return 
(NXSwappedDouble)OSSwapHostToLittleInt64((uint64_t)NXConvertHostDoubleToSwapped(x)); +} + +static __inline__ +NXSwappedFloat +NXSwapHostFloatToLittle( + float x +) +{ + return (NXSwappedFloat)OSSwapHostToLittleInt32((uint32_t)NXConvertHostFloatToSwapped(x)); +} + +#endif /* _ARCHITECTURE_BYTE_ORDER_H_ */ diff --git a/EXTERNAL_HEADERS/architecture/i386/Makefile b/EXTERNAL_HEADERS/architecture/i386/Makefile index 5ab63fe3e..5e5d47fa8 100644 --- a/EXTERNAL_HEADERS/architecture/i386/Makefile +++ b/EXTERNAL_HEADERS/architecture/i386/Makefile @@ -9,6 +9,8 @@ include $(MakeInc_def) INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = + EXPORT_FILES = \ asm_help.h \ cpu.h \ diff --git a/EXTERNAL_HEADERS/architecture/i386/pio.h b/EXTERNAL_HEADERS/architecture/i386/pio.h index 1c9586d62..c1f3760b5 100644 --- a/EXTERNAL_HEADERS/architecture/i386/pio.h +++ b/EXTERNAL_HEADERS/architecture/i386/pio.h @@ -61,10 +61,10 @@ typedef unsigned short i386_ioport_t; #if defined(__GNUC__) -static __inline__ unsigned long inl( +static __inline__ unsigned int inl( i386_ioport_t port) { - unsigned long datum; + unsigned int datum; __asm__ volatile("inl %w1, %0" : "=a" (datum) : "Nd" (port)); return(datum); } @@ -87,7 +87,7 @@ static __inline__ unsigned char inb( static __inline__ void outl( i386_ioport_t port, - unsigned long datum) + unsigned int datum) { __asm__ volatile("outl %0, %w1" : : "a" (datum), "Nd" (port)); } diff --git a/EXTERNAL_HEADERS/mach-o/Makefile b/EXTERNAL_HEADERS/mach-o/Makefile index efac1ba25..1ce373da0 100644 --- a/EXTERNAL_HEADERS/mach-o/Makefile +++ b/EXTERNAL_HEADERS/mach-o/Makefile @@ -7,7 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ +INSTINC_SUBDIRS = EXPORT_FILES = \ fat.h \ diff --git a/EXTERNAL_HEADERS/mach-o/arm/reloc.h b/EXTERNAL_HEADERS/mach-o/arm/reloc.h new file mode 100644 index 000000000..e2da8b80c --- /dev/null +++ b/EXTERNAL_HEADERS/mach-o/arm/reloc.h @@ -0,0 +1,42 @@ +/* + * 
Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Relocation types used in the arm implementation. Relocation entries for + * things other than instructions use the same generic relocation as described + * in <mach-o/reloc.h> and their r_type is ARM_RELOC_VANILLA, one of the + * *_SECTDIFF or the *_PB_LA_PTR types. The rest of the relocation types are + * for instructions. Since they are for instructions the r_address field + * indicates the 32 bit instruction that the relocation is to be performed on. + */ +enum reloc_type_arm +{ + ARM_RELOC_VANILLA, /* generic relocation as described above */ + ARM_RELOC_PAIR, /* the second relocation entry of a pair */ + ARM_RELOC_SECTDIFF, /* a PAIR follows with subtract symbol value */ + ARM_RELOC_LOCAL_SECTDIFF, /* like ARM_RELOC_SECTDIFF, but the symbol + referenced was local.
*/ + ARM_RELOC_PB_LA_PTR,/* prebound lazy pointer */ + ARM_RELOC_BR24, /* 24 bit branch displacement (to a word address) */ + ARM_THUMB_RELOC_BR22, /* 22 bit branch displacement (to a half-word + address) */ +}; diff --git a/EXTERNAL_HEADERS/mach-o/fat.h b/EXTERNAL_HEADERS/mach-o/fat.h index 6e724325f..e2bcf433d 100644 --- a/EXTERNAL_HEADERS/mach-o/fat.h +++ b/EXTERNAL_HEADERS/mach-o/fat.h @@ -1,19 +1,14 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,8 +18,10 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ +#ifndef _MACH_O_FAT_H_ +#define _MACH_O_FAT_H_ /* * This header file describes the structures of the file format for "fat" * architecture specific file (wrapper design). 
At the begining of the file @@ -44,36 +41,24 @@ * is needed here for the cpu_type_t and cpu_subtype_t types * and contains the constants for the possible values of these types. */ +#include #include +#include #define FAT_MAGIC 0xcafebabe -#define FAT_CIGAM 0xbebafeca +#define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */ struct fat_header { - unsigned long magic; /* FAT_MAGIC */ - unsigned long nfat_arch; /* number of structs that follow */ + uint32_t magic; /* FAT_MAGIC */ + uint32_t nfat_arch; /* number of structs that follow */ }; struct fat_arch { cpu_type_t cputype; /* cpu specifier (int) */ cpu_subtype_t cpusubtype; /* machine specifier (int) */ - unsigned long offset; /* file offset to this object file */ - unsigned long size; /* size of this object file */ - unsigned long align; /* alignment as a power of 2 */ + uint32_t offset; /* file offset to this object file */ + uint32_t size; /* size of this object file */ + uint32_t align; /* alignment as a power of 2 */ }; -#ifdef KERNEL - -#include - -struct vnode; - -/* XXX return type should be load_return_t, but mach_loader.h is not in scope */ -int fatfile_getarch_affinity(struct vnode *vp, vm_offset_t data_ptr, - struct fat_arch *archret, int affinity); -int fatfile_getarch(struct vnode *vp, vm_offset_t data_ptr, - struct fat_arch *archret); -int fatfile_getarch_with_bits(struct vnode *vp, integer_t archbits, - vm_offset_t data_ptr, struct fat_arch *archret); - -#endif /* KERNEL */ +#endif /* _MACH_O_FAT_H_ */ diff --git a/EXTERNAL_HEADERS/mach-o/kld.h b/EXTERNAL_HEADERS/mach-o/kld.h index 0fd1207e0..6b15999d3 100644 --- a/EXTERNAL_HEADERS/mach-o/kld.h +++ b/EXTERNAL_HEADERS/mach-o/kld.h @@ -1,19 +1,14 @@ /* * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. 
* - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,7 +18,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ #ifndef _MACHO_KLD_H_ @@ -33,7 +28,7 @@ #include /* - * These API's are in libkld. Both kmodload(8) and /mach_kernel should + * These API's are in libkld. Both kextload(8) and /mach_kernel should * link with -lkld and then ld(1) will expand -lkld to libkld.dylib or * libkld.a depending on if -dynamic or -static is in effect. * @@ -48,20 +43,20 @@ extern void kld_error_vprintf(const char *format, va_list ap); /* - * These two are only in libkld.dylib for use by kmodload(8) (user code compiled + * These two are only in libkld.dylib for use by kextload(8) (user code compiled * with the default -dynamic). 
*/ #ifdef __DYNAMIC__ -__private_extern__ long kld_load_basefile( +extern long kld_load_basefile( const char *base_filename); /* Note: this takes only one object file name */ -__private_extern__ long kld_load( +extern long kld_load( struct mach_header **header_addr, const char *object_filename, const char *output_filename); -__private_extern__ long kld_load_from_memory( +extern long kld_load_from_memory( struct mach_header **header_addr, const char *object_name, char *object_addr, @@ -70,40 +65,40 @@ __private_extern__ long kld_load_from_memory( #endif /* __DYNAMIC__ */ /* - * This two are only in libkld.a use by /mach_kernel (kernel code compiled with + * This one is only in libkld.a use by /mach_kernel (kernel code compiled with * -static). */ #ifdef __STATIC__ /* Note: this api does not write an output file */ -__private_extern__ long kld_load_from_memory( +extern long kld_load_from_memory( struct mach_header **header_addr, const char *object_name, char *object_addr, long object_size); #endif /* __STATIC__ */ -__private_extern__ long kld_load_basefile_from_memory( +extern long kld_load_basefile_from_memory( const char *base_filename, char *base_addr, long base_size); -__private_extern__ long kld_unload_all( +extern long kld_unload_all( long deallocate_sets); -__private_extern__ long kld_lookup( +extern long kld_lookup( const char *symbol_name, unsigned long *value); -__private_extern__ long kld_forget_symbol( +extern long kld_forget_symbol( const char *symbol_name); -__private_extern__ void kld_address_func( +extern void kld_address_func( unsigned long (*func)(unsigned long size, unsigned long headers_size)); #define KLD_STRIP_ALL 0x00000000 #define KLD_STRIP_NONE 0x00000001 -__private_extern__ void kld_set_link_options( +extern void kld_set_link_options( unsigned long link_options); #endif /* _MACHO_KLD_H_ */ diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h index fd49201b7..b00ac7a67 100644 --- a/EXTERNAL_HEADERS/mach-o/loader.h 
+++ b/EXTERNAL_HEADERS/mach-o/loader.h @@ -1,19 +1,14 @@ /* * Copyright (c) 1999-2008 Apple Inc. All Rights Reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,7 +18,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ #ifndef _MACHO_LOADER_H_ #define _MACHO_LOADER_H_ @@ -50,9 +45,7 @@ * states and the structures of those flavors for each machine. 
*/ #include -#ifndef KERNEL #include -#endif /* KERNEL */ /* * The 32-bit mach header appears at the very beginning of the object file for @@ -126,6 +119,7 @@ struct mach_header_64 { /* linking only, no section contents */ #define MH_DSYM 0xa /* companion file with only debug */ /* sections */ +#define MH_KEXT_BUNDLE 0xb /* x86_64 kexts */ /* Constants for the flags field of the mach_header */ #define MH_NOUNDEFS 0x1 /* the object file has no undefined @@ -180,6 +174,13 @@ struct mach_header_64 { in the task will be given stack execution privilege. Only used in MH_EXECUTE filetypes. */ +#define MH_DEAD_STRIPPABLE_DYLIB 0x400000 /* Only for use on dylibs. When + linking against a dylib that + has this bit set, the static linker + will automatically not create a + LC_LOAD_DYLIB load command to the + dylib if no symbols are being + referenced from the dylib. */ #define MH_ROOT_SAFE 0x40000 /* When this bit is set, the binary declares it is safe for use in processes with uid zero */ @@ -272,6 +273,8 @@ struct load_command { #define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */ #define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */ #define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */ +#define LC_DYLD_INFO 0x22 /* compressed dyld information */ +#define LC_DYLD_INFO_ONLY (0x22|LC_REQ_DYLD) /* compressed dyld information only */ /* * A variable length string in a load command is represented by an lc_str @@ -1132,6 +1135,171 @@ struct encryption_info_command { 0 means not-encrypted yet */ }; +/* + * The dyld_info_command contains the file offsets and sizes of + * the new compressed form of the information dyld needs to + * load the image. This information is used by dyld on Mac OS X + * 10.6 and later. All information pointed to by this command + * is encoded using byte streams, so no endian swapping is needed + * to interpret it. 
+ */ +struct dyld_info_command { + uint32_t cmd; /* LC_DYLD_INFO or LC_DYLD_INFO_ONLY */ + uint32_t cmdsize; /* sizeof(struct dyld_info_command) */ + + /* + * Dyld rebases an image whenever dyld loads it at an address different + * from its preferred address. The rebase information is a stream + * of byte sized opcodes whose symbolic names start with REBASE_OPCODE_. + * Conceptually the rebase information is a table of tuples: + * <seg-index, seg-offset, type> + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like "every n'th offset for m times" can be encoded in a few + * bytes. + */ + uint32_t rebase_off; /* file offset to rebase info */ + uint32_t rebase_size; /* size of rebase info */ + + /* + * Dyld binds an image during the loading process, if the image + * requires any pointers to be initialized to symbols in other images. + * The bind information is a stream of byte sized + * opcodes whose symbolic names start with BIND_OPCODE_. + * Conceptually the bind information is a table of tuples: + * <seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend> + * The opcodes are a compressed way to encode the table by only + * encoding when a column changes. In addition simple patterns + * like for runs of pointers initialized to the same value can be + * encoded in a few bytes. + */ + uint32_t bind_off; /* file offset to binding info */ + uint32_t bind_size; /* size of binding info */ + + /* + * Some C++ programs require dyld to unique symbols so that all + * images in the process use the same copy of some code/data. + * This step is done after binding. The content of the weak_bind + * info is an opcode stream like the bind_info. But it is sorted + * alphabetically by symbol name. This enables dyld to walk + * all images with weak binding information in order and look + * for collisions. If there are no collisions, dyld does + * no updating. That means that some fixups are also encoded + * in the bind_info.
For instance, all calls to "operator new" + * are first bound to libstdc++.dylib using the information + * in bind_info. Then if some image overrides operator new + * that is detected when the weak_bind information is processed + * and the call to operator new is then rebound. + */ + uint32_t weak_bind_off; /* file offset to weak binding info */ + uint32_t weak_bind_size; /* size of weak binding info */ + + /* + * Some uses of external symbols do not need to be bound immediately. + * Instead they can be lazily bound on first use. The lazy_bind + * area contains a stream of BIND opcodes to bind all lazy symbols. + * Normal use is that dyld ignores the lazy_bind section when + * loading an image. Instead the static linker arranged for the + * lazy pointer to initially point to a helper function which + * pushes the offset into the lazy_bind area for the symbol + * needing to be bound, then jumps to dyld which simply adds + * the offset to lazy_bind_off to get the information on what + * to bind. + */ + uint32_t lazy_bind_off; /* file offset to lazy binding info */ + uint32_t lazy_bind_size; /* size of lazy binding info */ + + /* + * The symbols exported by a dylib are encoded in a trie. This + * is a compact representation that factors out common prefixes. + * It also reduces LINKEDIT pages in RAM because it encodes all + * information (name, address, flags) in one small, contiguous range. + * The export area is a stream of nodes. The first node sequentially + * is the start node for the trie. + * + * Nodes for a symbol start with a byte that is the length of + * the exported symbol information for the string so far. + * If there is no exported symbol, the byte is zero. If there + * is exported info, it follows the length byte. The exported + * info normally consists of a flags and offset both encoded + * in uleb128. The offset is location of the content named + * by the symbol. It is the offset from the mach_header for + * the image.
+ * + * After the initial byte and optional exported symbol information + * is a byte of how many edges (0-255) that this node has leaving + * it, followed by each edge. + * Each edge is a zero terminated cstring of the additional chars + * in the symbol, followed by a uleb128 offset for the node that + * edge points to. + * + */ + uint32_t export_off; /* file offset to export info */ + uint32_t export_size; /* size of export info */ +}; + +/* + * The following are used to encode rebasing information + */ +#define REBASE_TYPE_POINTER 1 +#define REBASE_TYPE_TEXT_ABSOLUTE32 2 +#define REBASE_TYPE_TEXT_PCREL32 3 + +#define REBASE_OPCODE_MASK 0xF0 +#define REBASE_IMMEDIATE_MASK 0x0F +#define REBASE_OPCODE_DONE 0x00 +#define REBASE_OPCODE_SET_TYPE_IMM 0x10 +#define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x20 +#define REBASE_OPCODE_ADD_ADDR_ULEB 0x30 +#define REBASE_OPCODE_ADD_ADDR_IMM_SCALED 0x40 +#define REBASE_OPCODE_DO_REBASE_IMM_TIMES 0x50 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES 0x60 +#define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB 0x70 +#define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB 0x80 + + +/* + * The following are used to encode binding information + */ +#define BIND_TYPE_POINTER 1 +#define BIND_TYPE_TEXT_ABSOLUTE32 2 +#define BIND_TYPE_TEXT_PCREL32 3 + +#define BIND_SPECIAL_DYLIB_SELF 0 +#define BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE -1 +#define BIND_SPECIAL_DYLIB_FLAT_LOOKUP -2 + +#define BIND_SYMBOL_FLAGS_WEAK_IMPORT 0x1 +#define BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION 0x8 + +#define BIND_OPCODE_MASK 0xF0 +#define BIND_IMMEDIATE_MASK 0x0F +#define BIND_OPCODE_DONE 0x00 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_IMM 0x10 +#define BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB 0x20 +#define BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 0x30 +#define BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM 0x40 +#define BIND_OPCODE_SET_TYPE_IMM 0x50 +#define BIND_OPCODE_SET_ADDEND_SLEB 0x60 +#define BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB 0x70 +#define BIND_OPCODE_ADD_ADDR_ULEB 0x80
+#define BIND_OPCODE_DO_BIND 0x90 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB 0xA0 +#define BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED 0xB0 +#define BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB 0xC0 + + +/* + * The following are used on the flags byte of a terminal node + * in the export information. + */ +#define EXPORT_SYMBOL_FLAGS_KIND_MASK 0x03 +#define EXPORT_SYMBOL_FLAGS_KIND_REGULAR 0x00 +#define EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL 0x01 +#define EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION 0x04 +#define EXPORT_SYMBOL_FLAGS_INDIRECT_DEFINITION 0x08 +#define EXPORT_SYMBOL_FLAGS_HAS_SPECIALIZATIONS 0x10 + /* * The symseg_command contains the offset and size of the GNU style * symbol table information as described in the header file . diff --git a/EXTERNAL_HEADERS/mach-o/nlist.h b/EXTERNAL_HEADERS/mach-o/nlist.h index bcd71f61a..868ec2046 100644 --- a/EXTERNAL_HEADERS/mach-o/nlist.h +++ b/EXTERNAL_HEADERS/mach-o/nlist.h @@ -1,19 +1,14 @@ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. 
Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,7 +18,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ #ifndef _MACHO_NLIST_H_ #define _MACHO_NLIST_H_ @@ -68,39 +63,42 @@ * * @(#)nlist.h 8.2 (Berkeley) 1/21/94 */ +#include /* - * Format of a symbol table entry of a Mach-O file. Modified from the BSD - * format. The modifications from the original format were changing n_other - * (an unused field) to n_sect and the addition of the N_SECT type. These - * modifications are required to support symbols in an arbitrary number of - * sections not just the three sections (text, data and bss) in a BSD file. + * Format of a symbol table entry of a Mach-O file for 32-bit architectures. + * Modified from the BSD format. The modifications from the original format + * were changing n_other (an unused field) to n_sect and the addition of the + * N_SECT type. These modifications are required to support symbols in a larger + * number of sections not just the three sections (text, data and bss) in a BSD + * file. 
*/ struct nlist { union { +#ifndef __LP64__ char *n_name; /* for use when in-core */ - long n_strx; /* index into the string table */ +#endif + int32_t n_strx; /* index into the string table */ } n_un; - unsigned char n_type; /* type flag, see below */ - unsigned char n_sect; /* section number or NO_SECT */ - short n_desc; /* see */ - unsigned long n_value; /* value of this symbol (or stab offset) */ + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + int16_t n_desc; /* see */ + uint32_t n_value; /* value of this symbol (or stab offset) */ }; /* * This is the symbol table entry structure for 64-bit architectures. */ struct nlist_64 { - union { - uint32_t n_strx; /* index into the string table */ - } n_un; - uint8_t n_type; /* type flag, see below */ - uint8_t n_sect; /* section number or NO_SECT */ - uint16_t n_desc; /* see */ - uint64_t n_value; /* value of this symbol (or stab offset) */ + union { + uint32_t n_strx; /* index into the string table */ + } n_un; + uint8_t n_type; /* type flag, see below */ + uint8_t n_sect; /* section number or NO_SECT */ + uint16_t n_desc; /* see */ + uint64_t n_value; /* value of this symbol (or stab offset) */ }; - /* * Symbols with a index into the string table of zero (n_un.n_strx == 0) are * defined to have a null, "", name. Therefore all string indexes to non null @@ -109,7 +107,7 @@ struct nlist_64 { */ /* - * The n_type field really contains three fields: + * The n_type field really contains four fields: * unsigned char N_STAB:3, * N_PEXT:1, * N_TYPE:3, @@ -163,8 +161,14 @@ struct nlist_64 { * Common symbols are represented by undefined (N_UNDF) external (N_EXT) types * who's values (n_value) are non-zero. In which case the value of the n_value * field is the size (in bytes) of the common symbol. The n_sect field is set - * to NO_SECT. + * to NO_SECT. 
The alignment of a common symbol may be set as a power of 2 + * between 2^1 and 2^15 as part of the n_desc field using the macros below. If + * the alignment is not set (a value of zero) then natural alignment based on + * the size is used. */ +#define GET_COMM_ALIGN(n_desc) (((n_desc) >> 8) & 0x0f) +#define SET_COMM_ALIGN(n_desc,align) \ + (n_desc) = (((n_desc) & 0xf0ff) | (((align) & 0x0f) << 8)) /* * To support the lazy binding of undefined symbols in the dynamic link-editor, @@ -186,7 +190,7 @@ struct nlist_64 { * REFERENCE_FLAG_DEFINED, is also used. */ /* Reference type bits of the n_desc field of undefined symbols */ -#define REFERENCE_TYPE 0xf +#define REFERENCE_TYPE 0x7 /* types of references */ #define REFERENCE_FLAG_UNDEFINED_NON_LAZY 0 #define REFERENCE_FLAG_UNDEFINED_LAZY 1 @@ -204,10 +208,95 @@ struct nlist_64 { #define REFERENCED_DYNAMICALLY 0x0010 /* - * The non-reference type bits of the n_desc field for global symbols are - * reserved for the dynamic link editor. All of these bits must start out - * zero in the object file. + * For images created by the static link editor with the -twolevel_namespace + * option in effect the flags field of the mach header is marked with + * MH_TWOLEVEL. And the binding of the undefined references of the image are + * determined by the static link editor. Which library an undefined symbol is + * bound to is recorded by the static linker in the high 8 bits of the n_desc + * field using the SET_LIBRARY_ORDINAL macro below. The ordinal recorded + * references the libraries listed in the Mach-O's LC_LOAD_DYLIB load commands + * in the order they appear in the headers. The library ordinals start from 1. + * For a dynamic library that is built as a two-level namespace image the + * undefined references from module defined in another use the same nlist struct + * an in that case SELF_LIBRARY_ORDINAL is used as the library ordinal. 
For + * defined symbols in all images they also must have the library ordinal set to + * SELF_LIBRARY_ORDINAL. The EXECUTABLE_ORDINAL refers to the executable + * image for references from plugins that refer to the executable that loads + * them. + * + * The DYNAMIC_LOOKUP_ORDINAL is for undefined symbols in a two-level namespace + * image that are looked up by the dynamic linker with flat namespace semantics. + * This ordinal was added as a feature in Mac OS X 10.3 by reducing the + * value of MAX_LIBRARY_ORDINAL by one. So it is legal for existing binaries + * or binaries built with older tools to have 0xfe (254) dynamic libraries. In + * this case the ordinal value 0xfe (254) must be treated as a library ordinal + * for compatibility. */ -#define N_DESC_DISCARDED 0x8000 /* symbol is discarded */ +#define GET_LIBRARY_ORDINAL(n_desc) (((n_desc) >> 8) & 0xff) +#define SET_LIBRARY_ORDINAL(n_desc,ordinal) \ + (n_desc) = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8)) +#define SELF_LIBRARY_ORDINAL 0x0 +#define MAX_LIBRARY_ORDINAL 0xfd +#define DYNAMIC_LOOKUP_ORDINAL 0xfe +#define EXECUTABLE_ORDINAL 0xff -#endif +/* + * The bit 0x0020 of the n_desc field is used for two non-overlapping purposes + * and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED. + */ + +/* + * The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a + * relocatable .o file (MH_OBJECT filetype). And is used to indicate to the + * static link editor it is never to dead strip the symbol. + */ +#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */ + +/* + * The N_DESC_DISCARDED bit of the n_desc field never appears in a linked image. + * But is used in very rare cases by the dynamic link editor to mark an in + * memory symbol as discarded and no longer used for linking.
+ */ +#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */ + +/* + * The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that + * the undefined symbol is allowed to be missing and is to have the address of + * zero when missing. + */ +#define N_WEAK_REF 0x0040 /* symbol is weak referenced */ + +/* + * The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic + * linkers that the symbol definition is weak, allowing a non-weak symbol to + * also be used which causes the weak definition to be discarded. Currently this + * is only supported for symbols in coalesced sections. + */ +#define N_WEAK_DEF 0x0080 /* coalesced symbol is a weak definition */ + +/* + * The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker + * that the undefined symbol should be resolved using flat namespace searching. + */ +#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol */ + +/* + * The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is + * a definition of a Thumb function. + */ +#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */ + +#ifndef __STRICT_BSD__ +#if __cplusplus +extern "C" { +#endif /* __cplusplus */ +/* + * The function nlist(3) from the C library. + */ +extern int nlist (const char *filename, struct nlist *list); +#if __cplusplus +} +#endif /* __cplusplus */ +#endif /* __STRICT_BSD__ */ + +#endif /* _MACHO_NLIST_H_ */ diff --git a/EXTERNAL_HEADERS/mach-o/ppc/reloc.h b/EXTERNAL_HEADERS/mach-o/ppc/reloc.h new file mode 100644 index 000000000..7b564cc0a --- /dev/null +++ b/EXTERNAL_HEADERS/mach-o/ppc/reloc.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License.
Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Relocation types used in the ppc implementation. Relocation entries for + * things other than instructions use the same generic relocation as described + * above and their r_type is RELOC_VANILLA. The rest of the relocation types + * are for instructions. Since they are for instructions the r_address field + * indicates the 32 bit instruction that the relocation is to be performed on. + * The fields r_pcrel and r_length are ignored for non-RELOC_VANILLA r_types + * except for PPC_RELOC_BR14. + * + * For PPC_RELOC_BR14 if the r_length is the unused value 3, then the branch was + * statically predicted setting or clearing the Y-bit based on the sign of the + * displacement or the opcode. If this is the case the static linker must flip + * the value of the Y-bit if the sign of the displacement changes for non-branch + * always conditions.
+ */ +enum reloc_type_ppc +{ + PPC_RELOC_VANILLA, /* generic relocation as described above */ + PPC_RELOC_PAIR, /* the second relocation entry of a pair */ + PPC_RELOC_BR14, /* 14 bit branch displacement (to a word address) */ + PPC_RELOC_BR24, /* 24 bit branch displacement (to a word address) */ + PPC_RELOC_HI16, /* a PAIR follows with the low half */ + PPC_RELOC_LO16, /* a PAIR follows with the high half */ + PPC_RELOC_HA16, /* Same as the RELOC_HI16 except the low 16 bits and the + * high 16 bits are added together with the low 16 bits + * sign extended first. This means if bit 15 of the low + * 16 bits is set the high 16 bits stored in the + * instruction will be adjusted. + */ + PPC_RELOC_LO14, /* Same as the LO16 except that the low 2 bits are not + * stored in the instruction and are always zero. This + * is used in double word load/store instructions. + */ + PPC_RELOC_SECTDIFF, /* a PAIR follows with subtract symbol value */ + PPC_RELOC_PB_LA_PTR,/* prebound lazy pointer */ + PPC_RELOC_HI16_SECTDIFF, /* section difference forms of above. a PAIR */ + PPC_RELOC_LO16_SECTDIFF, /* follows these with subtract symbol value */ + PPC_RELOC_HA16_SECTDIFF, + PPC_RELOC_JBSR, + PPC_RELOC_LO14_SECTDIFF, + PPC_RELOC_LOCAL_SECTDIFF /* like PPC_RELOC_SECTDIFF, but the symbol + referenced was local. */ +}; diff --git a/EXTERNAL_HEADERS/mach-o/reloc.h b/EXTERNAL_HEADERS/mach-o/reloc.h index 172a5b523..e36f4f734 100644 --- a/EXTERNAL_HEADERS/mach-o/reloc.h +++ b/EXTERNAL_HEADERS/mach-o/reloc.h @@ -1,19 +1,14 @@ /* * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License.
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,7 +18,7 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ /* $NetBSD: exec.h,v 1.6 1994/10/27 04:16:05 cgd Exp $ */ @@ -56,6 +51,7 @@ #ifndef _MACHO_RELOC_H_ #define _MACHO_RELOC_H_ +#include <stdint.h> /* * Format of a relocation entry of a Mach-O file. Modified from the 4.3BSD @@ -66,12 +62,12 @@ * Also the last 4 bits have had the r_type tag added to them.
*/ struct relocation_info { - long r_address; /* offset in the section to what is being + int32_t r_address; /* offset in the section to what is being relocated */ - unsigned int r_symbolnum:24, /* symbol index if r_extern == 1 or section + uint32_t r_symbolnum:24, /* symbol index if r_extern == 1 or section ordinal if r_extern == 0 */ r_pcrel:1, /* was relocated pc relative already */ - r_length:2, /* 0=byte, 1=word, 2=long */ + r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */ r_extern:1, /* does not include value of sym referenced */ r_type:4; /* if not 0, machine specific relocation type */ }; @@ -154,31 +150,31 @@ struct relocation_info { stucture */ struct scattered_relocation_info { #ifdef __BIG_ENDIAN__ - unsigned int r_scattered:1, /* 1=scattered, 0=non-scattered (see above) */ + uint32_t r_scattered:1, /* 1=scattered, 0=non-scattered (see above) */ r_pcrel:1, /* was relocated pc relative already */ - r_length:2, /* 0=byte, 1=word, 2=long */ + r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */ r_type:4, /* if not 0, machine specific relocation type */ r_address:24; /* offset in the section to what is being relocated */ - long r_value; /* the value the item to be relocated is + int32_t r_value; /* the value the item to be relocated is refering to (without any offset added) */ #endif /* __BIG_ENDIAN__ */ #ifdef __LITTLE_ENDIAN__ - unsigned int + uint32_t r_address:24, /* offset in the section to what is being relocated */ r_type:4, /* if not 0, machine specific relocation type */ - r_length:2, /* 0=byte, 1=word, 2=long */ + r_length:2, /* 0=byte, 1=word, 2=long, 3=quad */ r_pcrel:1, /* was relocated pc relative already */ r_scattered:1; /* 1=scattered, 0=non-scattered (see above) */ - long r_value; /* the value the item to be relocated is + int32_t r_value; /* the value the item to be relocated is refering to (without any offset added) */ #endif /* __LITTLE_ENDIAN__ */ }; /* * Relocation types used in a generic implementation. 
Relocation entries for - * nornal things use the generic relocation as discribed above and their r_type + * normal things use the generic relocation as discribed above and their r_type * is GENERIC_RELOC_VANILLA (a value of zero). * * Another type of generic relocation, GENERIC_RELOC_SECTDIFF, is to support @@ -197,9 +193,10 @@ struct scattered_relocation_info { enum reloc_type_generic { GENERIC_RELOC_VANILLA, /* generic relocation as discribed above */ - GENERIC_RELOC_PAIR, /* Only follows a GENRIC_RELOC_SECTDIFF */ + GENERIC_RELOC_PAIR, /* Only follows a GENERIC_RELOC_SECTDIFF */ GENERIC_RELOC_SECTDIFF, - GENERIC_RELOC_PB_LA_PTR /* prebound lazy pointer */ + GENERIC_RELOC_PB_LA_PTR, /* prebound lazy pointer */ + GENERIC_RELOC_LOCAL_SECTDIFF }; #endif /* _MACHO_RELOC_H_ */ diff --git a/EXTERNAL_HEADERS/mach-o/stab.h b/EXTERNAL_HEADERS/mach-o/stab.h new file mode 100644 index 000000000..e9e15b27a --- /dev/null +++ b/EXTERNAL_HEADERS/mach-o/stab.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _MACHO_STAB_H_ +#define _MACHO_STAB_H_ +/* $NetBSD: stab.h,v 1.4 1994/10/26 00:56:25 cgd Exp $ */ + +/*- + * Copyright (c) 1991 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)stab.h 5.2 (Berkeley) 4/4/91 + */ + +/* + * This file gives definitions supplementing <nlist.h> for permanent symbol + * table entries of Mach-O files. Modified from the BSD definitions. The + * modifications from the original definitions were changing what the values of + * what was the n_other field (an unused field) which is now the n_sect field. + * These modifications are required to support symbols in an arbitrary number of + * sections not just the three sections (text, data and bss) in a BSD file. + * The values of the defined constants have NOT been changed. + * + * These must have one of the N_STAB bits on. The n_value fields are subject + * to relocation according to the value of their n_sect field. So for types + * that refer to things in sections the n_sect field must be filled in with the + * proper section ordinal. For types that are not to have their n_value field + * relocated the n_sect field must be NO_SECT. + */ + +/* + * Symbolic debugger symbols. The comments give the conventional use for + * + * .stabs "n_name", n_type, n_sect, n_desc, n_value + * + * where n_type is the defined constant and not listed in the comment. Other + * fields not listed are zero. n_sect is the section ordinal the entry is + * referring to.
+ */ +#define N_GSYM 0x20 /* global symbol: name,,NO_SECT,type,0 */ +#define N_FNAME 0x22 /* procedure name (f77 kludge): name,,NO_SECT,0,0 */ +#define N_FUN 0x24 /* procedure: name,,n_sect,linenumber,address */ +#define N_STSYM 0x26 /* static symbol: name,,n_sect,type,address */ +#define N_LCSYM 0x28 /* .lcomm symbol: name,,n_sect,type,address */ +#define N_BNSYM 0x2e /* begin nsect sym: 0,,n_sect,0,address */ +#define N_OPT 0x3c /* emitted with gcc2_compiled and in gcc source */ +#define N_RSYM 0x40 /* register sym: name,,NO_SECT,type,register */ +#define N_SLINE 0x44 /* src line: 0,,n_sect,linenumber,address */ +#define N_ENSYM 0x4e /* end nsect sym: 0,,n_sect,0,address */ +#define N_SSYM 0x60 /* structure elt: name,,NO_SECT,type,struct_offset */ +#define N_SO 0x64 /* source file name: name,,n_sect,0,address */ +#define N_OSO 0x66 /* object file name: name,,0,0,st_mtime */ +#define N_LSYM 0x80 /* local sym: name,,NO_SECT,type,offset */ +#define N_BINCL 0x82 /* include file beginning: name,,NO_SECT,0,sum */ +#define N_SOL 0x84 /* #included file name: name,,n_sect,0,address */ +#define N_PARAMS 0x86 /* compiler parameters: name,,NO_SECT,0,0 */ +#define N_VERSION 0x88 /* compiler version: name,,NO_SECT,0,0 */ +#define N_OLEVEL 0x8A /* compiler -O level: name,,NO_SECT,0,0 */ +#define N_PSYM 0xa0 /* parameter: name,,NO_SECT,type,offset */ +#define N_EINCL 0xa2 /* include file end: name,,NO_SECT,0,0 */ +#define N_ENTRY 0xa4 /* alternate entry: name,,n_sect,linenumber,address */ +#define N_LBRAC 0xc0 /* left bracket: 0,,NO_SECT,nesting level,address */ +#define N_EXCL 0xc2 /* deleted include file: name,,NO_SECT,0,sum */ +#define N_RBRAC 0xe0 /* right bracket: 0,,NO_SECT,nesting level,address */ +#define N_BCOMM 0xe2 /* begin common: name,,NO_SECT,0,0 */ +#define N_ECOMM 0xe4 /* end common: name,,n_sect,0,0 */ +#define N_ECOML 0xe8 /* end common (local name): 0,,n_sect,0,address */ +#define N_LENG 0xfe /* second stab entry with length information */ + +/* + * for the 
berkeley pascal compiler, pc(1): + */ +#define N_PC 0x30 /* global pascal symbol: name,,NO_SECT,subtype,line */ + +#endif /* _MACHO_STAB_H_ */ diff --git a/EXTERNAL_HEADERS/mach-o/x86_64/reloc.h b/EXTERNAL_HEADERS/mach-o/x86_64/reloc.h new file mode 100644 index 000000000..74edf082c --- /dev/null +++ b/EXTERNAL_HEADERS/mach-o/x86_64/reloc.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +/* + * Relocations for x86_64 are a bit different than for other architectures in + * Mach-O: Scattered relocations are not used. Almost all relocations produced + * by the compiler are external relocations. An external relocation has the + * r_extern bit set to 1 and the r_symbolnum field contains the symbol table + * index of the target label. + * + * When the assembler is generating relocations, if the target label is a local + * label (begins with 'L'), then the previous non-local label in the same + * section is used as the target of the external relocation. 
An addend is used + * with the distance from that non-local label to the target label. Only when + * there is no previous non-local label in the section is an internal + * relocation used. + * + * The addend (i.e. the 4 in _foo+4) is encoded in the instruction (Mach-O does + * not have RELA relocations). For PC-relative relocations, the addend is + * stored directly in the instruction. This is different from other Mach-O + * architectures, which encode the addend minus the current section offset. + * + * The relocation types are: + * + * X86_64_RELOC_UNSIGNED // for absolute addresses + * X86_64_RELOC_SIGNED // for signed 32-bit displacement + * X86_64_RELOC_BRANCH // a CALL/JMP instruction with 32-bit displacement + * X86_64_RELOC_GOT_LOAD // a MOVQ load of a GOT entry + * X86_64_RELOC_GOT // other GOT references + * X86_64_RELOC_SUBTRACTOR // must be followed by a X86_64_RELOC_UNSIGNED + * + * The following are sample assembly instructions, followed by the relocation + * and section content they generate in an object file: + * + * call _foo + * r_type=X86_64_RELOC_BRANCH, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * E8 00 00 00 00 + * + * call _foo+4 + * r_type=X86_64_RELOC_BRANCH, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * E8 04 00 00 00 + * + * movq _foo@GOTPCREL(%rip), %rax + * r_type=X86_64_RELOC_GOT_LOAD, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * 48 8B 05 00 00 00 00 + * + * pushq _foo@GOTPCREL(%rip) + * r_type=X86_64_RELOC_GOT, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * FF 35 00 00 00 00 + * + * movl _foo(%rip), %eax + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * 8B 05 00 00 00 00 + * + * movl _foo+4(%rip), %eax + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * 8B 05 04 00 00 00 + * + * movb $0x12, _foo(%rip) + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * C6 05 FF FF FF FF 12 + * + * movl 
$0x12345678, _foo(%rip) + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_foo + * C7 05 FC FF FF FF 78 56 34 12 + * + * .quad _foo + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * 00 00 00 00 00 00 00 00 + * + * .quad _foo+4 + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * 04 00 00 00 00 00 00 00 + * + * .quad _foo - _bar + * r_type=X86_64_RELOC_SUBTRACTOR, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_bar + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * 00 00 00 00 00 00 00 00 + * + * .quad _foo - _bar + 4 + * r_type=X86_64_RELOC_SUBTRACTOR, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_bar + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * 04 00 00 00 00 00 00 00 + * + * .long _foo - _bar + * r_type=X86_64_RELOC_SUBTRACTOR, r_length=2, r_extern=1, r_pcrel=0, r_symbolnum=_bar + * r_type=X86_64_RELOC_UNSIGNED, r_length=2, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * 00 00 00 00 + * + * lea L1(%rip), %rax + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=1, r_pcrel=1, r_symbolnum=_prev + * 48 8d 05 12 00 00 00 + * // assumes _prev is the first non-local label 0x12 bytes before L1 + * + * lea L0(%rip), %rax + * r_type=X86_64_RELOC_SIGNED, r_length=2, r_extern=0, r_pcrel=1, r_symbolnum=3 + * 48 8d 05 56 00 00 00 + * // assumes L0 is in third section, has an address of 0x00000056 in .o + * // file, and there is no previous non-local label + * + * .quad L1 + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_prev + * 12 00 00 00 00 00 00 00 + * // assumes _prev is the first non-local label 0x12 bytes before L1 + * + * .quad L0 + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=0, r_pcrel=0, r_symbolnum=3 + * 56 00 00 00 00 00 00 00 + * // assumes L0 is in third section, has an address of 0x00000056 in .o + * // file, and there is no previous 
non-local label + * + * .quad _foo - . + * r_type=X86_64_RELOC_SUBTRACTOR, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_prev + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * EE FF FF FF FF FF FF FF + * // assumes _prev is the first non-local label 0x12 bytes before this + * // .quad + * + * .quad _foo - L1 + * r_type=X86_64_RELOC_SUBTRACTOR, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_prev + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_extern=1, r_pcrel=0, r_symbolnum=_foo + * EE FF FF FF FF FF FF FF + * // assumes _prev is the first non-local label 0x12 bytes before L1 + * + * .quad L1 - _prev + * // No relocations. This is an assembly time constant. + * 12 00 00 00 00 00 00 00 + * // assumes _prev is the first non-local label 0x12 bytes before L1 + * + * + * + * In final linked images, there are only two valid relocation kinds: + * + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_pcrel=0, r_extern=1, r_symbolnum=sym_index + * This tells dyld to add the address of a symbol to a pointer sized (8-byte) + * piece of data (i.e on disk the 8-byte piece of data contains the addend). The + * r_symbolnum contains the index into the symbol table of the target symbol. + * + * r_type=X86_64_RELOC_UNSIGNED, r_length=3, r_pcrel=0, r_extern=0, r_symbolnum=0 + * This tells dyld to adjust the pointer sized (8-byte) piece of data by the amount + * the containing image was loaded from its base address (e.g. slide). 
+ * + */ +enum reloc_type_x86_64 +{ + X86_64_RELOC_UNSIGNED, // for absolute addresses + X86_64_RELOC_SIGNED, // for signed 32-bit displacement + X86_64_RELOC_BRANCH, // a CALL/JMP instruction with 32-bit displacement + X86_64_RELOC_GOT_LOAD, // a MOVQ load of a GOT entry + X86_64_RELOC_GOT, // other GOT references + X86_64_RELOC_SUBTRACTOR, // must be followed by a X86_64_RELOC_UNSIGNED + X86_64_RELOC_SIGNED_1, // for signed 32-bit displacement with a -1 addend + X86_64_RELOC_SIGNED_2, // for signed 32-bit displacement with a -2 addend + X86_64_RELOC_SIGNED_4, // for signed 32-bit displacement with a -4 addend +}; diff --git a/EXTERNAL_HEADERS/machine/Makefile b/EXTERNAL_HEADERS/machine/Makefile deleted file mode 100644 index e6aee2489..000000000 --- a/EXTERNAL_HEADERS/machine/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_PPC = - -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_ARM = - -EXPORT_FILES = \ - limits.h - -INSTALL_MI_LIST = - -INSTALL_MI_DIR = . - -EXPORT_MI_LIST = ${EXPORT_FILES} - -EXPORT_MI_DIR = machine - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/EXTERNAL_HEADERS/machine/limits.h b/EXTERNAL_HEADERS/machine/limits.h deleted file mode 100644 index 4ab4a836d..000000000 --- a/EXTERNAL_HEADERS/machine/limits.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#if !defined (_LIMITS_H___) && !defined (_MACH_MACHLIMITS_H_) -#if defined (__ppc__) -#include -#elif defined (__i386__) -#include -#else -#error architecture not supported -#endif -#undef MB_LEN_MAX -#endif -#ifndef _LIMITS_H___ -#ifndef _MACH_MACHLIMITS_H_ - -/* _MACH_MACHLIMITS_H_ is used on OSF/1. */ -#define _LIMITS_H___ -#define _MACH_MACHLIMITS_H_ - -/* Number of bits in a `char'. */ -#undef CHAR_BIT -#define CHAR_BIT 8 - -/* Maximum length of a multibyte character. */ -#ifndef MB_LEN_MAX -#define MB_LEN_MAX 1 -#endif - -/* Minimum and maximum values a `signed char' can hold. */ -#undef SCHAR_MIN -#define SCHAR_MIN (-128) -#undef SCHAR_MAX -#define SCHAR_MAX 127 - -/* Maximum value an `unsigned char' can hold. (Minimum is 0). */ -#undef UCHAR_MAX -#define UCHAR_MAX 255 - -/* Minimum and maximum values a `char' can hold. 
*/ -#ifdef __CHAR_UNSIGNED__ -#undef CHAR_MIN -#define CHAR_MIN 0 -#undef CHAR_MAX -#define CHAR_MAX 255 -#else -#undef CHAR_MIN -#define CHAR_MIN (-128) -#undef CHAR_MAX -#define CHAR_MAX 127 -#endif - -/* Minimum and maximum values a `signed short int' can hold. */ -#undef SHRT_MIN -#define SHRT_MIN (-32768) -#undef SHRT_MAX -#define SHRT_MAX 32767 - -/* Maximum value an `unsigned short int' can hold. (Minimum is 0). */ -#undef USHRT_MAX -#define USHRT_MAX 65535 - -/* Minimum and maximum values a `signed int' can hold. */ -#ifndef __INT_MAX__ -#define __INT_MAX__ 2147483647 -#endif -#undef INT_MIN -#define INT_MIN (-INT_MAX-1) -#undef INT_MAX -#define INT_MAX __INT_MAX__ - -/* Maximum value an `unsigned int' can hold. (Minimum is 0). */ -#undef UINT_MAX -#define UINT_MAX (INT_MAX * 2U + 1) - -/* Minimum and maximum values a `signed long int' can hold. - (Same as `int'). */ -#ifndef __LONG_MAX__ -#define __LONG_MAX__ 2147483647L -#endif -#undef LONG_MIN -#define LONG_MIN (-LONG_MAX-1) -#undef LONG_MAX -#define LONG_MAX __LONG_MAX__ - -/* Maximum value an `unsigned long int' can hold. (Minimum is 0). */ -#undef ULONG_MAX -#define ULONG_MAX (LONG_MAX * 2UL + 1) - -#if defined (__GNU_LIBRARY__) ? defined (__USE_GNU) : !defined (__STRICT_ANSI__) -/* Minimum and maximum values a `signed long long int' can hold. */ -#ifndef __LONG_LONG_MAX__ -#define __LONG_LONG_MAX__ 9223372036854775807LL -#endif -#undef LONG_LONG_MIN -#define LONG_LONG_MIN (-LONG_LONG_MAX-1) -#undef LONG_LONG_MAX -#define LONG_LONG_MAX __LONG_LONG_MAX__ - -/* Maximum value an `unsigned long long int' can hold. (Minimum is 0). 
*/ -#undef ULONG_LONG_MAX -#define ULONG_LONG_MAX (LONG_LONG_MAX * 2ULL + 1) -#endif - -#endif /* _MACH_MACHLIMITS_H_ */ -#endif /* _LIMITS_H___ */ diff --git a/Makefile b/Makefile index c24af4cb4..57c8a4c88 100644 --- a/Makefile +++ b/Makefile @@ -5,13 +5,13 @@ ifndef SRCROOT export SRCROOT=$(shell /bin/pwd) endif ifndef OBJROOT -export OBJROOT=$(SRCROOT)/BUILD/obj +export OBJROOT=$(SRCROOT)/BUILD/obj/ endif ifndef DSTROOT -export DSTROOT=$(SRCROOT)/BUILD/dst +export DSTROOT=$(SRCROOT)/BUILD/dst/ endif ifndef SYMROOT -export SYMROOT=$(SRCROOT)/BUILD/sym +export SYMROOT=$(SRCROOT)/BUILD/sym/ endif export MakeInc_cmd=${VERSDIR}/makedefs/MakeInc.cmd @@ -32,7 +32,13 @@ ALL_SUBDIRS = \ libsa \ security -CONFIG_SUBDIRS = config +CONFIG_SUBDIRS_PPC = config + +CONFIG_SUBDIRS_I386 = config + +CONFIG_SUBDIRS_X86_64 = config + +CONFIG_SUBDIRS_ARM = config INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS @@ -40,6 +46,8 @@ INSTINC_SUBDIRS_PPC = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS +INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS + INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS) EXTERNAL_HEADERS EXPINC_SUBDIRS = $(ALL_SUBDIRS) @@ -48,10 +56,17 @@ EXPINC_SUBDIRS_PPC = $(EXPINC_SUBDIRS) EXPINC_SUBDIRS_I386 = $(EXPINC_SUBDIRS) +EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS) + EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS) -COMP_SUBDIRS = $(ALL_SUBDIRS) +COMP_SUBDIRS_PPC = $(ALL_SUBDIRS) + +COMP_SUBDIRS_I386 = $(ALL_SUBDIRS) + +COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS) +COMP_SUBDIRS_ARM = $(ALL_SUBDIRS) INST_SUBDIRS = \ libkern \ diff --git a/README b/README index 10f6d9084..2040c2cee 100644 --- a/README +++ b/README @@ -22,13 +22,13 @@ A. 
How to build XNU: Examples: /* make a debug kernel for H1 arm board */ - make TARGET_CONFIGS="debug arm s5l8900x" + make TARGET_CONFIGS="debug arm s5l8900x" SDKROOT=/path/to/SDK $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image /* make debug and development kernels for H1 arm board */ - make TARGET_CONFIGS="debug arm s5l8900x development arm s5l8900x" + make TARGET_CONFIGS="debug arm s5l8900x development arm s5l8900x" SDKROOT=/path/to/SDK $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image @@ -36,9 +36,9 @@ A. How to build XNU: $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/mach_kernel: bootable image /* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration */ - make TARGET_CONFIGS="default arm default" + make TARGET_CONFIGS="default arm default" SDKROOT=/path/to/SDK - or the following is equivalent + or the following is equivalent (omitted SDKROOT will use /) make ARCH_CONFIGS=ARM @@ -90,6 +90,7 @@ A. How to build XNU: or $ export TARGET_CONFIGS="DEBUG ARM MX31ADS" + $ export SDKROOT=/path/to/SDK $ make all Example: @@ -150,10 +151,16 @@ A. How to build XNU: $ make cscope # this will build cscope database +9) Other makefile options + + $ make MAKEJOBS=-j8 # this will use 8 processes during the build. The default is 2x the number of active cores + + $ make -w # trace recursive make invocations. Useful in combination with VERBOSE=YES + ============================================= B. How to install a new header file from XNU -[Note: This does not covers installing header file in IOKit framework] +[Note: This does not cover installing header files in IOKit framework] 1) XNU installs header files at the following locations - a. $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers @@ -196,7 +203,7 @@ B.
How to install a new header file from XNU 3) The Makefile combines the file lists mentioned above into different install lists which are used by build system to install the header files. - If the install list that you are interested does not exists, create it + If the install list that you are interested does not exist, create it by adding the appropriate file lists. The default install lists, its member file lists and their default location are described below - diff --git a/bsd/Makefile b/bsd/Makefile index 87836eda0..d4df2fc62 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -12,7 +12,6 @@ INSTINC_SUBDIRS = \ crypto \ dev \ hfs \ - isofs \ libkern \ machine \ miscfs \ @@ -22,8 +21,8 @@ INSTINC_SUBDIRS = \ netinet6 \ netkey \ nfs \ + security \ sys \ - ufs \ uuid \ vfs @@ -33,6 +32,9 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + i386 + INSTINC_SUBDIRS_ARM = \ arm @@ -41,7 +43,6 @@ EXPINC_SUBDIRS = \ crypto \ dev \ hfs \ - isofs \ libkern \ machine \ miscfs \ @@ -51,8 +52,8 @@ EXPINC_SUBDIRS = \ netinet6 \ netkey \ nfs \ + security \ sys \ - ufs \ uuid \ vfs \ vm @@ -63,6 +64,9 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ i386 +EXPINC_SUBDIRS_X86_64 = \ + i386 + EXPINC_SUBDIRS_ARM = \ arm diff --git a/bsd/bsm/Makefile b/bsd/bsm/Makefile index ec3b7ceb4..0bb6f4dcf 100644 --- a/bsd/bsm/Makefile +++ b/bsd/bsm/Makefile @@ -13,14 +13,19 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + DATAFILES = \ - audit.h audit_record.h audit_kevents.h + audit.h audit_domain.h audit_errno.h audit_fcntl.h audit_internal.h \ + audit_kevents.h audit_record.h audit_socket_type.h KERNFILES = \ audit.h diff --git a/bsd/bsm/audit.h b/bsd/bsm/audit.h index 96550b3f0..bb4a9497b 100644 --- a/bsd/bsm/audit.h +++ b/bsd/bsm/audit.h @@ -1,305 +1,326 @@ -/* - * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. 
+/*- + * Copyright (c) 2005-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit.h#10 $ */ -#ifndef _BSM_AUDIT_H +#ifndef _BSM_AUDIT_H #define _BSM_AUDIT_H -#include -#include #include -#include -#include +#include #define AUDIT_RECORD_MAGIC 0x828a0f1b -#define MAX_AUDIT_RECORDS 20 -#define MAX_AUDIT_RECORD_SIZE 4096 -#define MIN_AUDIT_FILE_SIZE 512 * 1024 +#define MAX_AUDIT_RECORDS 20 +#define MAXAUDITDATA (0x8000 - 1) +#define MAX_AUDIT_RECORD_SIZE MAXAUDITDATA +#define MIN_AUDIT_FILE_SIZE (512 * 1024) -/* - * Triggers for the audit daemon +/* + * Minimum number of free blocks on the filesystem containing the audit + * log necessary to avoid a hard log rotation. DO NOT SET THIS VALUE TO 0 + * as the kernel does an unsigned compare, plus we want to leave a few blocks + * free so userspace can terminate the log, etc. */ -#define AUDIT_TRIGGER_LOW_SPACE 1 -#define AUDIT_TRIGGER_FILE_FULL 2 +#define AUDIT_HARD_LIMIT_FREE_BLOCKS 4 /* - * Pre-defined audit IDs + * Triggers for the audit daemon. 
+ */ +#define AUDIT_TRIGGER_MIN 1 +#define AUDIT_TRIGGER_LOW_SPACE 1 /* Below low watermark. */ +#define AUDIT_TRIGGER_ROTATE_KERNEL 2 /* Kernel requests rotate. */ +#define AUDIT_TRIGGER_READ_FILE 3 /* Re-read config file. */ +#define AUDIT_TRIGGER_CLOSE_AND_DIE 4 /* Terminate audit. */ +#define AUDIT_TRIGGER_NO_SPACE 5 /* Below min free space. */ +#define AUDIT_TRIGGER_ROTATE_USER 6 /* User requests rotate. */ +#define AUDIT_TRIGGER_INITIALIZE 7 /* User initialize of auditd. */ +#define AUDIT_TRIGGER_EXPIRE_TRAILS 8 /* User expiration of trails. */ +#define AUDIT_TRIGGER_MAX 8 + +/* + * The special device filename (FreeBSD). */ -#define AU_DEFAUDITID ((uid_t)-1) +#define AUDITDEV_FILENAME "audit" +#define AUDIT_TRIGGER_FILE ("/dev/" AUDITDEV_FILENAME) /* - * Define the masks for the classes of audit events. + * Pre-defined audit IDs */ -#define AU_NULL 0x00000000 -#define AU_FREAD 0x00000001 -#define AU_FWRITE 0x00000002 -#define AU_FACCESS 0x00000004 -#define AU_FMODIFY 0x00000008 -#define AU_FCREATE 0x00000010 -#define AU_FDELETE 0x00000020 -#define AU_CLOSE 0x00000040 -#define AU_PROCESS 0x00000080 -#define AU_NET 0x00000100 -#define AU_IPC 0x00000200 -#define AU_NONAT 0x00000400 -#define AU_ADMIN 0x00000800 -#define AU_LOGIN 0x00001000 -#define AU_TFM 0x00002000 -#define AU_APPL 0x00004000 -#define AU_SETL 0x00008000 -#define AU_IFLOAT 0x00010000 -#define AU_PRIV 0x00020000 -#define AU_MAC_RW 0x00040000 -#define AU_XCONN 0x00080000 -#define AU_XCREATE 0x00100000 -#define AU_XDELETE 0x00200000 -#define AU_XIFLOAT 0x00400000 -#define AU_XPRIVS 0x00800000 -#define AU_XPRIVF 0x01000000 -#define AU_XMOVE 0x02000000 -#define AU_XDACF 0x04000000 -#define AU_XMACF 0x08000000 -#define AU_XSECATTR 0x10000000 -#define AU_IOCTL 0x20000000 -#define AU_EXEC 0x40000000 -#define AU_OTHER 0x80000000 -#define AU_ALL 0xffffffff +#define AU_DEFAUDITID (uid_t)(-1) +#define AU_DEFAUDITSID 0 +#define AU_ASSIGN_ASID -1 /* - * IPC types + * IPC types. 
*/ -#define AT_IPC_MSG ((u_char)1) /* message IPC id */ -#define AT_IPC_SEM ((u_char)2) /* semaphore IPC id */ -#define AT_IPC_SHM ((u_char)3) /* shared mem IPC id */ +#define AT_IPC_MSG ((u_char)1) /* Message IPC id. */ +#define AT_IPC_SEM ((u_char)2) /* Semaphore IPC id. */ +#define AT_IPC_SHM ((u_char)3) /* Shared mem IPC id. */ /* * Audit conditions. */ -#define AUC_UNSET 0 -#define AUC_AUDITING 1 -#define AUC_NOAUDIT 2 -#define AUC_DISABLED -1 +#define AUC_UNSET 0 +#define AUC_AUDITING 1 +#define AUC_NOAUDIT 2 +#define AUC_DISABLED -1 /* * auditon(2) commands. */ -#define A_GETPOLICY 2 -#define A_SETPOLICY 3 -#define A_GETKMASK 4 -#define A_SETKMASK 5 -#define A_GETQCTRL 6 -#define A_SETQCTRL 7 -#define A_GETCWD 8 -#define A_GETCAR 9 -#define A_GETSTAT 12 -#define A_SETSTAT 13 +#define A_OLDGETPOLICY 2 +#define A_OLDSETPOLICY 3 +#define A_GETKMASK 4 +#define A_SETKMASK 5 +#define A_OLDGETQCTRL 6 +#define A_OLDSETQCTRL 7 +#define A_GETCWD 8 +#define A_GETCAR 9 +#define A_GETSTAT 12 +#define A_SETSTAT 13 #define A_SETUMASK 14 -#define A_SETSMASK 15 -#define A_GETCOND 20 -#define A_SETCOND 21 -#define A_GETCLASS 22 -#define A_SETCLASS 23 -#define A_GETPINFO 24 -#define A_SETPMASK 25 -#define A_SETFSIZE 26 -#define A_GETFSIZE 27 -#define A_GETPINFO_ADDR 28 -#define A_GETKAUDIT 29 -#define A_SETKAUDIT 30 +#define A_SETSMASK 15 +#define A_OLDGETCOND 20 +#define A_OLDSETCOND 21 +#define A_GETCLASS 22 +#define A_SETCLASS 23 +#define A_GETPINFO 24 +#define A_SETPMASK 25 +#define A_SETFSIZE 26 +#define A_GETFSIZE 27 +#define A_GETPINFO_ADDR 28 +#define A_GETKAUDIT 29 +#define A_SETKAUDIT 30 +#define A_SENDTRIGGER 31 +#define A_GETSINFO_ADDR 32 +#define A_GETPOLICY 33 +#define A_SETPOLICY 34 +#define A_GETQCTRL 35 +#define A_SETQCTRL 36 +#define A_GETCOND 37 +#define A_SETCOND 38 /* * Audit policy controls. 
*/ -#define AUDIT_CNT 0x0001 -#define AUDIT_AHLT 0x0002 -#define AUDIT_ARGV 0x0004 -#define AUDIT_ARGE 0x0008 -#define AUDIT_PASSWD 0x0010 -#define AUDIT_SEQ 0x0020 -#define AUDIT_WINDATA 0x0040 -#define AUDIT_USER 0x0080 -#define AUDIT_GROUP 0x0100 -#define AUDIT_TRAIL 0x0200 -#define AUDIT_PATH 0x0400 +#define AUDIT_CNT 0x0001 +#define AUDIT_AHLT 0x0002 +#define AUDIT_ARGV 0x0004 +#define AUDIT_ARGE 0x0008 +#define AUDIT_SEQ 0x0010 +#define AUDIT_WINDATA 0x0020 +#define AUDIT_USER 0x0040 +#define AUDIT_GROUP 0x0080 +#define AUDIT_TRAIL 0x0100 +#define AUDIT_PATH 0x0200 +#define AUDIT_SCNT 0x0400 +#define AUDIT_PUBLIC 0x0800 +#define AUDIT_ZONENAME 0x1000 +#define AUDIT_PERZONE 0x2000 + +/* + * Default audit queue control parameters. + */ +#define AQ_HIWATER 100 +#define AQ_MAXHIGH 10000 +#define AQ_LOWATER 10 +#define AQ_BUFSZ MAXAUDITDATA +#define AQ_MAXBUFSZ 1048576 /* - * Audit queue control parameters + * Default minimum percentage free space on file system. */ -#define AQ_HIWATER 100 -#define AQ_MAXHIGH 10000 -#define AQ_LOWATER 10 -#define AQ_BUFSZ 1024 -#define AQ_MAXBUFSZ 1048576 +#define AU_FS_MINFREE 20 -#define AU_FS_MINFREE 20 /* default min filesystem freespace, in percent */ +/* + * Type definitions used indicating the length of variable length addresses + * in tokens containing addresses, such as header fields. 
+ */ +#define AU_IPv4 4 +#define AU_IPv6 16 __BEGIN_DECLS -typedef uid_t au_id_t; -typedef pid_t au_asid_t; -typedef u_int16_t au_event_t; -typedef u_int16_t au_emod_t; -typedef u_int32_t au_class_t; +typedef uid_t au_id_t; +typedef pid_t au_asid_t; +typedef u_int16_t au_event_t; +typedef u_int16_t au_emod_t; +typedef u_int32_t au_class_t; +typedef u_int64_t au_asflgs_t __attribute__ ((aligned (8))); struct au_tid { - dev_t port; - u_int32_t machine; + dev_t port; + u_int32_t machine; }; -typedef struct au_tid au_tid_t; +typedef struct au_tid au_tid_t; struct au_tid_addr { - dev_t at_port; - u_int32_t at_type; - u_int32_t at_addr[4]; + dev_t at_port; + u_int32_t at_type; + u_int32_t at_addr[4]; }; -typedef struct au_tid_addr au_tid_addr_t; +typedef struct au_tid_addr au_tid_addr_t; struct au_mask { - unsigned int am_success; /* success bits */ - unsigned int am_failure; /* failure bits */ + unsigned int am_success; /* Success bits. */ + unsigned int am_failure; /* Failure bits. */ }; -typedef struct au_mask au_mask_t; +typedef struct au_mask au_mask_t; struct auditinfo { - au_id_t ai_auid; /* Audit user ID */ - au_mask_t ai_mask; /* Audit masks */ - au_tid_t ai_termid; /* Terminal ID */ - au_asid_t ai_asid; /* Audit session ID */ + au_id_t ai_auid; /* Audit user ID. */ + au_mask_t ai_mask; /* Audit masks. */ + au_tid_t ai_termid; /* Terminal ID. */ + au_asid_t ai_asid; /* Audit session ID. */ }; -typedef struct auditinfo auditinfo_t; +typedef struct auditinfo auditinfo_t; struct auditinfo_addr { - au_id_t ai_auid; /* Audit user ID */ - au_mask_t ai_mask; /* Audit masks */ - au_tid_addr_t ai_termid; /* Terminal ID */ - au_asid_t ai_asid; /* Audit session ID */ + au_id_t ai_auid; /* Audit user ID. */ + au_mask_t ai_mask; /* Audit masks. */ + au_tid_addr_t ai_termid; /* Terminal ID. */ + au_asid_t ai_asid; /* Audit session ID. */ + au_asflgs_t ai_flags; /* Audit session flags. 
*/ }; -typedef struct auditinfo_addr auditinfo_addr_t; +typedef struct auditinfo_addr auditinfo_addr_t; struct auditpinfo { - pid_t ap_pid; /* ID of target process */ - au_id_t ap_auid; /* Audit user ID */ - au_mask_t ap_mask; /* Audit masks */ - au_tid_t ap_termid; /* Terminal ID */ - au_asid_t ap_asid; /* Audit session ID */ + pid_t ap_pid; /* ID of target process. */ + au_id_t ap_auid; /* Audit user ID. */ + au_mask_t ap_mask; /* Audit masks. */ + au_tid_t ap_termid; /* Terminal ID. */ + au_asid_t ap_asid; /* Audit session ID. */ }; -typedef struct auditpinfo auditpinfo_t; +typedef struct auditpinfo auditpinfo_t; struct auditpinfo_addr { - pid_t ap_pid; /* ID of target process */ - au_id_t ap_auid; /* Audit user ID */ - au_mask_t ap_mask; /* Audit masks */ - au_tid_addr_t ap_termid; /* Terminal ID */ - au_asid_t ap_asid; /* Audit session ID */ + pid_t ap_pid; /* ID of target process. */ + au_id_t ap_auid; /* Audit user ID. */ + au_mask_t ap_mask; /* Audit masks. */ + au_tid_addr_t ap_termid; /* Terminal ID. */ + au_asid_t ap_asid; /* Audit session ID. */ + au_asflgs_t ap_flags; /* Audit session flags. */ }; -typedef struct auditpinfo_addr auditpinfo_addr_t; +typedef struct auditpinfo_addr auditpinfo_addr_t; -/* Token and record structures */ - -struct au_token { - u_char *t_data; - size_t len; - TAILQ_ENTRY(au_token) tokens; +struct au_session { + auditinfo_addr_t *as_aia_p; /* Ptr to full audit info. */ + au_mask_t as_mask; /* Process Audit Masks. */ }; -typedef struct au_token token_t; - -struct au_record { - char used; /* Is this record currently being used */ - int desc; /* The descriptor associated with this record */ - TAILQ_HEAD(, au_token) token_q; /* queue of BSM tokens */ - u_char *data; - size_t len; - LIST_ENTRY(au_record) au_rec_q; -}; -typedef struct au_record au_record_t; +typedef struct au_session au_session_t; /* - * Kernel audit queue control parameters. + * Contents of token_t are opaque outside of libbsm. 
+ */ +typedef struct au_token token_t; + +/* + * Kernel audit queue control parameters: + * Default: Maximum: + * aq_hiwater: AQ_HIWATER (100) AQ_MAXHIGH (10000) + * aq_lowater: AQ_LOWATER (10) +mach_port_name_t audit_session_self(void); +au_asid_t audit_session_join(mach_port_name_t port); +#endif /* __APPLE_API_PRIVATE */ + +#endif /* defined(_KERNEL) || defined(KERNEL) */ __END_DECLS diff --git a/bsd/bsm/audit_domain.h b/bsd/bsm/audit_domain.h new file mode 100644 index 000000000..9edcb4fbc --- /dev/null +++ b/bsd/bsm/audit_domain.h @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 2008 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_domain.h#1 $ + */ + +#ifndef _BSM_AUDIT_DOMAIN_H_ +#define _BSM_AUDIT_DOMAIN_H_ + +/* + * BSM protocol domain constants - protocol domains defined in Solaris. + */ +#define BSM_PF_UNSPEC 0 +#define BSM_PF_LOCAL 1 +#define BSM_PF_INET 2 +#define BSM_PF_IMPLINK 3 +#define BSM_PF_PUP 4 +#define BSM_PF_CHAOS 5 +#define BSM_PF_NS 6 +#define BSM_PF_NBS 7 /* Solaris-specific. */ +#define BSM_PF_ECMA 8 +#define BSM_PF_DATAKIT 9 +#define BSM_PF_CCITT 10 +#define BSM_PF_SNA 11 +#define BSM_PF_DECnet 12 +#define BSM_PF_DLI 13 +#define BSM_PF_LAT 14 +#define BSM_PF_HYLINK 15 +#define BSM_PF_APPLETALK 16 +#define BSM_PF_NIT 17 /* Solaris-specific. */ +#define BSM_PF_802 18 /* Solaris-specific. */ +#define BSM_PF_OSI 19 +#define BSM_PF_X25 20 /* Solaris/Linux-specific. */ +#define BSM_PF_OSINET 21 /* Solaris-specific. */ +#define BSM_PF_GOSIP 22 /* Solaris-specific. */ +#define BSM_PF_IPX 23 +#define BSM_PF_ROUTE 24 +#define BSM_PF_LINK 25 +#define BSM_PF_INET6 26 +#define BSM_PF_KEY 27 +#define BSM_PF_NCA 28 /* Solaris-specific. */ +#define BSM_PF_POLICY 29 /* Solaris-specific. */ +#define BSM_PF_INET_OFFLOAD 30 /* Solaris-specific. */ + +/* + * BSM protocol domain constants - protocol domains not defined in Solaris. + */ +#define BSM_PF_NETBIOS 500 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_ISO 501 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_XTP 502 /* FreeBSD/Darwin-specific. 
*/ +#define BSM_PF_COIP 503 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_CNT 504 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_RTIP 505 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_SIP 506 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_PIP 507 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_ISDN 508 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_E164 509 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_NATM 510 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_ATM 511 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_NETGRAPH 512 /* FreeBSD/Darwin-specific. */ +#define BSM_PF_SLOW 513 /* FreeBSD-specific. */ +#define BSM_PF_SCLUSTER 514 /* FreeBSD-specific. */ +#define BSM_PF_ARP 515 /* FreeBSD-specific. */ +#define BSM_PF_BLUETOOTH 516 /* FreeBSD-specific. */ +#define BSM_PF_IEEE80211 517 /* FreeBSD-specific. */ +#define BSM_PF_AX25 518 /* Linux-specific. */ +#define BSM_PF_ROSE 519 /* Linux-specific. */ +#define BSM_PF_NETBEUI 520 /* Linux-specific. */ +#define BSM_PF_SECURITY 521 /* Linux-specific. */ +#define BSM_PF_PACKET 522 /* Linux-specific. */ +#define BSM_PF_ASH 523 /* Linux-specific. */ +#define BSM_PF_ECONET 524 /* Linux-specific. */ +#define BSM_PF_ATMSVC 525 /* Linux-specific. */ +#define BSM_PF_IRDA 526 /* Linux-specific. */ +#define BSM_PF_PPPOX 527 /* Linux-specific. */ +#define BSM_PF_WANPIPE 528 /* Linux-specific. */ +#define BSM_PF_LLC 529 /* Linux-specific. */ +#define BSM_PF_CAN 530 /* Linux-specific. */ +#define BSM_PF_TIPC 531 /* Linux-specific. */ +#define BSM_PF_IUCV 532 /* Linux-specific. */ +#define BSM_PF_RXRPC 533 /* Linux-specific. */ +#define BSM_PF_PHONET 534 /* Linux-specific. */ + +/* + * Used when there is no mapping from a local to BSM protocol domain. + */ +#define BSM_PF_UNKNOWN 700 /* OpenBSM-specific. 
*/ + +#endif /* !_BSM_AUDIT_DOMAIN_H_ */ diff --git a/bsd/bsm/audit_errno.h b/bsd/bsm/audit_errno.h new file mode 100644 index 000000000..f7dec8d89 --- /dev/null +++ b/bsd/bsm/audit_errno.h @@ -0,0 +1,214 @@ +/*- + * Copyright (c) 2008 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_errno.h#5 $ + */ + +#ifndef _BSM_AUDIT_ERRNO_H_ +#define _BSM_AUDIT_ERRNO_H_ + +/* + * For the purposes of portable encoding, we convert between local error + * numbers and Solaris error numbers (as well as some extensions for error + * numbers that don't exist in Solaris). Although the first 35 or so + * constants are the same across all OS's, we don't handle that in any + * special way. + * + * When adding constants here, also add them to bsm_errno.c. + */ +#define BSM_ERRNO_ESUCCESS 0 +#define BSM_ERRNO_EPERM 1 +#define BSM_ERRNO_ENOENT 2 +#define BSM_ERRNO_ESRCH 3 +#define BSM_ERRNO_EINTR 4 +#define BSM_ERRNO_EIO 5 +#define BSM_ERRNO_ENXIO 6 +#define BSM_ERRNO_E2BIG 7 +#define BSM_ERRNO_ENOEXEC 8 +#define BSM_ERRNO_EBADF 9 +#define BSM_ERRNO_ECHILD 10 +#define BSM_ERRNO_EAGAIN 11 +#define BSM_ERRNO_ENOMEM 12 +#define BSM_ERRNO_EACCES 13 +#define BSM_ERRNO_EFAULT 14 +#define BSM_ERRNO_ENOTBLK 15 +#define BSM_ERRNO_EBUSY 16 +#define BSM_ERRNO_EEXIST 17 +#define BSM_ERRNO_EXDEV 18 +#define BSM_ERRNO_ENODEV 19 +#define BSM_ERRNO_ENOTDIR 20 +#define BSM_ERRNO_EISDIR 21 +#define BSM_ERRNO_EINVAL 22 +#define BSM_ERRNO_ENFILE 23 +#define BSM_ERRNO_EMFILE 24 +#define BSM_ERRNO_ENOTTY 25 +#define BSM_ERRNO_ETXTBSY 26 +#define BSM_ERRNO_EFBIG 27 +#define BSM_ERRNO_ENOSPC 28 +#define BSM_ERRNO_ESPIPE 29 +#define BSM_ERRNO_EROFS 30 +#define BSM_ERRNO_EMLINK 31 +#define BSM_ERRNO_EPIPE 32 +#define BSM_ERRNO_EDOM 33 +#define BSM_ERRNO_ERANGE 34 +#define BSM_ERRNO_ENOMSG 35 +#define BSM_ERRNO_EIDRM 36 +#define BSM_ERRNO_ECHRNG 37 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EL2NSYNC 38 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EL3HLT 39 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EL3RST 40 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ELNRNG 41 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EUNATCH 42 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOCSI 43 /* Solaris/Linux-specific. 
*/ +#define BSM_ERRNO_EL2HLT 44 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EDEADLK 45 +#define BSM_ERRNO_ENOLCK 46 +#define BSM_ERRNO_ECANCELED 47 +#define BSM_ERRNO_ENOTSUP 48 +#define BSM_ERRNO_EDQUOT 49 +#define BSM_ERRNO_EBADE 50 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EBADR 51 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EXFULL 52 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOANO 53 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EBADRQC 54 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EBADSLT 55 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EDEADLOCK 56 /* Solaris-specific. */ +#define BSM_ERRNO_EBFONT 57 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EOWNERDEAD 58 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOTRECOVERABLE 59 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOSTR 60 /* Solaris/Darwin/Linux-specific. */ +#define BSM_ERRNO_ENODATA 61 /* Solaris/Darwin/Linux-specific. */ +#define BSM_ERRNO_ETIME 62 /* Solaris/Darwin/Linux-specific. */ +#define BSM_ERRNO_ENOSR 63 /* Solaris/Darwin/Linux-specific. */ +#define BSM_ERRNO_ENONET 64 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOPKG 65 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EREMOTE 66 +#define BSM_ERRNO_ENOLINK 67 +#define BSM_ERRNO_EADV 68 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ESRMNT 69 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ECOMM 70 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EPROTO 71 +#define BSM_ERRNO_ELOCKUNMAPPED 72 /* Solaris-specific. */ +#define BSM_ERRNO_ENOTACTIVE 73 /* Solaris-specific. */ +#define BSM_ERRNO_EMULTIHOP 74 +#define BSM_ERRNO_EBADMSG 77 +#define BSM_ERRNO_ENAMETOOLONG 78 +#define BSM_ERRNO_EOVERFLOW 79 +#define BSM_ERRNO_ENOTUNIQ 80 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EBADFD 81 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EREMCHG 82 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ELIBACC 83 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ELIBBAD 84 /* Solaris/Linux-specific. 
*/ +#define BSM_ERRNO_ELIBSCN 85 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ELIBMAX 86 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ELIBEXEC 87 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_EILSEQ 88 +#define BSM_ERRNO_ENOSYS 89 +#define BSM_ERRNO_ELOOP 90 +#define BSM_ERRNO_ERESTART 91 +#define BSM_ERRNO_ESTRPIPE 92 /* Solaris/Linux-specific. */ +#define BSM_ERRNO_ENOTEMPTY 93 +#define BSM_ERRNO_EUSERS 94 +#define BSM_ERRNO_ENOTSOCK 95 +#define BSM_ERRNO_EDESTADDRREQ 96 +#define BSM_ERRNO_EMSGSIZE 97 +#define BSM_ERRNO_EPROTOTYPE 98 +#define BSM_ERRNO_ENOPROTOOPT 99 +#define BSM_ERRNO_EPROTONOSUPPORT 120 +#define BSM_ERRNO_ESOCKTNOSUPPORT 121 +#define BSM_ERRNO_EOPNOTSUPP 122 +#define BSM_ERRNO_EPFNOSUPPORT 123 +#define BSM_ERRNO_EAFNOSUPPORT 124 +#define BSM_ERRNO_EADDRINUSE 125 +#define BSM_ERRNO_EADDRNOTAVAIL 126 +#define BSM_ERRNO_ENETDOWN 127 +#define BSM_ERRNO_ENETUNREACH 128 +#define BSM_ERRNO_ENETRESET 129 +#define BSM_ERRNO_ECONNABORTED 130 +#define BSM_ERRNO_ECONNRESET 131 +#define BSM_ERRNO_ENOBUFS 132 +#define BSM_ERRNO_EISCONN 133 +#define BSM_ERRNO_ENOTCONN 134 +#define BSM_ERRNO_ESHUTDOWN 143 +#define BSM_ERRNO_ETOOMANYREFS 144 +#define BSM_ERRNO_ETIMEDOUT 145 +#define BSM_ERRNO_ECONNREFUSED 146 +#define BSM_ERRNO_EHOSTDOWN 147 +#define BSM_ERRNO_EHOSTUNREACH 148 +#define BSM_ERRNO_EALREADY 149 +#define BSM_ERRNO_EINPROGRESS 150 +#define BSM_ERRNO_ESTALE 151 + +/* + * OpenBSM constants for error numbers not defined in Solaris. In the event + * that these errors are added to Solaris, we will deprecate the OpenBSM + * numbers in the same way we do for audit event constants. + * + * ELAST doesn't get a constant in the BSM space. + */ +#define BSM_ERRNO_EPROCLIM 190 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EBADRPC 191 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_ERPCMISMATCH 192 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EPROGUNAVAIL 193 /* FreeBSD/Darwin-specific. 
*/ +#define BSM_ERRNO_EPROGMISMATCH 194 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EPROCUNAVAIL 195 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EFTYPE 196 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EAUTH 197 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_ENEEDAUTH 198 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_ENOATTR 199 /* FreeBSD/Darwin-specific. */ +#define BSM_ERRNO_EDOOFUS 200 /* FreeBSD-specific. */ +#define BSM_ERRNO_EJUSTRETURN 201 /* FreeBSD-specific. */ +#define BSM_ERRNO_ENOIOCTL 202 /* FreeBSD-specific. */ +#define BSM_ERRNO_EDIRIOCTL 203 /* FreeBSD-specific. */ +#define BSM_ERRNO_EPWROFF 204 /* Darwin-specific. */ +#define BSM_ERRNO_EDEVERR 205 /* Darwin-specific. */ +#define BSM_ERRNO_EBADEXEC 206 /* Darwin-specific. */ +#define BSM_ERRNO_EBADARCH 207 /* Darwin-specific. */ +#define BSM_ERRNO_ESHLIBVERS 208 /* Darwin-specific. */ +#define BSM_ERRNO_EBADMACHO 209 /* Darwin-specific. */ +#define BSM_ERRNO_EPOLICY 210 /* Darwin-specific. */ +#define BSM_ERRNO_EDOTDOT 211 /* Linux-specific. */ +#define BSM_ERRNO_EUCLEAN 212 /* Linux-specific. */ +#define BSM_ERRNO_ENOTNAM 213 /* Linux(Xenix?)-specific. */ +#define BSM_ERRNO_ENAVAIL 214 /* Linux(Xenix?)-specific. */ +#define BSM_ERRNO_EISNAM 215 /* Linux(Xenix?)-specific. */ +#define BSM_ERRNO_EREMOTEIO 216 /* Linux-specific. */ +#define BSM_ERRNO_ENOMEDIUM 217 /* Linux-specific. */ +#define BSM_ERRNO_EMEDIUMTYPE 218 /* Linux-specific. */ +#define BSM_ERRNO_ENOKEY 219 /* Linux-specific. */ +#define BSM_ERRNO_EKEYEXPIRED 220 /* Linux-specific. */ +#define BSM_ERRNO_EKEYREVOKED 221 /* Linux-specific. */ +#define BSM_ERRNO_EKEYREJECTED 222 /* Linux-specific. */ + +/* + * In the event that OpenBSM doesn't have a file representation of a local + * error number, use this. + */ +#define BSM_ERRNO_UNKNOWN 250 /* OpenBSM-specific. 
*/ + +#endif /* !_BSM_AUDIT_ERRNO_H_ */ diff --git a/bsd/bsm/audit_fcntl.h b/bsd/bsm/audit_fcntl.h new file mode 100644 index 000000000..1398b2f0d --- /dev/null +++ b/bsd/bsm/audit_fcntl.h @@ -0,0 +1,140 @@ +/*- + * Copyright (c) 2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_fcntl.h#2 $ + */ + +#ifndef _BSM_AUDIT_FCNTL_H_ +#define _BSM_AUDIT_FCNTL_H_ + +/* + * Shared and Solaris-specific: (0-99). 
+ */ +#define BSM_F_DUPFD 0 +#define BSM_F_GETFD 1 +#define BSM_F_SETFD 2 +#define BSM_F_GETFL 3 +#define BSM_F_SETFL 4 +#define BSM_F_O_GETLK 5 /* Solaris-specific. */ +#define BSM_F_SETLK 6 +#define BSM_F_SETLKW 7 +#define BSM_F_CHKFL 8 /* Solaris-specific. */ +#define BSM_F_DUP2FD 9 /* FreeBSD/Solaris-specific. */ +#define BSM_F_ALLOCSP 10 /* Solaris-specific. */ +#define BSM_F_FREESP 11 /* Solaris-specific. */ + +#define BSM_F_ISSTREAM 13 /* Solaris-specific. */ +#define BSM_F_GETLK 14 +#define BSM_F_PRIV 15 /* Solaris-specific. */ +#define BSM_F_NPRIV 16 /* Solaris-specific. */ +#define BSM_F_QUOTACTL 17 /* Solaris-specific. */ +#define BSM_F_BLOCKS 18 /* Solaris-specific. */ +#define BSM_F_BLKSIZE 19 /* Solaris-specific. */ + +#define BSM_F_GETOWN 23 +#define BSM_F_SETOWN 24 +#define BSM_F_REVOKE 25 /* Solaris-specific. */ +#define BSM_F_HASREMOTELOCKS 26 /* Solaris-specific. */ +#define BSM_F_FREESP64 27 /* Solaris-specific. */ +#define BSM_F_ALLOCSP64 28 /* Solaris-specific. */ + +#define BSM_F_GETLK64 33 /* Solaris-specific. */ +#define BSM_F_SETLK64 34 /* Solaris-specific. */ +#define BSM_F_SETLKW64 35 /* Solaris-specific. */ + +#define BSM_F_SHARE 40 /* Solaris-specific. */ +#define BSM_F_UNSHARE 41 /* Solaris-specific. */ +#define BSM_F_SETLK_NBMAND 42 /* Solaris-specific. */ +#define BSM_F_SHARE_NBMAND 43 /* Solaris-specific. */ +#define BSM_F_SETLK64_NBMAND 44 /* Solaris-specific. */ +#define BSM_F_GETXFL 45 /* Solaris-specific. */ +#define BSM_F_BADFD 46 /* Solaris-specific. */ + +/* + * FreeBSD-specific (100-199). + */ +#define BSM_F_OGETLK 107 /* FreeBSD-specific. */ +#define BSM_F_OSETLK 108 /* FreeBSD-specific. */ +#define BSM_F_OSETLKW 109 /* FreeBSD-specific. */ + +#define BSM_F_SETLK_REMOTE 114 /* FreeBSD-specific. */ + +/* + * Linux-specific (200-299). + */ +#define BSM_F_SETSIG 210 /* Linux-specific. */ +#define BSM_F_GETSIG 211 /* Linux-specific. */ + +/* + * Darwin-specific (300-399). + */ +#define BSM_F_CHKCLEAN 341 /* Darwin-specific. 
*/ +#define BSM_F_PREALLOCATE 342 /* Darwin-specific. */ +#define BSM_F_SETSIZE 343 /* Darwin-specific. */ +#define BSM_F_RDADVISE 344 /* Darwin-specific. */ +#define BSM_F_RDAHEAD 345 /* Darwin-specific. */ +#define BSM_F_READBOOTSTRAP 346 /* Darwin-specific. */ +#define BSM_F_WRITEBOOTSTRAP 347 /* Darwin-specific. */ +#define BSM_F_NOCACHE 348 /* Darwin-specific. */ +#define BSM_F_LOG2PHYS 349 /* Darwin-specific. */ +#define BSM_F_GETPATH 350 /* Darwin-specific. */ +#define BSM_F_FULLFSYNC 351 /* Darwin-specific. */ +#define BSM_F_PATHPKG_CHECK 352 /* Darwin-specific. */ +#define BSM_F_FREEZE_FS 353 /* Darwin-specific. */ +#define BSM_F_THAW_FS 354 /* Darwin-specific. */ +#define BSM_F_GLOBAL_NOCACHE 355 /* Darwin-specific. */ +#define BSM_F_OPENFROM 356 /* Darwin-specific. */ +#define BSM_F_UNLINKFROM 357 /* Darwin-specific. */ +#define BSM_F_CHECK_OPENEVT 358 /* Darwin-specific. */ +#define BSM_F_ADDSIGS 359 /* Darwin-specific. */ +#define BSM_F_MARKDEPENDENCY 360 /* Darwin-specific. */ + +/* + * Darwin file system specific (400-499). + */ +#define BSM_F_FS_SPECIFIC_0 400 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_1 401 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_2 402 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_3 403 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_4 404 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_5 405 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_6 406 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_7 407 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_8 408 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_9 409 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_10 410 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_11 411 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_12 412 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_13 413 /* Darwin-fs-specific. */ +#define BSM_F_FS_SPECIFIC_14 414 /* Darwin-fs-specific. 
*/ +#define BSM_F_FS_SPECIFIC_15 415 /* Darwin-fs-specific. */ + + +#define BSM_F_UNKNOWN 0xFFFF + +#endif /* !_BSM_AUDIT_FCNTL_H_ */ diff --git a/bsd/bsm/audit_internal.h b/bsd/bsm/audit_internal.h new file mode 100644 index 000000000..71a51307a --- /dev/null +++ b/bsd/bsm/audit_internal.h @@ -0,0 +1,117 @@ +/*- + * Copyright (c) 2005-2008 Apple Inc. + * Copyright (c) 2005 SPARTA, Inc. + * All rights reserved. + * + * This code was developed in part by Robert N. M. Watson, Senior Principal + * Scientist, SPARTA, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_internal.h#5 $ + */ + +#ifndef _AUDIT_INTERNAL_H +#define _AUDIT_INTERNAL_H + +#if defined(__linux__) && !defined(__unused) +#define __unused +#endif + +/* + * audit_internal.h contains private interfaces that are shared by user space + * and the kernel for the purposes of assembling audit records. Applications + * should not include this file or use the APIs found within, or it may be + * broken with future releases of OpenBSM, which may delete, modify, or + * otherwise break these interfaces or the assumptions they rely on. + */ +struct au_token { + u_char *t_data; /* Token data (presumably the encoded bytes; confirm). */ + size_t len; /* Presumably the byte length of t_data; confirm. */ + TAILQ_ENTRY(au_token) tokens; /* Tail-queue linkage. */ +}; + +struct au_record { + char used; /* Record currently in use? */ + int desc; /* Descriptor for record. */ + TAILQ_HEAD(, au_token) token_q; /* Queue of BSM tokens. */ + u_char *data; /* NOTE(review): presumably the assembled record. */ + size_t len; /* NOTE(review): presumably the length of data. */ + LIST_ENTRY(au_record) au_rec_q; /* List linkage between records. */ +}; +typedef struct au_record au_record_t; + + +/* + * We could determine the header and trailer sizes by defining appropriate + * structures. We hold off that approach until we have a consistent way of + * using structures for all tokens. This is not straightforward since these + * token structures may contain pointers to contents whose size we do not + * know (e.g., text tokens).
+ */ +#define AUDIT_HEADER_EX_SIZE(a) ((a)->ai_termid.at_type+18+sizeof(u_int32_t)) +#define AUDIT_HEADER_SIZE 18 +#define MAX_AUDIT_HEADER_SIZE (5*sizeof(u_int32_t)+18) +#define AUDIT_TRAILER_SIZE 7 + +/* + * BSM token streams store fields in big endian byte order for portability, + * so typed values are converted on encode/decode. Each ADD_* macro + * evaluates loc twice and advances it past the bytes written. + */ +#define ADD_U_CHAR(loc, val) \ + do { \ + *(loc) = (val); \ + (loc) += sizeof(u_char); \ + } while(0) + + +#define ADD_U_INT16(loc, val) \ + do { \ + be16enc((loc), (val)); \ + (loc) += sizeof(u_int16_t); \ + } while(0) + +#define ADD_U_INT32(loc, val) \ + do { \ + be32enc((loc), (val)); \ + (loc) += sizeof(u_int32_t); \ + } while(0) + +#define ADD_U_INT64(loc, val) \ + do { \ + be64enc((loc), (val)); \ + (loc) += sizeof(u_int64_t); \ + } while(0) + +#define ADD_MEM(loc, data, size) \ + do { \ + memcpy((loc), (data), (size)); \ + (loc) += (size); \ + } while(0) + +#define ADD_STRING(loc, data, size) ADD_MEM(loc, data, size) + +#endif /* !_AUDIT_INTERNAL_H */ diff --git a/bsd/bsm/audit_kernel.h b/bsd/bsm/audit_kernel.h index b68db2a6c..3abea7ddd 100644 --- a/bsd/bsm/audit_kernel.h +++ b/bsd/bsm/audit_kernel.h @@ -1,30 +1,34 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * Copyright (c) 2004-2008, Apple Inc. All rights reserved. * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement.
+ * @APPLE_BSD_LICENSE_HEADER_START@ * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @APPLE_BSD_LICENSE_HEADER_END@ +*/ /* * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce * support for mandatory and extensible security protections. This notice @@ -35,440 +39,8 @@ #ifndef _BSM_AUDIT_KERNEL_H #define _BSM_AUDIT_KERNEL_H -#if CONFIG_MACF -#include -#include -#endif - -#ifdef KERNEL - -#include - -#include -#include -#include - -/* - * Audit subsystem condition flags. The audit_enabled flag is set and - * removed automatically as a result of configuring log files, and - * can be observed but should not be directly manipulated. The audit - * suspension flag permits audit to be temporarily disabled without - * reconfiguring the audit target. - */ -extern int audit_enabled; -extern int audit_suspended; - -#define BSM_SUCCESS 0 -#define BSM_FAILURE 1 -#define BSM_NOAUDIT 2 - -/* - * Define the masks for the audited arguments. 
- */ -#define ARG_EUID 0x0000000000000001ULL -#define ARG_RUID 0x0000000000000002ULL -#define ARG_SUID 0x0000000000000004ULL -#define ARG_EGID 0x0000000000000008ULL -#define ARG_RGID 0x0000000000000010ULL -#define ARG_SGID 0x0000000000000020ULL -#define ARG_PID 0x0000000000000040ULL -#define ARG_UID 0x0000000000000080ULL -#define ARG_AUID 0x0000000000000100ULL -#define ARG_GID 0x0000000000000200ULL -#define ARG_FD 0x0000000000000400ULL -#define ARG_POSIX_IPC_PERM 0x0000000000000800ULL -#define ARG_FFLAGS 0x0000000000001000ULL -#define ARG_MODE 0x0000000000002000ULL -#define ARG_DEV 0x0000000000004000ULL -#define ARG_ADDR 0x0000000000008000ULL -#define ARG_LEN 0x0000000000010000ULL -#define ARG_MASK 0x0000000000020000ULL -#define ARG_SIGNUM 0x0000000000040000ULL -#define ARG_LOGIN 0x0000000000080000ULL -#define ARG_SADDRINET 0x0000000000100000ULL -#define ARG_SADDRINET6 0x0000000000200000ULL -#define ARG_SADDRUNIX 0x0000000000400000ULL -#define ARG_KPATH1 0x0000000000800000ULL -#define ARG_KPATH2 0x0000000001000000ULL -#define ARG_UPATH1 0x0000000002000000ULL -#define ARG_UPATH2 0x0000000004000000ULL -#define ARG_TEXT 0x0000000008000000ULL -#define ARG_VNODE1 0x0000000010000000ULL -#define ARG_VNODE2 0x0000000020000000ULL -#define ARG_SVIPC_CMD 0x0000000040000000ULL -#define ARG_SVIPC_PERM 0x0000000080000000ULL -#define ARG_SVIPC_ID 0x0000000100000000ULL -#define ARG_SVIPC_ADDR 0x0000000200000000ULL -#define ARG_GROUPSET 0x0000000400000000ULL -#define ARG_CMD 0x0000000800000000ULL -#define ARG_SOCKINFO 0x0000001000000000ULL -#define ARG_ASID 0x0000002000000000ULL -#define ARG_TERMID 0x0000004000000000ULL -#define ARG_AUDITON 0x0000008000000000ULL -#define ARG_VALUE 0x0000010000000000ULL -#define ARG_AMASK 0x0000020000000000ULL -#define ARG_CTLNAME 0x0000040000000000ULL -#define ARG_PROCESS 0x0000080000000000ULL -#define ARG_MACHPORT1 0x0000100000000000ULL -#define ARG_MACHPORT2 0x0000200000000000ULL -#define ARG_MAC_STRING 0x0000400000000000ULL -#define ARG_NONE 
0x0000000000000000ULL -#define ARG_ALL 0xFFFFFFFFFFFFFFFFULL - -/* Defines for the kernel audit record k_ar_commit field */ -#define AR_COMMIT_KERNEL 0x00000001U -#define AR_COMMIT_USER 0x00000010U - -struct vnode_au_info { - mode_t vn_mode; - uid_t vn_uid; - gid_t vn_gid; - dev_t vn_dev; - long vn_fsid; - long vn_fileid; - long vn_gen; -}; - -struct groupset { - gid_t gidset[NGROUPS]; - u_int gidset_size; -}; - -struct socket_au_info { - int so_domain; - int so_type; - int so_protocol; - in_addr_t so_raddr; /* remote address if INET socket */ - in_addr_t so_laddr; /* local address if INET socket */ - u_short so_rport; /* remote port */ - u_short so_lport; /* local port */ -}; - -union auditon_udata { - char au_path[MAXPATHLEN]; - long au_cond; - long au_flags; - long au_policy; - au_evclass_map_t au_evclass; - au_mask_t au_mask; - auditinfo_t au_auinfo; - auditpinfo_t au_aupinfo; - auditpinfo_addr_t au_aupinfo_addr; - au_qctrl_t au_qctrl; - au_stat_t au_stat; - au_fstat_t au_fstat; -}; - -struct posix_ipc_perm { - uid_t pipc_uid; - gid_t pipc_gid; - mode_t pipc_mode; -}; - -#if CONFIG_MACF - -#define MAC_AUDIT_LABEL_LEN 1024 -#define MAC_AUDIT_DATA_TYPE 0 -#define MAC_AUDIT_TEXT_TYPE 1 - -struct mac_audit_record { - int type; // one of the types defined above - int length; // byte length of the data field - u_char *data; // the payload - LIST_ENTRY(mac_audit_record) records; -}; - -#endif - -struct audit_record { - /* Audit record header. */ - u_int32_t ar_magic; - int ar_event; - int ar_retval; /* value returned to the process */ - int ar_errno; /* return status of system call */ - struct timespec ar_starttime; - struct timespec ar_endtime; - u_int64_t ar_valid_arg; /* Bitmask of valid arguments */ - - /* Audit subject information. 
*/ - struct xucred ar_subj_cred; - uid_t ar_subj_ruid; - gid_t ar_subj_rgid; - gid_t ar_subj_egid; - uid_t ar_subj_auid; /* Audit user ID */ - pid_t ar_subj_asid; /* Audit session ID */ - pid_t ar_subj_pid; - struct au_tid ar_subj_term; - char ar_subj_comm[MAXCOMLEN + 1]; - struct au_mask ar_subj_amask; - - /* Operation arguments. */ - uid_t ar_arg_euid; - uid_t ar_arg_ruid; - uid_t ar_arg_suid; - gid_t ar_arg_egid; - gid_t ar_arg_rgid; - gid_t ar_arg_sgid; - pid_t ar_arg_pid; - pid_t ar_arg_asid; - struct au_tid ar_arg_termid; - uid_t ar_arg_uid; - uid_t ar_arg_auid; - gid_t ar_arg_gid; - struct groupset ar_arg_groups; - int ar_arg_fd; - int ar_arg_fflags; - mode_t ar_arg_mode; - int ar_arg_dev; - long ar_arg_value; - void * ar_arg_addr; - int ar_arg_len; - int ar_arg_mask; - u_int ar_arg_signum; - char ar_arg_login[MAXLOGNAME]; - int ar_arg_ctlname[CTL_MAXNAME]; - struct sockaddr ar_arg_sockaddr; - struct socket_au_info ar_arg_sockinfo; - char *ar_arg_upath1; - char *ar_arg_upath2; - char *ar_arg_kpath1; - char *ar_arg_kpath2; -#if CONFIG_MACF - char *ar_vnode1_mac_labels; - char *ar_vnode2_mac_labels; - char *ar_cred_mac_labels; - char *ar_arg_mac_string; -#endif - char *ar_arg_text; - struct au_mask ar_arg_amask; - struct vnode_au_info ar_arg_vnode1; - struct vnode_au_info ar_arg_vnode2; - int ar_arg_cmd; - int ar_arg_svipc_cmd; - struct ipc_perm ar_arg_svipc_perm; - int ar_arg_svipc_id; - user_addr_t ar_arg_svipc_addr; - struct posix_ipc_perm ar_arg_pipc_perm; - mach_port_name_t ar_arg_mach_port1; - mach_port_name_t ar_arg_mach_port2; - union auditon_udata ar_arg_auditon; - -#if CONFIG_MACF - /* MAC security related fields added by MAC policies - * ar_forced_by_mac is 1 if mac_audit_check_preselect() forced this - * call to be audited, 0 otherwise. - */ - LIST_HEAD(mac_audit_record_list_t, mac_audit_record) *ar_mac_records; - int ar_forced_by_mac; -#endif - -}; - -/* - * In-kernel version of audit record; the basic record plus queue meta-data. 
- * This record can also have a pointer set to some opaque data that will - * be passed through to the audit writing mechanism. - */ -struct kaudit_record { - struct audit_record k_ar; - u_int32_t k_ar_commit; - void *k_udata; /* user data */ - u_int k_ulen; /* user data length */ - struct uthread *k_uthread; /* thread we are auditing */ - TAILQ_ENTRY(kaudit_record) k_q; -}; - -struct proc; -struct vnode; -struct componentname; - -int kau_will_audit(void); - -void audit_abort(struct kaudit_record *ar); -void audit_commit(struct kaudit_record *ar, int error, - int retval); -void audit_init(void); -void audit_shutdown(void); - -struct kaudit_record *audit_new(int event, struct proc *p, - struct uthread *uthread); - -void audit_syscall_enter(unsigned short code, - struct proc *proc, struct uthread *uthread); -#if CONFIG_MACF -/* - * The parameter list of audit_syscall_exit() was modified to also take the - * Darwin syscall number, which is required by mac_audit_check_postselect(). - */ -void audit_syscall_exit(unsigned short code, int error, - struct proc *proc, struct uthread *uthread); -#else -void audit_syscall_exit(int error, struct proc *proc, - struct uthread *uthread); -#endif -void audit_mach_syscall_enter(unsigned short audit_event); -void audit_mach_syscall_exit(int retval, - struct uthread *uthread); - -int kaudit_to_bsm(struct kaudit_record *kar, - struct au_record **pau); - -int bsm_rec_verify(void *rec); - -/* - * Kernel versions of the BSM audit record functions. 
- */ -struct au_record *kau_open(void); -int kau_write(struct au_record *rec, token_t *m); -int kau_close(struct au_record *rec, - struct timespec *endtime, short event); -void kau_free(struct au_record *rec); -void kau_init(void); -token_t *kau_to_file(const char *file, const struct timeval *tv); -token_t *kau_to_header(const struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header32(const struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -token_t *kau_to_header64(const struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod); -/* - * The remaining kernel functions are conditionally compiled in as they - * are wrapped by a macro, and the macro should be the only place in - * the source tree where these functions are referenced. - */ -#if AUDIT -void audit_arg_addr(user_addr_t addr); -void audit_arg_len(user_size_t len); -void audit_arg_fd(int fd); -void audit_arg_fflags(int fflags); -void audit_arg_gid(gid_t gid, gid_t egid, gid_t rgid, - gid_t sgid); -void audit_arg_uid(uid_t uid, uid_t euid, uid_t ruid, - uid_t suid); -void audit_arg_groupset(const gid_t *gidset, u_int gidset_size); -void audit_arg_login(const char *login); -void audit_arg_ctlname(const int *name, int namelen); -void audit_arg_mask(int mask); -void audit_arg_mode(mode_t mode); -void audit_arg_dev(int dev); -void audit_arg_value(long value); -void audit_arg_owner(uid_t uid, gid_t gid); -void audit_arg_pid(pid_t pid); -void audit_arg_process(struct proc *p); -void audit_arg_signum(u_int signum); -void audit_arg_socket(int sodomain, int sotype, - int soprotocol); -void audit_arg_sockaddr(struct vnode *cwd_vp, - struct sockaddr *so); -void audit_arg_auid(uid_t auid); -void audit_arg_auditinfo(const struct auditinfo *au_info); -void audit_arg_upath(struct vnode *cwd_vp, char *upath, - u_int64_t flags); -void audit_arg_vnpath(struct vnode *vp, u_int64_t flags); -void audit_arg_vnpath_withref(struct vnode *vp, 
u_int64_t flags); -void audit_arg_text(const char *text); -void audit_arg_cmd(int cmd); -void audit_arg_svipc_cmd(int cmd); -void audit_arg_svipc_perm(const struct ipc_perm *perm); -void audit_arg_svipc_id(int id); -void audit_arg_svipc_addr(user_addr_t addr); -void audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, - mode_t mode); -void audit_arg_auditon(const union auditon_udata *udata); -void audit_arg_file(struct proc *p, const struct fileproc *fp); -void audit_arg_mach_port1(mach_port_name_t port); -void audit_arg_mach_port2(mach_port_name_t port); - -void audit_sysclose(struct proc *p, int fd); - -void audit_proc_init(struct proc *p); -void audit_proc_fork(struct proc *parent, - struct proc *child); -void audit_proc_free(struct proc *p); - -#if CONFIG_MACF -/* - * audit_mac_data() is the MAC Framework's entry point to the audit subsystem. - * It currently creates only text and data audit tokens. - */ -int audit_mac_data(int type, int len, u_char *data); -void audit_arg_mac_string(const char *string); - -#endif - -/* - * Define a macro to wrap the audit_arg_* calls by checking the global - * audit_enabled flag before performing the actual call. - */ -#define AUDIT_ARG(op, args...) do { \ - if (audit_enabled) \ - audit_arg_ ## op (args); \ - } while (0) - -#define AUDIT_SYSCALL_ENTER(args...) do { \ - if (audit_enabled) { \ - audit_syscall_enter(args); \ - } \ - } while (0) - -/* - * Wrap the audit_syscall_exit() function so that it is called only when - * auditing is enabled, or we have a audit record on the thread. It is - * possible that an audit record was begun before auditing was turned off. - */ -#define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \ - if (audit_enabled || (uthread->uu_ar != NULL)) { \ - audit_syscall_exit(code, error, proc, uthread); \ - } \ - } while (0) - -/* - * Wrap the audit_mach_syscall_enter() and audit_mach_syscall_exit() - * functions in a manner similar to other system call enter/exit functions. 
- */ -#define AUDIT_MACH_SYSCALL_ENTER(args...) do { \ - if (audit_enabled) { \ - audit_mach_syscall_enter(args); \ - } \ - } while (0) - -#define AUDIT_MACH_SYSCALL_EXIT(retval) do { \ - struct uthread *__uthread = get_bsdthread_info(current_thread()); \ - if (audit_enabled || (__uthread->uu_ar != NULL)) { \ - audit_mach_syscall_exit(retval, __uthread); \ - } \ - } while (0) - -/* - * A Macro to wrap the audit_sysclose() function. - */ -#define AUDIT_SYSCLOSE(args...) do { \ - if (audit_enabled) \ - audit_sysclose(args); \ - } while (0) - -#else /* !AUDIT */ - -#define AUDIT_ARG(op, args...) do { \ - } while (0) - -#define AUDIT_SYSCALL_ENTER(args...) do { \ - } while (0) - -#define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \ - } while (0) - -#define AUDIT_MACH_SYSCALL_ENTER(args...) do { \ - } while (0) - -#define AUDIT_MACH_SYSCALL_EXIT(retval) do { \ - } while (0) - -#define AUDIT_SYSCLOSE(op, args...) do { \ - } while (0) - -#endif /* AUDIT */ +#warning " is deprecated. Please use instead." -#endif /* KERNEL */ +#include #endif /* !_BSM_AUDIT_KERNEL_H */ diff --git a/bsd/bsm/audit_kevents.h b/bsd/bsm/audit_kevents.h index 6df33b22a..268c456c3 100644 --- a/bsd/bsm/audit_kevents.h +++ b/bsd/bsm/audit_kevents.h @@ -1,450 +1,793 @@ -/* - * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. +/*- + * Copyright (c) 2005-2009 Apple Inc. + * All rights reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ - -#ifndef _BSM_AUDIT_KEVENTS_H_ -#define _BSM_AUDIT_KEVENTS_H_ - -/* - * Values marked as AUE_NULL are not required to be audited as per CAPP - * - * The second value within comments is the syscall number in Darwin + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. 
("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * Values in the third column are the values assigned by BSM for obsolete - * or old system calls + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Values marked as XXX in the third column do not have an - * event number assigned as yet, and have (temporarily) been assigned - * value of AUE_NULL + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_kevents.h#6 $ */ -#define AUE_NULL 0 -#define AUE_EXIT 1 /*1*/ -#define AUE_FORK 2 /*2*/ -#define AUE_OPEN 3 /*3*/ -#define AUE_READ AUE_NULL /*4*/ -#define AUE_WRITE AUE_NULL /*5*/ -#define AUE_OPEN_R 72 /*5*/ -#define AUE_OPEN_RC 73 /*5*/ -#define AUE_OPEN_RTC 75 /*5*/ -#define AUE_OPEN_RT 74 /*5*/ -#define AUE_OPEN_RW 80 /*5*/ -#define AUE_OPEN_RWC 81 /*5*/ -#define AUE_OPEN_RWTC 83 /*5*/ -#define AUE_OPEN_RWT 82 /*5*/ -#define AUE_OPEN_W 76 /*5*/ -#define AUE_OPEN_WC 77 /*5*/ -#define AUE_OPEN_WTC 79 /*5*/ -#define AUE_OPEN_WT 78 /*5*/ -#define AUE_CLOSE 112 /*6*/ -#define AU_WAIT4 AUE_NULL /*7*/ -#define AUE_O_CREAT AUE_OPEN_RWTC /*8*/ /*4*/ -#define AUE_LINK 5 /*9*/ -#define AUE_UNLINK 6 /*10*/ -#define AUE_O_EXECV AUE_NULL /*11*/ -#define AUE_CHDIR 8 /*12*/ -#define AUE_FCHDIR 68 /*13*/ -#define AUE_MKNOD 9 /*14*/ -#define AUE_CHMOD 10 /*15*/ -#define AUE_CHOWN 11 /*16*/ -#define AUE_O_SBREAK AUE_NULL /*17*/ -#define AUE_GETFSSTAT 301 /*18*/ -#define AUE_O_LSEEK AUE_NULL /*19*/ -#define AUE_GETPID AUE_NULL /*20*/ -#define AUE_O_MOUNT AUE_NULL /*21*/ -#define AUE_O_UMOUNT AUE_NULL /*22*/ -#define AUE_SETUID 200 /*23*/ -#define AUE_GETUID AUE_NULL /*24*/ -#define AUE_GETEUID AUE_NULL /*25*/ -#define AUE_PTRACE 302 /*26*/ -#define AUE_RECVMSG 190 /*27*/ -#define AUE_SENDMSG 188 /*28*/ -#define AUE_RECVFROM 191 /*29*/ -#define AUE_ACCEPT 33 /*30*/ -#define AUE_GETPEERNAME AUE_NULL /*31*/ -#define AUE_GETSOCKNAME AUE_NULL /*32*/ -#define AUE_ACCESS 14 /*33*/ -#define AUE_CHFLAGS 303 /*34*/ -#define AUE_FCHFLAGS 304 /*35*/ -#define AUE_SYNC AUE_NULL /*36*/ -#define AUE_KILL 15 /*37*/ -#define AUE_O_STAT AUE_STAT /*38*/ -#define AUE_GETPPID AUE_NULL /*39*/ -#define AUE_O_LSTAT AUE_LSTAT /*40*/ -#define AUE_DUP AUE_NULL /*41*/ -#define AUE_PIPE 185 /*42*/ 
-#define AUE_GETEGID AUE_NULL /*43*/ -#define AUE_PROFILE 305 /*44*/ -#define AUE_KTRACE AUE_NULL /*45*/ -#define AUE_REBOOT 308 -#define AUE_SIGACTION AUE_NULL /*46*/ /*XXX*/ -#define AUE_GETGID AUE_NULL /*47*/ -#define AUE_SIGPROCMASK AUE_NULL /*48*/ /*XXX*/ -#define AUE_GETLOGIN AUE_NULL /*49*/ -#define AUE_SETLOGIN 307 /*50*/ -#define AUE_ACCT 18 /*51*/ -#define AUE_SIGPENDING AUE_NULL /*52*/ /*XXX*/ -#define AUE_SIGALTSTACK AUE_NULL /*53*/ /*XXX*/ -#define AUE_IOCTL 158 /*54*/ -#define AUE_SYSTEMBOOT 113 /*55*/ -#define AUE_REVOKE 309 /*56*/ -#define AUE_SYMLINK 21 /*57*/ -#define AUE_READLINK 22 /*58*/ -#define AUE_EXECVE 23 /*59*/ -#define AUE_UMASK 310 /*60*/ -#define AUE_CHROOT 24 /*61*/ -#define AUE_O_FSTAT AUE_FSTAT /*62*/ - -#define AUE_O_GETPAGESIZE AUE_NULL /*64*/ -#define AUE_MSYNC AUE_NULL /*65*/ -#define AUE_VFORK 25 /*66*/ -#define AUE_O_VREAD AUE_NULL /*67*/ -#define AUE_O_VWRITE AUE_NULL /*68*/ -#define AUE_SBRK AUE_NULL /*69*/ /*EOPNOTSUP*/ -#define AUE_SSTK AUE_NULL /*70*/ /*EOPNOTSUP*/ -#define AUE_O_MMAP AUE_MMAP /*71*/ -#define AUE_O_VADVISE AUE_NULL /*72*/ -#define AUE_MUNMAP 213 /*73*/ -#define AUE_MPROTECT 311 /*74*/ -#define AUE_MADVISE AUE_NULL /*75*/ -#define AUE_O_VHANGUP AUE_NULL /*76*/ -#define AUE_O_VLIMIT AUE_NULL /*77*/ -#define AUE_MINCORE AUE_NULL /*78*/ -#define AUE_GETGROUPS AUE_NULL /*79*/ -#define AUE_SETGROUPS 26 /*80*/ -#define AUE_GETPGRP AUE_NULL /*81*/ -#define AUE_SETPGRP 27 /*82*/ -#define AUE_SETITIMER AUE_NULL /*83*/ /*XXX*/ -#define AUE_O_WAIT AUE_NULL /*84*/ -#define AUE_SWAPON 28 /*85*/ -#define AUE_GETITIMER AUE_NULL /*86*/ -#define AUE_O_GETHOSTNAME AUE_NULL /*87*/ -#define AUE_O_SETHOSTNAME AUE_SYSCTL /*88*/ -#define AUE_GETDTABLESIZE AUE_NULL /*89*/ -#define AUE_DUP2 AUE_NULL /*90*/ -#define AUE_O_GETDOPT AUE_NULL /*91*/ -#define AUE_FCNTL 30 /*92*/ -#define AUE_SELECT AUE_NULL /*93*/ -#define AUE_O_SETDOPT AUE_NULL /*94*/ -#define AUE_FSYNC AUE_NULL /*95*/ -#define AUE_SETPRIORITY 312 /*96*/ -#define 
AUE_SOCKET 183 /*97*/ -#define AUE_CONNECT 32 /*98*/ -#define AUE_O_ACCEPT AUE_NULL /*99*/ -#define AUE_GETPRIORITY AUE_NULL /*100*/ -#define AUE_O_SEND AUE_SENDMSG /*101*/ -#define AUE_O_RECV AUE_RECVMSG /*102*/ -#define AUE_SIGRETURN AUE_NULL /*103*/ /*XXX*/ -#define AUE_BIND 34 /*104*/ -#define AUE_SETSOCKOPT 35 /*105*/ -#define AUE_LISTEN AUE_NULL /*106*/ -#define AUE_O_VTIMES AUE_NULL /*107*/ -#define AUE_O_SIGVEC AUE_NULL /*108*/ -#define AUE_O_SIGBLOCK AUE_NULL /*109*/ -#define AUE_O_SIGSETMASK AUE_NULL /*110*/ -#define AUE_SIGSUSPEND AUE_NULL /*111*/ /*XXX*/ -#define AUE_O_SIGSTACK AUE_NULL /*112*/ -#define AUE_O_RECVMSG AUE_RECVMSG /*113*/ -#define AUE_O_SENDMSG AUE_SENDMSG /*114*/ -#define AUE_O_VTRACE AUE_NULL /*115*/ /*36*/ -#define AUE_GETTIMEOFDAY AUE_NULL /*116*/ -#define AUE_GETRUSAGE AUE_NULL /*117*/ -#define AUE_GTSOCKOPT AUE_NULL /*118*/ -#define AUE_O_RESUBA AUE_NULL /*119*/ -#define AUE_READV AUE_NULL /*120*/ -#define AUE_WRITEV AUE_NULL /*121*/ -#define AUE_SETTIMEOFDAY 313 /*122*/ -#define AUE_FCHOWN 38 /*123*/ -#define AUE_FCHMOD 39 /*124*/ -#define AUE_O_RECVFROM AUE_RECVFROM /*125*/ -#define AUE_O_SETREUID AUE_SETEUID /*126*/ /*40*/ -#define AUE_O_SETREGID AUE_SETEGID /*127*/ /*41*/ -#define AUE_RENAME 42 /*128*/ -#define AUE_O_TRUNCATE AUE_TRUNCATE /*129*/ -#define AUE_O_FTRUNCATE AUE_FTRUNCATE /*130*/ -#define AUE_FLOCK 314 /*131*/ -#define AUE_MKFIFO 315 /*132*/ -#define AUE_SENDTO 184 /*133*/ -#define AUE_SHUTDOWN 46 /*134*/ -#define AUE_SOCKETPAIR 317 /*135*/ -#define AUE_MKDIR 47 /*136*/ -#define AUE_RMDIR 48 /*137*/ -#define AUE_UTIMES 49 /*138*/ -#define AUE_FUTIMES 318 /*139*/ -#define AUE_ADJTIME 50 /*140*/ -#define AUE_O_GETPEERNAME AUE_NULL /*141*/ -#define AUE_O_GETHOSTID AUE_NULL /*142*/ -#define AUE_O_SETHOSTID AUE_NULL /*143*/ -#define AUE_O_GETRLIMIT AUE_NULL /*144*/ -#define AUE_O_SETRLIMIT AUE_SETRLIMIT /*145*/ -#define AUE_O_KILLPG AUE_KILL /*146*/ -#define AUE_SETSID 319 /*147*/ -#define AUE_O_SETQUOTA AUE_NULL /*148*/ 
-#define AUE_O_QUOTA AUE_NULL /*149*/ -#define AUE_O_GETSOCKNAME AUE_NULL /*150*/ -#define AUE_GETPGID AUE_NULL /*151*/ -#define AUE_SETPRIVEXEC 320 /*152*/ -#define AUE_PREAD AUE_NULL /*153*/ -#define AUE_PWRITE AUE_NULL /*154*/ -#define AUE_NFSSVC 321 /*155*/ -#define AUE_O_GETDIRENTRIES AUE_GETDIRENTRIES /*156*/ -#define AUE_STATFS 54 /*157*/ -#define AUE_FSTATFS 55 /*158*/ -#define AUE_UNMOUNT 12 /*159*/ -#define AUE_O_ASYNCDAEMON AUE_NULL /*160*/ -#define AUE_GETFH 322 /*161*/ -#define AUE_O_GETDOMAINNAME AUE_NULL /*162*/ -#define AUE_O_SETDOMAINNAME AUE_SYSCTL /*163*/ -#define AUE_O_PCFS_MOUNT AUE_NULL /*164*/ -#define AUE_QUOTACTL 60 /*165*/ -#define AUE_O_EXPORTFS AUE_NULL /*166*/ -#define AUE_MOUNT 62 /*167*/ -#define AUE_O_USTATE AUE_NULL /*168*/ -#define AUE_TABLE AUE_NULL /*170*/ /*ENOSYS*/ -#define AUE_O_WAIT3 AUE_NULL /*171*/ -#define AUE_O_RPAUSE AUE_NULL /*172*/ -#define AUE_O_GETDENTS AUE_NULL /*174*/ -#define AUE_GCCONTROL AUE_NULL /*175*/ /*ENOSYS*/ -#define AUE_ADDPROFILE 324 /*176*/ - -#define AUE_KDBUGTRACE 325 /*180*/ -#define AUE_SETGID 205 /*181*/ -#define AUE_SETEGID 214 /*182*/ -#define AUE_SETEUID 215 /*183*/ - -#define AUE_STAT 16 /*188*/ -#define AUE_FSTAT 326 /*189*/ -#define AUE_LSTAT 17 /*190*/ -#define AUE_PATHCONF 71 /*191*/ -#define AUE_FPATHCONF 327 /*192*/ -#define AUE_GETRLIMIT AUE_NULL /*194*/ -#define AUE_SETRLIMIT 51 /*195*/ -#define AUE_GETDIRENTRIES 328 /*196*/ -#define AUE_MMAP 210 /*197*/ -#define AUE_SYSCALL AUE_NULL /*198*/ /*ENOSYS*/ -#define AUE_LSEEK AUE_NULL /*199*/ -#define AUE_TRUNCATE 329 /*200*/ -#define AUE_FTRUNCATE 330 /*201*/ -#define AUE_SYSCTL 331 /*202*/ -#define AUE_MLOCK 332 /*203*/ -#define AUE_MUNLOCK 333 /*204*/ -#define AUE_UNDELETE 334 /*205*/ - -#define AUE_MKCOMPLEX AUE_NULL /*216*/ /*XXX*/ -#define AUE_STATV AUE_NULL /*217*/ /*EOPNOTSUPP*/ -#define AUE_LSTATV AUE_NULL /*218*/ /*EOPNOTSUPP*/ -#define AUE_FSTATV AUE_NULL /*219*/ /*EOPNOTSUPP*/ -#define AUE_GETATTRLIST 335 /*220*/ -#define 
AUE_SETATTRLIST 336 /*221*/ -#define AUE_GETDIRENTRIESATTR 337 /*222*/ -#define AUE_EXCHANGEDATA 338 /*223*/ -#define AUE_CHECKUSERACCESS AUE_ACCESS /*224*/ /* To Be Removed */ -#define AUE_SEARCHFS 339 /*225*/ +#ifndef _BSM_AUDIT_KEVENTS_H_ +#define _BSM_AUDIT_KEVENTS_H_ -#define AUE_DELETE AUE_UNLINK /*226*/ /* reserved */ -#define AUE_COPYFILE 361 /*227*/ /* reserved */ -#define AUE_WATCHEVENT AUE_NULL /*231*/ /* reserved */ -#define AUE_WAITEVENT AUE_NULL /*232*/ /* reserved */ -#define AUE_MODWATCH AUE_NULL /*233*/ /* reserved */ -#define AUE_FSCTL AUE_NULL /*242*/ /* reserved */ +/* + * The reserved event numbers for kernel events are 1...2047 and 43001..44999. AUE_IS_A_KEVENT() evaluates its argument more than once. + */ +#define AUE_IS_A_KEVENT(e) (((e) > 0 && (e) < 2048) || \ + ((e) > 43000 && (e) < 45000)) -#define AUE_MINHERIT 340 /*250*/ -#define AUE_SEMSYS AUE_NULL /*251*/ /* To Be Removed */ -#define AUE_MSGSYS AUE_NULL /*252*/ /* To Be Removed */ -#define AUE_SHMSYS AUE_NULL /*253*/ -#define AUE_SEMCTL 98 /*254*/ -#define AUE_SEMCTL_GETALL 105 /*254*/ -#define AUE_SEMCTL_GETNCNT 102 /*254*/ -#define AUE_SEMCTL_GETPID 103 /*254*/ -#define AUE_SEMCTL_GETVAL 104 /*254*/ -#define AUE_SEMCTL_GETZCNT 106 /*254*/ -#define AUE_SEMCTL_RMID 99 /*254*/ -#define AUE_SEMCTL_SET 100 /*254*/ -#define AUE_SEMCTL_SETALL 108 /*254*/ -#define AUE_SEMCTL_SETVAL 107 /*254*/ -#define AUE_SEMCTL_STAT 101 /*254*/ -#define AUE_SEMGET 109 /*255*/ -#define AUE_SEMOP 110 /*256*/ -#define AUE_SEMCONFIG 341 /*257*/ -#define AUE_MSGCL AUE_NULL /*258*/ /*EOPNOTSUPP*/ -#define AUE_MSGGET 88 /*259*/ /*88-EOPNOTSUPP*/ -#define AUE_MSGRCV 89 /*261*/ /*89-EOPNOTSUPP*/ -#define AUE_MSGSND 90 /*260*/ /*90-EOPNOTSUPP*/ -#define AUE_SHMAT 96 /*262*/ -#define AUE_SHMCTL 91 /*263*/ -#define AUE_SHMCTL_RMID 92 /*263*/ -#define AUE_SHMCTL_SET 93 /*263*/ -#define AUE_SHMCTL_STAT 94 /*263*/ -#define AUE_SHMDT 97 /*264*/ -#define AUE_SHMGET 95 /*265*/ -#define AUE_SHMOPEN 345 /*266*/ -#define AUE_SHMUNLINK 346 /*267*/ -#define AUE_SEMOPEN 342 /*268*/ 
-#define AUE_SEMCLOSE 343 /*269*/ -#define AUE_SEMUNLINK 344 /*270*/ -#define AUE_SEMWAIT AUE_NULL /*271*/ -#define AUE_SEMTRYWAIT AUE_NULL /*272*/ -#define AUE_SEMPOST AUE_NULL /*273*/ -#define AUE_SEMGETVALUE AUE_NULL /*274*/ /*ENOSYS*/ -#define AUE_SEMINIT AUE_NULL /*275*/ /*ENOSYS*/ -#define AUE_SEMDESTROY AUE_NULL /*276*/ /*ENOSYS*/ +/* + * Values marked as AUE_NULL are not required to be audited as per CAPP. + * + * Some conflicts exist in the assignment of name to event number mappings + * between BSM implementations. In general, we prefer the OpenSolaris + * definition as we consider Solaris BSM to be authoritative. _DARWIN_ has + * been inserted for the Darwin variants. If necessary, other tags will be + * added in the future. + */ +#define AUE_NULL 0 +#define AUE_EXIT 1 +#define AUE_FORK 2 +#define AUE_FORKALL AUE_FORK /* Solaris-specific. */ +#define AUE_OPEN 3 +#define AUE_CREAT 4 +#define AUE_LINK 5 +#define AUE_UNLINK 6 +#define AUE_DELETE AUE_UNLINK /* Darwin-specific. */ +#define AUE_EXEC 7 +#define AUE_CHDIR 8 +#define AUE_MKNOD 9 +#define AUE_CHMOD 10 +#define AUE_CHOWN 11 +#define AUE_UMOUNT 12 +#define AUE_JUNK 13 /* Solaris-specific. */ +#define AUE_ACCESS 14 +#define AUE_KILL 15 +#define AUE_STAT 16 +#define AUE_LSTAT 17 +#define AUE_ACCT 18 +#define AUE_MCTL 19 /* Solaris-specific. */ +#define AUE_REBOOT 20 /* XXX: Darwin conflict. */ +#define AUE_SYMLINK 21 +#define AUE_READLINK 22 +#define AUE_EXECVE 23 +#define AUE_CHROOT 24 +#define AUE_VFORK 25 +#define AUE_SETGROUPS 26 +#define AUE_SETPGRP 27 +#define AUE_SWAPON 28 +#define AUE_SETHOSTNAME 29 /* XXX: Darwin conflict. */ +#define AUE_FCNTL 30 +#define AUE_SETPRIORITY 31 /* XXX: Darwin conflict. */ +#define AUE_CONNECT 32 +#define AUE_ACCEPT 33 +#define AUE_BIND 34 +#define AUE_SETSOCKOPT 35 +#define AUE_VTRACE 36 /* Solaris-specific. */ +#define AUE_SETTIMEOFDAY 37 /* XXX: Darwin conflict. 
*/ +#define AUE_FCHOWN 38 +#define AUE_FCHMOD 39 +#define AUE_SETREUID 40 +#define AUE_SETREGID 41 +#define AUE_RENAME 42 +#define AUE_TRUNCATE 43 /* XXX: Darwin conflict. */ +#define AUE_FTRUNCATE 44 /* XXX: Darwin conflict. */ +#define AUE_FLOCK 45 /* XXX: Darwin conflict. */ +#define AUE_SHUTDOWN 46 +#define AUE_MKDIR 47 +#define AUE_RMDIR 48 +#define AUE_UTIMES 49 +#define AUE_ADJTIME 50 +#define AUE_SETRLIMIT 51 +#define AUE_KILLPG 52 +#define AUE_NFS_SVC 53 /* XXX: Darwin conflict. */ +#define AUE_STATFS 54 +#define AUE_FSTATFS 55 +#define AUE_UNMOUNT 56 /* XXX: Darwin conflict. */ +#define AUE_ASYNC_DAEMON 57 +#define AUE_NFS_GETFH 58 /* XXX: Darwin conflict. */ +#define AUE_SETDOMAINNAME 59 +#define AUE_QUOTACTL 60 /* XXX: Darwin conflict. */ +#define AUE_EXPORTFS 61 +#define AUE_MOUNT 62 +#define AUE_SEMSYS 63 +#define AUE_MSGSYS 64 +#define AUE_SHMSYS 65 +#define AUE_BSMSYS 66 /* Solaris-specific. */ +#define AUE_RFSSYS 67 /* Solaris-specific. */ +#define AUE_FCHDIR 68 +#define AUE_FCHROOT 69 +#define AUE_VPIXSYS 70 /* Solaris-specific. 
*/ +#define AUE_PATHCONF 71 +#define AUE_OPEN_R 72 +#define AUE_OPEN_RC 73 +#define AUE_OPEN_RT 74 +#define AUE_OPEN_RTC 75 +#define AUE_OPEN_W 76 +#define AUE_OPEN_WC 77 +#define AUE_OPEN_WT 78 +#define AUE_OPEN_WTC 79 +#define AUE_OPEN_RW 80 +#define AUE_OPEN_RWC 81 +#define AUE_OPEN_RWT 82 +#define AUE_OPEN_RWTC 83 +#define AUE_MSGCTL 84 +#define AUE_MSGCTL_RMID 85 +#define AUE_MSGCTL_SET 86 +#define AUE_MSGCTL_STAT 87 +#define AUE_MSGGET 88 +#define AUE_MSGRCV 89 +#define AUE_MSGSND 90 +#define AUE_SHMCTL 91 +#define AUE_SHMCTL_RMID 92 +#define AUE_SHMCTL_SET 93 +#define AUE_SHMCTL_STAT 94 +#define AUE_SHMGET 95 +#define AUE_SHMAT 96 +#define AUE_SHMDT 97 +#define AUE_SEMCTL 98 +#define AUE_SEMCTL_RMID 99 +#define AUE_SEMCTL_SET 100 +#define AUE_SEMCTL_STAT 101 +#define AUE_SEMCTL_GETNCNT 102 +#define AUE_SEMCTL_GETPID 103 +#define AUE_SEMCTL_GETVAL 104 +#define AUE_SEMCTL_GETALL 105 +#define AUE_SEMCTL_GETZCNT 106 +#define AUE_SEMCTL_SETVAL 107 +#define AUE_SEMCTL_SETALL 108 +#define AUE_SEMGET 109 +#define AUE_SEMOP 110 +#define AUE_CORE 111 /* Solaris-specific, currently. */ +#define AUE_CLOSE 112 +#define AUE_SYSTEMBOOT 113 /* Solaris-specific. */ +#define AUE_ASYNC_DAEMON_EXIT 114 /* Solaris-specific. */ +#define AUE_NFSSVC_EXIT 115 /* Solaris-specific. */ +#define AUE_WRITEL 128 /* Solaris-specific. */ +#define AUE_WRITEVL 129 /* Solaris-specific. */ +#define AUE_GETAUID 130 +#define AUE_SETAUID 131 +#define AUE_GETAUDIT 132 +#define AUE_SETAUDIT 133 +#define AUE_GETUSERAUDIT 134 /* Solaris-specific. */ +#define AUE_SETUSERAUDIT 135 /* Solaris-specific. */ +#define AUE_AUDITSVC 136 /* Solaris-specific. */ +#define AUE_AUDITUSER 137 /* Solaris-specific. */ +#define AUE_AUDITON 138 +#define AUE_AUDITON_GTERMID 139 /* Solaris-specific. */ +#define AUE_AUDITON_STERMID 140 /* Solaris-specific. 
*/ +#define AUE_AUDITON_GPOLICY 141 +#define AUE_AUDITON_SPOLICY 142 +#define AUE_AUDITON_GQCTRL 145 +#define AUE_AUDITON_SQCTRL 146 +#define AUE_GETKERNSTATE 147 /* Solaris-specific. */ +#define AUE_SETKERNSTATE 148 /* Solaris-specific. */ +#define AUE_GETPORTAUDIT 149 /* Solaris-specific. */ +#define AUE_AUDITSTAT 150 /* Solaris-specific. */ +#define AUE_REVOKE 151 +#define AUE_MAC 152 /* Solaris-specific. */ +#define AUE_ENTERPROM 153 /* Solaris-specific. */ +#define AUE_EXITPROM 154 /* Solaris-specific. */ +#define AUE_IFLOAT 155 /* Solaris-specific. */ +#define AUE_PFLOAT 156 /* Solaris-specific. */ +#define AUE_UPRIV 157 /* Solaris-specific. */ +#define AUE_IOCTL 158 +#define AUE_SOCKET 183 +#define AUE_SENDTO 184 +#define AUE_PIPE 185 +#define AUE_SOCKETPAIR 186 /* XXX: Darwin conflict. */ +#define AUE_SEND 187 +#define AUE_SENDMSG 188 +#define AUE_RECV 189 +#define AUE_RECVMSG 190 +#define AUE_RECVFROM 191 +#define AUE_READ 192 +#define AUE_GETDENTS 193 +#define AUE_LSEEK 194 +#define AUE_WRITE 195 +#define AUE_WRITEV 196 +#define AUE_NFS 197 /* Solaris-specific. */ +#define AUE_READV 198 +#define AUE_OSTAT 199 /* Solaris-specific. */ +#define AUE_SETUID 200 /* XXXRW: Solaris old setuid? */ +#define AUE_STIME 201 /* XXXRW: Solaris old stime? */ +#define AUE_UTIME 202 /* XXXRW: Solaris old utime? */ +#define AUE_NICE 203 /* XXXRW: Solaris old nice? */ +#define AUE_OSETPGRP 204 /* Solaris-specific. */ +#define AUE_SETGID 205 +#define AUE_READL 206 /* Solaris-specific. */ +#define AUE_READVL 207 /* Solaris-specific. */ +#define AUE_FSTAT 208 +#define AUE_DUP2 209 +#define AUE_MMAP 210 +#define AUE_AUDIT 211 +#define AUE_PRIOCNTLSYS 212 /* Solaris-specific. */ +#define AUE_MUNMAP 213 +#define AUE_SETEGID 214 +#define AUE_SETEUID 215 +#define AUE_PUTMSG 216 /* Solaris-specific. */ +#define AUE_GETMSG 217 /* Solaris-specific. */ +#define AUE_PUTPMSG 218 /* Solaris-specific. */ +#define AUE_GETPMSG 219 /* Solaris-specific. 
*/ +#define AUE_AUDITSYS 220 /* Solaris-specific. */ +#define AUE_AUDITON_GETKMASK 221 +#define AUE_AUDITON_SETKMASK 222 +#define AUE_AUDITON_GETCWD 223 +#define AUE_AUDITON_GETCAR 224 +#define AUE_AUDITON_GETSTAT 225 +#define AUE_AUDITON_SETSTAT 226 +#define AUE_AUDITON_SETUMASK 227 +#define AUE_AUDITON_SETSMASK 228 +#define AUE_AUDITON_GETCOND 229 +#define AUE_AUDITON_SETCOND 230 +#define AUE_AUDITON_GETCLASS 231 +#define AUE_AUDITON_SETCLASS 232 +#define AUE_FUSERS 233 /* Solaris-specific; also UTSSYS? */ +#define AUE_STATVFS 234 +#define AUE_XSTAT 235 /* Solaris-specific. */ +#define AUE_LXSTAT 236 /* Solaris-specific. */ +#define AUE_LCHOWN 237 +#define AUE_MEMCNTL 238 /* Solaris-specific. */ +#define AUE_SYSINFO 239 /* Solaris-specific. */ +#define AUE_XMKNOD 240 /* Solaris-specific. */ +#define AUE_FORK1 241 +#define AUE_MODCTL 242 /* Solaris-specific. */ +#define AUE_MODLOAD 243 +#define AUE_MODUNLOAD 244 +#define AUE_MODCONFIG 245 /* Solaris-specific. */ +#define AUE_MODADDMAJ 246 /* Solaris-specific. */ +#define AUE_SOCKACCEPT 247 /* Solaris-specific. */ +#define AUE_SOCKCONNECT 248 /* Solaris-specific. */ +#define AUE_SOCKSEND 249 /* Solaris-specific. */ +#define AUE_SOCKRECEIVE 250 /* Solaris-specific. */ +#define AUE_ACLSET 251 +#define AUE_FACLSET 252 +#define AUE_DOORFS 253 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_CALL 254 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_RETURN 255 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_CREATE 256 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_REVOKE 257 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_INFO 258 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_CRED 259 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_BIND 260 /* Solaris-specific. */ +#define AUE_DOORFS_DOOR_UNBIND 261 /* Solaris-specific. */ +#define AUE_P_ONLINE 262 /* Solaris-specific. */ +#define AUE_PROCESSOR_BIND 263 /* Solaris-specific. */ +#define AUE_INST_SYNC 264 /* Solaris-specific. 
*/ +#define AUE_SOCKCONFIG 265 /* Solaris-specific. */ +#define AUE_SETAUDIT_ADDR 266 +#define AUE_GETAUDIT_ADDR 267 +#define AUE_UMOUNT2 268 /* Solaris-specific. */ +#define AUE_FSAT 269 /* Solaris-specific. */ +#define AUE_OPENAT_R 270 +#define AUE_OPENAT_RC 271 +#define AUE_OPENAT_RT 272 +#define AUE_OPENAT_RTC 273 +#define AUE_OPENAT_W 274 +#define AUE_OPENAT_WC 275 +#define AUE_OPENAT_WT 276 +#define AUE_OPENAT_WTC 277 +#define AUE_OPENAT_RW 278 +#define AUE_OPENAT_RWC 279 +#define AUE_OPENAT_RWT 280 +#define AUE_OPENAT_RWTC 281 +#define AUE_RENAMEAT 282 +#define AUE_FSTATAT 283 +#define AUE_FCHOWNAT 284 +#define AUE_FUTIMESAT 285 +#define AUE_UNLINKAT 286 +#define AUE_CLOCK_SETTIME 287 +#define AUE_NTP_ADJTIME 288 +#define AUE_SETPPRIV 289 /* Solaris-specific. */ +#define AUE_MODDEVPLCY 290 /* Solaris-specific. */ +#define AUE_MODADDPRIV 291 /* Solaris-specific. */ +#define AUE_CRYPTOADM 292 /* Solaris-specific. */ +#define AUE_CONFIGKSSL 293 /* Solaris-specific. */ +#define AUE_BRANDSYS 294 /* Solaris-specific. */ +#define AUE_PF_POLICY_ADDRULE 295 /* Solaris-specific. */ +#define AUE_PF_POLICY_DELRULE 296 /* Solaris-specific. */ +#define AUE_PF_POLICY_CLONE 297 /* Solaris-specific. */ +#define AUE_PF_POLICY_FLIP 298 /* Solaris-specific. */ +#define AUE_PF_POLICY_FLUSH 299 /* Solaris-specific. */ +#define AUE_PF_POLICY_ALGS 300 /* Solaris-specific. */ +#define AUE_PORTFS 301 /* Solaris-specific. */ -#define AUE_GETSID AUE_NULL /*310*/ +/* + * Events added for Apple Darwin that potentially collide with future Solaris + * BSM events. These are assigned AUE_DARWIN prefixes, and are deprecated in + * new trails. Systems generating these events should switch to the new + * identifiers that avoid colliding with the Solaris identifier space. 
+ */ +#define AUE_DARWIN_GETFSSTAT 301 +#define AUE_DARWIN_PTRACE 302 +#define AUE_DARWIN_CHFLAGS 303 +#define AUE_DARWIN_FCHFLAGS 304 +#define AUE_DARWIN_PROFILE 305 +#define AUE_DARWIN_KTRACE 306 +#define AUE_DARWIN_SETLOGIN 307 +#define AUE_DARWIN_REBOOT 308 +#define AUE_DARWIN_REVOKE 309 +#define AUE_DARWIN_UMASK 310 +#define AUE_DARWIN_MPROTECT 311 +#define AUE_DARWIN_SETPRIORITY 312 +#define AUE_DARWIN_SETTIMEOFDAY 313 +#define AUE_DARWIN_FLOCK 314 +#define AUE_DARWIN_MKFIFO 315 +#define AUE_DARWIN_POLL 316 +#define AUE_DARWIN_SOCKETPAIR 317 +#define AUE_DARWIN_FUTIMES 318 +#define AUE_DARWIN_SETSID 319 +#define AUE_DARWIN_SETPRIVEXEC 320 /* Darwin-specific. */ +#define AUE_DARWIN_NFSSVC 321 +#define AUE_DARWIN_GETFH 322 +#define AUE_DARWIN_QUOTACTL 323 +#define AUE_DARWIN_ADDPROFILE 324 /* Darwin-specific. */ +#define AUE_DARWIN_KDEBUGTRACE 325 /* Darwin-specific. */ +#define AUE_DARWIN_KDBUGTRACE AUE_DARWIN_KDEBUGTRACE /* Old misspelling; same legacy value (325) as AUE_DARWIN_KDEBUGTRACE. */ +#define AUE_DARWIN_FSTAT 326 +#define AUE_DARWIN_FPATHCONF 327 +#define AUE_DARWIN_GETDIRENTRIES 328 +#define AUE_DARWIN_TRUNCATE 329 +#define AUE_DARWIN_FTRUNCATE 330 +#define AUE_DARWIN_SYSCTL 331 +#define AUE_DARWIN_MLOCK 332 +#define AUE_DARWIN_MUNLOCK 333 +#define AUE_DARWIN_UNDELETE 334 +#define AUE_DARWIN_GETATTRLIST 335 /* Darwin-specific. */ +#define AUE_DARWIN_SETATTRLIST 336 /* Darwin-specific. */ +#define AUE_DARWIN_GETDIRENTRIESATTR 337 /* Darwin-specific. */ +#define AUE_DARWIN_EXCHANGEDATA 338 /* Darwin-specific. */ +#define AUE_DARWIN_SEARCHFS 339 /* Darwin-specific. */ +#define AUE_DARWIN_MINHERIT 340 +#define AUE_DARWIN_SEMCONFIG 341 +#define AUE_DARWIN_SEMOPEN 342 +#define AUE_DARWIN_SEMCLOSE 343 +#define AUE_DARWIN_SEMUNLINK 344 +#define AUE_DARWIN_SHMOPEN 345 +#define AUE_DARWIN_SHMUNLINK 346 +#define AUE_DARWIN_LOADSHFILE 347 /* Darwin-specific. */ +#define AUE_DARWIN_RESETSHFILE 348 /* Darwin-specific. */ +#define AUE_DARWIN_NEWSYSTEMSHREG 349 /* Darwin-specific. */ +#define AUE_DARWIN_PTHREADKILL 350 /* Darwin-specific. 
*/ +#define AUE_DARWIN_PTHREADSIGMASK 351 /* Darwin-specific. */ +#define AUE_DARWIN_AUDITCTL 352 +#define AUE_DARWIN_RFORK 353 +#define AUE_DARWIN_LCHMOD 354 +#define AUE_DARWIN_SWAPOFF 355 +#define AUE_DARWIN_INITPROCESS 356 /* Darwin-specific. */ +#define AUE_DARWIN_MAPFD 357 /* Darwin-specific. */ +#define AUE_DARWIN_TASKFORPID 358 /* Darwin-specific. */ +#define AUE_DARWIN_PIDFORTASK 359 /* Darwin-specific. */ +#define AUE_DARWIN_SYSCTL_NONADMIN 360 +#define AUE_DARWIN_COPYFILE 361 /* Darwin-specific. */ -#define AUE_MLOCKALL AUE_NULL /*324*/ /*ENOSYS*/ -#define AUE_MUNLOCKALL AUE_NULL /*325*/ /*ENOSYS*/ +/* + * Audit event identifiers added as part of OpenBSM, generally corresponding + * to events in FreeBSD, Darwin, and Linux that were not present in Solaris. + * These often duplicate events added to the Solaris set by Darwin, but use + * event identifiers in a higher range in order to avoid colliding with + * future Solaris additions. + * + * If an event in this section is later added to Solaris, we prefer the + * Solaris event identifier, and add _OPENBSM_ to the OpenBSM-specific + * identifier so that old trails can still be processed, but new trails use + * the Solaris identifier. + */ +#define AUE_GETFSSTAT 43001 +#define AUE_PTRACE 43002 +#define AUE_CHFLAGS 43003 +#define AUE_FCHFLAGS 43004 +#define AUE_PROFILE 43005 +#define AUE_KTRACE 43006 +#define AUE_SETLOGIN 43007 +#define AUE_OPENBSM_REVOKE 43008 /* Solaris event now preferred. */ +#define AUE_UMASK 43009 +#define AUE_MPROTECT 43010 +#define AUE_MKFIFO 43011 +#define AUE_POLL 43012 +#define AUE_FUTIMES 43013 +#define AUE_SETSID 43014 +#define AUE_SETPRIVEXEC 43015 /* Darwin-specific. */ +#define AUE_ADDPROFILE 43016 /* Darwin-specific. */ +#define AUE_KDEBUGTRACE 43017 /* Darwin-specific. */ +#define AUE_KDBUGTRACE AUE_KDEBUGTRACE +#define AUE_OPENBSM_FSTAT 43018 /* Solaris event now preferred. 
*/ +#define AUE_FPATHCONF 43019 +#define AUE_GETDIRENTRIES 43020 +#define AUE_SYSCTL 43021 +#define AUE_MLOCK 43022 +#define AUE_MUNLOCK 43023 +#define AUE_UNDELETE 43024 +#define AUE_GETATTRLIST 43025 /* Darwin-specific. */ +#define AUE_SETATTRLIST 43026 /* Darwin-specific. */ +#define AUE_GETDIRENTRIESATTR 43027 /* Darwin-specific. */ +#define AUE_EXCHANGEDATA 43028 /* Darwin-specific. */ +#define AUE_SEARCHFS 43029 /* Darwin-specific. */ +#define AUE_MINHERIT 43030 +#define AUE_SEMCONFIG 43031 +#define AUE_SEMOPEN 43032 +#define AUE_SEMCLOSE 43033 +#define AUE_SEMUNLINK 43034 +#define AUE_SHMOPEN 43035 +#define AUE_SHMUNLINK 43036 +#define AUE_LOADSHFILE 43037 /* Darwin-specific. */ +#define AUE_RESETSHFILE 43038 /* Darwin-specific. */ +#define AUE_NEWSYSTEMSHREG 43039 /* Darwin-specific. */ +#define AUE_PTHREADKILL 43040 /* Darwin-specific. */ +#define AUE_PTHREADSIGMASK 43041 /* Darwin-specific. */ +#define AUE_AUDITCTL 43042 +#define AUE_RFORK 43043 +#define AUE_LCHMOD 43044 +#define AUE_SWAPOFF 43045 +#define AUE_INITPROCESS 43046 /* Darwin-specific. */ +#define AUE_MAPFD 43047 /* Darwin-specific. */ +#define AUE_TASKFORPID 43048 /* Darwin-specific. */ +#define AUE_PIDFORTASK 43049 /* Darwin-specific. */ +#define AUE_SYSCTL_NONADMIN 43050 +#define AUE_COPYFILE 43051 /* Darwin-specific. */ -#define AUE_ISSETUGID AUE_NULL /*327*/ -#define AUE_PTHREADKILL 350 /*328*/ -#define AUE_PTHREADSIGMASK 351 /*329*/ -#define AUE_SIGWAIT AUE_NULL /*330*/ /*XXX*/ -#define AUE_SWAPOFF 355 -#define AUE_INITPROCESS 356 -#define AUE_MAPFD 357 -#define AUE_TASKNAMEFORPID AUE_NULL -#define AUE_TASKFORPID 358 -#define AUE_PIDFORTASK 359 -#define AUE_SYSCTL_NONADMIN 360 +/* + * Events added to OpenBSM for FreeBSD and Linux; may also be used by Darwin + * in the future. + */ +#define AUE_LUTIMES 43052 +#define AUE_LCHFLAGS 43053 /* FreeBSD-specific. */ +#define AUE_SENDFILE 43054 /* BSD/Linux-specific. */ +#define AUE_USELIB 43055 /* Linux-specific. 
*/ +#define AUE_GETRESUID 43056 +#define AUE_SETRESUID 43057 +#define AUE_GETRESGID 43058 +#define AUE_SETRESGID 43059 +#define AUE_WAIT4 43060 /* FreeBSD-specific. */ +#define AUE_LGETFH 43061 /* FreeBSD-specific. */ +#define AUE_FHSTATFS 43062 /* FreeBSD-specific. */ +#define AUE_FHOPEN 43063 /* FreeBSD-specific. */ +#define AUE_FHSTAT 43064 /* FreeBSD-specific. */ +#define AUE_JAIL 43065 /* FreeBSD-specific. */ +#define AUE_EACCESS 43066 /* FreeBSD-specific. */ +#define AUE_KQUEUE 43067 /* FreeBSD-specific. */ +#define AUE_KEVENT 43068 /* FreeBSD-specific. */ +#define AUE_FSYNC 43069 +#define AUE_NMOUNT 43070 /* FreeBSD-specific. */ +#define AUE_BDFLUSH 43071 /* Linux-specific. */ +#define AUE_SETFSUID 43072 /* Linux-specific. */ +#define AUE_SETFSGID 43073 /* Linux-specific. */ +#define AUE_PERSONALITY 43074 /* Linux-specific. */ +#define AUE_SCHED_GETSCHEDULER 43075 /* POSIX.1b. */ +#define AUE_SCHED_SETSCHEDULER 43076 /* POSIX.1b. */ +#define AUE_PRCTL 43077 /* Linux-specific. */ +#define AUE_GETCWD 43078 /* FreeBSD/Linux-specific. */ +#define AUE_CAPGET 43079 /* Linux-specific. */ +#define AUE_CAPSET 43080 /* Linux-specific. */ +#define AUE_PIVOT_ROOT 43081 /* Linux-specific. */ +#define AUE_RTPRIO 43082 /* FreeBSD-specific. */ +#define AUE_SCHED_GETPARAM 43083 /* POSIX.1b. */ +#define AUE_SCHED_SETPARAM 43084 /* POSIX.1b. */ +#define AUE_SCHED_GET_PRIORITY_MAX 43085 /* POSIX.1b. */ +#define AUE_SCHED_GET_PRIORITY_MIN 43086 /* POSIX.1b. */ +#define AUE_SCHED_RR_GET_INTERVAL 43087 /* POSIX.1b. */ +#define AUE_ACL_GET_FILE 43088 /* FreeBSD. */ +#define AUE_ACL_SET_FILE 43089 /* FreeBSD. */ +#define AUE_ACL_GET_FD 43090 /* FreeBSD. */ +#define AUE_ACL_SET_FD 43091 /* FreeBSD. */ +#define AUE_ACL_DELETE_FILE 43092 /* FreeBSD. */ +#define AUE_ACL_DELETE_FD 43093 /* FreeBSD. */ +#define AUE_ACL_CHECK_FILE 43094 /* FreeBSD. */ +#define AUE_ACL_CHECK_FD 43095 /* FreeBSD. */ +#define AUE_ACL_GET_LINK 43096 /* FreeBSD. */ +#define AUE_ACL_SET_LINK 43097 /* FreeBSD. 
*/ +#define AUE_ACL_DELETE_LINK 43098 /* FreeBSD. */ +#define AUE_ACL_CHECK_LINK 43099 /* FreeBSD. */ +#define AUE_SYSARCH 43100 /* FreeBSD. */ +#define AUE_EXTATTRCTL 43101 /* FreeBSD. */ +#define AUE_EXTATTR_GET_FILE 43102 /* FreeBSD. */ +#define AUE_EXTATTR_SET_FILE 43103 /* FreeBSD. */ +#define AUE_EXTATTR_LIST_FILE 43104 /* FreeBSD. */ +#define AUE_EXTATTR_DELETE_FILE 43105 /* FreeBSD. */ +#define AUE_EXTATTR_GET_FD 43106 /* FreeBSD. */ +#define AUE_EXTATTR_SET_FD 43107 /* FreeBSD. */ +#define AUE_EXTATTR_LIST_FD 43108 /* FreeBSD. */ +#define AUE_EXTATTR_DELETE_FD 43109 /* FreeBSD. */ +#define AUE_EXTATTR_GET_LINK 43110 /* FreeBSD. */ +#define AUE_EXTATTR_SET_LINK 43111 /* FreeBSD. */ +#define AUE_EXTATTR_LIST_LINK 43112 /* FreeBSD. */ +#define AUE_EXTATTR_DELETE_LINK 43113 /* FreeBSD. */ +#define AUE_KENV 43114 /* FreeBSD. */ +#define AUE_JAIL_ATTACH 43115 /* FreeBSD. */ +#define AUE_SYSCTL_WRITE 43116 /* FreeBSD. */ +#define AUE_IOPERM 43117 /* Linux. */ +#define AUE_READDIR 43118 /* Linux. */ +#define AUE_IOPL 43119 /* Linux. */ +#define AUE_VM86 43120 /* Linux. */ +#define AUE_MAC_GET_PROC 43121 /* FreeBSD/Darwin. */ +#define AUE_MAC_SET_PROC 43122 /* FreeBSD/Darwin. */ +#define AUE_MAC_GET_FD 43123 /* FreeBSD/Darwin. */ +#define AUE_MAC_GET_FILE 43124 /* FreeBSD/Darwin. */ +#define AUE_MAC_SET_FD 43125 /* FreeBSD/Darwin. */ +#define AUE_MAC_SET_FILE 43126 /* FreeBSD/Darwin. */ +#define AUE_MAC_SYSCALL 43127 /* FreeBSD. */ +#define AUE_MAC_GET_PID 43128 /* FreeBSD/Darwin. */ +#define AUE_MAC_GET_LINK 43129 /* FreeBSD/Darwin. */ +#define AUE_MAC_SET_LINK 43130 /* FreeBSD/Darwin. */ +#define AUE_MAC_EXECVE 43131 /* FreeBSD/Darwin. */ +#define AUE_GETPATH_FROMFD 43132 /* FreeBSD. */ +#define AUE_GETPATH_FROMADDR 43133 /* FreeBSD. */ +#define AUE_MQ_OPEN 43134 /* FreeBSD. */ +#define AUE_MQ_SETATTR 43135 /* FreeBSD. */ +#define AUE_MQ_TIMEDRECEIVE 43136 /* FreeBSD. */ +#define AUE_MQ_TIMEDSEND 43137 /* FreeBSD. */ +#define AUE_MQ_NOTIFY 43138 /* FreeBSD. 
*/ +#define AUE_MQ_UNLINK 43139 /* FreeBSD. */ +#define AUE_LISTEN 43140 /* FreeBSD/Darwin/Linux. */ +#define AUE_MLOCKALL 43141 /* FreeBSD. */ +#define AUE_MUNLOCKALL 43142 /* FreeBSD. */ +#define AUE_CLOSEFROM 43143 /* FreeBSD. */ +#define AUE_FEXECVE 43144 /* FreeBSD. */ +#define AUE_FACCESSAT 43145 /* FreeBSD. */ +#define AUE_FCHMODAT 43146 /* FreeBSD. */ +#define AUE_LINKAT 43147 /* FreeBSD. */ +#define AUE_MKDIRAT 43148 /* FreeBSD. */ +#define AUE_MKFIFOAT 43149 /* FreeBSD. */ +#define AUE_MKNODAT 43150 /* FreeBSD. */ +#define AUE_READLINKAT 43151 /* FreeBSD. */ +#define AUE_SYMLINKAT 43152 /* FreeBSD. */ +#define AUE_MAC_GETFSSTAT 43153 /* Darwin. */ +#define AUE_MAC_GET_MOUNT 43154 /* Darwin. */ +#define AUE_MAC_GET_LCID 43155 /* Darwin. */ +#define AUE_MAC_GET_LCTX 43156 /* Darwin. */ +#define AUE_MAC_SET_LCTX 43157 /* Darwin. */ +#define AUE_MAC_MOUNT 43158 /* Darwin. */ +#define AUE_GETLCID 43159 /* Darwin. */ +#define AUE_SETLCID 43160 /* Darwin. */ +#define AUE_TASKNAMEFORPID 43161 /* Darwin. */ +#define AUE_ACCESS_EXTENDED 43162 /* Darwin. */ +#define AUE_CHMOD_EXTENDED 43163 /* Darwin. */ +#define AUE_FCHMOD_EXTENDED 43164 /* Darwin. */ +#define AUE_FSTAT_EXTENDED 43165 /* Darwin. */ +#define AUE_LSTAT_EXTENDED 43166 /* Darwin. */ +#define AUE_MKDIR_EXTENDED 43167 /* Darwin. */ +#define AUE_MKFIFO_EXTENDED 43168 /* Darwin. */ +#define AUE_OPEN_EXTENDED 43169 /* Darwin. */ +#define AUE_OPEN_EXTENDED_R 43170 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RC 43171 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RT 43172 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RTC 43173 /* Darwin. */ +#define AUE_OPEN_EXTENDED_W 43174 /* Darwin. */ +#define AUE_OPEN_EXTENDED_WC 43175 /* Darwin. */ +#define AUE_OPEN_EXTENDED_WT 43176 /* Darwin. */ +#define AUE_OPEN_EXTENDED_WTC 43177 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RW 43178 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RWC 43179 /* Darwin. */ +#define AUE_OPEN_EXTENDED_RWT 43180 /* Darwin. 
*/ +#define AUE_OPEN_EXTENDED_RWTC 43181 /* Darwin. */ +#define AUE_STAT_EXTENDED 43182 /* Darwin. */ +#define AUE_UMASK_EXTENDED 43183 /* Darwin. */ +#define AUE_OPENAT 43184 /* FreeBSD. */ +#define AUE_POSIX_OPENPT 43185 /* FreeBSD. */ +#define AUE_CAP_NEW 43186 /* TrustedBSD. */ +#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */ +#define AUE_CAP_ENTER 43188 /* TrustedBSD. */ +#define AUE_CAP_GETMODE 43189 /* TrustedBSD. */ +#define AUE_POSIX_SPAWN 43190 /* Darwin. */ +#define AUE_FSGETPATH 43191 /* Darwin. */ +#define AUE_PREAD 43192 /* Darwin/FreeBSD. */ +#define AUE_PWRITE 43193 /* Darwin/FreeBSD. */ +#define AUE_FSCTL 43194 /* Darwin. */ +#define AUE_FFSCTL 43195 /* Darwin. */ -// BSM events for security system calls -#define AUE_MAC_GET_PROC 400 -#define AUE_MAC_SET_PROC 401 -#define AUE_MAC_GET_PID 402 -#define AUE_MAC_SET_FILE 403 -#define AUE_MAC_GET_FILE 404 -#define AUE_MAC_SET_LINK 405 -#define AUE_MAC_GET_LINK 406 -#define AUE_MAC_SET_FD 407 -#define AUE_MAC_GET_FD 408 -#define AUE_MAC_EXECVE 409 -#define AUE_MAC_SYSCALL 410 -#define AUE_MAC_GET_LCID 411 -#define AUE_MAC_GET_LCTX 412 -#define AUE_MAC_SET_LCTX 413 -#define AUE_SETLCID 414 -#define AUE_GETLCID 415 -#define AUE_MAC_MOUNT 416 -#define AUE_MAC_GET_MOUNT 417 -#define AUE_MAC_GETFSSTAT 418 - -// BSM events for extended attributes -#define AUE_EXTATTR_SET_FILE 451 -#define AUE_EXTATTR_GET_FILE 452 -#define AUE_EXTATTR_DELETE_FILE 453 -#define AUE_EXTATTR_LIST_FILE 454 -#define AUE_EXTATTR_SET_LINK 455 -#define AUE_EXTATTR_GET_LINK 456 -#define AUE_EXTATTR_DELETE_LINK 457 -#define AUE_EXTATTR_LIST_LINK 458 -#define AUE_EXTATTR_SET_FD 459 -#define AUE_EXTATTR_GET_FD 460 -#define AUE_EXTATTR_DELETE_FD 461 -#define AUE_EXTATTR_LIST_FD 462 +/* + * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the + * normal Solaris BSM identifiers. _O_ refers to it being an old, or compat + * interface. 
In most cases, Darwin has never implemented these system calls + * but picked up the fields in their system call table from their FreeBSD + * import. Happily, these have different names than the AUE_O* definitions + * in Solaris BSM. + */ +#define AUE_O_CREAT AUE_OPEN_RWTC /* Darwin */ +#define AUE_O_EXECVE AUE_NULL /* Darwin */ +#define AUE_O_SBREAK AUE_NULL /* Darwin */ +#define AUE_O_LSEEK AUE_NULL /* Darwin */ +#define AUE_O_MOUNT AUE_NULL /* Darwin */ +#define AUE_O_UMOUNT AUE_NULL /* Darwin */ +#define AUE_O_STAT AUE_STAT /* Darwin */ +#define AUE_O_LSTAT AUE_LSTAT /* Darwin */ +#define AUE_O_FSTAT AUE_FSTAT /* Darwin */ +#define AUE_O_GETPAGESIZE AUE_NULL /* Darwin */ +#define AUE_O_VREAD AUE_NULL /* Darwin */ +#define AUE_O_VWRITE AUE_NULL /* Darwin */ +#define AUE_O_MMAP AUE_MMAP /* Darwin */ +#define AUE_O_VADVISE AUE_NULL /* Darwin */ +#define AUE_O_VHANGUP AUE_NULL /* Darwin */ +#define AUE_O_VLIMIT AUE_NULL /* Darwin */ +#define AUE_O_WAIT AUE_NULL /* Darwin */ +#define AUE_O_GETHOSTNAME AUE_NULL /* Darwin */ +#define AUE_O_SETHOSTNAME AUE_SYSCTL /* Darwin */ +#define AUE_O_GETDOPT AUE_NULL /* Darwin */ +#define AUE_O_SETDOPT AUE_NULL /* Darwin */ +#define AUE_O_ACCEPT AUE_NULL /* Darwin */ +#define AUE_O_SEND AUE_SENDMSG /* Darwin */ +#define AUE_O_RECV AUE_RECVMSG /* Darwin */ +#define AUE_O_VTIMES AUE_NULL /* Darwin */ +#define AUE_O_SIGVEC AUE_NULL /* Darwin */ +#define AUE_O_SIGBLOCK AUE_NULL /* Darwin */ +#define AUE_O_SIGSETMASK AUE_NULL /* Darwin */ +#define AUE_O_SIGSTACK AUE_NULL /* Darwin */ +#define AUE_O_RECVMSG AUE_RECVMSG /* Darwin */ +#define AUE_O_SENDMSG AUE_SENDMSG /* Darwin */ +#define AUE_O_VTRACE AUE_NULL /* Darwin */ +#define AUE_O_RESUBA AUE_NULL /* Darwin */ +#define AUE_O_RECVFROM AUE_RECVFROM /* Darwin */ +#define AUE_O_SETREUID AUE_SETREUID /* Darwin */ +#define AUE_O_SETREGID AUE_SETREGID /* Darwin */ +#define AUE_O_GETDIRENTRIES AUE_GETDIRENTRIES /* Darwin */ +#define AUE_O_TRUNCATE AUE_TRUNCATE /* Darwin */ +#define 
AUE_O_FTRUNCATE AUE_FTRUNCATE /* Darwin */ +#define AUE_O_GETPEERNAME AUE_NULL /* Darwin */ +#define AUE_O_GETHOSTID AUE_NULL /* Darwin */ +#define AUE_O_SETHOSTID AUE_NULL /* Darwin */ +#define AUE_O_GETRLIMIT AUE_NULL /* Darwin */ +#define AUE_O_SETRLIMIT AUE_SETRLIMIT /* Darwin */ +#define AUE_O_KILLPG AUE_KILL /* Darwin */ +#define AUE_O_SETQUOTA AUE_NULL /* Darwin */ +#define AUE_O_QUOTA AUE_NULL /* Darwin */ +#define AUE_O_GETSOCKNAME AUE_NULL /* Darwin */ +#define AUE_O_GETDIREENTRIES AUE_GETDIRENTRIES /* Darwin; misspelled name kept for compatibility, expands to AUE_GETDIRENTRIES. */ +#define AUE_O_ASYNCDAEMON AUE_NULL /* Darwin */ +#define AUE_O_GETDOMAINNAME AUE_NULL /* Darwin */ +#define AUE_O_SETDOMAINNAME AUE_SYSCTL /* Darwin */ +#define AUE_O_PCFS_MOUNT AUE_NULL /* Darwin */ +#define AUE_O_EXPORTFS AUE_NULL /* Darwin */ +#define AUE_O_USTATE AUE_NULL /* Darwin */ +#define AUE_O_WAIT3 AUE_NULL /* Darwin */ +#define AUE_O_RPAUSE AUE_NULL /* Darwin */ +#define AUE_O_GETDENTS AUE_NULL /* Darwin */ -// BSM events - Have to identify which ones are relevant to MacOSX -#define AUE_ACLSET 251 -#define AUE_AUDIT 211 -#define AUE_AUDITON 138 -#define AUE_AUDITON_GETCAR 224 -#define AUE_AUDITON_GETCLASS 231 -#define AUE_AUDITON_GETCOND 229 -#define AUE_AUDITON_GETCWD 223 -#define AUE_AUDITON_GETKMASK 221 -#define AUE_AUDITON_GETSTAT 225 -#define AUE_AUDITON_GPOLICY 141 -#define AUE_AUDITON_GQCTRL 145 -#define AUE_AUDITON_SETCLASS 232 -#define AUE_AUDITON_SETCOND 230 -#define AUE_AUDITON_SETKMASK 222 -#define AUE_AUDITON_SETSMASK 228 -#define AUE_AUDITON_SETSTAT 226 -#define AUE_AUDITON_SETUMASK 227 -#define AUE_AUDITON_SPOLICY 142 -#define AUE_AUDITON_SQCTRL 146 -#define AUE_AUDITCTL 352 -#define AUE_DOORFS_DOOR_BIND 260 -#define AUE_DOORFS_DOOR_CALL 254 -#define AUE_DOORFS_DOOR_CREATE 256 -#define AUE_DOORFS_DOOR_CRED 259 -#define AUE_DOORFS_DOOR_INFO 258 -#define AUE_DOORFS_DOOR_RETURN 255 -#define AUE_DOORFS_DOOR_REVOKE 257 -#define AUE_DOORFS_DOOR_UNBIND 261 -#define AUE_ENTERPROM 153 -#define AUE_EXEC 7 -#define AUE_EXITPROM 
154 -#define AUE_FACLSET 252 -#define AUE_FCHROOT 69 -#define AUE_FORK1 241 -#define AUE_GETAUDIT 132 -#define AUE_GETAUDIT_ADDR 267 -#define AUE_GETAUID 130 -#define AUE_GETMSG 217 -#define AUE_SOCKACCEPT 247 -#define AUE_SOCKRECEIVE 250 -#define AUE_GETPMSG 219 -#define AUE_GETPORTAUDIT 149 -#define AUE_INST_SYNC 264 -#define AUE_LCHOWN 237 -#define AUE_LXSTAT 236 -#define AUE_MEMCNTL 238 -#define AUE_MODADDMAJ 246 -#define AUE_MODCONFIG 245 -#define AUE_MODLOAD 243 -#define AUE_MODUNLOAD 244 -#define AUE_MSGCTL 84 -#define AUE_MSGCTL_RMID 85 -#define AUE_MSGCTL_SET 86 -#define AUE_MSGCTL_STAT 87 -#define AUE_NICE 203 -#define AUE_P_ONLINE 262 -#define AUE_PRIOCNTLSYS 212 -#define AUE_CORE 111 -#define AUE_PROCESSOR_BIND 263 -#define AUE_PUTMSG 216 -#define AUE_SOCKCONNECT 248 -#define AUE_SOCKSEND 249 -#define AUE_PUTPMSG 218 -#define AUE_SETAUDIT 133 -#define AUE_SETAUDIT_ADDR 266 -#define AUE_SETAUID 131 -#define AUE_SOCKCONFIG 183 -#define AUE_STATVFS 234 -#define AUE_STIME 201 -#define AUE_SYSINFO 39 -#define AUE_UTIME 202 -#define AUE_UTSYS 233 -#define AUE_XMKNOD 240 -#define AUE_XSTAT 235 +/* + * Possible desired future values based on review of BSD/Darwin system calls. 
+ */ +#define AUE_ATGETMSG AUE_NULL +#define AUE_ATPUTMSG AUE_NULL +#define AUE_ATSOCKET AUE_NULL +#define AUE_ATPGETREQ AUE_NULL +#define AUE_ATPGETRSP AUE_NULL +#define AUE_ATPSNDREQ AUE_NULL +#define AUE_ATPSNDRSP AUE_NULL +#define AUE_BSDTHREADCREATE AUE_NULL +#define AUE_BSDTHREADTERMINATE AUE_NULL +#define AUE_BSDTHREADREGISTER AUE_NULL +#define AUE_CHUD AUE_NULL +#define AUE_CSOPS AUE_NULL +#define AUE_DUP AUE_NULL +#define AUE_FDATASYNC AUE_NULL +#define AUE_FGETATTRLIST AUE_NULL +#define AUE_FGETXATTR AUE_NULL +#define AUE_FLISTXATTR AUE_NULL +#define AUE_FREMOVEXATTR AUE_NULL +#define AUE_FSETATTRLIST AUE_NULL +#define AUE_FSETXATTR AUE_NULL +#define AUE_FSTATFS64 AUE_NULL +#define AUE_FSTATV AUE_NULL +#define AUE_FSTAT64 AUE_NULL +#define AUE_FSTAT64_EXTENDED AUE_NULL +#define AUE_GCCONTROL AUE_NULL +#define AUE_GETDIRENTRIES64 AUE_NULL +#define AUE_GETDTABLESIZE AUE_NULL +#define AUE_GETEGID AUE_NULL +#define AUE_GETEUID AUE_NULL +#define AUE_GETFSSTAT64 AUE_NULL +#define AUE_GETGID AUE_NULL +#define AUE_GETGROUPS AUE_NULL +#define AUE_GETITIMER AUE_NULL +#define AUE_GETLOGIN AUE_NULL +#define AUE_GETPEERNAME AUE_NULL +#define AUE_GETPGID AUE_NULL +#define AUE_GETPGRP AUE_NULL +#define AUE_GETPID AUE_NULL +#define AUE_GETPPID AUE_NULL +#define AUE_GETPRIORITY AUE_NULL +#define AUE_GETRLIMIT AUE_NULL +#define AUE_GETRUSAGE AUE_NULL +#define AUE_GETSGROUPS AUE_NULL +#define AUE_GETSID AUE_NULL +#define AUE_GETSOCKNAME AUE_NULL +#define AUE_GETTIMEOFDAY AUE_NULL +#define AUE_GETTID AUE_NULL +#define AUE_GETUID AUE_NULL +#define AUE_GETSOCKOPT AUE_NULL +#define AUE_GETWGROUPS AUE_NULL +#define AUE_GETXATTR AUE_NULL +#define AUE_IDENTITYSVC AUE_NULL +#define AUE_INITGROUPS AUE_NULL +#define AUE_IOPOLICYSYS AUE_NULL +#define AUE_ISSETUGID AUE_NULL +#define AUE_LIOLISTIO AUE_NULL +#define AUE_LISTXATTR AUE_NULL +#define AUE_LSTATV AUE_NULL +#define AUE_LSTAT64 AUE_NULL +#define AUE_LSTAT64_EXTENDED AUE_NULL +#define AUE_MADVISE AUE_NULL +#define AUE_MINCORE 
AUE_NULL +#define AUE_MKCOMPLEX AUE_NULL +#define AUE_MODWATCH AUE_NULL +#define AUE_MSGCL AUE_NULL +#define AUE_MSYNC AUE_NULL +#define AUE_PREADV AUE_NULL +#define AUE_PROCINFO AUE_NULL +#define AUE_PTHREADCANCELED AUE_NULL +#define AUE_PTHREADCHDIR AUE_NULL +#define AUE_PTHREADCONDBROADCAST AUE_NULL +#define AUE_PTHREADCONDDESTORY AUE_NULL +#define AUE_PTHREADCONDINIT AUE_NULL +#define AUE_PTHREADCONDSIGNAL AUE_NULL +#define AUE_PTHREADCONDWAIT AUE_NULL +#define AUE_PTHREADFCHDIR AUE_NULL +#define AUE_PTHREADMARK AUE_NULL +#define AUE_PTHREADMUTEXDESTROY AUE_NULL +#define AUE_PTHREADMUTEXINIT AUE_NULL +#define AUE_PTHREADMUTEXTRYLOCK AUE_NULL +#define AUE_PTHREADMUTEXUNLOCK AUE_NULL +#define AUE_PWRITEV AUE_NULL +#define AUE_REMOVEXATTR AUE_NULL +#define AUE_SBRK AUE_NULL +#define AUE_SELECT AUE_NULL +#define AUE_SEMDESTROY AUE_NULL +#define AUE_SEMGETVALUE AUE_NULL +#define AUE_SEMINIT AUE_NULL +#define AUE_SEMPOST AUE_NULL +#define AUE_SEMTRYWAIT AUE_NULL +#define AUE_SEMWAIT AUE_NULL +#define AUE_SEMWAITSIGNAL AUE_NULL +#define AUE_SETITIMER AUE_NULL +#define AUE_SETSGROUPS AUE_NULL +#define AUE_SETTID AUE_NULL +#define AUE_SETTIDWITHPID AUE_NULL +#define AUE_SETWGROUPS AUE_NULL +#define AUE_SETXATTR AUE_NULL +#define AUE_SHAREDREGIONCHECK AUE_NULL +#define AUE_SHAREDREGIONMAP AUE_NULL +#define AUE_SIGACTION AUE_NULL +#define AUE_SIGALTSTACK AUE_NULL +#define AUE_SIGPENDING AUE_NULL +#define AUE_SIGPROCMASK AUE_NULL +#define AUE_SIGRETURN AUE_NULL +#define AUE_SIGSUSPEND AUE_NULL +#define AUE_SIGWAIT AUE_NULL +#define AUE_SSTK AUE_NULL +#define AUE_STACKSNAPSHOT AUE_NULL +#define AUE_STATFS64 AUE_NULL +#define AUE_STATV AUE_NULL +#define AUE_STAT64 AUE_NULL +#define AUE_STAT64_EXTENDED AUE_NULL +#define AUE_SYNC AUE_NULL +#define AUE_SYSCALL AUE_NULL +#define AUE_TABLE AUE_NULL +#define AUE_VMPRESSUREMONITOR AUE_NULL +#define AUE_WAITEVENT AUE_NULL +#define AUE_WAITID AUE_NULL +#define AUE_WATCHEVENT AUE_NULL +#define AUE_WORKQOPEN AUE_NULL +#define 
AUE_WORKQOPS AUE_NULL #endif /* !_BSM_AUDIT_KEVENTS_H_ */ diff --git a/bsd/bsm/audit_klib.h b/bsd/bsm/audit_klib.h deleted file mode 100644 index 5f3e4717b..000000000 --- a/bsd/bsm/audit_klib.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#ifndef _BSM_AUDIT_KLIB_H_ -#define _BSM_AUDIT_KLIB_H_ - -#define AU_PRS_SUCCESS 1 -#define AU_PRS_FAILURE 2 -#define AU_PRS_BOTH (AU_PRS_SUCCESS|AU_PRS_FAILURE) - -#ifdef KERNEL -#include -/* - * Some of the BSM tokenizer functions take different parameters in the - * kernel implementations in order to save the copying of large kernel - * data structures. The prototypes of these functions are declared here. 
- */ -token_t *kau_to_socket(struct socket_au_info *soi); -token_t *kau_to_attr32(struct vnode_au_info *vni); -token_t *kau_to_attr64(struct vnode_au_info *vni); -int auditon_command_event(int cmd); -int au_preselect(au_event_t event, au_mask_t *mask_p, int sorf); -au_event_t flags_and_error_to_openevent(int oflags, int error); -au_event_t ctlname_to_sysctlevent(int name[], uint64_t valid_arg); -au_event_t msgctl_to_event(int cmd); -au_event_t semctl_to_event(int cmd); -void au_evclassmap_init(void); -void au_evclassmap_insert(au_event_t event, au_class_t class); -au_class_t au_event_class(au_event_t event); - -int canon_path(struct vnode *cwd_vp, char *path, char *cpath); - - - - -/* - * Define a system call to audit event mapping table. - */ -extern au_event_t sys_au_event[]; - -#endif /*KERNEL*/ - -#endif /* ! _BSM_AUDIT_KLIB_H_ */ diff --git a/bsd/bsm/audit_record.h b/bsd/bsm/audit_record.h index fb58ccc05..f8dced869 100644 --- a/bsd/bsm/audit_record.h +++ b/bsd/bsm/audit_record.h @@ -1,207 +1,129 @@ -/* - * Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. +/*- + * Copyright (c) 2005-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_record.h#10 $ */ #ifndef _BSM_AUDIT_RECORD_H_ #define _BSM_AUDIT_RECORD_H_ -#include -#include -#include -#include -#include -#include -#include -#include - -/* We could determined the header and trailer sizes by - * defining appropriate structures. We hold off that approach - * till we have a consistant way of using structures for all tokens. - * This is not straightforward since these token structures may - * contain pointers of whose contents we dont know the size - * (e.g text tokens) - */ -#define HEADER_SIZE 18 -#define TRAILER_SIZE 7 - -#define ADD_U_CHAR(loc, val) \ - do {\ - *loc = val;\ - loc += sizeof(u_char);\ - }while(0) - - -#define ADD_U_INT16(loc, val) \ - do { \ - memcpy(loc, (const u_char *)&val, sizeof(u_int16_t));\ - loc += sizeof(u_int16_t); \ - }while(0) - -#define ADD_U_INT32(loc, val) \ - do { \ - memcpy(loc, (const u_char *)&val, sizeof(u_int32_t));\ - loc += sizeof(u_int32_t); \ - }while(0) - -#define ADD_U_INT64(loc, val)\ - do {\ - memcpy(loc, (const u_char *)&val, sizeof(u_int64_t));\ - loc += sizeof(u_int64_t); \ - }while(0) - -#define ADD_MEM(loc, data, size) \ - do { \ - memcpy(loc, data, size);\ - loc += size;\ - }while(0) - -#define ADD_STRING(loc, data, size) ADD_MEM(loc, data, size) - - -/* Various token id types */ - -/* - * Values inside the comments are not documented in the BSM pages and - * have been picked up from the header files - */ - -/* - * Values marked as XXX do not have a value defined in the BSM header files - */ - -/* - * Control token types - -#define AUT_OTHER_FILE ((char)0x11) -#define AUT_OTHER_FILE32 AUT_OTHER_FILE -#define AUT_OHEADER ((char)0x12) - - */ - -#define AUT_INVALID 0x00 -#define AU_FILE_TOKEN 0x11 -#define AU_TRAILER_TOKEN 0x13 -#define AU_HEADER_32_TOKEN 0x14 -#define 
AU_HEADER_EX_32_TOKEN 0x15 +#include <sys/time.h> /* struct timeval */ /* - * Data token types -#define AUT_SERVER ((char)0x25) -#define AUT_SERVER32 AUT_SERVER + * Token type identifiers. */ - -#define AU_DATA_TOKEN 0x21 -#define AU_ARB_TOKEN AU_DATA_TOKEN -#define AU_IPC_TOKEN 0x22 -#define AU_PATH_TOKEN 0x23 -#define AU_SUBJECT_32_TOKEN 0x24 -#define AU_PROCESS_32_TOKEN 0x26 -#define AU_RETURN_32_TOKEN 0x27 -#define AU_TEXT_TOKEN 0x28 -#define AU_OPAQUE_TOKEN 0x29 -#define AU_IN_ADDR_TOKEN 0x2A -#define AU_IP_TOKEN 0x2B -#define AU_IPORT_TOKEN 0x2C -#define AU_ARG32_TOKEN 0x2D -#define AU_SOCK_TOKEN 0x2E -#define AU_SEQ_TOKEN 0x2F +#define AUT_INVALID 0x00 +#define AUT_OTHER_FILE32 0x11 +#define AUT_OHEADER 0x12 +#define AUT_TRAILER 0x13 +#define AUT_HEADER32 0x14 +#define AUT_HEADER32_EX 0x15 +#define AUT_DATA 0x21 +#define AUT_IPC 0x22 +#define AUT_PATH 0x23 +#define AUT_SUBJECT32 0x24 +#define AUT_XATPATH 0x25 +#define AUT_PROCESS32 0x26 +#define AUT_RETURN32 0x27 +#define AUT_TEXT 0x28 +#define AUT_OPAQUE 0x29 +#define AUT_IN_ADDR 0x2a +#define AUT_IP 0x2b +#define AUT_IPORT 0x2c +#define AUT_ARG32 0x2d +#define AUT_SOCKET 0x2e +#define AUT_SEQ 0x2f +#define AUT_ACL 0x30 +#define AUT_ATTR 0x31 +#define AUT_IPC_PERM 0x32 +#define AUT_LABEL 0x33 +#define AUT_GROUPS 0x34 +#define AUT_ACE 0x35 +#define AUT_PRIV 0x38 +#define AUT_UPRIV 0x39 +#define AUT_LIAISON 0x3a +#define AUT_NEWGROUPS 0x3b +#define AUT_EXEC_ARGS 0x3c +#define AUT_EXEC_ENV 0x3d +#define AUT_ATTR32 0x3e +#define AUT_UNAUTH 0x3f +#define AUT_XATOM 0x40 +#define AUT_XOBJ 0x41 +#define AUT_XPROTO 0x42 +#define AUT_XSELECT 0x43 +#define AUT_XCOLORMAP 0x44 +#define AUT_XCURSOR 0x45 +#define AUT_XFONT 0x46 +#define AUT_XGC 0x47 +#define AUT_XPIXMAP 0x48 +#define AUT_XPROPERTY 0x49 +#define AUT_XWINDOW 0x4a +#define AUT_XCLIENT 0x4b +#define AUT_CMD 0x51 +#define AUT_EXIT 0x52 +#define AUT_ZONENAME 0x60 +#define AUT_HOST 0x70 +#define AUT_ARG64 0x71 +#define AUT_RETURN64 0x72 +#define AUT_ATTR64 0x73 +#define 
AUT_HEADER64 0x74 +#define AUT_SUBJECT64 0x75 +#define AUT_PROCESS64 0x77 +#define AUT_OTHER_FILE64 0x78 +#define AUT_HEADER64_EX 0x79 +#define AUT_SUBJECT32_EX 0x7a +#define AUT_PROCESS32_EX 0x7b +#define AUT_SUBJECT64_EX 0x7c +#define AUT_PROCESS64_EX 0x7d +#define AUT_IN_ADDR_EX 0x7e +#define AUT_SOCKET_EX 0x7f /* - * Modifier token types - -#define AUT_ACL ((char)0x30) -#define AUT_LABEL ((char)0x33) -#define AUT_GROUPS ((char)0x34) -#define AUT_ILABEL ((char)0x35) -#define AUT_SLABEL ((char)0x36) -#define AUT_CLEAR ((char)0x37) -#define AUT_PRIV ((char)0x38) -#define AUT_UPRIV ((char)0x39) -#define AUT_LIAISON ((char)0x3A) - + * Pre-64-bit BSM, 32-bit tokens weren't explicitly named as '32'. We have + * compatibility defines. */ - -#define AU_ATTR_TOKEN 0x31 -#define AU_IPCPERM_TOKEN 0x32 -#define AU_NEWGROUPS_TOKEN 0x3B -#define AU_EXEC_ARG_TOKEN 0x3C -#define AU_EXEC_ENV_TOKEN 0x3D -#define AU_ATTR32_TOKEN 0x3E +#define AUT_HEADER AUT_HEADER32 +#define AUT_ARG AUT_ARG32 +#define AUT_RETURN AUT_RETURN32 +#define AUT_SUBJECT AUT_SUBJECT32 +#define AUT_PROCESS AUT_PROCESS32 +#define AUT_OTHER_FILE AUT_OTHER_FILE32 /* - * Command token types - */ - -#define AU_CMD_TOKEN 0x51 -#define AU_EXIT_TOKEN 0x52 - -/* - * Miscellaneous token types - -#define AUT_HOST ((char)0x70) - - */ - -/* - * 64bit token types - -#define AUT_SERVER64 ((char)0x76) -#define AUT_OTHER_FILE64 ((char)0x78) - - */ - -#define AU_ARG64_TOKEN 0x71 -#define AU_RETURN_64_TOKEN 0x72 -#define AU_ATTR64_TOKEN 0x73 -#define AU_HEADER_64_TOKEN 0x74 -#define AU_SUBJECT_64_TOKEN 0x75 -#define AU_PROCESS_64_TOKEN 0x77 - -/* - * Extended network address token types - */ - -#define AU_HEADER_EX_64_TOKEN 0x79 -#define AU_SUBJECT_32_EX_TOKEN 0x7a -#define AU_PROCESS_32_EX_TOKEN 0x7b -#define AU_SUBJECT_64_EX_TOKEN 0x7c -#define AU_PROCESS_64_EX_TOKEN 0x7d -#define AU_IN_ADDR_EX_TOKEN 0x7e -#define AU_SOCK_EX32_TOKEN 0x7f -#define AU_SOCK_EX128_TOKEN AUT_INVALID /*XXX*/ -#define AU_IP_EX_TOKEN AUT_INVALID 
/*XXX*/ - -/* - * The values for the following token ids are not - * defined by BSM + * The values for the following token ids are not defined by BSM. + * + * XXXRW: Not sure how to handle these in OpenBSM yet, but I'll give them + * names more consistent with Sun's BSM. These originally came from Apple's + * BSM. */ -#define AU_SOCK_INET_32_TOKEN 0x80 /*XXX*/ -#define AU_SOCK_INET_128_TOKEN 0x81 /*XXX*/ -#define AU_SOCK_UNIX_TOKEN 0x82 /*XXX*/ +#define AUT_SOCKINET32 0x80 /* XXX */ +#define AUT_SOCKINET128 0x81 /* XXX */ +#define AUT_SOCKUNIX 0x82 /* XXX */ /* print values for the arbitrary token */ #define AUP_BINARY 0 @@ -212,112 +134,164 @@ /* data-types for the arbitrary token */ #define AUR_BYTE 0 +#define AUR_CHAR AUR_BYTE #define AUR_SHORT 1 -#define AUR_LONG 2 +#define AUR_INT32 2 +#define AUR_INT AUR_INT32 +#define AUR_INT64 3 /* ... and their sizes */ -#define AUR_BYTE_SIZE sizeof(u_char) -#define AUR_SHORT_SIZE sizeof(u_int16_t) -#define AUR_LONG_SIZE sizeof(u_int32_t) +#define AUR_BYTE_SIZE sizeof(u_char) +#define AUR_CHAR_SIZE AUR_BYTE_SIZE +#define AUR_SHORT_SIZE sizeof(uint16_t) +#define AUR_INT32_SIZE sizeof(uint32_t) +#define AUR_INT_SIZE AUR_INT32_SIZE +#define AUR_INT64_SIZE sizeof(uint64_t) /* Modifiers for the header token */ #define PAD_NOTATTR 0x4000 /* nonattributable event */ #define PAD_FAILURE 0x8000 /* fail audit event */ +#define AUDIT_MAX_GROUPS 16 -#define MAX_GROUPS 16 -#define HEADER_VERSION 1 -#define TRAILER_PAD_MAGIC 0xB105 +/* + * A number of BSM versions are floating around and defined. Here are + * constants for them. OpenBSM uses the same token types, etc, used in the + * Solaris BSM version, but has a separate version number in order to + * identify a potentially different event identifier name space. + */ +#define AUDIT_HEADER_VERSION_OLDDARWIN 1 /* In retrospect, a mistake. 
*/ +#define AUDIT_HEADER_VERSION_SOLARIS 2 +#define AUDIT_HEADER_VERSION_TSOL25 3 +#define AUDIT_HEADER_VERSION_TSOL 4 +#define AUDIT_HEADER_VERSION_OPENBSM10 10 +#define AUDIT_HEADER_VERSION_OPENBSM11 11 +#define AUDIT_HEADER_VERSION_OPENBSM AUDIT_HEADER_VERSION_OPENBSM11 + +#define AUT_TRAILER_MAGIC 0xb105 /* BSM library calls */ __BEGIN_DECLS -int au_open(void); -int au_write(int d, token_t *m); -int au_close(int d, int keep, short event); -token_t *au_to_file(char *file); -token_t *au_to_header(int rec_size, au_event_t e_type, - au_emod_t e_mod); -token_t *au_to_header32(int rec_size, au_event_t e_type, - au_emod_t e_mod); -token_t *au_to_header64(int rec_size, au_event_t e_type, - au_emod_t e_mod); -token_t *au_to_me(void); - -token_t *au_to_arg(char n, char *text, u_int32_t v); -token_t *au_to_arg32(char n, const char *text, u_int32_t v); -token_t *au_to_arg64(char n, const char *text, u_int64_t v); -token_t *au_to_attr(struct vnode_attr *attr); -token_t *au_to_attr32(struct vnode_attr *attr); -token_t *au_to_attr64(struct vnode_attr *attr); -token_t *au_to_data(char unit_print, char unit_type, - char unit_count, unsigned char *p); -token_t *au_to_exit(int retval, int err); -token_t *au_to_groups(gid_t *groups); -token_t *au_to_newgroups(u_int16_t n, gid_t *groups); -token_t *au_to_in_addr(struct in_addr *internet_addr); -token_t *au_to_in_addr_ex(struct in6_addr *internet_addr); -token_t *au_to_ip(struct ip *ip); -token_t *au_to_ipc(char type, int id); -token_t *au_to_ipc_perm(struct ipc_perm *perm); -token_t *au_to_iport(u_int16_t iport); -token_t *au_to_opaque(char *data, u_int16_t bytes); -token_t *au_to_path(char *path); -token_t *au_to_process(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_process32(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_process64(au_id_t auid, uid_t euid, gid_t egid, - uid_t 
ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_process_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t *au_to_process32_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t *au_to_process64_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t *au_to_return(char status, u_int32_t ret); -token_t *au_to_return32(char status, u_int32_t ret); -token_t *au_to_return64(char status, u_int64_t ret); -token_t *au_to_seq(u_int32_t audit_count); -token_t *au_to_socket(struct socket *so); -token_t *au_to_socket_ex_32(u_int16_t lp, u_int16_t rp, - struct sockaddr *la, struct sockaddr *ta); -token_t *au_to_socket_ex_128(u_int16_t lp, u_int16_t rp, - struct sockaddr *la, struct sockaddr *ta); -token_t *au_to_sock_inet(struct sockaddr_in *so); -token_t *au_to_sock_inet32(struct sockaddr_in *so); -token_t *au_to_sock_inet128(struct sockaddr_in6 *so); -token_t *au_to_sock_unix(struct sockaddr_un *so); -token_t *au_to_subject(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_subject32(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_subject64(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid); -token_t *au_to_subject_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t *au_to_subject32_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t *au_to_subject64_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid); -token_t 
*au_to_exec_args(const char **); -token_t *au_to_exec_env(const char **); -token_t *au_to_text(const char *text); -token_t *au_to_kevent(struct kevent *kev); -token_t *au_to_trailer(int rec_size); +struct in_addr; +struct in6_addr; +struct ip; +struct ipc_perm; +struct kevent; +struct sockaddr; +struct sockaddr_in; +struct sockaddr_in6; +struct sockaddr_un; +#if defined(_KERNEL) || defined(KERNEL) +struct vnode_au_info; +#endif + +int au_open(void); +int au_write(int d, token_t *m); +int au_close(int d, int keep, short event); +int au_close_buffer(int d, short event, u_char *buffer, size_t *buflen); +int au_close_token(token_t *tok, u_char *buffer, size_t *buflen); + +token_t *au_to_file(const char *file, struct timeval tm); + +token_t *au_to_header32_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm); +token_t *au_to_header32_ex_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm, struct auditinfo_addr *aia); +token_t *au_to_header64_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm); +#if !defined(KERNEL) && !defined(_KERNEL) +token_t *au_to_header(int rec_size, au_event_t e_type, au_emod_t e_mod); +token_t *au_to_header_ex(int rec_size, au_event_t e_type, au_emod_t e_mod); +token_t *au_to_header32(int rec_size, au_event_t e_type, au_emod_t e_mod); +token_t *au_to_header64(int rec_size, au_event_t e_type, au_emod_t e_mod); +token_t *au_to_header32_ex(int rec_size, au_event_t e_type, au_emod_t e_mod); +#endif + +token_t *au_to_me(void); +token_t *au_to_arg(char n, const char *text, uint32_t v); +token_t *au_to_arg32(char n, const char *text, uint32_t v); +token_t *au_to_arg64(char n, const char *text, uint64_t v); + +#if defined(_KERNEL) || defined(KERNEL) +token_t *au_to_attr(struct vnode_au_info *vni); +token_t *au_to_attr32(struct vnode_au_info *vni); +token_t *au_to_attr64(struct vnode_au_info *vni); +#endif + +token_t *au_to_data(char unit_print, char unit_type, char unit_count, + const char 
*p); +token_t *au_to_exit(int retval, int err); +token_t *au_to_groups(int *groups); +token_t *au_to_newgroups(uint16_t n, gid_t *groups); +token_t *au_to_in_addr(struct in_addr *internet_addr); +token_t *au_to_in_addr_ex(struct in6_addr *internet_addr); +token_t *au_to_ip(struct ip *ip); +token_t *au_to_ipc(char type, int id); +token_t *au_to_ipc_perm(struct ipc_perm *perm); +token_t *au_to_iport(uint16_t iport); +token_t *au_to_opaque(const char *data, uint16_t bytes); +token_t *au_to_path(const char *path); +token_t *au_to_process(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t *au_to_process32(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t *au_to_process64(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t *au_to_process_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid); +token_t *au_to_process32_ex(au_id_t auid, uid_t euid, gid_t egid, + uid_t ruid, gid_t rgid, pid_t pid, au_asid_t sid, + au_tid_addr_t *tid); +token_t *au_to_process64_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid); +token_t *au_to_return(char status, uint32_t ret); +token_t *au_to_return32(char status, uint32_t ret); +token_t *au_to_return64(char status, uint64_t ret); +token_t *au_to_seq(long audit_count); +token_t *au_to_socket_ex(u_short so_domain, u_short so_type, + struct sockaddr *sa_local, struct sockaddr *sa_remote); +token_t *au_to_sock_inet(struct sockaddr_in *so); +token_t *au_to_sock_inet32(struct sockaddr_in *so); +token_t *au_to_sock_inet128(struct sockaddr_in6 *so); +token_t *au_to_sock_unix(struct sockaddr_un *so); +token_t *au_to_subject(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t 
*au_to_subject32(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t *au_to_subject64(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_t *tid); +token_t *au_to_subject_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid); +token_t *au_to_subject32_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid); +token_t *au_to_subject64_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid); +#if defined(_KERNEL) || defined(KERNEL) +token_t *au_to_exec_args(char *args, int argc); +token_t *au_to_exec_env(char *envs, int envc); +#else +token_t *au_to_exec_args(char **argv); +token_t *au_to_exec_env(char **envp); +#endif +token_t *au_to_text(const char *text); +token_t *au_to_kevent(struct kevent *kev); +token_t *au_to_trailer(int rec_size); +token_t *au_to_zonename(const char *zonename); + +/* + * BSM library routines for converting between local and BSM constant spaces. + */ +int au_bsm_to_domain(u_short bsm_domain, int *local_domainp); +int au_bsm_to_errno(u_char bsm_error, int *errorp); +int au_bsm_to_fcntl_cmd(u_short bsm_fcntl_cmd, int *local_fcntl_cmdp); +int au_bsm_to_socket_type(u_short bsm_socket_type, + int *local_socket_typep); +u_short au_domain_to_bsm(int local_domain); +u_char au_errno_to_bsm(int local_errno); +u_short au_fcntl_cmd_to_bsm(int local_fcntl_command); +u_short au_socket_type_to_bsm(int local_socket_type); __END_DECLS diff --git a/bsd/bsm/audit_socket_type.h b/bsd/bsm/audit_socket_type.h new file mode 100644 index 000000000..85f6aef7a --- /dev/null +++ b/bsd/bsm/audit_socket_type.h @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2008 Apple Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $P4: //depot/projects/trustedbsd/openbsm/sys/bsm/audit_socket_type.h#1 $ + */ + +#ifndef _BSM_AUDIT_SOCKET_TYPE_H_ +#define _BSM_AUDIT_SOCKET_TYPE_H_ + +/* + * BSM socket type constants. 
+ */ +#define BSM_SOCK_DGRAM 1 +#define BSM_SOCK_STREAM 2 +#define BSM_SOCK_RAW 4 +#define BSM_SOCK_RDM 5 +#define BSM_SOCK_SEQPACKET 6 + +#define BSM_SOCK_UNKNOWN 500 + +#endif /* !_BSM_AUDIT_SOCKET_TYPE_H_ */ diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER index 9459048eb..ec9ff0940 100644 --- a/bsd/conf/MASTER +++ b/bsd/conf/MASTER @@ -126,7 +126,11 @@ options ROUTING # routing # options NETMIBS # # options VLAN # # options BOND # # +options PF # Packet Filter # +options PF_PKTHDR # PF tag inside mbuf pkthdr # +options PFLOG # PF log interface # options IPDIVERT # Divert sockets (for NAT) # +options IPFLOW # IP fast forwarding # options IPFIREWALL # IP Firewalling (used by NAT) # options IPFIREWALL_FORWARD #Transparent proxy # options IPFIREWALL_DEFAULT_TO_ACCEPT # allow everything by default # @@ -139,21 +143,22 @@ options RANDOM_IP_ID # random (not sequential) ip ids # options TCP_DROP_SYNFIN # Drop TCP packets with SYN+FIN set # options ICMP_BANDLIM # ICMP bandwidth limiting sysctl options IFNET_INPUT_SANITY_CHK # allow dlil/ifnet input sanity check # -options AUDIT # Security event auditing # options SYSV_SEM # SVID semaphores # options SYSV_MSG # SVID messages # options SYSV_SHM # SVID shared mem # +options PSYNCH # pthread synch # options PANIC_INFO # want kernel panic info # options DEVELOPMENT # dev kernel # # secure_kernel - secure kernel from user programs options SECURE_KERNEL # +options OLD_SEMWAIT_SIGNAL # old semwait_signal handler + # # 4.4 general kernel # options SOCKETS # socket support # -options COMPAT_43_TTY # 4.3 BSD tty compat # options DIAGNOSTIC # diagnostics # options CONFIG_DTRACE # dtrace support # options GPROF # build profiling # @@ -162,14 +167,15 @@ options NETWORKING # networking layer # options CONFIG_FSE # file system events # options CONFIG_IMAGEBOOT # local image boot # options CONFIG_SOWUPCALL # SB_UPCALL on sowwakeup # -options CONFIG_FORCE_OUT_IFP # Force IP output to use an interface # options CONFIG_MBUF_NOEXPAND # limit 
mbuf expansion # options CONFIG_MBUF_JUMBO # jumbo cluster pool # -options CONFIG_SCOPEDROUTING # scoped routing on by default # -options CONFIG_IP_EDGEHOLE # Drop tagged packets at EDGE interface # +options CONFIG_MBUF_TAGS_MALLOC # use malloc for tags # +options CONFIG_FORCE_OUT_IFP # Enable IP_FORCE_OUT_IFP # +options CONFIG_IFEF_NOWINDOWSCALE # Scale TCP window per driver # options CONFIG_WORKQUEUE # + # # 4.4 filesystems # @@ -178,9 +184,9 @@ options HFS # HFS/HFS+ support # options FIFO # fifo support # options UNION # union_fs support # options FDESC # fdesc_fs support # -options CD9660 # ISO 9660 CD-ROM support # options DEVFS # devfs support # options JOURNALING # journaling support # +options HFS_COMPRESSION # hfs compression # # # file system features @@ -282,7 +288,7 @@ options CONFIG_VNODES=263168 # options CONFIG_VNODES=263168 # options CONFIG_VNODES=10240 # options CONFIG_VNODES=1024 # -options CONFIG_VNODES=640 # +options CONFIG_VNODES=750 # options CONFIG_VNODE_FREE_MIN=500 # options CONFIG_VNODE_FREE_MIN=300 # @@ -328,10 +334,11 @@ options CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)" # options CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)" # options CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)" # +# # set maximum space used for packet buffers # -options CONFIG_USESOCKTHRESHOLD=1 # -options CONFIG_USESOCKTHRESHOLD=0 # +options CONFIG_USESOCKTHRESHOLD=1 # +options CONFIG_USESOCKTHRESHOLD=0 # # # Configure size of TCP hash table @@ -397,6 +404,11 @@ options CONFIG_NO_KPRINTF_STRINGS # # options CONFIG_EMBEDDED # +# only execute signed code. Hang this off config_embedded since there's +# nothing more appropriate right now +# +options CONFIG_ENFORCE_SIGNED_CODE # + # # code decryption... 
used on embedded for app protection # must be set in all the bsd/conf and osfmk/conf MASTER files diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386 index c2ae93a18..08eca2cbc 100644 --- a/bsd/conf/MASTER.i386 +++ b/bsd/conf/MASTER.i386 @@ -44,16 +44,17 @@ # # Standard Apple Research Configurations: # -------- ----- -------- --------------- -# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ] -# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ] +# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ] +# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ] # NFS = [ nfsclient nfsserver ] # VPN = [ ipsec ] # RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] # PROFILE = [ RELEASE profile ] -# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ] +# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ] # -# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug 
compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ] +# +# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] # EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ] # EMBEDDED_NET = [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ] # EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ] @@ -78,8 +79,10 @@ options UXPR # user-level XPR package # config mach_kernel swap generic # options EVENT # +options NO_NESTED_PMAP # + # -# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and # security/conf MASTER files. # options CONFIG_MACF # Mandatory Access Control Framework @@ -88,6 +91,7 @@ options CONFIG_MACF_SOCKET_SUBSET # MAC socket subest (no labels) #options CONFIG_MACF_NET # mbuf #options CONFIG_MACF_DEBUG #options CONFIG_MACF_MACH +options CONFIG_AUDIT # Kernel auditing # app-profiling i.e. pre-heating - off? 
options CONFIG_APP_PROFILE=0 diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc index b66984140..2a0846433 100644 --- a/bsd/conf/MASTER.ppc +++ b/bsd/conf/MASTER.ppc @@ -45,15 +45,15 @@ # Standard Apple Research Configurations: # -------- ----- -------- --------------- # -# BASE = [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ] -# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ] +# BASE = [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ] +# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ipflow ] # NFS = [ nfsclient nfsserver ] # VPN = [ ipsec ] # RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] # DEVELOPMENT = [ RELEASE ] # PROFILE = [ RELEASE profile ] -# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ] +# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ] # ###################################################################### # @@ -69,7 +69,7 @@ options UXPR # user-level XPR package # config mach_kernel swap generic # # -# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and # security/conf MASTER files. 
# options CONFIG_MACF # Mandatory Access Control Framework @@ -78,6 +78,7 @@ options CONFIG_MACF_SOCKET_SUBSET # MAC socket subest (no labels) #options CONFIG_MACF_NET # mbuf #options CONFIG_MACF_DEBUG #options CONFIG_MACF_MACH +options CONFIG_AUDIT # Kernel auditing options EVENT # diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64 new file mode 100644 index 000000000..dd1f24e96 --- /dev/null +++ b/bsd/conf/MASTER.x86_64 @@ -0,0 +1,119 @@ +# +# Mach Operating System +# Copyright (c) 1986 Carnegie-Mellon University +# All rights reserved. The CMU software License Agreement +# specifies the terms and conditions for use and redistribution. +# +###################################################################### +# +# Master Apple configuration file (see the master machine independent +# configuration file for a description of the file format). +# +###################################################################### +# +# Apple (PSEUDO-)DEVICES (select any combination) +# ex = Excelan EXOS 202 Ethernet interface +# ip = Interphase V/SMD 3200 disk controller +# od = Canon OMD-1 Optical Disk +# rd = RAM disk +# sd = SCSI disk +# sg = Generic SCSI Device +# st = SCSI tape +# fd = Floppy Disk +# en = Integrated Ethernet controller +# dsp = DSP560001 digital signal processor +# iplmeas = ipl time measurement +# nextp = NeXT Laser Printer +# sound = sound I/O +# vol = removable volume support device +# venip = virtual Ethernet/IP network interface +# zs = Serial device +# +# MULTIPROCESSOR SUPPORT (select exactly one) +# multi = support 4 processors +# uni = supports single processor +# +# SPECIAL CHARACTERISTICS (select any combination) +# gdb = GNU kernel debugger +# posix_kern = POSIX support +# +# CPU TYPE (select exactly one) +# NeXT = FIXME +# +###################################################################### +# +# Standard Apple Research Configurations: +# -------- ----- -------- --------------- +# BASE = [ intel mach medium config_dtrace vol pst 
gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo union config_volfs hfs_compression ] +# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ipflow ] +# NFS = [ nfsclient nfsserver ] +# VPN = [ ipsec ] +# RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert pf pflog ] +# +# +# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] +# EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ] +# EMBEDDED_NET = [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ] +# EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ] +# DEVELOPMENT = [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver netmibs development mach_assert ] +# +###################################################################### +# +machine "x86_64" # +cpu "x86_64" # + +makeoptions CCONFIGFLAGS = "-g -O3 -fno-omit-frame-pointer" # +makeoptions CCONFIGFLAGS = "-O3" # +makeoptions RELOC = "00100000" # +makeoptions SYMADDR = "00780000" # + +options GDB # GNU kernel debugger # +options DEBUG # general debugging code # +options SHOW_SPACE # print size of structures # +options EVENTMETER # event meter support # +options FP_EMUL # floating point emulation # +options UXPR # user-level XPR package # +config mach_kernel swap generic # +options EVENT # + 
+options NO_NESTED_PMAP # + +# +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and +# security/conf MASTER files. +# +options CONFIG_MACF # Mandatory Access Control Framework +options CONFIG_MACF_SOCKET_SUBSET # MAC socket subset (no labels) +#options CONFIG_MACF_SOCKET # MAC socket labels +#options CONFIG_MACF_NET # mbuf +#options CONFIG_MACF_DEBUG +#options CONFIG_MACF_MACH +options CONFIG_AUDIT # Kernel auditing + +# app-profiling i.e. pre-heating - off? +options CONFIG_APP_PROFILE=0 + +# +# code decryption... used on i386 for DSMOS +# must be set in all the bsd/conf and osfmk/conf MASTER files +# +options CONFIG_CODE_DECRYPTION + +# +# Ipl measurement system +# +pseudo-device iplmeas # + +# +# NFS measurement system +# +pseudo-device nfsmeas # + +# +# Removable Volume support +# +pseudo-device vol # + diff --git a/bsd/conf/Makefile b/bsd/conf/Makefile index 29ae6092f..a79644e77 100644 --- a/bsd/conf/Makefile +++ b/bsd/conf/Makefile @@ -3,10 +3,11 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir -export ubc_subr.o_CFLAGS_ADD=-Wno-discard-qual -export vnode_pager.o_CFLAGS_ADD=-Werror -export vm_unix.o_CFLAGS_ADD=-Werror -export dp_backing_file.o_CFLAGS_ADD=-Werror +export dp_backing_file.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export ubc_subr.o_CFLAGS_ADD=-Wno-discard-qual -Wshorten-64-to-32 +export vnode_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_unix.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 + export if_mib.o_CFLAGS_ADD=-Wno-unused-parameter export adsp_Write.o_CFLAGS_ADD=-Wno-sign-compare export adsp_Packet.o_CFLAGS_ADD=-Wno-sign-compare @@ -114,7 +115,6 @@ $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f
$(notdir $?); \ diff --git a/bsd/conf/Makefile.i386 b/bsd/conf/Makefile.i386 index ec78b385c..0b5f62979 100644 --- a/bsd/conf/Makefile.i386 +++ b/bsd/conf/Makefile.i386 @@ -2,101 +2,31 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# files to build with certain warnings turned off +dis_tables.o_CFLAGS_ADD += -Wno-cast-qual +fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual + + # Enable -Werror for i386 builds CFLAGS+=$(WERROR) CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) # Objects that don't compile cleanly: OBJS_NO_WERROR = \ - vfs_xattr.o \ - vfs_fsevents.o \ fifo_vnops.o \ - subr_log.o \ - ioconf.o \ aescrypt.o \ aeskey.o \ des_setkey.o \ sha2.o \ - vn.o \ - BTreeTreeOps.o \ - cd9660_lookup.o \ - cd9660_node.o \ - cd9660_rrip.o \ - cd9660_util.o \ - cd9660_vfsops.o \ - cd9660_vnops.o \ - bsd_init.o \ - kern_newsysctl.o \ - kern_prot.o \ - kpi_socket.o \ - kpi_socketfilter.o \ - uipc_domain.o \ - uipc_proto.o \ - uipc_usrreq.o \ - ether_if_module.o \ if_ethersubr.o \ - if_gif.o \ if_media.o \ - if_stf.o \ kext_net.o \ - netisr.o \ - rtsock.o \ dhcp_options.o \ - if_ether.o \ - igmp.o \ in_bootp.o \ - in_cksum.o \ - ip_fw2.o \ - ip_fw2_compat.o \ - kpi_ipfilter.o \ - in_gif.o \ - ip_divert.o \ - ip_dummynet.o \ - ip_icmp.o \ - ip_input.o \ - ip_mroute.o \ - ip_output.o \ - tcp_input.o \ - tcp_subr.o \ - tcp_timer.o \ - ah_core.o \ - esp_core.o \ - esp_input.o \ - esp_rijndael.o \ - in6_gif.o \ - in6_proto.o \ - in6_src.o \ - ip6_output.o \ - ipsec.o \ - raw_ip6.o \ - key.o \ - keydb.o \ krpc_subr.o \ - ffs_alloc.o \ - ffs_balloc.o \ - ffs_inode.o \ - ffs_subr.o \ - ffs_vfsops.o \ - ffs_vnops.o \ - ufs_attrlist.o \ - ufs_bmap.o \ - ufs_byte_order.o \ - ufs_ihash.o \ - ufs_inode.o \ - ufs_lockf.o \ - ufs_lookup.o \ - ufs_quota.o \ - ufs_readwrite.o \ - ufs_vfsops.o \ - ufs_vnops.o \ ux_exception.o \ - vfs_journal.o \ - vfs_syscalls.o \ - sysctl.o \ unix_startup.o \ randomdev.o \ 
- sha1mod.o \ - in_arp.o \ vnode_pager.o \ dp_backing_file.o \ vm_unix.o \ @@ -104,19 +34,8 @@ OBJS_NO_WERROR = \ km.o \ init_sysent.o \ drv_dep.o \ - dtrace.o \ - lockstat.o \ - profile_prvd.o \ - systrace.o \ - blist.o \ - dtrace_glue.o \ - fbt.o \ - fbt_x86.o \ - sdt.o \ - sdt_subr.o \ sdt_x86.o \ dtrace_isa.o \ - dis_tables.o \ aes_modes.o diff --git a/bsd/conf/Makefile.ppc b/bsd/conf/Makefile.ppc index 89d810966..2dd4e88b3 100644 --- a/bsd/conf/Makefile.ppc +++ b/bsd/conf/Makefile.ppc @@ -2,103 +2,34 @@ #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### +# files to build with certain warnings turned off +dis_tables.o_CFLAGS_ADD += -Wno-cast-qual +fbt_ppc.o_CFLAGS_ADD += -Wno-cast-qual -Wno-pointer-to-int-cast + + # Enable -Werror for ppc builds CFLAGS+=$(WERROR) CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) # Objects that don't compile cleanly: OBJS_NO_WERROR = \ - vfs_xattr.o \ - vfs_fsevents.o \ fifo_vnops.o \ - subr_log.o \ - ioconf.o \ aescrypt.o \ aeskey.o \ des_setkey.o \ sha2.o \ shadow.o \ - vn.o \ - cd9660_lookup.o \ - cd9660_node.o \ - cd9660_rrip.o \ - cd9660_util.o \ - cd9660_vfsops.o \ - cd9660_vnops.o \ - bsd_init.o \ - bsd_stubs.o \ - kern_newsysctl.o \ - kern_prot.o \ - kpi_socket.o \ - kpi_socketfilter.o \ - uipc_domain.o \ - uipc_proto.o \ - uipc_usrreq.o \ - ether_if_module.o \ if_ethersubr.o \ - if_gif.o \ if_media.o \ - if_stf.o \ kext_net.o \ - netisr.o \ - rtsock.o \ dhcp_options.o \ - if_ether.o \ - igmp.o \ in_bootp.o \ - in_cksum.o \ - ip_fw2.o \ - ip_fw2_compat.o \ - kpi_ipfilter.o \ - in_gif.o \ - ip_divert.o \ - ip_dummynet.o \ - ip_icmp.o \ - ip_input.o \ - ip_mroute.o \ - ip_output.o \ - tcp_input.o \ - tcp_subr.o \ - tcp_timer.o \ - ah_core.o \ - esp_core.o \ - esp_input.o \ - esp_rijndael.o \ - in6_gif.o \ - in6_proto.o \ - in6_src.o \ - ip6_output.o \ - ipsec.o \ - raw_ip6.o \ - key.o \ - keydb.o \ krpc_subr.o \ - ffs_alloc.o \ - 
ffs_balloc.o \ - ffs_inode.o \ - ffs_subr.o \ - ffs_vfsops.o \ - ffs_vnops.o \ - ufs_attrlist.o \ - ufs_bmap.o \ - ufs_byte_order.o \ - ufs_ihash.o \ - ufs_inode.o \ - ufs_lockf.o \ - ufs_lookup.o \ - ufs_quota.o \ - ufs_readwrite.o \ - ufs_vfsops.o \ - ufs_vnops.o \ ux_exception.o \ - vfs_journal.o \ - vfs_syscalls.o \ sysctl.o \ unix_startup.o \ randomdev.o \ - sha1mod.o \ devtimer.o \ - in_arp.o \ vnode_pager.o \ dp_backing_file.o \ vm_unix.o \ @@ -106,16 +37,7 @@ OBJS_NO_WERROR = \ km.o \ at.o \ drv_dep.o \ - dtrace.o \ - lockstat.o \ - profile_prvd.o \ - systrace.o \ - blist.o \ - dtrace_glue.o \ - fbt.o \ fbt_ppc.o \ - sdt.o \ - sdt_subr.o \ sdt_ppc.o \ dtrace_isa.o \ dtrace_subr_ppc.o diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template index 1990dd5b6..fdee45a3e 100644 --- a/bsd/conf/Makefile.template +++ b/bsd/conf/Makefile.template @@ -45,11 +45,7 @@ include $(MakeInc_def) # CFLAGS+= -imacros meta_features.h -DARCH_PRIVATE -DKERNEL -DDRIVER_PRIVATE \ -D_KERNEL_BUILD -DKERNEL_BUILD -DMACH_KERNEL -DBSD_BUILD \ - -DBSD_KERNEL_PRIVATE -DNCPUS=1 -Wno-four-char-constants -fpascal-strings \ - -D__APPLE__ -DLP64KERN=1 -DLP64_DEBUG=0 -I. $(CFLAGS_INLINE_CONFIG) - -# XXX: ld flags for bsd.o -export LDFLAGS_COMPONENT += -keep_private_externs + -DBSD_KERNEL_PRIVATE -DLP64KERN=1 -DLP64_DEBUG=0 -I. 
$(CFLAGS_INLINE_CONFIG) # # Directories for mig generated files @@ -103,12 +99,28 @@ ${OBJS}: ${OBJSDEPS} LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o + +MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh + +init_sysent.c: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) + @echo "Generating $@ from $<"; + $(_v)$(MAKESYSCALLS) $< table > /dev/null + +syscalls.c: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) + @echo "Generating $@ from $<"; + $(_v)$(MAKESYSCALLS) $< names > /dev/null + +audit_kevents.c: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) + @echo "Generating $@ from $<"; + $(_v)$(MAKESYSCALLS) $< audit > /dev/null do_depend: do_all $(_v)${MD} -u Makedep -f -d `ls *.d`; - + do_all: $(COMPONENT).o do_build_all: do_depend diff --git a/bsd/conf/Makefile.x86_64 b/bsd/conf/Makefile.x86_64 new file mode 100644 index 000000000..83b41e2dd --- /dev/null +++ b/bsd/conf/Makefile.x86_64 @@ -0,0 +1,49 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +# files to build with certain warnings turned off +dis_tables.o_CFLAGS_ADD += -Wno-cast-qual +fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual + + +# Enable -Werror for x86_64 builds +CFLAGS+=$(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR = \ + fifo_vnops.o \ + aescrypt.o \ + aeskey.o \ + des_setkey.o \ + sha2.o \ + if_ethersubr.o \ + if_media.o \ + kext_net.o \ + dhcp_options.o \ + in_bootp.o \ + krpc_subr.o \ + ux_exception.o \ + unix_startup.o \ + randomdev.o \ + vnode_pager.o \ + dp_backing_file.o \ + vm_unix.o \ + mem.o \ + km.o \ + 
init_sysent.o \ + drv_dep.o \ + sdt_x86.o \ + dtrace_isa.o \ + aes_modes.o + + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### + diff --git a/bsd/conf/files b/bsd/conf/files index 0a7cfa9ae..61afa6bf3 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -12,7 +12,6 @@ OPTIONS/hw_ast optional hw_ast OPTIONS/hw_footprint optional hw_footprint OPTIONS/kernserv optional kernserv -OPTIONS/config_ip_edgehole optional config_ip_edgehole OPTIONS/config_macf optional config_macf OPTIONS/config_macf_socket_subset optional config_macf_socket_subset OPTIONS/config_macf_socket optional config_macf_socket @@ -60,12 +59,11 @@ OPTIONS/norma_ether optional norma_ether OPTIONS/new_vm_code optional new_vm_code OPTIONS/old_vm_code optional old_vm_code OPTIONS/compat_43 optional compat_43 -OPTIONS/compat_43_tty optional compat_43_tty OPTIONS/diagnostic optional diagnostic OPTIONS/config_dtrace optional config_dtrace OPTIONS/profiling optional profiling OPTIONS/vndevice optional vndevice -OPTIONS/audit optional audit +OPTIONS/config_audit optional config_audit OPTIONS/config_fse optional config_fse OPTIONS/sockets optional sockets OPTIONS/development optional development @@ -115,6 +113,7 @@ OPTIONS/gif optional gif OPTIONS/netat optional netat OPTIONS/sendfile optional sendfile OPTIONS/randomipid optional randomipid +OPTIONS/pf optional pf OPTIONS/zlib optional zlib @@ -128,7 +127,6 @@ OPTIONS/fdesc optional fdesc OPTIONS/fifo optional fifo OPTIONS/nullfs optional nullfs OPTIONS/union optional union -OPTIONS/cd9660 optional cd9660 OPTIONS/devfs optional devfs OPTIONS/crypto optional crypto OPTIONS/allcrypto optional allcrypto @@ -136,10 +134,12 @@ OPTIONS/journaling optional journaling OPTIONS/crypto optional crypto OPTIONS/allcrypto optional allcrypto 
OPTIONS/journaling optional journaling +OPTIONS/hfs_compression optional hfs_compression OPTIONS/config_imageboot optional config_imageboot bsd/dev/random/randomdev.c standard +bsd/dev/random/fips_sha1.c standard bsd/dev/random/YarrowCoreLib/port/smf.c standard bsd/dev/random/YarrowCoreLib/src/comp.c standard bsd/dev/random/YarrowCoreLib/src/prng.c standard @@ -178,12 +178,11 @@ bsd/vfs/vfs_vnops.c standard bsd/vfs/vfs_xattr.c standard bsd/vfs/vnode_if.c standard bsd/vfs/kpi_vfs.c standard -bsd/vfs/vfs_journal.c optional journaling +bsd/vfs/vfs_journal.c standard bsd/vfs/vfs_fsevents.c standard bsd/miscfs/deadfs/dead_vnops.c standard -bsd/miscfs/fdesc/fdesc_vfsops.c optional fdesc -bsd/miscfs/fdesc/fdesc_vnops.c optional fdesc +bsd/miscfs/devfs/devfs_fdesc_support.c optional fdesc bsd/miscfs/fifofs/fifo_vnops.c optional fifo sockets bsd/miscfs/nullfs/null_subr.c optional nullfs bsd/miscfs/nullfs/null_vfsops.c optional nullfs @@ -197,13 +196,7 @@ bsd/miscfs/devfs/devfs_tree.c optional devfs bsd/miscfs/devfs/devfs_vnops.c optional devfs bsd/miscfs/devfs/devfs_vfsops.c optional devfs -bsd/isofs/cd9660/cd9660_bmap.c optional cd9660 -bsd/isofs/cd9660/cd9660_lookup.c optional cd9660 -bsd/isofs/cd9660/cd9660_node.c optional cd9660 -bsd/isofs/cd9660/cd9660_rrip.c optional cd9660 -bsd/isofs/cd9660/cd9660_util.c optional cd9660 -bsd/isofs/cd9660/cd9660_vfsops.c optional cd9660 -bsd/isofs/cd9660/cd9660_vnops.c optional cd9660 +bsd/kern/decmpfs.c standard bsd/net/bpf.c optional bpfilter bsd/net/bpf_filter.c optional bpfilter @@ -242,7 +235,16 @@ bsd/net/net_osdep.c optional sockets bsd/net/kpi_interface.c optional networking bsd/net/kpi_protocol.c optional networking bsd/net/kpi_interfacefilter.c optional networking - +bsd/net/net_str_id.c optional networking +bsd/net/if_utun.c optional networking +bsd/net/if_pflog.c optional pflog pf +bsd/net/pf.c optional pf +bsd/net/pf_if.c optional pf +bsd/net/pf_ioctl.c optional pf +bsd/net/pf_norm.c optional pf +bsd/net/pf_osfp.c 
optional pf +bsd/net/pf_ruleset.c optional pf +bsd/net/pf_table.c optional pf bsd/netinet/if_atm.c optional atm bsd/netinet/igmp.c optional inet @@ -263,7 +265,6 @@ bsd/netinet/ip_id.c optional randomipid inet bsd/netinet/ip_input.c optional inet bsd/netinet/ip_mroute.c optional mrouting bsd/netinet/ip_output.c optional inet -bsd/netinet/ip_edgehole.c optional config_ip_edgehole bsd/netinet/raw_ip.c optional inet bsd/netinet/tcp_debug.c optional tcpdebug bsd/netinet/tcp_input.c optional inet @@ -405,6 +406,8 @@ bsd/nfs/nfs_bio.c optional nfsclient bsd/nfs/nfs_boot.c optional nfsclient bsd/nfs/nfs_gss.c optional nfsclient bsd/nfs/nfs_gss.c optional nfsserver +bsd/nfs/nfs_gss_crypto.c optional nfsclient +bsd/nfs/nfs_gss_crypto.c optional nfsserver bsd/nfs/nfs_lock.c optional nfsclient bsd/nfs/nfs_node.c optional nfsclient bsd/nfs/nfs_serv.c optional nfsserver @@ -422,25 +425,6 @@ bsd/nfs/nfs4_vnops.c optional nfsclient bsd/kern/netboot.c optional nfsclient -bsd/ufs/ffs/ffs_alloc.c optional ffs -bsd/ufs/ffs/ffs_balloc.c optional ffs -bsd/ufs/ffs/ffs_inode.c optional ffs -bsd/ufs/ffs/ffs_subr.c optional ffs -bsd/ufs/ffs/ffs_tables.c optional ffs -bsd/ufs/ffs/ffs_vfsops.c optional ffs -bsd/ufs/ffs/ffs_vnops.c optional ffs -bsd/ufs/mfs/mfs_vfsops.c optional mfs -bsd/ufs/mfs/mfs_vnops.c optional mfs -bsd/ufs/ufs/ufs_attrlist.c optional ffs -bsd/ufs/ufs/ufs_bmap.c optional ffs -bsd/ufs/ufs/ufs_byte_order.c optional rev_endian_fs -bsd/ufs/ufs/ufs_ihash.c optional ffs -bsd/ufs/ufs/ufs_inode.c optional ffs -bsd/ufs/ufs/ufs_lookup.c optional ffs -bsd/ufs/ufs/ufs_quota.c optional quota -bsd/ufs/ufs/ufs_vfsops.c optional ffs -bsd/ufs/ufs/ufs_vnops.c optional ffs - bsd/hfs/hfs_attrlist.c optional hfs bsd/hfs/hfs_btreeio.c optional hfs bsd/hfs/hfs_catalog.c optional hfs @@ -476,16 +460,29 @@ bsd/hfs/hfscommon/Misc/FileExtentMapping.c optional hfs bsd/hfs/hfscommon/Misc/VolumeAllocation.c optional hfs bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c optional hfs 
+bsd/security/audit/audit.c optional config_audit +bsd/security/audit/audit_arg.c optional config_audit +bsd/security/audit/audit_bsd.c optional config_audit +bsd/security/audit/audit_bsm.c optional config_audit +bsd/security/audit/audit_bsm_errno.c optional config_audit +bsd/security/audit/audit_bsm_fcntl.c optional config_audit +bsd/security/audit/audit_bsm_domain.c optional config_audit +bsd/security/audit/audit_bsm_klib.c optional config_audit +bsd/security/audit/audit_bsm_socket_type.c optional config_audit +bsd/security/audit/audit_bsm_token.c optional config_audit +./audit_kevents.c optional config_audit +bsd/security/audit/audit_mac.c optional config_audit +bsd/security/audit/audit_pipe.c optional config_audit +bsd/security/audit/audit_session.c standard +bsd/security/audit/audit_syscalls.c standard +bsd/security/audit/audit_worker.c optional config_audit + bsd/kern/bsd_init.c standard -bsd/kern/init_sysent.c standard +./init_sysent.c standard bsd/kern/kdebug.c standard bsd/kern/kern_acct.c standard bsd/kern/kern_aio.c standard -bsd/kern/kern_audit.c optional audit bsd/kern/kern_authorization.c standard -bsd/kern/kern_bsm_token.c optional audit -bsd/kern/kern_bsm_audit.c optional audit -bsd/kern/kern_bsm_klib.c optional audit bsd/kern/kern_clock.c standard bsd/kern/kern_core.c standard bsd/kern/kern_credential.c standard @@ -493,6 +490,7 @@ bsd/kern/kern_symfile.c standard bsd/kern/kern_descrip.c standard bsd/kern/kern_event.c standard bsd/kern/kern_control.c optional networking +bsd/kern/kern_callout.c standard bsd/kern/kern_exec.c standard bsd/kern/kern_exit.c standard bsd/kern/kern_lockf.c standard @@ -513,7 +511,6 @@ bsd/kern/kern_newsysctl.c standard bsd/kern/kern_memorystatus.c optional config_embedded bsd/kern/kern_mib.c standard bsd/kern/kpi_mbuf.c optional sockets -bsd/kern/sysctl_init.c standard bsd/kern/kern_time.c standard bsd/kern/kern_xxx.c standard bsd/kern/mach_process.c standard @@ -528,9 +525,9 @@ bsd/kern/sys_generic.c standard 
bsd/kern/sys_pipe.c standard bsd/kern/sys_socket.c optional sockets bsd/kern/sys_domain.c optional sockets -bsd/kern/syscalls.c standard +./syscalls.c standard bsd/kern/tty.c standard -bsd/kern/tty_compat.c optional compat_43_tty +bsd/kern/tty_compat.c standard bsd/kern/tty_conf.c standard bsd/kern/tty_pty.c optional pty bsd/kern/tty_ptmx.c optional ptmx @@ -550,7 +547,6 @@ bsd/kern/sysv_shm.c standard bsd/kern/sysv_sem.c standard bsd/kern/sysv_msg.c standard bsd/kern/mach_fat.c standard -bsd/kern/mach_header.c standard bsd/kern/mach_loader.c standard bsd/kern/posix_sem.c standard bsd/kern/posix_shm.c standard @@ -558,8 +554,8 @@ bsd/kern/posix_shm.c standard bsd/kern/qsort.c standard bsd/kern/kpi_socket.c optional sockets bsd/kern/kpi_socketfilter.c optional sockets -bsd/kern/pthread_support.c standard -bsd/kern/pthread_synch.c standard +bsd/kern/pthread_support.c optional psynch +bsd/kern/pthread_synch.c standard bsd/kern/proc_info.c standard bsd/kern/socket_info.c optional sockets diff --git a/bsd/conf/files.i386 b/bsd/conf/files.i386 index 024d69708..424cc3e3e 100644 --- a/bsd/conf/files.i386 +++ b/bsd/conf/files.i386 @@ -11,7 +11,6 @@ bsd/dev/i386/kern_machdep.c standard bsd/dev/i386/memmove.c standard bsd/dev/i386/stubs.c standard bsd/dev/i386/systemcalls.c standard -bsd/dev/i386/lock_stubs.c standard bsd/dev/i386/sysctl.c standard bsd/dev/i386/unix_signal.c standard bsd/dev/i386/munge.s standard diff --git a/bsd/conf/files.x86_64 b/bsd/conf/files.x86_64 new file mode 100644 index 000000000..322174554 --- /dev/null +++ b/bsd/conf/files.x86_64 @@ -0,0 +1,33 @@ +OPTIONS/show_space optional show_space +OPTIONS/gdb optional gdb +OPTIONS/iplmeas optional iplmeas + + +bsd/dev/i386/conf.c standard +bsd/dev/i386/cons.c standard +bsd/dev/i386/mem.c standard +bsd/dev/i386/km.c standard +bsd/dev/i386/kern_machdep.c standard +bsd/dev/i386/memmove.c standard +bsd/dev/i386/stubs.c standard +bsd/dev/i386/systemcalls.c standard +bsd/dev/i386/sysctl.c standard 
+bsd/dev/i386/unix_signal.c standard +bsd/dev/x86_64/munge.s standard + +bsd/crypto/aes/gen/aescrypt.c optional crypto +bsd/crypto/aes/gen/aeskey.c optional crypto +bsd/crypto/aes/gen/aestab.c optional crypto + +# Lightly ifdef'd to support K64 DTrace +bsd/dev/i386/dtrace_isa.c optional config_dtrace +bsd/dev/i386/dtrace_subr_x86.c optional config_dtrace +bsd/dev/i386/fbt_x86.c optional config_dtrace +bsd/dev/i386/sdt_x86.c optional config_dtrace +bsd/dev/i386/fasttrap_isa.c optional config_dtrace +bsd/dev/i386/instr_size.c optional config_dtrace +bsd/dev/i386/dis_tables.c optional config_dtrace + +bsd/kern/bsd_stubs.c standard +bsd/netinet/in_cksum.c optional inet + diff --git a/bsd/conf/param.c b/bsd/conf/param.c index 5e7191195..9aafb343c 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,7 +77,6 @@ #include #include #include -#include #include #include #include @@ -98,7 +97,7 @@ int desiredvnodes = CONFIG_VNODES; int maxfiles = MAXFILES; unsigned int ncallout = 16 + 2*NPROC; -int nmbclusters = NMBCLUSTERS; +unsigned int nmbclusters = NMBCLUSTERS; int nport = NPROC / 2; #define MAXSOCKETS NMBCLUSTERS diff --git a/bsd/crypto/Makefile b/bsd/crypto/Makefile index 90a8d7eec..0af469f52 100644 --- a/bsd/crypto/Makefile +++ b/bsd/crypto/Makefile @@ -20,6 +20,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} @@ -28,6 +30,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ PRIVATE_DATAFILES = \ diff --git a/bsd/crypto/aes/Makefile b/bsd/crypto/aes/Makefile index f34372331..026261c65 100644 --- a/bsd/crypto/aes/Makefile +++ b/bsd/crypto/aes/Makefile @@ -13,6 +13,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ 
+INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ @@ -21,6 +23,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ PRIVATE_DATAFILES = \ diff --git a/bsd/crypto/aes/aes.h b/bsd/crypto/aes/aes.h index f75d02272..eaba0a692 100644 --- a/bsd/crypto/aes/aes.h +++ b/bsd/crypto/aes/aes.h @@ -58,11 +58,11 @@ extern "C" #define AES_BLOCK_SIZE 16 /* the AES block size in bytes */ #define N_COLS 4 /* the number of columns in the state */ -typedef unsigned long uint_32t; +typedef unsigned int uint_32t; typedef unsigned char uint_8t; typedef unsigned short uint_16t; typedef unsigned char aes_08t; -typedef unsigned long aes_32t; +typedef unsigned int aes_32t; #define void_ret void #define int_ret int diff --git a/bsd/miscfs/fdesc/Makefile b/bsd/crypto/aes/gen/Makefile similarity index 73% rename from bsd/miscfs/fdesc/Makefile rename to bsd/crypto/aes/gen/Makefile index 49b06dbdb..7ea225c10 100644 --- a/bsd/miscfs/fdesc/Makefile +++ b/bsd/crypto/aes/gen/Makefile @@ -19,20 +19,16 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ -DATAFILES = - PRIVATE_DATAFILES = \ - fdesc.h + aestab.h aesopt.h -INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_DIR = crypto -INSTALL_MI_DIR = miscfs/fdesc +EXPORT_MI_DIR = ${INSTALL_MI_DIR} -EXPORT_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_KF_MI_LIST = -EXPORT_MI_DIR = miscfs/fdesc +INSTALL_KF_MI_LCL_LIST = ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) - - diff --git a/bsd/crypto/aes/gen/aescrypt.c b/bsd/crypto/aes/gen/aescrypt.c new file mode 100644 index 000000000..31d4c81af --- /dev/null +++ b/bsd/crypto/aes/gen/aescrypt.c @@ -0,0 +1,411 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. 
+ + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 28/01/2004 + + This file contains the code for implementing encryption and decryption + for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It + can optionally be replaced by code written in assembler using NASM. 
For + further details see the file aesopt.h +*/ + +#include "aesopt.h" +#include "aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define ki(y,x,k,c) (s(y,c) = s(x, c) ^ (k)[c]) +#define xo(y,x,c) (s(y,c) ^= s(x, c)) +#define si(y,x,c) (s(y,c) = word_in(x, c)) +#define so(y,x,c) word_out(y, c, s(x,c)) + +#if defined(ARRAYS) +#define locals(y,x) x[4],y[4] +#else +#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3 +#endif + +#define dtables(tab) const aes_32t *tab##0, *tab##1, *tab##2, *tab##3 +#define itables(tab) tab##0 = tab[0]; tab##1 = tab[1]; tab##2 = tab[2]; tab##3 = tab[3] + +#define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \ + s(y,2) = s(x,2); s(y,3) = s(x,3); + +#define key_in(y,x,k) ki(y,x,k,0); ki(y,x,k,1); ki(y,x,k,2); ki(y,x,k,3) +#define cbc(y,x) xo(y,x,0); xo(y,x,1); xo(y,x,2); xo(y,x,3) +#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) + +#if defined(ENCRYPTION) && !defined(AES_ASM) + +/* Visual C++ .Net v7.1 provides the fastest encryption code when using + Pentium optimization with small code but this is poor for decryption + so we need to control this with the following VC++ pragmas +*/ + +#if defined(_MSC_VER) +#pragma optimize( "s", on ) +#endif + +/* Given the column (c) of the output state variable, the following + macros give the input state variables which are needed in its + computation for each row (r) of the state. All the alternative + macros give the same end values but expand into different ways + of calculating these values. In particular the complex macro + used for dynamically variable block sizes is designed to expand + to a compile time constant whenever possible but will expand to + conditional clauses on some branches (I am grateful to Frank + Yellin for this construction) +*/ + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ?
s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
+
+/* fwd_rnd(y,x,k,c): store into column c of state y the round key word
+   (k)[c] XORed with the table-driven transform of state x. Which table
+   helper is used depends on FT4_SET / FT1_SET; the table variants also
+   undefine dec_fmvars so that aes_encrypt_cbc() declares no extra
+   variables for fwd_mcol().
+*/
+#if defined(FT4_SET)
+#undef  dec_fmvars
+#  if defined(ENC_ROUND_CACHE_TABLES)
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fn,fwd_var,rf1,c))
+#  else
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_fn,fwd_var,rf1,c))
+#  endif
+#elif defined(FT1_SET)
+#undef  dec_fmvars
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_fn,fwd_var,rf1,c))
+#else
+#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_sbox,fwd_var,rf1,c)))
+#endif
+
+/* fwd_lrnd(y,x,k,c): same shape as fwd_rnd but built on the t_fl tables
+   (or on t_sbox without fwd_mcol); it is used for the final round only.
+*/
+#if defined(FL4_SET)
+#  if defined(LAST_ENC_ROUND_CACHE_TABLES)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_cached_tables(x,t_fl,fwd_var,rf1,c))
+#  else
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_fl,fwd_var,rf1,c))
+#  endif
+#elif defined(FL1_SET)
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_fl,fwd_var,rf1,c))
+#else
+#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_sbox,fwd_var,rf1,c))
+#endif
+
+/* CBC-encrypt num_blk consecutive blocks of AES_BLOCK_SIZE bytes.
+
+   in      plaintext, read one block per iteration
+   in_iv   initialisation vector, read once before the loop
+   num_blk number of blocks; zero leaves out untouched
+   out     ciphertext, written one block per iteration
+   cx      encryption context; cx->ks (round keys) and cx->rn (round
+           count) are read
+
+   When AES_ERR_CHK is defined the function returns aes_error if cx->rn
+   is not 10, 12 or 14 and aes_good otherwise; without AES_ERR_CHK no
+   value is returned.
+*/
+aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,
+                         unsigned char *out, const aes_encrypt_ctx cx[1])
+{   aes_32t         locals(b0, b1);
+    const aes_32t   *kp;
+    const aes_32t   *kptr = cx->ks;
+#if defined(ENC_ROUND_CACHE_TABLES)
+    dtables(t_fn);
+#endif
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+    dtables(t_fl);
+#endif
+
+#if defined( dec_fmvars )
+    dec_fmvars; /* declare variables for fwd_mcol() if needed */
+#endif
+
+#if defined( AES_ERR_CHK )
+    if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 )
+        return aes_error;
+#endif
+
+    // Load IV into b0.
+    state_in(b0, in_iv);
+
+    /* b0 carries the chaining value: the IV on entry, afterwards the
+       ciphertext of the block just produced. */
+    for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)
+    {
+        kp = kptr;      /* restart at the first round key for every block */
+#if 0
+        // Read the plaintext into b1
+        state_in(b1, in);
+        // Do the CBC with b0 which is either the iv or the ciphertext of the previous block.
+        cbc(b1, b0);
+
+        // Xor b1 with the key schedule to get things started.
+        key_in(b0, b1, kp);
+#else
+        // Since xor is associative we mess with the ordering here to get the loads started early
+        key_in(b1, b0, kp);  // Xor b0(IV) with the key schedule and assign to b1
+        state_in(b0, in);    // Load block into b0
+        cbc(b0, b1);         // Xor b0 with b1 and store in b0
+#endif
+
+#if defined(ENC_ROUND_CACHE_TABLES)
+        itables(t_fn);
+#endif
+
+#if (ENC_UNROLL == FULL)
+
+        /* Cases 14 and 12 each run two rounds, advance kp past them and
+           fall through, so the case 10 tail always sees kp positioned
+           for its nine fwd_rnd rounds plus the final fwd_lrnd. */
+        switch(cx->rn)
+        {
+        case 14:
+            round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+            kp += 2 * N_COLS;
+            /* fall through */
+        case 12:
+            round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+            kp += 2 * N_COLS;
+            /* fall through */
+        case 10:
+        default:
+            round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
+            round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
+            round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
+            round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
+            round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
+            round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+            itables(t_fl);
+#endif
+            round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
+        }
+
+#else
+
+        {   aes_32t    rnd;
+#if (ENC_UNROLL == PARTIAL)
+            /* two rounds per iteration, ping-ponging between b0 and b1 */
+            for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd)
+            {
+                kp += N_COLS;
+                round(fwd_rnd, b1, b0, kp);
+                kp += N_COLS;
+                round(fwd_rnd, b0, b1, kp);
+            }
+            kp += N_COLS;
+            round(fwd_rnd,  b1, b0, kp);
+#else
+            /* one round per iteration; the result is copied back to b0 */
+            for(rnd = 0; rnd < cx->rn - 1; ++rnd)
+            {
+                kp += N_COLS;
+                round(fwd_rnd, b1, b0, kp);
+                l_copy(b0, b1);
+            }
+#endif
+#if defined(LAST_ENC_ROUND_CACHE_TABLES)
+            itables(t_fl);
+#endif
+            kp += N_COLS;
+            round(fwd_lrnd, b0, b1, kp);
+        }
+#endif
+
+        /* b0 now holds the ciphertext; it is written out and also kept
+           as the chaining value for the next block. */
+        state_out(out, b0);
+    }
+
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(DECRYPTION) && !defined(AES_ASM)
+
+/* Visual C++ .Net v7.1 provides the fastest encryption code when using
+   Pentium optimisation with small code but this is poor for decryption
+   so we need to control this with the following VC++ pragmas
+*/
+
+#if defined(_MSC_VER)
+#pragma optimize( "t", on )
+#endif
+
+/* Given the column (c) of the output state variable, the following
+   macros give the input state variables which are needed in its
+   computation for each row (r) of the state. All the alternative
+   macros give the same end values but expand into different ways
+   of calculating these values. In particular the complex macro
+   used for dynamically variable block sizes is designed to expand
+   to a compile time constant whenever possible but will expand to
+   conditional clauses on some branches (I am grateful to Frank
+   Yellin for this construction)
+*/
+
+#define inv_var(x,r,c)\
+ ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
+ : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
+ : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
+ :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ?
s(x,3) : s(x,0))) + +#if defined(IT4_SET) +#undef dec_imvars +# if defined(DEC_ROUND_CACHE_TABLES) +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_in,inv_var,rf1,c)) +# else +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_in,inv_var,rf1,c)) +# endif +#elif defined(IT1_SET) +#undef dec_imvars +#define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_in,inv_var,rf1,c)) +#else +#define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c))) +#endif + +#if defined(IL4_SET) +# if defined(LAST_DEC_ROUND_CACHE_TABLES) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_cached_tables(x,t_il,inv_var,rf1,c)) +# else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_il,inv_var,rf1,c)) +# endif +#elif defined(IL1_SET) +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_il,inv_var,rf1,c)) +#else +#define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_ibox,inv_var,rf1,c)) +#endif + +aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, + unsigned char *out, const aes_decrypt_ctx cx[1]) +{ aes_32t locals(b0, b1); + const aes_32t *kptr = cx->ks + cx->rn * N_COLS; + const aes_32t *kp; +#if defined(DEC_ROUND_CACHE_TABLES) + dtables(t_in); +#endif +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + dtables(t_il); +#endif + +#if defined( dec_imvars ) + dec_imvars; /* declare variables for inv_mcol() if needed */ +#endif + +#if defined( AES_ERR_CHK ) + if( cx->rn != 10 && cx->rn != 12 && cx->rn != 14 ) + return aes_error; +#endif + +#if defined(DEC_ROUND_CACHE_TABLES) + itables(t_in); +#endif + + in += AES_BLOCK_SIZE * (num_blk - 1); + out += AES_BLOCK_SIZE * (num_blk - 1); + // Load the last block's ciphertext into b1 + state_in(b1, in); + + for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk) + { + kp = kptr; + // Do the xor part of state_in, where b1 is the previous block's ciphertext. 
+ key_in(b0, b1, kp); + +#if (DEC_UNROLL == FULL) + + switch(cx->rn) + { + case 14: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + kp -= 2 * N_COLS; + case 12: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + kp -= 2 * N_COLS; + case 10: + default: + round(inv_rnd, b1, b0, kp - 1 * N_COLS); + round(inv_rnd, b0, b1, kp - 2 * N_COLS); + round(inv_rnd, b1, b0, kp - 3 * N_COLS); + round(inv_rnd, b0, b1, kp - 4 * N_COLS); + round(inv_rnd, b1, b0, kp - 5 * N_COLS); + round(inv_rnd, b0, b1, kp - 6 * N_COLS); + round(inv_rnd, b1, b0, kp - 7 * N_COLS); + round(inv_rnd, b0, b1, kp - 8 * N_COLS); + round(inv_rnd, b1, b0, kp - 9 * N_COLS); +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + itables(t_il); +#endif + round(inv_lrnd, b0, b1, kp - 10 * N_COLS); + } + +#else + + { aes_32t rnd; +#if (DEC_UNROLL == PARTIAL) + for(rnd = 0; rnd < (cx->rn >> 1) - 1; ++rnd) + { + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); + kp -= N_COLS; + round(inv_rnd, b0, b1, kp); + } + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); +#else + for(rnd = 0; rnd < cx->rn - 1; ++rnd) + { + kp -= N_COLS; + round(inv_rnd, b1, b0, kp); + l_copy(b0, b1); + } +#endif +#if defined(LAST_DEC_ROUND_CACHE_TABLES) + itables(t_il); +#endif + kp -= N_COLS; + round(inv_lrnd, b0, b1, kp); + } +#endif + + if (num_blk == 1) + { + // We are doing the first block so we need the IV rather than the previous + // block for CBC (there is no previous block) + state_in(b1, in_iv); + } + else + { + in -= AES_BLOCK_SIZE; + state_in(b1, in); + } + + // Do the CBC with b1 which is either the IV or the ciphertext of the previous block. 
+ cbc(b0, b1); + + state_out(out, b0); + } +#if defined( AES_ERR_CHK ) + return aes_good; +#endif +} + +#endif + +#if defined(__cplusplus) +} +#endif diff --git a/bsd/crypto/aes/gen/aeskey.c b/bsd/crypto/aes/gen/aeskey.c new file mode 100644 index 000000000..5e0a6453c --- /dev/null +++ b/bsd/crypto/aes/gen/aeskey.c @@ -0,0 +1,455 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 26/08/2003 + + This file contains the code for implementing the key schedule for AES + (Rijndael) for block and key sizes of 16, 24, and 32 bytes. See aesopt.h + for further details including optimisation. +*/ + +#include "aesopt.h" +#include "aestab.h" + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Initialise the key schedule from the user supplied key. 
The key
+ length can be specified in bytes, with legal values of 16, 24
+ and 32, or in bits, with legal values of 128, 192 and 256. These
+ values correspond with Nk values of 4, 6 and 8 respectively.
+
+ The following macros implement a single cycle in the key
+ schedule generation process. The number of cycles needed
+ for each cx->n_col and nk value is:
+
+ nk =             4  5  6  7  8
+ ------------------------------
+ cx->n_col = 4   10  9  8  7  7
+ cx->n_col = 5   14 11 10  9  9
+ cx->n_col = 6   19 15 12 11 11
+ cx->n_col = 7   21 19 16 13 14
+ cx->n_col = 8   29 23 19 17 14
+*/
+
+/* keN(k,i) runs cycle i of the schedule for an N-word key: it updates
+   the running words in ss[] and stores them into k[]. kelN(k,i) is the
+   variant used for the last cycle; kel4 is textually identical to ke4,
+   while kel6 and kel8 store only the first four words of the cycle.
+   All of them expect an array named ss in the enclosing scope.
+*/
+#define ke4(k,i) \
+{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
+    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
+}
+#define kel4(k,i) \
+{   k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+5] = ss[1] ^= ss[0]; \
+    k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
+}
+
+#define ke6(k,i) \
+{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
+    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
+    k[6*(i)+10] = ss[4] ^= ss[3]; k[6*(i)+11] = ss[5] ^= ss[4]; \
+}
+#define kel6(k,i) \
+{   k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 7] = ss[1] ^= ss[0]; \
+    k[6*(i)+ 8] = ss[2] ^= ss[1]; k[6*(i)+ 9] = ss[3] ^= ss[2]; \
+}
+
+#define ke8(k,i) \
+{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
+    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
+    k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); k[8*(i)+13] = ss[5] ^= ss[4]; \
+    k[8*(i)+14] = ss[6] ^= ss[5]; k[8*(i)+15] = ss[7] ^= ss[6]; \
+}
+#define kel8(k,i) \
+{   k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 9] = ss[1] ^= ss[0]; \
+    k[8*(i)+10] = ss[2] ^= ss[1]; k[8*(i)+11] = ss[3] ^= ss[2]; \
+}
+
+#if defined(ENCRYPTION_KEY_SCHEDULE)
+
+#if defined(AES_128) || defined(AES_VAR)
+
+/* Build the encryption key schedule for a 16-byte key.
+
+   key  the key, read as four words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 10
+
+   Cycles 0..8 use ke4 (looped when ENC_UNROLL == NONE, otherwise written
+   out), cycle 9 uses kel4. Returns aes_good when AES_ERR_CHK is defined.
+*/
+aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   aes_32t    ss[4];
+
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+
+#if ENC_UNROLL == NONE
+    {   aes_32t i;
+
+        /* NOTE(review): the bound evaluates to 9 if N_COLS is 4 - confirm in aesopt.h */
+        for(i = 0; i < ((11 * N_COLS - 5) / 4); ++i)
+            ke4(cx->ks, i);
+    }
+#else
+    ke4(cx->ks, 0);  ke4(cx->ks, 1);
+    ke4(cx->ks, 2);  ke4(cx->ks, 3);
+    ke4(cx->ks, 4);  ke4(cx->ks, 5);
+    ke4(cx->ks, 6);  ke4(cx->ks, 7);
+    ke4(cx->ks, 8);
+#endif
+    kel4(cx->ks, 9);
+    cx->rn = 10;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_192) || defined(AES_VAR)
+
+/* Build the encryption key schedule for a 24-byte key.
+
+   key  the key, read as six words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 12
+
+   Cycles 0..6 use ke6, cycle 7 uses kel6. Returns aes_good when
+   AES_ERR_CHK is defined.
+*/
+aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   aes_32t    ss[6];
+
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+
+#if ENC_UNROLL == NONE
+    {   aes_32t i;
+
+        /* NOTE(review): the bound evaluates to 7 if N_COLS is 4 - confirm in aesopt.h */
+        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
+            ke6(cx->ks, i);
+    }
+#else
+    ke6(cx->ks, 0);  ke6(cx->ks, 1);
+    ke6(cx->ks, 2);  ke6(cx->ks, 3);
+    ke6(cx->ks, 4);  ke6(cx->ks, 5);
+    ke6(cx->ks, 6);
+#endif
+    kel6(cx->ks, 7);
+    cx->rn = 12;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_256) || defined(AES_VAR)
+
+/* Build the encryption key schedule for a 32-byte key.
+
+   key  the key, read as eight words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 14
+
+   Cycles 0..5 use ke8, cycle 6 uses kel8. Returns aes_good when
+   AES_ERR_CHK is defined.
+*/
+aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1])
+{   aes_32t    ss[8];
+
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    cx->ks[6] = ss[6] = word_in(key, 6);
+    cx->ks[7] = ss[7] = word_in(key, 7);
+
+#if ENC_UNROLL == NONE
+    {   aes_32t i;
+
+        /* NOTE(review): the bound evaluates to 6 if N_COLS is 4 - confirm in aesopt.h */
+        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
+            ke8(cx->ks,  i);
+    }
+#else
+    ke8(cx->ks, 0); ke8(cx->ks, 1);
+    ke8(cx->ks, 2); ke8(cx->ks, 3);
+    ke8(cx->ks, 4); ke8(cx->ks, 5);
+#endif
+    kel8(cx->ks, 6);
+    cx->rn = 14;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_VAR)
+
+/* Select the encryption key schedule routine from key_len, which may be
+   given in bytes (16, 24, 32) or in bits (128, 192, 256).
+
+   With AES_ERR_CHK the result of the selected routine is returned, or
+   aes_error for any other key_len. Without AES_ERR_CHK the switch has no
+   default case, so any other key_len returns without writing to cx.
+*/
+aes_rval aes_encrypt_key(const unsigned char *key, int key_len, aes_encrypt_ctx cx[1])
+{
+    switch(key_len)
+    {
+#if defined( AES_ERR_CHK )
+    case 16: case 128: return aes_encrypt_key128(key, cx);
+    case 24: case 192: return aes_encrypt_key192(key, cx);
+    case 32: case 256: return aes_encrypt_key256(key, cx);
+    default: return aes_error;
+#else
+    case 16: case 128: aes_encrypt_key128(key, cx); return;
+    case 24: case 192: aes_encrypt_key192(key, cx); return;
+    case 32: case 256: aes_encrypt_key256(key, cx); return;
+#endif
+    }
+}
+
+#endif
+
+#endif
+
+#if defined(DECRYPTION_KEY_SCHEDULE)
+
+/* ff(x) is the identity when DEC_ROUND == NO_TABLES and inv_mcol(x)
+   otherwise; in the latter case d_vars forwards dec_imvars (when that is
+   defined) so the key routines can declare what inv_mcol() needs.
+*/
+#if DEC_ROUND == NO_TABLES
+#define ff(x)   (x)
+#else
+#define ff(x)   inv_mcol(x)
+#if defined( dec_imvars )
+#define d_vars  dec_imvars
+#endif
+#endif
+
+/* kdfN / kdN / kdlN are the first, middle and last cycle macros of the
+   decryption key schedule. Two alternative sets are provided for the
+   4-word key; the first one is the one compiled (#if 1) and uses ss[4]
+   as scratch.
+*/
+#if 1
+#define kdf4(k,i) \
+{   ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; ss[1] = ss[1] ^ ss[3]; ss[2] = ss[2] ^ ss[3]; ss[3] = ss[3]; \
+    ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
+    ss[4] ^= k[4*(i)];   k[4*(i)+4] = ff(ss[4]); ss[4] ^= k[4*(i)+1]; k[4*(i)+5] = ff(ss[4]); \
+    ss[4] ^= k[4*(i)+2]; k[4*(i)+6] = ff(ss[4]); ss[4] ^= k[4*(i)+3]; k[4*(i)+7] = ff(ss[4]); \
+}
+#define kd4(k,i) \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
+    k[4*(i)+4] = ss[4] ^= k[4*(i)]; k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
+    k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
+}
+#define kdl4(k,i) \
+{   ss[4] = ls_box(ss[(i+3) % 4], 3) ^ t_use(r,c)[i]; ss[i % 4] ^= ss[4]; \
+    k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; k[4*(i)+5] = ss[1] ^ ss[3]; \
+    k[4*(i)+6] = ss[0]; k[4*(i)+7] = ss[1]; \
+}
+#else
+#define kdf4(k,i) \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ff(ss[0]); ss[1] ^= ss[0]; k[4*(i)+ 5] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[4*(i)+ 6] = ff(ss[2]); ss[3] ^= ss[2]; k[4*(i)+ 7] = ff(ss[3]); \
+}
+#define kd4(k,i) \
+{   ss[4] =
ls_box(ss[3],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[4]; ss[4] = ff(ss[4]); k[4*(i)+ 4] = ss[4] ^= k[4*(i)]; \
+    ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[4] ^= k[4*(i)+ 1]; \
+    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[4] ^= k[4*(i)+ 2]; \
+    ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[4] ^= k[4*(i)+ 3]; \
+}
+#define kdl4(k,i) \
+{   ss[0] ^= ls_box(ss[3],3) ^ t_use(r,c)[i]; k[4*(i)+ 4] = ss[0]; ss[1] ^= ss[0]; k[4*(i)+ 5] = ss[1]; \
+    ss[2] ^= ss[1]; k[4*(i)+ 6] = ss[2]; ss[3] ^= ss[2]; k[4*(i)+ 7] = ss[3]; \
+}
+#endif
+
+/* 6-word key cycles; kd6 uses ss[6] as scratch, so callers declare ss[7]. */
+#define kdf6(k,i) \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ff(ss[0]); ss[1] ^= ss[0]; k[6*(i)+ 7] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[6*(i)+ 8] = ff(ss[2]); ss[3] ^= ss[2]; k[6*(i)+ 9] = ff(ss[3]); \
+    ss[4] ^= ss[3]; k[6*(i)+10] = ff(ss[4]); ss[5] ^= ss[4]; k[6*(i)+11] = ff(ss[5]); \
+}
+#define kd6(k,i) \
+{   ss[6] = ls_box(ss[5],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= ss[6]; ss[6] = ff(ss[6]); k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
+    ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
+    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
+    ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
+    ss[4] ^= ss[3]; k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
+    ss[5] ^= ss[4]; k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
+}
+#define kdl6(k,i) \
+{   ss[0] ^= ls_box(ss[5],3) ^ t_use(r,c)[i]; k[6*(i)+ 6] = ss[0]; ss[1] ^= ss[0]; k[6*(i)+ 7] = ss[1]; \
+    ss[2] ^= ss[1]; k[6*(i)+ 8] = ss[2]; ss[3] ^= ss[2]; k[6*(i)+ 9] = ss[3]; \
+}
+
+/* 8-word key cycles; kd8 keeps its scratch value in a block-local g. */
+#define kdf8(k,i) \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ff(ss[0]); ss[1] ^= ss[0]; k[8*(i)+ 9] = ff(ss[1]); \
+    ss[2] ^= ss[1]; k[8*(i)+10] = ff(ss[2]); ss[3] ^= ss[2]; k[8*(i)+11] = ff(ss[3]); \
+    ss[4] ^= ls_box(ss[3],0); k[8*(i)+12] = ff(ss[4]); ss[5] ^= ss[4]; k[8*(i)+13] = ff(ss[5]); \
+    ss[6] ^= ss[5]; k[8*(i)+14] = ff(ss[6]); ss[7] ^= ss[6]; k[8*(i)+15] = ff(ss[7]); \
+}
+#define kd8(k,i) \
+{   aes_32t g = ls_box(ss[7],3) ^ t_use(r,c)[i]; \
+    ss[0] ^= g; g = ff(g); k[8*(i)+ 8] = g ^= k[8*(i)]; \
+    ss[1] ^= ss[0]; k[8*(i)+ 9] = g ^= k[8*(i)+ 1]; \
+    ss[2] ^= ss[1]; k[8*(i)+10] = g ^= k[8*(i)+ 2]; \
+    ss[3] ^= ss[2]; k[8*(i)+11] = g ^= k[8*(i)+ 3]; \
+    g = ls_box(ss[3],0); \
+    ss[4] ^= g; g = ff(g); k[8*(i)+12] = g ^= k[8*(i)+ 4]; \
+    ss[5] ^= ss[4]; k[8*(i)+13] = g ^= k[8*(i)+ 5]; \
+    ss[6] ^= ss[5]; k[8*(i)+14] = g ^= k[8*(i)+ 6]; \
+    ss[7] ^= ss[6]; k[8*(i)+15] = g ^= k[8*(i)+ 7]; \
+}
+#define kdl8(k,i) \
+{   ss[0] ^= ls_box(ss[7],3) ^ t_use(r,c)[i]; k[8*(i)+ 8] = ss[0]; ss[1] ^= ss[0]; k[8*(i)+ 9] = ss[1]; \
+    ss[2] ^= ss[1]; k[8*(i)+10] = ss[2]; ss[3] ^= ss[2]; k[8*(i)+11] = ss[3]; \
+}
+
+#if defined(AES_128) || defined(AES_VAR)
+
+/* Build the decryption key schedule for a 16-byte key.
+
+   key  the key, read as four words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 10
+
+   With DEC_UNROLL == NONE the schedule is generated with the encryption
+   macros ke4/kel4 and, unless DEC_ROUND == NO_TABLES, inv_mcol() is then
+   applied to words N_COLS .. 10 * N_COLS - 1. Otherwise the kdf4/kd4/kdl4
+   macros produce the schedule directly. Returns aes_good when
+   AES_ERR_CHK is defined.
+*/
+aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   aes_32t    ss[5];   /* ss[4] is scratch for kdf4/kd4/kdl4 */
+#if defined( d_vars )
+    d_vars;
+#endif
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+
+#if DEC_UNROLL == NONE
+    {   aes_32t i;
+
+        for(i = 0; i < (11 * N_COLS - 5) / 4; ++i)
+            ke4(cx->ks, i);
+        kel4(cx->ks, 9);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 10 * N_COLS; ++i)
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#else
+    kdf4(cx->ks, 0);  kd4(cx->ks, 1);
+     kd4(cx->ks, 2);  kd4(cx->ks, 3);
+     kd4(cx->ks, 4);  kd4(cx->ks, 5);
+     kd4(cx->ks, 6);  kd4(cx->ks, 7);
+     kd4(cx->ks, 8); kdl4(cx->ks, 9);
+#endif
+    cx->rn = 10;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_192) || defined(AES_VAR)
+
+/* Build the decryption key schedule for a 24-byte key.
+
+   key  the key, read as six words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 12
+
+   With DEC_UNROLL == NONE the schedule is generated with ke6/kel6 and,
+   unless DEC_ROUND == NO_TABLES, inv_mcol() is then applied to words
+   N_COLS .. 12 * N_COLS - 1. Otherwise words 4 and 5 are stored through
+   ff() and the kdf6/kd6/kdl6 macros produce the rest. Returns aes_good
+   when AES_ERR_CHK is defined.
+*/
+aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   aes_32t    ss[7];   /* ss[6] is scratch for kd6 */
+#if defined( d_vars )
+    d_vars;
+#endif
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+
+#if DEC_UNROLL == NONE
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    {   aes_32t i;
+
+        for(i = 0; i < (13 * N_COLS - 7) / 6; ++i)
+            ke6(cx->ks, i);
+        kel6(cx->ks, 7);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 12 * N_COLS; ++i)
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+#endif
+    }
+#else
+    cx->ks[4] = ff(ss[4] = word_in(key, 4));
+    cx->ks[5] = ff(ss[5] = word_in(key, 5));
+    kdf6(cx->ks, 0); kd6(cx->ks, 1);
+    kd6(cx->ks, 2);  kd6(cx->ks, 3);
+    kd6(cx->ks, 4);  kd6(cx->ks, 5);
+    kd6(cx->ks, 6); kdl6(cx->ks, 7);
+#endif
+    cx->rn = 12;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_256) || defined(AES_VAR)
+
+/* Build the decryption key schedule for a 32-byte key.
+
+   key  the key, read as eight words through word_in()
+   cx   context to fill: cx->ks receives the schedule, cx->rn is set to 14
+
+   With DEC_UNROLL == NONE the schedule is generated with ke8/kel8 and,
+   unless DEC_ROUND == NO_TABLES, inv_mcol() is then applied to words
+   N_COLS .. 14 * N_COLS - 1. Otherwise words 4..7 are stored through
+   ff() and the kdf8/kd8/kdl8 macros produce the rest. Returns aes_good
+   when AES_ERR_CHK is defined.
+*/
+aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1])
+{   aes_32t    ss[8];
+#if defined( d_vars )
+    d_vars;
+#endif
+    cx->ks[0] = ss[0] = word_in(key, 0);
+    cx->ks[1] = ss[1] = word_in(key, 1);
+    cx->ks[2] = ss[2] = word_in(key, 2);
+    cx->ks[3] = ss[3] = word_in(key, 3);
+
+#if DEC_UNROLL == NONE
+    cx->ks[4] = ss[4] = word_in(key, 4);
+    cx->ks[5] = ss[5] = word_in(key, 5);
+    cx->ks[6] = ss[6] = word_in(key, 6);
+    cx->ks[7] = ss[7] = word_in(key, 7);
+    {   aes_32t i;
+
+        for(i = 0; i < (15 * N_COLS - 9) / 8; ++i)
+            ke8(cx->ks,  i);
+        /* i keeps the value it had when the loop ended, so this is the
+           cycle immediately after the last ke8 cycle */
+        kel8(cx->ks,  i);
+#if !(DEC_ROUND == NO_TABLES)
+        for(i = N_COLS; i < 14 * N_COLS; ++i)
+            cx->ks[i] = inv_mcol(cx->ks[i]);
+
+#endif
+    }
+#else
+    cx->ks[4] = ff(ss[4] = word_in(key, 4));
+    cx->ks[5] = ff(ss[5] = word_in(key, 5));
+    cx->ks[6] = ff(ss[6] = word_in(key, 6));
+    cx->ks[7] = ff(ss[7] = word_in(key, 7));
+    kdf8(cx->ks, 0); kd8(cx->ks, 1);
+    kd8(cx->ks, 2);  kd8(cx->ks, 3);
+    kd8(cx->ks, 4);  kd8(cx->ks, 5);
+    kdl8(cx->ks, 6);
+#endif
+    cx->rn = 14;
+#if defined( AES_ERR_CHK )
+    return aes_good;
+#endif
+}
+
+#endif
+
+#if defined(AES_VAR)
+
+/* Select the decryption key schedule routine from key_len, which may be
+   given in bytes (16, 24, 32) or in bits (128, 192, 256).
+
+   With AES_ERR_CHK the result of the selected routine is returned, or
+   aes_error for any other key_len. Without AES_ERR_CHK the switch has no
+   default case, so any other key_len returns without writing to cx.
+*/
+aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1])
+{
+    switch(key_len)
+    {
+#if defined( AES_ERR_CHK )
+    case 16: case 128: return aes_decrypt_key128(key, cx);
+    case 24: case 192: return aes_decrypt_key192(key, cx);
+    case 32: case 256: return aes_decrypt_key256(key, cx);
+    default: return aes_error;
+#else
+    case 16: case 128: aes_decrypt_key128(key, cx); return;
+    case 24: case 192: aes_decrypt_key192(key, cx); return;
+    case 32: case 256: aes_decrypt_key256(key, cx); return;
+#endif
+    }
+}
+
+#endif
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/bsd/crypto/aes/gen/aesopt.h b/bsd/crypto/aes/gen/aesopt.h
new file mode 100644
index 000000000..2b78eb920
--- /dev/null
+++ b/bsd/crypto/aes/gen/aesopt.h
@@ -0,0 +1,753 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+ 1. distributions of this source code include the above copyright
+ notice, this list of conditions and the following disclaimer;
+
+ 2. distributions in binary form include the above copyright
+ notice, this list of conditions and the following disclaimer
+ in the documentation and/or other associated materials;
+
+ 3. the copyright holder's name is not used to endorse products
+ built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 28/01/2004
+
+ My thanks go to Dag Arne Osvik for devising the schemes used here for key
+ length derivation from the form of the key schedule
+
+ This file contains the compilation options for AES (Rijndael) and code
+ that is common across encryption, key scheduling and table generation.
+ + OPERATION + + These source code files implement the AES algorithm Rijndael designed by + Joan Daemen and Vincent Rijmen. This version is designed for the standard + block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 + and 32 bytes). + + This version is designed for flexibility and speed using operations on + 32-bit words rather than operations on bytes. It can be compiled with + either big or little endian internal byte order but is faster when the + native byte order for the processor is used. + + THE CIPHER INTERFACE + + The cipher interface is implemented as an array of bytes in which lower + AES bit sequence indexes map to higher numeric significance within bytes. + + aes_08t (an unsigned 8-bit type) + aes_32t (an unsigned 32-bit type) + struct aes_encrypt_ctx (structure for the cipher encryption context) + struct aes_decrypt_ctx (structure for the cipher decryption context) + aes_rval the function return type + + C subroutine calls: + + aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); + aes_rval aes_encrypt(const unsigned char *in, unsigned char *out, + const aes_encrypt_ctx cx[1]); + + aes_rval aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); + aes_rval aes_decrypt(const unsigned char *in, unsigned char *out, + const aes_decrypt_ctx cx[1]); + + IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that + you call genTabs() before AES is used so that the tables are initialised. 
+ + C++ aes class subroutines: + + Class AESencrypt for encryption + + Constructors: + AESencrypt(void) + AESencrypt(const unsigned char *key) - 128 bit key + Members: + aes_rval key128(const unsigned char *key) + aes_rval key192(const unsigned char *key) + aes_rval key256(const unsigned char *key) + aes_rval encrypt(const unsigned char *in, unsigned char *out) const + + Class AESdecrypt for decryption + Constructors: + AESdecrypt(void) + AESdecrypt(const unsigned char *key) - 128 bit key + Members: + aes_rval key128(const unsigned char *key) + aes_rval key192(const unsigned char *key) + aes_rval key256(const unsigned char *key) + aes_rval decrypt(const unsigned char *in, unsigned char *out) const + + COMPILATION + + The files used to provide AES (Rijndael) are + + a. aes.h for the definitions needed for use in C. + b. aescpp.h for the definitions needed for use in C++. + c. aesopt.h for setting compilation options (also includes common code). + d. aescrypt.c for encryption and decryption, or + e. aeskey.c for key scheduling. + f. aestab.c for table loading or generation. + g. aescrypt.asm for encryption and decryption using assembler code. + h. aescrypt.mmx.asm for encryption and decryption using MMX assembler. + + To compile AES (Rijndael) for use in C code use aes.h and set the + defines here for the facilities you need (key lengths, encryption + and/or decryption). Do not define AES_DLL or AES_CPP. Set the options + for optimisations and table sizes here. + + To compile AES (Rijndael) for use in C++ code use aescpp.h but do + not define AES_DLL + + To compile AES (Rijndael) in C as a Dynamic Link Library (DLL) use + aes.h and include the AES_DLL define. + + CONFIGURATION OPTIONS (here and in aes.h) + + a. set AES_DLL in aes.h if AES (Rijndael) is to be compiled as a DLL + b. You may need to set PLATFORM_BYTE_ORDER to define the byte order. + c. If you want the code to run in a specific internal byte order, then + ALGORITHM_BYTE_ORDER must be set accordingly. 
+ d. set other configuration options decribed below. +*/ + +#if !defined( _AESOPT_H ) +#define _AESOPT_H + +#include + +/* CONFIGURATION - USE OF DEFINES + + Later in this section there are a number of defines that control the + operation of the code. In each section, the purpose of each define is + explained so that the relevant form can be included or excluded by + setting either 1's or 0's respectively on the branches of the related + #if clauses. + + PLATFORM SPECIFIC INCLUDES AND BYTE ORDER IN 32-BIT WORDS + + To obtain the highest speed on processors with 32-bit words, this code + needs to determine the byte order of the target machine. The following + block of code is an attempt to capture the most obvious ways in which + various environemnts define byte order. It may well fail, in which case + the definitions will need to be set by editing at the points marked + **** EDIT HERE IF NECESSARY **** below. My thanks go to Peter Gutmann + for his assistance with this endian detection nightmare. 
+*/ + +#define BRG_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ +#define BRG_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ + +#if defined(__GNUC__) || defined(__GNU_LIBRARY__) +# if defined(__FreeBSD__) || defined(__OpenBSD__) +# include +# elif defined( BSD ) && BSD >= 199103 +# include +# elif defined(__APPLE__) +# if defined(__BIG_ENDIAN__) && !defined( BIG_ENDIAN ) +# define BIG_ENDIAN +# elif defined(__LITTLE_ENDIAN__) && !defined( LITTLE_ENDIAN ) +# define LITTLE_ENDIAN +# endif +# else +# include +# if defined(__BEOS__) +# include +# endif +# endif +#endif + +#if !defined(PLATFORM_BYTE_ORDER) +# if defined(LITTLE_ENDIAN) || defined(BIG_ENDIAN) +# if defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(BYTE_ORDER) && (BYTE_ORDER == LITTLE_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# elif defined(_LITTLE_ENDIAN) || defined(_BIG_ENDIAN) +# if defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(_LITTLE_ENDIAN) && defined(_BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _LITTLE_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif defined(_BYTE_ORDER) && (_BYTE_ORDER == _BIG_ENDIAN) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# elif defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__) +# if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __LITTLE_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +# 
elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __BIG_ENDIAN__) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +# endif +# endif +#endif + +/* if the platform is still unknown, try to find its byte order */ +/* from commonly used machine defines */ + +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __TANDEM ) || defined( THINK_C ) || defined( __VMCMS__ ) +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN + +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER BRG_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER BRG_BIG_ENDIAN +#else +# error Please edit aesopt.h (line 234 or 236) to set the platform byte order +#endif + +#endif + +/* SOME LOCAL DEFINITIONS */ + +#define NO_TABLES 0 +#define ONE_TABLE 1 +#define FOUR_TABLES 4 +#define NONE 0 +#define PARTIAL 1 +#define FULL 2 + +#if defined(bswap32) +#define aes_sw32 bswap32 +#elif defined(bswap_32) +#define aes_sw32 bswap_32 +#else +#define brot(x,n) (((aes_32t)(x) << n) | ((aes_32t)(x) >> (32 - n))) +#define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00)) +#endif + +/* 1. FUNCTIONS REQUIRED + + This implementation provides subroutines for encryption, decryption + and for setting the three key lengths (separately) for encryption + and decryption. 
When the assembler code is not being used the following
+    definition blocks allow the selection of the routines that are to be
+    included in the compilation.
+*/
+#if defined( AES_ENCRYPT )  /* AES_ENCRYPT enables both the cipher and its key schedule */
+#define ENCRYPTION
+#define ENCRYPTION_KEY_SCHEDULE
+#endif
+
+#if defined( AES_DECRYPT )  /* AES_DECRYPT enables both the inverse cipher and its key schedule */
+#define DECRYPTION
+#define DECRYPTION_KEY_SCHEDULE
+#endif
+
+/*  2. ASSEMBLER SUPPORT
+
+    This define (which can be on the command line) enables the use of the
+    assembler code routines for encryption and decryption with the C code
+    only providing key scheduling
+
+    As written, the leading 0 in the condition below means this header never
+    defines AES_ASM itself; AES_ASM is only in effect when it has already
+    been defined before this point.
+*/
+#if 0 && !defined(AES_ASM)
+#define AES_ASM
+#endif
+
+/*  3. BYTE ORDER WITHIN 32 BIT WORDS
+
+    The fundamental data processing units in Rijndael are 8-bit bytes. The
+    input, output and key input are all enumerated arrays of bytes in which
+    bytes are numbered starting at zero and increasing to one less than the
+    number of bytes in the array in question. This enumeration is only used
+    for naming bytes and does not imply any adjacency or order relationship
+    from one byte to another. When these inputs and outputs are considered
+    as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+    byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+    In this implementation bits are numbered from 0 to 7 starting at the
+    numerically least significant end of each byte (bit n represents 2^n).
+
+    However, Rijndael can be implemented more efficiently using 32-bit
+    words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+    into word[n]. While in principle these bytes can be assembled into words
+    in any positions, this implementation only supports the two formats in
+    which bytes in adjacent positions within words also have adjacent byte
+    numbers. This order is called big-endian if the lowest numbered bytes
+    in words have the highest numeric significance and little-endian if the
+    opposite applies.
+
+    This code can work in either order irrespective of the order used by the
+    machine on which it runs. Normally the internal byte order will be set
+    to the order of the processor on which the code is to be run but this
+    define can be used to reverse this in special situations
+
+    NOTE: Assembler code versions rely on PLATFORM_BYTE_ORDER being set
+*/
+#if 1 || defined(AES_ASM)   /* the leading 1 makes this branch unconditional */
+#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif 0                     /* change to 1 (and the first test to 0) to force little-endian */
+#define ALGORITHM_BYTE_ORDER BRG_LITTLE_ENDIAN
+#elif 0                     /* change to 1 (and the first test to 0) to force big-endian */
+#define ALGORITHM_BYTE_ORDER BRG_BIG_ENDIAN
+#else
+#error The algorithm byte order is not defined
+#endif
+
+/*  4. FAST INPUT/OUTPUT OPERATIONS.
+
+    On some machines it is possible to improve speed by transferring the
+    bytes in the input and output arrays to and from the internal 32-bit
+    variables by addressing these arrays as if they are arrays of 32-bit
+    words.  On some machines this will always be possible but there may
+    be a large performance penalty if the byte arrays are not aligned on
+    the normal word boundaries. On other machines this technique will
+    lead to memory access errors when such 32-bit word accesses are not
+    properly aligned. The option SAFE_IO avoids such problems but will
+    often be slower on those machines that support misaligned access
+    (especially so if care is taken to align the input and output byte
+    arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+    assumed that access to byte arrays as if they are arrays of 32-bit
+    words will not cause problems when such accesses are misaligned.
+*/
+#if 0 && !defined(_MSC_VER) /* the leading 0 keeps this header from defining SAFE_IO */
+#define SAFE_IO
+#endif
+
+/*  5. LOOP UNROLLING
+
+    The code for encryption and decryption cycles through a number of rounds
+    that can be implemented either in a loop or by expanding the code into a
+    long sequence of instructions, the latter producing a larger program but
+    one that will often be much faster. The latter is called loop unrolling.
+    There are also potential speed advantages in expanding two iterations in
+    a loop with half the number of iterations, which is called partial loop
+    unrolling.  The following options allow partial or full loop unrolling
+    to be set independently for encryption and decryption
+*/
+#if 1                       /* first branch is active: full unrolling for encryption */
+#define ENC_UNROLL  FULL
+#elif 0
+#define ENC_UNROLL  PARTIAL
+#else
+#define ENC_UNROLL  NONE
+#endif
+
+#if 1                       /* first branch is active: full unrolling for decryption */
+#define DEC_UNROLL  FULL
+#elif 0
+#define DEC_UNROLL  PARTIAL
+#else
+#define DEC_UNROLL  NONE
+#endif
+
+/*  6. FAST FINITE FIELD OPERATIONS
+
+    If this section is included, tables are used to provide faster finite
+    field arithmetic (this has no effect if FIXED_TABLES is defined).
+*/
+#if 1
+#define FF_TABLES
+#endif
+
+/*  7. INTERNAL STATE VARIABLE FORMAT
+
+    The internal state of Rijndael is stored in a number of local 32-bit
+    word variables which can be defined either as an array or as individual
+    named variables. Include this section if you want to store these local
+    variables in arrays. Otherwise individual local variables will be used.
+*/
+#if 0                       /* ARRAYS is left undefined, so s(x,c) below pastes names */
+#define ARRAYS
+#endif
+
+/*  In this implementation the columns of the state array are each held in
+    32-bit words. The state array can be held in various ways: in an array
+    of words, in a number of individual word variables or in a number of
+    processor registers. The following define maps a variable name x and
+    a column number c to the way the state array variable is to be held.
+    The first define below maps the state into an array x[c] whereas the
+    second form maps the state into a number of individual variables x0,
+    x1, etc.  Another form could map individual state columns to machine
+    register names.
+*/
+
+#if defined(ARRAYS)
+#define s(x,c) x[c]         /* array element */
+#else
+#define s(x,c) x##c         /* token pasting: s(b,2) becomes the identifier b2 */
+#endif
+
+/*  8. FIXED OR DYNAMIC TABLES
+
+    When this section is included the tables used by the code are compiled
+    statically into the binary file.
Otherwise the subroutine gen_tabs()
+    must be called to compute them before the code is first used.
+*/
+#if 1
+#define FIXED_TABLES
+#endif
+
+/*  9. TABLE ALIGNMENT
+
+    On some systems speed will be improved by aligning the AES large lookup
+    tables on particular boundaries. This define should be set to a power of
+    two giving the desired alignment. It can be left undefined if alignment
+    is not needed.  This option is specific to the Microsoft VC++ compiler -
+    it seems to sometimes cause trouble for the VC++ version 6 compiler.
+*/
+
+#if 0 && defined(_MSC_VER) && (_MSC_VER >= 1300)    /* the leading 0 leaves TABLE_ALIGN undefined */
+#define TABLE_ALIGN 64
+#endif
+
+/*  10. INTERNAL TABLE CONFIGURATION
+
+    This cipher proceeds by repeating in a number of cycles known as 'rounds'
+    which are implemented by a round function which can optionally be speeded
+    up using tables.  The basic tables are each 256 32-bit words, with either
+    one or four tables being required for each round function depending on
+    how much speed is required. The encryption and decryption round functions
+    are different and the last encryption and decryption round functions are
+    different again making four different round functions in all.
+
+    This means that:
+      1. Normal encryption and decryption rounds can each use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+      2. The last encryption and decryption rounds can also use either 0, 1
+         or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+
+    Include or exclude the appropriate definitions below to set the number
+    of tables used by this implementation.
+*/
+
+#if 1   /* set tables for the normal encryption round */
+#define ENC_ROUND   FOUR_TABLES
+#elif 0
+#define ENC_ROUND   ONE_TABLE
+#else
+#define ENC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last encryption round */
+#define LAST_ENC_ROUND  FOUR_TABLES
+#elif 0
+#define LAST_ENC_ROUND  ONE_TABLE
+#else
+#define LAST_ENC_ROUND  NO_TABLES
+#endif
+
+#if 1   /* set tables for the normal decryption round */
+#define DEC_ROUND   FOUR_TABLES
+#elif 0
+#define DEC_ROUND   ONE_TABLE
+#else
+#define DEC_ROUND   NO_TABLES
+#endif
+
+#if 1   /* set tables for the last decryption round */
+#define LAST_DEC_ROUND  FOUR_TABLES
+#elif 0
+#define LAST_DEC_ROUND  ONE_TABLE
+#else
+#define LAST_DEC_ROUND  NO_TABLES
+#endif
+
+/*  The decryption key schedule can be speeded up with tables in the same
+    way that the round functions can.  Include or exclude the following
+    defines to set this requirement.
+*/
+#if 1   /* first branch is active: four tables for the key schedule */
+#define KEY_SCHED   FOUR_TABLES
+#elif 0
+#define KEY_SCHED   ONE_TABLE
+#else
+#define KEY_SCHED   NO_TABLES
+#endif
+
+/*  11. TABLE POINTER CACHING
+
+    Normally tables are referenced directly, Enable this option if you wish to
+    cache pointers to the tables in the encrypt/decrypt code.  Note that this
+    only works if you are using FOUR_TABLES for the ROUND you enable this for.
+*/ +#if 1 +#define ENC_ROUND_CACHE_TABLES +#endif +#if 1 +#define LAST_ENC_ROUND_CACHE_TABLES +#endif +#if 1 +#define DEC_ROUND_CACHE_TABLES +#endif +#if 1 +#define LAST_DEC_ROUND_CACHE_TABLES +#endif + + +/* END OF CONFIGURATION OPTIONS */ + +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +/* Disable or report errors on some combinations of options */ + +#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND NO_TABLES +#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES +#undef LAST_ENC_ROUND +#define LAST_ENC_ROUND ONE_TABLE +#endif + +#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE +#undef ENC_UNROLL +#define ENC_UNROLL NONE +#endif + +#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND NO_TABLES +#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES +#undef LAST_DEC_ROUND +#define LAST_DEC_ROUND ONE_TABLE +#endif + +#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE +#undef DEC_UNROLL +#define DEC_UNROLL NONE +#endif + +/* upr(x,n): rotates bytes within words by n positions, moving bytes to + higher index positions with wrap around into low positions + ups(x,n): moves bytes by n positions to higher index positions in + words but without wrap around + bval(x,n): extracts a byte from a word + + NOTE: The definitions given here are intended only for use with + unsigned variables and with shift counts that are compile + time constants +*/ + +#if (ALGORITHM_BYTE_ORDER == BRG_LITTLE_ENDIAN) +#define upr(x,n) (((aes_32t)(x) << (8 * (n))) | ((aes_32t)(x) >> (32 - 8 * (n)))) +#define ups(x,n) ((aes_32t) (x) << (8 * (n))) +#define bval(x,n) ((aes_08t)((x) >> (8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((aes_32t)(b3) << 24) | ((aes_32t)(b2) << 16) | ((aes_32t)(b1) << 8) | (b0)) +#endif + +#if (ALGORITHM_BYTE_ORDER == BRG_BIG_ENDIAN) +#define upr(x,n) (((aes_32t)(x) >> (8 * (n))) | ((aes_32t)(x) << (32 - 8 * (n)))) +#define ups(x,n) 
((aes_32t) (x) >> (8 * (n)))) +#define bval(x,n) ((aes_08t)((x) >> (24 - 8 * (n)))) +#define bytes2word(b0, b1, b2, b3) \ + (((aes_32t)(b0) << 24) | ((aes_32t)(b1) << 16) | ((aes_32t)(b2) << 8) | (b3)) +#endif + +#if defined(SAFE_IO) + +#define word_in(x,c) bytes2word(((aes_08t*)(x)+4*c)[0], ((aes_08t*)(x)+4*c)[1], \ + ((aes_08t*)(x)+4*c)[2], ((aes_08t*)(x)+4*c)[3]) +#define word_out(x,c,v) { ((aes_08t*)(x)+4*c)[0] = bval(v,0); ((aes_08t*)(x)+4*c)[1] = bval(v,1); \ + ((aes_08t*)(x)+4*c)[2] = bval(v,2); ((aes_08t*)(x)+4*c)[3] = bval(v,3); } + +#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER) + +#define word_in(x,c) (*((aes_32t*)(x)+(c))) +#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = (v)) + +#else + +#define word_in(x,c) aes_sw32(*((aes_32t*)(x)+(c))) +#define word_out(x,c,v) (*((aes_32t*)(x)+(c)) = aes_sw32(v)) + +#endif + +/* the finite field modular polynomial and elements */ + +#define WPOLY 0x011b +#define BPOLY 0x1b + +/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ + +#define m1 0x80808080 +#define m2 0x7f7f7f7f +#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY)) + +/* The following defines provide alternative definitions of gf_mulx that might + give improved performance if a fast 32-bit multiply is not available. Note + that a temporary variable u needs to be defined where gf_mulx is used. 
+ +#define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6)) +#define m4 (0x01010101 * BPOLY) +#define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4) +*/ + +/* Work out which tables are needed for the different options */ + +#if defined( AES_ASM ) +#if defined( ENC_ROUND ) +#undef ENC_ROUND +#endif +#define ENC_ROUND FOUR_TABLES +#if defined( LAST_ENC_ROUND ) +#undef LAST_ENC_ROUND +#endif +#define LAST_ENC_ROUND FOUR_TABLES +#if defined( DEC_ROUND ) +#undef DEC_ROUND +#endif +#define DEC_ROUND FOUR_TABLES +#if defined( LAST_DEC_ROUND ) +#undef LAST_DEC_ROUND +#endif +#define LAST_DEC_ROUND FOUR_TABLES +#if defined( KEY_SCHED ) +#undef KEY_SCHED +#define KEY_SCHED FOUR_TABLES +#endif +#endif + +#if defined(ENCRYPTION) || defined(AES_ASM) +#if ENC_ROUND == ONE_TABLE +#define FT1_SET +#elif ENC_ROUND == FOUR_TABLES +#define FT4_SET +#else +#define SBX_SET +#endif +#if LAST_ENC_ROUND == ONE_TABLE +#define FL1_SET +#elif LAST_ENC_ROUND == FOUR_TABLES +#define FL4_SET +#elif !defined(SBX_SET) +#define SBX_SET +#endif +#endif + +#if defined(DECRYPTION) || defined(AES_ASM) +#if DEC_ROUND == ONE_TABLE +#define IT1_SET +#elif DEC_ROUND == FOUR_TABLES +#define IT4_SET +#else +#define ISB_SET +#endif +#if LAST_DEC_ROUND == ONE_TABLE +#define IL1_SET +#elif LAST_DEC_ROUND == FOUR_TABLES +#define IL4_SET +#elif !defined(ISB_SET) +#define ISB_SET +#endif +#endif + +#if defined(ENCRYPTION_KEY_SCHEDULE) || defined(DECRYPTION_KEY_SCHEDULE) +#if KEY_SCHED == ONE_TABLE +#define LS1_SET +#define IM1_SET +#elif KEY_SCHED == FOUR_TABLES +#define LS4_SET +#define IM4_SET +#elif !defined(SBX_SET) +#define SBX_SET +#endif +#endif + +/* generic definitions of Rijndael macros that use tables */ + +#define no_table(x,box,vf,rf,c) bytes2word( \ + box[bval(vf(x,0,c),rf(0,c))], \ + box[bval(vf(x,1,c),rf(1,c))], \ + box[bval(vf(x,2,c),rf(2,c))], \ + box[bval(vf(x,3,c),rf(3,c))]) + +#define one_table(x,op,tab,vf,rf,c) \ + ( 
tab[bval(vf(x,0,c),rf(0,c))] \ + ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \ + ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \ + ^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) + +#define four_tables(x,tab,vf,rf,c) \ + ( tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +#define four_cached_tables(x,tab,vf,rf,c) \ +( tab##0[bval(vf(x,0,c),rf(0,c))] \ + ^ tab##1[bval(vf(x,1,c),rf(1,c))] \ + ^ tab##2[bval(vf(x,2,c),rf(2,c))] \ + ^ tab##3[bval(vf(x,3,c),rf(3,c))]) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((8+r-c)&3) + +/* perform forward and inverse column mix operation on four bytes in long word x in */ +/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ + +#if defined(FM4_SET) /* not currently used */ +#define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0) +#elif defined(FM1_SET) /* not currently used */ +#define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0) +#else +#define dec_fmvars aes_32t g2 +#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1)) +#endif + +#if defined(IM4_SET) +#define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0) +#elif defined(IM1_SET) +#define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0) +#else +#define dec_imvars aes_32t g2, g4, g9 +#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \ + (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1)) +#endif + +#if defined(FL4_SET) +#define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c) +#elif defined(LS4_SET) +#define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c) +#elif defined(FL1_SET) +#define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c) +#elif defined(LS1_SET) +#define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c) +#else +#define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c) +#endif + +#endif diff --git a/bsd/crypto/aes/gen/aestab.c b/bsd/crypto/aes/gen/aestab.c 
new file mode 100644 index 000000000..dfd2ee969 --- /dev/null +++ b/bsd/crypto/aes/gen/aestab.c @@ -0,0 +1,384 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue 28/01/2004 + +*/ + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define DO_TABLES + +#include "aesopt.h" + +#if defined(FIXED_TABLES) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), 
w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define isb_data(w) {\ + w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\ + w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\ + w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\ + w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\ + w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\ + w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\ + w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\ + w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\ + w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\ + w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\ + w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\ + w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\ + w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\ + w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\ + w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\ + w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\ + w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\ + w(0x97), w(0xf2), w(0xcf), w(0xce), 
w(0xf0), w(0xb4), w(0xe6), w(0x73),\ + w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\ + w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\ + w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\ + w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\ + w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\ + w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\ + w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\ + w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\ + w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\ + w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\ + w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\ + w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\ + w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\ + w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) } + +#define mm_data(w) {\ + w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\ + w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\ + w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\ + w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\ + w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\ + w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\ + w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\ + w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\ + w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\ + w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\ + w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\ + w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), 
w(0x5e), w(0x5f),\ + w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\ + w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\ + w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\ + w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\ + w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\ + w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\ + w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\ + w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\ + w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\ + w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\ + w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\ + w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\ + w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\ + w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\ + w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\ + w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\ + w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\ + w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\ + w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\ + w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define h0(x) (x) + +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) 
+ +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#endif + +#if defined(FIXED_TABLES) || !defined(FF_TABLES) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) \ + ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#else + +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[ 255 - log[x]] : 0) + +#endif + +#include "aestab.h" + +#if defined(FIXED_TABLES) + +/* implemented in case of wrong call for fixed tables */ + +void gen_tabs(void) +{ +} + +#else /* dynamic table generation */ + +#if !defined(FF_TABLES) + +/* Generate the tables for the dynamic table option + + It will generally be sensible to use tables to compute finite + field multiplies and inverses but where memory is scarse this + code might sometimes be better. But it only has effect during + initialisation so its pretty unimportant in overall terms. +*/ + +/* return 2 ^ (n - 1) where n is the bit number of the highest bit + set in x with x in the range 1 < x < 0x00000200. 
This form is
+    used so that locals within fi can be bytes rather than words
+*/
+
+static aes_08t hibit(const aes_32t x)
+{   aes_08t r = (aes_08t)((x >> 1) | (x >> 2));    /* x>>1 merged with x>>2 */
+
+    r |= (r >> 2);          /* keep smearing the top set bit downwards ...    */
+    r |= (r >> 4);          /* ... until every bit below it is set as well     */
+    return (r + 1) >> 1;    /* r + 1 is one bit above the top bit; halve it    */
+}
+
+/* return the inverse of the finite field element x
+
+   x < 2 is returned unchanged, so fi(0) == 0 and fi(1) == 1. Otherwise the
+   two inner loops alternately reduce p2 by p1 and p1 by p2, applying the
+   same update to v2 and v1. n1 and n2 hold hibit(p1) and hibit(p2), except
+   that n2 starts at the constant 0x80 rather than hibit(BPOLY). As soon as
+   n1 (or n2) reaches zero, v1 (or v2) is returned.
+
+   NOTE(review): this looks like an extended Euclidean algorithm on
+   polynomials over GF(2), with the 9-bit field polynomial WPOLY represented
+   by its low byte BPOLY plus the initial n2 - confirm before relying on
+   this description.
+*/
+
+static aes_08t fi(const aes_08t x)
+{   aes_08t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
+
+    if(x < 2) return x;
+
+    for(;;)
+    {
+        if(!n1) return v1;
+
+        while(n2 >= n1)
+        {
+            n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
+        }
+
+        if(!n2) return v2;
+
+        while(n1 >= n2)
+        {
+            n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
+        }
+    }
+}
+
+#endif
+
+/* The forward and inverse affine transformations used in the S-box
+
+   Both macros assign to a variable named w, which must be in scope where
+   they are expanded (gen_tabs declares it), and yield the transformed byte.
+*/
+
+#define fwd_affine(x) \
+    (w = (aes_32t)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(aes_08t)(w^(w>>8)))
+
+#define inv_affine(x) \
+    (w = (aes_32t)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(aes_08t)(w^(w>>8)))
+
+static int init = 0;    /* set to 1 at the end of gen_tabs(); later calls return early */
+/* gen_tabs: compute every table selected by the *_SET macros.
+
+   The first loop fills RC_LENGTH entries of the t_set(r,c) table, starting
+   from 1 and applying f2 at each step. The second loop runs i over 0..255:
+   b = fwd_affine(fi(i)) feeds the forward tables, then b = fi(inv_affine(i))
+   feeds the inverse tables. The 4-table variants store w and its three
+   byte rotations upr(w,1..3).
+
+   NOTE(review): init is a plain static int and nothing here locks it, so
+   two concurrent first calls are not guarded against in this function.
+*/
+void gen_tabs(void)
+{   aes_32t  i, w;
+
+#if defined(FF_TABLES)
+
+    aes_08t  pow[512], log[256];
+
+    if(init) return;
+    /*  log and power tables for GF(2^8) finite field with
+        WPOLY as modular polynomial - the simplest primitive
+        root is 0x03, used here to generate the tables
+
+        each power is stored at pow[i] and again at pow[i + 255]; the
+        f2..fe macros index pow[log[x] + constant] directly
+    */
+
+    i = 0; w = 1;
+    do
+    {
+        pow[i] = (aes_08t)w;
+        pow[i + 255] = (aes_08t)w;
+        log[w] = (aes_08t)i++;
+        w ^=  (w << 1) ^ (w & 0x80 ? WPOLY : 0);   /* multiply w by 0x03, reducing by WPOLY */
+    }
+    while (w != 1);     /* stop once the powers wrap back to 1 */
+
+#else
+    if(init) return;
+#endif
+
+    for(i = 0, w = 1; i < RC_LENGTH; ++i)
+    {
+        t_set(r,c)[i] = bytes2word(w, 0, 0, 0);
+        w = f2(w);
+    }
+
+    for(i = 0; i < 256; ++i)
+    {   aes_08t    b;
+
+        b = fwd_affine(fi((aes_08t)i));
+        w = bytes2word(f2(b), b, b, f3(b));
+
+#if defined( SBX_SET )
+        t_set(s,box)[i] = b;
+#endif
+
+#if defined( FT1_SET )                 /* tables for a normal encryption round */
+        t_set(f,n)[i] = w;
+#endif
+#if defined( FT4_SET )
+        t_set(f,n)[0][i] = w;
+        t_set(f,n)[1][i] = upr(w,1);
+        t_set(f,n)[2][i] = upr(w,2);
+        t_set(f,n)[3][i] = upr(w,3);
+#endif
+        w = bytes2word(b, 0, 0, 0);
+
+#if defined( FL1_SET )                 /* tables for last encryption round (may also   */
+        t_set(f,l)[i] = w;        /* be used in the key schedule)                 */
+#endif
+#if defined( FL4_SET )
+        t_set(f,l)[0][i] = w;
+        t_set(f,l)[1][i] = upr(w,1);
+        t_set(f,l)[2][i] = upr(w,2);
+        t_set(f,l)[3][i] = upr(w,3);
+#endif
+
+#if defined( LS1_SET )                 /* table for key schedule if t_set(f,l) above is    */
+        t_set(l,s)[i] = w;      /* not of the required form                     */
+#endif
+#if defined( LS4_SET )
+        t_set(l,s)[0][i] = w;
+        t_set(l,s)[1][i] = upr(w,1);
+        t_set(l,s)[2][i] = upr(w,2);
+        t_set(l,s)[3][i] = upr(w,3);
+#endif
+
+        b = fi(inv_affine((aes_08t)i));
+        w = bytes2word(fe(b), f9(b), fd(b), fb(b));
+
+#if defined( IM1_SET )                 /* tables for the inverse mix column operation  */
+        t_set(i,m)[b] = w;      /* stored at index b (not i): w was computed from b */
+#endif
+#if defined( IM4_SET )
+        t_set(i,m)[0][b] = w;
+        t_set(i,m)[1][b] = upr(w,1);
+        t_set(i,m)[2][b] = upr(w,2);
+        t_set(i,m)[3][b] = upr(w,3);
+#endif
+
+#if defined( ISB_SET )
+        t_set(i,box)[i] = b;
+#endif
+#if defined( IT1_SET )                 /* tables for a normal decryption round */
+        t_set(i,n)[i] = w;
+#endif
+#if defined( IT4_SET )
+        t_set(i,n)[0][i] = w;
+        t_set(i,n)[1][i] = upr(w,1);
+        t_set(i,n)[2][i] = upr(w,2);
+        t_set(i,n)[3][i] = upr(w,3);
+#endif
+        w = bytes2word(b, 0, 0, 0);
+#if defined( IL1_SET )                 /* tables for last decryption round */
+        t_set(i,l)[i] = w;
+#endif
+#if defined( IL4_SET )
+        t_set(i,l)[0][i] = w;
+        t_set(i,l)[1][i] = upr(w,1);
+        t_set(i,l)[2][i] = upr(w,2);
+        t_set(i,l)[3][i] = upr(w,3);
+#endif
+    }
+    init = 1;
+}
+
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
diff --git a/bsd/crypto/aes/gen/aestab.h b/bsd/crypto/aes/gen/aestab.h
new file mode 100644
index 000000000..004ef9e74
--- /dev/null
+++ b/bsd/crypto/aes/gen/aestab.h
@@ -0,0 +1,175 @@
+/*
+ ---------------------------------------------------------------------------
+ Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
+
+ LICENSE TERMS
+
+ The free distribution and use of this software in both source and binary
+ form is allowed (with or without changes) provided that:
+
+   1. distributions of this source code include the above copyright
+      notice, this list of conditions and the following disclaimer;
+
+   2. distributions in binary form include the above copyright
+      notice, this list of conditions and the following disclaimer
+      in the documentation and/or other associated materials;
+
+   3. the copyright holder's name is not used to endorse products
+      built using this software without specific written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this product
+ may be distributed under the terms of the GNU General Public License (GPL),
+ in which case the provisions of the GPL apply INSTEAD OF those given above.
+
+ DISCLAIMER
+
+ This software is provided 'as is' with no explicit or implied warranties
+ in respect of its properties, including, but not limited to, correctness
+ and/or fitness for purpose.
+ ---------------------------------------------------------------------------
+ Issue 28/01/2004
+
+ This file contains the code for declaring the tables needed to implement
+ AES. The file aesopt.h is assumed to be included before this header file.
+ If there are no global variables, the definitions here can be used to put + the AES tables in a structure so that a pointer can then be added to the + AES context to pass them to the AES routines that need them. If this + facility is used, the calling program has to ensure that this pointer is + managed appropriately. In particular, the value of the t_dec(in,it) item + in the table structure must be set to zero in order to ensure that the + tables are initialised. In practice the three code sequences in aeskey.c + that control the calls to gen_tabs() and the gen_tabs() routine itself will + have to be changed for a specific implementation. If global variables are + available it will generally be preferable to use them with the precomputed + FIXED_TABLES option that uses static global tables. + + The following defines can be used to control the way the tables + are defined, initialised and used in embedded environments that + require special features for these purposes + + the 't_dec' construction is used to declare fixed table arrays + the 't_set' construction is used to set fixed table values + the 't_use' construction is used to access fixed table values + + 256 byte tables: + + t_xxx(s,box) => forward S box + t_xxx(i,box) => inverse S box + + 256 32-bit word OR 4 x 256 32-bit word tables: + + t_xxx(f,n) => forward normal round + t_xxx(f,l) => forward last round + t_xxx(i,n) => inverse normal round + t_xxx(i,l) => inverse last round + t_xxx(l,s) => key schedule table + t_xxx(i,m) => key schedule table + + Other variables and tables: + + t_xxx(r,c) => the rcon table +*/ + +#if !defined( _AESTAB_H ) +#define _AESTAB_H + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#if defined(FIXED_TABLES) +#define Const const +#else +#define Const +#endif + +#if defined(DO_TABLES) +#define Extern +#else +#define Extern extern +#endif + +#if defined(_MSC_VER) && defined(TABLE_ALIGN) +#define Align __declspec(align(TABLE_ALIGN)) +#else 
+#define Align +#endif + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#if defined(DO_TABLES) && defined(FIXED_TABLES) +#define d_1(t,n,b,e) Align Const t n[256] = b(e) +#define d_4(t,n,b,e,f,g,h) Align Const t n[4][256] = { b(e), b(f), b(g), b(h) } +Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH] = rc_data(w0); +#else +#define d_1(t,n,b,e) Extern Align Const t n[256] +#define d_4(t,n,b,e,f,g,h) Extern Align Const t n[4][256] +Extern Align Const aes_32t t_dec(r,c)[RC_LENGTH]; +#endif + +#if defined( SBX_SET ) + d_1(aes_08t, t_dec(s,box), sb_data, h0); +#endif +#if defined( ISB_SET ) + d_1(aes_08t, t_dec(i,box), isb_data, h0); +#endif + +#if defined( FT1_SET ) + d_1(aes_32t, t_dec(f,n), sb_data, u0); +#endif +#if defined( FT4_SET ) + d_4(aes_32t, t_dec(f,n), sb_data, u0, u1, u2, u3); +#endif + +#if defined( FL1_SET ) + d_1(aes_32t, t_dec(f,l), sb_data, w0); +#endif +#if defined( FL4_SET ) + d_4(aes_32t, t_dec(f,l), sb_data, w0, w1, w2, w3); +#endif + +#if defined( IT1_SET ) + d_1(aes_32t, t_dec(i,n), isb_data, v0); +#endif +#if defined( IT4_SET ) + d_4(aes_32t, t_dec(i,n), isb_data, v0, v1, v2, v3); +#endif + +#if defined( IL1_SET ) + d_1(aes_32t, t_dec(i,l), isb_data, w0); +#endif +#if defined( IL4_SET ) + d_4(aes_32t, t_dec(i,l), isb_data, w0, w1, w2, w3); +#endif + +#if defined( LS1_SET ) +#if defined( FL1_SET ) +#undef LS1_SET +#else + d_1(aes_32t, t_dec(l,s), sb_data, w0); +#endif +#endif + +#if defined( LS4_SET ) +#if defined( FL4_SET ) +#undef LS4_SET +#else + d_4(aes_32t, t_dec(l,s), sb_data, w0, w1, w2, w3); +#endif +#endif + +#if defined( IM1_SET ) + d_1(aes_32t, t_dec(i,m), mm_data, v0); +#endif +#if defined( IM4_SET ) + d_4(aes_32t, t_dec(i,m), mm_data, v0, v1, v2, v3); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/bsd/crypto/blowfish/Makefile b/bsd/crypto/blowfish/Makefile index 0521cc6fd..6b3066a93 100644 --- a/bsd/crypto/blowfish/Makefile +++ b/bsd/crypto/blowfish/Makefile @@ -13,12 +13,16 @@ INSTINC_SUBDIRS_PPC = \ 
INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + PRIVATE_DATAFILES = \ blowfish.h diff --git a/bsd/crypto/cast128/Makefile b/bsd/crypto/cast128/Makefile index d214498bb..6eb76064a 100644 --- a/bsd/crypto/cast128/Makefile +++ b/bsd/crypto/cast128/Makefile @@ -13,12 +13,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + PRIVATE_DATAFILES = \ cast128.h diff --git a/bsd/crypto/des/Makefile b/bsd/crypto/des/Makefile index c4c411272..df4545d55 100644 --- a/bsd/crypto/des/Makefile +++ b/bsd/crypto/des/Makefile @@ -13,6 +13,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ @@ -21,6 +23,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ PRIVATE_DATAFILES = \ diff --git a/bsd/crypto/rc4/Makefile b/bsd/crypto/rc4/Makefile index 7efd5ff1e..4de505de8 100644 --- a/bsd/crypto/rc4/Makefile +++ b/bsd/crypto/rc4/Makefile @@ -13,6 +13,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ @@ -21,6 +23,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ PRIVATE_DATAFILES = \ diff --git a/bsd/crypto/sha2/Makefile b/bsd/crypto/sha2/Makefile index a89ecdaff..8e85f612c 100644 --- a/bsd/crypto/sha2/Makefile +++ b/bsd/crypto/sha2/Makefile @@ -13,6 +13,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ @@ -21,6 +23,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ PRIVATE_DATAFILES = \ diff --git a/bsd/dev/Makefile b/bsd/dev/Makefile index 66b0e557c..b2f00140a 100644 --- a/bsd/dev/Makefile 
+++ b/bsd/dev/Makefile @@ -13,12 +13,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + INSTALL_MI_LIST = INSTALL_MI_DIR = dev diff --git a/bsd/dev/chud/chud_bsd_callback.c b/bsd/dev/chud/chud_bsd_callback.c index bfb0aeb61..6fad80050 100644 --- a/bsd/dev/chud/chud_bsd_callback.c +++ b/bsd/dev/chud/chud_bsd_callback.c @@ -35,25 +35,42 @@ #include /* proc_t */ #include /* struct sysent */ #include +#include /* KDEBUG_ENABLE_CHUD */ +#include + +#ifdef __ppc__ +#include + +#define FM_ARG0 0x38ULL // offset from r1 to first argument +#define SPILLED_WORD_COUNT 7 // number of 32-bit words spilled to the stack + +extern struct savearea * find_user_regs( thread_t act); +#endif #pragma mark **** kern debug **** -typedef void (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); -static chudxnu_kdebug_callback_func_t kdebug_callback_fn = NULL; +typedef void (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); +static void chud_null_kdebug(uint32_t debugid, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); +static chudxnu_kdebug_callback_func_t kdebug_callback_fn = chud_null_kdebug; kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t); kern_return_t chudxnu_kdebug_callback_cancel(void); extern void kdbg_control_chud(int val, void *fn); -extern unsigned int kdebug_enable; + +static void chud_null_kdebug(uint32_t debugid __unused, uintptr_t arg0 __unused, + uintptr_t arg1 __unused, uintptr_t arg2 __unused, uintptr_t arg3 __unused, + uintptr_t arg4 __unused) { + return; +} static void chudxnu_private_kdebug_callback( - unsigned int debugid, - unsigned int arg0, - unsigned int arg1, - unsigned int arg2, - unsigned int arg3, - unsigned int arg4) + 
uint32_t debugid, + uintptr_t arg0, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4) { chudxnu_kdebug_callback_func_t fn = kdebug_callback_fn; @@ -65,39 +82,132 @@ chudxnu_private_kdebug_callback( __private_extern__ kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func) { - kdebug_callback_fn = func; - - kdbg_control_chud(TRUE, (void *)chudxnu_private_kdebug_callback); - kdebug_enable |= 0x10; - - return KERN_SUCCESS; + /* Atomically set the callback. */ + if(OSCompareAndSwapPtr(chud_null_kdebug, func, + (void * volatile *)&kdebug_callback_fn)) { + + kdbg_control_chud(TRUE, (void *)chudxnu_private_kdebug_callback); + OSBitOrAtomic((UInt32)KDEBUG_ENABLE_CHUD, (volatile UInt32 *)&kdebug_enable); + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_kdebug_callback_cancel(void) { - kdebug_callback_fn = NULL; - kdbg_control_chud(FALSE, NULL); - kdebug_enable &= ~(0x10); + OSBitAndAtomic((UInt32)~(KDEBUG_ENABLE_CHUD), (volatile UInt32 *)&kdebug_enable); + kdbg_control_chud(FALSE, NULL); + + chudxnu_kdebug_callback_func_t old = kdebug_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_kdebug, + (void * volatile *)&kdebug_callback_fn)) { + old = kdebug_callback_fn; + } return KERN_SUCCESS; } #pragma mark **** CHUD syscall **** -typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint32_t code, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); -static chudxnu_syscall_callback_func_t syscall_callback_fn = NULL; +typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint64_t code, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); + +static kern_return_t chud_null_syscall(uint64_t code, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); +static chudxnu_syscall_callback_func_t syscall_callback_fn = chud_null_syscall; kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t 
func); kern_return_t chudxnu_syscall_callback_cancel(void); +static kern_return_t chud_null_syscall(uint64_t code __unused, + uint64_t arg0 __unused, uint64_t arg1 __unused, uint64_t arg2 __unused, + uint64_t arg3 __unused, uint64_t arg4 __unused) { + return (kern_return_t)EINVAL; +} + +/* + * chud + * + * Performs performance-related tasks. A private interface registers a handler for this + * system call. The implementation is in the CHUDProf kernel extension. + * + * chud() is a callback style system call used by the CHUD Tools suite of performance tools. If the CHUD + * kexts are not loaded, this system call will always return EINVAL. The CHUD kexts contain the + * implementation of the system call. + * + * The current behavior of the chud() system call is as follows: + * + * Parameters: p Calling process (only consulted on ppc, to detect 32-bit callers) + * uap User argument descriptor (see below) + * retval return value of fn (the function currently stored in syscall_callback_fn) + * + * Indirect parameters: uap->code Selects the operation to do. This is broken down into a + * 16-bit facility and a 16-bit action. + * + * The rest of the indirect parameters depend on the facility and the action that is selected: + * + * Facility: 1 Amber instruction tracer + * Action: 1 Indicate that a new thread has been created. No arguments are used. + * + * Action: 2 Indicate that a thread is about to exit. No arguments are used. + * + * Facility: 2 Not Supported for this system call + * + * Facility: 3 CHUD Trace facility + * Action: 1 Record a backtrace of the calling process into the CHUD Trace facility sample + * buffer. + * + * uap->arg1 Number of frames to skip + * uap->arg2 Pointer to a uint64_t containing a timestamp for the + * beginning of the sample. NULL uses the current time. + * uap->arg3 Pointer to a uint64_t containing a timestamp for the end + * of the sample. NULL uses the current time. + * uap->arg4 Pointer to auxiliary data to be recorded with the sample + * uap->arg5 Size of the auxiliary data pointed to by arg4. 
+ * + * Returns: EINVAL If syscall_callback_fn is NULL, or (ppc only) the 32-bit caller's saved registers cannot be found + * KERN_SUCCESS Success + * KERN_FAILURE Generic failure + * KERN_NO_SPACE Auxiliary data is too large (only used by Facility: 3) + * + * Implicit returns: retval return value of fn (the function currently stored in syscall_callback_fn) + */ int -chud(__unused proc_t p, struct chud_args *uap, register_t *retval) +chud(__unused proc_t p, struct chud_args *uap, int32_t *retval) { chudxnu_syscall_callback_func_t fn = syscall_callback_fn; if(!fn) { return EINVAL; } + +#ifdef __ppc__ + // ppc32 user land spills 2.5 64-bit args (5 x 32-bit) to the stack + // here we have to copy them out. r1 is the stack pointer in this world. + // the offset is calculated according to the PPC32 ABI + // Important: this only happens for 32-bit user threads + + if(!IS_64BIT_PROCESS(p)) { + struct savearea *regs = find_user_regs(current_thread()); + if(!regs) { + return EINVAL; + } + + // %r1 is the stack pointer on ppc32 + uint32_t stackPointer = regs->save_r1; + + // calculate number of bytes spilled to the stack + uint32_t spilledSize = sizeof(struct chud_args) - (sizeof(uint32_t) * SPILLED_WORD_COUNT); + + // obtain offset to arguments spilled onto user-thread stack + user_addr_t incomingAddr = (user_addr_t)stackPointer + FM_ARG0; + + // destination is halfway through arg3 + uint8_t *dstAddr = (uint8_t*)(&(uap->arg3)) + sizeof(uint32_t); + + copyin(incomingAddr, dstAddr, spilledSize); /* NOTE(review): copyin() result is ignored - confirm a failed copy is acceptable */ + } +#endif *retval = fn(uap->code, uap->arg1, uap->arg2, uap->arg3, uap->arg4, uap->arg5); @@ -107,13 +217,82 @@ chud(__unused proc_t p, struct chud_args *uap, register_t *retval) __private_extern__ kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func) { - syscall_callback_fn = func; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chud_null_syscall, func, + (void * volatile *)&syscall_callback_fn)) { + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t 
chudxnu_syscall_callback_cancel(void) { - syscall_callback_fn = NULL; + chudxnu_syscall_callback_func_t old = syscall_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_syscall, + (void * volatile *)&syscall_callback_fn)) { + old = syscall_callback_fn; + } + return KERN_SUCCESS; } + +/* DTrace callback */ +typedef kern_return_t (*chudxnu_dtrace_callback_t)(uint64_t selector, + uint64_t *args, uint32_t count); +int chudxnu_dtrace_callback(uint64_t selector, uint64_t *args, uint32_t count); +kern_return_t chudxnu_dtrace_callback_enter(chudxnu_dtrace_callback_t fn); +void chudxnu_dtrace_callback_cancel(void); + +int +chud_null_dtrace(uint64_t selector, uint64_t *args, uint32_t count); + +static chudxnu_dtrace_callback_t + dtrace_callback = (chudxnu_dtrace_callback_t) chud_null_dtrace; + +int +chud_null_dtrace(uint64_t selector __unused, uint64_t *args __unused, + uint32_t count __unused) { + return ENXIO; +} + +int +chudxnu_dtrace_callback(uint64_t selector, uint64_t *args, uint32_t count) +{ + /* If no callback is hooked up, let's return ENXIO */ + int ret = ENXIO; + + /* Make a local stack copy of the function ptr */ + chudxnu_dtrace_callback_t fn = dtrace_callback; + + if(fn) { + ret = fn(selector, args, count); + } + + return ret; +} + +__private_extern__ kern_return_t +chudxnu_dtrace_callback_enter(chudxnu_dtrace_callback_t fn) +{ + /* Atomically enter the call back */ + if(!OSCompareAndSwapPtr(chud_null_dtrace, fn, + (void * volatile *) &dtrace_callback)) { + return KERN_FAILURE; + } + + return KERN_SUCCESS; +} + +__private_extern__ void +chudxnu_dtrace_callback_cancel(void) +{ + chudxnu_dtrace_callback_t old_fn = dtrace_callback; + + /* Atomically clear the call back */ + while(!OSCompareAndSwapPtr(old_fn, chud_null_dtrace, + (void * volatile *) &dtrace_callback)) { + old_fn = dtrace_callback; + } +} + diff --git a/bsd/dev/dtrace/blist.c b/bsd/dev/dtrace/blist.c index cb6177675..bbaaec0a9 100644 --- a/bsd/dev/dtrace/blist.c +++ b/bsd/dev/dtrace/blist.c 
@@ -144,9 +144,11 @@ static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab); #endif +#if !defined(__APPLE__) #ifdef _KERNEL static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); #endif +#endif /* __APPLE__ */ /* * blist_create() - create a blist capable of handling up to the specified @@ -347,7 +349,11 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) scan->u.bmu_bitmap &= ~(1 << r); return(blk + r); } +#if !defined(__APPLE__) if (count <= BLIST_BMAP_RADIX) { +#else + if (count <= (int)BLIST_BMAP_RADIX) { +#endif /* __APPLE__ */ /* * non-optimized code to allocate N bits out of the bitmap. * The more bits, the faster the code runs. It will run @@ -613,11 +619,19 @@ static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, if (v == (u_daddr_t)-1) { blist_free(dest, blk, count); } else if (v != 0) { +#if !defined(__APPLE__) int i; for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) if (v & (1 << i)) blist_free(dest, blk + i, 1); +#else + int j; /* Avoid shadow warnings */ + + for (j = 0; j < (int)BLIST_BMAP_RADIX && j < count; ++j) + if (v & (1 << j)) + blist_free(dest, blk + j, 1); +#endif /* __APPLE__ */ } return; } diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index bdbe6a874..081f70dc3 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -20,11 +20,11 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* #pragma ident "@(#)dtrace.c 1.49 06/08/11 SMI" */ +/* #pragma ident "@(#)dtrace.c 1.65 08/07/02 SMI" */ /* * DTrace - Dynamic Tracing for Solaris @@ -65,9 +65,34 @@ * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. 
*/ - -#define _DTRACE_WANT_PROC_GLUE_ 1 - +#if !defined(__APPLE__) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else #include #include #include @@ -88,6 +113,17 @@ #include #include #include +#include +#include + +#if defined(__APPLE__) +extern uint32_t pmap_find_phys(void *, uint64_t); +extern boolean_t pmap_valid_page(uint32_t); +#endif /* __APPLE__ */ + + +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */ @@ -95,14 +131,16 @@ extern void dtrace_suspend(void); extern void dtrace_resume(void); extern void dtrace_init(void); extern void helper_init(void); - -#if defined(__APPLE__) +extern void fasttrap_init(void); +extern void dtrace_lazy_dofs_duplicate(proc_t *, proc_t *); +extern void dtrace_lazy_dofs_destroy(proc_t *); +extern void dtrace_postinit(void); #include "../../../osfmk/chud/chud_dtrace.h" extern kern_return_t chudxnu_dtrace_callback (uint64_t selector, uint64_t *args, uint32_t count); -#endif +#endif /* __APPLE__ */ /* * DTrace Tunable Variables @@ -125,12 +163,9 @@ extern kern_return_t chudxnu_dtrace_callback * /etc/system. 
*/ int dtrace_destructive_disallow = 0; -#if defined(__APPLE__) -#define proc_t struct proc -#endif /* __APPLE__ */ dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); -dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); +dtrace_optval_t dtrace_dof_maxsize = (384 * 1024); size_t dtrace_global_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; @@ -199,23 +234,22 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ +static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ #if defined(__APPLE__) -static int dtrace_dof_mode; /* dof mode */ +static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */ #endif #if defined(__APPLE__) - /* * To save memory, some common memory allocations are given a - * unique zone. In example, dtrace_probe_t is 72 bytes in size, + * unique zone. For example, dtrace_probe_t is 72 bytes in size, * which means it would fall into the kalloc.128 bucket. With * 20k elements allocated, the space saved is substantial. */ struct zone *dtrace_probe_t_zone; - -#endif +#endif /* __APPLE__ */ /* * DTrace Locking @@ -248,6 +282,11 @@ struct zone *dtrace_probe_t_zone; * acquired _between_ dtrace_provider_lock and dtrace_lock. 
*/ +#if !defined(__APPLE__) +static kmutex_t dtrace_lock; /* probe state lock */ +static kmutex_t dtrace_provider_lock; /* provider state lock */ +static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ +#else /* * APPLE NOTE: * @@ -268,9 +307,8 @@ struct zone *dtrace_probe_t_zone; static lck_mtx_t dtrace_lock; /* probe state lock */ static lck_mtx_t dtrace_provider_lock; /* provider state lock */ static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */ -#if defined(__APPLE__) static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */ -#endif +#endif /* __APPLE__ */ /* * DTrace Provider Variables @@ -313,9 +351,13 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ uint32_t dtrace_helptrace_next = 0; uint32_t dtrace_helptrace_nlocals; char *dtrace_helptrace_buffer; +#if !defined(__APPLE__) /* Quiet compiler warning */ int dtrace_helptrace_bufsize = 512 * 1024; +#else +size_t dtrace_helptrace_bufsize = 512 * 1024; +#endif /* __APPLE__ */ -#ifdef DEBUG +#if DEBUG int dtrace_helptrace_enabled = 1; #else int dtrace_helptrace_enabled = 0; @@ -330,7 +372,7 @@ int dtrace_helptrace_enabled = 0; * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. */ -#ifdef DEBUG +#if DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; @@ -360,6 +402,8 @@ static lck_mtx_t dtrace_errlock; #define DTRACE_AGGHASHSIZE_SLEW 17 +#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) + /* * The key for a thread-local variable consists of the lower 61 bits of the * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. @@ -385,22 +429,41 @@ static lck_mtx_t dtrace_errlock; (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } +#else +#if (defined(__x86_64__) || defined(__ppc64__)) +/* FIXME: two function calls!! 
*/ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + ASSERT(intr < (1 << 3)); \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} #else +/* FIXME: three function calls!!! */ #define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = ml_at_interrupt_context(); /* XXX just one measely bit */ \ - uint_t thr = (uint_t)current_thread(); \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ uint_t pid = (uint_t)proc_selfpid(); \ ASSERT(intr < (1 << 3)); \ - (where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \ + (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } +#endif #endif /* __APPLE__ */ +#define DT_BSWAP_8(x) ((x) & 0xff) +#define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) +#define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) +#define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) + +#define DT_MASK_LO 0x00000000FFFFFFFFULL + #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); #if !defined(__APPLE__) -#if !(defined(__i386__) || defined (__x86_64__)) +#ifndef __i386 #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (size - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ @@ -410,7 +473,38 @@ static lck_mtx_t dtrace_errlock; #else #define DTRACE_ALIGNCHECK(addr, size, flags) #endif +#else /* __APPLE__ */ +#define DTRACE_ALIGNCHECK(addr, size, flags) \ + if (addr & (MIN(size,4) - 1)) { \ + *flags |= CPU_DTRACE_BADALIGN; \ + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ + return (0); \ + } +#endif /* __APPLE__ */ + +/* + * Test whether a range of memory starting at testaddr of size testsz falls + * within the range of memory described by baseaddr, 
basesz. We take care to avoid + * problems with overflow and underflow of the unsigned quantities, and + * disallow all negative sizes. Ranges of size 0 are allowed. + */ +#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ + ((testaddr) - (baseaddr) < (basesz) && \ + (testaddr) + (testsz) - (baseaddr) <= (basesz) && \ + (testaddr) + (testsz) >= (testaddr)) + +/* + * Test whether alloc_sz bytes will fit in the scratch region. We isolate + * alloc_sz on the righthand side of the comparison in order to avoid overflow + * or underflow in the comparison with it. This is simpler than the INRANGE + * check above, because we know that the dtms_scratch_ptr is valid in the + * range. Allocations of size zero are allowed. + */ +#define DTRACE_INSCRATCH(mstate, alloc_sz) \ + ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ + (mstate)->dtms_scratch_ptr >= (alloc_sz)) +#if !defined(__APPLE__) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t \ @@ -445,18 +539,12 @@ dtrace_load##bits(uintptr_t addr) \ rval = *((volatile uint##bits##_t *)addr); \ *flags &= ~CPU_DTRACE_NOFAULT; \ \ - return (rval); \ + return (!(*flags & CPU_DTRACE_FAULT) ? 
rval : 0); \ } -#else -#define DTRACE_ALIGNCHECK(addr, size, flags) \ - if (addr & (MIN(size,4) - 1)) { \ - *flags |= CPU_DTRACE_BADALIGN; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } - +#else /* __APPLE__ */ #define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" ); +#if (defined(__i386__) || defined (__x86_64__)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ extern vm_offset_t dtraceLoadRecover##bits; \ @@ -469,7 +557,6 @@ dtrace_load##bits(uintptr_t addr) \ /*CSTYLED*/ \ uint##bits##_t rval = 0; \ int i; \ - ppnum_t pp; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ \ @@ -490,10 +577,52 @@ dtrace_load##bits(uintptr_t addr) \ return (0); \ } \ \ - pp = pmap_find_phys(kernel_pmap, addr); \ + { \ + volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \ + *flags |= CPU_DTRACE_NOFAULT; \ + recover = dtrace_set_thread_recover(current_thread(), recover); \ + /*CSTYLED*/ \ + /* \ + * PR6394061 - avoid device memory that is unpredictably \ + * mapped and unmapped \ + */ \ + if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \ + rval = *((volatile uint##bits##_t *)addr); \ + RECOVER_LABEL(bits); \ + (void)dtrace_set_thread_recover(current_thread(), recover); \ + *flags &= ~CPU_DTRACE_NOFAULT; \ + } \ + \ + return (rval); \ +} +#else /* all other architectures */ +#define DTRACE_LOADFUNC(bits) \ +/*CSTYLED*/ \ +extern vm_offset_t dtraceLoadRecover##bits; \ +uint##bits##_t dtrace_load##bits(uintptr_t addr); \ + \ +uint##bits##_t \ +dtrace_load##bits(uintptr_t addr) \ +{ \ + size_t size = bits / NBBY; \ + /*CSTYLED*/ \ + uint##bits##_t rval = 0; \ + int i; \ + volatile uint16_t *flags = (volatile uint16_t *) \ + &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ + \ + DTRACE_ALIGNCHECK(addr, size, flags); \ + \ + for (i = 0; i < dtrace_toxranges; i++) { \ + if (addr >= dtrace_toxrange[i].dtt_limit) \ + continue; \ + \ + if (addr + size <= 
dtrace_toxrange[i].dtt_base) \ + continue; \ \ - if (0 == pp || /* pmap_find_phys failed ? */ \ - !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) { \ + /* \ + * This address falls within a toxic region; return 0. \ + */ \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ return (0); \ @@ -503,7 +632,7 @@ dtrace_load##bits(uintptr_t addr) \ volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \ *flags |= CPU_DTRACE_NOFAULT; \ recover = dtrace_set_thread_recover(current_thread(), recover); \ - /*CSTYLED*/ \ + /*CSTYLED*/ \ rval = *((volatile uint##bits##_t *)addr); \ RECOVER_LABEL(bits); \ (void)dtrace_set_thread_recover(current_thread(), recover); \ @@ -512,9 +641,9 @@ dtrace_load##bits(uintptr_t addr) \ \ return (rval); \ } +#endif #endif /* __APPLE__ */ - #ifdef __LP64__ #define dtrace_loadptr dtrace_load64 #else @@ -539,12 +668,23 @@ dtrace_load##bits(uintptr_t addr) \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ + ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \ DTRACEFLT_UNKNOWN) #define DTRACEACT_ISSTRING(act) \ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) + +#if defined (__APPLE__) +/* Avoid compiler warnings when assigning regs[rd] = NULL */ +#ifdef NULL +#undef NULL +#define NULL (uintptr_t)0 +#endif +#endif /* __APPLE__ */ + +static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); @@ -675,8 +815,7 @@ dtrace_canstore_statvar(uint64_t addr, size_t sz, if (svar == NULL || svar->dtsv_size == 0) continue; - if (addr - svar->dtsv_data < svar->dtsv_size && - addr + sz <= svar->dtsv_data + svar->dtsv_size) + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) return (1); } @@ -693,16 +832,11 @@ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { - uintptr_t a; - size_t s; - /* * First, check to see if the address is in scratch space... */ - a = mstate->dtms_scratch_base; - s = mstate->dtms_scratch_size; - - if (addr - a < s && addr + sz <= a + s) + if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, + mstate->dtms_scratch_size)) return (1); /* @@ -710,10 +844,42 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * up both thread-local variables and any global dynamically-allocated * variables. 
*/ - a = (uintptr_t)vstate->dtvs_dynvars.dtds_base; - s = vstate->dtvs_dynvars.dtds_size; - if (addr - a < s && addr + sz <= a + s) + if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, + vstate->dtvs_dynvars.dtds_size)) { + dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; + uintptr_t base = (uintptr_t)dstate->dtds_base + + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); + uintptr_t chunkoffs; + + /* + * Before we assume that we can store here, we need to make + * sure that it isn't in our metadata -- storing to our + * dynamic variable metadata would corrupt our state. For + * the range to not include any dynamic variable metadata, + * it must: + * + * (1) Start above the hash table that is at the base of + * the dynamic variable space + * + * (2) Have a starting chunk offset that is beyond the + * dtrace_dynvar_t that is at the base of every chunk + * + * (3) Not span a chunk boundary + * + */ + if (addr < base) + return (0); + + chunkoffs = (addr - base) % dstate->dtds_chunksize; + + if (chunkoffs < sizeof (dtrace_dynvar_t)) + return (0); + + if (chunkoffs + sz > dstate->dtds_chunksize) + return (0); + return (1); + } /* * Finally, check the static local and global variables. These checks @@ -730,6 +896,104 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, return (0); } + +/* + * Convenience routine to check to see if the address is within a memory + * region in which a load may be issued given the user's privilege level; + * if not, it sets the appropriate error flags and loads 'addr' into the + * illegal value slot. + * + * DTrace subroutines (DIF_SUBR_*) should use this helper to implement + * appropriate memory access protection. 
+ */ +static int +dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, + dtrace_vstate_t *vstate) +{ +#if !defined(__APPLE__) /* Quiet compiler warning - matches dtrace_dif_emulate */ + volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; +#else + volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; +#endif /* __APPLE */ + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (1); + + /* + * You can obviously read that which you can store. + */ + if (dtrace_canstore(addr, sz, mstate, vstate)) + return (1); + + /* + * We're allowed to read from our own string table. + */ + if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen)) + return (1); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + *illval = addr; + return (0); +} + +/* + * Convenience routine to check to see if a given string is within a memory + * region in which a load may be issued given the user's privilege level; + * this exists so that we don't need to issue unnecessary dtrace_strlen() + * calls in the event that the user has all privileges. + */ +static int +dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, + dtrace_vstate_t *vstate) +{ + size_t strsz; + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (1); + + strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); + if (dtrace_canload(addr, strsz, mstate, vstate)) + return (1); + + return (0); +} + +/* + * Convenience routine to check to see if a given variable is within a memory + * region in which a load may be issued given the user's privilege level. 
+ */ +static int +dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate, + dtrace_vstate_t *vstate) +{ + size_t sz; + ASSERT(type->dtdt_flags & DIF_TF_BYREF); + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (1); + + if (type->dtdt_kind == DIF_TYPE_STRING) + sz = dtrace_strlen(src, + vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1; + else + sz = type->dtdt_size; + + return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); +} + /* * Compare two strings using safe loads. */ @@ -745,15 +1009,17 @@ dtrace_strncmp(char *s1, char *s2, size_t limit) flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; do { - if (s1 == NULL) + if (s1 == NULL) { c1 = '\0'; - else + } else { c1 = dtrace_load8((uintptr_t)s1++); + } - if (s2 == NULL) + if (s2 == NULL) { c2 = '\0'; - else + } else { c2 = dtrace_load8((uintptr_t)s2++); + } if (c1 != c2) return (c1 - c2); @@ -771,9 +1037,10 @@ dtrace_strlen(const char *s, size_t lim) { uint_t len; - for (len = 0; len != lim; len++) + for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; + } return (len); } @@ -866,11 +1133,12 @@ dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); - if (type->dtdt_kind == DIF_TYPE_STRING) + if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_strcpy(src, dst, type->dtdt_size); - else + } else { dtrace_bcopy(src, dst, type->dtdt_size); } +} /* * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be @@ -915,6 +1183,93 @@ dtrace_bzero(void *dst, size_t len) *cp++ = 0; } +static void +dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) +{ + uint64_t result[2]; + + result[0] = addend1[0] + addend2[0]; + result[1] = addend1[1] + addend2[1] + + (result[0] < addend1[0] || result[0] < addend2[0] ? 
1 : 0); + + sum[0] = result[0]; + sum[1] = result[1]; +} + +/* + * Shift the 128-bit value in a by b. If b is positive, shift left. + * If b is negative, shift right. + */ +static void +dtrace_shift_128(uint64_t *a, int b) +{ + uint64_t mask; + + if (b == 0) + return; + + if (b < 0) { + b = -b; + if (b >= 64) { + a[0] = a[1] >> (b - 64); + a[1] = 0; + } else { + a[0] >>= b; + mask = 1LL << (64 - b); + mask -= 1; + a[0] |= ((a[1] & mask) << (64 - b)); + a[1] >>= b; + } + } else { + if (b >= 64) { + a[1] = a[0] << (b - 64); + a[0] = 0; + } else { + a[1] <<= b; + mask = a[0] >> (64 - b); + a[1] |= mask; + a[0] <<= b; + } + } +} + +/* + * The basic idea is to break the 2 64-bit values into 4 32-bit values, + * use native multiplication on those, and then re-combine into the + * resulting 128-bit value. + * + * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = + * hi1 * hi2 << 64 + + * hi1 * lo2 << 32 + + * hi2 * lo1 << 32 + + * lo1 * lo2 + */ +static void +dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) +{ + uint64_t hi1, hi2, lo1, lo2; + uint64_t tmp[2]; + + hi1 = factor1 >> 32; + hi2 = factor2 >> 32; + + lo1 = factor1 & DT_MASK_LO; + lo2 = factor2 & DT_MASK_LO; + + product[0] = lo1 * lo2; + product[1] = hi1 * hi2; + + tmp[0] = hi1 * lo2; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); + + tmp[0] = hi2 * lo1; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); +} + /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the @@ -956,6 +1311,7 @@ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr) /* __APPLE__ */ /* * We should always have a non-NULL state cred here, since if cred @@ -1116,7 +1472,7 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) * clean the dirty dynamic variable lists on all CPUs. 
Dynamic variable * cleaning is explained in detail in . */ -#if defined(__APPLE__) +#if defined(__APPLE__) /* Quiet compiler warning. */ static #endif /* __APPLE__ */ void @@ -1211,12 +1567,13 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. */ -#if defined(__APPLE__) +#if defined(__APPLE__) /* Quiet compiler warning. */ static #endif /* __APPLE__ */ dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, - dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op) + dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; @@ -1268,6 +1625,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; + if (!dtrace_canload(base, size, mstate, vstate)) + break; + for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); @@ -1276,6 +1636,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, } } + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (NULL); + hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); @@ -1306,9 +1669,15 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, while ((lock = *lockp) & 1) continue; +#if !defined(__APPLE__) /* Quiet compiler warning */ if (dtrace_casptr((void *)lockp, (void *)lock, (void *)(lock + 1)) == (void *)lock) break; +#else + if (dtrace_casptr((void *)(uintptr_t)lockp, + (void *)lock, (void *)(lock + 1)) == (void *)lock) + break; +#endif /* __APPLE__ */ } dtrace_membar_producer(); @@ -1662,15 +2031,15 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); - return (dtrace_dynvar(dstate, nkeys, key, dsize, op)); + return (dtrace_dynvar(dstate, 
nkeys, key, dsize, op, mstate, vstate)); } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { -#pragma unused(arg) - if (nval < *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval < (int64_t)*oval) *oval = nval; } @@ -1678,8 +2047,8 @@ dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { -#pragma unused(arg) - if (nval > *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval > (int64_t)*oval) *oval = nval; } @@ -1748,24 +2117,50 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) { -#pragma unused(arg) +#pragma unused(arg) /* __APPLE__ */ data[0]++; data[1] += nval; } /*ARGSUSED*/ static void -dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) +dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) { -#pragma unused(nval,arg) - *oval = *oval + 1; +#pragma unused(arg) /* __APPLE__ */ + int64_t snval = (int64_t)nval; + uint64_t tmp[2]; + + data[0]++; + data[1] += nval; + + /* + * What we want to say here is: + * + * data[2] += nval * nval; + * + * But given that nval is 64-bit, we could easily overflow, so + * we do this as 128-bit arithmetic. 
+ */ + if (snval < 0) + snval = -snval; + + dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); + dtrace_add_128(data + 2, tmp, data + 2); } /*ARGSUSED*/ static void -dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) +dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) { -#pragma unused(arg) +#pragma unused(nval, arg) /* __APPLE__ */ + *oval = *oval + 1; +} + +/*ARGSUSED*/ +static void +dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) +{ +#pragma unused(arg) /* __APPLE__ */ *oval += nval; } @@ -2084,17 +2479,28 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; uintptr_t daddr, saddr, dlimit; +#if !defined(__APPLE__) /* Quiet compiler warning */ dtrace_speculation_state_t current, new; +#else + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; +#endif /* __APPLE__ */ intptr_t offs; if (which == 0) return; +#if !defined(__APPLE__) /* Quiet compiler warning */ if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } - +#else + if (which > (dtrace_specid_t)state->dts_nspeculations) { + cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; + return; + } +#endif /* __APPLE__ */ + spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; @@ -2200,6 +2606,7 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); +#pragma unused(rval) /* __APPLE__ */ ASSERT(rval == DTRACESPEC_COMMITTING); } @@ -2220,16 +2627,27 @@ dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; +#if !defined(__APPLE__) /* Quiet compiler warning */ dtrace_speculation_state_t current, new; +#else + 
dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; +#endif /* __APPLE__ */ dtrace_buffer_t *buf; if (which == 0) return; +#if !defined(__APPLE__) /* Quiet compiler warning */ if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } +#else + if (which > (dtrace_specid_t)state->dts_nspeculations) { + cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; + return; + } +#endif /* __APPLE__ */ spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpu]; @@ -2289,7 +2707,11 @@ dtrace_speculation_clean_here(dtrace_state_t *state) return; } +#if !defined(__APPLE__) /* Quiet compiler warning */ for (i = 0; i < state->dts_nspeculations; i++) { +#else + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { +#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; @@ -2324,10 +2746,19 @@ dtrace_speculation_clean_here(dtrace_state_t *state) static void dtrace_speculation_clean(dtrace_state_t *state) { +#if !defined(__APPLE__) /* Quiet compiler warning */ int work = 0, rv; +#else + int work = 0; + uint32_t rv; +#endif /* __APPLE__ */ dtrace_specid_t i; +#if !defined(__APPLE__) /* Quiet compiler warning */ for (i = 0; i < state->dts_nspeculations; i++) { +#else + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { +#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); @@ -2351,7 +2782,11 @@ dtrace_speculation_clean(dtrace_state_t *state) * speculation buffers, as appropriate. We can now set the state * to inactive. 
*/ +#if !defined(__APPLE__) /* Quiet compiler warning */ for (i = 0; i < state->dts_nspeculations; i++) { +#else + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { +#endif /* __APPLE__ */ dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; @@ -2382,13 +2817,21 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; +#if !defined(__APPLE__) /* Quiet compiler warning */ dtrace_speculation_state_t current, new; +#else + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; +#endif /* __APPLE__ */ dtrace_buffer_t *buf; if (which == 0) return (NULL); +#if !defined(__APPLE__) /* Quiet compiler warning */ if (which > state->dts_nspeculations) { +#else + if (which > (dtrace_specid_t)state->dts_nspeculations) { +#endif /* __APPLE__ */ cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } @@ -2441,6 +2884,53 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, return (buf); } +/* + * Return a string. In the event that the user lacks the privilege to access + * arbitrary kernel memory, we copy the string out to scratch memory so that we + * don't fail access checking. + * + * dtrace_dif_variable() uses this routine as a helper for various + * builtin values such as 'execname' and 'probefunc.' + */ +#if defined(__APPLE__) /* Quiet compiler warning. */ +static +#endif /* __APPLE__ */ +uintptr_t +dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, + dtrace_mstate_t *mstate) +{ + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t ret; + size_t strsz; + + /* + * The easy case: this probe is allowed to read all of memory, so + * we can just return this as a vanilla pointer. 
+ */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (addr); + + /* + * This is the tougher case: we copy the string in question from + * kernel memory into scratch memory and return it that way: this + * ensures that we won't trip up when access checking tests the + * BYREF return value. + */ + strsz = dtrace_strlen((char *)addr, size) + 1; + + if (mstate->dtms_scratch_ptr + strsz > + mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return (NULL); + } + + dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, + strsz); + ret = mstate->dtms_scratch_ptr; + mstate->dtms_scratch_ptr += strsz; + return (ret); +} + /* * This function implements the DIF emulator's variable lookups. The emulator * passes a reserved variable identifier and optional built-in array index. @@ -2478,9 +2968,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); #if defined(__APPLE__) - /* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */ + /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) 
*/ else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) { - return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval; + return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval; } #endif /* __APPLE__ */ else @@ -2649,7 +3139,11 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_caller(aframes)) == -1) { +#else + dtrace_caller(aframes)) == (uintptr_t)-1) { +#endif /* __APPLE__ */ /* * We have failed to do this the quick way; * we must resort to the slower approach of @@ -2680,7 +3174,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * we're after. */ ustack[2] = NULL; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } @@ -2689,23 +3185,27 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_provider->dtpv_name); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, + state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_mod); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_mod, + state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_func); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_func, + state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return 
((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_name); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_name, + state, mstate)); #if !defined(__APPLE__) case DIF_VAR_PID: @@ -2758,6 +3258,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); + /* + * It is always safe to dereference one's own t_procp pointer: + * it always points to a valid, allocated proc structure. + * (This is true because threads don't clean up their own + * state -- they leave that task to whomever reaps them.) + */ return ((uint64_t)curthread->t_procp->p_ppid); #else case DIF_VAR_PPID: @@ -2770,7 +3276,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return ((uint64_t)(uintptr_t)(current_proc()->p_ppid)); + return ((uint64_t)proc_selfppid()); #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -2784,13 +3290,22 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_tid); #else case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. 
- */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) + /* We do not need to check for null current_thread() */ + return thread_tid(current_thread()); /* globally unique */ + + case DIF_VAR_PTHREAD_SELF: + if (!dtrace_priv_proc(state)) + return (0); + + /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */ + return 0; + + case DIF_VAR_DISPATCHQADDR: + if (!dtrace_priv_proc(state)) return (0); - return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */ + /* We do not need to check for null current_thread() */ + return thread_dispatchqaddr(current_thread()); #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -2810,8 +3325,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_user.u_comm); + return (dtrace_dif_varstr( + (uintptr_t)curthread->t_procp->p_user.u_comm, + state, mstate)); #else case DIF_VAR_EXECNAME: { @@ -2819,9 +3335,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, size_t scratch_size = MAXCOMLEN+1; /* The scratch allocation's lifetime is that of the clause. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return 0; + } if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -2849,14 +3366,16 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) 
*/ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_zone->zone_name); + return (dtrace_dif_varstr( + (uintptr_t)curthread->t_procp->p_zone->zone_name, + state, mstate)); #else case DIF_VAR_ZONENAME: if (!dtrace_priv_proc(state)) return (0); + /* FIXME: return e.g. "global" allocated from scratch a la execname. */ return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */ #endif /* __APPLE__ */ @@ -2871,7 +3390,16 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_uid); - return ((uint64_t)curthread->t_cred->cr_uid); + /* + * It is always safe to dereference one's own t_procp pointer: + * it always points to a valid, allocated proc structure. + * (This is true because threads don't clean up their own + * state -- they leave that task to whomever reaps them.) + * + * Additionally, it is safe to dereference one's own process + * credential, since this is never NULL after process birth. + */ + return ((uint64_t)curthread->t_procp->p_cred->cr_uid); #else case DIF_VAR_UID: if (!dtrace_priv_proc(state)) @@ -2884,9 +3412,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); if (dtrace_CRED() != NULL) + /* Credential does not require lazy initialization. */ return ((uint64_t)kauth_getuid()); - else - return -1LL; + else { + /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -2900,7 +3432,16 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_gid); - return ((uint64_t)curthread->t_cred->cr_gid); + /* + * It is always safe to dereference one's own t_procp pointer: + * it always points to a valid, allocated proc structure. 
+ * (This is true because threads don't clean up their own + * state -- they leave that task to whomever reaps them.) + * + * Additionally, it is safe to dereference one's own process + * credential, since this is never NULL after process birth. + */ + return ((uint64_t)curthread->t_procp->p_cred->cr_gid); #else case DIF_VAR_GID: if (!dtrace_priv_proc(state)) @@ -2913,9 +3454,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); if (dtrace_CRED() != NULL) + /* Credential does not require lazy initialization. */ return ((uint64_t)kauth_getgid()); - else - return -1LL; + else { + /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -2930,6 +3475,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); + /* + * It is always safe to dereference one's own t_lwp pointer in + * the event that this pointer is non-NULL. (This is true + * because threads and lwps don't clean up their own state -- + * they leave that task to whomever reaps them.) + */ if ((lwp = curthread->t_lwp) == NULL) return (0); @@ -2947,7 +3498,12 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return (uthread ? 
uthread->t_dtrace_errno : -1); + if (uthread) + return (uint64_t)uthread->t_dtrace_errno; + else { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } } #endif /* __APPLE__ */ @@ -2975,6 +3531,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, #else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; #endif /* __APPLE__ */ + dtrace_vstate_t *vstate = &state->dts_vstate; #if !defined(__APPLE__) union { @@ -2987,7 +3544,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t rw; } r; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ switch (subr) { @@ -2997,6 +3554,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, #if !defined(__APPLE__) case DIF_SUBR_MUTEX_OWNED: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; @@ -3005,6 +3568,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_OWNER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) @@ -3014,11 +3583,23 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; @@ -3026,22 +3607,40 @@ dtrace_dif_subr(uint_t 
subr, uint_t rd, uint64_t *regs, case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = NULL; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ case DIF_SUBR_BCOPY: { @@ -3059,6 +3658,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + if (!dtrace_canload(src, size, mstate, vstate)) { + regs[rd] = NULL; + break; + } + dtrace_bcopy((void *)src, (void *)dest, size); break; } @@ -3075,8 +3679,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * probes will not activate in user contexts to which the * enabling user does not have permissions. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + + /* + * Rounding up the user allocation size could have overflowed + * a large, bogus allocation (like -1ULL) to 0. 
+ */ + if (scratch_size < size || + !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3085,10 +3694,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); #if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); #else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); #endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } @@ -3115,10 +3724,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); #if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); #else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); #endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; @@ -3136,8 +3745,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * probes will not activate in user contexts to which the * enabling user does not have permissions. 
*/ - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3145,10 +3753,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); #if !defined(__APPLE__) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); + dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); #else if (dtrace_priv_proc(state)) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); + dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); #endif /* __APPLE__ */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -3167,6 +3775,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int cont = 0; while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { + + if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, + vstate)) { + regs[rd] = NULL; + break; + } + wptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_wptr)); @@ -3213,6 +3828,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { /* Darwin does not implement SysV streams messages */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } @@ -3283,7 +3899,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; @@ -3298,7 +3914,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); + dtrace_copyoutstr(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; @@ -3313,7 +3929,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_priv_proc_control(state) && 
!dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; @@ -3328,18 +3944,28 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); + dtrace_copyoutstr(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } #endif /* __APPLE__ */ - case DIF_SUBR_STRLEN: - regs[rd] = dtrace_strlen((char *)(uintptr_t) - tupregs[0].dttk_value, + case DIF_SUBR_STRLEN: { + size_t sz; + uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; + sz = dtrace_strlen((char *)addr, state->dts_options[DTRACEOPT_STRSIZE]); + + if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + regs[rd] = sz; + break; + } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { @@ -3350,6 +3976,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. 
*/ + uintptr_t saddr = tupregs[0].dttk_value; uintptr_t addr = tupregs[0].dttk_value; uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; char c, target = (char)tupregs[1].dttk_value; @@ -3366,6 +3993,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { + regs[rd] = NULL; + break; + } + break; } @@ -3392,6 +4024,17 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = notfound; + if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, + vstate)) { + regs[rd] = NULL; + break; + } + /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a @@ -3466,13 +4109,21 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (pos > len) +#else + if ((size_t)pos > len) +#endif /* __APPLE__ */ pos = len; } else { if (pos < 0) pos = 0; +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (pos >= len) { +#else + if ((size_t)pos >= len) { +#endif /* __APPLE__ */ if (sublen == 0) regs[rd] = len; break; @@ -3513,12 +4164,25 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t limit, toklimit = tokaddr + size; - uint8_t c, tokmap[32]; /* 256 / 8 */ char *dest = (char *)mstate->dtms_scratch_ptr; +#if !defined(__APPLE__) /* Quiet compiler warnings */ + uint8_t c, tokmap[32]; /* 256 / 8 */ int i; +#else + uint8_t c='\0', tokmap[32]; /* 256 / 8 */ + uint64_t i = 0; +#endif /* __APPLE__ */ + + /* + * Check both the token buffer and (later) the input buffer, + * since both could be non-scratch addresses. 
+ */ + if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { + regs[rd] = NULL; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3533,6 +4197,19 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; + } else { + /* + * If the user-specified address is non-NULL we must + * access check it. This is the only time we have + * a chance to do so, since this address may reside + * in the string table of this clause-- future calls + * (when we fetch addr from mstate->dtms_strtok) + * would fail this access check. + */ + if (!dtrace_strcanload(addr, size, mstate, vstate)) { + regs[rd] = NULL; + break; + } } /* @@ -3606,16 +4283,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, size_t len = dtrace_strlen((char *)s, size); int64_t i = 0; - if (nargs <= 2) - remaining = (int64_t)size; + if (!dtrace_canload(s, len + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; } + if (nargs <= 2) + remaining = (int64_t)size; + if (index < 0) { index += len; @@ -3625,18 +4306,29 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } } - if (index >= len || index < 0) - index = len; - - for (d[0] = '\0'; remaining > 0; remaining--) { - if ((d[i++] = dtrace_load8(s++ + index)) == '\0') - break; - - if (i == size) { - d[i - 1] = '\0'; +#if !defined(__APPLE__) /* Quiet compiler warnings */ + if (index >= len || index < 0) { + remaining = 0; + } else if (remaining < 0) { + remaining += len - index; + } else if (index + remaining > size) { + remaining = size - index; + } +#else + if ((size_t)index >= len || 
index < 0) { + remaining = 0; + } else if (remaining < 0) { + remaining += len - index; + } else if ((uint64_t)index + (uint64_t)remaining > size) { + remaining = size - index; + } +#endif /* __APPLE__ */ + for (i = 0; i < remaining; i++) { + if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } - } + + d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; @@ -3645,7 +4337,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, #if !defined(__APPLE__) case DIF_SUBR_GETMAJOR: -#ifdef __LP64__ +#ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; @@ -3660,7 +4352,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, #if !defined(__APPLE__) case DIF_SUBR_GETMINOR: -#ifdef __LP64__ +#ifdef _LP64 regs[rd] = tupregs[0].dttk_value & MAXMIN64; #else regs[rd] = tupregs[0].dttk_value & MAXMIN; @@ -3689,8 +4381,18 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, char *s; int i, len, depth = 0; - if (size == 0 || mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + /* + * Due to all the pointer jumping we do and context we must + * rely upon, we just mandate that the user must have kernel + * read privileges to use this routine. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { + *flags |= CPU_DTRACE_KPRIV; + *illval = daddr; + regs[rd] = NULL; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3711,9 +4413,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * explained to them, and who can't even concisely describe * the conditions under which one would be forced to resort to * this technique. Needless to say, those conditions are - * found here -- and probably only here. Is this is the only - * use of this infamous trick in shipping, production code? - * If it isn't, it probably should be... 
+ * found here -- and probably only here. Is this the only use + * of this infamous trick in shipping, production code? If it + * isn't, it probably should be... */ if (minor != -1) { uintptr_t maddr = dtrace_loadptr(daddr + @@ -3731,7 +4433,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { uint64_t m; -#ifdef __LP64__ +#ifdef _LP64 m = dtrace_load64(maddr + dev) & MAXMIN64; #else m = dtrace_load32(maddr + dev) & MAXMIN; @@ -3858,7 +4560,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } #else case DIF_SUBR_DDI_PATHNAME: { - /* XXX awaits galactic disentanglement ;-} */ + /* FIXME: awaits galactic disentanglement ;-} */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = NULL; break; } @@ -3869,10 +4572,19 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i = 0; +#else + uint64_t i = 0; +#endif /* __APPLE__ */ + + if (!dtrace_strcanload(s1, size, mstate, vstate) || + !dtrace_strcanload(s2, size, mstate, vstate)) { + regs[rd] = NULL; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3916,8 +4628,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = 22; /* enough room for 2^64 in decimal */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -3937,6 +4648,36 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + case DIF_SUBR_HTONS: + case DIF_SUBR_NTOHS: +#ifdef _BIG_ENDIAN + 
regs[rd] = (uint16_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONL: + case DIF_SUBR_NTOHL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint32_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONLL: + case DIF_SUBR_NTOHLL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint64_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); +#endif + break; + + case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; @@ -3946,8 +4687,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -4055,8 +4800,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, end = lastbase; } +#if !defined(__APPLE__) /* Quiet compiler warnings */ for (i = start, j = 0; i <= end && j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); +#else + for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++) + dest[j] = dtrace_load8(src + i); +#endif /* __APPLE__ */ dest[j] = '\0'; regs[rd] = (uintptr_t)dest; @@ -4070,8 +4820,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t src = tupregs[0].dttk_value; int i = 0, j = 0; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_strcanload(src, size, mstate, vstate)) { + regs[rd] = NULL; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; break; @@ -4083,8 +4837,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t 
*regs, do { c = dtrace_load8(src + i++); next: +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (j + 5 >= size) /* 5 = strlen("/..c\0") */ break; +#else + if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ + break; +#endif /* __APPLE__ */ if (c != '/') { dest[j++] = c; @@ -4167,24 +4926,274 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, mstate->dtms_scratch_ptr += size; break; } -#ifdef __APPLE__ - /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */ - case DIF_SUBR_CHUD: { - uint64_t selector = tupregs[0].dttk_value; - uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; - uint32_t ii; + case DIF_SUBR_INET_NTOA: + case DIF_SUBR_INET_NTOA6: + case DIF_SUBR_INET_NTOP: { + size_t size; + int af, argi, i; + char *base, *end; - /* copy in any variadic argument list */ - for(ii = 0; ii < DIF_DTR_NREGS-1; ii++) { - args[ii] = tupregs[ii+1].dttk_value; + if (subr == DIF_SUBR_INET_NTOP) { + af = (int)tupregs[0].dttk_value; + argi = 1; + } else { + af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; + argi = 0; } - kern_return_t ret = - chudxnu_dtrace_callback(selector, args, DIF_DTR_NREGS-1); + if (af == AF_INET) { +#if !defined(__APPLE__) + ipaddr_t ip4; +#else + in_addr_t ip4; +#endif /* __APPLE__ */ + uint8_t *ptr8, val; + + /* + * Safely load the IPv4 address. + */ + ip4 = dtrace_load32(tupregs[argi].dttk_value); + + /* + * Check an IPv4 string will fit in scratch. + */ +#if !defined(__APPLE__) + size = INET_ADDRSTRLEN; +#else + size = MAX_IPv4_STR_LEN; +#endif /* __APPLE__ */ + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = NULL; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + + /* + * Stringify as a dotted decimal quad. 
+ */ + *end-- = '\0'; + ptr8 = (uint8_t *)&ip4; + for (i = 3; i >= 0; i--) { + val = ptr8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + (val % 10); + } + } + + if (i > 0) + *end-- = '.'; + } + ASSERT(end + 1 >= base); + + } else if (af == AF_INET6) { +#if defined(__APPLE__) +#define _S6_un __u6_addr +#define _S6_u8 __u6_addr8 +#endif /* __APPLE__ */ + struct in6_addr ip6; + int firstzero, tryzero, numzero, v6end; + uint16_t val; + const char digits[] = "0123456789abcdef"; + + /* + * Stringify using RFC 1884 convention 2 - 16 bit + * hexadecimal values with a zero-run compression. + * Lower case hexadecimal digits are used. + * eg, fe80::214:4fff:fe0b:76c8. + * The IPv4 embedded form is returned for inet_ntop, + * just the IPv4 string is returned for inet_ntoa6. + */ + + /* + * Safely load the IPv6 address. + */ + dtrace_bcopy( + (void *)(uintptr_t)tupregs[argi].dttk_value, + (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); + + /* + * Check an IPv6 string will fit in scratch. + */ + size = INET6_ADDRSTRLEN; + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = NULL; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + *end-- = '\0'; + + /* + * Find the longest run of 16 bit zero values + * for the single allowed zero compression - "::". 
+ */ + firstzero = -1; + tryzero = -1; + numzero = 1; +#if !defined(__APPLE__) /* Quiet compiler warnings */ + for (i = 0; i < sizeof (struct in6_addr); i++) { +#else + for (i = 0; i < (int)sizeof (struct in6_addr); i++) { +#endif /* __APPLE__ */ + if (ip6._S6_un._S6_u8[i] == 0 && + tryzero == -1 && i % 2 == 0) { + tryzero = i; + continue; + } + + if (tryzero != -1 && + (ip6._S6_un._S6_u8[i] != 0 || + i == sizeof (struct in6_addr) - 1)) { + + if (i - tryzero <= numzero) { + tryzero = -1; + continue; + } + + firstzero = tryzero; + numzero = i - i % 2 - tryzero; + tryzero = -1; + + if (ip6._S6_un._S6_u8[i] == 0 && + i == sizeof (struct in6_addr) - 1) + numzero += 2; + } + } +#if !defined(__APPLE__) /* Quiet compiler warnings */ + ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); +#else + ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr)); +#endif /* __APPLE__ */ + + /* + * Check for an IPv4 embedded address. + */ + v6end = sizeof (struct in6_addr) - 2; + if (IN6_IS_ADDR_V4MAPPED(&ip6) || + IN6_IS_ADDR_V4COMPAT(&ip6)) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ + for (i = sizeof (struct in6_addr) - 1; + i >= DTRACE_V4MAPPED_OFFSET; i--) { +#else + for (i = sizeof (struct in6_addr) - 1; + i >= (int)DTRACE_V4MAPPED_OFFSET; i--) { +#endif /* __APPLE__ */ + ASSERT(end >= base); + + val = ip6._S6_un._S6_u8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + val % 10; + } + } + +#if !defined(__APPLE__) /* Quiet compiler warnings */ + if (i > DTRACE_V4MAPPED_OFFSET) + *end-- = '.'; +#else + if (i > (int)DTRACE_V4MAPPED_OFFSET) + *end-- = '.'; +#endif /* __APPLE__ */ + } + + if (subr == DIF_SUBR_INET_NTOA6) + goto inetout; + + /* + * Set v6end to skip the IPv4 address that + * we have already stringified. + */ + v6end = 10; + } + + /* + * Build the IPv6 string by working through the + * address in reverse. 
+ */ + for (i = v6end; i >= 0; i -= 2) { + ASSERT(end >= base); + + if (i == firstzero + numzero - 2) { + *end-- = ':'; + *end-- = ':'; + i -= numzero - 2; + continue; + } + + if (i < 14 && i != firstzero - 2) + *end-- = ':'; + + val = (ip6._S6_un._S6_u8[i] << 8) + + ip6._S6_un._S6_u8[i + 1]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 16) { + *end-- = digits[val % 16]; + } + } + } + ASSERT(end + 1 >= base); + +#if defined(__APPLE__) +#undef _S6_un +#undef _S6_u8 +#endif /* __APPLE__ */ + } else { + /* + * The user didn't use AH_INET or AH_INET6. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + regs[rd] = NULL; + break; + } + +inetout: regs[rd] = (uintptr_t)end + 1; + mstate->dtms_scratch_ptr += size; + break; + } + +#ifdef __APPLE__ + + /* CoreProfile callback ('core_profile(uint64_t, [uint64_t], [uint64_t] ...)') */ + case DIF_SUBR_COREPROFILE: { + uint64_t selector = tupregs[0].dttk_value; + uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; + uint32_t ii; + uint32_t count = (uint32_t)nargs; + + if (count < 1) { + regs[rd] = KERN_FAILURE; + break; + } + + if(count > DIF_DTR_NREGS) + count = DIF_DTR_NREGS; + + /* copy in any variadic argument list, bounded by DIF_DTR_NREGS */ + for(ii = 0; ii < count-1; ii++) { + args[ii] = tupregs[ii+1].dttk_value; + } + + kern_return_t ret = + chudxnu_dtrace_callback(selector, args, count-1); if(KERN_SUCCESS != ret) { /* error */ } + + regs[rd] = ret; break; } @@ -4224,11 +5233,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; +#if !defined(__APPLE__) /* Quiet compiler warnings */ uint_t pc = 0, id, opc; +#else + uint_t pc = 0, id, opc = 0; +#endif /* __APPLE__ */ uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; + /* + * We stash the current DIF object into the machine state: we need it + * for subsequent access checking. 
+ */ + mstate->dtms_difo = difo; + regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { @@ -4432,6 +5451,36 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; +#if !defined(__APPLE__) + case DIF_OP_ULDSB: + regs[rd] = (int8_t) + dtrace_fuword8((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDSH: + regs[rd] = (int16_t) + dtrace_fuword16((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDSW: + regs[rd] = (int32_t) + dtrace_fuword32((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDUB: + regs[rd] = + dtrace_fuword8((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDUH: + regs[rd] = + dtrace_fuword16((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDUW: + regs[rd] = + dtrace_fuword32((void *)(uintptr_t)regs[r1]); + break; + case DIF_OP_ULDX: + regs[rd] = + dtrace_fuword64((void *)(uintptr_t)regs[r1]); + break; +#else /* Darwin 32-bit kernel may fetch from 64-bit user. Don't want uintptr_t cast. 
*/ case DIF_OP_ULDSB: regs[rd] = (int8_t) dtrace_fuword8(regs[r1]); @@ -4459,9 +5508,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, case DIF_OP_ULDX: regs[rd] = dtrace_fuword64(regs[r1]); +#endif /* __APPLE__ */ break; case DIF_OP_RET: rval = regs[rd]; + pc = textlen; break; case DIF_OP_NOP: break; @@ -4472,15 +5523,25 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; - case DIF_OP_SCMP: - cc_r = dtrace_strncmp((char *)(uintptr_t)regs[r1], - (char *)(uintptr_t)regs[r2], - state->dts_options[DTRACEOPT_STRSIZE]); + case DIF_OP_SCMP: { + size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t s1 = regs[r1]; + uintptr_t s2 = regs[r2]; + + if (s1 != NULL && + !dtrace_strcanload(s1, sz, mstate, vstate)) + break; + if (s2 != NULL && + !dtrace_strcanload(s2, sz, mstate, vstate)) + break; + + cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; + } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); @@ -4543,6 +5604,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *(uint8_t *)a = 0; a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + mstate, vstate)) + break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type); @@ -4574,7 +5639,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id -= DIF_VAR_OTHER_UBASE; +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(id < vstate->dtvs_nlocals); +#else + ASSERT(id < (uint_t)vstate->dtvs_nlocals); +#endif /* __APPLE__ */ ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; @@ -4613,7 +5682,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(id < 
vstate->dtvs_nlocals); +#else + ASSERT(id < (uint_t)vstate->dtvs_nlocals); +#endif /* __APPLE__ */ ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; @@ -4636,6 +5709,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type); break; @@ -4662,7 +5740,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, - sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC); + sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, + mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4697,7 +5776,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, @@ -4709,11 +5788,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_set_thread_predcache(current_thread(), 0); #endif /* __APPLE__ */ - if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], + &v->dtdv_type, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type); } else { @@ -4801,7 +5884,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), - DTRACE_DYNVAR_NOALLOC); + DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4842,12 +5925,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 
v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type); } else { @@ -4861,17 +5949,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + - mstate->dtms_scratch_size) { + /* + * Rounding up the user allocation size could have + * overflowed large, bogus allocations (like -1ULL) to + * 0. + */ + if (size < regs[r1] || + !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = NULL; - } else { - dtrace_bzero((void *) - mstate->dtms_scratch_ptr, size); + break; + } + + dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; - } break; } @@ -4883,6 +5975,9 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; } + if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) + break; + dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; @@ -4959,8 +6054,13 @@ dtrace_action_breakpoint(dtrace_ecb_t *ecb) dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; +#if !defined(__APPLE__) /* Quiet compiler warnings */ char *msg = "dtrace: breakpoint action at probe "; char *ecbmsg = " (ecb "; +#else + const char *msg = "dtrace: breakpoint action at probe "; + const char *ecbmsg = " (ecb "; +#endif /* __APPLE__ */ uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 
4, i = 0; @@ -5080,7 +6180,7 @@ dtrace_action_raise(uint64_t sig) if (uthread && uthread->t_dtrace_sig == 0) { uthread->t_dtrace_sig = sig; - psignal(current_proc(), (int)sig); + astbsd_on(); } #endif /* __APPLE__ */ } @@ -5098,7 +6198,12 @@ dtrace_action_stop(void) aston(curthread); } #else - psignal(current_proc(), SIGSTOP); + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); + + if (uthread && uthread->t_dtrace_stop == 0) { + uthread->t_dtrace_stop = 1; + astbsd_on(); + } #endif /* __APPLE__ */ } @@ -5173,8 +6278,11 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ + if (!DTRACE_INSCRATCH(mstate, size)) { +#else + if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) { +#endif /* __APPLE__ */ /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. @@ -5291,11 +6399,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, #if !defined(__APPLE__) /* * Kick out immediately if this CPU is still being born (in which case - * curthread will be set to -1) + * curthread will be set to -1) or the current thread can't allow + * probes in its current context. */ - if ((uintptr_t)curthread & 1) + if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) return; #else + /* Not a concern for Darwin */ #endif /* __APPLE__ */ cookie = dtrace_interrupt_disable(); @@ -5333,6 +6443,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; #else + /* FIXME: the time spent entering DTrace and arriving to this point is attributed + to the current thread. Instead it should accrue to DTrace. 
*/ vtime = dtrace_vtime_references != 0; if (vtime) @@ -5358,10 +6470,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, /* * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances. * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes - * NULL through "arg0" and the probe_id of the ovedrriden probe as arg1. Detect that here + * 0 through "arg0" and the probe_id of the overridden probe as arg1. Detect that here * and cons up a viable state (from the probe_id). */ - if (dtrace_probeid_error == id && NULL == arg0) { + if (dtrace_probeid_error == id && 0 == arg0) { dtrace_id_t ftp_id = (dtrace_id_t)arg1; dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1]; dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb; @@ -5379,7 +6491,9 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, } #endif /* __APPLE__ */ + mstate.dtms_difo = NULL; mstate.dtms_probe = probe; + mstate.dtms_strtok = NULL; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; @@ -5483,6 +6597,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; +#pragma unused(proc) /* __APPLE__ */ ASSERT(s_cr != NULL); @@ -5510,14 +6625,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr) /* __APPLE__ */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) /* Darwin doesn't do zones. */ +#if !defined(__APPLE__) if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; +#else + /* Darwin doesn't do zones. 
*/ #endif /* __APPLE__ */ } } @@ -5559,6 +6677,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; + if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) + mstate.dtms_access = DTRACE_ACCESS_KERNEL; + else + mstate.dtms_access = 0; + if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; int rval; @@ -5639,10 +6762,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (!dtrace_priv_kernel(state)) continue; +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : (uint32_t *)arg0); +#else + dtrace_getpcstack((pc_t *)(tomax + valoffs), + size / sizeof (pc_t), probe->dtpr_aframes, + DTRACE_ANCHORED(probe) ? NULL : + (uint32_t *)(uintptr_t)arg0); +#endif /* __APPLE__ */ continue; @@ -5761,6 +6891,9 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: +#if defined(__APPLE__) + case DTRACEACT_APPLEBINARY: +#endif /* __APPLE__ */ break; case DTRACEACT_SYM: @@ -5842,6 +6975,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { uintptr_t end = valoffs + size; + if (!dtrace_vcanload((void *)(uintptr_t)val, + &dp->dtdo_rtype, &mstate, vstate)) + continue; + /* * If this is a string, we're going to only * load until we find the zero byte -- after @@ -5965,6 +7102,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (vtime) curthread->t_dtrace_start = dtrace_gethrtime(); #else + /* FIXME: the time spent leaving DTrace from this point to the rti is attributed + to the current thread. Instead it should accrue to DTrace. 
*/ if (vtime) { thread_t thread = current_thread(); int64_t t = dtrace_get_thread_tracing(thread); @@ -5983,7 +7122,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, } #if defined(__APPLE__) -/* Don't allow a thread to re-enter dtrace_probe() */ +/* Don't allow a thread to re-enter dtrace_probe(). This could occur if a probe is encountered + on some function in the transitive closure of the call to dtrace_probe(). Solaris has some + strong guarantees that this won't happen, the Darwin implementation is not so mature as to + make those guarantees. */ void dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) @@ -5992,12 +7134,15 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (id == dtrace_probeid_error) { __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */ + dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */ } else if (!dtrace_get_thread_reentering(thread)) { dtrace_set_thread_reentering(thread, TRUE); __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); dtrace_set_thread_reentering(thread, FALSE); } +#if DEBUG + else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN); +#endif } #endif /* __APPLE__ */ @@ -6013,7 +7158,11 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * specified.) */ static uint_t +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_hash_str(char *p) +#else +dtrace_hash_str(const char *p) +#endif /* __APPLE__ */ { unsigned int g; uint_t hval = 0; @@ -6045,11 +7194,11 @@ dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) return (hash); } -#if !defined(__APPLE__) /* Quiet compiler warning */ +#if !defined(__APPLE__) /* Unused. Quiet compiler warning. 
*/ static void dtrace_hash_destroy(dtrace_hash_t *hash) { -#ifdef DEBUG +#if DEBUG int i; for (i = 0; i < hash->dth_size; i++) @@ -6232,6 +7381,7 @@ dtrace_badattr(const dtrace_attribute_t *a) * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. */ +#if !defined(__APPLE__) static char * dtrace_strdup(const char *str) { @@ -6242,6 +7392,19 @@ dtrace_strdup(const char *str) return (new); } +#else /* Employ size bounded string operation. */ +static char * +dtrace_strdup(const char *str) +{ + size_t bufsize = (str != NULL ? strlen(str) : 0) + 1; + char *new = kmem_zalloc(bufsize, KM_SLEEP); + + if (str != NULL) + (void) strlcpy(new, str, bufsize); + + return (new); +} +#endif /* __APPLE__ */ #define DTRACE_ISALPHA(c) \ (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) @@ -6300,7 +7463,11 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) static void dtrace_errdebug(const char *str) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ; +#else + int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; +#endif /* __APPLE__ */ int occupied = 0; lck_mtx_lock(&dtrace_errlock); @@ -6308,7 +7475,7 @@ dtrace_errdebug(const char *str) #if !defined(__APPLE__) dtrace_errthread = curthread; #else - dtrace_errthread = current_thread(); + dtrace_errthread = (kthread_t *)current_thread(); #endif /* __APPLE__ */ while (occupied++ < DTRACE_ERRHASHSZ) { @@ -6536,14 +7703,19 @@ dtrace_match_glob(const char *s, const char *p, int depth) static int dtrace_match_string(const char *s, const char *p, int depth) { +#pragma unused(depth) /* __APPLE__ */ +#if !defined(__APPLE__) return (s != NULL && strcmp(s, p) == 0); +#else /* Employ size bounded string operation. 
*/ + return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0); +#endif /* __APPLE__ */ } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { -#pragma unused(s,p,depth) +#pragma unused(s, p, depth) /* __APPLE__ */ return (1); /* always match the empty pattern */ } @@ -6551,7 +7723,7 @@ dtrace_match_nul(const char *s, const char *p, int depth) static int dtrace_match_nonzero(const char *s, const char *p, int depth) { -#pragma unused(p,depth) +#pragma unused(p, depth) /* __APPLE__ */ return (s != NULL && s[0] != '\0'); } @@ -6579,9 +7751,15 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, return (nmatched); } +#if !defined(__APPLE__) /* Quiet compiler warnings */ template.dtpr_mod = (char *)pkp->dtpk_mod; template.dtpr_func = (char *)pkp->dtpk_func; template.dtpr_name = (char *)pkp->dtpk_name; +#else + template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod; + template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func; + template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name; +#endif /* __APPLE__ */ /* * We want to find the most distinct of the module name, function @@ -6612,7 +7790,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, * invoke our callback for each one that matches our input probe key. */ if (hash == NULL) { +#if !defined(__APPLE__) /* Quiet compiler warning */ for (i = 0; i < dtrace_nprobes; i++) { +#else + for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) { +#endif /* __APPLE__ */ if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) @@ -6765,8 +7947,16 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); +#if !defined(__APPLE__) provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(provider->dtpv_name, name); +#else /* Employ size bounded string operation. 
*/ + { + size_t bufsize = strlen(name) + 1; + provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP); + (void) strlcpy(provider->dtpv_name, name, bufsize); + } +#endif /* __APPLE__ */ provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; @@ -6870,7 +8060,6 @@ dtrace_unregister(dtrace_provider_id_t id) ASSERT(dtrace_devi != NULL); lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - self = 1; if (dtrace_provider->dtpv_next != NULL) { @@ -7145,7 +8334,11 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); +#if !defined(__APPLE__) /* Quiet compiler warning */ if (id - 1 >= dtrace_nprobes) { +#else + if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { +#endif /* __APPLE__ */ size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; @@ -7178,7 +8371,11 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_nprobes <<= 1; } +#if !defined(__APPLE__) /* Quiet compiler warning */ ASSERT(id - 1 < dtrace_nprobes); +#else + ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes); +#endif /* __APPLE__ */ } ASSERT(dtrace_probes[id - 1] == NULL); @@ -7195,8 +8392,13 @@ dtrace_probe_lookup_id(dtrace_id_t id) { lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); +#if !defined(__APPLE__) /* Quiet compiler warning */ if (id == 0 || id > dtrace_nprobes) return (NULL); +#else + if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) + return (NULL); +#endif /* __APPLE__ */ return (dtrace_probes[id - 1]); } @@ -7269,12 +8471,21 @@ dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; +#if !defined(__APPLE__) + (void) strncpy(pdp->dtpd_provider, + prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); + + (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); + (void) strncpy(pdp->dtpd_func, 
prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); + (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); +#else /* Employ size bounded string operation. */ (void) strlcpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN); (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN); (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN); (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN); +#endif /* __APPLE__ */ } /* @@ -7297,6 +8508,7 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) { struct modctl *ctl; int all = 0; +#pragma unused(ctl) /* __APPLE__ */ lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); @@ -7318,7 +8530,7 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) * that this also prevents the mod_busy bits from changing. * (mod_busy can only be changed with mod_lock held.) */ - lck_mtx_lock(&mod_lock); + mutex_enter(&mod_lock); ctl = &modules; do { @@ -7329,9 +8541,10 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) } while ((ctl = ctl->mod_next) != &modules); - lck_mtx_unlock(&mod_lock); + mutex_exit(&mod_lock); #else #if 0 /* FIXME: Workaround for PR_4643546 */ + /* NOTE: kmod_lock has been removed. */ simple_lock(&kmod_lock); kmod_info_t *ktl = kmod; @@ -7516,15 +8729,23 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; -#if defined(__APPLE__) - dhpb.dthpb_base = dhp->dofhp_addr; -#else +#if !defined(__APPLE__) dhpb.dthpb_base = probe->dofpr_addr; +#else + dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? 
*/ #endif +#if !defined(__APPLE__) /* Quiet compiler warning */ dhpb.dthpb_offs = off + probe->dofpr_offidx; +#else + dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx); +#endif /* __APPLE__ */ dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { +#if !defined(__APPLE__) /* Quiet compiler warning */ dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; +#else + dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx); +#endif /* __APPLE__ */ dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; @@ -7545,7 +8766,11 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + uint32_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -7602,7 +8827,11 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + uint32_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -7629,7 +8858,11 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ *idp = DTRACE_METAPROVNONE; @@ -7654,8 +8887,16 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; +#if !defined(__APPLE__) meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(meta->dtm_name, name); +#else /* Employ size bounded string operation. 
*/ + { + size_t bufsize = strlen(name) + 1; + meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP); + (void) strlcpy(meta->dtm_name, name, bufsize); + } +#endif /* __APPLE__ */ meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -7771,13 +9012,18 @@ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int err = 0, i; - int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; - int kcheck; - uint_t pc; - - kcheck = cr == NULL || - PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE) == 0; +#else + int err = 0; + uint_t i; +#endif /* __APPLE__ */ + int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; + int kcheckload; + uint_t pc; + + kcheckload = cr == NULL || + (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; @@ -7845,7 +9091,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); - if (kcheck) + if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; @@ -8056,7 +9302,12 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; +#if !defined(__APPLE__) /* Quiet compiler warnings */ uint_t id, ndx; +#else + uint_t id; + int ndx; +#endif /* __APPLE__ */ if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && @@ -8281,13 +9532,24 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) subr == DIF_SUBR_COPYINTO || subr == DIF_SUBR_COPYINSTR || subr == DIF_SUBR_INDEX || + subr == DIF_SUBR_INET_NTOA || + subr == DIF_SUBR_INET_NTOA6 || + subr == DIF_SUBR_INET_NTOP || subr == DIF_SUBR_LLTOSTR || subr == DIF_SUBR_RINDEX || subr == DIF_SUBR_STRCHR || subr == 
DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || - subr == DIF_SUBR_CHUD) +#if defined(__APPLE__) + subr == DIF_SUBR_COREPROFILE || +#endif /* __APPLE__ */ + subr == DIF_SUBR_HTONS || + subr == DIF_SUBR_HTONL || + subr == DIF_SUBR_HTONLL || + subr == DIF_SUBR_NTOHS || + subr == DIF_SUBR_NTOHL || + subr == DIF_SUBR_NTOHLL) break; err += efunc(pc, "invalid subr %u\n", subr); @@ -8309,7 +9571,11 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) static int dtrace_difo_cacheable(dtrace_difo_t *dp) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ if (dp == NULL) return (0); @@ -8354,7 +9620,11 @@ dtrace_difo_cacheable(dtrace_difo_t *dp) static void dtrace_difo_hold(dtrace_difo_t *dp) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -8386,7 +9656,11 @@ dtrace_difo_hold(dtrace_difo_t *dp) static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ uint64_t sval; +#else + uint64_t sval = 0; +#endif /* __APPLE__ */ dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; @@ -8521,18 +9795,28 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i, oldsvars, osz, nsz, otlocals, ntlocals; uint_t id; +#else + int oldsvars, osz, nsz, otlocals, ntlocals; + uint_t i, id; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_statvar_t *svar, ***svarp; +#else + dtrace_statvar_t *svar; + 
dtrace_statvar_t ***svarp = NULL; +#endif /* __APPLE__ */ size_t dsize = 0; uint8_t scope = v->dtdv_scope; - int *np; + int *np = (int *)NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; @@ -8541,7 +9825,11 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) switch (scope) { case DIFV_SCOPE_THREAD: +#if !defined(__APPLE__) /* Quiet compiler warnings */ while (id >= (otlocals = vstate->dtvs_ntlocals)) { +#else + while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) { +#endif /* __APPLE__ */ dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) @@ -8591,7 +9879,11 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) ASSERT(0); } +#if !defined(__APPLE__) /* Quiet compiler warnings */ while (id >= (oldsvars = *np)) { +#else + while (id >= (uint_t)(oldsvars = *np)) { +#endif /* __APPLE__ */ dtrace_statvar_t **statics; int newsvars, oldsize, newsize; @@ -8678,16 +9970,28 @@ dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_statvar_t *svar, **svarp; uint_t id; uint8_t scope = v->dtdv_scope; - int *np; + int *np; +#else + dtrace_statvar_t *svar; + dtrace_statvar_t **svarp = NULL; + uint_t id; + uint8_t scope = v->dtdv_scope; + int *np = NULL; +#endif /* __APPLE__ */ switch (scope) { case DIFV_SCOPE_THREAD: @@ -8711,7 +10015,12 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; id -= DIF_VAR_OTHER_UBASE; + +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(id < *np); +#else + ASSERT(id < (uint_t)*np); +#endif /* __APPLE__ */ svar = svarp[id]; ASSERT(svar != NULL); @@ -8741,7 +10050,11 @@ dtrace_difo_destroy(dtrace_difo_t *dp, 
dtrace_vstate_t *vstate) static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); @@ -8897,6 +10210,7 @@ static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; +#pragma unused(dp) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); @@ -8917,8 +10231,8 @@ dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, { dtrace_actdesc_t *act; -/* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && - arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/ + ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && + arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; @@ -8954,8 +10268,8 @@ dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; -/* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || - (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/ + ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || + (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); if (str != NULL) kmem_free(str, strlen(str) + 1); @@ -8988,11 +10302,19 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) epid = state->dts_epid++; +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (epid - 1 >= state->dts_necbs) { +#else + if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) { +#endif /* __APPLE__ */ dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(epid == state->dts_necbs + 1); +#else + ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1); 
+#endif /* __APPLE__ */ if (necbs == 0) { ASSERT(oecbs == NULL); @@ -9217,11 +10539,12 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) switch (desc->dtad_kind) { case DTRACEAGG_MIN: - agg->dtag_initial = UINT64_MAX; + agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: + agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; @@ -9254,6 +10577,11 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) size = sizeof (uint64_t) * 2; break; + case DTRACEAGG_STDDEV: + agg->dtag_aggregate = dtrace_aggregate_stddev; + size = sizeof (uint64_t) * 4; + break; + case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; @@ -9312,13 +10640,21 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (aggid - 1 >= state->dts_naggregations) { +#else + if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) { +#endif /* __APPLE__ */ dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(aggid == state->dts_naggregations + 1); +#else + ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1); +#endif /* __APPLE */ if (naggs == 0) { ASSERT(oaggs == NULL); @@ -9376,7 +10712,12 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; +#if !defined(__APPLE__) /* Quiet compiler warnings */ dtrace_optval_t *opt = state->dts_options, nframes, strsize; +#else + dtrace_optval_t *opt = state->dts_options; + dtrace_optval_t nframes=0, strsize; +#endif /* __APPLE__ */ uint64_t arg = desc->dtad_arg; 
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -9422,7 +10763,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) format = 0; } else { ASSERT(arg != NULL); - /* ASSERT(arg > KERNELBASE); */ + ASSERT(arg > KERNELBASE); format = dtrace_format_add(state, (char *)(uintptr_t)arg); } @@ -9430,6 +10771,9 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) /*FALLTHROUGH*/ case DTRACEACT_LIBACT: case DTRACEACT_DIFEXPR: +#if defined(__APPLE__) + case DTRACEACT_APPLEBINARY: +#endif /* __APPLE__ */ if (dp == NULL) return (EINVAL); @@ -9860,10 +11204,15 @@ static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; +#pragma unused(ecb) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (id == 0 || id > state->dts_necbs) +#else + if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) +#endif /* __APPLE__ */ return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); @@ -9876,10 +11225,15 @@ static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; +#pragma unused(agg) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (id == 0 || id > state->dts_naggregations) +#else + if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) +#endif /* __APPLE__ */ return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); @@ -9965,9 +11319,16 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); +#else + if (size > (size_t)dtrace_nonroot_maxsize && + !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) 
+ return (EFBIG); +#endif /* __APPLE__ */ + #if defined(__APPLE__) if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */ @@ -10087,7 +11448,11 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, offs += sizeof (uint32_t); } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if ((soffs = offs + needed) > buf->dtb_size) { +#else + if ((uint64_t)(soffs = offs + needed) > buf->dtb_size) { +#endif /* __APPLE__ */ dtrace_buffer_drop(buf); return (-1); } @@ -10158,7 +11523,11 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ +#if !defined(__APPLE__) /* Quiet compiler warnings */ while (offs < buf->dtb_size) +#else + while ((uint64_t)offs < buf->dtb_size) +#endif /* __APPLE__ */ tomax[offs++] = 0; /* @@ -10195,14 +11564,22 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, } } +#if !defined(__APPLE__) /* Quiet compiler warnings */ while (offs + total_off > woffs) { +#else + while (offs + total_off > (size_t)woffs) { +#endif /* __APPLE__ */ dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { +#if !defined(__APPLE__) /* Quiet compiler warnings */ ASSERT(epid <= state->dts_necbs); +#else + ASSERT(epid <= (dtrace_epid_t)state->dts_necbs); +#endif /* __APPLE__ */ ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; @@ -10237,7 +11614,12 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, buf->dtb_offset = 0; woffs = total_off; +#if !defined(__APPLE__) /* Quiet compiler warnings */ while (woffs < buf->dtb_size) +#else + while ((uint64_t)woffs < buf->dtb_size) +#endif /* __APPLE__ */ + tomax[woffs++] = 0; } @@ -10391,7 +11773,7 @@ dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) ASSERT(enab->dten_next == NULL && 
enab->dten_prev == NULL); #if defined(__APPLE__) - if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */ + if (ecb == NULL) return; /* Note: protection against gcc 4.0 botch on x86 */ #endif /* __APPLE__ */ if (enab->dten_ndesc < enab->dten_maxdesc) { @@ -10498,6 +11880,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10540,6 +11923,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10592,6 +11976,7 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; +#if !defined(__APPLE__) if (strcmp(pd->dtpd_provider, match->dtpd_provider)) continue; @@ -10603,6 +11988,19 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, if (strcmp(pd->dtpd_name, match->dtpd_name)) continue; +#else /* Employ size bounded string operation. */ + if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN)) + continue; + + if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN)) + continue; + + if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN)) + continue; + + if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN)) + continue; +#endif /* __APPLE__ */ /* * We have a winning probe! Add it to our growing @@ -10707,43 +12105,29 @@ dtrace_enabling_matchall(void) lck_mtx_lock(&dtrace_lock); /* - * Because we can be called after dtrace_detach() has been called, we - * cannot assert that there are retained enablings. We can safely - * load from dtrace_retained, however: the taskq_destroy() at the - * end of dtrace_detach() will block pending our completion. 
+ * Iterate over all retained enablings to see if any probes match + * against them. We only perform this operation on enablings for which + * we have sufficient permissions by virtue of being in the global zone + * or in the same zone as the DTrace client. Because we can be called + * after dtrace_detach() has been called, we cannot assert that there + * are retained enablings. We can safely load from dtrace_retained, + * however: the taskq_destroy() at the end of dtrace_detach() will + * block pending our completion. */ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) - (void) dtrace_enabling_match(enab, NULL); - - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); -} - -static int -dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched) -{ - dtrace_enabling_t *enab; - int matched, total_matched = 0, err; - - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { - ASSERT(enab->dten_vstate->dtvs_state != NULL); - - if (enab->dten_vstate->dtvs_state != state) - continue; - - if ((err = dtrace_enabling_match(enab, &matched)) != 0) - return (err); +#if !defined(__APPLE__) + cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; - total_matched += matched; + if (INGLOBALZONE(curproc) || + cr != NULL && getzoneid() == crgetzoneid(cr)) + (void) dtrace_enabling_match(enab, NULL); +#else + (void) dtrace_enabling_match(enab, NULL); /* As if always in "global" zone." 
*/ +#endif /* __APPLE__ */ } - if (nmatched != NULL) - *nmatched = total_matched; - - return (0); + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); } /* @@ -10798,6 +12182,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; + dtrace_genid_t gen; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); @@ -10808,15 +12193,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } do { - dtrace_enabling_t *enab = dtrace_retained; + dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; - for (; enab != NULL; enab = enab->dten_next) { +retry: + gen = dtrace_retained_gen; + for (enab = dtrace_retained; enab != NULL; + enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; lck_mtx_unlock(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); lck_mtx_lock(&dtrace_lock); + /* + * Process the retained enablings again if + * they have changed while we weren't holding + * dtrace_lock. 
+ */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10833,7 +12228,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { -#pragma unused(dof) +#pragma unused(dof) /* __APPLE__ */ if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); @@ -10859,7 +12254,11 @@ dtrace_dof_create(dtrace_state_t *state) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); +#if !defined(__APPLE__) + dof = kmem_zalloc(len, KM_SLEEP); +#else dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); +#endif /* __APPLE__ */ dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; @@ -10906,10 +12305,10 @@ dtrace_dof_create(dtrace_state_t *state) } static dof_hdr_t * -#if defined(__APPLE__) -dtrace_dof_copyin(user_addr_t uarg, int *errp) -#else +#if !defined(__APPLE__) dtrace_dof_copyin(uintptr_t uarg, int *errp) +#else +dtrace_dof_copyin(user_addr_t uarg, int *errp) #endif { dof_hdr_t hdr, *dof; @@ -10919,10 +12318,10 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ -#if defined(__APPLE__) - if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { -#else +#if !defined(__APPLE__) if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { +#else + if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { #endif dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; @@ -10933,7 +12332,11 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. 
*/ +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { +#else + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { +#endif /* __APPLE__ */ dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -10945,12 +12348,14 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) return (NULL); } +#if !defined(__APPLE__) + dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); + + if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { +#else dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); -#if defined(__APPLE__) if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) { -#else - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { #endif dt_kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; @@ -10982,7 +12387,7 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -11020,9 +12425,15 @@ dtrace_dof_property(const char *name) * only) interpreted to be integer arrays. We must read our DOF * as an integer array, and then squeeze it into a byte array. 
*/ +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); +#else + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, + name, (int **)&buf, &len) != DDI_PROP_SUCCESS) + return (NULL); +#endif /* __APPLE__ */ for (i = 0; i < len; i++) buf[i] = (uchar_t)(((int *)buf)[i]); @@ -11039,13 +12450,21 @@ dtrace_dof_property(const char *name) return (NULL); } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (loadsz >= dtrace_dof_maxsize) { +#else + if (loadsz >= (uint64_t)dtrace_dof_maxsize) { +#endif /* __APPLE__ */ ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); } +#if !defined(__APPLE__) + dof = kmem_alloc(loadsz, KM_SLEEP); +#else dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); +#endif /* __APPLE__ */ bcopy(buf, dof, loadsz); ddi_prop_free(buf); @@ -11055,7 +12474,11 @@ dtrace_dof_property(const char *name) static void dtrace_dof_destroy(dof_hdr_t *dof) { +#if !defined(__APPLE__) + kmem_free(dof, dof->dofh_loadsz); +#else dt_kmem_free_aligned(dof, dof->dofh_loadsz); +#endif /* __APPLE__ */ } /* @@ -11129,6 +12552,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); +#if defined(__APPLE__) /* Employ size bounded string operation. */ + desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; +#endif /* __APPLE__ */ if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); @@ -11137,6 +12563,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); +#if defined(__APPLE__) /* Employ size bounded string operation. 
*/ + desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; +#endif /* __APPLE__ */ if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); @@ -11145,6 +12574,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); +#if defined(__APPLE__) /* Employ size bounded string operation. */ + desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; +#endif /* __APPLE__ */ if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); @@ -11153,6 +12585,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); +#if defined(__APPLE__) /* Employ size bounded string operation. */ + desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; +#endif /* __APPLE__ */ return (desc); } @@ -11166,7 +12601,13 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max_size = dtrace_difo_maxsize; +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i, l, n; +#else + uint_t i; + int l, n; +#endif /* __APPLE__ */ + static const struct { int section; @@ -11238,18 +12679,31 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { + +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (subsec->dofs_type != difo[i].section) continue; +#else + if (subsec->dofs_type != (uint32_t)difo[i].section) + continue; +#endif /* __APPLE __ */ if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "section not loaded"); goto err; } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (subsec->dofs_align != difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } +#else + 
if (subsec->dofs_align != (uint32_t)difo[i].align) { + dtrace_dof_error(dof, "bad alignment"); + goto err; + } +#endif /* __APPLE__ */ bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); @@ -11259,10 +12713,17 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } +#else + if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) { + dtrace_dof_error(dof, "entry size mismatch"); + goto err; + } +#endif /* __APPLE__ */ if (subsec->dofs_entsize != 0 && (subsec->dofs_size % subsec->dofs_entsize) != 0) { @@ -11291,7 +12752,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } } - + if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. @@ -11619,6 +13080,7 @@ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) { +#pragma unused(ubase) /* __APPLE__ */ uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; @@ -11769,11 +13231,6 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } #if !defined(__APPLE__) - /* - * APPLE NOTE: We have no relocation to perform. All dof values are - * relative offsets. - */ - /* * Take a second pass through the sections and locate and perform any * relocations that are present. We do this after the first pass to @@ -11793,6 +13250,11 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, break; } } +#else + /* + * APPLE NOTE: We have no relocation to perform. All dof values are + * relative offsets. 
+ */ #endif /* __APPLE__ */ if ((enab = *enabp) == NULL) @@ -11812,7 +13274,7 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } #else - /* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc + /* Note: Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc are checked for the NULL return value.) */ ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr); if (ep == NULL) { @@ -11835,7 +13297,12 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { +#if !defined(__APPLE__) /* Quiet compiler warnings */ int i, rval; +#else + uint_t i; + int rval; +#endif /* __APPLE__ */ uint32_t entsize; size_t offs; dof_optdesc_t *desc; @@ -11872,7 +13339,11 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) return (EINVAL); } +#if !defined(__APPLE__) /* Quiet compiler warnings */ if (desc->dofo_value == DTRACEOPT_UNSET) { +#else + if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) { +#endif /* __APPLE__ */ dtrace_dof_error(dof, "unset option"); return (EINVAL); } @@ -11891,7 +13362,7 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) /* * DTrace Consumer State Functions */ -#if defined(__APPLE__) +#if defined(__APPLE__) /* Quiet compiler warning. 
*/ static #endif /* __APPLE__ */ int @@ -11901,7 +13372,11 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + size_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); @@ -11954,7 +13429,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) maxper = (limit - (uintptr_t)start) / (int)NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; - for (i = 0; i < (int)NCPU; i++) { + for (i = 0; i < NCPU; i++) { dstate->dtds_percpu[i].dtdsc_free = dvar = start; /* @@ -11964,7 +13439,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) * whatever is left over. In either case, we set the limit to * be the limit of the dynamic variable space. */ - if (maxper == 0 || i == (int)NCPU - 1) { + if (maxper == 0 || i == NCPU - 1) { limit = (uintptr_t)base + size; start = NULL; } else { @@ -11992,7 +13467,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) return (0); } -#if defined(__APPLE__) +#if defined(__APPLE__) /* Quiet compiler warning. 
*/ static #endif /* __APPLE__ */ void @@ -12070,11 +13545,13 @@ dtrace_state_deadman(dtrace_state_t *state) state->dts_alive = now; } -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ +#if !defined(__APPLE__) dtrace_state_t * dtrace_state_create(dev_t *devp, cred_t *cr) +#else +static int +dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) +#endif /* __APPLE__ */ { minor_t minor; major_t major; @@ -12089,13 +13566,21 @@ dtrace_state_create(dev_t *devp, cred_t *cr) #if !defined(__APPLE__) minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); + + if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { + vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); + return (NULL); + } #else + /* Cause restart */ + *new_state = NULL; + /* * Darwin's DEVFS layer acquired the minor number for this "device" when it called * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number * (next unused according to vmem_alloc()) and then immediately put the number back in play * (by calling vmem_free()). Now that minor number is being used for an open, so committing it - * to use. The following vmem_alloc() must deliver that same minor number. + * to use. The following vmem_alloc() must deliver that same minor number. FIXME. */ minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, @@ -12107,20 +13592,20 @@ dtrace_state_create(dev_t *devp, cred_t *cr) printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n", getminor(*devp), minor); vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return NULL; + return (ERESTART); /* can't reacquire */ } } else { /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), * so just vend the minor device number here de novo since no "open" has occurred. 
*/ } -#endif /* __APPLE__ */ - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (NULL); + return (EAGAIN); /* temporary resource shortage */ } +#endif /* __APPLE__ */ + state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; @@ -12313,7 +13798,12 @@ dtrace_state_create(dev_t *devp, cred_t *cr) } } +#if !defined(__APPLE__) return (state); +#else + *new_state = state; + return(0); /* Success */ +#endif /* __APPLE__ */ } static int @@ -12351,7 +13841,11 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } +#if !defined(__APPLE__) /* Quiet compiler warning */ for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { +#else + for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) { +#endif /* __APPLE__ */ /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. @@ -12545,10 +14039,17 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) * If we have an aggregation buffer, we must also have * a buffer to use as scratch. */ +#if !defined(__APPLE__) /* Quiet compiler warning */ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } +#else + if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || + (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { + opt[DTRACEOPT_BUFSIZE] = state->dts_needed; + } +#endif /* __APPLE__ */ } } @@ -12904,7 +14405,7 @@ dtrace_state_destroy(dtrace_state_t *state) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { -#ifdef DEBUG +#if DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif @@ -12991,10 +14492,15 @@ dtrace_anon_property(void) * If we haven't allocated an anonymous state, we'll do so now. 
*/ if ((state = dtrace_anon.dta_state) == NULL) { +#if !defined(__APPLE__) state = dtrace_state_create(NULL, NULL); dtrace_anon.dta_state = state; - if (state == NULL) { +#else + rv = dtrace_state_create(NULL, NULL, &state); + dtrace_anon.dta_state = state; + if (rv != 0 || state == NULL) { +#endif /* __APPLE__ */ /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a @@ -13059,14 +14565,23 @@ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { +#if !defined(__APPLE__) /* Quiet compiler warning */ uint32_t size, next, nnext, i; +#else + uint32_t size, next, nnext; + int i; +#endif /* __APPLE__ */ dtrace_helptrace_t *ent; uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; if (!dtrace_helptrace_enabled) return; +#if !defined(__APPLE__) /* Quiet compiler warning */ ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); +#else + ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); +#endif /* __APPLE__ */ /* * What would a tracing framework be without its own tracing @@ -13238,7 +14753,11 @@ dtrace_helper_destroygen(proc_t* p, int gen) #endif dtrace_helpers_t *help = p->p_dtrace_helpers; dtrace_vstate_t *vstate; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -13411,7 +14930,11 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) last->dtha_next = helper; } +#if !defined(__APPLE__) /* Quiet compiler warning */ if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { +#else + if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { +#endif /* __APPLE__ */ dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } @@ -13469,7 +14992,11 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, * off to the meta provider. 
*/ +#if !defined(__APPLE__) /* Quiet compiler warning */ int i; +#else + uint_t i; +#endif /* __APPLE__ */ lck_mtx_unlock(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { @@ -13821,7 +15348,11 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * Look for helper providers and validate their descriptions. */ if (dhp != NULL) { +#if !defined(__APPLE__) /* Quiet compiler warning */ for (i = 0; i < dof->dofh_secnum; i++) { +#else + for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) { +#endif /* __APPLE__ */ dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); @@ -13845,6 +15376,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; +#if !defined(__APPLE__) if (strcmp(desc->dtpd_provider, "dtrace") != 0) continue; @@ -13853,14 +15385,25 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) if (strcmp(desc->dtpd_func, "ustack") != 0) continue; +#else /* Employ size bounded string operation. */ + if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace")) + continue; -#if !defined(__APPLE__) - if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#else - if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#endif - { - /* + if (!LIT_STRNEQL(desc->dtpd_mod, "helper")) + continue; + + if (!LIT_STRNEQL(desc->dtpd_func, "ustack")) + continue; +#endif /* __APPLE__ */ + +#if !defined(__APPLE__) + if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, + ep)) != 0) { +#else + if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, + ep)) != 0) { +#endif + /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. */ @@ -13968,7 +15511,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * If the dofs data is claimed by this method, dofs_claimed will be set. 
* Callers should not free claimed dofs. */ -int +static int dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed) { ASSERT(p); @@ -14050,7 +15593,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim for (i=0; i<all_dofs->dofiod_count-1; i++) { ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof); } -#endif DEBUG +#endif /* DEBUG */ unlock: lck_mtx_unlock(&p->p_dtrace_sprlock); @@ -14069,7 +15612,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim * EINVAL: lazy dof is enabled, but the requested generation was not found. * EACCES: This removal needs to be handled non-lazily. */ -int +static int dtrace_lazy_dofs_remove(proc_t *p, int generation) { int rval = EINVAL; @@ -14339,15 +15882,18 @@ dtrace_helpers_create(proc_t *p) static void dtrace_helpers_destroy(void) { + dtrace_helpers_t *help; + dtrace_vstate_t *vstate; proc_t *p = curproc; + int i; #else static void dtrace_helpers_destroy(proc_t* p) { -#endif dtrace_helpers_t *help; dtrace_vstate_t *vstate; - int i; + uint_t i; +#endif lck_mtx_lock(&dtrace_lock); @@ -14440,7 +15986,12 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; +#if !defined(__APPLE__) /* Quiet compiler warning */ int i, j, sz, hasprovs = 0; +#else + uint_t i; + int j, sz, hasprovs = 0; +#endif /* __APPLE__ */ lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); @@ -14474,6 +16025,7 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; new->dtha_actions = kmem_alloc(sz, KM_SLEEP); +#if !defined(__APPLE__) /* Quiet compiler warning */ for (j = 0; j < new->dtha_nactions; j++) { dtrace_difo_t *dp = helper->dtha_actions[j]; @@ -14481,6 +16033,15 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dp = dtrace_difo_duplicate(dp, vstate); new->dtha_actions[j] = dp; } +#else 
+ for (j = 0; j < new->dtha_nactions; j++) { + dtrace_difo_t *dpj = helper->dtha_actions[j]; + + ASSERT(dpj != NULL); + dpj = dtrace_difo_duplicate(dpj, vstate); + new->dtha_actions[j] = dpj; + } +#endif /* __APPLE__ */ if (last != NULL) { last->dtha_next = new; @@ -14526,7 +16087,11 @@ dtrace_module_loaded(struct modctl *ctl) lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&mod_lock); - // ASSERT(ctl->mod_busy); +#if !defined(__APPLE__) + ASSERT(ctl->mod_busy); +#else + /* FIXME: awaits kmod awareness PR_4648477. */ +#endif /* __APPLE__ */ /* * We're going to call each providers per-module provide operation @@ -14661,7 +16226,7 @@ dtrace_module_unloaded(struct modctl *ctl) kmem_free(probe, sizeof (dtrace_probe_t)); #else zfree(dtrace_probe_t_zone, probe); -#endif +#endif /* __APPLE__ */ } lck_mtx_unlock(&dtrace_lock); @@ -14790,6 +16355,7 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) static int dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { +#pragma unused(cmd) /* __APPLE__ */ dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; @@ -14820,6 +16386,8 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_unlock(&dtrace_lock); return (DDI_FAILURE); } +#else + /* Darwin uses BSD cloning device driver to automagically obtain minor device number. 
*/ #endif /* __APPLE__ */ ddi_report_dev(devi); @@ -14834,8 +16402,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; - dtrace_kreloc_init = dtrace_suspend; - dtrace_kreloc_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); @@ -14854,7 +16420,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) NULL, NULL, NULL, NULL, NULL, 0); lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); @@ -14976,8 +16541,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } -extern void fasttrap_init(void); - /*ARGSUSED*/ static int dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) @@ -14987,6 +16550,9 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) uint32_t priv; uid_t uid; zoneid_t zoneid; +#if defined (__APPLE__) + int rv; +#endif /* __APPLE__ */ #if !defined(__APPLE__) if (getminor(*devp) == DTRACEMNRN_HELPER) @@ -14996,7 +16562,8 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); + if (getminor(*devp) != DTRACEMNRN_DTRACE) + return (ENXIO); #else /* Darwin puts Helper on its own major device. 
*/ #endif /* __APPLE__ */ @@ -15040,6 +16607,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) return (EBUSY); } +#if !defined(__APPLE__) state = dtrace_state_create(devp, cred_p); lck_mtx_unlock(&cpu_lock); @@ -15049,10 +16617,22 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) lck_mtx_unlock(&dtrace_lock); return (EAGAIN); } + + lck_mtx_unlock(&dtrace_lock); +#else + rv = dtrace_state_create(devp, cred_p, &state); + lck_mtx_unlock(&cpu_lock); + if (rv != 0 || state == NULL) { + if (--dtrace_opens == 0) + (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); + lck_mtx_unlock(&dtrace_lock); + /* propagate EAGAIN or ERESTART */ + return (rv); + } + lck_mtx_unlock(&dtrace_lock); -#if defined(__APPLE__) lck_rw_lock_exclusive(&dtrace_dof_mode_lock); /* @@ -15076,7 +16656,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) } lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); -#endif +#endif /* __APPLE__ */ return (0); } @@ -15085,7 +16665,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) static int dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { -#pragma unused(flag,otyp,cred_p) +#pragma unused(flag, otyp, cred_p) /* __APPLE__ */ minor_t minor = getminor(dev); dtrace_state_t *state; @@ -15136,187 +16716,78 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) lck_mtx_unlock(&dtrace_lock); lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); -#endif +#endif /* __APPLE__ */ return (0); } -#if defined(__APPLE__) -/* - * Introduce cast to quiet warnings. - * XXX: This hides a lot of brokenness. 
- */ -#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) ) -#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) ) -#endif /* __APPLE__ */ - -#if defined(__APPLE__) +#if !defined(__APPLE__) /*ARGSUSED*/ static int -dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) +dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) { -#pragma unused(rv) - /* - * Safe to check this outside the dof mode lock - */ - if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER) - return KERN_SUCCESS; + int rval; + dof_helper_t help, *dhp = NULL; switch (cmd) { - case DTRACEHIOC_ADDDOF: { - dof_helper_t *dhp = NULL; - size_t dof_ioctl_data_size; - dof_ioctl_data_t* multi_dof; - unsigned int i; - int rval = 0; - user_addr_t user_address = *(user_addr_t*)arg; - uint64_t dof_count; - int multi_dof_claimed = 0; - proc_t* p = current_proc(); + case DTRACEHIOC_ADDDOF: + if (copyin((void *)arg, &help, sizeof (help)) != 0) { + dtrace_dof_error(NULL, "failed to copyin DOF helper"); + return (EFAULT); + } - /* - * Read the number of DOF sections being passed in. - */ - if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count), - &dof_count, - sizeof(dof_count))) { - dtrace_dof_error(NULL, "failed to copyin dofiod_count"); - return (EFAULT); - } - - /* - * Range check the count. - */ - if (dof_count == 0 || dof_count > 1024) { - dtrace_dof_error(NULL, "dofiod_count is not valid"); - return (EINVAL); - } - - /* - * Allocate a correctly sized structure and copyin the data. - */ - dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count); - if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL) - return (ENOMEM); - - /* NOTE! We can no longer exit this method via return */ - if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) { - dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t"); - rval = EFAULT; - goto cleanup; - } - - /* - * Check that the count didn't change between the first copyin and the second. 
- */ - if (multi_dof->dofiod_count != dof_count) { - rval = EINVAL; - goto cleanup; - } + dhp = &help; + arg = (intptr_t)help.dofhp_dof; + /*FALLTHROUGH*/ - /* - * Try to process lazily first. - */ - rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed); + case DTRACEHIOC_ADD: { + dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); - /* - * If rval is EACCES, we must be non-lazy. - */ - if (rval == EACCES) { - rval = 0; - /* - * Process each dof_helper_t - */ - i = 0; - do { - dhp = &multi_dof->dofiod_helpers[i]; - - dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval); - - if (dof != NULL) { - lck_mtx_lock(&dtrace_lock); - - /* - * dtrace_helper_slurp() takes responsibility for the dof -- - * it may free it now or it may save it and free it later. - */ - if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) { - rval = EINVAL; - } - - lck_mtx_unlock(&dtrace_lock); - } - } while (++i < multi_dof->dofiod_count && rval == 0); - } + if (dof == NULL) + return (rval); - /* - * We need to copyout the multi_dof struct, because it contains - * the generation (unique id) values needed to call DTRACEHIOC_REMOVE - * - * This could certainly be better optimized. - */ - if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) { - dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t"); - /* Don't overwrite pre-existing error code */ - if (rval == 0) rval = EFAULT; - } - - cleanup: - /* - * If we had to allocate struct memory, free it. - */ - if (multi_dof != NULL && !multi_dof_claimed) { - kmem_free(multi_dof, dof_ioctl_data_size); - } - - return rval; + mutex_enter(&dtrace_lock); + + /* + * dtrace_helper_slurp() takes responsibility for the dof -- + * it may free it now or it may save it and free it later. 
+ */ + if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { + *rv = rval; + rval = 0; + } else { + rval = EINVAL; } - case DTRACEHIOC_REMOVE: { - int generation = *(int*)arg; - proc_t* p = current_proc(); + mutex_exit(&dtrace_lock); + return (rval); + } - /* - * Try lazy first. - */ - int rval = dtrace_lazy_dofs_remove(p, generation); - - /* - * EACCES means non-lazy - */ - if (rval == EACCES) { - lck_mtx_lock(&dtrace_lock); - rval = dtrace_helper_destroygen(p, generation); - lck_mtx_unlock(&dtrace_lock); - } + case DTRACEHIOC_REMOVE: { + mutex_enter(&dtrace_lock); + rval = dtrace_helper_destroygen(arg); + mutex_exit(&dtrace_lock); - return (rval); - } + return (rval); + } - default: - break; + default: + break; } - return ENOTTY; + return (ENOTTY); } -#endif /* __APPLE__ */ /*ARGSUSED*/ static int -dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +dtrace_ioctl(dev_t dev, u_long cmd, intptr_t arg, int md, cred_t *cr, int *rv) { -#pragma unused(md) - minor_t minor = getminor(dev); dtrace_state_t *state; int rval; -#if !defined(__APPLE__) if (minor == DTRACEMNRN_HELPER) return (dtrace_ioctl_helper(cmd, arg, rv)); -#else - /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ state = ddi_get_soft_state(dtrace_softstate, minor); @@ -15544,13 +17015,9 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * cue to reevaluate our enablings. 
*/ if (arg == NULL) { - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_lock); - err = dtrace_enabling_matchstate(state, rv); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); + dtrace_enabling_matchall(); - return (err); + return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) @@ -15811,7 +17278,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); - if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= (int)NCPU) + if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) return (EINVAL); lck_mtx_lock(&dtrace_lock); @@ -15977,7 +17444,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; - for (i = 0; i < (int)NCPU; i++) { + for (i = 0; i < NCPU; i++) { dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat.dtst_dyndrops += dcpu->dtdsc_drops; @@ -16067,33 +17534,938 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (ENOTTY); } - -#if defined(__APPLE__) -#undef copyin -#undef copyout -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) +#else /*ARGSUSED*/ static int -dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) { - dtrace_state_t *state; +#pragma unused(rv) + /* + * Safe to check this outside the dof mode lock + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER) + return KERN_SUCCESS; switch (cmd) { - case DDI_DETACH: - break; - - case DDI_SUSPEND: - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } + case DTRACEHIOC_ADDDOF: { + dof_helper_t *dhp = NULL; + size_t dof_ioctl_data_size; + dof_ioctl_data_t* multi_dof; + unsigned int i; + int rval = 0; + user_addr_t user_address = *(user_addr_t*)arg; + uint64_t dof_count; + int multi_dof_claimed = 0; + proc_t* p = current_proc(); - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_provider_lock); - 
lck_mtx_lock(&dtrace_lock); + /* + * Read the number of DOF sections being passed in. + */ + if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count), + &dof_count, + sizeof(dof_count))) { + dtrace_dof_error(NULL, "failed to copyin dofiod_count"); + return (EFAULT); + } + + /* + * Range check the count. + */ + if (dof_count == 0 || dof_count > 1024) { + dtrace_dof_error(NULL, "dofiod_count is not valid"); + return (EINVAL); + } + + /* + * Allocate a correctly sized structure and copyin the data. + */ + dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count); + if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL) + return (ENOMEM); + + /* NOTE! We can no longer exit this method via return */ + if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) { + dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t"); + rval = EFAULT; + goto cleanup; + } + + /* + * Check that the count didn't change between the first copyin and the second. + */ + if (multi_dof->dofiod_count != dof_count) { + rval = EINVAL; + goto cleanup; + } + + /* + * Try to process lazily first. + */ + rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed); + + /* + * If rval is EACCES, we must be non-lazy. + */ + if (rval == EACCES) { + rval = 0; + /* + * Process each dof_helper_t + */ + i = 0; + do { + dhp = &multi_dof->dofiod_helpers[i]; + + dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval); + + if (dof != NULL) { + lck_mtx_lock(&dtrace_lock); + + /* + * dtrace_helper_slurp() takes responsibility for the dof -- + * it may free it now or it may save it and free it later. 
+ */ + if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) { + rval = EINVAL; + } + + lck_mtx_unlock(&dtrace_lock); + } + } while (++i < multi_dof->dofiod_count && rval == 0); + } + + /* + * We need to copyout the multi_dof struct, because it contains + * the generation (unique id) values needed to call DTRACEHIOC_REMOVE + * + * This could certainly be better optimized. + */ + if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) { + dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t"); + /* Don't overwrite pre-existing error code */ + if (rval == 0) rval = EFAULT; + } + + cleanup: + /* + * If we had to allocate struct memory, free it. + */ + if (multi_dof != NULL && !multi_dof_claimed) { + kmem_free(multi_dof, dof_ioctl_data_size); + } + + return rval; + } + + case DTRACEHIOC_REMOVE: { + int generation = *(int*)arg; + proc_t* p = current_proc(); + + /* + * Try lazy first. + */ + int rval = dtrace_lazy_dofs_remove(p, generation); + + /* + * EACCES means non-lazy + */ + if (rval == EACCES) { + lck_mtx_lock(&dtrace_lock); + rval = dtrace_helper_destroygen(p, generation); + lck_mtx_unlock(&dtrace_lock); + } + + return (rval); + } + + default: + break; + } + + return ENOTTY; +} + +/*ARGSUSED*/ +static int +dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv) +{ +#pragma unused(md) + minor_t minor = getminor(dev); + dtrace_state_t *state; + int rval; + + /* Darwin puts Helper on its own major device. 
*/ + + state = ddi_get_soft_state(dtrace_softstate, minor); + + if (state->dts_anon) { + ASSERT(dtrace_anon.dta_state == NULL); + state = state->dts_anon; + } + + switch (cmd) { + case DTRACEIOC_PROVIDER: { + dtrace_providerdesc_t pvd; + dtrace_provider_t *pvp; + + if (copyin(arg, &pvd, sizeof (pvd)) != 0) + return (EFAULT); + + pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; + lck_mtx_lock(&dtrace_provider_lock); + + for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { + if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0) + break; + } + + lck_mtx_unlock(&dtrace_provider_lock); + + if (pvp == NULL) + return (ESRCH); + + bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); + bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); + if (copyout(&pvd, arg, sizeof (pvd)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_EPROBE: { + dtrace_eprobedesc_t epdesc; + dtrace_ecb_t *ecb; + dtrace_action_t *act; + void *buf; + size_t size; + uintptr_t dest; + int nrecs; + + if (copyin(arg, &epdesc, sizeof (epdesc)) != 0) + return (EFAULT); + + lck_mtx_lock(&dtrace_lock); + + if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + + if (ecb->dte_probe == NULL) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + + epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; + epdesc.dtepd_uarg = ecb->dte_uarg; + epdesc.dtepd_size = ecb->dte_size; + + nrecs = epdesc.dtepd_nrecs; + epdesc.dtepd_nrecs = 0; + for (act = ecb->dte_action; act != NULL; act = act->dta_next) { + if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) + continue; + + epdesc.dtepd_nrecs++; + } + + /* + * Now that we have the size, we need to allocate a temporary + * buffer in which to store the complete description. We need + * the temporary buffer to be able to drop dtrace_lock() + * across the copyout(), below. 
+ */ + size = sizeof (dtrace_eprobedesc_t) + + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); + + buf = kmem_alloc(size, KM_SLEEP); + dest = (uintptr_t)buf; + + bcopy(&epdesc, (void *)dest, sizeof (epdesc)); + dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); + + for (act = ecb->dte_action; act != NULL; act = act->dta_next) { + if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) + continue; + + if (nrecs-- == 0) + break; + + bcopy(&act->dta_rec, (void *)dest, + sizeof (dtrace_recdesc_t)); + dest += sizeof (dtrace_recdesc_t); + } + + lck_mtx_unlock(&dtrace_lock); + + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { + kmem_free(buf, size); + return (EFAULT); + } + + kmem_free(buf, size); + return (0); + } + + case DTRACEIOC_AGGDESC: { + dtrace_aggdesc_t aggdesc; + dtrace_action_t *act; + dtrace_aggregation_t *agg; + int nrecs; + uint32_t offs; + dtrace_recdesc_t *lrec; + void *buf; + size_t size; + uintptr_t dest; + + if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0) + return (EFAULT); + + lck_mtx_lock(&dtrace_lock); + + if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + + aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; + + nrecs = aggdesc.dtagd_nrecs; + aggdesc.dtagd_nrecs = 0; + + offs = agg->dtag_base; + lrec = &agg->dtag_action.dta_rec; + aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; + + for (act = agg->dtag_first; ; act = act->dta_next) { + ASSERT(act->dta_intuple || + DTRACEACT_ISAGG(act->dta_kind)); + + /* + * If this action has a record size of zero, it + * denotes an argument to the aggregating action. + * Because the presence of this record doesn't (or + * shouldn't) affect the way the data is interpreted, + * we don't copy it out to save user-level the + * confusion of dealing with a zero-length record. 
+ */ + if (act->dta_rec.dtrd_size == 0) { + ASSERT(agg->dtag_hasarg); + continue; + } + + aggdesc.dtagd_nrecs++; + + if (act == &agg->dtag_action) + break; + } + + /* + * Now that we have the size, we need to allocate a temporary + * buffer in which to store the complete description. We need + * the temporary buffer to be able to drop dtrace_lock() + * across the copyout(), below. + */ + size = sizeof (dtrace_aggdesc_t) + + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); + + buf = kmem_alloc(size, KM_SLEEP); + dest = (uintptr_t)buf; + + bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); + dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); + + for (act = agg->dtag_first; ; act = act->dta_next) { + dtrace_recdesc_t rec = act->dta_rec; + + /* + * See the comment in the above loop for why we pass + * over zero-length records. + */ + if (rec.dtrd_size == 0) { + ASSERT(agg->dtag_hasarg); + continue; + } + + if (nrecs-- == 0) + break; + + rec.dtrd_offset -= offs; + bcopy(&rec, (void *)dest, sizeof (rec)); + dest += sizeof (dtrace_recdesc_t); + + if (act == &agg->dtag_action) + break; + } + + lck_mtx_unlock(&dtrace_lock); + + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { + kmem_free(buf, size); + return (EFAULT); + } + + kmem_free(buf, size); + return (0); + } + + case DTRACEIOC_ENABLE: { + dof_hdr_t *dof; + dtrace_enabling_t *enab = NULL; + dtrace_vstate_t *vstate; + int err = 0; + + *rv = 0; + + /* + * If a NULL argument has been passed, we take this as our + * cue to reevaluate our enablings. 
+ */ + if (arg == NULL) { + dtrace_enabling_matchall(); + + return (0); + } + + if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) + return (rval); + + lck_mtx_lock(&cpu_lock); + lck_mtx_lock(&dtrace_lock); + vstate = &state->dts_vstate; + + if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); + dtrace_dof_destroy(dof); + return (EBUSY); + } + + if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); + dtrace_dof_destroy(dof); + return (EINVAL); + } + + if ((rval = dtrace_dof_options(dof, state)) != 0) { + dtrace_enabling_destroy(enab); + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); + dtrace_dof_destroy(dof); + return (rval); + } + + if ((err = dtrace_enabling_match(enab, rv)) == 0) { + err = dtrace_enabling_retain(enab); + } else { + dtrace_enabling_destroy(enab); + } + + lck_mtx_unlock(&cpu_lock); + lck_mtx_unlock(&dtrace_lock); + dtrace_dof_destroy(dof); + + return (err); + } + + case DTRACEIOC_REPLICATE: { + dtrace_repldesc_t desc; + dtrace_probedesc_t *match = &desc.dtrpd_match; + dtrace_probedesc_t *create = &desc.dtrpd_create; + int err; + + if (copyin(arg, &desc, sizeof (desc)) != 0) + return (EFAULT); + + match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + lck_mtx_lock(&dtrace_lock); + err = dtrace_enabling_replicate(state, match, create); + lck_mtx_unlock(&dtrace_lock); + + return (err); + } + + case DTRACEIOC_PROBEMATCH: + case DTRACEIOC_PROBES: { + dtrace_probe_t *probe = NULL; + dtrace_probedesc_t desc; + dtrace_probekey_t pkey; + dtrace_id_t i; + int m = 
0; + uint32_t priv; + uid_t uid; + zoneid_t zoneid; + + if (copyin(arg, &desc, sizeof (desc)) != 0) + return (EFAULT); + + desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + + /* + * Before we attempt to match this probe, we want to give + * all providers the opportunity to provide it. + */ + if (desc.dtpd_id == DTRACE_IDNONE) { + lck_mtx_lock(&dtrace_provider_lock); + dtrace_probe_provide(&desc, NULL); + lck_mtx_unlock(&dtrace_provider_lock); + desc.dtpd_id++; + } + + if (cmd == DTRACEIOC_PROBEMATCH) { + dtrace_probekey(&desc, &pkey); + pkey.dtpk_id = DTRACE_IDNONE; + } + + dtrace_cred2priv(cr, &priv, &uid, &zoneid); + + lck_mtx_lock(&dtrace_lock); + + if (cmd == DTRACEIOC_PROBEMATCH) { + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i - 1]) != NULL && + (m = dtrace_match_probe(probe, &pkey, + priv, uid, zoneid)) != 0) + break; + } + + if (m < 0) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + + } else { + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { + if ((probe = dtrace_probes[i - 1]) != NULL && + dtrace_match_priv(probe, priv, uid, zoneid)) + break; + } + } + + if (probe == NULL) { + lck_mtx_unlock(&dtrace_lock); + return (ESRCH); + } + + dtrace_probe_description(probe, &desc); + lck_mtx_unlock(&dtrace_lock); + + if (copyout(&desc, arg, sizeof (desc)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_PROBEARG: { + dtrace_argdesc_t desc; + dtrace_probe_t *probe; + dtrace_provider_t *prov; + + if (copyin(arg, &desc, sizeof (desc)) != 0) + return (EFAULT); + + if (desc.dtargd_id == DTRACE_IDNONE) + return (EINVAL); + + if (desc.dtargd_ndx == DTRACE_ARGNONE) + return (EINVAL); + + lck_mtx_lock(&dtrace_provider_lock); + lck_mtx_lock(&mod_lock); + lck_mtx_lock(&dtrace_lock); + 
+ /* Quiet compiler warning */ + if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (EINVAL); + } + + if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (EINVAL); + } + + lck_mtx_unlock(&dtrace_lock); + + prov = probe->dtpr_provider; + + if (prov->dtpv_pops.dtps_getargdesc == NULL) { + /* + * There isn't any typed information for this probe. + * Set the argument number to DTRACE_ARGNONE. + */ + desc.dtargd_ndx = DTRACE_ARGNONE; + } else { + desc.dtargd_native[0] = '\0'; + desc.dtargd_xlate[0] = '\0'; + desc.dtargd_mapping = desc.dtargd_ndx; + + prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg, &desc); + } + + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + + if (copyout(&desc, arg, sizeof (desc)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_GO: { + processorid_t cpuid; + rval = dtrace_state_go(state, &cpuid); + + if (rval != 0) + return (rval); + + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_STOP: { + processorid_t cpuid; + + lck_mtx_lock(&dtrace_lock); + rval = dtrace_state_stop(state, &cpuid); + lck_mtx_unlock(&dtrace_lock); + + if (rval != 0) + return (rval); + + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_DOFGET: { + dof_hdr_t hdr, *dof; + uint64_t len; + + if (copyin(arg, &hdr, sizeof (hdr)) != 0) + return (EFAULT); + + lck_mtx_lock(&dtrace_lock); + dof = dtrace_dof_create(state); + lck_mtx_unlock(&dtrace_lock); + + len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); + rval = copyout(dof, arg, len); + dtrace_dof_destroy(dof); + + return (rval == 0 ? 
0 : EFAULT); + } + + case DTRACEIOC_AGGSNAP: + case DTRACEIOC_BUFSNAP: { + dtrace_bufdesc_t desc; + caddr_t cached; + dtrace_buffer_t *buf; + + if (copyin(arg, &desc, sizeof (desc)) != 0) + return (EFAULT); + + if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) + return (EINVAL); + + lck_mtx_lock(&dtrace_lock); + + if (cmd == DTRACEIOC_BUFSNAP) { + buf = &state->dts_buffer[desc.dtbd_cpu]; + } else { + buf = &state->dts_aggbuffer[desc.dtbd_cpu]; + } + + if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { + size_t sz = buf->dtb_offset; + + if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { + lck_mtx_unlock(&dtrace_lock); + return (EBUSY); + } + + /* + * If this buffer has already been consumed, we're + * going to indicate that there's nothing left here + * to consume. + */ + if (buf->dtb_flags & DTRACEBUF_CONSUMED) { + lck_mtx_unlock(&dtrace_lock); + + desc.dtbd_size = 0; + desc.dtbd_drops = 0; + desc.dtbd_errors = 0; + desc.dtbd_oldest = 0; + sz = sizeof (desc); + + if (copyout(&desc, arg, sz) != 0) + return (EFAULT); + + return (0); + } + + /* + * If this is a ring buffer that has wrapped, we want + * to copy the whole thing out. 
+ */ + if (buf->dtb_flags & DTRACEBUF_WRAPPED) { + dtrace_buffer_polish(buf); + sz = buf->dtb_size; + } + + if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) { + lck_mtx_unlock(&dtrace_lock); + return (EFAULT); + } + + desc.dtbd_size = sz; + desc.dtbd_drops = buf->dtb_drops; + desc.dtbd_errors = buf->dtb_errors; + desc.dtbd_oldest = buf->dtb_xamot_offset; + + lck_mtx_unlock(&dtrace_lock); + + if (copyout(&desc, arg, sizeof (desc)) != 0) + return (EFAULT); + + buf->dtb_flags |= DTRACEBUF_CONSUMED; + + return (0); + } + + if (buf->dtb_tomax == NULL) { + ASSERT(buf->dtb_xamot == NULL); + lck_mtx_unlock(&dtrace_lock); + return (ENOENT); + } + + cached = buf->dtb_tomax; + ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); + + dtrace_xcall(desc.dtbd_cpu, + (dtrace_xcall_t)dtrace_buffer_switch, buf); + + state->dts_errors += buf->dtb_xamot_errors; + + /* + * If the buffers did not actually switch, then the cross call + * did not take place -- presumably because the given CPU is + * not in the ready set. If this is the case, we'll return + * ENOENT. + */ + if (buf->dtb_tomax == cached) { + ASSERT(buf->dtb_xamot != cached); + lck_mtx_unlock(&dtrace_lock); + return (ENOENT); + } + + ASSERT(cached == buf->dtb_xamot); + + /* + * We have our snapshot; now copy it out. + */ + if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data, + buf->dtb_xamot_offset) != 0) { + lck_mtx_unlock(&dtrace_lock); + return (EFAULT); + } + + desc.dtbd_size = buf->dtb_xamot_offset; + desc.dtbd_drops = buf->dtb_xamot_drops; + desc.dtbd_errors = buf->dtb_xamot_errors; + desc.dtbd_oldest = 0; + + lck_mtx_unlock(&dtrace_lock); + + /* + * Finally, copy out the buffer description. 
+ */ + if (copyout(&desc, arg, sizeof (desc)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_CONF: { + dtrace_conf_t conf; + + bzero(&conf, sizeof (conf)); + conf.dtc_difversion = DIF_VERSION; + conf.dtc_difintregs = DIF_DIR_NREGS; + conf.dtc_diftupregs = DIF_DTR_NREGS; + conf.dtc_ctfmodel = CTF_MODEL_NATIVE; + + if (copyout(&conf, arg, sizeof (conf)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_STATUS: { + dtrace_status_t stat; + dtrace_dstate_t *dstate; + int i, j; + uint64_t nerrs; + + /* + * See the comment in dtrace_state_deadman() for the reason + * for setting dts_laststatus to INT64_MAX before setting + * it to the correct value. + */ + state->dts_laststatus = INT64_MAX; + dtrace_membar_producer(); + state->dts_laststatus = dtrace_gethrtime(); + + bzero(&stat, sizeof (stat)); + + lck_mtx_lock(&dtrace_lock); + + if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { + lck_mtx_unlock(&dtrace_lock); + return (ENOENT); + } + + if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) + stat.dtst_exiting = 1; + + nerrs = state->dts_errors; + dstate = &state->dts_vstate.dtvs_dynvars; + + for (i = 0; i < (int)NCPU; i++) { + dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; + + stat.dtst_dyndrops += dcpu->dtdsc_drops; + stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; + stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; + + if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) + stat.dtst_filled++; + + nerrs += state->dts_buffer[i].dtb_errors; + + for (j = 0; j < state->dts_nspeculations; j++) { + dtrace_speculation_t *spec; + dtrace_buffer_t *buf; + + spec = &state->dts_speculations[j]; + buf = &spec->dtsp_buffer[i]; + stat.dtst_specdrops += buf->dtb_xamot_drops; + } + } + + stat.dtst_specdrops_busy = state->dts_speculations_busy; + stat.dtst_specdrops_unavail = state->dts_speculations_unavail; + stat.dtst_stkstroverflows = state->dts_stkstroverflows; + stat.dtst_dblerrors = state->dts_dblerrors; + stat.dtst_killed = + 
(state->dts_activity == DTRACE_ACTIVITY_KILLED); + stat.dtst_errors = nerrs; + + lck_mtx_unlock(&dtrace_lock); + + if (copyout(&stat, arg, sizeof (stat)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_FORMAT: { + dtrace_fmtdesc_t fmt; + char *str; + int len; + + if (copyin(arg, &fmt, sizeof (fmt)) != 0) + return (EFAULT); + + lck_mtx_lock(&dtrace_lock); + + if (fmt.dtfd_format == 0 || + fmt.dtfd_format > state->dts_nformats) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + + /* + * Format strings are allocated contiguously and they are + * never freed; if a format index is less than the number + * of formats, we can assert that the format map is non-NULL + * and that the format for the specified index is non-NULL. + */ + ASSERT(state->dts_formats != NULL); + str = state->dts_formats[fmt.dtfd_format - 1]; + ASSERT(str != NULL); + + len = strlen(str) + 1; + + if (len > fmt.dtfd_length) { + fmt.dtfd_length = len; + + if (copyout(&fmt, arg, sizeof (fmt)) != 0) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + } else { + if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) { + lck_mtx_unlock(&dtrace_lock); + return (EINVAL); + } + } + + lck_mtx_unlock(&dtrace_lock); + return (0); + } + + default: + break; + } + + return (ENOTTY); +} +#endif /* __APPLE__ */ + +#if !defined(__APPLE__) +/*ARGSUSED*/ +static int +dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + dtrace_state_t *state; + + switch (cmd) { + case DDI_DETACH: + break; + + case DDI_SUSPEND: + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + + lck_mtx_lock(&cpu_lock); + lck_mtx_lock(&dtrace_provider_lock); + lck_mtx_lock(&dtrace_lock); ASSERT(dtrace_opens == 0); @@ -16281,7 +18653,7 @@ _fini(void) { return (mod_remove(&modlinkage)); } -#else +#else /* Darwin BSD driver model. 
*/ d_open_t _dtrace_open, helper_open; d_close_t _dtrace_close, helper_close; @@ -16322,16 +18694,22 @@ _dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) { #pragma unused(p) int err, rv = 0; + user_addr_t uaddrp; + + if (proc_is64bit(p)) + uaddrp = *(user_addr_t *)data; + else + uaddrp = (user_addr_t) *(uint32_t *)data; - err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv); + err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv); - /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ + /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ if (err != 0) { ASSERT( (err & 0xfffff000) == 0 ); - return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ + return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */ } else if (rv != 0) { ASSERT( (rv & 0xfff00000) == 0 ); - return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */ + return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */ } else return 0; } @@ -16342,14 +18720,14 @@ helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) #pragma unused(dev,fflag,p) int err, rv = 0; - err = dtrace_ioctl_helper((int)cmd, data, &rv); - /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ + err = dtrace_ioctl_helper(cmd, data, &rv); + /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. 
*/ if (err != 0) { ASSERT( (err & 0xfffff000) == 0 ); - return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ + return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */ } else if (rv != 0) { ASSERT( (rv & 0xfff00000) == 0 ); - return (((rv & 0xfffff) << 20)); /* ioctl returns -1 and errno set to a return value >= 4096 */ + return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */ } else return 0; } @@ -16429,7 +18807,7 @@ dtrace_clone_func(dev_t dev, int action) else { /* * Propose a minor number, namely the next number that vmem_alloc() will return. - * Immediately put it back in play by calling vmem_free(). + * Immediately put it back in play by calling vmem_free(). FIXME. */ int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); @@ -16492,12 +18870,10 @@ dtrace_init( void ) } #if defined(DTRACE_MEMORY_ZONES) - /* * Initialize the dtrace kalloc-emulation zones. */ dtrace_alloc_init(); - #endif /* DTRACE_MEMORY_ZONES */ /* @@ -16521,7 +18897,7 @@ dtrace_init( void ) lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr); lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr); lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr); -#ifdef DEBUG +#if DEBUG lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr); #endif lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr); @@ -16550,6 +18926,7 @@ dtrace_init( void ) lck_mtx_lock(&cpu_lock); for (i = 0; i < ncpu; ++i) + /* FIXME: track CPU configuration a la CHUD Processor Pref Pane. */ dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */ lck_mtx_unlock(&cpu_lock); @@ -16559,7 +18936,7 @@ dtrace_init( void ) * See dtrace_impl.h for a description of dof modes. * The default is lazy dof. * - * XXX Warn if state is LAZY_OFF? It won't break anything, but + * FIXME: Warn if state is LAZY_OFF? 
It won't break anything, but * makes no sense... */ if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) { @@ -16597,7 +18974,12 @@ dtrace_init( void ) void dtrace_postinit(void) { - dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 ); + /* + * Called from bsd_init after all providers' *_init() routines have been + * run. That way, anonymous DOF enabled under dtrace_attach() is safe + * to go. + */ + dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */ } #undef DTRACE_MAJOR diff --git a/bsd/dev/dtrace/dtrace_alloc.c b/bsd/dev/dtrace/dtrace_alloc.c index df0107bd3..e43ca8ce5 100644 --- a/bsd/dev/dtrace/dtrace_alloc.c +++ b/bsd/dev/dtrace/dtrace_alloc.c @@ -116,7 +116,7 @@ void *dtrace_alloc(vm_size_t size) /* * If size is too large for a zone, then use kmem_alloc. - * (We use kmem_alloc instead of kmem_alloc_wired so that + * (We use kmem_alloc instead of kmem_alloc_kobject so that * krealloc can use kmem_realloc.) */ diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c index 1ef883569..6d4586e2c 100644 --- a/bsd/dev/dtrace/dtrace_glue.c +++ b/bsd/dev/dtrace/dtrace_glue.c @@ -67,7 +67,8 @@ /* * pid/proc */ -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. 
*/ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ /* Not called from probe context */ proc_t * @@ -419,11 +420,11 @@ _cyclic_add_omni(cyclic_id_list_t cyc_list) t = (char *)cyc_list; t += sizeof(cyc_omni_handler_t); - cyc_list = (cyclic_id_list_t)t; + cyc_list = (cyclic_id_list_t)(uintptr_t)t; t += sizeof(cyclic_id_t)*NCPU; t += (sizeof(wrap_timer_call_t))*cpu_number(); - wrapTC = (wrap_timer_call_t *)t; + wrapTC = (wrap_timer_call_t *)(uintptr_t)t; cyc_list[cpu_number()] = timer_call_add_cyclic(wrapTC, &cH, &cT); } @@ -454,7 +455,7 @@ _cyclic_remove_omni(cyclic_id_list_t cyc_list) t = (char *)cyc_list; t += sizeof(cyc_omni_handler_t); - cyc_list = (cyclic_id_list_t)t; + cyc_list = (cyclic_id_list_t)(uintptr_t)t; cid = cyc_list[cpu_number()]; oarg = timer_call_get_cyclic_arg(cid); @@ -674,7 +675,7 @@ _dtrace_register_anon_DOF(char *name, uchar_t *data, uint_t nelements) int ddi_prop_lookup_int_array(dev_t match_dev, dev_info_t *dip, uint_t flags, - char *name, int **data, uint_t *nelements) + const char *name, int **data, uint_t *nelements) { #pragma unused(match_dev,dip,flags) unsigned int i; @@ -698,14 +699,14 @@ ddi_prop_free(void *buf) } int -ddi_driver_major(dev_info_t *devi) { return (int)major(devi); } +ddi_driver_major(dev_info_t *devi) { return (int)major(CAST_DOWN_EXPLICIT(int,devi)); } int ddi_create_minor_node(dev_info_t *dip, const char *name, int spec_type, minor_t minor_num, const char *node_type, int flag) { #pragma unused(spec_type,node_type,flag) - dev_t dev = makedev( (uint32_t)dip, minor_num ); + dev_t dev = makedev( ddi_driver_major(dip), minor_num ); if (NULL == devfs_make_node( dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666, name, 0 )) return DDI_FAILURE; @@ -885,7 +886,7 @@ void dt_kmem_free_aligned(void* buf, size_t size) */ kmem_cache_t * kmem_cache_create( - char *name, /* descriptive name for this cache */ + const char *name, /* descriptive name for this cache */ size_t bufsize, /* size of the objects it 
manages */ size_t align, /* required object alignment */ int (*constructor)(void *, void *, int), /* object constructor */ @@ -988,7 +989,7 @@ vmem_create(const char *name, void *base, size_t size, size_t quantum, void *ign p->blist = bl = blist_create( size ); blist_free(bl, 0, size); - if (base) blist_alloc( bl, (daddr_t)base ); /* Chomp off initial ID(s) */ + if (base) blist_alloc( bl, (daddr_t)(uintptr_t)base ); /* Chomp off initial ID(s) */ return (vmem_t *)p; } @@ -1011,7 +1012,7 @@ vmem_alloc(vmem_t *vmp, size_t size, int vmflag) panic("vmem_alloc: failure after blist_resize!"); } - return (void *)p; + return (void *)(uintptr_t)p; } void @@ -1019,7 +1020,7 @@ vmem_free(vmem_t *vmp, void *vaddr, size_t size) { struct blist_hdl *p = (struct blist_hdl *)vmp; - blist_free( p->blist, (daddr_t)vaddr, (daddr_t)size ); + blist_free( p->blist, (daddr_t)(uintptr_t)vaddr, (daddr_t)size ); } void @@ -1043,7 +1044,8 @@ vmem_destroy(vmem_t *vmp) hrtime_t dtrace_gethrestime(void) { - uint32_t secs, nanosecs; + clock_sec_t secs; + clock_nsec_t nanosecs; uint64_t secs64, ns64; clock_get_calendar_nanotime_nowait(&secs, &nanosecs); @@ -1122,7 +1124,7 @@ dtrace_gethrtime(void) uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) { - if (OSCompareAndSwap( cmp, new, (unsigned long *)target )) + if (OSCompareAndSwap( (UInt32)cmp, (UInt32)new, (volatile UInt32 *)target )) return cmp; else return ~cmp; /* Must return something *other* than cmp */ @@ -1131,14 +1133,10 @@ dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) void * dtrace_casptr(void *target, void *cmp, void *new) { -#if defined(__LP64__) -#error dtrace_casptr implementation missing for LP64 -#else - if (OSCompareAndSwap( (uint32_t)cmp, (uint32_t)new, (unsigned long *)target )) + if (OSCompareAndSwapPtr( cmp, new, (void**)target )) return cmp; else return (void *)(~(uintptr_t)cmp); /* Must return something *other* than cmp */ -#endif } /* @@ -1201,8 +1199,10 @@ dtrace_copycheck(user_addr_t uaddr, 
uintptr_t kaddr, size_t size) } void -dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len) +dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags) { +#pragma unused(flags) + if (dtrace_copycheck( src, dst, len )) { if (copyin((const user_addr_t)src, (char *)dst, (vm_size_t)len)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); @@ -1213,8 +1213,10 @@ dtrace_copyin(user_addr_t src, uintptr_t dst, size_t len) } void -dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len) +dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len, volatile uint16_t *flags) { +#pragma unused(flags) + size_t actual; if (dtrace_copycheck( src, dst, len )) { @@ -1236,8 +1238,10 @@ dtrace_copyinstr(user_addr_t src, uintptr_t dst, size_t len) } void -dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len) +dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags) { +#pragma unused(flags) + if (dtrace_copycheck( dst, src, len )) { if (copyout((const void *)src, dst, (vm_size_t)len)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); @@ -1248,8 +1252,10 @@ dtrace_copyout(uintptr_t src, user_addr_t dst, size_t len) } void -dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len) +dtrace_copyoutstr(uintptr_t src, user_addr_t dst, size_t len, volatile uint16_t *flags) { +#pragma unused(flags) + size_t actual; if (dtrace_copycheck( dst, src, len )) { @@ -1504,7 +1510,7 @@ void cmn_err( int level, const char *format, ... 
) * 2002-01-24 gvdl Initial implementation of strstr */ -__private_extern__ char * +__private_extern__ const char * strstr(const char *in, const char *str) { char c; @@ -1512,7 +1518,7 @@ strstr(const char *in, const char *str) c = *str++; if (!c) - return (char *) in; // Trivial empty string case + return (const char *) in; // Trivial empty string case len = strlen(str); do { @@ -1525,7 +1531,7 @@ strstr(const char *in, const char *str) } while (sc != c); } while (strncmp(in, str, len) != 0); - return (char *) (in - 1); + return (const char *) (in - 1); } /* @@ -1541,7 +1547,7 @@ dtrace_caller(int ignore) int dtrace_getstackdepth(int aframes) { - struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *fp = (struct frame *)__builtin_frame_address(0); struct frame *nextfp, *minfp, *stacktop; int depth = 0; int on_intr; @@ -1549,7 +1555,7 @@ dtrace_getstackdepth(int aframes) if ((on_intr = CPU_ON_INTR(CPU)) != 0) stacktop = (struct frame *)dtrace_get_cpu_int_stack_top(); else - stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); + stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); minfp = fp; @@ -1568,7 +1574,7 @@ dtrace_getstackdepth(int aframes) vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread()); minfp = (struct frame *)kstack_base; - stacktop = (struct frame *)(kstack_base + KERNEL_STACK_SIZE); + stacktop = (struct frame *)(kstack_base + kernel_stack_size); on_intr = 0; continue; diff --git a/bsd/dev/dtrace/dtrace_ptss.c b/bsd/dev/dtrace/dtrace_ptss.c index f4503c9ef..9c75c3f1f 100644 --- a/bsd/dev/dtrace/dtrace_ptss.c +++ b/bsd/dev/dtrace/dtrace_ptss.c @@ -70,16 +70,16 @@ dtrace_ptss_claim_entry_locked(struct proc* p) { // CAS the entries onto the free list. 
do { page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next = p->p_dtrace_ptss_free_list; - } while (!OSCompareAndSwap((UInt32)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next, - (UInt32)&page->entries[0], - (volatile UInt32 *)&p->p_dtrace_ptss_free_list)); + } while (!OSCompareAndSwapPtr((void *)page->entries[DTRACE_PTSS_ENTRIES_PER_PAGE-1].next, + (void *)&page->entries[0], + (void * volatile *)&p->p_dtrace_ptss_free_list)); // Now that we've added to the free list, try again. continue; } // Claim temp - if (!OSCompareAndSwap((UInt32)temp, (UInt32)temp->next, (volatile UInt32 *)&p->p_dtrace_ptss_free_list)) + if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) continue; // At this point, we own temp. @@ -113,7 +113,7 @@ dtrace_ptss_claim_entry(struct proc* p) { } // Claim temp - if (!OSCompareAndSwap((UInt32)temp, (UInt32)temp->next, (volatile UInt32 *)&p->p_dtrace_ptss_free_list)) + if (!OSCompareAndSwapPtr((void *)temp, (void *)temp->next, (void * volatile *)&p->p_dtrace_ptss_free_list)) continue; // At this point, we own temp. @@ -133,7 +133,7 @@ dtrace_ptss_release_entry(struct proc* p, struct dtrace_ptss_page_entry* e) { if (p && e) { do { e->next = p->p_dtrace_ptss_free_list; - } while (!OSCompareAndSwap((UInt32)e->next, (UInt32)e, (volatile UInt32 *)&p->p_dtrace_ptss_free_list)); + } while (!OSCompareAndSwapPtr((void *)e->next, (void *)e, (void * volatile *)&p->p_dtrace_ptss_free_list)); } } diff --git a/bsd/dev/dtrace/dtrace_subr.c b/bsd/dev/dtrace/dtrace_subr.c index 2a2a4b3de..3d8e65309 100644 --- a/bsd/dev/dtrace/dtrace_subr.c +++ b/bsd/dev/dtrace/dtrace_subr.c @@ -20,12 +20,12 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ /* - * #pragma ident "@(#)dtrace_subr.c 1.7 06/04/24 SMI" + * #pragma ident "@(#)dtrace_subr.c 1.8 07/06/05 SMI" */ #include @@ -37,7 +37,8 @@ #include #if defined(__APPLE__) -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #endif /* Copied from an arch specific dtrace_subr.c. */ @@ -57,9 +58,6 @@ void (*dtrace_helpers_fork)(proc_t *, proc_t *); void (*dtrace_cpustart_init)(void); void (*dtrace_cpustart_fini)(void); -void (*dtrace_kreloc_init)(void); -void (*dtrace_kreloc_fini)(void); - void (*dtrace_debugger_init)(void); void (*dtrace_debugger_fini)(void); diff --git a/bsd/dev/dtrace/fasttrap.c b/bsd/dev/dtrace/fasttrap.c index 3cb1b62e6..814778290 100644 --- a/bsd/dev/dtrace/fasttrap.c +++ b/bsd/dev/dtrace/fasttrap.c @@ -20,12 +20,12 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * #pragma ident "@(#)fasttrap.c 1.21 06/06/12 SMI" + * #pragma ident "@(#)fasttrap.c 1.26 08/04/21 SMI" */ #include @@ -51,7 +51,12 @@ #include -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ + +__private_extern__ +void +qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)); /* * User-Land Trap-Based Tracing @@ -496,8 +501,17 @@ fasttrap_fork(proc_t *p, proc_t *cp) lck_mtx_lock(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == ppid && - !tp->ftt_proc->ftpc_defunct) { + tp->ftt_proc->ftpc_acount != 0) { fasttrap_tracepoint_remove(cp, tp); + + /* + * The count of active providers can only be + * decremented (i.e. 
to zero) during exec, + * exit, and removal of a meta provider so it + * should be impossible to drop the count + * mid-fork. + */ + ASSERT(tp->ftt_proc->ftpc_acount != 0); } } lck_mtx_unlock(&bucket->ftb_mtx); @@ -613,8 +627,14 @@ fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) again: lck_mtx_lock(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + /* + * Note that it's safe to access the active count on the + * associated proc structure because we know that at least one + * provider (this one) will still be around throughout this + * operation. + */ if (tp->ftt_pid != pid || tp->ftt_pc != pc || - tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount == 0) continue; /* @@ -825,10 +845,12 @@ fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) if (tp->ftt_ids != NULL) { tmp_probe = tp->ftt_ids->fti_probe; + /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } else { tmp_probe = tp->ftt_retids->fti_probe; + /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } @@ -1176,7 +1198,7 @@ fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, #pragma unused(arg, id) fasttrap_probe_t *probe = parg; char *str; - int i; + int i, ndx; desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; @@ -1187,14 +1209,11 @@ fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, return; } - /* - * We only need to set this member if the argument is remapped. - */ - if (probe->ftp_argmap != NULL) - desc->dtargd_mapping = probe->ftp_argmap[desc->dtargd_ndx]; + ndx = (probe->ftp_argmap != NULL) ? 
+ probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx; str = probe->ftp_ntypes; - for (i = 0; i < desc->dtargd_mapping; i++) { + for (i = 0; i < ndx; i++) { str += strlen(str) + 1; } @@ -1296,10 +1315,12 @@ fasttrap_proc_lookup(pid_t pid) lck_mtx_lock(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { - if (fprc->ftpc_pid == pid && !fprc->ftpc_defunct) { + if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { lck_mtx_lock(&fprc->ftpc_mtx); lck_mtx_unlock(&bucket->ftb_mtx); - fprc->ftpc_count++; + fprc->ftpc_rcount++; + atomic_add_64(&fprc->ftpc_acount, 1); + ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); lck_mtx_unlock(&fprc->ftpc_mtx); return (fprc); @@ -1315,7 +1336,8 @@ fasttrap_proc_lookup(pid_t pid) new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP); ASSERT(new_fprc != NULL); new_fprc->ftpc_pid = pid; - new_fprc->ftpc_count = 1; + new_fprc->ftpc_rcount = 1; + new_fprc->ftpc_acount = 1; lck_mtx_lock(&bucket->ftb_mtx); @@ -1324,10 +1346,12 @@ fasttrap_proc_lookup(pid_t pid) * been created for this pid while we weren't under the bucket lock. 
*/ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { - if (fprc->ftpc_pid == pid && !fprc->ftpc_defunct) { + if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { lck_mtx_lock(&fprc->ftpc_mtx); lck_mtx_unlock(&bucket->ftb_mtx); - fprc->ftpc_count++; + fprc->ftpc_rcount++; + atomic_add_64(&fprc->ftpc_acount, 1); + ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); lck_mtx_unlock(&fprc->ftpc_mtx); kmem_free(new_fprc, sizeof (fasttrap_proc_t)); @@ -1360,15 +1384,22 @@ fasttrap_proc_release(fasttrap_proc_t *proc) lck_mtx_lock(&proc->ftpc_mtx); - ASSERT(proc->ftpc_count != 0); + ASSERT(proc->ftpc_rcount != 0); + ASSERT(proc->ftpc_acount <= proc->ftpc_rcount); - if (--proc->ftpc_count != 0) { + if (--proc->ftpc_rcount != 0) { lck_mtx_unlock(&proc->ftpc_mtx); return; } lck_mtx_unlock(&proc->ftpc_mtx); + /* + * There should definitely be no live providers associated with this + * process at this point. + */ + ASSERT(proc->ftpc_acount == 0); + bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; lck_mtx_lock(&bucket->ftb_mtx); @@ -1565,6 +1596,16 @@ fasttrap_provider_free(fasttrap_provider_t *provider) ASSERT(provider->ftp_ccount == 0); ASSERT(provider->ftp_mcount == 0); + /* + * If this provider hasn't been retired, we need to explicitly drop the + * count of active providers on the associated process structure. + */ + if (!provider->ftp_retired) { + atomic_add_64(&provider->ftp_proc->ftpc_acount, -1); + ASSERT(provider->ftp_proc->ftpc_acount < + provider->ftp_proc->ftpc_rcount); + } + fasttrap_proc_release(provider->ftp_proc); #if defined(__APPLE__) @@ -1628,13 +1669,12 @@ fasttrap_provider_retire(pid_t pid, const char *name, int mprov) } /* - * Mark the provider to be removed in our post-processing step, - * mark it retired, and mark its proc as defunct (though it may - * already be marked defunct by another provider that shares the - * same proc). 
Marking it indicates that we should try to remove it; - * setting the retired flag indicates that we're done with this - * provider; setting the proc to be defunct indicates that all - * tracepoints associated with the traced process should be ignored. + * Mark the provider to be removed in our post-processing step, mark it + * retired, and drop the active count on its proc. Marking it indicates + * that we should try to remove it; setting the retired flag indicates + * that we're done with this provider; dropping the active the proc + * releases our hold, and when this reaches zero (as it will during + * exit or exec) the proc and associated providers become defunct. * * We obviously need to take the bucket lock before the provider lock * to perform the lookup, but we need to drop the provider lock @@ -1643,7 +1683,9 @@ fasttrap_provider_retire(pid_t pid, const char *name, int mprov) * bucket lock therefore protects the integrity of the provider hash * table. */ - fp->ftp_proc->ftpc_defunct = 1; + atomic_add_64(&fp->ftp_proc->ftpc_acount, -1); + ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount); + fp->ftp_retired = 1; fp->ftp_marked = 1; provid = fp->ftp_provid; @@ -1661,6 +1703,18 @@ fasttrap_provider_retire(pid_t pid, const char *name, int mprov) fasttrap_pid_cleanup(); } +static int +fasttrap_uint32_cmp(const void *ap, const void *bp) +{ + return (*(const uint32_t *)ap - *(const uint32_t *)bp); +} + +static int +fasttrap_uint64_cmp(const void *ap, const void *bp) +{ + return (*(const uint64_t *)ap - *(const uint64_t *)bp); +} + static int fasttrap_add_probe(fasttrap_probe_spec_t *pdata) { @@ -1670,6 +1724,12 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) const char *name; unsigned int i, aframes, whack; + /* + * There needs to be at least one desired trace point. 
+ */ + if (pdata->ftps_noffs == 0) + return (EINVAL); + #if defined(__APPLE__) switch (pdata->ftps_probe_type) { #endif @@ -1733,7 +1793,7 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) char name_str[17]; (void) snprintf(name_str, sizeof(name_str), "%llx", - (unsigned long long)pdata->ftps_offs[i]); + (uint64_t)pdata->ftps_offs[i]); if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str) != 0) @@ -1791,6 +1851,21 @@ fasttrap_add_probe(fasttrap_probe_spec_t *pdata) goto no_mem; } + /* + * Make sure all tracepoint program counter values are unique. + * We later assume that each probe has exactly one tracepoint + * for a given pc. + */ + qsort(pdata->ftps_offs, pdata->ftps_noffs, + sizeof (uint64_t), fasttrap_uint64_cmp); + for (i = 1; i < pdata->ftps_noffs; i++) { + if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1]) + continue; + + atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); + goto no_mem; + } + ASSERT(pdata->ftps_noffs > 0); #if !defined(__APPLE__) pp = kmem_zalloc(offsetof(fasttrap_probe_t, @@ -1920,15 +1995,15 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) * The highest stability class that fasttrap supports is ISA; cap * the stability of the new provider accordingly. 
*/ - if (dhpv->dthpv_pattr.dtpa_provider.dtat_class >= DTRACE_CLASS_COMMON) + if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; - if (dhpv->dthpv_pattr.dtpa_mod.dtat_class >= DTRACE_CLASS_COMMON) + if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; - if (dhpv->dthpv_pattr.dtpa_func.dtat_class >= DTRACE_CLASS_COMMON) + if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; - if (dhpv->dthpv_pattr.dtpa_name.dtat_class >= DTRACE_CLASS_COMMON) + if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; - if (dhpv->dthpv_pattr.dtpa_args.dtat_class >= DTRACE_CLASS_COMMON) + if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; #if defined(__APPLE__) @@ -1953,13 +2028,10 @@ fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) * having duplicate probes. However, duplicate probes are not fatal, * and there is no way to get that by accident, so we will not check * for that case. + * + * UPDATE: It turns out there are several use cases that require adding + * probes to existing providers. Disabling this optimization for now... */ - - if (provider->ftp_mcount != 0) { - /* This is the duplicate provider case. */ - lck_mtx_unlock(&provider->ftp_mtx); - return NULL; - } #endif /* __APPLE__ */ /* @@ -1991,6 +2063,25 @@ fasttrap_meta_create_probe(void *arg, void *parg, */ ASSERT(provider->ftp_mcount > 0); + /* + * The offsets must be unique. 
+ */ + qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t), + fasttrap_uint32_cmp); + for (i = 1; i < dhpb->dthpb_noffs; i++) { + if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= + dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) + return; + } + + qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t), + fasttrap_uint32_cmp); + for (i = 1; i < dhpb->dthpb_nenoffs; i++) { + if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= + dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) + return; + } + /* * Grab the creation lock to ensure consistency between calls to * dtrace_probe_lookup() and dtrace_probe_create() in the face of @@ -2059,7 +2150,7 @@ fasttrap_meta_create_probe(void *arg, void *parg, * Unfortunately, a side effect of this is that the relocations do not point at exactly * the location we want. We need to fix up the addresses here. The fixups vary by arch and type. */ -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) /* * Both 32 & 64 bit want to go back one byte, to point at the first NOP */ @@ -2103,7 +2194,7 @@ fasttrap_meta_create_probe(void *arg, void *parg, * Unfortunately, a side effect of this is that the relocations do not point at exactly * the location we want. We need to fix up the addresses here. The fixups vary by arch and type. */ -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) /* * Both 32 & 64 bit want to go forward two bytes, to point at a single byte nop. */ @@ -2168,28 +2259,21 @@ static dtrace_mops_t fasttrap_mops = { /*ARGSUSED*/ static int -fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +fasttrap_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv) { #pragma unused(dev, md, rv) if (!dtrace_attached()) return (EAGAIN); if (cmd == FASTTRAPIOC_MAKEPROBE) { - // FIXME! What size is arg? If it is not 64 bit, how do we pass in a 64 bit value? 
- fasttrap_probe_spec_t *uprobe = (void *)arg; fasttrap_probe_spec_t *probe; uint64_t noffs; size_t size, i; int ret; char *c; - /* - * FIXME! How does this work? The kern is running in 32 bit mode. It has a 32 bit pointer, - * uprobe. We do address manipulations on it, and still have a 64 bit value? This seems - * broken. What is the right way to do this? - */ - if (copyin((user_addr_t)(unsigned long)&uprobe->ftps_noffs, &noffs, - sizeof (uprobe->ftps_noffs))) + if (copyin(arg + __offsetof(fasttrap_probe_spec_t, ftps_noffs), &noffs, + sizeof (probe->ftps_noffs))) return (EFAULT); /* @@ -2210,7 +2294,7 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) probe = kmem_alloc(size, KM_SLEEP); - if (copyin((user_addr_t)(unsigned long)uprobe, probe, size) != 0) { + if (copyin(arg, probe, size) != 0) { kmem_free(probe, size); return (EFAULT); } @@ -2278,7 +2362,7 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uint_t index; // int ret; - if (copyin((user_addr_t)(unsigned long)arg, &instr, sizeof (instr)) != 0) + if (copyin(arg, &instr, sizeof (instr)) != 0) return (EFAULT); if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { @@ -2312,7 +2396,7 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) while (tp != NULL) { if (instr.ftiq_pid == tp->ftt_pid && instr.ftiq_pc == tp->ftt_pc && - !tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount != 0) break; tp = tp->ftt_next; @@ -2327,7 +2411,7 @@ fasttrap_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) sizeof (instr.ftiq_instr)); lck_mtx_unlock(&fasttrap_tpoints.fth_table[index].ftb_mtx); - if (copyout(&instr, (user_addr_t)(unsigned long)arg, sizeof (instr)) != 0) + if (copyout(&instr, arg, sizeof (instr)) != 0) return (EFAULT); return (0); @@ -2462,13 +2546,15 @@ _fasttrap_open(dev_t dev, int flags, int devtype, struct proc *p) static int _fasttrap_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) 
{ -#pragma unused(p) int err, rv = 0; + user_addr_t uaddrp; - /* - * FIXME! 64 bit problem with the data var. - */ - err = fasttrap_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv); + if (proc_is64bit(p)) + uaddrp = *(user_addr_t *)data; + else + uaddrp = (user_addr_t) *(uint32_t *)data; + + err = fasttrap_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv); /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ if (err != 0) { @@ -2568,7 +2654,7 @@ fasttrap_init( void ) lck_mtx_init(&fasttrap_cleanup_mtx, fasttrap_lck_grp, fasttrap_lck_attr); lck_mtx_init(&fasttrap_count_mtx, fasttrap_lck_grp, fasttrap_lck_attr); - if (DDI_FAILURE == fasttrap_attach((dev_info_t *)device, 0 )) { + if (DDI_FAILURE == fasttrap_attach((dev_info_t *)(uintptr_t)device, 0 )) { // FIX ME! Do we remove the devfs node here? // What kind of error reporting? printf("fasttrap_init: Call to fasttrap_attach failed.\n"); diff --git a/bsd/dev/dtrace/fbt.c b/bsd/dev/dtrace/fbt.c index 311e4d6cf..94e15da00 100644 --- a/bsd/dev/dtrace/fbt.c +++ b/bsd/dev/dtrace/fbt.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */ +/* #pragma ident "@(#)fbt.c 1.18 07/01/10 SMI" */ #ifdef KERNEL #ifndef _KERNEL @@ -33,9 +32,7 @@ #endif #include -#include - -extern struct mach_header _mh_execute_header; /* the kernel's mach header */ +#include #include #include @@ -251,22 +248,26 @@ fbt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) * If we have a parent container, we must manually import it. */ if ((parent = ctf_parent_name(fp)) != NULL) { - struct modctl *mod; + struct modctl *mp = &modules; + struct modctl *mod = NULL; /* * We must iterate over all modules to find the module that * is our parent. */ - for (mod = &modules; mod != NULL; mod = mod->mod_next) { - if (strcmp(mod->mod_filename, parent) == 0) + do { + if (strcmp(mp->mod_modname, parent) == 0) { + mod = mp; break; - } + } + } while ((mp = mp->mod_next) != &modules); if (mod == NULL) goto err; - if ((pfp = ctf_modopen(mod->mod_mp, &error)) == NULL) + if ((pfp = ctf_modopen(mod->mod_mp, &error)) == NULL) { goto err; + } if (ctf_import(fp, pfp) != 0) { ctf_close(pfp); @@ -326,7 +327,7 @@ static dtrace_pops_t fbt_pops = { #if !defined(__APPLE__) fbt_getargdesc, #else - NULL, /* XXX where to look for xnu? */ + NULL, /* FIXME: where to look for xnu? 
*/ #endif /* __APPLE__ */ NULL, NULL, @@ -364,6 +365,7 @@ fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_invop_add(fbt_invop); +#if !defined(__APPLE__) if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE || dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL, @@ -371,6 +373,15 @@ fbt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) fbt_cleanup(devi); return (DDI_FAILURE); } +#else + if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0, + DDI_PSEUDO, 0) == DDI_FAILURE || + dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL, + &fbt_pops, NULL, &fbt_id) != 0) { + fbt_cleanup(devi); + return (DDI_FAILURE); + } +#endif /* __APPLE__ */ ddi_report_dev(devi); fbt_devi = devi; @@ -426,7 +437,7 @@ fbt_init( void ) if (0 == gDisableFBT) { int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw); - int size = 0, header_size, round_size; + unsigned long size = 0, header_size, round_size; kern_return_t ret; void *p, *q; @@ -439,16 +450,17 @@ fbt_init( void ) * Capture the kernel's mach_header in its entirety and the contents of * its LINKEDIT segment (and only that segment). This is sufficient to * build all the fbt probes lazily the first time a client looks to - * the fbt provider. Remeber thes on the global struct modctl g_fbt_kernctl. + * the fbt provider. Remeber these on the global struct modctl g_fbt_kernctl. */ - header_size = sizeof(struct mach_header) + _mh_execute_header.sizeofcmds; + header_size = sizeof(kernel_mach_header_t) + _mh_execute_header.sizeofcmds; p = getsegdatafromheader(&_mh_execute_header, SEG_LINKEDIT, &size); - round_size = round_page_32(header_size + size); + round_size = round_page(header_size + size); + /* "q" will accomodate copied kernel_mach_header_t, its load commands, and LINKEIT segment. 
*/ ret = kmem_alloc_pageable(kernel_map, (vm_offset_t *)&q, round_size); if (p && (ret == KERN_SUCCESS)) { - struct segment_command *sgp; + kernel_segment_command_t *sgp; bcopy( (void *)&_mh_execute_header, q, header_size); bcopy( p, (char *)q + header_size, size); @@ -456,7 +468,7 @@ fbt_init( void ) sgp = getsegbynamefromheader(q, SEG_LINKEDIT); if (sgp) { - sgp->vmaddr = (unsigned long)((char *)q + header_size); + sgp->vmaddr = (uintptr_t)((char *)q + header_size); g_fbt_kernctl.address = (vm_address_t)q; g_fbt_kernctl.size = header_size + size; } else { @@ -472,8 +484,9 @@ fbt_init( void ) } strncpy((char *)&(g_fbt_kernctl.mod_modname), "mach_kernel", KMOD_MAX_NAME); + ((char *)&(g_fbt_kernctl.mod_modname))[KMOD_MAX_NAME -1] = '\0'; - fbt_attach( (dev_info_t *)majdevno, DDI_ATTACH ); + fbt_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); gDisableFBT = 1; /* Ensure this initialization occurs just one time. */ } diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c index 82539d98b..0f9d6d4ff 100644 --- a/bsd/dev/dtrace/lockstat.c +++ b/bsd/dev/dtrace/lockstat.c @@ -19,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* #pragma ident "@(#)lockstat.c 1.11 06/03/24 SMI" */ +/* #pragma ident "@(#)lockstat.c 1.12 08/01/16 SMI" */ #ifdef KERNEL @@ -55,7 +55,7 @@ /* * Hot patch values, x86 */ -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) #define NOP 0x90 #define RET 0xc3 #define LOCKSTAT_AFRAMES 1 @@ -77,7 +77,7 @@ typedef struct lockstat_probe { lockstat_probe_t lockstat_probes[] = { -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) /* Not implemented yet on PPC... 
*/ { LS_LCK_MTX_LOCK, LSA_ACQUIRE, LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE }, { LS_LCK_MTX_LOCK, LSA_SPIN, LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE }, @@ -88,17 +88,11 @@ lockstat_probe_t lockstat_probes[] = { LS_LCK_MTX_EXT_LOCK, LSA_SPIN, LS_LCK_MTX_EXT_LOCK_SPIN, DTRACE_IDNONE }, { LS_LCK_MTX_EXT_TRY_LOCK, LSA_ACQUIRE, LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE, DTRACE_IDNONE }, { LS_LCK_MTX_UNLOCK, LSA_RELEASE, LS_LCK_MTX_EXT_UNLOCK_RELEASE, DTRACE_IDNONE }, - { LS_MUTEX_LOCK, LSA_ACQUIRE, LS_MUTEX_LOCK_ACQUIRE, DTRACE_IDNONE }, - { LS_MUTEX_UNLOCK, LSA_RELEASE, LS_MUTEX_UNLOCK_RELEASE, DTRACE_IDNONE }, - { LS_MUTEX_TRY_LOCK, LSA_ACQUIRE, LS_MUTEX_TRY_LOCK_ACQUIRE, DTRACE_IDNONE }, - { LS_MUTEX_TRY_SPIN, LSA_ACQUIRE, LS_MUTEX_TRY_SPIN_ACQUIRE, DTRACE_IDNONE }, - { LS_MUTEX_LOCK_SPIN, LSA_ACQUIRE, LS_MUTEX_LOCK_SPIN_ACQUIRE, DTRACE_IDNONE }, + { LS_LCK_MTX_LOCK_SPIN_LOCK, LSA_ACQUIRE, LS_LCK_MTX_LOCK_SPIN_ACQUIRE, DTRACE_IDNONE }, #endif { LS_LCK_MTX_LOCK, LSA_BLOCK, LS_LCK_MTX_LOCK_BLOCK, DTRACE_IDNONE }, { LS_LCK_MTX_EXT_LOCK, LSA_BLOCK, LS_LCK_MTX_EXT_LOCK_BLOCK, DTRACE_IDNONE }, - { LS_MUTEX_CONVERT_SPIN, LSA_ACQUIRE, LS_MUTEX_CONVERT_SPIN_ACQUIRE, DTRACE_IDNONE }, - { LS_LCK_RW_LOCK_SHARED, LSR_ACQUIRE, LS_LCK_RW_LOCK_SHARED_ACQUIRE, DTRACE_IDNONE }, { LS_LCK_RW_LOCK_SHARED, LSR_BLOCK, LS_LCK_RW_LOCK_SHARED_BLOCK, DTRACE_IDNONE }, { LS_LCK_RW_LOCK_SHARED, LSR_SPIN, LS_LCK_RW_LOCK_SHARED_SPIN, DTRACE_IDNONE }, @@ -127,30 +121,30 @@ lockstat_probe_t lockstat_probes[] = { LS_LCK_RW_TRY_LOCK_SHARED, LSA_SPIN, LS_LCK_RW_TRY_LOCK_SHARED_SPIN, DTRACE_IDNONE }, #endif - { NULL } + { NULL, NULL, 0, 0 } }; dtrace_id_t lockstat_probemap[LS_NPROBES]; -extern void lck_mtx_lock_lockstat_patch_point(); -extern void lck_mtx_try_lock_lockstat_patch_point(); -extern void lck_mtx_try_lock_spin_lockstat_patch_point(); -extern void lck_mtx_unlock_lockstat_patch_point(); -extern void lck_mtx_unlock2_lockstat_patch_point(); -extern void mutex_lock_lockstat_patch_point(); -extern void 
mutex_unlock_lockstat_patch_point(); -extern void mutex_unlock2_lockstat_patch_point(); -extern void mutex_try_lockstat_patch_point(); -extern void mutex_try_spin_lockstat_patch_point(); -extern void mutex_lock_spin_lockstat_patch_point(); -extern void mutex_convert_spin_lockstat_patch_point(); -extern void lck_rw_done_lockstat_patch_point(); -extern void lck_rw_lock_shared_lockstat_patch_point(); -extern void lck_mtx_lock_ext_lockstat_patch_point(); -extern void lck_mtx_ext_unlock_lockstat_patch_point(); +#if CONFIG_DTRACE +extern void lck_mtx_lock_lockstat_patch_point(void); +extern void lck_mtx_try_lock_lockstat_patch_point(void); +extern void lck_mtx_try_lock_spin_lockstat_patch_point(void); +extern void lck_mtx_unlock_lockstat_patch_point(void); +extern void lck_mtx_lock_ext_lockstat_patch_point(void); +extern void lck_mtx_ext_unlock_lockstat_patch_point(void); + +extern void lck_rw_lock_shared_lockstat_patch_point(void); +extern void lck_rw_lock_exclusive_lockstat_patch_point(void); +extern void lck_rw_lock_shared_to_exclusive_lockstat_patch_point(void); +extern void lck_rw_try_lock_shared_lockstat_patch_point(void); +extern void lck_rw_try_lock_exclusive_lockstat_patch_point(void); +extern void lck_mtx_lock_spin_lockstat_patch_point(void); +#endif /* CONFIG_DTRACE */ vm_offset_t *assembly_probes[] = { -#if defined(__i386__) +#if CONFIG_DTRACE +#if defined(__i386__) || defined(__x86_64__) /* * On x86 these points are better done via hot patches, which ensure * there is zero overhead when not in use. 
On x86 these patch points @@ -161,26 +155,27 @@ vm_offset_t *assembly_probes[] = { (vm_offset_t *) lck_mtx_try_lock_lockstat_patch_point, (vm_offset_t *) lck_mtx_try_lock_spin_lockstat_patch_point, (vm_offset_t *) lck_mtx_unlock_lockstat_patch_point, - (vm_offset_t *) lck_mtx_unlock2_lockstat_patch_point, - (vm_offset_t *) lck_rw_lock_shared_lockstat_patch_point, - (vm_offset_t *) lck_rw_done_lockstat_patch_point, (vm_offset_t *) lck_mtx_lock_ext_lockstat_patch_point, (vm_offset_t *) lck_mtx_ext_unlock_lockstat_patch_point, - (vm_offset_t *) mutex_lock_lockstat_patch_point, - (vm_offset_t *) mutex_try_spin_lockstat_patch_point, - (vm_offset_t *) mutex_try_lockstat_patch_point, - (vm_offset_t *) mutex_unlock_lockstat_patch_point, - (vm_offset_t *) mutex_unlock2_lockstat_patch_point, - (vm_offset_t *) mutex_lock_spin_lockstat_patch_point, - (vm_offset_t *) mutex_convert_spin_lockstat_patch_point, -#endif + (vm_offset_t *) lck_rw_lock_shared_lockstat_patch_point, + (vm_offset_t *) lck_rw_lock_exclusive_lockstat_patch_point, + (vm_offset_t *) lck_rw_lock_shared_to_exclusive_lockstat_patch_point, + (vm_offset_t *) lck_rw_try_lock_shared_lockstat_patch_point, + (vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point, + (vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point, +#else (vm_offset_t *) lck_mtx_unlock_lockstat_patch_point, +#endif +#endif /* CONFIG_DTRACE */ NULL }; /* * Hot patch switches back and forth the probe points between NOP and RET. * The argument indicates whether the probe point is on or off. */ +#if defined(__APPLE__) +static +#endif /* __APPLE__ */ void lockstat_hot_patch(boolean_t active) { #pragma unused(active) @@ -188,7 +183,7 @@ void lockstat_hot_patch(boolean_t active) for (i = 0; assembly_probes[i]; i++) { -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) uint8_t instr; instr = (active ? 
NOP : RET ); (void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i]), @@ -207,17 +202,6 @@ void lockstat_hot_patch(boolean_t active) void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); -/* - * An initial value for lockstat_probe. See lockstat_attach(). Think safety. - */ -static void -lockstat_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4) -{ -#pragma unused(id,arg0,arg1,arg2,arg3,arg4) -} - - static dev_info_t *lockstat_devi; /* saved in xxattach() for xxinfo() */ static dtrace_provider_id_t lockstat_id; @@ -225,7 +209,8 @@ static dtrace_provider_id_t lockstat_id; static void lockstat_enable(void *arg, dtrace_id_t id, void *parg) { -#pragma unused(arg) +#pragma unused(arg) /* __APPLE__ */ + lockstat_probe_t *probe = parg; ASSERT(!lockstat_probemap[probe->lsp_probe]); @@ -233,9 +218,6 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg) lockstat_probemap[probe->lsp_probe] = id; membar_producer(); - lockstat_probe = dtrace_probe; - membar_producer(); - lockstat_hot_patch(TRUE); membar_producer(); @@ -245,7 +227,8 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg) static void lockstat_disable(void *arg, dtrace_id_t id, void *parg) { -#pragma unused(arg,id) +#pragma unused(arg, id) /* __APPLE__ */ + lockstat_probe_t *probe = parg; int i; @@ -275,7 +258,8 @@ lockstat_disable(void *arg, dtrace_id_t id, void *parg) static void lockstat_provide(void *arg, const dtrace_probedesc_t *desc) { -#pragma unused(arg,desc) +#pragma unused(arg, desc) /* __APPLE__ */ + int i = 0; for (i = 0; lockstat_probes[i].lsp_func != NULL; i++) { @@ -297,7 +281,8 @@ lockstat_provide(void *arg, const dtrace_probedesc_t *desc) static void lockstat_destroy(void *arg, dtrace_id_t id, void *parg) { -#pragma unused(arg,id) +#pragma unused(arg, id) /* __APPLE__ */ + lockstat_probe_t *probe = parg; ASSERT(!lockstat_probemap[probe->lsp_probe]); @@ -345,9 +330,11 @@ lockstat_attach(dev_info_t 
*devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } + lockstat_probe = dtrace_probe; + membar_producer(); + ddi_report_dev(devi); lockstat_devi = devi; - lockstat_probe = lockstat_stub; return (DDI_SUCCESS); } @@ -399,7 +386,7 @@ void lockstat_init( void ) return; } - lockstat_attach( (dev_info_t *)majdevno, DDI_ATTACH ); + lockstat_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); gLockstatInited = 1; } else panic("lockstat_init: called twice!\n"); diff --git a/bsd/dev/dtrace/profile_prvd.c b/bsd/dev/dtrace/profile_prvd.c index cd561c2df..a74254c5c 100644 --- a/bsd/dev/dtrace/profile_prvd.c +++ b/bsd/dev/dtrace/profile_prvd.c @@ -19,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* #pragma ident "@(#)profile.c 1.6 06/03/24 SMI" */ +/* #pragma ident "@(#)profile.c 1.7 07/01/10 SMI" */ #if !defined(__APPLE__) #include @@ -68,7 +68,7 @@ #if defined(__ppc__) || defined(__ppc64__) extern struct savearea *find_kern_regs(thread_t); #elif defined(__i386__) || defined(__x86_64__) -extern x86_saved_state32_t *find_kern_regs(thread_t); +extern x86_saved_state_t *find_kern_regs(thread_t); #else #error Unknown architecture #endif @@ -83,20 +83,18 @@ static dev_info_t *profile_devi; static dtrace_provider_id_t profile_id; /* - * Regardless of platform, there are five artificial frames in the case of the + * Regardless of platform, the stack frames look like this in the case of the * profile provider: * * profile_fire * cyclic_expire * cyclic_fire * [ cbe ] - * [ locore ] + * [ interrupt code ] * - * On amd64, there are two frames associated with locore: one in locore, and - * another in common interrupt dispatch code. (i386 has not been modified to - * use this common layer.) Further, on i386, the interrupted instruction - * appears as its own stack frame. 
All of this means that we need to add one - * frame for amd64, and then take one away for both amd64 and i386. + * On x86, there are five frames from the generic interrupt code; further, the + * interrupted instruction appears as its own stack frame, giving us a total of + * 10. * * On SPARC, the picture is further complicated because the compiler * optimizes away tail-calls -- so the following frames are optimized away: @@ -108,28 +106,24 @@ static dtrace_provider_id_t profile_id; * frame cannot be tail-call eliminated, yielding four frames in this case. * * All of the above constraints lead to the mess below. Yes, the profile - * provider should ideally figure this out on-the-fly by hiting one of its own + * provider should ideally figure this out on-the-fly by hitting one of its own * probes and then walking its own stack trace. This is complicated, however, * and the static definition doesn't seem to be overly brittle. Still, we * allow for a manual override in case we get it completely wrong. */ #if !defined(__APPLE__) -#ifdef __x86_64__ -#define PROF_ARTIFICIAL_FRAMES 7 -#else -#ifdef __i386__ -#define PROF_ARTIFICIAL_FRAMES 6 +#ifdef __x86 +#define PROF_ARTIFICIAL_FRAMES 10 #else #ifdef __sparc -#ifdef DEBUG +#if DEBUG #define PROF_ARTIFICIAL_FRAMES 4 #else #define PROF_ARTIFICIAL_FRAMES 3 #endif #endif #endif -#endif #else /* is Mac OS X */ @@ -191,6 +185,7 @@ static int profile_ticks[] = { static uint32_t profile_max; /* maximum number of profile probes */ static uint32_t profile_total; /* current number of profile probes */ + static void profile_fire(void *arg) { @@ -221,12 +216,17 @@ profile_fire(void *arg) } } #elif defined(__i386__) || defined(__x86_64__) - { - x86_saved_state32_t *kern_regs = find_kern_regs(current_thread()); + x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. 
*/ - dtrace_probe(prof->prof_id, kern_regs->eip, 0x0, 0, 0, 0); +#if defined(__i386__) + dtrace_probe(prof->prof_id, saved_state32(kern_regs)->eip, 0x0, 0, 0, 0); +#elif defined(__x86_64__) + dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); +#else +#error Unknown arch +#endif } else { /* Possibly a user interrupt */ x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); @@ -245,7 +245,6 @@ profile_fire(void *arg) dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0); } } - } #else #error Unknown architecture #endif @@ -277,12 +276,17 @@ profile_tick(void *arg) } } #elif defined(__i386__) || defined(__x86_64__) - { - x86_saved_state32_t *kern_regs = find_kern_regs(current_thread()); + x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. */ - dtrace_probe(prof->prof_id, kern_regs->eip, 0x0, 0, 0, 0); +#if defined(__i386__) + dtrace_probe(prof->prof_id, saved_state32(kern_regs)->eip, 0x0, 0, 0, 0); +#elif defined(__x86_64__) + dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); +#else +#error Unknown arch +#endif } else { /* Possibly a user interrupt */ x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); @@ -301,7 +305,6 @@ profile_tick(void *arg) dtrace_probe(prof->prof_id, 0x0, regs->eip, 0, 0, 0); } } - } #else #error Unknown architecture #endif @@ -346,10 +349,12 @@ profile_create(hrtime_t interval, const char *name, int kind) static void profile_provide(void *arg, const dtrace_probedesc_t *desc) { +#pragma unused(arg) /* __APPLE__ */ int i, j, rate, kind; hrtime_t val = 0, mult = 1, len; const char *name, *suffix = NULL; +#if !defined(__APPLE__) const struct { char *prefix; int kind; @@ -378,20 +383,53 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, { "hz", 0 }, -#if 
!defined(__APPLE__) { NULL } + }; #else + const struct { + const char *prefix; + int kind; + } types[] = { + { PROF_PREFIX_PROFILE, PROF_PROFILE }, + { PROF_PREFIX_TICK, PROF_TICK }, { NULL, 0 } -#endif /* __APPLE__ */ }; + const struct { + const char *name; + hrtime_t mult; + } suffixes[] = { + { "ns", NANOSEC / NANOSEC }, + { "nsec", NANOSEC / NANOSEC }, + { "us", NANOSEC / MICROSEC }, + { "usec", NANOSEC / MICROSEC }, + { "ms", NANOSEC / MILLISEC }, + { "msec", NANOSEC / MILLISEC }, + { "s", NANOSEC / SEC }, + { "sec", NANOSEC / SEC }, + { "m", NANOSEC * (hrtime_t)60 }, + { "min", NANOSEC * (hrtime_t)60 }, + { "h", NANOSEC * (hrtime_t)(60 * 60) }, + { "hour", NANOSEC * (hrtime_t)(60 * 60) }, + { "d", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "day", NANOSEC * (hrtime_t)(24 * 60 * 60) }, + { "hz", 0 }, + { NULL, 0 } + }; +#endif /* __APPLE__ */ + + if (desc == NULL) { char n[PROF_NAMELEN]; /* * If no description was provided, provide all of our probes. */ +#if !defined(__APPLE__) for (i = 0; i < sizeof (profile_rates) / sizeof (int); i++) { +#else + for (i = 0; i < (int)(sizeof (profile_rates) / sizeof (int)); i++) { +#endif /* __APPLE__ */ if ((rate = profile_rates[i]) == 0) continue; @@ -400,7 +438,11 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) profile_create(NANOSEC / rate, n, PROF_PROFILE); } +#if !defined(__APPLE__) for (i = 0; i < sizeof (profile_ticks) / sizeof (int); i++) { +#else + for (i = 0; i < (int)(sizeof (profile_ticks) / sizeof (int)); i++) { +#endif /* __APPLE__ */ if ((rate = profile_ticks[i]) == 0) continue; @@ -457,10 +499,17 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) * Look-up the suffix to determine the multiplier. 
*/ for (i = 0, mult = 0; suffixes[i].name != NULL; i++) { +#if !defined(__APPLE__) if (strcasecmp(suffixes[i].name, suffix) == 0) { mult = suffixes[i].mult; break; } +#else + if (strncasecmp(suffixes[i].name, suffix, strlen(suffixes[i].name) + 1) == 0) { + mult = suffixes[i].mult; + break; + } +#endif /* __APPLE__ */ } if (suffixes[i].name == NULL && *suffix != '\0') @@ -482,6 +531,7 @@ profile_provide(void *arg, const dtrace_probedesc_t *desc) static void profile_destroy(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ profile_probe_t *prof = parg; ASSERT(prof->prof_cyclic == CYCLIC_NONE); @@ -502,6 +552,7 @@ profile_destroy(void *arg, dtrace_id_t id, void *parg) static void profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) { +#pragma unused(cpu) /* __APPLE__ */ profile_probe_t *prof = arg; profile_probe_percpu_t *pcpu; @@ -536,6 +587,8 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg) ASSERT(pcpu->profc_probe == arg); #if !defined(__APPLE__) kmem_free(pcpu, sizeof (profile_probe_percpu_t)); +#else +#pragma unused(pcpu,arg,cpu) /* __APPLE__ */ #endif /* __APPLE__ */ } @@ -543,6 +596,7 @@ profile_offline(void *arg, cpu_t *cpu, void *oarg) static void profile_enable(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ profile_probe_t *prof = parg; cyc_omni_handler_t omni; cyc_handler_t hdlr; @@ -596,6 +650,7 @@ profile_disable(void *arg, dtrace_id_t id, void *parg) #if !defined(__APPLE__) cyclic_remove(prof->prof_cyclic); #else +#pragma unused(arg,id) if (prof->prof_kind == PROF_TICK) { cyclic_timer_remove(prof->prof_cyclic); } else { @@ -654,6 +709,7 @@ profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } +#if !defined(__APPLE__) if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, DDI_PSEUDO, NULL) == DDI_FAILURE || dtrace_register("profile", &profile_attr, @@ -662,11 +718,19 @@ profile_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 
ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } - -#if !defined(__APPLE__) + profile_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "profile-max-probes", PROFILE_MAX_DEFAULT); #else + if (ddi_create_minor_node(devi, "profile", S_IFCHR, 0, + DDI_PSEUDO, 0) == DDI_FAILURE || + dtrace_register("profile", &profile_attr, + DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER, NULL, + &profile_pops, NULL, &profile_id) != 0) { + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } + profile_max = PROFILE_MAX_DEFAULT; #endif /* __APPLE__ */ @@ -834,7 +898,7 @@ void profile_init( void ) return; } - profile_attach( (dev_info_t *)majdevno, DDI_ATTACH ); + profile_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); gProfileInited = 1; } else diff --git a/bsd/dev/dtrace/sdt.c b/bsd/dev/dtrace/sdt.c index 946c6a4c6..725ab5585 100644 --- a/bsd/dev/dtrace/sdt.c +++ b/bsd/dev/dtrace/sdt.c @@ -19,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -/* #pragma ident "@(#)sdt.c 1.6 06/03/24 SMI" */ +/* #pragma ident "@(#)sdt.c 1.9 08/07/01 SMI" */ #ifdef KERNEL #ifndef _KERNEL @@ -69,6 +69,12 @@ extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int); #define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ +#if defined(__x86_64__) +#define DTRACE_PROBE_PREFIX "_dtrace_probeDOLLAR" +#else +#define DTRACE_PROBE_PREFIX "_dtrace_probe$" +#endif + static dev_info_t *sdt_devi; static int sdt_verbose = 0; sdt_probe_t **sdt_probetab; @@ -96,16 +102,17 @@ __sdt_provide_module(void *arg, struct modctl *ctl) return; } - if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) + if (!mp || mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) return; for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) { - char *name = sdpd->sdpd_name, *func, *nname; + const char *name = sdpd->sdpd_name, *func; + char *nname; int i, j; dtrace_id_t id; for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) { - char *prefpart, *prefix = prov->sdtp_prefix; + const char *prefpart, *prefix = prov->sdtp_prefix; if ((prefpart = strstr(name, prefix))) { name = prefpart + strlen(prefix); @@ -275,10 +282,12 @@ sdt_enable(void *arg, dtrace_id_t id, void *parg) while (sdp != NULL) { (void)ml_nofault_copy( (vm_offset_t)&sdp->sdp_patchval, (vm_offset_t)sdp->sdp_patchpoint, - sizeof(sdp->sdp_patchval)); + (vm_size_t)sizeof(sdp->sdp_patchval)); sdp = sdp->sdp_next; } +#if !defined(__APPLE__) err: +#endif /* __APPLE__ */ ; } @@ -299,14 +308,23 @@ sdt_disable(void *arg, dtrace_id_t id, void *parg) while (sdp != NULL) { (void)ml_nofault_copy( (vm_offset_t)&sdp->sdp_savedval, (vm_offset_t)sdp->sdp_patchpoint, - sizeof(sdp->sdp_savedval)); + (vm_size_t)sizeof(sdp->sdp_savedval)); sdp = sdp->sdp_next; } +#if !defined(__APPLE__) err: +#endif /* __APPLE__ */ ; } +static uint64_t +sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) +{ +#pragma unused(arg,id,parg) /* __APPLE__ */ + return 
dtrace_getarg(argno, aframes); +} + static dtrace_pops_t sdt_pops = { NULL, sdt_provide_module, @@ -315,7 +333,7 @@ static dtrace_pops_t sdt_pops = { NULL, NULL, sdt_getargdesc, - NULL, + sdt_getarg, NULL, sdt_destroy }; @@ -519,10 +537,16 @@ static int gSDTInited = 0; static struct modctl g_sdt_kernctl; static struct module g_sdt_mach_module; -#include #include +#include -extern struct mach_header _mh_execute_header; /* the kernel's mach header */ +#if defined(__LP64__) +#define KERNEL_MAGIC MH_MAGIC_64 +typedef struct nlist_64 kernel_nlist_t; +#else +#define KERNEL_MAGIC MH_MAGIC +typedef struct nlist kernel_nlist_t; +#endif void sdt_init( void ) { @@ -536,17 +560,17 @@ void sdt_init( void ) return; } - if (MH_MAGIC != _mh_execute_header.magic) { + if (KERNEL_MAGIC != _mh_execute_header.magic) { g_sdt_kernctl.address = (vm_address_t)NULL; g_sdt_kernctl.size = 0; } else { - struct mach_header *mh; + kernel_mach_header_t *mh; struct load_command *cmd; - struct segment_command *orig_ts = NULL, *orig_le = NULL; + kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; struct symtab_command *orig_st = NULL; - struct nlist *sym = NULL; + kernel_nlist_t *sym = NULL; char *strings; - unsigned int i; + unsigned int i; g_sdt_mach_module.sdt_nprobes = 0; g_sdt_mach_module.sdt_probes = NULL; @@ -556,34 +580,34 @@ void sdt_init( void ) strncpy((char *)&(g_sdt_kernctl.mod_modname), "mach_kernel", KMOD_MAX_NAME); mh = &_mh_execute_header; - cmd = (struct load_command *) &mh[1]; + cmd = (struct load_command*) &mh[1]; for (i = 0; i < mh->ncmds; i++) { - if (cmd->cmd == LC_SEGMENT) { - struct segment_command *orig_sg = (struct segment_command *) cmd; + if (cmd->cmd == LC_SEGMENT_KERNEL) { + kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; - if (strcmp(SEG_TEXT, orig_sg->segname) == 0) + if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) orig_ts = orig_sg; - else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, 
SEG_LINKEDIT)) orig_le = orig_sg; - else if (strcmp("", orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, "")) orig_ts = orig_sg; /* kexts have a single unnamed segment */ } else if (cmd->cmd == LC_SYMTAB) orig_st = (struct symtab_command *) cmd; - cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); + cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize); } if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) return; - sym = (struct nlist *)orig_le->vmaddr; - strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); + sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); for (i = 0; i < orig_st->nsyms; i++) { uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); char *name = strings + sym[i].n_un.n_strx; - char *prev_name; + const char *prev_name; unsigned long best; unsigned int j; @@ -598,7 +622,7 @@ void sdt_init( void ) if (*name == '_') name += 1; - if (strstr(name, "_dtrace_probe$")) { + if (strstr(name, DTRACE_PROBE_PREFIX)) { sdt_probedesc_t *sdpd = kmem_alloc(sizeof(sdt_probedesc_t), KM_SLEEP); int len = strlen(name) + 1; @@ -607,19 +631,21 @@ void sdt_init( void ) prev_name = ""; best = 0; + + /* Avoid shadow build warnings */ for (j = 0; j < orig_st->nsyms; j++) { - uint8_t n_type = sym[j].n_type & (N_TYPE | N_EXT); - char *name = strings + sym[j].n_un.n_strx; + uint8_t jn_type = sym[j].n_type & (N_TYPE | N_EXT); + char *jname = strings + sym[j].n_un.n_strx; - if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) + if (((N_SECT | N_EXT) != jn_type && (N_ABS | N_EXT) != jn_type)) continue; if (0 == sym[j].n_un.n_strx) /* iff a null, "", name. 
*/ continue; - if (*name == '_') - name += 1; - if (strstr(name, "_dtrace_probe$")) + if (*jname == '_') + jname += 1; + if (strstr(jname, DTRACE_PROBE_PREFIX)) continue; if (*(unsigned long *)sym[i].n_value <= (unsigned long)sym[j].n_value) @@ -627,7 +653,7 @@ void sdt_init( void ) if ((unsigned long)sym[j].n_value > best) { best = (unsigned long)sym[j].n_value; - prev_name = name; + prev_name = jname; } } @@ -644,12 +670,13 @@ void sdt_init( void ) } } - sdt_attach( (dev_info_t *)majdevno, DDI_ATTACH ); + sdt_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); gSDTInited = 1; } else panic("sdt_init: called twice!\n"); } + #undef SDT_MAJOR /*ARGSUSED*/ diff --git a/bsd/dev/dtrace/sdt_subr.c b/bsd/dev/dtrace/sdt_subr.c index f57c5d614..90ea1331a 100644 --- a/bsd/dev/dtrace/sdt_subr.c +++ b/bsd/dev/dtrace/sdt_subr.c @@ -19,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -/* #pragma ident "@(#)sdt_subr.c 1.7 06/04/03 SMI" */ +/* #pragma ident "@(#)sdt_subr.c 1.13 08/06/13 SMI" */ #include @@ -75,6 +75,14 @@ static dtrace_pattr_t sdt_attr = { { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA }, }; +static dtrace_pattr_t xpv_attr = { +{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_PLATFORM }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, +{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM }, +}; + sdt_provider_t sdt_providers[] = { { "vtrace", "__vtrace____", &vtrace_attr, 0 }, { "sysinfo", "__cpu_sysinfo____", &info_attr, 0 }, @@ -83,17 +91,25 @@ sdt_provider_t sdt_providers[] = { { "sched", "__sched____", &stab_attr, 0 }, { "proc", "__proc____", &stab_attr, 0 }, { "io", "__io____", &stab_attr, 0 }, + { "ip", "__ip____", &stab_attr, 0 }, { "mib", "__mib____", &stab_attr, 0 }, { "fsinfo", "__fsinfo____", &fsinfo_attr, 0 }, + { "nfsv3", "__nfsv3____", &stab_attr, 0 }, + { "nfsv4", "__nfsv4____", &stab_attr, 0 }, + { "xpv", "__xpv____", &xpv_attr, 0 }, + { "sysevent", "__sysevent____", &stab_attr, 0 }, { "sdt", "__sdt____", &sdt_attr, 0 }, +#if !defined(__APPLE__) { NULL } +#else + { NULL, NULL, NULL, 0 } +#endif /* __APPLE__ */ }; -#warning !!! Need xnu cognate for disp_t. -#warning !!! Need translators for bufinfo_t, cpuinfo_t, devinfo_t, fileinfo_t. +/* Warning: Need xnu cognate for disp_t. 
*/ sdt_argdesc_t sdt_args[] = { { "sched", "wakeup", 0, 0, "struct thread *", "lwpsinfo_t *" }, - { "sched", "wakeup", 1, 0, "struct proc *", "psinfo_t *" }, + { "sched", "wakeup", 1, 1, "struct proc *", "psinfo_t *" }, { "sched", "dequeue", 0, 0, "struct thread *", "lwpsinfo_t *" }, { "sched", "dequeue", 1, 0, "struct proc *", "psinfo_t *" }, { "sched", "dequeue", 2, 1, "disp_t *", "cpuinfo_t *" }, @@ -101,8 +117,10 @@ sdt_argdesc_t sdt_args[] = { { "sched", "enqueue", 1, 0, "struct proc *", "psinfo_t *" }, { "sched", "enqueue", 2, 1, "disp_t *", "cpuinfo_t *" }, { "sched", "enqueue", 3, 2, "int", NULL }, + /* sched:::sleep has no arguments */ + /* sched:::on-cpu has no arguments */ { "sched", "off-cpu", 0, 0, "struct thread *", "lwpsinfo_t *" }, - { "sched", "off-cpu", 1, 0, "struct proc *", "psinfo_t *" }, + { "sched", "off-cpu", 1, 1, "struct proc *", "psinfo_t *" }, { "sched", "tick", 0, 0, "struct thread *", "lwpsinfo_t *" }, { "sched", "tick", 1, 0, "struct proc *", "psinfo_t *" }, { "sched", "change-pri", 0, 0, "struct thread *", "lwpsinfo_t *" }, @@ -116,6 +134,11 @@ sdt_argdesc_t sdt_args[] = { { "sched", "schedctl-yield", 0, 0, "int", NULL }, { "sched", "surrender", 0, 0, "struct thread *", "lwpsinfo_t *" }, { "sched", "surrender", 1, 0, "struct proc *", "psinfo_t *" }, + { "sched", "surrender", 1, 0, "kthread_t *", "psinfo_t *" }, + { "sched", "cpucaps-sleep", 0, 0, "kthread_t *", "lwpsinfo_t *" }, + { "sched", "cpucaps-sleep", 1, 0, "kthread_t *", "psinfo_t *" }, + { "sched", "cpucaps-wakeup", 0, 0, "kthread_t *", "lwpsinfo_t *" }, + { "sched", "cpucaps-wakeup", 1, 0, "kthread_t *", "psinfo_t *" }, { "proc", "create", 0, 0, "struct proc *", "psinfo_t *" }, { "proc", "exec", 0, 0, "string", NULL }, @@ -125,7 +148,7 @@ sdt_argdesc_t sdt_args[] = { { "proc", "fault", 0, 0, "int", NULL }, { "proc", "fault", 1, 1, "siginfo_t *", NULL }, { "proc", "lwp-create", 0, 0, "struct thread *", "lwpsinfo_t *" }, - { "proc", "lwp-create", 1, 1, "struct proc *", 
"psinfo_t *" }, + { "proc", "lwp-create", 1, 0, "struct thread *", "psinfo_t *" }, /* proc:::lwp-start has no arguments */ /* proc:::lwp-exit has no arguments */ { "proc", "signal-clear", 0, 0, "int", NULL }, @@ -153,42 +176,718 @@ sdt_argdesc_t sdt_args[] = { { "io", "wait-done", 0, 0, "struct buf *", "bufinfo_t *" }, { "io", "wait-done", 1, 0, "struct buf *", "devinfo_t *" }, { "io", "wait-done", 2, 0, "struct buf *", "fileinfo_t *" }, - - { "vminfo", "anonfree", 0, 0, "int", NULL }, - { "vminfo", "anonpgin", 0, 0, "int", NULL }, - { "vminfo", "anonpgout", 0, 0, "int", NULL }, - { "vminfo", "as_fault", 0, 0, "int", NULL }, - { "vminfo", "cow_fault", 0, 0, "int", NULL }, - { "vminfo", "dfree", 0, 0, "int", NULL }, - { "vminfo", "execfree", 0, 0, "int", NULL }, - { "vminfo", "execpgin", 0, 0, "int", NULL }, - { "vminfo", "execpgout", 0, 0, "int", NULL }, - { "vminfo", "fsfree", 0, 0, "int", NULL }, - { "vminfo", "fspgin", 0, 0, "int", NULL }, - { "vminfo", "fspgout", 0, 0, "int", NULL }, - { "vminfo", "kerenl_asflt", 0, 0, "int", NULL }, - { "vminfo", "maj_fault", 0, 0, "int", NULL }, - { "vminfo", "pgfrec", 0, 0, "int", NULL }, - { "vminfo", "pgin", 0, 0, "int", NULL }, - { "vminfo", "pgout", 0, 0, "int", NULL }, - { "vminfo", "pgpgin", 0, 0, "int", NULL }, - { "vminfo", "pgpgout", 0, 0, "int", NULL }, - { "vminfo", "pgrec", 0, 0, "int", NULL }, - { "vminfo", "pgrrun", 0, 0, "int", NULL }, - { "vminfo", "pgswapin", 0, 0, "int", NULL }, - { "vminfo", "pgswapout", 0, 0, "int", NULL }, - { "vminfo", "prot_fault", 0, 0, "int", NULL }, - { "vminfo", "rev", 0, 0, "int", NULL }, - { "vminfo", "scan", 0, 0, "int", NULL }, - { "vminfo", "softlock", 0, 0, "int", NULL }, - { "vminfo", "swapin", 0, 0, "int", NULL }, - { "vminfo", "swapout", 0, 0, "int", NULL }, - { "vminfo", "zfod", 0, 0, "int", NULL }, +#if defined(__APPLE__) + { "io", "journal-start", 0, 0, "struct buf *", "bufinfo_t *" }, + { "io", "journal-start", 1, 0, "struct buf *", "devinfo_t *" }, + { "io", 
"journal-start", 2, 0, "struct buf *", "fileinfo_t *" }, + { "io", "journal-done", 0, 0, "struct buf *", "bufinfo_t *" }, + { "io", "journal-done", 1, 0, "struct buf *", "devinfo_t *" }, + { "io", "journal-done", 2, 0, "struct buf *", "fileinfo_t *" }, +#endif /* __APPLE__ */ { "mib", NULL, 0, 0, "int", NULL }, { "fsinfo", NULL, 0, 0, "struct vnode *", "fileinfo_t *" }, { "fsinfo", NULL, 1, 1, "int", "int" }, + + { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-getattr-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-getattr-start", 2, 3, "GETATTR3args *", NULL }, + { "nfsv3", "op-getattr-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-getattr-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-getattr-done", 2, 3, "GETATTR3res *", NULL }, + { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *", NULL }, + { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *", NULL }, + { "nfsv3", "op-lookup-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-lookup-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-lookup-start", 2, 3, "LOOKUP3args *", NULL }, + { "nfsv3", "op-lookup-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-lookup-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-lookup-done", 2, 3, "LOOKUP3res *", NULL }, + { "nfsv3", "op-access-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-access-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-access-start", 2, 3, "ACCESS3args *", NULL }, + { "nfsv3", "op-access-done", 0, 0, 
"struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-access-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-access-done", 2, 3, "ACCESS3res *", NULL }, + { "nfsv3", "op-commit-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-commit-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-commit-start", 2, 3, "COMMIT3args *", NULL }, + { "nfsv3", "op-commit-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-commit-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-commit-done", 2, 3, "COMMIT3res *", NULL }, + { "nfsv3", "op-create-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-create-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-create-start", 2, 3, "CREATE3args *", NULL }, + { "nfsv3", "op-create-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-create-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-create-done", 2, 3, "CREATE3res *", NULL }, + { "nfsv3", "op-fsinfo-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-fsinfo-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsinfo-start", 2, 3, "FSINFO3args *", NULL }, + { "nfsv3", "op-fsinfo-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-fsinfo-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsinfo-done", 2, 3, "FSINFO3res *", NULL }, + { "nfsv3", "op-fsstat-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-fsstat-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsstat-start", 2, 3, "FSSTAT3args *", NULL }, + { "nfsv3", "op-fsstat-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-fsstat-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-fsstat-done", 2, 3, "FSSTAT3res *", NULL }, + { "nfsv3", "op-link-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, 
+ { "nfsv3", "op-link-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-link-start", 2, 3, "LINK3args *", NULL }, + { "nfsv3", "op-link-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-link-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-link-done", 2, 3, "LINK3res *", NULL }, + { "nfsv3", "op-mkdir-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-mkdir-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-mkdir-start", 2, 3, "MKDIR3args *", NULL }, + { "nfsv3", "op-mkdir-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-mkdir-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-mkdir-done", 2, 3, "MKDIR3res *", NULL }, + { "nfsv3", "op-mknod-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-mknod-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-mknod-start", 2, 3, "MKNOD3args *", NULL }, + { "nfsv3", "op-mknod-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-mknod-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-mknod-done", 2, 3, "MKNOD3res *", NULL }, + { "nfsv3", "op-null-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-null-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-null-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-null-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-pathconf-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-pathconf-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-pathconf-start", 2, 3, "PATHCONF3args *", NULL }, + { "nfsv3", "op-pathconf-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-pathconf-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-pathconf-done", 2, 3, "PATHCONF3res *", NULL }, + { "nfsv3", "op-read-start", 0, 0, "struct 
svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-read-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-read-start", 2, 3, "READ3args *", NULL }, + { "nfsv3", "op-read-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-read-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-read-done", 2, 3, "READ3res *", NULL }, + { "nfsv3", "op-readdir-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readdir-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdir-start", 2, 3, "READDIR3args *", NULL }, + { "nfsv3", "op-readdir-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readdir-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdir-done", 2, 3, "READDIR3res *", NULL }, + { "nfsv3", "op-readdirplus-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readdirplus-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdirplus-start", 2, 3, "READDIRPLUS3args *", NULL }, + { "nfsv3", "op-readdirplus-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readdirplus-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readdirplus-done", 2, 3, "READDIRPLUS3res *", NULL }, + { "nfsv3", "op-readlink-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readlink-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readlink-start", 2, 3, "READLINK3args *", NULL }, + { "nfsv3", "op-readlink-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-readlink-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-readlink-done", 2, 3, "READLINK3res *", NULL }, + { "nfsv3", "op-remove-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-remove-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-remove-start", 2, 3, "REMOVE3args *", NULL }, + { "nfsv3", "op-remove-done", 
0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-remove-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-remove-done", 2, 3, "REMOVE3res *", NULL }, + { "nfsv3", "op-rename-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-rename-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-rename-start", 2, 3, "RENAME3args *", NULL }, + { "nfsv3", "op-rename-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-rename-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-rename-done", 2, 3, "RENAME3res *", NULL }, + { "nfsv3", "op-rmdir-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-rmdir-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-rmdir-start", 2, 3, "RMDIR3args *", NULL }, + { "nfsv3", "op-rmdir-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-rmdir-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-rmdir-done", 2, 3, "RMDIR3res *", NULL }, + { "nfsv3", "op-setattr-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-setattr-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-start", 2, 3, "SETATTR3args *", NULL }, + { "nfsv3", "op-setattr-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-setattr-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-setattr-done", 2, 3, "SETATTR3res *", NULL }, + { "nfsv3", "op-symlink-start", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-symlink-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-symlink-start", 2, 3, "SYMLINK3args *", NULL }, + { "nfsv3", "op-symlink-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-symlink-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-symlink-done", 2, 3, "SYMLINK3res *", NULL }, + { "nfsv3", "op-write-start", 0, 0, "struct svc_req *", + 
"conninfo_t *" }, + { "nfsv3", "op-write-start", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-write-start", 2, 3, "WRITE3args *", NULL }, + { "nfsv3", "op-write-done", 0, 0, "struct svc_req *", + "conninfo_t *" }, + { "nfsv3", "op-write-done", 1, 1, "nfsv3oparg_t *", + "nfsv3opinfo_t *" }, + { "nfsv3", "op-write-done", 2, 3, "WRITE3res *", NULL }, + + { "nfsv4", "null-start", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv4", "null-done", 0, 0, "struct svc_req *", "conninfo_t *" }, + { "nfsv4", "compound-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "compound-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "compound-start", 2, 1, "COMPOUND4args *", NULL }, + { "nfsv4", "compound-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "compound-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "compound-done", 2, 1, "COMPOUND4res *", NULL }, + { "nfsv4", "op-access-start", 0, 0, "struct compound_state *", + "conninfo_t *"}, + { "nfsv4", "op-access-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-access-start", 2, 1, "ACCESS4args *", NULL }, + { "nfsv4", "op-access-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-access-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-access-done", 2, 1, "ACCESS4res *", NULL }, + { "nfsv4", "op-close-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-close-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-close-start", 2, 1, "CLOSE4args *", NULL }, + { "nfsv4", "op-close-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-close-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-close-done", 2, 1, "CLOSE4res *", NULL }, + { "nfsv4", "op-commit-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { 
"nfsv4", "op-commit-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-commit-start", 2, 1, "COMMIT4args *", NULL }, + { "nfsv4", "op-commit-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-commit-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-commit-done", 2, 1, "COMMIT4res *", NULL }, + { "nfsv4", "op-create-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-create-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-create-start", 2, 1, "CREATE4args *", NULL }, + { "nfsv4", "op-create-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-create-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-create-done", 2, 1, "CREATE4res *", NULL }, + { "nfsv4", "op-delegpurge-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-delegpurge-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-delegpurge-start", 2, 1, "DELEGPURGE4args *", NULL }, + { "nfsv4", "op-delegpurge-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-delegpurge-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-delegpurge-done", 2, 1, "DELEGPURGE4res *", NULL }, + { "nfsv4", "op-delegreturn-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-delegreturn-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-delegreturn-start", 2, 1, "DELEGRETURN4args *", NULL }, + { "nfsv4", "op-delegreturn-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-delegreturn-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-delegreturn-done", 2, 1, "DELEGRETURN4res *", NULL }, + { "nfsv4", "op-getattr-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-getattr-start", 1, 0, "struct 
compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-getattr-start", 2, 1, "GETATTR4args *", NULL }, + { "nfsv4", "op-getattr-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-getattr-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-getattr-done", 2, 1, "GETATTR4res *", NULL }, + { "nfsv4", "op-getfh-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-getfh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-getfh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-getfh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-getfh-done", 2, 1, "GETFH4res *", NULL }, + { "nfsv4", "op-link-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-link-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-link-start", 2, 1, "LINK4args *", NULL }, + { "nfsv4", "op-link-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-link-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-link-done", 2, 1, "LINK4res *", NULL }, + { "nfsv4", "op-lock-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lock-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lock-start", 2, 1, "LOCK4args *", NULL }, + { "nfsv4", "op-lock-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lock-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lock-done", 2, 1, "LOCK4res *", NULL }, + { "nfsv4", "op-lockt-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lockt-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lockt-start", 2, 1, "LOCKT4args *", NULL }, + { "nfsv4", "op-lockt-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lockt-done", 1, 0, 
"struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lockt-done", 2, 1, "LOCKT4res *", NULL }, + { "nfsv4", "op-locku-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-locku-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-locku-start", 2, 1, "LOCKU4args *", NULL }, + { "nfsv4", "op-locku-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-locku-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-locku-done", 2, 1, "LOCKU4res *", NULL }, + { "nfsv4", "op-lookup-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lookup-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lookup-start", 2, 1, "LOOKUP4args *", NULL }, + { "nfsv4", "op-lookup-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lookup-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lookup-done", 2, 1, "LOOKUP4res *", NULL }, + { "nfsv4", "op-lookupp-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lookupp-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lookupp-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-lookupp-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-lookupp-done", 2, 1, "LOOKUPP4res *", NULL }, + { "nfsv4", "op-nverify-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-nverify-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-nverify-start", 2, 1, "NVERIFY4args *", NULL }, + { "nfsv4", "op-nverify-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-nverify-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-nverify-done", 2, 1, "NVERIFY4res *", NULL }, + { "nfsv4", "op-open-start", 0, 0, "struct compound_state *", + 
"conninfo_t *" }, + { "nfsv4", "op-open-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-start", 2, 1, "OPEN4args *", NULL }, + { "nfsv4", "op-open-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-open-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-done", 2, 1, "OPEN4res *", NULL }, + { "nfsv4", "op-open-confirm-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-open-confirm-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-confirm-start", 2, 1, "OPEN_CONFIRM4args *", NULL }, + { "nfsv4", "op-open-confirm-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-open-confirm-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-confirm-done", 2, 1, "OPEN_CONFIRM4res *", NULL }, + { "nfsv4", "op-open-downgrade-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-open-downgrade-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-downgrade-start", 2, 1, "OPEN_DOWNGRADE4args *", NULL }, + { "nfsv4", "op-open-downgrade-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-open-downgrade-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-open-downgrade-done", 2, 1, "OPEN_DOWNGRADE4res *", NULL }, + { "nfsv4", "op-openattr-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-openattr-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-openattr-start", 2, 1, "OPENATTR4args *", NULL }, + { "nfsv4", "op-openattr-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-openattr-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-openattr-done", 2, 1, "OPENATTR4res *", NULL }, + { "nfsv4", "op-putfh-start", 0, 0, "struct compound_state *", + "conninfo_t *" 
}, + { "nfsv4", "op-putfh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putfh-start", 2, 1, "PUTFH4args *", NULL }, + { "nfsv4", "op-putfh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-putfh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putfh-done", 2, 1, "PUTFH4res *", NULL }, + { "nfsv4", "op-putpubfh-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-putpubfh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putpubfh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-putpubfh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putpubfh-done", 2, 1, "PUTPUBFH4res *", NULL }, + { "nfsv4", "op-putrootfh-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-putrootfh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putrootfh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-putrootfh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-putrootfh-done", 2, 1, "PUTROOTFH4res *", NULL }, + { "nfsv4", "op-read-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-read-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-read-start", 2, 1, "READ4args *", NULL }, + { "nfsv4", "op-read-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-read-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-read-done", 2, 1, "READ4res *", NULL }, + { "nfsv4", "op-readdir-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-readdir-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-readdir-start", 2, 1, "READDIR4args *", NULL }, + { "nfsv4", "op-readdir-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { 
"nfsv4", "op-readdir-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-readdir-done", 2, 1, "READDIR4res *", NULL }, + { "nfsv4", "op-readlink-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-readlink-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-readlink-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-readlink-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-readlink-done", 2, 1, "READLINK4res *", NULL }, + { "nfsv4", "op-release-lockowner-start", 0, 0, + "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-release-lockowner-start", 1, 0, + "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-release-lockowner-start", 2, 1, + "RELEASE_LOCKOWNER4args *", NULL }, + { "nfsv4", "op-release-lockowner-done", 0, 0, + "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-release-lockowner-done", 1, 0, + "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-release-lockowner-done", 2, 1, + "RELEASE_LOCKOWNER4res *", NULL }, + { "nfsv4", "op-remove-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-remove-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-remove-start", 2, 1, "REMOVE4args *", NULL }, + { "nfsv4", "op-remove-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-remove-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-remove-done", 2, 1, "REMOVE4res *", NULL }, + { "nfsv4", "op-rename-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-rename-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-rename-start", 2, 1, "RENAME4args *", NULL }, + { "nfsv4", "op-rename-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-rename-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, 
+ { "nfsv4", "op-rename-done", 2, 1, "RENAME4res *", NULL }, + { "nfsv4", "op-renew-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-renew-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-renew-start", 2, 1, "RENEW4args *", NULL }, + { "nfsv4", "op-renew-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-renew-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-renew-done", 2, 1, "RENEW4res *", NULL }, + { "nfsv4", "op-restorefh-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-restorefh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-restorefh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-restorefh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-restorefh-done", 2, 1, "RESTOREFH4res *", NULL }, + { "nfsv4", "op-savefh-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-savefh-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-savefh-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-savefh-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-savefh-done", 2, 1, "SAVEFH4res *", NULL }, + { "nfsv4", "op-secinfo-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-secinfo-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-secinfo-start", 2, 1, "SECINFO4args *", NULL }, + { "nfsv4", "op-secinfo-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-secinfo-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-secinfo-done", 2, 1, "SECINFO4res *", NULL }, + { "nfsv4", "op-setattr-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-setattr-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, 
+ { "nfsv4", "op-setattr-start", 2, 1, "SETATTR4args *", NULL }, + { "nfsv4", "op-setattr-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-setattr-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-setattr-done", 2, 1, "SETATTR4res *", NULL }, + { "nfsv4", "op-setclientid-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-setclientid-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-start", 2, 1, "SETCLIENTID4args *", NULL }, + { "nfsv4", "op-setclientid-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-setclientid-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-done", 2, 1, "SETCLIENTID4res *", NULL }, + { "nfsv4", "op-setclientid-confirm-start", 0, 0, + "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-confirm-start", 1, 0, + "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-confirm-start", 2, 1, + "SETCLIENTID_CONFIRM4args *", NULL }, + { "nfsv4", "op-setclientid-confirm-done", 0, 0, + "struct compound_state *", "conninfo_t *" }, + { "nfsv4", "op-setclientid-confirm-done", 1, 0, + "struct compound_state *", "nfsv4opinfo_t *" }, + { "nfsv4", "op-setclientid-confirm-done", 2, 1, + "SETCLIENTID_CONFIRM4res *", NULL }, + { "nfsv4", "op-verify-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-verify-start", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-verify-start", 2, 1, "VERIFY4args *", NULL }, + { "nfsv4", "op-verify-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-verify-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-verify-done", 2, 1, "VERIFY4res *", NULL }, + { "nfsv4", "op-write-start", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-write-start", 1, 0, "struct compound_state 
*", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-write-start", 2, 1, "WRITE4args *", NULL }, + { "nfsv4", "op-write-done", 0, 0, "struct compound_state *", + "conninfo_t *" }, + { "nfsv4", "op-write-done", 1, 0, "struct compound_state *", + "nfsv4opinfo_t *" }, + { "nfsv4", "op-write-done", 2, 1, "WRITE4res *", NULL }, + { "nfsv4", "cb-recall-start", 0, 0, "rfs4_client_t *", + "conninfo_t *" }, + { "nfsv4", "cb-recall-start", 1, 1, "rfs4_deleg_state_t *", + "nfsv4cbinfo_t *" }, + { "nfsv4", "cb-recall-start", 2, 2, "CB_RECALL4args *", NULL }, + { "nfsv4", "cb-recall-done", 0, 0, "rfs4_client_t *", + "conninfo_t *" }, + { "nfsv4", "cb-recall-done", 1, 1, "rfs4_deleg_state_t *", + "nfsv4cbinfo_t *" }, + { "nfsv4", "cb-recall-done", 2, 2, "CB_RECALL4res *", NULL }, + + { "ip", "send", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "send", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "send", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "send", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "send", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "send", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "send", 6, 6, "int", NULL }, /* used by __dtrace_ipsr_ill_t */ + { "ip", "receive", 0, 0, "mblk_t *", "pktinfo_t *" }, + { "ip", "receive", 1, 1, "conn_t *", "csinfo_t *" }, + { "ip", "receive", 2, 2, "void_ip_t *", "ipinfo_t *" }, + { "ip", "receive", 3, 3, "__dtrace_ipsr_ill_t *", "ifinfo_t *" }, + { "ip", "receive", 4, 4, "ipha_t *", "ipv4info_t *" }, + { "ip", "receive", 5, 5, "ip6_t *", "ipv6info_t *" }, + { "ip", "receive", 6, 6, "int", NULL }, /* used by __dtrace_ipsr_ill_t */ + + { "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" }, + { "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" }, + + { "xpv", "add-to-physmap-end", 0, 0, "int", NULL }, + { "xpv", "add-to-physmap-start", 0, 0, "domid_t", NULL }, + { "xpv", "add-to-physmap-start", 1, 1, "uint_t", NULL }, + { "xpv", "add-to-physmap-start", 2, 2, "ulong_t", NULL }, + { "xpv", 
"add-to-physmap-start", 3, 3, "ulong_t", NULL }, + { "xpv", "decrease-reservation-end", 0, 0, "int", NULL }, + { "xpv", "decrease-reservation-start", 0, 0, "domid_t", NULL }, + { "xpv", "decrease-reservation-start", 1, 1, "ulong_t", NULL }, + { "xpv", "decrease-reservation-start", 2, 2, "uint_t", NULL }, + { "xpv", "decrease-reservation-start", 3, 3, "ulong_t *", NULL }, + { "xpv", "dom-create-start", 0, 0, "xen_domctl_t *", NULL }, + { "xpv", "dom-destroy-start", 0, 0, "domid_t", NULL }, + { "xpv", "dom-pause-start", 0, 0, "domid_t", NULL }, + { "xpv", "dom-unpause-start", 0, 0, "domid_t", NULL }, + { "xpv", "dom-create-end", 0, 0, "int", NULL }, + { "xpv", "dom-destroy-end", 0, 0, "int", NULL }, + { "xpv", "dom-pause-end", 0, 0, "int", NULL }, + { "xpv", "dom-unpause-end", 0, 0, "int", NULL }, + { "xpv", "evtchn-op-end", 0, 0, "int", NULL }, + { "xpv", "evtchn-op-start", 0, 0, "int", NULL }, + { "xpv", "evtchn-op-start", 1, 1, "void *", NULL }, + { "xpv", "increase-reservation-end", 0, 0, "int", NULL }, + { "xpv", "increase-reservation-start", 0, 0, "domid_t", NULL }, + { "xpv", "increase-reservation-start", 1, 1, "ulong_t", NULL }, + { "xpv", "increase-reservation-start", 2, 2, "uint_t", NULL }, + { "xpv", "increase-reservation-start", 3, 3, "ulong_t *", NULL }, + { "xpv", "mmap-end", 0, 0, "int", NULL }, + { "xpv", "mmap-entry", 0, 0, "ulong_t", NULL }, + { "xpv", "mmap-entry", 1, 1, "ulong_t", NULL }, + { "xpv", "mmap-entry", 2, 2, "ulong_t", NULL }, + { "xpv", "mmap-start", 0, 0, "domid_t", NULL }, + { "xpv", "mmap-start", 1, 1, "int", NULL }, + { "xpv", "mmap-start", 2, 2, "privcmd_mmap_entry_t *", NULL }, + { "xpv", "mmapbatch-end", 0, 0, "int", NULL }, + { "xpv", "mmapbatch-end", 1, 1, "struct seg *", NULL }, + { "xpv", "mmapbatch-end", 2, 2, "caddr_t", NULL }, + { "xpv", "mmapbatch-start", 0, 0, "domid_t", NULL }, + { "xpv", "mmapbatch-start", 1, 1, "int", NULL }, + { "xpv", "mmapbatch-start", 2, 2, "caddr_t", NULL }, + { "xpv", "mmu-ext-op-end", 0, 0, 
"int", NULL }, + { "xpv", "mmu-ext-op-start", 0, 0, "int", NULL }, + { "xpv", "mmu-ext-op-start", 1, 1, "struct mmuext_op *" , NULL}, + { "xpv", "mmu-update-start", 0, 0, "int", NULL }, + { "xpv", "mmu-update-start", 1, 1, "int", NULL }, + { "xpv", "mmu-update-start", 2, 2, "mmu_update_t *", NULL }, + { "xpv", "mmu-update-end", 0, 0, "int", NULL }, + { "xpv", "populate-physmap-end", 0, 0, "int" , NULL}, + { "xpv", "populate-physmap-start", 0, 0, "domid_t" , NULL}, + { "xpv", "populate-physmap-start", 1, 1, "ulong_t" , NULL}, + { "xpv", "populate-physmap-start", 2, 2, "ulong_t *" , NULL}, + { "xpv", "set-memory-map-end", 0, 0, "int" , NULL}, + { "xpv", "set-memory-map-start", 0, 0, "domid_t" , NULL}, + { "xpv", "set-memory-map-start", 1, 1, "int", NULL }, + { "xpv", "set-memory-map-start", 2, 2, "struct xen_memory_map *", NULL }, + { "xpv", "setvcpucontext-end", 0, 0, "int", NULL }, + { "xpv", "setvcpucontext-start", 0, 0, "domid_t", NULL }, + { "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *", NULL }, +#if !defined(__APPLE__) { NULL } +#else + { NULL, NULL, 0, 0, NULL, NULL } +#endif /* __APPLE__ */ }; /*ARGSUSED*/ @@ -205,6 +904,7 @@ sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) for (i = 0; sdt_args[i].sda_provider != NULL; i++) { sdt_argdesc_t *a = &sdt_args[i]; +#if !defined(__APPLE__) if (strcmp(sdp->sdp_provider->sdtp_name, a->sda_provider) != 0) continue; @@ -220,6 +920,23 @@ sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) if (a->sda_xlate != NULL) (void) strcpy(desc->dtargd_xlate, a->sda_xlate); +#else + if (strncmp(sdp->sdp_provider->sdtp_name, a->sda_provider, strlen(a->sda_provider) + 1) != 0) + continue; + + if (a->sda_name != NULL && + strncmp(sdp->sdp_name, a->sda_name, strlen(a->sda_name) + 1) != 0) + continue; + + if (desc->dtargd_ndx != a->sda_ndx) + continue; + + if (a->sda_native != NULL) + (void) strlcpy(desc->dtargd_native, a->sda_native, DTRACE_ARGTYPELEN); + + if 
(a->sda_xlate != NULL) + (void) strlcpy(desc->dtargd_xlate, a->sda_xlate, DTRACE_ARGTYPELEN); +#endif /* __APPLE__ */ desc->dtargd_mapping = a->sda_mapping; return; diff --git a/bsd/dev/dtrace/systrace.c b/bsd/dev/dtrace/systrace.c index 52362b640..3a8382f15 100644 --- a/bsd/dev/dtrace/systrace.c +++ b/bsd/dev/dtrace/systrace.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ -/* #pragma ident "@(#)systrace.c 1.5 06/03/24 SMI" */ +/* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */ #if !defined(__APPLE__) #include @@ -98,15 +98,16 @@ extern const char *syscallnames[]; systrace_sysent_t *systrace_sysent = NULL; void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, - uint64_t, uint64_t, uint64_t); + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); void systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4) + uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7) { -#pragma unused(id,arg0,arg1,arg2,arg3,arg4) +#pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7) } + int32_t dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) { @@ -131,6 +132,10 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) code = regs->save_r3; else code = regs->save_r0; + + /* + * FIXME: unix_syscall screens for "unsafe calls" and instead calls nosys(), *not* sysent[code] ! 
+ */ } #elif defined(__i386__) || defined (__x86_64__) #pragma unused(flavor) @@ -149,9 +154,11 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) } } else { code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK; - /* - * TODO: handle indirect system calls - */ + + if (code == 0) { + vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int)); + code = fuword(params); + } } } #else @@ -163,9 +170,9 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) if ((id = sy->stsy_entry) != DTRACE_IDNONE) { if (ip) - (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4)); + (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7)); else - (*systrace_probe)(id, 0, 0, 0, 0, 0); + (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0); } #if 0 /* XXX */ @@ -185,7 +192,7 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) rval = (*sy->stsy_underlying)(pp, uap, rv); if ((id = sy->stsy_return) != DTRACE_IDNONE) { - uint64_t munged_rv; + uint64_t munged_rv0, munged_rv1; uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread) @@ -195,38 +202,64 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) * "Decode" rv for use in the call to dtrace_probe() */ if (rval == ERESTART) { - munged_rv = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ + munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ + munged_rv1 = -1LL; } else if (rval != EJUSTRETURN) { if (rval) { - munged_rv = -1LL; /* Mimic what libc will do. */ + munged_rv0 = -1LL; /* Mimic what libc will do. 
*/ + munged_rv1 = -1LL; } else { switch (sy->stsy_return_type) { case _SYSCALL_RET_INT_T: - munged_rv = rv[0]; + munged_rv0 = rv[0]; + munged_rv1 = rv[1]; break; case _SYSCALL_RET_UINT_T: - munged_rv = ((u_int)rv[0]); + munged_rv0 = ((u_int)rv[0]); + munged_rv1 = ((u_int)rv[1]); break; case _SYSCALL_RET_OFF_T: - munged_rv = *(u_int64_t *)rv; + munged_rv0 = *(u_int64_t *)rv; + munged_rv1 = 0LL; break; case _SYSCALL_RET_ADDR_T: case _SYSCALL_RET_SIZE_T: case _SYSCALL_RET_SSIZE_T: - munged_rv = *(user_addr_t *)rv; + munged_rv0 = *(user_addr_t *)rv; + munged_rv1 = 0LL; break; case _SYSCALL_RET_NONE: - munged_rv = 0LL; + munged_rv0 = 0LL; + munged_rv1 = 0LL; break; default: - munged_rv = 0LL; + munged_rv0 = 0LL; + munged_rv1 = 0LL; break; } } - } else - munged_rv = 0LL; + } else { + munged_rv0 = 0LL; + munged_rv1 = 0LL; + } - (*systrace_probe)(id, munged_rv, munged_rv, (uint64_t)rval, 0, 0); + /* + * says: + * + * "This is a bit of an historical artifact. At first, the syscall provider just + * had its return value in arg0, and the fbt and pid providers had their return + * values in arg1 (so that we could use arg0 for the offset of the return site). + * + * We inevitably started writing scripts where we wanted to see the return + * values from probes in all three providers, and we made this script easier + * to write by replicating the syscall return values in arg1 to match fbt and + * pid. We debated briefly about removing the return value from arg0, but + * decided that it would be less confusing to have the same data in two places + * than to have some non-helpful, non-intuitive value in arg0. + * + * This change was made 4/23/2003 according to the DTrace project's putback log." + */ + (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0); } return (rval); @@ -242,7 +275,7 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) sy = (code >= NUM_SYSENT) ? 
&systrace_sysent[63] : &systrace_sysent[code]; if ((id = sy->stsy_return) != DTRACE_IDNONE) { - uint64_t munged_rv; + uint64_t munged_rv0, munged_rv1; uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread) @@ -252,38 +285,48 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) * "Decode" rv for use in the call to dtrace_probe() */ if (rval == ERESTART) { - munged_rv = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ + munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */ + munged_rv1 = -1LL; } else if (rval != EJUSTRETURN) { if (rval) { - munged_rv = -1LL; /* Mimic what libc will do. */ + munged_rv0 = -1LL; /* Mimic what libc will do. */ + munged_rv1 = -1LL; } else { switch (sy->stsy_return_type) { case _SYSCALL_RET_INT_T: - munged_rv = rv[0]; + munged_rv0 = rv[0]; + munged_rv1 = rv[1]; break; case _SYSCALL_RET_UINT_T: - munged_rv = ((u_int)rv[0]); + munged_rv0 = ((u_int)rv[0]); + munged_rv1 = ((u_int)rv[1]); break; case _SYSCALL_RET_OFF_T: - munged_rv = *(u_int64_t *)rv; + munged_rv0 = *(u_int64_t *)rv; + munged_rv1 = 0LL; break; case _SYSCALL_RET_ADDR_T: case _SYSCALL_RET_SIZE_T: case _SYSCALL_RET_SSIZE_T: - munged_rv = *(user_addr_t *)rv; + munged_rv0 = *(user_addr_t *)rv; + munged_rv1 = 0LL; break; case _SYSCALL_RET_NONE: - munged_rv = 0LL; + munged_rv0 = 0LL; + munged_rv1 = 0LL; break; default: - munged_rv = 0LL; + munged_rv0 = 0LL; + munged_rv1 = 0LL; break; } } - } else - munged_rv = 0LL; + } else { + munged_rv0 = 0LL; + munged_rv1 = 0LL; + } - (*systrace_probe)(id, munged_rv, munged_rv, (uint64_t)rval, 0, 0); + (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0); } } #endif /* __APPLE__ */ @@ -301,9 +344,7 @@ dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv) static dev_info_t *systrace_devi; static dtrace_provider_id_t systrace_id; -#if defined(__APPLE__) -#define 
systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ -#endif +#if !defined (__APPLE__) static void systrace_init(struct sysent *actual, systrace_sysent_t **interposed) { @@ -331,16 +372,50 @@ systrace_init(struct sysent *actual, systrace_sysent_t **interposed) #endif s->stsy_underlying = a->sy_callc; -#if defined(__APPLE__) - s->stsy_return_type = a->sy_return_type; + } +} +#else +#define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */ +static void +systrace_init(struct sysent *actual, systrace_sysent_t **interposed) +{ + + systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning + from bsd/sys/sysent.h */ + int i; + + if (ssysent == NULL) { + *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) * + NSYSCALL, KM_SLEEP); + } + + for (i = 0; i < NSYSCALL; i++) { + struct sysent *a = &actual[i]; + systrace_sysent_t *s = &ssysent[i]; + + if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) + continue; + + if (a->sy_callc == dtrace_systrace_syscall) + continue; + +#ifdef _SYSCALL32_IMPL + if (a->sy_callc == dtrace_systrace_syscall32) + continue; #endif + + s->stsy_underlying = a->sy_callc; + s->stsy_return_type = a->sy_return_type; } } +#endif /* __APPLE__ */ + /*ARGSUSED*/ static void systrace_provide(void *arg, const dtrace_probedesc_t *desc) { +#pragma unused(arg) /* __APPLE__ */ int i; if (desc != NULL) @@ -382,8 +457,11 @@ systrace_provide(void *arg, const dtrace_probedesc_t *desc) static void systrace_destroy(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); +#pragma unused(sysnum) /* __APPLE__ */ /* * There's nothing to do here but assert that we have actually been * disabled. 
@@ -405,6 +483,8 @@ systrace_destroy(void *arg, dtrace_id_t id, void *parg) static void systrace_enable(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg) /* __APPLE__ */ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); @@ -440,6 +520,8 @@ systrace_enable(void *arg, dtrace_id_t id, void *parg) static void systrace_disable(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); @@ -502,7 +584,8 @@ systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } - systrace_probe = dtrace_probe; +#if !defined(__APPLE__) + systrace_probe = (void (*)())dtrace_probe; membar_enter(); if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, @@ -513,6 +596,19 @@ systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } +#else + systrace_probe = (void(*))&dtrace_probe; + membar_enter(); + + if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0, + DDI_PSEUDO, 0) == DDI_FAILURE || + dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL, + &systrace_pops, NULL, &systrace_id) != 0) { + systrace_probe = systrace_stub; + ddi_remove_minor_node(devi, NULL); + return (DDI_FAILURE); + } +#endif /* __APPLE__ */ ddi_report_dev(devi); systrace_devi = devi; @@ -655,148 +751,10 @@ typedef struct { #endif /* !MACH_ASSERT */ } mach_trap_t; -#define MACH_TRAP_TABLE_COUNT 128 - extern mach_trap_t mach_trap_table[]; extern int mach_trap_count; -#define MACH_TRAP(name, foo, bar, baz) #name - -/* XXX From osfmk/kern/syscall_sw.c */ -static const char * mach_name_table[MACH_TRAP_TABLE_COUNT] = { -/* 0 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 1 */ MACH_TRAP(kern_invalid, 0, NULL, 
NULL), -/* 2 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 3 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 4 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 5 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 6 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 7 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 8 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 9 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 10 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 11 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 12 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 13 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 14 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 15 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 16 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 17 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 18 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 19 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 20 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 21 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 22 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 23 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 24 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 25 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 26 */ MACH_TRAP(mach_reply_port, 0, NULL, NULL), -/* 27 */ MACH_TRAP(thread_self_trap, 0, NULL, NULL), -/* 28 */ MACH_TRAP(task_self_trap, 0, NULL, NULL), -/* 29 */ MACH_TRAP(host_self_trap, 0, NULL, NULL), -/* 30 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 31 */ MACH_TRAP(mach_msg_trap, 7, munge_wwwwwww, munge_ddddddd), -/* 32 */ MACH_TRAP(mach_msg_overwrite_trap, 8, munge_wwwwwwww, munge_dddddddd), -/* 33 */ MACH_TRAP(semaphore_signal_trap, 1, munge_w, munge_d), -/* 34 */ MACH_TRAP(semaphore_signal_all_trap, 1, munge_w, munge_d), -/* 35 */ MACH_TRAP(semaphore_signal_thread_trap, 2, munge_ww, munge_dd), -/* 36 */ MACH_TRAP(semaphore_wait_trap, 1, munge_w, munge_d), -/* 37 */ MACH_TRAP(semaphore_wait_signal_trap, 2, munge_ww, munge_dd), -/* 38 */ MACH_TRAP(semaphore_timedwait_trap, 
3, munge_www, munge_ddd), -/* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, munge_wwww, munge_dddd), -/* 40 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 41 */ MACH_TRAP(init_process, 0, NULL, NULL), -/* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd), -/* 44 */ MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd), -/* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd), -/* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd), -/* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 48 */ MACH_TRAP(macx_swapon, 4, munge_wwww, munge_dddd), -/* 49 */ MACH_TRAP(macx_swapoff, 2, munge_ww, munge_dd), -/* 50 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 51 */ MACH_TRAP(macx_triggers, 4, munge_wwww, munge_dddd), -/* 52 */ MACH_TRAP(macx_backing_store_suspend, 1, munge_w, munge_d), -/* 53 */ MACH_TRAP(macx_backing_store_recovery, 1, munge_w, munge_d), -/* 54 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 55 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 56 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 57 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 58 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 59 */ MACH_TRAP(swtch_pri, 0, NULL, NULL), -/* 60 */ MACH_TRAP(swtch, 0, NULL, NULL), -/* 61 */ MACH_TRAP(thread_switch, 3, munge_www, munge_ddd), -/* 62 */ MACH_TRAP(clock_sleep_trap, 5, munge_wwwww, munge_ddddd), -/* 63 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* traps 64 - 95 reserved (debo) */ -/* 64 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 65 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 66 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 67 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 68 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 69 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 70 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 71 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 72 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 73 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 74 */ 
MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 75 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 76 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 77 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 78 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 79 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 80 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 81 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 82 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 83 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 84 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 85 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 86 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 87 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 88 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 89 */ MACH_TRAP(mach_timebase_info_trap, 1, munge_w, munge_d), -/* 90 */ MACH_TRAP(mach_wait_until_trap, 2, munge_l, munge_d), -/* 91 */ MACH_TRAP(mk_timer_create_trap, 0, NULL, NULL), -/* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, munge_w, munge_d), -/* 93 */ MACH_TRAP(mk_timer_arm_trap, 3, munge_wl, munge_dd), -/* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, munge_ww, munge_dd), -/* 95 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* traps 64 - 95 reserved (debo) */ -/* 96 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 97 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 98 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 99 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* traps 100-107 reserved for iokit (esb) */ -/* 100 */ MACH_TRAP(iokit_user_client_trap, 8, munge_wwwwwwww, munge_dddddddd), -/* 101 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 102 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 103 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 104 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 105 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 106 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 107 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* traps 108-127 unused */ -/* 108 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 109 */ 
MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 110 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 111 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 112 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 113 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 114 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 115 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 116 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 117 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 118 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 119 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 120 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 121 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 122 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 123 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 124 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 125 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 126 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 127 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -}; +extern const char *mach_syscall_name_table[]; /* XXX From osfmk/i386/bsd_i386.c */ struct mach_call_args { @@ -918,7 +876,7 @@ machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall)) continue; - s->stsy_underlying = a->mach_trap_function; + s->stsy_underlying = (sy_call_t *)a->mach_trap_function; } } @@ -926,6 +884,8 @@ machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) static void machtrace_provide(void *arg, const dtrace_probedesc_t *desc) { +#pragma unused(arg) /* __APPLE__ */ + int i; if (desc != NULL) @@ -939,13 +899,13 @@ machtrace_provide(void *arg, const dtrace_probedesc_t *desc) continue; if (dtrace_probe_lookup(machtrace_id, NULL, - mach_name_table[i], "entry") != 0) + mach_syscall_name_table[i], "entry") != 0) continue; - (void) dtrace_probe_create(machtrace_id, NULL, mach_name_table[i], + (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], "entry", 
MACHTRACE_ARTIFICIAL_FRAMES, (void *)((uintptr_t)SYSTRACE_ENTRY(i))); - (void) dtrace_probe_create(machtrace_id, NULL, mach_name_table[i], + (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i], "return", MACHTRACE_ARTIFICIAL_FRAMES, (void *)((uintptr_t)SYSTRACE_RETURN(i))); @@ -958,7 +918,10 @@ machtrace_provide(void *arg, const dtrace_probedesc_t *desc) static void machtrace_destroy(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + +#pragma unused(sysnum) /* __APPLE__ */ /* * There's nothing to do here but assert that we have actually been @@ -975,6 +938,8 @@ machtrace_destroy(void *arg, dtrace_id_t id, void *parg) static void machtrace_enable(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg) /* __APPLE__ */ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE || machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE); @@ -986,7 +951,7 @@ machtrace_enable(void *arg, dtrace_id_t id, void *parg) } if (enabled) { - ASSERT(sysent[sysnum].sy_callc == dtrace_machtrace_syscall); + ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall); return; } @@ -999,6 +964,8 @@ machtrace_enable(void *arg, dtrace_id_t id, void *parg) static void machtrace_disable(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg,id) /* __APPLE__ */ + int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE || machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE); @@ -1050,7 +1017,8 @@ machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } - machtrace_probe = dtrace_probe; +#if !defined(__APPLE__) + machtrace_probe = (void (*)())dtrace_probe; membar_enter(); if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, @@ -1058,6 +1026,16 @@ machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_register("mach_trap", 
&machtrace_attr, DTRACE_PRIV_USER, NULL, &machtrace_pops, NULL, &machtrace_id) != 0) { machtrace_probe = systrace_stub; +#else + machtrace_probe = dtrace_probe; + membar_enter(); + + if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0, + DDI_PSEUDO, 0) == DDI_FAILURE || + dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL, + &machtrace_pops, NULL, &machtrace_id) != 0) { + machtrace_probe = (void (*))&systrace_stub; +#endif /* __APPLE__ */ ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } @@ -1115,8 +1093,8 @@ void systrace_init( void ) return; } - systrace_attach( (dev_info_t *)majdevno, DDI_ATTACH ); - machtrace_attach( (dev_info_t *)majdevno, DDI_ATTACH ); + systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); + machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); gSysTraceInited = 1; } else diff --git a/bsd/dev/dtrace/systrace.h b/bsd/dev/dtrace/systrace.h index 454b71515..915ed2561 100644 --- a/bsd/dev/dtrace/systrace.h +++ b/bsd/dev/dtrace/systrace.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,15 +18,16 @@ * * CDDL HEADER END */ + /* - * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_SYSTRACE_H #define _SYS_SYSTRACE_H -/* #pragma ident "@(#)systrace.h 1.2 05/06/08 SMI" */ +/* #pragma ident "@(#)systrace.h 1.3 06/09/19 SMI" */ #if defined(__APPLE__) #ifdef KERNEL @@ -53,7 +53,7 @@ typedef struct systrace_sysent { #if !defined(__APPLE__) int64_t (*stsy_underlying)(); #else - int32_t (*stsy_underlying)(); + int32_t (*stsy_underlying)(struct proc *, void *, int *); int32_t stsy_return_type; #endif /* __APPLE__ */ } systrace_sysent_t; @@ -63,17 +63,17 @@ extern systrace_sysent_t *systrace_sysent32; #if !defined(__APPLE__) extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); #else extern void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, - uint64_t, uint64_t, uint64_t); + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); extern void systrace_stub(dtrace_id_t, uint64_t, uint64_t, - uint64_t, uint64_t, uint64_t); + uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t); extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); diff --git a/bsd/dev/i386/conf.c b/bsd/dev/i386/conf.c index 0b39c35cd..964f945bf 100644 --- a/bsd/dev/i386/conf.c +++ b/bsd/dev/i386/conf.c @@ -45,7 +45,6 @@ #include /* Prototypes that should be elsewhere: */ -extern int isdisk(dev_t dev, int type); extern dev_t chrtoblk(dev_t dev); extern int chrtoblk_set(int cdev, int bdev); extern int iskmemdev(dev_t dev); @@ -102,15 +101,11 @@ extern d_read_t cnread; extern d_write_t cnwrite; extern d_ioctl_t cnioctl; extern d_select_t cnselect; -extern d_getc_t cngetc; -extern d_putc_t cnputc; extern d_open_t kmopen; extern d_close_t kmclose; extern d_read_t kmread; extern 
d_write_t kmwrite; extern d_ioctl_t kmioctl; -extern d_getc_t kmgetc; -extern d_putc_t kmputc; extern d_open_t sgopen; extern d_close_t sgclose; extern d_ioctl_t sgioctl; @@ -145,7 +140,6 @@ extern d_close_t ptsclose; extern d_read_t ptsread; extern d_write_t ptswrite; extern d_stop_t ptsstop; -extern d_putc_t ptsputc; extern d_open_t ptcopen; extern d_close_t ptcclose; extern d_read_t ptcread; @@ -158,7 +152,6 @@ extern d_ioctl_t ptyioctl; #define ptsread eno_rdwrt #define ptswrite eno_rdwrt #define ptsstop nulldev -#define ptsputc nulldev #define ptcopen eno_opcl #define ptcclose eno_opcl @@ -204,7 +197,7 @@ struct cdevsw cdevsw[] = { cnopen, cnclose, cnread, cnwrite, /* 0*/ cnioctl, nullstop, nullreset, 0, cnselect, - eno_mmap, eno_strat, cngetc, cnputc, D_TTY + eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, NO_CDEVICE, /* 1*/ { @@ -240,7 +233,7 @@ struct cdevsw cdevsw[] = { kmopen, kmclose, kmread, kmwrite, /*12*/ kmioctl, nullstop, nullreset, km_tty, ttselect, - eno_mmap, eno_strat, kmgetc, kmputc, 0 + eno_mmap, eno_strat, eno_getc, eno_putc, 0 }, NO_CDEVICE, /*13*/ NO_CDEVICE, /*14*/ diff --git a/bsd/dev/i386/cons.c b/bsd/dev/i386/cons.c index b5e3e7289..dbd46a1cc 100644 --- a/bsd/dev/i386/cons.c +++ b/bsd/dev/i386/cons.c @@ -25,19 +25,27 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1987, 1988 NeXT, Inc. - * - * HISTORY - * 7-Jan-93 Mac Gillon (mgillon) at NeXT - * Integrated POSIX support - * - * 12-Aug-87 John Seamons (jks) at NeXT - * Ported to NeXT. - */ /* - * Indirect driver for console. + * Indirect driver for console + * + * The purpose of this driver is to provide a device node indirection for + * the console device, which can be any tty class device. It does this by + * externalizing a global pointer "constty", which is then pointed at the + * console tty device. 
+ * + * The default for this pointer is uninitialized; when it is NULL, we fall + * back to the "km" device, which is a tty BSD wrapper device for the + * Platform Expert console device. When it is non-NULL, we call through + * to the tty device instead. + * + * The registration for this device node is static, and the devfs init + * code does not externalize a named device for it, to avoid software + * seeing the device and trying to open it. + * + * The upshot of this is that the console driver should not be set as your + * controlling tty, since you will get a reference to a device which does + * not have an actual device node in /dev, so its name cannot be looked up. */ #include #include @@ -47,246 +55,95 @@ #include #include -struct tty cons; struct tty *constty; /* current console device */ -int cnopen(__unused dev_t dev, int flag, int devtype, struct proc *pp); -int cnclose(__unused dev_t dev, int flag, int mode, struct proc *pp); +/* + * The km driver supplies the default console device for the system + * (usually a raw frame buffer driver, but potentially a serial driver). 
+ */ +extern struct tty *km_tty[1]; + +/* + * cdevsw[] entries for the console device driver + */ +int cnopen(__unused dev_t dev, int flag, int devtype, proc_t pp); +int cnclose(__unused dev_t dev, int flag, int mode, proc_t pp); int cnread(__unused dev_t dev, struct uio *uio, int ioflag); int cnwrite(__unused dev_t dev, struct uio *uio, int ioflag); -int cnioctl(__unused dev_t dev, int cmd, caddr_t addr, int flg, struct proc *p); -int cnselect(__unused dev_t dev, int flag, void * wql, struct proc *p); - -void slave_cnenable(void); - -int alert( - __unused int width, - __unused int height, - __unused const char *title, - const char *msg, - int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8); -int alert_done(void); +int cnioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flg, proc_t p); +int cnselect(__unused dev_t dev, int flag, void * wql, proc_t p); -/*ARGSUSED*/ -int -cnopen(__unused dev_t dev, int flag, int devtype, struct proc *pp) +static dev_t +cndev(void) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; + return constty->t_dev; else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_open)(device, flag, devtype, pp); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + return km_tty[0]->t_dev; } -/*ARGSUSED*/ int -cnclose(__unused dev_t dev, int flag, int mode, struct proc *pp) +cnopen(__unused dev_t dev, int flag, int devtype, struct proc *pp) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_close)(device, flag, mode, pp); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_open)(dev, flag, devtype, pp)); +} +int +cnclose(__unused dev_t dev, int flag, int mode, struct proc 
*pp) +{ + dev = cndev(); + return ((*cdevsw[major(dev)].d_close)(dev, flag, mode, pp)); } -/*ARGSUSED*/ + int cnread(__unused dev_t dev, struct uio *uio, int ioflag) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_read)(device, uio, ioflag); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_read)(dev, uio, ioflag)); } -/*ARGSUSED*/ + int cnwrite(__unused dev_t dev, struct uio *uio, int ioflag) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_write)(device, uio, ioflag); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_write)(dev, uio, ioflag)); } -/*ARGSUSED*/ + int -cnioctl(__unused dev_t dev, int cmd, caddr_t addr, int flag, struct proc *p) +cnioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; + dev = cndev(); +#if 0 /* * Superuser can always use this to wrest control of console * output from the "virtual" console. + * + * XXX Unfortunately, this code doesn't do what the author thought + * XXX it did; use of the console device, a TIOCCONS would always + * XXX disassociate the console from a virtual terminal and send + * XXX it back to the fake tty. 
*/ if ((unsigned) cmd == TIOCCONS && constty) { - error = proc_suser(p); - if (error) { - goto out; + int error = proc_suser(p); + if (!error) { + constty = NULL; } - constty = NULL; - error = 0; - goto out; + return(error); } - error = (*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p); -out: - thread_funnel_set(kernel_flock, funnel_state); +#endif /* 0 */ - return(error); + return ((*cdevsw[major(dev)].d_ioctl)(dev, cmd, addr, flag, p)); } -/*ARGSUSED*/ -/* called with funnel held */ -int -cnselect(__unused dev_t dev, int flag, void * wql, struct proc *p) -{ - dev_t device; - - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - return ((*cdevsw[major(device)].d_select)(device, flag, wql, p)); -} -#if 0 /* FIXME - using OSFMK console driver for the moment */ int -cngetc() +cnselect(__unused dev_t dev, int flag, void *wql, struct proc *p) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_getc)(device); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_select)(dev, flag, wql, p)); } - -/*ARGSUSED*/ -int -cnputc(c) - char c; -{ - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_putc)(device, c); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); -} -#endif - -void -slave_cnenable(void) -{ - /* FIXME: what to do here? */ -} - -#if 0 -void -kprintf( const char *format, ...) -{ - /* on PPC this outputs to the serial line */ - /* nop on intel ... umeshv@apple.com */ - -} -#endif - -/* - * Write message to console; create an alert panel if no text-type window - * currently exists. 
Caller must call alert_done() when finished. - * The height and width arguments are not used; they are provided for - * compatibility with the 68k version of alert(). - */ -int -alert( - __unused int width, - __unused int height, - __unused const char *title, - const char *msg, - int p1, - int p2, - int p3, - int p4, - int p5, - int p6, - int p7, - int p8) -{ - char smsg[200]; - - snprintf(smsg, sizeof(smsg), msg, p1, p2, p3, p4, p5, p6, p7, p8); -#if FIXME /* [ */ - /* DoAlert(title, smsg); */ -#else - printf("%s\n",smsg); -#endif /* FIXME ] */ - - return 0; -} - -int -alert_done(void) -{ - /* DoRestore(); */ - return 0; -} - diff --git a/bsd/dev/i386/dis_tables.c b/bsd/dev/i386/dis_tables.c index 08519b456..bfaa4bc79 100644 --- a/bsd/dev/i386/dis_tables.c +++ b/bsd/dev/i386/dis_tables.c @@ -1,4 +1,5 @@ /* + * * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -19,7 +20,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -27,8 +28,9 @@ /* All Rights Reserved */ -/* #pragma ident "@(#)dis_tables.c 1.13 06/06/15 SMI" */ - +/* + * #pragma ident "@(#)dis_tables.c 1.18 08/05/24 SMI" + */ #if !defined(__APPLE__) #include "dis_tables.h" #else @@ -36,6 +38,7 @@ #include #include + #endif /* __APPLE__ */ /* BEGIN CSTYLED */ @@ -63,7 +66,9 @@ #ifdef DIS_TEXT extern char *strncpy(char *, const char *, size_t); extern size_t strlen(const char *); +#if !defined(__APPLE__) extern int strcmp(const char *, const char *); +#endif /* __APPLE__ */ extern int strncmp(const char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); #endif @@ -109,6 +114,7 @@ enum { MO, /* memory only (no registers) */ PREF, SWAPGS, + MONITOR_MWAIT, R, RA, SEG, @@ -155,6 +161,7 @@ enum { CWD, /* so data16 can be evaluated for cwd and variants */ RET, /* single immediate 16-bit operand */ MOVZ, /* for movs and movz, with different size operands */ + CRC32, /* for crc32, with different size operands */ XADDB, /* for xaddb */ MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ @@ -169,6 +176,7 @@ enum { MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ + MMOPM_66o, /* MMX/SIMD-Int 0x66 optional mm/mem -> mm,imm8 */ MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ MMOSH, /* Prefixable MMX mm,imm8 */ MM, /* MMX/SIMD-Int mm/mem -> mm */ @@ -183,12 +191,19 @@ enum { XMMOM, /* Prefixable SIMD xmm -> mem */ XMMOMS, /* Prefixable SIMD mem -> xmm */ XMM, /* SIMD xmm/mem -> xmm */ + XMM_66r, /* SIMD 0x66 prefix required xmm/mem -> xmm */ + XMM_66o, /* SIMD 0x66 prefix optional xmm/mem -> xmm */ XMMXIMPL, /* SIMD xmm -> xmm (mem) */ XMM3P, /* SIMD xmm -> r32,imm8 */ + XMM3PM_66r, /* SIMD 0x66 prefix required xmm -> r32/mem,imm8 */ XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ + XMMP_66o, /* SIMD 0x66 prefix optional xmm/mem w/to xmm,imm8 */ + XMMP_66r, /* SIMD 0x66 prefix required xmm/mem w/to xmm,imm8 
*/ XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ + XMMPRM_66r, /* SIMD 0x66 prefix required r32/mem -> xmm,imm8 */ XMMS, /* SIMD xmm -> xmm/mem */ XMMM, /* SIMD mem -> xmm */ + XMMM_66r, /* SIMD 0x66 prefix required mem -> xmm */ XMMMS, /* SIMD xmm -> mem */ XMM3MX, /* SIMD r32/mem -> xmm */ XMM3MXS, /* SIMD xmm -> r32/mem */ @@ -198,6 +213,8 @@ enum { XMMXMM, /* SIMD xmm/mem -> mm */ XMMMX, /* SIMD mm -> xmm */ XMMXM, /* SIMD xmm -> mm */ + XMMX2I, /* SIMD xmm -> xmm, imm, imm */ + XMM2I, /* SIMD xmm, imm, imm */ XMMFENCE, /* SIMD lfence or mfence */ XMMSFNC /* SIMD sfence (none or mem) */ }; @@ -454,7 +471,7 @@ const instable_t dis_op0F00[8] = { */ const instable_t dis_op0F01[8] = { -/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MO,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), +/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), /* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), }; @@ -591,7 +608,7 @@ const instable_t dis_opSIMDdata16[256] = { /* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, /* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("extrq",XMM2I,16), TNSZ("extrq",XMM,16), INVALID, INVALID, /* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), /* [80] */ INVALID, INVALID, INVALID, INVALID, @@ -651,7 +668,7 @@ const instable_t dis_opSIMDrepnz[256] = { /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),TNSZ("movntsd",XMMMS,8), /* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, @@ -676,7 +693,7 @@ const instable_t dis_opSIMDrepnz[256] = { /* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, /* [74] */ INVALID, 
INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("insertq",XMMX2I,16),TNSZ("insertq",XMM,8),INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, @@ -736,7 +753,7 @@ const instable_t dis_opSIMDrepz[256] = { /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),TNSZ("movntss",XMMMS,4), /* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, @@ -781,8 +798,8 @@ const instable_t dis_opSIMDrepz[256] = { /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ TS("popcnt",MRw), INVALID, INVALID, INVALID, +/* [BC] */ INVALID, TS("lzcnt",MRw), INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, @@ -805,6 +822,170 @@ const instable_t dis_opSIMDrepz[256] = { /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_op0F38[256] = { +/* [00] */ TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16), +/* [04] */ TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16), +/* [08] */ TNSZ("psignb",XMM_66o,16),TNSZ("psignw",XMM_66o,16),TNSZ("psignd",XMM_66o,16),TNSZ("pmulhrsw",XMM_66o,16), +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("pblendvb",XMM_66r,16),INVALID, INVALID, INVALID, +/* [14] */ TNSZ("blendvps",XMM_66r,16),TNSZ("blendvpd",XMM_66r,16),INVALID, TNSZ("ptest",XMM_66r,16), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ 
TNSZ("pabsb",XMM_66o,16),TNSZ("pabsw",XMM_66o,16),TNSZ("pabsd",XMM_66o,16),INVALID, + +/* [20] */ TNSZ("pmovsxbw",XMM_66r,16),TNSZ("pmovsxbd",XMM_66r,16),TNSZ("pmovsxbq",XMM_66r,16),TNSZ("pmovsxwd",XMM_66r,16), +/* [24] */ TNSZ("pmovsxwq",XMM_66r,16),TNSZ("pmovsxdq",XMM_66r,16),INVALID, INVALID, +/* [28] */ TNSZ("pmuldq",XMM_66r,16),TNSZ("pcmpeqq",XMM_66r,16),TNSZ("movntdqa",XMMM_66r,16),TNSZ("packusdw",XMM_66r,16), +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ TNSZ("pmovzxbw",XMM_66r,16),TNSZ("pmovzxbd",XMM_66r,16),TNSZ("pmovzxbq",XMM_66r,16),TNSZ("pmovzxwd",XMM_66r,16), +/* [34] */ TNSZ("pmovzxwq",XMM_66r,16),TNSZ("pmovzxdq",XMM_66r,16),INVALID, TNSZ("pcmpgtq",XMM_66r,16), +/* [38] */ TNSZ("pminsb",XMM_66r,16),TNSZ("pminsd",XMM_66r,16),TNSZ("pminuw",XMM_66r,16),TNSZ("pminud",XMM_66r,16), +/* [3C] */ TNSZ("pmaxsb",XMM_66r,16),TNSZ("pmaxsd",XMM_66r,16),TNSZ("pmaxuw",XMM_66r,16),TNSZ("pmaxud",XMM_66r,16), + +/* [40] */ TNSZ("pmulld",XMM_66r,16),TNSZ("phminposuw",XMM_66r,16),INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* 
[94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ TNS("crc32b",CRC32), TS("crc32",CRC32), INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_op0F3A[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ TNSZ("roundps",XMMP_66r,16),TNSZ("roundpd",XMMP_66r,16),TNSZ("roundss",XMMP_66r,16),TNSZ("roundsd",XMMP_66r,16), +/* [0C] */ TNSZ("blendps",XMMP_66r,16),TNSZ("blendpd",XMMP_66r,16),TNSZ("pblendw",XMMP_66r,16),TNSZ("palignr",XMMP_66o,16), + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ TNSZ("pextrb",XMM3PM_66r,8),TNSZ("pextrw",XMM3PM_66r,16),TSZ("pextr",XMM3PM_66r,16),TNSZ("extractps",XMM3PM_66r,16), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* 
[20] */ TNSZ("pinsrb",XMMPRM_66r,8),TNSZ("insertps",XMMP_66r,16),TSZ("pinsr",XMMPRM_66r,16),INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ TNSZ("dpps",XMMP_66r,16),TNSZ("dppd",XMMP_66r,16),TNSZ("mpsadbw",XMMP_66r,16),INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ TNSZ("pcmpestrm",XMMP_66r,16),TNSZ("pcmpestri",XMMP_66r,16),TNSZ("pcmpistrm",XMMP_66r,16),TNSZ("pcmpistri",XMMP_66r,16), +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ 
INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + /* * Decode table for 0x0F opcodes */ @@ -819,7 +1000,12 @@ const instable_t dis_op0F[16][16] = { /* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), /* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), /* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, +#if !defined(__APPLE__) +/* [1C] */ INVALID, INVALID, INVALID, INVALID, +#else +/* Need to handle multi-byte NOP */ /* [1C] */ INVALID, INVALID, INVALID, TS("nop",Mw), +#endif /* __APPLE __ */ }, { /* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), /* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, @@ -848,7 +1034,7 @@ const instable_t dis_op0F[16][16] = { }, { /* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), /* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), -/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNS("INVALID",XMMO), TNS("INVALID",XMMO), INVALID, INVALID, /* 
[7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), }, { /* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), @@ -868,7 +1054,7 @@ const instable_t dis_op0F[16][16] = { }, { /* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), /* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), -/* [B8] */ INVALID, INVALID, IND(dis_op0FBA), TS("btc",RMw), +/* [B8] */ TNS("INVALID",MRw), INVALID, IND(dis_op0FBA), TS("btc",RMw), /* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), }, { /* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), @@ -1150,14 +1336,30 @@ const instable_t dis_distable[16][16] = { /* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), }, { /* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), +#if !defined(__APPLE__) /* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), +#else +/* [2,4] */ TNS("andb",IA), TS("and",IA), TNS("%es:",OVERRIDE), TNSx("daa",NORM), +#endif /* __APPLE__ */ /* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), +#if !defined(__APPLE__) /* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), +#else +/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNS("%cs:",OVERRIDE), TNSx("das",NORM), +#endif /* __APPLE__ */ }, { /* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), +#if !defined(__APPLE__) /* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), +#else +/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNS("%ss:",OVERRIDE), TNSx("aaa",NORM), +#endif /* __APPLE__ */ /* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), +#if !defined(__APPLE__) /* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), +#else +/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNS("%ds:",OVERRIDE), TNSx("aas",NORM), +#endif /* 
__APPLE__ */ }, { /* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), /* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), @@ -1243,9 +1445,6 @@ const instable_t dis_distable[16][16] = { #define REX_X 0x02 /* high order bit extension of SIB index field */ #define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ -static uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ -static uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ - /* * Even in 64 bit mode, usually only 4 byte immediate operands are supported. */ @@ -1344,6 +1543,7 @@ dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) static void dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) { +#pragma unused (mode) if (reg != NULL && r_m == NULL) { if (rex_prefix & REX_B) *reg += 8; @@ -1365,8 +1565,8 @@ dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) int byte; int valsize; - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; + if (x->d86_numopnds < (uint_t)opindex + 1) + x->d86_numopnds = (uint_t)opindex + 1; switch (wbit) { case BYTE_OPND: @@ -1409,7 +1609,7 @@ dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) } /* Do sign extension */ if (x->d86_bytes[x->d86_len - 1] & 0x80) { - for (; i < sizeof (uint64_t); i++) + for (; i < (int)sizeof (uint64_t); i++) x->d86_opnd[opindex].d86_value |= (uint64_t)0xff << (i * 8); } @@ -1446,6 +1646,8 @@ dtrace_check_override(dis86_t *x, int opindex) (void) strlcat(x->d86_opnd[opindex].d86_prefix, x->d86_seg_prefix, PFIXLEN); } +#else + #pragma unused (opindex) #endif x->d86_seg_prefix = NULL; } @@ -1472,10 +1674,12 @@ dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) int dispsize; /* size of displacement in bytes */ #ifdef DIS_TEXT char *opnd = x->d86_opnd[opindex].d86_opnd; +#else + #pragma unused (wbit) #endif - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; + if (x->d86_numopnds < (uint_t)opindex + 1) + 
x->d86_numopnds = (uint_t)opindex + 1; if (x->d86_error) return; @@ -1682,12 +1886,38 @@ dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) /* * Similar, but for 2 operands plus an immediate. + * vbit indicates direction + * 0 for "opcode imm, r, r_m" or + * 1 for "opcode imm, r_m, r" */ -#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ +#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize, vbit) { \ dtrace_get_modrm(x, &mode, ®, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, 1); \ - dtrace_get_operand(x, REG_ONLY, reg, w2, 2); \ + dtrace_get_operand(x, mode, r_m, wbit, 2-vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 1+vbit); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * Similar, but for 2 operands plus two immediates. + */ +#define FOUROPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 2); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 3); \ + dtrace_imm_opnd(x, wbit, immsize, 1); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * 1 operands plus two immediates. 
+ */ +#define ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, wbit, immsize) { \ + dtrace_get_modrm(x, &mode, ®, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 2); \ + dtrace_imm_opnd(x, wbit, immsize, 1); \ dtrace_imm_opnd(x, wbit, immsize, 0); \ } @@ -1712,7 +1942,9 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) #else #define NOMEM /* nothing */ #endif - uint_t wbit; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ + uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ + uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ + uint_t wbit = 0; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ uint_t w2; /* wbit value for second operand */ uint_t vbit; uint_t mode = 0; /* mode value from ModRM byte */ @@ -1739,6 +1971,8 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) uint_t rex_prefix = 0; /* amd64 register extension prefix */ size_t off; + instable_t dp_mmx; + x->d86_len = 0; x->d86_rmindex = -1; x->d86_error = 0; @@ -1746,7 +1980,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) x->d86_numopnds = 0; x->d86_seg_prefix = NULL; x->d86_mnem[0] = 0; - for (i = 0; i < 3; ++i) { + for (i = 0; i < 4; ++i) { x->d86_opnd[i].d86_opnd[0] = 0; x->d86_opnd[i].d86_prefix[0] = 0; x->d86_opnd[i].d86_value_size = 0; @@ -1779,6 +2013,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { #ifdef DIS_TEXT (void) strncpy(x->d86_mnem, ".byte\t0", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; #endif goto done; } @@ -1891,6 +2126,76 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) dp = (instable_t *)&dis_op0F7123[opcode5][subcode]; } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { dp = (instable_t *)&dis_op0FC8[0]; + } else if ((opcode4 == 0x3) && (opcode5 == 0xA)) { + opcode_bytes = 3; + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + + dp = (instable_t *)&dis_op0F3A[(opcode6<<4)|opcode7]; +#ifdef DIS_TEXT + if 
(LIT_STRNEQL(dp->it_name, "INVALID")) + goto error; +#endif + switch (dp->it_adrmode) { + case XMMP_66r: + case XMMPRM_66r: + case XMM3PM_66r: + if (opnd_size_prefix == 0) { + goto error; + } + break; + case XMMP_66o: + if (opnd_size_prefix == 0) { + /* SSSE3 MMX instructions */ + dp_mmx = *dp; + dp = &dp_mmx; + dp->it_adrmode = MMOPM_66o; +#ifdef DIS_MEM + dp->it_size = 8; +#endif + } + break; + default: + goto error; + } + } else if ((opcode4 == 0x3) && (opcode5 == 0x8)) { + opcode_bytes = 3; + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + dp = (instable_t *)&dis_op0F38[(opcode6<<4)|opcode7]; +#ifdef DIS_TEXT + if (LIT_STRNEQL(dp->it_name, "INVALID")) + goto error; +#endif + switch (dp->it_adrmode) { + case XMM_66r: + case XMMM_66r: + if (opnd_size_prefix == 0) { + goto error; + } + break; + case XMM_66o: + if (opnd_size_prefix == 0) { + /* SSSE3 MMX instructions */ + dp_mmx = *dp; + dp = &dp_mmx; + dp->it_adrmode = MM; +#ifdef DIS_MEM + dp->it_size = 8; +#endif + } + break; + case CRC32: + if (rep_prefix != 0xF2) { + goto error; + } + rep_prefix = 0; + break; + default: + goto error; + } } else { dp = (instable_t *)&dis_op0F[opcode4][opcode5]; } @@ -1940,8 +2245,8 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) /* * at this point we should have a correct (or invalid) opcode */ - if (cpu_mode == SIZE64 && dp->it_invalid64 || - cpu_mode != SIZE64 && dp->it_invalid32) + if ((cpu_mode == SIZE64 && dp->it_invalid64) || + (cpu_mode != SIZE64 && dp->it_invalid32)) goto error; if (dp->it_indirect != TERM) goto error; @@ -2033,6 +2338,27 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) opnd_size = SIZE32; } break; + case MRw: + if (rep_prefix) { + if (rep_prefix == 0xf3) { + + /* + * Calculate our offset in dis_op0F + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F + > sizeof (dis_op0F)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / + sizeof (instable_t); + + dp = (instable_t *)&dis_opSIMDrepz[off]; + rep_prefix = 0; + } else { + goto 
error; + } + } + break; } /* @@ -2061,7 +2387,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) if (dp->it_adrmode != CBW && dp->it_adrmode != CWD && dp->it_adrmode != XMMSFNC) { - if (strcmp(dp->it_name, "INVALID") == 0) + if (LIT_STRNEQL(dp->it_name, "INVALID")) goto error; (void) strlcat(x->d86_mnem, dp->it_name, OPLEN); if (dp->it_suffix) { @@ -2073,6 +2399,13 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) break; } x->d86_mnem[i - 1] = *types[opnd_size]; + } else if ((opnd_size == 2) && (opcode_bytes == 3) && + ((opcode6 == 1 && opcode7 == 6) || + (opcode6 == 2 && opcode7 == 2))) { + /* + * To handle PINSRD and PEXTRD + */ + (void) strlcat(x->d86_mnem, "d", OPLEN); } else { (void) strlcat(x->d86_mnem, types[opnd_size], OPLEN); @@ -2096,8 +2429,10 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) */ case MOVSXZ: #ifdef DIS_TEXT - if (rex_prefix == 0) + if (rex_prefix == 0) { (void) strncpy(x->d86_mnem, "movzld", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; + } #endif dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); @@ -2126,6 +2461,20 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) wbit = WBIT(opcode5); dtrace_get_operand(x, mode, r_m, wbit, 0); break; + case CRC32: + opnd_size = SIZE32; + if (rex_prefix & REX_W) + opnd_size = SIZE64; + x->d86_opnd_size = opnd_size; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + wbit = WBIT(opcode7); + if (opnd_size_prefix) + x->d86_opnd_size = opnd_size = SIZE16; + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; /* * imul instruction, with either 8-bit or longer immediate @@ -2134,7 +2483,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) case IMUL: wbit = LONG_OPND; THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, - OPSIZE(opnd_size, opcode2 == 0x9)); + OPSIZE(opnd_size, opcode2 == 0x9), 1); break; /* memory or register operand to register, with 'w' bit */ @@ -2163,7 +2512,7 @@ 
dtrace_disx86(dis86_t *x, uint_t cpu_mode) case MMS: case MMOS: #ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; + wbit = !LIT_STRNEQL(dp->it_name, "movd") ? MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif @@ -2325,6 +2674,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { #ifdef DIS_TEXT (void) strncpy(x->d86_mnem, "swapgs", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; #endif NOMEM; break; @@ -2346,6 +2696,28 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) wbit = BYTE_OPND; goto just_mem; + case MONITOR_MWAIT: + if (mode == 3) { + if (r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "monitor", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; +#endif + NOMEM; + break; + } else if (r_m == 1) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "mwait", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; +#endif + NOMEM; + break; + } else { + goto error; + } + } + /*FALLTHROUGH*/ + case MO: /* Similar to M, but only memory (no direct registers) */ wbit = LONG_OPND; @@ -2451,7 +2823,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) case MM: case MMO: #ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; + wbit = !LIT_STRNEQL(dp->it_name, "movd") ? MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif @@ -2460,7 +2832,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) case MMOIMPL: #ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; + wbit = !LIT_STRNEQL(dp->it_name, "movd") ? 
MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif @@ -2485,24 +2857,32 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) if (mode != REG_ONLY) goto error; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1); + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1, + 1); NOMEM; break; + case XMM3PM_66r: + THREEOPERAND(x, mode, reg, r_m, rex_prefix, LONG_OPND, XMM_OPND, + 1, 0); + break; + /* MMX/SIMD-Int predicated r32/mem to mm reg */ case MMOPRM: wbit = LONG_OPND; w2 = MM_OPND; goto xmmprm; case XMMPRM: + case XMMPRM_66r: wbit = LONG_OPND; w2 = XMM_OPND; xmmprm: - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1); + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1, 1); break; /* MMX/SIMD-Int predicated mm/mem to mm reg */ case MMOPM: + case MMOPM_66o: wbit = w2 = MM_OPND; goto xmmprm; @@ -2518,6 +2898,8 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) /* SIMD memory or xmm reg operand to xmm reg */ case XMM: + case XMM_66o: + case XMM_66r: case XMMO: case XMMXIMPL: wbit = XMM_OPND; @@ -2533,10 +2915,13 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) * movhps and movlhps behave similarly. 
*/ if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movlps") == 0) + if (LIT_STRNEQL(dp->it_name, "movlps")) (void) strncpy(x->d86_mnem, "movhlps", OPLEN); - else if (strcmp(dp->it_name, "movhps") == 0) + x->d86_mnem[OPLEN - 1] = '\0'; + } else if (LIT_STRNEQL(dp->it_name, "movhps")) { (void) strncpy(x->d86_mnem, "movlhps", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; + } } #endif if (dp->it_adrmode == XMMXIMPL) @@ -2550,9 +2935,9 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) case XMMOMS: dtrace_get_modrm(x, &mode, ®, &r_m); #ifdef DIS_TEXT - if ((strcmp(dp->it_name, "movlps") == 0 || - strcmp(dp->it_name, "movhps") == 0 || - strcmp(dp->it_name, "movntps") == 0) && + if ((LIT_STRNEQL(dp->it_name, "movlps") || + LIT_STRNEQL(dp->it_name, "movhps") || + LIT_STRNEQL(dp->it_name, "movntps")) && mode == REG_ONLY) goto error; #endif @@ -2562,14 +2947,16 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) /* SIMD memory to xmm reg */ case XMMM: + case XMMM_66r: case XMMOM: wbit = XMM_OPND; dtrace_get_modrm(x, &mode, ®, &r_m); #ifdef DIS_TEXT if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movhps") == 0) + if (LIT_STRNEQL(dp->it_name, "movhps")) { (void) strncpy(x->d86_mnem, "movlhps", OPLEN); - else + x->d86_mnem[OPLEN - 1] = '\0'; + } else goto error; } #endif @@ -2624,9 +3011,12 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) /* SIMD predicated memory or xmm reg with/to xmm reg */ case XMMP: + case XMMP_66r: + case XMMP_66o: case XMMOPM: wbit = XMM_OPND; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1, + 1); #ifdef DIS_TEXT /* @@ -2645,6 +3035,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) goto error; (void) strncpy(x->d86_mnem, "cmp", OPLEN); + x->d86_mnem[OPLEN - 1] = '\0'; (void) strlcat(x->d86_mnem, dis_PREDSUFFIX[pred], OPLEN); (void) strlcat(x->d86_mnem, @@ -2657,6 +3048,17 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode) #endif break; + case XMMX2I: + FOUROPERAND(x, mode, reg, r_m, 
rex_prefix, XMM_OPND, XMM_OPND, + 1); + NOMEM; + break; + + case XMM2I: + ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, XMM_OPND, 1); + NOMEM; + break; + /* immediate operand to accumulator */ case IA: wbit = WBIT(opcode2); @@ -3270,8 +3672,8 @@ dtrace_disx86_str(dis86_t *dis, uint_t mode, uint64_t pc, char *buf, lookup = dis->d86_sym_lookup; if (tgt != 0) { - /* Print symbol, if found, for tgt */ - if (lookup(dis->d86_data, tgt, NULL, 0) == 0) { + if ((dis->d86_flags & DIS_F_NOIMMSYM) == 0 && + lookup(dis->d86_data, tgt, NULL, 0) == 0) { (void) strlcat(buf, "\t<", buflen); curlen = strlen(buf); lookup(dis->d86_data, tgt, buf + curlen, diff --git a/bsd/dev/i386/dtrace_isa.c b/bsd/dev/i386/dtrace_isa.c index 304532c6b..65749f9df 100644 --- a/bsd/dev/i386/dtrace_isa.c +++ b/bsd/dev/i386/dtrace_isa.c @@ -49,18 +49,48 @@ typedef x86_saved_state_t savearea_t; #include #include +/* + * APPLE NOTE: The regmap is used to decode which 64bit uregs[] register + * is being accessed when passed the 32bit uregs[] constant (based on + * the reg.d translator file). The dtrace_getreg() is smart enough to handle + * the register mappings. The register set definitions are the same as + * those used by the fasttrap_getreg code. 
+ */ +#include "fasttrap_regset.h" +static const uint8_t regmap[19] = { + REG_GS, /* GS */ + REG_FS, /* FS */ + REG_ES, /* ES */ + REG_DS, /* DS */ + REG_RDI, /* EDI */ + REG_RSI, /* ESI */ + REG_RBP, /* EBP, REG_FP */ + REG_RSP, /* ESP */ + REG_RBX, /* EBX */ + REG_RDX, /* EDX, REG_R1 */ + REG_RCX, /* ECX */ + REG_RAX, /* EAX, REG_R0 */ + REG_TRAPNO, /* TRAPNO */ + REG_ERR, /* ERR */ + REG_RIP, /* EIP, REG_PC */ + REG_CS, /* CS */ + REG_RFL, /* EFL, REG_PS */ + REG_RSP, /* UESP, REG_SP */ + REG_SS /* SS */ +}; + extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, - int fault, int fltoffs, uint64_t illval) + int fltoffs, int fault, uint64_t illval) { /* * For the case of the error probe firing lets * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG. */ state->dts_arg_error_illval = illval; - dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fault, fltoffs ); + dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault ); } /* @@ -135,26 +165,77 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg) /* * Runtime and ABI */ -extern greg_t -dtrace_getfp(void) -{ - return (greg_t)__builtin_frame_address(0); -} uint64_t dtrace_getreg(struct regs *savearea, uint_t reg) { boolean_t is64Bit = proc_is64bit(current_proc()); x86_saved_state_t *regs = (x86_saved_state_t *)savearea; - + if (is64Bit) { - /* beyond register SS */ - if (reg > x86_SAVED_STATE64_COUNT - 1) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } - return ((uint64_t *)(&(regs->ss_64.gs)))[reg]; - } else { + if (reg <= SS) { + reg = regmap[reg]; + } else { + reg -= (SS + 1); + } + + switch (reg) { + case REG_RDI: + return (uint64_t)(regs->ss_64.rdi); + case REG_RSI: + return (uint64_t)(regs->ss_64.rsi); + case REG_RDX: + return (uint64_t)(regs->ss_64.rdx); + case REG_RCX: + return (uint64_t)(regs->ss_64.rcx); + 
case REG_R8: + return (uint64_t)(regs->ss_64.r8); + case REG_R9: + return (uint64_t)(regs->ss_64.r9); + case REG_RAX: + return (uint64_t)(regs->ss_64.rax); + case REG_RBX: + return (uint64_t)(regs->ss_64.rbx); + case REG_RBP: + return (uint64_t)(regs->ss_64.rbp); + case REG_R10: + return (uint64_t)(regs->ss_64.r10); + case REG_R11: + return (uint64_t)(regs->ss_64.r11); + case REG_R12: + return (uint64_t)(regs->ss_64.r12); + case REG_R13: + return (uint64_t)(regs->ss_64.r13); + case REG_R14: + return (uint64_t)(regs->ss_64.r14); + case REG_R15: + return (uint64_t)(regs->ss_64.r15); + case REG_FS: + return (uint64_t)(regs->ss_64.fs); + case REG_GS: + return (uint64_t)(regs->ss_64.gs); + case REG_TRAPNO: + return (uint64_t)(regs->ss_64.isf.trapno); + case REG_ERR: + return (uint64_t)(regs->ss_64.isf.err); + case REG_RIP: + return (uint64_t)(regs->ss_64.isf.rip); + case REG_CS: + return (uint64_t)(regs->ss_64.isf.cs); + case REG_SS: + return (uint64_t)(regs->ss_64.isf.ss); + case REG_RFL: + return (uint64_t)(regs->ss_64.isf.rflags); + case REG_RSP: + return (uint64_t)(regs->ss_64.isf.rsp); + case REG_DS: + case REG_ES: + default: + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + } else { /* is 32bit user */ /* beyond register SS */ if (reg > x86_SAVED_STATE32_COUNT - 1) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); @@ -162,7 +243,6 @@ dtrace_getreg(struct regs *savearea, uint_t reg) } return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg]; } - } #define RETURN_OFFSET 4 @@ -254,6 +334,69 @@ dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc, return (ret); } + +/* + * The return value indicates if we've modified the stack. 
+ */ +static int +dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc, + user_addr_t sp) +{ + int64_t missing_tos; + int rc = 0; + boolean_t is64Bit = proc_is64bit(current_proc()); + + ASSERT(pc != NULL); + + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { + /* + * If we found ourselves in an entry probe, the frame pointer has not + * yet been pushed (that happens in the + * function prologue). The best approach is to + * add the current pc as a missing top of stack, + * and back the pc up to the caller, which is stored at the + * current stack pointer address since the call + * instruction puts it there right before + * the branch. + */ + + missing_tos = *pc; + + if (is64Bit) + *pc = dtrace_fuword64(sp); + else + *pc = dtrace_fuword32(sp); + } else { + /* + * We might have a top of stack override, in which case we just + * add that frame without question to the top. This + * happens in return probes where you have a valid + * frame pointer, but it's for the callers frame + * and you'd like to add the pc of the return site + * to the frame. + */ + missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos; + } + + if (missing_tos != 0) { + if (pcstack != NULL && pcstack_limit != NULL) { + /* + * If the missing top of stack has been filled out, then + * we add it and adjust the size. + */ + *(*pcstack)++ = missing_tos; + (*pcstack_limit)--; + } + /* + * return 1 because we would have changed the + * stack whether or not it was passed in. This + * ensures the stack count is correct + */ + rc = 1; + } + return rc; +} + void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { @@ -297,17 +440,15 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) fp = regs->ss_32.ebp; } - if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { - *pcstack++ = (uint64_t)pc; - pcstack_limit--; - if (pcstack_limit <= 0) - return; + /* + * The return value indicates if we've modified the stack. 
+ * Since there is nothing else to fix up in either case, + * we can safely ignore it here. + */ + (void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp); - if (is64Bit) - pc = dtrace_fuword64(sp); - else - pc = dtrace_fuword32(sp); - } + if(pcstack_limit <= 0) + return; /* * Note that unlike ppc, the x86 code does not use @@ -356,15 +497,17 @@ dtrace_getustackdepth(void) fp = regs->ss_32.ebp; } - if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { - n++; - - if (is64Bit) - pc = dtrace_fuword64(sp); - else - pc = dtrace_fuword32(sp); + if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) { + /* + * we would have adjusted the stack if we had + * supplied one (that is what rc == 1 means). + * Also, as a side effect, the pc might have + * been fixed up, which is good for calling + * in to dtrace_getustack_common. + */ + n++; } - + /* * Note that unlike ppc, the x86 code does not use * CPU_DTRACE_USTACK_FP. This is because x86 always @@ -428,17 +571,13 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) } #endif - if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { - *pcstack++ = (uint64_t)pc; - *fpstack++ = 0; - pcstack_limit--; - if (pcstack_limit <= 0) - return; - - if (is64Bit) - pc = dtrace_fuword64(sp); - else - pc = dtrace_fuword32(sp); + if(dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) { + /* + * we made a change. 
+ */ + *fpstack++ = 0; + if (pcstack_limit <= 0) + return; } while (pc != 0) { @@ -505,7 +644,7 @@ void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { - struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *fp = (struct frame *)__builtin_frame_address(0); struct frame *nextfp, *minfp, *stacktop; int depth = 0; int last = 0; @@ -516,7 +655,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, if ((on_intr = CPU_ON_INTR(CPU)) != 0) stacktop = (struct frame *)dtrace_get_cpu_int_stack_top(); else - stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); + stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); minfp = fp; @@ -527,7 +666,11 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, while (depth < pcstack_limit) { nextfp = *(struct frame **)fp; - pc = *(uintptr_t *)(((uint32_t)fp) + RETURN_OFFSET); +#if defined(__x86_64__) + pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64); +#else + pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET); +#endif if (nextfp <= minfp || nextfp >= stacktop) { if (on_intr) { @@ -537,7 +680,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread()); minfp = (struct frame *)kstack_base; - stacktop = (struct frame *)(kstack_base + KERNEL_STACK_SIZE); + stacktop = (struct frame *)(kstack_base + kernel_stack_size); on_intr = 0; continue; @@ -585,16 +728,26 @@ uint64_t dtrace_getarg(int arg, int aframes) { uint64_t val; - struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *fp = (struct frame *)__builtin_frame_address(0); uintptr_t *stack; uintptr_t pc; int i; + +#if defined(__x86_64__) + /* + * A total of 6 arguments are passed via registers; any argument with + * index of 5 or lower is therefore in a register. 
+ */ + int inreg = 5; +#endif + for (i = 1; i <= aframes; i++) { fp = fp->backchain; pc = fp->retaddr; if (pc == (uintptr_t)dtrace_invop_callsite) { +#if defined(__i386__) /* * If we pass through the invalid op handler, we will * use the pointer that it passed to the stack as the @@ -605,19 +758,62 @@ dtrace_getarg(int arg, int aframes) stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ fp = (struct frame *)stack[1]; /* Grab *second* argument */ stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - val = (uint64_t)(stack[arg]); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - return val; +#elif defined(__x86_64__) + /* + * In the case of x86_64, we will use the pointer to the + * save area structure that was pushed when we took the + * trap. To get this structure, we must increment + * beyond the frame structure. If the + * argument that we're seeking is passed on the stack, + * we'll pull the true stack pointer out of the saved + * registers and decrement our argument by the number + * of arguments passed in registers; if the argument + * we're seeking is passed in regsiters, we can just + * load it directly. + */ + + /* fp points to frame of dtrace_invop() activation. */ + fp = fp->backchain; /* to fbt_perfcallback() activation. */ + fp = fp->backchain; /* to kernel_trap() activation. */ + fp = fp->backchain; /* to trap_from_kernel() activation. */ + + x86_saved_state_t *tagged_regs = (x86_saved_state_t *)&fp[1]; + x86_saved_state64_t *saved_state = saved_state64(tagged_regs); + + if (arg <= inreg) { + stack = (uintptr_t *)&saved_state->rdi; + } else { + stack = (uintptr_t *)(saved_state->isf.rsp); + arg -= inreg; + } +#else +#error Unknown arch +#endif + goto load; } } /* * Arrive here when provider has called dtrace_probe directly. */ + arg++; /* Advance past probeID */ + +#if defined(__x86_64__) + if (arg <= inreg) { + /* + * This shouldn't happen. 
If the argument is passed in a + * register then it should have been, well, passed in a + * register... + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + + arg -= (inreg + 1); +#endif stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ - stack++; /* Advance past probeID */ +load: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); val = *(((uint64_t *)stack) + arg); /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -635,15 +831,8 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) * "base" is the smallest toxic address in the range, "limit" is the first * VALID address greater than "base". */ - func(0x0, VM_MIN_KERNEL_ADDRESS); - func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0); -} - -extern boolean_t pmap_valid_page(ppnum_t pn); - -boolean_t -dtxnu_is_RAM_page(ppnum_t pn) -{ - return pmap_valid_page(pn); + func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS); + if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0) + func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0); } diff --git a/bsd/dev/i386/dtrace_subr_x86.c b/bsd/dev/i386/dtrace_subr_x86.c index ae29f8416..c4ab38a31 100644 --- a/bsd/dev/i386/dtrace_subr_x86.c +++ b/bsd/dev/i386/dtrace_subr_x86.c @@ -20,12 +20,12 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * #pragma ident "@(#)dtrace_subr.c 1.13 06/06/12 SMI" + * #pragma ident "@(#)dtrace_subr.c 1.16 07/09/18 SMI" */ #include @@ -154,7 +154,7 @@ dtrace_user_probe(x86_saved_state_t *regs) return KERN_SUCCESS; } else if (trapno == T_INT3) { - uint8_t instr; + uint8_t instr, instr2; rwp = &CPU->cpu_ft_lock; /* @@ -189,7 +189,8 @@ dtrace_user_probe(x86_saved_state_t *regs) * that case, return to user land to retry the instuction. */ user_addr_t pc = (regs64) ? 
regs64->isf.rip : (user_addr_t)regs32->eip; - if (fuword8(pc - 1, &instr) == 0 && instr != FASTTRAP_INSTR) { + if (fuword8(pc - 1, &instr) == 0 && instr != FASTTRAP_INSTR && // neither single-byte INT3 (0xCC) + !(instr == 3 && fuword8(pc - 2, &instr2) == 0 && instr2 == 0xCD)) { // nor two-byte INT 3 (0xCD03) if (regs64) { regs64->isf.rip--; } else { diff --git a/bsd/dev/i386/fasttrap_isa.c b/bsd/dev/i386/fasttrap_isa.c index c96666b0b..be620b517 100644 --- a/bsd/dev/i386/fasttrap_isa.c +++ b/bsd/dev/i386/fasttrap_isa.c @@ -20,12 +20,12 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * #pragma ident "@(#)fasttrap_isa.c 1.23 06/09/19 SMI" + * #pragma ident "@(#)fasttrap_isa.c 1.27 08/04/09 SMI" */ #ifdef KERNEL @@ -45,7 +45,8 @@ extern dtrace_id_t dtrace_probeid_error; #include #include -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. 
*/ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ /* * Lossless User-Land Tracing on x86 @@ -247,7 +248,7 @@ fasttrap_anarg(x86_saved_state_t *regs, int function_entry, int argno) value = dtrace_fuword64(stack); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { - uint32_t *stack = (uint32_t *)regs32->uesp; + uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32((user_addr_t)(unsigned long)&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); @@ -396,6 +397,7 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } @@ -466,12 +468,14 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, i = 2; } - if (sz == 1) + if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; - else if (sz == 4) + } else if (sz == 4) { + /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; - else + } else { tp->ftt_dest = 0; + } } } else { switch (instr[start]) { @@ -481,6 +485,7 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; + /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; @@ -524,6 +529,7 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; @@ -531,6 +537,7 @@ fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc, case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ *(int32_t 
*)&instr[start + 1]; break; case FASTTRAP_JMP8: @@ -671,6 +678,8 @@ fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid, x86_saved_state32_t *regs32; unsigned int p_model; + dtrace_icookie_t cookie; + if (is_saved_state64(regs)) { regs64 = saved_state64(regs); regs32 = NULL; @@ -692,7 +701,7 @@ fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid, for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && - !tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount != 0) break; } @@ -718,6 +727,13 @@ fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid, id->fti_probe->ftp_fsize) continue; + /* + * Provide a hint to the stack trace functions to add the + * following pc to the top of the stack since it's missing + * on a return probe yet highly desirable for consistency. + */ + cookie = dtrace_interrupt_disable(); + cpu_core[CPU->cpu_id].cpuc_missing_tos = pc; if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) { dtrace_probe(dtrace_probeid_error, 0 /* state */, id->fti_probe->ftp_id, 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV); @@ -730,6 +746,9 @@ fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid, pc - id->fti_probe->ftp_faddr, regs32->eax, regs32->edx, 0, 0); } + /* remove the hint */ + cpu_core[CPU->cpu_id].cpuc_missing_tos = 0; + dtrace_interrupt_enable(cookie); } lck_mtx_unlock(pid_mtx); @@ -786,7 +805,7 @@ fasttrap_usdt_args32(fasttrap_probe_t *probe, x86_saved_state32_t *regs32, int a uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); - uint32_t *stack = (uint32_t *)regs32->uesp; + uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; @@ -989,7 +1008,7 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && - !tp->ftt_proc->ftpc_defunct) + 
tp->ftt_proc->ftpc_acount != 0) break; } @@ -1013,7 +1032,7 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) fasttrap_id_t *id; uint32_t s0, s1, s2, s3, s4, s5; - uint32_t *stack = (uint32_t *)regs32->uesp; + uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp); /* * In 32-bit mode, all arguments are passed on the @@ -1341,7 +1360,7 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) case FASTTRAP_T_COMMON: { user_addr_t addr; - uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 5 + 2]; + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; uint_t i = 0; /* @@ -1402,6 +1421,7 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; + /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); @@ -1411,6 +1431,8 @@ fasttrap_pid_probe32(x86_saved_state_t *regs) scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; + ASSERT(i <= sizeof (scratch)); + if (fasttrap_copyout(scratch, addr, i)) { fasttrap_sigtrap(p, uthread, pc); new_pc = pc; @@ -1545,7 +1567,7 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && - !tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount != 0) break; } @@ -1878,7 +1900,7 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) case FASTTRAP_T_COMMON: { user_addr_t addr; - uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 5 + 2]; + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; uint_t i = 0; /* @@ -2025,6 +2047,7 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) panic("unhandled ripmode in fasttrap_pid_probe64"); } + /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; uthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; @@ -2040,8 +2063,10 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) */ scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); + /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof 
(uint32_t); + /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); @@ -2051,6 +2076,8 @@ fasttrap_pid_probe64(x86_saved_state_t *regs) scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; + ASSERT(i <= sizeof (scratch)); + if (fasttrap_copyout(scratch, addr, i)) { fasttrap_sigtrap(p, uthread, pc); new_pc = pc; @@ -2180,6 +2207,7 @@ fasttrap_return_probe(x86_saved_state_t *regs) return (0); } + uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) @@ -2223,6 +2251,20 @@ fasttrap_getreg(x86_saved_state_t *regs, uint_t reg) case REG_R13: return regs64->r13; case REG_R14: return regs64->r14; case REG_R15: return regs64->r15; + case REG_TRAPNO: return regs64->isf.trapno; + case REG_ERR: return regs64->isf.err; + case REG_RIP: return regs64->isf.rip; + case REG_CS: return regs64->isf.cs; + case REG_RFL: return regs64->isf.rflags; + case REG_SS: return regs64->isf.ss; + case REG_FS: return regs64->fs; + case REG_GS: return regs64->gs; + case REG_ES: + case REG_DS: + case REG_FSBASE: + case REG_GSBASE: + // Important to distinguish these requests (which should be legal) from other values. 
+ panic("dtrace: unimplemented x86_64 getreg()"); } panic("dtrace: unhandled x86_64 getreg() constant"); diff --git a/bsd/dev/i386/fasttrap_regset.h b/bsd/dev/i386/fasttrap_regset.h index 805fa83c4..348e04a30 100644 --- a/bsd/dev/i386/fasttrap_regset.h +++ b/bsd/dev/i386/fasttrap_regset.h @@ -115,6 +115,13 @@ extern "C" { #define FS 1 #define GS 0 +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + #ifdef __cplusplus } #endif diff --git a/bsd/dev/i386/fbt_x86.c b/bsd/dev/i386/fbt_x86.c index b2b021280..19d461ac2 100644 --- a/bsd/dev/i386/fbt_x86.c +++ b/bsd/dev/i386/fbt_x86.c @@ -38,8 +38,7 @@ #include #include #include - -extern struct mach_header _mh_execute_header; /* the kernel's mach header */ +#include #include #include @@ -59,6 +58,10 @@ extern struct mach_header _mh_execute_header; /* the kernel's mach header */ #define DTRACE_INVOP_NOP_SKIP 1 #define DTRACE_INVOP_MOVL_ESP_EBP 10 #define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2 +#define DTRACE_INVOP_MOV_RSP_RBP 11 +#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3 +#define DTRACE_INVOP_POP_RBP 12 +#define DTRACE_INVOP_POP_RBP_SKIP 1 #define DTRACE_INVOP_LEAVE_SKIP 1 #define FBT_PUSHL_EBP 0x55 @@ -66,7 +69,12 @@ extern struct mach_header _mh_execute_header; /* the kernel's mach header */ #define FBT_MOVL_ESP_EBP1_V0 0xec #define FBT_MOVL_ESP_EBP0_V1 0x89 #define FBT_MOVL_ESP_EBP1_V1 0xe5 + +#define FBT_PUSH_RBP 0x55 #define FBT_REX_RSP_RBP 0x48 +#define FBT_MOV_RSP_RBP0 0x89 +#define FBT_MOV_RSP_RBP1 0xe5 +#define FBT_POP_RBP 0x5d #define FBT_POPL_EBP 0x5d #define FBT_RET 0xc3 @@ -93,8 +101,11 @@ extern dtrace_provider_id_t fbt_id; extern fbt_probe_t **fbt_probetab; extern int fbt_probetab_mask; +kern_return_t fbt_perfCallback(int, x86_saved_state_t *, __unused int, __unused int); + /* * Critical routines that must not be probed. PR_5221096, PR_5379018. + * The blacklist must be kept in alphabetic order for purposes of bsearch(). 
*/ static const char * critical_blacklist[] = @@ -104,11 +115,13 @@ static const char * critical_blacklist[] = "console_cpu_free", "cpu_IA32e_disable", "cpu_IA32e_enable", + "cpu_NMI_interrupt", "cpu_control", "cpu_data_alloc", - "cpu_desc_init", - "cpu_desc_init64", - "cpu_desc_load64", + "cpu_desc_init", + "cpu_desc_init64", + "cpu_desc_load", + "cpu_desc_load64", "cpu_exit_wait", "cpu_info", "cpu_info_count", @@ -127,9 +140,9 @@ static const char * critical_blacklist[] = "cpu_thread_init", "cpu_threadtype", "cpu_to_processor", - "cpu_topology_start", + "cpu_topology_sort", + "cpu_topology_start_cpu", "cpu_type", - "cpu_window_init", "cpuid_cpu_display", "handle_pending_TLB_flushes", "hw_compare_and_store", @@ -143,20 +156,23 @@ static const char * critical_blacklist[] = "pmap_cpu_high_shared_remap", "pmap_cpu_init", "register_cpu_setup_func", - "unregister_cpu_setup_func" + "unregister_cpu_setup_func", + "vstart" }; #define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0])) /* * The transitive closure of entry points that can be reached from probe context. - * (Apart from routines whose names begin with dtrace_ or dtxnu_.) + * (Apart from routines whose names begin with dtrace_). 
*/ static const char * probe_ctx_closure[] = { "Debugger", + "IS_64BIT_PROCESS", "OSCompareAndSwap", "absolutetime_to_microtime", "ast_pending", + "astbsd_on", "clock_get_calendar_nanotime_nowait", "copyin", "copyin_user", @@ -198,11 +214,14 @@ static const char * probe_ctx_closure[] = "proc_is64bit", "proc_selfname", "proc_selfpid", + "proc_selfppid", "psignal_lock", "rtc_nanotime_load", "rtc_nanotime_read", + "sdt_getargdesc", "strlcpy", "sync_iss_to_iks_unconditionally", + "systrace_stub", "timer_grab" }; #define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0])) @@ -210,7 +229,7 @@ static const char * probe_ctx_closure[] = static int _cmp(const void *a, const void *b) { - return strcmp((const char *)a, *(const char **)b); + return strncmp((const char *)a, *(const char **)b, strlen((const char *)a) + 1); } static const void * bsearch( @@ -238,6 +257,7 @@ static const void * bsearch( return (NULL); } +#if defined(__i386__) int fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) { @@ -252,7 +272,7 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) if (CPU_ON_INTR(CPU)) stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); else - stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); + stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); stack += 1; /* skip over the target's pushl'd %ebp */ @@ -269,6 +289,7 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) if (stack <= stacktop) stack4 = *stack++; + /* 32-bit ABI, arguments passed on stack. 
*/ dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4); CPU->cpu_dtrace_caller = 0; } else { @@ -286,6 +307,7 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) #define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0)) #define T_INVALID_OPCODE 6 #define FBT_EXCEPTION_CODE T_INVALID_OPCODE +#define T_PREEMPT 255 kern_return_t fbt_perfCallback( @@ -319,7 +341,7 @@ fbt_perfCallback( switch (emul) { case DTRACE_INVOP_NOP: - saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP */ + saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */ retval = KERN_SUCCESS; break; @@ -372,6 +394,10 @@ fbt_perfCallback( pDst > (((uint32_t *)edi)); pDst--) *pDst = pDst[-delta]; + +/* Track the stack lift in "saved_state". */ + saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2)); + /* Now adjust the value of %edi in our caller (kernel_trap)'s frame */ *(ebp - 1) = edi + (delta << 2); @@ -382,6 +408,8 @@ fbt_perfCallback( retval = KERN_FAILURE; break; } + saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */ + ml_set_interrupts_enabled(oldlevel); } @@ -393,20 +421,20 @@ static void __fbt_provide_module(void *arg, struct modctl *ctl) { #pragma unused(arg) - struct mach_header *mh; + kernel_mach_header_t *mh; struct load_command *cmd; - struct segment_command *orig_ts = NULL, *orig_le = NULL; + kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; struct symtab_command *orig_st = NULL; struct nlist *sym = NULL; char *strings; uintptr_t instrLow, instrHigh; char *modname; - unsigned int i, j; + unsigned int i, j; int gIgnoreFBTBlacklist = 0; PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist)); - mh = (struct mach_header *)(ctl->address); + mh = (kernel_mach_header_t *)(ctl->address); modname = ctl->mod_modname; if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? 
*/ @@ -417,7 +445,7 @@ __fbt_provide_module(void *arg, struct modctl *ctl) * where prohibited. */ - if (strcmp(modname, "com.apple.driver.dtrace") == 0) + if (LIT_STRNEQL(modname, "com.apple.driver.dtrace")) return; if (strstr(modname, "CHUD") != NULL) @@ -428,14 +456,14 @@ __fbt_provide_module(void *arg, struct modctl *ctl) cmd = (struct load_command *) &mh[1]; for (i = 0; i < mh->ncmds; i++) { - if (cmd->cmd == LC_SEGMENT) { - struct segment_command *orig_sg = (struct segment_command *) cmd; + if (cmd->cmd == LC_SEGMENT_KERNEL) { + kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; - if (strcmp(SEG_TEXT, orig_sg->segname) == 0) + if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) orig_ts = orig_sg; - else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) orig_le = orig_sg; - else if (strcmp("", orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, "")) orig_ts = orig_sg; /* kexts have a single unnamed segment */ } else if (cmd->cmd == LC_SYMTAB) @@ -447,8 +475,8 @@ __fbt_provide_module(void *arg, struct modctl *ctl) if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) return; - sym = (struct nlist *)orig_le->vmaddr; - strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); + sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); /* Find extent of the TEXT section */ instrLow = (uintptr_t)orig_ts->vmaddr; @@ -472,8 +500,7 @@ __fbt_provide_module(void *arg, struct modctl *ctl) if (*name == '_') name += 1; - if (strstr(name, "dtrace_") == name && - strstr(name, "dtrace_safe_") != name) { + if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { /* * Anything beginning with "dtrace_" may be called * from probe context unless it explitly indicates @@ -483,88 +510,94 @@ __fbt_provide_module(void *arg, struct modctl *ctl) continue; } - if (strstr(name, 
"dsmos_") == name) + if (LIT_STRNSTART(name, "dsmos_")) continue; /* Don't Steal Mac OS X! */ - if (strstr(name, "dtxnu_") == name || - strstr(name, "_dtrace") == name) + if (LIT_STRNSTART(name, "_dtrace")) continue; /* Shims in dtrace.c */ - if (strstr(name, "chud") == name) + if (LIT_STRNSTART(name, "chud")) continue; /* Professional courtesy. */ - if (strstr(name, "hibernate_") == name) + if (LIT_STRNSTART(name, "hibernate_")) continue; /* Let sleeping dogs lie. */ - if (0 == strcmp(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ - 0 == strcmp(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ + if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ + LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ continue; /* Per the fire code */ /* * Place no probes (illegal instructions) in the exception handling path! 
*/ - if (0 == strcmp(name, "t_invop") || - 0 == strcmp(name, "enter_lohandler") || - 0 == strcmp(name, "lo_alltraps") || - 0 == strcmp(name, "kernel_trap") || - 0 == strcmp(name, "i386_astintr")) + if (LIT_STRNEQL(name, "t_invop") || + LIT_STRNEQL(name, "enter_lohandler") || + LIT_STRNEQL(name, "lo_alltraps") || + LIT_STRNEQL(name, "kernel_trap") || + LIT_STRNEQL(name, "interrupt") || + LIT_STRNEQL(name, "i386_astintr")) continue; - if (0 == strcmp(name, "current_thread") || - 0 == strcmp(name, "ast_pending") || - 0 == strcmp(name, "fbt_perfCallback") || - 0 == strcmp(name, "machine_thread_get_kern_state") || - 0 == strcmp(name, "ml_set_interrupts_enabled") || - 0 == strcmp(name, "dtrace_invop") || - 0 == strcmp(name, "fbt_invop") || - 0 == strcmp(name, "sdt_invop") || - 0 == strcmp(name, "max_valid_stack_address")) + if (LIT_STRNEQL(name, "current_thread") || + LIT_STRNEQL(name, "ast_pending") || + LIT_STRNEQL(name, "fbt_perfCallback") || + LIT_STRNEQL(name, "machine_thread_get_kern_state") || + LIT_STRNEQL(name, "get_threadtask") || + LIT_STRNEQL(name, "ml_set_interrupts_enabled") || + LIT_STRNEQL(name, "dtrace_invop") || + LIT_STRNEQL(name, "fbt_invop") || + LIT_STRNEQL(name, "sdt_invop") || + LIT_STRNEQL(name, "max_valid_stack_address")) continue; /* * Voodoo. 
*/ - if (strstr(name, "machine_stack_") == name || - strstr(name, "mapping_") == name || - 0 == strcmp(name, "tmrCvt") || - - strstr(name, "tsc_") == name || - - strstr(name, "pmCPU") == name || - 0 == strcmp(name, "Cstate_table_set") || - 0 == strcmp(name, "pmKextRegister") || - 0 == strcmp(name, "pmSafeMode") || - 0 == strcmp(name, "pmUnregister") || - strstr(name, "pms") == name || - 0 == strcmp(name, "power_management_init") || - strstr(name, "usimple_") == name || - - strstr(name, "rtc_") == name || - strstr(name, "_rtc_") == name || - strstr(name, "rtclock_") == name || - strstr(name, "clock_") == name || - strstr(name, "absolutetime_to_") == name || - 0 == strcmp(name, "setPop") || - 0 == strcmp(name, "nanoseconds_to_absolutetime") || - 0 == strcmp(name, "nanotime_to_absolutetime") || - - strstr(name, "etimer_") == name || - - strstr(name, "commpage_") == name || - strstr(name, "pmap_") == name || - strstr(name, "ml_") == name || - strstr(name, "PE_") == name || - strstr(name, "lapic_") == name || - strstr(name, "acpi_") == name) + if (LIT_STRNSTART(name, "machine_stack_") || + LIT_STRNSTART(name, "mapping_") || + LIT_STRNEQL(name, "tmrCvt") || + + LIT_STRNSTART(name, "tsc_") || + + LIT_STRNSTART(name, "pmCPU") || + LIT_STRNEQL(name, "pmKextRegister") || + LIT_STRNEQL(name, "pmMarkAllCPUsOff") || + LIT_STRNEQL(name, "pmSafeMode") || + LIT_STRNEQL(name, "pmTimerSave") || + LIT_STRNEQL(name, "pmTimerRestore") || + LIT_STRNEQL(name, "pmUnRegister") || + LIT_STRNSTART(name, "pms") || + LIT_STRNEQL(name, "power_management_init") || + LIT_STRNSTART(name, "usimple_") || + LIT_STRNEQL(name, "lck_spin_lock") || + LIT_STRNEQL(name, "lck_spin_unlock") || + + LIT_STRNSTART(name, "rtc_") || + LIT_STRNSTART(name, "_rtc_") || + LIT_STRNSTART(name, "rtclock_") || + LIT_STRNSTART(name, "clock_") || + LIT_STRNSTART(name, "absolutetime_to_") || + LIT_STRNEQL(name, "setPop") || + LIT_STRNEQL(name, "nanoseconds_to_absolutetime") || + LIT_STRNEQL(name, "nanotime_to_absolutetime") 
|| + + LIT_STRNSTART(name, "etimer_") || + + LIT_STRNSTART(name, "commpage_") || + LIT_STRNSTART(name, "pmap_") || + LIT_STRNSTART(name, "ml_") || + LIT_STRNSTART(name, "PE_") || + LIT_STRNEQL(name, "kprintf") || + LIT_STRNSTART(name, "lapic_") || + LIT_STRNSTART(name, "acpi_")) continue; /* * Avoid machine_ routines. PR_5346750. */ - if (strstr(name, "machine_") == name) + if (LIT_STRNSTART(name, "machine_")) continue; - if (0 == strcmp(name, "handle_pending_TLB_flushes")) + if (LIT_STRNEQL(name, "handle_pending_TLB_flushes")) continue; /* @@ -584,25 +617,25 @@ __fbt_provide_module(void *arg, struct modctl *ctl) /* * Place no probes that could be hit on the way to the debugger. */ - if (strstr(name, "kdp_") == name || - strstr(name, "kdb_") == name || - strstr(name, "kdbg_") == name || - strstr(name, "kdebug_") == name || - 0 == strcmp(name, "kernel_debug") || - 0 == strcmp(name, "Debugger") || - 0 == strcmp(name, "Call_DebuggerC") || - 0 == strcmp(name, "lock_debugger") || - 0 == strcmp(name, "unlock_debugger") || - 0 == strcmp(name, "SysChoked")) + if (LIT_STRNSTART(name, "kdp_") || + LIT_STRNSTART(name, "kdb_") || + LIT_STRNSTART(name, "kdbg_") || + LIT_STRNSTART(name, "kdebug_") || + LIT_STRNEQL(name, "kernel_debug") || + LIT_STRNEQL(name, "Debugger") || + LIT_STRNEQL(name, "Call_DebuggerC") || + LIT_STRNEQL(name, "lock_debugger") || + LIT_STRNEQL(name, "unlock_debugger") || + LIT_STRNEQL(name, "SysChoked")) continue; /* * Place no probes that could be hit on the way to a panic. 
*/ if (NULL != strstr(name, "panic_") || - 0 == strcmp(name, "panic") || - 0 == strcmp(name, "handleMck") || - 0 == strcmp(name, "unresolved_kernel_trap")) + LIT_STRNEQL(name, "panic") || + LIT_STRNEQL(name, "handleMck") || + LIT_STRNEQL(name, "unresolved_kernel_trap")) continue; if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0) @@ -799,6 +832,571 @@ __fbt_provide_module(void *arg, struct modctl *ctl) goto again; } } +#elif defined(__x86_64__) +int +fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval) +{ + fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; + + for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { + if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + + if (fbt->fbtp_roffset == 0) { + x86_saved_state64_t *regs = (x86_saved_state64_t *)state; + + CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp) + /* 64-bit ABI, arguments passed in registers. */ + dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8); + CPU->cpu_dtrace_caller = 0; + } else { + + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); + CPU->cpu_dtrace_caller = 0; + } + + return (fbt->fbtp_rval); + } + } + + return (0); +} + +#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0)) +#define T_INVALID_OPCODE 6 +#define FBT_EXCEPTION_CODE T_INVALID_OPCODE +#define T_PREEMPT 255 + +kern_return_t +fbt_perfCallback( + int trapno, + x86_saved_state_t *tagged_regs, + __unused int unused1, + __unused int unused2) +{ + kern_return_t retval = KERN_FAILURE; + x86_saved_state64_t *saved_state = saved_state64(tagged_regs); + + if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) { + boolean_t oldlevel; + uint64_t rsp_probe, *rbp, r12, fp, delta = 0; + uint32_t *pDst; + int emul; + + oldlevel = ml_set_interrupts_enabled(FALSE); + + /* Calculate where the stack pointer was when the probe instruction "fired." 
*/ + rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */ + + emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax ); + __asm__ volatile(".globl _dtrace_invop_callsite"); + __asm__ volatile("_dtrace_invop_callsite:"); + + switch (emul) { + case DTRACE_INVOP_NOP: + saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */ + retval = KERN_SUCCESS; + break; + + case DTRACE_INVOP_MOV_RSP_RBP: + saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */ + saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */ + retval = KERN_SUCCESS; + break; + + case DTRACE_INVOP_POP_RBP: + case DTRACE_INVOP_LEAVE: +/* + * Emulate first micro-op of patched leave: mov %rbp,%rsp + * fp points just below the return address slot for target's ret + * and at the slot holding the frame pointer saved by the target's prologue. + */ + fp = saved_state->rbp; +/* Emulate second micro-op of patched leave: patched pop %rbp + * savearea rbp is set for the frame of the caller to target + * The *live* %rsp will be adjusted below for pop increment(s) + */ + saved_state->rbp = *(uint64_t *)fp; +/* Skip over the patched leave */ + saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP; +/* + * Lift the stack to account for the emulated leave + * Account for words local in this frame + * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.) + */ + delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */ +/* Account for popping off the rbp (just accomplished by the emulation + * above...) + */ + delta += 2; + saved_state->isf.rsp += (delta << 2); + +/* XXX Fragile in the extreme. + * This is sensitive to trap_from_kernel()'s internals. 
+ */ + rbp = (uint64_t *)__builtin_frame_address(0); + rbp = (uint64_t *)*rbp; + r12 = *(rbp - 4); + +/* Shift contents of stack */ + for (pDst = (uint32_t *)fp; + pDst > (((uint32_t *)r12)); + pDst--) + *pDst = pDst[-delta]; + +/* Track the stack lift in "saved_state". */ + saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2)); + +/* Now adjust the value of %r12 in our caller (kernel_trap)'s frame */ + *(rbp - 4) = r12 + (delta << 2); + + retval = KERN_SUCCESS; + break; + + default: + retval = KERN_FAILURE; + break; + } + saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */ + + ml_set_interrupts_enabled(oldlevel); + } + + return retval; +} + +/*ARGSUSED*/ +static void +__fbt_provide_module(void *arg, struct modctl *ctl) +{ +#pragma unused(arg) + kernel_mach_header_t *mh; + struct load_command *cmd; + kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; + struct symtab_command *orig_st = NULL; + struct nlist_64 *sym = NULL; + char *strings; + uintptr_t instrLow, instrHigh; + char *modname; + unsigned int i, j; + + int gIgnoreFBTBlacklist = 0; + PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist)); + + mh = (kernel_mach_header_t *)(ctl->address); + modname = ctl->mod_modname; + + if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */ + return; + + /* + * Employees of dtrace and their families are ineligible. Void + * where prohibited. 
+ */ + + if (LIT_STRNEQL(modname, "com.apple.driver.dtrace")) + return; + + if (strstr(modname, "CHUD") != NULL) + return; + + if (mh->magic != MH_MAGIC_64) + return; + + cmd = (struct load_command *) &mh[1]; + for (i = 0; i < mh->ncmds; i++) { + if (cmd->cmd == LC_SEGMENT_KERNEL) { + kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; + + if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) + orig_ts = orig_sg; + else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) + orig_le = orig_sg; + else if (LIT_STRNEQL(orig_sg->segname, "")) + orig_ts = orig_sg; /* kexts have a single unnamed segment */ + } + else if (cmd->cmd == LC_SYMTAB) + orig_st = (struct symtab_command *) cmd; + + cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); + } + + if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) + return; + + sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); + + /* Find extent of the TEXT section */ + instrLow = (uintptr_t)orig_ts->vmaddr; + instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize); + + for (i = 0; i < orig_st->nsyms; i++) { + fbt_probe_t *fbt, *retfbt; + machine_inst_t *instr, *limit, theInstr, i1, i2, i3; + uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); + char *name = strings + sym[i].n_un.n_strx; + int size; + + /* Check that the symbol is a global and that it has a name. */ + if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) + continue; + + if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ + continue; + + /* Lop off omnipresent leading underscore. */ + if (*name == '_') + name += 1; + + if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { + /* + * Anything beginning with "dtrace_" may be called + * from probe context unless it explitly indicates + * that it won't be called from probe context by + * using the prefix "dtrace_safe_". 
+ */ + continue; + } + + if (LIT_STRNSTART(name, "fasttrap_") || + LIT_STRNSTART(name, "fuword") || + LIT_STRNSTART(name, "suword") || + LIT_STRNEQL(name, "sprlock") || + LIT_STRNEQL(name, "sprunlock") || + LIT_STRNEQL(name, "uread") || + LIT_STRNEQL(name, "uwrite")) + continue; /* Fasttrap inner-workings. */ + + if (LIT_STRNSTART(name, "dsmos_")) + continue; /* Don't Steal Mac OS X! */ + + if (LIT_STRNSTART(name, "_dtrace")) + continue; /* Shims in dtrace.c */ + + if (LIT_STRNSTART(name, "chud")) + continue; /* Professional courtesy. */ + + if (LIT_STRNSTART(name, "hibernate_")) + continue; /* Let sleeping dogs lie. */ + + if (LIT_STRNEQL(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ + LIT_STRNEQL(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ + continue; /* Per the fire code */ + + /* + * Place no probes (illegal instructions) in the exception handling path! + */ + if (LIT_STRNEQL(name, "t_invop") || + LIT_STRNEQL(name, "enter_lohandler") || + LIT_STRNEQL(name, "lo_alltraps") || + LIT_STRNEQL(name, "kernel_trap") || + LIT_STRNEQL(name, "interrupt") || + LIT_STRNEQL(name, "i386_astintr")) + continue; + + if (LIT_STRNEQL(name, "current_thread") || + LIT_STRNEQL(name, "ast_pending") || + LIT_STRNEQL(name, "fbt_perfCallback") || + LIT_STRNEQL(name, "machine_thread_get_kern_state") || + LIT_STRNEQL(name, "get_threadtask") || + LIT_STRNEQL(name, "ml_set_interrupts_enabled") || + LIT_STRNEQL(name, "dtrace_invop") || + LIT_STRNEQL(name, "fbt_invop") || + LIT_STRNEQL(name, "sdt_invop") || + LIT_STRNEQL(name, "max_valid_stack_address")) + continue; + + /* + * Voodoo. 
+ */ + if (LIT_STRNSTART(name, "machine_stack_") || + LIT_STRNSTART(name, "mapping_") || + LIT_STRNEQL(name, "tmrCvt") || + + LIT_STRNSTART(name, "tsc_") || + + LIT_STRNSTART(name, "pmCPU") || + LIT_STRNEQL(name, "pmKextRegister") || + LIT_STRNEQL(name, "pmMarkAllCPUsOff") || + LIT_STRNEQL(name, "pmSafeMode") || + LIT_STRNEQL(name, "pmTimerSave") || + LIT_STRNEQL(name, "pmTimerRestore") || + LIT_STRNEQL(name, "pmUnRegister") || + LIT_STRNSTART(name, "pms") || + LIT_STRNEQL(name, "power_management_init") || + LIT_STRNSTART(name, "usimple_") || + LIT_STRNSTART(name, "lck_spin_lock") || + LIT_STRNSTART(name, "lck_spin_unlock") || + + LIT_STRNSTART(name, "rtc_") || + LIT_STRNSTART(name, "_rtc_") || + LIT_STRNSTART(name, "rtclock_") || + LIT_STRNSTART(name, "clock_") || + LIT_STRNSTART(name, "absolutetime_to_") || + LIT_STRNEQL(name, "setPop") || + LIT_STRNEQL(name, "nanoseconds_to_absolutetime") || + LIT_STRNEQL(name, "nanotime_to_absolutetime") || + + LIT_STRNSTART(name, "etimer_") || + + LIT_STRNSTART(name, "commpage_") || + LIT_STRNSTART(name, "pmap_") || + LIT_STRNSTART(name, "ml_") || + LIT_STRNSTART(name, "PE_") || + LIT_STRNEQL(name, "kprintf") || + LIT_STRNSTART(name, "lapic_") || + LIT_STRNSTART(name, "acpi_")) + continue; + + /* + * Avoid machine_ routines. PR_5346750. + */ + if (LIT_STRNSTART(name, "machine_")) + continue; + + if (LIT_STRNEQL(name, "handle_pending_TLB_flushes")) + continue; + + /* + * Place no probes on critical routines. PR_5221096 + */ + if (!gIgnoreFBTBlacklist && + bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) + continue; + + /* + * Place no probes that could be hit in probe context. + */ + if (!gIgnoreFBTBlacklist && + bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) + continue; + + /* + * Place no probes that could be hit on the way to the debugger. 
+ */ + if (LIT_STRNSTART(name, "kdp_") || + LIT_STRNSTART(name, "kdb_") || + LIT_STRNSTART(name, "kdbg_") || + LIT_STRNSTART(name, "kdebug_") || + LIT_STRNEQL(name, "kernel_debug") || + LIT_STRNEQL(name, "Debugger") || + LIT_STRNEQL(name, "Call_DebuggerC") || + LIT_STRNEQL(name, "lock_debugger") || + LIT_STRNEQL(name, "unlock_debugger") || + LIT_STRNEQL(name, "SysChoked")) + continue; + + /* + * Place no probes that could be hit on the way to a panic. + */ + if (NULL != strstr(name, "panic_") || + LIT_STRNEQL(name, "panic") || + LIT_STRNEQL(name, "handleMck") || + LIT_STRNEQL(name, "unresolved_kernel_trap")) + continue; + + if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0) + continue; + + for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0; + (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); + j++) { + theInstr = instr[0]; + if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16) + break; + + if ((size = dtrace_instr_size(instr)) <= 0) + break; + + instr += size; + } + + if (theInstr != FBT_PUSH_RBP) + continue; + + i1 = instr[1]; + i2 = instr[2]; + i3 = instr[3]; + + limit = (machine_inst_t *)instrHigh; + + if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) { + instr += 1; /* Advance to the mov %rsp,%rbp */ + theInstr = i1; + } else { + continue; + } +#if 0 + else { + /* + * Sometimes, the compiler will schedule an intervening instruction + * in the function prologue. 
Example: + * + * _mach_vm_read: + * 000006d8 pushl %ebp + * 000006d9 movl $0x00000004,%edx + * 000006de movl %esp,%ebp + * + * Try the next instruction, to see if it is a movl %esp,%ebp + */ + + instr += 1; /* Advance past the pushl %ebp */ + if ((size = dtrace_instr_size(instr)) <= 0) + continue; + + instr += size; + + if ((instr + 1) >= limit) + continue; + + i1 = instr[0]; + i2 = instr[1]; + + if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) && + !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) + continue; + + /* instr already points at the movl %esp,%ebp */ + theInstr = i1; + } +#endif + + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt); + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = ctl; + fbt->fbtp_loadcnt = ctl->mod_loadcnt; + fbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP; + fbt->fbtp_savedval = theInstr; + fbt->fbtp_patchval = FBT_PATCHVAL; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt->fbtp_symndx = i; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + retfbt = NULL; +again: + if (instr >= limit) + continue; + + /* + * If this disassembly fails, then we've likely walked off into + * a jump table or some other unsuitable area. Bail out of the + * disassembly now. + */ + if ((size = dtrace_instr_size(instr)) <= 0) + continue; + + /* + * We (desperately) want to avoid erroneously instrumenting a + * jump table, especially given that our markers are pretty + * short: two bytes on x86, and just one byte on amd64. To + * determine if we're looking at a true instruction sequence + * or an inline jump table that happens to contain the same + * byte sequences, we resort to some heuristic sleeze: we + * treat this instruction as being contained within a pointer, + * and see if that pointer points to within the body of the + * function. 
If it does, we refuse to instrument it. + */ + for (j = 0; j < sizeof (uintptr_t); j++) { + uintptr_t check = (uintptr_t)instr - j; + uint8_t *ptr; + + if (check < sym[i].n_value) + break; + + if (check + sizeof (uintptr_t) > (uintptr_t)limit) + continue; + + ptr = *(uint8_t **)check; + + if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) { + instr += size; + goto again; + } + } + + /* + * OK, it's an instruction. + */ + theInstr = instr[0]; + + /* Walked onto the start of the next routine? If so, bail out of this function. */ + if (theInstr == FBT_PUSH_RBP) + continue; + + if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) { + instr += size; + goto again; + } + + /* + * Found the pop %rbp; or leave. + */ + machine_inst_t *patch_instr = instr; + + /* + * Scan forward for a "ret", or "jmp". + */ + instr += size; + if (instr >= limit) + continue; + + size = dtrace_instr_size(instr); + if (size <= 0) /* Failed instruction decode? */ + continue; + + theInstr = instr[0]; + + if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) && + !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) && + !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) && + !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) && + !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS))) + continue; + + /* + * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner! 
+ */ + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); + + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + + retfbt = fbt; + fbt->fbtp_patchpoint = patch_instr; + fbt->fbtp_ctl = ctl; + fbt->fbtp_loadcnt = ctl->mod_loadcnt; + + if (*patch_instr == FBT_POP_RBP) { + fbt->fbtp_rval = DTRACE_INVOP_POP_RBP; + } else { + ASSERT(*patch_instr == FBT_LEAVE); + fbt->fbtp_rval = DTRACE_INVOP_LEAVE; + } + fbt->fbtp_roffset = + (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value); + + fbt->fbtp_savedval = *patch_instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)]; + fbt->fbtp_symndx = i; + fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt; + + instr += size; + goto again; + } +} +#else +#error Unknown arch +#endif extern struct modctl g_fbt_kernctl; #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */ @@ -812,7 +1410,8 @@ fbt_provide_module(void *arg, struct modctl *ctl) #pragma unused(ctl) __fbt_provide_module(arg, &g_fbt_kernctl); - kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page_32(g_fbt_kernctl.size)); + if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL ) + kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size)); g_fbt_kernctl.address = 0; g_fbt_kernctl.size = 0; } diff --git a/bsd/dev/i386/kern_machdep.c b/bsd/dev/i386/kern_machdep.c index bb9851e2e..c77af1d8b 100644 --- a/bsd/dev/i386/kern_machdep.c +++ b/bsd/dev/i386/kern_machdep.c @@ -40,6 +40,8 @@ #include #include +extern int bootarg_no64exec; /* bsd_init.c */ + /********************************************************************** * Routine: grade_binary() * @@ -56,7 +58,7 @@ grade_binary(cpu_type_t exectype, __unused cpu_subtype_t execsubtype) case CPU_TYPE_POWERPC: /* 
via translator */ return 1; case CPU_TYPE_X86_64: /* native 64-bit */ - return ml_is64bit() ? 2 : 0; + return ((ml_is64bit() && !bootarg_no64exec) ? 2 : 0); default: /* all other binary types */ return 0; } diff --git a/bsd/dev/i386/km.c b/bsd/dev/i386/km.c index d5ecffc29..d276b6d95 100644 --- a/bsd/dev/i386/km.c +++ b/bsd/dev/i386/km.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include @@ -54,20 +54,12 @@ extern void cnputcusr(char); extern int cngetc(void); void kminit(void); -int kmopen(dev_t dev, int flag, int devtype, struct proc *pp); -int kmclose(dev_t dev, int flag, int mode, struct proc *p); -int kmread(dev_t dev, struct uio *uio, int ioflag); -int kmwrite(dev_t dev, struct uio *uio, int ioflag); -int kmioctl(dev_t dev, int cmd, caddr_t data, int flag, struct proc *p); -int kmputc(int c); -int kmgetc(dev_t dev); -int kmgetc_silent(dev_t dev); void cons_cinput(char ch); /* * 'Global' variables, shared only by this file and conf.c. */ -struct tty *km_tty[1] = { &cons }; +struct tty *km_tty[1] = { 0 }; /* * this works early on, after initialize_screen() but before autoconf (and thus @@ -88,18 +80,16 @@ extern void KeyboardOpen(void); void kminit(void) { - cons.t_dev = makedev(12, 0); + km_tty[0] = ttymalloc(); + km_tty[0]->t_dev = makedev(12, 0); initialized = 1; } + /* * cdevsw interface to km driver. 
*/ int -kmopen( - dev_t dev, - int flag, - __unused int devtype, - struct proc *pp) +kmopen(dev_t dev, int flag, __unused int devtype, proc_t pp) { int unit; struct tty *tp; @@ -110,7 +100,10 @@ kmopen( if(unit >= 1) return (ENXIO); - tp = (struct tty *)&cons; + tp = km_tty[unit]; + + tty_lock(tp); + tp->t_oproc = kmstart; tp->t_param = NULL; tp->t_dev = dev; @@ -123,29 +116,35 @@ kmopen( tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; termioschars(&tp->t_termios); ttsetwater(tp); - } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) - return EBUSY; + } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) { + ret = EBUSY; + goto out; + } tp->t_state |= TS_CARR_ON; /* lie and say carrier exists and is on. */ + ret = ((*linesw[tp->t_line].l_open)(dev, tp)); { PE_Video video; wp = &tp->t_winsize; - /* Magic numbers. These are CHARWIDTH and CHARHEIGHT + /* + * Magic numbers. These are CHARWIDTH and CHARHEIGHT * from pexpert/i386/video_console.c */ wp->ws_xpixel = 8; wp->ws_ypixel = 16; + tty_unlock(tp); /* XXX race window */ + if (flag & O_POPUP) PE_initialize_console(0, kPETextScreen); bzero(&video, sizeof(video)); PE_current_console(&video); - if( video.v_display == VGA_TEXT_MODE ) { - wp->ws_col = video.v_width; - wp->ws_row = video.v_height; - } else if( video.v_width != 0 && video.v_height != 0 ) { + + tty_lock(tp); + + if( video.v_display == FB_TEXT_MODE && video.v_width != 0 && video.v_height != 0 ) { wp->ws_col = video.v_width / wp->ws_xpixel; wp->ws_row = video.v_height / wp->ws_ypixel; } else { @@ -153,149 +152,148 @@ kmopen( wp->ws_row = 36; } } + +out: + tty_unlock(tp); + return ret; } int -kmclose( - __unused dev_t dev, - int flag, - __unused int mode, - __unused struct proc *p) +kmclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) { - - struct tty *tp; + int ret; + struct tty *tp = km_tty[minor(dev)]; - tp = &cons; - (*linesw[tp->t_line].l_close)(tp,flag); + tty_lock(tp); + ret = (*linesw[tp->t_line].l_close)(tp,flag); ttyclose(tp); - 
return (0); + tty_unlock(tp); + + return (ret); } int -kmread( - __unused dev_t dev, - struct uio *uio, - int ioflag) +kmread(dev_t dev, struct uio *uio, int ioflag) { - register struct tty *tp; - - tp = &cons; - return ((*linesw[tp->t_line].l_read)(tp, uio, ioflag)); + int ret; + struct tty *tp = km_tty[minor(dev)]; + + tty_lock(tp); + ret = (*linesw[tp->t_line].l_read)(tp, uio, ioflag); + tty_unlock(tp); + + return (ret); } int -kmwrite( - __unused dev_t dev, - struct uio *uio, - int ioflag) +kmwrite(dev_t dev, struct uio *uio, int ioflag) { - register struct tty *tp; - - tp = &cons; - return ((*linesw[tp->t_line].l_write)(tp, uio, ioflag)); + int ret; + struct tty *tp = km_tty[minor(dev)]; + + tty_lock(tp); + ret = (*linesw[tp->t_line].l_write)(tp, uio, ioflag); + tty_unlock(tp); + + return (ret); } int -kmioctl( - __unused dev_t dev, - int cmd, - caddr_t data, - int flag, - struct proc *p) +kmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) { - int error; - struct tty *tp = &cons; + int error = 0; + struct tty *tp = km_tty[minor(dev)]; struct winsize *wp; + + tty_lock(tp); switch (cmd) { - - - case KMIOCSIZE: wp = (struct winsize *)data; *wp = tp->t_winsize; - return 0; + break; case TIOCSWINSZ: /* Prevent changing of console size -- * this ensures that login doesn't revert to the * termcap-defined size */ - return EINVAL; + error = EINVAL; + break; /* Bodge in the CLOCAL flag as the km device is always local */ - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: { - register struct termios *t = (struct termios *)data; - t->c_cflag |= CLOCAL; - /* No Break */ - } + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + { + struct termios32 *t = (struct termios32 *)data; + t->c_cflag |= CLOCAL; + /* No Break */ + } + goto fallthrough; + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: + { + struct user_termios *t = (struct user_termios *)data; + t->c_cflag |= CLOCAL; + /* No Break */ + } +fallthrough: default: error = 
(*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (ENOTTY != error) - return error; - return ttioctl (tp, cmd, data, flag, p); + break; + error = ttioctl_locked(tp, cmd, data, flag, p); + break; } -} - -int -kmputc( - int c) -{ - if( disableConsoleOutput) - return( 0); + tty_unlock(tp); - if(!initialized) - return( 0); - - if(c == '\n') - cnputcusr('\r'); - - cnputcusr(c); - - return 0; + return (error); } +/* + * kmputc + * + * Output a character to the serial console driver via cnputcusr(), + * which is exported by that driver. + * + * Locks: Assumes tp in the calling tty driver code is locked on + * entry, remains locked on exit + * + * Notes: Called from kmoutput(); given the locking output + * assumptions here, this routine should be static (and + * inlined, given there is only one call site). + */ int -kmgetc( - __unused dev_t dev) +kmputc(__unused dev_t dev, char c) { - int c; - - c= cngetc(); - - if (c == '\r') { - c = '\n'; + if(!disableConsoleOutput && initialized) { + /* OCRNL */ + if(c == '\n') + cnputcusr('\r'); + cnputcusr(c); } - cnputcusr(c); - return c; -} -int -kmgetc_silent( - __unused dev_t dev) -{ - int c; - - c= cngetc(); - if (c == '\r') { - c = '\n'; - } - return c; + return (0); } + /* * Callouts from linesw. */ #define KM_LOWAT_DELAY ((ns_time_t)1000) +/* + * t_oproc for this driver; called from within the line discipline + * + * Locks: Assumes tp is locked on entry, remains locked on exit + */ static void -kmstart( - struct tty *tp) +kmstart(struct tty *tp) { if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) goto out; @@ -310,54 +308,95 @@ kmstart( return; } +/* + * One-shot output retry timeout from kmoutput(); re-calls kmoutput() at + * intervals until the output queue for the tty is empty, at which point + * the timeout is not rescheduled by kmoutput() + * + * This function must take the tty_lock() around the kmoutput() call; it + * ignores the return value.
+ */ static void kmtimeout(void *arg) { - boolean_t funnel_state; - struct tty *tp = (struct tty *) arg; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - kmoutput(tp); - (void) thread_funnel_set(kernel_flock, funnel_state); - + struct tty *tp = (struct tty *)arg; + tty_lock(tp); + (void)kmoutput(tp); + tty_unlock(tp); } + +/* + * kmoutput + * + * Locks: Assumes tp is locked on entry, remains locked on exit + * + * Notes: Called from kmstart() and kmtimeout(); kmtimeout() is a + * timer initiated by this routine to deal with pending + * output not yet flushed (output is flushed at a maximum + * of sizeof(buf) characters at a time before dropping into + * the timeout code). + */ static int -kmoutput( - struct tty *tp) +kmoutput(struct tty *tp) { - /* - * FIXME - to be grokked...copied from m68k km.c. - */ - char buf[80]; - char *cp; + unsigned char buf[80]; /* buffer; limits output per call */ + unsigned char *cp; int cc = -1; + /* While there is data available to be output... */ while (tp->t_outq.c_cc > 0) { cc = ndqb(&tp->t_outq, 0); if (cc == 0) break; - cc = min(cc, sizeof buf); + /* + * attempt to output as many characters as are available, + * up to the available transfer buffer size. + */ + cc = min(cc, sizeof(buf)); + /* copy the output queue contents to the buffer */ (void) q_to_b(&tp->t_outq, buf, cc); for (cp = buf; cp < &buf[cc]; cp++) { - kmputc(*cp & 0x7f); + /* output the buffer one character at a time */ + kmputc(tp->t_dev, *cp & 0x7f); } } + /* + * XXX This is likely not necessary, as the tty output queue is not + * XXX writeable while we hold the tty_lock().
+ */ if (tp->t_outq.c_cc > 0) { timeout(kmtimeout, tp, hz); } tp->t_state &= ~TS_BUSY; + /* Start the output processing for the line discipline */ (*linesw[tp->t_line].l_start)(tp); return 0; } + +/* + * cons_cinput + * + * Driver character input from the polled mode serial console driver calls + * this routine to input a character from the serial driver into the tty + * line discipline specific input processing receive interrupt routine, + * l_rint(). + * + * Locks: Assumes that the tty_lock() is NOT held on the tp, so a + * serial driver should NOT call this function as a result + * of being called from a function which already holds the + * lock; ECHOE will be handled at the line discipline, if + * output echo processing is going to occur. + */ void cons_cinput(char ch) { - struct tty *tp = &cons; - + struct tty *tp = km_tty[0]; /* XXX */ + + tty_lock(tp); (*linesw[tp->t_line].l_rint) (ch, tp); + tty_unlock(tp); } - diff --git a/bsd/dev/i386/mem.c b/bsd/dev/i386/mem.c index 9f4eeddde..e598cdf67 100644 --- a/bsd/dev/i386/mem.c +++ b/bsd/dev/i386/mem.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -142,13 +143,8 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) while (uio_resid(uio) > 0 && error == 0) { - if (uio_iov_len(uio) == 0) { - uio_next_iov(uio); - uio->uio_iovcnt--; - if (uio->uio_iovcnt < 0) - panic("mmrw"); - continue; - } + uio_update(uio, 0); + switch (minor(dev)) { /* minor device 0 is physical memory */ @@ -157,7 +153,7 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) return(ENODEV); v = trunc_page(uio->uio_offset); - if (uio->uio_offset >= mem_size) + if (uio->uio_offset >= (off_t)mem_size) goto fault; size= PAGE_SIZE; @@ -166,8 +162,7 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) goto fault; } o = uio->uio_offset - v; - // LP64todo - fix this!
- c = min(PAGE_SIZE - o, (u_int)uio_iov_len(uio)); + c = min(PAGE_SIZE - o, uio_curriovlen(uio)); error = uiomove((caddr_t) (where + o), c, uio); kmem_free(kernel_map, where, PAGE_SIZE); continue; @@ -178,9 +173,9 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) return(ENODEV); /* Do some sanity checking */ if (((vm_address_t)uio->uio_offset >= VM_MAX_KERNEL_ADDRESS) || - ((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_ADDRESS)) + ((vm_address_t)uio->uio_offset <= VM_MIN_KERNEL_AND_KEXT_ADDRESS)) goto fault; - c = uio_iov_len(uio); + c = uio_curriovlen(uio); if (!kernacc(uio->uio_offset, c)) goto fault; error = uiomove((caddr_t)(uintptr_t)uio->uio_offset, @@ -191,7 +186,7 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) case 2: if (rw == UIO_READ) return (0); - c = uio_iov_len(uio); + c = uio_curriovlen(uio); break; case 3: if(devzerobuf == NULL) { @@ -199,11 +194,10 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) bzero(devzerobuf, PAGE_SIZE); } if(uio->uio_rw == UIO_WRITE) { - c = uio_iov_len(uio); + c = uio_curriovlen(uio); break; } - // LP64todo - fix this! 
- c = min(uio_iov_len(uio), PAGE_SIZE); + c = min(uio_curriovlen(uio), PAGE_SIZE); error = uiomove(devzerobuf, (int)c, uio); continue; default: @@ -213,10 +207,7 @@ mmrw(dev_t dev, struct uio *uio, enum uio_rw rw) if (error) break; - uio_iov_base_add(uio, c); - uio_iov_len_add(uio, -((int)c)); - uio->uio_offset += c; - uio_setresid(uio, (uio_resid(uio) - c)); + uio_update(uio, c); } return (error); fault: diff --git a/bsd/dev/i386/munge.s b/bsd/dev/i386/munge.s index 18a3dc4bb..d174c06e3 100644 --- a/bsd/dev/i386/munge.s +++ b/bsd/dev/i386/munge.s @@ -150,18 +150,36 @@ ENTRY(munge_wlw) movl %edx,4(%ecx) ret +Entry(munge_wwwlw) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 20(%ecx),%eax + movl %eax,32(%ecx) + movl %edx,36(%ecx) + jmp Lwwwl + ENTRY(munge_wwwl) movl 8(%esp),%ecx // get &uu_args xorl %edx,%edx +Lwwwl: movl 12(%ecx),%eax movl %eax,24(%ecx) movl 16(%ecx),%eax movl %eax,28(%ecx) jmp Lw3 +ENTRY(munge_wwwwlw) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 24(%ecx),%eax + movl %eax,40(%ecx) + movl %edx,44(%ecx) + jmp Lwwwwl + ENTRY(munge_wwwwl) movl 8(%esp),%ecx // get &uu_args xorl %edx,%edx +Lwwwwl: movl 16(%ecx),%eax movl %eax,32(%ecx) movl 20(%ecx),%eax @@ -177,6 +195,32 @@ ENTRY(munge_wwwwwl) movl %eax,44(%ecx) jmp Lw5 +ENTRY(munge_wwwwwwlw) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 32(%ecx),%eax + movl %eax,56(%ecx) + movl %edx,60(%ecx) + movl 24(%ecx),%eax + movl %eax,48(%ecx) + movl 28(%ecx),%eax + movl %eax,52(%ecx) + jmp Lw6 + + +ENTRY(munge_wwwwwwll) + movl 8(%esp),%ecx // get &uu_args + xorl %edx,%edx + movl 32(%ecx),%eax + movl %eax,56(%ecx) + movl 36(%ecx),%eax + movl %eax,60(%ecx) + movl 24(%ecx),%eax + movl %eax,48(%ecx) + movl 28(%ecx),%eax + movl %eax,52(%ecx) + jmp Lw6 + ENTRY(munge_wsw) movl 8(%esp),%ecx // get &uu_args movl 8(%ecx),%eax @@ -209,3 +253,8 @@ ENTRY(munge_wwwsw) movl %edx,28(%ecx) xorl %edx,%edx jmp Lw3 + +ENTRY(munge_llllll) + ret // actually, this method has nothing to do - all 
+ // arguments are already 64-bits, with no mixing of + // args that need sign/zero extension diff --git a/bsd/dev/i386/pci_device.h b/bsd/dev/i386/pci_device.h deleted file mode 100644 index 6bb2dcd78..000000000 --- a/bsd/dev/i386/pci_device.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_FREE_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.2 1998/09/30 21:20:44 wsanchez - * Merged in IntelMerge1 (mburg: Intel support) - * - * Revision 1.1.2.1 1998/09/30 18:18:50 mburg - * Changes for Intel port - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.2 1995/12/15 10:52:14 bernadat - * Split dev and vendor ids. - * [95/11/15 bernadat] - * - * Revision 1.1.6.1 1995/02/23 17:22:27 alanl - * Taken from DIPC2_SHARED - * [1995/01/03 19:09:31 alanl] - * - * Revision 1.1.2.1 1994/10/11 18:24:42 rwd - * Created. - * [1994/10/11 18:15:31 rwd] - * - * $EndLog$ - */ -/* - * Taken from - * - * Copyright (c) 1994 Wolfgang Stanglmeier, Koeln, Germany - * - */ - -#ifndef __PCI_DEVICE_H__ -#define __PCI_DEVICE_H__ - -/*------------------------------------------------------------ - * - * Per driver structure. - * - *------------------------------------------------------------ -*/ - -typedef unsigned short pci_vendor_id_t; -typedef unsigned short pci_dev_id_t; - -typedef union { - unsigned long cfg1; - struct { - unsigned char enable; - unsigned char forward; - unsigned short port; - } cfg2; - } pcici_t; - -struct pci_driver { - int (*probe )(pcici_t pci_ident); /* test whether device - is present */ - int (*attach)(pcici_t pci_ident); /* setup driver for a - device */ - pci_vendor_id_t vendor_id; /* vendor pci id */ - pci_dev_id_t device_id; /* device pci id */ - char *name; /* device name */ - char *vendor; /* device long name */ - void (*intr)(int); /* interupt handler */ -}; - -/*----------------------------------------------------------- - * - * Per device structure. - * - * It is initialized by the config utility and should live in - * "ioconf.c". At the moment there is only one field. - * - * This is a first attempt to include the pci bus to 386bsd. - * So this structure may grow .. 
- * - *----------------------------------------------------------- -*/ - -struct pci_device { - struct pci_driver * pd_driver; -}; - -/*----------------------------------------------------------- - * - * This functions may be used by drivers to map devices - * to virtual and physical addresses. The va and pa - * addresses are "in/out" parameters. If they are 0 - * on entry, the mapping function assigns an address. - * - *----------------------------------------------------------- -*/ - -int pci_map_mem(pcici_t tag, - unsigned long entry, - vm_offset_t *va, - vm_offset_t *pa); -#endif /*__PCI_DEVICE_H__*/ diff --git a/bsd/dev/i386/sdt_x86.c b/bsd/dev/i386/sdt_x86.c index eb1c2ecff..c354b303e 100644 --- a/bsd/dev/i386/sdt_x86.c +++ b/bsd/dev/i386/sdt_x86.c @@ -19,11 +19,11 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* #pragma ident "@(#)sdt.c 1.6 06/03/24 SMI" */ +/* #pragma ident "@(#)sdt.c 1.9 08/07/01 SMI" */ #ifdef KERNEL #ifndef _KERNEL @@ -46,6 +46,7 @@ extern sdt_probe_t **sdt_probetab; +#if defined(__i386__) /*ARGSUSED*/ int sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) @@ -60,7 +61,7 @@ sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) if (CPU_ON_INTR(CPU)) stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); else - stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); + stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); if (stack <= stacktop) stack0 = *stack++; @@ -81,4 +82,28 @@ sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) return (0); } +#elif defined(__x86_64__) +/*ARGSUSED*/ +int +sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) +{ +#pragma unused(eax) + sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; + + for (; sdt != NULL; sdt = sdt->sdp_hashnext) { + if 
((uintptr_t)sdt->sdp_patchpoint == addr) { + x86_saved_state64_t *regs = (x86_saved_state64_t *)stack; + + dtrace_probe(sdt->sdp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8); + + return (DTRACE_INVOP_NOP); + } + } + + return (0); +} +#else +#error Unknown arch +#endif + diff --git a/bsd/dev/i386/sel_inline.h b/bsd/dev/i386/sel_inline.h deleted file mode 100644 index 39ab9b69f..000000000 --- a/bsd/dev/i386/sel_inline.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Selector value conversion/validation. - * - * HISTORY - * - * 19 June 1992 ? at NeXT - * Created. 
- */ - - -static inline -unsigned int -sel_to_selector( - sel_t sel -) -{ - union { - sel_t sel; - unsigned short selector; - } tconv; - - tconv.sel = sel; - - return (tconv.selector); -} - -static inline -sel_t -selector_to_sel( - unsigned int selector -) -{ - union { - unsigned short selector; - sel_t sel; - } tconv; - - tconv.selector = selector; - - return (tconv.sel); -} - -#if 0 -static inline -boolean_t -valid_user_data_selector( - unsigned int selector -) -{ - sel_t sel = selector_to_sel(selector); - - if (selector == 0) - return (TRUE); - - if (sel.ti == SEL_LDT) - return (TRUE); - else if (sel.index < GDTSZ) { - data_desc_t *desc = (data_desc_t *)sel_to_gdt_entry(sel); - - if (desc->dpl == USER_PRIV) - return (TRUE); - } - - return (FALSE); -} - -static inline -boolean_t -valid_user_code_selector( - unsigned int selector -) -{ - sel_t sel = selector_to_sel(selector); - - if (selector == 0) - return (FALSE); - - if (sel.ti == SEL_LDT) { - if (sel.rpl == USER_PRIV) - return (TRUE); - } - else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) { - code_desc_t *desc = (code_desc_t *)sel_to_gdt_entry(sel); - - if (desc->dpl == USER_PRIV) - return (TRUE); - } - - return (FALSE); -} - -static inline -boolean_t -valid_user_stack_selector( - unsigned int selector -) -{ - sel_t sel = selector_to_sel(selector); - - if (selector == 0) - return (FALSE); - - if (sel.ti == SEL_LDT) { - if (sel.rpl == USER_PRIV) - return (TRUE); - } - else if (sel.index < GDTSZ && sel.rpl == USER_PRIV) { - data_desc_t *desc = (data_desc_t *)sel_to_gdt_entry(sel); - - if (desc->dpl == USER_PRIV) - return (TRUE); - } - - return (FALSE); -} -#endif diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index 3cb481c40..13d4355b9 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -38,11 +38,11 @@ hw_cpu_sysctl SYSCTL_HANDLER_ARGS { __unused struct sysctl_oid *unused_oidp = oidp; i386_cpu_info_t *cpu_info = cpuid_info(); - void *ptr = (uint8_t *)cpu_info + (uint32_t)arg1; + 
void *ptr = (uint8_t *)cpu_info + (uintptr_t)arg1; int value; if (arg2 == -1) { - ptr = *(char **)ptr; + ptr = *(void **)ptr; arg2 = 0; } @@ -58,6 +58,19 @@ hw_cpu_sysctl SYSCTL_HANDLER_ARGS return SYSCTL_OUT(req, ptr, arg2 ? (size_t) arg2 : strlen((char *)ptr)+1); } +static int +hw_cpu_sysctl_nonzero SYSCTL_HANDLER_ARGS +{ + i386_cpu_info_t *cpu_info = cpuid_info(); + void *ptr = (uint8_t *)cpu_info + (uintptr_t)arg1; + int value = *(uint32_t *)ptr; + + if (value == 0) + return ENOENT; + + return hw_cpu_sysctl(oidp, arg1, arg2, req); +} + static int hw_cpu_features SYSCTL_HANDLER_ARGS { @@ -101,17 +114,6 @@ hw_cpu_logical_per_package SYSCTL_HANDLER_ARGS sizeof(cpu_info->cpuid_logical_per_package)); } -static int -hw_cpu_sysctl_nehalem SYSCTL_HANDLER_ARGS -{ - i386_cpu_info_t *cpu_info = cpuid_info(); - - if (cpu_info->cpuid_model != 26) - return ENOENT; - - hw_cpu_sysctl(oidp, arg1, arg2, req); -} - static int hw_cpu_flex_ratio_desired SYSCTL_HANDLER_ARGS { @@ -157,6 +159,14 @@ hw_cpu_flex_ratio_max SYSCTL_HANDLER_ARGS SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "CPU info"); +SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_basic, CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_max_basic),sizeof(uint32_t), + hw_cpu_sysctl, "IU", "Max Basic Information value"); + +SYSCTL_PROC(_machdep_cpu, OID_AUTO, max_ext, CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_max_ext), sizeof(uint32_t), + hw_cpu_sysctl, "IU", "Max Extended Function Information value"); + SYSCTL_PROC(_machdep_cpu, OID_AUTO, vendor, CTLTYPE_STRING | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_vendor), 0, hw_cpu_sysctl, "A", "CPU vendor"); @@ -187,11 +197,11 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, stepping, CTLTYPE_INT | CTLFLAG_RD, SYSCTL_PROC(_machdep_cpu, OID_AUTO, feature_bits, CTLTYPE_QUAD | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_features), sizeof(uint64_t), - hw_cpu_sysctl, "I", "CPU features"); + hw_cpu_sysctl, "IU", "CPU 
features"); SYSCTL_PROC(_machdep_cpu, OID_AUTO, extfeature_bits, CTLTYPE_QUAD | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_extfeatures), sizeof(uint64_t), - hw_cpu_sysctl, "I", "CPU extended features"); + hw_cpu_sysctl, "IU", "CPU extended features"); SYSCTL_PROC(_machdep_cpu, OID_AUTO, signature, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, cpuid_signature), sizeof(uint32_t), @@ -353,30 +363,65 @@ SYSCTL_PROC(_machdep_cpu_cache, OID_AUTO, size, SYSCTL_NODE(_machdep_cpu, OID_AUTO, tlb, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tlb"); +SYSCTL_NODE(_machdep_cpu_tlb, OID_AUTO, inst, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "inst"); +SYSCTL_NODE(_machdep_cpu_tlb, OID_AUTO, data, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "data"); + +SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, small, + CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_INST][TLB_SMALL][0]), + sizeof(uint32_t), + hw_cpu_sysctl_nonzero, "I", + "Number of small page instruction TLBs"); -SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_small, +SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, cpuid_itlb_small), + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_DATA][TLB_SMALL][0]), sizeof(uint32_t), - hw_cpu_sysctl, "I", "Number of small page instruction TLBs"); + hw_cpu_sysctl_nonzero, "I", + "Number of small page data TLBs (1st level)"); -SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_small, +SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, small_level1, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_small), + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_DATA][TLB_SMALL][1]), sizeof(uint32_t), - hw_cpu_sysctl, "I", "Number of small page data TLBs"); + hw_cpu_sysctl_nonzero, "I", + "Number of small page data TLBs (2nd level)"); -SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_large, +SYSCTL_PROC(_machdep_cpu_tlb_inst, OID_AUTO, large, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, 
cpuid_itlb_large), + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_INST][TLB_LARGE][0]), sizeof(uint32_t), - hw_cpu_sysctl, "I", "Number of large page instruction TLBs"); + hw_cpu_sysctl_nonzero, "I", + "Number of large page instruction TLBs"); -SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_large, +SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large, CTLTYPE_INT | CTLFLAG_RD, - (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_large), + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_DATA][TLB_LARGE][0]), sizeof(uint32_t), - hw_cpu_sysctl, "I", "Number of large page data TLBs"); + hw_cpu_sysctl_nonzero, "I", + "Number of large page data TLBs (1st level)"); + +SYSCTL_PROC(_machdep_cpu_tlb_data, OID_AUTO, large_level1, + CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, + cpuid_tlb[TLB_DATA][TLB_LARGE][1]), + sizeof(uint32_t), + hw_cpu_sysctl_nonzero, "I", + "Number of large page data TLBs (2nd level)"); + +SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, shared, + CTLTYPE_INT | CTLFLAG_RD, + (void *)offsetof(i386_cpu_info_t, cpuid_stlb), + sizeof(uint32_t), + hw_cpu_sysctl_nonzero, "I", + "Number of shared TLBs"); SYSCTL_NODE(_machdep_cpu, OID_AUTO, address_bits, CTLFLAG_RW|CTLFLAG_LOCKED, 0, @@ -394,6 +439,7 @@ SYSCTL_PROC(_machdep_cpu_address_bits, OID_AUTO, virtual, sizeof(uint32_t), hw_cpu_sysctl, "I", "Number of virtual address bits"); + SYSCTL_PROC(_machdep_cpu, OID_AUTO, core_count, CTLTYPE_INT | CTLFLAG_RD, (void *)offsetof(i386_cpu_info_t, core_count), @@ -427,6 +473,7 @@ SYSCTL_PROC(_machdep_cpu_flex_ratio, OID_AUTO, max, uint64_t pmap_pv_hashlist_walks; uint64_t pmap_pv_hashlist_cnts; uint32_t pmap_pv_hashlist_max; +uint32_t pmap_kernel_text_ps = PAGE_SIZE; /*extern struct sysctl_oid_list sysctl__machdep_pmap_children;*/ @@ -436,3 +483,24 @@ SYSCTL_NODE(_machdep, OID_AUTO, pmap, CTLFLAG_RW|CTLFLAG_LOCKED, 0, SYSCTL_QUAD (_machdep_pmap, OID_AUTO, hashwalks, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_walks, ""); SYSCTL_QUAD (_machdep_pmap, OID_AUTO, 
hashcnts, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_cnts, ""); SYSCTL_INT (_machdep_pmap, OID_AUTO, hashmax, CTLFLAG_RD | CTLFLAG_KERN, &pmap_pv_hashlist_max, 0, ""); +SYSCTL_INT (_machdep_pmap, OID_AUTO, kernel_text_ps, CTLFLAG_RD | CTLFLAG_KERN, &pmap_kernel_text_ps, 0, ""); + +SYSCTL_NODE(_machdep, OID_AUTO, memmap, CTLFLAG_RD|CTLFLAG_LOCKED, NULL, "physical memory map"); + +uint64_t firmware_Conventional_bytes = 0; +uint64_t firmware_RuntimeServices_bytes = 0; +uint64_t firmware_ACPIReclaim_bytes = 0; +uint64_t firmware_ACPINVS_bytes = 0; +uint64_t firmware_PalCode_bytes = 0; +uint64_t firmware_Reserved_bytes = 0; +uint64_t firmware_Unusable_bytes = 0; +uint64_t firmware_other_bytes = 0; + +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Conventional, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Conventional_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, RuntimeServices, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_RuntimeServices_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, ACPIReclaim, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_ACPIReclaim_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, ACPINVS, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_ACPINVS_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, PalCode, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_PalCode_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Reserved, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Reserved_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Unusable, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_Unusable_bytes, ""); +SYSCTL_QUAD(_machdep_memmap, OID_AUTO, Other, CTLFLAG_RD|CTLFLAG_LOCKED, &firmware_other_bytes, ""); diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index 6b24ccf6c..95f9fe702 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,7 @@ #include #include -#include +#include #include #include @@ -64,6 +65,10 @@ extern void *find_user_regs(thread_t); extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); + +/* dynamically generated at build time based on syscalls.master */ +extern const char *syscallnames[]; + /* * Function: unix_syscall * @@ -95,6 +100,7 @@ unix_syscall(x86_saved_state_t *state) thread = current_thread(); uthread = get_bsdthread_info(thread); + /* Get the approriate proc; may be different from task's for vfork() */ if (!(uthread->uu_flag & UT_VFORK)) p = (struct proc *)get_bsdtask_info(current_task()); @@ -111,8 +117,10 @@ unix_syscall(x86_saved_state_t *state) } code = regs->eax & I386_SYSCALL_NUMBER_MASK; + DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n", + code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip); args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread); - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); + params = (vm_offset_t) (regs->uesp + sizeof (int)); regs->efl &= ~(EFL_CF); @@ -153,7 +161,7 @@ unix_syscall(x86_saved_state_t *state) /* * If non-NULL, then call the syscall argument munger to - * copy in arguments (see xnu/bsd/dev/i386/munge.s); the + * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the * first argument is NULL because we are munging in place * after a copyin because the ABI currently doesn't use * registers to pass system call arguments. 
@@ -183,10 +191,13 @@ unix_syscall(x86_saved_state_t *state) AUDIT_SYSCALL_ENTER(code, p, uthread); error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); AUDIT_SYSCALL_EXIT(code, p, uthread, error); +#if CONFIG_MACF + mac_thread_userret(code, error, thread); +#endif #ifdef JOE_DEBUG if (uthread->uu_iocount) - joe_debug("system call returned with uu_iocount != 0"); + printf("system call returned with uu_iocount != 0\n"); #endif #if CONFIG_DTRACE uthread->t_dtrace_errno = error; @@ -199,6 +210,7 @@ unix_syscall(x86_saved_state_t *state) * The SYSENTER_TF_CS covers single-stepping over a sysenter * - see debug trap handler in idt.s/idt64.s */ + if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) { regs->eip -= 5; } @@ -215,6 +227,10 @@ unix_syscall(x86_saved_state_t *state) } } + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall: error=%d retval=(%u,%u)\n", + error, regs->eax, regs->edx); + uthread->uu_flag &= ~UT_NOTCANCELPT; #if DEBUG /* @@ -234,7 +250,8 @@ unix_syscall(x86_saved_state_t *state) } if (code != 180) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); + thread_exception_return(); /* NOTREACHED */ @@ -280,6 +297,9 @@ unix_syscall64(x86_saved_state_t *state) args_in_regs = 6; code = regs->rax & SYSCALL_NUMBER_MASK; + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall64: code=%d(%s) rip=%llx\n", + code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip); callp = (code >= NUM_SYSENT) ? 
 &sysent[63] : &sysent[code]; uargp = (void *)(&regs->rdi); @@ -389,7 +409,10 @@ unix_syscall64(x86_saved_state_t *state) } } - + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall64: error=%d retval=(%llu,%llu)\n", + error, regs->rax, regs->rdx); + uthread->uu_flag &= ~UT_NOTCANCELPT; /* @@ -409,7 +432,7 @@ unix_syscall64(x86_saved_state_t *state) } if (code != 180) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); thread_exception_return(); /* NOTREACHED */ @@ -429,6 +452,7 @@ unix_syscall_return(int error) thread = current_thread(); uthread = get_bsdthread_info(thread); + p = current_proc(); if (proc_is64bit(p)) { @@ -451,6 +475,7 @@ unix_syscall_return(int error) if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); #endif /* CONFIG_DTRACE */ + AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (error == ERESTART) { /* @@ -491,6 +516,9 @@ unix_syscall_return(int error) regs->isf.rflags &= ~EFL_CF; } } + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall_return: error=%d retval=(%llu,%llu)\n", + error, regs->rax, regs->rdx); } else { x86_saved_state32_t *regs; @@ -505,9 +533,10 @@ unix_syscall_return(int error) if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); #endif /* CONFIG_DTRACE */ + AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (callp == sysent) { - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); + params = (vm_offset_t) (regs->uesp + sizeof (int)); code = fuword(params); } if (error == ERESTART) { @@ -522,6 +551,9 @@ unix_syscall_return(int error) regs->edx = uthread->uu_rval[1]; } } + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall_return: error=%d retval=(%u,%u)\n", + error, regs->eax, regs->edx); } @@ -544,7 +576,7 @@ unix_syscall_return(int error) } if (code != 180) 
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); thread_exception_return(); /* NOTREACHED */ @@ -593,11 +625,3 @@ munge_wwlwww( arg64[0] = arg32[0]; /* Wwlwww */ } -#ifdef JOE_DEBUG -joe_debug(char *p) { - - printf("%s\n", p); -} -#endif - - diff --git a/bsd/dev/i386/unix_signal.c b/bsd/dev/i386/unix_signal.c index 7a9291cae..eb96e879e 100644 --- a/bsd/dev/i386/unix_signal.c +++ b/bsd/dev/i386/unix_signal.c @@ -48,16 +48,18 @@ #include #include /* for thread_abort_safely */ #include -#include #include #include +#include #include #include #include + + /* Forward: */ extern boolean_t machine_exception(int, mach_exception_code_t, mach_exception_subcode_t, int *, mach_exception_subcode_t *); @@ -90,18 +92,50 @@ extern kern_return_t thread_setstatus(thread_t thread, int flavor, * to the user specified pc, psl. */ struct sigframe32 { - int retaddr; - sig_t catcher; - int sigstyle; - int sig; - siginfo_t * sinfo; - struct ucontext * uctx; + int retaddr; + user32_addr_t catcher; /* sig_t */ + int sigstyle; + int sig; + user32_addr_t sinfo; /* siginfo32_t* */ + user32_addr_t uctx; /* struct ucontext32 */ }; +/* + * NOTE: Source and target may *NOT* overlap! 
+ */ +static void +siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_addr); + /* following cast works for sival_int because of padding */ + out->si_value.sival_ptr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_value.sival_ptr); + out->si_band = in->si_band; /* range reduction */ + out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */ +} +static void +siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = in->si_addr; + out->si_value.sival_ptr = in->si_value.sival_ptr; + out->si_band = in->si_band; /* range reduction */ + out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */ +} void -sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_long code) +sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused uint32_t code) { union { struct mcontext32 mctx32; @@ -117,6 +151,8 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo struct sigacts *ps = p->p_sigacts; int oonstack, flavor; + user_addr_t trampact; + int sigonstack; void * state; mach_msg_type_number_t state_count; @@ -132,13 +168,15 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo infostyle = UC_FLAVOR; oonstack = ut->uu_sigstk.ss_flags & SA_ONSTACK; + trampact = ps->ps_trampact[sig]; + sigonstack = (ps->ps_sigonstack & sigmask(sig)); /* * init siginfo */ proc_unlock(p); - bzero((caddr_t)&sinfo64, sizeof(user_siginfo_t)); + bzero((caddr_t)&sinfo64, sizeof(sinfo64)); sinfo64.si_signo = sig; @@ -168,7 +206,7 @@ sendsig(struct proc *p, user_addr_t 
ua_catcher, int sig, int mask, __unused u_lo /* figure out where our new stack lives */ if ((ut->uu_flag & UT_ALTSTACK) && !oonstack && - (ps->ps_sigonstack & sigmask(sig))) { + (sigonstack)) { ua_sp = ut->uu_sigstk.ss_sp; stack_size = ut->uu_sigstk.ss_size; ua_sp += stack_size; @@ -184,7 +222,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo ua_sp -= sizeof (struct user_ucontext64); ua_uctxp = ua_sp; // someone tramples the first word! - ua_sp -= sizeof (user_siginfo_t); + ua_sp -= sizeof (user64_siginfo_t); ua_sip = ua_sp; ua_sp -= sizeof (struct mcontext64); @@ -228,7 +266,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo sinfo64.pad[0] = tstate64->rsp; sinfo64.si_addr = tstate64->rip; - tstate64->rip = ps->ps_trampact[sig]; + tstate64->rip = trampact; tstate64->rsp = ua_fp; tstate64->rflags = get_eflags_exportmask(); /* @@ -250,7 +288,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo } else { x86_thread_state32_t *tstate32; - struct ucontext uctx32; + struct user_ucontext32 uctx32; struct sigframe32 frame32; flavor = x86_THREAD_STATE32; @@ -275,7 +313,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo /* figure out where our new stack lives */ if ((ut->uu_flag & UT_ALTSTACK) && !oonstack && - (ps->ps_sigonstack & sigmask(sig))) { + (sigonstack)) { ua_sp = ut->uu_sigstk.ss_sp; stack_size = ut->uu_sigstk.ss_size; ua_sp += stack_size; @@ -285,10 +323,10 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo } ua_cr2 = mctx.mctx32.es.faultvaddr; - ua_sp -= sizeof (struct ucontext); + ua_sp -= sizeof (struct user_ucontext32); ua_uctxp = ua_sp; // someone tramples the first word! 
- ua_sp -= sizeof (siginfo_t); + ua_sp -= sizeof (user32_siginfo_t); ua_sip = ua_sp; ua_sp -= sizeof (struct mcontext32); @@ -316,9 +354,9 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo frame32.retaddr = -1; frame32.sigstyle = infostyle; frame32.sig = sig; - frame32.catcher = CAST_DOWN(sig_t, ua_catcher); - frame32.sinfo = CAST_DOWN(siginfo_t *, ua_sip); - frame32.uctx = CAST_DOWN(struct ucontext *, ua_uctxp); + frame32.catcher = CAST_DOWN_EXPLICIT(user32_addr_t, ua_catcher); + frame32.sinfo = CAST_DOWN_EXPLICIT(user32_addr_t, ua_sip); + frame32.uctx = CAST_DOWN_EXPLICIT(user32_addr_t, ua_uctxp); if (copyout((caddr_t)&frame32, ua_fp, sizeof (frame32))) goto bad; @@ -330,7 +368,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo uctx32.uc_onstack = oonstack; uctx32.uc_sigmask = mask; - uctx32.uc_stack.ss_sp = CAST_DOWN(char *, ua_fp); + uctx32.uc_stack.ss_sp = CAST_DOWN_EXPLICIT(user32_addr_t, ua_fp); uctx32.uc_stack.ss_size = stack_size; if (oonstack) @@ -339,7 +377,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo uctx32.uc_mcsize = sizeof(struct mcontext32); - uctx32.uc_mcontext = CAST_DOWN(_STRUCT_MCONTEXT32 *, ua_mctxp); + uctx32.uc_mcontext = CAST_DOWN_EXPLICIT(user32_addr_t, ua_mctxp); if (copyout((caddr_t)&uctx32, ua_uctxp, sizeof (uctx32))) goto bad; @@ -358,7 +396,6 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo sinfo64.si_code = ILL_ILLOPC; break; default: - printf("unknown SIGILL code %ld\n", (long) ut->uu_code); sinfo64.si_code = ILL_NOOP; } break; @@ -369,7 +406,13 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo #define FP_OE 3 /* overflow */ #define FP_UE 4 /* underflow */ #define FP_PE 5 /* precision */ - if (ut->uu_subcode & (1 << FP_ZE)) { + if (ut->uu_code == EXC_I386_DIV) { + sinfo64.si_code = FPE_INTDIV; + } + else if (ut->uu_code == EXC_I386_INTO) { + sinfo64.si_code 
= FPE_INTOVF; + } + else if (ut->uu_subcode & (1 << FP_ZE)) { sinfo64.si_code = FPE_FLTDIV; } else if (ut->uu_subcode & (1 << FP_OE)) { sinfo64.si_code = FPE_FLTOVF; @@ -380,8 +423,6 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo } else if (ut->uu_subcode & (1 << FP_IE)) { sinfo64.si_code = FPE_FLTINV; } else { - printf("unknown SIGFPE code %ld, subcode %lx\n", - (long) ut->uu_code, (long) ut->uu_subcode); sinfo64.si_code = FPE_NOOP; } break; @@ -409,7 +450,6 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo sinfo64.si_code = SEGV_MAPERR; break; default: - printf("unknown SIGSEGV code %ld\n", (long) ut->uu_code); sinfo64.si_code = FPE_NOOP; } break; @@ -460,41 +500,82 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo } } if (proc_is64bit(p)) { + user64_siginfo_t sinfo64_user64; + + bzero((caddr_t)&sinfo64_user64, sizeof(sinfo64_user64)); + + siginfo_user_to_user64(&sinfo64,&sinfo64_user64); + +#if CONFIG_DTRACE + bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo)); + + ut->t_dtrace_siginfo.si_signo = sinfo64.si_signo; + ut->t_dtrace_siginfo.si_code = sinfo64.si_code; + ut->t_dtrace_siginfo.si_pid = sinfo64.si_pid; + ut->t_dtrace_siginfo.si_uid = sinfo64.si_uid; + ut->t_dtrace_siginfo.si_status = sinfo64.si_status; + /* XXX truncates faulting address to void * on K32 */ + ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo64.si_addr); + + /* Fire DTrace proc:::fault probe when signal is generated by hardware. 
*/ + switch (sig) { + case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP: + DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo)); + break; + default: + break; + } /* XXX truncates catcher address to uintptr_t */ - DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &sinfo64, + DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo), void (*)(void), CAST_DOWN(sig_t, ua_catcher)); +#endif /* CONFIG_DTRACE */ - if (copyout((caddr_t)&sinfo64, ua_sip, sizeof (sinfo64))) - goto bad; + if (copyout((caddr_t)&sinfo64_user64, ua_sip, sizeof (sinfo64_user64))) + goto bad; flavor = x86_THREAD_STATE64; state_count = x86_THREAD_STATE64_COUNT; state = (void *)&mctx.mctx64.ss; } else { - x86_thread_state32_t *tstate32; - siginfo_t sinfo32; + x86_thread_state32_t *tstate32; + user32_siginfo_t sinfo32; + + bzero((caddr_t)&sinfo32, sizeof(sinfo32)); - bzero((caddr_t)&sinfo32, sizeof(siginfo_t)); + siginfo_user_to_user32(&sinfo64,&sinfo32); - sinfo32.si_signo = sinfo64.si_signo; - sinfo32.si_code = sinfo64.si_code; - sinfo32.si_pid = sinfo64.si_pid; - sinfo32.si_uid = sinfo64.si_uid; - sinfo32.si_status = sinfo64.si_status; - sinfo32.si_addr = CAST_DOWN(void *, sinfo64.si_addr); - sinfo32.__pad[0] = sinfo64.pad[0]; +#if CONFIG_DTRACE + bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo)); - DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &sinfo32, + ut->t_dtrace_siginfo.si_signo = sinfo32.si_signo; + ut->t_dtrace_siginfo.si_code = sinfo32.si_code; + ut->t_dtrace_siginfo.si_pid = sinfo32.si_pid; + ut->t_dtrace_siginfo.si_uid = sinfo32.si_uid; + ut->t_dtrace_siginfo.si_status = sinfo32.si_status; + ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo32.si_addr); + + /* Fire DTrace proc:::fault probe when signal is generated by hardware. 
*/ + switch (sig) { + case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP: + DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo)); + break; + default: + break; + } + + DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo), void (*)(void), CAST_DOWN(sig_t, ua_catcher)); +#endif /* CONFIG_DTRACE */ - if (copyout((caddr_t)&sinfo32, ua_sip, sizeof (sinfo32))) - goto bad; + if (copyout((caddr_t)&sinfo32, ua_sip, sizeof (sinfo32))) + goto bad; tstate32 = &mctx.mctx32.ss; - tstate32->eip = CAST_DOWN(unsigned int, ps->ps_trampact[sig]); - tstate32->esp = CAST_DOWN(unsigned int, ua_fp); - + + tstate32->eip = CAST_DOWN_EXPLICIT(user32_addr_t, trampact); + tstate32->esp = CAST_DOWN_EXPLICIT(user32_addr_t, ua_fp); + tstate32->eflags = get_eflags_exportmask(); tstate32->cs = USER_CS; @@ -512,6 +593,7 @@ sendsig(struct proc *p, user_addr_t ua_catcher, int sig, int mask, __unused u_lo goto bad; ml_fp_setvalid(FALSE); + proc_lock(p); return; @@ -595,7 +677,7 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) fs = (void *)&mctx.mctx64.fs; } else { - struct ucontext uctx32; + struct user_ucontext32 uctx32; if ((error = copyin(uap->uctx, (void *)&uctx32, sizeof (uctx32)))) return(error); diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c index 5b4e005e7..fe07f5e53 100644 --- a/bsd/dev/memdev.c +++ b/bsd/dev/memdev.c @@ -108,10 +108,13 @@ static strategy_fcn_t mdevstrategy; static int mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p); static int mdevrw(dev_t dev, struct uio *uio, int ioflag); + #ifdef CONFIG_MEMDEV_INSECURE + static char * nonspace(char *pos, char *end); static char * getspace(char *pos, char *end); static char * cvtnum(char *pos, char *end, unsigned int *num); + #endif /* CONFIG_MEMDEV_INSECURE */ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t bytes); @@ 
-153,7 +156,7 @@ static struct cdevsw mdevcdevsw = { }; struct mdev { - vm_offset_t mdBase; /* file size in bytes */ + uint64_t mdBase; /* file size in bytes */ uint32_t mdSize; /* file size in bytes */ int mdFlags; /* flags */ int mdSecsize; /* sector size */ @@ -210,7 +213,7 @@ static int mdevrw(dev_t dev, struct uio *uio, __unused int ioflag) { saveflag = uio->uio_segflg; /* Remember what the request is */ #if LP64_DEBUG - if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) { + if (UIO_IS_USER_SPACE(uio) == 0 && UIO_IS_SYS_SPACE(uio) == 0) { panic("mdevrw - invalid uio_segflg\n"); } #endif /* LP64_DEBUG */ @@ -285,9 +288,9 @@ static void mdevstrategy(struct buf *bp) { lop = min((4096 - (vaddr & 4095)), (4096 - (fvaddr & 4095))); /* Get smallest amount left on sink and source */ csize = min(lop, left); /* Don't move more than we need to */ - pp = pmap_find_phys(kernel_pmap, (addr64_t)((unsigned int)vaddr)); /* Get the sink physical address */ + pp = pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)vaddr)); /* Get the sink physical address */ if(!pp) { /* Not found, what gives? */ - panic("mdevstrategy: sink address %016llX not mapped\n", (addr64_t)((unsigned int)vaddr)); + panic("mdevstrategy: sink address %016llX not mapped\n", (addr64_t)((uintptr_t)vaddr)); } paddr = (addr64_t)(((addr64_t)pp << 12) | (addr64_t)(vaddr & 4095)); /* Get actual address */ bcopy_phys(fvaddr, paddr, csize); /* Copy this on in */ @@ -311,9 +314,9 @@ static void mdevstrategy(struct buf *bp) { lop = min((4096 - (vaddr & 4095)), (4096 - (fvaddr & 4095))); /* Get smallest amount left on sink and source */ csize = min(lop, left); /* Don't move more than we need to */ - pp = pmap_find_phys(kernel_pmap, (addr64_t)((unsigned int)vaddr)); /* Get the source physical address */ + pp = pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)vaddr)); /* Get the source physical address */ if(!pp) { /* Not found, what gives? 
*/ - panic("mdevstrategy: source address %016llX not mapped\n", (addr64_t)((unsigned int)vaddr)); + panic("mdevstrategy: source address %016llX not mapped\n", (addr64_t)((uintptr_t)vaddr)); } paddr = (addr64_t)(((addr64_t)pp << 12) | (addr64_t)(vaddr & 4095)); /* Get actual address */ @@ -348,7 +351,7 @@ static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, struct proc *p, int is_char) { int error; - u_long *f; + u_int32_t *f; u_int64_t *o; int devid; @@ -359,7 +362,7 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, error = proc_suser(p); /* Are we superman? */ if (error) return (error); /* Nope... */ - f = (u_long*)data; + f = (u_int32_t*)data; o = (u_int64_t *)data; switch (cmd) { @@ -431,13 +434,14 @@ static int mdevsize(dev_t dev) { void mdevinit(__unused int the_cnt) { #ifdef CONFIG_MEMDEV_INSECURE - + int devid, phys; ppnum_t base; unsigned int size; char *ba, *lp; dev_t dev; + ba = PE_boot_args(); /* Get the boot arguments */ lp = ba + 256; /* Point to the end */ @@ -474,13 +478,15 @@ void mdevinit(__unused int the_cnt) { dev = mdevadd(devid, base >> 12, size >> 12, phys); /* Go add the device */ } - + #endif /* CONFIG_MEMDEV_INSECURE */ + return; } #ifdef CONFIG_MEMDEV_INSECURE + char *nonspace(char *pos, char *end) { /* Find next non-space in string */ if(pos >= end) return end; /* Don't go past end */ @@ -534,6 +540,7 @@ char *cvtnum(char *pos, char *end, unsigned int *num) { /* Convert to a number pos++; /* Step on */ } } + #endif /* CONFIG_MEMDEV_INSECURE */ dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) { @@ -549,7 +556,7 @@ dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) { continue; /* Skip check */ } if(!(((base + size -1 ) < mdev[i].mdBase) || ((mdev[i].mdBase + mdev[i].mdSize - 1) < base))) { /* Is there any overlap? 
*/ - panic("mdevadd: attempt to add overlapping memory device at %08X-%08X\n", mdev[i].mdBase, mdev[i].mdBase + mdev[i].mdSize - 1); + panic("mdevadd: attempt to add overlapping memory device at %08lX-%08lX\n", (long) mdev[i].mdBase, (long) mdev[i].mdBase + mdev[i].mdSize - 1); } } if(devid < 0) { /* Do we have free slots? */ diff --git a/bsd/dev/memdev.h b/bsd/dev/memdev.h index a07b5d2a6..bcf4e05ec 100644 --- a/bsd/dev/memdev.h +++ b/bsd/dev/memdev.h @@ -12,6 +12,6 @@ void mdevinit(vm_offset_t base, unsigned int size); #endif /* __APPLE_API_PRIVATE */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _SYS_MEMDEV_H_*/ diff --git a/bsd/dev/ppc/conf.c b/bsd/dev/ppc/conf.c index 0e4a33aff..acc9a8545 100644 --- a/bsd/dev/ppc/conf.c +++ b/bsd/dev/ppc/conf.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include struct bdevsw bdevsw[] = @@ -96,7 +96,6 @@ extern struct tty *km_tty[]; dev_t chrtoblk(dev_t dev); int chrtoblk_set(int cdev, int bdev); -int isdisk(dev_t dev, int type); int iskmemdev(dev_t dev); @@ -129,7 +128,6 @@ extern d_close_t ptsclose; extern d_read_t ptsread; extern d_write_t ptswrite; extern d_stop_t ptsstop; -extern d_putc_t ptsputc; extern d_open_t ptcopen; extern d_close_t ptcclose; extern d_read_t ptcread; @@ -142,7 +140,6 @@ extern d_ioctl_t ptyioctl; #define ptsread eno_rdwrt #define ptswrite eno_rdwrt #define ptsstop nulldev -#define ptsputc nulldev #define ptcopen eno_opcl #define ptcclose eno_opcl @@ -176,7 +173,7 @@ struct cdevsw cdevsw[] = consioctl, ((stop_fcn_t *)&nulldev), ((reset_fcn_t *)&nulldev), 0, consselect, - eno_mmap, eno_strat, cons_getc, cons_putc, D_TTY + eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, NO_CDEVICE, /* 1*/ { @@ -225,7 +222,7 @@ struct cdevsw cdevsw[] = kmioctl, ((stop_fcn_t *)&nulldev), ((reset_fcn_t *)&nulldev), km_tty, ttselect, - eno_mmap, eno_strat, kmgetc, kmputc, 0 + eno_mmap, eno_strat, eno_getc, eno_putc, 0 }, NO_CDEVICE, /*13*/ NO_CDEVICE, /*14*/ diff --git a/bsd/dev/ppc/cons.c 
b/bsd/dev/ppc/cons.c index 2e1fec1a6..207ee03ae 100644 --- a/bsd/dev/ppc/cons.c +++ b/bsd/dev/ppc/cons.c @@ -46,211 +46,94 @@ #include #include #include -#include +#include -struct tty cons; -struct tty *constty; /* current console device */ +struct tty *constty; /* current console device */ + +/* + * The km driver supplied the default console device for the systems + * (usually a raw frame buffer driver, but potentially a serial driver). + */ +extern struct tty *km_tty[1]; + +static dev_t +cndev(void) +{ + if (constty) + return constty->t_dev; + else + return km_tty[0]->t_dev; +} /*ARGSUSED*/ int consopen(__unused dev_t dev, int flag, int devtype, struct proc *pp) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_open)(device, flag, devtype, pp); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_open)(dev, flag, devtype, pp)); } + /*ARGSUSED*/ int consclose(__unused dev_t dev, int flag, int mode, struct proc *pp) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_close)(device, flag, mode, pp); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); - - + dev = cndev(); + return ((*cdevsw[major(dev)].d_close)(dev, flag, mode, pp)); } + /*ARGSUSED*/ int consread(__unused dev_t dev, struct uio *uio, int ioflag) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_read)(device, uio, ioflag); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + 
 return ((*cdevsw[major(dev)].d_read)(dev, uio, ioflag)); } + /*ARGSUSED*/ int conswrite(__unused dev_t dev, struct uio *uio, int ioflag) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_write)(device, uio, ioflag); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_write)(dev, uio, ioflag)); } + /*ARGSUSED*/ int consioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) { - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; + dev = cndev(); +#if 0 /* * Superuser can always use this to wrest control of console * output from the "virtual" console. + * + * XXX Unfortunately, this code doesn't do what the author thought + * XXX it did; use of the console device, a TIOCCONS would always + * XXX disassociate the console from a virtual terminal and send + * XXX it back to the fake tty. 
*/ - if ((unsigned int)cmd == TIOCCONS && constty) { - error = proc_suser(p); - if (error) { - goto out; + if ((unsigned) cmd == TIOCCONS && constty) { + int error = proc_suser(p); + if (!error) { + constty = NULL; } - constty = NULL; - error = 0; - goto out; + return(error); } - error = (*cdevsw[major(device)].d_ioctl)(device, cmd, addr, flag, p); -out: - thread_funnel_set(kernel_flock, funnel_state); +#endif /* 0 */ - return(error); + return ((*cdevsw[major(dev)].d_ioctl)(dev, cmd, addr, flag, p)); } + /*ARGSUSED*/ /* called with funnel held */ int consselect(__unused dev_t dev, int flag, void *wql, struct proc *p) { - dev_t device; - - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - return ((*cdevsw[major(device)].d_select)(device, flag, wql, p)); -} - -int -cons_getc(__unused dev_t dev) -{ - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_getc)(device); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); + dev = cndev(); + return ((*cdevsw[major(dev)].d_select)(dev, flag, wql, p)); } - -int -cons_putc(__unused dev_t dev, char c) -{ - dev_t device; - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (constty) - device = constty->t_dev; - else - device = cons.t_dev; - error = (*cdevsw[major(device)].d_putc)(device, c); - thread_funnel_set(kernel_flock, funnel_state); - - return(error); -} - -/* - * Write message to console; create an alert panel if no text-type window - * currently exists. Caller must call alert_done() when finished. - * The height and width arguments are not used; they are provided for - * compatibility with the 68k version of alert(). 
- */ -int -alert( - __unused int width, - __unused int height, - __unused const char *title, - const char *msg, - int p1, - int p2, - int p3, - int p4, - int p5, - int p6, - int p7, - int p8) -{ - char smsg[200]; - - snprintf(smsg, sizeof(smsg), msg, p1, p2, p3, p4, p5, p6, p7, p8); -#if FIXME /* [ */ - /* DoAlert(title, smsg); */ -#else - printf("%s\n",smsg); -#endif /* FIXME ] */ - - return 0; -} - -int -alert_done(void) -{ - /* DoRestore(); */ - return 0; -} - diff --git a/bsd/dev/ppc/cons.h b/bsd/dev/ppc/cons.h deleted file mode 100644 index acad7e39e..000000000 --- a/bsd/dev/ppc/cons.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1987 NeXT, Inc. - */ - -struct consdev { - char *cn_name; /* name of device in dev_name_list */ - int (*cn_probe)(void); /* probe and fill in consdev info */ - int (*cn_init)(void); /* turn on as console */ - int (*cn_getc)(void); /* kernel getchar interface */ - int (*cn_putc)(void); /* kernel putchar interface */ - struct tty *cn_tp; /* tty structure for console device */ - dev_t cn_dev; /* major/minor of device */ - short cn_pri; /* pecking order; the higher the better */ -}; - -/* values for cn_pri - reflect our policy for console selection */ -#define CN_DEAD 0 /* device doesn't exist */ -#define CN_NORMAL 1 /* device exists but is nothing special */ -#define CN_INTERNAL 2 /* "internal" bit-mapped display */ -#define CN_REMOTE 3 /* serial interface with remote bit set */ - -/* XXX */ -#define CONSMAJOR 0 - -#ifdef KERNEL - -#include - -extern struct consdev constab[]; -extern struct consdev *cn_tab; -extern struct tty *cn_tty; - -extern struct tty cons; -extern struct tty *constty; /* current console device */ - -int consopen(dev_t, int, int, struct proc *); -int consclose(dev_t, int, int, struct proc *); -int consread(dev_t, struct uio *, int); -int conswrite(dev_t, struct uio *, int); -int consioctl(dev_t, u_long, caddr_t, int, struct proc *); -int consselect(dev_t, int, void *, struct proc *); -int cons_getc(dev_t); -int cons_putc(dev_t, char); -int alert(int, int, const char *, const char *, int, int, int, int, int, int, int, int); -int alert_done(void); - -/* - * These really want their own header file, but this is the only one in - * common, and the km device is the keyboard monitor, so it's technically a - * part of the console. 
- */ -int kmopen(dev_t, int, int, struct proc *); -int kmclose(dev_t, int, int, struct proc *); -int kmread(dev_t, struct uio *, int); -int kmwrite(dev_t, struct uio *, int); -int kmioctl(dev_t, u_long, caddr_t, int, struct proc *); -int kmgetc(dev_t); -int kmputc(dev_t, char); - -#endif - diff --git a/bsd/dev/ppc/dtrace_isa.c b/bsd/dev/ppc/dtrace_isa.c index 18449879a..21b49bdc4 100644 --- a/bsd/dev/ppc/dtrace_isa.c +++ b/bsd/dev/ppc/dtrace_isa.c @@ -51,14 +51,14 @@ extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, - int fault, int fltoffs, uint64_t illval) + int fltoffs, int fault, uint64_t illval) { /* * dtrace_getarg() is a lost cause on PPC. For the case of the error probe firing lets * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG. */ state->dts_arg_error_illval = illval; - dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fault, fltoffs ); + dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault ); } /* @@ -143,12 +143,6 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg) /* * Runtime and ABI */ -extern greg_t -dtrace_getfp(void) -{ - return (greg_t)__builtin_frame_address(0); -} - uint64_t dtrace_getreg(struct regs *savearea, uint_t reg) { @@ -497,7 +491,7 @@ void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) { - struct frame *fp = (struct frame *)dtrace_getfp(); + struct frame *fp = (struct frame *)__builtin_frame_address(0); struct frame *nextfp, *minfp, *stacktop; int depth = 0; int last = 0; @@ -508,7 +502,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, if ((on_intr = CPU_ON_INTR(CPU)) != 0) stacktop = (struct frame *)dtrace_get_cpu_int_stack_top(); else - stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); + stacktop = (struct frame 
*)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); minfp = fp; @@ -519,7 +513,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, while (depth < pcstack_limit) { nextfp = *(struct frame **)fp; - pc = *(uintptr_t *)(((uint32_t)fp) + RETURN_OFFSET); + pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET); if (nextfp <= minfp || nextfp >= stacktop) { if (on_intr) { @@ -529,7 +523,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread()); minfp = (struct frame *)kstack_base; - stacktop = (struct frame *)(kstack_base + KERNEL_STACK_SIZE); + stacktop = (struct frame *)(kstack_base + kernel_stack_size); on_intr = 0; continue; @@ -587,15 +581,9 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) * VALID address greater than "base". */ func(0x0, VM_MIN_KERNEL_ADDRESS); - func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0); + if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0) + func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0); } extern void *mapping_phys_lookup(ppnum_t, unsigned int *); -boolean_t -dtxnu_is_RAM_page(ppnum_t pn) -{ - unsigned int ignore; - return (NULL == mapping_phys_lookup(pn, &ignore)) ? FALSE : TRUE; -} - diff --git a/bsd/dev/ppc/fasttrap_isa.c b/bsd/dev/ppc/fasttrap_isa.c index 70cfbe00e..10e2edd08 100644 --- a/bsd/dev/ppc/fasttrap_isa.c +++ b/bsd/dev/ppc/fasttrap_isa.c @@ -48,12 +48,12 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* - * #pragma ident "@(#)fasttrap_isa.c 1.23 06/09/19 SMI" + * #pragma ident "@(#)fasttrap_isa.c 1.27 08/04/09 SMI" */ #ifdef KERNEL @@ -78,7 +78,8 @@ #include /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */ extern dtrace_id_t dtrace_probeid_error; -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. 
*/ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ static int32_t branchtaken(int32_t bo, int32_t bi, ppc_saved_state_t *sv); static int32_t dtrace_decode_ppc(uint32_t inst); @@ -335,7 +336,7 @@ fasttrap_return_common(ppc_saved_state_t *sv, user_addr_t pc, pid_t pid, user_ad for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && - !tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount != 0) break; } @@ -448,7 +449,7 @@ fasttrap_pid_probe(ppc_saved_state_t *sv) */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && (sv->save_srr0 == tp->ftt_pc) && - !tp->ftt_proc->ftpc_defunct) + tp->ftt_proc->ftpc_acount != 0) break; } diff --git a/bsd/dev/ppc/fbt_ppc.c b/bsd/dev/ppc/fbt_ppc.c index 5ee9cea6a..0a505d23e 100644 --- a/bsd/dev/ppc/fbt_ppc.c +++ b/bsd/dev/ppc/fbt_ppc.c @@ -89,6 +89,9 @@ extern dtrace_provider_id_t fbt_id; extern fbt_probe_t **fbt_probetab; extern int fbt_probetab_mask; +kern_return_t fbt_perfCallback(int, ppc_saved_state_t *, int, int); +kern_return_t fbt_perfIntCallback(int, ppc_saved_state_t *, int, int); + /* * Critical routines that must not be probed. PR_5221096, PR_5379018. */ @@ -121,7 +124,7 @@ static const char * critical_blacklist[] = /* * The transitive closure of entry points that can be reached from probe context. - * (Apart from routines whose names begin with dtrace_ or dtxnu_.) + * (Apart from routines whose names begin with dtrace_). 
*/ static const char * probe_ctx_closure[] = { @@ -168,17 +171,20 @@ static const char * probe_ctx_closure[] = "proc_is64bit", "proc_selfname", "proc_selfpid", + "proc_selfppid", "psignal_lock", + "sdt_getargdesc", "splhigh", "splx", "strlcpy", + "systrace_stub", "timer_grab" }; #define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0])) static int _cmp(const void *a, const void *b) { - return strcmp((const char *)a, *(const char **)b); + return strncmp((const char *)a, *(const char **)b, strlen((const char *)a) + 1); } static const void * bsearch( @@ -218,12 +224,12 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) if (fbt->fbtp_roffset == 0) { ppc_saved_state_t *regs = (ppc_saved_state_t *)stack; - CPU->cpu_dtrace_caller = addr; + CPU->cpu_dtrace_caller = regs->save_lr; dtrace_probe(fbt->fbtp_id, regs->save_r3 & mask, regs->save_r4 & mask, regs->save_r5 & mask, regs->save_r6 & mask, regs->save_r7 & mask); - CPU->cpu_dtrace_caller = NULL; + CPU->cpu_dtrace_caller = (uintptr_t)NULL; } else { dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); @@ -235,7 +241,7 @@ fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) regs->save_srr0 &= mask; } - CPU->cpu_dtrace_caller = NULL; + CPU->cpu_dtrace_caller = (uintptr_t)NULL; } return (fbt->fbtp_rval); @@ -347,7 +353,7 @@ __fbt_provide_module(void *arg, struct modctl *ctl) * where prohibited. 
*/ - if (strcmp(modname, "com.apple.driver.dtrace") == 0) + if (LIT_STRNEQL(modname, "com.apple.driver.dtrace")) return; if (strstr(modname, "CHUD") != NULL) @@ -361,11 +367,11 @@ __fbt_provide_module(void *arg, struct modctl *ctl) if (cmd->cmd == LC_SEGMENT) { struct segment_command *orig_sg = (struct segment_command *) cmd; - if (strcmp(SEG_TEXT, orig_sg->segname) == 0) + if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) orig_ts = orig_sg; - else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) orig_le = orig_sg; - else if (strcmp("", orig_sg->segname) == 0) + else if (LIT_STRNEQL(orig_sg->segname, "")) orig_ts = orig_sg; /* kexts have a single unnamed segment */ } else if (cmd->cmd == LC_SYMTAB) @@ -377,8 +383,8 @@ __fbt_provide_module(void *arg, struct modctl *ctl) if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) return; - sym = (struct nlist *)orig_le->vmaddr; - strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); + sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); /* Find extent of the TEXT section */ instrLow = (uintptr_t)orig_ts->vmaddr; @@ -402,8 +408,7 @@ __fbt_provide_module(void *arg, struct modctl *ctl) if (*name == '_') name += 1; - if (strstr(name, "dtrace_") == name && - strstr(name, "dtrace_safe_") != name) { + if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { /* * Anything beginning with "dtrace_" may be called * from probe context unless it explitly indicates @@ -413,86 +418,94 @@ __fbt_provide_module(void *arg, struct modctl *ctl) continue; } - if (strstr(name, "dsmos_") == name) + if (LIT_STRNSTART(name, "fasttrap_") || + LIT_STRNSTART(name, "fuword") || + LIT_STRNSTART(name, "suword") || + LIT_STRNEQL(name, "sprlock") || + LIT_STRNEQL(name, "sprunlock") || + LIT_STRNEQL(name, "uread") || + LIT_STRNEQL(name, "uwrite")) + continue; /* 
Fasttrap inner-workings. */ + + if (LIT_STRNSTART(name, "dsmos_")) continue; /* Don't Steal Mac OS X! */ - if (strstr(name, "dtxnu_") == name || - strstr(name, "_dtrace") == name) + if (LIT_STRNSTART(name, "_dtrace")) continue; /* Shims in dtrace.c */ - if (strstr(name, "chud") == name) + if (LIT_STRNSTART(name, "chud")) continue; /* Professional courtesy. */ - if (strstr(name, "hibernate_") == name) + if (LIT_STRNSTART(name, "hibernate_")) continue; /* Let sleeping dogs lie. */ - if (0 == strcmp(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ - 0 == strcmp(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ + if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ + LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ continue; /* Per the fire code */ /* * Place no probes (illegal instructions) in the exception handling path! 
*/ - if (0 == strcmp(name, "L_handler700") || - 0 == strcmp(name, "save_get_phys_64") || - 0 == strcmp(name, "save_get_phys_32") || - 0 == strcmp(name, "EmulExit") || - 0 == strcmp(name, "Emulate") || - 0 == strcmp(name, "Emulate64") || - 0 == strcmp(name, "switchSegs") || - 0 == strcmp(name, "save_ret_phys")) + if (LIT_STRNEQL(name, "L_handler700") || + LIT_STRNEQL(name, "save_get_phys_64") || + LIT_STRNEQL(name, "save_get_phys_32") || + LIT_STRNEQL(name, "EmulExit") || + LIT_STRNEQL(name, "Emulate") || + LIT_STRNEQL(name, "Emulate64") || + LIT_STRNEQL(name, "switchSegs") || + LIT_STRNEQL(name, "save_ret_phys")) continue; - if (0 == strcmp(name, "thandler") || - 0 == strcmp(name, "versave") || - 0 == strcmp(name, "timer_event") || - 0 == strcmp(name, "hw_atomic_or") || - 0 == strcmp(name, "trap")) + if (LIT_STRNEQL(name, "thandler") || + LIT_STRNEQL(name, "versave") || + LIT_STRNEQL(name, "timer_event") || + LIT_STRNEQL(name, "hw_atomic_or") || + LIT_STRNEQL(name, "trap")) continue; - if (0 == strcmp(name, "fbt_perfCallback") || - 0 == strcmp(name, "fbt_perfIntCallback") || - 0 == strcmp(name, "ml_set_interrupts_enabled") || - 0 == strcmp(name, "dtrace_invop") || - 0 == strcmp(name, "fbt_invop") || - 0 == strcmp(name, "sdt_invop") || - 0 == strcmp(name, "max_valid_stack_address")) + if (LIT_STRNEQL(name, "fbt_perfCallback") || + LIT_STRNEQL(name, "fbt_perfIntCallback") || + LIT_STRNEQL(name, "ml_set_interrupts_enabled") || + LIT_STRNEQL(name, "dtrace_invop") || + LIT_STRNEQL(name, "fbt_invop") || + LIT_STRNEQL(name, "sdt_invop") || + LIT_STRNEQL(name, "max_valid_stack_address")) continue; /* * Probes encountered while we're on the interrupt stack are routed along * the interrupt handling path. No probes allowed there either! 
*/ - if (0 == strcmp(name, "ihandler") || - 0 == strcmp(name, "interrupt") || - 0 == strcmp(name, "disable_preemption")) + if (LIT_STRNEQL(name, "ihandler") || + LIT_STRNEQL(name, "interrupt") || + LIT_STRNEQL(name, "disable_preemption")) continue; /* * Avoid weird stack voodoo in and under machine_stack_handoff et al */ - if (strstr(name, "machine_stack") == name || - 0 == strcmp(name, "getPerProc") || /* Called in machine_stack_handoff with weird stack state */ - 0 == strcmp(name, "fpu_save") || /* Called in machine_stack_handoff with weird stack state */ - 0 == strcmp(name, "vec_save") || /* Called in machine_stack_handoff with weird stack state */ - 0 == strcmp(name, "pmap_switch")) /* Called in machine_stack_handoff with weird stack state */ + if (LIT_STRNSTART(name, "machine_stack") || + LIT_STRNEQL(name, "getPerProc") || /* Called in machine_stack_handoff with weird stack state */ + LIT_STRNEQL(name, "fpu_save") || /* Called in machine_stack_handoff with weird stack state */ + LIT_STRNEQL(name, "vec_save") || /* Called in machine_stack_handoff with weird stack state */ + LIT_STRNEQL(name, "pmap_switch")) /* Called in machine_stack_handoff with weird stack state */ continue; /* * Avoid machine_ routines. PR_5346750. */ - if (strstr(name, "machine_") == name) + if (LIT_STRNSTART(name, "machine_")) continue; /* * Avoid low level pmap and virtual machine monitor PowerPC routines. See PR_5379018. */ - if (strstr(name, "hw_") == name || - strstr(name, "mapping_") == name || - strstr(name, "commpage_") == name || - strstr(name, "pmap_") == name || - strstr(name, "vmm_") == name) + if (LIT_STRNSTART(name, "hw_") || + LIT_STRNSTART(name, "mapping_") || + LIT_STRNSTART(name, "commpage_") || + LIT_STRNSTART(name, "pmap_") || + LIT_STRNSTART(name, "vmm_")) continue; /* * Place no probes on critical routines. PR_5221096 @@ -511,25 +524,25 @@ __fbt_provide_module(void *arg, struct modctl *ctl) /* * Place no probes that could be hit on the way to the debugger. 
*/ - if (strstr(name, "kdp_") == name || - strstr(name, "kdb_") == name || - strstr(name, "kdbg_") == name || - strstr(name, "kdebug_") == name || - 0 == strcmp(name, "kernel_debug") || - 0 == strcmp(name, "Debugger") || - 0 == strcmp(name, "Call_DebuggerC") || - 0 == strcmp(name, "lock_debugger") || - 0 == strcmp(name, "unlock_debugger") || - 0 == strcmp(name, "SysChoked")) + if (LIT_STRNSTART(name, "kdp_") || + LIT_STRNSTART(name, "kdb_") || + LIT_STRNSTART(name, "kdbg_") || + LIT_STRNSTART(name, "kdebug_") || + LIT_STRNEQL(name, "kernel_debug") || + LIT_STRNEQL(name, "Debugger") || + LIT_STRNEQL(name, "Call_DebuggerC") || + LIT_STRNEQL(name, "lock_debugger") || + LIT_STRNEQL(name, "unlock_debugger") || + LIT_STRNEQL(name, "SysChoked")) continue; /* * Place no probes that could be hit on the way to a panic. */ if (NULL != strstr(name, "panic_") || - 0 == strcmp(name, "panic") || - 0 == strcmp(name, "handleMck") || - 0 == strcmp(name, "unresolved_kernel_trap")) + LIT_STRNEQL(name, "panic") || + LIT_STRNEQL(name, "handleMck") || + LIT_STRNEQL(name, "unresolved_kernel_trap")) continue; if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0) @@ -674,7 +687,8 @@ fbt_provide_module(void *arg, struct modctl *ctl) #pragma unused(ctl) __fbt_provide_module(arg, &g_fbt_kernctl); - kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page_32(g_fbt_kernctl.size)); + if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL ) + kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size)); g_fbt_kernctl.address = 0; g_fbt_kernctl.size = 0; } diff --git a/bsd/dev/ppc/km.c b/bsd/dev/ppc/km.c index 27abea786..e82d6be27 100644 --- a/bsd/dev/ppc/km.c +++ b/bsd/dev/ppc/km.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include @@ -50,7 +50,7 @@ /* * 'Global' variables, shared only by this file and conf.c. 
*/ -struct tty *km_tty[1] = { &cons }; +struct tty *km_tty[1] = { 0 }; /* * this works early on, after initialize_screen() but before autoconf (and thus @@ -68,7 +68,7 @@ extern int cngetc(void); // From osfmk extern void cons_cinput(char ch); // Used by osfmk static int kmoutput(struct tty *tp); -static void kmtimeout(struct tty *tp); +static void kmtimeout(void *tp); static void kmstart(struct tty *tp); extern void KeyboardOpen(void); @@ -76,14 +76,16 @@ extern void KeyboardOpen(void); void kminit(void) { - cons.t_dev = makedev(12, 0); + km_tty[0] = ttymalloc(); + km_tty[0]->t_dev = makedev(12, 0); initialized = 1; } + /* * cdevsw interface to km driver. */ int -kmopen(dev_t dev, int flag, __unused int devtype, struct proc *pp) +kmopen(dev_t dev, int flag, __unused int devtype, proc_t pp) { int unit; struct tty *tp; @@ -94,7 +96,10 @@ kmopen(dev_t dev, int flag, __unused int devtype, struct proc *pp) if(unit >= 1) return (ENXIO); - tp = (struct tty *)&cons; + tp = km_tty[unit]; + + tty_lock(tp); + tp->t_oproc = kmstart; tp->t_param = NULL; tp->t_dev = dev; @@ -107,25 +112,34 @@ kmopen(dev_t dev, int flag, __unused int devtype, struct proc *pp) tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; termioschars(&tp->t_termios); ttsetwater(tp); - } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) - return EBUSY; + } else if ((tp->t_state & TS_XCLUDE) && proc_suser(pp)) { + ret = EBUSY; + goto out; + } tp->t_state |= TS_CARR_ON; /* lie and say carrier exists and is on. */ + ret = ((*linesw[tp->t_line].l_open)(dev, tp)); { PE_Video video; wp = &tp->t_winsize; - /* Magic numbers. These are CHARWIDTH and CHARHEIGHT + /* + * Magic numbers. 
These are CHARWIDTH and CHARHEIGHT * from osfmk/ppc/POWERMAC/video_console.c */ wp->ws_xpixel = 8; wp->ws_ypixel = 16; + tty_unlock(tp); /* XXX race window */ + if (flag & O_POPUP) PE_initialize_console(0, kPETextScreen); bzero(&video, sizeof(video)); PE_current_console(&video); + + tty_lock(tp); + if( video.v_width != 0 && video.v_height != 0 ) { wp->ws_col = video.v_width / wp->ws_xpixel; wp->ws_row = video.v_height / wp->ws_ypixel; @@ -134,126 +148,134 @@ kmopen(dev_t dev, int flag, __unused int devtype, struct proc *pp) wp->ws_row = 36; } } + +out: + tty_unlock(tp); + return ret; } int -kmclose(__unused dev_t dev, __unused int flag, __unused int mode, - __unused struct proc *p) +kmclose(dev_t dev, __unused int flag, __unused int mode, __unused proc_t p) { - - struct tty *tp; + int ret; + struct tty *tp = km_tty[minor(dev)]; - tp = &cons; - (*linesw[tp->t_line].l_close)(tp,flag); + tty_lock(tp); + ret = (*linesw[tp->t_line].l_close)(tp,flag); ttyclose(tp); - return (0); + tty_unlock(tp); + + return (ret); } int -kmread(__unused dev_t dev, struct uio *uio, int ioflag) +kmread(dev_t dev, struct uio *uio, int ioflag) { - register struct tty *tp; - - tp = &cons; - return ((*linesw[tp->t_line].l_read)(tp, uio, ioflag)); + int ret; + struct tty *tp = km_tty[minor(dev)]; + + tty_lock(tp); + ret = (*linesw[tp->t_line].l_read)(tp, uio, ioflag); + tty_unlock(tp); + + return (ret); } int -kmwrite(__unused dev_t dev, struct uio *uio, int ioflag) +kmwrite(dev_t dev, struct uio *uio, int ioflag) { - register struct tty *tp; - - tp = &cons; - return ((*linesw[tp->t_line].l_write)(tp, uio, ioflag)); + int ret; + struct tty *tp = km_tty[minor(dev)]; + + tty_lock(tp); + ret = (*linesw[tp->t_line].l_write)(tp, uio, ioflag); + tty_unlock(tp); + + return (ret); } int -kmioctl( __unused dev_t dev, u_long cmd, caddr_t data, int flag, - struct proc *p) +kmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) { - int error; - struct tty *tp = &cons; + int error = 0; + struct 
tty *tp = km_tty[minor(dev)]; struct winsize *wp; + + tty_lock(tp); switch (cmd) { - - - case KMIOCSIZE: wp = (struct winsize *)data; *wp = tp->t_winsize; - return 0; + break; case TIOCSWINSZ: /* Prevent changing of console size -- * this ensures that login doesn't revert to the * termcap-defined size */ - return EINVAL; + error = EINVAL; + break; /* Bodge in the CLOCAL flag as the km device is always local */ - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: { - register struct termios *t = (struct termios *)data; - t->c_cflag |= CLOCAL; - /* No Break */ - } + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + { + struct termios32 *t = (struct termios32 *)data; + t->c_cflag |= CLOCAL; + /* No Break */ + } + goto fallthrough; + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: + { + struct user_termios *t = (struct user_termios *)data; + t->c_cflag |= CLOCAL; + /* No Break */ + } +fallthrough: default: error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (ENOTTY != error) - return error; - return ttioctl (tp, cmd, data, flag, p); + break; + error = ttioctl_locked(tp, cmd, data, flag, p); + break; } -} - -int -kmputc(__unused dev_t dev, char c) -{ - - if( disableConsoleOutput) - return( 0); - - if(!initialized) - return( 0); - if(c == '\n') - cnputcusr('\r'); + tty_unlock(tp); - cnputcusr(c); - - return 0; + return (error); } +/* + * kmputc + * + * Output a character to the serial console driver via cnputcusr(), + * which is exported by that driver. + * + * Locks: Assumes tp in the calling tty driver code is locked on + * entry, remains locked on exit + * + * Notes: Called from kmoutput(); giving the locking output + * assumptions here, this routine should be static (and + * inlined, given there is only one call site). 
+ */ int -kmgetc(__unused dev_t dev) +kmputc(__unused dev_t dev, char c) { - int c; - - c= cngetc(); - - if (c == '\r') { - c = '\n'; + if(!disableConsoleOutput && initialized) { + /* OCRNL */ + if(c == '\n') + cnputcusr('\r'); + cnputcusr(c); } - cnputcusr(c); - return c; -} -#if 0 -int -kmgetc_silent( - __unused dev_t dev) -{ - int c; - - c= cngetc(); - if (c == '\r') { - c = '\n'; - } - return c; + return (0); } -#endif /* 0 */ + /* * Callouts from linesw. @@ -261,6 +283,11 @@ kmgetc_silent( #define KM_LOWAT_DELAY ((ns_time_t)1000) +/* + * t_oproc for this driver; called from within the line discipline + * + * Locks: Assumes tp is locked on entry, remains locked on exit + */ static void kmstart(struct tty *tp) { @@ -277,35 +304,59 @@ kmstart(struct tty *tp) return; } +/* + * One-shot output retry timeout from kmoutput(); re-calls kmoutput() at + * intervals until the output queue for the tty is empty, at which point + * the timeout is not rescheduled by kmoutput() + * + * This function must take the tty_lock() around the kmoutput() call; it + * ignores the return value. + */ static void -kmtimeout(struct tty *tp) +kmtimeout(void *arg) { - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - kmoutput(tp); - (void) thread_funnel_set(kernel_flock, funnel_state); - + struct tty *tp = (struct tty *)arg; + tty_lock(tp); + (void)kmoutput(tp); + tty_unlock(tp); } + +/* + * kmoutput + * + * Locks: Assumes tp is locked on entry, remains locked on exit + * + * Notes: Called from kmstart() and kmtimeout(); kmtimeout() is a + * timer initiated by this routine to deal with pending + * output not yet flushed (output is flushed at a maximum + * of sizeof(buf) charatcers at a time before dropping into + * the timeout code). + */ static int kmoutput(struct tty *tp) { - /* - * FIXME - to be grokked...copied from m68k km.c. 
- */ - char buf[80]; - char *cp; - int cc = -1; + char buf[80]; /* buffer; limits output per call */ + char *cp; + int cc = -1; + + /* While there is data available to be output... */ while (tp->t_outq.c_cc > 0) { cc = ndqb(&tp->t_outq, 0); if (cc == 0) break; + /* + * attempt to output as many characters as are available, + * up to the available transfer buffer size. + */ cc = min(cc, sizeof buf); + /* copy the output queue contents to the buffer */ (void) q_to_b(&tp->t_outq, (unsigned char *)buf, cc); - for (cp = buf; cp < &buf[cc]; cp++) - kmputc(tp->t_dev, *cp & 0x7f); + for (cp = buf; cp < &buf[cc]; cp++) { + /* output the buffer one charatcer at a time */ + kmputc(tp->t_dev, *cp & 0x7f); + } } if (tp->t_outq.c_cc > 0) { timeout((timeout_fcn_t)kmtimeout, tp, hz); @@ -316,10 +367,26 @@ kmoutput(struct tty *tp) return 0; } -void cons_cinput(char ch) +/* + * cons_cinput + * + * Driver character input from the polled mode serial console driver calls + * this routine to input a character from the serial driver into the tty + * line discipline specific input processing receiv interrupt routine, + * l_rint(). + * + * Locks: Assumes that the tty_lock() is NOT held on the tp, so a + * serial driver should NOT call this function as a result + * of being called from a function which already holds the + * lock; ECHOE will be handled at the line discipline, if + * output echo processing is going to occur. + */ +void +cons_cinput(char ch) { - struct tty *tp = &cons; - + struct tty *tp = km_tty[0]; /* XXX */ + + tty_lock(tp); (*linesw[tp->t_line].l_rint) (ch, tp); + tty_unlock(tp); } - diff --git a/bsd/dev/ppc/machdep.c b/bsd/dev/ppc/machdep.c index 8f912037b..bf9f5beff 100644 --- a/bsd/dev/ppc/machdep.c +++ b/bsd/dev/ppc/machdep.c @@ -60,7 +60,7 @@ getchar(void) #if 0 if (c == 0x1b) /* ESC ? 
*/ call_kdp(); -#endif 0 +#endif if (c == '\r') c = '\n'; diff --git a/bsd/dev/ppc/mem.c b/bsd/dev/ppc/mem.c index 301ade89d..fc2d39efb 100644 --- a/bsd/dev/ppc/mem.c +++ b/bsd/dev/ppc/mem.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -151,13 +152,8 @@ mmrw(dev, uio, rw) vm_offset_t where; while (uio_resid(uio) > 0 && error == 0) { - if (uio_iov_len(uio) == 0) { - uio_next_iov(uio); - uio->uio_iovcnt--; - if (uio->uio_iovcnt < 0) - panic("mmrw"); - continue; - } + uio_update(uio, 0); + switch (minor(dev)) { /* minor device 0 is physical memory */ @@ -189,8 +185,7 @@ mmrw(dev, uio, rw) } } o = uio->uio_offset - vll; - // LP64todo - fix this! - c = min(PAGE_SIZE - o, uio_iov_len(uio)); + c = min(PAGE_SIZE - o, uio_curriovlen(uio)); error = uiomove((caddr_t)(where + o), c, uio); if(dgWork.dgFlags & enaDiagDM) (void)mapping_remove(kernel_pmap, (addr64_t)where); /* Unmap it */ @@ -205,7 +200,7 @@ mmrw(dev, uio, rw) if (((addr64_t)uio->uio_offset > vm_last_addr) || ((addr64_t)uio->uio_offset < VM_MIN_KERNEL_ADDRESS)) goto fault; - c = uio_iov_len(uio); + c = uio_curriovlen(uio); if (!kernacc(uio->uio_offset, c)) goto fault; error = uiomove64(uio->uio_offset, c, uio); @@ -215,7 +210,7 @@ mmrw(dev, uio, rw) case 2: if (rw == UIO_READ) return (0); - c = uio_iov_len(uio); + c = uio_curriovlen(uio); break; /* minor device 3 is ZERO/RATHOLE */ case 3: @@ -224,11 +219,10 @@ mmrw(dev, uio, rw) bzero(devzerobuf, PAGE_SIZE); } if(uio->uio_rw == UIO_WRITE) { - c = uio_iov_len(uio); + c = uio_curriovlen(uio); break; } - // LP64todo - fix this! 
- c = min(uio_iov_len(uio), PAGE_SIZE); + c = min(uio_curriovlen(uio), PAGE_SIZE); error = uiomove(devzerobuf, c, uio); continue; default: @@ -238,15 +232,7 @@ mmrw(dev, uio, rw) if (error) break; - uio_iov_base_add(uio, c); - uio->uio_offset += c; -#if LP64KERN - uio_setresid(uio, (uio_resid(uio) - c)); - uio_iov_len_add(uio, -((int64_t)c)); -#else - uio_setresid(uio, (uio_resid(uio) - c)); - uio_iov_len_add(uio, -((int)c)); -#endif + uio_update(uio, c); } return (error); fault: diff --git a/bsd/dev/ppc/munge.s b/bsd/dev/ppc/munge.s index 2af7c6e1c..9e33bc326 100644 --- a/bsd/dev/ppc/munge.s +++ b/bsd/dev/ppc/munge.s @@ -301,6 +301,33 @@ _munge_wwlwww: blr + .align 5 + .globl _munge_wwwwlw // 4 'w's and an l an w +_munge_wwwwlw: + li r0,0 + lwz r5,0*8+4(r3) + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) + lwz r10,5*8+4(r3) + lwz r11,6*8+4(r3) + + stw r0,0*8+0(r4) + stw r5,0*8+4(r4) + stw r0,1*8+0(r4) + stw r6,1*8+4(r4) + stw r0,2*8+0(r4) + stw r7,2*8+4(r4) + stw r0,3*8+0(r4) + stw r8,3*8+4(r4) + stw r9,4*8+0(r4) + stw r10,4*8+4(r4) + stw r0,5*8+0(r4) + stw r11,5*8+4(r4) + + blr + .align 5 .globl _munge_wwwwl // 4 'w's and an l @@ -416,3 +443,35 @@ _munge_wwwsw: stw r9,4*8+4(r4) blr + + .align 5 + .globl _munge_llllll +_munge_llllll: + li r0,0 + lwz r5,0*8+4(r3) // l1 + lwz r6,1*8+4(r3) + lwz r7,2*8+4(r3) // l2 + lwz r8,3*8+4(r3) + lwz r9,4*8+4(r3) // l3 + lwz r10,5*8+4(r3) + lwz r11,6*8+4(r3) // l4 + + stw r5,0*8+0(r4) + stw r6,0*8+4(r4) + stw r7,1*8+0(r4) + stw r8,1*8+4(r4) + stw r9,2*8+0(r4) + stw r10,2*8+4(r4) + stw r11,3*8+0(r4) + + // the rest spill to the stack (r1) + // we'll zero fill for now + // and make the syscall handler + // do the copyin from the user stack + stw r0,3*8+4(r4) + stw r0,4*8+0(r4) + stw r0,4*8+4(r4) + stw r0,5*8+0(r4) + stw r0,5*8+4(r4) + + blr diff --git a/bsd/dev/ppc/nvram.c b/bsd/dev/ppc/nvram.c deleted file mode 100644 index 0114a93b9..000000000 --- a/bsd/dev/ppc/nvram.c +++ /dev/null @@ -1,116 +0,0 @@ 
-/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * BSD driver for Non-volatile RAM. - * Stub functions call the real thing in the Platform Expert. 
- * - * Suurballe 11 Feb 1999 - */ - -#include -#include - -extern int PEnvopen ( dev_t, int, int, struct proc * ); -extern int PEnvclose ( dev_t, int, int, struct proc * ); -extern int PEnvread ( long, int, unsigned char *); -extern int PEnvwrite ( long, int, unsigned char * ); - - -nvopen(dev, flag, devtype, pp) - dev_t dev; - int flag, devtype; - struct proc *pp; -{ - return PEnvopen(dev,flag,devtype,pp); -} - - - -nvclose(dev, flag, mode, pp) - dev_t dev; - int flag, mode; - struct proc *pp; -{ - return PEnvclose(dev,flag,mode,pp); -} - - - -nvread(dev, uio, ioflag) - dev_t dev; - struct uio *uio; - int ioflag; -{ - long offset; - long size; - int c; - unsigned char cc; - long read = 0; - int error = 0; - - offset = uio->uio_offset; - size = uio_resid(uio); - - for (read = 0; read < size; read++, offset++) { - error = PEnvread(offset, 1, &cc); - if ( error ) { - return error; - } - c = (int)cc; - error = ureadc(c, uio); - if (error) { - return error; - } - } - return error; -} - - - -nvwrite(dev_t dev, struct uio *uio, int ioflag) -{ - long offset; - long size; - int c; - unsigned char cc; - long wrote = 0; - int error = 0; - - offset = uio->uio_offset; - size = uio_resid(uio); - - for (wrote = 0; wrote < size; wrote++, offset++) { - c = uwritec(uio); - if (c < 0) { - return 0; - } - cc = (unsigned char)c; - error = PEnvwrite(offset, 1, &cc); - } - return error; -} diff --git a/bsd/dev/ppc/ppc_init.c b/bsd/dev/ppc/ppc_init.c index 489610109..545cfe5ae 100644 --- a/bsd/dev/ppc/ppc_init.c +++ b/bsd/dev/ppc/ppc_init.c @@ -46,7 +46,7 @@ #include #ifdef __MACHO__ -#include +#include #endif /* External references */ @@ -98,15 +98,14 @@ vm_offset_t first_avail; #ifdef __MACHO__ -extern struct mach_header _mh_execute_header; void *sectTEXTB; -int sectSizeTEXT; +unsigned long sectSizeTEXT; void *sectDATAB; -int sectSizeDATA; +unsigned long sectSizeDATA; void *sectOBJCB; -int sectSizeOBJC; +unsigned long sectSizeOBJC; void *sectLINKB; -int sectSizeLINK; +unsigned long 
sectSizeLINK; vm_offset_t end, etext, edata; #define ETEXT etext @@ -120,9 +119,6 @@ void ppc_vm_init(unsigned int memory_size, boot_args *args) unsigned int i; vm_offset_t addr; int boot_task_end_offset; -#if NCPUS > 1 - const char *cpus; -#endif /* NCPUS > 1 */ printf("mem_size = %d M\n",memory_size / (1024 * 1024)); diff --git a/bsd/dev/ppc/systemcalls.c b/bsd/dev/ppc/systemcalls.c index 8cc3ca4b8..973d35899 100644 --- a/bsd/dev/ppc/systemcalls.c +++ b/bsd/dev/ppc/systemcalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,8 @@ * Version 2.0. */ +#include + #include #include #include @@ -54,7 +56,7 @@ #include #include -#include +#include #if CONFIG_DTRACE extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); @@ -193,7 +195,7 @@ unix_syscall(struct savearea *regs) #ifdef JOE_DEBUG if (uthread->uu_iocount) - joe_debug("system call returned with uu_iocount != 0"); + printf("system call returned with uu_iocount != 0\n"); #endif #if CONFIG_DTRACE uthread->t_dtrace_errno = error; @@ -274,10 +276,10 @@ unix_syscall(struct savearea *regs) if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[1], 0, 0, 0); + error, uthread->uu_rval[1], 0, proc->p_pid, 0); else KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], proc->p_pid, 0); } thread_exception_return(); @@ -311,6 +313,7 @@ unix_syscall_return(int error) if (callp->sy_call == dtrace_systrace_syscall) dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); #endif /* CONFIG_DTRACE */ + AUDIT_SYSCALL_EXIT(code, proc, uthread, error); /* * Get index into sysent table @@ -385,19 +388,46 @@ unix_syscall_return(int error) if 
(kdebug_enable && (code != 180)) { if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T) KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[1], 0, 0, 0); + error, uthread->uu_rval[1], 0, proc->p_pid, 0); else KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + error, uthread->uu_rval[0], uthread->uu_rval[1], proc->p_pid, 0); } thread_exception_return(); /* NOTREACHED */ } -#ifdef JOE_DEBUG -joe_debug(char *p) { +void +munge_lwww( + const void *in32, + void *out64) +{ + const uint32_t *arg32; + uint64_t *arg64; + + arg32 = (const uint32_t *) in32; + arg64 = (uint64_t *) out64; - printf("%s\n", p); + arg64[3] = arg32[9]; /* lwwW */ + arg64[2] = arg32[7]; /* lwWw */ + arg64[1] = arg32[5]; /* lWww */ + arg64[0] = ((uint64_t) arg32[1]) << 32; /* Lwww (hi) */ + arg64[0] |= (uint64_t) arg32[3]; /* Lwww (lo) */ +} + +void +munge_lw( + const void *in32, + void *out64) +{ + const uint32_t *arg32; + uint64_t *arg64; + + arg32 = (const uint32_t *) in32; + arg64 = (uint64_t *) out64; + + arg64[1] = arg32[5]; /* lW */ + arg64[0] = ((uint64_t) arg32[1]) << 32; /* Lw (hi) */ + arg64[0] |= (uint64_t) arg32[3]; /* Lw (lo) */ } -#endif diff --git a/bsd/dev/ppc/unix_signal.c b/bsd/dev/ppc/unix_signal.c index 435123101..4ca48b0b7 100644 --- a/bsd/dev/ppc/unix_signal.c +++ b/bsd/dev/ppc/unix_signal.c @@ -160,7 +160,7 @@ ucontext_64to32(struct user_ucontext64 *in, struct ucontext64 *out) * NOTE: Source and target may *NOT* overlap! 
*/ static void -siginfo_64to32(user_siginfo_t *in, siginfo_t *out) +siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out) { out->si_signo = in->si_signo; out->si_errno = in->si_errno; @@ -168,9 +168,24 @@ siginfo_64to32(user_siginfo_t *in, siginfo_t *out) out->si_pid = in->si_pid; out->si_uid = in->si_uid; out->si_status = in->si_status; - out->si_addr = CAST_DOWN(void *,in->si_addr); + out->si_addr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_addr); /* following cast works for sival_int because of padding */ - out->si_value.sival_ptr = CAST_DOWN(void *,in->si_value.sival_ptr); + out->si_value.sival_ptr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_value.sival_ptr); + out->si_band = in->si_band; /* range reduction */ + out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */ +} + +static void +siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = in->si_addr; + out->si_value.sival_ptr = in->si_value.sival_ptr; out->si_band = in->si_band; /* range reduction */ out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */ } @@ -181,7 +196,7 @@ siginfo_64to32(user_siginfo_t *in, siginfo_t *out) */ void -sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long code) +sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused uint32_t code) { kern_return_t kretn; struct mcontext mctx; @@ -397,7 +412,7 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long p_uctx = sp; /* this is where siginfo goes on stack */ - sp -= sizeof(user_siginfo_t); + sp -= sizeof(user64_siginfo_t); p_sinfo = sp; sp = TRUNC_DOWN64(sp, C_64_PARAMSAVE_LEN+C_64_LINKAGE_LEN, C_64_STK_ALIGN); @@ -414,7 +429,7 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long p_uctx = sp; /* this is where siginfo goes on 
stack */ - sp -= sizeof(siginfo_t); + sp -= sizeof(user32_siginfo_t); p_sinfo = sp; sp = TRUNC_DOWN32(sp, C_32_PARAMSAVE_LEN+C_32_LINKAGE_LEN, C_32_STK_ALIGN); @@ -442,7 +457,7 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long uctx.uc_mcontext64 = p_mctx; /* setup siginfo */ - bzero((caddr_t)&sinfo, sizeof(user_siginfo_t)); + bzero((caddr_t)&sinfo, sizeof(sinfo)); sinfo.si_signo = sig; if (ctx32 == 0) { sinfo.si_addr = mctx64.ss.srr0; @@ -605,29 +620,75 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long /* copy info out to user space */ if (IS_64BIT_PROCESS(p)) { + user64_siginfo_t sinfo64; + + siginfo_user_to_user64(&sinfo,&sinfo64); + +#if CONFIG_DTRACE + bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo)); + + ut->t_dtrace_siginfo.si_signo = sinfo.si_signo; + ut->t_dtrace_siginfo.si_code = sinfo.si_code; + ut->t_dtrace_siginfo.si_pid = sinfo.si_pid; + ut->t_dtrace_siginfo.si_uid = sinfo.si_uid; + ut->t_dtrace_siginfo.si_status = sinfo.si_status; + /* XXX truncates faulting address to void * on K32 */ + ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo.si_addr); + + + /* Fire DTrace proc:::fault probe when signal is generated by hardware. 
*/ + switch (sig) { + case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP: + DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo)); + break; + default: + break; + } /* XXX truncates catcher address to uintptr_t */ - DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &sinfo, + DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo), void (*)(void), CAST_DOWN(sig_t, catcher)); +#endif /* CONFIG_DTRACE */ if (copyout(&uctx, p_uctx, sizeof(struct user_ucontext64))) goto bad; - if (copyout(&sinfo, p_sinfo, sizeof(user_siginfo_t))) + if (copyout(&sinfo64, p_sinfo, sizeof(sinfo64))) goto bad; } else { struct ucontext64 uctx32; - siginfo_t sinfo32; + user32_siginfo_t sinfo32; ucontext_64to32(&uctx, &uctx32); - siginfo_64to32(&sinfo,&sinfo32); + siginfo_user_to_user32(&sinfo,&sinfo32); + +#if CONFIG_DTRACE + bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo)); + + ut->t_dtrace_siginfo.si_signo = sinfo.si_signo; + ut->t_dtrace_siginfo.si_code = sinfo.si_code; + ut->t_dtrace_siginfo.si_pid = sinfo.si_pid; + ut->t_dtrace_siginfo.si_uid = sinfo.si_uid; + ut->t_dtrace_siginfo.si_status = sinfo.si_status; + ut->t_dtrace_siginfo.si_addr = CAST_DOWN(void *, sinfo.si_addr); + + + /* Fire DTrace proc:::fault probe when signal is generated by hardware. 
*/ + switch (sig) { + case SIGILL: case SIGBUS: case SIGSEGV: case SIGFPE: case SIGTRAP: + DTRACE_PROC2(fault, int, (int)(ut->uu_code), siginfo_t *, &(ut->t_dtrace_siginfo)); + break; + default: + break; + } - DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &sinfo32, + DTRACE_PROC3(signal__handle, int, sig, siginfo_t *, &(ut->t_dtrace_siginfo), void (*)(void), CAST_DOWN(sig_t, catcher)); +#endif /* CONFIG_DTRACE */ if (copyout(&uctx32, p_uctx, sizeof(struct ucontext64))) goto bad; - if (copyout(&sinfo32, p_sinfo, sizeof(siginfo_t))) + if (copyout(&sinfo32, p_sinfo, sizeof(sinfo32))) goto bad; } if ((ctx32 == 0) || dualcontext) { @@ -661,16 +722,16 @@ sendsig(struct proc *p, user_addr_t catcher, int sig, int mask, __unused u_long panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn); } } else { - mctx.ss.r3 = CAST_DOWN(unsigned long,catcher); - mctx.ss.r4 = (unsigned long)infostyle; - mctx.ss.r5 = (unsigned long)sig; - mctx.ss.r6 = CAST_DOWN(unsigned long,p_sinfo); - mctx.ss.r7 = CAST_DOWN(unsigned long,p_uctx); + mctx.ss.r3 = CAST_DOWN(uint32_t,catcher); + mctx.ss.r4 = (uint32_t)infostyle; + mctx.ss.r5 = (uint32_t)sig; + mctx.ss.r6 = CAST_DOWN(uint32_t,p_sinfo); + mctx.ss.r7 = CAST_DOWN(uint32_t,p_uctx); - mctx.ss.srr0 = CAST_DOWN(unsigned long,trampact); + mctx.ss.srr0 = CAST_DOWN(uint32_t,trampact); /* MSR_EXPORT_MASK_SET */ mctx.ss.srr1 = get_msr_exportmask(); - mctx.ss.r1 = CAST_DOWN(unsigned long,sp); + mctx.ss.r1 = CAST_DOWN(uint32_t,sp); state_count = PPC_THREAD_STATE_COUNT; if ((kretn = thread_setstatus(th_act, PPC_THREAD_STATE, (void *)&mctx.ss, state_count)) != KERN_SUCCESS) { panic("sendsig: thread_setstatus failed, ret = %08X\n", kretn); @@ -719,7 +780,7 @@ sigreturn(struct proc *p, struct sigreturn_args *uap, __unused int *retval) struct sigacts *ps = p->p_sigacts; sigset_t mask; user_addr_t action; - unsigned long state_count; + uint32_t state_count; unsigned int state_flavor; struct uthread * ut; int vec_used = 0; diff --git 
a/bsd/dev/random/Makefile b/bsd/dev/random/Makefile index 778286e30..1190bc1ff 100644 --- a/bsd/dev/random/Makefile +++ b/bsd/dev/random/Makefile @@ -13,12 +13,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + DATAFILES = \ INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/dev/random/YarrowCoreLib/src/prng.c b/bsd/dev/random/YarrowCoreLib/src/prng.c index bbb56785f..f14c41163 100644 --- a/bsd/dev/random/YarrowCoreLib/src/prng.c +++ b/bsd/dev/random/YarrowCoreLib/src/prng.c @@ -93,8 +93,10 @@ static HANDLE Statmutex = NULL; static DWORD mutexCreatorId = 0; #endif +#if 0 #pragma mark - #pragma mark * * * Static Utility functions * * * +#endif /* All error checking should be done in the function that calls these */ @@ -211,8 +213,10 @@ bubbleSort( UINT *data, LONG len ) } } +#if 0 #pragma mark - #pragma mark * * * Public functions * * * +#endif /* Set up the PRNG */ prng_error_status diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c b/bsd/dev/random/YarrowCoreLib/src/sha1mod.c index f240dd9af..c1e245aa3 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.c +++ b/bsd/dev/random/YarrowCoreLib/src/sha1mod.c @@ -68,12 +68,12 @@ By Steve Reid /* Hash a single 512-bit block. This is the core of the algorithm. */ __private_extern__ void -YSHA1Transform(unsigned long state[5], const unsigned char buffer[64]) +YSHA1Transform(u_int32_t state[5], const unsigned char buffer[64]) { -unsigned long a, b, c, d, e; +u_int32_t a, b, c, d, e; typedef union { unsigned char c[64]; - unsigned long l[16]; + u_int32_t l[16]; } CHAR64LONG16; CHAR64LONG16* block; #ifdef SHA1HANDSOFF @@ -164,16 +164,16 @@ unsigned int i, j; __private_extern__ void YSHA1Final(unsigned char digest[20], YSHA1_CTX* context) { -unsigned long i, j; +u_int32_t i, j; unsigned char finalcount[8]; for (i = 0; i < 8; i++) { finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ } - YSHA1Update(context, (unsigned char *)"\200", 1); + YSHA1Update(context, (const unsigned char *)"\200", 1); while ((context->count[0] & 504) != 448) { - YSHA1Update(context, (unsigned char *)"\0", 1); + YSHA1Update(context, (const unsigned char *)"\0", 1); } YSHA1Update(context, finalcount, 8); /* Should cause a YSHA1Transform() */ for (i = 0; i < 20; i++) { diff --git a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h b/bsd/dev/random/YarrowCoreLib/src/sha1mod.h index 60b5c64cf..9d64139ba 100644 --- a/bsd/dev/random/YarrowCoreLib/src/sha1mod.h +++ b/bsd/dev/random/YarrowCoreLib/src/sha1mod.h @@ -37,6 +37,8 @@ By Steve Reid #define __SHA1_H__ +#include + /* Test Vectors (from FIPS PUB 180-1) "abc" @@ -53,13 +55,13 @@ A million repetitions of "a" //Context declaration typedef struct { - unsigned long state[5]; - unsigned long count[2]; + u_int32_t state[5]; + u_int32_t count[2]; unsigned char buffer[64]; } YSHA1_CTX; //Function forward declerations -__private_extern__ void YSHA1Transform(unsigned long state[5], +__private_extern__ void YSHA1Transform(u_int32_t state[5], const unsigned char buffer[64]); __private_extern__ void YSHA1Init(YSHA1_CTX* context); __private_extern__ void YSHA1Update(YSHA1_CTX* context, diff --git a/bsd/dev/random/fips_sha1.c b/bsd/dev/random/fips_sha1.c new file mode 100644 index 000000000..136bd266c --- /dev/null +++ b/bsd/dev/random/fips_sha1.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2000-2009 Apple, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * This SHA1 code is based on the basic framework from the reference + * implementation for MD5. That implementation is Copyright (C) + * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. + * + * License to copy and use this software is granted provided that it + * is identified as the "RSA Data Security, Inc. MD5 Message-Digest + * Algorithm" in all material mentioning or referencing this software + * or this function. + * + * License is also granted to make and use derivative works provided + * that such works are identified as "derived from the RSA Data + * Security, Inc. MD5 Message-Digest Algorithm" in all material + * mentioning or referencing the derived work. + * + * RSA Data Security, Inc. makes no representations concerning either + * the merchantability of this software or the suitability of this + * software for any particular purpose. It is provided "as is" + * without express or implied warranty of any kind. 
+ * + * These notices must be retained in any copies of any part of this + * documentation and/or software. + * + * Based on the FIPS 180-1: Secure Hash Algorithm (SHA-1) available at + * http://www.itl.nist.gov/div897/pubs/fip180-1.htm + */ + +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! + + THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. + IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THOUGH FIPS ACCEPTANCE AGAIN, + AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, + DON'T MESS WITH THIS FILE. + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +*/ + +#include +#include + +#include "fips_sha1.h" + +typedef int Boolean; + +/* Internal mappings to the legacy sha1_ctxt structure. */ +#define state h.b32 +#define bcount c.b32 +#define buffer m.b8 + +/* + * The digest algorithm interprets the input message as a sequence of 32-bit + * big-endian words. We must reverse bytes in each word on x86/64 platforms, + * but not on big-endian ones such as PPC. For performance, we take advantage + * of the bswap instruction on x86/64 to perform byte-reversal. On PPC, we + * could do 4-byte load if the address is 4-byte aligned which should further + * improve the performance. But for code simplicity, we punt and do 1-byte + * loads instead. + */ +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) +#define FETCH_32(p) ({ \ + register u_int32_t l = (u_int32_t)*((const u_int32_t *)(p)); \ + __asm__ __volatile__("bswap %0" : "=r" (l) : "0" (l)); \ + l; \ +}) +#else +#define FETCH_32(p) \ + (((u_int32_t)*((const u_int8_t *)(p) + 3)) | \ + (((u_int32_t)*((const u_int8_t *)(p) + 2)) << 8) | \ + (((u_int32_t)*((const u_int8_t *)(p) + 1)) << 16) | \ + (((u_int32_t)*((const u_int8_t *)(p))) << 24)) +#endif /* __i386__ || __x86_64__ */ + +/* + * Encodes input (u_int32_t) into output (unsigned char). Assumes len is + * a multiple of 4. 
This is not compatible with memcpy(). + */ +static void +Encode(unsigned char *output, u_int32_t *input, unsigned int len) +{ + unsigned int i, j; + + for (i = 0, j = 0; j < len; i++, j += 4) { + output[j + 3] = input[i] & 0xff; + output[j + 2] = (input[i] >> 8) & 0xff; + output[j + 1] = (input[i] >> 16) & 0xff; + output[j] = (input[i] >> 24) & 0xff; + } +} + +static unsigned char PADDING[64] = { 0x80, /* zeros */ }; + +/* Constants from FIPS 180-1 */ +#define K_00_19 0x5a827999UL +#define K_20_39 0x6ed9eba1UL +#define K_40_59 0x8f1bbcdcUL +#define K_60_79 0xca62c1d6UL + +/* F, G, H and I are basic SHA1 functions. */ +#define F(b, c, d) ((((c) ^ (d)) & (b)) ^ (d)) +#define G(b, c, d) ((b) ^ (c) ^ (d)) +#define H(b, c, d) (((b) & (c)) | (((b) | (c)) & (d))) + +/* ROTATE_LEFT rotates x left n bits. */ +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +/* R, R1-R4 are macros used during each transformation round. */ +#define R(f, k, v, w, x, y, z, i) { \ + (v) = ROTATE_LEFT(w, 5) + f(x, y, z) + (v) + (i) + (k); \ + (x) = ROTATE_LEFT(x, 30); \ +} + +#define R1(v, w, x, y, z, i) R(F, K_00_19, v, w, x, y, z, i) +#define R2(v, w, x, y, z, i) R(G, K_20_39, v, w, x, y, z, i) +#define R3(v, w, x, y, z, i) R(H, K_40_59, v, w, x, y, z, i) +#define R4(v, w, x, y, z, i) R(G, K_60_79, v, w, x, y, z, i) + +/* WUPDATE represents Wt variable that gets updated for steps 16-79 */ +#define WUPDATE(p, q, r, s) { \ + (p) = ((q) ^ (r) ^ (s) ^ (p)); \ + (p) = ROTATE_LEFT(p, 1); \ +} + +static void SHA1Transform(u_int32_t, u_int32_t, u_int32_t, u_int32_t, + u_int32_t, const u_int8_t *, SHA1_CTX *); + +/* + * SHA1 initialization. Begins a SHA1 operation, writing a new context. + */ +void +FIPS_SHA1Init(SHA1_CTX *context) +{ + context->bcount[0] = context->bcount[1] = 0; + context->count = 0; + + /* Load magic initialization constants. 
*/ + context->state[0] = 0x67452301UL; + context->state[1] = 0xefcdab89UL; + context->state[2] = 0x98badcfeUL; + context->state[3] = 0x10325476UL; + context->state[4] = 0xc3d2e1f0UL; +} + +/* + * SHA1 block update operation. Continues a SHA1 message-digest + * operation, processing another message block, and updating the + * context. + */ +void FIPS_SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen) +{ + u_int32_t i, index, partLen; + const unsigned char *input = (const unsigned char *)inpp; + + if (inputLen == 0) + return; + + /* Compute number of bytes mod 64 */ + index = (context->bcount[1] >> 3) & 0x3F; + + /* Update number of bits */ + if ((context->bcount[1] += (inputLen << 3)) < (inputLen << 3)) + context->bcount[0]++; + context->bcount[0] += (inputLen >> 29); + + partLen = 64 - index; + + /* Transform as many times as possible. */ + i = 0; + if (inputLen >= partLen) { + if (index != 0) { + memcpy(&context->buffer[index], input, partLen); + SHA1Transform(context->state[0], context->state[1], + context->state[2], context->state[3], + context->state[4], context->buffer, context); + i = partLen; + } + + for (; i + 63 < inputLen; i += 64) + SHA1Transform(context->state[0], context->state[1], + context->state[2], context->state[3], + context->state[4], &input[i], context); + + if (inputLen == i) + return; + + index = 0; + } + + /* Buffer remaining input */ + memcpy(&context->buffer[index], &input[i], inputLen - i); +} + + + + +/* + * This is function is only called in from the pagefault path or from page_copy(). + * So we assume that we can safely convert the virtual address to the physical address and use it. + * Assumptions: The passed in address(inpp) is a kernel virtual address + * and a physical page has been faulted in. + * The inputLen passed in should always be less than or equal to a page size (4096) + * and inpp should be on a page boundary. + * "performSHA1WithinKernelOnly" is initialized only when the hardware driver exists and is ready. 
+ */ + + + +/* + * SHA1 finalization. Ends an SHA1 message-digest operation, writing the + * the message digest and zeroizing the context. + */ +void +FIPS_SHA1Final(void *digest, SHA1_CTX *context) +{ + unsigned char bits[8]; + u_int32_t index = (context->bcount[1] >> 3) & 0x3f; + + /* Save number of bits */ + Encode(bits, context->bcount, 8); + + /* Pad out to 56 mod 64. */ + FIPS_SHA1Update(context, PADDING, ((index < 56) ? 56 : 120) - index); + + /* Append length (before padding) */ + FIPS_SHA1Update(context, bits, 8); + + /* Store state in digest */ + Encode(digest, context->state, 20); + + /* Zeroize sensitive information. */ + memset(context, 0, sizeof (*context)); +} + +/* + * SHA1 basic transformation. Transforms state based on block. + */ +static void +SHA1Transform(u_int32_t a, u_int32_t b, u_int32_t c, u_int32_t d, + u_int32_t e, const u_int8_t block[64], SHA1_CTX *context) +{ + /* Register (instead of array) is a win in most cases */ + register u_int32_t w0, w1, w2, w3, w4, w5, w6, w7; + register u_int32_t w8, w9, w10, w11, w12, w13, w14, w15; + + w15 = FETCH_32(block + 60); + w14 = FETCH_32(block + 56); + w13 = FETCH_32(block + 52); + w12 = FETCH_32(block + 48); + w11 = FETCH_32(block + 44); + w10 = FETCH_32(block + 40); + w9 = FETCH_32(block + 36); + w8 = FETCH_32(block + 32); + w7 = FETCH_32(block + 28); + w6 = FETCH_32(block + 24); + w5 = FETCH_32(block + 20); + w4 = FETCH_32(block + 16); + w3 = FETCH_32(block + 12); + w2 = FETCH_32(block + 8); + w1 = FETCH_32(block + 4); + w0 = FETCH_32(block + 0); + + /* Round 1 */ + R1(e, a, b, c, d, w0); /* 0 */ + R1(d, e, a, b, c, w1); /* 1 */ + R1(c, d, e, a, b, w2); /* 2 */ + R1(b, c, d, e, a, w3); /* 3 */ + R1(a, b, c, d, e, w4); /* 4 */ + R1(e, a, b, c, d, w5); /* 5 */ + R1(d, e, a, b, c, w6); /* 6 */ + R1(c, d, e, a, b, w7); /* 7 */ + R1(b, c, d, e, a, w8); /* 8 */ + R1(a, b, c, d, e, w9); /* 9 */ + R1(e, a, b, c, d, w10); /* 10 */ + R1(d, e, a, b, c, w11); /* 11 */ + R1(c, d, e, a, b, w12); /* 12 */ + 
R1(b, c, d, e, a, w13); /* 13 */ + R1(a, b, c, d, e, w14); /* 14 */ + R1(e, a, b, c, d, w15); /* 15 */ + WUPDATE( w0, w13, w8, w2); R1(d, e, a, b, c, w0); /* 16 */ + WUPDATE( w1, w14, w9, w3); R1(c, d, e, a, b, w1); /* 17 */ + WUPDATE( w2, w15, w10, w4); R1(b, c, d, e, a, w2); /* 18 */ + WUPDATE( w3, w0, w11, w5); R1(a, b, c, d, e, w3); /* 19 */ + + /* Round 2 */ + WUPDATE( w4, w1, w12, w6); R2(e, a, b, c, d, w4); /* 20 */ + WUPDATE( w5, w2, w13, w7); R2(d, e, a, b, c, w5); /* 21 */ + WUPDATE( w6, w3, w14, w8); R2(c, d, e, a, b, w6); /* 22 */ + WUPDATE( w7, w4, w15, w9); R2(b, c, d, e, a, w7); /* 23 */ + WUPDATE( w8, w5, w0, w10); R2(a, b, c, d, e, w8); /* 24 */ + WUPDATE( w9, w6, w1, w11); R2(e, a, b, c, d, w9); /* 25 */ + WUPDATE(w10, w7, w2, w12); R2(d, e, a, b, c, w10); /* 26 */ + WUPDATE(w11, w8, w3, w13); R2(c, d, e, a, b, w11); /* 27 */ + WUPDATE(w12, w9, w4, w14); R2(b, c, d, e, a, w12); /* 28 */ + WUPDATE(w13, w10, w5, w15); R2(a, b, c, d, e, w13); /* 29 */ + WUPDATE(w14, w11, w6, w0); R2(e, a, b, c, d, w14); /* 30 */ + WUPDATE(w15, w12, w7, w1); R2(d, e, a, b, c, w15); /* 31 */ + WUPDATE( w0, w13, w8, w2); R2(c, d, e, a, b, w0); /* 32 */ + WUPDATE( w1, w14, w9, w3); R2(b, c, d, e, a, w1); /* 33 */ + WUPDATE( w2, w15, w10, w4); R2(a, b, c, d, e, w2); /* 34 */ + WUPDATE( w3, w0, w11, w5); R2(e, a, b, c, d, w3); /* 35 */ + WUPDATE( w4, w1, w12, w6); R2(d, e, a, b, c, w4); /* 36 */ + WUPDATE( w5, w2, w13, w7); R2(c, d, e, a, b, w5); /* 37 */ + WUPDATE( w6, w3, w14, w8); R2(b, c, d, e, a, w6); /* 38 */ + WUPDATE( w7, w4, w15, w9); R2(a, b, c, d, e, w7); /* 39 */ + + /* Round 3 */ + WUPDATE( w8, w5, w0, w10); R3(e, a, b, c, d, w8); /* 40 */ + WUPDATE( w9, w6, w1, w11); R3(d, e, a, b, c, w9); /* 41 */ + WUPDATE(w10, w7, w2, w12); R3(c, d, e, a, b, w10); /* 42 */ + WUPDATE(w11, w8, w3, w13); R3(b, c, d, e, a, w11); /* 43 */ + WUPDATE(w12, w9, w4, w14); R3(a, b, c, d, e, w12); /* 44 */ + WUPDATE(w13, w10, w5, w15); R3(e, a, b, c, d, w13); /* 45 */ + WUPDATE(w14, 
w11, w6, w0); R3(d, e, a, b, c, w14); /* 46 */ + WUPDATE(w15, w12, w7, w1); R3(c, d, e, a, b, w15); /* 47 */ + WUPDATE( w0, w13, w8, w2); R3(b, c, d, e, a, w0); /* 48 */ + WUPDATE( w1, w14, w9, w3); R3(a, b, c, d, e, w1); /* 49 */ + WUPDATE( w2, w15, w10, w4); R3(e, a, b, c, d, w2); /* 50 */ + WUPDATE( w3, w0, w11, w5); R3(d, e, a, b, c, w3); /* 51 */ + WUPDATE( w4, w1, w12, w6); R3(c, d, e, a, b, w4); /* 52 */ + WUPDATE( w5, w2, w13, w7); R3(b, c, d, e, a, w5); /* 53 */ + WUPDATE( w6, w3, w14, w8); R3(a, b, c, d, e, w6); /* 54 */ + WUPDATE( w7, w4, w15, w9); R3(e, a, b, c, d, w7); /* 55 */ + WUPDATE( w8, w5, w0, w10); R3(d, e, a, b, c, w8); /* 56 */ + WUPDATE( w9, w6, w1, w11); R3(c, d, e, a, b, w9); /* 57 */ + WUPDATE(w10, w7, w2, w12); R3(b, c, d, e, a, w10); /* 58 */ + WUPDATE(w11, w8, w3, w13); R3(a, b, c, d, e, w11); /* 59 */ + + WUPDATE(w12, w9, w4, w14); R4(e, a, b, c, d, w12); /* 60 */ + WUPDATE(w13, w10, w5, w15); R4(d, e, a, b, c, w13); /* 61 */ + WUPDATE(w14, w11, w6, w0); R4(c, d, e, a, b, w14); /* 62 */ + WUPDATE(w15, w12, w7, w1); R4(b, c, d, e, a, w15); /* 63 */ + WUPDATE( w0, w13, w8, w2); R4(a, b, c, d, e, w0); /* 64 */ + WUPDATE( w1, w14, w9, w3); R4(e, a, b, c, d, w1); /* 65 */ + WUPDATE( w2, w15, w10, w4); R4(d, e, a, b, c, w2); /* 66 */ + WUPDATE( w3, w0, w11, w5); R4(c, d, e, a, b, w3); /* 67 */ + WUPDATE( w4, w1, w12, w6); R4(b, c, d, e, a, w4); /* 68 */ + WUPDATE( w5, w2, w13, w7); R4(a, b, c, d, e, w5); /* 69 */ + WUPDATE( w6, w3, w14, w8); R4(e, a, b, c, d, w6); /* 70 */ + WUPDATE( w7, w4, w15, w9); R4(d, e, a, b, c, w7); /* 71 */ + WUPDATE( w8, w5, w0, w10); R4(c, d, e, a, b, w8); /* 72 */ + WUPDATE( w9, w6, w1, w11); R4(b, c, d, e, a, w9); /* 73 */ + WUPDATE(w10, w7, w2, w12); R4(a, b, c, d, e, w10); /* 74 */ + WUPDATE(w11, w8, w3, w13); R4(e, a, b, c, d, w11); /* 75 */ + WUPDATE(w12, w9, w4, w14); R4(d, e, a, b, c, w12); /* 76 */ + WUPDATE(w13, w10, w5, w15); R4(c, d, e, a, b, w13); /* 77 */ + WUPDATE(w14, w11, w6, w0); R4(b, c, d, e, 
a, w14); /* 78 */ + WUPDATE(w15, w12, w7, w1); R4(a, b, c, d, e, w15); /* 79 */ + + context->state[0] += a; + context->state[1] += b; + context->state[2] += c; + context->state[3] += d; + context->state[4] += e; + + /* Zeroize sensitive information. */ + w15 = w14 = w13 = w12 = w11 = w10 = w9 = w8 = 0; + w7 = w6 = w5 = w4 = w3 = w2 = w1 = w0 = 0; +} diff --git a/bsd/dev/random/fips_sha1.h b/bsd/dev/random/fips_sha1.h new file mode 100644 index 000000000..b12684a98 --- /dev/null +++ b/bsd/dev/random/fips_sha1.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000-2009 Apple, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
+ + THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. + IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THOUGH FIPS ACCEPTANCE AGAIN, + AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, + DON'T MESS WITH THIS FILE. + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +*/ + +#ifndef _CRYPTO_FIPS_SHA1_H_ +#define _CRYPTO_FIPS_SHA1_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#define SHA_DIGEST_LENGTH 20 +#define SHA1_RESULTLEN SHA_DIGEST_LENGTH + +typedef struct sha1_ctxt { + union { + u_int8_t b8[20]; + u_int32_t b32[5]; /* state (ABCDE) */ + } h; + union { + u_int8_t b8[8]; + u_int32_t b32[2]; + u_int64_t b64[1]; /* # of bits, modulo 2^64 (msb first) */ + } c; + union { + u_int8_t b8[64]; + u_int32_t b32[16]; /* input buffer */ + } m; + u_int8_t count; /* unused; for compatibility only */ +} SHA1_CTX; + +extern void FIPS_SHA1Init(SHA1_CTX *); +extern void FIPS_SHA1Update(SHA1_CTX *, const void *, size_t); +extern void FIPS_SHA1Final(void *, SHA1_CTX *); + +#ifdef __cplusplus +} +#endif + +#endif /*_CRYPTO_SHA1_H_*/ diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c index 9208ff6b6..ab9312f97 100644 --- a/bsd/dev/random/randomdev.c +++ b/bsd/dev/random/randomdev.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2009 Apple, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,6 +26,17 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! + + THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. + IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THOUGH FIPS ACCEPTANCE AGAIN, + AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, + DON'T MESS WITH THIS FILE. + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
+*/ + #include #include #include @@ -43,11 +54,14 @@ #include #include -#include + +#include #include #include +#include "fips_sha1.h" + #define RANDOM_MAJOR -1 /* let the kernel pick the device number */ d_ioctl_t random_ioctl; @@ -83,17 +97,13 @@ static lck_attr_t *gYarrowAttr; static lck_grp_attr_t *gYarrowGrpAttr; static lck_mtx_t *gYarrowMutex = 0; -void CheckReseed(void); - #define RESEED_TICKS 50 /* how long a reseed operation can take */ -enum {kBSizeInBits = 160}; // MUST be a multiple of 32!!! -enum {kBSizeInBytes = kBSizeInBits / 8}; -typedef u_int32_t BlockWord; -enum {kWordSizeInBits = 32}; -enum {kBSize = 5}; +typedef u_int8_t BlockWord; +enum {kBSize = 20}; typedef BlockWord Block[kBSize]; +enum {kBlockSize = sizeof(Block)}; /* define prototypes to keep the compiler happy... */ @@ -112,26 +122,26 @@ u_int32_t CalculateCRC(u_int8_t* buffer, size_t length); void add_blocks(Block a, Block b, BlockWord carry) { - int i = kBSize; - while (--i >= 0) + int i = kBlockSize - 1; + while (i >= 0) { - u_int64_t c = (u_int64_t)carry + - (u_int64_t)a[i] + - (u_int64_t)b[i]; - a[i] = c & ((1LL << kWordSizeInBits) - 1); - carry = c >> kWordSizeInBits; + u_int32_t c = (u_int32_t)carry + + (u_int32_t)a[i] + + (u_int32_t)b[i]; + a[i] = c & 0xff; + carry = c >> 8; + i -= 1; } } -struct sha1_ctxt g_sha1_ctx; -char zeros[(512 - kBSizeInBits) / 8]; -Block g_xkey; -Block g_random_data; -int g_bytes_used; -unsigned char g_SelfTestInitialized = 0; -u_int32_t gLastBlockChecksum; +static char zeros[(512 - kBSize * 8) / 8]; +static Block g_xkey; +static Block g_random_data; +static int g_bytes_used; +static unsigned char g_SelfTestInitialized = 0; +static u_int32_t gLastBlockChecksum; static const u_int32_t g_crc_table[] = { @@ -196,6 +206,8 @@ u_int32_t CalculateCRC(u_int8_t* buffer, size_t length) void random_block(Block b, int addOptional) { + SHA1_CTX sha1_ctx; + int repeatCount = 0; do { @@ -203,6 +215,7 @@ random_block(Block b, int addOptional) if (addOptional) { + // 
create an xSeed to add. Block xSeed; prngOutput (gPrngRef, (BYTE*) &xSeed, sizeof (xSeed)); @@ -210,17 +223,33 @@ random_block(Block b, int addOptional) add_blocks (g_xkey, xSeed, 0); } + // initialize the value of H + FIPS_SHA1Init(&sha1_ctx); + + // to stay compatible with the FIPS specification, we need to flip the bytes in + // g_xkey to little endian byte order. In our case, this makes exactly no difference + // (random is random), but we need to do it anyway to keep FIPS happy + // compute "G" - SHA1Update (&g_sha1_ctx, (const u_int8_t *) &g_xkey, sizeof (g_xkey)); + FIPS_SHA1Update(&sha1_ctx, g_xkey, kBlockSize); // add zeros to fill the internal SHA-1 buffer - SHA1Update (&g_sha1_ctx, (const u_int8_t *)zeros, sizeof (zeros)); + FIPS_SHA1Update (&sha1_ctx, (const u_int8_t *)zeros, sizeof (zeros)); + + // we have to do a byte order correction here because the sha1 math is being done internally + // as u_int32_t, not a stream of bytes. Since we maintain our data as a byte stream, we need + // to convert - // write the resulting block - memmove(b, g_sha1_ctx.h.b8, sizeof (Block)); + u_int32_t* finger = (u_int32_t*) b; + + unsigned j; + for (j = 0; j < kBlockSize / sizeof (u_int32_t); ++j) + { + *finger++ = OSSwapHostToBigInt32(sha1_ctx.h.b32[j]); + } // calculate the CRC-32 of the block - u_int32_t new_crc = CalculateCRC(g_sha1_ctx.h.b8, sizeof (Block)); + u_int32_t new_crc = CalculateCRC(sha1_ctx.h.b8, sizeof (Block)); // make sure we don't repeat int cmp = new_crc == gLastBlockChecksum; @@ -276,7 +305,6 @@ PreliminarySetup(void) /* clear the error flag, reads and write should then work */ gRandomError = 0; - { struct timeval tt; char buffer [16]; @@ -302,7 +330,6 @@ PreliminarySetup(void) /* and scramble it some more */ perr = prngForceReseed(gPrngRef, RESEED_TICKS); - } /* make a mutex to control access */ gYarrowGrpAttr = lck_grp_attr_alloc_init(); @@ -313,7 +340,7 @@ PreliminarySetup(void) fips_initialize (); } -const Block kKnownAnswer = {0x92b404e5, 
0x56588ced, 0x6c1acd4e, 0xbf053f68, 0x9f73a93}; +const Block kKnownAnswer = {0x92, 0xb4, 0x04, 0xe5, 0x56, 0x58, 0x8c, 0xed, 0x6c, 0x1a, 0xcd, 0x4e, 0xbf, 0x05, 0x3f, 0x68, 0x09, 0xf7, 0x3a, 0x93}; void fips_initialize(void) @@ -321,22 +348,15 @@ fips_initialize(void) /* So that we can do the self test, set the seed to zero */ memset(&g_xkey, 0, sizeof(g_xkey)); - /* initialize our SHA1 generator */ - SHA1Init (&g_sha1_ctx); - /* other initializations */ memset (zeros, 0, sizeof (zeros)); g_bytes_used = 0; random_block(g_random_data, FALSE); // check here to see if we got the initial data we were expecting - int i; - for (i = 0; i < kBSize; ++i) + if (memcmp(kKnownAnswer, g_random_data, kBlockSize) != 0) { - if (kKnownAnswer[i] != g_random_data[i]) - { - panic("FIPS random self test failed"); - } + panic("FIPS random self test failed"); } // now do the random block again to make sure that userland doesn't get predicatable data @@ -358,10 +378,8 @@ random_init(void) /* install us in the file system */ gRandomInstalled = 1; -#ifndef ARM_BOARD_CONFIG_S5L8900XFPGA_1136JFS /* setup yarrow and the mutex */ PreliminarySetup(); -#endif ret = cdevsw_add(RANDOM_MAJOR, &random_cdevsw); if (ret < 0) { @@ -458,7 +476,6 @@ random_write (__unused dev_t dev, struct uio *uio, __unused int ioflag) while (uio_resid(uio) > 0 && retCode == 0) { /* get the user's data */ - // LP64todo - fix this! 
uio_resid may be 64-bit value int bytesToInput = min(uio_resid(uio), sizeof (rdBuffer)); retCode = uiomove(rdBuffer, bytesToInput, uio); if (retCode != 0) @@ -500,18 +517,17 @@ random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) /* lock down the mutex */ lck_mtx_lock(gYarrowMutex); - CheckReseed(); int bytes_remaining = uio_resid(uio); while (bytes_remaining > 0 && retCode == 0) { /* get the user's data */ int bytes_to_read = 0; - int bytes_available = kBSizeInBytes - g_bytes_used; + int bytes_available = kBlockSize - g_bytes_used; if (bytes_available == 0) { random_block(g_random_data, TRUE); g_bytes_used = 0; - bytes_available = kBSizeInBytes; + bytes_available = kBlockSize; } bytes_to_read = min (bytes_remaining, bytes_available); @@ -537,24 +553,21 @@ void read_random(void* buffer, u_int numbytes) { if (gYarrowMutex == 0) { /* are we initialized? */ -#ifndef ARM_BOARD_CONFIG_S5L8900XFPGA_1136JFS PreliminarySetup (); -#endif } lck_mtx_lock(gYarrowMutex); - CheckReseed(); int bytes_read = 0; int bytes_remaining = numbytes; while (bytes_remaining > 0) { - int bytes_to_read = min(bytes_remaining, kBSizeInBytes - g_bytes_used); + int bytes_to_read = min(bytes_remaining, kBlockSize - g_bytes_used); if (bytes_to_read == 0) { random_block(g_random_data, TRUE); g_bytes_used = 0; - bytes_to_read = min(bytes_remaining, kBSizeInBytes); + bytes_to_read = min(bytes_remaining, kBlockSize); } memmove ((u_int8_t*) buffer + bytes_read, ((u_int8_t*)g_random_data)+ g_bytes_used, bytes_to_read); @@ -567,17 +580,12 @@ read_random(void* buffer, u_int numbytes) } /* - * Return an unsigned long pseudo-random number. + * Return an u_int32_t pseudo-random number. 
*/ -u_long +u_int32_t RandomULong(void) { - u_long buf; + u_int32_t buf; read_random(&buf, sizeof (buf)); return (buf); } - -void -CheckReseed(void) -{ -} diff --git a/bsd/dev/random/randomdev.h b/bsd/dev/random/randomdev.h index 54e11cfa2..2d3b1a33a 100644 --- a/bsd/dev/random/randomdev.h +++ b/bsd/dev/random/randomdev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999, 2000-2002, 2009 Apple, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,6 +26,17 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +/* + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! + + THIS FILE IS NEEDED TO PASS FIPS ACCEPTANCE FOR THE RANDOM NUMBER GENERATOR. + IF YOU ALTER IT IN ANY WAY, WE WILL NEED TO GO THOUGH FIPS ACCEPTANCE AGAIN, + AN OPERATION THAT IS VERY EXPENSIVE AND TIME CONSUMING. IN OTHER WORDS, + DON'T MESS WITH THIS FILE. + + WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! +*/ + #ifndef __DEV_RANDOMDEV_H__ #define __DEV_RANDOMDEV_H__ @@ -42,7 +53,7 @@ int random_close(dev_t dev, int flags, int mode, struct proc *pp); int random_read(dev_t dev, struct uio *uio, int ioflag); int random_write(dev_t dev, struct uio *uio, int ioflag); -u_long RandomULong( void ); +u_int32_t RandomULong( void ); #endif /* __APPLE_API_PRIVATE */ #endif /* __DEV_RANDOMDEV_H__ */ diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index 1522646ea..9822fbd88 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,26 +39,31 @@ #include #include +#include +#include #include #include #include #include #include #include -#include +#include #include +#include extern vm_map_t mb_map; #if INET || INET6 -extern u_long tcp_sendspace; -extern u_long tcp_recvspace; +extern uint32_t tcp_sendspace; +extern uint32_t tcp_recvspace; #endif void bsd_bufferinit(void) __attribute__((section("__TEXT, initcode"))); extern void md_prepare_for_shutdown(int, int, char *); -int bsd_mbuf_cluster_reserve(void); +unsigned int bsd_mbuf_cluster_reserve(void); +void bsd_srv_setup(int); +void bsd_exec_setup(int); /* * Declare these as initialized data so we can patch them. @@ -86,7 +91,7 @@ static unsigned int mbuf_poolsz; vm_map_t buffer_map; vm_map_t bufferhdr_map; - +static int vnodes_sized = 0; extern void bsd_startupearly(void) __attribute__((section("__TEXT, initcode"))); @@ -99,7 +104,7 @@ bsd_startupearly(void) /* clip the number of buf headers upto 16k */ if (max_nbuf_headers == 0) - max_nbuf_headers = atop(sane_size / 50); /* Get 2% of ram, but no more than we can map */ + max_nbuf_headers = atop_kernel(sane_size / 50); /* Get 2% of ram, but no more than we can map */ if ((customnbuf == 0) && (max_nbuf_headers > 16384)) max_nbuf_headers = 16384; if (max_nbuf_headers < CONFIG_MIN_NBUF) @@ -107,7 +112,7 @@ bsd_startupearly(void) /* clip the number of hash elements to 200000 */ if ( (customnbuf == 0 ) && nbuf_hashelements == 0) { - nbuf_hashelements = atop(sane_size / 50); + nbuf_hashelements = atop_kernel(sane_size / 50); if (nbuf_hashelements > 200000) nbuf_hashelements = 200000; } else @@ -170,6 +175,7 @@ bsd_startupearly(void) } #endif /* SOCKETS */ + if (vnodes_sized == 0) { /* * Size vnodes based on memory * Number vnodes is (memsize/64k) + 1024 @@ -182,6 +188,7 @@ bsd_startupearly(void) desiredvnodes = (sane_size/65536) + 1024; if (desiredvnodes > CONFIG_VNODES) desiredvnodes = CONFIG_VNODES; + } } void @@ -189,7 +196,10 @@ 
bsd_bufferinit(void) { kern_return_t ret; - cons.t_dev = makedev(12, 0); + /* + * Note: Console device initialized in kminit() from bsd_autoconf() + * prior to call to us in bsd_init(). + */ bsd_startupearly(); @@ -211,9 +221,13 @@ bsd_bufferinit(void) bufinit(); } -/* 512 MB hard limit on size of the mbuf pool */ -#define MAX_MBUF_POOL (512 << MBSHIFT) -#define MAX_NCL (MAX_MBUF_POOL >> MCLSHIFT) +/* 512 MB (K32) or 2 GB (K64) hard limit on size of the mbuf pool */ +#if !defined(__LP64__) +#define MAX_MBUF_POOL (512 << MBSHIFT) +#else +#define MAX_MBUF_POOL (2ULL << GBSHIFT) +#endif /* !__LP64__ */ +#define MAX_NCL (MAX_MBUF_POOL >> MCLSHIFT) /* * this has been broken out into a separate routine that @@ -222,30 +236,90 @@ bsd_bufferinit(void) * DMA hardware that can't fully address all of the physical * memory that is present. */ -int +unsigned int bsd_mbuf_cluster_reserve(void) { + int mbuf_pool = 0; + /* If called more than once, return the previously calculated size */ - if (mbuf_poolsz != 0) - goto done; + if (mbuf_poolsz != 0) + goto done; - PE_parse_boot_argn("ncl", &ncl, sizeof (ncl)); + /* + * Some of these are parsed in parse_bsd_args(), but for x86 we get + * here early from i386_vm_init() and so we parse them now, in order + * to correctly compute the size of the low-memory VM pool. It is + * redundant but rather harmless. + */ + //(void) PE_parse_boot_argn("srv", &srv, sizeof (srv)); + (void) PE_parse_boot_argn("ncl", &ncl, sizeof (ncl)); + (void) PE_parse_boot_argn("mbuf_pool", &mbuf_pool, sizeof (mbuf_pool)); + + /* + * Convert "mbuf_pool" from MB to # of 2KB clusters; it is + * equivalent to "ncl", except that it uses different unit. 
+ */ + if (mbuf_pool != 0) + ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT; - if (sane_size > (64 * 1024 * 1024) || ncl) { + if (sane_size > (64 * 1024 * 1024) || ncl != 0) { if ((nmbclusters = ncl) == 0) { - if ((nmbclusters = ((sane_size / 16)/MCLBYTES)) > 32768) - nmbclusters = 32768; + /* Auto-configure the mbuf pool size */ + nmbclusters = mbuf_default_ncl(srv, sane_size); + } else { + /* Make sure it's not odd in case ncl is manually set */ + if (nmbclusters & 0x1) + --nmbclusters; + + /* And obey the upper limit */ + if (nmbclusters > MAX_NCL) + nmbclusters = MAX_NCL; } - /* Make sure it's not odd in case ncl is manually set */ - if (nmbclusters & 0x1) - --nmbclusters; - - /* And obey the upper limit */ - if (nmbclusters > MAX_NCL) - nmbclusters = MAX_NCL; - } mbuf_poolsz = nmbclusters << MCLSHIFT; done: - return (nmbclusters * MCLBYTES); + return (mbuf_poolsz); } +#if defined(__LP64__) +extern int tcp_tcbhashsize; +extern int max_cached_sock_count; +void IOSleep(int); +#endif + + +void +bsd_srv_setup(int scale) +{ +#if defined(__LP64__) + /* if memory is more than 16G, then apply rules for processes */ + if (scale > 0) { + maxproc = 2500 * scale; + hard_maxproc = maxproc; + /* no fp usage */ + maxprocperuid = (maxproc*3)/4; + maxfiles = (150000 * scale); + maxfilesperproc = maxfiles/2; + desiredvnodes = maxfiles; + vnodes_sized = 1; + if (scale > 4) { + /* clip them at 32G level */ + somaxconn = 2048; + /* 64G or more the hash size is 32k */ + if (scale > 7) { + /* clip at 64G level */ + tcp_tcbhashsize = 16 *1024; + max_cached_sock_count = 165000; + } else { + tcp_tcbhashsize = 32 *1024; + max_cached_sock_count = 60000 + ((scale-1) * 15000); + } + } else { + somaxconn = 512*scale; + tcp_tcbhashsize = 4*1024*scale; + max_cached_sock_count = 60000 + ((scale-1) * 15000); + } + } +#endif + bsd_exec_setup(scale); +} + diff --git a/bsd/dev/vn/Makefile b/bsd/dev/vn/Makefile index 313fb2a7b..64ae209ac 100644 --- a/bsd/dev/vn/Makefile +++ b/bsd/dev/vn/Makefile @@ -14,12 
+14,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + DATAFILES = \ vnioctl.h diff --git a/bsd/dev/vn/shadow.c b/bsd/dev/vn/shadow.c index 52ed5dfac..731f49989 100644 --- a/bsd/dev/vn/shadow.c +++ b/bsd/dev/vn/shadow.c @@ -72,7 +72,7 @@ #include "shadow.h" -#define ULONG_ALL_ONES ((u_long)(-1)) +#define UINT32_ALL_ONES ((uint32_t)(-1)) #define USHORT_ALL_ONES ((u_short)(-1)) #define UCHAR_ALL_ONES ((u_char)(-1)) @@ -87,20 +87,20 @@ typedef u_short band_number_t; #define BAND_MAX ((band_number_t)65535) struct shadow_map { - u_long blocks_per_band;/* size in blocks */ - u_long block_size; + uint32_t blocks_per_band;/* size in blocks */ + uint32_t block_size; u_char * block_bitmap; /* 1 bit per block; 1=written */ band_number_t * bands; /* band map array */ - u_long file_size_blocks; /* size of file in bands */ - u_long shadow_size_bands; /* size of shadow in bands */ - u_long next_band; /* next free band */ - u_long zeroth_band; /* special-case 0th band */ + uint32_t file_size_blocks; /* size of file in bands */ + uint32_t shadow_size_bands; /* size of shadow in bands */ + uint32_t next_band; /* next free band */ + uint32_t zeroth_band; /* special-case 0th band */ }; typedef struct { - u_long byte; - u_long bit; + uint32_t byte; + uint32_t bit; } bitmap_offset_t; static __inline__ u_char @@ -151,7 +151,7 @@ bitmap_offset(off_t where) * units, using longs, then a short, then a byte, then bits. 
*/ static void -bitmap_set(u_char * map, u_long start_bit, u_long bit_count) +bitmap_set(u_char * map, uint32_t start_bit, uint32_t bit_count) { bitmap_offset_t start; bitmap_offset_t end; @@ -159,7 +159,7 @@ bitmap_set(u_char * map, u_long start_bit, u_long bit_count) start = bitmap_offset(start_bit); end = bitmap_offset(start_bit + bit_count); if (start.byte < end.byte) { - u_long n_bytes; + uint32_t n_bytes; if (start.bit) { map[start.byte] |= byte_set_bits(start.bit, NBBY - 1); @@ -171,10 +171,10 @@ bitmap_set(u_char * map, u_long start_bit, u_long bit_count) n_bytes = end.byte - start.byte; - while (n_bytes >= (sizeof(u_long))) { - *((u_long *)(map + start.byte)) = ULONG_ALL_ONES; - start.byte += sizeof(u_long); - n_bytes -= sizeof(u_long); + while (n_bytes >= (sizeof(uint32_t))) { + *((uint32_t *)(map + start.byte)) = UINT32_ALL_ONES; + start.byte += sizeof(uint32_t); + n_bytes -= sizeof(uint32_t); } if (n_bytes >= sizeof(u_short)) { *((u_short *)(map + start.byte)) = USHORT_ALL_ONES; @@ -208,11 +208,11 @@ bitmap_set(u_char * map, u_long start_bit, u_long bit_count) * a byte, then any remaining bits to find the bit that is different. 
*/ -static u_long -bitmap_get(u_char * map, u_long start_bit, u_long bit_count, +static uint32_t +bitmap_get(u_char * map, uint32_t start_bit, uint32_t bit_count, boolean_t * ret_is_set) { - u_long count; + uint32_t count; int i; boolean_t is_set; bitmap_offset_t start; @@ -225,7 +225,7 @@ bitmap_get(u_char * map, u_long start_bit, u_long bit_count, count = 0; if (start.byte < end.byte) { - u_long n_bytes; + uint32_t n_bytes; if (start.bit) { /* try to align to a byte */ for (i = start.bit; i < NBBY; i++) { @@ -246,9 +246,9 @@ bitmap_get(u_char * map, u_long start_bit, u_long bit_count, n_bytes = end.byte - start.byte; /* check for 4 bytes of the same bits */ - while (n_bytes >= sizeof(u_long)) { - u_long * valPtr = (u_long *)(map + start.byte); - if ((is_set && *valPtr == ULONG_ALL_ONES) + while (n_bytes >= sizeof(uint32_t)) { + uint32_t * valPtr = (uint32_t *)(map + start.byte); + if ((is_set && *valPtr == UINT32_ALL_ONES) || (!is_set && *valPtr == 0)) { count += sizeof(*valPtr) * NBBY; start.byte += sizeof(*valPtr); @@ -310,7 +310,7 @@ bitmap_get(u_char * map, u_long start_bit, u_long bit_count, } static __inline__ band_number_t -shadow_map_block_to_band(shadow_map_t * map, unsigned long block) +shadow_map_block_to_band(shadow_map_t * map, uint32_t block) { return (block / map->blocks_per_band); } @@ -361,16 +361,16 @@ shadow_map_mapped_band(shadow_map_t * map, band_number_t band, * * If called with is_write = TRUE, this function will map bands as it goes. 
*/ -static u_long -shadow_map_contiguous(shadow_map_t * map, u_long start_block, - u_long num_blocks, boolean_t is_write) +static uint32_t +shadow_map_contiguous(shadow_map_t * map, uint32_t start_block, + uint32_t num_blocks, boolean_t is_write) { band_number_t band = shadow_map_block_to_band(map, start_block); - u_long end_block = start_block + num_blocks; + uint32_t end_block = start_block + num_blocks; boolean_t is_mapped; band_number_t mapped_band; - u_long ret_end_block = end_block; - u_long p; + uint32_t ret_end_block = end_block; + uint32_t p; is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band); if (is_write == FALSE && is_mapped == FALSE) { @@ -419,8 +419,8 @@ shadow_map_contiguous(shadow_map_t * map, u_long start_block, * particularly since most of the bits will be zero. * A sparse bitmap would really help in this case. */ -static __inline__ u_long -block_bitmap_size(off_t file_size, u_long block_size) +static __inline__ uint32_t +block_bitmap_size(off_t file_size, uint32_t block_size) { off_t blocks = howmany(file_size, block_size); return (howmany(blocks, NBBY)); @@ -448,15 +448,15 @@ block_bitmap_size(off_t file_size, u_long block_size) * should be read. 
*/ boolean_t -shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, - u_long * incr_block_offset, u_long * incr_block_count) +shadow_map_read(shadow_map_t * map, uint32_t block_offset, uint32_t block_count, + uint32_t * incr_block_offset, uint32_t * incr_block_count) { boolean_t written = FALSE; - u_long n_blocks; + uint32_t n_blocks; if (block_offset >= map->file_size_blocks || (block_offset + block_count) > map->file_size_blocks) { - printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n", + printf("shadow_map_read: request (%d, %d) exceeds file size %d\n", block_offset, block_count, map->file_size_blocks); *incr_block_count = 0; } @@ -468,7 +468,7 @@ shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, } else { /* start has been written, and therefore mapped */ band_number_t mapped_band; - u_long band_limit; + uint32_t band_limit; mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)]; *incr_block_offset = mapped_band * map->blocks_per_band @@ -500,17 +500,17 @@ shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, * TRUE if the shadow file was grown, FALSE otherwise. 
*/ boolean_t -shadow_map_write(shadow_map_t * map, u_long block_offset, - u_long block_count, u_long * incr_block_offset, - u_long * incr_block_count) +shadow_map_write(shadow_map_t * map, uint32_t block_offset, + uint32_t block_count, uint32_t * incr_block_offset, + uint32_t * incr_block_count) { - u_long band_limit; + uint32_t band_limit; band_number_t mapped_band; boolean_t shadow_grew = FALSE; if (block_offset >= map->file_size_blocks || (block_offset + block_count) > map->file_size_blocks) { - printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n", + printf("shadow_map_write: request (%d, %d) exceeds file size %d\n", block_offset, block_count, map->file_size_blocks); *incr_block_count = 0; } @@ -532,7 +532,7 @@ shadow_map_write(shadow_map_t * map, u_long block_offset, } boolean_t -shadow_map_is_written(shadow_map_t * map, u_long block_offset) +shadow_map_is_written(shadow_map_t * map, uint32_t block_offset) { bitmap_offset_t b; @@ -546,7 +546,7 @@ shadow_map_is_written(shadow_map_t * map, u_long block_offset) * Purpose: * To return the size of the shadow file in blocks. 
*/ -u_long +uint32_t shadow_map_shadow_size(shadow_map_t * map) { return (map->shadow_size_bands * map->blocks_per_band); @@ -563,13 +563,13 @@ shadow_map_shadow_size(shadow_map_t * map) */ shadow_map_t * shadow_map_create(off_t file_size, off_t shadow_size, - u_long band_size, u_long block_size) + uint32_t band_size, uint32_t block_size) { void * block_bitmap = NULL; - u_long bitmap_size; + uint32_t bitmap_size; band_number_t * bands = NULL; shadow_map_t * map; - u_long n_bands = 0; + uint32_t n_bands = 0; if (band_size == 0) { band_size = BAND_SIZE_DEFAULT; @@ -577,7 +577,7 @@ shadow_map_create(off_t file_size, off_t shadow_size, n_bands = howmany(file_size, band_size); if (n_bands > (BAND_MAX + 1)) { - printf("file is too big: %ld > %d\n", + printf("file is too big: %d > %d\n", n_bands, BAND_MAX); goto failure; } @@ -650,8 +650,8 @@ enum { typedef struct { int type; - u_long offset; - u_long count; + uint32_t offset; + uint32_t count; } block_request_t; int @@ -675,8 +675,8 @@ main() exit(1); } for (i = 0; TRUE; i++) { - u_long offset; - u_long resid; + uint32_t offset; + uint32_t resid; boolean_t shadow_grew; boolean_t read_shadow; @@ -691,8 +691,8 @@ main() switch (requests[i].type) { case WriteRequest: while (resid > 0) { - u_long this_offset; - u_long this_count; + uint32_t this_offset; + uint32_t this_count; shadow_grew = shadow_map_write(map, offset, resid, @@ -710,8 +710,8 @@ main() break; case ReadRequest: while (resid > 0) { - u_long this_offset; - u_long this_count; + uint32_t this_offset; + uint32_t this_count; read_shadow = shadow_map_read(map, offset, resid, diff --git a/bsd/dev/vn/shadow.h b/bsd/dev/vn/shadow.h index 48b2c6b7a..ce2c677dc 100644 --- a/bsd/dev/vn/shadow.h +++ b/bsd/dev/vn/shadow.h @@ -36,20 +36,20 @@ typedef struct shadow_map shadow_map_t; boolean_t -shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, - u_long * incr_block_offset, u_long * incr_block_count); +shadow_map_read(shadow_map_t * map, uint32_t 
block_offset, uint32_t block_count, + uint32_t * incr_block_offset, uint32_t * incr_block_count); boolean_t -shadow_map_write(shadow_map_t * map, u_long block_offset, u_long block_count, - u_long * incr_block_offset, u_long * incr_block_count); +shadow_map_write(shadow_map_t * map, uint32_t block_offset, uint32_t block_count, + uint32_t * incr_block_offset, uint32_t * incr_block_count); boolean_t -shadow_map_is_written(shadow_map_t * map, u_long block_offset); +shadow_map_is_written(shadow_map_t * map, uint32_t block_offset); -u_long +uint32_t shadow_map_shadow_size(shadow_map_t * map); shadow_map_t * shadow_map_create(off_t file_size, off_t shadow_size, - unsigned long band_size, unsigned long block_size); + uint32_t band_size, uint32_t block_size); void shadow_map_free(shadow_map_t * map); diff --git a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c index 4ac024c27..bac913331 100644 --- a/bsd/dev/vn/vn.c +++ b/bsd/dev/vn/vn.c @@ -172,7 +172,7 @@ struct vn_softc { u_int64_t sc_fsize; /* file size in bytes */ u_int64_t sc_size; /* size of vn, sc_secsize scale */ int sc_flags; /* flags */ - u_long sc_secsize; /* sector size */ + u_int32_t sc_secsize; /* sector size */ struct vnode *sc_vp; /* vnode if not NULL */ uint32_t sc_vid; int sc_open_flags; @@ -198,19 +198,19 @@ static u_int32_t vn_options; static int setcred(struct vnode * vp, kauth_cred_t cred); static void vnclear (struct vn_softc *vn, vfs_context_t ctx); -static void vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to); +static void vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to); void vndevice_init(void); int vndevice_root_image(char * path, char devname[], dev_t * dev_p); static int vniocattach_file(struct vn_softc *vn, - struct user_vn_ioctl *vniop, + struct vn_ioctl_64 *vniop, dev_t dev, int in_kernel, proc_t p); static int vniocattach_shadow(struct vn_softc * vn, - struct user_vn_ioctl *vniop, + struct vn_ioctl_64 *vniop, dev_t dev, int in_kernel, proc_t p); @@ -289,7 +289,7 @@ static 
int vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, vfs_context_t ctx) { - u_long blocksize = vn->sc_secsize; + u_int32_t blocksize = vn->sc_secsize; int error = 0; off_t offset; user_ssize_t resid; @@ -300,9 +300,9 @@ vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, orig_offset = offset = uio_offset(uio); while (resid > 0) { - u_long remainder; - u_long this_block_number; - u_long this_block_count; + u_int32_t remainder; + u_int32_t this_block_number; + u_int32_t this_block_count; off_t this_offset; user_ssize_t this_resid; struct vnode * vp; @@ -348,7 +348,7 @@ vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, static int vncopy_block_to_shadow(struct vn_softc * vn, vfs_context_t ctx, - u_long file_block, u_long shadow_block) + u_int32_t file_block, u_int32_t shadow_block) { int error; char * tmpbuf; @@ -382,7 +382,7 @@ static int vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, vfs_context_t ctx) { - u_long blocksize = vn->sc_secsize; + u_int32_t blocksize = vn->sc_secsize; int error = 0; user_ssize_t resid; off_t offset; @@ -392,11 +392,11 @@ vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, while (resid > 0) { int flags = 0; - u_long offset_block_number; - u_long remainder; - u_long resid_block_count; - u_long shadow_block_count; - u_long shadow_block_number; + u_int32_t offset_block_number; + u_int32_t remainder; + u_int32_t resid_block_count; + u_int32_t shadow_block_count; + u_int32_t shadow_block_number; user_ssize_t this_resid; /* figure out which blocks to write */ @@ -429,7 +429,7 @@ vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) * vn->sc_secsize; vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx); -#endif 0 +#endif } /* write the blocks (or parts thereof) */ uio_setoffset(uio, (off_t) @@ -439,8 +439,8 @@ vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, this_resid = resid; if ((flags 
& FLAGS_LAST_BLOCK_PARTIAL) != 0) { /* copy the last block to the shadow */ - u_long d; - u_long s; + u_int32_t d; + u_int32_t s; s = offset_block_number + resid_block_count - 1; @@ -449,7 +449,7 @@ vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, error = vncopy_block_to_shadow(vn, ctx, s, d); if (error) { printf("vnwrite_shadow: failed to copy" - " block %lu to shadow block %lu\n", + " block %u to shadow block %u\n", s, d); break; } @@ -463,7 +463,7 @@ vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, shadow_block_number); if (error) { printf("vnwrite_shadow: failed to" - " copy block %lu to shadow block %lu\n", + " copy block %u to shadow block %u\n", offset_block_number, shadow_block_number); break; @@ -648,19 +648,19 @@ static int shadow_read(struct vn_softc * vn, struct buf * bp, char * base, vfs_context_t ctx) { - u_long blocksize = vn->sc_secsize; + u_int32_t blocksize = vn->sc_secsize; int error = 0; - u_long offset; + u_int32_t offset; boolean_t read_shadow; - u_long resid; - u_long start = 0; + u_int32_t resid; + u_int32_t start = 0; offset = buf_blkno(bp); resid = buf_resid(bp) / blocksize; while (resid > 0) { user_ssize_t temp_resid; - u_long this_offset; - u_long this_resid; + u_int32_t this_offset; + u_int32_t this_resid; struct vnode * vp; read_shadow = shadow_map_read(vn->sc_shadow_map, @@ -696,19 +696,19 @@ static int shadow_write(struct vn_softc * vn, struct buf * bp, char * base, vfs_context_t ctx) { - u_long blocksize = vn->sc_secsize; + u_int32_t blocksize = vn->sc_secsize; int error = 0; - u_long offset; + u_int32_t offset; boolean_t shadow_grew; - u_long resid; - u_long start = 0; + u_int32_t resid; + u_int32_t start = 0; offset = buf_blkno(bp); resid = buf_resid(bp) / blocksize; while (resid > 0) { user_ssize_t temp_resid; - u_long this_offset; - u_long this_resid; + u_int32_t this_offset; + u_int32_t this_resid; shadow_grew = shadow_map_write(vn->sc_shadow_map, offset, resid, @@ -874,13 +874,13 @@ 
vnioctl(dev_t dev, u_long cmd, caddr_t data, int is_char) { struct vn_softc *vn; - struct user_vn_ioctl *viop; + struct vn_ioctl_64 *viop; int error; u_int32_t *f; u_int64_t * o; int unit; struct vfsioattr ioattr; - struct user_vn_ioctl user_vnio; + struct vn_ioctl_64 user_vnio; boolean_t funnel_state; struct vfs_context context; @@ -899,12 +899,17 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, context.vc_thread = current_thread(); context.vc_ucred = vn->sc_cred; - viop = (struct user_vn_ioctl *)data; + viop = (struct vn_ioctl_64 *)data; f = (u_int32_t *)data; o = (u_int64_t *)data; switch (cmd) { +#ifdef __LP64__ + case VNIOCDETACH32: + case VNIOCDETACH: +#else case VNIOCDETACH: case VNIOCDETACH64: +#endif case DKIOCGETBLOCKSIZE: case DKIOCSETBLOCKSIZE: case DKIOCGETMAXBLOCKCOUNTREAD: @@ -991,8 +996,13 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, case DKIOCGETBLOCKCOUNT: *o = vn->sc_size; break; +#ifdef __LP64__ + case VNIOCSHADOW32: + case VNIOCSHADOW: +#else case VNIOCSHADOW: case VNIOCSHADOW64: +#endif if (vn->sc_shadow_vp != NULL) { error = EBUSY; break; @@ -1004,7 +1014,7 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, } if (!proc_is64bit(p)) { /* downstream code expects LP64 version of vn_ioctl structure */ - vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio); + vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio); viop = &user_vnio; } if (viop->vn_file == USER_ADDR_NULL) { @@ -1014,8 +1024,13 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, error = vniocattach_shadow(vn, viop, dev, 0, p); break; +#ifdef __LP64__ + case VNIOCATTACH32: + case VNIOCATTACH: +#else case VNIOCATTACH: case VNIOCATTACH64: +#endif if (is_char) { /* attach only on block device */ error = ENODEV; @@ -1027,7 +1042,7 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, } if (!proc_is64bit(p)) { /* downstream code expects LP64 version of vn_ioctl structure */ - vn_ioctl_to_64((struct vn_ioctl *)viop, &user_vnio); + vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio); viop = 
&user_vnio; } if (viop->vn_file == USER_ADDR_NULL) { @@ -1037,8 +1052,13 @@ vnioctl(dev_t dev, u_long cmd, caddr_t data, error = vniocattach_file(vn, viop, dev, 0, p); break; +#ifdef __LP64__ + case VNIOCDETACH32: + case VNIOCDETACH: +#else case VNIOCDETACH: case VNIOCDETACH64: +#endif if (is_char) { /* detach only on block device */ error = ENODEV; @@ -1107,7 +1127,7 @@ vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) static int vniocattach_file(struct vn_softc *vn, - struct user_vn_ioctl *vniop, + struct vn_ioctl_64 *vniop, dev_t dev, int in_kernel, proc_t p) @@ -1121,7 +1141,7 @@ vniocattach_file(struct vn_softc *vn, flags = FREAD|FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, ctx); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx); } else { NDINIT(&nd, LOOKUP, FOLLOW, @@ -1136,7 +1156,7 @@ vniocattach_file(struct vn_softc *vn, } flags &= ~FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx); } else { @@ -1190,8 +1210,8 @@ vniocattach_file(struct vn_softc *vn, } static int -vniocattach_shadow(struct vn_softc *vn, struct user_vn_ioctl *vniop, - __unused int dev, int in_kernel, proc_t p) +vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop, + __unused dev_t dev, int in_kernel, proc_t p) { vfs_context_t ctx = vfs_context_current(); struct nameidata nd; @@ -1201,7 +1221,7 @@ vniocattach_shadow(struct vn_softc *vn, struct user_vn_ioctl *vniop, flags = FREAD|FWRITE; if (in_kernel) { - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE32, vniop->vn_file, ctx); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx); } else { NDINIT(&nd, LOOKUP, FOLLOW, @@ -1244,7 +1264,7 @@ vndevice_root_image(char * path, char devname[], dev_t * dev_p) { int error = 0; struct vn_softc * vn; - struct user_vn_ioctl vnio; + struct vn_ioctl_64 vnio; vnio.vn_file = CAST_USER_ADDR_T(path); vnio.vn_size = 0; @@ -1377,7 
+1397,7 @@ vndevice_do_init( void ) } static void -vn_ioctl_to_64(struct vn_ioctl *from, struct user_vn_ioctl *to) +vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to) { to->vn_file = CAST_USER_ADDR_T(from->vn_file); to->vn_size = from->vn_size; diff --git a/bsd/dev/x86_64/munge.s b/bsd/dev/x86_64/munge.s new file mode 100644 index 000000000..ec5b6123b --- /dev/null +++ b/bsd/dev/x86_64/munge.s @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2005-2008 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Syscall argument mungers. + * + * The data to be munged has been explicitly copied in to the argument area, + * and will be munged in place in the uu_arg[] array.
Because of this, the + * functions all take the same arguments as their PPC equivalents, but the + * first argument is ignored. These mungers are for 32-bit app's syscalls, + * since 64-bit args are stored into the save area (which overlays the + * uu_args) in the order the syscall ABI calls for. + * + * The issue is that the incoming args are 32-bit, but we must expand + * them in place into 64-bit args, as if they were from a 64-bit process. + * + * There are several functions in this file. Each takes two parameters: + * + * void munge_XXXX(const void *regs, // %rdi + * void *uu_args); // %rsi + * + * The name of the function encodes the number and type of the parameters, + * as follows: + * + * w = a 32-bit value such as an int or a 32-bit ptr, that does not + * require sign extension. These are handled by zeroing a word + * of output, and copying a word from input to output. + * + * s = a 32-bit value such as a long, which must be sign-extended to + * a 64-bit long-long in the uu_args. These are handled by + * loading a word of input and sign extending it to a double, + * and storing two words of output. + * + * l = a 64-bit long-long. These are handled by copying two words + * of input to the output. + * + * For example, "munge_wls" takes a word, a long-long, and a sign-extended word. This + * takes four words in the uu_arg[] area: the first word is in one, the + * long-long takes two, and the final word is in the fourth. We store six + * words: the low word is left in place, followed by a 0, followed by the + * two words of the long-long, followed by the low word and the sign extended + * high word of the preceding low word. + * + * Because this is an in-place modification, we actually start at the end + * of uu_arg[] and work our way back to the beginning of the array. + * + * As you can see, we save a lot of code by collapsing mungers that are + * prefixes or suffixes of each other.
+ */ +#include + +ENTRY(munge_w) + movl $0,4(%rsi) + ret + +ENTRY(munge_ww) + xorl %edx,%edx + jmp Lw2 +ENTRY(munge_www) + xorl %edx,%edx + jmp Lw3 +ENTRY(munge_wwww) + xorl %edx,%edx + jmp Lw4 +ENTRY(munge_wwwww) + xorl %edx,%edx + jmp Lw5 +ENTRY(munge_wwwwww) + xorl %edx,%edx + jmp Lw6 +ENTRY(munge_wwwwwww) + xorl %edx,%edx + jmp Lw7 +ENTRY(munge_wwwwwwww) + xorl %edx,%edx + movl 28(%rsi),%eax + movl %eax,56(%rsi) + movl %edx,60(%rsi) +Lw7: + movl 24(%rsi),%eax + movl %eax,48(%rsi) + movl %edx,52(%rsi) +Lw6: + movl 20(%rsi),%eax + movl %eax,40(%rsi) + movl %edx,44(%rsi) +Lw5: + movl 16(%rsi),%eax + movl %eax,32(%rsi) + movl %edx,36(%rsi) +Lw4: + movl 12(%rsi),%eax + movl %eax,24(%rsi) + movl %edx,28(%rsi) +Lw3: + movl 8(%rsi),%eax + movl %eax,16(%rsi) + movl %edx,20(%rsi) +Lw2: + movl 4(%rsi),%eax + movl %eax,8(%rsi) + movl %edx,12(%rsi) + movl %edx,4(%rsi) + ret + + +Entry(munge_wl) /* Costs an extra w move to do this */ +ENTRY(munge_wlw) + xorl %edx,%edx + movl 12(%rsi),%eax + movl %eax,16(%rsi) + movl %edx,20(%rsi) + movl 8(%rsi),%eax + movl %eax,12(%rsi) + movl 4(%rsi),%eax + movl %eax,8(%rsi) + movl %edx,4(%rsi) + ret + +Entry(munge_wwwlw) + xorl %edx,%edx + movl 20(%rsi),%eax + movl %eax,32(%rsi) + movl %edx,36(%rsi) + jmp Lwwwl + + +ENTRY(munge_wwwl) + xorl %edx,%edx +Lwwwl: + movl 12(%rsi),%eax + movl %eax,24(%rsi) + movl 16(%rsi),%eax + movl %eax,28(%rsi) + jmp Lw3 + +ENTRY(munge_wwwwlw) + xorl %edx,%edx + movl 24(%rsi),%eax + movl %eax,40(%rsi) + movl %edx,44(%rsi) + jmp Lwwwwl + +ENTRY(munge_wwwwl) + xorl %edx,%edx +Lwwwwl: + movl 16(%rsi),%eax + movl %eax,32(%rsi) + movl 20(%rsi),%eax + movl %eax,36(%rsi) + jmp Lw4 + +ENTRY(munge_wwwwwl) + xorl %edx,%edx + movl 20(%rsi),%eax + movl %eax,40(%rsi) + movl 24(%rsi),%eax + movl %eax,44(%rsi) + jmp Lw5 + +ENTRY(munge_wwwwwwlw) + xorl %edx,%edx + movl 32(%rsi),%eax + movl %eax,56(%rsi) + movl %edx,60(%rsi) + movl 24(%rsi),%eax + movl %eax,48(%rsi) + movl 28(%rsi),%eax + movl %eax,52(%rsi) + jmp Lw6 + 
+ENTRY(munge_wwwwwwll) + xorl %edx,%edx + movl 32(%rsi),%eax + movl %eax,56(%rsi) + movl 36(%rsi),%eax + movl %eax,60(%rsi) + movl 24(%rsi),%eax + movl %eax,48(%rsi) + movl 28(%rsi),%eax + movl %eax,52(%rsi) + jmp Lw6 + +ENTRY(munge_wsw) + movl 8(%rsi),%eax + movl %eax,16(%rsi) + movl $0,20(%rsi) + movl 4(%rsi),%eax + cltd + movl %eax,8(%rsi) + movl %edx,12(%rsi) + movl $0,4(%rsi) + ret + +ENTRY(munge_wws) + movl 8(%rsi),%eax + cltd + movl %eax,16(%rsi) + movl %edx,20(%rsi) + xorl %edx,%edx + jmp Lw2 + +ENTRY(munge_wwwsw) + movl 16(%rsi),%eax + movl %eax,32(%rsi) + movl $0,36(%rsi) + movl 12(%rsi),%eax + cltd + movl %eax,24(%rsi) + movl %edx,28(%rsi) + xorl %edx,%edx + jmp Lw3 + +ENTRY(munge_llllll) + ret // nothing to do here, either - all args are already + // 64-bit and do not require sign/zero extension + // also, there is no mixing in of shorter args that + // do need extension diff --git a/bsd/hfs/MacOSStubs.c b/bsd/hfs/MacOSStubs.c index 43459f1ae..0a1bded31 100644 --- a/bsd/hfs/MacOSStubs.c +++ b/bsd/hfs/MacOSStubs.c @@ -159,14 +159,10 @@ void DisposePtr (Ptr p) void DebugStr( -#if CONFIG_NO_KPRINTF_STRINGS - __unused ConstStr255Param debuggerMsg -#else - ConstStr255Param debuggerMsg -#endif + const char * debuggerMsg ) { - kprintf ("*** Mac OS Debugging Message: %s\n", &debuggerMsg[1]); + kprintf ("*** Mac OS Debugging Message: %s\n", debuggerMsg); DEBUG_BREAK; } diff --git a/bsd/hfs/Makefile b/bsd/hfs/Makefile index cdc1fb8ba..814b9184d 100644 --- a/bsd/hfs/Makefile +++ b/bsd/hfs/Makefile @@ -13,12 +13,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + DATAFILES = \ hfs_encodings.h hfs_format.h hfs_mount.h diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index 5bb4ec920..67ecb6d1e 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,7 +31,7 @@ #define HFS_SPARSE_DEV 1 -#ifdef DEBUG +#if DEBUG #define HFS_CHECK_LOCK_ORDER 1 #endif @@ -108,15 +108,19 @@ extern struct timezone gTimeZone; * volume size, all capped at a certain fixed level */ -#define HFS_ROOTLOWDISKTRIGGERFRACTION 5 +#define HFS_ROOTVERYLOWDISKTRIGGERFRACTION 5 +#define HFS_ROOTVERYLOWDISKTRIGGERLEVEL ((u_int64_t)(125*1024*1024)) +#define HFS_ROOTLOWDISKTRIGGERFRACTION 10 #define HFS_ROOTLOWDISKTRIGGERLEVEL ((u_int64_t)(250*1024*1024)) -#define HFS_ROOTLOWDISKSHUTOFFFRACTION 6 +#define HFS_ROOTLOWDISKSHUTOFFFRACTION 11 #define HFS_ROOTLOWDISKSHUTOFFLEVEL ((u_int64_t)(375*1024*1024)) -#define HFS_LOWDISKTRIGGERFRACTION 1 -#define HFS_LOWDISKTRIGGERLEVEL ((u_int64_t)(50*1024*1024)) -#define HFS_LOWDISKSHUTOFFFRACTION 2 -#define HFS_LOWDISKSHUTOFFLEVEL ((u_int64_t)(75*1024*1024)) +#define HFS_VERYLOWDISKTRIGGERFRACTION 1 +#define HFS_VERYLOWDISKTRIGGERLEVEL ((u_int64_t)(100*1024*1024)) +#define HFS_LOWDISKTRIGGERFRACTION 2 +#define HFS_LOWDISKTRIGGERLEVEL ((u_int64_t)(150*1024*1024)) +#define HFS_LOWDISKSHUTOFFFRACTION 3 +#define HFS_LOWDISKSHUTOFFLEVEL ((u_int64_t)(200*1024*1024)) /* Internal Data structures*/ @@ -153,7 +157,7 @@ typedef struct hfsmount { gid_t hfs_gid; /* gid to set as owner of the files */ mode_t hfs_dir_mask; /* mask to and with directory protection bits */ mode_t hfs_file_mask; /* mask to and with file protection bits */ - u_long hfs_encoding; /* Default encoding for non hfs+ volumes */ + u_int32_t hfs_encoding; /* Default encoding for non hfs+ volumes */ /* Persistent fields (on disk, dynamic) */ time_t hfs_mtime; /* file system last modification time */ @@ -161,6 +165,7 @@ typedef struct hfsmount { u_int32_t hfs_dircount; /* number of directories in file system */ u_int32_t freeBlocks; /* free allocation blocks */ u_int32_t nextAllocation; /* start of next allocation search */ + u_int32_t 
sparseAllocation; /* start of allocations for sparse devices */ u_int32_t vcbNxtCNID; /* next unused catalog node ID - protected by catalog lock */ u_int32_t vcbWrCnt; /* file system write count */ u_int64_t encodingsBitmap; /* in-use encodings */ @@ -230,7 +235,8 @@ typedef struct hfsmount { u_int32_t hfs_global_lock_nesting; /* Notification variables: */ - unsigned long hfs_notification_conditions; + u_int32_t hfs_notification_conditions; + u_int32_t hfs_freespace_notify_dangerlimit; u_int32_t hfs_freespace_notify_warninglimit; u_int32_t hfs_freespace_notify_desiredlevel; @@ -243,6 +249,8 @@ typedef struct hfsmount { u_int32_t hfs_metazone_end; u_int32_t hfs_hotfile_start; u_int32_t hfs_hotfile_end; + u_int32_t hfs_min_alloc_start; + u_int32_t hfs_freed_block_count; int hfs_hotfile_freeblks; int hfs_hotfile_maxblks; int hfs_overflow_maxblks; @@ -262,6 +270,7 @@ typedef struct hfsmount { struct vnode * hfs_backingfs_rootvp; u_int32_t hfs_last_backingstatfs; int hfs_sparsebandblks; + u_int64_t hfs_backingfs_maxblocks; #endif size_t hfs_max_inline_attrsize; @@ -274,6 +283,12 @@ typedef struct hfsmount { u_int32_t hfs_resize_filesmoved; u_int32_t hfs_resize_totalfiles; + /* Per mount cnode hash variables: */ + lck_mtx_t hfs_chash_mutex; /* protects access to cnode hash table */ + u_long hfs_cnodehash; /* size of cnode hash table - 1 */ + LIST_HEAD(cnodehashhead, cnode) *hfs_cnodehashtbl; /* base of cnode hash */ + + /* * About the sync counters: * hfs_sync_scheduled keeps track whether a timer was scheduled but we @@ -293,7 +308,9 @@ typedef struct hfsmount { int32_t hfs_sync_incomplete; u_int64_t hfs_last_sync_request_time; u_int64_t hfs_last_sync_time; - uint32_t hfs_active_threads; + u_int32_t hfs_active_threads; + u_int64_t hfs_max_pending_io; + thread_call_t hfs_syncer; // removeable devices get sync'ed by this guy } hfsmount_t; @@ -370,8 +387,10 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; /* When set, we're in hfs_changefs, so hfs_sync should do 
nothing. */ #define HFS_IN_CHANGEFS 0x40000 /* When set, we are in process of downgrading or have downgraded to read-only, - * so hfs_start_transaction should return EROFS. */ + * so hfs_start_transaction should return EROFS. + */ #define HFS_RDONLY_DOWNGRADE 0x80000 +#define HFS_DID_CONTIG_SCAN 0x100000 /* Macro to update next allocation block in the HFS mount structure. If @@ -477,9 +496,6 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; #define FCBTOVCB(FCB) FCBTOHFS(FCB) -#define HFS_KNOTE(vp, hint) KNOTE(&VTOC(vp)->c_knotes, (hint)) - - #define E_NONE 0 #define kHFSBlockSize 512 @@ -614,7 +630,7 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp); ******************************************************************************/ extern int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); -extern int hfs_truncate(struct vnode *, off_t, int, int, vfs_context_t); +extern int hfs_truncate(struct vnode *, off_t, int, int, int, vfs_context_t); extern int hfs_bmap(struct vnode *, daddr_t, struct vnode **, daddr64_t *, unsigned int *); @@ -628,7 +644,7 @@ extern int hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int extern void hfs_check_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype); -extern int hfs_isallocated(struct hfsmount *, u_long, u_long); +extern int hfs_isallocated(struct hfsmount *, u_int32_t, u_int32_t); /***************************************************************************** @@ -661,9 +677,9 @@ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp); /***************************************************************************** Functions from hfs_vfsutils.c ******************************************************************************/ -unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, - unsigned long blockSizeLimit, - unsigned long baseMultiple); +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t 
baseMultiple); OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, struct proc *p); @@ -689,7 +705,7 @@ extern int hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_ extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfslocktype); extern void hfs_systemfile_unlock(struct hfsmount *, int); -extern u_long GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, +extern u_int32_t GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, struct cat_attr *fattr, struct cat_fork *forkinfo); extern void hfs_remove_orphans(struct hfsmount *); @@ -716,8 +732,12 @@ extern int hfs_virtualmetafile(struct cnode *); extern int hfs_start_transaction(struct hfsmount *hfsmp); extern int hfs_end_transaction(struct hfsmount *hfsmp); +extern int hfs_journal_flush(struct hfsmount *hfsmp); extern void hfs_sync_ejectable(struct hfsmount *hfsmp); +/* Erase unused Catalog nodes due to . */ +extern int hfs_erase_unused_nodes(struct hfsmount *hfsmp); + /***************************************************************************** Functions from hfs_vnops.c diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c index a3e9c55b7..dc24f99a7 100644 --- a/bsd/hfs/hfs_attrlist.c +++ b/bsd/hfs/hfs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -58,11 +58,11 @@ static void packnameattr(struct attrblock *abp, struct vnode *vp, static void packcommonattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * cdp, - struct cat_attr * cap, struct proc *p); + struct cat_attr * cap, struct vfs_context *ctx); static void packfileattr(struct attrblock *abp, struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, - struct cat_fork *rsrcfork); + struct cat_fork *rsrcfork, struct vnode *vp); static void packdirattr(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc * descp, @@ -101,7 +101,6 @@ hfs_vnop_readdirattr(ap) struct attrlist *alist = ap->a_alist; uio_t uio = ap->a_uio; int maxcount = ap->a_maxcount; - struct proc *p = vfs_context_proc(ap->a_context); u_int32_t fixedblocksize; u_int32_t maxattrblocksize; u_int32_t currattrbufsize; @@ -118,6 +117,7 @@ hfs_vnop_readdirattr(ap) unsigned int tag; int maxentries; int lockflags; + u_int32_t dirchg = 0; *(ap->a_actualcount) = 0; *(ap->a_eofflag) = 0; @@ -148,6 +148,7 @@ hfs_vnop_readdirattr(ap) hfsmp = VTOHFS(dvp); dir_entries = dcp->c_entries; + dirchg = dcp->c_dirchangecnt; /* Extract directory index and tag (sequence number) from uio_offset */ index = uio_offset(uio) & HFS_INDEX_MASK; @@ -164,6 +165,10 @@ hfs_vnop_readdirattr(ap) if (alist->commonattr & ATTR_CMN_NAME) maxattrblocksize += kHFSPlusMaxFileNameBytes + 1; MALLOC(attrbufptr, void *, maxattrblocksize, M_TEMP, M_WAITOK); + if (attrbufptr == NULL) { + error = ENOMEM; + goto exit2; + } attrptr = attrbufptr; varptr = (char *)attrbufptr + fixedblocksize; /* Point to variable-length storage */ @@ -185,7 +190,6 @@ hfs_vnop_readdirattr(ap) * Constrain our list size. 
*/ maxentries = uio_resid(uio) / (fixedblocksize + HFS_AVERAGE_NAME_SIZE); - maxentries = min(maxentries, dcp->c_entries - index); maxentries = min(maxentries, maxcount); maxentries = min(maxentries, MAXCATENTRIES); if (maxentries < 1) { @@ -195,6 +199,10 @@ hfs_vnop_readdirattr(ap) /* Initialize a catalog entry list. */ MALLOC(ce_list, struct cat_entrylist *, CE_LIST_SIZE(maxentries), M_TEMP, M_WAITOK); + if (ce_list == NULL) { + error = ENOMEM; + goto exit2; + } bzero(ce_list, CE_LIST_SIZE(maxentries)); ce_list->maxentries = maxentries; @@ -253,7 +261,7 @@ hfs_vnop_readdirattr(ap) } } else if (!(ap->a_options & FSOPT_NOINMEMUPDATE)) { /* Get in-memory cnode data (if any). */ - vp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, cattrp->ca_fileid, 0, 0); + vp = hfs_chash_getvnode(hfsmp, cattrp->ca_fileid, 0, 0); } if (vp != NULL) { cp = VTOC(vp); @@ -284,7 +292,7 @@ hfs_vnop_readdirattr(ap) attrblk.ab_context = ap->a_context; /* Pack catalog entries into attribute buffer. */ - hfs_packattrblk(&attrblk, hfsmp, vp, cdescp, cattrp, &c_datafork, &c_rsrcfork, p); + hfs_packattrblk(&attrblk, hfsmp, vp, cdescp, cattrp, &c_datafork, &c_rsrcfork, ap->a_context); currattrbufsize = ((char *)varptr - (char *)attrbufptr); /* All done with vnode. */ @@ -315,8 +323,7 @@ hfs_vnop_readdirattr(ap) if ((--maxcount <= 0) || // LP64todo - fix this! 
uio_resid(uio) < 0 || - ((u_int32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE)) || - (index >= dir_entries)) { + ((u_int32_t)uio_resid(uio) < (fixedblocksize + HFS_AVERAGE_NAME_SIZE))){ break; } } @@ -374,7 +381,7 @@ hfs_vnop_readdirattr(ap) dirhint->dh_index |= tag; exit2: - *ap->a_newstate = dcp->c_dirchangecnt; + *ap->a_newstate = dirchg; /* Drop directory hint on error or if there are no more entries */ if (dirhint) { @@ -407,18 +414,18 @@ hfs_packattrblk(struct attrblock *abp, struct cat_attr *attrp, struct cat_fork *datafork, struct cat_fork *rsrcfork, - struct proc *p) + struct vfs_context *ctx) { struct attrlist *attrlistp = abp->ab_attrlist; if (attrlistp->commonattr) - packcommonattr(abp, hfsmp, vp, descp, attrp, p); + packcommonattr(abp, hfsmp, vp, descp, attrp, ctx); if (attrlistp->dirattr && S_ISDIR(attrp->ca_mode)) packdirattr(abp, hfsmp, vp, descp,attrp); if (attrlistp->fileattr && !S_ISDIR(attrp->ca_mode)) - packfileattr(abp, hfsmp, attrp, datafork, rsrcfork); + packfileattr(abp, hfsmp, attrp, datafork, rsrcfork, vp); } @@ -515,18 +522,18 @@ packcommonattr( struct vnode *vp, struct cat_desc * cdp, struct cat_attr * cap, - struct proc *p) + struct vfs_context * ctx) { attrgroup_t attr = abp->ab_attrlist->commonattr; struct mount *mp = HFSTOVFS(hfsmp); void *attrbufptr = *abp->ab_attrbufpp; void *varbufptr = *abp->ab_varbufpp; - boolean_t is_64_bit = proc_is64bit(p); + boolean_t is_64_bit = proc_is64bit(vfs_context_proc(ctx)); uid_t cuid = 1; int isroot = 0; if (attr & (ATTR_CMN_OWNERID | ATTR_CMN_GRPID)) { - cuid = kauth_cred_getuid(proc_ucred(p)); + cuid = kauth_cred_getuid(vfs_context_ucred(ctx)); isroot = cuid == 0; } @@ -588,62 +595,62 @@ packcommonattr( } if (ATTR_CMN_CRTIME & attr) { if (is_64_bit) { - ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_itime; - ((struct user_timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct user_timespec *)attrbufptr) + 1; + ((struct user64_timespec *)attrbufptr)->tv_sec = 
cap->ca_itime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; } else { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_itime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct timespec *)attrbufptr) + 1; + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_itime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; } } if (ATTR_CMN_MODTIME & attr) { if (is_64_bit) { - ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; - ((struct user_timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct user_timespec *)attrbufptr) + 1; + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; } else { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_mtime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct timespec *)attrbufptr) + 1; + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_mtime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; } } if (ATTR_CMN_CHGTIME & attr) { if (is_64_bit) { - ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; - ((struct user_timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct user_timespec *)attrbufptr) + 1; + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; } else { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_ctime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct timespec *)attrbufptr) + 1; + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_ctime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; } } if 
(ATTR_CMN_ACCTIME & attr) { if (is_64_bit) { - ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_atime; - ((struct user_timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct user_timespec *)attrbufptr) + 1; + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; } else { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_atime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct timespec *)attrbufptr) + 1; + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_atime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; } } if (ATTR_CMN_BKUPTIME & attr) { if (is_64_bit) { - ((struct user_timespec *)attrbufptr)->tv_sec = cap->ca_btime; - ((struct user_timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct user_timespec *)attrbufptr) + 1; + ((struct user64_timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct user64_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user64_timespec *)attrbufptr) + 1; } else { - ((struct timespec *)attrbufptr)->tv_sec = cap->ca_btime; - ((struct timespec *)attrbufptr)->tv_nsec = 0; - attrbufptr = ((struct timespec *)attrbufptr) + 1; + ((struct user32_timespec *)attrbufptr)->tv_sec = cap->ca_btime; + ((struct user32_timespec *)attrbufptr)->tv_nsec = 0; + attrbufptr = ((struct user32_timespec *)attrbufptr) + 1; } } if (ATTR_CMN_FNDRINFO & attr) { @@ -675,7 +682,7 @@ packcommonattr( gid_t ngid = cap->ca_gid; if (!isroot) { - gid_t cgid = kauth_cred_getgid(proc_ucred(p)); + gid_t cgid = kauth_cred_getgid(vfs_context_ucred(ctx)); if (((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) ngid = cgid; else if (ngid == UNKNOWNUID) @@ -791,8 +798,12 @@ packfileattr( struct hfsmount *hfsmp, struct cat_attr *cattrp, struct cat_fork *datafork, - struct cat_fork *rsrcfork) + struct cat_fork 
*rsrcfork, + struct vnode *vp) { +#if !HFS_COMPRESSION +#pragma unused(vp) +#endif attrgroup_t attr = abp->ab_attrlist->fileattr; void *attrbufptr = *abp->ab_attrbufpp; void *varbufptr = *abp->ab_varbufpp; @@ -800,12 +811,25 @@ packfileattr( allocblksize = HFSTOVCB(hfsmp)->blockSize; + off_t datasize = datafork->cf_size; + off_t totalsize = datasize + rsrcfork->cf_size; +#if HFS_COMPRESSION + if ( cattrp->ca_flags & UF_COMPRESSED ) { + if (attr & (ATTR_FILE_DATALENGTH|ATTR_FILE_TOTALSIZE)) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(hfsmp, vp, cattrp->ca_fileid, &datasize, 1) ) { /* 1 == don't take the cnode lock */ + /* total size of a compressed file is just the data size */ + totalsize = datasize; + } + } + } +#endif + if (ATTR_FILE_LINKCOUNT & attr) { *((u_int32_t *)attrbufptr) = cattrp->ca_linkcount; attrbufptr = ((u_int32_t *)attrbufptr) + 1; } if (ATTR_FILE_TOTALSIZE & attr) { - *((off_t *)attrbufptr) = datafork->cf_size + rsrcfork->cf_size; + *((off_t *)attrbufptr) = totalsize; attrbufptr = ((off_t *)attrbufptr) + 1; } if (ATTR_FILE_ALLOCSIZE & attr) { @@ -828,23 +852,48 @@ packfileattr( *((u_int32_t *)attrbufptr) = 0; attrbufptr = ((u_int32_t *)attrbufptr) + 1; } + if (ATTR_FILE_DATALENGTH & attr) { - *((off_t *)attrbufptr) = datafork->cf_size; - attrbufptr = ((off_t *)attrbufptr) + 1; - } - if (ATTR_FILE_DATAALLOCSIZE & attr) { - *((off_t *)attrbufptr) = - (off_t)datafork->cf_blocks * (off_t)allocblksize; + *((off_t *)attrbufptr) = datasize; attrbufptr = ((off_t *)attrbufptr) + 1; } - if (ATTR_FILE_RSRCLENGTH & attr) { - *((off_t *)attrbufptr) = rsrcfork->cf_size; - attrbufptr = ((off_t *)attrbufptr) + 1; + +#if HFS_COMPRESSION + /* fake the data fork size on a decmpfs compressed file to reflect the + * uncompressed size. This ensures proper reading and copying of these files. + * NOTE: we may need to get the vnode here because the vnode parameter + * passed by hfs_vnop_readdirattr() may be null. 
+ */ + + if ( cattrp->ca_flags & UF_COMPRESSED ) { + if (attr & ATTR_FILE_DATAALLOCSIZE) { + *((off_t *)attrbufptr) = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (attr & ATTR_FILE_RSRCLENGTH) { + *((off_t *)attrbufptr) = 0; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (attr & ATTR_FILE_RSRCALLOCSIZE) { + *((off_t *)attrbufptr) = 0; + attrbufptr = ((off_t *)attrbufptr) + 1; + } } - if (ATTR_FILE_RSRCALLOCSIZE & attr) { - *((off_t *)attrbufptr) = - (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; - attrbufptr = ((off_t *)attrbufptr) + 1; + else +#endif + { + if (ATTR_FILE_DATAALLOCSIZE & attr) { + *((off_t *)attrbufptr) = (off_t)datafork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_RSRCLENGTH & attr) { + *((off_t *)attrbufptr) = rsrcfork->cf_size; + attrbufptr = ((off_t *)attrbufptr) + 1; + } + if (ATTR_FILE_RSRCALLOCSIZE & attr) { + *((off_t *)attrbufptr) = (off_t)rsrcfork->cf_blocks * (off_t)allocblksize; + attrbufptr = ((off_t *)attrbufptr) + 1; + } } *abp->ab_attrbufpp = attrbufptr; *abp->ab_varbufpp = varbufptr; @@ -863,9 +912,9 @@ hfs_attrblksize(struct attrlist *attrlist) boolean_t is_64_bit = proc_is64bit(current_proc()); if (is_64_bit) - sizeof_timespec = sizeof(struct user_timespec); + sizeof_timespec = sizeof(struct user64_timespec); else - sizeof_timespec = sizeof(struct timespec); + sizeof_timespec = sizeof(struct user32_timespec); DBG_ASSERT((attrlist->commonattr & ~ATTR_CMN_VALIDMASK) == 0); @@ -967,11 +1016,11 @@ hfs_real_user_access(vnode_t vp, vfs_context_t ctx) __private_extern__ -unsigned long +u_int32_t DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, struct mount *mp, kauth_cred_t cred, __unused struct proc *p) { - unsigned long permissions; + u_int32_t permissions; if (obj_uid == UNKNOWNUID) obj_uid = kauth_cred_getuid(cred); @@ -984,7 +1033,7 @@ DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t 
obj_mode, /* Otherwise, check the owner. */ if (hfs_owner_rights(VFSTOHFS(mp), obj_uid, cred, NULL, false) == 0) { - permissions = ((unsigned long)obj_mode & S_IRWXU) >> 6; + permissions = ((u_int32_t)obj_mode & S_IRWXU) >> 6; goto Exit; } @@ -993,13 +1042,13 @@ DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, int is_member; if (kauth_cred_ismember_gid(cred, obj_gid, &is_member) == 0 && is_member) { - permissions = ((unsigned long)obj_mode & S_IRWXG) >> 3; + permissions = ((u_int32_t)obj_mode & S_IRWXG) >> 3; goto Exit; } } /* Otherwise, settle for 'others' access. */ - permissions = (unsigned long)obj_mode & S_IRWXO; + permissions = (u_int32_t)obj_mode & S_IRWXO; Exit: return (permissions); diff --git a/bsd/hfs/hfs_attrlist.h b/bsd/hfs/hfs_attrlist.h index 990adf227..c40ba1e56 100644 --- a/bsd/hfs/hfs_attrlist.h +++ b/bsd/hfs/hfs_attrlist.h @@ -63,9 +63,7 @@ struct attrblock { ATTR_CMN_FNDRINFO |ATTR_CMN_OWNERID | \ ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | \ ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS | \ - ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | \ - ATTR_CMN_GRPUUID | ATTR_CMN_FILEID | \ - ATTR_CMN_PARENTID ) + ATTR_CMN_FILEID | ATTR_CMN_PARENTID ) #define HFS_ATTR_DIR_VALID \ (ATTR_DIR_LINKCOUNT | ATTR_DIR_ENTRYCOUNT | ATTR_DIR_MOUNTSTATUS) @@ -74,20 +72,19 @@ struct attrblock { (ATTR_FILE_LINKCOUNT |ATTR_FILE_TOTALSIZE | \ ATTR_FILE_ALLOCSIZE | ATTR_FILE_IOBLOCKSIZE | \ ATTR_FILE_CLUMPSIZE | ATTR_FILE_DEVTYPE | \ - ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST | \ ATTR_FILE_DATALENGTH | ATTR_FILE_DATAALLOCSIZE | \ ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE) extern int hfs_attrblksize(struct attrlist *attrlist); -extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, +extern u_int32_t DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid, mode_t obj_mode, struct mount *mp, kauth_cred_t cred, struct proc *p); extern void hfs_packattrblk(struct attrblock *abp, struct hfsmount *hfsmp, struct vnode *vp, struct cat_desc *descp, 
struct cat_attr *attrp, - struct cat_fork *datafork, struct cat_fork *rsrcfork, struct proc *p); + struct cat_fork *datafork, struct cat_fork *rsrcfork, struct vfs_context *ctx); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 0b9a39160..7e592c982 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -177,7 +177,7 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) bp = (struct buf *) blockPtr->blockHeader; if (bp == NULL) { - panic("ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); + panic("hfs: ModifyBlockStart: null bp for blockdescptr %p?!?\n", blockPtr); return; } @@ -207,7 +207,7 @@ btree_swap_node(struct buf *bp, __unused void *arg) */ retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true); if (retval) - panic("btree_swap_node: about to write corrupt node!\n"); + panic("hfs: btree_swap_node: about to write corrupt node!\n"); } @@ -464,7 +464,7 @@ OSStatus ExtendBTreeFile(FileReference vp, FSSize minEOF, FSSize maxEOF) if (ret) { // XXXdbg - this probably doesn't need to be a panic() panic("hfs: error truncating btree files (sz 0x%llx, trim %lld, ret %ld)\n", - filePtr->fcbEOF, trim, ret); + filePtr->fcbEOF, trim, (long)ret); goto out; } } @@ -588,7 +588,11 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node BTreeControlBlockPtr btcb = NULL; struct buf *bp = NULL; void * buffer; + u_int8_t *bitmap; u_int16_t *index; + u_int32_t node_num, num_map_nodes; + u_int32_t bytes_per_map_record; + u_int32_t temp; u_int16_t offset; int intrans = 0; int result; @@ -674,8 +678,34 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node goto exit; btcb->totalNodes = VTOF(vp)->ff_size / nodesize; - btcb->freeNodes = btcb->totalNodes - 1; + /* + * Figure out how many map nodes we'll need. + * + * bytes_per_map_record = the number of bytes in the map record of a + * map node. 
Since that is the only record in the node, it is the size + * of the node minus the node descriptor at the start, and two record + * offsets at the end of the node. The "- 2" is to round the size down + * to a multiple of 4 bytes (since sizeof(BTNodeDescriptor) is not a + * multiple of 4). + * + * The value "temp" here is the number of *bits* in the map record of + * the header node. + */ + bytes_per_map_record = nodesize - sizeof(BTNodeDescriptor) - 2*sizeof(u_int16_t) - 2; + temp = 8 * (nodesize - sizeof(BTNodeDescriptor) + - sizeof(BTHeaderRec) + - kBTreeHeaderUserBytes + - 4 * sizeof(u_int16_t)); + if (btcb->totalNodes > temp) { + num_map_nodes = howmany(btcb->totalNodes - temp, bytes_per_map_record * 8); + } + else { + num_map_nodes = 0; + } + + btcb->freeNodes = btcb->totalNodes - 1 - num_map_nodes; + /* * Initialize the b-tree header on disk */ @@ -701,6 +731,8 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node /* FILL IN THE NODE DESCRIPTOR: */ ndp = (BTNodeDescriptor *)buffer; + if (num_map_nodes != 0) + ndp->fLink = 1; ndp->kind = kBTHeaderNode; ndp->numRecords = 3; offset = sizeof(BTNodeDescriptor); @@ -723,8 +755,19 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node offset += kBTreeHeaderUserBytes; index[(nodesize / 2) - 3] = offset; - /* FILL IN THE MAP RECORD (only one node in use). */ - *((u_int8_t *)buffer + offset) = 0x80; + /* Mark the header node and map nodes in use in the map record. + * + * NOTE: Assumes that the header node's map record has at least + * (num_map_nodes + 1) bits. 
+ */ + bitmap = (u_int8_t *) buffer + offset; + temp = num_map_nodes + 1; /* +1 for the header node */ + while (temp >= 8) { + *(bitmap++) = 0xFF; + temp -= 8; + } + *bitmap = ~(0xFF >> temp); + offset += nodesize - sizeof(BTNodeDescriptor) - sizeof(BTHeaderRec) - kBTreeHeaderUserBytes - (4 * sizeof(int16_t)); index[(nodesize / 2) - 4] = offset; @@ -737,6 +780,48 @@ hfs_create_attr_btree(struct hfsmount *hfsmp, u_int32_t nodesize, u_int32_t node if (result) goto exit; + /* Create the map nodes: node numbers 1 .. num_map_nodes */ + for (node_num=1; node_num <= num_map_nodes; ++node_num) { + bp = buf_getblk(vp, node_num, nodesize, 0, 0, BLK_META); + if (bp == NULL) { + result = EIO; + goto exit; + } + buffer = (void *)buf_dataptr(bp); + blkdesc.buffer = buffer; + blkdesc.blockHeader = (void *)bp; + blkdesc.blockReadFromDisk = 0; + blkdesc.isModified = 0; + + ModifyBlockStart(vp, &blkdesc); + + bzero(buffer, nodesize); + index = (u_int16_t *)buffer; + + /* Fill in the node descriptor */ + ndp = (BTNodeDescriptor *)buffer; + if (node_num != num_map_nodes) + ndp->fLink = node_num + 1; + ndp->kind = kBTMapNode; + ndp->numRecords = 1; + offset = sizeof(BTNodeDescriptor); + index[(nodesize / 2) - 1] = offset; + + + /* Fill in the map record's offset */ + /* Note: We assume that the map record is all zeroes */ + offset = sizeof(BTNodeDescriptor) + bytes_per_map_record; + index[(nodesize / 2) - 2] = offset; + + if (hfsmp->jnl) { + result = btree_journal_modify_block_end(hfsmp, bp); + } else { + result = VNOP_BWRITE(bp); + } + if (result) + goto exit; + } + /* Update vp/cp for attribute btree */ lck_mtx_lock(&hfsmp->hfs_mutex); hfsmp->hfs_attribute_cp = VTOC(vp); diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 6f190bce5..5e7e13d99 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -90,7 +90,7 @@ u_char modetodirtype[16] = { #define MODE_TO_DT(mode) (modetodirtype[((mode) & S_IFMT) >> 12]) -static int cat_lookupbykey(struct hfsmount *hfsmp, 
CatalogKey *keyp, int allow_system_files, u_long hint, int wantrsrc, +static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files, u_int32_t hint, int wantrsrc, struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid); static int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, @@ -101,7 +101,7 @@ static int cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int static int cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, struct position_state *state); -static int resolvelinkid(struct hfsmount *hfsmp, u_long linkref, ino_t *ino); +static int resolvelinkid(struct hfsmount *hfsmp, u_int32_t linkref, ino_t *ino); static int getkey(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key); @@ -114,17 +114,17 @@ static void buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int3 static int catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *state); -static int builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_long hint, u_long encoding, +static int builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_int32_t hint, u_int32_t encoding, int isdir, struct cat_desc *descp); static void getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct cat_attr * attrp); -static void promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, HFSPlusCatalogKey *keyp, u_long *encoding); +static void promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, HFSPlusCatalogKey *keyp, u_int32_t *encoding); static void promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *file, int resource, struct cat_fork * forkp); static void promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlusCatalogFile *crp); static cnid_t getcnid(const CatalogRecord *crp); -static u_long getencoding(const CatalogRecord *crp); +static u_int32_t getencoding(const CatalogRecord 
*crp); static cnid_t getparentcnid(const CatalogRecord *recp); static int isadir(const CatalogRecord *crp); @@ -243,7 +243,7 @@ cat_convertkey( { int std_hfs = HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord; HFSPlusCatalogKey * pluskey = NULL; - u_long encoding; + u_int32_t encoding; if (std_hfs) { MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); @@ -459,7 +459,7 @@ cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp) } if (std_hfs) { HFSPlusCatalogKey * pluskey = NULL; - u_long encoding; + u_int32_t encoding; MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); promotekey(hfsmp, &keyp->hfs, pluskey, &encoding); @@ -514,12 +514,26 @@ cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files, case kHFSFileThreadRecord: case kHFSFolderThreadRecord: keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + + /* check for NULL name */ + if (keyp->hfs.nodeName[0] == 0) { + result = ENOENT; + goto exit; + } + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; break; case kHFSPlusFileThreadRecord: case kHFSPlusFolderThreadRecord: keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + + /* check for NULL name */ + if (keyp->hfsPlus.nodeName.length == 0) { + result = ENOENT; + goto exit; + } + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + (keyp->hfsPlus.nodeName.length * 2); break; @@ -540,7 +554,7 @@ cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files, * the key in the thread matches the key in the record. 
*/ if (cnid != dcnid) { - printf("Requested cnid (%d / %08x) != dcnid (%d / %08x)\n", cnid, cnid, dcnid, dcnid); + printf("hfs: cat_idlookup: Requested cnid (%d / %08x) != dcnid (%d / %08x)\n", cnid, cnid, dcnid, dcnid); result = ENOENT; } } @@ -562,6 +576,7 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, cnid_t fileID; u_int32_t prefixlen; int result; + int extlen1, extlen2; if (wantrsrc) return (ENOENT); @@ -588,6 +603,16 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, bcmp(outdescp->cd_nameptr, descp->cd_nameptr, prefixlen-6) != 0) goto falsematch; + extlen1 = CountFilenameExtensionChars(descp->cd_nameptr, descp->cd_namelen); + extlen2 = CountFilenameExtensionChars(outdescp->cd_nameptr, outdescp->cd_namelen); + if (extlen1 != extlen2) + goto falsematch; + + if (bcmp(outdescp->cd_nameptr + (outdescp->cd_namelen - extlen2), + descp->cd_nameptr + (descp->cd_namelen - extlen1), + extlen1) != 0) + goto falsematch; + return (0); falsematch: @@ -600,7 +625,7 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, * cat_lookupbykey - lookup a catalog node using a cnode key */ static int -cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files, u_long hint, int wantrsrc, +cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files, u_int32_t hint, int wantrsrc, struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid) { struct BTreeIterator * iterator; @@ -609,9 +634,9 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files u_int16_t datasize; int result; int std_hfs; - u_long ilink = 0; + u_int32_t ilink = 0; cnid_t cnid = 0; - u_long encoding = 0; + u_int32_t encoding = 0; std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); @@ -800,7 +825,7 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr u_int32_t datalen; int std_hfs; int result 
= 0; - u_long encoding = kTextEncodingMacRoman; + u_int32_t encoding = kTextEncodingMacRoman; int modeformat; modeformat = attrp->ca_mode & S_IFMT; @@ -986,7 +1011,7 @@ cat_rename ( int directory = from_cdp->cd_flags & CD_ISDIR; int is_dirlink = 0; int std_hfs; - u_long encoding = 0; + u_int32_t encoding = 0; vcb = HFSTOVCB(hfsmp); fcb = GetFileControlBlock(vcb->catalogRefNum); @@ -1152,7 +1177,7 @@ cat_rename ( int err; err = BTInsertRecord(fcb, from_iterator, &btdata, datasize); if (err) { - printf("cat_create: could not undo (BTInsert = %d)", err); + printf("hfs: cat_create: could not undo (BTInsert = %d)", err); hfs_mark_volume_inconsistent(hfsmp); result = err; goto exit; @@ -1179,7 +1204,7 @@ cat_rename ( int err; err = BTDeleteRecord(fcb, to_iterator); if (err) { - printf("cat_create: could not undo (BTDelete = %d)", err); + printf("hfs: cat_create: could not undo (BTDelete = %d)", err); hfs_mark_volume_inconsistent(hfsmp); result = err; goto exit; @@ -1227,7 +1252,7 @@ cat_rename ( /* Save the real encoding hint in the Finder Info (field 4). 
*/ if (directory && from_cdp->cd_cnid == kHFSRootFolderID) { - u_long realhint; + u_int32_t realhint; realhint = hfs_pickencoding(pluskey->nodeName.unicode, pluskey->nodeName.length); vcb->vcbFndrInfo[4] = SET_HFS_TEXT_ENCODING(realhint); @@ -1499,7 +1524,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st dir = (struct HFSPlusCatalogFolder *)crp; /* Do a quick sanity check */ if (dir->folderID != attrp->ca_fileid) { - printf("catrec_update: id %d != %d\n", dir->folderID, attrp->ca_fileid); + printf("hfs: catrec_update: id %d != %d\n", dir->folderID, attrp->ca_fileid); return (btNotFound); } dir->flags = attrp->ca_recflags; @@ -1642,7 +1667,7 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st if ((file->resourceFork.extents[0].startBlock != 0) && (file->resourceFork.extents[0].startBlock == file->dataFork.extents[0].startBlock)) { - panic("catrec_update: rsrc fork == data fork"); + panic("hfs: catrec_update: rsrc fork == data fork"); } /* Synchronize the lock state */ @@ -1686,7 +1711,7 @@ cat_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid) /* Update the bit in corresponding cnode, if any, in the hash. * If the cnode has the bit already set, stop the traversal. 
*/ - retval = hfs_chash_set_childlinkbit(hfsmp->hfs_raw_dev, cnid); + retval = hfs_chash_set_childlinkbit(hfsmp, cnid); if (retval == 0) { break; } @@ -1762,12 +1787,12 @@ cat_check_link_ancestry(struct hfsmount *hfsmp, cnid_t cnid, cnid_t pointed_at_c break; } if ((result = getkey(hfsmp, cnid, (CatalogKey *)keyp))) { - printf("cat_check_link_ancestry: getkey for %u failed\n", cnid); + printf("hfs: cat_check_link_ancestry: getkey for %u failed\n", cnid); invalid = 1; /* On errors, assume an invalid parent */ break; } if ((result = BTSearchRecord(fcb, ip, &btdata, NULL, NULL))) { - printf("cat_check_link_ancestry: cannot find %u\n", cnid); + printf("hfs: cat_check_link_ancestry: cannot find %u\n", cnid); invalid = 1; /* On errors, assume an invalid parent */ break; } @@ -1799,7 +1824,7 @@ updatelink_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct l HFSPlusCatalogFile *file; if (crp->recordType != kHFSPlusFileRecord) { - printf("updatelink_callback: unexpected rec type %d\n", crp->recordType); + printf("hfs: updatelink_callback: unexpected rec type %d\n", crp->recordType); return (btNotFound); } @@ -1812,7 +1837,7 @@ updatelink_callback(__unused const CatalogKey *ckp, CatalogRecord *crp, struct l file->hl_nextLinkID = state->nextlinkid; } } else { - printf("updatelink_callback: file %d isn't a chain\n", file->fileID); + printf("hfs: updatelink_callback: file %d isn't a chain\n", file->fileID); } return (0); } @@ -1843,7 +1868,7 @@ cat_updatelink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevlinkid, cni result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)updatelink_callback, &state); (void) BTFlushPath(fcb); } else { - printf("cat_updatelink: couldn't resolve cnid %d\n", linkfileid); + printf("hfs: cat_updatelink: couldn't resolve cnid %d\n", linkfileid); } return MacToVFSError(result); } @@ -1913,13 +1938,13 @@ cat_lookuplinkbyid(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid iterator->hint.nodeNum = 0; if 
((result = getkey(hfsmp, linkfileid, (CatalogKey *)&iterator->key))) { - printf("cat_lookuplinkbyid: getkey for %d failed %d\n", linkfileid, result); + printf("hfs: cat_lookuplinkbyid: getkey for %d failed %d\n", linkfileid, result); goto exit; } BDINIT(btdata, &file); if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { - printf("cat_lookuplinkbyid: cannot find %d\n", linkfileid); + printf("hfs: cat_lookuplinkbyid: cannot find %d\n", linkfileid); goto exit; } /* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */ @@ -1969,7 +1994,7 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * HFSPlusForkData *rsrcforkp; u_int32_t nextCNID; u_int32_t datalen; - u_long encoding; + u_int32_t encoding; int thread_inserted = 0; int alias_allocated = 0; int result = 0; @@ -1997,7 +2022,7 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * result = buildkey(hfsmp, descp, &bto->key, 0); if (result) { - printf("cat_createlink: err %d from buildkey\n", result); + printf("hfs: cat_createlink: err %d from buildkey\n", result); goto exit; } @@ -2076,7 +2101,7 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * exit: if (result) { if (thread_inserted) { - printf("cat_createlink: err %d from BTInsertRecord\n", MacToVFSError(result)); + printf("hfs: cat_createlink: err %d from BTInsertRecord\n", MacToVFSError(result)); buildthreadkey(nextCNID, 0, (CatalogKey *)&bto->iterator.key); if (BTDeleteRecord(fcb, &bto->iterator)) { @@ -2380,7 +2405,7 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, if (state->stdhfs) { struct HFSPlusCatalogFile cnoderec; HFSPlusCatalogKey * pluskey; - u_long encoding; + u_int32_t encoding; promoteattr(hfsmp, rec, &cnoderec); getbsdattr(hfsmp, &cnoderec, &cep->ce_attr); @@ -2581,7 +2606,7 @@ cat_getentriesattr(struct hfsmount *hfsmp, directoryhint_t *dirhint, struct cat_ /* Hard link information collected 
during cat_getdirentries. */ struct linkinfo { - u_long link_ref; + u_int32_t link_ref; user_addr_t dirent_addr; }; typedef struct linkinfo linkinfo_t; @@ -2630,6 +2655,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, int hide = 0; u_int8_t type = DT_UNKNOWN; u_int8_t is_mangled = 0; + u_int8_t is_link = 0; u_int8_t *nameptr; user_addr_t uiobase = USER_ADDR_NULL; size_t namelen = 0; @@ -2701,6 +2727,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, } else { ilinkref = crp->hfsPlusFile.hl_linkReference; } + is_link =1; } else if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator) && (crp->hfsPlusFile.flags & kHFSHasLinkChainMask) && @@ -2711,6 +2738,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, type = DT_DIR; /* A directory's link ref is always inode's file id. */ cnid = crp->hfsPlusFile.hl_linkReference; + is_link = 1; } /* Hide the journal files */ if ((curID == kHFSRootFolderID) && @@ -2756,9 +2784,22 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, /* Check result returned from encoding the filename to utf8 */ if (result == ENAMETOOLONG) { + /* + * If we were looking at a catalog record for a hardlink (not the inode), + * then we want to use its link ID as opposed to the inode ID for + * a mangled name. For all other cases, they are the same. Note that + * due to the way directory hardlinks are implemented, the actual link + * is going to be counted as a file record, so we can catch both + * with is_link. 
+ */ + cnid_t linkid = cnid; + if (is_link) { + linkid = crp->hfsPlusFile.fileID; + } + result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), cnp->ustr.unicode, maxnamelen + 1, - (ByteCount*)&namelen, nameptr, cnid); + (ByteCount*)&namelen, nameptr, linkid); is_mangled = 1; } } @@ -2790,7 +2831,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, uiobase = uio_curriovbase(state->cbs_uio); } /* If this entry won't fit then we're done */ - if ((uiosize > uio_resid(state->cbs_uio)) || + if ((uiosize > (user_size_t)uio_resid(state->cbs_uio)) || (ilinkref != 0 && state->cbs_nlinks == state->cbs_maxlinks)) { return (0); /* stop */ } @@ -2949,7 +2990,7 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp, uioaddr = (caddr_t) &catent; /* If this entry won't fit then we're done */ - if (uiosize > uio_resid(state->cbs_uio)) { + if (uiosize > (user_size_t)uio_resid(state->cbs_uio)) { return (0); /* stop */ } @@ -3168,7 +3209,7 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint * Post process any hard links to get the real file id. 
*/ if (state.cbs_nlinks > 0) { - u_int32_t fileid = 0; + ino_t fileid = 0; user_addr_t address; int i; @@ -3243,7 +3284,7 @@ cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, ++state->count; break; default: - printf("cat_findposition: invalid record type %d in dir %d\n", + printf("hfs: cat_findposition: invalid record type %d in dir %d\n", crp->recordType, curID); state->error = EINVAL; return (0); /* stop */ @@ -3420,11 +3461,15 @@ buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, hfskey.parentID = key->parentID; hfskey.nodeName[0] = 0; if (key->nodeName.length > 0) { - if (unicode_to_hfs(HFSTOVCB(hfsmp), + int res; + if ((res = unicode_to_hfs(HFSTOVCB(hfsmp), key->nodeName.length * 2, key->nodeName.unicode, - &hfskey.nodeName[0], retry) != 0) { - return (EINVAL); + &hfskey.nodeName[0], retry)) != 0) { + if (res != ENAMETOOLONG) + res = EINVAL; + + return res; } hfskey.keyLength += hfskey.nodeName[0]; } @@ -3439,7 +3484,7 @@ buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, */ __private_extern__ int -cat_resolvelink(struct hfsmount *hfsmp, u_long linkref, int isdirlink, struct HFSPlusCatalogFile *recp) +cat_resolvelink(struct hfsmount *hfsmp, u_int32_t linkref, int isdirlink, struct HFSPlusCatalogFile *recp) { FSBufferDescriptor btdata; struct BTreeIterator *iterator; @@ -3479,7 +3524,7 @@ cat_resolvelink(struct hfsmount *hfsmp, u_long linkref, int isdirlink, struct HF if (recp->hl_linkCount == 0) recp->hl_linkCount = 2; } else { - printf("HFS resolvelink: can't find %s\n", inodename); + printf("hfs: cat_resolvelink: can't find %s\n", inodename); } FREE(iterator, M_TEMP); @@ -3491,7 +3536,7 @@ cat_resolvelink(struct hfsmount *hfsmp, u_long linkref, int isdirlink, struct HF * Resolve hard link reference to obtain the inode number. 
*/ static int -resolvelinkid(struct hfsmount *hfsmp, u_long linkref, ino_t *ino) +resolvelinkid(struct hfsmount *hfsmp, u_int32_t linkref, ino_t *ino) { struct HFSPlusCatalogFile record; int error; @@ -3701,7 +3746,7 @@ buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding * builddesc - build a cnode descriptor from an HFS+ key */ static int -builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_long hint, u_long encoding, +builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_int32_t hint, u_int32_t encoding, int isdir, struct cat_desc *descp) { int result = 0; @@ -3869,7 +3914,7 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct */ static void promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, - HFSPlusCatalogKey *keyp, u_long *encoding) + HFSPlusCatalogKey *keyp, u_int32_t *encoding) { hfs_to_unicode_func_t hfs_get_unicode = hfsmp->hfs_get_unicode; u_int32_t uniCount; @@ -3904,7 +3949,7 @@ promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *filep, int resource, struct cat_fork * forkp) { struct HFSPlusExtentDescriptor *xp; - u_long blocksize = HFSTOVCB(hfsmp)->blockSize; + u_int32_t blocksize = HFSTOVCB(hfsmp)->blockSize; bzero(forkp, sizeof(*forkp)); xp = &forkp->cf_extents[0]; @@ -3940,7 +3985,7 @@ promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *filep, static void promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlusCatalogFile *crp) { - u_long blocksize = HFSTOVCB(hfsmp)->blockSize; + u_int32_t blocksize = HFSTOVCB(hfsmp)->blockSize; if (dataPtr->recordType == kHFSFolderRecord) { const struct HFSCatalogFolder * folder; @@ -4043,10 +4088,10 @@ buildthreadkey(HFSCatalogNodeID parentID, int std_hfs, CatalogKey *key) /* * Extract the text encoding from a catalog node record. 
*/ -static u_long +static u_int32_t getencoding(const CatalogRecord *crp) { - u_long encoding; + u_int32_t encoding; if (crp->recordType == kHFSPlusFolderRecord) encoding = crp->hfsPlusFolder.textEncoding; diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index 0c511ff67..2eaf3811f 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -57,10 +57,10 @@ struct cat_desc { u_int8_t cd_flags; /* see below (8 bits) */ u_int8_t cd_encoding; /* name encoding */ int16_t cd_namelen; /* length of cnode name */ - const u_int8_t * cd_nameptr; /* pointer to cnode name */ cnid_t cd_parentcnid; /* parent directory CNID */ - u_long cd_hint; /* catalog file hint */ + u_int32_t cd_hint; /* catalog file hint */ cnid_t cd_cnid; /* cnode id (for getattrlist) */ + const u_int8_t * cd_nameptr; /* pointer to cnode name */ }; /* cd_flags */ @@ -173,8 +173,8 @@ struct cat_entry { struct cat_attr ce_attr; off_t ce_datasize; off_t ce_rsrcsize; - u_long ce_datablks; - u_long ce_rsrcblks; + u_int32_t ce_datablks; + u_int32_t ce_rsrcblks; }; /* @@ -194,9 +194,9 @@ struct cat_entry { * A cat_entrylist is a list of Catalog Node Entries. */ struct cat_entrylist { - u_long maxentries; /* number of entries requested */ - u_long realentries; /* number of valid entries returned */ - u_long skipentries; /* number of entries skipped (reserved HFS+ files) */ + u_int32_t maxentries; /* number of entries requested */ + u_int32_t realentries; /* number of valid entries returned */ + u_int32_t skipentries; /* number of entries skipped (reserved HFS+ files) */ struct cat_entry entry[1]; /* array of entries */ }; @@ -223,7 +223,11 @@ typedef u_int32_t catops_t; * the nreserve struct (in BTreeNodeReserve.c). 
*/ typedef struct cat_cookie_t { +#if defined(__LP64__) + char opaque[40]; +#else char opaque[24]; +#endif } cat_cookie_t; /* Universal catalog key */ @@ -372,7 +376,7 @@ extern int cat_set_childlinkbit( #define HFS_IGNORABLE_LINK 0x00000001 extern int cat_resolvelink( struct hfsmount *hfsmp, - u_long linkref, + u_int32_t linkref, int isdirlink, struct HFSPlusCatalogFile *recp); diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c index 4c6401fcd..b2db58e75 100644 --- a/bsd/hfs/hfs_chash.c +++ b/bsd/hfs/hfs_chash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,14 +82,9 @@ lck_grp_t * chash_lck_grp; lck_grp_attr_t * chash_lck_grp_attr; lck_attr_t * chash_lck_attr; -/* - * Structures associated with cnode caching. - */ -LIST_HEAD(cnodehashhead, cnode) *cnodehashtbl; -u_long cnodehash; /* size of hash table - 1 */ -#define CNODEHASH(device, inum) (&cnodehashtbl[((device) + (inum)) & cnodehash]) -lck_mtx_t hfs_chash_mutex; +#define CNODEHASH(hfsmp, inum) (&hfsmp->hfs_cnodehashtbl[(inum) & hfsmp->hfs_cnodehash]) + /* * Initialize cnode hash table. 
@@ -101,28 +96,48 @@ hfs_chashinit() chash_lck_grp_attr= lck_grp_attr_alloc_init(); chash_lck_grp = lck_grp_alloc_init("cnode_hash", chash_lck_grp_attr); chash_lck_attr = lck_attr_alloc_init(); +} - lck_mtx_init(&hfs_chash_mutex, chash_lck_grp, chash_lck_attr); +static void hfs_chash_lock(struct hfsmount *hfsmp) +{ + lck_mtx_lock(&hfsmp->hfs_chash_mutex); } -static void hfs_chash_lock(void) +static void hfs_chash_lock_spin(struct hfsmount *hfsmp) { - lck_mtx_lock(&hfs_chash_mutex); + lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex); } -static void hfs_chash_unlock(void) +#ifdef i386 +static void hfs_chash_lock_convert (struct hfsmount *hfsmp) +#else +static void hfs_chash_lock_convert (__unused struct hfsmount *hfsmp) +#endif { - lck_mtx_unlock(&hfs_chash_mutex); + lck_mtx_convert_spin(&hfsmp->hfs_chash_mutex); +} + +static void hfs_chash_unlock(struct hfsmount *hfsmp) +{ + lck_mtx_unlock(&hfsmp->hfs_chash_mutex); } __private_extern__ void -hfs_chashinit_finish() +hfs_chashinit_finish(struct hfsmount *hfsmp) { - hfs_chash_lock(); - if (!cnodehashtbl) - cnodehashtbl = hashinit(desiredvnodes, M_HFSMNT, &cnodehash); - hfs_chash_unlock(); + lck_mtx_init(&hfsmp->hfs_chash_mutex, chash_lck_grp, chash_lck_attr); + + hfsmp->hfs_cnodehashtbl = hashinit(desiredvnodes / 4, M_HFSMNT, &hfsmp->hfs_cnodehash); +} + +__private_extern__ +void +hfs_delete_chash(struct hfsmount *hfsmp) +{ + lck_mtx_destroy(&hfsmp->hfs_chash_mutex, chash_lck_grp); + + FREE(hfsmp->hfs_cnodehashtbl, M_HFSMNT); } @@ -133,7 +148,7 @@ hfs_chashinit_finish() */ __private_extern__ struct vnode * -hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) +hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock) { struct cnode *cp; struct vnode *vp; @@ -146,15 +161,16 @@ hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) * allocated, wait for it to be finished and then try again. 
*/ loop: - hfs_chash_lock(); - for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { - if ((cp->c_fileid != inum) || (cp->c_dev != dev)) + hfs_chash_lock_spin(hfsmp); + + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) continue; /* Wait if cnode is being created or reclaimed. */ if (ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { SET(cp->c_hflag, H_WAITING); - (void) msleep(cp, &hfs_chash_mutex, PDROP | PINOD, + (void) msleep(cp, &hfsmp->hfs_chash_mutex, PDROP | PINOD, "hfs_chash_getvnode", 0); goto loop; } @@ -164,7 +180,7 @@ hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) goto exit; vid = vnode_vid(vp); - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); if ((error = vnode_getwithvid(vp, vid))) { /* @@ -195,7 +211,7 @@ hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) return (vp); } exit: - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); return (NULL); } @@ -205,7 +221,7 @@ hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock) */ __private_extern__ int -hfs_chash_snoop(dev_t dev, ino_t inum, int (*callout)(const struct cat_desc *, +hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int (*callout)(const struct cat_desc *, const struct cat_attr *, void *), void * arg) { struct cnode *cp; @@ -216,9 +232,10 @@ hfs_chash_snoop(dev_t dev, ino_t inum, int (*callout)(const struct cat_desc *, * If a cnode is in the process of being cleaned out or being * allocated, wait for it to be finished and then try again. */ - hfs_chash_lock(); - for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { - if ((cp->c_fileid != inum) || (cp->c_dev != dev)) + hfs_chash_lock(hfsmp); + + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) continue; /* Skip cnodes being created or reclaimed. 
*/ if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) { @@ -226,7 +243,8 @@ hfs_chash_snoop(dev_t dev, ino_t inum, int (*callout)(const struct cat_desc *, } break; } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); + return (result); } @@ -242,7 +260,7 @@ hfs_chash_snoop(dev_t dev, ino_t inum, int (*callout)(const struct cat_desc *, */ __private_extern__ struct cnode * -hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock) +hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock) { struct cnode *cp; struct cnode *ncp = NULL; @@ -255,11 +273,11 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int * allocated, wait for it to be finished and then try again. */ loop: - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); loop_with_lock: - for (cp = CNODEHASH(dev, inum)->lh_first; cp; cp = cp->c_hash.le_next) { - if ((cp->c_fileid != inum) || (cp->c_dev != dev)) + for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid != inum) continue; /* * Wait if cnode is being created, attached to or reclaimed. 
@@ -267,7 +285,7 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int if (ISSET(cp->c_hflag, H_ALLOC | H_ATTACH | H_TRANSIT)) { SET(cp->c_hflag, H_WAITING); - (void) msleep(cp, &hfs_chash_mutex, PINOD, + (void) msleep(cp, &hfsmp->hfs_chash_mutex, PINOD, "hfs_chash_getcnode", 0); goto loop_with_lock; } @@ -278,11 +296,11 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int */ SET(cp->c_hflag, H_ATTACH); - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } else { vid = vnode_vid(vp); - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); if (vnode_getwithvid(vp, vid)) goto loop; @@ -317,13 +335,13 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int if (vp != NULLVP) { vnode_put(vp); } else { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); CLR(cp->c_hflag, H_ATTACH); if (ISSET(cp->c_hflag, H_WAITING)) { CLR(cp->c_hflag, H_WAITING); wakeup((caddr_t)cp); } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } vp = NULL; cp = NULL; @@ -339,7 +357,7 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int panic("%s - should never get here when skiplock is set \n", __FUNCTION__); if (ncp == NULL) { - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); MALLOC_ZONE(ncp, struct cnode *, sizeof(struct cnode), M_HFSNODE, M_WAITOK); /* @@ -350,10 +368,11 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int */ goto loop; } + hfs_chash_lock_convert(hfsmp); + bzero(ncp, sizeof(struct cnode)); SET(ncp->c_hflag, H_ALLOC); ncp->c_fileid = inum; - ncp->c_dev = dev; TAILQ_INIT(&ncp->c_hintlist); /* make the list empty */ TAILQ_INIT(&ncp->c_originlist); @@ -362,8 +381,8 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int (void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK); /* Insert the new cnode with it's H_ALLOC flag set */ - LIST_INSERT_HEAD(CNODEHASH(dev, inum), ncp, c_hash); - hfs_chash_unlock(); + LIST_INSERT_HEAD(CNODEHASH(hfsmp, 
inum), ncp, c_hash); + hfs_chash_unlock(hfsmp); *vpp = NULL; return (ncp); @@ -372,9 +391,9 @@ hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int __private_extern__ void -hfs_chashwakeup(struct cnode *cp, int hflags) +hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int hflags) { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); CLR(cp->c_hflag, hflags); @@ -382,7 +401,7 @@ hfs_chashwakeup(struct cnode *cp, int hflags) CLR(cp->c_hflag, H_WAITING); wakeup((caddr_t)cp); } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } @@ -391,16 +410,16 @@ hfs_chashwakeup(struct cnode *cp, int hflags) */ __private_extern__ void -hfs_chash_rehash(struct cnode *cp1, struct cnode *cp2) +hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2) { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); LIST_REMOVE(cp1, c_hash); LIST_REMOVE(cp2, c_hash); - LIST_INSERT_HEAD(CNODEHASH(cp1->c_dev, cp1->c_fileid), cp1, c_hash); - LIST_INSERT_HEAD(CNODEHASH(cp2->c_dev, cp2->c_fileid), cp2, c_hash); + LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp1->c_fileid), cp1, c_hash); + LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp2->c_fileid), cp2, c_hash); - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } @@ -409,13 +428,13 @@ hfs_chash_rehash(struct cnode *cp1, struct cnode *cp2) */ __private_extern__ int -hfs_chashremove(struct cnode *cp) +hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp) { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); /* Check if a vnode is getting attached */ if (ISSET(cp->c_hflag, H_ATTACH)) { - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); return (EBUSY); } if (cp->c_hash.le_next || cp->c_hash.le_prev) { @@ -423,7 +442,8 @@ hfs_chashremove(struct cnode *cp) cp->c_hash.le_next = NULL; cp->c_hash.le_prev = NULL; } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); + return (0); } @@ -432,9 +452,9 @@ hfs_chashremove(struct cnode *cp) */ __private_extern__ void -hfs_chash_abort(struct cnode *cp) +hfs_chash_abort(struct hfsmount *hfsmp, 
struct cnode *cp) { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); LIST_REMOVE(cp, c_hash); cp->c_hash.le_next = NULL; @@ -445,7 +465,7 @@ hfs_chash_abort(struct cnode *cp) CLR(cp->c_hflag, H_WAITING); wakeup((caddr_t)cp); } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } @@ -454,13 +474,13 @@ hfs_chash_abort(struct cnode *cp) */ __private_extern__ void -hfs_chash_mark_in_transit(struct cnode *cp) +hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp) { - hfs_chash_lock(); + hfs_chash_lock_spin(hfsmp); SET(cp->c_hflag, H_TRANSIT); - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); } /* Search a cnode in the hash. This function does not return cnode which @@ -470,12 +490,12 @@ hfs_chash_mark_in_transit(struct cnode *cp) */ static struct cnode * -hfs_chash_search_cnid(dev_t dev, cnid_t cnid) +hfs_chash_search_cnid(struct hfsmount *hfsmp, cnid_t cnid) { struct cnode *cp; - for (cp = CNODEHASH(dev, cnid)->lh_first; cp; cp = cp->c_hash.le_next) { - if ((cp->c_fileid == cnid) && (cp->c_dev == dev)) { + for (cp = CNODEHASH(hfsmp, cnid)->lh_first; cp; cp = cp->c_hash.le_next) { + if (cp->c_fileid == cnid) { break; } } @@ -500,13 +520,14 @@ hfs_chash_search_cnid(dev_t dev, cnid_t cnid) */ __private_extern__ int -hfs_chash_set_childlinkbit(dev_t dev, cnid_t cnid) +hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid) { int retval = -1; struct cnode *cp; - hfs_chash_lock(); - cp = hfs_chash_search_cnid(dev, cnid); + hfs_chash_lock_spin(hfsmp); + + cp = hfs_chash_search_cnid(hfsmp, cnid); if (cp) { if (cp->c_attr.ca_recflags & kHFSHasChildLinkMask) { retval = 0; @@ -515,7 +536,7 @@ hfs_chash_set_childlinkbit(dev_t dev, cnid_t cnid) retval = 1; } } - hfs_chash_unlock(); + hfs_chash_unlock(hfsmp); return retval; } diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index b0ba0f1ae..7123f603f 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -92,8 +92,12 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) /* * Ignore nodes related to 
stale file handles. + * We are peeking at the cnode flag without the lock, but if C_NOEXISTS + * is set, that means the cnode doesn't have any backing store in the + * catalog anymore, and is otherwise safe to force a recycle */ - if (cp->c_mode == 0) { + + if (cp->c_flag & C_NOEXISTS) { vnode_recycle(vp); return (0); } @@ -105,16 +109,22 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) (void) hfs_lock(cp, HFS_FORCE_LOCK); + if (cp->c_datafork) + ++forkcount; + if (cp->c_rsrcfork) + ++forkcount; + /* * We should lock cnode before checking the flags in the * condition below and should unlock the cnode before calling * ubc_setsize() as cluster code can call other HFS vnops which * will try to acquire the same cnode lock and cause deadlock. + * Only call ubc_setsize to 0 if we are the last fork. */ if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - (VTOF(vp)->ff_blocks != 0)) { - hfs_unlock(cp); + (cp->c_flag & C_DELETED) && + (VTOF(vp)->ff_blocks != 0) && (forkcount == 1)) { + hfs_unlock(cp); ubc_setsize(vp, 0); (void) hfs_lock(cp, HFS_FORCE_LOCK); } @@ -128,34 +138,59 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) if (v_type == VDIR) { hfs_reldirhints(cp, 0); } - if (cp->c_flag & C_HARDLINK) { hfs_relorigins(cp); } - if (cp->c_datafork) - ++forkcount; - if (cp->c_rsrcfork) - ++forkcount; + /* Hurry the recycling process along if we're an open-unlinked file */ + if((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) { + recycle = 1; + } + + /* + * This check is slightly complicated. We should only truncate data + * in very specific cases for open-unlinked files. This is because + * we want to ensure that the resource fork continues to be available + * if the caller has the data fork open. However, this is not symmetric; + * someone who has the resource fork open need not be able to access the data + * fork once the data fork has gone inactive. + * + * If we're the last fork, then we have cleaning up to do. 
+ * + * A) last fork, and vp == c_vp + * Truncate away own fork data. If rsrc fork is not in core, truncate it too. + * + * B) last fork, and vp == c_rsrc_vp + * Truncate ourselves, assume data fork has been cleaned due to C). + * + * If we're not the last fork, then things are a little different: + * + * C) not the last fork, vp == c_vp + * Truncate ourselves. Once the file has gone out of the namespace, + * it cannot be further opened. Further access to the rsrc fork may + * continue, however. + * + * D) not the last fork, vp == c_rsrc_vp + * Don't enter the block below, just clean up vnode and push it out of core. + */ - /* If needed, get rid of any fork's data for a deleted file */ - if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED)) { + if ((v_type == VREG || v_type == VLNK) && (cp->c_flag & C_DELETED) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { if (VTOF(vp)->ff_blocks != 0) { /* * Since we're already inside a transaction, * tell hfs_truncate to skip the ubc_setsize. */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context); + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 0, ap->a_context); if (error) goto out; truncated = 1; } - recycle = 1; - /* - * Check if there's any resource fork blocks that need to - * be reclaimed. This covers the case where there is a - * resource fork but its not in core. + /* + * If c_blocks > 0 and we are the last fork (data fork), then + * we can go and truncate away the rsrc fork blocks if + * they were not in core. */ if ((cp->c_blocks > 0) && (forkcount == 1) && (vp != cp->c_rsrc_vp)) { struct vnode *rvp = NULLVP; @@ -167,20 +202,21 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) * Defer the vnode_put and ubc_setsize on rvp until hfs_unlock().
*/ cp->c_flag |= C_NEED_RVNODE_PUT | C_NEED_RSRC_SETSIZE; - error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ap->a_context); + error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 0, ap->a_context); if (error) goto out; vnode_recycle(rvp); /* all done with this vnode */ } } - // If needed, get rid of any xattrs that this file may have. + // If needed, get rid of any xattrs that this file (or directory) may have. // Note that this must happen outside of any other transactions // because it starts/ends its own transactions and grabs its // own locks. This is to prevent a file with a lot of attributes // from creating a transaction that is too large (which panics). // - if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && (cp->c_flag & C_DELETED)) { + if ((cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0 && + (cp->c_flag & C_DELETED) && (forkcount <= 1)) { hfs_removeallattr(hfsmp, cp->c_fileid); } @@ -193,12 +229,12 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) * Mark cnode in transit so that no one can get this * cnode from cnode hash. */ - // hfs_chash_mark_in_transit(cp); + // hfs_chash_mark_in_transit(hfsmp, cp); // XXXdbg - remove the cnode from the hash table since it's deleted // otherwise someone could go to sleep on the cnode and not // be woken up until this vnode gets recycled which could be // a very long time... 
- hfs_chashremove(cp); + hfs_chashremove(hfsmp, cp); cp->c_flag |= C_NOEXISTS; // XXXdbg cp->c_rdev = 0; @@ -264,8 +300,8 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) if (hfsmp->hfs_flags & HFS_QUOTAS) (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ - - cp->c_mode = 0; + + /* Already set C_NOEXISTS at the beginning of this block */ cp->c_flag &= ~C_DELETED; cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; @@ -282,9 +318,9 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) */ if ((cp->c_flag & C_MODIFIED) || cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { - if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ + if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ cp->c_flag |= C_FORCEUPDATE; - } + } hfs_update(vp, 0); } out: @@ -296,6 +332,13 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) hfs_end_transaction(hfsmp); started_tr = 0; } + /* + * This has been removed from the namespace and has no backing store + * in the catalog, so we should force a reclaim as soon as possible. + * Also, we want to check the flag while we still have the cnode lock. + */ + if (cp->c_flag & C_NOEXISTS) + recycle = 1; hfs_unlock(cp); @@ -306,7 +349,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) * If we are done with the vnode, reclaim it * so that it can be reused immediately. */ - if (cp->c_mode == 0 || recycle) + if (recycle) vnode_recycle(vp); return (error); @@ -321,8 +364,9 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) struct cnode *cp; struct filefork *fp; struct hfsmount *hfsmp; + struct rl_entry *invalid_range; off_t leof; - u_long blks, blocksize; + u_int32_t blks, blocksize; cp = VTOC(vp); fp = VTOF(vp); @@ -340,8 +384,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) * Explicitly zero out the areas of file * that are currently marked invalid. 
*/ - while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { - struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); + while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { off_t start = invalid_range->rl_start; off_t end = invalid_range->rl_end; @@ -368,7 +411,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) * Shrink the peof to the smallest size neccessary to contain the leof. */ if (blks < fp->ff_blocks) - (void) hfs_truncate(vp, leof, IO_NDELAY, 0, context); + (void) hfs_truncate(vp, leof, IO_NDELAY, 0, 0, context); hfs_unlock(cp); (void) cluster_push(vp, IO_CLOSE); hfs_lock(cp, HFS_FORCE_LOCK); @@ -396,28 +439,12 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) struct cnode *cp; struct filefork *fp = NULL; struct filefork *altfp = NULL; + struct hfsmount *hfsmp = VTOHFS(vp); int reclaim_cnode = 0; (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK); cp = VTOC(vp); - - /* - * Check if a deleted resource fork vnode missed a - * VNOP_INACTIVE call and requires truncation. - */ - if (VNODE_IS_RSRC(vp) && - (cp->c_flag & C_DELETED) && - (VTOF(vp)->ff_blocks != 0)) { - hfs_unlock(cp); - ubc_setsize(vp, 0); - - hfs_lock_truncate(cp, TRUE); - (void) hfs_lock(VTOC(vp), HFS_FORCE_LOCK); - - (void) hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ap->a_context); - - hfs_unlock_truncate(cp, TRUE); - } + /* * A file may have had delayed allocations, in which case hfs_update * would not have updated the catalog record (cat_update). We need @@ -425,10 +452,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) * force the update, or hfs_update will again skip the cat_update. 
*/ if ((cp->c_flag & C_MODIFIED) || - cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { - if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ + cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { + if ((cp->c_flag & C_MODIFIED) || cp->c_touch_modtime){ cp->c_flag |= C_FORCEUPDATE; - } + } hfs_update(vp, 0); } @@ -459,14 +486,14 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) cp->c_rsrcfork = NULL; cp->c_rsrc_vp = NULL; } else { - panic("hfs_vnop_reclaim: vp points to wrong cnode\n"); + panic("hfs_vnop_reclaim: vp points to wrong cnode (vp=%p cp->c_vp=%p cp->c_rsrc_vp=%p)\n", vp, cp->c_vp, cp->c_rsrc_vp); } /* * On the last fork, remove the cnode from its hash chain. */ if (altfp == NULL) { /* If we can't remove it then the cnode must persist! */ - if (hfs_chashremove(cp) == 0) + if (hfs_chashremove(hfsmp, cp) == 0) reclaim_cnode = 1; /* * Remove any directory hints @@ -474,11 +501,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) if (vnode_isdir(vp)) { hfs_reldirhints(cp, 0); } - - if (cp->c_flag & C_HARDLINK) { + + if(cp->c_flag & C_HARDLINK) { hfs_relorigins(cp); } - } /* Release the file fork and related data */ if (fp) { @@ -493,7 +519,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) * If there was only one active fork then we can release the cnode. 
*/ if (reclaim_cnode) { - hfs_chashwakeup(cp, H_ALLOC | H_TRANSIT); + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); hfs_reclaim_cnode(cp); } else /* cnode in use */ { hfs_unlock(cp); @@ -505,6 +531,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) extern int (**hfs_vnodeop_p) (void *); +extern int (**hfs_std_vnodeop_p) (void *); extern int (**hfs_specop_p) (void *); #if FIFO extern int (**hfs_fifoop_p) (void *); @@ -533,6 +560,7 @@ hfs_getnewvnode( struct vnode *tvp = NULLVP; struct cnode *cp = NULL; struct filefork *fp = NULL; + int hfs_standard = 0; int retval; int issystemfile; int wantrsrc; @@ -542,6 +570,8 @@ hfs_getnewvnode( int i; #endif /* QUOTA */ + hfs_standard = (hfsmp->hfs_flags & HFS_STANDARD); + if (attrp->ca_fileid == 0) { *vpp = NULL; return (ENOENT); @@ -573,7 +603,7 @@ hfs_getnewvnode( /* * Get a cnode (new or existing) */ - cp = hfs_chash_getcnode(hfsmp->hfs_raw_dev, attrp->ca_fileid, vpp, wantrsrc, (flags & GNV_SKIPLOCK)); + cp = hfs_chash_getcnode(hfsmp, attrp->ca_fileid, vpp, wantrsrc, (flags & GNV_SKIPLOCK)); /* * If the id is no longer valid for lookups we'll get back a NULL cp. @@ -595,11 +625,14 @@ hfs_getnewvnode( */ if (ISSET(cp->c_hflag, H_ALLOC)) { lck_rw_init(&cp->c_truncatelock, hfs_rwlock_group, hfs_lock_attr); +#if HFS_COMPRESSION + cp->c_decmp = NULL; +#endif /* Make sure its still valid (ie exists on disk). */ if (!(flags & GNV_CREATE) && !hfs_valid_cnode(hfsmp, dvp, (wantrsrc ? 
NULL : cnp), cp->c_fileid)) { - hfs_chash_abort(cp); + hfs_chash_abort(hfsmp, cp); hfs_reclaim_cnode(cp); *vpp = NULL; return (ENOENT); @@ -730,16 +763,24 @@ hfs_getnewvnode( vfsp.vnfs_cnp = cnp; } vfsp.vnfs_fsnode = cp; + + /* + * Special Case HFS Standard VNOPs from HFS+, since + * HFS standard is readonly/deprecated as of 10.6 + */ + #if FIFO - if (vtype == VFIFO ) + if (vtype == VFIFO ) vfsp.vnfs_vops = hfs_fifoop_p; else #endif if (vtype == VBLK || vtype == VCHR) vfsp.vnfs_vops = hfs_specop_p; - else + else if (hfs_standard) + vfsp.vnfs_vops = hfs_std_vnodeop_p; + else vfsp.vnfs_vops = hfs_vnodeop_p; - + if (vtype == VBLK || vtype == VCHR) vfsp.vnfs_rdev = attrp->ca_rdev; else @@ -777,11 +818,11 @@ hfs_getnewvnode( * occurred during the attachment, then cleanup the cnode. */ if ((cp->c_vp == NULL) && (cp->c_rsrc_vp == NULL)) { - hfs_chash_abort(cp); + hfs_chash_abort(hfsmp, cp); hfs_reclaim_cnode(cp); } else { - hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); if ((flags & GNV_SKIPLOCK) == 0){ hfs_unlock(cp); } @@ -800,11 +841,16 @@ hfs_getnewvnode( * have the chance to process the resource fork. */ if (VNODE_IS_RSRC(vp)) { + int err; + KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 37)), cp->c_vp, cp->c_rsrc_vp, 0, 0, 0); + /* Force VL_NEEDINACTIVE on this vnode */ - vnode_ref(vp); - vnode_rele(vp); + err = vnode_ref(vp); + if (err == 0) { + vnode_rele(vp); + } } - hfs_chashwakeup(cp, H_ALLOC | H_ATTACH); + hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_ATTACH); /* * Stop tracking an active hot file. 
@@ -847,6 +893,12 @@ hfs_reclaim_cnode(struct cnode *cp) lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group); lck_rw_destroy(&cp->c_truncatelock, hfs_rwlock_group); +#if HFS_COMPRESSION + if (cp->c_decmp) { + decmpfs_cnode_destroy(cp->c_decmp); + FREE_ZONE(cp->c_decmp, sizeof(*(cp->c_decmp)), M_DECMPFS_CNODE); + } +#endif bzero(cp, sizeof(struct cnode)); FREE_ZONE(cp, sizeof(struct cnode), M_HFSNODE); } @@ -917,11 +969,13 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) * . MNT_NOATIME is set * . a file system freeze is in progress * . a file system resize is in progress + * . the vnode associated with this cnode is marked for rapid aging */ if (cp->c_touch_acctime) { if ((vfs_flags(hfsmp->hfs_mp) & MNT_NOATIME) || (hfsmp->hfs_freezing_proc != NULL) || - (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) + (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) || + (cp->c_vp && vnode_israge(cp->c_vp))) cp->c_touch_acctime = FALSE; } if (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime) { @@ -1121,7 +1175,7 @@ hfs_isordered(struct cnode *cp1, struct cnode *cp2) __private_extern__ int hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, - struct cnode *cp4, enum hfslocktype locktype) + struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode) { struct cnode * a[3]; struct cnode * b[3]; @@ -1129,6 +1183,9 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, struct cnode * tmp; int i, j, k; int error; + if (error_cnode) { + *error_cnode = NULL; + } if (hfs_isordered(cp1, cp2)) { a[0] = cp1; a[1] = cp2; @@ -1159,6 +1216,10 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, for (i = 0; i < k; ++i) { if (list[i]) if ((error = hfs_lock(list[i], locktype))) { + /* Only stuff error_cnode if requested */ + if (error_cnode) { + *error_cnode = list[i]; + } /* Drop any locks we acquired. 
*/ while (--i >= 0) { if (list[i]) diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h index c4a930d16..27c1b9a55 100644 --- a/bsd/hfs/hfs_cnode.h +++ b/bsd/hfs/hfs_cnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2007 Apple Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,7 +42,9 @@ #include #include - +#if HFS_COMPRESSION +#include +#endif /* * The filefork is used to represent an HFS file fork (data or resource). @@ -109,9 +111,7 @@ struct cnode { u_int32_t c_hflag; /* cnode's flags for maintaining hash - protected by global hash lock */ struct vnode *c_vp; /* vnode for data fork or dir */ struct vnode *c_rsrc_vp; /* vnode for resource fork */ - dev_t c_dev; /* cnode's device */ - struct dquot *c_dquot[MAXQUOTAS]; /* cnode's quota info */ - struct klist c_knotes; /* knotes attached to this vnode */ + struct dquot *c_dquot[MAXQUOTAS]; /* cnode's quota info */ u_int32_t c_childhint; /* catalog hint for children (small dirs only) */ u_int32_t c_dirthreadhint; /* catalog hint for directory's thread rec */ struct cat_desc c_desc; /* cnode's descriptor */ @@ -129,6 +129,9 @@ struct cnode { atomicflag_t c_touch_acctime; atomicflag_t c_touch_chgtime; atomicflag_t c_touch_modtime; +#if HFS_COMPRESSION + decmpfs_cnode *c_decmp; +#endif /* HFS_COMPRESSION */ }; typedef struct cnode cnode_t; @@ -185,6 +188,7 @@ typedef struct cnode cnode_t; #define C_NEED_DATA_SETSIZE 0x01000 /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */ #define C_NEED_RSRC_SETSIZE 0x02000 /* Do a ubc_setsize(0) on c_vp after the unlock */ #define C_DIR_MODIFICATION 0x04000 /* Directory is being modified, wait for lookups */ +#define C_ALWAYS_ZEROFILL 0x08000 /* Always zero-fill the file on an fsync */ #define ZFTIMELIMIT (5 * 60) @@ -231,6 +235,18 @@ enum { kFinderInvisibleMask = 1 << 14 }; #define VNODE_IS_RSRC(vp) ((vp) == VTOC((vp))->c_rsrc_vp) +#if HFS_COMPRESSION +/* + * VTOCMP(vp) returns a 
pointer to vp's decmpfs_cnode; this could be NULL + * if the file is not compressed or if hfs_file_is_compressed() hasn't + * yet been called on this file. + */ +#define VTOCMP(vp) (VTOC((vp))->c_decmp) +int hfs_file_is_compressed(struct cnode *cp, int skiplock); +int hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock); +int hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock); +int hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock); +#endif #define ATIME_ONDISK_ACCURACY 300 @@ -260,21 +276,21 @@ extern void hfs_touchtimes(struct hfsmount *, struct cnode *); * HFS cnode hash functions. */ extern void hfs_chashinit(void); -extern void hfs_chashinit_finish(void); -extern void hfs_chashinsert(struct cnode *cp); -extern int hfs_chashremove(struct cnode *cp); -extern void hfs_chash_abort(struct cnode *cp); -extern void hfs_chash_rehash(struct cnode *cp1, struct cnode *cp2); -extern void hfs_chashwakeup(struct cnode *cp, int flags); -extern void hfs_chash_mark_in_transit(struct cnode *cp); - -extern struct vnode * hfs_chash_getvnode(dev_t dev, ino_t inum, int wantrsrc, int skiplock); -extern struct cnode * hfs_chash_getcnode(dev_t dev, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock); -extern int hfs_chash_snoop(dev_t, ino_t, int (*)(const struct cat_desc *, +extern void hfs_chashinit_finish(struct hfsmount *hfsmp); +extern void hfs_delete_chash(struct hfsmount *hfsmp); +extern int hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp); +extern void hfs_chash_abort(struct hfsmount *hfsmp, struct cnode *cp); +extern void hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2); +extern void hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int flags); +extern void hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp); + +extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int 
wantrsrc, int skiplock); +extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp, int wantrsrc, int skiplock); +extern int hfs_chash_snoop(struct hfsmount *, ino_t, int (*)(const struct cat_desc *, const struct cat_attr *, void *), void *); extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp, cnid_t cnid); -extern int hfs_chash_set_childlinkbit(dev_t dev, cnid_t cnid); +extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid); /* * HFS cnode lock functions. @@ -299,7 +315,7 @@ enum hfslocktype {HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2, HFS_FORCE_LOCK = extern int hfs_lock(struct cnode *, enum hfslocktype); extern int hfs_lockpair(struct cnode *, struct cnode *, enum hfslocktype); extern int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, - enum hfslocktype); + enum hfslocktype, struct cnode **); extern void hfs_unlock(struct cnode *); extern void hfs_unlockpair(struct cnode *, struct cnode *); diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c index c531aa28b..bc644b39d 100644 --- a/bsd/hfs/hfs_encodings.c +++ b/bsd/hfs/hfs_encodings.c @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include "hfs.h" @@ -205,14 +207,13 @@ hfs_relconverter(u_int32_t encoding) /* if converter is no longer in use, release it */ if (encp->refcount <= 0 && encp->kmod_id != 0) { - int id = encp->kmod_id; + uint32_t loadTag = (uint32_t)encp->kmod_id; SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); lck_mtx_unlock(&encodinglst_mutex); FREE(encp, M_TEMP); - record_kext_unload(id); - kmod_destroy((host_priv_t) host_priv_self(), id); + (void)OSKextUnloadKextWithLoadTag(loadTag); return (0); } lck_mtx_unlock(&encodinglst_mutex); diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index db0f489d5..6f840045d 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -90,16 +90,16 @@ hfs_swap_BTNode ( 
BTNodeDescriptor *srcDesc = src->buffer; u_int16_t *srcOffs = NULL; BTreeControlBlockPtr btcb = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr; - u_int32_t i; + u_int16_t i; /* index to match srcDesc->numRecords */ int error = 0; #ifdef ENDIAN_DEBUG if (direction == kSwapBTNodeBigToHost) { - printf ("BE -> Native Swap\n"); + printf ("hfs: BE -> Native Swap\n"); } else if (direction == kSwapBTNodeHostToBig) { - printf ("Native -> BE Swap\n"); + printf ("hfs: Native -> BE Swap\n"); } else if (direction == kSwapBTNodeHeaderRecordOnly) { - printf ("Not swapping descriptors\n"); + printf ("hfs: Not swapping descriptors\n"); } else { panic ("hfs_swap_BTNode: This is impossible"); } @@ -145,6 +145,7 @@ hfs_swap_BTNode ( goto fail; } + } /* @@ -380,7 +381,7 @@ hfs_swap_BTNode ( /* * Log some useful information about where the corrupt node is. */ - printf("node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid, + printf("hfs: node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid, VTOVCB(vp)->vcbVN, vfs_statfs(vnode_mount(vp))->f_mntfromname); hfs_mark_volume_inconsistent(VTOVCB(vp)); } @@ -489,7 +490,7 @@ hfs_swap_HFSPlusBTInternalNode ( * to be sure the current record doesn't overflow into the next * record. */ - nextRecord = (char *)src->buffer + srcOffs[i-1]; + nextRecord = (char *)src->buffer + (uintptr_t)(srcOffs[i-1]); /* * Make sure we can safely dereference the keyLength and parentID fields. diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h index f0d60d8bb..151cadde7 100644 --- a/bsd/hfs/hfs_format.h +++ b/bsd/hfs/hfs_format.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -105,6 +105,13 @@ enum { #define FIRST_LINK_XATTR_NAME "com.apple.system.hfs.firstlink" #define FIRST_LINK_XATTR_REC_SIZE (sizeof(HFSPlusAttrData) - 2 + 12) +/* + * The name space ID for generating an HFS volume UUID + * + * B3E20F39-F292-11D6-97A4-00306543ECAC + */ +#define HFS_UUID_NAMESPACE_ID "\xB3\xE2\x0F\x39\xF2\x92\x11\xD6\x97\xA4\x00\x30\x65\x43\xEC\xAC" + #endif /* __APPLE_API_PRIVATE */ /* @@ -555,7 +562,7 @@ enum { /* HFS and HFS Plus volume attribute bits */ enum { - /* Bits 0-6 are reserved (always cleared by MountVol call) */ + /* Bits 0-6 are reserved (always cleared by MountVol call) */ kHFSVolumeHardwareLockBit = 7, /* volume is locked by hardware */ kHFSVolumeUnmountedBit = 8, /* volume was successfully unmounted */ kHFSVolumeSparedBlocksBit = 9, /* volume has bad blocks spared */ @@ -565,7 +572,12 @@ enum { kHFSVolumeJournaledBit = 13, /* this volume has a journal on it */ kHFSVolumeInconsistentBit = 14, /* serious inconsistencies detected at runtime */ kHFSVolumeSoftwareLockBit = 15, /* volume is locked by software */ - + /* + * HFS only has 16 bits of attributes in the MDB, but HFS Plus has 32 bits. + * Therefore, bits 16-31 can only be used on HFS Plus. + */ + kHFSUnusedNodeFixBit = 31, /* Unused nodes in the Catalog B-tree have been zero-filled. See Radar #6947811. 
*/ + kHFSVolumeHardwareLockMask = 1 << kHFSVolumeHardwareLockBit, kHFSVolumeUnmountedMask = 1 << kHFSVolumeUnmountedBit, kHFSVolumeSparedBlocksMask = 1 << kHFSVolumeSparedBlocksBit, @@ -575,9 +587,13 @@ enum { kHFSVolumeJournaledMask = 1 << kHFSVolumeJournaledBit, kHFSVolumeInconsistentMask = 1 << kHFSVolumeInconsistentBit, kHFSVolumeSoftwareLockMask = 1 << kHFSVolumeSoftwareLockBit, + kHFSUnusedNodeFixMask = 1 << kHFSUnusedNodeFixBit, kHFSMDBAttributesMask = 0x8380 }; +enum { + kHFSUnusedNodesFixDate = 0xc5ef2480 /* March 25, 2009 */ +}; /* HFS Master Directory Block - 162 bytes */ /* Stored at sector #2 (3rd sector) and second-to-last sector. */ @@ -729,13 +745,26 @@ enum { kHFSBinaryCompare = 0xBC /* binary compare (case-sensitive) */ }; +#include + /* JournalInfoBlock - Structure that describes where our journal lives */ + +// the original size of the reserved field in the JournalInfoBlock was +// 32*sizeof(u_int32_t). To keep the total size of the structure the +// same we subtract the size of new fields (currently: ext_jnl_uuid and +// machine_serial_num). If you add additional fields, place them before the +// reserved field and subtract their size in this macro. +// +#define JIB_RESERVED_SIZE ((32*sizeof(u_int32_t)) - sizeof(uuid_string_t) - 48) + struct JournalInfoBlock { u_int32_t flags; u_int32_t device_signature[8]; // signature used to locate our device. u_int64_t offset; // byte offset to the journal on the device u_int64_t size; // size in bytes of the journal - u_int32_t reserved[32]; + uuid_string_t ext_jnl_uuid; + char machine_serial_num[48]; + char reserved[JIB_RESERVED_SIZE]; } __attribute__((aligned(2), packed)); typedef struct JournalInfoBlock JournalInfoBlock; @@ -745,6 +774,13 @@ enum { kJIJournalNeedInitMask = 0x00000004 }; +// +// This is the content type uuid for "external journal" GPT +// partitions. Each instance of a partition also has a +// uuid that uniquely identifies that instance.
+// +#define EXTJNL_CONTENT_TYPE_UUID "4A6F7572-6E61-11AA-AA11-00306543ECAC" + #ifdef __cplusplus } diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h index fffe15d26..7759e799a 100644 --- a/bsd/hfs/hfs_fsctl.h +++ b/bsd/hfs/hfs_fsctl.h @@ -48,6 +48,11 @@ struct hfs_backingstoreinfo { typedef char pathname_t[MAXPATHLEN]; +struct hfs_journal_info { + off_t jstart; + off_t jsize; +}; + /* HFS FS CONTROL COMMANDS */ @@ -73,7 +78,7 @@ typedef char pathname_t[MAXPATHLEN]; #define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) #define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) -#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t) +#define HFSIOC_BULKACCESS _IOW('h', 9, struct user32_access_t) #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS) #define HFSIOC_SETACLSTATE _IOW('h', 10, int32_t) @@ -92,12 +97,30 @@ typedef char pathname_t[MAXPATHLEN]; #define HFSIOC_SET_XATTREXTENTS_STATE _IOW('h', 14, u_int32_t) #define HFS_SET_XATTREXTENTS_STATE IOCBASECMD(HFSIOC_SET_XATTREXTENTS_STATE) -#define HFSIOC_EXT_BULKACCESS _IOW('h', 15, struct ext_access_t) +#define HFSIOC_EXT_BULKACCESS _IOW('h', 15, struct user32_ext_access_t) #define HFS_EXT_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_EXT_BULKACCESS) #define HFSIOC_MARK_BOOT_CORRUPT _IO('h', 16) #define HFS_MARK_BOOT_CORRUPT IOCBASECMD(HFSIOC_MARK_BOOT_CORRUPT) +#define HFSIOC_GET_JOURNAL_INFO _IOR('h', 17, struct hfs_journal_info) +#define HFS_FSCTL_GET_JOURNAL_INFO IOCBASECMD(HFSIOC_GET_JOURNAL_INFO) + +#define HFSIOC_SET_VERY_LOW_DISK _IOW('h', 20, u_int32_t) +#define HFS_FSCTL_SET_VERY_LOW_DISK IOCBASECMD(HFSIOC_SET_VERY_LOW_DISK) + +#define HFSIOC_SET_LOW_DISK _IOW('h', 21, u_int32_t) +#define HFS_FSCTL_SET_LOW_DISK IOCBASECMD(HFSIOC_SET_LOW_DISK) + +#define HFSIOC_SET_DESIRED_DISK _IOW('h', 22, u_int32_t) +#define HFS_FSCTL_SET_DESIRED_DISK IOCBASECMD(HFSIOC_SET_DESIRED_DISK) + +#define HFSIOC_SET_ALWAYS_ZEROFILL _IOW('h', 23, int32_t) +#define HFS_SET_ALWAYS_ZEROFILL 
IOCBASECMD(HFSIOC_SET_ALWAYS_ZEROFILL) + +#define HFSIOC_VOLUME_STATUS _IOR('h', 24, u_int32_t) +#define HFS_VOLUME_STATUS IOCBASECMD(HFSIOC_VOLUME_STATUS) + #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index 4b23e1b56..ce0fe4dcf 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -214,7 +214,7 @@ hfs_recording_start(struct hfsmount *hfsmp) hfsmp->hfc_timebase = hfsmp->hfc_timeout - HFC_DEFAULT_DURATION; } #if HFC_VERBOSE - printf("Resume recording hot files on %s (%d secs left)\n", + printf("hfs: Resume recording hot files on %s (%d secs left)\n", hfsmp->vcbVN, SWAP_BE32 (hotfileinfo.timeleft)); #endif } else { @@ -240,7 +240,7 @@ hfs_recording_start(struct hfsmount *hfsmp) return (error); } #if HFC_VERBOSE - printf("HFS: begin recording hot files on %s\n", hfsmp->vcbVN); + printf("hfs: begin recording hot files on %s\n", hfsmp->vcbVN); #endif hfsmp->hfc_maxfiles = HFC_DEFAULT_FILE_COUNT; hfsmp->hfc_timeout = tv.tv_sec + HFC_DEFAULT_DURATION; @@ -264,6 +264,13 @@ hfs_recording_start(struct hfsmount *hfsmp) size = sizeof(hotfile_data_t) + (maxentries * sizeof(hotfile_entry_t)); MALLOC(hotdata, hotfile_data_t *, size, M_TEMP, M_WAITOK); + if (hotdata == NULL) { + hfsmp->hfc_recdata = NULL; + hfsmp->hfc_stage = HFC_IDLE; + wakeup((caddr_t)&hfsmp->hfc_stage); + return(ENOMEM); + } + bzero(hotdata, size); for (i = 1; i < maxentries ; i++) @@ -312,7 +319,7 @@ hfs_recording_stop(struct hfsmount *hfsmp) * then dump the sample data */ #if HFC_VERBOSE - printf("HFS: end of hot file recording on %s\n", hfsmp->vcbVN); + printf("hfs: end of hot file recording on %s\n", hfsmp->vcbVN); #endif hotdata = (hotfile_data_t *)hfsmp->hfc_recdata; if (hotdata == NULL) @@ -322,7 +329,7 @@ hfs_recording_stop(struct hfsmount *hfsmp) 
wakeup((caddr_t)&hfsmp->hfc_stage); #if HFC_VERBOSE - printf(" curentries: %d\n", hotdata->activefiles); + printf("hfs: curentries: %d\n", hotdata->activefiles); #endif /* * If no hot files recorded then we're done. @@ -407,9 +414,9 @@ hfs_recording_stop(struct hfsmount *hfsmp) out: #if HFC_VERBOSE if (newstage == HFC_EVICTION) - printf("HFS: evicting coldest files\n"); + printf("hfs: evicting coldest files\n"); else if (newstage == HFC_ADOPTION) - printf("HFS: adopting hotest files\n"); + printf("hfs: adopting hotest files\n"); #endif FREE(hotdata, M_TEMP); @@ -450,7 +457,7 @@ hfs_recording_suspend(struct hfsmount *hfsmp) hfsmp->hfc_stage = HFC_BUSY; #if HFC_VERBOSE - printf("HFS: suspend hot file recording on %s\n", hfsmp->vcbVN); + printf("hfs: suspend hot file recording on %s\n", hfsmp->vcbVN); #endif error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); if (error) { @@ -513,7 +520,7 @@ hfs_recording_init(struct hfsmount *hfsmp) u_int32_t dataSize; HFSPlusCatalogFile *filep; BTScanState scanstate; - BTreeIterator * iterator; + BTreeIterator * iterator = NULL; FSBufferDescriptor record; HotFileKey * key; filefork_t * filefork; @@ -555,7 +562,7 @@ hfs_recording_init(struct hfsmount *hfsmp) error = hfc_btree_create(hfsmp, HFSTOVCB(hfsmp)->blockSize, HFC_DEFAULT_FILE_COUNT); if (error) { #if HFC_VERBOSE - printf("Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); + printf("hfs: Error %d creating hot file b-tree on %s \n", error, hfsmp->vcbVN); #endif return (error); } @@ -567,11 +574,17 @@ hfs_recording_init(struct hfsmount *hfsmp) error = hfc_btree_open(hfsmp, &hfsmp->hfc_filevp); if (error) { #if HFC_VERBOSE - printf("Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); + printf("hfs: Error %d opening hot file b-tree on %s \n", error, hfsmp->vcbVN); #endif return (error); } MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + error = ENOMEM; + (void) hfc_btree_close(hfsmp, 
hfsmp->hfc_filevp); + hfsmp->hfc_filevp = NULL; + goto out2; + } bzero(iterator, sizeof(*iterator)); key = (HotFileKey*) &iterator->key; key->keyLength = HFC_KEYLENGTH; @@ -580,7 +593,7 @@ hfs_recording_init(struct hfsmount *hfsmp) record.itemSize = sizeof(u_int32_t); record.itemCount = 1; #if HFC_VERBOSE - printf("Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN); + printf("hfs: Evaluating space for \"%s\" metadata zone...\n", HFSTOVCB(hfsmp)->vcbVN); #endif /* * Get ready to scan the Catalog file. @@ -678,13 +691,14 @@ hfs_recording_init(struct hfsmount *hfsmp) out0: hfs_end_transaction(hfsmp); #if HFC_VERBOSE - printf("%d files identified out of %d\n", inserted, filecount); + printf("hfs: %d files identified out of %d\n", inserted, filecount); #endif out1: (void) BTScanTerminate(&scanstate, &data, &data, &data); out2: - FREE(iterator, M_TEMP); + if (iterator) + FREE(iterator, M_TEMP); if (hfsmp->hfc_filevp) { (void) hfc_btree_close(hfsmp, hfsmp->hfc_filevp); hfsmp->hfc_filevp = NULL; @@ -938,7 +952,7 @@ update_callback(const HotFileKey *key, u_int32_t *data, u_int32_t *state) static int hotfiles_refine(struct hfsmount *hfsmp) { - BTreeIterator * iterator; + BTreeIterator * iterator = NULL; struct mount *mp; filefork_t * filefork; hotfilelist_t *listp; @@ -955,6 +969,10 @@ hotfiles_refine(struct hfsmount *hfsmp) mp = HFSTOVFS(hfsmp); MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + error = ENOMEM; + goto out; + } bzero(iterator, sizeof(*iterator)); key = (HotFileKey*) &iterator->key; @@ -992,7 +1010,7 @@ hotfiles_refine(struct hfsmount *hfsmp) (IterateCallBackProcPtr)update_callback, &listp->hfl_hotfile[i].hf_temperature); if (error) { - printf("hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID); + printf("hfs: hotfiles_refine: BTUpdateRecord failed %d (file %d)\n", error, key->fileID); error = MacToVFSError(error); // break; } @@ -1008,7 +1026,7 @@ 
hotfiles_refine(struct hfsmount *hfsmp) (void) BTSearchRecord(filefork, iterator, &record, NULL, iterator); error = BTDeleteRecord(filefork, iterator); if (error) { - printf("hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID); + printf("hfs: hotfiles_refine: BTDeleteRecord failed %d (file %d)\n", error, key->fileID); error = MacToVFSError(error); break; } @@ -1018,7 +1036,7 @@ hotfiles_refine(struct hfsmount *hfsmp) key->forkType = 0; error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { - printf("hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID); + printf("hfs: hotfiles_refine: BTInsertRecord failed %d (file %d)\n", error, key->fileID); error = MacToVFSError(error); break; } @@ -1037,7 +1055,8 @@ hotfiles_refine(struct hfsmount *hfsmp) out1: hfs_end_transaction(hfsmp); out: - FREE(iterator, M_TEMP); + if (iterator) + FREE(iterator, M_TEMP); return (error); } @@ -1049,7 +1068,7 @@ hotfiles_refine(struct hfsmount *hfsmp) static int hotfiles_adopt(struct hfsmount *hfsmp) { - BTreeIterator * iterator; + BTreeIterator * iterator = NULL; struct vnode *vp; filefork_t * filefork; hotfilelist_t *listp; @@ -1074,6 +1093,12 @@ hotfiles_adopt(struct hfsmount *hfsmp) return (EPERM); } + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + return (ENOMEM); + } + stage = hfsmp->hfc_stage; hfsmp->hfc_stage = HFC_BUSY; @@ -1082,7 +1107,6 @@ hotfiles_adopt(struct hfsmount *hfsmp) if (last > listp->hfl_count) last = listp->hfl_count; - MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); key = (HotFileKey*) &iterator->key; key->keyLength = HFC_KEYLENGTH; @@ -1114,7 +1138,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) break; } if (!vnode_isreg(vp) && !vnode_islnk(vp)) { - printf("hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, 
VTOC(vp)->c_cnid); + printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); hfs_unlock(VTOC(vp)); vnode_put(vp); listp->hfl_hotfile[i].hf_temperature = 0; @@ -1176,7 +1200,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { - printf("hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); stage = HFC_IDLE; break; @@ -1190,7 +1214,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) data = listp->hfl_hotfile[i].hf_temperature; error = BTInsertRecord(filefork, iterator, &record, record.itemSize); if (error) { - printf("hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); + printf("hfs: hotfiles_adopt: BTInsertRecord failed %d (fileid %d)\n", error, key->fileID); error = MacToVFSError(error); stage = HFC_IDLE; break; @@ -1210,14 +1234,14 @@ hotfiles_adopt(struct hfsmount *hfsmp) } if (hfsmp->hfs_hotfile_freeblks <= 0) { #if HFC_VERBOSE - printf("hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks); + printf("hfs: hotfiles_adopt: free space exhausted (%d)\n", hfsmp->hfs_hotfile_freeblks); #endif break; } } /* end for */ #if HFC_VERBOSE - printf("hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks); + printf("hfs: hotfiles_adopt: [%d] adopted %d blocks (%d left)\n", listp->hfl_next, blksmoved, listp->hfl_totalblocks); #endif /* Finish any outstanding transactions. 
*/ if (startedtrans) { @@ -1229,8 +1253,8 @@ hotfiles_adopt(struct hfsmount *hfsmp) if ((listp->hfl_next >= listp->hfl_count) || (hfsmp->hfs_hotfile_freeblks <= 0)) { #if HFC_VERBOSE - printf("hotfiles_adopt: all done relocating %d files\n", listp->hfl_count); - printf("hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); + printf("hfs: hotfiles_adopt: all done relocating %d files\n", listp->hfl_count); + printf("hfs: hotfiles_adopt: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); #endif stage = HFC_IDLE; } @@ -1253,7 +1277,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) static int hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) { - BTreeIterator * iterator; + BTreeIterator * iterator = NULL; struct vnode *vp; HotFileKey * key; filefork_t * filefork; @@ -1278,13 +1302,18 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) return (EPERM); } + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + hfs_unlock(VTOC(hfsmp->hfc_filevp)); + return (ENOMEM); + } + stage = hfsmp->hfc_stage; hfsmp->hfc_stage = HFC_BUSY; filesmoved = blksmoved = 0; bt_op = kBTreeFirstRecord; - MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); bzero(iterator, sizeof(*iterator)); key = (HotFileKey*) &iterator->key; @@ -1299,20 +1328,20 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) */ if (BTIterateRecord(filefork, bt_op, iterator, NULL, NULL) != 0) { #if HFC_VERBOSE - printf("hotfiles_evict: no more records\n"); + printf("hfs: hotfiles_evict: no more records\n"); #endif error = 0; stage = HFC_ADOPTION; break; } if (key->keyLength != HFC_KEYLENGTH) { - printf("hotfiles_evict: invalid key length %d\n", key->keyLength); + printf("hfs: hotfiles_evict: invalid key length %d\n", key->keyLength); error = EFTYPE; break; } if (key->temperature == HFC_LOOKUPTAG) { #if HFC_VERBOSE - printf("hotfiles_evict: ran into thread records\n"); + printf("hfs: hotfiles_evict: 
ran into thread records\n"); #endif error = 0; stage = HFC_ADOPTION; @@ -1326,13 +1355,13 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) if (error == ENOENT) { goto delete; /* stale entry, go to next */ } else { - printf("hotfiles_evict: err %d getting file %d\n", + printf("hfs: hotfiles_evict: err %d getting file %d\n", error, key->fileID); } break; } if (!vnode_isreg(vp) && !vnode_islnk(vp)) { - printf("hotfiles_evict: huh, not a file %d\n", key->fileID); + printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID); hfs_unlock(VTOC(vp)); vnode_put(vp); goto delete; /* invalid entry, go to next */ @@ -1349,7 +1378,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) */ if (!hotextents(hfsmp, &VTOF(vp)->ff_extents[0])) { #if HFC_VERBOSE - printf("hotfiles_evict: file %d isn't hot!\n", key->fileID); + printf("hfs: hotfiles_evict: file %d isn't hot!\n", key->fileID); #endif hfs_unlock(VTOC(vp)); vnode_put(vp); @@ -1361,7 +1390,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) */ error = hfs_relocate(vp, HFSTOVCB(hfsmp)->nextAllocation, vfs_context_ucred(ctx), vfs_context_proc(ctx)); if (error) { - printf("hotfiles_evict: err %d relocating file %d\n", error, key->fileID); + printf("hfs: hotfiles_evict: err %d relocating file %d\n", error, key->fileID); hfs_unlock(VTOC(vp)); vnode_put(vp); bt_op = kBTreeNextRecord; @@ -1417,7 +1446,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) } /* end while */ #if HFC_VERBOSE - printf("hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks); + printf("hfs: hotfiles_evict: moved %d files (%d blks, %d to go)\n", filesmoved, blksmoved, listp->hfl_reclaimblks); #endif /* Finish any outstanding transactions. 
*/ if (startedtrans) { @@ -1433,7 +1462,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) if (listp->hfl_reclaimblks <= 0) { stage = HFC_ADOPTION; #if HFC_VERBOSE - printf("hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); + printf("hfs: hotfiles_evict: %d blocks free in hot file band\n", hfsmp->hfs_hotfile_freeblks); #endif } FREE(iterator, M_TEMP); @@ -1449,7 +1478,7 @@ static int hotfiles_age(struct hfsmount *hfsmp) { BTreeInfoRec btinfo; - BTreeIterator * iterator; + BTreeIterator * iterator = NULL; BTreeIterator * prev_iterator; FSBufferDescriptor record; FSBufferDescriptor prev_record; @@ -1467,6 +1496,10 @@ hotfiles_age(struct hfsmount *hfsmp) MALLOC(iterator, BTreeIterator *, 2 * sizeof(*iterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + error = ENOMEM; + goto out2; + } bzero(iterator, 2 * sizeof(*iterator)); key = (HotFileKey*) &iterator->key; @@ -1595,7 +1628,8 @@ hotfiles_age(struct hfsmount *hfsmp) out1: hfs_end_transaction(hfsmp); out2: - FREE(iterator, M_TEMP); + if (iterator) + FREE(iterator, M_TEMP); return (error); } @@ -1666,20 +1700,20 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) hfs_systemfile_unlock(hfsmp, lockflags); if (error) { - printf("hfc_btree_open: cat_lookup error %d\n", error); + printf("hfs: hfc_btree_open: cat_lookup error %d\n", error); return (error); } again: cdesc.cd_flags |= CD_ISMETA; error = hfs_getnewvnode(hfsmp, NULL, NULL, &cdesc, 0, &cattr, &cfork, &vp); if (error) { - printf("hfc_btree_open: hfs_getnewvnode error %d\n", error); + printf("hfs: hfc_btree_open: hfs_getnewvnode error %d\n", error); cat_releasedesc(&cdesc); return (error); } if (!vnode_issystem(vp)) { #if HFC_VERBOSE - printf("hfc_btree_open: file has UBC, try again\n"); + printf("hfs: hfc_btree_open: file has UBC, try again\n"); #endif hfs_unlock(VTOC(vp)); vnode_recycle(vp); @@ -1693,7 +1727,7 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) /* Open the B-tree file for 
writing... */ error = BTOpenPath(VTOF(vp), (KeyCompareProcPtr) hfc_comparekeys); if (error) { - printf("hfc_btree_open: BTOpenPath error %d\n", error); + printf("hfs: hfc_btree_open: BTOpenPath error %d\n", error); error = MacToVFSError(error); } @@ -1705,7 +1739,7 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) vnode_put(vp); if (!vnode_issystem(vp)) - panic("hfc_btree_open: not a system file (vp = %p)", vp); + panic("hfs: hfc_btree_open: not a system file (vp = %p)", vp); return (error); } @@ -1723,7 +1757,7 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } if (vnode_get(vp) == 0) { @@ -1758,7 +1792,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent int error; if (hfsmp->hfc_filevp) - panic("hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); + panic("hfs: hfc_btree_create: hfc_filevp exists (vp = %p)", hfsmp->hfc_filevp); error = VFS_ROOT(HFSTOVFS(hfsmp), &dvp, ctx); if (error) { @@ -1783,7 +1817,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent /* call ourselves directly, ignore the higher-level VFS file creation code */ error = VNOP_CREATE(dvp, &vp, &cname, &va, ctx); if (error) { - printf("HFS: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + printf("hfs: error %d creating HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); goto out; } if (dvp) { @@ -1801,7 +1835,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent goto out; } - printf("HFS: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN); + printf("hfs: created HFBT on %s\n", HFSTOVCB(hfsmp)->vcbVN); if (VTOF(vp)->ff_size < nodesize) { caddr_t buffer; @@ -1874,9 +1908,9 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent index[(nodesize / 2) - 4] = SWAP_BE16 (offset); vnode_setnoflush(vp); - error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, 
ctx); + error = hfs_truncate(vp, (off_t)filesize, IO_NDELAY, 0, 0, ctx); if (error) { - printf("HFS: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + printf("hfs: error %d growing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); goto out; } cp->c_flag |= C_ZFWANTSYNC; @@ -1886,7 +1920,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent struct vnop_write_args args; uio_t auio; - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)buffer, nodesize); args.a_desc = &vnop_write_desc; @@ -1900,7 +1934,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent error = hfs_vnop_write(&args); if (error) - printf("HFS: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); + printf("hfs: error %d writing HFBT on %s\n", error, HFSTOVCB(hfsmp)->vcbVN); uio_free(auio); } @@ -2196,7 +2230,7 @@ hf_getsortedlist(hotfile_data_t * hotdata, hotfilelist_t *sortedlist) sortedlist->hfl_count = i; #if HFC_VERBOSE - printf("HFS: hf_getsortedlist returned %d entries\n", i); + printf("hfs: hf_getsortedlist returned %d entries\n", i); #endif } @@ -2219,7 +2253,7 @@ hf_printtree(hotfile_entry_t * root) { if (root) { hf_printtree(root->left); - printf("temperature: % 8d, fileid %d\n", root->temperature, root->fileid); + printf("hfs: temperature: % 8d, fileid %d\n", root->temperature, root->fileid); hf_printtree(root->right); } } diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c index ba47918e3..878c9def0 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,7 +83,7 @@ createindirectlink(struct hfsmount *hfsmp, u_int32_t linknum, struct cat_desc *d struct cat_attr attr; if (linknum == 0) { - printf("createindirectlink: linknum is zero!\n"); + printf("hfs: createindirectlink: linknum is zero!\n"); return (EINVAL); } @@ -377,6 +377,7 @@ hfs_vnop_link(struct vnop_link_args *ap) struct cnode *tdcp; struct cnode *fdcp = NULL; struct cat_desc todesc; + cnid_t parentcnid; int lockflags = 0; int intrans = 0; enum vtype v_type; @@ -424,7 +425,7 @@ hfs_vnop_link(struct vnop_link_args *ap) } /* Lock the cnodes. */ if (fdvp) { - if ((error = hfs_lockfour(VTOC(tdvp), VTOC(vp), VTOC(fdvp), NULL, HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lockfour(VTOC(tdvp), VTOC(vp), VTOC(fdvp), NULL, HFS_EXCLUSIVE_LOCK, NULL))) { if (fdvp) { vnode_put(fdvp); } @@ -438,9 +439,11 @@ hfs_vnop_link(struct vnop_link_args *ap) } tdcp = VTOC(tdvp); cp = VTOC(vp); - - /* - * Make sure we don't race the src or dst parent directories with rmdir. + /* grab the parent CNID from originlist after grabbing cnode locks */ + parentcnid = hfs_currentparent(cp); + + /* + * Make sure we didn't race the src or dst parent directories with rmdir. * Note that we should only have a src parent directory cnode lock * if we're dealing with a directory hardlink here. */ @@ -450,13 +453,15 @@ hfs_vnop_link(struct vnop_link_args *ap) goto out; } } - + if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) { error = ENOENT; goto out; } - - /* Check src for errors: too many links, immutable, race with unlink */ + + /* Check the source for errors: + * too many links, immutable, race with unlink + */ if (cp->c_linkcount >= HFS_LINK_MAX) { error = EMLINK; goto out; @@ -525,9 +530,9 @@ hfs_vnop_link(struct vnop_link_args *ap) * - No ancestor of the new directory hard link (destination) * is a directory hard link. 
*/ - if ((cp->c_parentcnid == tdcp->c_fileid) || + if ((parentcnid == tdcp->c_fileid) || (tdcp->c_fileid == kHFSRootFolderID) || - (cp->c_parentcnid == kHFSRootFolderID) || + (parentcnid == kHFSRootFolderID) || cat_check_link_ancestry(hfsmp, tdcp->c_fileid, cp->c_fileid)) { error = EPERM; /* abide by the rules, you did not */ goto out; @@ -569,7 +574,7 @@ hfs_vnop_link(struct vnop_link_args *ap) error = hfs_update(tdvp, 0); if (error && error != EIO && error != ENXIO) { - panic("hfs_vnop_link: error updating tdvp %p\n", tdvp); + panic("hfs_vnop_link: error %d updating tdvp %p\n", error, tdvp); } if ((v_type == VDIR) && @@ -581,7 +586,7 @@ hfs_vnop_link(struct vnop_link_args *ap) fdcp->c_flag |= C_FORCEUPDATE; error = hfs_update(fdvp, 0); if (error && error != EIO && error != ENXIO) { - panic("hfs_vnop_link: error updating fdvp %p\n", fdvp); + panic("hfs_vnop_link: error %d updating fdvp %p\n", error, fdvp); } /* Set kHFSHasChildLinkBit in the source hierarchy */ @@ -601,8 +606,6 @@ hfs_vnop_link(struct vnop_link_args *ap) panic("hfs_vnop_link: error %d updating vp @ %p\n", ret, vp); } - HFS_KNOTE(vp, NOTE_LINK); - HFS_KNOTE(tdvp, NOTE_WRITE); out: if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -644,7 +647,6 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c cnid_t nextlinkid; int lockflags = 0; int started_tr; - int rm_priv_file = 0; int error; if (hfsmp->hfs_flags & HFS_STANDARD) { @@ -822,10 +824,7 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c /* Update file system stats. */ hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); - /* The last link of a directory removed the inode. */ - if (rm_priv_file) { - hfs_volupdate(hfsmp, VOL_RMFILE, 0); - } + /* * All done with this cnode's descriptor... 
* @@ -835,8 +834,6 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c */ cat_releasedesc(&cp->c_desc); - HFS_KNOTE(dvp, NOTE_WRITE); - HFS_KNOTE(vp, NOTE_DELETE); out: if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -1025,7 +1022,6 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid) void * thread = current_thread(); int count = 0; int maxorigins = (S_ISDIR(cp->c_mode)) ? MAX_CACHED_ORIGINS : MAX_CACHED_FILE_ORIGINS; - /* * Look for an existing origin first. If not found, create/steal one. */ diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index 1e94b8fb8..c82e68cb4 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -362,7 +362,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int } goto exit; } - + /* * Save the origin info for file and directory hardlinks. Directory hardlinks * need the origin for '..' lookups, and file hardlinks need it to ensure that @@ -370,7 +370,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int * We want to restrict saving the cache entries to LOOKUP namei operations, since * we're really doing this to protect getattr. 
*/ - if ((cnp->cn_nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) { + if ((nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) { hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid); } *cnode_locked = 1; @@ -379,7 +379,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int if (rsrc_warn) { if ((VTOC(tvp)->c_flag & C_WARNED_RSRC) == 0) { VTOC(tvp)->c_flag |= C_WARNED_RSRC; - printf("%.200s: file access by '/rsrc' was deprecated in 10.4\n", + printf("hfs: %.200s: file access by '/rsrc' was deprecated in 10.4\n", cnp->cn_nameptr); } } @@ -482,19 +482,23 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) desc.cd_cnid = 0; desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0; + lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK); if (cat_lookup(VTOHFS(vp), &desc, 0, &desc, NULL, NULL, NULL) == 0) replace_desc(cp, &desc); hfs_systemfile_unlock(VTOHFS(dvp), lockflags); - } - - /* Save the lookup result in the origin list for future lookups, but - * only if it was through a LOOKUP nameiop - */ - if (cnp->cn_nameiop == LOOKUP) { - hfs_savelinkorigin(cp, dcp->c_fileid); - } + /* + * Save the origin info for file and directory hardlinks. Directory hardlinks + * need the origin for '..' lookups, and file hardlinks need it to ensure that + * competing lookups do not cause us to vend different hardlinks than the ones requested. + * We want to restrict saving the cache entries to LOOKUP namei operations, since + * we're really doing this to protect getattr. 
+ */ + if (cnp->cn_nameiop == LOOKUP) { + hfs_savelinkorigin(cp, dcp->c_fileid); + } + } hfs_unlock(cp); } #if NAMEDRSRCFORK @@ -519,7 +523,7 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) hfs_lock(cp, HFS_FORCE_LOCK); if ((cp->c_flag & C_WARNED_RSRC) == 0) { cp->c_flag |= C_WARNED_RSRC; - printf("%.200s: file access by '/rsrc' was deprecated in 10.4\n", cnp->cn_nameptr); + printf("hfs: %.200s: file access by '/rsrc' was deprecated in 10.4\n", cnp->cn_nameptr); } hfs_unlock(cp); } diff --git a/bsd/hfs/hfs_macos_defs.h b/bsd/hfs/hfs_macos_defs.h index 7bc43b99f..f150e2905 100644 --- a/bsd/hfs/hfs_macos_defs.h +++ b/bsd/hfs/hfs_macos_defs.h @@ -159,7 +159,7 @@ enum { #endif /* !TYPE_BOOL */ -EXTERN_API( void ) DebugStr(ConstStr255Param debuggerMsg); +EXTERN_API( void ) DebugStr(const char * debuggerMsg); /********************************************************************************* diff --git a/bsd/hfs/hfs_notification.c b/bsd/hfs/hfs_notification.c index 5ea5825a6..517c8ecdc 100644 --- a/bsd/hfs/hfs_notification.c +++ b/bsd/hfs/hfs_notification.c @@ -55,21 +55,40 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) { fsid_t fsid; + u_int32_t freeblks, state=999; fsid.val[0] = (long)hfsmp->hfs_raw_dev; fsid.val[1] = (long)vfs_typenum(HFSTOVFS(hfsmp)); - if (hfsmp->hfs_notification_conditions & VQ_LOWDISK) { - /* Check to see whether the free space is back above the minimal level: */ - if (hfs_freeblks(hfsmp, 1) > hfsmp->hfs_freespace_notify_desiredlevel) { - hfsmp->hfs_notification_conditions &= ~VQ_LOWDISK; - vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + freeblks = hfs_freeblks(hfsmp, 1); + + if (freeblks < hfsmp->hfs_freespace_notify_dangerlimit) { + state = 2; + } else if (freeblks < hfsmp->hfs_freespace_notify_warninglimit) { + state = 1; + } else if (freeblks >= hfsmp->hfs_freespace_notify_desiredlevel) { + state = 0; + } + + if (state == 2 && !(hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK)) { + 
hfsmp->hfs_notification_conditions |= (VQ_VERYLOWDISK|VQ_LOWDISK); + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else if (state == 1) { + if (!(hfsmp->hfs_notification_conditions & VQ_LOWDISK)) { + hfsmp->hfs_notification_conditions |= VQ_LOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + hfsmp->hfs_notification_conditions &= ~VQ_VERYLOWDISK; + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); } - } else { - /* Check to see whether the free space fell below the requested limit: */ - if (hfs_freeblks(hfsmp, 1) < hfsmp->hfs_freespace_notify_warninglimit) { - hfsmp->hfs_notification_conditions |= VQ_LOWDISK; - vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } else if (state == 0) { + if (hfsmp->hfs_notification_conditions & (VQ_LOWDISK|VQ_VERYLOWDISK)) { + hfsmp->hfs_notification_conditions &= ~(VQ_VERYLOWDISK|VQ_LOWDISK); + if (hfsmp->hfs_notification_conditions == 0) { + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } else { + vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); + } } - }; + } } diff --git a/bsd/hfs/hfs_quota.c b/bsd/hfs/hfs_quota.c index 56bc0e4dc..5c219f0d8 100644 --- a/bsd/hfs/hfs_quota.c +++ b/bsd/hfs/hfs_quota.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -227,7 +227,7 @@ hfs_chkdqchg(cp, change, cred, type) if ((dq->dq_flags & DQ_BLKS) == 0 && cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\nwrite failed, %s disk limit reached\n", + printf("\nhfs: write failed, %s disk limit reached\n", quotatypes[type]); #endif dq->dq_flags |= DQ_BLKS; @@ -249,18 +249,18 @@ hfs_chkdqchg(cp, change, cred, type) VTOHFS(vp)->hfs_qfiles[type].qf_btime; #if 0 if (cp->c_uid == kauth_cred_getuid(cred)) - printf("\nwarning, %s %s\n", + printf("\nhfs: warning, %s %s\n", quotatypes[type], "disk quota exceeded"); #endif dqunlock(dq); return (0); } - if (tv.tv_sec > dq->dq_btime) { + if (tv.tv_sec > (time_t)dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\nwrite failed, %s %s\n", + printf("\nhfs: write failed, %s %s\n", quotatypes[type], "disk quota exceeded for too long"); #endif @@ -282,7 +282,7 @@ hfs_chkdqchg(cp, change, cred, type) int hfs_chkiq(cp, change, cred, flags) register struct cnode *cp; - long change; + int32_t change; kauth_cred_t cred; int flags; { @@ -347,6 +347,66 @@ hfs_chkiq(cp, change, cred, flags) return (error); } + +/* + * Check to see if a change to a user's allocation should be permitted or not. + * Return EDQUOT if it should not be permitted. Return 0 if + * it should be allowed. + */ +int hfs_isiqchg_allowed(dq, hfsmp, change, cred, type, uid) + struct dquot* dq; + struct hfsmount* hfsmp; + int32_t change; + kauth_cred_t cred; + int type; + uid_t uid; +{ + u_int32_t ncurinodes; + + dqlock(dq); + + ncurinodes = dq->dq_curinodes + change; + /* + * If user would exceed their hard limit, disallow cnode allocation. 
+ */ + if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { + if ((dq->dq_flags & DQ_INODS) == 0 && + uid == kauth_cred_getuid(cred)) { + dq->dq_flags |= DQ_INODS; + } + dqunlock(dq); + + return (EDQUOT); + } + /* + * If user is over their soft limit for too long, disallow cnode + * allocation. Reset time limit as they cross their soft limit. + */ + if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { + struct timeval tv; + + microuptime(&tv); + if (dq->dq_curinodes < dq->dq_isoftlimit) { + dq->dq_itime = tv.tv_sec + hfsmp->hfs_qfiles[type].qf_itime; + dqunlock(dq); + return (0); + } + if (tv.tv_sec > (time_t)dq->dq_itime) { + if (((dq->dq_flags & DQ_INODS) == 0) && + (uid == kauth_cred_getuid(cred))) { + dq->dq_flags |= DQ_INODS; + } + dqunlock(dq); + + return (EDQUOT); + } + } + dqunlock(dq); + + return (0); +} + + /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. @@ -354,12 +414,12 @@ hfs_chkiq(cp, change, cred, flags) int hfs_chkiqchg(cp, change, cred, type) struct cnode *cp; - long change; + int32_t change; kauth_cred_t cred; int type; { register struct dquot *dq = cp->c_dquot[type]; - unsigned long ncurinodes; + u_int32_t ncurinodes; struct vnode *vp = cp->c_vp ? 
cp->c_vp : cp->c_rsrc_vp; dqlock(dq); @@ -372,7 +432,7 @@ hfs_chkiqchg(cp, change, cred, type) if ((dq->dq_flags & DQ_INODS) == 0 && cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\nwrite failed, %s cnode limit reached\n", + printf("\nhfs: write failed, %s cnode limit reached\n", quotatypes[type]); #endif dq->dq_flags |= DQ_INODS; @@ -394,18 +454,18 @@ hfs_chkiqchg(cp, change, cred, type) VTOHFS(vp)->hfs_qfiles[type].qf_itime; #if 0 if (cp->c_uid == kauth_cred_getuid(cred)) - printf("\nwarning, %s %s\n", + printf("\nhfs: warning, %s %s\n", quotatypes[type], "cnode quota exceeded"); #endif dqunlock(dq); return (0); } - if (tv.tv_sec > dq->dq_itime) { + if (tv.tv_sec > (time_t)dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && cp->c_uid == kauth_cred_getuid(cred)) { #if 0 - printf("\nwrite failed, %s %s\n", + printf("\nhfs: write failed, %s %s\n", quotatypes[type], "cnode quota exceeded for too long"); #endif @@ -607,7 +667,7 @@ hfs_quotaoff(__unused struct proc *p, struct mount *mp, register int type) /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. - * + * * hfs_quotaoff_callback will be called for each vnode * hung off of this mount point * the vnode will be in an 'unbusy' state (VNODE_WAIT) and @@ -643,13 +703,75 @@ hfs_quotaoff(__unused struct proc *p, struct mount *mp, register int type) return (error); } +/* + * hfs_quotacheck - checks quotas mountwide for a hypothetical situation. It probes + * the quota data structures to see if adding an inode would be allowed or not. If it + * will be allowed, the change is made. Otherwise, it reports an error back out so the + * caller will know not to proceed with inode allocation in the HFS Catalog. + * + * Note that this function ONLY tests for addition of inodes, not subtraction. 
+ */ +int hfs_quotacheck(hfsmp, change, uid, gid, cred) + struct hfsmount *hfsmp; + int change; + uid_t uid; + gid_t gid; + kauth_cred_t cred; +{ + struct dquot *dq = NULL; + struct proc *p; + int error = 0; + int i; + id_t id = uid; + + p = current_proc(); + if (!IS_VALID_CRED(cred)) { + /* This use of proc_ucred() is safe because kernproc credential never changes */ + cred = proc_ucred(kernproc); + } + + if (suser(cred, NULL) || proc_forcequota(p)) { + for (i = 0; i < MAXQUOTAS; i++) { + /* Select if user or group id should be used */ + if (i == USRQUOTA) + id = uid; + else if (i == GRPQUOTA) + id = gid; + + error = dqget(id, &hfsmp->hfs_qfiles[i], i, &dq); + if (error && (error != EINVAL)) + break; + + error = 0; + if (dq == NODQUOT) + continue; + + /* Check quota information */ + error = hfs_isiqchg_allowed(dq, hfsmp, change, cred, i, id); + if (error) { + dqrele(dq); + break; + } + + dqlock(dq); + /* Update quota information */ + dq->dq_curinodes += change; + dqunlock(dq); + dqrele(dq); + } + } + + return error; +} + + /* * Q_GETQUOTA - return current values in a dqblk structure. 
*/ int hfs_getquota(mp, id, type, datap) struct mount *mp; - u_long id; + u_int32_t id; int type; caddr_t datap; { @@ -675,7 +797,7 @@ hfs_getquota(mp, id, type, datap) int hfs_setquota(mp, id, type, datap) struct mount *mp; - u_long id; + u_int32_t id; int type; caddr_t datap; { @@ -737,7 +859,7 @@ hfs_setquota(mp, id, type, datap) int hfs_setuse(mp, id, type, datap) struct mount *mp; - u_long id; + u_int32_t id; int type; caddr_t datap; { diff --git a/bsd/hfs/hfs_quota.h b/bsd/hfs/hfs_quota.h index c36025242..a57dbdff3 100644 --- a/bsd/hfs/hfs_quota.h +++ b/bsd/hfs/hfs_quota.h @@ -86,16 +86,18 @@ typedef struct ucred *kauth_cred_t; __BEGIN_DECLS int hfs_chkdq(struct cnode *, int64_t, kauth_cred_t, int); int hfs_chkdqchg(struct cnode *, int64_t, kauth_cred_t, int); -int hfs_chkiq(struct cnode *, long, kauth_cred_t, int); -int hfs_chkiqchg(struct cnode *, long, kauth_cred_t, int); +int hfs_chkiq(struct cnode *, int32_t, kauth_cred_t, int); +int hfs_chkiqchg(struct cnode *, int32_t, kauth_cred_t, int); int hfs_getinoquota(struct cnode *); -int hfs_getquota(struct mount *, u_long, int, caddr_t); +int hfs_getquota(struct mount *, u_int32_t, int, caddr_t); int hfs_qsync(struct mount *mp); int hfs_quotaoff(struct proc *, struct mount *, int); int hfs_quotaon(struct proc *, struct mount *, int, caddr_t); int hfs_quotastat(struct mount *, int, caddr_t); -int hfs_setquota(struct mount *, u_long, int, caddr_t); -int hfs_setuse(struct mount *, u_long, int, caddr_t); +int hfs_setquota(struct mount *, u_int32_t, int, caddr_t); +int hfs_setuse(struct mount *, u_int32_t, int, caddr_t); +int hfs_isiqchg_allowed(struct dquot *, struct hfsmount *, int32_t, kauth_cred_t, int, uid_t); +int hfs_quotacheck (struct hfsmount *, int , uid_t, gid_t, kauth_cred_t); __END_DECLS #if DIAGNOSTIC diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index e5ab6c8b9..6dc30afad 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -78,14 +79,15 @@ enum { MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */ }; -/* from bsd/vfs/vfs_cluster.c */ -extern int is_file_clean(vnode_t vp, off_t filesize); /* from bsd/hfs/hfs_vfsops.c */ -extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); +extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); static int hfs_clonefile(struct vnode *, int, int, int); static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); +static int hfs_minorupdate(struct vnode *vp); +static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); + int flush_cache_on_write = 0; SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files"); @@ -108,7 +110,6 @@ hfs_vnop_read(struct vnop_read_args *ap) off_t offset = uio_offset(uio); int retval = 0; - /* Preflight checks */ if (!vnode_isreg(vp)) { /* can only read regular files */ @@ -121,6 +122,34 @@ hfs_vnop_read(struct vnop_read_args *ap) return (0); /* Nothing left to do */ if (offset < 0) return (EINVAL); /* cant read from a negative offset */ + +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ + return 0; + } + /* otherwise read the resource fork normally */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (retval == 0) { + /* 
successful read, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return retval; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + retval = 0; + } + } +#endif /* HFS_COMPRESSION */ cp = VTOC(vp); fp = VTOF(vp); @@ -202,7 +231,7 @@ hfs_vnop_write(struct vnop_write_args *ap) off_t actualBytesAdded; off_t filebytes; off_t offset; - size_t resid; + ssize_t resid; int eflags; int ioflag = ap->a_ioflag; int retval = 0; @@ -211,6 +240,22 @@ hfs_vnop_write(struct vnop_write_args *ap) int partialwrite = 0; int exclusive_lock = 0; +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return EACCES; + case FILE_IS_CONVERTING: + /* if FILE_IS_CONVERTING, we allow writes */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } +#endif + // LP64todo - fix this! uio_resid may be 64-bit value resid = uio_resid(uio); offset = uio_offset(uio); @@ -264,10 +309,12 @@ hfs_vnop_write(struct vnop_write_args *ap) /* If the truncate lock is shared, and if we either have virtual * blocks or will need to extend the file, upgrade the truncate * to exclusive lock. If upgrade fails, we lose the lock and - * have to get exclusive lock again + * have to get exclusive lock again. Note that we want to + * grab the truncate lock exclusive even if we're not allocating new blocks + * because we could still be growing past the LEOF. 
*/ if ((exclusive_lock == 0) && - ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) { + ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) { exclusive_lock = 1; /* Lock upgrade failed and we lost our shared lock, try again */ if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { @@ -540,7 +587,6 @@ hfs_vnop_write(struct vnop_write_args *ap) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL); } } - HFS_KNOTE(vp, NOTE_WRITE); ioerr_exit: /* @@ -565,7 +611,7 @@ hfs_vnop_write(struct vnop_write_args *ap) cnode_locked = 1; } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, - 0, ap->a_context); + 0, 0, ap->a_context); // LP64todo - fix this! resid needs to by user_ssize_t uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); @@ -612,16 +658,26 @@ struct access_t { int *file_ids; /* IN: array of file ids */ gid_t *groups; /* IN: array of groups */ short *access; /* OUT: access info for each file (0 for 'has access') */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_access_t { + uid_t uid; /* IN: effective user id */ + short flags; /* IN: access requested (i.e. R_OK) */ + short num_groups; /* IN: number of groups user belongs to */ + int num_files; /* IN: number of files to process */ + user32_addr_t file_ids; /* IN: array of file ids */ + user32_addr_t groups; /* IN: array of groups */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ }; -struct user_access_t { +struct user64_access_t { uid_t uid; /* IN: effective user id */ short flags; /* IN: access requested (i.e. 
R_OK) */ short num_groups; /* IN: number of groups user belongs to */ int num_files; /* IN: number of files to process */ - user_addr_t file_ids; /* IN: array of file ids */ - user_addr_t groups; /* IN: array of groups */ - user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t groups; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ }; @@ -637,17 +693,28 @@ struct ext_access_t { short *access; /* OUT: access info for each file (0 for 'has access') */ uint32_t num_parents; /* future use */ cnid_t *parents; /* future use */ +} __attribute__((unavailable)); // this structure is for reference purposes only + +struct user32_ext_access_t { + uint32_t flags; /* IN: access requested (i.e. R_OK) */ + uint32_t num_files; /* IN: number of files to process */ + uint32_t map_size; /* IN: size of the bit map */ + user32_addr_t file_ids; /* IN: Array of file ids */ + user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */ + user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + uint32_t num_parents; /* future use */ + user32_addr_t parents; /* future use */ }; -struct ext_user_access_t { +struct user64_ext_access_t { uint32_t flags; /* IN: access requested (i.e. 
R_OK) */ uint32_t num_files; /* IN: number of files to process */ uint32_t map_size; /* IN: size of the bit map */ - user_addr_t file_ids; /* IN: array of file ids */ - user_addr_t bitmap; /* IN: array of groups */ - user_addr_t access; /* OUT: access info for each file (0 for 'has access') */ + user64_addr_t file_ids; /* IN: array of file ids */ + user64_addr_t bitmap; /* IN: array of groups */ + user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */ uint32_t num_parents;/* future use */ - user_addr_t parents;/* future use */ + user64_addr_t parents;/* future use */ }; @@ -708,7 +775,7 @@ lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) } if (cache->numcached > NUM_CACHE_ENTRIES) { - /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n", + /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n", cache->numcached, NUM_CACHE_ENTRIES);*/ cache->numcached = NUM_CACHE_ENTRIES; } @@ -757,11 +824,11 @@ add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) /* if the cache is full, do a replace rather than an insert */ if (cache->numcached >= NUM_CACHE_ENTRIES) { - //printf("cache is full (%d). replace at index %d\n", cache->numcached, index); + //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index); cache->numcached = NUM_CACHE_ENTRIES-1; if (index > cache->numcached) { - // printf("index %d pinned to %d\n", index, cache->numcached); + // printf("hfs: index %d pinned to %d\n", index, cache->numcached); index = cache->numcached; } } @@ -809,7 +876,7 @@ snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * isn't incore, then go to the catalog. 
*/ static int -do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid, +do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp) { int error = 0; @@ -819,12 +886,13 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cn cnattrp->ca_uid = skip_cp->c_uid; cnattrp->ca_gid = skip_cp->c_gid; cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; keyp->hfsPlus.parentID = skip_cp->c_parentcnid; } else { struct cinfo c_info; /* otherwise, check the cnode hash incase the file/dir is incore */ - if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) { + if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) { cnattrp->ca_uid = c_info.uid; cnattrp->ca_gid = c_info.gid; cnattrp->ca_mode = c_info.mode; @@ -854,7 +922,7 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cn */ static int do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, - struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev, + struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, struct vfs_context *my_context, char *bitmap, uint32_t map_size, @@ -919,7 +987,7 @@ do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HF /* do the lookup (checks the cnode hash, then the catalog) */ - myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr); + myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr); if (myErr) { goto ExitThisRoutine; /* no access */ } @@ -1019,15 +1087,13 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, */ Boolean check_leaf = true; - struct ext_user_access_t *user_access_structp; - struct ext_user_access_t tmp_user_access; + struct user64_ext_access_t *user_access_structp; + struct 
user64_ext_access_t tmp_user_access; struct access_cache cache; - int error = 0; + int error = 0, prev_parent_check_ok=1; unsigned int i; - dev_t dev = VTOC(vp)->c_dev; - short flags; unsigned int num_files = 0; int map_size = 0; @@ -1064,15 +1130,15 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, } if (is64bit) { - if (arg_size != sizeof(struct ext_user_access_t)) { + if (arg_size != sizeof(struct user64_ext_access_t)) { error = EINVAL; goto err_exit_bulk_access; } - user_access_structp = (struct ext_user_access_t *)ap->a_data; + user_access_structp = (struct user64_ext_access_t *)ap->a_data; - } else if (arg_size == sizeof(struct access_t)) { - struct access_t *accessp = (struct access_t *)ap->a_data; + } else if (arg_size == sizeof(struct user32_access_t)) { + struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data; // convert an old style bulk-access struct to the new style tmp_user_access.flags = accessp->flags; @@ -1084,8 +1150,8 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, tmp_user_access.num_parents = 0; user_access_structp = &tmp_user_access; - } else if (arg_size == sizeof(struct ext_access_t)) { - struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data; + } else if (arg_size == sizeof(struct user32_ext_access_t)) { + struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data; // up-cast from a 32-bit version of the struct tmp_user_access.flags = accessp->flags; @@ -1198,7 +1264,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, if (check_leaf) { /* do the lookup (checks the cnode hash, then the catalog) */ - error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr); + error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr); if (error) { access[i] = (short) error; continue; @@ -1207,6 +1273,10 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, if (parents) { // Check if the leaf matches one of the parent 
scopes leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL); + if (leaf_index >= 0 && parents[leaf_index] == cnid) + prev_parent_check_ok = 0; + else if (leaf_index >= 0) + prev_parent_check_ok = 1; } // if the thing has acl's, do the full permission check @@ -1250,14 +1320,14 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, } /* if the last guy had the same parent and had access, we're done */ - if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) { + if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) { cache.cachehits++; access[i] = 0; continue; } myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, - skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents); + skip_cp, p, cred, context,bitmap, map_size, parents, num_parents); if (myaccess || (error == ESRCH && leaf_index != -1)) { access[i] = 0; // have access.. no errors to report @@ -1283,7 +1353,7 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, err_exit_bulk_access: - //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); + //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); if (file_ids) kfree(file_ids, sizeof(int) * num_files); @@ -1335,6 +1405,30 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { proc_t p = vfs_context_proc(context); struct vfsstatfs *vfsp; boolean_t is64bit; + off_t jnl_start, jnl_size; + struct hfs_journal_info *jip; +#if HFS_COMPRESSION + int compressed = 0; + off_t uncompressed_size = -1; + int decmpfs_error = 0; + + if (ap->a_command == F_RDADVISE) { + /* we need to inspect the decmpfs state of the file as early as possible */ + compressed = hfs_file_is_compressed(VTOC(vp), 0); + if (compressed) { + if (VNODE_IS_RSRC(vp)) { + /* if this 
is the resource fork, treat it as if it were empty */ + uncompressed_size = 0; + } else { + decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0); + if (decmpfs_error != 0) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } + } + } + } +#endif /* HFS_COMPRESSION */ is64bit = proc_is64bit(p); @@ -1361,10 +1455,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { bufptr = (char *)ap->a_data; cnid = strtoul(bufptr, NULL, 10); - /* We need to call hfs_vfs_vget to leverage the code that will fix the - * origin list for us if needed, as opposed to calling hfs_vget, since - * we will need it for the subsequent build_path call. + /* We need to call hfs_vfs_vget to leverage the code that will + * fix the origin list for us if needed, as opposed to calling + * hfs_vget, since we will need the parent for build_path call. */ + if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { return (error); } @@ -1417,6 +1512,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + /* file system must not be mounted read-only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data); } @@ -1432,6 +1532,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } + + /* filesystem must not be mounted read only */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } newsize = *(u_int64_t *)ap->a_data; cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize; @@ -1472,7 +1577,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { * after metadata zone and set flag in mount structure to indicate * that nextAllocation should not be updated again. 
*/ - HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + if (hfsmp->hfs_metazone_end != 0) { + HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); + } hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION; } else { hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION; @@ -1491,6 +1598,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { struct hfs_backingstoreinfo *bsdata; int error = 0; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { return (EALREADY); } @@ -1537,6 +1647,25 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { vfs_markdependency(hfsmp->hfs_mp); + /* + * If the sparse image is on a sparse image file (as opposed to a sparse + * bundle), then we may need to limit the free space to the maximum size + * of a file on that volume. So we query (using pathconf), and if we get + * a meaningful result, we cache the number of blocks for later use in + * hfs_freeblks(). + */ + hfsmp->hfs_backingfs_maxblocks = 0; + if (vnode_vtype(di_vp) == VREG) { + int terr; + int hostbits; + terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context); + if (terr == 0 && hostbits != 0 && hostbits < 64) { + u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits; + + hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize; + } + } + (void)vnode_put(di_vp); file_drop(bsdata->backingfd); return (0); @@ -1549,6 +1678,10 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { kauth_cred_getuid(cred) != vfsp->f_owner) { return (EACCES); /* must be owner of file system */ } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { @@ -1565,15 +1698,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { case F_FREEZE_FS: { struct mount *mp; - if (!is_suser()) - return (EACCES); - mp = vnode_mount(vp); hfsmp = VFSTOHFS(mp); if (!(hfsmp->jnl)) return (ENOTSUP); + vfsp = vfs_statfs(mp); + + if (kauth_cred_getuid(cred) != 
vfsp->f_owner && + !kauth_cred_issuser(cred)) + return (EACCES); + lck_rw_lock_exclusive(&hfsmp->hfs_insync); // flush things before we get started to try and prevent @@ -1582,6 +1718,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { // deadlock against ourselves. vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); hfs_global_exclusive_lock_acquire(hfsmp); + + // DO NOT call hfs_journal_flush() because that takes a + // shared lock on the global exclusive lock! journal_flush(hfsmp->jnl); // don't need to iterate on all vnodes, we just need to @@ -1602,7 +1741,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } case F_THAW_FS: { - if (!is_suser()) + vfsp = vfs_statfs(vnode_mount(vp)); + if (kauth_cred_getuid(cred) != vfsp->f_owner && + !kauth_cred_issuser(cred)) return (EACCES); // if we're not the one who froze the fs then we @@ -1629,9 +1770,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } if (is64bit) { - size = sizeof(struct user_access_t); + size = sizeof(struct user64_access_t); } else { - size = sizeof(struct access_t); + size = sizeof(struct user32_access_t); } return do_bulk_access_check(hfsmp, vp, ap, size, context); @@ -1645,9 +1786,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } if (is64bit) { - size = sizeof(struct ext_user_access_t); + size = sizeof(struct user64_ext_access_t); } else { - size = sizeof(struct ext_access_t); + size = sizeof(struct user32_ext_access_t); } return do_bulk_access_check(hfsmp, vp, ap, size, context); @@ -1663,6 +1804,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { vfsp = vfs_statfs(HFSTOVFS(hfsmp)); state = *(int *)ap->a_data; + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } // super-user can enable or disable acl's on a volume. 
// the volume owner can only enable acl's if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) { @@ -1682,6 +1826,10 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } state = *(int *)ap->a_data; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } /* Super-user can enable or disable extent-based extended * attribute support on a volume @@ -1697,7 +1845,10 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { case F_FULLFSYNC: { int error; - + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); if (error == 0) { error = hfs_fsync(vp, MNT_WAIT, TRUE, p); @@ -1742,9 +1893,20 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { /* Protect against a size change. */ hfs_lock_truncate(VTOC(vp), TRUE); +#if HFS_COMPRESSION + if (compressed && (uncompressed_size == -1)) { + /* fetching the uncompressed size failed above, so return the error */ + error = decmpfs_error; + } else if ((compressed && (ra->ra_offset >= uncompressed_size)) || + (!compressed && (ra->ra_offset >= fp->ff_size))) { + error = EFBIG; + } +#else /* HFS_COMPRESSION */ if (ra->ra_offset >= fp->ff_size) { error = EFBIG; - } else { + } +#endif /* HFS_COMPRESSION */ + else { error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); } @@ -1761,8 +1923,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { int error; uio_t auio; daddr64_t blockNumber; - u_long blockOffset; - u_long xfersize; + u_int32_t blockOffset; + u_int32_t xfersize; struct buf *bp; user_fbootstraptransfer_t user_bootstrap; @@ -1772,11 +1934,14 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { * to a user_fbootstraptransfer_t else we get a pointer to a * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } if (is64bit) { user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data; } else { - fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data; + 
user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data; user_bootstrapp = &user_bootstrap; user_bootstrap.fbt_offset = bootstrapp->fbt_offset; user_bootstrap.fbt_length = bootstrapp->fbt_length; @@ -1830,32 +1995,57 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } else { - *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate); + *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate)); } return 0; } - case HFS_GET_MOUNT_TIME: - if (is64bit) { - *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_mount_time; - } else { - *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_mount_time; + case SPOTLIGHT_FSCTL_GET_MOUNT_TIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time; + break; + + case SPOTLIGHT_FSCTL_GET_LAST_MTIME: + *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime; + break; + + case HFS_FSCTL_SET_VERY_LOW_DISK: + if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; } - return 0; - case HFS_GET_LAST_MTIME: - if (is64bit) { - *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_last_mounted_mtime; - } else { - *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_last_mounted_mtime; + hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_LOW_DISK: + if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel + || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) { + + return EINVAL; } - return 0; + + hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data; + break; + + case HFS_FSCTL_SET_DESIRED_DISK: + if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) { + return EINVAL; + } + + hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data; + break; + + case HFS_VOLUME_STATUS: + *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions; + 
break; case HFS_SET_BOOT_INFO: if (!vnode_isvroot(vp)) return(EINVAL); if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner)) return(EACCES); /* must be superuser or owner of filesystem */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } HFS_MOUNT_LOCK(hfsmp, TRUE); bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); HFS_MOUNT_UNLOCK(hfsmp, TRUE); @@ -1878,22 +2068,52 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!is_suser()) { return EACCES; } - + /* Allowed only on the root vnode of the boot volume */ if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) || !vnode_isvroot(vp)) { return EINVAL; } - + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return (EROFS); + } printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); hfs_mark_volume_inconsistent(hfsmp); break; + case HFS_FSCTL_GET_JOURNAL_INFO: + jip = (struct hfs_journal_info*)ap->a_data; + + if (vp == NULLVP) + return EINVAL; + + if (hfsmp->jnl == NULL) { + jnl_start = 0; + jnl_size = 0; + } else { + jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset; + jnl_size = (off_t)hfsmp->jnl_size; + } + + jip->jstart = jnl_start; + jip->jsize = jnl_size; + break; + + case HFS_SET_ALWAYS_ZEROFILL: { + struct cnode *cp = VTOC(vp); + + if (*(int *)ap->a_data) { + cp->c_flag |= C_ALWAYS_ZEROFILL; + } else { + cp->c_flag &= ~C_ALWAYS_ZEROFILL; + } + break; + } + default: return (ENOTTY); } - /* Should never get here */ return 0; } @@ -2060,6 +2280,26 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) int started_tr = 0; int tooklock = 0; +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow blockmaps to the resource fork */ + } else { + if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ + int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp)); + switch(state) { + case FILE_IS_COMPRESSED: + return ENOTSUP; + case FILE_IS_CONVERTING: + /* if 
FILE_IS_CONVERTING, we allow blockmap */ + break; + default: + printf("invalid state %d for compressed file\n", state); + /* fall through */ + } + } + } +#endif /* HFS_COMPRESSION */ + /* Do not allow blockmap operation on a directory */ if (vnode_isdir(vp)) { return (ENOTSUP); @@ -2241,7 +2481,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) * end of this range and the file's EOF): */ if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; } break; @@ -2253,7 +2493,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) /* There's actually no valid information to be had starting here: */ *ap->a_bpn = (daddr64_t)-1; if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) && - (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) { + ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) { bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset; } } else { @@ -2299,9 +2539,19 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap)); } +static int +hfs_minorupdate(struct vnode *vp) { + struct cnode *cp = VTOC(vp); + cp->c_flag &= ~C_MODIFIED; + cp->c_touch_acctime = 0; + cp->c_touch_chgtime = 0; + cp->c_touch_modtime = 0; + + return 0; +} static int -do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context) +do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context) { register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); @@ -2311,7 +2561,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context off_t bytesToAdd; off_t actualBytesAdded; off_t filebytes; - u_long fileblocks; + u_int32_t fileblocks; int blksize; struct hfsmount *hfsmp; int lockflags; @@ -2369,7 +2619,7 @@ 
do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context */ if (length > filebytes) { int eflags; - u_long blockHint = 0; + u_int32_t blockHint = 0; /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; @@ -2417,8 +2667,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context hfs_systemfile_unlock(hfsmp, lockflags); if (hfsmp->jnl) { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (skipupdate) { + (void) hfs_minorupdate(vp); + } + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } } hfs_end_transaction(hfsmp); @@ -2549,10 +2804,14 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context if (retval == 0) { fp->ff_size = length; } - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + if (skipupdate) { + (void) hfs_minorupdate(vp); + } + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } } - hfs_end_transaction(hfsmp); filebytes = (off_t)fp->ff_blocks * (off_t)blksize; @@ -2568,9 +2827,20 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context cp->c_touch_modtime = TRUE; fp->ff_size = length; } - cp->c_touch_chgtime = TRUE; /* status changed */ - cp->c_touch_modtime = TRUE; /* file data was modified */ - retval = hfs_update(vp, MNT_WAIT); + if (cp->c_mode & (S_ISUID | S_ISGID)) { + if (!vfs_context_issuser(context)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + skipupdate = 0; + } + } + if (skipupdate) { + retval = hfs_minorupdate(vp); + } + else { + cp->c_touch_chgtime = TRUE; /* status changed */ + cp->c_touch_modtime = TRUE; /* file data was modified */ + retval = hfs_update(vp, MNT_WAIT); + } if (retval) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, -1, -1, -1, retval, 0); @@ -2593,11 +2863,11 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context 
__private_extern__ int hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, - vfs_context_t context) + int skipupdate, vfs_context_t context) { struct filefork *fp = VTOF(vp); off_t filebytes; - u_long fileblocks; + u_int32_t fileblocks; int blksize, error = 0; struct cnode *cp = VTOC(vp); @@ -2622,7 +2892,15 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, // If skipsetsize is set, then the caller is responsible // for the ubc_setsize. // - if (!skipsetsize) + // Even if skipsetsize is set, if the length is zero we + // want to call ubc_setsize() because as of SnowLeopard + // it will no longer cause any page-ins and it will drop + // any dirty pages so that we don't do any i/o that we + // don't have to. This also prevents a race where i/o + // for truncated blocks may overwrite later data if the + // blocks get reallocated to a different file. + // + if (!skipsetsize || length == 0) ubc_setsize(vp, length); // have to loop truncating or growing files that are @@ -2637,7 +2915,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, context); + error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); if (error) break; } @@ -2649,13 +2927,13 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, context); + error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); if (error) break; } } else /* Same logical size */ { - error = do_hfs_truncate(vp, length, flags, context); + error = do_hfs_truncate(vp, length, flags, skipupdate, context); } /* Files that are changing size are not hot file candidates. 
*/ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { @@ -2689,7 +2967,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { off_t moreBytesRequested; off_t actualBytesAdded; off_t filebytes; - u_long fileblocks; + u_int32_t fileblocks; int retval, retval2; u_int32_t blockHint; u_int32_t extendFlags; /* For call to ExtendFileC */ @@ -2733,6 +3011,8 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { extendFlags |= kEFAllMask; if (cred && suser(cred, NULL) != 0) extendFlags |= kEFReserveMask; + if (hfs_virtualmetafile(cp)) + extendFlags |= kEFMetadataMask; retval = E_NONE; blockHint = 0; @@ -2773,7 +3053,6 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { * Allocate Journal and Quota files in metadata zone. */ if (hfs_virtualmetafile(cp)) { - extendFlags |= kEFMetadataMask; blockHint = hfsmp->hfs_metazone_start; } else if ((blockHint >= hfsmp->hfs_metazone_start) && (blockHint <= hfsmp->hfs_metazone_end)) { @@ -2805,6 +3084,12 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { bytesRequested = moreBytesRequested; } + if (extendFlags & kEFContigMask) { + // if we're on a sparse device, this will force it to do a + // full scan to find the space needed. 
+ hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN; + } + retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, bytesRequested, @@ -2860,7 +3145,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { */ } - retval = hfs_truncate(vp, length, 0, 0, ap->a_context); + retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; /* @@ -2916,6 +3201,30 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) vnode_t vp = ap->a_vp; int error; +#if HFS_COMPRESSION + if (VNODE_IS_RSRC(vp)) { + /* allow pageins of the resource fork */ + } else { + int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ + if (compressed) { + error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp)); + if (compressed) { + if (error == 0) { + /* successful page-in, update the access time */ + VTOC(vp)->c_touch_acctime = TRUE; + + /* compressed files are not hot file candidates */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { + VTOF(vp)->ff_bytesread = 0; + } + } + return error; + } + /* otherwise the file was converted back to a regular file while we were reading it */ + } + } +#endif + error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags); /* @@ -2980,8 +3289,13 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) vnode_t vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - int retval; + int retval = 0; off_t filesize; + upl_t upl; + upl_page_info_t* pl; + vm_offset_t a_pl_offset; + int a_flags; + int is_pageoutv2 = 0; cp = VTOC(vp); fp = VTOF(vp); @@ -2995,54 +3309,248 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) filesize = fp->ff_size; if (fp->ff_new_size > filesize) filesize = fp->ff_new_size; - - if (!vnode_isswap(vp)) { - off_t end_of_range; - int tooklock = 0; - - if (cp->c_lockowner != current_thread()) { - if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { - if (!(ap->a_flags & UPL_NOCOMMIT)) { - ubc_upl_abort_range(ap->a_pl, - 
ap->a_pl_offset, - ap->a_size, - UPL_ABORT_FREE_ON_EMPTY); + + a_flags = ap->a_flags; + a_pl_offset = ap->a_pl_offset; + + /* + * we can tell if we're getting the new or old behavior from the UPL + */ + if ((upl = ap->a_pl) == NULL) { + int request_flags; + + is_pageoutv2 = 1; + /* + * we're in control of any UPL we commit + * make sure someone hasn't accidentally passed in UPL_NOCOMMIT + */ + a_flags &= ~UPL_NOCOMMIT; + a_pl_offset = 0; + + /* + * take truncate lock (shared) to guard against + * zero-fill thru fsync interfering, but only for v2 + */ + hfs_lock_truncate(cp, 0); + + if (a_flags & UPL_MSYNC) { + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + } + else { + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + } + ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags); + + if (upl == (upl_t) NULL) { + retval = EINVAL; + goto pageout_done; + } + } + /* + * from this point forward upl points at the UPL we're working with + * it was either passed in or we successfully created it + */ + + /* + * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own + * UPL instead of relying on the UPL passed into us. We go ahead and do that here, + * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for + * N dirty ranges in the UPL. Note that this is almost a direct copy of the + * logic in vnode_pageout except that we need to do it after grabbing the truncate + * lock in HFS so that we don't lock invert ourselves. + * + * Note that we can still get into this function on behalf of the default pager with + * non-V2 behavior (swapfiles). However in that case, we did not grab locks above + * since fsync and other writing threads will grab the locks, then mark the + * relevant pages as busy. But the pageout codepath marks the pages as busy, + * and THEN would attempt to grab the truncate lock, which would result in deadlock.
So + * we do not try to grab anything for the pre-V2 case, which should only be accessed + * by the paging/VM system. + */ + + if (is_pageoutv2) { + off_t f_offset; + int offset; + int isize; + int pg_index; + int error; + int error_ret = 0; + + isize = ap->a_size; + f_offset = ap->a_f_offset; + + /* + * Scan from the back to find the last page in the UPL, so that we + * aren't looking at a UPL that may have already been freed by the + * preceding aborts/completions. + */ + for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) { + if (upl_page_present(pl, --pg_index)) + break; + if (pg_index == 0) { + ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; } - return (retval); - } - tooklock = 1; } + + /* + * initialize the offset variables before we touch the UPL. + * a_f_offset is the position into the file, in bytes + * offset is the position into the UPL, in bytes + * pg_index is the pg# of the UPL we're operating on. + * isize is the offset into the UPL of the last non-clean page. + */ + isize = ((pg_index + 1) * PAGE_SIZE); + + offset = 0; + pg_index = 0; + + while (isize) { + int xsize; + int num_of_pages; + + if ( !upl_page_present(pl, pg_index)) { + /* + * we asked for RET_ONLY_DIRTY, so it's possible + * to get back empty slots in the UPL. + * just skip over them + */ + f_offset += PAGE_SIZE; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + + continue; + } + if ( !upl_dirty_page(pl, pg_index)) { + panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl); + } + + /* + * We know that we have at least one dirty page. 
+ * Now checking to see how many in a row we have + */ + num_of_pages = 1; + xsize = isize - PAGE_SIZE; + + while (xsize) { + if ( !upl_dirty_page(pl, pg_index + num_of_pages)) + break; + num_of_pages++; + xsize -= PAGE_SIZE; + } + xsize = num_of_pages * PAGE_SIZE; + + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock; + + tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + /* + * we're in the v2 path, so we are the + * owner of the UPL... we may have already + * processed some of the UPL, so abort it + * from the current working offset to the + * end of the UPL + */ + ubc_upl_abort_range(upl, + offset, + ap->a_size - offset, + UPL_ABORT_FREE_ON_EMPTY); + goto pageout_done; + } + tooklock = 1; + } + end_of_range = f_offset + xsize - 1; - end_of_range = ap->a_f_offset + ap->a_size - 1; - - if (end_of_range >= filesize) { - end_of_range = (off_t)(filesize - 1); + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (f_offset < filesize) { + rl_remove(f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } + if (tooklock) { + hfs_unlock(cp); + } + } + if ((error = cluster_pageout(vp, upl, offset, f_offset, + xsize, filesize, a_flags))) { + if (error_ret == 0) + error_ret = error; + } + f_offset += xsize; + offset += xsize; + isize -= xsize; + pg_index += num_of_pages; } - if (ap->a_f_offset < filesize) { - rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); - cp->c_flag |= C_MODIFIED; /* leof is dirty */ + /* capture errnos bubbled out of cluster_pageout if they occurred */ + if (error_ret != 0) { + retval = error_ret; } + } /* end block for v2 pageout behavior */ + else { + if (!vnode_isswap(vp)) { + off_t end_of_range; + int tooklock = 0; + + if (cp->c_lockowner != current_thread()) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if (!(a_flags & UPL_NOCOMMIT)) { + ubc_upl_abort_range(upl, + 
a_pl_offset, + ap->a_size, + UPL_ABORT_FREE_ON_EMPTY); + } + goto pageout_done; + } + tooklock = 1; + } + end_of_range = ap->a_f_offset + ap->a_size - 1; + + if (end_of_range >= filesize) { + end_of_range = (off_t)(filesize - 1); + } + if (ap->a_f_offset < filesize) { + rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } - if (tooklock) { - hfs_unlock(cp); + if (tooklock) { + hfs_unlock(cp); + } } + /* + * just call cluster_pageout for old pre-v2 behavior + */ + retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset, + ap->a_size, filesize, a_flags); } - retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, - ap->a_size, filesize, ap->a_flags); - /* - * If data was written, and setuid or setgid bits are set and - * this process is not the superuser then clear the setuid and - * setgid bits as a precaution against tampering. + * If data was written, update the modification time of the file. + * If setuid or setgid bits are set and this process is not the + * superuser then clear the setuid and setgid bits as a precaution + * against tampering. 
*/ - if ((retval == 0) && - (cp->c_mode & (S_ISUID | S_ISGID)) && - (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_mode &= ~(S_ISUID | S_ISGID); + if (retval == 0) { + cp->c_touch_modtime = TRUE; cp->c_touch_chgtime = TRUE; - hfs_unlock(cp); + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_mode &= ~(S_ISUID | S_ISGID); + hfs_unlock(cp); + } + } + +pageout_done: + if (is_pageoutv2) { + /* release truncate lock (shared) */ + hfs_unlock_truncate(cp, 0); } return (retval); } @@ -3267,6 +3775,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, } else if ((eflags & kEFMetadataMask) && ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) > hfsmp->hfs_metazone_end)) { +#if 0 const char * filestr; char emptystr = '\0'; @@ -3277,7 +3786,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, } else { filestr = &emptystr; } - printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks); +#endif retval = ENOSPC; goto restore; } @@ -3425,16 +3934,14 @@ static int hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) { caddr_t bufp; - size_t writebase; size_t bufsize; size_t copysize; size_t iosize; - off_t filesize; size_t offset; + off_t writebase; uio_t auio; int error = 0; - filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */ writebase = blkstart * blksize; copysize = blkcnt * blksize; iosize = bufsize = MIN(copysize, 128 * 1024); @@ -3445,12 +3952,12 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } hfs_unlock(VTOC(vp)); - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); while (offset < copysize) { iosize = MIN(copysize - offset, iosize); - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, 
(uintptr_t)bufp, iosize); error = cluster_read(vp, auio, copysize, IO_NOCACHE); @@ -3459,16 +3966,16 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) break; } if (uio_resid(auio) != 0) { - printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio)); + printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio)); error = EIO; break; } - uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufp, iosize); - error = cluster_write(vp, auio, filesize + offset, - filesize + offset + iosize, + error = cluster_write(vp, auio, writebase + offset, + writebase + offset + iosize, uio_offset(auio), 0, IO_NOCACHE | IO_SYNC); if (error) { printf("hfs_clonefile: cluster_write failed - %d\n", error); @@ -3483,11 +3990,25 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } uio_free(auio); - /* - * No need to call ubc_sync_range or hfs_invalbuf - * since the file was copied using IO_NOCACHE. - */ - + if ((blksize & PAGE_MASK)) { + /* + * since the copy may not have started on a PAGE + * boundary (or may not have ended on one), we + * may have pages left in the cache since NOCACHE + * will let partially written pages linger... + * let's just flush the entire range to make sure + * we don't have any pages left that are beyond + * (or intersect) the real LEOF of this file + */ + ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); + } else { + /* + * No need to call ubc_sync_range or hfs_invalbuf + * since the file was copied using IO_NOCACHE and + * the copy was done starting and ending on a page + * boundary in the file.
+ */ + } kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); hfs_lock(VTOC(vp), HFS_FORCE_LOCK); diff --git a/bsd/hfs/hfs_search.c b/bsd/hfs/hfs_search.c index 5c48b3282..6a8a8b74f 100644 --- a/bsd/hfs/hfs_search.c +++ b/bsd/hfs/hfs_search.c @@ -82,10 +82,10 @@ struct fileInfoSpec struct searchinfospec { u_char name[kHFSPlusMaxFileNameBytes]; - u_long nameLength; + u_int32_t nameLength; char attributes; // see IM:Files 2-100 - u_long nodeID; - u_long parentDirID; + u_int32_t nodeID; + u_int32_t parentDirID; struct timespec creationDate; struct timespec modificationDate; struct timespec changeDate; @@ -104,7 +104,7 @@ static void ResolveHardlink(struct hfsmount *hfsmp, HFSPlusCatalogFile *recp); static int UnpackSearchAttributeBlock(struct hfsmount *hfsmp, struct attrlist *alist, - searchinfospec_t *searchInfo, void *attributeBuffer); + searchinfospec_t *searchInfo, void *attributeBuffer, int firstblock); static int CheckCriteria( ExtendedVCB *vcb, u_long searchBits, @@ -119,7 +119,7 @@ static int CheckAccess(ExtendedVCB *vcb, u_long searchBits, CatalogKey *key, str static int InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, CatalogKey *key, struct attrlist *returnAttrList, void *attributesBuffer, void *variableBuffer, - u_long * nummatches ); + uint32_t * nummatches ); static Boolean CompareRange(u_long val, u_long low, u_long high); static Boolean CompareWideRange(u_int64_t val, u_int64_t low, u_int64_t high); @@ -179,10 +179,10 @@ hfs_vnop_search(ap) FCB * catalogFCB; searchinfospec_t searchInfo1; searchinfospec_t searchInfo2; - void *attributesBuffer; + void *attributesBuffer = NULL; void *variableBuffer; - u_long fixedBlockSize; - u_long eachReturnBufferSize; + u_int32_t fixedBlockSize; + u_int32_t eachReturnBufferSize; struct proc *p = current_proc(); int err = E_NONE; int isHFSPlus; @@ -254,10 +254,10 @@ hfs_vnop_search(ap) /* UnPack the search boundries, searchInfo1, searchInfo2 */ err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, 
- &searchInfo1, ap->a_searchparams1); + &searchInfo1, ap->a_searchparams1, 1); if (err) return err; err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, - &searchInfo2, ap->a_searchparams2); + &searchInfo2, ap->a_searchparams2, 0); if (err) return err; //shadow search bits if 64-bit file/parent ids are used @@ -274,6 +274,10 @@ hfs_vnop_search(ap) eachReturnBufferSize += kHFSPlusMaxFileNameBytes + 1; MALLOC( attributesBuffer, void *, eachReturnBufferSize, M_TEMP, M_WAITOK ); + if (attributesBuffer == NULL) { + err = ENOMEM; + goto ExitThisRoutine; + } variableBuffer = (void*)((char*) attributesBuffer + fixedBlockSize); // XXXdbg - have to lock the user's buffer so we don't fault @@ -303,7 +307,7 @@ hfs_vnop_search(ap) (void) hfs_fsync(vcb->catalogRefNum, MNT_WAIT, 0, p); if (hfsmp->jnl) { hfs_systemfile_unlock(hfsmp, lockflags); - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); } @@ -405,7 +409,8 @@ hfs_vnop_search(ap) } ExitThisRoutine: - FREE( attributesBuffer, M_TEMP ); + if (attributesBuffer) + FREE(attributesBuffer, M_TEMP); if (hfsmp->jnl && user_start) { vsunlock(user_start, user_len, TRUE); @@ -518,30 +523,6 @@ ComparePartialPascalName ( register ConstStr31Param str, register ConstStr31Para } -// -// Determine if a name is "inappropriate" where the definition -// of "inappropriate" is up to higher level execs. Currently -// that's limited to /System. 
-// -static int -is_inappropriate_name(const char *name, int len) -{ - const char *bad_names[] = { "System" }; - int bad_len[] = { 6 }; - int i; - - for(i=0; i < (int) (sizeof(bad_names) / sizeof(bad_names[0])); i++) { - if (len == bad_len[i] && strncmp(name, bad_names[i], strlen(bad_names[i]) + 1) == 0) { - return 1; - } - } - - // if we get here, no name matched - return 0; -} - - - /* * Check to see if caller has access rights to this item */ @@ -594,7 +575,7 @@ CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, st if ( searchBits & SRCHFS_SKIPINAPPROPRIATE ) { if ( cp->c_parentcnid == kRootDirID && cp->c_desc.cd_nameptr != NULL && - is_inappropriate_name((const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) ) { + vn_searchfs_inappropriate_name((const char *)cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) ) { myResult = 0; goto ExitThisRoutine; } @@ -794,7 +775,25 @@ CheckCriteria( ExtendedVCB *vcb, } else if ((attrList->fileattr & ATTR_FILE_VALIDMASK) != 0) { searchAttributes = attrList->fileattr; - + +#if HFS_COMPRESSION + if ( c_attr.ca_flags & UF_COMPRESSED ) { + /* for compressed files, set the data length to the uncompressed data size */ + if (( searchAttributes & ATTR_FILE_DATALENGTH ) || + ( searchAttributes & ATTR_FILE_DATAALLOCSIZE ) ) { + if ( 0 == hfs_uncompressed_size_of_compressed_file(vcb, NULL, c_attr.ca_fileid, &datafork.cf_size, 1) ) { /* 1 == don't take the cnode lock */ + datafork.cf_blocks = rsrcfork.cf_blocks; + } + } + /* treat compressed files as if their resource fork is empty */ + if (( searchAttributes & ATTR_FILE_RSRCLENGTH ) || + ( searchAttributes & ATTR_FILE_RSRCALLOCSIZE ) ) { + rsrcfork.cf_size = 0; + rsrcfork.cf_blocks = 0; + } + } +#endif /* HFS_COMPRESSION */ + /* File logical length (data fork) */ if ( searchAttributes & ATTR_FILE_DATALENGTH ) { matched = CompareWideRange( @@ -1003,12 +1002,12 @@ CheckCriteria( ExtendedVCB *vcb, static int InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, 
CatalogRecord *rec, CatalogKey *key, struct attrlist *returnAttrList, - void *attributesBuffer, void *variableBuffer, u_long * nummatches) + void *attributesBuffer, void *variableBuffer, uint32_t * nummatches) { int err; void *rovingAttributesBuffer; void *rovingVariableBuffer; - u_long packedBufferSize; + long packedBufferSize; struct attrblock attrblk; struct cat_desc c_desc; struct cat_attr c_attr; @@ -1017,7 +1016,7 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, bzero(&c_desc, sizeof(c_desc)); bzero(&c_attr, sizeof(c_attr)); - rovingAttributesBuffer = (char*)attributesBuffer + sizeof(u_long); /* Reserve space for length field */ + rovingAttributesBuffer = (char*)attributesBuffer + sizeof(u_int32_t); /* Reserve space for length field */ rovingVariableBuffer = variableBuffer; /* Convert catalog record into cat_attr format. */ @@ -1055,7 +1054,7 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, attrblk.ab_blocksize = 0; attrblk.ab_context = vfs_context_current(); - hfs_packattrblk(&attrblk, hfsmp, NULL, &c_desc, &c_attr, &datafork, &rsrcfork, current_proc()); + hfs_packattrblk(&attrblk, hfsmp, NULL, &c_desc, &c_attr, &datafork, &rsrcfork, vfs_context_current()); packedBufferSize = (char*)rovingVariableBuffer - (char*)attributesBuffer; @@ -1064,7 +1063,7 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, (* nummatches)++; - *((u_long *)attributesBuffer) = packedBufferSize; /* Store length of fixed + var block */ + *((u_int32_t *)attributesBuffer) = packedBufferSize; /* Store length of fixed + var block */ err = uiomove( (caddr_t)attributesBuffer, packedBufferSize, a_uio ); /* XXX should be packedBufferSize */ exit: @@ -1075,10 +1074,11 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, static int -UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, searchinfospec_t *searchInfo, void *attributeBuffer ) +UnpackSearchAttributeBlock( struct hfsmount *hfsmp, 
struct attrlist *alist, + searchinfospec_t *searchInfo, void *attributeBuffer, int firstblock) { attrgroup_t a; - u_long bufferSize; + u_int32_t bufferSize; boolean_t is_64_bit; DBG_ASSERT(searchInfo != NULL); @@ -1097,42 +1097,44 @@ UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, sear a = alist->commonattr; if ( a != 0 ) { if ( a & ATTR_CMN_NAME ) { - char *s; - u_int32_t len; - - s = (char*) attributeBuffer + ((attrreference_t *) attributeBuffer)->attr_dataoffset; - len = ((attrreference_t *) attributeBuffer)->attr_length; + if (firstblock) { + /* Only use the attrreference_t for the first searchparams */ + char *s; + u_int32_t len; - if (len > sizeof(searchInfo->name)) - return (EINVAL); + s = (char*) attributeBuffer + ((attrreference_t *) attributeBuffer)->attr_dataoffset; + len = ((attrreference_t *) attributeBuffer)->attr_length; - if (hfsmp->hfs_flags & HFS_STANDARD) { - /* Convert name to pascal string to match HFS B-Tree names */ + if (len > sizeof(searchInfo->name)) + return (EINVAL); - if (len > 0) { - if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, (u_char *)s, (u_char*)searchInfo->name) != 0) - return (EINVAL); - - searchInfo->nameLength = searchInfo->name[0]; - } else { - searchInfo->name[0] = searchInfo->nameLength = 0; - } - attributeBuffer = (attrreference_t *)attributeBuffer + 1; - } else { - size_t ucslen; - /* Convert name to Unicode to match HFS Plus B-Tree names */ + if (hfsmp->hfs_flags & HFS_STANDARD) { + /* Convert name to pascal string to match HFS B-Tree names */ - if (len > 0) { - if (utf8_decodestr((u_int8_t *)s, len-1, (UniChar*)searchInfo->name, &ucslen, - sizeof(searchInfo->name), ':', UTF_DECOMPOSED | UTF_ESCAPE_ILLEGAL)) - return (EINVAL); + if (len > 0) { + if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, (u_char *)s, (u_char*)searchInfo->name) != 0) + return (EINVAL); - searchInfo->nameLength = ucslen / sizeof(UniChar); + searchInfo->nameLength = searchInfo->name[0]; + } else { + searchInfo->name[0] = 
searchInfo->nameLength = 0; + } } else { - searchInfo->nameLength = 0; + size_t ucslen; + /* Convert name to Unicode to match HFS Plus B-Tree names */ + + if (len > 0) { + if (utf8_decodestr((u_int8_t *)s, len-1, (UniChar*)searchInfo->name, &ucslen, + sizeof(searchInfo->name), ':', UTF_DECOMPOSED | UTF_ESCAPE_ILLEGAL)) + return (EINVAL); + + searchInfo->nameLength = ucslen / sizeof(UniChar); + } else { + searchInfo->nameLength = 0; + } } - attributeBuffer = (attrreference_t *)attributeBuffer + 1; } + attributeBuffer = (attrreference_t*) attributeBuffer +1; } if ( a & ATTR_CMN_OBJID ) { searchInfo->nodeID = ((fsobj_id_t *) attributeBuffer)->fid_objno; /* ignore fid_generation */ @@ -1145,67 +1147,82 @@ UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, sear if ( a & ATTR_CMN_CRTIME ) { if (is_64_bit) { - struct user_timespec tmp; - tmp = *((struct user_timespec *)attributeBuffer); + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); searchInfo->creationDate.tv_sec = (time_t)tmp.tv_sec; searchInfo->creationDate.tv_nsec = tmp.tv_nsec; - attributeBuffer = (struct user_timespec *)attributeBuffer + 1; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; } else { - searchInfo->creationDate = *((struct timespec *)attributeBuffer); - attributeBuffer = (struct timespec *)attributeBuffer + 1; + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->creationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->creationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; } } if ( a & ATTR_CMN_MODTIME ) { if (is_64_bit) { - struct user_timespec tmp; - tmp = *((struct user_timespec *)attributeBuffer); + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); searchInfo->modificationDate.tv_sec = (time_t)tmp.tv_sec; searchInfo->modificationDate.tv_nsec = tmp.tv_nsec; - attributeBuffer = (struct user_timespec 
*)attributeBuffer + 1; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; } else { - searchInfo->modificationDate = *((struct timespec *)attributeBuffer); - attributeBuffer = (struct timespec *)attributeBuffer + 1; + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->modificationDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->modificationDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; } } if ( a & ATTR_CMN_CHGTIME ) { if (is_64_bit) { - struct user_timespec tmp; - tmp = *((struct user_timespec *)attributeBuffer); + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); searchInfo->changeDate.tv_sec = (time_t)tmp.tv_sec; searchInfo->changeDate.tv_nsec = tmp.tv_nsec; - attributeBuffer = (struct user_timespec *)attributeBuffer + 1; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; } else { - searchInfo->changeDate = *((struct timespec *)attributeBuffer); - attributeBuffer = (struct timespec *)attributeBuffer + 1; + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->changeDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->changeDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; } } if ( a & ATTR_CMN_ACCTIME ) { if (is_64_bit) { - struct user_timespec tmp; - tmp = *((struct user_timespec *)attributeBuffer); + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); searchInfo->accessDate.tv_sec = (time_t)tmp.tv_sec; searchInfo->accessDate.tv_nsec = tmp.tv_nsec; - attributeBuffer = (struct user_timespec *)attributeBuffer + 1; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; } else { - searchInfo->accessDate = *((struct timespec *)attributeBuffer); - attributeBuffer = (struct timespec *)attributeBuffer + 1; + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + 
searchInfo->accessDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->accessDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; } } if ( a & ATTR_CMN_BKUPTIME ) { if (is_64_bit) { - struct user_timespec tmp; - tmp = *((struct user_timespec *)attributeBuffer); + struct user64_timespec tmp; + tmp = *((struct user64_timespec *)attributeBuffer); searchInfo->lastBackupDate.tv_sec = (time_t)tmp.tv_sec; searchInfo->lastBackupDate.tv_nsec = tmp.tv_nsec; - attributeBuffer = (struct user_timespec *)attributeBuffer + 1; + attributeBuffer = (struct user64_timespec *)attributeBuffer + 1; } else { - searchInfo->lastBackupDate = *((struct timespec *)attributeBuffer); - attributeBuffer = (struct timespec *)attributeBuffer + 1; + struct user32_timespec tmp; + tmp = *((struct user32_timespec *)attributeBuffer); + searchInfo->lastBackupDate.tv_sec = (time_t)tmp.tv_sec; + searchInfo->lastBackupDate.tv_nsec = tmp.tv_nsec; + attributeBuffer = (struct user32_timespec *)attributeBuffer + 1; } } if ( a & ATTR_CMN_FNDRINFO ) { diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index 6f5c3eb53..8148697b2 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2008 Apple Inc. All rights reserved. + * Copyright (c) 1999-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,6 +95,9 @@ #include #include +#include +#include + #include "hfs.h" #include "hfs_catalog.h" #include "hfs_cnode.h" @@ -118,9 +121,10 @@ lck_grp_t * hfs_mutex_group; lck_grp_t * hfs_rwlock_group; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; -/* not static so we can re-use in hfs_readwrite.c for build_path */ -int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +/* not static so we can re-use in hfs_readwrite.c for build_path calls */ +int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); @@ -141,10 +145,10 @@ static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlen static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context); static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context); -static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context); +static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context); static int hfs_overlapped_overflow_extents(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t catblks, u_int32_t fileID, int rsrcfork); -static int hfs_journal_replay(const char *devnode, vfs_context_t context); +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context); /* @@ -160,8 +164,6 @@ hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) struct vfsstatfs *vfsp; int error; - hfs_chashinit_finish(); - if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) return (error); @@ -333,7 +335,12 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte } - /* 
Only clear HFS_READ_ONLY after a successfull write */ + /* See if we need to erase unused Catalog nodes due to . */ + retval = hfs_erase_unused_nodes(hfsmp); + if (retval != E_NONE) + goto out; + + /* Only clear HFS_READ_ONLY after a successful write */ hfsmp->hfs_flags &= ~HFS_READ_ONLY; /* If this mount point was downgraded from read-write @@ -378,8 +385,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* Set the mount flag to indicate that we support volfs */ vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); - hfs_chashinit_finish(); - retval = hfs_mountfs(devvp, mp, &args, 0, context); } out: @@ -462,7 +467,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) ExtendedVCB *vcb; hfs_to_unicode_func_t get_unicode_func; unicode_to_hfs_func_t get_hfsname_func; - u_long old_encoding = 0; + u_int32_t old_encoding = 0; struct hfs_changefs_cargs cargs; u_int32_t mount_flags; @@ -527,7 +532,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) /* Change the hfs encoding value (hfs only) */ if ((vcb->vcbSigWord == kHFSSigWord) && - (args->hfs_encoding != (u_long)VNOVAL) && + (args->hfs_encoding != (u_int32_t)VNOVAL) && (hfsmp->hfs_encoding != args->hfs_encoding)) { retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func); @@ -827,8 +832,7 @@ hfs_reload(struct mount *mountp) return (0); } -int hfs_last_io_wait_time = 125000; -SYSCTL_INT (_kern, OID_AUTO, hfs_last_io_wait_time, CTLFLAG_RW, &hfs_last_io_wait_time, 0, "number of usecs to wait after an i/o before syncing ejectable media"); + static void hfs_syncer(void *arg0, void *unused) @@ -836,76 +840,105 @@ hfs_syncer(void *arg0, void *unused) #pragma unused(unused) struct hfsmount *hfsmp = arg0; - uint32_t secs, usecs, delay = HFS_META_DELAY; + clock_sec_t secs; + clock_usec_t usecs; + uint32_t delay = HFS_META_DELAY; uint64_t now; - struct timeval nowtv, last_io; + static int no_max=1; clock_get_calendar_microtime(&secs, 
&usecs); - now = ((uint64_t)secs * 1000000LL) + usecs; - // - // If we have put off the last sync for more than - // 5 seconds, force it so that we don't let too - // much i/o queue up (since flushing the journal - // causes the i/o queue to drain) - // - if ((now - hfsmp->hfs_last_sync_time) >= 5000000LL) { - goto doit; - } + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; // - // Find out when the last i/o was done to this device (read or write). + // If the amount of pending writes is more than our limit, wait + // for 2/3 of it to drain and then flush the journal. // - throttle_info_get_last_io_time(hfsmp->hfs_mp, &last_io); - microuptime(&nowtv); - timevalsub(&nowtv, &last_io); + if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) { + int counter=0; + uint64_t pending_io, start, rate; + + no_max = 0; - // - // If the last i/o was too recent, defer this sync until later. - // The limit chosen (125 milli-seconds) was picked based on - // some experiments copying data to an SD card and seems to - // prevent us from issuing too many syncs. - // - if (nowtv.tv_sec >= 0 && nowtv.tv_usec > 0 && nowtv.tv_usec < hfs_last_io_wait_time) { - delay /= 2; - goto resched; - } - - // - // If there's pending i/o, also skip the sync. - // - if (hfsmp->hfs_devvp && hfsmp->hfs_devvp->v_numoutput > 0) { - goto resched; - } + hfs_start_transaction(hfsmp); // so we hold off any new i/o's - - // - // Only flush the journal if we have not sync'ed recently - // and the last sync request time was more than 100 milli - // seconds ago and there is no one in the middle of a - // transaction right now. Else we defer the sync and - // reschedule it for later. 
- // - if ( ((now - hfsmp->hfs_last_sync_time) >= 100000LL) - && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL) - && (hfsmp->hfs_active_threads == 0) - && (hfsmp->hfs_global_lock_nesting == 0)) { + pending_io = hfsmp->hfs_mp->mnt_pending_write_size; + + clock_get_calendar_microtime(&secs, &usecs); + start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) { + tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10); + } + + if (counter >= 500) { + printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size); + } + + if (hfsmp->jnl) { + journal_flush(hfsmp->jnl); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); + } + + clock_get_calendar_microtime(&secs, &usecs); + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; + rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second - doit: - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads); + hfs_end_transaction(hfsmp); + + // + // If a reasonable amount of time elapsed then check the + // i/o rate. If it's taking less than 1 second or more + // than 2 seconds, adjust hfs_max_pending_io so that we + // will allow about 1.5 seconds of i/o to queue up. 
+ // + if ((now - start) >= 300000) { + uint64_t scale = (pending_io * 100) / rate; + + if (scale < 100 || scale > 200) { + // set it so that it should take about 1.5 seconds to drain + hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL; + } + } + + } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL) + || (((now - hfsmp->hfs_last_sync_time) >= 100000LL) + && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL) + && (hfsmp->hfs_active_threads == 0) + && (hfsmp->hfs_global_lock_nesting == 0))) { + + // + // Flush the journal if more than 5 seconds elapsed since + // the last sync OR we have not sync'ed recently and the + // last sync request time was more than 100 milliseconds + // ago and no one is in the middle of a transaction right + // now. Else we defer the sync and reschedule it. + // if (hfsmp->jnl) { + lck_rw_lock_shared(&hfsmp->hfs_global_lock); + journal_flush(hfsmp->jnl); + + lck_rw_unlock_shared(&hfsmp->hfs_global_lock); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); } - OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads); - + clock_get_calendar_microtime(&secs, &usecs); - hfsmp->hfs_last_sync_time = ((int64_t)secs * 1000000) + usecs; + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + hfsmp->hfs_last_sync_time = now; } else if (hfsmp->hfs_active_threads == 0) { uint64_t deadline; - resched: clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline); thread_call_enter_delayed(hfsmp->hfs_syncer, deadline); + + // note: we intentionally return early here and do not + // decrement the sync_scheduled and sync_incomplete + // variables because we rescheduled the timer. 
+ return; } @@ -919,6 +952,7 @@ hfs_syncer(void *arg0, void *unused) wakeup((caddr_t)&hfsmp->hfs_sync_incomplete); } + extern int IOBSDIsMediaEjectable( const char *cdev_name ); /* @@ -930,10 +964,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, { struct proc *p = vfs_context_proc(context); int retval = E_NONE; - struct hfsmount *hfsmp; + struct hfsmount *hfsmp = NULL; struct buf *bp; dev_t dev; - HFSMasterDirectoryBlock *mdbp; + HFSMasterDirectoryBlock *mdbp = NULL; int ronly; #if QUOTA int i; @@ -950,16 +984,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int isvirtual = 0; int isroot = 0; + if (args == NULL) { + /* only hfs_mountroot passes us NULL as the 'args' argument */ + isroot = 1; + } + ronly = vfs_isrdonly(mp); dev = vnode_specrdev(devvp); cred = p ? vfs_context_ucred(context) : NOCRED; mntwrapper = 0; - if (args == NULL) { - /* only hfs_mountroot passes us NULL as the 'args' argument */ - isroot = 1; - } - bp = NULL; hfsmp = NULL; mdbp = NULL; @@ -973,6 +1007,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, retval = ENXIO; goto error_exit; } + if (log_blksize == 0 || log_blksize > 1024*1024*1024) { + printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize); + retval = ENXIO; + goto error_exit; + } + /* Get the physical block size. */ retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context); if (retval) { @@ -985,6 +1025,12 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, */ phys_blksize = log_blksize; } + if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) { + printf("hfs: physical block size 0x%x looks bad. 
Not mounting.\n", phys_blksize); + retval = ENXIO; + goto error_exit; + } + /* Switch to 512 byte sectors (temporarily) */ if (log_blksize > 512) { u_int32_t size512 = 512; @@ -1022,6 +1068,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, phys_blksize = log_blksize; } + /* + * The cluster layer is not currently prepared to deal with a logical + * block size larger than the system's page size. (It can handle + * blocks per page, but not multiple pages per block.) So limit the + * logical block size to the page size. + */ + if (log_blksize > PAGE_SIZE) + log_blksize = PAGE_SIZE; + /* Now switch to our preferred physical block size. */ if (log_blksize > 512) { if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { @@ -1048,13 +1103,23 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, goto error_exit; } MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK); + if (mdbp == NULL) { + retval = ENOMEM; + goto error_exit; + } bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize); buf_brelse(bp); bp = NULL; MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK); + if (hfsmp == NULL) { + retval = ENOMEM; + goto error_exit; + } bzero(hfsmp, sizeof(struct hfsmount)); + hfs_chashinit_finish(hfsmp); + /* * Init the volume information structure */ @@ -1131,7 +1196,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) && (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) { - /* If only journal replay is requested, exit immediately */ + /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */ + if (vfs_isrdwr(mp)) { + retval = EROFS; + goto error_exit; + } + /* Treat it as if it's read-only and not writeable */ + hfsmp->hfs_flags |= HFS_READ_ONLY; + hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA; + + /* If only 
journal replay is requested, exit immediately */ if (journal_replay_only) { retval = 0; goto error_exit; @@ -1192,7 +1266,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, * boundary. */ if ((embeddedOffset % log_blksize) != 0) { - printf("HFS Mount: embedded volume offset not" + printf("hfs_mountfs: embedded volume offset not" " a multiple of physical block size (%d);" " switching to 512\n", log_blksize); log_blksize = 512; @@ -1209,9 +1283,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* Note: relative block count adjustment */ hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize; - hfsmp->hfs_logical_block_size = log_blksize; - /* Update logical/physical block size */ + /* Update logical /physical block size */ + hfsmp->hfs_logical_block_size = log_blksize; hfsmp->hfs_physical_block_size = log_blksize; phys_blksize = log_blksize; hfsmp->hfs_log_per_phys = 1; @@ -1239,11 +1313,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * On inconsistent disks, do not allow read-write mount - * unless it is the boot volume being mounted. + * unless it is the boot volume being mounted. We also + * always want to replay the journal if the journal_replay_only + * flag is set because that will (most likely) get the + * disk into a consistent state before fsck_hfs starts + * looking at it. */ - if (!(vfs_flags(mp) & MNT_ROOTFS) && - (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) && - !(hfsmp->hfs_flags & HFS_READ_ONLY)) { + if ( !(vfs_flags(mp) & MNT_ROOTFS) + && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask) + && !journal_replay_only + && !(hfsmp->hfs_flags & HFS_READ_ONLY)) { retval = EINVAL; goto error_exit; } @@ -1274,9 +1353,17 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, // if we're able to init the journal, mark the mount // point as journaled. 
// - if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) { + if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) { vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); } else { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + retval = EINVAL; + goto error_exit; + } + // if the journal failed to open, then set the lastMountedVersion // to be "FSK!" which fsck_hfs will see and force the fsck instead // of just bailing out because the volume is journaled. @@ -1340,7 +1427,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, * then retry with physical blocksize of 512. */ if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) { - printf("HFS Mount: could not use physical block size " + printf("hfs_mountfs: could not use physical block size " "(%d) switching to 512\n", log_blksize); log_blksize = 512; if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { @@ -1357,7 +1444,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfsmp->hfs_logical_block_size = log_blksize; hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize; - if (hfsmp->jnl) { + if (hfsmp->jnl && hfsmp->jvp == devvp) { // close and re-open this with the new block size journal_close(hfsmp->jnl); hfsmp->jnl = NULL; @@ -1426,7 +1513,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); vfs_setmaxsymlen(mp, 0); - mp->mnt_vtable->vfc_threadsafe = TRUE; + mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR; #if NAMEDSTREAMS mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; @@ -1443,12 
+1530,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * Set the free space warning levels for a non-root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 5% of the volume size or 250MB, whichever is less, and the desired - * level (which will cancel the alert request) to 1/2 above that limit. - * Start looking for free space to drop below this level and generate a - * warning immediately if needed: + * Set the "danger" limit to 1% of the volume size or 100MB, whichever + * is less. Set the "warning" limit to 2% of the volume size or 150MB, + * whichever is less. And last, set the "desired" freespace level to + * to 3% of the volume size or 200MB, whichever is less. */ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION); @@ -1459,10 +1548,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * Set the free space warning levels for the root volume: * - * Set the lower freespace limit (the level that will trigger a warning) - * to 1% of the volume size or 50MB, whichever is less, and the desired - * level (which will cancel the alert request) to 2% or 75MB, whichever is less. + * Set the "danger" limit to 5% of the volume size or 125MB, whichever + * is less. Set the "warning" limit to 10% of the volume size or 250MB, + * whichever is less. And last, set the "desired" freespace level to + * to 11% of the volume size or 375MB, whichever is less. 
*/ + hfsmp->hfs_freespace_notify_dangerlimit = + MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, + (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION); hfsmp->hfs_freespace_notify_warninglimit = MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize, (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION); @@ -1478,10 +1571,11 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } } - /* ejectability checks will time out when the device is root_device, so skip them */ + /* do not allow ejectability checks on the root device */ if (isroot == 0) { if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 && IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) { + hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with. hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp); if (hfsmp->hfs_syncer == NULL) { printf("hfs: failed to allocate syncer thread callback for %s (%s)\n", @@ -1510,13 +1604,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, FREE(mdbp, M_TEMP); if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + (void)VNOP_CLOSE(hfsmp->jvp, ronly ? 
FREAD : FREAD|FWRITE, vfs_context_kernel()); hfsmp->jvp = NULL; } if (hfsmp) { if (hfsmp->hfs_devvp) { vnode_rele(hfsmp->hfs_devvp); } + hfs_delete_chash(hfsmp); + FREE(hfsmp, M_HFSMNT); vfs_setfsprivate(mp, NULL); } @@ -1591,7 +1688,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->hfs_sync_incomplete < 0) - printf("hfs_unmount: pm_sync_incomplete underflow (%d)!\n", hfsmp->hfs_sync_incomplete); + panic("hfs_unmount: pm_sync_incomplete underflow!\n"); } /* @@ -1656,6 +1753,23 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; } + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + int i; + u_int32_t min_start = hfsmp->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { + if (hfsmp->vcbFreeExt[i].startBlock < min_start) { + min_start = hfsmp->vcbFreeExt[i].startBlock; + } + } + if (min_start < hfsmp->nextAllocation) { + hfsmp->nextAllocation = min_start; + } + } + + retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (retval) { HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask; @@ -1670,7 +1784,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } /* @@ -1695,9 +1809,10 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { + vnode_clearmountedon(hfsmp->jvp); retval = VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? 
FREAD : FREAD|FWRITE, - context); + vfs_context_kernel()); vnode_put(hfsmp->jvp); hfsmp->jvp = NULL; } @@ -1716,6 +1831,8 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) #endif /* HFS_SPARSE_DEV */ lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); vnode_rele(hfsmp->hfs_devvp); + + hfs_delete_chash(hfsmp); FREE(hfsmp, M_HFSMNT); return (0); @@ -1831,18 +1948,18 @@ hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_contex { ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; + u_int32_t freeCNIDs; u_int16_t subtype = 0; - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)vcb->vcbNxtCNID; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID; sbp->f_bsize = (u_int32_t)vcb->blockSize; sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0); - sbp->f_blocks = (u_int64_t)((unsigned long)vcb->totalBlocks); - sbp->f_bfree = (u_int64_t)((unsigned long )hfs_freeblks(hfsmp, 0)); - sbp->f_bavail = (u_int64_t)((unsigned long )hfs_freeblks(hfsmp, 1)); - sbp->f_files = (u_int64_t)((unsigned long )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ - sbp->f_ffree = (u_int64_t)((unsigned long )(MIN(freeCNIDs, sbp->f_bavail))); + sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks); + sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0)); + sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1)); + sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */ + sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail))); /* * Subtypes (flavors) for HFS @@ -2071,15 +2188,16 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) } if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } { - uint32_t secs, usecs; + clock_sec_t secs; + clock_usec_t usecs; uint64_t now; clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000LL) + usecs; + now = 
((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; hfsmp->hfs_last_sync_time = now; } @@ -2117,23 +2235,20 @@ hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, result = ESTALE; return result; } - - /* The createtime can be changed by hfs_setattr or hfs_setattrlist. - * For NFS, we are assuming that only if the createtime was moved - * forward would it mean the fileID got reused in that session by - * wrapping. We don't have a volume ID or other unique identifier to - * to use here for a generation ID across reboots, crashes where - * metadata noting lastFileID didn't make it to disk but client has - * it, or volume erasures where fileIDs start over again. Lastly, - * with HFS allowing "wraps" of fileIDs now, this becomes more - * error prone. Future, would be change the "wrap bit" to a unique - * wrap number and use that for generation number. For now do this. - */ - if (((time_t)(ntohl(hfsfhp->hfsfid_gen)) < VTOC(nvp)->c_itime)) { - hfs_unlock(VTOC(nvp)); - vnode_put(nvp); - return (ESTALE); - } + + /* + * We used to use the create time as the gen id of the file handle, + * but it is not static enough because it can change at any point + * via system calls. We still don't have another volume ID or other + * unique identifier to use for a generation ID across reboots that + * persists until the file is removed. Using only the CNID exposes + * us to the potential wrap-around case, but as of 2/2008, it would take + * over 2 months to wrap around if the machine did nothing but allocate + * CNIDs. Using some kind of wrap counter would only be effective if + * each file had the wrap counter associated with it. For now, + * we use only the CNID to identify the file as it's good enough. 
+ */ + *vpp = nvp; hfs_unlock(VTOC(nvp)); @@ -2159,8 +2274,9 @@ hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_conte cp = VTOC(vp); hfsfhp = (struct hfsfid *)fhp; + /* only the CNID is used to identify the file now */ hfsfhp->hfsfid_cnid = htonl(cp->c_fileid); - hfsfhp->hfsfid_gen = htonl(cp->c_itime); + hfsfhp->hfsfid_gen = htonl(cp->c_fileid); *fhlenp = sizeof(struct hfsfid); return (0); @@ -2189,6 +2305,9 @@ hfs_init(__unused struct vfsconf *vfsp) hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr); +#if HFS_COMPRESSION + decmpfs_init(); +#endif return (0); } @@ -2263,15 +2382,23 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, size_t bufsize; size_t bytes; u_int32_t hint; - u_int16_t *unicode_name; - char *filename; + u_int16_t *unicode_name = NULL; + char *filename = NULL; if ((newlen <= 0) || (newlen > MAXPATHLEN)) return (EINVAL); bufsize = MAX(newlen * 3, MAXPATHLEN); MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK); + if (filename == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK); + if (unicode_name == NULL) { + error = ENOMEM; + goto encodinghint_exit; + } error = copyin(newp, (caddr_t)filename, newlen); if (error == 0) { @@ -2282,8 +2409,12 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint); } } - FREE(unicode_name, M_TEMP); - FREE(filename, M_TEMP); + +encodinghint_exit: + if (unicode_name) + FREE(unicode_name, M_TEMP); + if (filename) + FREE(filename, M_TEMP); return (error); } else if (name[0] == HFS_ENABLE_JOURNALING) { @@ -2340,6 +2471,14 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", (off_t)name[2], (off_t)name[3]); + // + // 
XXXdbg - note that currently (Sept, 08) hfs_util does not support + // enabling the journal on a separate device so it is safe + // to just copy hfs_devvp here. If hfs_util gets the ability + // to dynamically enable the journal on a separate device then + // we will have to do the same thing as hfs_early_journal_init() + // to locate and open the journal device. + // jvp = hfsmp->hfs_devvp; jnl = journal_create(jvp, (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize @@ -2354,7 +2493,8 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (jnl == NULL) { printf("hfs: FAILED to create the journal!\n"); if (jvp && jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(jvp); + VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel()); } jvp = NULL; @@ -2389,6 +2529,13 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_DISABLE_JOURNALING) { // clear the journaling bit @@ -2421,7 +2568,9 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfsmp->jnl = NULL; if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) { - VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, context); + vnode_clearmountedon(hfsmp->jvp); + VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? 
FREAD : FREAD|FWRITE, vfs_context_kernel()); + vnode_put(hfsmp->jvp); } hfsmp->jvp = NULL; vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED)); @@ -2434,6 +2583,13 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, hfs_global_exclusive_lock_release(hfsmp); hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1); + { + fsid_t fsid; + + fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev; + fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp)); + vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); + } return 0; } else if (name[0] == HFS_GET_JOURNAL_INFO) { vnode_t vp = vfs_context_cwd(context); @@ -2442,6 +2598,10 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, if (vp == NULLVP) return EINVAL; + /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */ + if (proc_is64bit(current_proc())) + return EINVAL; + hfsmp = VTOHFS(vp); if (hfsmp->jnl == NULL) { jnl_start = 0; @@ -2461,31 +2621,20 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, return 0; } else if (name[0] == HFS_SET_PKG_EXTENSIONS) { - return set_package_extensions_table((void *)name[1], name[2], name[3]); + return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); } else if (name[0] == VFS_CTL_QUERY) { struct sysctl_req *req; - struct vfsidctl vc; - struct user_vfsidctl user_vc; + union union_vfsidctl vc; struct mount *mp; struct vfsquery vq; - boolean_t is_64_bit; - is_64_bit = proc_is64bit(p); req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */ - if (is_64_bit) { - error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); - if (error) return (error); - - mp = vfs_getvfs(&user_vc.vc_fsid); - } - else { - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) return (error); - - mp = vfs_getvfs(&vc.vc_fsid); - } + error = SYSCTL_IN(req, &vc, proc_is64bit(p)? 
sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) return (error); + + mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ if (mp == NULL) return (ENOENT); hfsmp = VFSTOHFS(mp); @@ -2493,34 +2642,36 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, vq.vq_flags = hfsmp->hfs_notification_conditions; return SYSCTL_OUT(req, &vq, sizeof(vq));; } else if (name[0] == HFS_REPLAY_JOURNAL) { - char *devnode = NULL; - size_t devnode_len; - - devnode_len = *oldlenp; - MALLOC(devnode, char *, devnode_len + 1, M_TEMP, M_WAITOK); - if (devnode == NULL) { - return ENOMEM; + vnode_t devvp = NULL; + int device_fd; + if (namelen != 2) { + return (EINVAL); } - - error = copyin(oldp, (caddr_t)devnode, devnode_len); + device_fd = name[1]; + error = file_vnode(device_fd, &devvp); if (error) { - FREE(devnode, M_TEMP); return error; } - devnode[devnode_len] = 0; - - error = hfs_journal_replay(devnode, context); - FREE(devnode, M_TEMP); + error = vnode_getwithref(devvp); + if (error) { + file_drop(device_fd); + return error; + } + error = hfs_journal_replay(devvp, context); + file_drop(device_fd); + vnode_put(devvp); return error; } return (ENOTSUP); } -/* hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support the - * build_path ioctl. We use it to leverage the code below that updates the origin - * cache if necessary. +/* + * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support + * the build_path ioctl. We use it to leverage the code below that updates + * the origin list cache if necessary */ + int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) { @@ -2536,10 +2687,10 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con /* * ADLs may need to have their origin state updated - * since build_path needs a valid parent. The same is true - * for hardlinked files as well. 
There isn't a race window here in re-acquiring - * the cnode lock since we aren't pulling any data out of the cnode; instead, we're - * going back to the catalog. + * since build_path needs a valid parent. The same is true + * for hardlinked files as well. There isn't a race window here + * in re-acquiring the cnode lock since we aren't pulling any data + * out of the cnode; instead, we're going to the catalog. */ if ((VTOC(*vpp)->c_flag & C_HARDLINK) && (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { @@ -2548,13 +2699,11 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con if (!hfs_haslinkorigin(cp)) { lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); + error = cat_findname(hfsmp, (cnid_t)ino, &cdesc); hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { - if ((cdesc.cd_parentcnid != - hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && - (cdesc.cd_parentcnid != - hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { + if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && + (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) { hfs_savelinkorigin(cp, cdesc.cd_parentcnid); } cat_releasedesc(&cdesc); @@ -2597,7 +2746,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) /* * Check the hash first */ - vp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, cnid, 0, skiplock); + vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock); if (vp) { *vpp = vp; return(0); @@ -2712,7 +2861,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock) error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp); - if ((error == 0) && (VTOC(vp)->c_flag & C_HARDLINK)) { + if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) { hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid); } FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI); @@ -2763,7 +2912,7 @@ hfs_flushfiles(struct mount 
*mp, int flags, __unused struct proc *p) } /* Obtain the root vnode so we can skip over it. */ - skipvp = hfs_chash_getvnode(hfsmp->hfs_raw_dev, kHFSRootFolderID, 0, 0); + skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0); } #endif /* QUOTA */ @@ -2998,9 +3147,9 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) { ExtendedVCB *vcb = HFSTOVCB(hfsmp); struct filefork *fp; - HFSPlusVolumeHeader *volumeHeader; + HFSPlusVolumeHeader *volumeHeader, *altVH; int retval; - struct buf *bp; + struct buf *bp, *alt_bp; int i; daddr64_t priIDSector; int critical; @@ -3021,42 +3170,72 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) return EINVAL; } + bp = NULL; + alt_bp = NULL; + retval = (int)buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &bp); if (retval) { - if (bp) - buf_brelse(bp); - - hfs_end_transaction(hfsmp); - - printf("HFS: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); - return (retval); - } - - if (hfsmp->jnl) { - journal_modify_block_start(hfsmp->jnl, bp); + printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); + goto err_exit; } volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size)); /* - * Sanity check what we just read. + * Sanity check what we just read. If it's bad, try the alternate + * instead. 
*/ signature = SWAP_BE16 (volumeHeader->signature); hfsversion = SWAP_BE16 (volumeHeader->version); if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) { -#if 1 - panic("HFS: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d", + printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n", vcb->vcbVN, signature, hfsversion, - SWAP_BE32 (volumeHeader->blockSize)); -#endif - printf("HFS: corrupt VH blk (%s)\n", vcb->vcbVN); - buf_brelse(bp); - return (EIO); + SWAP_BE32 (volumeHeader->blockSize), + hfsmp->hfs_alt_id_sector ? "; trying alternate" : ""); + hfs_mark_volume_inconsistent(hfsmp); + + if (hfsmp->hfs_alt_id_sector) { + retval = buf_meta_bread(hfsmp->hfs_devvp, + HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), + hfsmp->hfs_physical_block_size, NOCRED, &alt_bp); + if (retval) { + printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN); + goto err_exit; + } + + altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) + + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)); + signature = SWAP_BE16(altVH->signature); + hfsversion = SWAP_BE16(altVH->version); + + if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) || + (hfsversion < kHFSPlusVersion) || (hfsversion > 100) || + (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) { + printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n", + vcb->vcbVN, signature, hfsversion, + SWAP_BE32(altVH->blockSize)); + retval = EIO; + goto err_exit; + } + + /* The alternate is plausible, so use it. */ + bcopy(altVH, volumeHeader, kMDBSize); + buf_brelse(alt_bp); + alt_bp = NULL; + } else { + /* No alternate VH, nothing more we can do.
*/ + retval = EIO; + goto err_exit; + } + } + + if (hfsmp->jnl) { + journal_modify_block_start(hfsmp->jnl, bp); } /* @@ -3219,8 +3398,6 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) /* If requested, flush out the alternate volume header */ if (altflush && hfsmp->hfs_alt_id_sector) { - struct buf *alt_bp = NULL; - if (buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) { @@ -3258,6 +3435,14 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) hfs_end_transaction(hfsmp); return (retval); + +err_exit: + if (alt_bp) + buf_brelse(alt_bp); + if (bp) + buf_brelse(bp); + hfs_end_transaction(hfsmp); + return retval; } @@ -3660,7 +3845,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } /* Start with a clean journal. */ - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); if (hfs_start_transaction(hfsmp) != 0) { error = EINVAL; @@ -3815,7 +4000,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } if (transaction_begun) { hfs_end_transaction(hfsmp); - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } return (error); @@ -3924,7 +4109,7 @@ hfs_copy_extent( srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size; while (resid > 0) { - ioSize = MIN(bufferSize, resid); + ioSize = MIN(bufferSize, (size_t) resid); ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size; /* Prepare the buffer for reading */ @@ -4446,7 +4631,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) if (error) { printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error); } - error = journal_flush(hfsmp->jnl); + error = hfs_journal_flush(hfsmp); if (error) { printf("hfs_reclaim_journal_info_block: 
journal_flush returned %d\n", error); } @@ -4465,7 +4650,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) * Reclaim space at the end of a file system. */ static int -hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context) +hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t startblk, u_int32_t reclaimblks, vfs_context_t context) { struct vnode *vp = NULL; FCB *fcb; @@ -4477,10 +4662,10 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf size_t cnidbufsize; int filecnt = 0; int maxfilecnt; - u_long block; - u_long datablks; - u_long rsrcblks; - u_long blkstomove = 0; + u_int32_t block; + u_int32_t datablks; + u_int32_t rsrcblks; + u_int32_t blkstomove = 0; int lockflags; int i; int error; @@ -4529,7 +4714,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf * strictly required, but shouldn't hurt. */ if (system_file_moved) - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) > startblk) { error = hfs_reclaim_journal_file(hfsmp, context); @@ -4549,7 +4734,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf /* For now move a maximum of 250,000 files. 
*/ maxfilecnt = MIN(hfsmp->hfs_filecount, 250000); - maxfilecnt = MIN((u_long)maxfilecnt, reclaimblks); + maxfilecnt = MIN((u_int32_t)maxfilecnt, reclaimblks); cnidbufsize = maxfilecnt * sizeof(cnid_t); if (kmem_alloc(kernel_map, (vm_offset_t *)&cnidbufp, cnidbufsize)) { return (ENOMEM); @@ -4671,7 +4856,7 @@ hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vf * */ if (blkstomove >= hfs_freeblks(hfsmp, 1)) { - printf("hfs_truncatefs: insufficient space (need %lu blocks; have %u blocks)\n", blkstomove, hfs_freeblks(hfsmp, 1)); + printf("hfs_truncatefs: insufficient space (need %u blocks; have %u blocks)\n", blkstomove, hfs_freeblks(hfsmp, 1)); error = ENOSPC; goto out; } @@ -4836,6 +5021,28 @@ hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress) } +/* + * Creates a UUID from a unique "name" in the HFS UUID Name space. + * See version 3 UUID. + */ +static void +hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result) +{ + MD5_CTX md5c; + uint8_t rawUUID[8]; + + ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6]; + ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7]; + + MD5Init( &md5c ); + MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) ); + MD5Update( &md5c, rawUUID, sizeof (rawUUID) ); + MD5Final( result, &md5c ); + + result[6] = 0x30 | ( result[6] & 0x0F ); + result[8] = 0x80 | ( result[8] & 0x3F ); +} + /* * Get file system attributes. 
*/ @@ -4847,9 +5054,9 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t ExtendedVCB *vcb = VFSTOVCB(mp); struct hfsmount *hfsmp = VFSTOHFS(mp); - u_long freeCNIDs; + u_int32_t freeCNIDs; - freeCNIDs = (u_long)0xFFFFFFFF - (u_long)hfsmp->vcbNxtCNID; + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt); @@ -4898,7 +5105,12 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_FAST_STATFS | VOL_CAP_FMT_2TB_FILESIZE | VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else VOL_CAP_FMT_PATH_FROM_ID; +#endif } cap->capabilities[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | @@ -4934,7 +5146,12 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_2TB_FILESIZE | VOL_CAP_FMT_OPENDENYMODES | VOL_CAP_FMT_HIDDEN_FILES | +#if HFS_COMPRESSION + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_DECMPFS_COMPRESSION; +#else VOL_CAP_FMT_PATH_FROM_ID; +#endif cap->valid[VOL_CAPABILITIES_INTERFACES] = VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | @@ -5012,6 +5229,10 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN); VFSATTR_SET_SUPPORTED(fsap, f_vol_name); } + if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) { + hfs_getvoluuid(hfsmp, fsap->f_uuid); + VFSATTR_SET_SUPPORTED(fsap, f_uuid); + } return (0); } @@ -5146,28 +5367,23 @@ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; MarkVCBDirty(hfsmp); } - /* Log information to ASL log */ - fslog_fs_corrupt(hfsmp->hfs_mp); - printf("HFS: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + if ((hfsmp->hfs_flags & 
HFS_READ_ONLY)==0) { + /* Log information to ASL log */ + fslog_fs_corrupt(hfsmp->hfs_mp); + printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); + } HFS_MOUNT_UNLOCK(hfsmp, TRUE); } /* Replay the journal on the device node provided. Returns zero if * journal replay succeeded or no journal was supposed to be replayed. */ -static int hfs_journal_replay(const char *devnode, vfs_context_t context) +static int hfs_journal_replay(vnode_t devvp, vfs_context_t context) { int retval = 0; - struct vnode *devvp = NULL; struct mount *mp = NULL; struct hfs_mount_args *args = NULL; - /* Lookup vnode for given raw device path */ - retval = vnode_open(devnode, FREAD|FWRITE, 0, 0, &devvp, NULL); - if (retval) { - goto out; - } - /* Replay allowed only on raw devices */ if (!vnode_ischr(devvp)) { retval = EINVAL; @@ -5176,10 +5392,18 @@ static int hfs_journal_replay(const char *devnode, vfs_context_t context) /* Create dummy mount structures */ MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK); + if (mp == NULL) { + retval = ENOMEM; + goto out; + } bzero(mp, sizeof(struct mount)); mount_lock_init(mp); MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK); + if (args == NULL) { + retval = ENOMEM; + goto out; + } bzero(args, sizeof(struct hfs_mount_args)); retval = hfs_mountfs(devvp, mp, args, 1, context); @@ -5193,9 +5417,6 @@ static int hfs_journal_replay(const char *devnode, vfs_context_t context) if (args) { FREE(args, M_TEMP); } - if (devvp) { - vnode_close(devvp, FREAD|FWRITE, NULL); - } return retval; } diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index d6dc1e356..2485c73f6 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include #include +#include #include @@ -612,6 +614,14 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, retval = hfs_late_journal_init(hfsmp, vhp, args); if (retval != 0) { + if (retval == EROFS) { + // EROFS is a special error code that means the volume has an external + // journal which we couldn't find. in that case we do not want to + // rewrite the volume header - we'll just refuse to mount the volume. + retval = EINVAL; + goto ErrorExit; + } + hfsmp->jnl = NULL; // if the journal failed to open, then set the lastMountedVersion @@ -626,6 +636,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize)); + bp = NULL; retval = (int)buf_meta_bread(hfsmp->hfs_devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, cred, &bp); @@ -691,7 +702,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, vcb->nextAllocation <= hfsmp->hfs_metazone_end) { HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1); } + } else { + if (vcb->nextAllocation <= 1) { + vcb->nextAllocation = hfsmp->hfs_min_alloc_start; + } } + vcb->sparseAllocation = hfsmp->hfs_min_alloc_start; /* Setup private/hidden directories for hardlinks. */ hfs_privatedir_init(hfsmp, FILE_HARDLINKS); @@ -700,6 +716,14 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) hfs_remove_orphans(hfsmp); + /* See if we need to erase unused Catalog nodes due to . 
*/ + if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) + { + retval = hfs_erase_unused_nodes(hfsmp); + if (retval) + goto ErrorExit; + } + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected { MarkVCBDirty( vcb ); // mark VCB dirty so it will be written @@ -816,7 +840,7 @@ __private_extern__ int overflow_extents(struct filefork *fp) { - u_long blocks; + u_int32_t blocks; // // If the vnode pointer is NULL then we're being called @@ -1044,20 +1068,20 @@ void RequireFileLock(FileReference vp, int shareable) if (!locked && !shareable) { switch (VTOC(vp)->c_fileid) { case kHFSExtentsFileID: - panic("extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSCatalogFileID: - panic("catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSAllocationFileID: /* The allocation file can hide behind the jornal lock. */ if (VTOHFS(vp)->jnl == NULL) - panic("allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp); break; case kHFSStartupFileID: - panic("startup file not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp); case kHFSAttributesFileID: - panic("attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp); + panic("hfs: attributes btree not locked! 
v: 0x%08X\n #\n", (u_int)vp); break; } } @@ -1094,15 +1118,15 @@ hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred, } -unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, - unsigned long blockSizeLimit, - unsigned long baseMultiple) { +u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, + u_int32_t blockSizeLimit, + u_int32_t baseMultiple) { /* Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the specified limit but still an even multiple of the baseMultiple. */ int baseBlockCount, blockCount; - unsigned long trialBlockSize; + u_int32_t trialBlockSize; if (allocationBlockSize % baseMultiple != 0) { /* @@ -1138,7 +1162,7 @@ unsigned long BestBlockSizeFit(unsigned long allocationBlockSize, __private_extern__ -u_long +u_int32_t GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, struct cat_attr *fattr, struct cat_fork *forkinfo) { @@ -1197,7 +1221,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) int started_tr = 0; int lockflags; int result; - int orphanedlinks = 0; + int orphaned_files = 0; + int orphaned_dirs = 0; bzero(&cookie, sizeof(cookie)); @@ -1313,7 +1338,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) } if (TruncateFileC(vcb, (FCB*)&dfork, fsize, false) != 0) { - printf("error truncting data fork!\n"); + printf("hfs: error truncting data fork!\n"); break; } @@ -1345,7 +1370,7 @@ hfs_remove_orphans(struct hfsmount * hfsmp) cnode.c_datafork = NULL; cnode.c_rsrcfork = &rfork; if (TruncateFileC(vcb, (FCB*)&rfork, 0, false) != 0) { - printf("error truncting rsrc fork!\n"); + printf("hfs: error truncting rsrc fork!\n"); break; } } @@ -1358,7 +1383,13 @@ hfs_remove_orphans(struct hfsmount * hfsmp) hfs_volupdate(hfsmp, VOL_UPDATE, 0); break; } - ++orphanedlinks; + + if (cnode.c_attr.ca_mode & S_IFDIR) { + orphaned_dirs++; + } + else { + orphaned_files++; + } /* Update parent and volume counts */ hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--; @@ 
-1392,8 +1423,8 @@ hfs_remove_orphans(struct hfsmount * hfsmp) } /* end if */ } /* end for */ - if (orphanedlinks > 0) - printf("HFS: Removed %d orphaned unlinked files or directories \n", orphanedlinks); + if (orphaned_files > 0 || orphaned_dirs > 0) + printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs); exit: if (catlock) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -1518,6 +1549,9 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) else vfreeblks = 0; + if (hfsmp->hfs_backingfs_maxblocks) { + vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks); + } freeblks = MIN(vfreeblks, freeblks); HFS_MOUNT_UNLOCK(hfsmp, TRUE); } @@ -1772,6 +1806,133 @@ hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2 } +typedef struct jopen_cb_info { + off_t jsize; + char *desired_uuid; + struct vnode *jvp; + size_t blksize; + int need_clean; + int need_init; +} jopen_cb_info; + +static int +journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) +{ + struct nameidata nd; + jopen_cb_info *ji = (jopen_cb_info *)arg; + char bsd_name[256]; + int error; + + strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name)); + strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5); + + if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) { + return 1; // keep iterating + } + + // if we're here, either the desired uuid matched or there was no + // desired uuid so let's try to open the device for writing and + // see if it works. if it does, we'll use it. 
+ + NDINIT(&nd, LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel()); + if ((error = namei(&nd))) { + printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str); + return 1; // keep iterating + } + + ji->jvp = nd.ni_vp; + nameidone(&nd); + + if (ji->jvp == NULL) { + printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error); + } else { + error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + if (error == 0) { + // if the journal is dirty and we didn't specify a desired + // journal device uuid, then do not use the journal. but + // if the journal is just invalid (e.g. it hasn't been + // initialized) then just set the need_init flag. + if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') { + error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize); + if (error == EBUSY) { + VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel()); + vnode_put(ji->jvp); + ji->jvp = NULL; + return 1; // keep iterating + } else if (error == EINVAL) { + ji->need_init = 1; + } + } + + if (ji->desired_uuid && ji->desired_uuid[0] == '\0') { + strlcpy(ji->desired_uuid, uuid_str, 128); + } + vnode_setmountedon(ji->jvp); + // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str); + return 0; // stop iterating + } else { + vnode_put(ji->jvp); + ji->jvp = NULL; + } + } + + return 1; // keep iterating +} + +extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout); +extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg); +extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp); +kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len); + + +static vnode_t +open_journal_dev(const char *vol_device, + int need_clean, + char *uuid_str, + char 
*machine_serial_num, + off_t jsize, + size_t blksize, + int *need_init) +{ + int retry_counter=0; + jopen_cb_info ji; + + ji.jsize = jsize; + ji.desired_uuid = uuid_str; + ji.jvp = NULL; + ji.blksize = blksize; + ji.need_clean = need_clean; + ji.need_init = 0; + +// if (uuid_str[0] == '\0') { +// printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device); +// } else { +// printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str); +// } + while (ji.jvp == NULL && retry_counter++ < 4) { + if (retry_counter > 1) { + if (uuid_str[0]) { + printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str); + } else { + printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n"); + } + delay_for_interval(10* 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again + } + + IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji); + } + + if (ji.jvp == NULL) { + printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n", + vol_device, uuid_str, machine_serial_num); + } + + *need_init = ji.need_init; + + return ji.jvp; +} + + __private_extern__ int hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, @@ -1781,15 +1942,20 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, JournalInfoBlock *jibp; struct buf *jinfo_bp, *bp; int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0; - int retval; + int retval, write_jibp = 0; uint32_t blksize = hfsmp->hfs_logical_block_size; struct vnode *devvp; struct hfs_mount_args *args = _args; u_int32_t jib_flags; u_int64_t jib_offset; u_int64_t jib_size; + const char *dev_name; devvp = hfsmp->hfs_devvp; + dev_name = vnode_name(devvp); + if (dev_name == NULL) { + dev_name = "unknown-dev"; + } if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { arg_flags = 
args->journal_flags; @@ -1798,24 +1964,56 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize; + jinfo_bp = NULL; retval = (int)buf_meta_bread(devvp, (daddr64_t)((embeddedOffset/blksize) + - (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), - SWAP_BE32(vhp->blockSize), cred, &jinfo_bp); - if (retval) + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, cred, &jinfo_bp); + if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } return retval; - + } + jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); jib_flags = SWAP_BE32(jibp->flags); - jib_offset = SWAP_BE64(jibp->offset); jib_size = SWAP_BE64(jibp->size); if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset = SWAP_BE64(jibp->offset); } else { - printf("hfs: journal not stored in fs! don't know what to do.\n"); + int need_init=0; + + // if the volume was unmounted cleanly then we'll pick any + // available external journal partition + // + if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) { + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + } + + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { buf_brelse(jinfo_bp); - return EINVAL; + return EROFS; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num)); + } + } + + jib_offset = 0; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } } // save this off for the hack-y check in hfs_remove() @@ -1827,18 +2025,21 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, // if it is, then we can 
allow the mount. otherwise we have to // return failure. retval = journal_is_clean(hfsmp->jvp, - jib_offset + embeddedOffset, + jib_offset + embeddedOffset, jib_size, devvp, - hfsmp->hfs_logical_block_size); + hfsmp->hfs_logical_block_size); hfsmp->jnl = NULL; buf_brelse(jinfo_bp); if (retval) { + const char *name = vnode_getname(devvp); printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - vnode_name(devvp)); + name ? name : ""); + if (name) + vnode_putname(name); } return retval; @@ -1877,7 +2078,11 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, arg_tbufsz, hfs_sync_metadata, hfsmp->hfs_mp); - buf_brelse(jinfo_bp); + if (write_jibp) { + buf_bwrite(jinfo_bp); + } else { + buf_brelse(jinfo_bp); + } jinfo_bp = NULL; jibp = NULL; @@ -1887,11 +2092,14 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (mdb_offset == 0) { mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize)); } + bp = NULL; retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, cred, &bp); if (retval) { - buf_brelse(bp); + if (bp) { + buf_brelse(bp); + } printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", retval); return retval; @@ -1941,7 +2149,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a struct cat_attr jib_attr, jattr; struct cat_fork jib_fork, jfork; ExtendedVCB *vcb; - u_long fid; + u_int32_t fid; struct hfs_mount_args *args = _args; u_int32_t jib_flags; u_int64_t jib_offset; @@ -1980,11 +2188,15 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size; + jinfo_bp = NULL; retval = (int)buf_meta_bread(devvp, - (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + - 
(SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), - SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp); + (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + + ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)), + hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp); if (retval) { + if (jinfo_bp) { + buf_brelse(jinfo_bp); + } printf("hfs: can't read journal info block. disabling journaling.\n"); vcb->vcbAtrb &= ~kHFSVolumeJournaledMask; return 0; @@ -2006,7 +2218,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->hfs_jnlfileid = fid; // make sure the journal file begins where we think it should. - if ((jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { + if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) { printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n", (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock); @@ -2027,10 +2239,41 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a if (jib_flags & kJIJournalInFSMask) { hfsmp->jvp = hfsmp->hfs_devvp; + jib_offset += (off_t)vcb->hfsPlusIOPosOffset; } else { - printf("hfs: journal not stored in fs! 
don't know what to do.\n"); + const char *dev_name; + int need_init = 0; + + dev_name = vnode_name(devvp); + if (dev_name == NULL) { + dev_name = "unknown-dev"; + } + + // since the journal is empty, just use any available external journal + *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; + + // this fills in the uuid of the device we actually get + hfsmp->jvp = open_journal_dev(dev_name, + !(jib_flags & kJIJournalNeedInitMask), + (char *)&jibp->ext_jnl_uuid[0], + (char *)&jibp->machine_serial_num[0], + jib_size, + hfsmp->hfs_logical_block_size, + &need_init); + if (hfsmp->jvp == NULL) { buf_brelse(jinfo_bp); - return EINVAL; + return EROFS; + } else { + if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { + strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num)); + } + } + jib_offset = 0; + recreate_journal = 1; + write_jibp = 1; + if (need_init) { + jib_flags |= kJIJournalNeedInitMask; + } } // save this off for the hack-y check in hfs_remove() @@ -2042,7 +2285,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a // if it is, then we can allow the mount. otherwise we have to // return failure. retval = journal_is_clean(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size); @@ -2052,8 +2295,11 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a buf_brelse(jinfo_bp); if (retval) { + const char *name = vnode_getname(devvp); printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - vnode_name(devvp)); + name ? 
name : ""); + if (name) + vnode_putname(name); } return retval; @@ -2061,9 +2307,9 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) { printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, jib_size); + jib_offset, jib_size); hfsmp->jnl = journal_create(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size, @@ -2088,11 +2334,11 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a arg_flags |= JOURNAL_RESET; //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n", - // jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + // jib_offset, // jib_size, SWAP_BE32(vhp->blockSize)); hfsmp->jnl = journal_open(hfsmp->jvp, - jib_offset + (off_t)vcb->hfsPlusIOPosOffset, + jib_offset, jib_size, devvp, hfsmp->hfs_logical_block_size, @@ -2114,7 +2360,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a jinfo_bp = NULL; jibp = NULL; - //printf("journal @ 0x%x\n", hfsmp->jnl); + //printf("hfs: journal @ 0x%x\n", hfsmp->jnl); // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. @@ -2164,7 +2410,7 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) u_int64_t temp; u_int64_t filesize; u_int32_t blk; - int items; + int items, really_do_it=1; vcb = HFSTOVCB(hfsmp); fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks; @@ -2172,50 +2418,65 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) /* * For volumes less than 10 GB, don't bother. */ - if (fs_size < ((u_int64_t)10 * GIGABYTE)) - return; + if (fs_size < ((u_int64_t)10 * GIGABYTE)) { + really_do_it = 0; + } + /* * Skip non-journaled volumes as well. 
*/ - if (hfsmp->jnl == NULL) - return; + if (hfsmp->jnl == NULL) { + really_do_it = 0; + } /* - * Start with allocation bitmap (a fixed size). + * Start with space for the boot blocks and Volume Header. + * 1536 = byte offset from start of volume to end of volume header: + * 1024 bytes is the offset from the start of the volume to the + * start of the volume header (defined by the volume format) + * + 512 bytes (the size of the volume header). */ - zonesize = roundup(vcb->totalBlocks / 8, vcb->vcbVBMIOSize); - + zonesize = roundup(1536, hfsmp->blockSize); + /* - * Overflow Extents file gets 4 MB per 100 GB. + * Add the on-disk size of allocation bitmap. */ - items = fs_size / ((u_int64_t)100 * GIGABYTE); - filesize = (u_int64_t)(items + 1) * OVERFLOW_DEFAULT_SIZE; - if (filesize > OVERFLOW_MAXIMUM_SIZE) - filesize = OVERFLOW_MAXIMUM_SIZE; - zonesize += filesize; - hfsmp->hfs_overflow_maxblks = filesize / vcb->blockSize; - + zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize; + + /* + * Add space for the Journal Info Block and Journal (if they're in + * this file system). + */ + if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) { + zonesize += hfsmp->blockSize + hfsmp->jnl_size; + } + /* - * Plan for at least 8 MB of journal for each - * 100 GB of disk space (up to a 512 MB). + * Add the existing size of the Extents Overflow B-tree. + * (It rarely grows, so don't bother reserving additional room for it.) */ - items = fs_size / ((u_int64_t)100 * GIGABYTE); - filesize = (u_int64_t)(items + 1) * JOURNAL_DEFAULT_SIZE; - if (filesize > JOURNAL_MAXIMUM_SIZE) - filesize = JOURNAL_MAXIMUM_SIZE; - zonesize += filesize; - + zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize; + /* - * Catalog file gets 10 MB per 1 GB. - * - * How about considering the current catalog size (used nodes * node size) - * and the current file data size to help estimate the required - * catalog size. 
+ * If there is an Attributes B-tree, leave room for 11 clumps worth. + * newfs_hfs allocates one clump, and leaves a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. */ - filesize = MIN((fs_size / 1024) * 10, GIGABYTE); - hfsmp->hfs_catalog_maxblks = filesize / vcb->blockSize; - zonesize += filesize; - + if (hfsmp->hfs_attribute_cp) { + zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize; + } + + /* + * Leave room for 11 clumps of the Catalog B-tree. + * Again, newfs_hfs allocates one clump plus a gap of 10 clumps. + * When installing a full OS install onto a 20GB volume, we use + * 7 to 8 clumps worth of space (depending on packages), so that leaves + * us with another 3 or 4 clumps worth before we need another extent. + */ + zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize; + /* * Add space for hot file region. * @@ -2229,38 +2490,40 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) /* * Calculate user quota file requirements. */ - items = QF_USERS_PER_GB * (fs_size / GIGABYTE); - if (items < QF_MIN_USERS) - items = QF_MIN_USERS; - else if (items > QF_MAX_USERS) - items = QF_MAX_USERS; - if (!powerof2(items)) { - int x = items; - items = 4; - while (x>>1 != 1) { - x = x >> 1; - items = items << 1; + if (hfsmp->hfs_flags & HFS_QUOTAS) { + items = QF_USERS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_USERS) + items = QF_MIN_USERS; + else if (items > QF_MAX_USERS) + items = QF_MAX_USERS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } } - } - filesize += (items + 1) * sizeof(struct dqblk); - /* - * Calculate group quota file requirements. 
- * - */ - items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); - if (items < QF_MIN_GROUPS) - items = QF_MIN_GROUPS; - else if (items > QF_MAX_GROUPS) - items = QF_MAX_GROUPS; - if (!powerof2(items)) { - int x = items; - items = 4; - while (x>>1 != 1) { - x = x >> 1; - items = items << 1; + filesize += (items + 1) * sizeof(struct dqblk); + /* + * Calculate group quota file requirements. + * + */ + items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE); + if (items < QF_MIN_GROUPS) + items = QF_MIN_GROUPS; + else if (items > QF_MAX_GROUPS) + items = QF_MAX_GROUPS; + if (!powerof2(items)) { + int x = items; + items = 4; + while (x>>1 != 1) { + x = x >> 1; + items = items << 1; + } } + filesize += (items + 1) * sizeof(struct dqblk); } - filesize += (items + 1) * sizeof(struct dqblk); zonesize += filesize; /* @@ -2269,6 +2532,22 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) */ temp = zonesize; zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize); + hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize; + /* + * If doing the round up for hfs_min_alloc_start would push us past + * totalBlocks, then just reset it back to 0. Though using a value + * bigger than totalBlocks would not cause damage in the block allocator + * code, this value could get stored in the volume header and make it out + * to disk, making the volume header technically corrupt. 
+ */ + if (hfsmp->hfs_min_alloc_start >= hfsmp->totalBlocks) { + hfsmp->hfs_min_alloc_start = 0; + } + + if (really_do_it == 0) { + return; + } + temp = zonesize - temp; /* temp has extra space */ filesize += temp / 3; hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize; @@ -2287,9 +2566,9 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); #if 0 - printf("HFS: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); - printf("HFS: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); - printf("HFS: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); + printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); + printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); + printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks); #endif hfsmp->hfs_flags |= HFS_METADATA_ZONE; } @@ -2347,7 +2626,6 @@ hfs_virtualmetafile(struct cnode *cp) } - // // Fire off a timed callback to sync the disk if the // volume is on ejectable media. @@ -2357,11 +2635,19 @@ void hfs_sync_ejectable(struct hfsmount *hfsmp) { if (hfsmp->hfs_syncer) { - uint32_t secs, usecs; + clock_sec_t secs; + clock_usec_t usecs; uint64_t now; clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000) + usecs; + now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; + + if (hfsmp->hfs_sync_incomplete && hfsmp->hfs_mp->mnt_pending_write_size >= hfsmp->hfs_max_pending_io) { + // if we have a sync scheduled but i/o is starting to pile up, + // don't call thread_call_enter_delayed() again because that + // will defer the sync. 
+ return; + } if (hfsmp->hfs_sync_scheduled == 0) { uint64_t deadline; @@ -2431,7 +2717,7 @@ hfs_start_transaction(struct hfsmount *hfsmp) if (hfsmp->jnl) { ret = journal_start_transaction(hfsmp->jnl); if (ret == 0) { - OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_global_lock_nesting); + OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting); } } else { ret = 0; @@ -2454,7 +2740,7 @@ hfs_end_transaction(struct hfsmount *hfsmp) if ( hfsmp->jnl == NULL || ( journal_owner(hfsmp->jnl) == current_thread() - && (OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_global_lock_nesting) == 1)) ) { + && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) { need_unlock = 1; } @@ -2473,3 +2759,95 @@ hfs_end_transaction(struct hfsmount *hfsmp) return ret; } + + +__private_extern__ +int +hfs_journal_flush(struct hfsmount *hfsmp) +{ + int ret; + + if (hfsmp->jnl) { + lck_rw_lock_shared(&hfsmp->hfs_global_lock); + ret = journal_flush(hfsmp->jnl); + lck_rw_unlock_shared(&hfsmp->hfs_global_lock); + } else { + ret = 0; + } + + return ret; +} + + +/* + * hfs_erase_unused_nodes + * + * Check whether a volume may suffer from unused Catalog B-tree nodes that + * are not zeroed (due to a faulty newfs_hfs). If so, just write + * zeroes to the unused nodes. + * + * How do we detect when a volume needs this repair? We can't always be + * certain. If a volume was created after a certain date, then it may have + * been created with the faulty newfs_hfs. Since newfs_hfs only created one + * clump, we can assume that if a Catalog B-tree is larger than its clump size, + * that means that the entire first clump must have been written to, which means + * there shouldn't be unused and unwritten nodes in that first clump, and this + * repair is not needed. + * + * We have defined a bit in the Volume Header's attributes to indicate when the + * unused nodes have been repaired. A newer newfs_hfs will set this bit. + * As will fsck_hfs when it repairs the unused nodes.
+ */ +__private_extern__ +int hfs_erase_unused_nodes(struct hfsmount *hfsmp) +{ + int result; + struct filefork *catalog; + int lockflags; + + if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask) + { + /* This volume has already been checked and repaired. */ + return 0; + } + + if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate)) + { + /* This volume is too old to have had the problem. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + catalog = hfsmp->hfs_catalog_cp->c_datafork; + if (catalog->ff_size > catalog->ff_clumpsize) + { + /* The entire first clump must have been in use at some point. */ + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + return 0; + } + + /* + * If we get here, we need to zero out those unused nodes. + * + * We start a transaction and lock the catalog since we're going to be + * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually + * do its writing via the journal, because that would be too much I/O + * to fit in a transaction, and it's a pain to break it up into multiple + * transactions. (It behaves more like growing a B-tree would.) + */ + printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN); + result = hfs_start_transaction(hfsmp); + if (result) + goto done; + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + result = BTZeroUnusedNodes(catalog); + vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes"); + hfs_systemfile_unlock(hfsmp, lockflags); + hfs_end_transaction(hfsmp); + if (result == 0) + hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask; + printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN); + +done: + return result; +} diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 6d8d6ad33..726e230e5 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,8 +27,8 @@ */ #include -#include #include +#include #include #include #include @@ -65,17 +65,14 @@ #include "hfscommon/headers/BTreesInternal.h" #include "hfscommon/headers/FileMgrInternal.h" - #define KNDETACH_VNLOCKED 0x00000001 -#define CARBON_TEMP_DIR_NAME "Cleanup At Startup" - - /* Global vfs data structures for hfs */ /* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */ int always_do_fullfsync = 0; -SYSCTL_INT (_kern, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called"); +SYSCTL_DECL(_vfs_generic); +SYSCTL_INT (_vfs_generic, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called"); static int hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vnode_attr *vap, @@ -88,14 +85,12 @@ static int hfs_removedir(struct vnode *, struct vnode *, struct componentname *, int); static int hfs_removefile(struct vnode *, struct vnode *, struct componentname *, - int, int, int); + int, int, int, struct vnode *); #if FIFO static int hfsfifo_read(struct vnop_read_args *); static int hfsfifo_write(struct vnop_write_args *); static int hfsfifo_close(struct vnop_close_args *); -static int hfsfifo_kqfilt_add(struct vnop_kqfilt_add_args *); -static int hfsfifo_kqfilt_remove(struct vnop_kqfilt_remove_args *); extern int (**fifo_vnodeop_p)(void *); #endif /* FIFO */ @@ -116,7 +111,6 @@ static int hfs_vnop_symlink(struct vnop_symlink_args*); static int hfs_vnop_setattr(struct vnop_setattr_args*); static int hfs_vnop_readlink(struct vnop_readlink_args *); static int hfs_vnop_pathconf(struct vnop_pathconf_args *); -static int hfs_vnop_kqfiltremove(struct vnop_kqfilt_remove_args *); static int hfs_vnop_whiteout(struct vnop_whiteout_args *); static int hfsspec_read(struct vnop_read_args *); static int hfsspec_write(struct vnop_write_args *); @@ -210,6 +204,187 @@ 
hfs_vnop_mknod(struct vnop_mknod_args *ap) return (0); } +#if HFS_COMPRESSION +/* + * hfs_ref_data_vp(): returns the data fork vnode for a given cnode. + * In the (hopefully rare) case where the data fork vnode is not + * present, it will use hfs_vget() to create a new vnode for the + * data fork. + * + * NOTE: If successful and a vnode is returned, the caller is responsible + * for releasing the returned vnode with vnode_rele(). + */ +static int +hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock) +{ + if (!data_vp || !cp) /* sanity check incoming parameters */ + return EINVAL; + + /* maybe we should take the hfs cnode lock here, and if so, use the skiplock parameter to tell us not to */ + + if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK); + struct vnode *c_vp = cp->c_vp; + if (c_vp) { + /* we already have a data vnode */ + *data_vp = c_vp; + vnode_ref(*data_vp); + if (!skiplock) hfs_unlock(cp); + return 0; + } + /* no data fork vnode in the cnode, so ask hfs for one. */ + + if (!cp->c_rsrc_vp) { + /* if we don't have either a c_vp or c_rsrc_vp, we can't really do anything useful */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; + } + + if (0 == hfs_vget(VTOHFS(cp->c_rsrc_vp), cp->c_cnid, data_vp, 1) && + 0 != data_vp) { + vnode_ref(*data_vp); + vnode_put(*data_vp); + if (!skiplock) hfs_unlock(cp); + return 0; + } + /* there was an error getting the vnode */ + *data_vp = NULL; + if (!skiplock) hfs_unlock(cp); + return EINVAL; +} + +/* + * hfs_lazy_init_decmpfs_cnode(): returns the decmpfs_cnode for a cnode, + * allocating it if necessary; returns NULL if there was an allocation error + */ +static decmpfs_cnode * +hfs_lazy_init_decmpfs_cnode(struct cnode *cp) +{ + if (!cp->c_decmp) { + decmpfs_cnode *dp = NULL; + MALLOC_ZONE(dp, decmpfs_cnode *, sizeof(decmpfs_cnode), M_DECMPFS_CNODE, M_WAITOK); + if (!dp) { + /* error allocating a decmpfs cnode */ + return NULL; + } + decmpfs_cnode_init(dp); + if (!OSCompareAndSwapPtr(NULL, 
dp, (void * volatile *)&cp->c_decmp)) { + /* another thread got here first, so free the decmpfs_cnode we allocated */ + decmpfs_cnode_destroy(dp); + FREE_ZONE(dp, sizeof(*dp), M_DECMPFS_CNODE); + } + } + + return cp->c_decmp; +} + +/* + * hfs_file_is_compressed(): returns 1 if the file is compressed, and 0 (zero) if not. + * if the file's compressed flag is set, makes sure that the decmpfs_cnode field + * is allocated by calling hfs_lazy_init_decmpfs_cnode(), then makes sure it is populated, + * or else fills it in via the decmpfs_file_is_compressed() function. + */ +int +hfs_file_is_compressed(struct cnode *cp, int skiplock) +{ + int ret = 0; + + /* fast check to see if file is compressed. If flag is clear, just answer no */ + if (!(cp->c_flags & UF_COMPRESSED)) { + return 0; + } + + decmpfs_cnode *dp = hfs_lazy_init_decmpfs_cnode(cp); + if (!dp) { + /* error allocating a decmpfs cnode, treat the file as uncompressed */ + return 0; + } + + /* flag was set, see if the decmpfs_cnode state is valid (zero == invalid) */ + uint32_t decmpfs_state = decmpfs_cnode_get_vnode_state(dp); + switch(decmpfs_state) { + case FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: /* treat decompressing files as if they are compressed */ + return 1; + case FILE_IS_NOT_COMPRESSED: + return 0; + /* otherwise the state is not cached yet */ + } + + /* decmpfs hasn't seen this file yet, so call decmpfs_file_is_compressed() to init the decmpfs_cnode struct */ + struct vnode *data_vp = NULL; + if (0 == hfs_ref_data_vp(cp, &data_vp, skiplock)) { + if (data_vp) { + ret = decmpfs_file_is_compressed(data_vp, VTOCMP(data_vp)); // fill in decmpfs_cnode + vnode_rele(data_vp); + } + } + return ret; +} + +/* hfs_uncompressed_size_of_compressed_file() - get the uncompressed size of the file. + * if the caller has passed a valid vnode (has a ref count > 0), then hfsmp and fid are not required. + * if the caller doesn't have a vnode, pass NULL in vp, and pass valid hfsmp and fid. 
+ * file's size is returned in size (required) + */ +int +hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock) +{ + int ret = 0; + int putaway = 0; /* flag to remember if we used hfs_vget() */ + + if (!size) { + return EINVAL; /* no place to put the file size */ + } + + if (NULL == vp) { + if (!hfsmp || !fid) { /* make sure we have the required parameters */ + return EINVAL; + } + if (0 != hfs_vget(hfsmp, fid, &vp, skiplock)) { /* vnode is null, use hfs_vget() to get it */ + vp = NULL; + } else { + putaway = 1; /* note that hfs_vget() was used to acquire the vnode */ + } + } + /* this double check for compression (hfs_file_is_compressed) + * ensures the cached size is present in case decmpfs hasn't + * encountered this node yet. + */ + if ( ( NULL != vp ) && hfs_file_is_compressed(VTOC(vp), skiplock) ) { + *size = decmpfs_cnode_get_vnode_cached_size(VTOCMP(vp)); /* file info will be cached now, so get size */ + } else { + ret = EINVAL; + } + + if (putaway) { /* did we use hfs_vget() to get this vnode? */ + vnode_put(vp); /* if so, release it and set it to null */ + vp = NULL; + } + return ret; +} + +int +hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_rsrc(ctx, cp->c_decmp); +} + +int +hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock) +{ + if (ctx == decmpfs_ctx) + return 0; + if (!hfs_file_is_compressed(cp, skiplock)) + return 0; + return decmpfs_hides_xattr(ctx, cp->c_decmp, name); +} +#endif /* HFS_COMPRESSION */ + /* * Open a file/directory.
*/ @@ -220,11 +395,47 @@ hfs_vnop_open(struct vnop_open_args *ap) struct filefork *fp; struct timeval tv; int error; + static int past_bootup = 0; + struct cnode *cp = VTOC(vp); + struct hfsmount *hfsmp = VTOHFS(vp); + +#if HFS_COMPRESSION + if (ap->a_mode & FWRITE) { + /* open for write */ + if ( hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + /* opening a compressed file for write, so convert it to decompressed */ + struct vnode *data_vp = NULL; + error = hfs_ref_data_vp(cp, &data_vp, 1); /* 1 == don't take the cnode lock */ + if (0 == error) { + if (data_vp) { + error = decmpfs_decompress_file(data_vp, VTOCMP(data_vp), -1, 1, 0); + vnode_rele(data_vp); + } else { + error = EINVAL; + } + } + if (error != 0) + return error; + } + } else { + /* open for read */ + if (hfs_file_is_compressed(cp, 1) ) { /* 1 == don't take the cnode lock */ + if (VNODE_IS_RSRC(vp)) { + /* opening the resource fork of a compressed file, so nothing to do */ + } else { + /* opening a compressed file for read, make sure it validates */ + error = decmpfs_validate_compressed_file(vp, VTOCMP(vp)); + if (error != 0) + return error; + } + } + } +#endif /* * Files marked append-only must be opened for appending. */ - if ((VTOC(vp)->c_flags & APPEND) && !vnode_isdir(vp) && + if ((cp->c_flags & APPEND) && !vnode_isdir(vp) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); @@ -232,14 +443,21 @@ hfs_vnop_open(struct vnop_open_args *ap) return (EBUSY); /* file is in use by the kernel */ /* Don't allow journal file to be opened externally. */ - if (VTOC(vp)->c_fileid == VTOHFS(vp)->hfs_jnlfileid) + if (cp->c_fileid == hfsmp->hfs_jnlfileid) return (EPERM); + + /* If we're going to write to the file, initialize quotas. */ +#if QUOTA + if ((ap->a_mode & FWRITE) && (hfsmp->hfs_flags & HFS_QUOTAS)) + (void)hfs_getinoquota(cp); +#endif /* QUOTA */ + /* * On the first (non-busy) open of a fragmented * file attempt to de-frag it (if its less than 20MB). 
*/ - if ((VTOHFS(vp)->hfs_flags & HFS_READ_ONLY) || - (VTOHFS(vp)->jnl == NULL) || + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || + (hfsmp->jnl == NULL) || #if NAMEDSTREAMS !vnode_isreg(vp) || vnode_isinuse(vp, 0) || vnode_isnamedstream(vp)) { #else @@ -248,30 +466,40 @@ hfs_vnop_open(struct vnop_open_args *ap) return (0); } - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) return (error); fp = VTOF(vp); if (fp->ff_blocks && fp->ff_extents[7].blockCount != 0 && fp->ff_size <= (20 * 1024 * 1024)) { + int no_mods = 0; struct timeval now; - struct cnode *cp = VTOC(vp); /* * Wait until system bootup is done (3 min). * And don't relocate a file that's been modified * within the past minute -- this can lead to * system thrashing. */ - microuptime(&tv); + + if (!past_bootup) { + microuptime(&tv); + if (tv.tv_sec > (60*3)) { + past_bootup = 1; + } + } + microtime(&now); - if (tv.tv_sec > (60 * 3) && - ((now.tv_sec - cp->c_mtime) > 60)) { - (void) hfs_relocate(vp, VTOVCB(vp)->nextAllocation + 4096, - vfs_context_ucred(ap->a_context), - vfs_context_proc(ap->a_context)); + if ((now.tv_sec - cp->c_mtime) > 60) { + no_mods = 1; + } + + if (past_bootup && no_mods) { + (void) hfs_relocate(vp, hfsmp->nextAllocation + 4096, + vfs_context_ucred(ap->a_context), + vfs_context_proc(ap->a_context)); } } - hfs_unlock(VTOC(vp)); + hfs_unlock(cp); return (0); } @@ -293,58 +521,59 @@ hfs_vnop_close(ap) struct proc *p = vfs_context_proc(ap->a_context); struct hfsmount *hfsmp; int busy; - int knownrefs = 0; int tooktrunclock = 0; + int knownrefs = 0; if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) return (0); cp = VTOC(vp); hfsmp = VTOHFS(vp); - /* - * If the rsrc fork is a named stream, it holds a usecount on - * the data fork, which prevents the data fork from getting recycled, which - * then prevents the de-allocation of its extra blocks. - * Do checks for truncation on close. Purge extra extents if they - * exist. 
Make sure the vp is not a directory, that it has a resource - * fork, and that rsrc fork is a named stream. + /* + * If the rsrc fork is a named stream, it can cause the data fork to + * stay around, preventing de-allocation of these blocks. + * Do checks for truncation on close. Purge extra extents if they exist. + * Make sure the vp is not a directory, and that it has a resource fork, + * and that resource fork is also a named stream. */ - + if ((vp->v_type == VREG) && (cp->c_rsrc_vp) && (vnode_isnamedstream(cp->c_rsrc_vp))) { uint32_t blks; blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); /* - * If there are any extra blocks and there are only 2 refs on - * this vp (ourselves + rsrc fork holding ref on us), go ahead - * and try to truncate the extra blocks away. + * If there are extra blocks and there are only 2 refs on + * this vp (ourselves + rsrc fork holding ref on us), go ahead + * and try to truncate. */ if ((blks < VTOF(vp)->ff_blocks) && (!vnode_isinuse(vp, 2))) { - // release cnode lock ; must acquire truncate lock BEFORE cnode lock - hfs_unlock (cp); + // release cnode lock; must acquire truncate lock BEFORE cnode lock + hfs_unlock(cp); hfs_lock_truncate(cp, TRUE); tooktrunclock = 1; - - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { + + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { hfs_unlock_truncate(cp, TRUE); - return (0); + // bail out if we can't re-acquire cnode lock + return 0; } - - //now re-test to make sure it's still valid. 
+ // now re-test to make sure it's still valid if (cp->c_rsrc_vp) { knownrefs = 1 + vnode_isnamedstream(cp->c_rsrc_vp); - if (!vnode_isinuse(vp, knownrefs)) { + if (!vnode_isinuse(vp, knownrefs)){ + // now we can truncate the file, if necessary blks = howmany(VTOF(vp)->ff_size, VTOVCB(vp)->blockSize); - if (blks < VTOF(vp)->ff_blocks) { - (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, ap->a_context); + if (blks < VTOF(vp)->ff_blocks){ + (void) hfs_truncate(vp, VTOF(vp)->ff_size, IO_NDELAY, 0, 0, ap->a_context); } } } } } + // if we froze the fs and we're exiting, then "thaw" the fs if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) { hfsmp->hfs_freezing_proc = NULL; @@ -362,10 +591,10 @@ hfs_vnop_close(ap) } else if (vnode_issystem(vp) && !busy) { vnode_recycle(vp); } - if (tooktrunclock) { + + if (tooktrunclock){ hfs_unlock_truncate(cp, TRUE); } - hfs_unlock(cp); if (ap->a_fflag & FWASWRITTEN) { @@ -395,9 +624,31 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) uint64_t data_size; enum vtype v_type; int error = 0; - cp = VTOC(vp); +#if HFS_COMPRESSION + /* we need to inspect the decmpfs state of the file before we take the hfs cnode lock */ + int compressed = 0; + int hide_size = 0; + off_t uncompressed_size = -1; + if (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_alloc) || VATTR_IS_ACTIVE(vap, va_data_alloc) || VATTR_IS_ACTIVE(vap, va_total_size)) { + /* we only care about whether the file is compressed if asked for the uncompressed size */ + if (VNODE_IS_RSRC(vp)) { + /* if it's a resource fork, decmpfs may want us to hide the size */ + hide_size = hfs_hides_rsrc(ap->a_context, cp, 0); + } else { + /* if it's a data fork, we need to know if it was compressed so we can report the uncompressed size */ + compressed = hfs_file_is_compressed(cp, 0); + } + if (compressed && (VATTR_IS_ACTIVE(vap, va_data_size) || VATTR_IS_ACTIVE(vap, va_total_size))) { + if (0 != hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, 
&uncompressed_size, 0)) { + /* failed to get the uncompressed size, we'll check for this later */ + uncompressed_size = -1; + } + } + } +#endif + /* * Shortcut for vnode_authorize path. Each of the attributes * in this set is updated atomically so we don't need to take @@ -418,11 +669,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; VATTR_SET_SUPPORTED(vap, va_acl); } + return (0); } + hfsmp = VTOHFS(vp); v_type = vnode_vtype(vp); - /* * If time attributes are requested and we have cnode times * that require updating, then acquire an exclusive lock on @@ -434,7 +686,8 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) return (error); hfs_touchtimes(hfsmp, cp); - } else { + } + else { if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) return (error); } @@ -503,8 +756,20 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if (VATTR_IS_ACTIVE(vap, va_data_alloc)) { u_int64_t blocks; - blocks = VCTOF(vp, cp)->ff_blocks; - VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); +#if HFS_COMPRESSION + if (hide_size) { + VATTR_RETURN(vap, va_data_alloc, 0); + } else if (compressed) { + /* for compressed files, we report all allocated blocks as belonging to the data fork */ + blocks = cp->c_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } + else +#endif + { + blocks = VCTOF(vp, cp)->ff_blocks; + VATTR_RETURN(vap, va_data_alloc, blocks * (u_int64_t)hfsmp->blockSize); + } } } @@ -513,25 +778,44 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if (v_type == VDIR) { VATTR_RETURN(vap, va_total_size, (cp->c_entries + 2) * AVERAGE_HFSDIRENTRY_SIZE); } else { - u_int64_t total_size = 0; + u_int64_t total_size = ~0ULL; struct cnode *rcp; - - if (cp->c_datafork) { - total_size = cp->c_datafork->ff_size; +#if HFS_COMPRESSION + if (hide_size) { + /* we're hiding the size of this file, so just return 0 */ + total_size = 0; + } else if 
(compressed) { + if (uncompressed_size == -1) { + /* + * We failed to get the uncompressed size above, + * so we'll fall back to the standard path below + * since total_size is still -1 + */ + } else { + /* use the uncompressed size we fetched above */ + total_size = uncompressed_size; + } } - - if (cp->c_blocks - VTOF(vp)->ff_blocks) { - /* We deal with resource fork vnode iocount at the end of the function */ - error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); - if (error) { - goto out; +#endif + if (total_size == ~0ULL) { + if (cp->c_datafork) { + total_size = cp->c_datafork->ff_size; } - rcp = VTOC(rvp); - if (rcp && rcp->c_rsrcfork) { - total_size += rcp->c_rsrcfork->ff_size; + + if (cp->c_blocks - VTOF(vp)->ff_blocks) { + /* We deal with rsrc fork vnode iocount at the end of the function */ + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); + if (error) { + goto out; + } + + rcp = VTOC(rvp); + if (rcp && rcp->c_rsrcfork) { + total_size += rcp->c_rsrcfork->ff_size; + } } } - + VATTR_RETURN(vap, va_total_size, total_size); } } @@ -612,12 +896,32 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) vap->va_linkid = (u_int64_t)cp->c_cnid; vap->va_parentid = (u_int64_t)cp->c_parentcnid; } - vap->va_fsid = cp->c_dev; + vap->va_fsid = hfsmp->hfs_raw_dev; vap->va_filerev = 0; vap->va_encoding = cp->c_encoding; vap->va_rdev = (v_type == VBLK || v_type == VCHR) ? 
cp->c_rdev : 0; +#if HFS_COMPRESSION + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + if (hide_size) + vap->va_data_size = 0; + else if (compressed) { + if (uncompressed_size == -1) { + /* failed to get the uncompressed size above, so just return data_size */ + vap->va_data_size = data_size; + } else { + /* use the uncompressed size we fetched above */ + vap->va_data_size = uncompressed_size; + } + } else + vap->va_data_size = data_size; +// vap->va_supported |= VNODE_ATTR_va_data_size; + VATTR_SET_SUPPORTED(vap, va_data_size); + } +#else vap->va_data_size = data_size; - + vap->va_supported |= VNODE_ATTR_va_data_size; +#endif + /* Mark them all at once instead of individual VATTR_SET_SUPPORTED calls. */ vap->va_supported |= VNODE_ATTR_va_create_time | VNODE_ATTR_va_modify_time | VNODE_ATTR_va_change_time| VNODE_ATTR_va_backup_time | @@ -626,85 +930,88 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) VNODE_ATTR_va_flags |VNODE_ATTR_va_fileid | VNODE_ATTR_va_linkid | VNODE_ATTR_va_parentid | VNODE_ATTR_va_fsid | VNODE_ATTR_va_filerev | - VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev | - VNODE_ATTR_va_data_size; + VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev; - /* If this is the root, let VFS to find out the mount name, which may be different from the real name. - * Otherwise, we need to just take care for hardlinked files, which need to be looked up, if necessary + /* If this is the root, let VFS to find out the mount name, which + * may be different from the real name. Otherwise, we need to take care + * for hardlinked files, which need to be looked up, if necessary */ if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) { struct cat_desc linkdesc; int lockflags; int uselinkdesc = 0; cnid_t nextlinkid = 0; - cnid_t prevlinkid = 0; + cnid_t prevlinkid = 0; /* Get the name for ATTR_CMN_NAME. We need to take special care for hardlinks * here because the info. for the link ID requested by getattrlist may be * different than what's currently in the cnode. 
This is because the cnode * will be filled in with the information for the most recent link ID that went * through namei/lookup(). If there are competing lookups for hardlinks that point - * to the same inode, one (or more) getattrlists could be vended incorrect name information. - * Also, we need to beware of open-unlinked files which could have a namelen of 0. Note - * that if another hardlink sibling of this file is being unlinked, that could also thrash - * the name fields but it should *not* be treated like an open-unlinked file here. + * to the same inode, one (or more) getattrlists could be vended incorrect name information. + * Also, we need to beware of open-unlinked files which could have a namelen of 0. */ - if ((cp->c_flag & C_HARDLINK) && + + if ((cp->c_flag & C_HARDLINK) && ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { - /* If we have no name and our linkID is the raw inode number, then we may - * have an open-unlinked file. Go to the next link in this case. + /* If we have no name and our link ID is the raw inode number, then we may + * have an open-unlinked file. Go to the next link in this case. */ if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { - if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))) { + if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))){ goto out; } - } + } else { + /* just use link obtained from vap above */ nextlinkid = vap->va_linkid; } - /* Now probe the catalog for the linkID. Note that we don't know if we have - * the exclusive lock here for the cnode, so we can't just update the descriptor. - * Instead, we should just store the descriptor's value locally and then use it to pass - * out the name value as needed below. - */ - if (nextlinkid) { + + /* We need to probe the catalog for the descriptor corresponding to the link ID + * stored in nextlinkid. 
Note that we don't know if we have the exclusive lock + * for the cnode here, so we can't just update the descriptor. Instead, + * we should just store the descriptor's value locally and then use it to pass + * out the name value as needed below. + */ + if (nextlinkid){ lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_findname(hfsmp, nextlinkid, &linkdesc); - hfs_systemfile_unlock(hfsmp, lockflags); + error = cat_findname(hfsmp, nextlinkid, &linkdesc); + hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { uselinkdesc = 1; } } } - - /* By this point, we either patched the name above, and the c_desc points - * to correct data, or it already did, in which case we just proceed by copying - * the name into the VAP. Note that we will never set va_name to supported if - * nextlinkid is never initialized. This could happen in the degenerate case above - * involving the raw inode number, where it has no nextlinkid. In this case, we will - * simply not export the name as supported. + + /* By this point, we've either patched up the name above and the c_desc + * points to the correct data, or it already did, in which case we just proceed + * by copying the name into the vap. Note that we will never set va_name to + * supported if nextlinkid is never initialized. This could happen in the degenerate + * case above involving the raw inode number, where it has no nextlinkid. In this case + * we will simply not mark the name bit as supported. 
*/ if (uselinkdesc) { - strlcpy(vap->va_name, (const char *)linkdesc.cd_nameptr, MAXPATHLEN); + strlcpy(vap->va_name, (const char*) linkdesc.cd_nameptr, MAXPATHLEN); VATTR_SET_SUPPORTED(vap, va_name); - cat_releasedesc(&linkdesc); - } + cat_releasedesc(&linkdesc); + } else if (cp->c_desc.cd_namelen) { - strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN); + strlcpy(vap->va_name, (const char*) cp->c_desc.cd_nameptr, MAXPATHLEN); VATTR_SET_SUPPORTED(vap, va_name); } } out: hfs_unlock(cp); - /* - * We need to drop the iocount on the rsrc fork vnode only *after* we've - * released the cnode lock, since vnode_put can trigger an inactive call, which - * will go back into the HFS and try to acquire a cnode lock. + /* + * We need to vnode_put the rsrc fork vnode only *after* we've released + * the cnode lock, since vnode_put can trigger an inactive call, which + * will go back into HFS and try to acquire a cnode lock. */ if (rvp) { - vnode_put(rvp); + vnode_put (rvp); } + return (error); } @@ -726,6 +1033,17 @@ hfs_vnop_setattr(ap) uid_t nuid; gid_t ngid; +#if HFS_COMPRESSION + int decmpfs_reset_state = 0; + /* + we call decmpfs_update_attributes even if the file is not compressed + because we want to update the incoming flags if the xattrs are invalid + */ + error = decmpfs_update_attributes(vp, vap); + if (error) + return error; +#endif + hfsmp = VTOHFS(vp); /* Don't allow modification of the journal file. */ @@ -737,9 +1055,36 @@ hfs_vnop_setattr(ap) * File size change request. * We are guaranteed that this is not a directory, and that * the filesystem object is writeable. 
+ * + * NOTE: HFS COMPRESSION depends on the data_size being set *before* the bsd flags are updated */ VATTR_SET_SUPPORTED(vap, va_data_size); if (VATTR_IS_ACTIVE(vap, va_data_size) && !vnode_islnk(vp)) { +#if HFS_COMPRESSION + /* keep the compressed state locked until we're done truncating the file */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to lock out decmpfs state changes + * on this file while it's truncating + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? */ + } + } + + decmpfs_lock_compressed_data(dp, 1); + if (hfs_file_is_compressed(VTOC(vp), 1)) { + error = decmpfs_decompress_file(vp, dp, -1/*vap->va_data_size*/, 0, 1); + if (error != 0) { + decmpfs_unlock_compressed_data(dp, 1); + return error; + } + } +#endif /* Take truncate lock before taking cnode lock. */ hfs_lock_truncate(VTOC(vp), TRUE); @@ -749,13 +1094,19 @@ hfs_vnop_setattr(ap) if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { hfs_unlock_truncate(VTOC(vp), TRUE); +#if HFS_COMPRESSION + decmpfs_unlock_compressed_data(dp, 1); +#endif return (error); } cp = VTOC(vp); - error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, ap->a_context); + error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context); hfs_unlock_truncate(cp, TRUE); +#if HFS_COMPRESSION + decmpfs_unlock_compressed_data(dp, 1); +#endif if (error) goto out; } @@ -796,7 +1147,7 @@ hfs_vnop_setattr(ap) * Mode change request. * We are guaranteed that the mode value is valid and that in * conjunction with the owner and group, this change is legal. 
- */ + */ VATTR_SET_SUPPORTED(vap, va_mode); if (VATTR_IS_ACTIVE(vap, va_mode) && ((error = hfs_chmod(vp, (int)vap->va_mode, cred, p)) != 0)) @@ -811,6 +1162,18 @@ hfs_vnop_setattr(ap) if (VATTR_IS_ACTIVE(vap, va_flags)) { u_int16_t *fdFlags; +#if HFS_COMPRESSION + if ((cp->c_flags ^ vap->va_flags) & UF_COMPRESSED) { + /* + * the UF_COMPRESSED was toggled, so reset our cached compressed state + * but we don't want to actually do the update until we've released the cnode lock down below + * NOTE: turning the flag off doesn't actually decompress the file, so that we can + * turn off the flag and look at the "raw" file for debugging purposes + */ + decmpfs_reset_state = 1; + } +#endif + cp->c_flags = vap->va_flags; cp->c_touch_chgtime = TRUE; @@ -879,10 +1242,39 @@ hfs_vnop_setattr(ap) if ((error = hfs_update(vp, TRUE)) != 0) goto out; - HFS_KNOTE(vp, NOTE_ATTRIB); out: - if (cp) + if (cp) { + /* Purge origin cache for cnode, since caller now has correct link ID for it + * We purge it here since it was acquired for us during lookup, and we no longer need it. + */ + if ((cp->c_flag & C_HARDLINK) && (vp->v_type != VDIR)){ + hfs_relorigin(cp, 0); + } + hfs_unlock(cp); +#if HFS_COMPRESSION + if (decmpfs_reset_state) { + /* + * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode + * but don't do it while holding the hfs cnode lock + */ + decmpfs_cnode *dp = VTOCMP(vp); + if (!dp) { + /* + * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode + * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes + * on this file if it's locked + */ + dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp)); + if (!dp) { + /* failed to allocate a decmpfs_cnode */ + return ENOMEM; /* what should this be? 
*/ + } + } + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } +#endif + } return (error); } @@ -1140,6 +1532,20 @@ hfs_vnop_exchange(ap) if (from_vp == to_vp) return (EINVAL); +#if HFS_COMPRESSION + if ( hfs_file_is_compressed(VTOC(from_vp), 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(from_vp, VTOCMP(from_vp), -1, 0, 1) ) ) { + return error; + } + } + + if ( hfs_file_is_compressed(VTOC(to_vp), 0) ) { + if ( 0 != ( error = decmpfs_decompress_file(to_vp, VTOCMP(to_vp), -1, 0, 1) ) ) { + return error; + } + } +#endif // HFS_COMPRESSION + if ((error = hfs_lockpair(VTOC(from_vp), VTOC(to_vp), HFS_EXCLUSIVE_LOCK))) return (error); @@ -1280,7 +1686,7 @@ hfs_vnop_exchange(ap) bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); /* Rehash the cnodes using their new file IDs */ - hfs_chash_rehash(from_cp, to_cp); + hfs_chash_rehash(hfsmp, from_cp, to_cp); /* * When a file moves out of "Cleanup At Startup" @@ -1297,9 +1703,6 @@ hfs_vnop_exchange(ap) to_cp->c_touch_chgtime = TRUE; } - HFS_KNOTE(from_vp, NOTE_ATTRIB); - HFS_KNOTE(to_vp, NOTE_ATTRIB); - exit: if (got_cookie) { cat_postflight(hfsmp, &cookie, vfs_context_proc(ap->a_context)); @@ -1324,18 +1727,28 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) struct filefork *fp = NULL; int retval = 0; struct hfsmount *hfsmp = VTOHFS(vp); + struct rl_entry *invalid_range; struct timeval tv; - int wait; + int waitdata; /* attributes necessary for data retrieval */ + int wait; /* all other attributes (e.g. atime, etc.) */ int lockflag; int took_trunc_lock = 0; + boolean_t trunc_lock_exclusive = FALSE; + /* + * Applications which only care about data integrity rather than full + * file integrity may opt out of (delay) expensive metadata update + * operations as a performance optimization. + */ wait = (waitfor == MNT_WAIT); + waitdata = (waitfor == MNT_DWAIT) | wait; if (always_do_fullfsync) fullsync = 1; /* HFS directories don't have any data blocks. 
*/ if (vnode_isdir(vp)) goto metasync; + fp = VTOF(vp); /* * For system files flush the B-tree header and @@ -1350,11 +1763,17 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) } } else if (UBCINFOEXISTS(vp)) { hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); + hfs_lock_truncate(cp, trunc_lock_exclusive); took_trunc_lock = 1; + if (fp->ff_unallocblocks != 0) { + hfs_unlock_truncate(cp, trunc_lock_exclusive); + + trunc_lock_exclusive = TRUE; + hfs_lock_truncate(cp, trunc_lock_exclusive); + } /* Don't hold cnode lock when calling into cluster layer. */ - (void) cluster_push(vp, wait ? IO_SYNC : 0); + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); hfs_lock(cp, HFS_FORCE_LOCK); } @@ -1365,53 +1784,59 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) * * Files with NODUMP can bypass zero filling here. */ - if ((wait || (cp->c_flag & C_ZFWANTSYNC)) && - ((cp->c_flags & UF_NODUMP) == 0) && - UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) && (fp = VTOF(vp)) && - cp->c_zftimeout != 0) { + if (fp && (((cp->c_flag & C_ALWAYS_ZEROFILL) && !TAILQ_EMPTY(&fp->ff_invalidranges)) || + ((wait || (cp->c_flag & C_ZFWANTSYNC)) && + ((cp->c_flags & UF_NODUMP) == 0) && + UBCINFOEXISTS(vp) && (vnode_issystem(vp) ==0) && + cp->c_zftimeout != 0))) { + microuptime(&tv); - if (!fullsync && tv.tv_sec < (long)cp->c_zftimeout) { + if ((cp->c_flag & C_ALWAYS_ZEROFILL) == 0 && !fullsync && tv.tv_sec < (long)cp->c_zftimeout) { /* Remember that a force sync was requested. 
*/ cp->c_flag |= C_ZFWANTSYNC; goto datasync; } - if (!took_trunc_lock) { - hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); - hfs_lock(cp, HFS_FORCE_LOCK); - took_trunc_lock = 1; - } + if (!TAILQ_EMPTY(&fp->ff_invalidranges)) { + if (!took_trunc_lock || trunc_lock_exclusive == FALSE) { + hfs_unlock(cp); + if (took_trunc_lock) + hfs_unlock_truncate(cp, trunc_lock_exclusive); - while (!CIRCLEQ_EMPTY(&fp->ff_invalidranges)) { - struct rl_entry *invalid_range = CIRCLEQ_FIRST(&fp->ff_invalidranges); - off_t start = invalid_range->rl_start; - off_t end = invalid_range->rl_end; + trunc_lock_exclusive = TRUE; + hfs_lock_truncate(cp, trunc_lock_exclusive); + hfs_lock(cp, HFS_FORCE_LOCK); + took_trunc_lock = 1; + } + while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { + off_t start = invalid_range->rl_start; + off_t end = invalid_range->rl_end; - /* The range about to be written must be validated - * first, so that VNOP_BLOCKMAP() will return the - * appropriate mapping for the cluster code: - */ - rl_remove(start, end, &fp->ff_invalidranges); + /* The range about to be written must be validated + * first, so that VNOP_BLOCKMAP() will return the + * appropriate mapping for the cluster code: + */ + rl_remove(start, end, &fp->ff_invalidranges); - /* Don't hold cnode lock when calling into cluster layer. */ + /* Don't hold cnode lock when calling into cluster layer. */ + hfs_unlock(cp); + (void) cluster_write(vp, (struct uio *) 0, + fp->ff_size, end + 1, start, (off_t)0, + IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + hfs_lock(cp, HFS_FORCE_LOCK); + cp->c_flag |= C_MODIFIED; + } hfs_unlock(cp); - (void) cluster_write(vp, (struct uio *) 0, - fp->ff_size, end + 1, start, (off_t)0, - IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); + (void) cluster_push(vp, waitdata ? IO_SYNC : 0); hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_flag |= C_MODIFIED; } - hfs_unlock(cp); - (void) cluster_push(vp, wait ? 
IO_SYNC : 0); - hfs_lock(cp, HFS_FORCE_LOCK); - cp->c_flag &= ~C_ZFWANTSYNC; cp->c_zftimeout = 0; } datasync: - if (took_trunc_lock) - hfs_unlock_truncate(cp, TRUE); - + if (took_trunc_lock) { + hfs_unlock_truncate(cp, trunc_lock_exclusive); + took_trunc_lock = 0; + } /* * if we have a journal and if journal_active() returns != 0 then the * we shouldn't do anything to a locked block (because it is part @@ -1430,7 +1855,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) /* * Flush all dirty buffers associated with a vnode. */ - buf_flushdirtyblks(vp, wait, lockflag, "hfs_fsync"); + buf_flushdirtyblks(vp, waitdata, lockflag, "hfs_fsync"); metasync: if (vnode_isreg(vp) && vnode_issystem(vp)) { @@ -1461,7 +1886,7 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) */ if (fullsync) { if (hfsmp->jnl) { - journal_flush(hfsmp->jnl); + hfs_journal_flush(hfsmp); } else { retval = hfs_metasync_all(hfsmp); /* XXX need to pass context! */ @@ -1615,7 +2040,14 @@ hfs_vnop_rmdir(ap) if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { return (error); } + + /* Check for a race with rmdir on the parent directory */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + hfs_unlockpair (dcp, cp); + return ENOENT; + } error = hfs_removedir(dvp, vp, ap->a_cnp, 0); + hfs_unlockpair(dcp, cp); return (error); @@ -1656,11 +2088,18 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* We could also return EBUSY here */ return hfs_unlink(hfsmp, dvp, vp, cnp, skip_reserve); } - + + /* + * We want to make sure that if the directory has a lot of attributes, we process them + * in separate transactions to ensure we don't panic in the journal with a gigantic + * transaction. This means we'll let hfs_removefile deal with the directory, which generally + * follows the same codepath as open-unlinked files. Note that the last argument to + * hfs_removefile specifies that it is supposed to handle directories for this case. 
+ */ if ((hfsmp->hfs_attribute_vp != NULL) && (cp->c_attr.ca_recflags & kHFSHasAttributesMask) != 0) { - return hfs_removefile(dvp, vp, cnp, 0, 0, 1); + return hfs_removefile(dvp, vp, cnp, 0, 0, 1, NULL); } dcp->c_flag |= C_DIR_MODIFICATION; @@ -1744,8 +2183,6 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ - HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK | NOTE_ATTRIB); - hfs_volupdate(hfsmp, VOL_RMDIR, (dcp->c_cnid == kHFSRootFolderID)); /* @@ -1756,15 +2193,12 @@ hfs_removedir(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (vnode_isinuse(vp, 0)) { cp->c_flag |= C_DELETED; } else { - cp->c_mode = 0; /* Makes the vnode go away...see inactive */ cp->c_flag |= C_NOEXISTS; } out: dcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&dcp->c_flag); - HFS_KNOTE(vp, NOTE_DELETE); - if (started_tr) { hfs_end_transaction(hfsmp); } @@ -1790,20 +2224,67 @@ hfs_vnop_remove(ap) struct vnode *vp = ap->a_vp; struct cnode *dcp = VTOC(dvp); struct cnode *cp = VTOC(vp); - struct vnode *rvp = cp->c_rsrc_vp; - int error=0, recycle_rsrc=0, rvid=0; + struct vnode *rvp = NULL; + struct hfsmount *hfsmp = VTOHFS(vp); + int error=0, recycle_rsrc=0; + int drop_rsrc_vnode = 0; + int vref; if (dvp == vp) { return (EINVAL); } + /* + * We need to grab the cnode lock on 'cp' before the lockpair() + * to get an iocount on the rsrc fork BEFORE we enter hfs_removefile. + * To prevent other deadlocks, it's best to call hfs_vgetrsrc in a way that + * allows it to drop the cnode lock that it expects to be held coming in. + * If we don't, we could commit a lock order violation, causing a deadlock. + * In order to safely get the rsrc vnode with an iocount, we need to only hold the + * lock on the file temporarily. Unlike hfs_vnop_rename, we don't have to worry + * about one rsrc fork getting recycled for another, but we do want to ensure + * that there are no deadlocks due to lock ordering issues. 
+ * + * Note: this function may be invoked for directory hardlinks, so just skip these + * steps if 'vp' is a directory. + */ + + + if ((vp->v_type == VLNK) || (vp->v_type == VREG)) { + + if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK))) { + return (error); + } + + error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); + hfs_unlock(cp); + if (error) { + return (error); + } + drop_rsrc_vnode = 1; + } + /* Now that we may have an iocount on rvp, do the lock pair */ hfs_lock_truncate(cp, TRUE); if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { hfs_unlock_truncate(cp, TRUE); + /* drop the iocount on rvp if necessary */ + if (drop_rsrc_vnode) { + vnode_put (rvp); + } return (error); } - error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0); + + /* + * Check to see if we raced rmdir for the parent directory + * hfs_removefile already checks for a race on vp/cp + */ + if (dcp->c_flag & (C_DELETED | C_NOEXISTS)) { + error = ENOENT; + goto rm_done; + } + + error = hfs_removefile(dvp, vp, ap->a_cnp, ap->a_flags, 0, 0, rvp); // // If the remove succeeded and it's an open-unlinked file that has @@ -1815,8 +2296,8 @@ hfs_vnop_remove(ap) // something forces the resource vnode to get recycled (and that can // take a very long time). 
// - if (error == 0 && (cp->c_flag & C_DELETED) && rvp && !vnode_isinuse(rvp, 0)) { - rvid = vnode_vid(rvp); + if (error == 0 && (cp->c_flag & C_DELETED) && + (rvp) && !vnode_isinuse(rvp, 0)) { recycle_rsrc = 1; } @@ -1826,16 +2307,24 @@ hfs_vnop_remove(ap) * recycle the vnode which in turn might require the * truncate lock) */ +rm_done: hfs_unlock_truncate(cp, TRUE); hfs_unlockpair(dcp, cp); - if (recycle_rsrc && vnode_getwithvid(rvp, rvid) == 0) { - vnode_ref(rvp); - vnode_rele(rvp); + if (recycle_rsrc) { + vref = vnode_ref(rvp); + if (vref == 0) { + /* vnode_ref could return an error, only release if we got a ref */ + vnode_rele(rvp); + } vnode_recycle(rvp); - vnode_put(rvp); } + if (drop_rsrc_vnode) { + /* drop iocount on rsrc fork, was obtained at beginning of fxn */ + vnode_put(rvp); + } + return (error); } @@ -1857,14 +2346,22 @@ hfs_removefile_callback(struct buf *bp, void *hfsmp) { * hfs_removefile * * Similar to hfs_vnop_remove except there are additional options. + * This function may be used to remove directories if they have + * lots of EA's -- note the 'allow_dirs' argument. + * + * The 'rvp' argument is used to pass in a resource fork vnode with + * an iocount to prevent it from getting recycled during usage. If it + * is NULL, then it is assumed the caller is a VNOP that cannot operate + * on resource forks, like hfs_vnop_symlink or hfs_removedir. Otherwise in + * a VNOP that takes multiple vnodes, we could violate lock order and + * cause a deadlock. * * Requires cnode and truncate locks to be held. 
*/ static int hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, - int flags, int skip_reserve, int allow_dirs) + int flags, int skip_reserve, int allow_dirs, struct vnode *rvp) { - struct vnode *rvp = NULL; struct cnode *cp; struct cnode *dcp; struct hfsmount *hfsmp; @@ -1941,31 +2438,22 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, cache_purge(vp); /* - * Acquire a vnode for a non-empty resource fork. - * (needed for hfs_truncate) + * We expect the caller, if operating on files, + * will have passed in a resource fork vnode with + * an iocount, even if there was no content. + * We only do the hfs_truncate on the rsrc fork + * if we know that it DID have content, however. + * This has the bonus of not requiring us to defer + * its removal, unless it is in use. */ - if (isdir == 0 && (cp->c_blocks - VTOF(vp)->ff_blocks)) { - /* - * We must avoid calling hfs_vgetrsrc() when we have - * an active resource fork vnode to avoid deadlocks - * when that vnode is in the VL_TERMINATE state. We - * can defer removing the file and its resource fork - * until the call to hfs_vnop_inactive() occurs. - */ - if (cp->c_rsrc_vp) { - defer_remove = 1; - } else { - error = hfs_vgetrsrc(hfsmp, vp, &rvp, FALSE); - if (error) - goto out; - /* Defer the vnode_put on rvp until the hfs_unlock(). */ - cp->c_flag |= C_NEED_RVNODE_PUT; - } - } + /* Check if this file is being used. */ if (isdir == 0) { dataforkbusy = vnode_isinuse(vp, 0); - rsrcforkbusy = rvp ? vnode_isinuse(rvp, 0) : 0; + /* Only need to defer resource fork removal if in use and has content */ + if (rvp && (cp->c_blocks - VTOF(vp)->ff_blocks)) { + rsrcforkbusy = vnode_isinuse(rvp, 0); + } } /* Check if we have to break the deletion into multiple pieces. */ @@ -2025,25 +2513,28 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* * Truncate any non-busy forks. Busy forks will * get truncated when their vnode goes inactive. 
- * + * Note that we will only enter this region if we + * can avoid creating an open-unlinked file. If + * either region is busy, we will have to create an open + * unlinked file. * Since we're already inside a transaction, * tell hfs_truncate to skip the ubc_setsize. */ - if (isdir == 0) { - int mode = cp->c_mode; - + if (isdir == 0 && (!dataforkbusy && !rsrcforkbusy)) { + /* + * Note that 5th argument to hfs_truncate indicates whether or not + * hfs_update calls should be suppressed in call to do_hfs_truncate + */ if (!dataforkbusy && !isbigfile && cp->c_datafork->ff_blocks != 0) { - cp->c_mode = 0; /* Suppress hfs_update */ - error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, ctx); - cp->c_mode = mode; + /* skip update in hfs_truncate */ + error = hfs_truncate(vp, (off_t)0, IO_NDELAY, 1, 1, ctx); if (error) goto out; truncated = 1; } if (!rsrcforkbusy && rvp) { - cp->c_mode = 0; /* Suppress hfs_update */ - error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, ctx); - cp->c_mode = mode; + /* skip update in hfs_truncate */ + error = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 1, 1, ctx); if (error) goto out; truncated = 1; @@ -2053,9 +2544,16 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* * Protect against a race with rename by using the component * name passed in and parent id from dvp (instead of using - * the cp->c_desc which may have changed). + * the cp->c_desc which may have changed). Also, be aware that + * because we allow directories to be passed in, we need to special case + * this temporary descriptor in case we were handed a directory. */ - desc.cd_flags = 0; + if (isdir) { + desc.cd_flags = CD_ISDIR; + } + else { + desc.cd_flags = 0; + } desc.cd_encoding = cp->c_desc.cd_encoding; desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; desc.cd_namelen = cnp->cn_namelen; @@ -2066,8 +2564,11 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* * There are two cases to consider: - * 1. 
File is busy/big/defer_remove ==> move/rename the file + * 1. File/Dir is busy/big/defer_remove ==> move/rename the file/dir * 2. File is not in use ==> remove the file + * + * We can get a directory in case 1 because it may have had lots of attributes, + * which need to get removed here. */ if (dataforkbusy || rsrcforkbusy || isbigfile || defer_remove) { char delname[32]; @@ -2075,7 +2576,13 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct cat_desc todir_desc; /* - * Orphan this file (move to hidden directory). + * Orphan this file or directory (move to hidden directory). + * Again, we need to take care that we treat directories as directories, + * and files as files. Because directories with attributes can be passed in + * check to make sure that we have a directory or a file before filling in the + * temporary descriptor's flags. We keep orphaned directories AND files in + * the FILE_HARDLINKS private directory since we're generalizing over all + * orphaned filesystem objects. 
*/ bzero(&todir_desc, sizeof(todir_desc)); todir_desc.cd_parentcnid = 2; @@ -2085,7 +2592,12 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, to_desc.cd_nameptr = (const u_int8_t *)delname; to_desc.cd_namelen = strlen(delname); to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; - to_desc.cd_flags = 0; + if (isdir) { + to_desc.cd_flags = CD_ISDIR; + } + else { + to_desc.cd_flags = 0; + } to_desc.cd_cnid = cp->c_cnid; lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); @@ -2118,7 +2630,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - /* Update the file's state */ + /* Update the file or directory's state */ cp->c_flag |= C_DELETED; cp->c_ctime = tv.tv_sec; --cp->c_linkcount; @@ -2149,14 +2661,18 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, if (error && error != ENXIO && error != ENOENT && truncated) { if ((cp->c_datafork && cp->c_datafork->ff_size != 0) || - (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) { - panic("hfs: remove: couldn't delete a truncated file! 
(%d, data sz %lld; rsrc sz %lld)", - error, cp->c_datafork->ff_size, cp->c_rsrcfork->ff_size); + (cp->c_rsrcfork && cp->c_rsrcfork->ff_size != 0)) { + printf("hfs: remove: couldn't delete a truncated file (%s)" + "(error %d, data sz %lld; rsrc sz %lld)", + cp->c_desc.cd_nameptr, error, cp->c_datafork->ff_size, + cp->c_rsrcfork->ff_size); + hfs_mark_volume_inconsistent(hfsmp); } else { printf("hfs: remove: strangely enough, deleting truncated file %s (%d) got err %d\n", - cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); - } + cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); + } } + if (error == 0) { /* Update the parent directory */ if (dcp->c_entries > 0) @@ -2175,13 +2691,17 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, (void)hfs_chkiq(cp, -1, NOCRED, 0); #endif /* QUOTA */ - cp->c_mode = 0; - truncated = 0; // because the catalog entry is gone cp->c_flag |= C_NOEXISTS; cp->c_flag &= ~C_DELETED; + truncated = 0; // because the catalog entry is gone + cp->c_touch_chgtime = TRUE; /* XXX needed ? */ --cp->c_linkcount; + /* + * We must never get a directory if we're in this else block. We could + * accidentally drop the number of files in the volume header if we did. + */ hfs_volupdate(hfsmp, VOL_RMFILE, (dcp->c_cnid == kHFSRootFolderID)); } @@ -2195,8 +2715,6 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, */ cat_releasedesc(&cp->c_desc); - HFS_KNOTE(dvp, NOTE_WRITE); - out: if (error) { cp->c_flag &= ~C_DELETED; @@ -2217,11 +2735,6 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, dcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&dcp->c_flag); - HFS_KNOTE(vp, NOTE_DELETE); - if (rvp) { - HFS_KNOTE(rvp, NOTE_DELETE); - } - return (error); } @@ -2261,6 +2774,19 @@ replace_desc(struct cnode *cp, struct cat_desc *cdp) * - all the vnodes are from the same file system * * When the target is a directory, HFS must ensure that its empty. 
+ * + * Note that this function requires up to 6 vnodes in order to work properly + * if it is operating on files (and not on directories). This is because only + * files can have resource forks, and we now require iocounts to be held on the + * vnodes corresponding to the resource forks (if applicable) as well as + * the files or directories undergoing rename. The problem with not holding + * iocounts on the resource fork vnodes is that it can lead to a deadlock + * situation: The rsrc fork of the source file may be recycled and reclaimed + * in order to provide a vnode for the destination file's rsrc fork. Since + * data and rsrc forks share the same cnode, we'd eventually try to lock the + * source file's cnode in order to sync its rsrc fork to disk, but it's already + * been locked. By taking the rsrc fork vnodes up front we ensure that they + * cannot be recycled, and that the situation mentioned above cannot happen. */ static int hfs_vnop_rename(ap) @@ -2278,7 +2804,8 @@ hfs_vnop_rename(ap) struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; - struct vnode *rvp = NULLVP; + struct vnode *fvp_rsrc = NULLVP; + struct vnode *tvp_rsrc = NULLVP; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = vfs_context_proc(ap->a_context); @@ -2286,6 +2813,7 @@ hfs_vnop_rename(ap) struct cnode *fdcp; struct cnode *tdcp; struct cnode *tcp; + struct cnode *error_cnode; struct cat_desc from_desc; struct cat_desc to_desc; struct cat_desc out_desc; @@ -2296,10 +2824,51 @@ hfs_vnop_rename(ap) int took_trunc_lock = 0; int lockflags; int error; - int rsrc_vid = 0; int recycle_rsrc = 0; - - /* When tvp exist, take the truncate lock for the hfs_removefile(). */ + + + /* + * Before grabbing the four locks, we may need to get an iocount on the resource fork + * vnodes in question, just like hfs_vnop_remove. 
If fvp and tvp are not + * directories, then go ahead and grab the resource fork vnodes now + * one at a time. We don't actively need the fvp_rsrc to do the rename operation, + * but we need the iocount to prevent the vnode from getting recycled/reclaimed + * during the middle of the VNOP. + */ + + + if ((vnode_isreg(fvp)) || (vnode_islnk(fvp))) { + + if ((error = hfs_lock (VTOC(fvp), HFS_EXCLUSIVE_LOCK))) { + return (error); + } + + error = hfs_vgetrsrc(VTOHFS(fvp), fvp, &fvp_rsrc, TRUE); + hfs_unlock (VTOC(fvp)); + if (error) { + return error; + } + } + + if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { + /* + * Lock failure is OK on tvp, since we may race with a remove on the dst. + * But this shouldn't stop rename from proceeding, so only try to + * grab the resource fork if the lock succeeded. + */ + if (hfs_lock (VTOC(tvp), HFS_EXCLUSIVE_LOCK) == 0) { + error = hfs_vgetrsrc(VTOHFS(tvp), tvp, &tvp_rsrc, TRUE); + hfs_unlock (VTOC(tvp)); + if (error) { + if (fvp_rsrc) { + vnode_put (fvp_rsrc); + } + return error; + } + } + } + + /* When tvp exists, take the truncate lock for hfs_removefile(). */ if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { hfs_lock_truncate(VTOC(tvp), TRUE); took_trunc_lock = 1; @@ -2307,34 +2876,30 @@ hfs_vnop_rename(ap) retry: error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL, - HFS_EXCLUSIVE_LOCK); + HFS_EXCLUSIVE_LOCK, &error_cnode); if (error) { if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), TRUE); + hfs_unlock_truncate(VTOC(tvp), TRUE); took_trunc_lock = 0; } - /* - * tvp might no longer exist. if we get ENOENT, re-check the - * C_NOEXISTS flag on tvp to find out whether it's still in the - * namespace. - */ - if (error == ENOENT && tvp) { - /* - * It's okay to just check C_NOEXISTS without having a lock, - * because we have an iocount on it from the vfs layer so it can't - * have disappeared. - */ - if (VTOC(tvp)->c_flag & C_NOEXISTS) { - /* - * tvp is no longer in the namespace. 
Try again with NULL - * tvp/tcp (NULLing these out is fine because the vfs syscall - * will vnode_put the vnodes). - */ - tcp = NULL; - tvp = NULL; - goto retry; - } - } + /* + * tvp might no longer exist. If the cause of the lock failure + * was tvp, then we can try again with tvp/tcp set to NULL. + * This is ok because the vfs syscall will vnode_put the vnodes + * after we return from hfs_vnop_rename. + */ + if ((error == ENOENT) && (tvp != NULL) && (error_cnode == VTOC(tvp))) { + tcp = NULL; + tvp = NULL; + goto retry; + } + /* otherwise, drop iocounts on the rsrc forks and bail out */ + if (fvp_rsrc) { + vnode_put (fvp_rsrc); + } + if (tvp_rsrc) { + vnode_put (tvp_rsrc); + } return (error); } @@ -2344,7 +2909,22 @@ hfs_vnop_rename(ap) tcp = tvp ? VTOC(tvp) : NULL; hfsmp = VTOHFS(tdvp); - /* Check for a race against unlink. */ + /* Ensure we didn't race src or dst parent directories with rmdir. */ + if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + if (tdcp->c_flag & (C_NOEXISTS | C_DELETED)) { + error = ENOENT; + goto out; + } + + + /* Check for a race against unlink. The hfs_valid_cnode checks validate + * the parent/child relationship with fdcp and tdcp, as well as the + * component name of the target cnodes. + */ if ((fcp->c_flag & (C_NOEXISTS | C_DELETED)) || !hfs_valid_cnode(hfsmp, fdvp, fcnp, fcp->c_fileid)) { error = ENOENT; goto out; @@ -2483,22 +3063,6 @@ hfs_vnop_rename(ap) /* Preflighting done, take fvp out of the name space. */ cache_purge(fvp); - /* - * When a file moves out of "Cleanup At Startup" - * we can drop its NODUMP status. 
- */ - if ((fcp->c_flags & UF_NODUMP) && - vnode_isreg(fvp) && - (fdvp != tdvp) && - (fdcp->c_desc.cd_nameptr != NULL) && - (strncmp((const char *)fdcp->c_desc.cd_nameptr, - CARBON_TEMP_DIR_NAME, - sizeof(CARBON_TEMP_DIR_NAME)) == 0)) { - fcp->c_flags &= ~UF_NODUMP; - fcp->c_touch_chgtime = TRUE; - (void) hfs_update(fvp, 0); - } - bzero(&from_desc, sizeof(from_desc)); from_desc.cd_nameptr = (const u_int8_t *)fcnp->cn_nameptr; from_desc.cd_namelen = fcnp->cn_namelen; @@ -2599,22 +3163,20 @@ hfs_vnop_rename(ap) if (vnode_isdir(tvp)) error = hfs_removedir(tdvp, tvp, tcnp, HFSRM_SKIP_RESERVE); else { - if (tcp){ - rvp = tcp->c_rsrc_vp; - } - error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0); - - /* If the destination file had a resource fork vnode, we couldn't do - * anything about it in hfs_removefile because we didn't have a reference on it. - * We need to take action here to prevent it from leaking blocks. If removefile - * succeeded, then squirrel away the vid of the resource fork vnode and force a - * recycle after dropping all of the locks. The vid is guaranteed not to change - * at this point because we still hold the cnode lock. + error = hfs_removefile(tdvp, tvp, tcnp, 0, HFSRM_SKIP_RESERVE, 0, tvp_rsrc); + + /* + * If the destination file had a rsrc fork vnode, it may have been cleaned up + * in hfs_removefile if it was not busy (had no usecounts). This is possible + * because we grabbed the iocount on the rsrc fork safely at the beginning + * of the function before we did the lockfour. However, we may still need + * to take action to prevent block leaks, so aggressively recycle the vnode + * if possible. The vnode cannot be recycled because we hold an iocount on it. 
*/ - if ((error == 0) && (tcp->c_flag & C_DELETED) && rvp && !vnode_isinuse(rvp, 0)) { - rsrc_vid = vnode_vid(rvp); + + if ((error == 0) && (tcp->c_flag & C_DELETED) && tvp_rsrc && !vnode_isinuse(tvp_rsrc, 0)) { recycle_rsrc = 1; - } + } } if (error) @@ -2623,8 +3185,8 @@ hfs_vnop_rename(ap) } skip_rm: /* - * All done with tvp and fvp - * + * All done with tvp and fvp. + * * We also jump to this point if there was no destination observed during lookup and namei. * However, because only iocounts are held at the VFS layer, there is nothing preventing a * competing thread from racing us and creating a file or dir at the destination of this rename @@ -2635,7 +3197,7 @@ hfs_vnop_rename(ap) * To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno * will be swallowed and it will restart the operation. */ - + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); error = cat_rename(hfsmp, &from_desc, &tdcp->c_desc, &to_desc, &out_desc); hfs_systemfile_unlock(hfsmp, lockflags); @@ -2709,15 +3271,6 @@ hfs_vnop_rename(ap) hfs_end_transaction(hfsmp); } - /* Note that if hfs_removedir or hfs_removefile was invoked above they will already have - generated a NOTE_WRITE for tdvp and a NOTE_DELETE for tvp. - */ - if (error == 0) { - HFS_KNOTE(fvp, NOTE_RENAME); - HFS_KNOTE(fdvp, NOTE_WRITE); - if (tdvp != fdvp) HFS_KNOTE(tdvp, NOTE_WRITE); - }; - fdcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&fdcp->c_flag); if (fdvp != tdvp) { @@ -2729,19 +3282,31 @@ hfs_vnop_rename(ap) hfs_unlock_truncate(VTOC(tvp), TRUE); hfs_unlockfour(fdcp, fcp, tdcp, tcp); - - /* Now that we've dropped locks, see if we need to force recycle on the old - * destination's rsrc fork, preventing a leak of the rsrc fork's blocks. Note that - * doing the ref/rele is in order to twiddle the VL_INACTIVE bit to the vnode's flags - * so that on the last vnode_put for this vnode, we will force vnop_inactive to be triggered. 
+ + /* + * Now that we've dropped all of the locks, we need to force an inactive and a recycle + * on the old destination's rsrc fork to prevent a leak of its blocks. Note that + * doing the ref/rele is to twiddle the VL_NEEDINACTIVE bit of the vnode's flags, so that + * on the last vnode_put for this vnode, we will force inactive to get triggered. + * We hold an iocount from the beginning of this function so we know it couldn't have been + * recycled already. */ - if ((recycle_rsrc) && (vnode_getwithvid(rvp, rsrc_vid) == 0)) { - vnode_ref(rvp); - vnode_rele(rvp); - vnode_recycle(rvp); - vnode_put (rvp); + if (recycle_rsrc) { + int vref; + vref = vnode_ref(tvp_rsrc); + if (vref == 0) { + vnode_rele(tvp_rsrc); + } + vnode_recycle(tvp_rsrc); } + /* Now vnode_put the resource forks vnodes if necessary */ + if (tvp_rsrc) { + vnode_put(tvp_rsrc); + } + if (fvp_rsrc) { + vnode_put(fvp_rsrc); + } /* After tvp is removed the only acceptable error is EIO */ if (error && tvp_deleted) @@ -2830,7 +3395,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) * * Don't need truncate lock since a symlink is treated as a system file. */ - error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, ap->a_context); + error = hfs_truncate(vp, len, IO_NOZEROFILL, 1, 0, ap->a_context); /* On errors, remove the symlink file */ if (error) { @@ -2851,7 +3416,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) goto out; } - (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0); + (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL); hfs_unlock_truncate(cp, TRUE); goto out; } @@ -2977,20 +3542,16 @@ hfs_vnop_readdir(ap) int lockflags; int extended; int nfs_cookies; - caddr_t bufstart; cnid_t cnid_hint = 0; items = 0; startoffset = offset = uio_offset(uio); - bufstart = CAST_DOWN(caddr_t, uio_iov_base(uio)); extended = (ap->a_flags & VNODE_READDIR_EXTENDED); nfs_cookies = extended && (ap->a_flags & VNODE_READDIR_REQSEEKOFF); /* Sanity check the uio data. 
*/ - if ((uio_iovcnt(uio) > 1) || - (uio_resid(uio) < (int)sizeof(struct dirent))) { + if (uio_iovcnt(uio) > 1) return (EINVAL); - } /* Note that the dirhint calls require an exclusive lock. */ if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) return (error); @@ -3226,6 +3787,10 @@ hfs_vnop_readlink(ap) struct buf *bp = NULL; MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK); + if (fp->ff_symlinkptr == NULL) { + error = ENOMEM; + goto exit; + } error = (int)buf_meta_bread(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_physical_block_size), vfs_context_ucred(ap->a_context), &bp); @@ -3321,7 +3886,10 @@ hfs_vnop_pathconf(ap) *ap->a_retval = 1; break; case _PC_FILESIZEBITS: - *ap->a_retval = 64; /* number of bits to store max file size */ + if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + *ap->a_retval = 32; + else + *ap->a_retval = 64; /* number of bits to store max file size */ break; default: return (EINVAL); @@ -3397,8 +3965,6 @@ hfs_update(struct vnode *vp, __unused int waitfor) // cp->c_flag &= ~(C_ACCESS | C_CHANGE | C_UPDATE); cp->c_flag |= C_MODIFIED; - HFS_KNOTE(vp, NOTE_ATTRIB); - return (0); } @@ -3411,9 +3977,9 @@ hfs_update(struct vnode *vp, __unused int waitfor) * field representing the size of the file (cf_size) * must be no larger than the start of the first hole. 
*/ - if (dataforkp && !CIRCLEQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { + if (dataforkp && !TAILQ_EMPTY(&cp->c_datafork->ff_invalidranges)) { bcopy(dataforkp, &datafork, sizeof(datafork)); - datafork.cf_size = CIRCLEQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; + datafork.cf_size = TAILQ_FIRST(&cp->c_datafork->ff_invalidranges)->rl_start; dataforkp = &datafork; } else if (dataforkp && (cp->c_datafork->ff_unallocblocks != 0)) { // always make sure the block count and the size @@ -3457,8 +4023,6 @@ hfs_update(struct vnode *vp, __unused int waitfor) hfs_end_transaction(hfsmp); - HFS_KNOTE(vp, NOTE_ATTRIB); - return (error); } @@ -3471,7 +4035,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx) { struct cnode *cp = NULL; - struct cnode *dcp; + struct cnode *dcp = NULL; struct vnode *tvp; struct hfsmount *hfsmp; struct cat_desc in_desc, out_desc; @@ -3482,10 +4046,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, enum vtype vnodetype; int mode; - dcp = VTOC(dvp); - if ((error = hfs_lock(dcp, HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) return (error); + /* set the cnode pointer only after successfully acquiring lock */ + dcp = VTOC(dvp); dcp->c_flag |= C_DIR_MODIFICATION; hfsmp = VTOHFS(dvp); @@ -3549,6 +4114,19 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, VATTR_SET_SUPPORTED(vap, va_uid); VATTR_SET_SUPPORTED(vap, va_gid); +#if QUOTA + /* check to see if this node's creation would cause us to go over + * quota. If so, abort this operation. + */ + if (hfsmp->hfs_flags & HFS_QUOTAS) { + if ((error = hfs_quotacheck(hfsmp, 1, attr.ca_uid, attr.ca_gid, + vfs_context_ucred(ctx)))) { + goto exit; + } + } +#endif + + /* Tag symlinks with a type and creator. 
*/ if (vnodetype == VLNK) { struct FndrFileInfo *fip; @@ -3595,7 +4173,6 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, dcp->c_ctime = tv.tv_sec; dcp->c_mtime = tv.tv_sec; (void) cat_update(hfsmp, &dcp->c_desc, &dcp->c_attr, NULL, NULL); - HFS_KNOTE(dvp, NOTE_ATTRIB); } hfs_systemfile_unlock(hfsmp, lockflags); if (error) @@ -3607,12 +4184,6 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, dcp->c_flag &= ~C_NEG_ENTRIES; } - if (vnodetype == VDIR) { - HFS_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - } else { - HFS_KNOTE(dvp, NOTE_WRITE); - }; - hfs_volupdate(hfsmp, vnodetype == VDIR ? VOL_MKDIR : VOL_MKFILE, (dcp->c_cnid == kHFSRootFolderID)); @@ -3637,9 +4208,31 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, if (S_ISWHT(mode)) { goto exit; } + + /* + * We need to release the cnode lock on dcp before calling into + * hfs_getnewvnode to make sure we don't double lock this node + */ + if (dcp) { + dcp->c_flag &= ~C_DIR_MODIFICATION; + wakeup((caddr_t)&dcp->c_flag); + + hfs_unlock(dcp); + /* so we don't double-unlock it later */ + dcp = NULL; + } /* * Create a vnode for the object just created. + * + * NOTE: Because we have just unlocked the parent directory above (dcp), + * we are open to a race condition wherein another thread could look up the + * entry we just added to the catalog and delete it BEFORE we actually get the + * vnode out of the call below. In that case, we may return ENOENT because the + * cnode was already marked for C_DELETE. This is because we are grabbing the cnode + * out of the hash via the CNID/fileid provided in attr, and with the parent + * directory unlocked, it is now accessible. In this case, the VFS should re-drive the + * create operation to re-attempt. * * The cnode is locked on successful return. 
*/ @@ -3648,80 +4241,15 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, goto exit; cp = VTOC(tvp); -#if QUOTA - /* - * We call hfs_chkiq with FORCE flag so that if we - * fall through to the rmdir we actually have - * accounted for the inode - */ - if (hfsmp->hfs_flags & HFS_QUOTAS) { - if ((error = hfs_getinoquota(cp)) || - (error = hfs_chkiq(cp, 1, vfs_context_ucred(ctx), FORCE))) { - - if (vnode_isdir(tvp)) - (void) hfs_removedir(dvp, tvp, cnp, 0); - else { - hfs_unlock(cp); - hfs_lock_truncate(cp, TRUE); - hfs_lock(cp, HFS_FORCE_LOCK); - (void) hfs_removefile(dvp, tvp, cnp, 0, 0, 0); - hfs_unlock_truncate(cp, TRUE); - } - /* - * we successfully allocated a new vnode, but - * the quota check is telling us we're beyond - * our limit, so we need to dump our lock + reference - */ - hfs_unlock(cp); - vnode_put(tvp); - - goto exit; - } - } -#endif /* QUOTA */ - *vpp = tvp; exit: cat_releasedesc(&out_desc); - + /* - * Check if a file is located in the "Cleanup At Startup" - * directory. If it is then tag it as NODUMP so that we - * can be lazy about zero filling data holes. + * In case we get here via error handling, make sure we release cnode lock on dcp if we + * didn't do it already. */ - if ((error == 0) && dvp && (vnodetype == VREG) && - (dcp->c_desc.cd_nameptr != NULL) && - (strncmp((const char *)dcp->c_desc.cd_nameptr, - CARBON_TEMP_DIR_NAME, - sizeof(CARBON_TEMP_DIR_NAME)) == 0)) { - struct vnode *ddvp; - - dcp->c_flag &= ~C_DIR_MODIFICATION; - wakeup((caddr_t)&dcp->c_flag); - - hfs_unlock(dcp); - dvp = NULL; - - /* - * The parent of "Cleanup At Startup" should - * have the ASCII name of the userid. 
- */ - if (hfs_vget(hfsmp, dcp->c_parentcnid, &ddvp, 0) == 0) { - if (VTOC(ddvp)->c_desc.cd_nameptr) { - uid_t uid; - - uid = strtoul((const char *)VTOC(ddvp)->c_desc.cd_nameptr, 0, 0); - if ((uid == cp->c_uid) || - (uid == vfs_context_ucred(ctx)->cr_uid)) { - cp->c_flags |= UF_NODUMP; - cp->c_touch_chgtime = TRUE; - } - } - hfs_unlock(VTOC(ddvp)); - vnode_put(ddvp); - } - } - if (dvp) { + if (dcp) { dcp->c_flag &= ~C_DIR_MODIFICATION; wakeup((caddr_t)&dcp->c_flag); @@ -3744,7 +4272,7 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * * cnode for vnode vp must already be locked. * - * can_drop_lock is true if its safe to temporally drop/re-acquire the cnode lock + * can_drop_lock is true if its safe to temporarily drop/re-acquire the cnode lock */ __private_extern__ int @@ -3800,6 +4328,9 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int } else { struct cat_fork rsrcfork; struct componentname cn; + struct cat_desc *descptr = NULL; + struct cat_desc to_desc; + char delname[32]; int lockflags; /* @@ -3809,8 +4340,9 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int * and that its safe to have the cnode lock dropped and reacquired. */ if (cp->c_lockowner != current_thread()) { - if (!can_drop_lock) + if (!can_drop_lock) { return (EINVAL); + } /* * If the upgrade fails we loose the lock and * have to take the exclusive lock on our own. @@ -3820,21 +4352,46 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int cp->c_lockowner = current_thread(); } + /* + * hfs_vgetsrc may be invoked for a cnode that has already been marked + * C_DELETED. This is because we need to continue to provide rsrc + * fork access to open-unlinked files. In this case, build a fake descriptor + * like in hfs_removefile. If we don't do this, buildkey will fail in + * cat_lookup because this cnode has no name in its descriptor. 
+ */ + + if ((cp->c_flag & C_DELETED ) && (cp->c_desc.cd_namelen == 0)) { + bzero (&to_desc, sizeof(to_desc)); + bzero (delname, 32); + MAKE_DELETED_NAME(delname, sizeof(delname), cp->c_fileid); + to_desc.cd_nameptr = (const u_int8_t*) delname; + to_desc.cd_namelen = strlen(delname); + to_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid; + to_desc.cd_flags = 0; + to_desc.cd_cnid = cp->c_cnid; + + descptr = &to_desc; + } + else { + descptr = &cp->c_desc; + } + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); /* Get resource fork data */ - error = cat_lookup(hfsmp, &cp->c_desc, 1, (struct cat_desc *)0, + error = cat_lookup(hfsmp, descptr, 1, (struct cat_desc *)0, (struct cat_attr *)0, &rsrcfork, NULL); hfs_systemfile_unlock(hfsmp, lockflags); - if (error) + if (error) { return (error); - + } /* * Supply hfs_getnewvnode with a component name. */ cn.cn_pnbuf = NULL; - if (cp->c_desc.cd_nameptr) { + if (descptr->cd_nameptr) { MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | HASBUF; @@ -3844,12 +4401,12 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int cn.cn_hash = 0; cn.cn_consume = 0; cn.cn_namelen = snprintf(cn.cn_nameptr, MAXPATHLEN, - "%s%s", cp->c_desc.cd_nameptr, + "%s%s", descptr->cd_nameptr, _PATH_RSRCFORKSPEC); } dvp = vnode_getparent(vp); error = hfs_getnewvnode(hfsmp, dvp, cn.cn_pnbuf ? &cn : NULL, - &cp->c_desc, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr, + descptr, GNV_WANTRSRC | GNV_SKIPLOCK, &cp->c_attr, &rsrcfork, &rvp); if (dvp) vnode_put(dvp); @@ -3863,189 +4420,6 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, int return (0); } - -static void -filt_hfsdetach(struct knote *kn) -{ - struct vnode *vp; - - vp = (struct vnode *)kn->kn_hook; - if (vnode_getwithvid(vp, kn->kn_hookid)) - return; - - if (1) { /* ! 
KNDETACH_VNLOCKED */ - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { - (void) KNOTE_DETACH(&VTOC(vp)->c_knotes, kn); - hfs_unlock(VTOC(vp)); - } - } - - vnode_put(vp); -} - -/*ARGSUSED*/ -static int -filt_hfsread(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - int dropvp = 0; - - if (hint == 0) { - if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - dropvp = 1; - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - /* poll(2) semantics dictate always saying there is data */ - if (!(kn->kn_flags & EV_POLL)) { - off_t amount; - - amount = VTOF(vp)->ff_size - kn->kn_fp->f_fglob->fg_offset; - if (amount > (off_t)INTPTR_MAX) - kn->kn_data = INTPTR_MAX; - else if (amount < (off_t)INTPTR_MIN) - kn->kn_data = INTPTR_MIN; - else - kn->kn_data = (intptr_t)amount; - } else { - kn->kn_data = 1; - } - - if (dropvp) - vnode_put(vp); - - return (kn->kn_data != 0); -} - -/*ARGSUSED*/ -static int -filt_hfswrite(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - - if (hint == 0) { - if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(vp); - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. 
- */ - kn->kn_data = 0; - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - kn->kn_data = 0; - return (1); -} - -static int -filt_hfsvnode(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - - if (hint == 0) { - if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(vp); - } - if (kn->kn_sfflags & hint) - kn->kn_fflags |= hint; - if ((hint == NOTE_REVOKE)) { - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - return (kn->kn_fflags != 0); -} - -static struct filterops hfsread_filtops = - { 1, NULL, filt_hfsdetach, filt_hfsread }; -static struct filterops hfswrite_filtops = - { 1, NULL, filt_hfsdetach, filt_hfswrite }; -static struct filterops hfsvnode_filtops = - { 1, NULL, filt_hfsdetach, filt_hfsvnode }; - -/* - * Add a kqueue filter. - */ -static int -hfs_vnop_kqfiltadd( - struct vnop_kqfilt_add_args /* { - struct vnode *a_vp; - struct knote *a_kn; - struct proc *p; - vfs_context_t a_context; - } */ *ap) -{ - struct vnode *vp = ap->a_vp; - struct knote *kn = ap->a_kn; - int error; - - switch (kn->kn_filter) { - case EVFILT_READ: - if (vnode_isreg(vp)) { - kn->kn_fop = &hfsread_filtops; - } else { - return EINVAL; - }; - break; - case EVFILT_WRITE: - if (vnode_isreg(vp)) { - kn->kn_fop = &hfswrite_filtops; - } else { - return EINVAL; - }; - break; - case EVFILT_VNODE: - kn->kn_fop = &hfsvnode_filtops; - break; - default: - return (1); - } - - kn->kn_hook = (caddr_t)vp; - kn->kn_hookid = vnode_vid(vp); - - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) - return (error); - KNOTE_ATTACH(&VTOC(vp)->c_knotes, kn); - hfs_unlock(VTOC(vp)); - - return (0); -} - -/* - * Remove a kqueue filter - */ -static int -hfs_vnop_kqfiltremove(ap) - struct vnop_kqfilt_remove_args /* { - struct vnode *a_vp; - uintptr_t ident; - vfs_context_t a_context; - } */__unused *ap; -{ - int result; - - result = ENOTSUP; /* XXX */ - - return (result); -} - /* * Wrapper for special device reads */ @@ 
-4101,7 +4475,7 @@ hfsspec_close(ap) struct vnode *vp = ap->a_vp; struct cnode *cp; - if (vnode_isinuse(ap->a_vp, 1)) { + if (vnode_isinuse(ap->a_vp, 0)) { if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); @@ -4177,39 +4551,6 @@ hfsfifo_close(ap) return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } -/* - * kqfilt_add wrapper for fifos. - * - * Fall through to hfs kqfilt_add routines if needed - */ -int -hfsfifo_kqfilt_add(ap) - struct vnop_kqfilt_add_args *ap; -{ - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap); - if (error) - error = hfs_vnop_kqfiltadd(ap); - return (error); -} - -/* - * kqfilt_remove wrapper for fifos. - * - * Fall through to hfs kqfilt_remove routines if needed - */ -int -hfsfifo_kqfilt_remove(ap) - struct vnop_kqfilt_remove_args *ap; -{ - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap); - if (error) - error = hfs_vnop_kqfiltremove(ap); - return (error); -} #endif /* FIFO */ @@ -4310,9 +4651,75 @@ hfs_vnop_whiteout(ap) } int (**hfs_vnodeop_p)(void *); +int (**hfs_std_vnodeop_p) (void *); #define VOPFUNC int (*)(void *) +static int hfs_readonly_op (__unused void* ap) { return (EROFS); } + +/* + * In 10.6 and forward, HFS Standard is read-only and deprecated. 
The vnop table below + * is for use with HFS standard to block out operations that would modify the file system + */ + +struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC)hfs_readonly_op }, /* create (READONLY) */ + { &vnop_mknod_desc, (VOPFUNC)hfs_readonly_op }, /* mknod (READONLY) */ + { &vnop_open_desc, (VOPFUNC)hfs_vnop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)hfs_vnop_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)hfs_vnop_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)hfs_readonly_op }, /* setattr */ + { &vnop_read_desc, (VOPFUNC)hfs_vnop_read }, /* read */ + { &vnop_write_desc, (VOPFUNC)hfs_readonly_op }, /* write (READONLY) */ + { &vnop_ioctl_desc, (VOPFUNC)hfs_vnop_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC)hfs_vnop_select }, /* select */ + { &vnop_revoke_desc, (VOPFUNC)nop_revoke }, /* revoke */ + { &vnop_exchange_desc, (VOPFUNC)hfs_readonly_op }, /* exchange (READONLY)*/ + { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC)hfs_readonly_op}, /* fsync (READONLY) */ + { &vnop_remove_desc, (VOPFUNC)hfs_readonly_op }, /* remove (READONLY) */ + { &vnop_link_desc, (VOPFUNC)hfs_readonly_op }, /* link ( READONLLY) */ + { &vnop_rename_desc, (VOPFUNC)hfs_readonly_op }, /* rename (READONLY)*/ + { &vnop_mkdir_desc, (VOPFUNC)hfs_readonly_op }, /* mkdir (READONLY) */ + { &vnop_rmdir_desc, (VOPFUNC)hfs_readonly_op }, /* rmdir (READONLY) */ + { &vnop_symlink_desc, (VOPFUNC)hfs_readonly_op }, /* symlink (READONLY) */ + { &vnop_readdir_desc, (VOPFUNC)hfs_vnop_readdir }, /* readdir */ + { &vnop_readdirattr_desc, (VOPFUNC)hfs_vnop_readdirattr }, /* readdirattr */ + { &vnop_readlink_desc, (VOPFUNC)hfs_vnop_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC)hfs_vnop_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)hfs_vnop_reclaim 
}, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC)hfs_vnop_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC)hfs_vnop_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ + { &vnop_allocate_desc, (VOPFUNC)hfs_readonly_op }, /* allocate (READONLY) */ + { &vnop_searchfs_desc, (VOPFUNC)hfs_vnop_search }, /* search fs */ + { &vnop_bwrite_desc, (VOPFUNC)hfs_readonly_op }, /* bwrite (READONLY) */ + { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* pagein */ + { &vnop_pageout_desc,(VOPFUNC) hfs_readonly_op }, /* pageout (READONLY) */ + { &vnop_copyfile_desc, (VOPFUNC)hfs_readonly_op }, /* copyfile (READONLY)*/ + { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_readonly_op}, /* set xattr (READONLY) */ + { &vnop_removexattr_desc, (VOPFUNC)hfs_readonly_op}, /* remove xattr (READONLY) */ + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, + { &vnop_whiteout_desc, (VOPFUNC)hfs_readonly_op}, /* whiteout (READONLY) */ +#if NAMEDSTREAMS + { &vnop_getnamedstream_desc, (VOPFUNC)hfs_vnop_getnamedstream }, + { &vnop_makenamedstream_desc, (VOPFUNC)hfs_readonly_op }, + { &vnop_removenamedstream_desc, (VOPFUNC)hfs_readonly_op }, +#endif + { NULL, (VOPFUNC)NULL } +}; + +struct vnodeopv_desc hfs_std_vnodeop_opv_desc = +{ &hfs_std_vnodeop_p, hfs_standard_vnodeop_entries }; + + +/* VNOP table for HFS+ */ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, { &vnop_lookup_desc, (VOPFUNC)hfs_vnop_lookup }, /* lookup */ @@ -4353,8 +4760,6 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { 
&vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)hfs_vnop_kqfiltadd }, /* kqfilt_add */ - { &vnop_kqfilt_remove_desc, (VOPFUNC)hfs_vnop_kqfiltremove }, /* kqfilt_remove */ { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, @@ -4371,6 +4776,8 @@ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { struct vnodeopv_desc hfs_vnodeop_opv_desc = { &hfs_vnodeop_p, hfs_vnodeop_entries }; + +/* Spec Op vnop table for HFS+ */ int (**hfs_specop_p)(void *); struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, @@ -4404,7 +4811,7 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_bwrite_desc, (VOPFUNC)hfs_vnop_bwrite }, { &vnop_pagein_desc, (VOPFUNC)hfs_vnop_pagein }, /* Pagein */ { &vnop_pageout_desc, (VOPFUNC)hfs_vnop_pageout }, /* Pageout */ - { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ + { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } @@ -4413,6 +4820,7 @@ struct vnodeopv_desc hfs_specop_opv_desc = { &hfs_specop_p, hfs_specop_entries }; #if FIFO +/* HFS+ FIFO VNOP table */ int (**hfs_fifoop_p)(void *); struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, @@ -4450,8 +4858,6 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)hfsfifo_kqfilt_add }, /* kqfilt_add */ - { &vnop_kqfilt_remove_desc, (VOPFUNC)hfsfifo_kqfilt_remove }, /* kqfilt_remove */ { (struct 
vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_fifoop_opv_desc = diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index d025ae1cf..fc5cd3f7c 100644 --- a/bsd/hfs/hfs_xattr.c +++ b/bsd/hfs/hfs_xattr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,6 +59,11 @@ struct listattr_callback_state { int result; uio_t uio; size_t size; +#if HFS_COMPRESSION + int showcompressed; + vfs_context_t ctx; + vnode_t vp; +#endif /* HFS_COMPRESSION */ }; #define HFS_MAXATTRIBUTESIZE (128 * 1024) @@ -122,10 +127,17 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap) if ( !S_ISREG(cp->c_mode) ) { return (EPERM); } +#if HFS_COMPRESSION + int hide_rsrc = hfs_hides_rsrc(ap->a_context, VTOC(vp), 1); +#endif /* HFS_COMPRESSION */ if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { return (error); } - if (!RSRC_FORK_EXISTS(cp) && (ap->a_operation != NS_OPEN)) { + if ((!RSRC_FORK_EXISTS(cp) +#if HFS_COMPRESSION + || hide_rsrc +#endif /* HFS_COMPRESSION */ + ) && (ap->a_operation != NS_OPEN)) { hfs_unlock(cp); return (ENOATTR); } @@ -158,6 +170,17 @@ hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap) if ( !S_ISREG(cp->c_mode) ) { return (EPERM); } +#if HFS_COMPRESSION + if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { + if (VNODE_IS_RSRC(vp)) { + return EINVAL; + } else { + error = decmpfs_decompress_file(vp, VTOCMP(vp), -1, 1, 0); + if (error != 0) + return error; + } + } +#endif /* HFS_COMPRESSION */ if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { return (error); } @@ -183,6 +206,13 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) != 0) { return (ENOATTR); } +#if HFS_COMPRESSION + if (hfs_hides_rsrc(ap->a_context, VTOC(svp), 1)) { + /* do nothing */ + return 0; + } +#endif /* HFS_COMPRESSION */ + 
scp = VTOC(svp); /* Take truncate lock before taking cnode lock. */ @@ -191,7 +221,7 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) goto out; } if (VTOF(svp)->ff_size != 0) { - error = hfs_truncate(svp, 0, IO_NDELAY, 0, ap->a_context); + error = hfs_truncate(svp, 0, IO_NDELAY, 0, 0, ap->a_context); } hfs_unlock(scp); out: @@ -234,6 +264,12 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) cp = VTOC(vp); if (vp == cp->c_vp) { +#if HFS_COMPRESSION + int decmpfs_hide = hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1); /* 1 == don't take the cnode lock */ + if (decmpfs_hide && !(ap->a_options & XATTR_SHOWCOMPRESSION)) + return ENOATTR; +#endif /* HFS_COMPRESSION */ + /* Get the Finder Info. */ if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) { u_int8_t finderinfo[32]; @@ -262,7 +298,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) *ap->a_size = bufsize; return (0); } - if (uio_resid(uio) < bufsize) + if ((user_size_t)uio_resid(uio) < bufsize) return (ERANGE); result = uiomove((caddr_t)&finderinfo , bufsize, uio); @@ -272,6 +308,8 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) /* Read the Resource Fork. 
*/ if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { struct vnode *rvp = NULL; + int openunlinked = 0; + int namelen = 0; if ( !S_ISREG(cp->c_mode) ) { return (EPERM); @@ -279,11 +317,17 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { return (result); } + namelen = cp->c_desc.cd_namelen; + if ( !RSRC_FORK_EXISTS(cp)) { hfs_unlock(cp); return (ENOATTR); } hfsmp = VTOHFS(vp); + if ((cp->c_flag & C_DELETED) && (namelen == 0)) { + openunlinked = 1; + } + result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); hfs_unlock(cp); if (result) { @@ -292,7 +336,41 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) if (uio == NULL) { *ap->a_size = (size_t)VTOF(rvp)->ff_size; } else { +#if HFS_COMPRESSION + user_ssize_t uio_size = 0; + if (decmpfs_hide) + uio_size = uio_resid(uio); +#endif /* HFS_COMPRESSION */ result = VNOP_READ(rvp, uio, 0, ap->a_context); +#if HFS_COMPRESSION + if (decmpfs_hide && + (result == 0) && + (uio_resid(uio) == uio_size)) { + /* + we intentionally make the above call to VNOP_READ so that + it can return an authorization/permission/etc. error + based on ap->a_context and thus deny this operation; + in that case, result != 0 and we won't proceed + + however, if result == 0, it will have returned no data + because hfs_vnop_read hid the resource fork + (hence uio_resid(uio) == uio_size, i.e. 
the uio is untouched) + + in that case, we try again with the decmpfs_ctx context + to get the actual data + */ + result = VNOP_READ(rvp, uio, 0, decmpfs_ctx); + } +#endif /* HFS_COMPRESSION */ + } + /* force the rsrc fork vnode to recycle right away */ + if (openunlinked) { + int vref; + vref = vnode_ref (rvp); + if (vref == 0) { + vnode_rele (rvp); + } + vnode_recycle(rvp); } vnode_put(rvp); return (result); @@ -368,7 +446,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) } *ap->a_size = recp->attrData.attrSize; if (uio && recp->attrData.attrSize != 0) { - if (*ap->a_size > uio_resid(uio)) + if (*ap->a_size > (user_size_t)uio_resid(uio)) result = ERANGE; else result = uiomove((caddr_t) &recp->attrData.attrData , recp->attrData.attrSize, uio); @@ -386,7 +464,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) if (uio == NULL) { break; } - if (*ap->a_size > uio_resid(uio)) { + if (*ap->a_size > (user_size_t)uio_resid(uio)) { result = ERANGE; break; } @@ -514,6 +592,15 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) if (VNODE_IS_RSRC(vp)) { return (EPERM); } + +#if HFS_COMPRESSION + if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) ) { /* 1 == don't take the cnode lock */ + result = decmpfs_decompress_file(vp, VTOCMP(vp), -1, 1, 0); + if (result != 0) + return result; + } +#endif /* HFS_COMPRESSION */ + /* Set the Finder Info. */ if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) { u_int8_t finderinfo[32]; @@ -523,7 +610,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) attrsize = sizeof(VTOC(vp)->c_finderinfo); - if (uio_resid(uio) != attrsize) { + if ((user_size_t)uio_resid(uio) != attrsize) { return (ERANGE); } /* Grab the new Finder Info data. */ @@ -598,6 +685,8 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) /* Write the Resource Fork. 
*/ if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { struct vnode *rvp = NULL; + int namelen = 0; + int openunlinked = 0; if (!vnode_isreg(vp)) { return (EPERM); @@ -606,6 +695,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) return (result); } cp = VTOC(vp); + namelen = cp->c_desc.cd_namelen; if (RSRC_FORK_EXISTS(cp)) { /* attr exists and "create" was specified. */ @@ -620,6 +710,15 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) return (ENOATTR); } } + + /* + * Note that we could be called on to grab the rsrc fork vnode + * for a file that has become open-unlinked. + */ + if ((cp->c_flag & C_DELETED) && (namelen == 0)) { + openunlinked = 1; + } + result = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE); hfs_unlock(cp); if (result) { @@ -627,6 +726,18 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) } /* VNOP_WRITE will update timestamps accordingly */ result = VNOP_WRITE(rvp, uio, 0, ap->a_context); + + /* if open unlinked, force it inactive */ + if (openunlinked) { + int vref; + vref = vnode_ref (rvp); + if (vref == 0) { + vnode_rele(rvp); + } + vnode_recycle (rvp); + } + + vnode_put(rvp); return (result); } @@ -651,7 +762,8 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) attrsize < hfsmp->hfs_max_inline_attrsize) { MALLOC(user_data_ptr, void *, attrsize, M_TEMP, M_WAITOK); if (user_data_ptr == NULL) { - return (ENOMEM); + result = ENOMEM; + goto exit; } result = uiomove((caddr_t)user_data_ptr, attrsize, uio); @@ -760,8 +872,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) /* Copy data into the blocks. */ result = write_attr_data(hfsmp, uio, attrsize, extentptr); if (result) { + const char *name = vnode_getname(vp); printf("hfs_setxattr: write_attr_data err (%d) %s:%s\n", - result, vnode_name(vp) ? vnode_name(vp) : "", ap->a_name); + result, name ? 
name : "", ap->a_name); + if (name) + vnode_putname(name); goto exit; } @@ -769,8 +884,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) if (exists) { result = remove_attribute_records(hfsmp, iterator); if (result) { + const char *name = vnode_getname(vp); printf("hfs_setxattr: remove_attribute_records err (%d) %s:%s\n", - result, vnode_name(vp) ? vnode_name(vp) : "", ap->a_name); + result, name ? name : "", ap->a_name); + if (name) + vnode_putname(name); goto exit; } } @@ -797,8 +915,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); if (result) { #if HFS_XATTR_VERBOSE + const char *name = vnode_getname(vp); printf("hfs_setxattr: BTInsertRecord err (%d) %s:%s\n", - MacToVFSError(result), vnode_name(vp) ? vnode_name(vp) : "", ap->a_name); + MacToVFSError(result), name ? name : "", ap->a_name); + if (name) + vnode_putname(name); #endif goto exit; } @@ -821,8 +942,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); if (result) { + const char *name = vnode_getname(vp); printf("hfs_setxattr: BTInsertRecord err (%d) %s:%s\n", - MacToVFSError(result), vnode_name(vp) ? vnode_name(vp) : "", ap->a_name); + MacToVFSError(result), name ? name : "", ap->a_name); + if (name) + vnode_putname(name); goto exit; } extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents); @@ -893,9 +1017,6 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) if (cp) { hfs_unlock(cp); } - if (result == 0) { - HFS_KNOTE(vp, NOTE_ATTRIB); - } if (user_data_ptr) { FREE(user_data_ptr, M_TEMP); } @@ -942,6 +1063,11 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) return (EPERM); } +#if HFS_COMPRESSION + if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) && !(ap->a_options & XATTR_SHOWCOMPRESSION)) + return ENOATTR; +#endif /* HFS_COMPRESSION */ + /* If Resource Fork is non-empty then truncate it. 
*/ if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { struct vnode *rvp = NULL; @@ -979,7 +1105,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) return (result); } - result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, ap->a_context); + result = hfs_truncate(rvp, (off_t)0, IO_NDELAY, 0, 0, ap->a_context); if (result == 0) { cp->c_touch_chgtime = TRUE; cp->c_flag |= C_MODIFIED; @@ -1088,9 +1214,6 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) hfs_end_transaction(hfsmp); exit: hfs_unlock(cp); - if (result == 0) { - HFS_KNOTE(vp, NOTE_ATTRIB); - } FREE(iterator, M_TEMP); return MacToVFSError(result); } @@ -1198,7 +1321,7 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) #if HFS_XATTR_VERBOSE if (datasize < sizeof(HFSPlusAttrForkData)) { - printf("remove_attribute_records: bad record size %d (expecting %d)\n", datasize, sizeof(HFSPlusAttrForkData)); + printf("hfs: remove_attribute_records: bad record size %d (expecting %d)\n", datasize, sizeof(HFSPlusAttrForkData)); } #endif totalblks = attrdata.forkData.theFork.totalBlocks; @@ -1206,7 +1329,7 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) /* Process the first 8 extents. 
*/ extentblks = count_extent_blocks(totalblks, attrdata.forkData.theFork.extents); if (extentblks > totalblks) - panic("remove_attribute_records: corruption..."); + panic("hfs: remove_attribute_records: corruption..."); if (BTDeleteRecord(btfile, iterator) == 0) { free_attr_blks(hfsmp, extentblks, attrdata.forkData.theFork.extents); } @@ -1221,7 +1344,7 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) if (result || (attrdata.recordType != kHFSPlusAttrExtents) || (datasize < sizeof(HFSPlusAttrExtents))) { - printf("remove_attribute_records: BTSearchRecord %d (%d), totalblks %d\n", + printf("hfs: remove_attribute_records: BTSearchRecord %d (%d), totalblks %d\n", MacToVFSError(result), attrdata.recordType != kHFSPlusAttrExtents, totalblks); result = ENOATTR; break; /* break from while */ @@ -1229,7 +1352,7 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) /* Process the next 8 extents. */ extentblks = count_extent_blocks(totalblks, attrdata.overflowExtents.extents); if (extentblks > totalblks) - panic("remove_attribute_records: corruption..."); + panic("hfs: remove_attribute_records: corruption..."); if (BTDeleteRecord(btfile, iterator) == 0) { free_attr_blks(hfsmp, extentblks, attrdata.overflowExtents.extents); } @@ -1277,6 +1400,11 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) if (VNODE_IS_RSRC(vp)) { return (EPERM); } + +#if HFS_COMPRESSION + int compressed = hfs_file_is_compressed(cp, 1); /* 1 == don't take the cnode lock */ +#endif /* HFS_COMPRESSION */ + hfsmp = VTOHFS(vp); *ap->a_size = 0; @@ -1297,7 +1425,7 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) if (bcmp(finderinfo_start, emptyfinfo, finderinfo_size) != 0) { if (uio == NULL) { *ap->a_size += sizeof(XATTR_FINDERINFO_NAME); - } else if (uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) { + } else if ((user_size_t)uio_resid(uio) < sizeof(XATTR_FINDERINFO_NAME)) { result = ERANGE; goto exit; } else { @@ -1309,16 +1437,24 @@ 
hfs_vnop_listxattr(struct vnop_listxattr_args *ap) } /* If Resource Fork is non-empty then export it's name. */ if (S_ISREG(cp->c_mode) && RSRC_FORK_EXISTS(cp)) { - if (uio == NULL) { - *ap->a_size += sizeof(XATTR_RESOURCEFORK_NAME); - } else if (uio_resid(uio) < sizeof(XATTR_RESOURCEFORK_NAME)) { - result = ERANGE; - goto exit; - } else { - result = uiomove(XATTR_RESOURCEFORK_NAME, - sizeof(XATTR_RESOURCEFORK_NAME), uio); - if (result) +#if HFS_COMPRESSION + if ((ap->a_options & XATTR_SHOWCOMPRESSION) || + !compressed || + !hfs_hides_rsrc(ap->a_context, VTOC(vp), 1) /* 1 == don't take the cnode lock */ + ) +#endif /* HFS_COMPRESSION */ + { + if (uio == NULL) { + *ap->a_size += sizeof(XATTR_RESOURCEFORK_NAME); + } else if ((user_size_t)uio_resid(uio) < sizeof(XATTR_RESOURCEFORK_NAME)) { + result = ERANGE; goto exit; + } else { + result = uiomove(XATTR_RESOURCEFORK_NAME, + sizeof(XATTR_RESOURCEFORK_NAME), uio); + if (result) + goto exit; + } } } /* @@ -1372,6 +1508,11 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) state.result = 0; state.uio = uio; state.size = 0; +#if HFS_COMPRESSION + state.showcompressed = !compressed || ap->a_options & XATTR_SHOWCOMPRESSION; + state.ctx = ap->a_context; + state.vp = vp; +#endif /* HFS_COMPRESSION */ /* * Process entries starting just after iterator->key. @@ -1405,7 +1546,7 @@ static int listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *data, struct listattr_callback_state *state) { char attrname[XATTR_MAXNAMELEN + 1]; - size_t bytecount; + ssize_t bytecount; int result; if (state->fileID != key->fileID) { @@ -1421,7 +1562,7 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat /* Convert the attribute name into UTF-8. 
*/ result = utf8_encodestr(key->attrName, key->attrNameLen * sizeof(UniChar), - (u_int8_t *)attrname, &bytecount, sizeof(attrname), '/', 0); + (u_int8_t *)attrname, (size_t *)&bytecount, sizeof(attrname), '/', 0); if (result) { state->result = result; return (0); /* stop */ @@ -1431,6 +1572,11 @@ listattr_callback(const HFSPlusAttrKey *key, __unused const HFSPlusAttrData *dat if (xattr_protected(attrname)) return (1); /* continue */ +#if HFS_COMPRESSION + if (!state->showcompressed && hfs_hides_xattr(state->ctx, VTOC(state->vp), attrname, 1) ) /* 1 == don't take the cnode lock */ + return 1; /* continue */ +#endif /* HFS_COMPRESSION */ + if (state->uio == NULL) { state->size += bytecount; } else { @@ -1461,7 +1607,7 @@ __private_extern__ int hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid) { - BTreeIterator *iterator; + BTreeIterator *iterator = NULL; HFSPlusAttrKey *key; struct filefork *btfile; int result, lockflags; @@ -1472,6 +1618,9 @@ hfs_removeallattr(struct hfsmount *hfsmp, u_int32_t fileid) btfile = VTOF(hfsmp->hfs_attribute_vp); MALLOC(iterator, BTreeIterator *, sizeof(BTreeIterator), M_TEMP, M_WAITOK); + if (iterator == NULL) { + return (ENOMEM); + } bzero(iterator, sizeof(BTreeIterator)); key = (HFSPlusAttrKey *)&iterator->key; @@ -1979,7 +2128,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent result = cluster_read(evp, uio, VTOF(evp)->ff_size, IO_SYNC | IO_UNIT); #if HFS_XATTR_VERBOSE - printf("read_attr_data: cr iosize %d [%d, %d] (%d)\n", + printf("hfs: read_attr_data: cr iosize %d [%d, %d] (%d)\n", iosize, extents[i].startBlock, extents[i].blockCount, result); #endif if (result) @@ -2034,7 +2183,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten result = cluster_write(evp, uio, filesize, filesize, filesize, (off_t) 0, IO_SYNC | IO_UNIT); #if HFS_XATTR_VERBOSE - printf("write_attr_data: cw iosize %d [%d, %d] (%d)\n", + printf("hfs: write_attr_data: cw iosize %d [%d, %d] 
(%d)\n", iosize, extents[i].startBlock, extents[i].blockCount, result); #endif if (result) @@ -2077,7 +2226,7 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, 0, 0, &extents[i].startBlock, &extents[i].blockCount); #if HFS_XATTR_VERBOSE - printf("alloc_attr_blks: BA blkcnt %d [%d, %d] (%d)\n", + printf("hfs: alloc_attr_blks: BA blkcnt %d [%d, %d] (%d)\n", blkcnt, extents[i].startBlock, extents[i].blockCount, result); #endif if (result) { @@ -2095,7 +2244,7 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H result = ENOSPC; #if HFS_XATTR_VERBOSE - printf("alloc_attr_blks: unexpected failure, %d blocks unallocated\n", blkcnt); + printf("hfs: alloc_attr_blks: unexpected failure, %d blocks unallocated\n", blkcnt); #endif for (; i <= 0; i--) { if ((blkcnt = extents[i].blockCount) != 0) { @@ -2129,7 +2278,7 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte for (i = 0; (remblks > 0) && (extents[i].blockCount != 0); i++) { if (extents[i].blockCount > (u_int32_t)blkcnt) { #if HFS_XATTR_VERBOSE - printf("free_attr_blks: skipping bad extent [%d, %d]\n", + printf("hfs: free_attr_blks: skipping bad extent [%d, %d]\n", extents[i].startBlock, extents[i].blockCount); #endif extents[i].blockCount = 0; @@ -2144,7 +2293,7 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte remblks -= extents[i].blockCount; #if HFS_XATTR_VERBOSE - printf("free_attr_blks: BlockDeallocate [%d, %d]\n", + printf("hfs: free_attr_blks: BlockDeallocate [%d, %d]\n", extents[i].startBlock, extents[i].blockCount); #endif /* Discard any resident pages for this block range. 
*/ diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c index 70108d229..80b815129 100644 --- a/bsd/hfs/hfscommon/BTree/BTree.c +++ b/bsd/hfs/hfscommon/BTree/BTree.c @@ -215,7 +215,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc) btreePtr = (BTreeControlBlock*) NewPtrSysClear( sizeof( BTreeControlBlock ) ); if (btreePtr == nil) { - Panic ("\pBTOpen: no memory for btreePtr."); + Panic ("BTOpen: no memory for btreePtr."); return memFullErr; } @@ -254,11 +254,11 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc) { nodeRec.buffer = nil; nodeRec.blockHeader = nil; - Panic("\pBTOpen: getNodeProc returned error getting header node."); + Panic("BTOpen: getNodeProc returned error getting header node."); goto ErrorExit; } ++btreePtr->numGetNodes; - header = (BTHeaderRec*) ((u_long)nodeRec.buffer + sizeof(BTNodeDescriptor)); + header = (BTHeaderRec*) ((uintptr_t)nodeRec.buffer + sizeof(BTNodeDescriptor)); ///////////////////////////// verify header ///////////////////////////////// @@ -269,7 +269,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc) ///////////////////// Initalize fields from header ////////////////////////// - PanicIf ( (FCBTOVCB(filePtr)->vcbSigWord != 0x4244) && (header->nodeSize == 512), "\p BTOpenPath: wrong node size for HFS+ volume!"); // 0x4244 = 'BD' + PanicIf ( (FCBTOVCB(filePtr)->vcbSigWord != 0x4244) && (header->nodeSize == 512), " BTOpenPath: wrong node size for HFS+ volume!"); // 0x4244 = 'BD' btreePtr->treeDepth = header->treeDepth; btreePtr->rootNode = header->rootNode; diff --git a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c index 41adf8863..64c7b86f0 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c +++ b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003, 2005-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2003, 2005-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -359,7 +359,7 @@ OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, } while ( ((BTNodeDescriptor*)mapNode.buffer)->fLink != 0 ); if (DEBUG_BUILD && totalMapBits != CalcMapBits (btreePtr)) - Panic ("\pExtendBTree: totalMapBits != CalcMapBits"); + Panic ("ExtendBTree: totalMapBits != CalcMapBits"); /////////////////////// Extend LEOF If Necessary //////////////////////////// @@ -464,7 +464,7 @@ OSStatus ExtendBTree (BTreeControlBlockPtr btreePtr, if (DEBUG_BUILD && mapSize != M_MapRecordSize (btreePtr->nodeSize) ) { - Panic ("\pExtendBTree: mapSize != M_MapRecordSize"); + Panic ("ExtendBTree: mapSize != M_MapRecordSize"); } mapBits = mapSize << 3; // mapSize (in bytes) * 8 @@ -614,3 +614,112 @@ u_int32_t CalcMapBits (BTreeControlBlockPtr btreePtr) return mapBits; } + + +/*------------------------------------------------------------------------------- +Routine: BTZeroUnusedNodes + +Function: Write zeros to all nodes in the B-tree that are not currently in use. +-------------------------------------------------------------------------------*/ +__private_extern__ +int +BTZeroUnusedNodes(FCB *filePtr) +{ + int err; + vnode_t vp; + BTreeControlBlockPtr btreePtr; + BlockDescriptor mapNode; + buf_t bp; + u_int32_t nodeNumber; + u_int16_t *mapPtr, *pos; + u_int16_t mapSize, size; + u_int16_t mask; + u_int16_t bitNumber; + u_int16_t word; + int numWritten; + + vp = FTOV(filePtr); + btreePtr = (BTreeControlBlockPtr) filePtr->fcbBTCBPtr; + bp = NULL; + nodeNumber = 0; + mapNode.buffer = nil; + mapNode.blockHeader = nil; + numWritten = 0; + + /* Iterate over map nodes. */ + while (true) + { + err = GetMapNode (btreePtr, &mapNode, &mapPtr, &mapSize); + if (err) + { + err = MacToVFSError(err); + goto ErrorExit; + } + + pos = mapPtr; + size = mapSize; + size >>= 1; /* convert to number of 16-bit words */ + + /* Iterate over 16-bit words in the map record. */ + while (size--) + { + if (*pos != 0xFFFF) /* Anything free in this word? 
*/ + { + word = SWAP_BE16(*pos); + + /* Iterate over bits in the word. */ + for (bitNumber = 0, mask = 0x8000; + bitNumber < 16; + ++bitNumber, mask >>= 1) + { + if (word & mask) + continue; /* This node is in use. */ + + if (nodeNumber + bitNumber >= btreePtr->totalNodes) + { + /* We've processed all of the nodes. */ + goto done; + } + + /* + * Get a buffer full of zeros and write it to the unused + * node. Since we'll probably be writing a lot of nodes, + * bypass the journal (to avoid a transaction that's too + * big). Instead, this behaves more like clearing out + * nodes when extending a B-tree (eg., ClearBTNodes). + */ + bp = buf_getblk(vp, nodeNumber + bitNumber, btreePtr->nodeSize, 0, 0, BLK_META); + if (bp == NULL) + { + printf("hfs: BTZeroUnusedNodes: unable to read node %u\n", nodeNumber + bitNumber); + err = EIO; + goto ErrorExit; + } + + buf_clear(bp); + buf_markaged(bp); + + /* + * Try not to hog the buffer cache. Wait for the write + * every 32 nodes. + */ + ++numWritten; + if (numWritten % 32 == 0) + VNOP_BWRITE(bp); + else + buf_bawrite(bp); + } + } + + /* Go to the next word in the bitmap */ + ++pos; + nodeNumber += 16; + } + } + +ErrorExit: +done: + (void) ReleaseNode(btreePtr, &mapNode); + + return err; +} diff --git a/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c b/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c index 0b1d26591..2574b8a84 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c @@ -424,7 +424,7 @@ OSStatus FindIteratorPosition (BTreeControlBlockPtr btreePtr, { nodeNum = ((NodeDescPtr) left->buffer)->fLink; - PanicIf (index != 0, "\pFindIteratorPosition: index != 0"); //�� just checking... + PanicIf (index != 0, "FindIteratorPosition: index != 0"); //�� just checking... 
goto SuccessfulExit; } else @@ -627,7 +627,7 @@ OSStatus TrySimpleReplace (BTreeControlBlockPtr btreePtr, didItFit = InsertKeyRecord (btreePtr, nodePtr, index, &iterator->key, KeyLength(btreePtr, &iterator->key), record->bufferAddress, recordLen); - PanicIf (didItFit == false, "\pTrySimpleInsert: InsertKeyRecord returned false!"); + PanicIf (didItFit == false, "TrySimpleInsert: InsertKeyRecord returned false!"); *recordInserted = true; } diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c index 2db71479d..89f4eaf13 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c @@ -202,7 +202,7 @@ OSStatus GetNode (BTreeControlBlockPtr btreePtr, // is nodeNum within proper range? if( nodeNum >= btreePtr->totalNodes ) { - Panic("\pGetNode:nodeNum >= totalNodes"); + Panic("GetNode:nodeNum >= totalNodes"); err = fsBTInvalidNodeErr; goto ErrorExit; } @@ -223,7 +223,7 @@ OSStatus GetNode (BTreeControlBlockPtr btreePtr, if (err != noErr) { - Panic ("\pGetNode: getNodeProc returned error."); + Panic ("GetNode: getNodeProc returned error."); goto ErrorExit; } ++btreePtr->numGetNodes; @@ -277,7 +277,7 @@ OSStatus GetNewNode (BTreeControlBlockPtr btreePtr, if (err != noErr) { - Panic ("\pGetNewNode: getNodeProc returned error."); + Panic ("GetNewNode: getNodeProc returned error."); // returnNodePtr->buffer = nil; return err; } @@ -327,7 +327,7 @@ OSStatus ReleaseNode (BTreeControlBlockPtr btreePtr, err = releaseNodeProc (btreePtr->fileRefNum, nodePtr, kReleaseBlock ); - PanicIf (err, "\pReleaseNode: releaseNodeProc returned error."); + PanicIf (err, "ReleaseNode: releaseNodeProc returned error."); ++btreePtr->numReleaseNodes; } @@ -369,7 +369,7 @@ OSStatus TrashNode (BTreeControlBlockPtr btreePtr, err = releaseNodeProc (btreePtr->fileRefNum, nodePtr, kReleaseBlock | kTrashBlock ); - PanicIf (err, "\TrashNode: releaseNodeProc returned error."); + PanicIf (err, "TrashNode: releaseNodeProc returned 
error."); ++btreePtr->numReleaseNodes; } diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c index 547a0cf50..e58dde4e6 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,7 +64,7 @@ struct nreserve { #define NR_CACHE 17 #define NR_HASH(btvp, tag) \ - (&nr_hashtbl[((((int)(btvp)) >> 8) ^ ((int)(tag) >> 4)) & nr_hashmask]) + (&nr_hashtbl[((((intptr_t)(btvp)) >> 8) ^ ((intptr_t)(tag) >> 4)) & nr_hashmask]) LIST_HEAD(nodereserve, nreserve) *nr_hashtbl; @@ -90,7 +90,7 @@ void BTReserveSetup() { if (sizeof(struct nreserve) != sizeof(cat_cookie_t)) - panic("BTReserveSetup: nreserve size != opaque struct size"); + panic("hfs: BTReserveSetup: nreserve size != opaque struct size"); nr_hashtbl = hashinit(NR_CACHE, M_HFSMNT, &nr_hashmask); @@ -290,7 +290,7 @@ nr_delete(struct vnode * btvp, struct nreserve *nrp, int *nodecnt) lck_mtx_lock(&nr_mutex); if (nrp->nr_tag) { if ((nrp->nr_tag != tag) || (nrp->nr_btvp != btvp)) - panic("nr_delete: invalid NR (%p)", nrp); + panic("hfs: nr_delete: invalid NR (%p)", nrp); LIST_REMOVE(nrp, nr_hash); *nodecnt = nrp->nr_nodecnt; bzero(nrp, sizeof(struct nreserve)); diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c index 7eb4013d4..1ce08e385 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeScanner.c +++ b/bsd/hfs/hfscommon/BTree/BTreeScanner.c @@ -205,7 +205,7 @@ static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ) */ err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost, true); if ( err != noErr ) { - printf("FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum); + printf("hfs: FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum); continue; } 
diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c index 9ac5c926f..71d9e06c9 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c @@ -235,7 +235,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, // if (curNodeNum == 0) { -// Panic("\pSearchTree: curNodeNum is zero!"); +// Panic("SearchTree: curNodeNum is zero!"); err = btBadNode; goto ErrorExit; } @@ -254,7 +254,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, // if (((BTNodeDescriptor*)nodeRec.buffer)->height != level) { -// Panic("\pIncorrect node height"); +// Panic("Incorrect node height"); err = btBadNode; goto ReleaseAndExit; } @@ -264,7 +264,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, // Nodes at level 1 must be leaves, by definition if (nodeKind != kBTLeafNode) { - // Panic("\pIncorrect node type: expected leaf"); + // Panic("Incorrect node type: expected leaf"); err = btBadNode; goto ReleaseAndExit; } @@ -274,7 +274,7 @@ OSStatus SearchTree (BTreeControlBlockPtr btreePtr, // A node at any other depth must be an index node if (nodeKind != kBTIndexNode) { -// Panic("\pIncorrect node type: expected index"); +// Panic("Incorrect node type: expected index"); err = btBadNode; goto ReleaseAndExit; } @@ -399,7 +399,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, InsertKey insertKey; #if defined(applec) && !defined(__SC__) - PanicIf ((level == 1) && (((NodeDescPtr)targetNode->buffer)->kind != kBTLeafNode), "\P InsertLevel: non-leaf at level 1! "); + PanicIf ((level == 1) && (((NodeDescPtr)targetNode->buffer)->kind != kBTLeafNode), " InsertLevel: non-leaf at level 1! 
"); #endif leftNode.buffer = nil; leftNode.blockHeader = nil; @@ -444,7 +444,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, M_ExitOnError (err); if ( DEBUG_BUILD && updateParent && newRoot ) - DebugStr("\p InsertLevel: New root from primary key, update from secondary key..."); + DebugStr(" InsertLevel: New root from primary key, update from secondary key..."); } //////////////////////// Update Parent(s) /////////////////////////////// @@ -462,7 +462,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, secondaryKey = nil; - PanicIf ( (level == btreePtr->treeDepth), "\p InsertLevel: unfinished insert!?"); + PanicIf ( (level == btreePtr->treeDepth), " InsertLevel: unfinished insert!?"); ++level; @@ -470,13 +470,13 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, index = treePathTable [level].index; parentNodeNum = treePathTable [level].node; - PanicIf ( parentNodeNum == 0, "\p InsertLevel: parent node is zero!?"); + PanicIf ( parentNodeNum == 0, " InsertLevel: parent node is zero!?"); err = GetNode (btreePtr, parentNodeNum, 0, &parentNode); // released as target node in next level up M_ExitOnError (err); #if defined(applec) && !defined(__SC__) if (DEBUG_BUILD && level > 1) - PanicIf ( ((NodeDescPtr)parentNode.buffer)->kind != kBTIndexNode, "\P InsertLevel: parent node not an index node! "); + PanicIf ( ((NodeDescPtr)parentNode.buffer)->kind != kBTIndexNode, " InsertLevel: parent node not an index node! 
"); #endif ////////////////////////// Update Parent Index ////////////////////////////// @@ -487,7 +487,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, //���debug: check if ptr == targetNodeNum GetRecordByIndex (btreePtr, parentNode.buffer, index, &keyPtr, &recPtr, &recSize); - PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, "\p InsertLevel: parent ptr doesn't match target node!"); + PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, " InsertLevel: parent ptr doesn't match target node!"); // need to delete and re-insert this parent key/ptr // we delete it here and it gets re-inserted in the @@ -544,7 +544,7 @@ OSStatus InsertLevel (BTreeControlBlockPtr btreePtr, (void) ReleaseNode (btreePtr, targetNode); (void) ReleaseNode (btreePtr, &leftNode); - Panic ("\p InsertLevel: an error occurred!"); + Panic (" InsertLevel: an error occurred!"); return err; @@ -577,7 +577,7 @@ static OSErr InsertNode (BTreeControlBlockPtr btreePtr, *rootSplit = false; - PanicIf ( rightNode->buffer == leftNode->buffer, "\p InsertNode: rightNode == leftNode, huh?"); + PanicIf ( rightNode->buffer == leftNode->buffer, " InsertNode: rightNode == leftNode, huh?"); leftNodeNum = ((NodeDescPtr) rightNode->buffer)->bLink; @@ -616,7 +616,7 @@ static OSErr InsertNode (BTreeControlBlockPtr btreePtr, if ( !recordFit && leftNodeNum > 0 ) { - PanicIf ( leftNode->buffer != nil, "\p InsertNode: leftNode already acquired!"); + PanicIf ( leftNode->buffer != nil, " InsertNode: leftNode already acquired!"); if ( leftNode->buffer == nil ) { @@ -626,7 +626,7 @@ static OSErr InsertNode (BTreeControlBlockPtr btreePtr, ModifyBlockStart(btreePtr->fileRefNum, leftNode); } - PanicIf ( ((NodeDescPtr) leftNode->buffer)->fLink != node, "\p InsertNode, RotateLeft: invalid sibling link!" ); + PanicIf ( ((NodeDescPtr) leftNode->buffer)->fLink != node, " InsertNode, RotateLeft: invalid sibling link!" ); if ( !key->skipRotate ) // are rotates allowed? 
{ @@ -717,7 +717,7 @@ OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, targetNodeNum = treePathTable[level].node; targetNodePtr = targetNode->buffer; - PanicIf (targetNodePtr == nil, "\pDeleteTree: targetNode has nil buffer!"); + PanicIf (targetNodePtr == nil, "DeleteTree: targetNode has nil buffer!"); // XXXdbg ModifyBlockStart(btreePtr->fileRefNum, targetNode); @@ -829,7 +829,7 @@ OSStatus DeleteTree (BTreeControlBlockPtr btreePtr, //���debug: check if ptr == targetNodeNum GetRecordByIndex (btreePtr, parentNode.buffer, index, &keyPtr, &recPtr, &recSize); - PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, "\p DeleteTree: parent ptr doesn't match targetNodeNum!!"); + PanicIf( (*(u_int32_t *) recPtr) != targetNodeNum, " DeleteTree: parent ptr doesn't match targetNodeNum!!"); // need to delete and re-insert this parent key/ptr DeleteRecord (btreePtr, parentNode.buffer, index); @@ -1055,7 +1055,7 @@ static OSStatus RotateLeft (BTreeControlBlockPtr btreePtr, keyPtr, keyLength, recPtr, recSize); if ( !didItFit ) { - Panic ("\pRotateLeft: InsertKeyRecord (left) returned false!"); + Panic ("RotateLeft: InsertKeyRecord (left) returned false!"); err = fsBTBadRotateErr; goto ErrorExit; } @@ -1068,7 +1068,7 @@ static OSStatus RotateLeft (BTreeControlBlockPtr btreePtr, didItFit = RotateRecordLeft (btreePtr, leftNode, rightNode); if ( !didItFit ) { - Panic ("\pRotateLeft: RotateRecordLeft returned false!"); + Panic ("RotateLeft: RotateRecordLeft returned false!"); err = fsBTBadRotateErr; goto ErrorExit; } @@ -1085,7 +1085,7 @@ static OSStatus RotateLeft (BTreeControlBlockPtr btreePtr, keyPtr, keyLength, recPtr, recSize); if ( !didItFit ) { - Panic ("\pRotateLeft: InsertKeyRecord (right) returned false!"); + Panic ("RotateLeft: InsertKeyRecord (right) returned false!"); err = fsBTBadRotateErr; goto ErrorExit; } @@ -1137,7 +1137,7 @@ static OSStatus SplitLeft (BTreeControlBlockPtr btreePtr, right = rightNode->buffer; left = leftNode->buffer; - PanicIf ( right->bLink != 0 && 
left == 0, "\p SplitLeft: left sibling missing!?" ); + PanicIf ( right->bLink != 0 && left == 0, " SplitLeft: left sibling missing!?" ); /* type should be kBTLeafNode or kBTIndexNode */ @@ -1269,8 +1269,8 @@ static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, rootNode.buffer = nil; rootNode.blockHeader = nil; - PanicIf (leftNode == nil, "\pAddNewRootNode: leftNode == nil"); - PanicIf (rightNode == nil, "\pAddNewRootNode: rightNode == nil"); + PanicIf (leftNode == nil, "AddNewRootNode: leftNode == nil"); + PanicIf (rightNode == nil, "AddNewRootNode: rightNode == nil"); /////////////////////// Initialize New Root Node //////////////////////////// @@ -1296,7 +1296,7 @@ static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, didItFit = InsertKeyRecord ( btreePtr, rootNode.buffer, 0, keyPtr, keyLength, (u_int8_t *) &rightNode->bLink, 4 ); - PanicIf ( !didItFit, "\pAddNewRootNode:InsertKeyRecord failed for left index record"); + PanicIf ( !didItFit, "AddNewRootNode:InsertKeyRecord failed for left index record"); //////////////////// Insert Right Node Index Record ///////////////////////// @@ -1307,7 +1307,7 @@ static OSStatus AddNewRootNode (BTreeControlBlockPtr btreePtr, didItFit = InsertKeyRecord ( btreePtr, rootNode.buffer, 1, keyPtr, keyLength, (u_int8_t *) &leftNode->fLink, 4 ); - PanicIf ( !didItFit, "\pAddNewRootNode:InsertKeyRecord failed for right index record"); + PanicIf ( !didItFit, "AddNewRootNode:InsertKeyRecord failed for right index record"); /////////////////////////// Release Root Node /////////////////////////////// diff --git a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c index 1e24463f1..219cd538f 100644 --- a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c +++ b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c @@ -33,55 +33,9 @@ #include "../headers/BTreesInternal.h" #include "../headers/CatalogPrivate.h" #include "../headers/HFSUnicodeWrappers.h" +#include "../headers/BTreesPrivate.h" 
#include - -//******************************************************************************* -// Routine: LocateCatalogNode -// -// Function: Locates the catalog record for an existing folder or file -// CNode and returns pointers to the key and data records. -// -//******************************************************************************* - -OSErr -LocateCatalogNode(const ExtendedVCB *volume, HFSCatalogNodeID folderID, const CatalogName *name, - u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint) -{ - OSErr result; - CatalogName *nodeName = NULL; /* To ward off uninitialized use warnings from compiler */ - HFSCatalogNodeID threadParentID; - - - result = LocateCatalogRecord(volume, folderID, name, hint, keyPtr, dataPtr, newHint); - ReturnIfError(result); - - // if we got a thread record, then go look up real record - switch ( dataPtr->recordType ) - { - case kHFSFileThreadRecord: - case kHFSFolderThreadRecord: - threadParentID = dataPtr->hfsThread.parentID; - nodeName = (CatalogName *) &dataPtr->hfsThread.nodeName; - break; - - case kHFSPlusFileThreadRecord: - case kHFSPlusFolderThreadRecord: - threadParentID = dataPtr->hfsPlusThread.parentID; - nodeName = (CatalogName *) &dataPtr->hfsPlusThread.nodeName; - break; - - default: - threadParentID = 0; - break; - } - - if ( threadParentID ) // found a thread - result = LocateCatalogRecord(volume, threadParentID, nodeName, kNoHint, keyPtr, dataPtr, newHint); - - return result; -} - // // Routine: LocateCatalogNodeByKey // @@ -163,20 +117,33 @@ LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *ke OSErr LocateCatalogRecord(const ExtendedVCB *volume, HFSCatalogNodeID folderID, const CatalogName *name, - u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint) + __unused u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint) /* Finds the catalog record keyed by (folderID, name) with BTSearchRecord; on success fills dataPtr, keyPtr and *newHint. hint is marked __unused and is never read by the added lines. */ { - OSErr result; - CatalogKey tempKey; // 518 bytes - u_int16_t tempSize; + OSErr result; /* status from BTSearchRecord, remapped just before return */ + 
uint16_t tempSize; /* handed to BTSearchRecord as its size out-argument; not read afterwards */ + FSBufferDescriptor btRecord; + BTreeIterator searchIterator; + FCB *fcb; + BTreeControlBlock *btcb; - BuildCatalogKey(folderID, name, (volume->vcbSigWord == kHFSPlusSigWord), &tempKey); - - if ( name == NULL ) - hint = kNoHint; // no CName given so clear the hint + bzero(&searchIterator, sizeof(searchIterator)); /* the whole iterator, hint included, starts out zeroed */ - result = SearchBTreeRecord(volume->catalogRefNum, &tempKey, hint, keyPtr, dataPtr, &tempSize, newHint); + fcb = GetFileControlBlock(volume->catalogRefNum); + btcb = (BTreeControlBlock *)fcb->fcbBTCBPtr; /* only used below by CalcKeySize */ - return (result == btNotFound ? cmNotFound : result); + btRecord.bufferAddress = dataPtr; /* the record is read straight into the caller's buffer */ + btRecord.itemCount = 1; + btRecord.itemSize = sizeof(CatalogRecord); + /* Build the search key in place inside the iterator. */ + BuildCatalogKey(folderID, name, (volume->vcbSigWord == kHFSPlusSigWord), (CatalogKey *)&searchIterator.key); + + result = BTSearchRecord(fcb, &searchIterator, &btRecord, &tempSize, &searchIterator); /* same iterator is passed as both the search and the result iterator */ + if (result == noErr) { /* outputs are written on success only */ + *newHint = searchIterator.hint.nodeNum; + BlockMoveData(&searchIterator.key, keyPtr, CalcKeySize(btcb, &searchIterator.key)); /* copies CalcKeySize() bytes of the iterator's key into keyPtr */ + } + + return (result == btNotFound ? cmNotFound : result); /* btNotFound is reported to callers as cmNotFound */ } diff --git a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c index ab2cfdb29..dbbc33b58 100644 --- a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c +++ b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c @@ -447,7 +447,7 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest if (err != btNotFound) { if ( DEBUG_BUILD ) - DebugStr("\pUnexpected error from SearchBTreeRecord"); + DebugStr("Unexpected error from SearchBTreeRecord"); if (err == noErr) // If we found such a bogus extent record, then the tree is really messed up err = cmBadNews; // so return an error that conveys the disk is hosed. 
@@ -507,7 +507,7 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest if ( err == btExists ) { if ( DEBUG_BUILD ) - DebugStr("\pCan't insert record -- already exists"); + DebugStr("Can't insert record -- already exists"); return( cmBadNews ); } else @@ -522,7 +522,7 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest { err = DeleteExtents( vcb, srcFileID, isHFSPlus ); // Now delete all the extent entries with the sourceID if ( DEBUG_BUILD && err != noErr ) - DebugStr("\pError from DeleteExtents"); + DebugStr("Error from DeleteExtents"); break; // we're done! } } while ( true ); diff --git a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c index eb7fc628f..5ce31fd3c 100644 --- a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c +++ b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2002, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000, 2002, 2005-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,66 +34,6 @@ static OSErr CheckBTreeKey(const BTreeKey *key, const BTreeControlBlock *btcb); static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, u_int16_t recordSize); - - -OSErr SearchBTreeRecord(__unused FileReference refNum, __unused const void* key, __unused u_int32_t hint, __unused void* foundKey, __unused void* data, __unused u_int16_t *dataSize, __unused u_int32_t *newHint) -{ - panic("SearchBTreeRecord is dead code!"); - return (-1); -#if 0 - FSBufferDescriptor btRecord; - BTreeIterator searchIterator; - FCB *fcb; - BTreeControlBlock *btcb; - OSStatus result; - - - fcb = GetFileControlBlock(refNum); - btcb = (BTreeControlBlock*) fcb->fcbBTCBPtr; - - btRecord.bufferAddress = data; - btRecord.itemCount = 1; - if ( btcb->maxKeyLength == kHFSExtentKeyMaximumLength ) - btRecord.itemSize = sizeof(HFSExtentRecord); - else if ( btcb->maxKeyLength == kHFSPlusExtentKeyMaximumLength ) - btRecord.itemSize = sizeof(HFSPlusExtentRecord); - else - btRecord.itemSize = sizeof(CatalogRecord); - - searchIterator.hint.writeCount = 0; // clear these out for debugging... - searchIterator.hint.reserved1 = 0; - searchIterator.hint.reserved2 = 0; - - searchIterator.hint.nodeNum = hint; - searchIterator.hint.index = 0; - - result = CheckBTreeKey((BTreeKey *) key, btcb); - ExitOnError(result); - - BlockMoveData(key, &searchIterator.key, CalcKeySize(btcb, (BTreeKey *) key)); //�� should we range check against maxkeylen? - - result = BTSearchRecord( fcb, &searchIterator, &btRecord, dataSize, &searchIterator ); - - if (result == noErr) - { - *newHint = searchIterator.hint.nodeNum; - - result = CheckBTreeKey(&searchIterator.key, btcb); - ExitOnError(result); - - BlockMoveData(&searchIterator.key, foundKey, CalcKeySize(btcb, &searchIterator.key)); //�� warning, this could overflow user's buffer!!! 
- - if ( DEBUG_BUILD && !ValidHFSRecord(data, btcb, *dataSize) ) - DebugStr("\pSearchBTreeRecord: bad record?"); - } - -ErrorExit: - - return result; -#endif -} - - OSErr ReplaceBTreeRecord(FileReference refNum, const void* key, u_int32_t hint, void *newData, u_int16_t dataSize, u_int32_t *newHint) { FSBufferDescriptor btRecord; @@ -118,7 +58,7 @@ OSErr ReplaceBTreeRecord(FileReference refNum, const void* key, u_int32_t hint, BlockMoveData(key, &iterator.key, CalcKeySize(btcb, (const BTreeKey *) key)); //�� should we range check against maxkeylen? if ( DEBUG_BUILD && !ValidHFSRecord(newData, btcb, dataSize) ) - DebugStr("\pReplaceBTreeRecord: bad record?"); + DebugStr("ReplaceBTreeRecord: bad record?"); result = BTReplaceRecord( fcb, &iterator, &btRecord, dataSize ); @@ -145,7 +85,7 @@ static OSErr CheckBTreeKey(const BTreeKey *key, const BTreeControlBlock *btcb) if ( (keyLen < 6) || (keyLen > btcb->maxKeyLength) ) { if ( DEBUG_BUILD ) - DebugStr("\pCheckBTreeKey: bad key length!"); + DebugStr("CheckBTreeKey: bad key length!"); return fsBTInvalidKeyLengthErr; } diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index 718a87bdc..34c53fe74 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -224,7 +224,7 @@ static OSErr FindExtentRecord( u_int32_t *foundHint) { FCB * fcb; - BTreeIterator *btIterator; + BTreeIterator btIterator; FSBufferDescriptor btRecord; OSErr err; u_int16_t btRecordSize; @@ -234,14 +234,13 @@ static OSErr FindExtentRecord( *foundHint = 0; fcb = GetFileControlBlock(vcb->extentsRefNum); - MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); - bzero(btIterator, sizeof(*btIterator)); + bzero(&btIterator, sizeof(btIterator)); if (vcb->vcbSigWord == kHFSSigWord) { HFSExtentKey * extentKeyPtr; HFSExtentRecord extentData; - extentKeyPtr = (HFSExtentKey*) &btIterator->key; + extentKeyPtr = (HFSExtentKey*) &btIterator.key; extentKeyPtr->keyLength = kHFSExtentKeyMaximumLength; extentKeyPtr->forkType = forkType; extentKeyPtr->fileID = fileID; @@ -251,10 +250,10 @@ static OSErr FindExtentRecord( btRecord.itemSize = sizeof(HFSExtentRecord); btRecord.itemCount = 1; - err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator); if (err == btNotFound && allowPrevious) { - err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize); + err = BTIterateRecord(fcb, kBTreePrevRecord, &btIterator, &btRecord, &btRecordSize); // A previous record may not exist, so just return btNotFound (like we would if // it was for the wrong file/fork). 
@@ -298,7 +297,7 @@ static OSErr FindExtentRecord( HFSPlusExtentKey * extentKeyPtr; HFSPlusExtentRecord extentData; - extentKeyPtr = (HFSPlusExtentKey*) &btIterator->key; + extentKeyPtr = (HFSPlusExtentKey*) &btIterator.key; extentKeyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; extentKeyPtr->forkType = forkType; extentKeyPtr->pad = 0; @@ -309,10 +308,10 @@ static OSErr FindExtentRecord( btRecord.itemSize = sizeof(HFSPlusExtentRecord); btRecord.itemCount = 1; - err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); + err = BTSearchRecord(fcb, &btIterator, &btRecord, &btRecordSize, &btIterator); if (err == btNotFound && allowPrevious) { - err = BTIterateRecord(fcb, kBTreePrevRecord, btIterator, &btRecord, &btRecordSize); + err = BTIterateRecord(fcb, kBTreePrevRecord, &btIterator, &btRecord, &btRecordSize); // A previous record may not exist, so just return btNotFound (like we would if // it was for the wrong file/fork). @@ -336,8 +335,7 @@ static OSErr FindExtentRecord( } if (foundHint) - *foundHint = btIterator->hint.nodeNum; - FREE(btIterator, M_TEMP); + *foundHint = btIterator.hint.nodeNum; return err; } @@ -349,7 +347,7 @@ static OSErr CreateExtentRecord( HFSPlusExtentRecord extents, u_int32_t *hint) { - BTreeIterator * btIterator; + BTreeIterator btIterator; FSBufferDescriptor btRecord; u_int16_t btRecordSize; int lockflags; @@ -358,8 +356,7 @@ static OSErr CreateExtentRecord( err = noErr; *hint = 0; - MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); - bzero(btIterator, sizeof(*btIterator)); + bzero(&btIterator, sizeof(btIterator)); /* * The lock taken by callers of ExtendFileC is speculative and @@ -379,7 +376,7 @@ static OSErr CreateExtentRecord( btRecord.itemSize = btRecordSize; btRecord.itemCount = 1; - keyPtr = (HFSExtentKey*) &btIterator->key; + keyPtr = (HFSExtentKey*) &btIterator.key; keyPtr->keyLength = kHFSExtentKeyMaximumLength; keyPtr->forkType = key->forkType; keyPtr->fileID = key->fileID; @@ 
-393,20 +390,19 @@ static OSErr CreateExtentRecord( btRecord.itemSize = btRecordSize; btRecord.itemCount = 1; - BlockMoveData(key, &btIterator->key, sizeof(HFSPlusExtentKey)); + BlockMoveData(key, &btIterator.key, sizeof(HFSPlusExtentKey)); } if (err == noErr) - err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator, &btRecord, btRecordSize); + err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), &btIterator, &btRecord, btRecordSize); if (err == noErr) - *hint = btIterator->hint.nodeNum; + *hint = btIterator.hint.nodeNum; (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); hfs_systemfile_unlock(vcb, lockflags); - FREE(btIterator, M_TEMP); return err; } @@ -417,18 +413,17 @@ static OSErr DeleteExtentRecord( u_int32_t fileID, u_int32_t startBlock) { - BTreeIterator * btIterator; + BTreeIterator btIterator; OSErr err; err = noErr; - MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); - bzero(btIterator, sizeof(*btIterator)); + bzero(&btIterator, sizeof(btIterator)); if (vcb->vcbSigWord == kHFSSigWord) { HFSExtentKey * keyPtr; - keyPtr = (HFSExtentKey*) &btIterator->key; + keyPtr = (HFSExtentKey*) &btIterator.key; keyPtr->keyLength = kHFSExtentKeyMaximumLength; keyPtr->forkType = forkType; keyPtr->fileID = fileID; @@ -437,7 +432,7 @@ static OSErr DeleteExtentRecord( else { // HFS Plus volume HFSPlusExtentKey * keyPtr; - keyPtr = (HFSPlusExtentKey*) &btIterator->key; + keyPtr = (HFSPlusExtentKey*) &btIterator.key; keyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; keyPtr->forkType = forkType; keyPtr->pad = 0; @@ -445,10 +440,9 @@ static OSErr DeleteExtentRecord( keyPtr->startBlock = startBlock; } - err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator); + err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), &btIterator); (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); - FREE(btIterator, M_TEMP); return err; } @@ -730,9 +724,9 @@ int32_t CompareExtentKeys( const 
HFSExtentKey *searchKey, const HFSExtentKey *tr #if DEBUG_BUILD if (searchKey->keyLength != kHFSExtentKeyMaximumLength) - DebugStr("\pHFS: search Key is wrong length"); + DebugStr("HFS: search Key is wrong length"); if (trialKey->keyLength != kHFSExtentKeyMaximumLength) - DebugStr("\pHFS: trial Key is wrong length"); + DebugStr("HFS: trial Key is wrong length"); #endif result = -1; // assume searchKey < trialKey @@ -794,9 +788,9 @@ int32_t CompareExtentKeysPlus( const HFSPlusExtentKey *searchKey, const HFSPlusE #if DEBUG_BUILD if (searchKey->keyLength != kHFSPlusExtentKeyMaximumLength) - DebugStr("\pHFS: search Key is wrong length"); + DebugStr("HFS: search Key is wrong length"); if (trialKey->keyLength != kHFSPlusExtentKeyMaximumLength) - DebugStr("\pHFS: trial Key is wrong length"); + DebugStr("HFS: trial Key is wrong length"); #endif result = -1; // assume searchKey < trialKey @@ -1087,10 +1081,30 @@ OSErr ExtendFileC ( // If that fails, get whatever we can. // If forceContig, then take whatever we got // else, keep getting bits and pieces (non-contig) + + /* + * Note that for sparse devices (like sparse bundle dmgs), we + * should only be aggressive with re-using once-allocated pieces + * if we're not dealing with system files. If we're trying to operate + * on behalf of a system file, we need the maximum contiguous amount + * possible. 
+ */ err = noErr; - wantContig = true; + if ( (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) + && (fcb->ff_cp->c_fileid >= kHFSFirstUserCatalogNodeID) + && (flags & kEFMetadataMask) == 0) { + if (vcb->hfs_flags & HFS_DID_CONTIG_SCAN) { + wantContig = false; + } else { + // we only want to do this once to scan the bitmap to + // fill in the vcbFreeExt table of free blocks + vcb->hfs_flags |= HFS_DID_CONTIG_SCAN; + wantContig = true; + } + } else { + wantContig = true; + } useMetaZone = flags & kEFMetadataMask; - vcb->vcbFreeExtCnt = 0; /* For now, force rebuild of free extent list */ do { if (blockHint != 0) startBlock = blockHint; @@ -1322,7 +1336,7 @@ OSErr TruncateFileC ( peof = (int64_t)((int64_t)nextBlock * (int64_t)vcb->blockSize); // number of bytes in those blocks if ((vcb->vcbSigWord == kHFSSigWord) && (peof >= kTwoGigabytes)) { #if DEBUG_BUILD - DebugStr("\pHFS: Trying to truncate a file to 2GB or more"); + DebugStr("HFS: Trying to truncate a file to 2GB or more"); #endif err = fileBoundsErr; goto ErrorExit; @@ -1499,7 +1513,7 @@ OSErr HeadTruncateFile ( goto ErrorExit; /* uh oh */ else { error = 0; - printf("HeadTruncateFile: problems deallocating %s (%d)\n", + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); } } @@ -1534,7 +1548,7 @@ OSErr HeadTruncateFile ( * a known state. */ if (error != btNotFound) - printf("HeadTruncateFile: problems finding extents %s (%d)\n", + printf("hfs: HeadTruncateFile: problems finding extents %s (%d)\n", FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); error = 0; break; @@ -1548,7 +1562,7 @@ OSErr HeadTruncateFile ( if (blksfreed < headblks) { error = BlockDeallocate(vcb, extents[i].startBlock, blkcnt); if (error) { - printf("HeadTruncateFile: problems deallocating %s (%d)\n", + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", FTOC(fcb)->c_desc.cd_nameptr ? 
(const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); error = 0; } @@ -1563,7 +1577,7 @@ OSErr HeadTruncateFile ( error = DeleteExtentRecord(vcb, forkType, fileID, startblk); if (error) { - printf("HeadTruncateFile: problems deallocating %s (%d)\n", + printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); error = 0; } @@ -1834,7 +1848,7 @@ static OSErr UpdateExtentRecord ( FTOC(fcb)->c_flag |= C_MODIFIED; } else { - BTreeIterator * btIterator; + BTreeIterator btIterator; FSBufferDescriptor btRecord; u_int16_t btRecordSize; FCB * btFCB; @@ -1845,8 +1859,7 @@ static OSErr UpdateExtentRecord ( // btFCB = GetFileControlBlock(vcb->extentsRefNum); - MALLOC(btIterator, BTreeIterator *, sizeof(*btIterator), M_TEMP, M_WAITOK); - bzero(btIterator, sizeof(*btIterator)); + bzero(&btIterator, sizeof(btIterator)); /* * The lock taken by callers of ExtendFileC/TruncateFileC is @@ -1861,50 +1874,49 @@ static OSErr UpdateExtentRecord ( HFSExtentKey * key; // Actual extent key used on disk in HFS HFSExtentRecord foundData; // The extent data actually found - key = (HFSExtentKey*) &btIterator->key; + key = (HFSExtentKey*) &btIterator.key; key->keyLength = kHFSExtentKeyMaximumLength; key->forkType = extentFileKey->forkType; key->fileID = extentFileKey->fileID; key->startBlock = extentFileKey->startBlock; - btIterator->hint.index = 0; - btIterator->hint.nodeNum = extentBTreeHint; + btIterator.hint.index = 0; + btIterator.hint.nodeNum = extentBTreeHint; btRecord.bufferAddress = &foundData; btRecord.itemSize = sizeof(HFSExtentRecord); btRecord.itemCount = 1; - err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); + err = BTSearchRecord(btFCB, &btIterator, &btRecord, &btRecordSize, &btIterator); if (err == noErr) err = HFSPlusToHFSExtents(extentData, (HFSExtentDescriptor *)&foundData); if (err == noErr) - err = BTReplaceRecord(btFCB, btIterator, &btRecord, 
btRecordSize); + err = BTReplaceRecord(btFCB, &btIterator, &btRecord, btRecordSize); (void) BTFlushPath(btFCB); } else { // HFS Plus volume HFSPlusExtentRecord foundData; // The extent data actually found - BlockMoveData(extentFileKey, &btIterator->key, sizeof(HFSPlusExtentKey)); + BlockMoveData(extentFileKey, &btIterator.key, sizeof(HFSPlusExtentKey)); - btIterator->hint.index = 0; - btIterator->hint.nodeNum = extentBTreeHint; + btIterator.hint.index = 0; + btIterator.hint.nodeNum = extentBTreeHint; btRecord.bufferAddress = &foundData; btRecord.itemSize = sizeof(HFSPlusExtentRecord); btRecord.itemCount = 1; - err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); + err = BTSearchRecord(btFCB, &btIterator, &btRecord, &btRecordSize, &btIterator); if (err == noErr) { BlockMoveData(extentData, &foundData, sizeof(HFSPlusExtentRecord)); - err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); + err = BTReplaceRecord(btFCB, &btIterator, &btRecord, btRecordSize); } (void) BTFlushPath(btFCB); } hfs_systemfile_unlock(vcb, lockflags); - FREE(btIterator, M_TEMP); } return err; @@ -1931,7 +1943,7 @@ static OSErr HFSPlusToHFSExtents( #if DEBUG_BUILD if (oldExtents[3].startBlock || oldExtents[3].blockCount) { - DebugStr("\pExtentRecord with > 3 extents is invalid for HFS"); + DebugStr("ExtentRecord with > 3 extents is invalid for HFS"); err = fsDSIntErr; } #endif diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index be4d28c5e..9ea66862c 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,11 +113,11 @@ static OSErr ReadBitmapBlock( ExtendedVCB *vcb, u_int32_t bit, u_int32_t **buffer, - u_int32_t *blockRef); + uintptr_t *blockRef); static OSErr ReleaseBitmapBlock( ExtendedVCB *vcb, - u_int32_t blockRef, + uintptr_t blockRef, Boolean dirty); static OSErr BlockAllocateAny( @@ -195,6 +195,39 @@ static OSErr BlockAllocateKnown( ; The volume bitmap is read and updated; the volume bitmap cache may be changed. ;________________________________________________________________________________ */ +static void +sanity_check_free_ext(__unused ExtendedVCB *vcb, __unused int check_allocated) +{ +#if DEBUG + u_int32_t i, j; + /* Debug-only audit of vcb->vcbFreeExt[]: panics on the first inconsistent slot; compiled to an empty body when DEBUG is off. */ + for(i=0; i < vcb->vcbFreeExtCnt; i++) { + u_int32_t start, nblocks; + + start = vcb->vcbFreeExt[i].startBlock; + nblocks = vcb->vcbFreeExt[i].blockCount; + + /* A slot in the free extent array must describe at least one block. */ + if (nblocks == 0) { + panic("hfs: %p: slot %d in the free extent array had a zero count (%d)\n", vcb, i, start); + } + /* When check_allocated is set, a slot that hfs_isallocated() flags is reported as bad. */ + if (check_allocated && hfs_isallocated(vcb, start, nblocks)) { + panic("hfs: %p: slot %d in the free extent array is bad (%d / %d)\n", + vcb, i, start, nblocks); + } + /* No later slot may share this slot's start block. */ + for(j=i+1; j < vcb->vcbFreeExtCnt; j++) { + if (start == vcb->vcbFreeExt[j].startBlock) { + panic("hfs: %p: slot %d/%d are dups?! 
(%d / %d ; %d / %d)\n", + vcb, i, j, start, nblocks, vcb->vcbFreeExt[j].startBlock, + vcb->vcbFreeExt[j].blockCount); /* second pair reports slot j (the duplicate), not slot i a second time */ + } + } + } +#endif +} + __private_extern__ OSErr BlockAllocate ( @@ -248,7 +281,11 @@ OSErr BlockAllocate ( // if (startingBlock == 0) { HFS_MOUNT_LOCK(vcb, TRUE); - startingBlock = vcb->nextAllocation; + if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + startingBlock = vcb->sparseAllocation; + } else { + startingBlock = vcb->nextAllocation; + } HFS_MOUNT_UNLOCK(vcb, TRUE); updateAllocPtr = true; } @@ -271,9 +308,8 @@ OSErr BlockAllocate ( (*actualStartBlock > startingBlock) && ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { - HFS_MOUNT_LOCK(vcb, TRUE); - HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock); - HFS_MOUNT_UNLOCK(vcb, TRUE); + + updateAllocPtr = true; } } else { /* @@ -301,6 +337,8 @@ OSErr BlockAllocate ( // still need to update things like the free block count). // if (*actualNumBlocks != 0) { + int i,j; + // // If we used the volume's roving allocation pointer, then we need to update it. 
// Adding in the length of the current allocation might reduce the next allocate @@ -311,11 +349,42 @@ OSErr BlockAllocate ( // HFS_MOUNT_LOCK(vcb, TRUE); + if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) { + vcb->sparseAllocation = *actualStartBlock; + } + if (*actualNumBlocks < vcb->hfs_freed_block_count) { + vcb->hfs_freed_block_count -= *actualNumBlocks; + } else { + vcb->hfs_freed_block_count = 0; + } + if (updateAllocPtr && ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { HFS_UPDATE_NEXT_ALLOCATION(vcb, *actualStartBlock); } + + for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) { + u_int32_t start, end; + + start = vcb->vcbFreeExt[i].startBlock; + end = start + vcb->vcbFreeExt[i].blockCount; + + if ( (*actualStartBlock >= start && *actualStartBlock < end) + || ((*actualStartBlock + *actualNumBlocks) > start && *actualStartBlock < start)) { + + for(j=i; j < (int)vcb->vcbFreeExtCnt-1; j++) { + vcb->vcbFreeExt[j] = vcb->vcbFreeExt[j+1]; + } + + vcb->vcbFreeExtCnt--; + i--; // so we'll check the guy we just copied down... 
+ + // keep looping because we may have invalidated more + // than one entry in the array + } + } + // // Update the number of free blocks on the volume // @@ -323,6 +392,8 @@ OSErr BlockAllocate ( MarkVCBDirty(vcb); HFS_MOUNT_UNLOCK(vcb, TRUE); + sanity_check_free_ext(vcb, 1); + hfs_generate_volume_notifications(VCBTOHFS(vcb)); } @@ -357,6 +428,7 @@ OSErr BlockDeallocate ( u_int32_t numBlocks) // Number of contiguous blocks to deallocate { OSErr err; + u_int32_t tempWord; // // If no blocks to deallocate, then exit early @@ -378,11 +450,68 @@ OSErr BlockDeallocate ( // HFS_MOUNT_LOCK(vcb, TRUE); vcb->freeBlocks += numBlocks; - if (vcb->nextAllocation == (firstBlock + numBlocks)) + vcb->hfs_freed_block_count += numBlocks; + if (firstBlock < vcb->sparseAllocation) { + vcb->sparseAllocation = firstBlock; + } + + if (vcb->nextAllocation == (firstBlock + numBlocks)) { HFS_UPDATE_NEXT_ALLOCATION(vcb, (vcb->nextAllocation - numBlocks)); + } + + tempWord = vcb->vcbFreeExtCnt; + // Add this free chunk to the free extent list + if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + // Sorted by start block + if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].startBlock > firstBlock) + --tempWord; + if (tempWord < kMaxFreeExtents) + { + // We're going to add this extent. Bubble any smaller extents down in the list. 
+ while (tempWord && vcb->vcbFreeExt[tempWord-1].startBlock > firstBlock) + { + vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1]; + if (vcb->vcbFreeExt[tempWord].startBlock < vcb->sparseAllocation) { + vcb->sparseAllocation = vcb->vcbFreeExt[tempWord].startBlock; + } + --tempWord; + } + vcb->vcbFreeExt[tempWord].startBlock = firstBlock; + vcb->vcbFreeExt[tempWord].blockCount = numBlocks; + + if (vcb->vcbFreeExtCnt < kMaxFreeExtents) { + ++vcb->vcbFreeExtCnt; + } + } + } else { + // Sorted by num blocks + if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < numBlocks) + --tempWord; + if (tempWord < kMaxFreeExtents) + { + // We're going to add this extent. Bubble any smaller extents down in the list. + while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < numBlocks) + { + vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1]; + if (vcb->vcbFreeExt[tempWord].startBlock < vcb->sparseAllocation) { + vcb->sparseAllocation = vcb->vcbFreeExt[tempWord].startBlock; + } + --tempWord; + } + vcb->vcbFreeExt[tempWord].startBlock = firstBlock; + vcb->vcbFreeExt[tempWord].blockCount = numBlocks; + + if (vcb->vcbFreeExtCnt < kMaxFreeExtents) { + ++vcb->vcbFreeExtCnt; + } + } + } + MarkVCBDirty(vcb); HFS_MOUNT_UNLOCK(vcb, TRUE); + sanity_check_free_ext(vcb, 1); + hfs_generate_volume_notifications(VCBTOHFS(vcb)); Exit: @@ -401,7 +530,7 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb) { u_int32_t freeblocks; u_int32_t *currCache; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t bit; u_int32_t lastbit; int bytesleft; @@ -494,7 +623,7 @@ static OSErr ReadBitmapBlock( ExtendedVCB *vcb, u_int32_t bit, u_int32_t **buffer, - u_int32_t *blockRef) + uintptr_t *blockRef) { OSErr err; struct buf *bp = NULL; @@ -526,7 +655,7 @@ static OSErr ReadBitmapBlock( *blockRef = 0; *buffer = NULL; } else { - *blockRef = (u_int32_t)bp; + *blockRef = (uintptr_t)bp; *buffer = (u_int32_t *)buf_dataptr(bp); } } @@ -550,14 +679,14 @@ static OSErr ReadBitmapBlock( */ 
static OSErr ReleaseBitmapBlock( ExtendedVCB *vcb, - u_int32_t blockRef, + uintptr_t blockRef, Boolean dirty) { struct buf *bp = (struct buf *)blockRef; if (blockRef == 0) { if (dirty) - panic("ReleaseBitmapBlock: missing bp"); + panic("hfs: ReleaseBitmapBlock: missing bp"); return (0); } @@ -684,7 +813,7 @@ static OSErr BlockAllocateAny( register u_int32_t wordsLeft; // Number of words left in this bitmap block u_int32_t *buffer = NULL; u_int32_t *currCache = NULL; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; Boolean dirty = false; @@ -865,7 +994,7 @@ static OSErr BlockAllocateAny( // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) - panic("BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); + panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); } else { *actualStartBlock = 0; @@ -899,6 +1028,7 @@ Function: Try to allocate space from known free space in the free dskFulErr Free extent cache is empty _______________________________________________________________________ */ + static OSErr BlockAllocateKnown( ExtendedVCB *vcb, u_int32_t maxBlocks, @@ -909,8 +1039,8 @@ static OSErr BlockAllocateKnown( u_int32_t i; u_int32_t foundBlocks; u_int32_t newStartBlock, newBlockCount; - - if (vcb->vcbFreeExtCnt == 0) + + if (vcb->vcbFreeExtCnt == 0 || vcb->vcbFreeExt[0].blockCount == 0) return dskFulErr; // Just grab up to maxBlocks of the first (largest) free exent. 
@@ -920,9 +1050,26 @@ static OSErr BlockAllocateKnown( foundBlocks = maxBlocks; *actualNumBlocks = foundBlocks; + if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + // since sparse volumes keep the free extent list sorted by starting + // block number, the list won't get re-ordered, it may only shrink + // + vcb->vcbFreeExt[0].startBlock += foundBlocks; + vcb->vcbFreeExt[0].blockCount -= foundBlocks; + if (vcb->vcbFreeExt[0].blockCount == 0) { + for(i=1; i < vcb->vcbFreeExtCnt; i++) { + vcb->vcbFreeExt[i-1] = vcb->vcbFreeExt[i]; + } + vcb->vcbFreeExtCnt--; + } + + goto done; + } + // Adjust the start and length of that extent. newStartBlock = vcb->vcbFreeExt[0].startBlock + foundBlocks; newBlockCount = vcb->vcbFreeExt[0].blockCount - foundBlocks; + // The first extent might not be the largest anymore. Bubble up any // (now larger) extents to the top of the list. @@ -942,9 +1089,9 @@ static OSErr BlockAllocateKnown( // If this is now the smallest known free extent, then it might be smaller than // other extents we didn't keep track of. So, just forget about this extent. // After the previous loop, (i-1) is the index of the extent we just allocated from. 
- if (i == vcb->vcbFreeExtCnt) + if (newBlockCount == 0) { - // It's now the smallest extent, so forget about it + // then just reduce the number of free extents since this guy got deleted --vcb->vcbFreeExtCnt; } else @@ -953,7 +1100,8 @@ static OSErr BlockAllocateKnown( vcb->vcbFreeExt[i-1].startBlock = newStartBlock; vcb->vcbFreeExt[i-1].blockCount = newBlockCount; } - + +done: // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { @@ -971,6 +1119,8 @@ static OSErr BlockAllocateKnown( err = BlockMarkAllocated(vcb, *actualStartBlock, *actualNumBlocks); } + sanity_check_free_ext(vcb, 1); + return err; } @@ -1004,7 +1154,7 @@ OSErr BlockMarkAllocated( u_int32_t firstBit; // Bit index within word of first bit to allocate u_int32_t numBits; // Number of bits in word to allocate u_int32_t *buffer = NULL; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; // XXXdbg @@ -1052,7 +1202,7 @@ OSErr BlockMarkAllocated( } #if DEBUG_BUILD if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { - panic("BlockMarkAllocated: blocks already allocated!"); + panic("hfs: BlockMarkAllocated: blocks already allocated!"); } #endif *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap @@ -1090,7 +1240,7 @@ OSErr BlockMarkAllocated( } #if DEBUG_BUILD if (*currentWord != 0) { - panic("BlockMarkAllocated: blocks already allocated!"); + panic("hfs: BlockMarkAllocated: blocks already allocated!"); } #endif *currentWord = SWAP_BE32 (bitMask); @@ -1128,7 +1278,7 @@ OSErr BlockMarkAllocated( } #if DEBUG_BUILD if ((*currentWord & SWAP_BE32 (bitMask)) != 0) { - panic("BlockMarkAllocated: blocks already allocated!"); + panic("hfs: BlockMarkAllocated: blocks already allocated!"); } #endif *currentWord |= SWAP_BE32 (bitMask); // set the bits in the bitmap @@ -1173,7 +1323,7 @@ OSErr BlockMarkFree( u_int32_t firstBit; // Bit index within word of first bit to allocate u_int32_t numBits; // Number of bits in word to allocate 
u_int32_t *buffer = NULL; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; // XXXdbg @@ -1329,7 +1479,7 @@ OSErr BlockMarkFree( Corruption: #if DEBUG_BUILD - panic("BlockMarkFree: blocks not allocated!"); + panic("hfs: BlockMarkFree: blocks not allocated!"); #else printf ("hfs: BlockMarkFree() trying to free unallocated blocks on volume %s\n", vcb->vcbVN); hfs_mark_volume_inconsistent(vcb); @@ -1386,8 +1536,9 @@ static OSErr BlockFindContiguous( register u_int32_t bitMask; register u_int32_t wordsLeft; register u_int32_t tempWord; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t wordsPerBlock; + u_int32_t j, updated_free_extents = 0, really_add; /* * When we're skipping the metadata zone and the start/end @@ -1625,23 +1776,71 @@ static OSErr BlockFindContiguous( // This free chunk wasn't big enough. Try inserting it into the free extent cache in case // the allocation wasn't forced contiguous. + really_add = 0; + for(j=0; j < vcb->vcbFreeExtCnt; j++) { + u_int32_t start, end; + + start = vcb->vcbFreeExt[j].startBlock; + end = start + vcb->vcbFreeExt[j].blockCount; + + if ( (firstBlock >= start && firstBlock < end) + || ((firstBlock + foundBlocks) > start && firstBlock < start)) { + + // there's overlap with an existing entry so do not add this + break; + } + + } + + if (j >= vcb->vcbFreeExtCnt) { + really_add = 1; + } + tempWord = vcb->vcbFreeExtCnt; - if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < foundBlocks) - --tempWord; - if (tempWord < kMaxFreeExtents) - { - // We're going to add this extent. Bubble any smaller extents down in the list. 
- while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < foundBlocks) - { - vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1]; + if (really_add && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { + // Sorted by starting block + if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].startBlock > firstBlock) --tempWord; + if (tempWord < kMaxFreeExtents) + { + // We're going to add this extent. Bubble any smaller extents down in the list. + while (tempWord && vcb->vcbFreeExt[tempWord-1].startBlock > firstBlock) + { + vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1]; + --tempWord; + } + vcb->vcbFreeExt[tempWord].startBlock = firstBlock; + vcb->vcbFreeExt[tempWord].blockCount = foundBlocks; + + if (vcb->vcbFreeExtCnt < kMaxFreeExtents) { + ++vcb->vcbFreeExtCnt; + } + updated_free_extents = 1; } - vcb->vcbFreeExt[tempWord].startBlock = firstBlock; - vcb->vcbFreeExt[tempWord].blockCount = foundBlocks; + } else if (really_add) { + // Sorted by blockCount + if (tempWord == kMaxFreeExtents && vcb->vcbFreeExt[kMaxFreeExtents-1].blockCount < foundBlocks) + --tempWord; + if (tempWord < kMaxFreeExtents) + { + // We're going to add this extent. Bubble any smaller extents down in the list. 
+ while (tempWord && vcb->vcbFreeExt[tempWord-1].blockCount < foundBlocks) + { + vcb->vcbFreeExt[tempWord] = vcb->vcbFreeExt[tempWord-1]; + --tempWord; + } + vcb->vcbFreeExt[tempWord].startBlock = firstBlock; + vcb->vcbFreeExt[tempWord].blockCount = foundBlocks; - if (vcb->vcbFreeExtCnt < kMaxFreeExtents) - ++vcb->vcbFreeExtCnt; + if (vcb->vcbFreeExtCnt < kMaxFreeExtents) { + ++vcb->vcbFreeExtCnt; + } + updated_free_extents = 1; + } } + + sanity_check_free_ext(vcb, 0); + } while (currentBlock < stopBlock); LoopExit: @@ -1663,15 +1862,38 @@ static OSErr BlockFindContiguous( * Sanity check for overflow */ if ((firstBlock + foundBlocks) > vcb->allocLimit) { - panic("blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", + panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", vcb->vcbVN, startingBlock, endingBlock, currentBlock, firstBlock, stopBlock, minBlocks, foundBlocks); } } + if (updated_free_extents && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { + int i; + u_int32_t min_start = vcb->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + for(i=0; i < (int)vcb->vcbFreeExtCnt; i++) { + if (vcb->vcbFreeExt[i].startBlock < min_start) { + min_start = vcb->vcbFreeExt[i].startBlock; + } + } + if (min_start != vcb->totalBlocks) { + if (min_start < vcb->nextAllocation) { + vcb->nextAllocation = min_start; + } + if (min_start < vcb->sparseAllocation) { + vcb->sparseAllocation = min_start; + } + } + } + if (buffer) (void) ReleaseBitmapBlock(vcb, blockRef, false); + sanity_check_free_ext(vcb, 1); + return err; } @@ -1682,7 +1904,7 @@ static OSErr BlockFindContiguous( */ __private_extern__ int -hfs_isallocated(struct hfsmount *hfsmp, u_long startingBlock, u_long numBlocks) +hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t 
numBlocks) { u_int32_t *currentWord; // Pointer to current word within bitmap block u_int32_t wordsLeft; // Number of words left in this bitmap block @@ -1690,7 +1912,7 @@ hfs_isallocated(struct hfsmount *hfsmp, u_long startingBlock, u_long numBlocks) u_int32_t firstBit; // Bit index within word of first bit to allocate u_int32_t numBits; // Number of bits in word to allocate u_int32_t *buffer = NULL; - u_int32_t blockRef; + uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; int inuse = 0; diff --git a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c index 4436f0146..2eece14e1 100644 --- a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c +++ b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c @@ -121,7 +121,7 @@ GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * e /* * Count filename extension characters (if any) */ -static u_int32_t +__private_extern__ u_int32_t CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length ) { u_int32_t i; diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index 4e2a1df12..c683d0b74 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -361,6 +361,7 @@ extern int BTReserveSpace(FCB *file, int operations, void * data); extern int BTReleaseReserve(FCB *file, void * data); +extern int BTZeroUnusedNodes(FCB *file); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfscommon/headers/BTreesPrivate.h b/bsd/hfs/hfscommon/headers/BTreesPrivate.h index 6b7a1eb03..3b8dd7ac1 100644 --- a/bsd/hfs/hfscommon/headers/BTreesPrivate.h +++ b/bsd/hfs/hfscommon/headers/BTreesPrivate.h @@ -289,7 +289,7 @@ typedef BTreeKeyPtr KeyPtr; //////////////////////////////////// Macros ///////////////////////////////////// #if DEBUG_BUILD - #define Panic( message ) DebugStr( (ConstStr255Param) message ) + #define Panic( message ) DebugStr( message ) #define PanicIf( condition, message ) do { if ( condition != 0 ) DebugStr( message ); } while(0) #else #define Panic( message ) do { } while(0) diff --git a/bsd/hfs/hfscommon/headers/CatalogPrivate.h b/bsd/hfs/hfscommon/headers/CatalogPrivate.h index 319cd0ad9..bd3f00ddd 100644 --- a/bsd/hfs/hfscommon/headers/CatalogPrivate.h +++ b/bsd/hfs/hfscommon/headers/CatalogPrivate.h @@ -90,9 +90,6 @@ // -extern OSErr LocateCatalogNode( const ExtendedVCB *volume, HFSCatalogNodeID folderID, const CatalogName *name, - u_int32_t hint, CatalogKey *key, CatalogRecord *data, u_int32_t *newHint); - extern OSErr LocateCatalogNodeByKey ( const ExtendedVCB *volume, u_int32_t hint, CatalogKey *keyPtr, CatalogRecord *dataPtr, u_int32_t *newHint ); diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h index 7bb16bc5a..2f199e2c7 100644 --- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h +++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h @@ -52,26 +52,10 @@ #include "../../hfs_cnode.h" -#if PRAGMA_ONCE -#pragma once -#endif - #ifdef __cplusplus extern "C" { #endif -#if PRAGMA_IMPORT -#pragma import on -#endif - -#if PRAGMA_STRUCT_ALIGN - #pragma options align=mac68k -#elif 
PRAGMA_STRUCT_PACKPUSH - #pragma pack(push, 2) -#elif PRAGMA_STRUCT_PACK - #pragma pack(2) -#endif - /* CatalogNodeID is used to track catalog objects */ typedef u_int32_t HFSCatalogNodeID; @@ -211,15 +195,6 @@ ExchangeFileIDs (ExtendedVCB * volume, typedef CALLBACK_API_C( int32_t , KeyCompareProcPtr )(void *a, void *b); -EXTERN_API_C( OSErr ) -SearchBTreeRecord (FileReference refNum, - const void * key, - u_int32_t hint, - void * foundKey, - void * data, - u_int16_t * dataSize, - u_int32_t * newHint); - EXTERN_API_C( OSErr ) ReplaceBTreeRecord (FileReference refNum, const void * key, @@ -313,20 +288,6 @@ EXTERN_API_C( u_int32_t ) UTCToLocal (u_int32_t utcTime); -#if PRAGMA_STRUCT_ALIGN - #pragma options align=reset -#elif PRAGMA_STRUCT_PACKPUSH - #pragma pack(pop) -#elif PRAGMA_STRUCT_PACK - #pragma pack() -#endif - -#ifdef PRAGMA_IMPORT_OFF -#pragma import off -#elif PRAGMA_IMPORT -#pragma import reset -#endif - #ifdef __cplusplus } #endif diff --git a/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h b/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h index 8fbb287f9..8508f5087 100644 --- a/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h +++ b/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h @@ -111,6 +111,7 @@ extern int32_t FastRelString( ConstStr255Param str1, ConstStr255Param str2 ); extern HFSCatalogNodeID GetEmbeddedFileID( ConstStr31Param filename, u_int32_t length, u_int32_t *prefixLength ); +extern u_int32_t CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length ); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/rangelist.c b/bsd/hfs/rangelist.c index e21a962dd..74ced2e58 100644 --- a/bsd/hfs/rangelist.c +++ b/bsd/hfs/rangelist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2001, 2006-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,17 +45,13 @@ static void rl_collapse_neighbors(struct rl_head *rangelist, struct rl_entry *ra static void rl_verify(struct rl_head *rangelist) { struct rl_entry *entry; + struct rl_entry *next; off_t limit = 0; - if (CIRCLEQ_EMPTY(rangelist)) return; - entry = CIRCLEQ_FIRST(rangelist); - while (1) { - if (CIRCLEQ_NEXT(entry, rl_link) == entry) panic("rl_verify: circular rangelist?!"); - if ((limit > 0) && (entry->rl_start <= limit)) panic("rl_verify: bad entry start?!"); - if (entry->rl_end < entry->rl_start) panic("rl_verify: bad entry end?!"); + TAILQ_FOREACH_SAFE(rangelist, entry, rl_link, next) { + if ((limit > 0) && (entry->rl_start <= limit)) panic("hfs: rl_verify: bad entry start?!"); + if (entry->rl_end < entry->rl_start) panic("hfs: rl_verify: bad entry end?!"); limit = entry->rl_end; - if (entry == CIRCLEQ_LAST(rangelist)) return; - entry = CIRCLEQ_NEXT(entry, rl_link); }; } #endif @@ -68,7 +64,7 @@ rl_verify(struct rl_head *rangelist) { void rl_init(struct rl_head *rangelist) { - CIRCLEQ_INIT(rangelist); + TAILQ_INIT(rangelist); } @@ -84,7 +80,7 @@ rl_add(off_t start, off_t end, struct rl_head *rangelist) enum rl_overlaptype ovcase; #ifdef RL_DIAGNOSTIC - if (end < start) panic("rl_add: end < start?!"); + if (end < start) panic("hfs: rl_add: end < start?!"); #endif ovcase = rl_scan(rangelist, start, end, &overlap); @@ -112,9 +108,9 @@ rl_add(off_t start, off_t end, struct rl_head *rangelist) /* Link in the new range: */ if (overlap) { - CIRCLEQ_INSERT_AFTER(rangelist, overlap, range, rl_link); + TAILQ_INSERT_AFTER(rangelist, overlap, range, rl_link); } else { - CIRCLEQ_INSERT_HEAD(rangelist, range, rl_link); + TAILQ_INSERT_HEAD(rangelist, range, rl_link); } /* Check to see if any ranges can be combined (possibly including the immediately @@ -174,22 +170,22 @@ void rl_remove(off_t start, off_t end, struct rl_head *rangelist) { struct rl_entry *range, *next_range, *overlap, *splitrange; - int ovcase, 
moretotest; + int ovcase; #ifdef RL_DIAGNOSTIC - if (end < start) panic("rl_remove: end < start?!"); + if (end < start) panic("hfs: rl_remove: end < start?!"); #endif - if (CIRCLEQ_EMPTY(rangelist)) { + if (TAILQ_EMPTY(rangelist)) { return; }; - range = CIRCLEQ_FIRST(rangelist); + range = TAILQ_FIRST(rangelist); while ((ovcase = rl_scan_from(rangelist, start, end, &overlap, range))) { switch (ovcase) { case RL_MATCHINGOVERLAP: /* 1: overlap == range */ - CIRCLEQ_REMOVE(rangelist, overlap, rl_link); + TAILQ_REMOVE(rangelist, overlap, rl_link); FREE(overlap, M_TEMP); break; @@ -215,33 +211,26 @@ rl_remove(off_t start, off_t end, struct rl_head *rangelist) /* * Now link the new entry into the range list after the range from which it was split: */ - CIRCLEQ_INSERT_AFTER(rangelist, overlap, splitrange, rl_link); + TAILQ_INSERT_AFTER(rangelist, overlap, splitrange, rl_link); break; case RL_OVERLAPISCONTAINED: /* 3: range contains overlap */ - moretotest = (overlap != CIRCLEQ_LAST(rangelist)); -#ifdef RL_DIAGNOSTIC - if (CIRCLEQ_NEXT(overlap, rl_link) == overlap) panic("rl_remove: circular range list?!"); -#endif - next_range = CIRCLEQ_NEXT(overlap, rl_link); /* Check before discarding overlap entry */ - CIRCLEQ_REMOVE(rangelist, overlap, rl_link); + /* Check before discarding overlap entry */ + next_range = TAILQ_NEXT(overlap, rl_link); + TAILQ_REMOVE(rangelist, overlap, rl_link); FREE(overlap, M_TEMP); - if (moretotest) { + if (next_range) { range = next_range; continue; }; break; case RL_OVERLAPSTARTSBEFORE: /* 4: overlap starts before range */ - moretotest = (overlap != CIRCLEQ_LAST(rangelist)); overlap->rl_end = start - 1; - if (moretotest) { -#ifdef RL_DIAGNOSTIC - if (CIRCLEQ_NEXT(overlap, rl_link) == overlap) panic("rl_remove: circular range list?!"); -#endif - range = CIRCLEQ_NEXT(overlap, rl_link); + range = TAILQ_NEXT(overlap, rl_link); + if (range) { continue; - }; + } break; case RL_OVERLAPENDSAFTER: /* 5: overlap ends after range */ @@ -271,12 +260,12 @@ 
rl_scan(struct rl_head *rangelist, off_t end, struct rl_entry **overlap) { - if (CIRCLEQ_EMPTY(rangelist)) { + if (TAILQ_EMPTY(rangelist)) { *overlap = NULL; return RL_NOOVERLAP; }; - return rl_scan_from(rangelist, start, end, overlap, CIRCLEQ_FIRST(rangelist)); + return rl_scan_from(rangelist, start, end, overlap, TAILQ_FIRST(rangelist)); } @@ -295,7 +284,7 @@ rl_scan_from(struct rl_head *rangelist, struct rl_entry **overlap, struct rl_entry *range) { - if (CIRCLEQ_EMPTY(rangelist)) { + if (TAILQ_EMPTY(rangelist)) { *overlap = NULL; return RL_NOOVERLAP; }; @@ -325,14 +314,13 @@ rl_scan_from(struct rl_head *rangelist, return RL_NOOVERLAP; }; + range = TAILQ_NEXT(range, rl_link); /* Check the other entries in the list: */ - if (range == CIRCLEQ_LAST(rangelist)) { + if (range == NULL) { return RL_NOOVERLAP; - }; -#ifdef RL_DIAGNOSTIC - if (CIRCLEQ_NEXT(range, rl_link) == range) panic("rl_scan_from: circular range list?!"); -#endif - *overlap = range = CIRCLEQ_NEXT(range, rl_link); + } + + *overlap = range; continue; } @@ -370,7 +358,7 @@ rl_scan_from(struct rl_head *rangelist, /* Control should never reach here... 
*/ #ifdef RL_DIAGNOSTIC - panic("rl_scan_from: unhandled overlap condition?!"); + panic("hfs: rl_scan_from: unhandled overlap condition?!"); #endif } @@ -380,28 +368,22 @@ rl_scan_from(struct rl_head *rangelist, static void rl_collapse_forwards(struct rl_head *rangelist, struct rl_entry *range) { - struct rl_entry *next_range; - - while (1) { - if (range == CIRCLEQ_LAST(rangelist)) return; - -#ifdef RL_DIAGNOSTIC - if (CIRCLEQ_NEXT(range, rl_link) == range) panic("rl_collapse_forwards: circular range list?!"); -#endif - next_range = CIRCLEQ_NEXT(range, rl_link); - if ((range->rl_end != RL_INFINITY) && (range->rl_end < next_range->rl_start - 1)) return; + struct rl_entry *next_range; + + while ((next_range = TAILQ_NEXT(range, rl_link))) { + if ((range->rl_end != RL_INFINITY) && (range->rl_end < next_range->rl_start - 1)) return; - /* Expand this range to include the next range: */ - range->rl_end = next_range->rl_end; - - /* Remove the now covered range from the list: */ - CIRCLEQ_REMOVE(rangelist, next_range, rl_link); - FREE(next_range, M_TEMP); + /* Expand this range to include the next range: */ + range->rl_end = next_range->rl_end; + + /* Remove the now covered range from the list: */ + TAILQ_REMOVE(rangelist, next_range, rl_link); + FREE(next_range, M_TEMP); #ifdef RL_DIAGNOSTIC rl_verify(rangelist); #endif - }; + }; } @@ -410,14 +392,8 @@ static void rl_collapse_backwards(struct rl_head *rangelist, struct rl_entry *range) { struct rl_entry *prev_range; - while (1) { - if (range == CIRCLEQ_FIRST(rangelist)) return; - -#ifdef RL_DIAGNOSTIC - if (CIRCLEQ_PREV(range, rl_link) == range) panic("rl_collapse_backwards: circular range list?!"); -#endif - prev_range = CIRCLEQ_PREV(range, rl_link); - if (prev_range->rl_end < range->rl_start - 1) { + while ((prev_range = TAILQ_PREV(range, rl_head, rl_link))) { + if (prev_range->rl_end < range->rl_start -1) { #ifdef RL_DIAGNOSTIC rl_verify(rangelist); #endif @@ -428,7 +404,7 @@ rl_collapse_backwards(struct rl_head 
*rangelist, struct rl_entry *range) { range->rl_start = prev_range->rl_start; /* Remove the now covered range from the list: */ - CIRCLEQ_REMOVE(rangelist, prev_range, rl_link); + TAILQ_REMOVE(rangelist, prev_range, rl_link); FREE(prev_range, M_TEMP); }; } diff --git a/bsd/hfs/rangelist.h b/bsd/hfs/rangelist.h index a859d222a..7cfa0e8d9 100644 --- a/bsd/hfs/rangelist.h +++ b/bsd/hfs/rangelist.h @@ -46,10 +46,10 @@ enum rl_overlaptype { #define RL_INFINITY ((off_t)-1) -CIRCLEQ_HEAD(rl_head, rl_entry); +TAILQ_HEAD(rl_head, rl_entry); struct rl_entry { - CIRCLEQ_ENTRY(rl_entry) rl_link; + TAILQ_ENTRY(rl_entry) rl_link; off_t rl_start; off_t rl_end; }; diff --git a/bsd/i386/Makefile b/bsd/i386/Makefile index 9f8e9e84b..96c9ddbb8 100644 --- a/bsd/i386/Makefile +++ b/bsd/i386/Makefile @@ -9,12 +9,12 @@ include $(MakeInc_def) DATAFILES = \ endian.h fasttrap_isa.h param.h \ - profile.h setjmp.h signal.h \ + profile.h setjmp.h signal.h limits.h _limits.h \ types.h vmparam.h _structs.h _types.h _param.h KERNELFILES = \ endian.h param.h \ - profile.h setjmp.h signal.h \ + profile.h setjmp.h signal.h limits.h _limits.h \ types.h vmparam.h _structs.h _types.h _param.h diff --git a/bsd/i386/_limits.h b/bsd/i386/_limits.h new file mode 100644 index 000000000..3b9e7a6f7 --- /dev/null +++ b/bsd/i386/_limits.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _I386__LIMITS_H_ +#define _I386__LIMITS_H_ + +#define __DARWIN_CLK_TCK 100 /* ticks per second */ + +#endif /* _I386__LIMITS_H_ */ diff --git a/bsd/i386/_param.h b/bsd/i386/_param.h index 71bf27959..61f0d24c3 100644 --- a/bsd/i386/_param.h +++ b/bsd/i386/_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,9 +37,10 @@ * cast to any desired pointer type. */ #define __DARWIN_ALIGNBYTES (sizeof(__darwin_size_t) - 1) -#define __DARWIN_ALIGN(p) ((__darwin_size_t)((char *)(__darwin_intptr_t)(p) + __DARWIN_ALIGNBYTES) &~ __DARWIN_ALIGNBYTES) +#define __DARWIN_ALIGN(p) ((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES) &~ __DARWIN_ALIGNBYTES) + +#define __DARWIN_ALIGNBYTES32 (sizeof(__uint32_t) - 1) +#define __DARWIN_ALIGN32(p) ((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES32) &~ __DARWIN_ALIGNBYTES32) -#define __DARWIN_ALIGNBYTES32 (sizeof(__uint32_t) - 1) -#define __DARWIN_ALIGN32(p) ((__darwin_size_t)((char *)(__darwin_intptr_t)(p) + __DARWIN_ALIGNBYTES32) &~ __DARWIN_ALIGNBYTES32) #endif /* _I386__PARAM_H_ */ diff --git a/bsd/i386/dis_tables.h b/bsd/i386/dis_tables.h index 548b22b60..a0db708fe 100644 --- a/bsd/i386/dis_tables.h +++ b/bsd/i386/dis_tables.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,7 +30,7 @@ #ifndef _DIS_TABLES_H #define _DIS_TABLES_H -/* #pragma ident "@(#)dis_tables.h 1.8 06/05/30 SMI" */ +/* #pragma ident "@(#)dis_tables.h 1.10 07/07/10 SMI" */ /* * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c @@ -87,7 +87,7 @@ typedef struct dis86 { uint_t d86_opnd_size; uint_t d86_addr_size; uint_t d86_got_modrm; - struct d86opnd d86_opnd[3]; /* up to 3 operands */ + struct d86opnd d86_opnd[4]; /* up to 4 operands */ int (*d86_check_func)(void *); int (*d86_get_byte)(void *); #ifdef DIS_TEXT @@ -102,6 +102,7 @@ typedef struct dis86 { extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); #define DIS_F_OCTAL 0x1 /* Print all numbers in octal */ +#define DIS_F_NOIMMSYM 0x2 /* Don't print symbols for immediates (.o) */ #ifdef DIS_TEXT extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uint64_t pc, diff --git a/EXTERNAL_HEADERS/i386/limits.h b/bsd/i386/limits.h similarity index 62% rename from EXTERNAL_HEADERS/i386/limits.h rename to bsd/i386/limits.h index 50bdd3c69..9ee7c03be 100644 --- a/EXTERNAL_HEADERS/i386/limits.h +++ b/bsd/i386/limits.h @@ -1,30 +1,3 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ /* * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. @@ -59,24 +32,19 @@ * * @(#)limits.h 8.3 (Berkeley) 1/4/94 */ -/* - * HISTORY - * - * 10-July-97 Umesh Vaishampayan (umeshv@apple.com) - * Avoid multiple includes. - */ #ifndef _I386_LIMITS_H_ #define _I386_LIMITS_H_ +#include #include #define CHAR_BIT 8 /* number of bits in a char */ #define MB_LEN_MAX 6 /* Allow 31 bit UTF2 */ -#ifndef CLK_TCK +#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) #define CLK_TCK __DARWIN_CLK_TCK /* ticks per second */ -#endif +#endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* * According to ANSI (section 2.2.4.2), the values below must be usable by @@ -103,23 +71,37 @@ #define INT_MAX 2147483647 /* max value for an int */ #define INT_MIN (-2147483647-1) /* min value for an int */ -#define ULONG_MAX 0xffffffff /* max value for an unsigned long */ -#define LONG_MAX 2147483647 /* max value for a long */ -#define LONG_MIN (-2147483647-1) /* min value for a long */ +#ifdef __LP64__ +#define ULONG_MAX 0xffffffffffffffffUL /* max unsigned long */ +#define LONG_MAX 0x7fffffffffffffffL /* max signed long */ +#define LONG_MIN (-0x7fffffffffffffffL-1) /* min signed long */ +#else /* !__LP64__ */ +#define ULONG_MAX 0xffffffffUL /* max unsigned long 
*/ +#define LONG_MAX 2147483647L /* max signed long */ +#define LONG_MIN (-2147483647L-1) /* min signed long */ +#endif /* __LP64__ */ + +#define ULLONG_MAX 0xffffffffffffffffULL /* max unsigned long long */ +#define LLONG_MAX 0x7fffffffffffffffLL /* max signed long long */ +#define LLONG_MIN (-0x7fffffffffffffffLL-1) /* min signed long long */ #if !defined(_ANSI_SOURCE) -#define SSIZE_MAX INT_MAX /* max value for a ssize_t */ +#ifdef __LP64__ +#define LONG_BIT 64 +#else /* !__LP64__ */ +#define LONG_BIT 32 +#endif /* __LP64__ */ +#define SSIZE_MAX LONG_MAX /* max value for a ssize_t */ +#define WORD_BIT 32 -#if !defined(_POSIX_SOURCE) -#define SIZE_T_MAX UINT_MAX /* max value for a size_t */ +#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) +#define SIZE_T_MAX ULONG_MAX /* max value for a size_t */ -/* GCC requires that quad constants be written as expressions. */ -#define UQUAD_MAX ((u_quad_t)0-1) /* max value for a uquad_t */ - /* max value for a quad_t */ -#define QUAD_MAX ((quad_t)(UQUAD_MAX >> 1)) -#define QUAD_MIN (-QUAD_MAX-1) /* min value for a quad_t */ +#define UQUAD_MAX ULLONG_MAX +#define QUAD_MAX LLONG_MAX +#define QUAD_MIN LLONG_MIN -#endif /* !_POSIX_SOURCE */ +#endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */ #endif /* !_ANSI_SOURCE */ #endif /* _I386_LIMITS_H_ */ diff --git a/bsd/i386/types.h b/bsd/i386/types.h index d02ab0bb2..3e31ca89c 100644 --- a/bsd/i386/types.h +++ b/bsd/i386/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,7 +28,7 @@ /* * Copyright 1995 NeXT Computer, Inc. All rights reserved. */ -/*- +/* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -106,7 +106,7 @@ typedef __darwin_intptr_t intptr_t; #endif #ifndef _UINTPTR_T #define _UINTPTR_T -typedef unsigned long int uintptr_t; +typedef unsigned long uintptr_t; #endif #if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) @@ -117,8 +117,46 @@ typedef int64_t user_ssize_t; typedef int64_t user_long_t; typedef u_int64_t user_ulong_t; typedef int64_t user_time_t; +typedef int64_t user_off_t; #define USER_ADDR_NULL ((user_addr_t) 0) #define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr))) + +#ifdef KERNEL + +/* + * These types are used when you know the word size of the target + * user process. They can be used to create struct layouts independent + * of the types and alignment requirements of the current running + * kernel. + */ + +/* + * The default ABI for the 32-bit Intel userspace aligns fundamental + * integral data types to their natural boundaries, with a maximum alignment + * of 4, even for 8-byte quantites. The default ABI for 64-bit Intel + * userspace aligns fundamental integral data types for their natural + * boundaries, including those in composite data types. PowerPC applications + * running under translation must conform to the 32-bit Intel ABI. 
+ */ + +typedef __uint64_t user64_addr_t __attribute__((aligned(8))); +typedef __uint64_t user64_size_t __attribute__((aligned(8))); +typedef __int64_t user64_ssize_t __attribute__((aligned(8))); +typedef __int64_t user64_long_t __attribute__((aligned(8))); +typedef __uint64_t user64_ulong_t __attribute__((aligned(8))); +typedef __int64_t user64_time_t __attribute__((aligned(8))); +typedef __int64_t user64_off_t __attribute__((aligned(8))); + +typedef __uint32_t user32_addr_t; +typedef __uint32_t user32_size_t; +typedef __int32_t user32_ssize_t; +typedef __int32_t user32_long_t; +typedef __uint32_t user32_ulong_t; +typedef __int32_t user32_time_t; +typedef __int64_t user32_off_t __attribute__((aligned(4))); + +#endif /* KERNEL */ + #endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* This defines the size of syscall arguments after copying into the kernel: */ diff --git a/bsd/i386/vmparam.h b/bsd/i386/vmparam.h index 37a73b4c3..b6389b47d 100644 --- a/bsd/i386/vmparam.h +++ b/bsd/i386/vmparam.h @@ -41,7 +41,7 @@ * Virtual memory related constants, all in bytes */ #ifndef DFLDSIZ -#define DFLDSIZ (6*1024*1024) /* initial data size limit */ +#define DFLDSIZ (RLIM_INFINITY) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (RLIM_INFINITY) /* max data size */ diff --git a/bsd/isofs/cd9660/TODO b/bsd/isofs/cd9660/TODO deleted file mode 100644 index 7951ff87c..000000000 --- a/bsd/isofs/cd9660/TODO +++ /dev/null @@ -1,47 +0,0 @@ -# $NetBSD: TODO,v 1.4 1994/07/19 11:34:48 mycroft Exp $ - - 1) should understand "older", original High Sierra ("CDROM001") type - - Not yet. ( I don't have this technical information, yet. ) - - 2) should understand Rock Ridge - - Yes, we have follows function. 
- - o Symbolic Link - o Real Name(long name) - o File Attribute - o Time stamp - o uid, gid - o Devices - o Relocated directories - - Except follows: - - o POSIX device number mapping - - There is some preliminary stuff in there that (ab-)uses the mknod - system call, but this needs a writable filesystem - - 5) should have name translation enabled by mount flag - - Yes. we can disable the Rock Ridge Extension by follows option; - - "mount -t isofs -o -norrip /dev/cd0d /cdrom" - - 6) should run as a user process, and not take up kernel space (cdroms - are slow) - - Not yet. - - 7) ECMA support. - - Not yet. we need not only a technical spec but also ECMA format - cd-rom itself! - - 8) Character set change by SVD ( multi SVD support ) - - Not yet. We should also hack the other part of system as 8 bit - clean. As far as I know, if you export the cdrom by NFS, the client - can access the 8 bit clean (ie. Solaris Japanese with EUC code ) - diff --git a/bsd/isofs/cd9660/TODO.hibler b/bsd/isofs/cd9660/TODO.hibler deleted file mode 100644 index c0de4da0a..000000000 --- a/bsd/isofs/cd9660/TODO.hibler +++ /dev/null @@ -1,9 +0,0 @@ -# $NetBSD: TODO.hibler,v 1.6 1994/12/13 22:33:10 mycroft Exp $ - -1. Investiate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS). - Since it was modelled after the inode code, we might be able to merge - them back. It looks like a seperate (but very similar) lookup routine - will be needed due to the associated file stuff. - -2. Seems like there should be a "notrans" or some such mount option to show - filenames as they really are without lower-casing. Does this make sense? diff --git a/bsd/isofs/cd9660/cd9660_bmap.c b/bsd/isofs/cd9660/cd9660_bmap.c deleted file mode 100644 index e34671f87..000000000 --- a/bsd/isofs/cd9660/cd9660_bmap.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_bmap.c,v 1.5 1994/12/13 22:33:12 mycroft Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)cd9660_bmap.c 8.4 (Berkeley) 12/5/94 - */ - -#include -#include -#include -#include -#include - -#include -#include - - -/* blktooff converts a logical block number to a file offset */ -int -cd9660_blktooff(struct vnop_blktooff_args *ap) -{ - register struct iso_node *ip; - register struct iso_mnt *imp; - - if (ap->a_vp == NULL) - return (EINVAL); - - ip = VTOI(ap->a_vp); - imp = ip->i_mnt; - - *ap->a_offset = (off_t)lblktosize(imp, ap->a_lblkno); - return (0); -} - -/* offtoblk converts a file offset to a logical block number */ -int -cd9660_offtoblk(struct vnop_offtoblk_args *ap) -{ - register struct iso_node *ip; - register struct iso_mnt *imp; - - if (ap->a_vp == NULL) - return (EINVAL); - - ip = VTOI(ap->a_vp); - imp = ip->i_mnt; - - *ap->a_lblkno = (daddr64_t)lblkno(imp, ap->a_offset); - return (0); -} - -int -cd9660_blockmap(struct vnop_blockmap_args *ap) -{ - struct iso_node *ip = VTOI(ap->a_vp); - size_t cbytes; - int devBlockSize = 0; - off_t offset = ap->a_foffset; - - /* - * Check for underlying vnode requests and ensure that logical - * to physical mapping is requested. - */ - if (ap->a_bpn == NULL) - return (0); - - devBlockSize = vfs_devblocksize(vnode_mount(ap->a_vp)); - - /* - * Associated files have an Apple Double header - */ - if (ip->i_flag & ISO_ASSOCIATED) { - if (offset < ADH_SIZE) { - if (ap->a_run) - *ap->a_run = 0; - *ap->a_bpn = (daddr64_t)-1; - goto out; - } else { - offset -= ADH_SIZE; - } - } - - *ap->a_bpn = (daddr64_t)(ip->iso_start + lblkno(ip->i_mnt, offset)); - - /* - * Determine maximum number of contiguous bytes following the - * requested offset. 
- */ - if (ap->a_run) { - if (ip->i_size > offset) - cbytes = ip->i_size - offset; - else - cbytes = 0; - - cbytes = (cbytes + (devBlockSize - 1)) & ~(devBlockSize - 1); - - *ap->a_run = MIN(cbytes, ap->a_size); - }; -out: - if (ap->a_poff) - *(int *)ap->a_poff = (long)offset & (devBlockSize - 1); - - return (0); -} - diff --git a/bsd/isofs/cd9660/cd9660_lookup.c b/bsd/isofs/cd9660/cd9660_lookup.c deleted file mode 100644 index 13f7ad5ed..000000000 --- a/bsd/isofs/cd9660/cd9660_lookup.c +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_lookup.c,v 1.13 1994/12/24 15:30:03 cgd Exp $ */ - -/*- - * Copyright (c) 1989, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91 - * - * @(#)cd9660_lookup.c 8.5 (Berkeley) 12/5/94 - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct nchstats iso_nchstats; - -/* - * Convert a component of a pathname into a pointer to a locked inode. - * This is a very central and rather complicated routine. - * If the file system is not maintained in a strict tree hierarchy, - * this can result in a deadlock situation (see comments in code below). - * - * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on - * whether the name is to be looked up, created, renamed, or deleted. - * When CREATE, RENAME, or DELETE is specified, information usable in - * creating, renaming, or deleting a directory entry may be calculated. - * If flag has LOCKPARENT or'ed into it and the target of the pathname - * exists, lookup returns both the target and its parent directory locked. - * When creating or renaming and LOCKPARENT is specified, the target may - * not be ".". When deleting and LOCKPARENT is specified, the target may - * be "."., but the caller must check to ensure it does an vrele and iput - * instead of two iputs. 
- * - * Overall outline of ufs_lookup: - * - * check accessibility of directory - * look for name in cache, if found, then if at end of path - * and deleting or creating, drop it, else return name - * search for name in directory, to found or notfound - * notfound: - * if creating, return locked directory, leaving info on available slots - * else return error - * found: - * if at end of path and deleting, return information to allow delete - * if at end of path and rewriting (RENAME and LOCKPARENT), lock target - * inode and return info to allow rewrite - * if not at end, add name to cache; if at end and neither creating - * nor deleting, add name to cache - * - * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked. - */ -int -cd9660_lookup(struct vnop_lookup_args *ap) -{ - register struct vnode *vdp; /* vnode for directory being searched */ - register struct iso_node *dp; /* inode for directory being searched */ - register struct iso_mnt *imp; /* file system that directory is in */ - struct buf *bp; /* a buffer of directory entries */ - struct iso_directory_record *ep = NULL;/* the current directory entry */ - int entryoffsetinblock; /* offset of ep in bp's buffer */ - int saveoffset = 0; /* offset of last directory entry in dir */ - int numdirpasses; /* strategy for directory search */ - doff_t endsearch; /* offset to end directory search */ - struct vnode *pdp; /* saved dp during symlink work */ - struct vnode *tdp; /* returned by cd9660_vget_internal */ - u_long bmask; /* block offset mask */ - int lockparent; /* 1 => lockparent flag is set */ - int wantparent; /* 1 => wantparent or lockparent flag */ - int wantassoc; - int error; - ino_t ino = 0; - int reclen; - u_short namelen; - int isoflags; - char altname[ISO_RRIP_NAMEMAX]; - int res; - int len; - char *name; - struct vnode **vpp = ap->a_vpp; - struct componentname *cnp = ap->a_cnp; - int flags = cnp->cn_flags; - int nameiop = cnp->cn_nameiop; - vfs_context_t ctx = cnp->cn_context; - 
size_t altlen; - - bp = NULL; - *vpp = NULL; - vdp = ap->a_dvp; - dp = VTOI(vdp); - imp = dp->i_mnt; - lockparent = flags & LOCKPARENT; - wantparent = flags & (LOCKPARENT|WANTPARENT); - wantassoc = 0; - - - /* - * We now have a segment name to search for, and a directory to search. - * - * Before tediously performing a linear scan of the directory, - * check the name cache to see if the directory/name pair - * we are looking for is known already. - */ - if ((error = cache_lookup(vdp, vpp, cnp))) { - if (error == ENOENT) - return (error); - return (0); - } - - len = cnp->cn_namelen; - name = cnp->cn_nameptr; - altname[0] = '\0'; - /* - * A "._" prefix means, we are looking for an associated file - */ - if (imp->iso_ftype != ISO_FTYPE_RRIP && - *name == ASSOCCHAR1 && *(name+1) == ASSOCCHAR2) { - wantassoc = 1; - len -= 2; - name += 2; - } - /* - * Decode search name into UCS-2 (Unicode) - */ - if ((imp->iso_ftype == ISO_FTYPE_JOLIET) && - !((len == 1 && *name == '.') || (flags & ISDOTDOT))) { - int flags1 = UTF_PRECOMPOSED; - - (void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen, - sizeof(altname), 0, flags1); - name = altname; - len = altlen; - } - /* - * If there is cached information on a previous search of - * this directory, pick up where we last left off. - * We cache only lookups as these are the most common - * and have the greatest payoff. Caching CREATE has little - * benefit as it usually must search the entire directory - * to determine that the entry does not exist. Caching the - * location of the last DELETE or RENAME has not reduced - * profiling time and hence has been removed in the interest - * of simplicity. 
- */ - bmask = imp->im_sector_size - 1; - if (nameiop != LOOKUP || dp->i_diroff == 0 || - dp->i_diroff > dp->i_size) { - entryoffsetinblock = 0; - dp->i_offset = 0; - numdirpasses = 1; - } else { - dp->i_offset = dp->i_diroff; - - if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp))) - return (error); - numdirpasses = 2; - iso_nchstats.ncs_2passes++; - } - endsearch = dp->i_size; - -searchloop: - while (dp->i_offset < endsearch) { - /* - * If offset is on a block boundary, - * read the next directory block. - * Release previous if it exists. - */ - if ((dp->i_offset & bmask) == 0) { - if (bp != NULL) - buf_brelse(bp); - if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp,dp->i_offset), NULL, &bp)) ) - return (error); - entryoffsetinblock = 0; - } - /* - * Get pointer to next entry. - */ - ep = (struct iso_directory_record *) - ((char *)0 + buf_dataptr(bp) + entryoffsetinblock); - - reclen = isonum_711(ep->length); - if (reclen == 0) { - /* skip to next block, if any */ - dp->i_offset = - (dp->i_offset & ~bmask) + imp->im_sector_size; - continue; - } - - if (reclen < ISO_DIRECTORY_RECORD_SIZE) { - /* illegal entry, stop */ - break; - } - if (entryoffsetinblock + reclen > imp->im_sector_size) { - /* entries are not allowed to cross sector boundaries */ - break; - } - namelen = isonum_711(ep->name_len); - isoflags = isonum_711(ep->flags); - - if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen) - /* illegal entry, stop */ - break; - /* - * Check for a name match. 
- */ - if (imp->iso_ftype == ISO_FTYPE_RRIP) { - if (isoflags & directoryBit) - ino = isodirino(ep, imp); - else - ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; - dp->i_ino = ino; - cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp); - if (namelen == cnp->cn_namelen - && !bcmp(name,altname,namelen)) - goto found; - ino = 0; - } else { - if ((!(isoflags & associatedBit)) == !wantassoc) { - if ((len == 1 - && *name == '.') - || (flags & ISDOTDOT)) { - if (namelen == 1 - && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) { - /* - * Save directory entry's inode number and - * release directory buffer. - */ - dp->i_ino = isodirino(ep, imp); - goto found; - } - if (namelen != 1 - || ep->name[0] != 0) - goto notfound; - } else if (imp->iso_ftype != ISO_FTYPE_JOLIET && !(res = isofncmp(name, len, ep->name, namelen))) { - if ( isoflags & directoryBit ) - ino = isodirino(ep, imp); - else - ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; - saveoffset = dp->i_offset; - } else if (imp->iso_ftype == ISO_FTYPE_JOLIET && !(res = ucsfncmp((u_int16_t*)name, len, - (u_int16_t*) ep->name, namelen))) { - if ( isoflags & directoryBit ) - ino = isodirino(ep, imp); - else - ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; - saveoffset = dp->i_offset; - } else if (ino) - goto foundino; -#ifdef NOSORTBUG /* On some CDs directory entries are not sorted correctly */ - else if (res < 0) - goto notfound; - else if (res > 0 && numdirpasses == 2) - numdirpasses++; -#endif - } - } - dp->i_offset += reclen; - entryoffsetinblock += reclen; - } /* endwhile */ - - if (ino) { -foundino: - dp->i_ino = ino; - if (saveoffset != dp->i_offset) { - if (lblkno(imp, dp->i_offset) != - lblkno(imp, saveoffset)) { - if (bp != NULL) - buf_brelse(bp); - if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) ) - return (error); - } - entryoffsetinblock = saveoffset & bmask; - ep = (struct iso_directory_record *) - ((char *)0 + 
buf_dataptr(bp) + entryoffsetinblock); - dp->i_offset = saveoffset; - } - goto found; - } -notfound: - /* - * If we started in the middle of the directory and failed - * to find our target, we must check the beginning as well. - */ - if (numdirpasses == 2) { - numdirpasses--; - dp->i_offset = 0; - endsearch = dp->i_diroff; - goto searchloop; - } - if (bp != NULL) - buf_brelse(bp); - - /* - * Insert name into cache (as non-existent) if appropriate. - */ - if (cnp->cn_flags & MAKEENTRY) - cache_enter(vdp, *vpp, cnp); - return (ENOENT); - -found: - if (numdirpasses == 2) - iso_nchstats.ncs_pass2++; - - /* - * Found component in pathname. - * If the final component of path name, save information - * in the cache as to where the entry was found. - */ - if ((flags & ISLASTCN) && nameiop == LOOKUP) - dp->i_diroff = dp->i_offset; - - /* - * Step through the translation in the name. We do not `iput' the - * directory because we may need it again if a symbolic link - * is relative to the current directory. Instead we save it - * unlocked as "pdp". We must get the target inode before unlocking - * the directory to insure that the inode will not be removed - * before we get it. We prevent deadlock by always fetching - * inodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the `iget' for the - * inode associated with ".." returns. We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - * Note also that this simple deadlock detection scheme will not - * work if the file system has any hard links other than ".." - * that point backwards in the directory structure. 
- */ - pdp = vdp; - /* - * If ino is different from dp->i_ino, - * it's a relocated directory. - */ - if (flags & ISDOTDOT) { - error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, NULL, NULL, - dp->i_ino != ino, ep, - vfs_context_proc(ctx)); - VTOI(tdp)->i_parent = VTOI(pdp)->i_number; - buf_brelse(bp); - - *vpp = tdp; - } else if (dp->i_number == dp->i_ino) { - buf_brelse(bp); - vnode_get(vdp); /* we want ourself, ie "." */ - *vpp = vdp; - } else { - error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, vdp, cnp, - dp->i_ino != ino, ep, vfs_context_proc(ctx)); - /* save parent inode number */ - VTOI(tdp)->i_parent = VTOI(pdp)->i_number; - buf_brelse(bp); - if (error) - return (error); - *vpp = tdp; - } - return (0); -} - - -/* - * Return buffer with the contents of block "offset" from the beginning of - * directory "ip". If "res" is non-zero, fill it in with a pointer to the - * remaining space in the directory. - */ -int -cd9660_blkatoff(vnode_t vp, off_t offset, char **res, buf_t *bpp) -{ - struct iso_node *ip; - register struct iso_mnt *imp; - buf_t bp; - daddr_t lbn; - int bsize, error; - - ip = VTOI(vp); - imp = ip->i_mnt; - lbn = lblkno(imp, offset); - bsize = blksize(imp, ip, lbn); - - if ((bsize != imp->im_sector_size) && - (offset & (imp->im_sector_size - 1)) == 0) { - bsize = imp->im_sector_size; - } - - if ( (error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), bsize, NOCRED, &bp)) ) { - buf_brelse(bp); - *bpp = NULL; - return (error); - } - if (res) - *res = (char *)0 + buf_dataptr(bp) + blkoff(imp, offset); - *bpp = bp; - - return (0); -} diff --git a/bsd/isofs/cd9660/cd9660_mount.h b/bsd/isofs/cd9660/cd9660_mount.h deleted file mode 100644 index 90ef397f4..000000000 --- a/bsd/isofs/cd9660/cd9660_mount.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_mount.h 8.1 (Berkeley) 5/24/95 - */ - -#ifndef __ISOFS_CD9660_CD9660_MOUNT_H__ -#define __ISOFS_CD9660_CD9660_MOUNT_H__ - -#include -#include - -#ifdef __APPLE_API_UNSTABLE -/* - * Arguments to mount ISO 9660 filesystems. 
- */ -struct CDTOC; -struct iso_args { -#ifndef KERNEL - char *fspec; /* block special device to mount */ -#endif - int flags; /* mounting flags, see below */ - int ssector; /* starting sector, 0 for 1st session */ - int toc_length; /* Size of *toc, including the toc.length field */ - struct CDTOC *toc; -}; -#define ISOFSMNT_NORRIP 0x00000001 /* disable Rock Ridge Ext.*/ -#define ISOFSMNT_GENS 0x00000002 /* enable generation numbers */ -#define ISOFSMNT_EXTATT 0x00000004 /* enable extended attributes */ -#define ISOFSMNT_NOJOLIET 0x00000008 /* disable Joliet Ext.*/ -#define ISOFSMNT_TOC 0x00000010 /* iso_args.toc is valid */ - -#ifdef KERNEL -/* LP64 version of iso_args. all pointers - * grow when we're dealing with a 64-bit process. - * WARNING - keep in sync with iso_args - */ - -struct user_iso_args { - int flags; /* mounting flags, see below */ - int ssector; /* starting sector, 0 for 1st session */ - int toc_length; /* Size of *toc, including the toc.length field */ - user_addr_t toc __attribute((aligned(8))); -}; - -#endif /* KERNEL */ - -#endif /* __APPLE_API_UNSTABLE */ -#endif /* __ISOFS_CD9660_CD9660_MOUNT_H__ */ diff --git a/bsd/isofs/cd9660/cd9660_node.c b/bsd/isofs/cd9660/cd9660_node.c deleted file mode 100644 index 0b9ddcff8..000000000 --- a/bsd/isofs/cd9660/cd9660_node.c +++ /dev/null @@ -1,511 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_node.c,v 1.13 1994/12/24 15:30:07 cgd Exp $ */ - -/*- - * Copyright (c) 1982, 1986, 1989, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_node.c 8.5 (Berkeley) 12/5/94 - - - - * HISTORY - * 22-Jan-98 radar 1669467 - ISO 9660 CD support - jwc - * 17-Feb-98 radar 1669467 - changed lock protocols to use the lock manager - chw - - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * Structures associated with iso_node caching. 
- */ -struct iso_node **isohashtbl; -u_long isohash; -#define INOHASH(device, inum) (((device) + ((inum)>>12)) & isohash) - -#ifdef ISODEVMAP -struct iso_node **idvhashtbl; -u_long idvhash; -#define DNOHASH(device, inum) (((device) + ((inum)>>12)) & idvhash) -#endif - -/* defined in bsd/vfs/vfs_subr.c */ -extern int prtactive; /* 1 => print out reclaim of active vnodes */ - -extern u_char isonullname[]; -/* - * Initialize hash links for inodes and dnodes. - */ -int -cd9660_init(__unused struct vfsconf *cp) -{ - return 0; -} - -int -cd9660_hashinit(void) -{ - if (!isohashtbl) - isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash); -#ifdef ISODEVMAP - if (!idvhashtbl) - idvhashtbl = hashinit(desiredvnodes / 8, M_ISOFSMNT, &idvhash); -#endif - return 0; -} - -#ifdef ISODEVMAP -/* - * Enter a new node into the device hash list - */ -struct iso_dnode * -iso_dmap(dev_t device, ino_t inum, int create) -{ - register struct iso_dnode **dpp, *dp, *dq; - - dpp = &idvhashtbl[DNOHASH(device, inum)]; - for (dp = *dpp;; dp = dp->d_next) { - if (dp == NULL) - return (NULL); - if (inum == dp->i_number && device == dp->i_dev) - return (dp); - - if (!create) - return (NULL); - - MALLOC(dp, struct iso_dnode *, sizeof(struct iso_dnode), M_CACHE, - M_WAITOK); - dp->i_dev = dev; - dp->i_number = ino; - - if (dq = *dpp) - dq->d_prev = dp->d_next; - dp->d_next = dq; - dp->d_prev = dpp; - *dpp = dp; - - return (dp); -} - -void -iso_dunmap(dev_t device) -{ - struct iso_dnode **dpp, *dp, *dq; - - for (dpp = idvhashtbl; dpp <= idvhashtbl + idvhash; dpp++) { - for (dp = *dpp; dp != NULL; dp = dq) - dq = dp->d_next; - if (device == dp->i_dev) { - if (dq) - dq->d_prev = dp->d_prev; - *dp->d_prev = dq; - FREE(dp, M_CACHE); - } - } - } -} -#endif - -/* - * Use the device/inum pair to find the incore inode, and return a pointer - * to it. If it is in core, but locked, wait for it. 
- */ -struct vnode * -cd9660_ihashget(dev_t device, ino_t inum, __unused struct proc *p) -{ - register struct iso_node *ip; - struct vnode *vp; - uint32_t vid; - -retry: - for (ip = isohashtbl[INOHASH(device, inum)]; ip; ip = ip->i_next) { - if (inum == ip->i_number && device == ip->i_dev) { - - if (ISSET(ip->i_flag, ISO_INALLOC)) { - /* - * inode is being created... wait for it - * to be ready for consumption - */ - SET(ip->i_flag, ISO_INWALLOC); - tsleep((caddr_t)ip, PINOD, "cd9960_ihashget", 0); - goto retry; - } - vp = ITOV(ip); - /* - * the vid needs to be grabbed before we drop - * lock protecting the hash - */ - vid = vnode_vid(vp); - - /* - * we currently depend on running under the FS funnel - * when we do proper locking and advertise ourselves - * as thread safe, we'll need a lock to protect the - * hash lookup... this is where we would drop it - */ - if (vnode_getwithvid(vp, vid)) { - /* - * If vnode is being reclaimed, or has - * already changed identity, no need to wait - */ - return (NULL); - } - return (vp); - } - } - return (NULL); -} - -/* - * Insert the inode into the hash table, and return it locked. - */ -void -cd9660_ihashins(struct iso_node *ip) -{ - struct iso_node **ipp, *iq; - - /* lock the inode, then put it on the appropriate hash list */ - - ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)]; - if ((iq = *ipp)) - iq->i_prev = &ip->i_next; - ip->i_next = iq; - ip->i_prev = ipp; - *ipp = ip; -} - -/* - * Remove the inode from the hash table. - */ -void -cd9660_ihashrem(register struct iso_node *ip) -{ - register struct iso_node *iq; - - if ((iq = ip->i_next)) - iq->i_prev = ip->i_prev; - *ip->i_prev = iq; -#if 1 /* was ifdef DIAGNOSTIC */ - ip->i_next = NULL; - ip->i_prev = NULL; -#endif -} - -/* - * Last reference to an inode... 
if we're done with - * it, go ahead and recycle it for other use - */ -int -cd9660_inactive(struct vnop_inactive_args *ap) -{ - vnode_t vp = ap->a_vp; - struct iso_node *ip = VTOI(vp); - - /* - * If we are done with the inode, reclaim it - * so that it can be reused immediately. - */ - if (ip->inode.iso_mode == 0) - vnode_recycle(vp); - - return 0; -} - -/* - * Reclaim an inode so that it can be used for other purposes. - */ -int -cd9660_reclaim(struct vnop_reclaim_args *ap) -{ - vnode_t vp = ap->a_vp; - struct iso_node *ip = VTOI(vp); - - vnode_removefsref(vp); - /* - * Remove the inode from its hash chain. - */ - cd9660_ihashrem(ip); - - if (ip->i_devvp) { - vnode_t devvp = ip->i_devvp; - ip->i_devvp = NULL; - vnode_rele(devvp); - } - vnode_clearfsnode(vp); - - if (ip->i_namep != isonullname) - FREE(ip->i_namep, M_TEMP); - if (ip->i_riff != NULL) - FREE(ip->i_riff, M_TEMP); - FREE_ZONE(ip, sizeof(struct iso_node), M_ISOFSNODE); - - return (0); -} - -/* - * File attributes - */ -void -cd9660_defattr(struct iso_directory_record *isodir, struct iso_node *inop, - struct buf *bp) -{ - struct buf *bp2 = NULL; - struct iso_mnt *imp; - struct iso_extended_attributes *ap = NULL; - int off; - - if ( isonum_711(isodir->flags) & directoryBit ) { - inop->inode.iso_mode = S_IFDIR; - /* - * If we return 2, fts() will assume there are no subdirectories - * (just links for the path and .), so instead we return 1. 
- */ - inop->inode.iso_links = 1; - } else { - inop->inode.iso_mode = S_IFREG; - inop->inode.iso_links = 1; - } - if (!bp - && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT) - && (off = isonum_711(isodir->ext_attr_length))) { - cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, &bp2); - bp = bp2; - } - if (bp) { - ap = (struct iso_extended_attributes *)((char *)0 + buf_dataptr(bp)); - - if (isonum_711(ap->version) == 1) { - if (!(ap->perm[0]&0x40)) - inop->inode.iso_mode |= VEXEC >> 6; - if (!(ap->perm[0]&0x10)) - inop->inode.iso_mode |= VREAD >> 6; - if (!(ap->perm[0]&4)) - inop->inode.iso_mode |= VEXEC >> 3; - if (!(ap->perm[0]&1)) - inop->inode.iso_mode |= VREAD >> 3; - if (!(ap->perm[1]&0x40)) - inop->inode.iso_mode |= VEXEC; - if (!(ap->perm[1]&0x10)) - inop->inode.iso_mode |= VREAD; - inop->inode.iso_uid = isonum_723(ap->owner); /* what about 0? */ - inop->inode.iso_gid = isonum_723(ap->group); /* what about 0? */ - } else - ap = NULL; - } - if (!ap) { - inop->inode.iso_mode |= VREAD|VWRITE|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6; - inop->inode.iso_uid = ISO_UNKNOWNUID; - inop->inode.iso_gid = ISO_UNKNOWNGID; - } - if (bp2) - buf_brelse(bp2); -} - -/* - * Time stamps - */ -void -cd9660_deftstamp(struct iso_directory_record *isodir, struct iso_node *inop, - struct buf *bp) -{ - struct buf *bp2 = NULL; - struct iso_mnt *imp; - struct iso_extended_attributes *ap = NULL; - int off; - - if (!bp - && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT) - && (off = isonum_711(isodir->ext_attr_length))) - { - cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, &bp2); - bp = bp2; - } - if (bp) { - ap = (struct iso_extended_attributes *)((char *)0 + buf_dataptr(bp)); - - if (isonum_711(ap->version) == 1) { - if (!cd9660_tstamp_conv17(ap->ftime,&inop->inode.iso_atime)) - cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_atime); - if (!cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_ctime)) - inop->inode.iso_ctime = inop->inode.iso_atime; - 
if (!cd9660_tstamp_conv17(ap->mtime,&inop->inode.iso_mtime)) - inop->inode.iso_mtime = inop->inode.iso_ctime; - } else - ap = NULL; - } - if (!ap) { - cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime); - inop->inode.iso_atime = inop->inode.iso_ctime; - inop->inode.iso_mtime = inop->inode.iso_ctime; - } - if (bp2) - buf_brelse(bp2); -} - -int -cd9660_tstamp_conv7(u_char *pi, struct timespec *pu) -{ - int crtime, days; - int y, m, d, hour, minute, second, mytz; - - y = pi[0] + 1900; - m = pi[1]; - d = pi[2]; - hour = pi[3]; - minute = pi[4]; - second = pi[5]; - mytz = pi[6]; - - if (y < 1970) { - pu->tv_sec = 0; - pu->tv_nsec = 0; - return 0; - } else { -#ifdef ORIGINAL - /* computes day number relative to Sept. 19th,1989 */ - /* don't even *THINK* about changing formula. It works! */ - days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100; -#else - /* - * Changed :-) to make it relative to Jan. 1st, 1970 - * and to disambiguate negative division - */ - days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239; -#endif - crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second; - - /* timezone offset is unreliable on some disks */ - if (-48 <= mytz && mytz <= 52) - crtime -= mytz * 15 * 60; - } - pu->tv_sec = crtime; - pu->tv_nsec = 0; - return 1; -} - -static u_int -cd9660_chars2ui(u_char *begin, int len) -{ - u_int rc; - - for (rc = 0; --len >= 0;) { - rc *= 10; - rc += *begin++ - '0'; - } - return rc; -} - -int -cd9660_tstamp_conv17(u_char *pi, struct timespec *pu) -{ - u_char buf[7]; - - /* year:"0001"-"9999" -> -1900 */ - buf[0] = cd9660_chars2ui(pi,4) - 1900; - - /* month: " 1"-"12" -> 1 - 12 */ - buf[1] = cd9660_chars2ui(pi + 4,2); - - /* day: " 1"-"31" -> 1 - 31 */ - buf[2] = cd9660_chars2ui(pi + 6,2); - - /* hour: " 0"-"23" -> 0 - 23 */ - buf[3] = cd9660_chars2ui(pi + 8,2); - - /* minute:" 0"-"59" -> 0 - 59 */ - buf[4] = cd9660_chars2ui(pi + 10,2); - - /* second:" 0"-"59" -> 0 - 59 */ - buf[5] = 
cd9660_chars2ui(pi + 12,2); - - /* difference of GMT */ - buf[6] = pi[16]; - - return cd9660_tstamp_conv7(buf,pu); -} - -ino_t -isodirino(struct iso_directory_record *isodir, struct iso_mnt *imp) -{ - ino_t ino; - - ino = (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length)) - << imp->im_bshift; - return (ino); -} diff --git a/bsd/isofs/cd9660/cd9660_node.h b/bsd/isofs/cd9660/cd9660_node.h deleted file mode 100644 index 80a78c2a1..000000000 --- a/bsd/isofs/cd9660/cd9660_node.h +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_node.h,v 1.10 1994/12/24 15:30:09 cgd Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_node.h 8.4 (Berkeley) 12/5/94 - */ -#ifndef _CD9660_NODE_H_ -#define _CD9660_NODE_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -/* - * Theoretically, directories can be more than 2Gb in length, - * however, in practice this seems unlikely. So, we define - * the type doff_t as a long to keep down the cost of doing - * lookup on a 32-bit machine. If you are porting to a 64-bit - * architecture, you should make doff_t the same as off_t. - */ - -#include -#include -#include - -#ifndef doff_t -#define doff_t long -#endif - -typedef struct { - struct timespec iso_atime; /* time of last access */ - struct timespec iso_mtime; /* time of last modification */ - struct timespec iso_ctime; /* time file changed */ - uid_t iso_uid; /* owner user id */ - gid_t iso_gid; /* owner group id */ - u_short iso_mode; /* files access mode and type */ - short iso_links; /* links of file */ - dev_t iso_rdev; /* Major/Minor number for special */ -} ISO_RRIP_INODE; - -#ifdef ISODEVMAP -/* - * FOr device# (major,minor) translation table - */ -struct iso_dnode { - struct iso_dnode *d_next, **d_prev; /* hash chain */ - dev_t i_dev; /* device where dnode resides */ - ino_t i_number; /* the identity of the inode */ - dev_t d_dev; /* device # for translation */ -}; -#endif - -/* defines i_size as a macro */ -#undef i_size - -struct iso_node { - struct iso_node *i_next, **i_prev; /* hash chain */ - struct vnode *i_vnode; /* vnode associated with this inode 
*/ - struct vnode *i_devvp; /* vnode for block I/O */ - u_int32_t i_flag; /* flags, see below */ - dev_t i_dev; /* device where inode resides */ - ino_t i_number; /* the identity of the inode */ - /* we use the actual starting block of the file */ - struct iso_mnt *i_mnt; /* filesystem associated with this inode */ - struct lockf *i_lockf; /* head of byte-level lock list */ - doff_t i_endoff; /* end of useful stuff in directory */ - doff_t i_diroff; /* offset in dir, where we found last entry */ - doff_t i_offset; /* offset of free space in directory */ - ino_t i_ino; /* inode number of found directory */ - daddr_t i_lastr; /* last read (read ahead) */ - long iso_extent; /* extent of file */ - long i_size; - long iso_start; /* actual start of data of file (may be different */ - /* from iso_extent, if file has extended attributes) */ - ISO_RRIP_INODE inode; - - ino_t i_parent; /* inode number of parent directory */ - u_char *i_namep; /* node name buffer */ - - /* support Apple extensions to ISO directory rec */ - long i_rsrcsize; /* cached size of associated file */ - u_int32_t i_FileType; /* MacOS file type */ - u_int32_t i_Creator; /* MacOS file creator */ - u_int16_t i_FinderFlags; /* MacOS finder flags */ - - u_int16_t i_entries; /* count of directory entries */ - - struct riff_header *i_riff; -}; - -#define i_forw i_chain[0] -#define i_back i_chain[1] - -/* These flags are kept in i_flag. */ -#define ISO_ASSOCIATED 0x0001 /* node is an associated file. 
*/ -#define ISO_INALLOC 0x0002 -#define ISO_INWALLOC 0x0004 - - -/* defines VTOI and ITOV macros */ -#undef VTOI -#undef ITOV - -#define VTOI(vp) ((struct iso_node *)(vnode_fsnode(vp))) -#define ITOV(ip) ((ip)->i_vnode) - -/* similar in as default UID and GID */ -#define ISO_UNKNOWNUID ((uid_t)99) -#define ISO_UNKNOWNGID ((gid_t)99) - -int cd9660_access_internal(vnode_t, mode_t, kauth_cred_t); - -/* - * Prototypes for ISOFS vnode operations - */ -int cd9660_blktooff(struct vnop_blktooff_args *); -int cd9660_offtoblk(struct vnop_offtoblk_args *); -int cd9660_blockmap(struct vnop_blockmap_args *); -int cd9660_lookup (struct vnop_lookup_args *); -int cd9660_open (struct vnop_open_args *); -int cd9660_close (struct vnop_close_args *); -int cd9660_access (struct vnop_access_args *); -int cd9660_getattr (struct vnop_getattr_args *); -int cd9660_read (struct vnop_read_args *); -int cd9660_xa_read (struct vnop_read_args *); -int cd9660_ioctl (struct vnop_ioctl_args *); -int cd9660_select (struct vnop_select_args *); -int cd9660_mmap (struct vnop_mmap_args *); -int cd9660_readdir (struct vnop_readdir_args *); -int cd9660_readlink (struct vnop_readlink_args *); -int cd9660_inactive (struct vnop_inactive_args *); -int cd9660_reclaim (struct vnop_reclaim_args *); -int cd9660_strategy (struct vnop_strategy_args *); -int cd9660_pathconf (struct vnop_pathconf_args *); -int cd9660_enotsupp(void); -int cd9660_pagein(struct vnop_pagein_args *ap); -int cd9660_remove(struct vnop_remove_args *ap); -int cd9660_rmdir(struct vnop_rmdir_args *ap); - -__private_extern__ void cd9660_xa_init(struct iso_node *ip, - struct iso_directory_record *isodir); -__private_extern__ int cd9660_blkatoff (vnode_t, off_t, char **, buf_t *); - -void cd9660_defattr (struct iso_directory_record *, - struct iso_node *, struct buf *); -void cd9660_deftstamp (struct iso_directory_record *, - struct iso_node *, struct buf *); -struct vnode *cd9660_ihashget (dev_t, ino_t, struct proc *); -void cd9660_ihashins 
(struct iso_node *); -void cd9660_ihashrem (struct iso_node *); -int cd9660_tstamp_conv7 (u_char *, struct timespec *); -int cd9660_tstamp_conv17 (u_char *, struct timespec *); -ino_t isodirino (struct iso_directory_record *, struct iso_mnt *); -#ifdef ISODEVMAP -struct iso_dnode *iso_dmap (dev_t, ino_t, int); -void iso_dunmap (dev_t); -#endif - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _CD9660_NODE_H_ */ diff --git a/bsd/isofs/cd9660/cd9660_rrip.c b/bsd/isofs/cd9660/cd9660_rrip.c deleted file mode 100644 index 643c7ad54..000000000 --- a/bsd/isofs/cd9660/cd9660_rrip.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_rrip.c,v 1.11 1994/12/24 15:30:10 cgd Exp $ */ - -/*- - * Copyright (c) 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_rrip.c 8.6 (Berkeley) 12/5/94 - - - - * HISTORY - * 22-Jan-98 radar 1669467 - ISO 9660 CD support - jwc - - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -/* - * POSIX file attribute - */ -static int -cd9660_rrip_attr(ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana) -{ - ana->inop->inode.iso_mode = isonum_733(p->mode); - ana->inop->inode.iso_uid = isonum_733(p->uid); - ana->inop->inode.iso_gid = isonum_733(p->gid); - ana->inop->inode.iso_links = isonum_733(p->links); - ana->fields &= ~ISO_SUSP_ATTR; - return ISO_SUSP_ATTR; -} - -static void -cd9660_rrip_defattr(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana) -{ - /* But this is a required field! 
*/ - printf("RRIP without PX field?\n"); - cd9660_defattr(isodir,ana->inop,NULL); -} - -/* - * Symbolic Links - */ -static int -cd9660_rrip_slink(ISO_RRIP_SLINK *p, ISO_RRIP_ANALYZE *ana) -{ - register ISO_RRIP_SLINK_COMPONENT *pcomp; - register ISO_RRIP_SLINK_COMPONENT *pcompe; - int len, wlen, cont; - char *outbuf; - const char *inbuf; - - pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component; - pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length)); - len = *ana->outlen; - outbuf = ana->outbuf; - cont = ana->cont; - - /* - * Gathering a Symbolic name from each component with path - */ - for (; - pcomp < pcompe; - pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ - + isonum_711(pcomp->clen))) { - - if (!cont) { - if (len < ana->maxlen) { - len++; - *outbuf++ = '/'; - } - } - cont = 0; - - inbuf = ".."; - wlen = 0; - - switch (*pcomp->cflag) { - - case ISO_SUSP_CFLAG_CURRENT: - /* Inserting Current */ - wlen = 1; - break; - - case ISO_SUSP_CFLAG_PARENT: - /* Inserting Parent */ - wlen = 2; - break; - - case ISO_SUSP_CFLAG_ROOT: - /* Inserting slash for ROOT */ - /* start over from beginning(?) */ - outbuf -= len; - len = 0; - break; - - case ISO_SUSP_CFLAG_VOLROOT: - /* Inserting a mount point i.e. "/cdrom" */ - /* same as above */ - outbuf -= len; - len = 0; - inbuf = vfs_statfs(ana->imp->im_mountp)->f_mntonname; - wlen = strlen(inbuf); - break; - - case ISO_SUSP_CFLAG_HOST: - /* Inserting hostname i.e. 
"kurt.tools.de" */ - inbuf = hostname; - wlen = hostnamelen; - break; - - case ISO_SUSP_CFLAG_CONTINUE: - cont = 1; - /* fall thru */ - case 0: - /* Inserting component */ - wlen = isonum_711(pcomp->clen); - inbuf = pcomp->name; - break; - default: - printf("RRIP with incorrect flags?"); - wlen = ana->maxlen + 1; - break; - } - - if (len + wlen > ana->maxlen) { - /* indicate error to caller */ - ana->cont = 1; - ana->fields = 0; - ana->outbuf -= *ana->outlen; - *ana->outlen = 0; - return 0; - } - - bcopy(inbuf,outbuf,wlen); - outbuf += wlen; - len += wlen; - - } - ana->outbuf = outbuf; - *ana->outlen = len; - ana->cont = cont; - - if (!isonum_711(p->flags)) { - ana->fields &= ~ISO_SUSP_SLINK; - return ISO_SUSP_SLINK; - } - return 0; -} - -/* - * Alternate name - */ -static int -cd9660_rrip_altname(ISO_RRIP_ALTNAME *p, ISO_RRIP_ANALYZE *ana) -{ - char *inbuf; - int wlen; - int cont; - - inbuf = ".."; - wlen = 0; - cont = 0; - - switch (*p->flags) { - case ISO_SUSP_CFLAG_CURRENT: - /* Inserting Current */ - wlen = 1; - break; - - case ISO_SUSP_CFLAG_PARENT: - /* Inserting Parent */ - wlen = 2; - break; - - case ISO_SUSP_CFLAG_HOST: - /* Inserting hostname i.e. 
"kurt.tools.de" */ - inbuf = hostname; - wlen = hostnamelen; - break; - - case ISO_SUSP_CFLAG_CONTINUE: - cont = 1; - /* fall thru */ - case 0: - /* Inserting component */ - wlen = isonum_711(p->h.length) - 5; - inbuf = (char *)p + 5; - break; - - default: - printf("RRIP with incorrect NM flags?\n"); - wlen = ana->maxlen + 1; - break; - } - - if ((*ana->outlen += wlen) > ana->maxlen) { - /* treat as no name field */ - ana->fields &= ~ISO_SUSP_ALTNAME; - ana->outbuf -= *ana->outlen - wlen; - *ana->outlen = 0; - return 0; - } - - bcopy(inbuf,ana->outbuf,wlen); - ana->outbuf += wlen; - - if (!cont) { - ana->fields &= ~ISO_SUSP_ALTNAME; - return ISO_SUSP_ALTNAME; - } - return 0; -} - -static void -cd9660_rrip_defname(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana) -{ - strlcpy(ana->outbuf, "..", ana->outlen); - switch (*isodir->name) { - default: - isofntrans(isodir->name, isonum_711(isodir->name_len), - ana->outbuf, ana->outlen, 1, - isonum_711(isodir->flags) & associatedBit); - break; - case 0: - *ana->outlen = 1; - break; - case 1: - *ana->outlen = 2; - break; - } -} - -/* - * Parent or Child Link - */ -static int -cd9660_rrip_pclink(ISO_RRIP_CLINK *p, ISO_RRIP_ANALYZE *ana) -{ - *ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift; - ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK); - return *p->h.type == 'C' ? 
ISO_SUSP_CLINK : ISO_SUSP_PLINK; -} - -/* - * Relocated directory - */ -/* ARGSUSED */ -static int -cd9660_rrip_reldir(__unused ISO_RRIP_RELDIR *p, ISO_RRIP_ANALYZE *ana) -{ - /* special hack to make caller aware of RE field */ - *ana->outlen = 0; - ana->fields = 0; - return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK; -} - -static int -cd9660_rrip_tstamp(ISO_RRIP_TSTAMP *p, ISO_RRIP_ANALYZE *ana) -{ - u_char *ptime; - - ptime = p->time; - - /* Check a format of time stamp (7bytes/17bytes) */ - if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) { - if (*p->flags&ISO_SUSP_TSTAMP_CREAT) - ptime += 7; - - if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { - cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime); - ptime += 7; - } else - bzero(&ana->inop->inode.iso_mtime, sizeof(struct timespec)); - - if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { - cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime); - ptime += 7; - } else - ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; - - if (*p->flags&ISO_SUSP_TSTAMP_ATTR) - cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime); - else - ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; - - } else { - if (*p->flags&ISO_SUSP_TSTAMP_CREAT) - ptime += 17; - - if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { - cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime); - ptime += 17; - } else - bzero(&ana->inop->inode.iso_mtime, sizeof(struct timespec)); - - if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { - cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime); - ptime += 17; - } else - ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; - - if (*p->flags&ISO_SUSP_TSTAMP_ATTR) - cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime); - else - ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; - - } - ana->fields &= ~ISO_SUSP_TSTAMP; - return ISO_SUSP_TSTAMP; -} - -static void -cd9660_rrip_deftstamp(struct iso_directory_record *isodir, - ISO_RRIP_ANALYZE *ana) -{ - cd9660_deftstamp(isodir,ana->inop,NULL); -} - -/* - * POSIX 
device modes - */ -static int -cd9660_rrip_device(ISO_RRIP_DEVICE *p, ISO_RRIP_ANALYZE *ana) -{ - u_int high, low; - - high = isonum_733(p->dev_t_high); - low = isonum_733(p->dev_t_low); - - if (high == 0) - ana->inop->inode.iso_rdev = makedev(major(low), minor(low)); - else - ana->inop->inode.iso_rdev = makedev(high, minor(low)); - ana->fields &= ~ISO_SUSP_DEVICE; - return ISO_SUSP_DEVICE; -} - -/* - * Flag indicating - */ -static int -cd9660_rrip_idflag(ISO_RRIP_IDFLAG *p, ISO_RRIP_ANALYZE *ana) -{ - ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */ - /* special handling of RE field */ - if (ana->fields&ISO_SUSP_RELDIR) - return cd9660_rrip_reldir(p,ana); - - return ISO_SUSP_IDFLAG; -} - -/* - * Continuation pointer - */ -static int -cd9660_rrip_cont(ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana) -{ - ana->iso_ce_blk = isonum_733(p->location); - ana->iso_ce_off = isonum_733(p->offset); - ana->iso_ce_len = isonum_733(p->length); - return ISO_SUSP_CONT; -} - -/* - * System Use end - */ -/* ARGSUSED */ -static int -cd9660_rrip_stop(__unused ISO_SUSP_HEADER *p, __unused ISO_RRIP_ANALYZE *ana) -{ - return ISO_SUSP_STOP; -} - -/* - * Extension reference - */ -static int -cd9660_rrip_extref(ISO_RRIP_EXTREF *p, ISO_RRIP_ANALYZE *ana) -{ - if (isonum_711(p->len_id) != 10 - || bcmp((char *)p + 8,"RRIP_1991A",10) - || isonum_711(p->version) != 1) - return 0; - ana->fields &= ~ISO_SUSP_EXTREF; - return ISO_SUSP_EXTREF; -} - -typedef int (*rrip_table_func)(ISO_SUSP_HEADER *phead, ISO_RRIP_ANALYZE *ana); -typedef int (*rrip_table_func2)(struct iso_directory_record *isodir, - ISO_RRIP_ANALYZE *ana); -typedef struct { - char type[2]; - rrip_table_func func; - rrip_table_func2 func2; - int result; -} RRIP_TABLE; - -static int -cd9660_rrip_loop(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana, - RRIP_TABLE *table) -{ - register RRIP_TABLE *ptable; - register ISO_SUSP_HEADER *phead; - register ISO_SUSP_HEADER *pend; - struct buf *bp = NULL; - char *pwhead; 
- int result; - - /* - * Note: If name length is odd, - * it will be padding 1 byte after the name - */ - pwhead = isodir->name + isonum_711(isodir->name_len); - if (!(isonum_711(isodir->name_len)&1)) - pwhead++; - - /* If it's not the '.' entry of the root dir obey SP field */ - if (*isodir->name != 0 - || isonum_733(isodir->extent) != ana->imp->root_extent) - pwhead += ana->imp->rr_skip; - else - pwhead += ana->imp->rr_skip0; - - phead = (ISO_SUSP_HEADER *)pwhead; - pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length)); - - result = 0; - while (1) { - ana->iso_ce_len = 0; - /* - * Note: "pend" should be more than one SUSP header - */ - while (pend >= phead + 1) { - if (isonum_711(phead->version) == 1) { - for (ptable = table; ptable->func; ptable++) { - if (*phead->type == *ptable->type - && phead->type[1] == ptable->type[1]) { - result |= (ptable->func(phead,ana)); - break; - } - } - if (!ana->fields) - break; - } - if (result&ISO_SUSP_STOP) { - result &= ~ISO_SUSP_STOP; - break; - } - /* plausibility check */ - if (isonum_711(phead->length) < sizeof(*phead)) - break; - /* - * move to next SUSP - * Hopefully this works with newer versions, too - */ - phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length)); - } - - if (ana->fields && ana->iso_ce_len) { - if (ana->iso_ce_blk >= ana->imp->volume_space_size - || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size - || buf_bread(ana->imp->im_devvp, -#if 1 // radar 1669467 - logical and physical blocksize are the same - (daddr64_t)((unsigned)ana->iso_ce_blk), -#else - (daddr64_t)((unsigned)(ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT))), -#endif // radar 1669467 - ana->imp->logical_block_size, NOCRED, &bp)) - /* what to do now? 
*/ - break; - phead = (ISO_SUSP_HEADER *)((char *)buf_dataptr(bp) + ana->iso_ce_off); - pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len); - } else - break; - } - if (bp) - buf_brelse(bp); - /* - * If we don't find the Basic SUSP stuffs, just set default value - * (attribute/time stamp) - */ - for (ptable = table; ptable->func2; ptable++) - if (!(ptable->result&result)) - ptable->func2(isodir,ana); - - return result; -} - -/* - * Get Attributes. - */ -static RRIP_TABLE rrip_table_analyze[] = { - { "PX", (rrip_table_func)cd9660_rrip_attr, - (rrip_table_func2)cd9660_rrip_defattr, - ISO_SUSP_ATTR }, - { "TF", (rrip_table_func)cd9660_rrip_tstamp, - (rrip_table_func2)cd9660_rrip_deftstamp, - ISO_SUSP_TSTAMP }, - { "PN", (rrip_table_func)cd9660_rrip_device, - 0, ISO_SUSP_DEVICE }, - { "RR", (rrip_table_func)cd9660_rrip_idflag, - 0, ISO_SUSP_IDFLAG }, - { "CE", (rrip_table_func)cd9660_rrip_cont, - 0, ISO_SUSP_CONT }, - { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, - { "", 0, 0, 0 } -}; - -int -cd9660_rrip_analyze(struct iso_directory_record *isodir, struct iso_node *inop, - struct iso_mnt *imp) -{ - ISO_RRIP_ANALYZE analyze; - - analyze.inop = inop; - analyze.imp = imp; - analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE; - - return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze); -} - -/* - * Get Alternate Name. 
- */ -static RRIP_TABLE rrip_table_getname[] = { - { "NM", (rrip_table_func)cd9660_rrip_altname, - (rrip_table_func2)cd9660_rrip_defname, - ISO_SUSP_ALTNAME }, - { "CL", (rrip_table_func)cd9660_rrip_pclink, - 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, - { "PL", (rrip_table_func)cd9660_rrip_pclink, - 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, - { "RE", (rrip_table_func)cd9660_rrip_reldir, - 0, ISO_SUSP_RELDIR }, - { "RR", (rrip_table_func)cd9660_rrip_idflag, - 0, ISO_SUSP_IDFLAG }, - { "CE", (rrip_table_func)cd9660_rrip_cont, - 0, ISO_SUSP_CONT }, - { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, - { "", 0, 0, 0 } -}; - -int -cd9660_rrip_getname(struct iso_directory_record *isodir, char *outbuf, - u_short *outlen, ino_t *inump, struct iso_mnt *imp) -{ - ISO_RRIP_ANALYZE analyze; - RRIP_TABLE *tab; - - analyze.outbuf = outbuf; - analyze.outlen = outlen; - analyze.maxlen = ISO_RRIP_NAMEMAX; - analyze.inump = inump; - analyze.imp = imp; - analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK; - *outlen = 0; - - tab = rrip_table_getname; - if (*isodir->name == 0 - || *isodir->name == 1) { - cd9660_rrip_defname(isodir,&analyze); - - analyze.fields &= ~ISO_SUSP_ALTNAME; - tab++; - } - - return cd9660_rrip_loop(isodir,&analyze,tab); -} - -/* - * Get Symbolic Link. 
- */ -static RRIP_TABLE rrip_table_getsymname[] = { - { "SL", (rrip_table_func)cd9660_rrip_slink, - 0, ISO_SUSP_SLINK }, - { "RR", (rrip_table_func)cd9660_rrip_idflag, - 0, ISO_SUSP_IDFLAG }, - { "CE", (rrip_table_func)cd9660_rrip_cont, - 0, ISO_SUSP_CONT }, - { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, - { "", 0, 0, 0 } -}; - -int -cd9660_rrip_getsymname(struct iso_directory_record *isodir, char *outbuf, - u_short *outlen, struct iso_mnt *imp) -{ - ISO_RRIP_ANALYZE analyze; - - analyze.outbuf = outbuf; - analyze.outlen = outlen; - *outlen = 0; - analyze.maxlen = MAXPATHLEN; - analyze.cont = 1; /* don't start with a slash */ - analyze.imp = imp; - analyze.fields = ISO_SUSP_SLINK; - - return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK); -} - -static RRIP_TABLE rrip_table_extref[] = { - { "ER", (rrip_table_func)cd9660_rrip_extref, - 0, ISO_SUSP_EXTREF }, - { "CE", (rrip_table_func)cd9660_rrip_cont, - 0, ISO_SUSP_CONT }, - { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP }, - { "", 0, 0, 0 } -}; - -/* - * Check for Rock Ridge Extension and return offset of its fields. - * Note: We insist on the ER field. - */ -int -cd9660_rrip_offset(struct iso_directory_record *isodir, struct iso_mnt *imp) -{ - ISO_RRIP_OFFSET *p; - ISO_RRIP_ANALYZE analyze; - - imp->rr_skip0 = 0; - p = (ISO_RRIP_OFFSET *)(isodir->name + 1); - if (bcmp(p,"SP\7\1\276\357",6)) { - /* Maybe, it's a CDROM XA disc? */ - imp->rr_skip0 = 15; - p = (ISO_RRIP_OFFSET *)((char *)p + 15); - if (bcmp(p,"SP\7\1\276\357",6)) - return -1; - } - - analyze.imp = imp; - analyze.fields = ISO_SUSP_EXTREF; - if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF)) - return -1; - - return isonum_711(p->skip); -} diff --git a/bsd/isofs/cd9660/cd9660_rrip.h b/bsd/isofs/cd9660/cd9660_rrip.h deleted file mode 100644 index a583811bb..000000000 --- a/bsd/isofs/cd9660/cd9660_rrip.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_rrip.h,v 1.6 1994/12/13 22:33:24 mycroft Exp $ */ - -/*- - * Copyright (c) 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)cd9660_rrip.h 8.2 (Berkeley) 12/5/94 - */ -#ifndef __ISOFS_CD9660_CD9660_RRIP_H__ -#define __ISOFS_CD9660_CD9660_RRIP_H__ - -#include - -#ifdef __APPLE_API_PRIVATE -typedef struct { - char type [ISODCL ( 0, 1)]; - u_char length [ISODCL ( 2, 2)]; /* 711 */ - u_char version [ISODCL ( 3, 3)]; -} ISO_SUSP_HEADER; - -typedef struct { - ISO_SUSP_HEADER h; - char mode [ISODCL ( 4, 11)]; /* 733 */ - char links [ISODCL ( 12, 19)]; /* 733 */ - char uid [ISODCL ( 20, 27)]; /* 733 */ - char gid [ISODCL ( 28, 35)]; /* 733 */ -} ISO_RRIP_ATTR; - -typedef struct { - ISO_SUSP_HEADER h; - char dev_t_high [ISODCL ( 4, 11)]; /* 733 */ - char dev_t_low [ISODCL ( 12, 19)]; /* 733 */ -} ISO_RRIP_DEVICE; - -#define ISO_SUSP_CFLAG_CONTINUE 0x01 -#define ISO_SUSP_CFLAG_CURRENT 0x02 -#define ISO_SUSP_CFLAG_PARENT 0x04 -#define ISO_SUSP_CFLAG_ROOT 0x08 -#define ISO_SUSP_CFLAG_VOLROOT 0x10 -#define ISO_SUSP_CFLAG_HOST 0x20 - -typedef struct { - u_char cflag [ISODCL ( 1, 1)]; - u_char clen [ISODCL ( 2, 2)]; - u_char name [1]; /* XXX */ -} ISO_RRIP_SLINK_COMPONENT; -#define ISO_RRIP_SLSIZ 2 - -typedef struct { - ISO_SUSP_HEADER h; - u_char flags [ISODCL ( 4, 4)]; - u_char component [ISODCL ( 5, 5)]; -} ISO_RRIP_SLINK; - -typedef struct { - ISO_SUSP_HEADER h; - char flags [ISODCL ( 4, 4)]; -} ISO_RRIP_ALTNAME; - -typedef struct { - ISO_SUSP_HEADER h; - char dir_loc [ISODCL ( 4, 11)]; /* 733 */ -} ISO_RRIP_CLINK; - -typedef struct { - ISO_SUSP_HEADER h; - char dir_loc [ISODCL ( 4, 11)]; /* 733 */ -} ISO_RRIP_PLINK; - -typedef struct { - ISO_SUSP_HEADER h; -} ISO_RRIP_RELDIR; - -#define ISO_SUSP_TSTAMP_FORM17 0x80 -#define ISO_SUSP_TSTAMP_FORM7 0x00 -#define ISO_SUSP_TSTAMP_CREAT 0x01 -#define ISO_SUSP_TSTAMP_MODIFY 0x02 -#define ISO_SUSP_TSTAMP_ACCESS 0x04 -#define ISO_SUSP_TSTAMP_ATTR 0x08 -#define ISO_SUSP_TSTAMP_BACKUP 0x10 -#define ISO_SUSP_TSTAMP_EXPIRE 0x20 -#define ISO_SUSP_TSTAMP_EFFECT 0x40 - -typedef struct { - ISO_SUSP_HEADER h; - u_char flags [ISODCL ( 4, 4)]; - u_char 
time [ISODCL ( 5, 5)]; -} ISO_RRIP_TSTAMP; - -typedef struct { - ISO_SUSP_HEADER h; - u_char flags [ISODCL ( 4, 4)]; -} ISO_RRIP_IDFLAG; - -typedef struct { - ISO_SUSP_HEADER h; - char len_id [ISODCL ( 4, 4)]; - char len_des [ISODCL ( 5, 5)]; - char len_src [ISODCL ( 6, 6)]; - char version [ISODCL ( 7, 7)]; -} ISO_RRIP_EXTREF; - -typedef struct { - ISO_SUSP_HEADER h; - char check [ISODCL ( 4, 5)]; - char skip [ISODCL ( 6, 6)]; -} ISO_RRIP_OFFSET; - -typedef struct { - ISO_SUSP_HEADER h; - char location [ISODCL ( 4, 11)]; - char offset [ISODCL ( 12, 19)]; - char length [ISODCL ( 20, 27)]; -} ISO_RRIP_CONT; - -#endif /* __APPLE_API_PRIVATE */ -#endif /* __ISOFS_CD9660_CD9660_RRIP_H__ */ diff --git a/bsd/isofs/cd9660/cd9660_util.c b/bsd/isofs/cd9660/cd9660_util.c deleted file mode 100644 index b3ae1af55..000000000 --- a/bsd/isofs/cd9660/cd9660_util.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_util.c,v 1.8 1994/12/13 22:33:25 mycroft Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_util.c 8.3 (Berkeley) 12/5/94 - * - * HISTORY - * 7-Dec-98 Add ATTR_VOL_MOUNTFLAGS attribute support - djb - * 18-Nov-98 Add support for volfs - djb - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* XXX */ -#include /* XXX */ -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* - * translate and compare a filename - * Note: Version number plus ';' may be omitted. 
- */ -int -isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen) -{ - int i, j; - char c; - - while (--fnlen >= 0) { - if (--isolen < 0) - return *fn; - if ((c = *isofn++) == ';') { - switch (*fn++) { - default: - return *--fn; - case 0: - return 0; - case ';': - break; - } - for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') { - if (*fn < '0' || *fn > '9') { - return -1; - } - } - for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0'); - return i - j; - } - /* if raw comparison fails, check if char was mapped */ - if (c != *fn) { - if (c >= 'A' && c <= 'Z') { - if (c + ('a' - 'A') != *fn) { - if (*fn >= 'a' && *fn <= 'z') - return *fn - ('a' - 'A') - c; - else - return *fn - c; - } - } else if (c == '/') { - if (*fn != ':') - return *fn - c; - } else if (c > 0 || *fn != '_') - return *fn - c; - } - fn++; - } - if (isolen > 0) { - switch (*isofn) { - default: - return -1; - case '.': - if (isofn[1] != ';') - return -1; - case ';': - return 0; - } - } - return 0; -} - - -/* - * translate and compare a UCS-2 filename - * Note: Version number plus ';' may be omitted. - * - * The name pointed to by "fn" is the search name, whose characters are - * in native endian order. The name "ucsfn" is the on-disk name, whose - * characters are in big endian order. 
- */ - -int -ucsfncmp(u_int16_t *fn, int fnlen, u_int16_t *ucsfn, int ucslen) -{ - int i, j; - u_int16_t c; - - /* convert byte count to char count */ - ucslen /= 2; - fnlen /= 2; - - while (--fnlen >= 0) { - if (--ucslen < 0) - return *fn; - if ((c = OSSwapBigToHostInt16(*ucsfn++)) == UCS_SEPARATOR2) { - switch (*fn++) { - default: - return *--fn; - case 0: - return 0; - case UCS_SEPARATOR2: - break; - } - for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') { - if (*fn < '0' || *fn > '9') { - return -1; - } - } - for (j = 0; --ucslen >= 0; j = j * 10 + OSSwapBigToHostInt16(*ucsfn++) - '0'); - return i - j; - } - if (c != *fn) - return *fn - c; - fn++; - } - if (ucslen > 0) { - switch (*ucsfn) { - default: - return -1; - case OSSwapHostToBigConstInt16(UCS_SEPARATOR1): - if (ucsfn[1] != OSSwapHostToBigConstInt16(UCS_SEPARATOR2)) - return -1; - case OSSwapHostToBigConstInt16(UCS_SEPARATOR2): - return 0; - } - } - return 0; -} - - -/* - * translate a filename - */ -void -isofntrans(u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen, - int original, int assoc) -{ - int fnidx = 0; - - /* - * Add a "._" prefix for associated files - */ - if (assoc) { - *outfn++ = ASSOCCHAR1; - *outfn++ = ASSOCCHAR2; - fnidx += 2; - infnlen +=2; - } - for (; fnidx < infnlen; fnidx++) { - char c = *infn++; - - /* - * Some ISO 9600 CD names contain 8-bit chars. - * These chars are mapped to '_' because there - * is no context for mapping them to UTF-8. - * In addition '/' is mapped to ':'. - * - * isofncmp accounts for these mappings. - */ - if (!original) { - if (c < 0) - c = '_'; - else if (c == '/') - c = ':'; - else if (c == '.' 
&& *infn == ';') - break; - else if (c == ';') - break; - } - *outfn++ = c; - } - *outfnlen = fnidx; -} - - - -/* - * translate a UCS-2 filename to UTF-8 - */ -void -ucsfntrans(u_int16_t *infn, int infnlen, u_char *outfn, u_short *outfnlen, - int dir, int assoc) -{ - if (infnlen == 1) { - strcpy(outfn, ".."); - - if (*(u_char*)infn == 0) - *outfnlen = 1; - else if (*(u_char*)infn == 1) - *outfnlen = 2; - } else { - int fnidx; - size_t outbytes; - int flags; - - fnidx = infnlen/2; - flags = 0; - - /* - * Add a "._" prefix for associated files - */ - if (assoc) { - *outfn++ = ASSOCCHAR1; - *outfn++ = ASSOCCHAR2; - } - if (!dir) { - /* strip file version number */ - for (fnidx--; fnidx > 0; fnidx--) { - /* stop when ';' is found */ - if (infn[fnidx] == OSSwapHostToBigConstInt16(UCS_SEPARATOR2)) { - /* drop dangling dot */ - if (fnidx > 0 && infn[fnidx-1] == OSSwapHostToBigConstInt16(UCS_SEPARATOR1)) - fnidx--; - break; - } - } - if (fnidx <= 0) - fnidx = infnlen/2; - } - - flags = UTF_NO_NULL_TERM | UTF_DECOMPOSED | UTF_BIG_ENDIAN; - - (void) utf8_encodestr(infn, fnidx * 2, outfn, &outbytes, ISO_JOLIET_NAMEMAX, 0, flags); - *outfnlen = assoc ? outbytes + 2 : outbytes; - } -} - - -/* - * count the number of children by enumerating the directory - */ -static int -isochildcount(struct vnode *vdp, int *dircnt, int *filcnt) -{ - struct iso_node *dp; - struct buf *bp = NULL; - struct iso_mnt *imp; - struct iso_directory_record *ep; - uint32_t bmask; - int error = 0; - int reclen; - int dirs, files; - int blkoffset; - int logblksize; - int32_t diroffset; - - dp = VTOI(vdp); - imp = dp->i_mnt; - bmask = imp->im_sector_size - 1; - logblksize = imp->im_sector_size; - blkoffset = diroffset = 0; - dirs = files = 0; - - while (diroffset < dp->i_size) { - /* - * If offset is on a block boundary, read the next - * directory block. Release previous if it exists. 
- */ - if ((diroffset & bmask) == 0) { - if (bp != NULL) - buf_brelse(bp); - if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, diroffset), NULL, &bp)) ) - break; - blkoffset = 0; - } - - ep = (struct iso_directory_record *) - (buf_dataptr(bp) + blkoffset); - - reclen = isonum_711(ep->length); - if (reclen == 0) { - /* skip to next block, if any */ - diroffset = - (diroffset & ~bmask) + logblksize; - continue; - } - - if ((reclen < ISO_DIRECTORY_RECORD_SIZE) || - (blkoffset + reclen > logblksize) || - (reclen < ISO_DIRECTORY_RECORD_SIZE + isonum_711(ep->name_len))){ - /* illegal, so give up */ - break; - } - - /* - * Some poorly mastered discs have an incorrect directory - * file size. If the '.' entry has a better size (bigger) - * then use that instead. - */ - if ((diroffset == 0) && (isonum_733(ep->size) > dp->i_size)) { - dp->i_size = isonum_733(ep->size); - } - - if ( isonum_711(ep->flags) & directoryBit ) - dirs++; - else if ((isonum_711(ep->flags) & associatedBit) == 0) - files++; - - diroffset += reclen; - blkoffset += reclen; - } - - if (bp) - buf_brelse (bp); - - *dircnt = dirs; - *filcnt = files; - - return (error); -} - - -static uint32_t -DerivePermissionSummary(uid_t owner, gid_t group, mode_t obj_mode, __unused struct iso_mnt *imp) -{ - kauth_cred_t cred = kauth_cred_get(); - uint32_t permissions; - int is_member; - - /* User id 0 (root) always gets access. */ - if (!suser(cred, NULL)) { - permissions = R_OK | X_OK; - goto Exit; - }; - - /* Otherwise, check the owner. */ - if (owner == kauth_cred_getuid(cred)) { - permissions = ((uint32_t)obj_mode & S_IRWXU) >> 6; - goto Exit; - } - - /* Otherwise, check the groups. */ - if (kauth_cred_ismember_gid(cred, group, &is_member) == 0 && is_member) { - permissions = ((uint32_t)obj_mode & S_IRWXG) >> 3; - goto Exit; - } - - /* Otherwise, settle for 'others' access. 
*/ - permissions = (uint32_t)obj_mode & S_IRWXO; - -Exit: - return permissions & ~W_OK; /* Write access is always impossible */ -} diff --git a/bsd/isofs/cd9660/cd9660_vfsops.c b/bsd/isofs/cd9660/cd9660_vfsops.c deleted file mode 100644 index acdb699a2..000000000 --- a/bsd/isofs/cd9660/cd9660_vfsops.c +++ /dev/null @@ -1,1628 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_vfsops.c,v 1.18 1995/03/09 12:05:36 mycroft Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). 
The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)cd9660_vfsops.c 8.9 (Berkeley) 12/5/94 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * Minutes, Seconds, Frames (M:S:F) - */ -struct CDMSF { - u_char minute; - u_char second; - u_char frame; -}; - -/* - * Table Of Contents - */ -struct CDTOC_Desc { - u_char session; - u_char ctrl_adr; /* typed to be machine and compiler independent */ - u_char tno; - u_char point; - struct CDMSF address; - u_char zero; - struct CDMSF p; -}; - -struct CDTOC { - u_short length; /* in native cpu endian */ - u_char first_session; - u_char last_session; - struct CDTOC_Desc trackdesc[1]; -}; - -#define MSF_TO_LBA(msf) \ - (((((msf).minute * 60UL) + (msf).second) * 75UL) + (msf).frame - 150) - -u_char isonullname[] = "\0"; - -struct vfsops cd9660_vfsops = { - cd9660_mount, - cd9660_start, - cd9660_unmount, - cd9660_root, - NULL, /* quotactl */ - cd9660_vfs_getattr, - cd9660_sync, - cd9660_vget, - cd9660_fhtovp, - cd9660_vptofh, - cd9660_init, - cd9660_sysctl, - NULL, - {NULL} -}; - -/* - * Called by vfs_mountroot when iso is going to be mounted as root. - * - * Name is updated by mount(8) after booting. 
- */ -#define ROOTNAME "root_device" - -static int iso_mountfs(struct vnode *devvp, struct mount *mp, struct user_iso_args *argp, - vfs_context_t context); - -static void DRGetTypeCreatorAndFlags( - struct iso_mnt * theMountPointPtr, - struct iso_directory_record * theDirRecPtr, - u_int32_t * theTypePtr, - u_int32_t * theCreatorPtr, - u_int16_t * theFlagsPtr); - -int -cd9660_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) -{ - int error; - struct user_iso_args args; - - args.flags = ISOFSMNT_ROOT; - args.ssector = 0; - args.toc_length = 0; - args.toc = USER_ADDR_NULL; - - if ((error = iso_mountfs(rvp, mp, &args, context))) - return (error); - - (void)cd9660_statfs(mp, vfs_statfs(mp), context); - - return (0); -} - -/* - * VFS Operations. - * - * mount system call - */ -int -cd9660_mount(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t context) -{ - struct user_iso_args args; - int error; - struct iso_mnt *imp = NULL; - - if (vfs_context_is64bit(context)) { - error = copyin(data, (caddr_t)&args, sizeof(args)); - } - else { - struct iso_args temp; - error = copyin(data, (caddr_t)&temp, sizeof(temp)); - args.flags = temp.flags; - args.ssector = temp.ssector; - args.toc_length = temp.toc_length; - args.toc = CAST_USER_ADDR_T(temp.toc); - } - if (error) - return (error); - - if (vfs_isrdwr(mp)) - return (EROFS); - - /* - * If updating, check whether changing from read-only to - * read/write; if there is no device name, that's all we do. - */ - if (vfs_isupdate(mp)) { - imp = VFSTOISOFS(mp); - if (devvp == 0) - return (0); - } - if ( !vfs_isupdate(mp)) - error = iso_mountfs(devvp, mp, &args, context); - else { - if (devvp != imp->im_devvp) - error = EINVAL; /* needs translation */ - } - if (error) { - return (error); - } - - /* Indicate that we don't support volfs */ - vfs_clearflags(mp, MNT_DOVOLFS); - - return (0); -} - -/* - * Find the BSD device for the physical disk corresponding to the - * mount point's device. 
We use this physical device to read whole - * (2352 byte) sectors from the CD to get the content for the video - * files (tracks). - * - * The "path" argument is the path to the block device that the volume - * is being mounted on (args.fspec). It should be of the form: - * /dev/disk1s0 - * where the last "s0" part is stripped off to determine the physical - * device's path. It is assumed to be in user memory. - */ -static struct vnode * -cd9660_phys_device(mount_t mp, vfs_context_t context) -{ - int err; - char whole_path[64]; // path to "whole" device - char *s, *saved; - struct nameidata nd; - struct vnode *result; - struct vfsstatfs * sfs; - - sfs = vfs_statfs(mp); - result = NULL; - - if (strlen(sfs->f_mntfromname) >= sizeof(whole_path)) - return (NULL); - - /* Make a copy of the mount from name, then remove trailing "s...". */ - strlcpy(whole_path, sfs->f_mntfromname, sizeof(whole_path)); - - /* - * I would use strrchr or rindex here, but those are declared __private_extern__, - * and can't be used across component boundaries at this time. - */ - for (s=whole_path, saved=NULL; *s; ++s) - if (*s == 's') - saved = s; - *saved = '\0'; - - /* Lookup the "whole" device. */ - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(whole_path), context); - err = namei(&nd); - if (err) { - printf("isofs: Cannot find physical device: %s\n", whole_path); - goto done; - } - nameidone(&nd); - - /* Open the "whole" device. */ - err = VNOP_OPEN(nd.ni_vp, FREAD, context); - if (err) { - vnode_put(nd.ni_vp); - printf("isofs: Cannot open physical device: %s\n", whole_path); - goto done; - } - result = nd.ni_vp; -done: - return result; -} - - -/* - * See if the given CD-ROM XA disc appears to be a Video CD - * (version < 2.0; so, not SVCD). If so, fill in the extent - * information for the MPEGAV directory, set the VCD flag, - * and return true. 
- */ -static int -cd9660_find_video_dir(struct iso_mnt *isomp) -{ - int result, err; - struct vnode *rvp = NULL; - struct vnode *videovp = NULL; - struct componentname cn; - char dirname[] = "MPEGAV"; - - result = 0; /* Assume not a video CD */ - - err = cd9660_root(isomp->im_mountp, &rvp, NULL); - if (err) { - printf("cd9660_find_video_dir: cd9660_root failed (%d)\n", err); - return 0; /* couldn't find video dir */ - } - - cn.cn_nameiop = LOOKUP; - cn.cn_flags = ISLASTCN; - cn.cn_context = vfs_context_current(); - cn.cn_pnbuf = dirname; - cn.cn_pnlen = sizeof(dirname)-1; - cn.cn_nameptr = cn.cn_pnbuf; - cn.cn_namelen = cn.cn_pnlen; - - err = VNOP_LOOKUP(rvp, &videovp, &cn, cn.cn_context); - if (err == 0) { - struct iso_node *ip = VTOI(videovp); - result = 1; /* Looks like video CD */ - isomp->video_dir_start = ip->iso_start; - isomp->video_dir_end = ip->iso_start + (ip->i_size >> isomp->im_bshift); - isomp->im_flags2 |= IMF2_IS_VCD; - - vnode_put(videovp); - } - vnode_put(rvp); - - return result; -} - -/* - * Common code for mount and mountroot - */ -static int -iso_mountfs(struct vnode *devvp, struct mount *mp, struct user_iso_args *argp, - vfs_context_t context) -{ - struct iso_mnt *isomp = (struct iso_mnt *)0; - struct buf *bp = NULL; - struct buf *pribp = NULL, *supbp = NULL; - dev_t dev = vnode_specrdev(devvp); - int error = EINVAL; - int breaderr = 0; - u_long iso_bsize, orig_bsize; - int iso_blknum; - int joliet_level; - struct iso_volume_descriptor *vdp = NULL; - struct iso_primary_descriptor *pri = NULL; - struct iso_primary_descriptor *sup = NULL; - struct iso_directory_record *rootp; - int logical_block_size; - u_int8_t vdtype; - int blkoff = argp->ssector; - - if (vfs_isrdwr(mp)) - return (EROFS); - - /* Advisory locking should be handled at the VFS layer */ - vfs_setlocklocal(mp); - - /* Finish initializing hash tables */ - cd9660_hashinit(); - - if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, - (caddr_t)&orig_bsize, 0, context))) - return (error); 
- - /* This is the "logical sector size". The standard says this - * should be 2048 or the physical sector size on the device, - * whichever is greater. For now, we'll just use a constant. - */ - iso_bsize = ISO_DEFAULT_BLOCK_SIZE; - - /* tell IOKit that we're assuming 2K sectors */ - if ((error = VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&iso_bsize, FWRITE, context))) - return (error); - - joliet_level = 0; - for (iso_blknum = 16 + blkoff; iso_blknum < (100 + blkoff); iso_blknum++) { - if ((error = (int)buf_bread(devvp, (daddr64_t)((unsigned)iso_blknum), iso_bsize, NOCRED, &bp))) { - if (bp) { - buf_markaged(bp); - buf_brelse(bp); - bp = NULL; - } - breaderr = error; - printf("iso_mountfs: buf_bread error %d reading block %d\n", error, iso_blknum); - continue; - } - - vdp = (struct iso_volume_descriptor *)((char *)0 + buf_dataptr(bp)); - if (bcmp (vdp->volume_desc_id, ISO_STANDARD_ID, sizeof(vdp->volume_desc_id)) != 0) { -#ifdef DEBUG - printf("cd9660_vfsops.c: iso_mountfs: " - "Invalid ID in volume desciptor.\n"); -#endif - /* There should be a primary volume descriptor followed by any - * secondary volume descriptors, then an end volume descriptor. - * Some discs are mastered without an end volume descriptor or - * they have the type field set and the volume descriptor ID is - * not set. If we at least found a primary volume descriptor, - * mount the disc. - */ - if (pri != NULL) - break; - - error = EINVAL; - goto out; - } - - vdtype = isonum_711 (vdp->type); - if (vdtype == ISO_VD_END) - break; - - if (vdtype == ISO_VD_PRIMARY) { - if (pribp == NULL) { - pribp = bp; - bp = NULL; - pri = (struct iso_primary_descriptor *)vdp; - } - } else if(vdtype == ISO_VD_SUPPLEMENTARY) { - if (supbp == NULL) { - supbp = bp; - bp = NULL; - sup = (struct iso_primary_descriptor *)vdp; - - if ((argp->flags & ISOFSMNT_NOJOLIET) == 0) { - /* - * some Joliet CDs are "out-of-spec and don't correctly - * set the SVD flags. 
We ignore the flags and rely soely - * on the escape_seq - */ - if (bcmp(sup->escape_seq, ISO_UCS2_Level_1, 3) == 0) - joliet_level = 1; - else if (bcmp(sup->escape_seq, ISO_UCS2_Level_2, 3) == 0) - joliet_level = 2; - else if (bcmp(sup->escape_seq, ISO_UCS2_Level_3, 3) == 0) - joliet_level = 3; - } - } - } - - if (bp) { - buf_markaged(bp); - buf_brelse(bp); - bp = NULL; - } - } - - if (bp) { - buf_markaged(bp); - buf_brelse(bp); - bp = NULL; - } - - if (pri == NULL) { - if (breaderr) - error = breaderr; - else - error = EINVAL; - goto out; - } - - logical_block_size = isonum_723 (pri->logical_block_size); - - if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE - || (logical_block_size & (logical_block_size - 1)) != 0) { - error = EINVAL; - goto out; - } - - rootp = (struct iso_directory_record *)pri->root_directory_record; - - MALLOC(isomp, struct iso_mnt *, sizeof(*isomp), M_ISOFSMNT, M_WAITOK); - bzero((caddr_t)isomp, sizeof(*isomp)); - isomp->im_sector_size = ISO_DEFAULT_BLOCK_SIZE; - isomp->logical_block_size = logical_block_size; - isomp->volume_space_size = isonum_733 (pri->volume_space_size); - /* - * Since an ISO9660 multi-session CD can also access previous - * sessions, we have to include them into the space consider- - * ations. This doesn't yield a very accurate number since - * parts of the old sessions might be inaccessible now, but we - * can't do much better. This is also important for the NFS - * filehandle validation. 
- */ - isomp->volume_space_size += blkoff; - bcopy (rootp, isomp->root, sizeof(isomp->root)); - isomp->root_extent = isonum_733 (rootp->extent); - isomp->root_size = isonum_733 (rootp->size); - - /* - * getattrlist wants the volume name, create date and modify date - */ - - /* Remove any trailing white space */ - if ( strlen(pri->volume_id) ) { - char *myPtr; - - myPtr = pri->volume_id + strlen( pri->volume_id ) - 1; - while ( *myPtr == ' ' && myPtr >= pri->volume_id ) { - *myPtr = 0x00; - myPtr--; - } - } - - if (pri->volume_id[0] == 0) - strlcpy(isomp->volume_id, ISO_DFLT_VOLUME_ID, sizeof(isomp->volume_id)); - else - bcopy(pri->volume_id, isomp->volume_id, sizeof(isomp->volume_id)); - cd9660_tstamp_conv17(pri->creation_date, &isomp->creation_date); - cd9660_tstamp_conv17(pri->modification_date, &isomp->modification_date); - - /* See if this is a CD-XA volume */ - if (bcmp( pri->CDXASignature, ISO_XA_ID, - sizeof(pri->CDXASignature) ) == 0 ) { - isomp->im_flags2 |= IMF2_IS_CDXA; - } - - isomp->im_bmask = logical_block_size - 1; - isomp->im_bshift = 0; - while ((1 << isomp->im_bshift) < isomp->logical_block_size) - isomp->im_bshift++; - - buf_markaged(pribp); - buf_brelse(pribp); - pribp = NULL; - - vfs_setfsprivate(mp, (void *)isomp); - vfs_statfs(mp)->f_fsid.val[0] = (long)dev; - vfs_statfs(mp)->f_fsid.val[1] = vfs_typenum(mp); - vfs_setmaxsymlen(mp, 0); - vfs_setflags(mp, MNT_LOCAL); - - isomp->im_mountp = mp; - isomp->im_dev = dev; - isomp->im_devvp = devvp; - - /* - * If the logical block size is not 2K then we must - * set the block device's physical block size to this - * disc's logical block size. 
- * - */ - if (logical_block_size != iso_bsize) { - iso_bsize = logical_block_size; - if ((error = VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&iso_bsize, FWRITE, context))) - goto out; - } - - /* Check the Rock Ridge Extention support */ - if (!(argp->flags & ISOFSMNT_NORRIP)) { - if ( (error = (int)buf_bread(isomp->im_devvp, - (daddr64_t)((unsigned)((isomp->root_extent + isonum_711(rootp->ext_attr_length)))), - isomp->logical_block_size, NOCRED, &bp)) ) { - - printf("iso_mountfs: buf_bread error %d reading block %d\n", - error, isomp->root_extent + isonum_711(rootp->ext_attr_length)); - argp->flags |= ISOFSMNT_NORRIP; - goto skipRRIP; - } - rootp = (struct iso_directory_record *)((char *)0 + buf_dataptr(bp)); - - if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { - argp->flags |= ISOFSMNT_NORRIP; - } else { - argp->flags &= ~ISOFSMNT_GENS; - } - - /* - * The contents are valid, - * but they will get reread as part of another vnode, so... - */ - buf_markaged(bp); - buf_brelse(bp); - bp = NULL; - } -skipRRIP: - - isomp->im_flags = argp->flags & (ISOFSMNT_NORRIP | ISOFSMNT_GENS | - ISOFSMNT_EXTATT | ISOFSMNT_NOJOLIET); - - switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { - default: - isomp->iso_ftype = ISO_FTYPE_DEFAULT; - break; - case ISOFSMNT_GENS|ISOFSMNT_NORRIP: - isomp->iso_ftype = ISO_FTYPE_9660; - break; - case 0: - isomp->iso_ftype = ISO_FTYPE_RRIP; - break; - } - - /* Decide whether to use the Joliet descriptor */ - - if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level != 0) { - char vol_id[32]; - int i, convflags; - size_t convbytes; - u_int16_t *uchp; - - /* - * On Joliet CDs use the UCS-2 volume identifier. - * - * This name can have up to 16 UCS-2 chars. 
- */ - convflags = UTF_DECOMPOSED | UTF_BIG_ENDIAN; - uchp = (u_int16_t *)sup->volume_id; - for (i = 0; i < 16 && uchp[i]; ++i); - if ((utf8_encodestr((u_int16_t *)sup->volume_id, (i * 2), vol_id, - &convbytes, sizeof(vol_id), 0, convflags) == 0) - && convbytes && (vol_id[0] != ' ')) { - char * strp; - - /* Remove trailing spaces */ - strp = vol_id + convbytes - 1; - while (strp > vol_id && *strp == ' ') - *strp-- = '\0'; - bcopy(vol_id, isomp->volume_id, convbytes + 1); - } - - rootp = (struct iso_directory_record *) - sup->root_directory_record; - bcopy (rootp, isomp->root, sizeof(isomp->root)); - isomp->root_extent = isonum_733 (rootp->extent); - isomp->root_size = isonum_733 (rootp->size); - buf_markaged(supbp); - isomp->iso_ftype = ISO_FTYPE_JOLIET; - } - - if (supbp) { - buf_brelse(supbp); - supbp = NULL; - } - - /* If there was a TOC in the arguments, copy it in. */ - if (argp->flags & ISOFSMNT_TOC) { - MALLOC(isomp->toc, struct CDTOC *, argp->toc_length, M_ISOFSMNT, M_WAITOK); - if ((error = copyin(argp->toc, isomp->toc, argp->toc_length))) - goto out; - } - - /* See if this could be a Video CD */ - if ((isomp->im_flags2 & IMF2_IS_CDXA) && cd9660_find_video_dir(isomp)) { - /* Get the 2352-bytes-per-block device. */ - isomp->phys_devvp = cd9660_phys_device(mp, context); - } - - /* Fill the default statfs information */ - (void) cd9660_statfs(mp, vfs_statfs(mp), context); - - return (0); -out: - if (orig_bsize != iso_bsize) { - (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, - (caddr_t)&orig_bsize, FWRITE, context); - } - - if (bp) - buf_brelse(bp); - if (pribp) - buf_brelse(pribp); - if (supbp) - buf_brelse(supbp); - - if (isomp) { - if (isomp->toc) - FREE((caddr_t)isomp->toc, M_ISOFSMNT); - FREE((caddr_t)isomp, M_ISOFSMNT); - - vfs_setfsprivate(mp, (void *)0); - } - return (error); -} - -/* - * Make a filesystem operational. - * Nothing to do at the moment. 
- */ -/* ARGSUSED */ -int -cd9660_start(__unused struct mount *mp, __unused int flags, - __unused vfs_context_t context) -{ - return (0); -} - -/* - * unmount system call - */ -int -cd9660_unmount(struct mount *mp, int mntflags, vfs_context_t context) -{ - struct iso_mnt *isomp; - int error, flags = 0; - int force = 0; - - if ( (mntflags & MNT_FORCE) ) { - flags |= FORCECLOSE; - force = 1; - } - - if ( (error = vflush(mp, NULLVP, flags)) && !force ) - return (error); - - isomp = VFSTOISOFS(mp); - -#ifdef ISODEVMAP - if (isomp->iso_ftype == ISO_FTYPE_RRIP) - iso_dunmap(isomp->im_dev); -#endif - if (isomp->phys_devvp) { - error = VNOP_CLOSE(isomp->phys_devvp, FREAD, context); - if (error && !force) - return error; - vnode_put(isomp->phys_devvp); - } - - if (isomp->toc) - FREE((caddr_t)isomp->toc, M_ISOFSMNT); - FREE((caddr_t)isomp, M_ISOFSMNT); - - return (0); -} - -/* - * Return root of a filesystem - */ -int -cd9660_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) -{ - struct iso_mnt *imp = VFSTOISOFS(mp); - struct iso_directory_record *dp = - (struct iso_directory_record *)imp->root; - ino_t ino = isodirino(dp, imp); - - /* - * With RRIP we must use the `.' entry of the root directory. - * Simply tell vget, that it's a relocated directory. - */ - return (cd9660_vget_internal(mp, ino, vpp, NULL, NULL, - imp->iso_ftype == ISO_FTYPE_RRIP, dp, current_proc())); -} - -/* - * Get file system statistics. 
- */ -/* ARGSUSED */ -int -cd9660_statfs(struct mount *mp, struct vfsstatfs *sbp, - __unused vfs_context_t context) -{ - struct iso_mnt *isomp; - - isomp = VFSTOISOFS(mp); - -#if 0 -#ifdef COMPAT_09 - sbp->f_type = 5; -#else - sbp->f_type = 0; -#endif -#endif - sbp->f_bsize = (uint32_t)isomp->logical_block_size; - sbp->f_iosize = (size_t)sbp->f_bsize; /* XXX */ - sbp->f_blocks = (uint64_t)((unsigned long)isomp->volume_space_size); - sbp->f_bfree = (uint64_t)0; /* total free blocks */ - sbp->f_bavail = (uint64_t)0; /* blocks free for non superuser */ - sbp->f_files = (uint64_t)0; /* total files */ - sbp->f_ffree = (uint64_t)0; /* free file nodes */ - sbp->f_fstypename[(MFSTYPENAMELEN - 1)] = '\0'; - - /* - * Subtypes (flavors) for ISO 9660 - * 0: ISO-9660 - * 1: ISO-9660 (Joliet) - * 2: ISO-9660 (Rockridge) - */ - if (isomp->iso_ftype == ISO_FTYPE_JOLIET) - sbp->f_fssubtype = 1; - else if (isomp->iso_ftype == ISO_FTYPE_RRIP) - sbp->f_fssubtype = 2; - else - sbp->f_fssubtype = 0; - - /* DO NOT use the first spare for flags; it's been reassigned for another use: */ - /* sbp->f_spare[0] = isomp->im_flags; */ - - return (0); -} - -int cd9660_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) -{ - struct iso_mnt *imp; - struct vfsstatfs *stats = vfs_statfs(mp); - - imp = VFSTOISOFS(mp); - - /* - * We don't know reasonable values for f_objcount, f_filecount, - * f_dircount, f_maxobjcount so don't bother making up (poor) - * numbers like 10.3.x and earlier did. 
- */ - - VFSATTR_RETURN(fsap, f_iosize, stats->f_iosize); - VFSATTR_RETURN(fsap, f_blocks, stats->f_blocks); - VFSATTR_RETURN(fsap, f_bfree, stats->f_bfree); - VFSATTR_RETURN(fsap, f_bavail, stats->f_bavail); - VFSATTR_RETURN(fsap, f_bused, stats->f_blocks); - - /* We don't have file counts, so don't return them */ - - /* f_fsid and f_owner should be handled by VFS */ - - /* We don't have a value for f_uuid */ - - if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = - (imp->iso_ftype == ISO_FTYPE_RRIP ? VOL_CAP_FMT_SYMBOLICLINKS : 0) | - (imp->iso_ftype == ISO_FTYPE_RRIP ? VOL_CAP_FMT_HARDLINKS : 0) | - (imp->iso_ftype == ISO_FTYPE_RRIP || imp->iso_ftype == ISO_FTYPE_JOLIET - ? VOL_CAP_FMT_CASE_SENSITIVE : 0) | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_ATTRLIST | - VOL_CAP_INT_NFSEXPORT; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0; - - fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_PERSISTENTOBJECTIDS | - VOL_CAP_FMT_SYMBOLICLINKS | - VOL_CAP_FMT_HARDLINKS | - VOL_CAP_FMT_JOURNAL | - VOL_CAP_FMT_JOURNAL_ACTIVE | - VOL_CAP_FMT_NO_ROOT_TIMES | - VOL_CAP_FMT_SPARSE_FILES | - VOL_CAP_FMT_ZERO_RUNS | - VOL_CAP_FMT_CASE_SENSITIVE | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_2TB_FILESIZE; - fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_SEARCHFS | - VOL_CAP_INT_ATTRLIST | - VOL_CAP_INT_NFSEXPORT | - VOL_CAP_INT_ALLOCATE | - VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK; - fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; - fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; - - VFSATTR_SET_SUPPORTED(fsap, f_capabilities); - } - - if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { - /* - * VFS should really set these based on the vfs_attr and 
vnop_attr - * fields the file system supports, combined with the conversions - * VFS has implemented. - */ - -#define ISOFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_PAROBJID | ATTR_CMN_CRTIME | ATTR_CMN_BKUPTIME | ATTR_CMN_PARENTID)) -#define ISOFS_ATTR_VOL_VALIDMASK (ATTR_VOL_VALIDMASK & ~(ATTR_VOL_OBJCOUNT | ATTR_VOL_FILECOUNT | ATTR_VOL_DIRCOUNT | ATTR_VOL_MAXOBJCOUNT | ATTR_VOL_NAME)) -#define ISOFS_ATTR_DIR_VALIDMASK (ATTR_DIR_VALIDMASK & ~(ATTR_DIR_ENTRYCOUNT)) - - fsap->f_attributes.validattr.commonattr = ISOFS_ATTR_CMN_VALIDMASK; - fsap->f_attributes.validattr.volattr = ISOFS_ATTR_VOL_VALIDMASK; - fsap->f_attributes.validattr.dirattr = ISOFS_ATTR_DIR_VALIDMASK; - fsap->f_attributes.validattr.fileattr = ATTR_FILE_VALIDMASK; - fsap->f_attributes.validattr.forkattr = ATTR_FORK_VALIDMASK; - - fsap->f_attributes.nativeattr.commonattr = ISOFS_ATTR_CMN_VALIDMASK; - fsap->f_attributes.nativeattr.volattr = ISOFS_ATTR_VOL_VALIDMASK; - fsap->f_attributes.nativeattr.dirattr = ISOFS_ATTR_DIR_VALIDMASK; - fsap->f_attributes.nativeattr.fileattr = ATTR_FILE_VALIDMASK; - fsap->f_attributes.nativeattr.forkattr = ATTR_FORK_VALIDMASK; - - VFSATTR_SET_SUPPORTED(fsap, f_attributes); - } - - VFSATTR_RETURN(fsap, f_create_time, imp->creation_date); - VFSATTR_RETURN(fsap, f_modify_time, imp->modification_date); - /* No explicit access time, so let VFS pick a default value */ - /* No explicit backup time, so let VFS pick a default value */ - - return 0; -} - -/* ARGSUSED */ -int -cd9660_sync(__unused struct mount *mp, __unused int waitfor, - __unused vfs_context_t context) -{ - - return (0); -} - -/* - * File handle to vnode - * - * Have to be really careful about stale file handles: - * - check that the inode number is in range - * - call iget() to get the locked inode - * - check for an unallocated inode (i_mode == 0) - * - check that the generation number matches - */ - -struct ifid { - int ifid_ino; - long ifid_start; -}; - -/* ARGSUSED */ -int -cd9660_fhtovp(mount_t 
mp, int fhlen, unsigned char *fhp, vnode_t *vpp, vfs_context_t context) -{ - struct ifid *ifhp = (struct ifid *)fhp; - struct iso_node *ip; - struct vnode *nvp; - int error; - - if (fhlen < (int)sizeof(struct ifid)) - return (EINVAL); - -#ifdef ISOFS_DBG - printf("fhtovp: ino %d, start %ld\n", - ifhp->ifid_ino, ifhp->ifid_start); -#endif - - if ( (error = VFS_VGET(mp, (ino64_t)ntohl(ifhp->ifid_ino), &nvp, context)) ) { - *vpp = NULLVP; - return (error); - } - ip = VTOI(nvp); - if (ip->inode.iso_mode == 0) { - vnode_put(nvp); - *vpp = NULLVP; - return (ESTALE); - } - *vpp = nvp; - return (0); -} - -/* - * Scan the TOC for the track which contains the given sector. - * - * If there is no matching track, or no TOC, then return -1. - */ -static int -cd9660_track_for_sector(struct CDTOC *toc, u_int sector) -{ - int i, tracks, result; - - if (toc == NULL) - return -1; - - tracks = toc->length / sizeof(struct CDTOC_Desc); - - result = -1; /* Sentinel in case we don't find the right track. */ - for (i=0; itrackdesc[i].point < 100 && MSF_TO_LBA(toc->trackdesc[i].p) <= sector) { - result = toc->trackdesc[i].point; - } - } - - return result; -} - -/* - * Determine whether the given node is really a video CD video - * file. Return non-zero if it appears to be a video file. - */ -static int -cd9660_is_video_file(struct iso_node *ip, struct iso_mnt *imp) -{ - int lbn; - int track; - - /* Check whether this could really be a Video CD at all */ - if (((imp->im_flags2 & IMF2_IS_VCD) == 0) || - imp->phys_devvp == NULL || - imp->toc == NULL) - { - return 0; /* Doesn't even look like VCD... */ - } - - /* Make sure it is a file */ - if ((ip->inode.iso_mode & S_IFMT) != S_IFREG) - return 0; /* Not even a file... */ - - /* - * And in the right directory. This assumes the same inode - * number convention that cd9660_vget_internal uses (that - * part of the inode number is the block containing the - * file's directory entry). 
- */ - lbn = lblkno(imp, ip->i_number); - if (lbn < imp->video_dir_start || lbn >= imp->video_dir_end) - return 0; /* Not in the correct directory */ - - /* - * If we get here, the file should be a video file, but - * do a couple of extra sanity checks just to be sure. - * First, verify the form of the name - */ - if (strlen(ip->i_namep) != 11 || /* Wrong length? */ - bcmp(ip->i_namep+7, ".DAT", 4) || /* Wrong extension? */ - (bcmp(ip->i_namep, "AVSEQ", 5) && /* Wrong beginning? */ - bcmp(ip->i_namep, "MUSIC", 5))) - { - return 0; /* Invalid name format */ - } - - /* - * Verify that AVSEQnn.DAT is in track #(nn+1). This would - * not be appropriate for Super Video CD, which allows - * multiple sessions, so the track numbers might not - * match up like this. - */ - track = (ip->i_namep[5] - '0') * 10 + ip->i_namep[6] - '0'; - if (track != (cd9660_track_for_sector(imp->toc, ip->iso_start) - 1)) - { - return 0; /* Wrong number in name */ - } - - /* It must be a video file if we got here. */ - return 1; -} - -int -cd9660_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context) -{ - /* - * XXXX - * It would be nice if we didn't always set the `relocated' flag - * and force the extra read, but I don't want to think about fixing - * that right now. 
- */ - - return ( cd9660_vget_internal( mp, (ino_t)ino, vpp, NULL, NULL, - 0, (struct iso_directory_record *) 0, current_proc()) ); -} - -int -cd9660_vget_internal(mount_t mp, ino_t ino, vnode_t *vpp, vnode_t dvp, - struct componentname *cnp, int relocated, - struct iso_directory_record *isodir, proc_t p) -{ - struct iso_mnt *imp; - struct iso_node *ip; - buf_t bp = NULL; - vnode_t vp; - dev_t dev; - int error; - struct vnode_fsparam vfsp; - enum vtype vtype; - int is_video_file = 0; - - *vpp = NULLVP; - imp = VFSTOISOFS(mp); - dev = imp->im_dev; -#if 0 - /* Check for unmount in progress */ - if (mp->mnt_kern_flag & MNTK_UNMOUNT) - return (EPERM); -#endif - - MALLOC_ZONE(ip, struct iso_node *, sizeof(struct iso_node), - M_ISOFSNODE, M_WAITOK); - /* - * MALLOC_ZONE may block, so check for the inode being - * present in the hash after we get back... - * we also assume that we're under a filesystem lock - * so that we're not reentered between the ihashget and - * the ihashins... - */ - if ((*vpp = cd9660_ihashget(dev, ino, p)) != NULLVP) { - FREE_ZONE(ip, sizeof(struct iso_node), M_ISOFSNODE); - return (0); - } - bzero((caddr_t)ip, sizeof(struct iso_node)); - - ip->i_dev = dev; - ip->i_number = ino; - ip->i_namep = &isonullname[0]; - ip->i_mnt = imp; - ip->i_devvp = imp->im_devvp; - - SET(ip->i_flag, ISO_INALLOC); - /* - * Put it onto its hash chain and lock it so that other requests for - * this inode will block if they arrive while we are sleeping waiting - * for old data structures to be purged or for the contents of the - * disk portion of this inode to be read. 
- */ - cd9660_ihashins(ip); - - if (isodir == 0) { - int lbn, off; - - lbn = lblkno(imp, ino); - - if (lbn >= imp->volume_space_size) { - printf("fhtovp: lbn exceed volume space %d\n", lbn); - error = ESTALE; - goto errout; - } - off = blkoff(imp, ino); - - if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { - printf("fhtovp: crosses block boundary %d\n", - off + ISO_DIRECTORY_RECORD_SIZE); - error = ESTALE; - goto errout; - } - - error = (int)buf_bread(imp->im_devvp, (daddr64_t)((unsigned)lbn), - imp->logical_block_size, NOCRED, &bp); - if (error) { - printf("fhtovp: buf_bread error %d\n",error); - goto errout; - } - isodir = (struct iso_directory_record *)(buf_dataptr(bp) + off); - - if (off + isonum_711(isodir->length) > imp->logical_block_size) { - printf("fhtovp: directory crosses block boundary " - "%d[off=%d/len=%d]\n", - off +isonum_711(isodir->length), off, - isonum_711(isodir->length)); - error = ESTALE; - goto errout; - } - - /* - * for directories we can get parentID from adjacent - * parent directory record - */ - if ((isonum_711(isodir->flags) & directoryBit) - && (isodir->name[0] == 0)) { - struct iso_directory_record *pdp; - - pdp = (struct iso_directory_record *) - ((char *)0 + buf_dataptr(bp) + isonum_711(isodir->length)); - if ((isonum_711(pdp->flags) & directoryBit) - && (pdp->name[0] == 1)) - ip->i_parent = isodirino(pdp, imp); - } - } - if (relocated) { - daddr64_t lbn; - - if (bp) { - buf_brelse(bp); - bp = NULL; - } - /* - * On relocated directories we must - * read the `.' entry out of a dir. - */ - ip->iso_start = ino >> imp->im_bshift; - /* - * caclulate the correct lbn to read block 0 - * of this node... this used to be a cd9660_blkatoff, but - * that requires the vnode to already be 'cooked'... in - * the new world, we don't create a vnode until the inode - * has been fully initialized... 
cd9660_blkatoff generates - * a buf_bread for im_sector_size associated with the node's vp - * I'm replacing it with a buf_bread for the same size and from - * the same location on the disk, but associated with the devvp - */ - lbn = (daddr64_t)((unsigned)ip->iso_start) + 0; - - if ((error = (int)buf_bread(imp->im_devvp, lbn, imp->im_sector_size, NOCRED, &bp))) - goto errout; - - isodir = (struct iso_directory_record *)((char *)0 + buf_dataptr(bp)); - } - - /* - * go get apple extensions to ISO directory record or use - * defaults when there are no apple extensions. - */ - if ( ((isonum_711( isodir->flags ) & directoryBit) == 0) && - (imp->iso_ftype != ISO_FTYPE_RRIP) ) { - /* This is an ISO directory record for a file */ - DRGetTypeCreatorAndFlags(imp, isodir, &ip->i_FileType, - &ip->i_Creator, &ip->i_FinderFlags); - - if (isonum_711(isodir->flags) & associatedBit) - ip->i_flag |= ISO_ASSOCIATED; - } - - /* - * Shadow the ISO 9660 invisible state to the FinderInfo - */ - if (isonum_711(isodir->flags) & existenceBit) { - ip->i_FinderFlags |= fInvisibleBit; - } - - ip->iso_extent = isonum_733(isodir->extent); - ip->i_size = isonum_733(isodir->size); - ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; - /* - * account for AppleDouble header - */ - if (ip->i_flag & ISO_ASSOCIATED) - ip->i_size += ADH_SIZE; - - /* - * if we have a valid name, fill in i_namep with UTF-8 name - */ - if (isonum_711(isodir->name_len) != 0) { - u_char *utf8namep; - u_short namelen; - ino_t inump = 0; - - MALLOC(utf8namep, u_char *, ISO_RRIP_NAMEMAX + 1, M_TEMP, M_WAITOK); - namelen = isonum_711(isodir->name_len); - - switch (imp->iso_ftype) { - case ISO_FTYPE_RRIP: - cd9660_rrip_getname(isodir, utf8namep, &namelen, &inump, imp); - break; - - case ISO_FTYPE_JOLIET: - ucsfntrans((u_int16_t *)isodir->name, namelen, - utf8namep, &namelen, - isonum_711(isodir->flags) & directoryBit, ip->i_flag & ISO_ASSOCIATED); - break; - - default: - isofntrans (isodir->name, namelen, - 
utf8namep, &namelen, - imp->iso_ftype == ISO_FTYPE_9660, ip->i_flag & ISO_ASSOCIATED); - } - - utf8namep[namelen] = '\0'; - MALLOC(ip->i_namep, u_char *, namelen + 1, M_TEMP, M_WAITOK); - bcopy(utf8namep, ip->i_namep, namelen + 1); - FREE(utf8namep, M_TEMP); - } - - /* - * Setup time stamp, attribute - */ - switch (imp->iso_ftype) { - default: /* ISO_FTYPE_9660 */ - { - buf_t bp2 = NULL; - daddr64_t lbn; - int off; - - if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) { - - lbn = (daddr64_t)((unsigned)ip->iso_start - off); - - if ((error = (int)buf_bread(imp->im_devvp, lbn, imp->im_sector_size, NOCRED, &bp2))) { - if (bp2) - buf_brelse(bp2); - goto errout; - } - } else - bp2 = NULL; - - cd9660_defattr(isodir, ip, bp2); - cd9660_deftstamp(isodir, ip, bp2); - - if (bp2) - buf_brelse(bp2); - break; - } - case ISO_FTYPE_RRIP: - cd9660_rrip_analyze(isodir, ip, imp); - break; - } - /* - * See if this is a Video CD file. If so, we must adjust the - * length to account for larger sectors plus the RIFF header. - * We also must substitute the vnop_read and vnop_pagein functions. - * - * The cd9660_is_video_file routine assumes that the inode has - * been completely set up; it refers to several fields. - * - * This must be done before we release bp, because isodir - * points into bp's data. 
- */ - if (cd9660_is_video_file(ip, imp)) - { - cd9660_xa_init(ip, isodir); - - is_video_file = 1; - } - if (ip->iso_extent == imp->root_extent) { - ip->i_parent = 1; /* root's parent is always 1 by convention */ - /* mode type must be S_IFDIR */ - ip->inode.iso_mode = (ip->inode.iso_mode & ~S_IFMT) | S_IFDIR; - } - vtype = IFTOVT(ip->inode.iso_mode); -#if !FIFO - if (vtype == VFIFO) { - error = ENOTSUP; - goto errout; - } -#endif -#ifdef ISODEVMAP - if (vtype == VCHR || vtype == VBLK) { - struct iso_dnode *dp; - - if (dp = iso_dmap(dev, ino, 0)) - ip->inode.iso_rdev = dp->d_dev; - } -#endif - /* - * create the associated vnode - */ - //bzero(&vfsp, sizeof(struct vnode_fsparam)); - vfsp.vnfs_mp = mp; - vfsp.vnfs_vtype = vtype; - vfsp.vnfs_str = "cd9660"; - vfsp.vnfs_dvp = dvp; - vfsp.vnfs_fsnode = ip; - vfsp.vnfs_cnp = cnp; - - if (is_video_file) - vfsp.vnfs_vops = cd9660_cdxaop_p; - else if (vtype == VFIFO ) - vfsp.vnfs_vops = cd9660_fifoop_p; - else if (vtype == VBLK || vtype == VCHR) - vfsp.vnfs_vops = cd9660_specop_p; - else - vfsp.vnfs_vops = cd9660_vnodeop_p; - - if (vtype == VBLK || vtype == VCHR) - vfsp.vnfs_rdev = ip->inode.iso_rdev; - else - vfsp.vnfs_rdev = 0; - - vfsp.vnfs_filesize = ip->i_size; - - if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) - vfsp.vnfs_flags = 0; - else - vfsp.vnfs_flags = VNFS_NOCACHE; - - /* Tag root directory */ - if (ip->iso_extent == imp->root_extent) - vfsp.vnfs_markroot = 1; - else - vfsp.vnfs_markroot = 0; - - vfsp.vnfs_marksystem = 0; - - if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp)) ) - goto errout; - - ip->i_vnode = vp; - - vnode_ref(ip->i_devvp); - vnode_addfsref(vp); - vnode_settag(vp, VT_ISOFS); - - if (bp) - buf_brelse(bp); - *vpp = vp; - - CLR(ip->i_flag, ISO_INALLOC); - - if (ISSET(ip->i_flag, ISO_INWALLOC)) - wakeup(ip); - - return (0); - -errout: - if (bp) - buf_brelse(bp); - cd9660_ihashrem(ip); - - if (ISSET(ip->i_flag, ISO_INWALLOC)) - wakeup(ip); - - FREE_ZONE(ip, sizeof(struct 
iso_node), M_ISOFSNODE); - - return (error); -} - - -/************************************************************************ - * - * Function: DRGetTypeCreatorAndFlags - * - * Purpose: Set up the fileType, fileCreator and fileFlags - * - * Returns: none - * - * Side Effects: sets *theTypePtr, *theCreatorPtr, and *theFlagsPtr - * - * Description: - * - * Revision History: - * 28 Jul 88 BL�B Added a new extension type of 6, which allows - * the specification of four of the finder flags. - * We let the creator of the disk just copy over - * the finder flags, but we only look at always - * switch launch, system, bundle, and locked bits. - * 15 Aug 88 BL�B The Apple extensions to ISO 9660 implemented the - * padding field at the end of a directory record - * incorrectly. - * 19 Jul 89 BG Rewrote routine to handle the "new" Apple - * Extensions definition, as well as take into - * account the possibility of "other" definitions. - * 02 Nov 89 BG Corrected the 'AA' SystemUseID processing to - * check for SystemUseID == 2 (HFS). Was incorrectly - * checking for SystemUseID == 1 (ProDOS) before. - * 18 Mar 92 CMP Fixed the check for whether len_fi was odd or even. - * Before it would always assume even for an XA record. - * 26 Dec 97 jwc Swiped from MacOS implementation of ISO 9660 CD-ROM - * support and modified to work in MacOSX file system. 
- * - *********************************************************************** */ - -static void -DRGetTypeCreatorAndFlags( struct iso_mnt * theMountPointPtr, - struct iso_directory_record * theDirRecPtr, - u_int32_t * theTypePtr, - u_int32_t * theCreatorPtr, - u_int16_t * theFlagsPtr ) -{ - int foundStuff; - u_int32_t myType; - u_int32_t myCreator; - AppleExtension *myAppleExtPtr; - NewAppleExtension *myNewAppleExtPtr; - u_int16_t myFinderFlags; - char *myPtr; - - foundStuff = 1; - myType = 0x3f3f3f3f; - myCreator = 0x3f3f3f3f; - myFinderFlags = 0; - *theFlagsPtr = 0x0000; - - /* - * handle the fact that our original apple extensions didn't take - * into account the padding byte on a file name - */ - - myPtr = &theDirRecPtr->name[ (isonum_711(theDirRecPtr->name_len)) ]; - - /* if string length is even, bump myPtr for padding byte */ - if ( ((isonum_711(theDirRecPtr->name_len)) & 0x01) == 0 ) - myPtr++; - myAppleExtPtr = (AppleExtension *) myPtr; - - /* - * checking for whether or not the new 'AA' code is being - * called (and if so, correctly) - */ - if ( (isonum_711(theDirRecPtr->length)) <= - ISO_DIRECTORY_RECORD_SIZE + (isonum_711(theDirRecPtr->name_len)) ) { - foundStuff = 0; - goto DoneLooking; - } - - foundStuff = 0; /* now we default to *false* until we find a good one */ - myPtr = (char *) myAppleExtPtr; - - if ( (theMountPointPtr->im_flags2 & IMF2_IS_CDXA) != 0 ) - myPtr += 14;/* add in CD-XA fixed record offset (tnx, Phillips) */ - myNewAppleExtPtr = (NewAppleExtension *) myPtr; - - /* - * Calculate the "real" end of the directory record information. - * - * Note: We always read the first 4 bytes of the System-Use data, so - * adjust myPtr down so we don't read off the end of the directory! 
- */ - myPtr = ((char *) theDirRecPtr) + (isonum_711(theDirRecPtr->length)); - myPtr -= sizeof(NewAppleExtension) - 1; - while( (char *) myNewAppleExtPtr < myPtr ) /* end of directory buffer */ - { - /* - * If we get here, we can assume that ALL further entries in this - * directory record are of the form: - * - * struct OptionalSystemUse - * { - * byte Signature[2]; - * byte OSULength; - * byte systemUseID; - * byte fileType[4]; # only if HFS - * byte fileCreator[4]; # only if HFS - * byte finderFlags[2]; # only if HFS - * }; - * - * This means that we can examine the Signature bytes to see - * if they are 'AA' (the NEW Apple extension signature). - * If they are, deal with them. If they aren't, - * the OSULength field will tell us how long this extension - * info is (including the signature and length bytes) and that - * will allow us to walk the OptionalSystemUse records until - * we hit the end of them or run off the end of the - * directory record. - */ - u_char *myFromPtr, *myToPtr; - union - { - u_int32_t fourchars; - u_char chars[4]; - } myChars; - - if ( (myNewAppleExtPtr->signature[0] == 'A') && - (myNewAppleExtPtr->signature[1] == 'A') ) { - if ( isonum_711(myNewAppleExtPtr->systemUseID) == 2 ) { - /* HFS */ - foundStuff = 1; /* we got one! 
*/ - - myFromPtr = &myNewAppleExtPtr->fileType[0]; - myToPtr = &myChars.chars[0]; - *myToPtr++ = *myFromPtr++; - *myToPtr++ = *myFromPtr++; - *myToPtr++ = *myFromPtr++; - *myToPtr = *myFromPtr; - myType = myChars.fourchars; /* copy file type to user var */ - - myFromPtr = &myNewAppleExtPtr->fileCreator[0]; - myToPtr = &myChars.chars[0]; - *myToPtr++ = *myFromPtr++; - *myToPtr++ = *myFromPtr++; - *myToPtr++ = *myFromPtr++; - *myToPtr = *myFromPtr; - myCreator = myChars.fourchars; /* copy creator to user var */ - - myFromPtr = &myNewAppleExtPtr->finderFlags[0]; - myToPtr = &myChars.chars[2]; /* *flags* is a short */ - myChars.fourchars = 0; - *myToPtr++ = *myFromPtr++; - *myToPtr = *myFromPtr; - myFinderFlags = myChars.fourchars; - myFinderFlags &= - ( fAlwaysBit | fSystemBit | fHasBundleBit | fLockedBit ); - /* return Finder flags to user var */ - *theFlagsPtr = (myFinderFlags | fInitedBit); - - break; /* exit the loop */ - } - } - - /* - * Check to see if we have a reasonable OSULength value. - * ZERO is not an acceptable value. Nor is any value less than 4. - */ - - if ( (isonum_711(myNewAppleExtPtr->OSULength)) < 4 ) - break; /* not acceptable - get out! 
*/ - - /* otherwise, step past this SystemUse record */ - (char *)myNewAppleExtPtr += (isonum_711(myNewAppleExtPtr->OSULength)); - - } /* end of while loop */ - -DoneLooking: - if ( foundStuff != 0 ) { - *theTypePtr = myType; - *theCreatorPtr = myCreator; - } else { - *theTypePtr = 0; - *theCreatorPtr = 0; - } - - return; - -} /* DRGetTypeCreatorAndFlags */ - - -/* - * Vnode pointer to File handle - */ -/* ARGSUSED */ -int -cd9660_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context) -{ - struct iso_node *ip = VTOI(vp); - struct ifid *ifhp; - - if (*fhlenp < (int)sizeof(struct ifid)) - return (EOVERFLOW); - - ifhp = (struct ifid *)fhp; - - ifhp->ifid_ino = htonl(ip->i_number); - ifhp->ifid_start = htonl(ip->iso_start); - *fhlenp = sizeof(struct ifid); - -#ifdef ISOFS_DBG - printf("vptofh: ino %d, start %ld\n", - ifhp->ifid_ino,ifhp->ifid_start); -#endif - return (0); -} - -/* - * Fast-FileSystem only? - */ -int -cd9660_sysctl(__unused int *name, __unused u_int namelen, __unused user_addr_t oldp, - __unused size_t *oldlenp, __unused user_addr_t newp, - __unused size_t newlen, __unused vfs_context_t context) -{ - return (ENOTSUP); -} - diff --git a/bsd/isofs/cd9660/cd9660_vnops.c b/bsd/isofs/cd9660/cd9660_vnops.c deleted file mode 100644 index a9eadab4f..000000000 --- a/bsd/isofs/cd9660/cd9660_vnops.c +++ /dev/null @@ -1,1320 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: cd9660_vnops.c,v 1.22 1994/12/27 19:05:12 mycroft Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)cd9660_vnops.c 8.15 (Berkeley) 12/5/94 - * - * HISTORY - * 02-Feb-00 chw Add cd9660_copyfile to return error - * 29-Sep-98 djb Add cd9660_getattrlist VOP for VDI support. 
- * 15-sep-98 added cd9660_rmdir to do proper unlocking - chw - * 12-aug-98 added cd9660_remove which will do proper unlocking - chw - * 17-Feb-98 radar 1669467 - changed lock protocols to use the lock manager - chw - * 22-Jan-98 radar 1669467 - ISO 9660 CD support - jwc - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include /* kmem_alloc, kmem_free */ - -#include -#include -#include - -/* - * Open called. - * - * Nothing to do. - */ -int -cd9660_open(__unused struct vnop_open_args *ap) -{ - return (0); -} - -/* - * Close called - * - * Update the times on the inode on writeable file systems. - */ -int -cd9660_close(__unused struct vnop_close_args *ap) -{ - return (0); -} - -int -cd9660_getattr(struct vnop_getattr_args *ap) -{ - struct vnode *vp = ap->a_vp; - register struct vnode_attr *vap = ap->a_vap; - register struct iso_node *ip = VTOI(vp); - - VATTR_RETURN(vap, va_fsid, ip->i_dev); - VATTR_RETURN(vap, va_fileid, ip->i_number); - - VATTR_RETURN(vap, va_mode, ip->inode.iso_mode); - VATTR_RETURN(vap, va_nlink, ip->inode.iso_links); - VATTR_RETURN(vap, va_uid, ip->inode.iso_uid); - VATTR_RETURN(vap, va_gid, ip->inode.iso_gid); - VATTR_RETURN(vap, va_access_time, ip->inode.iso_atime); - VATTR_RETURN(vap, va_modify_time, ip->inode.iso_mtime); - VATTR_RETURN(vap, va_change_time, ip->inode.iso_ctime); - VATTR_RETURN(vap, va_rdev, ip->inode.iso_rdev); - - VATTR_RETURN(vap, va_data_size, (off_t)ip->i_size); - if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { - struct vnop_readlink_args rdlnk; - uio_t auio; - char uio_buf[ UIO_SIZEOF(1) ]; - char *cp; - - MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK); - auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, CAST_USER_ADDR_T(cp), 
MAXPATHLEN); - - rdlnk.a_uio = auio; - rdlnk.a_vp = ap->a_vp; - rdlnk.a_context = ap->a_context; - if (cd9660_readlink(&rdlnk) == 0) - // LP64todo - fix this! - VATTR_RETURN(vap, va_data_size, MAXPATHLEN - uio_resid(auio)); - FREE(cp, M_TEMP); - } - VATTR_RETURN(vap, va_flags, 0); - VATTR_RETURN(vap, va_gen, 1); - VATTR_RETURN(vap, va_iosize, ip->i_mnt->logical_block_size); - VATTR_RETURN(vap, va_total_size, ip->i_size + ip->i_rsrcsize); - - return (0); -} - - -/* - * Vnode op for reading. - */ -int -cd9660_read(struct vnop_read_args *ap) -{ - struct vnode *vp = ap->a_vp; - register struct uio *uio = ap->a_uio; - register struct iso_node *ip = VTOI(vp); - register struct iso_mnt *imp; - struct buf *bp; - daddr_t lbn; - daddr64_t rablock; - off_t diff; - int rasize, error = 0; - int32_t size, n, on; - - if (uio_resid(uio) == 0) - return (0); - if (uio->uio_offset < 0) - return (EINVAL); - - imp = ip->i_mnt; - - if (UBCINFOEXISTS(vp)) { - /* - * Copy any part of the Apple Double header. - */ - if ((ip->i_flag & ISO_ASSOCIATED) && (uio->uio_offset < ADH_SIZE)) { - apple_double_header_t header; - int bytes; - - if (uio->uio_offset < sizeof(apple_double_header_t)) { - header.magic = APPLEDOUBLE_MAGIC; - header.version = APPLEDOUBLE_VERSION; - header.count = 2; - header.entries[0].entryID = APPLEDOUBLE_FINDERINFO; - header.entries[0].offset = offsetof(apple_double_header_t, finfo); - header.entries[0].length = 32; - header.entries[1].entryID = APPLEDOUBLE_RESFORK; - header.entries[1].offset = ADH_SIZE; - header.entries[1].length = ip->i_size - ADH_SIZE; - header.finfo.fdType = ip->i_FileType; - header.finfo.fdCreator = ip->i_Creator; - header.finfo.fdFlags = ip->i_FinderFlags; - header.finfo.fdLocation.v = -1; - header.finfo.fdLocation.h = -1; - header.finfo.fdReserved = 0; - - bytes = min(uio_resid(uio), sizeof(apple_double_header_t) - uio->uio_offset); - error = uiomove(((char *) &header) + uio->uio_offset, bytes, uio); - if (error) - return error; - } - if 
(uio_resid(uio) && uio->uio_offset < ADH_SIZE) { - caddr_t buffer; - - if (kmem_alloc(kernel_map, (vm_offset_t *)&buffer, ADH_SIZE)) { - return (ENOMEM); - } - bytes = min(uio_resid(uio), ADH_SIZE - uio->uio_offset); - error = uiomove(((char *) buffer) + uio->uio_offset, bytes, uio); - kmem_free(kernel_map, (vm_offset_t)buffer, ADH_SIZE); - if (error) - return error; - } - } - if (uio_resid(uio) > 0) - error = cluster_read(vp, uio, (off_t)ip->i_size, ap->a_ioflag); - } else { - - do { - lbn = lblkno(imp, uio->uio_offset); - on = blkoff(imp, uio->uio_offset); - n = min((u_int)(imp->logical_block_size - on), - uio_resid(uio)); - diff = (off_t)ip->i_size - uio->uio_offset; - if (diff <= 0) - return (0); - if (diff < n) - n = diff; - size = blksize(imp, ip, lbn); - rablock = (daddr64_t)lbn + 1; - - if (ip->i_lastr + 1 == lbn && - lblktosize(imp, rablock) < ip->i_size) { - rasize = blksize(imp, ip, (daddr_t)rablock); - error = (int)buf_breadn(vp, (daddr64_t)((unsigned)lbn), size, &rablock, - &rasize, 1, NOCRED, &bp); - } else - error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), size, NOCRED, &bp); - - ip->i_lastr = lbn; - n = min(n, size - buf_resid(bp)); - if (error) { - buf_brelse(bp); - return (error); - } - - error = uiomove((caddr_t)(buf_dataptr(bp) + on), (int)n, uio); - if (n + on == imp->logical_block_size || - uio->uio_offset == (off_t)ip->i_size) - buf_markaged(bp); - buf_brelse(bp); - } while (error == 0 && uio_resid(uio) > 0 && n != 0); - } - - return (error); -} - -int -cd9660_ioctl(__unused struct vnop_ioctl_args *ap) -{ - return (ENOTTY); -} - -int -cd9660_select(__unused struct vnop_select_args *ap) -{ - /* - * We should really check to see if I/O is possible. - */ - return (1); -} - -/* - * Mmap a file - * - * NB Currently unsupported. 
- */ -int -cd9660_mmap(__unused struct vnop_mmap_args *ap) -{ - - return (EINVAL); -} - -/* - * Structure for reading directories - */ -struct isoreaddir { - struct dirent saveent; - struct dirent current; - off_t saveoff; - off_t curroff; - struct uio *uio; - off_t uio_off; - int eofflag; -// u_long **cookies; -// int *ncookies; -}; - -static int -iso_uiodir(struct isoreaddir *idp, struct dirent *dp, off_t off) -{ - int error; - - dp->d_name[dp->d_namlen] = 0; - dp->d_reclen = DIRSIZ(dp); - - if (uio_resid(idp->uio) < dp->d_reclen) { - idp->eofflag = 0; - return (-1); - } - -#if 0 - if (idp->cookies) { - if (*idp->ncookies <= 0) { - idp->eofflag = 0; - return (-1); - } - - **idp->cookies++ = off; - --*idp->ncookies; - } -#endif - - if ( (error = uiomove( (caddr_t)dp, dp->d_reclen, idp->uio )) ) - return (error); - idp->uio_off = off; - return (0); -} - -static int -iso_shipdir(struct isoreaddir *idp) -{ - struct dirent *dp; - int cl, sl; - int error; - char *cname, *sname; - - cl = idp->current.d_namlen; - cname = idp->current.d_name; - - dp = &idp->saveent; - sname = dp->d_name; - sl = dp->d_namlen; - if (sl > 0) { - if (sl != cl - || bcmp(sname,cname,sl)) { - if (idp->saveent.d_namlen) { - if ( (error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) ) - return (error); - idp->saveent.d_namlen = 0; - } - } - } - idp->current.d_reclen = DIRSIZ(&idp->current); - idp->saveoff = idp->curroff; - bcopy(&idp->current,&idp->saveent,idp->current.d_reclen); - return (0); -} - -/* - * Vnode op for readdir - * - * Note that directories are sector aligned (2K) and - * that an entry can cross a logical block but not - * a sector. 
- */ -int -cd9660_readdir(struct vnop_readdir_args *ap) -{ - register struct uio *uio = ap->a_uio; -#if 0 - off_t startingOffset = uio->uio_offset; - size_t lost = 0; -#endif /* 0 */ - struct isoreaddir *idp; - struct vnode *vdp = ap->a_vp; - struct iso_node *dp; - struct iso_mnt *imp; - struct buf *bp = NULL; - struct iso_directory_record *ep; - int entryoffsetinblock; - doff_t endsearch; - uint32_t bmask; - int error = 0; - int reclen; - u_short namelen; - - if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) - return (EINVAL); - - dp = VTOI(vdp); - imp = dp->i_mnt; - bmask = imp->im_sector_size - 1; - - MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK); - idp->saveent.d_namlen = 0; - /* - * XXX - * Is it worth trying to figure out the type? - */ - idp->saveent.d_type = idp->current.d_type = DT_UNKNOWN; - idp->uio = uio; - idp->eofflag = 1; - idp->curroff = uio->uio_offset; - - if ((entryoffsetinblock = idp->curroff & bmask) && - (error = cd9660_blkatoff(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) { - FREE(idp, M_TEMP); - return (error); - } - endsearch = dp->i_size; - - while (idp->curroff < endsearch) { - /* - * If offset is on a block boundary, - * read the next directory block. - * Release previous if it exists. - */ - if ((idp->curroff & bmask) == 0) { - if (bp != NULL) - buf_brelse(bp); - if ((error = cd9660_blkatoff(vdp, SECTOFF(imp, idp->curroff), NULL, &bp))) - break; - entryoffsetinblock = 0; - } - /* - * Get pointer to next entry. 
- */ - ep = (struct iso_directory_record *) - (buf_dataptr(bp) + entryoffsetinblock); - - reclen = isonum_711(ep->length); - if (reclen == 0) { - /* skip to next block, if any */ - idp->curroff = - (idp->curroff & ~bmask) + imp->im_sector_size; - continue; - } - - if (reclen < ISO_DIRECTORY_RECORD_SIZE) { - error = EINVAL; - /* illegal entry, stop */ - break; - } - - if (entryoffsetinblock + reclen > imp->im_sector_size) { - error = EINVAL; - /* illegal directory, so stop looking */ - break; - } - - idp->current.d_namlen = isonum_711(ep->name_len); - - if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) { - error = EINVAL; - /* illegal entry, stop */ - break; - } - - /* - * Some poorly mastered discs have an incorrect directory - * file size. If the '.' entry has a better size (bigger) - * then use that instead. - */ - if ((uio->uio_offset == 0) && (isonum_733(ep->size) > endsearch)) { - dp->i_size = endsearch = isonum_733(ep->size); - } - - if ( isonum_711(ep->flags) & directoryBit ) - idp->current.d_fileno = isodirino(ep, imp); - else { - idp->current.d_fileno = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + - entryoffsetinblock; - } - - idp->curroff += reclen; - - switch (imp->iso_ftype) { - case ISO_FTYPE_RRIP: - cd9660_rrip_getname(ep,idp->current.d_name, &namelen, - &idp->current.d_fileno,imp); - idp->current.d_namlen = (u_char)namelen; - if (idp->current.d_namlen) - error = iso_uiodir(idp,&idp->current,idp->curroff); - break; - - case ISO_FTYPE_JOLIET: - ucsfntrans((u_int16_t *)ep->name, idp->current.d_namlen, - idp->current.d_name, &namelen, - isonum_711(ep->flags) & directoryBit, - isonum_711(ep->flags) & associatedBit); - idp->current.d_namlen = (u_char)namelen; - if (idp->current.d_namlen) - error = iso_uiodir(idp,&idp->current,idp->curroff); - break; - - default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */ - strlcpy(idp->current.d_name, "..", - __DARWIN_MAXNAMLEN + 1); - switch (ep->name[0]) { - case 0: - idp->current.d_namlen = 1; - error = 
iso_uiodir(idp,&idp->current,idp->curroff); - break; - case 1: - idp->current.d_namlen = 2; - error = iso_uiodir(idp,&idp->current,idp->curroff); - break; - default: - isofntrans(ep->name,idp->current.d_namlen, - idp->current.d_name, &namelen, - imp->iso_ftype == ISO_FTYPE_9660, - isonum_711(ep->flags) & associatedBit); - idp->current.d_namlen = (u_char)namelen; - if (imp->iso_ftype == ISO_FTYPE_DEFAULT) - error = iso_shipdir(idp); - else - error = iso_uiodir(idp,&idp->current,idp->curroff); - break; - } - } - if (error) - break; - - entryoffsetinblock += reclen; - } - - if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) { - idp->current.d_namlen = 0; - error = iso_shipdir(idp); - } -#if 0 - if (!error && ap->a_ncookies) { - struct dirent *dirp, *dpstart; - off_t bufferOffset; - u_long *cookies; - int ncookies; - - /* - * Only the NFS server uses cookies, and it loads the - * directory block into system space, so we can just look at - * it directly. - * - * We assume the entire transfer is done to a single contiguous buffer. - */ - if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg) || uio->uio_iovcnt != 1) - panic("ufs_readdir: lost in space"); - - /* - * Make a first pass over the buffer just generated, - * counting the number of entries: - */ - // LP64todo - fix this! 
- dpstart = (struct dirent *) - CAST_DOWN(caddr_t, (uio_iov_base(uio) - (uio->uio_offset - startingOffset))); - for (dirp = dpstart, bufferOffset = startingOffset, ncookies = 0; - bufferOffset < uio->uio_offset; ) { - if (dirp->d_reclen == 0) - break; - bufferOffset += dirp->d_reclen; - ncookies++; - dirp = (struct dirent *)((caddr_t)dirp + dirp->d_reclen); - } - lost += uio->uio_offset - bufferOffset; - uio->uio_offset = bufferOffset; - - /* - * Allocate a buffer to hold the cookies requested: - */ - MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); - *ap->a_ncookies = ncookies; - *ap->a_cookies = cookies; - - /* - * Fill in the offsets for each entry in the buffer just allocated: - */ - for (bufferOffset = startingOffset, dirp = dpstart; bufferOffset < uio->uio_offset; ) { - *(cookies++) = bufferOffset; - bufferOffset += dirp->d_reclen; - dirp = (struct dirent *)((caddr_t)dirp + dirp->d_reclen); - } - } -#endif - if (error < 0) - error = 0; - - if (bp) - buf_brelse (bp); - - uio->uio_offset = idp->uio_off; - *ap->a_eofflag = idp->eofflag; - - FREE(idp, M_TEMP); - - return (error); -} - -/* - * Return target name of a symbolic link - * Shouldn't we get the parent vnode and read the data from there? - * This could eventually result in deadlocks in cd9660_lookup. - * But otherwise the block read here is in the block buffer two times. - */ -typedef struct iso_directory_record ISODIR; -typedef struct iso_node ISONODE; -typedef struct iso_mnt ISOMNT; -int -cd9660_readlink(struct vnop_readlink_args *ap) -{ - ISONODE *ip; - ISODIR *dirp; - ISOMNT *imp; - struct buf *bp; - struct uio *uio; - u_short symlen; - int error; - char *symname; - - ip = VTOI(ap->a_vp); - imp = ip->i_mnt; - uio = ap->a_uio; - - if (imp->iso_ftype != ISO_FTYPE_RRIP) - return (EINVAL); - - /* - * Get parents directory record block that this inode included. 
- */ - error = (int)buf_bread(imp->im_devvp, - (daddr64_t)((unsigned)(ip->i_number >> imp->im_bshift)), - imp->logical_block_size, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (EINVAL); - } - - /* - * Setup the directory pointer for this inode - */ - dirp = (ISODIR *)(buf_dataptr(bp) + (ip->i_number & imp->im_bmask)); - - /* - * Just make sure, we have a right one.... - * 1: Check not cross boundary on block - */ - if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length) - > imp->logical_block_size) { - buf_brelse(bp); - return (EINVAL); - } - - /* - * Now get a buffer - * Abuse a namei buffer for now. - */ - if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) - MALLOC_ZONE(symname, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - else - // LP64todo - fix this! - symname = CAST_DOWN(caddr_t, uio_iov_base(uio)); - - /* - * Ok, we just gathering a symbolic name in SL record. - */ - if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) { - if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) - FREE_ZONE(symname, MAXPATHLEN, M_NAMEI); - buf_brelse(bp); - return (EINVAL); - } - /* - * Don't forget before you leave from home ;-) - */ - buf_brelse(bp); - - /* - * return with the symbolic name to caller's. - */ - if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { - error = uiomove(symname, symlen, uio); - FREE_ZONE(symname, MAXPATHLEN, M_NAMEI); - return (error); - } -#if LP64KERN - uio_setresid(uio, (uio_resid(uio) - symlen)); - uio_iov_len_add(uio, -((int64_t)symlen)); -#else - uio_setresid(uio, (uio_resid(uio) - symlen)); - uio_iov_len_add(uio, -((int)symlen)); -#endif - uio_iov_base_add(uio, symlen); - return (0); -} - - -/* - * prepare and issue the I/O - */ -int -cd9660_strategy(struct vnop_strategy_args *ap) -{ - buf_t bp = ap->a_bp; - vnode_t vp = buf_vnode(bp); - struct iso_node *ip = VTOI(vp); - - return (buf_strategy(ip->i_devvp, ap)); -} - - -/* - * Return POSIX pathconf information applicable to cd9660 filesystems. 
- */ -int -cd9660_pathconf(struct vnop_pathconf_args *ap) -{ - - switch (ap->a_name) { - case _PC_LINK_MAX: - *ap->a_retval = 1; - return (0); - case _PC_NAME_MAX: - switch (VTOI(ap->a_vp)->i_mnt->iso_ftype) { - case ISO_FTYPE_RRIP: - *ap->a_retval = ISO_RRIP_NAMEMAX; - break; - case ISO_FTYPE_JOLIET: - *ap->a_retval = ISO_JOLIET_NAMEMAX; - break; - default: - *ap->a_retval = ISO_NAMEMAX; - } - return (0); - case _PC_PATH_MAX: - *ap->a_retval = PATH_MAX; - return (0); - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - return (0); - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 200112; /* _POSIX_CHOWN_RESTRICTED */ - return (0); - case _PC_NO_TRUNC: - *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ - return (0); - default: - return (EINVAL); - } - /* NOTREACHED */ -} - -/* - * Unsupported operation - */ -int -cd9660_enotsupp(void) -{ - return (ENOTSUP); -} -/* Pagein. similar to read */ -int -cd9660_pagein(struct vnop_pagein_args *ap) -{ - struct vnode *vp = ap->a_vp; - upl_t pl = ap->a_pl; - size_t size = ap->a_size; - off_t f_offset = ap->a_f_offset; - vm_offset_t pl_offset = ap->a_pl_offset; - int flags = ap->a_flags; - register struct iso_node *ip = VTOI(vp); - int error = 0; - - /* - * Copy the Apple Double header. 
- */ - if ((ip->i_flag & ISO_ASSOCIATED) && (f_offset == 0) && (size == ADH_SIZE)) { - apple_double_header_t header; - kern_return_t kret; - vm_offset_t ioaddr; - - kret = ubc_upl_map(pl, &ioaddr); - if (kret != KERN_SUCCESS) - panic("cd9660_xa_pagein: ubc_upl_map error = %d", kret); - ioaddr += pl_offset; - bzero((caddr_t)ioaddr, ADH_SIZE); - - header.magic = APPLEDOUBLE_MAGIC; - header.version = APPLEDOUBLE_VERSION; - header.count = 2; - header.entries[0].entryID = APPLEDOUBLE_FINDERINFO; - header.entries[0].offset = offsetof(apple_double_header_t, finfo); - header.entries[0].length = 32; - header.entries[1].entryID = APPLEDOUBLE_RESFORK; - header.entries[1].offset = ADH_SIZE; - header.entries[1].length = ip->i_size - ADH_SIZE; - header.finfo.fdType = ip->i_FileType; - header.finfo.fdCreator = ip->i_Creator; - header.finfo.fdFlags = ip->i_FinderFlags; - header.finfo.fdLocation.v = -1; - header.finfo.fdLocation.h = -1; - header.finfo.fdReserved = 0; - - bcopy((caddr_t)&header, (caddr_t)ioaddr, sizeof(apple_double_header_t)); - - kret = ubc_upl_unmap(pl); - if (kret != KERN_SUCCESS) - panic("cd9660_xa_pagein: ubc_upl_unmap error = %d", kret); - - if ((flags & UPL_NOCOMMIT) == 0) { - ubc_upl_commit_range(pl, pl_offset, size, UPL_COMMIT_FREE_ON_EMPTY); - } - } else { - /* check pageouts are for reg file only and ubc info is present*/ - error = cluster_pagein(vp, pl, pl_offset, f_offset, size, - (off_t)ip->i_size, flags); - } - return (error); -} - -/* - * cd9660_remove - not possible to remove a file from iso cds - * - * Locking policy: a_dvp and vp locked on entry, unlocked on exit - */ -int -cd9660_remove(__unused struct vnop_remove_args *ap) -{ - return (EROFS); -} - - -/* - * cd9660_rmdir - not possible to remove a directory from iso cds - * - * Locking policy: a_dvp and vp locked on entry, unlocked on exit - */ -int -cd9660_rmdir(struct vnop_rmdir_args *ap) -{ - (void) nop_rmdir(ap); - return (EROFS); -} - -/* - * Make a RIFF file header for a CD-ROM XA media 
file. - */ -__private_extern__ void -cd9660_xa_init(struct iso_node *ip, struct iso_directory_record *isodir) -{ - uint32_t sectors; - struct riff_header *header; - u_char name_len; - char *cdxa; - - MALLOC(header, struct riff_header *, sizeof(struct riff_header), M_TEMP, M_WAITOK); - - sectors = ip->i_size / 2048; - - strncpy(header->riff, "RIFF", 4); - header->fileSize = OSSwapHostToLittleInt32(sectors * CDXA_SECTOR_SIZE + sizeof(struct riff_header) - 8); - strncpy(header->cdxa, "CDXA", 4); - strncpy(header->fmt, "fmt ", 4); - header->fmtSize = OSSwapHostToLittleConstInt32(16); - strncpy(header->data, "data", 4); - header->dataSize = OSSwapHostToLittleInt32(sectors * CDXA_SECTOR_SIZE); - - /* - * Copy the CD-ROM XA extended directory information into the header. As far as - * I can tell, it's always 14 bytes in the directory record, but allocated 16 bytes - * in the header (the last two being zeroed pad bytes). - */ - name_len = isonum_711(isodir->name_len); - cdxa = &isodir->name[name_len]; - if ((name_len & 0x01) == 0) - ++cdxa; /* Skip pad byte */ - bcopy(cdxa, header->fmtData, 14); - header->fmtData[14] = 0; - header->fmtData[15] = 0; - - /* - * Point this i-node to the "whole sector" device instead of the normal - * device. This allows cd9660_strategy to be ignorant of the block - * (sector) size. - */ - ip->i_devvp = ip->i_mnt->phys_devvp; - - ip->i_size = sectors * CDXA_SECTOR_SIZE + sizeof(struct riff_header); - ip->i_riff = header; -} - -/* - * Helper routine for vnop_read and vnop_pagein of CD-ROM XA multimedia files. - * This routine determines the physical location of the file, then reads - * sectors directly from the device into a buffer. It also handles inserting - * the RIFF header at the beginning of the file. - * - * Exactly one of buffer or uio must be non-zero. It will either bcopy to - * buffer, or uiomove via uio. - * - * XXX Should this code be using buf_breadn and ip->i_lastr to support single-block - * read-ahead? 
Should we try more aggressive read-ahead like cluster_io does? - * - * XXX This could be made to do larger I/O to the device (reading all the - * whole sectors directly into the buffer). That would make the code more - * complex, and the current code only adds 2.5% overhead compared to reading - * from the device directly (at least on my test machine). - */ -static int -cd9660_xa_read_common( - struct vnode *vp, - off_t offset, - size_t amount, - caddr_t buffer, - struct uio *uio) -{ - struct iso_node *ip = VTOI(vp); - struct buf *bp; - off_t diff; /* number of bytes from offset to file's EOF */ - daddr_t block; /* physical disk block containing offset */ - off_t sect_off; /* starting offset into current sector */ - u_int count; /* number of bytes to transfer in current block */ - int error=0; - - /* - * Copy any part of the RIFF header. - */ - if (offset < sizeof(struct riff_header)) { - char *p; - - p = ((char *) ip->i_riff) + offset; - count = min(amount, sizeof(struct riff_header) - offset); - if (buffer) { - bcopy(p, buffer, count); - buffer += count; - } else { - error = uiomove(p, count, uio); - } - amount -= count; - offset += count; - } - if (error) - return error; - - /* - * Loop over (possibly partial) blocks to transfer. - */ - while (error == 0 && amount > 0) { - /* - * Determine number of bytes until EOF. If we've hit - * EOF then return. 
- */ - diff = ip->i_size - offset; - if (diff <= 0) - return 0; - - /* Get a block from the underlying device */ - block = ip->iso_start + (offset - sizeof(struct riff_header))/CDXA_SECTOR_SIZE; - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)block), CDXA_SECTOR_SIZE, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return error; - } - if (buf_resid(bp)) { - printf("isofs: cd9660_xa_read_common: buf_bread didn't read full sector\n"); - return EIO; - } - - /* Figure out which part of the block to copy, and copy it */ - sect_off = (offset - sizeof(struct riff_header)) % CDXA_SECTOR_SIZE; - count = min(CDXA_SECTOR_SIZE-sect_off, amount); - if (diff < count) /* Pin transfer amount to EOF */ - count = diff; - - if (buffer) { - bcopy(CAST_DOWN(caddr_t, (buf_dataptr(bp)+sect_off)), buffer, count); - buffer += count; - } else { - error = uiomove(CAST_DOWN(caddr_t, (buf_dataptr(bp)+sect_off)), count, uio); - } - amount -= count; - offset += count; - - /* - * If we copied through the end of the block, or the end of file, then - * age the device block. This is optimized for sequential access. - */ - if (sect_off+count == CDXA_SECTOR_SIZE || offset == (off_t)ip->i_size) - buf_markaged(bp); - buf_brelse(bp); - } - - return error; -} - -/* - * Read from a CD-ROM XA multimedia file. - * - * This uses the same common routine as pagein for doing the actual read - * from the device. - * - * This routine doesn't do any caching beyond what the block device does. - * Even then, cd9660_xa_read_common ages the blocks once we read up to - * the end. - * - * We don't even take advantage if the file has been memory mapped and has - * valid pages already (in which case we could just uiomove from the page - * to the caller). Since we're a read-only filesystem, there can't be - * any cache coherency problems. Multimedia files are expected to be - * large and streamed anyway, so caching file contents probably isn't - * important. 
- */ -int -cd9660_xa_read(struct vnop_read_args *ap) -{ - struct vnode *vp = ap->a_vp; - register struct uio *uio = ap->a_uio; - register struct iso_node *ip = VTOI(vp); - off_t offset = uio->uio_offset; - // LP64todo - fix this! - size_t size = uio_resid(uio); - - /* Check for some obvious parameter problems */ - if (offset < 0) - return EINVAL; - if (size == 0) - return 0; - if (offset >= ip->i_size) - return 0; - - /* Pin the size of the read to the file's EOF */ - if (offset + size > ip->i_size) - size = ip->i_size - offset; - - return cd9660_xa_read_common(vp, offset, size, NULL, uio); -} - -/* - * Page in from a CD-ROM XA media file. - * - * Since our device block size isn't a power of two, we can't use - * cluster_pagein. Instead, we have to map the page and read into it. - */ -static int -cd9660_xa_pagein(struct vnop_pagein_args *ap) -{ - struct vnode *vp = ap->a_vp; - upl_t pl = ap->a_pl; - size_t size= ap->a_size; - off_t f_offset = ap->a_f_offset; - vm_offset_t pl_offset = ap->a_pl_offset; - int flags = ap->a_flags; - register struct iso_node *ip = VTOI(vp); - int error; - kern_return_t kret; - vm_offset_t ioaddr; - - if (size <= 0) - panic("cd9660_xa_pagein: size = %d", size); - - kret = ubc_upl_map(pl, &ioaddr); - if (kret != KERN_SUCCESS) - panic("cd9660_xa_pagein: ubc_upl_map error = %d", kret); - - ioaddr += pl_offset; - - /* Make sure pagein doesn't extend past EOF */ - if (f_offset + size > ip->i_size) - size = ip->i_size - f_offset; /* pin size to EOF */ - - /* Read the data in using the underlying device */ - error = cd9660_xa_read_common(vp, f_offset, size, (caddr_t)ioaddr, NULL); - - /* Zero fill part of page past EOF */ - if (ap->a_size > size) - bzero((caddr_t)ioaddr+size, ap->a_size-size); - - kret = ubc_upl_unmap(pl); - if (kret != KERN_SUCCESS) - panic("cd9660_xa_pagein: ubc_upl_unmap error = %d", kret); - - if ((flags & UPL_NOCOMMIT) == 0) - { - if (error) - ubc_upl_abort_range(pl, pl_offset, ap->a_size, UPL_ABORT_FREE_ON_EMPTY); - else 
- ubc_upl_commit_range(pl, pl_offset, ap->a_size, UPL_COMMIT_FREE_ON_EMPTY); - } - - return error; -} - -/* - * Global vfs data structures for isofs - */ -#define cd9660_create \ - ((int (*)(struct vnop_create_args *))err_create) -#define cd9660_mknod ((int (*)(struct vnop_mknod_args *))err_mknod) -#define cd9660_write ((int (*)(struct vnop_write_args *))cd9660_enotsupp) -#define cd9660_fsync ((int (*)(struct vnop_fsync_args *))nullop) -#define cd9660_rename \ - ((int (*)(struct vnop_rename_args *))err_rename) -#define cd9660_copyfile \ - ((int (*)(struct vnop_copyfile_args *))err_copyfile) -#define cd9660_link ((int (*)(struct vnop_link_args *))err_link) -#define cd9660_mkdir ((int (*)(struct vnop_mkdir_args *))err_mkdir) -#define cd9660_symlink \ - ((int (*)(struct vnop_symlink_args *))err_symlink) -#define cd9660_advlock \ - ((int (*)(struct vnop_advlock_args *))cd9660_enotsupp) -#define cd9660_bwrite \ - ((int (*)(struct vnop_bwrite_args *))cd9660_enotsupp) -#define cd9660_pageout \ - ((int (*)(struct vnop_pageout_args *))cd9660_enotsupp) -int cd9660_blktooff(struct vnop_blktooff_args *ap); -int cd9660_offtoblk(struct vnop_offtoblk_args *ap); -int cd9660_blockmap(struct vnop_blockmap_args *ap); - -#define VOPFUNC int (*)(void *) -/* - * Global vfs data structures for cd9660 - */ -int (**cd9660_vnodeop_p)(void *); -struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)cd9660_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)cd9660_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)cd9660_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vnop_read_desc, (VOPFUNC)cd9660_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)cd9660_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ - { 
&vnop_select_desc, (VOPFUNC)cd9660_select }, /* select */ - { &vnop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)cd9660_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ - { &vnop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ - { &vnop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ - { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ - { &vnop_blockmap_desc, (VOPFUNC)cd9660_blockmap }, /* blockmap */ - { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } -}; -struct vnodeopv_desc cd9660_vnodeop_opv_desc = - { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; - -/* - * The VOP table for CD-ROM XA (media) files is almost the same - * as for ordinary files, except for read, and pagein. - * Note that cd9660_xa_read doesn't use cluster I/O, so blockmap - * isn't needed, and isn't implemented. Similarly, it doesn't - * do buf_bread() on CD XA vnodes, so bmap, blktooff, offtoblk - * aren't needed. 
- */ -int (**cd9660_cdxaop_p)(void *); -struct vnodeopv_entry_desc cd9660_cdxaop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)cd9660_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)cd9660_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)cd9660_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)cd9660_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)cd9660_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vnop_read_desc, (VOPFUNC)cd9660_xa_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)cd9660_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)cd9660_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)cd9660_select }, /* select */ - { &vnop_mmap_desc, (VOPFUNC)cd9660_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)cd9660_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)cd9660_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)cd9660_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)cd9660_rename }, /* rename */ - { &vnop_copyfile_desc, (VOPFUNC)cd9660_copyfile },/* copyfile */ - { &vnop_mkdir_desc, (VOPFUNC)cd9660_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)cd9660_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)cd9660_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)cd9660_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)cd9660_readlink },/* readlink */ - { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)cd9660_strategy },/* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)cd9660_pathconf },/* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)cd9660_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)cd9660_xa_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } -}; -struct 
vnodeopv_desc cd9660_cdxaop_opv_desc = - { &cd9660_cdxaop_p, cd9660_cdxaop_entries }; - -/* - * Special device vnode ops - */ -int (**cd9660_specop_p)(void *); -struct vnodeopv_entry_desc cd9660_specop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)spec_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vnop_read_desc, (VOPFUNC)spec_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)spec_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)spec_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* 
blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ - { &vnop_blockmap_desc, (VOPFUNC)cd9660_blockmap }, /* blockmap */ - { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } -}; -struct vnodeopv_desc cd9660_specop_opv_desc = - { &cd9660_specop_p, cd9660_specop_entries }; - -#if FIFO -int (**cd9660_fifoop_p)(void *); -struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)fifo_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)cd9660_getattr }, /* getattr */ - { &vnop_read_desc, (VOPFUNC)fifo_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)fifo_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)fifo_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)fifo_link } , /* link */ - { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ - { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)fifo_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ - { &vnop_inactive_desc, (VOPFUNC)cd9660_inactive },/* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)cd9660_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)fifo_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { 
&vnop_pagein_desc, (VOPFUNC)cd9660_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)cd9660_pageout }, /* Pageout */ - { &vnop_blktooff_desc, (VOPFUNC)cd9660_blktooff }, /* blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)cd9660_offtoblk }, /* offtoblk */ - { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } -}; -struct vnodeopv_desc cd9660_fifoop_opv_desc = - { &cd9660_fifoop_p, cd9660_fifoop_entries }; -#endif /* FIFO */ diff --git a/bsd/isofs/cd9660/iso.h b/bsd/isofs/cd9660/iso.h deleted file mode 100644 index b122c4b6c..000000000 --- a/bsd/isofs/cd9660/iso.h +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: iso.h,v 1.9 1995/01/18 09:23:19 mycroft Exp $ */ - -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)iso.h 8.4 (Berkeley) 12/5/94 - */ -#ifndef _ISO_H_ -#define _ISO_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -#define ISODCL(from, to) (to - from + 1) - -struct iso_volume_descriptor { - char type [ISODCL(1,1)]; /* 711 */ - char volume_desc_id [ISODCL(2,6)]; - char version [ISODCL(7,7)]; - char data [ISODCL(8,2048)]; -}; - -/* volume descriptor types */ -#define ISO_VD_BOOT 0 -#define ISO_VD_PRIMARY 1 -#define ISO_VD_SUPPLEMENTARY 2 -#define ISO_VD_PARTITION 3 -#define ISO_VD_END 255 - -#define ISO_STANDARD_ID "CD001" -#define ISO_ECMA_ID "CDW01" -#define ISO_XA_ID "CD-XA001" /* XA style disk signature */ -#define ISO9660SIGNATURE 0x4147 /* for getattrlist ATTR_VOL_SIGNATURE */ - -/* Universal Character Set implementation levels (for Joliet) */ -#define ISO_UCS2_Level_1 "%/@" /* No combining chars */ -#define ISO_UCS2_Level_2 "%/C" /* Combining chars allowed with restrictions */ -#define ISO_UCS2_Level_3 "%/E" /* Combining chars allowed, no restrictions */ - -#define UCS_SEPARATOR1 0x002e -#define UCS_SEPARATOR2 0x003b - -#define ISO_DFLT_VOLUME_ID "ISO_9660_CD" - -/* pathconf filename lengths */ -#define ISO_NAMEMAX (31+1) -#define ISO_JOLIET_NAMEMAX (64*3) -#define ISO_RRIP_NAMEMAX 255 - -/* Finder flags, from Technical Note 40 */ -#define fLockedBit 0x8000 -#define fInvisibleBit 0x4000 -#define fHasBundleBit 0x2000 -#define fSystemBit 0x1000 -#define fNoCopyBit 0x0800 -#define fBusyBit 0x0400 -#define fChangedBit 0x0200 
-#define fInitedBit 0x0100 -#define fCachedBit 0x0080 -#define fSharedBit 0x0040 -#define fAlwaysBit 0x0020 /* always switch-launch */ -#define fNeverBit 0x0010 /* never switch-launch */ -#define fOwnApplBit 0x0002 -#define fOnDesktopBit 0x0001 - -#define EXTFNDRINFOSIZE 16 - -struct finder_info { - unsigned long fdType; - unsigned long fdCreator; - unsigned short fdFlags; - struct { - short v; /* file's location */ - short h; - } fdLocation; - unsigned short fdReserved; -}; - -struct iso_primary_descriptor { - char type [ISODCL ( 1, 1)]; /* 711 */ - char volume_desc_id [ISODCL ( 2, 6)]; - char version [ISODCL ( 7, 7)]; /* 711 */ - char flags [ISODCL ( 8, 8)]; /* SVD only */ - char system_id [ISODCL ( 9, 40)]; /* achars */ - char volume_id [ISODCL ( 41, 72)]; /* dchars */ - char unused2 [ISODCL ( 73, 80)]; - char volume_space_size [ISODCL ( 81, 88)]; /* 733 */ - char escape_seq [ISODCL ( 89, 120)]; /* SVD only */ - char volume_set_size [ISODCL (121, 124)]; /* 723 */ - char volume_sequence_number [ISODCL (125, 128)]; /* 723 */ - char logical_block_size [ISODCL (129, 132)]; /* 723 */ - char path_table_size [ISODCL (133, 140)]; /* 733 */ - char type_l_path_table [ISODCL (141, 144)]; /* 731 */ - char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */ - char type_m_path_table [ISODCL (149, 152)]; /* 732 */ - char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */ - char root_directory_record [ISODCL (157, 190)]; /* 9.1 */ - char volume_set_id [ISODCL (191, 318)]; /* dchars */ - char publisher_id [ISODCL (319, 446)]; /* achars */ - char preparer_id [ISODCL (447, 574)]; /* achars */ - char application_id [ISODCL (575, 702)]; /* achars */ - char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */ - char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */ - char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */ - char creation_date [ISODCL (814, 830)]; /* 8.4.26.1 */ - char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */ - char expiration_date 
[ISODCL (848, 864)]; /* 8.4.26.1 */ - char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */ - char file_structure_version [ISODCL (882, 882)]; /* 711 */ - char unused4 [ISODCL (883, 883)]; - char application_data1 [ISODCL (884, 1024)]; - char CDXASignature [ISODCL (1025, 1032)]; - char CDXAResv [ISODCL (1033, 1050)]; - char application_data2 [ISODCL (1051, 1395)]; -}; -#define ISO_DEFAULT_BLOCK_SIZE 2048 - -/* from HighSierra.h in MacOS land */ -typedef struct -{ - char signature [ISODCL (1, 2)]; /* x42 x41 - 'BA' signature */ - u_char systemUseID [ISODCL (3, 3)]; /* 02 = no icon, 03 = icon, 04 = icon + bundle */ - u_char fileType [ISODCL (4, 7)]; /* such as 'TEXT' or 'STAK' */ - u_char fileCreator [ISODCL (8, 11)]; /* such as 'hscd' or 'WILD' */ - u_char finderFlags [ISODCL (12, 13)]; /* optional for type 06 */ -} AppleExtension; - -typedef struct -{ - char signature [ISODCL (1, 2)]; /* x41 x41 - 'AA' signature */ - u_char OSULength [ISODCL (3, 3)]; /* optional SystemUse length (size of this struct) */ - u_char systemUseID [ISODCL (4, 4)]; /* 1 = ProDOS 2 = HFS */ - u_char fileType [ISODCL (5, 8)]; /* such as 'TEXT' or 'STAK' */ - u_char fileCreator [ISODCL (9, 12)]; /* such as 'hscd' or 'WILD' */ - u_char finderFlags [ISODCL (13, 14)]; /* only certain bits of this are used */ -} NewAppleExtension; - -struct iso_directory_record { - char length [ISODCL (1, 1)]; /* 711 */ - char ext_attr_length [ISODCL (2, 2)]; /* 711 */ - u_char extent [ISODCL (3, 10)]; /* 733 */ - u_char size [ISODCL (11, 18)]; /* 733 */ - char date [ISODCL (19, 25)]; /* 7 by 711 */ - char flags [ISODCL (26, 26)]; - char file_unit_size [ISODCL (27, 27)]; /* 711 */ - char interleave [ISODCL (28, 28)]; /* 711 */ - char volume_sequence_number [ISODCL (29, 32)]; /* 723 */ - char name_len [ISODCL (33, 33)]; /* 711 */ - char name [1]; /* XXX */ -}; -/* - * cannot take sizeof(iso_directory_record), because of - * possible alignment - * of the last entry (34 instead of 33) - */ -#define 
ISO_DIRECTORY_RECORD_SIZE 33 - -/* - * iso_directory_record.flags for Directory Records (except CD-I discs) - */ -#define existenceBit 0x01 /* Invisible */ -#define directoryBit 0x02 -#define associatedBit 0x04 -#define recordBit 0x08 -#define protectionBit 0x10 -#define multiextentBit 0x80 - -struct iso_extended_attributes { - u_char owner [ISODCL (1, 4)]; /* 723 */ - u_char group [ISODCL (5, 8)]; /* 723 */ - u_char perm [ISODCL (9, 10)]; /* 9.5.3 */ - char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */ - char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */ - char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */ - char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */ - char recfmt [ISODCL (79, 79)]; /* 711 */ - char recattr [ISODCL (80, 80)]; /* 711 */ - u_char reclen [ISODCL (81, 84)]; /* 723 */ - char system_id [ISODCL (85, 116)]; /* achars */ - char system_use [ISODCL (117, 180)]; - char version [ISODCL (181, 181)]; /* 711 */ - char len_esc [ISODCL (182, 182)]; /* 711 */ - char reserved [ISODCL (183, 246)]; - u_char len_au [ISODCL (247, 250)]; /* 723 */ -}; - -/* CD-ROM Format type */ -enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, - ISO_FTYPE_JOLIET, ISO_FTYPE_ECMA }; - -#ifndef ISOFSMNT_ROOT -#define ISOFSMNT_ROOT 0 -#endif - -struct iso_mnt { - int im_flags; /* mount flags */ - int im_flags2; /* misc flags */ - - struct mount *im_mountp; - dev_t im_dev; - struct vnode *im_devvp; - - int logical_block_size; - int im_bshift; - int im_bmask; - int im_sector_size; - - int volume_space_size; - - char root[ISODCL (157, 190)]; - int root_extent; - int root_size; - enum ISO_FTYPE iso_ftype; - - int rr_skip; - int rr_skip0; - - struct timespec creation_date; /* needed for getattrlist */ - struct timespec modification_date; /* needed for getattrlist */ - u_char volume_id[32]; /* name of volume */ - struct vnode *phys_devvp; /* device for 2352-byte blocks */ - struct CDTOC *toc; /* the TOC, or NULL for none */ - int video_dir_start; /* start sector of the "MPEGAV" dir */ - int 
video_dir_end; /* sector following end of "MPEGAV" dir */ -}; - -/* bit settings for iso_mnt.im_flags2 */ - -/* - * CD is in XA format. Need this to find where apple extensions - * are in the iso_directory_record - */ -#define IMF2_IS_CDXA 0x00000001 - -/* CD is Video CD (version < 2.0) */ -#define IMF2_IS_VCD 0x00000002 - -#define VFSTOISOFS(mp) ((struct iso_mnt *)(vfs_fsprivate(mp))) - -#define blkoff(imp, loc) ((loc) & (imp)->im_bmask) -#define lblktosize(imp, blk) ((blk) << (imp)->im_bshift) -#define lblkno(imp, loc) ((loc) >> (imp)->im_bshift) -#define blksize(imp, ip, lbn) ((imp)->logical_block_size) - -#define SECTOFF(imp, off) \ - (off_t)(((off) / (imp)->im_sector_size) * (imp)->im_sector_size) - - -int cd9660_mount(struct mount *, vnode_t, user_addr_t, vfs_context_t); -int cd9660_start(struct mount *, int, vfs_context_t); -int cd9660_unmount(struct mount *, int, vfs_context_t); -int cd9660_root(struct mount *, struct vnode **, vfs_context_t); -int cd9660_statfs(struct mount *, struct vfsstatfs *, vfs_context_t); -int cd9660_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, vfs_context_t context); -int cd9660_sync(struct mount *, int, vfs_context_t); -int cd9660_vget(struct mount *, ino64_t, struct vnode **, vfs_context_t); -int cd9660_fhtovp(struct mount *, int, unsigned char *, struct vnode **, vfs_context_t); -int cd9660_vptofh(struct vnode *, int *, unsigned char *, vfs_context_t); -int cd9660_init(struct vfsconf *); -int cd9660_hashinit(void); -int cd9660_mountroot(mount_t, vnode_t, vfs_context_t); -int cd9660_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); - -int cd9660_vget_internal(mount_t, ino_t, vnode_t *, vnode_t, struct componentname *, - int, struct iso_directory_record *, proc_t); - -extern int (**cd9660_vnodeop_p)(void *); -extern int (**cd9660_specop_p)(void *); -#if FIFO -extern int (**cd9660_fifoop_p)(void *); -#endif -extern int (**cd9660_cdxaop_p)(void *); - -static __inline int -isonum_711(u_char *p) 
-{ - return *p; -} - -static __inline int -isonum_712(char *p) -{ - return *p; -} - -#ifndef UNALIGNED_ACCESS - -static __inline int -isonum_723(u_char *p) -{ - return *p|(p[1] << 8); -} - -static __inline int -isonum_733(u_char *p) -{ - return *p|(p[1] << 8)|(p[2] << 16)|(p[3] << 24); -} - -#else /* UNALIGNED_ACCESS */ - -#if BYTE_ORDER == LITTLE_ENDIAN - -static __inline int -isonum_723(u_char *p) -{ - return *(u_int16t *)p; -} - -static __inline int -isonum_733(u_char *p) -{ - return *(u_int32t *)p; -} - -#endif - -#if BYTE_ORDER == BIG_ENDIAN - -static __inline int -isonum_723(u_char *p) -{ - return *(u_int16t *)(p + 2); -} - -static __inline int -isonum_733(u_char *p) -{ - return *(u_int32t *)(p + 4); -} - -#endif - -#endif /* UNALIGNED_ACCESS */ - -int isofncmp(u_char *fn, int fnlen, u_char *isofn, int isolen); -int ucsfncmp(u_int16_t *, int, u_int16_t *, int); -void isofntrans(u_char *infn, int infnlen, u_char *outfn, u_short *outfnlen, - int original, int assoc); -void ucsfntrans(u_int16_t *, int, u_char *, u_short *, int, int); -int attrcalcsize(struct attrlist *attrlist); - -/* - * Associated files have a leading "._". - */ -#define ASSOCCHAR1 '.' -#define ASSOCCHAR2 '_' - -/* - * This header is prepended on media tracks, such as Video CD MPEG files. 
- */ -struct riff_header { - char riff[4]; // "RIFF" - u_int32_t fileSize; // little endian file size, not including this field or sig - char cdxa[4]; // "CDXA" - char fmt[4]; // "fmt " - u_int32_t fmtSize; // always 16 (XXX this is an assumption) - char fmtData[16]; // CDXA extension of ISO directory entry, padded to 16 bytes - char data[4]; // "data" - u_int32_t dataSize; // number of sectors * 2352, little endian -}; - -#define CDXA_SECTOR_SIZE 2352 - - -/* - * AppleDouble constants - */ -#define APPLEDOUBLE_MAGIC 0x00051607 -#define APPLEDOUBLE_VERSION 0x00020000 - -#define APPLEDOUBLE_DATAFORK 1 -#define APPLEDOUBLE_RESFORK 2 -#define APPLEDOUBLE_FINDERINFO 9 - -/* - * Note that the structures are padded and aligned to 2 bytes; - * this is to mimic the "#pragma options align=mac68k" formerly - * used. This is needed to make sure that the first - * AppleDoubleEntry (after the numEntries below) is *immediately* - * after the numEntries, and not padded by 2 bytes. - * - * Consult RFC 1740 for details on AppleSingle/AppleDouble formats. - */ -struct apple_double_entry { - u_int32_t entryID; - u_int32_t offset; - u_int32_t length; -} __attribute__((aligned(2), packed)); -typedef struct apple_double_entry apple_double_entry_t; - -struct apple_double_header { - u_int32_t magic; - u_int32_t version; - u_int8_t filler[16]; - u_int16_t count; - apple_double_entry_t entries[2]; /* FinderInfo + ResourceFork */ - struct finder_info finfo; -} __attribute__((aligned(2), packed)); -typedef struct apple_double_header apple_double_header_t; - -#define ADH_SIZE 4096 -#define ADH_BLKS 2 - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _ISO_H_ */ diff --git a/bsd/isofs/cd9660/iso_rrip.h b/bsd/isofs/cd9660/iso_rrip.h deleted file mode 100644 index 1ce14ba8c..000000000 --- a/bsd/isofs/cd9660/iso_rrip.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: iso_rrip.h,v 1.3 1994/06/29 06:32:02 cgd Exp $ */ - -/*- - * Copyright (c) 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley - * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension - * Support code is derived from software contributed to Berkeley - * by Atsushi Murai (amurai@spec.co.jp). - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94 - */ -#ifndef __ISOFS_CD9660_ISO_RRIP_H__ -#define __ISOFS_CD9660_ISO_RRIP_H__ - -#include - - -#ifdef __APPLE_API_PRIVATE -/* - * Analyze function flag (similar to RR field bits) - */ -#define ISO_SUSP_ATTR 0x0001 -#define ISO_SUSP_DEVICE 0x0002 -#define ISO_SUSP_SLINK 0x0004 -#define ISO_SUSP_ALTNAME 0x0008 -#define ISO_SUSP_CLINK 0x0010 -#define ISO_SUSP_PLINK 0x0020 -#define ISO_SUSP_RELDIR 0x0040 -#define ISO_SUSP_TSTAMP 0x0080 -#define ISO_SUSP_IDFLAG 0x0100 -#define ISO_SUSP_EXTREF 0x0200 -#define ISO_SUSP_CONT 0x0400 -#define ISO_SUSP_OFFSET 0x0800 -#define ISO_SUSP_STOP 0x1000 -#define ISO_SUSP_UNKNOWN 0x8000 - -typedef struct { - struct iso_node *inop; - int fields; /* interesting fields in this analysis */ - daddr_t iso_ce_blk; /* block of continuation area */ - off_t iso_ce_off; /* offset of continuation area */ - int iso_ce_len; /* length of continuation area */ - struct iso_mnt *imp; /* mount structure */ - ino_t *inump; /* inode number pointer */ - char *outbuf; /* name/symbolic link output area */ - u_short *outlen; /* length of above */ - u_short maxlen; /* maximum length of above */ - int cont; /* continuation of above */ -} ISO_RRIP_ANALYZE; - -int cd9660_rrip_analyze(struct iso_directory_record *isodir, - struct iso_node *inop, struct iso_mnt *imp); -int cd9660_rrip_getname(struct iso_directory_record *isodir, - char *outbuf, u_short *outlen, - ino_t *inump, struct iso_mnt *imp); -int cd9660_rrip_getsymname(struct iso_directory_record *isodir, - char *outbuf, u_short *outlen, - struct iso_mnt *imp); -int cd9660_rrip_offset(struct iso_directory_record *isodir, - struct iso_mnt *imp); -#endif /* __APPLE_API_PRIVATE */ -#endif /* __ISOFS_CD9660_ISO_RRIP_H__ */ diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index cf4ee656a..f36d6d4ce 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -24,8 +24,8 @@ * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* + * + * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -95,7 +95,7 @@ #include #include -#include +#include #include #include @@ -105,6 +105,7 @@ #include #include #include +#include #include @@ -147,6 +148,9 @@ #include /* for kmem_suballoc() */ #include /* for psem_lock_init() */ #include /* for log_setsize() */ +#include /* for tty_init() */ +#include /* for utun_register_control() */ +#include /* for net_str_id_init() */ #include @@ -162,13 +166,23 @@ #include #endif +#if PFLOG +#include +#endif + #include +#if CONFIG_EMBEDDED +#include +#endif + void * get_user_regs(thread_t); /* XXX kludge for */ void IOKitInitializeTime(void); /* XXX */ +void IOSleep(unsigned int); /* XXX */ void loopattach(void); /* XXX */ +void vc_progress_set(boolean_t, uint32_t); /* XXX */ -char copyright[] = +const char copyright[] = "Copyright (c) 1982, 1986, 1989, 1991, 1993\n\t" "The Regents of the University of California. 
" "All rights reserved.\n\n"; @@ -204,10 +218,11 @@ char hostname[MAXHOSTNAMELEN]; int hostnamelen; char domainname[MAXDOMNAMELEN]; int domainnamelen; -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) struct exec_archhandler exec_archhandler_ppc = { .path = "/usr/libexec/oah/translate", }; +const char * const kRosettaStandIn_str = "/usr/libexec/oah/RosettaNonGrata"; #else /* __i386__ */ struct exec_archhandler exec_archhandler_ppc; #endif /* __i386__ */ @@ -235,18 +250,27 @@ extern void bsd_bufferinit(void); extern int srv; extern int ncl; -#define BSD_SIMUL_EXECS 33 /* 32 , allow for rounding */ -#define BSD_PAGABLE_MAP_SIZE (BSD_SIMUL_EXECS * (NCARGS + PAGE_SIZE)) vm_map_t bsd_pageable_map; vm_map_t mb_map; -semaphore_t execve_semaphore; + +static int bsd_simul_execs = BSD_SIMUL_EXECS; +static int bsd_pageable_map_size = BSD_PAGABLE_MAP_SIZE; +__private_extern__ int execargs_cache_size = BSD_SIMUL_EXECS; +__private_extern__ int execargs_free_count = BSD_SIMUL_EXECS; +__private_extern__ vm_offset_t * execargs_cache = NULL; + +void bsd_exec_setup(int); + +/* + * Set to disable grading 64 bit Mach-o binaries as executable, for testing; + * Intel only. + */ +__private_extern__ int bootarg_no64exec = 0; int cmask = CMASK; extern int customnbuf; void bsd_init(void) __attribute__((section("__TEXT, initcode"))); -__private_extern__ void ubc_init(void ) __attribute__((section("__TEXT, initcode"))); -void vfsinit(void) __attribute__((section("__TEXT, initcode"))); kern_return_t bsd_autoconf(void) __attribute__((section("__TEXT, initcode"))); void bsd_utaskbootstrap(void) __attribute__((section("__TEXT, initcode"))); @@ -283,6 +307,13 @@ int turn_on_log_leaks = 0; extern void stackshot_lock_init(void); + +/* If we are using CONFIG_DTRACE */ +#if CONFIG_DTRACE + extern void dtrace_postinit(void); +#endif + + /* * Initialization code. 
* Called from cold start routine as @@ -314,19 +345,25 @@ struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ - PAGE_SIZE }; struct rlimit vm_initial_limit_data = { DFLDSIZ, MAXDSIZ }; struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ }; -extern thread_t cloneproc(proc_t, int); +extern thread_t cloneproc(task_t, proc_t, int); extern int (*mountroot)(void); extern int netboot_mountroot(void); /* netboot.c */ extern int netboot_setup(void); lck_grp_t * proc_lck_grp; +lck_grp_t * proc_slock_grp; +lck_grp_t * proc_fdmlock_grp; +lck_grp_t * proc_mlock_grp; lck_grp_attr_t * proc_lck_grp_attr; lck_attr_t * proc_lck_attr; lck_mtx_t * proc_list_mlock; lck_mtx_t * proc_klist_mlock; +extern lck_mtx_t * execargs_cache_lock; + /* hook called after root is mounted XXX temporary hack */ void (*mountroot_post_hook)(void); +void (*unmountroot_pre_hook)(void); /* * This function is called very early on in the Mach startup, from the @@ -345,10 +382,9 @@ void (*mountroot_post_hook)(void); void bsd_init(void) { - proc_t p; struct uthread *ut; unsigned int i; -#if __i386__ +#if __i386__ || __x86_64__ int error; #endif struct vfs_context context; @@ -378,25 +414,28 @@ bsd_init(void) bsd_init_kprintf("calling procinit\n"); procinit(); - kernproc = &proc0; + /* Initialize the ttys (MUST be before kminit()/bsd_autoconf()!)*/ + tty_init(); - p = kernproc; + kernproc = &proc0; /* implicitly bzero'ed */ /* kernel_task->proc = kernproc; */ - set_bsdtask_info(kernel_task,(void *)p); - p->p_pid = 0; - p->p_ppid = 0; + set_bsdtask_info(kernel_task,(void *)kernproc); /* give kernproc a name */ bsd_init_kprintf("calling process_name\n"); - process_name("kernel_task", p); + process_name("kernel_task", kernproc); /* allocate proc lock group attribute and group */ bsd_init_kprintf("calling lck_grp_attr_alloc_init\n"); proc_lck_grp_attr= lck_grp_attr_alloc_init(); - - proc_lck_grp = lck_grp_alloc_init("proc", proc_lck_grp_attr); + proc_lck_grp = lck_grp_alloc_init("proc", 
proc_lck_grp_attr); +#ifndef CONFIG_EMBEDDED + proc_slock_grp = lck_grp_alloc_init("proc-slock", proc_lck_grp_attr); + proc_fdmlock_grp = lck_grp_alloc_init("proc-fdmlock", proc_lck_grp_attr); + proc_mlock_grp = lck_grp_alloc_init("proc-mlock", proc_lck_grp_attr); +#endif /* Allocate proc lock attribute */ proc_lck_attr = lck_attr_alloc_init(); #if 0 @@ -405,12 +444,26 @@ bsd_init(void) #endif #endif +#ifdef CONFIG_EMBEDDED proc_list_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); proc_klist_mlock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); - lck_mtx_init(&p->p_mlock, proc_lck_grp, proc_lck_attr); - lck_mtx_init(&p->p_fdmlock, proc_lck_grp, proc_lck_attr); - lck_spin_init(&p->p_slock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&kernproc->p_mlock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&kernproc->p_fdmlock, proc_lck_grp, proc_lck_attr); + lck_spin_init(&kernproc->p_slock, proc_lck_grp, proc_lck_attr); +#else + proc_list_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); + proc_klist_mlock = lck_mtx_alloc_init(proc_mlock_grp, proc_lck_attr); + lck_mtx_init(&kernproc->p_mlock, proc_mlock_grp, proc_lck_attr); + lck_mtx_init(&kernproc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); + lck_spin_init(&kernproc->p_slock, proc_slock_grp, proc_lck_attr); +#endif + execargs_cache_lock = lck_mtx_alloc_init(proc_lck_grp, proc_lck_attr); + execargs_cache_size = bsd_simul_execs; + execargs_free_count = bsd_simul_execs; + execargs_cache = (vm_offset_t *)kalloc(bsd_simul_execs * sizeof(vm_offset_t)); + bzero(execargs_cache, bsd_simul_execs * sizeof(vm_offset_t)); + if (current_task() != kernel_task) printf("bsd_init: We have a problem, " "current task is not kernel task\n"); @@ -423,86 +476,96 @@ bsd_init(void) * Initialize the MAC Framework */ mac_policy_initbsd(); - p->p_mac_enforce = 0; + kernproc->p_mac_enforce = 0; #endif /* MAC */ /* * Create process 0. 
*/ proc_list_lock(); - LIST_INSERT_HEAD(&allproc, p, p_list); - p->p_pgrp = &pgrp0; + LIST_INSERT_HEAD(&allproc, kernproc, p_list); + kernproc->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); - lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr); +#ifdef CONFIG_EMBEDDED + lck_mtx_init(&pgrp0.pg_mlock, proc_lck_grp, proc_lck_attr); +#else + lck_mtx_init(&pgrp0.pg_mlock, proc_mlock_grp, proc_lck_attr); +#endif /* There is no other bsd thread this point and is safe without pgrp lock */ - LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); - p->p_listflag |= P_LIST_INPGRP; - p->p_pgrpid = 0; + LIST_INSERT_HEAD(&pgrp0.pg_members, kernproc, p_pglist); + kernproc->p_listflag |= P_LIST_INPGRP; + kernproc->p_pgrpid = 0; pgrp0.pg_session = &session0; pgrp0.pg_membercnt = 1; session0.s_count = 1; - session0.s_leader = p; + session0.s_leader = kernproc; session0.s_listflags = 0; +#ifdef CONFIG_EMBEDDED lck_mtx_init(&session0.s_mlock, proc_lck_grp, proc_lck_attr); +#else + lck_mtx_init(&session0.s_mlock, proc_mlock_grp, proc_lck_attr); +#endif LIST_INSERT_HEAD(SESSHASH(0), &session0, s_hash); proc_list_unlock(); #if CONFIG_LCTX - p->p_lctx = NULL; + kernproc->p_lctx = NULL; #endif - p->task = kernel_task; + kernproc->task = kernel_task; - p->p_stat = SRUN; - p->p_flag = P_SYSTEM; - p->p_nice = NZERO; - p->p_pptr = p; + kernproc->p_stat = SRUN; + kernproc->p_flag = P_SYSTEM; + kernproc->p_nice = NZERO; + kernproc->p_pptr = kernproc; - TAILQ_INIT(&p->p_uthlist); - TAILQ_INSERT_TAIL(&p->p_uthlist, ut, uu_list); + TAILQ_INIT(&kernproc->p_uthlist); + TAILQ_INSERT_TAIL(&kernproc->p_uthlist, ut, uu_list); - p->sigwait = FALSE; - p->sigwait_thread = THREAD_NULL; - p->exit_thread = THREAD_NULL; - p->p_csflags = CS_VALID; + kernproc->sigwait = FALSE; + kernproc->sigwait_thread = THREAD_NULL; + kernproc->exit_thread = THREAD_NULL; + kernproc->p_csflags = CS_VALID; /* * Create credential. This also Initializes the audit information. 
- * XXX It is not clear what the initial values should be for audit ID, - * XXX session ID, etc.. */ bsd_init_kprintf("calling bzero\n"); bzero(&temp_cred, sizeof(temp_cred)); temp_cred.cr_ngroups = 1; + temp_cred.cr_audit.as_aia_p = &audit_default_aia; + /* XXX the following will go away with cr_au */ + temp_cred.cr_au.ai_auid = AU_DEFAUDITID; + bsd_init_kprintf("calling kauth_cred_create\n"); - p->p_ucred = kauth_cred_create(&temp_cred); + kernproc->p_ucred = kauth_cred_create(&temp_cred); /* give the (already exisiting) initial thread a reference on it */ bsd_init_kprintf("calling kauth_cred_ref\n"); - kauth_cred_ref(p->p_ucred); - ut->uu_context.vc_ucred = p->p_ucred; + kauth_cred_ref(kernproc->p_ucred); + ut->uu_context.vc_ucred = kernproc->p_ucred; ut->uu_context.vc_thread = current_thread(); - TAILQ_INIT(&p->aio_activeq); - TAILQ_INIT(&p->aio_doneq); - p->aio_active_count = 0; - p->aio_done_count = 0; + TAILQ_INIT(&kernproc->p_aio_activeq); + TAILQ_INIT(&kernproc->p_aio_doneq); + kernproc->p_aio_total_count = 0; + kernproc->p_aio_active_count = 0; bsd_init_kprintf("calling file_lock_init\n"); file_lock_init(); #if CONFIG_MACF - mac_cred_label_associate_kernel(p->p_ucred); - mac_task_label_update_cred (p->p_ucred, (struct task *) p->task); + mac_cred_label_associate_kernel(kernproc->p_ucred); + mac_task_label_update_cred (kernproc->p_ucred, (struct task *) kernproc->task); #endif /* Create the file descriptor table. */ filedesc0.fd_refcnt = 1+1; /* +1 so shutdown will not _FREE_ZONE */ - p->p_fd = &filedesc0; + kernproc->p_fd = &filedesc0; filedesc0.fd_cmask = cmask; filedesc0.fd_knlistsize = -1; filedesc0.fd_knlist = NULL; @@ -510,8 +573,8 @@ bsd_init(void) filedesc0.fd_knhashmask = 0; /* Create the limits structures. 
*/ - p->p_limit = &limit0; - for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++) + kernproc->p_limit = &limit0; + for (i = 0; i < sizeof(kernproc->p_rlimit)/sizeof(kernproc->p_rlimit[0]); i++) limit0.pl_rlimit[i].rlim_cur = limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY; limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; @@ -522,8 +585,8 @@ bsd_init(void) limit0.pl_rlimit[RLIMIT_CORE] = vm_initial_limit_core; limit0.pl_refcnt = 1; - p->p_stats = &pstats0; - p->p_sigacts = &sigacts0; + kernproc->p_stats = &pstats0; + kernproc->p_sigacts = &sigacts0; /* * Charge root for two processes: init and mach_init. @@ -541,7 +604,7 @@ bsd_init(void) bsd_init_kprintf("calling kmem_suballoc\n"); ret = kmem_suballoc(kernel_map, &minimum, - (vm_size_t)BSD_PAGABLE_MAP_SIZE, + (vm_size_t)bsd_pageable_map_size, TRUE, VM_FLAGS_ANYWHERE, &bsd_pageable_map); @@ -561,8 +624,7 @@ bsd_init(void) /* Initialize the execve() semaphore */ bsd_init_kprintf("calling semaphore_create\n"); - ret = semaphore_create(kernel_task, &execve_semaphore, - SYNC_POLICY_FIFO, BSD_SIMUL_EXECS -1); + if (ret != KERN_SUCCESS) panic("bsd_init: Failed to create execve semaphore"); @@ -592,13 +654,14 @@ bsd_init(void) /* Initialize mbuf's. */ bsd_init_kprintf("calling mbinit\n"); mbinit(); + net_str_id_init(); /* for mbuf tags */ #endif /* SOCKETS */ /* * Initializes security event auditing. * XXX: Should/could this occur later? */ -#if AUDIT +#if CONFIG_AUDIT bsd_init_kprintf("calling audit_init\n"); audit_init(); #endif @@ -673,8 +736,8 @@ bsd_init(void) domaininit(); #endif /* SOCKETS */ - p->p_fd->fd_cdir = NULL; - p->p_fd->fd_rdir = NULL; + kernproc->p_fd->fd_cdir = NULL; + kernproc->p_fd->fd_rdir = NULL; #ifdef GPROF /* Initialize kernel profiling. 
*/ @@ -689,7 +752,6 @@ bsd_init(void) bsd_autoconf(); #if CONFIG_DTRACE - extern void dtrace_postinit(void); dtrace_postinit(); #endif @@ -703,7 +765,12 @@ bsd_init(void) bsd_init_kprintf("calling loopattach\n"); loopattach(); /* XXX */ #endif - + +#if PFLOG + /* Initialize packet filter log interface */ + pfloginit(); +#endif /* PFLOG */ + #if NETHER > 0 /* Register the built-in dlil ethernet interface family */ bsd_init_kprintf("calling ether_family_init\n"); @@ -714,6 +781,9 @@ bsd_init(void) /* Call any kext code that wants to run just after network init */ bsd_init_kprintf("calling net_init_run\n"); net_init_run(); + + /* register user tunnel kernel control handler */ + utun_register_control(); #endif /* NETWORKING */ bsd_init_kprintf("calling vnode_pager_bootstrap\n"); @@ -750,7 +820,7 @@ bsd_init(void) "Pages zero filled:\t\t%u.\n" "Pages reactivated:\t\t%u.\n" "Pageins:\t\t\t%u.\n" - "Pageouts:\t\t\t\%u.\n" + "Pageouts:\t\t\t%u.\n" "Object cache: %u hits of %u lookups (%d%% hit rate)\n", stat.free_count, @@ -783,9 +853,15 @@ bsd_init(void) rootdevice[0] = '\0'; #if NFSCLIENT if (mountroot == netboot_mountroot) { - printf("bsd_init: netboot_mountroot failed," - " errno = %d\n", err); - panic("bsd_init: failed to mount network root: %s", PE_boot_args()); + PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ + vc_progress_set(FALSE, 0); + for (i=1; 1; i*=2) { + printf("bsd_init: failed to mount network root, error %d, %s\n", + err, PE_boot_args()); + printf("We are hanging here...\n"); + IOSleep(i*60*1000); + } + /*NOTREACHED*/ } #endif printf("cannot mount root, errno = %d\n", err); @@ -795,7 +871,7 @@ bsd_init(void) IOSecureBSDRoot(rootdevice); context.vc_thread = current_thread(); - context.vc_ucred = p->p_ucred; + context.vc_ucred = kernproc->p_ucred; mountlist.tqh_first->mnt_flag |= MNT_ROOTFS; bsd_init_kprintf("calling VFS_ROOT\n"); @@ -812,7 +888,15 @@ bsd_init(void) int err; /* post mount setup */ if ((err = netboot_setup()) 
!= 0) { - panic("bsd_init: NetBoot could not find root, %d: %s", err, PE_boot_args()); + PE_display_icon( 0, "noroot"); /* XXX a netboot-specific icon would be nicer */ + vc_progress_set(FALSE, 0); + for (i=1; 1; i*=2) { + printf("bsd_init: NetBoot could not find root, error %d: %s\n", + err, PE_boot_args()); + printf("We are hanging here...\n"); + IOSleep(i*60*1000); + } + /*NOTREACHED*/ } } #endif @@ -839,9 +923,9 @@ bsd_init(void) } #endif /* CONFIG_IMAGEBOOT */ - microtime(&p->p_stats->p_start); /* for compat sake */ - microtime(&p->p_start); - p->p_rtime.tv_sec = p->p_rtime.tv_usec = 0; + /* set initial time; all other resource data is already zero'ed */ + microtime(&kernproc->p_start); + kernproc->p_stats->p_start = kernproc->p_start; /* for compat */ #if DEVFS { @@ -854,14 +938,24 @@ bsd_init(void) /* Initialize signal state for process 0. */ bsd_init_kprintf("calling siginit\n"); - siginit(p); + siginit(kernproc); bsd_init_kprintf("calling bsd_utaskbootstrap\n"); bsd_utaskbootstrap(); -#if __i386__ +#if defined(__LP64__) + kernproc->p_flag |= P_LP64; + printf("Kernel is LP64\n"); +#endif +#if __i386__ || __x86_64__ /* this should be done after the root filesystem is mounted */ - error = set_archhandler(p, CPU_TYPE_POWERPC); + error = set_archhandler(kernproc, CPU_TYPE_POWERPC); + // 10/30/08 - gab: + // if default 'translate' can't be found, see if the understudy is available + if (ENOENT == error) { + strlcpy(exec_archhandler_ppc.path, kRosettaStandIn_str, MAXPATHLEN); + error = set_archhandler(kernproc, CPU_TYPE_POWERPC); + } if (error) /* XXX make more generic */ exec_archhandler_ppc.path[0] = 0; #endif @@ -873,10 +967,19 @@ bsd_init(void) mountroot_post_hook(); #if 0 /* not yet */ - IOKitJettisonKLD(); - consider_zone_gc(); + consider_zone_gc(FALSE); #endif - + +#if CONFIG_EMBEDDED + /* + * XXX workaround for: + * Kirkwood7A135: PPP KEXT no longer loads + */ + OSKextLoadKextWithIdentifier("com.apple.nke.ppp"); + 
OSKextLoadKextWithIdentifier("com.apple.nke.l2tp"); + OSKextLoadKextWithIdentifier("com.apple.nke.pptp"); +#endif + bsd_init_kprintf("done\n"); } @@ -894,11 +997,8 @@ bsdinit_task(void) thread = current_thread(); (void) host_set_exception_ports(host_priv_self(), - EXC_MASK_ALL & ~(EXC_MASK_SYSCALL | - EXC_MASK_MACH_SYSCALL | - EXC_MASK_RPC_ALERT | - EXC_MASK_CRASH), - (mach_port_t)ux_exception_port, + EXC_MASK_ALL & ~(EXC_MASK_RPC_ALERT),//pilotfish (shark) needs this port + (mach_port_t) ux_exception_port, EXCEPTION_DEFAULT| MACH_EXCEPTION_CODES, 0); @@ -991,17 +1091,24 @@ bsd_utaskbootstrap(void) thread_t thread; struct uthread *ut; - thread = cloneproc(kernproc, 0); + /* + * Clone the bootstrap process from the kernel process, without + * inheriting either task characteristics or memory from the kernel; + */ + thread = cloneproc(TASK_NULL, kernproc, FALSE); + /* Hold the reference as it will be dropped during shutdown */ initproc = proc_find(1); #if __PROC_INTERNAL_DEBUG if (initproc == PROC_NULL) panic("bsd_utaskbootstrap: initproc not set\n"); #endif - /* Set the launch time for init */ - microtime(&initproc->p_start); - microtime(&initproc->p_stats->p_start); /* for compat sake */ - + /* + * Since we aren't going back out the normal way to our parent, + * we have to drop the transition locks explicitly. 
+ */ + proc_signalend(initproc, 0); + proc_transend(initproc, 0); ut = (struct uthread *)get_bsdthread_info(thread); ut->uu_sigmask = 0; @@ -1027,9 +1134,13 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */ turn_on_log_leaks = 1; - PE_parse_boot_argn("srv", &srv, sizeof (srv)); + /* disable 64 bit grading */ + if (PE_parse_boot_argn("-no64exec", namep, sizeof (namep))) + bootarg_no64exec = 1; + PE_parse_boot_argn("ncl", &ncl, sizeof (ncl)); - if (PE_parse_boot_argn("nbuf", &max_nbuf_headers, sizeof (max_nbuf_headers))) { + if (PE_parse_boot_argn("nbuf", &max_nbuf_headers, + sizeof (max_nbuf_headers))) { customnbuf = 1; } #if !defined(SECURE_KERNEL) @@ -1042,6 +1153,35 @@ parse_bsd_args(void) } } +void +bsd_exec_setup(int scale) +{ + + switch (scale) { + case 0: + case 1: + bsd_simul_execs = BSD_SIMUL_EXECS; + break; + case 2: + case 3: + bsd_simul_execs = 65; + break; + case 4: + case 5: + bsd_simul_execs = 129; + break; + case 6: + case 7: + bsd_simul_execs = 257; + break; + default: + bsd_simul_execs = 513; + break; + + } + bsd_pageable_map_size = (bsd_simul_execs * (NCARGS + PAGE_SIZE)); +} + #if !NFSCLIENT int netboot_root(void) diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index 37a5afb34..64127d32a 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -36,17 +36,17 @@ #include #include #include /* for SET */ +#include #include #include #include /* XXX these should be in a common header somwhere, but aren't */ extern int chrtoblk_set(int, int); -extern vm_offset_t kmem_mb_alloc(vm_map_t, int); +extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); /* XXX most of these just exist to export; there's no good header for them*/ void pcb_synch(void); -int issingleuser(void); void tbeproc(void *); @@ -54,16 +54,22 @@ void tbeproc(void *); int dmmin, dmmax, dmtext; vm_offset_t -kmem_mb_alloc(vm_map_t mbmap, int size) +kmem_mb_alloc(vm_map_t mbmap, int size, int physContig) { - vm_offset_t addr; - if 
(kernel_memory_allocate(mbmap, &addr, size, - 0, - KMA_NOPAGEWAIT|KMA_KOBJECT|KMA_LOMEM) == KERN_SUCCESS) - return(addr); + vm_offset_t addr = 0; + kern_return_t kr = KERN_SUCCESS; + + if(!physContig) + kr = kernel_memory_allocate(mbmap, &addr, size, + 0, KMA_NOPAGEWAIT|KMA_KOBJECT|KMA_LOMEM); else - return(0); - + kr = kmem_alloc_contig(mbmap, &addr, size, PAGE_MASK, + 0xfffff, 0, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_LOMEM); + + if( kr != KERN_SUCCESS) + addr = 0; + + return addr; } /* @@ -299,32 +305,34 @@ cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev) #include /* for PE_parse_boot_arg */ -/* - * Notes: This function is used solely by UFS, apparently in an effort - * to work around an issue with single user mounts. - * - * It's not technically correct to reference PE_parse_boot_arg() - * from this file. - */ -int -issingleuser(void) -{ - char namep[16]; - - if (PE_parse_boot_argn("-s", namep, sizeof(namep))) { - return(1); - } else { - return(0); - } -} - void tbeproc(void *procp) { struct proc *p = procp; if (p) - OSBitOrAtomic(P_TBE, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_TBE, &p->p_flag); return; } +/* + * Copy the "hostname" variable into a caller-provided buffer + * Returns: 0 for success, ENAMETOOLONG for insufficient buffer space. + * On success, "len" will be set to the number of characters preceding + * the NULL character in the hostname. + */ +int +bsd_hostname(char *buf, int bufsize, int *len) +{ + /* + * "hostname" is null-terminated, and "hostnamelen" is equivalent to strlen(hostname). + */ + if (hostnamelen < bufsize) { + strlcpy(buf, hostname, bufsize); + *len = hostnamelen; + return 0; + } else { + return ENAMETOOLONG; + } +} + diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c new file mode 100644 index 000000000..15a220c7c --- /dev/null +++ b/bsd/kern/decmpfs.c @@ -0,0 +1,1715 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#if !HFS_COMPRESSION +/* we need these symbols even though compression is turned off */ +char register_decmpfs_decompressor; +char unregister_decmpfs_decompressor; +#else /* HFS_COMPRESSION */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma mark --- debugging --- + +#define COMPRESSION_DEBUG 0 +#define COMPRESSION_DEBUG_VERBOSE 0 +#define MALLOC_DEBUG 0 + +static const char * +baseName(const char *path) +{ + if (!path) + return NULL; + const char *ret = path; + int i; + for (i = 0; path[i] != 0; i++) { + if (path[i] == '/') + ret = &path[i + 1]; + } + return ret; +} + +#define ErrorLog(x, args...) 
printf("%s:%d:%s: " x, baseName(__FILE__), __LINE__, __FUNCTION__, ## args) + +#if COMPRESSION_DEBUG +#define DebugLog ErrorLog +#else +#define DebugLog(x...) do { } while(0) +#endif + +#if COMPRESSION_DEBUG_VERBOSE +#define VerboseLog ErrorLog +#else +#define VerboseLog(x...) do { } while(0) +#endif + +#if MALLOC_DEBUG + +static SInt32 totalAlloc; + +typedef struct { + uint32_t allocSz; + uint32_t magic; + const char *file; + int line; +} allocated; + +static void * +_malloc(uint32_t sz, __unused int type, __unused int flags, const char *file, int line) +{ + uint32_t allocSz = sz + 2 * sizeof(allocated); + + allocated *alloc = NULL; + MALLOC(alloc, allocated *, allocSz, type, flags); + if (!alloc) { + ErrorLog("malloc failed\n"); + return NULL; + } + + char *ret = (char*)&alloc[1]; + allocated *alloc2 = (allocated*)(ret + sz); + + alloc->allocSz = allocSz; + alloc->magic = 0xdadadada; + alloc->file = file; + alloc->line = line; + + *alloc2 = *alloc; + + int s = OSAddAtomic(sz, &totalAlloc); + ErrorLog("malloc(%d) -> %p, total allocations %d\n", sz, ret, s + sz); + + return ret; +} + +static void +_free(char *ret, __unused int type, const char *file, int line) +{ + if (!ret) { + ErrorLog("freeing null\n"); + return; + } + allocated *alloc = (allocated*)ret; + alloc--; + uint32_t sz = alloc->allocSz - 2 * sizeof(allocated); + allocated *alloc2 = (allocated*)(ret + sz); + + if (alloc->magic != 0xdadadada) { + panic("freeing bad pointer"); + } + + if (memcmp(alloc, alloc2, sizeof(*alloc)) != 0) { + panic("clobbered data"); + } + + memset(ret, 0xce, sz); + alloc2->file = file; + alloc2->line = line; + FREE(alloc, type); + int s = OSAddAtomic(-sz, &totalAlloc); + ErrorLog("free(%p,%d) -> total allocations %d\n", ret, sz, s - sz); +} + +#undef MALLOC +#undef FREE +#define MALLOC(space, cast, size, type, flags) (space) = (cast)_malloc(size, type, flags, __FILE__, __LINE__) +#define FREE(addr, type) _free((void *)addr, type, __FILE__, __LINE__) + +#endif /* MALLOC_DEBUG */ 
+ +#pragma mark --- globals --- + +static lck_grp_t *decmpfs_lockgrp; + +static decmpfs_registration * decompressors[CMP_MAX]; /* the registered compressors */ +static lck_rw_t * decompressorsLock; +static int decompress_channel; /* channel used by decompress_file to wake up waiters */ +static lck_mtx_t *decompress_channel_mtx; + +vfs_context_t decmpfs_ctx; + +#pragma mark --- decmp_get_func --- + +#define offsetof_func(func) ((uintptr_t)(&(((decmpfs_registration*)NULL)->func))) + +static void * +_func_from_offset(uint32_t type, int offset) +{ + /* get the function at the given offset in the registration for the given type */ + decmpfs_registration *reg = decompressors[type]; + char *regChar = (char*)reg; + char *func = ®Char[offset]; + void **funcPtr = (void**)func; + return funcPtr[0]; +} + +static void * +_decmp_get_func(uint32_t type, int offset) +{ + /* + this function should be called while holding a shared lock to decompressorsLock, + and will return with the lock held + */ + + if (type >= CMP_MAX) + return NULL; + + if (decompressors[type] != NULL) { + // the compressor has already registered but the function might be null + return _func_from_offset(type, offset); + } + + // the compressor hasn't registered, so it never will unless someone manually kextloads it + ErrorLog("tried to access a compressed file of unregistered type %d\n", type); + return NULL; +} + +#define decmp_get_func(type, func) _decmp_get_func(type, offsetof_func(func)) + +#pragma mark --- utilities --- + +#if COMPRESSION_DEBUG +static char* +vnpath(vnode_t vp, char *path, int len) +{ + int origlen = len; + path[0] = 0; + vn_getpath(vp, path, &len); + path[origlen - 1] = 0; + return path; +} + +static int +vnsize(vnode_t vp, uint64_t *size) +{ + struct vnode_attr va; + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + int error = vnode_getattr(vp, &va, decmpfs_ctx); + if (error != 0) { + ErrorLog("vnode_getattr err %d\n", error); + return error; + } + *size = va.va_data_size; + return 
0; +} +#endif /* COMPRESSION_DEBUG */ + +#pragma mark --- cnode routines --- + +void +decmpfs_cnode_init(decmpfs_cnode *cp) +{ + memset(cp, 0, sizeof(*cp)); + lck_rw_init(&cp->compressed_data_lock, decmpfs_lockgrp, NULL); +#if !DECMPFS_SUPPORTS_SWAP64 + lck_mtx_init(&cp->uncompressed_size_mtx, decmpfs_lockgrp, NULL); +#endif +} + +void +decmpfs_cnode_destroy(decmpfs_cnode *cp) +{ + lck_rw_destroy(&cp->compressed_data_lock, decmpfs_lockgrp); +#if !DECMPFS_SUPPORTS_SWAP64 + lck_mtx_destroy(&cp->uncompressed_size_mtx, decmpfs_lockgrp); +#endif +} + +boolean_t +decmpfs_trylock_compressed_data(decmpfs_cnode *cp, int exclusive) +{ + void *thread = current_thread(); + boolean_t retval = FALSE; + + if (cp->lockowner == thread) { + /* this thread is already holding an exclusive lock, so bump the count */ + cp->lockcount++; + retval = TRUE; + } else if (exclusive) { + if ((retval = lck_rw_try_lock_exclusive(&cp->compressed_data_lock))) { + cp->lockowner = thread; + cp->lockcount = 1; + } + } else { + if ((retval = lck_rw_try_lock_shared(&cp->compressed_data_lock))) { + cp->lockowner = (void *)-1; + } + } + return retval; +} + +void +decmpfs_lock_compressed_data(decmpfs_cnode *cp, int exclusive) +{ + void *thread = current_thread(); + + if (cp->lockowner == thread) { + /* this thread is already holding an exclusive lock, so bump the count */ + cp->lockcount++; + } else if (exclusive) { + lck_rw_lock_exclusive(&cp->compressed_data_lock); + cp->lockowner = thread; + cp->lockcount = 1; + } else { + lck_rw_lock_shared(&cp->compressed_data_lock); + cp->lockowner = (void *)-1; + } +} + +void +decmpfs_unlock_compressed_data(decmpfs_cnode *cp, __unused int exclusive) +{ + void *thread = current_thread(); + + if (cp->lockowner == thread) { + /* this thread is holding an exclusive lock, so decrement the count */ + if ((--cp->lockcount) > 0) { + /* the caller still has outstanding locks, so we're done */ + return; + } + cp->lockowner = NULL; + } + + 
lck_rw_done(&cp->compressed_data_lock); +} + +uint32_t +decmpfs_cnode_get_vnode_state(decmpfs_cnode *cp) +{ + return cp->cmp_state; +} + +void +decmpfs_cnode_set_vnode_state(decmpfs_cnode *cp, uint32_t state, int skiplock) +{ + if (!skiplock) decmpfs_lock_compressed_data(cp, 1); + cp->cmp_state = state; + if (state == FILE_TYPE_UNKNOWN) { + /* clear out the compression type too */ + cp->cmp_type = 0; + } + if (!skiplock) decmpfs_unlock_compressed_data(cp, 1); +} + +static void +decmpfs_cnode_set_vnode_cmp_type(decmpfs_cnode *cp, uint32_t cmp_type, int skiplock) +{ + if (!skiplock) decmpfs_lock_compressed_data(cp, 1); + cp->cmp_type = cmp_type; + if (!skiplock) decmpfs_unlock_compressed_data(cp, 1); +} + +static void +decmpfs_cnode_set_vnode_minimal_xattr(decmpfs_cnode *cp, int minimal_xattr, int skiplock) +{ + if (!skiplock) decmpfs_lock_compressed_data(cp, 1); + cp->cmp_minimal_xattr = minimal_xattr; + if (!skiplock) decmpfs_unlock_compressed_data(cp, 1); +} + +uint64_t +decmpfs_cnode_get_vnode_cached_size(decmpfs_cnode *cp) +{ +#if DECMPFS_SUPPORTS_SWAP64 + return cp->uncompressed_size; +#else + /* + since this is a 64-bit field, we may not be able to access it atomically + so lock access + */ + + lck_mtx_lock(&(cp->uncompressed_size_mtx)); + uint64_t ret = cp->uncompressed_size; + lck_mtx_unlock(&(cp->uncompressed_size_mtx)); + return ret; +#endif +} + +static void +decmpfs_cnode_set_vnode_cached_size(decmpfs_cnode *cp, uint64_t size) +{ +#if DECMPFS_SUPPORTS_SWAP64 + while(1) { + uint64_t old = cp->uncompressed_size; + if (OSCompareAndSwap64(old, size, (UInt64*)&cp->uncompressed_size)) { + return; + } else { + /* failed to write our value, so loop */ + } + } +#else + /* + since this is a 64-bit field, we may not be able to access it atomically + so lock access + */ + + lck_mtx_lock(&(cp->uncompressed_size_mtx)); + cp->uncompressed_size = size; + lck_mtx_unlock(&(cp->uncompressed_size_mtx)); +#endif +} + +#pragma mark --- decmpfs state routines --- + +static int 
+decmpfs_fetch_compressed_header(vnode_t vp, decmpfs_cnode *cp, decmpfs_header **hdrOut, int returnInvalid) +{ + /* + fetches vp's compression xattr, converting it into a decmpfs_header; returns 0 or errno + if returnInvalid == 1, returns the header even if the type was invalid (out of range), + and return ERANGE in that case + */ + + size_t read_size = 0; + size_t attr_size = 0; + uio_t attr_uio = NULL; + int err = 0; + char *data = NULL; + decmpfs_header *hdr = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + + if ((cp != NULL) && + (cp->cmp_type != 0) && + (cp->cmp_minimal_xattr != 0)) { + /* this file's xattr didn't have any extra data when we fetched it, so we can synthesize a header from the data in the cnode */ + + MALLOC(data, char *, sizeof(decmpfs_header), M_TEMP, M_WAITOK); + if (!data) { + err = ENOMEM; + goto out; + } + hdr = (decmpfs_header*)data; + hdr->attr_size = sizeof(decmpfs_disk_header); + hdr->compression_magic = DECMPFS_MAGIC; + hdr->compression_type = cp->cmp_type; + hdr->uncompressed_size = decmpfs_cnode_get_vnode_cached_size(cp); + } else { + /* figure out how big the xattr is on disk */ + err = vn_getxattr(vp, DECMPFS_XATTR_NAME, NULL, &attr_size, XATTR_NOSECURITY, decmpfs_ctx); + if (err != 0) + goto out; + + if (attr_size < sizeof(decmpfs_disk_header) || attr_size > MAX_DECMPFS_XATTR_SIZE) { + err = EINVAL; + goto out; + } + + /* allocation includes space for the extra attr_size field of a compressed_header */ + MALLOC(data, char *, attr_size + sizeof(hdr->attr_size), M_TEMP, M_WAITOK); + if (!data) { + err = ENOMEM; + goto out; + } + + /* read the xattr into our buffer, skipping over the attr_size field at the beginning */ + attr_uio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(attr_uio, CAST_USER_ADDR_T(data + sizeof(hdr->attr_size)), attr_size); + + err = vn_getxattr(vp, DECMPFS_XATTR_NAME, attr_uio, &read_size, XATTR_NOSECURITY, decmpfs_ctx); + if (err != 0) + goto out; + if (read_size != 
attr_size) { + err = EINVAL; + goto out; + } + hdr = (decmpfs_header*)data; + hdr->attr_size = attr_size; + /* swap the fields to native endian */ + hdr->compression_magic = OSSwapLittleToHostInt32(hdr->compression_magic); + hdr->compression_type = OSSwapLittleToHostInt32(hdr->compression_type); + hdr->uncompressed_size = OSSwapLittleToHostInt64(hdr->uncompressed_size); + } + + if (hdr->compression_magic != DECMPFS_MAGIC) { + ErrorLog("invalid compression_magic 0x%08x, should be 0x%08x\n", hdr->compression_magic, DECMPFS_MAGIC); + err = EINVAL; + goto out; + } + + if (hdr->compression_type >= CMP_MAX) { + if (returnInvalid) { + /* return the header even though the type is out of range */ + err = ERANGE; + } else { + ErrorLog("compression_type %d out of range\n", hdr->compression_type); + err = EINVAL; + } + goto out; + } + +out: + if (err && (err != ERANGE)) { + DebugLog("err %d\n", err); + if (data) FREE(data, M_TEMP); + *hdrOut = NULL; + } else { + *hdrOut = hdr; + } + return err; +} + +static int +decmpfs_fast_get_state(decmpfs_cnode *cp) +{ + /* + return the cached state + this should *only* be called when we know that decmpfs_file_is_compressed has already been called, + because this implies that the cached state is valid + */ + int cmp_state = decmpfs_cnode_get_vnode_state(cp); + + switch(cmp_state) { + case FILE_IS_NOT_COMPRESSED: + case FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: + return cmp_state; + case FILE_TYPE_UNKNOWN: + /* + we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode, + which should not be possible + */ + ErrorLog("decmpfs_fast_get_state called on unknown file\n"); + return FILE_IS_NOT_COMPRESSED; + default: + /* */ + ErrorLog("unknown cmp_state %d\n", cmp_state); + return FILE_IS_NOT_COMPRESSED; + } +} + +static int +decmpfs_fast_file_is_compressed(decmpfs_cnode *cp) +{ + int cmp_state = decmpfs_cnode_get_vnode_state(cp); + + switch(cmp_state) { + case FILE_IS_NOT_COMPRESSED: + return 0; + case 
FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: + return 1; + case FILE_TYPE_UNKNOWN: + /* + we should only get here if decmpfs_file_is_compressed was not called earlier on this vnode, + which should not be possible + */ + ErrorLog("decmpfs_fast_get_state called on unknown file\n"); + return 0; + default: + /* */ + ErrorLog("unknown cmp_state %d\n", cmp_state); + return 0; + } +} + +errno_t +decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp) +{ + /* give a compressor a chance to indicate that a compressed file is invalid */ + + decmpfs_header *hdr = NULL; + errno_t err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); + if (err) { + /* we couldn't get the header */ + if (decmpfs_fast_get_state(cp) == FILE_IS_NOT_COMPRESSED) { + /* the file is no longer compressed, so return success */ + err = 0; + } + goto out; + } + + lck_rw_lock_shared(decompressorsLock); + decmpfs_validate_compressed_file_func validate = decmp_get_func(hdr->compression_type, validate); + if (validate) { /* make sure this validation function is valid */ + /* is the data okay? 
*/ + err = validate(vp, decmpfs_ctx, hdr); + } else if (decmp_get_func(hdr->compression_type, fetch) == NULL) { + /* the type isn't registered */ + err = EIO; + } else { + /* no validate registered, so nothing to do */ + err = 0; + } + lck_rw_done(decompressorsLock); +out: + if (hdr) FREE(hdr, M_TEMP); +#if COMPRESSION_DEBUG + if (err) { + DebugLog("decmpfs_validate_compressed_file ret %d, vp->v_flag %d\n", err, vp->v_flag); + } +#endif + return err; +} + +int +decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp) +{ + /* + determines whether vp points to a compressed file + + to speed up this operation, we cache the result in the cnode, and do as little as possible + in the case where the cnode already has a valid cached state + + */ + + int ret = 0; + int error = 0; + uint32_t cmp_state; + struct vnode_attr va_fetch; + decmpfs_header *hdr = NULL; + mount_t mp = NULL; + int cnode_locked = 0; + int saveInvalid = 0; // save the header data even though the type was out of range + + if (vnode_isnamedstream(vp)) { + /* + named streams can't be compressed + since named streams of the same file share the same cnode, + we don't want to get/set the state in the cnode, just return 0 + */ + return 0; + } + + /* examine the cached a state in this cnode */ + cmp_state = decmpfs_cnode_get_vnode_state(cp); + switch(cmp_state) { + case FILE_IS_NOT_COMPRESSED: + return 0; + case FILE_IS_COMPRESSED: + return 1; + case FILE_IS_CONVERTING: + /* treat the file as compressed, because this gives us a way to block future reads until decompression is done */ + return 1; + case FILE_TYPE_UNKNOWN: + /* the first time we encountered this vnode, so we need to check it out */ + break; + default: + /* unknown state, assume file is not compressed */ + ErrorLog("unknown cmp_state %d\n", cmp_state); + return 0; + } + + if (!vnode_isreg(vp)) { + /* only regular files can be compressed */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } + + mp = vnode_mount(vp); + if (mp == NULL) { + /* + this 
should only be true before we mount the root filesystem + we short-cut this return to avoid the call to getattr below, which + will fail before root is mounted + */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } + if ((mp->mnt_flag & MNT_LOCAL) == 0) { + /* compression only supported on local filesystems */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } + + /* lock our cnode data so that another caller doesn't change the state under us */ + decmpfs_lock_compressed_data(cp, 1); + cnode_locked = 1; + + VATTR_INIT(&va_fetch); + VATTR_WANTED(&va_fetch, va_flags); + error = vnode_getattr(vp, &va_fetch, decmpfs_ctx); + if (error) { + /* failed to get the bsd flags so the file is not compressed */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } + if (va_fetch.va_flags & UF_COMPRESSED) { + /* UF_COMPRESSED is on, make sure the file has the DECMPFS_XATTR_NAME xattr */ + error = decmpfs_fetch_compressed_header(vp, cp, &hdr, 1); + if ((hdr != NULL) && (error == ERANGE)) { + saveInvalid = 1; + } + if (error) { + /* failed to get the xattr so the file is not compressed */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } + /* we got the xattr, so the file is compressed */ + ret = FILE_IS_COMPRESSED; + goto done; + } + /* UF_COMPRESSED isn't on, so the file isn't compressed */ + ret = FILE_IS_NOT_COMPRESSED; + +done: + if (((ret == FILE_IS_COMPRESSED) || saveInvalid) && hdr) { + /* + cache the uncompressed size away in the cnode + */ + + if (!cnode_locked) { + /* + we should never get here since the only place ret is set to FILE_IS_COMPRESSED + is after the call to decmpfs_lock_compressed_data above + */ + decmpfs_lock_compressed_data(cp, 1); + cnode_locked = 1; + } + + decmpfs_cnode_set_vnode_cached_size(cp, hdr->uncompressed_size); + decmpfs_cnode_set_vnode_state(cp, ret, 1); + decmpfs_cnode_set_vnode_cmp_type(cp, hdr->compression_type, 1); + /* remember if the xattr's size was equal to the minimal xattr */ + if (hdr->attr_size == sizeof(decmpfs_disk_header)) { + 
decmpfs_cnode_set_vnode_minimal_xattr(cp, 1, 1); + } + if (ret == FILE_IS_COMPRESSED) { + /* update the ubc's size for this file */ + ubc_setsize(vp, hdr->uncompressed_size); + } + } else { + /* we might have already taken the lock above; if so, skip taking it again by passing cnode_locked as the skiplock parameter */ + decmpfs_cnode_set_vnode_state(cp, ret, cnode_locked); + } + + if (cnode_locked) decmpfs_unlock_compressed_data(cp, 1); + + if (hdr) FREE(hdr, M_TEMP); + + switch(ret) { + case FILE_IS_NOT_COMPRESSED: + return 0; + case FILE_IS_COMPRESSED: + case FILE_IS_CONVERTING: + return 1; + default: + /* unknown state, assume file is not compressed */ + ErrorLog("unknown ret %d\n", ret); + return 0; + } +} + +int +decmpfs_update_attributes(vnode_t vp, struct vnode_attr *vap) +{ + int error = 0; + + if (VATTR_IS_ACTIVE(vap, va_flags)) { + /* the BSD flags are being updated */ + if (vap->va_flags & UF_COMPRESSED) { + /* the compressed bit is being set, did it change? */ + struct vnode_attr va_fetch; + int old_flags = 0; + VATTR_INIT(&va_fetch); + VATTR_WANTED(&va_fetch, va_flags); + error = vnode_getattr(vp, &va_fetch, decmpfs_ctx); + if (error) + return error; + + old_flags = va_fetch.va_flags; + + if (!(old_flags & UF_COMPRESSED)) { + /* + * Compression bit was turned on, make sure the file has the DECMPFS_XATTR_NAME attribute. + * This precludes anyone from using the UF_COMPRESSED bit for anything else, and it enforces + * an order of operation -- you must first do the setxattr and then the chflags. 
+ */ + + if (VATTR_IS_ACTIVE(vap, va_data_size)) { + /* + * don't allow the caller to set the BSD flag and the size in the same call + * since this doesn't really make sense + */ + vap->va_flags &= ~UF_COMPRESSED; + return 0; + } + + decmpfs_header *hdr = NULL; + error = decmpfs_fetch_compressed_header(vp, NULL, &hdr, 1); + if (error == 0) { + /* + allow the flag to be set since the decmpfs attribute is present + in that case, we also want to truncate the data fork of the file + */ + VATTR_SET_ACTIVE(vap, va_data_size); + vap->va_data_size = 0; + } else if (error == ERANGE) { + /* the file had a decmpfs attribute but the type was out of range, so don't muck with the file's data size */ + } else { + /* no DECMPFS_XATTR_NAME attribute, so deny the update */ + vap->va_flags &= ~UF_COMPRESSED; + } + if (hdr) FREE(hdr, M_TEMP); + } + } + } + + return 0; +} + +static int +wait_for_decompress(decmpfs_cnode *cp) +{ + int state; + lck_mtx_lock(decompress_channel_mtx); + do { + state = decmpfs_fast_get_state(cp); + if (state != FILE_IS_CONVERTING) { + /* file is not decompressing */ + lck_mtx_unlock(decompress_channel_mtx); + return state; + } + msleep((caddr_t)&decompress_channel, decompress_channel_mtx, PINOD, "wait_for_decompress", NULL); + } while(1); +} + +#pragma mark --- decmpfs hide query routines --- + +int +decmpfs_hides_rsrc(vfs_context_t ctx, decmpfs_cnode *cp) +{ + /* + WARNING!!! + callers may (and do) pass NULL for ctx, so we should only use it + for this equality comparison + + This routine should only be called after a file has already been through decmpfs_file_is_compressed + */ + + if (ctx == decmpfs_ctx) + return 0; + + if (!decmpfs_fast_file_is_compressed(cp)) + return 0; + + /* all compressed files hide their resource fork */ + return 1; +} + +int +decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr) +{ + /* + WARNING!!! 
+ callers may (and do) pass NULL for ctx, so we should only use it + for this equality comparison + + This routine should only be called after a file has already been through decmpfs_file_is_compressed + */ + + if (ctx == decmpfs_ctx) + return 0; + if (strncmp(xattr, XATTR_RESOURCEFORK_NAME, 22) == 0) + return decmpfs_hides_rsrc(ctx, cp); + if (!decmpfs_fast_file_is_compressed(cp)) + /* file is not compressed, so don't hide this xattr */ + return 0; + if (strncmp(xattr, DECMPFS_XATTR_NAME, 11) == 0) + /* it's our xattr, so hide it */ + return 1; + /* don't hide this xattr */ + return 0; +} + +#pragma mark --- registration/validation routines --- + +errno_t +register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration) +{ + /* called by kexts to register decompressors */ + + errno_t ret = 0; + int locked = 0; + + if ((compression_type >= CMP_MAX) || + (!registration) || + (registration->decmpfs_registration != DECMPFS_REGISTRATION_VERSION)) { + ret = EINVAL; + goto out; + } + + lck_rw_lock_exclusive(decompressorsLock); locked = 1; + + /* make sure the registration for this type is zero */ + if (decompressors[compression_type] != NULL) { + ret = EEXIST; + goto out; + } + decompressors[compression_type] = registration; + wakeup((caddr_t)&decompressors); + +out: + if (locked) lck_rw_done(decompressorsLock); + return ret; +} + +errno_t +unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration) +{ + /* called by kexts to unregister decompressors */ + + errno_t ret = 0; + int locked = 0; + + if ((compression_type >= CMP_MAX) || + (!registration) || + (registration->decmpfs_registration != DECMPFS_REGISTRATION_VERSION)) { + ret = EINVAL; + goto out; + } + + lck_rw_lock_exclusive(decompressorsLock); locked = 1; + if (decompressors[compression_type] != registration) { + ret = EEXIST; + goto out; + } + decompressors[compression_type] = NULL; + wakeup((caddr_t)&decompressors); + +out: + if (locked) 
lck_rw_done(decompressorsLock); + return ret; +} + +static int +compression_type_valid(decmpfs_header *hdr) +{ + /* fast pre-check to determine if the given compressor has checked in */ + int ret = 0; + + /* every compressor must have at least a fetch function */ + lck_rw_lock_shared(decompressorsLock); + if (decmp_get_func(hdr->compression_type, fetch) != NULL) { + ret = 1; + } + lck_rw_done(decompressorsLock); + + return ret; +} + +#pragma mark --- compression/decompression routines --- + +static int +decmpfs_fetch_uncompressed_data(vnode_t vp, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read) +{ + /* get the uncompressed bytes for the specified region of vp by calling out to the registered compressor */ + + int err = 0; + + *bytes_read = 0; + + if ((uint64_t)offset >= hdr->uncompressed_size) { + /* reading past end of file; nothing to do */ + err = 0; + goto out; + } + if (offset < 0) { + /* tried to read from before start of file */ + err = EINVAL; + goto out; + } + if ((uint64_t)(offset + size) > hdr->uncompressed_size) { + /* adjust size so we don't read past the end of the file */ + size = hdr->uncompressed_size - offset; + } + if (size == 0) { + /* nothing to read */ + err = 0; + goto out; + } + + lck_rw_lock_shared(decompressorsLock); + decmpfs_fetch_uncompressed_data_func fetch = decmp_get_func(hdr->compression_type, fetch); + if (fetch) { + err = fetch(vp, decmpfs_ctx, hdr, offset, size, nvec, vec, bytes_read); + } else { + err = ENOTSUP; + } + lck_rw_done(decompressorsLock); + +out: + return err; +} + +static kern_return_t +commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abort) +{ + kern_return_t kr = 0; + + /* commit the upl pages */ + if (abort) { + VerboseLog("aborting upl, flags 0x%08x\n", flags); + kr = ubc_upl_abort_range(upl, pl_offset, uplSize, flags); + if (kr != KERN_SUCCESS) + ErrorLog("ubc_upl_commit_range error %d\n", (int)kr); + } else { + 
VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY); + kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY); + if (kr != KERN_SUCCESS) + ErrorLog("ubc_upl_commit_range error %d\n", (int)kr); + } + return kr; +} + +errno_t +decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp) +{ + /* handles a page-in request from vfs for a compressed file */ + + int err = 0; + struct vnode *vp = ap->a_vp; + upl_t pl = ap->a_pl; + upl_offset_t pl_offset = ap->a_pl_offset; + off_t f_offset = ap->a_f_offset; + size_t size = ap->a_size; + int flags = ap->a_flags; + off_t uplPos = 0; + user_ssize_t uplSize = 0; + void *data = NULL; + decmpfs_header *hdr = NULL; + int abort_pagein = 0; + uint64_t cachedSize = 0; + int cmpdata_locked = 0; + + if(!decmpfs_trylock_compressed_data(cp, 0)) { + return EAGAIN; + } + cmpdata_locked = 1; + + + if (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD)) { + DebugLog("pagein: unknown flags 0x%08x\n", (flags & ~(UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD))); + } + + err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); + if (err != 0) { + goto out; + } + + cachedSize = hdr->uncompressed_size; + + if (!compression_type_valid(hdr)) { + /* compressor not registered */ + err = ENOTSUP; + goto out; + } + + /* map the upl so we can fetch into it */ + kern_return_t kr = ubc_upl_map(pl, (vm_offset_t*)&data); + if ((kr != KERN_SUCCESS) || (data == NULL)) { + goto out; + } + + uplPos = f_offset; + uplSize = size; + + /* clip the size to the size of the file */ + if ((uint64_t)uplPos + uplSize > cachedSize) { + /* truncate the read to the size of the file */ + uplSize = cachedSize - uplPos; + } + + /* do the fetch */ + decmpfs_vector vec; + +decompress: + /* the mapped data pointer points to the first page of the page list, so we want to start filling in at an offset of pl_offset */ + vec.buf = (char*)data + pl_offset; + vec.size = size; + + uint64_t did_read = 0; + 
if (decmpfs_fast_get_state(cp) == FILE_IS_CONVERTING) { + ErrorLog("unexpected pagein during decompress\n"); + /* + if the file is converting, this must be a recursive call to pagein from underneath a call to decmpfs_decompress_file; + pretend that it succeeded but don't do anything since we're just going to write over the pages anyway + */ + err = 0; + did_read = 0; + } else { + err = decmpfs_fetch_uncompressed_data(vp, hdr, uplPos, uplSize, 1, &vec, &did_read); + } + if (err) { + DebugLog("decmpfs_fetch_uncompressed_data err %d\n", err); + int cmp_state = decmpfs_fast_get_state(cp); + if (cmp_state == FILE_IS_CONVERTING) { + DebugLog("cmp_state == FILE_IS_CONVERTING\n"); + cmp_state = wait_for_decompress(cp); + if (cmp_state == FILE_IS_COMPRESSED) { + DebugLog("cmp_state == FILE_IS_COMPRESSED\n"); + /* a decompress was attempted but it failed, let's try calling fetch again */ + goto decompress; + } + } + if (cmp_state == FILE_IS_NOT_COMPRESSED) { + DebugLog("cmp_state == FILE_IS_NOT_COMPRESSED\n"); + /* the file was decompressed after we started reading it */ + abort_pagein = 1; /* we're not going to commit our data */ + *is_compressed = 0; /* instruct caller to fall back to its normal path */ + } + } + + /* zero out whatever we didn't read, and zero out the end of the last page(s) */ + uint64_t total_size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); + if (did_read < total_size) { + memset((char*)vec.buf + did_read, 0, total_size - did_read); + } + + kr = ubc_upl_unmap(pl); data = NULL; /* make sure to set data to NULL so we don't try to unmap again below */ + if (kr != KERN_SUCCESS) + ErrorLog("ubc_upl_unmap error %d\n", (int)kr); + else { + if (!abort_pagein) { + /* commit our pages */ + kr = commit_upl(pl, pl_offset, total_size, UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE, 0); + } + } + +out: + if (data) ubc_upl_unmap(pl); + if (hdr) FREE(hdr, M_TEMP); + if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0); + if (err) + ErrorLog("err %d\n", err); + 
+ return err; +} + +errno_t +decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_cnode *cp) +{ + /* handles a read request from vfs for a compressed file */ + + uio_t uio = ap->a_uio; + vnode_t vp = ap->a_vp; + int err = 0; + int countInt = 0; + off_t uplPos = 0; + user_ssize_t uplSize = 0; + user_ssize_t uplRemaining = 0; + off_t curUplPos = 0; + user_ssize_t curUplSize = 0; + kern_return_t kr = KERN_SUCCESS; + int abort_read = 0; + void *data = NULL; + uint64_t did_read = 0; + upl_t upl = NULL; + upl_page_info_t *pli = NULL; + decmpfs_header *hdr = NULL; + uint64_t cachedSize = 0; + off_t uioPos = 0; + user_ssize_t uioRemaining = 0; + int cmpdata_locked = 0; + + decmpfs_lock_compressed_data(cp, 0); cmpdata_locked = 1; + + uplPos = uio_offset(uio); + uplSize = uio_resid(uio); + VerboseLog("uplPos %lld uplSize %lld\n", uplPos, uplSize); + + cachedSize = decmpfs_cnode_get_vnode_cached_size(cp); + + if ((uint64_t)uplPos + uplSize > cachedSize) { + /* truncate the read to the size of the file */ + uplSize = cachedSize - uplPos; + } + + /* give the cluster layer a chance to fill in whatever it already has */ + countInt = (uplSize > INT_MAX) ? 
INT_MAX : uplSize; + err = cluster_copy_ubc_data(vp, uio, &countInt, 0); + if (err != 0) + goto out; + + /* figure out what's left */ + uioPos = uio_offset(uio); + uioRemaining = uio_resid(uio); + if ((uint64_t)uioPos + uioRemaining > cachedSize) { + /* truncate the read to the size of the file */ + uioRemaining = cachedSize - uioPos; + } + + if (uioRemaining <= 0) { + /* nothing left */ + goto out; + } + + err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); + if (err != 0) { + goto out; + } + if (!compression_type_valid(hdr)) { + err = ENOTSUP; + goto out; + } + + uplPos = uioPos; + uplSize = uioRemaining; +#if COMPRESSION_DEBUG + char path[PATH_MAX]; + DebugLog("%s: uplPos %lld uplSize %lld\n", vnpath(vp, path, sizeof(path)), (uint64_t)uplPos, (uint64_t)uplSize); +#endif + + lck_rw_lock_shared(decompressorsLock); + decmpfs_adjust_fetch_region_func adjust_fetch = decmp_get_func(hdr->compression_type, adjust_fetch); + if (adjust_fetch) { + /* give the compressor a chance to adjust the portion of the file that we read */ + adjust_fetch(vp, decmpfs_ctx, hdr, &uplPos, &uplSize); + VerboseLog("adjusted uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); + } + lck_rw_done(decompressorsLock); + + /* clip the adjusted size to the size of the file */ + if ((uint64_t)uplPos + uplSize > cachedSize) { + /* truncate the read to the size of the file */ + uplSize = cachedSize - uplPos; + } + + if (uplSize <= 0) { + /* nothing left */ + goto out; + } + + /* + since we're going to create a upl for the given region of the file, + make sure we're on page boundaries + */ + + if (uplPos & (PAGE_SIZE - 1)) { + /* round position down to page boundary */ + uplSize += (uplPos & (PAGE_SIZE - 1)); + uplPos &= ~(PAGE_SIZE - 1); + } + /* round size up to page multiple */ + uplSize = (uplSize + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); + + VerboseLog("new uplPos %lld uplSize %lld\n", (uint64_t)uplPos, (uint64_t)uplSize); + + uplRemaining = uplSize; + curUplPos = uplPos; + 
curUplSize = 0; + + while(uplRemaining > 0) { + /* start after the last upl */ + curUplPos += curUplSize; + + /* clip to max upl size */ + curUplSize = uplRemaining; + if (curUplSize > MAX_UPL_SIZE * PAGE_SIZE) { + curUplSize = MAX_UPL_SIZE * PAGE_SIZE; + } + + /* create the upl */ + kr = ubc_create_upl(vp, curUplPos, curUplSize, &upl, &pli, UPL_SET_LITE); + if (kr != KERN_SUCCESS) { + ErrorLog("ubc_create_upl error %d\n", (int)kr); + err = EINVAL; + goto out; + } + VerboseLog("curUplPos %lld curUplSize %lld\n", (uint64_t)curUplPos, (uint64_t)curUplSize); + + /* map the upl */ + kr = ubc_upl_map(upl, (vm_offset_t*)&data); + if (kr != KERN_SUCCESS) { + ErrorLog("ubc_upl_map error %d\n", (int)kr); + err = EINVAL; + goto out; + } + + /* make sure the map succeeded */ + if (!data) { + ErrorLog("ubc_upl_map mapped null\n"); + err = EINVAL; + goto out; + } + + /* fetch uncompressed data into the mapped upl */ + decmpfs_vector vec; + decompress: + vec = (decmpfs_vector){ .buf = data, .size = curUplSize }; + err = decmpfs_fetch_uncompressed_data(vp, hdr, curUplPos, curUplSize, 1, &vec, &did_read); + if (err) { + ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err); + + /* maybe the file is converting to decompressed */ + int cmp_state = decmpfs_fast_get_state(cp); + if (cmp_state == FILE_IS_CONVERTING) { + ErrorLog("cmp_state == FILE_IS_CONVERTING\n"); + cmp_state = wait_for_decompress(cp); + if (cmp_state == FILE_IS_COMPRESSED) { + ErrorLog("cmp_state == FILE_IS_COMPRESSED\n"); + /* a decompress was attempted but it failed, let's try fetching again */ + goto decompress; + } + } + if (cmp_state == FILE_IS_NOT_COMPRESSED) { + ErrorLog("cmp_state == FILE_IS_NOT_COMPRESSED\n"); + /* the file was decompressed after we started reading it */ + abort_read = 1; /* we're not going to commit our data */ + *is_compressed = 0; /* instruct caller to fall back to its normal path */ + } + kr = KERN_FAILURE; + did_read = 0; + } + /* zero out the remainder of the last page */ + 
memset((char*)data + did_read, 0, curUplSize - did_read); + kr = ubc_upl_unmap(upl); + if (kr == KERN_SUCCESS) { + if (abort_read) { + kr = commit_upl(upl, 0, curUplSize, UPL_ABORT_FREE_ON_EMPTY, 1); + } else { + VerboseLog("uioPos %lld uioRemaining %lld\n", (uint64_t)uioPos, (uint64_t)uioRemaining); + if (uioRemaining) { + off_t uplOff = uioPos - curUplPos; + if (uplOff < 0) { + ErrorLog("uplOff %lld should never be negative\n", (int64_t)uplOff); + err = EINVAL; + } else { + off_t count = curUplPos + curUplSize - uioPos; + if (count < 0) { + /* this upl is entirely before the uio */ + } else { + if (count > uioRemaining) + count = uioRemaining; + int io_resid = count; + err = cluster_copy_upl_data(uio, upl, uplOff, &io_resid); + int copied = count - io_resid; + VerboseLog("uplOff %lld count %lld copied %lld\n", (uint64_t)uplOff, (uint64_t)count, (uint64_t)copied); + if (err) { + ErrorLog("cluster_copy_upl_data err %d\n", err); + } + uioPos += copied; + uioRemaining -= copied; + } + } + } + kr = commit_upl(upl, 0, curUplSize, UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE, 0); + if (err) { + goto out; + } + } + } else { + ErrorLog("ubc_upl_unmap error %d\n", (int)kr); + } + + uplRemaining -= curUplSize; + } + +out: + if (hdr) FREE(hdr, M_TEMP); + if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 0); + if (err) {/* something went wrong */ + ErrorLog("err %d\n", err); + return err; + } + +#if COMPRESSION_DEBUG + uplSize = uio_resid(uio); + if (uplSize) + VerboseLog("still %lld bytes to copy\n", uplSize); +#endif + return 0; +} + +int +decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp) +{ + /* + call out to the decompressor to free remove any data associated with this compressed file + then delete the file's compression xattr + */ + + decmpfs_header *hdr = NULL; + int err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); + if (err) { + ErrorLog("decmpfs_fetch_compressed_header err %d\n", err); + } else { + lck_rw_lock_shared(decompressorsLock); + 
decmpfs_free_compressed_data_func free_data = decmp_get_func(hdr->compression_type, free_data); + if (free_data) { + err = free_data(vp, decmpfs_ctx, hdr); + } else { + /* nothing to do, so no error */ + err = 0; + } + lck_rw_done(decompressorsLock); + + if (err != 0) { + ErrorLog("decompressor err %d\n", err); + } + } + + /* delete the xattr */ + err = vn_removexattr(vp, DECMPFS_XATTR_NAME, 0, decmpfs_ctx); + if (err != 0) { + goto out; + } + +out: + if (hdr) FREE(hdr, M_TEMP); + return err; +} + +#pragma mark --- file conversion routines --- + +static int +unset_compressed_flag(vnode_t vp) +{ + int err = 0; + struct vnode_attr va; + int new_bsdflags = 0; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_flags); + err = vnode_getattr(vp, &va, decmpfs_ctx); + + if (err != 0) { + ErrorLog("vnode_getattr err %d\n", err); + } else { + new_bsdflags = va.va_flags & ~UF_COMPRESSED; + + VATTR_INIT(&va); + VATTR_SET(&va, va_flags, new_bsdflags); + err = vnode_setattr(vp, &va, decmpfs_ctx); + if (err != 0) { + ErrorLog("vnode_setattr err %d\n", err); + } + } + return err; +} + +int +decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncate_okay, int skiplock) +{ + /* convert a compressed file to an uncompressed file */ + + int err = 0; + char *data = NULL; + uio_t uio_w = 0; + off_t offset = 0; + uint32_t old_state = 0; + uint32_t new_state = 0; + int update_file_state = 0; + int allocSize = 0; + decmpfs_header *hdr = NULL; + int cmpdata_locked = 0; + off_t remaining = 0; + uint64_t uncompressed_size = 0; + + if (!skiplock) { + decmpfs_lock_compressed_data(cp, 1); cmpdata_locked = 1; + } + +decompress: + old_state = decmpfs_fast_get_state(cp); + + switch(old_state) { + case FILE_IS_NOT_COMPRESSED: + { + /* someone else decompressed the file */ + err = 0; + goto out; + } + + case FILE_TYPE_UNKNOWN: + { + /* the file is in an unknown state, so update the state and retry */ + (void)decmpfs_file_is_compressed(vp, cp); + + /* try again */ + goto decompress; + } 
+ + case FILE_IS_COMPRESSED: + { + /* the file is compressed, so decompress it */ + break; + } + + default: + { + /* + this shouldn't happen since multiple calls to decmpfs_decompress_file lock each other out, + and when decmpfs_decompress_file returns, the state should be always be set back to + FILE_IS_NOT_COMPRESSED or FILE_IS_UNKNOWN + */ + err = EINVAL; + goto out; + } + } + + err = decmpfs_fetch_compressed_header(vp, cp, &hdr, 0); + if (err != 0) { + goto out; + } + + uncompressed_size = hdr->uncompressed_size; + if (toSize == -1) + toSize = hdr->uncompressed_size; + + if (toSize == 0) { + /* special case truncating the file to zero bytes */ + goto nodecmp; + } else if ((uint64_t)toSize > hdr->uncompressed_size) { + /* the caller is trying to grow the file, so we should decompress all the data */ + toSize = hdr->uncompressed_size; + } + + allocSize = MIN(64*1024, toSize); + MALLOC(data, char *, allocSize, M_TEMP, M_WAITOK); + if (!data) { + err = ENOMEM; + goto out; + } + + uio_w = uio_create(1, 0LL, UIO_SYSSPACE, UIO_WRITE); + if (!uio_w) { + err = ENOMEM; + goto out; + } + uio_w->uio_flags |= UIO_FLAGS_IS_COMPRESSED_FILE; + + remaining = toSize; + + /* tell the buffer cache that this is an empty file */ + ubc_setsize(vp, 0); + + /* if we got here, we need to decompress the file */ + decmpfs_cnode_set_vnode_state(cp, FILE_IS_CONVERTING, 1); + + while(remaining > 0) { + /* loop decompressing data from the file and writing it into the data fork */ + + uint64_t bytes_read = 0; + decmpfs_vector vec = { .buf = data, .size = MIN(allocSize, remaining) }; + err = decmpfs_fetch_uncompressed_data(vp, hdr, offset, vec.size, 1, &vec, &bytes_read); + if (err != 0) { + ErrorLog("decmpfs_fetch_uncompressed_data err %d\n", err); + goto out; + } + + if (bytes_read == 0) { + /* we're done reading data */ + break; + } + + uio_reset(uio_w, offset, UIO_SYSSPACE, UIO_WRITE); + err = uio_addiov(uio_w, CAST_USER_ADDR_T(data), bytes_read); + if (err != 0) { + ErrorLog("uio_addiov 
err %d\n", err); + err = ENOMEM; + goto out; + } + + err = VNOP_WRITE(vp, uio_w, 0, decmpfs_ctx); + if (err != 0) { + /* if the write failed, truncate the file to zero bytes */ + ErrorLog("VNOP_WRITE err %d\n", err); + break; + } + offset += bytes_read; + remaining -= bytes_read; + } + + if (err == 0) { + if (offset != toSize) { + ErrorLog("file decompressed to %lld instead of %lld\n", offset, toSize); + err = EINVAL; + goto out; + } + } + + if (err == 0) { + /* sync the data and metadata */ + err = VNOP_FSYNC(vp, MNT_WAIT, decmpfs_ctx); + if (err != 0) { + ErrorLog("VNOP_FSYNC err %d\n", err); + goto out; + } + } + + if (err != 0) { + /* write, setattr, or fsync failed */ + ErrorLog("aborting decompress, err %d\n", err); + if (truncate_okay) { + /* truncate anything we might have written */ + int error = vnode_setsize(vp, 0, 0, decmpfs_ctx); + ErrorLog("vnode_setsize err %d\n", error); + } + goto out; + } + +nodecmp: + /* if we're truncating the file to zero bytes, we'll skip ahead to here */ + + /* unset the compressed flag */ + unset_compressed_flag(vp); + + /* free the compressed data associated with this file */ + err = decmpfs_free_compressed_data(vp, cp); + if (err != 0) { + ErrorLog("decmpfs_free_compressed_data err %d\n", err); + } + + /* + even if free_compressed_data or vnode_getattr/vnode_setattr failed, return success + since we succeeded in writing all of the file data to the data fork + */ + err = 0; + + /* if we got this far, the file was successfully decompressed */ + update_file_state = 1; + new_state = FILE_IS_NOT_COMPRESSED; + +#if COMPRESSION_DEBUG + { + uint64_t filesize = 0; + vnsize(vp, &filesize); + DebugLog("new file size %lld\n", filesize); + } +#endif + +out: + if (hdr) FREE(hdr, M_TEMP); + if (data) FREE(data, M_TEMP); + if (uio_w) uio_free(uio_w); + + if (err != 0) { + /* if there was a failure, reset compression flags to unknown and clear the buffer cache data */ + update_file_state = 1; + new_state = FILE_TYPE_UNKNOWN; + if 
(uncompressed_size) { + ubc_setsize(vp, 0); + ubc_setsize(vp, uncompressed_size); + } + } + + if (update_file_state) { + lck_mtx_lock(decompress_channel_mtx); + decmpfs_cnode_set_vnode_state(cp, new_state, 1); + wakeup((caddr_t)&decompress_channel); /* wake up anyone who might have been waiting for decompression */ + lck_mtx_unlock(decompress_channel_mtx); + } + + if (cmpdata_locked) decmpfs_unlock_compressed_data(cp, 1); + + return err; +} + +#pragma mark --- Type1 compressor --- + +/* + The "Type1" compressor stores the data fork directly in the compression xattr + */ + +static int +decmpfs_validate_compressed_file_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr) +{ + int err = 0; + + if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) { + err = EINVAL; + goto out; + } +out: + return err; +} + +static int +decmpfs_fetch_uncompressed_data_Type1(__unused vnode_t vp, __unused vfs_context_t ctx, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read) +{ + int err = 0; + int i; + user_ssize_t remaining; + + if (hdr->uncompressed_size + sizeof(decmpfs_disk_header) != (uint64_t)hdr->attr_size) { + err = EINVAL; + goto out; + } + +#if COMPRESSION_DEBUG + static int dummy = 0; // prevent syslog from coalescing printfs + char path[PATH_MAX]; + DebugLog("%s: %d memcpy %lld at %lld\n", vnpath(vp, path, sizeof(path)), dummy++, size, (uint64_t)offset); +#endif + + remaining = size; + for (i = 0; (i < nvec) && (remaining > 0); i++) { + user_ssize_t curCopy = vec[i].size; + if (curCopy > remaining) + curCopy = remaining; + memcpy(vec[i].buf, hdr->attr_bytes + offset, curCopy); + offset += curCopy; + remaining -= curCopy; + } + + if ((bytes_read) && (err == 0)) + *bytes_read = (size - remaining); + +out: + return err; +} + +static decmpfs_registration Type1Reg = +{ + .decmpfs_registration = DECMPFS_REGISTRATION_VERSION, + .validate = 
decmpfs_validate_compressed_file_Type1, + .adjust_fetch = NULL, /* no adjust necessary */ + .fetch = decmpfs_fetch_uncompressed_data_Type1, + .free_data = NULL /* no free necessary */ +}; + +#pragma mark --- decmpfs initialization --- + +void decmpfs_init() +{ + static int done = 0; + if (done) return; + + decmpfs_ctx = vfs_context_create(vfs_context_kernel()); + + lck_grp_attr_t *attr = lck_grp_attr_alloc_init(); + decmpfs_lockgrp = lck_grp_alloc_init("VFSCOMP", attr); + decompressorsLock = lck_rw_alloc_init(decmpfs_lockgrp, NULL); + decompress_channel_mtx = lck_mtx_alloc_init(decmpfs_lockgrp, NULL); + + register_decmpfs_decompressor(CMP_Type1, &Type1Reg); + + done = 1; +} +#endif /* HFS_COMPRESSION */ diff --git a/bsd/kern/init_sysent.c b/bsd/kern/init_sysent.c deleted file mode 100644 index f736a0c8a..000000000 --- a/bsd/kern/init_sysent.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - * - * - * System call switch table. - * - * DO NOT EDIT-- this file is automatically generated. - * created from syscalls.master - */ - -#define __INIT_SYSENT_C__ 1 -#include -#include -#include -#include -#include -#include -#define AC(name) (sizeof(struct name) / sizeof(syscall_arg_t)) - - -/* The casts are bogus but will do for now. */ -__private_extern__ struct sysent sysent[] = { - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 0 = nosys indirect syscall */ - {AC(exit_args), 0, 0, (sy_call_t *)exit, munge_w, munge_d, _SYSCALL_RET_NONE, 4}, /* 1 = exit */ - {0, 0, 0, (sy_call_t *)fork, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 2 = fork */ - {AC(read_args), 0, 0, (sy_call_t *)read, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 3 = read */ - {AC(write_args), 0, 0, (sy_call_t *)write, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 4 = write */ - {AC(open_args), 0, 0, (sy_call_t *)open, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 5 = open */ - {AC(close_args), 0, 0, (sy_call_t *)close, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 6 = close */ - {AC(wait4_args), 0, 0, (sy_call_t *)wait4, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 7 = wait4 */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 8 = nosys old creat */ - {AC(link_args), 0, 0, (sy_call_t *)link, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 9 = link */ - {AC(unlink_args), 0, 0, (sy_call_t *)unlink, munge_w, munge_d, 
_SYSCALL_RET_INT_T, 4}, /* 10 = unlink */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 11 = nosys old execv */ - {AC(chdir_args), 0, 0, (sy_call_t *)chdir, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 12 = chdir */ - {AC(fchdir_args), 0, 0, (sy_call_t *)fchdir, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 13 = fchdir */ - {AC(mknod_args), 0, 0, (sy_call_t *)mknod, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 14 = mknod */ - {AC(chmod_args), 0, 0, (sy_call_t *)chmod, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 15 = chmod */ - {AC(chown_args), 0, 0, (sy_call_t *)chown, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 16 = chown */ - {AC(obreak_args), 0, UNSAFE_64BIT, (sy_call_t *)obreak, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 17 = obreak old break */ -#if COMPAT_GETFSSTAT - {AC(ogetfsstat_args), 0, 0, (sy_call_t *)ogetfsstat, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 18 = ogetfsstat */ -#else - {AC(getfsstat_args), 0, 0, (sy_call_t *)getfsstat, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 18 = getfsstat */ -#endif - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 19 = nosys old lseek */ - {0, 0, 0, (sy_call_t *)getpid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 20 = getpid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 21 = nosys old mount */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 22 = nosys old umount */ - {AC(setuid_args), 0, 0, (sy_call_t *)setuid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 23 = setuid */ - {0, 0, 0, (sy_call_t *)getuid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 24 = getuid */ - {0, 0, 0, (sy_call_t *)geteuid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 25 = geteuid */ - {AC(ptrace_args), 0, 0, (sy_call_t *)ptrace, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 26 = ptrace */ -#if SOCKETS - {AC(recvmsg_args), 0, 0, (sy_call_t *)recvmsg, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 27 = recvmsg */ - 
{AC(sendmsg_args), 0, 0, (sy_call_t *)sendmsg, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 28 = sendmsg */ - {AC(recvfrom_args), 0, 0, (sy_call_t *)recvfrom, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 29 = recvfrom */ - {AC(accept_args), 0, 0, (sy_call_t *)accept, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 30 = accept */ - {AC(getpeername_args), 0, 0, (sy_call_t *)getpeername, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 31 = getpeername */ - {AC(getsockname_args), 0, 0, (sy_call_t *)getsockname, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 32 = getsockname */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 27 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 28 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 29 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 30 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 31 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 32 = nosys */ -#endif /* SOCKETS */ - {AC(access_args), 0, 0, (sy_call_t *)access, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 33 = access */ - {AC(chflags_args), 0, 0, (sy_call_t *)chflags, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 34 = chflags */ - {AC(fchflags_args), 0, 0, (sy_call_t *)fchflags, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 35 = fchflags */ - {0, 0, 0, (sy_call_t *)sync, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 36 = sync */ - {AC(kill_args), 0, 0, (sy_call_t *)kill, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 37 = kill */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 38 = nosys old stat */ - {0, 0, 0, (sy_call_t *)getppid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 39 = getppid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 40 = nosys old lstat */ - {AC(dup_args), 0, 0, (sy_call_t *)dup, munge_w, 
munge_d, _SYSCALL_RET_INT_T, 4}, /* 41 = dup */ - {0, 0, 0, (sy_call_t *)pipe, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 42 = pipe */ - {0, 0, 0, (sy_call_t *)getegid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 43 = getegid */ - {AC(profil_args), 0, 0, (sy_call_t *)profil, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 44 = profil */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 45 = nosys old ktrace */ - {AC(sigaction_args), 0, 0, (sy_call_t *)sigaction, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 46 = sigaction */ - {0, 0, 0, (sy_call_t *)getgid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 47 = getgid */ - {AC(sigprocmask_args), 0, 0, (sy_call_t *)sigprocmask, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 48 = sigprocmask */ - {AC(getlogin_args), 0, 0, (sy_call_t *)getlogin, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 49 = getlogin */ - {AC(setlogin_args), 0, 0, (sy_call_t *)setlogin, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 50 = setlogin */ - {AC(acct_args), 0, 0, (sy_call_t *)acct, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 51 = acct */ - {AC(sigpending_args), 0, 0, (sy_call_t *)sigpending, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 52 = sigpending */ - {AC(sigaltstack_args), 0, 0, (sy_call_t *)sigaltstack, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 53 = sigaltstack */ - {AC(ioctl_args), 0, 0, (sy_call_t *)ioctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 54 = ioctl */ - {AC(reboot_args), 0, 0, (sy_call_t *)reboot, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 55 = reboot */ - {AC(revoke_args), 0, 0, (sy_call_t *)revoke, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 56 = revoke */ - {AC(symlink_args), 0, 0, (sy_call_t *)symlink, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 57 = symlink */ - {AC(readlink_args), 0, 0, (sy_call_t *)readlink, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 58 = readlink */ - {AC(execve_args), 0, 0, (sy_call_t *)execve, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 59 = 
execve */ - {AC(umask_args), 0, 0, (sy_call_t *)umask, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 60 = umask */ - {AC(chroot_args), 0, 0, (sy_call_t *)chroot, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 61 = chroot */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 62 = nosys old fstat */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 63 = nosys used internally , reserved */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 64 = nosys old getpagesize */ - {AC(msync_args), 0, 0, (sy_call_t *)msync, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 65 = msync */ - {0, 0, 0, (sy_call_t *)vfork, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 66 = vfork */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 67 = nosys old vread */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 68 = nosys old vwrite */ - {AC(sbrk_args), 0, 0, (sy_call_t *)sbrk, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 69 = sbrk */ - {AC(sstk_args), 0, 0, (sy_call_t *)sstk, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 70 = sstk */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 71 = nosys old mmap */ - {0, 0, 0, (sy_call_t *)ovadvise, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 72 = ovadvise old vadvise */ - {AC(munmap_args), 0, 0, (sy_call_t *)munmap, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 73 = munmap */ - {AC(mprotect_args), 0, 0, (sy_call_t *)mprotect, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 74 = mprotect */ - {AC(madvise_args), 0, 0, (sy_call_t *)madvise, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 75 = madvise */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 76 = nosys old vhangup */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 77 = nosys old vlimit */ - {AC(mincore_args), 0, 0, (sy_call_t *)mincore, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 78 = mincore */ - {AC(getgroups_args), 0, 0, 
(sy_call_t *)getgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 79 = getgroups */ - {AC(setgroups_args), 0, 0, (sy_call_t *)setgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 80 = setgroups */ - {0, 0, 0, (sy_call_t *)getpgrp, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 81 = getpgrp */ - {AC(setpgid_args), 0, 0, (sy_call_t *)setpgid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 82 = setpgid */ - {AC(setitimer_args), 0, 0, (sy_call_t *)setitimer, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 83 = setitimer */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 84 = nosys old wait */ - {0, 0, 0, (sy_call_t *)swapon, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 85 = swapon */ - {AC(getitimer_args), 0, 0, (sy_call_t *)getitimer, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 86 = getitimer */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 87 = nosys old gethostname */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 88 = nosys old sethostname */ - {0, 0, 0, (sy_call_t *)getdtablesize, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 89 = getdtablesize */ - {AC(dup2_args), 0, 0, (sy_call_t *)dup2, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 90 = dup2 */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 91 = nosys old getdopt */ - {AC(fcntl_args), 0, 0, (sy_call_t *)fcntl, munge_wws, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 92 = fcntl */ - {AC(select_args), 0, 0, (sy_call_t *)select, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 93 = select */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 94 = nosys old setdopt */ - {AC(fsync_args), 0, 0, (sy_call_t *)fsync, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 95 = fsync */ - {AC(setpriority_args), 0, 0, (sy_call_t *)setpriority, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 96 = setpriority */ -#if SOCKETS - {AC(socket_args), 0, 0, (sy_call_t *)socket, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 97 = 
socket */ - {AC(connect_args), 0, 0, (sy_call_t *)connect, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 98 = connect */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 97 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 98 = nosys */ -#endif /* SOCKETS */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 99 = nosys old accept */ - {AC(getpriority_args), 0, 0, (sy_call_t *)getpriority, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 100 = getpriority */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 101 = nosys old send */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 102 = nosys old recv */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 103 = nosys old sigreturn */ -#if SOCKETS - {AC(bind_args), 0, 0, (sy_call_t *)bind, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 104 = bind */ - {AC(setsockopt_args), 0, 0, (sy_call_t *)setsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 105 = setsockopt */ - {AC(listen_args), 0, 0, (sy_call_t *)listen, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 106 = listen */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 104 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 105 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 106 = nosys */ -#endif /* SOCKETS */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 107 = nosys old vtimes */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 108 = nosys old sigvec */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 109 = nosys old sigblock */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 110 = nosys old sigsetmask */ - {AC(sigsuspend_args), 0, 0, (sy_call_t *)sigsuspend, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 111 = sigsuspend */ - 
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 112 = nosys old sigstack */ -#if SOCKETS - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 113 = nosys old recvmsg */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 114 = nosys old sendmsg */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 113 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 114 = nosys */ -#endif /* SOCKETS */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 115 = nosys old vtrace */ - {AC(gettimeofday_args), 0, 0, (sy_call_t *)gettimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 116 = gettimeofday */ - {AC(getrusage_args), 0, 0, (sy_call_t *)getrusage, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 117 = getrusage */ -#if SOCKETS - {AC(getsockopt_args), 0, 0, (sy_call_t *)getsockopt, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 118 = getsockopt */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 118 = nosys */ -#endif /* SOCKETS */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 119 = nosys old resuba */ - {AC(readv_args), 0, 0, (sy_call_t *)readv, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 120 = readv */ - {AC(writev_args), 0, 0, (sy_call_t *)writev, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 121 = writev */ - {AC(settimeofday_args), 0, 0, (sy_call_t *)settimeofday, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 122 = settimeofday */ - {AC(fchown_args), 0, 0, (sy_call_t *)fchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 123 = fchown */ - {AC(fchmod_args), 0, 0, (sy_call_t *)fchmod, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 124 = fchmod */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 125 = nosys old recvfrom */ - {AC(setreuid_args), 0, 0, (sy_call_t *)setreuid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 126 = setreuid */ - 
{AC(setregid_args), 0, 0, (sy_call_t *)setregid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 127 = setregid */ - {AC(rename_args), 0, 0, (sy_call_t *)rename, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 128 = rename */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 129 = nosys old truncate */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 130 = nosys old ftruncate */ - {AC(flock_args), 0, 0, (sy_call_t *)flock, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 131 = flock */ - {AC(mkfifo_args), 0, 0, (sy_call_t *)mkfifo, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 132 = mkfifo */ -#if SOCKETS - {AC(sendto_args), 0, 0, (sy_call_t *)sendto, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 133 = sendto */ - {AC(shutdown_args), 0, 0, (sy_call_t *)shutdown, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 134 = shutdown */ - {AC(socketpair_args), 0, 0, (sy_call_t *)socketpair, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 135 = socketpair */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 133 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 134 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 135 = nosys */ -#endif /* SOCKETS */ - {AC(mkdir_args), 0, 0, (sy_call_t *)mkdir, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 136 = mkdir */ - {AC(rmdir_args), 0, 0, (sy_call_t *)rmdir, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 137 = rmdir */ - {AC(utimes_args), 0, 0, (sy_call_t *)utimes, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 138 = utimes */ - {AC(futimes_args), 0, 0, (sy_call_t *)futimes, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 139 = futimes */ - {AC(adjtime_args), 0, 0, (sy_call_t *)adjtime, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 140 = adjtime */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 141 = nosys old getpeername */ - {AC(gethostuuid_args), 0, 0, (sy_call_t 
*)gethostuuid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 142 = gethostuuid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 143 = nosys old sethostid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 144 = nosys old getrlimit */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 145 = nosys old setrlimit */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 146 = nosys old killpg */ - {0, 0, 0, (sy_call_t *)setsid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 147 = setsid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 148 = nosys old setquota */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 149 = nosys old qquota */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 150 = nosys old getsockname */ - {AC(getpgid_args), 0, 0, (sy_call_t *)getpgid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 151 = getpgid */ - {AC(setprivexec_args), 0, 0, (sy_call_t *)setprivexec, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 152 = setprivexec */ - {AC(pread_args), 0, 0, (sy_call_t *)pread, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T, 20}, /* 153 = pread */ - {AC(pwrite_args), 0, 0, (sy_call_t *)pwrite, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T, 20}, /* 154 = pwrite */ -#if NFSSERVER - {AC(nfssvc_args), 0, 0, (sy_call_t *)nfssvc, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 155 = nfssvc */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 155 = nosys */ -#endif - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 156 = nosys old getdirentries */ - {AC(statfs_args), 0, 0, (sy_call_t *)statfs, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 157 = statfs */ - {AC(fstatfs_args), 0, 0, (sy_call_t *)fstatfs, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 158 = fstatfs */ - {AC(unmount_args), 0, 0, (sy_call_t *)unmount, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 159 = unmount */ - 
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 160 = nosys old async_daemon */ -#if NFSSERVER - {AC(getfh_args), 0, 0, (sy_call_t *)getfh, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 161 = getfh */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 161 = nosys */ -#endif - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 162 = nosys old getdomainname */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 163 = nosys old setdomainname */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 164 = nosys */ - {AC(quotactl_args), 0, 0, (sy_call_t *)quotactl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 165 = quotactl */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 166 = nosys old exportfs */ - {AC(mount_args), 0, 0, (sy_call_t *)mount, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 167 = mount */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 168 = nosys old ustat */ - {AC(csops_args), 0, 0, (sy_call_t *)csops, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 169 = csops */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE, 0}, /* 170 = table old table */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 171 = nosys old wait3 */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 172 = nosys old rpause */ - {AC(waitid_args), 0, 0, (sy_call_t *)waitid, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 173 = waitid */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 174 = nosys old getdents */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 175 = nosys old gc_control */ - {AC(add_profil_args), 0, 0, (sy_call_t *)add_profil, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 176 = add_profil */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 177 = nosys */ - {0, 0, 0, (sy_call_t 
*)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 178 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 179 = nosys */ - {AC(kdebug_trace_args), 0, 0, (sy_call_t *)kdebug_trace, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 180 = kdebug_trace */ - {AC(setgid_args), 0, 0, (sy_call_t *)setgid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 181 = setgid */ - {AC(setegid_args), 0, 0, (sy_call_t *)setegid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 182 = setegid */ - {AC(seteuid_args), 0, 0, (sy_call_t *)seteuid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 183 = seteuid */ - {AC(sigreturn_args), 0, 0, (sy_call_t *)sigreturn, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 184 = sigreturn */ - {AC(chud_args), 0, UNSAFE_64BIT, (sy_call_t *)chud, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 185 = chud */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 186 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 187 = nosys */ - {AC(stat_args), 0, 0, (sy_call_t *)stat, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 188 = stat */ - {AC(fstat_args), 0, 0, (sy_call_t *)fstat, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 189 = fstat */ - {AC(lstat_args), 0, 0, (sy_call_t *)lstat, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 190 = lstat */ - {AC(pathconf_args), 0, 0, (sy_call_t *)pathconf, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 191 = pathconf */ - {AC(fpathconf_args), 0, 0, (sy_call_t *)fpathconf, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 192 = fpathconf */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 193 = nosys */ - {AC(getrlimit_args), 0, 0, (sy_call_t *)getrlimit, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 194 = getrlimit */ - {AC(setrlimit_args), 0, 0, (sy_call_t *)setrlimit, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 195 = setrlimit */ - {AC(getdirentries_args), 0, 0, (sy_call_t *)getdirentries, munge_wwww, munge_dddd, 
_SYSCALL_RET_INT_T, 16}, /* 196 = getdirentries */ - {AC(mmap_args), 0, 0, (sy_call_t *)mmap, munge_wwwwwl, munge_dddddd, _SYSCALL_RET_ADDR_T, 28}, /* 197 = mmap */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 198 = nosys __syscall */ - {AC(lseek_args), 0, 0, (sy_call_t *)lseek, munge_wlw, munge_ddd, _SYSCALL_RET_OFF_T, 16}, /* 199 = lseek */ - {AC(truncate_args), 0, 0, (sy_call_t *)truncate, munge_wl, munge_dd, _SYSCALL_RET_INT_T, 12}, /* 200 = truncate */ - {AC(ftruncate_args), 0, 0, (sy_call_t *)ftruncate, munge_wl, munge_dd, _SYSCALL_RET_INT_T, 12}, /* 201 = ftruncate */ - {AC(__sysctl_args), 0, 0, (sy_call_t *)__sysctl, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 202 = __sysctl */ - {AC(mlock_args), 0, 0, (sy_call_t *)mlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 203 = mlock */ - {AC(munlock_args), 0, 0, (sy_call_t *)munlock, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 204 = munlock */ - {AC(undelete_args), 0, 0, (sy_call_t *)undelete, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 205 = undelete */ -#if NETAT - {AC(ATsocket_args), 0, 0, (sy_call_t *)ATsocket, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 206 = ATsocket */ - {AC(ATgetmsg_args), 0, UNSAFE_64BIT, (sy_call_t *)ATgetmsg, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 207 = ATgetmsg */ - {AC(ATputmsg_args), 0, UNSAFE_64BIT, (sy_call_t *)ATputmsg, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 208 = ATputmsg */ - {AC(ATPsndreq_args), 0, UNSAFE_64BIT, (sy_call_t *)ATPsndreq, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 209 = ATPsndreq */ - {AC(ATPsndrsp_args), 0, UNSAFE_64BIT, (sy_call_t *)ATPsndrsp, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 210 = ATPsndrsp */ - {AC(ATPgetreq_args), 0, UNSAFE_64BIT, (sy_call_t *)ATPgetreq, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 211 = ATPgetreq */ - {AC(ATPgetrsp_args), 0, UNSAFE_64BIT, (sy_call_t *)ATPgetrsp, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 212 = ATPgetrsp */ - {0, 0, 
0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 213 = nosys Reserved for AppleTalk */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 206 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 207 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 208 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 209 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 210 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 211 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 212 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 213 = nosys Reserved for AppleTalk */ -#endif /* NETAT */ - {AC(kqueue_from_portset_np_args), 0, 0, (sy_call_t *)kqueue_from_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 214 = kqueue_from_portset_np */ - {AC(kqueue_portset_np_args), 0, 0, (sy_call_t *)kqueue_portset_np, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 215 = kqueue_portset_np */ - {0, 0, UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE, 12}, /* 216 = mkcomplex soon to be obsolete */ - {0, 0, UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE, 8}, /* 217 = statv soon to be obsolete */ - {0, 0, UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE, 8}, /* 218 = lstatv soon to be obsolete */ - {0, 0, UNSAFE_64BIT, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_NONE, 8}, /* 219 = fstatv soon to be obsolete */ - {AC(getattrlist_args), 0, 0, (sy_call_t *)getattrlist, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 220 = getattrlist */ - {AC(setattrlist_args), 0, 0, (sy_call_t *)setattrlist, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 221 = setattrlist */ - {AC(getdirentriesattr_args), 0, 0, (sy_call_t *)getdirentriesattr, munge_wwwwwwww, munge_dddddddd, _SYSCALL_RET_INT_T, 32}, /* 
222 = getdirentriesattr */ - {AC(exchangedata_args), 0, 0, (sy_call_t *)exchangedata, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 223 = exchangedata */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 224 = nosys was checkuseraccess */ - {AC(searchfs_args), 0, 0, (sy_call_t *)searchfs, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 225 = searchfs */ - {AC(delete_args), 0, 0, (sy_call_t *)delete, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 226 = delete private delete ( Carbon semantics ) */ - {AC(copyfile_args), 0, 0, (sy_call_t *)copyfile, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 227 = copyfile */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 228 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 229 = nosys */ - {AC(poll_args), 0, 0, (sy_call_t *)poll, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 230 = poll */ - {AC(watchevent_args), 0, 0, (sy_call_t *)watchevent, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 231 = watchevent */ - {AC(waitevent_args), 0, 0, (sy_call_t *)waitevent, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 232 = waitevent */ - {AC(modwatch_args), 0, 0, (sy_call_t *)modwatch, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 233 = modwatch */ - {AC(getxattr_args), 0, 0, (sy_call_t *)getxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_SSIZE_T, 24}, /* 234 = getxattr */ - {AC(fgetxattr_args), 0, 0, (sy_call_t *)fgetxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_SSIZE_T, 24}, /* 235 = fgetxattr */ - {AC(setxattr_args), 0, 0, (sy_call_t *)setxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 236 = setxattr */ - {AC(fsetxattr_args), 0, 0, (sy_call_t *)fsetxattr, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 237 = fsetxattr */ - {AC(removexattr_args), 0, 0, (sy_call_t *)removexattr, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 238 = removexattr */ - {AC(fremovexattr_args), 0, 0, (sy_call_t *)fremovexattr, munge_www, 
munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 239 = fremovexattr */ - {AC(listxattr_args), 0, 0, (sy_call_t *)listxattr, munge_wwww, munge_dddd, _SYSCALL_RET_SSIZE_T, 16}, /* 240 = listxattr */ - {AC(flistxattr_args), 0, 0, (sy_call_t *)flistxattr, munge_wwww, munge_dddd, _SYSCALL_RET_SSIZE_T, 16}, /* 241 = flistxattr */ - {AC(fsctl_args), 0, 0, (sy_call_t *)fsctl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 242 = fsctl */ - {AC(initgroups_args), 0, 0, (sy_call_t *)initgroups, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 243 = initgroups */ - {AC(posix_spawn_args), 0, 0, (sy_call_t *)posix_spawn, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 244 = posix_spawn */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 245 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 246 = nosys */ -#if NFSCLIENT - {AC(nfsclnt_args), 0, 0, (sy_call_t *)nfsclnt, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 247 = nfsclnt */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 247 = nosys */ -#endif -#if NFSSERVER - {AC(fhopen_args), 0, 0, (sy_call_t *)fhopen, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 248 = fhopen */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 248 = nosys */ -#endif - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 249 = nosys */ - {AC(minherit_args), 0, 0, (sy_call_t *)minherit, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 250 = minherit */ -#if SYSV_SEM - {AC(semsys_args), 0, 0, (sy_call_t *)semsys, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 251 = semsys */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 251 = nosys */ -#endif -#if SYSV_MSG - {AC(msgsys_args), 0, 0, (sy_call_t *)msgsys, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 252 = msgsys */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 252 = nosys */ -#endif -#if SYSV_SHM - 
{AC(shmsys_args), 0, 0, (sy_call_t *)shmsys, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 253 = shmsys */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 253 = nosys */ -#endif -#if SYSV_SEM - {AC(semctl_args), 0, 0, (sy_call_t *)semctl, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 254 = semctl */ - {AC(semget_args), 0, 0, (sy_call_t *)semget, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 255 = semget */ - {AC(semop_args), 0, 0, (sy_call_t *)semop, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 256 = semop */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 257 = nosys */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 254 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 255 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 256 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 257 = nosys */ -#endif -#if SYSV_MSG - {AC(msgctl_args), 0, 0, (sy_call_t *)msgctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 258 = msgctl */ - {AC(msgget_args), 0, 0, (sy_call_t *)msgget, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 259 = msgget */ - {AC(msgsnd_args), 0, 0, (sy_call_t *)msgsnd, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 260 = msgsnd */ - {AC(msgrcv_args), 0, 0, (sy_call_t *)msgrcv, munge_wwwsw, munge_ddddd, _SYSCALL_RET_SSIZE_T, 20}, /* 261 = msgrcv */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 258 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 259 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 260 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 261 = nosys */ -#endif -#if SYSV_SHM - {AC(shmat_args), 0, 0, (sy_call_t *)shmat, munge_www, munge_ddd, _SYSCALL_RET_ADDR_T, 12}, /* 262 = shmat */ - {AC(shmctl_args), 0, 0, 
(sy_call_t *)shmctl, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 263 = shmctl */ - {AC(shmdt_args), 0, 0, (sy_call_t *)shmdt, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 264 = shmdt */ - {AC(shmget_args), 0, 0, (sy_call_t *)shmget, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 265 = shmget */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 262 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 263 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 264 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 265 = nosys */ -#endif - {AC(shm_open_args), 0, 0, (sy_call_t *)shm_open, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 266 = shm_open */ - {AC(shm_unlink_args), 0, 0, (sy_call_t *)shm_unlink, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 267 = shm_unlink */ - {AC(sem_open_args), 0, 0, (sy_call_t *)sem_open, munge_wwww, munge_dddd, _SYSCALL_RET_ADDR_T, 16}, /* 268 = sem_open */ - {AC(sem_close_args), 0, 0, (sy_call_t *)sem_close, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 269 = sem_close */ - {AC(sem_unlink_args), 0, 0, (sy_call_t *)sem_unlink, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 270 = sem_unlink */ - {AC(sem_wait_args), 0, 0, (sy_call_t *)sem_wait, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 271 = sem_wait */ - {AC(sem_trywait_args), 0, 0, (sy_call_t *)sem_trywait, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 272 = sem_trywait */ - {AC(sem_post_args), 0, 0, (sy_call_t *)sem_post, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 273 = sem_post */ - {AC(sem_getvalue_args), 0, 0, (sy_call_t *)sem_getvalue, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 274 = sem_getvalue */ - {AC(sem_init_args), 0, 0, (sy_call_t *)sem_init, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 275 = sem_init */ - {AC(sem_destroy_args), 0, 0, (sy_call_t *)sem_destroy, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 276 = sem_destroy */ - 
{AC(open_extended_args), 0, 0, (sy_call_t *)open_extended, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 277 = open_extended */ - {AC(umask_extended_args), 0, 0, (sy_call_t *)umask_extended, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 278 = umask_extended */ - {AC(stat_extended_args), 0, 0, (sy_call_t *)stat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 279 = stat_extended */ - {AC(lstat_extended_args), 0, 0, (sy_call_t *)lstat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 280 = lstat_extended */ - {AC(fstat_extended_args), 0, 0, (sy_call_t *)fstat_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 281 = fstat_extended */ - {AC(chmod_extended_args), 0, 0, (sy_call_t *)chmod_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 282 = chmod_extended */ - {AC(fchmod_extended_args), 0, 0, (sy_call_t *)fchmod_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 283 = fchmod_extended */ - {AC(access_extended_args), 0, 0, (sy_call_t *)access_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 284 = access_extended */ - {AC(settid_args), 0, 0, (sy_call_t *)settid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 285 = settid */ - {AC(gettid_args), 0, 0, (sy_call_t *)gettid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 286 = gettid */ - {AC(setsgroups_args), 0, 0, (sy_call_t *)setsgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 287 = setsgroups */ - {AC(getsgroups_args), 0, 0, (sy_call_t *)getsgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 288 = getsgroups */ - {AC(setwgroups_args), 0, 0, (sy_call_t *)setwgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 289 = setwgroups */ - {AC(getwgroups_args), 0, 0, (sy_call_t *)getwgroups, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 290 = getwgroups */ - {AC(mkfifo_extended_args), 0, 0, (sy_call_t *)mkfifo_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 291 = mkfifo_extended */ - {AC(mkdir_extended_args), 0, 0, 
(sy_call_t *)mkdir_extended, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 292 = mkdir_extended */ - {AC(identitysvc_args), 0, 0, (sy_call_t *)identitysvc, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 293 = identitysvc */ - {AC(shared_region_check_np_args), 0, 0, (sy_call_t *)shared_region_check_np, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 294 = shared_region_check_np */ - {AC(shared_region_map_np_args), 0, 0, (sy_call_t *)shared_region_map_np, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 295 = shared_region_map_np */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 296 = nosys old load_shared_file */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 297 = nosys old reset_shared_file */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 298 = nosys old new_system_shared_regions */ - {0, 0, 0, (sy_call_t *)enosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 299 = enosys old shared_region_map_file_np */ - {0, 0, 0, (sy_call_t *)enosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 300 = enosys old shared_region_make_private_np */ - {AC(__pthread_mutex_destroy_args), 0, 0, (sy_call_t *)__pthread_mutex_destroy, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 301 = __pthread_mutex_destroy */ - {AC(__pthread_mutex_init_args), 0, 0, (sy_call_t *)__pthread_mutex_init, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 302 = __pthread_mutex_init */ - {AC(__pthread_mutex_lock_args), 0, 0, (sy_call_t *)__pthread_mutex_lock, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 303 = __pthread_mutex_lock */ - {AC(__pthread_mutex_trylock_args), 0, 0, (sy_call_t *)__pthread_mutex_trylock, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 304 = __pthread_mutex_trylock */ - {AC(__pthread_mutex_unlock_args), 0, 0, (sy_call_t *)__pthread_mutex_unlock, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 305 = __pthread_mutex_unlock */ - {AC(__pthread_cond_init_args), 0, 0, (sy_call_t *)__pthread_cond_init, munge_ww, munge_dd, 
_SYSCALL_RET_INT_T, 8}, /* 306 = __pthread_cond_init */ - {AC(__pthread_cond_destroy_args), 0, 0, (sy_call_t *)__pthread_cond_destroy, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 307 = __pthread_cond_destroy */ - {AC(__pthread_cond_broadcast_args), 0, 0, (sy_call_t *)__pthread_cond_broadcast, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 308 = __pthread_cond_broadcast */ - {AC(__pthread_cond_signal_args), 0, 0, (sy_call_t *)__pthread_cond_signal, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 309 = __pthread_cond_signal */ - {AC(getsid_args), 0, 0, (sy_call_t *)getsid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 310 = getsid */ - {AC(settid_with_pid_args), 0, 0, (sy_call_t *)settid_with_pid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 311 = settid_with_pid */ - {AC(__pthread_cond_timedwait_args), 0, 0, (sy_call_t *)__pthread_cond_timedwait, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 312 = __pthread_cond_timedwait */ - {AC(aio_fsync_args), 0, 0, (sy_call_t *)aio_fsync, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 313 = aio_fsync */ - {AC(aio_return_args), 0, 0, (sy_call_t *)aio_return, munge_w, munge_d, _SYSCALL_RET_SSIZE_T, 4}, /* 314 = aio_return */ - {AC(aio_suspend_args), 0, 0, (sy_call_t *)aio_suspend, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 315 = aio_suspend */ - {AC(aio_cancel_args), 0, 0, (sy_call_t *)aio_cancel, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 316 = aio_cancel */ - {AC(aio_error_args), 0, 0, (sy_call_t *)aio_error, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 317 = aio_error */ - {AC(aio_read_args), 0, 0, (sy_call_t *)aio_read, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 318 = aio_read */ - {AC(aio_write_args), 0, 0, (sy_call_t *)aio_write, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 319 = aio_write */ - {AC(lio_listio_args), 0, 0, (sy_call_t *)lio_listio, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 320 = lio_listio */ - {AC(__pthread_cond_wait_args), 0, 0, (sy_call_t *)__pthread_cond_wait, munge_ww, munge_dd, 
_SYSCALL_RET_INT_T, 8}, /* 321 = __pthread_cond_wait */ - {AC(iopolicysys_args), 0, 0, (sy_call_t *)iopolicysys, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 322 = iopolicysys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 323 = nosys */ - {AC(mlockall_args), 0, 0, (sy_call_t *)mlockall, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 324 = mlockall */ - {AC(munlockall_args), 0, 0, (sy_call_t *)munlockall, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 325 = munlockall */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 326 = nosys */ - {0, 0, 0, (sy_call_t *)issetugid, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 327 = issetugid */ - {AC(__pthread_kill_args), 0, 0, (sy_call_t *)__pthread_kill, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 328 = __pthread_kill */ - {AC(__pthread_sigmask_args), 0, 0, (sy_call_t *)__pthread_sigmask, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 329 = __pthread_sigmask */ - {AC(__sigwait_args), 0, 0, (sy_call_t *)__sigwait, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 330 = __sigwait */ - {AC(__disable_threadsignal_args), 0, 0, (sy_call_t *)__disable_threadsignal, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 331 = __disable_threadsignal */ - {AC(__pthread_markcancel_args), 0, 0, (sy_call_t *)__pthread_markcancel, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 332 = __pthread_markcancel */ - {AC(__pthread_canceled_args), 0, 0, (sy_call_t *)__pthread_canceled, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 333 = __pthread_canceled */ - {AC(__semwait_signal_args), 0, 0, (sy_call_t *)__semwait_signal, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 334 = __semwait_signal */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 335 = nosys old utrace */ - {AC(proc_info_args), 0, 0, (sy_call_t *)proc_info, munge_wwwlww, munge_dddddd, _SYSCALL_RET_INT_T, 28}, /* 336 = proc_info */ -#if SENDFILE - {AC(sendfile_args), 0, 0, (sy_call_t *)sendfile, munge_wwlwww, munge_dddddd, 
_SYSCALL_RET_INT_T, 28}, /* 337 = sendfile */ -#else /* !SENDFILE */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 337 = nosys */ -#endif /* SENDFILE */ - {AC(stat64_args), 0, 0, (sy_call_t *)stat64, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 338 = stat64 */ - {AC(fstat64_args), 0, 0, (sy_call_t *)fstat64, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 339 = fstat64 */ - {AC(lstat64_args), 0, 0, (sy_call_t *)lstat64, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 340 = lstat64 */ - {AC(stat64_extended_args), 0, 0, (sy_call_t *)stat64_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 341 = stat64_extended */ - {AC(lstat64_extended_args), 0, 0, (sy_call_t *)lstat64_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 342 = lstat64_extended */ - {AC(fstat64_extended_args), 0, 0, (sy_call_t *)fstat64_extended, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 343 = fstat64_extended */ - {AC(getdirentries64_args), 0, 0, (sy_call_t *)getdirentries64, munge_wwww, munge_dddd, _SYSCALL_RET_SSIZE_T, 16}, /* 344 = getdirentries64 */ - {AC(statfs64_args), 0, 0, (sy_call_t *)statfs64, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 345 = statfs64 */ - {AC(fstatfs64_args), 0, 0, (sy_call_t *)fstatfs64, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 346 = fstatfs64 */ - {AC(getfsstat64_args), 0, 0, (sy_call_t *)getfsstat64, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 347 = getfsstat64 */ - {AC(__pthread_chdir_args), 0, 0, (sy_call_t *)__pthread_chdir, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 348 = __pthread_chdir */ - {AC(__pthread_fchdir_args), 0, 0, (sy_call_t *)__pthread_fchdir, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 349 = __pthread_fchdir */ -#if AUDIT - {AC(audit_args), 0, 0, (sy_call_t *)audit, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 350 = audit */ - {AC(auditon_args), 0, 0, (sy_call_t *)auditon, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 351 = auditon */ - {0, 0, 0, (sy_call_t *)nosys, NULL, 
NULL, _SYSCALL_RET_INT_T, 0}, /* 352 = nosys */ - {AC(getauid_args), 0, 0, (sy_call_t *)getauid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 353 = getauid */ - {AC(setauid_args), 0, 0, (sy_call_t *)setauid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 354 = setauid */ - {AC(getaudit_args), 0, 0, (sy_call_t *)getaudit, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 355 = getaudit */ - {AC(setaudit_args), 0, 0, (sy_call_t *)setaudit, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 356 = setaudit */ - {AC(getaudit_addr_args), 0, 0, (sy_call_t *)getaudit_addr, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 357 = getaudit_addr */ - {AC(setaudit_addr_args), 0, 0, (sy_call_t *)setaudit_addr, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 358 = setaudit_addr */ - {AC(auditctl_args), 0, 0, (sy_call_t *)auditctl, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 359 = auditctl */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 350 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 351 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 352 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 353 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 354 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 355 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 356 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 357 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 358 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 359 = nosys */ -#endif -#if CONFIG_WORKQUEUE - {AC(bsdthread_create_args), 0, 0, (sy_call_t *)bsdthread_create, munge_wwwww, munge_ddddd, _SYSCALL_RET_ADDR_T, 20}, /* 360 = bsdthread_create */ - {AC(bsdthread_terminate_args), 0, 0, (sy_call_t *)bsdthread_terminate, 
munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 361 = bsdthread_terminate */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 360 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 361 = nosys */ -#endif - {0, 0, 0, (sy_call_t *)kqueue, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 362 = kqueue */ - {AC(kevent_args), 0, 0, (sy_call_t *)kevent, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 363 = kevent */ - {AC(lchown_args), 0, 0, (sy_call_t *)lchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 364 = lchown */ - {AC(stack_snapshot_args), 0, 0, (sy_call_t *)stack_snapshot, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 365 = stack_snapshot */ -#if CONFIG_WORKQUEUE - {AC(bsdthread_register_args), 0, 0, (sy_call_t *)bsdthread_register, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 366 = bsdthread_register */ - {0, 0, 0, (sy_call_t *)workq_open, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 367 = workq_open */ - {AC(workq_ops_args), 0, 0, (sy_call_t *)workq_ops, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 368 = workq_ops */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 366 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 367 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 368 = nosys */ -#endif - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 369 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 370 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 371 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 372 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 373 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 374 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 375 = nosys */ - {0, 0, 
0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 376 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 377 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 378 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 379 = nosys */ - {AC(__mac_execve_args), 0, 0, (sy_call_t *)__mac_execve, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 380 = __mac_execve */ - {AC(__mac_syscall_args), 0, 0, (sy_call_t *)__mac_syscall, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 381 = __mac_syscall */ - {AC(__mac_get_file_args), 0, 0, (sy_call_t *)__mac_get_file, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 382 = __mac_get_file */ - {AC(__mac_set_file_args), 0, 0, (sy_call_t *)__mac_set_file, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 383 = __mac_set_file */ - {AC(__mac_get_link_args), 0, 0, (sy_call_t *)__mac_get_link, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 384 = __mac_get_link */ - {AC(__mac_set_link_args), 0, 0, (sy_call_t *)__mac_set_link, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 385 = __mac_set_link */ - {AC(__mac_get_proc_args), 0, 0, (sy_call_t *)__mac_get_proc, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 386 = __mac_get_proc */ - {AC(__mac_set_proc_args), 0, 0, (sy_call_t *)__mac_set_proc, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 387 = __mac_set_proc */ - {AC(__mac_get_fd_args), 0, 0, (sy_call_t *)__mac_get_fd, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 388 = __mac_get_fd */ - {AC(__mac_set_fd_args), 0, 0, (sy_call_t *)__mac_set_fd, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 389 = __mac_set_fd */ - {AC(__mac_get_pid_args), 0, 0, (sy_call_t *)__mac_get_pid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 390 = __mac_get_pid */ - {AC(__mac_get_lcid_args), 0, 0, (sy_call_t *)__mac_get_lcid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 391 = __mac_get_lcid */ - {AC(__mac_get_lctx_args), 0, 0, (sy_call_t *)__mac_get_lctx, munge_w, 
munge_d, _SYSCALL_RET_INT_T, 4}, /* 392 = __mac_get_lctx */ - {AC(__mac_set_lctx_args), 0, 0, (sy_call_t *)__mac_set_lctx, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 393 = __mac_set_lctx */ - {AC(setlcid_args), 0, 0, (sy_call_t *)setlcid, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 394 = setlcid */ - {AC(getlcid_args), 0, 0, (sy_call_t *)getlcid, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 395 = getlcid */ - {AC(read_nocancel_args), 0, 0, (sy_call_t *)read_nocancel, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 396 = read_nocancel */ - {AC(write_nocancel_args), 0, 0, (sy_call_t *)write_nocancel, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 397 = write_nocancel */ - {AC(open_nocancel_args), 0, 0, (sy_call_t *)open_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 398 = open_nocancel */ - {AC(close_nocancel_args), 0, 0, (sy_call_t *)close_nocancel, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 399 = close_nocancel */ - {AC(wait4_nocancel_args), 0, 0, (sy_call_t *)wait4_nocancel, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 400 = wait4_nocancel */ -#if SOCKETS - {AC(recvmsg_nocancel_args), 0, 0, (sy_call_t *)recvmsg_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 401 = recvmsg_nocancel */ - {AC(sendmsg_nocancel_args), 0, 0, (sy_call_t *)sendmsg_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 402 = sendmsg_nocancel */ - {AC(recvfrom_nocancel_args), 0, 0, (sy_call_t *)recvfrom_nocancel, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 403 = recvfrom_nocancel */ - {AC(accept_nocancel_args), 0, 0, (sy_call_t *)accept_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 404 = accept_nocancel */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 401 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 402 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 403 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, 
_SYSCALL_RET_INT_T, 0}, /* 404 = nosys */ -#endif /* SOCKETS */ - {AC(msync_nocancel_args), 0, 0, (sy_call_t *)msync_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 405 = msync_nocancel */ - {AC(fcntl_nocancel_args), 0, 0, (sy_call_t *)fcntl_nocancel, munge_wws, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 406 = fcntl_nocancel */ - {AC(select_nocancel_args), 0, 0, (sy_call_t *)select_nocancel, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 407 = select_nocancel */ - {AC(fsync_nocancel_args), 0, 0, (sy_call_t *)fsync_nocancel, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 408 = fsync_nocancel */ -#if SOCKETS - {AC(connect_nocancel_args), 0, 0, (sy_call_t *)connect_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 409 = connect_nocancel */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 409 = nosys */ -#endif /* SOCKETS */ - {AC(sigsuspend_nocancel_args), 0, 0, (sy_call_t *)sigsuspend_nocancel, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 410 = sigsuspend_nocancel */ - {AC(readv_nocancel_args), 0, 0, (sy_call_t *)readv_nocancel, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 411 = readv_nocancel */ - {AC(writev_nocancel_args), 0, 0, (sy_call_t *)writev_nocancel, munge_www, munge_ddd, _SYSCALL_RET_SSIZE_T, 12}, /* 412 = writev_nocancel */ -#if SOCKETS - {AC(sendto_nocancel_args), 0, 0, (sy_call_t *)sendto_nocancel, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 413 = sendto_nocancel */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 413 = nosys */ -#endif /* SOCKETS */ - {AC(pread_nocancel_args), 0, 0, (sy_call_t *)pread_nocancel, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T, 20}, /* 414 = pread_nocancel */ - {AC(pwrite_nocancel_args), 0, 0, (sy_call_t *)pwrite_nocancel, munge_wwwl, munge_dddd, _SYSCALL_RET_SSIZE_T, 20}, /* 415 = pwrite_nocancel */ - {AC(waitid_nocancel_args), 0, 0, (sy_call_t *)waitid_nocancel, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 416 = 
waitid_nocancel */ - {AC(poll_nocancel_args), 0, 0, (sy_call_t *)poll_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 417 = poll_nocancel */ -#if SYSV_MSG - {AC(msgsnd_nocancel_args), 0, 0, (sy_call_t *)msgsnd_nocancel, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 418 = msgsnd_nocancel */ - {AC(msgrcv_nocancel_args), 0, 0, (sy_call_t *)msgrcv_nocancel, munge_wwwsw, munge_ddddd, _SYSCALL_RET_SSIZE_T, 20}, /* 419 = msgrcv_nocancel */ -#else - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 418 = nosys */ - {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 419 = nosys */ -#endif - {AC(sem_wait_nocancel_args), 0, 0, (sy_call_t *)sem_wait_nocancel, munge_w, munge_d, _SYSCALL_RET_INT_T, 4}, /* 420 = sem_wait_nocancel */ - {AC(aio_suspend_nocancel_args), 0, 0, (sy_call_t *)aio_suspend_nocancel, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 421 = aio_suspend_nocancel */ - {AC(__sigwait_nocancel_args), 0, 0, (sy_call_t *)__sigwait_nocancel, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 422 = __sigwait_nocancel */ - {AC(__semwait_signal_nocancel_args), 0, 0, (sy_call_t *)__semwait_signal_nocancel, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 423 = __semwait_signal_nocancel */ - {AC(__mac_mount_args), 0, 0, (sy_call_t *)__mac_mount, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 424 = __mac_mount */ - {AC(__mac_get_mount_args), 0, 0, (sy_call_t *)__mac_get_mount, munge_ww, munge_dd, _SYSCALL_RET_INT_T, 8}, /* 425 = __mac_get_mount */ - {AC(__mac_getfsstat_args), 0, 0, (sy_call_t *)__mac_getfsstat, munge_wwwww, munge_ddddd, _SYSCALL_RET_INT_T, 20}, /* 426 = __mac_getfsstat */ -}; -int nsysent = sizeof(sysent) / sizeof(sysent[0]); -/* Verify that NUM_SYSENT reflects the latest syscall count */ -int nsysent_size_check[((sizeof(sysent) / sizeof(sysent[0])) == NUM_SYSENT) ? 
1 : -1] __unused; diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index ee4b63f40..ee97c249c 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -37,6 +37,9 @@ #include #include +#if defined(__i386__) || defined(__x86_64__) +#include +#endif #include #include #include @@ -44,8 +47,13 @@ #include #include +#include #include +#include +#include +#include + #include /* for host_info() */ #include @@ -54,11 +62,11 @@ void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); int cpu_number(void); /* XXX include path broken */ /* XXX should probably be static, but it's debugging code... */ -int kdbg_read(user_addr_t, size_t *); +int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t); void kdbg_control_chud(int, void *); int kdbg_control(int *, u_int, user_addr_t, size_t *); int kdbg_getentropy (user_addr_t, size_t *, int); -int kdbg_readmap(user_addr_t, size_t *); +int kdbg_readmap(user_addr_t, size_t *, vnode_t, vfs_context_t); int kdbg_getreg(kd_regtype *); int kdbg_setreg(kd_regtype *); int kdbg_setrtcdec(kd_regtype *); @@ -85,38 +93,59 @@ uint64_t * kd_entropy_buffer = 0; unsigned int kd_entropy_bufsize = 0; unsigned int kd_entropy_count = 0; unsigned int kd_entropy_indx = 0; -unsigned int kd_entropy_buftomem = 0; +vm_offset_t kd_entropy_buftomem = 0; #define SLOW_NOLOG 0x01 #define SLOW_CHECKS 0x02 #define SLOW_ENTROPY 0x04 -unsigned int kdebug_slowcheck=SLOW_NOLOG; +unsigned int kdebug_slowcheck = SLOW_NOLOG; unsigned int kd_cpus; -struct kd_bufinfo { - kd_buf * kd_stop; - kd_buf * kd_bufptr; - kd_buf * kd_buffer; - kd_buf * kd_buflast; - kd_buf * kd_readlast; - int kd_wrapped; /* plus, the global flag KDBG_WRAPPED is set if one of the buffers has wrapped */ - uint64_t kd_prev_timebase; - int kd_pad[24]; /* pad out to 128 bytes so that no cache line is shared between CPUs */ +#define EVENTS_PER_STORAGE_UNIT 2048 /* kd_buf records held by one struct kd_storage */ +#define MIN_STORAGE_UNITS_PER_CPU 4 /* floor applied by create_buffers() */ + +struct kd_storage { + struct kd_storage *kds_next; /* link on kds_free_list or on a CPU's unit list */ + kd_buf *kds_bufptr; /* reset to &kds_records[0] when the unit is handed out */ + kd_buf 
*kds_buflast; /* one past the last record; set in create_buffers() */ + kd_buf *kds_readlast; /* reset to kds_bufptr when the unit is handed out */ + kd_buf kds_records[EVENTS_PER_STORAGE_UNIT]; }; +#define MAX_BUFFER_SIZE (1024 * 1024 * 128) /* 128MB ceiling for one kmem_alloc of storage units */ +#define N_STORAGE_UNITS_PER_BUFFER (MAX_BUFFER_SIZE / sizeof(struct kd_storage)) + + +struct kd_storage_buffers { + struct kd_storage *kdsb_addr; /* base address of one kmem_alloc region of storage units */ + uint32_t kdsb_size; /* size of that region in bytes */ +}; + + +struct kd_storage *kds_free_list = NULL; /* unassigned units, linked through kds_next */ +struct kd_storage_buffers *kd_bufs = NULL; /* n_storage_buffers descriptors, allocated by create_buffers() */ +int n_storage_units = 0; +int n_storage_buffers = 0; + +struct kd_bufinfo { + struct kd_storage *kd_list_head; /* oldest assigned unit; units are released or stolen from here */ + struct kd_storage *kd_list_tail; /* newest assigned unit; allocate_storage_unit() appends here */ + struct kd_storage *kd_active; /* unit being recorded to; allocate_storage_unit() never steals it */ + uint64_t kd_prev_timebase; +} __attribute__(( aligned(CPU_CACHE_SIZE) )); + struct kd_bufinfo *kdbip = NULL; -#define KDCOPYBUF_COUNT 1024 +#define KDCOPYBUF_COUNT 2048 #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf)) kd_buf *kdcopybuf = NULL; unsigned int nkdbufs = 8192; -unsigned int kd_bufsize = 0; unsigned int kdebug_flags = 0; unsigned int kdlog_beg=0; unsigned int kdlog_end=0; @@ -125,6 +154,7 @@ unsigned int kdlog_value2=0; unsigned int kdlog_value3=0; unsigned int kdlog_value4=0; +static lck_spin_t * kds_spin_lock; /* taken around kds_free_list and kd_list_head/kd_list_tail updates */ static lck_mtx_t * kd_trace_mtx_sysctl; static lck_grp_t * kd_trace_mtx_sysctl_grp; static lck_attr_t * kd_trace_mtx_sysctl_attr; @@ -138,7 +168,7 @@ static lck_mtx_t stackshot_subsys_mutex; void *stackshot_snapbuf = NULL; int -stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, register_t *retval); +stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval); extern void kdp_snapshot_preflight(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t options); @@ -151,7 +181,9 @@ kdp_stack_snapshot_bytes_traced(void); kd_threadmap *kd_mapptr = 0; unsigned int kd_mapsize = 0; unsigned int kd_mapcount = 0; -unsigned int kd_maptomem = 0; +vm_offset_t kd_maptomem = 0; + +off_t RAW_file_offset = 0; pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer */ @@ -178,9 
+210,9 @@ struct krt typedef struct krt krt_t; /* This is for the CHUD toolkit call */ -typedef void (*kd_chudhook_fn) (unsigned int debugid, unsigned int arg1, - unsigned int arg2, unsigned int arg3, - unsigned int arg4, unsigned int arg5); +typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1, + uintptr_t arg2, uintptr_t arg3, + uintptr_t arg4, uintptr_t arg5); kd_chudhook_fn kdebug_chudhook = 0; /* pointer to CHUD toolkit function */ @@ -188,7 +220,7 @@ __private_extern__ void stackshot_lock_init( void ) __attribute__((section("__TE /* Support syscall SYS_kdebug_trace */ int -kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused register_t *retval) +kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused int32_t *retval) { if ( (kdebug_enable == 0) ) return(EINVAL); @@ -197,146 +229,322 @@ kdebug_trace(__unused struct proc *p, struct kdebug_trace_args *uap, __unused re return(0); } + static int create_buffers(void) { - unsigned int cpu, i; - int nentries; + int i; + int p_buffer_size; + int f_buffer_size; + int f_buffers; + int error = 0; + + if (nkdbufs < (kd_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) + n_storage_units = kd_cpus * MIN_STORAGE_UNITS_PER_CPU; + else + n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT; - nentries = nkdbufs / kd_cpus; - nkdbufs = nentries * kd_cpus; + nkdbufs = n_storage_units * EVENTS_PER_STORAGE_UNIT; - kd_bufsize = nentries * sizeof(kd_buf); + f_buffers = n_storage_units / N_STORAGE_UNITS_PER_BUFFER; + n_storage_buffers = f_buffers; - bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_cpus); + f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage); + p_buffer_size = (n_storage_units % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage); + + if (p_buffer_size) + n_storage_buffers++; + + kd_bufs = NULL; if (kdcopybuf == 0) { - if (kmem_alloc(kernel_map, (unsigned int *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) - 
return(ENOMEM); + if (kmem_alloc(kernel_map, (vm_offset_t *)&kdcopybuf, (vm_size_t)KDCOPYBUF_SIZE) != KERN_SUCCESS) { + error = ENOSPC; + goto out; + } } - for (cpu = 0; cpu < kd_cpus; cpu++) { - if (kmem_alloc(kernel_map, (unsigned int *)&kdbip[cpu].kd_buffer, kd_bufsize) != KERN_SUCCESS) - break; + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))) != KERN_SUCCESS) { + error = ENOSPC; + goto out; } - if (cpu < kd_cpus) { - for (i = 0; i < cpu; i++) - kmem_free(kernel_map, (vm_offset_t)kdbip[i].kd_buffer, kd_bufsize); - kd_bufsize = 0; + bzero(kd_bufs, n_storage_buffers * sizeof(struct kd_storage_buffers)); /* zeroed kdsb_addr lets delete_buffers() skip slots that were never filled */ - kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE); - kdcopybuf = NULL; - - return(ENOMEM); + for (i = 0; i < f_buffers; i++) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)f_buffer_size) != KERN_SUCCESS) { + error = ENOSPC; + goto out; + } + kd_bufs[i].kdsb_size = f_buffer_size; } - for (cpu = 0; cpu < kd_cpus; cpu++) { - kdbip[cpu].kd_bufptr = kdbip[cpu].kd_buffer; - kdbip[cpu].kd_buflast = &kdbip[cpu].kd_bufptr[nentries]; - kdbip[cpu].kd_readlast = kdbip[cpu].kd_bufptr; + if (p_buffer_size) { /* i == f_buffers here: the slot for the trailing partial buffer */ + if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdsb_addr, (vm_size_t)p_buffer_size) != KERN_SUCCESS) { + error = ENOSPC; + goto out; + } + kd_bufs[i].kdsb_size = p_buffer_size; + } + + for (i = 0; i < n_storage_buffers; i++) { + struct kd_storage *kds; + int n_elements; + int n; + + n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage); + kds = kd_bufs[i].kdsb_addr; + + for (n = 0; n < n_elements; n++) { + kds[n].kds_next = kds_free_list; /* push every unit onto the global free list */ + kds_free_list = &kds[n]; + + kds[n].kds_buflast = &kds[n].kds_records[EVENTS_PER_STORAGE_UNIT]; /* one past the last record of this unit */ + } } + bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_cpus); + kdebug_flags |= KDBG_BUFINIT; +out: + if (error) + delete_buffers(); /* release whatever was allocated before the failure */ - return(0); + return(error); } static void delete_buffers(void) { - 
unsigned int cpu; + int i; + + if (kd_bufs) { + for (i = 0; i < n_storage_buffers; i++) { + if (kd_bufs[i].kdsb_addr) + kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size); + } + kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))); - if (kd_bufsize && (kdebug_flags & KDBG_BUFINIT)) { - for (cpu = 0; cpu < kd_cpus; cpu++) - kmem_free(kernel_map, (vm_offset_t)kdbip[cpu].kd_buffer, kd_bufsize); - kd_bufsize = 0; + kd_bufs = NULL; + n_storage_buffers = 0; } if (kdcopybuf) { kmem_free(kernel_map, (vm_offset_t)kdcopybuf, KDCOPYBUF_SIZE); + kdcopybuf = NULL; } + kds_free_list = NULL; + kdebug_flags &= ~KDBG_BUFINIT; } static void -kernel_debug_internal(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, - unsigned int arg4, unsigned int arg5, int entropy_flag) +release_storage_unit(struct kd_bufinfo *kdbp, struct kd_storage *kdsp) { - int s; - kd_buf * kd; - struct proc *curproc; - unsigned long long now; - int cpu; + + int s = 0; + s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kds_spin_lock); + + if (kdsp == kdbp->kd_list_head) { + /* + * its possible for the storage unit pointed to + * by kdsp to have already been stolen... so + * check to see if its still the head of the list + * now that we're behind the lock that protects + * adding and removing from the queue... + * since we only ever release and steal units from + * that position, if its no longer the head + * we having nothing to do in this context + */ + kdbp->kd_list_head = kdsp->kds_next; + kdsp->kds_next = kds_free_list; + kds_free_list = kdsp; + } + lck_spin_unlock(kds_spin_lock); + ml_set_interrupts_enabled(s); +} + + +/* + * Interrupts are disabled when we enter this routine. 
+ */ +static struct kd_storage * +allocate_storage_unit(struct kd_bufinfo *kdbp) +{ + struct kd_storage *kdsp; + struct kd_bufinfo *kdbp_vict, *kdbp_try; + uint64_t oldest_ts, ts; + + lck_spin_lock(kds_spin_lock); + + if ((kdsp = kds_free_list)) + kds_free_list = kdsp->kds_next; + else { + if (kdebug_flags & KDBG_NOWRAP) { + kdebug_slowcheck |= SLOW_NOLOG; + goto out; + } + kdbp_vict = NULL; + oldest_ts = (uint64_t)-1; + + for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_cpus]; kdbp_try++) { + + if ((kdsp = kdbp_try->kd_list_head) == NULL) { + /* + * no storage unit to steal + */ + continue; + } + if (kdsp == kdbp_try->kd_active) { + /* + * make sure we don't steal the storage unit + * being actively recorded to... this state + * also implies that this is the only unit assigned + * to this CPU, so we can immediately move on + */ + continue; + } + ts = kdbg_get_timestamp(&(kdbp_try->kd_list_head->kds_records[0])); + + if (ts < oldest_ts) { + /* + * when 'wrapping', we want to steal the + * storage unit that has the 'earliest' time + * associated with it (first event time) + */ + oldest_ts = ts; + kdbp_vict = kdbp_try; + } + } +#if 1 + if (kdbp_vict == NULL) { + kdebug_enable = 0; + + panic("allocate_storage_unit: no storage units available\n"); + } +#endif + kdsp = kdbp_vict->kd_list_head; + + kdbp_vict->kd_list_head = kdsp->kds_next; + + kdebug_flags |= KDBG_WRAPPED; + } + kdsp->kds_next = NULL; + kdsp->kds_bufptr = &kdsp->kds_records[0]; + kdsp->kds_readlast = kdsp->kds_bufptr; + + if (kdbp->kd_list_head == NULL) + kdbp->kd_list_head = kdsp; + else + kdbp->kd_list_tail->kds_next = kdsp; + kdbp->kd_list_tail = kdsp; +out: + lck_spin_unlock(kds_spin_lock); + + return (kdsp); +} + + + +static void +kernel_debug_internal( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t arg5, + int entropy_flag) +{ + struct proc *curproc; + uint64_t now; + int s; + kd_buf *kd; + int cpu; + struct kd_bufinfo *kdbp; + struct 
kd_storage *kdsp; + s = ml_set_interrupts_enabled(FALSE); now = mach_absolute_time() & KDBG_TIMESTAMP_MASK; cpu = cpu_number(); if (kdebug_enable & KDEBUG_ENABLE_CHUD) { - if (kdebug_chudhook) - kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); + if (kdebug_chudhook) + kdebug_chudhook(debugid, arg1, arg2, arg3, arg4, arg5); - if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) - goto out; + if ( !(kdebug_enable & (KDEBUG_ENABLE_ENTROPY | KDEBUG_ENABLE_TRACE))) + goto out; } - if (kdebug_slowcheck == 0) - goto record_trace; - - if (entropy_flag && (kdebug_enable & KDEBUG_ENABLE_ENTROPY)) - { - if (kd_entropy_indx < kd_entropy_count) - { - kd_entropy_buffer [ kd_entropy_indx] = mach_absolute_time(); - kd_entropy_indx++; - } - - if (kd_entropy_indx == kd_entropy_count) - { - /* Disable entropy collection */ - kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; - kdebug_slowcheck &= ~SLOW_ENTROPY; - } - } + goto record_trace; + if (entropy_flag && (kdebug_enable & KDEBUG_ENABLE_ENTROPY)) { + if (kd_entropy_indx < kd_entropy_count) { + kd_entropy_buffer [ kd_entropy_indx] = mach_absolute_time(); + kd_entropy_indx++; + } + + if (kd_entropy_indx == kd_entropy_count) { + /* + * Disable entropy collection + */ + kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck &= ~SLOW_ENTROPY; + } + } if ( (kdebug_slowcheck & SLOW_NOLOG) ) - goto out; - - if (kdebug_flags & KDBG_PIDCHECK) - { - /* If kdebug flag is not set for current proc, return */ - curproc = current_proc(); - if ((curproc && !(curproc->p_kdebug)) && - ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) - goto out; - } - else if (kdebug_flags & KDBG_PIDEXCLUDE) - { - /* If kdebug flag is set for current proc, return */ - curproc = current_proc(); - if ((curproc && curproc->p_kdebug) && - ((debugid&0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) goto out; - } + + if (kdebug_flags & KDBG_PIDCHECK) { + /* + * If kdebug flag is not set for current proc, 
return + */ + curproc = current_proc(); - if (kdebug_flags & KDBG_RANGECHECK) - { - if ((debugid < kdlog_beg) - || ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE))) - goto out; - } - else if (kdebug_flags & KDBG_VALCHECK) - { - if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && - (debugid & DBG_FUNC_MASK) != kdlog_value2 && - (debugid & DBG_FUNC_MASK) != kdlog_value3 && - (debugid & DBG_FUNC_MASK) != kdlog_value4 && - (debugid >> 24 != DBG_TRACE)) - goto out; - } + if ((curproc && !(curproc->p_kdebug)) && + ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) + goto out; + } + else if (kdebug_flags & KDBG_PIDEXCLUDE) { + /* + * If kdebug flag is set for current proc, return + */ + curproc = current_proc(); + + if ((curproc && curproc->p_kdebug) && + ((debugid & 0xffff0000) != (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE))) + goto out; + } + if (kdebug_flags & KDBG_RANGECHECK) { + if ((debugid < kdlog_beg) + || ((debugid >= kdlog_end) && (debugid >> 24 != DBG_TRACE))) + goto out; + } + else if (kdebug_flags & KDBG_VALCHECK) { + if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && + (debugid & DBG_FUNC_MASK) != kdlog_value2 && + (debugid & DBG_FUNC_MASK) != kdlog_value3 && + (debugid & DBG_FUNC_MASK) != kdlog_value4 && + (debugid >> 24 != DBG_TRACE)) + goto out; + } record_trace: - kd = kdbip[cpu].kd_bufptr; + kdbp = &kdbip[cpu]; + + if ((kdsp = kdbp->kd_active) == NULL) { + if ((kdsp = allocate_storage_unit(kdbp)) == NULL) { + /* + * this can only happen if wrapping + * has been disabled + */ + goto out; + } + kdbp->kd_active = kdsp; + } + kd = kdsp->kds_bufptr; + kd->debugid = debugid; kd->arg1 = arg1; kd->arg2 = arg2; @@ -344,48 +552,36 @@ kernel_debug_internal(unsigned int debugid, unsigned int arg1, unsigned int arg2 kd->arg4 = arg4; kd->arg5 = arg5; - /* - * Watch for out of order timestamps - */ - if (now < kdbip[cpu].kd_prev_timebase) - { - /* - * if so, just store the previous timestamp + a cycle - */ - now = 
++kdbip[cpu].kd_prev_timebase & KDBG_TIMESTAMP_MASK; - } - else - { - kdbip[cpu].kd_prev_timebase = now; - } - kd->timestamp = now | (((uint64_t)cpu) << KDBG_CPU_SHIFT); - - kdbip[cpu].kd_bufptr++; + kdbg_set_timestamp_and_cpu(kd, now, cpu); - if (kdbip[cpu].kd_bufptr >= kdbip[cpu].kd_buflast) - kdbip[cpu].kd_bufptr = kdbip[cpu].kd_buffer; - - if (kdbip[cpu].kd_bufptr == kdbip[cpu].kd_readlast) { - if (kdebug_flags & KDBG_NOWRAP) - kdebug_slowcheck |= SLOW_NOLOG; - kdbip[cpu].kd_wrapped = 1; - kdebug_flags |= KDBG_WRAPPED; - } + kdsp->kds_bufptr++; + if (kdsp->kds_bufptr >= kdsp->kds_buflast) + kdbp->kd_active = NULL; out: ml_set_interrupts_enabled(s); } void -kernel_debug(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, - unsigned int arg4, __unused unsigned int arg5) +kernel_debug( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + __unused uintptr_t arg5) { - kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (int)current_thread(), 1); + kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, (uintptr_t)thread_tid(current_thread()), 1); } void -kernel_debug1(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, - unsigned int arg4, unsigned int arg5) +kernel_debug1( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t arg5) { kernel_debug_internal(debugid, arg1, arg2, arg3, arg4, arg5, 0); } @@ -404,7 +600,7 @@ kdbg_lock_init(void) host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); kd_cpus = hinfo.logical_cpu_max; - if (kmem_alloc(kernel_map, (unsigned int *)&kdbip, + if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS) return; @@ -424,6 +620,7 @@ kdbg_lock_init(void) * allocate and initialize spin lock and mutex */ kd_trace_mtx_sysctl = lck_mtx_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr); + kds_spin_lock = 
lck_spin_alloc_init(kd_trace_mtx_sysctl_grp, kd_trace_mtx_sysctl_attr); kdebug_flags |= KDBG_LOCKINIT; } @@ -440,138 +637,133 @@ kdbg_bootstrap(void) int kdbg_reinit(void) { - int ret=0; - - /* - * Disable trace collecting - * First make sure we're not in - * the middle of cutting a trace - */ - - kdebug_enable &= ~KDEBUG_ENABLE_TRACE; - kdebug_slowcheck |= SLOW_NOLOG; + int ret = 0; - /* - * make sure the SLOW_NOLOG is seen - * by everyone that might be trying - * to cut a trace.. - */ - IOSleep(100); + /* + * Disable trace collecting + * First make sure we're not in + * the middle of cutting a trace + */ + kdebug_enable &= ~KDEBUG_ENABLE_TRACE; + kdebug_slowcheck |= SLOW_NOLOG; - delete_buffers(); + /* + * make sure the SLOW_NOLOG is seen + * by everyone that might be trying + * to cut a trace.. + */ + IOSleep(100); - if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) - { - kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); - kdebug_flags &= ~KDBG_MAPINIT; - kd_mapsize = 0; - kd_mapptr = (kd_threadmap *) 0; - kd_mapcount = 0; - } + delete_buffers(); - ret = kdbg_bootstrap(); + if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) { + kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); + kdebug_flags &= ~KDBG_MAPINIT; + kd_mapsize = 0; + kd_mapptr = (kd_threadmap *) 0; + kd_mapcount = 0; + } + ret = kdbg_bootstrap(); - return(ret); + return(ret); } void kdbg_trace_data(struct proc *proc, long *arg_pid) { - if (!proc) - *arg_pid = 0; - else - *arg_pid = proc->p_pid; - - return; + if (!proc) + *arg_pid = 0; + else + *arg_pid = proc->p_pid; } void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4) { - char *dbg_nameptr; - int dbg_namelen; - long dbg_parms[4]; - if (!proc) - { - *arg1 = 0; - *arg2 = 0; - *arg3 = 0; - *arg4 = 0; - return; - } - - /* Collect the pathname for tracing */ - dbg_nameptr = proc->p_comm; - dbg_namelen = strlen(proc->p_comm); - dbg_parms[0]=0L; - dbg_parms[1]=0L; - 
dbg_parms[2]=0L; - dbg_parms[3]=0L; + char *dbg_nameptr; + int dbg_namelen; + long dbg_parms[4]; + + if (!proc) { + *arg1 = 0; + *arg2 = 0; + *arg3 = 0; + *arg4 = 0; + return; + } + /* + * Collect the pathname for tracing + */ + dbg_nameptr = proc->p_comm; + dbg_namelen = (int)strlen(proc->p_comm); + dbg_parms[0]=0L; + dbg_parms[1]=0L; + dbg_parms[2]=0L; + dbg_parms[3]=0L; - if(dbg_namelen > (int)sizeof(dbg_parms)) - dbg_namelen = sizeof(dbg_parms); + if(dbg_namelen > (int)sizeof(dbg_parms)) + dbg_namelen = (int)sizeof(dbg_parms); - strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen); + strncpy((char *)dbg_parms, dbg_nameptr, dbg_namelen); - *arg1=dbg_parms[0]; - *arg2=dbg_parms[1]; - *arg3=dbg_parms[2]; - *arg4=dbg_parms[3]; + *arg1=dbg_parms[0]; + *arg2=dbg_parms[1]; + *arg3=dbg_parms[2]; + *arg4=dbg_parms[3]; } static void kdbg_resolve_map(thread_t th_act, void *opaque) { - kd_threadmap *mapptr; - krt_t *t = (krt_t *)opaque; - - if(t->count < t->maxcount) - { - mapptr=&t->map[t->count]; - mapptr->thread = (unsigned int)th_act; - (void) strlcpy (mapptr->command, t->atts->task_comm, - sizeof(t->atts->task_comm)); - - /* - Some kernel threads have no associated pid. - We still need to mark the entry as valid. - */ - if (t->atts->pid) - mapptr->valid = t->atts->pid; - else - mapptr->valid = 1; - - t->count++; - } + kd_threadmap *mapptr; + krt_t *t = (krt_t *)opaque; + + if (t->count < t->maxcount) { + mapptr = &t->map[t->count]; + mapptr->thread = (uintptr_t)thread_tid(th_act); + + (void) strlcpy (mapptr->command, t->atts->task_comm, + sizeof(t->atts->task_comm)); + /* + * Some kernel threads have no associated pid. + * We still need to mark the entry as valid. 
+ */ + if (t->atts->pid) + mapptr->valid = t->atts->pid; + else + mapptr->valid = 1; + + t->count++; + } } void kdbg_mapinit(void) { - struct proc *p; - struct krt akrt; - int tts_count; /* number of task-to-string structures */ - struct tts *tts_mapptr; - unsigned int tts_mapsize = 0; - unsigned int tts_maptomem=0; - int i; - + struct proc *p; + struct krt akrt; + int tts_count; /* number of task-to-string structures */ + struct tts *tts_mapptr; + unsigned int tts_mapsize = 0; + vm_offset_t tts_maptomem=0; + int i; if (kdebug_flags & KDBG_MAPINIT) - return; + return; - /* need to use PROC_SCANPROCLIST with proc_iterate */ + /* + * need to use PROC_SCANPROCLIST with proc_iterate + */ proc_list_lock(); - /* Calculate the sizes of map buffers*/ - for (p = allproc.lh_first, kd_mapcount=0, tts_count=0; p; - p = p->p_list.le_next) - { - kd_mapcount += get_task_numacts((task_t)p->task); - tts_count++; - } - + /* + * Calculate the sizes of map buffers + */ + for (p = allproc.lh_first, kd_mapcount=0, tts_count=0; p; p = p->p_list.le_next) { + kd_mapcount += get_task_numacts((task_t)p->task); + tts_count++; + } proc_list_unlock(); /* @@ -584,38 +776,33 @@ kdbg_mapinit(void) tts_count += tts_count/10; kd_mapsize = kd_mapcount * sizeof(kd_threadmap); - if((kmem_alloc(kernel_map, & kd_maptomem, - (vm_size_t)kd_mapsize) == KERN_SUCCESS)) - { - kd_mapptr = (kd_threadmap *) kd_maptomem; - bzero(kd_mapptr, kd_mapsize); - } - else - kd_mapptr = (kd_threadmap *) 0; + + if ((kmem_alloc(kernel_map, & kd_maptomem, (vm_size_t)kd_mapsize) == KERN_SUCCESS)) { + kd_mapptr = (kd_threadmap *) kd_maptomem; + bzero(kd_mapptr, kd_mapsize); + } else + kd_mapptr = (kd_threadmap *) 0; tts_mapsize = tts_count * sizeof(struct tts); - if((kmem_alloc(kernel_map, & tts_maptomem, - (vm_size_t)tts_mapsize) == KERN_SUCCESS)) - { - tts_mapptr = (struct tts *) tts_maptomem; - bzero(tts_mapptr, tts_mapsize); - } - else - tts_mapptr = (struct tts *) 0; + if ((kmem_alloc(kernel_map, & tts_maptomem, 
(vm_size_t)tts_mapsize) == KERN_SUCCESS)) { + tts_mapptr = (struct tts *) tts_maptomem; + bzero(tts_mapptr, tts_mapsize); + } else + tts_mapptr = (struct tts *) 0; /* * We need to save the procs command string * and take a reference for each task associated * with a valid process */ - if (tts_mapptr) { - /* should use proc_iterate */ + /* + * should use proc_iterate + */ proc_list_lock(); - for (p = allproc.lh_first, i=0; p && i < tts_count; - p = p->p_list.le_next) { + for (p = allproc.lh_first, i=0; p && i < tts_count; p = p->p_list.le_next) { if (p->p_lflag & P_LEXIT) continue; @@ -630,26 +817,25 @@ kdbg_mapinit(void) tts_count = i; proc_list_unlock(); - } + if (kd_mapptr && tts_mapptr) { + kdebug_flags |= KDBG_MAPINIT; - if (kd_mapptr && tts_mapptr) - { - kdebug_flags |= KDBG_MAPINIT; - /* Initialize thread map data */ - akrt.map = kd_mapptr; - akrt.count = 0; - akrt.maxcount = kd_mapcount; + /* + * Initialize thread map data + */ + akrt.map = kd_mapptr; + akrt.count = 0; + akrt.maxcount = kd_mapcount; - for (i=0; i < tts_count; i++) - { - akrt.atts = &tts_mapptr[i]; - task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt); - task_deallocate((task_t) tts_mapptr[i].task); - } - kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize); - } + for (i = 0; i < tts_count; i++) { + akrt.atts = &tts_mapptr[i]; + task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt); + task_deallocate((task_t) tts_mapptr[i].task); + } + kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize); + } } static void @@ -683,8 +869,10 @@ kdbg_clear(void) /* Clean up the thread map buffer */ kdebug_flags &= ~KDBG_MAPINIT; - kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); - kd_mapptr = (kd_threadmap *) 0; + if (kd_mapptr) { + kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); + kd_mapptr = (kd_threadmap *) 0; + } kd_mapsize = 0; kd_mapcount = 0; } @@ -692,91 +880,100 @@ kdbg_clear(void) int kdbg_setpid(kd_regtype *kdr) { - pid_t pid; - 
int flag, ret=0; - struct proc *p; - - pid = (pid_t)kdr->value1; - flag = (int)kdr->value2; - - if (pid > 0) - { - if ((p = proc_find(pid)) == NULL) - ret = ESRCH; - else - { - if (flag == 1) /* turn on pid check for this and all pids */ - { - kdebug_flags |= KDBG_PIDCHECK; - kdebug_flags &= ~KDBG_PIDEXCLUDE; - kdebug_slowcheck |= SLOW_CHECKS; - - p->p_kdebug = 1; - } - else /* turn off pid check for this pid value */ - { - /* Don't turn off all pid checking though */ - /* kdebug_flags &= ~KDBG_PIDCHECK;*/ - p->p_kdebug = 0; - } - proc_rele(p); + pid_t pid; + int flag, ret=0; + struct proc *p; + + pid = (pid_t)kdr->value1; + flag = (int)kdr->value2; + + if (pid > 0) { + if ((p = proc_find(pid)) == NULL) + ret = ESRCH; + else { + if (flag == 1) { + /* + * turn on pid check for this and all pids + */ + kdebug_flags |= KDBG_PIDCHECK; + kdebug_flags &= ~KDBG_PIDEXCLUDE; + kdebug_slowcheck |= SLOW_CHECKS; + + p->p_kdebug = 1; + } else { + /* + * turn off pid check for this pid value + * Don't turn off all pid checking though + * + * kdebug_flags &= ~KDBG_PIDCHECK; + */ + p->p_kdebug = 0; + } + proc_rele(p); + } } - } - else - ret = EINVAL; - return(ret); + else + ret = EINVAL; + + return(ret); } /* This is for pid exclusion in the trace buffer */ int kdbg_setpidex(kd_regtype *kdr) { - pid_t pid; - int flag, ret=0; - struct proc *p; - - pid = (pid_t)kdr->value1; - flag = (int)kdr->value2; - - if (pid > 0) - { - if ((p = proc_find(pid)) == NULL) - ret = ESRCH; - else - { - if (flag == 1) /* turn on pid exclusion */ - { - kdebug_flags |= KDBG_PIDEXCLUDE; - kdebug_flags &= ~KDBG_PIDCHECK; - kdebug_slowcheck |= SLOW_CHECKS; - - p->p_kdebug = 1; - } - else /* turn off pid exclusion for this pid value */ - { - /* Don't turn off all pid exclusion though */ - /* kdebug_flags &= ~KDBG_PIDEXCLUDE;*/ - p->p_kdebug = 0; - } - proc_rele(p); - } - } - else - ret = EINVAL; - return(ret); + pid_t pid; + int flag, ret=0; + struct proc *p; + + pid = (pid_t)kdr->value1; + flag = 
(int)kdr->value2; + + if (pid > 0) { + if ((p = proc_find(pid)) == NULL) + ret = ESRCH; + else { + if (flag == 1) { + /* + * turn on pid exclusion + */ + kdebug_flags |= KDBG_PIDEXCLUDE; + kdebug_flags &= ~KDBG_PIDCHECK; + kdebug_slowcheck |= SLOW_CHECKS; + + p->p_kdebug = 1; + } + else { + /* + * turn off pid exclusion for this pid value + * Don't turn off all pid exclusion though + * + * kdebug_flags &= ~KDBG_PIDEXCLUDE; + */ + p->p_kdebug = 0; + } + proc_rele(p); + } + } else + ret = EINVAL; + + return(ret); } -/* This is for setting a maximum decrementer value */ + +/* + * This is for setting a maximum decrementer value + */ int kdbg_setrtcdec(kd_regtype *kdr) { - int ret=0; - natural_t decval; + int ret = 0; + natural_t decval; - decval = (natural_t)kdr->value1; + decval = (natural_t)kdr->value1; - if (decval && decval < KDBG_MINRTCDEC) - ret = EINVAL; + if (decval && decval < KDBG_MINRTCDEC) + ret = EINVAL; #ifdef ppc else { maxDec = decval ? decval : 0x7FFFFFFF; /* Set or reset the max decrementer */ @@ -786,7 +983,7 @@ kdbg_setrtcdec(kd_regtype *kdr) ret = ENOTSUP; #endif /* ppc */ - return(ret); + return(ret); } int @@ -899,105 +1096,128 @@ kdbg_getreg(__unused kd_regtype * kdr) int -kdbg_readmap(user_addr_t buffer, size_t *number) +kdbg_readmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) { - int avail = *number; - int ret = 0; - unsigned int count = 0; + int avail = *number; + int ret = 0; + uint32_t count = 0; - count = avail/sizeof (kd_threadmap); + count = avail/sizeof (kd_threadmap); - if (count && (count <= kd_mapcount)) - { - if((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) + if (count && (count <= kd_mapcount)) { - if (*number < kd_mapsize) - ret=EINVAL; - else - { - if (copyout(kd_mapptr, buffer, kd_mapsize)) - ret=EINVAL; - } + if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) + { + if (*number < kd_mapsize) + ret = EINVAL; + else + { + if (vp) { + vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, 
sizeof(uint32_t), RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + RAW_file_offset += sizeof(uint32_t); + + vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, kd_mapsize, RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + RAW_file_offset += kd_mapsize; + + } else { + if (copyout(kd_mapptr, buffer, kd_mapsize)) + ret = EINVAL; + } + } + } + else + ret = EINVAL; + } + else + ret = EINVAL; + + if (ret && vp) { + count = 0; + + vn_rdwr(UIO_WRITE, vp, (caddr_t)&count, sizeof(uint32_t), RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + RAW_file_offset += sizeof(uint32_t); } - else - ret=EINVAL; - } - else - ret=EINVAL; - - if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) - { - kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); - kdebug_flags &= ~KDBG_MAPINIT; - kd_mapsize = 0; - kd_mapptr = (kd_threadmap *) 0; - kd_mapcount = 0; - } - - return(ret); + if ((kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) + { + kmem_free(kernel_map, (vm_offset_t)kd_mapptr, kd_mapsize); + kdebug_flags &= ~KDBG_MAPINIT; + kd_mapsize = 0; + kd_mapptr = (kd_threadmap *) 0; + kd_mapcount = 0; + } + + return(ret); } int kdbg_getentropy (user_addr_t buffer, size_t *number, int ms_timeout) { - int avail = *number; - int ret = 0; - - if (kd_entropy_buffer) - return(EBUSY); - - kd_entropy_count = avail/sizeof(mach_timespec_t); - kd_entropy_bufsize = kd_entropy_count * sizeof(mach_timespec_t); - kd_entropy_indx = 0; - - /* Enforce maximum entropy entries here if needed */ - - /* allocate entropy buffer */ - if (kmem_alloc(kernel_map, &kd_entropy_buftomem, - (vm_size_t)kd_entropy_bufsize) == KERN_SUCCESS) - { - kd_entropy_buffer = (uint64_t *) kd_entropy_buftomem; - } - else - { - kd_entropy_buffer = (uint64_t *) 0; - kd_entropy_count = 0; - kd_entropy_indx = 0; - return (EINVAL); - 
} - - if (ms_timeout < 10) - ms_timeout = 10; - - /* Enable entropy sampling */ - kdebug_enable |= KDEBUG_ENABLE_ENTROPY; - kdebug_slowcheck |= SLOW_ENTROPY; - - ret = tsleep (kdbg_getentropy, PRIBIO | PCATCH, "kd_entropy", (ms_timeout/(1000/HZ))); - - /* Disable entropy sampling */ - kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; - kdebug_slowcheck &= ~SLOW_ENTROPY; - - *number = 0; - ret = 0; - - if (kd_entropy_indx > 0) - { - /* copyout the buffer */ - if (copyout(kd_entropy_buffer, buffer, kd_entropy_indx * sizeof(mach_timespec_t))) - ret = EINVAL; - else - *number = kd_entropy_indx; - } - - /* Always cleanup */ - kd_entropy_count = 0; - kd_entropy_indx = 0; - kd_entropy_buftomem = 0; - kmem_free(kernel_map, (vm_offset_t)kd_entropy_buffer, kd_entropy_bufsize); - kd_entropy_buffer = (uint64_t *) 0; - return(ret); + int avail = *number; + int ret = 0; + + if (kd_entropy_buffer) + return(EBUSY); + + kd_entropy_count = avail/sizeof(mach_timespec_t); + kd_entropy_bufsize = kd_entropy_count * sizeof(mach_timespec_t); + kd_entropy_indx = 0; + + /* + * Enforce maximum entropy entries here if needed + * allocate entropy buffer + */ + if (kmem_alloc(kernel_map, &kd_entropy_buftomem, + (vm_size_t)kd_entropy_bufsize) == KERN_SUCCESS) { + kd_entropy_buffer = (uint64_t *) kd_entropy_buftomem; + } else { + kd_entropy_buffer = (uint64_t *) 0; + kd_entropy_count = 0; + kd_entropy_indx = 0; + return (EINVAL); + } + + if (ms_timeout < 10) + ms_timeout = 10; + + /* + * Enable entropy sampling + */ + kdebug_enable |= KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck |= SLOW_ENTROPY; + + ret = tsleep (kdbg_getentropy, PRIBIO | PCATCH, "kd_entropy", (ms_timeout/(1000/HZ))); + + /* + * Disable entropy sampling + */ + kdebug_enable &= ~KDEBUG_ENABLE_ENTROPY; + kdebug_slowcheck &= ~SLOW_ENTROPY; + + *number = 0; + ret = 0; + + if (kd_entropy_indx > 0) { + /* + * copyout the buffer + */ + if (copyout(kd_entropy_buffer, buffer, kd_entropy_indx * sizeof(mach_timespec_t))) + ret = EINVAL; + else + 
*number = kd_entropy_indx; + } + /* + * Always cleanup + */ + kd_entropy_count = 0; + kd_entropy_indx = 0; + kd_entropy_buftomem = 0; + kmem_free(kernel_map, (vm_offset_t)kd_entropy_buffer, kd_entropy_bufsize); + kd_entropy_buffer = (uint64_t *) 0; + + return(ret); } @@ -1005,10 +1225,10 @@ static void kdbg_set_nkdbufs(unsigned int value) { /* - * We allow a maximum buffer size of 25% of either ram or max mapped address, whichever is smaller + * We allow a maximum buffer size of 50% of either ram or max mapped address, whichever is smaller * 'value' is the desired number of trace entries */ - unsigned int max_entries = (sane_size/4) / sizeof(kd_buf); + unsigned int max_entries = (sane_size/2) / sizeof(kd_buf); if (value <= max_entries) nkdbufs = value; @@ -1045,8 +1265,8 @@ kdbg_control_chud(int val, void *fn) int kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) { - int ret=0; - size_t size=*sizep; + int ret = 0; + size_t size = *sizep; unsigned int value = 0; kd_regtype kd_Reg; kbufinfo_t kd_bufinfo; @@ -1067,92 +1287,84 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) kdbg_lock_init(); if ( !(kdebug_flags & KDBG_LOCKINIT)) - return(ENOMEM); + return(ENOSPC); lck_mtx_lock(kd_trace_mtx_sysctl); if (name[0] == KERN_KDGETBUF) { - /* - * Does not alter the global_state_pid - * This is a passive request. - */ - if (size < sizeof(kd_bufinfo.nkdbufs)) { - /* - * There is not enough room to return even - * the first element of the info structure. + /* + * Does not alter the global_state_pid + * This is a passive request. 
*/ - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return(EINVAL); - } - kd_bufinfo.nkdbufs = nkdbufs; - kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap); - - if ( (kdebug_slowcheck & SLOW_NOLOG) ) - kd_bufinfo.nolog = 1; - else - kd_bufinfo.nolog = 0; - kd_bufinfo.flags = kdebug_flags; - kd_bufinfo.bufid = global_state_pid; + if (size < sizeof(kd_bufinfo.nkdbufs)) { + /* + * There is not enough room to return even + * the first element of the info structure. + */ + ret = EINVAL; + goto out; + } + kd_bufinfo.nkdbufs = nkdbufs; + kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap); + + if ( (kdebug_slowcheck & SLOW_NOLOG) ) + kd_bufinfo.nolog = 1; + else + kd_bufinfo.nolog = 0; + + kd_bufinfo.flags = kdebug_flags; +#if defined(__LP64__) + kd_bufinfo.flags |= KDBG_LP64; +#endif + kd_bufinfo.bufid = global_state_pid; - if (size >= sizeof(kd_bufinfo)) { - /* - * Provide all the info we have - */ - if (copyout (&kd_bufinfo, where, sizeof(kd_bufinfo))) { - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return(EINVAL); - } - } - else { - /* + if (size >= sizeof(kd_bufinfo)) { + /* + * Provide all the info we have + */ + if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) + ret = EINVAL; + } else { + /* * For backwards compatibility, only provide * as much info as there is room for. 
*/ - if (copyout (&kd_bufinfo, where, size)) { - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return(EINVAL); - } - } - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return(0); - } else if (name[0] == KERN_KDGETENTROPY) { - if (kd_entropy_buffer) - ret = EBUSY; - else - ret = kdbg_getentropy(where, sizep, value); - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return (ret); + if (copyout(&kd_bufinfo, where, size)) + ret = EINVAL; + } + goto out; + + } else if (name[0] == KERN_KDGETENTROPY) { + if (kd_entropy_buffer) + ret = EBUSY; + else + ret = kdbg_getentropy(where, sizep, value); + goto out; } if ((curproc = current_proc()) != NULL) - curpid = curproc->p_pid; + curpid = curproc->p_pid; else { - lck_mtx_unlock(kd_trace_mtx_sysctl); - - return (ESRCH); + ret = ESRCH; + goto out; } if (global_state_pid == -1) - global_state_pid = curpid; + global_state_pid = curpid; else if (global_state_pid != curpid) { - if ((p = proc_find(global_state_pid)) == NULL) { - /* - * The global pid no longer exists - */ - global_state_pid = curpid; - } else { - /* - * The global pid exists, deny this request - */ - proc_rele(p); - lck_mtx_unlock(kd_trace_mtx_sysctl); + if ((p = proc_find(global_state_pid)) == NULL) { + /* + * The global pid no longer exists + */ + global_state_pid = curpid; + } else { + /* + * The global pid exists, deny this request + */ + proc_rele(p); - return(EBUSY); - } + ret = EBUSY; + goto out; + } } switch(name[0]) { @@ -1164,58 +1376,60 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) value &= KDBG_USERFLAGS; kdebug_flags &= ~value; break; - case KERN_KDENABLE: /* used to enable or disable */ - if (value) - { - /* enable only if buffer is initialized */ - if (!(kdebug_flags & KDBG_BUFINIT)) - { - ret=EINVAL; - break; + case KERN_KDENABLE: + /* + * used to enable or disable + */ + if (value) { + /* + * enable only if buffer is initialized + */ + if (!(kdebug_flags & KDBG_BUFINIT)) { + ret = EINVAL; + break; + } + kdbg_mapinit(); + + kdebug_enable 
|= KDEBUG_ENABLE_TRACE; + kdebug_slowcheck &= ~SLOW_NOLOG; + } + else { + kdebug_enable &= ~KDEBUG_ENABLE_TRACE; + kdebug_slowcheck |= SLOW_NOLOG; } - kdbg_mapinit(); - - kdebug_enable |= KDEBUG_ENABLE_TRACE; - kdebug_slowcheck &= ~SLOW_NOLOG; - } - else - { - kdebug_enable &= ~KDEBUG_ENABLE_TRACE; - kdebug_slowcheck |= SLOW_NOLOG; - } - break; + break; case KERN_KDSETBUF: kdbg_set_nkdbufs(value); break; case KERN_KDSETUP: - ret=kdbg_reinit(); + ret = kdbg_reinit(); break; case KERN_KDREMOVE: kdbg_clear(); break; case KERN_KDSETREG: if(size < sizeof(kd_regtype)) { - ret=EINVAL; + ret = EINVAL; break; } if (copyin(where, &kd_Reg, sizeof(kd_regtype))) { - ret= EINVAL; + ret = EINVAL; break; } ret = kdbg_setreg(&kd_Reg); break; case KERN_KDGETREG: - if(size < sizeof(kd_regtype)) { + if (size < sizeof(kd_regtype)) { ret = EINVAL; break; } ret = kdbg_getreg(&kd_Reg); - if (copyout(&kd_Reg, where, sizeof(kd_regtype))){ - ret=EINVAL; + if (copyout(&kd_Reg, where, sizeof(kd_regtype))) { + ret = EINVAL; } break; case KERN_KDREADTR: - ret = kdbg_read(where, sizep); + ret = kdbg_read(where, sizep, NULL, NULL); break; case KERN_KDPIDTR: if (size < sizeof(kd_regtype)) { @@ -1223,7 +1437,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; } if (copyin(where, &kd_Reg, sizeof(kd_regtype))) { - ret= EINVAL; + ret = EINVAL; break; } ret = kdbg_setpid(&kd_Reg); @@ -1234,13 +1448,13 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; } if (copyin(where, &kd_Reg, sizeof(kd_regtype))) { - ret= EINVAL; + ret = EINVAL; break; } ret = kdbg_setpidex(&kd_Reg); break; case KERN_KDTHRMAP: - ret = kdbg_readmap(where, sizep); + ret = kdbg_readmap(where, sizep, NULL, NULL); break; case KERN_KDSETRTCDEC: if (size < sizeof(kd_regtype)) { @@ -1248,15 +1462,16 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; } if (copyin(where, &kd_Reg, sizeof(kd_regtype))) { - ret= EINVAL; + ret = EINVAL; break; } ret = 
kdbg_setrtcdec(&kd_Reg); break; default: - ret= EINVAL; + ret = EINVAL; } +out: lck_mtx_unlock(kd_trace_mtx_sysctl); return(ret); @@ -1264,25 +1479,27 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) /* - * This code can run concurrently with kernel_debug_internal() - * without the need of any locks, because all reads of kd_bufptr[i], - * which get modified by kernel_debug_internal(), are safe. + * This code can run for the most part concurrently with kernel_debug_internal()... + * 'release_storage_unit' will take the kds_spin_lock which may cause us to briefly + * synchronize with the recording side of this puzzle... otherwise, we are able to + * move through the lists w/o use of any locks */ int -kdbg_read(user_addr_t buffer, size_t *number) +kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) { unsigned int count; - unsigned int cpu; - int mincpu; - uint64_t mintime, t, last_wrap_time; - int last_wrap_cpu; - int error = 0; + unsigned int cpu, mincpu; + uint64_t mintime, t; + int error = 0,s = 0; kd_buf *tempbuf; + kd_buf *rcursor; + kd_buf *min_rcursor; + struct kd_storage *kdsp; + struct kd_bufinfo *kdbp; uint32_t tempbuf_count; uint32_t tempbuf_number; - unsigned int old_kdebug_flags, new_kdebug_flags; - unsigned int old_kdebug_slowcheck, new_kdebug_slowcheck; - boolean_t first_event = TRUE; + uint32_t old_kdebug_flags; + uint32_t old_kdebug_slowcheck; count = *number/sizeof(kd_buf); *number = 0; @@ -1293,51 +1510,28 @@ kdbg_read(user_addr_t buffer, size_t *number) /* * because we hold kd_trace_mtx_sysctl, no other control threads can * be playing with kdebug_flags... the code that cuts new events could - * be running, but it only reads kdebug_flags, it doesn't write it.. 
- * use an OSCompareAndSwap to make sure the other processors see the - * change of state immediately, not to protect against 2 threads racing to update it + * be running, but it grabs kds_spin_lock if it needs to acquire a new + * storage chunk which is where it examines kdebug_flags... it its adding + * to the same chunk we're reading from, no problem... */ - old_kdebug_slowcheck = kdebug_slowcheck; - do { - old_kdebug_flags = kdebug_flags; - new_kdebug_flags = old_kdebug_flags & ~KDBG_WRAPPED; - new_kdebug_flags |= KDBG_NOWRAP; - } while ( !OSCompareAndSwap((UInt32)old_kdebug_flags, (UInt32)new_kdebug_flags, (UInt32 *)&kdebug_flags)); + s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kds_spin_lock); - last_wrap_time = 0; - last_wrap_cpu = -1; + old_kdebug_slowcheck = kdebug_slowcheck; + old_kdebug_flags = kdebug_flags; - for (cpu = 0; cpu < kd_cpus; cpu++) { - kd_buf *cur_bufptr; - - if ((cur_bufptr = kdbip[cpu].kd_bufptr) >= kdbip[cpu].kd_buflast) - cur_bufptr = kdbip[cpu].kd_buffer; + kdebug_flags &= ~KDBG_WRAPPED; + kdebug_flags |= KDBG_NOWRAP; - if (kdbip[cpu].kd_wrapped) { - kdbip[cpu].kd_wrapped = 0; - kdbip[cpu].kd_readlast = cur_bufptr; - kdbip[cpu].kd_stop = cur_bufptr; + lck_spin_unlock(kds_spin_lock); + ml_set_interrupts_enabled(s); - if (kd_cpus > 1 && ((cur_bufptr->timestamp & KDBG_TIMESTAMP_MASK) > last_wrap_time)) { - last_wrap_time = cur_bufptr->timestamp & KDBG_TIMESTAMP_MASK; - last_wrap_cpu = cpu; - } - } else { - if (kdbip[cpu].kd_readlast == cur_bufptr) - kdbip[cpu].kd_stop = 0; - else - kdbip[cpu].kd_stop = cur_bufptr; - } - } if (count > nkdbufs) count = nkdbufs; if ((tempbuf_count = count) > KDCOPYBUF_COUNT) tempbuf_count = KDCOPYBUF_COUNT; - if (last_wrap_cpu == -1) - first_event = FALSE; - while (count) { tempbuf = kdcopybuf; tempbuf_number = 0; @@ -1345,78 +1539,83 @@ kdbg_read(user_addr_t buffer, size_t *number) while (tempbuf_count) { mintime = 0xffffffffffffffffULL; /* all actual timestamps are below */ mincpu = -1; + 
min_rcursor = NULL; - for (cpu = 0; cpu < kd_cpus; cpu++) { - if (kdbip[cpu].kd_stop == 0) /* empty buffer */ + for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_cpus; cpu++, kdbp++) { + + if ((kdsp = kdbp->kd_list_head) == NULL) continue; - t = kdbip[cpu].kd_readlast[0].timestamp & KDBG_TIMESTAMP_MASK; + rcursor = kdsp->kds_readlast; + + if (rcursor == kdsp->kds_bufptr) + continue; + t = kdbg_get_timestamp(rcursor); if (t < mintime) { - mintime = t; mincpu = cpu; + mintime = t; + min_rcursor = rcursor; } } - if (mincpu < 0) + if (mincpu == (unsigned int)-1) /* - * all buffers ran empty early + * all buffers ran empty */ break; + + kdbp = &kdbip[mincpu]; + kdsp = kdbp->kd_list_head; - if (first_event == TRUE) { - /* - * make sure we leave room for the - * LAST_WRAPPER event we inject - * by throwing away the first event - * it's better to lose that one - * than the last one - */ - first_event = FALSE; - - kdbip[mincpu].kd_readlast++; - - if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_buflast) - kdbip[mincpu].kd_readlast = kdbip[mincpu].kd_buffer; - if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_stop) - kdbip[mincpu].kd_stop = 0; + *tempbuf = *min_rcursor; + if (mintime != kdbg_get_timestamp(tempbuf)) { + /* + * we stole this storage unit and used it + * before we could slurp the selected event out + * so we need to re-evaluate + */ continue; } - if (last_wrap_cpu == mincpu) { - tempbuf->debugid = MISCDBG_CODE(DBG_BUFFER, 0) | DBG_FUNC_NONE; - tempbuf->arg1 = kd_bufsize / sizeof(kd_buf); - tempbuf->arg2 = kd_cpus; - tempbuf->arg3 = 0; - tempbuf->arg4 = 0; - tempbuf->arg5 = (int)current_thread(); - - tempbuf->timestamp = last_wrap_time | (((uint64_t)last_wrap_cpu) << KDBG_CPU_SHIFT); - - tempbuf++; - - last_wrap_cpu = -1; + /* + * Watch for out of order timestamps + */ + if (mintime < kdbp->kd_prev_timebase) { + /* + * if so, use the previous timestamp + 1 cycle + */ + kdbp->kd_prev_timebase++; + kdbg_set_timestamp_and_cpu(tempbuf, kdbp->kd_prev_timebase, mincpu); + } 
else + kdbp->kd_prev_timebase = mintime; - } else { - *(tempbuf++) = kdbip[mincpu].kd_readlast[0]; + if (min_rcursor == kdsp->kds_readlast) + kdsp->kds_readlast++; - kdbip[mincpu].kd_readlast++; + if (kdsp->kds_readlast == kdsp->kds_buflast) + release_storage_unit(kdbp, kdsp); - if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_buflast) - kdbip[mincpu].kd_readlast = kdbip[mincpu].kd_buffer; - if (kdbip[mincpu].kd_readlast == kdbip[mincpu].kd_stop) - kdbip[mincpu].kd_stop = 0; - } tempbuf_count--; tempbuf_number++; + tempbuf++; } if (tempbuf_number) { - if ((error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf)))) { - *number = 0; + + if (vp) { + error = vn_rdwr(UIO_WRITE, vp, (caddr_t)kdcopybuf, tempbuf_number * sizeof(kd_buf), RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + + RAW_file_offset += (tempbuf_number * sizeof(kd_buf)); + } else { + error = copyout(kdcopybuf, buffer, tempbuf_number * sizeof(kd_buf)); + buffer += (tempbuf_number * sizeof(kd_buf)); + } + if (error) { + *number = 0; error = EINVAL; break; } count -= tempbuf_number; *number += tempbuf_number; - buffer += (tempbuf_number * sizeof(kd_buf)); } if (tempbuf_count) /* @@ -1428,17 +1627,17 @@ kdbg_read(user_addr_t buffer, size_t *number) tempbuf_count = KDCOPYBUF_COUNT; } if ( !(old_kdebug_flags & KDBG_NOWRAP)) { - do { - old_kdebug_flags = kdebug_flags; - new_kdebug_flags = old_kdebug_flags & ~KDBG_NOWRAP; - } while ( !OSCompareAndSwap((UInt32)old_kdebug_flags, (UInt32)new_kdebug_flags, (UInt32 *)&kdebug_flags)); - - if ( !(old_kdebug_slowcheck & SLOW_NOLOG)) { - do { - old_kdebug_slowcheck = kdebug_slowcheck; - new_kdebug_slowcheck = old_kdebug_slowcheck & ~SLOW_NOLOG; - } while ( !OSCompareAndSwap((UInt32)old_kdebug_slowcheck, (UInt32)new_kdebug_slowcheck, (UInt32 *)&kdebug_slowcheck)); - } + + s = ml_set_interrupts_enabled(FALSE); + lck_spin_lock(kds_spin_lock); + + kdebug_flags &= ~KDBG_NOWRAP; + + if ( 
!(old_kdebug_slowcheck & SLOW_NOLOG)) + kdebug_slowcheck &= ~SLOW_NOLOG; + + lck_spin_unlock(kds_spin_lock); + ml_set_interrupts_enabled(s); } return (error); } @@ -1453,7 +1652,7 @@ unsigned char *getProcName(struct proc *proc) { #define STACKSHOT_SUBSYS_LOCK() lck_mtx_lock(&stackshot_subsys_mutex) #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex) -#ifdef __i386__ +#if defined(__i386__) || defined (__x86_64__) #define TRAP_DEBUGGER __asm__ volatile("int3"); #endif #ifdef __ppc__ @@ -1498,9 +1697,10 @@ stackshot_lock_init( void ) * tracebuffer exhaustion, we copyout as much as possible. */ int -stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, register_t *retval) { +stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t *retval) { int error = 0; + if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); @@ -1509,7 +1709,7 @@ stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, registe } int -stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, register_t *retval) +stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options, int32_t *retval) { int error = 0; unsigned bytesTraced = 0; @@ -1581,5 +1781,56 @@ start_kern_tracing(unsigned int new_nkdbufs) { kdebug_enable |= KDEBUG_ENABLE_TRACE; kdebug_slowcheck &= ~SLOW_NOLOG; kdbg_mapinit(); + +#if defined(__i386__) || defined(__x86_64__) + uint64_t now = mach_absolute_time(); + + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 1)) | DBG_FUNC_NONE, + (uint32_t)(tsc_rebase_abs_time >> 32), (uint32_t)tsc_rebase_abs_time, + (uint32_t)(now >> 32), (uint32_t)now, + 0); +#endif printf("kernel tracing started\n"); } + +void +kdbg_dump_trace_to_file(const char *filename) +{ + vfs_context_t ctx; + vnode_t vp; + int error; + size_t number; + + + if (kdebug_enable & (KDEBUG_ENABLE_CHUD | KDEBUG_ENABLE_ENTROPY)) + return; + + if (global_state_pid != -1) { + 
if ((proc_find(global_state_pid)) != NULL) { + /* + * The global pid exists, we're running + * due to fs_usage, latency, etc... + * don't cut the panic/shutdown trace file + */ + return; + } + } + KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 0)) | DBG_FUNC_NONE, 0, 0, 0, 0, 0); + + kdebug_enable = 0; + + ctx = vfs_context_kernel(); + + if ((error = vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx))) + return; + + number = kd_mapsize; + kdbg_readmap(0, &number, vp, ctx); + + number = nkdbufs*sizeof(kd_buf); + kdbg_read(0, &number, vp, ctx); + + vnode_close(vp, FWRITE, ctx); + + sync(current_proc(), (void *)NULL, (int *)NULL); +} diff --git a/bsd/kern/kern_acct.c b/bsd/kern/kern_acct.c index 1e378b1db..747f09221 100644 --- a/bsd/kern/kern_acct.c +++ b/bsd/kern/kern_acct.c @@ -114,7 +114,7 @@ * The former's operation is described in Leffler, et al., and the latter * was provided by UCB with the 4.4BSD-Lite release */ -comp_t encode_comp_t(u_long, u_long); +comp_t encode_comp_t(uint32_t, uint32_t); void acctwatch(void *); void acctwatch_funnel(void *); @@ -227,7 +227,7 @@ acct_process(proc_t p) struct vnode *vp; kauth_cred_t safecred; struct session * sessp; - boolean_t fstate; + struct tty *tp; /* If accounting isn't enabled, don't bother */ vp = acctp; @@ -277,10 +277,10 @@ acct_process(proc_t p) /* (7) The terminal from which the process was started */ sessp = proc_session(p); - if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && (sessp->s_ttyp != TTY_NULL)) { - fstate = thread_funnel_set(kernel_flock, TRUE); - an_acct.ac_tty = sessp->s_ttyp->t_dev; - (void) thread_funnel_set(kernel_flock, fstate); + if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && ((tp = SESSION_TP(sessp)) != TTY_NULL)) { + tty_lock(tp); + an_acct.ac_tty = tp->t_dev; + tty_unlock(tp); }else an_acct.ac_tty = NODEV; @@ -295,7 +295,7 @@ acct_process(proc_t p) */ if ((error = vnode_getwithref(vp)) == 0) { error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&an_acct, 
sizeof (an_acct), - (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, safecred, + (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, safecred, (int *)0, p); vnode_put(vp); } @@ -315,7 +315,7 @@ acct_process(proc_t p) #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ comp_t -encode_comp_t(u_long s, u_long us) +encode_comp_t(uint32_t s, uint32_t us) { int exp, rnd; @@ -342,6 +342,7 @@ encode_comp_t(u_long s, u_long us) return (exp); } +/* XXX The acctwatch() thread need to be protected by a mutex instead. */ void acctwatch_funnel(void *a) { diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index 813456ab9..b829fa26d 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -69,6 +69,8 @@ #include +#include + #include #define AIO_work_queued 1 #define AIO_worker_wake 2 @@ -117,17 +119,34 @@ * user process calls aio_return or the process exits, either way that is our * trigger to release aio resources. 
*/ +typedef struct aio_workq { + TAILQ_HEAD(, aio_workq_entry) aioq_entries; + int aioq_count; + lck_mtx_t aioq_mtx; + wait_queue_t aioq_waitq; +} *aio_workq_t; + +#define AIO_NUM_WORK_QUEUES 1 struct aio_anchor_cb { - int aio_async_workq_count; /* entries on aio_async_workq */ - int lio_sync_workq_count; /* entries on lio_sync_workq */ - int aio_active_count; /* entries on all active queues (proc.aio_activeq) */ - int aio_done_count; /* entries on all done queues (proc.aio_doneq) */ - TAILQ_HEAD( , aio_workq_entry ) aio_async_workq; - TAILQ_HEAD( , aio_workq_entry ) lio_sync_workq; + volatile int32_t aio_inflight_count; /* entries that have been taken from a workq */ + volatile int32_t aio_done_count; /* entries on all done queues (proc.aio_doneq) */ + volatile int32_t aio_total_count; /* total extant entries */ + + /* Hash table of queues here */ + int aio_num_workqs; + struct aio_workq aio_async_workqs[AIO_NUM_WORK_QUEUES]; }; typedef struct aio_anchor_cb aio_anchor_cb; +struct aio_lio_context +{ + int io_waiter; + int io_issued; + int io_completed; +}; +typedef struct aio_lio_context aio_lio_context; + /* * Notes on aio sleep / wake channels. @@ -135,88 +154,344 @@ typedef struct aio_anchor_cb aio_anchor_cb; * us sleep channels that currently do not collide with any other kernel routines. * At this time, for binary compatibility reasons, we cannot create new proc fields. */ -#define AIO_SUSPEND_SLEEP_CHAN aio_active_count -#define AIO_CLEANUP_SLEEP_CHAN aio_done_count - - -/* - * aysnc IO locking macros used to protect critical sections. 
- */ -#define AIO_LOCK lck_mtx_lock(aio_lock) -#define AIO_UNLOCK lck_mtx_unlock(aio_lock) +#define AIO_SUSPEND_SLEEP_CHAN p_aio_active_count +#define AIO_CLEANUP_SLEEP_CHAN p_aio_total_count +#define ASSERT_AIO_FROM_PROC(aiop, theproc) \ + if ((aiop)->procp != (theproc)) { \ + panic("AIO on a proc list that does not belong to that proc.\n"); \ + } /* * LOCAL PROTOTYPES */ -static int aio_active_requests_for_process(proc_t procp ); +static void aio_proc_lock(proc_t procp); +static void aio_proc_lock_spin(proc_t procp); +static void aio_proc_unlock(proc_t procp); +static lck_mtx_t* aio_proc_mutex(proc_t procp); +static void aio_proc_move_done_locked(proc_t procp, aio_workq_entry *entryp); +static void aio_proc_remove_done_locked(proc_t procp, aio_workq_entry *entryp); +static int aio_get_process_count(proc_t procp ); +static int aio_active_requests_for_process(proc_t procp ); +static int aio_proc_active_requests_for_file(proc_t procp, int fd); +static boolean_t is_already_queued(proc_t procp, user_addr_t aiocbp ); +static boolean_t should_cancel(aio_workq_entry *entryp, user_addr_t aiocbp, int fd); + +static void aio_entry_lock(aio_workq_entry *entryp); +static void aio_entry_lock_spin(aio_workq_entry *entryp); +static aio_workq_t aio_entry_workq(aio_workq_entry *entryp); +static lck_mtx_t* aio_entry_mutex(__unused aio_workq_entry *entryp); +static void aio_workq_remove_entry_locked(aio_workq_t queue, aio_workq_entry *entryp); +static void aio_workq_add_entry_locked(aio_workq_t queue, aio_workq_entry *entryp); +static void aio_entry_ref_locked(aio_workq_entry *entryp); +static void aio_entry_unref_locked(aio_workq_entry *entryp); +static void aio_entry_ref(aio_workq_entry *entryp); +static void aio_entry_unref(aio_workq_entry *entryp); +static void aio_entry_update_for_cancel(aio_workq_entry *entryp, boolean_t cancelled, + int wait_for_completion, boolean_t disable_notification); +static int aio_entry_try_workq_remove(aio_workq_entry *entryp); static boolean_t 
aio_delay_fsync_request( aio_workq_entry *entryp ); -static int aio_free_request( aio_workq_entry *entryp, vm_map_t the_map ); -static int aio_get_all_queues_count( void ); -static int aio_get_process_count(proc_t procp ); -static aio_workq_entry * aio_get_some_work( void ); -static boolean_t aio_last_group_io( aio_workq_entry *entryp ); -static void aio_mark_requests( aio_workq_entry *entryp ); -static int aio_queue_async_request(proc_t procp, - user_addr_t aiocbp, - int kindOfIO ); -static int aio_validate( aio_workq_entry *entryp ); -static void aio_work_thread( void ); -static int do_aio_cancel(proc_t p, - int fd, - user_addr_t aiocbp, - boolean_t wait_for_completion, - boolean_t disable_notification ); -static void do_aio_completion( aio_workq_entry *entryp ); -static int do_aio_fsync( aio_workq_entry *entryp ); -static int do_aio_read( aio_workq_entry *entryp ); -static int do_aio_write( aio_workq_entry *entryp ); -static void do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ); -static boolean_t is_already_queued(proc_t procp, - user_addr_t aiocbp ); -static int lio_create_async_entry(proc_t procp, - user_addr_t aiocbp, - user_addr_t sigp, - long group_tag, - aio_workq_entry **entrypp ); -static int lio_create_sync_entry(proc_t procp, - user_addr_t aiocbp, - long group_tag, - aio_workq_entry **entrypp ); - +static int aio_free_request(aio_workq_entry *entryp); + +static void aio_workq_init(aio_workq_t wq); +static void aio_workq_lock_spin(aio_workq_t wq); +static void aio_workq_unlock(aio_workq_t wq); +static lck_mtx_t* aio_workq_mutex(aio_workq_t wq); + +static void aio_work_thread( void ); +static aio_workq_entry *aio_get_some_work( void ); + +static int aio_get_all_queues_count( void ); +static int aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO ); +static int aio_validate( aio_workq_entry *entryp ); +static int aio_increment_total_count(void); +static int aio_decrement_total_count(void); + +static int 
do_aio_cancel_locked(proc_t p, int fd, user_addr_t aiocbp, int wait_for_completion, boolean_t disable_notification ); +static void do_aio_completion( aio_workq_entry *entryp ); +static int do_aio_fsync( aio_workq_entry *entryp ); +static int do_aio_read( aio_workq_entry *entryp ); +static int do_aio_write( aio_workq_entry *entryp ); +static void do_munge_aiocb_user32_to_user( struct user32_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ); +static void do_munge_aiocb_user64_to_user( struct user64_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ); +static int lio_create_entry(proc_t procp, + user_addr_t aiocbp, + void *group_tag, + aio_workq_entry **entrypp ); +static aio_workq_entry *aio_create_queue_entry(proc_t procp, + user_addr_t aiocbp, + void *group_tag, + int kindOfIO); +static user_addr_t *aio_copy_in_list(proc_t procp, user_addr_t aiocblist, int nent); +static void free_lio_context(aio_lio_context* context); +static void aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked); + +#define ASSERT_AIO_PROC_LOCK_OWNED(p) lck_mtx_assert(aio_proc_mutex((p)), LCK_MTX_ASSERT_OWNED) +#define ASSERT_AIO_WORKQ_LOCK_OWNED(q) lck_mtx_assert(aio_workq_mutex((q)), LCK_MTX_ASSERT_OWNED) +#define ASSERT_AIO_ENTRY_LOCK_OWNED(e) lck_mtx_assert(aio_entry_mutex((e)), LCK_MTX_ASSERT_OWNED) /* * EXTERNAL PROTOTYPES */ /* in ...bsd/kern/sys_generic.c */ -extern int dofileread(vfs_context_t ctx, struct fileproc *fp, - user_addr_t bufp, user_size_t nbyte, - off_t offset, int flags, user_ssize_t *retval ); -extern int dofilewrite(vfs_context_t ctx, struct fileproc *fp, - user_addr_t bufp, user_size_t nbyte, off_t offset, - int flags, user_ssize_t *retval ); +extern int dofileread(vfs_context_t ctx, struct fileproc *fp, + user_addr_t bufp, user_size_t nbyte, + off_t offset, int flags, user_ssize_t *retval ); +extern int dofilewrite(vfs_context_t ctx, struct fileproc *fp, + user_addr_t bufp, user_size_t nbyte, off_t offset, + int flags, user_ssize_t *retval 
); +#if DEBUG +static uint32_t lio_contexts_alloced = 0; +#endif /* DEBUG */ /* * aio external global variables. */ -extern int aio_max_requests; /* AIO_MAX - configurable */ +extern int aio_max_requests; /* AIO_MAX - configurable */ extern int aio_max_requests_per_process; /* AIO_PROCESS_MAX - configurable */ -extern int aio_worker_threads; /* AIO_THREAD_COUNT - configurable */ +extern int aio_worker_threads; /* AIO_THREAD_COUNT - configurable */ /* * aio static variables. */ -static aio_anchor_cb aio_anchor; -static lck_mtx_t * aio_lock; -static lck_grp_t * aio_lock_grp; -static lck_attr_t * aio_lock_attr; -static lck_grp_attr_t * aio_lock_grp_attr; -static struct zone *aio_workq_zonep; +static aio_anchor_cb aio_anchor; +static lck_grp_t *aio_proc_lock_grp; +static lck_grp_t *aio_entry_lock_grp; +static lck_grp_t *aio_queue_lock_grp; +static lck_attr_t *aio_lock_attr; +static lck_grp_attr_t *aio_lock_grp_attr; +static struct zone *aio_workq_zonep; +static lck_mtx_t aio_entry_mtx; +static lck_mtx_t aio_proc_mtx; + +static void +aio_entry_lock(__unused aio_workq_entry *entryp) +{ + lck_mtx_lock(&aio_entry_mtx); +} + +static void +aio_entry_lock_spin(__unused aio_workq_entry *entryp) +{ + lck_mtx_lock_spin(&aio_entry_mtx); +} + +static void +aio_entry_unlock(__unused aio_workq_entry *entryp) +{ + lck_mtx_unlock(&aio_entry_mtx); +} + +/* Hash */ +static aio_workq_t +aio_entry_workq(__unused aio_workq_entry *entryp) +{ + return &aio_anchor.aio_async_workqs[0]; +} + +static lck_mtx_t* +aio_entry_mutex(__unused aio_workq_entry *entryp) +{ + return &aio_entry_mtx; +} + +static void +aio_workq_init(aio_workq_t wq) +{ + TAILQ_INIT(&wq->aioq_entries); + wq->aioq_count = 0; + lck_mtx_init(&wq->aioq_mtx, aio_queue_lock_grp, aio_lock_attr); + wq->aioq_waitq = wait_queue_alloc(SYNC_POLICY_FIFO); +} + + +/* + * Can be passed a queue which is locked spin. 
+ */ +static void +aio_workq_remove_entry_locked(aio_workq_t queue, aio_workq_entry *entryp) +{ + ASSERT_AIO_WORKQ_LOCK_OWNED(queue); + + if (entryp->aio_workq_link.tqe_prev == NULL) { + panic("Trying to remove an entry from a work queue, but it is not on a queue\n"); + } + + TAILQ_REMOVE(&queue->aioq_entries, entryp, aio_workq_link); + queue->aioq_count--; + entryp->aio_workq_link.tqe_prev = NULL; /* Not on a workq */ + + if (queue->aioq_count < 0) { + panic("Negative count on a queue.\n"); + } +} + +static void +aio_workq_add_entry_locked(aio_workq_t queue, aio_workq_entry *entryp) +{ + ASSERT_AIO_WORKQ_LOCK_OWNED(queue); + + TAILQ_INSERT_TAIL(&queue->aioq_entries, entryp, aio_workq_link); + if (queue->aioq_count < 0) { + panic("Negative count on a queue.\n"); + } + queue->aioq_count++; +} + +static void +aio_proc_lock(proc_t procp) +{ + lck_mtx_lock(aio_proc_mutex(procp)); +} + +static void +aio_proc_lock_spin(proc_t procp) +{ + lck_mtx_lock_spin(aio_proc_mutex(procp)); +} + +static void +aio_proc_move_done_locked(proc_t procp, aio_workq_entry *entryp) +{ + ASSERT_AIO_PROC_LOCK_OWNED(procp); + + TAILQ_REMOVE(&procp->p_aio_activeq, entryp, aio_proc_link ); + TAILQ_INSERT_TAIL( &procp->p_aio_doneq, entryp, aio_proc_link); + procp->p_aio_active_count--; + OSIncrementAtomic(&aio_anchor.aio_done_count); +} + +static void +aio_proc_remove_done_locked(proc_t procp, aio_workq_entry *entryp) +{ + TAILQ_REMOVE(&procp->p_aio_doneq, entryp, aio_proc_link); + OSDecrementAtomic(&aio_anchor.aio_done_count); + aio_decrement_total_count(); + procp->p_aio_total_count--; +} + +static void +aio_proc_unlock(proc_t procp) +{ + lck_mtx_unlock(aio_proc_mutex(procp)); +} + +static lck_mtx_t* +aio_proc_mutex(proc_t procp) +{ + return &procp->p_mlock; +} + +static void +aio_entry_ref_locked(aio_workq_entry *entryp) +{ + ASSERT_AIO_ENTRY_LOCK_OWNED(entryp); + + if (entryp->aio_refcount < 0) { + panic("AIO workq entry with a negative refcount.\n"); + } + entryp->aio_refcount++; +} + + +/* 
Return 1 if you've freed it */ +static void +aio_entry_unref_locked(aio_workq_entry *entryp) +{ + ASSERT_AIO_ENTRY_LOCK_OWNED(entryp); + + entryp->aio_refcount--; + if (entryp->aio_refcount < 0) { + panic("AIO workq entry with a negative refcount.\n"); + } +} + +static void +aio_entry_ref(aio_workq_entry *entryp) +{ + aio_entry_lock_spin(entryp); + aio_entry_ref_locked(entryp); + aio_entry_unlock(entryp); +} +static void +aio_entry_unref(aio_workq_entry *entryp) +{ + aio_entry_lock_spin(entryp); + aio_entry_unref_locked(entryp); + + if ((entryp->aio_refcount == 0) && ((entryp->flags & AIO_DO_FREE) != 0)) { + aio_entry_unlock(entryp); + aio_free_request(entryp); + } else { + aio_entry_unlock(entryp); + } + + return; +} + +static void +aio_entry_update_for_cancel(aio_workq_entry *entryp, boolean_t cancelled, int wait_for_completion, boolean_t disable_notification) +{ + aio_entry_lock_spin(entryp); + + if (cancelled) { + aio_entry_ref_locked(entryp); + entryp->errorval = ECANCELED; + entryp->returnval = -1; + } + + if ( wait_for_completion ) { + entryp->flags |= wait_for_completion; /* flag for special completion processing */ + } + + if ( disable_notification ) { + entryp->flags |= AIO_DISABLE; /* Don't want a signal */ + } + + aio_entry_unlock(entryp); +} + +static int +aio_entry_try_workq_remove(aio_workq_entry *entryp) +{ + /* Can only be cancelled if it's still on a work queue */ + if (entryp->aio_workq_link.tqe_prev != NULL) { + aio_workq_t queue; + + /* Will have to check again under the lock */ + queue = aio_entry_workq(entryp); + aio_workq_lock_spin(queue); + if (entryp->aio_workq_link.tqe_prev != NULL) { + aio_workq_remove_entry_locked(queue, entryp); + aio_workq_unlock(queue); + return 1; + } else { + aio_workq_unlock(queue); + } + } + return 0; +} + +static void +aio_workq_lock_spin(aio_workq_t wq) +{ + lck_mtx_lock_spin(aio_workq_mutex(wq)); +} +static void +aio_workq_unlock(aio_workq_t wq) +{ + lck_mtx_unlock(aio_workq_mutex(wq)); +} +static lck_mtx_t* 
+aio_workq_mutex(aio_workq_t wq) +{ + return &wq->aioq_mtx; +} /* * aio_cancel - attempt to cancel one or more async IO requests currently @@ -225,7 +500,6 @@ static struct zone *aio_workq_zonep; * is NULL then all outstanding async IO request for the given file * descriptor are cancelled (if possible). */ - int aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval ) { @@ -236,10 +510,7 @@ aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval ) (int)p, (int)uap->aiocbp, 0, 0, 0 ); /* quick check to see if there are any async IO requests queued up */ - AIO_LOCK; - result = aio_get_all_queues_count( ); - AIO_UNLOCK; - if ( result < 1 ) { + if (aio_get_all_queues_count() < 1) { result = 0; *retval = AIO_ALLDONE; goto ExitRoutine; @@ -247,14 +518,20 @@ aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval ) *retval = -1; if ( uap->aiocbp != USER_ADDR_NULL ) { - if ( !IS_64BIT_PROCESS(p) ) { - struct aiocb aiocb32; + if ( proc_is64bit(p) ) { + struct user64_aiocb aiocb64; + + result = copyin( uap->aiocbp, &aiocb64, sizeof(aiocb64) ); + if (result == 0 ) + do_munge_aiocb_user64_to_user(&aiocb64, &my_aiocb); + + } else { + struct user32_aiocb aiocb32; result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) ); if ( result == 0 ) - do_munge_aiocb( &aiocb32, &my_aiocb ); - } else - result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) ); + do_munge_aiocb_user32_to_user( &aiocb32, &my_aiocb ); + } if ( result != 0 ) { result = EAGAIN; @@ -270,7 +547,11 @@ aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval ) goto ExitRoutine; } } - result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE ); + + aio_proc_lock(p); + result = do_aio_cancel_locked( p, uap->fd, uap->aiocbp, 0, FALSE ); + ASSERT_AIO_PROC_LOCK_OWNED(p); + aio_proc_unlock(p); if ( result != -1 ) { *retval = result; @@ -294,24 +575,23 @@ aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval ) * a file descriptor that is closing. * THIS MAY BLOCK. 
*/ - __private_extern__ void _aio_close(proc_t p, int fd ) { - int error, count; + int error; /* quick check to see if there are any async IO requests queued up */ - AIO_LOCK; - count = aio_get_all_queues_count( ); - AIO_UNLOCK; - if ( count < 1 ) + if (aio_get_all_queues_count() < 1) { return; + } KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START, (int)p, fd, 0, 0, 0 ); /* cancel all async IO requests on our todo queues for this file descriptor */ - error = do_aio_cancel( p, fd, 0, TRUE, FALSE ); + aio_proc_lock(p); + error = do_aio_cancel_locked( p, fd, 0, AIO_CLOSE_WAIT, FALSE ); + ASSERT_AIO_PROC_LOCK_OWNED(p); if ( error == AIO_NOTCANCELED ) { /* * AIO_NOTCANCELED is returned when we find an aio request for this process @@ -325,9 +605,15 @@ _aio_close(proc_t p, int fd ) KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE, (int)p, fd, 0, 0, 0 ); - tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_close", 0 ); + while (aio_proc_active_requests_for_file(p, fd) > 0) { + msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO | PDROP, "aio_close", 0 ); + } + + } else { + aio_proc_unlock(p); } + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END, (int)p, fd, 0, 0, 0 ); @@ -342,7 +628,6 @@ _aio_close(proc_t p, int fd ) * value that would be set by the corresponding IO request (read, wrtie, * fdatasync, or sync). 
*/ - int aio_error(proc_t p, struct aio_error_args *uap, int *retval ) { @@ -352,19 +637,22 @@ aio_error(proc_t p, struct aio_error_args *uap, int *retval ) KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START, (int)p, (int)uap->aiocbp, 0, 0, 0 ); - AIO_LOCK; - - /* quick check to see if there are any async IO requests queued up */ - if ( aio_get_all_queues_count( ) < 1 ) { - error = EINVAL; - goto ExitRoutine; + /* see if there are any aios to check */ + if (aio_get_all_queues_count() < 1) { + return EINVAL; } + aio_proc_lock(p); + /* look for a match on our queue of async IO requests that have completed */ - TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &p->p_aio_doneq, aio_proc_link) { if ( entryp->uaiocbp == uap->aiocbp ) { + ASSERT_AIO_FROM_PROC(entryp, p); + + aio_entry_lock_spin(entryp); *retval = entryp->errorval; error = 0; + aio_entry_unlock(entryp); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE, (int)p, (int)uap->aiocbp, *retval, 0, 0 ); goto ExitRoutine; @@ -372,8 +660,9 @@ aio_error(proc_t p, struct aio_error_args *uap, int *retval ) } /* look for a match on our queue of active async IO requests */ - TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &p->p_aio_activeq, aio_proc_link) { if ( entryp->uaiocbp == uap->aiocbp ) { + ASSERT_AIO_FROM_PROC(entryp, p); *retval = EINPROGRESS; error = 0; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE, @@ -381,23 +670,13 @@ aio_error(proc_t p, struct aio_error_args *uap, int *retval ) goto ExitRoutine; } } - - /* look for a match on our queue of todo work */ - TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) { - *retval = EINPROGRESS; - error = 0; - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_workq)) | DBG_FUNC_NONE, - (int)p, (int)uap->aiocbp, *retval, 0, 0 ); - goto ExitRoutine; - } - } 
+ error = EINVAL; ExitRoutine: KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END, (int)p, (int)uap->aiocbp, error, 0, 0 ); - AIO_UNLOCK; + aio_proc_unlock(p); return( error ); @@ -411,7 +690,6 @@ aio_error(proc_t p, struct aio_error_args *uap, int *retval ) * NOTE - we do not support op O_DSYNC at this point since we do not support the * fdatasync() call. */ - int aio_fsync(proc_t p, struct aio_fsync_args *uap, int *retval ) { @@ -425,10 +703,8 @@ aio_fsync(proc_t p, struct aio_fsync_args *uap, int *retval ) /* 0 := O_SYNC for binary backward compatibility with Panther */ if (uap->op == O_SYNC || uap->op == 0) fsync_kind = AIO_FSYNC; -#if 0 // we don't support fdatasync() call yet else if ( uap->op == O_DSYNC ) fsync_kind = AIO_DSYNC; -#endif else { *retval = -1; error = EINVAL; @@ -452,7 +728,6 @@ aio_fsync(proc_t p, struct aio_fsync_args *uap, int *retval ) * file descriptor (uap->aiocbp->aio_fildes) into the buffer * (uap->aiocbp->aio_buf). */ - int aio_read(proc_t p, struct aio_read_args *uap, int *retval ) { @@ -478,54 +753,57 @@ aio_read(proc_t p, struct aio_read_args *uap, int *retval ) /* * aio_return - return the return status associated with the async IO * request referred to by uap->aiocbp. The return status is the value - * that would be returned by corresponding IO request (read, wrtie, + * that would be returned by corresponding IO request (read, write, * fdatasync, or sync). This is where we release kernel resources * held for async IO call associated with the given aiocb pointer. 
*/ - int aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval ) { aio_workq_entry *entryp; int error; - boolean_t lock_held; + boolean_t proc_lock_held = FALSE; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START, (int)p, (int)uap->aiocbp, 0, 0, 0 ); - AIO_LOCK; - lock_held = TRUE; - *retval = 0; - - /* quick check to see if there are any async IO requests queued up */ - if ( aio_get_all_queues_count( ) < 1 ) { + /* See if there are any entries to check */ + if (aio_get_all_queues_count() < 1) { error = EINVAL; goto ExitRoutine; } + aio_proc_lock(p); + proc_lock_held = TRUE; + *retval = 0; + /* look for a match on our queue of async IO requests that have completed */ - TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &p->p_aio_doneq, aio_proc_link) { + ASSERT_AIO_FROM_PROC(entryp, p); if ( entryp->uaiocbp == uap->aiocbp ) { - TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count--; - p->aio_done_count--; + /* Done and valid for aio_return(), pull it off the list */ + aio_proc_remove_done_locked(p, entryp); + /* Drop the proc lock, but keep the entry locked */ + aio_entry_lock(entryp); + aio_proc_unlock(p); + proc_lock_held = FALSE; + *retval = entryp->returnval; + error = 0; - /* we cannot free requests that are still completing */ - if ( (entryp->flags & AIO_COMPLETION) == 0 ) { - vm_map_t my_map; - - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; - AIO_UNLOCK; - lock_held = FALSE; - aio_free_request( entryp, my_map ); + /* No references and off all lists, safe to free */ + if (entryp->aio_refcount == 0) { + aio_entry_unlock(entryp); + aio_free_request(entryp); } - else - /* tell completion code to free this request */ + else { + /* Whoever has the refcount will have to free it */ entryp->flags |= AIO_DO_FREE; - error = 0; + aio_entry_unlock(entryp); + } + + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE, (int)p, (int)uap->aiocbp, 
*retval, 0, 0 ); goto ExitRoutine; @@ -533,7 +811,8 @@ aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval ) } /* look for a match on our queue of active async IO requests */ - TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &p->p_aio_activeq, aio_proc_link) { + ASSERT_AIO_FROM_PROC(entryp, p); if ( entryp->uaiocbp == uap->aiocbp ) { error = EINPROGRESS; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE, @@ -542,20 +821,11 @@ aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval ) } } - /* look for a match on our queue of todo work */ - TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) { - error = EINPROGRESS; - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_workq)) | DBG_FUNC_NONE, - (int)p, (int)uap->aiocbp, *retval, 0, 0 ); - goto ExitRoutine; - } - } error = EINVAL; ExitRoutine: - if ( lock_held ) - AIO_UNLOCK; + if (proc_lock_held) + aio_proc_unlock(p); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END, (int)p, (int)uap->aiocbp, error, 0, 0 ); @@ -571,7 +841,6 @@ aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval ) * for cancelled or active aio requests that complete. * This routine MAY block! */ - __private_extern__ void _aio_exec(proc_t p ) { @@ -595,29 +864,29 @@ _aio_exec(proc_t p ) * we can and wait for those already active. We also disable signaling * for cancelled or active aio requests that complete. This routine MAY block! 
*/ - __private_extern__ void _aio_exit(proc_t p ) { - int error, count; + int error; aio_workq_entry *entryp; + /* quick check to see if there are any async IO requests queued up */ - AIO_LOCK; - count = aio_get_all_queues_count( ); - AIO_UNLOCK; - if ( count < 1 ) { + if (aio_get_all_queues_count() < 1) { return; } KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START, (int)p, 0, 0, 0, 0 ); + aio_proc_lock(p); + /* * cancel async IO requests on the todo work queue and wait for those * already active to complete. */ - error = do_aio_cancel( p, 0, 0, TRUE, TRUE ); + error = do_aio_cancel_locked( p, 0, 0, AIO_EXIT_WAIT, TRUE ); + ASSERT_AIO_PROC_LOCK_OWNED(p); if ( error == AIO_NOTCANCELED ) { /* * AIO_NOTCANCELED is returned when we find an aio request for this process @@ -631,52 +900,69 @@ _aio_exit(proc_t p ) KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE, (int)p, 0, 0, 0, 0 ); - tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 ); + while (p->p_aio_active_count != 0) { + msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO, "aio_exit", 0 ); + } + } + + if (p->p_aio_active_count != 0) { + panic("Exiting process has %d active AIOs after cancellation has completed.\n", p->p_aio_active_count); } /* release all aio resources used by this process */ - AIO_LOCK; - entryp = TAILQ_FIRST( &p->aio_doneq ); + entryp = TAILQ_FIRST( &p->p_aio_doneq ); while ( entryp != NULL ) { + ASSERT_AIO_FROM_PROC(entryp, p); aio_workq_entry *next_entryp; - next_entryp = TAILQ_NEXT( entryp, aio_workq_link ); - TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count--; - p->aio_done_count--; + next_entryp = TAILQ_NEXT( entryp, aio_proc_link); + aio_proc_remove_done_locked(p, entryp); /* we cannot free requests that are still completing */ - if ( (entryp->flags & AIO_COMPLETION) == 0 ) { - vm_map_t my_map; - - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; - AIO_UNLOCK; - aio_free_request( entryp, 
my_map ); + aio_entry_lock_spin(entryp); + if (entryp->aio_refcount == 0) { + aio_proc_unlock(p); + aio_entry_unlock(entryp); + aio_free_request(entryp); /* need to start over since aio_doneq may have been */ /* changed while we were away. */ - AIO_LOCK; - entryp = TAILQ_FIRST( &p->aio_doneq ); + aio_proc_lock(p); + entryp = TAILQ_FIRST( &p->p_aio_doneq ); continue; } - else - /* tell completion code to free this request */ + else { + /* whoever has the reference will have to do the free */ entryp->flags |= AIO_DO_FREE; + } + + aio_entry_unlock(entryp); entryp = next_entryp; } - AIO_UNLOCK; - + + aio_proc_unlock(p); + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END, (int)p, 0, 0, 0, 0 ); - return; } /* _aio_exit */ +static boolean_t +should_cancel(aio_workq_entry *entryp, user_addr_t aiocbp, int fd) +{ + if ( (aiocbp == USER_ADDR_NULL && fd == 0) || + (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || + (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { + return TRUE; + } + + return FALSE; +} + /* - * do_aio_cancel - cancel async IO requests (if possible). We get called by + * do_aio_cancel_locked - cancel async IO requests (if possible). We get called by * aio_cancel, close, and at exit. * There are three modes of operation: 1) cancel all async IOs for a process - * fd is 0 and aiocbp is NULL 2) cancel all async IOs for file descriptor - fd @@ -688,167 +974,113 @@ _aio_exit(proc_t p ) * were already complete. * WARNING - do not deference aiocbp in this routine, it may point to user * land data that has not been copied in (when called from aio_cancel() ) + * + * Called with proc locked, and returns the same way. 
*/ - static int -do_aio_cancel(proc_t p, int fd, user_addr_t aiocbp, - boolean_t wait_for_completion, boolean_t disable_notification ) +do_aio_cancel_locked(proc_t p, int fd, user_addr_t aiocbp, + int wait_for_completion, boolean_t disable_notification ) { + ASSERT_AIO_PROC_LOCK_OWNED(p); + aio_workq_entry *entryp; int result; result = -1; /* look for a match on our queue of async todo work. */ - AIO_LOCK; - entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq ); + entryp = TAILQ_FIRST(&p->p_aio_activeq); while ( entryp != NULL ) { + ASSERT_AIO_FROM_PROC(entryp, p); aio_workq_entry *next_entryp; - - next_entryp = TAILQ_NEXT( entryp, aio_workq_link ); - if ( p == entryp->procp ) { - if ( (aiocbp == USER_ADDR_NULL && fd == 0) || - (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { - /* we found a match so we remove the entry from the */ - /* todo work queue and place it on the done queue */ - TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); - aio_anchor.aio_async_workq_count--; - entryp->errorval = ECANCELED; - entryp->returnval = -1; - if ( disable_notification ) - entryp->flags |= AIO_DISABLE; /* flag for special completion processing */ - result = AIO_CANCELED; - - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE, - (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); - - TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count++; - p->aio_done_count++; - entryp->flags |= AIO_COMPLETION; - AIO_UNLOCK; - - /* do completion processing for this request */ - do_aio_completion( entryp ); - - AIO_LOCK; - entryp->flags &= ~AIO_COMPLETION; - if ( (entryp->flags & AIO_DO_FREE) != 0 ) { - vm_map_t my_map; - - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; - AIO_UNLOCK; - aio_free_request( entryp, my_map ); - } - else - AIO_UNLOCK; - if ( aiocbp != USER_ADDR_NULL ) { - return( result ); - } - - /* need to 
start over since aio_async_workq may have been */ - /* changed while we were away doing completion processing. */ - AIO_LOCK; - entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq ); - continue; - } + next_entryp = TAILQ_NEXT( entryp, aio_proc_link); + if (!should_cancel(entryp, aiocbp, fd)) { + entryp = next_entryp; + continue; } - entryp = next_entryp; - } /* while... */ - - /* - * look for a match on our queue of synchronous todo work. This will - * be a rare occurrence but could happen if a process is terminated while - * processing a lio_listio call. - */ - entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq ); - while ( entryp != NULL ) { - aio_workq_entry *next_entryp; - - next_entryp = TAILQ_NEXT( entryp, aio_workq_link ); - if ( p == entryp->procp ) { - if ( (aiocbp == USER_ADDR_NULL && fd == 0) || - (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { - /* we found a match so we remove the entry from the */ - /* todo work queue and place it on the done queue */ - TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link ); - aio_anchor.lio_sync_workq_count--; - entryp->errorval = ECANCELED; - entryp->returnval = -1; - if ( disable_notification ) - entryp->flags |= AIO_DISABLE; /* flag for special completion processing */ - result = AIO_CANCELED; - - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_sync_workq)) | DBG_FUNC_NONE, - (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); - - TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count++; - p->aio_done_count++; - if ( aiocbp != USER_ADDR_NULL ) { - AIO_UNLOCK; - return( result ); - } + + /* Can only be cancelled if it's still on a work queue */ + if (aio_entry_try_workq_remove(entryp) != 0) { + /* Have removed from workq. 
Update entry state and take a ref */ + aio_entry_update_for_cancel(entryp, TRUE, 0, disable_notification); + + /* Put on the proc done queue and update counts, then unlock the proc */ + aio_proc_move_done_locked(p, entryp); + aio_proc_unlock(p); + + /* Now it's officially cancelled. Do the completion */ + result = AIO_CANCELED; + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE, + (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); + do_aio_completion(entryp); + + /* This will free if the aio_return() has already happened ... */ + aio_entry_unref(entryp); + aio_proc_lock(p); + + if ( aiocbp != USER_ADDR_NULL ) { + return( result ); } - } - entryp = next_entryp; - } /* while... */ - /* - * look for a match on our queue of active async IO requests and - * return AIO_NOTCANCELED result. - */ - TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) { - if ( (aiocbp == USER_ADDR_NULL && fd == 0) || - (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { + /* + * Restart from the head of the proc active queue since it + * may have been changed while we were away doing completion + * processing. + * + * Note that if we found an uncancellable AIO before, we will + * either find it again or discover that it's been completed, + * so resetting the result will not cause us to return success + * despite outstanding AIOs. + */ + entryp = TAILQ_FIRST(&p->p_aio_activeq); + result = -1; /* As if beginning anew */ + } else { + /* + * It's been taken off the active queue already, i.e. is in flight. + * All we can do is ask for notification. 
+ */ result = AIO_NOTCANCELED; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE, - (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); + (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); + + /* Mark for waiting and such; will not take a ref if "cancelled" arg is FALSE */ + aio_entry_update_for_cancel(entryp, FALSE, wait_for_completion, disable_notification); - if ( wait_for_completion ) - entryp->flags |= AIO_WAITING; /* flag for special completion processing */ - if ( disable_notification ) - entryp->flags |= AIO_DISABLE; /* flag for special completion processing */ if ( aiocbp != USER_ADDR_NULL ) { - AIO_UNLOCK; return( result ); } + entryp = next_entryp; } - } - + } /* while... */ + /* * if we didn't find any matches on the todo or active queues then look for a * match on our queue of async IO requests that have completed and if found * return AIO_ALLDONE result. + * + * Proc AIO lock is still held. */ if ( result == -1 ) { - TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { - if ( (aiocbp == USER_ADDR_NULL && fd == 0) || - (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) || - (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) { + TAILQ_FOREACH(entryp, &p->p_aio_doneq, aio_proc_link) { + ASSERT_AIO_FROM_PROC(entryp, p); + if (should_cancel(entryp, aiocbp, fd)) { result = AIO_ALLDONE; - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE, - (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); + (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 ); if ( aiocbp != USER_ADDR_NULL ) { - AIO_UNLOCK; return( result ); } } } } - AIO_UNLOCK; return( result ); -} /* do_aio_cancel */ +} + /* do_aio_cancel_locked */ /* @@ -885,10 +1117,7 @@ aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retva abstime = 0; aiocbpp = NULL; - /* quick check to see if there are any async IO requests queued up */ - AIO_LOCK; - count = aio_get_all_queues_count( ); - AIO_UNLOCK; + count 
= aio_get_all_queues_count( ); if ( count < 1 ) { error = EINVAL; goto ExitThisRoutine; @@ -901,10 +1130,15 @@ aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retva if ( uap->timeoutp != USER_ADDR_NULL ) { if ( proc_is64bit(p) ) { - error = copyin( uap->timeoutp, &ts, sizeof(ts) ); + struct user64_timespec temp; + error = copyin( uap->timeoutp, &temp, sizeof(temp) ); + if ( error == 0 ) { + ts.tv_sec = temp.tv_sec; + ts.tv_nsec = temp.tv_nsec; + } } else { - struct timespec temp; + struct user32_timespec temp; error = copyin( uap->timeoutp, &temp, sizeof(temp) ); if ( error == 0 ) { ts.tv_sec = temp.tv_sec; @@ -926,36 +1160,15 @@ aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retva clock_absolutetime_interval_to_deadline( abstime, &abstime ); } - /* we reserve enough space for largest possible pointer size */ - MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK ); + aiocbpp = aio_copy_in_list(p, uap->aiocblist, uap->nent); if ( aiocbpp == NULL ) { error = EAGAIN; goto ExitThisRoutine; } - /* copyin our aiocb pointers from list */ - error = copyin( uap->aiocblist, aiocbpp, - proc_is64bit(p) ? 
(uap->nent * sizeof(user_addr_t)) - : (uap->nent * sizeof(uintptr_t)) ); - if ( error != 0 ) { - error = EAGAIN; - goto ExitThisRoutine; - } - - /* we depend on a list of user_addr_t's so we need to munge and expand */ - /* when these pointers came from a 32-bit process */ - if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) { - /* position to the last entry and work back from there */ - uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1); - user_addr_t *my_addrp = aiocbpp + (uap->nent - 1); - for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) { - *my_addrp = (user_addr_t) (*my_ptrp); - } - } - /* check list of aio requests to see if any have completed */ check_for_our_aiocbp: - AIO_LOCK; + aio_proc_lock_spin(p); for ( i = 0; i < uap->nent; i++ ) { user_addr_t aiocbp; @@ -965,11 +1178,12 @@ aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retva continue; /* return immediately if any aio request in the list is done */ - TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &p->p_aio_doneq, aio_proc_link) { + ASSERT_AIO_FROM_PROC(entryp, p); if ( entryp->uaiocbp == aiocbp ) { + aio_proc_unlock(p); *retval = 0; error = 0; - AIO_UNLOCK; goto ExitThisRoutine; } } @@ -984,11 +1198,8 @@ aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retva * interrupts us. If an async IO completes before a signal fires or our * timeout expires, we get a wakeup call from aio_work_thread(). */ - assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime ); - AIO_UNLOCK; - - error = thread_block( THREAD_CONTINUE_NULL ); + error = msleep1(&p->AIO_SUSPEND_SLEEP_CHAN, aio_proc_mutex(p), PCATCH | PWAIT | PDROP, "aio_suspend", abstime); /* XXX better priority? */ if ( error == THREAD_AWAKENED ) { /* * got our wakeup call from aio_work_thread(). 
@@ -1048,28 +1259,217 @@ aio_write(proc_t p, struct aio_write_args *uap, int *retval ) } /* aio_write */ -/* - * lio_listio - initiate a list of IO requests. We process the list of aiocbs - * either synchronously (mode == LIO_WAIT) or asynchronously (mode == LIO_NOWAIT). - * The caller gets error and return status for each aiocb in the list via aio_error - * and aio_return. We must keep completed requests until released by the - * aio_return call. - */ - -int -lio_listio(proc_t p, struct lio_listio_args *uap, int *retval ) +static user_addr_t * +aio_copy_in_list(proc_t procp, user_addr_t aiocblist, int nent) { - int i; - int call_result; - int result; - long group_tag; - aio_workq_entry * *entryp_listp; - user_addr_t *aiocbpp; + user_addr_t *aiocbpp; + int i, result; - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START, - (int)p, uap->nent, uap->mode, 0, 0 ); + /* we reserve enough space for largest possible pointer size */ + MALLOC( aiocbpp, user_addr_t *, (nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK ); + if ( aiocbpp == NULL ) + goto err; + + /* copyin our aiocb pointers from list */ + result = copyin( aiocblist, aiocbpp, + proc_is64bit(procp) ? 
(nent * sizeof(user64_addr_t)) + : (nent * sizeof(user32_addr_t)) ); + if ( result) { + FREE( aiocbpp, M_TEMP ); + aiocbpp = NULL; + goto err; + } + + /* + * We depend on a list of user_addr_t's so we need to + * munge and expand when these pointers came from a + * 32-bit process + */ + if ( !proc_is64bit(procp) ) { + /* copy from last to first to deal with overlap */ + user32_addr_t *my_ptrp = ((user32_addr_t *)aiocbpp) + (nent - 1); + user_addr_t *my_addrp = aiocbpp + (nent - 1); + + for (i = 0; i < nent; i++, my_ptrp--, my_addrp--) { + *my_addrp = (user_addr_t) (*my_ptrp); + } + } + +err: + return (aiocbpp); +} + + +static int +aio_copy_in_sigev(proc_t procp, user_addr_t sigp, struct user_sigevent *sigev) +{ + int result = 0; + + if (sigp == USER_ADDR_NULL) + goto out; + + /* + * We need to munge aio_sigevent since it contains pointers. + * Since we do not know if sigev_value is an int or a ptr we do + * NOT cast the ptr to a user_addr_t. This means if we send + * this info back to user space we need to remember sigev_value + * was not expanded for the 32-bit case. + * + * Notes: This does NOT affect us since we don't support + * sigev_value yet in the aio context. 
+ */ + if ( proc_is64bit(procp) ) { + struct user64_sigevent sigevent64; + + result = copyin( sigp, &sigevent64, sizeof(sigevent64) ); + if ( result == 0 ) { + sigev->sigev_notify = sigevent64.sigev_notify; + sigev->sigev_signo = sigevent64.sigev_signo; + sigev->sigev_value.size_equivalent.sival_int = sigevent64.sigev_value.size_equivalent.sival_int; + sigev->sigev_notify_function = sigevent64.sigev_notify_function; + sigev->sigev_notify_attributes = sigevent64.sigev_notify_attributes; + } + + } else { + struct user32_sigevent sigevent32; + + result = copyin( sigp, &sigevent32, sizeof(sigevent32) ); + if ( result == 0 ) { + sigev->sigev_notify = sigevent32.sigev_notify; + sigev->sigev_signo = sigevent32.sigev_signo; + sigev->sigev_value.size_equivalent.sival_int = sigevent32.sigev_value.sival_int; + sigev->sigev_notify_function = CAST_USER_ADDR_T(sigevent32.sigev_notify_function); + sigev->sigev_notify_attributes = CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes); + } + } + + if ( result != 0 ) { + result = EAGAIN; + } + +out: + return (result); +} + +/* + * aio_enqueue_work + * + * Queue up the entry on the aio asynchronous work queue in priority order + * based on the relative priority of the request. We calculate the relative + * priority using the nice value of the caller and the value + * + * Parameters: procp Process queueing the I/O + * entryp The work queue entry being queued + * + * Returns: (void) No failure modes + * + * Notes: This function is used for both lio_listio and aio + * + * XXX: At some point, we may have to consider thread priority + * rather than process priority, but we don't maintain the + * adjusted priority for threads the POSIX way. + * + * + * Called with proc locked. 
+ */ +static void +aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked) +{ +#if 0 + aio_workq_entry *my_entryp; /* used for insertion sort */ +#endif /* 0 */ + aio_workq_t queue = aio_entry_workq(entryp); + + if (proc_locked == 0) { + aio_proc_lock(procp); + } + + ASSERT_AIO_PROC_LOCK_OWNED(procp); + + /* Onto proc queue */ + TAILQ_INSERT_TAIL(&procp->p_aio_activeq, entryp, aio_proc_link); + procp->p_aio_active_count++; + procp->p_aio_total_count++; + + /* And work queue */ + aio_workq_lock_spin(queue); + aio_workq_add_entry_locked(queue, entryp); + wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED); + aio_workq_unlock(queue); + + if (proc_locked == 0) { + aio_proc_unlock(procp); + } + +#if 0 + /* + * Procedure: + * + * (1) The nice value is in the range PRIO_MIN..PRIO_MAX [-20..20] + * (2) The normalized nice value is in the range 0..((2 * NZERO) - 1) + * which is [0..39], with 0 not being used. In nice values, the + * lower the nice value, the higher the priority. + * (3) The normalized scheduling prioritiy is the highest nice value + * minus the current nice value. In I/O scheduling priority, the + * higher the value the lower the priority, so it is the inverse + * of the nice value (the higher the number, the higher the I/O + * priority). 
+ * (4) From the normalized scheduling priority, we subtract the + * request priority to get the request priority value number; + * this means that requests are only capable of depressing their + * priority relative to other requests, + */ + entryp->priority = (((2 * NZERO) - 1) - procp->p_nice); + + /* only premit depressing the priority */ + if (entryp->aiocb.aio_reqprio < 0) + entryp->aiocb.aio_reqprio = 0; + if (entryp->aiocb.aio_reqprio > 0) { + entryp->priority -= entryp->aiocb.aio_reqprio; + if (entryp->priority < 0) + entryp->priority = 0; + } + + /* Insertion sort the entry; lowest ->priority to highest */ + TAILQ_FOREACH(my_entryp, &aio_anchor.aio_async_workq, aio_workq_link) { + if ( entryp->priority <= my_entryp->priority) { + TAILQ_INSERT_BEFORE(my_entryp, entryp, aio_workq_link); + break; + } + } + if (my_entryp == NULL) + TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); +#endif /* 0 */ +} + + +/* + * lio_listio - initiate a list of IO requests. We process the list of + * aiocbs either synchronously (mode == LIO_WAIT) or asynchronously + * (mode == LIO_NOWAIT). + * + * The caller gets error and return status for each aiocb in the list + * via aio_error and aio_return. We must keep completed requests until + * released by the aio_return call. 
+ */ +int +lio_listio(proc_t p, struct lio_listio_args *uap, int *retval ) +{ + int i; + int call_result; + int result; + int old_count; + aio_workq_entry **entryp_listp; + user_addr_t *aiocbpp; + struct user_sigevent aiosigev; + aio_lio_context *lio_context; + boolean_t free_context = FALSE; + + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START, + (int)p, uap->nent, uap->mode, 0, 0 ); entryp_listp = NULL; + lio_context = NULL; aiocbpp = NULL; call_result = -1; *retval = -1; @@ -1082,171 +1482,149 @@ lio_listio(proc_t p, struct lio_listio_args *uap, int *retval ) call_result = EINVAL; goto ExitRoutine; } - - /* - * we use group_tag to mark IO requests for delayed completion processing - * which means we wait until all IO requests in the group have completed - * before we either return to the caller when mode is LIO_WAIT or signal - * user when mode is LIO_NOWAIT. - */ - group_tag = random(); /* - * allocate a list of aio_workq_entry pointers that we will use to queue - * up all our requests at once while holding our lock. + * allocate a list of aio_workq_entry pointers that we will use + * to queue up all our requests at once while holding our lock. */ MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK ); if ( entryp_listp == NULL ) { call_result = EAGAIN; goto ExitRoutine; } - - /* we reserve enough space for largest possible pointer size */ - MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK ); - if ( aiocbpp == NULL ) { + + MALLOC( lio_context, aio_lio_context*, sizeof(aio_lio_context), M_TEMP, M_WAITOK ); + if ( lio_context == NULL ) { call_result = EAGAIN; goto ExitRoutine; } - /* copyin our aiocb pointers from list */ - result = copyin( uap->aiocblist, aiocbpp, - IS_64BIT_PROCESS(p) ? 
(uap->nent * sizeof(user_addr_t)) - : (uap->nent * sizeof(uintptr_t)) ); - if ( result != 0 ) { +#if DEBUG + OSIncrementAtomic(&lio_contexts_alloced); +#endif /* DEBUG */ + + bzero(lio_context, sizeof(aio_lio_context)); + + aiocbpp = aio_copy_in_list(p, uap->aiocblist, uap->nent); + if ( aiocbpp == NULL ) { call_result = EAGAIN; goto ExitRoutine; } - - /* we depend on a list of user_addr_t's so we need to munge and expand */ - /* when these pointers came from a 32-bit process */ - if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) { - /* position to the last entry and work back from there */ - uintptr_t *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1); - user_addr_t *my_addrp = aiocbpp + (uap->nent - 1); - for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) { - *my_addrp = (user_addr_t) (*my_ptrp); - } + + /* + * Use sigevent passed in to lio_listio for each of our calls, but + * only do completion notification after the last request completes. + */ + bzero(&aiosigev, sizeof(aiosigev)); + /* Only copy in an sigev if the user supplied one */ + if (uap->sigp != USER_ADDR_NULL) { + call_result = aio_copy_in_sigev(p, uap->sigp, &aiosigev); + if ( call_result) + goto ExitRoutine; } /* process list of aio requests */ + lio_context->io_issued = uap->nent; + lio_context->io_waiter = uap->mode == LIO_WAIT ? 
1 : 0; /* Should it be freed by last AIO */ for ( i = 0; i < uap->nent; i++ ) { user_addr_t my_aiocbp; + aio_workq_entry *entryp; *(entryp_listp + i) = NULL; my_aiocbp = *(aiocbpp + i); /* NULL elements are legal so check for 'em */ - if ( my_aiocbp == USER_ADDR_NULL ) + if ( my_aiocbp == USER_ADDR_NULL ) { + aio_proc_lock_spin(p); + lio_context->io_issued--; + aio_proc_unlock(p); continue; + } - if ( uap->mode == LIO_NOWAIT ) - result = lio_create_async_entry( p, my_aiocbp, uap->sigp, - group_tag, (entryp_listp + i) ); - else - result = lio_create_sync_entry( p, my_aiocbp, group_tag, - (entryp_listp + i) ); - + /* + * We use lio_context to mark IO requests for delayed completion + * processing which means we wait until all IO requests in the + * group have completed before we either return to the caller + * when mode is LIO_WAIT or signal user when mode is LIO_NOWAIT. + * + * We use the address of the lio_context for this, since it is + * unique in the address space. + */ + result = lio_create_entry( p, my_aiocbp, lio_context, (entryp_listp + i) ); if ( result != 0 && call_result == -1 ) call_result = result; - } - - /* - * we need to protect this section since we do not want any of these grouped - * IO requests to begin until we have them all on the queue. 
- */ - AIO_LOCK; - for ( i = 0; i < uap->nent; i++ ) { - aio_workq_entry *entryp; /* NULL elements are legal so check for 'em */ entryp = *(entryp_listp + i); - if ( entryp == NULL ) + if ( entryp == NULL ) { + aio_proc_lock_spin(p); + lio_context->io_issued--; + aio_proc_unlock(p); continue; + } + + if ( uap->mode == LIO_NOWAIT ) { + /* Set signal hander, if any */ + entryp->aiocb.aio_sigevent = aiosigev; + } else { + /* flag that this thread blocks pending completion */ + entryp->flags |= AIO_LIO_NOTIFY; + } /* check our aio limits to throttle bad or rude user land behavior */ - if ( aio_get_all_queues_count( ) >= aio_max_requests || + old_count = aio_increment_total_count(); + + aio_proc_lock_spin(p); + if ( old_count >= aio_max_requests || aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process || is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) { - vm_map_t my_map; - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; + lio_context->io_issued--; + aio_proc_unlock(p); + + aio_decrement_total_count(); + if ( call_result == -1 ) - call_result = EAGAIN; - AIO_UNLOCK; - aio_free_request( entryp, my_map ); - AIO_LOCK; + call_result = EAGAIN; + aio_free_request(entryp); + entryp_listp[i] = NULL; continue; } - /* place the request on the appropriate queue */ - if ( uap->mode == LIO_NOWAIT ) { - TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); - aio_anchor.aio_async_workq_count++; - - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE, - (int)p, (int)entryp->uaiocbp, 0, 0, 0 ); - } - else { - TAILQ_INSERT_TAIL( &aio_anchor.lio_sync_workq, entryp, aio_workq_link ); - aio_anchor.lio_sync_workq_count++; - } - } - - if ( uap->mode == LIO_NOWAIT ) { - /* caller does not want to wait so we'll fire off a worker thread and return */ - wakeup_one( (caddr_t) &aio_anchor.aio_async_workq ); + lck_mtx_convert_spin(aio_proc_mutex(p)); + aio_enqueue_work(p, entryp, 1); + aio_proc_unlock(p); + + 
KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE, + (int)p, (int)entryp->uaiocbp, 0, 0, 0 ); } - else { - aio_workq_entry *entryp; - int error; - /* - * mode is LIO_WAIT - handle the IO requests now. - */ - entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq ); - while ( entryp != NULL ) { - if ( p == entryp->procp && group_tag == entryp->group_tag ) { - - TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link ); - aio_anchor.lio_sync_workq_count--; - AIO_UNLOCK; - - if ( (entryp->flags & AIO_READ) != 0 ) { - error = do_aio_read( entryp ); - } - else if ( (entryp->flags & AIO_WRITE) != 0 ) { - error = do_aio_write( entryp ); - } - else if ( (entryp->flags & AIO_FSYNC) != 0 ) { - error = do_aio_fsync( entryp ); - } - else { - printf( "%s - unknown aio request - flags 0x%02X \n", - __FUNCTION__, entryp->flags ); - error = EINVAL; - } - entryp->errorval = error; - if ( error != 0 && call_result == -1 ) - call_result = EIO; - - AIO_LOCK; - /* we're done with the IO request so move it on the done queue */ - TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count++; - p->aio_done_count++; - - /* need to start over since lio_sync_workq may have been changed while we */ - /* were away doing the IO. 
*/ - entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq ); - continue; - } /* p == entryp->procp */ + switch(uap->mode) { + case LIO_WAIT: + aio_proc_lock_spin(p); + while (lio_context->io_completed < lio_context->io_issued) { + result = msleep(lio_context, aio_proc_mutex(p), PCATCH | PRIBIO | PSPIN, "lio_listio", 0); - entryp = TAILQ_NEXT( entryp, aio_workq_link ); - } /* while ( entryp != NULL ) */ - } /* uap->mode == LIO_WAIT */ - AIO_UNLOCK; + /* If we were interrupted, fail out (even if all finished) */ + if (result != 0) { + call_result = EINTR; + lio_context->io_waiter = 0; + break; + } + } + + /* If all IOs have finished must free it */ + if (lio_context->io_completed == lio_context->io_issued) { + free_context = TRUE; + } + aio_proc_unlock(p); + break; + + case LIO_NOWAIT: + break; + } + /* call_result == -1 means we had no trouble queueing up requests */ if ( call_result == -1 ) { call_result = 0; @@ -1258,7 +1636,10 @@ lio_listio(proc_t p, struct lio_listio_args *uap, int *retval ) FREE( entryp_listp, M_TEMP ); if ( aiocbpp != NULL ) FREE( aiocbpp, M_TEMP ); - + if ((lio_context != NULL) && ((lio_context->io_issued == 0) || (free_context == TRUE))) { + free_lio_context(lio_context); + } + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END, (int)p, call_result, 0, 0, 0 ); @@ -1272,115 +1653,93 @@ lio_listio(proc_t p, struct lio_listio_args *uap, int *retval ) * we get a wake up call on sleep channel &aio_anchor.aio_async_workq * after new work is queued up. */ - static void aio_work_thread( void ) { aio_workq_entry *entryp; + int error; + vm_map_t currentmap; + vm_map_t oldmap = VM_MAP_NULL; + task_t oldaiotask = TASK_NULL; + struct uthread *uthreadp = NULL; for( ;; ) { - AIO_LOCK; - entryp = aio_get_some_work(); - if ( entryp == NULL ) { - /* - * aio worker threads wait for some work to get queued up - * by aio_queue_async_request. 
Once some work gets queued - * it will wake up one of these worker threads just before - * returning to our caller in user land. - */ - assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT ); - AIO_UNLOCK; - - thread_block( (thread_continue_t)aio_work_thread ); - /* NOT REACHED */ - } + /* + * returns with the entry ref'ed. + * sleeps until work is available. + */ + entryp = aio_get_some_work(); + + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START, + (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 ); + + /* + * Assume the target's address space identity for the duration + * of the IO. Note: don't need to have the entryp locked, + * because the proc and map don't change until it's freed. + */ + currentmap = get_task_map( (current_proc())->task ); + if ( currentmap != entryp->aio_map ) { + uthreadp = (struct uthread *) get_bsdthread_info(current_thread()); + oldaiotask = uthreadp->uu_aio_task; + uthreadp->uu_aio_task = entryp->procp->task; + oldmap = vm_map_switch( entryp->aio_map ); + } + + if ( (entryp->flags & AIO_READ) != 0 ) { + error = do_aio_read( entryp ); + } + else if ( (entryp->flags & AIO_WRITE) != 0 ) { + error = do_aio_write( entryp ); + } + else if ( (entryp->flags & (AIO_FSYNC | AIO_DSYNC)) != 0 ) { + error = do_aio_fsync( entryp ); + } else { - int error; - vm_map_t currentmap; - vm_map_t oldmap = VM_MAP_NULL; - task_t oldaiotask = TASK_NULL; - struct uthread *uthreadp = NULL; + printf( "%s - unknown aio request - flags 0x%02X \n", + __FUNCTION__, entryp->flags ); + error = EINVAL; + } - AIO_UNLOCK; + /* Restore old map */ + if ( currentmap != entryp->aio_map ) { + (void) vm_map_switch( oldmap ); + uthreadp->uu_aio_task = oldaiotask; + } - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START, - (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 ); - - /* - * Assume the target's address space identity for the duration - * of the IO. 
- */ - currentmap = get_task_map( (current_proc())->task ); - if ( currentmap != entryp->aio_map ) { - uthreadp = (struct uthread *) get_bsdthread_info(current_thread()); - oldaiotask = uthreadp->uu_aio_task; - uthreadp->uu_aio_task = entryp->procp->task; - oldmap = vm_map_switch( entryp->aio_map ); - } - - if ( (entryp->flags & AIO_READ) != 0 ) { - error = do_aio_read( entryp ); - } - else if ( (entryp->flags & AIO_WRITE) != 0 ) { - error = do_aio_write( entryp ); - } - else if ( (entryp->flags & AIO_FSYNC) != 0 ) { - error = do_aio_fsync( entryp ); - } - else { - printf( "%s - unknown aio request - flags 0x%02X \n", - __FUNCTION__, entryp->flags ); - error = EINVAL; - } - entryp->errorval = error; - if ( currentmap != entryp->aio_map ) { - (void) vm_map_switch( oldmap ); - uthreadp->uu_aio_task = oldaiotask; - } - - /* we're done with the IO request so pop it off the active queue and */ - /* push it on the done queue */ - AIO_LOCK; - TAILQ_REMOVE( &entryp->procp->aio_activeq, entryp, aio_workq_link ); - aio_anchor.aio_active_count--; - entryp->procp->aio_active_count--; - TAILQ_INSERT_TAIL( &entryp->procp->aio_doneq, entryp, aio_workq_link ); - aio_anchor.aio_done_count++; - entryp->procp->aio_done_count++; - entryp->flags |= AIO_COMPLETION; - - /* remove our reference to the user land map. 
*/ - if ( VM_MAP_NULL != entryp->aio_map ) { - vm_map_t my_map; - - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; - AIO_UNLOCK; /* must unlock before calling vm_map_deallocate() */ - vm_map_deallocate( my_map ); - } - else { - AIO_UNLOCK; - } - - do_aio_completion( entryp ); - - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END, - (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval, - entryp->returnval, 0 ); - - AIO_LOCK; - entryp->flags &= ~AIO_COMPLETION; - if ( (entryp->flags & AIO_DO_FREE) != 0 ) { - vm_map_t my_map; - - my_map = entryp->aio_map; - entryp->aio_map = VM_MAP_NULL; - AIO_UNLOCK; - aio_free_request( entryp, my_map ); - } - else - AIO_UNLOCK; + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END, + (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval, + entryp->returnval, 0 ); + + + /* XXX COUNTS */ + aio_entry_lock_spin(entryp); + entryp->errorval = error; + aio_entry_unlock(entryp); + + /* we're done with the IO request so pop it off the active queue and */ + /* push it on the done queue */ + aio_proc_lock(entryp->procp); + aio_proc_move_done_locked(entryp->procp, entryp); + aio_proc_unlock(entryp->procp); + + OSDecrementAtomic(&aio_anchor.aio_inflight_count); + + /* remove our reference to the user land map. */ + if ( VM_MAP_NULL != entryp->aio_map ) { + vm_map_t my_map; + + my_map = entryp->aio_map; + entryp->aio_map = VM_MAP_NULL; + vm_map_deallocate( my_map ); } + + /* Provide notifications */ + do_aio_completion( entryp ); + + /* Will free if needed */ + aio_entry_unref(entryp); + } /* for ( ;; ) */ /* NOT REACHED */ @@ -1394,387 +1753,311 @@ aio_work_thread( void ) * IO requests at the time the aio_fsync call came in have completed. 
* NOTE - AIO_LOCK must be held by caller */ - static aio_workq_entry * aio_get_some_work( void ) { - aio_workq_entry *entryp; - - /* pop some work off the work queue and add to our active queue */ - for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq ); - entryp != NULL; - entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) { + aio_workq_entry *entryp = NULL; + aio_workq_t queue = NULL; + + /* Just one queue for the moment. In the future there will be many. */ + queue = &aio_anchor.aio_async_workqs[0]; + aio_workq_lock_spin(queue); + if (queue->aioq_count == 0) { + goto nowork; + } + + /* + * Hold the queue lock. + * + * pop some work off the work queue and add to our active queue + * Always start with the queue lock held. + */ + for(;;) { + /* + * Pull it off the work queue. Once it's off, it can't be cancelled, + * so we can take our ref once we drop the queue lock. + */ + entryp = TAILQ_FIRST(&queue->aioq_entries); + /* + * If there's no work or only fsyncs that need delay, go to sleep + * and then start anew from aio_work_thread + */ + if (entryp == NULL) { + goto nowork; + } + + aio_workq_remove_entry_locked(queue, entryp); + + aio_workq_unlock(queue); + + /* + * Check if it's an fsync that must be delayed. No need to lock the entry; + * that flag would have been set at initialization. + */ if ( (entryp->flags & AIO_FSYNC) != 0 ) { - /* leave aio_fsync calls on the work queue if there are IO */ - /* requests on the active queue for the same file descriptor. */ + /* + * Check for unfinished operations on the same file + * in this proc's queue. + */ + aio_proc_lock_spin(entryp->procp); if ( aio_delay_fsync_request( entryp ) ) { - + /* It needs to be delayed.
Put it back on the end of the work queue */ KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); + + aio_proc_unlock(entryp->procp); + + aio_workq_lock_spin(queue); + aio_workq_add_entry_locked(queue, entryp); continue; - } + } + aio_proc_unlock(entryp->procp); } + break; } - - if ( entryp != NULL ) { - TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); - aio_anchor.aio_async_workq_count--; - TAILQ_INSERT_TAIL( &entryp->procp->aio_activeq, entryp, aio_workq_link ); - aio_anchor.aio_active_count++; - entryp->procp->aio_active_count++; - } - + + aio_entry_ref(entryp); + + OSIncrementAtomic(&aio_anchor.aio_inflight_count); return( entryp ); - -} /* aio_get_some_work */ +nowork: + /* We will wake up when someone enqueues something */ + wait_queue_assert_wait(queue->aioq_waitq, queue, THREAD_UNINT, 0); + aio_workq_unlock(queue); + thread_block( (thread_continue_t)aio_work_thread ); + + // notreached + return NULL; +} /* - * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed at - * this time. Delay will happen when there are any active IOs for the same file - * descriptor that were queued at time the aio_sync call was queued. - * NOTE - AIO_LOCK must be held by caller + * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed. + * A big, simple hammer: only send it off if it's the most recently filed IO which has + * not been completed. 
*/ static boolean_t aio_delay_fsync_request( aio_workq_entry *entryp ) { - aio_workq_entry *my_entryp; - - TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) { - if ( my_entryp->fsyncp != USER_ADDR_NULL && - entryp->uaiocbp == my_entryp->fsyncp && - entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) { - return( TRUE ); - } + if (entryp == TAILQ_FIRST(&entryp->procp->p_aio_activeq)) { + return FALSE; } - return( FALSE ); - + return TRUE; } /* aio_delay_fsync_request */ - -/* - * aio_queue_async_request - queue up an async IO request on our work queue then - * wake up one of our worker threads to do the actual work. We get a reference - * to our caller's user land map in order to keep it around while we are - * processing the request. - */ - -static int -aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO ) +static aio_workq_entry * +aio_create_queue_entry(proc_t procp, user_addr_t aiocbp, void *group_tag, int kindOfIO) { - aio_workq_entry *entryp; - int result; + aio_workq_entry *entryp; + int result = 0; entryp = (aio_workq_entry *) zalloc( aio_workq_zonep ); if ( entryp == NULL ) { result = EAGAIN; goto error_exit; } - bzero( entryp, sizeof(*entryp) ); - - /* fill in the rest of the aio_workq_entry */ - entryp->procp = procp; - entryp->uaiocbp = aiocbp; - entryp->flags |= kindOfIO; - entryp->aio_map = VM_MAP_NULL; - - if ( !IS_64BIT_PROCESS(procp) ) { - struct aiocb aiocb32; - result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); - if ( result == 0 ) - do_munge_aiocb( &aiocb32, &entryp->aiocb ); - } else - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); - - if ( result != 0 ) { - result = EAGAIN; - goto error_exit; - } - - /* do some more validation on the aiocb and embedded file descriptor */ - result = aio_validate( entryp ); - if ( result != 0 ) - goto error_exit; - - /* get a reference to the user land map in order to keep it around */ - entryp->aio_map = get_task_map( procp->task ); - 
vm_map_reference( entryp->aio_map ); - - AIO_LOCK; - - if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) { - AIO_UNLOCK; - result = EAGAIN; - goto error_exit; - } - - /* check our aio limits to throttle bad or rude user land behavior */ - if ( aio_get_all_queues_count( ) >= aio_max_requests || - aio_get_process_count( procp ) >= aio_max_requests_per_process ) { - AIO_UNLOCK; - result = EAGAIN; - goto error_exit; - } - - /* - * aio_fsync calls sync up all async IO requests queued at the time - * the aio_fsync call was made. So we mark each currently queued async - * IO with a matching file descriptor as must complete before we do the - * fsync. We set the fsyncp field of each matching async IO - * request with the aiocb pointer passed in on the aio_fsync call to - * know which IOs must complete before we process the aio_fsync call. - */ - if ( (kindOfIO & AIO_FSYNC) != 0 ) - aio_mark_requests( entryp ); - - /* queue up on our aio asynchronous work queue */ - TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link ); - aio_anchor.aio_async_workq_count++; - - wakeup_one( (caddr_t) &aio_anchor.aio_async_workq ); - AIO_UNLOCK; - - KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE, - (int)procp, (int)aiocbp, 0, 0, 0 ); - - return( 0 ); - -error_exit: - if ( entryp != NULL ) { - /* this entry has not been queued up so no worries about unlocked */ - /* state and aio_map */ - aio_free_request( entryp, entryp->aio_map ); - } - - return( result ); - -} /* aio_queue_async_request */ - - -/* - * lio_create_async_entry - allocate an aio_workq_entry and fill it in. - * If all goes well return 0 and pass the aio_workq_entry pointer back to - * our caller. We get a reference to our caller's user land map in order to keep - * it around while we are processing the request. - * lio_listio calls behave differently at completion they do completion notification - * when all async IO requests have completed. 
We use group_tag to tag IO requests - * that behave in the delay notification manner. - */ - -static int -lio_create_async_entry(proc_t procp, user_addr_t aiocbp, - user_addr_t sigp, long group_tag, - aio_workq_entry **entrypp ) -{ - aio_workq_entry *entryp; - int result; - - entryp = (aio_workq_entry *) zalloc( aio_workq_zonep ); - if ( entryp == NULL ) { - result = EAGAIN; - goto error_exit; - } bzero( entryp, sizeof(*entryp) ); /* fill in the rest of the aio_workq_entry */ entryp->procp = procp; entryp->uaiocbp = aiocbp; - entryp->flags |= AIO_LIO; + entryp->flags |= kindOfIO; entryp->group_tag = group_tag; entryp->aio_map = VM_MAP_NULL; + entryp->aio_refcount = 0; - if ( !IS_64BIT_PROCESS(procp) ) { - struct aiocb aiocb32; - + if ( proc_is64bit(procp) ) { + struct user64_aiocb aiocb64; + + result = copyin( aiocbp, &aiocb64, sizeof(aiocb64) ); + if (result == 0 ) + do_munge_aiocb_user64_to_user(&aiocb64, &entryp->aiocb); + + } else { + struct user32_aiocb aiocb32; + result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); if ( result == 0 ) - do_munge_aiocb( &aiocb32, &entryp->aiocb ); - } else - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); - - if ( result != 0 ) { - result = EAGAIN; - goto error_exit; - } - - /* look for lio_listio LIO_NOP requests and ignore them. */ - /* Not really an error, but we need to free our aio_workq_entry. */ - if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) { - result = 0; - goto error_exit; - } - - /* use sigevent passed in to lio_listio for each of our calls, but only */ - /* do completion notification after the last request completes. */ - if ( sigp != USER_ADDR_NULL ) { - if ( !IS_64BIT_PROCESS(procp) ) { - struct sigevent sigevent32; - - result = copyin( sigp, &sigevent32, sizeof(sigevent32) ); - if ( result == 0 ) { - /* also need to munge aio_sigevent since it contains pointers */ - /* special case here. since we do not know if sigev_value is an */ - /* int or a ptr we do NOT cast the ptr to a user_addr_t. 
This */ - /* means if we send this info back to user space we need to remember */ - /* sigev_value was not expanded for the 32-bit case. */ - /* NOTE - this does NOT affect us since we don't support sigev_value */ - /* yet in the aio context. */ - //LP64 - entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify; - entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo; - entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int = - sigevent32.sigev_value.sival_int; - entryp->aiocb.aio_sigevent.sigev_notify_function = - CAST_USER_ADDR_T(sigevent32.sigev_notify_function); - entryp->aiocb.aio_sigevent.sigev_notify_attributes = - CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes); - } - } else - result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) ); - - if ( result != 0 ) { - result = EAGAIN; - goto error_exit; - } + do_munge_aiocb_user32_to_user( &aiocb32, &entryp->aiocb ); } - /* do some more validation on the aiocb and embedded file descriptor */ - result = aio_validate( entryp ); - if ( result != 0 ) + if ( result != 0 ) { + result = EAGAIN; goto error_exit; + } /* get a reference to the user land map in order to keep it around */ entryp->aio_map = get_task_map( procp->task ); vm_map_reference( entryp->aio_map ); - - *entrypp = entryp; - return( 0 ); - + + /* do some more validation on the aiocb and embedded file descriptor */ + result = aio_validate( entryp ); + error_exit: - if ( entryp != NULL ) + if ( result && entryp != NULL ) { zfree( aio_workq_zonep, entryp ); - - return( result ); - -} /* lio_create_async_entry */ + entryp = NULL; + } + + return ( entryp ); +} /* - * aio_mark_requests - aio_fsync calls synchronize file data for all queued async IO - * requests at the moment the aio_fsync call is queued. We use aio_workq_entry.fsyncp - * to mark each async IO that must complete before the fsync is done. 
We use the uaiocbp - * field from the aio_fsync call as the aio_workq_entry.fsyncp in marked requests. - * NOTE - AIO_LOCK must be held by caller + * aio_queue_async_request - queue up an async IO request on our work queue then + * wake up one of our worker threads to do the actual work. We get a reference + * to our caller's user land map in order to keep it around while we are + * processing the request. */ - -static void -aio_mark_requests( aio_workq_entry *entryp ) +static int +aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO ) { - aio_workq_entry *my_entryp; + aio_workq_entry *entryp; + int result; + int old_count; - TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) { - if ( entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) { - my_entryp->fsyncp = entryp->uaiocbp; - } + old_count = aio_increment_total_count(); + if (old_count >= aio_max_requests) { + result = EAGAIN; + goto error_noalloc; } - - TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( entryp->procp == my_entryp->procp && - entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) { - my_entryp->fsyncp = entryp->uaiocbp; - } + + entryp = aio_create_queue_entry( procp, aiocbp, 0, kindOfIO); + if ( entryp == NULL ) { + result = EAGAIN; + goto error_noalloc; } - -} /* aio_mark_requests */ -/* - * lio_create_sync_entry - allocate an aio_workq_entry and fill it in. - * If all goes well return 0 and pass the aio_workq_entry pointer back to - * our caller. - * lio_listio calls behave differently at completion they do completion notification - * when all async IO requests have completed. We use group_tag to tag IO requests - * that behave in the delay notification manner. 
- */ + aio_proc_lock_spin(procp); -static int -lio_create_sync_entry(proc_t procp, user_addr_t aiocbp, - long group_tag, aio_workq_entry **entrypp ) -{ - aio_workq_entry *entryp; - int result; + if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) { + result = EAGAIN; + goto error_exit; + } - entryp = (aio_workq_entry *) zalloc( aio_workq_zonep ); - if ( entryp == NULL ) { + /* check our aio limits to throttle bad or rude user land behavior */ + if (aio_get_process_count( procp ) >= aio_max_requests_per_process) { + printf("aio_queue_async_request(): too many in flight for proc: %d.\n", procp->p_aio_total_count); result = EAGAIN; goto error_exit; } - bzero( entryp, sizeof(*entryp) ); + + /* Add the IO to proc and work queues, wake up threads as appropriate */ + lck_mtx_convert_spin(aio_proc_mutex(procp)); + aio_enqueue_work(procp, entryp, 1); + + aio_proc_unlock(procp); + + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE, + (int)procp, (int)aiocbp, 0, 0, 0 ); - /* fill in the rest of the aio_workq_entry */ - entryp->procp = procp; - entryp->uaiocbp = aiocbp; - entryp->flags |= AIO_LIO; - entryp->group_tag = group_tag; - entryp->aio_map = VM_MAP_NULL; + return( 0 ); + +error_exit: + /* + * This entry has not been queued up so no worries about + * unlocked state and aio_map + */ + aio_proc_unlock(procp); + aio_free_request(entryp); - if ( !IS_64BIT_PROCESS(procp) ) { - struct aiocb aiocb32; +error_noalloc: + aio_decrement_total_count(); - result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) ); - if ( result == 0 ) - do_munge_aiocb( &aiocb32, &entryp->aiocb ); - } else - result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) ); + return( result ); + +} /* aio_queue_async_request */ - if ( result != 0 ) { - result = EAGAIN; + +/* + * lio_create_entry + * + * Allocate an aio_workq_entry and fill it in. If all goes well return 0 + * and pass the aio_workq_entry pointer back to our caller. 
+ * + * Parameters: procp The process making the request + * aiocbp The aio context buffer pointer + * group_tag The group tag used to indicate a + * group of operations has completed + * entrypp Pointer to the pointer to receive the + * address of the created aio_workq_entry + * + * Returns: 0 Successfully created + * EAGAIN Try again (usually resource shortage) + * + * + * Notes: We get a reference to our caller's user land map in order + * to keep it around while we are processing the request. + * + * lio_listio calls behave differently at completion they do + * completion notification when all async IO requests have + * completed. We use group_tag to tag IO requests that behave + * in the delay notification manner. + * + * All synchronous operations are considered to not have a + * signal routine associated with them (sigp == USER_ADDR_NULL). + */ +static int +lio_create_entry(proc_t procp, user_addr_t aiocbp, void *group_tag, + aio_workq_entry **entrypp ) +{ + aio_workq_entry *entryp; + int result; + + entryp = aio_create_queue_entry( procp, aiocbp, group_tag, AIO_LIO); + if ( entryp == NULL ) { + result = EAGAIN; goto error_exit; } - /* look for lio_listio LIO_NOP requests and ignore them. */ - /* Not really an error, but we need to free our aio_workq_entry. */ + /* + * Look for lio_listio LIO_NOP requests and ignore them; this is + * not really an error, but we need to free our aio_workq_entry.
+ */ if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) { result = 0; goto error_exit; } - result = aio_validate( entryp ); - if ( result != 0 ) { - goto error_exit; - } - *entrypp = entryp; return( 0 ); error_exit: - if ( entryp != NULL ) - zfree( aio_workq_zonep, entryp ); + + if ( entryp != NULL ) { + /* + * This entry has not been queued up so no worries about + * unlocked state and aio_map + */ + aio_free_request(entryp); + } return( result ); -} /* lio_create_sync_entry */ +} /* lio_create_entry */ /* * aio_free_request - remove our reference on the user land map and - * free the work queue entry resources. - * We are not holding the lock here thus aio_map is passed in and - * zeroed while we did have the lock. + * free the work queue entry resources. The entry is off all lists + * and has zero refcount, so no one can have a pointer to it. */ static int -aio_free_request( aio_workq_entry *entryp, vm_map_t the_map ) +aio_free_request(aio_workq_entry *entryp) { /* remove our reference to the user land map. */ - if ( VM_MAP_NULL != the_map ) { - vm_map_deallocate( the_map ); + if ( VM_MAP_NULL != entryp->aio_map) { + vm_map_deallocate(entryp->aio_map); } - + + entryp->aio_refcount = -1; /* A bit of poisoning in case of bad refcounting. */ + zfree( aio_workq_zonep, entryp ); return( 0 ); @@ -1782,9 +2065,11 @@ aio_free_request( aio_workq_entry *entryp, vm_map_t the_map ) } /* aio_free_request */ -/* aio_validate - validate the aiocb passed in by one of the aio syscalls. +/* + * aio_validate + * + * validate the aiocb passed in by one of the aio syscalls. */ - static int aio_validate( aio_workq_entry *entryp ) { @@ -1806,32 +2091,46 @@ aio_validate( aio_workq_entry *entryp ) } flag = FREAD; - if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC)) != 0 ) { + if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC | AIO_DSYNC)) != 0 ) { flag = FWRITE; } if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) { - // LP64todo - does max value for aio_nbytes need to grow? 
if ( entryp->aiocb.aio_nbytes > INT_MAX || entryp->aiocb.aio_buf == USER_ADDR_NULL || entryp->aiocb.aio_offset < 0 ) return( EINVAL ); } - /* validate aiocb.aio_sigevent. at this point we only support sigev_notify - * equal to SIGEV_SIGNAL or SIGEV_NONE. this means sigev_value, - * sigev_notify_function, and sigev_notify_attributes are ignored. + /* + * validate aiocb.aio_sigevent. at this point we only support + * sigev_notify equal to SIGEV_SIGNAL or SIGEV_NONE. this means + * sigev_value, sigev_notify_function, and sigev_notify_attributes + * are ignored, since SIGEV_THREAD is unsupported. This is consistent + * with no [RTS] (RalTime Signal) option group support. */ - if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ) { + switch ( entryp->aiocb.aio_sigevent.sigev_notify ) { + case SIGEV_SIGNAL: + { int signum; + /* make sure we have a valid signal number */ signum = entryp->aiocb.aio_sigevent.sigev_signo; if ( signum <= 0 || signum >= NSIG || signum == SIGKILL || signum == SIGSTOP ) return (EINVAL); - } - else if ( entryp->aiocb.aio_sigevent.sigev_notify != SIGEV_NONE ) + } + break; + + case SIGEV_NONE: + break; + + case SIGEV_THREAD: + /* Unsupported [RTS] */ + + default: return (EINVAL); + } /* validate the file descriptor and that the file was opened * for the appropriate read / write access. @@ -1862,61 +2161,34 @@ aio_validate( aio_workq_entry *entryp ) } /* aio_validate */ +static int +aio_increment_total_count() +{ + return OSIncrementAtomic(&aio_anchor.aio_total_count); +} + +static int +aio_decrement_total_count() +{ + int old = OSDecrementAtomic(&aio_anchor.aio_total_count); + if (old <= 0) { + panic("Negative total AIO count!\n"); + } -/* - * aio_get_process_count - runs through our queues that hold outstanding - * async IO reqests and totals up number of requests for the given - * process. - * NOTE - caller must hold aio lock! 
- */ + return old; +} static int aio_get_process_count(proc_t procp ) { - aio_workq_entry *entryp; - int count; - - /* begin with count of completed async IO requests for this process */ - count = procp->aio_done_count; - - /* add in count of active async IO requests for this process */ - count += procp->aio_active_count; - - /* look for matches on our queue of asynchronous todo work */ - TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( procp == entryp->procp ) { - count++; - } - } - - /* look for matches on our queue of synchronous todo work */ - TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) { - if ( procp == entryp->procp ) { - count++; - } - } - - return( count ); + return procp->p_aio_total_count; } /* aio_get_process_count */ - -/* - * aio_get_all_queues_count - get total number of entries on all aio work queues. - * NOTE - caller must hold aio lock! - */ - static int aio_get_all_queues_count( void ) { - int count; - - count = aio_anchor.aio_async_workq_count; - count += aio_anchor.lio_sync_workq_count; - count += aio_anchor.aio_active_count; - count += aio_anchor.aio_done_count; - - return( count ); + return aio_anchor.aio_total_count; } /* aio_get_all_queues_count */ @@ -1924,113 +2196,143 @@ aio_get_all_queues_count( void ) /* * do_aio_completion. Handle async IO completion. */ - static void do_aio_completion( aio_workq_entry *entryp ) { - /* signal user land process if appropriate */ + + boolean_t lastLioCompleted = FALSE; + aio_lio_context *lio_context = NULL; + int waiter = 0; + + lio_context = (aio_lio_context *)entryp->group_tag; + + if (lio_context != NULL) { + + aio_proc_lock_spin(entryp->procp); + + /* Account for this I/O completing. */ + lio_context->io_completed++; + + /* Are we done with this lio context? 
*/ + if (lio_context->io_issued == lio_context->io_completed) { + lastLioCompleted = TRUE; + } + + waiter = lio_context->io_waiter; + + /* explicit wakeup of lio_listio() waiting in LIO_WAIT */ + if ((entryp->flags & AIO_LIO_NOTIFY) && (lastLioCompleted) && (waiter != 0)) { + /* wake up the waiter */ + wakeup(lio_context); + } + + aio_proc_unlock(entryp->procp); + } + if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL && (entryp->flags & AIO_DISABLE) == 0 ) { - - /* - * if group_tag is non zero then make sure this is the last IO request - * in the group before we signal. - */ - if ( entryp->group_tag == 0 || - (entryp->group_tag != 0 && aio_last_group_io( entryp )) ) { + + boolean_t performSignal = FALSE; + if (lio_context == NULL) { + performSignal = TRUE; + } + else { + /* + * If this was the last request in the group and a signal + * is desired, send one. + */ + performSignal = lastLioCompleted; + } + + if (performSignal) { + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE, - (int)entryp->procp, (int)entryp->uaiocbp, - entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 ); + (int)entryp->procp, (int)entryp->uaiocbp, + entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 ); psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo ); - return; } } + if ((entryp->flags & AIO_EXIT_WAIT) && (entryp->flags & AIO_CLOSE_WAIT)) { + panic("Close and exit flags set at the same time\n"); + } + /* - * need to handle case where a process is trying to exit, exec, or close - * and is currently waiting for active aio requests to complete. If - * AIO_WAITING is set then we need to look to see if there are any + * need to handle case where a process is trying to exit, exec, or + * close and is currently waiting for active aio requests to complete. + * If AIO_CLEANUP_WAIT is set then we need to look to see if there are any * other requests in the active queue for this process. 
If there are - * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel. If - * there are some still active then do nothing - we only want to wakeup - * when all active aio requests for the process are complete. + * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel. + * If there are some still active then do nothing - we only want to + * wakeup when all active aio requests for the process are complete. + * + * Don't need to lock the entry or proc to check the cleanup flag. It can only be + * set for cancellation, while the entryp is still on a proc list; now it's + * off, so that flag is already set if it's going to be. */ - if ( (entryp->flags & AIO_WAITING) != 0 ) { + if ( (entryp->flags & AIO_EXIT_WAIT) != 0 ) { int active_requests; KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); - AIO_LOCK; + aio_proc_lock_spin(entryp->procp); active_requests = aio_active_requests_for_process( entryp->procp ); - //AIO_UNLOCK; if ( active_requests < 1 ) { - /* no active aio requests for this process, continue exiting */ - wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN ); + /* + * no active aio requests for this process, continue exiting. In this + * case, there should be no one else waiting ont he proc in AIO... 
+ */ + wakeup_one((caddr_t)&entryp->procp->AIO_CLEANUP_SLEEP_CHAN); + aio_proc_unlock(entryp->procp); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); + } else { + aio_proc_unlock(entryp->procp); } - AIO_UNLOCK; - return; } + + if ( (entryp->flags & AIO_CLOSE_WAIT) != 0 ) { + int active_requests; + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE, + (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); + + aio_proc_lock_spin(entryp->procp); + active_requests = aio_proc_active_requests_for_file( entryp->procp, entryp->aiocb.aio_fildes); + if ( active_requests < 1 ) { + /* Can't wakeup_one(); multiple closes might be in progress. */ + wakeup(&entryp->procp->AIO_CLEANUP_SLEEP_CHAN); + aio_proc_unlock(entryp->procp); + + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE, + (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); + } else { + aio_proc_unlock(entryp->procp); + } + } /* - * aio_suspend case when a signal was not requested. In that scenario we - * are sleeping on the AIO_SUSPEND_SLEEP_CHAN channel. - * NOTE - the assumption here is that this wakeup call is inexpensive. - * we really only need to do this when an aio_suspend call is pending. - * If we find the wakeup call should be avoided we could mark the - * async IO requests given in the list provided by aio_suspend and only - * call wakeup for them. If we do mark them we should unmark them after - * the aio_suspend wakes up. + * A thread in aio_suspend() wants to know about completed IOs. If it checked + * the done list before we moved our AIO there, then it already asserted its wait, + * and we can wake it up without holding the lock. If it checked the list after + * we did our move, then it already has seen the AIO that we moved. Therefore, we + * can do our wakeup without holding the lock. 
*/ - AIO_LOCK; - wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN ); - AIO_UNLOCK; - + wakeup( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN ); KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE, (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 ); - - return; - -} /* do_aio_completion */ - - -/* - * aio_last_group_io - checks to see if this is the last unfinished IO request - * for the given group_tag. Returns TRUE if there are no other active IO - * requests for this group or FALSE if the are active IO requests - * NOTE - AIO_LOCK must be held by caller - */ -static boolean_t -aio_last_group_io( aio_workq_entry *entryp ) -{ - aio_workq_entry *my_entryp; - - /* look for matches on our queue of active async IO requests */ - TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) { - if ( my_entryp->group_tag == entryp->group_tag ) - return( FALSE ); - } - - /* look for matches on our queue of asynchronous todo work */ - TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( my_entryp->group_tag == entryp->group_tag ) - return( FALSE ); - } - - /* look for matches on our queue of synchronous todo work */ - TAILQ_FOREACH( my_entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) { - if ( my_entryp->group_tag == entryp->group_tag ) - return( FALSE ); - } + /* + * free the LIO context if the last lio completed and no thread is + * waiting + */ + if (lastLioCompleted && (waiter == 0)) + free_lio_context (lio_context); - return( TRUE ); -} /* aio_last_group_io */ +} /* do_aio_completion */ /* @@ -2076,7 +2378,7 @@ static int do_aio_write( aio_workq_entry *entryp ) { struct fileproc *fp; - int error; + int error, flags; struct vfs_context context; if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp , 0)) ) @@ -2086,6 +2388,11 @@ do_aio_write( aio_workq_entry *entryp ) return(EBADF); } + flags = FOF_PCRED; + if ( (fp->f_fglob->fg_flag & O_APPEND) == 0 ) { + flags |= 
FOF_OFFSET; + } + /* * * Needs vfs_context_t from vfs_context_create() in entryp! @@ -2099,7 +2406,7 @@ do_aio_write( aio_workq_entry *entryp ) entryp->aiocb.aio_buf, entryp->aiocb.aio_nbytes, entryp->aiocb.aio_offset, - FOF_OFFSET | FOF_PCRED, + flags, &entryp->returnval); fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0); @@ -2112,18 +2419,33 @@ do_aio_write( aio_workq_entry *entryp ) /* * aio_active_requests_for_process - return number of active async IO * requests for the given process. - * NOTE - caller must hold aio lock! */ - static int aio_active_requests_for_process(proc_t procp ) { - - return( procp->aio_active_count ); + return( procp->p_aio_active_count ); + +} /* aio_active_requests_for_process */ + +/* + * Called with the proc locked. + */ +static int +aio_proc_active_requests_for_file(proc_t procp, int fd) +{ + int count = 0; + aio_workq_entry *entryp; + TAILQ_FOREACH(entryp, &procp->p_aio_activeq, aio_proc_link) { + if (entryp->aiocb.aio_fildes == fd) { + count++; + } + } + return count; } /* aio_active_requests_for_process */ + /* * do_aio_fsync */ @@ -2133,14 +2455,28 @@ do_aio_fsync( aio_workq_entry *entryp ) struct vfs_context context; struct vnode *vp; struct fileproc *fp; - int error; - - /* - * NOTE - we will not support AIO_DSYNC until fdatasync() is supported. - * AIO_DSYNC is caught before we queue up a request and flagged as an error. - * The following was shamelessly extracted from fsync() implementation. - */ + int sync_flag; + int error; + /* + * We are never called unless either AIO_FSYNC or AIO_DSYNC are set. + * + * If AIO_DSYNC is set, we can tell the lower layers that it is OK + * to mark for update the metadata not strictly necessary for data + * retrieval, rather than forcing it to disk. + * + * If AIO_FSYNC is set, we have to also wait for metadata not really + * necessary to data retrieval to be committed to stable storage (e.g. + * atime, mtime, ctime, etc.). 
+ * + * Metadata necessary for data retrieval must be committed to stable + * storage in either case (file length, etc.). + */ + if (entryp->flags & AIO_FSYNC) + sync_flag = MNT_WAIT; + else + sync_flag = MNT_DWAIT; + error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp); if ( error == 0 ) { if ( (error = vnode_getwithref(vp)) ) { @@ -2151,7 +2487,7 @@ do_aio_fsync( aio_workq_entry *entryp ) context.vc_thread = current_thread(); context.vc_ucred = fp->f_fglob->fg_cred; - error = VNOP_FSYNC( vp, MNT_WAIT, &context); + error = VNOP_FSYNC( vp, sync_flag, &context); (void)vnode_put(vp); @@ -2169,20 +2505,20 @@ do_aio_fsync( aio_workq_entry *entryp ) * is_already_queued - runs through our queues to see if the given * aiocbp / process is there. Returns TRUE if there is a match * on any of our aio queues. - * NOTE - callers must hold aio lock! + * + * Called with proc aio lock held (can be held spin) */ - static boolean_t is_already_queued(proc_t procp, user_addr_t aiocbp ) { aio_workq_entry *entryp; boolean_t result; - + result = FALSE; /* look for matches on our queue of async IO requests that have completed */ - TAILQ_FOREACH( entryp, &procp->aio_doneq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &procp->p_aio_doneq, aio_proc_link ) { if ( aiocbp == entryp->uaiocbp ) { result = TRUE; goto ExitThisRoutine; @@ -2190,35 +2526,32 @@ is_already_queued(proc_t procp, } /* look for matches on our queue of active async IO requests */ - TAILQ_FOREACH( entryp, &procp->aio_activeq, aio_workq_link ) { + TAILQ_FOREACH( entryp, &procp->p_aio_activeq, aio_proc_link ) { if ( aiocbp == entryp->uaiocbp ) { result = TRUE; goto ExitThisRoutine; } } - /* look for matches on our queue of asynchronous todo work */ - TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) { - if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) { - result = TRUE; - goto ExitThisRoutine; - } - } - - /* look for matches on our queue of synchronous todo work */ - TAILQ_FOREACH( 
entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) { - if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) { - result = TRUE; - goto ExitThisRoutine; - } - } - ExitThisRoutine: return( result ); } /* is_already_queued */ +static void +free_lio_context(aio_lio_context* context) +{ + +#if DEBUG + OSDecrementAtomic(&lio_contexts_alloced); +#endif /* DEBUG */ + + FREE( context, M_TEMP ); + +} /* free_lio_context */ + + /* * aio initialization */ @@ -2228,26 +2561,28 @@ aio_init( void ) int i; aio_lock_grp_attr = lck_grp_attr_alloc_init(); - aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr); + aio_proc_lock_grp = lck_grp_alloc_init("aio_proc", aio_lock_grp_attr);; + aio_entry_lock_grp = lck_grp_alloc_init("aio_entry", aio_lock_grp_attr);; + aio_queue_lock_grp = lck_grp_alloc_init("aio_queue", aio_lock_grp_attr);; aio_lock_attr = lck_attr_alloc_init(); - aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr); + lck_mtx_init(&aio_entry_mtx, aio_entry_lock_grp, aio_lock_attr); + lck_mtx_init(&aio_proc_mtx, aio_proc_lock_grp, aio_lock_attr); - AIO_LOCK; - TAILQ_INIT( &aio_anchor.aio_async_workq ); - TAILQ_INIT( &aio_anchor.lio_sync_workq ); - aio_anchor.aio_async_workq_count = 0; - aio_anchor.lio_sync_workq_count = 0; - aio_anchor.aio_active_count = 0; + aio_anchor.aio_inflight_count = 0; aio_anchor.aio_done_count = 0; - AIO_UNLOCK; + aio_anchor.aio_total_count = 0; + aio_anchor.aio_num_workqs = AIO_NUM_WORK_QUEUES; + + for (i = 0; i < AIO_NUM_WORK_QUEUES; i++) { + aio_workq_init(&aio_anchor.aio_async_workqs[i]); + } + i = sizeof( aio_workq_entry ); aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" ); _aio_create_worker_threads( aio_worker_threads ); - - return; } /* aio_init */ @@ -2264,10 +2599,11 @@ _aio_create_worker_threads( int num ) for ( i = 0; i < num; i++ ) { thread_t myThread; - myThread = kernel_thread( kernel_task, aio_work_thread ); - if ( THREAD_NULL == myThread ) { + if ( KERN_SUCCESS != 
kernel_thread_start((thread_continue_t)aio_work_thread, NULL, &myThread) ) { printf( "%s - failed to create a work thread \n", __FUNCTION__ ); } + else + thread_deallocate(myThread); } return; @@ -2291,7 +2627,7 @@ get_aiotask(void) * aiocb (in our case that is a user_aiocb) */ static void -do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ) +do_munge_aiocb_user32_to_user( struct user32_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ) { the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes; the_user_aiocbp->aio_offset = my_aiocbp->aio_offset; @@ -2316,3 +2652,26 @@ do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ) the_user_aiocbp->aio_sigevent.sigev_notify_attributes = CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes); } + +/* Similar for 64-bit user process, so that we don't need to satisfy + * the alignment constraints of the original user64_aiocb + */ +static void +do_munge_aiocb_user64_to_user( struct user64_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp ) +{ + the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes; + the_user_aiocbp->aio_offset = my_aiocbp->aio_offset; + the_user_aiocbp->aio_buf = my_aiocbp->aio_buf; + the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes; + the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio; + the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode; + + the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify; + the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo; + the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int = + my_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int; + the_user_aiocbp->aio_sigevent.sigev_notify_function = + my_aiocbp->aio_sigevent.sigev_notify_function; + the_user_aiocbp->aio_sigevent.sigev_notify_attributes = + my_aiocbp->aio_sigevent.sigev_notify_attributes; +} diff --git a/bsd/kern/kern_audit.c b/bsd/kern/kern_audit.c deleted file mode 100644 
index df9f61d2d..000000000 --- a/bsd/kern/kern_audit.c +++ /dev/null @@ -1,2798 +0,0 @@ -/* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by McAfee Research in 2004 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#if CONFIG_MACF -#include -#include -#include -#include -#define MAC_ARG_PREFIX "arg: " -#define MAC_ARG_PREFIX_LEN 5 -#endif - -#include - -#include -#include - -#if AUDIT - -/* - * The AUDIT_EXCESSIVELY_VERBOSE define enables a number of - * gratuitously noisy printf's to the console. Due to the - * volume, it should be left off unless you want your system - * to churn a lot whenever the audit record flow gets high. - */ -/* #define AUDIT_EXCESSIVELY_VERBOSE */ -#ifdef AUDIT_EXCESSIVELY_VERBOSE -#define AUDIT_PRINTF_ONLY -#define AUDIT_PRINTF(x) printf x -#else -#define AUDIT_PRINTF_ONLY __unused -#define AUDIT_PRINTF(X) -#endif - -#if DIAGNOSTIC -#if defined(assert) -#undef assert() -#endif -#define assert(cond) \ - ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond))) -#else -#include -#endif /* DIAGNOSTIC */ - -/* - * Define the audit control flags. - */ -int audit_enabled; -int audit_suspended; - -/* - * Mutex to protect global variables shared between various threads and - * processes. - */ -static lck_grp_t *audit_grp; -static lck_attr_t *audit_attr; -static lck_grp_attr_t *audit_grp_attr; -static lck_mtx_t *audit_mtx; - -/* - * Queue of audit records ready for delivery to disk. We insert new - * records at the tail, and remove records from the head. Also, - * a count of the number of records used for checking queue depth. - * In addition, a counter of records that we have allocated but are - * not yet in the queue, which is needed to estimate the total - * size of the combined set of records outstanding in the system. 
- */ -static TAILQ_HEAD(, kaudit_record) audit_q; -static size_t audit_q_len; -static size_t audit_pre_q_len; - -static wait_queue_t audit_wait_queue; -static zone_t audit_zone; -#if CONFIG_MACF -static zone_t audit_mac_label_zone; -#endif - -/* - * Condition variable to signal to the worker that it has work to do: - * either new records are in the queue, or a log replacement is taking - * place. - */ -static int audit_worker_event; -#define AUDIT_WORKER_EVENT ((event_t)&audit_worker_event) - -/* - * The audit worker thread (which is lazy started when we first - * rotate the audit log. - */ -static thread_t audit_worker_thread = THREAD_NULL; - -/* - * When an audit log is rotated, the actual rotation must be performed - * by the audit worker thread, as it may have outstanding writes on the - * current audit log. audit_replacement_vp holds the vnode replacing - * the current vnode. We can't let more than one replacement occur - * at a time, so if more than one thread requests a replacement, only - * one can have the replacement "in progress" at any given moment. If - * a thread tries to replace the audit vnode and discovers a replacement - * is already in progress (i.e., audit_replacement_flag != 0), then it - * will sleep on audit_replacement_cv waiting its turn to perform a - * replacement. When a replacement is completed, this cv is signalled - * by the worker thread so a waiting thread can start another replacement. - * We also store a credential to perform audit log write operations with. - */ -static int audit_replacement_event; -#define AUDIT_REPLACEMENT_EVENT ((event_t)&audit_replacement_event) - -static int audit_replacement_flag; -static struct vnode *audit_replacement_vp; -static kauth_cred_t audit_replacement_cred; - -/* - * Wait queue for auditing threads that cannot commit the audit - * record at the present time. Also, the queue control parameter - * structure. 
- */ -static int audit_commit_event; -#define AUDIT_COMMIT_EVENT ((event_t)&audit_commit_event) - -static struct au_qctrl audit_qctrl; - -/* - * Flags to use on audit files when opening and closing. - */ -static const int audit_open_flags = FWRITE | O_APPEND; -static const int audit_close_flags = FWRITE | O_APPEND; - -/* - * Global audit statistiscs. - */ -static struct audit_fstat audit_fstat; - -/* - Preselection mask for non-attributable events. - */ -static struct au_mask audit_nae_mask; - -/* - * Flags related to Kernel->user-space communication. - */ -static int audit_file_rotate_wait; - -/* - * Flags controlling behavior in low storage situations. - * Should we panic if a write fails? Should we fail stop - * if we're out of disk space? Are we currently "failing - * stop" due to out of disk space? - */ -static int audit_panic_on_write_fail; -static int audit_fail_stop; -static int audit_in_failure; - -/* - * When in a fail-stop mode, threads will drop into this wait queue - * rather than perform auditable events. They won't ever get woken - * up. - */ -static int audit_failure_event; -#define AUDIT_FAILURE_EVENT ((event_t)&audit_failure_event) - -/* - * XXX: Couldn't find the include file for this, so copied kern_exec.c's - * behavior. 
- */ -extern task_t kernel_task; - -extern zone_t mac_audit_data_zone; -static void -audit_free(struct kaudit_record *ar) -{ - if (ar->k_ar.ar_arg_upath1 != NULL) { - kfree(ar->k_ar.ar_arg_upath1, MAXPATHLEN); - } - if (ar->k_ar.ar_arg_upath2 != NULL) { - kfree(ar->k_ar.ar_arg_upath2, MAXPATHLEN); - - } - if (ar->k_ar.ar_arg_kpath1 != NULL) { - kfree(ar->k_ar.ar_arg_kpath1, MAXPATHLEN); - - } - if (ar->k_ar.ar_arg_kpath2 != NULL) { - kfree(ar->k_ar.ar_arg_kpath2, MAXPATHLEN); - - } - if (ar->k_ar.ar_arg_text != NULL) { - kfree(ar->k_ar.ar_arg_text, MAXPATHLEN); - - } - if (ar->k_udata != NULL) { - kfree(ar->k_udata, ar->k_ulen); - } - -#if CONFIG_MACF - if (ar->k_ar.ar_vnode1_mac_labels != NULL) { - zfree(audit_mac_label_zone, ar->k_ar.ar_vnode1_mac_labels); - } - if (ar->k_ar.ar_vnode2_mac_labels != NULL) { - zfree(audit_mac_label_zone, ar->k_ar.ar_vnode2_mac_labels); - } - if (ar->k_ar.ar_cred_mac_labels != NULL) { - zfree(audit_mac_label_zone, ar->k_ar.ar_cred_mac_labels); - } - if (ar->k_ar.ar_arg_mac_string != NULL) { - kfree(ar->k_ar.ar_arg_mac_string, - MAC_MAX_LABEL_BUF_LEN + MAC_ARG_PREFIX_LEN); - } - - /* Free the audit data from the MAC policies. */ - do { - struct mac_audit_record *head, *next; - - head = LIST_FIRST(ar->k_ar.ar_mac_records); - while (head != NULL) { - next = LIST_NEXT(head, records); - zfree(mac_audit_data_zone, head->data); - kfree(head, sizeof(*head)); - head = next; - } - - kfree(ar->k_ar.ar_mac_records, - sizeof(*ar->k_ar.ar_mac_records)); - } while (0); -#endif - - zfree(audit_zone, ar); -} - -/* - * Converts an audit record into the BSM format before writing out to the - * audit logfile. Will perform it's own vnode iocounting. - * - * Returns: - * -1 if it could not get an ioreference on the vnode. - * EINVAL if the kaudit_record ar is not a valid audit record. 
- */ -static int -audit_write(struct vnode *vp, struct kaudit_record *ar, vfs_context_t ctx) -{ - struct vfsstatfs *mnt_stat = &vp->v_mount->mnt_vfsstat; - int ret = 0; - struct au_record *bsm; - off_t file_size; - - mach_port_t audit_port; - - if (vnode_getwithref(vp)) - return ENOENT; - - /* - * First, gather statistics on the audit log file and file system - * so that we know how we're doing on space. In both cases, - * if we're unable to perform the operation, we drop the record - * and return. However, this is arguably an assertion failure. - */ - ret = vfs_update_vfsstat(vp->v_mount, ctx, VFS_KERNEL_EVENT); - if (ret) - goto out; - - /* update the global stats struct */ - if ((ret = vnode_size(vp, &file_size, ctx)) != 0) - goto out; - audit_fstat.af_currsz = file_size; - - /* - * Send a message to the audit daemon when disk space is getting - * low. - * XXX Need to decide what to do if the trigger to the audit daemon - * fails. - */ - if(host_get_audit_control_port(host_priv_self(), &audit_port) - != KERN_SUCCESS) - printf("Cannot get audit control port\n"); - - if (audit_port != MACH_PORT_NULL) { - uint64_t temp; - - /* - * If we fall below percent free blocks, then trigger the - * audit daemon to do something about it. - */ - if (audit_qctrl.aq_minfree != 0) { - temp = mnt_stat->f_blocks / (100 / audit_qctrl.aq_minfree); - if (mnt_stat->f_bfree < temp) { - ret = audit_triggers(audit_port, - AUDIT_TRIGGER_LOW_SPACE); - if (ret != KERN_SUCCESS) { - printf( - "Failed audit_triggers(AUDIT_TRIGGER_LOW_SPACE): %d\n", ret); - /* - * XXX: What to do here? Disable auditing? - * panic? - */ - } - } - } - /* Check if the current log file is full; if so, call for - * a log rotate. This is not an exact comparison; we may - * write some records over the limit. If that's not - * acceptable, then add a fudge factor here. 
- */ - if ((audit_fstat.af_filesz != 0) && - (audit_file_rotate_wait == 0) && - (file_size >= (off_t)audit_fstat.af_filesz)) { - audit_file_rotate_wait = 1; - ret = audit_triggers(audit_port, - AUDIT_TRIGGER_FILE_FULL); - if (ret != KERN_SUCCESS) { - printf( - "Failed audit_triggers(AUDIT_TRIGGER_FILE_FULL): %d\n", ret); - /* XXX what to do here? */ - } - } - } - - /* - * If the estimated amount of audit data in the audit event queue - * (plus records allocated but not yet queued) has reached the - * amount of free space on the disk, then we need to go into an - * audit fail stop state, in which we do not permit the - * allocation/committing of any new audit records. We continue to - * process packets but don't allow any activities that might - * generate new records. In the future, we might want to detect - * when space is available again and allow operation to continue, - * but this behavior is sufficient to meet fail stop requirements - * in CAPP. - */ - if (audit_fail_stop && - (unsigned long) - ((audit_q_len + audit_pre_q_len + 1) * MAX_AUDIT_RECORD_SIZE) / - mnt_stat->f_bsize >= (unsigned long)(mnt_stat->f_bfree)) { - printf( - "audit_worker: free space below size of audit queue, failing stop\n"); - audit_in_failure = 1; - } - - /* - * If there is a user audit record attached to the kernel record, - * then write the user record. - */ - /* XXX Need to decide a few things here: IF the user audit - * record is written, but the write of the kernel record fails, - * what to do? Should the kernel record come before or after the - * user record? For now, we write the user record first, and - * we ignore errors. - */ - if (ar->k_ar_commit & AR_COMMIT_USER) { - ret = vn_rdwr(UIO_WRITE, vp, (void *)ar->k_udata, ar->k_ulen, - (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, vfs_context_ucred(ctx), NULL, vfs_context_proc(ctx)); - if (ret) - goto out; - } - - /* - * Convert the internal kernel record to BSM format and write it - * out if everything's OK. 
- */ - if (!(ar->k_ar_commit & AR_COMMIT_KERNEL)) { - ret = 0; - goto out; - } - - ret = kaudit_to_bsm(ar, &bsm); - if (ret == BSM_NOAUDIT) { - ret = 0; - goto out; - } - - /* - * XXX: We drop the record on BSM conversion failure, but really - * this is an assertion failure. - */ - if (ret == BSM_FAILURE) { - AUDIT_PRINTF(("BSM conversion failure\n")); - ret = EINVAL; - goto out; - } - - /* XXX: We should break the write functionality - * away from the BSM record generation and have the BSM generation - * done before this function is called. This function will then - * take the BSM record as a parameter. - */ - ret = (vn_rdwr(UIO_WRITE, vp, (void *)bsm->data, bsm->len, - (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, vfs_context_ucred(ctx), NULL, vfs_context_proc(ctx))); - kau_free(bsm); - -out: - /* - * When we're done processing the current record, we have to - * check to see if we're in a failure mode, and if so, whether - * this was the last record left to be drained. If we're done - * draining, then we fsync the vnode and panic. - */ - if (audit_in_failure && - audit_q_len == 0 && audit_pre_q_len == 0) { - (void)VNOP_FSYNC(vp, MNT_WAIT, ctx); - panic("Audit store overflow; record queue drained."); - } - - vnode_put(vp); - return (ret); -} - -static void -audit_worker(void) -{ - int do_replacement_signal, error; - TAILQ_HEAD(, kaudit_record) ar_worklist; - struct kaudit_record *ar; - struct vnode *audit_vp, *old_vp; - kauth_cred_t audit_cred; - proc_t audit_p; - - AUDIT_PRINTF(("audit_worker starting\n")); - - TAILQ_INIT(&ar_worklist); - audit_cred = NOCRED; - audit_p = current_proc(); - audit_vp = NULL; - - - lck_mtx_lock(audit_mtx); - while (1) { - struct vfs_context context; - - /* - * First priority: replace the audit log target if requested. - * - * XXX It could well be we should drain existing records - * first to ensure that the timestamps and ordering - * are right. 
- */ - do_replacement_signal = 0; - while (audit_replacement_flag != 0) { - kauth_cred_t old_cred = audit_cred; - - old_vp = audit_vp; - audit_cred = audit_replacement_cred; - audit_vp = audit_replacement_vp; - audit_replacement_cred = NOCRED; - audit_replacement_vp = NULL; - audit_replacement_flag = 0; - - audit_enabled = (audit_vp != NULL); - - /* - * XXX: What to do about write failures here? - */ - if (old_vp != NULL) { - AUDIT_PRINTF(("Closing old audit file vnode %p\n", old_vp)); - if (vnode_get(old_vp) == 0) { - vn_close(old_vp, audit_close_flags, vfs_context_kernel()); - vnode_put(old_vp); - AUDIT_PRINTF(("Audit file closed\n")); - } - else - printf("audit_worker(): Couldn't close audit file.\n"); - kauth_cred_unref(&old_cred); - old_vp = NULL; - } - if (audit_vp != NULL) { - AUDIT_PRINTF(("Opening new audit file\n")); - } - do_replacement_signal = 1; - } - /* - * Signal that replacement have occurred to wake up and - * start any other replacements started in parallel. We can - * continue about our business in the mean time. We - * broadcast so that both new replacements can be inserted, - * but also so that the source(s) of replacement can return - * successfully. - */ - if (do_replacement_signal) - wait_queue_wakeup_all(audit_wait_queue, - AUDIT_REPLACEMENT_EVENT, THREAD_AWAKENED); - - /* - * Next, check to see if we have any records to drain into - * the vnode. If not, go back to waiting for an event. 
- */ - if (TAILQ_EMPTY(&audit_q)) { - int ret; - - AUDIT_PRINTF(("audit_worker waiting\n")); - ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_WORKER_EVENT, - THREAD_UNINT, - 0); - lck_mtx_unlock(audit_mtx); - - assert(ret == THREAD_WAITING); - ret = thread_block(THREAD_CONTINUE_NULL); - assert(ret == THREAD_AWAKENED); - AUDIT_PRINTF(("audit_worker woken up\n")); - AUDIT_PRINTF(("audit_worker: new vp = %p; value of flag %d\n", - audit_replacement_vp, audit_replacement_flag)); - - lck_mtx_lock(audit_mtx); - continue; - } - - /* - * If we have records, but there's no active vnode to - * write to, drain the record queue. Generally, we - * prevent the unnecessary allocation of records - * elsewhere, but we need to allow for races between - * conditional allocation and queueing. Go back to - * waiting when we're done. - * - * XXX: We go out of our way to avoid calling audit_free() - * with the audit_mtx held, to avoid a lock order reversal - * as free() may grab the funnel. This will be fixed at - * some point. - */ - if (audit_vp == NULL) { - while ((ar = TAILQ_FIRST(&audit_q))) { - TAILQ_REMOVE(&audit_q, ar, k_q); - audit_q_len--; - if (audit_q_len <= audit_qctrl.aq_lowater) - wait_queue_wakeup_one( - audit_wait_queue, - AUDIT_COMMIT_EVENT, - THREAD_AWAKENED); - - TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q); - } - lck_mtx_unlock(audit_mtx); - while ((ar = TAILQ_FIRST(&ar_worklist))) { - TAILQ_REMOVE(&ar_worklist, ar, k_q); - audit_free(ar); - } - lck_mtx_lock(audit_mtx); - continue; - } - - /* - * We have both records to write, and an active vnode - * to write to. Dequeue a record, and start the write. - * Eventually, it might make sense to dequeue several - * records and perform our own clustering, if the lower - * layers aren't doing it automatically enough. - * - * XXX: We go out of our way to avoid calling audit_free() - * with the audit_mtx held, to avoid a lock order reversal - * as free() may grab the funnel. This will be fixed at - * some point. 
- */ - while ((ar = TAILQ_FIRST(&audit_q))) { - TAILQ_REMOVE(&audit_q, ar, k_q); - audit_q_len--; - if (audit_q_len <= audit_qctrl.aq_lowater) { - wait_queue_wakeup_one(audit_wait_queue, - AUDIT_COMMIT_EVENT, THREAD_AWAKENED); - } - - TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q); - } - lck_mtx_unlock(audit_mtx); - context.vc_thread = current_thread(); - context.vc_ucred = audit_cred; - while ((ar = TAILQ_FIRST(&ar_worklist))) { - TAILQ_REMOVE(&ar_worklist, ar, k_q); - if (audit_vp != NULL) { - /* - * XXX: What should happen if there's a write - * error here? - */ - error = audit_write(audit_vp, ar, &context); - if (error && audit_panic_on_write_fail) { - panic("audit_worker: write error %d\n", - error); - } else if (error) { - printf("audit_worker: write error %d\n", - error); - } - } - audit_free(ar); - } - lck_mtx_lock(audit_mtx); - } -} - -void -audit_init(void) -{ - printf("Security auditing service present\n"); - TAILQ_INIT(&audit_q); - audit_q_len = 0; - audit_enabled = 0; - audit_suspended = 0; - audit_replacement_cred = NULL; - audit_replacement_flag = 0; - audit_file_rotate_wait = 0; - audit_replacement_vp = NULL; - audit_fstat.af_filesz = 0; /* '0' means unset, unbounded */ - audit_fstat.af_currsz = 0; - audit_qctrl.aq_hiwater = AQ_HIWATER; - audit_qctrl.aq_lowater = AQ_LOWATER; - audit_qctrl.aq_bufsz = AQ_BUFSZ; - audit_qctrl.aq_minfree = AU_FS_MINFREE; - - audit_grp_attr = lck_grp_attr_alloc_init(); - audit_grp = lck_grp_alloc_init("audit", audit_grp_attr); - audit_attr = lck_attr_alloc_init(); - audit_mtx = lck_mtx_alloc_init(audit_grp, audit_attr); - - audit_wait_queue = wait_queue_alloc(SYNC_POLICY_FIFO); - audit_zone = zinit(sizeof(struct kaudit_record), - AQ_HIWATER*sizeof(struct kaudit_record), - 8192, - "audit_zone"); -#if CONFIG_MACF - /* Assume 3 MAC labels for each audit record: two for vnodes, - * one for creds. 
- */ - audit_mac_label_zone = zinit(MAC_AUDIT_LABEL_LEN, - AQ_HIWATER * 3*MAC_AUDIT_LABEL_LEN, - 8192, - "audit_mac_label_zone"); -#endif - - /* Initialize the BSM audit subsystem. */ - kau_init(); -} - -static void -audit_rotate_vnode(kauth_cred_t cred, struct vnode *vp) -{ - int ret; - - /* - * If other parallel log replacements have been requested, we wait - * until they've finished before continuing. - */ - lck_mtx_lock(audit_mtx); - while (audit_replacement_flag != 0) { - - AUDIT_PRINTF(("audit_rotate_vnode: sleeping to wait for " - "flag\n")); - ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_REPLACEMENT_EVENT, - THREAD_UNINT, - 0); - lck_mtx_unlock(audit_mtx); - - assert(ret == THREAD_WAITING); - ret = thread_block(THREAD_CONTINUE_NULL); - assert(ret == THREAD_AWAKENED); - AUDIT_PRINTF(("audit_rotate_vnode: woken up (flag %d)\n", - audit_replacement_flag)); - - lck_mtx_lock(audit_mtx); - } - audit_replacement_cred = cred; - audit_replacement_flag = 1; - audit_replacement_vp = vp; - - /* - * Start or wake up the audit worker to perform the exchange. - * It will have to wait until we release the mutex. - */ - if (audit_worker_thread == THREAD_NULL) - audit_worker_thread = kernel_thread(kernel_task, - audit_worker); - else - wait_queue_wakeup_one(audit_wait_queue, - AUDIT_WORKER_EVENT, - THREAD_AWAKENED); - - /* - * Wait for the audit_worker to broadcast that a replacement has - * taken place; we know that once this has happened, our vnode - * has been replaced in, so we can return successfully. 
- */ - AUDIT_PRINTF(("audit_rotate_vnode: waiting for news of " - "replacement\n")); - ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_REPLACEMENT_EVENT, - THREAD_UNINT, - 0); - lck_mtx_unlock(audit_mtx); - - assert(ret == THREAD_WAITING); - ret = thread_block(THREAD_CONTINUE_NULL); - assert(ret == THREAD_AWAKENED); - AUDIT_PRINTF(("audit_rotate_vnode: change acknowledged by " - "audit_worker (flag " "now %d)\n", audit_replacement_flag)); - - audit_file_rotate_wait = 0; /* We can now request another rotation */ -} - -/* - * Drain the audit queue and close the log at shutdown. - */ -void -audit_shutdown(void) -{ - if (audit_mtx) - audit_rotate_vnode(NULL, NULL); -} - -static __inline__ struct uthread * -curuthread(void) -{ - return (get_bsdthread_info(current_thread())); -} - -static __inline__ struct kaudit_record * -currecord(void) -{ - return (curuthread()->uu_ar); -} - -/********************************** - * Begin system calls. * - **********************************/ -/* - * System call to allow a user space application to submit a BSM audit - * record to the kernel for inclusion in the audit log. This function - * does little verification on the audit record that is submitted. - * - * XXXAUDIT: Audit preselection for user records does not currently - * work, since we pre-select only based on the AUE_audit event type, - * not the event type submitted as part of the user audit data. - */ -/* ARGSUSED */ -int -audit(proc_t p, struct audit_args *uap, __unused register_t *retval) -{ - int error; - void * rec; - struct kaudit_record *ar; - struct uthread *uthr; - - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) - return (error); - - lck_mtx_lock(audit_mtx); - if ((uap->length <= 0) || (uap->length > (int)audit_qctrl.aq_bufsz)) { - lck_mtx_unlock(audit_mtx); - return (EINVAL); - } - lck_mtx_unlock(audit_mtx); - - ar = currecord(); - - /* If there's no current audit record (audit() itself not audited) - * commit the user audit record. 
- */ - if (ar == NULL) { - uthr = curuthread(); - if (uthr == NULL) /* can this happen? */ - return (ENOTSUP); - - /* This is not very efficient; we're required to allocate - * a complete kernel audit record just so the user record - * can tag along. - */ - uthr->uu_ar = audit_new(AUE_NULL, p, uthr); - if (uthr->uu_ar == NULL) /* auditing not on, or memory error */ - return (ENOTSUP); - ar = uthr->uu_ar; - } - - if (uap->length > MAX_AUDIT_RECORD_SIZE) - return (EINVAL); - - rec = (void *)kalloc((vm_size_t)uap->length); - - error = copyin(uap->record, rec, uap->length); - if (error) - goto free_out; - -#if CONFIG_MACF - error = mac_system_check_audit(kauth_cred_get(), rec, uap->length); - if (error) - goto free_out; -#endif - - /* Verify the record */ - if (bsm_rec_verify(rec) == 0) { - error = EINVAL; - goto free_out; - } - - /* Attach the user audit record to the kernel audit record. Because - * this system call is an auditable event, we will write the user - * record along with the record for this audit event. - */ - ar->k_udata = rec; - ar->k_ar_commit |= AR_COMMIT_USER; - ar->k_ulen = uap->length; - return (0); - -free_out: - /* audit_syscall_exit() will free the audit record on the thread - * even if we allocated it above. - */ - kfree(rec, uap->length); - return (error); -} - -/* - * System call to manipulate auditing. 
- */ -/* ARGSUSED */ -int -auditon(proc_t p, struct auditon_args *uap, __unused register_t *retval) -{ - int ret; - int len; - union auditon_udata udata; - proc_t tp = PROC_NULL; - kauth_cred_t my_cred; - - AUDIT_ARG(cmd, uap->cmd); - ret = suser(kauth_cred_get(), &p->p_acflag); - if (ret) - return (ret); - -#if CONFIG_MACF - ret = mac_system_check_auditon(kauth_cred_get(), uap->cmd); - if (ret) - return (ret); -#endif - - len = uap->length; - if ((len <= 0) || (len > (int)sizeof(union auditon_udata))) - return (EINVAL); - - memset((void *)&udata, 0, sizeof(udata)); - - switch (uap->cmd) { - /* Some of the GET commands use the arguments too */ - case A_SETPOLICY: - case A_SETKMASK: - case A_SETQCTRL: - case A_SETSTAT: - case A_SETUMASK: - case A_SETSMASK: - case A_SETCOND: - case A_SETCLASS: - case A_SETPMASK: - case A_SETFSIZE: - case A_SETKAUDIT: - case A_GETCLASS: - case A_GETPINFO: - case A_GETPINFO_ADDR: - ret = copyin(uap->data, (void *)&udata, uap->length); - if (ret) - return (ret); - AUDIT_ARG(auditon, &udata); - break; - } - - /* XXX Need to implement these commands by accessing the global - * values associated with the commands. - */ - lck_mtx_lock(audit_mtx); - switch (uap->cmd) { - case A_GETPOLICY: - if (!audit_fail_stop) - udata.au_policy |= AUDIT_CNT; - if (audit_panic_on_write_fail) - udata.au_policy |= AUDIT_AHLT; - break; - case A_SETPOLICY: - if (udata.au_policy & ~(AUDIT_CNT|AUDIT_AHLT)) { - ret = EINVAL; - break; - } - /* - * XXX - Need to wake up waiters if the policy relaxes? 
- */ - audit_fail_stop = ((udata.au_policy & AUDIT_CNT) == 0); - audit_panic_on_write_fail = (udata.au_policy & AUDIT_AHLT); - break; - case A_GETKMASK: - udata.au_mask = audit_nae_mask; - break; - case A_SETKMASK: - audit_nae_mask = udata.au_mask; - break; - case A_GETQCTRL: - udata.au_qctrl = audit_qctrl; - break; - case A_SETQCTRL: - if ((udata.au_qctrl.aq_hiwater > AQ_MAXHIGH) || - (udata.au_qctrl.aq_lowater >= udata.au_qctrl.aq_hiwater) || - (udata.au_qctrl.aq_bufsz > AQ_MAXBUFSZ) || - (udata.au_qctrl.aq_minfree < 0) || - (udata.au_qctrl.aq_minfree > 100)) { - ret = EINVAL; - break; - } - - audit_qctrl = udata.au_qctrl; - /* XXX The queue delay value isn't used with the kernel. */ - audit_qctrl.aq_delay = -1; - break; - case A_GETCWD: - ret = ENOSYS; - break; - case A_GETCAR: - ret = ENOSYS; - break; - case A_GETSTAT: - ret = ENOSYS; - break; - case A_SETSTAT: - ret = ENOSYS; - break; - case A_SETUMASK: - ret = ENOSYS; - break; - case A_SETSMASK: - ret = ENOSYS; - break; - case A_GETCOND: - if (audit_enabled && !audit_suspended) - udata.au_cond = AUC_AUDITING; - else - udata.au_cond = AUC_NOAUDIT; - break; - case A_SETCOND: - if (udata.au_cond == AUC_NOAUDIT) - audit_suspended = 1; - if (udata.au_cond == AUC_AUDITING) - audit_suspended = 0; - if (udata.au_cond == AUC_DISABLED) { - audit_suspended = 1; - audit_shutdown(); - } - break; - case A_GETCLASS: - udata.au_evclass.ec_class = - au_event_class(udata.au_evclass.ec_number); - break; - case A_SETCLASS: - au_evclassmap_insert(udata.au_evclass.ec_number, - udata.au_evclass.ec_class); - break; - case A_GETPINFO: - if (udata.au_aupinfo.ap_pid < 1) { - ret = EINVAL; - break; - } - if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) { - ret = EINVAL; - break; - } - - lck_mtx_unlock(audit_mtx); - my_cred = kauth_cred_proc_ref(tp); - - udata.au_aupinfo.ap_auid = my_cred->cr_au.ai_auid; - udata.au_aupinfo.ap_mask.am_success = - my_cred->cr_au.ai_mask.am_success; - udata.au_aupinfo.ap_mask.am_failure = - 
my_cred->cr_au.ai_mask.am_failure; - udata.au_aupinfo.ap_termid.machine = - my_cred->cr_au.ai_termid.machine; - udata.au_aupinfo.ap_termid.port = - my_cred->cr_au.ai_termid.port; - udata.au_aupinfo.ap_asid = my_cred->cr_au.ai_asid; - - kauth_cred_unref(&my_cred); - - proc_rele(tp); - tp = PROC_NULL; - lck_mtx_lock(audit_mtx); - break; - case A_SETPMASK: - if (udata.au_aupinfo.ap_pid < 1) { - ret = EINVAL; - break; - } - if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) { - ret = EINVAL; - break; - } - - /* - * we are modifying the audit info in a credential so we need a new - * credential (or take another reference on an existing credential that - * matches our new one). We must do this because the audit info in the - * credential is used as part of our hash key. Get current credential - * in the target process and take a reference while we muck with it. - */ - lck_mtx_unlock(audit_mtx); - for (;;) { - kauth_cred_t my_new_cred; - struct auditinfo temp_auditinfo; - - my_cred = kauth_cred_proc_ref(tp); - /* - * Set the credential with new info. If there is no - * change, we get back the same credential we passed - * in; if there is a change, we drop the reference on - * the credential we passed in. The subsequent - * compare is safe, because it is a pointer compare - * rather than a contents compare. - */ - temp_auditinfo = my_cred->cr_au; - temp_auditinfo.ai_mask.am_success = - udata.au_aupinfo.ap_mask.am_success; - temp_auditinfo.ai_mask.am_failure = - udata.au_aupinfo.ap_mask.am_failure; - my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); - - if (my_cred != my_new_cred) { - proc_lock(tp); - /* need to protect for a race where another thread also changed - * the credential after we took our reference. If p_ucred has - * changed then we should restart this again with the new cred. 
- */ - if (tp->p_ucred != my_cred) { - proc_unlock(tp); - kauth_cred_unref(&my_new_cred); - /* try again */ - continue; - } - tp->p_ucred = my_new_cred; - proc_unlock(tp); - } - /* drop old proc reference or our extra reference */ - kauth_cred_unref(&my_cred); - break; - } - proc_rele(tp); - lck_mtx_lock(audit_mtx); - break; - case A_SETFSIZE: - if ((udata.au_fstat.af_filesz != 0) && - (udata.au_fstat.af_filesz < MIN_AUDIT_FILE_SIZE)) { - ret = EINVAL; - break; - } - audit_fstat.af_filesz = udata.au_fstat.af_filesz; - break; - case A_GETFSIZE: - udata.au_fstat.af_filesz = audit_fstat.af_filesz; - udata.au_fstat.af_currsz = audit_fstat.af_currsz; - break; - case A_GETPINFO_ADDR: - ret = ENOSYS; - break; - case A_GETKAUDIT: - ret = ENOSYS; - break; - case A_SETKAUDIT: - ret = ENOSYS; - break; - } - /* Copy data back to userspace for the GET comands */ - if (ret == 0) { - switch (uap->cmd) { - case A_GETPOLICY: - case A_GETKMASK: - case A_GETQCTRL: - case A_GETCWD: - case A_GETCAR: - case A_GETSTAT: - case A_GETCOND: - case A_GETCLASS: - case A_GETPINFO: - case A_GETFSIZE: - case A_GETPINFO_ADDR: - case A_GETKAUDIT: - ret = copyout((void *)&udata, uap->data, uap->length); - break; - } - } - - lck_mtx_unlock(audit_mtx); - return (ret); -} - -/* - * System calls to manage the user audit information. 
- */ -/* ARGSUSED */ -int -getauid(__unused proc_t p, struct getauid_args *uap, __unused register_t *retval) -{ - int error; - -#if CONFIG_MACF - error = mac_proc_check_getauid(p); - if (error) - return (error); -#endif - - error = copyout((void *)&kauth_cred_get()->cr_au.ai_auid, - uap->auid, sizeof(au_id_t)); - if (error) - return (error); - - return (0); -} - -/* ARGSUSED */ -int -setauid(proc_t p, struct setauid_args *uap, __unused register_t *retval) -{ - int error; - au_id_t temp_au_id; - - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) - return (error); - - error = copyin(uap->auid, - (void *)&temp_au_id, - sizeof(au_id_t)); - if (error) - return (error); -#if CONFIG_MACF - error = mac_proc_check_setauid(p, temp_au_id); - if (error) - return (error); -#endif - - /* - * we are modifying the audit info in a credential so we need a new - * credential (or take another reference on an existing credential that - * matches our new one). We must do this because the audit info in the - * credential is used as part of our hash key. Get current credential - * in the target process and take a reference while we muck with it. - */ - for (;;) { - kauth_cred_t my_cred, my_new_cred; - struct auditinfo temp_auditinfo; - - my_cred = kauth_cred_proc_ref(p); - /* - * Set the credential with new info. If there is no change, - * we get back the same credential we passed in; if there is - * a change, we drop the reference on the credential we - * passed in. The subsequent compare is safe, because it is - * a pointer compare rather than a contents compare. - */ - temp_auditinfo = my_cred->cr_au; - temp_auditinfo.ai_auid = temp_au_id; - my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); - - if (my_cred != my_new_cred) { - proc_lock(p); - /* need to protect for a race where another thread also changed - * the credential after we took our reference. If p_ucred has - * changed then we should restart this again with the new cred. 
- */ - if (p->p_ucred != my_cred) { - proc_unlock(p); - kauth_cred_unref(&my_new_cred); - /* try again */ - continue; - } - p->p_ucred = my_new_cred; - proc_unlock(p); - } - /* drop old proc reference or our extra reference */ - kauth_cred_unref(&my_cred); - break; - } - - /* propagate the change from the process to Mach task */ - set_security_token(p); - - audit_arg_auid(kauth_cred_get()->cr_au.ai_auid); - return (0); -} - -/* - * System calls to get and set process audit information. - * If the caller is privileged, they get the whole set of - * audit information. Otherwise, the real audit mask is - * filtered out - but the rest of the information is - * returned. - */ -/* ARGSUSED */ -int -getaudit(proc_t p, struct getaudit_args *uap, __unused register_t *retval) -{ - struct auditinfo ai; - int error; - -#if CONFIG_MACF - error = mac_proc_check_getaudit(p); - if (error) - return (error); -#endif - - ai = kauth_cred_get()->cr_au; - - /* only superuser gets to see the real mask */ - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) { - ai.ai_mask.am_success = ~0; - ai.ai_mask.am_failure = ~0; - } - - error = copyout(&ai, uap->auditinfo, sizeof(ai)); - if (error) - return (error); - - return (0); -} - -/* ARGSUSED */ -int -setaudit(proc_t p, struct setaudit_args *uap, __unused register_t *retval) -{ - int error; - struct auditinfo temp_auditinfo; - kauth_cred_t safecred; - - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) - return (error); - error = copyin(uap->auditinfo, - (void *)&temp_auditinfo, - sizeof(temp_auditinfo)); - if (error) - return (error); -#if CONFIG_MACF - error = mac_proc_check_setaudit(p, &temp_auditinfo); - if (error) - return (error); - -#endif - - - /* - * we are modifying the audit info in a credential so we need a new - * credential (or take another reference on an existing credential that - * matches our new one). We must do this because the audit info in the - * credential is used as part of our hash key. 
Get current credential - * in the target process and take a reference while we muck with it. - */ - for (;;) { - kauth_cred_t my_cred, my_new_cred; - - my_cred = kauth_cred_proc_ref(p); - /* - * Set the credential with new info. If there is no change, - * we get back the same credential we passed in; if there is - * a change, we drop the reference on the credential we - * passed in. The subsequent compare is safe, because it is - * a pointer compare rather than a contents compare. - */ - my_new_cred = kauth_cred_setauditinfo(my_cred, &temp_auditinfo); - - if (my_cred != my_new_cred) { - proc_lock(p); - /* need to protect for a race where another thread also changed - * the credential after we took our reference. If p_ucred has - * changed then we should restart this again with the new cred. - */ - if (p->p_ucred != my_cred) { - proc_unlock(p); - kauth_cred_unref(&my_new_cred); - /* try again */ - continue; - } - p->p_ucred = my_new_cred; - proc_unlock(p); - } - /* drop old proc reference or our extra reference */ - kauth_cred_unref(&my_cred); - break; - } - - /* propagate the change from the process to Mach task */ - set_security_token(p); - - safecred = kauth_cred_proc_ref(p); - audit_arg_auditinfo(&safecred->cr_au); - kauth_cred_unref(&safecred); - - return (0); -} - -/* ARGSUSED */ -int -getaudit_addr(__unused proc_t p, __unused struct getaudit_addr_args *uap, __unused register_t *retval) -{ - return (ENOSYS); -} - -/* ARGSUSED */ -int -setaudit_addr(proc_t p, __unused struct setaudit_addr_args *uap, __unused register_t *retval) -{ - int error; - - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) - return (error); - return (ENOSYS); -} - -/* - * Syscall to manage audit files. 
- * - */ -/* ARGSUSED */ -int -auditctl(proc_t p, struct auditctl_args *uap, __unused register_t *retval) -{ - struct nameidata nd; - kauth_cred_t cred; - struct vnode *vp; - int error; - - error = suser(kauth_cred_get(), &p->p_acflag); - if (error) - return (error); - - vp = NULL; - cred = NULL; - - /* - * If a path is specified, open the replacement vnode, perform - * validity checks, and grab another reference to the current - * credential. - */ - if (uap->path != USER_ADDR_NULL) { - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, - (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), - uap->path, vfs_context_current()); - error = vn_open(&nd, audit_open_flags, 0); - if (error) - goto out; - vp = nd.ni_vp; - - if (vp->v_type != VREG) { - vn_close(vp, audit_close_flags, vfs_context_current()); - vnode_put(vp); - error = EINVAL; - goto out; - } -#if CONFIG_MACF - /* - * Accessibility of the vnode was determined in - * vn_open; the mac_system_check_auditctl should only - * determine whether that vnode is appropriate for - * storing audit data, or that the caller was - * permitted to control the auditing system at all. - * For example, a confidentiality policy may want to - * ensure that audit files are always high - * sensitivity. - */ - - error = mac_system_check_auditctl(kauth_cred_get(), vp); - if (error) { - vn_close(vp, audit_close_flags, vfs_context_current()); - vnode_put(vp); - goto out; - } -#endif - cred = kauth_cred_get_with_ref(); - lck_mtx_lock(audit_mtx); - audit_suspended = 0; - lck_mtx_unlock(audit_mtx); - } -#if CONFIG_MACF - else { - error = mac_system_check_auditctl(kauth_cred_get(), NULL); - if (error) - return (error); - } -#endif - - /* - * a vp and cred of NULL is valid at this point - * and indicates we're to turn off auditing... - */ - audit_rotate_vnode(cred, vp); - if (vp) - vnode_put(vp); -out: - return (error); -} - -/********************************** - * End of system calls. 
* - **********************************/ - -/* - * MPSAFE - */ -struct kaudit_record * -audit_new(int event, proc_t p, __unused struct uthread *uthread) -{ - struct kaudit_record *ar; - int no_record; - kauth_cred_t safecred; - - /* - * Eventually, there may be certain classes of events that - * we will audit regardless of the audit state at the time - * the record is created. These events will generally - * correspond to changes in the audit state. The dummy - * code below is from our first prototype, but may also - * be used in the final version (with modified event numbers). - */ -#if 0 - if (event != AUDIT_EVENT_FILESTOP && event != AUDIT_EVENT_FILESTART) { -#endif - lck_mtx_lock(audit_mtx); - no_record = (audit_suspended || !audit_enabled); - lck_mtx_unlock(audit_mtx); - if (no_record) - return (NULL); -#if 0 - } -#endif - - /* - * Initialize the audit record header. - * XXX: We may want to fail-stop if allocation fails. - * XXX: The number of outstanding uncommitted audit records is - * limited by the number of concurrent threads servicing system - * calls in the kernel. - */ - - ar = (struct kaudit_record *)zalloc(audit_zone); - if (ar == NULL) - return NULL; - - bzero(ar, sizeof(*ar)); - ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC; - ar->k_ar.ar_event = event; - nanotime(&ar->k_ar.ar_starttime); - - safecred = kauth_cred_proc_ref(p); - /* Export the subject credential. 
*/ - cru2x(safecred, &ar->k_ar.ar_subj_cred); - - ar->k_ar.ar_subj_ruid = safecred->cr_ruid; - ar->k_ar.ar_subj_rgid = safecred->cr_rgid; - ar->k_ar.ar_subj_egid = safecred->cr_groups[0]; - ar->k_ar.ar_subj_auid = safecred->cr_au.ai_auid; - ar->k_ar.ar_subj_asid = safecred->cr_au.ai_asid; - ar->k_ar.ar_subj_amask = safecred->cr_au.ai_mask; - ar->k_ar.ar_subj_term = safecred->cr_au.ai_termid; - kauth_cred_unref(&safecred); - - ar->k_ar.ar_subj_pid = p->p_pid; - bcopy(p->p_comm, ar->k_ar.ar_subj_comm, MAXCOMLEN); - -#if CONFIG_MACF - do { - struct mac mac; - - /* Retrieve the MAC labels for the process. */ - ar->k_ar.ar_cred_mac_labels = - (char *)zalloc(audit_mac_label_zone); - if (ar->k_ar.ar_cred_mac_labels == NULL) { - zfree(audit_zone, ar); - return (NULL); - } - mac.m_buflen = MAC_AUDIT_LABEL_LEN; - mac.m_string = ar->k_ar.ar_cred_mac_labels; - mac_cred_label_externalize_audit(p, &mac); - - /* - * grab space for the reconds. - */ - ar->k_ar.ar_mac_records = (struct mac_audit_record_list_t *) - kalloc(sizeof(*ar->k_ar.ar_mac_records)); - if (ar->k_ar.ar_mac_records == NULL) { - zfree(audit_mac_label_zone, - ar->k_ar.ar_cred_mac_labels); - zfree(audit_zone, ar); - return (NULL); - } - - LIST_INIT(ar->k_ar.ar_mac_records); - - ar->k_ar.ar_forced_by_mac = 0; - - } while (0); -#endif - - lck_mtx_lock(audit_mtx); - audit_pre_q_len++; - lck_mtx_unlock(audit_mtx); - - return (ar); -} - -/* - * MPSAFE - * XXXAUDIT: So far, this is unused, and should probably be GC'd. - */ -void -audit_abort(struct kaudit_record *ar) -{ - lck_mtx_lock(audit_mtx); - audit_pre_q_len--; - lck_mtx_unlock(audit_mtx); - audit_free(ar); -} - -/* - * MPSAFE - */ -void -audit_commit(struct kaudit_record *ar, int error, int retval) -{ - int ret; - int sorf; - struct au_mask *aumask; - - if (ar == NULL) - return; - - /* - * Decide whether to commit the audit record by checking the - * error value from the system call and using the appropriate - * audit mask. 
- */ - if (ar->k_ar.ar_subj_auid == AU_DEFAUDITID) - aumask = &audit_nae_mask; - else - aumask = &ar->k_ar.ar_subj_amask; - - if (error) - sorf = AU_PRS_FAILURE; - else - sorf = AU_PRS_SUCCESS; - - switch(ar->k_ar.ar_event) { - - case AUE_OPEN_RWTC: - /* The open syscall always writes a OPEN_RWTC event; limit the - * to the proper type of event based on the flags and the error - * value. - */ - ar->k_ar.ar_event = flags_and_error_to_openevent(ar->k_ar.ar_arg_fflags, error); - break; - - case AUE_SYSCTL: - ar->k_ar.ar_event = ctlname_to_sysctlevent(ar->k_ar.ar_arg_ctlname, ar->k_ar.ar_valid_arg); - break; - - case AUE_AUDITON: - /* Convert the auditon() command to an event */ - ar->k_ar.ar_event = auditon_command_event(ar->k_ar.ar_arg_cmd); - break; - } - - if (au_preselect(ar->k_ar.ar_event, aumask, sorf) != 0) - ar->k_ar_commit |= AR_COMMIT_KERNEL; - - if ((ar->k_ar_commit & (AR_COMMIT_USER | AR_COMMIT_KERNEL)) == 0) { - lck_mtx_lock(audit_mtx); - audit_pre_q_len--; - lck_mtx_unlock(audit_mtx); - audit_free(ar); - return; - } - - ar->k_ar.ar_errno = error; - ar->k_ar.ar_retval = retval; - - /* - * We might want to do some system-wide post-filtering - * here at some point. - */ - - /* - * Timestamp system call end. - */ - nanotime(&ar->k_ar.ar_endtime); - - lck_mtx_lock(audit_mtx); - /* - * Note: it could be that some records initiated while audit was - * enabled should still be committed? - */ - if (audit_suspended || !audit_enabled) { - audit_pre_q_len--; - lck_mtx_unlock(audit_mtx); - audit_free(ar); - return; - } - - /* - * Constrain the number of committed audit records based on - * the configurable parameter. 
- */ - while (audit_q_len >= audit_qctrl.aq_hiwater) { - - ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_COMMIT_EVENT, - THREAD_UNINT, - 0); - lck_mtx_unlock(audit_mtx); - - assert(ret == THREAD_WAITING); - - ret = thread_block(THREAD_CONTINUE_NULL); - assert(ret == THREAD_AWAKENED); - lck_mtx_lock(audit_mtx); - } - - TAILQ_INSERT_TAIL(&audit_q, ar, k_q); - audit_q_len++; - audit_pre_q_len--; - wait_queue_wakeup_one(audit_wait_queue, AUDIT_WORKER_EVENT, THREAD_AWAKENED); - lck_mtx_unlock(audit_mtx); -} - -/* - * If we're out of space and need to suspend unprivileged - * processes, do that here rather than trying to allocate - * another audit record. - */ -static void -audit_new_wait(int audit_event, proc_t proc, struct uthread *uthread) -{ - int ret; - - if (audit_in_failure && - suser(kauth_cred_get(), &proc->p_acflag) != 0) { - ret = wait_queue_assert_wait(audit_wait_queue, - AUDIT_FAILURE_EVENT, THREAD_UNINT, 0); - assert(ret == THREAD_WAITING); - (void)thread_block(THREAD_CONTINUE_NULL); - panic("audit_failing_stop: thread continued"); - } - uthread->uu_ar = audit_new(audit_event, proc, uthread); -} - -/* - * Calls to set up and tear down audit structures associated with - * each system call. - */ -void -audit_syscall_enter(unsigned short code, proc_t proc, - struct uthread *uthread) -{ - int audit_event; - struct au_mask *aumask; - kauth_cred_t my_cred; - - audit_event = sys_au_event[code]; - if (audit_event == AUE_NULL) - return; - - assert(uthread->uu_ar == NULL); - - /* Check which audit mask to use; either the kernel non-attributable - * event mask or the process audit mask. - */ - my_cred = kauth_cred_proc_ref(proc); - - if (my_cred->cr_au.ai_auid == AU_DEFAUDITID) - aumask = &audit_nae_mask; - else - aumask = &my_cred->cr_au.ai_mask; - - /* - * Allocate an audit record, if preselection allows it, and store - * in the BSD thread for later use. 
- */ - -#if CONFIG_MACF - do { - int error; - - error = mac_audit_check_preselect(my_cred, code, - (void *) uthread->uu_arg); - - if (error == MAC_AUDIT_YES) { - uthread->uu_ar = audit_new(audit_event, proc, uthread); - uthread->uu_ar->k_ar.ar_forced_by_mac = 1; - au_to_text("Forced by a MAC policy"); - } - else if (error == MAC_AUDIT_NO) { - uthread->uu_ar = NULL; - } - else if (error == MAC_AUDIT_DEFAULT && - au_preselect(audit_event, &my_cred->cr_au.ai_mask, - AU_PRS_FAILURE | AU_PRS_SUCCESS)) - audit_new_wait(audit_event, proc, uthread); - } while (0); -#else - if (au_preselect(audit_event, &my_cred->cr_au.ai_mask, - AU_PRS_FAILURE | AU_PRS_SUCCESS)) { - audit_new_wait(audit_event, proc, uthread); - } else { - uthread->uu_ar = NULL; - } -#endif - kauth_cred_unref(&my_cred); -} - -/* - * Note: The audit_syscall_exit() parameter list was modified to support - * mac_audit_check_postselect(), which requires the Darwin syscall number. - */ -#if CONFIG_MACF -void -audit_syscall_exit(unsigned short code, int error, AUDIT_PRINTF_ONLY proc_t proc, struct uthread *uthread) -#else -void -audit_syscall_exit(int error, AUDIT_PRINTF_ONLY proc_t proc, struct uthread *uthread) -#endif -{ - int retval; - - /* - * Commit the audit record as desired; once we pass the record - * into audit_commit(), the memory is owned by the audit - * subsystem. - * The return value from the system call is stored on the user - * thread. If there was an error, the return value is set to -1, - * imitating the behavior of the cerror routine. - */ - if (error) - retval = -1; - else - retval = uthread->uu_rval[0]; - -#if CONFIG_MACF - do { - int mac_error; - - if (uthread->uu_ar == NULL) /* syscall wasn't audited */ - goto out; - - /* - * Note, no other postselect mechanism exists. If - * mac_audit_check_postselect returns MAC_AUDIT_NO, the - * record will be suppressed. Other values at this - * point result in the audit record being committed. 
- * This suppression behavior will probably go away in - * the port to 10.3.4. - */ - mac_error = mac_audit_check_postselect(kauth_cred_get(), code, - (void *) uthread->uu_arg, error, retval, - uthread->uu_ar->k_ar.ar_forced_by_mac); - - if (mac_error == MAC_AUDIT_YES) - uthread->uu_ar->k_ar_commit |= AR_COMMIT_KERNEL; - else if (mac_error == MAC_AUDIT_NO) { - audit_free(uthread->uu_ar); - goto out; - } - - } while (0); - -#endif - audit_commit(uthread->uu_ar, error, retval); - if (uthread->uu_ar != NULL) { - AUDIT_PRINTF(("audit record committed by pid %d\n", proc->p_pid)); - } - -#if CONFIG_MACF -out: -#endif - uthread->uu_ar = NULL; - -} - -/* - * Calls to set up and tear down audit structures used during Mach - * system calls. - */ -void -audit_mach_syscall_enter(unsigned short audit_event) -{ - struct uthread *uthread; - proc_t proc; - struct au_mask *aumask; - kauth_cred_t my_cred; - - if (audit_event == AUE_NULL) - return; - - uthread = curuthread(); - if (uthread == NULL) - return; - - proc = current_proc(); - if (proc == NULL) - return; - - assert(uthread->uu_ar == NULL); - - my_cred = kauth_cred_proc_ref(proc); - - /* Check which audit mask to use; either the kernel non-attributable - * event mask or the process audit mask. - */ - if (my_cred->cr_au.ai_auid == AU_DEFAUDITID) - aumask = &audit_nae_mask; - else - aumask = &my_cred->cr_au.ai_mask; - - kauth_cred_unref(&my_cred); - - /* - * Allocate an audit record, if desired, and store in the BSD - * thread for later use. - */ - if (au_preselect(audit_event, aumask, - AU_PRS_FAILURE | AU_PRS_SUCCESS)) { - uthread->uu_ar = audit_new(audit_event, proc, uthread); - } else { - uthread->uu_ar = NULL; - } -} - -void -audit_mach_syscall_exit(int retval, struct uthread *uthread) -{ - /* The error code from Mach system calls is the same as the - * return value - */ - /* XXX Is the above statement always true? 
*/ - audit_commit(uthread->uu_ar, retval, retval); - uthread->uu_ar = NULL; - -} - -/* - * Calls to manipulate elements of the audit record structure from system - * call code. Macro wrappers will prevent this functions from being - * entered if auditing is disabled, avoiding the function call cost. We - * check the thread audit record pointer anyway, as the audit condition - * could change, and pre-selection may not have allocated an audit - * record for this event. - */ -void -audit_arg_addr(user_addr_t addr) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_addr = CAST_DOWN(void *, addr); /* XXX */ - ar->k_ar.ar_valid_arg |= ARG_ADDR; -} - -void -audit_arg_len(user_size_t len) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_len = CAST_DOWN(int, len); /* XXX */ - ar->k_ar.ar_valid_arg |= ARG_LEN; -} - -void -audit_arg_fd(int fd) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_fd = fd; - ar->k_ar.ar_valid_arg |= ARG_FD; -} - -void -audit_arg_fflags(int fflags) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_fflags = fflags; - ar->k_ar.ar_valid_arg |= ARG_FFLAGS; -} - -void -audit_arg_gid(gid_t gid, gid_t egid, gid_t rgid, gid_t sgid) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_gid = gid; - ar->k_ar.ar_arg_egid = egid; - ar->k_ar.ar_arg_rgid = rgid; - ar->k_ar.ar_arg_sgid = sgid; - ar->k_ar.ar_valid_arg |= (ARG_GID | ARG_EGID | ARG_RGID | ARG_SGID); -} - -void -audit_arg_uid(uid_t uid, uid_t euid, uid_t ruid, uid_t suid) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_uid = uid; - ar->k_ar.ar_arg_euid = euid; - ar->k_ar.ar_arg_ruid = ruid; - ar->k_ar.ar_arg_suid = suid; - ar->k_ar.ar_valid_arg |= (ARG_UID | ARG_EUID | ARG_RUID | ARG_SUID); -} - -void 
-audit_arg_groupset(const gid_t *gidset, u_int gidset_size) -{ - uint i; - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - for (i = 0; i < gidset_size; i++) - ar->k_ar.ar_arg_groups.gidset[i] = gidset[i]; - ar->k_ar.ar_arg_groups.gidset_size = gidset_size; - ar->k_ar.ar_valid_arg |= ARG_GROUPSET; -} - -void -audit_arg_login(const char *login) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - strlcpy(ar->k_ar.ar_arg_login, login, MAXLOGNAME); - - ar->k_ar.ar_valid_arg |= ARG_LOGIN; -} - -void -audit_arg_ctlname(const int *name, int namelen) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - bcopy(name, &ar->k_ar.ar_arg_ctlname, namelen * sizeof(int)); - ar->k_ar.ar_arg_len = namelen; - ar->k_ar.ar_valid_arg |= (ARG_CTLNAME | ARG_LEN); -} - -void -audit_arg_mask(int mask) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_mask = mask; - ar->k_ar.ar_valid_arg |= ARG_MASK; -} - -void -audit_arg_mode(mode_t mode) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_mode = mode; - ar->k_ar.ar_valid_arg |= ARG_MODE; -} - -void -audit_arg_dev(int dev) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_dev = dev; - ar->k_ar.ar_valid_arg |= ARG_DEV; -} - -void -audit_arg_value(long value) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_value = value; - ar->k_ar.ar_valid_arg |= ARG_VALUE; -} - -void -audit_arg_owner(uid_t uid, gid_t gid) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_uid = uid; - ar->k_ar.ar_arg_gid = gid; - ar->k_ar.ar_valid_arg |= (ARG_UID | ARG_GID); -} - -void -audit_arg_pid(pid_t pid) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_pid = pid; - 
ar->k_ar.ar_valid_arg |= ARG_PID; -} - -void -audit_arg_process(proc_t p) -{ - struct kaudit_record *ar; - kauth_cred_t my_cred; - - ar = currecord(); - if ((ar == NULL) || (p == NULL)) - return; - - my_cred = kauth_cred_proc_ref(p); - ar->k_ar.ar_arg_auid = my_cred->cr_au.ai_auid; - ar->k_ar.ar_arg_euid = my_cred->cr_uid; - ar->k_ar.ar_arg_egid = my_cred->cr_groups[0]; - ar->k_ar.ar_arg_ruid = my_cred->cr_ruid; - ar->k_ar.ar_arg_rgid = my_cred->cr_rgid; - ar->k_ar.ar_arg_asid = my_cred->cr_au.ai_asid; - ar->k_ar.ar_arg_termid = my_cred->cr_au.ai_termid; - kauth_cred_unref(&my_cred); - - ar->k_ar.ar_valid_arg |= ARG_AUID | ARG_EUID | ARG_EGID | ARG_RUID | - ARG_RGID | ARG_ASID | ARG_TERMID | ARG_PROCESS; -} - -void -audit_arg_signum(u_int signum) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_signum = signum; - ar->k_ar.ar_valid_arg |= ARG_SIGNUM; -} - -void -audit_arg_socket(int sodomain, int sotype, int soprotocol) -{ - - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_sockinfo.so_domain = sodomain; - ar->k_ar.ar_arg_sockinfo.so_type = sotype; - ar->k_ar.ar_arg_sockinfo.so_protocol = soprotocol; - ar->k_ar.ar_valid_arg |= ARG_SOCKINFO; -} - -/* - * Note that the current working directory vp must be supplied at the audit - * call site to permit per thread current working directories, and that it - * must take a upath starting with '/' into account for chroot if the path - * is absolute. This results in the real (non-chroot) path being recorded - * in the audit record. 
- */ -void -audit_arg_sockaddr(struct vnode *cwd_vp, struct sockaddr *so) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL || cwd_vp == NULL || so == NULL) - return; - - bcopy(so, &ar->k_ar.ar_arg_sockaddr, sizeof(ar->k_ar.ar_arg_sockaddr)); - switch (so->sa_family) { - case AF_INET: - ar->k_ar.ar_valid_arg |= ARG_SADDRINET; - break; - case AF_INET6: - ar->k_ar.ar_valid_arg |= ARG_SADDRINET6; - break; - case AF_UNIX: - audit_arg_upath(cwd_vp, ((struct sockaddr_un *)so)->sun_path, - ARG_UPATH1); - ar->k_ar.ar_valid_arg |= ARG_SADDRUNIX; - break; - } -} - -void -audit_arg_auid(uid_t auid) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_auid = auid; - ar->k_ar.ar_valid_arg |= ARG_AUID; -} - -void -audit_arg_auditinfo(const struct auditinfo *au_info) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_auid = au_info->ai_auid; - ar->k_ar.ar_arg_asid = au_info->ai_asid; - ar->k_ar.ar_arg_amask.am_success = au_info->ai_mask.am_success; - ar->k_ar.ar_arg_amask.am_failure = au_info->ai_mask.am_failure; - ar->k_ar.ar_arg_termid.port = au_info->ai_termid.port; - ar->k_ar.ar_arg_termid.machine = au_info->ai_termid.machine; - ar->k_ar.ar_valid_arg |= ARG_AUID | ARG_ASID | ARG_AMASK | ARG_TERMID; -} - -void -audit_arg_text(const char *text) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - /* Invalidate the text string */ - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_TEXT); - if (text == NULL) - return; - - if (ar->k_ar.ar_arg_text == NULL) { - ar->k_ar.ar_arg_text = (char *)kalloc(MAXPATHLEN); - if (ar->k_ar.ar_arg_text == NULL) - return; - } - - strlcpy(ar->k_ar.ar_arg_text, text, MAXPATHLEN); - ar->k_ar.ar_valid_arg |= ARG_TEXT; -} - -void -audit_arg_cmd(int cmd) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_cmd = cmd; - ar->k_ar.ar_valid_arg |= ARG_CMD; -} - -void 
-audit_arg_svipc_cmd(int cmd) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_svipc_cmd = cmd; - ar->k_ar.ar_valid_arg |= ARG_SVIPC_CMD; -} - -void -audit_arg_svipc_perm(const struct ipc_perm *perm) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - bcopy(perm, &ar->k_ar.ar_arg_svipc_perm, - sizeof(ar->k_ar.ar_arg_svipc_perm)); - ar->k_ar.ar_valid_arg |= ARG_SVIPC_PERM; -} - -void -audit_arg_svipc_id(int id) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_svipc_id = id; - ar->k_ar.ar_valid_arg |= ARG_SVIPC_ID; -} - -void -audit_arg_svipc_addr(user_addr_t addr) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_svipc_addr = addr; - ar->k_ar.ar_valid_arg |= ARG_SVIPC_ADDR; -} - -void -audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_pipc_perm.pipc_uid = uid; - ar->k_ar.ar_arg_pipc_perm.pipc_gid = gid; - ar->k_ar.ar_arg_pipc_perm.pipc_mode = mode; - ar->k_ar.ar_valid_arg |= ARG_POSIX_IPC_PERM; -} - -void -audit_arg_auditon(const union auditon_udata *udata) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - bcopy((const void *)udata, &ar->k_ar.ar_arg_auditon, - sizeof(ar->k_ar.ar_arg_auditon)); - ar->k_ar.ar_valid_arg |= ARG_AUDITON; -} - -/* - * Audit information about a file, either the file's vnode info, or its - * socket address info. 
- */ -void -audit_arg_file(__unused proc_t p, const struct fileproc *fp) -{ - struct kaudit_record *ar; - struct socket *so; - struct inpcb *pcb; - - if (fp->f_fglob->fg_type == DTYPE_VNODE) { - audit_arg_vnpath_withref((struct vnode *)fp->f_fglob->fg_data, ARG_VNODE1); - return; - } - - if (fp->f_fglob->fg_type == DTYPE_SOCKET) { - ar = currecord(); - if (ar == NULL) - return; - so = (struct socket *)fp->f_fglob->fg_data; - if (INP_CHECK_SOCKAF(so, PF_INET)) { - if (so->so_pcb == NULL) - return; - ar->k_ar.ar_arg_sockinfo.so_type = - so->so_type; - ar->k_ar.ar_arg_sockinfo.so_domain = - INP_SOCKAF(so); - ar->k_ar.ar_arg_sockinfo.so_protocol = - so->so_proto->pr_protocol; - pcb = (struct inpcb *)so->so_pcb; - ar->k_ar.ar_arg_sockinfo.so_raddr = - pcb->inp_faddr.s_addr; - ar->k_ar.ar_arg_sockinfo.so_laddr = - pcb->inp_laddr.s_addr; - ar->k_ar.ar_arg_sockinfo.so_rport = - pcb->inp_fport; - ar->k_ar.ar_arg_sockinfo.so_lport = - pcb->inp_lport; - ar->k_ar.ar_valid_arg |= ARG_SOCKINFO; - } - } - -} - - -/* - * Store a path as given by the user process for auditing into the audit - * record stored on the user thread. This function will allocate the memory to - * store the path info if not already available. This memory will be - * freed when the audit record is freed. Note that the current working - * directory vp must be supplied at the audit call site to permit per thread - * current working directories, and that it must take a upath starting with - * '/' into account for chroot if the path is absolute. This results in the - * real (non-chroot) path being recorded in the audit record. - */ -void -audit_arg_upath(struct vnode *cwd_vp, char *upath, u_int64_t flags) -{ - struct kaudit_record *ar; - char **pathp; - - if (cwd_vp == NULL || upath == NULL) - return; /* nothing to do! 
*/ - - if ((flags & (ARG_UPATH1 | ARG_UPATH2)) == 0) - return; - - ar = currecord(); - if (ar == NULL) /* This will be the case for unaudited system calls */ - return; - - if (flags & ARG_UPATH1) { - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_UPATH1); - pathp = &ar->k_ar.ar_arg_upath1; - } - else { - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_UPATH2); - pathp = &ar->k_ar.ar_arg_upath2; - } - - if (*pathp == NULL) { - *pathp = (char *)kalloc(MAXPATHLEN); - if (*pathp == NULL) - return; - } - - if (canon_path(cwd_vp, upath, *pathp) == 0) { - if (flags & ARG_UPATH1) - ar->k_ar.ar_valid_arg |= ARG_UPATH1; - else - ar->k_ar.ar_valid_arg |= ARG_UPATH2; - } else { - kfree(*pathp, MAXPATHLEN); - *pathp = NULL; - } -} - -/* - * Function to save the path and vnode attr information into the audit - * record. - * - * It is assumed that the caller will hold any vnode locks necessary to - * perform a VNOP_GETATTR() on the passed vnode. - * - * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but - * always provides access to the generation number as we need that - * to construct the BSM file ID. - * XXX: We should accept the process argument from the caller, since - * it's very likely they already have a reference. - * XXX: Error handling in this function is poor. 
- */ -void -audit_arg_vnpath(struct vnode *vp, u_int64_t flags) -{ - struct kaudit_record *ar; - struct vnode_attr va; - int error; - int len; - char **pathp; - struct vnode_au_info *vnp; - proc_t p; -#if CONFIG_MACF - char **vnode_mac_labelp; - struct mac mac; -#endif - - if (vp == NULL) - return; - - ar = currecord(); - if (ar == NULL) /* This will be the case for unaudited system calls */ - return; - - if ((flags & (ARG_VNODE1 | ARG_VNODE2)) == 0) - return; - - p = current_proc(); - - if (flags & ARG_VNODE1) { - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_KPATH1); - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_VNODE1); - pathp = &ar->k_ar.ar_arg_kpath1; - vnp = &ar->k_ar.ar_arg_vnode1; -#if CONFIG_MACF - vnode_mac_labelp = &ar->k_ar.ar_vnode1_mac_labels; -#endif - } - else { - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_KPATH2); - ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_VNODE2); - pathp = &ar->k_ar.ar_arg_kpath2; - vnp = &ar->k_ar.ar_arg_vnode2; -#if CONFIG_MACF - vnode_mac_labelp = &ar->k_ar.ar_vnode2_mac_labels; -#endif - } - - if (*pathp == NULL) { - *pathp = (char *)kalloc(MAXPATHLEN); - if (*pathp == NULL) - return; - } - - /* - * If vn_getpath() succeeds, place it in a string buffer - * attached to the audit record, and set a flag indicating - * it is present. - */ - len = MAXPATHLEN; - if (vn_getpath(vp, *pathp, &len) == 0) { - if (flags & ARG_VNODE1) - ar->k_ar.ar_valid_arg |= ARG_KPATH1; - else - ar->k_ar.ar_valid_arg |= ARG_KPATH2; - } else { - kfree(*pathp, MAXPATHLEN); - *pathp = NULL; - } - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_mode); - VATTR_WANTED(&va, va_uid); - VATTR_WANTED(&va, va_gid); - VATTR_WANTED(&va, va_rdev); - VATTR_WANTED(&va, va_fsid); - VATTR_WANTED(&va, va_fileid); - VATTR_WANTED(&va, va_gen); - error = vnode_getattr(vp, &va, vfs_context_current()); - if (error) { - /* XXX: How to handle this case? 
*/ - return; - } - -#if CONFIG_MACF - if (*vnode_mac_labelp == NULL) { - *vnode_mac_labelp = (char *)zalloc(audit_mac_label_zone); - if (*vnode_mac_labelp != NULL) { - mac.m_buflen = MAC_AUDIT_LABEL_LEN; - mac.m_string = *vnode_mac_labelp; - mac_vnode_label_externalize_audit(vp, &mac); - } - - - - } -#endif - - /* XXX do we want to fall back here when these aren't supported? */ - vnp->vn_mode = va.va_mode; - vnp->vn_uid = va.va_uid; - vnp->vn_gid = va.va_gid; - vnp->vn_dev = va.va_rdev; - vnp->vn_fsid = va.va_fsid; - vnp->vn_fileid = (u_long)va.va_fileid; - vnp->vn_gen = va.va_gen; - if (flags & ARG_VNODE1) - ar->k_ar.ar_valid_arg |= ARG_VNODE1; - else - ar->k_ar.ar_valid_arg |= ARG_VNODE2; - -} - -void -audit_arg_vnpath_withref(struct vnode *vp, u_int64_t flags) -{ - if (vp == NULL || vnode_getwithref(vp)) - return; - audit_arg_vnpath(vp, flags); - (void)vnode_put(vp); -} - -void -audit_arg_mach_port1(mach_port_name_t port) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_mach_port1 = port; - ar->k_ar.ar_valid_arg |= ARG_MACHPORT1; -} - -void -audit_arg_mach_port2(mach_port_name_t port) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - ar->k_ar.ar_arg_mach_port2 = port; - ar->k_ar.ar_valid_arg |= ARG_MACHPORT2; -} - -/* - * The close() system call uses it's own audit call to capture the - * path/vnode information because those pieces are not easily obtained - * within the system call itself. - */ -void -audit_sysclose(proc_t p, int fd) -{ - struct fileproc *fp; - struct vnode *vp; - - audit_arg_fd(fd); - - if (fp_getfvp(p, fd, &fp, &vp) != 0) - return; - - audit_arg_vnpath_withref((struct vnode *)fp->f_fglob->fg_data, ARG_VNODE1); - file_drop(fd); -} - -#if CONFIG_MACF -/* - * This function is called by the MAC Framework to add audit data - * from a policy to the current audit record. 
- */ -int -audit_mac_data(int type, int len, u_char *data) { - struct kaudit_record *cur; - struct mac_audit_record *record; - int ret = 0; - - if (audit_enabled == 0) { - ret = ENOTSUP; - goto out_fail; - } - - cur = currecord(); - if (cur == NULL) { - ret = ENOTSUP; - goto out_fail; - } - - /* - * XXX: Note that we silently drop the audit data if this - * allocation fails - this is consistent with the rest of the - * audit implementation. - */ - record = (struct mac_audit_record *)kalloc(sizeof(*record)); - if (record == NULL) - goto out_fail; - - record->type = type; - record->length = len; - record->data = data; - LIST_INSERT_HEAD(cur->k_ar.ar_mac_records, record, records); - - return (0); - -out_fail: - kfree(data, len); - return (ret); -} - -void -audit_arg_mac_string(const char *string) -{ - struct kaudit_record *ar; - - ar = currecord(); - if (ar == NULL) - return; - - if (ar->k_ar.ar_arg_mac_string == NULL) { - ar->k_ar.ar_arg_mac_string = - (char *)kalloc(MAC_MAX_LABEL_BUF_LEN + MAC_ARG_PREFIX_LEN); - /* This should be a rare event. If kalloc() returns NULL, the - * system is low on kernel virtual memory. To be consistent with the - * rest of audit, just return (may need to panic if required to for audit6). - */ - if (ar->k_ar.ar_arg_mac_string == NULL) - return; - } - strncpy(ar->k_ar.ar_arg_mac_string, MAC_ARG_PREFIX, MAC_ARG_PREFIX_LEN); - strncpy(ar->k_ar.ar_arg_mac_string + MAC_ARG_PREFIX_LEN, string, MAC_MAX_LABEL_BUF_LEN); - ar->k_ar.ar_valid_arg |= ARG_MAC_STRING; - -} -#endif /* MAC */ - -/* - * kau_will_audit can be used by a security policy to determine - * if an audit record will be stored, reducing wasted memory allocation - * and string handling. 
- */ - -int -kau_will_audit(void) -{ - - return (audit_enabled && currecord() != NULL); -} - -#else /* !AUDIT */ - -void -audit_init(void) -{ - -} - -void -audit_shutdown(void) -{ - -} - -int -audit(proc_t p, struct audit_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -auditon(proc_t p, struct auditon_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -getauid(proc_t p, struct getauid_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -setauid(proc_t p, struct setauid_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -getaudit(proc_t p, struct getaudit_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -setaudit(proc_t p, struct setaudit_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -getaudit_addr(proc_t p, struct getaudit_addr_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -setaudit_addr(proc_t p, struct setaudit_addr_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -int -auditctl(proc_t p, struct auditctl_args *uap, register_t *retval) -{ - return (ENOSYS); -} - -#if CONFIG_MACF -void -audit_mac_data(int type, int len, u_char *data) -{ -} - -int -kau_will_audit() -{ - return (0); -} -#endif - -#endif /* AUDIT */ diff --git a/bsd/kern/kern_authorization.c b/bsd/kern/kern_authorization.c index b65a828c2..263fc28b4 100644 --- a/bsd/kern/kern_authorization.c +++ b/bsd/kern/kern_authorization.c @@ -43,7 +43,7 @@ #include #include -#include +#include #include #include @@ -873,6 +873,7 @@ kauth_acl_inherit(vnode_t dvp, kauth_acl_t initial, kauth_acl_t *product, int is if (inherit->acl_ace[i].ace_flags & (isdir ? 
KAUTH_ACE_DIRECTORY_INHERIT : KAUTH_ACE_FILE_INHERIT)) { result->acl_ace[index] = inherit->acl_ace[i]; result->acl_ace[index].ace_flags |= KAUTH_ACE_INHERITED; + result->acl_ace[index].ace_flags &= ~KAUTH_ACE_ONLY_INHERIT; /* * We do not re-inherit inheritance flags * if the ACE from the container has a @@ -947,7 +948,7 @@ kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) * The upper bound must be less than KAUTH_ACL_MAX_ENTRIES. The * value here is fairly arbitrary. It's ok to have a zero count. */ - known_bound = xsecurity + sizeof(struct kauth_filesec); + known_bound = xsecurity + KAUTH_FILESEC_SIZE(0); uaddr = mach_vm_round_page(known_bound); count = (uaddr - known_bound) / sizeof(struct kauth_ace); if (count > 32) @@ -988,6 +989,7 @@ kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp) kauth_filesec_free(fsec); } else { *xsecdestpp = fsec; + AUDIT_ARG(opaque, fsec, copysize); } return(error); } diff --git a/bsd/kern/kern_bsm_audit.c b/bsd/kern/kern_bsm_audit.c deleted file mode 100644 index 4c58ed475..000000000 --- a/bsd/kern/kern_bsm_audit.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#if CONFIG_MACF -#include -#endif - -/* The number of BSM records allocated. */ -static int bsm_rec_count = 0; - -/* - * Records that can be recycled are maintained in the list given below - * The maximum number of elements that can be present in this list is - * bounded by MAX_AUDIT_RECORDS. Memory allocated for these records are never - * freed - */ -LIST_HEAD(, au_record) bsm_free_q; - -/* - * Lock for serializing access to the list of audit records. - */ -static lck_grp_t *bsm_audit_grp; -static lck_attr_t *bsm_audit_attr; -static lck_grp_attr_t *bsm_audit_grp_attr; -static lck_mtx_t *bsm_audit_mutex; - -static void audit_sys_auditon(struct audit_record *ar, struct au_record *rec); - -/* - * Initialize the BSM auditing subsystem. 
- */ -void -kau_init(void) -{ - printf("BSM auditing present\n"); - LIST_INIT(&bsm_free_q); - bsm_audit_grp_attr = lck_grp_attr_alloc_init(); - bsm_audit_grp = lck_grp_alloc_init("BSM Audit", bsm_audit_grp_attr); - bsm_audit_attr = lck_attr_alloc_init(); - bsm_audit_mutex = lck_mtx_alloc_init(bsm_audit_grp, bsm_audit_attr); - au_evclassmap_init(); -} - -/* - * This call reserves memory for the audit record. - * Memory must be guaranteed before any auditable event can be - * generated. - * The au_record structure maintains a reference to the - * memory allocated above and also the list of tokens associated - * with this record - */ -struct au_record * -kau_open(void) -{ - struct au_record *rec = NULL; - - /* - * Find an unused record, remove it from the free list, mark as used - */ - lck_mtx_lock(bsm_audit_mutex); - if (!LIST_EMPTY(&bsm_free_q)) { - rec = LIST_FIRST(&bsm_free_q); - LIST_REMOVE(rec, au_rec_q); - } - lck_mtx_unlock(bsm_audit_mutex); - - if (rec == NULL) { - lck_mtx_lock(bsm_audit_mutex); - if (bsm_rec_count >= MAX_AUDIT_RECORDS) { - /* XXX We need to increase size of MAX_AUDIT_RECORDS */ - lck_mtx_unlock(bsm_audit_mutex); - return NULL; - } - lck_mtx_unlock(bsm_audit_mutex); - - /* - * Create a new BSM kernel record. 
- */ - rec = (struct au_record *)kalloc(sizeof(*rec)); - if(rec == NULL) { - return NULL; - } - rec->data = (u_char *)kalloc(MAX_AUDIT_RECORD_SIZE * sizeof(u_char)); - if((rec->data) == NULL) { - kfree(rec, sizeof(*rec)); - return NULL; - } - lck_mtx_lock(bsm_audit_mutex); - bsm_rec_count++; - lck_mtx_unlock(bsm_audit_mutex); - } - memset(rec->data, 0, MAX_AUDIT_RECORD_SIZE); - - TAILQ_INIT(&rec->token_q); - rec->len = 0; - rec->used = 1; - - return rec; -} - -/* - * Store the token with the record descriptor - * - */ -int kau_write(struct au_record *rec, struct au_token *tok) -{ - if(tok == NULL) { - return -1; /* Invalid Token */ - } - - /* Add the token to the tail */ - /* - * XXX Not locking here -- we should not be writing to - * XXX the same audit record from different threads - */ - TAILQ_INSERT_TAIL(&rec->token_q, tok, tokens); - - rec->len += tok->len; /* grow record length by token size bytes */ - - return 0; -} - -/* - * Close out the audit record by adding the header token, identifying - * any missing tokens. Write out the tokens to the record memory. 
- */ -int -kau_close(struct au_record *rec, struct timespec *ctime, short event) -{ - u_char *dptr; - size_t tot_rec_size; - token_t *cur, *hdr, *trail; - int retval = 0; - - tot_rec_size = rec->len + HEADER_SIZE + TRAILER_SIZE; - if(tot_rec_size <= MAX_AUDIT_RECORD_SIZE) { - /* Create the header token */ - hdr = kau_to_header32(ctime, tot_rec_size, event, 0); - - if(hdr != NULL) { - /* Add to head of list */ - TAILQ_INSERT_HEAD(&rec->token_q, hdr, tokens); - - trail = au_to_trailer(tot_rec_size); - if(trail != NULL) { - TAILQ_INSERT_TAIL(&rec->token_q, trail, tokens); - } - } - /* Serialize token data to the record */ - - rec->len = tot_rec_size; - dptr = rec->data; - TAILQ_FOREACH(cur, &rec->token_q, tokens) { - memcpy(dptr, cur->t_data, cur->len); - dptr += cur->len; - } - } - - return(retval); -} - -/* - * Free a BSM audit record by releasing all the tokens and clearing the - * audit record information. - */ -void kau_free(struct au_record *rec) -{ - struct au_token *tok; - - /* Free the token list */ - while ((tok = TAILQ_FIRST(&rec->token_q))) { - TAILQ_REMOVE(&rec->token_q, tok, tokens); - kfree(tok, sizeof(*tok) + tok->len); - } - - rec->used = 0; - rec->len = 0; - - lck_mtx_lock(bsm_audit_mutex); - - /* Add the record to the freelist */ - LIST_INSERT_HEAD(&bsm_free_q, rec, au_rec_q); - - lck_mtx_unlock(bsm_audit_mutex); - -} - -/* - * XXX May want turn some (or all) of these macros into functions in order - * to reduce the generated code size. 
- */ -#if CONFIG_MACF -#define MAC_VNODE1_LABEL_TOKEN \ - do { \ - if (ar->ar_vnode1_mac_labels != NULL) { \ - tok = au_to_text(ar->ar_vnode1_mac_labels); \ - kau_write(rec, tok); \ - } \ - } while (0) - -#define MAC_VNODE2_LABEL_TOKEN \ - do { \ - if (ar->ar_vnode2_mac_labels != NULL) { \ - tok = au_to_text(ar->ar_vnode2_mac_labels); \ - kau_write(rec, tok); \ - } \ - } while (0) -#else -#define MAC_VNODE1_LABEL_TOKEN -#define MAC_VNODE2_LABEL_TOKEN -#endif -#define UPATH1_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_UPATH1) { \ - tok = au_to_path(ar->ar_arg_upath1); \ - kau_write(rec, tok); \ - } \ - } while (0) - -#define UPATH2_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_UPATH2) { \ - tok = au_to_path(ar->ar_arg_upath2); \ - kau_write(rec, tok); \ - } \ - } while (0) - -#define UPATH1_KPATH1_VNODE1_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_UPATH1) { \ - tok = au_to_path(ar->ar_arg_upath1); \ - kau_write(rec, tok); \ - } \ - if (ar->ar_valid_arg & ARG_KPATH1) { \ - tok = au_to_path(ar->ar_arg_kpath1); \ - kau_write(rec, tok); \ - } \ - if (ar->ar_valid_arg & ARG_VNODE1) { \ - tok = kau_to_attr32(&ar->ar_arg_vnode1);\ - kau_write(rec, tok); \ - MAC_VNODE1_LABEL_TOKEN; \ - } \ - } while (0) - -#define KPATH1_VNODE1_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_KPATH1) { \ - tok = au_to_path(ar->ar_arg_kpath1); \ - kau_write(rec, tok); \ - } \ - if (ar->ar_valid_arg & ARG_VNODE1) { \ - tok = kau_to_attr32(&ar->ar_arg_vnode1);\ - kau_write(rec, tok); \ - MAC_VNODE1_LABEL_TOKEN; \ - } \ - } while (0) - -#define KPATH2_VNODE2_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_KPATH2) { \ - tok = au_to_path(ar->ar_arg_kpath2); \ - kau_write(rec, tok); \ - } \ - if (ar->ar_valid_arg & ARG_VNODE2) { \ - tok = kau_to_attr32(&ar->ar_arg_vnode2);\ - kau_write(rec, tok); \ - MAC_VNODE2_LABEL_TOKEN; \ - } \ - } while (0) - -#define FD_KPATH1_VNODE1_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_KPATH1) { \ - tok = au_to_path(ar->ar_arg_kpath1); \ - kau_write(rec, tok); 
\ - if (ar->ar_valid_arg & ARG_VNODE1) { \ - tok = kau_to_attr32(&ar->ar_arg_vnode1);\ - kau_write(rec, tok); \ - MAC_VNODE1_LABEL_TOKEN; \ - } \ - } else { \ - tok = au_to_arg32(1, "no path: fd", ar->ar_arg_fd); \ - kau_write(rec, tok); \ - } \ - } while (0) - -#define PROCESS_PID_TOKENS(argn) \ - do { \ - if ((ar->ar_arg_pid > 0) /* Kill a single process */ \ - && (ar->ar_valid_arg & ARG_PROCESS)) { \ - tok = au_to_process(ar->ar_arg_auid, ar->ar_arg_euid, \ - ar->ar_arg_egid, ar->ar_arg_ruid, \ - ar->ar_arg_rgid, ar->ar_arg_pid, \ - ar->ar_arg_asid, &ar->ar_arg_termid); \ - kau_write(rec, tok); \ - } else { \ - tok = au_to_arg32(argn, "process", ar->ar_arg_pid);\ - kau_write(rec, tok); \ - } \ - } while (0) \ - -#define PROCESS_MAC_TOKENS \ - do { \ - if (ar->ar_valid_arg & ARG_MAC_STRING) { \ - tok = au_to_text(ar->ar_arg_mac_string); \ - kau_write(rec, tok); \ - } \ - } while (0) \ - -/* - * Implement auditing for the auditon() system call. The audit tokens - * that are generated depend on the command that was sent into the - * auditon() system call. 
- * - */ -static void -audit_sys_auditon(struct audit_record *ar, struct au_record *rec) -{ - struct au_token *tok; - - switch (ar->ar_arg_cmd) { - case A_SETPOLICY: - if (sizeof(ar->ar_arg_auditon.au_flags) > 4) - tok = au_to_arg64(1, "policy", - ar->ar_arg_auditon.au_flags); - else - tok = au_to_arg32(1, "policy", - ar->ar_arg_auditon.au_flags); - kau_write(rec, tok); - break; - case A_SETKMASK: - tok = au_to_arg32(2, "setkmask:as_success", - ar->ar_arg_auditon.au_mask.am_success); - kau_write(rec, tok); - tok = au_to_arg32(2, "setkmask:as_failure", - ar->ar_arg_auditon.au_mask.am_failure); - kau_write(rec, tok); - break; - case A_SETQCTRL: - tok = au_to_arg32(3, "setqctrl:aq_hiwater", - ar->ar_arg_auditon.au_qctrl.aq_hiwater); - kau_write(rec, tok); - tok = au_to_arg32(3, "setqctrl:aq_lowater", - ar->ar_arg_auditon.au_qctrl.aq_lowater); - kau_write(rec, tok); - tok = au_to_arg32(3, "setqctrl:aq_bufsz", - ar->ar_arg_auditon.au_qctrl.aq_bufsz); - kau_write(rec, tok); - tok = au_to_arg32(3, "setqctrl:aq_delay", - ar->ar_arg_auditon.au_qctrl.aq_delay); - kau_write(rec, tok); - tok = au_to_arg32(3, "setqctrl:aq_minfree", - ar->ar_arg_auditon.au_qctrl.aq_minfree); - kau_write(rec, tok); - break; - case A_SETUMASK: - tok = au_to_arg32(3, "setumask:as_success", - ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); - kau_write(rec, tok); - tok = au_to_arg32(3, "setumask:as_failure", - ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); - kau_write(rec, tok); - break; - case A_SETSMASK: - tok = au_to_arg32(3, "setsmask:as_success", - ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); - kau_write(rec, tok); - tok = au_to_arg32(3, "setsmask:as_failure", - ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); - kau_write(rec, tok); - break; - case A_SETCOND: - if (sizeof(ar->ar_arg_auditon.au_cond) > 4) - tok = au_to_arg64(3, "setcond", - ar->ar_arg_auditon.au_cond); - else - tok = au_to_arg32(3, "setcond", - ar->ar_arg_auditon.au_cond); - kau_write(rec, tok); - break; - case 
A_SETCLASS: - tok = au_to_arg32(2, "setclass:ec_event", - ar->ar_arg_auditon.au_evclass.ec_number); - kau_write(rec, tok); - tok = au_to_arg32(3, "setclass:ec_class", - ar->ar_arg_auditon.au_evclass.ec_class); - kau_write(rec, tok); - break; - case A_SETPMASK: - tok = au_to_arg32(2, "setpmask:as_success", - ar->ar_arg_auditon.au_aupinfo.ap_mask.am_success); - kau_write(rec, tok); - tok = au_to_arg32(2, "setpmask:as_failure", - ar->ar_arg_auditon.au_aupinfo.ap_mask.am_failure); - kau_write(rec, tok); - break; - case A_SETFSIZE: - tok = au_to_arg32(2, "setfsize:filesize", - ar->ar_arg_auditon.au_fstat.af_filesz); - kau_write(rec, tok); - break; - default: - break; - } -} - -/* - * Convert an internal kernel audit record to a BSM record and return - * a success/failure indicator. The BSM record is passed as an out - * parameter to this function. - * Return conditions: - * BSM_SUCCESS: The BSM record is valid - * BSM_FAILURE: Failure; the BSM record is NULL. - * BSM_NOAUDIT: The event is not auditable for BSM; the BSM record is NULL. - */ -int -kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) -{ - struct au_token *tok = NULL, *subj_tok; - struct au_record *rec; - au_tid_t tid; - struct audit_record *ar; - - *pau = NULL; - if (kar == NULL) - return (BSM_FAILURE); - - ar = &kar->k_ar; - - rec = kau_open(); - if (rec == NULL) - return (BSM_FAILURE); - - /* Create the subject token */ - tid.port = ar->ar_subj_term.port; - tid.machine = ar->ar_subj_term.machine; - subj_tok = au_to_subject32(ar->ar_subj_auid, /* audit ID */ - ar->ar_subj_cred.cr_uid, /* eff uid */ - ar->ar_subj_egid, /* eff group id */ - ar->ar_subj_ruid, /* real uid */ - ar->ar_subj_rgid, /* real group id */ - ar->ar_subj_pid, /* process id */ - ar->ar_subj_asid, /* session ID */ - &tid); - - /* The logic inside each case fills in the tokens required for the - * event, except for the header, trailer, and return tokens. The - * header and trailer tokens are added by the kau_close() function. 
- * The return token is added outside of the switch statement. - */ - switch(ar->ar_event) { - - /* - * Socket-related events. - */ - case AUE_ACCEPT: - case AUE_BIND: - case AUE_CONNECT: - case AUE_RECVFROM: - case AUE_RECVMSG: - case AUE_SENDMSG: - case AUE_SENDTO: - tok = au_to_arg32(1, "fd", ar->ar_arg_fd); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_SADDRINET) { - tok = au_to_sock_inet( - (struct sockaddr_in *)&ar->ar_arg_sockaddr); - kau_write(rec, tok); - } - if (ar->ar_valid_arg & ARG_SADDRUNIX) { - tok = au_to_sock_unix( - (struct sockaddr_un *)&ar->ar_arg_sockaddr); - kau_write(rec, tok); - UPATH1_TOKENS; - } - /* XXX Need to handle ARG_SADDRINET6 */ - break; - - case AUE_SOCKET: - case AUE_SOCKETPAIR: - tok = au_to_arg32(1,"domain", ar->ar_arg_sockinfo.so_domain); - kau_write(rec, tok); - tok = au_to_arg32(2,"type", ar->ar_arg_sockinfo.so_type); - kau_write(rec, tok); - tok = au_to_arg32(3,"protocol",ar->ar_arg_sockinfo.so_protocol); - kau_write(rec, tok); - break; - - case AUE_SETSOCKOPT: - case AUE_SHUTDOWN: - tok = au_to_arg32(1, "fd", ar->ar_arg_fd); - kau_write(rec, tok); - break; - - case AUE_ACCT: - if (ar->ar_valid_arg & (ARG_KPATH1 | ARG_UPATH1)) { - UPATH1_KPATH1_VNODE1_TOKENS; - } else { - tok = au_to_arg32(1, "accounting off", 0); - kau_write(rec, tok); - } - break; - - case AUE_SETAUID: - tok = au_to_arg32(2, "setauid", ar->ar_arg_auid); - kau_write(rec, tok); - break; - - case AUE_SETAUDIT: - if (ar->ar_valid_arg & ARG_AUID) { - tok = au_to_arg32(1, "setaudit:auid", ar->ar_arg_auid); - kau_write(rec, tok); - tok = au_to_arg32(1, "setaudit:port", - ar->ar_arg_termid.port); - kau_write(rec, tok); - tok = au_to_arg32(1, "setaudit:machine", - ar->ar_arg_termid.machine); - kau_write(rec, tok); - tok = au_to_arg32(1, "setaudit:as_success", - ar->ar_arg_amask.am_success); - kau_write(rec, tok); - tok = au_to_arg32(1, "setaudit:as_failure", - ar->ar_arg_amask.am_failure); - kau_write(rec, tok); - tok = au_to_arg32(1, "setaudit:asid", 
ar->ar_arg_asid); - kau_write(rec, tok); - } - break; - - case AUE_SETAUDIT_ADDR: - break; /* XXX need to add arguments */ - - case AUE_AUDITON: - /* For AUDITON commands without own event, audit the cmd */ - tok = au_to_arg32(1, "cmd", ar->ar_arg_cmd); - kau_write(rec, tok); - /* fall thru */ - - case AUE_AUDITON_GETCAR: - case AUE_AUDITON_GETCLASS: - case AUE_AUDITON_GETCOND: - case AUE_AUDITON_GETCWD: - case AUE_AUDITON_GETKMASK: - case AUE_AUDITON_GETSTAT: - case AUE_AUDITON_GPOLICY: - case AUE_AUDITON_GQCTRL: - case AUE_AUDITON_SETCLASS: - case AUE_AUDITON_SETCOND: - case AUE_AUDITON_SETKMASK: - case AUE_AUDITON_SETSMASK: - case AUE_AUDITON_SETSTAT: - case AUE_AUDITON_SETUMASK: - case AUE_AUDITON_SPOLICY: - case AUE_AUDITON_SQCTRL: - if (ar->ar_valid_arg & ARG_AUDITON) { - audit_sys_auditon(ar, rec); - } - break; - - case AUE_AUDITCTL: - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_ADJTIME: - case AUE_AUDIT: - case AUE_EXIT: - case AUE_GETAUDIT: - case AUE_GETAUDIT_ADDR: - case AUE_GETAUID: - case AUE_GETFSSTAT: - case AUE_MAC_GETFSSTAT: - case AUE_PIPE: - case AUE_SETPGRP: - case AUE_SETRLIMIT: - case AUE_SETSID: - case AUE_SETTIMEOFDAY: - /* Header, subject, and return tokens added at end */ - break; - - case AUE_ACCESS: - case AUE_CHDIR: - case AUE_CHROOT: - case AUE_EXECVE: - case AUE_GETATTRLIST: - case AUE_GETFH: - case AUE_LSTAT: - case AUE_MKFIFO: - case AUE_PATHCONF: - case AUE_READLINK: - case AUE_REVOKE: - case AUE_RMDIR: - case AUE_SEARCHFS: - case AUE_SETATTRLIST: - case AUE_STAT: - case AUE_STATFS: - case AUE_TRUNCATE: - case AUE_UNDELETE: - case AUE_UNLINK: - case AUE_UTIMES: - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_CHFLAGS: - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_CHMOD: - tok = au_to_arg32(2, "new file mode", ar->ar_arg_mode); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_CHOWN: - case AUE_LCHOWN: - tok = 
au_to_arg32(2, "new file uid", ar->ar_arg_uid); - kau_write(rec, tok); - tok = au_to_arg32(3, "new file gid", ar->ar_arg_gid); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_EXCHANGEDATA: - UPATH1_KPATH1_VNODE1_TOKENS; - KPATH2_VNODE2_TOKENS; - break; - - case AUE_CLOSE: - tok = au_to_arg32(2, "fd", ar->ar_arg_fd); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_FCHMOD: - tok = au_to_arg32(2, "new file mode", ar->ar_arg_mode); - kau_write(rec, tok); - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_NFSSVC: - tok = au_to_arg32(1, "request", ar->ar_arg_cmd); - kau_write(rec, tok); - if (ar->ar_valid_arg & (ARG_KPATH1 | ARG_UPATH1)) { - UPATH1_KPATH1_VNODE1_TOKENS; - } - break; - - case AUE_FCHDIR: - case AUE_FPATHCONF: - case AUE_FSTAT: /* XXX Need to handle sockets and shm */ - case AUE_FSTATFS: - case AUE_FTRUNCATE: - case AUE_FUTIMES: - case AUE_GETDIRENTRIES: - case AUE_GETDIRENTRIESATTR: - case AUE_EXTATTR_GET_FD: - case AUE_EXTATTR_LIST_FD: - case AUE_EXTATTR_SET_FD: - case AUE_EXTATTR_DELETE_FD: - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_FCHOWN: - tok = au_to_arg32(2, "new file uid", ar->ar_arg_uid); - kau_write(rec, tok); - tok = au_to_arg32(3, "new file gid", ar->ar_arg_gid); - kau_write(rec, tok); - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_FCNTL: - tok = au_to_arg32(2, "cmd", ar->ar_arg_cmd); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_VNODE1) { - FD_KPATH1_VNODE1_TOKENS; - } - break; - - case AUE_FCHFLAGS: - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); - kau_write(rec, tok); - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_FLOCK: - tok = au_to_arg32(2, "operation", ar->ar_arg_cmd); - kau_write(rec, tok); - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_FORK: - case AUE_VFORK: - tok = au_to_arg32(0, "child PID", ar->ar_arg_pid); - kau_write(rec, tok); - break; - - case AUE_GETLCID: - tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); - kau_write(rec, tok); - break; - - 
case AUE_SETLCID: - tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); - kau_write(rec, tok); - tok = au_to_arg32(2, "lcid", (u_int32_t)ar->ar_arg_value); - kau_write(rec, tok); - break; - - case AUE_IOCTL: - tok = au_to_arg32(2, "cmd", ar->ar_arg_cmd); - kau_write(rec, tok); - tok = au_to_arg32(1, "arg", (u_int32_t)ar->ar_arg_addr); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_VNODE1) { - FD_KPATH1_VNODE1_TOKENS; - } else { - if (ar->ar_valid_arg & ARG_SOCKINFO) { - tok = kau_to_socket(&ar->ar_arg_sockinfo); - kau_write(rec, tok); - } else { - tok = au_to_arg32(1, "fd", ar->ar_arg_fd); - kau_write(rec, tok); - } - } - break; - - case AUE_KILL: - tok = au_to_arg32(2, "signal", ar->ar_arg_signum); - kau_write(rec, tok); - PROCESS_PID_TOKENS(1); - break; - - case AUE_LINK: - case AUE_RENAME: - UPATH1_KPATH1_VNODE1_TOKENS; - UPATH2_TOKENS; - break; - - case AUE_MKDIR: - tok = au_to_arg32(2, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_MKNOD: - tok = au_to_arg32(2, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - tok = au_to_arg32(3, "dev", ar->ar_arg_dev); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_MMAP: - case AUE_MUNMAP: - case AUE_MPROTECT: - case AUE_MLOCK: - case AUE_MUNLOCK: - case AUE_MINHERIT: - tok = au_to_arg32(1, "addr", (u_int32_t)ar->ar_arg_addr); /* LP64todo */ - kau_write(rec, tok); - tok = au_to_arg32(2, "len", ar->ar_arg_len); /* LP64todo */ - kau_write(rec, tok); - if (ar->ar_event == AUE_MMAP) - FD_KPATH1_VNODE1_TOKENS; - if (ar->ar_event == AUE_MPROTECT) { - tok = au_to_arg32(3, "protection", ar->ar_arg_value); - kau_write(rec, tok); - } - if (ar->ar_event == AUE_MINHERIT) { - tok = au_to_arg32(3, "inherit", ar->ar_arg_value); - kau_write(rec, tok); - } - break; - -#if CONFIG_MACF - case AUE_MAC_MOUNT: - PROCESS_MAC_TOKENS; - /* fall through */ -#endif - case AUE_MOUNT: - /* XXX Need to handle NFS mounts */ - tok = au_to_arg32(3, "flags", 
ar->ar_arg_fflags); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - /* fall through */ - case AUE_UNMOUNT: - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_MSGCTL: - ar->ar_event = msgctl_to_event(ar->ar_arg_svipc_cmd); - /* Fall through */ - case AUE_MSGRCV: - case AUE_MSGSND: - tok = au_to_arg32(1, "msg ID", ar->ar_arg_svipc_id); - kau_write(rec, tok); - if (ar->ar_errno != EINVAL) { - tok = au_to_ipc(AT_IPC_MSG, ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - - case AUE_MSGGET: - if (ar->ar_errno == 0) { - tok = au_to_ipc(AT_IPC_MSG, ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - - case AUE_OPEN_RC: - case AUE_OPEN_RTC: - case AUE_OPEN_RWC: - case AUE_OPEN_RWTC: - case AUE_OPEN_WC: - case AUE_OPEN_WTC: - tok = au_to_arg32(3, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - /* fall thru */ - - case AUE_OPEN: - case AUE_OPEN_R: - case AUE_OPEN_RT: - case AUE_OPEN_RW: - case AUE_OPEN_RWT: - case AUE_OPEN_W: - case AUE_OPEN_WT: - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_PTRACE: - tok = au_to_arg32(1, "request", ar->ar_arg_cmd); - kau_write(rec, tok); - tok = au_to_arg32(3, "addr", (u_int32_t)ar->ar_arg_addr); /* LP64todo */ - kau_write(rec, tok); - tok = au_to_arg32(4, "data", ar->ar_arg_value); - kau_write(rec, tok); - PROCESS_PID_TOKENS(2); - break; - - case AUE_QUOTACTL: - tok = au_to_arg32(2, "command", ar->ar_arg_cmd); - kau_write(rec, tok); - tok = au_to_arg32(3, "uid", ar->ar_arg_uid); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_REBOOT: - tok = au_to_arg32(1, "howto", ar->ar_arg_cmd); - kau_write(rec, tok); - break; - - case AUE_SEMCTL: - ar->ar_event = semctl_to_event(ar->ar_arg_svipc_cmd); - /* Fall through */ - case AUE_SEMOP: - tok = au_to_arg32(1, "sem ID", ar->ar_arg_svipc_id); - kau_write(rec, tok); - if (ar->ar_errno != EINVAL) { 
- tok = au_to_ipc(AT_IPC_SEM, ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - case AUE_SEMGET: - if (ar->ar_errno == 0) { - tok = au_to_ipc(AT_IPC_SEM, ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - case AUE_SETEGID: - tok = au_to_arg32(1, "gid", ar->ar_arg_egid); - kau_write(rec, tok); - break; - case AUE_SETEUID: - tok = au_to_arg32(1, "uid", ar->ar_arg_euid); - kau_write(rec, tok); - break; - case AUE_SETGID: - tok = au_to_arg32(1, "gid", ar->ar_arg_gid); - kau_write(rec, tok); - break; - case AUE_SETUID: - tok = au_to_arg32(1, "uid", ar->ar_arg_uid); - kau_write(rec, tok); - break; - case AUE_SETGROUPS: - if (ar->ar_valid_arg & ARG_GROUPSET) { - u_int ctr; - - for(ctr = 0; ctr < ar->ar_arg_groups.gidset_size; ctr++) - { - tok = au_to_arg32(1, "setgroups", ar->ar_arg_groups.gidset[ctr]); - kau_write(rec, tok); - } - } - break; - - case AUE_SETLOGIN: - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - break; - - case AUE_SETPRIORITY: - tok = au_to_arg32(1, "which", ar->ar_arg_cmd); - kau_write(rec, tok); - tok = au_to_arg32(2, "who", ar->ar_arg_uid); - kau_write(rec, tok); - tok = au_to_arg32(2, "priority", ar->ar_arg_value); - kau_write(rec, tok); - break; - - case AUE_SETPRIVEXEC: - tok = au_to_arg32(1, "flag", ar->ar_arg_value); - kau_write(rec, tok); - break; - - /* AUE_SHMAT, AUE_SHMCTL, AUE_SHMDT and AUE_SHMGET are SysV IPC */ - case AUE_SHMAT: - tok = au_to_arg32(1, "shmid", ar->ar_arg_svipc_id); - kau_write(rec, tok); - tok = au_to_arg64(2, "shmaddr", ar->ar_arg_svipc_addr); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_SVIPC_PERM) { - tok = au_to_ipc(AT_IPC_SHM, ar->ar_arg_svipc_id); - kau_write(rec, tok); - tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); - kau_write(rec, tok); - } - break; - - case AUE_SHMCTL: - tok = au_to_arg32(1, "shmid", ar->ar_arg_svipc_id); - kau_write(rec, tok); - switch (ar->ar_arg_svipc_cmd) { - case IPC_STAT: - ar->ar_event = AUE_SHMCTL_STAT; - if 
(ar->ar_valid_arg & ARG_SVIPC_PERM) { - tok = au_to_ipc(AT_IPC_SHM, - ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - case IPC_RMID: - ar->ar_event = AUE_SHMCTL_RMID; - if (ar->ar_valid_arg & ARG_SVIPC_PERM) { - tok = au_to_ipc(AT_IPC_SHM, - ar->ar_arg_svipc_id); - kau_write(rec, tok); - } - break; - case IPC_SET: - ar->ar_event = AUE_SHMCTL_SET; - if (ar->ar_valid_arg & ARG_SVIPC_PERM) { - tok = au_to_ipc(AT_IPC_SHM, - ar->ar_arg_svipc_id); - kau_write(rec, tok); - tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); - kau_write(rec, tok); - } - break; - default: - break; /* We will audit a bad command */ - } - break; - - case AUE_SHMDT: - tok = au_to_arg64(1, "shmaddr", ar->ar_arg_svipc_addr); - kau_write(rec, tok); - break; - - case AUE_SHMGET: - /* This is unusual; the return value is in an argument token */ - tok = au_to_arg32(0, "shmid", ar->ar_arg_svipc_id); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_SVIPC_PERM) { - tok = au_to_ipc(AT_IPC_SHM, ar->ar_arg_svipc_id); - kau_write(rec, tok); - tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); - kau_write(rec, tok); - } - break; - - /* AUE_SHMOPEN, AUE_SHMUNLINK, AUE_SEMOPEN, AUE_SEMCLOSE - * and AUE_SEMUNLINK are Posix IPC */ - case AUE_SHMOPEN: - tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); - kau_write(rec, tok); - tok = au_to_arg32(3, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - case AUE_SHMUNLINK: - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - if (ar->ar_valid_arg & ARG_POSIX_IPC_PERM) { - /* Create an ipc_perm token */ - struct ipc_perm perm; - perm.uid = ar->ar_arg_pipc_perm.pipc_uid; - perm.gid = ar->ar_arg_pipc_perm.pipc_gid; - perm.cuid = ar->ar_arg_pipc_perm.pipc_uid; - perm.cgid = ar->ar_arg_pipc_perm.pipc_gid; - perm.mode = ar->ar_arg_pipc_perm.pipc_mode; - perm._seq = 0; - perm._key = 0; - tok = au_to_ipc_perm(&perm); - kau_write(rec, tok); - } - break; - - case AUE_SEMOPEN: - tok = au_to_arg32(2, "flags", 
ar->ar_arg_fflags); - kau_write(rec, tok); - tok = au_to_arg32(3, "mode", ar->ar_arg_mode); - kau_write(rec, tok); - tok = au_to_arg32(4, "value", ar->ar_arg_value); - kau_write(rec, tok); - /* fall through */ - case AUE_SEMUNLINK: - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - if (ar->ar_valid_arg & ARG_POSIX_IPC_PERM) { - /* Create an ipc_perm token */ - struct ipc_perm perm; - perm.uid = ar->ar_arg_pipc_perm.pipc_uid; - perm.gid = ar->ar_arg_pipc_perm.pipc_gid; - perm.cuid = ar->ar_arg_pipc_perm.pipc_uid; - perm.cgid = ar->ar_arg_pipc_perm.pipc_gid; - perm.mode = ar->ar_arg_pipc_perm.pipc_mode; - perm._seq = 0; - perm._key = 0; - tok = au_to_ipc_perm(&perm); - kau_write(rec, tok); - } - break; - - case AUE_SEMCLOSE: - tok = au_to_arg32(1, "sem", ar->ar_arg_fd); - kau_write(rec, tok); - break; - - case AUE_SYMLINK: - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_SYSCTL: - case AUE_SYSCTL_NONADMIN: - if (ar->ar_valid_arg & (ARG_CTLNAME | ARG_LEN)) { - int ctr; - - for (ctr = 0; ctr < ar->ar_arg_len; ctr++) { - tok = au_to_arg32(1, "name", ar->ar_arg_ctlname[ctr]); - kau_write(rec, tok); - } - } - if (ar->ar_valid_arg & ARG_VALUE) { - tok = au_to_arg32(5, "newval", ar->ar_arg_value); - kau_write(rec, tok); - } - if (ar->ar_valid_arg & ARG_TEXT) { - tok = au_to_text(ar->ar_arg_text); - kau_write(rec, tok); - } - break; - - case AUE_UMASK: - tok = au_to_arg32(1, "new mask", ar->ar_arg_mask); - kau_write(rec, tok); - tok = au_to_arg32(0, "prev mask", ar->ar_retval); - kau_write(rec, tok); - break; - - /************************ - * Mach system calls * - ************************/ - case AUE_INITPROCESS: - break; - - case AUE_PIDFORTASK: - tok = au_to_arg32(1, "port", (u_int32_t)ar->ar_arg_mach_port1); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_PID) { - tok = au_to_arg32(2, "pid", 
(u_int32_t)ar->ar_arg_pid); - kau_write(rec, tok); - } - break; - - case AUE_TASKFORPID: - tok = au_to_arg32(1, "target port", - (u_int32_t)ar->ar_arg_mach_port1); - kau_write(rec, tok); - if (ar->ar_valid_arg & ARG_MACHPORT2) { - tok = au_to_arg32(3, "task port", - (u_int32_t)ar->ar_arg_mach_port2); - kau_write(rec, tok); - } - PROCESS_PID_TOKENS(2); - break; - - case AUE_SWAPON: - tok = au_to_arg32(4, "priority", - (u_int32_t)ar->ar_arg_value); - kau_write(rec, tok); - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_SWAPOFF: - UPATH1_KPATH1_VNODE1_TOKENS; - break; - - case AUE_MAPFD: - tok = au_to_arg32(3, "va", (u_int32_t)ar->ar_arg_addr); - kau_write(rec, tok); - FD_KPATH1_VNODE1_TOKENS; - break; - - case AUE_EXTATTR_GET_FILE: - case AUE_EXTATTR_SET_FILE: - case AUE_EXTATTR_LIST_FILE: - case AUE_EXTATTR_DELETE_FILE: - case AUE_EXTATTR_GET_LINK: - case AUE_EXTATTR_SET_LINK: - case AUE_EXTATTR_LIST_LINK: - case AUE_EXTATTR_DELETE_LINK: - UPATH1_KPATH1_VNODE1_TOKENS; - break; - -#if CONFIG_MACF - case AUE_MAC_GET_FILE: - case AUE_MAC_SET_FILE: - case AUE_MAC_GET_LINK: - case AUE_MAC_SET_LINK: - case AUE_MAC_GET_MOUNT: - UPATH1_KPATH1_VNODE1_TOKENS; - PROCESS_MAC_TOKENS; - break; - - case AUE_MAC_GET_FD: - case AUE_MAC_SET_FD: - FD_KPATH1_VNODE1_TOKENS; - PROCESS_MAC_TOKENS; - break; - - case AUE_MAC_SYSCALL: - PROCESS_MAC_TOKENS; - tok = au_to_arg32(3, "call", ar->ar_arg_value); - kau_write(rec, tok); - break; - - case AUE_MAC_EXECVE: - UPATH1_KPATH1_VNODE1_TOKENS; - PROCESS_MAC_TOKENS; - break; - - case AUE_MAC_GET_PID: - tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); - kau_write(rec, tok); - PROCESS_MAC_TOKENS; - break; - - case AUE_MAC_GET_LCID: - tok = au_to_arg32(1, "lcid", (u_int32_t)ar->ar_arg_value); - kau_write(rec, tok); - PROCESS_MAC_TOKENS; - break; - - case AUE_MAC_GET_PROC: - case AUE_MAC_SET_PROC: - case AUE_MAC_GET_LCTX: - case AUE_MAC_SET_LCTX: - PROCESS_MAC_TOKENS; - break; -#endif - - default: /* We shouldn't fall through to here. 
*/ - printf("BSM conversion requested for unknown event %d\n", - ar->ar_event); - kau_free(rec); - return BSM_NOAUDIT; - } - -#if CONFIG_MACF - do { - /* Convert the audit data from the MAC policies */ - struct mac_audit_record *mar; - - LIST_FOREACH(mar, ar->ar_mac_records, records) { - switch (mar->type) { - case MAC_AUDIT_DATA_TYPE: - tok = au_to_data(AUP_BINARY, AUR_BYTE, - mar->length, mar->data); - break; - case MAC_AUDIT_TEXT_TYPE: - tok = au_to_text((char*) mar->data); - break; - default: - /* - * XXX: we can either continue, - * skipping this particular entry, - * or we can pre-verify the list and - * abort before writing any records - */ - printf("kaudit_to_bsm(): BSM conversion requested for unknown mac_audit data type %d\n", - mar->type); - } - - kau_write(rec, tok); - } - } while (0); -#endif - - kau_write(rec, subj_tok); - -#if CONFIG_MACF - if (ar->ar_cred_mac_labels != NULL) { - tok = au_to_text(ar->ar_cred_mac_labels); - kau_write(rec, tok); - } -#endif - - tok = au_to_return32((char)ar->ar_errno, ar->ar_retval); - kau_write(rec, tok); /* Every record gets a return token */ - - kau_close(rec, &ar->ar_endtime, ar->ar_event); - - *pau = rec; - return BSM_SUCCESS; -} - -/* - * Verify that a record is a valid BSM record. This verification is - * simple now, but may be expanded on sometime in the future. - * Return 1 if the record is good, 0 otherwise. - * - */ -int -bsm_rec_verify(void* rec) -{ - char c = *(char *)rec; - /* - * Check the token ID of the first token; it has to be a header - * token. - */ - /* XXXAUDIT There needs to be a token structure to map a token. - * XXXAUDIT 'Shouldn't be simply looking at the first char. 
- */ - if ( (c != AU_HEADER_32_TOKEN) && - (c != AU_HEADER_EX_32_TOKEN) && - (c != AU_HEADER_64_TOKEN) && - (c != AU_HEADER_EX_64_TOKEN) ) { - return (0); - } - return (1); -} diff --git a/bsd/kern/kern_bsm_klib.c b/bsd/kern/kern_bsm_klib.c deleted file mode 100644 index f88f5bd38..000000000 --- a/bsd/kern/kern_bsm_klib.c +++ /dev/null @@ -1,911 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -/* - * Initialize the system call to audit event mapping table. This table - * must be kept in sync with the system call table. This table is meant to - * be directly accessed. - * XXX This should be improved, though, to make it independent of the syscall - * table (but we don't want to traverse a large table for every system call - * to find a match). Ultimately, it would be best to place the audit event - * number in the system call table. - */ -au_event_t sys_au_event[] = { - AUE_NULL, /* 0 = indir */ - AUE_EXIT, /* 1 = exit */ - AUE_FORK, /* 2 = fork */ - AUE_NULL, /* 3 = read */ - AUE_NULL, /* 4 = write */ - AUE_OPEN_RWTC, /* 5 = open */ - AUE_CLOSE, /* 6 = close */ - AUE_NULL, /* 7 = wait4 */ - AUE_O_CREAT, /* 8 = old creat */ - AUE_LINK, /* 9 = link */ - AUE_UNLINK, /* 10 = unlink */ - AUE_NULL, /* 11 was obsolete execv */ - AUE_CHDIR, /* 12 = chdir */ - AUE_FCHDIR, /* 13 = fchdir */ - AUE_MKNOD, /* 14 = mknod */ - AUE_CHMOD, /* 15 = chmod */ - AUE_CHOWN, /* 16 = chown; now 3 args */ - AUE_NULL, /* 17 = old break */ -#if COMPAT_GETFSSTAT - AUE_GETFSSTAT, /* 18 = getfsstat */ -#else - AUE_NULL, /* 18 = ogetfsstat */ -#endif - AUE_NULL, /* 19 = old lseek */ - AUE_NULL, /* 20 = getpid */ - AUE_NULL, /* 21 was obsolete mount */ - AUE_NULL, /* 22 was obsolete umount */ - AUE_SETUID, /* 23 = setuid */ - AUE_NULL, /* 24 = getuid */ - AUE_NULL, /* 25 = geteuid */ - AUE_PTRACE, /* 26 = ptrace */ - AUE_RECVMSG, /* 27 = recvmsg */ - AUE_SENDMSG, /* 28 = sendmsg */ - AUE_RECVFROM, /* 29 = recvfrom */ - AUE_ACCEPT, /* 30 = accept */ - AUE_NULL, /* 31 = getpeername */ - AUE_NULL, /* 32 = getsockname */ - AUE_ACCESS, /* 33 = access */ - AUE_CHFLAGS, /* 34 = chflags */ - AUE_FCHFLAGS, /* 35 = fchflags */ - AUE_NULL, /* 36 = sync */ - AUE_KILL, /* 37 = kill */ - AUE_O_STAT, /* 38 = old stat */ - AUE_NULL, /* 39 = getppid */ - 
AUE_O_LSTAT, /* 40 = old lstat */ - AUE_NULL, /* 41 = dup */ - AUE_PIPE, /* 42 = pipe */ - AUE_NULL, /* 43 = getegid */ - AUE_NULL, /* 44 = profil */ - AUE_NULL, /* 45 = old ktrace */ - AUE_NULL, /* 46 = sigaction */ - AUE_NULL, /* 47 = getgid */ - AUE_NULL, /* 48 = sigprocmask */ - AUE_NULL, /* 49 = getlogin */ - AUE_SETLOGIN, /* 50 = setlogin */ - AUE_ACCT, /* 51 = turn acct off/on */ - AUE_NULL, /* 52 = sigpending */ - AUE_NULL, /* 53 = sigaltstack */ - AUE_IOCTL, /* 54 = ioctl */ - AUE_REBOOT, /* 55 = reboot */ - AUE_REVOKE, /* 56 = revoke */ - AUE_SYMLINK, /* 57 = symlink */ - AUE_READLINK, /* 58 = readlink */ - AUE_EXECVE, /* 59 = execve */ - AUE_UMASK, /* 60 = umask */ - AUE_CHROOT, /* 61 = chroot */ - AUE_O_FSTAT, /* 62 = old fstat */ - AUE_NULL, /* 63 = used internally, reserved */ - AUE_NULL, /* 64 = old getpagesize */ - AUE_NULL, /* 65 = msync */ - AUE_VFORK, /* 66 = vfork */ - AUE_NULL, /* 67 was obsolete vread */ - AUE_NULL, /* 68 was obsolete vwrite */ - AUE_NULL, /* 69 = sbrk */ - AUE_NULL, /* 70 = sstk */ - AUE_O_MMAP, /* 71 = old mmap */ - AUE_NULL, /* 72 = old vadvise */ - AUE_MUNMAP, /* 73 = munmap */ - AUE_MPROTECT, /* 74 = mprotect */ - AUE_NULL, /* 75 = madvise */ - AUE_NULL, /* 76 was obsolete vhangup */ - AUE_NULL, /* 77 was obsolete vlimit */ - AUE_NULL, /* 78 = mincore */ - AUE_NULL, /* 79 = getgroups */ - AUE_SETGROUPS, /* 80 = setgroups */ - AUE_NULL, /* 81 = getpgrp */ - AUE_SETPGRP, /* 82 = setpgid */ - AUE_NULL, /* 83 = setitimer */ - AUE_NULL, /* 84 = old wait */ - AUE_NULL, /* 85 = swapon */ - AUE_NULL, /* 86 = getitimer */ - AUE_NULL, /* 87 = old gethostname */ - AUE_O_SETHOSTNAME, /* 88 = old sethostname */ - AUE_NULL, /* 89 getdtablesize */ - AUE_NULL, /* 90 = dup2 */ - AUE_NULL, /* 91 was obsolete getdopt */ - AUE_FCNTL, /* 92 = fcntl */ - AUE_NULL, /* 93 = select */ - AUE_NULL, /* 94 was obsolete setdopt */ - AUE_NULL, /* 95 = fsync */ - AUE_SETPRIORITY, /* 96 = setpriority */ - AUE_SOCKET, /* 97 = socket */ - AUE_CONNECT, /* 
98 = connect */ - AUE_NULL, /* 99 = accept */ - AUE_NULL, /* 100 = getpriority */ - AUE_O_SEND, /* 101 = old send */ - AUE_O_RECV, /* 102 = old recv */ - AUE_NULL, /* 103 = sigreturn */ - AUE_BIND, /* 104 = bind */ - AUE_SETSOCKOPT, /* 105 = setsockopt */ - AUE_NULL, /* 106 = listen */ - AUE_NULL, /* 107 was vtimes */ - AUE_NULL, /* 108 = sigvec */ - AUE_NULL, /* 109 = sigblock */ - AUE_NULL, /* 110 = sigsetmask */ - AUE_NULL, /* 111 = sigpause */ - AUE_NULL, /* 112 = sigstack */ - AUE_O_RECVMSG, /* 113 = recvmsg */ - AUE_O_SENDMSG, /* 114 = sendmsg */ - AUE_NULL, /* 115 = old vtrace */ - AUE_NULL, /* 116 = gettimeofday */ - AUE_NULL, /* 117 = getrusage */ - AUE_NULL, /* 118 = getsockopt */ - AUE_NULL, /* 119 = old resuba */ - AUE_NULL, /* 120 = readv */ - AUE_NULL, /* 121 = writev */ - AUE_SETTIMEOFDAY, /* 122 = settimeofday */ - AUE_FCHOWN, /* 123 = fchown */ - AUE_FCHMOD, /* 124 = fchmod */ - AUE_O_RECVFROM, /* 125 = recvfrom */ - AUE_NULL, /* 126 = setreuid */ - AUE_NULL, /* 127 = setregid */ - AUE_RENAME, /* 128 = rename */ - AUE_O_TRUNCATE, /* 129 = old truncate */ - AUE_O_FTRUNCATE, /* 130 = old ftruncate */ - AUE_FLOCK, /* 131 = flock */ - AUE_MKFIFO, /* 132 = mkfifo */ - AUE_SENDTO, /* 133 = sendto */ - AUE_SHUTDOWN, /* 134 = shutdown */ - AUE_SOCKETPAIR, /* 135 = socketpair */ - AUE_MKDIR, /* 136 = mkdir */ - AUE_RMDIR, /* 137 = rmdir */ - AUE_UTIMES, /* 138 = utimes */ - AUE_FUTIMES, /* 139 = futimes */ - AUE_ADJTIME, /* 140 = adjtime */ - AUE_NULL, /* 141 = getpeername */ - AUE_NULL, /* 142 = old gethostid */ - AUE_NULL, /* 143 = old sethostid */ - AUE_NULL, /* 144 = old getrlimit */ - AUE_O_SETRLIMIT, /* 145 = old setrlimit */ - AUE_O_KILLPG, /* 146 = old killpg */ - AUE_SETSID, /* 147 = setsid */ - AUE_NULL, /* 148 was setquota */ - AUE_NULL, /* 149 was qquota */ - AUE_NULL, /* 150 = getsockname */ - AUE_NULL, /* 151 = getpgid */ - AUE_SETPRIVEXEC, /* 152 = setprivexec */ - AUE_NULL, /* 153 = pread */ - AUE_NULL, /* 154 = pwrite */ - AUE_NFSSVC, /* 
155 = nfs_svc */ - AUE_O_GETDIRENTRIES, /* 156 = old getdirentries */ - AUE_STATFS, /* 157 = statfs */ - AUE_FSTATFS, /* 158 = fstatfs */ - AUE_UNMOUNT, /* 159 = unmount */ - AUE_NULL, /* 160 was async_daemon */ - AUE_GETFH, /* 161 = get file handle */ - AUE_NULL, /* 162 = getdomainname */ - AUE_O_SETDOMAINNAME, /* 163 = setdomainname */ - AUE_NULL, /* 164 */ -#if QUOTA - AUE_QUOTACTL, /* 165 = quotactl */ -#else /* QUOTA */ - AUE_NULL, /* 165 = not configured */ -#endif /* QUOTA */ - AUE_NULL, /* 166 was exportfs */ - AUE_MOUNT, /* 167 = mount */ - AUE_NULL, /* 168 was ustat */ - AUE_NULL, /* 169 = nosys */ - AUE_NULL, /* 170 was table */ - AUE_NULL, /* 171 = old wait3 */ - AUE_NULL, /* 172 was rpause */ - AUE_NULL, /* 173 = nosys */ - AUE_NULL, /* 174 was getdents */ - AUE_NULL, /* 175 was gc_control */ - AUE_NULL, /* 176 = add_profil */ - AUE_NULL, /* 177 */ - AUE_NULL, /* 178 */ - AUE_NULL, /* 179 */ - AUE_NULL, /* 180 */ - AUE_SETGID, /* 181 */ - AUE_SETEGID, /* 182 */ - AUE_SETEUID, /* 183 */ - AUE_NULL, /* 184 = nosys */ - AUE_NULL, /* 185 = nosys */ - AUE_NULL, /* 186 = nosys */ - AUE_NULL, /* 187 = nosys */ - AUE_STAT, /* 188 = stat */ - AUE_FSTAT, /* 189 = fstat */ - AUE_LSTAT, /* 190 = lstat */ - AUE_PATHCONF, /* 191 = pathconf */ - AUE_FPATHCONF, /* 192 = fpathconf */ -#if COMPAT_GETFSSTAT - AUE_GETFSSTAT, /* 193 = getfsstat */ -#else - AUE_NULL, /* 193 is unused */ -#endif - AUE_NULL, /* 194 = getrlimit */ - AUE_SETRLIMIT, /* 195 = setrlimit */ - AUE_GETDIRENTRIES, /* 196 = getdirentries */ - AUE_MMAP, /* 197 = mmap */ - AUE_NULL, /* 198 = __syscall */ - AUE_NULL, /* 199 = lseek */ - AUE_TRUNCATE, /* 200 = truncate */ - AUE_FTRUNCATE, /* 201 = ftruncate */ - AUE_SYSCTL, /* 202 = __sysctl */ - AUE_MLOCK, /* 203 = mlock */ - AUE_MUNLOCK, /* 204 = munlock */ - AUE_UNDELETE, /* 205 = undelete */ - AUE_NULL, /* 206 = ATsocket */ - AUE_NULL, /* 207 = ATgetmsg*/ - AUE_NULL, /* 208 = ATputmsg*/ - AUE_NULL, /* 209 = ATPsndreq*/ - AUE_NULL, /* 210 = ATPsndrsp*/ 
- AUE_NULL, /* 211 = ATPgetreq*/ - AUE_NULL, /* 212 = ATPgetrsp*/ - AUE_NULL, /* 213 = Reserved for AppleTalk */ - AUE_NULL, /* 214 = Reserved for AppleTalk */ - AUE_NULL, /* 215 = Reserved for AppleTalk */ - - AUE_NULL, /* 216 = HFS make complex file call (multipel forks */ - AUE_NULL, /* 217 = HFS statv extended stat call for HFS */ - AUE_NULL, /* 218 = HFS lstatv extended lstat call for HFS */ - AUE_NULL, /* 219 = HFS fstatv extended fstat call for HFS */ - AUE_GETATTRLIST,/* 220 = HFS getarrtlist get attribute list cal */ - AUE_SETATTRLIST,/* 221 = HFS setattrlist set attribute list */ - AUE_GETDIRENTRIESATTR,/* 222 = HFS getdirentriesattr get directory attributes */ - AUE_EXCHANGEDATA,/* 223 = HFS exchangedata exchange file contents */ - AUE_CHECKUSERACCESS,/* 224 = HFS checkuseraccess check access to file */ - AUE_SEARCHFS, /* 225 = HFS searchfs to implement catalog searching */ - AUE_DELETE, /* 226 = private delete (Carbon semantics) */ - AUE_NULL, /* 227 = copyfile - orignally for AFP */ - AUE_NULL, /* 228 */ - AUE_NULL, /* 229 */ - AUE_NULL, /* 230 */ - AUE_NULL, /* 231 */ - AUE_NULL, /* 232 */ - AUE_NULL, /* 233 */ - AUE_NULL, /* 234 */ - AUE_NULL, /* 235 */ - AUE_NULL, /* 236 */ - AUE_NULL, /* 237 */ - AUE_NULL, /* 238 */ - AUE_NULL, /* 239 */ - AUE_NULL, /* 240 */ - AUE_NULL, /* 241 */ - AUE_NULL, /* 242 = fsctl */ - AUE_NULL, /* 243 */ - AUE_NULL, /* 244 */ - AUE_NULL, /* 245 */ - AUE_NULL, /* 246 */ - AUE_NULL, /* 247 = nfsclnt*/ - AUE_NULL, /* 248 = fhopen */ - AUE_NULL, /* 249 */ - AUE_MINHERIT, /* 250 = minherit */ - AUE_NULL, /* 251 = semsys */ - AUE_NULL, /* 252 = msgsys */ - AUE_NULL, /* 253 = shmsys */ - AUE_SEMCTL, /* 254 = semctl */ - AUE_SEMGET, /* 255 = semget */ - AUE_SEMOP, /* 256 = semop */ - AUE_NULL, /* 257 = */ - AUE_MSGCTL, /* 258 = msgctl */ - AUE_MSGGET, /* 259 = msgget */ - AUE_MSGSND, /* 260 = msgsnd */ - AUE_MSGRCV, /* 261 = msgrcv */ - AUE_SHMAT, /* 262 = shmat */ - AUE_SHMCTL, /* 263 = shmctl */ - AUE_SHMDT, /* 264 = shmdt */ 
- AUE_SHMGET, /* 265 = shmget */ - AUE_SHMOPEN, /* 266 = shm_open */ - AUE_SHMUNLINK, /* 267 = shm_unlink */ - AUE_SEMOPEN, /* 268 = sem_open */ - AUE_SEMCLOSE, /* 269 = sem_close */ - AUE_SEMUNLINK, /* 270 = sem_unlink */ - AUE_NULL, /* 271 = sem_wait */ - AUE_NULL, /* 272 = sem_trywait */ - AUE_NULL, /* 273 = sem_post */ - AUE_NULL, /* 274 = sem_getvalue */ - AUE_NULL, /* 275 = sem_init */ - AUE_NULL, /* 276 = sem_destroy */ - AUE_NULL, /* 277 */ - AUE_NULL, /* 278 */ - AUE_NULL, /* 279 */ - AUE_NULL, /* 280 */ - AUE_NULL, /* 281 */ - AUE_NULL, /* 282 */ - AUE_NULL, /* 283 */ - AUE_NULL, /* 284 */ - AUE_NULL, /* 285 */ - AUE_NULL, /* 286 */ - AUE_NULL, /* 287 */ - AUE_NULL, /* 288 */ - AUE_NULL, /* 289 */ - AUE_NULL, /* 290 */ - AUE_NULL, /* 291 */ - AUE_NULL, /* 292 */ - AUE_NULL, /* 293 */ - AUE_NULL, /* 294 */ - AUE_NULL, /* 295 */ - AUE_NULL, /* 296 */ - AUE_NULL, /* 297 */ - AUE_NULL, /* 298 */ - AUE_NULL, /* 299 */ - AUE_NULL, /* 300 */ - AUE_NULL, /* 301 */ - AUE_NULL, /* 302 */ - AUE_NULL, /* 303 */ - AUE_NULL, /* 304 */ - AUE_NULL, /* 305 */ - AUE_NULL, /* 306 */ - AUE_NULL, /* 307 */ - AUE_NULL, /* 308 */ - AUE_NULL, /* 309 */ - AUE_NULL, /* 310 = getsid */ - AUE_NULL, /* 311 */ - AUE_NULL, /* 312 */ - AUE_NULL, /* 313 */ - AUE_NULL, /* 314 */ - AUE_NULL, /* 315 */ - AUE_NULL, /* 316 */ - AUE_NULL, /* 317 */ - AUE_NULL, /* 318 */ - AUE_NULL, /* 319 */ - AUE_NULL, /* 320 */ - AUE_NULL, /* 321 */ - AUE_NULL, /* 322 */ - AUE_NULL, /* 323 */ - AUE_NULL, /* 324 = mlockall*/ - AUE_NULL, /* 325 = munlockall*/ - AUE_NULL, /* 326 */ - AUE_NULL, /* 327 = issetugid */ - AUE_NULL, /* 328 */ - AUE_NULL, /* 329 */ - AUE_NULL, /* 330 */ - AUE_NULL, /* 331 */ - AUE_NULL, /* 332 */ - AUE_NULL, /* 333 */ - AUE_NULL, /* 334 */ - AUE_NULL, /* 335 = utrace */ - AUE_NULL, /* 336 */ - AUE_NULL, /* 337 */ - AUE_NULL, /* 338 */ - AUE_NULL, /* 339 */ - AUE_NULL, /* 340 */ - AUE_NULL, /* 341 */ - AUE_NULL, /* 342 */ - AUE_NULL, /* 343 */ - AUE_NULL, /* 344 */ - AUE_NULL, /* 345 
*/ - AUE_NULL, /* 346 */ - AUE_NULL, /* 347 */ - AUE_NULL, /* 348 */ - AUE_NULL, /* 349 */ - AUE_AUDIT, /* 350 */ - AUE_AUDITON, /* 351 */ - AUE_NULL, /* 352 */ - AUE_GETAUID, /* 353 */ - AUE_SETAUID, /* 354 */ - AUE_GETAUDIT, /* 355 */ - AUE_SETAUDIT, /* 356 */ - AUE_GETAUDIT_ADDR, /* 357 */ - AUE_SETAUDIT_ADDR, /* 358 */ - AUE_AUDITCTL, /* 359 */ - AUE_NULL, /* 360 */ - AUE_NULL, /* 361 */ - AUE_NULL, /* 362 = kqueue */ - AUE_NULL, /* 363 = kevent */ - AUE_LCHOWN, /* 364 = lchown */ - AUE_NULL, /* 365 */ - AUE_NULL, /* 366 */ - AUE_NULL, /* 367 */ - AUE_NULL, /* 368 */ - AUE_NULL, /* 369 */ - AUE_NULL, /* 370 */ - AUE_NULL, /* 371 */ - AUE_NULL, /* 372 */ - AUE_NULL, /* 373 */ - AUE_NULL, /* 374 */ - AUE_NULL, /* 375 */ - AUE_NULL, /* 376 */ - AUE_NULL, /* 377 */ - AUE_NULL, /* 378 */ - AUE_NULL, /* 379 */ - AUE_MAC_EXECVE, /* 380 = __mac_execve */ - AUE_MAC_SYSCALL, /* 381 = __mac_syscall */ - AUE_MAC_GET_FILE, /* 382 = __mac_get_file */ - AUE_MAC_SET_FILE, /* 383 = __mac_set_file */ - AUE_MAC_GET_LINK, /* 384 = __mac_get_link */ - AUE_MAC_SET_LINK, /* 385 = __mac_set_link */ - AUE_MAC_GET_PROC, /* 386 = __mac_get_proc */ - AUE_MAC_SET_PROC, /* 387 = __mac_set_proc */ - AUE_MAC_GET_FD, /* 388 = __mac_get_fd */ - AUE_MAC_SET_FD, /* 389 = __mac_set_fd */ - AUE_MAC_GET_PID, /* 390 = __mac_get_pid */ - AUE_MAC_GET_LCID, /* 391 = __mac_get_lcid */ - AUE_MAC_GET_LCTX, /* 392 = __mac_get_lctx */ - AUE_MAC_SET_LCTX, /* 393 = __mac_set_lctx */ - AUE_SETLCID, /* 394 = setlcid */ - AUE_GETLCID, /* 395 = getlcid */ - AUE_NULL, /* 396 = read_nocancel */ - AUE_NULL, /* 397 = write_nocancel */ - AUE_OPEN_RWTC, /* 398 = open_nocancel */ - AUE_CLOSE, /* 399 = close_nocancel */ - AUE_NULL, /* 400 = wait4_nocancel */ - AUE_RECVMSG, /* 401 = recvmsg_nocancel */ - AUE_SENDMSG, /* 402 = sendmsg_nocancel */ - AUE_RECVFROM, /* 403 = recvfrom_nocancel */ - AUE_ACCEPT, /* 404 = accept_nocancel */ - AUE_NULL, /* 405 = msync_nocancel */ - AUE_FCNTL, /* 406 = fcntl_nocancel */ - AUE_NULL, 
/* 407 = select_nocancel */ - AUE_NULL, /* 408 = fsync_nocancel */ - AUE_CONNECT, /* 409 = connect_nocancel */ - AUE_NULL, /* 410 = sigsuspend_nocancel */ - AUE_NULL, /* 411 = readv_nocancel */ - AUE_NULL, /* 412 = writev_nocancel */ - AUE_SENDTO, /* 413 = sendto_nocancel */ - AUE_NULL, /* 414 = pread_nocancel */ - AUE_NULL, /* 415 = pwrite_nocancel */ - AUE_NULL, /* 416 = waitid_nocancel */ - AUE_NULL, /* 417 = poll_nocancel */ - AUE_MSGSND, /* 418 = msgsnd_nocancel */ - AUE_MSGRCV, /* 419 = msgrcv_nocancel */ - AUE_NULL, /* 420 = sem_wait_nocancel */ - AUE_NULL, /* 421 = aio_suspend_nocancel */ - AUE_NULL, /* 422 = __sigwait_nocancel */ - AUE_NULL, /* 423 = __semwait_signal_nocancel */ - AUE_MAC_MOUNT, /* 424 = __mac_mount */ - AUE_MAC_GET_MOUNT, /* 425 = __mac_get_mount */ - AUE_MAC_GETFSSTAT, /* 426 = __mac_getfsstat */ - -}; - -/* - * Verify that sys_au_event has an entry for every syscall. - */ -int audit_sys_table_size_check[( - (sizeof(sys_au_event) / sizeof(sys_au_event[0])) == NUM_SYSENT)? - 1 : -1] __unused; - -/* - * Hash table functions for the audit event number to event class mask mapping. - */ - -#define EVCLASSMAP_HASH_TABLE_SIZE 251 -struct evclass_elem { - au_event_t event; - au_class_t class; - LIST_ENTRY(evclass_elem) entry; -}; -struct evclass_list { - LIST_HEAD(, evclass_elem) head; -}; - -struct evclass_list evclass_hash[EVCLASSMAP_HASH_TABLE_SIZE]; - -au_class_t au_event_class(au_event_t event) -{ - - struct evclass_list *evcl; - struct evclass_elem *evc; - - evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; - - /* If an entry at our hash location matches the event, just return */ - LIST_FOREACH(evc, &evcl->head, entry) { - if (evc->event == event) - return (evc->class); - } - return (AU_NULL); -} - - /* - * Insert a event to class mapping. If the event already exists in the - * mapping, then replace the mapping with the new one. - * XXX There is currently no constraints placed on the number of mappings. 
- * May want to either limit to a number, or in terms of memory usage. - */ -void au_evclassmap_insert(au_event_t event, au_class_t class) -{ - struct evclass_list *evcl; - struct evclass_elem *evc; - - evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; - - LIST_FOREACH(evc, &evcl->head, entry) { - if (evc->event == event) { - evc->class = class; - return; - } - } - evc = (struct evclass_elem *)kalloc(sizeof (*evc)); - if (evc == NULL) { - return; - } - evc->event = event; - evc->class = class; - LIST_INSERT_HEAD(&evcl->head, evc, entry); -} - -void -au_evclassmap_init(void) -{ - int i; - for (i = 0; i < EVCLASSMAP_HASH_TABLE_SIZE; i++) { - LIST_INIT(&evclass_hash[i].head); - } - - /* Set up the initial event to class mapping for system calls. */ - for (i = 0; i < NUM_SYSENT; i++) { - if (sys_au_event[i] != AUE_NULL) { - au_evclassmap_insert(sys_au_event[i], AU_NULL); - } - } - /* Add the Mach system call events */ - au_evclassmap_insert(AUE_TASKFORPID, AU_NULL); - au_evclassmap_insert(AUE_PIDFORTASK, AU_NULL); - au_evclassmap_insert(AUE_SWAPON, AU_NULL); - au_evclassmap_insert(AUE_SWAPOFF, AU_NULL); - au_evclassmap_insert(AUE_MAPFD, AU_NULL); - au_evclassmap_insert(AUE_INITPROCESS, AU_NULL); - - /* Add the specific open events to the mapping. 
*/ - au_evclassmap_insert(AUE_OPEN_R, AU_FREAD); - au_evclassmap_insert(AUE_OPEN_RC, AU_FREAD|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_RTC, AU_FREAD|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RT, AU_FREAD|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RW, AU_FREAD|AU_FWRITE); - au_evclassmap_insert(AUE_OPEN_RWC, AU_FREAD|AU_FWRITE|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_RWTC, AU_FREAD|AU_FWRITE|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_RWT, AU_FREAD|AU_FWRITE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_W, AU_FWRITE); - au_evclassmap_insert(AUE_OPEN_WC, AU_FWRITE|AU_FCREATE); - au_evclassmap_insert(AUE_OPEN_WTC, AU_FWRITE|AU_FCREATE|AU_FDELETE); - au_evclassmap_insert(AUE_OPEN_WT, AU_FWRITE|AU_FDELETE); -} - - /* - * Check whether an event is aditable by comparing the mask of classes this - * event is part of against the given mask. - */ -int au_preselect(au_event_t event, au_mask_t *mask_p, int sorf) -{ - au_class_t effmask = 0; - au_class_t ae_class; - - if(mask_p == NULL) - return (-1); - - ae_class = au_event_class(event); - /* - * Perform the actual check of the masks against the event. 
- */ - if(sorf & AU_PRS_SUCCESS) { - effmask |= (mask_p->am_success & ae_class); - } - - if(sorf & AU_PRS_FAILURE) { - effmask |= (mask_p->am_failure & ae_class); - } - - if(effmask) - return (1); - else - return (0); -} - -/* - * Convert sysctl names and present arguments to events - */ -au_event_t ctlname_to_sysctlevent(int name[], uint64_t valid_arg) { - - /* can't parse it - so return the worst case */ - if ((valid_arg & (ARG_CTLNAME | ARG_LEN)) != - (ARG_CTLNAME | ARG_LEN)) - return AUE_SYSCTL; - - switch (name[0]) { - /* non-admin "lookups" treat them special */ - case KERN_OSTYPE: - case KERN_OSRELEASE: - case KERN_OSREV: - case KERN_VERSION: - case KERN_ARGMAX: - case KERN_CLOCKRATE: - case KERN_BOOTTIME: - case KERN_POSIX1: - case KERN_NGROUPS: - case KERN_JOB_CONTROL: - case KERN_SAVED_IDS: - case KERN_NETBOOT: - case KERN_SYMFILE: - case KERN_SHREG_PRIVATIZABLE: - return AUE_SYSCTL_NONADMIN; - - /* only treat the sets as admin */ - case KERN_MAXVNODES: - case KERN_MAXPROC: - case KERN_MAXFILES: - case KERN_MAXPROCPERUID: - case KERN_MAXFILESPERPROC: - case KERN_HOSTID: - case KERN_AIOMAX: - case KERN_AIOPROCMAX: - case KERN_AIOTHREADS: - case KERN_COREDUMP: - case KERN_SUGID_COREDUMP: - return (valid_arg & ARG_VALUE) ? - AUE_SYSCTL : AUE_SYSCTL_NONADMIN; - - default: - return AUE_SYSCTL; - } - /* NOTREACHED */ -} - -/* - * Convert an open flags specifier into a specific type of open event for - * auditing purposes. - */ -au_event_t flags_and_error_to_openevent(int oflags, int error) { - au_event_t aevent; - - /* Need to check only those flags we care about. */ - oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY); - - /* These checks determine what flags are on with the condition - * that ONLY that combination is on, and no other flags are on. 
- */ - switch (oflags) { - case O_RDONLY: - aevent = AUE_OPEN_R; - break; - case (O_RDONLY | O_CREAT): - aevent = AUE_OPEN_RC; - break; - case (O_RDONLY | O_CREAT | O_TRUNC): - aevent = AUE_OPEN_RTC; - break; - case (O_RDONLY | O_TRUNC): - aevent = AUE_OPEN_RT; - break; - case O_RDWR: - aevent = AUE_OPEN_RW; - break; - case (O_RDWR | O_CREAT): - aevent = AUE_OPEN_RWC; - break; - case (O_RDWR | O_CREAT | O_TRUNC): - aevent = AUE_OPEN_RWTC; - break; - case (O_RDWR | O_TRUNC): - aevent = AUE_OPEN_RWT; - break; - case O_WRONLY: - aevent = AUE_OPEN_W; - break; - case (O_WRONLY | O_CREAT): - aevent = AUE_OPEN_WC; - break; - case (O_WRONLY | O_CREAT | O_TRUNC): - aevent = AUE_OPEN_WTC; - break; - case (O_WRONLY | O_TRUNC): - aevent = AUE_OPEN_WT; - break; - default: - aevent = AUE_OPEN; - break; -} - -/* - * Convert chatty errors to better matching events. - * Failures to find a file are really just attribute - * events - so recast them as such. -*/ - switch (aevent) { - case AUE_OPEN_R: - case AUE_OPEN_RT: - case AUE_OPEN_RW: - case AUE_OPEN_RWT: - case AUE_OPEN_W: - case AUE_OPEN_WT: - if (error == ENOENT) - aevent = AUE_OPEN; -} - return aevent; -} - -/* Convert a MSGCTL command to a specific event. */ -au_event_t msgctl_to_event(int cmd) -{ - switch (cmd) { - case IPC_RMID: - return AUE_MSGCTL_RMID; - case IPC_SET: - return AUE_MSGCTL_SET; - case IPC_STAT: - return AUE_MSGCTL_STAT; - default: - return AUE_MSGCTL; - /* We will audit a bad command */ - } -} - -/* Convert a SEMCTL command to a specific event. 
*/ -au_event_t semctl_to_event(int cmd) -{ - switch (cmd) { - case GETALL: - return AUE_SEMCTL_GETALL; - case GETNCNT: - return AUE_SEMCTL_GETNCNT; - case GETPID: - return AUE_SEMCTL_GETPID; - case GETVAL: - return AUE_SEMCTL_GETVAL; - case GETZCNT: - return AUE_SEMCTL_GETZCNT; - case IPC_RMID: - return AUE_SEMCTL_RMID; - case IPC_SET: - return AUE_SEMCTL_SET; - case SETALL: - return AUE_SEMCTL_SETALL; - case SETVAL: - return AUE_SEMCTL_SETVAL; - case IPC_STAT: - return AUE_SEMCTL_STAT; - default: - return AUE_SEMCTL; - /* We will audit a bad command */ - } -} - -/* Convert a command for the auditon() system call to a audit event. */ -int auditon_command_event(int cmd) -{ - switch(cmd) { - case A_GETPOLICY: - return AUE_AUDITON_GPOLICY; - break; - case A_SETPOLICY: - return AUE_AUDITON_SPOLICY; - break; - case A_GETKMASK: - return AUE_AUDITON_GETKMASK; - break; - case A_SETKMASK: - return AUE_AUDITON_SETKMASK; - break; - case A_GETQCTRL: - return AUE_AUDITON_GQCTRL; - break; - case A_SETQCTRL: - return AUE_AUDITON_SQCTRL; - break; - case A_GETCWD: - return AUE_AUDITON_GETCWD; - break; - case A_GETCAR: - return AUE_AUDITON_GETCAR; - break; - case A_GETSTAT: - return AUE_AUDITON_GETSTAT; - break; - case A_SETSTAT: - return AUE_AUDITON_SETSTAT; - break; - case A_SETUMASK: - return AUE_AUDITON_SETUMASK; - break; - case A_SETSMASK: - return AUE_AUDITON_SETSMASK; - break; - case A_GETCOND: - return AUE_AUDITON_GETCOND; - break; - case A_SETCOND: - return AUE_AUDITON_SETCOND; - break; - case A_GETCLASS: - return AUE_AUDITON_GETCLASS; - break; - case A_SETCLASS: - return AUE_AUDITON_SETCLASS; - break; - case A_GETPINFO: - case A_SETPMASK: - case A_SETFSIZE: - case A_GETFSIZE: - case A_GETPINFO_ADDR: - case A_GETKAUDIT: - case A_SETKAUDIT: - default: - return AUE_AUDITON; /* No special record */ - break; - } -} - -/* - * Create a canonical path from given path by prefixing the supplied - * current working directory, which may be the root directory. 
- */ -int -canon_path(struct vnode *cwd_vp, char *path, char *cpath) -{ - int len; - int ret; - char *bufp = path; - - /* - * convert multiple leading '/' into a single '/' if the cwd_vp is - * NULL (i.e. an absolute path), and strip them entirely if the - * cwd_vp represents a chroot directory (i.e. the caller checked for - * an initial '/' character itself, saw one, and passed fdp->fd_rdir). - * Somewhat complicated, but it places the onus for locking structs - * involved on the caller, and makes proxy operations explicit rather - * than implicit. - */ - if (*(path) == '/') { - while (*(bufp) == '/') - bufp++; /* skip leading '/'s */ - if (cwd_vp == NULL) - bufp--; /* restore one '/' */ - } - if (cwd_vp != NULL) { - len = MAXPATHLEN; - ret = vn_getpath(cwd_vp, cpath, &len); - if (ret != 0) { - cpath[0] = '\0'; - return (ret); - } - if (len < MAXPATHLEN) - cpath[len-1] = '/'; - strlcpy(cpath + len, bufp, MAXPATHLEN - len); - } else { - strlcpy(cpath, bufp, MAXPATHLEN); - } - return (0); -} diff --git a/bsd/kern/kern_bsm_token.c b/bsd/kern/kern_bsm_token.c deleted file mode 100644 index 9da0c96fa..000000000 --- a/bsd/kern/kern_bsm_token.c +++ /dev/null @@ -1,1288 +0,0 @@ -/* - * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#define GET_TOKEN_AREA(tok, dptr, length) \ - do {\ - tok = (token_t *)kalloc(sizeof(*tok) + length); \ - if(tok != NULL)\ - {\ - tok->len = length;\ - dptr = tok->t_data = (u_char *)&tok[1];\ - memset(dptr, 0, length);\ - }\ - }while(0) - - - -/* - * token ID 1 byte - * argument # 1 byte - * argument value 4 bytes/8 bytes (32-bit/64-bit value) - * text length 2 bytes - * text N bytes + 1 terminating NULL byte - */ -token_t * -au_to_arg32(char n, const char *text, u_int32_t v) -{ - token_t *t; - u_char *dptr; - u_int16_t textlen; - - if(text == NULL) { - return NULL; - } - - textlen = strlen(text); - - GET_TOKEN_AREA(t, dptr, 9 + textlen); - if(t == NULL) { - return NULL; - } - - textlen += 1; - - ADD_U_CHAR(dptr, AU_ARG32_TOKEN); - ADD_U_CHAR(dptr, n); - ADD_U_INT32(dptr, v); - ADD_U_INT16(dptr, textlen); - ADD_STRING(dptr, text, textlen); - - return t; - -} - -token_t * -au_to_arg64(char n, const char *text, u_int64_t v) -{ - token_t *t; - u_char *dptr; - u_int16_t textlen; - - if(text == NULL) { - return NULL; - } - - textlen = strlen(text); - - GET_TOKEN_AREA(t, dptr, 13 + textlen); - if(t == NULL) { - return NULL; - } - - textlen += 1; - - ADD_U_CHAR(dptr, AU_ARG64_TOKEN); - ADD_U_CHAR(dptr, 
n); - ADD_U_INT64(dptr, v); - ADD_U_INT16(dptr, textlen); - ADD_STRING(dptr, text, textlen); - - return t; - -} - -token_t * -au_to_arg(char n, char *text, u_int32_t v) -{ - return au_to_arg32(n, text, v); -} - -/* - * token ID 1 byte - * file access mode 4 bytes - * owner user ID 4 bytes - * owner group ID 4 bytes - * file system ID 4 bytes - * node ID 8 bytes - * device 4 bytes/8 bytes (32-bit/64-bit) - */ -token_t *au_to_attr32(__unused struct vnode_attr *attr) -{ - return NULL; -} - -/* Kernel-specific version of the above function */ -token_t *kau_to_attr32(struct vnode_au_info *vni) -{ - token_t *t; - u_char *dptr; - u_int64_t fileid; - u_int16_t pad0_16 = 0; - u_int32_t pad0_32 = 0; - - if(vni == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 29); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_ATTR32_TOKEN); - - /* - * Darwin defines the size for the file mode as 2 bytes; - * BSM defines 4. So we copy in a 0 first. - */ - ADD_U_INT16(dptr, pad0_16); - ADD_U_INT16(dptr, vni->vn_mode); - - ADD_U_INT32(dptr, vni->vn_uid); - ADD_U_INT32(dptr, vni->vn_gid); - ADD_U_INT32(dptr, vni->vn_fsid); - - /* - * Darwin defines the size for fileid as 4 bytes; - * BSM defines 8. So we copy in a 0 first. 
- */ - fileid = vni->vn_fileid; - ADD_U_INT32(dptr, pad0_32); - ADD_U_INT32(dptr, fileid); - - ADD_U_INT32(dptr, vni->vn_dev); - - return t; -} - -token_t *au_to_attr64(__unused struct vnode_attr *attr) -{ - return NULL; -} - -token_t *kau_to_attr64(__unused struct vnode_au_info *vni) -{ - return NULL; -} - -token_t *au_to_attr(struct vnode_attr *attr) -{ - return au_to_attr32(attr); - -} - - -/* - * token ID 1 byte - * how to print 1 byte - * basic unit 1 byte - * unit count 1 byte - * data items (depends on basic unit) - */ -token_t *au_to_data(char unit_print, char unit_type, - char unit_count, unsigned char *p) -{ - token_t *t; - u_char *dptr; - size_t datasize, totdata; - - if(p == NULL) { - return NULL; - } - - /* Determine the size of the basic unit */ - switch(unit_type) { - case AUR_BYTE: datasize = AUR_BYTE_SIZE; - break; - - case AUR_SHORT: datasize = AUR_SHORT_SIZE; - break; - - case AUR_LONG: datasize = AUR_LONG_SIZE; - break; - - default: return NULL; - } - - totdata = datasize * unit_count; - - GET_TOKEN_AREA(t, dptr, totdata + 4); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_ARB_TOKEN); - ADD_U_CHAR(dptr, unit_print); - ADD_U_CHAR(dptr, unit_type); - ADD_U_CHAR(dptr, unit_count); - ADD_MEM(dptr, p, totdata); - - return t; -} - -/* - * token ID 1 byte - * status 4 bytes - * return value 4 bytes - */ -token_t *au_to_exit(int retval, int err) -{ - token_t *t; - u_char *dptr; - - GET_TOKEN_AREA(t, dptr, 9); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_EXIT_TOKEN); - ADD_U_INT32(dptr, err); - ADD_U_INT32(dptr, retval); - - return t; -} - -/* - */ -token_t * -au_to_groups(gid_t *groups) -{ - return au_to_newgroups(MAX_GROUPS, groups); -} - -/* - * token ID 1 byte - * number groups 2 bytes - * group list count * 4 bytes - */ -token_t *au_to_newgroups(u_int16_t n, gid_t *groups) -{ - token_t *t; - u_char *dptr; - int i; - - if(groups == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, n * 4 + 3); - if(t == NULL) { - 
return NULL; - } - - ADD_U_CHAR(dptr, AU_NEWGROUPS_TOKEN); - ADD_U_INT16(dptr, n); - for(i = 0; i < n; i++) { - ADD_U_INT32(dptr, groups[i]); - } - - return t; -} - - - - -/* - * token ID 1 byte - * internet address 4 bytes - */ -token_t *au_to_in_addr(struct in_addr *internet_addr) -{ - token_t *t; - u_char *dptr; - - if(internet_addr == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 5); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_IN_ADDR_TOKEN); - ADD_U_INT32(dptr, internet_addr->s_addr); - - return t; -} - -/* - * token ID 1 byte - * address type/length 4 bytes - * Address 16 bytes - */ -token_t *au_to_in_addr_ex(struct in6_addr *internet_addr) -{ - token_t *t; - u_char *dptr; - u_int32_t type = AF_INET6; - - if(internet_addr == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 21); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_IN_ADDR_EX_TOKEN); - ADD_U_INT32(dptr, type); - ADD_U_INT32(dptr, internet_addr->__u6_addr.__u6_addr32[0]); - ADD_U_INT32(dptr, internet_addr->__u6_addr.__u6_addr32[1]); - ADD_U_INT32(dptr, internet_addr->__u6_addr.__u6_addr32[2]); - ADD_U_INT32(dptr, internet_addr->__u6_addr.__u6_addr32[3]); - - return t; -} - -/* - * token ID 1 byte - * ip header 20 bytes - */ -token_t *au_to_ip(struct ip *ip) -{ - token_t *t; - u_char *dptr; - - if(ip == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 21); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_IP_TOKEN); - ADD_MEM(dptr, ip, sizeof(struct ip)); - - return t; -} - -/* - * token ID 1 byte - * object ID type 1 byte - * object ID 4 bytes - */ -token_t *au_to_ipc(char type, int id) -{ - token_t *t; - u_char *dptr; - - - GET_TOKEN_AREA(t, dptr, 6); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_IPC_TOKEN); - ADD_U_CHAR(dptr, type); - ADD_U_INT32(dptr, id); - - return t; -} - -/* - * token ID 1 byte - * owner user ID 4 bytes - * owner group ID 4 bytes - * creator user ID 4 bytes - * creator group ID 4 bytes - * access mode 
4 bytes - * slot sequence # 4 bytes - * key 4 bytes - */ -token_t *au_to_ipc_perm(struct ipc_perm *perm) -{ - token_t *t; - u_char *dptr; - u_int16_t pad0 = 0; - - if(perm == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 29); - if(t == NULL) { - return NULL; - } - - /* - * Darwin defines the sizes for ipc_perm members - * as 2 bytes; BSM defines 4. So we copy in a 0 first. - */ - ADD_U_CHAR(dptr, AU_IPCPERM_TOKEN); - - ADD_U_INT32(dptr, perm->uid); - ADD_U_INT32(dptr, perm->gid); - ADD_U_INT32(dptr, perm->cuid); - ADD_U_INT32(dptr, perm->cgid); - - ADD_U_INT16(dptr, pad0); - ADD_U_INT16(dptr, perm->mode); - - ADD_U_INT16(dptr, pad0); - ADD_U_INT16(dptr, perm->_seq); - - ADD_U_INT16(dptr, pad0); - ADD_U_INT16(dptr, perm->_key); - - return t; -} - - -/* - * token ID 1 byte - * port IP address 2 bytes - */ -token_t *au_to_iport(u_int16_t iport) -{ - token_t *t; - u_char *dptr; - - - GET_TOKEN_AREA(t, dptr, 3); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_IPORT_TOKEN); - ADD_U_INT16(dptr, iport); - - return t; -} - - -/* - * token ID 1 byte - * size 2 bytes - * data size bytes - */ -token_t *au_to_opaque(char *data, u_int16_t bytes) -{ - token_t *t; - u_char *dptr; - - if((data == NULL) || (bytes <= 0)) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, bytes + 3); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_OPAQUE_TOKEN); - ADD_U_INT16(dptr, bytes); - ADD_MEM(dptr, data, bytes); - - return t; -} - -/* - * Kernel version of the add file token function, where the time value - * is passed in as an additional parameter. 
- * token ID 1 byte - * seconds of time 4 bytes - * milliseconds of time 4 bytes - * file name len 2 bytes - * file pathname N bytes + 1 terminating NULL byte - */ -token_t *kau_to_file(const char *file, const struct timeval *tv) -{ - token_t *t; - u_char *dptr; - u_int16_t filelen; - u_int32_t timems = tv->tv_usec/1000; /* We need time in ms */ - - if(file == NULL) { - return NULL; - } - filelen = strlen(file); - - GET_TOKEN_AREA(t, dptr, filelen + 12); - if(t == NULL) { - return NULL; - } - - filelen += 1; - - ADD_U_CHAR(dptr, AU_FILE_TOKEN); - - /* Add the timestamp */ - ADD_U_INT32(dptr, tv->tv_sec); - ADD_U_INT32(dptr, timems); - - ADD_U_INT16(dptr, filelen); - ADD_STRING(dptr, file, filelen); - - return t; - -} - -/* - * token ID 1 byte - * text length 2 bytes - * text N bytes + 1 terminating NULL byte - */ -token_t *au_to_text(const char *text) -{ - token_t *t; - u_char *dptr; - u_int16_t textlen; - - if(text == NULL) { - return NULL; - } - textlen = strlen(text); - - GET_TOKEN_AREA(t, dptr, textlen + 4); - if(t == NULL) { - return NULL; - } - - textlen += 1; - - ADD_U_CHAR(dptr, AU_TEXT_TOKEN); - ADD_U_INT16(dptr, textlen); - ADD_STRING(dptr, text, textlen); - - return t; -} - -/* - * token ID 1 byte - * path length 2 bytes - * path N bytes + 1 terminating NULL byte - */ -token_t *au_to_path(char *text) -{ - token_t *t; - u_char *dptr; - u_int16_t textlen; - - if(text == NULL) { - return NULL; - } - textlen = strlen(text); - - GET_TOKEN_AREA(t, dptr, textlen + 4); - if(t == NULL) { - return NULL; - } - - textlen += 1; - - ADD_U_CHAR(dptr, AU_PATH_TOKEN); - ADD_U_INT16(dptr, textlen); - ADD_STRING(dptr, text, textlen); - - return t; -} - -/* - * token ID 1 byte - * audit ID 4 bytes - * effective user ID 4 bytes - * effective group ID 4 bytes - * real user ID 4 bytes - * real group ID 4 bytes - * process ID 4 bytes - * session ID 4 bytes - * terminal ID - * port ID 4 bytes/8 bytes (32-bit/64-bit value) - * machine address 4 bytes - */ -token_t 
*au_to_process32(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) -{ - token_t *t; - u_char *dptr; - - if(tid == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 37); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_PROCESS_32_TOKEN); - ADD_U_INT32(dptr, auid); - ADD_U_INT32(dptr, euid); - ADD_U_INT32(dptr, egid); - ADD_U_INT32(dptr, ruid); - ADD_U_INT32(dptr, rgid); - ADD_U_INT32(dptr, pid); - ADD_U_INT32(dptr, sid); - ADD_U_INT32(dptr, tid->port); - ADD_U_INT32(dptr, tid->machine); - - return t; -} - -token_t *au_to_process64(__unused au_id_t auid, - __unused uid_t euid, - __unused gid_t egid, - __unused uid_t ruid, - __unused gid_t rgid, - __unused pid_t pid, - __unused au_asid_t sid, - __unused au_tid_t *tid) -{ - return NULL; - } - -token_t *au_to_process(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) -{ - return au_to_process32(auid, euid, egid, ruid, rgid, pid, - sid, tid); -} - - -/* - * token ID 1 byte - * audit ID 4 bytes - * effective user ID 4 bytes - * effective group ID 4 bytes - * real user ID 4 bytes - * real group ID 4 bytes - * process ID 4 bytes - * session ID 4 bytes - * terminal ID - * port ID 4 bytes/8 bytes (32-bit/64-bit value) - * address type-len 4 bytes - * machine address 16 bytes - */ -token_t *au_to_process32_ex(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) -{ - token_t *t; - u_char *dptr; - - if(tid == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 53); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_PROCESS_32_EX_TOKEN); - ADD_U_INT32(dptr, auid); - ADD_U_INT32(dptr, euid); - ADD_U_INT32(dptr, egid); - ADD_U_INT32(dptr, ruid); - ADD_U_INT32(dptr, rgid); - ADD_U_INT32(dptr, pid); - ADD_U_INT32(dptr, sid); - ADD_U_INT32(dptr, tid->at_port); - ADD_U_INT32(dptr, tid->at_type); - ADD_U_INT32(dptr, tid->at_addr[0]); - 
ADD_U_INT32(dptr, tid->at_addr[1]); - ADD_U_INT32(dptr, tid->at_addr[2]); - ADD_U_INT32(dptr, tid->at_addr[3]); - - return t; -} - -token_t *au_to_process64_ex( - __unused au_id_t auid, - __unused uid_t euid, - __unused gid_t egid, - __unused uid_t ruid, - __unused gid_t rgid, - __unused pid_t pid, - __unused au_asid_t sid, - __unused au_tid_addr_t *tid) -{ - return NULL; -} - -token_t *au_to_process_ex(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) -{ - return au_to_process32_ex(auid, euid, egid, ruid, rgid, - pid, sid, tid); -} - -/* - * token ID 1 byte - * error status 1 byte - * return value 4 bytes/8 bytes (32-bit/64-bit value) - */ -token_t *au_to_return32(char status, u_int32_t ret) -{ - token_t *t; - u_char *dptr; - - - GET_TOKEN_AREA(t, dptr, 6); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_RETURN_32_TOKEN); - ADD_U_CHAR(dptr, status); - ADD_U_INT32(dptr, ret); - - return t; -} - -token_t *au_to_return64(char status, u_int64_t ret) -{ - token_t *t; - u_char *dptr; - - - GET_TOKEN_AREA(t, dptr, 10); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_RETURN_64_TOKEN); - ADD_U_CHAR(dptr, status); - ADD_U_INT64(dptr, ret); - - return t; -} - -token_t *au_to_return(char status, u_int32_t ret) -{ - return au_to_return32(status, ret); -} - -/* - * token ID 1 byte - * sequence number 4 bytes - */ -token_t *au_to_seq(u_int32_t audit_count) -{ - token_t *t; - u_char *dptr; - - - GET_TOKEN_AREA(t, dptr, 5); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SEQ_TOKEN); - ADD_U_INT32(dptr, audit_count); - - return t; -} - -/* - * token ID 1 byte - * socket type 2 bytes - * local port 2 bytes - * local Internet address 4 bytes - * remote port 2 bytes - * remote Internet address 4 bytes - */ -token_t *au_to_socket(__unused struct socket *so) -{ - return NULL; -} - -/* - * Kernel-specific version of the above function. 
- */ -token_t *kau_to_socket(struct socket_au_info *soi) -{ - token_t *t; - u_char *dptr; - u_int16_t so_type; - - if(soi == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 15); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SOCK_TOKEN); - /* Coerce the socket type into a short value */ - so_type = soi->so_type; - ADD_U_INT16(dptr, so_type); - ADD_U_INT16(dptr, soi->so_lport); - ADD_U_INT32(dptr, soi->so_laddr); - ADD_U_INT16(dptr, soi->so_rport); - ADD_U_INT32(dptr, soi->so_raddr); - - return t; -} - -/* - * token ID 1 byte - * socket type 2 bytes - * local port 2 bytes - * address type/length 4 bytes - * local Internet address 4 bytes/16 bytes (IPv4/IPv6 address) - * remote port 4 bytes - * address type/length 4 bytes - * remote Internet address 4 bytes/16 bytes (IPv4/IPv6 address) - */ -token_t *au_to_socket_ex_32( - __unused u_int16_t lp, - __unused u_int16_t rp, - __unused struct sockaddr *la, - __unused struct sockaddr *ra) -{ - return NULL; -} - -token_t *au_to_socket_ex_128( - __unused u_int16_t lp, - __unused u_int16_t rp, - __unused struct sockaddr *la, - __unused struct sockaddr *ra) -{ - return NULL; -} - -/* - * token ID 1 byte - * socket family 2 bytes - * local port 2 bytes - * socket address 4 bytes - */ -token_t *au_to_sock_inet32(struct sockaddr_in *so) -{ - token_t *t; - u_char *dptr; - - if(so == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 9); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SOCK_INET_32_TOKEN); - /* In Darwin, sin_family is one octet, but BSM defines the token - * to store two. So we copy in a 0 first. 
- */ - ADD_U_CHAR(dptr, 0); - ADD_U_CHAR(dptr, so->sin_family); - ADD_U_INT16(dptr, so->sin_port); - ADD_U_INT32(dptr, so->sin_addr.s_addr); - - return t; - -} - -token_t *au_to_sock_inet128(struct sockaddr_in6 *so) -{ - token_t *t; - u_char *dptr; - - if(so == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 21); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SOCK_INET_128_TOKEN); - /* In Darwin, sin_family is one octet, but BSM defines the token - * to store two. So we copy in a 0 first. - */ - ADD_U_CHAR(dptr, 0); - ADD_U_CHAR(dptr, so->sin6_family); - ADD_U_INT16(dptr, so->sin6_port); - ADD_U_INT32(dptr, so->sin6_addr.__u6_addr.__u6_addr32[0]); - ADD_U_INT32(dptr, so->sin6_addr.__u6_addr.__u6_addr32[1]); - ADD_U_INT32(dptr, so->sin6_addr.__u6_addr.__u6_addr32[2]); - ADD_U_INT32(dptr, so->sin6_addr.__u6_addr.__u6_addr32[3]); - - return t; - - - -} - -/* - * token ID 1 byte - * socket family 2 bytes - * path 104 bytes - */ -token_t *au_to_sock_unix(struct sockaddr_un *so) -{ - token_t *t; - u_char *dptr; - - if(so == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 107); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SOCK_UNIX_TOKEN); - /* BSM token has two bytes for family */ - ADD_U_CHAR(dptr, 0); - ADD_U_CHAR(dptr, so->sun_family); - ADD_STRING(dptr, so->sun_path, strlen(so->sun_path)); - - return t; - -} - -token_t *au_to_sock_inet(struct sockaddr_in *so) -{ - return au_to_sock_inet32(so); -} - -/* - * token ID 1 byte - * audit ID 4 bytes - * effective user ID 4 bytes - * effective group ID 4 bytes - * real user ID 4 bytes - * real group ID 4 bytes - * process ID 4 bytes - * session ID 4 bytes - * terminal ID - * port ID 4 bytes/8 bytes (32-bit/64-bit value) - * machine address 4 bytes - */ -token_t *au_to_subject32(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) -{ - token_t *t; - u_char *dptr; - - if(tid == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, 
dptr, 37); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SUBJECT_32_TOKEN); - ADD_U_INT32(dptr, auid); - ADD_U_INT32(dptr, euid); - ADD_U_INT32(dptr, egid); - ADD_U_INT32(dptr, ruid); - ADD_U_INT32(dptr, rgid); - ADD_U_INT32(dptr, pid); - ADD_U_INT32(dptr, sid); - ADD_U_INT32(dptr, tid->port); - ADD_U_INT32(dptr, tid->machine); - - return t; -} - -token_t *au_to_subject64( - __unused au_id_t auid, - __unused uid_t euid, - __unused gid_t egid, - __unused uid_t ruid, - __unused gid_t rgid, - __unused pid_t pid, - __unused au_asid_t sid, - __unused au_tid_t *tid) -{ - return NULL; - } - -token_t *au_to_subject(au_id_t auid, uid_t euid, gid_t egid, - uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_t *tid) -{ - return au_to_subject32(auid, euid, egid, ruid, rgid, - pid, sid, tid); - -} - -/* - * token ID 1 byte - * audit ID 4 bytes - * effective user ID 4 bytes - * effective group ID 4 bytes - * real user ID 4 bytes - * real group ID 4 bytes - * process ID 4 bytes - * session ID 4 bytes - * terminal ID - * port ID 4 bytes/8 bytes (32-bit/64-bit value) - * address type/length 4 bytes - * machine address 16 bytes - */ -token_t *au_to_subject32_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) -{ - token_t *t; - u_char *dptr; - - if(tid == NULL) { - return NULL; - } - - GET_TOKEN_AREA(t, dptr, 53); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_SUBJECT_32_EX_TOKEN); - ADD_U_INT32(dptr, auid); - ADD_U_INT32(dptr, euid); - ADD_U_INT32(dptr, egid); - ADD_U_INT32(dptr, ruid); - ADD_U_INT32(dptr, rgid); - ADD_U_INT32(dptr, pid); - ADD_U_INT32(dptr, sid); - ADD_U_INT32(dptr, tid->at_port); - ADD_U_INT32(dptr, tid->at_type); - ADD_U_INT32(dptr, tid->at_addr[0]); - ADD_U_INT32(dptr, tid->at_addr[1]); - ADD_U_INT32(dptr, tid->at_addr[2]); - ADD_U_INT32(dptr, tid->at_addr[3]); - - return t; -} - -token_t *au_to_subject64_ex( - __unused au_id_t auid, - __unused uid_t euid, - 
__unused gid_t egid, - __unused uid_t ruid, - __unused gid_t rgid, - __unused pid_t pid, - __unused au_asid_t sid, - __unused au_tid_addr_t *tid) -{ - return NULL; -} - -token_t *au_to_subject_ex(au_id_t auid, uid_t euid, - gid_t egid, uid_t ruid, gid_t rgid, pid_t pid, - au_asid_t sid, au_tid_addr_t *tid) -{ - return au_to_subject32_ex(auid, euid, egid, ruid, rgid, - pid, sid, tid); - -} - -/* - * token ID 1 byte - * count 4 bytes - * text count null-terminated strings - */ -token_t *au_to_exec_args(const char **args) -{ - token_t *t; - u_char *dptr; - const char *nextarg; - int i, count = 0; - size_t totlen = 0; - - if(args == NULL) { - return NULL; - } - - nextarg = *args; - - while(nextarg != NULL) { - int nextlen; - - nextlen = strlen(nextarg); - totlen += nextlen + 1; - count++; - nextarg = *(args + count); - } - - - GET_TOKEN_AREA(t, dptr, 5 + totlen); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_EXEC_ARG_TOKEN); - ADD_U_INT32(dptr, count); - - for(i =0; i< count; i++) { - nextarg = *(args + i); - ADD_MEM(dptr, nextarg, strlen(nextarg) + 1); - } - - return t; -} - - -/* - * token ID 1 byte - * count 4 bytes - * text count null-terminated strings - */ -token_t *au_to_exec_env(const char **env) -{ - token_t *t; - u_char *dptr; - int i, count = 0; - size_t totlen = 0; - const char *nextenv; - - if(env == NULL) { - return NULL; - } - - nextenv = *env; - - while(nextenv != NULL) { - int nextlen; - - nextlen = strlen(nextenv); - totlen += nextlen + 1; - count++; - nextenv = *(env + count); - } - - - GET_TOKEN_AREA(t, dptr, 5 + totlen); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_EXEC_ENV_TOKEN); - ADD_U_INT32(dptr, count); - - for(i =0; i< count; i++) { - nextenv = *(env + i); - ADD_MEM(dptr, nextenv, strlen(nextenv) + 1); - } - - return t; -} - - -/* - * Kernel version of the BSM header token functions. 
These versions take - * a timespec struct as an additional parameter in order to obtain the - * create time value for the BSM audit record. - * token ID 1 byte - * record byte count 4 bytes - * version # 1 byte [2] - * event type 2 bytes - * event modifier 2 bytes - * seconds of time 4 bytes/8 bytes (32-bit/64-bit value) - * milliseconds of time 4 bytes/8 bytes (32-bit/64-bit value) - */ -token_t *kau_to_header32(const struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod) -{ - token_t *t; - u_char *dptr; - u_int32_t timems = ctime->tv_nsec/1000000; /* We need time in ms */ - - GET_TOKEN_AREA(t, dptr, 18); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_HEADER_32_TOKEN); - ADD_U_INT32(dptr, rec_size); - ADD_U_CHAR(dptr, HEADER_VERSION); - ADD_U_INT16(dptr, e_type); - ADD_U_INT16(dptr, e_mod); - - /* Add the timestamp */ - ADD_U_INT32(dptr, ctime->tv_sec); - ADD_U_INT32(dptr, timems); - - return t; -} - -token_t *kau_to_header64( - __unused const struct timespec *ctime, - __unused int rec_size, - __unused au_event_t e_type, - __unused au_emod_t e_mod) -{ - return NULL; -} - -token_t *kau_to_header(const struct timespec *ctime, int rec_size, - au_event_t e_type, au_emod_t e_mod) -{ - return kau_to_header32(ctime, rec_size, e_type, e_mod); -} - -/* - * token ID 1 byte - * trailer magic number 2 bytes - * record byte count 4 bytes - */ -token_t *au_to_trailer(int rec_size) -{ - token_t *t; - u_char *dptr; - u_int16_t magic = TRAILER_PAD_MAGIC; - - - GET_TOKEN_AREA(t, dptr, 7); - if(t == NULL) { - return NULL; - } - - ADD_U_CHAR(dptr, AU_TRAILER_TOKEN); - ADD_U_INT16(dptr, magic); - ADD_U_INT32(dptr, rec_size); - - return t; - -} - diff --git a/bsd/kern/kern_callout.c b/bsd/kern/kern_callout.c new file mode 100644 index 000000000..58df65fa7 --- /dev/null +++ b/bsd/kern/kern_callout.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2004-2007 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Kernel callout related functions, including moving average calculation + * to permit the kernel to know about insufficiently responsive user space + * processes. + */ + +#include /* memove, memset */ +#include /* uint64_t */ +#include + +/* + * kco_ma_init + * + * Initialize a moving average structure for use + * + * Parameters: map Pointer to the moving average state + * threshold Threshold % at which to trigger (>100) + * kind Kind of trigger(s) to set + * + * Returns: (void) + * + * Notes: The number of samples in a simple moving average is not + * controllable; this might be a future direction. 
+ * + * The simple and weighted thresholds are not separately + * controllable; this might be a future direction, but + * will likely be unnecessary due to one type being in use + * at a time in the most likely scenarios. + */ +void +kco_ma_init(struct kco_moving_average *map, int32_t threshold, int kind) +{ + memset(map, 0, sizeof(*map)); + + /* per algorithm init required */ + map->ma_flags |= KCO_MA_F_NEEDS_INIT; + + /* set algorithm selector flags */ + map->ma_flags |= kind; + + /* set thresholds */ + map->ma_sma_threshold = threshold; + map->ma_wma_threshold = threshold; +} + + +/* + * kco_ma_info + * + * Report on the current moving average information; this is typically only + * called after a trigger event. + * + * Parameters: map Pointer to the moving average state + * kind Kind of trigger to report on + * averagep Pointer to area to receive current + * old_averagep Pointer to area to receive previous + * thresholdp Pointer to area to receive threshold + * + * Returns: 0 Information not available + * 1 Information retrieved + * + * Notes: You can only retrieve one kind of average information at a + * time; if you are collecting multiple types, then you must + * call this function one time for each type you are interested + * in obtaining. 
+ */ +int +kco_ma_info(struct kco_moving_average *map, int kind, uint64_t *averagep, uint64_t *old_averagep, int32_t *thresholdp, int *countp) +{ + uint64_t average; + uint64_t old_average; + int32_t threshold; + int count; + + /* Not collecting this type of data or no data yet*/ + if (!(map->ma_flags & kind) || (map->ma_flags & KCO_MA_F_NEEDS_INIT)) + return(0); + + switch(kind) { + case KCO_MA_F_SMA: + average = map->ma_sma; + old_average = map->ma_old_sma; + threshold = map->ma_sma_threshold; + count = map->ma_sma_trigger_count; + break; + + case KCO_MA_F_WMA: + average = map->ma_wma; + old_average = map->ma_old_wma; + threshold = map->ma_wma_threshold; + count = map->ma_wma_trigger_count; + break; + + default: + /* + * Asking for data we don't have or more than one kind of + * data at the same time. + */ + return(0); + } + + if (averagep != NULL) + *averagep = average; + if (old_averagep != NULL) + *old_averagep = old_average; + if (thresholdp != NULL) + *thresholdp = threshold; + if (countp != NULL) + *countp = count; + + return(1); +} + + +/* + * kco_ma_addsample + * + * Accumulate a sample into a moving average + * + * Parameters: map Pointer to the moving average state + * sample_time latency delta time + * + * Returns: 0 Nothing triggered + * !0 Bitmap of KCO_MA_F_* flags for the + * algorithms which triggered + * + * Notes: Add a delta time sample to the moving average; this function + * will return bits for each algorithm which went over its + * trigger threshold as a result of receiving the sample. + * Callers can then log/complain/panic over the unresponsive + * process to which they are calling out. + */ +int +kco_ma_addsample(struct kco_moving_average *map, uint64_t sample_time) +{ + int triggered = 0; + int do_init = (map->ma_flags & KCO_MA_F_NEEDS_INIT); + + /* + * SIMPLE MOVING AVERAGE + * + * Compute simple moving average over MA_SMA_SAMPLES; incremental is + * cheaper than re-sum.
+ */ + if (map->ma_flags & KCO_MA_F_SMA) { + map->ma_old_sma = map->ma_sma; + + map->ma_sma = ((map->ma_sma * MA_SMA_SAMPLES) - map->ma_sma_samples[0] + sample_time) / MA_SMA_SAMPLES; /* NOTE(review): this subtracts the newest stored sample [0]; the sample the memmove() below drops is [MA_SMA_SAMPLES-1] -- confirm intent */ + memmove(&map->ma_sma_samples[1], &map->ma_sma_samples[0], sizeof(map->ma_sma_samples[0]) *(MA_SMA_SAMPLES - 1)); + map->ma_sma_samples[0] = sample_time; + /* + * Check if percentage change exceeds the allowed trigger + * threshold; this will only happen if the sample time + * increases more than an acceptable amount; decreases will + * not cause a trigger (but will decrease the overall average, + * which can cause a trigger the next time). + * + * Note: We don't start triggering on the simple moving + * average until after we have enough samples for + * the delta to be statistically valid; this is + * defined to be MA_SMA_SAMPLES. A zero old average gives no baseline (and would divide by zero), so it never triggers. + */ + if (map->ma_old_sma != 0 && map->ma_sma_samples[MA_SMA_SAMPLES-1] && ((int)((map->ma_sma * 100) / map->ma_old_sma)) > map->ma_sma_threshold) { + triggered |= KCO_MA_F_SMA; + map->ma_sma_trigger_count++; + } + } + + /* + * WEIGHTED MOVING AVERAGE + * + * Compute the weighted moving average. Do this by averaging over + * two values, one with a lesser weighting than the other; the lesser + * weighted value is the persistent historical value, whose sample + * weight decreases over time, the older the samples get. Be careful + * here to permit strict integer arithmetic. + */ + if (map->ma_flags & KCO_MA_F_WMA) { + map->ma_old_wma = map->ma_wma; + + /* Prime the pump, if necessary */ + if (do_init) + map->ma_old_wma = sample_time; + + map->ma_wma = ((((map->ma_wma * 90) + sample_time * ((100*2) - 90))/100) / 2); + + /* + * Check if percentage change exceeds the allowed trigger + * threshold; this will only happen if the sample time + * increases more than an acceptable amount; decreases will + * not cause a trigger (but will decrease the overall average, + * which can cause a trigger the next time). A zero old average gives no baseline (and would divide by zero), so it never triggers.
+ */ + if (map->ma_old_wma != 0 && ((int)(((map->ma_wma * 100) / map->ma_old_wma))) > map->ma_wma_threshold) { + triggered |= KCO_MA_F_WMA; + map->ma_wma_trigger_count++; + } + } + + if (do_init) + map->ma_flags &= ~KCO_MA_F_NEEDS_INIT; + + return (triggered); +} diff --git a/bsd/kern/kern_clock.c b/bsd/kern/kern_clock.c index 432a0f0e8..0cbd41e1b 100644 --- a/bsd/kern/kern_clock.c +++ b/bsd/kern/kern_clock.c @@ -314,7 +314,7 @@ void startprofclock(struct proc *p) { if ((p->p_flag & P_PROFIL) == 0) - OSBitOrAtomic(P_PROFIL, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_PROFIL, &p->p_flag); } /* @@ -324,7 +324,7 @@ void stopprofclock(struct proc *p) { if (p->p_flag & P_PROFIL) - OSBitAndAtomic(~((uint32_t)P_PROFIL), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_PROFIL), &p->p_flag); } /* TBD locking user profiling is not resolved yet */ diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index 877fe6778..a76088eb4 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2008 Apple Computer, Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -52,7 +52,6 @@ #include #include -#include #include @@ -93,10 +92,10 @@ static struct kctl *ctl_find_by_name(const char *); static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit); static struct ctl_cb *kcb_find(struct kctl *, u_int32_t unit); -static void ctl_post_msg(u_long event_code, u_int32_t id); +static void ctl_post_msg(u_int32_t event_code, u_int32_t id); -static int ctl_lock(struct socket *, int, int); -static int ctl_unlock(struct socket *, int, int); +static int ctl_lock(struct socket *, int, void *); +static int ctl_unlock(struct socket *, int, void *); static lck_mtx_t * ctl_getlock(struct socket *, int); static struct pr_usrreqs ctl_usrreqs = @@ -509,7 +508,7 @@ ctl_enqueuedata(void *kctlref, u_int32_t unit, void *data, size_t len, u_int32_t return EINVAL; socket_lock(so, 1); - if (sbspace(&so->so_rcv) < (long)len) { + if (sbspace(&so->so_rcv) < (int)len) { error = ENOBUFS; goto bye; } @@ -896,7 +895,7 @@ kcb_find(struct kctl *kctl, u_int32_t unit) * Must be called witout lock */ static void -ctl_post_msg(u_long event_code, u_int32_t id) +ctl_post_msg(u_int32_t event_code, u_int32_t id) { struct ctl_event_data ctl_ev_data; struct kev_msg ev_msg; @@ -919,24 +918,29 @@ ctl_post_msg(u_long event_code, u_int32_t id) } static int -ctl_lock(struct socket *so, int refcount, int lr) - { - uint32_t lr_saved; - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; - - if (so->so_pcb) { +ctl_lock(struct socket *so, int refcount, void *lr) +{ + void *lr_saved; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; + + if (so->so_pcb != NULL) { lck_mtx_lock(((struct ctl_cb *)so->so_pcb)->mtx); } else { - panic("ctl_lock: so=%p NO PCB! lr=%x\n", so, lr_saved); - lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + panic("ctl_lock: so=%p NO PCB! 
lr=%p lrh= %s\n", + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } - - if (so->so_usecount < 0) - panic("ctl_lock: so=%p so_pcb=%p lr=%x ref=%x\n", - so, so->so_pcb, lr_saved, so->so_usecount); - + + if (so->so_usecount < 0) { + panic("ctl_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", + so, so->so_pcb, lr_saved, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } + if (refcount) so->so_usecount++; @@ -946,38 +950,44 @@ ctl_lock(struct socket *so, int refcount, int lr) } static int -ctl_unlock(struct socket *so, int refcount, int lr) +ctl_unlock(struct socket *so, int refcount, void *lr) { - uint32_t lr_saved; - lck_mtx_t * mutex_held; - - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; - + void *lr_saved; + lck_mtx_t *mutex_held; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; + #ifdef MORE_KCTLLOCK_DEBUG - printf("ctl_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n", - so, so->so_pcb, ((struct ctl_cb *)so->so_pcb)->mtx, so->so_usecount, lr_saved); + printf("ctl_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%p\n", + so, so->so_pcb, ((struct ctl_cb *)so->so_pcb)->mtx, + so->so_usecount, lr_saved); #endif if (refcount) so->so_usecount--; - - if (so->so_usecount < 0) - panic("ctl_unlock: so=%p usecount=%x\n", so, so->so_usecount); + + if (so->so_usecount < 0) { + panic("ctl_unlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } if (so->so_pcb == NULL) { - panic("ctl_unlock: so=%p NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); - mutex_held = so->so_proto->pr_domain->dom_mtx; - } else { - mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx; + panic("ctl_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", + so, so->so_usecount, (void *)lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } + mutex_held = ((struct ctl_cb *)so->so_pcb)->mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); 
so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); - + if (so->so_usecount == 0) ctl_sofreelastref(so); - + return (0); } @@ -988,10 +998,12 @@ ctl_getlock(struct socket *so, __unused int locktype) if (so->so_pcb) { if (so->so_usecount < 0) - panic("ctl_getlock: so=%p usecount=%x\n", so, so->so_usecount); + panic("ctl_getlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); return(kcb->mtx); } else { - panic("ctl_getlock: so=%p NULL so_pcb\n", so); + panic("ctl_getlock: so=%p NULL NO so_pcb %s\n", + so, solockhistory_nr(so)); return (so->so_proto->pr_domain->dom_mtx); } } diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index 70fb531b3..52c0a3095 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -63,6 +63,8 @@ #include /* task_suspend() */ #include /* get_task_numacts() */ +#include + typedef struct { int flavor; /* the number for this flavor */ mach_msg_type_number_t count; /* count of ints in this flavor */ @@ -85,7 +87,7 @@ mythread_state_flavor_t thread_flavor_array[]={ {PPC_VECTOR_STATE, PPC_VECTOR_STATE_COUNT} }; -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) mythread_state_flavor_t thread_flavor_array [] = { {x86_THREAD_STATE, x86_THREAD_STATE_COUNT}, {x86_FLOAT_STATE, x86_FLOAT_STATE_COUNT}, @@ -113,6 +115,8 @@ kern_return_t thread_getstatus(register thread_t act, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); +static cpu_type_t process_cpu_type(proc_t proc); +static cpu_type_t process_cpu_subtype(proc_t proc); #ifdef SECURE_KERNEL __private_extern__ int do_coredump = 0; /* default: don't dump cores */ @@ -121,6 +125,44 @@ __private_extern__ int do_coredump = 1; /* default: dump cores */ #endif __private_extern__ int sugid_coredump = 0; /* default: but not SGUID binaries */ + +/* cpu_type returns only 
the most generic indication of the current CPU. */ +/* in a core we want to know the kind of process. */ + +static cpu_type_t +process_cpu_type(proc_t core_proc) +{ + cpu_type_t what_we_think; +#if defined (__i386__) || defined (__x86_64__) + if (IS_64BIT_PROCESS(core_proc)) { + what_we_think = CPU_TYPE_X86_64; + } else { + what_we_think = CPU_TYPE_I386; + } +#elif defined (__ppc__) + #pragma unused(core_proc) + what_we_think = CPU_TYPE_POWERPC; +#endif + return what_we_think; +} + +static cpu_type_t +process_cpu_subtype(proc_t core_proc) +{ + cpu_type_t what_we_think; +#if defined (__i386__) || defined (__x86_64__) + if (IS_64BIT_PROCESS(core_proc)) { + what_we_think = CPU_SUBTYPE_X86_64_ALL; + } else { + what_we_think = CPU_SUBTYPE_I386_ALL; + } +#elif defined (__ppc__) + #pragma unused(core_proc) + what_we_think = CPU_SUBTYPE_POWERPC_ALL; +#endif + return what_we_think; +} + void collectth_state(thread_t th_act, void *tirp) { @@ -221,7 +263,10 @@ coredump(proc_t core_proc) ( (sugid_coredump == 0) && /* Not dumping SUID/SGID binaries */ ( (cred->cr_svuid != cred->cr_ruid) || (cred->cr_svgid != cred->cr_rgid)))) { - + +#if CONFIG_AUDIT + audit_proc_coredump(core_proc, NULL, EFAULT); +#endif return (EFAULT); } @@ -241,7 +286,8 @@ coredump(proc_t core_proc) /* create name according to sysctl'able format string */ /* if name creation fails, fall back to historical behaviour... 
*/ - if (proc_core_name(core_proc->p_comm, kauth_cred_getuid(cred), + if (alloced_name == NULL || + proc_core_name(core_proc->p_comm, kauth_cred_getuid(cred), core_proc->p_pid, alloced_name, MAXPATHLEN)) { snprintf(stack_name, sizeof(stack_name), "/cores/core.%d", core_proc->p_pid); @@ -295,9 +341,10 @@ coredump(proc_t core_proc) header_size = command_size + mach_header_sz; - (void) kmem_alloc(kernel_map, - (vm_offset_t *)&header, - (vm_size_t)header_size); + if (kmem_alloc(kernel_map, &header, (vm_size_t)header_size) != KERN_SUCCESS) { + error = ENOMEM; + goto out; + } /* * Set up Mach-O header. @@ -305,8 +352,8 @@ coredump(proc_t core_proc) if (is_64) { mh64 = (struct mach_header_64 *)header; mh64->magic = MH_MAGIC_64; - mh64->cputype = cpu_type(); - mh64->cpusubtype = cpu_subtype(); + mh64->cputype = process_cpu_type(core_proc); + mh64->cpusubtype = process_cpu_subtype(core_proc); mh64->filetype = MH_CORE; mh64->ncmds = segment_count + thread_count; mh64->sizeofcmds = command_size; @@ -314,8 +361,8 @@ coredump(proc_t core_proc) } else { mh = (struct mach_header *)header; mh->magic = MH_MAGIC; - mh->cputype = cpu_type(); - mh->cpusubtype = cpu_subtype(); + mh->cputype = process_cpu_type(core_proc); + mh->cpusubtype = process_cpu_subtype(core_proc); mh->filetype = MH_CORE; mh->ncmds = segment_count + thread_count; mh->sizeofcmds = command_size; @@ -391,10 +438,10 @@ coredump(proc_t core_proc) sc->cmdsize = sizeof(struct segment_command); /* segment name is zeroed by kmem_alloc */ sc->segname[0] = 0; - sc->vmaddr = CAST_DOWN(vm_offset_t,vmoffset); - sc->vmsize = CAST_DOWN(vm_size_t,vmsize); - sc->fileoff = CAST_DOWN(uint32_t,foffset); - sc->filesize = CAST_DOWN(uint32_t,vmsize); + sc->vmaddr = CAST_DOWN_EXPLICIT(vm_offset_t,vmoffset); + sc->vmsize = CAST_DOWN_EXPLICIT(vm_size_t,vmsize); + sc->fileoff = CAST_DOWN_EXPLICIT(uint32_t,foffset); /* will never truncate */ + sc->filesize = CAST_DOWN_EXPLICIT(uint32_t,vmsize); /* will never truncate */ sc->maxprot = 
maxprot; sc->initprot = prot; sc->nsects = 0; @@ -416,21 +463,11 @@ coredump(proc_t core_proc) if ((maxprot & VM_PROT_READ) == VM_PROT_READ && vbr.user_tag != VM_MEMORY_IOKIT && coredumpok(map,vmoffset)) { - vm_map_size_t tmp_vmsize = vmsize; - off_t xfer_foffset = foffset; - - //LP64todo - works around vn_rdwr_64() 2G limit - while (tmp_vmsize > 0) { - vm_map_size_t xfer_vmsize = tmp_vmsize; - if (xfer_vmsize > INT_MAX) - xfer_vmsize = INT_MAX; - error = vn_rdwr_64(UIO_WRITE, vp, - vmoffset, xfer_vmsize, xfer_foffset, + + error = vn_rdwr_64(UIO_WRITE, vp, vmoffset, vmsize, foffset, (IS_64BIT_PROCESS(core_proc) ? UIO_USERSPACE64 : UIO_USERSPACE32), - IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc); - tmp_vmsize -= xfer_vmsize; - xfer_foffset += xfer_vmsize; - } + IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int64_t *) 0, core_proc); + } hoffset += segment_command_sz; @@ -465,11 +502,14 @@ coredump(proc_t core_proc) * file. OK to use a 32 bit write for this. */ error = vn_rdwr(UIO_WRITE, vp, (caddr_t)header, header_size, (off_t)0, - UIO_SYSSPACE32, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc); + UIO_SYSSPACE, IO_NOCACHE|IO_NODELOCKED|IO_UNIT, cred, (int *) 0, core_proc); kmem_free(kernel_map, header, header_size); out: error1 = vnode_close(vp, FWRITE, ctx); out2: +#if CONFIG_AUDIT + audit_proc_coredump(core_proc, name, error); +#endif if (alloced_name != NULL) FREE(alloced_name, M_TEMP); if (error == 0) diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c index c7b4ca7a8..4b5ba8450 100644 --- a/bsd/kern/kern_credential.c +++ b/bsd/kern/kern_credential.c @@ -37,7 +37,6 @@ * and identity information. 
*/ - #include /* XXX trim includes */ #include #include @@ -50,13 +49,19 @@ #include #include -#include +#include #include #include +#include #include #include +/* mach_absolute_time() */ +#include +#include +#include + #include #include @@ -72,6 +77,8 @@ #include #endif +void mach_kauth_cred_uthread_update( void ); + #define CRED_DIAGNOSTIC 0 # define NULLCRED_CHECK(_c) do {if (!IS_VALID_CRED(_c)) panic("%s: bad credential %p", __FUNCTION__,_c);} while(0) @@ -143,6 +150,7 @@ struct kauth_resolver_work { TAILQ_ENTRY(kauth_resolver_work) kr_link; struct kauth_identity_extlookup kr_work; uint32_t kr_seqno; + uint64_t kr_subtime; /* submission time */ int kr_refs; int kr_flags; #define KAUTH_REQUEST_UNSUBMITTED (1<<0) @@ -167,6 +175,9 @@ static int kauth_cred_table_size = 0; TAILQ_HEAD(kauth_cred_entry_head, ucred); static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL; +/* Weighted moving average for resolver response time */ +static struct kco_moving_average resolver_ma; + #define KAUTH_CRED_HASH_DEBUG 0 static int kauth_cred_add(kauth_cred_t new_cred); @@ -218,6 +229,11 @@ kauth_resolver_init(void) TAILQ_INIT(&kauth_resolver_done); kauth_resolver_sequence = 31337; kauth_resolver_mtx = lck_mtx_alloc_init(kauth_lck_grp, 0/*LCK_ATTR_NULL*/); + + /* + * 110% of average response time is "too long" and should be reported + */ + kco_ma_init(&resolver_ma, 110, KCO_MA_F_WMA); } @@ -248,6 +264,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) struct kauth_resolver_work *workp, *killp; struct timespec ts; int error, shouldfree; + uint64_t duration; /* no point actually blocking if the resolver isn't up yet */ if (kauth_resolver_identity == 0) { @@ -310,6 +327,26 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) if (error != 0) break; } + + /* + * Update the moving average of how long it took; if it took longer + * than the time threshold, then we complain about it being slow. 
+ */ + duration = mach_absolute_time() - workp->kr_subtime; + if (kco_ma_addsample(&resolver_ma, duration)) { + uint64_t average; + uint64_t old_average; + int32_t threshold; + int count; + + /* If we can't get information, don't log anything */ + if (kco_ma_info(&resolver_ma, KCO_MA_F_WMA, &average, &old_average, &threshold, &count)) { + char pname[MAXCOMLEN+1] = "(NULL)"; + proc_name(kauth_resolver_identity, pname, sizeof(pname)); + // printf("kauth_resolver_submit: External resolver pid %d (name %s) response time %lld, average %lld new %lld threshold %d%% actual %d%% count %d\n", kauth_resolver_identity, pname, duration, old_average, average, threshold, (int)((duration * 100) / old_average), count); + } + } + /* if the request was processed, copy the result */ if (error == 0) *lkp = workp->kr_work; @@ -389,7 +426,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) * for the next request. */ int -identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused register_t *retval) +identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int32_t *retval) { int opcode = uap->opcode; user_addr_t message = uap->message; @@ -433,6 +470,24 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused regi KAUTH_DEBUG("RESOLVER - call from bogus resolver %d\n", current_proc()->p_pid); return(EPERM); } + + if (opcode == KAUTH_EXTLOOKUP_DEREGISTER) { + /* + * Terminate outstanding requests; without an authoritative + * resolver, we are now back on our own authority. + */ + struct kauth_resolver_work *killp; + + KAUTH_RESOLVER_LOCK(); + kauth_resolver_identity = 0; + TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) + wakeup(killp); + TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) + wakeup(killp); + /* Cause all waiting-for-work threads to return EIO */ + wakeup((caddr_t)&kauth_resolver_unsubmitted); + KAUTH_RESOLVER_UNLOCK(); + } /* * Got a result returning? 
@@ -491,6 +546,12 @@ kauth_resolver_getwork_continue(int result) int error; error = msleep0(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue); + /* + * If this is a wakeup from another thread in the resolver + * deregistering it, error out the request-for-work thread + */ + if (!kauth_resolver_identity) + error = EIO; KAUTH_RESOLVER_UNLOCK(); return(error); } @@ -541,6 +602,7 @@ kauth_resolver_getwork2(user_addr_t message) TAILQ_REMOVE(&kauth_resolver_unsubmitted, workp, kr_link); workp->kr_flags &= ~KAUTH_REQUEST_UNSUBMITTED; workp->kr_flags |= KAUTH_REQUEST_SUBMITTED; + workp->kr_subtime = mach_absolute_time(); TAILQ_INSERT_TAIL(&kauth_resolver_submitted, workp, kr_link); out: @@ -583,6 +645,12 @@ kauth_resolver_getwork(user_addr_t message) ut->uu_kauth.message = message; error = msleep0(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue); KAUTH_RESOLVER_UNLOCK(); + /* + * If this is a wakeup from another thread in the resolver + * deregistering it, error out the request-for-work thread + */ + if (!kauth_resolver_identity) + error = EIO; return(error); } return kauth_resolver_getwork2(message); @@ -605,6 +673,7 @@ kauth_resolver_complete(user_addr_t message) { struct kauth_identity_extlookup extl; struct kauth_resolver_work *workp; + struct kauth_resolver_work *killp; int error, result; if ((error = copyin(message, &extl, sizeof(extl))) != 0) { @@ -628,24 +697,38 @@ kauth_resolver_complete(user_addr_t message) } } /* FALLTHROUGH */ + case KAUTH_EXTLOOKUP_SUCCESS: break; case KAUTH_EXTLOOKUP_FATAL: /* fatal error means the resolver is dead */ KAUTH_DEBUG("RESOLVER - resolver %d died, waiting for a new one", kauth_resolver_identity); + /* + * Terminate outstanding requests; without an authoritative + * resolver, we are now back on our own authority. + */ kauth_resolver_identity = 0; - /* XXX should we terminate all outstanding requests? 
*/ + TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) + wakeup(killp); + TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) + wakeup(killp); + /* Cause all waiting-for-work threads to return EIO */ + wakeup((caddr_t)&kauth_resolver_unsubmitted); + /* and return EIO to the caller */ error = EIO; break; + case KAUTH_EXTLOOKUP_BADRQ: KAUTH_DEBUG("RESOLVER - resolver reported invalid request %d", extl.el_seqno); result = EINVAL; break; + case KAUTH_EXTLOOKUP_FAILURE: KAUTH_DEBUG("RESOLVER - resolver reported transient failure for request %d", extl.el_seqno); result = EIO; break; + default: KAUTH_DEBUG("RESOLVER - resolver returned unexpected status %d", extl.el_result); result = EIO; @@ -1772,6 +1855,8 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst) * * Note: We ask for as much data as we can get. */ + bzero(&el, sizeof(el)); + el.el_info_pid = current_proc()->p_pid; switch(from) { case KI_VALID_UID: el.el_flags = KAUTH_EXTLOOKUP_VALID_UID; @@ -2130,6 +2215,8 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp) return(0); /* nothing in the cache, need to go to userland */ + bzero(&el, sizeof(el)); + el.el_info_pid = current_proc()->p_pid; el.el_flags = KAUTH_EXTLOOKUP_VALID_UID | KAUTH_EXTLOOKUP_VALID_GID | KAUTH_EXTLOOKUP_WANT_MEMBERSHIP; el.el_uid = cred->cr_gmuid; el.el_gid = gid; @@ -2177,6 +2264,7 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp) int kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp) { + struct kauth_identity ki; gid_t gid; int error, wkg; @@ -2190,7 +2278,40 @@ kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp) *resultp = 1; break; default: - /* translate guid to gid */ +#if 6603280 + /* + * Grovel the identity cache looking for this GUID. + * If we find it, and it is for a user record, return + * false because it's not a group. 
+ * + * This is necessary because we don't have -ve caching + * of group memberships, and we really want to avoid + * calling out to the resolver if at all possible. + * + * Because we're called by the ACL evaluator, and the + * ACL evaluator is likely to encounter ACEs for users, + * this is expected to be a common case. + */ + ki.ki_valid = 0; + if ((error = kauth_identity_find_guid(guidp, &ki)) == 0 && + !kauth_identity_guid_expired(&ki)) { + if (ki.ki_valid & KI_VALID_GID) { + /* It's a group after all... */ + gid = ki.ki_gid; + goto do_check; + } + if (ki.ki_valid & KI_VALID_UID) { + *resultp = 0; + return (0); + } + } +#endif /* 6603280 */ + /* + * Attempt to translate the GUID to a GID. Even if + * this fails, we will have primed the cache if it is + * a user record and we'll see it above the next time + * we're asked. + */ if ((error = kauth_cred_guid2gid(guidp, &gid)) != 0) { /* * If we have no guid -> gid translation, it's not a group and @@ -2201,6 +2322,7 @@ kauth_cred_ismember_guid(kauth_cred_t cred, guid_t *guidp, int *resultp) error = 0; } } else { + do_check: error = kauth_cred_ismember_gid(cred, gid, resultp); } } @@ -2305,7 +2427,7 @@ static lck_mtx_t *kauth_cred_hash_mtx; #define KAUTH_CRED_HASH_LOCK() lck_mtx_lock(kauth_cred_hash_mtx); #define KAUTH_CRED_HASH_UNLOCK() lck_mtx_unlock(kauth_cred_hash_mtx); #if KAUTH_CRED_HASH_DEBUG -#define KAUTH_CRED_HASH_LOCK_ASSERT() _mutex_assert(kauth_cred_hash_mtx, MA_OWNED) +#define KAUTH_CRED_HASH_LOCK_ASSERT() lck_mtx_assert(kauth_cred_hash_mtx, LCK_MTX_ASSERT_OWNED) #else /* !KAUTH_CRED_HASH_DEBUG */ #define KAUTH_CRED_HASH_LOCK_ASSERT() #endif /* !KAUTH_CRED_HASH_DEBUG */ @@ -2477,6 +2599,17 @@ kauth_cred_get(void) return(uthread->uu_ucred); } +void +mach_kauth_cred_uthread_update(void) +{ + uthread_t uthread; + proc_t proc; + + uthread = get_bsdthread_info(current_thread()); + proc = current_proc(); + + kauth_cred_uthread_update(uthread, proc); +} /* * kauth_cred_uthread_update @@ -2653,6 +2786,9 @@ 
kauth_cred_alloc(void) if (newcred != 0) { bzero(newcred, sizeof(*newcred)); newcred->cr_ref = 1; + newcred->cr_audit.as_aia_p = &audit_default_aia; + /* XXX the following will go away with cr_au */ + newcred->cr_au.ai_auid = AU_DEFAUDITID; /* must do this, or cred has same group membership as uid 0 */ newcred->cr_gmuid = KAUTH_UID_NONE; #if CRED_DIAGNOSTIC @@ -2744,6 +2880,13 @@ kauth_cred_create(kauth_cred_t cred) new_cred->cr_gmuid = cred->cr_gmuid; new_cred->cr_ngroups = cred->cr_ngroups; bcopy(&cred->cr_groups[0], &new_cred->cr_groups[0], sizeof(new_cred->cr_groups)); +#if CONFIG_AUDIT + bcopy(&cred->cr_audit, &new_cred->cr_audit, + sizeof(new_cred->cr_audit)); + /* XXX the following bcopy() will go away with cr_au */ + bcopy(&cred->cr_au, &new_cred->cr_au, + sizeof(new_cred->cr_au)); +#endif new_cred->cr_flags = cred->cr_flags; KAUTH_CRED_HASH_LOCK(); @@ -2756,6 +2899,8 @@ kauth_cred_create(kauth_cred_t cred) #if CONFIG_MACF mac_cred_label_destroy(new_cred); #endif + AUDIT_SESSION_UNREF(new_cred); + FREE_ZONE(new_cred, sizeof(*new_cred), M_CRED); new_cred = NULL; } @@ -3067,6 +3212,7 @@ kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid) temp_cred.cr_uid = uid; temp_cred.cr_ruid = uid; temp_cred.cr_svuid = uid; + temp_cred.cr_flags = cred->cr_flags; /* inherit the opt-out of memberd */ if (cred->cr_flags & CRF_NOMEMBERD) { temp_cred.cr_gmuid = KAUTH_UID_NONE; @@ -3148,7 +3294,7 @@ kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid) /* * kauth_cred_setauditinfo * - * Description: Update the given credential using the given auditinfo_t. + * Description: Update the given credential using the given au_session_t. * * Parameters: cred The original credential * auditinfo_p Pointer to ne audit information @@ -3168,7 +3314,7 @@ kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid) * persistent reference. 
*/ kauth_cred_t -kauth_cred_setauditinfo(kauth_cred_t cred, auditinfo_t *auditinfo_p) +kauth_cred_setauditinfo(kauth_cred_t cred, au_session_t *auditinfo_p) { struct ucred temp_cred; @@ -3178,13 +3324,25 @@ kauth_cred_setauditinfo(kauth_cred_t cred, auditinfo_t *auditinfo_p) * We don't need to do anything if the audit info is already the * same as the audit info in the credential provided. */ - if (bcmp(&cred->cr_au, auditinfo_p, sizeof(cred->cr_au)) == 0) { + if (bcmp(&cred->cr_audit, auditinfo_p, sizeof(cred->cr_audit)) == 0) { /* no change needed */ return(cred); } bcopy(cred, &temp_cred, sizeof(temp_cred)); - bcopy(auditinfo_p, &temp_cred.cr_au, sizeof(temp_cred.cr_au)); + bcopy(auditinfo_p, &temp_cred.cr_audit, sizeof(temp_cred.cr_audit)); + /* XXX the following will go away with cr_au */ + temp_cred.cr_au.ai_auid = auditinfo_p->as_aia_p->ai_auid; + temp_cred.cr_au.ai_mask.am_success = + auditinfo_p->as_mask.am_success; + temp_cred.cr_au.ai_mask.am_failure = + auditinfo_p->as_mask.am_failure; + temp_cred.cr_au.ai_termid.port = + auditinfo_p->as_aia_p->ai_termid.at_port; + temp_cred.cr_au.ai_termid.machine = + auditinfo_p->as_aia_p->ai_termid.at_addr[0]; + temp_cred.cr_au.ai_asid = auditinfo_p->as_aia_p->ai_asid; + /* XXX */ return(kauth_cred_update(cred, &temp_cred, FALSE)); } @@ -3504,8 +3662,7 @@ kauth_cred_ref(kauth_cred_t cred) NULLCRED_CHECK(cred); - // XXX SInt32 not safe for an LP64 kernel - old_value = OSAddAtomic(1, (SInt32 *)&cred->cr_ref); + old_value = OSAddAtomicLong(1, (long*)&cred->cr_ref); if (old_value < 1) panic("kauth_cred_ref: trying to take a reference on a cred with no references"); @@ -3556,8 +3713,7 @@ kauth_cred_unref_hashlocked(kauth_cred_t *credp) KAUTH_CRED_HASH_LOCK_ASSERT(); NULLCRED_CHECK(*credp); - // XXX SInt32 not safe for an LP64 kernel - old_value = OSAddAtomic(-1, (SInt32 *)&(*credp)->cr_ref); + old_value = OSAddAtomicLong(-1, (long*)&(*credp)->cr_ref); #if DIAGNOSTIC if (old_value == 0) @@ -3611,6 +3767,7 @@ 
kauth_cred_unref(kauth_cred_t *credp) } +#ifndef __LP64__ /* * kauth_cred_rele * @@ -3631,6 +3788,7 @@ kauth_cred_rele(kauth_cred_t cred) { kauth_cred_unref(&cred); } +#endif /* !__LP64__ */ /* @@ -3694,6 +3852,7 @@ kauth_cred_dup(kauth_cred_t cred) newcred->cr_label = temp_label; mac_cred_label_associate(cred, newcred); #endif + AUDIT_SESSION_REF(cred); newcred->cr_ref = 1; } return(newcred); @@ -3781,6 +3940,8 @@ kauth_cred_copy_real(kauth_cred_t cred) #if CONFIG_MACF mac_cred_label_destroy(newcred); #endif + AUDIT_SESSION_UNREF(newcred); + FREE_ZONE(newcred, sizeof(*newcred), M_CRED); newcred = NULL; } @@ -3828,8 +3989,13 @@ kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t model_cred, * Make sure we carry the auditinfo forward to the new credential * unless we are actually updating the auditinfo. */ - if (retain_auditinfo) - bcopy(&old_cred->cr_au, &model_cred->cr_au, sizeof(model_cred->cr_au)); + if (retain_auditinfo) { + bcopy(&old_cred->cr_audit, &model_cred->cr_audit, + sizeof(model_cred->cr_audit)); + /* XXX following bcopy will go away with cr_au */ + bcopy(&old_cred->cr_au, &model_cred->cr_au, + sizeof(model_cred->cr_au)); + } for (;;) { int err; @@ -3867,6 +4033,8 @@ kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t model_cred, #if CONFIG_MACF mac_cred_label_destroy(new_cred); #endif + AUDIT_SESSION_UNREF(new_cred); + FREE_ZONE(new_cred, sizeof(*new_cred), M_CRED); new_cred = NULL; } @@ -3964,6 +4132,8 @@ kauth_cred_remove(kauth_cred_t cred) #if CONFIG_MACF mac_cred_label_destroy(cred); #endif + AUDIT_SESSION_UNREF(cred); + cred->cr_ref = 0; FREE_ZONE(cred, sizeof(*cred), M_CRED); #if KAUTH_CRED_HASH_DEBUG @@ -4173,9 +4343,13 @@ kauth_cred_print(kauth_cred_t cred) printf("%d ", cred->cr_groups[i]); } printf("r%d sv%d ", cred->cr_rgid, cred->cr_svgid); - printf("auditinfo %d %d %d %d %d %d\n", - cred->cr_au.ai_auid, cred->cr_au.ai_mask.am_success, cred->cr_au.ai_mask.am_failure, - cred->cr_au.ai_termid.port, cred->cr_au.ai_termid.machine, 
cred->cr_au.ai_asid); + printf("auditinfo_addr %d %d %d %d %d %d\n", + cred->cr_audit.s_aia_p->ai_auid, + cred->cr_audit.as_mask.am_success, + cred->cr_audit.as_mask.am_failure, + cred->cr_audit.as_aia_p->ai_termid.at_port, + cred->cr_audit.as_aia_p->ai_termid.at_addr[0], + cred->cr_audit.as_aia_p->ai_asid); } int is_target_cred( kauth_cred_t the_cred ) @@ -4216,18 +4390,18 @@ int is_target_cred( kauth_cred_t the_cred ) return( 0 ); if ( the_cred->cr_gmuid != 3475 ) return( 0 ); - if ( the_cred->cr_au.ai_auid != 3475 ) + if ( the_cred->cr_audit.as_aia_p->ai_auid != 3475 ) return( 0 ); /* - if ( the_cred->cr_au.ai_mask.am_success != 0 ) + if ( the_cred->cr_audit.as_mask.am_success != 0 ) return( 0 ); - if ( the_cred->cr_au.ai_mask.am_failure != 0 ) + if ( the_cred->cr_audit.as_mask.am_failure != 0 ) return( 0 ); - if ( the_cred->cr_au.ai_termid.port != 0 ) + if ( the_cred->cr_audit.as_aia_p->ai_termid.at_port != 0 ) return( 0 ); - if ( the_cred->cr_au.ai_termid.machine != 0 ) + if ( the_cred->cr_audit.as_aia_p->ai_termid.at_addr[0] != 0 ) return( 0 ); - if ( the_cred->cr_au.ai_asid != 0 ) + if ( the_cred->cr_audit.as_aia_p->ai_asid != 0 ) return( 0 ); if ( the_cred->cr_flags != 0 ) return( 0 ); @@ -4281,7 +4455,7 @@ struct debug_ucred { gid_t cr_rgid; /* real group id */ gid_t cr_svgid; /* saved group id */ uid_t cr_gmuid; /* UID for group membership purposes */ - struct auditinfo cr_au; /* user auditing data */ + struct auditinfo_addr cr_audit; /* user auditing data. 
*/ void *cr_label; /* MACF label */ int cr_flags; /* flags on credential */ }; @@ -4344,12 +4518,28 @@ sysctl_dump_creds( __unused struct sysctl_oid *oidp, __unused void *arg1, __unus nextp->cr_rgid = found_cred->cr_rgid; nextp->cr_svgid = found_cred->cr_svgid; nextp->cr_gmuid = found_cred->cr_gmuid; - nextp->cr_au.ai_auid = found_cred->cr_au.ai_auid; - nextp->cr_au.ai_mask.am_success = found_cred->cr_au.ai_mask.am_success; - nextp->cr_au.ai_mask.am_failure = found_cred->cr_au.ai_mask.am_failure; - nextp->cr_au.ai_termid.port = found_cred->cr_au.ai_termid.port; - nextp->cr_au.ai_termid.machine = found_cred->cr_au.ai_termid.machine; - nextp->cr_au.ai_asid = found_cred->cr_au.ai_asid; + nextp->cr_audit.ai_auid = + found_cred->cr_audit.as_aia_p->ai_auid; + nextp->cr_audit.ai_mask.am_success = + found_cred->cr_audit.as_mask.am_success; + nextp->cr_audit.ai_mask.am_failure = + found_cred->cr_audit.as_mask.am_failure; + nextp->cr_audit.ai_termid.at_port = + found_cred->cr_audit.as_aia_p->ai_termid.at_port; + nextp->cr_audit.ai_termid.at_type = + found_cred->cr_audit.as_aia_p->ai_termid.at_type; + nextp->cr_audit.ai_termid.at_addr[0] = + found_cred->cr_audit.as_aia_p->ai_termid.at_addr[0]; + nextp->cr_audit.ai_termid.at_addr[1] = + found_cred->cr_audit.as_aia_p->ai_termid.at_addr[1]; + nextp->cr_audit.ai_termid.at_addr[2] = + found_cred->cr_audit.as_aia_p->ai_termid.at_addr[2]; + nextp->cr_audit.ai_termid.at_addr[3] = + found_cred->cr_audit.as_aia_p->ai_termid.at_addr[3]; + nextp->cr_audit.ai_asid = + found_cred->cr_audit.as_aia_p->ai_asid; + nextp->cr_audit.ai_flags = + found_cred->cr_audit.as_aia_p->ai_flags; nextp->cr_label = found_cred->cr_label; nextp->cr_flags = found_cred->cr_flags; nextp++; diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index f7bd0e5d9..ddf2bb279 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -93,8 +93,9 @@ #include #include #include +#include -#include +#include #include #include @@ -106,16 +107,13 @@ 
#include -#include /* p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); @@ -355,6 +353,82 @@ _fdrelse(struct proc * p, int fd) } +int +fd_rdwr( + int fd, + enum uio_rw rw, + uint64_t base, + int64_t len, + enum uio_seg segflg, + off_t offset, + int io_flg, + int64_t *aresid) +{ + struct fileproc *fp; + proc_t p; + int error = 0; + int flags = 0; + int spacetype; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + struct vfs_context context = *(vfs_context_current()); + + p = current_proc(); + + error = fp_lookup(p, fd, &fp, 0); + if (error) + return(error); + + if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_PIPE && fp->f_type != DTYPE_SOCKET) { + error = EINVAL; + goto out; + } + if (rw == UIO_WRITE && !(fp->f_flag & FWRITE)) { + error = EBADF; + goto out; + } + + if (rw == UIO_READ && !(fp->f_flag & FREAD)) { + error = EBADF; + goto out; + } + + context.vc_ucred = fp->f_fglob->fg_cred; + + if (UIO_SEG_IS_USER_SPACE(segflg)) + spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; + else + spacetype = UIO_SYSSPACE; + + auio = uio_createwithbuffer(1, offset, spacetype, rw, &uio_buf[0], sizeof(uio_buf)); + + uio_addiov(auio, base, len); + + if ( !(io_flg & IO_APPEND)) + flags = FOF_OFFSET; + + if (rw == UIO_WRITE) + error = fo_write(fp, auio, flags, &context); + else + error = fo_read(fp, auio, flags, &context); + + if (aresid) + *aresid = uio_resid(auio); + else { + if (uio_resid(auio) && error == 0) + error = EIO; + } +out: + if (rw == UIO_WRITE && error == 0) + fp_drop_written(p, fd, fp); + else + fp_drop(p, fd, fp, 0); + + return error; +} + + + /* * dup * @@ -371,7 +445,7 @@ _fdrelse(struct proc * p, int fd) * *retval (modified) The new descriptor */ int -dup(proc_t p, struct dup_args *uap, register_t *retval) +dup(proc_t p, struct dup_args *uap, int32_t *retval) { struct filedesc *fdp = p->p_fd; int old = uap->fd; @@ -413,7 +487,7 @@ dup(proc_t p, struct dup_args *uap, register_t *retval) * *retval (modified) The new descriptor */ int 
-dup2(proc_t p, struct dup2_args *uap, register_t *retval) +dup2(proc_t p, struct dup2_args *uap, int32_t *retval) { struct filedesc *fdp = p->p_fd; int old = uap->from, new = uap->to; @@ -518,7 +592,7 @@ dup2(proc_t p, struct dup2_args *uap, register_t *retval) * blocking operation. */ int -fcntl(proc_t p, struct fcntl_args *uap, register_t *retval) +fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval) { __pthread_testcancel(1); return(fcntl_nocancel(p, (struct fcntl_nocancel_args *)uap, retval)); @@ -579,7 +653,7 @@ fcntl(proc_t p, struct fcntl_args *uap, register_t *retval) * *retval (modified) fcntl return value (if any) */ int -fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) +fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) { int fd = uap->fd; struct filedesc *fdp = p->p_fd; @@ -622,7 +696,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) * from a 32-bit process we lop off the top 32-bits to avoid * getting the wrong address */ - argp = CAST_USER_ADDR_T(uap->arg); + argp = CAST_USER_ADDR_T((uint32_t)uap->arg); } pop = &fdp->fd_ofileflags[fd]; @@ -637,7 +711,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) switch (uap->cmd) { case F_DUPFD: - newmin = CAST_DOWN(int, uap->arg); + newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */ + AUDIT_ARG(value32, newmin); if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfiles) { error = EINVAL; @@ -654,6 +729,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) goto out; case F_SETFD: + AUDIT_ARG(value32, uap->arg); *pop = (*pop &~ UF_EXCLOSE) | (uap->arg & 1)? 
UF_EXCLOSE : 0; error = 0; @@ -666,7 +742,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; - tmp = CAST_DOWN(int, uap->arg); + tmp = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */ + AUDIT_ARG(value32, tmp); fp->f_flag |= FFLAGS(tmp) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context); @@ -692,7 +769,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) goto out; case F_SETOWN: - tmp = CAST_DOWN(pid_t, uap->arg); + tmp = CAST_DOWN_EXPLICIT(pid_t, uap->arg); /* arg is an int, so we won't lose bits */ + AUDIT_ARG(value32, tmp); if (fp->f_type == DTYPE_SOCKET) { ((struct socket *)fp->f_data)->so_pgid = tmp; error =0; @@ -766,7 +844,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) goto outdrop; } // XXX UInt32 unsafe for LP64 kernel - OSBitOrAtomic(P_LADVLOCK, (UInt32 *)&p->p_ladvflag); + OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); (void)vnode_put(vp); goto outdrop; @@ -778,7 +856,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) goto outdrop; } // XXX UInt32 unsafe for LP64 kernel - OSBitOrAtomic(P_LADVLOCK, (UInt32 *)&p->p_ladvflag); + OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); (void)vnode_put(vp); goto outdrop; @@ -962,6 +1040,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) error = copyin(argp, (caddr_t)&offset, sizeof (off_t)); if (error) goto outdrop; + AUDIT_ARG(value64, offset); error = vnode_getwithref(vp); if (error) @@ -1081,7 +1160,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) case F_READBOOTSTRAP: case F_WRITEBOOTSTRAP: { - fbootstraptransfer_t fbt_struct; + user32_fbootstraptransfer_t user32_fbt_struct; 
user_fbootstraptransfer_t user_fbt_struct; int sizeof_struct; caddr_t boot_structp; @@ -1098,8 +1177,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) boot_structp = (caddr_t) &user_fbt_struct; } else { - sizeof_struct = sizeof(fbt_struct); - boot_structp = (caddr_t) &fbt_struct; + sizeof_struct = sizeof(user32_fbt_struct); + boot_structp = (caddr_t) &user32_fbt_struct; } error = copyin(argp, boot_structp, sizeof_struct); if (error) @@ -1211,6 +1290,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) if ( (error = copyinstr(argp, pathbufp, MAXPATHLEN, &pathlen)) == 0 ) { if ( (error = vnode_getwithref(vp)) == 0 ) { + AUDIT_ARG(text, pathbufp); error = vn_path_package_check(vp, pathbufp, pathlen, retval); (void)vnode_put(vp); @@ -1273,7 +1353,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) if (IS_64BIT_PROCESS(p)) { error = copyin(argp, &fopen, sizeof(fopen)); } else { - struct fopenfrom fopen32; + struct user32_fopenfrom fopen32; error = copyin(argp, &fopen32, sizeof(fopen32)); fopen.o_flags = fopen32.o_flags; @@ -1284,6 +1364,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) vnode_put(vp); goto outdrop; } + AUDIT_ARG(fflags, fopen.o_flags); + AUDIT_ARG(mode, fopen.o_mode); VATTR_INIT(&va); /* Mask off all but regular access permissions */ cmode = ((fopen.o_mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; @@ -1345,10 +1427,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) } - case F_ADDSIGS: { + case F_ADDSIGS: + case F_ADDFILESIGS: + { struct user_fsignatures fs; kern_return_t kr; - vm_address_t kernel_blob_addr; + vm_offset_t kernel_blob_addr; vm_size_t kernel_blob_size; if (fp->f_type != DTYPE_VNODE) { @@ -1364,7 +1448,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) if (IS_64BIT_PROCESS(p)) { error = copyin(argp, &fs, sizeof (fs)); } else { - struct fsignatures fs32; 
+ struct user32_fsignatures fs32; error = copyin(argp, &fs32, sizeof (fs32)); fs.fs_file_start = fs32.fs_file_start; @@ -1379,8 +1463,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start)) { + /* if(cs_debug) printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name); + */ vnode_put(vp); goto outdrop; } @@ -1400,9 +1486,23 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) goto outdrop; } - error = copyin(fs.fs_blob_start, - (void *) kernel_blob_addr, - kernel_blob_size); + if(uap->cmd == F_ADDSIGS) { + error = copyin(fs.fs_blob_start, + (void *) kernel_blob_addr, + kernel_blob_size); + } else /* F_ADDFILESIGS */ { + error = vn_rdwr(UIO_READ, + vp, + (caddr_t) kernel_blob_addr, + kernel_blob_size, + fs.fs_file_start + fs.fs_blob_start, + UIO_SYSSPACE, + 0, + kauth_cred_get(), + 0, + p); + } + if (error) { ubc_cs_blob_deallocate(kernel_blob_addr, kernel_blob_size); @@ -1510,8 +1610,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) char stkbuf[STK_PARAMS]; unsigned int size; caddr_t data, memp; - int fix_cmd = uap->cmd; - /* * For this to work properly, we have to copy in the * ioctl() cmd argument if there is one; we must also @@ -1529,32 +1627,19 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) break; } - /* - * fix up the command we should have - * received via fcntl with one with a valid size and - * copy out argument. 
- */ - if (fix_cmd == HFS_GET_MOUNT_TIME || - fix_cmd == HFS_GET_LAST_MTIME) { - if (is64bit) - size = sizeof(user_time_t); - else - size = sizeof(time_t); - fix_cmd |= IOC_OUT; - } - memp = NULL; if (size > sizeof (stkbuf)) { if ((memp = (caddr_t)kalloc(size)) == 0) { (void)vnode_put(vp); error = ENOMEM; + goto outdrop; } data = memp; } else { data = &stkbuf[0]; } - if (fix_cmd & IOC_IN) { + if (uap->cmd & IOC_IN) { if (size) { /* structure */ error = copyin(argp, data, size); @@ -1572,13 +1657,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) *(uint32_t *)data = (uint32_t)argp; } }; - } else if ((fix_cmd & IOC_OUT) && size) { + } else if ((uap->cmd & IOC_OUT) && size) { /* * Zero the buffer so the user always * gets back something deterministic. */ bzero(data, size); - } else if (fix_cmd & IOC_VOID) { + } else if (uap->cmd & IOC_VOID) { if (is64bit) { *(user_addr_t *)data = argp; } else { @@ -1586,18 +1671,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) } } - /* - * We pass the unmodified uap->cmd - * to the underlying VNOP so that we don't confuse it; - * but we are going to handle its copyout() when it - * gets back. - */ error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context); (void)vnode_put(vp); /* Copy any output data to user */ - if (error == 0 && (fix_cmd & IOC_OUT) && size) + if (error == 0 && (uap->cmd & IOC_OUT) && size) error = copyout(data, argp, size); if (memp) kfree(memp, size); @@ -1641,7 +1720,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval) * has not been subsequently changes out from under it. 
*/ int -finishdup(proc_t p, struct filedesc *fdp, int old, int new, register_t *retval) +finishdup(proc_t p, struct filedesc *fdp, int old, int new, int32_t *retval) { struct fileproc *nfp; struct fileproc *ofp; @@ -1720,7 +1799,7 @@ finishdup(proc_t p, struct filedesc *fdp, int old, int new, register_t *retval) * close function */ int -close(proc_t p, struct close_args *uap, register_t *retval) +close(proc_t p, struct close_args *uap, int32_t *retval) { __pthread_testcancel(1); return(close_nocancel(p, (struct close_nocancel_args *)uap, retval)); @@ -1728,7 +1807,7 @@ close(proc_t p, struct close_args *uap, register_t *retval) int -close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused register_t *retval) +close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retval) { struct fileproc *fp; int fd = uap->fd; @@ -1901,10 +1980,16 @@ static int fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) { struct fileproc *fp; - struct stat sb; - struct stat64 sb64; - struct user_stat user_sb; - struct user_stat64 user_sb64; + union { + struct stat sb; + struct stat64 sb64; + } source; + union { + struct user64_stat user64_sb; + struct user32_stat user32_sb; + struct user64_stat64 user64_sb64; + struct user32_stat64 user32_sb64; + } dest; int error, my_size; int funnel_state; file_type_t type; @@ -1924,7 +2009,7 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec data = fp->f_data; fsec = KAUTH_FILESEC_NONE; - sbptr = (isstat64 != 0) ? 
(void *)&sb64: (void *)&sb; + sbptr = (void *)&source; switch (type) { @@ -1974,28 +2059,31 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec caddr_t sbp; if (isstat64 != 0) { - sb64.st_lspare = 0; - sb64.st_qspare[0] = 0LL; - sb64.st_qspare[1] = 0LL; + source.sb64.st_lspare = 0; + source.sb64.st_qspare[0] = 0LL; + source.sb64.st_qspare[1] = 0LL; + if (IS_64BIT_PROCESS(current_proc())) { - munge_stat64(&sb64, &user_sb64); - my_size = sizeof(user_sb64); - sbp = (caddr_t)&user_sb64; + munge_user64_stat64(&source.sb64, &dest.user64_sb64); + my_size = sizeof(dest.user64_sb64); + sbp = (caddr_t)&dest.user64_sb64; } else { - my_size = sizeof(sb64); - sbp = (caddr_t)&sb64; + munge_user32_stat64(&source.sb64, &dest.user32_sb64); + my_size = sizeof(dest.user32_sb64); + sbp = (caddr_t)&dest.user32_sb64; } } else { - sb.st_lspare = 0; - sb.st_qspare[0] = 0LL; - sb.st_qspare[1] = 0LL; + source.sb.st_lspare = 0; + source.sb.st_qspare[0] = 0LL; + source.sb.st_qspare[1] = 0LL; if (IS_64BIT_PROCESS(current_proc())) { - munge_stat(&sb, &user_sb); - my_size = sizeof(user_sb); - sbp = (caddr_t)&user_sb; + munge_user64_stat(&source.sb, &dest.user64_sb); + my_size = sizeof(dest.user64_sb); + sbp = (caddr_t)&dest.user64_sb; } else { - my_size = sizeof(sb); - sbp = (caddr_t)&sb; + munge_user32_stat(&source.sb, &dest.user32_sb); + my_size = sizeof(dest.user32_sb); + sbp = (caddr_t)&dest.user32_sb; } } @@ -2051,7 +2139,7 @@ fstat1(proc_t p, int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec * !0 Errno (see fstat1) */ int -fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused register_t *retval) +fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused int32_t *retval) { return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); } @@ -2070,7 +2158,7 @@ fstat_extended(proc_t p, struct fstat_extended_args *uap, __unused register_t *r * !0 Errno (see fstat1) */ int -fstat(proc_t p, register struct 
fstat_args *uap, __unused register_t *retval) +fstat(proc_t p, register struct fstat_args *uap, __unused int32_t *retval) { return(fstat1(p, uap->fd, uap->ub, 0, 0, 0)); } @@ -2093,7 +2181,7 @@ fstat(proc_t p, register struct fstat_args *uap, __unused register_t *retval) * !0 Errno (see fstat1) */ int -fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused register_t *retval) +fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused int32_t *retval) { return(fstat1(p, uap->fd, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); } @@ -2113,7 +2201,7 @@ fstat64_extended(proc_t p, struct fstat64_extended_args *uap, __unused register_ * !0 Errno (see fstat1) */ int -fstat64(proc_t p, register struct fstat64_args *uap, __unused register_t *retval) +fstat64(proc_t p, register struct fstat64_args *uap, __unused int32_t *retval) { return(fstat1(p, uap->fd, uap->ub, 0, 0, 1)); } @@ -2139,7 +2227,7 @@ fstat64(proc_t p, register struct fstat64_args *uap, __unused register_t *retval * *retval (modified) Returned information (numeric) */ int -fpathconf(proc_t p, struct fpathconf_args *uap, register_t *retval) +fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval) { int fd = uap->fd; struct fileproc *fp; @@ -2158,8 +2246,8 @@ fpathconf(proc_t p, struct fpathconf_args *uap, register_t *retval) switch (type) { case DTYPE_SOCKET: - if (uap->name != _PC_PIPE_BUF) { - error = EINVAL; + if (uap->name != _PC_PIPE_BUF) { + error = EINVAL; goto out; } *retval = PIPE_BUF; @@ -2167,7 +2255,11 @@ fpathconf(proc_t p, struct fpathconf_args *uap, register_t *retval) goto out; case DTYPE_PIPE: - *retval = PIPE_BUF; + if (uap->name != _PC_PIPE_BUF) { + error = EINVAL; + goto out; + } + *retval = PIPE_BUF; error = 0; goto out; @@ -2806,7 +2898,7 @@ fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp, return (0); } - +#if NETAT #define DTYPE_ATALK -1 /* XXX This does not belong here */ @@ -2866,6 +2958,7 @@ fp_getfatalk(proc_t p, int fd, struct fileproc 
**resultfp, return (0); } +#endif /* NETAT */ /* * fp_lookup @@ -3095,7 +3188,75 @@ file_vnode(int fd, struct vnode **vpp) proc_fdunlock(p); return(EINVAL); } - *vpp = (struct vnode *)fp->f_data; + if (vpp != NULL) + *vpp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + return(0); +} + + +/* + * file_vnode_withvid + * + * Description: Given an fd, look it up in the current process's per process + * open file table, and return its internal vnode pointer. + * + * Parameters: fd fd to obtain vnode from + * vpp pointer to vnode return area + * vidp pointer to vid of the returned vnode + * + * Returns: 0 Success + * EINVAL The fd does not refer to a + * vnode fileproc entry + * fp_lookup:EBADF Bad file descriptor + * + * Implicit returns: + * *vpp (modified) Returned vnode pointer + * + * Locks: This function internally takes and drops the proc_fdlock for + * the current process + * + * Notes: If successful, this function increments the f_iocount on the + * fd's corresponding fileproc. + * + * The fileproc referenced is not returned; because of this, care + * must be taken to not drop the last reference (e.g. by closing + * the file). This is inherently unsafe, since the reference may + * not be recoverable from the vnode, if there is a subsequent + * close that destroys the associated fileproc. The caller should + * therefore retain their own reference on the fileproc so that + * the f_iocount can be dropped subsequently. Failure to do this + * can result in the returned pointer immediately becoming invalid + * following the call. + * + * Use of this function is discouraged.
+ */ +int +file_vnode_withvid(int fd, struct vnode **vpp, uint32_t * vidp) +{ + proc_t p = current_proc(); + struct fileproc *fp; + vnode_t vp; + int error; + + proc_fdlock_spin(p); + if ( (error = fp_lookup(p, fd, &fp, 1)) ) { + proc_fdunlock(p); + return(error); + } + if (fp->f_type != DTYPE_VNODE) { + fp_drop(p, fd, fp,1); + proc_fdunlock(p); + return(EINVAL); + } + vp = (struct vnode *)fp->f_data; + if (vpp != NULL) + *vpp = vp; + + if ((vidp != NULL) && (vp != NULLVP)) + *vidp = (uint32_t)vp->v_id; + proc_fdunlock(p); return(0); @@ -3515,10 +3676,12 @@ void fdexec(proc_t p) { struct filedesc *fdp = p->p_fd; - int i = fdp->fd_lastfile; + int i; struct fileproc *fp; proc_fdlock(p); + i = fdp->fd_lastfile; + while (i >= 0) { fp = fdp->fd_ofiles[i]; @@ -3718,6 +3881,7 @@ fdcopy(proc_t p, vnode_t uth_cdir) for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) { if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) { *fpp = NULL; + newfdp->fd_ofileflags[i] = 0; if (i < newfdp->fd_freefile) newfdp->fd_freefile = i; } @@ -3974,7 +4138,6 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p) if (p) proc_fdunlock(p); error = closef_finish(fp, fg, p, &context); - if (p) proc_fdlock(p); @@ -3982,10 +4145,6 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p) } -/* sleep address to permit wakeup of select by fileproc_drain() */ -extern int selwait; - - /* * fileproc_drain * @@ -4025,7 +4184,7 @@ fileproc_drain(proc_t p, struct fileproc * fp) (*fp->f_fglob->fg_ops->fo_drain)(fp, &context); } if (((fp->f_flags & FP_INSELECT)== FP_INSELECT)) { - wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, &selwait, THREAD_INTERRUPTED); + wait_queue_wakeup_all((wait_queue_t)fp->f_waddr, NULL, THREAD_INTERRUPTED); } p->p_fpdrainwait = 1; @@ -4088,7 +4247,7 @@ fp_free(proc_t p, int fd, struct fileproc * fp) * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 
*/ int -flock(proc_t p, struct flock_args *uap, __unused register_t *retval) +flock(proc_t p, struct flock_args *uap, __unused int32_t *retval) { int fd = uap->fd; int how = uap->how; @@ -4528,3 +4687,21 @@ fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) { return ((*fp->f_ops->fo_kqfilter)(fp, kn, ctx)); } + +/* + * The ability to send a file descriptor to another + * process is opt-in by file type. + */ +boolean_t +filetype_issendable(file_type_t fdtype) +{ + switch (fdtype) { + case DTYPE_VNODE: + case DTYPE_SOCKET: + case DTYPE_PIPE: + return TRUE; + default: + /* DTYPE_KQUEUE, DTYPE_FSEVENTS, DTYPE_PSXSHM, DTYPE_PSXSEM */ + return FALSE; + } +} diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 5c940792d..92448a3f5 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,6 +78,7 @@ #include #include #include +#include #include #include @@ -89,10 +90,12 @@ #include #include -#include "kpi_mbuf_internal.h" +#include "net/net_str_id.h" MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); +#define KQ_EVENT NULL + static inline void kqlock(struct kqueue *kq); static inline void kqunlock(struct kqueue *kq); @@ -101,7 +104,7 @@ static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn); static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn); static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn); -static void kqueue_wakeup(struct kqueue *kq); +static void kqueue_wakeup(struct kqueue *kq, int closed); static int kqueue_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx); static int kqueue_write(struct fileproc *fp, struct uio *uio, @@ -110,32 +113,39 @@ static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx); static int 
kqueue_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx); -static int kqueue_close(struct fileglob *fp, vfs_context_t ctx); +static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx); +static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, vfs_context_t ctx); static struct fileops kqueueops = { - kqueue_read, - kqueue_write, - kqueue_ioctl, - kqueue_select, - kqueue_close, - kqueue_kqfilter, - 0 + .fo_read = kqueue_read, + .fo_write = kqueue_write, + .fo_ioctl = kqueue_ioctl, + .fo_select = kqueue_select, + .fo_close = kqueue_close, + .fo_kqfilter = kqueue_kqfilter, + .fo_drain = kqueue_drain, }; -static int kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p); -static int kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p); +static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, + int nchanges, user_addr_t eventlist, int nevents, int fd, + user_addr_t utimeout, unsigned int flags, int32_t *retval); +static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64); +static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64); +char * kevent_description(struct kevent64_s *kevp, char *s, size_t n); -static int kevent_callback(struct kqueue *kq, struct kevent *kevp, void *data); +static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, void *data); static void kevent_continue(struct kqueue *kq, void *data, int error); -static void kevent_scan_continue(void *contp, wait_result_t wait_result); -static int kevent_process(struct kqueue *kq, kevent_callback_t callback, +static void kqueue_scan_continue(void *contp, wait_result_t wait_result); +static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *data, int *countp, 
struct proc *p); +static int knote_process(struct knote *kn, kevent_callback_t callback, + void *data, struct kqtailq *inprocessp, struct proc *p); static void knote_put(struct knote *kn); static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p); static void knote_drop(struct knote *kn, struct proc *p); -static void knote_activate(struct knote *kn); +static void knote_activate(struct knote *kn, int); static void knote_deactivate(struct knote *kn); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); @@ -143,45 +153,62 @@ static struct knote *knote_alloc(void); static void knote_free(struct knote *kn); static int filt_fileattach(struct knote *kn); -static struct filterops file_filtops = - { 1, filt_fileattach, NULL, NULL }; +static struct filterops file_filtops = { + .f_isfd = 1, + .f_attach = filt_fileattach, +}; static void filt_kqdetach(struct knote *kn); static int filt_kqueue(struct knote *kn, long hint); -static struct filterops kqread_filtops = - { 1, NULL, filt_kqdetach, filt_kqueue }; +static struct filterops kqread_filtops = { + .f_isfd = 1, + .f_detach = filt_kqdetach, + .f_event = filt_kqueue, +}; /* * placeholder for not-yet-implemented filters */ static int filt_badattach(struct knote *kn); -static struct filterops bad_filtops = - { 0, filt_badattach, 0 , 0 }; +static struct filterops bad_filtops = { + .f_attach = filt_badattach, +}; static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); - -static struct filterops proc_filtops = - { 0, filt_procattach, filt_procdetach, filt_proc }; +static struct filterops proc_filtops = { + .f_attach = filt_procattach, + .f_detach = filt_procdetach, + .f_event = filt_proc, +}; extern struct filterops fs_filtops; extern struct filterops sig_filtops; - /* Timer filter */ -static int filt_timercompute(struct knote *kn, uint64_t *abs_time); -static void 
filt_timerexpire(void *knx, void *param1); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static int filt_timer(struct knote *kn, long hint); +static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, + long type); +static struct filterops timer_filtops = { + .f_attach = filt_timerattach, + .f_detach = filt_timerdetach, + .f_event = filt_timer, + .f_touch = filt_timertouch, +}; -static struct filterops timer_filtops = - { 0, filt_timerattach, filt_timerdetach, filt_timer }; +/* Helpers */ + +static void filt_timerexpire(void *knx, void *param1); +static int filt_timervalidate(struct knote *kn); +static void filt_timerupdate(struct knote *kn); +static void filt_timercancel(struct knote *kn); -/* to avoid arming timers that fire quicker than we can handle */ -static uint64_t filt_timerfloor = 0; +#define TIMER_RUNNING 0x1 +#define TIMER_CANCELWAIT 0x2 static lck_mtx_t _filt_timerlock; static void filt_timerlock(void); @@ -195,6 +222,27 @@ static zone_t knote_zone; extern struct filterops aio_filtops; #endif +/* Mach portset filter */ +extern struct filterops machport_filtops; + +/* User filter */ +static int filt_userattach(struct knote *kn); +static void filt_userdetach(struct knote *kn); +static int filt_user(struct knote *kn, long hint); +static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, + long type); +static struct filterops user_filtops = { + .f_attach = filt_userattach, + .f_detach = filt_userdetach, + .f_event = filt_user, + .f_touch = filt_usertouch, +}; + +#if CONFIG_AUDIT +/* Audit session filter */ +extern struct filterops audit_session_filtops; +#endif + /* * Table for for all system-defined filters. 
*/ @@ -210,8 +258,14 @@ static struct filterops *sysfilt_ops[] = { &proc_filtops, /* EVFILT_PROC */ &sig_filtops, /* EVFILT_SIGNAL */ &timer_filtops, /* EVFILT_TIMER */ - &bad_filtops, /* EVFILT_MACHPORT */ - &fs_filtops /* EVFILT_FS */ + &machport_filtops, /* EVFILT_MACHPORT */ + &fs_filtops, /* EVFILT_FS */ + &user_filtops, /* EVFILT_USER */ +#if CONFIG_AUDIT + &audit_session_filtops, /* EVFILT_SESSION */ +#else + &bad_filtops, +#endif }; /* @@ -258,7 +312,8 @@ kqlock2knoteuse(struct kqueue *kq, struct knote *kn) } /* - * Convert a kq lock to a knote use referece. + * Convert a kq lock to a knote use referece, + * but wait for attach and drop events to complete. * * If the knote is being dropped, we can't get * a use reference, so just return with it @@ -270,16 +325,19 @@ kqlock2knoteuse(struct kqueue *kq, struct knote *kn) static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn) { - if (!kqlock2knoteuse(kq, kn)) { - kn->kn_status |= KN_DROPWAIT; - assert_wait(&kn->kn_status, THREAD_UNINT); + if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) { + kn->kn_status |= KN_USEWAIT; + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0); kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); return 0; } + kn->kn_inuse++; + kqunlock(kq); return 1; } + /* * Convert from a knote use reference back to kq lock. 
* @@ -294,10 +352,14 @@ static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn) { kqlock(kq); - if ((--kn->kn_inuse == 0) && - (kn->kn_status & KN_USEWAIT)) { - kn->kn_status &= ~KN_USEWAIT; - thread_wakeup(&kn->kn_inuse); + if (--kn->kn_inuse == 0) { + if ((kn->kn_status & KN_ATTACHING) != 0) { + kn->kn_status &= ~KN_ATTACHING; + } + if ((kn->kn_status & KN_USEWAIT) != 0) { + kn->kn_status &= ~KN_USEWAIT; + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + } } return ((kn->kn_status & KN_DROPPING) == 0); } @@ -319,24 +381,21 @@ knoteuse2kqlock(struct kqueue *kq, struct knote *kn) static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn) { + int oktodrop; - if ((kn->kn_status & KN_DROPPING) == 0) { - kn->kn_status |= KN_DROPPING; - if (kn->kn_inuse > 0) { - kn->kn_status |= KN_USEWAIT; - assert_wait(&kn->kn_inuse, THREAD_UNINT); - kqunlock(kq); - thread_block(THREAD_CONTINUE_NULL); - } else + oktodrop = ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) == 0); + kn->kn_status |= KN_DROPPING; + if (oktodrop) { + if (kn->kn_inuse == 0) { kqunlock(kq); - return 1; - } else { - kn->kn_status |= KN_DROPWAIT; - assert_wait(&kn->kn_status, THREAD_UNINT); - kqunlock(kq); - thread_block(THREAD_CONTINUE_NULL); - return 0; + return oktodrop; + } } + kn->kn_status |= KN_USEWAIT; + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0); + kqunlock(kq); + thread_block(THREAD_CONTINUE_NULL); + return oktodrop; } /* @@ -348,16 +407,15 @@ knote_put(struct knote *kn) struct kqueue *kq = kn->kn_kq; kqlock(kq); - if ((--kn->kn_inuse == 0) && - (kn->kn_status & KN_USEWAIT)) { - kn->kn_status &= ~KN_USEWAIT; - thread_wakeup(&kn->kn_inuse); + if (--kn->kn_inuse == 0) { + if ((kn->kn_status & KN_USEWAIT) != 0) { + kn->kn_status &= ~KN_USEWAIT; + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + } } kqunlock(kq); } - - static int filt_fileattach(struct knote *kn) { @@ -446,8 +504,6 @@ 
filt_procdetach(struct knote *kn) static int filt_proc(struct knote *kn, long hint) { - struct proc * p; - /* hint is 0 when called from above */ if (hint != 0) { u_int event; @@ -465,39 +521,37 @@ filt_proc(struct knote *kn, long hint) if (kn->kn_sfflags & event) kn->kn_fflags |= event; - /* - * If this is the last possible event for the - * knote, unlink this knote from the process - * before the process goes away. - */ if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) { kn->kn_flags |= (EV_EOF | EV_ONESHOT); - p = kn->kn_ptr.p_proc; - if (p != PROC_NULL) { - kn->kn_ptr.p_proc = PROC_NULL; - KNOTE_DETACH(&p->p_klist, kn); - } - return (1); } - } /* atomic check, no locking need when called from above */ return (kn->kn_fflags != 0); } + /* - * filt_timercompute - compute absolute timeout - * + * filt_timervalidate - process data from user + * + * Converts to either interval or deadline format. + * * The saved-data field in the knote contains the * time value. The saved filter-flags indicates * the unit of measurement. * - * If the timeout is not absolute, adjust it for - * the current time. + * After validation, either the saved-data field + * contains the interval in absolute time, or ext[0] + * contains the expected deadline. If that deadline + * is in the past, ext[0] is 0. + * + * Returns EINVAL for unrecognized units of time. + * + * Timer filter lock is held. 
+ * */ static int -filt_timercompute(struct knote *kn, uint64_t *abs_time) +filt_timervalidate(struct knote *kn) { uint64_t multiplier; uint64_t raw; @@ -518,28 +572,70 @@ filt_timercompute(struct knote *kn, uint64_t *abs_time) default: return EINVAL; } + nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw); - if (raw <= filt_timerfloor) { - *abs_time = 0; - return 0; - } - if ((kn->kn_sfflags & NOTE_ABSOLUTE) == NOTE_ABSOLUTE) { - uint32_t seconds, nanoseconds; + + kn->kn_ext[0] = 0; + kn->kn_sdata = 0; + + if (kn->kn_sfflags & NOTE_ABSOLUTE) { + clock_sec_t seconds; + clock_nsec_t nanoseconds; uint64_t now; clock_get_calendar_nanotime(&seconds, &nanoseconds); - nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + nanoseconds, - &now); - if (now >= raw + filt_timerfloor) { - *abs_time = 0; - return 0; + nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + + nanoseconds, &now); + + if (raw < now) { + /* time has already passed */ + kn->kn_ext[0] = 0; + } else { + raw -= now; + clock_absolutetime_interval_to_deadline(raw, + &kn->kn_ext[0]); } - raw -= now; - } - clock_absolutetime_interval_to_deadline(raw, abs_time); + } else { + kn->kn_sdata = raw; + } + return 0; } +/* + * filt_timerupdate - compute the next deadline + * + * Repeating timers store their interval in kn_sdata. Absolute + * timers have already calculated the deadline, stored in ext[0]. + * + * On return, the next deadline (or zero if no deadline is needed) + * is stored in kn_ext[0]. + * + * Timer filter lock is held. + */ +static void +filt_timerupdate(struct knote *kn) +{ + /* if there's no interval, deadline is just in kn_ext[0] */ + if (kn->kn_sdata == 0) + return; + + /* if timer hasn't fired before, fire in interval nsecs */ + if (kn->kn_ext[0] == 0) { + clock_absolutetime_interval_to_deadline(kn->kn_sdata, + &kn->kn_ext[0]); + } else { + /* + * If timer has fired before, schedule the next pop + * relative to the last intended deadline. 
+ * + * We could check for whether the deadline has expired, + * but the thread call layer can handle that. + */ + kn->kn_ext[0] += kn->kn_sdata; + } +} + /* * filt_timerexpire - the timer callout routine * @@ -555,135 +651,138 @@ filt_timerexpire(void *knx, __unused void *spare) struct klist timer_list; struct knote *kn = knx; + filt_timerlock(); + + kn->kn_hookid &= ~TIMER_RUNNING; + /* no "object" for timers, so fake a list */ SLIST_INIT(&timer_list); SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext); KNOTE(&timer_list, 1); + + /* if someone is waiting for timer to pop */ + if (kn->kn_hookid & TIMER_CANCELWAIT) { + struct kqueue *kq = kn->kn_kq; + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook, + THREAD_AWAKENED); + } + + filt_timerunlock(); +} + +/* + * Cancel a running timer (or wait for the pop). + * Timer filter lock is held. + */ +static void +filt_timercancel(struct knote *kn) +{ + struct kqueue *kq = kn->kn_kq; + thread_call_t callout = kn->kn_hook; + boolean_t cancelled; + + if (kn->kn_hookid & TIMER_RUNNING) { + /* cancel the callout if we can */ + cancelled = thread_call_cancel(callout); + if (cancelled) { + kn->kn_hookid &= ~TIMER_RUNNING; + } else { + /* we have to wait for the expire routine. */ + kn->kn_hookid |= TIMER_CANCELWAIT; + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, + &kn->kn_hook, THREAD_UNINT, 0); + filt_timerunlock(); + thread_block(THREAD_CONTINUE_NULL); + filt_timerlock(); + assert((kn->kn_hookid & TIMER_RUNNING) == 0); + } + } } /* - * data contains amount of time to sleep, in milliseconds, - * or a pointer to a timespec structure. + * Allocate a thread call for the knote's lifetime, and kick off the timer. 
*/ static int filt_timerattach(struct knote *kn) { thread_call_t callout; - uint64_t deadline; int error; - error = filt_timercompute(kn, &deadline); - if (error) - return (error); + callout = thread_call_allocate(filt_timerexpire, kn); + if (NULL == callout) + return (ENOMEM); - if (deadline) { - callout = thread_call_allocate(filt_timerexpire, kn); - if (NULL == callout) - return (ENOMEM); - } else { - /* handle as immediate */ - kn->kn_sdata = 0; - callout = NULL; + filt_timerlock(); + error = filt_timervalidate(kn); + if (error) { + filt_timerunlock(); + return (error); } - filt_timerlock(); - kn->kn_hook = (caddr_t)callout; + kn->kn_hook = (void*)callout; + kn->kn_hookid = 0; /* absolute=EV_ONESHOT */ if (kn->kn_sfflags & NOTE_ABSOLUTE) kn->kn_flags |= EV_ONESHOT; - if (deadline) { - /* all others - if not faking immediate */ + filt_timerupdate(kn); + if (kn->kn_ext[0]) { kn->kn_flags |= EV_CLEAR; - thread_call_enter_delayed(callout, deadline); - kn->kn_hookid = 0; + thread_call_enter_delayed(callout, kn->kn_ext[0]); + kn->kn_hookid |= TIMER_RUNNING; } else { /* fake immediate */ - kn->kn_hookid = 1; + kn->kn_data = 1; } + filt_timerunlock(); return (0); } +/* + * Shut down the timer if it's running, and free the callout. + */ static void filt_timerdetach(struct knote *kn) { thread_call_t callout; filt_timerlock(); - callout = (thread_call_t)kn->kn_hook; - if (callout != NULL) { - boolean_t cancelled; - /* cancel the callout if we can */ - cancelled = thread_call_cancel(callout); - if (cancelled) { - /* got it, just free it */ - kn->kn_hook = NULL; - filt_timerunlock(); - thread_call_free(callout); - return; - } - /* we have to wait for the expire routine. 
*/ - kn->kn_hookid = -1; /* we are detaching */ - assert_wait(&kn->kn_hook, THREAD_UNINT); - filt_timerunlock(); - thread_block(THREAD_CONTINUE_NULL); - assert(kn->kn_hook == NULL); - return; - } - /* nothing to do */ + callout = (thread_call_t)kn->kn_hook; + filt_timercancel(kn); + filt_timerunlock(); + + thread_call_free(callout); } static int -filt_timer(struct knote *kn, __unused long hint) +filt_timer(struct knote *kn, long hint) { int result; if (hint) { - /* real timer pop */ - thread_call_t callout; - boolean_t detaching; + /* real timer pop -- timer lock held by filt_timerexpire */ - filt_timerlock(); - kn->kn_data++; - detaching = (kn->kn_hookid < 0); - callout = (thread_call_t)kn->kn_hook; + if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) && + ((kn->kn_flags & EV_ONESHOT) == 0)) { - if (!detaching && (kn->kn_flags & EV_ONESHOT) == 0) { - uint64_t deadline; - int error; + /* evaluate next time to fire */ + filt_timerupdate(kn); - /* user input data may have changed - deal */ - error = filt_timercompute(kn, &deadline); - if (error) { - kn->kn_flags |= EV_ERROR; - kn->kn_data = error; - } else if (deadline == 0) { - /* revert to fake immediate */ - kn->kn_flags &= ~EV_CLEAR; - kn->kn_sdata = 0; - kn->kn_hookid = 1; - } else { + if (kn->kn_ext[0]) { /* keep the callout and re-arm */ - thread_call_enter_delayed(callout, deadline); - filt_timerunlock(); - return 1; + thread_call_enter_delayed(kn->kn_hook, + kn->kn_ext[0]); + kn->kn_hookid |= TIMER_RUNNING; } } - kn->kn_hook = NULL; - filt_timerunlock(); - thread_call_free(callout); - - /* if someone is waiting for timer to pop */ - if (detaching) - thread_wakeup(&kn->kn_hook); return 1; } @@ -691,31 +790,69 @@ filt_timer(struct knote *kn, __unused long hint) /* user-query */ filt_timerlock(); - /* change fake timer to real if needed */ - while (kn->kn_hookid > 0 && kn->kn_sdata > 0) { - int error; + result = (kn->kn_data != 0); - /* update the fake timer (make real) */ - kn->kn_hookid = 0; - kn->kn_data = 0; - 
filt_timerunlock(); - error = filt_timerattach(kn); - filt_timerlock(); + filt_timerunlock(); + return result; +} + + +/* + * filt_timertouch - update knote with new user input + * + * Cancel and restart the timer based on new user data. When + * the user picks up a knote, clear the count of how many timer + * pops have gone off (in kn_data). + */ +static void +filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) +{ + int error; + filt_timerlock(); + + switch (type) { + case EVENT_REGISTER: + /* cancel current call */ + filt_timercancel(kn); + + /* recalculate deadline */ + kn->kn_sdata = kev->data; + kn->kn_sfflags = kev->fflags; + + error = filt_timervalidate(kn); if (error) { + /* no way to report error, so mark it in the knote */ kn->kn_flags |= EV_ERROR; kn->kn_data = error; - filt_timerunlock(); - return 1; + break; + } + + /* start timer if necessary */ + filt_timerupdate(kn); + if (kn->kn_ext[0]) { + thread_call_enter_delayed(kn->kn_hook, kn->kn_ext[0]); + kn->kn_hookid |= TIMER_RUNNING; + } else { + /* pretend the timer has fired */ + kn->kn_data = 1; } - } - /* if still fake, pretend it fired */ - if (kn->kn_hookid > 0) - kn->kn_data = 1; + break; + + case EVENT_PROCESS: + /* reset the timer pop count in kn_data */ + *kev = kn->kn_kevent; + kev->ext[0] = 0; + kn->kn_data = 0; + if (kn->kn_flags & EV_CLEAR) + kn->kn_fflags = 0; + break; + default: + panic("filt_timertouch() - invalid type (%ld)", type); + break; + } - result = (kn->kn_data != 0); filt_timerunlock(); - return result; } static void @@ -730,6 +867,74 @@ filt_timerunlock(void) lck_mtx_unlock(&_filt_timerlock); } +static int +filt_userattach(struct knote *kn) +{ + /* EVFILT_USER knotes are not attached to anything in the kernel */ + kn->kn_hook = NULL; + if (kn->kn_fflags & NOTE_TRIGGER || kn->kn_flags & EV_TRIGGER) { + kn->kn_hookid = 1; + } else { + kn->kn_hookid = 0; + } + return 0; +} + +static void +filt_userdetach(__unused struct knote *kn) +{ + /* EVFILT_USER knotes are 
not attached to anything in the kernel */ +} + +static int +filt_user(struct knote *kn, __unused long hint) +{ + return kn->kn_hookid; +} + +static void +filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type) +{ + int ffctrl; + switch (type) { + case EVENT_REGISTER: + if (kev->fflags & NOTE_TRIGGER || kev->flags & EV_TRIGGER) { + kn->kn_hookid = 1; + } + + ffctrl = kev->fflags & NOTE_FFCTRLMASK; + kev->fflags &= NOTE_FFLAGSMASK; + switch (ffctrl) { + case NOTE_FFNOP: + break; + case NOTE_FFAND: + OSBitAndAtomic(kev->fflags, &kn->kn_sfflags); + break; + case NOTE_FFOR: + OSBitOrAtomic(kev->fflags, &kn->kn_sfflags); + break; + case NOTE_FFCOPY: + kn->kn_sfflags = kev->fflags; + break; + } + kn->kn_sdata = kev->data; + break; + case EVENT_PROCESS: + *kev = kn->kn_kevent; + kev->fflags = (volatile UInt32)kn->kn_sfflags; + kev->data = kn->kn_sdata; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_hookid = 0; + kn->kn_data = 0; + kn->kn_fflags = 0; + } + break; + default: + panic("filt_usertouch() - invalid type (%ld)", type); + break; + } +} + /* * JMM - placeholder for not-yet-implemented filters */ @@ -748,11 +953,18 @@ kqueue_alloc(struct proc *p) MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK); if (kq != NULL) { - bzero(kq, sizeof(struct kqueue)); - lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); - TAILQ_INIT(&kq->kq_head); - TAILQ_INIT(&kq->kq_inprocess); - kq->kq_p = p; + wait_queue_set_t wqs; + + wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST); + if (wqs != NULL) { + bzero(kq, sizeof(struct kqueue)); + lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); + TAILQ_INIT(&kq->kq_head); + kq->kq_wqs = wqs; + kq->kq_p = p; + } else { + FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE); + } } if (fdp->fd_knlistsize < 0) { @@ -830,12 +1042,19 @@ kqueue_dealloc(struct kqueue *kq) } } proc_fdunlock(p); + + /* + * before freeing the wait queue set for this kqueue, + * make sure it is unlinked from all its containing 
(select) sets. + */ + wait_queue_unlink_all((wait_queue_t)kq->kq_wqs); + wait_queue_set_free(kq->kq_wqs); lck_spin_destroy(&kq->kq_lock, kq_lck_grp); FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE); } int -kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval) +kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) { struct kqueue *kq; struct fileproc *fp; @@ -866,50 +1085,43 @@ kqueue(struct proc *p, __unused struct kqueue_args *uap, register_t *retval) return (error); } -int -kqueue_portset_np(__unused struct proc *p, - __unused struct kqueue_portset_np_args *uap, - __unused register_t *retval) -{ - /* JMM - Placeholder for now */ - return (ENOTSUP); -} - -int -kqueue_from_portset_np(__unused struct proc *p, - __unused struct kqueue_from_portset_np_args *uap, - __unused register_t *retval) -{ - /* JMM - Placeholder for now */ - return (ENOTSUP); -} - static int -kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p) +kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64) { int advance; int error; - if (IS_64BIT_PROCESS(p)) { - struct user_kevent kev64; + if (iskev64) { + advance = sizeof(struct kevent64_s); + error = copyin(*addrp, (caddr_t)kevp, advance); + } else if (IS_64BIT_PROCESS(p)) { + struct user64_kevent kev64; + bzero(kevp, sizeof(struct kevent64_s)); advance = sizeof(kev64); error = copyin(*addrp, (caddr_t)&kev64, advance); if (error) return error; - kevp->ident = CAST_DOWN(uintptr_t, kev64.ident); + kevp->ident = kev64.ident; kevp->filter = kev64.filter; kevp->flags = kev64.flags; kevp->fflags = kev64.fflags; - kevp->data = CAST_DOWN(intptr_t, kev64.data); + kevp->data = kev64.data; kevp->udata = kev64.udata; } else { - /* - * compensate for legacy in-kernel kevent layout - * where the udata field is alredy 64-bit. 
- */ - advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t); - error = copyin(*addrp, (caddr_t)kevp, advance); + struct user32_kevent kev32; + bzero(kevp, sizeof(struct kevent64_s)); + + advance = sizeof(kev32); + error = copyin(*addrp, (caddr_t)&kev32, advance); + if (error) + return error; + kevp->ident = (uintptr_t)kev32.ident; + kevp->filter = kev32.filter; + kevp->flags = kev32.flags; + kevp->fflags = kev32.fflags; + kevp->data = (intptr_t)kev32.data; + kevp->udata = CAST_USER_ADDR_T(kev32.udata); } if (!error) *addrp += advance; @@ -917,13 +1129,16 @@ kevent_copyin(user_addr_t *addrp, struct kevent *kevp, struct proc *p) } static int -kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p) +kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64) { int advance; int error; - if (IS_64BIT_PROCESS(p)) { - struct user_kevent kev64; + if (iskev64) { + advance = sizeof(struct kevent64_s); + error = copyout((caddr_t)kevp, *addrp, advance); + } else if (IS_64BIT_PROCESS(p)) { + struct user64_kevent kev64; /* * deal with the special case of a user-supplied @@ -940,12 +1155,16 @@ kevent_copyout(struct kevent *kevp, user_addr_t *addrp, struct proc *p) advance = sizeof(kev64); error = copyout((caddr_t)&kev64, *addrp, advance); } else { - /* - * compensate for legacy in-kernel kevent layout - * where the udata field is alredy 64-bit. 
- */ - advance = sizeof(*kevp) + sizeof(void *) - sizeof(user_addr_t); - error = copyout((caddr_t)kevp, *addrp, advance); + struct user32_kevent kev32; + + kev32.ident = (uint32_t)kevp->ident; + kev32.filter = kevp->filter; + kev32.flags = kevp->flags; + kev32.fflags = kevp->fflags; + kev32.data = (int32_t)kevp->data; + kev32.udata = kevp->udata; + advance = sizeof(kev32); + error = copyout((caddr_t)&kev32, *addrp, advance); } if (!error) *addrp += advance; @@ -963,7 +1182,7 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error) { struct _kevent *cont_args; struct fileproc *fp; - register_t *retval; + int32_t *retval; int noutputs; int fd; struct proc *p = current_proc(); @@ -990,37 +1209,63 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error) * kevent - [syscall] register and wait for kernel events * */ - int -kevent(struct proc *p, struct kevent_args *uap, register_t *retval) -{ - user_addr_t changelist = uap->changelist; - user_addr_t ueventlist = uap->eventlist; - int nchanges = uap->nchanges; - int nevents = uap->nevents; - int fd = uap->fd; +kevent(struct proc *p, struct kevent_args *uap, int32_t *retval) +{ + return kevent_internal(p, + 0, + uap->changelist, + uap->nchanges, + uap->eventlist, + uap->nevents, + uap->fd, + uap->timeout, + 0, /* no flags from old kevent() call */ + retval); +} + +int +kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval) +{ + return kevent_internal(p, + 1, + uap->changelist, + uap->nchanges, + uap->eventlist, + uap->nevents, + uap->fd, + uap->timeout, + uap->flags, + retval); +} +static int +kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, + int nchanges, user_addr_t ueventlist, int nevents, int fd, + user_addr_t utimeout, __unused unsigned int flags, + int32_t *retval) +{ struct _kevent *cont_args; uthread_t ut; struct kqueue *kq; struct fileproc *fp; - struct kevent kev; + struct kevent64_s kev; int error, noutputs; struct timeval atv; /* convert timeout to 
absolute - if we have one */ - if (uap->timeout != USER_ADDR_NULL) { + if (utimeout != USER_ADDR_NULL) { struct timeval rtv; - if ( IS_64BIT_PROCESS(p) ) { - struct user_timespec ts; - error = copyin( uap->timeout, &ts, sizeof(ts) ); + if (IS_64BIT_PROCESS(p)) { + struct user64_timespec ts; + error = copyin(utimeout, &ts, sizeof(ts)); if ((ts.tv_sec & 0xFFFFFFFF00000000ull) != 0) error = EINVAL; else TIMESPEC_TO_TIMEVAL(&rtv, &ts); } else { - struct timespec ts; - error = copyin( uap->timeout, &ts, sizeof(ts) ); + struct user32_timespec ts; + error = copyin(utimeout, &ts, sizeof(ts)); TIMESPEC_TO_TIMEVAL(&rtv, &ts); } if (error) @@ -1037,11 +1282,25 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval) /* get a usecount for the kq itself */ if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) return(error); + + /* each kq should only be used for events of one type */ + kqlock(kq); + if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) { + if (((iskev64 && (kq->kq_state & KQ_KEV32)) || + (!iskev64 && (kq->kq_state & KQ_KEV64)))) { + error = EINVAL; + kqunlock(kq); + goto errorout; + } + } else { + kq->kq_state |= (iskev64 ? KQ_KEV64 : KQ_KEV32); + } + kqunlock(kq); /* register all the change requests the user provided... 
*/ noutputs = 0; while (nchanges > 0 && error == 0) { - error = kevent_copyin(&changelist, &kev, p); + error = kevent_copyin(&changelist, &kev, p, iskev64); if (error) break; @@ -1050,7 +1309,7 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval) if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) { kev.flags = EV_ERROR; kev.data = error; - error = kevent_copyout(&kev, &ueventlist, p); + error = kevent_copyout(&kev, &ueventlist, p, iskev64); if (error == 0) { nevents--; noutputs++; @@ -1061,20 +1320,23 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval) /* store the continuation/completion data in the uthread */ ut = (uthread_t)get_bsdthread_info(current_thread()); - cont_args = (struct _kevent *)&ut->uu_kevent.ss_kevent; + cont_args = &ut->uu_kevent.ss_kevent; cont_args->fp = fp; cont_args->fd = fd; cont_args->retval = retval; cont_args->eventlist = ueventlist; cont_args->eventcount = nevents; cont_args->eventout = noutputs; + cont_args->eventsize = iskev64; if (nevents > 0 && noutputs == 0 && error == 0) - error = kevent_scan(kq, kevent_callback, + error = kqueue_scan(kq, kevent_callback, kevent_continue, cont_args, &atv, p); kevent_continue(kq, cont_args, error); - /* NOTREACHED */ + +errorout: + fp_drop(p, fd, fp, 0); return error; } @@ -1087,18 +1349,22 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval) */ static int -kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) +kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, + void *data) { struct _kevent *cont_args; int error; + int iskev64; cont_args = (struct _kevent *)data; assert(cont_args->eventout < cont_args->eventcount); + iskev64 = cont_args->eventsize; + /* * Copy out the appropriate amount of event data for this user. 
*/ - error = kevent_copyout(kevp, &cont_args->eventlist, current_proc()); + error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), iskev64); /* * If there isn't space for additional events, return @@ -1109,6 +1375,29 @@ kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) return error; } +/* + * kevent_description - format a description of a kevent for diagnostic output + * + * called with a 128-byte string buffer + */ + +char * +kevent_description(struct kevent64_s *kevp, char *s, size_t n) +{ + snprintf(s, n, + "kevent=" + "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", + kevp->ident, + kevp->filter, + kevp->flags, + kevp->fflags, + kevp->data, + kevp->udata, + kevp->ext[0], + kevp->ext[1]); + return s; +} + /* * kevent_register - add a new event to a kqueue * @@ -1124,7 +1413,7 @@ kevent_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) */ int -kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctxp) +kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc *ctxp) { struct proc *p = kq->kq_p; struct filedesc *fdp = p->p_fd; @@ -1147,12 +1436,14 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx return (EINVAL); } + restart: /* this iocount needs to be dropped if it is not registered */ - if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 0)) != 0) + proc_fdlock(p); + if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) { + proc_fdunlock(p); return(error); + } - restart: - proc_fdlock(p); if (fops->f_isfd) { /* fd-based knotes are linked off the fd table */ if (kev->ident < (u_int)fdp->fd_knlistsize) { @@ -1197,7 +1488,7 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx kev->data = 0; kn->kn_kevent = *kev; kn->kn_inuse = 1; /* for f_attach() */ - kn->kn_status = 0; + kn->kn_status = KN_ATTACHING; /* before 
anyone can find it */ if (kev->flags & EV_DISABLE) @@ -1217,14 +1508,24 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx */ fp = NULL; + error = fops->f_attach(kn); + /* - * If the attach fails here, we can drop it knowing - * that nobody else has a reference to the knote. + * Anyone trying to drop this knote will yield to + * us, since KN_ATTACHING is set. */ - if ((error = fops->f_attach(kn)) != 0) { + kqlock(kq); + if (error != 0 || (kn->kn_status & KN_DROPPING)) { + if (error == 0) { + kn->kn_fop->f_detach(kn); + } + kn->kn_status |= KN_DROPPING; + kqunlock(kq); knote_drop(kn, p); goto done; } + kn->kn_status &= ~KN_ATTACHING; + kqunlock(kq); } else { proc_fdunlock(p); error = ENOENT; @@ -1258,26 +1559,44 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx /* * If somebody is in the middle of dropping this * knote - go find/insert a new one. But we have - * wait for this one to go away first. + * wait for this one to go away first. Attaches + * running in parallel may also drop/modify the + * knote. Wait for those to complete as well and + * then start over if we encounter one. */ - if (!kqlock2knoteusewait(kq, kn)) - /* kqueue unlocked */ + if (!kqlock2knoteusewait(kq, kn)) { + /* kqueue, proc_fdlock both unlocked */ goto restart; + } /* * The user may change some filter values after the * initial EV_ADD, but doing so will not reset any * filter which have already been triggered. 
*/ - kn->kn_sfflags = kev->fflags; - kn->kn_sdata = kev->data; kn->kn_kevent.udata = kev->udata; + if (!fops->f_isfd && fops->f_touch != NULL) + fops->f_touch(kn, kev, EVENT_REGISTER); + else { + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + } + + /* We may need to push some info down to a networked filesystem */ + if (kn->kn_filter == EVFILT_VNODE) { + vnode_knoteupdate(kn); + } } - /* still have use ref on knote */ - if (kn->kn_fop->f_event(kn, 0)) { + + /* + * If the knote is not marked to always stay enqueued, + * invoke the filter routine to see if it should be + * enqueued now. + */ + if ((kn->kn_status & KN_STAYQUEUED) == 0 && kn->kn_fop->f_event(kn, 0)) { if (knoteuse2kqlock(kq, kn)) - knote_activate(kn); + knote_activate(kn, 1); kqunlock(kq); } else { knote_put(kn); @@ -1289,8 +1608,150 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx return (error); } + +/* + * knote_process - process a triggered event + * + * Validate that it is really still a triggered event + * by calling the filter routines (if necessary). Hold + * a use reference on the knote to avoid it being detached. + * If it is still considered triggered, invoke the callback + * routine provided and move it to the provided inprocess + * queue. + * + * caller holds a reference on the kqueue. + * kqueue locked on entry and exit - but may be dropped + */ +static int +knote_process(struct knote *kn, + kevent_callback_t callback, + void *data, + struct kqtailq *inprocessp, + struct proc *p) +{ + struct kqueue *kq = kn->kn_kq; + struct kevent64_s kev; + int touch; + int result; + int error; + + /* + * Determine the kevent state we want to return. + * + * Some event states need to be revalidated before returning + * them, others we take the snapshot at the time the event + * was enqueued. + * + * Events with non-NULL f_touch operations must be touched. + * Triggered events must fill in kev for the callback. 
+ * + * Convert our lock to a use-count and call the event's + * filter routine(s) to update. + */ + if ((kn->kn_status & KN_DISABLED) != 0) { + result = 0; + touch = 0; + } else { + int revalidate; + + result = 1; + revalidate = ((kn->kn_status & KN_STAYQUEUED) != 0 || + (kn->kn_flags & EV_ONESHOT) == 0); + touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); + + if (revalidate || touch) { + if (revalidate) + knote_deactivate(kn); + + /* call the filter/touch routines with just a ref */ + if (kqlock2knoteuse(kq, kn)) { + + /* if we have to revalidate, call the filter */ + if (revalidate) { + result = kn->kn_fop->f_event(kn, 0); + } + + /* capture the kevent data - using touch if specified */ + if (result) { + if (touch) { + kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS); + } else { + kev = kn->kn_kevent; + } + } + /* convert back to a kqlock - bail if the knote went away */ + if (!knoteuse2kqlock(kq, kn)) { + return EJUSTRETURN; + } else if (result) { + /* if revalidated as alive, make sure it's active */ + if (!(kn->kn_status & KN_ACTIVE)) { + knote_activate(kn, 0); + } + } else if ((kn->kn_status & KN_STAYQUEUED) == 0) { + /* was already dequeued, so just bail on this one */ + return EJUSTRETURN; + } + } else { + return EJUSTRETURN; + } + } else { + kev = kn->kn_kevent; + } + } + + /* move knote onto inprocess queue */ + assert(kn->kn_tq == &kq->kq_head); + TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); + kn->kn_tq = inprocessp; + TAILQ_INSERT_TAIL(inprocessp, kn, kn_tqe); + + /* + * Determine how to dispatch the knote for future event handling. + * not-fired: just return (do not callout). + * One-shot: deactivate it. + * Clear: deactivate and clear the state. + * Dispatch: don't clear state, just deactivate it and mark it disabled. + * All others: just leave where they are. 
+ */ + + if (result == 0) { + return EJUSTRETURN; + } else if (kn->kn_flags & EV_ONESHOT) { + knote_deactivate(kn); + if (kqlock2knotedrop(kq, kn)) { + kn->kn_fop->f_detach(kn); + knote_drop(kn, p); + } + } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { + knote_deactivate(kn); + /* manually clear knotes who weren't 'touch'ed */ + if ((touch == 0) && (kn->kn_flags & EV_CLEAR)) { + kn->kn_data = 0; + kn->kn_fflags = 0; + } + if (kn->kn_flags & EV_DISPATCH) + kn->kn_status |= KN_DISABLED; + kqunlock(kq); + } else { + /* + * leave on inprocess queue. We'll + * move all the remaining ones back + * the kq queue and wakeup any + * waiters when we are done. + */ + kqunlock(kq); + } + + /* callback to handle each event as we find it */ + error = (callback)(kq, &kev, data); + + kqlock(kq); + return error; +} + + /* - * kevent_process - process the triggered events in a kqueue + * kqueue_process - process the triggered events in a kqueue * * Walk the queued knotes and validate that they are * really still triggered events by calling the filter @@ -1301,20 +1762,22 @@ kevent_register(struct kqueue *kq, struct kevent *kev, __unused struct proc *ctx * * caller holds a reference on the kqueue. 
* kqueue locked on entry and exit - but may be dropped + * kqueue list locked (held for duration of call) */ static int -kevent_process(struct kqueue *kq, +kqueue_process(struct kqueue *kq, kevent_callback_t callback, void *data, int *countp, struct proc *p) { + struct kqtailq inprocess; struct knote *kn; - struct kevent kev; int nevents; int error; + TAILQ_INIT(&inprocess); restart: if (kq->kq_count == 0) { *countp = 0; @@ -1322,8 +1785,9 @@ kevent_process(struct kqueue *kq, } /* if someone else is processing the queue, wait */ - if (!TAILQ_EMPTY(&kq->kq_inprocess)) { - assert_wait(&kq->kq_inprocess, THREAD_UNINT); + if (hw_atomic_add(&kq->kq_nprocess, 1) != 1) { + hw_atomic_sub(&kq->kq_nprocess, 1); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0); kq->kq_state |= KQ_PROCWAIT; kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); @@ -1331,102 +1795,44 @@ kevent_process(struct kqueue *kq, goto restart; } + /* + * Clear any pre-posted status from previous runs, so we only + * detect events that occur during this run. + */ + wait_queue_sub_clearrefs(kq->kq_wqs); + + /* + * loop through the enqueued knotes, processing each one and + * revalidating those that need it. As they are processed, + * they get moved to the inprocess queue (so the loop can end). + */ error = 0; nevents = 0; + while (error == 0 && (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { - - /* - * Take note off the active queue. - * - * Non-EV_ONESHOT events must be re-validated. - * - * Convert our lock to a use-count and call the event's - * filter routine to update. - * - * If the event is valid, or triggered while the kq - * is unlocked, move to the inprocess queue for processing. 
- */ - - if ((kn->kn_flags & EV_ONESHOT) == 0) { - int result; - knote_deactivate(kn); - - if (kqlock2knoteuse(kq, kn)) { - - /* call the filter with just a ref */ - result = kn->kn_fop->f_event(kn, 0); - - /* if it's still alive, make sure it's active */ - if (knoteuse2kqlock(kq, kn) && result) { - /* may have been reactivated in filter*/ - if (!(kn->kn_status & KN_ACTIVE)) { - knote_activate(kn); - } - } else { - continue; - } - } else { - continue; - } - } - - /* knote is active: move onto inprocess queue */ - assert(kn->kn_tq == &kq->kq_head); - TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); - kn->kn_tq = &kq->kq_inprocess; - TAILQ_INSERT_TAIL(&kq->kq_inprocess, kn, kn_tqe); - - /* - * Got a valid triggered knote with the kqueue - * still locked. Snapshot the data, and determine - * how to dispatch the knote for future events. - */ - kev = kn->kn_kevent; - - /* now what happens to it? */ - if (kn->kn_flags & EV_ONESHOT) { - knote_deactivate(kn); - if (kqlock2knotedrop(kq, kn)) { - kn->kn_fop->f_detach(kn); - knote_drop(kn, p); - } - } else if (kn->kn_flags & EV_CLEAR) { - knote_deactivate(kn); - kn->kn_data = 0; - kn->kn_fflags = 0; - kqunlock(kq); - } else { - /* - * leave on in-process queue. We'll - * move all the remaining ones back - * the kq queue and wakeup any - * waiters when we are done. - */ - kqunlock(kq); - } - - /* callback to handle each event as we find it */ - error = (callback)(kq, &kev, data); - nevents++; - - kqlock(kq); + error = knote_process(kn, callback, data, &inprocess, p); + if (error == EJUSTRETURN) + error = 0; + else + nevents++; } /* * With the kqueue still locked, move any knotes - * remaining on the in-process queue back to the + * remaining on the inprocess queue back to the * kq's queue and wake up any waiters. 
*/ - while ((kn = TAILQ_FIRST(&kq->kq_inprocess)) != NULL) { - assert(kn->kn_tq == &kq->kq_inprocess); - TAILQ_REMOVE(&kq->kq_inprocess, kn, kn_tqe); + while ((kn = TAILQ_FIRST(&inprocess)) != NULL) { + assert(kn->kn_tq == &inprocess); + TAILQ_REMOVE(&inprocess, kn, kn_tqe); kn->kn_tq = &kq->kq_head; TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); } + hw_atomic_sub(&kq->kq_nprocess, 1); if (kq->kq_state & KQ_PROCWAIT) { kq->kq_state &= ~KQ_PROCWAIT; - thread_wakeup(&kq->kq_inprocess); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED); } *countp = nevents; @@ -1435,10 +1841,11 @@ kevent_process(struct kqueue *kq, static void -kevent_scan_continue(void *data, wait_result_t wait_result) +kqueue_scan_continue(void *data, wait_result_t wait_result) { - uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); - struct _kevent_scan * cont_args = &ut->uu_kevent.ss_kevent_scan; + thread_t self = current_thread(); + uthread_t ut = (uthread_t)get_bsdthread_info(self); + struct _kqueue_scan * cont_args = &ut->uu_kevent.ss_kqueue_scan; struct kqueue *kq = (struct kqueue *)data; int error; int count; @@ -1447,12 +1854,13 @@ kevent_scan_continue(void *data, wait_result_t wait_result) switch (wait_result) { case THREAD_AWAKENED: kqlock(kq); - error = kevent_process(kq, cont_args->call, cont_args, &count, current_proc()); + error = kqueue_process(kq, cont_args->call, cont_args, &count, current_proc()); if (error == 0 && count == 0) { - assert_wait_deadline(kq, THREAD_ABORTSAFE, cont_args->deadline); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT, + THREAD_ABORTSAFE, cont_args->deadline); kq->kq_state |= KQ_SLEEP; kqunlock(kq); - thread_block_parameter(kevent_scan_continue, kq); + thread_block_parameter(kqueue_scan_continue, kq); /* NOTREACHED */ } kqunlock(kq); @@ -1475,7 +1883,7 @@ kevent_scan_continue(void *data, wait_result_t wait_result) /* - * kevent_scan - scan and wait for events in a kqueue + * kqueue_scan - scan and wait 
for events in a kqueue * * Process the triggered events in a kqueue. * @@ -1489,9 +1897,9 @@ kevent_scan_continue(void *data, wait_result_t wait_result) */ int -kevent_scan(struct kqueue *kq, +kqueue_scan(struct kqueue *kq, kevent_callback_t callback, - kevent_continue_t continuation, + kqueue_continue_t continuation, void *data, struct timeval *atvp, struct proc *p) @@ -1510,10 +1918,10 @@ kevent_scan(struct kqueue *kq, /* * Make a pass through the kq to find events already - * triggered. + * triggered. */ kqlock(kq); - error = kevent_process(kq, callback, data, &count, p); + error = kqueue_process(kq, callback, data, &count, p); if (error || count) break; /* lock still held */ @@ -1541,18 +1949,18 @@ kevent_scan(struct kqueue *kq, if (continuation) { uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); - struct _kevent_scan *cont_args = &ut->uu_kevent.ss_kevent_scan; + struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan; cont_args->call = callback; cont_args->cont = continuation; cont_args->deadline = deadline; cont_args->data = data; - cont = kevent_scan_continue; + cont = kqueue_scan_continue; } } /* go ahead and wait */ - assert_wait_deadline(kq, THREAD_ABORTSAFE, deadline); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT, THREAD_ABORTSAFE, deadline); kq->kq_state |= KQ_SLEEP; kqunlock(kq); wait_result = thread_block_parameter(cont, kq); @@ -1612,22 +2020,69 @@ kqueue_ioctl(__unused struct fileproc *fp, /*ARGSUSED*/ static int -kqueue_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) +kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) { struct kqueue *kq = (struct kqueue *)fp->f_data; - int retnum = 0; + int again; - if (which == FREAD) { - kqlock(kq); - if (kq->kq_count) { - retnum = 1; - } else { - selrecord(vfs_context_proc(ctx), &kq->kq_sel, wql); - kq->kq_state |= KQ_SEL; + if (which != FREAD) + return 0; + + kqlock(kq); + /* + * If this is the first pass, link the wait 
queue associated with the + * the kqueue onto the wait queue set for the select(). Normally we + * use selrecord() for this, but it uses the wait queue within the + * selinfo structure and we need to use the main one for the kqueue to + * catch events from KN_STAYQUEUED sources. So we do the linkage manually. + * (The select() call will unlink them when it ends). + */ + if (wql != NULL) { + thread_t cur_act = current_thread(); + struct uthread * ut = get_bsdthread_info(cur_act); + + kq->kq_state |= KQ_SEL; + wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset, + (wait_queue_link_t)wql); + } + + retry: + again = 0; + if (kq->kq_count != 0) { + struct knote *kn; + + /* + * there is something queued - but it might be a + * KN_STAYQUEUED knote, which may or may not have + * any events pending. So, we have to walk the + * list of knotes to see, and peek at the stay- + * queued ones to be really sure. + */ + TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) { + int retnum = 0; + if ((kn->kn_status & KN_STAYQUEUED) == 0 || + (retnum = kn->kn_fop->f_peek(kn)) > 0) { + kqunlock(kq); + return 1; + } + if (retnum < 0) + again++; } + } + + /* + * If we stumbled across a knote that couldn't be peeked at, + * we have to drop the kq lock and try again. 
+ */ + if (again > 0) { kqunlock(kq); + mutex_pause(0); + kqlock(kq); + goto retry; } - return (retnum); + + kqunlock(kq); + return 0; } /* @@ -1696,25 +2151,45 @@ kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_con } } +/* + * kqueue_drain - called when kq is closed + */ +/*ARGSUSED*/ +static int +kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx) +{ + struct kqueue *kq = (struct kqueue *)fp->f_fglob->fg_data; + kqlock(kq); + kqueue_wakeup(kq, 1); + kqunlock(kq); + return 0; +} + /*ARGSUSED*/ int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t ctx) { - struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ - struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ struct kqueue *kq = (struct kqueue *)fp->f_data; if (isstat64 != 0) { - sb64 = (struct stat64 *)ub; + struct stat64 *sb64 = (struct stat64 *)ub; + bzero((void *)sb64, sizeof(*sb64)); sb64->st_size = kq->kq_count; - sb64->st_blksize = sizeof(struct kevent); + if (kq->kq_state & KQ_KEV64) + sb64->st_blksize = sizeof(struct kevent64_s); + else + sb64->st_blksize = sizeof(struct kevent); sb64->st_mode = S_IFIFO; } else { - sb = (struct stat *)ub; + struct stat *sb = (struct stat *)ub; + bzero((void *)sb, sizeof(*sb)); sb->st_size = kq->kq_count; - sb->st_blksize = sizeof(struct kevent); + if (kq->kq_state & KQ_KEV64) + sb->st_blksize = sizeof(struct kevent64_s); + else + sb->st_blksize = sizeof(struct kevent); sb->st_mode = S_IFIFO; } @@ -1725,18 +2200,13 @@ kqueue_stat(struct fileproc *fp, void *ub, int isstat64, __unused vfs_context_t * Called with the kqueue locked */ static void -kqueue_wakeup(struct kqueue *kq) +kqueue_wakeup(struct kqueue *kq, int closed) { - - if (kq->kq_state & KQ_SLEEP) { - kq->kq_state &= ~KQ_SLEEP; - thread_wakeup(kq); - } - if (kq->kq_state & KQ_SEL) { - kq->kq_state &= ~KQ_SEL; - selwakeup(&kq->kq_sel); + if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) 
!= 0 || kq->kq_nprocess > 0) { + kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT, + (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED); } - KNOTE(&kq->kq_sel.si_note, 0); } void @@ -1752,7 +2222,7 @@ klist_init(struct klist *list) * The object lock protects the list. It is assumed * that the filter/event routine for the object can * determine that the object is already locked (via - * the hind) and not deadlock itself. + * the hint) and not deadlock itself. * * The object lock should also hold off pending * detach/drop operations. But we'll prevent it here @@ -1775,7 +2245,7 @@ knote(struct klist *list, long hint) /* if its not going away and triggered */ if (knoteuse2kqlock(kq, kn) && result) - knote_activate(kn); + knote_activate(kn, 1); /* lock held again */ } kqunlock(kq); @@ -1805,6 +2275,51 @@ knote_detach(struct klist *list, struct knote *kn) return SLIST_EMPTY(list); } +/* + * For a given knote, link a provided wait queue directly with the kqueue. + * Wakeups will happen via recursive wait queue support. But nothing will move + * the knote to the active list at wakeup (nothing calls knote()). Instead, + * we permanently enqueue them here. + * + * kqueue and knote references are held by caller. + */ +int +knote_link_wait_queue(struct knote *kn, struct wait_queue *wq) +{ + struct kqueue *kq = kn->kn_kq; + kern_return_t kr; + + kr = wait_queue_link(wq, kq->kq_wqs); + if (kr == KERN_SUCCESS) { + kqlock(kq); + kn->kn_status |= KN_STAYQUEUED; + knote_enqueue(kn); + kqunlock(kq); + return 0; + } else { + return ENOMEM; + } +} + +/* + * Unlink the provided wait queue from the kqueue associated with a knote. + * Also remove it from the magic list of directly attached knotes. + * + * Note that the unlink may have already happened from the other side, so + * ignore any failures to unlink and just remove it from the kqueue list. 
+ */ +void +knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq) +{ + struct kqueue *kq = kn->kn_kq; + + (void) wait_queue_unlink(wq, kq->kq_wqs); + kqlock(kq); + kn->kn_status &= ~KN_STAYQUEUED; + knote_dequeue(kn); + kqunlock(kq); +} + /* * remove all knotes referencing a specified fd * @@ -1902,6 +2417,7 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) struct proc *p = kq->kq_p; struct filedesc *fdp = p->p_fd; struct klist *list; + int needswakeup; proc_fdlock(p); if (kn->kn_fop->f_isfd) @@ -1912,11 +2428,13 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) SLIST_REMOVE(list, kn, knote, kn_link); kqlock(kq); knote_dequeue(kn); - if (kn->kn_status & KN_DROPWAIT) - thread_wakeup(&kn->kn_status); + needswakeup = (kn->kn_status & KN_USEWAIT); kqunlock(kq); proc_fdunlock(p); + if (needswakeup) + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + if (kn->kn_fop->f_isfd) fp_drop(p, kn->kn_id, kn->kn_fp, 0); @@ -1925,14 +2443,18 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) /* called with kqueue lock held */ static void -knote_activate(struct knote *kn) +knote_activate(struct knote *kn, int propagate) { struct kqueue *kq = kn->kn_kq; kn->kn_status |= KN_ACTIVE; knote_enqueue(kn); - kqueue_wakeup(kq); - } + kqueue_wakeup(kq, 0); + + /* this is a real event: wake up the parent kq, too */ + if (propagate) + KNOTE(&kq->kq_sel.si_note, 0); +} /* called with kqueue lock held */ static void @@ -1946,10 +2468,10 @@ knote_deactivate(struct knote *kn) static void knote_enqueue(struct knote *kn) { - struct kqueue *kq = kn->kn_kq; - - if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) { + if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_STAYQUEUED || + (kn->kn_status & (KN_QUEUED | KN_STAYQUEUED | KN_DISABLED)) == 0) { struct kqtailq *tq = kn->kn_tq; + struct kqueue *kq = kn->kn_kq; TAILQ_INSERT_TAIL(tq, kn, kn_tqe); kn->kn_status |= KN_QUEUED; @@ -1963,8 +2485,7 @@ knote_dequeue(struct knote 
*kn) { struct kqueue *kq = kn->kn_kq; - //assert((kn->kn_status & KN_DISABLED) == 0); - if ((kn->kn_status & KN_QUEUED) == KN_QUEUED) { + if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_QUEUED) { struct kqtailq *tq = kn->kn_tq; TAILQ_REMOVE(tq, kn, kn_tqe); @@ -2030,25 +2551,18 @@ struct pr_usrreqs event_usrreqs = { struct protosw eventsw[] = { { - SOCK_RAW, &systemdomain, SYSPROTO_EVENT, PR_ATOMIC, - 0, 0, 0, 0, - 0, - 0, 0, 0, 0, -#if __APPLE__ - 0, -#endif - &event_usrreqs, - 0, 0, 0, -#if __APPLE__ - {0, 0}, 0, {0} -#endif + .pr_type = SOCK_RAW, + .pr_domain = &systemdomain, + .pr_protocol = SYSPROTO_EVENT, + .pr_flags = PR_ATOMIC, + .pr_usrreqs = &event_usrreqs, } }; static struct kern_event_head kern_event_head; -static u_long static_event_id = 0; +static u_int32_t static_event_id = 0; struct domain *sysdom = &systemdomain; static lck_mtx_t *sys_mtx; @@ -2127,14 +2641,14 @@ errno_t kev_vendor_code_find( if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) { return EINVAL; } - return mbuf_tag_id_find_internal(string, out_vendor_code, 1); + return net_str_id_find_internal(string, out_vendor_code, NSI_VENDOR_CODE, 1); } errno_t kev_msg_post(struct kev_msg *event_msg) { mbuf_tag_id_t min_vendor, max_vendor; - mbuf_tag_id_first_last(&min_vendor, &max_vendor); + net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE); if (event_msg == NULL) return EINVAL; @@ -2156,7 +2670,7 @@ int kev_post_msg(struct kev_msg *event_msg) struct kern_event_pcb *ev_pcb; struct kern_event_msg *ev; char *tmp; - unsigned long total_size; + u_int32_t total_size; int i; /* Verify the message is small enough to fit in one mbuf w/o cluster */ @@ -2243,7 +2757,7 @@ kev_control(struct socket *so, struct kev_request *kev_req = (struct kev_request *) data; struct kern_event_pcb *ev_pcb; struct kev_vendor_code *kev_vendor; - u_long *id_value = (u_long *) data; + u_int32_t *id_value = (u_int32_t *) data; switch (cmd) { @@ -2271,9 +2785,9 @@ kev_control(struct socket *so, /* Make 
sure string is NULL terminated */ kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0; - - return mbuf_tag_id_find_internal(kev_vendor->vendor_string, - &kev_vendor->vendor_code, 0); + + return net_str_id_find_internal(kev_vendor->vendor_string, + &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0); default: return ENOTSUP; @@ -2295,7 +2809,10 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) st = &kinfo->kq_stat; st->vst_size = kq->kq_count; - st->vst_blksize = sizeof(struct kevent); + if (kq->kq_state & KQ_KEV64) + st->vst_blksize = sizeof(struct kevent64_s); + else + st->vst_blksize = sizeof(struct kevent); st->vst_mode = S_IFIFO; if (kq->kq_state & KQ_SEL) kinfo->kq_state |= PROC_KQUEUE_SELECT; diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 71dd14c12..4f17498f4 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -110,18 +110,25 @@ #include #include #include +#include -#include +#include #include #include +#include #include +#include #include #include #include #include +#include /* thread_wakeup() */ +#include +#include + #if CONFIG_MACF #include #include @@ -132,6 +139,7 @@ #include #include + #if CONFIG_DTRACE /* Do not include dtrace.h, it redefines kmem_[alloc/free] */ extern void (*dtrace_fasttrap_exec_ptr)(proc_t); @@ -144,7 +152,9 @@ extern void dtrace_lazy_dofs_destroy(proc_t); /* support for child creation in exec after vfork */ thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit); void vfork_exit(proc_t p, int rv); -int setsigvec(proc_t, int, struct __user_sigaction *); +int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); +void workqueue_exit(struct proc *); + /* * Mach things for which prototypes are unavailable from Mach headers @@ -167,6 +177,7 @@ extern struct savearea *get_user_regs(thread_t); #include #include #include +#include #include #include #include @@ -196,7 +207,7 @@ extern vm_map_t bsd_pageable_map; extern 
struct fileops vnops; #define ROUND_PTR(type, addr) \ - (type *)( ( (unsigned)(addr) + 16 - 1) \ + (type *)( ( (uintptr_t)(addr) + 16 - 1) \ & ~(16 - 1) ) struct image_params; /* Forward */ @@ -214,10 +225,11 @@ static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack, proc_t p); static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); static void exec_resettextvp(proc_t, struct image_params *); +static int check_for_signature(proc_t, struct image_params *); /* We don't want this one exported */ __private_extern__ -int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *); +int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *); /* * exec_add_string @@ -245,7 +257,7 @@ exec_add_string(struct image_params *imgp, user_addr_t str) error = E2BIG; break; } - if (IS_UIO_SYS_SPACE(imgp->ip_seg)) { + if (!UIO_SEG_IS_USER_SPACE(imgp->ip_seg)) { char *kstr = CAST_DOWN(char *,str); /* SAFE */ error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len); } else { @@ -310,7 +322,7 @@ exec_save_path(struct image_params *imgp, user_addr_t path, int seg) case UIO_USERSPACE64: /* Same for copyin()... */ error = copyinstr(path, imgp->ip_strings, len, &len); break; - case UIO_SYSSPACE32: + case UIO_SYSSPACE: error = copystr(kpath, imgp->ip_strings, len, &len); break; default: @@ -401,11 +413,12 @@ exec_powerpc32_imgact(struct image_params *imgp) /* * provide a replacement string for p->p_comm; we have to use an - * an alternate buffer for this, rather than replacing it directly, + * alternate buffer for this, rather than replacing it directly, * since the exec may fail and return to the parent. In that case, * we would have erroneously changed the parent p->p_comm instead. 
*/ - strlcpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr, MAXCOMLEN); + strlcpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr, MAXCOMLEN+1); + /* +1 to allow MAXCOMLEN characters to be copied */ return (-3); } @@ -506,7 +519,7 @@ exec_shell_imgact(struct image_params *imgp) *interp = '\0'; exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name), - UIO_SYSSPACE32); + UIO_SYSSPACE); ihp = &vdata[2]; while (ihp < line_endp) { @@ -661,7 +674,7 @@ exec_fat_imgact(struct image_params *imgp) /* Read the Mach-O header out of fat_arch */ error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, fat_arch.offset, - UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED), + UIO_SYSSPACE, (IO_UNIT|IO_NODELOCKED), cred, &resid, p); if (error) { goto bad; @@ -722,6 +735,7 @@ exec_mach_imgact(struct image_params *imgp) load_return_t lret; load_result_t load_result; struct _posix_spawnattr *psa = NULL; + int spawn = (imgp->ip_flags & IMGPF_SPAWN); /* * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference @@ -750,6 +764,12 @@ exec_mach_imgact(struct image_params *imgp) thread = current_thread(); uthread = get_bsdthread_info(thread); + /* + * Save off the vfexec state up front; we have to do this, because + * we need to know if we were in this state initally subsequent to + * creating the backing task, thread, and uthread for the child + * process (from the vfs_context_t from in img_parms). + */ if (uthread->uu_flag & UT_VFORK) vfexec = 1; /* Mark in exec */ @@ -793,6 +813,11 @@ exec_mach_imgact(struct image_params *imgp) if (error) goto bad; + AUDIT_ARG(argv, imgp->ip_argv, imgp->ip_argc, + imgp->ip_strendargvp - imgp->ip_argv); + AUDIT_ARG(envv, imgp->ip_strendargvp, imgp->ip_envc, + imgp->ip_strendp - imgp->ip_strendargvp); + /* * Hack for binary compatability; put three NULs on the end of the * string area, and round it up to the next word boundary. 
This @@ -818,14 +843,24 @@ exec_mach_imgact(struct image_params *imgp) } #endif /* IMGPF_POWERPC */ - if (vfexec) { - imgp->ip_vfork_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); - if (imgp->ip_vfork_thread == NULL) { - error = ENOMEM; - goto bad; + /* + * We are being called to activate an image subsequent to a vfork() + * operation; in this case, we know that our task, thread, and + * uthread are actualy those of our parent, and our proc, which we + * obtained indirectly from the image_params vfs_context_t, is the + * new child process. + */ + if (vfexec || spawn) { + if (vfexec) { + imgp->ip_new_thread = fork_create_child(task, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT)); + if (imgp->ip_new_thread == NULL) { + error = ENOMEM; + goto bad; + } } + /* reset local idea of thread, uthread, task */ - thread = imgp->ip_vfork_thread; + thread = imgp->ip_new_thread; uthread = get_bsdthread_info(thread); task = new_task = get_threadtask(thread); map = get_task_map(task); @@ -837,32 +872,23 @@ exec_mach_imgact(struct image_params *imgp) * We set these flags here; this is OK, since if we fail after * this point, we have already destroyed the parent process anyway. */ + task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0); if (imgp->ip_flags & IMGPF_IS_64BIT) { task_set_64bit(task, TRUE); - OSBitOrAtomic(P_LP64, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_LP64, &p->p_flag); } else { task_set_64bit(task, FALSE); - OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag); } /* * Load the Mach-O file. - */ - - /* + * * NOTE: An error after this point indicates we have potentially * destroyed or overwrote some process state while attempting an * execve() following a vfork(), which is an unrecoverable condition. */ - /* - * We reset the task to 64-bit (or not) here. It may have picked up - * a new map, and we need that to reflect its true 64-bit nature. 
- */ - - task_set_64bit(task, - ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)); - /* * Actually load the image file we previously decided to load. */ @@ -916,20 +942,14 @@ exec_mach_imgact(struct image_params *imgp) */ error = exec_handle_sugid(imgp); - if (!vfexec && (p->p_lflag & P_LTRACED)) + /* Make sure we won't interrupt ourself signalling a partial process */ + if (!vfexec && !spawn && (p->p_lflag & P_LTRACED)) psignal(p, SIGTRAP); if (error) { goto badtoolate; } -#if CONFIG_MACF - /* Determine if the map will allow VM_PROT_COPY */ - error = mac_proc_check_map_prot_copy_allow(p); - vm_map_set_prot_copy_allow(get_task_map(task), - error ? FALSE : TRUE); -#endif - if (load_result.unixproc && create_unix_stack(get_task_map(task), load_result.user_stack, @@ -939,7 +959,15 @@ exec_mach_imgact(struct image_params *imgp) goto badtoolate; } - if (vfexec) { + /* + * There is no continuing workq context during + * vfork exec. So no need to reset then. Otherwise + * clear the workqueue context. 
+ */ + if (vfexec == 0 && spawn == 0) { + (void)workqueue_exit(p); + } + if (vfexec || spawn) { old_map = vm_map_switch(get_task_map(task)); } @@ -953,7 +981,7 @@ exec_mach_imgact(struct image_params *imgp) ap = p->user_stack; error = exec_copyout_strings(imgp, &ap); if (error) { - if (vfexec) + if (vfexec || spawn) vm_map_switch(old_map); goto badtoolate; } @@ -973,13 +1001,15 @@ exec_mach_imgact(struct image_params *imgp) error = suword(ap, load_result.mach_header); } if (error) { - if (vfexec) + if (vfexec || spawn) vm_map_switch(old_map); goto badtoolate; } + task_set_dyld_info(task, load_result.all_image_info_addr, + load_result.all_image_info_size); } - if (vfexec) { + if (vfexec || spawn) { vm_map_switch(old_map); } /* Set the entry point */ @@ -1028,6 +1058,9 @@ exec_mach_imgact(struct image_params *imgp) p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0'; } + memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid)); + +// dtrace code cleanup needed #if CONFIG_DTRACE /* * Invalidate any predicate evaluation already cached for this thread by DTrace. @@ -1072,11 +1105,11 @@ exec_mach_imgact(struct image_params *imgp) */ kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); - if (vfexec) { + if (vfexec || spawn) { KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, - p->p_pid ,0,0,0, (unsigned int)thread); + p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread)); KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE, - dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread); + dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread)); } else { KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE, p->p_pid ,0,0,0,0); @@ -1092,11 +1125,11 @@ exec_mach_imgact(struct image_params *imgp) * from the process. 
*/ if (((imgp->ip_flags & IMGPF_POWERPC) != 0)) - OSBitOrAtomic(P_TRANSLATED, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_TRANSLATED, &p->p_flag); else #endif /* IMGPF_POWERPC */ - OSBitAndAtomic(~((uint32_t)P_TRANSLATED), (UInt32 *)&p->p_flag); - OSBitAndAtomic(~((uint32_t)P_AFFINITY), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_TRANSLATED), &p->p_flag); + OSBitAndAtomic(~((uint32_t)P_AFFINITY), &p->p_flag); /* * If posix_spawned with the START_SUSPENDED flag, stop the @@ -1114,9 +1147,10 @@ exec_mach_imgact(struct image_params *imgp) /* * mark as execed, wakeup the process that vforked (if any) and tell - * it that it now has it's own resources back + * it that it now has its own resources back */ - OSBitOrAtomic(P_EXEC, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_EXEC, &p->p_flag); + proc_resetregister(p); if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) { proc_lock(p); p->p_lflag &= ~P_LPPWAIT; @@ -1124,14 +1158,19 @@ exec_mach_imgact(struct image_params *imgp) wakeup((caddr_t)p->p_pptr); } + /* + * Pay for our earlier safety; deliver the delayed signals from + * the incomplete vfexec process now that it's complete. 
+ */ if (vfexec && (p->p_lflag & P_LTRACED)) { psignal_vfork(p, new_task, thread, SIGTRAP); } badtoolate: +if (!spawn) proc_knote(p, NOTE_EXEC); - if (vfexec) { + if (vfexec || spawn) { task_deallocate(new_task); thread_deallocate(thread); if (error) @@ -1229,7 +1268,9 @@ exec_activate_image(struct image_params *imgp) imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */ imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */ - proc_transstart(p, 0); + error = proc_transstart(p, 0); + if (error) + goto bad_notrans; error = exec_check_permissions(imgp); if (error) @@ -1242,7 +1283,7 @@ exec_activate_image(struct image_params *imgp) } error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0, - UIO_SYSSPACE32, IO_NODELOCKED, + UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(imgp->ip_vfs_context), &resid, vfs_context_proc(imgp->ip_vfs_context)); if (error) @@ -1279,13 +1320,13 @@ exec_activate_image(struct image_params *imgp) break; } mac_vnode_label_copy(imgp->ip_vp->v_label, - imgp->ip_scriptlabelp); + imgp->ip_scriptlabelp); #endif vnode_put(imgp->ip_vp); imgp->ip_vp = NULL; /* already put */ - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) | - (FOLLOW | LOCKLEAF); + + NDINIT(&nd, LOOKUP, (nd.ni_cnd.cn_flags & HASBUF) | (FOLLOW | LOCKLEAF), + UIO_SYSSPACE, CAST_USER_ADDR_T(imgp->ip_interp_name), imgp->ip_vfs_context); #ifdef IMGPF_POWERPC /* @@ -1297,8 +1338,6 @@ exec_activate_image(struct image_params *imgp) nd.ni_cnd.cn_flags &= ~FOLLOW; #endif /* IMGPF_POWERPC */ - nd.ni_segflg = UIO_SYSSPACE32; - nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name); proc_transend(p, 0); goto again; @@ -1333,16 +1372,18 @@ exec_activate_image(struct image_params *imgp) * exec_handle_port_actions * * Description: Go through the _posix_port_actions_t contents, - * calling task_set_special_port and task_set_exception_ports - * for the current task. 
+ * calling task_set_special_port, task_set_exception_ports + * and/or audit_session_spawnjoin for the current task. * * Parameters: struct image_params * Image parameter block + * short psa_flags posix spawn attribute flags * * Returns: 0 Success * KERN_FAILURE Failure + * ENOTSUP Illegal posix_spawn attr flag was set */ static int -exec_handle_port_actions(struct image_params *imgp) +exec_handle_port_actions(struct image_params *imgp, short psa_flags) { _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; proc_t p = vfs_context_proc(imgp->ip_vfs_context); @@ -1356,7 +1397,7 @@ exec_handle_port_actions(struct image_params *imgp) act = &pacts->pspa_actions[i]; ret = ipc_object_copyin(get_task_ipcspace(current_task()), - (mach_port_name_t) act->new_port, + CAST_MACH_PORT_TO_NAME(act->new_port), MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *) &port); @@ -1365,17 +1406,29 @@ exec_handle_port_actions(struct image_params *imgp) switch (act->port_type) { case PSPA_SPECIAL: + /* Only allowed when not under vfork */ + if (!(psa_flags & POSIX_SPAWN_SETEXEC)) + return ENOTSUP; ret = task_set_special_port(task, act->which, port); break; case PSPA_EXCEPTION: + /* Only allowed when not under vfork */ + if (!(psa_flags & POSIX_SPAWN_SETEXEC)) + return ENOTSUP; ret = task_set_exception_ports(task, act->mask, port, act->behavior, act->flavor); break; +#if CONFIG_AUDIT + case PSPA_AU_SESSION: + ret = audit_session_spawnjoin(p, + port); + break; +#endif default: ret = KERN_FAILURE; } @@ -1414,7 +1467,7 @@ exec_handle_file_actions(struct image_params *imgp) int action; proc_t p = vfs_context_proc(imgp->ip_vfs_context); _posix_spawn_file_actions_t px_sfap = imgp->ip_px_sfa; - register_t ival[2]; /* dummy retval for system calls) */ + int ival[2]; /* dummy retval for system calls) */ for (action = 0; action < px_sfap->psfa_act_count; action++) { _psfa_action_t *psfa = &px_sfap->psfa_act_acts[ action]; @@ -1550,43 +1603,51 @@ exec_handle_file_actions(struct image_params *imgp) * 
exec_activate_image:??? * mac_execve_enter:??? * - * TODO: More gracefully handle failures after vfork - * Expect to need __mac_posix_spawn() at some point... + * TODO: Expect to need __mac_posix_spawn() at some point... * Handle posix_spawnattr_t * Handle posix_spawn_file_actions_t */ int -posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) +posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) { proc_t p = ap; /* quiet bogus GCC vfork() warning */ user_addr_t pid = uap->pid; - register_t ival[2]; /* dummy retval for vfork() */ - struct image_params image_params, *imgp; - struct vnode_attr va; - struct vnode_attr origva; + int ival[2]; /* dummy retval for setpgid() */ + char *bufp = NULL; + struct image_params *imgp; + struct vnode_attr *vap; + struct vnode_attr *origvap; struct uthread *uthread = 0; /* compiler complains if not set to 0*/ int error, sig; - task_t task; - int numthreads; char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); - int undo_vfork = 0; struct vfs_context context; struct user__posix_spawn_args_desc px_args; struct _posix_spawnattr px_sa; _posix_spawn_file_actions_t px_sfap = NULL; _posix_spawn_port_actions_t px_spap = NULL; - struct __user_sigaction vec; + struct __kern_sigaction vec; + boolean_t spawn_no_exec = FALSE; - imgp = &image_params; + /* + * Allocate a big chunk for locals instead of using stack since these + * structures a pretty big. 
+ */ + MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO); + imgp = (struct image_params *) bufp; + if (bufp == NULL) { + error = ENOMEM; + goto bad; + } + vap = (struct vnode_attr *) (bufp + sizeof(*imgp)); + origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap)); /* Initialize the common data in the image_params structure */ - bzero(imgp, sizeof(*imgp)); imgp->ip_user_fname = uap->path; imgp->ip_user_argv = uap->argv; imgp->ip_user_envv = uap->envp; - imgp->ip_vattr = &va; - imgp->ip_origvattr = &origva; + imgp->ip_vattr = vap; + imgp->ip_origvattr = origvap; imgp->ip_vfs_context = &context; imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ @@ -1596,7 +1657,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) if(is_64) { error = copyin(uap->adesc, &px_args, sizeof(px_args)); } else { - struct _posix_spawn_args_desc px_args32; + struct user32__posix_spawn_args_desc px_args32; error = copyin(uap->adesc, &px_args32, sizeof(px_args32)); @@ -1665,20 +1726,49 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) } } + /* set uthread to parent */ + uthread = get_bsdthread_info(current_thread()); + + /* + * ; this does not result in a behaviour change + * relative to Leopard, so there should not be any existing code + * which depends on it. + */ + if (uthread->uu_flag & UT_VFORK) { + error = EINVAL; + goto bad; + } + + /* + * If we don't have the extention flag that turns "posix_spawn()" + * into "execve() with options", then we will be creating a new + * process which does not inherit memory from the parent process, + * which is one of the most expensive things about using fork() + * and execve(). 
+ */ if (imgp->ip_px_sa == NULL || !(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)){ - if ((error = vfork(p, NULL, ival)) != 0) + if ((error = fork1(p, &imgp->ip_new_thread, PROC_CREATE_SPAWN)) != 0) goto bad; - undo_vfork = 1; + imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ + spawn_no_exec = TRUE; /* used in later tests */ } - /* "reenter the kernel" on a new vfork()'ed process */ - uthread = get_bsdthread_info(current_thread()); - if (undo_vfork) - p = uthread->uu_proc; + if (spawn_no_exec) + p = (proc_t)get_bsdthreadtask_info(imgp->ip_new_thread); + + /* By default, the thread everyone plays with is the parent */ context.vc_thread = current_thread(); context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */ + /* + * However, if we're not in the setexec case, redirect the context + * to the newly created process instead + */ + if (spawn_no_exec) + context.vc_thread = imgp->ip_new_thread; + + /* * Post fdcopy(), pre exec_handle_sugid() - this is where we want * to handle the file_actions. Since vfork() also ends up setting @@ -1693,18 +1783,20 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) /* Has spawn port actions? */ if (imgp->ip_px_spa != NULL) { - /* Only allowed when not under vfork */ - if (!(px_sa.psa_flags & POSIX_SPAWN_SETEXEC)) { - error = ENOTSUP; - goto bad; - } - if((error = exec_handle_port_actions(imgp)) != 0) + /* + * The check for the POSIX_SPAWN_SETEXEC flag is done in + * exec_handle_port_actions(). + */ + if((error = exec_handle_port_actions(imgp, px_sa.psa_flags)) != 0) goto bad; } /* Has spawn attr? */ if (imgp->ip_px_sa != NULL) { - /* Set the process group ID of the child process */ + /* + * Set the process group ID of the child process; this has + * to happen before the image activation. 
+ */ if (px_sa.psa_flags & POSIX_SPAWN_SETPGROUP) { struct setpgid_args spga; spga.pid = p->p_pid; @@ -1716,11 +1808,15 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) if((error = setpgid(p, &spga, ival)) != 0) goto bad; } + /* * Reset UID/GID to parent's RUID/RGID; This works only * because the operation occurs *after* the vfork() and * before the call to exec_handle_sugid() by the image - * activator called from exec_activate_image(). + * activator called from exec_activate_image(). POSIX + * requires that any setuid/setgid bits on the process + * image will take precedence over the spawn attributes + * (re)setting them. * * The use of p_ucred is safe, since we are acting on the * new process, and it has no threads other than the one @@ -1732,16 +1828,71 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) if (my_new_cred != my_cred) p->p_ucred = my_new_cred; } + } + + /* + * Clear transition flag so we won't hang if exec_activate_image() causes + * an automount (and launchd does a proc sysctl to service it). + * + * , . + */ + if (spawn_no_exec) { + proc_transend(p, 0); + } + +#if MAC_SPAWN /* XXX */ + if (uap->mac_p != USER_ADDR_NULL) { + error = mac_execve_enter(uap->mac_p, imgp); + if (error) + goto bad; + } +#endif + + /* + * Activate the image + */ + error = exec_activate_image(imgp); + + /* Image not claimed by any activator? */ + if (error == -1) + error = ENOEXEC; + + /* + * If we have a spawn attr, and it contains signal related flags, + * the we need to process them in the "context" of the new child + * process, so we have to process it following image activation, + * prior to making the thread runnable in user space. This is + * necessitated by some signal information being per-thread rather + * than per-process, and we don't have the new allocation in hand + * until after the image is activated. 
+ */ + if (!error && imgp->ip_px_sa != NULL) { + thread_t child_thread = current_thread(); + uthread_t child_uthread = uthread; + + /* + * If we created a new child thread, then the thread and + * uthread are different than the current ones; otherwise, + * we leave them, since we are in the exec case instead. + */ + if (spawn_no_exec) { + child_thread = imgp->ip_new_thread; + child_uthread = get_bsdthread_info(child_thread); + } + /* * Mask a list of signals, instead of them being unmasked, if * they were unmasked in the parent; note that some signals * are not maskable. */ if (px_sa.psa_flags & POSIX_SPAWN_SETSIGMASK) - uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask); + child_uthread->uu_sigmask = (px_sa.psa_sigmask & ~sigcantmask); /* * Default a list of signals instead of ignoring them, if - * they were ignored in the parent. + * they were ignored in the parent. Note that we pass + * spawn_no_exec to setsigvec() to indicate that we called + * fork1() and therefore do not need to call proc_signalstart() + * internally. */ if (px_sa.psa_flags & POSIX_SPAWN_SETSIGDEF) { vec.sa_handler = SIG_DFL; @@ -1749,107 +1900,170 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) vec.sa_mask = 0; vec.sa_flags = 0; for (sig = 0; sig < NSIG; sig++) - if (px_sa.psa_sigdefault && 1 << sig) { - error = setsigvec(p, sig, &vec); + if (px_sa.psa_sigdefault & (1 << sig)) { + error = setsigvec(p, child_thread, sig + 1, &vec, spawn_no_exec); } } } - /* - * XXXAUDIT: Currently, we only audit the pathname of the binary. - * There may also be poor interaction with dyld. 
- */ - - task = current_task(); - - /* If we're not in vfork, don't permit a mutithreaded task to exec */ - if (!(uthread->uu_flag & UT_VFORK)) { - if (task != kernel_task) { - numthreads = get_task_numacts(task); - if (numthreads <= 0 ) { - error = EINVAL; - goto bad; - } - if (numthreads > 1) { - error = ENOTSUP; - goto bad; - } +bad: + if (error == 0) { + /* upon successful spawn, re/set the proc control state */ + if (imgp->ip_px_sa != NULL) { + switch (px_sa.psa_pcontrol) { + case POSIX_SPAWN_PCONTROL_THROTTLE: + p->p_pcaction = P_PCTHROTTLE; + break; + case POSIX_SPAWN_PCONTROL_SUSPEND: + p->p_pcaction = P_PCSUSP; + break; + case POSIX_SPAWN_PCONTROL_KILL: + p->p_pcaction = P_PCKILL; + break; + case POSIX_SPAWN_PCONTROL_NONE: + default: + p->p_pcaction = 0; + break; + }; } + exec_resettextvp(p, imgp); } -#if MAC_SPAWN /* XXX */ - if (uap->mac_p != USER_ADDR_NULL) { - error = mac_execve_enter(uap->mac_p, imgp); - if (error) - goto bad; + /* + * If we successfully called fork1(), we always need to do this; + * we identify this case by noting the IMGPF_SPAWN flag. This is + * because we come back from that call with signals blocked in the + * child, and we have to unblock them, but we want to wait until + * after we've performed any spawn actions. This has to happen + * before check_for_signature(), which uses psignal. + */ + if (spawn_no_exec) { + /* + * Drop the signal lock on the child which was taken on our + * behalf by forkproc()/cloneproc() to prevent signals being + * received by the child in a partially constructed state. + */ + proc_signalend(p, 0); + + /* flag the 'fork' has occurred */ + proc_knote(p->p_pptr, NOTE_FORK | p->p_pid); + /* then flag exec has occurred */ + proc_knote(p, NOTE_EXEC); + DTRACE_PROC1(create, proc_t, p); } -#endif - if ((error = exec_activate_image(imgp)) != 0) - goto bad; -bad: - /* Image not claimed by any activator? 
*/ - if (error == -1) - error = ENOEXEC; + /* + * We have to delay operations which might throw a signal until after + * the signals have been unblocked; however, we want that to happen + * after exec_resettextvp() so that the textvp is correct when they + * fire. + */ if (error == 0) { - exec_resettextvp(p, imgp); - } - if (imgp->ip_vp) - vnode_put(imgp->ip_vp); - if (imgp->ip_strings) - execargs_free(imgp); - if (imgp->ip_px_sfa != NULL) - FREE(imgp->ip_px_sfa, M_TEMP); - if (imgp->ip_px_spa != NULL) - FREE(imgp->ip_px_spa, M_TEMP); + error = check_for_signature(p, imgp); + + /* + * Pay for our earlier safety; deliver the delayed signals from + * the incomplete spawn process now that it's complete. + */ + if (imgp != NULL && spawn_no_exec && (p->p_lflag & P_LTRACED)) { + psignal_vfork(p, p->task, imgp->ip_new_thread, SIGTRAP); + } + } + + if (imgp != NULL) { + if (imgp->ip_vp) + vnode_put(imgp->ip_vp); + if (imgp->ip_strings) + execargs_free(imgp); + if (imgp->ip_px_sfa != NULL) + FREE(imgp->ip_px_sfa, M_TEMP); + if (imgp->ip_px_spa != NULL) + FREE(imgp->ip_px_spa, M_TEMP); + #if CONFIG_MACF - if (imgp->ip_execlabelp) - mac_cred_label_free(imgp->ip_execlabelp); - if (imgp->ip_scriptlabelp) - mac_vnode_label_free(imgp->ip_scriptlabelp); + if (imgp->ip_execlabelp) + mac_cred_label_free(imgp->ip_execlabelp); + if (imgp->ip_scriptlabelp) + mac_vnode_label_free(imgp->ip_scriptlabelp); #endif - if (undo_vfork) { - if (error) { - DTRACE_PROC1(exec__failure, int, error); - vfork_exit(p, W_EXITCODE(-1, 0)); - } else { - DTRACE_PROC(exec__success); - } + } + + if (error) { + DTRACE_PROC1(exec__failure, int, error); + } else { + /* + * temporary - so dtrace call to current_proc() + * returns the child process instead of the parent. 
+ */ + if (imgp != NULL && imgp->ip_flags & IMGPF_SPAWN) { + p->p_lflag |= P_LINVFORK; + p->p_vforkact = current_thread(); + uthread->uu_proc = p; + uthread->uu_flag |= UT_VFORK; + } + + DTRACE_PROC(exec__success); + + /* + * temporary - so dtrace call to current_proc() + * returns the child process instead of the parent. + */ + if (imgp != NULL && imgp->ip_flags & IMGPF_SPAWN) { + p->p_lflag &= ~P_LINVFORK; + p->p_vforkact = NULL; + uthread->uu_proc = PROC_NULL; + uthread->uu_flag &= ~UT_VFORK; + } + } + + /* Return to both the parent and the child? */ + if (imgp != NULL && spawn_no_exec) { /* - * Returning to the parent process... - * * If the parent wants the pid, copy it out */ if (pid != USER_ADDR_NULL) (void)suword(pid, p->p_pid); retval[0] = error; - /* - * Override inherited code signing flags with the - * ones for the process that is being successfully - * loaded - */ - proc_lock(p); - p->p_csflags = imgp->ip_csflags; - proc_unlock(p); - vfork_return(p, NULL, error); - (void)thread_resume(imgp->ip_vfork_thread); - } - if (!error) { /* - * Override inherited code signing flags with the - * ones for the process that is being successfully - * loaded + * If we had an error, perform an internal reap ; this is + * entirely safe, as we have a real process backing us. */ - proc_lock(p); - p->p_csflags = imgp->ip_csflags; - proc_unlock(p); - DTRACE_PROC(exec__success); - } else { - DTRACE_PROC1(exec__failure, int, error); - } + if (error) { + proc_list_lock(); + p->p_listflag |= P_LIST_DEADPARENT; + proc_list_unlock(); + proc_lock(p); + /* make sure no one else has killed it off... 
*/ + if (p->p_stat != SZOMB && p->exit_thread == NULL) { + p->exit_thread = current_thread(); + proc_unlock(p); + exit1(p, 1, (int *)NULL); + task_deallocate(get_threadtask(imgp->ip_new_thread)); + thread_deallocate(imgp->ip_new_thread); + } else { + /* someone is doing it for us; just skip it */ + proc_unlock(p); + } + } else { + /* + * Return" to the child + * + * Note: the image activator earlier dropped the + * task/thread references to the newly spawned + * process; this is OK, since we still have suspended + * queue references on them, so we should be fine + * with the delayed resume of the thread here. + */ + (void)thread_resume(imgp->ip_new_thread); + } + } + if (bufp != NULL) { + FREE(bufp, M_TEMP); + } + return(error); } @@ -1877,7 +2091,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, register_t *retval) */ /* ARGSUSED */ int -execve(proc_t p, struct execve_args *uap, register_t *retval) +execve(proc_t p, struct execve_args *uap, int32_t *retval) { struct __mac_execve_args muap; int err; @@ -1918,15 +2132,13 @@ execve(proc_t p, struct execve_args *uap, register_t *retval) * TODO: Dynamic linker header address on stack is copied via suword() */ int -__mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval) +__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) { - struct image_params image_params, *imgp; - struct vnode_attr va; - struct vnode_attr origva; - struct uthread *uthread; + char *bufp = NULL; + struct image_params *imgp; + struct vnode_attr *vap; + struct vnode_attr *origvap; int error; - task_t task; - int numthreads; char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */ int is_64 = IS_64BIT_PROCESS(p); struct vfs_context context; @@ -1934,53 +2146,35 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval) context.vc_thread = current_thread(); context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */ - imgp = &image_params; - + /* Allocate a big chunk 
for locals instead of using stack since these + * structures a pretty big. + */ + MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO); + imgp = (struct image_params *) bufp; + if (bufp == NULL) { + error = ENOMEM; + goto exit_with_error; + } + vap = (struct vnode_attr *) (bufp + sizeof(*imgp)); + origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap)); + /* Initialize the common data in the image_params structure */ - bzero(imgp, sizeof(*imgp)); imgp->ip_user_fname = uap->fname; imgp->ip_user_argv = uap->argp; imgp->ip_user_envv = uap->envp; - imgp->ip_vattr = &va; - imgp->ip_origvattr = &origva; + imgp->ip_vattr = vap; + imgp->ip_origvattr = origvap; imgp->ip_vfs_context = &context; imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE); imgp->ip_p_comm = alt_p_comm; /* for PowerPC */ imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32); - /* - * XXXAUDIT: Currently, we only audit the pathname of the binary. - * There may also be poor interaction with dyld. 
- */ - - task = current_task(); - uthread = get_bsdthread_info(current_thread()); - - /* If we're not in vfork, don't permit a mutithreaded task to exec */ - if (!(uthread->uu_flag & UT_VFORK)) { - if (task != kernel_task) { - proc_lock(p); - numthreads = get_task_numactivethreads(task); - if (numthreads <= 0 ) { - proc_unlock(p); - kauth_cred_unref(&context.vc_ucred); - return(EINVAL); - } - if (numthreads > 1) { - proc_unlock(p); - kauth_cred_unref(&context.vc_ucred); - return(ENOTSUP); - } - proc_unlock(p); - } - } - #if CONFIG_MACF if (uap->mac_p != USER_ADDR_NULL) { error = mac_execve_enter(uap->mac_p, imgp); if (error) { kauth_cred_unref(&context.vc_ucred); - return (error); + goto exit_with_error; } } #endif @@ -1995,6 +2189,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval) if (error == 0) { exec_resettextvp(p, imgp); + error = check_for_signature(p, imgp); } if (imgp->ip_vp != NULLVP) vnode_put(imgp->ip_vp); @@ -2007,23 +2202,25 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval) mac_vnode_label_free(imgp->ip_scriptlabelp); #endif if (!error) { - /* - * Override inherited code signing flags with the - * ones for the process that is being successfully - * loaded - */ - proc_lock(p); - p->p_csflags = imgp->ip_csflags; - proc_unlock(p); - DTRACE_PROC(exec__success); + struct uthread *uthread; + /* Sever any extant thread affinity */ + thread_affinity_exec(current_thread()); + + DTRACE_PROC(exec__success); + uthread = get_bsdthread_info(current_thread()); if (uthread->uu_flag & UT_VFORK) { vfork_return(p, retval, p->p_pid); - (void)thread_resume(imgp->ip_vfork_thread); + (void)thread_resume(imgp->ip_new_thread); } } else { DTRACE_PROC1(exec__failure, int, error); } + +exit_with_error: + if (bufp != NULL) { + FREE(bufp, M_TEMP); + } return(error); } @@ -2086,7 +2283,7 @@ copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size) if (ptr_size == 4) { /* 64 bit value containing 32 bit address */ - unsigned int i 
= CAST_DOWN(unsigned int,ua); /* SAFE */ + unsigned int i = CAST_DOWN_EXPLICIT(unsigned int,ua); /* SAFE */ error = copyout(&i, ptr, 4); } else { @@ -2172,13 +2369,13 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) user_addr_t ptr_area; /* argv[], env[], exec_path */ user_addr_t stack; int stringc = imgp->ip_argc + imgp->ip_envc; - int len; + size_t len; int error; - int strspace; + ssize_t strspace; stack = *stackp; - unsigned patharea_len = imgp->ip_argv - imgp->ip_strings; + size_t patharea_len = imgp->ip_argv - imgp->ip_strings; int envc_add = 0; /* @@ -2212,7 +2409,7 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) len = 0; error = copyoutstr(imgp->ip_strings, path_area, patharea_len, - (size_t *)&len); + &len); if (error) goto bad; @@ -2274,8 +2471,8 @@ exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp) break; } error = copyoutstr(argv, string_area, - (unsigned)strspace, - (size_t *)&len); + strspace, + &len); string_area += len; argv += len; strspace -= len; @@ -2316,6 +2513,7 @@ static int exec_extract_strings(struct image_params *imgp) { int error = 0; + int strsz = 0; int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4; user_addr_t argv = imgp->ip_user_argv; user_addr_t envv = imgp->ip_user_envv; @@ -2329,6 +2527,14 @@ exec_extract_strings(struct image_params *imgp) /* Now, get rest of arguments */ + /* + * Adjust space reserved for the path name by however much padding it + * needs. Doing this here since we didn't know if this would be a 32- + * or 64-bit process back in exec_save_path. + */ + strsz = strlen(imgp->ip_strings) + 1; + imgp->ip_strspace -= ((strsz + ptr_size-1) & ~(ptr_size-1)) - strsz; + /* * If we are running an interpreter, replace the av[0] that was * passed to execve() with the fully qualified path name that was @@ -2373,6 +2579,9 @@ exec_extract_strings(struct image_params *imgp) goto bad; imgp->ip_argc++; } + + /* Note where the args end and env begins. 
*/ + imgp->ip_strendargvp = imgp->ip_strendp; /* Now, get the environment */ while (envv != 0LL) { @@ -2460,12 +2669,8 @@ exec_check_permissions(struct image_params *imgp) imgp->ip_arch_size = vap->va_data_size; /* Disable setuid-ness for traced programs or if MNT_NOSUID */ - if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) { + if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) vap->va_mode &= ~(VSUID | VSGID); -#if CONFIG_MACF - imgp->ip_no_trans = 1; -#endif - } #if CONFIG_MACF error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp); @@ -2500,7 +2705,7 @@ exec_check_permissions(struct image_params *imgp) * cached values, then we set the PowerPC environment flag. */ if (vap->va_fsid == exec_archhandler_ppc.fsid && - vap->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) { + vap->va_fileid == (uint64_t)((uint32_t)exec_archhandler_ppc.fileid)) { imgp->ip_flags |= IMGPF_POWERPC; } #endif /* IMGPF_POWERPC */ @@ -2565,7 +2770,7 @@ exec_handle_sugid(struct image_params *imgp) imgp->ip_execlabelp, p); #endif - OSBitAndAtomic(~((uint32_t)P_SUGID), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag); /* * Order of the following is important; group checks must go last, @@ -2668,13 +2873,13 @@ exec_handle_sugid(struct image_params *imgp) * running this code. */ if (!leave_sugid_clear) - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); /* Cache the vnode for /dev/null the first time around */ if (dev_null == NULLVP) { struct nameidata nd1; - NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32, + NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T("/dev/null"), imgp->ip_vfs_context); @@ -2862,9 +3067,9 @@ load_init_program(proc_t p) { vm_offset_t init_addr; int argc = 0; - char *argv[3]; + uint32_t argv[3]; int error; - register_t retval[2]; + int retval[2]; /* * Copy out program name. 
@@ -2879,7 +3084,7 @@ load_init_program(proc_t p) (void) copyout((caddr_t) init_program_name, CAST_USER_ADDR_T(init_addr), (unsigned) sizeof(init_program_name)+1); - argv[argc++] = (char *) init_addr; + argv[argc++] = (uint32_t)init_addr; init_addr += sizeof(init_program_name); init_addr = (vm_offset_t)ROUND_PTR(char, init_addr); @@ -2894,7 +3099,7 @@ load_init_program(proc_t p) copyout(init_args, CAST_USER_ADDR_T(init_addr), strlen(init_args)); - argv[argc++] = (char *)init_addr; + argv[argc++] = (uint32_t)init_addr; init_addr += strlen(init_args); init_addr = (vm_offset_t)ROUND_PTR(char, init_addr); @@ -2903,7 +3108,7 @@ load_init_program(proc_t p) /* * Null-end the argument list */ - argv[argc] = NULL; + argv[argc] = 0; /* * Copy out the argument list. @@ -3018,30 +3223,101 @@ extern semaphore_t execve_semaphore; * not modified its environment, we can't really know that it's * really a block there as well. */ + + +static int execargs_waiters = 0; +lck_mtx_t *execargs_cache_lock; + +static void +execargs_lock_lock(void) { + lck_mtx_lock_spin(execargs_cache_lock); +} + +static void +execargs_lock_unlock(void) { + lck_mtx_unlock(execargs_cache_lock); +} + +static void +execargs_lock_sleep(void) { + lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_UNINT); +} + +static kern_return_t +execargs_purgeable_allocate(char **execarg_address) { + kern_return_t kr = vm_allocate(bsd_pageable_map, (vm_offset_t *)execarg_address, NCARGS + PAGE_SIZE, VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE); + assert(kr == KERN_SUCCESS); + return kr; +} + +static kern_return_t +execargs_purgeable_reference(void *execarg_address) { + int state = VM_PURGABLE_NONVOLATILE; + kern_return_t kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state); + + assert(kr == KERN_SUCCESS); + return kr; +} + +static kern_return_t +execargs_purgeable_volatilize(void *execarg_address) { + int state = VM_PURGABLE_VOLATILE | 
VM_PURGABLE_ORDERING_OBSOLETE; + kern_return_t kr; + kr = vm_purgable_control(bsd_pageable_map, (vm_offset_t) execarg_address, VM_PURGABLE_SET_STATE, &state); + + assert(kr == KERN_SUCCESS); + + return kr; +} + +static void +execargs_wakeup_waiters(void) { + thread_wakeup(&execargs_free_count); +} + static int execargs_alloc(struct image_params *imgp) { kern_return_t kret; + int i, cache_index = -1; - kret = semaphore_wait(execve_semaphore); - if (kret != KERN_SUCCESS) - switch (kret) { - default: - return (EINVAL); - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - return (EACCES); - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - return (EINTR); + execargs_lock_lock(); + + while (execargs_free_count == 0) { + execargs_waiters++; + execargs_lock_sleep(); + execargs_waiters--; + } + + execargs_free_count--; + + for (i = 0; i < execargs_cache_size; i++) { + vm_offset_t element = execargs_cache[i]; + if (element) { + cache_index = i; + imgp->ip_strings = (char *)(execargs_cache[i]); + execargs_cache[i] = 0; + break; } + } - kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE); - imgp->ip_vdata = imgp->ip_strings + NCARGS; + assert(execargs_free_count >= 0); + + execargs_lock_unlock(); + + if (cache_index == -1) { + kret = execargs_purgeable_allocate(&imgp->ip_strings); + } + else + kret = execargs_purgeable_reference(imgp->ip_strings); + + assert(kret == KERN_SUCCESS); if (kret != KERN_SUCCESS) { - semaphore_signal(execve_semaphore); return (ENOMEM); } + + imgp->ip_vdata = imgp->ip_strings + NCARGS; + return (0); } @@ -3062,23 +3338,34 @@ static int execargs_free(struct image_params *imgp) { kern_return_t kret; + int i; + boolean_t needs_wakeup = FALSE; + + kret = execargs_purgeable_volatilize(imgp->ip_strings); - kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE); - imgp->ip_strings = NULL; - - kret = semaphore_signal(execve_semaphore); - switch (kret) { - case 
KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - return (EINVAL); - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - return (EINTR); - case KERN_SUCCESS: - return(0); - default: - return (EINVAL); + execargs_lock_lock(); + execargs_free_count++; + + for (i = 0; i < execargs_cache_size; i++) { + vm_offset_t element = execargs_cache[i]; + if (element == 0) { + execargs_cache[i] = (vm_offset_t) imgp->ip_strings; + imgp->ip_strings = NULL; + break; + } } + + assert(imgp->ip_strings == NULL); + + if (execargs_waiters > 0) + needs_wakeup = TRUE; + + execargs_lock_unlock(); + + if (needs_wakeup == TRUE) + execargs_wakeup_waiters(); + + return ((kret == KERN_SUCCESS ? 0 : EINVAL)); } static void @@ -3115,3 +3402,56 @@ exec_resettextvp(proc_t p, struct image_params *imgp) } +static int +check_for_signature(proc_t p, struct image_params *imgp) +{ + mach_port_t port = NULL; + kern_return_t error = 0; + unsigned char hash[SHA1_RESULTLEN]; + + /* + * Override inherited code signing flags with the + * ones for the process that is being successfully + * loaded + */ + proc_lock(p); + p->p_csflags = imgp->ip_csflags; + proc_unlock(p); + + /* Set the switch_protect flag on the map */ + if(p->p_csflags & (CS_HARD|CS_KILL)) { + vm_map_switch_protect(get_task_map(p->task), TRUE); + } + + /* + * If the task_access_port is set and the proc isn't signed, + * ask for a code signature from user space. Fail the exec + * if permission is denied. + */ + error = task_get_task_access_port(p->task, &port); + if (error == 0 && IPC_PORT_VALID(port) && !(p->p_csflags & CS_VALID)) { + error = find_code_signature(port, p->p_pid); + if (error == KERN_FAILURE) { + /* Make very sure execution fails */ + psignal(p, SIGKILL); + return EACCES; + } + + /* Only do this if exec_resettextvp() did not fail */ + if (p->p_textvp != NULLVP) { + /* + * If there's a new code directory, mark this process + * as signed. 
+ */ + error = ubc_cs_getcdhash(p->p_textvp, p->p_textoff, hash); + if (error == 0) { + proc_lock(p); + p->p_csflags |= CS_VALID; + proc_unlock(p); + } + } + } + + return KERN_SUCCESS; +} + diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index a8e9ef7f0..f7d180479 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -98,13 +98,14 @@ #include #include #include +#include #include /* fdfree */ #if SYSV_SHM #include /* shmexit */ #endif #include /* acct_process */ -#include +#include #include #include @@ -133,7 +134,6 @@ extern void dtrace_lazy_dofs_destroy(proc_t); #include #include #include -#include /* init_process */ #include @@ -141,7 +141,8 @@ extern char init_task_failure_data[]; void proc_prepareexit(proc_t p, int rv); void vfork_exit(proc_t p, int rv); void vproc_exit(proc_t p); -__private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p); +__private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p); +__private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p); static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock); /* @@ -155,7 +156,6 @@ int *get_bsduthreadrval(thread_t); kern_return_t sys_perf_notify(thread_t thread, int pid); kern_return_t abnormal_exit_notify(mach_exception_data_type_t code, mach_exception_data_type_t subcode); -int in_shutdown(void); void workqueue_exit(struct proc *); void delay(int); @@ -164,7 +164,7 @@ void delay(int); * XXX Should share code with bsd/dev/ppc/unix_signal.c */ static void -siginfo_64to32(user_siginfo_t *in, siginfo_t *out) +siginfo_user_to_user32(user_siginfo_t *in, user32_siginfo_t *out) { out->si_signo = in->si_signo; out->si_errno = in->si_errno; @@ -172,11 +172,25 @@ siginfo_64to32(user_siginfo_t *in, siginfo_t *out) out->si_pid = in->si_pid; out->si_uid = in->si_uid; out->si_status = in->si_status; - out->si_addr 
= CAST_DOWN(void *,in->si_addr); + out->si_addr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_addr); /* following cast works for sival_int because of padding */ - out->si_value.sival_ptr = CAST_DOWN(void *,in->si_value.sival_ptr); + out->si_value.sival_ptr = CAST_DOWN_EXPLICIT(user32_addr_t,in->si_value.sival_ptr); + out->si_band = in->si_band; /* range reduction */ +} + +static void +siginfo_user_to_user64(user_siginfo_t *in, user64_siginfo_t *out) +{ + out->si_signo = in->si_signo; + out->si_errno = in->si_errno; + out->si_code = in->si_code; + out->si_pid = in->si_pid; + out->si_uid = in->si_uid; + out->si_status = in->si_status; + out->si_addr = in->si_addr; + /* following cast works for sival_int because of padding */ + out->si_value.sival_ptr = in->si_value.sival_ptr; out->si_band = in->si_band; /* range reduction */ - out->__pad[0] = in->pad[0]; /* mcontext.ss.r1 */ } /* @@ -228,6 +242,13 @@ exit1(proc_t p, int rv, int *retval) * which is currently required by mac_audit_postselect(). */ + /* + * The BSM token contains two components: an exit status as passed + * to exit(), and a return value to indicate what sort of exit it + * was. The exit status is WEXITSTATUS(rv), but it's not clear + * what the return value is. + */ + AUDIT_ARG(exit, WEXITSTATUS(rv), 0); AUDIT_SYSCALL_EXIT(SYS_exit, p, ut, 0); /* Exit is always successfull */ DTRACE_PROC1(exit, int, CLD_EXITED); @@ -279,7 +300,7 @@ proc_prepareexit(proc_t p, int rv) ut = get_bsdthread_info(self); /* If a core should be generated, notify crash reporter */ - if (!in_shutdown() && hassigprop(WTERMSIG(rv), SA_CORE)) { + if (hassigprop(WTERMSIG(rv), SA_CORE)) { /* * Workaround for processes checking up on PT_DENY_ATTACH: * should be backed out post-Leopard (details in 5431025). 
@@ -342,11 +363,12 @@ proc_exit(proc_t p) proc_t q; proc_t pp; struct task *task = p->task; - boolean_t fstate; vnode_t tvp = NULLVP; struct pgrp * pg; struct session *sessp; struct uthread * uth; + pid_t pid; + int exitval; /* This can happen if thread_terminate of the single thread * process @@ -364,6 +386,10 @@ proc_exit(proc_t p) p->p_lflag |= P_LPEXIT; proc_unlock(p); + pid = p->p_pid; + exitval = p->p_xstat; + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_START, + pid, exitval, 0, 0, 0); #if CONFIG_DTRACE /* @@ -421,7 +447,7 @@ proc_exit(proc_t p) if (uth->uu_lowpri_window) { /* * task is marked as a low priority I/O type - * and the I/O we issued while flushing files on close + * and the I/O we issued while in flushing files on close * collided with normal I/O operations... * no need to throttle this thread since its going away * but we do need to update our bookeeping w/r to throttled threads @@ -439,12 +465,13 @@ proc_exit(proc_t p) semexit(p); #endif +#if PSYNCH + pth_proc_hashdelete(p); +#endif /* PSYNCH */ + sessp = proc_session(p); if (SESS_LEADER(p, sessp)) { - /* Protected by funnel for tty accesses */ - fstate = thread_funnel_set(kernel_flock, TRUE); - if (sessp->s_ttyvp != NULLVP) { struct vnode *ttyvp; int ttyvid; @@ -458,26 +485,29 @@ proc_exit(proc_t p) * drain controlling terminal * and revoke access to controlling terminal. */ - tp = sessp->s_ttyp; + tp = SESSION_TP(sessp); if ((tp != TTY_NULL) && (tp->t_session == sessp)) { tty_pgsignal(tp, SIGHUP, 1); - (void) ttywait(tp); - /* - * The tty could have been revoked - * if we blocked. 
- */ session_lock(sessp); + /* reget potentially tp due to revocation */ + tp = SESSION_TP(sessp); ttyvp = sessp->s_ttyvp; ttyvid = sessp->s_ttyvid; - sessp->s_ttyvp = NULL; + sessp->s_ttyvp = NULLVP; sessp->s_ttyvid = 0; - sessp->s_ttyp = NULL; + sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; session_unlock(sessp); if ((ttyvp != NULLVP) && (vnode_getwithvid(ttyvp, ttyvid) == 0)) { + + if (tp != TTY_NULL) { + tty_lock(tp); + (void) ttywait(tp); + tty_unlock(tp); + } context.vc_thread = proc_thread(p); /* XXX */ context.vc_ucred = kauth_cred_proc_ref(p); VNOP_REVOKE(ttyvp, REVOKEALL, &context); @@ -486,10 +516,12 @@ proc_exit(proc_t p) } } else { session_lock(sessp); + /* reget potentially tp due to revocation */ + tp = SESSION_TP(sessp); ttyvp = sessp->s_ttyvp; - sessp->s_ttyvp = NULL; + sessp->s_ttyvp = NULLVP; sessp->s_ttyvid = 0; - sessp->s_ttyp = NULL; + sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; session_unlock(sessp); } @@ -502,7 +534,6 @@ proc_exit(proc_t p) */ } - (void) thread_funnel_set(kernel_flock, fstate); session_lock(sessp); sessp->s_leader = NULL; session_unlock(sessp); @@ -517,6 +548,14 @@ proc_exit(proc_t p) (void)acct_process(p); proc_list_lock(); + + if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) { + p->p_listflag &= ~P_LIST_EXITCOUNT; + proc_shutdown_exitcount--; + if (proc_shutdown_exitcount == 0) + wakeup(&proc_shutdown_exitcount); + } + /* wait till parentrefs are dropped and grant no more */ proc_childdrainstart(p); while ((q = p->p_children.lh_first) != NULL) { @@ -590,50 +629,7 @@ proc_exit(proc_t p) if (p->p_ru != NULL) { *p->p_ru = p->p_stats->p_ru; - timerclear(&p->p_ru->ru_utime); - timerclear(&p->p_ru->ru_stime); - - if (task) { - task_basic_info_32_data_t tinfo; - task_thread_times_info_data_t ttimesinfo; - task_events_info_data_t teventsinfo; - mach_msg_type_number_t task_info_stuff, task_ttimes_stuff; - mach_msg_type_number_t task_events_stuff; - struct timeval ut,st; - - task_info_stuff = 
TASK_BASIC_INFO_32_COUNT; - task_info(task, TASK_BASIC2_INFO_32, - (task_info_t)&tinfo, &task_info_stuff); - p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds; - p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds; - p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds; - p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds; - - p->p_ru->ru_maxrss = tinfo.resident_size; - - task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT; - task_info(task, TASK_THREAD_TIMES_INFO, - (task_info_t)&ttimesinfo, &task_ttimes_stuff); - - ut.tv_sec = ttimesinfo.user_time.seconds; - ut.tv_usec = ttimesinfo.user_time.microseconds; - st.tv_sec = ttimesinfo.system_time.seconds; - st.tv_usec = ttimesinfo.system_time.microseconds; - timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime); - timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime); - - task_events_stuff = TASK_EVENTS_INFO_COUNT; - task_info(task, TASK_EVENTS_INFO, - (task_info_t)&teventsinfo, &task_events_stuff); - - p->p_ru->ru_minflt = (teventsinfo.faults - - teventsinfo.pageins); - p->p_ru->ru_majflt = teventsinfo.pageins; - p->p_ru->ru_nivcsw = (teventsinfo.csw - - p->p_ru->ru_nvcsw); - if (p->p_ru->ru_nivcsw < 0) - p->p_ru->ru_nivcsw = 0; - } + calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL); ruadd(p->p_ru, &p->p_stats->p_cru); } @@ -748,6 +744,8 @@ proc_exit(proc_t p) * The write is to an int and is coherent. 
Also parent is * keyed off of list lock for reaping */ + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END, + pid, exitval, 0, 0, 0); p->p_stat = SZOMB; /* * The current process can be reaped so, no one @@ -769,6 +767,8 @@ proc_exit(proc_t p) * keyed off of list lock for reaping */ proc_list_lock(); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXIT) | DBG_FUNC_END, + pid, exitval, 0, 0, 0); p->p_stat = SZOMB; /* check for sysctl zomb lookup */ while ((p->p_listflag & P_LIST_WAITING) == P_LIST_WAITING) { @@ -822,15 +822,23 @@ proc_exit(proc_t p) static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock) { - proc_t trace_parent; /* Traced parent process, if tracing */ + proc_t trace_parent = PROC_NULL; /* Traced parent process, if tracing */ + if (locked == 1) + proc_list_unlock(); + /* * If we got the child via a ptrace 'attach', * we need to give it back to the old parent. + * + * Exception: someone who has been reparented to launchd before being + * ptraced can simply be reaped, refer to radar 5677288 + * p_oppid -> ptraced + * trace_parent == initproc -> away from launchd + * P_LIST_DEADPARENT -> came to launchd by reparenting */ - if (locked == 1) - proc_list_unlock(); - if (child->p_oppid && (trace_parent = proc_find(child->p_oppid))) { + if (child->p_oppid && (trace_parent = proc_find(child->p_oppid)) + && !((trace_parent == initproc) && (child->p_lflag & P_LIST_DEADPARENT))) { proc_lock(child); child->p_oppid = 0; proc_unlock(child); @@ -859,8 +867,13 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d proc_list_lock(); return (0); } - + + if (trace_parent != PROC_NULL) { + proc_rele(trace_parent); + } + proc_knote(child, NOTE_REAP); + proc_knote_drain(child); child->p_xstat = 0; if (child->p_ru) { @@ -883,6 +896,8 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d printf("Warning : lost p_ru for %s\n", 
child->p_comm); } + AUDIT_SESSION_PROCEXIT(child->p_ucred); + /* * Decrement the count of procs running with this uid. * p_ucred usage is safe here as it is an exited process. @@ -930,12 +945,22 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d proc_list_unlock(); +#ifdef CONFIG_EMBEDDED lck_mtx_destroy(&child->p_mlock, proc_lck_grp); lck_mtx_destroy(&child->p_fdmlock, proc_lck_grp); #if CONFIG_DTRACE lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp); #endif lck_spin_destroy(&child->p_slock, proc_lck_grp); + +#else + lck_mtx_destroy(&child->p_mlock, proc_mlock_grp); + lck_mtx_destroy(&child->p_fdmlock, proc_fdmlock_grp); +#if CONFIG_DTRACE + lck_mtx_destroy(&child->p_dtrace_sprlock, proc_lck_grp); +#endif + lck_spin_destroy(&child->p_slock, proc_slock_grp); +#endif workqueue_destroy_lock(child); FREE_ZONE(child, sizeof *child, M_PROC); @@ -965,19 +990,22 @@ wait1continue(int result) } int -wait4(proc_t q, struct wait4_args *uap, register_t *retval) +wait4(proc_t q, struct wait4_args *uap, int32_t *retval) { __pthread_testcancel(1); return(wait4_nocancel(q, (struct wait4_nocancel_args *)uap, retval)); } int -wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval) +wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, int32_t *retval) { int nfound; + int sibling_count; proc_t p; int status, error; + AUDIT_ARG(pid, uap->pid); + if (uap->pid == 0) uap->pid = -q->p_pgrpid; @@ -985,7 +1013,11 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval) proc_list_lock(); loop1: nfound = 0; + sibling_count = 0; + for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) { + if ( p->p_sibling.le_next != 0 ) + sibling_count++; if (uap->pid != WAIT_ANY && p->p_pid != uap->pid && p->p_pgrpid != -(uap->pid)) @@ -1023,16 +1055,18 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval) error = ENOMEM; } else { if (IS_64BIT_PROCESS(q)) { - struct user_rusage 
my_rusage; - munge_rusage(p->p_ru, &my_rusage); + struct user64_rusage my_rusage; + munge_user64_rusage(p->p_ru, &my_rusage); error = copyout((caddr_t)&my_rusage, uap->rusage, sizeof (my_rusage)); } else { - error = copyout((caddr_t)p->p_ru, + struct user32_rusage my_rusage; + munge_user32_rusage(p->p_ru, &my_rusage); + error = copyout((caddr_t)&my_rusage, uap->rusage, - sizeof (struct rusage)); + sizeof (my_rusage)); } } /* information unavailable? */ @@ -1040,13 +1074,28 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval) goto out; } - /* Clean up */ - if (!reap_child_locked(q, p, 0, 0, 0)) { - proc_list_lock(); - p->p_listflag &= ~P_LIST_WAITING; - wakeup(&p->p_stat); - proc_list_unlock(); + /* Conformance change for 6577252. + * When SIGCHLD is blocked and wait() returns because the status + * of a child process is available and there are no other + * children processes, then any pending SIGCHLD signal is cleared. + */ + if ( sibling_count == 0 ) { + int mask = sigmask(SIGCHLD); + uthread_t uth = (struct uthread *)get_bsdthread_info(current_thread()); + + if ( (uth->uu_sigmask & mask) != 0 ) { + /* we are blocking SIGCHLD signals. clear any pending SIGCHLD. + * This locking looks funny but it is protecting access to the + * thread via p_uthlist. 
+ */ + proc_lock(q); + uth->uu_siglist &= ~mask; /* clear pending signal */ + proc_unlock(q); + } } + + /* Clean up */ + (void)reap_child_locked(q, p, 0, 0, 0); return (0); } @@ -1083,7 +1132,7 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, register_t *retval) #endif /* Prevent other process for waiting for this event */ - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag); retval[0] = p->p_pid; if (uap->status) { status = W_STOPCODE(SIGCONT); @@ -1152,14 +1201,14 @@ waitidcontinue(int result) * !0 Error returning status to user space */ int -waitid(proc_t q, struct waitid_args *uap, register_t *retval) +waitid(proc_t q, struct waitid_args *uap, int32_t *retval) { __pthread_testcancel(1); return(waitid_nocancel(q, (struct waitid_nocancel_args *)uap, retval)); } int -waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t *retval) +waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused int32_t *retval) { user_siginfo_t collect64; /* siginfo data to return to caller */ @@ -1247,15 +1296,21 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t collect64.si_band = 0; if (IS_64BIT_PROCESS(p)) { - error = copyout((caddr_t)&collect64, + user64_siginfo_t sinfo64; + + siginfo_user_to_user64(&collect64, &sinfo64); + + error = copyout((caddr_t)&sinfo64, uap->infop, - sizeof(collect64)); + sizeof(sinfo64)); } else { - siginfo_t collect; - siginfo_64to32(&collect64,&collect); - error = copyout((caddr_t)&collect, - uap->infop, - sizeof(collect)); + user32_siginfo_t sinfo32; + + siginfo_user_to_user32(&collect64, &sinfo32); + + error = copyout((caddr_t)&sinfo32, + uap->infop, + sizeof(sinfo32)); } /* information unavailable? */ if (error) @@ -1264,12 +1319,7 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t /* Prevent other process for waiting for this event? 
*/ if (!(uap->options & WNOWAIT)) { /* Clean up */ - if (!reap_child_locked(q, p, 0, 0, 0)) { - proc_list_lock(); - p->p_listflag &= ~P_LIST_WAITING; - wakeup(&p->p_stat); - proc_list_unlock(); - } + (void)reap_child_locked(q, p, 0, 0, 0); } else { proc_list_lock(); p->p_listflag &= ~P_LIST_WAITING; @@ -1309,15 +1359,21 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t collect64.si_band = 0; if (IS_64BIT_PROCESS(p)) { - error = copyout((caddr_t)&collect64, - uap->infop, - sizeof(collect64)); + user64_siginfo_t sinfo64; + + siginfo_user_to_user64(&collect64, &sinfo64); + + error = copyout((caddr_t)&sinfo64, + uap->infop, + sizeof(sinfo64)); } else { - siginfo_t collect; - siginfo_64to32(&collect64,&collect); - error = copyout((caddr_t)&collect, - uap->infop, - sizeof(collect)); + user32_siginfo_t sinfo32; + + siginfo_user_to_user32(&collect64, &sinfo32); + + error = copyout((caddr_t)&sinfo32, + uap->infop, + sizeof(sinfo32)); } /* information unavailable? */ if (error) @@ -1362,15 +1418,21 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t proc_unlock(p); if (IS_64BIT_PROCESS(p)) { - error = copyout((caddr_t)&collect64, - uap->infop, - sizeof(collect64)); + user64_siginfo_t sinfo64; + + siginfo_user_to_user64(&collect64, &sinfo64); + + error = copyout((caddr_t)&sinfo64, + uap->infop, + sizeof(sinfo64)); } else { - siginfo_t collect; - siginfo_64to32(&collect64,&collect); - error = copyout((caddr_t)&collect, - uap->infop, - sizeof(collect)); + user32_siginfo_t sinfo32; + + siginfo_user_to_user32(&collect64, &sinfo32); + + error = copyout((caddr_t)&sinfo32, + uap->infop, + sizeof(sinfo32)); } /* information unavailable? */ if (error) @@ -1378,7 +1440,7 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, __unused register_t /* Prevent other process for waiting for this event? 
*/ if (!(uap->options & WNOWAIT)) { - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag); } error = 0; @@ -1432,7 +1494,7 @@ proc_reparentlocked(proc_t child, proc_t parent, int cansignal, int locked) oldparent = child->p_pptr; #if __PROC_INTERNAL_DEBUG if (oldparent == PROC_NULL) - panic("proc_reparent: process %x does not have a parent\n", (unsigned int)child); + panic("proc_reparent: process %p does not have a parent\n", child); #endif LIST_REMOVE(child, p_sibling); @@ -1458,44 +1520,6 @@ proc_reparentlocked(proc_t child, proc_t parent, int cansignal, int locked) proc_list_lock(); } -/* - * Make the current process an "init" process, meaning - * that it doesn't have a parent, and that it won't be - * gunned down by kill(-1, 0). - */ -kern_return_t -init_process(__unused struct init_process_args *args) -{ - proc_t p = current_proc(); - - AUDIT_MACH_SYSCALL_ENTER(AUE_INITPROCESS); - if (suser(kauth_cred_get(), &p->p_acflag)) { - AUDIT_MACH_SYSCALL_EXIT(KERN_NO_ACCESS); - return(KERN_NO_ACCESS); - } - - if (p->p_pid != 1 && p->p_pgrpid != p->p_pid) - enterpgrp(p, p->p_pid, 0); - OSBitOrAtomic(P_SYSTEM, (UInt32 *)&p->p_flag); - - /* - * Take us out of the sibling chain, and - * out of our parent's child chain. - */ - proc_list_lock(); - LIST_REMOVE(p, p_sibling); - p->p_sibling.le_prev = NULL; - p->p_sibling.le_next = NULL; - p->p_pptr = kernproc; - p->p_ppid = 0; - proc_list_unlock(); - - - AUDIT_MACH_SYSCALL_EXIT(KERN_SUCCESS); - return(KERN_SUCCESS); -} - - /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. 
Save exit @@ -1605,7 +1629,6 @@ vproc_exit(proc_t p) #endif struct pgrp * pg; struct session *sessp; - boolean_t fstate; /* XXX Zombie allocation may fail, in which case stats get lost */ MALLOC_ZONE(p->p_ru, struct rusage *, @@ -1623,9 +1646,6 @@ vproc_exit(proc_t p) sessp = proc_session(p); if (SESS_LEADER(p, sessp)) { - /* Protected by funnel for tty accesses */ - fstate = thread_funnel_set(kernel_flock, TRUE); - if (sessp->s_ttyvp != NULLVP) { struct vnode *ttyvp; int ttyvid; @@ -1638,22 +1658,26 @@ vproc_exit(proc_t p) * drain controlling terminal * and revoke access to controlling terminal. */ - tp = sessp->s_ttyp; + tp = SESSION_TP(sessp); if ((tp != TTY_NULL) && (tp->t_session == sessp)) { tty_pgsignal(tp, SIGHUP, 1); + tty_lock(tp); (void) ttywait(tp); + tty_unlock(tp); /* * The tty could have been revoked * if we blocked. */ session_lock(sessp); + /* reget in case of race */ + tp = SESSION_TP(sessp); ttyvp = sessp->s_ttyvp; ttyvid = sessp->s_ttyvid; sessp->s_ttyvp = NULL; sessp->s_ttyvid = 0; - sessp->s_ttyp = NULL; + sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; session_unlock(sessp); @@ -1669,7 +1693,7 @@ vproc_exit(proc_t p) ttyvp = sessp->s_ttyvp; sessp->s_ttyvp = NULL; sessp->s_ttyvid = 0; - sessp->s_ttyp = NULL; + sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; session_unlock(sessp); } @@ -1681,7 +1705,6 @@ vproc_exit(proc_t p) * (for logging and informational purposes) */ } - (void) thread_funnel_set(kernel_flock, fstate); session_lock(sessp); sessp->s_leader = NULL; @@ -1896,11 +1919,11 @@ vproc_exit(proc_t p) /* * munge_rusage * LP64 support - long is 64 bits if we are dealing with a 64 bit user - * process. We munge the kernel (32 bit) version of rusage into the + * process. We munge the kernel version of rusage into the * 64 bit version. 
*/ __private_extern__ void -munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p) +munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p) { /* timeval changes size, so utime and stime need special handling */ a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec; @@ -1926,3 +1949,32 @@ munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p) a_user_rusage_p->ru_nvcsw = a_rusage_p->ru_nvcsw; a_user_rusage_p->ru_nivcsw = a_rusage_p->ru_nivcsw; } + +/* For a 64-bit kernel and 32-bit userspace, munging may be needed */ +__private_extern__ void +munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p) +{ + /* timeval changes size, so utime and stime need special handling */ + a_user_rusage_p->ru_utime.tv_sec = a_rusage_p->ru_utime.tv_sec; + a_user_rusage_p->ru_utime.tv_usec = a_rusage_p->ru_utime.tv_usec; + a_user_rusage_p->ru_stime.tv_sec = a_rusage_p->ru_stime.tv_sec; + a_user_rusage_p->ru_stime.tv_usec = a_rusage_p->ru_stime.tv_usec; + /* + * everything else can be a direct assign. 
We currently ignore + * the loss of precision + */ + a_user_rusage_p->ru_maxrss = a_rusage_p->ru_maxrss; + a_user_rusage_p->ru_ixrss = a_rusage_p->ru_ixrss; + a_user_rusage_p->ru_idrss = a_rusage_p->ru_idrss; + a_user_rusage_p->ru_isrss = a_rusage_p->ru_isrss; + a_user_rusage_p->ru_minflt = a_rusage_p->ru_minflt; + a_user_rusage_p->ru_majflt = a_rusage_p->ru_majflt; + a_user_rusage_p->ru_nswap = a_rusage_p->ru_nswap; + a_user_rusage_p->ru_inblock = a_rusage_p->ru_inblock; + a_user_rusage_p->ru_oublock = a_rusage_p->ru_oublock; + a_user_rusage_p->ru_msgsnd = a_rusage_p->ru_msgsnd; + a_user_rusage_p->ru_msgrcv = a_rusage_p->ru_msgrcv; + a_user_rusage_p->ru_nsignals = a_rusage_p->ru_nsignals; + a_user_rusage_p->ru_nvcsw = a_rusage_p->ru_nvcsw; + a_user_rusage_p->ru_nivcsw = a_rusage_p->ru_nivcsw; +} diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 3bc45c1ce..666748b52 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -102,7 +102,7 @@ extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t); #include #endif -#include +#include #include #include @@ -129,6 +129,7 @@ extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t); #include + /* XXX routines which should have Mach prototypes, but don't */ void thread_set_parent(thread_t parent, int pid); extern void act_thread_catt(void *ctx); @@ -136,15 +137,65 @@ void thread_set_child(thread_t child, int pid); void *act_thread_csave(void); -thread_t cloneproc(proc_t, int); -proc_t forkproc(proc_t, int); -void forkproc_free(proc_t, int); -thread_t procdup(proc_t parent, proc_t child); +thread_t cloneproc(task_t, proc_t, int); +proc_t forkproc(proc_t); +void forkproc_free(proc_t); thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit); +void proc_vfork_begin(proc_t parent_proc); +void proc_vfork_end(proc_t parent_proc); #define DOFORK 0x1 /* fork() system call */ #define DOVFORK 0x2 /* vfork() system call */ +/* + * proc_vfork_begin + * + * Description: start a vfork 
on a process + * + * Parameters: parent_proc process (re)entering vfork state + * + * Returns: (void) + * + * Notes: Although this function increments a count, a count in + * excess of 1 is not currently supported. According to the + * POSIX standard, calling anything other than execve() or + * _exit() following a vfork(), including calling vfork() + * itself again, will result in undefined behaviour + */ +void +proc_vfork_begin(proc_t parent_proc) +{ + proc_lock(parent_proc); + parent_proc->p_lflag |= P_LVFORK; + parent_proc->p_vforkcnt++; + proc_unlock(parent_proc); +} + +/* + * proc_vfork_end + * + * Description: stop a vfork on a process + * + * Parameters: parent_proc process leaving vfork state + * + * Returns: (void) + * + * Notes: Decrements the count; currently, reentrancy of vfork() + * is unsupported on the current process + */ +void +proc_vfork_end(proc_t parent_proc) +{ + proc_lock(parent_proc); + parent_proc->p_vforkcnt--; + if (parent_proc->p_vforkcnt < 0) + panic("vfork cnt is -ve"); + /* reduce the vfork count; clear the flag when it goes to 0 */ + if (parent_proc->p_vforkcnt == 0) + parent_proc->p_lflag &= ~P_LVFORK; + proc_unlock(parent_proc); +} + /* * vfork @@ -158,7 +209,7 @@ thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, * -1 error (see "Returns:") * * Returns: EAGAIN Administrative limit reached - * EINVAL vfork() caled during vfork() + * EINVAL vfork() called during vfork() * ENOMEM Failed to allocate new process * * Note: After a successful call to this function, the parent process @@ -175,18 +226,137 @@ thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, * child process at execve() time, will also be effected. Given * this, it's recemmended that people use the posix_spawn() call * instead. + * + * BLOCK DIAGRAM OF VFORK + * + * Before: + * + * ,----------------. ,-------------. + * | | task | | + * | parent_thread | ------> | parent_task | + * | | <.list.
| | + * `----------------' `-------------' + * uthread | ^ bsd_info | ^ + * v | vc_thread v | task + * ,----------------. ,-------------. + * | | | | + * | parent_uthread | <.list. | parent_proc | <-- current_proc() + * | | | | + * `----------------' `-------------' + * uu_proc | + * v + * NULL + * + * After: + * + * ,----------------. ,-------------. + * | | task | | + * ,----> | parent_thread | ------> | parent_task | + * | | | <.list. | | + * | `----------------' `-------------' + * | uthread | ^ bsd_info | ^ + * | v | vc_thread v | task + * | ,----------------. ,-------------. + * | | | | | + * | | parent_uthread | <.list. | parent_proc | + * | | | | | + * | `----------------' `-------------' + * | uu_proc | . list + * | v v + * | ,----------------. + * `----- | | + * p_vforkact | child_proc | <-- current_proc() + * | | + * `----------------' + */ +int +vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) +{ + thread_t child_thread; + int err; + + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) { + retval[1] = 0; + } else { + /* + * kludge: rely on uu_proc being set in the vfork case, + * rather than returning the actual thread. We can remove + * this when we remove the uu_proc/current_proc() kludge. + */ + proc_t child_proc = current_proc(); + + retval[0] = child_proc->p_pid; + retval[1] = 1; /* flag child return for user space */ + + /* + * Drop the signal lock on the child which was taken on our + * behalf by forkproc()/cloneproc() to prevent signals being + * received by the child in a partially constructed state. 
+ */ + proc_signalend(child_proc, 0); + proc_transend(child_proc, 0); + + /* flag the fork has occurred */ + proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); + DTRACE_PROC1(create, proc_t, child_proc); + } + + return(err); +} + + +/* + * fork1 + * + * Description: common code used by all new process creation other than the + * bootstrap of the initial process on the system + * + * Parameters: parent_proc parent process of the process being created + * child_threadp pointer to location to receive the + * Mach thread_t of the child process + * created + * kind kind of creation being requested + * + * Notes: Permissible values for 'kind': + * + * PROC_CREATE_FORK Create a complete process which will + * return actively running in both the + * parent and the child; the child copies + * the parent address space. + * PROC_CREATE_SPAWN Create a complete process which will + * return actively running in the parent + * only after returning actively running + * in the child; the child address space + * is newly created by an image activator, + * after which the child is run. + * PROC_CREATE_VFORK Creates a partial process which will + * borrow the parent task, thread, and + * uthread to return running in the child; + * the child address space and other parts + * are lazily created at execve() time, or + * the child is terminated, and the parent + * does not actively run until that + * happens. + * + * At first it may seem strange that we return the child thread + * address rather than process structure, since the process is + * the only part guaranteed to be "new"; however, since we do + * not actually adjust other references between Mach and BSD (see + * the block diagram above the implementation of vfork()), this + * is the only method which guarantees us the ability to get + * back to the other information.
*/ int -vfork(proc_t parent, __unused struct vfork_args *uap, register_t *retval) +fork1(proc_t parent_proc, thread_t *child_threadp, int kind) { - proc_t child; + thread_t parent_thread = (thread_t)current_thread(); + uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); + proc_t child_proc = NULL; /* set in switch, but compiler... */ + thread_t child_thread = NULL; uid_t uid; - thread_t cur_act = (thread_t)current_thread(); int count; - uthread_t ut; -#if CONFIG_MACF - int err; -#endif + int err = 0; + int spawn = 0; /* * Although process entries are dynamically created, we still keep @@ -200,7 +370,6 @@ vfork(proc_t parent, __unused struct vfork_args *uap, register_t *retval) if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { proc_list_unlock(); tablefull("proc"); - retval[1] = 0; return (EAGAIN); } proc_list_unlock(); @@ -213,107 +382,291 @@ vfork(proc_t parent, __unused struct vfork_args *uap, register_t *retval) */ count = chgproccnt(uid, 1); if (uid != 0 && - (rlim_t)count > parent->p_rlimit[RLIMIT_NPROC].rlim_cur) { - (void)chgproccnt(uid, -1); - return (EAGAIN); - } - - ut = (uthread_t)get_bsdthread_info(cur_act); - if (ut->uu_flag & UT_VFORK) { - printf("vfork called recursively by %s\n", parent->p_comm); - (void)chgproccnt(uid, -1); - return (EINVAL); + (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) { + err = EAGAIN; + goto bad; } #if CONFIG_MACF /* * Determine if MAC policies applied to the process will allow - * it to fork. + * it to fork. This is an advisory-only check. 
+ */ - err = mac_proc_check_fork(parent); + err = mac_proc_check_fork(parent_proc); if (err != 0) { - (void)chgproccnt(uid, -1); - return (err); + goto bad; } #endif - proc_lock(parent); - parent->p_lflag |= P_LVFORK; - parent->p_vforkcnt++; - proc_unlock(parent); + switch(kind) { + case PROC_CREATE_VFORK: + /* + * Prevent a vfork while we are in vfork(); we should + * also likely be preventing a fork here as well, and this + * check should then be outside the switch statement, + * since the proc struct contents will copy from the + * child and the task/thread/uthread from the parent in + * that case. We do not support vfork() in vfork() + * because we don't have to; the same non-requirement + * is true of both fork() and posix_spawn() and any + * call other than execve() and _exit(), but we've + * been historically lenient, so we continue to be so + * (for now). + * + * Probably a source of random panics + */ + if (parent_uthread->uu_flag & UT_VFORK) { + printf("fork1 called within vfork by %s\n", parent_proc->p_comm); + err = EINVAL; + goto bad; + } - /* The newly created process comes with signal lock held */ - if ((child = forkproc(parent,1)) == NULL) { - /* Failed to allocate new process */ - (void)chgproccnt(uid, -1); /* - * XXX kludgy, but necessary without a full flags audit... - * XXX these are inherited by the child, which depends on - * XXX P_VFORK being set. + * Flag us in progress; if we chose to support vfork() in + * vfork(), we would chain our parent at this point (in + * effect, a stack push).
We don't, since we actually want + * to disallow everything not specified in the standard */ - proc_lock(parent); - parent->p_lflag &= ~P_LVFORK; - parent->p_vforkcnt--; - proc_unlock(parent); - return (ENOMEM); - } + proc_vfork_begin(parent_proc); + + /* The newly created process comes with signal lock held */ + if ((child_proc = forkproc(parent_proc)) == NULL) { + /* Failed to allocate new process */ + proc_vfork_end(parent_proc); + err = ENOMEM; + goto bad; + } +// XXX BEGIN: wants to move to be common code (and safe) #if CONFIG_MACF - /* allow policies to associate the credential/label */ - /* that we referenced from the parent ... with the child */ - /* JMM - this really isn't safe, as we can drop that */ - /* association without informing the policy in other */ - /* situations (keep long enough to get policies changed) */ - mac_cred_label_associate_fork(child->p_ucred, child); + /* + * allow policies to associate the credential/label that + * we referenced from the parent ... with the child + * JMM - this really isn't safe, as we can drop that + * association without informing the policy in other + * situations (keep long enough to get policies changed) + */ + mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); #endif - AUDIT_ARG(pid, child->p_pid); + /* + * Propagate change of PID - may get new cred if auditing. + * + * NOTE: This has no effect in the vfork case, since + * child_proc->task != current_task(), but we duplicate it + * because this is probably, ultimately, wrong, since we + * will be running in the "child" which is the parent task + * with the wrong token until we get to the execve() or + * _exit() call; a lot of "undefined" can happen before + * that. + * + * disallow everything but execve()/_exit()?
+ */ + set_security_token(child_proc); - child->task = parent->task; + AUDIT_ARG(pid, child_proc->p_pid); - /* make child visible */ - pinsertchild(parent, child); + AUDIT_SESSION_PROCNEW(child_proc->p_ucred); +// XXX END: wants to move to be common code (and safe) - child->p_lflag |= P_LINVFORK; - child->p_vforkact = cur_act; - child->p_stat = SRUN; + /* + * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD + * + * Note: this is where we would "push" state instead of setting + * it for nested vfork() support (see proc_vfork_end() for + * description if issues here). + */ + child_proc->task = parent_proc->task; - ut->uu_flag |= UT_VFORK; - ut->uu_proc = child; - ut->uu_userstate = (void *)act_thread_csave(); - ut->uu_vforkmask = ut->uu_sigmask; + child_proc->p_lflag |= P_LINVFORK; + child_proc->p_vforkact = parent_thread; + child_proc->p_stat = SRUN; - /* temporarily drop thread-set-id state */ - if (ut->uu_flag & UT_SETUID) { - ut->uu_flag |= UT_WASSETUID; - ut->uu_flag &= ~UT_SETUID; - } - - thread_set_child(cur_act, child->p_pid); + parent_uthread->uu_flag |= UT_VFORK; + parent_uthread->uu_proc = child_proc; + parent_uthread->uu_userstate = (void *)act_thread_csave(); + parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask; - microtime(&child->p_start); - microtime(&child->p_stats->p_start); /* for compat sake */ - child->p_acflag = AFORK; + /* temporarily drop thread-set-id state */ + if (parent_uthread->uu_flag & UT_SETUID) { + parent_uthread->uu_flag |= UT_WASSETUID; + parent_uthread->uu_flag &= ~UT_SETUID; + } - /* - * Preserve synchronization semantics of vfork. If waiting for - * child to exec or exit, set P_PPWAIT on child, and sleep on our - * proc (in case of exit). - */ - child->p_lflag |= P_LPPWAIT; + /* blow thread state information */ + /* XXX is this actually necessary, given syscall return? 
*/ + thread_set_child(parent_thread, child_proc->p_pid); + + child_proc->p_acflag = AFORK; /* forked but not exec'ed */ + + /* + * Preserve synchronization semantics of vfork. If + * waiting for child to exec or exit, set P_PPWAIT + * on child, and sleep on our proc (in case of exit). + */ + child_proc->p_lflag |= P_LPPWAIT; + pinsertchild(parent_proc, child_proc); /* set visible */ + + break; + + case PROC_CREATE_SPAWN: + /* + * A spawned process differs from a forked process in that + * the spawned process does not carry around the parents + * baggage with regard to address space copying, dtrace, + * and so on. + */ + spawn = 1; + + /* FALLSTHROUGH */ + + case PROC_CREATE_FORK: + /* + * When we clone the parent process, we are going to inherit + * its task attributes and memory, since when we fork, we + * will, in effect, create a duplicate of it, with only minor + * differences. Contrarily, spawned processes do not inherit. + */ + if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) { + /* Failed to create thread */ + err = EAGAIN; + goto bad; + } + + /* copy current thread state into the child thread (only for fork) */ + if (!spawn) { + thread_dup(child_thread); + } + + /* child_proc = child_thread->task->proc; */ + child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread))); - /* drop the signal lock on the child */ - proc_signalend(child, 0); - proc_transend(child, 0); +// XXX BEGIN: wants to move to be common code (and safe) +#if CONFIG_MACF + /* + * allow policies to associate the credential/label that + * we referenced from the parent ... with the child + * JMM - this really isn't safe, as we can drop that + * association without informing the policy in other + * situations (keep long enough to get policies changed) + */ + mac_cred_label_associate_fork(child_proc->p_ucred, child_proc); +#endif + + /* + * Propogate change of PID - may get new cred if auditing. 
+ * + * NOTE: This has no effect in the vfork case, since + * child_proc->task != current_task(), but we duplicate it + * because this is probably, ultimately, wrong, since we + * will be running in the "child" which is the parent task + * with the wrong token until we get to the execve() or + * _exit() call; a lot of "undefined" can happen before + * that. + * + * disallow everything but exeve()/_exit()? + */ + set_security_token(child_proc); - retval[0] = child->p_pid; - retval[1] = 1; /* flag child return for user space */ + AUDIT_ARG(pid, child_proc->p_pid); - DTRACE_PROC1(create, proc_t, child); + AUDIT_SESSION_PROCNEW(child_proc->p_ucred); +// XXX END: wants to move to be common code (and safe) + + /* + * Blow thread state information; this is what gives the child + * process its "return" value from a fork() call. + * + * Note: this should probably move to fork() proper, since it + * is not relevent to spawn, and the value won't matter + * until we resume the child there. If you are in here + * refactoring code, consider doing this at the same time. + */ + thread_set_child(child_thread, child_proc->p_pid); + + child_proc->p_acflag = AFORK; /* forked but not exec'ed */ + +// dtrace code cleanup needed +#if CONFIG_DTRACE + /* + * This code applies to new processes who are copying the task + * and thread state and address spaces of their parent process. + */ + if (!spawn) { +// call dtrace specific function here instead of all this... + /* + * APPLE NOTE: Solaris does a sprlock() and drops the + * proc_lock here. We're cheating a bit and only taking + * the p_dtrace_sprlock lock. A full sprlock would + * task_suspend the parent. + */ + lck_mtx_lock(&parent_proc->p_dtrace_sprlock); + + /* + * Remove all DTrace tracepoints from the child process. We + * need to do this _before_ duplicating USDT providers since + * any associated probes may be immediately enabled. 
+ */ + if (parent_proc->p_dtrace_count > 0) { + dtrace_fasttrap_fork(parent_proc, child_proc); + } + + lck_mtx_unlock(&parent_proc->p_dtrace_sprlock); + + /* + * Duplicate any lazy dof(s). This must be done while NOT + * holding the parent sprlock! Lock ordering is + * dtrace_dof_mode_lock, then sprlock. It is imperative we + * always call dtrace_lazy_dofs_duplicate, rather than null + * check and call if !NULL. If we NULL test, during lazy dof + * faulting we can race with the faulting code and proceed + * from here to beyond the helpers copy. The lazy dof + * faulting will then fail to copy the helpers to the child + * process. + */ + dtrace_lazy_dofs_duplicate(parent_proc, child_proc); + + /* + * Duplicate any helper actions and providers. The SFORKING + * we set above informs the code to enable USDT probes that + * sprlock() may fail because the child is being forked. + */ + /* + * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent + * never fails to find the child. We do not set SFORKING. + */ + if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { + (*dtrace_helpers_fork)(parent_proc, child_proc); + } + + } +#endif /* CONFIG_DTRACE */ + + break; + + default: + panic("fork1 called with unknown kind %d", kind); + break; + } + + + /* return the thread pointer to the caller */ + *child_threadp = child_thread; + +bad: + /* + * In the error case, we return a 0 value for the returned pid (but + * it is ignored in the trampoline due to the error return); this + * is probably not necessary. + */ + if (err) { + (void)chgproccnt(uid, -1); + } - return (0); + return (err); } + /* * vfork_return * @@ -321,7 +674,7 @@ vfork(proc_t parent, __unused struct vfork_args *uap, register_t *retval) * this is done by reassociating the parent process structure * with the task, thread, and uthread. 
* - * Parameters: child Child process + * Parameters: child_proc Child process * retval System call return value array * rval Return value to present to parent * @@ -331,37 +684,32 @@ vfork(proc_t parent, __unused struct vfork_args *uap, register_t *retval) * callling this function. */ void -vfork_return(proc_t child, register_t *retval, int rval) +vfork_return(proc_t child_proc, int32_t *retval, int rval) { - proc_t parent = child->p_pptr; - thread_t cur_act = (thread_t)current_thread(); - uthread_t ut; + proc_t parent_proc = child_proc->p_pptr; + thread_t parent_thread = (thread_t)current_thread(); + uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread); - ut = (uthread_t)get_bsdthread_info(cur_act); - act_thread_catt(ut->uu_userstate); + act_thread_catt(parent_uthread->uu_userstate); - /* Make sure only one at this time */ - proc_lock(parent); - parent->p_vforkcnt--; - if (parent->p_vforkcnt <0) - panic("vfork cnt is -ve"); - if (parent->p_vforkcnt <=0) - parent->p_lflag &= ~P_LVFORK; - proc_unlock(parent); - ut->uu_userstate = 0; - ut->uu_flag &= ~UT_VFORK; + /* end vfork in parent */ + proc_vfork_end(parent_proc); + + /* REPATRIATE PARENT TASK, THREAD, UTHREAD */ + parent_uthread->uu_userstate = 0; + parent_uthread->uu_flag &= ~UT_VFORK; /* restore thread-set-id state */ - if (ut->uu_flag & UT_WASSETUID) { - ut->uu_flag |= UT_SETUID; - ut->uu_flag &= UT_WASSETUID; + if (parent_uthread->uu_flag & UT_WASSETUID) { + parent_uthread->uu_flag |= UT_SETUID; + parent_uthread->uu_flag &= UT_WASSETUID; } - ut->uu_proc = 0; - ut->uu_sigmask = ut->uu_vforkmask; - child->p_lflag &= ~P_LINVFORK; - child->p_vforkact = (void *)0; + parent_uthread->uu_proc = 0; + parent_uthread->uu_sigmask = parent_uthread->uu_vforkmask; + child_proc->p_lflag &= ~P_LINVFORK; + child_proc->p_vforkact = (void *)0; - thread_set_parent(cur_act, rval); + thread_set_parent(parent_thread, rval); if (retval) { retval[0] = rval; @@ -379,7 +727,7 @@ vfork_return(proc_t child, 
register_t *retval, int rval) * process * * Parameters: parent_task parent task - * child child process + * child_proc child process * inherit_memory TRUE, if the parents address space is * to be inherited by the child * is64bit TRUE, if the child being created will @@ -402,7 +750,7 @@ vfork_return(proc_t child, register_t *retval, int rval) * in this case, 'inherit_memory' MUST be FALSE. */ thread_t -fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit) +fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit) { thread_t child_thread = NULL; task_t child_task; @@ -418,14 +766,14 @@ fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64 goto bad; } - /* Set the child task to the new task */ - child->task = child_task; + /* Set the child process task to the new task */ + child_proc->task = child_task; - /* Set child task proc to child proc */ - set_bsdtask_info(child_task, child); + /* Set child task process to child proc */ + set_bsdtask_info(child_task, child_proc); /* Propagate CPU limit timer from parent */ - if (timerisset(&child->p_rlim_cpu)) + if (timerisset(&child_proc->p_rlim_cpu)) task_vtimer_set(child_task, TASK_VTIMER_RLIM); /* Set/clear 64 bit vm_map flag */ @@ -437,12 +785,15 @@ fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64 #if CONFIG_MACF /* Update task for MAC framework */ /* valid to use p_ucred as child is still not running ... 
*/ - mac_task_label_update_cred(child->p_ucred, child_task); + mac_task_label_update_cred(child_proc->p_ucred, child_task); #endif - /* Set child scheduler priority if nice value inherited from parent */ - if (child->p_nice != 0) - resetpriority(child); + /* + * Set child process BSD visible scheduler priority if nice value + * inherited from parent + */ + if (child_proc->p_nice != 0) + resetpriority(child_proc); /* Create a new thread for the child process */ result = thread_create(child_task, &child_thread); @@ -458,73 +809,6 @@ fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64 } -/* - * procdup - * - * Description: Givben a parent process, provide a duplicate task and thread - * for a child process of that parent. - * - * Parameters: parent Parent process to use as the template - * child Child process to duplicate into - * - * Returns: !NULL Child process thread pointer - * NULL Failure (unspecified) - * - * Note: Most of the heavy lifting is done by fork_create_child(); this - * function exists more or less to deal with the 64 bit commpage, - * which requires explicit inheritance, the x86 commpage, which - * should not need explicit mapping any more, but apparently does, - * and to be variant for the bootstrap process. - * - * There is a special case where the system is being bootstraped, - * where this function will be called from cloneproc(), called in - * turn from bsd_utaskbootstrap(). In this case, we are acting - * to create a task and thread (and uthread) for the benefit of - * the kernel process - the first process in the system (PID 0). - * - * In that specific case, we will *not* pass a parent task, since - * there is *not* parent task present to pass. - * - * XXX: This function should go away; the variance can moved into - * XXX: cloneproc(), and the 64bit commpage code can be moved into - * XXX: fork_create_child(), after the x86 commpage inheritance is - * XXX: corrected. 
- */ -thread_t -procdup(proc_t parent, proc_t child) -{ - thread_t child_thread; - task_t child_task; - - if (parent->task == kernel_task) - child_thread = fork_create_child(TASK_NULL, child, FALSE, FALSE); - else - child_thread = fork_create_child(parent->task, child, TRUE, (parent->p_flag & P_LP64)); - - if (child_thread != NULL) { - child_task = get_threadtask(child_thread); - if (parent->p_flag & P_LP64) { - task_set_64bit(child_task, TRUE); - OSBitOrAtomic(P_LP64, (UInt32 *)&child->p_flag); -#ifdef __ppc__ - /* LP64todo - clean up hacked mapping of commpage */ - /* - * PPC51: ppc64 is limited to 51-bit addresses. - * Memory above that limit is handled specially at - * the pmap level. - */ - pmap_map_sharedpage(child_task, get_map_pmap(get_task_map(child_task))); -#endif /* __ppc__ */ - } else { - task_set_64bit(child_task, FALSE); - OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child->p_flag); - } - } - - return(child_thread); -} - - /* * fork * @@ -536,170 +820,87 @@ procdup(proc_t parent, proc_t child) * * Returns: 0 Success * EAGAIN Resource unavailable, try again + * + * Notes: Attempts to create a new child process which inherits state + * from the parent process. If successful, the call returns + * having created an initially suspended child process with an + * extra Mach task and thread reference, for which the thread + * is initially suspended. Until we resume the child process, + * it is not yet running. + * + * The return information to the child is contained in the + * thread state structure of the new child, and does not + * become visible to the child through a normal return process, + * since it never made the call into the kernel itself in the + * first place. + * + * After resuming the thread, this function returns directly to + * the parent process which invoked the fork() system call. 
+ * + * Important: The child thread_resume occurs before the parent returns; + * depending on scheduling latency, this means that it is not + * deterministic as to whether the parent or child is scheduled + * to run first. It is entirely possible that the child could + * run to completion prior to the parent running. */ int -fork(proc_t parent, __unused struct fork_args *uap, register_t *retval) +fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) { - proc_t child; - uid_t uid; - thread_t newth; - int count; - task_t t; -#if CONFIG_MACF + thread_t child_thread; int err; -#endif - /* - * Although process entries are dynamically created, we still keep - * a global limit on the maximum number we will create. Don't allow - * a nonprivileged user to use the last process; don't let root - * exceed the limit. The variable nprocs is the current number of - * processes, maxproc is the limit. - */ - uid = kauth_cred_get()->cr_ruid; - proc_list_lock(); - if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { - proc_list_unlock(); - tablefull("proc"); - retval[1] = 0; - return (EAGAIN); - } - proc_list_unlock(); - - /* - * Increment the count of procs running with this uid. Don't allow - * a nonprivileged user to exceed their current limit, which is - * always less than what an rlim_t can hold. - * (locking protection is provided by list lock held in chgproccnt) - */ - count = chgproccnt(uid, 1); - if (uid != 0 && - (rlim_t)count > parent->p_rlimit[RLIMIT_NPROC].rlim_cur) { - (void)chgproccnt(uid, -1); - return (EAGAIN); - } + retval[1] = 0; /* flag parent return for user space */ -#if CONFIG_MACF - /* - * Determine if MAC policies applied to the process will allow - * it to fork. 
- */ - err = mac_proc_check_fork(parent); - if (err != 0) { - (void)chgproccnt(uid, -1); - return (err); - } -#endif + if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) { + task_t child_task; + proc_t child_proc; - /* The newly created process comes with signal lock held */ - if ((newth = cloneproc(parent, 1)) == NULL) { - /* Failed to create thread */ - (void)chgproccnt(uid, -1); - return (EAGAIN); - } + /* Return to the parent */ + child_proc = (proc_t)get_bsdthreadtask_info(child_thread); + retval[0] = child_proc->p_pid; - thread_dup(newth); - /* child = newth->task->proc; */ - child = (proc_t)(get_bsdtask_info(get_threadtask(newth))); - -#if CONFIG_MACF - /* inform policies of new process sharing this cred/label */ - /* safe to use p_ucred here since child is not running */ - /* JMM - unsafe to assume the association will stay - as */ - /* there are other ways it can be dropped without */ - /* informing the policies. */ - mac_cred_label_associate_fork(child->p_ucred, child); -#endif - - /* propogate change of PID - may get new cred if auditing */ - set_security_token(child); - - AUDIT_ARG(pid, child->p_pid); - - thread_set_child(newth, child->p_pid); - - microtime(&child->p_start); - microtime(&child->p_stats->p_start); /* for compat sake */ - child->p_acflag = AFORK; - -#if CONFIG_DTRACE - /* - * APPLE NOTE: Solaris does a sprlock() and drops the proc_lock - * here. We're cheating a bit and only taking the p_dtrace_sprlock - * lock. A full sprlock would task_suspend the parent. - */ - lck_mtx_lock(&parent->p_dtrace_sprlock); + /* + * Drop the signal lock on the child which was taken on our + * behalf by forkproc()/cloneproc() to prevent signals being + * received by the child in a partially constructed state. + */ + proc_signalend(child_proc, 0); + proc_transend(child_proc, 0); - /* - * Remove all DTrace tracepoints from the child process. 
We - * need to do this _before_ duplicating USDT providers since - * any associated probes may be immediately enabled. - */ - if (parent->p_dtrace_count > 0) { - dtrace_fasttrap_fork(parent, child); - } + /* flag the fork has occurred */ + proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); + DTRACE_PROC1(create, proc_t, child_proc); - lck_mtx_unlock(&parent->p_dtrace_sprlock); + /* "Return" to the child */ + (void)thread_resume(child_thread); - /* - * Duplicate any lazy dof(s). This must be done while NOT - * holding the parent sprlock! Lock ordering is dtrace_dof_mode_lock, - * then sprlock. It is imperative we always call - * dtrace_lazy_dofs_duplicate, rather than null check and - * call if !NULL. If we NULL test, during lazy dof faulting - * we can race with the faulting code and proceed from here to - * beyond the helpers copy. The lazy dof faulting will then - * fail to copy the helpers to the child process. - */ - dtrace_lazy_dofs_duplicate(parent, child); - - /* - * Duplicate any helper actions and providers. The SFORKING - * we set above informs the code to enable USDT probes that - * sprlock() may fail because the child is being forked. - */ - /* - * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent - * never fails to find the child. We do not set SFORKING. 
- */ - if (parent->p_dtrace_helpers != NULL && dtrace_helpers_fork) { - (*dtrace_helpers_fork)(parent, child); + /* drop the extra references we got during the creation */ + if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) { + task_deallocate(child_task); + } + thread_deallocate(child_thread); } -#endif - - /* drop the signal lock on the child */ - proc_signalend(child, 0); - proc_transend(child, 0); - - /* "Return" to the child */ - (void)thread_resume(newth); - - /* drop the extra references we got during the creation */ - if ((t = (task_t)get_threadtask(newth)) != NULL) { - task_deallocate(t); - } - thread_deallocate(newth); - - proc_knote(parent, NOTE_FORK | child->p_pid); - - retval[0] = child->p_pid; - retval[1] = 0; /* flag parent */ - - DTRACE_PROC1(create, proc_t, child); - - return (0); + return(err); } + /* * cloneproc * * Description: Create a new process from a specified process. * - * Parameters: parent The parent process of the process to - * be cloned - * lock Whether or not the signal lock was held - * when calling cloneproc(). + * Parameters: parent_task The parent task to be cloned, or + * TASK_NULL is task characteristics + * are not to be inherited + * be cloned, or TASK_NULL if the new + * task is not to inherit the VM + * characteristics of the parent + * parent_proc The parent process to be cloned + * inherit_memory True if the child is to inherit + * memory from the parent; if this is + * non-NULL, then the parent_task must + * also be non-NULL * * Returns: !NULL pointer to new child thread * NULL Failure (unspecified) @@ -712,68 +913,87 @@ fork(proc_t parent, __unused struct fork_args *uap, register_t *retval) * In the case of bootstrap, this function can be called from * bsd_utaskbootstrap() in order to bootstrap the first process; * the net effect is to provide a uthread structure for the - * kernel process associated with the kernel task. 
This results - * in a side effect in procdup(), which is why the code is more - * complicated at the top of that function. + * kernel process associated with the kernel task. + * + * XXX: Tristating using the value parent_task as the major key + * and inherit_memory as the minor key is something we should + * refactor later; we owe the current semantics, ultimately, + * to the semantics of task_create_internal. For now, we will + * live with this being somewhat awkward. */ thread_t -cloneproc(proc_t parent, int lock) +cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory) { - proc_t child; - thread_t th = NULL; + task_t child_task; + proc_t child_proc; + thread_t child_thread = NULL; - if ((child = forkproc(parent,lock)) == NULL) { + if ((child_proc = forkproc(parent_proc)) == NULL) { /* Failed to allocate new process */ goto bad; } - if ((th = procdup(parent, child)) == NULL) { + child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64)); + + if (child_thread == NULL) { /* * Failed to create thread; now we must deconstruct the new * process previously obtained from forkproc(). */ - forkproc_free(child, lock); + forkproc_free(child_proc); goto bad; } + child_task = get_threadtask(child_thread); + if (parent_proc->p_flag & P_LP64) { + task_set_64bit(child_task, TRUE); + OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag); +#ifdef __ppc__ + /* + * PPC51: ppc64 is limited to 51-bit addresses. + * Memory above that limit is handled specially at + * the pmap level. + */ + pmap_map_sharedpage(child_task, get_map_pmap(get_task_map(child_task))); +#endif /* __ppc__ */ + } else { + task_set_64bit(child_task, FALSE); + OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); + } + /* make child visible */ - pinsertchild(parent, child); + pinsertchild(parent_proc, child_proc); /* * Make child runnable, set start time. 
*/ - child->p_stat = SRUN; - + child_proc->p_stat = SRUN; bad: - return(th); + return(child_thread); } + /* * Destroy a process structure that resulted from a call to forkproc(), but * which must be returned to the system because of a subsequent failure * preventing it from becoming active. * * Parameters: p The incomplete process from forkproc() - * lock Whether or not the signal lock was held - * when calling forkproc(). * * Returns: (void) * * Note: This function should only be used in an error handler following - * a call to forkproc(). The 'lock' paramenter should be the same - * as the lock parameter passed to forkproc(). + * a call to forkproc(). * * Operations occur in reverse order of those in forkproc(). */ void -forkproc_free(proc_t p, int lock) +forkproc_free(proc_t p) { - /* Drop the signal lock, if it was held */ - if (lock) { - proc_signalend(p, 0); - proc_transend(p, 0); - } + /* We held signal and a transition locks; drop them */ + proc_signalend(p, 0); + proc_transend(p, 0); /* * If we have our own copy of the resource limits structure, we @@ -835,9 +1055,7 @@ forkproc_free(proc_t p, int lock) * Description: Create a new process structure, given a parent process * structure. * - * Parameters: parent The parent process - * lock If the signal lock should be taken on - * the newly created process. + * Parameters: parent_proc The parent process * * Returns: !NULL The new process structure * NULL Error (insufficient free memory) @@ -847,45 +1065,47 @@ forkproc_free(proc_t p, int lock) * returned structure, they must call forkproc_free() to do so. 
*/ proc_t -forkproc(proc_t parent, int lock) +forkproc(proc_t parent_proc) { - struct proc * child; /* Our new process */ + proc_t child_proc; /* Our new process */ static int nextpid = 0, pidwrap = 0, nextpidversion = 0; int error = 0; struct session *sessp; - uthread_t uth_parent = (uthread_t)get_bsdthread_info(current_thread()); + uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread()); - MALLOC_ZONE(child, proc_t , sizeof *child, M_PROC, M_WAITOK); - if (child == NULL) { + MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK); + if (child_proc == NULL) { printf("forkproc: M_PROC zone exhausted\n"); goto bad; } /* zero it out as we need to insert in hash */ - bzero(child, sizeof *child); + bzero(child_proc, sizeof *child_proc); - MALLOC_ZONE(child->p_stats, struct pstats *, - sizeof *child->p_stats, M_PSTATS, M_WAITOK); - if (child->p_stats == NULL) { + MALLOC_ZONE(child_proc->p_stats, struct pstats *, + sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK); + if (child_proc->p_stats == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n"); - FREE_ZONE(child, sizeof *child, M_PROC); - child = NULL; + FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); + child_proc = NULL; goto bad; } - MALLOC_ZONE(child->p_sigacts, struct sigacts *, - sizeof *child->p_sigacts, M_SIGACTS, M_WAITOK); - if (child->p_sigacts == NULL) { + MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *, + sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK); + if (child_proc->p_sigacts == NULL) { printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n"); - FREE_ZONE(child->p_stats, sizeof *child->p_stats, M_PSTATS); - FREE_ZONE(child, sizeof *child, M_PROC); - child = NULL; + FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); + FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); + child_proc = NULL; goto bad; } - child->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child); - if (child->p_rcall == NULL) { - 
FREE_ZONE(child->p_sigacts, sizeof *child->p_sigacts, M_SIGACTS); - FREE_ZONE(child->p_stats, sizeof *child->p_stats, M_PSTATS); - FREE_ZONE(child, sizeof *child, M_PROC); - child = NULL; + + /* allocate a callout for use by interval timers */ + child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc); + if (child_proc->p_rcall == NULL) { + FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS); + FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS); + FREE_ZONE(child_proc, sizeof *child_proc, M_PROC); + child_proc = NULL; goto bad; } @@ -925,17 +1145,17 @@ forkproc(proc_t parent, int lock) } } nprocs++; - child->p_pid = nextpid; - child->p_idversion = nextpidversion++; + child_proc->p_pid = nextpid; + child_proc->p_idversion = nextpidversion++; #if 1 - if (child->p_pid != 0) { - if (pfind_locked(child->p_pid) != PROC_NULL) + if (child_proc->p_pid != 0) { + if (pfind_locked(child_proc->p_pid) != PROC_NULL) panic("proc in the list already\n"); } #endif /* Insert in the hash */ - child->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE); - LIST_INSERT_HEAD(PIDHASH(child->p_pid), child, p_hash); + child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE); + LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash); proc_list_unlock(); @@ -943,15 +1163,16 @@ forkproc(proc_t parent, int lock) * We've identified the PID we are going to use; initialize the new * process structure. */ - child->p_stat = SIDL; - child->p_pgrpid = PGRPID_DEAD; + child_proc->p_stat = SIDL; + child_proc->p_pgrpid = PGRPID_DEAD; /* - * The zero'ing of the proc was at the allocation time due to need for insertion - * to hash. Copy the section that is to be copied directly from the parent. + * The zero'ing of the proc was at the allocation time due to need + * for insertion to hash. Copy the section that is to be copied + * directly from the parent. 
*/ - bcopy(&parent->p_startcopy, &child->p_startcopy, - (unsigned) ((caddr_t)&child->p_endcopy - (caddr_t)&child->p_startcopy)); + bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy, + (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy)); /* * Some flags are inherited from the parent. @@ -959,97 +1180,149 @@ forkproc(proc_t parent, int lock) * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ - child->p_flag = (parent->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY)); - if (parent->p_flag & P_PROFIL) - startprofclock(child); + child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY)); + if (parent_proc->p_flag & P_PROFIL) + startprofclock(child_proc); /* * Note that if the current thread has an assumed identity, this * credential will be granted to the new process. */ - child->p_ucred = kauth_cred_get_with_ref(); + child_proc->p_ucred = kauth_cred_get_with_ref(); - lck_mtx_init(&child->p_mlock, proc_lck_grp, proc_lck_attr); - lck_mtx_init(&child->p_fdmlock, proc_lck_grp, proc_lck_attr); +#ifdef CONFIG_EMBEDDED + lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr); #if CONFIG_DTRACE - lck_mtx_init(&child->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); + lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); #endif - lck_spin_init(&child->p_slock, proc_lck_grp, proc_lck_attr); - klist_init(&child->p_klist); + lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr); +#else /* !CONFIG_EMBEDDED */ + lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr); + lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr); +#if CONFIG_DTRACE + lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr); +#endif + lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr); +#endif /* !CONFIG_EMBEDDED */ + 
klist_init(&child_proc->p_klist); - if (child->p_textvp != NULLVP) { + if (child_proc->p_textvp != NULLVP) { /* bump references to the text vnode */ /* Need to hold iocount across the ref call */ - if (vnode_getwithref(child->p_textvp) == 0) { - error = vnode_ref(child->p_textvp); - vnode_put(child->p_textvp); + if (vnode_getwithref(child_proc->p_textvp) == 0) { + error = vnode_ref(child_proc->p_textvp); + vnode_put(child_proc->p_textvp); if (error != 0) - child->p_textvp = NULLVP; + child_proc->p_textvp = NULLVP; } } - /* XXX may fail to copy descriptors to child */ - child->p_fd = fdcopy(parent, uth_parent->uu_cdir); + /* + * Copy the parents per process open file table to the child; if + * there is a per-thread current working directory, set the childs + * per-process current working directory to that instead of the + * parents. + * + * XXX may fail to copy descriptors to child + */ + child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir); #if SYSV_SHM - if (parent->vm_shm) { + if (parent_proc->vm_shm) { /* XXX may fail to attach shm to child */ - (void)shmfork(parent,child); + (void)shmfork(parent_proc, child_proc); } #endif /* * inherit the limit structure to child */ - proc_limitfork(parent, child); + proc_limitfork(parent_proc, child_proc); - if (child->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - uint64_t rlim_cur = child->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur; - child->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur; + if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur; + child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? 
__INT_MAX__ : rlim_cur; } - bzero(&child->p_stats->pstat_startzero, - (unsigned) ((caddr_t)&child->p_stats->pstat_endzero - - (caddr_t)&child->p_stats->pstat_startzero)); - - bzero(&child->p_stats->user_p_prof, sizeof(struct user_uprof)); - - if (parent->p_sigacts != NULL) - (void)memcpy(child->p_sigacts, - parent->p_sigacts, sizeof *child->p_sigacts); + /* Intialize new process stats, including start time */ + /* non-zeroed portion contains garbage AFAICT */ + bzero(&child_proc->p_stats->pstat_startzero, + (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero - + (caddr_t)&child_proc->p_stats->pstat_startzero)); + bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof)); + microtime(&child_proc->p_start); + child_proc->p_stats->p_start = child_proc->p_start; /* for compat */ + + if (parent_proc->p_sigacts != NULL) + (void)memcpy(child_proc->p_sigacts, + parent_proc->p_sigacts, sizeof *child_proc->p_sigacts); else - (void)memset(child->p_sigacts, 0, sizeof *child->p_sigacts); + (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts); - sessp = proc_session(parent); - if (sessp->s_ttyvp != NULL && parent->p_flag & P_CONTROLT) - OSBitOrAtomic(P_CONTROLT, (UInt32 *)&child->p_flag); + sessp = proc_session(parent_proc); + if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT) + OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag); session_rele(sessp); - /* block all signals to reach the process */ - if (lock) { - proc_signalstart(child, 0); - proc_transstart(child, 0); - } + /* + * block all signals to reach the process. + * no transition race should be occuring with the child yet, + * but indicate that the process is in (the creation) transition. 
+ */ + proc_signalstart(child_proc, 0); + proc_transstart(child_proc, 0); + + child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX; + TAILQ_INIT(&child_proc->p_uthlist); + TAILQ_INIT(&child_proc->p_aio_activeq); + TAILQ_INIT(&child_proc->p_aio_doneq); - TAILQ_INIT(&child->p_uthlist); - TAILQ_INIT(&child->aio_activeq); - TAILQ_INIT(&child->aio_doneq); /* Inherit the parent flags for code sign */ - child->p_csflags = parent->p_csflags; - child->p_wqthread = parent->p_wqthread; - child->p_threadstart = parent->p_threadstart; - child->p_pthsize = parent->p_pthsize; - workqueue_init_lock(child); + child_proc->p_csflags = parent_proc->p_csflags; + + /* + * All processes have work queue locks; cleaned up by + * reap_child_locked() + */ + workqueue_init_lock(child_proc); + + /* + * Copy work queue information + * + * Note: This should probably only happen in the case where we are + * creating a child that is a copy of the parent; since this + * routine is called in the non-duplication case of vfork() + * or posix_spawn(), then this information should likely not + * be duplicated. + * + * Work queue pointers that no longer point to code + */ + child_proc->p_wqthread = parent_proc->p_wqthread; + child_proc->p_threadstart = parent_proc->p_threadstart; + child_proc->p_pthsize = parent_proc->p_pthsize; + child_proc->p_targconc = parent_proc->p_targconc; + if ((parent_proc->p_lflag & P_LREGISTER) != 0) { + child_proc->p_lflag |= P_LREGISTER; + } + child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; +#if PSYNCH + pth_proc_hashinit(child_proc); +#endif /* PSYNCH */ #if CONFIG_LCTX - child->p_lctx = NULL; + child_proc->p_lctx = NULL; /* Add new process to login context (if any). */ - if (parent->p_lctx != NULL) { - LCTX_LOCK(parent->p_lctx); - enterlctx(child, parent->p_lctx, 0); + if (parent_proc->p_lctx != NULL) { + /* + * This should probably be delayed in the + * vfork() or posix_spawn() cases. 
+ */ + LCTX_LOCK(parent_proc->p_lctx); + enterlctx(child_proc, parent_proc->p_lctx, 0); } #endif bad: - return(child); + return(child_proc); } void @@ -1098,7 +1371,7 @@ uthread_zone_init(void) { if (!uthread_zone_inited) { uthread_zone = zinit(sizeof(struct uthread), - THREAD_MAX * sizeof(struct uthread), + thread_max * sizeof(struct uthread), THREAD_CHUNK * sizeof(struct uthread), "uthreads"); uthread_zone_inited = 1; @@ -1106,7 +1379,7 @@ uthread_zone_init(void) } void * -uthread_alloc(task_t task, thread_t thread) +uthread_alloc(task_t task, thread_t thread, int noinherit) { proc_t p; uthread_t uth; @@ -1131,7 +1404,7 @@ uthread_alloc(task_t task, thread_t thread) * one later, it will be lazily assigned from the task's process. */ uth_parent = (uthread_t)get_bsdthread_info(current_thread()); - if (task == current_task() && + if ((noinherit == 0) && task == current_task() && uth_parent != NULL && IS_VALID_CRED(uth_parent->uu_ucred)) { /* @@ -1145,14 +1418,21 @@ uthread_alloc(task_t task, thread_t thread) if (uth_parent->uu_flag & UT_SETUID) uth->uu_flag |= UT_SETUID; } else { - uth->uu_ucred = NOCRED; + /* sometimes workqueue threads are created out task context */ + if ((task != kernel_task) && (p != PROC_NULL)) + uth->uu_ucred = kauth_cred_proc_ref(p); + else + uth->uu_ucred = NOCRED; } if ((task != kernel_task) && p) { proc_lock(p); - if (uth_parent) { + if (noinherit != 0) { + /* workq threads will not inherit masks */ + uth->uu_sigmask = ~workq_threadmask; + } else if (uth_parent) { if (uth_parent->uu_flag & UT_SAS_OLDMASK) uth->uu_sigmask = uth_parent->uu_oldmask; else @@ -1185,13 +1465,16 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) proc_t p = (proc_t)bsd_info; - if (uth->uu_lowpri_window) { + if (uth->uu_lowpri_window || uth->uu_throttle_info) { /* * task is marked as a low priority I/O type * and we've somehow managed to not dismiss the throttle * through the normal exit paths back to user space... 
* no need to throttle this thread since its going away * but we do need to update our bookeeping w/r to throttled threads + * + * Calling this routine will clean up any throttle info reference + * still inuse by the thread. */ throttle_lowpri_io(FALSE); } @@ -1224,20 +1507,31 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) sel->wql = 0; } - + if(uth->pth_name != NULL) + { + kfree(uth->pth_name, MAXTHREADNAMESIZE); + uth->pth_name = 0; + } if ((task != kernel_task) && p) { if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL)) { vfork_exit_internal(uth->uu_proc, 0, 1); } + /* + * Remove the thread from the process list and + * transfer [appropriate] pending signals to the process. + */ if (get_bsdtask_info(task) == p) { proc_lock(p); TAILQ_REMOVE(&p->p_uthlist, uth, uu_list); + p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask)); proc_unlock(p); } #if CONFIG_DTRACE - if (uth->t_dtrace_scratch != NULL) { - dtrace_ptss_release_entry(p, uth->t_dtrace_scratch); + struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch; + uth->t_dtrace_scratch = NULL; + if (tmpptr != NULL) { + dtrace_ptss_release_entry(p, tmpptr); } #endif } diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c index 7269357e4..31b25d885 100644 --- a/bsd/kern/kern_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -134,14 +134,6 @@ static int lf_split(struct lockf *, struct lockf *); static void lf_wakelock(struct lockf *, boolean_t); -/* - * in order to mitigate risk - * don't switch to new wake-one method unless - * we have at least this many waiters to wake up - */ -#define SAFE_WAITER_LIMIT 20 - - /* * lf_advlock * @@ -267,7 +259,6 @@ lf_advlock(struct vnop_advlock_args *ap) lock->lf_type = fl->l_type; lock->lf_head = head; lock->lf_next = (struct lockf *)0; - lock->lf_waiters = 0; TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; @@ -514,7 +505,6 @@ lf_setlock(struct lockf *lock) */ lock->lf_next = block; 
TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); - block->lf_waiters++; if ( !(lock->lf_flags & F_FLOCK)) block->lf_flags &= ~F_WAKE1_SAFE; @@ -535,9 +525,6 @@ lf_setlock(struct lockf *lock) tlock->lf_next = block; } TAILQ_CONCAT(&block->lf_blkhd, &lock->lf_blkhd, lf_block); - - block->lf_waiters += lock->lf_waiters; - lock->lf_waiters = 0; } } if (error) { /* XXX */ @@ -551,7 +538,6 @@ lf_setlock(struct lockf *lock) */ if (lock->lf_next) { TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); - lock->lf_next->lf_waiters--; lock->lf_next = NOLOCKF; } if (!TAILQ_EMPTY(&lock->lf_blkhd)) @@ -646,12 +632,8 @@ lf_setlock(struct lockf *lock) ltmp = TAILQ_FIRST(&overlap->lf_blkhd); TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, lf_block); - overlap->lf_waiters--; - TAILQ_INSERT_TAIL(&lock->lf_blkhd, ltmp, lf_block); - lock->lf_waiters++; - ltmp->lf_next = lock; } } @@ -1092,13 +1074,12 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) struct lockf *wakelock; boolean_t wake_all = TRUE; - if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE) && listhead->lf_waiters > SAFE_WAITER_LIMIT) + if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) wake_all = FALSE; while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { wakelock = TAILQ_FIRST(&listhead->lf_blkhd); TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); - listhead->lf_waiters--; wakelock->lf_next = NOLOCKF; #ifdef LOCKF_DEBUGGING @@ -1106,12 +1087,14 @@ lf_wakelock(struct lockf *listhead, boolean_t force_all) lf_print("lf_wakelock: awakening", wakelock); #endif /* LOCKF_DEBUGGING */ if (wake_all == FALSE) { + /* + * If there are items on the list head block list, + * move them to the wakelock list instead, and then + * correct their lf_next pointers. 
+ */ + if (!TAILQ_EMPTY(&listhead->lf_blkhd)) { + TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block); - TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block); - wakelock->lf_waiters = listhead->lf_waiters; - listhead->lf_waiters = 0; - - if (!TAILQ_EMPTY(&wakelock->lf_blkhd)) { struct lockf *tlock; TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) { diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index a1c8f1b50..d2fa09c77 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -96,10 +96,7 @@ #include #include -#include - #include -#include #include @@ -165,11 +162,7 @@ const char *memname[] = { #else "", /* 27 M_DQUOT */ #endif -#if FFS - "UFS mount", /* 28 M_UFSMNT */ -#else "", /* 28 M_UFSMNT */ -#endif #if (SYSV_SEM || SYSV_MSG || SYSV_SHM) "shm", /* 29 M_SHM */ #else @@ -190,11 +183,7 @@ const char *memname[] = { "pstats", /* 42 M_SUBPROC */ "LFS segment", /* 43 M_SEGMENT */ "LFS node", /* 44 M_LFSNODE */ -#if FFS - "FFS node", /* 45 M_FFSNODE */ -#else "", /* 45 M_FFSNODE */ -#endif "MFS node", /* 46 M_MFSNODE */ "NQNFS Lease", /* 47 M_NQLEASE */ "NQNFS Host", /* 48 M_NQMHOST */ @@ -212,13 +201,8 @@ const char *memname[] = { "in_multi", /* 54 M_IPMADDR */ "ether_multi", /* 55 M_IFMADDR */ "mrt", /* 56 M_MRTABLE */ -#if CD9660 - "ISOFS mount", /* 57 M_ISOFSMNT */ - "ISOFS node", /* 58 M_ISOFSNODE */ -#else - "", /* 57 M_ISOFSMNT */ - "", /* 58 M_ISOFSNODE */ -#endif + "", /* 57 unused entry */ + "", /* 58 unused entry */ #if (NFSCLIENT || NFSSERVER) "NFSV3 srvdesc",/* 59 M_NFSRVDESC */ "NFSV3 diroff", /* 60 M_NFSDIROFF */ @@ -289,7 +273,11 @@ const char *memname[] = { "fileglob", /* 99 M_FILEGLOB */ "kauth", /* 100 M_KAUTH */ "dummynet", /* 101 M_DUMMYNET */ +#ifndef __LP64__ "unsafe_fsnode", /* 102 M_UNSAFEFS */ +#else + "", /* 102 M_UNSAFEFS */ +#endif /* __LP64__ */ "macpipelabel", /* 103 M_MACPIPELABEL */ "mactemp", /* 104 M_MACTEMP */ "sbuf", /* 105 M_SBUF */ @@ -300,6 +288,11 @@ const char *memname[] = 
{ #else "", /* 108 M_TRAFFIC_MGT */ #endif +#if HFS_COMPRESSION + "decmpfs_cnode",/* 109 M_DECMPFS_CNODE */ +#else + "", /* 109 M_DECMPFS_CNODE */ +#endif /* HFS_COMPRESSION */ }; /* for use with kmzones.kz_zalloczone */ @@ -333,7 +326,7 @@ struct kmzones { { SOS(ucred), KMZ_CREATEZONE }, /* 16 M_CRED */ { SOS(pgrp), KMZ_CREATEZONE }, /* 17 M_PGRP */ { SOS(session), KMZ_CREATEZONE }, /* 18 M_SESSION */ - { SOS(iovec_32), KMZ_LOOKUPZONE }, /* 19 M_IOV32 */ + { SOS(user32_iovec), KMZ_LOOKUPZONE }, /* 19 M_IOV32 */ { SOS(mount), KMZ_CREATEZONE }, /* 20 M_MOUNT */ { 0, KMZ_MALLOC }, /* 21 M_FHANDLE */ #if (NFSCLIENT || NFSSERVER) @@ -352,11 +345,7 @@ struct kmzones { #else { 0, KMZ_MALLOC }, /* 27 M_DQUOT */ #endif -#if FFS - { SOX(ufsmount), KMZ_LOOKUPZONE }, /* 28 M_UFSMNT */ -#else { 0, KMZ_MALLOC }, /* 28 M_UFSMNT */ -#endif { 0, KMZ_MALLOC }, /* 29 M_CGSUM */ { SOS(plimit), KMZ_CREATEZONE }, /* 30 M_PLIMIT */ { SOS(sigacts), KMZ_CREATEZONE }, /* 31 M_SIGACTS */ @@ -373,11 +362,7 @@ struct kmzones { { SOS(pstats), KMZ_CREATEZONE }, /* 42 M_PSTATS */ { 0, KMZ_MALLOC }, /* 43 M_SEGMENT */ { M_FFSNODE, KMZ_SHAREZONE }, /* 44 M_LFSNODE */ -#if FFS - { SOS(inode), KMZ_CREATEZONE }, /* 45 M_FFSNODE */ -#else { 0, KMZ_MALLOC }, /* 45 M_FFSNODE */ -#endif { M_FFSNODE, KMZ_SHAREZONE }, /* 46 M_MFSNODE */ { 0, KMZ_MALLOC }, /* 47 M_NQLEASE */ { 0, KMZ_MALLOC }, /* 48 M_NQMHOST */ @@ -399,13 +384,8 @@ struct kmzones { { SOX(ether_multi), KMZ_LOOKUPZONE }, /* 55 M_IFMADDR */ { SOX(mrt), KMZ_CREATEZONE }, /* 56 M_MRTABLE */ -#if CD9660 - { SOX(iso_mnt), KMZ_LOOKUPZONE }, /* 57 M_ISOFSMNT */ - { SOS(iso_node), KMZ_CREATEZONE }, /* 58 M_ISOFSNODE */ -#else - { 0, KMZ_MALLOC }, /* 57 M_ISOFSMNT */ - { 0, KMZ_MALLOC }, /* 58 M_ISOFSNODE */ -#endif + { 0, KMZ_MALLOC }, /* 57 unused entry */ + { 0, KMZ_MALLOC }, /* 58 unused entry */ #if (NFSCLIENT || NFSSERVER) { SOS(nfsrv_descript), KMZ_CREATEZONE }, /* 59 M_NFSRVDESC */ @@ -468,17 +448,26 @@ struct kmzones { #endif { 
SOS(cl_readahead), KMZ_CREATEZONE }, /* 96 M_CLRDAHEAD */ { SOS(cl_writebehind),KMZ_CREATEZONE }, /* 97 M_CLWRBEHIND */ - { SOS(iovec_64), KMZ_LOOKUPZONE }, /* 98 M_IOV64 */ + { SOS(user64_iovec), KMZ_LOOKUPZONE }, /* 98 M_IOV64 */ { SOS(fileglob), KMZ_CREATEZONE }, /* 99 M_FILEGLOB */ { 0, KMZ_MALLOC }, /* 100 M_KAUTH */ { 0, KMZ_MALLOC }, /* 101 M_DUMMYNET */ +#ifndef __LP64__ { SOS(unsafe_fsnode),KMZ_CREATEZONE }, /* 102 M_UNSAFEFS */ +#else + { 0, KMZ_MALLOC }, /* 102 M_UNSAFEFS */ +#endif /* __LP64__ */ { 0, KMZ_MALLOC }, /* 103 M_MACPIPELABEL */ { 0, KMZ_MALLOC }, /* 104 M_MACTEMP */ { 0, KMZ_MALLOC }, /* 105 M_SBUF */ { 0, KMZ_MALLOC }, /* 106 M_HFS_EXTATTR */ { 0, KMZ_MALLOC }, /* 107 M_LCTX */ { 0, KMZ_MALLOC }, /* 108 M_TRAFFIC_MGT */ +#if HFS_COMPRESSION + { SOS(decmpfs_cnode),KMZ_CREATEZONE }, /* 109 M_DECMPFS_CNODE */ +#else + { 0, KMZ_MALLOC }, /* 109 M_DECMPFS_CNODE */ +#endif /* HFS_COMPRESSION */ #undef SOS #undef SOX }; @@ -564,6 +553,21 @@ _MALLOC( mem = (void *)kalloc_noblock(memsize); } else { mem = (void *)kalloc(memsize); + + if (mem == NULL) { + + /* + * We get here when the caller told us to block waiting for memory, but + * kalloc said there's no memory left to get. Generally, this means there's a + * leak or the caller asked for an impossibly large amount of memory. Since there's + * nothing left to wait for and the caller isn't expecting a NULL return code, we + * just panic. This is less than ideal, but returning NULL doesn't help since the + * majority of callers don't check the return value and will just dereference the pointer and + * trap anyway. We may as well get a more descriptive message out while we can. 
+ */ + + panic("_MALLOC: kalloc returned NULL (potential leak), size %llu", (uint64_t) size); + } } if (!mem) return (0); diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 912fdef3f..f5e141455 100644 --- a/bsd/kern/kern_memorystatus.c +++ b/bsd/kern/kern_memorystatus.c @@ -26,40 +26,22 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ * */ -/*- - * Copyright (c) 1999,2000,2001 Jonathan Lemon - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ #include #include #include +#include #include #include #include +#include +#include +#include +#include +#include #include +#include extern unsigned int vm_page_free_count; extern unsigned int vm_page_active_count; @@ -73,6 +55,25 @@ int kern_memorystatus_wakeup = 0; int kern_memorystatus_level = 0; int kern_memorystatus_last_level = 0; unsigned int kern_memorystatus_kev_failure_count = 0; +int kern_memorystatus_level_critical = 5; + +static struct { + jetsam_kernel_stats_t stats; + size_t entry_count; + jetsam_snapshot_entry_t entries[kMaxSnapshotEntries]; +} jetsam_snapshot; + +static jetsam_priority_entry_t jetsam_priority_list[kMaxPriorityEntries]; +#define jetsam_snapshot_list jetsam_snapshot.entries + +static int jetsam_priority_list_index = 0; +static int jetsam_priority_list_count = 0; +static int jetsam_snapshot_list_count = 0; + +static lck_mtx_t * jetsam_list_mlock; +static lck_attr_t * jetsam_lck_attr; +static lck_grp_t * jetsam_lck_grp; +static lck_grp_attr_t * jetsam_lck_grp_attr; SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD, &kern_memorystatus_level, 0, ""); SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD, &kern_memorystatus_kev_failure_count, 0, ""); @@ -80,36 +81,156 @@ SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD, &kern_m __private_extern__ void kern_memorystatus_init(void) { + jetsam_lck_attr = lck_attr_alloc_init(); + jetsam_lck_grp_attr= lck_grp_attr_alloc_init(); + jetsam_lck_grp = lck_grp_alloc_init("jetsam", jetsam_lck_grp_attr); + jetsam_list_mlock = lck_mtx_alloc_init(jetsam_lck_grp, jetsam_lck_attr); + (void)kernel_thread(kernel_task, kern_memorystatus_thread); } +static uint32_t +jetsam_task_page_count(task_t task) +{ + kern_return_t ret; + static task_info_data_t data; + static struct task_basic_info *info = (struct task_basic_info *)&data; + static mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT; + + ret = task_info(task, TASK_BASIC_INFO, (task_info_t)&data, 
&count); + if (ret == KERN_SUCCESS) { + return info->resident_size / PAGE_SIZE; + } + return 0; +} + +static uint32_t +jetsam_flags_for_pid(pid_t pid) +{ /* Linear search of jetsam_priority_list; returns the matching entry's flags, or 0 when pid is not listed. */ + int i; + + for (i = 0; i < jetsam_priority_list_count; i++) { + if (pid == jetsam_priority_list[i].pid) { + return jetsam_priority_list[i].flags; + } + } + return 0; +} + +static void +jetsam_snapshot_procs(void) +{ /* Record the VM page counters and one entry per process on allproc (at most kMaxSnapshotEntries) into jetsam_snapshot. */ + proc_t p; + int i = 0; + + jetsam_snapshot.stats.free_pages = vm_page_free_count; + jetsam_snapshot.stats.active_pages = vm_page_active_count; + jetsam_snapshot.stats.inactive_pages = vm_page_inactive_count; + jetsam_snapshot.stats.purgeable_pages = vm_page_purgeable_count; + jetsam_snapshot.stats.wired_pages = vm_page_wire_count; + proc_list_lock(); + LIST_FOREACH(p, &allproc, p_list) { + task_t task = p->task; + jetsam_snapshot_list[i].pid = p->p_pid; + jetsam_snapshot_list[i].pages = jetsam_task_page_count(task); + jetsam_snapshot_list[i].flags = jetsam_flags_for_pid(p->p_pid); + strlcpy(&jetsam_snapshot_list[i].name[0], p->p_comm, MAXCOMLEN+1); +#ifdef DEBUG + printf("jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + p->p_pid, + p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], + p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); +#endif + memcpy(&jetsam_snapshot_list[i].uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); + i++; + if (i == kMaxSnapshotEntries) { /* array is full: stop before writing past the last slot */ + break; + } + } + proc_list_unlock(); + jetsam_snapshot.entry_count = jetsam_snapshot_list_count = i; /* i is the number of entries filled in by the loop above. NOTE(review): this was previously i - 1, which left the last filled entry uncounted; confirm nothing relied on that. */ +} + +static void +jetsam_mark_pid_in_snapshot(pid_t pid) +{ /* Set kJetsamFlagsKilled on the first snapshot entry whose pid matches; does nothing when pid is not in the snapshot. */ + + int i = 0; + + for (i = 0; i < jetsam_snapshot_list_count; i++) { + if (jetsam_snapshot_list[i].pid == pid) { + jetsam_snapshot_list[i].flags |= kJetsamFlagsKilled; + return; + } + } +} + +static int +jetsam_kill_top_proc(void) +{ + proc_t p; + + if (jetsam_snapshot_list_count == 0) { +
jetsam_snapshot_procs(); + } + lck_mtx_lock(jetsam_list_mlock); + while (jetsam_priority_list_index < jetsam_priority_list_count) { + pid_t aPid; + aPid = jetsam_priority_list[jetsam_priority_list_index].pid; + jetsam_priority_list_index++; + /* skip empty slots in the list */ + if (aPid == 0) { + continue; // with lock held + } + lck_mtx_unlock(jetsam_list_mlock); + jetsam_mark_pid_in_snapshot(aPid); + p = proc_find(aPid); + if (p != NULL) { +#if DEBUG + printf("jetsam: killing pid %d [%s] - memory_status_level: %d - ", aPid, p->p_comm, kern_memorystatus_level); +#endif /* DEBUG */ + exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL); + proc_rele(p); +#if DEBUG + printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level); +#endif /* DEBUG */ + return 0; + } + lck_mtx_lock(jetsam_list_mlock); + } + lck_mtx_unlock(jetsam_list_mlock); + return -1; +} + static void kern_memorystatus_thread(void) { struct kev_msg ev_msg; - struct { - uint32_t free_pages; - uint32_t active_pages; - uint32_t inactive_pages; - uint32_t purgeable_pages; - uint32_t wired_pages; - } data; + jetsam_kernel_stats_t data; int ret; while(1) { - + + while (kern_memorystatus_level <= kern_memorystatus_level_critical) { + if (jetsam_kill_top_proc() < 0) { + break; + } + } + kern_memorystatus_last_level = kern_memorystatus_level; ev_msg.vendor_code = KEV_VENDOR_APPLE; ev_msg.kev_class = KEV_SYSTEM_CLASS; ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; - /* pass the memory status level in the event code (as percent used) */ - ev_msg.event_code = 100 - kern_memorystatus_last_level; + /* pass the memory status level (percent free) */ + ev_msg.event_code = kMemoryStatusLevelNote; - ev_msg.dv[0].data_length = sizeof data; - ev_msg.dv[0].data_ptr = &data; - ev_msg.dv[1].data_length = 0; + ev_msg.dv[0].data_length = sizeof kern_memorystatus_last_level; + ev_msg.dv[0].data_ptr = &kern_memorystatus_last_level; + ev_msg.dv[1].data_length = sizeof data; + ev_msg.dv[1].data_ptr = 
&data; + ev_msg.dv[2].data_length = 0; data.free_pages = vm_page_free_count; data.active_pages = vm_page_active_count; @@ -123,6 +244,20 @@ kern_memorystatus_thread(void) printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); } + if (jetsam_snapshot_list_count) { + size_t snapshot_size = sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count; + ev_msg.event_code = kMemoryStatusSnapshotNote; + ev_msg.dv[0].data_length = sizeof snapshot_size; + ev_msg.dv[0].data_ptr = &snapshot_size; + ev_msg.dv[1].data_length = 0; + + ret = kev_post_msg(&ev_msg); + if (ret) { + kern_memorystatus_kev_failure_count++; + printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); + } + } + if (kern_memorystatus_level >= kern_memorystatus_last_level + 5 || kern_memorystatus_level <= kern_memorystatus_last_level - 5) continue; @@ -131,3 +266,105 @@ kern_memorystatus_thread(void) (void)thread_block((thread_continue_t)kern_memorystatus_thread); } } + +static int +sysctl_io_variable(struct sysctl_req *req, void *pValue, size_t currentsize, size_t maxsize, size_t *newsize) +{ + int error; + + /* Copy blob out */ + error = SYSCTL_OUT(req, pValue, currentsize); + + /* error or nothing to set */ + if (error || !req->newptr) + return(error); + + if (req->newlen > maxsize) { + return EINVAL; + } + error = SYSCTL_IN(req, pValue, req->newlen); + + if (!error) { + *newsize = req->newlen; + } + + return(error); +} + +static int +sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int i, ret; + jetsam_priority_entry_t temp_list[kMaxPriorityEntries]; + size_t newsize, currentsize; + + if (req->oldptr) { + lck_mtx_lock(jetsam_list_mlock); + for (i = 0; i < jetsam_priority_list_count; i++) { + temp_list[i] = jetsam_priority_list[i]; + } + lck_mtx_unlock(jetsam_list_mlock); + } + + currentsize = sizeof(jetsam_priority_list[0]) * 
jetsam_priority_list_count; + + ret = sysctl_io_variable(req, &temp_list[0], currentsize, sizeof(temp_list), &newsize); + + if (!ret && req->newptr) { + jetsam_priority_list_count = newsize / sizeof(jetsam_priority_list[0]); +#if DEBUG + printf("set jetsam priority pids = { "); + for (i = 0; i < jetsam_priority_list_count; i++) { + printf("%d ", temp_list[i].pid); + } + printf("}\n"); +#endif /* DEBUG */ + lck_mtx_lock(jetsam_list_mlock); + for (i = 0; i < jetsam_priority_list_count; i++) { + jetsam_priority_list[i] = temp_list[i]; + } + for (i = jetsam_priority_list_count; i < kMaxPriorityEntries; i++) { + jetsam_priority_list[i].pid = 0; + jetsam_priority_list[i].flags = 0; + } + jetsam_priority_list_index = 0; + lck_mtx_unlock(jetsam_list_mlock); + } + return ret; +} + +static int +sysctl_handle_kern_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int ret; + size_t currentsize = 0; + + if (jetsam_snapshot_list_count > 0) { + currentsize = sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count; + } + if (!currentsize) { + if (req->oldptr) { +#ifdef DEBUG + printf("kern.memorystatus_snapshot returning EINVAL\n"); +#endif + return EINVAL; + } + else { +#ifdef DEBUG + printf("kern.memorystatus_snapshot returning 0 for size\n"); +#endif + } + } else { +#ifdef DEBUG + printf("kern.memorystatus_snapshot returning %ld for size\n", (long)currentsize); +#endif + } + ret = sysctl_io_variable(req, &jetsam_snapshot, currentsize, 0, NULL); + if (!ret && req->oldptr) { + jetsam_snapshot.entry_count = jetsam_snapshot_list_count = 0; + } + return ret; +} + +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, sysctl_handle_kern_memorystatus_priority_list, "S,jetsam_priorities", ""); +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_kern_memorystatus_snapshot, 
"S,jetsam_snapshot", ""); diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index 0788ed33e..14e071826 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -113,8 +113,8 @@ extern vm_map_t bsd_pageable_map; #include /* for host_info() */ -#ifdef __i386__ -#include /* for cpuid_info() */ +#if defined(__i386__) || defined(__x86_64__) +#include /* for cpuid_info() */ #endif @@ -339,7 +339,7 @@ SYSCTL_PROC (_hw, OID_AUTO, physicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_K SYSCTL_PROC (_hw, OID_AUTO, physicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_PHYSICALCPUMAX, sysctl_hw_generic, "I", ""); SYSCTL_PROC (_hw, OID_AUTO, logicalcpu, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPU, sysctl_hw_generic, "I", ""); SYSCTL_PROC (_hw, OID_AUTO, logicalcpu_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_KERN, 0, HW_LOCAL_LOGICALCPUMAX, sysctl_hw_generic, "I", ""); -SYSCTL_INT (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN, NULL, BYTE_ORDER, ""); +SYSCTL_INT (_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD | CTLFLAG_KERN, (int *)NULL, BYTE_ORDER, ""); SYSCTL_INT (_hw, OID_AUTO, cputype, CTLFLAG_RD | CTLFLAG_KERN, &cputype, 0, ""); SYSCTL_INT (_hw, OID_AUTO, cpusubtype, CTLFLAG_RD | CTLFLAG_KERN, &cpusubtype, 0, ""); SYSCTL_INT (_hw, OID_AUTO, cpu64bit_capable, CTLFLAG_RD | CTLFLAG_KERN, &cpu64bit, 0, ""); @@ -372,7 +372,7 @@ SYSCTL_INT (_hw, OID_AUTO, packages, CTLFLAG_RD | CTLFLAG_KERN, &packages, 0 */ SYSCTL_NODE(_hw, OID_AUTO, optional, CTLFLAG_RW|CTLFLAG_LOCKED, NULL, "optional features"); -SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN, 0, 1, ""); /* always set */ +SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN, (int *)NULL, 1, ""); /* always set */ /* * Deprecated variables. 
These are supported for backwards compatibility @@ -386,18 +386,18 @@ SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN, 0, * * The *_compat nodes are *NOT* visible within the kernel. */ -SYSCTL_INT (_hw, HW_PAGESIZE, pagesize_compat, CTLFLAG_RD | CTLFLAG_MASKED, &page_size, 0, ""); -SYSCTL_INT (_hw, HW_BUS_FREQ, busfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.bus_clock_rate_hz, 0, ""); -SYSCTL_INT (_hw, HW_CPU_FREQ, cpufrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.cpu_clock_rate_hz, 0, ""); +SYSCTL_COMPAT_INT (_hw, HW_PAGESIZE, pagesize_compat, CTLFLAG_RD | CTLFLAG_MASKED, &page_size, 0, ""); +SYSCTL_COMPAT_INT (_hw, HW_BUS_FREQ, busfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.bus_clock_rate_hz, 0, ""); +SYSCTL_COMPAT_INT (_hw, HW_CPU_FREQ, cpufrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.cpu_clock_rate_hz, 0, ""); SYSCTL_PROC(_hw, HW_CACHELINE, cachelinesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_CACHELINE, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_L1ICACHESIZE, l1icachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L1ICACHESIZE, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_L1DCACHESIZE, l1dcachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L1DCACHESIZE, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_L2CACHESIZE, l2cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L2CACHESIZE, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_L3CACHESIZE, l3cachesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_L3CACHESIZE, sysctl_hw_generic, "I", ""); -SYSCTL_INT (_hw, HW_TB_FREQ, tbfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.timebase_frequency_hz, 0, ""); +SYSCTL_COMPAT_INT (_hw, HW_TB_FREQ, tbfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED, &gPEClockFrequencyInfo.timebase_frequency_hz, 0, ""); SYSCTL_PROC(_hw, HW_MACHINE, machine, 
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_MACHINE, sysctl_hw_generic, "A", ""); SYSCTL_PROC(_hw, HW_MODEL, model, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_MODEL, sysctl_hw_generic, "A", ""); -SYSCTL_UINT(_hw, HW_PHYSMEM, physmem, CTLFLAG_RD | CTLFLAG_MASKED, &mem_size, 0, ""); +SYSCTL_COMPAT_UINT(_hw, HW_PHYSMEM, physmem, CTLFLAG_RD | CTLFLAG_MASKED, &mem_size, 0, ""); SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_USERMEM, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_EPOCH, epoch, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_EPOCH, sysctl_hw_generic, "I", ""); SYSCTL_PROC(_hw, HW_VECTORUNIT, vectorunit, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED, 0, HW_VECTORUNIT, sysctl_hw_generic, "I", ""); @@ -423,7 +423,7 @@ SYSCTL_INT(_hw_optional, OID_AUTO, stfiwx, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG SYSCTL_INT(_hw_optional, OID_AUTO, dcba, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &dcba_flag, 0, ""); SYSCTL_INT(_hw_optional, OID_AUTO, datastreams, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &datastreams_flag, 0, ""); SYSCTL_INT(_hw_optional, OID_AUTO, dcbtstreams, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, &dcbtstreams_flag, 0, ""); -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) int mmx_flag = -1; int sse_flag = -1; int sse2_flag = -1; @@ -440,6 +440,8 @@ SYSCTL_INT(_hw_optional, OID_AUTO, sse3, CTLFLAG_RD | CTLFLAG_KERN, &sse3_flag, SYSCTL_INT(_hw_optional, OID_AUTO, supplementalsse3, CTLFLAG_RD | CTLFLAG_KERN, &supplementalsse3_flag, 0, ""); SYSCTL_INT(_hw_optional, OID_AUTO, sse4_1, CTLFLAG_RD | CTLFLAG_KERN, &sse4_1_flag, 0, ""); SYSCTL_INT(_hw_optional, OID_AUTO, sse4_2, CTLFLAG_RD | CTLFLAG_KERN, &sse4_2_flag, 0, ""); +/* "x86_64" is actually a preprocessor symbol on the x86_64 kernel, so we have to hack this */ +#undef x86_64 SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN, &x86_64_flag, 0, ""); #endif /* __ppc__ */ @@ -458,7 +460,7 @@ 
pmsSysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, return(error); intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ - error = pmsControl(ctl.request, (user_addr_t)(unsigned long)ctl.reqaddr, ctl.reqsize); + error = pmsControl(ctl.request, (user_addr_t)(uintptr_t)ctl.reqaddr, ctl.reqsize); (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ return(error); @@ -482,7 +484,7 @@ sysctl_mib_init(void) cputhreadtype = cpu_threadtype(); #if defined(__ppc__) cpu64bit = (_cpu_capabilities & k64Bit) == k64Bit; -#elif defined(__i386__) +#elif defined(__i386__) || defined (__x86_64__) cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit; #endif @@ -596,7 +598,7 @@ sysctl_mib_init(void) else packages = hinfo.max_cpus; -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) mmx_flag = ((_get_cpu_capabilities() & kHasMMX) == kHasMMX)? 1 : 0; sse_flag = ((_get_cpu_capabilities() & kHasSSE) == kHasSSE)? 1 : 0; sse2_flag = ((_get_cpu_capabilities() & kHasSSE2) == kHasSSE2)? 
1 : 0; diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index 9fa5bd2e0..02166d578 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -99,7 +99,7 @@ #include #include -#include +#include #include #include @@ -118,65 +118,12 @@ #include #include #include - -struct osmmap_args { - caddr_t addr; - int len; - int prot; - int share; - int fd; - long pos; -}; +#include /* XXX the following function should probably be static */ kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *, boolean_t, vm_size_t); -/* XXX the following two functions aren't used anywhere */ -int osmmap(proc_t , struct osmmap_args *, register_t *); -int mremap(void); - -int -sbrk(__unused proc_t p, __unused struct sbrk_args *uap, __unused register_t *retval) -{ - /* Not yet implemented */ - return (ENOTSUP); -} - -int -sstk(__unused proc_t p, __unused struct sstk_args *uap, __unused register_t *retval) -{ - /* Not yet implemented */ - return (ENOTSUP); -} - - -int -osmmap( - proc_t curp, - struct osmmap_args *uap, - register_t *retval) -{ - struct mmap_args newargs; - user_addr_t addr; - int ret; - - if ((uap->share == MAP_SHARED )|| (uap->share == MAP_PRIVATE )) { - newargs.addr = CAST_USER_ADDR_T(uap->addr); - newargs.len = CAST_USER_ADDR_T(uap->len); - newargs.prot = uap->prot; - newargs.flags = uap->share; - newargs.fd = uap->fd; - newargs.pos = (off_t)uap->pos; - ret = mmap(curp, &newargs, &addr); - if (ret == 0) - *retval = CAST_DOWN(register_t, addr); - } else - ret = EINVAL; - return ret; -} - - /* * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct * XXX usage is PROT_* from an interface perspective. 
Thus the values of @@ -203,7 +150,8 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) boolean_t docow; vm_prot_t maxprot; void *handle; - vm_pager_t pager; + memory_object_t pager = MEMORY_OBJECT_NULL; + memory_object_control_t control; int mapanon=0; int fpref=0; int error =0; @@ -357,7 +305,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) VATTR_SET_ACTIVE(&va, va_access_time); vnode_setattr(vp, &va, ctx); } - + /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). @@ -393,7 +341,12 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) */ if ((flags & MAP_SHARED) != 0) { - if ((fp->f_fglob->fg_flag & FWRITE) != 0) { + if ((fp->f_fglob->fg_flag & FWRITE) != 0 && + /* + * Do not allow writable mappings of + * swap files (see vm_swapfile_pager.c). + */ + !vnode_isswap(vp)) { /* * check for write access * @@ -485,7 +438,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) * Lookup/allocate object. */ if (handle == NULL) { - pager = NULL; + control = NULL; #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) @@ -511,12 +464,22 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); - if (result != KERN_SUCCESS) - goto out; } else { - pager = (vm_pager_t)ubc_getpager(vp); + if (vnode_isswap(vp)) { + /* + * Map swap files with a special pager + * that returns obfuscated contents. 
+ */ + control = NULL; + pager = swapfile_pager_setup(vp); + if (pager != MEMORY_OBJECT_NULL) { + control = swapfile_pager_control(pager); + } + } else { + control = ubc_getobject(vp, UBC_FLAGS_NONE); + } - if (pager == NULL) { + if (control == NULL) { (void)vnode_put(vp); error = ENOMEM; goto bad; @@ -552,25 +515,20 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ - result = vm_map_enter_mem_object(user_map, + result = vm_map_enter_mem_object_control(user_map, &user_addr, user_size, 0, alloc_flags, - (ipc_port_t)pager, file_pos, + control, file_pos, docow, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); - - if (result != KERN_SUCCESS) { - (void)vnode_put(vp); - goto out; - } } - if (!mapanon) + if (!mapanon) { (void)vnode_put(vp); + } -out: switch (result) { case KERN_SUCCESS: *retval = user_addr + pageoff; @@ -588,6 +546,14 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) break; } bad: + if (pager != MEMORY_OBJECT_NULL) { + /* + * Release the reference on the pager. + * If the mapping was successful, it now holds + * an extra reference. 
+ */ + memory_object_deallocate(pager); + } if (fpref) fp_drop(p, fd, fp, 0); @@ -599,14 +565,14 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) } int -msync(__unused proc_t p, struct msync_args *uap, register_t *retval) +msync(__unused proc_t p, struct msync_args *uap, int32_t *retval) { __pthread_testcancel(1); return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval)); } int -msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused register_t *retval) +msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval) { mach_vm_offset_t addr; mach_vm_size_t size; @@ -674,14 +640,7 @@ msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused regi int -mremap(void) -{ - /* Not yet implemented */ - return (ENOTSUP); -} - -int -munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval) +munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval) { mach_vm_offset_t user_addr; mach_vm_size_t user_size; @@ -714,7 +673,7 @@ munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval) } int -mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retval) +mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) { register vm_prot_t prot; mach_vm_offset_t user_addr; @@ -727,7 +686,7 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retv AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - AUDIT_ARG(value, uap->prot); + AUDIT_ARG(value32, uap->prot); user_addr = (mach_vm_offset_t) uap->addr; user_size = (mach_vm_size_t) uap->len; @@ -785,7 +744,7 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retv int -minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retval) +minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval) { mach_vm_offset_t addr; mach_vm_size_t size; @@ -795,7 
+754,7 @@ minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retv AUDIT_ARG(addr, uap->addr); AUDIT_ARG(len, uap->len); - AUDIT_ARG(value, uap->inherit); + AUDIT_ARG(value32, uap->inherit); addr = (mach_vm_offset_t)uap->addr; size = (mach_vm_size_t)uap->len; @@ -814,7 +773,7 @@ minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retv } int -madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval) +madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval) { vm_map_t user_map; mach_vm_offset_t start; @@ -842,6 +801,21 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval case MADV_DONTNEED: new_behavior = VM_BEHAVIOR_DONTNEED; break; + case MADV_FREE: + new_behavior = VM_BEHAVIOR_FREE; + break; + case MADV_ZERO_WIRED_PAGES: + new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES; + break; + case MADV_FREE_REUSABLE: + new_behavior = VM_BEHAVIOR_REUSABLE; + break; + case MADV_FREE_REUSE: + new_behavior = VM_BEHAVIOR_REUSE; + break; + case MADV_CAN_REUSE: + new_behavior = VM_BEHAVIOR_CAN_REUSE; + break; default: return(EINVAL); } @@ -863,7 +837,7 @@ madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval } int -mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval) +mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval) { mach_vm_offset_t addr, first_addr, end; vm_map_t map; @@ -963,7 +937,7 @@ mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval } int -mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retvalval) +mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval) { vm_map_t user_map; vm_map_offset_t addr; @@ -1000,7 +974,7 @@ mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retvalval) } int -munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval) 
+munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval) { mach_vm_offset_t addr; mach_vm_size_t size; @@ -1021,39 +995,17 @@ munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval int -mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused register_t *retval) +mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval) { return (ENOSYS); } int -munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused register_t *retval) +munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval) { return(ENOSYS); } - -/* BEGIN DEFUNCT */ -int -obreak(__unused proc_t p, __unused struct obreak_args *uap, __unused register_t *retval) -{ - /* Not implemented, obsolete */ - return (ENOMEM); -} - -int both; - -int -ovadvise(__unused proc_t p, __unused struct ovadvise_args *uap, __unused register_t *retval) -{ - -#ifdef lint - both = 0; -#endif - return( 0 ); -} -/* END DEFUNCT */ - /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ kern_return_t map_fd(struct map_fd_args *args) @@ -1066,7 +1018,7 @@ map_fd(struct map_fd_args *args) kern_return_t ret; AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD); - AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va)); + AUDIT_ARG(addr, CAST_DOWN(user_addr_t, args->va)); AUDIT_ARG(fd, fd); ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size); @@ -1178,11 +1130,12 @@ map_fd_funneled( if (!findspace) { - vm_offset_t dst_addr; + //K64todo fix for 64bit user? 
+ uint32_t dst_addr; vm_map_copy_t tmp; if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || - trunc_page_32(dst_addr) != dst_addr) { + trunc_page(dst_addr) != dst_addr) { (void) vm_map_remove( my_map, map_addr, map_addr + map_size, @@ -1213,7 +1166,9 @@ map_fd_funneled( goto bad; } } else { - if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) { + // K64todo bug compatible now, should fix for 64bit user + uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr); + if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) { (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); diff --git a/bsd/kern/kern_newsysctl.c b/bsd/kern/kern_newsysctl.c index 54ce269fa..de083965e 100644 --- a/bsd/kern/kern_newsysctl.c +++ b/bsd/kern/kern_newsysctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -24,8 +24,8 @@ * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- + * + * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -75,7 +75,7 @@ #include #include -#include +#include /* struct sysctl_oid_list sysctl__debug_children; @@ -84,8 +84,6 @@ struct sysctl_oid_list sysctl__net_children; struct sysctl_oid_list sysctl__sysctl_children; */ -extern struct sysctl_oid *newsysctl_list[]; -extern struct sysctl_oid *machdep_sysctl_list[]; lck_rw_t * sysctl_geometry_lock = NULL; static void @@ -217,7 +215,7 @@ void sysctl_register_set(const char *set) { struct sysctl_oid **oidpp, *oidp; - LINKER_SET_FOREACH(oidpp, set) { + LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) { oidp = *oidpp; if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) { sysctl_register_oid(oidp); @@ -229,7 +227,7 @@ void sysctl_unregister_set(const char *set) { struct sysctl_oid **oidpp, *oidp; - LINKER_SET_FOREACH(oidpp, set) { + LINKER_SET_FOREACH(oidpp, struct sysctl_oid **, set) { oidp = *oidpp; if (!(oidp->oid_kind & CTLFLAG_NOAUTO)) { sysctl_unregister_oid(oidp); @@ -277,8 +275,9 @@ sysctl_io_number(struct sysctl_req *req, long long bigValue, size_t valueSize, v */ /* 32 bit value expected or 32 bit buffer offered */ - if ((valueSize == sizeof(int)) || - ((req->oldlen == sizeof(int)) && (valueSize == sizeof(long long)))) { + if (((valueSize == sizeof(int)) || + ((req->oldlen == sizeof(int)) && (valueSize == sizeof(long long)))) + && (req->oldptr)) { smallValue = (int)bigValue; if ((long long)smallValue != bigValue) return(ERANGE); @@ -1094,7 +1093,7 @@ new_sysctl(struct proc *p, struct sysctl_args *uap) return (error); error = userland_sysctl(p, name, uap->namelen, - CAST_USER_ADDR_T(uap->old), uap->oldlenp, 0, + CAST_USER_ADDR_T(uap->old), uap->oldlenp, CAST_USER_ADDR_T(uap->new), uap->newlen, &j); if (error && error != ENOMEM) return (error); @@ -1112,7 +1111,7 @@ new_sysctl(struct proc *p, struct sysctl_args *uap) */ int userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp, - size_t *oldlenp, int inkernel, user_addr_t newp, size_t newlen, + size_t *oldlenp, user_addr_t newp, size_t newlen, size_t 
*retval) { int error = 0; @@ -1123,13 +1122,7 @@ userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t oldp, req.p = p; if (oldlenp) { - if (inkernel) { - req.oldlen = *oldlenp; - } else { - error = copyin(CAST_USER_ADDR_T(oldlenp), &req.oldlen, sizeof(*oldlenp)); - if (error) - return (error); - } + req.oldlen = *oldlenp; } if (oldp) { @@ -1189,6 +1182,13 @@ sysctlnametomib(const char *name, int *mibp, size_t *sizep) { int oid[2]; int error; + char *non_const_name; + + /* + * NOTE: This cast is safe because the service node does not modify + * the contents of the string as part of its operation. + */ + non_const_name = __CAST_AWAY_QUALIFIER(name, const, char *); /* magic service node */ oid[0] = 0; @@ -1196,8 +1196,9 @@ sysctlnametomib(const char *name, int *mibp, size_t *sizep) /* look up OID for name */ *sizep *= sizeof(int); - error = sysctl(oid, 2, mibp, sizep, (void *)name, strlen(name)); + error = sysctl(oid, 2, mibp, sizep, non_const_name, strlen(name)); *sizep /= sizeof(int); + return(error); } diff --git a/bsd/kern/kern_panicinfo.c b/bsd/kern/kern_panicinfo.c index dc9042401..024ec5220 100644 --- a/bsd/kern/kern_panicinfo.c +++ b/bsd/kern/kern_panicinfo.c @@ -103,7 +103,7 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, return (ENOMEM); /* allocate some kernel wired memory for the new image */ - kret = kmem_alloc(kernel_map, &newimage, (vm_size_t)round_page_32(newlen)); + kret = kmem_alloc(kernel_map, &newimage, (vm_size_t)round_page(newlen)); if (kret != KERN_SUCCESS) { switch (kret) { @@ -164,8 +164,8 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, /* free the wired memory used by the previous image */ if ( prev_image_ptr != NULL ) { - (void)kmem_free(kernel_map, (vm_offset_t) prev_image_ptr, (vm_size_t)round_page_32(prev_image_size)); - printf("Panic UI memory freed (%d)\n", round_page_32(prev_image_size)); + (void)kmem_free(kernel_map, (vm_offset_t) prev_image_ptr, 
(vm_size_t)round_page(prev_image_size)); + printf("Panic UI memory freed (%p)\n", (void *)round_page(prev_image_size)); } } @@ -173,7 +173,7 @@ sysctl_dopanicinfo(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, errout: if ( newimage != (vm_offset_t )NULL ) - (void)kmem_free(kernel_map, newimage, (vm_size_t)round_page_32(newlen)); + (void)kmem_free(kernel_map, newimage, (vm_size_t)round_page(newlen)); return (error); } diff --git a/bsd/kern/kern_physio.c b/bsd/kern/kern_physio.c index 2255daf1d..2c4998ec3 100644 --- a/bsd/kern/kern_physio.c +++ b/bsd/kern/kern_physio.c @@ -78,9 +78,6 @@ #include #include -int rawread(dev_t dev, struct uio *uio); -int rawwrite(dev_t dev, struct uio *uio); - int physio( void (*f_strategy)(buf_t), buf_t bp, @@ -112,8 +109,12 @@ physio( void (*f_strategy)(buf_t), */ for (i = 0; i < uio->uio_iovcnt; i++) { if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { - if (!useracc(uio_iov_base_at(uio, i), - uio_iov_len_at(uio, i), + user_addr_t base; + user_size_t len; + + if (uio_getiov(uio, i, &base, &len) || + !useracc(base, + len, (flags == B_READ) ? B_WRITE : B_READ)) return (EFAULT); } @@ -156,10 +157,9 @@ physio( void (*f_strategy)(buf_t), * Note that I/O errors are handled with a 'goto' at the bottom * of the 'while' loop. */ - for (i = 0; i < uio->uio_iovcnt; i++) { - while (uio_iov_len_at(uio, i) > 0) { + while (uio_resid(uio) > 0) { - if ( (iosize = uio_iov_len_at(uio, i)) > MAXPHYSIO_WIRED) + if ( (iosize = uio_curriovlen(uio)) > MAXPHYSIO_WIRED) iosize = MAXPHYSIO_WIRED; /* * make sure we're set to issue a fresh I/O @@ -168,10 +168,9 @@ physio( void (*f_strategy)(buf_t), buf_reset(bp, flags); /* [set up the buffer for a maximum-sized transfer] */ - buf_setblkno(bp, uio->uio_offset / blocksize); + buf_setblkno(bp, uio_offset(uio) / blocksize); buf_setcount(bp, iosize); - // LP64todo - fix this! 
- buf_setdataptr(bp, (uintptr_t)CAST_DOWN(caddr_t, uio_iov_base_at(uio, i))); + buf_setdataptr(bp, (uintptr_t)CAST_DOWN(caddr_t, uio_curriovbase(uio))); /* * [call f_minphys to bound the tranfer size] @@ -214,10 +213,7 @@ physio( void (*f_strategy)(buf_t), * of data to transfer] */ done = buf_count(bp) - buf_resid(bp); - uio_iov_len_add_at(uio, -done, i); - uio_iov_base_add_at(uio, done, i); - uio->uio_offset += done; - uio_setresid(uio, (uio_resid(uio) - done)); + uio_update(uio, done); /* * Now, check for an error. @@ -225,7 +221,6 @@ physio( void (*f_strategy)(buf_t), */ if (error || done < todo) goto done; - } } done: @@ -253,23 +248,3 @@ minphys(struct buf *bp) buf_setcount(bp, min(MAXPHYS, buf_count(bp))); return buf_count(bp); } - -/* - * Do a read on a device for a user process. - */ -int -rawread(dev_t dev, struct uio *uio) -{ - return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, - dev, B_READ, minphys, uio, DEV_BSIZE)); -} - -/* - * Do a write on a device for a user process. 
- */ -int -rawwrite(dev_t dev, struct uio *uio) -{ - return (physio(cdevsw[major(dev)].d_strategy, (struct buf *)NULL, - dev, B_WRITE, minphys, uio, DEV_BSIZE)); -} diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 9dd8f6ad1..81c86f484 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -83,7 +83,6 @@ #include #include #include -#include #include #include #include @@ -101,6 +100,8 @@ #include #include #include +#include /* vm_map_switch_protect() */ +#include #if CONFIG_MACF #include @@ -560,13 +561,13 @@ proc_checkdeadrefs(__unused proc_t p) { #if __PROC_INTERNAL_DEBUG if ((p->p_listflag & P_LIST_INHASH) != 0) - panic("proc being freed and still in hash %x: %x\n", (unsigned int)p, (unsigned int)p->p_listflag); + panic("proc being freed and still in hash %p: %u\n", p, p->p_listflag); if (p->p_childrencnt != 0) - panic("proc being freed and pending children cnt %x:%x\n", (unsigned int)p, (unsigned int)p->p_childrencnt); + panic("proc being freed and pending children cnt %p:%d\n", p, p->p_childrencnt); if (p->p_refcount != 0) - panic("proc being freed and pending refcount %x:%x\n", (unsigned int)p, (unsigned int)p->p_refcount); + panic("proc being freed and pending refcount %p:%d\n", p, p->p_refcount); if (p->p_parentref != 0) - panic("proc being freed and pending parentrefs %x:%x\n", (unsigned int)p, (unsigned int)p->p_parentref); + panic("proc being freed and pending parentrefs %p:%d\n", p, p->p_parentref); #endif } @@ -757,6 +758,15 @@ proc_ucred(proc_t p) return(p->p_ucred); } +struct uthread * +current_uthread() +{ + thread_t th = current_thread(); + + return((struct uthread *)get_bsdthread_info(th)); +} + + int proc_is64bit(proc_t p) { @@ -781,12 +791,11 @@ bsd_set_dependency_capable(task_t task) proc_t p = get_bsdtask_info(task); if (p) { - OSBitOrAtomic(P_DEPENDENCY_CAPABLE, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_DEPENDENCY_CAPABLE, &p->p_flag); } } -/* LP64todo - figure out how to identify 64-bit processes if NULL procp */ int 
IS_64BIT_PROCESS(proc_t p) { @@ -803,7 +812,7 @@ proc_t pfind_locked(pid_t pid) { proc_t p; -#ifdef DEBUG +#if DEBUG proc_t q; #endif @@ -812,10 +821,10 @@ pfind_locked(pid_t pid) for (p = PIDHASH(pid)->lh_first; p != 0; p = p->p_hash.le_next) { if (p->p_pid == pid) { -#ifdef DEBUG +#if DEBUG for (q = p->p_hash.le_next; q != 0; q = q->p_hash.le_next) { if ((p !=q) && (q->p_pid == pid)) - panic("two procs with same pid %x:%x:%d:%d\n", (unsigned int)p, (unsigned int)q, p->p_pid, q->p_pid); + panic("two procs with same pid %p:%p:%d:%d\n", p, q, p->p_pid, q->p_pid); } #endif return (p); @@ -1014,14 +1023,18 @@ enterpgrp(proc_t p, pid_t pgid, int mksess) sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; - sess->s_ttyp = NULL; + sess->s_ttyp = TTY_NULL; sess->s_flags = 0; sess->s_listflags = 0; sess->s_ttypgrpid = NO_PID; +#ifdef CONFIG_EMBEDDED lck_mtx_init(&sess->s_mlock, proc_lck_grp, proc_lck_attr); +#else + lck_mtx_init(&sess->s_mlock, proc_mlock_grp, proc_lck_attr); +#endif bcopy(procsp->s_login, sess->s_login, sizeof(sess->s_login)); - OSBitAndAtomic(~((uint32_t)P_CONTROLT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTROLT), &p->p_flag); proc_list_lock(); LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash); proc_list_unlock(); @@ -1040,7 +1053,11 @@ enterpgrp(proc_t p, pid_t pgid, int mksess) proc_list_unlock(); } pgrp->pg_id = pgid; +#ifdef CONFIG_EMBEDDED lck_mtx_init(&pgrp->pg_mlock, proc_lck_grp, proc_lck_attr); +#else + lck_mtx_init(&pgrp->pg_mlock, proc_mlock_grp, proc_lck_attr); +#endif LIST_INIT(&pgrp->pg_members); pgrp->pg_membercnt = 0; pgrp->pg_jobc = 0; @@ -1093,8 +1110,7 @@ leavepgrp(proc_t p) static void pgdelete_dropref(struct pgrp *pgrp) { - struct tty * ttyp; - boolean_t fstate; + struct tty *ttyp; int emptypgrp = 1; struct session *sessp; @@ -1124,14 +1140,18 @@ pgdelete_dropref(struct pgrp *pgrp) proc_list_unlock(); - fstate = thread_funnel_set(kernel_flock, TRUE); - - ttyp = pgrp->pg_session->s_ttyp; - if ((ttyp 
!= NULL) && (pgrp->pg_session->s_ttyp->t_pgrp == pgrp)) { - pgrp->pg_session->s_ttyp->t_pgrp = NULL; - pgrp->pg_session->s_ttypgrpid = NO_PID; + ttyp = SESSION_TP(pgrp->pg_session); + if (ttyp != TTY_NULL) { + if (ttyp->t_pgrp == pgrp) { + tty_lock(ttyp); + /* Re-check after acquiring the lock */ + if (ttyp->t_pgrp == pgrp) { + ttyp->t_pgrp = NULL; + pgrp->pg_session->s_ttypgrpid = NO_PID; + } + tty_unlock(ttyp); + } } - (void) thread_funnel_set(kernel_flock, fstate); proc_list_lock(); @@ -1142,23 +1162,33 @@ pgdelete_dropref(struct pgrp *pgrp) if ((sessp->s_listflags & (S_LIST_TERM | S_LIST_DEAD)) != 0) panic("pg_deleteref: terminating already terminated session"); sessp->s_listflags |= S_LIST_TERM; - ttyp = sessp->s_ttyp; + ttyp = SESSION_TP(sessp); LIST_REMOVE(sessp, s_hash); proc_list_unlock(); - fstate = thread_funnel_set(kernel_flock, TRUE); - if (ttyp != NULL && ttyp->t_session == sessp) - ttyp->t_session = NULL; - (void) thread_funnel_set(kernel_flock, fstate); + if (ttyp != TTY_NULL) { + tty_lock(ttyp); + if (ttyp->t_session == sessp) + ttyp->t_session = NULL; + tty_unlock(ttyp); + } proc_list_lock(); sessp->s_listflags |= S_LIST_DEAD; if (sessp->s_count != 0) panic("pg_deleteref: freeing session in use"); proc_list_unlock(); +#ifdef CONFIG_EMBEDDED lck_mtx_destroy(&sessp->s_mlock, proc_lck_grp); +#else + lck_mtx_destroy(&sessp->s_mlock, proc_mlock_grp); +#endif FREE_ZONE(sessp, sizeof(struct session), M_SESSION); } else proc_list_unlock(); +#ifdef CONFIG_EMBEDDED lck_mtx_destroy(&pgrp->pg_mlock, proc_lck_grp); +#else + lck_mtx_destroy(&pgrp->pg_mlock, proc_mlock_grp); +#endif FREE_ZONE(pgrp, sizeof(*pgrp), M_PGRP); } @@ -1409,8 +1439,8 @@ proc_core_name(const char *name, uid_t uid, pid_t pid, char *cf_name, goto toolong; return (0); toolong: - log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too long\n", - (long)pid, name, (u_long)uid); + log(LOG_ERR, "pid %ld (%s), uid (%u): corename is too long\n", + (long)pid, name, (uint32_t)uid); return (1); } @@ 
-1632,7 +1662,7 @@ SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW, &maxlcid, 0, ""); /* Code Signing related routines */ int -csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) +csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval) { int ops = uap->ops; pid_t pid = uap->pid; @@ -1662,7 +1692,7 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) return(EOVERFLOW); if (kauth_cred_issuser(kauth_cred_get()) != TRUE) return(EPERM); - } else if ((forself == 0) && ((ops != CS_OPS_STATUS) && (ops != CS_OPS_CDHASH) && (kauth_cred_issuser(kauth_cred_get()) != TRUE))) { + } else if ((forself == 0) && ((ops != CS_OPS_STATUS) && (ops != CS_OPS_CDHASH) && (ops != CS_OPS_PIDOFFSET) && (kauth_cred_issuser(kauth_cred_get()) != TRUE))) { return(EPERM); } @@ -1720,11 +1750,16 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) tvp = pt->p_textvp; vid = vnode_vid(tvp); - proc_rele(pt); + if (tvp == NULLVP) { + proc_rele(pt); + return(EINVAL); + } buf = (char *)kalloc(usize); - if (buf == NULL) + if (buf == NULL) { + proc_rele(pt); return(ENOMEM); + } bzero(buf, usize); error = vnode_getwithvid(tvp, vid); @@ -1738,18 +1773,28 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) } kfree(buf, usize); } + + proc_rele(pt); + + return(error); + + case CS_OPS_PIDOFFSET: + toff = pt->p_textoff; + proc_rele(pt); + error = copyout(&toff, uaddr, sizeof(toff)); return(error); case CS_OPS_CDHASH: - if (usize != SHA1_RESULTLEN) { - proc_rele(pt); - return EINVAL; - } /* pt already holds a reference on its p_textvp */ tvp = pt->p_textvp; toff = pt->p_textoff; + if (tvp == NULLVP || usize != SHA1_RESULTLEN) { + proc_rele(pt); + return EINVAL; + } + error = vn_getcdhash(tvp, toff, cdhash); proc_rele(pt); @@ -1921,9 +1966,12 @@ proc_rebootscan(callout, arg, filterfn, filterarg) proc_t p; int lockheld = 0, retval; + proc_shutdown_exitcount = 0; + ps_allprocscan: 
proc_list_lock(); + lockheld = 1; for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) { @@ -2189,7 +2237,7 @@ pgrp_remove(struct proc * p) pg->pg_membercnt--; if (pg->pg_membercnt < 0) - panic("pgprp: -ve membercnt pgprp:%x p:%x\n",(unsigned int)pg, (unsigned int)p); + panic("pgprp: -ve membercnt pgprp:%p p:%p\n",pg, p); LIST_REMOVE(p, p_pglist); if (pg->pg_members.lh_first == 0) { @@ -2236,7 +2284,7 @@ pgrp_replace(struct proc * p, struct pgrp * newpg) pgrp_lock(oldpg); oldpg->pg_membercnt--; if (oldpg->pg_membercnt < 0) - panic("pgprp: -ve membercnt pgprp:%x p:%x\n",(unsigned int)oldpg, (unsigned int)p); + panic("pgprp: -ve membercnt pgprp:%p p:%p\n",oldpg, p); LIST_REMOVE(p, p_pglist); if (oldpg->pg_members.lh_first == 0) { pgrp_unlock(oldpg); @@ -2326,11 +2374,12 @@ proc_pgrp(proc_t p) assert(pgrp != NULL); - if ((pgrp->pg_listflags & (PGRP_FLAG_TERMINATE | PGRP_FLAG_DEAD)) != 0) - panic("proc_pgrp: ref being povided for dead pgrp"); - - if (pgrp != PGRP_NULL) + if (pgrp != PGRP_NULL) { pgrp->pg_refcount++; + if ((pgrp->pg_listflags & (PGRP_FLAG_TERMINATE | PGRP_FLAG_DEAD)) != 0) + panic("proc_pgrp: ref being povided for dead pgrp"); + } + proc_list_unlock(); return(pgrp); @@ -2392,18 +2441,27 @@ session_rele(struct session *sess) if (sess->s_count != 0) panic("session_rele: freeing session in use"); proc_list_unlock(); +#ifdef CONFIG_EMBEDDED lck_mtx_destroy(&sess->s_mlock, proc_lck_grp); +#else + lck_mtx_destroy(&sess->s_mlock, proc_mlock_grp); +#endif FREE_ZONE(sess, sizeof(struct session), M_SESSION); } else proc_list_unlock(); } -void +int proc_transstart(proc_t p, int locked) { if (locked == 0) proc_lock(p); while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) { + if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) { + if (locked == 0) + proc_unlock(p); + return EDEADLK; + } p->p_lflag |= P_LTRANSWAIT; msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL); } @@ -2411,37 +2469,61 @@ proc_transstart(proc_t p, int locked) p->p_transholder = 
current_thread(); if (locked == 0) proc_unlock(p); - + return 0; } +void +proc_transcommit(proc_t p, int locked) +{ + if (locked == 0) + proc_lock(p); + + assert ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT); + assert (p->p_transholder == current_thread()); + p->p_lflag |= P_LTRANSCOMMIT; + + if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) { + p->p_lflag &= ~P_LTRANSWAIT; + wakeup(&p->p_lflag); + } + if (locked == 0) + proc_unlock(p); +} void proc_transend(proc_t p, int locked) { if (locked == 0) proc_lock(p); - p->p_lflag &= ~P_LINTRANSIT; + + p->p_lflag &= ~( P_LINTRANSIT | P_LTRANSCOMMIT); + p->p_transholder = NULL; if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) { p->p_lflag &= ~P_LTRANSWAIT; wakeup(&p->p_lflag); } - p->p_transholder = NULL; if (locked == 0) proc_unlock(p); } -void +int proc_transwait(proc_t p, int locked) { if (locked == 0) proc_lock(p); while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) { + if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT && current_proc() == p) { + if (locked == 0) + proc_unlock(p); + return EDEADLK; + } p->p_lflag |= P_LTRANSWAIT; msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL); } if (locked == 0) proc_unlock(p); + return 0; } void @@ -2464,6 +2546,21 @@ proc_knote(struct proc * p, long hint) proc_klist_unlock(); } +void +proc_knote_drain(struct proc *p) +{ + struct knote *kn = NULL; + + /* + * Clear the proc's klist to avoid references after the proc is reaped. 
+ */ + proc_klist_lock(); + while ((kn = SLIST_FIRST(&p->p_klist))) { + kn->kn_ptr.p_proc = PROC_NULL; + KNOTE_DETACH(&p->p_klist, kn); + } + proc_klist_unlock(); +} unsigned long cs_procs_killed = 0; unsigned long cs_procs_invalidated = 0; @@ -2474,6 +2571,35 @@ SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW, &cs_force_kill, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW, &cs_force_hard, 0, ""); SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW, &cs_debug, 0, ""); +int +cs_allow_invalid(struct proc *p) +{ +#if MACH_ASSERT + lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); +#endif +#if CONFIG_MACF && CONFIG_ENFORCE_SIGNED_CODE + /* There needs to be a MAC policy to implement this hook, or else the + * kill bits will be cleared here every time. If we have + * CONFIG_ENFORCE_SIGNED_CODE, we can assume there is a policy + * implementing the hook. + */ + if( 0 != mac_proc_check_run_cs_invalid(p)) { + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "not allowed: pid %d\n", + p->p_pid); + return 0; + } + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "allowed: pid %d\n", + p->p_pid); + proc_lock(p); + p->p_csflags &= ~(CS_KILL | CS_HARD | CS_VALID); + proc_unlock(p); + vm_map_switch_protect(get_task_map(p->task), FALSE); +#endif + return (p->p_csflags & (CS_KILL | CS_HARD)) == 0; +} + int cs_invalid_page( addr64_t vaddr) @@ -2536,3 +2662,244 @@ cs_invalid_page( return retval; } +void +proc_setregister(proc_t p) +{ + proc_lock(p); + p->p_lflag |= P_LREGISTER; + proc_unlock(p); +} + +void +proc_resetregister(proc_t p) +{ + proc_lock(p); + p->p_lflag &= ~P_LREGISTER; + proc_unlock(p); +} + +pid_t +proc_pgrpid(proc_t p) +{ + return p->p_pgrpid; +} + +pid_t +proc_selfpgrpid() +{ + return current_proc()->p_pgrpid; +} + + +/* return control and action states */ +int +proc_getpcontrol(int pid, int * pcontrolp) +{ + proc_t p; + + p = proc_find(pid); + if (p == PROC_NULL) + return(ESRCH); + if (pcontrolp != NULL) + *pcontrolp = 
p->p_pcaction; + + proc_rele(p); + return(0); +} + +int +proc_dopcontrol(proc_t p, void *num_found) +{ + int pcontrol; + + proc_lock(p); + + pcontrol = PROC_CONTROL_STATE(p); + + if (PROC_ACTION_STATE(p) ==0) { + switch(pcontrol) { + case P_PCTHROTTLE: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: throttling pid %d (%s)\n", p->p_pid, p->p_comm); + (*(int *)num_found)++; + break; + + case P_PCSUSP: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: suspending pid %d (%s)\n", p->p_pid, p->p_comm); + task_suspend(p->task); + (*(int *)num_found)++; + break; + + case P_PCKILL: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: killing pid %d (%s)\n", p->p_pid, p->p_comm); + psignal(p, SIGKILL); + (*(int *)num_found)++; + break; + + default: + proc_unlock(p); + } + + } else + proc_unlock(p); + + return(PROC_RETURNED); +} + + +/* + * Resume a throttled or suspended process. This is an internal interface that's only + * used by the user level code that presents the GUI when we run out of swap space and + * hence is restricted to processes with superuser privileges. + */ + +int +proc_resetpcontrol(int pid) +{ + proc_t p; + int pcontrol; + int error; + + if ((error = suser(kauth_cred_get(), 0))) + return error; + p = proc_find(pid); + if (p == PROC_NULL) + return(ESRCH); + + proc_lock(p); + + pcontrol = PROC_CONTROL_STATE(p); + + if(PROC_ACTION_STATE(p) !=0) { + switch(pcontrol) { + case P_PCTHROTTLE: + PROC_RESETACTION_STATE(p); + proc_unlock(p); + printf("low swap: unthrottling pid %d (%s)\n", p->p_pid, p->p_comm); + break; + + case P_PCSUSP: + PROC_RESETACTION_STATE(p); + proc_unlock(p); + printf("low swap: resuming pid %d (%s)\n", p->p_pid, p->p_comm); + task_resume(p->task); + break; + + case P_PCKILL: + /* Huh? 
*/ + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: attempt to unkill pid %d (%s) ignored\n", p->p_pid, p->p_comm); + break; + + default: + proc_unlock(p); + } + + } else + proc_unlock(p); + + proc_rele(p); + return(0); +} + + +/* + * Return true if the specified process has an action state specified for it and it isn't + * already in an action state and it's using more physical memory than the specified threshold. + * Note: the memory_threshold argument is specified in bytes and is of type uint64_t. + */ + +static int +proc_pcontrol_filter(proc_t p, void *memory_thresholdp) +{ + + return PROC_CONTROL_STATE(p) && /* if there's an action state specified... */ + (PROC_ACTION_STATE(p) == 0) && /* and we're not in the action state yet... */ + (get_task_resident_size(p->task) > *((uint64_t *)memory_thresholdp)); /* and this proc is over the mem threshold, */ + /* then return true to take action on this proc */ +} + + + +/* + * Deal with the out of swap space condition. This routine gets called when + * we want to swap something out but there's no more space left. Since this + * creates a memory deadlock situtation, we need to take action to free up + * some memory resources in order to prevent the system from hanging completely. + * The action we take is based on what the system processes running at user level + * have specified. Processes are marked in one of four categories: ones that + * can be killed immediately, ones that should be suspended, ones that should + * be throttled, and all the rest which are basically none of the above. Which + * processes are marked as being in which category is a user level policy decision; + * we just take action based on those decisions here. 
+ */ + +#define STARTING_PERCENTAGE 50 /* memory threshold expressed as a percentage */ + /* of physical memory */ + +struct timeval last_no_space_action = {0, 0}; + +void +no_paging_space_action(void) +{ + + uint64_t memory_threshold; + int num_found; + struct timeval now; + + /* + * Throttle how often we come through here. Once every 20 seconds should be plenty. + */ + + microtime(&now); + + if (now.tv_sec <= last_no_space_action.tv_sec + 20) + return; + + last_no_space_action = now; + + /* + * Examine all processes and find those that have been marked to have some action + * taken when swap space runs out. Of those processes, select one or more and + * apply the specified action to them. The idea is to only take action against + * a few processes rather than hitting too many at once. If the low swap condition + * persists, this routine will get called again and we'll take action against more + * processes. + * + * Of the processes that have been marked, we choose which ones to take action + * against according to how much physical memory they're presently using. We + * start with the STARTING_PERCENTAGE and any processes using more physical memory + * than the percentage threshold will have action taken against them. If there + * are no processes over the threshold, then the threshold is cut in half and we + * look again for processes using more than this threshold. We continue in + * this fashion until we find at least one process to take action against. This + * iterative approach is less than ideally efficient, however we only get here + * when the system is almost in a memory deadlock and is pretty much just + * thrashing if it's doing anything at all. Therefore, the cpu overhead of + * potentially multiple passes here probably isn't relevant. 
+ */ + + memory_threshold = (sane_size * STARTING_PERCENTAGE) / 100; /* resident threshold in bytes */ + + for (num_found = 0; num_found == 0; memory_threshold = memory_threshold / 2) { + proc_iterate(PROC_ALLPROCLIST, proc_dopcontrol, (void *)&num_found, proc_pcontrol_filter, (void *)&memory_threshold); + + /* + * If we just looked with memory_threshold == 0, then there's no need to iterate any further since + * we won't find any eligible processes at this point. + */ + + if (memory_threshold == 0) { + if (num_found == 0) /* log that we couldn't do anything in this case */ + printf("low swap: unable to find any eligible processes to take action on\n"); + + break; + } + } +} diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index 46ab8bf1b..a084ddf89 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -24,9 +24,11 @@ * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* + * + * + * Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved + * + * * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -64,18 +66,19 @@ * SUCH DAMAGE. * * @(#)kern_prot.c 8.9 (Berkeley) 2/14/95 - */ -/* + * + * * NOTICE: This file was modified by McAfee Research in 2004 to introduce * support for mandatory and extensible security protections. This notice * is included in support of clause 2.2 (b) of the Apple Public License, * Version 2.0. - */ -/* + * + * * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce * support for mandatory and extensible security protections. This notice * is included in support of clause 2.2 (b) of the Apple Public License, * Version 2.0. 
+ * */ /* @@ -93,7 +96,7 @@ #include #include -#include +#include #if CONFIG_LCTX #include @@ -116,8 +119,6 @@ #include -int groupmember(gid_t gid, kauth_cred_t cred); - /* * Credential debugging; we can track entry into a function that might * change a credential, and we can track actual credential changes that @@ -152,9 +153,9 @@ extern void kauth_cred_print(kauth_cred_t cred); * XXX: Belongs in kern_proc.c */ int -setprivexec(proc_t p, struct setprivexec_args *uap, register_t *retval) +setprivexec(proc_t p, struct setprivexec_args *uap, int32_t *retval) { - AUDIT_ARG(value, uap->flag); + AUDIT_ARG(value32, uap->flag); *retval = p->p_debugger; p->p_debugger = (uap->flag != 0); return(0); @@ -173,7 +174,7 @@ setprivexec(proc_t p, struct setprivexec_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int -getpid(proc_t p, __unused struct getpid_args *uap, register_t *retval) +getpid(proc_t p, __unused struct getpid_args *uap, int32_t *retval) { *retval = p->p_pid; @@ -193,7 +194,7 @@ getpid(proc_t p, __unused struct getpid_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int -getppid(proc_t p, __unused struct getppid_args *uap, register_t *retval) +getppid(proc_t p, __unused struct getppid_args *uap, int32_t *retval) { *retval = p->p_ppid; @@ -213,7 +214,7 @@ getppid(proc_t p, __unused struct getppid_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int -getpgrp(proc_t p, __unused struct getpgrp_args *uap, register_t *retval) +getpgrp(proc_t p, __unused struct getpgrp_args *uap, int32_t *retval) { *retval = p->p_pgrpid; @@ -238,7 +239,7 @@ getpgrp(proc_t p, __unused struct getpgrp_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int -getpgid(proc_t p, struct getpgid_args *uap, register_t *retval) +getpgid(proc_t p, struct getpgid_args *uap, int32_t *retval) { proc_t pt; int refheld = 0; @@ -275,7 +276,7 @@ getpgid(proc_t p, struct getpgid_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int 
-getsid(proc_t p, struct getsid_args *uap, register_t *retval) +getsid(proc_t p, struct getsid_args *uap, int32_t *retval) { proc_t pt; int refheld = 0; @@ -309,7 +310,7 @@ getsid(proc_t p, struct getsid_args *uap, register_t *retval) * Returns: uid_t The real uid of the caller */ int -getuid(__unused proc_t p, __unused struct getuid_args *uap, register_t *retval) +getuid(__unused proc_t p, __unused struct getuid_args *uap, int32_t *retval) { *retval = kauth_getruid(); @@ -327,7 +328,7 @@ getuid(__unused proc_t p, __unused struct getuid_args *uap, register_t *retval) * Returns: uid_t The effective uid of the caller */ int -geteuid(__unused proc_t p, __unused struct geteuid_args *uap, register_t *retval) +geteuid(__unused proc_t p, __unused struct geteuid_args *uap, int32_t *retval) { *retval = kauth_getuid(); @@ -347,7 +348,7 @@ geteuid(__unused proc_t p, __unused struct geteuid_args *uap, register_t *retval * ESRCH No per thread identity active */ int -gettid(__unused proc_t p, struct gettid_args *uap, register_t *retval) +gettid(__unused proc_t p, struct gettid_args *uap, int32_t *retval) { struct uthread *uthread = get_bsdthread_info(current_thread()); int error; @@ -379,7 +380,7 @@ gettid(__unused proc_t p, struct gettid_args *uap, register_t *retval) * Returns: gid_t The real gid of the caller */ int -getgid(__unused proc_t p, __unused struct getgid_args *uap, register_t *retval) +getgid(__unused proc_t p, __unused struct getgid_args *uap, int32_t *retval) { *retval = kauth_getrgid(); @@ -403,7 +404,7 @@ getgid(__unused proc_t p, __unused struct getgid_args *uap, register_t *retval) * detail. */ int -getegid(__unused proc_t p, __unused struct getegid_args *uap, register_t *retval) +getegid(__unused proc_t p, __unused struct getegid_args *uap, int32_t *retval) { *retval = kauth_getgid(); @@ -442,7 +443,7 @@ getegid(__unused proc_t p, __unused struct getegid_args *uap, register_t *retval * be returned by this call. 
*/ int -getgroups(__unused proc_t p, struct getgroups_args *uap, register_t *retval) +getgroups(__unused proc_t p, struct getgroups_args *uap, int32_t *retval) { int ngrp; int error; @@ -475,23 +476,27 @@ getgroups(__unused proc_t p, struct getgroups_args *uap, register_t *retval) /* * Return the per-thread/per-process supplementary groups list. + * + * XXX implement getsgroups + * */ -#warning XXX implement getsgroups + int -getsgroups(__unused proc_t p, __unused struct getsgroups_args *uap, __unused register_t *retval) +getsgroups(__unused proc_t p, __unused struct getsgroups_args *uap, __unused int32_t *retval) { - /* XXX implement */ return(ENOTSUP); } /* * Return the per-thread/per-process whiteout groups list. + * + * XXX implement getwgroups + * */ -#warning XXX implement getwgroups + int -getwgroups(__unused proc_t p, __unused struct getwgroups_args *uap, __unused register_t *retval) +getwgroups(__unused proc_t p, __unused struct getwgroups_args *uap, __unused int32_t *retval) { - /* XXX implement */ return(ENOTSUP); } @@ -521,7 +526,7 @@ getwgroups(__unused proc_t p, __unused struct getwgroups_args *uap, __unused reg * XXX: Belongs in kern_proc.c */ int -setsid(proc_t p, __unused struct setsid_args *uap, register_t *retval) +setsid(proc_t p, __unused struct setsid_args *uap, int32_t *retval) { struct pgrp * pg = PGRP_NULL; @@ -576,7 +581,7 @@ setsid(proc_t p, __unused struct setsid_args *uap, register_t *retval) * XXX: Belongs in kern_proc.c */ int -setpgid(proc_t curp, register struct setpgid_args *uap, __unused register_t *retval) +setpgid(proc_t curp, register struct setpgid_args *uap, __unused int32_t *retval) { proc_t targp = PROC_NULL; /* target process */ struct pgrp *pg = PGRP_NULL; /* target pgrp */ @@ -666,7 +671,7 @@ setpgid(proc_t curp, register struct setpgid_args *uap, __unused register_t *ret * execution. 
*/ int -issetugid(proc_t p, __unused struct issetugid_args *uap, register_t *retval) +issetugid(proc_t p, __unused struct issetugid_args *uap, int32_t *retval) { /* * Note: OpenBSD sets a P_SUGIDEXEC flag set at execve() time, @@ -703,7 +708,7 @@ issetugid(proc_t p, __unused struct issetugid_args *uap, register_t *retval) * flag the process as having set privilege since the last exec. */ int -setuid(proc_t p, struct setuid_args *uap, __unused register_t *retval) +setuid(proc_t p, struct setuid_args *uap, __unused int32_t *retval) { uid_t uid; uid_t svuid = KAUTH_UID_NONE; @@ -718,7 +723,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused register_t *retval) my_cred = kauth_cred_proc_ref(p); DEBUG_CRED_ENTER("setuid (%d/%d): %p %d\n", p->p_pid, (p->p_pptr ? p->p_pptr->p_pid : 0), my_cred, uap->uid); - AUDIT_ARG(uid, uid, 0, 0, 0); + AUDIT_ARG(uid, uid); if (uid != my_cred->cr_ruid && /* allow setuid(getuid()) */ uid != my_cred->cr_svuid && /* allow setuid(saved uid) */ @@ -742,7 +747,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused register_t *retval) * chgproccnt uses list lock for protection */ (void)chgproccnt(uid, 1); - (void)chgproccnt(kauth_getruid(), -1); + (void)chgproccnt(my_cred->cr_ruid, -1); } /* get current credential and take a reference while we muck with it */ @@ -786,7 +791,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); } break; @@ -818,7 +823,7 @@ setuid(proc_t p, struct setuid_args *uap, __unused register_t *retval) * flag the process as having set privilege since the last exec. 
*/ int -seteuid(proc_t p, struct seteuid_args *uap, __unused register_t *retval) +seteuid(proc_t p, struct seteuid_args *uap, __unused int32_t *retval) { uid_t euid; int error; @@ -827,7 +832,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused register_t *retval) DEBUG_CRED_ENTER("seteuid: %d\n", uap->euid); euid = uap->euid; - AUDIT_ARG(uid, 0, euid, 0, 0); + AUDIT_ARG(euid, euid); my_cred = kauth_cred_proc_ref(p); @@ -871,7 +876,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); } break; @@ -916,7 +921,7 @@ seteuid(proc_t p, struct seteuid_args *uap, __unused register_t *retval) * flag the process as having set privilege since the last exec. */ int -setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) +setreuid(proc_t p, struct setreuid_args *uap, __unused int32_t *retval) { uid_t ruid, euid; int error; @@ -930,7 +935,8 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) ruid = KAUTH_UID_NONE; if (euid == (uid_t)-1) euid = KAUTH_UID_NONE; - AUDIT_ARG(uid, euid, ruid, 0, 0); + AUDIT_ARG(euid, euid); + AUDIT_ARG(ruid, ruid); my_cred = kauth_cred_proc_ref(p); @@ -970,7 +976,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) if (euid == KAUTH_UID_NONE && my_cred->cr_uid != euid) { /* changing the effective UID */ new_euid = euid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } if (ruid != KAUTH_UID_NONE && my_cred->cr_ruid != ruid) { /* changing the real UID; must do user accounting */ @@ -978,7 +984,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) (void)chgproccnt(ruid, 1); (void)chgproccnt(my_cred->cr_ruid, -1); new_ruid = ruid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } /* * If the newly requested real 
uid or effective uid does @@ -989,7 +995,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) if (my_cred->cr_svuid != uap->ruid && my_cred->cr_svuid != uap->euid) { svuid = new_euid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } my_new_cred = kauth_cred_setresuid(my_cred, ruid, euid, svuid, my_cred->cr_gmuid); @@ -1013,7 +1019,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); /* XXX redundant? */ + OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */ proc_unlock(p); } break; @@ -1052,7 +1058,7 @@ setreuid(proc_t p, struct setreuid_args *uap, __unused register_t *retval) * the supplementary group list unchanged. */ int -setgid(proc_t p, struct setgid_args *uap, __unused register_t *retval) +setgid(proc_t p, struct setgid_args *uap, __unused int32_t *retval) { gid_t gid; gid_t rgid = KAUTH_GID_NONE; @@ -1063,7 +1069,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused register_t *retval) DEBUG_CRED_ENTER("setgid(%d/%d): %d\n", p->p_pid, (p->p_pptr ? p->p_pptr->p_pid : 0), uap->gid); gid = uap->gid; - AUDIT_ARG(gid, gid, 0, 0, 0); + AUDIT_ARG(gid, gid); my_cred = kauth_cred_proc_ref(p); @@ -1113,7 +1119,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); } break; @@ -1150,7 +1156,7 @@ setgid(proc_t p, struct setgid_args *uap, __unused register_t *retval) * the supplementary group list unchanged. 
*/ int -setegid(proc_t p, struct setegid_args *uap, __unused register_t *retval) +setegid(proc_t p, struct setegid_args *uap, __unused int32_t *retval) { gid_t egid; int error; @@ -1159,7 +1165,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused register_t *retval) DEBUG_CRED_ENTER("setegid %d\n", uap->egid); egid = uap->egid; - AUDIT_ARG(gid, 0, egid, 0, 0); + AUDIT_ARG(egid, egid); my_cred = kauth_cred_proc_ref(p); @@ -1199,7 +1205,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); } break; @@ -1250,7 +1256,7 @@ setegid(proc_t p, struct setegid_args *uap, __unused register_t *retval) * the supplementary group list unchanged. */ int -setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) +setregid(proc_t p, struct setregid_args *uap, __unused int32_t *retval) { gid_t rgid, egid; int error; @@ -1265,7 +1271,8 @@ setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) rgid = KAUTH_GID_NONE; if (egid == (uid_t)-1) egid = KAUTH_GID_NONE; - AUDIT_ARG(gid, egid, rgid, 0, 0); + AUDIT_ARG(egid, egid); + AUDIT_ARG(rgid, rgid); my_cred = kauth_cred_proc_ref(p); @@ -1300,12 +1307,12 @@ setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) if (egid == KAUTH_UID_NONE && my_cred->cr_groups[0] != egid) { /* changing the effective GID */ new_egid = egid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } if (rgid != KAUTH_UID_NONE && my_cred->cr_rgid != rgid) { /* changing the real GID */ new_rgid = rgid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } /* * If the newly requested real gid or effective gid does @@ -1316,7 +1323,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) if (my_cred->cr_svgid != uap->rgid && my_cred->cr_svgid != uap->egid) { 
svgid = new_egid; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); } my_new_cred = kauth_cred_setresgid(my_cred, rgid, egid, svgid); @@ -1338,7 +1345,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); /* XXX redundant? */ + OSBitOrAtomic(P_SUGID, &p->p_flag); /* XXX redundant? */ proc_unlock(p); } break; @@ -1360,7 +1367,7 @@ setregid(proc_t p, struct setregid_args *uap, __unused register_t *retval) * thread to the requested UID and single GID, and clears all other GIDs. */ int -settid(proc_t p, struct settid_args *uap, __unused register_t *retval) +settid(proc_t p, struct settid_args *uap, __unused int32_t *retval) { kauth_cred_t uc; struct uthread *uthread = get_bsdthread_info(current_thread()); @@ -1369,7 +1376,8 @@ settid(proc_t p, struct settid_args *uap, __unused register_t *retval) uid = uap->uid; gid = uap->gid; - AUDIT_ARG(uid, uid, gid, gid, 0); + AUDIT_ARG(uid, uid); + AUDIT_ARG(gid, gid); if (proc_suser(p) != 0) return (EPERM); @@ -1431,14 +1439,14 @@ settid(proc_t p, struct settid_args *uap, __unused register_t *retval) * When the assume argument is zero we revert back to our normal identity. */ int -settid_with_pid(proc_t p, struct settid_with_pid_args *uap, __unused register_t *retval) +settid_with_pid(proc_t p, struct settid_with_pid_args *uap, __unused int32_t *retval) { proc_t target_proc; struct uthread *uthread = get_bsdthread_info(current_thread()); kauth_cred_t my_cred, my_target_cred, my_new_cred; AUDIT_ARG(pid, uap->pid); - AUDIT_ARG(value, uap->assume); + AUDIT_ARG(value32, uap->assume); if (proc_suser(p) != 0) { return (EPERM); @@ -1551,7 +1559,7 @@ settid_with_pid(proc_t p, struct settid_with_pid_args *uap, __unused register_t * flag the process as having set privilege since the last exec. 
*/ static int -setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused register_t *retval) +setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused int32_t *retval) { u_int ngrp; gid_t newgroups[NGROUPS] = { 0 }; @@ -1639,7 +1647,7 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused continue; } p->p_ucred = my_new_cred; - OSBitOrAtomic(P_SUGID, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SUGID, &p->p_flag); proc_unlock(p); } break; @@ -1686,7 +1694,7 @@ setgroups1(proc_t p, u_int gidsetsize, user_addr_t gidset, uid_t gmuid, __unused * See also: setgroups1() */ int -initgroups(proc_t p, struct initgroups_args *uap, __unused register_t *retval) +initgroups(proc_t p, struct initgroups_args *uap, __unused int32_t *retval) { DEBUG_CRED_ENTER("initgroups\n"); @@ -1720,7 +1728,7 @@ initgroups(proc_t p, struct initgroups_args *uap, __unused register_t *retval) * See also: setgroups1() */ int -setgroups(proc_t p, struct setgroups_args *uap, __unused register_t *retval) +setgroups(proc_t p, struct setgroups_args *uap, __unused int32_t *retval) { DEBUG_CRED_ENTER("setgroups\n"); @@ -1730,20 +1738,26 @@ setgroups(proc_t p, struct setgroups_args *uap, __unused register_t *retval) /* * Set the per-thread/per-process supplementary groups list. + * + * XXX implement setsgroups + * */ -#warning XXX implement setsgroups + int -setsgroups(__unused proc_t p, __unused struct setsgroups_args *uap, __unused register_t *retval) +setsgroups(__unused proc_t p, __unused struct setsgroups_args *uap, __unused int32_t *retval) { return(ENOTSUP); } /* * Set the per-thread/per-process whiteout groups list. 
+ * + * XXX implement setwgroups + * */ -#warning XXX implement setwgroups + int -setwgroups(__unused proc_t p, __unused struct setwgroups_args *uap, __unused register_t *retval) +setwgroups(__unused proc_t p, __unused struct setwgroups_args *uap, __unused int32_t *retval) { return(ENOTSUP); } @@ -1859,7 +1873,7 @@ is_suser1(void) * XXX: Belongs in kern_proc.c */ int -getlogin(proc_t p, struct getlogin_args *uap, __unused register_t *retval) +getlogin(proc_t p, struct getlogin_args *uap, __unused int32_t *retval) { char buffer[MAXLOGNAME+1]; struct session * sessp; @@ -1899,10 +1913,10 @@ getlogin(proc_t p, struct getlogin_args *uap, __unused register_t *retval) * XXX: Belongs in kern_proc.c */ int -setlogin(proc_t p, struct setlogin_args *uap, __unused register_t *retval) +setlogin(proc_t p, struct setlogin_args *uap, __unused int32_t *retval) { int error; - int dummy=0; + size_t dummy=0; char buffer[MAXLOGNAME+1]; struct session * sessp; @@ -1979,13 +1993,13 @@ set_security_token(proc_t p) * the user of the trailer from future representation * changes. */ - audit_token.val[0] = my_cred->cr_au.ai_auid; + audit_token.val[0] = my_cred->cr_audit.as_aia_p->ai_auid; audit_token.val[1] = my_cred->cr_uid; audit_token.val[2] = my_cred->cr_gid; audit_token.val[3] = my_cred->cr_ruid; audit_token.val[4] = my_cred->cr_rgid; audit_token.val[5] = p->p_pid; - audit_token.val[6] = my_cred->cr_au.ai_asid; + audit_token.val[6] = my_cred->cr_audit.as_aia_p->ai_asid; audit_token.val[7] = p->p_idversion; #if CONFIG_MACF_MACH @@ -2032,7 +2046,7 @@ cru2x(kauth_cred_t cr, struct xucred *xcr) * LCTX by its own locks. 
*/ int -setlcid(proc_t p0, struct setlcid_args *uap, __unused register_t *retval) +setlcid(proc_t p0, struct setlcid_args *uap, __unused int32_t *retval) { proc_t p; struct lctx *l; @@ -2040,7 +2054,7 @@ setlcid(proc_t p0, struct setlcid_args *uap, __unused register_t *retval) int refheld = 0; AUDIT_ARG(pid, uap->pid); - AUDIT_ARG(value, uap->lcid); + AUDIT_ARG(value32, uap->lcid); if (uap->pid == LCID_PROC_SELF) { /* Create/Join/Leave */ p = p0; } else { /* Adopt/Orphan */ @@ -2138,7 +2152,7 @@ setlcid(proc_t p0, struct setlcid_args *uap, __unused register_t *retval) * protected by the all-context lock. */ int -getlcid(proc_t p0, struct getlcid_args *uap, register_t *retval) +getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval) { proc_t p; int error = 0; @@ -2175,14 +2189,14 @@ getlcid(proc_t p0, struct getlcid_args *uap, register_t *retval) } #else /* LCTX */ int -setlcid(proc_t p0, struct setlcid_args *uap, register_t *retval) +setlcid(proc_t p0, struct setlcid_args *uap, int32_t *retval) { return (ENOSYS); } int -getlcid(proc_t p0, struct getlcid_args *uap, register_t *retval) +getlcid(proc_t p0, struct getlcid_args *uap, int32_t *retval) { return (ENOSYS); diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index 5e2f8c171..b51c4ecbe 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,7 +86,7 @@ #include #include -#include +#include #include @@ -120,7 +120,7 @@ rlim_t maxsmap = MAXSSIZ - PAGE_SIZE; /* XXX */ * * Note: would be in kern/subr_param.c in FreeBSD. 
*/ -int maxfilesperproc = OPEN_MAX; /* per-proc open files limit */ +__private_extern__ int maxfilesperproc = OPEN_MAX; /* per-proc open files limit */ SYSCTL_INT( _kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, &maxprocperuid, 0, "Maximum processes allowed per userid" ); @@ -152,7 +152,7 @@ static int ppgrp_donice_callback(proc_t p, void * arg); * Resource controls and accounting. */ int -getpriority(struct proc *curp, struct getpriority_args *uap, register_t *retval) +getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval) { struct proc *p; int low = PRIO_MAX + 1; @@ -295,7 +295,7 @@ ppgrp_donice_callback(proc_t p, void * arg) */ /* ARGSUSED */ int -setpriority(struct proc *curp, struct setpriority_args *uap, __unused register_t *retval) +setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *retval) { struct proc *p; int found = 0, error = 0; @@ -303,7 +303,7 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused register_t AUDIT_ARG(cmd, uap->which); AUDIT_ARG(owner, uap->who, 0); - AUDIT_ARG(value, uap->prio); + AUDIT_ARG(value32, uap->prio); /* would also test (uap->who < 0), but id_t is unsigned */ if (uap->who > 0x7fffffff) @@ -519,7 +519,7 @@ do_background_thread(struct proc *curp, int priority) */ /* ARGSUSED */ int -setrlimit(struct proc *p, struct setrlimit_args *uap, __unused register_t *retval) +setrlimit(struct proc *p, struct setrlimit_args *uap, __unused int32_t *retval) { struct rlimit alim; int error; @@ -585,12 +585,16 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp) task_absolutetime_info_data_t tinfo; mach_msg_type_number_t count; struct timeval ttv, tv; + clock_sec_t tv_sec; + clock_usec_t tv_usec; count = TASK_ABSOLUTETIME_INFO_COUNT; task_info(p->task, TASK_ABSOLUTETIME_INFO, (task_info_t)&tinfo, &count); absolutetime_to_microtime(tinfo.total_user + tinfo.total_system, - (uint32_t *) &ttv.tv_sec, (uint32_t *) &ttv.tv_usec); + &tv_sec, &tv_usec); + 
ttv.tv_sec = tv_sec; + ttv.tv_usec = tv_usec; tv.tv_sec = (limp->rlim_cur > __INT_MAX__ ? __INT_MAX__ : limp->rlim_cur); tv.tv_usec = 0; @@ -817,7 +821,7 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp) /* ARGSUSED */ int -getrlimit(struct proc *p, struct getrlimit_args *uap, __unused register_t *retval) +getrlimit(struct proc *p, struct getrlimit_args *uap, __unused int32_t *retval) { struct rlimit lim; @@ -851,14 +855,16 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *i task = p->task; if (task) { - task_basic_info_data_t tinfo; + task_basic_info_32_data_t tinfo; task_thread_times_info_data_t ttimesinfo; - mach_msg_type_number_t task_info_stuff, task_ttimes_stuff; + task_events_info_data_t teventsinfo; + mach_msg_type_number_t task_info_count, task_ttimes_count; + mach_msg_type_number_t task_events_count; struct timeval ut,st; - task_info_stuff = TASK_BASIC_INFO_COUNT; - task_info(task, TASK_BASIC_INFO, - (task_info_t)&tinfo, &task_info_stuff); + task_info_count = TASK_BASIC_INFO_32_COUNT; + task_info(task, TASK_BASIC2_INFO_32, + (task_info_t)&tinfo, &task_info_count); ut.tv_sec = tinfo.user_time.seconds; ut.tv_usec = tinfo.user_time.microseconds; st.tv_sec = tinfo.system_time.seconds; @@ -866,9 +872,9 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *i timeradd(&ut, up, up); timeradd(&st, sp, sp); - task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT; + task_ttimes_count = TASK_THREAD_TIMES_INFO_COUNT; task_info(task, TASK_THREAD_TIMES_INFO, - (task_info_t)&ttimesinfo, &task_ttimes_stuff); + (task_info_t)&ttimesinfo, &task_ttimes_count); ut.tv_sec = ttimesinfo.user_time.seconds; ut.tv_usec = ttimesinfo.user_time.microseconds; @@ -876,17 +882,37 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *i st.tv_usec = ttimesinfo.system_time.microseconds; timeradd(&ut, up, up); timeradd(&st, sp, sp); + + task_events_count = TASK_EVENTS_INFO_COUNT; + 
task_info(task, TASK_EVENTS_INFO, + (task_info_t)&teventsinfo, &task_events_count); + + /* + * No need to lock "p": this does not need to be + * completely consistent, right ? + */ + p->p_stats->p_ru.ru_minflt = (teventsinfo.faults - + teventsinfo.pageins); + p->p_stats->p_ru.ru_majflt = teventsinfo.pageins; + p->p_stats->p_ru.ru_nivcsw = (teventsinfo.csw - + p->p_stats->p_ru.ru_nvcsw); + if (p->p_stats->p_ru.ru_nivcsw < 0) + p->p_stats->p_ru.ru_nivcsw = 0; + + p->p_stats->p_ru.ru_maxrss = tinfo.resident_size; } } -__private_extern__ void munge_rusage(struct rusage *a_rusage_p, struct user_rusage *a_user_rusage_p); +__private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p); +__private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p); /* ARGSUSED */ int -getrusage(struct proc *p, struct getrusage_args *uap, __unused register_t *retval) +getrusage(struct proc *p, struct getrusage_args *uap, __unused int32_t *retval) { struct rusage *rup, rubuf; - struct user_rusage rubuf64; + struct user64_rusage rubuf64; + struct user32_rusage rubuf32; size_t retsize = sizeof(rubuf); /* default: 32 bits */ caddr_t retbuf = (caddr_t)&rubuf; /* default: 32 bits */ struct timeval utime; @@ -896,7 +922,6 @@ getrusage(struct proc *p, struct getrusage_args *uap, __unused register_t *retva switch (uap->who) { case RUSAGE_SELF: calcru(p, &utime, &stime, NULL); - // LP64todo: proc struct should have 64 bit version of struct proc_lock(p); rup = &p->p_stats->p_ru; rup->ru_utime = utime; @@ -920,8 +945,13 @@ getrusage(struct proc *p, struct getrusage_args *uap, __unused register_t *retva if (IS_64BIT_PROCESS(p)) { retsize = sizeof(rubuf64); retbuf = (caddr_t)&rubuf64; - munge_rusage(&rubuf, &rubuf64); + munge_user64_rusage(&rubuf, &rubuf64); + } else { + retsize = sizeof(rubuf32); + retbuf = (caddr_t)&rubuf32; + munge_user32_rusage(&rubuf, &rubuf32); } + return (copyout(retbuf, uap->rusage, 
retsize)); } @@ -929,7 +959,7 @@ void ruadd(struct rusage *ru, struct rusage *ru2) { long *ip, *ip2; - int i; + long i; timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); @@ -1059,7 +1089,7 @@ proc_limitreplace(proc_t p) * */ int -iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused register_t *retval) +iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval) { int error = 0; thread_t thread = THREAD_NULL; @@ -1132,3 +1162,25 @@ iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __un *retval = error; return (error); } + + +boolean_t thread_is_io_throttled(void); + +boolean_t +thread_is_io_throttled(void) { + + int policy; + struct uthread *ut; + + policy = current_proc()->p_iopol_disk; + + ut = get_bsdthread_info(current_thread()); + + if (ut->uu_iopol_disk != IOPOL_DEFAULT) + policy = ut->uu_iopol_disk; + + if (policy == IOPOL_THROTTLE) + return TRUE; + + return FALSE; +} diff --git a/bsd/kern/kern_shutdown.c b/bsd/kern/kern_shutdown.c index 4b59526ed..f8984bb3c 100644 --- a/bsd/kern/kern_shutdown.c +++ b/bsd/kern/kern_shutdown.c @@ -51,19 +51,13 @@ #include #include #include -#include -#if NCPUS > 1 -#include -#include -#include -#endif /* NCPUS > 1 */ #include #include #include #include #include -#include +#include #include /* for thread_block() */ #include /* for host_priv_self() */ @@ -74,14 +68,21 @@ #include /* abused for sync() */ #include /* for delay_for_interval() */ +#include + +int system_inshutdown = 0; + /* XXX should be in a header file somewhere, but isn't */ extern void md_prepare_for_shutdown(int, int, char *); +extern void (*unmountroot_pre_hook)(void); int waittime = -1; -static int shutting_down = 0; +unsigned int proc_shutdown_exitcount = 0; +static int sd_openlog(vfs_context_t); +static int sd_closelog(vfs_context_t); +static void sd_log(vfs_context_t, const char *, ...); static 
void proc_shutdown(void); -int in_shutdown(void); extern void IOSystemShutdownNotification(void); @@ -92,10 +93,16 @@ struct sd_filterargs{ struct sd_iterargs { - int signo; /* the signal to be posted */ - int setsdstate; /* shutdown state to be set */ + int signo; /* the signal to be posted */ + int setsdstate; /* shutdown state to be set */ + int countproc; /* count processes on action */ + int activecount; /* number of processes on which action was done */ }; +static vnode_t sd_logvp = NULLVP; +static off_t sd_log_offset = 0; + + static int sd_filt1(proc_t, void *); static int sd_filt2(proc_t, void *); static int sd_callback1(proc_t p, void * arg); @@ -109,6 +116,8 @@ boot(int paniced, int howto, char *command) int hostboot_option=0; int funnel_state; + system_inshutdown = 1; + funnel_state = thread_funnel_set(kernel_flock, TRUE); /* @@ -117,11 +126,16 @@ boot(int paniced, int howto, char *command) */ IOSystemShutdownNotification(); - shutting_down = 1; - md_prepare_for_shutdown(paniced, howto, command); - if ((howto&RB_NOSYNC)==0 && waittime < 0) { + if ((howto&RB_QUICK)==RB_QUICK && waittime < 0) { + waittime = 0; + printf("Quick reboot...\n"); + if ((howto&RB_NOSYNC)==0) { + sync(p, (void *)NULL, (int *)NULL); + } + } + else if ((howto&RB_NOSYNC)==0 && waittime < 0) { int iter, nbusy; waittime = 0; @@ -135,10 +149,13 @@ boot(int paniced, int howto, char *command) /* handle live procs (deallocate their root and current directories). 
*/ proc_shutdown(); -#if AUDIT +#if CONFIG_AUDIT audit_shutdown(); #endif + if (unmountroot_pre_hook != NULL) + unmountroot_pre_hook(); + sync(p, (void *)NULL, (int *)NULL); /* @@ -149,6 +166,9 @@ boot(int paniced, int howto, char *command) if (initproc && p != initproc) task_suspend(initproc->task); + if (kdebug_enable) + kdbg_dump_trace_to_file("/var/log/shutdown/shutdown.trace"); + /* * Unmount filesystems */ @@ -193,6 +213,67 @@ boot(int paniced, int howto, char *command) thread_funnel_set(kernel_flock, FALSE); } +static int +sd_openlog(vfs_context_t ctx) +{ + int error = 0; + struct timeval tv; + + /* Open shutdown log */ + if ((error = vnode_open(PROC_SHUTDOWN_LOG, (O_CREAT | FWRITE | O_NOFOLLOW), 0644, 0, &sd_logvp, ctx))) { + printf("Failed to open %s: error %d\n", PROC_SHUTDOWN_LOG, error); + sd_logvp = NULLVP; + return error; + } + + vnode_setsize(sd_logvp, (off_t)0, 0, ctx); + + /* Write a little header */ + microtime(&tv); + sd_log(ctx, "Process shutdown log. Current time is %lu (in seconds).\n\n", tv.tv_sec); + + return 0; +} + +static int +sd_closelog(vfs_context_t ctx) +{ + int error = 0; + if (sd_logvp != NULLVP) { + VNOP_FSYNC(sd_logvp, MNT_WAIT, ctx); + error = vnode_close(sd_logvp, FWRITE, ctx); + } + + return error; +} + +static void +sd_log(vfs_context_t ctx, const char *fmt, ...) 
+{ + int resid, log_error, len; + char logbuf[100]; + va_list arglist; + + /* If the log isn't open yet, open it */ + if (sd_logvp == NULLVP) { + if (sd_openlog(ctx) != 0) { + /* Couldn't open, we fail out */ + return; + } + } + + va_start(arglist, fmt); + len = vsnprintf(logbuf, sizeof(logbuf), fmt, arglist); + log_error = vn_rdwr(UIO_WRITE, sd_logvp, (caddr_t)logbuf, len, sd_log_offset, + UIO_SYSSPACE, IO_UNIT | IO_NOAUTH, vfs_context_ucred(ctx), &resid, vfs_context_proc(ctx)); + if (log_error == EIO || log_error == 0) { + sd_log_offset += (len - resid); + } + + va_end(arglist); + +} + static int sd_filt1(proc_t p, void * args) { @@ -219,16 +300,25 @@ sd_callback1(proc_t p, void * args) struct sd_iterargs * sd = (struct sd_iterargs *)args; int signo = sd->signo; int setsdstate = sd->setsdstate; + int countproc = sd->countproc; proc_lock(p); p->p_shutdownstate = setsdstate; if (p->p_stat != SZOMB) { proc_unlock(p); + if (countproc != 0) { + proc_list_lock(); + p->p_listflag |= P_LIST_EXITCOUNT; + proc_shutdown_exitcount++; + proc_list_unlock(); + } + psignal(p, signo); + if (countproc != 0) + sd->activecount++; } else proc_unlock(p); return(PROC_RETURNED); - } static int @@ -255,12 +345,21 @@ sd_callback2(proc_t p, void * args) struct sd_iterargs * sd = (struct sd_iterargs *)args; int signo = sd->signo; int setsdstate = sd->setsdstate; + int countproc = sd->countproc; proc_lock(p); p->p_shutdownstate = setsdstate; if (p->p_stat != SZOMB) { proc_unlock(p); + if (countproc != 0) { + proc_list_lock(); + p->p_listflag |= P_LIST_EXITCOUNT; + proc_shutdown_exitcount++; + proc_list_unlock(); + } psignal(p, signo); + if (countproc != 0) + sd->activecount++; } else proc_unlock(p); @@ -272,6 +371,8 @@ static int sd_callback3(proc_t p, void * args) { struct sd_iterargs * sd = (struct sd_iterargs *)args; + vfs_context_t ctx = vfs_context_current(); + int setsdstate = sd->setsdstate; proc_lock(p); @@ -291,7 +392,13 @@ sd_callback3(proc_t p, void * args) } else { p->exit_thread 
= current_thread(); printf("."); + + sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid); + proc_unlock(p); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, + p->p_pid, 0, 1, 0, 0); + sd->activecount++; exit1(p, 1, (int *)NULL); } } else @@ -316,11 +423,13 @@ sd_callback3(proc_t p, void * args) static void proc_shutdown(void) { - struct proc *p, *self; - int i, TERM_catch; + vfs_context_t ctx = vfs_context_current(); + struct proc *p, *self; int delayterm = 0; struct sd_filterargs sfargs; struct sd_iterargs sdargs; + int error = 0; + struct timespec ts; /* * Kill as many procs as we can. (Except ourself...) @@ -347,39 +456,39 @@ proc_shutdown(void) sfargs.shutdownstate = 0; sdargs.signo = SIGTERM; sdargs.setsdstate = 1; + sdargs.countproc = 1; + sdargs.activecount = 0; + error = 0; /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs); - /* - * now wait for up to 30 seconds to allow those procs catching SIGTERM - * to digest it - * as soon as these procs have exited, we'll continue on to the next step - */ - for (i = 0; i < 300; i++) { - /* - * sleep for a tenth of a second - * and then check to see if the tasks that were sent a - * SIGTERM have exited - */ - delay_for_interval(100, 1000 * 1000); - TERM_catch = 0; - - + if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) { proc_list_lock(); - - for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (p->p_shutdownstate == 1) { - TERM_catch++; + if (proc_shutdown_exitcount != 0) { + /* + * now wait for up to 30 seconds to allow those procs catching SIGTERM + * to digest it + * as soon as these procs have exited, we'll continue on to the next step + */ + ts.tv_sec = 30; + ts.tv_nsec = 0; + error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts); + if (error != 0) { + for (p = allproc.lh_first; p; p = p->p_list.le_next) 
{ + if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) + p->p_listflag &= ~P_LIST_EXITCOUNT; + } + for (p = zombproc.lh_first; p; p = p->p_list.le_next) { + if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) + p->p_listflag &= ~P_LIST_EXITCOUNT; + } } + } - proc_list_unlock(); - - if (TERM_catch == 0) - break; } - if (TERM_catch) { + if (error == ETIMEDOUT) { /* * log the names of the unresponsive tasks */ @@ -387,9 +496,10 @@ proc_shutdown(void) proc_list_lock(); - for (p = allproc.lh_first; p; p = p->p_list.le_next) { + for (p = allproc.lh_first; p; p = p->p_list.le_next) { if (p->p_shutdownstate == 1) { - printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); + printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); + sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid); } } @@ -405,31 +515,36 @@ proc_shutdown(void) sfargs.shutdownstate = 2; sdargs.signo = SIGKILL; sdargs.setsdstate = 2; + sdargs.countproc = 1; + sdargs.activecount = 0; - /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ + /* post a SIGKILL to all that catch SIGTERM and not marked for delay */ proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs); - /* - * wait for up to 60 seconds to allow these procs to exit normally - * - * History: The delay interval was changed from 100 to 200 - * for NFS requests in particular. - */ - for (i = 0; i < 300; i++) { - delay_for_interval(200, 1000 * 1000); - - + if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) { proc_list_lock(); - - for (p = allproc.lh_first; p; p = p->p_list.le_next) { - if (p->p_shutdownstate == 2) - break; + if (proc_shutdown_exitcount != 0) { + /* + * wait for up to 60 seconds to allow these procs to exit normally + * + * History: The delay interval was changed from 100 to 200 + * for NFS requests in particular. 
+ */ + ts.tv_sec = 60; + ts.tv_nsec = 0; + error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts); + if (error != 0) { + for (p = allproc.lh_first; p; p = p->p_list.le_next) { + if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) + p->p_listflag &= ~P_LIST_EXITCOUNT; + } + for (p = zombproc.lh_first; p; p = p->p_list.le_next) { + if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT) + p->p_listflag &= ~P_LIST_EXITCOUNT; + } + } } - proc_list_unlock(); - - if (!p) - break; } /* @@ -439,6 +554,8 @@ proc_shutdown(void) sfargs.shutdownstate = 3; sdargs.signo = 0; sdargs.setsdstate = 3; + sdargs.countproc = 0; + sdargs.activecount = 0; /* post a SIGTERM to all that catch SIGTERM and not marked for delay */ proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs); @@ -449,16 +566,11 @@ proc_shutdown(void) delayterm = 1; goto sigterm_loop; } + + sd_closelog(ctx); + /* drop the ref on initproc */ proc_rele(initproc); printf("continuing\n"); } -/* - * Check whether the system has begun its shutdown sequence. 
- */ -int -in_shutdown(void) -{ - return shutting_down; -} diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index b9b14ffb5..e0ded6e4c 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -92,7 +92,7 @@ #include #include -#include +#include #include @@ -134,7 +134,7 @@ extern void doexception(int exc, mach_exception_code_t code, static void stop(proc_t, proc_t); int cansignal(proc_t, kauth_cred_t, proc_t, int, int); int killpg1(proc_t, int, int, int, int); -int setsigvec(proc_t, int, struct __user_sigaction *); +int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); static void psignal_uthread(thread_t, int); kern_return_t do_bsdexception(int, int, int); void __posix_sem_syscall_return(kern_return_t); @@ -148,9 +148,15 @@ kern_return_t semaphore_wait_trap_internal(mach_port_name_t, void (*)(kern_retur static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); - -struct filterops sig_filtops = - { 0, filt_sigattach, filt_sigdetach, filt_signal }; +static void filt_signaltouch(struct knote *kn, struct kevent64_s *kev, + long type); + +struct filterops sig_filtops = { + .f_attach = filt_sigattach, + .f_detach = filt_sigdetach, + .f_event = filt_signal, + .f_touch = filt_signaltouch, +}; /* structures and fns for killpg1 iterartion callback and filters */ struct killpg1_filtargs { @@ -163,6 +169,7 @@ struct killpg1_iterargs { kauth_cred_t uc; int signum; int * nfoundp; + int zombie; }; static int killpg1_filt(proc_t p, void * arg); @@ -186,9 +193,17 @@ static void psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, * NOTE: Source and target may *NOT* overlap! 
(target is smaller) */ static void -sigaltstack_64to32(struct user_sigaltstack *in, struct sigaltstack *out) +sigaltstack_kern_to_user32(struct kern_sigaltstack *in, struct user32_sigaltstack *out) { - out->ss_sp = CAST_DOWN(void *,in->ss_sp); + out->ss_sp = CAST_DOWN_EXPLICIT(user32_addr_t, in->ss_sp); + out->ss_size = CAST_DOWN_EXPLICIT(user32_size_t, in->ss_size); + out->ss_flags = in->ss_flags; +} + +static void +sigaltstack_kern_to_user64(struct kern_sigaltstack *in, struct user64_sigaltstack *out) +{ + out->ss_sp = in->ss_sp; out->ss_size = in->ss_size; out->ss_flags = in->ss_flags; } @@ -199,24 +214,39 @@ sigaltstack_64to32(struct user_sigaltstack *in, struct sigaltstack *out) * the beginning. */ static void -sigaltstack_32to64(struct sigaltstack *in, struct user_sigaltstack *out) +sigaltstack_user32_to_kern(struct user32_sigaltstack *in, struct kern_sigaltstack *out) +{ + out->ss_flags = in->ss_flags; + out->ss_size = in->ss_size; + out->ss_sp = CAST_USER_ADDR_T(in->ss_sp); +} +static void +sigaltstack_user64_to_kern(struct user64_sigaltstack *in, struct kern_sigaltstack *out) { out->ss_flags = in->ss_flags; out->ss_size = in->ss_size; - out->ss_sp = CAST_USER_ADDR_T(in->ss_sp); + out->ss_sp = in->ss_sp; } static void -sigaction_64to32(struct user_sigaction *in, struct sigaction *out) +sigaction_kern_to_user32(struct kern_sigaction *in, struct user32_sigaction *out) { /* This assumes 32 bit __sa_handler is of type sig_t */ - out->__sigaction_u.__sa_handler = CAST_DOWN(sig_t,in->__sigaction_u.__sa_handler); + out->__sigaction_u.__sa_handler = CAST_DOWN_EXPLICIT(user32_addr_t,in->__sigaction_u.__sa_handler); + out->sa_mask = in->sa_mask; + out->sa_flags = in->sa_flags; +} +static void +sigaction_kern_to_user64(struct kern_sigaction *in, struct user64_sigaction *out) +{ + /* This assumes 32 bit __sa_handler is of type sig_t */ + out->__sigaction_u.__sa_handler = in->__sigaction_u.__sa_handler; out->sa_mask = in->sa_mask; out->sa_flags = in->sa_flags; } static 
void -__sigaction_32to64(struct __sigaction *in, struct __user_sigaction *out) +__sigaction_user32_to_kern(struct __user32_sigaction *in, struct __kern_sigaction *out) { out->__sigaction_u.__sa_handler = CAST_USER_ADDR_T(in->__sigaction_u.__sa_handler); out->sa_tramp = CAST_USER_ADDR_T(in->sa_tramp); @@ -224,6 +254,14 @@ __sigaction_32to64(struct __sigaction *in, struct __user_sigaction *out) out->sa_flags = in->sa_flags; } +static void +__sigaction_user64_to_kern(struct __user64_sigaction *in, struct __kern_sigaction *out) +{ + out->__sigaction_u.__sa_handler = in->__sigaction_u.__sa_handler; + out->sa_tramp = in->sa_tramp; + out->sa_mask = in->sa_mask; + out->sa_flags = in->sa_flags; +} #if SIGNAL_DEBUG void ram_printf(int); @@ -287,8 +325,8 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie) proc_list_unlock(); /* - * If the real or effective UID of the sender matches the real, - * effective, or ssaved UID of the target, permit the signal to + * If the real or effective UID of the sender matches the real + * or saved UID of the target, permit the signal to * be sent. */ if (zombie == 0) @@ -299,9 +337,7 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie) if (uc->cr_ruid == my_cred->cr_ruid || uc->cr_ruid == my_cred->cr_svuid || kauth_cred_getuid(uc) == my_cred->cr_ruid || - kauth_cred_getuid(uc) == my_cred->cr_svuid || - uc->cr_ruid == kauth_cred_getuid(my_cred) || - kauth_cred_getuid(uc) == kauth_cred_getuid(my_cred)) { + kauth_cred_getuid(uc) == my_cred->cr_svuid) { if (zombie == 0) kauth_cred_unref(&my_cred); return (1); @@ -319,15 +355,19 @@ cansignal(proc_t p, kauth_cred_t uc, proc_t q, int signum, int zombie) * EINVAL * copyout:EFAULT * copyin:EFAULT + * + * Notes: Uses current thread as a parameter to inform PPC to enable + * FPU exceptions via setsigvec(); this operation is not proxy + * safe! 
*/ /* ARGSUSED */ int -sigaction(proc_t p, struct sigaction_args *uap, __unused register_t *retval) +sigaction(proc_t p, struct sigaction_args *uap, __unused int32_t *retval) { - struct user_sigaction vec; - struct __user_sigaction __vec; + struct kern_sigaction vec; + struct __kern_sigaction __vec; - struct user_sigaction *sa = &vec; + struct kern_sigaction *sa = &vec; struct sigacts *ps = p->p_sigacts; int signum; @@ -359,40 +399,49 @@ sigaction(proc_t p, struct sigaction_args *uap, __unused register_t *retval) sa->sa_flags |= SA_NOCLDWAIT; if (IS_64BIT_PROCESS(p)) { - error = copyout(sa, uap->osa, sizeof(struct user_sigaction)); + struct user64_sigaction vec64; + + sigaction_kern_to_user64(sa, &vec64); + error = copyout(&vec64, uap->osa, sizeof(vec64)); } else { - struct sigaction vec32; - sigaction_64to32(sa, &vec32); - error = copyout(&vec32, uap->osa, sizeof(struct sigaction)); + struct user32_sigaction vec32; + + sigaction_kern_to_user32(sa, &vec32); + error = copyout(&vec32, uap->osa, sizeof(vec32)); } if (error) return (error); } if (uap->nsa) { if (IS_64BIT_PROCESS(p)) { - error = copyin(uap->nsa, &__vec, sizeof(struct __user_sigaction)); + struct __user64_sigaction __vec64; + + error = copyin(uap->nsa, &__vec64, sizeof(__vec64)); + __sigaction_user64_to_kern(&__vec64, &__vec); } else { - struct __sigaction __vec32; - error = copyin(uap->nsa, &__vec32, sizeof(struct __sigaction)); - __sigaction_32to64(&__vec32, &__vec); + struct __user32_sigaction __vec32; + + error = copyin(uap->nsa, &__vec32, sizeof(__vec32)); + __sigaction_user32_to_kern(&__vec32, &__vec); } if (error) return (error); __vec.sa_flags &= SA_USERSPACE_MASK; /* Only pass on valid sa_flags */ - error = setsigvec(p, signum, &__vec); + error = setsigvec(p, current_thread(), signum, &__vec, FALSE); } return (error); } /* Routines to manipulate bits on all threads */ int -clear_procsiglist(proc_t p, int bit) +clear_procsiglist(proc_t p, int bit, boolean_t in_signalstart) { struct uthread * uth; 
thread_t thact; proc_lock(p); - proc_signalstart(p, 1); + if (!in_signalstart) + proc_signalstart(p, 1); if ((p->p_lflag & P_LINVFORK) && p->p_vforkact) { thact = p->p_vforkact; @@ -400,7 +449,8 @@ clear_procsiglist(proc_t p, int bit) if (uth) { uth->uu_siglist &= ~bit; } - proc_signalend(p, 1); + if (!in_signalstart) + proc_signalend(p, 1); proc_unlock(p); return(0); } @@ -408,8 +458,9 @@ clear_procsiglist(proc_t p, int bit) TAILQ_FOREACH(uth, &p->p_uthlist, uu_list) { uth->uu_siglist &= ~bit; } - - proc_signalend(p, 1); + p->p_siglist &= ~bit; + if (!in_signalstart) + proc_signalend(p, 1); proc_unlock(p); return(0); @@ -507,8 +558,18 @@ set_procsigmask(proc_t p, int bit) } /* XXX should be static? */ +/* + * Notes: The thread parameter is used in the PPC case to select the + * thread on which the floating point exception will be enabled + * or disabled. We can't simply take current_thread(), since + * this is called from posix_spawn() on the not currently running + * process/thread pair. + * + * We mark thread as unused to alow compilation without warning + * onnon-PPC platforms. 
+ */ int -setsigvec(proc_t p, int signum, struct __user_sigaction *sa) +setsigvec(proc_t p, __unused thread_t thread, int signum, struct __kern_sigaction *sa, boolean_t in_sigstart) { struct sigacts *ps = p->p_sigacts; int bit; @@ -553,21 +614,21 @@ setsigvec(proc_t p, int signum, struct __user_sigaction *sa) ps->ps_signodefer &= ~bit; if (signum == SIGCHLD) { if (sa->sa_flags & SA_NOCLDSTOP) - OSBitOrAtomic(P_NOCLDSTOP, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_NOCLDSTOP, &p->p_flag); else - OSBitAndAtomic(~((uint32_t)P_NOCLDSTOP), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_NOCLDSTOP), &p->p_flag); if ((sa->sa_flags & SA_NOCLDWAIT) || (sa->sa_handler == SIG_IGN)) - OSBitOrAtomic(P_NOCLDWAIT, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_NOCLDWAIT, &p->p_flag); else - OSBitAndAtomic(~((uint32_t)P_NOCLDWAIT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_NOCLDWAIT), &p->p_flag); } #ifdef __ppc__ if (signum == SIGFPE) { if (sa->sa_handler == SIG_DFL || sa->sa_handler == SIG_IGN) - thread_enable_fpe(current_thread(), 0); + thread_enable_fpe(thread, 0); else - thread_enable_fpe(current_thread(), 1); + thread_enable_fpe(thread, 1); } #endif /* __ppc__ */ /* @@ -579,7 +640,7 @@ setsigvec(proc_t p, int signum, struct __user_sigaction *sa) if (sa->sa_handler == SIG_IGN || (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { - clear_procsiglist(p, bit); + clear_procsiglist(p, bit, in_sigstart); if (signum != SIGCONT) p->p_sigignore |= bit; /* easier in psignal */ p->p_sigcatch &= ~bit; @@ -618,6 +679,19 @@ execsigs(proc_t p, thread_t thread) struct uthread *ut; ut = (struct uthread *)get_bsdthread_info(thread); + + /* + * transfer saved signal states from the process + * back to the current thread. + * + * NOTE: We do this without the process locked, + * because we are guaranteed to be single-threaded + * by this point in exec and the p_siglist is + * only accessed by threads inside the process. 
+ */ + ut->uu_siglist |= p->p_siglist; + p->p_siglist = 0; + /* * Reset caught signals. Held signals remain held * through p_sigmask (unless they were caught, @@ -630,13 +704,11 @@ execsigs(proc_t p, thread_t thread) if (sigprop[nc] & SA_IGNORE) { if (nc != SIGCONT) p->p_sigignore |= mask; - if (thread){ - ut->uu_siglist &= ~mask; - } else - clear_procsiglist(p, mask); + ut->uu_siglist &= ~mask; } ps->ps_sigact[nc] = SIG_DFL; } + /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. @@ -657,7 +729,7 @@ execsigs(proc_t p, thread_t thread) * the library stub does the rest. */ int -sigprocmask(proc_t p, struct sigprocmask_args *uap, __unused register_t *retval) +sigprocmask(proc_t p, struct sigprocmask_args *uap, __unused int32_t *retval) { int error = 0; sigset_t oldmask, nmask; @@ -702,7 +774,7 @@ sigprocmask(proc_t p, struct sigprocmask_args *uap, __unused register_t *retval) } int -sigpending(__unused proc_t p, struct sigpending_args *uap, __unused register_t *retval) +sigpending(__unused proc_t p, struct sigpending_args *uap, __unused int32_t *retval) { struct uthread *ut; sigset_t pendlist; @@ -729,14 +801,14 @@ sigcontinue(__unused int error) } int -sigsuspend(proc_t p, struct sigsuspend_args *uap, register_t *retval) +sigsuspend(proc_t p, struct sigsuspend_args *uap, int32_t *retval) { __pthread_testcancel(1); return(sigsuspend_nocancel(p, (struct sigsuspend_nocancel_args *)uap, retval)); } int -sigsuspend_nocancel(proc_t p, struct sigsuspend_nocancel_args *uap, __unused register_t *retval) +sigsuspend_nocancel(proc_t p, struct sigsuspend_nocancel_args *uap, __unused int32_t *retval) { struct uthread *ut; @@ -761,7 +833,7 @@ sigsuspend_nocancel(proc_t p, struct sigsuspend_nocancel_args *uap, __unused reg int __disable_threadsignal(__unused proc_t p, __unused struct __disable_threadsignal_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct uthread *uth; @@ -799,7 +871,7 @@ 
__pthread_testcancel(int presyscall) int __pthread_markcancel(__unused proc_t p, - struct __pthread_markcancel_args *uap, __unused register_t *retval) + struct __pthread_markcancel_args *uap, __unused int32_t *retval) { thread_act_t target_act; int error = 0; @@ -831,7 +903,7 @@ __pthread_markcancel(__unused proc_t p, */ int __pthread_canceled(__unused proc_t p, - struct __pthread_canceled_args *uap, __unused register_t *retval) + struct __pthread_canceled_args *uap, __unused int32_t *retval) { thread_act_t thread; struct uthread *uth; @@ -877,65 +949,184 @@ __posix_sem_syscall_return(kern_return_t kern_result) /* does not return */ } +#if OLD_SEMWAIT_SIGNAL /* * Returns: 0 Success * EINTR * ETIMEDOUT * EINVAL + * EFAULT if timespec is NULL */ int -__semwait_signal(__unused proc_t p, struct __semwait_signal_args *uap, - register_t *retval) +__old_semwait_signal(proc_t p, struct __old_semwait_signal_args *uap, + int32_t *retval) { __pthread_testcancel(0); - return(__semwait_signal_nocancel(p, (struct __semwait_signal_nocancel_args *)uap, retval)); + return(__old_semwait_signal_nocancel(p, (struct __old_semwait_signal_nocancel_args *)uap, retval)); } int -__semwait_signal_nocancel(__unused proc_t p, struct __semwait_signal_nocancel_args *uap, - __unused register_t *retval) +__old_semwait_signal_nocancel(proc_t p, struct __old_semwait_signal_nocancel_args *uap, + __unused int32_t *retval) { - + kern_return_t kern_result; + int error; mach_timespec_t then; struct timespec now; - + struct user_timespec ts; + boolean_t truncated_timeout = FALSE; + if(uap->timeout) { - + + if (IS_64BIT_PROCESS(p)) { + struct user64_timespec ts64; + error = copyin(uap->ts, &ts64, sizeof(ts64)); + ts.tv_sec = ts64.tv_sec; + ts.tv_nsec = ts64.tv_nsec; + } else { + struct user32_timespec ts32; + error = copyin(uap->ts, &ts32, sizeof(ts32)); + ts.tv_sec = ts32.tv_sec; + ts.tv_nsec = ts32.tv_nsec; + } + + if (error) { + return error; + } + + if ((ts.tv_sec & 0xFFFFFFFF00000000ULL) != 0) { + 
ts.tv_sec = 0xFFFFFFFF; + ts.tv_nsec = 0; + truncated_timeout = TRUE; + } + if (uap->relative) { - then.tv_sec = uap->tv_sec; - then.tv_nsec = uap->tv_nsec; + then.tv_sec = ts.tv_sec; + then.tv_nsec = ts.tv_nsec; } else { nanotime(&now); - then.tv_sec = uap->tv_sec - now.tv_sec; - then.tv_nsec = uap->tv_nsec - now.tv_nsec; - if (then.tv_nsec < 0) { - then.tv_nsec += NSEC_PER_SEC; - then.tv_sec--; - } + /* if time has elapsed, set time to null timepsec to bailout rightaway */ - if ((int)then.tv_sec < 0) { + if (now.tv_sec == ts.tv_sec ? + now.tv_nsec > ts.tv_nsec : + now.tv_sec > ts.tv_sec) { then.tv_sec = 0; then.tv_nsec = 0; + } else { + then.tv_sec = ts.tv_sec - now.tv_sec; + then.tv_nsec = ts.tv_nsec - now.tv_nsec; + if (then.tv_nsec < 0) { + then.tv_nsec += NSEC_PER_SEC; + then.tv_sec--; + } } } - + if (uap->mutex_sem == 0) kern_result = semaphore_timedwait_trap_internal((mach_port_name_t)uap->cond_sem, then.tv_sec, then.tv_nsec, __posix_sem_syscall_return); else kern_result = semaphore_timedwait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, then.tv_sec, then.tv_nsec, __posix_sem_syscall_return); - + } else { - + if (uap->mutex_sem == 0) kern_result = semaphore_wait_trap_internal(uap->cond_sem, __posix_sem_syscall_return); else - + kern_result = semaphore_wait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, __posix_sem_syscall_return); } - - if (kern_result == KERN_SUCCESS) + + if (kern_result == KERN_SUCCESS && !truncated_timeout) return(0); + else if (kern_result == KERN_SUCCESS && truncated_timeout) + return(EINTR); /* simulate an exceptional condition because Mach doesn't support a longer timeout */ + else if (kern_result == KERN_ABORTED) + return(EINTR); + else if (kern_result == KERN_OPERATION_TIMED_OUT) + return(ETIMEDOUT); + else + return(EINVAL); +} +#endif /* OLD_SEMWAIT_SIGNAL*/ + +/* + * Returns: 0 Success + * EINTR + * ETIMEDOUT + * EINVAL + * EFAULT if timespec is NULL + */ +int +__semwait_signal(proc_t p, struct __semwait_signal_args 
*uap, + int32_t *retval) +{ + __pthread_testcancel(0); + return(__semwait_signal_nocancel(p, (struct __semwait_signal_nocancel_args *)uap, retval)); +} + +int +__semwait_signal_nocancel(__unused proc_t p, struct __semwait_signal_nocancel_args *uap, + __unused int32_t *retval) +{ + + kern_return_t kern_result; + mach_timespec_t then; + struct timespec now; + struct user_timespec ts; + boolean_t truncated_timeout = FALSE; + + if(uap->timeout) { + + ts.tv_sec = uap->tv_sec; + ts.tv_nsec = uap->tv_nsec; + + if ((ts.tv_sec & 0xFFFFFFFF00000000ULL) != 0) { + ts.tv_sec = 0xFFFFFFFF; + ts.tv_nsec = 0; + truncated_timeout = TRUE; + } + + if (uap->relative) { + then.tv_sec = ts.tv_sec; + then.tv_nsec = ts.tv_nsec; + } else { + nanotime(&now); + + /* if time has elapsed, set time to null timepsec to bailout rightaway */ + if (now.tv_sec == ts.tv_sec ? + now.tv_nsec > ts.tv_nsec : + now.tv_sec > ts.tv_sec) { + then.tv_sec = 0; + then.tv_nsec = 0; + } else { + then.tv_sec = ts.tv_sec - now.tv_sec; + then.tv_nsec = ts.tv_nsec - now.tv_nsec; + if (then.tv_nsec < 0) { + then.tv_nsec += NSEC_PER_SEC; + then.tv_sec--; + } + } + } + + if (uap->mutex_sem == 0) + kern_result = semaphore_timedwait_trap_internal((mach_port_name_t)uap->cond_sem, then.tv_sec, then.tv_nsec, __posix_sem_syscall_return); + else + kern_result = semaphore_timedwait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, then.tv_sec, then.tv_nsec, __posix_sem_syscall_return); + + } else { + + if (uap->mutex_sem == 0) + kern_result = semaphore_wait_trap_internal(uap->cond_sem, __posix_sem_syscall_return); + else + + kern_result = semaphore_wait_signal_trap_internal(uap->cond_sem, uap->mutex_sem, __posix_sem_syscall_return); + } + + if (kern_result == KERN_SUCCESS && !truncated_timeout) + return(0); + else if (kern_result == KERN_SUCCESS && truncated_timeout) + return(EINTR); /* simulate an exceptional condition because Mach doesn't support a longer timeout */ else if (kern_result == KERN_ABORTED) return(EINTR); else 
if (kern_result == KERN_OPERATION_TIMED_OUT) @@ -944,9 +1135,10 @@ __semwait_signal_nocancel(__unused proc_t p, struct __semwait_signal_nocancel_ar return(EINVAL); } + int __pthread_kill(__unused proc_t p, struct __pthread_kill_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { thread_t target_act; int error = 0; @@ -979,7 +1171,7 @@ __pthread_kill(__unused proc_t p, struct __pthread_kill_args *uap, int __pthread_sigmask(__unused proc_t p, struct __pthread_sigmask_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { user_addr_t set = uap->set; user_addr_t oset = uap->oset; @@ -1033,14 +1225,14 @@ __pthread_sigmask(__unused proc_t p, struct __pthread_sigmask_args *uap, * copyout:EFAULT */ int -__sigwait(proc_t p, struct __sigwait_args *uap, register_t *retval) +__sigwait(proc_t p, struct __sigwait_args *uap, int32_t *retval) { __pthread_testcancel(1); return(__sigwait_nocancel(p, (struct __sigwait_nocancel_args *)uap, retval)); } int -__sigwait_nocancel(proc_t p, struct __sigwait_nocancel_args *uap, __unused register_t *retval) +__sigwait_nocancel(proc_t p, struct __sigwait_nocancel_args *uap, __unused int32_t *retval) { struct uthread *ut; struct uthread *uth; @@ -1103,7 +1295,7 @@ __sigwait_nocancel(proc_t p, struct __sigwait_nocancel_args *uap, __unused regis /* No Continuations for now */ error = msleep((caddr_t)&ut->uu_sigwait, &p->p_mlock, PPAUSE|PCATCH, "pause", 0); - if ((error == EINTR) || (error == ERESTART)) + if (error == ERESTART) error = 0; sigw = (ut->uu_sigwait & siglist); @@ -1118,6 +1310,11 @@ __sigwait_nocancel(proc_t p, struct __sigwait_nocancel_args *uap, __unused regis panic("sigwait with no signal wakeup"); /* Clear the pending signal in the thread it was delivered */ uth->uu_siglist &= ~(sigmask(signum)); + +#if CONFIG_DTRACE + DTRACE_PROC2(signal__clear, int, signum, siginfo_t *, &(ut->t_dtrace_siginfo)); +#endif + proc_unlock(p); if (uap->sig != USER_ADDR_NULL) error = copyout(&signum, uap->sig, 
sizeof(int)); @@ -1129,10 +1326,10 @@ __sigwait_nocancel(proc_t p, struct __sigwait_nocancel_args *uap, __unused regis } int -sigaltstack(__unused proc_t p, struct sigaltstack_args *uap, __unused register_t *retval) +sigaltstack(__unused proc_t p, struct sigaltstack_args *uap, __unused int32_t *retval) { - struct user_sigaltstack ss; - struct user_sigaltstack *pstk; + struct kern_sigaltstack ss; + struct kern_sigaltstack *pstk; int error; struct uthread *uth; int onstack; @@ -1145,11 +1342,13 @@ sigaltstack(__unused proc_t p, struct sigaltstack_args *uap, __unused register_t onstack = pstk->ss_flags & SA_ONSTACK; if (uap->oss) { if (IS_64BIT_PROCESS(p)) { - error = copyout(pstk, uap->oss, sizeof(struct user_sigaltstack)); + struct user64_sigaltstack ss64; + sigaltstack_kern_to_user64(pstk, &ss64); + error = copyout(&ss64, uap->oss, sizeof(ss64)); } else { - struct sigaltstack ss32; - sigaltstack_64to32(pstk, &ss32); - error = copyout(&ss32, uap->oss, sizeof(struct sigaltstack)); + struct user32_sigaltstack ss32; + sigaltstack_kern_to_user32(pstk, &ss32); + error = copyout(&ss32, uap->oss, sizeof(ss32)); } if (error) return (error); @@ -1157,11 +1356,13 @@ sigaltstack(__unused proc_t p, struct sigaltstack_args *uap, __unused register_t if (uap->nss == USER_ADDR_NULL) return (0); if (IS_64BIT_PROCESS(p)) { - error = copyin(uap->nss, &ss, sizeof(struct user_sigaltstack)); + struct user64_sigaltstack ss64; + error = copyin(uap->nss, &ss64, sizeof(ss64)); + sigaltstack_user64_to_kern(&ss64, &ss); } else { - struct sigaltstack ss32; - error = copyin(uap->nss, &ss32, sizeof(struct sigaltstack)); - sigaltstack_32to64(&ss32,&ss); + struct user32_sigaltstack ss32; + error = copyin(uap->nss, &ss32, sizeof(ss32)); + sigaltstack_user32_to_kern(&ss32, &ss); } if (error) return (error); @@ -1189,7 +1390,7 @@ sigaltstack(__unused proc_t p, struct sigaltstack_args *uap, __unused register_t } int -kill(proc_t cp, struct kill_args *uap, __unused register_t *retval) +kill(proc_t cp, 
struct kill_args *uap, __unused int32_t *retval) { proc_t p; kauth_cred_t uc = kauth_cred_get(); @@ -1270,20 +1471,34 @@ killpg1_callback(proc_t p, void * arg) int signum = kargp->signum; int * nfoundp = kargp->nfoundp; int n; + int zombie = 0; + int error = 0; + if ((kargp->zombie != 0) && ((p->p_listflag & P_LIST_EXITED) == P_LIST_EXITED)) + zombie = 1; - if (cansignal(cp, uc, p, signum, 0) == 0) - return(PROC_RETURNED); + if (zombie != 0) { + proc_list_lock(); + error = cansignal(cp, uc, p, signum, zombie); + proc_list_unlock(); + + if (error != 0 && nfoundp != NULL) { + n = *nfoundp; + *nfoundp = n+1; + } + } else { + if (cansignal(cp, uc, p, signum, 0) == 0) + return(PROC_RETURNED); - if (nfoundp != NULL) { - n = *nfoundp; - *nfoundp = n+1; + if (nfoundp != NULL) { + n = *nfoundp; + *nfoundp = n+1; + } + if (signum != 0) + psignal(p, signum); } - if (signum != 0) - psignal(p, signum); return(PROC_RETURNED); - } /* @@ -1293,7 +1508,6 @@ killpg1_callback(proc_t p, void * arg) int killpg1(proc_t cp, int signum, int pgid, int all, int posix) { - proc_t p; kauth_cred_t uc; struct pgrp *pgrp; int nfound = 0; @@ -1313,24 +1527,9 @@ killpg1(proc_t cp, int signum, int pgid, int all, int posix) karg.uc = uc; karg.nfoundp = &nfound; karg.signum = signum; + karg.zombie = 1; - proc_iterate(PROC_ALLPROCLIST, killpg1_callback, &karg, killpg1_filt, (void *)&kfarg); - /* - * Signalling zombies is a no-op, but they must be counted - * among those processes which have been signalled, since - * they are still members of the process group. 
- */ - - proc_list_lock(); - - for (p = zombproc.lh_first; p != 0; p = p->p_list.le_next) { - if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || - (!posix && p == cp) || !cansignal(cp, uc, p, signum, 1)) - continue; - nfound++; - } - - proc_list_unlock(); + proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), killpg1_callback, &karg, killpg1_filt, (void *)&kfarg); } else { if (pgid == 0) { @@ -1350,6 +1549,7 @@ killpg1(proc_t cp, int signum, int pgid, int all, int posix) karg.uc = uc; karg.signum = signum; karg.cp = cp; + karg.zombie = 0; /* PGRP_DROPREF drops the pgrp refernce */ @@ -1385,7 +1585,7 @@ gsignal(int pgid, int signum) static int pgsignal_filt(proc_t p, void * arg) { - int checkctty = (int)arg; + int checkctty = *(int*)arg; if ((checkctty == 0) || p->p_flag & P_CONTROLT) return(1); @@ -1397,7 +1597,7 @@ pgsignal_filt(proc_t p, void * arg) static int pgsignal_callback(proc_t p, void * arg) { - int signum = (int)arg; + int signum = *(int*)arg; psignal(p, signum); return(PROC_RETURNED); @@ -1408,7 +1608,7 @@ void pgsignal(struct pgrp *pgrp, int signum, int checkctty) { if (pgrp != PGRP_NULL) { - pgrp_iterate(pgrp, PGRP_BLOCKITERATE, pgsignal_callback, (void *)signum, pgsignal_filt, (void *)checkctty); + pgrp_iterate(pgrp, PGRP_BLOCKITERATE, pgsignal_callback, &signum, pgsignal_filt, &checkctty); } } @@ -1420,7 +1620,7 @@ tty_pgsignal(struct tty *tp, int signum, int checkctty) pg = tty_pgrp(tp); if (pg != PGRP_NULL) { - pgrp_iterate(pg, PGRP_BLOCKITERATE, pgsignal_callback, (void *)signum, pgsignal_filt, (void *)checkctty); + pgrp_iterate(pg, PGRP_BLOCKITERATE, pgsignal_callback, &signum, pgsignal_filt, &checkctty); pg_rele(pg); } } @@ -1720,12 +1920,26 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) goto runlocked; if (action == KERN_SIG_WAIT) { +#if CONFIG_DTRACE + /* + * DTrace proc signal-clear returns a siginfo_t. Collect the needed info. 
+ */ + r_uid = kauth_getruid(); /* per thread credential; protected by our thread context */ + + bzero((caddr_t)&(uth->t_dtrace_siginfo), sizeof(uth->t_dtrace_siginfo)); + + uth->t_dtrace_siginfo.si_signo = signum; + uth->t_dtrace_siginfo.si_pid = current_proc()->p_pid; + uth->t_dtrace_siginfo.si_status = W_EXITCODE(signum, 0); + uth->t_dtrace_siginfo.si_uid = r_uid; + uth->t_dtrace_siginfo.si_code = 0; +#endif uth->uu_sigwait = mask; uth->uu_siglist &= ~mask; wakeup(&uth->uu_sigwait); /* if it is SIGCONT resume whole process */ if (prop & SA_CONT) { - OSBitOrAtomic(P_CONTINUED, (UInt32 *)&sig_proc->p_flag); + OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag); sig_proc->p_contproc = current_proc()->p_pid; proc_unlock(sig_proc); @@ -1743,7 +1957,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) * (except for SIGCONT). */ if (prop & SA_CONT) { - OSBitOrAtomic(P_CONTINUED, (UInt32 *)&sig_proc->p_flag); + OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag); proc_unlock(sig_proc); (void) task_resume(sig_task); proc_lock(sig_proc); @@ -1804,7 +2018,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) if (sig_proc->p_stat != SSTOP) { sig_proc->p_xstat = signum; sig_proc->p_stat = SSTOP; - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&sig_proc->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &sig_proc->p_flag); sig_proc->p_lflag &= ~P_LWAITED; proc_unlock(sig_proc); @@ -1872,7 +2086,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) * Let the process run. If it's sleeping on an * event, it remains so. 
*/ - OSBitOrAtomic(P_CONTINUED, (UInt32 *)&sig_proc->p_flag); + OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag); sig_proc->p_contproc = sig_proc->p_pid; proc_unlock(sig_proc); @@ -2056,7 +2270,7 @@ issignal(proc_t p) p->sigwait = TRUE; p->sigwait_thread = cur_act; p->p_stat = SSTOP; - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag); p->p_lflag &= ~P_LWAITED; ut->uu_siglist &= ~mask; /* clear the old signal */ proc_signalend(p, 1); @@ -2096,7 +2310,7 @@ issignal(proc_t p) p->sigwait = TRUE; p->sigwait_thread = cur_act; p->p_stat = SSTOP; - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag); p->p_lflag &= ~P_LWAITED; ut->uu_siglist &= ~mask; /* clear the old signal */ @@ -2144,6 +2358,8 @@ issignal(proc_t p) */ proc_signalend(p, 1); proc_unlock(p); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, + p->p_pid, W_EXITCODE(0, SIGKILL), 2, 0, 0); exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL); return(0); } @@ -2428,7 +2644,7 @@ CURSIG(proc_t p) static void stop(proc_t p, proc_t parent) { - OSBitAndAtomic(~((uint32_t)P_CONTINUED), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTINUED), &p->p_flag); if ((parent != PROC_NULL) && (parent->p_stat != SSTOP)) { proc_list_lock(); wakeup((caddr_t)parent); @@ -2447,7 +2663,7 @@ postsig(int signum) proc_t p = current_proc(); struct sigacts *ps = p->p_sigacts; user_addr_t catcher; - u_long code; + uint32_t code; int mask, returnmask; struct uthread * ut; @@ -2481,9 +2697,6 @@ postsig(int signum) * Default catcher, where the default is to kill * the process. (Other cases were ignored above.) 
*/ - siginfo_t sinfo; - bzero((caddr_t)&sinfo, sizeof(siginfo_t)); - sig_lock_to_exit(p); p->p_acflag |= AXSIG; if (sigprop[signum] & SA_CORE) { @@ -2497,14 +2710,20 @@ postsig(int signum) proc_unlock(p); } - sinfo.si_signo = signum; - sinfo.si_pid = p->si_pid; - sinfo.si_uid = p->si_uid; - sinfo.si_status = WEXITSTATUS(p->si_status); +#if CONFIG_DTRACE + bzero((caddr_t)&(ut->t_dtrace_siginfo), sizeof(ut->t_dtrace_siginfo)); + + ut->t_dtrace_siginfo.si_signo = signum; + ut->t_dtrace_siginfo.si_pid = p->si_pid; + ut->t_dtrace_siginfo.si_uid = p->si_uid; + ut->t_dtrace_siginfo.si_status = WEXITSTATUS(p->si_status); - DTRACE_PROC3(signal__handle, int, signum, siginfo_t *, &sinfo, + DTRACE_PROC3(signal__handle, int, signum, siginfo_t *, &(ut->t_dtrace_siginfo), void (*)(void), SIG_DFL); +#endif + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, + p->p_pid, W_EXITCODE(0, signum), 3, 0, 0); exit1(p, W_EXITCODE(0, signum), (int *)NULL); return; } else { @@ -2555,7 +2774,7 @@ postsig(int signum) code = ps->ps_code; ps->ps_code = 0; } - OSIncrementAtomic(&p->p_stats->p_ru.ru_nsignals); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_nsignals); sendsig(p, catcher, signum, returnmask, code); } proc_signalend(p, 1); @@ -2628,6 +2847,29 @@ filt_signal(struct knote *kn, long hint) return (kn->kn_data != 0); } +static void +filt_signaltouch(struct knote *kn, struct kevent64_s *kev, long type) +{ + proc_klist_lock(); + switch (type) { + case EVENT_REGISTER: + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + break; + case EVENT_PROCESS: + *kev = kn->kn_kevent; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_data = 0; + kn->kn_fflags = 0; + } + break; + default: + panic("filt_machporttouch() - invalid type (%ld)", type); + break; + } + proc_klist_unlock(); +} + void bsd_ast(thread_t thread) { @@ -2643,7 +2885,7 @@ bsd_ast(thread_t thread) if ((p->p_flag & P_OWEUPC) && (p->p_flag & P_PROFIL)) { pc = get_useraddr(); addupc_task(p, pc, 1); - 
OSBitAndAtomic(~((uint32_t)P_OWEUPC), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_OWEUPC), &p->p_flag); } if (timerisset(&p->p_vtimer_user.it_value)) { @@ -2658,7 +2900,7 @@ bsd_ast(thread_t thread) task_vtimer_clear(p->task, TASK_VTIMER_USER); psignal(p, SIGVTALRM); - } + } } if (timerisset(&p->p_vtimer_prof.it_value)) { @@ -2674,7 +2916,7 @@ bsd_ast(thread_t thread) psignal(p, SIGPROF); } -} + } if (timerisset(&p->p_rlim_cpu)) { struct timeval tv; @@ -2697,6 +2939,18 @@ bsd_ast(thread_t thread) } } +#if CONFIG_DTRACE + if (ut->t_dtrace_sig) { + uint8_t dt_action_sig = ut->t_dtrace_sig; + ut->t_dtrace_sig = 0; + psignal(p, dt_action_sig); + } + if (ut->t_dtrace_stop) { + ut->t_dtrace_stop = 0; + psignal(p, SIGSTOP); + } +#endif /* CONFIG_DTRACE */ + if (CHECK_SIGNALS(p, current_thread(), ut)) { while ( (signum = issignal(p)) ) postsig(signum); diff --git a/bsd/kern/kern_subr.c b/bsd/kern/kern_subr.c index 711dc5fe7..cc05a7db7 100644 --- a/bsd/kern/kern_subr.c +++ b/bsd/kern/kern_subr.c @@ -87,6 +87,19 @@ static uint32_t uio_t_count = 0; #endif /* DEBUG */ +#define IS_VALID_UIO_SEGFLG(segflg) \ + ( (segflg) == UIO_USERSPACE || \ + (segflg) == UIO_SYSSPACE || \ + (segflg) == UIO_USERSPACE32 || \ + (segflg) == UIO_USERSPACE64 || \ + (segflg) == UIO_SYSSPACE32 || \ + (segflg) == UIO_USERISPACE || \ + (segflg) == UIO_PHYS_USERSPACE || \ + (segflg) == UIO_PHYS_SYSSPACE || \ + (segflg) == UIO_USERISPACE32 || \ + (segflg) == UIO_PHYS_USERSPACE32 || \ + (segflg) == UIO_USERISPACE64 || \ + (segflg) == UIO_PHYS_USERSPACE64 ) /* * Returns: 0 Success @@ -98,7 +111,7 @@ static uint32_t uio_t_count = 0; int uiomove(const char * cp, int n, uio_t uio) { - return uiomove64((const addr64_t)((const unsigned int)cp), n, uio); + return uiomove64((const addr64_t)(uintptr_t)cp, n, uio); } /* @@ -109,7 +122,6 @@ uiomove(const char * cp, int n, uio_t uio) * copywithin:EFAULT * copypv:EFAULT */ - // LP64todo - fix this! 'n' should be int64_t? 
int uiomove64(const addr64_t c_cp, int n, struct uio *uio) { @@ -133,10 +145,9 @@ uiomove64(const addr64_t c_cp, int n, struct uio *uio) #endif /* LP64_DEBUG */ while (n > 0 && uio_resid(uio)) { - acnt = uio_iov_len(uio); + uio_update(uio, 0); + acnt = uio_curriovlen(uio); if (acnt == 0) { - uio_next_iov(uio); - uio->uio_iovcnt--; continue; } if (n > 0 && acnt > (uint64_t)n) @@ -146,54 +157,30 @@ uiomove64(const addr64_t c_cp, int n, struct uio *uio) case UIO_USERSPACE64: case UIO_USERISPACE64: - // LP64 - 3rd argument in debug code is 64 bit, expected to be 32 bit - if (uio->uio_rw == UIO_READ) - { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0); - - error = copyout( CAST_DOWN(caddr_t, cp), uio->uio_iovs.iov64p->iov_base, acnt ); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0); - } - else - { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0); - - error = copyin(uio->uio_iovs.iov64p->iov_base, CAST_DOWN(caddr_t, cp), acnt); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0); - } - if (error) - return (error); - break; - case UIO_USERSPACE32: case UIO_USERISPACE32: case UIO_USERSPACE: case UIO_USERISPACE: + // LP64 - 3rd argument in debug code is 64 bit, expected to be 32 bit if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0); + (int)cp, (uintptr_t)uio->uio_iovs.uiovp->iov_base, acnt, 0,0); - error = copyout( CAST_DOWN(caddr_t, cp), CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), acnt ); + error = copyout( CAST_DOWN(caddr_t, cp), uio->uio_iovs.uiovp->iov_base, acnt ); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | 
DBG_FUNC_END, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0); + (int)cp, (uintptr_t)uio->uio_iovs.uiovp->iov_base, acnt, 0,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0); + (uintptr_t)uio->uio_iovs.uiovp->iov_base, (int)cp, acnt, 0,0); - error = copyin(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), CAST_DOWN(caddr_t, cp), acnt); + error = copyin(uio->uio_iovs.uiovp->iov_base, CAST_DOWN(caddr_t, cp), acnt); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0); + (uintptr_t)uio->uio_iovs.uiovp->iov_base, (int)cp, acnt, 0,0); } if (error) return (error); @@ -202,97 +189,68 @@ uiomove64(const addr64_t c_cp, int n, struct uio *uio) case UIO_SYSSPACE32: case UIO_SYSSPACE: if (uio->uio_rw == UIO_READ) - error = copywithin(CAST_DOWN(caddr_t, cp), (caddr_t)uio->uio_iovs.iov32p->iov_base, + error = copywithin(CAST_DOWN(caddr_t, cp), CAST_DOWN(caddr_t, uio->uio_iovs.kiovp->iov_base), acnt); else - error = copywithin((caddr_t)uio->uio_iovs.iov32p->iov_base, CAST_DOWN(caddr_t, cp), + error = copywithin(CAST_DOWN(caddr_t, uio->uio_iovs.kiovp->iov_base), CAST_DOWN(caddr_t, cp), acnt); break; case UIO_PHYS_USERSPACE64: - if (uio->uio_rw == UIO_READ) - { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0); - - error = copypv((addr64_t)cp, uio->uio_iovs.iov64p->iov_base, acnt, cppvPsrc | cppvNoRefSrc); - if (error) /* Copy physical to virtual */ - error = EFAULT; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0); - } - else - { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0); - - error = copypv(uio->uio_iovs.iov64p->iov_base, (addr64_t)cp, acnt, cppvPsnk | 
cppvNoRefSrc | cppvNoModSnk); - if (error) /* Copy virtual to physical */ - error = EFAULT; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0); - } - if (error) - return (error); - break; - case UIO_PHYS_USERSPACE32: case UIO_PHYS_USERSPACE: if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0); + (int)cp, (uintptr_t)uio->uio_iovs.uiovp->iov_base, acnt, 1,0); - error = copypv((addr64_t)cp, (addr64_t)uio->uio_iovs.iov32p->iov_base, acnt, cppvPsrc | cppvNoRefSrc); + error = copypv((addr64_t)cp, uio->uio_iovs.uiovp->iov_base, acnt, cppvPsrc | cppvNoRefSrc); if (error) /* Copy physical to virtual */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0); + (int)cp, (uintptr_t)uio->uio_iovs.uiovp->iov_base, acnt, 1,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0); + (uintptr_t)uio->uio_iovs.uiovp->iov_base, (int)cp, acnt, 1,0); - error = copypv((addr64_t)uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk); + error = copypv(uio->uio_iovs.uiovp->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk); if (error) /* Copy virtual to physical */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0); + (uintptr_t)uio->uio_iovs.uiovp->iov_base, (int)cp, acnt, 1,0); } if (error) return (error); break; - case UIO_PHYS_SYSSPACE32: case UIO_PHYS_SYSSPACE: if (uio->uio_rw == UIO_READ) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0); + (int)cp, (uintptr_t)uio->uio_iovs.kiovp->iov_base, acnt, 2,0); 
- error = copypv((addr64_t)cp, uio->uio_iovs.iov32p->iov_base, acnt, cppvKmap | cppvPsrc | cppvNoRefSrc); + error = copypv((addr64_t)cp, uio->uio_iovs.kiovp->iov_base, acnt, cppvKmap | cppvPsrc | cppvNoRefSrc); if (error) /* Copy physical to virtual */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END, - (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0); + (int)cp, (uintptr_t)uio->uio_iovs.kiovp->iov_base, acnt, 2,0); } else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0); + (uintptr_t)uio->uio_iovs.kiovp->iov_base, (int)cp, acnt, 2,0); - error = copypv(uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvKmap | cppvPsnk | cppvNoRefSrc | cppvNoModSnk); + error = copypv(uio->uio_iovs.kiovp->iov_base, (addr64_t)cp, acnt, cppvKmap | cppvPsnk | cppvNoRefSrc | cppvNoModSnk); if (error) /* Copy virtual to physical */ error = EFAULT; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END, - (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0); + (uintptr_t)uio->uio_iovs.kiovp->iov_base, (int)cp, acnt, 2,0); } if (error) return (error); @@ -301,15 +259,7 @@ uiomove64(const addr64_t c_cp, int n, struct uio *uio) default: break; } - uio_iov_base_add(uio, acnt); -#if LP64KERN - uio_iov_len_add(uio, -((int64_t)acnt)); - uio_setresid(uio, (uio_resid(uio) - ((int64_t)acnt))); -#else - uio_iov_len_add(uio, -((int)acnt)); - uio_setresid(uio, (uio_resid(uio) - ((int)acnt))); -#endif - uio->uio_offset += acnt; + uio_update(uio, acnt); cp += acnt; n -= acnt; } @@ -324,105 +274,35 @@ ureadc(int c, struct uio *uio) { if (uio_resid(uio) <= 0) panic("ureadc: non-positive resid"); -again: + uio_update(uio, 0); if (uio->uio_iovcnt == 0) panic("ureadc: non-positive iovcnt"); - if (uio_iov_len(uio) <= 0) { - uio->uio_iovcnt--; - uio_next_iov(uio); - goto again; - } + if (uio_curriovlen(uio) <= 0) + panic("ureadc: non-positive iovlen"); + switch 
(uio->uio_segflg) { case UIO_USERSPACE32: case UIO_USERSPACE: - if (subyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), c) < 0) - return (EFAULT); - break; - - case UIO_USERSPACE64: - if (subyte((user_addr_t)uio->uio_iovs.iov64p->iov_base, c) < 0) - return (EFAULT); - break; - - case UIO_SYSSPACE32: - case UIO_SYSSPACE: - *((caddr_t)uio->uio_iovs.iov32p->iov_base) = c; - break; - case UIO_USERISPACE32: case UIO_USERISPACE: - if (suibyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), c) < 0) - return (EFAULT); - break; - - default: - break; - } - uio_iov_base_add(uio, 1); - uio_iov_len_add(uio, -1); - uio_setresid(uio, (uio_resid(uio) - 1)); - uio->uio_offset++; - return (0); -} - -#if defined(vax) || defined(ppc) -/* unused except by ct.c, other oddities XXX */ -/* - * Get next character written in by user from uio. - */ -int -uwritec(uio_t uio) -{ - int c = 0; - - if (uio_resid(uio) <= 0) - return (-1); -again: - if (uio->uio_iovcnt <= 0) - panic("uwritec: non-positive iovcnt"); - - if (uio_iov_len(uio) == 0) { - uio_next_iov(uio); - if (--uio->uio_iovcnt == 0) - return (-1); - goto again; - } - switch (uio->uio_segflg) { - - case UIO_USERSPACE32: - case UIO_USERSPACE: - c = fubyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base)); - break; - case UIO_USERSPACE64: - c = fubyte((user_addr_t)uio->uio_iovs.iov64p->iov_base); + case UIO_USERISPACE64: + if (subyte((user_addr_t)uio->uio_iovs.uiovp->iov_base, c) < 0) + return (EFAULT); break; case UIO_SYSSPACE32: case UIO_SYSSPACE: - c = *((caddr_t)uio->uio_iovs.iov32p->iov_base) & 0377; - break; - - case UIO_USERISPACE32: - case UIO_USERISPACE: - c = fuibyte(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base)); + *(CAST_DOWN(caddr_t, uio->uio_iovs.kiovp->iov_base)) = c; break; default: - c = 0; /* avoid uninitialized variable warning */ - panic("uwritec: bogus uio_segflg"); break; } - if (c < 0) - return (-1); - uio_iov_base_add(uio, 1); - uio_iov_len_add(uio, -1); - uio_setresid(uio, (uio_resid(uio) - 1)); - 
uio->uio_offset++; - return (c); + uio_update(uio, 1); + return (0); } -#endif /* vax || ppc */ /* * General routine to allocate a hash table. @@ -440,7 +320,7 @@ hashinit(int elements, int type, u_long *hashmask) continue; hashsize >>= 1; MALLOC(hashtbl, struct generic *, - (u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK|M_ZERO); + hashsize * sizeof(*hashtbl), type, M_WAITOK|M_ZERO); if (hashtbl != NULL) { for (i = 0; i < hashsize; i++) LIST_INIT(&hashtbl[i]); @@ -468,14 +348,7 @@ user_ssize_t uio_resid( uio_t a_uio ) return( 0 ); } - if (UIO_IS_64_BIT_SPACE(a_uio)) { -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - return( (user_ssize_t)a_uio->uio_resid ); -#else - return( a_uio->uio_resid_64 ); -#endif - } - return( (user_ssize_t)a_uio->uio_resid ); + return( a_uio->uio_resid_64 ); } /* @@ -496,16 +369,7 @@ void uio_setresid( uio_t a_uio, user_ssize_t a_value ) return; } - if (UIO_IS_64_BIT_SPACE(a_uio)) { -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - a_uio->uio_resid = (int)a_value; -#else - a_uio->uio_resid_64 = a_value; -#endif - } - else { - a_uio->uio_resid = (int)a_value; - } + a_uio->uio_resid_64 = a_value; return; } @@ -525,10 +389,10 @@ user_addr_t uio_curriovbase( uio_t a_uio ) return(0); } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { return(a_uio->uio_iovs.uiovp->iov_base); } - return((user_addr_t)((uintptr_t)a_uio->uio_iovs.kiovp->iov_base)); + return((user_addr_t)a_uio->uio_iovs.kiovp->iov_base); } @@ -548,7 +412,7 @@ user_size_t uio_curriovlen( uio_t a_uio ) return(0); } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { return(a_uio->uio_iovs.uiovp->iov_len); } return((user_size_t)a_uio->uio_iovs.kiovp->iov_len); @@ -570,7 +434,7 @@ __private_extern__ void uio_setcurriovlen( uio_t a_uio, user_size_t a_value ) return; } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { a_uio->uio_iovs.uiovp->iov_len = a_value; } else 
{ @@ -710,7 +574,7 @@ uio_t uio_create( int a_iovcount, /* number of iovecs */ int a_iodirection ) /* read or write flag */ { void * my_buf_p; - int my_size; + size_t my_size; uio_t my_uio; my_size = UIO_SIZEOF(a_iovcount); @@ -746,12 +610,12 @@ __private_extern__ uio_t int a_spacetype, /* type of address space */ int a_iodirection, /* read or write flag */ void *a_buf_p, /* pointer to a uio_t buffer */ - int a_buffer_size ) /* size of uio_t buffer */ + size_t a_buffer_size ) /* size of uio_t buffer */ { uio_t my_uio = (uio_t) a_buf_p; - int my_size; + size_t my_size; - my_size = sizeof(struct uio) + (sizeof(struct user_iovec) * a_iovcount); + my_size = UIO_SIZEOF(a_iovcount); if (a_buffer_size < my_size) { #if DEBUG panic("%s :%d - a_buffer_size is too small\n", __FILE__, __LINE__); @@ -778,17 +642,23 @@ __private_extern__ uio_t bzero(my_uio, my_size); my_uio->uio_size = my_size; - /* we use uio_segflg to indicate if the uio_t is the new format or */ - /* old (pre LP64 support) legacy format */ + /* + * we use uio_segflg to indicate if the uio_t is the new format or + * old (pre LP64 support) legacy format + * This switch statement should canonicalize incoming space type + * to one of UIO_USERSPACE32/64, UIO_PHYS_USERSPACE32/64, or + * UIO_SYSSPACE/UIO_PHYS_SYSSPACE + */ switch (a_spacetype) { case UIO_USERSPACE: my_uio->uio_segflg = UIO_USERSPACE32; - case UIO_SYSSPACE: - my_uio->uio_segflg = UIO_SYSSPACE32; + break; + case UIO_SYSSPACE32: + my_uio->uio_segflg = UIO_SYSSPACE; + break; case UIO_PHYS_USERSPACE: my_uio->uio_segflg = UIO_PHYS_USERSPACE32; - case UIO_PHYS_SYSSPACE: - my_uio->uio_segflg = UIO_PHYS_SYSSPACE32; + break; default: my_uio->uio_segflg = a_spacetype; break; @@ -813,7 +683,7 @@ __private_extern__ uio_t /* * uio_spacetype - return the address space type for the given uio_t */ -int uio_spacetype( uio_t a_uio ) +__private_extern__ int uio_spacetype( uio_t a_uio ) { if (a_uio == NULL) { #if LP64_DEBUG @@ -832,7 +702,7 @@ int uio_spacetype( uio_t 
a_uio ) * which will increase as the IO is completed and is NOT embedded within the * uio, it is a seperate array of one or more iovecs. */ -struct user_iovec * uio_iovsaddr( uio_t a_uio ) +__private_extern__ struct user_iovec * uio_iovsaddr( uio_t a_uio ) { struct user_iovec * my_addr; @@ -840,12 +710,15 @@ struct user_iovec * uio_iovsaddr( uio_t a_uio ) return(NULL); } - if (a_uio->uio_segflg == UIO_USERSPACE || a_uio->uio_segflg == UIO_SYSSPACE) { + if (UIO_SEG_IS_USER_SPACE(a_uio->uio_segflg)) { /* we need this for compatibility mode. */ - my_addr = (struct user_iovec *) a_uio->uio_iovs.iovp; + my_addr = (struct user_iovec *) a_uio->uio_iovs.uiovp; } else { - my_addr = (struct user_iovec *) (((uint8_t *)a_uio) + sizeof(struct uio)); +#if DEBUG + panic("uio_iovsaddr called for UIO_SYSSPACE request"); +#endif + my_addr = 0; } return(my_addr); } @@ -887,7 +760,29 @@ void uio_reset( uio_t a_uio, my_max_iovs = a_uio->uio_max_iovs; bzero(a_uio, my_size); a_uio->uio_size = my_size; - a_uio->uio_segflg = a_spacetype; + + /* + * we use uio_segflg to indicate if the uio_t is the new format or + * old (pre LP64 support) legacy format + * This switch statement should canonicalize incoming space type + * to one of UIO_USERSPACE32/64, UIO_PHYS_USERSPACE32/64, or + * UIO_SYSSPACE/UIO_PHYS_SYSSPACE + */ + switch (a_spacetype) { + case UIO_USERSPACE: + a_uio->uio_segflg = UIO_USERSPACE32; + break; + case UIO_SYSSPACE32: + a_uio->uio_segflg = UIO_SYSSPACE; + break; + case UIO_PHYS_USERSPACE: + a_uio->uio_segflg = UIO_PHYS_USERSPACE32; + break; + default: + a_uio->uio_segflg = a_spacetype; + break; + } + if (my_max_iovs > 0) { a_uio->uio_iovs.uiovp = (struct user_iovec *) (((uint8_t *)a_uio) + sizeof(struct uio)); @@ -895,6 +790,7 @@ void uio_reset( uio_t a_uio, else { a_uio->uio_iovs.uiovp = NULL; } + a_uio->uio_max_iovs = my_max_iovs; a_uio->uio_offset = a_offset; a_uio->uio_rw = a_iodirection; @@ -943,17 +839,13 @@ int uio_addiov( uio_t a_uio, user_addr_t a_baseaddr, 
user_size_t a_length ) return(-1); } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.uiovp[i].iov_len == 0 && a_uio->uio_iovs.uiovp[i].iov_base == 0) { a_uio->uio_iovs.uiovp[i].iov_len = a_length; a_uio->uio_iovs.uiovp[i].iov_base = a_baseaddr; a_uio->uio_iovcnt++; -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - a_uio->uio_resid += a_length; -#else a_uio->uio_resid_64 += a_length; -#endif return( 0 ); } } @@ -961,10 +853,10 @@ int uio_addiov( uio_t a_uio, user_addr_t a_baseaddr, user_size_t a_length ) else { for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.kiovp[i].iov_len == 0 && a_uio->uio_iovs.kiovp[i].iov_base == 0) { - a_uio->uio_iovs.kiovp[i].iov_len = (u_int32_t)a_length; - a_uio->uio_iovs.kiovp[i].iov_base = (u_int32_t)((uintptr_t)a_baseaddr); + a_uio->uio_iovs.kiovp[i].iov_len = (u_int64_t)a_length; + a_uio->uio_iovs.kiovp[i].iov_base = (u_int64_t)a_baseaddr; a_uio->uio_iovcnt++; - a_uio->uio_resid += a_length; + a_uio->uio_resid_64 += a_length; return( 0 ); } } @@ -995,7 +887,7 @@ int uio_getiov( uio_t a_uio, return(-1); } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { if (a_baseaddr_p != NULL) { *a_baseaddr_p = a_uio->uio_iovs.uiovp[a_index].iov_base; } @@ -1031,19 +923,11 @@ __private_extern__ void uio_calculateresid( uio_t a_uio ) } a_uio->uio_iovcnt = a_uio->uio_max_iovs; - if (UIO_IS_64_BIT_SPACE(a_uio)) { -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - a_uio->uio_resid = 0; -#else + if (UIO_IS_USER_SPACE(a_uio)) { a_uio->uio_resid_64 = 0; -#endif for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.uiovp[i].iov_len != 0 && a_uio->uio_iovs.uiovp[i].iov_base != 0) { -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - a_uio->uio_resid += a_uio->uio_iovs.uiovp[i].iov_len; -#else a_uio->uio_resid_64 += 
a_uio->uio_iovs.uiovp[i].iov_len; -#endif } } @@ -1056,10 +940,10 @@ __private_extern__ void uio_calculateresid( uio_t a_uio ) } } else { - a_uio->uio_resid = 0; + a_uio->uio_resid_64 = 0; for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.kiovp[i].iov_len != 0 && a_uio->uio_iovs.kiovp[i].iov_base != 0) { - a_uio->uio_resid += a_uio->uio_iovs.kiovp[i].iov_len; + a_uio->uio_resid_64 += a_uio->uio_iovs.kiovp[i].iov_len; } } @@ -1099,7 +983,7 @@ void uio_update( uio_t a_uio, user_size_t a_count ) return; } - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { /* * if a_count == 0, then we are asking to skip over * any empty iovs @@ -1110,34 +994,20 @@ void uio_update( uio_t a_uio, user_size_t a_count ) a_uio->uio_iovs.uiovp->iov_len = 0; } else { - a_uio->uio_iovs.uiovp->iov_base += a_count; + a_uio->uio_iovs.uiovp->iov_base += a_count; a_uio->uio_iovs.uiovp->iov_len -= a_count; } -#if 1 // LP64todo - remove this temp workaround once we go live with uio KPI - if (a_uio->uio_resid < 0) { - a_uio->uio_resid = 0; - } - if (a_count > (user_size_t)a_uio->uio_resid) { - a_uio->uio_offset += a_uio->uio_resid; - a_uio->uio_resid = 0; - } - else { - a_uio->uio_offset += a_count; - a_uio->uio_resid -= a_count; - } -#else if (a_uio->uio_resid_64 < 0) { - a_uio->uio_resid_64 = 0; + a_uio->uio_resid_64 = 0; } if (a_count > (user_size_t)a_uio->uio_resid_64) { - a_uio->uio_offset += a_uio->uio_resid_64; + a_uio->uio_offset += a_uio->uio_resid_64; a_uio->uio_resid_64 = 0; } else { - a_uio->uio_offset += a_count; + a_uio->uio_offset += a_count; a_uio->uio_resid_64 -= a_count; } -#endif // LP64todo } /* * advance to next iovec if current one is totally consumed @@ -1163,16 +1033,16 @@ void uio_update( uio_t a_uio, user_size_t a_count ) a_uio->uio_iovs.kiovp->iov_base += a_count; a_uio->uio_iovs.kiovp->iov_len -= a_count; } - if (a_uio->uio_resid < 0) { - a_uio->uio_resid = 0; + if (a_uio->uio_resid_64 < 0) { + a_uio->uio_resid_64 = 0; } - if (a_count > 
(user_size_t)a_uio->uio_resid) { - a_uio->uio_offset += a_uio->uio_resid; - a_uio->uio_resid = 0; + if (a_count > (user_size_t)a_uio->uio_resid_64) { + a_uio->uio_offset += a_uio->uio_resid_64; + a_uio->uio_resid_64 = 0; } else { - a_uio->uio_offset += a_count; - a_uio->uio_resid -= a_count; + a_uio->uio_offset += a_count; + a_uio->uio_resid_64 -= a_count; } } /* @@ -1188,6 +1058,46 @@ void uio_update( uio_t a_uio, user_size_t a_count ) return; } +/* + * uio_pushback - undo uncommitted I/O by subtracting from the + * current base address and offset, and incrementing the residiual + * IO. If the UIO was previously exhausted, this call will panic. + * New code should not use this functionality. + */ +__private_extern__ void uio_pushback( uio_t a_uio, user_size_t a_count ) +{ +#if LP64_DEBUG + if (a_uio == NULL) { + panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); + } + if (UIO_IS_32_BIT_SPACE(a_uio) && a_count > 0xFFFFFFFFull) { + panic("%s :%d - invalid count value \n", __FILE__, __LINE__); + } +#endif /* LP64_DEBUG */ + + if (a_uio == NULL || a_count == 0) { + return; + } + + if (a_uio->uio_iovcnt < 1) { + panic("Invalid uio for pushback"); + } + + if (UIO_IS_USER_SPACE(a_uio)) { + a_uio->uio_iovs.uiovp->iov_base -= a_count; + a_uio->uio_iovs.uiovp->iov_len += a_count; + } + else { + a_uio->uio_iovs.kiovp->iov_base -= a_count; + a_uio->uio_iovs.kiovp->iov_len += a_count; + } + + a_uio->uio_offset -= a_count; + a_uio->uio_resid_64 += a_count; + + return; +} + /* * uio_duplicate - allocate a new uio and make a copy of the given uio_t. 
@@ -1216,7 +1126,7 @@ uio_t uio_duplicate( uio_t a_uio ) /* advance to first nonzero iovec */ if (my_uio->uio_iovcnt > 0) { for ( i = 0; i < my_uio->uio_max_iovs; i++ ) { - if (UIO_IS_64_BIT_SPACE(a_uio)) { + if (UIO_IS_USER_SPACE(a_uio)) { if (my_uio->uio_iovs.uiovp->iov_len != 0) { break; } @@ -1237,3 +1147,29 @@ uio_t uio_duplicate( uio_t a_uio ) return(my_uio); } +int copyin_user_iovec_array(user_addr_t uaddr, int spacetype, int count, struct user_iovec *dst) +{ + size_t size_of_iovec = ( spacetype == UIO_USERSPACE64 ? sizeof(struct user64_iovec) : sizeof(struct user32_iovec)); + int error; + int i; + + // copyin to the front of "dst", without regard for putting records in the right places + error = copyin(uaddr, dst, count * size_of_iovec); + if (error) + return (error); + + // now, unpack the entries in reverse order, so we don't overwrite anything + for (i = count - 1; i >= 0; i--) { + if (spacetype == UIO_USERSPACE64) { + struct user64_iovec iovec = ((struct user64_iovec *)dst)[i]; + dst[i].iov_base = iovec.iov_base; + dst[i].iov_len = iovec.iov_len; + } else { + struct user32_iovec iovec = ((struct user32_iovec *)dst)[i]; + dst[i].iov_base = iovec.iov_base; + dst[i].iov_len = iovec.iov_len; + } + } + + return (0); +} diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 98e4e7771..43638af75 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -79,7 +79,7 @@ struct kern_direct_file_io_ref_t static int file_ioctl(void * p1, void * p2, int theIoctl, caddr_t result) { - dev_t device = (dev_t) p1; + dev_t device = *(dev_t*) p1; return ((*bdevsw[major(device)].d_ioctl) (device, theIoctl, result, S_IFBLK, p2)); @@ -149,7 +149,7 @@ kern_open_file_for_direct_io(const char * name, goto out; device = va.va_fsid; - p1 = (void *) device; + p1 = &device; p2 = p; do_ioctl = &file_ioctl; } @@ -279,7 +279,7 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad { return (vn_rdwr(UIO_WRITE, ref->vp, addr, len, 
offset, - UIO_SYSSPACE32, IO_SYNC|IO_NODELOCKED|IO_UNIT, + UIO_SYSSPACE, IO_SYNC|IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ref->ctx), (int *) 0, vfs_context_proc(ref->ctx))); } diff --git a/bsd/kern/kern_synch.c b/bsd/kern/kern_synch.c index 8cfe29bfc..68a45824e 100644 --- a/bsd/kern/kern_synch.c +++ b/bsd/kern/kern_synch.c @@ -71,11 +71,12 @@ _sleep_continue( __unused void *parameter, wait_result_t wresult) struct uthread * ut; int sig, catch; int error = 0; - int dropmutex; + int dropmutex, spinmutex; ut = get_bsdthread_info(self); catch = ut->uu_pri & PCATCH; dropmutex = ut->uu_pri & PDROP; + spinmutex = ut->uu_pri & PSPIN; switch (wresult) { case THREAD_TIMED_OUT: @@ -116,9 +117,12 @@ _sleep_continue( __unused void *parameter, wait_result_t wresult) if (error == EINTR || error == ERESTART) act_set_astbsd(self); - if (ut->uu_mtx && !dropmutex) - lck_mtx_lock(ut->uu_mtx); - + if (ut->uu_mtx && !dropmutex) { + if (spinmutex) + lck_mtx_lock_spin(ut->uu_mtx); + else + lck_mtx_lock(ut->uu_mtx); + } ut->uu_wchan = NULL; ut->uu_wmesg = NULL; @@ -160,6 +164,7 @@ _sleep( struct uthread * ut; int sig, catch = pri & PCATCH; int dropmutex = pri & PDROP; + int spinmutex = pri & PSPIN; int wait_result; int error = 0; @@ -169,7 +174,7 @@ _sleep( p->p_priority = pri & PRIMASK; /* It can still block in proc_exit() after the teardown. 
*/ if (p->p_stats != NULL) - OSIncrementAtomic(&p->p_stats->p_ru.ru_nvcsw); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_nvcsw); /* set wait message & channel */ ut->uu_wchan = chan; @@ -198,8 +203,12 @@ _sleep( error = EINTR; else error = ERESTART; - if (mtx && !dropmutex) - lck_mtx_lock(mtx); + if (mtx && !dropmutex) { + if (spinmutex) + lck_mtx_lock_spin(mtx); + else + lck_mtx_lock(mtx); + } goto out; } } @@ -208,8 +217,12 @@ _sleep( goto block; error = EINTR; - if (mtx && !dropmutex) - lck_mtx_lock(mtx); + if (mtx && !dropmutex) { + if (spinmutex) + lck_mtx_lock_spin(mtx); + else + lck_mtx_lock(mtx); + } goto out; } } @@ -227,8 +240,12 @@ _sleep( wait_result = thread_block(THREAD_CONTINUE_NULL); - if (mtx && !dropmutex) - lck_mtx_lock(mtx); + if (mtx && !dropmutex) { + if (spinmutex) + lck_mtx_lock_spin(mtx); + else + lck_mtx_lock(mtx); + } } switch (wait_result) { @@ -258,7 +275,10 @@ _sleep( if (thread_should_abort(self)) { error = EINTR; } - } + } else if( (ut->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { + /* due to thread cancel */ + error = EINTR; + } } else error = EINTR; break; diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index d1985d21d..7303287c1 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -103,13 +103,16 @@ #include #include -#include +#include +#include #include #include #include +#include #include #include +#include #include #include #include @@ -126,12 +129,12 @@ #include -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) #include #endif sysctlfn kern_sysctl; -#ifdef DEBUG +#if DEBUG sysctlfn debug_sysctl; #endif extern sysctlfn net_sysctl; @@ -143,17 +146,21 @@ extern int lowpri_IO_window_msecs; extern int lowpri_IO_delay_msecs; extern int nx_enabled; extern int speculative_reads_disabled; +extern unsigned int preheat_pages_max; +extern unsigned int preheat_pages_min; +extern unsigned int preheat_pages_mult; +extern long numvnodes; static void -fill_eproc(proc_t p, 
struct eproc *ep); +fill_user32_eproc(proc_t p, struct user32_eproc *ep); static void -fill_externproc(proc_t p, struct extern_proc *exp); +fill_user32_externproc(proc_t p, struct user32_extern_proc *exp); static void -fill_user_eproc(proc_t p, struct user_eproc *ep); +fill_user64_eproc(proc_t p, struct user64_eproc *ep); static void -fill_user_proc(proc_t p, struct user_kinfo_proc *kp); +fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp); static void -fill_user_externproc(proc_t p, struct user_extern_proc *exp); +fill_user64_externproc(proc_t p, struct user64_extern_proc *exp); extern int kdbg_control(int *name, u_int namelen, user_addr_t where, size_t * sizep); int @@ -173,7 +180,7 @@ int sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen); static void -fill_proc(proc_t p, struct kinfo_proc *kp); +fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp); int sysctl_procargs(int *name, u_int namelen, user_addr_t where, size_t *sizep, proc_t cur_proc); @@ -192,20 +199,31 @@ static int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * arg); static int sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg); static int sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg); static int sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg); +#if CONFIG_LCTX static int sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg); +#endif int sysdoproc_callback(proc_t p, void *arg); -static int __sysctl_funneled(proc_t p, struct __sysctl_args *uap, register_t *retval); +static int __sysctl_funneled(proc_t p, struct __sysctl_args *uap, int32_t *retval); extern void IORegistrySetOSBuildVersion(char * build_version); static void -loadavg32to64(struct loadavg *la32, struct user_loadavg *la64) +fill_loadavg64(struct loadavg *la, struct user64_loadavg *la64) { - la64->ldavg[0] = la32->ldavg[0]; - la64->ldavg[1] = la32->ldavg[1]; - la64->ldavg[2] = la32->ldavg[2]; - la64->fscale = (user_long_t)la32->fscale; + la64->ldavg[0] = la->ldavg[0]; 
+ la64->ldavg[1] = la->ldavg[1]; + la64->ldavg[2] = la->ldavg[2]; + la64->fscale = (user64_long_t)la->fscale; +} + +static void +fill_loadavg32(struct loadavg *la, struct user32_loadavg *la32) +{ + la32->ldavg[0] = la->ldavg[0]; + la32->ldavg[1] = la->ldavg[1]; + la32->ldavg[2] = la->ldavg[2]; + la32->fscale = (user32_long_t)la->fscale; } /* @@ -215,7 +233,7 @@ static struct sysctl_lock memlock; /* sysctl() syscall */ int -__sysctl(proc_t p, struct __sysctl_args *uap, register_t *retval) +__sysctl(proc_t p, struct __sysctl_args *uap, int32_t *retval) { boolean_t funnel_state; int error; @@ -227,7 +245,7 @@ __sysctl(proc_t p, struct __sysctl_args *uap, register_t *retval) } static int -__sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused register_t *retval) +__sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval) { int error, dolock = 1; size_t savelen = 0, oldlen = 0, newlen; @@ -261,12 +279,12 @@ __sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused register_t *retv else { newlen = uap->newlen; } - + /* CTL_UNSPEC is used to get oid to AUTO_OID */ if (uap->new != USER_ADDR_NULL && ((name[0] == CTL_KERN && !(name[1] == KERN_IPC || name[1] == KERN_PANICINFO || name[1] == KERN_PROCDELAYTERM || - name[1] == KERN_PROC_LOW_PRI_IO || name[1] == KERN_PROCNAME || name[1] == KERN_RAGEVNODE || name[1] == KERN_CHECKOPENEVT)) + name[1] == KERN_PROCNAME || name[1] == KERN_RAGEVNODE || name[1] == KERN_CHECKOPENEVT || name[1] == KERN_THREADNAME)) || (name[0] == CTL_HW) || (name[0] == CTL_VM)) && (error = suser(kauth_cred_get(), &p->p_acflag))) @@ -285,7 +303,7 @@ __sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused register_t *retv case CTL_VFS: fnp = vfs_sysctl; break; -#ifdef DEBUG +#if DEBUG case CTL_DEBUG: fnp = debug_sysctl; break; @@ -375,7 +393,7 @@ __sysctl_funneled(proc_t p, struct __sysctl_args *uap, __unused register_t *retv funnel_state = thread_funnel_set(kernel_flock, FALSE); error = userland_sysctl(p, name, 
uap->namelen, uap->old, &tmp, - 1, uap->new, newlen, &oldlen); + uap->new, newlen, &oldlen); thread_funnel_set(kernel_flock, funnel_state); } @@ -424,9 +442,9 @@ sysctl_affinity( (cur_proc->p_flag & P_AFFINITY) ? 1 : 0); } else if (name[0] == 1 && 2 == namelen) { if (name[1] == 0) { - OSBitAndAtomic(~((uint32_t)P_AFFINITY), (UInt32 *)&cur_proc->p_flag); + OSBitAndAtomic(~((uint32_t)P_AFFINITY), &cur_proc->p_flag); } else { - OSBitOrAtomic(P_AFFINITY, (UInt32 *)&cur_proc->p_flag); + OSBitOrAtomic(P_AFFINITY, &cur_proc->p_flag); } return 0; } @@ -487,7 +505,7 @@ set_archhandler(__unused proc_t p, int arch) return (EBADARCH); } - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(archhandler->path), ctx); error = namei(&nd); if (error) @@ -512,7 +530,7 @@ set_archhandler(__unused proc_t p, int arch) vnode_put(nd.ni_vp); archhandler->fsid = va.va_fsid; - archhandler->fileid = (u_long)va.va_fileid; + archhandler->fileid = (u_int32_t)va.va_fileid; return 0; } @@ -582,6 +600,65 @@ sysctl_handle_exec_archhandler_ppc(struct sysctl_oid *oidp, void *arg1, } +static int +sysctl_handle_kern_threadname( __unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int error; + struct uthread *ut = get_bsdthread_info(current_thread()); + user_addr_t oldp=0, newp=0; + size_t *oldlenp=NULL; + size_t newlen=0; + + oldp = req->oldptr; + oldlenp = &(req->oldlen); + newp = req->newptr; + newlen = req->newlen; + + /* We want the current length, and maybe the string itself */ + if(oldlenp) { + /* if we have no thread name yet tell'em we want MAXTHREADNAMESIZE - 1 */ + size_t currlen = MAXTHREADNAMESIZE - 1; + + if(ut->pth_name) + /* use length of current thread name */ + currlen = strlen(ut->pth_name); + if(oldp) { + if(*oldlenp < currlen) + return ENOMEM; + /* NOTE - we do not copy the NULL terminator */ + if(ut->pth_name) { + error = copyout(ut->pth_name,oldp,currlen); 
+ if(error) + return error; + } + } + /* return length of thread name minus NULL terminator (just like strlen) */ + req->oldidx = currlen; + } + + /* We want to set the name to something */ + if(newp) + { + if(newlen > (MAXTHREADNAMESIZE - 1)) + return ENAMETOOLONG; + if(!ut->pth_name) + { + ut->pth_name = (char*)kalloc( MAXTHREADNAMESIZE ); + if(!ut->pth_name) + return ENOMEM; + } + bzero(ut->pth_name, MAXTHREADNAMESIZE); + error = copyin(newp, ut->pth_name, newlen); + if(error) + return error; + } + + return 0; +} + +SYSCTL_PROC(_kern, KERN_THREADNAME, threadname, CTLFLAG_ANYBODY | CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_handle_kern_threadname,"A",""); + SYSCTL_NODE(_kern, KERN_EXEC, exec, CTLFLAG_RD|CTLFLAG_LOCKED, 0, ""); SYSCTL_NODE(_kern_exec, OID_AUTO, archhandler, CTLFLAG_RD|CTLFLAG_LOCKED, 0, ""); @@ -708,7 +785,7 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, /* NOTREACHED */ } -#ifdef DEBUG +#if DEBUG /* * Debugging related system variables. */ @@ -734,9 +811,9 @@ debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, /* all sysctl names at this level are name and field */ if (namelen != 2) - return (ENOTDIR); /* overloaded */ + return (ENOTSUP); /* overloaded */ if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID) - return (ENOTSUP); + return (ENOTSUP); cdp = debugvars[name[0]]; if (cdp->debugname == 0) return (ENOTSUP); @@ -783,7 +860,7 @@ sysctl_int(user_addr_t oldp, size_t *oldlenp, error = copyout(valp, oldp, sizeof(int)); if (error == 0 && newp) { error = copyin(newp, valp, sizeof(int)); - AUDIT_ARG(value, *valp); + AUDIT_ARG(value32, *valp); } return (error); } @@ -1047,7 +1124,7 @@ SYSCTL_PROC(_kern, KERN_FILE, file, static int sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg) { - if (p->p_pid != (pid_t)arg) + if (p->p_pid != (pid_t)*(int*)arg) return(0); else return(1); @@ -1056,7 +1133,7 @@ sysdoproc_filt_KERN_PROC_PID(proc_t p, void * arg) static int sysdoproc_filt_KERN_PROC_PGRP(proc_t p, void * 
arg) { - if (p->p_pgrpid != (pid_t)arg) + if (p->p_pgrpid != (pid_t)*(int*)arg) return(0); else return(1); @@ -1067,14 +1144,15 @@ sysdoproc_filt_KERN_PROC_TTY(proc_t p, void * arg) { boolean_t funnel_state; int retval; + struct tty *tp; funnel_state = thread_funnel_set(kernel_flock, TRUE); /* This is very racy but list lock is held.. Hmmm. */ if ((p->p_flag & P_CONTROLT) == 0 || (p->p_pgrp == NULL) || (p->p_pgrp->pg_session == NULL) || - p->p_pgrp->pg_session->s_ttyp == NULL || - p->p_pgrp->pg_session->s_ttyp->t_dev != (dev_t)arg) + (tp = SESSION_TP(p->p_pgrp->pg_session)) == TTY_NULL || + tp->t_dev != (dev_t)*(int*)arg) retval = 0; else retval = 1; @@ -1096,7 +1174,7 @@ sysdoproc_filt_KERN_PROC_UID(proc_t p, void * arg) uid = kauth_cred_getuid(my_cred); kauth_cred_unref(&my_cred); - if (uid != (uid_t)arg) + if (uid != (uid_t)*(int*)arg) return(0); else return(1); @@ -1115,21 +1193,23 @@ sysdoproc_filt_KERN_PROC_RUID(proc_t p, void * arg) ruid = my_cred->cr_ruid; kauth_cred_unref(&my_cred); - if (ruid != (uid_t)arg) + if (ruid != (uid_t)*(int*)arg) return(0); else return(1); } +#if CONFIG_LCTX static int sysdoproc_filt_KERN_PROC_LCID(proc_t p, void * arg) { if ((p->p_lctx == NULL) || - (p->p_lctx->lc_id != (pid_t)arg)) + (p->p_lctx->lc_id != (pid_t)*(int*)arg)) return(0); else return(1); } +#endif /* * try over estimating by 5 procs @@ -1156,19 +1236,19 @@ sysdoproc_callback(proc_t p, void * arg) int error=0; if (args->buflen >= args->sizeof_kproc) { - if ((args->ruidcheck != 0) && (sysdoproc_filt_KERN_PROC_RUID(p, (void *)args->uidval) == 0)) + if ((args->ruidcheck != 0) && (sysdoproc_filt_KERN_PROC_RUID(p, &args->uidval) == 0)) return(PROC_RETURNED); - if ((args->uidcheck != 0) && (sysdoproc_filt_KERN_PROC_UID(p, (void *)args->uidval) == 0)) + if ((args->uidcheck != 0) && (sysdoproc_filt_KERN_PROC_UID(p, &args->uidval) == 0)) return(PROC_RETURNED); - if ((args->ttycheck != 0) && (sysdoproc_filt_KERN_PROC_TTY(p, (void *)args->uidval) == 0)) + if ((args->ttycheck 
!= 0) && (sysdoproc_filt_KERN_PROC_TTY(p, &args->uidval) == 0)) return(PROC_RETURNED); bzero(args->kprocp, args->sizeof_kproc); if (args->is_64_bit) { - fill_user_proc(p, (struct user_kinfo_proc *) args->kprocp); + fill_user64_proc(p, (struct user64_kinfo_proc *) args->kprocp); } else { - fill_proc(p, (struct kinfo_proc *) args->kprocp); + fill_user32_proc(p, (struct user32_kinfo_proc *) args->kprocp); } error = copyout(args->kprocp, args->dp, args->sizeof_kproc); if (error) { @@ -1191,8 +1271,8 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) int buflen = where != USER_ADDR_NULL ? *sizep : 0; int error = 0; boolean_t is_64_bit = FALSE; - struct kinfo_proc kproc; - struct user_kinfo_proc user_kproc; + struct user32_kinfo_proc user32_kproc; + struct user64_kinfo_proc user_kproc; int sizeof_kproc; caddr_t kprocp; int (*filterfn)(proc_t, void *) = 0; @@ -1209,8 +1289,8 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) kprocp = (caddr_t) &user_kproc; } else { - sizeof_kproc = sizeof(kproc); - kprocp = (caddr_t) &kproc; + sizeof_kproc = sizeof(user32_kproc); + kprocp = (caddr_t) &user32_kproc; } @@ -1256,7 +1336,7 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) args.sizeof_kproc = sizeof_kproc; args.uidval = name[1]; - proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), sysdoproc_callback, &args, filterfn, (void *)name[1]); + proc_iterate((PROC_ALLPROCLIST | PROC_ZOMBPROCLIST), sysdoproc_callback, &args, filterfn, &name[1]); if (error) return(error); @@ -1279,7 +1359,7 @@ sysctl_doproc(int *name, u_int namelen, user_addr_t where, size_t *sizep) * Fill in an eproc structure for the specified process. 
*/ static void -fill_eproc(proc_t p, struct eproc *ep) +fill_user32_eproc(proc_t p, struct user32_eproc *ep) { struct tty *tp; kauth_cred_t my_cred; @@ -1289,16 +1369,16 @@ fill_eproc(proc_t p, struct eproc *ep) pg = proc_pgrp(p); sessp = proc_session(p); - ep->e_paddr = p; + ep->e_paddr = CAST_DOWN_EXPLICIT(uint32_t,p); if (pg != PGRP_NULL) { - ep->e_sess = sessp; + ep->e_sess = CAST_DOWN_EXPLICIT(uint32_t,sessp); ep->e_pgid = p->p_pgrpid; ep->e_jobc = pg->pg_jobc; if ((sessp != SESSION_NULL) && sessp->s_ttyvp) ep->e_flag = EPROC_CTTY; } else { - ep->e_sess = (struct session *)0; + ep->e_sess = 0; ep->e_pgid = 0; ep->e_jobc = 0; } @@ -1311,7 +1391,7 @@ fill_eproc(proc_t p, struct eproc *ep) #endif ep->e_ppid = p->p_ppid; /* Pre-zero the fake historical pcred */ - bzero(&ep->e_pcred, sizeof(struct _pcred)); + bzero(&ep->e_pcred, sizeof(ep->e_pcred)); if (p->p_ucred) { my_cred = kauth_cred_proc_ref(p); @@ -1336,10 +1416,10 @@ fill_eproc(proc_t p, struct eproc *ep) ep->e_vm.vm_rssize = 0; if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && - (tp = sessp->s_ttyp)) { + (tp = SESSION_TP(sessp))) { ep->e_tdev = tp->t_dev; ep->e_tpgid = sessp->s_ttypgrpid; - ep->e_tsess = tp->t_session; + ep->e_tsess = CAST_DOWN_EXPLICIT(uint32_t,tp->t_session); } else ep->e_tdev = NODEV; @@ -1358,7 +1438,7 @@ fill_eproc(proc_t p, struct eproc *ep) * Fill in an LP64 version of eproc structure for the specified process. 
*/ static void -fill_user_eproc(proc_t p, struct user_eproc *ep) +fill_user64_eproc(proc_t p, struct user64_eproc *ep) { struct tty *tp; struct session *sessp = NULL; @@ -1417,7 +1497,7 @@ fill_user_eproc(proc_t p, struct user_eproc *ep) ep->e_vm.vm_rssize = 0; if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && - (tp = sessp->s_ttyp)) { + (tp = SESSION_TP(sessp))) { ep->e_tdev = tp->t_dev; ep->e_tpgid = sessp->s_ttypgrpid; ep->e_tsess = CAST_USER_ADDR_T(tp->t_session); @@ -1439,12 +1519,13 @@ fill_user_eproc(proc_t p, struct user_eproc *ep) * Fill in an eproc structure for the specified process. */ static void -fill_externproc(proc_t p, struct extern_proc *exp) +fill_user32_externproc(proc_t p, struct user32_extern_proc *exp) { - exp->p_forw = exp->p_back = NULL; - exp->p_starttime = p->p_start; - exp->p_vmspace = NULL; - exp->p_sigacts = p->p_sigacts; + exp->p_forw = exp->p_back = 0; + exp->p_starttime.tv_sec = p->p_start.tv_sec; + exp->p_starttime.tv_usec = p->p_start.tv_usec; + exp->p_vmspace = 0; + exp->p_sigacts = CAST_DOWN_EXPLICIT(uint32_t,p->p_sigacts); exp->p_flag = p->p_flag; if (p->p_lflag & P_LTRACED) exp->p_flag |= P_TRACED; @@ -1456,8 +1537,8 @@ fill_externproc(proc_t p, struct extern_proc *exp) exp->p_pid = p->p_pid ; exp->p_oppid = p->p_oppid ; /* Mach related */ - exp->user_stack = CAST_DOWN(caddr_t, p->user_stack); - exp->exit_thread = p->exit_thread ; + exp->user_stack = p->user_stack; + exp->exit_thread = CAST_DOWN_EXPLICIT(uint32_t,p->exit_thread); exp->p_debugger = p->p_debugger ; exp->sigwait = p->sigwait ; /* scheduling */ @@ -1482,7 +1563,7 @@ fill_externproc(proc_t p, struct extern_proc *exp) exp->p_traceflag = 0; exp->p_tracep = 0 ; exp->p_siglist = 0 ; /* No longer relevant */ - exp->p_textvp = p->p_textvp ; + exp->p_textvp = CAST_DOWN_EXPLICIT(uint32_t,p->p_textvp) ; exp->p_holdcnt = 0 ; exp->p_sigmask = 0 ; /* no longer avaialable */ exp->p_sigignore = p->p_sigignore ; @@ -1492,18 +1573,18 @@ fill_externproc(proc_t p, struct 
extern_proc *exp) exp->p_nice = p->p_nice ; bcopy(&p->p_comm, &exp->p_comm,MAXCOMLEN); exp->p_comm[MAXCOMLEN] = '\0'; - exp->p_pgrp = p->p_pgrp ; - exp->p_addr = NULL; + exp->p_pgrp = CAST_DOWN_EXPLICIT(uint32_t,p->p_pgrp) ; + exp->p_addr = 0; exp->p_xstat = p->p_xstat ; exp->p_acflag = p->p_acflag ; - exp->p_ru = p->p_ru ; /* XXX may be NULL */ + exp->p_ru = CAST_DOWN_EXPLICIT(uint32_t,p->p_ru) ; /* XXX may be NULL */ } /* * Fill in an LP64 version of extern_proc structure for the specified process. */ static void -fill_user_externproc(proc_t p, struct user_extern_proc *exp) +fill_user64_externproc(proc_t p, struct user64_extern_proc *exp) { exp->p_forw = exp->p_back = USER_ADDR_NULL; exp->p_starttime.tv_sec = p->p_start.tv_sec; @@ -1569,17 +1650,18 @@ fill_user_externproc(proc_t p, struct user_extern_proc *exp) } static void -fill_proc(proc_t p, struct kinfo_proc *kp) +fill_user32_proc(proc_t p, struct user32_kinfo_proc *kp) { - fill_externproc(p, &kp->kp_proc); - fill_eproc(p, &kp->kp_eproc); + /* on a 64 bit kernel, 32 bit users will get some truncated information */ + fill_user32_externproc(p, &kp->kp_proc); + fill_user32_eproc(p, &kp->kp_eproc); } static void -fill_user_proc(proc_t p, struct user_kinfo_proc *kp) +fill_user64_proc(proc_t p, struct user64_kinfo_proc *kp) { - fill_user_externproc(p, &kp->kp_proc); - fill_user_eproc(p, &kp->kp_eproc); + fill_user64_externproc(p, &kp->kp_proc); + fill_user64_eproc(p, &kp->kp_eproc); } int @@ -1590,11 +1672,11 @@ kdebug_ops(int *name, u_int namelen, user_addr_t where, if (namelen == 0) return(ENOTSUP); - + ret = suser(kauth_cred_get(), &p->p_acflag); if (ret) return(ret); - + switch(name[0]) { case KERN_KDEFLAGS: case KERN_KDDFLAGS: @@ -1662,7 +1744,7 @@ sysctl_procargsx(int *name, u_int namelen, user_addr_t where, if ( namelen < 1 ) return(EINVAL); - + if (argc_yes) buflen -= sizeof(int); /* reserve first word to return argc */ @@ -1964,7 +2046,7 @@ sysctl_maxproc int new_value, changed; int error = 
sysctl_io_number(req, maxproc, sizeof(int), &new_value, &changed); if (changed) { - AUDIT_ARG(value, new_value); + AUDIT_ARG(value32, new_value); /* make sure the system-wide limit is less than the configured hard limit set at kernel compilation */ if (new_value <= hard_maxproc && new_value > 0) @@ -1983,11 +2065,51 @@ SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, osrelease, 0, ""); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD | CTLFLAG_KERN, - NULL, BSD, ""); + (int *)NULL, BSD, ""); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD | CTLFLAG_KERN, version, 0, ""); +#if DEBUG +int debug_kprint_syscall = 0; +char debug_kprint_syscall_process[MAXCOMLEN+1]; + +SYSCTL_INT (_debug, OID_AUTO, kprint_syscall, + CTLFLAG_RW, &debug_kprint_syscall, 0, "kprintf syscall tracing"); +SYSCTL_STRING(_debug, OID_AUTO, kprint_syscall_process, + CTLFLAG_RW, debug_kprint_syscall_process, sizeof(debug_kprint_syscall_process), + "name of process for kprintf syscall tracing"); + +int debug_kprint_current_process(const char **namep) +{ + struct proc *p = current_proc(); + + if (p == NULL) { + return 0; + } + + if (debug_kprint_syscall_process[0]) { + /* user asked to scope tracing to a particular process name */ + if(0 == strncmp(debug_kprint_syscall_process, + p->p_comm, sizeof(debug_kprint_syscall_process))) { + /* no value in telling the user that we traced what they asked */ + if(namep) *namep = NULL; + + return 1; + } else { + return 0; + } + } + + /* trace all processes. 
Tell user what we traced */ + if (namep) { + *namep = p->p_comm; + } + + return 1; +} +#endif + /* PR-5293665: need to use a callback function for kern.osversion to set * osversion in IORegistry */ @@ -2032,33 +2154,52 @@ SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, &maxfiles, 0, ""); SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD | CTLFLAG_KERN, - NULL, ARG_MAX, ""); + (int *)NULL, ARG_MAX, ""); SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD | CTLFLAG_KERN, - NULL, _POSIX_VERSION, ""); + (int *)NULL, _POSIX_VERSION, ""); SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RD | CTLFLAG_KERN, - NULL, NGROUPS_MAX, ""); + (int *)NULL, NGROUPS_MAX, ""); SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD | CTLFLAG_KERN, - NULL, 1, ""); + (int *)NULL, 1, ""); #if 1 /* _POSIX_SAVED_IDS from */ SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD | CTLFLAG_KERN, - NULL, 1, ""); + (int *)NULL, 1, ""); #else SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD | CTLFLAG_KERN, NULL, 0, ""); #endif +SYSCTL_INT(_kern, OID_AUTO, num_files, + CTLFLAG_RD, + &nfiles, 0, ""); +SYSCTL_COMPAT_INT(_kern, OID_AUTO, num_vnodes, + CTLFLAG_RD, + &numvnodes, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_tasks, + CTLFLAG_RD, + &task_max, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_threads, + CTLFLAG_RD, + &thread_max, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, num_taskthreads, + CTLFLAG_RD, + &task_threadmax, 0, ""); static int sysctl_maxvnodes (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - unsigned int oldval = desiredvnodes; + int oldval = desiredvnodes; int error = sysctl_io_number(req, desiredvnodes, sizeof(int), &desiredvnodes, NULL); - reset_vmobjectcache(oldval, desiredvnodes); - resize_namecache(desiredvnodes); + + if (oldval != desiredvnodes) { + reset_vmobjectcache(oldval, desiredvnodes); + resize_namecache(desiredvnodes); + } + return(error); } @@ -2121,7 +2262,7 @@ SYSCTL_PROC(_kern, KERN_DOMAINNAME, nisdomainname, 
CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_domainname, "A", ""); -SYSCTL_INT(_kern, KERN_HOSTID, hostid, +SYSCTL_COMPAT_INT(_kern, KERN_HOSTID, hostid, CTLFLAG_RW | CTLFLAG_KERN, &hostid, 0, ""); @@ -2159,16 +2300,36 @@ SYSCTL_INT(_kern, KERN_SPECULATIVE_READS, speculative_reads_disabled, CTLFLAG_RW | CTLFLAG_KERN, &speculative_reads_disabled, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_max, + CTLFLAG_RW | CTLFLAG_KERN, + &preheat_pages_max, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_min, + CTLFLAG_RW | CTLFLAG_KERN, + &preheat_pages_min, 0, ""); + +SYSCTL_UINT(_kern, OID_AUTO, preheat_pages_mult, + CTLFLAG_RW | CTLFLAG_KERN, + &preheat_pages_mult, 0, ""); + static int sysctl_boottime (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - struct timeval t; - - t.tv_sec = boottime_sec(); - t.tv_usec = 0; + time_t tv_sec = boottime_sec(); + struct proc *p = req->p; - return sysctl_io_opaque(req, &t, sizeof(t), NULL); + if (proc_is64bit(p)) { + struct user64_timeval t; + t.tv_sec = tv_sec; + t.tv_usec = 0; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } else { + struct user32_timeval t; + t.tv_sec = tv_sec; + t.tv_usec = 0; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } } SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, @@ -2211,7 +2372,7 @@ sysctl_usrstack return sysctl_io_number(req, (int)req->p->user_stack, sizeof(int), NULL, NULL); } -SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, +SYSCTL_PROC(_kern, KERN_USRSTACK32, usrstack, CTLTYPE_INT | CTLFLAG_RD, 0, 0, sysctl_usrstack, "I", ""); @@ -2296,50 +2457,6 @@ SYSCTL_PROC(_kern, KERN_PROCDELAYTERM, delayterm, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_delayterm, "I", ""); -static int -sysctl_proc_low_pri_io -(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) -{ - struct proc *p = req->p; - int new_value, old_value, changed; - int error; - - proc_lock(p); - switch (req->p->p_iopol_disk) { - case 
IOPOL_DEFAULT: - case IOPOL_NORMAL: - old_value = 0; - break; - case IOPOL_THROTTLE: - old_value = 1; - break; - case IOPOL_PASSIVE: - old_value = 2; - break; - default: - /* this should never happen, but to be robust, return the default value */ - old_value = 0; - break; - } - proc_unlock(p); - - error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); - if (changed) { - proc_lock(p); - if (new_value & 0x01) - req->p->p_iopol_disk = IOPOL_THROTTLE; - else if (new_value & 0x02) - req->p->p_iopol_disk = IOPOL_PASSIVE; - else if (new_value == 0) - req->p->p_iopol_disk = IOPOL_NORMAL; - proc_unlock(p); - } - return(error); -} - -SYSCTL_PROC(_kern, KERN_PROC_LOW_PRI_IO, proc_low_pri_io, - CTLTYPE_INT | CTLFLAG_RW, - 0, 0, sysctl_proc_low_pri_io, "I", ""); static int sysctl_rage_vnode @@ -2387,7 +2504,7 @@ sysctl_rage_vnode } SYSCTL_PROC(_kern, KERN_RAGEVNODE, rage_vnode, - CTLTYPE_INT | CTLFLAG_RW, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, 0, 0, sysctl_rage_vnode, "I", ""); @@ -2410,11 +2527,11 @@ sysctl_kern_check_openevt if (error == 0) { switch (new_value) { case KERN_OPENEVT_PROC: - OSBitOrAtomic(P_CHECKOPENEVT, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_CHECKOPENEVT, &p->p_flag); break; case KERN_UNOPENEVT_PROC: - OSBitAndAtomic(~((uint32_t)P_CHECKOPENEVT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CHECKOPENEVT), &p->p_flag); break; default: @@ -2444,7 +2561,7 @@ sysctl_nx return error; if (changed) { -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) /* * Only allow setting if NX is supported on the chip */ @@ -2467,11 +2584,13 @@ sysctl_loadavg (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { if (proc_is64bit(req->p)) { - struct user_loadavg loadinfo64; - loadavg32to64(&averunnable, &loadinfo64); + struct user64_loadavg loadinfo64; + fill_loadavg64(&averunnable, &loadinfo64); return sysctl_io_opaque(req, &loadinfo64, sizeof(loadinfo64), NULL); } else { - return 
sysctl_io_opaque(req, &averunnable, sizeof(averunnable), NULL); + struct user32_loadavg loadinfo32; + fill_loadavg32(&averunnable, &loadinfo32); + return sysctl_io_opaque(req, &loadinfo32, sizeof(loadinfo32), NULL); } } @@ -2486,7 +2605,7 @@ sysctl_swapusage int error; uint64_t swap_total; uint64_t swap_avail; - uint32_t swap_pagesize; + vm_size_t swap_pagesize; boolean_t swap_encrypted; struct xsw_usage xsu; @@ -2515,9 +2634,9 @@ SYSCTL_PROC(_vm, VM_SWAPUSAGE, swapusage, /* this kernel does NOT implement shared_region_make_private_np() */ SYSCTL_INT(_kern, KERN_SHREG_PRIVATIZABLE, shreg_private, CTLFLAG_RD, - NULL, 0, ""); + (int *)NULL, 0, ""); -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) static int sysctl_sysctl_exec_affinity(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, @@ -2537,9 +2656,9 @@ sysctl_sysctl_exec_affinity(__unused struct sysctl_oid *oidp, if ((error = SYSCTL_IN(req, &newcputype, sizeof(newcputype)))) return error; if (newcputype == CPU_TYPE_I386) - OSBitAndAtomic(~((uint32_t)P_AFFINITY), (UInt32 *)&cur_proc->p_flag); + OSBitAndAtomic(~((uint32_t)P_AFFINITY), &cur_proc->p_flag); else if (newcputype == CPU_TYPE_POWERPC) - OSBitOrAtomic(P_AFFINITY, (UInt32 *)&cur_proc->p_flag); + OSBitOrAtomic(P_AFFINITY, &cur_proc->p_flag); else return (EINVAL); } @@ -2573,7 +2692,7 @@ fetch_process_cputype( goto out; } -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) if (p->p_flag & P_TRANSLATED) { ret = CPU_TYPE_POWERPC; } @@ -2665,8 +2784,34 @@ SYSCTL_INT (_kern, OID_AUTO, affinity_sets_mapping, * All values are in bytes. 
*/ +vm_map_size_t vm_global_no_user_wire_amount; vm_map_size_t vm_global_user_wire_limit; vm_map_size_t vm_user_wire_limit; +/* + * There needs to be a more automatic/elegant way to do this + */ + +SYSCTL_QUAD(_vm, OID_AUTO, global_no_user_wire_amount, CTLFLAG_RW, &vm_global_no_user_wire_amount, ""); SYSCTL_QUAD(_vm, OID_AUTO, global_user_wire_limit, CTLFLAG_RW, &vm_global_user_wire_limit, ""); SYSCTL_QUAD(_vm, OID_AUTO, user_wire_limit, CTLFLAG_RW, &vm_user_wire_limit, ""); + + + +/* + * enable back trace events for thread blocks + */ + +extern uint32_t kdebug_thread_block; + +SYSCTL_INT (_kern, OID_AUTO, kdebug_thread_block, + CTLFLAG_RW, &kdebug_thread_block, 0, "kdebug thread_block"); + +/* + * Kernel stack size and depth + */ +SYSCTL_INT (_kern, OID_AUTO, stack_size, + CTLFLAG_RD, (int *) &kernel_stack_size, 0, "Kernel stack size"); +SYSCTL_INT (_kern, OID_AUTO, stack_depth_max, + CTLFLAG_RD, (int *) &kernel_stack_depth_max, 0, "Max kernel stack depth at interrupt or context switch"); + diff --git a/bsd/kern/kern_time.c b/bsd/kern/kern_time.c index 30b0a651c..6f392fb86 100644 --- a/bsd/kern/kern_time.c +++ b/bsd/kern/kern_time.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,6 +74,7 @@ #include #include #include +#include #include #include @@ -113,13 +114,19 @@ int gettimeofday( __unused struct proc *p, struct gettimeofday_args *uap, - register_t *retval) + int32_t *retval) { int error = 0; struct timezone ltz; /* local copy */ - if (uap->tp) - clock_gettimeofday((uint32_t *)&retval[0], (uint32_t *)&retval[1]); + if (uap->tp) { + clock_sec_t secs; + clock_usec_t usecs; + + clock_gettimeofday(&secs, &usecs); + retval[0] = secs; + retval[1] = usecs; + } if (uap->tzp) { lck_spin_lock(tz_slock); @@ -137,12 +144,14 @@ __unused struct proc *p, */ /* ARGSUSED */ int -settimeofday(__unused struct proc *p, struct settimeofday_args *uap, __unused register_t *retval) +settimeofday(__unused struct proc *p, struct settimeofday_args *uap, __unused int32_t *retval) { struct timeval atv; struct timezone atz; int error; + bzero(&atv, sizeof(atv)); + #if CONFIG_MACF error = mac_system_check_settime(kauth_cred_get()); if (error) @@ -155,12 +164,15 @@ settimeofday(__unused struct proc *p, struct settimeofday_args *uap, __unused r /* Verify all parameters before changing time */ if (uap->tv) { if (IS_64BIT_PROCESS(p)) { - struct user_timeval user_atv; - error = copyin(uap->tv, &user_atv, sizeof(struct user_timeval)); + struct user64_timeval user_atv; + error = copyin(uap->tv, &user_atv, sizeof(user_atv)); atv.tv_sec = user_atv.tv_sec; atv.tv_usec = user_atv.tv_usec; } else { - error = copyin(uap->tv, &atv, sizeof(struct timeval)); + struct user32_timeval user_atv; + error = copyin(uap->tv, &user_atv, sizeof(user_atv)); + atv.tv_sec = user_atv.tv_sec; + atv.tv_usec = user_atv.tv_usec; } if (error) return (error); @@ -193,7 +205,7 @@ setthetime( */ /* ARGSUSED */ int -adjtime(struct proc *p, struct adjtime_args *uap, __unused register_t *retval) +adjtime(struct proc *p, struct adjtime_args *uap, __unused int32_t *retval) { struct timeval atv; int error; @@ -206,12 +218,15 @@ adjtime(struct proc *p, struct 
adjtime_args *uap, __unused register_t *retval) if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); if (IS_64BIT_PROCESS(p)) { - struct user_timeval user_atv; - error = copyin(uap->delta, &user_atv, sizeof(struct user_timeval)); + struct user64_timeval user_atv; + error = copyin(uap->delta, &user_atv, sizeof(user_atv)); atv.tv_sec = user_atv.tv_sec; atv.tv_usec = user_atv.tv_usec; } else { - error = copyin(uap->delta, &atv, sizeof(struct timeval)); + struct user32_timeval user_atv; + error = copyin(uap->delta, &user_atv, sizeof(user_atv)); + atv.tv_sec = user_atv.tv_sec; + atv.tv_usec = user_atv.tv_usec; } if (error) return (error); @@ -219,16 +234,19 @@ adjtime(struct proc *p, struct adjtime_args *uap, __unused register_t *retval) /* * Compute the total correction and the rate at which to apply it. */ - clock_adjtime((int32_t *)&atv.tv_sec, &atv.tv_usec); + clock_adjtime(&atv.tv_sec, &atv.tv_usec); if (uap->olddelta) { if (IS_64BIT_PROCESS(p)) { - struct user_timeval user_atv; + struct user64_timeval user_atv; user_atv.tv_sec = atv.tv_sec; user_atv.tv_usec = atv.tv_usec; - error = copyout(&user_atv, uap->olddelta, sizeof(struct user_timeval)); + error = copyout(&user_atv, uap->olddelta, sizeof(user_atv)); } else { - error = copyout(&atv, uap->olddelta, sizeof(struct timeval)); + struct user32_timeval user_atv; + user_atv.tv_sec = atv.tv_sec; + user_atv.tv_usec = atv.tv_usec; + error = copyout(&user_atv, uap->olddelta, sizeof(user_atv)); } } @@ -267,12 +285,12 @@ inittodr( time_t boottime_sec(void) { - uint32_t sec, nanosec; - clock_get_boottime_nanotime(&sec, &nanosec); - return (sec); -} + clock_sec_t secs; + clock_nsec_t nanosecs; -uint64_t tvtoabstime(struct timeval *tvp); + clock_get_boottime_nanotime(&secs, &nanosecs); + return (secs); +} /* * Get value of an interval timer. 
The process virtual and @@ -298,13 +316,15 @@ uint64_t tvtoabstime(struct timeval *tvp); */ /* ARGSUSED */ int -getitimer(struct proc *p, struct getitimer_args *uap, __unused register_t *retval) +getitimer(struct proc *p, struct getitimer_args *uap, __unused int32_t *retval) { struct itimerval aitv; if (uap->which > ITIMER_PROF) return(EINVAL); + bzero(&aitv, sizeof(aitv)); + proc_spinlock(p); switch (uap->which) { @@ -342,14 +362,19 @@ getitimer(struct proc *p, struct getitimer_args *uap, __unused register_t *retva proc_spinunlock(p); if (IS_64BIT_PROCESS(p)) { - struct user_itimerval user_itv; + struct user64_itimerval user_itv; user_itv.it_interval.tv_sec = aitv.it_interval.tv_sec; user_itv.it_interval.tv_usec = aitv.it_interval.tv_usec; user_itv.it_value.tv_sec = aitv.it_value.tv_sec; user_itv.it_value.tv_usec = aitv.it_value.tv_usec; - return (copyout((caddr_t)&user_itv, uap->itv, sizeof (struct user_itimerval))); + return (copyout((caddr_t)&user_itv, uap->itv, sizeof (user_itv))); } else { - return (copyout((caddr_t)&aitv, uap->itv, sizeof (struct itimerval))); + struct user32_itimerval user_itv; + user_itv.it_interval.tv_sec = aitv.it_interval.tv_sec; + user_itv.it_interval.tv_usec = aitv.it_interval.tv_usec; + user_itv.it_value.tv_sec = aitv.it_value.tv_sec; + user_itv.it_value.tv_usec = aitv.it_value.tv_usec; + return (copyout((caddr_t)&user_itv, uap->itv, sizeof (user_itv))); } } @@ -362,26 +387,33 @@ getitimer(struct proc *p, struct getitimer_args *uap, __unused register_t *retva */ /* ARGSUSED */ int -setitimer(struct proc *p, struct setitimer_args *uap, register_t *retval) +setitimer(struct proc *p, struct setitimer_args *uap, int32_t *retval) { struct itimerval aitv; user_addr_t itvp; int error; + bzero(&aitv, sizeof(aitv)); + if (uap->which > ITIMER_PROF) return (EINVAL); if ((itvp = uap->itv)) { if (IS_64BIT_PROCESS(p)) { - struct user_itimerval user_itv; - if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof (struct user_itimerval)))) + struct 
user64_itimerval user_itv; + if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof (user_itv)))) return (error); aitv.it_interval.tv_sec = user_itv.it_interval.tv_sec; aitv.it_interval.tv_usec = user_itv.it_interval.tv_usec; aitv.it_value.tv_sec = user_itv.it_value.tv_sec; aitv.it_value.tv_usec = user_itv.it_value.tv_usec; } else { - if ((error = copyin(itvp, (caddr_t)&aitv, sizeof (struct itimerval)))) + struct user32_itimerval user_itv; + if ((error = copyin(itvp, (caddr_t)&user_itv, sizeof (user_itv)))) return (error); + aitv.it_interval.tv_sec = user_itv.it_interval.tv_sec; + aitv.it_interval.tv_usec = user_itv.it_interval.tv_usec; + aitv.it_value.tv_sec = user_itv.it_value.tv_sec; + aitv.it_value.tv_usec = user_itv.it_value.tv_usec; } } if ((uap->itv = uap->oitv) && (error = getitimer(p, (struct getitimer_args *)uap, retval))) @@ -612,14 +644,26 @@ void microtime( struct timeval *tvp) { - clock_get_calendar_microtime((uint32_t *)&tvp->tv_sec, (uint32_t *)&tvp->tv_usec); + clock_sec_t tv_sec; + clock_usec_t tv_usec; + + clock_get_calendar_microtime(&tv_sec, &tv_usec); + + tvp->tv_sec = tv_sec; + tvp->tv_usec = tv_usec; } void microuptime( struct timeval *tvp) { - clock_get_system_microtime((uint32_t *)&tvp->tv_sec, (uint32_t *)&tvp->tv_usec); + clock_sec_t tv_sec; + clock_usec_t tv_usec; + + clock_get_system_microtime(&tv_sec, &tv_usec); + + tvp->tv_sec = tv_sec; + tvp->tv_usec = tv_usec; } /* @@ -629,14 +673,26 @@ void nanotime( struct timespec *tsp) { - clock_get_calendar_nanotime((uint32_t *)&tsp->tv_sec, (uint32_t *)&tsp->tv_nsec); + clock_sec_t tv_sec; + clock_nsec_t tv_nsec; + + clock_get_calendar_nanotime(&tv_sec, &tv_nsec); + + tsp->tv_sec = tv_sec; + tsp->tv_nsec = tv_nsec; } void nanouptime( struct timespec *tsp) { - clock_get_system_nanotime((uint32_t *)&tsp->tv_sec, (uint32_t *)&tsp->tv_nsec); + clock_sec_t tv_sec; + clock_nsec_t tv_nsec; + + clock_get_system_nanotime(&tv_sec, &tv_nsec); + + tsp->tv_sec = tv_sec; + tsp->tv_nsec = tv_nsec; } uint64_t 
diff --git a/bsd/kern/kern_xxx.c b/bsd/kern/kern_xxx.c index 7884fbd3c..0a080dd59 100644 --- a/bsd/kern/kern_xxx.c +++ b/bsd/kern/kern_xxx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,7 +79,7 @@ #include #include -#include +#include #include #include @@ -88,11 +88,11 @@ #endif int -reboot(struct proc *p, register struct reboot_args *uap, __unused register_t *retval) +reboot(struct proc *p, register struct reboot_args *uap, __unused int32_t *retval) { char command[64]; int error=0; - int dummy=0; + size_t dummy=0; #if CONFIG_MACF kauth_cred_t my_cred; #endif @@ -101,10 +101,8 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused register_t *re command[0] = '\0'; -#ifndef CONFIG_EMBEDDED if ((error = suser(kauth_cred_get(), &p->p_acflag))) return(error); -#endif if (uap->opt & RB_COMMAND) error = copyinstr(uap->command, @@ -117,7 +115,7 @@ reboot(struct proc *p, register struct reboot_args *uap, __unused register_t *re kauth_cred_unref(&my_cred); #endif if (!error) { - OSBitOrAtomic(P_REBOOT, (UInt32 *)&p->p_flag); /* No more signals for this proc */ + OSBitOrAtomic(P_REBOOT, &p->p_flag); /* No more signals for this proc */ boot(RB_BOOT, uap->opt, command); } return(error); diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index ea47e2f6c..d468e81cb 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,7 +37,8 @@ #include #include #include -#include "kpi_mbuf_internal.h" + +#include "net/net_str_id.h" static const mbuf_flags_t mbuf_flags_mask = MBUF_EXT | MBUF_PKTHDR | MBUF_EOR | MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG | @@ -354,6 +355,17 @@ errno_t mbuf_adjustlen(mbuf_t m, int amount) return 0; } +mbuf_t +mbuf_concatenate(mbuf_t dst, mbuf_t src) +{ + if (dst == NULL) + return (NULL); + + m_cat(dst, src); + + /* return dst as is in the current implementation */ + return (dst); +} errno_t mbuf_copydata(const mbuf_t m0, size_t off, size_t len, void* out_data) { /* Copied m_copydata, added error handling (don't just panic) */ @@ -529,7 +541,7 @@ extern void in_cksum_offset(struct mbuf* m, size_t ip_offset); extern void in_delayed_cksum_offset(struct mbuf *m, int ip_offset); void -mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, size_t protocol_offset) +mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family, size_t protocol_offset) { if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_TCP_SUM16)) == 0) @@ -568,7 +580,7 @@ mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, size_t protocol_offs * Hardware checksum code looked pretty IPv4 specific. 
*/ if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) != 0) - panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4 packet (%lu)!\n", protocol_family); + panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4 packet (%u)!\n", protocol_family); } } @@ -622,6 +634,27 @@ mbuf_set_csum_requested( return 0; } +static const mbuf_tso_request_flags_t mbuf_valid_tso_request_flags = + MBUF_TSO_IPV4 | MBUF_TSO_IPV6; + +errno_t +mbuf_get_tso_requested( + mbuf_t mbuf, + mbuf_tso_request_flags_t *request, + u_int32_t *value) +{ + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || + request == NULL || value == NULL) + return EINVAL; + + *request = mbuf->m_pkthdr.csum_flags; + *request &= mbuf_valid_tso_request_flags; + if (*request && value != NULL) + *value = mbuf->m_pkthdr.tso_segsz; + + return 0; +} + errno_t mbuf_get_csum_requested( mbuf_t mbuf, @@ -754,111 +787,14 @@ nd6_storelladdr(void) * Mbuf tag KPIs */ -struct mbuf_tag_id_entry { - SLIST_ENTRY(mbuf_tag_id_entry) next; - mbuf_tag_id_t id; - char string[]; -}; - -#define MBUF_TAG_ID_ENTRY_SIZE(__str) \ - ((size_t)&(((struct mbuf_tag_id_entry*)0)->string[0]) + \ - strlen(__str) + 1) - -#define MTAG_FIRST_ID 1000 -static mbuf_tag_id_t mtag_id_next = MTAG_FIRST_ID; -static SLIST_HEAD(,mbuf_tag_id_entry) mtag_id_list = {NULL}; -static lck_mtx_t *mtag_id_lock = NULL; - -__private_extern__ void -mbuf_tag_id_first_last( - mbuf_tag_id_t * first, - mbuf_tag_id_t * last) -{ - *first = MTAG_FIRST_ID; - *last = mtag_id_next - 1; -} - -__private_extern__ errno_t -mbuf_tag_id_find_internal( - const char *string, - mbuf_tag_id_t *out_id, - int create) -{ - struct mbuf_tag_id_entry *entry = NULL; - - - *out_id = 0; - - if (string == NULL || out_id == NULL) { - return EINVAL; - } - - /* Don't bother allocating the lock if we're only doing a lookup */ - if (create == 0 && mtag_id_lock == NULL) - return ENOENT; - - /* Allocate lock if necessary */ - if (mtag_id_lock == NULL) { - lck_grp_attr_t *grp_attrib = 
NULL; - lck_attr_t *lck_attrb = NULL; - lck_grp_t *lck_group = NULL; - lck_mtx_t *new_lock = NULL; - - grp_attrib = lck_grp_attr_alloc_init(); - lck_group = lck_grp_alloc_init("mbuf_tag_allocate_id", grp_attrib); - lck_grp_attr_free(grp_attrib); - lck_attrb = lck_attr_alloc_init(); - - new_lock = lck_mtx_alloc_init(lck_group, lck_attrb); - if (!OSCompareAndSwap((UInt32)0, (UInt32)new_lock, (UInt32*)&mtag_id_lock)) { - /* - * If the atomic swap fails, someone else has already - * done this work. We can free the stuff we allocated. - */ - lck_mtx_free(new_lock, lck_group); - lck_grp_free(lck_group); - } - lck_attr_free(lck_attrb); - } - - /* Look for an existing entry */ - lck_mtx_lock(mtag_id_lock); - SLIST_FOREACH(entry, &mtag_id_list, next) { - if (strncmp(string, entry->string, strlen(string) + 1) == 0) { - break; - } - } - - if (entry == NULL) { - if (create == 0) { - lck_mtx_unlock(mtag_id_lock); - return ENOENT; - } - - entry = kalloc(MBUF_TAG_ID_ENTRY_SIZE(string)); - if (entry == NULL) { - lck_mtx_unlock(mtag_id_lock); - return ENOMEM; - } - - strlcpy(entry->string, string, strlen(string)+1); - entry->id = mtag_id_next; - mtag_id_next++; - SLIST_INSERT_HEAD(&mtag_id_list, entry, next); - } - lck_mtx_unlock(mtag_id_lock); - - *out_id = entry->id; - - return 0; -} +#define MTAG_FIRST_ID FIRST_KPI_STR_ID errno_t mbuf_tag_id_find( const char *string, mbuf_tag_id_t *out_id) { - return mbuf_tag_id_find_internal(string, out_id, 1); + return net_str_id_find_internal(string, out_id, NSI_MBUF_TAG, 1); } errno_t @@ -871,13 +807,15 @@ mbuf_tag_allocate( void** data_p) { struct m_tag *tag; + u_int32_t mtag_id_first, mtag_id_last; if (data_p != NULL) *data_p = NULL; /* Sanity check parameters */ - if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || - id >= mtag_id_next || length < 1 || (length & 0xffff0000) != 0 || + (void) net_str_id_first_last(&mtag_id_first, &mtag_id_last, NSI_MBUF_TAG); + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || 
id < mtag_id_first || + id > mtag_id_last || length < 1 || (length & 0xffff0000) != 0 || data_p == NULL) { return EINVAL; } @@ -910,6 +848,7 @@ mbuf_tag_find( void** data_p) { struct m_tag *tag; + u_int32_t mtag_id_first, mtag_id_last; if (length != NULL) *length = 0; @@ -917,8 +856,9 @@ mbuf_tag_find( *data_p = NULL; /* Sanity check parameters */ - if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || - id >= mtag_id_next || length == NULL || data_p == NULL) { + (void) net_str_id_first_last(&mtag_id_first, &mtag_id_last, NSI_MBUF_TAG); + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < mtag_id_first || + id > mtag_id_last || length == NULL || data_p == NULL) { return EINVAL; } @@ -942,9 +882,12 @@ mbuf_tag_free( mbuf_tag_type_t type) { struct m_tag *tag; + u_int32_t mtag_id_first, mtag_id_last; - if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < MTAG_FIRST_ID || - id >= mtag_id_next) + /* Sanity check parameters */ + (void) net_str_id_first_last(&mtag_id_first, &mtag_id_last, NSI_MBUF_TAG); + if (mbuf == NULL || (mbuf->m_flags & M_PKTHDR) == 0 || id < mtag_id_first || + id > mtag_id_last) return; tag = m_tag_locate(mbuf, id, type, NULL); @@ -1117,11 +1060,14 @@ mbuf_copyback( return result; } -#if !INET6 -void inet6_unsupported(void); +u_int32_t +mbuf_get_mlen(void) +{ + return (_MLEN); +} -void inet6_unsupported(void) +u_int32_t +mbuf_get_mhlen(void) { - *((int *)0) = 0x1; + return (_MHLEN); } -#endif /* !INET6 */ diff --git a/bsd/kern/kpi_mbuf_internal.h b/bsd/kern/kpi_mbuf_internal.h index 8b424e2d9..81d9077d5 100644 --- a/bsd/kern/kpi_mbuf_internal.h +++ b/bsd/kern/kpi_mbuf_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,7 +30,8 @@ #include -void mbuf_tag_id_first_last(mbuf_tag_id_t * first, mbuf_tag_id_t * last); -errno_t mbuf_tag_id_find_internal(const char *string, - mbuf_tag_id_t * out_id, int create); -#endif __KPI_MBUF_INTERNAL_ +extern void mbuf_tag_id_first_last(mbuf_tag_id_t * first, mbuf_tag_id_t * last); +extern errno_t mbuf_tag_id_find_internal(const char *string, + mbuf_tag_id_t * out_id, int create); + +#endif /* __KPI_MBUF_INTERNAL_ */ diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c index f7658fee6..2ee7f0274 100644 --- a/bsd/kern/kpi_socket.c +++ b/bsd/kern/kpi_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,9 +43,11 @@ #include #include #include +#include extern int soclose_locked(struct socket *so); extern void soclose_wait_locked(struct socket *so); +extern int so_isdstlocal(struct socket *so); errno_t sock_send_internal( socket_t sock, @@ -194,7 +196,7 @@ sock_bind( { if (sock == NULL || to == NULL) return EINVAL; - return sobind(sock, (struct sockaddr*)to); + return sobind(sock, (struct sockaddr*)(uintptr_t)to); } errno_t @@ -216,7 +218,7 @@ sock_connect( socket_unlock(sock, 1); return EALREADY; } - error = soconnectlock(sock, (struct sockaddr*)to, 0); + error = soconnectlock(sock, (struct sockaddr*)(uintptr_t)to, 0); if (!error) { if ((sock->so_state & SS_ISCONNECTING) && ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { @@ -346,7 +348,7 @@ sock_getpeername(socket_t sock, struct sockaddr *peername, int peernamelen) socket_unlock(sock, 1); return (ENOTCONN); } - error = sock_getaddr(sock, &sa, 1); + error = sogetaddr_locked(sock, &sa, 1); socket_unlock(sock, 1); if (error == 0) { if (peernamelen > sa->sa_len) @@ -367,7 +369,7 @@ sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen) return (EINVAL); 
socket_lock(sock, 1); - error = sock_getaddr(sock, &sa, 0); + error = sogetaddr_locked(sock, &sa, 0); socket_unlock(sock, 1); if (error == 0) { if (socknamelen > sa->sa_len) @@ -378,17 +380,17 @@ sock_getsockname(socket_t sock, struct sockaddr *sockname, int socknamelen) return (error); } -errno_t -sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) +__private_extern__ int +sogetaddr_locked(struct socket *so, struct sockaddr **psa, int peer) { int error; - if (sock == NULL || psa == NULL) + if (so == NULL || psa == NULL) return (EINVAL); *psa = NULL; - error = peer ? sock->so_proto->pr_usrreqs->pru_peeraddr(sock, psa) : - sock->so_proto->pr_usrreqs->pru_sockaddr(sock, psa); + error = peer ? so->so_proto->pr_usrreqs->pru_peeraddr(so, psa) : + so->so_proto->pr_usrreqs->pru_sockaddr(so, psa); if (error == 0 && *psa == NULL) { error = ENOMEM; @@ -399,6 +401,21 @@ sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) return (error); } +errno_t +sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) +{ + int error; + + if (sock == NULL || psa == NULL) + return (EINVAL); + + socket_lock(sock, 1); + error = sogetaddr_locked(sock, psa, peer); + socket_unlock(sock, 1); + + return (error); +} + void sock_freeaddr(struct sockaddr *sa) { @@ -423,7 +440,7 @@ sock_getsockopt( sopt.sopt_name = optname; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = *optlen; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; error = sogetopt(sock, &sopt); /* will lock socket */ if (error == 0) *optlen = sopt.sopt_valsize; return error; @@ -435,7 +452,7 @@ sock_ioctl( unsigned long request, void *argp) { - return soioctl(sock, request, argp, NULL); /* will lock socket */ + return soioctl(sock, request, argp, kernproc); /* will lock socket */ } errno_t @@ -454,10 +471,112 @@ sock_setsockopt( sopt.sopt_name = optname; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = optlen; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; return sosetopt(sock, &sopt); /* 
will lock socket */ } +errno_t +sock_settclassopt( + socket_t sock, + const void *optval, + size_t optlen) { + + errno_t error = 0; + struct sockopt sopt; + + if (sock == NULL || optval == NULL || optlen == 0) return EINVAL; + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_valsize = optlen; + sopt.sopt_p = kernproc; + + socket_lock(sock, 1); + if (!(sock->so_state & SS_ISCONNECTED)) { + /* If the socket is not connected then we don't know + * if the destination is on LAN or not. Skip + * setting traffic class in this case + */ + error = ENOTCONN; + goto out; + } + + if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL || sock->so_pcb == NULL) { + error = EINVAL; + goto out; + } + + /* Check if the destination address is LAN or link local address. + * We do not want to set traffic class bits if the destination + * is not local + */ + if (!so_isdstlocal(sock)) { + goto out; + } + + switch (sock->so_proto->pr_domain->dom_family) { + case AF_INET: + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_TOS; + break; + case AF_INET6: + sopt.sopt_level = IPPROTO_IPV6; + sopt.sopt_name = IPV6_TCLASS; + break; + default: + error = EINVAL; + goto out; + } + + socket_unlock(sock, 1); + return sosetopt(sock, &sopt); +out: + socket_unlock(sock, 1); + return error; +} + +errno_t +sock_gettclassopt( + socket_t sock, + void *optval, + size_t *optlen) { + + errno_t error = 0; + struct sockopt sopt; + + if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL; + + sopt.sopt_dir = SOPT_GET; + sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_valsize = *optlen; + sopt.sopt_p = kernproc; + + socket_lock(sock, 1); + if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) { + socket_unlock(sock, 1); + return EINVAL; + } + + switch (sock->so_proto->pr_domain->dom_family) { + case AF_INET: + sopt.sopt_level = IPPROTO_IP; + sopt.sopt_name = IP_TOS; + break; + case AF_INET6: + sopt.sopt_level = IPPROTO_IPV6; + 
sopt.sopt_name = IPV6_TCLASS; + break; + default: + socket_unlock(sock, 1); + return EINVAL; + + } + socket_unlock(sock, 1); + error = sogetopt(sock, &sopt); /* will lock socket */ + if (error == 0) *optlen = sopt.sopt_valsize; + return error; +} + errno_t sock_listen( socket_t sock, @@ -489,7 +608,7 @@ sock_receive_internal( &uio_buf[0], sizeof(uio_buf)); if (msg && data == NULL) { int i; - struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + struct iovec *tempp = msg->msg_iov; for (i = 0; i < msg->msg_iovlen; i++) { uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); @@ -503,19 +622,10 @@ sock_receive_internal( if (recvdlen) *recvdlen = 0; - - if (msg && msg->msg_control) { - if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) return EINVAL; - if ((size_t)msg->msg_controllen > MLEN) return EINVAL; - control = m_get(M_NOWAIT, MT_CONTROL); - if (control == NULL) return ENOMEM; - memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen); - control->m_len = msg->msg_controllen; - } /* let pru_soreceive handle the socket locking */ error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio, - data, control ? &control : NULL, &flags); + data, (msg && msg->msg_control) ? 
&control : NULL, &flags); if (error) goto cleanup; if (recvdlen) @@ -559,7 +669,7 @@ sock_receive_internal( clen -= tocopy; m = m->m_next; } - msg->msg_controllen = (u_int32_t)ctlbuf - (u_int32_t)msg->msg_control; + msg->msg_controllen = (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control; } } @@ -618,7 +728,7 @@ sock_send_internal( } if (data == 0 && msg != NULL) { - struct iovec_32 *tempp = (struct iovec_32 *) msg->msg_iov; + struct iovec *tempp = msg->msg_iov; auio = uio_createwithbuffer(msg->msg_iovlen, 0, UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof(uio_buf)); diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index 76cb302f7..c8469ab40 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -345,7 +345,7 @@ sflt_detach_private( if (entry->sfe_socket->so_filteruse != 0) { entry->sfe_flags |= SFEF_DETACHUSEZERO; lck_mtx_unlock(sock_filter_lock); - + if (unregistering) { #if DEBUG printf("sflt_detach_private unregistering SFEF_DETACHUSEZERO " @@ -355,7 +355,6 @@ sflt_detach_private( #endif socket_unlock(entry->sfe_socket, 0); } - return; } else { /* @@ -592,7 +591,7 @@ sock_inject_data_in( socket_lock(so, 1); if (from) { - if (sbappendaddr(&so->so_rcv, (struct sockaddr*)from, data, + if (sbappendaddr(&so->so_rcv, (struct sockaddr*)(uintptr_t)from, data, control, NULL)) sorwakeup(so); goto done; @@ -631,7 +630,7 @@ sock_inject_data_out( { int sosendflags = 0; if (flags & sock_data_filt_flag_oob) sosendflags = MSG_OOB; - return sosend(so, (struct sockaddr*)to, NULL, + return sosend(so, (struct sockaddr*)(uintptr_t)to, NULL, data, control, sosendflags); } diff --git a/bsd/kern/mach_fat.c b/bsd/kern/mach_fat.c index 9a75e6861..9811047d7 100644 --- a/bsd/kern/mach_fat.c +++ b/bsd/kern/mach_fat.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -110,13 +111,13 @@ fatfile_getarch2( * only PAGE_SIZE bytes */ if (end_of_archs > PAGE_SIZE || - end_of_archs < (sizeof(struct fat_header)+sizeof(struct 
fat_arch))) + end_of_archs < (off_t)(sizeof(struct fat_header)+sizeof(struct fat_arch))) return(LOAD_BADMACHO); /* * Round size of fat_arch structures up to page boundry. */ - size = round_page_32(end_of_archs); + size = round_page(end_of_archs); if (size == 0) return(LOAD_BADMACHO); diff --git a/libsa/libsa/setjmp.h b/bsd/kern/mach_fat.h similarity index 67% rename from libsa/libsa/setjmp.h rename to bsd/kern/mach_fat.h index 1a67e8576..f05c3675b 100644 --- a/libsa/libsa/setjmp.h +++ b/bsd/kern/mach_fat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,22 +25,20 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _LIBSA_SETJMP_H -#define _LIBSA_SETJMP_H -#if defined (__ppc__) -#include "ppc/setjmp.h" -#elif defined (__i386__) -#include "i386/setjmp.h" -#else -#error architecture not supported -#endif +#ifndef _BSD_KERN_MACH_FAT_H_ +#define _BSD_KERN_MACH_FAT_H_ -__private_extern__ int setjmp( - jmp_buf jmp_buf); +#include +#include +#include +#include -__private_extern__ void longjmp( - jmp_buf jmp_buf, - int value); +load_return_t fatfile_getarch_affinity(struct vnode *vp, vm_offset_t data_ptr, + struct fat_arch *archret, int affinity); +load_return_t fatfile_getarch(struct vnode *vp, vm_offset_t data_ptr, + struct fat_arch *archret); +load_return_t fatfile_getarch_with_bits(struct vnode *vp, integer_t archbits, + vm_offset_t data_ptr, struct fat_arch *archret); -#endif /* _LIBSA_SETJMP_H */ +#endif /* _BSD_KERN_MACH_FAT_H_ */ diff --git a/bsd/kern/mach_header.c b/bsd/kern/mach_header.c deleted file mode 100644 index 86efc1f82..000000000 --- a/bsd/kern/mach_header.c +++ /dev/null @@ -1,499 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.c - * - * Functions for accessing mach-o headers. - * - * NOTE: This file supports only 32 bit mach headers at the present - * time; it's primary use is by kld, and all externally - * referenced routines at the present time operate against - * the 32 bit mach header _mh_execute_header, which is the - * header for the currently executing kernel. Adding support - * for 64 bit kernels is possible, but is not necessary at the - * present time. - * - * HISTORY - * 27-MAR-97 Umesh Vaishampayan (umeshv@NeXT.com) - * Added getsegdatafromheader(); - * - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.c. 
- * Ifdef'ed out most of this since I couldn't find any references. - */ - -#if !defined(KERNEL_PRELOAD) -#include -#include // from libsa -#if DEBUG -#include -#endif - -extern struct mach_header _mh_execute_header; - -/* - * return the last address (first avail) - * - * This routine operates against the currently executing kernel only - */ -vm_offset_t -getlastaddr(void) -{ - struct segment_command *sgp; - vm_offset_t last_addr = 0; - struct mach_header *header = &_mh_execute_header; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if ( sgp->cmd == LC_SEGMENT) { - if (sgp->vmaddr + sgp->vmsize > last_addr) - last_addr = sgp->vmaddr + sgp->vmsize; - } - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return last_addr; -} - -#if FIXME /* [ */ -/* - * This routine operates against the currently executing kernel only - */ -struct mach_header ** -getmachheaders(void) -{ - struct mach_header **tl; - tl = (struct mach_header **)malloc(2*sizeof(struct mach_header *)); - tl[0] = &_mh_execute_header; - tl[1] = (struct mach_header *)0; - return tl; -} -#endif /* FIXME ] */ - -/* - * This routine returns the a pointer to the data for the named section in the - * named segment if it exist in the mach header passed to it. Also it returns - * the size of the section data indirectly through the pointer size. Otherwise - * it returns zero for the pointer and the size. - * - * This routine can operate against any 32 bit mach header. 
- */ -void * -getsectdatafromheader( - struct mach_header *mhp, - const char *segname, - const char *sectname, - int *size) -{ - const struct section *sp; - void *result; - - sp = getsectbynamefromheader(mhp, segname, sectname); - if(sp == (struct section *)0){ - *size = 0; - return((char *)0); - } - *size = sp->size; - result = (void *)sp->addr; - return result; -} - -/* - * This routine returns the a pointer to the data for the named segment - * if it exist in the mach header passed to it. Also it returns - * the size of the segment data indirectly through the pointer size. - * Otherwise it returns zero for the pointer and the size. - * - * This routine can operate against any 32 bit mach header. - */ -void * -getsegdatafromheader( - struct mach_header *mhp, - const char *segname, - int *size) -{ - const struct segment_command *sc; - void *result; - - sc = getsegbynamefromheader(mhp, segname); - if(sc == (struct segment_command *)0){ - *size = 0; - return((char *)0); - } - *size = sc->vmsize; - result = (void *)sc->vmaddr; - return result; -} - -/* - * This routine returns the section structure for the named section in the - * named segment for the mach_header pointer passed to it if it exist. - * Otherwise it returns zero. - * - * This routine can operate against any 32 bit mach header. 
- */ -struct section * -getsectbynamefromheader( - struct mach_header *mhp, - const char *segname, - const char *sectname) -{ - struct segment_command *sgp; - struct section *sp; - unsigned long i, j; - - sgp = (struct segment_command *) - ((char *)mhp + sizeof(struct mach_header)); - for(i = 0; i < mhp->ncmds; i++){ - if(sgp->cmd == LC_SEGMENT) - if(strncmp(sgp->segname, segname, sizeof(sgp->segname)) == 0 || - mhp->filetype == MH_OBJECT){ - sp = (struct section *)((char *)sgp + - sizeof(struct segment_command)); - for(j = 0; j < sgp->nsects; j++){ - if(strncmp(sp->sectname, sectname, - sizeof(sp->sectname)) == 0 && - strncmp(sp->segname, segname, - sizeof(sp->segname)) == 0) - return(sp); - sp = (struct section *)((char *)sp + - sizeof(struct section)); - } - } - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return((struct section *)0); -} - -/* - * This routine can operate against any 32 bit mach header. - */ -struct segment_command * -getsegbynamefromheader( - struct mach_header *header, - const char *seg_name) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if ( sgp->cmd == LC_SEGMENT - && !strncmp(sgp->segname, seg_name, sizeof(sgp->segname))) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return (struct segment_command *)0; -} - - -/* - * For now at least, all the rest of this seems unused. - * NOTE: The constant in here for segment alignment is machine-dependent, - * so if you include this, define a machine dependent constant for it's - * value. 
- */ -static struct { - struct segment_command seg; - struct section sect; -} fvm_data = { - { - LC_SEGMENT, // cmd - sizeof(fvm_data), // cmdsize - "__USER", // segname - 0, // vmaddr - 0, // vmsize - 0, // fileoff - 0, // filesize - VM_PROT_READ, // maxprot - VM_PROT_READ, // initprot, - 1, // nsects - 0 // flags - }, - { - "", // sectname - "__USER", // segname - 0, // addr - 0, // size - 0, // offset - 4, // align - 0, // reloff - 0, // nreloc - 0, // flags - 0, // reserved1 - 0 // reserved2 - } -}; - -struct segment_command *fvm_seg; - -static struct fvmfile_command *fvmfilefromheader(struct mach_header *header); -static vm_offset_t getsizeofmacho(struct mach_header *header); - -/* - * Return the first segment_command in the header. - * - * This routine operates against the currently executing kernel only - */ -struct segment_command * -firstseg(void) -{ - return firstsegfromheader(&_mh_execute_header); -} - -/* - * This routine can operate against any 32 bit mach header, and returns a - * pointer to a 32 bit segment_command structure from the file prefixed by - * the header it is passed as its argument. 
- */ -struct segment_command * -firstsegfromheader(struct mach_header *header) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if (sgp->cmd == LC_SEGMENT) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return (struct segment_command *)0; -} - -/* - * This routine operates against a 32 bit mach segment_command structure - * pointer from the currently executing kernel only, to obtain the - * sequentially next segment_command structure in the currently executing - * kernel - */ -struct segment_command * -nextseg(struct segment_command *sgp) -{ - struct segment_command *this; - - this = nextsegfromheader(&_mh_execute_header, sgp); - - /* - * For the kernel's header add on the faked segment for the - * USER boot code identified by a FVMFILE_COMMAND in the mach header. - */ - if (!this && sgp != fvm_seg) - this = fvm_seg; - - return this; -} - -/* - * This routine operates against any 32 bit mach segment_command structure - * pointer and the provided 32 bit header, to obtain the sequentially next - * segment_command structure in that header. 
- */ -struct segment_command * -nextsegfromheader( - struct mach_header *header, - struct segment_command *seg) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++) { - if (sgp == seg) - break; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - - if (i == header->ncmds) - return (struct segment_command *)0; - - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - for (; i < header->ncmds; i++) { - if (sgp->cmd == LC_SEGMENT) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - - return (struct segment_command *)0; -} - - -/* - * Return the address of the named Mach-O segment from the currently - * executing 32 bit kernel, or NULL. - */ -struct segment_command * -getsegbyname(const char *seg_name) -{ - struct segment_command *this; - - this = getsegbynamefromheader(&_mh_execute_header, seg_name); - - /* - * For the kernel's header add on the faked segment for the - * USER boot code identified by a FVMFILE_COMMAND in the mach header. - */ - if (!this && strncmp(seg_name, fvm_seg->segname, - sizeof(fvm_seg->segname)) == 0) - this = fvm_seg; - - return this; -} - -/* - * This routine returns the a pointer the section structure of the named - * section in the named segment if it exists in the currently executing - * kernel, which it is presumed to be linked into. Otherwise it returns NULL. - */ -struct section * -getsectbyname( - const char *segname, - const char *sectname) -{ - return(getsectbynamefromheader( - (struct mach_header *)&_mh_execute_header, segname, sectname)); -} - -/* - * This routine can operate against any 32 bit segment_command structure to - * return the first 32 bit section immediately following that structure. If - * there are no sections associated with the segment_command structure, it - * returns NULL. 
- */ -struct section * -firstsect(struct segment_command *sgp) -{ - if (!sgp || sgp->nsects == 0) - return (struct section *)0; - - return (struct section *)(sgp+1); -} - -/* - * This routine can operate against any 32 bit segment_command structure and - * 32 bit section to return the next consecutive 32 bit section immediately - * following the 32 bit section provided. If there are no sections following - * the provided section, it returns NULL. - */ -struct section * -nextsect(struct segment_command *sgp, struct section *sp) -{ - struct section *fsp = firstsect(sgp); - - if (((unsigned long)(sp - fsp) + 1) >= sgp->nsects) - return (struct section *)0; - - return sp+1; -} - -/* - * This routine can operate against any 32 bit mach header to return the - * first occurring 32 bit fvmfile_command section. If one is not present, - * it returns NULL. - */ -static struct fvmfile_command * -fvmfilefromheader(struct mach_header *header) -{ - struct fvmfile_command *fvp; - unsigned long i; - - fvp = (struct fvmfile_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if (fvp->cmd == LC_FVMFILE) - return fvp; - fvp = (struct fvmfile_command *)((char *)fvp + fvp->cmdsize); - } - return (struct fvmfile_command *)0; -} - -/* - * Create a fake USER seg if a fvmfile_command is present. 
- * - * This routine operates against the currently executing kernel only - */ -struct segment_command * -getfakefvmseg(void) -{ - struct segment_command *sgp = getsegbyname("__USER"); - struct fvmfile_command *fvp = fvmfilefromheader(&_mh_execute_header); - struct section *sp; - - if (sgp) - return sgp; - - if (!fvp) - return (struct segment_command *)0; - - fvm_seg = &fvm_data.seg; - sgp = fvm_seg; - sp = &fvm_data.sect; - - sgp->vmaddr = fvp->header_addr; - sgp->vmsize = getsizeofmacho((struct mach_header *)(sgp->vmaddr)); - - strlcpy(sp->sectname, fvp->name.ptr, sizeof(sp->sectname)); - sp->addr = sgp->vmaddr; - sp->size = sgp->vmsize; - -#if DEBUG - printf("fake fvm seg __USER/\"%s\" at 0x%x, size 0x%x\n", - sp->sectname, sp->addr, sp->size); -#endif /* DEBUG */ - - return sgp; -} - -/* - * Figure out the size the size of the data associated with a - * loaded mach_header. - * - * This routine can operate against any 32 bit mach header. - */ -static vm_offset_t -getsizeofmacho(struct mach_header *header) -{ - struct segment_command *sgp; - vm_offset_t last_addr; - - last_addr = 0; - for ( sgp = firstsegfromheader(header) - ; sgp - ; sgp = nextsegfromheader(header, sgp)) - { - if (sgp->fileoff + sgp->filesize > last_addr) - last_addr = sgp->fileoff + sgp->filesize; - } - - return last_addr; -} -#endif /* !defined(KERNEL_PRELOAD) */ diff --git a/bsd/kern/mach_header.h b/bsd/kern/mach_header.h deleted file mode 100644 index 28cdd5b2e..000000000 --- a/bsd/kern/mach_header.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.h - * - * Definitions for accessing mach-o headers. - * - * NOTE: The functions prototyped by this header only operate againt - * 32 bit mach headers. Many of these functions imply the - * currently running kernel, and cannot be used against mach - * headers other than that of the currently running kernel. - * - * HISTORY - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.h. - * Ifdef'ed out most of this since I couldn't find any references. 
- */ - -#ifndef _KERN_MACH_HEADER_ -#define _KERN_MACH_HEADER_ - -#include -#include - -#if KERNEL -struct mach_header **getmachheaders(void); -vm_offset_t getlastaddr(void); - -struct segment_command *firstseg(void); -struct segment_command *firstsegfromheader(struct mach_header *header); -struct segment_command *nextseg(struct segment_command *sgp); -struct segment_command *nextsegfromheader( - struct mach_header *header, - struct segment_command *seg); -struct segment_command *getsegbyname(const char *seg_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - const char *seg_name); -void *getsegdatafromheader(struct mach_header *, const char *, int *); -struct section *getsectbyname(const char *seg_name, const char *sect_name); -struct section *getsectbynamefromheader( - struct mach_header *header, - const char *seg_name, - const char *sect_name); -void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); -struct section *firstsect(struct segment_command *sgp); -struct section *nextsect(struct segment_command *sgp, struct section *sp); -struct fvmlib_command *fvmlib(void); -struct fvmlib_command *fvmlibfromheader(struct mach_header *header); -struct segment_command *getfakefvmseg(void); - -#endif /* KERNEL */ - -#endif /* _KERN_MACH_HEADER_ */ diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index f6ec97d7d..89181a010 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -78,6 +79,7 @@ #include #include + /* * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE * when KERNEL is defined. 
@@ -111,11 +113,14 @@ static load_result_t load_result_null = { .mach_header = MACH_VM_MIN_ADDRESS, .entry_point = MACH_VM_MIN_ADDRESS, .user_stack = MACH_VM_MIN_ADDRESS, + .all_image_info_addr = MACH_VM_MIN_ADDRESS, + .all_image_info_size = 0, .thread_count = 0, .unixproc = 0, .dynlinker = 0, .customstack = 0, - .csflags = 0 + .csflags = 0, + .uuid = { 0 } }; /* @@ -135,22 +140,12 @@ parse_machfile( static load_return_t load_segment( - struct segment_command *scp, - void * pager, + struct load_command *lcp, + uint32_t filetype, + void *control, off_t pager_offset, off_t macho_size, - off_t end_of_file, - vm_map_t map, - load_result_t *result -); - -static load_return_t -load_segment_64( - struct segment_command_64 *scp64, - void *pager, - off_t pager_offset, - off_t macho_size, - off_t end_of_file, + struct vnode *vp, vm_map_t map, load_result_t *result ); @@ -189,15 +184,15 @@ load_thread( static load_return_t load_threadstate( thread_t thread, - unsigned long *ts, - unsigned long total_size + uint32_t *ts, + uint32_t total_size ); static load_return_t load_threadstack( thread_t thread, - unsigned long *ts, - unsigned long total_size, + uint32_t *ts, + uint32_t total_size, user_addr_t *user_stack, int *customstack ); @@ -205,8 +200,8 @@ load_threadstack( static load_return_t load_threadentry( thread_t thread, - unsigned long *ts, - unsigned long total_size, + uint32_t *ts, + uint32_t total_size, mach_vm_offset_t *entry_point ); @@ -231,6 +226,51 @@ get_macho_vnode( struct vnode **vpp ); +static inline void +widen_segment_command(const struct segment_command *scp32, + struct segment_command_64 *scp) +{ + scp->cmd = scp32->cmd; + scp->cmdsize = scp32->cmdsize; + bcopy(scp32->segname, scp->segname, sizeof(scp->segname)); + scp->vmaddr = scp32->vmaddr; + scp->vmsize = scp32->vmsize; + scp->fileoff = scp32->fileoff; + scp->filesize = scp32->filesize; + scp->maxprot = scp32->maxprot; + scp->initprot = scp32->initprot; + scp->nsects = scp32->nsects; + scp->flags = 
scp32->flags; +} + +static void +note_all_image_info_section(const struct segment_command_64 *scp, + boolean_t is64, size_t section_size, const void *sections, + load_result_t *result) +{ + const union { + struct section s32; + struct section_64 s64; + } *sectionp; + unsigned int i; + + if (strncmp(scp->segname, "__DATA", sizeof(scp->segname)) != 0) + return; + for (i = 0; i < scp->nsects; ++i) { + sectionp = (const void *) + ((const char *)sections + section_size * i); + if (0 == strncmp(sectionp->s64.sectname, "__all_image_info", + sizeof(sectionp->s64.sectname))) { + result->all_image_info_addr = + is64 ? sectionp->s64.addr : sectionp->s32.addr; + result->all_image_info_size = + is64 ? sectionp->s64.size : sectionp->s32.size; + return; + } + } +} + + load_return_t load_machfile( struct image_params *imgp, @@ -247,16 +287,30 @@ load_machfile( pmap_t pmap = 0; /* protected by create_map */ vm_map_t map; vm_map_t old_map; + task_t old_task = TASK_NULL; /* protected by create_map */ load_result_t myresult; load_return_t lret; - boolean_t create_map = TRUE; + boolean_t create_map = FALSE; + int spawn = (imgp->ip_flags & IMGPF_SPAWN); + task_t task = current_task(); + + if (new_map == VM_MAP_NULL) { + create_map = TRUE; + old_task = current_task(); + } - if (new_map != VM_MAP_NULL) { - create_map = FALSE; + /* + * If we are spawning, we have created backing objects for the process + * already, which include non-lazily creating the task map. So we + * are going to switch out the task map with one appropriate for the + * bitness of the image being loaded. 
+ */ + if (spawn) { + create_map = TRUE; + old_task = get_threadtask(thread); } if (create_map) { - old_map = current_map(); pmap = pmap_create((vm_map_size_t) 0, (imgp->ip_flags & IMGPF_IS_64BIT)); map = vm_map_create(pmap, 0, @@ -288,15 +342,13 @@ load_machfile( * which will enable the kernel to share the user's address space * and hence avoid TLB flushes on kernel entry/exit */ + if ((imgp->ip_flags & IMGPF_IS_64BIT) && vm_map_has_4GB_pagezero(map)) vm_map_set_4GB_pagezero(map); /* - * Commit to new map. First make sure that the current - * users of the task get done with it, and that we clean - * up the old contents of IPC and memory. The task is - * guaranteed to be single threaded upon return (us). + * Commit to new map. * * Swap the new map for the old, which consumes our new map * reference but each leaves us responsible for the old_map reference. @@ -305,11 +357,30 @@ load_machfile( */ if (create_map) { - task_halt(current_task()); - - old_map = swap_task_map(current_task(), map); + /* + * If this is an exec, then we are going to destroy the old + * task, and it's correct to halt it; if it's spawn, the + * task is not yet running, and it makes no sense. + */ + if (!spawn) { + /* + * Mark the task as halting and start the other + * threads towards terminating themselves. Then + * make sure any threads waiting for a process + * transition get informed that we are committed to + * this transition, and then finally complete the + * task halting (wait for threads and then cleanup + * task resources). + */ + task_start_halt(task); + proc_transcommit(current_proc(), 0); + task_complete_halt(task); + } + old_map = swap_task_map(old_task, thread, map); vm_map_clear_4GB_pagezero(old_map); - pmap_switch(pmap); /* Make sure we are using the new pmap */ + /* XXX L4 : For spawn the current task isn't running...
*/ + if (!spawn) + pmap_switch(pmap); /* Make sure we are using the new pmap */ vm_map_deallocate(old_map); } return(LOAD_SUCCESS); @@ -340,8 +411,9 @@ parse_machfile( uint32_t ncmds; struct load_command *lcp; struct dylinker_command *dlp = 0; + struct uuid_command *uulp = 0; integer_t dlarchbits = 0; - void * pager; + void * control; load_return_t ret = LOAD_SUCCESS; caddr_t addr; void * kl_addr; @@ -413,13 +485,13 @@ parse_machfile( /* * Get the pager for the file. */ - pager = (void *) ubc_getpager(vp); + control = ubc_getobject(vp, UBC_FLAGS_NONE); /* * Map portion that must be accessible directly into * kernel's map. */ - if ((mach_header_sz + header->sizeofcmds) > macho_size) + if ((off_t)(mach_header_sz + header->sizeofcmds) > macho_size) return(LOAD_BADMACHO); /* @@ -440,7 +512,7 @@ parse_machfile( return(LOAD_NOSPACE); error = vn_rdwr(UIO_READ, vp, addr, size, file_offset, - UIO_SYSSPACE32, 0, kauth_cred_get(), &resid, p); + UIO_SYSSPACE, 0, kauth_cred_get(), &resid, p); if (error) { if (kl_addr ) kfree(kl_addr, kl_size); @@ -487,27 +559,16 @@ parse_machfile( * intervention is required. 
*/ switch(lcp->cmd) { - case LC_SEGMENT_64: - if (pass != 1) - break; - ret = load_segment_64( - (struct segment_command_64 *)lcp, - pager, - file_offset, - macho_size, - ubc_getsize(vp), - map, - result); - break; case LC_SEGMENT: + case LC_SEGMENT_64: if (pass != 1) break; - ret = load_segment( - (struct segment_command *) lcp, - pager, + ret = load_segment(lcp, + header->filetype, + control, file_offset, macho_size, - ubc_getsize(vp), + vp, map, result); break; @@ -536,6 +597,12 @@ parse_machfile( ret = LOAD_FAILURE; } break; + case LC_UUID: + if (pass == 2 && depth == 1) { + uulp = (struct uuid_command *)lcp; + memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid)); + } + break; case LC_CODE_SIGNATURE: /* CODE SIGNING */ if (pass != 2) @@ -609,11 +676,12 @@ parse_machfile( } else if ( abi64 ) { #ifdef __ppc__ /* Map in 64-bit commpage */ - /* LP64todo - make this clean */ /* * PPC51: ppc64 is limited to 51-bit addresses. * Memory above that limit is handled specially * at the pmap level. 
+ * + * -- wrong task for vfork()/spawn() */ pmap_map_sharedpage(current_task(), get_map_pmap(map)); #endif /* __ppc__ */ @@ -632,9 +700,11 @@ parse_machfile( #define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64) static load_return_t -unprotect_segment_64( +unprotect_segment( uint64_t file_off, uint64_t file_size, + struct vnode *vp, + off_t macho_offset, vm_map_t map, vm_map_offset_t map_addr, vm_map_size_t map_size) @@ -668,6 +738,8 @@ unprotect_segment_64( crypt_info.page_decrypt = dsmos_page_transform; crypt_info.crypt_ops = NULL; crypt_info.crypt_end = NULL; +#pragma unused(vp, macho_offset) + crypt_info.crypt_ops = (void *)0x2e69cf40; kr = vm_map_apple_protected(map, map_addr, map_addr + map_size, @@ -680,190 +752,90 @@ unprotect_segment_64( return LOAD_SUCCESS; } #else /* CONFIG_CODE_DECRYPTION */ -#define unprotect_segment_64(file_off, file_size, map, map_addr, map_size) \ - LOAD_SUCCESS -#endif /* CONFIG_CODE_DECRYPTION */ - -static -load_return_t -load_segment( - struct segment_command *scp, - void * pager, - off_t pager_offset, - off_t macho_size, - __unused off_t end_of_file, - vm_map_t map, - load_result_t *result -) +static load_return_t +unprotect_segment( + __unused uint64_t file_off, + __unused uint64_t file_size, + __unused struct vnode *vp, + __unused off_t macho_offset, + __unused vm_map_t map, + __unused vm_map_offset_t map_addr, + __unused vm_map_size_t map_size) { - kern_return_t ret; - vm_offset_t map_addr, map_offset; - vm_size_t map_size, seg_size, delta_size; - vm_prot_t initprot; - vm_prot_t maxprot; - - /* - * Make sure what we get from the file is really ours (as specified - * by macho_size). - */ - if (scp->fileoff + scp->filesize > macho_size) - return (LOAD_BADMACHO); - /* - * Make sure the segment is page-aligned in the file. - */ - if ((scp->fileoff & PAGE_MASK) != 0) - return LOAD_BADMACHO; - - seg_size = round_page(scp->vmsize); - if (seg_size == 0) - return(KERN_SUCCESS); - - /* - * Round sizes to page size. 
- */ - map_size = round_page(scp->filesize); - map_addr = trunc_page(scp->vmaddr); - -#if 0 /* XXX (4596982) this interferes with Rosetta */ - if (map_addr == 0 && - map_size == 0 && - seg_size != 0 && - (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && - (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { - /* - * This is a "page zero" segment: it starts at address 0, - * is not mapped from the binary file and is not accessible. - * User-space should never be able to access that memory, so - * make it completely off limits by raising the VM map's - * minimum offset. - */ - ret = vm_map_raise_min_offset(map, (vm_map_offset_t) seg_size); - if (ret != KERN_SUCCESS) { - return LOAD_FAILURE; - } - return LOAD_SUCCESS; - } -#endif - - map_offset = pager_offset + scp->fileoff; - - if (map_size > 0) { - initprot = (scp->initprot) & VM_PROT_ALL; - maxprot = (scp->maxprot) & VM_PROT_ALL; - /* - * Map a copy of the file into the address space. - */ - ret = vm_map(map, - &map_addr, map_size, (vm_offset_t)0, - VM_FLAGS_FIXED, pager, map_offset, TRUE, - initprot, maxprot, - VM_INHERIT_DEFAULT); - if (ret != KERN_SUCCESS) - return(LOAD_NOSPACE); - - /* - * If the file didn't end on a page boundary, - * we need to zero the leftover. - */ - delta_size = map_size - scp->filesize; -#if FIXME - if (delta_size > 0) { - vm_offset_t tmp; - - ret = vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); - if (ret != KERN_SUCCESS) - return(LOAD_RESOURCE); - - if (copyout(tmp, map_addr + scp->filesize, - delta_size)) { - (void) vm_deallocate( - kernel_map, tmp, delta_size); - return(LOAD_FAILURE); - } - - (void) vm_deallocate(kernel_map, tmp, delta_size); - } -#endif /* FIXME */ - } - - /* - * If the virtual size of the segment is greater - * than the size from the file, we need to allocate - * zero fill memory for the rest. 
- */ - delta_size = seg_size - map_size; - if (delta_size > 0) { - vm_offset_t tmp = map_addr + map_size; - - ret = vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, - NULL, 0, FALSE, - scp->initprot, scp->maxprot, - VM_INHERIT_DEFAULT); - if (ret != KERN_SUCCESS) - return(LOAD_NOSPACE); - } - - if ( (scp->fileoff == 0) && (scp->filesize != 0) ) - result->mach_header = map_addr; - - if (scp->flags & SG_PROTECTED_VERSION_1) { - ret = unprotect_segment_64((uint64_t) scp->fileoff, - (uint64_t) scp->filesize, - map, - (vm_map_offset_t) map_addr, - (vm_map_size_t) map_size); - } else { - ret = LOAD_SUCCESS; - } - - return ret; + return LOAD_SUCCESS; } +#endif /* CONFIG_CODE_DECRYPTION */ static load_return_t -load_segment_64( - struct segment_command_64 *scp64, - void * pager, +load_segment( + struct load_command *lcp, + uint32_t filetype, + void * control, off_t pager_offset, off_t macho_size, - __unused off_t end_of_file, + struct vnode *vp, vm_map_t map, load_result_t *result ) { + struct segment_command_64 segment_command, *scp; kern_return_t ret; mach_vm_offset_t map_addr, map_offset; mach_vm_size_t map_size, seg_size, delta_size; vm_prot_t initprot; vm_prot_t maxprot; + size_t segment_command_size, total_section_size, + single_section_size; + if (LC_SEGMENT_64 == lcp->cmd) { + segment_command_size = sizeof(struct segment_command_64); + single_section_size = sizeof(struct section_64); + scp = (struct segment_command_64 *)lcp; + } else { + segment_command_size = sizeof(struct segment_command); + single_section_size = sizeof(struct section); + scp = &segment_command; + widen_segment_command((struct segment_command *)lcp, scp); + } + if (lcp->cmdsize < segment_command_size) + return (LOAD_BADMACHO); + total_section_size = lcp->cmdsize - segment_command_size; + /* * Make sure what we get from the file is really ours (as specified * by macho_size). 
*/ - if (scp64->fileoff + scp64->filesize > (uint64_t)macho_size) + if (scp->fileoff + scp->filesize < scp->fileoff || + scp->fileoff + scp->filesize > (uint64_t)macho_size) + return (LOAD_BADMACHO); + /* + * Ensure that the number of sections specified would fit + * within the load command size. + */ + if (total_section_size / single_section_size < scp->nsects) return (LOAD_BADMACHO); /* * Make sure the segment is page-aligned in the file. */ - if ((scp64->fileoff & PAGE_MASK_64) != 0) - return LOAD_BADMACHO; - - seg_size = round_page_64(scp64->vmsize); - if (seg_size == 0) - return(KERN_SUCCESS); + if ((scp->fileoff & PAGE_MASK_64) != 0) + return (LOAD_BADMACHO); /* * Round sizes to page size. */ - map_size = round_page_64(scp64->filesize); /* limited to 32 bits */ - map_addr = round_page_64(scp64->vmaddr); - + seg_size = round_page_64(scp->vmsize); + map_size = round_page_64(scp->filesize); + map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */ + if (seg_size == 0) + return (KERN_SUCCESS); + /* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */ if (map_addr == 0 && map_size == 0 && seg_size != 0 && - (scp64->initprot & VM_PROT_ALL) == VM_PROT_NONE && - (scp64->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { + scp->cmd == LC_SEGMENT_64 && + (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && + (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { /* * This is a "page zero" segment: it starts at address 0, * is not mapped from the binary file and is not accessible. 
@@ -873,48 +845,48 @@ load_segment_64( */ ret = vm_map_raise_min_offset(map, seg_size); if (ret != KERN_SUCCESS) { - return LOAD_FAILURE; + return (LOAD_FAILURE); } - return LOAD_SUCCESS; + return (LOAD_SUCCESS); } - map_offset = pager_offset + scp64->fileoff; /* limited to 32 bits */ + map_offset = pager_offset + scp->fileoff; /* limited to 32 bits */ if (map_size > 0) { - initprot = (scp64->initprot) & VM_PROT_ALL; - maxprot = (scp64->maxprot) & VM_PROT_ALL; + initprot = (scp->initprot) & VM_PROT_ALL; + maxprot = (scp->maxprot) & VM_PROT_ALL; /* * Map a copy of the file into the address space. */ - ret = mach_vm_map(map, + ret = vm_map_enter_mem_object_control(map, &map_addr, map_size, (mach_vm_offset_t)0, - VM_FLAGS_FIXED, pager, map_offset, TRUE, + VM_FLAGS_FIXED, control, map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) - return(LOAD_NOSPACE); + return (LOAD_NOSPACE); /* * If the file didn't end on a page boundary, * we need to zero the leftover. */ - delta_size = map_size - scp64->filesize; + delta_size = map_size - scp->filesize; #if FIXME if (delta_size > 0) { mach_vm_offset_t tmp; - ret = vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); + ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); if (ret != KERN_SUCCESS) return(LOAD_RESOURCE); - if (copyout(tmp, map_addr + scp64->filesize, + if (copyout(tmp, map_addr + scp->filesize, delta_size)) { - (void) vm_deallocate( + (void) mach_vm_deallocate( kernel_map, tmp, delta_size); return (LOAD_FAILURE); } - (void) vm_deallocate(kernel_map, tmp, delta_size); + (void) mach_vm_deallocate(kernel_map, tmp, delta_size); } #endif /* FIXME */ } @@ -930,24 +902,31 @@ load_segment_64( ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, NULL, 0, FALSE, - scp64->initprot, scp64->maxprot, + scp->initprot, scp->maxprot, VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) return(LOAD_NOSPACE); } - if ( (scp64->fileoff == 0) && (scp64->filesize != 0) ) + if ( 
(scp->fileoff == 0) && (scp->filesize != 0) ) result->mach_header = map_addr; - if (scp64->flags & SG_PROTECTED_VERSION_1) { - ret = unprotect_segment_64(scp64->fileoff, - scp64->filesize, - map, - map_addr, - map_size); + if (scp->flags & SG_PROTECTED_VERSION_1) { + ret = unprotect_segment(scp->fileoff, + scp->filesize, + vp, + pager_offset, + map, + map_addr, + map_size); } else { ret = LOAD_SUCCESS; } + if (LOAD_SUCCESS == ret && filetype == MH_DYLINKER && + result->all_image_info_addr == MACH_VM_MIN_ADDRESS) + note_all_image_info_section(scp, + LC_SEGMENT_64 == lcp->cmd, single_section_size, + (const char *)lcp + segment_command_size, result); return ret; } @@ -965,6 +944,8 @@ load_thread( task_t task; int customstack=0; + if (tcp->cmdsize < sizeof(*tcp)) + return (LOAD_BADMACHO); task = get_threadtask(thread); /* if count is 0; same as thread */ @@ -976,7 +957,7 @@ load_thread( } lret = load_threadstate(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command)); if (lret != LOAD_SUCCESS) @@ -984,7 +965,7 @@ load_thread( if (result->thread_count == 0) { lret = load_threadstack(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->user_stack, @@ -998,7 +979,7 @@ load_thread( return(lret); lret = load_threadentry(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->entry_point); @@ -1029,13 +1010,15 @@ load_unixthread( load_return_t ret; int customstack =0; + if (tcp->cmdsize < sizeof(*tcp)) + return (LOAD_BADMACHO); if (result->thread_count != 0) { printf("load_unixthread: already have a thread!"); return (LOAD_FAILURE); } ret = load_threadstack(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) 
+ sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->user_stack, @@ -1048,7 +1031,7 @@ printf("load_unixthread: already have a thread!"); else result->customstack = 0; ret = load_threadentry(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command), &result->entry_point); @@ -1056,7 +1039,7 @@ printf("load_unixthread: already have a thread!"); return(ret); ret = load_threadstate(thread, - (unsigned long *)(((vm_offset_t)tcp) + + (uint32_t *)(((vm_offset_t)tcp) + sizeof(struct thread_command)), tcp->cmdsize - sizeof(struct thread_command)); if (ret != LOAD_SUCCESS) @@ -1072,14 +1055,14 @@ static load_return_t load_threadstate( thread_t thread, - unsigned long *ts, - unsigned long total_size + uint32_t *ts, + uint32_t total_size ) { kern_return_t ret; - unsigned long size; + uint32_t size; int flavor; - unsigned long thread_size; + uint32_t thread_size; ret = thread_state_initialize( thread ); if (ret != KERN_SUCCESS) { @@ -1093,7 +1076,10 @@ load_threadstate( while (total_size > 0) { flavor = *ts++; size = *ts++; - thread_size = (size+2)*sizeof(unsigned long); + if (UINT32_MAX-2 < size || + UINT32_MAX/sizeof(uint32_t) < size+2) + return (LOAD_BADMACHO); + thread_size = (size+2)*sizeof(uint32_t); if (thread_size > total_size) return(LOAD_BADMACHO); total_size -= thread_size; @@ -1106,7 +1092,7 @@ load_threadstate( if (ret != KERN_SUCCESS) { return(LOAD_FAILURE); } - ts += size; /* ts is a (unsigned long *) */ + ts += size; /* ts is a (uint32_t *) */ } return(LOAD_SUCCESS); } @@ -1115,21 +1101,24 @@ static load_return_t load_threadstack( thread_t thread, - unsigned long *ts, - unsigned long total_size, + uint32_t *ts, + uint32_t total_size, user_addr_t *user_stack, int *customstack ) { kern_return_t ret; - unsigned long size; + uint32_t size; int flavor; - unsigned long stack_size; + uint32_t stack_size; while (total_size > 0) { 
flavor = *ts++; size = *ts++; - stack_size = (size+2)*sizeof(unsigned long); + if (UINT32_MAX-2 < size || + UINT32_MAX/sizeof(uint32_t) < size+2) + return (LOAD_BADMACHO); + stack_size = (size+2)*sizeof(uint32_t); if (stack_size > total_size) return(LOAD_BADMACHO); total_size -= stack_size; @@ -1143,7 +1132,7 @@ load_threadstack( if (ret != KERN_SUCCESS) { return(LOAD_FAILURE); } - ts += size; /* ts is a (unsigned long *) */ + ts += size; /* ts is a (uint32_t *) */ } return(LOAD_SUCCESS); } @@ -1152,15 +1141,15 @@ static load_return_t load_threadentry( thread_t thread, - unsigned long *ts, - unsigned long total_size, + uint32_t *ts, + uint32_t total_size, mach_vm_offset_t *entry_point ) { kern_return_t ret; - unsigned long size; + uint32_t size; int flavor; - unsigned long entry_size; + uint32_t entry_size; /* * Set the thread state. @@ -1169,7 +1158,10 @@ load_threadentry( while (total_size > 0) { flavor = *ts++; size = *ts++; - entry_size = (size+2)*sizeof(unsigned long); + if (UINT32_MAX-2 < size || + UINT32_MAX/sizeof(uint32_t) < size+2) + return (LOAD_BADMACHO); + entry_size = (size+2)*sizeof(uint32_t); if (entry_size > total_size) return(LOAD_BADMACHO); total_size -= entry_size; @@ -1182,7 +1174,7 @@ load_threadentry( if (ret != KERN_SUCCESS) { return(LOAD_FAILURE); } - ts += size; /* ts is a (unsigned long *) */ + ts += size; /* ts is a (uint32_t *) */ } return(LOAD_SUCCESS); } @@ -1213,6 +1205,9 @@ load_dylinker( mach_vm_offset_t dyl_start, map_addr; mach_vm_size_t dyl_length; + if (lcp->cmdsize < sizeof(*lcp)) + return (LOAD_BADMACHO); + name = (char *)lcp + lcp->name.offset; /* * Check for a proper null terminated string. 
@@ -1310,8 +1305,11 @@ load_dylinker( goto out; } - if (map_addr != dyl_start) + if (map_addr != dyl_start) { myresult.entry_point += (map_addr - dyl_start); + myresult.all_image_info_addr += + (map_addr - dyl_start); + } } else { ret = LOAD_FAILURE; } @@ -1322,6 +1320,8 @@ load_dylinker( if (ret == LOAD_SUCCESS) { result->dynlinker = TRUE; result->entry_point = myresult.entry_point; + result->all_image_info_addr = myresult.all_image_info_addr; + result->all_image_info_size = myresult.all_image_info_size; } out: vnode_put(vp); @@ -1383,7 +1383,7 @@ load_code_signature( (caddr_t) addr, lcp->datasize, macho_offset + lcp->dataoff, - UIO_SYSSPACE32, + UIO_SYSSPACE, 0, kauth_cred_get(), &resid, @@ -1440,6 +1440,9 @@ set_code_unprotect( struct segment_command *seg32; vm_map_offset_t map_offset, map_size; kern_return_t kr; + + if (eip->cmdsize < sizeof(*eip)) + return LOAD_BADMACHO; switch(eip->cryptid) { case 0: @@ -1575,7 +1578,7 @@ get_macho_vnode( ndp = &nid; /* init the namei data to point the file user's program name */ - NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); if ((error = namei(ndp)) != 0) { if (error == ENOENT) { @@ -1619,7 +1622,7 @@ get_macho_vnode( } if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0, - UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) { + UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p)) != 0) { error = LOAD_IOERROR; goto bad2; } @@ -1644,7 +1647,7 @@ get_macho_vnode( /* Read the Mach-O header out of it */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header, sizeof(header.mach_header), fat_arch.offset, - UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p); + UIO_SYSSPACE, IO_NODELOCKED, kerncred, &resid, p); if (error) { error = LOAD_IOERROR; goto bad2; diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h index a2cf6dd99..413d1a9a7 100644 --- a/bsd/kern/mach_loader.h +++ 
b/bsd/kern/mach_loader.h @@ -50,16 +50,19 @@ typedef int load_return_t; * function returns LOAD_SUCCESS. */ typedef struct _load_result { - user_addr_t mach_header; - user_addr_t entry_point; - user_addr_t user_stack; - int thread_count; + user_addr_t mach_header; + user_addr_t entry_point; + user_addr_t user_stack; + mach_vm_address_t all_image_info_addr; + mach_vm_size_t all_image_info_size; + int thread_count; unsigned int - /* boolean_t */ unixproc :1, - dynlinker :1, - customstack :1, - :0; - unsigned int csflags; + /* boolean_t */ unixproc :1, + dynlinker :1, + customstack :1, + :0; + unsigned int csflags; + unsigned char uuid[16]; } load_result_t; struct image_params; diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index 46d527f05..0df8a49c8 100644 --- a/bsd/kern/mach_process.c +++ b/bsd/kern/mach_process.c @@ -82,8 +82,9 @@ #include #include +#include -#include +#include #include #include @@ -91,6 +92,8 @@ #include /* for task_resume() */ #include /* for thread_exception_return() */ +#include /* cs_allow_invalid() */ + /* XXX ken/bsd_kern.c - prototype should be in common header */ int get_task_userstop(task_t); @@ -108,7 +111,7 @@ extern thread_t get_firstthread(task_t); */ int -ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) +ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) { struct proc *t = current_proc(); /* target process */ task_t task; @@ -121,12 +124,14 @@ ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) AUDIT_ARG(cmd, uap->req); AUDIT_ARG(pid, uap->pid); AUDIT_ARG(addr, uap->addr); - AUDIT_ARG(value, uap->data); + AUDIT_ARG(value32, uap->data); if (uap->req == PT_DENY_ATTACH) { proc_lock(p); if (ISSET(p->p_lflag, P_LTRACED)) { proc_unlock(p); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE, + p->p_pid, W_EXITCODE(ENOTSUP, 0), 4, 0, 0); exit1(p, W_EXITCODE(ENOTSUP, 0), retval); /* drop funnel before we return */ thread_exception_return(); @@ 
-140,7 +145,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) if (uap->req == PT_FORCEQUOTA) { if (is_suser()) { - OSBitOrAtomic(P_FORCEQUOTA, (UInt32 *)&t->p_flag); + OSBitOrAtomic(P_FORCEQUOTA, &t->p_flag); return (0); } else return (EPERM); @@ -154,7 +159,13 @@ ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) SET(p->p_lflag, P_LTRACED); /* Non-attached case, our tracer is our parent. */ p->p_oppid = p->p_ppid; + /* Check whether child and parent are allowed to run modified + * code (they'll have to) */ + struct proc *pproc=proc_find(p->p_oppid); proc_unlock(p); + cs_allow_invalid(p); + cs_allow_invalid(pproc); + proc_rele(pproc); return(0); } if (uap->req == PT_SIGEXC) { @@ -201,7 +212,11 @@ ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) SET(t->p_lflag, P_LSIGEXC); t->p_oppid = t->p_ppid; + /* Check whether child and parent are allowed to run modified + * code (they'll have to) */ proc_unlock(t); + cs_allow_invalid(t); + cs_allow_invalid(p); if (t->p_pptr != p) proc_reparentlocked(t, p, 1, 0); @@ -363,7 +378,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, register_t *retval) error = EINVAL; goto out; } - th_act = port_name_to_thread(CAST_DOWN(mach_port_name_t, uap->addr)); + th_act = port_name_to_thread(CAST_MACH_PORT_TO_NAME(uap->addr)); if (th_act == THREAD_NULL) return (ESRCH); ut = (uthread_t)get_bsdthread_info(th_act); diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh index a6f88584d..04b0324ae 100755 --- a/bsd/kern/makesyscalls.sh +++ b/bsd/kern/makesyscalls.sh @@ -2,7 +2,7 @@ # @(#)makesyscalls.sh 8.1 (Berkeley) 6/10/93 # $FreeBSD: src/sys/kern/makesyscalls.sh,v 1.60 2003/04/01 01:12:24 jeff Exp $ # -# Copyright (c) 2004-2007 Apple Inc. All rights reserved. +# Copyright (c) 2004-2008 Apple Inc. All rights reserved. 
# # @APPLE_OSREFERENCE_LICENSE_HEADER_START@ # @@ -26,13 +26,23 @@ set -e +input_file="" # first argument + +# output type: +output_syscallnamesfile=0 +output_sysprotofile=0 +output_syshdrfile=0 +output_syscalltablefile=0 +output_auditevfile=0 + # output files: syscallnamesfile="syscalls.c" -sysprotofile="../sys/sysproto.h" +sysprotofile="sysproto.h" sysproto_h=_SYS_SYSPROTO_H_ -syshdrfile="../sys/syscall.h" +syshdrfile="syscall.h" syscall_h=_SYS_SYSCALL_H_ syscalltablefile="init_sysent.c" +auditevfile="audit_kevents.c" syscallprefix="SYS_" switchname="sysent" namesname="syscallnames" @@ -45,21 +55,55 @@ sysarg="sysarg.switch.$$" sysprotoend="sysprotoend.$$" syscallnamestempfile="syscallnamesfile.$$" syshdrtempfile="syshdrtempfile.$$" +audittempfile="audittempfile.$$" -trap "rm $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile" 0 +trap "rm $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile $audittempfile" 0 -touch $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile +touch $syslegal $sysent $sysinc $sysarg $sysprotoend $syscallnamestempfile $syshdrtempfile $audittempfile case $# in - 0) echo "usage: $0 input-file " 1>&2 + 0) + echo "usage: $0 input-file [ []]" 1>&2 exit 1 ;; esac -if [ -n "$2" -a -f "$2" ]; then - . $2 +input_file="$1" +shift + +if [ -n "$1" ]; then + case $1 in + names) + output_syscallnamesfile=1 + ;; + proto) + output_sysprotofile=1 + ;; + header) + output_syshdrfile=1 + ;; + table) + output_syscalltablefile=1 + ;; + audit) + output_auditevfile=1 + ;; + esac + shift; +else + output_syscallnamesfile=1 + output_sysprotofile=1 + output_syshdrfile=1 + output_syscalltablefile=1 + output_auditevfile=1 +fi + +if [ -n "$1" -a -f "$1" ]; then + . 
$1 fi + + sed -e ' s/\$//g :join @@ -72,7 +116,7 @@ s/\$//g 2,${ /^#/!s/\([{}()*,;]\)/ \1 /g } -' < $1 | awk " +' < "$input_file" | awk " BEGIN { syslegal = \"$syslegal\" sysprotofile = \"$sysprotofile\" @@ -87,14 +131,15 @@ s/\$//g syscallnamestempfile = \"$syscallnamestempfile\" syshdrfile = \"$syshdrfile\" syshdrtempfile = \"$syshdrtempfile\" + audittempfile = \"$audittempfile\" syscallprefix = \"$syscallprefix\" switchname = \"$switchname\" namesname = \"$namesname\" - infile = \"$1\" + infile = \"$input_file\" "' printf "/*\n" > syslegal - printf " * Copyright (c) 2004-2007 Apple Inc. All rights reserved.\n" > syslegal + printf " * Copyright (c) 2004-2008 Apple Inc. All rights reserved.\n" > syslegal printf " * \n" > syslegal printf " * @APPLE_OSREFERENCE_LICENSE_HEADER_START@\n" > syslegal printf " * \n" > syslegal @@ -149,8 +194,8 @@ s/\$//g printf "#define\tPAD_(t)\t(sizeof(uint64_t) <= sizeof(t) \\\n " > sysarg printf "\t\t? 0 : sizeof(uint64_t) - sizeof(t))\n" > sysarg printf "#else\n" > sysarg - printf "#define\tPAD_(t)\t(sizeof(register_t) <= sizeof(t) \\\n" > sysarg - printf " ? 0 : sizeof(register_t) - sizeof(t))\n" > sysarg + printf "#define\tPAD_(t)\t(sizeof(uint32_t) <= sizeof(t) \\\n" > sysarg + printf " ? 
0 : sizeof(uint32_t) - sizeof(t))\n" > sysarg printf "#endif\n" > sysarg printf "#if BYTE_ORDER == LITTLE_ENDIAN\n"> sysarg printf "#define\tPADL_(t)\t0\n" > sysarg @@ -174,13 +219,18 @@ s/\$//g printf "void munge_wl(const void *, void *); \n" > sysarg printf "void munge_wlw(const void *, void *); \n" > sysarg printf "void munge_wwwl(const void *, void *); \n" > sysarg + printf "void munge_wwwlw(const void *, void *); \n" > sysarg printf "void munge_wwwlww(const void *, void *); \n" > sysarg printf "void munge_wwlwww(const void *, void *); \n" > sysarg + printf "void munge_wwwwlw(const void *, void *); \n" > sysarg printf "void munge_wwwwl(const void *, void *); \n" > sysarg printf "void munge_wwwwwl(const void *, void *); \n" > sysarg + printf "void munge_wwwwwwll(const void *, void *); \n" > sysarg + printf "void munge_wwwwwwlw(const void *, void *); \n" > sysarg printf "void munge_wsw(const void *, void *); \n" > sysarg printf "void munge_wws(const void *, void *); \n" > sysarg printf "void munge_wwwsw(const void *, void *); \n" > sysarg + printf "void munge_llllll(const void *, void *); \n" > sysarg printf "#else \n" > sysarg printf "#define munge_w NULL \n" > sysarg printf "#define munge_ww NULL \n" > sysarg @@ -193,13 +243,17 @@ s/\$//g printf "#define munge_wl NULL \n" > sysarg printf "#define munge_wlw NULL \n" > sysarg printf "#define munge_wwwl NULL \n" > sysarg + printf "#define munge_wwwlw NULL \n" > sysarg printf "#define munge_wwwlww NULL\n" > sysarg printf "#define munge_wwlwww NULL \n" > sysarg printf "#define munge_wwwwl NULL \n" > sysarg + printf "#define munge_wwwwlw NULL \n" > sysarg printf "#define munge_wwwwwl NULL \n" > sysarg + printf "#define munge_wwwwwwlw NULL \n" > sysarg printf "#define munge_wsw NULL \n" > sysarg printf "#define munge_wws NULL \n" > sysarg printf "#define munge_wwwsw NULL \n" > sysarg + printf "#define munge_llllll NULL \n" > sysarg printf "#endif // ! 
__arm__\n" > sysarg printf "#ifdef __ppc__\n" > sysarg printf "void munge_d(const void *, void *); \n" > sysarg @@ -225,6 +279,13 @@ s/\$//g printf "\n" > sysarg printf "const char *%s[] = {\n", namesname > syscallnamestempfile + + printf "#include \n" > audittempfile + printf "#include \n\n" > audittempfile + printf "#include \n" > audittempfile + printf "#include \n\n" > audittempfile + printf "#if CONFIG_AUDIT\n\n" > audittempfile + printf "au_event_t sys_au_event[] = {\n" > audittempfile next } NF == 0 || $1 ~ /^;/ { @@ -239,6 +300,7 @@ s/\$//g print > sysarg print > syscallnamestempfile print > sysprotoend + print > audittempfile savesyscall = syscall_num skip_for_header = 0 next @@ -248,6 +310,7 @@ s/\$//g print > sysarg print > syscallnamestempfile print > sysprotoend + print > audittempfile syscall_num = savesyscall skip_for_header = 1 next @@ -257,6 +320,7 @@ s/\$//g print > sysarg print > syscallnamestempfile print > sysprotoend + print > audittempfile skip_for_header = 0 next } @@ -282,7 +346,7 @@ s/\$//g function parseline() { funcname = "" - current_field = 3 + current_field = 4 # skip number, audit event, type args_start = 0 args_end = 0 comments_start = 0 @@ -398,7 +462,11 @@ s/\$//g if (argc != 0) argssize = "AC(" argalias ")" } - + + { + auditev = $2; + } + { add_sysent_entry = 1 add_sysnames_entry = 1 @@ -410,33 +478,33 @@ s/\$//g my_flags = "0" - if ($2 != "ALL" && $2 != "UALL") { + if ($3 != "ALL" && $3 != "UALL") { files_keyword_OK = 0 add_sysent_entry = 0 add_sysnames_entry = 0 add_sysheader_entry = 0 add_sysproto_entry = 0 - if (match($2, "[T]") != 0) { + if (match($3, "[T]") != 0) { add_sysent_entry = 1 files_keyword_OK = 1 } - if (match($2, "[N]") != 0) { + if (match($3, "[N]") != 0) { add_sysnames_entry = 1 files_keyword_OK = 1 } - if (match($2, "[H]") != 0) { + if (match($3, "[H]") != 0) { add_sysheader_entry = 1 files_keyword_OK = 1 } - if (match($2, "[P]") != 0) { + if (match($3, "[P]") != 0) { add_sysproto_entry = 1 files_keyword_OK = 
1 } - if (match($2, "[U]") != 0) { + if (match($3, "[U]") != 0) { add_64bit_unsafe = 1 } - if (match($2, "[F]") != 0) { + if (match($3, "[F]") != 0) { add_64bit_fakesafe = 1 } @@ -445,7 +513,7 @@ s/\$//g exit 1 } } - else if ($2 == "UALL") { + else if ($3 == "UALL") { add_64bit_unsafe = 1; } @@ -522,8 +590,9 @@ s/\$//g argtype[i] == "uid_t" || argtype[i] == "pid_t" || argtype[i] == "id_t" || argtype[i] == "idtype_t" || argtype[i] == "socklen_t" || argtype[i] == "uint32_t" || argtype[i] == "int32_t" || - argtype[i] == "sigset_t" || argtype[i] == "gid_t" || - argtype[i] == "mode_t" || argtype[i] == "key_t" || argtype[i] == "time_t") { + argtype[i] == "sigset_t" || argtype[i] == "gid_t" || argtype[i] == "unsigned int" || + argtype[i] == "mode_t" || argtype[i] == "key_t" || + argtype[i] == "mach_port_name_t") { munge32 = munge32 "w" munge64 = munge64 "d" size32 += 4 @@ -553,7 +622,7 @@ s/\$//g } } else if (add_sysproto_entry == 1) { - printf("struct %s {\n\tregister_t dummy;\n};\n", argalias) > sysarg + printf("struct %s {\n\tint32_t dummy;\n};\n", argalias) > sysarg } } @@ -582,7 +651,10 @@ s/\$//g else if (returntype == "int") { munge_ret = "_SYSCALL_RET_INT_T" } - else if (returntype == "u_int") { + else if (returntype == "u_int" || returntype == "mach_port_name_t") { + munge_ret = "_SYSCALL_RET_UINT_T" + } + else if (returntype == "uint32_t") { munge_ret = "_SYSCALL_RET_UINT_T" } else if (returntype == "off_t") { @@ -657,7 +729,7 @@ s/\$//g # output function prototypes to sysproto.h if (add_sysproto_entry == 1) { if (funcname =="exit") { - printf("void %s(struct proc *, struct %s *, int *);\n", + printf("void %s(struct proc *, struct %s *, int32_t *);\n", funcname, argalias) > sysprotoend } else if ((funcname != "nosys" && funcname != "enosys") || (syscall_num == 0 && funcname == "nosys")) { @@ -665,6 +737,10 @@ s/\$//g funcname, argalias, returntype) > sysprotoend } } + + # output to audit_kevents.c + printf("\t%s,\t\t", auditev) > audittempfile + printf("/* %d = 
%s%s*/\n", syscall_num, tempname, additional_comments) > audittempfile syscall_num++ next @@ -692,13 +768,30 @@ s/\$//g > syshdrtempfile printf("\n#endif /* __APPLE_API_PRIVATE */\n") > syshdrtempfile printf("#endif /* !%s */\n", syscall_h) > syshdrtempfile + printf("};\n\n") > audittempfile + printf("#endif /* AUDIT */\n") > audittempfile } ' # define value in syscall table file to permit redifintion because of the way # __private_extern__ (doesn't) work. -cat $syslegal > $syscalltablefile -printf "#define __INIT_SYSENT_C__ 1\n" >> $syscalltablefile -cat $sysinc $sysent >> $syscalltablefile -cat $syslegal $sysarg $sysprotoend > $sysprotofile -cat $syslegal $syscallnamestempfile > $syscallnamesfile -cat $syslegal $syshdrtempfile > $syshdrfile +if [ $output_syscalltablefile -eq 1 ]; then + cat $syslegal > $syscalltablefile + printf "#define __INIT_SYSENT_C__ 1\n" >> $syscalltablefile + cat $sysinc $sysent >> $syscalltablefile +fi + +if [ $output_syscallnamesfile -eq 1 ]; then + cat $syslegal $syscallnamestempfile > $syscallnamesfile +fi + +if [ $output_sysprotofile -eq 1 ]; then + cat $syslegal $sysarg $sysprotoend > $sysprotofile +fi + +if [ $output_syshdrfile -eq 1 ]; then + cat $syslegal $syshdrtempfile > $syshdrfile +fi + +if [ $output_auditevfile -eq 1 ]; then + cat $syslegal $audittempfile > $auditevfile +fi diff --git a/bsd/kern/mcache.c b/bsd/kern/mcache.c index c8912286e..14416f34a 100644 --- a/bsd/kern/mcache.c +++ b/bsd/kern/mcache.c @@ -99,7 +99,7 @@ #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l) /* This should be in a header file */ -#define atomic_add_32(a, n) ((void) OSAddAtomic(n, (volatile SInt32 *)a)) +#define atomic_add_32(a, n) ((void) OSAddAtomic(n, a)) static int ncpu; static lck_mtx_t *mcache_llock; @@ -1499,7 +1499,7 @@ mcache_audit_free_verify_set(mcache_audit_t *mca, void *base, size_t offset, ((mcache_obj_t *)addr)->obj_next = next; } -#undef panic(...) 
+#undef panic __private_extern__ char * mcache_dump_mca(mcache_audit_t *mca) diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c index 3f2c11a86..6c4b5437e 100644 --- a/bsd/kern/netboot.c +++ b/bsd/kern/netboot.c @@ -538,10 +538,11 @@ get_ip_parameters(struct in_addr * iaddr_p, struct in_addr * netmask_p, static int route_cmd(int cmd, struct in_addr d, struct in_addr g, - struct in_addr m, u_long more_flags) + struct in_addr m, uint32_t more_flags, unsigned int ifscope) { struct sockaddr_in dst; - u_long flags = RTF_UP | RTF_STATIC; + int error; + uint32_t flags = RTF_UP | RTF_STATIC; struct sockaddr_in gw; struct sockaddr_in mask; @@ -564,29 +565,35 @@ route_cmd(int cmd, struct in_addr d, struct in_addr g, mask.sin_len = sizeof(mask); mask.sin_family = AF_INET; mask.sin_addr = m; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); + error = rtrequest_scoped_locked(cmd, (struct sockaddr *)&dst, + (struct sockaddr *)&gw, + (struct sockaddr *)&mask, + flags, NULL, ifscope); + lck_mtx_unlock(rnh_lock); + return (error); - return (rtrequest(cmd, (struct sockaddr *)&dst, (struct sockaddr *)&gw, - (struct sockaddr *)&mask, flags, NULL)); } static int default_route_add(struct in_addr router, boolean_t proxy_arp) { - u_long flags = 0; + uint32_t flags = 0; struct in_addr zeroes = { 0 }; if (proxy_arp == FALSE) { flags |= RTF_GATEWAY; } - return (route_cmd(RTM_ADD, zeroes, router, zeroes, flags)); + return (route_cmd(RTM_ADD, zeroes, router, zeroes, flags, IFSCOPE_NONE)); } static int -host_route_delete(struct in_addr host) +host_route_delete(struct in_addr host, unsigned int ifscope) { struct in_addr zeroes = { 0 }; - return (route_cmd(RTM_DELETE, host, zeroes, zeroes, RTF_HOST)); + return (route_cmd(RTM_DELETE, host, zeroes, zeroes, RTF_HOST, ifscope)); } static struct ifnet * @@ -705,7 +712,8 @@ netboot_mountroot(void) /* NOT REACHED */ case EHOSTDOWN: /* remove the server's arp entry */ - error = 
host_route_delete(S_netboot_info_p->server_ip); + error = host_route_delete(S_netboot_info_p->server_ip, + ifp->if_index); if (error) { printf("netboot: host_route_delete(" IP_FORMAT ") failed %d\n", @@ -713,7 +721,7 @@ netboot_mountroot(void) } break; case EHOSTUNREACH: - error = host_route_delete(router); + error = host_route_delete(router, ifp->if_index); if (error) { printf("netboot: host_route_delete(" IP_FORMAT ") failed %d\n", IP_LIST(&router), error); @@ -770,7 +778,7 @@ netboot_setup() } rootdev = dev; mountroot = NULL; - printf("netboot: root device 0x%x\n", rootdev); + printf("netboot: root device 0x%x\n", (int32_t)rootdev); error = vfs_mountroot(); if (error == 0 && rootvnode != NULL) { struct vnode *tvp; diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c index 63bbd878e..a2cd627f1 100644 --- a/bsd/kern/posix_sem.c +++ b/bsd/kern/posix_sem.c @@ -71,7 +71,7 @@ #include #endif -#include +#include #include #include @@ -135,7 +135,7 @@ struct psemstats { struct psemname { char *psem_nameptr; /* pointer to looked up name */ long psem_namelen; /* length of looked up component */ - u_long psem_hash; /* hash value of looked up name */ + u_int32_t psem_hash; /* hash value of looked up name */ }; struct psemnode { @@ -157,7 +157,7 @@ long posix_sem_max = 10000; /* tunable for max POSIX semaphores */ /* 10000 limits to ~1M of memory */ SYSCTL_NODE(_kern, KERN_POSIX, posix, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Posix"); SYSCTL_NODE(_kern_posix, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Semaphores"); -SYSCTL_INT (_kern_posix_sem, OID_AUTO, max, CTLFLAG_RW, &posix_sem_max, 0, "max"); +SYSCTL_LONG (_kern_posix_sem, OID_AUTO, max, CTLFLAG_RW, &posix_sem_max, "max"); struct psemstats psemstats; /* cache effectiveness statistics */ @@ -350,14 +350,15 @@ psem_cache_purge(void) int sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) { - struct fileproc *fp; size_t i; - struct fileproc *nfp; int indx, error; struct psemname nd; struct pseminfo *pinfo; - 
struct psemcache *pcp = NULL; /* protected by !incache */ - char * pnbuf; + struct fileproc *fp = NULL; + char *pnbuf = NULL; + struct pseminfo *new_pinfo = PSEMINFO_NULL; + struct psemnode *new_pnode = PSEMNODE_NULL; + struct psemcache *pcache = PSEMCACHE_NULL; char * nameptr; char * cp; size_t pathlen, plen; @@ -365,20 +366,24 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) int cmode = uap->mode; int value = uap->value; int incache = 0; - struct psemnode * pnode = PSEMNODE_NULL; - struct psemcache * pcache = PSEMCACHE_NULL; - kern_return_t kret = KERN_SUCCESS; - int pinfo_alloc = 0; - + struct psemcache *pcp = PSEMCACHE_NULL; + kern_return_t kret = KERN_INVALID_ADDRESS; /* default fail */ + AUDIT_ARG(fflags, uap->oflag); AUDIT_ARG(mode, uap->mode); - AUDIT_ARG(value, uap->value); + AUDIT_ARG(value32, uap->value); pinfo = PSEMINFO_NULL; + /* + * Preallocate everything we might need up front to avoid taking + * and dropping the lock, opening us up to race conditions. + */ MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); - if (pnbuf == NULL) - return(ENOSPC); + if (pnbuf == NULL) { + error = ENOSPC; + goto bad; + } pathlen = MAXPATHLEN; error = copyinstr(uap->name, pnbuf, MAXPATHLEN, &pathlen); @@ -415,28 +420,86 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) nd.psem_hash += (unsigned char)*cp * i; } + /* + * attempt to allocate a new fp; if unsuccessful, the fp will be + * left unmodified (NULL). + */ + error = falloc(p, &fp, &indx, vfs_context_current()); + if (error) + goto bad; + + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. 
+ */ + MALLOC(pcp, struct psemcache *, sizeof(struct psemcache), M_SHM, M_WAITOK|M_ZERO); + if (pcp == PSEMCACHE_NULL) { + error = ENOMEM; + goto bad; + } + + MALLOC(new_pinfo, struct pseminfo *, sizeof(struct pseminfo), M_SHM, M_WAITOK|M_ZERO); + if (new_pinfo == NULL) { + error = ENOSPC; + goto bad; + } +#if CONFIG_MACF + mac_posixsem_label_init(new_pinfo); +#endif + + /* + * Provisionally create the semaphore in the new_pinfo; we have to do + * this here to prevent locking later. We use the value of kret to + * signal success or failure, which is why we set its default value + * to KERN_INVALID_ADDRESS, above. + */ + + fmode = FFLAGS(uap->oflag); + + if((fmode & O_CREAT)) { + + if((value < 0) || (value > SEM_VALUE_MAX)) { + error = EINVAL; + goto bad; + } + + kret = semaphore_create(kernel_task, &new_pinfo->psem_semobject, SYNC_POLICY_FIFO, value); + + if (kret != KERN_SUCCESS) { + switch (kret) { + case KERN_RESOURCE_SHORTAGE: + error = ENOMEM; + break; + case KERN_PROTECTION_FAILURE: + error = EACCES; + break; + default: + error = EINVAL; + } + goto bad; + } + } + + MALLOC(new_pnode, struct psemnode *, sizeof(struct psemnode), M_SHM, M_WAITOK|M_ZERO); + if (new_pnode == NULL) { + error = ENOSPC; + goto bad; + } + PSEM_SUBSYS_LOCK(); error = psem_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { - PSEM_SUBSYS_UNLOCK(); error = EINVAL; - goto bad; + goto bad_locked; } if (!error) { incache = 0; } else incache = 1; - fmode = FFLAGS(uap->oflag); - PSEM_SUBSYS_UNLOCK(); - error = falloc(p, &nfp, &indx, vfs_context_current()); - if (error) - goto bad; - - PSEM_SUBSYS_LOCK(); - fp = nfp; cmode &= ALLPERMS; if (((fmode & (O_CREAT | O_EXCL))==(O_CREAT | O_EXCL)) && incache) { @@ -447,9 +510,8 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) #endif AUDIT_ARG(posix_ipc_perm, pinfo->psem_uid, pinfo->psem_gid, pinfo->psem_mode); - PSEM_SUBSYS_UNLOCK(); error = EEXIST; - goto bad1; + goto bad_locked; } if (((fmode & (O_CREAT | O_EXCL))== 
O_CREAT) && incache) { /* As per POSIX, O_CREAT has no effect */ @@ -457,23 +519,8 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) } if ( (fmode & O_CREAT) ) { - if((value < 0) || (value > SEM_VALUE_MAX)) { - PSEM_SUBSYS_UNLOCK(); - error = EINVAL; - goto bad1; - } - PSEM_SUBSYS_UNLOCK(); - MALLOC(pinfo, struct pseminfo *, sizeof(struct pseminfo), M_SHM, M_WAITOK|M_ZERO); - if (pinfo == NULL) { - error = ENOSPC; - goto bad1; - } -#if CONFIG_MACF - mac_posixsem_label_init(pinfo); -#endif - PSEM_SUBSYS_LOCK(); - - pinfo_alloc = 1; + /* create a new one (commit the allocation) */ + pinfo = new_pinfo; pinfo->psem_flags = PSEM_DEFINED | PSEM_INCREATE; pinfo->psem_usecount = 1; pinfo->psem_mode = cmode; @@ -481,86 +528,74 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) pinfo->psem_gid = kauth_cred_get()->cr_gid; bcopy(pnbuf, &pinfo->psem_name[0], PSEMNAMLEN); pinfo->psem_name[PSEMNAMLEN]= 0; - PSEM_SUBSYS_UNLOCK(); - kret = semaphore_create(kernel_task, &pinfo->psem_semobject, - SYNC_POLICY_FIFO, value); - if(kret != KERN_SUCCESS) - goto bad3; - PSEM_SUBSYS_LOCK(); pinfo->psem_flags &= ~PSEM_DEFINED; pinfo->psem_flags |= PSEM_ALLOCATED; pinfo->sem_proc = p; + #if CONFIG_MACF error = mac_posixsem_check_create(kauth_cred_get(), nameptr); if (error) { - PSEM_SUBSYS_UNLOCK(); - goto bad2; + goto bad_locked; } mac_posixsem_label_associate(kauth_cred_get(), pinfo, nameptr); #endif } else { /* semaphore should exist as it is without O_CREAT */ if (!incache) { - PSEM_SUBSYS_UNLOCK(); error = ENOENT; - goto bad1; + goto bad_locked; } if( pinfo->psem_flags & PSEM_INDELETE) { - PSEM_SUBSYS_UNLOCK(); error = ENOENT; - goto bad1; + goto bad_locked; } AUDIT_ARG(posix_ipc_perm, pinfo->psem_uid, pinfo->psem_gid, pinfo->psem_mode); #if CONFIG_MACF error = mac_posixsem_check_open(kauth_cred_get(), pinfo); if (error) { - PSEM_SUBSYS_UNLOCK(); - goto bad1; + goto bad_locked; } #endif if ( (error = psem_access(pinfo, fmode, kauth_cred_get())) ) { - 
PSEM_SUBSYS_UNLOCK(); - goto bad1; + goto bad_locked; } } - PSEM_SUBSYS_UNLOCK(); - MALLOC(pnode, struct psemnode *, sizeof(struct psemnode), M_SHM, M_WAITOK|M_ZERO); - if (pnode == NULL) { - error = ENOSPC; - goto bad1; - } - if (!incache) { - /* - * We allocate a new entry if we are less than the maximum - * allowed and the one at the front of the LRU list is in use. - * Otherwise we use the one at the front of the LRU list. - */ - MALLOC(pcp, struct psemcache *, sizeof(struct psemcache), M_SHM, M_WAITOK|M_ZERO); - if (pcp == NULL) { - error = ENOMEM; - goto bad2; - } - } - PSEM_SUBSYS_LOCK(); if (!incache) { + /* if successful, this will consume the pcp */ if ( (error = psem_cache_add(pinfo, &nd, pcp)) ) { - PSEM_SUBSYS_UNLOCK(); - FREE(pcp, M_SHM); - goto bad2; + goto bad_locked; } } pinfo->psem_flags &= ~PSEM_INCREATE; pinfo->psem_usecount++; - pnode->pinfo = pinfo; + new_pnode->pinfo = pinfo; PSEM_SUBSYS_UNLOCK(); + /* + * if incache, we did not use the new pcp or the new pcp or the + * new . and we must free them. 
+ */ + if (incache) { + FREE(pcp, M_SHM); + pcp = PSEMCACHE_NULL; + if (new_pinfo != PSEMINFO_NULL) { + /* return value ignored - we can't _not_ do this */ + (void)semaphore_destroy(kernel_task, new_pinfo->psem_semobject); +#if CONFIG_MACF + mac_posixsem_label_destroy(new_pinfo); +#endif + FREE(new_pinfo, M_SHM); + new_pinfo = PSEMINFO_NULL; + } + } + proc_fdlock(p); fp->f_flag = fmode & FMASK; fp->f_type = DTYPE_PSXSEM; fp->f_ops = &psemops; - fp->f_data = (caddr_t)pnode; + fp->f_data = (caddr_t)new_pnode; procfdtbl_releasefd(p, indx, NULL); fp_drop(p, indx, fp, 1); proc_fdunlock(p); @@ -569,30 +604,36 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (0); -bad3: - switch (kret) { - case KERN_RESOURCE_SHORTAGE: - error = ENOMEM; - break; - case KERN_PROTECTION_FAILURE: - error = EACCES; - break; - default: - error = EINVAL; - } - goto bad1; -bad2: - FREE(pnode, M_SHM); -bad1: - if (pinfo_alloc) { +bad_locked: + PSEM_SUBSYS_UNLOCK(); +bad: + if (pcp != PSEMCACHE_NULL) + FREE(pcp, M_SHM); + + if (new_pnode != PSEMNODE_NULL) + FREE(new_pnode, M_SHM); + + if (fp != NULL) + fp_free(p, indx, fp); + + if (new_pinfo != PSEMINFO_NULL) { + /* + * kret signals whether or not we successfully created a + * Mach semaphore for this semaphore; if so, we need to + * destroy it here. 
+ */ + if (kret == KERN_SUCCESS) { + /* return value ignored - we can't _not_ do this */ + (void)semaphore_destroy(kernel_task, new_pinfo->psem_semobject); + } #if CONFIG_MACF - mac_posixsem_label_destroy(pinfo); + mac_posixsem_label_destroy(new_pinfo); #endif - FREE(pinfo, M_SHM); + FREE(new_pinfo, M_SHM); } - fp_free(p, indx, nfp); -bad: - FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); + + if (pnbuf != NULL) + FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); } @@ -638,7 +679,7 @@ psem_access(struct pseminfo *pinfo, int mode, kauth_cred_t cred) } int -sem_unlink(__unused proc_t p, struct sem_unlink_args *uap, __unused register_t *retval) +sem_unlink(__unused proc_t p, struct sem_unlink_args *uap, __unused int32_t *retval) { size_t i; int error=0; @@ -754,9 +795,9 @@ sem_unlink(__unused proc_t p, struct sem_unlink_args *uap, __unused register_t * } int -sem_close(proc_t p, struct sem_close_args *uap, __unused register_t *retval) +sem_close(proc_t p, struct sem_close_args *uap, __unused int32_t *retval) { - int fd = CAST_DOWN(int,uap->sem); + int fd = CAST_DOWN_EXPLICIT(int,uap->sem); struct fileproc *fp; int error = 0; @@ -777,16 +818,16 @@ sem_close(proc_t p, struct sem_close_args *uap, __unused register_t *retval) } int -sem_wait(proc_t p, struct sem_wait_args *uap, register_t *retval) +sem_wait(proc_t p, struct sem_wait_args *uap, int32_t *retval) { __pthread_testcancel(1); return(sem_wait_nocancel(p, (struct sem_wait_nocancel_args *)uap, retval)); } int -sem_wait_nocancel(proc_t p, struct sem_wait_nocancel_args *uap, __unused register_t *retval) +sem_wait_nocancel(proc_t p, struct sem_wait_nocancel_args *uap, __unused int32_t *retval) { - int fd = CAST_DOWN(int,uap->sem); + int fd = CAST_DOWN_EXPLICIT(int,uap->sem); struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; @@ -844,9 +885,9 @@ sem_wait_nocancel(proc_t p, struct sem_wait_nocancel_args *uap, __unused registe } int -sem_trywait(proc_t p, struct sem_trywait_args *uap, __unused register_t 
*retval) +sem_trywait(proc_t p, struct sem_trywait_args *uap, __unused int32_t *retval) { - int fd = CAST_DOWN(int,uap->sem); + int fd = CAST_DOWN_EXPLICIT(int,uap->sem); struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; @@ -909,9 +950,9 @@ sem_trywait(proc_t p, struct sem_trywait_args *uap, __unused register_t *retval) } int -sem_post(proc_t p, struct sem_post_args *uap, __unused register_t *retval) +sem_post(proc_t p, struct sem_post_args *uap, __unused int32_t *retval) { - int fd = CAST_DOWN(int,uap->sem); + int fd = CAST_DOWN_EXPLICIT(int,uap->sem); struct fileproc *fp; struct pseminfo * pinfo; struct psemnode * pnode ; @@ -968,19 +1009,19 @@ sem_post(proc_t p, struct sem_post_args *uap, __unused register_t *retval) } int -sem_init(__unused proc_t p, __unused struct sem_init_args *uap, __unused register_t *retval) +sem_init(__unused proc_t p, __unused struct sem_init_args *uap, __unused int32_t *retval) { return(ENOSYS); } int -sem_destroy(__unused proc_t p, __unused struct sem_destroy_args *uap, __unused register_t *retval) +sem_destroy(__unused proc_t p, __unused struct sem_destroy_args *uap, __unused int32_t *retval) { return(ENOSYS); } int -sem_getvalue(__unused proc_t p, __unused struct sem_getvalue_args *uap, __unused register_t *retval) +sem_getvalue(__unused proc_t p, __unused struct sem_getvalue_args *uap, __unused int32_t *retval) { return(ENOSYS); } diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index 7ad07ace0..985538e69 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -67,7 +67,7 @@ #include #include #include -#include +#include #if CONFIG_MACF #include @@ -93,6 +93,12 @@ #define f_data f_fglob->fg_data #define PSHMNAMLEN 31 /* maximum name segment length we bother with */ +struct pshmobj { + void * pshmo_memobject; + memory_object_size_t pshmo_size; + struct pshmobj * pshmo_next; +}; + struct pshminfo { unsigned int pshm_flags; unsigned int pshm_usecount; @@ -101,7 +107,7 @@ struct pshminfo { uid_t 
pshm_uid; gid_t pshm_gid; char pshm_name[PSHMNAMLEN + 1]; /* segment name */ - void * pshm_memobject; + struct pshmobj *pshm_memobjects; #if DIAGNOSTIC unsigned int pshm_readcount; unsigned int pshm_writecount; @@ -111,14 +117,15 @@ struct pshminfo { }; #define PSHMINFO_NULL (struct pshminfo *)0 -#define PSHM_NONE 1 -#define PSHM_DEFINED 2 -#define PSHM_ALLOCATED 4 -#define PSHM_MAPPED 8 -#define PSHM_INUSE 0x10 -#define PSHM_REMOVED 0x20 -#define PSHM_INCREATE 0x40 -#define PSHM_INDELETE 0x80 +#define PSHM_NONE 0x001 +#define PSHM_DEFINED 0x002 +#define PSHM_ALLOCATED 0x004 +#define PSHM_MAPPED 0x008 +#define PSHM_INUSE 0x010 +#define PSHM_REMOVED 0x020 +#define PSHM_INCREATE 0x040 +#define PSHM_INDELETE 0x080 +#define PSHM_ALLOCATING 0x100 struct pshmcache { LIST_ENTRY(pshmcache) pshm_hash; /* hash chain */ @@ -145,7 +152,7 @@ struct pshmname { struct pshmnode { off_t mapp_addr; - user_size_t map_size; + user_size_t map_size; /* XXX unused ? */ struct pshminfo *pinfo; unsigned int pshm_usecount; #if DIAGNOSTIC @@ -355,34 +362,38 @@ pshm_cache_delete(struct pshmcache *pcp) int -shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) +shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval) { - struct fileproc *fp; size_t i; - struct fileproc *nfp; int indx, error; struct pshmname nd; struct pshminfo *pinfo; - char * pnbuf; + struct fileproc *fp = NULL; + char *pnbuf = NULL; + struct pshminfo *new_pinfo = PSHMINFO_NULL; + struct pshmnode *new_pnode = PSHMNODE_NULL; + struct pshmcache *pcache = PSHMCACHE_NULL; /* ignored on return */ char * nameptr; char * cp; size_t pathlen, plen; int fmode ; int cmode = uap->mode; int incache = 0; - struct pshmnode * pnode = PSHMNODE_NULL; - struct pshmcache * pcache = PSHMCACHE_NULL; - struct pshmcache *pcp = NULL; /* protected by !incache */ - int pinfo_alloc=0; + struct pshmcache *pcp = NULL; AUDIT_ARG(fflags, uap->oflag); AUDIT_ARG(mode, uap->mode); pinfo = PSHMINFO_NULL; + /* + * Preallocate everything we 
might need up front to avoid taking + * and dropping the lock, opening us up to race conditions. + */ MALLOC_ZONE(pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); if (pnbuf == NULL) { - return(ENOSPC); + error = ENOSPC; + goto bad; } pathlen = MAXPATHLEN; @@ -395,8 +406,6 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) error = ENAMETOOLONG; goto bad; } - - #ifdef PSXSHM_NAME_RESTRICT nameptr = pnbuf; if (*nameptr == '/') { @@ -421,13 +430,47 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) nd.pshm_hash += (unsigned char)*cp * i; } + /* + * attempt to allocate a new fp; if unsuccessful, the fp will be + * left unmodified (NULL). + */ + error = falloc(p, &fp, &indx, vfs_context_current()); + if (error) + goto bad; + + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. + */ + MALLOC(pcp, struct pshmcache *, sizeof(struct pshmcache), M_SHM, M_WAITOK|M_ZERO); + if (pcp == NULL) { + error = ENOSPC; + goto bad; + } + + MALLOC(new_pinfo, struct pshminfo *, sizeof(struct pshminfo), M_SHM, M_WAITOK|M_ZERO); + if (new_pinfo == PSHMINFO_NULL) { + error = ENOSPC; + goto bad; + } +#if CONFIG_MACF + mac_posixshm_label_init(new_pinfo); +#endif + + MALLOC(new_pnode, struct pshmnode *, sizeof(struct pshmnode), M_SHM, M_WAITOK|M_ZERO); + if (new_pnode == PSHMNODE_NULL) { + error = ENOSPC; + goto bad; + } + PSHM_SUBSYS_LOCK(); + error = pshm_cache_search(&pinfo, &nd, &pcache); if (error == ENOENT) { - PSHM_SUBSYS_UNLOCK(); error = EINVAL; - goto bad; + goto bad_locked; } if (!error) { @@ -436,50 +479,42 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) incache = 1; fmode = FFLAGS(uap->oflag); if ((fmode & (FREAD | FWRITE))==0) { - PSHM_SUBSYS_UNLOCK(); error = EINVAL; - goto bad; + goto bad_locked; } - /* - * XXXXXXXXXX TBD XXXXXXXXXX - * There is a race that existed with the funnels as well. 
- * Need to be fixed later - */ - PSHM_SUBSYS_UNLOCK(); - error = falloc(p, &nfp, &indx, vfs_context_current()); - if (error ) - goto bad; - PSHM_SUBSYS_LOCK(); - - fp = nfp; - cmode &= ALLPERMS; if (fmode & O_CREAT) { - if ((fmode & O_EXCL) && incache) { + if (incache) { + /* already exists */ + if ((fmode & O_EXCL)) { + AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, + pinfo->pshm_gid, + pinfo->pshm_mode); + + /* shm obj exists and opened O_EXCL */ + error = EEXIST; + goto bad_locked; + } + + if( pinfo->pshm_flags & PSHM_INDELETE) { + error = ENOENT; + goto bad_locked; + } AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, pinfo->pshm_gid, pinfo->pshm_mode); - - /* shm obj exists and opened O_EXCL */ -#if notyet - if (pinfo->pshm_flags & PSHM_INDELETE) { +#if CONFIG_MACF + if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) { + goto bad_locked; } -#endif - error = EEXIST; - PSHM_SUBSYS_UNLOCK(); - goto bad1; - } - if (!incache) { - PSHM_SUBSYS_UNLOCK(); - /* create a new one */ - MALLOC(pinfo, struct pshminfo *, sizeof(struct pshminfo), M_SHM, M_WAITOK|M_ZERO); - if (pinfo == NULL) { - error = ENOSPC; - goto bad1; +#endif + if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) { + goto bad_locked; } - PSHM_SUBSYS_LOCK(); - pinfo_alloc = 1; + } else { + /* create a new one (commit the allocation) */ + pinfo = new_pinfo; pinfo->pshm_flags = PSHM_DEFINED | PSHM_INCREATE; pinfo->pshm_usecount = 1; /* existence reference */ pinfo->pshm_mode = cmode; @@ -488,66 +523,36 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) bcopy(pnbuf, &pinfo->pshm_name[0], PSHMNAMLEN); pinfo->pshm_name[PSHMNAMLEN]=0; #if CONFIG_MACF - PSHM_SUBSYS_UNLOCK(); - mac_posixshm_label_init(pinfo); - PSHM_SUBSYS_LOCK(); error = mac_posixshm_check_create(kauth_cred_get(), nameptr); if (error) { - PSHM_SUBSYS_UNLOCK(); - goto bad2; + goto bad_locked; } mac_posixshm_label_associate(kauth_cred_get(), pinfo, nameptr); #endif - } else { - /* already exists */ - if( 
pinfo->pshm_flags & PSHM_INDELETE) { - PSHM_SUBSYS_UNLOCK(); - error = ENOENT; - goto bad1; - } - AUDIT_ARG(posix_ipc_perm, pinfo->pshm_uid, - pinfo->pshm_gid, pinfo->pshm_mode); -#if CONFIG_MACF - if ((error = mac_posixshm_check_open( - kauth_cred_get(), pinfo))) { - PSHM_SUBSYS_UNLOCK(); - goto bad1; - } -#endif - if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) { - PSHM_SUBSYS_UNLOCK(); - goto bad1; - } } } else { if (!incache) { - /* O_CREAT is not set and the shm obecj does not exist */ - PSHM_SUBSYS_UNLOCK(); + /* O_CREAT is not set and the object does not exist */ error = ENOENT; - goto bad1; + goto bad_locked; } if( pinfo->pshm_flags & PSHM_INDELETE) { - PSHM_SUBSYS_UNLOCK(); error = ENOENT; - goto bad1; + goto bad_locked; } #if CONFIG_MACF - if ((error = mac_posixshm_check_open( - kauth_cred_get(), pinfo))) { - PSHM_SUBSYS_UNLOCK(); - goto bad1; + if ((error = mac_posixshm_check_open(kauth_cred_get(), pinfo))) { + goto bad_locked; } #endif - if ( (error = pshm_access(pinfo, fmode, kauth_cred_get(), p)) ) { - PSHM_SUBSYS_UNLOCK(); - goto bad1; + if ((error = pshm_access(pinfo, fmode, kauth_cred_get(), p))) { + goto bad_locked; } } if (fmode & O_TRUNC) { - PSHM_SUBSYS_UNLOCK(); error = EINVAL; - goto bad2; + goto bad_locked; } #if DIAGNOSTIC if (fmode & FWRITE) @@ -555,44 +560,38 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) if (fmode & FREAD) pinfo->pshm_readcount++; #endif - PSHM_SUBSYS_UNLOCK(); - MALLOC(pnode, struct pshmnode *, sizeof(struct pshmnode), M_SHM, M_WAITOK|M_ZERO); - if (pnode == NULL) { - error = ENOSPC; - goto bad2; - } - if (!incache) { - /* - * We allocate a new entry if we are less than the maximum - * allowed and the one at the front of the LRU list is in use. - * Otherwise we use the one at the front of the LRU list. 
- */ - MALLOC(pcp, struct pshmcache *, sizeof(struct pshmcache), M_SHM, M_WAITOK|M_ZERO); - if (pcp == NULL) { - error = ENOSPC; - goto bad2; - } - - } - PSHM_SUBSYS_LOCK(); - if (!incache) { + /* if successful, this will consume the pcp */ if ( (error = pshm_cache_add(pinfo, &nd, pcp)) ) { - PSHM_SUBSYS_UNLOCK(); - FREE(pcp, M_SHM); - goto bad3; + goto bad_locked; } } pinfo->pshm_flags &= ~PSHM_INCREATE; pinfo->pshm_usecount++; /* extra reference for the new fd */ - pnode->pinfo = pinfo; + new_pnode->pinfo = pinfo; PSHM_SUBSYS_UNLOCK(); + + /* + * if incache, we did not use the new pcp or new_pinfo and must + * free them + */ + if (incache) { + FREE(pcp, M_SHM); + + if (new_pinfo != PSHMINFO_NULL) { +#if CONFIG_MACF + mac_posixshm_label_destroy(new_pinfo); +#endif + FREE(new_pinfo, M_SHM); + } + } + proc_fdlock(p); fp->f_flag = fmode & FMASK; fp->f_type = DTYPE_PSXSHM; fp->f_ops = &pshmops; - fp->f_data = (caddr_t)pnode; + fp->f_data = (caddr_t)new_pnode; *fdflags(p, indx) |= UF_EXCLOSE; procfdtbl_releasefd(p, indx, NULL); fp_drop(p, indx, fp, 1); @@ -601,34 +600,41 @@ shm_open(proc_t p, struct shm_open_args *uap, register_t *retval) *retval = indx; FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (0); -bad3: - FREE(pnode, M_SHM); -bad2: - if (pinfo_alloc) { +bad_locked: + PSHM_SUBSYS_UNLOCK(); +bad: + if (pcp != NULL) + FREE(pcp, M_SHM); + + if (new_pnode != PSHMNODE_NULL) + FREE(new_pnode, M_SHM); + + if (fp != NULL) + fp_free(p, indx, fp); + + if (new_pinfo != PSHMINFO_NULL) { #if CONFIG_MACF - mac_posixshm_label_destroy(pinfo); + mac_posixshm_label_destroy(new_pinfo); #endif - FREE(pinfo, M_SHM); + FREE(new_pinfo, M_SHM); } -bad1: - fp_free(p, indx, fp); -bad: - FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); + if (pnbuf != NULL) + FREE_ZONE(pnbuf, MAXPATHLEN, M_NAMEI); return (error); } int pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, - off_t length, __unused register_t *retval) + off_t length, __unused int32_t *retval) { struct pshminfo * 
pinfo; struct pshmnode * pnode ; kern_return_t kret; - mach_vm_offset_t user_addr; mem_entry_name_port_t mem_object; - mach_vm_size_t size; + mach_vm_size_t size, total_size, alloc_size; + struct pshmobj *pshmobj, *pshmobj_next, **pshmobj_next_p; #if CONFIG_MACF int error; #endif @@ -646,7 +652,7 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, PSHM_SUBSYS_UNLOCK(); return(EINVAL); } - if ((pinfo->pshm_flags & (PSHM_DEFINED | PSHM_ALLOCATED)) + if ((pinfo->pshm_flags & (PSHM_DEFINED|PSHM_ALLOCATING|PSHM_ALLOCATED)) != PSHM_DEFINED) { PSHM_SUBSYS_UNLOCK(); return(EINVAL); @@ -658,29 +664,66 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, return(error); } #endif - PSHM_SUBSYS_UNLOCK(); - size = round_page_64(length); - kret = mach_vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE); - if (kret != KERN_SUCCESS) - goto out; - kret = mach_make_memory_entry_64 (current_map(), &size, - user_addr, VM_PROT_DEFAULT, &mem_object, 0); + pinfo->pshm_flags |= PSHM_ALLOCATING; + total_size = round_page_64(length); + pshmobj_next_p = &pinfo->pshm_memobjects; - if (kret != KERN_SUCCESS) - goto out; - - mach_vm_deallocate(current_map(), user_addr, size); + for (alloc_size = 0; + alloc_size < total_size; + alloc_size += size) { - PSHM_SUBSYS_LOCK(); - pinfo->pshm_flags &= ~PSHM_DEFINED; + PSHM_SUBSYS_UNLOCK(); + + size = MIN(total_size - alloc_size, ANON_MAX_SIZE); + kret = mach_make_memory_entry_64( + VM_MAP_NULL, + &size, + 0, + MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT, + &mem_object, + 0); + + if (kret != KERN_SUCCESS) + goto out; + + MALLOC(pshmobj, struct pshmobj *, sizeof (struct pshmobj), + M_SHM, M_WAITOK); + if (pshmobj == NULL) { + kret = KERN_NO_SPACE; + mach_memory_entry_port_release(mem_object); + mem_object = NULL; + goto out; + } + + PSHM_SUBSYS_LOCK(); + + pshmobj->pshmo_memobject = (void *) mem_object; + pshmobj->pshmo_size = size; + pshmobj->pshmo_next = NULL; + + *pshmobj_next_p = pshmobj; + 
pshmobj_next_p = &pshmobj->pshmo_next; + } + pinfo->pshm_flags = PSHM_ALLOCATED; - pinfo->pshm_memobject = (void *)mem_object; - pinfo->pshm_length = size; + pinfo->pshm_length = total_size; PSHM_SUBSYS_UNLOCK(); return(0); out: + PSHM_SUBSYS_LOCK(); + for (pshmobj = pinfo->pshm_memobjects; + pshmobj != NULL; + pshmobj = pshmobj_next) { + pshmobj_next = pshmobj->pshmo_next; + mach_memory_entry_port_release(pshmobj->pshmo_memobject); + FREE(pshmobj, M_SHM); + } + pinfo->pshm_memobjects = NULL; + pinfo->pshm_flags &= ~PSHM_ALLOCATING; + PSHM_SUBSYS_UNLOCK(); + switch (kret) { case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: @@ -784,16 +827,19 @@ pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct { mach_vm_offset_t user_addr = (mach_vm_offset_t)uap->addr; mach_vm_size_t user_size = (mach_vm_size_t)uap->len ; + mach_vm_offset_t user_start_addr; + mach_vm_size_t map_size, mapped_size; int prot = uap->prot; int flags = uap->flags; vm_object_offset_t file_pos = (vm_object_offset_t)uap->pos; + vm_object_offset_t map_pos; vm_map_t user_map; int alloc_flags; boolean_t docow; kern_return_t kret; struct pshminfo * pinfo; struct pshmnode * pnode; - void * mem_object; + struct pshmobj * pshmobj; #if CONFIG_MACF int error; #endif @@ -830,7 +876,7 @@ pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct PSHM_SUBSYS_UNLOCK(); return(EINVAL); } - if ((mem_object = pinfo->pshm_memobject) == NULL) { + if ((pshmobj = pinfo->pshm_memobjects) == NULL) { PSHM_SUBSYS_UNLOCK(); return(EINVAL); } @@ -862,29 +908,78 @@ pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct } docow = FALSE; - kret = vm_map_enter_mem_object(user_map, &user_addr, user_size, - 0, alloc_flags, - pinfo->pshm_memobject, file_pos, docow, - prot, VM_PROT_DEFAULT, - VM_INHERIT_SHARE); - if (kret != KERN_SUCCESS) - goto out; - /* LP64todo - this should be superfluous at this point */ - kret = mach_vm_inherit(user_map, user_addr, user_size, - 
VM_INHERIT_SHARE); + mapped_size = 0; + + /* reserve the entire space first... */ + kret = vm_map_enter_mem_object(user_map, + &user_addr, + user_size, + 0, + alloc_flags, + IPC_PORT_NULL, + 0, + FALSE, + VM_PROT_NONE, + VM_PROT_NONE, + VM_INHERIT_NONE); + user_start_addr = user_addr; if (kret != KERN_SUCCESS) { - (void) mach_vm_deallocate(user_map, user_addr, user_size); goto out; } + + /* ... and overwrite with the real mappings */ + for (map_pos = 0, pshmobj = pinfo->pshm_memobjects; + user_size != 0; + map_pos += pshmobj->pshmo_size, pshmobj = pshmobj->pshmo_next) { + if (pshmobj == NULL) { + /* nothing there to map !? */ + goto out; + } + if (file_pos >= map_pos + pshmobj->pshmo_size) { + continue; + } + map_size = pshmobj->pshmo_size - (file_pos - map_pos); + if (map_size > user_size) { + map_size = user_size; + } + kret = vm_map_enter_mem_object( + user_map, + &user_addr, + map_size, + 0, + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + pshmobj->pshmo_memobject, + file_pos - map_pos, + docow, + prot, + VM_PROT_DEFAULT, + VM_INHERIT_SHARE); + if (kret != KERN_SUCCESS) + goto out; + + user_addr += map_size; + user_size -= map_size; + mapped_size += map_size; + file_pos += map_size; + } + PSHM_SUBSYS_LOCK(); - pnode->mapp_addr = user_addr; - pnode->map_size = user_size; + pnode->mapp_addr = user_start_addr; + pnode->map_size = mapped_size; pinfo->pshm_flags |= (PSHM_MAPPED | PSHM_INUSE); PSHM_SUBSYS_UNLOCK(); out: + if (kret != KERN_SUCCESS) { + if (mapped_size != 0) { + (void) mach_vm_deallocate(current_map(), + user_start_addr, + mapped_size); + } + } + switch (kret) { case KERN_SUCCESS: - *retval = (user_addr + pageoff); + *retval = (user_start_addr + pageoff); return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: @@ -899,7 +994,7 @@ pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct int shm_unlink(__unused proc_t p, struct shm_unlink_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { size_t i; int error=0; @@
-911,6 +1006,7 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap, size_t pathlen, plen; int incache = 0; struct pshmcache *pcache = PSHMCACHE_NULL; + struct pshmobj *pshmobj, *pshmobj_next; pinfo = PSHMINFO_NULL; @@ -976,6 +1072,13 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap, goto bad; } + if (pinfo->pshm_flags & PSHM_ALLOCATING) { + /* XXX should we wait for flag to clear and then proceed ? */ + PSHM_SUBSYS_UNLOCK(); + error = EAGAIN; + goto bad; + } + if (pinfo->pshm_flags & PSHM_INDELETE) { PSHM_SUBSYS_UNLOCK(); error = 0; @@ -1006,15 +1109,20 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap, * If this is the last reference going away on the object, * then we need to destroy the backing object. The name * has an implied but uncounted reference on the object, - * once it's created, since it's used as a rendesvous, and + * once it's created, since it's used as a rendezvous, and * therefore may be subsequently reopened. */ - if (pinfo->pshm_memobject != NULL) - mach_memory_entry_port_release(pinfo->pshm_memobject); - PSHM_SUBSYS_LOCK(); + for (pshmobj = pinfo->pshm_memobjects; + pshmobj != NULL; + pshmobj = pshmobj_next) { + mach_memory_entry_port_release(pshmobj->pshmo_memobject); + pshmobj_next = pshmobj->pshmo_next; + FREE(pshmobj, M_SHM); + } FREE(pinfo,M_SHM); + } else { + PSHM_SUBSYS_UNLOCK(); } - PSHM_SUBSYS_UNLOCK(); FREE(pcache, M_SHM); error = 0; bad: @@ -1028,6 +1136,7 @@ pshm_close(struct pshmnode *pnode) { int error=0; struct pshminfo *pinfo; + struct pshmobj *pshmobj, *pshmobj_next; if ((pinfo = pnode->pinfo) == PSHMINFO_NULL) return(EINVAL); @@ -1043,17 +1152,22 @@ pshm_close(struct pshmnode *pnode) pinfo->pshm_usecount--; /* release this fd's reference */ if ((pinfo->pshm_flags & PSHM_REMOVED) && !pinfo->pshm_usecount) { +#if CONFIG_MACF + mac_posixshm_label_destroy(pinfo); +#endif PSHM_SUBSYS_UNLOCK(); /* * If this is the last reference going away on the object, * then we need to destroy the backing object. 
*/ - if (pinfo->pshm_memobject != NULL) - mach_memory_entry_port_release(pinfo->pshm_memobject); + for (pshmobj = pinfo->pshm_memobjects; + pshmobj != NULL; + pshmobj = pshmobj_next) { + mach_memory_entry_port_release(pshmobj->pshmo_memobject); + pshmobj_next = pshmobj->pshmo_next; + FREE(pshmobj, M_SHM); + } PSHM_SUBSYS_LOCK(); -#if CONFIG_MACF - mac_posixshm_label_destroy(pinfo); -#endif FREE(pinfo,M_SHM); } FREE(pnode, M_SHM); diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c index f06b40da2..d13a2df81 100644 --- a/bsd/kern/proc_info.c +++ b/bsd/kern/proc_info.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include @@ -83,37 +83,40 @@ struct pipe; struct kqueue; struct atalk; -int proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval); +uint64_t get_dispatchqueue_offset_from_proc(void *); +int proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for proc_info calls */ -int proc_listpids(uint32_t type, uint32_t tyoneinfo, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, register_t * retval); +int proc_listpids(uint32_t type, uint32_t tyoneinfo, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int proc_pidfdinfo(int pid, int flavor,int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t 
buffersize, int32_t * retval); /* protos for procpidinfo calls */ -int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval); -int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd); +int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidbsdinfo(proc_t p, struct proc_bsdinfo *pbsd, int zombie); int proc_pidtaskinfo(proc_t p, struct proc_taskinfo *ptinfo); -int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidallinfo(proc_t p, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); int proc_pidthreadinfo(proc_t p, uint64_t arg, struct proc_threadinfo *pthinfo); int proc_pidthreadpathinfo(proc_t p, uint64_t arg, struct proc_threadwithpathinfo *pinfo); -int proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval); -int proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); -int proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); -int proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); -int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t *retval); +int proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +int proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo); /* protos 
for proc_pidfdinfo calls */ -int pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_pseminfo(struct psemnode * psem, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_pipeinfo(struct pipe * p, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); -int pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, register_t * retval); +int pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_pseminfo(struct psemnode * psem, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_pshminfo(struct pshmnode * pshm, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_pipeinfo(struct pipe * p, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * 
retval); +int pid_kqueueinfo(struct kqueue * kq, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); +int pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for misc */ @@ -123,17 +126,27 @@ void fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo static int proc_security_policy(proc_t p); static void munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp); +uint64_t get_dispatchqueue_offset_from_proc(void *p) +{ + if(p != NULL) { + proc_t pself = (proc_t)p; + return (pself->p_dispatchqueue_offset); + } else { + return (uint64_t)0; + } +} + /***************************** proc_info ********************/ int -proc_info(__unused struct proc *p, struct proc_info_args * uap, register_t *retval) +proc_info(__unused struct proc *p, struct proc_info_args * uap, int32_t *retval) { return(proc_info_internal(uap->callnum, uap->pid, uap->flavor, uap->arg, uap->buffer, uap->buffersize, retval)); } int -proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval) +proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval) { switch(callnum) { @@ -146,6 +159,9 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b return(proc_pidfdinfo(pid, flavor, (int)arg, buffer, buffersize, retval)); case 4: /* proc_kernmsgbuf */ return(proc_kernmsgbuf(buffer, buffersize, retval)); + case 5: /* set on self properties proc_setcontrol */ + return(proc_setcontrol(pid, flavor, arg, buffer, buffersize, retval)); + default: return(EINVAL); } @@ -155,13 +171,14 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b /******************* proc_listpids routine ****************/ int -proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, 
uint32_t buffersize, register_t * retval) +proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t buffersize, int32_t * retval) { int numprocs, wantpids; char * kbuf; int * ptr; int n, skip; struct proc * p; + struct tty * tp; int error = 0; /* if the buffer is null, return num of procs */ @@ -202,8 +219,8 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t bu /* racy but list lock is held */ if ((p->p_flag & P_CONTROLT) == 0 || (p->p_pgrp == NULL) || (p->p_pgrp->pg_session == NULL) || - p->p_pgrp->pg_session->s_ttyp == NULL || - p->p_pgrp->pg_session->s_ttyp->t_dev != (dev_t)typeinfo) + (tp = SESSION_TP(p->p_pgrp->pg_session)) == TTY_NULL || + tp->t_dev != (dev_t)typeinfo) skip = 1; break; case PROC_UID_ONLY: @@ -277,7 +294,7 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t bu /********************************** proc_pidinfo routines ********************************/ int -proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval) +proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval) { int numfds, needfds; char * kbuf; @@ -330,7 +347,7 @@ proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *r int -proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd) +proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie) { register struct tty *tp; struct session *sessionp = NULL; @@ -355,9 +372,10 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd) kauth_cred_unref(&my_cred); pbsd->pbi_nice = p->p_nice; - pbsd->pbi_start = p->p_start; - bcopy(&p->p_comm, &pbsd->pbi_comm[0], MAXCOMLEN); - bcopy(&p->p_name, &pbsd->pbi_name[0], 2* MAXCOMLEN); + pbsd->pbi_start_tvsec = p->p_start.tv_sec; + pbsd->pbi_start_tvusec = p->p_start.tv_usec; + bcopy(&p->p_comm, &pbsd->pbi_comm[0], MAXCOMLEN-1); + bcopy(&p->p_name, &pbsd->pbi_name[0], 2*MAXCOMLEN-1); pbsd->pbi_flags = 0; if ((p->p_flag & P_SYSTEM) == P_SYSTEM) @@ 
-375,16 +393,41 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd) if ((p->p_flag & P_THCWD) == P_THCWD) pbsd->pbi_flags |= PROC_FLAG_THCWD; - if (SESS_LEADER(p, sessionp)) - pbsd->pbi_flags |= PROC_FLAG_SLEADER; - if ((sessionp != SESSION_NULL) && sessionp->s_ttyvp) - pbsd->pbi_flags |= PROC_FLAG_CTTY; + if (sessionp != SESSION_NULL) { + if (SESS_LEADER(p, sessionp)) + pbsd->pbi_flags |= PROC_FLAG_SLEADER; + if (sessionp->s_ttyvp) + pbsd->pbi_flags |= PROC_FLAG_CTTY; + } + + + switch(PROC_CONTROL_STATE(p)) { + case P_PCTHROTTLE: + pbsd->pbi_flags |= PROC_FLAG_PC_THROTTLE; + break; + case P_PCSUSP: + pbsd->pbi_flags |= PROC_FLAG_PC_SUSP; + break; + case P_PCKILL: + pbsd->pbi_flags |= PROC_FLAG_PC_KILL; + break; + }; + + switch(PROC_ACTION_STATE(p)) { + case P_PCTHROTTLE: + pbsd->pbi_flags |= PROC_FLAG_PA_THROTTLE; + break; + case P_PCSUSP: + pbsd->pbi_flags |= PROC_FLAG_PA_SUSP; + break; + }; - pbsd->pbi_nfiles = p->p_fd->fd_nfiles; + if (zombie == 0) + pbsd->pbi_nfiles = p->p_fd->fd_nfiles; if (pg != PGRP_NULL) { pbsd->pbi_pgid = p->p_pgrpid; pbsd->pbi_pjobc = pg->pg_jobc; - if ((p->p_flag & P_CONTROLT) && (sessionp != SESSION_NULL) && (tp = sessionp->s_ttyp)) { + if ((p->p_flag & P_CONTROLT) && (sessionp != SESSION_NULL) && (tp = SESSION_TP(sessionp))) { pbsd->e_tdev = tp->t_dev; pbsd->e_tpgid = sessionp->s_ttypgrpid; } @@ -429,6 +472,13 @@ proc_pidthreadinfo(proc_t p, uint64_t arg, struct proc_threadinfo *pthinfo) } +void +bsd_getthreadname(void *uth, char *buffer) +{ + struct uthread *ut = (struct uthread *)uth; + if(ut->pth_name) + bcopy(ut->pth_name,buffer,MAXTHREADNAMESIZE); +} void bsd_threadcdir(void * uth, void *vptr, int *vidp) @@ -478,7 +528,7 @@ proc_pidthreadpathinfo(proc_t p, uint64_t arg, struct proc_threadwithpathinfo * int -proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, register_t *retval) +proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval) { int count = 0; int ret = 0; @@ -512,13 
+562,13 @@ proc_pidlistthreads(proc_t p, user_addr_t buffer, uint32_t buffersize, registe int -proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, int32_t *retval) { struct proc_regioninfo preginfo; int ret, error = 0; bzero(&preginfo, sizeof(struct proc_regioninfo)); - ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo, (uint32_t *)0, (uint32_t *)0); + ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo, (uintptr_t *)0, (uint32_t *)0); if (ret == 0) return(EINVAL); error = copyout(&preginfo, buffer, sizeof(struct proc_regioninfo)); @@ -529,18 +579,18 @@ proc_pidregioninfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t int -proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, int32_t *retval) { struct proc_regionwithpathinfo preginfo; int ret, error = 0; - uint32_t vnodeaddr= 0; + uintptr_t vnodeaddr= 0; uint32_t vnodeid= 0; vnode_t vp; int count; bzero(&preginfo, sizeof(struct proc_regionwithpathinfo)); - ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo.prp_prinfo, (uint32_t *)&vnodeaddr, (uint32_t *)&vnodeid); + ret = fill_procregioninfo( p->task, arg, (struct proc_regioninfo_internal *)&preginfo.prp_prinfo, (uintptr_t *)&vnodeaddr, (uint32_t *)&vnodeid); if (ret == 0) return(EINVAL); if (vnodeaddr) { @@ -566,7 +616,7 @@ proc_pidregionpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, __unused uint * thread directory. 
*/ int -proc_pidvnodepathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, register_t *retval) +proc_pidvnodepathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, __unused uint32_t buffersize, int32_t *retval) { struct proc_vnodepathinfo pvninfo; int error = 0; @@ -628,7 +678,7 @@ proc_pidvnodepathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, __unu } int -proc_pidpathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, uint32_t buffersize, __unused register_t *retval) +proc_pidpathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, uint32_t buffersize, __unused int32_t *retval) { int vid, error; vnode_t tvp; @@ -649,7 +699,7 @@ proc_pidpathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, uint32_t b vid = vnode_vid(tvp); error = vnode_getwithvid(tvp, vid); if (error == 0) { - error = vn_getpath(tvp, buf, &len); + error = vn_getpath_fsenter(tvp, buf, &len); vnode_put(tvp); if (error == 0) { error = vnode_lookup(buf, 0, &nvp, vfs_context_current()); @@ -665,11 +715,25 @@ proc_pidpathinfo(proc_t p, __unused uint64_t arg, user_addr_t buffer, uint32_t b } +int +proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo) +{ + int error = 0; + + bzero(pwqinfo, sizeof(struct proc_workqueueinfo)); + + error = fill_procworkqueue(p, pwqinfo); + if (error) + return(ESRCH); + else + return(0); + +} /********************************** proc_pidinfo ********************************/ int -proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, register_t * retval) +proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval) { struct proc * p = PROC_NULL; int error = ENOTSUP; @@ -677,6 +741,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu int findzomb = 0; int refheld = 0; uint32_t size; + int zombie = 0; switch (flavor) { case PROC_PIDLISTFDS: @@ -714,6 +779,13 @@ proc_pidinfo(int 
pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDPATHINFO: size = MAXPATHLEN; break; + case PROC_PIDWORKQUEUEINFO: + /* kernel does not have workq info */ + if (pid == 0) + return(EINVAL); + else + size = PROC_PIDWORKQUEUEINFO_SIZE; + break; default: return(EINVAL); } @@ -747,16 +819,18 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDTBSDINFO: { struct proc_bsdinfo pbsd; + zombie = 0; if (arg) findzomb = 1; p = proc_find(pid); if (p == PROC_NULL) { - if (findzomb) + if (findzomb) p = pzfind(pid); if (p == NULL) { error = ESRCH; goto out; } + zombie = 1; } else refheld = 1; /* Do we have permission to look into this ? */ @@ -765,7 +839,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu proc_rele(p); goto out; } - error = proc_pidbsdinfo(p, &pbsd); + error = proc_pidbsdinfo(p, &pbsd, zombie); if (refheld != 0) proc_rele(p); if (error == 0) { @@ -791,7 +865,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDTASKALLINFO: { struct proc_taskallinfo pall; - error = proc_pidbsdinfo(p, &pall.pbsd); + error = proc_pidbsdinfo(p, &pall.pbsd, 0); error = proc_pidtaskinfo(p, &pall.ptinfo); if (error == 0) { error = copyout(&pall, buffer, sizeof(struct proc_taskallinfo)); @@ -858,6 +932,19 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu } break; + + case PROC_PIDWORKQUEUEINFO:{ + struct proc_workqueueinfo pwqinfo; + + error = proc_pidworkqueueinfo(p, &pwqinfo); + if (error == 0) { + error = copyout(&pwqinfo, buffer, sizeof(struct proc_workqueueinfo)); + if (error == 0) + *retval = sizeof(struct proc_workqueueinfo); + } + } + break; + default: error = ENOTSUP; } @@ -870,7 +957,7 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu int -pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, 
register_t * retval) +pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct vnode_fdinfo vfi; int error= 0; @@ -891,7 +978,7 @@ pid_vnodeinfo(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, u } int -pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct vnode_fdinfowithpath vfip; int count, error= 0; @@ -958,7 +1045,7 @@ fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo) } int -pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { #if SOCKETS struct socket_fdinfo s; @@ -978,7 +1065,7 @@ pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t b } int -pid_pseminfo(struct psemnode *psem, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_pseminfo(struct psemnode *psem, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct psem_fdinfo pseminfo; int error = 0; @@ -995,7 +1082,7 @@ pid_pseminfo(struct psemnode *psem, struct fileproc *fp, int closeonexec, user_ } int -pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_pshminfo(struct pshmnode *pshm, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct pshm_fdinfo pshminfo; int error = 0; @@ -1012,7 +1099,7 @@ pid_pshminfo(struct 
pshmnode *pshm, struct fileproc *fp, int closeonexec, user_ } int -pid_pipeinfo(struct pipe * p, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_pipeinfo(struct pipe * p, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct pipe_fdinfo pipeinfo; int error = 0; @@ -1028,7 +1115,7 @@ pid_pipeinfo(struct pipe * p, struct fileproc *fp, int closeonexec, user_addr_ } int -pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, register_t * retval) +pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, int closeonexec, user_addr_t buffer, __unused uint32_t buffersize, int32_t * retval) { struct kqueue_fdinfo kqinfo; int error = 0; @@ -1046,7 +1133,7 @@ pid_kqueueinfo(struct kqueue * kq, struct fileproc *fp, int closeonexec, user_a } int -pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unused int closeonexec, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused register_t * retval) +pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unused int closeonexec, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused int32_t * retval) { return ENOTSUP; } @@ -1055,7 +1142,7 @@ pid_atalkinfo(__unused struct atalk * at, __unused struct fileproc *fp, __unuse /************************** proc_pidfdinfo routine ***************************/ int -proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffersize, register_t * retval) +proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffersize, int32_t * retval) { proc_t p; int error = ENOTSUP; @@ -1194,6 +1281,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer } break; +#if NETAT case PROC_PIDFDATALKINFO: { struct atalk * at; @@ -1206,7 +1294,7 @@ proc_pidfdinfo(int pid, int flavor, int fd, 
user_addr_t buffer, uint32_t buffer error = pid_atalkinfo(at, fp, closeonexec, buffer, buffersize, retval); } break; - +#endif /* NETAT */ default: { error = EINVAL; } @@ -1241,7 +1329,7 @@ proc_security_policy(proc_t p) } int -proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, register_t * retval) +proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, int32_t * retval) { if (suser(kauth_cred_get(), (u_short *)0) == 0) { return(log_dmesg(buffer, buffersize, retval)); @@ -1249,6 +1337,41 @@ proc_kernmsgbuf(user_addr_t buffer, uint32_t buffersize, register_t * retval) return(EPERM); } +/* ********* process control sets on self only */ +int +proc_setcontrol(int pid, int flavor, uint64_t arg, __unused user_addr_t buffer, __unused uint32_t buffersize, __unused int32_t * retval) +{ + struct proc * pself = PROC_NULL; + int error = 0; + uint32_t pcontrol = (uint32_t)arg; + + + pself = current_proc(); + if (pid != pself->p_pid) + return(EINVAL); + + if (pcontrol > P_PCMAX) + return(EINVAL); + + switch (flavor) { + case PROC_SELFSET_PCONTROL: { + proc_lock(pself); + /* reset existing control setting while retaining action state */ + pself->p_pcaction &= PROC_ACTION_MASK; + /* set new control state */ + pself->p_pcaction |= pcontrol; + proc_unlock(pself); + } + break; + + default: + error = ENOTSUP; + } + + return(error); +} + + /* * copy stat64 structure into vinfo_stat structure. */ diff --git a/bsd/kern/pthread_support.c b/bsd/kern/pthread_support.c index 6cbe8e0df..2691813a4 100644 --- a/bsd/kern/pthread_support.c +++ b/bsd/kern/pthread_support.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,30 +30,18 @@ * pthread_support.c */ - -#define _PTHREAD_CONDATTR_T -#define _PTHREAD_COND_T -#define _PTHREAD_MUTEXATTR_T -#define _PTHREAD_MUTEX_T -#define _PTHREAD_RWLOCKATTR_T -#define _PTHREAD_RWLOCK_T - -#undef pthread_mutexattr_t -#undef pthread_mutex_t -#undef pthread_condattr_t -#undef pthread_cond_t -#undef pthread_rwlockattr_t -#undef pthread_rwlock_t +#if PSYNCH #include +#include #include #include #include #include #include #include +#include #include -#include #include #include #include @@ -61,135 +49,3212 @@ #include #include #include - -#include #include - +#include #include -#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include +#include -#define PTHREAD_SYNCH_MAX 256 -static pthread_mutex_t * pmutex_trans_array[PTHREAD_SYNCH_MAX]; -static pthread_cond_t * pcond_trans_array[PTHREAD_SYNCH_MAX]; -//static pthread_rwlock_t * prwlock_trans_array[PTHREAD_SYNCH_MAX]; +#define _PSYNCH_TRACE_ 0 /* kdebug trace */ +#define __TESTPANICS__ 0 /* panics for error conditions */ +#define COND_MTX_WAITQUEUEMOVE 0 /* auto move from cvar wait queue to mutex waitqueue */ -pthread_mutex_t * -pthread_id_to_mutex(int mutexid) -{ - pthread_mutex_t * mtx = NULL; +#if _PSYNCH_TRACE_ +#define _PSYNCH_TRACE_MLWAIT 0x9000000 +#define _PSYNCH_TRACE_MLDROP 0x9000004 +#define _PSYNCH_TRACE_CVWAIT 0x9000008 +#define _PSYNCH_TRACE_CVSIGNAL 0x900000c +#define _PSYNCH_TRACE_CVBROAD 0x9000010 +#define _PSYNCH_TRACE_KMDROP 0x9000014 +#define _PSYNCH_TRACE_RWRDLOCK 0x9000018 +#define _PSYNCH_TRACE_RWLRDLOCK 0x900001c +#define _PSYNCH_TRACE_RWWRLOCK 0x9000020 +#define _PSYNCH_TRACE_RWYWRLOCK 0x9000024 +#define _PSYNCH_TRACE_RWUPGRADE 0x9000028 +#define _PSYNCH_TRACE_RWDOWNGRADE 0x900002c +#define 
_PSYNCH_TRACE_RWUNLOCK 0x9000030 +#define _PSYNCH_TRACE_RWUNLOCK2 0x9000034 +#define _PSYNCH_TRACE_RWHANDLEU 0x9000038 +#define _PSYNCH_TRACE_FSEQTILL 0x9000040 +/* user side */ +#define _PSYNCH_TRACE_UM_LOCK 0x9000060 +#define _PSYNCH_TRACE_UM_UNLOCK 0x9000064 +#define _PSYNCH_TRACE_UM_MHOLD 0x9000068 +#define _PSYNCH_TRACE_UM_MDROP 0x900006c +#define _PSYNCH_TRACE_UM_CVWAIT 0x9000070 +#define _PSYNCH_TRACE_UM_CVSIG 0x9000074 +#define _PSYNCH_TRACE_UM_CVBRD 0x9000078 +#endif /* _PSYNCH_TRACE_ */ - if (mutexid >= 0 && mutexid < PTHREAD_SYNCH_MAX) { - pthread_list_lock(); - mtx = pmutex_trans_array[mutexid]; - if (mtx) { - MTX_LOCK(mtx->lock); - mtx->refcount++; - MTX_UNLOCK(mtx->lock); - } - pthread_list_unlock(); +lck_mtx_t * pthread_list_mlock; + +#define PTHHASH(addr) (&pthashtbl[(addr) & pthhash]) +extern LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl; +struct pthhashhead * pth_glob_hashtbl; +u_long pthhash; + +LIST_HEAD(, ksyn_wait_queue) pth_free_list; + +static int PTH_HASHSIZE = 100; + + +#define SEQFIT 0 +#define FIRSTFIT 1 + +struct ksyn_queue { + TAILQ_HEAD(, uthread) ksynq_uthlist; + uint32_t ksynq_count; /* number of entries in queue */ + uint32_t ksynq_firstnum; /* lowest seq in queue */ + uint32_t ksynq_lastnum; /* highest seq in queue */ +}; + +#define KSYN_QUEUE_READ 0 +#define KSYN_QUEUE_LREAD 1 +#define KSYN_QUEUE_WRITER 2 +#define KSYN_QUEUE_YWRITER 3 +#define KSYN_QUEUE_UPGRADE 4 +#define KSYN_QUEUE_MAX 5 + +struct ksyn_wait_queue { + LIST_ENTRY(ksyn_wait_queue) kw_hash; + LIST_ENTRY(ksyn_wait_queue) kw_list; +#if USE_WAITQUEUE + struct wait_queue kw_wq; +#endif /* USE_WAITQUEUE */ + user_addr_t kw_addr; + uint64_t kw_owner; + uint64_t kw_object; /* object backing in shared mode */ + uint64_t kw_offset; /* offset inside the object in shared mode */ + int kw_flags; /* mutex, cvar options/flags */ + int kw_pflags; /* flags under listlock protection */ + struct timeval kw_ts; /* timeval need for upkeep before free */ + int kw_iocount; 
/* inuse reference */ + + int kw_type; /* queue type like mutex, cvar, etc */ + uint32_t kw_inqueue; /* num of waiters held */ + uint32_t kw_highseq; /* highest seq in the queue */ + uint32_t kw_lowseq; /* lowest seq in the queue */ + uint32_t kw_lastunlockseq; /* the last seq that unlocked */ + uint32_t kw_pre_rwwc; /* prepost count */ + uint32_t kw_pre_lockseq; /* prepost target seq */ + uint32_t kw_pre_cvretval; /* retval for cwait on prepost */ + uint32_t kw_pre_limrd; /* prepost read only(rwlock) */ + uint32_t kw_pre_limrdseq; /* prepost limit seq for reads(rwlock) */ + uint32_t kw_pre_limrdbits; /* seqbit needed for updates on prepost */ + uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */ + uint32_t kw_pre_intrseq; /* prepost of missed wakeup limit seq */ + uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */ + uint32_t kw_pre_intrtype; /* type of failed wakeups */ + + int kw_kflags; + TAILQ_HEAD(, uthread) kw_uthlist; /* List of uthreads */ + struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */ + lck_mtx_t kw_lock; /* mutex lock protecting this structure */ + struct ksyn_wait_queue * kw_attq; /* attached queue (cvar->mutex), needed in prepost */ +}; + +typedef struct ksyn_queue * ksyn_queue_t; +typedef struct ksyn_wait_queue * ksyn_wait_queue_t; + +#define PTHRW_EBIT 0x01 +#define PTHRW_LBIT 0x02 +#define PTHRW_YBIT 0x04 +#define PTHRW_WBIT 0x08 +#define PTHRW_UBIT 0x10 +#define PTHRW_RETRYBIT 0x20 +/* same as 0x20, shadow W bit for rwlock */ +#define PTHRW_SHADOW_W 0x20 + +#define PTHRW_TRYLKBIT 0x40 +#define PTHRW_RW_HUNLOCK 0x40 /* returning read thread responsible to handle unlock */ + +#define PTHRW_MTX_NONE 0x80 +#define PTHRW_RW_INIT 0x80 /* reset on the lock bits */ +/* same as 0x80, spurious rwlock unlock ret from kernel */ +#define PTHRW_RW_SPURIOUS 0x80 + +#define PTHRW_INC 0x100 + +#define PTHRW_BIT_MASK 0x000000ff /* mask for the flag bits below PTHRW_INC */ + +#define PTHRW_COUNT_SHIFT 8 +#define PTHRW_COUNT_MASK 
0xffffff00 +#define PTHRW_MAX_READERS 0xffffff00 + +/* first contended seq that kernel sees */ +#define KW_MTXFIRST_KSEQ 0x200 +#define KW_CVFIRST_KSEQ 1 +#define KW_RWFIRST_KSEQ 0x200 + +#define is_rw_ewubit_set(x) (((x) & (PTHRW_EBIT | PTHRW_WBIT | PTHRW_UBIT)) != 0) +#define is_rw_lybit_set(x) (((x) & (PTHRW_LBIT | PTHRW_YBIT)) != 0) +#define is_rw_ebit_set(x) (((x) & PTHRW_EBIT) != 0) +#define is_rw_uebit_set(x) (((x) & (PTHRW_EBIT | PTHRW_UBIT)) != 0) +#define is_rw_ubit_set(x) (((x) & PTHRW_UBIT) != 0) +#define is_rw_either_ewyubit_set(x) (((x) & (PTHRW_EBIT | PTHRW_WBIT | PTHRW_UBIT | PTHRW_YBIT)) != 0) + + +/* is x lower than Y (macro arguments are evaluated more than once) */ +#define is_seqlower(x, y) (((x) < (y)) || (((x) - (y)) > (PTHRW_MAX_READERS/2))) +/* is x lower than or eq Y */ +#define is_seqlower_eq(x, y) (((x) <= (y)) || (((x) - (y)) > (PTHRW_MAX_READERS/2))) + +/* is x greater than Y */ +#define is_seqhigher(x, y) (((x) > (y)) || (((y) - (x)) > (PTHRW_MAX_READERS/2))) + +static inline int diff_genseq(uint32_t x, uint32_t y) { + if (x > y) { + return(x-y); + } else { + return((PTHRW_MAX_READERS - y) + x + PTHRW_INC); } - return(mtx); } +#define TID_ZERO ((uint64_t)0) + +/* bits needed in handling the rwlock unlock */ +#define PTH_RW_TYPE_READ 0x01 +#define PTH_RW_TYPE_LREAD 0x02 +#define PTH_RW_TYPE_WRITE 0x04 +#define PTH_RW_TYPE_YWRITE 0x08 +#define PTH_RW_TYPE_UPGRADE 0x10 +#define PTH_RW_TYPE_MASK 0xff +#define PTH_RW_TYPE_SHIFT 8 + +#define PTH_RWSHFT_TYPE_READ 0x0100 +#define PTH_RWSHFT_TYPE_LREAD 0x0200 +#define PTH_RWSHFT_TYPE_WRITE 0x0400 +#define PTH_RWSHFT_TYPE_YWRITE 0x0800 +#define PTH_RWSHFT_TYPE_MASK 0xff00 + +/* + * Mutex protocol attributes + */ +#define PTHREAD_PRIO_NONE 0 +#define PTHREAD_PRIO_INHERIT 1 +#define PTHREAD_PRIO_PROTECT 2 +#define PTHREAD_PROTOCOL_FLAGS_MASK 0x3 + +/* + * Mutex type attributes + */ +#define PTHREAD_MUTEX_NORMAL 0 +#define PTHREAD_MUTEX_ERRORCHECK 4 +#define PTHREAD_MUTEX_RECURSIVE 8 +#define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_NORMAL +#define PTHREAD_TYPE_FLAGS_MASK 0xc + +/* + * 
Mutex pshared attributes + */ +#define PTHREAD_PROCESS_SHARED 0x10 +#define PTHREAD_PROCESS_PRIVATE 0x20 +#define PTHREAD_PSHARED_FLAGS_MASK 0x30 + +/* + * Mutex policy attributes + */ +#define _PTHREAD_MUTEX_POLICY_NONE 0 +#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 0x040 /* 1 */ +#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 0x080 /* 2 */ +#define _PTHREAD_MUTEX_POLICY_REALTIME 0x0c0 /* 3 */ +#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 0x100 /* 4 */ +#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 0x140 /* 5 */ +#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 0x180 /* 6 */ +#define PTHREAD_POLICY_FLAGS_MASK 0x1c0 + +#define _PTHREAD_MTX_OPT_HOLDLOCK 0x200 +#define _PTHREAD_MTX_OPT_NOHOLDLOCK 0x400 +#define _PTHREAD_MTX_OPT_LASTDROP (_PTHREAD_MTX_OPT_HOLDLOCK | _PTHREAD_MTX_OPT_NOHOLDLOCK) + +#define KSYN_WQ_INLIST 1 +#define KSYN_WQ_INHASH 2 +#define KSYN_WQ_SHARED 4 +#define KSYN_WQ_FLIST 0X10 /* in free list to be freed after a short delay */ + +#define KSYN_CLEANUP_DEADLINE 10 +int psynch_cleanupset; +thread_call_t psynch_thcall; -int -pthread_id_mutex_add(pthread_mutex_t * mutex) +#define KSYN_WQTYPE_INWAIT 0x1000 +#define KSYN_WQTYPE_MTX 0x1 +#define KSYN_WQTYPE_CVAR 0x2 +#define KSYN_WQTYPE_RWLOCK 0x4 +#define KSYN_WQTYPE_SEMA 0x8 +#define KSYN_WQTYPE_BARR 0x10 +#define KSYN_WQTYPE_MASK 0xffff + +#define KSYN_MTX_MAX 0x0fffffff + +#define KW_UNLOCK_PREPOST 0x01 +#define KW_UNLOCK_PREPOST_UPGRADE 0x02 +#define KW_UNLOCK_PREPOST_DOWNGRADE 0x04 +#define KW_UNLOCK_PREPOST_READLOCK 0x08 +#define KW_UNLOCK_PREPOST_LREADLOCK 0x10 +#define KW_UNLOCK_PREPOST_WRLOCK 0x20 +#define KW_UNLOCK_PREPOST_YWRLOCK 0x40 + +#define CLEAR_PREPOST_BITS(kwq) {\ + kwq->kw_pre_lockseq = 0; \ + kwq->kw_pre_rwwc = 0; \ + kwq->kw_pre_cvretval = 0; \ + } + +#define CLEAR_READ_PREPOST_BITS(kwq) {\ + kwq->kw_pre_limrd = 0; \ + kwq->kw_pre_limrdseq = 0; \ + kwq->kw_pre_limrdbits = 0; \ + } + +#define CLEAR_INTR_PREPOST_BITS(kwq) {\ + kwq->kw_pre_intrcount = 0; \ + kwq->kw_pre_intrseq = 0; \ + kwq->kw_pre_intrretbits = 
0; \ + kwq->kw_pre_intrtype = 0; \ + } + +void pthread_list_lock(void); +void pthread_list_unlock(void); +void pthread_list_lock_spin(void); +void pthread_list_lock_convert_spin(void); +void ksyn_wqlock(ksyn_wait_queue_t kwq); +void ksyn_wqunlock(ksyn_wait_queue_t kwq); +ksyn_wait_queue_t ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t offset); +int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype , ksyn_wait_queue_t * wq); +void ksyn_wqrelease(ksyn_wait_queue_t mkwq, ksyn_wait_queue_t ckwq); +int ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, uthread_t uth); +kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, uthread_t uth); +void ksyn_move_wqthread(ksyn_wait_queue_t ckwq, ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t updateval, int diffgen, int nomutex); +extern thread_t port_name_to_thread(mach_port_name_t port_name); +extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset); +static void UPDATE_KWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype, int retry); +void psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags); + +#if USE_WAITQUEUE +kern_return_t wait_queue_move_all(wait_queue_t from, event64_t eventfrom, wait_queue_t to, event64_t eventto); +kern_return_t wait_queue_move_thread(wait_queue_t from, event64_t eventfrom, thread_t th, wait_queue_t to, event64_t eventto, thread_t * mthp); +#endif /* USE_WAITQUEUE */ +int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t * updatep, int flags, int *blockp, uint32_t premgen); +void ksyn_queue_init(ksyn_queue_t kq); +int ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, int firstfit); +struct uthread * ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq); +void ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, 
uthread_t uth); +void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq); +uint32_t find_nextlowseq(ksyn_wait_queue_t kwq); +uint32_t find_nexthighseq(ksyn_wait_queue_t kwq); +int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp); +int find_diff(uint32_t upto, uint32_t lowest); +uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto); +int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp); +int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]); +uthread_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq); +int kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, int flags, uint32_t premgen, int * blockp); + + +static void +UPDATE_KWQ(__unused ksyn_wait_queue_t kwq, __unused uint32_t mgen, __unused uint32_t ugen, __unused uint32_t rw_wc, __unused uint64_t tid, __unused int wqtype, __unused int retry) { - int i; +} - pthread_list_lock(); - for(i = 1; i < PTHREAD_SYNCH_MAX; i++) { - if (pmutex_trans_array[i] == 0) { - pmutex_trans_array[i] = mutex; - break; - } - } - pthread_list_unlock(); - if (i == PTHREAD_SYNCH_MAX) - return(0); - return(i); +/* to protect the hashes, iocounts, freelist */ +void +pthread_list_lock(void) +{ + lck_mtx_lock(pthread_list_mlock); } +void +pthread_list_lock_spin(void) +{ + lck_mtx_lock_spin(pthread_list_mlock); +} void -pthread_id_mutex_remove(int mutexid) +pthread_list_lock_convert_spin(void) { - pthread_list_lock(); - if (pmutex_trans_array[mutexid]) { - pmutex_trans_array[mutexid] = 0; + lck_mtx_convert_spin(pthread_list_mlock); +} + + +void +pthread_list_unlock(void) +{ + lck_mtx_unlock(pthread_list_mlock); +} + +/* to protect the indiv queue */ +void +ksyn_wqlock(ksyn_wait_queue_t kwq) +{ + + lck_mtx_lock(&kwq->kw_lock); +} + +void +ksyn_wqunlock(ksyn_wait_queue_t kwq) +{ + lck_mtx_unlock(&kwq->kw_lock); +} + + +/* 
routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */ +void +psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags) +{ + uint32_t nextgen, low_writer, updatebits; + int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT; + uthread_t uth; + kern_return_t kret = KERN_SUCCESS; + + + nextgen = (ugen + PTHRW_INC); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_START, kwq, lkseq, ugen, flags, 0); +#endif /* _PSYNCH_TRACE_ */ + + ksyn_wqlock(kwq); + +redrive: + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 1, kwq->kw_inqueue, nextgen, 0); +#endif /* _PSYNCH_TRACE_ */ + if (kwq->kw_inqueue != 0) { + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | PTHRW_EBIT; + kwq->kw_lastunlockseq = ugen; + if (firstfit != 0) + { +#if __TESTPANICS__ + panic("psynch_mutexdrop_internal: first fit mutex arrives, not enabled yet \n"); +#endif /* __TESTPANICS__ */ + /* first fit , pick any one */ + uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); + + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) + updatebits |= PTHRW_WBIT; +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 2, uth, updatebits, 0); +#endif /* _PSYNCH_TRACE_ */ + + uth->uu_psynchretval = updatebits; + uth->uu_kwqqueue = NULL; + + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("psynch_mutexdrop_internal: panic unable to wakeup firstfit mutex thread\n"); + if (kret == KERN_NOT_WAITING) + goto redrive; + } else { + /* handle fairshare */ + low_writer = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; + low_writer &= PTHRW_COUNT_MASK; + + if (low_writer == nextgen) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 3, low_writer, nextgen, 0); +#endif /* _PSYNCH_TRACE_ */ + /* next seq to be granted 
found */ + uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) + updatebits |= PTHRW_WBIT; + + uth->uu_psynchretval = updatebits; + uth->uu_kwqqueue = NULL; + + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n"); + if (kret == KERN_NOT_WAITING) + goto redrive; + + } else if (is_seqhigher(low_writer, nextgen) != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 4, low_writer, nextgen, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc++; + kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); + } else { +#if __TESTPANICS__ + panic("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n"); +#endif /* __TESTPANICS__ */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 5, low_writer, nextgen, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen); + if (uth != NULL) { + /* next seq to be granted found */ + + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) + updatebits |= PTHRW_WBIT; + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 6, updatebits, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + uth->uu_psynchretval = updatebits; + uth->uu_kwqqueue = NULL; + + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n"); + if (kret == KERN_NOT_WAITING) + goto redrive; + } else { + /* next seq to be granted not found, prepost */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 7, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc++; + kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); 
+ } + } + } + } else { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 8, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + /* if firstfit the last one could be spurious */ + if ((firstfit == 0) || ((lkseq & PTHRW_COUNT_MASK) != nextgen)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, kwq, 9, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_lastunlockseq = ugen; + kwq->kw_pre_rwwc++; + kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); + } } - pthread_list_unlock(); + + ksyn_wqunlock(kwq); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_KMDROP | DBG_FUNC_END, kwq, 0, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqrelease(kwq, NULL); + return; } +/* + * psynch_mutexwait: This system call is used for contended psynch mutexes to block. + */ -void -pthread_mutex_release(pthread_mutex_t * mutex) +int +psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t * retval) { - MTX_LOCK(mutex->lock); - mutex->refcount --; - MTX_UNLOCK(mutex->lock); + user_addr_t mutex = uap->mutex; + uint32_t mgen = uap->mgen; + uint32_t ugen = uap->ugen; + uint64_t tid = uap->tid; + int flags = uap->flags; + ksyn_wait_queue_t kwq; + int error=0; + int ins_flags; + uthread_t uth; + int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT; + uint32_t lockseq, updatebits; + + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0); +#endif /* _PSYNCH_TRACE_ */ + + uth = current_uthread(); + + uth->uu_lockseq = uap->mgen; + lockseq = (uap->mgen & PTHRW_COUNT_MASK); + + if (firstfit == 0) { + ins_flags = SEQFIT; + } else { + /* first fit */ + ins_flags = FIRSTFIT; + } + + error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + 
return(error); + } + + ksyn_wqlock(kwq); + + + if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || (lockseq == kwq->kw_pre_lockseq ))) { + /* got preposted lock */ + kwq->kw_pre_rwwc--; + if (kwq->kw_pre_rwwc == 0) { + CLEAR_PREPOST_BITS(kwq); + kwq->kw_lastunlockseq = 0; + } else { + panic("psynch_mutexwait: more than one prepost %d\n", (kwq->kw_pre_rwwc + 1)); + kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */ + } + if (kwq->kw_inqueue == 0) { + updatebits = lockseq | PTHRW_EBIT; + } else { + updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTHRW_EBIT | PTHRW_WBIT); + } + + uth->uu_psynchretval = updatebits; +#if __TESTPANICS__ + if ((updatebits & PTHRW_COUNT_MASK) == 0) + panic("psynch_mutexwait: (prepost)returning 0 lseq in mutexwait with EBIT \n"); +#endif /* __TESTPANICS__ */ + ksyn_wqunlock(kwq); + *retval = updatebits; + goto out; + } + + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], mgen, uth, ins_flags); + if (error != 0) + panic("psynch_mutexwait: failed to enqueue\n"); + + error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth); + /* drops the wq lock */ + + if (error != 0) { + ksyn_wqlock(kwq); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 2, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + if (uth->uu_kwqqueue != NULL) + ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth); + ksyn_wqunlock(kwq); + } else { + updatebits = uth->uu_psynchretval; + *retval = updatebits; +#if __TESTPANICS__ + if ((updatebits & PTHRW_COUNT_MASK) == 0) + panic("psynch_mutexwait: returning 0 lseq in mutexwait with EBIT \n"); +#endif /* __TESTPANICS__ */ + } +out: + ksyn_wqrelease(kwq, NULL); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + + return(error); } +/* + * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes. 
+ */ +int +psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, __unused uint32_t * retval) +{ + user_addr_t mutex = uap->mutex; + uint32_t mgen = uap->mgen; + uint32_t lkseq = mgen & PTHRW_COUNT_MASK; + uint32_t ugen = uap->ugen; + uint64_t tid = uap->tid; + int flags = uap->flags; + ksyn_wait_queue_t kwq; + int error=0; + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, KSYN_WQTYPE_MTX, &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_END, (uint32_t)mutex, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + psynch_mutexdrop_internal(kwq, lkseq, ugen, flags); + /* drops the kwq reference */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_MLDROP | DBG_FUNC_END, (uint32_t)mutex, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(0); + +} -pthread_cond_t * -pthread_id_to_cond(int condid) +/* + * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. 
+ */ +int +psynch_cvbroad(__unused proc_t p, struct psynch_cvbroad_args * uap, int * retval) { - pthread_cond_t * cond = NULL; + user_addr_t cond = uap->cv; + uint32_t cgen = uap->cvgen; + uint32_t diffgen = uap->diffgen; + uint32_t mgen = uap->mgen; + int flags = uap->flags; + ksyn_wait_queue_t kwq, ckwq; + int error=0; +#if COND_MTX_WAITQUEUEMOVE + int mutexowned = flags & _PTHREAD_MTX_OPT_HOLDLOCK; + int nomutex = flags & _PTHREAD_MTX_OPT_NOHOLDLOCK; + user_addr_t mutex = uap->mutex; + uint32_t ugen = uap->ugen; + uint64_t tid = uap->tid; + uthread_t uth; + kern_return_t kret = KERN_SUCCESS; +#else /* COND_MTX_WAITQUEUEMOVE */ + int nomutex = _PTHREAD_MTX_OPT_NOHOLDLOCK; +#endif /* COND_MTX_WAITQUEUEMOVE */ + uint32_t nextgen, ngen; + int updatebits = 0; +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_START, (uint32_t)cond, (uint32_t) 0, cgen, mgen, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_wqfind(cond, cgen, cgen, 0, 0, flags, KSYN_WQTYPE_CVAR, &ckwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } - if (condid >= 0 && condid < PTHREAD_SYNCH_MAX) { - pthread_list_lock(); - cond = pcond_trans_array[condid]; - if (cond) { - COND_LOCK(cond->lock); - cond->refcount++; - COND_UNLOCK(cond->lock); - } - pthread_list_unlock(); +#if COND_MTX_WAITQUEUEMOVE + ngen = mgen + (PTHRW_INC * diffgen); + if (nomutex ==0) { + error = ksyn_wqfind(mutex, ngen, ugen, 0, tid, flags, KSYN_WQTYPE_MTX, &kwq); + if (error != 0) { + kwq = NULL; + goto out; + } + } +#else /* COND_MTX_WAITQUEUEMOVE */ + nomutex = _PTHREAD_MTX_OPT_NOHOLDLOCK; + kwq= NULL; + ngen = 0; +#endif /* COND_MTX_WAITQUEUEMOVE */ + + + ksyn_wqlock(ckwq); +#if COND_MTX_WAITQUEUEMOVE +redrive: +#endif /* COND_MTX_WAITQUEUEMOVE */ + if (diffgen > ckwq->kw_inqueue) { + ckwq->kw_pre_rwwc = diffgen - ckwq->kw_inqueue; + ckwq->kw_pre_lockseq = cgen & 
PTHRW_BIT_MASK; + updatebits = ckwq->kw_pre_rwwc; /* unused mutex refs */ + nextgen = (mgen + (ckwq->kw_pre_rwwc * PTHRW_INC)); + } else { + updatebits = 0; + nextgen = mgen + PTHRW_INC; + } + + if (ckwq->kw_inqueue != 0) { +#if COND_MTX_WAITQUEUEMOVE + if (mutexowned != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, ckwq->kw_inqueue, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq); + uth->uu_psynchretval = ngen; + uth->uu_kwqqueue = NULL; + + kret = ksyn_wakeup_thread(ckwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("cvbraoad: failed to remove\n"); + if (kret == KERN_NOT_WAITING) { + /* + * trying to wake one thread to return, so if + * failed to wakeup get the next one.. + */ + goto redrive; + } + nextgen = nextgen + PTHRW_INC; + diffgen -= 1; + } +#else /* COND_MTX_WAITQUEUEMOVE */ + updatebits = 0; +#endif /* COND_MTX_WAITQUEUEMOVE */ + + /* nomutex case or in mutexowned case after the first one */ + /* move them all to the mutex waitqueue */ + if ((ckwq->kw_inqueue != 0) && (diffgen > 0)) { + /* atleast one more posting needed and there are waiting threads */ + /* drops the ckwq lock */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, diffgen, 0); +#endif /* _PSYNCH_TRACE_ */ + /* move threads from ckwq to kwq if COND_MTX_WAITQUEUEMOVE, else wakeup */ + ksyn_move_wqthread(ckwq, kwq, nextgen, ngen, diffgen, nomutex); + } else + ksyn_wqunlock(ckwq); + } else { + /* no need for prepost as it is covered before */ + ksyn_wqunlock(ckwq); + } + + if (error == 0) { + *retval = updatebits; } - return(cond); + +#if COND_MTX_WAITQUEUEMOVE +out: +#endif /* COND_MTX_WAITQUEUEMOVE */ + ksyn_wqrelease(ckwq, kwq); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + + 
return(error); } +/* + * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. + */ +int +psynch_cvsignal(__unused proc_t p, struct psynch_cvsignal_args * uap, int * retval) +{ + user_addr_t cond = uap->cv; + uint32_t cgen = uap->cvgen; + uint32_t cugen = uap->cvugen; + uint32_t mgen = uap->mgen; + int threadport = uap->thread_port; + int flags = uap->flags; + ksyn_wait_queue_t kwq, ckwq; + int error=0, kret; + uthread_t uth; +#if USE_WAITQUEUE + thread_t th = THREAD_NULL, mth; +#else /* USE_WAITQUEUE */ + thread_t th = THREAD_NULL; +#endif /* USE_WAITQUEUE */ +#if COND_MTX_WAITQUEUEMOVE + user_addr_t mutex = uap->mutex; + uint32_t ugen = uap->ugen; + int mutexowned = flags & _PTHREAD_MTX_OPT_HOLDLOCK; + int nomutex = flags & _PTHREAD_MTX_OPT_NOHOLDLOCK; +#else /* COND_MTX_WAITQUEUEMOVE */ + int nomutex = _PTHREAD_MTX_OPT_NOHOLDLOCK; +#endif /* COND_MTX_WAITQUEUEMOVE */ + uint32_t retbits, ngen, lockseq; + + + if (nomutex != 0) + retbits = 0; + else + retbits = 1; +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_START, (uint32_t)cond, (uint32_t) 0, cgen, mgen, 0); + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, (uint32_t)cugen , flags, mgen, 0); +#endif /* _PSYNCH_TRACE_ */ + + error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, KSYN_WQTYPE_CVAR, &ckwq); + if (error != 0) { + *retval = retbits; +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + + if ((flags & _PTHREAD_MTX_OPT_LASTDROP) == _PTHREAD_MTX_OPT_LASTDROP) { + + ksyn_wqlock(ckwq); + lockseq = cgen & PTHRW_COUNT_MASK; + /* do we need to check for lockseq as this is from last waiter, may be race ? 
*/ + if ((ckwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, ckwq->kw_pre_lockseq) != 0)) { + ckwq->kw_pre_rwwc--; + if (ckwq->kw_pre_rwwc == 0) + CLEAR_PREPOST_BITS(ckwq); + } + ksyn_wqunlock(ckwq); + /* no mutex or thread is associated with this, just notificaion */ + th = THREAD_NULL; + error = 0; + goto out; + } + + ngen = mgen + PTHRW_INC; + +#if COND_MTX_WAITQUEUEMOVE + if (nomutex == 0) { + /* mutex was not operated on, ignore it */ + error = ksyn_wqfind(mutex, ngen, ugen, 0, 0, flags, KSYN_WQTYPE_MTX, &kwq); + if (error != 0) { + *retval = retbits; + kwq = NULL; + goto out; + } + } else { +#endif /* COND_MTX_WAITQUEUEMOVE */ + kwq = NULL; +#if COND_MTX_WAITQUEUEMOVE + } +#endif /* COND_MTX_WAITQUEUEMOVE */ + + + if (threadport != 0) { + th = (thread_t)port_name_to_thread((mach_port_name_t)threadport); + if (th == THREAD_NULL) { + *retval = retbits; + error = ESRCH; + goto out; + } + } + + ksyn_wqlock(ckwq); +redrive: + if (ckwq->kw_inqueue != 0) { + *retval = 0; +#if COND_MTX_WAITQUEUEMOVE + if ((mutexowned != 0) || (nomutex != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, ckwq->kw_inqueue, 0); +#endif /* _PSYNCH_TRACE_ */ + if (th != THREAD_NULL) { + uth = get_bsdthread_info(th); + if (nomutex != 0) + ngen |= PTHRW_MTX_NONE; + uth->uu_psynchretval = ngen; + uth->uu_kwqqueue = NULL; + ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth); + kret = ksyn_wakeup_thread(ckwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("psynch_cvsignal: panic waking in cvsignal\n"); + if (kret == KERN_NOT_WAITING) { + if (threadport != 0) { + error = 0; + } else + goto redrive; + } + } else { + uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq); + if (nomutex != 0) + ngen |= PTHRW_MTX_NONE; + uth->uu_psynchretval = ngen; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(ckwq, uth); + if ((kret != KERN_SUCCESS) && (kret != 
KERN_NOT_WAITING)) + panic("psynch_cvsignal: panic waking in cvsignal\n"); + if (kret == KERN_NOT_WAITING) { + if (threadport != 0) { + error = 0; + } else + goto redrive; + } + } + ksyn_wqunlock(ckwq); + } else { +#endif /* COND_MTX_WAITQUEUEMOVE */ + /* need to move a thread to another queue */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, ckwq->kw_inqueue, 0); +#endif /* _PSYNCH_TRACE_ */ + if (th != THREAD_NULL) { + uth = get_bsdthread_info(th); + /* if given thread not blocked in cvwait , return error */ + if (uth->uu_kwqqueue != ckwq) { + error = EINVAL; + ksyn_wqunlock(ckwq); + goto out; + } + ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth); + } else { + uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER],ckwq); + if (uth == NULL) + panic("cvsign: null uthread after rem"); + } +#if COND_MTX_WAITQUEUEMOVE + ksyn_wqunlock(ckwq); +#else /* COND_MTX_WAITQUEUEMOVE */ + uth->uu_psynchretval = 0; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(ckwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("psynch_cvsignal: panic waking in cvsignal\n"); + if (kret == KERN_NOT_WAITING) { + error = 0; + if (threadport == 0) + goto redrive; + } + + ksyn_wqunlock(ckwq); + error = 0; +#endif /* COND_MTX_WAITQUEUEMOVE */ + +#if COND_MTX_WAITQUEUEMOVE + ksyn_wqlock(kwq); + ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ngen, uth, SEQFIT); +#if USE_WAITQUEUE + kret = wait_queue_move_thread(&ckwq->kw_wq, ckwq->kw_addr, th, &kwq->kw_wq, kwq->kw_addr, &mth); + if (kret == KERN_SUCCESS) { + if (mth != THREAD_NULL) { + uth = (struct uthread *)get_bsdthread_info(mth); + uth->uu_lockseq = ngen; + TAILQ_INSERT_TAIL(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_uthlist, uth, uu_mtxlist); + } + } +#else /* USE_WAITQUEUE */ + /* no need to move anything, just update the sequence */ + uth->uu_lockseq = ngen; + +#endif /* USE_WAITQUEUE */ + 
ksyn_wqunlock(kwq); + } +#endif /* COND_MTX_WAITQUEUEMOVE */ + } else { + /* prepost */ +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)cond, 0, 3, ckwq->kw_inqueue, 0); +#endif /* _PSYNCH_TRACE_ */ + if (threadport != 0) { + error = EINVAL; + ksyn_wqunlock(ckwq); + goto out; + } + + ckwq->kw_pre_rwwc++; + ckwq->kw_attq = kwq; + ckwq->kw_pre_lockseq = cgen & PTHRW_BIT_MASK; + ckwq->kw_pre_cvretval = ngen; + *retval = retbits; + ksyn_wqunlock(ckwq); + } + /* ckwq is unlocked here */ + +out: + ksyn_wqrelease(ckwq, kwq); + if (th != THREAD_NULL) + thread_deallocate(th); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + + return(error); +} -int -pthread_id_cond_add(pthread_cond_t * cond) +/* + * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. + */ +int +psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * retval) { - int i; + user_addr_t cond = uap->cv; + uint32_t cgen = uap->cvgen; + uint32_t cugen = uap->cvugen; + user_addr_t mutex = uap->mutex; + uint32_t mgen =0, ugen; + int flags = 0; + ksyn_wait_queue_t kwq, ckwq; + int error=0; + uint64_t abstime = 0; + uint32_t lockseq, updatebits; + struct timespec ts; + uthread_t uth; - pthread_list_lock(); - for(i = 1; i < PTHREAD_SYNCH_MAX; i++) { - if (pcond_trans_array[i] == 0) { - pcond_trans_array[i] = cond; - break; + /* for conformance reasons */ + __pthread_testcancel(0); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_START, (uint32_t)cond, (uint32_t) mutex, cgen, mgen, 0); +#endif /* _PSYNCH_TRACE_ */ + flags = 0; + if ((uap->usec & 0xc0000000) != 0) { + if (uap->usec & 0x40000000) + flags |= PTHREAD_PROCESS_SHARED; + if (uap->usec & 0x80000000) + flags |= _PTHREAD_MUTEX_POLICY_FIRSTFIT; + } + + error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, 
&ckwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + if (mutex != (user_addr_t)0) { + mgen = uap->mgen; + ugen = uap->ugen; + + error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, KSYN_WQTYPE_MTX, &kwq); { + if (error != 0) + goto out; } + + psynch_mutexdrop_internal(kwq, mgen, ugen, flags); + /* drops kwq reference */ } - pthread_list_unlock(); - if (i == PTHREAD_SYNCH_MAX) - return(0); - return(i); + + uth = current_uthread(); + uth->uu_lockseq = cgen; + lockseq = (cgen & PTHRW_COUNT_MASK); + + if (uap->sec != 0 || (uap->usec & 0x3fffffff) != 0) { + ts.tv_sec = uap->sec; + ts.tv_nsec = (uap->usec & 0xc0000000); + nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime ); + clock_absolutetime_interval_to_deadline( abstime, &abstime ); + } + ksyn_wqlock(ckwq); + if ((ckwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, ckwq->kw_pre_lockseq) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 1, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + +#if COND_MTX_WAITQUEUEMOVE + updatebits = ckwq->kw_pre_cvretval | PTHRW_MTX_NONE; +#else /* COND_MTX_WAITQUEUEMOVE */ + updatebits = 0; +#endif /* COND_MTX_WAITQUEUEMOVE */ + ckwq->kw_pre_rwwc--; + if (ckwq->kw_pre_rwwc == 0) + CLEAR_PREPOST_BITS(ckwq); + *retval = updatebits; + error = 0; + ksyn_wqunlock(ckwq); + goto out; + + } else { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 2, cgen, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], cgen, uth, FIRSTFIT); + if (error != 0) + panic("psynch_cvwait: failed to enqueue\n"); + error = ksyn_block_thread_locked(ckwq, abstime, uth); + /* drops the lock */ + } + + if (error != 0) { + ksyn_wqlock(ckwq); +#if _PSYNCH_TRACE_ + 
KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)cond, 0, 3, error, 0); +#endif /* _PSYNCH_TRACE_ */ + if (uth->uu_kwqqueue != NULL) { + ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth); + } + ksyn_wqunlock(ckwq); + } else { + *retval = uth->uu_psynchretval; + + } +out: +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqrelease(ckwq, NULL); + return(error); } +/* ***************** pthread_rwlock ************************ */ +/* + * psynch_rw_rdlock: This system call is used for psync rwlock readers to block. + */ +int +psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t * retval) +{ + user_addr_t rwlock = uap->rwlock; + uint32_t lgen = uap->lgenval; + uint32_t ugen = uap->ugenval; + uint32_t rw_wc = uap->rw_wc; + //uint64_t tid = uap->tid; + int flags = uap->flags; + int error = 0, block; + uint32_t lockseq = 0, updatebits = 0, preseq = 0; + ksyn_wait_queue_t kwq; + uthread_t uth; -void -pthread_id_cond_remove(int condid) +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = current_uthread(); + + /* preserve the seq number */ + uth->uu_lockseq = lgen; + lockseq = lgen & PTHRW_COUNT_MASK; + + error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + ksyn_wqlock(kwq); + + /* handle first the missed wakeups */ + if ((kwq->kw_pre_intrcount != 0) && + ((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) && + (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { + + kwq->kw_pre_intrcount--; 
+ uth->uu_psynchretval = kwq->kw_pre_intrretbits; + if (kwq->kw_pre_intrcount==0) + CLEAR_INTR_PREPOST_BITS(kwq); + ksyn_wqunlock(kwq); + goto out; + } + + /* handle unlock2/downgrade first */ + if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_limrd--; + /* acquired the locks, so return */ + uth->uu_psynchretval = kwq->kw_pre_limrdbits; + if (kwq->kw_pre_limrd == 0) + CLEAR_READ_PREPOST_BITS(kwq); + ksyn_wqunlock(kwq); + goto out; + } + + if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc--; + if (kwq->kw_pre_rwwc == 0) { + preseq = kwq->kw_pre_lockseq; + CLEAR_PREPOST_BITS(kwq); + error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block, lgen); + if (error != 0) + panic("kwq_handle_unlock failed %d\n",error); + if (block == 0) { + ksyn_wqunlock(kwq); + goto out; + } + /* insert to q and proceed as ususal */ + } + } + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], lgen, uth, SEQFIT); + if (error != 0) + panic("psynch_rw_rdlock: failed to enqueue\n"); + error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth); + /* drops the kwq lock */ + +out: + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqlock(kwq); + if (uth->uu_kwqqueue != 
NULL) + ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], uth); + ksyn_wqunlock(kwq); + } else { + /* update bits */ + *retval = uth->uu_psynchretval; + } + ksyn_wqrelease(kwq, NULL); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); +} + +/* + * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block. + */ +int +psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap, uint32_t * retval) { - pthread_list_lock(); - if (pcond_trans_array[condid]) { - pcond_trans_array[condid] = 0; + user_addr_t rwlock = uap->rwlock; + uint32_t lgen = uap->lgenval; + uint32_t ugen = uap->ugenval; + uint32_t rw_wc = uap->rw_wc; + //uint64_t tid = uap->tid; + int flags = uap->flags; + + ksyn_wait_queue_t kwq; + int error=0, block = 0 ; + uthread_t uth; + uint32_t lockseq = 0, updatebits = 0, preseq = 0; + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = current_uthread(); + + uth->uu_lockseq = lgen; + lockseq = (lgen & PTHRW_COUNT_MASK); + + error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); } - pthread_list_unlock(); + + ksyn_wqlock(kwq); + + /* handle first the missed wakeups */ + if ((kwq->kw_pre_intrcount != 0) && + (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD) && + (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { + + kwq->kw_pre_intrcount--; + uth->uu_psynchretval = kwq->kw_pre_intrretbits; + if (kwq->kw_pre_intrcount==0) + CLEAR_INTR_PREPOST_BITS(kwq); + ksyn_wqunlock(kwq); + goto out; + } + + /* handle unlock2/downgrade 
first */ + if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_limrd--; + if (kwq->kw_pre_limrd == 0) + CLEAR_READ_PREPOST_BITS(kwq); + /* not a read proceed */ + } + + if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc--; + if (kwq->kw_pre_rwwc == 0) { + preseq = kwq->kw_pre_lockseq; + CLEAR_PREPOST_BITS(kwq); + error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_LREADLOCK|KW_UNLOCK_PREPOST), &block, lgen); + if (error != 0) + panic("kwq_handle_unlock failed %d\n",error); + if (block == 0) { + ksyn_wqunlock(kwq); + goto out; + } + /* insert to q and proceed as ususal */ + } + } + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], lgen, uth, SEQFIT); + if (error != 0) + panic("psynch_rw_longrdlock: failed to enqueue\n"); + + error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth); + /* drops the kwq lock */ +out: + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqlock(kwq); + if (uth->uu_kwqqueue != NULL) + ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], uth); + ksyn_wqunlock(kwq); + } else { + /* update bits */ + *retval = uth->uu_psynchretval; + } + + ksyn_wqrelease(kwq, NULL); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWLRDLOCK 
| DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); } +/* + * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. + */ +int +psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t * retval) +{ + user_addr_t rwlock = uap->rwlock; + uint32_t lgen = uap->lgenval; + uint32_t ugen = uap->ugenval; + uint32_t rw_wc = uap->rw_wc; + //uint64_t tid = uap->tid; + int flags = uap->flags; + int block; + ksyn_wait_queue_t kwq; + int error=0; + uthread_t uth; + uint32_t lockseq = 0, updatebits = 0, preseq = 0; -void -pthread_cond_release(pthread_cond_t * cond) +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = current_uthread(); + + uth->uu_lockseq = lgen; + lockseq = (lgen & PTHRW_COUNT_MASK); + + error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + ksyn_wqlock(kwq); + + /* handle first the missed wakeups */ + if ((kwq->kw_pre_intrcount != 0) && + (kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE) && + (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { + + kwq->kw_pre_intrcount--; + uth->uu_psynchretval = kwq->kw_pre_intrretbits; + if (kwq->kw_pre_intrcount==0) + CLEAR_INTR_PREPOST_BITS(kwq); + ksyn_wqunlock(kwq); + goto out; + } + + /* handle unlock2/downgrade first */ + if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_limrd--; + if (kwq->kw_pre_limrd == 0) + 
CLEAR_READ_PREPOST_BITS(kwq); + /* not a read proceed */ + } + + if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc--; + if (kwq->kw_pre_rwwc == 0) { + preseq = kwq->kw_pre_lockseq; + CLEAR_PREPOST_BITS(kwq); + error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_WRLOCK|KW_UNLOCK_PREPOST), &block, lgen); + if (error != 0) + panic("kwq_handle_unlock failed %d\n",error); + if (block == 0) { + ksyn_wqunlock(kwq); + goto out; + } + /* insert to q and proceed as ususal */ + } + } + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], lgen, uth, SEQFIT); + if (error != 0) + panic("psynch_rw_wrlock: failed to enqueue\n"); + + error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth); + /* drops the wq lock */ + +out: + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqlock(kwq); + if (uth->uu_kwqqueue != NULL) + ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uth); + ksyn_wqunlock(kwq); + } else { + /* update bits */ + *retval = uth->uu_psynchretval; + } + + ksyn_wqrelease(kwq, NULL); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); +} + +/* + * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block. 
+ */ +int +psynch_rw_yieldwrlock(__unused proc_t p, struct psynch_rw_yieldwrlock_args * uap, uint32_t * retval) { - COND_LOCK(cond->lock); - cond->refcount --; - COND_UNLOCK(cond->lock); + user_addr_t rwlock = uap->rwlock; + uint32_t lgen = uap->lgenval; + uint32_t ugen = uap->ugenval; + uint32_t rw_wc = uap->rw_wc; + //uint64_t tid = uap->tid; + int flags = uap->flags; + int block; + ksyn_wait_queue_t kwq; + int error=0; + uthread_t uth; + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); +#endif /* _PSYNCH_TRACE_ */ + uint32_t lockseq = 0, updatebits = 0, preseq = 0; + + uth = current_uthread(); + + uth->uu_lockseq = lgen; + lockseq = (lgen & PTHRW_COUNT_MASK); + + error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + ksyn_wqlock(kwq); + + /* handle first the missed wakeups */ + if ((kwq->kw_pre_intrcount != 0) && + (kwq->kw_pre_intrtype == PTH_RW_TYPE_YWRITE) && + (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { + + kwq->kw_pre_intrcount--; + uth->uu_psynchretval = kwq->kw_pre_intrretbits; + if (kwq->kw_pre_intrcount==0) + CLEAR_INTR_PREPOST_BITS(kwq); + ksyn_wqunlock(kwq); + goto out; + } + + /* handle unlock2/downgrade first */ + if ((kwq->kw_pre_limrd != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_limrdseq & PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_limrd, kwq->kw_pre_limrdseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_limrd--; + if (kwq->kw_pre_limrd == 0) + CLEAR_READ_PREPOST_BITS(kwq); + /* not a read proceed */ + } + + if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & 
PTHRW_COUNT_MASK)) != 0)) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); +#endif /* _PSYNCH_TRACE_ */ + kwq->kw_pre_rwwc--; + if (kwq->kw_pre_rwwc == 0) { + preseq = kwq->kw_pre_lockseq; + CLEAR_PREPOST_BITS(kwq); + error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_YWRLOCK|KW_UNLOCK_PREPOST), &block, lgen); + if (error != 0) + panic("kwq_handle_unlock failed %d\n",error); + if (block == 0) { + ksyn_wqunlock(kwq); + goto out; + } + /* insert to q and proceed as ususal */ + } + } + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], lgen, uth, SEQFIT); + if (error != 0) + panic("psynch_rw_yieldwrlock: failed to enqueue\n"); + + error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth); + +out: + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqlock(kwq); + if (uth->uu_kwqqueue != NULL) + ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], uth); + ksyn_wqunlock(kwq); + } else { + /* update bits */ + *retval = uth->uu_psynchretval; + } + + ksyn_wqrelease(kwq, NULL); + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); +} + + +/* + * psynch_rw_downgrade: This system call is used for wakeup blocked readers who are eligible to run due to downgrade. 
+ */ +int +psynch_rw_downgrade(__unused proc_t p, struct psynch_rw_downgrade_args * uap, __unused int * retval) +{ + user_addr_t rwlock = uap->rwlock; + uint32_t lgen = uap->lgenval; + uint32_t ugen = uap->ugenval; + uint32_t rw_wc = uap->rw_wc; + //uint64_t tid = uap->tid; + int flags = uap->flags; + uint32_t count = 0; + + ksyn_wait_queue_t kwq; + int error=0; + uthread_t uth; + uint32_t curgen = 0; + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); +#endif /* _PSYNCH_TRACE_ */ + uth = current_uthread(); + + curgen = (lgen & PTHRW_COUNT_MASK); + + error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); + if (error != 0) { +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); +#endif /* _PSYNCH_TRACE_ */ + return(error); + } + + ksyn_wqlock(kwq); + + if (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0) { + /* spurious updatebits?? */ + goto out; + } + /* fast path for default case */ + if((rw_wc == kwq->kw_inqueue) && (kwq->kw_highseq == curgen)) + goto dounlock; + + /* have we seen all the waiters? */ + if(rw_wc > kwq->kw_inqueue) { + goto prepost; + } + + if (is_seqhigher(curgen, kwq->kw_highseq) != 0) { + goto prepost; + } else { + if (find_seq_till(kwq, curgen, rw_wc, &count) == 0) { + if (count < rw_wc) { + kwq->kw_pre_limrd = rw_wc - count; + kwq->kw_pre_limrdseq = lgen; + kwq->kw_pre_limrdbits = lgen; + /* found none ? 
/*
 * psynch_rw_upgrade: This system call is used by a reader to block waiting for an upgrade to be granted.
 *
 * Arguments are read from uap (rwlock, lgenval, ugenval, rw_wc, flags).
 * Returns 0 on success, with the calling thread's uu_psynchretval stored in
 * *retval, or the error from ksyn_wqfind() / ksyn_block_thread_locked().
 *
 * Flow:
 *   1. Look up (or create) the kernel wait queue; this takes a reference that
 *      is dropped by ksyn_wqrelease() before returning.
 *   2. Consume a recorded missed wakeup of type PTH_RW_TYPE_UPGRADE, if any.
 *   3. Consume a preposted unlock; when the last expected waiter arrives,
 *      run kwq_handle_unlock() and return without blocking if it says so.
 *   4. Otherwise enqueue on KSYN_QUEUE_UPGRADE and block.
 */
int
psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32_t * retval)
{
	user_addr_t rwlock = uap->rwlock;
	uint32_t lgen = uap->lgenval;
	uint32_t ugen = uap->ugenval;
	uint32_t rw_wc = uap->rw_wc;
	//uint64_t tid = uap->tid;
	int flags = uap->flags;
	int block;	/* written by kwq_handle_unlock() through its blockp argument */
	ksyn_wait_queue_t kwq;
	int error=0;
	uthread_t uth;
	uint32_t lockseq = 0, updatebits = 0, preseq = 0;

#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
#endif /* _PSYNCH_TRACE_ */
	uth = current_uthread();

	/* remember the full generation word; only the count bits are used for ordering */
	uth->uu_lockseq = lgen;
	lockseq = (lgen & PTHRW_COUNT_MASK);

	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
#if _PSYNCH_TRACE_
		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */
		return(error);
	}

	ksyn_wqlock(kwq);

	/* handle first the missed wakeups */
	if ((kwq->kw_pre_intrcount != 0) &&
		(kwq->kw_pre_intrtype == PTH_RW_TYPE_UPGRADE) &&
		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {

		/* take over the grant that an earlier wakeup could not deliver */
		kwq->kw_pre_intrcount--;
		uth->uu_psynchretval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount==0)
			CLEAR_INTR_PREPOST_BITS(kwq);
		ksyn_wqunlock(kwq);
		goto out;
	}

	/* an unlock was preposted and this waiter is covered by its sequence */
	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
#if _PSYNCH_TRACE_
		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
#endif /* _PSYNCH_TRACE_ */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			/* last expected waiter has arrived: perform the deferred unlock handling */
			preseq = kwq->kw_pre_lockseq;
			CLEAR_PREPOST_BITS(kwq);
			error = kwq_handle_unlock(kwq, preseq, &updatebits, (KW_UNLOCK_PREPOST_UPGRADE|KW_UNLOCK_PREPOST), &block, lgen);
			if (error != 0)
				panic("kwq_handle_unlock failed %d\n",error);
			if (block == 0) {
				/* NOTE(review): updatebits is not copied into uu_psynchretval here; confirm the value returned via *retval on this path */
				ksyn_wqunlock(kwq);
				goto out;
			}
			/* insert to q and proceed as usual */
		}
	}


#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], lgen, uth, SEQFIT);
	if (error != 0)
		panic("psynch_rw_upgrade: failed to enqueue\n");


	error = ksyn_block_thread_locked(kwq, (uint64_t)0, uth);
	/* drops the lock */

out:
	if (error != 0) {
#if _PSYNCH_TRACE_
		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
#endif /* _PSYNCH_TRACE_ */
		/* the wait failed: take the thread back off the upgrade queue if it is still marked as queued */
		ksyn_wqlock(kwq);
		if (uth->uu_kwqqueue != NULL)
			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], uth);
		ksyn_wqunlock(kwq);
	} else {
		/* update bits */
		*retval = uth->uu_psynchretval;
	}

	/* drop the reference taken by ksyn_wqfind() */
	ksyn_wqrelease(kwq, NULL);
#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */
	return(error);
}
/*
 * psynch_rw_unlock2: This system call is used to wake up pending readers when an unlock grant from the
 * kernel races with the arrival of new readers.
 *
 * Arguments are read from uap (rwlock, lgenval, ugenval, rw_wc, flags).
 * Returns the error from ksyn_wqfind(), or 0; on 0 the calling thread's
 * uu_psynchretval is stored in *retval.
 *
 * The wait queue reference taken by ksyn_wqfind() is dropped with
 * ksyn_wqrelease() before returning.
 */
int
psynch_rw_unlock2(__unused proc_t p, struct psynch_rw_unlock2_args * uap, uint32_t * retval)
{
	user_addr_t rwlock = uap->rwlock;
	uint32_t lgen = uap->lgenval;
	uint32_t ugen = uap->ugenval;
	uint32_t rw_wc = uap->rw_wc;
	//uint64_t tid = uap->tid;
	int flags = uap->flags;
	uthread_t uth;
	uint32_t num_lreader, limitread, curgen, updatebits;
	ksyn_wait_queue_t kwq;
	int error=0, longreadset = 0;
	int diff;
	uint32_t count=0;

#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
#endif /* _PSYNCH_TRACE_ */
	uth = current_uthread();

	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
#if _PSYNCH_TRACE_
		KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */
		return(error);
	}

	ksyn_wqlock(kwq);

	curgen = (lgen & PTHRW_COUNT_MASK);
	diff = find_diff(lgen, ugen);

	/* readers are woken up to the caller's lock generation (same value as curgen) */
	limitread = lgen & PTHRW_COUNT_MASK;

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		/* not every expected waiter is queued yet: record how many are still outstanding */
		kwq->kw_pre_limrd = diff - count;
		kwq->kw_pre_limrdseq = lgen;
		kwq->kw_pre_limrdbits = lgen;
		/* found none ? */
		if (count == 0)
			goto out;
	}

	/* long readers queued at or below the limit switch the wakeup into long-read mode */
	if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) {
		num_lreader = kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum;
		if (is_seqlower_eq(num_lreader, limitread) != 0)
			longreadset = 1;
	}

	updatebits = lgen;
#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
#endif /* _PSYNCH_TRACE_ */
	/* from here on count holds the number of wakeups that found the thread not waiting */
	count = ksyn_wakeupreaders(kwq, limitread, longreadset, 0, updatebits, NULL);

	if (count != 0) {
		/* remember the failed wakeups in the limited-read prepost state */
		if (kwq->kw_pre_limrd != 0) {
			kwq->kw_pre_limrd += count;
		} else {
			kwq->kw_pre_limrd = count;
			kwq->kw_pre_limrdseq = lgen;
			kwq->kw_pre_limrdbits = lgen;
		}
	}
	error = 0;

out:
	if (error == 0) {
		/* update bits?? */
		/* NOTE(review): nothing in this function writes uth->uu_psynchretval; confirm this is the intended return value rather than updatebits */
		*retval = uth->uu_psynchretval;
	}
	ksyn_wqunlock(kwq);

	ksyn_wqrelease(kwq, NULL);
#if _PSYNCH_TRACE_
	KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWUNLOCK2 | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
#endif /* _PSYNCH_TRACE_ */

	return(error);
}
((kwq->kw_object == object) &&(kwq->kw_offset == objoffset)) { + return (kwq); + } + } + } + } else { + hashptr = p->p_pthhash; + kwq = (&hashptr[mutex & pthhash])->lh_first; + if (kwq != 0) + for (; kwq != NULL; kwq = kwq->kw_hash.le_next) { + if (kwq->kw_addr == mutex) { + return (kwq); + } + } + } + return(NULL); +} + +void +pth_proc_hashdelete(proc_t p) +{ + struct pthhashhead * hashptr; + ksyn_wait_queue_t kwq; + int hashsize = pthhash + 1; + int i; + + hashptr = p->p_pthhash; + if (hashptr == NULL) + return; + + for(i= 0; i < hashsize; i++) { + while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) { + pthread_list_lock(); + if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) { + kwq->kw_pflags &= ~KSYN_WQ_INHASH; + LIST_REMOVE(kwq, kw_hash); + } + if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { + kwq->kw_pflags &= ~KSYN_WQ_FLIST; + LIST_REMOVE(kwq, kw_list); + } + pthread_list_unlock(); + lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); + kfree(kwq, sizeof(struct ksyn_wait_queue)); + } + } + FREE(p->p_pthhash, M_PROC); + p->p_pthhash = NULL; +} + + +/* find kernel waitqueue, if not present create one. 
Grants a reference */ +int +ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype, ksyn_wait_queue_t * kwqp) +{ + ksyn_wait_queue_t kwq; + ksyn_wait_queue_t nkwq; + struct pthhashhead * hashptr; + uint64_t object = 0, offset = 0; + uint64_t hashhint; + proc_t p = current_proc(); + int retry = mgen & PTHRW_RETRYBIT; + int i; + + if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) + { + (void)ksyn_findobj(mutex, &object, &offset); + hashhint = object; + hashptr = pth_glob_hashtbl; + } else { + hashptr = p->p_pthhash; + } + + //pthread_list_lock_spin(); + pthread_list_lock(); + + kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset); + + if (kwq != NULL) { + kwq->kw_iocount++; + if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { + LIST_REMOVE(kwq, kw_list); + kwq->kw_pflags &= ~KSYN_WQ_FLIST; + } + UPDATE_KWQ(kwq, mgen, ugen, rw_wc, tid, wqtype, retry); + if (kwqp != NULL) + *kwqp = kwq; + pthread_list_unlock(); + return (0); + } + + pthread_list_unlock(); + + nkwq = kalloc(sizeof(struct ksyn_wait_queue)); + bzero(nkwq, sizeof(struct ksyn_wait_queue)); + nkwq->kw_addr = mutex; + nkwq->kw_flags = flags; + nkwq->kw_iocount = 1; + nkwq->kw_object = object; + nkwq->kw_offset = offset; + nkwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); + TAILQ_INIT(&nkwq->kw_uthlist); + + for (i=0; i< KSYN_QUEUE_MAX; i++) + ksyn_queue_init(&nkwq->kw_ksynqueues[i]); + + UPDATE_KWQ(nkwq, mgen, ugen, rw_wc, tid, wqtype, retry); +#if USE_WAITQUEUE + wait_queue_init(&nkwq->kw_wq, SYNC_POLICY_FIFO); +#endif /* USE_WAITQUEUE */ + lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); + + //pthread_list_lock_spin(); + pthread_list_lock(); + /* see whether it is alread allocated */ + kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset); + + if (kwq != NULL) { + kwq->kw_iocount++; + if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { + LIST_REMOVE(kwq, kw_list); + kwq->kw_pflags &= ~KSYN_WQ_FLIST; + } + UPDATE_KWQ(kwq, 
mgen, ugen, rw_wc, tid, wqtype, retry); + if (kwqp != NULL) + *kwqp = kwq; + pthread_list_unlock(); + lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); + kfree(nkwq, sizeof(struct ksyn_wait_queue)); + return (0); + } + kwq = nkwq; + + if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) + { + kwq->kw_pflags |= KSYN_WQ_SHARED; + LIST_INSERT_HEAD(&hashptr[kwq->kw_object & pthhash], kwq, kw_hash); + } else + LIST_INSERT_HEAD(&hashptr[mutex & pthhash], kwq, kw_hash); + + kwq->kw_pflags |= KSYN_WQ_INHASH; + + pthread_list_unlock(); + + if (kwqp != NULL) + *kwqp = kwq; + return (0); +} + +/* Reference from find is dropped here. Starts the free process if needed */ +void +ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq) +{ + uint64_t deadline; + struct timeval t; + int sched = 0; + + + //pthread_list_lock_spin(); + pthread_list_lock(); + kwq->kw_iocount--; + if (kwq->kw_iocount == 0) { + if ((kwq->kw_pre_rwwc == 0) && (kwq->kw_inqueue == 0)) { + microuptime(&kwq->kw_ts); + LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list); + kwq->kw_pflags |= KSYN_WQ_FLIST; + } + sched = 1; + } + if (ckwq != NULL){ + ckwq->kw_iocount--; + if ( ckwq->kw_iocount == 0) { + if ((ckwq->kw_pre_rwwc == 0) && (ckwq->kw_inqueue == 0)) { + /* mark for free if we can */ + microuptime(&ckwq->kw_ts); + LIST_INSERT_HEAD(&pth_free_list, ckwq, kw_list); + ckwq->kw_pflags |= KSYN_WQ_FLIST; + } + sched = 1; + } + } + + if (sched == 1 && psynch_cleanupset == 0) { + psynch_cleanupset = 1; + microuptime(&t); + t.tv_sec += KSYN_CLEANUP_DEADLINE; + + deadline = tvtoabstime(&t); + thread_call_enter_delayed(psynch_thcall, deadline); + } + pthread_list_unlock(); +} + +/* responsible to free the waitqueues */ +void +psynch_wq_cleanup(__unused void * param, __unused void * param1) +{ + ksyn_wait_queue_t kwq; + struct timeval t; + LIST_HEAD(, ksyn_wait_queue) freelist = {NULL}; + int count = 0, delayed = 0, diff; + uint64_t deadline = 0; + + //pthread_list_lock_spin(); + pthread_list_lock(); 
+ + microuptime(&t); + + LIST_FOREACH(kwq, &pth_free_list, kw_list) { + + if (count > 100) { + delayed = 1; + break; + } + if ((kwq->kw_iocount != 0) && (kwq->kw_inqueue != 0)) { + /* still in freelist ??? */ + continue; + } + diff = t.tv_sec - kwq->kw_ts.tv_sec; + if (diff < 0) + diff *= -1; + if (diff >= KSYN_CLEANUP_DEADLINE) { + /* out of hash */ + kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH); + LIST_REMOVE(kwq, kw_hash); + LIST_REMOVE(kwq, kw_list); + LIST_INSERT_HEAD(&freelist, kwq, kw_list); + count ++; + } else { + delayed = 1; + } + + } + if (delayed != 0) { + t.tv_sec += KSYN_CLEANUP_DEADLINE; + + deadline = tvtoabstime(&t); + thread_call_enter_delayed(psynch_thcall, deadline); + psynch_cleanupset = 1; + } else + psynch_cleanupset = 0; + + pthread_list_unlock(); + + + while ((kwq = LIST_FIRST(&freelist)) != NULL) { + LIST_REMOVE(kwq, kw_list); + lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); + kfree(kwq, sizeof(struct ksyn_wait_queue)); + } +} + + +int +ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, uthread_t uth) +{ + kern_return_t kret; + int error = 0; + + uth->uu_kwqqueue = (void *)kwq; +#if USE_WAITQUEUE + kret = wait_queue_assert_wait64(&kwq->kw_wq, kwq->kw_addr, THREAD_ABORTSAFE, abstime); +#else /* USE_WAITQUEUE */ + assert_wait_deadline(&uth->uu_psynchretval, THREAD_ABORTSAFE, abstime); +#endif /* USE_WAITQUEUE */ + ksyn_wqunlock(kwq); + + kret = thread_block(NULL); + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + } + return(error); +} + +kern_return_t +#if USE_WAITQUEUE +ksyn_wakeup_thread(ksyn_wait_queue_t kwq, uthread_t uth) +#else /* USE_WAITQUEUE */ +ksyn_wakeup_thread(__unused ksyn_wait_queue_t kwq, uthread_t uth) +#endif /* USE_WAITQUEUE */ +{ + thread_t th; + kern_return_t kret; + th = uth->uu_context.vc_thread; + +#if USE_WAITQUEUE + kret = wait_queue_wakeup64_thread(&kwq->kw_wq, kwq->kw_addr, th, THREAD_AWAKENED); +#else /* 
USE_WAITQUEUE */ + kret = thread_wakeup_prim((caddr_t)&uth->uu_psynchretval, TRUE, THREAD_AWAKENED); +#endif /* USE_WAITQUEUE */ + + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("ksyn_wakeup_thread: panic waking up thread %x\n", kret); + + + + return(kret); +} + +/* move from one waitqueue to another */ +#if COND_MTX_WAITQUEUEMOVE +void +ksyn_move_wqthread( ksyn_wait_queue_t ckwq, ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t updateval, int diffgen, int nomutex) +#else /* COND_MTX_WAITQUEUEMOVE */ +void +ksyn_move_wqthread( ksyn_wait_queue_t ckwq, __unused ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t updateval, __unused int diffgen, int nomutex) +#endif /* COND_MTX_WAITQUEUEMOVE */ +{ + kern_return_t kret; + uthread_t uth; +#if COND_MTX_WAITQUEUEMOVE + int count = 0, error, kret; + uint32_t nextgen = mgen; +#endif /* COND_MTX_WAITQUEUEMOVE */ + struct ksyn_queue kq; + uint32_t upgen; + + ksyn_queue_init(&kq); +#if USE_WAITQUEUE + /* TBD wq move */ + kret = wait_queue_move_all(&ckwq->kw_wq, ckwq->kw_addr, &kwq->kw_wq, kwq->kw_addr); +#else /* USE_WAITQUEUE */ + /* no need to move as the thread is blocked at uthread address */ + kret = KERN_SUCCESS; +#endif /* USE_WAITQUEUE */ + + if (nomutex != 0) + upgen = updateval | PTHRW_MTX_NONE; + else + upgen = updateval; + + if (kret== KERN_SUCCESS) { +redrive: + while ((uth = ksyn_queue_removefirst(&ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ckwq)) != NULL) { + if (nomutex != 0) { +#if COND_MTX_WAITQUEUEMOVE + uth->uu_psynchretval = upgen; +#else /* COND_MTX_WAITQUEUEMOVE */ + uth->uu_psynchretval = 0; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(ckwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("ksyn_move_wqthread: panic waking up \n"); + if (kret == KERN_NOT_WAITING) + goto redrive; +#endif /* COND_MTX_WAITQUEUEMOVE */ + } +#if COND_MTX_WAITQUEUEMOVE + else { + count++; + if (count >diffgen) + panic("movethread inserting more than expected\n"); + 
TAILQ_INSERT_TAIL(&kq.ksynq_uthlist, uth, uu_mtxlist); + } +#endif /* COND_MTX_WAITQUEUEMOVE */ + + } + ksyn_wqunlock(ckwq); + +#if COND_MTX_WAITQUEUEMOVE + if ( (nomutex == 0) && (count > 0)) { + ksyn_wqlock(kwq); + uth = TAILQ_FIRST(&kq.ksynq_uthlist); + while(uth != NULL) { + TAILQ_REMOVE(&kq.ksynq_uthlist, uth, uu_mtxlist); + error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen, uth, SEQFIT); + if (error != 0) { + panic("movethread insert failed\n"); + } + uth->uu_lockseq = nextgen; + nextgen += PTHRW_INC; + uth = TAILQ_FIRST(&kq.ksynq_uthlist); + } + ksyn_wqunlock(kwq); + } +#endif /* COND_MTX_WAITQUEUEMOVE */ + } else + panic("movethread : wq move all failed\n"); + return; +} + +/* find the true shared obect/offset for shared mutexes */ +int +ksyn_findobj(uint64_t mutex, uint64_t * objectp, uint64_t * offsetp) +{ + vm_page_info_basic_data_t info; + kern_return_t kret; + mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT; + + kret = vm_map_page_info(current_map(), mutex, VM_PAGE_INFO_BASIC, + (vm_page_info_t)&info, &count); + + if (kret != KERN_SUCCESS) + return(EINVAL); + + if (objectp != NULL) + *objectp = (uint64_t)info.object_id; + if (offsetp != NULL) + *offsetp = (uint64_t)info.offset; + + return(0); +} + + +/* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */ +int +kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * typep, uint32_t lowest[]) +{ + + uint32_t kw_fr, kw_flr, kw_fwr, kw_fywr, low; + int type = 0, lowtype, typenum[4]; + uint32_t numbers[4]; + int count = 0, i; + + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_READ; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) { + kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0)) + kw_fr = premgen; + } else + kw_fr = premgen; 
+ + lowest[KSYN_QUEUE_READ] = kw_fr; + numbers[count]= kw_fr; + typenum[count] = PTH_RW_TYPE_READ; + count++; + } else + lowest[KSYN_QUEUE_READ] = 0; + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_LREAD; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) { + kw_flr = kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && (is_seqlower(premgen, kw_flr) != 0)) + kw_flr = premgen; + } else + kw_flr = premgen; + + lowest[KSYN_QUEUE_LREAD] = kw_flr; + numbers[count]= kw_flr; + typenum[count] = PTH_RW_TYPE_LREAD; + count++; + } else + lowest[KSYN_QUEUE_LREAD] = 0; + + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_WRITE; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { + kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0)) + kw_fwr = premgen; + } else + kw_fwr = premgen; + + lowest[KSYN_QUEUE_WRITER] = kw_fwr; + numbers[count]= kw_fwr; + typenum[count] = PTH_RW_TYPE_WRITE; + count++; + } else + lowest[KSYN_QUEUE_WRITER] = 0; + + if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0)) { + type |= PTH_RWSHFT_TYPE_YWRITE; + /* read entries are present */ + if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) { + kw_fywr = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum; + if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (is_seqlower(premgen, kw_fywr) != 0)) + kw_fywr = premgen; + } else + kw_fywr = premgen; + + lowest[KSYN_QUEUE_YWRITER] = kw_fywr; + numbers[count]= kw_fywr; + typenum[count] = PTH_RW_TYPE_YWRITE; + count++; + } else + lowest[KSYN_QUEUE_YWRITER] = 0; + + + + if (count == 0) + 
panic("nothing in the queue???\n"); + + low = numbers[0]; + lowtype = typenum[0]; + if (count > 1) { + for (i = 1; i< count; i++) { + if(is_seqlower(numbers[i] , low) != 0) { + low = numbers[i]; + lowtype = typenum[i]; + } + } + } + type |= lowtype; + + if (typep != 0) + *typep = type; + return(0); +} + +/* wakeup readers and longreaders to upto the writer limits */ +int +ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp) +{ + uthread_t uth; + ksyn_queue_t kq; + int failedwakeup = 0; + int numwoken = 0; + kern_return_t kret = KERN_SUCCESS; + int resetbit = updatebits & PTHRW_RW_HUNLOCK; + uint32_t lbits = 0; + + lbits = updatebits; + if (longreadset != 0) { + /* clear all read and longreads */ + while ((uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwq)) != NULL) { + uth->uu_psynchretval = lbits; + /* set on one thread */ + if (resetbit != 0) { + lbits &= ~PTHRW_RW_HUNLOCK; + resetbit = 0; + } + numwoken++; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("ksyn_wakeupreaders: panic waking up readers\n"); + if (kret == KERN_NOT_WAITING) { + failedwakeup++; + } + } + while ((uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwq)) != NULL) { + uth->uu_psynchretval = lbits; + uth->uu_kwqqueue = NULL; + if (resetbit != 0) { + lbits &= ~PTHRW_RW_HUNLOCK; + resetbit = 0; + } + numwoken++; + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("ksyn_wakeupreaders: panic waking up lreaders\n"); + if (kret == KERN_NOT_WAITING) { + failedwakeup++; + } + } + } else { + kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; + while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { + uth = ksyn_queue_removefirst(kq, kwq); + uth->uu_psynchretval = lbits; + if (resetbit != 0) { + lbits &= 
~PTHRW_RW_HUNLOCK; + resetbit = 0; + } + numwoken++; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("ksyn_wakeupreaders: panic waking up readers\n"); + if (kret == KERN_NOT_WAITING) { + failedwakeup++; + } + } + } + + if (wokenp != NULL) + *wokenp = numwoken; + return(failedwakeup); +} + + +/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */ +int +kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t * updatep, int flags, int * blockp, uint32_t premgen) +{ + uint32_t low_reader, low_writer, low_ywriter, low_lreader,limitrdnum; + int rwtype, error=0; + int longreadset = 0, allreaders, failed; + uint32_t updatebits; + int prepost = flags & KW_UNLOCK_PREPOST; + thread_t preth = THREAD_NULL; + uthread_t uth; + thread_t th; + int woken = 0; + int block = 1; + uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */ + kern_return_t kret = KERN_SUCCESS; + +#if _PSYNCH_TRACE_ +#if defined(__i386__) + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_START, (uint32_t)kwq, mgen, premgen, 0, 0); +#endif +#endif /* _PSYNCH_TRACE_ */ + if (prepost != 0) { + preth = current_thread(); + } + + /* upgrade pending */ + if (is_rw_ubit_set(mgen)) { + if (prepost != 0) { + if((flags & KW_UNLOCK_PREPOST_UPGRADE) != 0) { + /* upgrade thread calling the prepost */ + /* upgrade granted */ + block = 0; + goto out; + } + + } + if (kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE].ksynq_count > 0) { + uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwq); + uth->uu_psynchretval = (mgen | PTHRW_EBIT) & ~PTHRW_UBIT; + uth->uu_kwqqueue = NULL; + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("kwq_handle_unlock: panic waking up the upgrade thread \n"); + if (kret == KERN_NOT_WAITING) { + kwq->kw_pre_intrcount = 1; /* actually a count */ + 
kwq->kw_pre_intrseq = mgen; + kwq->kw_pre_intrretbits = uth->uu_psynchretval; + kwq->kw_pre_intrtype = PTH_RW_TYPE_UPGRADE; + } + error = 0; + } else { + panic("panic unable to find the upgrade thread\n"); + } + ksyn_wqunlock(kwq); + goto out; + } + + error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest); + if (error != 0) + panic("rwunlock: cannot fails to slot next round of threads"); + +#if _PSYNCH_TRACE_ +#if defined(__i386__) + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq, 1, rwtype, lowest, 0); +#endif +#endif /* _PSYNCH_TRACE_ */ + low_reader = lowest[KSYN_QUEUE_READ]; + low_lreader = lowest[KSYN_QUEUE_LREAD]; + low_writer = lowest[KSYN_QUEUE_WRITER]; + low_ywriter = lowest[KSYN_QUEUE_YWRITER]; + + + updatebits = mgen & ~( PTHRW_EBIT | PTHRW_WBIT |PTHRW_YBIT | PTHRW_UBIT | PTHRW_LBIT); + + longreadset = 0; + allreaders = 0; + switch (rwtype & PTH_RW_TYPE_MASK) { + case PTH_RW_TYPE_LREAD: + longreadset = 1; + case PTH_RW_TYPE_READ: { + limitrdnum = 0; + if (longreadset == 0) { + switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) { + case PTH_RWSHFT_TYPE_WRITE: + limitrdnum = low_writer; + if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && + (is_seqlower(low_lreader, low_writer) != 0)) { + longreadset = 1; + } + + break; + case PTH_RWSHFT_TYPE_YWRITE: + /* all read ? 
*/ + if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && + (is_seqlower(low_lreader, low_ywriter) != 0)) { + longreadset = 1; + } else + allreaders = 1; + break; + case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE): + limitrdnum = low_writer; + if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && + (is_seqlower(low_lreader, low_ywriter) != 0)) { + longreadset = 1; + } + break; + default: /* no writers at all */ + if ((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) + longreadset = 1; + else + allreaders = 1; + }; + + } + + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) + updatebits |= PTHRW_WBIT; + else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0) + updatebits |= PTHRW_YBIT; + + if (longreadset == 0) { + if((prepost != 0) && + ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && + ((allreaders != 0) || (is_seqlower(premgen, limitrdnum) != 0))) { + block = 0; + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + } + } else { + updatebits |= PTHRW_LBIT; + if ((prepost != 0) && + ((flags & (KW_UNLOCK_PREPOST_READLOCK | KW_UNLOCK_PREPOST_LREADLOCK)) != 0)) { + block = 0; + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + } + } + + if (prepost != 0) { + updatebits |= PTHRW_RW_HUNLOCK; + } + + failed = ksyn_wakeupreaders(kwq, limitrdnum, longreadset, allreaders, updatebits, &woken); + if (failed != 0) { + kwq->kw_pre_intrcount = failed; /* actually a count */ + kwq->kw_pre_intrseq = limitrdnum; + kwq->kw_pre_intrretbits = updatebits; + if (longreadset) + kwq->kw_pre_intrtype = PTH_RW_TYPE_LREAD; + else + kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; + } + + /* if we woken up no one and the current thread is returning, ensure it is doing unlock */ + if ((prepost != 0) && (woken == 0) && (block == 0)&& ((updatebits & PTHRW_RW_HUNLOCK) != 0)) { + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + } + + error = 0; + + } + break; + + case PTH_RW_TYPE_WRITE: { + updatebits |= PTHRW_EBIT; + if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) { + 
block = 0; + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) + updatebits |= PTHRW_WBIT; + else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0) + updatebits |= PTHRW_YBIT; + th = preth; + uth = get_bsdthread_info(th); + uth->uu_psynchretval = updatebits; + } else { + /* we are not granting writelock to the preposting thread */ + uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); + + /* if there are writers present or the preposting write thread then W bit is to be set */ + if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) ) + updatebits |= PTHRW_WBIT; + else if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0) + updatebits |= PTHRW_YBIT; + uth->uu_psynchretval = updatebits; + uth->uu_kwqqueue = NULL; + /* setup next in the queue */ + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("kwq_handle_unlock: panic waking up writer\n"); + if (kret == KERN_NOT_WAITING) { + kwq->kw_pre_intrcount = 1; /* actually a count */ + kwq->kw_pre_intrseq = low_writer; + kwq->kw_pre_intrretbits = updatebits; + kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; + } + error = 0; + } + + } + break; + + case PTH_RW_TYPE_YWRITE: { + /* can reader locks be granted ahead of this write? */ + if ((rwtype & PTH_RWSHFT_TYPE_READ) != 0) { + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) + updatebits |= PTHRW_WBIT; + else if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) + updatebits |= PTHRW_YBIT; + + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) { + /* is lowest reader less than the low writer? 
*/ + if (is_seqlower(low_reader,low_writer) == 0) + goto yielditis; + if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, low_writer) != 0)) { + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + block = 0; + } + if (prepost != 0) { + updatebits |= PTHRW_RW_HUNLOCK; + } + + /* there will be readers to wakeup , no need to check for woken */ + failed = ksyn_wakeupreaders(kwq, low_writer, 0, 0, updatebits, NULL); + if (failed != 0) { + kwq->kw_pre_intrcount = failed; /* actually a count */ + kwq->kw_pre_intrseq = low_writer; + kwq->kw_pre_intrretbits = updatebits; + kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; + } + error = 0; + } else { + /* wakeup all readers */ + if ((prepost != 0) && ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + block = 0; + } + if (prepost != 0) { + updatebits |= PTHRW_RW_HUNLOCK; + } + failed = ksyn_wakeupreaders(kwq, low_writer, 0, 1, updatebits, &woken); + if (failed != 0) { + kwq->kw_pre_intrcount = failed; /* actually a count */ + kwq->kw_pre_intrseq = kwq->kw_highseq; + kwq->kw_pre_intrretbits = updatebits; + kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; + } + /* if we woken up no one and the current thread is returning, ensure it is doing unlock */ + if ((prepost != 0) && (woken ==0) && (block == 0)&& ((updatebits & PTHRW_RW_HUNLOCK) != 0)) { + uth = current_uthread(); + uth->uu_psynchretval = updatebits; + } + error = 0; + } + } else { +yielditis: + /* no reads, so granting yeilding writes */ + updatebits |= PTHRW_EBIT; + + if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (low_writer == premgen)) { + /* preposting yielding write thread is being granted exclusive lock */ + + block = 0; + + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) + updatebits |= PTHRW_WBIT; + else if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) + updatebits |= PTHRW_YBIT; + + th = preth; + uth = get_bsdthread_info(th); + uth->uu_psynchretval = updatebits; + } else { 
+ /* we are granting yield writelock to some other thread */ + uth = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwq); + + if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) + updatebits |= PTHRW_WBIT; + /* if there are ywriters present or the preposting ywrite thread then W bit is to be set */ + else if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) ) + updatebits |= PTHRW_YBIT; + + uth->uu_psynchretval = updatebits; + uth->uu_kwqqueue = NULL; + + kret = ksyn_wakeup_thread(kwq, uth); + if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) + panic("kwq_handle_unlock : panic waking up readers\n"); + if (kret == KERN_NOT_WAITING) { + kwq->kw_pre_intrcount = 1; /* actually a count */ + kwq->kw_pre_intrseq = low_ywriter; + kwq->kw_pre_intrretbits = updatebits; + kwq->kw_pre_intrtype = PTH_RW_TYPE_YWRITE; + } + error = 0; + } + } + } + break; + + default: + panic("rwunlock: invalid type for lock grants"); + + }; + + if (updatep != NULL) + *updatep = updatebits; + +out: + if (blockp != NULL) + *blockp = block; +#if _PSYNCH_TRACE_ +#if defined(__i386__) + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_END, (uint32_t)kwq, 0, 0, block, 0); +#endif +#endif /* _PSYNCH_TRACE_ */ + return(error); +} + + +/* handle downgrade actions */ +int +kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, __unused int flags, __unused uint32_t premgen, __unused int * blockp) +{ + uint32_t updatebits, lowriter = 0; + int longreadset, allreaders, count; + + /* can handle downgrade now */ + updatebits = mgen; + + longreadset = 0; + allreaders = 0; + if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 0) { + lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; + if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) { + if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0) + longreadset = 1; + } + } else { + allreaders = 1; + if 
(kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count > 0) { + lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum; + if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) { + if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0) + longreadset = 1; + } + } + } + + count = ksyn_wakeupreaders(kwq, lowriter, longreadset, allreaders, updatebits, NULL); + if (count != 0) { + kwq->kw_pre_limrd = count; + kwq->kw_pre_limrdseq = lowriter; + kwq->kw_pre_limrdbits = lowriter; + /* need to handle prepost */ + } + return(0); +} +/************* Indiv queue support routines ************************/ +void +ksyn_queue_init(ksyn_queue_t kq) +{ + TAILQ_INIT(&kq->ksynq_uthlist); + kq->ksynq_count = 0; + kq->ksynq_firstnum = 0; + kq->ksynq_lastnum = 0; +} + + +int +ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, int fit) +{ + uint32_t lockseq = mgen & PTHRW_COUNT_MASK; + struct uthread * q_uth, * r_uth; + + if (kq->ksynq_count == 0) { + TAILQ_INSERT_HEAD(&kq->ksynq_uthlist, uth, uu_mtxlist); + kq->ksynq_firstnum = lockseq; + kq->ksynq_lastnum = lockseq; + goto out; + } + + if (fit == FIRSTFIT) { + /* firstfit, arriving order */ + TAILQ_INSERT_TAIL(&kq->ksynq_uthlist, uth, uu_mtxlist); + if (is_seqlower (lockseq, kq->ksynq_firstnum) != 0) + kq->ksynq_firstnum = lockseq; + if (is_seqhigher (lockseq, kq->ksynq_lastnum) != 0) + kq->ksynq_lastnum = lockseq; + goto out; + } + + if ((lockseq == kq->ksynq_firstnum) || (lockseq == kq->ksynq_lastnum)) + panic("ksyn_queue_insert: two threads with same lockseq "); + + /* check for next seq one */ + if (is_seqlower(kq->ksynq_lastnum, lockseq) != 0) { + TAILQ_INSERT_TAIL(&kq->ksynq_uthlist, uth, uu_mtxlist); + kq->ksynq_lastnum = lockseq; + goto out; + } + + if (is_seqlower(lockseq, kq->ksynq_firstnum) != 0) { + TAILQ_INSERT_HEAD(&kq->ksynq_uthlist, uth, uu_mtxlist); + kq->ksynq_firstnum = lockseq; + goto out; + } + + /* goto slow insert mode */ + 
TAILQ_FOREACH_SAFE(q_uth, &kq->ksynq_uthlist, uu_mtxlist, r_uth) { + if (is_seqhigher(q_uth->uu_lockseq, lockseq) != 0) { + TAILQ_INSERT_BEFORE(q_uth, uth, uu_mtxlist); + goto out; + } + } + + panic("failed to insert \n"); +out: + kq->ksynq_count++; + kwq->kw_inqueue++; + update_low_high(kwq, lockseq); + return(0); +} + +struct uthread * +ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq) +{ + uthread_t uth = NULL; + uthread_t q_uth; + uint32_t curseq; + + if (kq->ksynq_count != 0) { + uth = TAILQ_FIRST(&kq->ksynq_uthlist); + TAILQ_REMOVE(&kq->ksynq_uthlist, uth, uu_mtxlist); + curseq = uth->uu_lockseq & PTHRW_COUNT_MASK; + kq->ksynq_count--; + kwq->kw_inqueue--; + + if(kq->ksynq_count != 0) { + q_uth = TAILQ_FIRST(&kq->ksynq_uthlist); + kq->ksynq_firstnum = (q_uth->uu_lockseq & PTHRW_COUNT_MASK); + } else { + kq->ksynq_firstnum = 0; + kq->ksynq_lastnum = 0; + + } + if (kwq->kw_inqueue == 0) { + kwq->kw_lowseq = 0; + kwq->kw_highseq = 0; + } else { + if (kwq->kw_lowseq == curseq) + kwq->kw_lowseq = find_nextlowseq(kwq); + if (kwq->kw_highseq == curseq) + kwq->kw_highseq = find_nexthighseq(kwq); + } + } + return(uth); +} + +void +ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uthread_t uth) +{ + uthread_t q_uth; + uint32_t curseq; + + if (kq->ksynq_count > 0) { + TAILQ_REMOVE(&kq->ksynq_uthlist, uth, uu_mtxlist); + kq->ksynq_count--; + if(kq->ksynq_count != 0) { + q_uth = TAILQ_FIRST(&kq->ksynq_uthlist); + kq->ksynq_firstnum = (q_uth->uu_lockseq & PTHRW_COUNT_MASK); + } else { + kq->ksynq_firstnum = 0; + kq->ksynq_lastnum = 0; + + } + kwq->kw_inqueue--; + curseq = uth->uu_lockseq & PTHRW_COUNT_MASK; + if (kwq->kw_inqueue == 0) { + kwq->kw_lowseq = 0; + kwq->kw_highseq = 0; + } else { + if (kwq->kw_lowseq == curseq) + kwq->kw_lowseq = find_nextlowseq(kwq); + if (kwq->kw_highseq == curseq) + kwq->kw_highseq = find_nexthighseq(kwq); + } + } +} + + +void +update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq) +{ + if (kwq->kw_inqueue == 1) { 
+ kwq->kw_lowseq = lockseq; + kwq->kw_highseq = lockseq; + } else { + if (is_seqlower(lockseq, kwq->kw_lowseq) != 0) + kwq->kw_lowseq = lockseq; + if (is_seqhigher(lockseq, kwq->kw_highseq) != 0) + kwq->kw_highseq = lockseq; + } +} + +uint32_t +find_nextlowseq(ksyn_wait_queue_t kwq) +{ + uint32_t numbers[4]; + int count = 0, i; + uint32_t lowest; + + for(i = 0; i< KSYN_QUEUE_MAX; i++) { + if (kwq->kw_ksynqueues[i].ksynq_count != 0) { + numbers[count]= kwq->kw_ksynqueues[i].ksynq_firstnum; + count++; + } + } + + if (count == 0) + return(0); + lowest = numbers[0]; + if (count > 1) { + for (i = 1; i< count; i++) { + if(is_seqlower(numbers[i] , lowest) != 0) + lowest = numbers[count]; + + } + } + return(lowest); +} + +uint32_t +find_nexthighseq(ksyn_wait_queue_t kwq) +{ + uint32_t numbers[4]; + int count = 0, i; + uint32_t highest; + + for(i = 0; i< KSYN_QUEUE_MAX; i++) { + if (kwq->kw_ksynqueues[i].ksynq_count != 0) { + numbers[count]= kwq->kw_ksynqueues[i].ksynq_lastnum; + count++; + } + } + + + + if (count == 0) + return(0); + highest = numbers[0]; + if (count > 1) { + for (i = 1; i< count; i++) { + if(is_seqhigher(numbers[i], highest) != 0) + highest = numbers[i]; + + } + } + return(highest); +} + +int +find_diff(uint32_t upto, uint32_t lowest) +{ + uint32_t diff; + + if (upto == lowest) + return(0); + diff = diff_genseq(upto, lowest); + diff = (diff >> PTHRW_COUNT_SHIFT); + return(diff); +} + + +int +find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp) +{ + int i; + uint32_t count = 0; + + +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_START, 0, 0, upto, nwaiters, 0); +#endif /* _PSYNCH_TRACE_ */ + + for (i= 0; i< KSYN_QUEUE_MAX; i++) { + count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto); +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_NONE, 0, 1, i, count, 0); +#endif /* _PSYNCH_TRACE_ */ + if (count >= nwaiters) { + break; + } + } + + if (countp != 
NULL) { + *countp = count; + } +#if _PSYNCH_TRACE_ + KERNEL_DEBUG_CONSTANT(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_END, 0, 0, count, nwaiters, 0); +#endif /* _PSYNCH_TRACE_ */ + if (count >= nwaiters) + return(1); + else + return(0); +} + + +uint32_t +ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto) +{ + uint32_t i = 0; + uthread_t uth, newuth; + uint32_t curval; + + /* if nothing or the first num is greater than upto, return none */ + if ((kq->ksynq_count == 0) || (is_seqhigher(kq->ksynq_firstnum, upto) != 0)) + return(0); + if (upto == kq->ksynq_firstnum) + return(1); + + TAILQ_FOREACH_SAFE(uth, &kq->ksynq_uthlist, uu_mtxlist, newuth) { + curval = (uth->uu_lockseq & PTHRW_COUNT_MASK); + if (upto == curval) { + i++; + break; + } else if (is_seqhigher(curval, upto) != 0) { + break; + } else { + /* seq is lower */ + i++; + } + } + return(i); +} + +/* find the thread and removes from the queue */ +uthread_t +ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq) +{ + uthread_t q_uth, r_uth; + /* case where wrap in the tail of the queue exists */ + TAILQ_FOREACH_SAFE(q_uth, &kq->ksynq_uthlist, uu_mtxlist, r_uth) { + if (q_uth->uu_lockseq == seq) { + ksyn_queue_removeitem(kwq, kq, q_uth); + return(q_uth); + } + } + return(NULL); } +#endif /* PSYNCH */ diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c index be4e9c165..8c58b3ece 100644 --- a/bsd/kern/pthread_synch.c +++ b/bsd/kern/pthread_synch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,6 +64,7 @@ #include #include #include /* for coredump */ +#include /* for fill_procworkqueue */ #include @@ -81,6 +82,7 @@ #include /* for thread_exception_return */ #include #include +#include #include #include #include @@ -88,7 +90,7 @@ #include #include #include -#include ` /* for current_map() */ +#include /* for current_map() */ #include /* for thread_resume */ #include #if defined(__i386__) @@ -116,8 +118,6 @@ lck_grp_attr_t *pthread_lck_grp_attr; lck_grp_t *pthread_lck_grp; lck_attr_t *pthread_lck_attr; -lck_mtx_t * pthread_list_mlock; -extern void pthread_init(void); extern kern_return_t thread_getstatus(register thread_t act, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); @@ -127,17 +127,26 @@ extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t); extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t); -static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item); +extern void workqueue_thread_yielded(void); + +static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity); static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item); -static void workqueue_run_nextitem(proc_t p, thread_t th); +static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th, + user_addr_t oc_item, int oc_prio, int oc_affinity); static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl, int reuse_thread, int wake_thread, int return_directly); +static void wq_unpark_continue(void); static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl); -static int workqueue_addnewthread(struct workqueue *wq); -static void workqueue_removethread(struct workqueue *wq); -static void workqueue_lock(proc_t); +static boolean_t 
workqueue_addnewthread(struct workqueue *wq); +static void workqueue_removethread(struct threadlist *tl); static void workqueue_lock_spin(proc_t); static void workqueue_unlock(proc_t); +int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); +int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); + +#define WQ_MAXPRI_MIN 0 /* low prio queue num */ +#define WQ_MAXPRI_MAX 2 /* max prio queuenum */ +#define WQ_PRI_NUM 3 /* number of prio work queues */ #define C_32_STK_ALIGN 16 #define C_64_STK_ALIGN 16 @@ -166,620 +175,8 @@ void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), voi #define SCHED_FIFO POLICY_FIFO #define SCHED_RR POLICY_RR -void -pthread_init(void) -{ - - pthread_lck_grp_attr = lck_grp_attr_alloc_init(); - pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr); - - /* - * allocate the lock attribute for pthread synchronizers - */ - pthread_lck_attr = lck_attr_alloc_init(); - - pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); - -} - -void -pthread_list_lock(void) -{ - lck_mtx_lock(pthread_list_mlock); -} - -void -pthread_list_unlock(void) -{ - lck_mtx_unlock(pthread_list_mlock); -} - - -int -__pthread_mutex_destroy(__unused struct proc *p, struct __pthread_mutex_destroy_args *uap, __unused register_t *retval) -{ - int res; - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - lck_mtx_t * lmtx; - lck_mtx_t * lmtx1; - - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) - return(EINVAL); - - MTX_LOCK(mutex->lock); - if (mutex->sig == _PTHREAD_KERN_MUTEX_SIG) - { - if (mutex->owner == (thread_t)NULL && - mutex->refcount == 1) - { - mutex->sig = _PTHREAD_NO_SIG; - lmtx = mutex->mutex; - lmtx1 = mutex->lock; - mutex->mutex = NULL; - pthread_id_mutex_remove(mutexid); - mutex->refcount --; - MTX_UNLOCK(mutex->lock); - lck_mtx_free(lmtx, pthread_lck_grp); - lck_mtx_free(lmtx1, pthread_lck_grp); - kfree((void *)mutex, sizeof(struct _pthread_mutex)); - return(0); - } - 
else - res = EBUSY; - } - else - res = EINVAL; - MTX_UNLOCK(mutex->lock); - pthread_mutex_release(mutex); - return (res); -} - -/* - * Initialize a mutex variable, possibly with additional attributes. - */ -static void -pthread_mutex_init_internal(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) -{ - mutex->prioceiling = attr->prioceiling; - mutex->protocol = attr->protocol; - mutex->type = attr->type; - mutex->pshared = attr->pshared; - mutex->refcount = 0; - mutex->owner = (thread_t)NULL; - mutex->owner_proc = current_proc(); - mutex->sig = _PTHREAD_KERN_MUTEX_SIG; - mutex->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); - mutex->mutex = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); -} - -/* - * Initialize a mutex variable, possibly with additional attributes. - * Public interface - so don't trust the lock - initialize it first. - */ -int -__pthread_mutex_init(__unused struct proc *p, struct __pthread_mutex_init_args *uap, __unused register_t *retval) -{ - user_addr_t umutex = uap->mutex; - pthread_mutex_t * mutex; - user_addr_t uattr = uap->attr; - pthread_mutexattr_t attr; - unsigned int addr = (unsigned int)((uintptr_t)uap->mutex); - int pmutex_sig; - int mutexid; - int error = 0; - - if ((umutex == 0) || (uattr == 0)) - return(EINVAL); - - if ((error = copyin(uattr, &attr, sizeof(pthread_mutexattr_t)))) - return(error); - - if (attr.sig != _PTHREAD_MUTEX_ATTR_SIG) - return (EINVAL); - - if ((error = copyin(umutex, &pmutex_sig, sizeof(int)))) - return(error); - - if (pmutex_sig == _PTHREAD_KERN_MUTEX_SIG) - return(EBUSY); - mutex = (pthread_mutex_t *)kalloc(sizeof(pthread_mutex_t)); - - pthread_mutex_init_internal(mutex, &attr); - - - addr += 8; - mutexid = pthread_id_mutex_add(mutex); - if (mutexid) { - if ((error = copyout(&mutexid, ((user_addr_t)((uintptr_t)(addr))), 4))) - goto cleanup; - return(0); - } else - error = ENOMEM; -cleanup: - if(mutexid) - pthread_id_mutex_remove(mutexid); - lck_mtx_free(mutex->lock, 
pthread_lck_grp); - lck_mtx_free(mutex->mutex, pthread_lck_grp); - kfree(mutex, sizeof(struct _pthread_mutex)); - return(error); -} - -/* - * Lock a mutex. - * TODO: Priority inheritance stuff - */ -int -__pthread_mutex_lock(struct proc *p, struct __pthread_mutex_lock_args *uap, __unused register_t *retval) -{ - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - int error; - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) - return(EINVAL); - - MTX_LOCK(mutex->lock); - - if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - MTX_UNLOCK(mutex->lock); - - lck_mtx_lock(mutex->mutex); - - MTX_LOCK(mutex->lock); - mutex->owner = current_thread(); - error = 0; -out: - MTX_UNLOCK(mutex->lock); - pthread_mutex_release(mutex); - return (error); -} - -/* - * Attempt to lock a mutex, but don't block if this isn't possible. - */ -int -__pthread_mutex_trylock(struct proc *p, struct __pthread_mutex_trylock_args *uap, __unused register_t *retval) -{ - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - boolean_t state; - int error; - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) - return(EINVAL); - - MTX_LOCK(mutex->lock); - - if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - MTX_UNLOCK(mutex->lock); - - state = lck_mtx_try_lock(mutex->mutex); - if (state) { - MTX_LOCK(mutex->lock); - mutex->owner = current_thread(); - MTX_UNLOCK(mutex->lock); - error = 0; - } else - error = EBUSY; - - pthread_mutex_release(mutex); - return (error); -out: - MTX_UNLOCK(mutex->lock); - pthread_mutex_release(mutex); - return (error); -} - -/* - * Unlock a mutex. 
- * TODO: Priority inheritance stuff - */ -int -__pthread_mutex_unlock(struct proc *p, struct __pthread_mutex_unlock_args *uap, __unused register_t *retval) -{ - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - int error; - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) - return(EINVAL); - - MTX_LOCK(mutex->lock); - - if (mutex->sig != _PTHREAD_KERN_MUTEX_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != mutex->owner_proc) && (mutex->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - MTX_UNLOCK(mutex->lock); - - lck_mtx_unlock(mutex->mutex); - - MTX_LOCK(mutex->lock); - mutex->owner = NULL; - error = 0; -out: - MTX_UNLOCK(mutex->lock); - pthread_mutex_release(mutex); - return (error); -} - - -int -__pthread_cond_init(__unused struct proc *p, struct __pthread_cond_init_args *uap, __unused register_t *retval) -{ - pthread_cond_t * cond; - pthread_condattr_t attr; - user_addr_t ucond = uap->cond; - user_addr_t uattr = uap->attr; - unsigned int addr = (unsigned int)((uintptr_t)uap->cond); - int condid, error, cond_sig; - semaphore_t sem; - kern_return_t kret; - int value = 0; - - if ((ucond == 0) || (uattr == 0)) - return(EINVAL); - - if ((error = copyin(uattr, &attr, sizeof(pthread_condattr_t)))) - return(error); - - if (attr.sig != _PTHREAD_COND_ATTR_SIG) - return (EINVAL); - - if ((error = copyin(ucond, &cond_sig, sizeof(int)))) - return(error); - - if (cond_sig == _PTHREAD_KERN_COND_SIG) - return(EBUSY); - kret = semaphore_create(kernel_task, &sem, SYNC_POLICY_FIFO, value); - if (kret != KERN_SUCCESS) - return(ENOMEM); - - cond = (pthread_cond_t *)kalloc(sizeof(pthread_cond_t)); - - cond->lock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); - cond->pshared = attr.pshared; - cond->sig = _PTHREAD_KERN_COND_SIG; - cond->sigpending = 0; - cond->waiters = 0; - cond->refcount = 0; - cond->mutex = (pthread_mutex_t *)0; - cond->owner_proc = current_proc(); - cond->sem = sem; - - addr += 8; - condid = 
pthread_id_cond_add(cond); - if (condid) { - if ((error = copyout(&condid, ((user_addr_t)((uintptr_t)(addr))), 4))) - goto cleanup; - return(0); - } else - error = ENOMEM; -cleanup: - if(condid) - pthread_id_cond_remove(condid); - semaphore_destroy(kernel_task, cond->sem); - kfree(cond, sizeof(pthread_cond_t)); - return(error); -} - - -/* - * Destroy a condition variable. - */ -int -__pthread_cond_destroy(__unused struct proc *p, struct __pthread_cond_destroy_args *uap, __unused register_t *retval) -{ - pthread_cond_t *cond; - int condid = uap->condid; - semaphore_t sem; - lck_mtx_t * lmtx; - int res; - - cond = pthread_id_to_cond(condid); - if (cond == 0) - return(EINVAL); - - COND_LOCK(cond->lock); - if (cond->sig == _PTHREAD_KERN_COND_SIG) - { - if (cond->refcount == 1) - { - cond->sig = _PTHREAD_NO_SIG; - sem = cond->sem; - cond->sem = NULL; - lmtx = cond->lock; - pthread_id_cond_remove(condid); - cond->refcount --; - COND_UNLOCK(cond->lock); - lck_mtx_free(lmtx, pthread_lck_grp); - (void)semaphore_destroy(kernel_task, sem); - kfree((void *)cond, sizeof(pthread_cond_t)); - return(0); - } - else - res = EBUSY; - } - else - res = EINVAL; - COND_UNLOCK(cond->lock); - pthread_cond_release(cond); - return (res); -} - - -/* - * Signal a condition variable, waking up all threads waiting for it. 
- */ -int -__pthread_cond_broadcast(__unused struct proc *p, struct __pthread_cond_broadcast_args *uap, __unused register_t *retval) -{ - int condid = uap->condid; - pthread_cond_t * cond; - int error; - kern_return_t kret; - - cond = pthread_id_to_cond(condid); - if (cond == 0) - return(EINVAL); - - COND_LOCK(cond->lock); - - if (cond->sig != _PTHREAD_KERN_COND_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - COND_UNLOCK(cond->lock); - - kret = semaphore_signal_all(cond->sem); - switch (kret) { - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - error = EINVAL; - break; - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - error = EINTR; - break; - case KERN_SUCCESS: - error = 0; - break; - default: - error = EINVAL; - break; - } - - COND_LOCK(cond->lock); -out: - COND_UNLOCK(cond->lock); - pthread_cond_release(cond); - return (error); -} -/* - * Signal a condition variable, waking only one thread. 
- */ -int -__pthread_cond_signal(__unused struct proc *p, struct __pthread_cond_signal_args *uap, __unused register_t *retval) -{ - int condid = uap->condid; - pthread_cond_t * cond; - int error; - kern_return_t kret; - - cond = pthread_id_to_cond(condid); - if (cond == 0) - return(EINVAL); - - COND_LOCK(cond->lock); - - if (cond->sig != _PTHREAD_KERN_COND_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - COND_UNLOCK(cond->lock); - - kret = semaphore_signal(cond->sem); - switch (kret) { - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - error = EINVAL; - break; - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - error = EINTR; - break; - case KERN_SUCCESS: - error = 0; - break; - default: - error = EINVAL; - break; - } - - COND_LOCK(cond->lock); -out: - COND_UNLOCK(cond->lock); - pthread_cond_release(cond); - return (error); -} - - -int -__pthread_cond_wait(__unused struct proc *p, struct __pthread_cond_wait_args *uap, __unused register_t *retval) -{ - int condid = uap->condid; - pthread_cond_t * cond; - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - int error; - kern_return_t kret; - - cond = pthread_id_to_cond(condid); - if (cond == 0) - return(EINVAL); - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) { - pthread_cond_release(cond); - return(EINVAL); - } - COND_LOCK(cond->lock); - - if (cond->sig != _PTHREAD_KERN_COND_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - COND_UNLOCK(cond->lock); - - kret = semaphore_wait(cond->sem); - switch (kret) { - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - error = EACCES; - break; - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - error = EINTR; - break; - case KERN_SUCCESS: - error = 0; - break; - default: - error = EINVAL; - break; - } - - 
COND_LOCK(cond->lock); -out: - COND_UNLOCK(cond->lock); - pthread_cond_release(cond); - pthread_mutex_release(mutex); - return (error); -} - -int -__pthread_cond_timedwait(__unused struct proc *p, struct __pthread_cond_timedwait_args *uap, __unused register_t *retval) -{ - int condid = uap->condid; - pthread_cond_t * cond; - int mutexid = uap->mutexid; - pthread_mutex_t * mutex; - mach_timespec_t absts; - int error; - kern_return_t kret; - - absts.tv_sec = 0; - absts.tv_nsec = 0; - - if (uap->abstime) - if ((error = copyin(uap->abstime, &absts, sizeof(mach_timespec_t )))) - return(error); - cond = pthread_id_to_cond(condid); - if (cond == 0) - return(EINVAL); - - mutex = pthread_id_to_mutex(mutexid); - if (mutex == 0) { - pthread_cond_release(cond); - return(EINVAL); - } - COND_LOCK(cond->lock); - - if (cond->sig != _PTHREAD_KERN_COND_SIG) - { - error = EINVAL; - goto out; - } - - if ((p != cond->owner_proc) && (cond->pshared != PTHREAD_PROCESS_SHARED)) { - error = EINVAL; - goto out; - } - - COND_UNLOCK(cond->lock); - - kret = semaphore_timedwait(cond->sem, absts); - switch (kret) { - case KERN_INVALID_ADDRESS: - case KERN_PROTECTION_FAILURE: - error = EACCES; - break; - case KERN_ABORTED: - case KERN_OPERATION_TIMED_OUT: - error = EINTR; - break; - case KERN_SUCCESS: - error = 0; - break; - default: - error = EINVAL; - break; - } - - COND_LOCK(cond->lock); -out: - COND_UNLOCK(cond->lock); - pthread_cond_release(cond); - pthread_mutex_release(mutex); - return (error); -} - int bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval) { @@ -794,7 +191,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us mach_vm_offset_t th_stackaddr; mach_vm_offset_t th_stack; mach_vm_offset_t th_pthread; - mach_port_t th_thport; + mach_port_name_t th_thport; thread_t th; user_addr_t user_func = uap->func; user_addr_t user_funcarg = uap->func_arg; @@ -808,6 +205,8 @@ bsdthread_create(__unused struct proc *p, 
struct bsdthread_create_args *uap, us int isLP64 = 0; + if ((p->p_lflag & P_LREGISTER) == 0) + return(EINVAL); #if 0 KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0); #endif @@ -817,7 +216,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us #if defined(__ppc__) stackaddr = 0xF0000000; -#elif defined(__i386__) +#elif defined(__i386__) || defined(__x86_64__) stackaddr = 0xB0000000; #else #error Need to define a stack address hint for this architecture @@ -828,7 +227,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us thread_reference(th); sright = (void *) convert_thread_to_port(th); - th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(ctask)); + th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(ctask)); if ((flags & PTHREAD_START_CUSTOM) == 0) { th_stacksize = (mach_vm_size_t)user_stack; /* if it is custom them it is stacksize */ @@ -889,7 +288,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us ts64->srr0 = (uint64_t)p->p_threadstart; ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE); ts64->r3 = (uint64_t)th_pthread; - ts64->r4 = (uint64_t)((unsigned int)th_thport); + ts64->r4 = (uint64_t)(th_thport); ts64->r5 = (uint64_t)user_func; ts64->r6 = (uint64_t)user_funcarg; ts64->r7 = (uint64_t)user_stacksize; @@ -899,7 +298,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64); } -#elif defined(__i386__) +#elif defined(__i386__) || defined(__x86_64__) { /* * Set up i386 registers & function call. 
@@ -928,7 +327,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us ts64->rip = (uint64_t)p->p_threadstart; ts64->rdi = (uint64_t)th_pthread; - ts64->rsi = (uint64_t)((unsigned int)(th_thport)); + ts64->rsi = (uint64_t)(th_thport); ts64->rdx = (uint64_t)user_func; ts64->rcx = (uint64_t)user_funcarg; ts64->r8 = (uint64_t)user_stacksize; @@ -970,7 +369,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us } thread_deallocate(th); /* drop the creator reference */ #if 0 - KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, (unsigned int)th_pthread, 0, 0, 0); + KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, th_pthread, 0, 0, 0); #endif *retval = th_pthread; @@ -980,14 +379,14 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, us if (allocated != 0) (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); out: - (void)mach_port_deallocate(get_task_ipcspace(ctask), (mach_port_name_t)th_thport); + (void)mach_port_deallocate(get_task_ipcspace(ctask), th_thport); (void)thread_terminate(th); (void)thread_deallocate(th); return(error); } int -bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused register_t *retval) +bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused int32_t *retval) { mach_vm_offset_t freeaddr; mach_vm_size_t freesize; @@ -999,7 +398,7 @@ bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *u freesize = uap->freesize; #if 0 - KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0); + KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0); #endif if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { kret = mach_vm_deallocate(current_map(), freeaddr, freesize); @@ -1028,8 +427,11 @@ bsdthread_terminate(__unused struct proc *p, struct 
bsdthread_terminate_args *u int -bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused register_t *retval) +bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval) { + /* prevent multiple registrations */ + if ((p->p_lflag & P_LREGISTER) != 0) + return(EINVAL); /* syscall randomizer test can pass bogus values */ if (uap->pthsize > MAX_PTHREAD_SIZE) { return(EINVAL); @@ -1037,18 +439,27 @@ bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unuse p->p_threadstart = uap->threadstart; p->p_wqthread = uap->wqthread; p->p_pthsize = uap->pthsize; + p->p_targconc = uap->targetconc_ptr; + p->p_dispatchqueue_offset = uap->dispatchqueue_offset; + proc_setregister(p); return(0); } +uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD; +uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS; +uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; +uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; +uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; +uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; -int wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; -int wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; -int wq_max_run_latency_usecs = WQ_MAX_RUN_LATENCY_USECS; -int wq_timer_interval_msecs = WQ_TIMER_INTERVAL_MSECS; +SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW, + &wq_yielded_threshold, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW, + &wq_yielded_window_usecs, 0, ""); SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW, &wq_stalled_window_usecs, 0, ""); @@ -1056,174 +467,389 @@ SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW, SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW, &wq_reduce_pool_window_usecs, 0, ""); -SYSCTL_INT(_kern, OID_AUTO, wq_max_run_latency_usecs, CTLFLAG_RW, - &wq_max_run_latency_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, 
wq_timer_interval_msecs, CTLFLAG_RW, - &wq_timer_interval_msecs, 0, ""); - +SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW, + &wq_max_timer_interval_usecs, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW, + &wq_max_threads, 0, ""); void workqueue_init_lock(proc_t p) { - lck_mtx_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr); + lck_spin_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr); + + p->p_wqiniting = FALSE; } void workqueue_destroy_lock(proc_t p) { - lck_mtx_destroy(&p->p_wqlock, pthread_lck_grp); -} - -static void -workqueue_lock(proc_t p) -{ - lck_mtx_lock(&p->p_wqlock); + lck_spin_destroy(&p->p_wqlock, pthread_lck_grp); } + static void workqueue_lock_spin(proc_t p) { - lck_mtx_lock_spin(&p->p_wqlock); + lck_spin_lock(&p->p_wqlock); } static void workqueue_unlock(proc_t p) { - lck_mtx_unlock(&p->p_wqlock); + lck_spin_unlock(&p->p_wqlock); } - static void -workqueue_interval_timer_start(thread_call_t call, int interval_in_ms) +workqueue_interval_timer_start(struct workqueue *wq) { uint64_t deadline; - clock_interval_to_deadline(interval_in_ms, 1000 * 1000, &deadline); + if (wq->wq_timer_interval == 0) + wq->wq_timer_interval = wq_stalled_window_usecs; + else { + wq->wq_timer_interval = wq->wq_timer_interval * 2; + + if (wq->wq_timer_interval > wq_max_timer_interval_usecs) + wq->wq_timer_interval = wq_max_timer_interval_usecs; + } + clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline); + + thread_call_enter_delayed(wq->wq_atimer_call, deadline); + + KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0); +} + + +static boolean_t +wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) +{ clock_sec_t secs; + clock_usec_t usecs; + uint64_t lastblocked_ts; + uint64_t elapsed; + + /* + * the timestamp is updated atomically w/o holding the workqueue lock + * so we need to do an atomic read of the 64 bits so that we don't see + * a mismatched pair of 32 bit reads... 
we accomplish this in an architecturally + * independent fashion by using OSCompareAndSwap64 to write back the + * value we grabbed... if it succeeds, then we have a good timestamp to + * evaluate... if it fails, we straddled grabbing the timestamp while it + * was being updated... treat a failed update as a busy thread since + * it implies we are about to see a really fresh timestamp anyway + */ + lastblocked_ts = *lastblocked_tsp; + +#if defined(__ppc__) +#else + if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp)) + return (TRUE); +#endif + if (lastblocked_ts >= cur_ts) { + /* + * because the update of the timestamp when a thread blocks isn't + * serialized against us looking at it (i.e. we don't hold the workq lock) + * it's possible to have a timestamp that matches the current time or + * that even looks to be in the future relative to when we grabbed the current + * time... just treat this as a busy thread since it must have just blocked. + */ + return (TRUE); + } + elapsed = cur_ts - lastblocked_ts; + + absolutetime_to_microtime(elapsed, &secs, &usecs); - thread_call_enter_delayed(call, deadline); + if (secs == 0 && usecs < wq_stalled_window_usecs) + return (TRUE); + return (FALSE); } +#define WQ_TIMER_NEEDED(wq, start_timer) do { \ + int oldflags = wq->wq_flags; \ + \ + if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) { \ + if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \ + start_timer = TRUE; \ + } \ +} while (0) + + + static void -workqueue_timer(struct workqueue *wq, __unused int param1) +workqueue_add_timer(struct workqueue *wq, __unused int param1) { - struct timeval tv, dtv; - uint32_t i; - boolean_t added_more_threads = FALSE; - boolean_t reset_maxactive = FALSE; - boolean_t restart_timer = FALSE; + proc_t p; + boolean_t start_timer = FALSE; + boolean_t retval; + boolean_t add_thread; + uint32_t busycount; - microuptime(&tv); + KERNEL_DEBUG(0xefffd108 | DBG_FUNC_START, 
wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0); + + p = wq->wq_proc; - KERNEL_DEBUG(0xefffd108, (int)wq, 0, 0, 0, 0); + workqueue_lock_spin(p); /* - * check to see if the stall frequency was beyond our tolerance - * or we have work on the queue, but haven't scheduled any - * new work within our acceptable time interval because - * there were no idle threads left to schedule + * because workqueue_callback now runs w/o taking the workqueue lock + * we are unsynchronized w/r to a change in state of the running threads... + * to make sure we always evaluate that change, we allow it to start up + * a new timer if the current one is actively evalutating the state + * however, we do not need more than 2 timers fired up (1 active and 1 pending) + * and we certainly do not want 2 active timers evaluating the state + * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers... + * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since + * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated + * and set atomimcally since the callback function needs to manipulate it + * w/o holding the workq lock... * - * WQ_TIMER_WATCH will only be set if we have 1 or more affinity - * groups that have stalled (no active threads and no idle threads)... - * it will not be set if all affinity groups have at least 1 thread - * that is currently runnable... if all processors have a runnable - * thread, there is no need to add more threads even if we're not - * scheduling new work within our allowed window... it just means - * that the work items are taking a long time to complete. 
+ * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == no pending timer, no active timer + * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == no pending timer, 1 active timer + * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == 1 pending timer, no active timer + * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == 1 pending timer, 1 active timer */ - if (wq->wq_flags & (WQ_ADD_TO_POOL | WQ_TIMER_WATCH)) { + while (wq->wq_lflags & WQL_ATIMER_BUSY) { + wq->wq_lflags |= WQL_ATIMER_WAITING; - if (wq->wq_flags & WQ_ADD_TO_POOL) - added_more_threads = TRUE; - else { - timersub(&tv, &wq->wq_lastran_ts, &dtv); + assert_wait((caddr_t)wq, (THREAD_UNINT)); + workqueue_unlock(p); - if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_stalled_window_usecs) - added_more_threads = TRUE; - } - if (added_more_threads == TRUE) { - for (i = 0; i < wq->wq_affinity_max && wq->wq_nthreads < WORKQUEUE_MAXTHREADS; i++) { - (void)workqueue_addnewthread(wq); - } - } - } - timersub(&tv, &wq->wq_reduce_ts, &dtv); + thread_block(THREAD_CONTINUE_NULL); - if (((dtv.tv_sec * 1000000) + dtv.tv_usec) > wq_reduce_pool_window_usecs) - reset_maxactive = TRUE; + workqueue_lock_spin(p); + } + wq->wq_lflags |= WQL_ATIMER_BUSY; /* - * if the pool size has grown beyond the minimum number - * of threads needed to keep all of the processors busy, and - * the maximum number of threads scheduled concurrently during - * the last sample period didn't exceed half the current pool - * size, then its time to trim the pool size back + * the workq lock will protect us from seeing WQ_EXITING change state, but we + * still need to update this atomically in case someone else tries to start + * the timer just as we're releasing it */ - if (added_more_threads == FALSE && - reset_maxactive == TRUE && - wq->wq_nthreads > wq->wq_affinity_max && - wq->wq_max_threads_scheduled <= (wq->wq_nthreads / 2)) { - uint32_t nthreads_to_remove; - - if ((nthreads_to_remove = (wq->wq_nthreads / 4)) == 0) - nthreads_to_remove = 1; + while ( !(OSCompareAndSwap(wq->wq_flags, 
(wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags))); - for (i = 0; i < nthreads_to_remove && wq->wq_nthreads > wq->wq_affinity_max; i++) - workqueue_removethread(wq); - } - workqueue_lock_spin(wq->wq_proc); +again: + retval = TRUE; + add_thread = FALSE; + + if ( !(wq->wq_flags & WQ_EXITING)) { + /* + * check to see if the stall frequency was beyond our tolerance + * or we have work on the queue, but haven't scheduled any + * new work within our acceptable time interval because + * there were no idle threads left to schedule + */ + if (wq->wq_itemcount) { + uint32_t priority; + uint32_t affinity_tag; + uint32_t i; + uint64_t curtime; + + for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) { + if (wq->wq_list_bitmap & (1 << priority)) + break; + } + assert(priority < WORKQUEUE_NUMPRIOS); + + curtime = mach_absolute_time(); + busycount = 0; + + for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) { + /* + * if we have no idle threads, we can try to add them if needed + */ + if (wq->wq_thidlecount == 0) + add_thread = TRUE; + + /* + * look for first affinity group that is currently not active + * i.e. 
no active threads at this priority level or higher + * and has not been active recently at this priority level or higher + */ + for (i = 0; i <= priority; i++) { + if (wq->wq_thactive_count[i][affinity_tag]) { + add_thread = FALSE; + break; + } + if (wq->wq_thscheduled_count[i][affinity_tag]) { + if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) { + add_thread = FALSE; + busycount++; + break; + } + } + } + if (add_thread == TRUE) { + retval = workqueue_addnewthread(wq); + break; + } + } + if (wq->wq_itemcount) { + /* + * as long as we have threads to schedule, and we successfully + * scheduled new work, keep trying + */ + while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) { + /* + * workqueue_run_nextitem is responsible for + * dropping the workqueue lock in all cases + */ + retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0); + workqueue_lock_spin(p); + + if (retval == FALSE) + break; + } + if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) { + + if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE) + goto again; - if (reset_maxactive == TRUE) { - wq->wq_max_threads_scheduled = 0; - microuptime(&wq->wq_reduce_ts); + if (wq->wq_thidlecount == 0 || busycount) + WQ_TIMER_NEEDED(wq, start_timer); + + KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0); + } + } + } } - if (added_more_threads) { - wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH); + if ( !(wq->wq_flags & WQ_ATIMER_RUNNING)) + wq->wq_timer_interval = 0; - /* - * since we added more threads, we should be - * able to run some work if its still available + wq->wq_lflags &= ~WQL_ATIMER_BUSY; + + if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) { + /* + * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer + * to finish getting out of the way */ - workqueue_run_nextitem(wq->wq_proc, THREAD_NULL); - workqueue_lock_spin(wq->wq_proc); + 
wq->wq_lflags &= ~WQL_ATIMER_WAITING; + wakeup(wq); } - if ((wq->wq_nthreads > wq->wq_affinity_max) || - (wq->wq_flags & WQ_TIMER_WATCH)) { - restart_timer = TRUE; - } else - wq->wq_flags &= ~WQ_TIMER_RUNNING; + KERNEL_DEBUG(0xefffd108 | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0); - workqueue_unlock(wq->wq_proc); + workqueue_unlock(p); - /* - * we needed to knock down the WQ_TIMER_RUNNING flag while behind - * the workqueue lock... however, we don't want to hold the lock - * while restarting the timer and we certainly don't want 2 or more - * instances of the timer... so set a local to indicate the need - * for a restart since the state of wq_flags may change once we - * drop the workqueue lock... - */ - if (restart_timer == TRUE) - workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs); + if (start_timer == TRUE) + workqueue_interval_timer_start(wq); } +void +workqueue_thread_yielded(void) +{ + struct workqueue *wq; + proc_t p; + + p = current_proc(); + + if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0) + return; + + workqueue_lock_spin(p); + + if (wq->wq_itemcount) { + uint64_t curtime; + uint64_t elapsed; + clock_sec_t secs; + clock_usec_t usecs; + + if (wq->wq_thread_yielded_count++ == 0) + wq->wq_thread_yielded_timestamp = mach_absolute_time(); + + if (wq->wq_thread_yielded_count < wq_yielded_threshold) { + workqueue_unlock(p); + return; + } + KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0); + + wq->wq_thread_yielded_count = 0; + + curtime = mach_absolute_time(); + elapsed = curtime - wq->wq_thread_yielded_timestamp; + absolutetime_to_microtime(elapsed, &secs, &usecs); + + if (secs == 0 && usecs < wq_yielded_window_usecs) { + + if (wq->wq_thidlecount == 0) { + workqueue_addnewthread(wq); + /* + * 'workqueue_addnewthread' drops the workqueue lock + * when creating the new thread and then retakes it before + * returning... 
this window allows other threads to process + * work on the queue, so we need to recheck for available work + * if none found, we just return... the newly created thread + * will eventually get used (if it hasn't already)... + */ + if (wq->wq_itemcount == 0) { + workqueue_unlock(p); + return; + } + } + if (wq->wq_thidlecount) { + uint32_t priority; + uint32_t affinity = -1; + user_addr_t item; + struct workitem *witem = NULL; + struct workitemlist *wl = NULL; + struct uthread *uth; + struct threadlist *tl; + + uth = get_bsdthread_info(current_thread()); + if ((tl = uth->uu_threadlist)) + affinity = tl->th_affinity_tag; + + for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) { + if (wq->wq_list_bitmap & (1 << priority)) { + wl = (struct workitemlist *)&wq->wq_list[priority]; + break; + } + } + assert(wl != NULL); + assert(!(TAILQ_EMPTY(&wl->wl_itemlist))); + + witem = TAILQ_FIRST(&wl->wl_itemlist); + TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry); + + if (TAILQ_EMPTY(&wl->wl_itemlist)) + wq->wq_list_bitmap &= ~(1 << priority); + wq->wq_itemcount--; + + item = witem->wi_item; + witem->wi_item = (user_addr_t)0; + witem->wi_affinity = 0; + + TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry); + + (void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity); + /* + * workqueue_run_nextitem is responsible for + * dropping the workqueue lock in all cases + */ + KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0); + + return; + } + } + KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0); + } + workqueue_unlock(p); +} + + + static void -workqueue_callback( - int type, - thread_t thread) +workqueue_callback(int type, thread_t thread) { struct uthread *uth; struct threadlist *tl; struct workqueue *wq; uth = get_bsdthread_info(thread); - tl = uth->uu_threadlist; - wq = tl->th_workq; + tl = uth->uu_threadlist; + wq = tl->th_workq; switch (type) { @@ -1231,30 
+857,40 @@ workqueue_callback( { uint32_t old_activecount; - old_activecount = OSAddAtomic(-1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]); + old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]); + + if (old_activecount == 1) { + boolean_t start_timer = FALSE; + uint64_t curtime; + UInt64 *lastblocked_ptr; - if (old_activecount == 1 && wq->wq_itemcount) { /* * we were the last active thread on this affinity set * and we've got work to do */ - workqueue_lock_spin(wq->wq_proc); + lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority][tl->th_affinity_tag]; + curtime = mach_absolute_time(); + /* - * if this thread is blocking (not parking) - * and the idle list is empty for this affinity group - * we'll count it as a 'stall' + * if we collide with another thread trying to update the last_blocked (really unlikely + * since another thread would have to get scheduled and then block after we start down + * this path), it's not a problem. 
Either timestamp is adequate, so no need to retry */ - if ((tl->th_flags & TH_LIST_RUNNING) && - TAILQ_EMPTY(&wq->wq_thidlelist[tl->th_affinity_tag])) - wq->wq_stalled_count++; - - workqueue_run_nextitem(wq->wq_proc, THREAD_NULL); +#if defined(__ppc__) /* - * workqueue_run_nextitem will drop the workqueue - * lock before it returns + * this doesn't have to actually work reliablly for PPC, it just has to compile/link */ + *lastblocked_ptr = (UInt64)curtime; +#else + OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr); +#endif + if (wq->wq_itemcount) + WQ_TIMER_NEEDED(wq, start_timer); + + if (start_timer == TRUE) + workqueue_interval_timer_start(wq); } - KERNEL_DEBUG(0xefffd020, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0); + KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_START, wq, old_activecount, tl->th_priority, tl->th_affinity_tag, thread_tid(thread)); } break; @@ -1267,81 +903,72 @@ workqueue_callback( * the thread lock for the thread being UNBLOCKED * is also held */ - if (tl->th_unparked) - OSAddAtomic(-1, (SInt32 *)&tl->th_unparked); - else - OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]); + if (tl->th_suspended) { + OSAddAtomic(-1, &tl->th_suspended); + KERNEL_DEBUG1(0xefffd024, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread)); + } else { + OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]); - KERNEL_DEBUG(0xefffd024, (int)thread, wq->wq_threads_scheduled, tl->th_affinity_tag, 0, 0); + KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread)); + } break; } } + static void -workqueue_removethread(struct workqueue *wq) +workqueue_removethread(struct threadlist *tl) { - struct threadlist *tl; - uint32_t i, affinity_tag = 0; - - tl = NULL; - - workqueue_lock_spin(wq->wq_proc); - - for (i = 0; i < wq->wq_affinity_max; i++) { + struct workqueue *wq; + struct uthread * uth; - 
affinity_tag = wq->wq_nextaffinitytag; + wq = tl->th_workq; - if (affinity_tag == 0) - affinity_tag = wq->wq_affinity_max - 1; - else - affinity_tag--; - wq->wq_nextaffinitytag = affinity_tag; + TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - /* - * look for an idle thread to steal from this affinity group - * but don't grab the only thread associated with it - */ - if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag]) && wq->wq_thcount[affinity_tag] > 1) { - tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]); - TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry); + wq->wq_nthreads--; + wq->wq_thidlecount--; - wq->wq_nthreads--; - wq->wq_thcount[affinity_tag]--; + /* + * Clear the threadlist pointer in uthread so + * blocked thread on wakeup for termination will + * not access the thread list as it is going to be + * freed. + */ + thread_sched_call(tl->th_thread, NULL); - break; - } + uth = get_bsdthread_info(tl->th_thread); + if (uth != (struct uthread *)0) { + uth->uu_threadlist = NULL; } workqueue_unlock(wq->wq_proc); - if (tl != NULL) { - thread_sched_call(tl->th_thread, NULL); - - if ( (tl->th_flags & TH_LIST_BLOCKED) ) - wakeup(tl); - else { - /* - * thread was created, but never used... - * need to clean up the stack and port ourselves - * since we're not going to spin up through the - * normal exit path triggered from Libc - */ - (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize); - (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), (mach_port_name_t)tl->th_thport); - - thread_terminate(tl->th_thread); - } - KERNEL_DEBUG(0xefffd030, (int)tl->th_thread, wq->wq_nthreads, tl->th_flags & TH_LIST_BLOCKED, 0, 0); + if ( (tl->th_flags & TH_LIST_SUSPENDED) ) { /* - * drop our ref on the thread + * thread was created, but never used... 
+ * need to clean up the stack and port ourselves + * since we're not going to spin up through the + * normal exit path triggered from Libc */ - thread_deallocate(tl->th_thread); + (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize); + (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport); - kfree(tl, sizeof(struct threadlist)); + KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread)); + } else { + + KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread)); } + /* + * drop our ref on the thread + */ + thread_deallocate(tl->th_thread); + + kfree(tl, sizeof(struct threadlist)); } -static int + +static boolean_t workqueue_addnewthread(struct workqueue *wq) { struct threadlist *tl; @@ -1351,21 +978,25 @@ workqueue_addnewthread(struct workqueue *wq) proc_t p; void *sright; mach_vm_offset_t stackaddr; - uint32_t affinity_tag; + + if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) + return (FALSE); + wq->wq_nthreads++; p = wq->wq_proc; + workqueue_unlock(p); - kret = thread_create(wq->wq_task, &th); + kret = thread_create_workq(wq->wq_task, &th); if (kret != KERN_SUCCESS) - return(EINVAL); + goto failed; tl = kalloc(sizeof(struct threadlist)); bzero(tl, sizeof(struct threadlist)); #if defined(__ppc__) stackaddr = 0xF0000000; -#elif defined(__i386__) +#elif defined(__i386__) || defined(__x86_64__) stackaddr = 0xB0000000; #else #error Need to define a stack address hint for this architecture @@ -1398,94 +1029,99 @@ workqueue_addnewthread(struct workqueue *wq) (void) thread_terminate(th); kfree(tl, sizeof(struct threadlist)); - - return(EINVAL); + goto failed; } thread_reference(th); sright = (void *) convert_thread_to_port(th); - tl->th_thport = (void *)ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task)); + tl->th_thport = 
ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task)); thread_static_param(th, TRUE); - workqueue_lock_spin(p); - - affinity_tag = wq->wq_nextaffinitytag; - wq->wq_nextaffinitytag = (affinity_tag + 1) % wq->wq_affinity_max; - - workqueue_unlock(p); - tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED; tl->th_thread = th; tl->th_workq = wq; tl->th_stackaddr = stackaddr; - tl->th_affinity_tag = affinity_tag; + tl->th_affinity_tag = -1; + tl->th_priority = WORKQUEUE_NUMPRIOS; + tl->th_policy = -1; + tl->th_suspended = 1; #if defined(__ppc__) //ml_fp_setvalid(FALSE); thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p)); #endif /* __ppc__ */ - /* - * affinity tag of 0 means no affinity... - * but we want our tags to be 0 based because they - * are used to index arrays, so... - * keep it 0 based internally and bump by 1 when - * calling out to set it - */ - (void)thread_affinity_set(th, affinity_tag + 1); - thread_sched_call(th, workqueue_callback); uth = get_bsdthread_info(tl->th_thread); uth->uu_threadlist = (void *)tl; workqueue_lock_spin(p); - TAILQ_INSERT_TAIL(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry); - wq->wq_nthreads++; - wq->wq_thcount[affinity_tag]++; + TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry); - KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, (int)current_thread(), affinity_tag, wq->wq_nthreads, 0, (int)tl->th_thread); + wq->wq_thidlecount++; - /* - * work may have come into the queue while - * no threads were available to run... 
since - * we're adding a new thread, go evaluate the - * current state - */ - workqueue_run_nextitem(p, THREAD_NULL); - /* - * workqueue_run_nextitem is responsible for - * dropping the workqueue lock in all cases - */ + KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread)); - return(0); + return (TRUE); + +failed: + workqueue_lock_spin(p); + wq->wq_nthreads--; + + return (FALSE); } + int -workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unused register_t *retval) +workq_open(struct proc *p, __unused struct workq_open_args *uap, __unused int32_t *retval) { struct workqueue * wq; - int size; + int wq_size; char * ptr; + char * nptr; int j; uint32_t i; + uint32_t num_cpus; int error = 0; - int num_cpus; + boolean_t need_wakeup = FALSE; struct workitem * witem; struct workitemlist *wl; - workqueue_lock(p); + if ((p->p_lflag & P_LREGISTER) == 0) + return(EINVAL); + + workqueue_lock_spin(p); if (p->p_wqptr == NULL) { + + while (p->p_wqiniting == TRUE) { + + assert_wait((caddr_t)&p->p_wqiniting, THREAD_UNINT); + workqueue_unlock(p); + + thread_block(THREAD_CONTINUE_NULL); + + workqueue_lock_spin(p); + } + if (p->p_wqptr != NULL) + goto out; + + p->p_wqiniting = TRUE; + + workqueue_unlock(p); + num_cpus = ml_get_max_cpus(); - size = (sizeof(struct workqueue)) + - (num_cpus * sizeof(int *)) + - (num_cpus * sizeof(TAILQ_HEAD(, threadlist))); + wq_size = sizeof(struct workqueue) + + (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) + + (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) + + (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) + + sizeof(uint64_t); - ptr = (char *)kalloc(size); - bzero(ptr, size); + ptr = (char *)kalloc(wq_size); + bzero(ptr, wq_size); wq = (struct workqueue *)ptr; wq->wq_flags = WQ_LIST_INITED; @@ -1503,61 +1139,82 @@ workq_open(__unused struct proc *p, __unused struct workq_open_args *uap, __unu witem = &wq->wq_array[(i*WORKITEM_SIZE) + j]; 
TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry); } + wq->wq_reqconc[i] = wq->wq_affinity_max; } - wq->wq_thactivecount = (uint32_t *)((char *)ptr + sizeof(struct workqueue)); - wq->wq_thcount = (uint32_t *)&wq->wq_thactivecount[wq->wq_affinity_max]; - wq->wq_thidlelist = (struct wq_thidlelist *)&wq->wq_thcount[wq->wq_affinity_max]; + nptr = ptr + sizeof(struct workqueue); - for (i = 0; i < wq->wq_affinity_max; i++) - TAILQ_INIT(&wq->wq_thidlelist[i]); + for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { + wq->wq_thactive_count[i] = (uint32_t *)nptr; + nptr += (num_cpus * sizeof(uint32_t)); + } + for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { + wq->wq_thscheduled_count[i] = (uint32_t *)nptr; + nptr += (num_cpus * sizeof(uint32_t)); + } + /* + * align nptr on a 64 bit boundary so that we can do nice + * atomic64 operations on the timestamps... + * note that we requested an extra uint64_t when calcuating + * the size for the allocation of the workqueue struct + */ + nptr += (sizeof(uint64_t) - 1); + nptr = (char *)((long)nptr & ~(sizeof(uint64_t) - 1)); + for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { + wq->wq_lastblocked_ts[i] = (uint64_t *)nptr; + nptr += (num_cpus * sizeof(uint64_t)); + } TAILQ_INIT(&wq->wq_thrunlist); + TAILQ_INIT(&wq->wq_thidlelist); - p->p_wqptr = (void *)wq; - p->p_wqsize = size; - - workqueue_unlock(p); - - wq->wq_timer_call = thread_call_allocate((thread_call_func_t)workqueue_timer, (thread_call_param_t)wq); + wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq); - for (i = 0; i < wq->wq_affinity_max; i++) { - (void)workqueue_addnewthread(wq); - } - /* If unable to create any threads, return error */ - if (wq->wq_nthreads == 0) - error = EINVAL; workqueue_lock_spin(p); - microuptime(&wq->wq_reduce_ts); - microuptime(&wq->wq_lastran_ts); - wq->wq_max_threads_scheduled = 0; - wq->wq_stalled_count = 0; + p->p_wqptr = (void *)wq; + p->p_wqsize = wq_size; + + p->p_wqiniting = FALSE; + need_wakeup = TRUE; 
} +out: workqueue_unlock(p); + if (need_wakeup == TRUE) + wakeup(&p->p_wqiniting); return(error); } int -workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retval) +workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, __unused int32_t *retval) { - int options = uap->options; - int prio = uap->prio; /* should be used to find the right workqueue */ user_addr_t item = uap->item; - int error = 0; - thread_t th = THREAD_NULL; + int options = uap->options; + int prio = uap->prio; /* should be used to find the right workqueue */ + int affinity = uap->affinity; + int error = 0; + thread_t th = THREAD_NULL; + user_addr_t oc_item = 0; struct workqueue *wq; - prio += 2; /* normalize prio -2 to +2 to 0 -4 */ + if ((p->p_lflag & P_LREGISTER) == 0) + return(EINVAL); + + /* + * affinity not yet hooked up on this path + */ + affinity = -1; switch (options) { case WQOPS_QUEUE_ADD: { - - KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, (int)item, 0, 0, 0, 0); - - if ((prio < 0) || (prio >= 5)) - return (EINVAL); + + if (prio & WORKQUEUE_OVERCOMMIT) { + prio &= ~WORKQUEUE_OVERCOMMIT; + oc_item = item; + } + if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS)) + return (EINVAL); workqueue_lock_spin(p); @@ -1565,14 +1222,23 @@ workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retv workqueue_unlock(p); return (EINVAL); } - error = workqueue_additem(wq, prio, item); - + if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_nthreads < wq->wq_affinity_max))) { + + workqueue_addnewthread(wq); + + if (wq->wq_thidlecount == 0) + oc_item = 0; + } + if (oc_item == 0) + error = workqueue_additem(wq, prio, item, affinity); + + KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0); } break; case WQOPS_QUEUE_REMOVE: { - if ((prio < 0) || (prio >= 5)) - return (EINVAL); + if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS)) + return (EINVAL); workqueue_lock_spin(p); @@ -1586,8 +1252,28 @@ workq_ops(struct proc *p, struct workq_ops_args 
*uap, __unused register_t *retv case WQOPS_THREAD_RETURN: { th = current_thread(); + struct uthread *uth = get_bsdthread_info(th); + + /* reset signal mask on the workqueue thread to default state */ + if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) { + proc_lock(p); + uth->uu_sigmask = ~workq_threadmask; + proc_unlock(p); + } + + workqueue_lock_spin(p); + + if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) { + workqueue_unlock(p); + return (EINVAL); + } + KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0); + } + break; + case WQOPS_THREAD_SETCONC: { - KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, 0, 0, 0, 0, 0); + if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS)) + return (EINVAL); workqueue_lock_spin(p); @@ -1595,70 +1281,131 @@ workq_ops(struct proc *p, struct workq_ops_args *uap, __unused register_t *retv workqueue_unlock(p); return (EINVAL); } + /* + * for this operation, we re-purpose the affinity + * argument as the concurrency target + */ + if (prio < WORKQUEUE_NUMPRIOS) + wq->wq_reqconc[prio] = affinity; + else { + for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++) + wq->wq_reqconc[prio] = affinity; + + } } break; default: return (EINVAL); } - workqueue_run_nextitem(p, th); + (void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity); /* * workqueue_run_nextitem is responsible for * dropping the workqueue lock in all cases */ - return(error); + return (error); + } + void workqueue_exit(struct proc *p) { struct workqueue * wq; struct threadlist * tl, *tlist; - uint32_t i; + struct uthread *uth; + int wq_size = 0; if (p->p_wqptr != NULL) { + KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0); + workqueue_lock_spin(p); wq = (struct workqueue *)p->p_wqptr; + + if (wq == NULL) { + workqueue_unlock(p); + + KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, -1, 0); + return; + } + wq_size = p->p_wqsize; p->p_wqptr = NULL; + p->p_wqsize = 0; + + /* + * we now arm the timer in the callback function w/o holding the 
workq lock... + * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to + * insure only a single timer if running and to notice that WQ_EXITING has + * been set (we don't want to start a timer once WQ_EXITING is posted) + * + * so once we have successfully set WQ_EXITING, we cannot fire up a new timer... + * therefor no need to clear the timer state atomically from the flags + * + * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING + * the check for and sleep until clear is protected + */ + while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags))); + if (wq->wq_flags & WQ_ATIMER_RUNNING) { + if (thread_call_cancel(wq->wq_atimer_call) == TRUE) + wq->wq_flags &= ~WQ_ATIMER_RUNNING; + } + while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) { + + assert_wait((caddr_t)wq, (THREAD_UNINT)); + workqueue_unlock(p); + + thread_block(THREAD_CONTINUE_NULL); + + workqueue_lock_spin(p); + } workqueue_unlock(p); - if (wq == NULL) - return; - - if (wq->wq_flags & WQ_TIMER_RUNNING) - thread_call_cancel(wq->wq_timer_call); - thread_call_free(wq->wq_timer_call); + TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { + + thread_sched_call(tl->th_thread, NULL); + + uth = get_bsdthread_info(tl->th_thread); + if (uth != (struct uthread *)0) { + uth->uu_threadlist = NULL; + } + TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); + + /* + * drop our last ref on the thread + */ + thread_deallocate(tl->th_thread); + + kfree(tl, sizeof(struct threadlist)); + } + TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) { + + thread_sched_call(tl->th_thread, NULL); + + uth = get_bsdthread_info(tl->th_thread); + if (uth != (struct uthread *)0) { + uth->uu_threadlist = NULL; + } + TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { /* * drop our last ref on the thread */ - thread_sched_call(tl->th_thread, NULL); 
thread_deallocate(tl->th_thread); - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); kfree(tl, sizeof(struct threadlist)); } - for (i = 0; i < wq->wq_affinity_max; i++) { - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist[i], th_entry, tlist) { - /* - * drop our last ref on the thread - */ - thread_sched_call(tl->th_thread, NULL); - thread_deallocate(tl->th_thread); + thread_call_free(wq->wq_atimer_call); - TAILQ_REMOVE(&wq->wq_thidlelist[i], tl, th_entry); - kfree(tl, sizeof(struct threadlist)); - } - } - kfree(wq, p->p_wqsize); + kfree(wq, wq_size); + + KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, 0, 0); } } static int -workqueue_additem(struct workqueue *wq, int prio, user_addr_t item) +workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity) { struct workitem *witem; struct workitemlist *wl; @@ -1672,12 +1419,11 @@ workqueue_additem(struct workqueue *wq, int prio, user_addr_t item) TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry); witem->wi_item = item; + witem->wi_affinity = affinity; TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry); - if (wq->wq_itemcount == 0) { - microuptime(&wq->wq_lastran_ts); - wq->wq_stalled_count = 0; - } + wq->wq_list_bitmap |= (1 << prio); + wq->wq_itemcount++; return (0); @@ -1695,228 +1441,414 @@ workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item) TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) { if (witem->wi_item == item) { TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry); - wq->wq_itemcount--; + if (TAILQ_EMPTY(&wl->wl_itemlist)) + wq->wq_list_bitmap &= ~(1 << prio); + wq->wq_itemcount--; + witem->wi_item = (user_addr_t)0; + witem->wi_affinity = 0; TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry); error = 0; break; } } - if (wq->wq_itemcount == 0) - wq->wq_flags &= ~(WQ_ADD_TO_POOL | WQ_TIMER_WATCH); - return (error); } + + + +static int workqueue_importance[WORKQUEUE_NUMPRIOS] = +{ + 2, 0, -2, +}; + +static int workqueue_policy[WORKQUEUE_NUMPRIOS] = +{ + 1, 1, 1, +}; + + /* * 
workqueue_run_nextitem: * called with the workqueue lock held... * responsible for dropping it in all cases */ -static void -workqueue_run_nextitem(proc_t p, thread_t thread) +static boolean_t +workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity) { - struct workqueue *wq; struct workitem *witem = NULL; user_addr_t item = 0; thread_t th_to_run = THREAD_NULL; thread_t th_to_park = THREAD_NULL; int wake_thread = 0; int reuse_thread = 1; - uint32_t stalled_affinity_count = 0; - int i; - uint32_t affinity_tag; + uint32_t priority, orig_priority; + uint32_t affinity_tag, orig_affinity_tag; + uint32_t i, n; + uint32_t activecount; + uint32_t busycount; + uint32_t us_to_wait; struct threadlist *tl = NULL; + struct threadlist *ttl = NULL; struct uthread *uth = NULL; - struct workitemlist *wl; + struct workitemlist *wl = NULL; boolean_t start_timer = FALSE; - struct timeval tv, lat_tv; + boolean_t adjust_counters = TRUE; + uint64_t curtime; - wq = (struct workqueue *)p->p_wqptr; - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, (int)thread, wq->wq_threads_scheduled, wq->wq_stalled_count, 0, 0); + KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_itemcount, 0); + + /* + * from here until we drop the workq lock + * we can't be pre-empted since we hold + * the lock in spin mode... 
this is important + * since we have to independently update the priority + * and affinity that the thread is associated with + * and these values are used to index the multi-dimensional + * counter arrays in 'workqueue_callback' + */ + if (oc_item) { + uint32_t min_scheduled = 0; + uint32_t scheduled_count; + uint32_t active_count; + uint32_t t_affinity = 0; + + priority = oc_prio; + item = oc_item; + + if ((affinity_tag = oc_affinity) == (uint32_t)-1) { + for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) { + /* + * look for the affinity group with the least number of threads + */ + scheduled_count = 0; + active_count = 0; + for (i = 0; i <= priority; i++) { + scheduled_count += wq->wq_thscheduled_count[i][affinity_tag]; + active_count += wq->wq_thactive_count[i][affinity_tag]; + } + if (active_count == 0) { + t_affinity = affinity_tag; + break; + } + if (affinity_tag == 0 || scheduled_count < min_scheduled) { + min_scheduled = scheduled_count; + t_affinity = affinity_tag; + } + } + affinity_tag = t_affinity; + } + goto grab_idle_thread; + } if (wq->wq_itemcount == 0) { if ((th_to_park = thread) == THREAD_NULL) - goto out; + goto out_of_work; goto parkit; } + for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) { + if (wq->wq_list_bitmap & (1 << priority)) { + wl = (struct workitemlist *)&wq->wq_list[priority]; + break; + } + } + assert(wl != NULL); + assert(!(TAILQ_EMPTY(&wl->wl_itemlist))); + + curtime = mach_absolute_time(); + if (thread != THREAD_NULL) { - /* - * we're a worker thread from the pool... 
currently we - * are considered 'active' which means we're counted - * in "wq_thactivecount" - */ uth = get_bsdthread_info(thread); tl = uth->uu_threadlist; + affinity_tag = tl->th_affinity_tag; - if (wq->wq_thactivecount[tl->th_affinity_tag] == 1) { - /* - * we're the only active thread associated with our - * affinity group, so pick up some work and keep going + /* + * check to see if the affinity group this thread is + * associated with is still within the bounds of the + * specified concurrency for the priority level + * we're considering running work for + */ + if (affinity_tag < wq->wq_reqconc[priority]) { + /* + * we're a worker thread from the pool... currently we + * are considered 'active' which means we're counted + * in "wq_thactive_count" + * add up the active counts of all the priority levels + * up to and including the one we want to schedule */ - th_to_run = thread; - goto pick_up_work; + for (activecount = 0, i = 0; i <= priority; i++) { + uint32_t acount; + + acount = wq->wq_thactive_count[i][affinity_tag]; + + if (acount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) { + if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) + acount = 1; + } + activecount += acount; + } + if (activecount == 1) { + /* + * we're the only active thread associated with our + * affinity group at this priority level and higher, + * so pick up some work and keep going + */ + th_to_run = thread; + goto pick_up_work; + } } + /* + * there's more than 1 thread running in this affinity group + * or the concurrency level has been cut back for this priority... + * lets continue on and look for an 'empty' group to run this + * work item in + */ } - for (affinity_tag = 0; affinity_tag < wq->wq_affinity_max; affinity_tag++) { - /* + busycount = 0; + + for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) { + /* * look for first affinity group that is currently not active - * and has at least 1 idle thread + * i.e. 
no active threads at this priority level or higher + * and no threads that have run recently */ - if (wq->wq_thactivecount[affinity_tag] == 0) { - if (!TAILQ_EMPTY(&wq->wq_thidlelist[affinity_tag])) - break; - stalled_affinity_count++; + for (activecount = 0, i = 0; i <= priority; i++) { + if ((activecount = wq->wq_thactive_count[i][affinity_tag])) + break; + + if (wq->wq_thscheduled_count[i][affinity_tag]) { + if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) { + busycount++; + break; + } + } } + if (activecount == 0 && busycount == 0) + break; } - if (thread == THREAD_NULL) { - /* - * we're not one of the 'worker' threads + if (affinity_tag >= wq->wq_reqconc[priority]) { + /* + * we've already got at least 1 thread per + * affinity group in the active state... */ - if (affinity_tag >= wq->wq_affinity_max) { - /* - * we've already got at least 1 thread per - * affinity group in the active state... or - * we've got no idle threads to play with + if (busycount) { + /* + * we found at least 1 thread in the + * 'busy' state... make sure we start + * the timer because if they are the only + * threads keeping us from scheduling + * this workitem, we won't get a callback + * to kick off the timer... we need to + * start it now... 
*/ - if (stalled_affinity_count) { - - if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) { - wq->wq_flags |= WQ_TIMER_RUNNING; - start_timer = TRUE; - } - wq->wq_flags |= WQ_TIMER_WATCH; - } - goto out; + WQ_TIMER_NEEDED(wq, start_timer); } - } else { - /* - * we're overbooked on the affinity group we're associated with, - * so park this thread - */ - th_to_park = thread; + KERNEL_DEBUG(0xefffd000 | DBG_FUNC_NONE, wq, busycount, start_timer, 0, 0); - if (affinity_tag >= wq->wq_affinity_max) { - /* - * all the affinity groups have active threads - * running, or there are no idle threads to - * schedule + if (thread != THREAD_NULL) { + /* + * go park this one for later */ - if (stalled_affinity_count) { - - if ( !(wq->wq_flags & WQ_TIMER_RUNNING) ) { - wq->wq_flags |= WQ_TIMER_RUNNING; - start_timer = TRUE; - } - wq->wq_flags |= WQ_TIMER_WATCH; - } + th_to_park = thread; goto parkit; } + goto out_of_work; + } + if (thread != THREAD_NULL) { + /* + * we're overbooked on the affinity group this thread is + * currently associated with, but we have work to do + * and at least 1 idle processor, so we'll just retarget + * this thread to a new affinity group + */ + th_to_run = thread; + goto pick_up_work; + } + if (wq->wq_thidlecount == 0) { /* - * we've got a candidate (affinity group with no currently - * active threads) to start a new thread on... - * we already know there is both work available - * and an idle thread with the correct affinity tag, so - * fall into the code that pulls a new thread and workitem... - * once we've kicked that thread off, we'll park this one + * we don't have a thread to schedule, but we have + * work to do and at least 1 affinity group that + * doesn't currently have an active thread... 
*/ + WQ_TIMER_NEEDED(wq, start_timer); + + KERNEL_DEBUG(0xefffd118, wq, wq->wq_nthreads, start_timer, 0, 0); + + goto no_thread_to_run; + } + +grab_idle_thread: + /* + * we've got a candidate (affinity group with no currently + * active threads) to start a new thread on... + * we already know there is both work available + * and an idle thread, so activate a thread and then + * fall into the code that pulls a new workitem... + */ + TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) { + if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) { + + TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry); + tl = ttl; + + break; + } } - tl = TAILQ_FIRST(&wq->wq_thidlelist[affinity_tag]); - TAILQ_REMOVE(&wq->wq_thidlelist[affinity_tag], tl, th_entry); + if (tl == NULL) { + tl = TAILQ_FIRST(&wq->wq_thidlelist); + TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); + } + wq->wq_thidlecount--; - th_to_run = tl->th_thread; TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry); if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) { - tl->th_flags &= ~TH_LIST_SUSPENDED; + tl->th_flags &= ~TH_LIST_SUSPENDED; reuse_thread = 0; + + thread_sched_call(tl->th_thread, workqueue_callback); + } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) { - tl->th_flags &= ~TH_LIST_BLOCKED; + tl->th_flags &= ~TH_LIST_BLOCKED; + tl->th_flags |= TH_LIST_BUSY; wake_thread = 1; } tl->th_flags |= TH_LIST_RUNNING; - wq->wq_threads_scheduled++; + wq->wq_threads_scheduled++; + wq->wq_thscheduled_count[priority][affinity_tag]++; + OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]); - if (wq->wq_threads_scheduled > wq->wq_max_threads_scheduled) - wq->wq_max_threads_scheduled = wq->wq_threads_scheduled; + adjust_counters = FALSE; + th_to_run = tl->th_thread; pick_up_work: - for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { - wl = (struct workitemlist *)&wq->wq_list[i]; - - if (!(TAILQ_EMPTY(&wl->wl_itemlist))) { + if (item == 0) { + witem = TAILQ_FIRST(&wl->wl_itemlist); 
+ TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry); + + if (TAILQ_EMPTY(&wl->wl_itemlist)) + wq->wq_list_bitmap &= ~(1 << priority); + wq->wq_itemcount--; + + item = witem->wi_item; + witem->wi_item = (user_addr_t)0; + witem->wi_affinity = 0; + TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry); + } + orig_priority = tl->th_priority; + orig_affinity_tag = tl->th_affinity_tag; - witem = TAILQ_FIRST(&wl->wl_itemlist); - TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry); - wq->wq_itemcount--; + tl->th_priority = priority; + tl->th_affinity_tag = affinity_tag; - item = witem->wi_item; - witem->wi_item = (user_addr_t)0; - TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry); + if (adjust_counters == TRUE && (orig_priority != priority || orig_affinity_tag != affinity_tag)) { + /* + * we need to adjust these counters based on this + * thread's new disposition w/r to affinity and priority + */ + OSAddAtomic(-1, &wq->wq_thactive_count[orig_priority][orig_affinity_tag]); + OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]); - break; - } + wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--; + wq->wq_thscheduled_count[priority][affinity_tag]++; } - if (witem == NULL) - panic("workq_run_nextitem: NULL witem"); + wq->wq_thread_yielded_count = 0; - if (thread != th_to_run) { - /* - * we're starting up a thread from a parked/suspended condition - */ - OSAddAtomic(1, (SInt32 *)&wq->wq_thactivecount[tl->th_affinity_tag]); - OSAddAtomic(1, (SInt32 *)&tl->th_unparked); - } - if (wq->wq_itemcount == 0) - wq->wq_flags &= ~WQ_TIMER_WATCH; - else { - microuptime(&tv); + workqueue_unlock(p); + + if (orig_affinity_tag != affinity_tag) { /* - * if we had any affinity groups stall (no threads runnable) - * since we last scheduled an item... and - * the elapsed time since we last scheduled an item - * exceeds the latency tolerance... 
- * we ask the timer thread (which should already be running) - * to add some more threads to the pool + * this thread's affinity does not match the affinity group + * its being placed on (it's either a brand new thread or + * we're retargeting an existing thread to a new group)... + * affinity tag of 0 means no affinity... + * but we want our tags to be 0 based because they + * are used to index arrays, so... + * keep it 0 based internally and bump by 1 when + * calling out to set it */ - if (wq->wq_stalled_count && !(wq->wq_flags & WQ_ADD_TO_POOL)) { - timersub(&tv, &wq->wq_lastran_ts, &lat_tv); + KERNEL_DEBUG(0xefffd114 | DBG_FUNC_START, wq, orig_affinity_tag, 0, 0, 0); - if (((lat_tv.tv_sec * 1000000) + lat_tv.tv_usec) > wq_max_run_latency_usecs) - wq->wq_flags |= WQ_ADD_TO_POOL; + (void)thread_affinity_set(th_to_run, affinity_tag + 1); - KERNEL_DEBUG(0xefffd10c, wq->wq_stalled_count, lat_tv.tv_sec, lat_tv.tv_usec, wq->wq_flags, 0); - } - wq->wq_lastran_ts = tv; + KERNEL_DEBUG(0xefffd114 | DBG_FUNC_END, wq, affinity_tag, 0, 0, 0); } - wq->wq_stalled_count = 0; - workqueue_unlock(p); + if (orig_priority != priority) { + thread_precedence_policy_data_t precedinfo; + thread_extended_policy_data_t extinfo; + uint32_t policy; - KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[0], wq->wq_thactivecount[1], - wq->wq_thactivecount[2], wq->wq_thactivecount[3], 0); + policy = workqueue_policy[priority]; + + KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0); + + if (tl->th_policy != policy) { - KERNEL_DEBUG(0xefffd02c, wq->wq_thactivecount[4], wq->wq_thactivecount[5], - wq->wq_thactivecount[6], wq->wq_thactivecount[7], 0); + extinfo.timeshare = policy; + (void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT); + + tl->th_policy = policy; + } + precedinfo.importance = workqueue_importance[priority]; + (void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, 
(thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); + KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq, priority, policy, 0, 0); + } + if (kdebug_enable) { + int lpri = -1; + int laffinity = -1; + int first = -1; + uint32_t code = 0xefffd02c | DBG_FUNC_START; + + for (n = 0; n < WORKQUEUE_NUMPRIOS; n++) { + for (i = 0; i < wq->wq_affinity_max; i++) { + if (wq->wq_thactive_count[n][i]) { + if (lpri != -1) { + KERNEL_DEBUG(code, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0); + code = 0xefffd02c; + first = 0; + } + lpri = n; + laffinity = i; + } + } + } + if (lpri != -1) { + if (first == -1) + first = 0xeeeeeeee; + KERNEL_DEBUG(0xefffd02c | DBG_FUNC_END, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0); + } + } /* * if current thread is reused for workitem, does not return via unix_syscall */ wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run)); - if (th_to_park == THREAD_NULL) { + KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0); - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, (int)item, wq->wq_flags, 1, 0); + return (TRUE); - return; - } - workqueue_lock_spin(p); +out_of_work: + /* + * we have no work to do or we are fully booked + * w/r to running threads... + */ +no_thread_to_run: + workqueue_unlock(p); + + if (start_timer) + workqueue_interval_timer_start(wq); + + KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 2, 0); + + return (FALSE); parkit: - wq->wq_threads_scheduled--; /* * this is a workqueue thread with no more * work to do... 
park it for now @@ -1930,39 +1862,117 @@ workqueue_run_nextitem(proc_t p, thread_t thread) tl->th_flags &= ~TH_LIST_RUNNING; tl->th_flags |= TH_LIST_BLOCKED; - TAILQ_INSERT_HEAD(&wq->wq_thidlelist[tl->th_affinity_tag], tl, th_entry); + TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); + + thread_sched_call(th_to_park, NULL); + + OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]); + wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--; + wq->wq_threads_scheduled--; - assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE)); + if (wq->wq_thidlecount < 100) + us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100)); + else + us_to_wait = wq_reduce_pool_window_usecs / 100; + + wq->wq_thidlecount++; + + assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC); workqueue_unlock(p); if (start_timer) - workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs); + workqueue_interval_timer_start(wq); + + KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park)); + KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0); - KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, (int)current_thread(), wq->wq_threads_scheduled, 0, 0, (int)th_to_park); + thread_block((thread_continue_t)wq_unpark_continue); + /* NOT REACHED */ - thread_block((thread_continue_t)thread_exception_return); + return (FALSE); +} - panic("unexpected return from thread_block"); -out: - workqueue_unlock(p); +static void +wq_unpark_continue(void) +{ + struct uthread *uth = NULL; + struct threadlist *tl; + thread_t th_to_unpark; + proc_t p; + + th_to_unpark = current_thread(); + uth = get_bsdthread_info(th_to_unpark); + + if (uth != NULL) { + if ((tl = uth->uu_threadlist) != NULL) { + + if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { + /* + * a normal wakeup of this thread occurred... 
no need + * for any synchronization with the timer and wq_runitem + */ +normal_return_to_user: + thread_sched_call(th_to_unpark, workqueue_callback); - if (start_timer) - workqueue_interval_timer_start(wq->wq_timer_call, wq_timer_interval_msecs); + KERNEL_DEBUG(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0); + + thread_exception_return(); + } + p = current_proc(); + + workqueue_lock_spin(p); + + if ( !(tl->th_flags & TH_LIST_RUNNING)) { + /* + * the timer popped us out and we've not + * been moved off of the idle list + * so we should now self-destruct + * + * workqueue_removethread consumes the lock + */ + workqueue_removethread(tl); + + thread_exception_return(); + } + /* + * the timer woke us up, but we have already + * started to make this a runnable thread, + * but have not yet finished that process... + * so wait for the normal wakeup + */ + while ((tl->th_flags & TH_LIST_BUSY)) { + + assert_wait((caddr_t)tl, (THREAD_UNINT)); + + workqueue_unlock(p); - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, (int)thread, 0, wq->wq_flags, 2, 0); + thread_block(THREAD_CONTINUE_NULL); + + workqueue_lock_spin(p); + } + /* + * we have finished setting up the thread's context + * now we can return as if we got a normal wakeup + */ + workqueue_unlock(p); - return; + goto normal_return_to_user; + } + } + thread_exception_return(); } + + static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl, int reuse_thread, int wake_thread, int return_directly) { int ret = 0; - KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, (int)current_thread(), (int)item, wake_thread, tl->th_affinity_tag, (int)th); + KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th)); ret = setup_wqthread(p, th, item, reuse_thread, tl); @@ -1970,16 +1980,21 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl, panic("setup_wqthread failed %x\n", ret); if (return_directly) { + 
KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0); + thread_exception_return(); panic("wq_runitem: thread_exception_return returned ...\n"); } if (wake_thread) { - KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th); - + workqueue_lock_spin(p); + + tl->th_flags &= ~TH_LIST_BUSY; wakeup(tl); + + workqueue_unlock(p); } else { - KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, (int)current_thread(), 0, 0, 0, (int)th); + KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th)); thread_resume(th); } @@ -2003,15 +2018,17 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct ts64->srr0 = (uint64_t)p->p_wqthread; ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE); ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE); - ts64->r4 = (uint64_t)((unsigned int)tl->th_thport); + ts64->r4 = (uint64_t)(tl->th_thport); ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE); ts64->r6 = (uint64_t)item; ts64->r7 = (uint64_t)reuse_thread; ts64->r8 = (uint64_t)0; + if ((reuse_thread != 0) && (ts64->r3 == (uint64_t)0)) + panic("setup_wqthread: setting reuse thread with null pthread\n"); thread_set_wq_state64(th, (thread_state_t)ts64); } -#elif defined(__i386__) +#elif defined(__i386__) || defined(__x86_64__) int isLP64 = 0; isLP64 = IS_64BIT_PROCESS(p); @@ -2034,6 +2051,8 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct */ ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN)); + if ((reuse_thread != 0) && (ts->eax == (unsigned int)0)) + panic("setup_wqthread: setting reuse thread with null pthread\n"); thread_set_wq_state32(th, (thread_state_t)ts); } else { @@ -2042,7 +2061,7 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct ts64->rip 
= (uint64_t)p->p_wqthread; ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE); - ts64->rsi = (uint64_t)((unsigned int)(tl->th_thport)); + ts64->rsi = (uint64_t)(tl->th_thport); ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE); ts64->rcx = (uint64_t)item; ts64->r8 = (uint64_t)reuse_thread; @@ -2053,6 +2072,8 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct */ ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN); + if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0)) + panic("setup_wqthread: setting reuse thread with null pthread\n"); thread_set_wq_state64(th, (thread_state_t)ts64); } #else @@ -2061,3 +2082,153 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct return(0); } +int +fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) +{ + struct workqueue * wq; + int error = 0; + int activecount; + uint32_t pri, affinity; + + workqueue_lock_spin(p); + if ((wq = p->p_wqptr) == NULL) { + error = EINVAL; + goto out; + } + activecount = 0; + + for (pri = 0; pri < WORKQUEUE_NUMPRIOS; pri++) { + for (affinity = 0; affinity < wq->wq_affinity_max; affinity++) + activecount += wq->wq_thactive_count[pri][affinity]; + } + pwqinfo->pwq_nthreads = wq->wq_nthreads; + pwqinfo->pwq_runthreads = activecount; + pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; +out: + workqueue_unlock(p); + return(error); +} + +/* Set target concurrency of one of the queue(0,1,2) with specified value */ +int +proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc) +{ + proc_t p, self; + uint64_t addr; + int32_t conc = targetconc; + int error = 0; + vm_map_t oldmap = VM_MAP_NULL; + int gotref = 0; + + self = current_proc(); + if (self->p_pid != pid) { + /* if not on self, hold a refernce on the process */ + + if (pid == 0) + return(EINVAL); + + p = proc_find(pid); + + if (p == 
PROC_NULL) + return(ESRCH); + gotref = 1; + + } else + p = self; + + if ((addr = p->p_targconc) == (uint64_t)0) { + error = EINVAL; + goto out; + } + + + if ((queuenum >= WQ_MAXPRI_MIN) && (queuenum <= WQ_MAXPRI_MAX)) { + addr += (queuenum * sizeof(int32_t)); + if (gotref == 1) + oldmap = vm_map_switch(get_task_map(p->task)); + error = copyout(&conc, addr, sizeof(int32_t)); + if (gotref == 1) + (void)vm_map_switch(oldmap); + + } else { + error = EINVAL; + } +out: + if (gotref == 1) + proc_rele(p); + return(error); +} + + +/* Set target concurrency on all the prio queues with specified value */ +int +proc_setalltargetconc(pid_t pid, int32_t * targetconcp) +{ + proc_t p, self; + uint64_t addr; + int error = 0; + vm_map_t oldmap = VM_MAP_NULL; + int gotref = 0; + + self = current_proc(); + if (self->p_pid != pid) { + /* if not on self, hold a refernce on the process */ + + if (pid == 0) + return(EINVAL); + + p = proc_find(pid); + + if (p == PROC_NULL) + return(ESRCH); + gotref = 1; + + } else + p = self; + + if ((addr = (uint64_t)p->p_targconc) == (uint64_t)0) { + error = EINVAL; + goto out; + } + + + if (gotref == 1) + oldmap = vm_map_switch(get_task_map(p->task)); + + error = copyout(targetconcp, addr, WQ_PRI_NUM * sizeof(int32_t)); + if (gotref == 1) + (void)vm_map_switch(oldmap); + +out: + if (gotref == 1) + proc_rele(p); + return(error); +} + +int thread_selfid(__unused struct proc *p, __unused struct thread_selfid_args *uap, user_addr_t *retval) +{ + thread_t thread = current_thread(); + uint64_t thread_id = thread_tid(thread); + *retval = thread_id; + return KERN_SUCCESS; +} + +void +pthread_init(void) +{ + pthread_lck_grp_attr = lck_grp_attr_alloc_init(); + pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr); + + /* + * allocate the lock attribute for pthread synchronizers + */ + pthread_lck_attr = lck_attr_alloc_init(); + + workqueue_init_lock((proc_t) get_bsdtask_info(kernel_task)); +#if PSYNCH + pthread_list_mlock = 
lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr); + + pth_global_hashinit(); + psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL); +#endif /* PSYNCH */ +} diff --git a/bsd/kern/qsort.c b/bsd/kern/qsort.c index 8910df65e..70ded5774 100644 --- a/bsd/kern/qsort.c +++ b/bsd/kern/qsort.c @@ -65,7 +65,10 @@ #include //#include -#include /* qsort() */ + +__private_extern__ +void +qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)); static inline char *med3(char *, char *, char *, int (*)(const void *, const void *)); static inline void swapfunc(char *, char *, int, int); diff --git a/bsd/kern/socket_info.c b/bsd/kern/socket_info.c index e6e41f92b..9d489e122 100644 --- a/bsd/kern/socket_info.c +++ b/bsd/kern/socket_info.c @@ -59,7 +59,7 @@ fill_sockbuf_info(struct sockbuf *sb, struct sockbuf_info *sbi) sbi->sbi_mbmax = sb->sb_mbmax; sbi->sbi_lowat = sb->sb_lowat; sbi->sbi_flags = sb->sb_flags; - sbi->sbi_timeo = (u_long)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; + sbi->sbi_timeo = (u_int32_t)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; if (sbi->sbi_timeo == 0 && sb->sb_timeo.tv_usec != 0) sbi->sbi_timeo = 1; } @@ -132,7 +132,7 @@ fill_socketinfo(struct socket *so, struct socket_info *si) insi->insi_v4.in4_tos = inp->inp_depend4.inp4_ip_tos; insi->insi_v6.in6_hlim = inp->inp_depend6.inp6_hlim; insi->insi_v6.in6_cksum = inp->inp_depend6.inp6_cksum; - insi->insi_v6.in6_ifindex = inp->inp6_ifindex; + insi->insi_v6.in6_ifindex = inp->inp_depend6.inp6_ifindex; insi->insi_v6.in6_hops = inp->inp_depend6.inp6_hops; if (type == SOCK_STREAM && (protocol == 0 || protocol == IPPROTO_TCP) && inp->inp_ppcb != 0) { diff --git a/bsd/kern/subr_log.c b/bsd/kern/subr_log.c index def0a5aac..d39eccd5d 100644 --- a/bsd/kern/subr_log.c +++ b/bsd/kern/subr_log.c @@ -448,11 +448,12 @@ SYSCTL_LONG(_kern, OID_AUTO, msgbuf, CTLFLAG_RD, &temp_msgbuf.msg_size, ""); * It returns as much data still in the buffer as possible. 
*/ int -log_dmesg(user_addr_t buffer, uint32_t buffersize, register_t * retval) { - unsigned long i; +log_dmesg(user_addr_t buffer, uint32_t buffersize, int32_t * retval) { + uint32_t i; + uint32_t localbuff_size = (msgbufp->msg_size + 2); int error = 0, newl, skip; char *localbuff, *p, *copystart, ch; - long localbuff_size = msgbufp->msg_size+2, copysize; + long copysize; if (!(localbuff = (char *)kalloc(localbuff_size))) { printf("log_dmesg: unable to allocate memory\n"); diff --git a/bsd/kern/subr_prf.c b/bsd/kern/subr_prf.c index 4a1860496..45bddb431 100644 --- a/bsd/kern/subr_prf.c +++ b/bsd/kern/subr_prf.c @@ -127,7 +127,7 @@ void (*v_putc)(char) = cnputc; /* routine to putc on virtual console */ extern struct tty cons; /* standard console tty */ extern struct tty *constty; /* pointer to console "window" tty */ extern int __doprnt(const char *fmt, - va_list *argp, + va_list argp, void (*)(int, void *), void *arg, int radix); @@ -135,11 +135,7 @@ extern int __doprnt(const char *fmt, /* * Record cpu that panic'd and lock around panic data */ -static void printn(u_long n, int b, int flags, struct tty *ttyp, int zf, int fld_size); - -#if NCPUS > 1 -boolean_t new_printf_cpu_number; /* do we need to output who we are */ -#endif +static void printn(uint32_t n, int b, int flags, struct tty *ttyp, int zf, int fld_size); extern void logwakeup(void); extern void halt_cpu(void); @@ -165,22 +161,23 @@ uprintf(const char *fmt, ...) 
struct proc *p = current_proc(); struct putchar_args pca; va_list ap; - struct session * sessp; - boolean_t fstate; + struct session *sessp; sessp = proc_session(p); - fstate = thread_funnel_set(kernel_flock, TRUE); - pca.flags = TOTTY; - pca.tty = (struct tty *)sessp->s_ttyp; - - if (p->p_flag & P_CONTROLT && sessp->s_ttyvp) { + if (p->p_flag & P_CONTROLT && sessp != SESSION_NULL && sessp->s_ttyvp) { + pca.flags = TOTTY; + pca.tty = SESSION_TP(sessp); + if (pca.tty != NULL) + tty_lock(pca.tty); va_start(ap, fmt); - __doprnt(fmt, &ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10); va_end(ap); + if (pca.tty != NULL) + tty_unlock(pca.tty); } - (void) thread_funnel_set(kernel_flock, fstate); - session_rele(sessp); + if (sessp != SESSION_NULL) + session_rele(sessp); } tpr_t @@ -209,33 +206,41 @@ tprintf_close(tpr_t sessp) /* * tprintf prints on the controlling terminal associated * with the given session. + * + * NOTE: No one else should call this function!!! */ void tprintf(tpr_t tpr, const char *fmt, ...) { struct session *sess = (struct session *)tpr; - struct tty *tp = NULL; + struct tty *tp = TTY_NULL; int flags = TOLOG; va_list ap; struct putchar_args pca; - boolean_t fstate; logpri(LOG_INFO); - /* to protect tty */ - fstate = thread_funnel_set(kernel_flock, TRUE); - if (sess && sess->s_ttyvp && ttycheckoutq(sess->s_ttyp, 0)) { - flags |= TOTTY; - tp = sess->s_ttyp; + if (sess && (tp = SESSION_TP(sess)) != TTY_NULL) { + /* ttycheckoutq(), tputchar() require a locked tp */ + tty_lock(tp); + if(ttycheckoutq(tp, 0)) { + flags |= TOTTY; + /* going to the tty; leave locked */ + } else { + /* not going to the tty... 
*/ + tty_unlock(tp); + tp = TTY_NULL; + } } pca.flags = flags; pca.tty = tp; va_start(ap, fmt); - __doprnt(fmt, &ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10); va_end(ap); - (void) thread_funnel_set(kernel_flock, fstate); + if (tp != NULL) + tty_unlock(tp); /* lock/unlock is guarded by tp, above */ logwakeup(); } @@ -244,26 +249,28 @@ tprintf(tpr_t tpr, const char *fmt, ...) * Ttyprintf displays a message on a tty; it should be used only by * the tty driver, or anything that knows the underlying tty will not * be revoke(2)'d away. Other callers should use tprintf. + * + * Locks: It is assumed that the tty_lock() is held over the call + * to this function. Ensuring this is the responsibility + * of the caller. */ void ttyprintf(struct tty *tp, const char *fmt, ...) { va_list ap; - boolean_t fstate; if (tp != NULL) { - fstate = thread_funnel_set(kernel_flock, TRUE); struct putchar_args pca; pca.flags = TOTTY; pca.tty = tp; va_start(ap, fmt); - __doprnt(fmt, &ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10); va_end(ap); - (void) thread_funnel_set(kernel_flock, fstate); } } + extern int log_open; @@ -275,7 +282,7 @@ logpri(int level) pca.tty = NULL; putchar('<', &pca); - printn((u_long)level, 10, TOLOG, (struct tty *)0, 0, 0); + printn((uint32_t)level, 10, TOLOG, (struct tty *)0, 0, 0); putchar('>', &pca); } @@ -307,7 +314,7 @@ vaddlog(const char *fmt, va_list ap) } bsd_log_lock(); - __doprnt(fmt, &ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10); bsd_log_unlock(); logwakeup(); @@ -319,50 +326,31 @@ _printf(int flags, struct tty *ttyp, const char *format, ...) 
{ va_list ap; struct putchar_args pca; - boolean_t fstate; pca.flags = flags; pca.tty = ttyp; - fstate = thread_funnel_set(kernel_flock, TRUE); + + if (ttyp != NULL) { + tty_lock(ttyp); - va_start(ap, format); - __doprnt(format, &ap, putchar, &pca, 10); - va_end(ap); - (void) thread_funnel_set(kernel_flock, fstate); + va_start(ap, format); + __doprnt(format, ap, putchar, &pca, 10); + va_end(ap); + + tty_unlock(ttyp); + } } -int prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) +int +prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) { struct putchar_args pca; pca.flags = flags; pca.tty = ttyp; -#if NCPUS > 1 - int cpun = cpu_number(); + __doprnt(fmt, ap, putchar, &pca, 10); - if(ttyp == 0) { - } else - TTY_LOCK(ttyp); - - if (cpun != master_cpu) - new_printf_cpu_number = TRUE; - - if (new_printf_cpu_number) { - putchar('{', flags, ttyp); - printn((u_long)cpun, 10, flags, ttyp, 0, 0); - putchar('}', flags, ttyp); - } -#endif /* NCPUS > 1 */ - - __doprnt(fmt, &ap, putchar, &pca, 10); - -#if NCPUS > 1 - if(ttyp == 0) { - } else - TTY_UNLOCK(ttyp); -#endif - return 0; } @@ -370,7 +358,8 @@ int prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) * Printn prints a number n in base b. * We don't use recursion to avoid deep kernel stacks. */ -static void printn(u_long n, int b, int flags, struct tty *ttyp, int zf, int fld_size) +static void +printn(uint32_t n, int b, int flags, struct tty *ttyp, int zf, int fld_size) { char prbuf[11]; char *cp; @@ -414,6 +403,9 @@ void tablefull(const char *tab) * Print a character on console or users terminal. * If destination is console then the last MSGBUFS characters * are saved in msgbuf for inspection later. + * + * Locks: If TOTTY is set, we assume that the tty lock is held + * over the call to this function. 
*/ /*ARGSUSED*/ void @@ -450,7 +442,7 @@ vprintf(const char *fmt, va_list ap) pca.flags = TOLOG | TOCONS; pca.tty = NULL; - __doprnt(fmt, &ap, putchar, &pca, 10); + __doprnt(fmt, ap, putchar, &pca, 10); return 0; } @@ -469,7 +461,7 @@ vsprintf(char *buf, const char *cfmt, va_list ap) info.str = buf; info.remain = 999999; - retval = __doprnt(cfmt, &ap, snprintf_func, &info, 10); + retval = __doprnt(cfmt, ap, snprintf_func, &info, 10); if (info.remain >= 1) { *info.str++ = '\0'; } @@ -502,7 +494,7 @@ vsnprintf(char *str, size_t size, const char *format, va_list ap) info.str = str; info.remain = size; - retval = __doprnt(format, &ap, snprintf_func, &info, 10); + retval = __doprnt(format, ap, snprintf_func, &info, 10); if (info.remain >= 1) *info.str++ = '\0'; return retval; @@ -522,7 +514,7 @@ snprintf_func(int ch, void *arg) int kvprintf(char const *fmt, void (*func)(int, void*), void *arg, int radix, va_list ap) { - __doprnt(fmt, &ap, func, arg, radix); + __doprnt(fmt, ap, func, arg, radix); return 0; } diff --git a/bsd/kern/subr_prof.c b/bsd/kern/subr_prof.c index cff03ee0a..5b1024141 100644 --- a/bsd/kern/subr_prof.c +++ b/bsd/kern/subr_prof.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,7 +62,7 @@ */ #ifdef GPROF -#include +#include #endif #include @@ -106,13 +106,13 @@ struct gmonparam _gmonparam = { .state = GMON_PROF_OFF }; void kmstartup(void) { - char *cp; - struct segment_command *sgp; /* 32 bit mach object file segment */ + tostruct_t *cp; + kernel_segment_command_t *sgp; /* 32 bit mach object file segment */ struct gmonparam *p = &_gmonparam; sgp = getsegbyname("__TEXT"); - p->lowpc = (u_long)sgp->vmaddr; - p->highpc = (u_long)(sgp->vmaddr + sgp->vmsize); + p->lowpc = (u_int32_t)sgp->vmaddr; + p->highpc = (u_int32_t)(sgp->vmaddr + sgp->vmsize); /* * Round lowpc and highpc to multiples of the density we're using @@ -131,18 +131,18 @@ kmstartup(void) p->tolimit = MINARCS; else if (p->tolimit > MAXARCS) p->tolimit = MAXARCS; - p->tossize = p->tolimit * sizeof(struct tostruct); + p->tossize = p->tolimit * sizeof(tostruct_t); /* Why not use MALLOC with M_GPROF ? */ - cp = (char *)kalloc(p->kcountsize + p->fromssize + p->tossize); + cp = (tostruct_t *)kalloc(p->kcountsize + p->fromssize + p->tossize); if (cp == 0) { printf("No memory for profiling.\n"); return; } bzero(cp, p->kcountsize + p->tossize + p->fromssize); - p->tos = (struct tostruct *)cp; - cp += p->tossize; + p->tos = cp; + cp = (tostruct_t *)((vm_offset_t)cp + p->tossize); p->kcount = (u_short *)cp; - cp += p->kcountsize; + cp = (tostruct_t *)((vm_offset_t)cp + p->kcountsize); p->froms = (u_short *)cp; mcount_lock_grp = lck_grp_alloc_init("MCOUNT", LCK_GRP_ATTR_NULL); @@ -198,12 +198,12 @@ sysctl_doprof(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, */ void mcount( - u_long frompc, - u_long selfpc + uintptr_t frompc, + uintptr_t selfpc ) { unsigned short *frompcindex; - struct tostruct *top, *prevtop; + tostruct_t *top, *prevtop; struct gmonparam *p = &_gmonparam; long toindex; @@ -312,178 +312,29 @@ mcount( #define PROFILE_LOCK(x) #define PROFILE_UNLOCK(x) -static int profil_funneled(struct proc *p, struct 
profil_args *uap, register_t *retval); -static int add_profil_funneled(struct proc *p, struct add_profil_args *uap, register_t *retval); - int -profil(struct proc *p, struct profil_args *uap, register_t *retval) -{ - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = profil_funneled(p, uap, retval); - thread_funnel_set(kernel_flock, funnel_state); - return(error); -} - -static int -profil_funneled(struct proc *p, struct profil_args *uap, __unused register_t *retval) +profil(struct proc *p, struct profil_args *uap, int32_t *retval) { - struct uprof *upp = &p->p_stats->p_prof; - int s; - - if (uap->pcscale > (1 << 16)) - return (EINVAL); - - if (uap->pcscale == 0) { - stopprofclock(p); - return (0); - } - /* - * Block profile interrupts while changing state. - */ - s = ml_set_interrupts_enabled(FALSE); - - if (proc_is64bit(p)) { - struct user_uprof *user_upp = &p->p_stats->user_p_prof; - struct user_uprof *upc, *nupc; - - PROFILE_LOCK(&user_upp->pr_lock); - - user_upp->pr_base = uap->bufbase; - user_upp->pr_size = uap->bufsize; - user_upp->pr_off = uap->pcoffset; - user_upp->pr_scale = uap->pcscale; - upp->pr_base = NULL; - upp->pr_size = 0; - upp->pr_scale = 0; - - /* - * remove buffers previously allocated with add_profil() - * don't do the kfree's while interrupts disabled - */ - upc = user_upp->pr_next; - user_upp->pr_next = 0; - - PROFILE_UNLOCK(&user_upp->pr_lock); - - startprofclock(p); - ml_set_interrupts_enabled(s); - - while (upc) { - nupc = upc->pr_next; - kfree(upc, sizeof (*upc)); - upc = nupc; - } - - } else { - struct uprof *upc, *nupc; - - PROFILE_LOCK(&upp->pr_lock); + void *tmp; - upp->pr_base = CAST_DOWN(caddr_t, uap->bufbase); - upp->pr_size = uap->bufsize; - upp->pr_off = uap->pcoffset; - upp->pr_scale = uap->pcscale; + tmp = p; + tmp = uap; + tmp = retval; - /* - * remove buffers previously allocated with add_profil() - * don't do the kfree's while interrupts disabled - */ - upc = 
upp->pr_next; - upp->pr_next = 0; - - PROFILE_UNLOCK(&upp->pr_lock); - - startprofclock(p); - ml_set_interrupts_enabled(s); - - while (upc) { - nupc = upc->pr_next; - kfree(upc, sizeof (struct uprof)); - upc = nupc; - } - } - return(0); + return EINVAL; } int -add_profil(struct proc *p, struct add_profil_args *uap, register_t *retval) -{ - boolean_t funnel_state; - int error; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = add_profil_funneled(p, uap, retval); - thread_funnel_set(kernel_flock, funnel_state); - return(error); -} - - -static int -add_profil_funneled(struct proc *p, struct add_profil_args *uap, __unused register_t *retval) +add_profil(struct proc *p, struct add_profil_args *uap, int32_t *retval) { - struct uprof *upp = &p->p_stats->p_prof, *upc; - struct user_uprof *user_upp = NULL, *user_upc; - int s; - boolean_t is64bit = proc_is64bit(p); - - - upc = NULL; - user_upc = NULL; - - if (is64bit) { - user_upp = &p->p_stats->user_p_prof; - - if (user_upp->pr_scale == 0) - return (0); - } - else { - if (upp->pr_scale == 0) - return (0); - } - if (is64bit) { - user_upc = (struct user_uprof *) kalloc(sizeof (struct user_uprof)); - user_upc->pr_base = uap->bufbase; - user_upc->pr_size = uap->bufsize; - user_upc->pr_off = uap->pcoffset; - user_upc->pr_scale = uap->pcscale; - } else { - upc = (struct uprof *) kalloc(sizeof (struct uprof)); - upc->pr_base = CAST_DOWN(caddr_t, uap->bufbase); - upc->pr_size = uap->bufsize; - upc->pr_off = uap->pcoffset; - upc->pr_scale = uap->pcscale; - } - s = ml_set_interrupts_enabled(FALSE); - - if (is64bit) { - PROFILE_LOCK(&user_upp->pr_lock); - if (user_upp->pr_scale) { - user_upc->pr_next = user_upp->pr_next; - user_upp->pr_next = user_upc; - user_upc = NULL; - } - PROFILE_UNLOCK(&user_upp->pr_lock); - } else { - PROFILE_LOCK(&upp->pr_lock); - if (upp->pr_scale) { - upc->pr_next = upp->pr_next; - upp->pr_next = upc; - upc = NULL; - } - PROFILE_UNLOCK(&upp->pr_lock); - } - ml_set_interrupts_enabled(s); + 
void *tmp; - if (upc) - kfree(upc, sizeof(struct uprof)); - if (user_upc) - kfree(user_upc, sizeof(struct user_uprof)); + tmp = p; + tmp = uap; + tmp = retval; - return(0); + return EINVAL; } /* @@ -491,8 +342,9 @@ add_profil_funneled(struct proc *p, struct add_profil_args *uap, __unused regist * into the value, and is <= 1.0. pc is at most 32 bits, so the * intermediate result is at most 48 bits. */ +//K64todo - this doesn't fit into 64 bit any more, it needs 64+16 #define PC_TO_INDEX(pc, prof) \ - ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + ((user_addr_t)(((u_quad_t)((pc) - (prof)->pr_off) * \ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) /* @@ -512,7 +364,7 @@ add_profil_funneled(struct proc *p, struct add_profil_args *uap, __unused regist void addupc_task(struct proc *p, user_addr_t pc, u_int ticks) { - u_int off; + user_addr_t off; u_short count; /* Testing P_PROFIL may be unnecessary, but is certainly safe. */ @@ -544,10 +396,10 @@ addupc_task(struct proc *p, user_addr_t pc, u_int ticks) short *cell; for (prof = &p->p_stats->p_prof; prof; prof = prof->pr_next) { - off = PC_TO_INDEX(CAST_DOWN(uint, pc),prof); + off = PC_TO_INDEX(pc,prof); cell = (short *)(prof->pr_base + off); if (cell >= (short *)prof->pr_base && - cell < (short*)(prof->pr_size + (int) prof->pr_base)) { + cell < (short*)(prof->pr_size + prof->pr_base)) { if (copyin(CAST_USER_ADDR_T(cell), (caddr_t) &count, sizeof(count)) == 0) { count += ticks; if(copyout((caddr_t) &count, CAST_USER_ADDR_T(cell), sizeof(count)) == 0) diff --git a/bsd/kern/subr_sbuf.c b/bsd/kern/subr_sbuf.c index e966704b8..a3a89c096 100644 --- a/bsd/kern/subr_sbuf.c +++ b/bsd/kern/subr_sbuf.c @@ -208,12 +208,12 @@ sbuf_uionew(struct sbuf *s, struct uio *uio, int *error) KASSERT(error != NULL, ("%s called with NULL error pointer", __func__)); - s = sbuf_new(s, NULL, uio->uio_resid + 1, 0); + s = sbuf_new(s, NULL, uio_resid(uio) + 1, 0); if (s == NULL) { *error = ENOMEM; return (NULL); } - *error = uiomove(s->s_buf, 
uio->uio_resid, uio); + *error = uiomove(s->s_buf, uio_resid(uio), uio); if (*error != 0) { sbuf_delete(s); return (NULL); diff --git a/bsd/kern/subr_xxx.c b/bsd/kern/subr_xxx.c index cc73e7389..8879e5d87 100644 --- a/bsd/kern/subr_xxx.c +++ b/bsd/kern/subr_xxx.c @@ -178,7 +178,7 @@ nullsys(void) */ /* ARGSUSED */ int -nosys(struct proc *p, __unused struct nosys_args *args, __unused register_t *retval) +nosys(struct proc *p, __unused struct nosys_args *args, __unused int32_t *retval) { psignal(p, SIGSYS); return (ENOSYS); diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index 509468087..3da6c6d55 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -108,7 +108,7 @@ #include #include -#include +#include #include #include @@ -187,11 +187,15 @@ read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retv struct fileproc *fp; int error; int fd = uap->fd; + struct vfs_context context; if ( (error = preparefileread(p, &fp, fd, 0)) ) return (error); - error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte, + context = *(vfs_context_current()); + context.vc_ucred = fp->f_fglob->fg_cred; + + error = dofileread(&context, fp, uap->cbuf, uap->nbyte, (off_t)-1, 0, retval); donefileread(p, fp, fd); @@ -222,11 +226,15 @@ pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *re struct fileproc *fp = NULL; /* fp set by preparefileread() */ int fd = uap->fd; int error; + struct vfs_context context; if ( (error = preparefileread(p, &fp, fd, 1)) ) goto out; - error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte, + context = *(vfs_context_current()); + context.vc_ucred = fp->f_fglob->fg_cred; + + error = dofileread(&context, fp, uap->buf, uap->nbyte, uap->offset, FOF_OFFSET, retval); donefileread(p, fp, fd); @@ -269,6 +277,8 @@ preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_ int error; struct fileproc *fp; + AUDIT_ARG(fd, fd); + proc_fdlock_spin(p); error = 
fp_lookup(p, fd, &fp, 1); @@ -327,7 +337,6 @@ dofileread(vfs_context_t ctx, struct fileproc *fp, long error = 0; char uio_buf[ UIO_SIZEOF(1) ]; - // LP64todo - do we want to raise this? if (nbyte > INT_MAX) return (EINVAL); @@ -375,7 +384,6 @@ readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *re { uio_t auio = NULL; int error; - int size_of_iovec; struct user_iovec *iovp; /* Verify range bedfore calling uio_create() */ @@ -395,8 +403,9 @@ readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *re error = ENOMEM; goto ExitThisRoutine; } - size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec)); - error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec)); + error = copyin_user_iovec_array(uap->iovp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + uap->iovcnt, iovp); if (error) { goto ExitThisRoutine; } @@ -436,6 +445,8 @@ write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *re int error; int fd = uap->fd; + AUDIT_ARG(fd, fd); + error = fp_lookup(p,fd,&fp,0); if (error) return(error); @@ -481,6 +492,8 @@ pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t * int fd = uap->fd; vnode_t vp = (vnode_t)0; + AUDIT_ARG(fd, fd); + error = fp_lookup(p,fd,&fp,0); if (error) return(error); @@ -541,7 +554,6 @@ dofilewrite(vfs_context_t ctx, struct fileproc *fp, user_ssize_t bytecnt; char uio_buf[ UIO_SIZEOF(1) ]; - // LP64todo - do we want to raise this? 
if (nbyte > INT_MAX) return (EINVAL); @@ -586,9 +598,10 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t * { uio_t auio = NULL; int error; - int size_of_iovec; struct user_iovec *iovp; + AUDIT_ARG(fd, uap->fd); + /* Verify range bedfore calling uio_create() */ if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) return (EINVAL); @@ -606,8 +619,9 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t * error = ENOMEM; goto ExitThisRoutine; } - size_of_iovec = (IS_64BIT_PROCESS(p) ? sizeof(struct user_iovec) : sizeof(struct iovec)); - error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec)); + error = copyin_user_iovec_array(uap->iovp, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + uap->iovcnt, iovp); if (error) { goto ExitThisRoutine; } @@ -707,7 +721,7 @@ rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval) * fo_ioctl:??? */ int -ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval) +ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) { struct fileproc *fp; u_long com; @@ -722,10 +736,15 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval) struct vfs_context context = *vfs_context_current(); AUDIT_ARG(fd, uap->fd); - AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */ AUDIT_ARG(addr, uap->data); is64bit = proc_is64bit(p); +#if CONFIG_AUDIT + if (is64bit) + AUDIT_ARG(value64, uap->com); + else + AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com)); +#endif /* CONFIG_AUDIT */ proc_fdlock(p); error = fp_lookup(p,fd,&fp,1); @@ -919,11 +938,10 @@ int selwait, nselcoll; extern int selcontinue(int error); extern int selprocess(int error, int sel_pass); static int selscan(struct proc *p, struct _select * sel, - int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub); + int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub); static int selcount(struct proc *p, 
u_int32_t *ibits, u_int32_t *obits, int nfd, int * count, int *kfcount); static int seldrop(struct proc *p, u_int32_t *ibits, int nfd); -extern uint64_t tvtoabstime(struct timeval *tvp); /* * Select system call. @@ -934,14 +952,14 @@ extern uint64_t tvtoabstime(struct timeval *tvp); * selprocess:??? */ int -select(struct proc *p, struct select_args *uap, register_t *retval) +select(struct proc *p, struct select_args *uap, int32_t *retval) { __pthread_testcancel(1); return(select_nocancel(p, (struct select_nocancel_args *)uap, retval)); } int -select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval) +select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retval) { int error = 0; u_int ni, nw, size; @@ -1030,13 +1048,16 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re if (uap->tv) { struct timeval atv; if (IS_64BIT_PROCESS(p)) { - struct user_timeval atv64; + struct user64_timeval atv64; error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64)); /* Loses resolution - assume timeout < 68 years */ atv.tv_sec = atv64.tv_sec; atv.tv_usec = atv64.tv_usec; } else { - error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv)); + struct user32_timeval atv32; + error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32)); + atv.tv_sec = atv32.tv_sec; + atv.tv_usec = atv32.tv_usec; } if (error) goto continuation; @@ -1055,6 +1076,7 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *re if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) { goto continuation; } + sel->count = count; sel->kfcount = kfcount; size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK); @@ -1125,7 +1147,7 @@ selprocess(int error, int sel_pass) } ncoll = nselcoll; - OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SELECT, &p->p_flag); /* skip scans if the select is just for timeouts */ if (sel->count) { if (sel_pass == SEL_FIRSTPASS) @@ -1177,7 +1199,7 @@ 
selprocess(int error, int sel_pass) goto retry; } - OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* if the select is just for timeout skip check */ if (sel->count &&(sel_pass == SEL_SECONDPASS)) @@ -1185,7 +1207,7 @@ selprocess(int error, int sel_pass) /* Wait Queue Subordinate has waitqueue as first element */ wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset, - &selwait, THREAD_ABORTSAFE, sel->abstime); + NULL, THREAD_ABORTSAFE, sel->abstime); if (wait_result != THREAD_AWAKENED) { /* there are no preposted events */ error = tsleep1(NULL, PSOCK | PCATCH, @@ -1206,7 +1228,7 @@ selprocess(int error, int sel_pass) wait_subqueue_unlink_all(uth->uu_wqset); seldrop(p, sel->ibits, uap->nd); } - OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; @@ -1234,7 +1256,7 @@ selprocess(int error, int sel_pass) } static int -selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, +selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, int sel_pass, wait_queue_sub_t wqsub) { struct filedesc *fdp = p->p_fd; @@ -1250,7 +1272,6 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, char * wql; char * wql_ptr; int count, kfcount; - boolean_t funnel_state; vnode_t vp; struct vfs_context context = *vfs_context_current(); @@ -1275,8 +1296,6 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, panic("selscan: count < kfcount"); if (kfcount != 0) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - proc_fdlock(p); for (msk = 0; msk < 3; msk++) { iptr = (u_int32_t *)&ibits[msk * nw]; @@ -1292,7 +1311,6 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, if (fp == NULL || (fdp->fd_ofileflags[fd] & UF_RESERVED)) { proc_fdunlock(p); - 
thread_funnel_set(kernel_flock, funnel_state); return(EBADF); } if (sel_pass == SEL_SECONDPASS) { @@ -1319,7 +1337,6 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, } } proc_fdunlock(p); - thread_funnel_set(kernel_flock, funnel_state); } nc = 0; @@ -1372,7 +1389,7 @@ selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval, return (0); } -int poll_callback(struct kqueue *, struct kevent *, void *); +int poll_callback(struct kqueue *, struct kevent64_s *, void *); struct poll_continue_args { user_addr_t pca_fds; @@ -1381,7 +1398,7 @@ struct poll_continue_args { }; int -poll(struct proc *p, struct poll_args *uap, register_t *retval) +poll(struct proc *p, struct poll_args *uap, int32_t *retval) { __pthread_testcancel(1); return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval)); @@ -1389,7 +1406,7 @@ poll(struct proc *p, struct poll_args *uap, register_t *retval) int -poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval) +poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, int32_t *retval) { struct poll_continue_args *cont; struct pollfd *fds; @@ -1446,10 +1463,10 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval /* JMM - all this P_SELECT stuff is bogus */ ncoll = nselcoll; - OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_SELECT, &p->p_flag); for (i = 0; i < nfds; i++) { short events = fds[i].events; - struct kevent kev; + struct kevent64_s kev; int kerror = 0; /* per spec, ignore fd values below zero */ @@ -1464,6 +1481,8 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval kev.fflags = NOTE_LOWAT; kev.data = 1; /* efficiency be damned: any data should trigger */ kev.udata = CAST_USER_ADDR_T(&fds[i]); + kev.ext[0] = 0; + kev.ext[1] = 0; /* Handle input events */ if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) { @@ -1511,11 +1530,11 @@ poll_nocancel(struct proc *p, struct 
poll_nocancel_args *uap, register_t *retval cont->pca_fds = uap->fds; cont->pca_nfds = nfds; cont->pca_rfds = rfds; - error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p); + error = kqueue_scan(kq, poll_callback, NULL, cont, &atv, p); rfds = cont->pca_rfds; done: - OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* poll is not restarted after signals... */ if (error == ERESTART) error = EINTR; @@ -1535,7 +1554,7 @@ poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval } int -poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data) +poll_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, void *data) { struct poll_continue_args *cont = (struct poll_continue_args *)data; struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata); @@ -1789,7 +1808,7 @@ selwakeup(struct selinfo *sip) } if (sip->si_flags & SI_RECORDED) { - wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED); + wait_queue_wakeup_all(&sip->si_wait_queue, NULL, THREAD_AWAKENED); sip->si_flags &= ~SI_RECORDED; } @@ -1807,7 +1826,7 @@ selthreadclear(struct selinfo *sip) sip->si_flags &= ~(SI_RECORDED | SI_COLL); } sip->si_flags |= SI_CLEAR; - wait_queue_unlinkall_nofree(&sip->si_wait_queue); + wait_queue_unlink_all(&sip->si_wait_queue); } @@ -2350,7 +2369,18 @@ waitevent(proc_t p, struct waitevent_args *uap, int *retval) proc_lock(p); goto retry; } - error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv)); + if (IS_64BIT_PROCESS(p)) { + struct user64_timeval atv64; + error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64)); + /* Loses resolution - assume timeout < 68 years */ + atv.tv_sec = atv64.tv_sec; + atv.tv_usec = atv64.tv_usec; + } else { + struct user32_timeval atv32; + error = copyin(uap->tv, (caddr_t)&atv32, sizeof(atv32)); + atv.tv_sec = atv32.tv_sec; + atv.tv_usec = atv32.tv_usec; + } if (error) return(error); @@ -2676,7 +2706,7 @@ waitevent_close(struct 
proc *p, struct fileproc *fp) * have a system UUID in hand, then why ask for one? */ int -gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval) +gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retval) { kern_return_t kret; int error; @@ -2685,14 +2715,14 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *r /* Convert the 32/64 bit timespec into a mach_timespec_t */ if ( proc_is64bit(p) ) { - struct user_timespec ts; + struct user64_timespec ts; error = copyin(uap->timeoutp, &ts, sizeof(ts)); if (error) return (error); mach_ts.tv_sec = ts.tv_sec; mach_ts.tv_nsec = ts.tv_nsec; } else { - struct timespec ts; + struct user32_timespec ts; error = copyin(uap->timeoutp, &ts, sizeof(ts) ); if (error) return (error); diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c index 3190f00f1..f86ad5a11 100644 --- a/bsd/kern/sys_pipe.c +++ b/bsd/kern/sys_pipe.c @@ -119,7 +119,7 @@ #include #include -#include +#include #include @@ -158,7 +158,6 @@ #endif - /* * interfaces to the outside world */ @@ -179,6 +178,8 @@ static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx); +static int pipe_drain(struct fileproc *fp,vfs_context_t ctx); + struct fileops pipeops = { pipe_read, @@ -187,17 +188,23 @@ struct fileops pipeops = pipe_select, pipe_close, pipe_kqfilter, - NULL }; + pipe_drain }; static void filt_pipedetach(struct knote *kn); static int filt_piperead(struct knote *kn, long hint); static int filt_pipewrite(struct knote *kn, long hint); -static struct filterops pipe_rfiltops = - { 1, NULL, filt_pipedetach, filt_piperead }; -static struct filterops pipe_wfiltops = - { 1, NULL, filt_pipedetach, filt_pipewrite }; +static struct filterops pipe_rfiltops = { + .f_isfd = 1, + .f_detach = filt_pipedetach, + .f_event = filt_piperead, +}; +static struct filterops pipe_wfiltops = { + .f_isfd = 1, + 
.f_detach = filt_pipedetach, + .f_event = filt_pipewrite, +}; /* * Default pipe buffer size(s), this can be kind-of large now because pipe @@ -319,7 +326,7 @@ pipe_touch(struct pipe *tpipe, int touch) /* ARGSUSED */ int -pipe(proc_t p, __unused struct pipe_args *uap, register_t *retval) +pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) { struct fileproc *rf, *wf; struct pipe *rpipe, *wpipe; @@ -490,7 +497,7 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64) * address of this pipe's struct pipe. This number may be recycled * relatively quickly. */ - sb64->st_ino = (ino64_t)((uint32_t)cpipe); + sb64->st_ino = (ino64_t)((uintptr_t)cpipe); } else { sb = (struct stat *)ub; @@ -517,7 +524,7 @@ pipe_stat(struct pipe *cpipe, void *ub, int isstat64) * address of this pipe's struct pipe. This number may be recycled * relatively quickly. */ - sb->st_ino = (ino_t)cpipe; + sb->st_ino = (ino_t)(uintptr_t)cpipe; } PIPE_UNLOCK(cpipe); @@ -557,8 +564,8 @@ pipespace(struct pipe *cpipe, int size) cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; - OSAddAtomic(1, (SInt32 *)&amountpipes); - OSAddAtomic(cpipe->pipe_buffer.size, (SInt32 *)&amountpipekva); + OSAddAtomic(1, &amountpipes); + OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva); return (0); } @@ -734,8 +741,9 @@ pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags, * detect EOF condition * read returns 0 on EOF, no need to set error */ - if (rpipe->pipe_state & PIPE_EOF) + if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { break; + } /* * If the "write-side" has been blocked, wake it up now. 
@@ -975,7 +983,7 @@ pipe_direct_write(wpipe, uio) PRIBIO | PCATCH, "pipdww", 0); if (error) goto error1; - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { error = EPIPE; goto error1; } @@ -992,7 +1000,7 @@ pipe_direct_write(wpipe, uio) PRIBIO | PCATCH, "pipdwc", 0); if (error) goto error1; - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { error = EPIPE; goto error1; } @@ -1013,7 +1021,7 @@ pipe_direct_write(wpipe, uio) error = 0; while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { pipelock(wpipe, 0); PIPE_UNLOCK(wpipe); pipe_destroy_write_buffer(wpipe); @@ -1072,7 +1080,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, /* * detect loss of pipe read side, issue SIGPIPE if lost. */ - if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)) { + if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { PIPE_UNLOCK(rpipe); return (EPIPE); } @@ -1125,7 +1133,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, if ((error = pipelock(wpipe, 1)) == 0) { PIPE_UNLOCK(wpipe); if (pipespace(wpipe, pipe_size) == 0) - OSAddAtomic(1, (SInt32 *)&nbigpipe); + OSAddAtomic(1, &nbigpipe); PIPE_LOCK(wpipe); pipeunlock(wpipe); @@ -1169,7 +1177,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && - amountpipekvawired + uio->uio_resid < maxpipekvawired) { + amountpipekvawired + uio_resid(uio) < maxpipekvawired) { error = pipe_direct_write(wpipe, uio); if (error) break; @@ -1191,7 +1199,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, } error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipbww", 0); - if (wpipe->pipe_state & PIPE_EOF) + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) break; if (error) break; @@ -1213,7 +1221,7 @@ 
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, int size; /* Transfer size */ int segsize; /* first segment to transfer */ - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { pipeunlock(wpipe); error = EPIPE; break; @@ -1340,7 +1348,7 @@ pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, * If read side wants to go away, we just issue a signal * to ourselves. */ - if (wpipe->pipe_state & PIPE_EOF) { + if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { error = EPIPE; break; } @@ -1472,7 +1480,7 @@ pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) case FREAD: if ((rpipe->pipe_state & PIPE_DIRECTW) || (rpipe->pipe_buffer.cnt > 0) || - (rpipe->pipe_state & PIPE_EOF)) { + (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { retnum = 1; } else { @@ -1482,7 +1490,7 @@ pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) break; case FWRITE: - if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || + if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { @@ -1526,9 +1534,9 @@ pipe_free_kmem(struct pipe *cpipe) if (cpipe->pipe_buffer.buffer != NULL) { if (cpipe->pipe_buffer.size > PIPE_SIZE) - OSAddAtomic(-1, (SInt32 *)&nbigpipe); - OSAddAtomic(-(cpipe->pipe_buffer.size), (SInt32 *)&amountpipekva); - OSAddAtomic(-1, (SInt32 *)&amountpipes); + OSAddAtomic(-1, &nbigpipe); + OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva); + OSAddAtomic(-1, &amountpipes); kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); @@ -1569,6 +1577,7 @@ pipeclose(struct pipe *cpipe) * If the other side is blocked, wake it up saying that * we want to close it down. 
*/ + cpipe->pipe_state &= ~PIPE_DRAIN; cpipe->pipe_state |= PIPE_EOF; pipeselwakeup(cpipe, cpipe); @@ -1592,6 +1601,7 @@ pipeclose(struct pipe *cpipe) */ if ((ppipe = cpipe->pipe_peer) != NULL) { + ppipe->pipe_state &= ~(PIPE_DRAIN); ppipe->pipe_state |= PIPE_EOF; pipeselwakeup(ppipe, ppipe); @@ -1627,6 +1637,7 @@ pipeclose(struct pipe *cpipe) pipe_free_kmem(cpipe); zfree(pipe_zone, cpipe); + } /*ARGSUSED*/ @@ -1725,8 +1736,8 @@ filt_piperead(struct knote *kn, long hint) if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) kn->kn_data = rpipe->pipe_map.cnt; #endif - if ((rpipe->pipe_state & PIPE_EOF) || - (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || + (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { kn->kn_flags |= EV_EOF; retval = 1; } else { @@ -1758,7 +1769,7 @@ filt_pipewrite(struct knote *kn, long hint) wpipe = rpipe->pipe_peer; - if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { + if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { kn->kn_data = 0; kn->kn_flags |= EV_EOF; @@ -1863,3 +1874,36 @@ fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) return (0); } + + +static int +pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx) +{ + + /* Note: fdlock already held */ + struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data); + + if (cpipe) { + PIPE_LOCK(cpipe); + cpipe->pipe_state |= PIPE_DRAIN; + cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); + wakeup(cpipe); + + /* Must wake up peer: a writer sleeps on the read side */ + if ((ppipe = cpipe->pipe_peer)) { + ppipe->pipe_state |= PIPE_DRAIN; + ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); + wakeup(ppipe); + } + + PIPE_UNLOCK(cpipe); + return 0; + } + + return 1; +} + + + + + diff --git a/bsd/kern/sys_socket.c b/bsd/kern/sys_socket.c index a508b87ac..471cac76a 100644 --- a/bsd/kern/sys_socket.c +++ b/bsd/kern/sys_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -178,7 +178,7 @@ soo_write(struct fileproc *fp, struct uio *uio, __unused int flags, /* Generation of SIGPIPE can be controlled per socket */ procp = vfs_context_proc(ctx); - if (stat == EPIPE && procp && !(so->so_flags & SOF_NOSIGPIPE)) + if (stat == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) psignal(procp, SIGPIPE); return (stat); @@ -187,15 +187,10 @@ soo_write(struct fileproc *fp, struct uio *uio, __unused int flags, __private_extern__ int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) { - struct sockopt sopt; int error = 0; int dropsockref = -1; - - socket_lock(so, 1); - sopt.sopt_level = cmd; - sopt.sopt_name = (int)data; - sopt.sopt_p = p; + socket_lock(so, 1); /* Call the socket filter's ioctl handler for most ioctls */ if (IOCGROUP(cmd) != 'i' && IOCGROUP(cmd) != 'r') { @@ -282,7 +277,7 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) /* Always set socket non-blocking for OT */ so->so_state |= SS_NBIO; so->so_options |= SO_DONTTRUNC | SO_WANTMORE; - so->so_flags |= SOF_NOSIGPIPE; + so->so_flags |= SOF_NOSIGPIPE | SOF_NPX_SETOPTSHUT; if (cloned_so && so != cloned_so) { /* Flags options */ diff --git a/bsd/kern/syscalls.c b/bsd/kern/syscalls.c deleted file mode 100644 index 82ded92dd..000000000 --- a/bsd/kern/syscalls.c +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - * - * - * System call switch table. - * - * DO NOT EDIT-- this file is automatically generated. 
- * created from syscalls.master - */ - -const char *syscallnames[] = { - "syscall", /* 0 = syscall indirect syscall */ - "exit", /* 1 = exit */ - "fork", /* 2 = fork */ - "read", /* 3 = read */ - "write", /* 4 = write */ - "open", /* 5 = open */ - "close", /* 6 = close */ - "wait4", /* 7 = wait4 */ - "#8", /* 8 = old creat */ - "link", /* 9 = link */ - "unlink", /* 10 = unlink */ - "#11", /* 11 = old execv */ - "chdir", /* 12 = chdir */ - "fchdir", /* 13 = fchdir */ - "mknod", /* 14 = mknod */ - "chmod", /* 15 = chmod */ - "chown", /* 16 = chown */ - "obreak", /* 17 = obreak old break */ -#if COMPAT_GETFSSTAT - "ogetfsstat", /* 18 = ogetfsstat */ -#else - "getfsstat", /* 18 = getfsstat */ -#endif - "#19", /* 19 = old lseek */ - "getpid", /* 20 = getpid */ - "#21", /* 21 = old mount */ - "#22", /* 22 = old umount */ - "setuid", /* 23 = setuid */ - "getuid", /* 24 = getuid */ - "geteuid", /* 25 = geteuid */ - "ptrace", /* 26 = ptrace */ -#if SOCKETS - "recvmsg", /* 27 = recvmsg */ - "sendmsg", /* 28 = sendmsg */ - "recvfrom", /* 29 = recvfrom */ - "accept", /* 30 = accept */ - "getpeername", /* 31 = getpeername */ - "getsockname", /* 32 = getsockname */ -#else - "#27", /* 27 = */ - "#28", /* 28 = */ - "#29", /* 29 = */ - "#30", /* 30 = */ - "#31", /* 31 = */ - "#32", /* 32 = */ -#endif /* SOCKETS */ - "access", /* 33 = access */ - "chflags", /* 34 = chflags */ - "fchflags", /* 35 = fchflags */ - "sync", /* 36 = sync */ - "kill", /* 37 = kill */ - "#38", /* 38 = old stat */ - "getppid", /* 39 = getppid */ - "#40", /* 40 = old lstat */ - "dup", /* 41 = dup */ - "pipe", /* 42 = pipe */ - "getegid", /* 43 = getegid */ - "profil", /* 44 = profil */ - "#45", /* 45 = old ktrace */ - "sigaction", /* 46 = sigaction */ - "getgid", /* 47 = getgid */ - "sigprocmask", /* 48 = sigprocmask */ - "getlogin", /* 49 = getlogin */ - "setlogin", /* 50 = setlogin */ - "acct", /* 51 = acct */ - "sigpending", /* 52 = sigpending */ - "sigaltstack", /* 53 = sigaltstack */ - "ioctl", /* 54 = 
ioctl */ - "reboot", /* 55 = reboot */ - "revoke", /* 56 = revoke */ - "symlink", /* 57 = symlink */ - "readlink", /* 58 = readlink */ - "execve", /* 59 = execve */ - "umask", /* 60 = umask */ - "chroot", /* 61 = chroot */ - "#62", /* 62 = old fstat */ - "#63", /* 63 = used internally , reserved */ - "#64", /* 64 = old getpagesize */ - "msync", /* 65 = msync */ - "vfork", /* 66 = vfork */ - "#67", /* 67 = old vread */ - "#68", /* 68 = old vwrite */ - "sbrk", /* 69 = sbrk */ - "sstk", /* 70 = sstk */ - "#71", /* 71 = old mmap */ - "ovadvise", /* 72 = ovadvise old vadvise */ - "munmap", /* 73 = munmap */ - "mprotect", /* 74 = mprotect */ - "madvise", /* 75 = madvise */ - "#76", /* 76 = old vhangup */ - "#77", /* 77 = old vlimit */ - "mincore", /* 78 = mincore */ - "getgroups", /* 79 = getgroups */ - "setgroups", /* 80 = setgroups */ - "getpgrp", /* 81 = getpgrp */ - "setpgid", /* 82 = setpgid */ - "setitimer", /* 83 = setitimer */ - "#84", /* 84 = old wait */ - "swapon", /* 85 = swapon */ - "getitimer", /* 86 = getitimer */ - "#87", /* 87 = old gethostname */ - "#88", /* 88 = old sethostname */ - "getdtablesize", /* 89 = getdtablesize */ - "dup2", /* 90 = dup2 */ - "#91", /* 91 = old getdopt */ - "fcntl", /* 92 = fcntl */ - "select", /* 93 = select */ - "#94", /* 94 = old setdopt */ - "fsync", /* 95 = fsync */ - "setpriority", /* 96 = setpriority */ -#if SOCKETS - "socket", /* 97 = socket */ - "connect", /* 98 = connect */ -#else - "#97", /* 97 = */ - "#98", /* 98 = */ -#endif /* SOCKETS */ - "#99", /* 99 = old accept */ - "getpriority", /* 100 = getpriority */ - "#101", /* 101 = old send */ - "#102", /* 102 = old recv */ - "#103", /* 103 = old sigreturn */ -#if SOCKETS - "bind", /* 104 = bind */ - "setsockopt", /* 105 = setsockopt */ - "listen", /* 106 = listen */ -#else - "#104", /* 104 = */ - "#105", /* 105 = */ - "#106", /* 106 = */ -#endif /* SOCKETS */ - "#107", /* 107 = old vtimes */ - "#108", /* 108 = old sigvec */ - "#109", /* 109 = old sigblock */ - "#110", 
/* 110 = old sigsetmask */ - "sigsuspend", /* 111 = sigsuspend */ - "#112", /* 112 = old sigstack */ -#if SOCKETS - "#113", /* 113 = old recvmsg */ - "#114", /* 114 = old sendmsg */ -#else - "#113", /* 113 = */ - "#114", /* 114 = */ -#endif /* SOCKETS */ - "#115", /* 115 = old vtrace */ - "gettimeofday", /* 116 = gettimeofday */ - "getrusage", /* 117 = getrusage */ -#if SOCKETS - "getsockopt", /* 118 = getsockopt */ -#else - "#118", /* 118 = */ -#endif /* SOCKETS */ - "#119", /* 119 = old resuba */ - "readv", /* 120 = readv */ - "writev", /* 121 = writev */ - "settimeofday", /* 122 = settimeofday */ - "fchown", /* 123 = fchown */ - "fchmod", /* 124 = fchmod */ - "#125", /* 125 = old recvfrom */ - "setreuid", /* 126 = setreuid */ - "setregid", /* 127 = setregid */ - "rename", /* 128 = rename */ - "#129", /* 129 = old truncate */ - "#130", /* 130 = old ftruncate */ - "flock", /* 131 = flock */ - "mkfifo", /* 132 = mkfifo */ -#if SOCKETS - "sendto", /* 133 = sendto */ - "shutdown", /* 134 = shutdown */ - "socketpair", /* 135 = socketpair */ -#else - "#133", /* 133 = */ - "#134", /* 134 = */ - "#135", /* 135 = */ -#endif /* SOCKETS */ - "mkdir", /* 136 = mkdir */ - "rmdir", /* 137 = rmdir */ - "utimes", /* 138 = utimes */ - "futimes", /* 139 = futimes */ - "adjtime", /* 140 = adjtime */ - "#141", /* 141 = old getpeername */ - "gethostuuid", /* 142 = gethostuuid */ - "#143", /* 143 = old sethostid */ - "#144", /* 144 = old getrlimit */ - "#145", /* 145 = old setrlimit */ - "#146", /* 146 = old killpg */ - "setsid", /* 147 = setsid */ - "#148", /* 148 = old setquota */ - "#149", /* 149 = old qquota */ - "#150", /* 150 = old getsockname */ - "getpgid", /* 151 = getpgid */ - "setprivexec", /* 152 = setprivexec */ - "pread", /* 153 = pread */ - "pwrite", /* 154 = pwrite */ -#if NFSSERVER - "nfssvc", /* 155 = nfssvc */ -#else - "#155", /* 155 = */ -#endif - "#156", /* 156 = old getdirentries */ - "statfs", /* 157 = statfs */ - "fstatfs", /* 158 = fstatfs */ - "unmount", /* 
159 = unmount */ - "#160", /* 160 = old async_daemon */ -#if NFSSERVER - "getfh", /* 161 = getfh */ -#else - "#161", /* 161 = */ -#endif - "#162", /* 162 = old getdomainname */ - "#163", /* 163 = old setdomainname */ - "#164", /* 164 = */ - "quotactl", /* 165 = quotactl */ - "#166", /* 166 = old exportfs */ - "mount", /* 167 = mount */ - "#168", /* 168 = old ustat */ - "csops", /* 169 = csops */ - "table", /* 170 = table old table */ - "#171", /* 171 = old wait3 */ - "#172", /* 172 = old rpause */ - "waitid", /* 173 = waitid */ - "#174", /* 174 = old getdents */ - "#175", /* 175 = old gc_control */ - "add_profil", /* 176 = add_profil */ - "#177", /* 177 = */ - "#178", /* 178 = */ - "#179", /* 179 = */ - "kdebug_trace", /* 180 = kdebug_trace */ - "setgid", /* 181 = setgid */ - "setegid", /* 182 = setegid */ - "seteuid", /* 183 = seteuid */ - "sigreturn", /* 184 = sigreturn */ - "chud", /* 185 = chud */ - "#186", /* 186 = */ - "#187", /* 187 = */ - "stat", /* 188 = stat */ - "fstat", /* 189 = fstat */ - "lstat", /* 190 = lstat */ - "pathconf", /* 191 = pathconf */ - "fpathconf", /* 192 = fpathconf */ - "#193", /* 193 = */ - "getrlimit", /* 194 = getrlimit */ - "setrlimit", /* 195 = setrlimit */ - "getdirentries", /* 196 = getdirentries */ - "mmap", /* 197 = mmap */ - "#198", /* 198 = __syscall */ - "lseek", /* 199 = lseek */ - "truncate", /* 200 = truncate */ - "ftruncate", /* 201 = ftruncate */ - "__sysctl", /* 202 = __sysctl */ - "mlock", /* 203 = mlock */ - "munlock", /* 204 = munlock */ - "undelete", /* 205 = undelete */ -#if NETAT - "ATsocket", /* 206 = ATsocket */ - "ATgetmsg", /* 207 = ATgetmsg */ - "ATputmsg", /* 208 = ATputmsg */ - "ATPsndreq", /* 209 = ATPsndreq */ - "ATPsndrsp", /* 210 = ATPsndrsp */ - "ATPgetreq", /* 211 = ATPgetreq */ - "ATPgetrsp", /* 212 = ATPgetrsp */ - "#213", /* 213 = Reserved for AppleTalk */ -#else - "#206", /* 206 = */ - "#207", /* 207 = */ - "#208", /* 208 = */ - "#209", /* 209 = */ - "#210", /* 210 = */ - "#211", /* 211 = */ - 
"#212", /* 212 = */ - "#213", /* 213 = Reserved for AppleTalk */ -#endif /* NETAT */ - "kqueue_from_portset_np", /* 214 = kqueue_from_portset_np */ - "kqueue_portset_np", /* 215 = kqueue_portset_np */ - "mkcomplex", /* 216 = mkcomplex soon to be obsolete */ - "statv", /* 217 = statv soon to be obsolete */ - "lstatv", /* 218 = lstatv soon to be obsolete */ - "fstatv", /* 219 = fstatv soon to be obsolete */ - "getattrlist", /* 220 = getattrlist */ - "setattrlist", /* 221 = setattrlist */ - "getdirentriesattr", /* 222 = getdirentriesattr */ - "exchangedata", /* 223 = exchangedata */ - "#224", /* 224 = was checkuseraccess */ - "searchfs", /* 225 = searchfs */ - "delete", /* 226 = delete private delete ( Carbon semantics ) */ - "copyfile", /* 227 = copyfile */ - "#228", /* 228 = */ - "#229", /* 229 = */ - "poll", /* 230 = poll */ - "watchevent", /* 231 = watchevent */ - "waitevent", /* 232 = waitevent */ - "modwatch", /* 233 = modwatch */ - "getxattr", /* 234 = getxattr */ - "fgetxattr", /* 235 = fgetxattr */ - "setxattr", /* 236 = setxattr */ - "fsetxattr", /* 237 = fsetxattr */ - "removexattr", /* 238 = removexattr */ - "fremovexattr", /* 239 = fremovexattr */ - "listxattr", /* 240 = listxattr */ - "flistxattr", /* 241 = flistxattr */ - "fsctl", /* 242 = fsctl */ - "initgroups", /* 243 = initgroups */ - "posix_spawn", /* 244 = posix_spawn */ - "#245", /* 245 = */ - "#246", /* 246 = */ -#if NFSCLIENT - "nfsclnt", /* 247 = nfsclnt */ -#else - "#247", /* 247 = */ -#endif -#if NFSSERVER - "fhopen", /* 248 = fhopen */ -#else - "#248", /* 248 = */ -#endif - "#249", /* 249 = */ - "minherit", /* 250 = minherit */ -#if SYSV_SEM - "semsys", /* 251 = semsys */ -#else - "#251", /* 251 = */ -#endif -#if SYSV_MSG - "msgsys", /* 252 = msgsys */ -#else - "#252", /* 252 = */ -#endif -#if SYSV_SHM - "shmsys", /* 253 = shmsys */ -#else - "#253", /* 253 = */ -#endif -#if SYSV_SEM - "semctl", /* 254 = semctl */ - "semget", /* 255 = semget */ - "semop", /* 256 = semop */ - "#257", /* 257 = 
*/ -#else - "#254", /* 254 = */ - "#255", /* 255 = */ - "#256", /* 256 = */ - "#257", /* 257 = */ -#endif -#if SYSV_MSG - "msgctl", /* 258 = msgctl */ - "msgget", /* 259 = msgget */ - "msgsnd", /* 260 = msgsnd */ - "msgrcv", /* 261 = msgrcv */ -#else - "#258", /* 258 = */ - "#259", /* 259 = */ - "#260", /* 260 = */ - "#261", /* 261 = */ -#endif -#if SYSV_SHM - "shmat", /* 262 = shmat */ - "shmctl", /* 263 = shmctl */ - "shmdt", /* 264 = shmdt */ - "shmget", /* 265 = shmget */ -#else - "#262", /* 262 = */ - "#263", /* 263 = */ - "#264", /* 264 = */ - "#265", /* 265 = */ -#endif - "shm_open", /* 266 = shm_open */ - "shm_unlink", /* 267 = shm_unlink */ - "sem_open", /* 268 = sem_open */ - "sem_close", /* 269 = sem_close */ - "sem_unlink", /* 270 = sem_unlink */ - "sem_wait", /* 271 = sem_wait */ - "sem_trywait", /* 272 = sem_trywait */ - "sem_post", /* 273 = sem_post */ - "sem_getvalue", /* 274 = sem_getvalue */ - "sem_init", /* 275 = sem_init */ - "sem_destroy", /* 276 = sem_destroy */ - "open_extended", /* 277 = open_extended */ - "umask_extended", /* 278 = umask_extended */ - "stat_extended", /* 279 = stat_extended */ - "lstat_extended", /* 280 = lstat_extended */ - "fstat_extended", /* 281 = fstat_extended */ - "chmod_extended", /* 282 = chmod_extended */ - "fchmod_extended", /* 283 = fchmod_extended */ - "access_extended", /* 284 = access_extended */ - "settid", /* 285 = settid */ - "gettid", /* 286 = gettid */ - "setsgroups", /* 287 = setsgroups */ - "getsgroups", /* 288 = getsgroups */ - "setwgroups", /* 289 = setwgroups */ - "getwgroups", /* 290 = getwgroups */ - "mkfifo_extended", /* 291 = mkfifo_extended */ - "mkdir_extended", /* 292 = mkdir_extended */ - "identitysvc", /* 293 = identitysvc */ - "shared_region_check_np", /* 294 = shared_region_check_np */ - "shared_region_map_np", /* 295 = shared_region_map_np */ - "#296", /* 296 = old load_shared_file */ - "#297", /* 297 = old reset_shared_file */ - "#298", /* 298 = old new_system_shared_regions */ - 
"#299", /* 299 = old shared_region_map_file_np */ - "#300", /* 300 = old shared_region_make_private_np */ - "__pthread_mutex_destroy", /* 301 = __pthread_mutex_destroy */ - "__pthread_mutex_init", /* 302 = __pthread_mutex_init */ - "__pthread_mutex_lock", /* 303 = __pthread_mutex_lock */ - "__pthread_mutex_trylock", /* 304 = __pthread_mutex_trylock */ - "__pthread_mutex_unlock", /* 305 = __pthread_mutex_unlock */ - "__pthread_cond_init", /* 306 = __pthread_cond_init */ - "__pthread_cond_destroy", /* 307 = __pthread_cond_destroy */ - "__pthread_cond_broadcast", /* 308 = __pthread_cond_broadcast */ - "__pthread_cond_signal", /* 309 = __pthread_cond_signal */ - "getsid", /* 310 = getsid */ - "settid_with_pid", /* 311 = settid_with_pid */ - "__pthread_cond_timedwait", /* 312 = __pthread_cond_timedwait */ - "aio_fsync", /* 313 = aio_fsync */ - "aio_return", /* 314 = aio_return */ - "aio_suspend", /* 315 = aio_suspend */ - "aio_cancel", /* 316 = aio_cancel */ - "aio_error", /* 317 = aio_error */ - "aio_read", /* 318 = aio_read */ - "aio_write", /* 319 = aio_write */ - "lio_listio", /* 320 = lio_listio */ - "__pthread_cond_wait", /* 321 = __pthread_cond_wait */ - "iopolicysys", /* 322 = iopolicysys */ - "#323", /* 323 = */ - "mlockall", /* 324 = mlockall */ - "munlockall", /* 325 = munlockall */ - "#326", /* 326 = */ - "issetugid", /* 327 = issetugid */ - "__pthread_kill", /* 328 = __pthread_kill */ - "__pthread_sigmask", /* 329 = __pthread_sigmask */ - "__sigwait", /* 330 = __sigwait */ - "__disable_threadsignal", /* 331 = __disable_threadsignal */ - "__pthread_markcancel", /* 332 = __pthread_markcancel */ - "__pthread_canceled", /* 333 = __pthread_canceled */ - "__semwait_signal", /* 334 = __semwait_signal */ - "#335", /* 335 = old utrace */ - "proc_info", /* 336 = proc_info */ -#if SENDFILE - "sendfile", /* 337 = sendfile */ -#else /* !SENDFILE */ - "#337", /* 337 = */ -#endif /* SENDFILE */ - "stat64", /* 338 = stat64 */ - "fstat64", /* 339 = fstat64 */ - "lstat64", 
/* 340 = lstat64 */ - "stat64_extended", /* 341 = stat64_extended */ - "lstat64_extended", /* 342 = lstat64_extended */ - "fstat64_extended", /* 343 = fstat64_extended */ - "getdirentries64", /* 344 = getdirentries64 */ - "statfs64", /* 345 = statfs64 */ - "fstatfs64", /* 346 = fstatfs64 */ - "getfsstat64", /* 347 = getfsstat64 */ - "__pthread_chdir", /* 348 = __pthread_chdir */ - "__pthread_fchdir", /* 349 = __pthread_fchdir */ -#if AUDIT - "audit", /* 350 = audit */ - "auditon", /* 351 = auditon */ - "#352", /* 352 = */ - "getauid", /* 353 = getauid */ - "setauid", /* 354 = setauid */ - "getaudit", /* 355 = getaudit */ - "setaudit", /* 356 = setaudit */ - "getaudit_addr", /* 357 = getaudit_addr */ - "setaudit_addr", /* 358 = setaudit_addr */ - "auditctl", /* 359 = auditctl */ -#else - "#350", /* 350 = */ - "#351", /* 351 = */ - "#352", /* 352 = */ - "#353", /* 353 = */ - "#354", /* 354 = */ - "#355", /* 355 = */ - "#356", /* 356 = */ - "#357", /* 357 = */ - "#358", /* 358 = */ - "#359", /* 359 = */ -#endif - "bsdthread_create", /* 360 = bsdthread_create */ - "bsdthread_terminate", /* 361 = bsdthread_terminate */ - "kqueue", /* 362 = kqueue */ - "kevent", /* 363 = kevent */ - "lchown", /* 364 = lchown */ - "stack_snapshot", /* 365 = stack_snapshot */ - "bsdthread_register", /* 366 = bsdthread_register */ - "workq_open", /* 367 = workq_open */ - "workq_ops", /* 368 = workq_ops */ - "#369", /* 369 = */ - "#370", /* 370 = */ - "#371", /* 371 = */ - "#372", /* 372 = */ - "#373", /* 373 = */ - "#374", /* 374 = */ - "#375", /* 375 = */ - "#376", /* 376 = */ - "#377", /* 377 = */ - "#378", /* 378 = */ - "#379", /* 379 = */ - "__mac_execve", /* 380 = __mac_execve */ - "__mac_syscall", /* 381 = __mac_syscall */ - "__mac_get_file", /* 382 = __mac_get_file */ - "__mac_set_file", /* 383 = __mac_set_file */ - "__mac_get_link", /* 384 = __mac_get_link */ - "__mac_set_link", /* 385 = __mac_set_link */ - "__mac_get_proc", /* 386 = __mac_get_proc */ - "__mac_set_proc", /* 387 = 
__mac_set_proc */ - "__mac_get_fd", /* 388 = __mac_get_fd */ - "__mac_set_fd", /* 389 = __mac_set_fd */ - "__mac_get_pid", /* 390 = __mac_get_pid */ - "__mac_get_lcid", /* 391 = __mac_get_lcid */ - "__mac_get_lctx", /* 392 = __mac_get_lctx */ - "__mac_set_lctx", /* 393 = __mac_set_lctx */ - "setlcid", /* 394 = setlcid */ - "getlcid", /* 395 = getlcid */ - "read_nocancel", /* 396 = read_nocancel */ - "write_nocancel", /* 397 = write_nocancel */ - "open_nocancel", /* 398 = open_nocancel */ - "close_nocancel", /* 399 = close_nocancel */ - "wait4_nocancel", /* 400 = wait4_nocancel */ -#if SOCKETS - "recvmsg_nocancel", /* 401 = recvmsg_nocancel */ - "sendmsg_nocancel", /* 402 = sendmsg_nocancel */ - "recvfrom_nocancel", /* 403 = recvfrom_nocancel */ - "accept_nocancel", /* 404 = accept_nocancel */ -#else - "#401", /* 401 = */ - "#402", /* 402 = */ - "#403", /* 403 = */ - "#404", /* 404 = */ -#endif /* SOCKETS */ - "msync_nocancel", /* 405 = msync_nocancel */ - "fcntl_nocancel", /* 406 = fcntl_nocancel */ - "select_nocancel", /* 407 = select_nocancel */ - "fsync_nocancel", /* 408 = fsync_nocancel */ -#if SOCKETS - "connect_nocancel", /* 409 = connect_nocancel */ -#else - "#409", /* 409 = */ -#endif /* SOCKETS */ - "sigsuspend_nocancel", /* 410 = sigsuspend_nocancel */ - "readv_nocancel", /* 411 = readv_nocancel */ - "writev_nocancel", /* 412 = writev_nocancel */ -#if SOCKETS - "sendto_nocancel", /* 413 = sendto_nocancel */ -#else - "#413", /* 413 = */ -#endif /* SOCKETS */ - "pread_nocancel", /* 414 = pread_nocancel */ - "pwrite_nocancel", /* 415 = pwrite_nocancel */ - "waitid_nocancel", /* 416 = waitid_nocancel */ - "poll_nocancel", /* 417 = poll_nocancel */ -#if SYSV_MSG - "msgsnd_nocancel", /* 418 = msgsnd_nocancel */ - "msgrcv_nocancel", /* 419 = msgrcv_nocancel */ -#else - "#418", /* 418 = */ - "#419", /* 419 = */ -#endif - "sem_wait_nocancel", /* 420 = sem_wait_nocancel */ - "aio_suspend_nocancel", /* 421 = aio_suspend_nocancel */ - "__sigwait_nocancel", /* 422 = 
__sigwait_nocancel */ - "__semwait_signal_nocancel", /* 423 = __semwait_signal_nocancel */ - "__mac_mount", /* 424 = __mac_mount */ - "__mac_get_mount", /* 425 = __mac_get_mount */ - "__mac_getfsstat", /* 426 = __mac_getfsstat */ -}; diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master index cb5a66b78..d82fc7f83 100644 --- a/bsd/kern/syscalls.master +++ b/bsd/kern/syscalls.master @@ -6,9 +6,15 @@ ; .../xnu/bsd/kern/syscalls.c ; .../xnu/bsd/sys/syscall.h ; .../xnu/bsd/sys/sysproto.h +; .../xnu/bsd/security/audit_syscalls.c -; Columns -> | Number Files | { Name and Args } | { Comments } +; Columns -> | Number Audit Files | { Name and Args } | { Comments } ; Number: system call number, must be in order +; Audit: the audit event associated with the system call +; A value of AUE_NULL means no auditing, but it also means that +; there is no audit event for the call at this time. For the +; case where the event exists, but we don't want auditing, the +; event should be #defined to AUE_NULL in audit_kevents.h. 
; Files: with files to generate - "ALL" or any combo of: ; "T" for syscall table (in init_sysent.c) ; "N" for syscall names (in syscalls.c) @@ -32,288 +38,282 @@ #include #include -0 ALL { int nosys(void); } { indirect syscall } -1 ALL { void exit(int rval); } -2 ALL { int fork(void); } -3 ALL { user_ssize_t read(int fd, user_addr_t cbuf, user_size_t nbyte); } -4 ALL { user_ssize_t write(int fd, user_addr_t cbuf, user_size_t nbyte); } -5 ALL { int open(user_addr_t path, int flags, int mode); } -6 ALL { int close(int fd); } -7 ALL { int wait4(int pid, user_addr_t status, int options, user_addr_t rusage); } -8 ALL { int nosys(void); } { old creat } -9 ALL { int link(user_addr_t path, user_addr_t link); } -10 ALL { int unlink(user_addr_t path); } -11 ALL { int nosys(void); } { old execv } -12 ALL { int chdir(user_addr_t path); } -13 ALL { int fchdir(int fd); } -14 ALL { int mknod(user_addr_t path, int mode, int dev); } -15 ALL { int chmod(user_addr_t path, int mode); } -16 ALL { int chown(user_addr_t path, int uid, int gid); } -17 UALL { int obreak(char *nsize) NO_SYSCALL_STUB; } { old break } - -#if COMPAT_GETFSSTAT -18 ALL { int ogetfsstat(user_addr_t buf, int bufsize, int flags); } -#else -18 ALL { int getfsstat(user_addr_t buf, int bufsize, int flags); } -#endif - -19 ALL { int nosys(void); } { old lseek } -20 ALL { int getpid(void); } -21 ALL { int nosys(void); } { old mount } -22 ALL { int nosys(void); } { old umount } -23 ALL { int setuid(uid_t uid); } -24 ALL { int getuid(void); } -25 ALL { int geteuid(void); } -26 ALL { int ptrace(int req, pid_t pid, caddr_t addr, int data); } +0 AUE_NULL ALL { int nosys(void); } { indirect syscall } +1 AUE_EXIT ALL { void exit(int rval); } +2 AUE_FORK ALL { int fork(void); } +3 AUE_NULL ALL { user_ssize_t read(int fd, user_addr_t cbuf, user_size_t nbyte); } +4 AUE_NULL ALL { user_ssize_t write(int fd, user_addr_t cbuf, user_size_t nbyte); } +5 AUE_OPEN_RWTC ALL { int open(user_addr_t path, int flags, int mode); } +6 
AUE_CLOSE ALL { int close(int fd); } +7 AUE_WAIT4 ALL { int wait4(int pid, user_addr_t status, int options, user_addr_t rusage); } +8 AUE_NULL ALL { int nosys(void); } { old creat } +9 AUE_LINK ALL { int link(user_addr_t path, user_addr_t link); } +10 AUE_UNLINK ALL { int unlink(user_addr_t path); } +11 AUE_NULL ALL { int nosys(void); } { old execv } +12 AUE_CHDIR ALL { int chdir(user_addr_t path); } +13 AUE_FCHDIR ALL { int fchdir(int fd); } +14 AUE_MKNOD ALL { int mknod(user_addr_t path, int mode, int dev); } +15 AUE_CHMOD ALL { int chmod(user_addr_t path, int mode); } +16 AUE_CHOWN ALL { int chown(user_addr_t path, int uid, int gid); } +17 AUE_NULL ALL { int nosys(void); } { old break } +18 AUE_GETFSSTAT ALL { int getfsstat(user_addr_t buf, int bufsize, int flags); } +19 AUE_NULL ALL { int nosys(void); } { old lseek } +20 AUE_GETPID ALL { int getpid(void); } +21 AUE_NULL ALL { int nosys(void); } { old mount } +22 AUE_NULL ALL { int nosys(void); } { old umount } +23 AUE_SETUID ALL { int setuid(uid_t uid); } +24 AUE_GETUID ALL { int getuid(void); } +25 AUE_GETEUID ALL { int geteuid(void); } +26 AUE_PTRACE ALL { int ptrace(int req, pid_t pid, caddr_t addr, int data); } #if SOCKETS -27 ALL { int recvmsg(int s, struct msghdr *msg, int flags); } -28 ALL { int sendmsg(int s, caddr_t msg, int flags); } -29 ALL { int recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr); } -30 ALL { int accept(int s, caddr_t name, socklen_t *anamelen); } -31 ALL { int getpeername(int fdes, caddr_t asa, socklen_t *alen); } -32 ALL { int getsockname(int fdes, caddr_t asa, socklen_t *alen); } +27 AUE_RECVMSG ALL { int recvmsg(int s, struct msghdr *msg, int flags); } +28 AUE_SENDMSG ALL { int sendmsg(int s, caddr_t msg, int flags); } +29 AUE_RECVFROM ALL { int recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr); } +30 AUE_ACCEPT ALL { int accept(int s, caddr_t name, socklen_t *anamelen); } +31 AUE_GETPEERNAME ALL { 
int getpeername(int fdes, caddr_t asa, socklen_t *alen); } +32 AUE_GETSOCKNAME ALL { int getsockname(int fdes, caddr_t asa, socklen_t *alen); } #else -27 ALL { int nosys(void); } -28 ALL { int nosys(void); } -29 ALL { int nosys(void); } -30 ALL { int nosys(void); } -31 ALL { int nosys(void); } -32 ALL { int nosys(void); } +27 AUE_NULL ALL { int nosys(void); } +28 AUE_NULL ALL { int nosys(void); } +29 AUE_NULL ALL { int nosys(void); } +30 AUE_NULL ALL { int nosys(void); } +31 AUE_NULL ALL { int nosys(void); } +32 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -33 ALL { int access(user_addr_t path, int flags); } -34 ALL { int chflags(char *path, int flags); } -35 ALL { int fchflags(int fd, int flags); } -36 ALL { int sync(void); } -37 ALL { int kill(int pid, int signum, int posix); } -38 ALL { int nosys(void); } { old stat } -39 ALL { int getppid(void); } -40 ALL { int nosys(void); } { old lstat } -41 ALL { int dup(u_int fd); } -42 ALL { int pipe(void); } -43 ALL { int getegid(void); } -44 ALL { int profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } -45 ALL { int nosys(void); } { old ktrace } -46 ALL { int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa); } -47 ALL { int getgid(void); } -48 ALL { int sigprocmask(int how, user_addr_t mask, user_addr_t omask); } -49 ALL { int getlogin(char *namebuf, u_int namelen); } -50 ALL { int setlogin(char *namebuf); } -51 ALL { int acct(char *path); } -52 ALL { int sigpending(struct sigvec *osv); } -53 ALL { int sigaltstack(struct sigaltstack *nss, struct sigaltstack *oss); } -54 ALL { int ioctl(int fd, u_long com, caddr_t data); } -55 ALL { int reboot(int opt, char *command); } -56 ALL { int revoke(char *path); } -57 ALL { int symlink(char *path, char *link); } -58 ALL { int readlink(char *path, char *buf, int count); } -59 ALL { int execve(char *fname, char **argp, char **envp); } -60 ALL { int umask(int newmask); } -61 ALL { int chroot(user_addr_t path); } -62 ALL { int 
nosys(void); } { old fstat } -63 ALL { int nosys(void); } { used internally, reserved } -64 ALL { int nosys(void); } { old getpagesize } -65 ALL { int msync(caddr_t addr, size_t len, int flags); } -66 ALL { int vfork(void); } -67 ALL { int nosys(void); } { old vread } -68 ALL { int nosys(void); } { old vwrite } -69 ALL { int sbrk(int incr) NO_SYSCALL_STUB; } -70 ALL { int sstk(int incr) NO_SYSCALL_STUB; } -71 ALL { int nosys(void); } { old mmap } -72 ALL { int ovadvise(void) NO_SYSCALL_STUB; } { old vadvise } -73 ALL { int munmap(caddr_t addr, size_t len); } -74 ALL { int mprotect(caddr_t addr, size_t len, int prot); } -75 ALL { int madvise(caddr_t addr, size_t len, int behav); } -76 ALL { int nosys(void); } { old vhangup } -77 ALL { int nosys(void); } { old vlimit } -78 ALL { int mincore(user_addr_t addr, user_size_t len, user_addr_t vec); } -79 ALL { int getgroups(u_int gidsetsize, gid_t *gidset); } -80 ALL { int setgroups(u_int gidsetsize, gid_t *gidset); } -81 ALL { int getpgrp(void); } -82 ALL { int setpgid(int pid, int pgid); } -83 ALL { int setitimer(u_int which, struct itimerval *itv, struct itimerval *oitv); } -84 ALL { int nosys(void); } { old wait } -85 ALL { int swapon(void); } -86 ALL { int getitimer(u_int which, struct itimerval *itv); } -87 ALL { int nosys(void); } { old gethostname } -88 ALL { int nosys(void); } { old sethostname } -89 ALL { int getdtablesize(void); } -90 ALL { int dup2(u_int from, u_int to); } -91 ALL { int nosys(void); } { old getdopt } -92 ALL { int fcntl(int fd, int cmd, long arg); } -93 ALL { int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv); } -94 ALL { int nosys(void); } { old setdopt } -95 ALL { int fsync(int fd); } -96 ALL { int setpriority(int which, id_t who, int prio); } +33 AUE_ACCESS ALL { int access(user_addr_t path, int flags); } +34 AUE_CHFLAGS ALL { int chflags(char *path, int flags); } +35 AUE_FCHFLAGS ALL { int fchflags(int fd, int flags); } +36 AUE_SYNC ALL { int sync(void); } 
+37 AUE_KILL ALL { int kill(int pid, int signum, int posix); } +38 AUE_NULL ALL { int nosys(void); } { old stat } +39 AUE_GETPPID ALL { int getppid(void); } +40 AUE_NULL ALL { int nosys(void); } { old lstat } +41 AUE_DUP ALL { int dup(u_int fd); } +42 AUE_PIPE ALL { int pipe(void); } +43 AUE_GETEGID ALL { int getegid(void); } +44 AUE_PROFILE ALL { int profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } +45 AUE_NULL ALL { int nosys(void); } { old ktrace } +46 AUE_SIGACTION ALL { int sigaction(int signum, struct __sigaction *nsa, struct sigaction *osa); } +47 AUE_GETGID ALL { int getgid(void); } +48 AUE_SIGPROCMASK ALL { int sigprocmask(int how, user_addr_t mask, user_addr_t omask); } +49 AUE_GETLOGIN ALL { int getlogin(char *namebuf, u_int namelen); } +50 AUE_SETLOGIN ALL { int setlogin(char *namebuf); } +51 AUE_ACCT ALL { int acct(char *path); } +52 AUE_SIGPENDING ALL { int sigpending(struct sigvec *osv); } +53 AUE_SIGALTSTACK ALL { int sigaltstack(struct sigaltstack *nss, struct sigaltstack *oss); } +54 AUE_IOCTL ALL { int ioctl(int fd, u_long com, caddr_t data); } +55 AUE_REBOOT ALL { int reboot(int opt, char *command); } +56 AUE_REVOKE ALL { int revoke(char *path); } +57 AUE_SYMLINK ALL { int symlink(char *path, char *link); } +58 AUE_READLINK ALL { int readlink(char *path, char *buf, int count); } +59 AUE_EXECVE ALL { int execve(char *fname, char **argp, char **envp); } +60 AUE_UMASK ALL { int umask(int newmask); } +61 AUE_CHROOT ALL { int chroot(user_addr_t path); } +62 AUE_NULL ALL { int nosys(void); } { old fstat } +63 AUE_NULL ALL { int nosys(void); } { used internally, reserved } +64 AUE_NULL ALL { int nosys(void); } { old getpagesize } +65 AUE_MSYNC ALL { int msync(caddr_t addr, size_t len, int flags); } +66 AUE_VFORK ALL { int vfork(void); } +67 AUE_NULL ALL { int nosys(void); } { old vread } +68 AUE_NULL ALL { int nosys(void); } { old vwrite } +69 AUE_NULL ALL { int nosys(void); } { old sbrk } +70 AUE_NULL ALL { int nosys(void); } 
{ old sstk } +71 AUE_NULL ALL { int nosys(void); } { old mmap } +72 AUE_NULL ALL { int nosys(void); } { old vadvise } +73 AUE_MUNMAP ALL { int munmap(caddr_t addr, size_t len); } +74 AUE_MPROTECT ALL { int mprotect(caddr_t addr, size_t len, int prot); } +75 AUE_MADVISE ALL { int madvise(caddr_t addr, size_t len, int behav); } +76 AUE_NULL ALL { int nosys(void); } { old vhangup } +77 AUE_NULL ALL { int nosys(void); } { old vlimit } +78 AUE_MINCORE ALL { int mincore(user_addr_t addr, user_size_t len, user_addr_t vec); } +79 AUE_GETGROUPS ALL { int getgroups(u_int gidsetsize, gid_t *gidset); } +80 AUE_SETGROUPS ALL { int setgroups(u_int gidsetsize, gid_t *gidset); } +81 AUE_GETPGRP ALL { int getpgrp(void); } +82 AUE_SETPGRP ALL { int setpgid(int pid, int pgid); } +83 AUE_SETITIMER ALL { int setitimer(u_int which, struct itimerval *itv, struct itimerval *oitv); } +84 AUE_NULL ALL { int nosys(void); } { old wait } +85 AUE_SWAPON ALL { int swapon(void); } +86 AUE_GETITIMER ALL { int getitimer(u_int which, struct itimerval *itv); } +87 AUE_NULL ALL { int nosys(void); } { old gethostname } +88 AUE_NULL ALL { int nosys(void); } { old sethostname } +89 AUE_GETDTABLESIZE ALL { int getdtablesize(void); } +90 AUE_DUP2 ALL { int dup2(u_int from, u_int to); } +91 AUE_NULL ALL { int nosys(void); } { old getdopt } +92 AUE_FCNTL ALL { int fcntl(int fd, int cmd, long arg); } +93 AUE_SELECT ALL { int select(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv); } +94 AUE_NULL ALL { int nosys(void); } { old setdopt } +95 AUE_FSYNC ALL { int fsync(int fd); } +96 AUE_SETPRIORITY ALL { int setpriority(int which, id_t who, int prio); } #if SOCKETS -97 ALL { int socket(int domain, int type, int protocol); } -98 ALL { int connect(int s, caddr_t name, socklen_t namelen); } +97 AUE_SOCKET ALL { int socket(int domain, int type, int protocol); } +98 AUE_CONNECT ALL { int connect(int s, caddr_t name, socklen_t namelen); } #else -97 ALL { int nosys(void); } -98 ALL { int 
nosys(void); } +97 AUE_NULL ALL { int nosys(void); } +98 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -99 ALL { int nosys(void); } { old accept } -100 ALL { int getpriority(int which, id_t who); } -101 ALL { int nosys(void); } { old send } -102 ALL { int nosys(void); } { old recv } -103 ALL { int nosys(void); } { old sigreturn } +99 AUE_NULL ALL { int nosys(void); } { old accept } +100 AUE_GETPRIORITY ALL { int getpriority(int which, id_t who); } +101 AUE_NULL ALL { int nosys(void); } { old send } +102 AUE_NULL ALL { int nosys(void); } { old recv } +103 AUE_NULL ALL { int nosys(void); } { old sigreturn } #if SOCKETS -104 ALL { int bind(int s, caddr_t name, socklen_t namelen); } -105 ALL { int setsockopt(int s, int level, int name, caddr_t val, socklen_t valsize); } -106 ALL { int listen(int s, int backlog); } +104 AUE_BIND ALL { int bind(int s, caddr_t name, socklen_t namelen); } +105 AUE_SETSOCKOPT ALL { int setsockopt(int s, int level, int name, caddr_t val, socklen_t valsize); } +106 AUE_LISTEN ALL { int listen(int s, int backlog); } #else -104 ALL { int nosys(void); } -105 ALL { int nosys(void); } -106 ALL { int nosys(void); } +104 AUE_NULL ALL { int nosys(void); } +105 AUE_NULL ALL { int nosys(void); } +106 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -107 ALL { int nosys(void); } { old vtimes } -108 ALL { int nosys(void); } { old sigvec } -109 ALL { int nosys(void); } { old sigblock } -110 ALL { int nosys(void); } { old sigsetmask } -111 ALL { int sigsuspend(sigset_t mask); } -112 ALL { int nosys(void); } { old sigstack } +107 AUE_NULL ALL { int nosys(void); } { old vtimes } +108 AUE_NULL ALL { int nosys(void); } { old sigvec } +109 AUE_NULL ALL { int nosys(void); } { old sigblock } +110 AUE_NULL ALL { int nosys(void); } { old sigsetmask } +111 AUE_NULL ALL { int sigsuspend(sigset_t mask); } +112 AUE_NULL ALL { int nosys(void); } { old sigstack } #if SOCKETS -113 ALL { int nosys(void); } { old recvmsg } -114 ALL { int nosys(void); } { old 
sendmsg } +113 AUE_NULL ALL { int nosys(void); } { old recvmsg } +114 AUE_NULL ALL { int nosys(void); } { old sendmsg } #else -113 ALL { int nosys(void); } -114 ALL { int nosys(void); } +113 AUE_NULL ALL { int nosys(void); } +114 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -115 ALL { int nosys(void); } { old vtrace } -116 ALL { int gettimeofday(struct timeval *tp, struct timezone *tzp); } -117 ALL { int getrusage(int who, struct rusage *rusage); } +115 AUE_NULL ALL { int nosys(void); } { old vtrace } +116 AUE_GETTIMEOFDAY ALL { int gettimeofday(struct timeval *tp, struct timezone *tzp); } +117 AUE_GETRUSAGE ALL { int getrusage(int who, struct rusage *rusage); } #if SOCKETS -118 ALL { int getsockopt(int s, int level, int name, caddr_t val, socklen_t *avalsize); } +118 AUE_GETSOCKOPT ALL { int getsockopt(int s, int level, int name, caddr_t val, socklen_t *avalsize); } #else -118 ALL { int nosys(void); } +118 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -119 ALL { int nosys(void); } { old resuba } -120 ALL { user_ssize_t readv(int fd, struct iovec *iovp, u_int iovcnt); } -121 ALL { user_ssize_t writev(int fd, struct iovec *iovp, u_int iovcnt); } -122 ALL { int settimeofday(struct timeval *tv, struct timezone *tzp); } -123 ALL { int fchown(int fd, int uid, int gid); } -124 ALL { int fchmod(int fd, int mode); } -125 ALL { int nosys(void); } { old recvfrom } -126 ALL { int setreuid(uid_t ruid, uid_t euid); } -127 ALL { int setregid(gid_t rgid, gid_t egid); } -128 ALL { int rename(char *from, char *to); } -129 ALL { int nosys(void); } { old truncate } -130 ALL { int nosys(void); } { old ftruncate } -131 ALL { int flock(int fd, int how); } -132 ALL { int mkfifo(user_addr_t path, int mode); } +119 AUE_NULL ALL { int nosys(void); } { old resuba } +120 AUE_READV ALL { user_ssize_t readv(int fd, struct iovec *iovp, u_int iovcnt); } +121 AUE_WRITEV ALL { user_ssize_t writev(int fd, struct iovec *iovp, u_int iovcnt); } +122 AUE_SETTIMEOFDAY ALL { int 
settimeofday(struct timeval *tv, struct timezone *tzp); } +123 AUE_FCHOWN ALL { int fchown(int fd, int uid, int gid); } +124 AUE_FCHMOD ALL { int fchmod(int fd, int mode); } +125 AUE_NULL ALL { int nosys(void); } { old recvfrom } +126 AUE_SETREUID ALL { int setreuid(uid_t ruid, uid_t euid); } +127 AUE_SETREGID ALL { int setregid(gid_t rgid, gid_t egid); } +128 AUE_RENAME ALL { int rename(char *from, char *to); } +129 AUE_NULL ALL { int nosys(void); } { old truncate } +130 AUE_NULL ALL { int nosys(void); } { old ftruncate } +131 AUE_FLOCK ALL { int flock(int fd, int how); } +132 AUE_MKFIFO ALL { int mkfifo(user_addr_t path, int mode); } #if SOCKETS -133 ALL { int sendto(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen); } -134 ALL { int shutdown(int s, int how); } -135 ALL { int socketpair(int domain, int type, int protocol, int *rsv); } +133 AUE_SENDTO ALL { int sendto(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen); } +134 AUE_SHUTDOWN ALL { int shutdown(int s, int how); } +135 AUE_SOCKETPAIR ALL { int socketpair(int domain, int type, int protocol, int *rsv); } #else -133 ALL { int nosys(void); } -134 ALL { int nosys(void); } -135 ALL { int nosys(void); } +133 AUE_NULL ALL { int nosys(void); } +134 AUE_NULL ALL { int nosys(void); } +135 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -136 ALL { int mkdir(user_addr_t path, int mode); } -137 ALL { int rmdir(char *path); } -138 ALL { int utimes(char *path, struct timeval *tptr); } -139 ALL { int futimes(int fd, struct timeval *tptr); } -140 ALL { int adjtime(struct timeval *delta, struct timeval *olddelta); } -141 ALL { int nosys(void); } { old getpeername } -142 ALL { int gethostuuid(unsigned char *uuid_buf, const struct timespec *timeoutp); } -143 ALL { int nosys(void); } { old sethostid } -144 ALL { int nosys(void); } { old getrlimit } -145 ALL { int nosys(void); } { old setrlimit } -146 ALL { int nosys(void); } { old killpg } -147 ALL { int setsid(void); } -148 
ALL { int nosys(void); } { old setquota } -149 ALL { int nosys(void); } { old qquota } -150 ALL { int nosys(void); } { old getsockname } -151 ALL { int getpgid(pid_t pid); } -152 ALL { int setprivexec(int flag); } -153 ALL { user_ssize_t pread(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } -154 ALL { user_ssize_t pwrite(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } +136 AUE_MKDIR ALL { int mkdir(user_addr_t path, int mode); } +137 AUE_RMDIR ALL { int rmdir(char *path); } +138 AUE_UTIMES ALL { int utimes(char *path, struct timeval *tptr); } +139 AUE_FUTIMES ALL { int futimes(int fd, struct timeval *tptr); } +140 AUE_ADJTIME ALL { int adjtime(struct timeval *delta, struct timeval *olddelta); } +141 AUE_NULL ALL { int nosys(void); } { old getpeername } +142 AUE_SYSCTL ALL { int gethostuuid(unsigned char *uuid_buf, const struct timespec *timeoutp); } +143 AUE_NULL ALL { int nosys(void); } { old sethostid } +144 AUE_NULL ALL { int nosys(void); } { old getrlimit } +145 AUE_NULL ALL { int nosys(void); } { old setrlimit } +146 AUE_NULL ALL { int nosys(void); } { old killpg } +147 AUE_SETSID ALL { int setsid(void); } +148 AUE_NULL ALL { int nosys(void); } { old setquota } +149 AUE_NULL ALL { int nosys(void); } { old qquota } +150 AUE_NULL ALL { int nosys(void); } { old getsockname } +151 AUE_GETPGID ALL { int getpgid(pid_t pid); } +152 AUE_SETPRIVEXEC ALL { int setprivexec(int flag); } +153 AUE_PREAD ALL { user_ssize_t pread(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } +154 AUE_PWRITE ALL { user_ssize_t pwrite(int fd, user_addr_t buf, user_size_t nbyte, off_t offset); } #if NFSSERVER -155 ALL { int nfssvc(int flag, caddr_t argp); } +155 AUE_NFS_SVC ALL { int nfssvc(int flag, caddr_t argp); } #else -155 ALL { int nosys(void); } +155 AUE_NULL ALL { int nosys(void); } #endif -156 ALL { int nosys(void); } { old getdirentries } -157 ALL { int statfs(char *path, struct statfs *buf); } -158 ALL { int fstatfs(int fd, struct statfs *buf); 
} -159 ALL { int unmount(user_addr_t path, int flags); } -160 ALL { int nosys(void); } { old async_daemon } +156 AUE_NULL ALL { int nosys(void); } { old getdirentries } +157 AUE_STATFS ALL { int statfs(char *path, struct statfs *buf); } +158 AUE_FSTATFS ALL { int fstatfs(int fd, struct statfs *buf); } +159 AUE_UNMOUNT ALL { int unmount(user_addr_t path, int flags); } +160 AUE_NULL ALL { int nosys(void); } { old async_daemon } #if NFSSERVER -161 ALL { int getfh(char *fname, fhandle_t *fhp); } +161 AUE_NFS_GETFH ALL { int getfh(char *fname, fhandle_t *fhp); } #else -161 ALL { int nosys(void); } +161 AUE_NULL ALL { int nosys(void); } #endif -162 ALL { int nosys(void); } { old getdomainname } -163 ALL { int nosys(void); } { old setdomainname } -164 ALL { int nosys(void); } -165 ALL { int quotactl(const char *path, int cmd, int uid, caddr_t arg); } -166 ALL { int nosys(void); } { old exportfs } -167 ALL { int mount(char *type, char *path, int flags, caddr_t data); } -168 ALL { int nosys(void); } { old ustat } -169 ALL { int csops(pid_t pid, uint32_t ops, user_addr_t useraddr, user_size_t usersize); } -170 HN { int table(void); } { old table } -171 ALL { int nosys(void); } { old wait3 } -172 ALL { int nosys(void); } { old rpause } -173 ALL { int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); } -174 ALL { int nosys(void); } { old getdents } -175 ALL { int nosys(void); } { old gc_control } -176 ALL { int add_profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } -177 ALL { int nosys(void); } -178 ALL { int nosys(void); } -179 ALL { int nosys(void); } -180 ALL { int kdebug_trace(int code, int arg1, int arg2, int arg3, int arg4, int arg5) NO_SYSCALL_STUB; } -181 ALL { int setgid(gid_t gid); } -182 ALL { int setegid(gid_t egid); } -183 ALL { int seteuid(uid_t euid); } -184 ALL { int sigreturn(struct ucontext *uctx, int infostyle); } -185 UALL { int chud(int code, int arg1, int arg2, int arg3, int arg4, int arg5) NO_SYSCALL_STUB; } -186 ALL 
{ int nosys(void); } -187 ALL { int nosys(void); } -188 ALL { int stat(user_addr_t path, user_addr_t ub); } -189 ALL { int fstat(int fd, user_addr_t ub); } -190 ALL { int lstat(user_addr_t path, user_addr_t ub); } -191 ALL { int pathconf(char *path, int name); } -192 ALL { int fpathconf(int fd, int name); } -193 ALL { int nosys(void); } -194 ALL { int getrlimit(u_int which, struct rlimit *rlp); } -195 ALL { int setrlimit(u_int which, struct rlimit *rlp); } -196 ALL { int getdirentries(int fd, char *buf, u_int count, long *basep); } -197 ALL { user_addr_t mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos); } -198 ALL { int nosys(void); } { __syscall } -199 ALL { off_t lseek(int fd, off_t offset, int whence); } -200 ALL { int truncate(char *path, off_t length); } -201 ALL { int ftruncate(int fd, off_t length); } -202 ALL { int __sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen); } -203 ALL { int mlock(caddr_t addr, size_t len); } -204 ALL { int munlock(caddr_t addr, size_t len); } -205 ALL { int undelete(user_addr_t path); } +162 AUE_NULL ALL { int nosys(void); } { old getdomainname } +163 AUE_NULL ALL { int nosys(void); } { old setdomainname } +164 AUE_NULL ALL { int nosys(void); } +165 AUE_QUOTACTL ALL { int quotactl(const char *path, int cmd, int uid, caddr_t arg); } +166 AUE_NULL ALL { int nosys(void); } { old exportfs } +167 AUE_MOUNT ALL { int mount(char *type, char *path, int flags, caddr_t data); } +168 AUE_NULL ALL { int nosys(void); } { old ustat } +169 AUE_CSOPS ALL { int csops(pid_t pid, uint32_t ops, user_addr_t useraddr, user_size_t usersize); } +170 AUE_NULL HN { int nosys(void); } { old table } +171 AUE_NULL ALL { int nosys(void); } { old wait3 } +172 AUE_NULL ALL { int nosys(void); } { old rpause } +173 AUE_WAITID ALL { int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); } +174 AUE_NULL ALL { int nosys(void); } { old getdents } +175 AUE_NULL ALL { int nosys(void); } { old 
gc_control } +176 AUE_ADDPROFILE ALL { int add_profil(short *bufbase, size_t bufsize, u_long pcoffset, u_int pcscale); } +177 AUE_NULL ALL { int nosys(void); } +178 AUE_NULL ALL { int nosys(void); } +179 AUE_NULL ALL { int nosys(void); } +180 AUE_KDEBUGTRACE ALL { int kdebug_trace(int code, int arg1, int arg2, int arg3, int arg4, int arg5) NO_SYSCALL_STUB; } +181 AUE_SETGID ALL { int setgid(gid_t gid); } +182 AUE_SETEGID ALL { int setegid(gid_t egid); } +183 AUE_SETEUID ALL { int seteuid(uid_t euid); } +184 AUE_SIGRETURN ALL { int sigreturn(struct ucontext *uctx, int infostyle) NO_SYSCALL_STUB; } +185 AUE_CHUD ALL { int chud(uint64_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) NO_SYSCALL_STUB; } +186 AUE_NULL ALL { int nosys(void); } +187 AUE_FDATASYNC ALL { int fdatasync(int fd); } +188 AUE_STAT ALL { int stat(user_addr_t path, user_addr_t ub); } +189 AUE_FSTAT ALL { int fstat(int fd, user_addr_t ub); } +190 AUE_LSTAT ALL { int lstat(user_addr_t path, user_addr_t ub); } +191 AUE_PATHCONF ALL { int pathconf(char *path, int name); } +192 AUE_FPATHCONF ALL { int fpathconf(int fd, int name); } +193 AUE_NULL ALL { int nosys(void); } +194 AUE_GETRLIMIT ALL { int getrlimit(u_int which, struct rlimit *rlp); } +195 AUE_SETRLIMIT ALL { int setrlimit(u_int which, struct rlimit *rlp); } +196 AUE_GETDIRENTRIES ALL { int getdirentries(int fd, char *buf, u_int count, long *basep); } +197 AUE_MMAP ALL { user_addr_t mmap(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos); } +198 AUE_NULL ALL { int nosys(void); } { __syscall } +199 AUE_LSEEK ALL { off_t lseek(int fd, off_t offset, int whence); } +200 AUE_TRUNCATE ALL { int truncate(char *path, off_t length); } +201 AUE_FTRUNCATE ALL { int ftruncate(int fd, off_t length); } +202 AUE_SYSCTL ALL { int __sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, void *new, size_t newlen); } +203 AUE_MLOCK ALL { int mlock(caddr_t addr, size_t len); } +204 AUE_MUNLOCK ALL { int 
munlock(caddr_t addr, size_t len); } +205 AUE_UNDELETE ALL { int undelete(user_addr_t path); } #if NETAT -206 ALL { int ATsocket(int proto); } -207 UALL { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } -208 UALL { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } -209 UALL { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } -210 UALL { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } -211 UALL { int ATPgetreq(int fd, unsigned char *buf, int buflen); } -212 UALL { int ATPgetrsp(int fd, unsigned char *bdsp); } -213 ALL { int nosys(void); } { Reserved for AppleTalk } +206 AUE_ATSOCKET ALL { int ATsocket(int proto); } +207 AUE_ATGETMSG UALL { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } +208 AUE_ATPUTMSG UALL { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } +209 AUE_ATPSNDREQ UALL { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } +210 AUE_ATPSNDRSP UALL { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } +211 AUE_ATPGETREQ UALL { int ATPgetreq(int fd, unsigned char *buf, int buflen); } +212 AUE_ATPGETRSP UALL { int ATPgetrsp(int fd, unsigned char *bdsp); } +213 AUE_NULL ALL { int nosys(void); } { Reserved for AppleTalk } #else -206 ALL { int nosys(void); } -207 ALL { int nosys(void); } -208 ALL { int nosys(void); } -209 ALL { int nosys(void); } -210 ALL { int nosys(void); } -211 ALL { int nosys(void); } -212 ALL { int nosys(void); } -213 ALL { int nosys(void); } { Reserved for AppleTalk } +206 AUE_NULL ALL { int nosys(void); } +207 AUE_NULL ALL { int nosys(void); } +208 AUE_NULL ALL { int nosys(void); } +209 AUE_NULL ALL { int nosys(void); } +210 AUE_NULL ALL { int nosys(void); } +211 AUE_NULL ALL { int nosys(void); } +212 AUE_NULL ALL { int nosys(void); } +213 AUE_NULL ALL { int nosys(void); } { Reserved for AppleTalk } #endif /* NETAT */ -214 ALL { int kqueue_from_portset_np(int portset); } -215 ALL { int 
kqueue_portset_np(int fd); } +214 AUE_NULL ALL { int nosys(void); } +215 AUE_NULL ALL { int nosys(void); } ; System Calls 216 - 230 are reserved for calls to support HFS/HFS Plus ; file system semantics. Currently, we only use 215-227. The rest is @@ -322,311 +322,326 @@ ; to HFS semantics, they are not specific to the HFS filesystem. ; We expect all filesystems to recognize the call and report that it is ; not supported or to actually implement it. -216 UHN { int mkcomplex(const char *path, mode_t mode, u_long type); } { soon to be obsolete } -217 UHN { int statv(const char *path, struct vstat *vsb); } { soon to be obsolete } -218 UHN { int lstatv(const char *path, struct vstat *vsb); } { soon to be obsolete } -219 UHN { int fstatv(int fd, struct vstat *vsb); } { soon to be obsolete } -220 ALL { int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } -221 ALL { int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } -222 ALL { int getdirentriesattr(int fd, struct attrlist *alist, void *buffer, size_t buffersize, u_long *count, u_long *basep, u_long *newstate, u_long options); } -223 ALL { int exchangedata(const char *path1, const char *path2, u_long options); } -224 ALL { int nosys(void); } { was checkuseraccess } -225 ALL { int searchfs(const char *path, struct fssearchblock *searchblock, u_long *nummatches, u_long scriptcode, u_long options, struct searchstate *state); } -226 ALL { int delete(user_addr_t path) NO_SYSCALL_STUB; } { private delete (Carbon semantics) } -227 ALL { int copyfile(char *from, char *to, int mode, int flags) NO_SYSCALL_STUB; } -228 ALL { int nosys(void); } -229 ALL { int nosys(void); } -230 ALL { int poll(struct pollfd *fds, u_int nfds, int timeout); } -231 ALL { int watchevent(struct eventreq *u_req, int u_eventmask); } -232 ALL { int waitevent(struct eventreq *u_req, struct timeval *tv); } -233 ALL { int 
modwatch(struct eventreq *u_req, int u_eventmask); } -234 ALL { user_ssize_t getxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } -235 ALL { user_ssize_t fgetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } -236 ALL { int setxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } -237 ALL { int fsetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } -238 ALL { int removexattr(user_addr_t path, user_addr_t attrname, int options); } -239 ALL { int fremovexattr(int fd, user_addr_t attrname, int options); } -240 ALL { user_ssize_t listxattr(user_addr_t path, user_addr_t namebuf, size_t bufsize, int options); } -241 ALL { user_ssize_t flistxattr(int fd, user_addr_t namebuf, size_t bufsize, int options); } -242 ALL { int fsctl(const char *path, u_long cmd, caddr_t data, u_long options); } -243 ALL { int initgroups(u_int gidsetsize, gid_t *gidset, int gmuid); } -244 ALL { int posix_spawn(pid_t *pid, const char *path, const struct _posix_spawn_args_desc *adesc, char **argv, char **envp); } -245 ALL { int nosys(void); } -246 ALL { int nosys(void); } +216 AUE_MKCOMPLEX UHN { int mkcomplex(const char *path, mode_t mode, u_long type); } { soon to be obsolete } +217 AUE_STATV UHN { int statv(const char *path, struct vstat *vsb); } { soon to be obsolete } +218 AUE_LSTATV UHN { int lstatv(const char *path, struct vstat *vsb); } { soon to be obsolete } +219 AUE_FSTATV UHN { int fstatv(int fd, struct vstat *vsb); } { soon to be obsolete } +220 AUE_GETATTRLIST ALL { int getattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +221 AUE_SETATTRLIST ALL { int setattrlist(const char *path, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +222 AUE_GETDIRENTRIESATTR ALL { int 
getdirentriesattr(int fd, struct attrlist *alist, void *buffer, size_t buffersize, u_long *count, u_long *basep, u_long *newstate, u_long options); } +223 AUE_EXCHANGEDATA ALL { int exchangedata(const char *path1, const char *path2, u_long options); } +224 AUE_NULL ALL { int nosys(void); } { old checkuseraccess / fsgetpath (which moved to 427) } +225 AUE_SEARCHFS ALL { int searchfs(const char *path, struct fssearchblock *searchblock, uint32_t *nummatches, uint32_t scriptcode, uint32_t options, struct searchstate *state); } +226 AUE_DELETE ALL { int delete(user_addr_t path) NO_SYSCALL_STUB; } { private delete (Carbon semantics) } +227 AUE_COPYFILE ALL { int copyfile(char *from, char *to, int mode, int flags) NO_SYSCALL_STUB; } +228 AUE_FGETATTRLIST ALL { int fgetattrlist(int fd, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +229 AUE_FSETATTRLIST ALL { int fsetattrlist(int fd, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); } +230 AUE_POLL ALL { int poll(struct pollfd *fds, u_int nfds, int timeout); } +231 AUE_WATCHEVENT ALL { int watchevent(struct eventreq *u_req, int u_eventmask); } +232 AUE_WAITEVENT ALL { int waitevent(struct eventreq *u_req, struct timeval *tv); } +233 AUE_MODWATCH ALL { int modwatch(struct eventreq *u_req, int u_eventmask); } +234 AUE_GETXATTR ALL { user_ssize_t getxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +235 AUE_FGETXATTR ALL { user_ssize_t fgetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +236 AUE_SETXATTR ALL { int setxattr(user_addr_t path, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +237 AUE_FSETXATTR ALL { int fsetxattr(int fd, user_addr_t attrname, user_addr_t value, size_t size, uint32_t position, int options); } +238 AUE_REMOVEXATTR ALL { int removexattr(user_addr_t path, user_addr_t 
attrname, int options); } +239 AUE_FREMOVEXATTR ALL { int fremovexattr(int fd, user_addr_t attrname, int options); } +240 AUE_LISTXATTR ALL { user_ssize_t listxattr(user_addr_t path, user_addr_t namebuf, size_t bufsize, int options); } +241 AUE_FLISTXATTR ALL { user_ssize_t flistxattr(int fd, user_addr_t namebuf, size_t bufsize, int options); } +242 AUE_FSCTL ALL { int fsctl(const char *path, u_long cmd, caddr_t data, u_int options); } +243 AUE_INITGROUPS ALL { int initgroups(u_int gidsetsize, gid_t *gidset, int gmuid); } +244 AUE_POSIX_SPAWN ALL { int posix_spawn(pid_t *pid, const char *path, const struct _posix_spawn_args_desc *adesc, char **argv, char **envp); } +245 AUE_FFSCTL ALL { int ffsctl(int fd, u_long cmd, caddr_t data, u_int options); } +246 AUE_NULL ALL { int nosys(void); } #if NFSCLIENT -247 ALL { int nfsclnt(int flag, caddr_t argp); } +247 AUE_NULL ALL { int nfsclnt(int flag, caddr_t argp); } #else -247 ALL { int nosys(void); } +247 AUE_NULL ALL { int nosys(void); } #endif #if NFSSERVER -248 ALL { int fhopen(const struct fhandle *u_fhp, int flags); } +248 AUE_FHOPEN ALL { int fhopen(const struct fhandle *u_fhp, int flags); } #else -248 ALL { int nosys(void); } +248 AUE_NULL ALL { int nosys(void); } #endif -249 ALL { int nosys(void); } -250 ALL { int minherit(void *addr, size_t len, int inherit); } +249 AUE_NULL ALL { int nosys(void); } +250 AUE_MINHERIT ALL { int minherit(void *addr, size_t len, int inherit); } #if SYSV_SEM -251 ALL { int semsys(u_int which, int a2, int a3, int a4, int a5); } +251 AUE_SEMSYS ALL { int semsys(u_int which, int a2, int a3, int a4, int a5); } #else -251 ALL { int nosys(void); } +251 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_MSG -252 ALL { int msgsys(u_int which, int a2, int a3, int a4, int a5); } +252 AUE_MSGSYS ALL { int msgsys(u_int which, int a2, int a3, int a4, int a5); } #else -252 ALL { int nosys(void); } +252 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_SHM -253 ALL { int shmsys(u_int which, int a2, 
int a3, int a4); } +253 AUE_SHMSYS ALL { int shmsys(u_int which, int a2, int a3, int a4); } #else -253 ALL { int nosys(void); } +253 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_SEM -254 ALL { int semctl(int semid, int semnum, int cmd, semun_t arg); } -255 ALL { int semget(key_t key, int nsems, int semflg); } -256 ALL { int semop(int semid, struct sembuf *sops, int nsops); } -257 ALL { int nosys(void); } +254 AUE_SEMCTL ALL { int semctl(int semid, int semnum, int cmd, semun_t arg); } +255 AUE_SEMGET ALL { int semget(key_t key, int nsems, int semflg); } +256 AUE_SEMOP ALL { int semop(int semid, struct sembuf *sops, int nsops); } +257 AUE_NULL ALL { int nosys(void); } #else -254 ALL { int nosys(void); } -255 ALL { int nosys(void); } -256 ALL { int nosys(void); } -257 ALL { int nosys(void); } +254 AUE_NULL ALL { int nosys(void); } +255 AUE_NULL ALL { int nosys(void); } +256 AUE_NULL ALL { int nosys(void); } +257 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_MSG -258 ALL { int msgctl(int msqid, int cmd, struct msqid_ds *buf); } -259 ALL { int msgget(key_t key, int msgflg); } -260 ALL { int msgsnd(int msqid, void *msgp, size_t msgsz, int msgflg); } -261 ALL { user_ssize_t msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg); } +258 AUE_MSGCTL ALL { int msgctl(int msqid, int cmd, struct msqid_ds *buf); } +259 AUE_MSGGET ALL { int msgget(key_t key, int msgflg); } +260 AUE_MSGSND ALL { int msgsnd(int msqid, void *msgp, size_t msgsz, int msgflg); } +261 AUE_MSGRCV ALL { user_ssize_t msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg); } #else -258 ALL { int nosys(void); } -259 ALL { int nosys(void); } -260 ALL { int nosys(void); } -261 ALL { int nosys(void); } +258 AUE_NULL ALL { int nosys(void); } +259 AUE_NULL ALL { int nosys(void); } +260 AUE_NULL ALL { int nosys(void); } +261 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_SHM -262 ALL { user_addr_t shmat(int shmid, void *shmaddr, int shmflg); } -263 ALL { int shmctl(int 
shmid, int cmd, struct shmid_ds *buf); } -264 ALL { int shmdt(void *shmaddr); } -265 ALL { int shmget(key_t key, size_t size, int shmflg); } +262 AUE_SHMAT ALL { user_addr_t shmat(int shmid, void *shmaddr, int shmflg); } +263 AUE_SHMCTL ALL { int shmctl(int shmid, int cmd, struct shmid_ds *buf); } +264 AUE_SHMDT ALL { int shmdt(void *shmaddr); } +265 AUE_SHMGET ALL { int shmget(key_t key, size_t size, int shmflg); } #else -262 ALL { int nosys(void); } -263 ALL { int nosys(void); } -264 ALL { int nosys(void); } -265 ALL { int nosys(void); } +262 AUE_NULL ALL { int nosys(void); } +263 AUE_NULL ALL { int nosys(void); } +264 AUE_NULL ALL { int nosys(void); } +265 AUE_NULL ALL { int nosys(void); } #endif -266 ALL { int shm_open(const char *name, int oflag, int mode); } -267 ALL { int shm_unlink(const char *name); } -268 ALL { user_addr_t sem_open(const char *name, int oflag, int mode, int value); } -269 ALL { int sem_close(sem_t *sem); } -270 ALL { int sem_unlink(const char *name); } -271 ALL { int sem_wait(sem_t *sem); } -272 ALL { int sem_trywait(sem_t *sem); } -273 ALL { int sem_post(sem_t *sem); } -274 ALL { int sem_getvalue(sem_t *sem, int *sval); } -275 ALL { int sem_init(sem_t *sem, int phsared, u_int value); } -276 ALL { int sem_destroy(sem_t *sem); } -277 ALL { int open_extended(user_addr_t path, int flags, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } -278 ALL { int umask_extended(int newmask, user_addr_t xsecurity) NO_SYSCALL_STUB; } -279 ALL { int stat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -280 ALL { int lstat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -281 ALL { int fstat_extended(int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -282 ALL { int chmod_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) 
NO_SYSCALL_STUB; } -283 ALL { int fchmod_extended(int fd, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } -284 ALL { int access_extended(user_addr_t entries, size_t size, user_addr_t results, uid_t uid) NO_SYSCALL_STUB; } -285 ALL { int settid(uid_t uid, gid_t gid) NO_SYSCALL_STUB; } -286 ALL { int gettid(uid_t *uidp, gid_t *gidp) NO_SYSCALL_STUB; } -287 ALL { int setsgroups(int setlen, user_addr_t guidset) NO_SYSCALL_STUB; } -288 ALL { int getsgroups(user_addr_t setlen, user_addr_t guidset) NO_SYSCALL_STUB; } -289 ALL { int setwgroups(int setlen, user_addr_t guidset) NO_SYSCALL_STUB; } -290 ALL { int getwgroups(user_addr_t setlen, user_addr_t guidset) NO_SYSCALL_STUB; } -291 ALL { int mkfifo_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } -292 ALL { int mkdir_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } -293 ALL { int identitysvc(int opcode, user_addr_t message) NO_SYSCALL_STUB; } -294 ALL { int shared_region_check_np(uint64_t *start_address) NO_SYSCALL_STUB; } -295 ALL { int shared_region_map_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings) NO_SYSCALL_STUB; } -296 ALL { int nosys(void); } { old load_shared_file } -297 ALL { int nosys(void); } { old reset_shared_file } -298 ALL { int nosys(void); } { old new_system_shared_regions } -299 ALL { int enosys(void); } { old shared_region_map_file_np } -300 ALL { int enosys(void); } { old shared_region_make_private_np } -301 ALL { int __pthread_mutex_destroy(int mutexid); } -302 ALL { int __pthread_mutex_init(user_addr_t mutex, user_addr_t attr); } -303 ALL { int __pthread_mutex_lock(int mutexid); } -304 ALL { int __pthread_mutex_trylock(int mutexid); } -305 ALL { int __pthread_mutex_unlock(int mutexid); } -306 ALL { int __pthread_cond_init(user_addr_t cond, user_addr_t attr); } -307 ALL { int __pthread_cond_destroy(int condid); } -308 ALL { int 
__pthread_cond_broadcast(int condid); } -309 ALL { int __pthread_cond_signal(int condid); } -310 ALL { int getsid(pid_t pid); } -311 ALL { int settid_with_pid(pid_t pid, int assume) NO_SYSCALL_STUB; } -312 ALL { int __pthread_cond_timedwait(int condid, int mutexid, user_addr_t abstime); } -313 ALL { int aio_fsync(int op, user_addr_t aiocbp); } -314 ALL { user_ssize_t aio_return(user_addr_t aiocbp); } -315 ALL { int aio_suspend(user_addr_t aiocblist, int nent, user_addr_t timeoutp); } -316 ALL { int aio_cancel(int fd, user_addr_t aiocbp); } -317 ALL { int aio_error(user_addr_t aiocbp); } -318 ALL { int aio_read(user_addr_t aiocbp); } -319 ALL { int aio_write(user_addr_t aiocbp); } -320 ALL { int lio_listio(int mode, user_addr_t aiocblist, int nent, user_addr_t sigp); } -321 ALL { int __pthread_cond_wait(int condid, int mutexid); } -322 ALL { int iopolicysys(int cmd, void *arg) NO_SYSCALL_STUB; } -323 ALL { int nosys(void); } -324 ALL { int mlockall(int how); } -325 ALL { int munlockall(int how); } -326 ALL { int nosys(void); } -327 ALL { int issetugid(void); } -328 ALL { int __pthread_kill(int thread_port, int sig); } -329 ALL { int __pthread_sigmask(int how, user_addr_t set, user_addr_t oset); } -330 ALL { int __sigwait(user_addr_t set, user_addr_t sig); } -331 ALL { int __disable_threadsignal(int value); } -332 ALL { int __pthread_markcancel(int thread_port); } -333 ALL { int __pthread_canceled(int action); } -334 ALL { int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, time_t tv_sec, int32_t tv_nsec); } -335 ALL { int nosys(void); } { old utrace } -336 ALL { int proc_info(int32_t callnum,int32_t pid,uint32_t flavor, uint64_t arg,user_addr_t buffer,int32_t buffersize) NO_SYSCALL_STUB; } -#if SENDFILE -337 ALL { int sendfile(int fd, int s, off_t offset, off_t *nbytes, struct sf_hdtr *hdtr, int flags); } -#else /* !SENDFILE */ -337 ALL { int nosys(void); } -#endif /* SENDFILE */ -338 ALL { int stat64(user_addr_t path, user_addr_t ub); } 
-339 ALL { int fstat64(int fd, user_addr_t ub); } -340 ALL { int lstat64(user_addr_t path, user_addr_t ub); } -341 ALL { int stat64_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -342 ALL { int lstat64_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -343 ALL { int fstat64_extended(int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } -344 ALL { user_ssize_t getdirentries64(int fd, void *buf, user_size_t bufsize, off_t *position) NO_SYSCALL_STUB; } -345 ALL { int statfs64(char *path, struct statfs64 *buf); } -346 ALL { int fstatfs64(int fd, struct statfs64 *buf); } -347 ALL { int getfsstat64(user_addr_t buf, int bufsize, int flags); } -348 ALL { int __pthread_chdir(user_addr_t path); } -349 ALL { int __pthread_fchdir(int fd); } - -#if AUDIT -350 ALL { int audit(void *record, int length); } -351 ALL { int auditon(int cmd, void *data, int length); } -352 ALL { int nosys(void); } -353 ALL { int getauid(au_id_t *auid); } -354 ALL { int setauid(au_id_t *auid); } -355 ALL { int getaudit(struct auditinfo *auditinfo); } -356 ALL { int setaudit(struct auditinfo *auditinfo); } -357 ALL { int getaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } -358 ALL { int setaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } -359 ALL { int auditctl(char *path); } +266 AUE_SHMOPEN ALL { int shm_open(const char *name, int oflag, int mode); } +267 AUE_SHMUNLINK ALL { int shm_unlink(const char *name); } +268 AUE_SEMOPEN ALL { user_addr_t sem_open(const char *name, int oflag, int mode, int value); } +269 AUE_SEMCLOSE ALL { int sem_close(sem_t *sem); } +270 AUE_SEMUNLINK ALL { int sem_unlink(const char *name); } +271 AUE_SEMWAIT ALL { int sem_wait(sem_t *sem); } +272 AUE_SEMTRYWAIT ALL { int sem_trywait(sem_t *sem); } +273 AUE_SEMPOST ALL { int sem_post(sem_t *sem); } +274 AUE_SEMGETVALUE ALL { int 
sem_getvalue(sem_t *sem, int *sval); } +275 AUE_SEMINIT ALL { int sem_init(sem_t *sem, int phsared, u_int value); } +276 AUE_SEMDESTROY ALL { int sem_destroy(sem_t *sem); } +277 AUE_OPEN_EXTENDED_RWTC ALL { int open_extended(user_addr_t path, int flags, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } +278 AUE_UMASK_EXTENDED ALL { int umask_extended(int newmask, user_addr_t xsecurity) NO_SYSCALL_STUB; } +279 AUE_STAT_EXTENDED ALL { int stat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +280 AUE_LSTAT_EXTENDED ALL { int lstat_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +281 AUE_FSTAT_EXTENDED ALL { int fstat_extended(int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +282 AUE_CHMOD_EXTENDED ALL { int chmod_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } +283 AUE_FCHMOD_EXTENDED ALL { int fchmod_extended(int fd, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } +284 AUE_ACCESS_EXTENDED ALL { int access_extended(user_addr_t entries, size_t size, user_addr_t results, uid_t uid) NO_SYSCALL_STUB; } +285 AUE_SETTID ALL { int settid(uid_t uid, gid_t gid) NO_SYSCALL_STUB; } +286 AUE_GETTID ALL { int gettid(uid_t *uidp, gid_t *gidp) NO_SYSCALL_STUB; } +287 AUE_SETSGROUPS ALL { int setsgroups(int setlen, user_addr_t guidset) NO_SYSCALL_STUB; } +288 AUE_GETSGROUPS ALL { int getsgroups(user_addr_t setlen, user_addr_t guidset) NO_SYSCALL_STUB; } +289 AUE_SETWGROUPS ALL { int setwgroups(int setlen, user_addr_t guidset) NO_SYSCALL_STUB; } +290 AUE_GETWGROUPS ALL { int getwgroups(user_addr_t setlen, user_addr_t guidset) NO_SYSCALL_STUB; } +291 AUE_MKFIFO_EXTENDED ALL { int mkfifo_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } +292 AUE_MKDIR_EXTENDED ALL 
{ int mkdir_extended(user_addr_t path, uid_t uid, gid_t gid, int mode, user_addr_t xsecurity) NO_SYSCALL_STUB; } +293 AUE_IDENTITYSVC ALL { int identitysvc(int opcode, user_addr_t message) NO_SYSCALL_STUB; } +294 AUE_NULL ALL { int shared_region_check_np(uint64_t *start_address) NO_SYSCALL_STUB; } +295 AUE_NULL ALL { int shared_region_map_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings) NO_SYSCALL_STUB; } +296 AUE_NULL ALL { int vm_pressure_monitor(int wait_for_pressure, int nsecs_monitored, uint32_t *pages_reclaimed); } +#if PSYNCH +297 AUE_NULL ALL { uint32_t psynch_rw_longrdlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +298 AUE_NULL ALL { uint32_t psynch_rw_yieldwrlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +299 AUE_NULL ALL { int psynch_rw_downgrade(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +300 AUE_NULL ALL { uint32_t psynch_rw_upgrade(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +301 AUE_NULL ALL { uint32_t psynch_mutexwait(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; } +302 AUE_NULL ALL { uint32_t psynch_mutexdrop(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; } +303 AUE_NULL ALL { int psynch_cvbroad(user_addr_t cv, uint32_t cvgen, uint32_t diffgen, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags) NO_SYSCALL_STUB; } +304 AUE_NULL ALL { int psynch_cvsignal(user_addr_t cv, uint32_t cvgen, uint32_t cvugen, user_addr_t mutex, uint32_t mgen, uint32_t ugen, int thread_port, uint32_t flags) NO_SYSCALL_STUB; } +305 AUE_NULL ALL { uint32_t psynch_cvwait(user_addr_t cv, uint32_t cvgen, uint32_t cvugen, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t sec, uint64_t usec) 
NO_SYSCALL_STUB; } +306 AUE_NULL ALL { uint32_t psynch_rw_rdlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +307 AUE_NULL ALL { uint32_t psynch_rw_wrlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +308 AUE_NULL ALL { uint32_t psynch_rw_unlock(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } +309 AUE_NULL ALL { uint32_t psynch_rw_unlock2(user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags) NO_SYSCALL_STUB; } #else -350 ALL { int nosys(void); } -351 ALL { int nosys(void); } -352 ALL { int nosys(void); } -353 ALL { int nosys(void); } -354 ALL { int nosys(void); } -355 ALL { int nosys(void); } -356 ALL { int nosys(void); } -357 ALL { int nosys(void); } -358 ALL { int nosys(void); } -359 ALL { int nosys(void); } +297 AUE_NULL ALL { int nosys(void); } { old reset_shared_file } +298 AUE_NULL ALL { int nosys(void); } { old new_system_shared_regions } +299 AUE_NULL ALL { int enosys(void); } { old shared_region_map_file_np } +300 AUE_NULL ALL { int enosys(void); } { old shared_region_make_private_np } +301 AUE_NULL ALL { int nosys(void); } +302 AUE_NULL ALL { int nosys(void); } +303 AUE_NULL ALL { int nosys(void); } +304 AUE_NULL ALL { int nosys(void); } +305 AUE_NULL ALL { int nosys(void); } +306 AUE_NULL ALL { int nosys(void); } +307 AUE_NULL ALL { int nosys(void); } +308 AUE_NULL ALL { int nosys(void); } +309 AUE_NULL ALL { int nosys(void); } #endif +310 AUE_GETSID ALL { int getsid(pid_t pid); } +311 AUE_SETTIDWITHPID ALL { int settid_with_pid(pid_t pid, int assume) NO_SYSCALL_STUB; } +312 AUE_NULL ALL { int nosys(void); } { old __pthread_cond_timedwait } +313 AUE_NULL ALL { int aio_fsync(int op, user_addr_t aiocbp); } +314 AUE_NULL ALL { user_ssize_t aio_return(user_addr_t aiocbp); } +315 AUE_NULL ALL { int aio_suspend(user_addr_t aiocblist, int nent, user_addr_t timeoutp); 
} +316 AUE_NULL ALL { int aio_cancel(int fd, user_addr_t aiocbp); } +317 AUE_NULL ALL { int aio_error(user_addr_t aiocbp); } +318 AUE_NULL ALL { int aio_read(user_addr_t aiocbp); } +319 AUE_NULL ALL { int aio_write(user_addr_t aiocbp); } +320 AUE_LIOLISTIO ALL { int lio_listio(int mode, user_addr_t aiocblist, int nent, user_addr_t sigp); } +321 AUE_NULL ALL { int nosys(void); } { old __pthread_cond_wait } +322 AUE_IOPOLICYSYS ALL { int iopolicysys(int cmd, void *arg) NO_SYSCALL_STUB; } +323 AUE_NULL ALL { int nosys(void); } +324 AUE_MLOCKALL ALL { int mlockall(int how); } +325 AUE_MUNLOCKALL ALL { int munlockall(int how); } +326 AUE_NULL ALL { int nosys(void); } +327 AUE_ISSETUGID ALL { int issetugid(void); } +328 AUE_PTHREADKILL ALL { int __pthread_kill(int thread_port, int sig); } +329 AUE_PTHREADSIGMASK ALL { int __pthread_sigmask(int how, user_addr_t set, user_addr_t oset); } +330 AUE_SIGWAIT ALL { int __sigwait(user_addr_t set, user_addr_t sig); } +331 AUE_NULL ALL { int __disable_threadsignal(int value); } +332 AUE_NULL ALL { int __pthread_markcancel(int thread_port); } +333 AUE_NULL ALL { int __pthread_canceled(int action); } + +;#if OLD_SEMWAIT_SIGNAL +;334 AUE_NULL ALL { int nosys(void); } { old __semwait_signal } +;#else +334 AUE_SEMWAITSIGNAL ALL { int __semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, int64_t tv_sec, int32_t tv_nsec); } +;#endif +335 AUE_NULL ALL { int nosys(void); } { old utrace } +336 AUE_PROCINFO ALL { int proc_info(int32_t callnum,int32_t pid,uint32_t flavor, uint64_t arg,user_addr_t buffer,int32_t buffersize) NO_SYSCALL_STUB; } +#if SENDFILE +337 AUE_SENDFILE ALL { int sendfile(int fd, int s, off_t offset, off_t *nbytes, struct sf_hdtr *hdtr, int flags); } +#else /* !SENDFILE */ +337 AUE_NULL ALL { int nosys(void); } +#endif /* SENDFILE */ +338 AUE_STAT64 ALL { int stat64(user_addr_t path, user_addr_t ub); } +339 AUE_FSTAT64 ALL { int fstat64(int fd, user_addr_t ub); } +340 AUE_LSTAT64 ALL { int 
lstat64(user_addr_t path, user_addr_t ub); } +341 AUE_STAT64_EXTENDED ALL { int stat64_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +342 AUE_LSTAT64_EXTENDED ALL { int lstat64_extended(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +343 AUE_FSTAT64_EXTENDED ALL { int fstat64_extended(int fd, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size) NO_SYSCALL_STUB; } +344 AUE_GETDIRENTRIES64 ALL { user_ssize_t getdirentries64(int fd, void *buf, user_size_t bufsize, off_t *position) NO_SYSCALL_STUB; } +345 AUE_STATFS64 ALL { int statfs64(char *path, struct statfs64 *buf); } +346 AUE_FSTATFS64 ALL { int fstatfs64(int fd, struct statfs64 *buf); } +347 AUE_GETFSSTAT64 ALL { int getfsstat64(user_addr_t buf, int bufsize, int flags); } +348 AUE_NULL ALL { int __pthread_chdir(user_addr_t path); } +349 AUE_NULL ALL { int __pthread_fchdir(int fd); } +350 AUE_AUDIT ALL { int audit(void *record, int length); } +351 AUE_AUDITON ALL { int auditon(int cmd, void *data, int length); } +352 AUE_NULL ALL { int nosys(void); } +353 AUE_GETAUID ALL { int getauid(au_id_t *auid); } +354 AUE_SETAUID ALL { int setauid(au_id_t *auid); } +355 AUE_GETAUDIT ALL { int getaudit(struct auditinfo *auditinfo); } +356 AUE_SETAUDIT ALL { int setaudit(struct auditinfo *auditinfo); } +357 AUE_GETAUDIT_ADDR ALL { int getaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } +358 AUE_SETAUDIT_ADDR ALL { int setaudit_addr(struct auditinfo_addr *auditinfo_addr, int length); } +359 AUE_AUDITCTL ALL { int auditctl(char *path); } #if CONFIG_WORKQUEUE -360 ALL { user_addr_t bsdthread_create(user_addr_t func, user_addr_t func_arg, user_addr_t stack, user_addr_t pthread, uint32_t flags) NO_SYSCALL_STUB; } -361 ALL { int bsdthread_terminate(user_addr_t stackaddr, size_t freesize, uint32_t port, uint32_t sem) NO_SYSCALL_STUB; } +360 AUE_NULL ALL { user_addr_t 
bsdthread_create(user_addr_t func, user_addr_t func_arg, user_addr_t stack, user_addr_t pthread, uint32_t flags) NO_SYSCALL_STUB; } +361 AUE_NULL ALL { int bsdthread_terminate(user_addr_t stackaddr, size_t freesize, uint32_t port, uint32_t sem) NO_SYSCALL_STUB; } #else -360 ALL { int nosys(void); } -361 ALL { int nosys(void); } -#endif - -362 ALL { int kqueue(void); } -363 ALL { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } -364 ALL { int lchown(user_addr_t path, uid_t owner, gid_t group); } -365 ALL { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; } - +360 AUE_NULL ALL { int nosys(void); } +361 AUE_NULL ALL { int nosys(void); } +#endif /* CONFIG_WORKQUEUE */ +362 AUE_KQUEUE ALL { int kqueue(void); } +363 AUE_NULL ALL { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } +364 AUE_LCHOWN ALL { int lchown(user_addr_t path, uid_t owner, gid_t group); } +365 AUE_STACKSNAPSHOT ALL { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; } #if CONFIG_WORKQUEUE -366 ALL { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize) NO_SYSCALL_STUB; } -367 ALL { int workq_open(void) NO_SYSCALL_STUB; } -368 ALL { int workq_ops(int options, user_addr_t item, int prio) NO_SYSCALL_STUB; } +366 AUE_NULL ALL { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize,user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset) NO_SYSCALL_STUB; } +367 AUE_WORKQOPEN ALL { int workq_open(void) NO_SYSCALL_STUB; } +368 AUE_WORKQOPS ALL { int workq_kernreturn(int options, user_addr_t item, int affinity, int prio) NO_SYSCALL_STUB; } +#else +366 AUE_NULL ALL { int nosys(void); } +367 AUE_NULL ALL { int nosys(void); } +368 
AUE_NULL ALL { int nosys(void); } +#endif /* CONFIG_WORKQUEUE */ +369 AUE_NULL ALL { int kevent64(int fd, const struct kevent64_s *changelist, int nchanges, struct kevent64_s *eventlist, int nevents, unsigned int flags, const struct timespec *timeout); } +#if OLD_SEMWAIT_SIGNAL +370 AUE_SEMWAITSIGNAL ALL { int __old_semwait_signal(int cond_sem, int mutex_sem, int timeout, int relative, const struct timespec *ts); } +371 AUE_SEMWAITSIGNAL ALL { int __old_semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, const struct timespec *ts) NO_SYSCALL_STUB; } #else -366 ALL { int nosys(void); } -367 ALL { int nosys(void); } -368 ALL { int nosys(void); } +370 AUE_NULL ALL { int nosys(void); } { old __semwait_signal } +371 AUE_NULL ALL { int nosys(void); } { old __semwait_signal } #endif - -369 ALL { int nosys(void); } -370 ALL { int nosys(void); } -371 ALL { int nosys(void); } -372 ALL { int nosys(void); } -373 ALL { int nosys(void); } -374 ALL { int nosys(void); } -375 ALL { int nosys(void); } -376 ALL { int nosys(void); } -377 ALL { int nosys(void); } -378 ALL { int nosys(void); } -379 ALL { int nosys(void); } -380 ALL { int __mac_execve(char *fname, char **argp, char **envp, struct mac *mac_p); } -381 ALL { int __mac_syscall(char *policy, int call, user_addr_t arg); } -382 ALL { int __mac_get_file(char *path_p, struct mac *mac_p); } -383 ALL { int __mac_set_file(char *path_p, struct mac *mac_p); } -384 ALL { int __mac_get_link(char *path_p, struct mac *mac_p); } -385 ALL { int __mac_set_link(char *path_p, struct mac *mac_p); } -386 ALL { int __mac_get_proc(struct mac *mac_p); } -387 ALL { int __mac_set_proc(struct mac *mac_p); } -388 ALL { int __mac_get_fd(int fd, struct mac *mac_p); } -389 ALL { int __mac_set_fd(int fd, struct mac *mac_p); } -390 ALL { int __mac_get_pid(pid_t pid, struct mac *mac_p); } -391 ALL { int __mac_get_lcid(pid_t lcid, struct mac *mac_p); } -392 ALL { int __mac_get_lctx(struct mac *mac_p); } -393 ALL { int 
__mac_set_lctx(struct mac *mac_p); } -394 ALL { int setlcid(pid_t pid, pid_t lcid) NO_SYSCALL_STUB; } -395 ALL { int getlcid(pid_t pid) NO_SYSCALL_STUB; } -396 ALL { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } -397 ALL { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } -398 ALL { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } -399 ALL { int close_nocancel(int fd) NO_SYSCALL_STUB; } -400 ALL { int wait4_nocancel(int pid, user_addr_t status, int options, user_addr_t rusage) NO_SYSCALL_STUB; } +372 AUE_NULL ALL { user_addr_t thread_selfid (void) NO_SYSCALL_STUB; } +373 AUE_NULL ALL { int nosys(void); } +374 AUE_NULL ALL { int nosys(void); } +375 AUE_NULL ALL { int nosys(void); } +376 AUE_NULL ALL { int nosys(void); } +377 AUE_NULL ALL { int nosys(void); } +378 AUE_NULL ALL { int nosys(void); } +379 AUE_NULL ALL { int nosys(void); } +380 AUE_MAC_EXECVE ALL { int __mac_execve(char *fname, char **argp, char **envp, struct mac *mac_p); } +381 AUE_MAC_SYSCALL ALL { int __mac_syscall(char *policy, int call, user_addr_t arg); } +382 AUE_MAC_GET_FILE ALL { int __mac_get_file(char *path_p, struct mac *mac_p); } +383 AUE_MAC_SET_FILE ALL { int __mac_set_file(char *path_p, struct mac *mac_p); } +384 AUE_MAC_GET_LINK ALL { int __mac_get_link(char *path_p, struct mac *mac_p); } +385 AUE_MAC_SET_LINK ALL { int __mac_set_link(char *path_p, struct mac *mac_p); } +386 AUE_MAC_GET_PROC ALL { int __mac_get_proc(struct mac *mac_p); } +387 AUE_MAC_SET_PROC ALL { int __mac_set_proc(struct mac *mac_p); } +388 AUE_MAC_GET_FD ALL { int __mac_get_fd(int fd, struct mac *mac_p); } +389 AUE_MAC_SET_FD ALL { int __mac_set_fd(int fd, struct mac *mac_p); } +390 AUE_MAC_GET_PID ALL { int __mac_get_pid(pid_t pid, struct mac *mac_p); } +391 AUE_MAC_GET_LCID ALL { int __mac_get_lcid(pid_t lcid, struct mac *mac_p); } +392 AUE_MAC_GET_LCTX ALL { int __mac_get_lctx(struct mac *mac_p); } 
+393 AUE_MAC_SET_LCTX ALL { int __mac_set_lctx(struct mac *mac_p); } +394 AUE_SETLCID ALL { int setlcid(pid_t pid, pid_t lcid) NO_SYSCALL_STUB; } +395 AUE_GETLCID ALL { int getlcid(pid_t pid) NO_SYSCALL_STUB; } +396 AUE_NULL ALL { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } +397 AUE_NULL ALL { user_ssize_t write_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } +398 AUE_OPEN_RWTC ALL { int open_nocancel(user_addr_t path, int flags, int mode) NO_SYSCALL_STUB; } +399 AUE_CLOSE ALL { int close_nocancel(int fd) NO_SYSCALL_STUB; } +400 AUE_WAIT4 ALL { int wait4_nocancel(int pid, user_addr_t status, int options, user_addr_t rusage) NO_SYSCALL_STUB; } #if SOCKETS -401 ALL { int recvmsg_nocancel(int s, struct msghdr *msg, int flags) NO_SYSCALL_STUB; } -402 ALL { int sendmsg_nocancel(int s, caddr_t msg, int flags) NO_SYSCALL_STUB; } -403 ALL { int recvfrom_nocancel(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr) NO_SYSCALL_STUB; } -404 ALL { int accept_nocancel(int s, caddr_t name, socklen_t *anamelen) NO_SYSCALL_STUB; } +401 AUE_RECVMSG ALL { int recvmsg_nocancel(int s, struct msghdr *msg, int flags) NO_SYSCALL_STUB; } +402 AUE_SENDMSG ALL { int sendmsg_nocancel(int s, caddr_t msg, int flags) NO_SYSCALL_STUB; } +403 AUE_RECVFROM ALL { int recvfrom_nocancel(int s, void *buf, size_t len, int flags, struct sockaddr *from, int *fromlenaddr) NO_SYSCALL_STUB; } +404 AUE_ACCEPT ALL { int accept_nocancel(int s, caddr_t name, socklen_t *anamelen) NO_SYSCALL_STUB; } #else -401 ALL { int nosys(void); } -402 ALL { int nosys(void); } -403 ALL { int nosys(void); } -404 ALL { int nosys(void); } +401 AUE_NULL ALL { int nosys(void); } +402 AUE_NULL ALL { int nosys(void); } +403 AUE_NULL ALL { int nosys(void); } +404 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -405 ALL { int msync_nocancel(caddr_t addr, size_t len, int flags) NO_SYSCALL_STUB; } -406 ALL { int fcntl_nocancel(int 
fd, int cmd, long arg) NO_SYSCALL_STUB; } -407 ALL { int select_nocancel(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv) NO_SYSCALL_STUB; } -408 ALL { int fsync_nocancel(int fd) NO_SYSCALL_STUB; } +405 AUE_MSYNC ALL { int msync_nocancel(caddr_t addr, size_t len, int flags) NO_SYSCALL_STUB; } +406 AUE_FCNTL ALL { int fcntl_nocancel(int fd, int cmd, long arg) NO_SYSCALL_STUB; } +407 AUE_SELECT ALL { int select_nocancel(int nd, u_int32_t *in, u_int32_t *ou, u_int32_t *ex, struct timeval *tv) NO_SYSCALL_STUB; } +408 AUE_FSYNC ALL { int fsync_nocancel(int fd) NO_SYSCALL_STUB; } #if SOCKETS -409 ALL { int connect_nocancel(int s, caddr_t name, socklen_t namelen) NO_SYSCALL_STUB; } +409 AUE_CONNECT ALL { int connect_nocancel(int s, caddr_t name, socklen_t namelen) NO_SYSCALL_STUB; } #else -409 ALL { int nosys(void); } +409 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -410 ALL { int sigsuspend_nocancel(sigset_t mask) NO_SYSCALL_STUB; } -411 ALL { user_ssize_t readv_nocancel(int fd, struct iovec *iovp, u_int iovcnt) NO_SYSCALL_STUB; } -412 ALL { user_ssize_t writev_nocancel(int fd, struct iovec *iovp, u_int iovcnt) NO_SYSCALL_STUB; } +410 AUE_NULL ALL { int sigsuspend_nocancel(sigset_t mask) NO_SYSCALL_STUB; } +411 AUE_READV ALL { user_ssize_t readv_nocancel(int fd, struct iovec *iovp, u_int iovcnt) NO_SYSCALL_STUB; } +412 AUE_WRITEV ALL { user_ssize_t writev_nocancel(int fd, struct iovec *iovp, u_int iovcnt) NO_SYSCALL_STUB; } #if SOCKETS -413 ALL { int sendto_nocancel(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen) NO_SYSCALL_STUB; } +413 AUE_SENDTO ALL { int sendto_nocancel(int s, caddr_t buf, size_t len, int flags, caddr_t to, socklen_t tolen) NO_SYSCALL_STUB; } #else -413 ALL { int nosys(void); } +413 AUE_NULL ALL { int nosys(void); } #endif /* SOCKETS */ -414 ALL { user_ssize_t pread_nocancel(int fd, user_addr_t buf, user_size_t nbyte, off_t offset) NO_SYSCALL_STUB; } -415 ALL { user_ssize_t pwrite_nocancel(int 
fd, user_addr_t buf, user_size_t nbyte, off_t offset) NO_SYSCALL_STUB; } -416 ALL { int waitid_nocancel(idtype_t idtype, id_t id, siginfo_t *infop, int options) NO_SYSCALL_STUB; } -417 ALL { int poll_nocancel(struct pollfd *fds, u_int nfds, int timeout) NO_SYSCALL_STUB; } +414 AUE_PREAD ALL { user_ssize_t pread_nocancel(int fd, user_addr_t buf, user_size_t nbyte, off_t offset) NO_SYSCALL_STUB; } +415 AUE_PWRITE ALL { user_ssize_t pwrite_nocancel(int fd, user_addr_t buf, user_size_t nbyte, off_t offset) NO_SYSCALL_STUB; } +416 AUE_WAITID ALL { int waitid_nocancel(idtype_t idtype, id_t id, siginfo_t *infop, int options) NO_SYSCALL_STUB; } +417 AUE_POLL ALL { int poll_nocancel(struct pollfd *fds, u_int nfds, int timeout) NO_SYSCALL_STUB; } #if SYSV_MSG -418 ALL { int msgsnd_nocancel(int msqid, void *msgp, size_t msgsz, int msgflg) NO_SYSCALL_STUB; } -419 ALL { user_ssize_t msgrcv_nocancel(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg) NO_SYSCALL_STUB; } +418 AUE_MSGSND ALL { int msgsnd_nocancel(int msqid, void *msgp, size_t msgsz, int msgflg) NO_SYSCALL_STUB; } +419 AUE_MSGRCV ALL { user_ssize_t msgrcv_nocancel(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg) NO_SYSCALL_STUB; } #else -418 ALL { int nosys(void); } -419 ALL { int nosys(void); } +418 AUE_NULL ALL { int nosys(void); } +419 AUE_NULL ALL { int nosys(void); } #endif -420 ALL { int sem_wait_nocancel(sem_t *sem) NO_SYSCALL_STUB; } -421 ALL { int aio_suspend_nocancel(user_addr_t aiocblist, int nent, user_addr_t timeoutp) NO_SYSCALL_STUB; } -422 ALL { int __sigwait_nocancel(user_addr_t set, user_addr_t sig) NO_SYSCALL_STUB; } -423 ALL { int __semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, time_t tv_sec, int32_t tv_nsec) NO_SYSCALL_STUB; } -424 ALL { int __mac_mount(char *type, char *path, int flags, caddr_t data, struct mac *mac_p); } -425 ALL { int __mac_get_mount(char *path, struct mac *mac_p); } -426 ALL { int __mac_getfsstat(user_addr_t buf, int 
bufsize, user_addr_t mac, int macsize, int flags); } - +420 AUE_SEMWAIT ALL { int sem_wait_nocancel(sem_t *sem) NO_SYSCALL_STUB; } +421 AUE_NULL ALL { int aio_suspend_nocancel(user_addr_t aiocblist, int nent, user_addr_t timeoutp) NO_SYSCALL_STUB; } +422 AUE_SIGWAIT ALL { int __sigwait_nocancel(user_addr_t set, user_addr_t sig) NO_SYSCALL_STUB; } +;#if OLD_SEMWAIT_SIGNAL +;423 AUE_NULL ALL { int nosys(void); } { old __semwait_signal_nocancel } +;#else +423 AUE_SEMWAITSIGNAL ALL { int __semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, int64_t tv_sec, int32_t tv_nsec) NO_SYSCALL_STUB;} +;#endif +424 AUE_MAC_MOUNT ALL { int __mac_mount(char *type, char *path, int flags, caddr_t data, struct mac *mac_p); } +425 AUE_MAC_GET_MOUNT ALL { int __mac_get_mount(char *path, struct mac *mac_p); } +426 AUE_MAC_GETFSSTAT ALL { int __mac_getfsstat(user_addr_t buf, int bufsize, user_addr_t mac, int macsize, int flags); } +427 AUE_FSGETPATH ALL { user_ssize_t fsgetpath(user_addr_t buf, size_t bufsize, user_addr_t fsid, uint64_t objid) NO_SYSCALL_STUB; } { private fsgetpath (File Manager SPI) } +428 AUE_NULL ALL { mach_port_name_t audit_session_self(void); } +429 AUE_NULL ALL { int audit_session_join(mach_port_name_t port); } diff --git a/bsd/kern/sysv_msg.c b/bsd/kern/sysv_msg.c index 3b85639bd..7ed083eb9 100644 --- a/bsd/kern/sysv_msg.c +++ b/bsd/kern/sysv_msg.c @@ -59,7 +59,7 @@ #include #include -#include +#include #include #include @@ -151,7 +151,21 @@ sysv_msgtime(void) * NOTE: Source and target may *NOT* overlap! 
(target is smaller) */ static void -msqid_ds_64to32(struct user_msqid_ds *in, struct msqid_ds *out) +msqid_ds_kerneltouser32(struct user_msqid_ds *in, struct user32_msqid_ds *out) +{ + out->msg_perm = in->msg_perm; + out->msg_qnum = in->msg_qnum; + out->msg_cbytes = in->msg_cbytes; /* for ipcs */ + out->msg_qbytes = in->msg_qbytes; + out->msg_lspid = in->msg_lspid; + out->msg_lrpid = in->msg_lrpid; + out->msg_stime = in->msg_stime; /* XXX loss of range */ + out->msg_rtime = in->msg_rtime; /* XXX loss of range */ + out->msg_ctime = in->msg_ctime; /* XXX loss of range */ +} + +static void +msqid_ds_kerneltouser64(struct user_msqid_ds *in, struct user64_msqid_ds *out) { out->msg_perm = in->msg_perm; out->msg_qnum = in->msg_qnum; @@ -170,7 +184,21 @@ msqid_ds_64to32(struct user_msqid_ds *in, struct msqid_ds *out) * the beginning. */ static void -msqid_ds_32to64(struct msqid_ds *in, struct user_msqid_ds *out) +msqid_ds_user32tokernel(struct user32_msqid_ds *in, struct user_msqid_ds *out) +{ + out->msg_ctime = in->msg_ctime; + out->msg_rtime = in->msg_rtime; + out->msg_stime = in->msg_stime; + out->msg_lrpid = in->msg_lrpid; + out->msg_lspid = in->msg_lspid; + out->msg_qbytes = in->msg_qbytes; + out->msg_cbytes = in->msg_cbytes; /* for ipcs */ + out->msg_qnum = in->msg_qnum; + out->msg_perm = in->msg_perm; +} + +static void +msqid_ds_user64tokernel(struct user64_msqid_ds *in, struct user_msqid_ds *out) { out->msg_ctime = in->msg_ctime; out->msg_rtime = in->msg_rtime; @@ -296,11 +324,29 @@ msginit(__unused void *dummy) } /* - * Entry point for all MSG calls + * msgsys + * + * Entry point for all MSG calls: msgctl, msgget, msgsnd, msgrcv + * + * Parameters: p Process requesting the call + * uap User argument descriptor (see below) + * retval Return value of the selected msg call + * + * Indirect parameters: uap->which msg call to invoke (index in array of msg calls) + * uap->a2 User argument descriptor + * + * Returns: 0 Success + * !0 Not success + * + * Implicit returns: 
retval Return value of the selected msg call + * + * DEPRECATED: This interface should not be used to call the other MSG + * functions (msgctl, msgget, msgsnd, msgrcv). The correct + * usage is to call the other MSG functions directly. + * */ - /* XXX actually varargs. */ int -msgsys(struct proc *p, struct msgsys_args *uap, register_t *retval) +msgsys(struct proc *p, struct msgsys_args *uap, int32_t *retval) { if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0])) return (EINVAL); @@ -339,7 +385,7 @@ msg_freehdr(struct msg *msghdr) } int -msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) +msgctl(struct proc *p, struct msgctl_args *uap, int32_t *retval) { int msqid = uap->msqid; int cmd = uap->cmd; @@ -347,7 +393,6 @@ msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) int rval, eval; struct user_msqid_ds msqbuf; struct msqid_kernel *msqptr; - struct user_msqid_ds umsds; SYSV_MSG_SUBSYS_LOCK(); @@ -457,11 +502,16 @@ msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) SYSV_MSG_SUBSYS_UNLOCK(); if (IS_64BIT_PROCESS(p)) { - eval = copyin(uap->buf, &msqbuf, sizeof(struct user_msqid_ds)); + struct user64_msqid_ds tmpds; + eval = copyin(uap->buf, &tmpds, sizeof(tmpds)); + + msqid_ds_user64tokernel(&tmpds, &msqbuf); } else { - eval = copyin(uap->buf, &msqbuf, sizeof(struct msqid_ds)); - /* convert in place; ugly, but safe */ - msqid_ds_32to64((struct msqid_ds *)&msqbuf, &msqbuf); + struct user32_msqid_ds tmpds; + + eval = copyin(uap->buf, &tmpds, sizeof(tmpds)); + + msqid_ds_user32tokernel(&tmpds, &msqbuf); } if (eval) return(eval); @@ -476,7 +526,7 @@ msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) /* compare (msglen_t) value against restrict (int) value */ - if (msqbuf.msg_qbytes > (msglen_t)msginfo.msgmnb) { + if (msqbuf.msg_qbytes > (user_msglen_t)msginfo.msgmnb) { #ifdef MSG_DEBUG_OK printf("can't increase msg_qbytes beyond %d (truncating)\n", msginfo.msgmnb); @@ -506,15 +556,15 @@ msgctl(struct 
proc *p, struct msgctl_args *uap, register_t *retval) goto msgctlout; } - bcopy(msqptr, &umsds, sizeof(struct user_msqid_ds)); - SYSV_MSG_SUBSYS_UNLOCK(); if (IS_64BIT_PROCESS(p)) { - eval = copyout(&umsds, uap->buf, sizeof(struct user_msqid_ds)); + struct user64_msqid_ds msqid_ds64; + msqid_ds_kerneltouser64(&msqptr->u, &msqid_ds64); + eval = copyout(&msqid_ds64, uap->buf, sizeof(msqid_ds64)); } else { - struct msqid_ds msqid_ds32; - msqid_ds_64to32(&umsds, &msqid_ds32); - eval = copyout(&msqid_ds32, uap->buf, sizeof(struct msqid_ds)); + struct user32_msqid_ds msqid_ds32; + msqid_ds_kerneltouser32(&msqptr->u, &msqid_ds32); + eval = copyout(&msqid_ds32, uap->buf, sizeof(msqid_ds32)); } SYSV_MSG_SUBSYS_LOCK(); break; @@ -535,7 +585,7 @@ msgctl(struct proc *p, struct msgctl_args *uap, register_t *retval) } int -msgget(__unused struct proc *p, struct msgget_args *uap, register_t *retval) +msgget(__unused struct proc *p, struct msgget_args *uap, int32_t *retval) { int msqid, eval; int key = uap->key; @@ -655,14 +705,14 @@ msgget(__unused struct proc *p, struct msgget_args *uap, register_t *retval) int -msgsnd(struct proc *p, struct msgsnd_args *uap, register_t *retval) +msgsnd(struct proc *p, struct msgsnd_args *uap, int32_t *retval) { __pthread_testcancel(1); return(msgsnd_nocancel(p, (struct msgsnd_nocancel_args *)uap, retval)); } int -msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, register_t *retval) +msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, int32_t *retval) { int msqid = uap->msqid; user_addr_t user_msgp = uap->msgp; @@ -683,7 +733,7 @@ msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, register_t *re } #ifdef MSG_DEBUG_OK - printf("call to msgsnd(%d, 0x%qx, %d, %d)\n", msqid, user_msgp, msgsz, + printf("call to msgsnd(%d, 0x%qx, %ld, %d)\n", msqid, user_msgp, msgsz, msgflg); #endif @@ -729,7 +779,7 @@ msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, register_t *re #endif segs_needed = 
(msgsz + msginfo.msgssz - 1) / msginfo.msgssz; #ifdef MSG_DEBUG_OK - printf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz, + printf("msgsz=%ld, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz, segs_needed); #endif @@ -930,9 +980,11 @@ msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, register_t *re user_msgp = user_msgp + sizeof(msgtype); /* ptr math */ } else { SYSV_MSG_SUBSYS_UNLOCK(); - eval = copyin(user_msgp, &msghdr->msg_type, sizeof(long)); + int32_t msg_type32; + eval = copyin(user_msgp, &msg_type32, sizeof(msg_type32)); + msghdr->msg_type = msg_type32; SYSV_MSG_SUBSYS_LOCK(); - user_msgp = user_msgp + sizeof(long); /* ptr math */ + user_msgp = user_msgp + sizeof(msg_type32); /* ptr math */ } if (eval != 0) { @@ -954,7 +1006,7 @@ msgsnd_nocancel(struct proc *p, struct msgsnd_nocancel_args *uap, register_t *re msqptr->u.msg_perm.mode &= ~MSG_LOCKED; wakeup((caddr_t)msqptr); #ifdef MSG_DEBUG_OK - printf("mtype (%d) < 1\n", msghdr->msg_type); + printf("mtype (%ld) < 1\n", msghdr->msg_type); #endif eval = EINVAL; goto msgsndout; @@ -1088,7 +1140,7 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * int eval; short next; user_long_t msgtype; - long msg_type_long; + int32_t msg_type32; SYSV_MSG_SUBSYS_LOCK(); @@ -1098,7 +1150,7 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * } #ifdef MSG_DEBUG_OK - printf("call to msgrcv(%d, 0x%qx, %d, %ld, %d)\n", msqid, user_msgp, + printf("call to msgrcv(%d, 0x%qx, %ld, %ld, %d)\n", msqid, user_msgp, msgsz, msgtyp, msgflg); #endif @@ -1150,7 +1202,7 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { #ifdef MSG_DEBUG_OK - printf("first message on the queue is too big (want %d, got %d)\n", + printf("first message on the queue is too big (want %ld, got %d)\n", msgsz, msghdr->msg_ts); #endif eval = E2BIG; @@ -1190,13 +1242,13 @@ 
msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * if (msgtyp == msghdr->msg_type || msghdr->msg_type <= -msgtyp) { #ifdef MSG_DEBUG_OK - printf("found message type %d, requested %d\n", + printf("found message type %ld, requested %ld\n", msghdr->msg_type, msgtyp); #endif if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { #ifdef MSG_DEBUG_OK - printf("requested message on the queue is too big (want %d, got %d)\n", + printf("requested message on the queue is too big (want %ld, got %d)\n", msgsz, msghdr->msg_ts); #endif eval = E2BIG; @@ -1248,7 +1300,7 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * if ((msgflg & IPC_NOWAIT) != 0) { #ifdef MSG_DEBUG_OK - printf("no appropriate message found (msgtyp=%d)\n", + printf("no appropriate message found (msgtyp=%ld)\n", msgtyp); #endif /* The SVID says to return ENOMSG. */ @@ -1320,7 +1372,7 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * */ #ifdef MSG_DEBUG_OK - printf("found a message, msgsz=%d, msg_ts=%d\n", msgsz, + printf("found a message, msgsz=%ld, msg_ts=%d\n", msgsz, msghdr->msg_ts); #endif if (msgsz > msghdr->msg_ts) @@ -1341,11 +1393,11 @@ msgrcv_nocancel(struct proc *p, struct msgrcv_nocancel_args *uap, user_ssize_t * SYSV_MSG_SUBSYS_LOCK(); user_msgp = user_msgp + sizeof(msgtype); /* ptr math */ } else { - msg_type_long = msghdr->msg_type; + msg_type32 = msghdr->msg_type; SYSV_MSG_SUBSYS_UNLOCK(); - eval = copyout(&msg_type_long, user_msgp, sizeof(long)); + eval = copyout(&msg_type32, user_msgp, sizeof(msg_type32)); SYSV_MSG_SUBSYS_LOCK(); - user_msgp = user_msgp + sizeof(long); /* ptr math */ + user_msgp = user_msgp + sizeof(msg_type32); /* ptr math */ } if (eval != 0) { @@ -1413,18 +1465,22 @@ IPCS_msg_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, int error; int cursor; union { - struct IPCS_command u32; + struct user32_IPCS_command u32; struct user_IPCS_command u64; } ipcs; - struct 
msqid_ds msqid_ds32; /* post conversion, 32 bit version */ + struct user32_msqid_ds msqid_ds32; /* post conversion, 32 bit version */ + struct user64_msqid_ds msqid_ds64; /* post conversion, 64 bit version */ void *msqid_dsp; - size_t ipcs_sz = sizeof(struct user_IPCS_command); - size_t msqid_ds_sz = sizeof(struct user_msqid_ds); + size_t ipcs_sz; + size_t msqid_ds_sz; struct proc *p = current_proc(); - if (!IS_64BIT_PROCESS(p)) { - ipcs_sz = sizeof(struct IPCS_command); - msqid_ds_sz = sizeof(struct msqid_ds); + if (IS_64BIT_PROCESS(p)) { + ipcs_sz = sizeof(struct user_IPCS_command); + msqid_ds_sz = sizeof(struct user64_msqid_ds); + } else { + ipcs_sz = sizeof(struct user32_IPCS_command); + msqid_ds_sz = sizeof(struct user32_msqid_ds); } /* Copy in the command structure */ @@ -1489,10 +1545,14 @@ IPCS_msg_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, * If necessary, convert the 64 bit kernel segment * descriptor to a 32 bit user one. */ - if (!IS_64BIT_PROCESS(p)) { - msqid_ds_64to32(msqid_dsp, &msqid_ds32); + if (IS_64BIT_PROCESS(p)) { + msqid_ds_kerneltouser64(msqid_dsp, &msqid_ds64); + msqid_dsp = &msqid_ds64; + } else { + msqid_ds_kerneltouser32(msqid_dsp, &msqid_ds32); msqid_dsp = &msqid_ds32; } + SYSV_MSG_SUBSYS_UNLOCK(); error = copyout(msqid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); if (!error) { @@ -1500,7 +1560,7 @@ IPCS_msg_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, ipcs.u64.ipcs_cursor = cursor + 1; if (!IS_64BIT_PROCESS(p)) /* convert in place */ - ipcs.u32.ipcs_data = CAST_DOWN(void *,ipcs.u64.ipcs_data); + ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data); error = SYSCTL_OUT(req, &ipcs, ipcs_sz); } SYSV_MSG_SUBSYS_LOCK(); diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index 6c90d6485..0e44029cf 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -62,7 +62,7 @@ #include #endif -#include +#include #if SYSV_SEM @@ -173,10 +173,20 @@ sysv_semtime(void) * NOTE: Source and 
target may *NOT* overlap! (target is smaller) */ static void -semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out) +semid_ds_kernelto32(struct user_semid_ds *in, struct user32_semid_ds *out) { out->sem_perm = in->sem_perm; - out->sem_base = (__int32_t)in->sem_base; + out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base); + out->sem_nsems = in->sem_nsems; + out->sem_otime = in->sem_otime; /* XXX loses precision */ + out->sem_ctime = in->sem_ctime; /* XXX loses precision */ +} + +static void +semid_ds_kernelto64(struct user_semid_ds *in, struct user64_semid_ds *out) +{ + out->sem_perm = in->sem_perm; + out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base); out->sem_nsems = in->sem_nsems; out->sem_otime = in->sem_otime; /* XXX loses precision */ out->sem_ctime = in->sem_ctime; /* XXX loses precision */ @@ -193,25 +203,50 @@ semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out) * XXX is the same. */ static void -semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out) +semid_ds_32tokernel(struct user32_semid_ds *in, struct user_semid_ds *out) { out->sem_ctime = in->sem_ctime; out->sem_otime = in->sem_otime; out->sem_nsems = in->sem_nsems; - out->sem_base = (void *)in->sem_base; + out->sem_base = (void *)(uintptr_t)in->sem_base; + out->sem_perm = in->sem_perm; +} + +static void +semid_ds_64tokernel(struct user64_semid_ds *in, struct user_semid_ds *out) +{ + out->sem_ctime = in->sem_ctime; + out->sem_otime = in->sem_otime; + out->sem_nsems = in->sem_nsems; + out->sem_base = (void *)(uintptr_t)in->sem_base; out->sem_perm = in->sem_perm; } /* - * Entry point for all SEM calls + * semsys + * + * Entry point for all SEM calls: semctl, semget, semop + * + * Parameters: p Process requesting the call + * uap User argument descriptor (see below) + * retval Return value of the selected sem call + * + * Indirect parameters: uap->which sem call to invoke (index in array of sem calls) + * uap->a2 User argument descriptor + * + * Returns: 0 
Success + * !0 Not success + * + * Implicit returns: retval Return value of the selected sem call + * + * DEPRECATED: This interface should not be used to call the other SEM + * functions (semctl, semget, semop). The correct usage is + * to call the other SEM functions directly. * - * In Darwin this is no longer the entry point. It will be removed after - * the code has been tested better. */ -/* XXX actually varargs. */ int -semsys(struct proc *p, struct semsys_args *uap, register_t *retval) +semsys(struct proc *p, struct semsys_args *uap, int32_t *retval) { /* The individual calls handling the locking now */ @@ -639,7 +674,7 @@ semundo_clear(int semid, int semnum) * because the alignment is the same in user and kernel space. */ int -semctl(struct proc *p, struct semctl_args *uap, register_t *retval) +semctl(struct proc *p, struct semctl_args *uap, int32_t *retval) { int semid = uap->semid; int semnum = uap->semnum; @@ -649,7 +684,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) int i, rval, eval; struct user_semid_ds sbuf; struct semid_kernel *semakptr; - struct user_semid_ds uds; AUDIT_ARG(svipc_cmd, cmd); @@ -714,11 +748,13 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) goto semctlout; if (IS_64BIT_PROCESS(p)) { - eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds)); + struct user64_semid_ds ds64; + eval = copyin(user_arg.buf, &ds64, sizeof(ds64)); + semid_ds_64tokernel(&ds64, &sbuf); } else { - eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds)); - /* convert in place; ugly, but safe */ - semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf); + struct user32_semid_ds ds32; + eval = copyin(user_arg.buf, &ds32, sizeof(ds32)); + semid_ds_32tokernel(&ds32, &sbuf); } if (eval != 0) { @@ -735,13 +771,15 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) case IPC_STAT: if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R))) goto semctlout; - bcopy((caddr_t)&semakptr->u, &uds, 
sizeof(struct user_semid_ds)); + if (IS_64BIT_PROCESS(p)) { - eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds)); + struct user64_semid_ds semid_ds64; + semid_ds_kernelto64(&semakptr->u, &semid_ds64); + eval = copyout(&semid_ds64, user_arg.buf, sizeof(semid_ds64)); } else { - struct semid_ds semid_ds32; - semid_ds_64to32(&uds, &semid_ds32); - eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds)); + struct user32_semid_ds semid_ds32; + semid_ds_kernelto32(&semakptr->u, &semid_ds32); + eval = copyout(&semid_ds32, user_arg.buf, sizeof(semid_ds32)); } break; @@ -820,7 +858,7 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) * to avoid introducing endieness and a pad field into the * header file. Ugly, but it works. */ - semakptr->u.sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf); + semakptr->u.sem_base[semnum].semval = CAST_DOWN_EXPLICIT(int,user_arg.buf); semakptr->u.sem_base[semnum].sempid = p->p_pid; /* XXX scottl Should there be a MAC call here? 
*/ semundo_clear(semid, semnum); @@ -858,7 +896,7 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) } int -semget(__unused struct proc *p, struct semget_args *uap, register_t *retval) +semget(__unused struct proc *p, struct semget_args *uap, int32_t *retval) { int semid, eval; int key = uap->key; @@ -1002,7 +1040,7 @@ semget(__unused struct proc *p, struct semget_args *uap, register_t *retval) } int -semop(struct proc *p, struct semop_args *uap, register_t *retval) +semop(struct proc *p, struct semop_args *uap, int32_t *retval) { int semid = uap->semid; int nsops = uap->nsops; @@ -1524,18 +1562,22 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, int error; int cursor; union { - struct IPCS_command u32; + struct user32_IPCS_command u32; struct user_IPCS_command u64; } ipcs; - struct semid_ds semid_ds32; /* post conversion, 32 bit version */ + struct user32_semid_ds semid_ds32; /* post conversion, 32 bit version */ + struct user64_semid_ds semid_ds64; /* post conversion, 64 bit version */ void *semid_dsp; - size_t ipcs_sz = sizeof(struct user_IPCS_command); - size_t semid_ds_sz = sizeof(struct user_semid_ds); + size_t ipcs_sz; + size_t semid_ds_sz; struct proc *p = current_proc(); - if (!IS_64BIT_PROCESS(p)) { - ipcs_sz = sizeof(struct IPCS_command); - semid_ds_sz = sizeof(struct semid_ds); + if (IS_64BIT_PROCESS(p)) { + ipcs_sz = sizeof(struct user_IPCS_command); + semid_ds_sz = sizeof(struct user64_semid_ds); + } else { + ipcs_sz = sizeof(struct user32_IPCS_command); + semid_ds_sz = sizeof(struct user32_semid_ds); } /* Copy in the command structure */ @@ -1592,13 +1634,21 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, * descriptor to a 32 bit user one. 
*/ if (!IS_64BIT_PROCESS(p)) { - semid_ds_64to32(semid_dsp, &semid_ds32); + semid_ds_kernelto32(semid_dsp, &semid_ds32); semid_dsp = &semid_ds32; + } else { + semid_ds_kernelto64(semid_dsp, &semid_ds64); + semid_dsp = &semid_ds64; } + error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); if (!error) { /* update cursor */ ipcs.u64.ipcs_cursor = cursor + 1; + + if (!IS_64BIT_PROCESS(p)) /* convert in place */ + ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data); + error = SYSCTL_OUT(req, &ipcs, ipcs_sz); } break; diff --git a/bsd/kern/sysv_shm.c b/bsd/kern/sysv_shm.c index dfe108c62..4a93dc597 100644 --- a/bsd/kern/sysv_shm.c +++ b/bsd/kern/sysv_shm.c @@ -83,7 +83,7 @@ #include #endif -#include +#include #include #include @@ -108,7 +108,7 @@ static void shminit(void *); #if 0 SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL) -#endif 0 +#endif static lck_grp_t *sysv_shm_subsys_lck_grp; static lck_grp_attr_t *sysv_shm_subsys_lck_grp_attr; @@ -121,8 +121,8 @@ static lck_mtx_t sysv_shm_subsys_mutex; static int oshmctl(void *p, void *uap, void *retval); static int shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, int * retval); static int shmget_existing(struct shmget_args *uap, int mode, int segnum, int * retval); -static void shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out); -static void shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out); +static void shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out); +static void shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out); /* XXX casting to (sy_call_t *) is bogus, as usual. 
*/ static sy_call_t *shmcalls[] = { @@ -140,8 +140,17 @@ static int shm_last_free, shm_nused, shm_committed; struct shmid_kernel *shmsegs; /* 64 bit version */ static int shm_inited = 0; +/* + * Since anonymous memory chunks are limited to ANON_MAX_SIZE bytes, + * we have to keep a list of chunks when we want to handle a shared memory + * segment bigger than ANON_MAX_SIZE. + * Each chunk points to a VM named entry of up to ANON_MAX_SIZE bytes + * of anonymous memory. + */ struct shm_handle { - void * shm_object; /* vm_offset_t kva; */ + void * shm_object; /* named entry for this chunk*/ + memory_object_size_t shm_handle_size; /* size of this chunk */ + struct shm_handle *shm_handle_next; /* next chunk */ }; struct shmmap_state { @@ -186,17 +195,17 @@ sysv_shmtime(void) * NOTE: Source and target may *NOT* overlap! (target is smaller) */ static void -shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out) +shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out) { out->shm_perm = in->shm_perm; - out->shm_segsz = (size_t)in->shm_segsz; + out->shm_segsz = in->shm_segsz; out->shm_lpid = in->shm_lpid; out->shm_cpid = in->shm_cpid; out->shm_nattch = in->shm_nattch; out->shm_atime = in->shm_atime; out->shm_dtime = in->shm_dtime; out->shm_ctime = in->shm_ctime; - out->shm_internal = CAST_DOWN(void *,in->shm_internal); + out->shm_internal = CAST_DOWN_EXPLICIT(int,in->shm_internal); } /* @@ -205,16 +214,16 @@ shmid_ds_64to32(struct user_shmid_ds *in, struct shmid_ds *out) * the beginning. 
*/ static void -shmid_ds_32to64(struct shmid_ds *in, struct user_shmid_ds *out) +shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out) { - out->shm_internal = CAST_USER_ADDR_T(in->shm_internal); + out->shm_internal = in->shm_internal; out->shm_ctime = in->shm_ctime; out->shm_dtime = in->shm_dtime; out->shm_atime = in->shm_atime; out->shm_nattch = in->shm_nattch; out->shm_cpid = in->shm_cpid; out->shm_lpid = in->shm_lpid; - out->shm_segsz = (user_size_t)in->shm_segsz; + out->shm_segsz = in->shm_segsz; out->shm_perm = in->shm_perm; } @@ -251,15 +260,18 @@ shm_find_segment_by_shmid(int shmid) static void shm_deallocate_segment(struct shmid_kernel *shmseg) { - struct shm_handle *shm_handle; + struct shm_handle *shm_handle, *shm_handle_next; mach_vm_size_t size; - shm_handle = CAST_DOWN(void *,shmseg->u.shm_internal); /* tunnel */ - size = mach_vm_round_page(shmseg->u.shm_segsz); - mach_memory_entry_port_release(shm_handle->shm_object); - shm_handle->shm_object = NULL; - FREE((caddr_t)shm_handle, M_SHM); + for (shm_handle = CAST_DOWN(void *,shmseg->u.shm_internal); /* tunnel */ + shm_handle != NULL; + shm_handle = shm_handle_next) { + shm_handle_next = shm_handle->shm_handle_next; + mach_memory_entry_port_release(shm_handle->shm_object); + FREE((caddr_t) shm_handle, M_SHM); + } shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */ + size = mach_vm_round_page(shmseg->u.shm_segsz); shm_committed -= btoc(size); shm_nused--; shmseg->u.shm_perm.mode = SHMSEG_FREE; @@ -296,7 +308,7 @@ shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s, } int -shmdt(struct proc *p, struct shmdt_args *uap, register_t *retval) +shmdt(struct proc *p, struct shmdt_args *uap, int32_t *retval) { #if CONFIG_MACF struct shmid_kernel *shmsegptr; @@ -355,10 +367,14 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval) struct shm_handle *shm_handle; mach_vm_address_t attach_va; /* attach address in/out */ mach_vm_size_t map_size; /* size of map 
entry */ + mach_vm_size_t mapped_size; vm_prot_t prot; size_t size; kern_return_t rv; - int shmat_ret = 0; + int shmat_ret; + int vm_flags; + + shmat_ret = 0; AUDIT_ARG(svipc_id, uap->shmid); AUDIT_ARG(svipc_addr, uap->shmaddr); @@ -429,45 +445,80 @@ shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval) goto shmat_out; } - shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal); /* tunnel */ + if (flags & MAP_FIXED) { + vm_flags = VM_FLAGS_FIXED; + } else { + vm_flags = VM_FLAGS_ANYWHERE; + } + + mapped_size = 0; + + /* first reserve enough space... */ + rv = mach_vm_map(current_map(), + &attach_va, + map_size, + 0, + vm_flags, + IPC_PORT_NULL, + 0, + FALSE, + VM_PROT_NONE, + VM_PROT_NONE, + VM_INHERIT_NONE); + if (rv != KERN_SUCCESS) { + goto out; + } - rv = mach_vm_map(current_map(), /* process map */ - &attach_va, /* attach address */ - map_size, /* segment size */ - (mach_vm_offset_t)0, /* alignment mask */ - (flags & MAP_FIXED)? VM_FLAGS_FIXED: VM_FLAGS_ANYWHERE, + shmmap_s->va = attach_va; + + /* ... 
then map the shared memory over the reserved space */ + for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal);/* tunnel */ + shm_handle != NULL; + shm_handle = shm_handle->shm_handle_next) { + + rv = vm_map_enter_mem_object( + current_map(), /* process map */ + &attach_va, /* attach address */ + shm_handle->shm_handle_size, /* segment size */ + (mach_vm_offset_t)0, /* alignment mask */ + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, shm_handle->shm_object, (mach_vm_offset_t)0, FALSE, prot, prot, - VM_INHERIT_DEFAULT); - if (rv != KERN_SUCCESS) + VM_INHERIT_SHARE); + if (rv != KERN_SUCCESS) goto out; - rv = mach_vm_inherit(current_map(), attach_va, map_size, VM_INHERIT_SHARE); - if (rv != KERN_SUCCESS) { - (void)mach_vm_deallocate(current_map(), attach_va, map_size); - goto out; + mapped_size += shm_handle->shm_handle_size; + attach_va = attach_va + shm_handle->shm_handle_size; } - shmmap_s->va = attach_va; shmmap_s->shmid = uap->shmid; shmseg->u.shm_lpid = p->p_pid; shmseg->u.shm_atime = sysv_shmtime(); shmseg->u.shm_nattch++; - *retval = attach_va; /* XXX return -1 on error */ + *retval = shmmap_s->va; /* XXX return -1 on error */ shmat_ret = 0; goto shmat_out; out: + if (mapped_size > 0) { + (void) mach_vm_deallocate(current_map(), + shmmap_s->va, + mapped_size); + } switch (rv) { case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: shmat_ret = ENOMEM; + break; case KERN_PROTECTION_FAILURE: shmat_ret = EACCES; + break; default: shmat_ret = EINVAL; + break; } shmat_out: SYSV_SHM_SUBSYS_UNLOCK(); @@ -489,13 +540,12 @@ oshmctl(__unused void *p, __unused void *uap, __unused void *retval) * ipcperm:EACCES */ int -shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval) +shmctl(__unused struct proc *p, struct shmctl_args *uap, int32_t *retval) { int error; kauth_cred_t cred = kauth_cred_get(); struct user_shmid_ds inbuf; struct shmid_kernel *shmseg; - size_t shmid_ds_sz = sizeof(struct user_shmid_ds); int shmctl_ret = 0; @@ -508,9 +558,6 @@ shmctl(__unused 
struct proc *p, struct shmctl_args *uap, register_t *retval) shminit(NULL); } - if (!IS_64BIT_PROCESS(p)) - shmid_ds_sz = sizeof(struct shmid_ds); - shmseg = shm_find_segment_by_shmid(uap->shmid); if (shmseg == NULL) { shmctl_ret = EINVAL; @@ -540,9 +587,9 @@ shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval) if (IS_64BIT_PROCESS(p)) { error = copyout((caddr_t)&shmseg->u, uap->buf, sizeof(struct user_shmid_ds)); } else { - struct shmid_ds shmid_ds32; + struct user32_shmid_ds shmid_ds32; shmid_ds_64to32(&shmseg->u, &shmid_ds32); - error = copyout(&shmid_ds32, uap->buf, sizeof(struct shmid_ds)); + error = copyout(&shmid_ds32, uap->buf, sizeof(shmid_ds32)); } if (error) { shmctl_ret = error; @@ -558,9 +605,10 @@ shmctl(__unused struct proc *p, struct shmctl_args *uap, register_t *retval) if (IS_64BIT_PROCESS(p)) { error = copyin(uap->buf, &inbuf, sizeof(struct user_shmid_ds)); } else { - error = copyin(uap->buf, &inbuf, sizeof(struct shmid_ds)); + struct user32_shmid_ds shmid_ds32; + error = copyin(uap->buf, &shmid_ds32, sizeof(shmid_ds32)); /* convert in place; ugly, but safe */ - shmid_ds_32to64((struct shmid_ds *)&inbuf, &inbuf); + shmid_ds_32to64(&shmid_ds32, &inbuf); } if (error) { shmctl_ret = error; @@ -657,17 +705,17 @@ shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, struct shmid_kernel *shmseg; struct shm_handle *shm_handle; kern_return_t kret; - mach_vm_offset_t user_addr; - mach_vm_size_t size; + mach_vm_size_t total_size, size, alloc_size; void * mem_object; + struct shm_handle *shm_handle_next, **shm_handle_next_p; if (uap->size < (user_size_t)shminfo.shmmin || uap->size > (user_size_t)shminfo.shmmax) return EINVAL; if (shm_nused >= shminfo.shmmni) /* any shmids left? 
*/ return ENOSPC; - size = mach_vm_round_page(uap->size); - if ((user_ssize_t)(shm_committed + btoc(size)) > shminfo.shmall) + total_size = mach_vm_round_page(uap->size); + if ((user_ssize_t)(shm_committed + btoc(total_size)) > shminfo.shmall) return ENOMEM; if (shm_last_free < 0) { for (i = 0; i < shminfo.shmmni; i++) @@ -681,39 +729,50 @@ shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, shm_last_free = -1; } shmseg = &shmsegs[segnum]; + /* * In case we sleep in malloc(), mark the segment present but deleted * so that noone else tries to create the same key. + * XXX but we don't release the global lock !? */ - kret = mach_vm_allocate(current_map(), &user_addr, size, VM_FLAGS_ANYWHERE); - if (kret != KERN_SUCCESS) - goto out; - - kret = mach_make_memory_entry_64(current_map(), - (memory_object_size_t *)&size, - (memory_object_offset_t)user_addr, - VM_PROT_DEFAULT, - (ipc_port_t *)&mem_object, 0); - - if (kret != KERN_SUCCESS) - goto out; - - mach_vm_deallocate(current_map(), user_addr, size); - shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; shmseg->u.shm_perm._key = uap->key; shmseg->u.shm_perm._seq = (shmseg->u.shm_perm._seq + 1) & 0x7fff; - MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK); - if (shm_handle == NULL) { - kret = KERN_NO_SPACE; - mach_memory_entry_port_release(mem_object); - mem_object = NULL; - goto out; + + shm_handle_next_p = NULL; + for (alloc_size = 0; + alloc_size < total_size; + alloc_size += size) { + size = MIN(total_size - alloc_size, ANON_MAX_SIZE); + kret = mach_make_memory_entry_64( + VM_MAP_NULL, + (memory_object_size_t *) &size, + (memory_object_offset_t) 0, + MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT, + (ipc_port_t *) &mem_object, 0); + if (kret != KERN_SUCCESS) + goto out; + + MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK); + if (shm_handle == NULL) { + kret = KERN_NO_SPACE; + mach_memory_entry_port_release(mem_object); + 
mem_object = NULL; + goto out; + } + shm_handle->shm_object = mem_object; + shm_handle->shm_handle_size = size; + shm_handle->shm_handle_next = NULL; + if (shm_handle_next_p == NULL) { + shmseg->u.shm_internal = CAST_USER_ADDR_T(shm_handle);/* tunnel */ + } else { + *shm_handle_next_p = shm_handle; + } + shm_handle_next_p = &shm_handle->shm_handle_next; } - shm_handle->shm_object = mem_object; + shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm); - shmseg->u.shm_internal = CAST_USER_ADDR_T(shm_handle); /* tunnel */ shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = kauth_cred_getuid(cred); shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid; shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) | @@ -741,6 +800,17 @@ shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, AUDIT_ARG(svipc_id, shmid); return 0; out: + if (kret != KERN_SUCCESS) { + for (shm_handle = CAST_DOWN(void *,shmseg->u.shm_internal); /* tunnel */ + shm_handle != NULL; + shm_handle = shm_handle_next) { + shm_handle_next = shm_handle->shm_handle_next; + mach_memory_entry_port_release(shm_handle->shm_object); + FREE((caddr_t) shm_handle, M_SHM); + } + shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */ + } + switch (kret) { case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: @@ -754,7 +824,7 @@ shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, } int -shmget(struct proc *p, struct shmget_args *uap, register_t *retval) +shmget(struct proc *p, struct shmget_args *uap, int32_t *retval) { int segnum, mode, error; int shmget_ret = 0; @@ -791,9 +861,29 @@ shmget(struct proc *p, struct shmget_args *uap, register_t *retval) } -/* XXX actually varargs. 
*/ +/* + * shmsys + * + * Entry point for all SHM calls: shmat, oshmctl, shmdt, shmget, shmctl + * + * Parameters: p Process requesting the call + * uap User argument descriptor (see below) + * retval Return value of the selected shm call + * + * Indirect parameters: uap->which msg call to invoke (index in array of shm calls) + * uap->a2 User argument descriptor + * + * Returns: 0 Success + * !0 Not success + * + * Implicit returns: retval Return value of the selected shm call + * + * DEPRECATED: This interface should not be used to call the other SHM + * functions (shmat, oshmctl, shmdt, shmget, shmctl). The correct + * usage is to call the other SHM functions directly. + */ int -shmsys(struct proc *p, struct shmsys_args *uap, register_t *retval) +shmsys(struct proc *p, struct shmsys_args *uap, int32_t *retval) { /* The routine that we are dispatching already does this */ @@ -977,10 +1067,10 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, int error; int cursor; union { - struct IPCS_command u32; + struct user32_IPCS_command u32; struct user_IPCS_command u64; } ipcs; - struct shmid_ds shmid_ds32; /* post conversion, 32 bit version */ + struct user32_shmid_ds shmid_ds32; /* post conversion, 32 bit version */ void *shmid_dsp; size_t ipcs_sz = sizeof(struct user_IPCS_command); size_t shmid_ds_sz = sizeof(struct user_shmid_ds); @@ -993,8 +1083,8 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, } if (!IS_64BIT_PROCESS(p)) { - ipcs_sz = sizeof(struct IPCS_command); - shmid_ds_sz = sizeof(struct shmid_ds); + ipcs_sz = sizeof(struct user32_IPCS_command); + shmid_ds_sz = sizeof(struct user32_shmid_ds); } /* Copy in the command structure */ @@ -1060,8 +1150,9 @@ IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, ipcs.u64.ipcs_cursor = cursor + 1; if (!IS_64BIT_PROCESS(p)) /* convert in place */ - ipcs.u32.ipcs_data = CAST_DOWN(void *,ipcs.u64.ipcs_data); - error = SYSCTL_OUT(req, &ipcs, ipcs_sz); + 
ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data); + + error = SYSCTL_OUT(req, &ipcs, ipcs_sz); } break; diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index 59205429a..a03e1f437 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -101,7 +101,7 @@ #include #include #include -#include +#include #include #include #include @@ -114,6 +114,22 @@ #include #include /* averunnable */ +/* + * Debugging assertions for tty locks + */ +#define TTY_DEBUG 1 +#if TTY_DEBUG +#define TTY_LOCK_OWNED(tp) do {lck_mtx_assert(&tp->t_lock, LCK_MTX_ASSERT_OWNED); } while (0) +#define TTY_LOCK_NOTOWNED(tp) do {lck_mtx_assert(&tp->t_lock, LCK_MTX_ASSERT_NOTOWNED); } while (0) +#else +#define TTY_LOCK_OWNED(tp) +#define TTY_LOCK_NOTOWNED(tp) +#endif + +static lck_grp_t *tty_lck_grp; +static lck_grp_attr_t *tty_lck_grp_attr; +static lck_attr_t *tty_lck_attr; + static int ttnread(struct tty *tp); static void ttyecho(int c, struct tty *tp); static int ttyoutput(int c, struct tty *tp); @@ -218,7 +234,7 @@ static u_char const char_type[] = { #define I_LOW_WATER ((TTYHOG - 2 * 256) * 7 / 8) /* XXX */ static void -termios32to64(struct termios *in, struct user_termios *out) +termios32to64(struct termios32 *in, struct user_termios *out) { out->c_iflag = (user_tcflag_t)in->c_iflag; out->c_oflag = (user_tcflag_t)in->c_oflag; @@ -233,7 +249,7 @@ termios32to64(struct termios *in, struct user_termios *out) } static void -termios64to32(struct user_termios *in, struct termios *out) +termios64to32(struct user_termios *in, struct termios32 *out) { out->c_iflag = (tcflag_t)in->c_iflag; out->c_oflag = (tcflag_t)in->c_oflag; @@ -249,17 +265,84 @@ termios64to32(struct user_termios *in, struct termios *out) /* + * tty_init + * + * Initialize the tty line discipline subsystem. 
+ * + * Parameters: void + * + * Returns: void + * + * Locks: No ttys can be allocated and no tty locks can be used + * until after this function is called + * + * Notes: The intent of this is to set up a log group attribute, + * lock group, and loc atribute for subsequent per-tty locks. + * This function is called early in bsd_init(), prior to the + * console device initialization. + */ +void +tty_init(void) +{ + tty_lck_grp_attr = lck_grp_attr_alloc_init(); + tty_lck_grp = lck_grp_alloc_init("tty", tty_lck_grp_attr); + tty_lck_attr = lck_attr_alloc_init(); +} + + +/* + * tty_lock + * + * Lock the requested tty structure. + * + * Parameters: tp The tty we want to lock + * + * Returns: void + * + * Locks: On return, tp is locked + */ +void +tty_lock(struct tty *tp) +{ + TTY_LOCK_NOTOWNED(tp); /* debug assert */ + lck_mtx_lock(&tp->t_lock); +} + + +/* + * tty_unlock + * + * Unlock the requested tty structure. + * + * Parameters: tp The tty we want to unlock + * + * Returns: void + * + * Locks: On return, tp is unlocked + */ +void +tty_unlock(struct tty *tp) +{ + TTY_LOCK_OWNED(tp); /* debug assert */ + lck_mtx_unlock(&tp->t_lock); +} + + +/* + * ttyopen (LDISC) + * * Initial open of tty, or (re)entry to standard tty line discipline. + * + * Locks: Assumes tty_lock() is held prior to calling. 
*/ int ttyopen(dev_t device, struct tty *tp) { - boolean_t funnel_state; proc_t p = current_proc(); struct pgrp * pg, * oldpg; struct session *sessp, *oldsess; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ tp->t_dev = device; @@ -284,8 +367,8 @@ ttyopen(dev_t device, struct tty *tp) session_lock(sessp); if ((sessp->s_flags & S_NOCTTY) == 0) { /* and no O_NOCTTY */ /* Hold on to the reference */ - sessp->s_ttyp = tp; - OSBitOrAtomic(P_CONTROLT, (UInt32 *)&p->p_flag); + sessp->s_ttyp = tp; /* XXX NOT A REFERENCE */ + OSBitOrAtomic(P_CONTROLT, &p->p_flag); session_unlock(sessp); proc_list_lock(); oldpg = tp->t_pgrp; @@ -296,33 +379,43 @@ ttyopen(dev_t device, struct tty *tp) tp->t_pgrp = pg; sessp->s_ttypgrpid = pg->pg_id; proc_list_unlock(); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (oldpg != PGRP_NULL) pg_rele(oldpg); if (oldsess != SESSION_NULL) session_rele(oldsess); + tty_lock(tp); goto out; } session_unlock(sessp); } + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (sessp != SESSION_NULL) session_rele(sessp); if (pg != PGRP_NULL) pg_rele(pg); + tty_lock(tp); out: - thread_funnel_set(kernel_flock, funnel_state); + /* XXX may be an error code */ return (0); } /* + * ttyclose + * * Handle close() on a tty line: flush and set to initial state, * bumping generation number so that pending read/write calls * can detect recycling of the tty. * XXX our caller should have done `spltty(); l_close(); ttyclose();' * and l_close() should have flushed, but we repeat the spltty() and * the flush in case there are buggy callers. + * + * Locks: Assumes tty_lock() is held prior to calling. 
*/ int ttyclose(struct tty *tp) @@ -330,6 +423,8 @@ ttyclose(struct tty *tp) struct pgrp * oldpg; struct session * oldsessp; + TTY_LOCK_OWNED(tp); /* debug assert */ + if (constty == tp) { constty = NULL; @@ -355,10 +450,13 @@ ttyclose(struct tty *tp) oldsessp->s_ttypgrpid = NO_PID; proc_list_unlock(); /* drop the reference on prev session and pgrp */ + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (oldsessp != SESSION_NULL) session_rele(oldsessp); if (oldpg != PGRP_NULL) pg_rele(oldpg); + tty_lock(tp); tp->t_state = 0; selthreadclear(&tp->t_wsel); selthreadclear(&tp->t_rsel); @@ -377,17 +475,26 @@ ttyclose(struct tty *tp) (c) != _POSIX_VDISABLE)) /* + * ttyinput (LDISC) + * * Process input of a single character received on a tty. + * + * Parameters: c The character received + * tp The tty on which it was received + * + * Returns: . + * + * Locks: Assumes tty_lock() is held prior to calling. */ int ttyinput(int c, struct tty *tp) { tcflag_t iflag, lflag; cc_t *cc; - int i, err, retval; - boolean_t funnel_state; + int i, err; + int retval = 0; /* default return value */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ /* * If input is pending take it first. 
@@ -427,12 +534,14 @@ ttyinput(int c, struct tty *tp) CLR(c, TTY_ERRORMASK); if (ISSET(err, TTY_BI)) { if (ISSET(iflag, IGNBRK)) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } if (ISSET(iflag, BRKINT)) { ttyflush(tp, FREAD | FWRITE); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); tty_pgsignal(tp, SIGINT, 1); + tty_lock(tp); goto endcase; } if (ISSET(iflag, PARMRK)) @@ -440,8 +549,7 @@ ttyinput(int c, struct tty *tp) } else if ((ISSET(err, TTY_PE) && ISSET(iflag, INPCK)) || ISSET(err, TTY_FE)) { if (ISSET(iflag, IGNPAR)) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } else if (ISSET(iflag, PARMRK)) { parmrk: @@ -512,15 +620,29 @@ ttyinput(int c, struct tty *tp) if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD | FWRITE); ttyecho(c, tp); + /* + * SAFE: All callers drop the lock on return; + * SAFE: if we lose a threaded race on change + * SAFE: of the interrupt character, we could + * SAFE: have lost that race anyway due to the + * SAFE: scheduler executing threads in + * SAFE: priority order rather than "last + * SAFE: active thread" order (FEATURE). + */ + tty_unlock(tp); tty_pgsignal(tp, CCEQ(cc[VINTR], c) ? 
SIGINT : SIGQUIT, 1); + tty_lock(tp); goto endcase; } if (CCEQ(cc[VSUSP], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD); ttyecho(c, tp); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); tty_pgsignal(tp, SIGTSTP, 1); + tty_lock(tp); goto endcase; } } @@ -532,12 +654,10 @@ ttyinput(int c, struct tty *tp) if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); ttystop(tp, 0); - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } if (!CCEQ(cc[VSTART], c)) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } /* * if VSTART == VSTOP then toggle @@ -552,8 +672,7 @@ ttyinput(int c, struct tty *tp) */ if (c == '\r') { if (ISSET(iflag, IGNCR)) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } else if (ISSET(iflag, ICRNL)) c = '\n'; @@ -650,10 +769,14 @@ ttyinput(int c, struct tty *tp) * ^T - kernel info and generate SIGINFO */ if (CCEQ(cc[VSTATUS], c) && ISSET(lflag, IEXTEN)) { - if (ISSET(lflag, ISIG)) + if (ISSET(lflag, ISIG)) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); tty_pgsignal(tp, SIGINFO, 1); + tty_lock(tp); + } if (!ISSET(lflag, NOKERNINFO)) - ttyinfo(tp); + ttyinfo_locked(tp); goto endcase; } } @@ -709,29 +832,44 @@ ttyinput(int c, struct tty *tp) } } } + endcase: /* * IXANY means allow any character to restart output. */ if (ISSET(tp->t_state, TS_TTSTOP) && !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP]) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto out; } + restartoutput: CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); + startoutput: - retval = ttstart(tp); - thread_funnel_set(kernel_flock, funnel_state); + /* Start the output */ + retval = ttstart(tp); + +out: return (retval); } + /* + * ttyoutput + * * Output a single character on a tty, doing output processing * as needed (expanding tabs, newline processing, etc.). - * Returns < 0 if succeeds, otherwise returns char to resend. 
- * Must be recursive. + * + * Parameters: c The character to output + * tp The tty on which to output on the tty + * + * Returns: < 0 Success + * >= 0 Character to resend (failure) + * + * Locks: Assumes tp is locked on entry, remains locked on exit + * + * Notes: Must be recursive. */ static int ttyoutput(int c, struct tty *tp) @@ -739,6 +877,8 @@ ttyoutput(int c, struct tty *tp) tcflag_t oflag; int col; + TTY_LOCK_OWNED(tp); /* debug assert */ + oflag = tp->t_oflag; if (!ISSET(oflag, OPOST)) { if (ISSET(tp->t_lflag, FLUSHO)) @@ -814,21 +954,104 @@ ttyoutput(int c, struct tty *tp) return (-1); } + /* - * Ioctls for all tty devices. Called after line-discipline specific ioctl - * has been called to do discipline-specific functions and/or reject any - * of these ioctl commands. + * ttioctl + * + * Identical to ttioctl_locked, only the lock is not held + * + * Parameters: + * + * Returns: + * + * Locks: This function assumes the tty_lock() is not held on entry; + * it takes the lock, and releases it before returning. + * + * Notes: This is supported to ensure the line discipline interfaces + * all have the same locking semantics. + * + * This function is called from */ int -ttioctl(struct tty *tp, - u_long cmd, caddr_t data, int flag, - proc_t p) +ttioctl(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) +{ + int retval; + + tty_lock(tp); + retval = ttioctl_locked(tp, cmd, data, flag, p); + tty_unlock(tp); + + return (retval); +} + + +/* + * ttioctl_locked + * + * Ioctls for all tty devices. + * + * Parameters: tp Tty on which ioctl() is being called + * cmd ioctl() command parameter + * data ioctl() data argument (if any) + * flag fileglob open modes from fcntl.h; + * if called internally, this is usually + * set to 0, rather than something useful + * p Process context for the call; if the + * call is proxied to a worker thread, + * this will not be the current process!!! 
+ * + * Returns: 0 Success + * EIO I/O error (no process group, job + * control, etc.) + * EINTR Interrupted by signal + * EBUSY Attempt to become the console while + * the console is busy + * ENOTTY TIOCGPGRP on a non-controlling tty + * EINVAL Invalid baud rate + * ENXIO TIOCSETD of invalid line discipline + * EPERM TIOCSTI, not root, not open for read + * EACCES TIOCSTI, not root, not your controlling + * tty + * EPERM TIOCSCTTY failed + * ENOTTY/EINVAL/EPERM TIOCSPGRP failed + * EPERM TIOCSDRAINWAIT as non-root user + * suser:EPERM Console control denied + * ttywait:EIO t_timeout too small/expired + * ttywait:ERESTART Upper layer must redrive the call; + * this is usually done by the Libc + * stub in user space + * ttywait:EINTR Interrupted (usually a signal) + * ttcompat:EINVAL + * ttcompat:ENOTTY + * ttcompat:EIOCTL + * ttcompat:ENOTTY TIOCGSID, if no session or session + * leader + * ttcompat:ENOTTY All unrecognized ioctls + * *tp->t_param:? TIOCSETA* underlying function + * *linesw[t].l_open:? TIOCSETD line discipline open failure + * + * + * Locks: This function assumes that the tty_lock() is held for the + * tp at the time of the call. The lock remains held on return. + * + * Notes: This function is called after line-discipline specific ioctl + * has been called to do discipline-specific functions and/or + * reject any of these ioctl() commands. + * + * This function calls ttcompat(), which can re-call ttioctl() + * to a depth of one (FORTRAN style mutual recursion); at some + * point, we should just in-line ttcompat() here. + */ +int +ttioctl_locked(struct tty *tp, u_long cmd, caddr_t data, int flag, proc_t p) { int error = 0; struct uthread *ut; struct pgrp * pg, *oldpg; struct session *sessp, * oldsessp; + TTY_LOCK_OWNED(tp); /* debug assert */ + ut = (struct uthread *)get_bsdthread_info(current_thread()); /* If the ioctl involves modification, signal if in the background. 
*/ switch (cmd) { @@ -838,18 +1061,17 @@ ttioctl(struct tty *tp, case TIOCFLUSH: case TIOCSTOP: case TIOCSTART: - case TIOCSETA: + case TIOCSETA_32: case TIOCSETA_64: case TIOCSETD: - case TIOCSETAF: + case TIOCSETAF_32: case TIOCSETAF_64: - case TIOCSETAW: + case TIOCSETAW_32: case TIOCSETAW_64: case TIOCSPGRP: case TIOCSTAT: case TIOCSTI: case TIOCSWINSZ: -#if COMPAT_43_TTY || defined(COMPAT_SUNOS) case TIOCLBIC: case TIOCLBIS: case TIOCLSET: @@ -858,7 +1080,6 @@ ttioctl(struct tty *tp, case TIOCSETN: case TIOCSETP: case TIOCSLTC: -#endif while (isbackground(p, tp) && (p->p_lflag & P_LPPWAIT) == 0 && (p->p_sigignore & sigmask(SIGTTOU)) == 0 && @@ -868,13 +1089,17 @@ ttioctl(struct tty *tp, error = EIO; goto out; } + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (pg->pg_jobc == 0) { pg_rele(pg); + tty_lock(tp); error = EIO; goto out; } pgsignal(pg, SIGTTOU, 1); pg_rele(pg); + tty_lock(tp); /* @@ -935,8 +1160,8 @@ ttioctl(struct tty *tp, constty = NULL; } if (constty) { - (*cdevsw[major(cons.t_dev)].d_ioctl) - (cons.t_dev, KMIOCDISABLCONS, NULL, 0, p); + (*cdevsw[major(constty->t_dev)].d_ioctl) + (constty->t_dev, KMIOCDISABLCONS, NULL, 0, p); } else { (*cdevsw[major(tp->t_dev)].d_ioctl) (tp->t_dev, KMIOCDISABLCONS, NULL, 0, p); @@ -948,15 +1173,20 @@ ttioctl(struct tty *tp, if (error) goto out; break; - case TIOCGETA: /* get termios struct */ - case TIOCGETA_64: { /* get termios struct */ - if (IS_64BIT_PROCESS(p)) { - termios32to64(&tp->t_termios, (struct user_termios *)data); - } else { - bcopy(&tp->t_termios, data, sizeof(struct termios)); - } + case TIOCGETA_32: /* get termios struct */ +#ifdef __LP64__ + termios64to32((struct user_termios *)&tp->t_termios, (struct termios32 *)data); +#else + bcopy(&tp->t_termios, data, sizeof(struct termios)); +#endif + break; + case TIOCGETA_64: /* get termios struct */ +#ifdef __LP64__ + bcopy(&tp->t_termios, data, sizeof(struct termios)); +#else + termios32to64((struct termios32 *)&tp->t_termios, 
(struct user_termios *)data); +#endif break; - } case TIOCGETD: /* get line discipline */ *(int *)data = tp->t_line; break; @@ -981,19 +1211,27 @@ ttioctl(struct tty *tp, case TIOCOUTQ: /* output queue size */ *(int *)data = tp->t_outq.c_cc; break; - case TIOCSETA: /* set termios struct */ + case TIOCSETA_32: /* set termios struct */ case TIOCSETA_64: - case TIOCSETAW: /* drain output, set */ + case TIOCSETAW_32: /* drain output, set */ case TIOCSETAW_64: - case TIOCSETAF: /* drn out, fls in, set */ - case TIOCSETAF_64: { /* drn out, fls in, set */ + case TIOCSETAF_32: /* drn out, fls in, set */ + case TIOCSETAF_64: + { /* drn out, fls in, set */ struct termios *t = (struct termios *)data; struct termios lcl_termios; - if (IS_64BIT_PROCESS(p)) { - termios64to32((struct user_termios *)data, &lcl_termios); +#ifdef __LP64__ + if (cmd==TIOCSETA_32 || cmd==TIOCSETAW_32 || cmd==TIOCSETAF_32) { + termios32to64((struct termios32 *)data, (struct user_termios *)&lcl_termios); + t = &lcl_termios; + } +#else + if (cmd==TIOCSETA_64 || cmd==TIOCSETAW_64 || cmd==TIOCSETAF_64) { + termios64to32((struct user_termios *)data, (struct termios32 *)&lcl_termios); t = &lcl_termios; } +#endif #if 0 /* XXX bogus test; always false */ if (t->c_ispeed < 0 || t->c_ospeed < 0) { @@ -1003,13 +1241,13 @@ ttioctl(struct tty *tp, #endif /* 0 - leave in; may end up being a conformance issue */ if (t->c_ispeed == 0) t->c_ispeed = t->c_ospeed; - if (cmd == TIOCSETAW || cmd == TIOCSETAF || + if (cmd == TIOCSETAW_32 || cmd == TIOCSETAF_32 || cmd == TIOCSETAW_64 || cmd == TIOCSETAF_64) { error = ttywait(tp); if (error) { goto out; } - if (cmd == TIOCSETAF || cmd == TIOCSETAF_64) + if (cmd == TIOCSETAF_32 || cmd == TIOCSETAF_64) ttyflush(tp, FREAD); } if (!ISSET(t->c_cflag, CIGNORE)) { @@ -1045,7 +1283,7 @@ ttioctl(struct tty *tp, ttsetwater(tp); } if (ISSET(t->c_lflag, ICANON) != ISSET(tp->t_lflag, ICANON) && - cmd != TIOCSETAF && cmd != TIOCSETAF_64) { + cmd != TIOCSETAF_32 && cmd != TIOCSETAF_64) { if 
(ISSET(t->c_lflag, ICANON)) SET(tp->t_lflag, PENDIN); else { @@ -1091,10 +1329,15 @@ ttioctl(struct tty *tp, error = ENXIO; goto out; } + /* + * If the new line discipline is not equal to the old one, + * close the old one and open the new one. + */ if (t != tp->t_line) { (*linesw[tp->t_line].l_close)(tp, flag); error = (*linesw[t].l_open)(device, tp); if (error) { + /* This is racy; it's possible to lose both */ (void)(*linesw[tp->t_line].l_open)(device, tp); goto out; } @@ -1140,10 +1383,13 @@ ttioctl(struct tty *tp, if (!SESS_LEADER(p, sessp) || ((sessp->s_ttyvp || tp->t_session) && (tp->t_session != sessp))) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (sessp != SESSION_NULL) session_rele(sessp); if (pg != PGRP_NULL) pg_rele(pg); + tty_lock(tp); error = EPERM; goto out; } @@ -1158,14 +1404,17 @@ ttioctl(struct tty *tp, tp->t_pgrp = pg; proc_list_unlock(); session_lock(sessp); - sessp->s_ttyp = tp; + sessp->s_ttyp = tp; /* XXX NOT A REFERENCE */ session_unlock(sessp); - OSBitOrAtomic(P_CONTROLT, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_CONTROLT, &p->p_flag); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); /* drop the reference on prev session and pgrp */ if (oldsessp != SESSION_NULL) session_rele(oldsessp); if (oldpg != PGRP_NULL) pg_rele(oldpg); + tty_lock(tp); break; case TIOCSPGRP: { /* set pgrp of tty */ @@ -1184,9 +1433,12 @@ ttioctl(struct tty *tp, error = EINVAL; goto out; } else if (pgrp->pg_session != sessp) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (sessp != SESSION_NULL) session_rele(sessp); pg_rele(pgrp); + tty_lock(tp); error = EPERM; goto out; } @@ -1195,20 +1447,26 @@ ttioctl(struct tty *tp, tp->t_pgrp = pgrp; sessp->s_ttypgrpid = pgrp->pg_id; proc_list_unlock(); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); if (oldpg != PGRP_NULL) pg_rele(oldpg); if (sessp != SESSION_NULL) session_rele(sessp); + tty_lock(tp); break; } case TIOCSTAT: /* simulate 
control-T */ - ttyinfo(tp); + ttyinfo_locked(tp); break; case TIOCSWINSZ: /* set window size */ if (bcmp((caddr_t)&tp->t_winsize, data, sizeof (struct winsize))) { tp->t_winsize = *(struct winsize *)data; + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); tty_pgsignal(tp, SIGWINCH, 1); + tty_lock(tp); } break; case TIOCSDRAINWAIT: @@ -1224,11 +1482,7 @@ ttioctl(struct tty *tp, *(int *)data = tp->t_timeout / hz; break; default: -#if COMPAT_43_TTY || defined(COMPAT_SUNOS) error = ttcompat(tp, cmd, data, flag, p); -#else - error = ENOTTY; -#endif goto out; } @@ -1237,12 +1491,18 @@ ttioctl(struct tty *tp, return(error); } + +/* + * Locks: Assumes tp is locked on entry, remains locked on exit + */ int ttyselect(struct tty *tp, int rw, void *wql, proc_t p) { if (tp == NULL) return (ENXIO); + TTY_LOCK_OWNED(tp); /* debug assert */ + switch (rw) { case FREAD: if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) @@ -1261,24 +1521,37 @@ ttyselect(struct tty *tp, int rw, void *wql, proc_t p) return (0); } + /* * This is a wrapper for compatibility with the select vector used by * cdevsw. It relies on a proper xxxdevtotty routine. + * + * Locks: Assumes tty_lock() is not held prior to calling. */ int ttselect(dev_t dev, int rw, void *wql, proc_t p) { - return ttyselect(cdevsw[major(dev)].d_ttys[minor(dev)], rw, wql, p); + int rv; + struct tty *tp = cdevsw[major(dev)].d_ttys[minor(dev)]; + + tty_lock(tp); + rv = ttyselect(tp, rw, wql, p); + tty_unlock(tp); + + return (rv); } + /* - * Must be called at spltty(). + * Locks: Assumes tp is locked on entry, remains locked on exit */ static int ttnread(struct tty *tp) { int nread; + TTY_LOCK_OWNED(tp); /* debug assert */ + if (ISSET(tp->t_lflag, PENDIN)) ttypend(tp); nread = tp->t_canq.c_cc; @@ -1290,14 +1563,32 @@ ttnread(struct tty *tp) return (nread); } + /* + * ttywait + * * Wait for output to drain. 
+ * + * Parameters: tp Tty on which to wait for output to drain + * + * Returns: 0 Success + * EIO t_timeout too small/expired + * ttysleep:ERESTART Upper layer must redrive the call; + * this is usually done by the Libc + * stub in user space + * ttysleep:EINTR Interrupted (usually a signal) + * + * Notes: Called from proc_exit() and vproc_exit(). + * + * Locks: Assumes tp is locked on entry, remains locked on exit */ int ttywait(struct tty *tp) { int error; + TTY_LOCK_OWNED(tp); /* debug assert */ + error = 0; while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED) && tp->t_oproc) { @@ -1321,20 +1612,31 @@ ttywait(struct tty *tp) return (error); } +/* + * Stop the underlying device driver. + * + * Locks: Assumes tty_lock() is held prior to calling. + */ static void ttystop(struct tty *tp, int rw) { + TTY_LOCK_OWNED(tp); /* debug assert */ + (*cdevsw[major(tp->t_dev)].d_stop)(tp, rw); } /* * Flush if successfully wait. + * + * Locks: Assumes tty_lock() is held prior to calling. */ static int ttywflush(struct tty *tp) { int error; + TTY_LOCK_OWNED(tp); /* debug assert */ + if ((error = ttywait(tp)) == 0) ttyflush(tp, FREAD); return (error); @@ -1342,10 +1644,14 @@ ttywflush(struct tty *tp) /* * Flush tty read and/or write queues, notifying anyone waiting. + * + * Locks: Assumes tty_lock() is held prior to calling. */ void ttyflush(struct tty *tp, int rw) { + TTY_LOCK_OWNED(tp); /* debug assert */ + #if 0 again: #endif @@ -1407,6 +1713,10 @@ ttyflush(struct tty *tp, int rw) /* * Copy in the default termios characters. + * + * Locks: Assumes tty_lock() is held prior to calling. + * + * Notes: No assertion; tp is not in scope. */ void termioschars(struct termios *t) @@ -1414,23 +1724,19 @@ termioschars(struct termios *t) bcopy(ttydefchars, t->c_cc, sizeof t->c_cc); } -/* - * Old interface. - */ -void -ttychars(struct tty *tp) -{ - termioschars(&tp->t_termios); -} /* * Handle input high water. Send stop character for the IXOFF case. 
Turn * on our input flow control bit and propagate the changes to the driver. * XXX the stop character should be put in a special high priority queue. + * + * Locks: Assumes tty_lock() is held for the call. */ void ttyblock(struct tty *tp) { + TTY_LOCK_OWNED(tp); /* debug assert */ + SET(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTOP] != _POSIX_VDISABLE && putc(tp->t_cc[VSTOP], &tp->t_outq) != 0) @@ -1438,14 +1744,19 @@ ttyblock(struct tty *tp) ttstart(tp); } + /* * Handle input low water. Send start character for the IXOFF case. Turn * off our input flow control bit and propagate the changes to the driver. * XXX the start character should be put in a special high priority queue. + * + * Locks: Assumes tty_lock() is held for the call. */ static void ttyunblock(struct tty *tp) { + TTY_LOCK_OWNED(tp); /* debug assert */ + CLR(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTART] != _POSIX_VDISABLE && putc(tp->t_cc[VSTART], &tp->t_outq) != 0) @@ -1453,65 +1764,69 @@ ttyunblock(struct tty *tp) ttstart(tp); } -/* FreeBSD: Not used by any current (i386) drivers. */ + /* - * Restart after an inter-char delay. + * ttstart + * + * Start tty output + * + * Parameters: tp tty on which to start output + * + * Returns: 0 Success + * + * Locks: Assumes tty_lock() is held for the call. + * + * Notes: This function might as well be void; it always returns success + * + * Called from ttioctl_locked(), LDISC routines, and + * ttycheckoutq(), ttyblock(), ttyunblock(), and tputchar() */ -void -ttrstrt(void *tp_arg) -{ - struct tty *tp; - -#if DIAGNOSTIC - if (tp_arg == NULL) - panic("ttrstrt"); -#endif - tp = tp_arg; - - CLR(tp->t_state, TS_TIMEOUT); - ttstart(tp); - -} - int ttstart(struct tty *tp) { - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ if (tp->t_oproc != NULL) /* XXX: Kludge for pty. 
*/ (*tp->t_oproc)(tp); - thread_funnel_set(kernel_flock, funnel_state); + return (0); } + /* + * ttylclose (LDISC) + * * "close" a line discipline + * + * Locks: Assumes tty_lock() is held prior to calling. */ int ttylclose(struct tty *tp, int flag) { - boolean_t funnel_state; + TTY_LOCK_OWNED(tp); /* debug assert */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); if ( (flag & FNONBLOCK) || ttywflush(tp)) ttyflush(tp, FREAD | FWRITE); - thread_funnel_set(kernel_flock, funnel_state); + return (0); } + /* + * ttymodem (LDISC) + * * Handle modem control transition on a tty. * Flag indicates new state of carrier. * Returns 0 if the line should be turned off, otherwise 1. + * + * Locks: Assumes tty_lock() is held prior to calling. */ int ttymodem(struct tty *tp, int flag) { - boolean_t funnel_state; + int rval = 1; /* default return value */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ if (ISSET(tp->t_state, TS_CARR_ON) && ISSET(tp->t_cflag, MDMBUF)) { /* @@ -1540,8 +1855,8 @@ ttymodem(struct tty *tp, int flag) if (tp->t_session && tp->t_session->s_leader) psignal(tp->t_session->s_leader, SIGHUP); ttyflush(tp, FREAD | FWRITE); - thread_funnel_set(kernel_flock, funnel_state); - return (0); + rval = 0; + goto out; } } else { /* @@ -1554,13 +1869,17 @@ ttymodem(struct tty *tp, int flag) ttwakeup(tp); ttwwakeup(tp); } - thread_funnel_set(kernel_flock, funnel_state); - return (1); + +out: + return (rval); } + /* * Reinput pending characters after state switch * call at spltty(). + * + * Locks: Assumes tty_lock() is held for the call. */ static void ttypend(struct tty *tp) @@ -1568,6 +1887,8 @@ ttypend(struct tty *tp) struct clist tq; int c; + TTY_LOCK_OWNED(tp); /* debug assert */ + CLR(tp->t_lflag, PENDIN); SET(tp->t_state, TS_TYPEN); tq = tp->t_rawq; @@ -1578,8 +1899,13 @@ ttypend(struct tty *tp) CLR(tp->t_state, TS_TYPEN); } + /* + * ttread (LDISC) + * * Process a read call on a tty device. 
+ * + * Locks: Assumes tty_lock() is held prior to calling. */ int ttread(struct tty *tp, struct uio *uio, int flag) @@ -1592,11 +1918,10 @@ ttread(struct tty *tp, struct uio *uio, int flag) int first, error = 0; int has_etime = 0, last_cc = 0; long slp = 0; /* XXX this should be renamed `timo'. */ - boolean_t funnel_state; struct uthread *ut; struct pgrp * pg; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ ut = (struct uthread *)get_bsdthread_info(current_thread()); @@ -1617,21 +1942,27 @@ ttread(struct tty *tp, struct uio *uio, int flag) if ((p->p_sigignore & sigmask(SIGTTIN)) || (ut->uu_sigmask & sigmask(SIGTTIN)) || p->p_lflag & P_LPPWAIT) { - thread_funnel_set(kernel_flock, funnel_state); - return (EIO); + error = EIO; + goto err; } pg = proc_pgrp(p); if (pg == PGRP_NULL) { - thread_funnel_set(kernel_flock, funnel_state); - return (EIO); + error = EIO; + goto err; } if (pg->pg_jobc == 0) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); pg_rele(pg); - thread_funnel_set(kernel_flock, funnel_state); - return (EIO); + tty_lock(tp); + error = EIO; + goto err; } + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); pgsignal(pg, SIGTTIN, 1); pg_rele(pg); + tty_lock(tp); /* * We signalled ourself, so we need to act as if we @@ -1639,13 +1970,13 @@ ttread(struct tty *tp, struct uio *uio, int flag) * the signal. If it's a signal that stops the * process, that's handled in the signal sending code. 
*/ - thread_funnel_set(kernel_flock, funnel_state); - return (EINTR); + error = EINTR; + goto err; } if (ISSET(tp->t_state, TS_ZOMBIE)) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); /* EOF */ + /* EOF - returning 0 */ + goto err; } /* @@ -1659,12 +1990,11 @@ ttread(struct tty *tp, struct uio *uio, int flag) if (flag & IO_NDELAY) { if (qp->c_cc > 0) goto read; - if (!ISSET(lflag, ICANON) && cc[VMIN] == 0) { - thread_funnel_set(kernel_flock, funnel_state); - return (0); + if (ISSET(lflag, ICANON) || cc[VMIN] != 0) { + error = EWOULDBLOCK; } - thread_funnel_set(kernel_flock, funnel_state); - return (EWOULDBLOCK); + /* else polling - returning 0 */ + goto err; } if (!ISSET(lflag, ICANON)) { int m = cc[VMIN]; @@ -1687,8 +2017,7 @@ ttread(struct tty *tp, struct uio *uio, int flag) goto read; /* m, t and qp->c_cc are all 0. 0 is enough input. */ - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto err; } t *= 100000; /* time in us */ #define diff(t1, t2) (((t1).tv_sec - (t2).tv_sec) * 1000000 + \ @@ -1739,8 +2068,7 @@ ttread(struct tty *tp, struct uio *uio, int flag) } else { if (timercmp(&etime, &timecopy, <=)) { /* Timed out, but 0 is enough input. */ - thread_funnel_set(kernel_flock, funnel_state); - return (0); + goto err; } slp = diff(etime, timecopy); } @@ -1755,7 +2083,7 @@ ttread(struct tty *tp, struct uio *uio, int flag) * is large (divide by `tick' and/or arrange to * use hzto() if hz is large). 
*/ - slp = (long) (((u_long)slp * hz) + 999999) / 1000000; + slp = (long) (((u_int32_t)slp * hz) + 999999) / 1000000; goto sleep; } if (qp->c_cc <= 0) { @@ -1769,8 +2097,7 @@ ttread(struct tty *tp, struct uio *uio, int flag) if (error == EWOULDBLOCK) error = 0; else if (error) { - thread_funnel_set(kernel_flock, funnel_state); - return (error); + goto err; } /* * XXX what happens if another process eats some input @@ -1825,7 +2152,14 @@ ttread(struct tty *tp, struct uio *uio, int flag) */ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, IEXTEN | ISIG) == (IEXTEN | ISIG)) { + /* + * SAFE: All callers drop the lock on return and + * SAFE: current thread will not change out from + * SAFE: under us in the "goto loop" case. + */ + tty_unlock(tp); tty_pgsignal(tp, SIGTSTP, 1); + tty_lock(tp); if (first) { error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg3", 0); @@ -1867,16 +2201,21 @@ ttread(struct tty *tp, struct uio *uio, int flag) tp->t_rawq.c_cc + tp->t_canq.c_cc <= I_LOW_WATER) ttyunblock(tp); - thread_funnel_set(kernel_flock, funnel_state); +err: return (error); } + /* * Check the output queue on tp for space for a kernel message (from uprintf * or tprintf). Allow some space over the normal hiwater mark so we don't * lose messages due to normal flow control, but don't let the tty run amok. * Sleeps here are not interruptible, but we return prematurely if new signals * arrive. 
+ * + * Locks: Assumes tty_lock() is held before calling + * + * Notes: This function is called from tprintf() in subr_prf.c */ int ttycheckoutq(struct tty *tp, int wait) @@ -1885,6 +2224,8 @@ ttycheckoutq(struct tty *tp, int wait) sigset_t oldsig; struct uthread *ut; + TTY_LOCK_OWNED(tp); /* debug assert */ + ut = (struct uthread *)get_bsdthread_info(current_thread()); hiwat = tp->t_hiwat; @@ -1898,13 +2239,18 @@ ttycheckoutq(struct tty *tp, int wait) return (0); } SET(tp->t_state, TS_SO_OLOWAT); - tsleep(TSA_OLOWAT(tp), PZERO - 1, "ttoutq", hz); + ttysleep(tp, TSA_OLOWAT(tp), PZERO - 1, "ttoutq", hz); } return (1); } + /* + * ttwrite (LDISC) + * * Process a write call on a tty device. + * + * Locks: Assumes tty_lock() is held prior to calling. */ int ttwrite(struct tty *tp, struct uio *uio, int flag) @@ -1912,17 +2258,16 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) char *cp = NULL; int cc, ce; proc_t p; - int i, hiwat, count, error; + int i, hiwat, error; + user_ssize_t count; char obuf[OBUFSIZ]; - boolean_t funnel_state; struct uthread *ut; struct pgrp * pg; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + TTY_LOCK_OWNED(tp); /* debug assert */ ut = (struct uthread *)get_bsdthread_info(current_thread()); hiwat = tp->t_hiwat; - // LP64todo - fix this! 
count = uio_resid(uio); error = 0; cc = 0; @@ -1958,12 +2303,18 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) goto out; } if (pg->pg_jobc == 0) { + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); pg_rele(pg); + tty_lock(tp); error = EIO; goto out; } + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); pgsignal(pg, SIGTTOU, 1); pg_rele(pg); + tty_lock(tp); /* * We signalled ourself, so we need to act as if we * have been "interrupted" from a "sleep" to act on @@ -1981,7 +2332,6 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) while (uio_resid(uio) > 0 || cc > 0) { if (ISSET(tp->t_lflag, FLUSHO)) { uio_setresid(uio, 0); - thread_funnel_set(kernel_flock, funnel_state); return (0); } if (tp->t_outq.c_cc > hiwat) @@ -2063,7 +2413,6 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) * (the call will either return short or restart with a new uio). */ uio_setresid(uio, (uio_resid(uio) + cc)); - thread_funnel_set(kernel_flock, funnel_state); return (error); overfull: @@ -2087,7 +2436,6 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) } if (flag & IO_NDELAY) { uio_setresid(uio, (uio_resid(uio) + cc)); - thread_funnel_set(kernel_flock, funnel_state); return (uio_resid(uio) == count ? EWOULDBLOCK : 0); } SET(tp->t_state, TS_SO_OLOWAT); @@ -2100,9 +2448,12 @@ ttwrite(struct tty *tp, struct uio *uio, int flag) goto loop; } + /* * Rubout one character from the rawq of tp * as cleanly as possible. + * + * Locks: Assumes tty_lock() is held prior to calling. */ static void ttyrub(int c, struct tty *tp) @@ -2111,6 +2462,8 @@ ttyrub(int c, struct tty *tp) int savecol; int tabc; + TTY_LOCK_OWNED(tp); /* debug assert */ + if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC)) return; CLR(tp->t_lflag, FLUSHO); @@ -2182,12 +2535,17 @@ ttyrub(int c, struct tty *tp) --tp->t_rocount; } + /* * Back over count characters, erasing them. + * + * Locks: Assumes tty_lock() is held prior to calling. 
*/ static void ttyrubo(struct tty *tp, int count) { + TTY_LOCK_OWNED(tp); /* debug assert */ + while (count-- > 0) { (void)ttyoutput('\b', tp); (void)ttyoutput(' ', tp); @@ -2195,10 +2553,13 @@ ttyrubo(struct tty *tp, int count) } } + /* * ttyretype -- * Reprint the rawq line. Note, it is assumed that c_cc has already * been checked. + * + * Locks: Assumes tty_lock() is held prior to calling. */ static void ttyretype(struct tty *tp) @@ -2206,6 +2567,8 @@ ttyretype(struct tty *tp) u_char *cp; int c; + TTY_LOCK_OWNED(tp); /* debug assert */ + /* Echo the reprint character. */ if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE) ttyecho(tp->t_cc[VREPRINT], tp); @@ -2227,12 +2590,17 @@ ttyretype(struct tty *tp) tp->t_rocol = 0; } + /* * Echo a typed character to the terminal. + * + * Locks: Assumes tty_lock() is held prior to calling. */ static void ttyecho(int c, struct tty *tp) { + TTY_LOCK_OWNED(tp); /* debug assert */ + if (!ISSET(tp->t_state, TS_CNTTB)) CLR(tp->t_lflag, FLUSHO); if ((!ISSET(tp->t_lflag, ECHO) && @@ -2252,26 +2620,52 @@ ttyecho(int c, struct tty *tp) (void)ttyoutput(c, tp); } + /* * Wake up any readers on a tty. + * + * Locks: Assumes tty_lock() is held for the call. */ void ttwakeup(struct tty *tp) { + TTY_LOCK_OWNED(tp); /* debug assert */ + selwakeup(&tp->t_rsel); - if (ISSET(tp->t_state, TS_ASYNC)) + KNOTE(&tp->t_rsel.si_note, 1); + if (ISSET(tp->t_state, TS_ASYNC)) { + /* + * XXX: Callers may not revalidate it the tty is closed + * XXX: out from under them by another thread, but we do + * XXX: not support queued signals. This should be safe, + * XXX: since the process we intend to wakeup is in the + * XXX: process group, and will wake up because of the + * XXX: signal anyway. + */ + tty_unlock(tp); tty_pgsignal(tp, SIGIO, 1); + tty_lock(tp); + } wakeup(TSA_HUP_OR_INPUT(tp)); } + /* + * ttwwakeup (LDISC) + * * Wake up any writers on a tty. + * + * Locks: Assumes tty_lock() is held prior to calling. 
*/ void ttwwakeup(struct tty *tp) { - if (tp->t_outq.c_cc <= tp->t_lowat) + TTY_LOCK_OWNED(tp); /* debug assert */ + + if (tp->t_outq.c_cc <= tp->t_lowat) { selwakeup(&tp->t_wsel); + KNOTE(&tp->t_wsel.si_note, 1); + } if (ISSET(tp->t_state, TS_BUSY | TS_SO_OCOMPLETE) == TS_SO_OCOMPLETE && tp->t_outq.c_cc == 0) { CLR(tp->t_state, TS_SO_OCOMPLETE); @@ -2284,9 +2678,13 @@ ttwwakeup(struct tty *tp) } } + /* * Look up a code for a specified speed in a conversion table; * used by drivers to map software speed values to hardware parameters. + * + * Notes: No locks are assumed for this function; it does not + * directly access struct tty. */ int ttspeedtab(int speed, struct speedtab *table) @@ -2297,12 +2695,14 @@ ttspeedtab(int speed, struct speedtab *table) return (-1); } + /* * Set tty hi and low water marks. * * Try to arrange the dynamics so there's about one second * from hi to low water. * + * Locks: Assumes tty_lock() is held prior to calling. */ void ttsetwater(struct tty *tp) @@ -2310,6 +2710,8 @@ ttsetwater(struct tty *tp) int cps; unsigned int x; + TTY_LOCK_OWNED(tp); /* debug assert */ + #define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x)) cps = tp->t_ospeed / 10; @@ -2329,11 +2731,14 @@ extern kern_return_t thread_info_internal(thread_t thread, thread_info_t thread_info_out, mach_msg_type_number_t *thread_info_count); + /* * Report on state of foreground process group. + * + * Locks: Assumes tty_lock() is held prior to calling. 
*/ void -ttyinfo(struct tty *tp) +ttyinfo_locked(struct tty *tp) { int load; thread_t thread; @@ -2348,6 +2753,7 @@ ttyinfo(struct tty *tp) mach_msg_type_number_t mmtn = THREAD_BASIC_INFO_COUNT; struct pgrp * pg; + TTY_LOCK_OWNED(tp); /* debug assert */ if (ttycheckoutq(tp,0) == 0) return; @@ -2394,7 +2800,10 @@ ttyinfo(struct tty *tp) } } pgrp_unlock(pg); + /* SAFE: All callers drop the lock on return */ + tty_unlock(tp); pg_rele(pg); + tty_lock(tp); pick = proc_find(pickpid); if (pick == PROC_NULL) @@ -2434,7 +2843,7 @@ ttyinfo(struct tty *tp) proc_rele(pick); /* Print command, pid, state, utime, and stime */ - ttyprintf(tp, " cmd: %s %d %s %ld.%02ldu %ld.%02lds\n", + ttyprintf(tp, " cmd: %s %d %s %ld.%02du %ld.%02ds\n", pick->p_comm, pick->p_pid, state, @@ -2443,6 +2852,7 @@ ttyinfo(struct tty *tp) tp->t_rocount = 0; } + /* * Returns 1 if p2 is "better" than p1 * @@ -2461,6 +2871,11 @@ ttyinfo(struct tty *tp) #define ONLYB 1 #define BOTH 3 +/* + * Locks: pgrp_lock(p2) held on call to this function + * tty_lock(tp) for p2's tty, for which p2 is the foreground + * process, held on call to this function + */ static int proc_compare(proc_t p1, proc_t p2) { @@ -2513,12 +2928,19 @@ proc_compare(proc_t p1, proc_t p2) return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } + /* * Output char to tty; console putchar style. + * + * Locks: Assumes tty_lock() is held prior to calling. + * + * Notes: Only ever called from putchar() in subr_prf.c */ int tputchar(int c, struct tty *tp) { + TTY_LOCK_OWNED(tp); /* debug assert */ + if (!ISSET(tp->t_state, TS_CONNECTED)) { return (-1); } @@ -2529,9 +2951,33 @@ tputchar(int c, struct tty *tp) return (0); } + /* + * ttysleep + * + * Sleep on a wait channel waiting for an interrupt or a condition to come + * true so that we are woken up. 
+ * + * Parameters: tp Tty going to sleep + * chan The sleep channel (usually an address + * of a structure member) + * pri priority and flags + * wmesg Wait message; shows up in debugger, + * should show up in "ps", but doesn't + * timo Timeout for the sleep + * + * Returns: 0 Condition came true + * ERESTART Upper layer must redrive the call; + * this is usually done by the Libc + * stub in user space + * msleep0:EINTR Interrupted (usually a signal) + * msleep0:ERESTART Interrupted (usually a masked signal) + * msleep0:EWOULDBLOCK Timeout (timo) already expired + * + * Locks: Assumes tty_lock() is held prior to calling. + * * Sleep on chan, returning ERESTART if tty changed while we napped and - * returning any errors (e.g. EINTR/EWOULDBLOCK) reported by tsleep. If + * returning any errors (e.g. EINTR/EWOULDBLOCK) reported by msleep0. If * the tty is revoked, restarting a pending call will redo validation done * at the start of the call. */ @@ -2541,13 +2987,17 @@ ttysleep(struct tty *tp, void *chan, int pri, const char *wmesg, int timo) int error; int gen; + TTY_LOCK_OWNED(tp); + gen = tp->t_gen; - error = tsleep(chan, pri, wmesg, timo); + /* Use of msleep0() avoids conversion timo/timespec/timo */ + error = msleep0(chan, &tp->t_lock, pri, wmesg, timo, (int (*)(int))0); if (error) return (error); return (tp->t_gen == gen ? 0 : ERESTART); } + /* * Allocate a tty structure and its associated buffers. * @@ -2555,6 +3005,9 @@ ttysleep(struct tty *tp, void *chan, int pri, const char *wmesg, int timo) * * Returns: !NULL Address of new struct tty * NULL Error ("ENOMEM") + * + * Locks: The tty_lock() of the returned tty is not held when it + * is returned. */ struct tty * ttymalloc(void) @@ -2568,26 +3021,40 @@ ttymalloc(void) clalloc(&tp->t_canq, TTYCLSIZE, 1); /* output queue doesn't need quoting */ clalloc(&tp->t_outq, TTYCLSIZE, 0); + lck_mtx_init(&tp->t_lock, tty_lck_grp, tty_lck_attr); } return(tp); } + /* * Free a tty structure and its buffers. 
+ * + * Locks: The tty_lock() is assumed to not be held at the time of + * the free; this functions destroys the mutex. */ void ttyfree(struct tty *tp) { + TTY_LOCK_NOTOWNED(tp); /* debug assert */ + clfree(&tp->t_rawq); clfree(&tp->t_canq); clfree(&tp->t_outq); + lck_mtx_destroy(&tp->t_lock, tty_lck_grp); FREE(tp, M_TTYS); } + +/* + * Locks: Assumes tty_lock() is held prior to calling. + */ int isbackground(proc_t p, struct tty *tp) { - return (isctty(p, tp) && (p->p_pgrp != tp->t_pgrp)); + TTY_LOCK_OWNED(tp); + + return (tp->t_session != NULL && p->p_pgrp != NULL && (p->p_pgrp != tp->t_pgrp) && isctty_sp(p, tp, p->p_pgrp->pg_session)); } static int diff --git a/bsd/kern/tty_compat.c b/bsd/kern/tty_compat.c index e5a53b56c..6cab324ac 100644 --- a/bsd/kern/tty_compat.c +++ b/bsd/kern/tty_compat.c @@ -62,7 +62,15 @@ */ /* - * mapping routines for old line discipline (yuck) + * Compatibility routines for BSD 4.3 tty ioctl() commands + * + * The only function externalized from this file is ttcompat() and it is + * externalized as private extern to prevent exporting of the symbol when + * KEXTs link against the kernel. + * + * Locks: All functions in this file assume that the tty_lock() + * is held on the tty structure before these functions are + * called. 
*/ #include @@ -77,9 +85,6 @@ #include #include -/* NeXT Move define down here cause COMPAT_43_TTY not valid earlier */ -#if COMPAT_43_TTY || defined(COMPAT_SUNOS) - static int ttcompatgetflags(struct tty *tp); static void ttcompatsetflags(struct tty *tp, struct termios *t); static void ttcompatsetlflags(struct tty *tp, struct termios *t); @@ -152,6 +157,7 @@ ttcompatspeedtab(int speed, struct speedtab *table) return (1); /* 50, min and not hangup */ } + /* * ttsetcompat * @@ -167,8 +173,8 @@ ttcompatspeedtab(int speed, struct speedtab *table) * TIOCLSET -> TIOCSETA * * The converted command argument and potentially modified 'term' - * argument are returned to the caller, which will then call ttioctl(), - * if this function returns successfully. + * argument are returned to ttcompat(), which will then call + * ttioctl_locked(), if this function returns successfully. * * Parameters struct tty *tp The tty on which the operation is * being performed. @@ -192,11 +198,16 @@ ttcompatspeedtab(int speed, struct speedtab *table) * TIOCLBIC, or TIOCLSET. * * All other tp fields will remain unmodifed, since the struct - * termious is a local stack copy from ttcompat(), and not the - * real thing. A subsequent call to ttioctl() in ttcompat(), - * however, may result in subsequent changes. + * termios is a local stack copy from ttcompat(), and not the + * real thing. A subsequent call to ttioctl_locked() in + * ttcompat(), however, may result in subsequent changes. + * + * WARNING: This compatibility code is not 6/432 clean; it will only + * work for 32 bit processes on 32 bit kernels or 64 bit + * processes on 64 bit kernels. We are not addressing this + * due to . 
*/ -__private_extern__ int +static int ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term) { switch (*com) { @@ -314,8 +325,8 @@ ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term) * ttcompat * * Description: For 'set' commands, convert the command and arguments as - * necessary, and call ttioctl(), returning the result as - * our result; for 'get' commands, obtain the requested data + * necessary, and call ttioctl_locked(), returning the result + * as our result; for 'get' commands, obtain the requested data * from the appropriate source, and return it in the expected * format. If the command is not recognized, return EINVAL. * @@ -336,12 +347,12 @@ ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term) * terminal with no associated session, * or for which there is a session, but * no session leader. - * EIOCTL If the command cannot be handled at + * ENOTTY If the command cannot be handled at * this layer, this will be returned. - * * Any value returned by ttioctl(), if a - * set command is requested. + * * Any value returned by ttioctl_locked(), + * if a set command is requested. * - * NOTES: The process pointer may be a proxy on whose behalf we are + * Notes: The process pointer may be a proxy on whose behalf we are * operating, so it is not safe to simply use current_process() * instead. */ @@ -368,7 +379,7 @@ ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p) term = tp->t_termios; if ((error = ttsetcompat(tp, &com, data, &term)) != 0) return error; - return ttioctl(tp, com, (caddr_t) &term, flag, p); + return ttioctl_locked(tp, com, (caddr_t) &term, flag, p); } case TIOCGETP: /* @@ -450,7 +461,7 @@ ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p) { int ldisczero = 0; - return (ttioctl(tp, TIOCSETD, + return (ttioctl_locked(tp, TIOCSETD, *(int *)data == 2 ? 
(caddr_t)&ldisczero : data, flag, p)); } @@ -459,7 +470,7 @@ ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p) * Become the console device. */ *(int *)data = 1; - return (ttioctl(tp, TIOCCONS, data, flag, p)); + return (ttioctl_locked(tp, TIOCCONS, data, flag, p)); case TIOCGSID: /* @@ -741,4 +752,3 @@ ttcompatsetlflags(struct tty *tp, struct termios *t) t->c_lflag = lflag; t->c_cflag = cflag; } -#endif /* COMPAT_43_TTY || COMPAT_SUNOS */ diff --git a/bsd/kern/tty_conf.c b/bsd/kern/tty_conf.c index 44f6b12d5..585af4b71 100644 --- a/bsd/kern/tty_conf.c +++ b/bsd/kern/tty_conf.c @@ -100,12 +100,8 @@ struct linesw linesw[MAXLDISC] = l_noioctl, ttyinput, ttwwakeup, ttymodem }, NODISC(1), /* 1- defunct */ /* 2- NTTYDISC */ -#if COMPAT_43_TTY { ttyopen, ttylclose, ttread, ttwrite, l_noioctl, ttyinput, ttwwakeup, ttymodem }, -#else - NODISC(2), -#endif NODISC(3), /* TABLDISC */ NODISC(4), /* SLIPDISC */ NODISC(5), /* PPPDISC */ diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c index 0f05583d5..d7c8edbf4 100644 --- a/bsd/kern/tty_ptmx.c +++ b/bsd/kern/tty_ptmx.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include /* _devfs_setattr() */ @@ -307,6 +307,8 @@ static struct _ptmx_ioctl_state { * * Returns: NULL Did not exist/could not create * !NULL structure corresponding minor number + * + * Locks: tty_lock() on ptmx_ioctl->pt_tty NOT held on entry or exit. 
*/ static struct ptmx_ioctl * ptmx_get_ioctl(int minor, int open_flag) @@ -395,6 +397,9 @@ ptmx_get_ioctl(int minor, int open_flag) makedev(ptsd_major, minor), DEVFS_CHAR, UID_ROOT, GID_TTY, 0620, PTSD_TEMPLATE, minor); + if (_state.pis_ioctl_list[minor]->pt_devhandle == NULL) { + printf("devfs_make_node() call failed for ptmx_get_ioctl()!!!!\n"); + } } else if (open_flag & PF_OPEN_S) { DEVFS_LOCK(); _state.pis_ioctl_list[minor]->pt_flags |= PF_OPEN_S; @@ -403,22 +408,15 @@ ptmx_get_ioctl(int minor, int open_flag) return (_state.pis_ioctl_list[minor]); } +/* + * Locks: tty_lock() of old_ptmx_ioctl->pt_tty NOT held for this call. + */ static int ptmx_free_ioctl(int minor, int open_flag) { struct ptmx_ioctl *old_ptmx_ioctl = NULL; DEVFS_LOCK(); -#if 5161374 - /* - * We have to check after taking the DEVFS_LOCK, since the pointer - * is protected by the lock - */ - if (_state.pis_ioctl_list[minor] == NULL) { - DEVFS_UNLOCK(); - return (ENXIO); - } -#endif /* 5161374 */ _state.pis_ioctl_list[minor]->pt_flags &= ~(open_flag); /* @@ -514,21 +512,20 @@ ptsd_open(dev_t dev, int flag, __unused int devtype, __unused proc_t p) struct tty *tp; struct ptmx_ioctl *pti; int error; - boolean_t funnel_state; if ((pti = ptmx_get_ioctl(minor(dev), 0)) == NULL) { return (ENXIO); } - tp = pti->pt_tty; if (!(pti->pt_flags & PF_UNLOCKED)) { return (EAGAIN); } - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); if ((tp->t_state & TS_ISOPEN) == 0) { - ttychars(tp); /* Set up default chars */ + termioschars(&tp->t_termios); /* Set up default chars */ tp->t_iflag = TTYDEF_IFLAG; tp->t_oflag = TTYDEF_OFLAG; tp->t_lflag = TTYDEF_LFLAG; @@ -555,7 +552,7 @@ ptsd_open(dev_t dev, int flag, __unused int devtype, __unused proc_t p) if (error == 0) ptmx_wakeup(tp, FREAD|FWRITE); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } @@ -565,7 +562,6 @@ ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t 
p) struct tty *tp; struct ptmx_ioctl *pti; int err; - boolean_t funnel_state; /* * This is temporary until the VSX conformance tests @@ -577,13 +573,9 @@ ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t p) int save_timeout; #endif pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); #ifdef FIX_VSX_HANG save_timeout = tp->t_timeout; @@ -595,7 +587,8 @@ ptsd_close(dev_t dev, int flag, __unused int mode, __unused proc_t p) #ifdef FIX_VSX_HANG tp->t_timeout = save_timeout; #endif - (void) thread_funnel_set(kernel_flock, funnel_state); + + tty_unlock(tp); /* unconditional, just like ttyclose() */ ptmx_free_ioctl(minor(dev), PF_OPEN_S); @@ -612,18 +605,12 @@ ptsd_read(dev_t dev, struct uio *uio, int flag) struct ptmx_ioctl *pti; int error = 0; struct uthread *ut; - boolean_t funnel_state; struct pgrp * pg; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); ut = (struct uthread *)get_bsdthread_info(current_thread()); again: @@ -640,13 +627,20 @@ ptsd_read(dev_t dev, struct uio *uio, int flag) error = EIO; goto out; } + /* + * SAFE: We about to drop the lock ourselves by + * SAFE: erroring out or sleeping anyway. 
+ */ + tty_unlock(tp); if (pg->pg_jobc == 0) { pg_rele(pg); + tty_lock(tp); error = EIO; goto out; } pgsignal(pg, SIGTTIN, 1); pg_rele(pg); + tty_lock(tp); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH | PTTYBLOCK, "ptsd_bg", 0); @@ -654,8 +648,10 @@ ptsd_read(dev_t dev, struct uio *uio, int flag) goto out; } if (tp->t_canq.c_cc == 0) { - if (flag & IO_NDELAY) - return (EWOULDBLOCK); + if (flag & IO_NDELAY) { + error = EWOULDBLOCK; + goto out; + } error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, "ptsd_in", 0); if (error) @@ -683,7 +679,7 @@ ptsd_read(dev_t dev, struct uio *uio, int flag) error = (*linesw[tp->t_line].l_read)(tp, uio, flag); ptmx_wakeup(tp, FWRITE); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } @@ -698,43 +694,35 @@ ptsd_write(dev_t dev, struct uio *uio, int flag) struct tty *tp; struct ptmx_ioctl *pti; int error; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ + tp = pti->pt_tty; + tty_lock(tp); if (tp->t_oproc == 0) error = EIO; else error = (*linesw[tp->t_line].l_write)(tp, uio, flag); - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } /* * Start output on pseudo-tty. * Wake up process selecting or sleeping for input from controlling tty. + * + * t_oproc for this driver; called from within the line discipline + * + * Locks: Assumes tp is locked on entry, remains locked on exit */ static void ptsd_start(struct tty *tp) { struct ptmx_ioctl *pti; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(tp->t_dev), 0); -#if 5161374 - if (pti == NULL) - return; /* XXX ENXIO, but this function is void! 
*/ -#endif /* 5161374 */ - - funnel_state = thread_funnel_set(kernel_flock, TRUE); if (tp->t_state & TS_TTSTOP) goto out; @@ -744,23 +732,18 @@ ptsd_start(struct tty *tp) } ptmx_wakeup(tp, FREAD); out: - (void) thread_funnel_set(kernel_flock, funnel_state); return; } +/* + * Locks: Assumes tty_lock() is held over this call. + */ static void ptmx_wakeup(struct tty *tp, int flag) { struct ptmx_ioctl *pti; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(tp->t_dev), 0); -#if 5161374 - if (pti == NULL) - return; /* XXX ENXIO, but this function is void! */ -#endif /* 5161374 */ - - funnel_state = thread_funnel_set(kernel_flock, TRUE); if (flag & FREAD) { selwakeup(&pti->pt_selr); @@ -770,7 +753,6 @@ ptmx_wakeup(struct tty *tp, int flag) selwakeup(&pti->pt_selw); wakeup(TSA_PTC_WRITE(tp)); } - (void) thread_funnel_set(kernel_flock, funnel_state); } FREE_BSDSTATIC int @@ -779,7 +761,6 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) struct tty *tp; struct ptmx_ioctl *pti; int error = 0; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M); if (pti == NULL) { @@ -787,12 +768,13 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) } else if (pti == (struct ptmx_ioctl*)-1) { return (EREDRIVEOPEN); } - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); /* If master is open OR slave is still draining, pty is still busy */ if (tp->t_oproc || (tp->t_state & TS_ISOPEN)) { + tty_unlock(tp); /* * If master is closed, we are the only reference, so we * need to clear the master open bit @@ -800,7 +782,7 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) if (!tp->t_oproc) ptmx_free_ioctl(minor(dev), PF_OPEN_M); error = EBUSY; - goto out; + goto err; } tp->t_oproc = ptsd_start; CLR(tp->t_state, TS_ZOMBIE); @@ -810,8 +792,8 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) 
(void)(*linesw[tp->t_line].l_modem)(tp, 1); tp->t_lflag &= ~EXTPROC; -out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); +err: return (error); } @@ -820,16 +802,11 @@ ptmx_close(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) { struct tty *tp; struct ptmx_ioctl *pti; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); (void)(*linesw[tp->t_line].l_modem)(tp, 0); @@ -849,7 +826,7 @@ ptmx_close(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) tp->t_oproc = 0; /* mark closed */ - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); ptmx_free_ioctl(minor(dev), PF_OPEN_M); @@ -863,16 +840,11 @@ ptmx_read(dev_t dev, struct uio *uio, int flag) struct ptmx_ioctl *pti; char buf[BUFSIZ]; int error = 0, cc; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); /* * We want to block until the slave @@ -911,7 +883,7 @@ ptmx_read(dev_t dev, struct uio *uio, int flag) error = EWOULDBLOCK; goto out; } - error = tsleep(TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptmx_in", 0); + error = ttysleep(tp, TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptmx_in", 0); if (error) goto out; } @@ -926,24 +898,22 @@ ptmx_read(dev_t dev, struct uio *uio, int flag) (*linesw[tp->t_line].l_start)(tp); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } +/* + * Line discipline callback + * + * Locks: tty_lock() is assumed held on entry and exit. 
+ */ FREE_BSDSTATIC int ptsd_stop(struct tty *tp, int flush) { struct ptmx_ioctl *pti; int flag; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(tp->t_dev), 0); -#if 5161374 - if (pti == NULL) - return(ENXIO); -#endif /* 5161374 */ - - funnel_state = thread_funnel_set(kernel_flock, TRUE); /* note: FLUSHREAD and FLUSHWRITE already ok */ if (flush == 0) { @@ -960,8 +930,6 @@ ptsd_stop(struct tty *tp, int flush) flag |= FREAD; ptmx_wakeup(tp, flag); - (void) thread_funnel_set(kernel_flock, funnel_state); - return (0); } @@ -1019,33 +987,38 @@ ptsd_select(dev_t dev, int rw, void *wql, proc_t p) { struct ptmx_ioctl *pti; struct tty *tp; + int retval = 0; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ + tp = pti->pt_tty; if (tp == NULL) return (ENXIO); + tty_lock(tp); + switch (rw) { case FREAD: - if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) - return(1); + if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) { + retval = 1; + break; + } selrecord(p, &tp->t_rsel, wql); break; case FWRITE: if ((tp->t_outq.c_cc <= tp->t_lowat && ISSET(tp->t_state, TS_CONNECTED)) || ISSET(tp->t_state, TS_ZOMBIE)) { - return (1); + retval = 1; + break; } selrecord(p, &tp->t_wsel, wql); break; } - return (0); + + tty_unlock(tp); + return (retval); } FREE_BSDSTATIC int @@ -1054,23 +1027,17 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p) struct tty *tp; struct ptmx_ioctl *pti; int retval = 0; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); if ((tp->t_state & TS_CONNECTED) == 0) { retval = 1; goto out; } switch (rw) { - case FREAD: /* * Need to block timeouts (ttrstart). 
@@ -1078,7 +1045,7 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p) if ((tp->t_state&TS_ISOPEN) && tp->t_outq.c_cc && (tp->t_state&TS_TTSTOP) == 0) { retval = 1; - goto out; + break; } /* FALLTHROUGH */ @@ -1087,27 +1054,26 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p) ((pti->pt_flags & PF_PKT && pti->pt_send) || (pti->pt_flags & PF_UCNTL && pti->pt_ucntl))) { retval = 1; - goto out; + break; } selrecord(p, &pti->pt_selr, wql); break; - case FWRITE: if (tp->t_state&TS_ISOPEN) { if (pti->pt_flags & PF_REMOTE) { if (tp->t_canq.c_cc == 0) { retval = 1; - goto out; + break; } } else { if (tp->t_rawq.c_cc + tp->t_canq.c_cc < TTYHOG-2) { retval = 1; - goto out; + break; } if (tp->t_canq.c_cc == 0 && (tp->t_lflag&ICANON)) { retval = 1; - goto out; + break; } } } @@ -1116,7 +1082,7 @@ ptmx_select(dev_t dev, int rw, void *wql, proc_t p) } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (retval); } @@ -1142,16 +1108,11 @@ ptmx_write(dev_t dev, struct uio *uio, int flag) u_char locbuf[BUFSIZ]; int wcnt = 0; int error = 0; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ - tp = pti->pt_tty; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tp = pti->pt_tty; + tty_lock(tp); again: if ((tp->t_state&TS_ISOPEN) == 0) @@ -1224,9 +1185,11 @@ ptmx_write(dev_t dev, struct uio *uio, int flag) } cc = 0; } + out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); + block: /* * Come here to wait for slave to open, for space @@ -1245,7 +1208,7 @@ ptmx_write(dev_t dev, struct uio *uio, int flag) error = EWOULDBLOCK; goto out; } - error = tsleep(TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptmx_out", 0); + error = ttysleep(tp, TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptmx_out", 0); if (error) { /* adjust for data copied in but not written */ uio_setresid(uio, (uio_resid(uio) + cc)); @@ -1262,17 
+1225,13 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) struct ptmx_ioctl *pti; u_char *cc; int stop, error = 0; - boolean_t funnel_state; pti = ptmx_get_ioctl(minor(dev), 0); -#if 5161374 - if (pti == NULL || pti->pt_tty == NULL) - return(ENXIO); -#endif /* 5161374 */ + tp = pti->pt_tty; - cc = tp->t_cc; + tty_lock(tp); - funnel_state = thread_funnel_set(kernel_flock, TRUE); + cc = tp->t_cc; /* * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. @@ -1341,14 +1300,15 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) ttyflush(tp, FREAD|FWRITE); goto out; -#if COMPAT_43_TTY case TIOCSETP: case TIOCSETN: -#endif case TIOCSETD: - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: ndflush(&tp->t_outq, tp->t_outq.c_cc); break; @@ -1360,10 +1320,18 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) } if ((tp->t_lflag&NOFLSH) == 0) ttyflush(tp, FREAD|FWRITE); - tty_pgsignal(tp, *(unsigned int *)data, 1); if ((*(unsigned int *)data == SIGINFO) && ((tp->t_lflag&NOKERNINFO) == 0)) - ttyinfo(tp); + ttyinfo_locked(tp); + /* + * SAFE: All callers drop the lock on return and + * SAFE: the linesw[] will short circut this call + * SAFE: if the ioctl() is eaten before the lower + * SAFE: level code gets to see it. 
+ */ + tty_unlock(tp); + tty_pgsignal(tp, *(unsigned int *)data, 1); + tty_lock(tp); goto out; case TIOCPTYGRANT: /* grantpt(3) */ @@ -1397,7 +1365,7 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error == ENOTTY) { - error = ttioctl(tp, cmd, data, flag, p); + error = ttioctl_locked(tp, cmd, data, flag, p); if (error == ENOTTY) { if (pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) { /* Process the UIOCMD ioctl group */ @@ -1427,20 +1395,19 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) */ if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { switch(cmd) { - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: -#if COMPAT_43_TTY + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: case TIOCSETP: case TIOCSETN: -#endif -#if COMPAT_43_TTY || defined(COMPAT_SUNOS) case TIOCSETC: case TIOCSLTC: case TIOCLBIS: case TIOCLBIC: case TIOCLSET: -#endif pti->pt_send |= TIOCPKT_IOCTL; ptmx_wakeup(tp, FREAD); default: @@ -1465,6 +1432,173 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) } } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } + +/* + * kqueue support. 
+ */ +int ptsd_kqfilter(dev_t, struct knote *); +static void ptsd_kqops_read_detach(struct knote *); +static int ptsd_kqops_read_event(struct knote *, long); +static void ptsd_kqops_write_detach(struct knote *); +static int ptsd_kqops_write_event(struct knote *, long); + +static struct filterops ptsd_kqops_read = { + .f_isfd = 1, + .f_detach = ptsd_kqops_read_detach, + .f_event = ptsd_kqops_read_event, +}; +static struct filterops ptsd_kqops_write = { + .f_isfd = 1, + .f_detach = ptsd_kqops_write_detach, + .f_event = ptsd_kqops_write_event, +}; + +static void +ptsd_kqops_read_detach(struct knote *kn) +{ + struct ptmx_ioctl *pti; + struct tty *tp; + dev_t dev = (dev_t) kn->kn_hookid; + + pti = ptmx_get_ioctl(minor(dev), 0); + tp = pti->pt_tty; + + if (tp == NULL) + return; + + tty_lock(tp); + KNOTE_DETACH(&tp->t_rsel.si_note, kn); + tty_unlock(tp); + + kn->kn_hookid = 0; +} + +static int +ptsd_kqops_read_event(struct knote *kn, long hint) +{ + struct ptmx_ioctl *pti; + struct tty *tp; + dev_t dev = (dev_t) kn->kn_hookid; + int retval = 0; + + pti = ptmx_get_ioctl(minor(dev), 0); + tp = pti->pt_tty; + + if (tp == NULL) + return (ENXIO); + + if (hint == 0) + tty_lock(tp); + + kn->kn_data = ttnread(tp); + if (kn->kn_data > 0) { + retval = 1; + } + + if (ISSET(tp->t_state, TS_ZOMBIE)) { + kn->kn_flags |= EV_EOF; + retval = 1; + } + + if (hint == 0) + tty_unlock(tp); + return (retval); +} +static void +ptsd_kqops_write_detach(struct knote *kn) +{ + struct ptmx_ioctl *pti; + struct tty *tp; + dev_t dev = (dev_t) kn->kn_hookid; + + pti = ptmx_get_ioctl(minor(dev), 0); + tp = pti->pt_tty; + + if (tp == NULL) + return; + + tty_lock(tp); + KNOTE_DETACH(&tp->t_wsel.si_note, kn); + tty_unlock(tp); + + kn->kn_hookid = 0; +} + +static int +ptsd_kqops_write_event(struct knote *kn, long hint) +{ + struct ptmx_ioctl *pti; + struct tty *tp; + dev_t dev = (dev_t) kn->kn_hookid; + int retval = 0; + + pti = ptmx_get_ioctl(minor(dev), 0); + tp = pti->pt_tty; + + if (tp == NULL) + return 
(ENXIO); + + if (hint == 0) + tty_lock(tp); + + if ((tp->t_outq.c_cc <= tp->t_lowat) && + ISSET(tp->t_state, TS_CONNECTED)) { + kn->kn_data = tp->t_outq.c_cn - tp->t_outq.c_cc; + retval = 1; + } + + if (ISSET(tp->t_state, TS_ZOMBIE)) { + kn->kn_flags |= EV_EOF; + retval = 1; + } + + if (hint == 0) + tty_unlock(tp); + return (retval); + +} + +int +ptsd_kqfilter(dev_t dev, struct knote *kn) +{ + struct tty *tp = NULL; + struct ptmx_ioctl *pti = NULL; + int retval = 0; + + /* make sure we're talking about the right device type */ + if (cdevsw[major(dev)].d_open != ptsd_open) { + return (EINVAL); + } + + if ((pti = ptmx_get_ioctl(minor(dev), 0)) == NULL) { + return (ENXIO); + } + + tp = pti->pt_tty; + tty_lock(tp); + + kn->kn_hookid = dev; + + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &ptsd_kqops_read; + SLIST_INIT(&tp->t_rsel.si_note); + KNOTE_ATTACH(&tp->t_rsel.si_note, kn); + break; + case EVFILT_WRITE: + kn->kn_fop = &ptsd_kqops_write; + SLIST_INIT(&tp->t_wsel.si_note); + KNOTE_ATTACH(&tp->t_wsel.si_note, kn); + break; + default: + retval = EINVAL; + break; + } + + tty_unlock(tp); + return (retval); +} + diff --git a/bsd/kern/tty_pty.c b/bsd/kern/tty_pty.c index ed2042b7f..8fb35c04a 100644 --- a/bsd/kern/tty_pty.c +++ b/bsd/kern/tty_pty.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include @@ -179,16 +179,14 @@ ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) { struct tty *tp; int error; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); /* * You will see this sort of code coming up in diffs later both * the ttymalloc and the tp indirection. 
*/ if (minor(dev) >= npty) { error = ENXIO; - goto out; + goto err; } if (!pt_tty[minor(dev)]) { /* @@ -197,12 +195,15 @@ ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) */ if ((tp = pt_tty[minor(dev)] = ttymalloc()) == NULL) { error = ENXIO; - goto out; + goto err; } } else tp = pt_tty[minor(dev)]; + + tty_lock(tp); + if ((tp->t_state & TS_ISOPEN) == 0) { - ttychars(tp); /* Set up default chars */ + termioschars(&tp->t_termios); /* Set up default chars */ tp->t_iflag = TTYDEF_IFLAG; tp->t_oflag = TTYDEF_OFLAG; tp->t_lflag = TTYDEF_LFLAG; @@ -226,8 +227,10 @@ ptsopen(dev_t dev, int flag, __unused int devtype, __unused struct proc *p) error = (*linesw[tp->t_line].l_open)(dev, tp); if (error == 0) ptcwakeup(tp, FREAD|FWRITE); + out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); +err: return (error); } @@ -236,7 +239,7 @@ ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) { struct tty *tp; int err; - boolean_t funnel_state; + /* * This is temporary until the VSX conformance tests * are fixed. 
They are hanging with a deadlock @@ -246,9 +249,9 @@ ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) #ifdef FIX_VSX_HANG int save_timeout; #endif - funnel_state = thread_funnel_set(kernel_flock, TRUE); tp = pt_tty[minor(dev)]; + tty_lock(tp); #ifdef FIX_VSX_HANG save_timeout = tp->t_timeout; tp->t_timeout = 60; @@ -259,7 +262,7 @@ ptsclose(dev_t dev, int flag, __unused int mode, __unused proc_t p) #ifdef FIX_VSX_HANG tp->t_timeout = save_timeout; #endif - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (err); } @@ -271,11 +274,9 @@ ptsread(dev_t dev, struct uio *uio, int flag) struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; int error = 0; struct uthread *ut; - boolean_t funnel_state; struct pgrp *pg; - funnel_state = thread_funnel_set(kernel_flock, TRUE); - + tty_lock(tp); ut = (struct uthread *)get_bsdthread_info(current_thread()); again: @@ -294,13 +295,20 @@ ptsread(dev_t dev, struct uio *uio, int flag) error = EIO; goto out; } + /* + * SAFE: We about to drop the lock ourselves by + * SAFE: erroring out or sleeping anyway. 
+ */ + tty_unlock(tp); if (pg->pg_jobc == 0) { pg_rele(pg); + tty_lock(tp); error = EIO; goto out; } pgsignal(pg, SIGTTIN, 1); pg_rele(pg); + tty_lock(tp); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH | PTTYBLOCK, "ptsbg", 0); @@ -308,8 +316,10 @@ ptsread(dev_t dev, struct uio *uio, int flag) goto out; } if (tp->t_canq.c_cc == 0) { - if (flag & IO_NDELAY) - return (EWOULDBLOCK); + if (flag & IO_NDELAY) { + error = EWOULDBLOCK; + goto out; + } error = ttysleep(tp, TSA_PTS_READ(tp), TTIPRI | PCATCH, "ptsin", 0); if (error) @@ -337,7 +347,7 @@ ptsread(dev_t dev, struct uio *uio, int flag) error = (*linesw[tp->t_line].l_read)(tp, uio, flag); ptcwakeup(tp, FWRITE); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); return (error); } @@ -351,31 +361,33 @@ ptswrite(dev_t dev, struct uio *uio, int flag) { struct tty *tp; int error; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); tp = pt_tty[minor(dev)]; + + tty_lock(tp); + if (tp->t_oproc == 0) error = EIO; else error = (*linesw[tp->t_line].l_write)(tp, uio, flag); - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (error); } /* * Start output on pseudo-tty. * Wake up process selecting or sleeping for input from controlling tty. + * + * t_oproc for this driver; called from within the line discipline + * + * Locks: Assumes tp is locked on entry, remains locked on exit */ static void ptsstart(struct tty *tp) { struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); if (tp->t_state & TS_TTSTOP) goto out; @@ -385,17 +397,16 @@ ptsstart(struct tty *tp) } ptcwakeup(tp, FREAD); out: - (void) thread_funnel_set(kernel_flock, funnel_state); return; } +/* + * Locks: Assumes tty_lock() is held over this call. 
+ */ static void ptcwakeup(struct tty *tp, int flag) { struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); if (flag & FREAD) { selwakeup(&pti->pt_selr); @@ -405,7 +416,6 @@ ptcwakeup(struct tty *tp, int flag) selwakeup(&pti->pt_selw); wakeup(TSA_PTC_WRITE(tp)); } - (void) thread_funnel_set(kernel_flock, funnel_state); } __private_extern__ int @@ -414,9 +424,6 @@ ptcopen(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) struct tty *tp; struct pt_ioctl *pti; int error = 0; - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); if (minor(dev) >= npty) { error = ENXIO; @@ -426,36 +433,39 @@ ptcopen(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p) tp = pt_tty[minor(dev)] = ttymalloc(); } else tp = pt_tty[minor(dev)]; + + tty_lock(tp); + /* If master is open OR slave is still draining, pty is still busy */ if (tp->t_oproc || (tp->t_state & TS_ISOPEN)) { error = EBUSY; - goto out; - } - tp->t_oproc = ptsstart; - CLR(tp->t_state, TS_ZOMBIE); + } else { + tp->t_oproc = ptsstart; + CLR(tp->t_state, TS_ZOMBIE); #ifdef sun4c - tp->t_stop = ptsstop; + tp->t_stop = ptsstop; #endif - (void)(*linesw[tp->t_line].l_modem)(tp, 1); - tp->t_lflag &= ~EXTPROC; - pti = &pt_ioctl[minor(dev)]; - pti->pt_flags = 0; - pti->pt_send = 0; - pti->pt_ucntl = 0; + (void)(*linesw[tp->t_line].l_modem)(tp, 1); + tp->t_lflag &= ~EXTPROC; + pti = &pt_ioctl[minor(dev)]; + pti->pt_flags = 0; + pti->pt_send = 0; + pti->pt_ucntl = 0; + } + + tty_unlock(tp); + out: - (void) thread_funnel_set(kernel_flock, funnel_state); return (error); } __private_extern__ int ptcclose(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) { - struct tty *tp; - boolean_t funnel_state; + struct tty *tp = pt_tty[minor(dev)]; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tty_lock(tp); - tp = pt_tty[minor(dev)]; 
(void)(*linesw[tp->t_line].l_modem)(tp, 0); /* @@ -474,7 +484,8 @@ ptcclose(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) tp->t_oproc = 0; /* mark closed */ - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (0); } @@ -485,9 +496,8 @@ ptcread(dev_t dev, struct uio *uio, int flag) struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; char buf[BUFSIZ]; int error = 0, cc; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tty_lock(tp); /* * We want to block until the slave @@ -526,7 +536,7 @@ ptcread(dev_t dev, struct uio *uio, int flag) error = EWOULDBLOCK; goto out; } - error = tsleep(TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptcin", 0); + error = ttysleep(tp, TSA_PTC_READ(tp), TTIPRI | PCATCH, "ptcin", 0); if (error) goto out; } @@ -541,18 +551,23 @@ ptcread(dev_t dev, struct uio *uio, int flag) (*linesw[tp->t_line].l_start)(tp); out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (error); } +/* + * Line discipline callback + * + * Locks: tty_lock() is assumed held on entry and exit. 
+ */ __private_extern__ void ptsstop(struct tty *tp, int flush) { - struct pt_ioctl *pti = &pt_ioctl[minor(tp->t_dev)]; + struct pt_ioctl *pti; int flag; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + pti = &pt_ioctl[minor(tp->t_dev)]; /* note: FLUSHREAD and FLUSHWRITE already ok */ if (flush == 0) { @@ -568,8 +583,6 @@ ptsstop(struct tty *tp, int flush) if (flush & FWRITE) flag |= FREAD; ptcwakeup(tp, flag); - - (void) thread_funnel_set(kernel_flock, funnel_state); } __private_extern__ int @@ -578,9 +591,8 @@ ptcselect(dev_t dev, int rw, void *wql, struct proc *p) struct tty *tp = pt_tty[minor(dev)]; struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; int retval = 0; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tty_lock(tp); if ((tp->t_state & TS_CONNECTED) == 0) { retval = 1; @@ -633,7 +645,8 @@ ptcselect(dev_t dev, int rw, void *wql, struct proc *p) } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (retval); } @@ -647,9 +660,8 @@ ptcwrite(dev_t dev, struct uio *uio, int flag) int wcnt = 0; struct pt_ioctl *pti = &pt_ioctl[minor(dev)]; int error = 0; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tty_lock(tp); again: if ((tp->t_state&TS_ISOPEN) == 0) @@ -723,8 +735,10 @@ ptcwrite(dev_t dev, struct uio *uio, int flag) cc = 0; } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (error); + block: /* * Come here to wait for slave to open, for space @@ -743,7 +757,7 @@ ptcwrite(dev_t dev, struct uio *uio, int flag) error = EWOULDBLOCK; goto out; } - error = tsleep(TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptcout", 0); + error = ttysleep(tp, TSA_PTC_WRITE(tp), TTOPRI | PCATCH, "ptcout", 0); if (error) { /* adjust for data copied in but not written */ uio_setresid(uio, (uio_resid(uio) + cc)); @@ -759,9 +773,8 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) struct 
pt_ioctl *pti = &pt_ioctl[minor(dev)]; u_char *cc = tp->t_cc; int stop, error = 0; - boolean_t funnel_state; - funnel_state = thread_funnel_set(kernel_flock, TRUE); + tty_lock(tp); /* * IF CONTROLLER STTY THEN MUST FLUSH TO PREVENT A HANG. @@ -830,14 +843,15 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) ttyflush(tp, FREAD|FWRITE); goto out; -#if COMPAT_43_TTY case TIOCSETP: case TIOCSETN: -#endif case TIOCSETD: - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: ndflush(&tp->t_outq, tp->t_outq.c_cc); break; @@ -849,10 +863,18 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) } if ((tp->t_lflag&NOFLSH) == 0) ttyflush(tp, FREAD|FWRITE); - tty_pgsignal(tp, *(unsigned int *)data, 1); if ((*(unsigned int *)data == SIGINFO) && ((tp->t_lflag&NOKERNINFO) == 0)) - ttyinfo(tp); + ttyinfo_locked(tp); + /* + * SAFE: All callers drop the lock on return and + * SAFE: the linesw[] will short circut this call + * SAFE: if the ioctl() is eaten before the lower + * SAFE: level code gets to see it. 
+ */ + tty_unlock(tp); + tty_pgsignal(tp, *(unsigned int *)data, 1); + tty_lock(tp); goto out; case TIOCPTYGRANT: /* grantpt(3) */ @@ -887,7 +909,7 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) } error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, p); if (error == ENOTTY) { - error = ttioctl(tp, cmd, data, flag, p); + error = ttioctl_locked(tp, cmd, data, flag, p); if (error == ENOTTY) { if (pti->pt_flags & PF_UCNTL && (cmd & ~0xff) == UIOCCMD(0)) { /* Process the UIOCMD ioctl group */ @@ -917,20 +939,19 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) */ if ((tp->t_lflag&EXTPROC) && (pti->pt_flags & PF_PKT)) { switch(cmd) { - case TIOCSETA: - case TIOCSETAW: - case TIOCSETAF: -#if COMPAT_43_TTY + case TIOCSETA_32: + case TIOCSETAW_32: + case TIOCSETAF_32: + case TIOCSETA_64: + case TIOCSETAW_64: + case TIOCSETAF_64: case TIOCSETP: case TIOCSETN: -#endif -#if COMPAT_43_TTY || defined(COMPAT_SUNOS) case TIOCSETC: case TIOCSLTC: case TIOCLBIS: case TIOCLBIC: case TIOCLSET: -#endif pti->pt_send |= TIOCPKT_IOCTL; ptcwakeup(tp, FREAD); default: @@ -955,6 +976,7 @@ ptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) } } out: - (void) thread_funnel_set(kernel_flock, funnel_state); + tty_unlock(tp); + return (error); } diff --git a/bsd/kern/tty_tty.c b/bsd/kern/tty_tty.c index 1c9882d96..29868fadc 100644 --- a/bsd/kern/tty_tty.c +++ b/bsd/kern/tty_tty.c @@ -158,7 +158,7 @@ cttyioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, proc_t p) if (cmd == TIOCNOTTY) { sessp = proc_session(p); if (!SESS_LEADER(p, sessp)) { - OSBitAndAtomic(~((uint32_t)P_CONTROLT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTROLT), &p->p_flag); if (sessp != SESSION_NULL) session_rele(sessp); error = 0; diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index df8047ea8..1f1f99d3e 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -55,6 +55,7 @@ #include #include #include +#include 
#include #include @@ -87,7 +88,7 @@ kern_return_t ubc_page_op_with_control( #if DIAGNOSTIC #if defined(assert) -#undef assert() +#undef assert #endif #define assert(cond) \ ((void) ((cond) ? 0 : panic("Assert failed: %s", # cond))) @@ -107,6 +108,9 @@ struct zone *ubc_info_zone; * CODESIGNING * Routines to navigate code signing data structures in the kernel... */ + +extern int cs_debug; + static boolean_t cs_valid_range( const void *start, @@ -141,6 +145,7 @@ enum { CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */ }; +static const uint32_t supportsScatter = 0x20100; // first version to support scatter option /* * Structure of an embedded-signature SuperBlob @@ -158,6 +163,12 @@ typedef struct __SuperBlob { /* followed by Blobs in no particular order as indicated by offsets in index */ } CS_SuperBlob; +struct Scatter { + uint32_t count; // number of pages; zero for sentinel (only) + uint32_t base; // first page number + uint64_t targetOffset; // offset in target + uint64_t spare; // reserved +}; /* * C form of a CodeDirectory. 
@@ -177,6 +188,8 @@ typedef struct __CodeDirectory { uint8_t spare1; /* unused (must be zero) */ uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */ uint32_t spare2; /* unused (must be zero) */ + /* Version 0x20100 */ + uint32_t scatterOffset; /* offset of optional scatter vector */ /* followed by dynamic content as located by offset fields above */ } CS_CodeDirectory; @@ -219,6 +232,7 @@ CS_CodeDirectory *findCodeDirectory( */ cd = (const CS_CodeDirectory *) embedded; } + if (cd && cs_valid_range(cd, cd + 1, lower_bound, upper_bound) && cs_valid_range(cd, (const char *) cd + ntohl(cd->length), @@ -250,21 +264,83 @@ hashes( char *upper_bound) { const unsigned char *base, *top, *hash; - uint32_t nCodeSlots; + uint32_t nCodeSlots = ntohl(cd->nCodeSlots); assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound)); - base = (const unsigned char *)cd + ntohl(cd->hashOffset); - nCodeSlots = ntohl(cd->nCodeSlots); - top = base + nCodeSlots * SHA1_RESULTLEN; - if (!cs_valid_range(base, top, - lower_bound, upper_bound) || - page > nCodeSlots) { - return NULL; - } - assert(page < nCodeSlots); + if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) { + /* Get first scatter struct */ + const struct Scatter *scatter = (const struct Scatter*) + ((const char*)cd + ntohl(cd->scatterOffset)); + uint32_t hashindex=0, scount, sbase=0; + /* iterate all scatter structs */ + do { + if((const char*)scatter > (const char*)cd + ntohl(cd->length)) { + if(cs_debug) { + printf("CODE SIGNING: Scatter extends past Code Directory\n"); + } + return NULL; + } + + scount = ntohl(scatter->count); + uint32_t new_base = ntohl(scatter->base); + + /* last scatter? 
*/ + if (scount == 0) { + return NULL; + } + + if((hashindex > 0) && (new_base <= sbase)) { + if(cs_debug) { + printf("CODE SIGNING: unordered Scatter, prev base %d, cur base %d\n", + sbase, new_base); + } + return NULL; /* unordered scatter array */ + } + sbase = new_base; + + /* this scatter beyond page we're looking for? */ + if (sbase > page) { + return NULL; + } + + if (sbase+scount >= page) { + /* Found the scatter struct that is + * referencing our page */ + + /* base = address of first hash covered by scatter */ + base = (const unsigned char *)cd + ntohl(cd->hashOffset) + + hashindex * SHA1_RESULTLEN; + /* top = address of first hash after this scatter */ + top = base + scount * SHA1_RESULTLEN; + if (!cs_valid_range(base, top, lower_bound, + upper_bound) || + hashindex > nCodeSlots) { + return NULL; + } + + break; + } + + /* this scatter struct is before the page we're looking + * for. Iterate. */ + hashindex+=scount; + scatter++; + } while(1); + + hash = base + (page - sbase) * SHA1_RESULTLEN; + } else { + base = (const unsigned char *)cd + ntohl(cd->hashOffset); + top = base + nCodeSlots * SHA1_RESULTLEN; + if (!cs_valid_range(base, top, lower_bound, upper_bound) || + page > nCodeSlots) { + return NULL; + } + assert(page < nCodeSlots); - hash = base + page * SHA1_RESULTLEN; + hash = base + page * SHA1_RESULTLEN; + } + if (!cs_valid_range(hash, hash + SHA1_RESULTLEN, lower_bound, upper_bound)) { hash = NULL; @@ -547,8 +623,10 @@ ubc_setsize(struct vnode *vp, off_t nsize) */ uip->ui_size = nsize; - if (nsize >= osize) /* Nothing more to do */ + if (nsize >= osize) { /* Nothing more to do */ + lock_vnode_and_post(vp, NOTE_EXTEND); return (1); /* return success */ + } /* * When the file shrinks, invalidate the pages beyond the @@ -585,6 +663,12 @@ ubc_setsize(struct vnode *vp, off_t nsize) lastpg += PAGE_SIZE_64; } if (olastpgend > lastpg) { + int flags; + + if (lastpg == 0) + flags = MEMORY_OBJECT_DATA_FLUSH_ALL; + else + flags = MEMORY_OBJECT_DATA_FLUSH; /* 
* invalidate the pages beyond the new EOF page * @@ -592,8 +676,7 @@ ubc_setsize(struct vnode *vp, off_t nsize) kret = memory_object_lock_request(control, (memory_object_offset_t)lastpg, (memory_object_size_t)(olastpgend - lastpg), NULL, NULL, - MEMORY_OBJECT_RETURN_NONE, MEMORY_OBJECT_DATA_FLUSH, - VM_PROT_NO_CHANGE); + MEMORY_OBJECT_RETURN_NONE, flags, VM_PROT_NO_CHANGE); if (kret != KERN_SUCCESS) printf("ubc_setsize: invalidate failed (error = %d)\n", kret); } @@ -844,7 +927,6 @@ ubc_setcred(struct vnode *vp, proc_t p) return (1); } - /* * ubc_getpager * @@ -1434,7 +1516,7 @@ ubc_isinuse_locked(struct vnode *vp, int busycount, int locked) if (!locked) - vnode_lock(vp); + vnode_lock_spin(vp); if ((vp->v_usecount - vp->v_kusecount) > busycount) retval = 1; @@ -1470,7 +1552,7 @@ ubc_unmap(struct vnode *vp) struct ubc_info *uip; int need_rele = 0; int need_wakeup = 0; - + if (vnode_getwithref(vp)) return; @@ -1486,14 +1568,14 @@ ubc_unmap(struct vnode *vp) SET(uip->ui_flags, UI_MAPBUSY); if (ISSET(uip->ui_flags, UI_ISMAPPED)) { - CLR(uip->ui_flags, UI_ISMAPPED); + CLR(uip->ui_flags, UI_ISMAPPED); need_rele = 1; } vnode_unlock(vp); - + if (need_rele) { - (void) VNOP_MNOMAP(vp, vfs_context_current()); - vnode_rele(vp); + (void)VNOP_MNOMAP(vp, vfs_context_current()); + vnode_rele(vp); } vnode_lock_spin(vp); @@ -1506,7 +1588,7 @@ ubc_unmap(struct vnode *vp) vnode_unlock(vp); if (need_wakeup) - wakeup(&uip->ui_flags); + wakeup(&uip->ui_flags); } /* @@ -1735,39 +1817,58 @@ kern_return_t ubc_create_upl( struct vnode *vp, off_t f_offset, - long bufsize, + int bufsize, upl_t *uplp, upl_page_info_t **plp, int uplflags) { memory_object_control_t control; - mach_msg_type_number_t count; - int ubcflags; kern_return_t kr; + + if (plp != NULL) + *plp = NULL; + *uplp = NULL; if (bufsize & 0xfff) return KERN_INVALID_ARGUMENT; - if (uplflags & UPL_FOR_PAGEOUT) { + if (uplflags & (UPL_UBC_MSYNC | UPL_UBC_PAGEOUT | UPL_UBC_PAGEIN)) { + + if (uplflags & UPL_UBC_MSYNC) { + uplflags &= 
UPL_RET_ONLY_DIRTY; + + uplflags |= UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE | + UPL_SET_INTERNAL | UPL_SET_LITE; + + } else if (uplflags & UPL_UBC_PAGEOUT) { + uplflags &= UPL_RET_ONLY_DIRTY; + + if (uplflags & UPL_RET_ONLY_DIRTY) + uplflags |= UPL_NOBLOCK; + + uplflags |= UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE | + UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE; + } else { + uplflags |= UPL_RET_ONLY_ABSENT | UPL_NOBLOCK | + UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | + UPL_SET_INTERNAL | UPL_SET_LITE; + } + } else { uplflags &= ~UPL_FOR_PAGEOUT; - ubcflags = UBC_FOR_PAGEOUT; - } else - ubcflags = UBC_FLAGS_NONE; - control = ubc_getobject(vp, ubcflags); + if (uplflags & UPL_WILL_BE_DUMPED) { + uplflags &= ~UPL_WILL_BE_DUMPED; + uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL); + } else + uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL); + } + control = ubc_getobject(vp, UBC_FLAGS_NONE); if (control == MEMORY_OBJECT_CONTROL_NULL) return KERN_INVALID_ARGUMENT; - if (uplflags & UPL_WILL_BE_DUMPED) { - uplflags &= ~UPL_WILL_BE_DUMPED; - uplflags |= (UPL_NO_SYNC|UPL_SET_INTERNAL); - } else - uplflags |= (UPL_NO_SYNC|UPL_CLEAN_IN_PLACE|UPL_SET_INTERNAL); - count = 0; - - kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, &count, uplflags); - if (plp != NULL) - *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp); + kr = memory_object_upl_request(control, f_offset, bufsize, uplp, NULL, NULL, uplflags); + if (kr == KERN_SUCCESS && plp != NULL) + *plp = UPL_GET_INTERNAL_PAGE_LIST(*uplp); return kr; } @@ -1926,8 +2027,8 @@ ubc_upl_commit( kern_return_t ubc_upl_commit_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int flags) { upl_page_info_t *pl; @@ -2001,8 +2102,8 @@ ubc_upl_commit_range( kern_return_t ubc_upl_abort_range( upl_t upl, - vm_offset_t offset, - vm_size_t size, + upl_offset_t offset, + upl_size_t size, int abort_flags) { kern_return_t kr; @@ -2117,7 +2218,6 @@ static volatile SInt32 cs_blob_count = 0; 
static SInt32 cs_blob_size_peak = 0; static UInt32 cs_blob_size_max = 0; static SInt32 cs_blob_count_peak = 0; -extern int cs_debug; int cs_validation = 1; @@ -2201,9 +2301,9 @@ ubc_cs_blob_add( } if (memory_object_round_page(blob_size) != (memory_object_size_t) round_page(size)) { - printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%x !?\n", - blob_size, size); - panic("XXX FBDP size mismatch 0x%llx 0x%x\n", blob_size, size); + printf("ubc_cs_blob_add: size mismatch 0x%llx 0x%lx !?\n", + blob_size, (size_t)size); + panic("XXX FBDP size mismatch 0x%llx 0x%lx\n", blob_size, (size_t)size); error = EINVAL; goto out; } @@ -2238,8 +2338,14 @@ ubc_cs_blob_add( blob->csb_flags = ntohl(cd->flags) | CS_VALID; blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); - blob->csb_start_offset = (blob->csb_end_offset - - (ntohl(cd->nCodeSlots) * PAGE_SIZE)); + if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) { + const struct Scatter *scatter = (const struct Scatter*) + ((const char*)cd + ntohl(cd->scatterOffset)); + blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE; + } else { + blob->csb_start_offset = (blob->csb_end_offset - + (ntohl(cd->nCodeSlots) * PAGE_SIZE)); + } /* compute the blob's SHA1 hash */ sha1_base = (const unsigned char *) cd; sha1_size = ntohl(cd->length); @@ -2354,12 +2460,12 @@ ubc_cs_blob_add( if (cs_blob_count > cs_blob_count_peak) { cs_blob_count_peak = cs_blob_count; /* XXX atomic ? */ } - OSAddAtomic(+blob->csb_mem_size, &cs_blob_size); - if (cs_blob_size > cs_blob_size_peak) { - cs_blob_size_peak = cs_blob_size; /* XXX atomic ? */ + OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size); + if ((SInt32) cs_blob_size > cs_blob_size_peak) { + cs_blob_size_peak = (SInt32) cs_blob_size; /* XXX atomic ? 
*/ } - if (blob->csb_mem_size > cs_blob_size_max) { - cs_blob_size_max = blob->csb_mem_size; + if ((UInt32) blob->csb_mem_size > cs_blob_size_max) { + cs_blob_size_max = (UInt32) blob->csb_mem_size; } if (cs_debug) { @@ -2471,7 +2577,7 @@ ubc_cs_free( } blob->csb_mem_handle = IPC_PORT_NULL; OSAddAtomic(-1, &cs_blob_count); - OSAddAtomic(-blob->csb_mem_size, &cs_blob_size); + OSAddAtomic((SInt32) -blob->csb_mem_size, &cs_blob_size); kfree(blob, sizeof (*blob)); } uip->cs_blobs = NULL; @@ -2484,7 +2590,20 @@ ubc_get_cs_blobs( struct ubc_info *uip; struct cs_blob *blobs; - vnode_lock_spin(vp); + /* + * No need to take the vnode lock here. The caller must be holding + * a reference on the vnode (via a VM mapping or open file descriptor), + * so the vnode will not go away. The ubc_info stays until the vnode + * goes away. And we only modify "blobs" by adding to the head of the + * list. + * The ubc_info could go away entirely if the vnode gets reclaimed as + * part of a forced unmount. In the case of a code-signature validation + * during a page fault, the "paging_in_progress" reference on the VM + * object guarantess that the vnode pager (and the ubc_info) won't go + * away during the fault. + * Other callers need to protect against vnode reclaim by holding the + * vnode lock, for example. + */ if (! UBCINFOEXISTS(vp)) { blobs = NULL; @@ -2495,8 +2614,6 @@ ubc_get_cs_blobs( blobs = uip->cs_blobs; out: - vnode_unlock(vp); - return blobs; } @@ -2516,7 +2633,6 @@ cs_validate_page( struct cs_blob *blobs, *blob; const CS_CodeDirectory *cd; const CS_SuperBlob *embedded; - off_t start_offset, end_offset; const unsigned char *hash; boolean_t validated; off_t offset; /* page offset in the file */ @@ -2562,8 +2678,8 @@ cs_validate_page( if (kr != KERN_SUCCESS) { /* XXX FBDP what to do !? 
*/ printf("cs_validate_page: failed to map blob, " - "size=0x%x kr=0x%x\n", - blob->csb_mem_size, kr); + "size=0x%lx kr=0x%x\n", + (size_t)blob->csb_mem_size, kr); break; } } @@ -2581,12 +2697,10 @@ cs_validate_page( /* bogus blob ? */ continue; } - - end_offset = round_page(ntohl(cd->codeLimit)); - start_offset = end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE); + offset = page_offset - blob->csb_base_offset; - if (offset < start_offset || - offset >= end_offset) { + if (offset < blob->csb_start_offset || + offset >= blob->csb_end_offset) { /* our page is not covered by this blob */ continue; } @@ -2625,10 +2739,10 @@ cs_validate_page( size = PAGE_SIZE; const uint32_t *asha1, *esha1; - if (offset + size > codeLimit) { + if ((off_t)(offset + size) > codeLimit) { /* partial page at end of segment */ assert(offset < codeLimit); - size = codeLimit & PAGE_MASK; + size = (size_t) (codeLimit & PAGE_MASK); } /* compute the actual page's SHA1 hash */ SHA1Init(&sha1ctxt); @@ -2672,8 +2786,11 @@ ubc_cs_getcdhash( off_t offset, unsigned char *cdhash) { - struct cs_blob *blobs, *blob; - off_t rel_offset; + struct cs_blob *blobs, *blob; + off_t rel_offset; + int ret; + + vnode_lock(vp); blobs = ubc_get_cs_blobs(vp); for (blob = blobs; @@ -2690,11 +2807,14 @@ ubc_cs_getcdhash( if (blob == NULL) { /* we didn't find a blob covering "offset" */ - return EBADEXEC; /* XXX any better error ? */ + ret = EBADEXEC; /* XXX any better error ? 
*/ + } else { + /* get the SHA1 hash of that blob */ + bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1)); + ret = 0; } - /* get the SHA1 hash of that blob */ - bcopy(blob->csb_sha1, cdhash, sizeof (blob->csb_sha1)); + vnode_unlock(vp); - return 0; + return ret; } diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index c8a1df41d..985e80168 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -76,7 +76,6 @@ #include -void domaininit(void) __attribute__((section("__TEXT, initcode"))); void init_domain(struct domain *dp) __attribute__((section("__TEXT, initcode"))); void concat_domain(struct domain *dp) __attribute__((section("__TEXT, initcode"))); @@ -87,8 +86,6 @@ void pfslowtimo(void *); struct protosw *pffindprotonotype(int, int); struct protosw *pffindprotonotype_locked(int , int , int); struct domain *pffinddomain(int); -void concat_domain(struct domain *); -void init_domain(struct domain *); /* * Add/delete 'domain': Link structure into system list, @@ -102,6 +99,8 @@ static lck_grp_attr_t *domain_proto_mtx_grp_attr; lck_mtx_t *domain_proto_mtx; extern int do_reclaim; +extern sysctlfn net_sysctl; + static void init_proto(struct protosw *pr) { @@ -489,7 +488,7 @@ net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, for (pr = dp->dom_protosw; pr; pr = pr->pr_next) if (pr->pr_protocol == protocol && pr->pr_sysctl) { error = (*pr->pr_sysctl)(name + 2, namelen - 2, - oldp, oldlenp, newp, newlen); + (void *)(uintptr_t)oldp, oldlenp, (void *)(uintptr_t)newp, newlen); lck_mtx_unlock(domain_proto_mtx); return (error); } diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 0c0a27855..1303dbe8d 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,6 +77,7 @@ #include #include #include +#include #include #include @@ -295,7 +296,7 @@ /* TODO: should be in header file */ /* kernel translater */ -extern vm_offset_t kmem_mb_alloc(vm_map_t, int); +extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); extern vm_map_t mb_map; /* special map */ @@ -312,12 +313,17 @@ static int mbuf_expand_mcl; /* number of cluster creation requets */ static int mbuf_expand_big; /* number of big cluster creation requests */ static int mbuf_expand_16k; /* number of 16K cluster creation requests */ static int ncpu; /* number of CPUs */ -static int *mcl_paddr; /* Array of cluster physical addresses */ +static ppnum_t *mcl_paddr; /* Array of cluster physical addresses */ +static ppnum_t mcl_pages; /* Size of array (# physical pages) */ static ppnum_t mcl_paddr_base; /* Handle returned by IOMapper::iovmAlloc() */ static mcache_t *ref_cache; /* Cache of cluster reference & flags */ static mcache_t *mcl_audit_con_cache; /* Audit contents cache */ static unsigned int mbuf_debug; /* patchable mbuf mcache flags */ static unsigned int mb_normalized; /* number of packets "normalized" */ +static unsigned int mbuf_gscale; /* Power-of-two growth scale for m_howmany */ + +#define MB_GROWTH_AGGRESSIVE 1 /* Threshold: 1/2 of total */ +#define MB_GROWTH_NORMAL 4 /* Threshold: 15/16 of total */ typedef enum { MC_MBUF = 0, /* Regular mbuf */ @@ -415,11 +421,7 @@ static unsigned int maxmbufcl; * Note that we don't save the contents of clusters when they are freed; * we simply pattern-fill them. 
*/ -#if defined(__LP64__) -#define AUDIT_CONTENTS_SIZE 160 -#else -#define AUDIT_CONTENTS_SIZE 80 -#endif /* __LP64__ */ +#define AUDIT_CONTENTS_SIZE ((MSIZE - MHLEN) + sizeof (_m_ext_t)) /* * mbuf specific mcache audit flags @@ -447,6 +449,8 @@ int max_protohdr; /* largest protocol header */ int max_hdr; /* largest link+protocol header */ int max_datalen; /* MHLEN - max_hdr */ +extern u_int32_t high_sb_max; + /* TODO: should be in header file */ int do_reclaim = 0; @@ -578,6 +582,16 @@ static void slab_nextptr_panic(mcl_slab_t *, void *); static void slab_detach(mcl_slab_t *); static boolean_t slab_is_detached(mcl_slab_t *); +static unsigned int m_length(struct mbuf *); +static int m_copyback0(struct mbuf **, int, int, const void *, int, int); +static struct mbuf *m_split0(struct mbuf *, int, int, int); + +/* flags for m_copyback0 */ +#define M_COPYBACK0_COPYBACK 0x0001 /* copyback from cp */ +#define M_COPYBACK0_PRESERVE 0x0002 /* preserve original data */ +#define M_COPYBACK0_COW 0x0004 /* do copy-on-write */ +#define M_COPYBACK0_EXTEND 0x0008 /* extend chain */ + /* * This flag is set for all mbufs that come out of and into the composite * mbuf + cluster caches, i.e. MC_MBUF_CL and MC_MBUF_BIGCL. mbufs that @@ -653,7 +667,7 @@ static boolean_t slab_is_detached(mcl_slab_t *); (m)->m_pkthdr.header = NULL; \ (m)->m_pkthdr.csum_flags = 0; \ (m)->m_pkthdr.csum_data = 0; \ - (m)->m_pkthdr.reserved0 = NULL; \ + (m)->m_pkthdr.tso_segsz = 0; \ (m)->m_pkthdr.vlan_tag = 0; \ (m)->m_pkthdr.socket_id = 0; \ m_tag_init(m); \ @@ -694,9 +708,12 @@ static boolean_t slab_is_detached(mcl_slab_t *); * that allows for a more accurate view of the state of the allocator. */ struct mb_stat *mb_stat; +struct omb_stat *omb_stat; /* For backwards compatibility */ #define MB_STAT_SIZE(n) \ ((size_t)(&((mb_stat_t *)0)->mbs_class[n])) +#define OMB_STAT_SIZE(n) \ + ((size_t)(&((struct omb_stat *)0)->mbs_class[n])) /* * The legacy structure holding all of the mbuf allocation statistics. 
@@ -742,14 +759,15 @@ static mbuf_mtypes_t *mbuf_mtypes; /* per-CPU statistics */ ((mtypes_cpu_t *)((char *)(p) + MBUF_MTYPES_SIZE(cpu_number()))) /* This should be in a header file */ -#define atomic_add_32(a, n) ((void) OSAddAtomic(n, (volatile SInt32 *)a)) +#define atomic_add_16(a, n) ((void) OSAddAtomic16(n, a)) +#define atomic_add_32(a, n) ((void) OSAddAtomic(n, a)) #define mtype_stat_add(type, n) { \ if ((unsigned)(type) < MT_MAX) { \ mtypes_cpu_t *mbs = MTYPES_CPU(mbuf_mtypes); \ atomic_add_32(&mbs->cpu_mtypes[type], n); \ - } else if ((unsigned)(type) < MBSTAT_MTYPES_MAX) { \ - atomic_add_32(&mbstat.m_mtypes[type], n); \ + } else if ((unsigned)(type) < (unsigned)MBSTAT_MTYPES_MAX) { \ + atomic_add_16((int16_t*)&mbstat.m_mtypes[type], n); \ } \ } @@ -790,7 +808,8 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS mcache_t *cp; mcache_cpu_t *ccp; mb_class_stat_t *sp; - int k, m, bktsize; + void *statp; + int k, m, bktsize, statsz, proc64 = proc_is64bit(req->p); lck_mtx_lock(mbuf_mlock); for (k = 0; k < NELEM(mbuf_table); k++) { @@ -853,9 +872,44 @@ mb_stat_sysctl SYSCTL_HANDLER_ARGS break; } } + + if (!proc64) { + struct omb_class_stat *oc; + struct mb_class_stat *c; + + omb_stat->mbs_cnt = mb_stat->mbs_cnt; + oc = &omb_stat->mbs_class[0]; + c = &mb_stat->mbs_class[0]; + for (k = 0; k < omb_stat->mbs_cnt; k++, oc++, c++) { + (void) snprintf(oc->mbcl_cname, sizeof (oc->mbcl_cname), + "%s", c->mbcl_cname); + oc->mbcl_size = c->mbcl_size; + oc->mbcl_total = c->mbcl_total; + oc->mbcl_active = c->mbcl_active; + oc->mbcl_infree = c->mbcl_infree; + oc->mbcl_slab_cnt = c->mbcl_slab_cnt; + oc->mbcl_alloc_cnt = c->mbcl_alloc_cnt; + oc->mbcl_free_cnt = c->mbcl_free_cnt; + oc->mbcl_notified = c->mbcl_notified; + oc->mbcl_purge_cnt = c->mbcl_purge_cnt; + oc->mbcl_fail_cnt = c->mbcl_fail_cnt; + oc->mbcl_ctotal = c->mbcl_ctotal; + oc->mbcl_mc_state = c->mbcl_mc_state; + oc->mbcl_mc_cached = c->mbcl_mc_cached; + oc->mbcl_mc_waiter_cnt = c->mbcl_mc_waiter_cnt; + oc->mbcl_mc_wretry_cnt = 
c->mbcl_mc_wretry_cnt; + oc->mbcl_mc_nwretry_cnt = c->mbcl_mc_nwretry_cnt; + } + statp = omb_stat; + statsz = OMB_STAT_SIZE(NELEM(mbuf_table)); + } else { + statp = mb_stat; + statsz = MB_STAT_SIZE(NELEM(mbuf_table)); + } + lck_mtx_unlock(mbuf_mlock); - return (SYSCTL_OUT(req, mb_stat, MB_STAT_SIZE(NELEM(mbuf_table)))); + return (SYSCTL_OUT(req, statp, statsz)); } static inline void @@ -891,6 +945,10 @@ mbuf_table_init(void) { int m; + MALLOC(omb_stat, struct omb_stat *, OMB_STAT_SIZE(NELEM(mbuf_table)), + M_TEMP, M_WAITOK | M_ZERO); + VERIFY(omb_stat != NULL); + MALLOC(mb_stat, mb_stat_t *, MB_STAT_SIZE(NELEM(mbuf_table)), M_TEMP, M_WAITOK | M_ZERO); VERIFY(mb_stat != NULL); @@ -998,13 +1056,67 @@ mbuf_table_init(void) mbstat.m_bigmclbytes = m_maxsize(MC_BIGCL); } +#if defined(__LP64__) +typedef struct ncl_tbl { + uint64_t nt_maxmem; /* memory (sane) size */ + uint32_t nt_mbpool; /* mbuf pool size */ +} ncl_tbl_t; + +/* Non-server */ +static ncl_tbl_t ncl_table[] = { + { (1ULL << GBSHIFT) /* 1 GB */, (64 << MBSHIFT) /* 64 MB */ }, + { (1ULL << (GBSHIFT + 3)) /* 8 GB */, (96 << MBSHIFT) /* 96 MB */ }, + { (1ULL << (GBSHIFT + 4)) /* 16 GB */, (128 << MBSHIFT) /* 128 MB */ }, + { 0, 0 } +}; + +/* Server */ +static ncl_tbl_t ncl_table_srv[] = { + { (1ULL << GBSHIFT) /* 1 GB */, (96 << MBSHIFT) /* 96 MB */ }, + { (1ULL << (GBSHIFT + 2)) /* 4 GB */, (128 << MBSHIFT) /* 128 MB */ }, + { (1ULL << (GBSHIFT + 3)) /* 8 GB */, (160 << MBSHIFT) /* 160 MB */ }, + { (1ULL << (GBSHIFT + 4)) /* 16 GB */, (192 << MBSHIFT) /* 192 MB */ }, + { (1ULL << (GBSHIFT + 5)) /* 32 GB */, (256 << MBSHIFT) /* 256 MB */ }, + { (1ULL << (GBSHIFT + 6)) /* 64 GB */, (384 << MBSHIFT) /* 384 MB */ }, + { 0, 0 } +}; +#endif /* __LP64__ */ + +__private_extern__ unsigned int +mbuf_default_ncl(int srv, uint64_t mem) +{ +#if !defined(__LP64__) +#pragma unused(srv) + unsigned int n; + /* + * 32-bit kernel (default to 64MB of mbuf pool for >= 1GB RAM). 
+ */ + if ((n = ((mem / 16) / MCLBYTES)) > 32768) + n = 32768; +#else + unsigned int n, i; + ncl_tbl_t *tbl = (srv ? ncl_table_srv : ncl_table); + /* + * 64-bit kernel (mbuf pool size based on table). + */ + n = tbl[0].nt_mbpool; + for (i = 0; tbl[i].nt_mbpool != 0; i++) { + if (mem < tbl[i].nt_maxmem) + break; + n = tbl[i].nt_mbpool; + } + n >>= MCLSHIFT; +#endif /* !__LP64__ */ + return (n); +} + __private_extern__ void mbinit(void) { unsigned int m; int initmcl = MINCL; - int mcl_pages; void *buf; + thread_t thread = THREAD_NULL; if (nmbclusters == 0) nmbclusters = NMBCLUSTERS; @@ -1039,13 +1151,14 @@ mbinit(void) } /* Calculate the number of pages assigned to the cluster pool */ - mcl_pages = nmbclusters/(NBPG/CLBYTES); - MALLOC(mcl_paddr, int *, mcl_pages * sizeof (int), M_TEMP, M_WAITOK); + mcl_pages = (nmbclusters * MCLBYTES) / CLBYTES; + MALLOC(mcl_paddr, ppnum_t *, mcl_pages * sizeof (ppnum_t), + M_TEMP, M_WAITOK); VERIFY(mcl_paddr != NULL); /* Register with the I/O Bus mapper */ mcl_paddr_base = IOMapperIOVMAlloc(mcl_pages); - bzero((char *)mcl_paddr, mcl_pages * sizeof (int)); + bzero((char *)mcl_paddr, mcl_pages * sizeof (ppnum_t)); embutl = (union mcluster *) ((unsigned char *)mbutl + (nmbclusters * MCLBYTES)); @@ -1059,7 +1172,8 @@ mbinit(void) lck_mtx_unlock(mbuf_mlock); - (void) kernel_thread(kernel_task, mbuf_worker_thread_init); + (void) kernel_thread_start((thread_continue_t)mbuf_worker_thread_init, NULL, &thread); + thread_deallocate(thread); ref_cache = mcache_create("mext_ref", sizeof (struct ext_ref), 0, 0, MCR_SLEEP); @@ -1093,7 +1207,7 @@ mbinit(void) m_cache(m) = mcache_create_ext(m_cname(m), m_maxsize(m), allocfunc, freefunc, auditfunc, mbuf_slab_notify, - (void *)m, flags, MCR_SLEEP); + (void *)(uintptr_t)m, flags, MCR_SLEEP); } /* @@ -1110,7 +1224,30 @@ mbinit(void) mbuf_mtypes = (mbuf_mtypes_t *)P2ROUNDUP((intptr_t)buf, CPU_CACHE_SIZE); bzero(mbuf_mtypes, MBUF_MTYPES_SIZE(ncpu)); - printf("mbinit: done\n"); + mbuf_gscale = 
MB_GROWTH_NORMAL; + + /* + * Set the max limit on sb_max to be 1/16 th of the size of + * memory allocated for mbuf clusters. + */ + high_sb_max = (nmbclusters << (MCLSHIFT - 4)); + if (high_sb_max < sb_max) { + /* sb_max is too large for this configuration, scale it down */ + if (high_sb_max > (1 << MBSHIFT)) { + /* We have atleast 16 M of mbuf pool */ + sb_max = high_sb_max; + } else if ((nmbclusters << MCLSHIFT) > (1 << MBSHIFT)) { + /* If we have more than 1M of mbufpool, cap the size of + * max sock buf at 1M + */ + sb_max = high_sb_max = (1 << MBSHIFT); + } else { + sb_max = high_sb_max; + } + } + + printf("mbinit: done (%d MB memory set for mbuf pool)\n", + (nmbclusters << MCLSHIFT) >> MBSHIFT); } /* @@ -1371,7 +1508,7 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) m_total(MC_MBUF) -= NMBPCL; mbstat.m_mbufs = m_total(MC_MBUF); m_infree(MC_MBUF) -= NMBPCL; - mtype_stat_add(MT_FREE, -NMBPCL); + mtype_stat_add(MT_FREE, -((unsigned)NMBPCL)); while (i--) { struct mbuf *m = sp->sl_head; @@ -2077,7 +2214,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) { int i; vm_size_t size = 0; - int numpages = 0; + int numpages = 0, large_buffer = (bufsize == m_maxsize(MC_16KCL)); vm_offset_t page = 0; mcache_audit_t *mca_list = NULL; mcache_obj_t *con_list = NULL; @@ -2116,14 +2253,22 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) lck_mtx_unlock(mbuf_mlock); - size = round_page_32(i * bufsize); - page = kmem_mb_alloc(mb_map, size); + size = round_page(i * bufsize); + page = kmem_mb_alloc(mb_map, size, large_buffer); + + /* + * If we did ask for "n" 16K physically contiguous chunks + * and didn't get them, then please try again without this + * restriction. 
+ */ + if (large_buffer && page == 0) + page = kmem_mb_alloc(mb_map, size, 0); if (page == 0) { if (bufsize <= m_maxsize(MC_BIGCL)) { /* Try for 1 page if failed, only for 2KB/4KB request */ size = NBPG; - page = kmem_mb_alloc(mb_map, size); + page = kmem_mb_alloc(mb_map, size, 0); } if (page == 0) { @@ -2173,13 +2318,14 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) for (i = 0; i < numpages; i++, page += NBPG) { ppnum_t offset = ((char *)page - (char *)mbutl) / NBPG; ppnum_t new_page = pmap_find_phys(kernel_pmap, - (vm_address_t)page); + (vm_offset_t)page); /* * In the case of no mapper being available the following * code noops and returns the input page; if there is a * mapper the appropriate I/O page is returned. */ + VERIFY(offset < mcl_pages); new_page = IOMapperInsertPage(mcl_paddr_base, offset, new_page); mcl_paddr[offset] = new_page << PGSHIFT; @@ -2874,6 +3020,36 @@ m_clattach(struct mbuf *m, int type, caddr_t extbuf, return (m); } +/* + * Perform `fast' allocation mbuf clusters from a cache of recently-freed + * clusters. (If the cache is empty, new clusters are allocated en-masse.) + */ +struct mbuf * +m_getcl(int wait, int type, int flags) +{ + struct mbuf *m; + int mcflags = MSLEEPF(wait); + int hdr = (flags & M_PKTHDR); + + /* Is this due to a non-blocking retry? If so, then try harder */ + if (mcflags & MCR_NOSLEEP) + mcflags |= MCR_TRYHARD; + + m = mcache_alloc(m_cache(MC_MBUF_CL), mcflags); + if (m != NULL) { + MBUF_INIT(m, hdr, type); + mtype_stat_inc(type); + mtype_stat_dec(MT_FREE); +#if CONFIG_MACF_NET + if (hdr && mac_init_mbuf(m, wait) != 0) { + m_free(m); + return (NULL); + } +#endif /* MAC_NET */ + } + return (m); +} + /* m_mclget() add an mbuf cluster to a normal mbuf */ struct mbuf * m_mclget(struct mbuf *m, int wait) @@ -2999,15 +3175,16 @@ m_m16kget(struct mbuf *m, int wait) return (m); } -/* */ +/* + * "Move" mbuf pkthdr from "from" to "to". + * "from" must have M_PKTHDR set, and "to" must be empty. 
+ */ void m_copy_pkthdr(struct mbuf *to, struct mbuf *from) { -#if CONFIG_MACF_NET /* We will be taking over the tags of 'to' */ if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); -#endif /* MAC_NET */ to->m_pkthdr = from->m_pkthdr; /* especially tags */ m_tag_init(from); /* purge tags from src */ to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); @@ -3023,10 +3200,8 @@ m_copy_pkthdr(struct mbuf *to, struct mbuf *from) static int m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) { -#if CONFIG_MACF_NET if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); -#endif /* MAC_NET */ to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; @@ -3845,7 +4020,7 @@ m_copym(struct mbuf *m, int off0, int len, int wait) } if (len == M_COPYALL) { if (MIN(len, (m->m_len - off)) == len) { - printf("m->m_len %ld - off %d = %ld, %ld\n", + printf("m->m_len %d - off %d = %d, %d\n", m->m_len, off, m->m_len - off, MIN(len, (m->m_len - off))); } @@ -4016,9 +4191,10 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, * continuing for "len" bytes, into the indicated buffer. 
*/ void -m_copydata(struct mbuf *m, int off, int len, caddr_t cp) +m_copydata(struct mbuf *m, int off, int len, void *vp) { unsigned count; + char *cp = vp; if (off < 0 || len < 0) panic("m_copydata: invalid offset %d or len %d", off, len); @@ -4214,6 +4390,12 @@ m_pullup(struct mbuf *n, int len) */ struct mbuf * m_split(struct mbuf *m0, int len0, int wait) +{ + return (m_split0(m0, len0, wait, 1)); +} + +static struct mbuf * +m_split0(struct mbuf *m0, int len0, int wait, int copyhdr) { struct mbuf *m, *n; unsigned len = len0, remain; @@ -4223,7 +4405,7 @@ m_split(struct mbuf *m0, int len0, int wait) if (m == NULL) return (NULL); remain = m->m_len - len; - if (m0->m_flags & M_PKTHDR) { + if (copyhdr && (m0->m_flags & M_PKTHDR)) { _MGETHDR(n, wait, m0->m_type); if (n == NULL) return (NULL); @@ -4347,6 +4529,30 @@ m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, return (top); } +void +mbuf_growth_aggressive(void) +{ + lck_mtx_lock(mbuf_mlock); + /* + * Don't start to grow the pool until we are at least + * 1/2 (50%) of current total capacity. + */ + mbuf_gscale = MB_GROWTH_AGGRESSIVE; + lck_mtx_unlock(mbuf_mlock); +} + +void +mbuf_growth_normal(void) +{ + lck_mtx_lock(mbuf_mlock); + /* + * Don't start to grow the pool until we are at least + * 15/16 (93.75%) of current total capacity. + */ + mbuf_gscale = MB_GROWTH_NORMAL; + lck_mtx_unlock(mbuf_mlock); +} + /* * Cluster freelist allocation check. 
*/ @@ -4356,6 +4562,7 @@ m_howmany(int num, size_t bufsize) int i = 0, j = 0; u_int32_t m_clusters, m_bigclusters, m_16kclusters; u_int32_t m_clfree, m_bigclfree, m_16kclfree; + u_int32_t s = mbuf_gscale; lck_mtx_assert(mbuf_mlock, LCK_MTX_ASSERT_OWNED); @@ -4384,14 +4591,14 @@ m_howmany(int num, size_t bufsize) /* Under minimum */ if (m_clusters < MINCL) return (MINCL - m_clusters); - /* Too few (free < 1/16 total) and not over maximum */ + /* Too few (free < threshold) and not over maximum */ if (m_clusters < m_maxlimit(MC_CL)) { if (m_clfree >= MCL_LOWAT) return (0); if (num >= m_clfree) i = num - m_clfree; - if (((m_clusters + num) >> 4) > m_clfree) - j = ((m_clusters + num) >> 4) - m_clfree; + if (((m_clusters + num) >> s) > m_clfree) + j = ((m_clusters + num) >> s) - m_clfree; i = MAX(i, j); if (i + m_clusters >= m_maxlimit(MC_CL)) i = m_maxlimit(MC_CL) - m_clusters; @@ -4437,36 +4644,282 @@ m_howmany(int num, size_t bufsize) return (i); } +/* + * Return the number of bytes in the mbuf chain, m. + */ +static unsigned int +m_length(struct mbuf *m) +{ + struct mbuf *m0; + unsigned int pktlen; + + if (m->m_flags & M_PKTHDR) + return (m->m_pkthdr.len); + + pktlen = 0; + for (m0 = m; m0 != NULL; m0 = m0->m_next) + pktlen += m0->m_len; + return (pktlen); +} + /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. 
*/ void -m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) +m_copyback(struct mbuf *m0, int off, int len, const void *cp) { - int mlen; - struct mbuf *m = m0, *n; - int totlen = 0; +#if DEBUG + struct mbuf *origm = m0; + int error; +#endif /* DEBUG */ if (m0 == NULL) return; + +#if DEBUG + error = +#endif /* DEBUG */ + m_copyback0(&m0, off, len, cp, + M_COPYBACK0_COPYBACK | M_COPYBACK0_EXTEND, M_DONTWAIT); + +#if DEBUG + if (error != 0 || (m0 != NULL && origm != m0)) + panic("m_copyback"); +#endif /* DEBUG */ +} + +struct mbuf * +m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how) +{ + int error; + + /* don't support chain expansion */ + VERIFY(off + len <= m_length(m0)); + + error = m_copyback0(&m0, off, len, cp, + M_COPYBACK0_COPYBACK | M_COPYBACK0_COW, how); + if (error) { + /* + * no way to recover from partial success. + * just free the chain. + */ + m_freem(m0); + return (NULL); + } + return (m0); +} + +/* + * m_makewritable: ensure the specified range writable. 
+ */ +int +m_makewritable(struct mbuf **mp, int off, int len, int how) +{ + int error; +#if DEBUG + struct mbuf *n; + int origlen, reslen; + + origlen = m_length(*mp); +#endif /* DEBUG */ + +#if 0 /* M_COPYALL is large enough */ + if (len == M_COPYALL) + len = m_length(*mp) - off; /* XXX */ +#endif + + error = m_copyback0(mp, off, len, NULL, + M_COPYBACK0_PRESERVE | M_COPYBACK0_COW, how); + +#if DEBUG + reslen = 0; + for (n = *mp; n; n = n->m_next) + reslen += n->m_len; + if (origlen != reslen) + panic("m_makewritable: length changed"); + if (((*mp)->m_flags & M_PKTHDR) && reslen != (*mp)->m_pkthdr.len) + panic("m_makewritable: inconsist"); +#endif /* DEBUG */ + + return (error); +} + +static int +m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags, + int how) +{ + int mlen; + struct mbuf *m, *n; + struct mbuf **mp; + int totlen = 0; + const char *cp = vp; + + VERIFY(mp0 != NULL); + VERIFY(*mp0 != NULL); + VERIFY((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL); + VERIFY((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL); + + /* + * we don't bother to update "totlen" in the case of M_COPYBACK0_COW, + * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive. + */ + + VERIFY((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0); + + mp = mp0; + m = *mp; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { - n = m_getclr(M_DONTWAIT, m->m_type); - if (n == NULL) + int tspace; +extend: + if (!(flags & M_COPYBACK0_EXTEND)) goto out; - n->m_len = MIN(MLEN, len + off); + + /* + * try to make some space at the end of "m". 
+ */ + + mlen = m->m_len; + if (off + len >= MINCLSIZE && + !(m->m_flags & M_EXT) && m->m_len == 0) { + MCLGET(m, how); + } + tspace = M_TRAILINGSPACE(m); + if (tspace > 0) { + tspace = MIN(tspace, off + len); + VERIFY(tspace > 0); + bzero(mtod(m, char *) + m->m_len, + MIN(off, tspace)); + m->m_len += tspace; + off += mlen; + totlen -= mlen; + continue; + } + + /* + * need to allocate an mbuf. + */ + + if (off + len >= MINCLSIZE) { + n = m_getcl(how, m->m_type, 0); + } else { + n = _M_GET(how, m->m_type); + } + if (n == NULL) { + goto out; + } + n->m_len = 0; + n->m_len = MIN(M_TRAILINGSPACE(n), off + len); + bzero(mtod(n, char *), MIN(n->m_len, off)); m->m_next = n; } + mp = &m->m_next; m = m->m_next; } while (len > 0) { - mlen = MIN(m->m_len - off, len); - bcopy(cp, off + MTOD(m, caddr_t), (unsigned)mlen); - cp += mlen; + mlen = m->m_len - off; + if (mlen != 0 && m_mclhasreference(m)) { + char *datap; + int eatlen; + + /* + * this mbuf is read-only. + * allocate a new writable mbuf and try again. + */ + +#if defined(DIAGNOSTIC) + if (!(flags & M_COPYBACK0_COW)) + panic("m_copyback0: read-only"); +#endif /* defined(DIAGNOSTIC) */ + + /* + * if we're going to write into the middle of + * a mbuf, split it first. + */ + if (off > 0 && len < mlen) { + n = m_split0(m, off, how, 0); + if (n == NULL) + goto enobufs; + m->m_next = n; + mp = &m->m_next; + m = n; + off = 0; + continue; + } + + /* + * XXX TODO coalesce into the trailingspace of + * the previous mbuf when possible. + */ + + /* + * allocate a new mbuf. copy packet header if needed. + */ + n = _M_GET(how, m->m_type); + if (n == NULL) + goto enobufs; + if (off == 0 && (m->m_flags & M_PKTHDR)) { + M_COPY_PKTHDR(n, m); + n->m_len = MHLEN; + } else { + if (len >= MINCLSIZE) + MCLGET(n, M_DONTWAIT); + n->m_len = + (n->m_flags & M_EXT) ? MCLBYTES : MLEN; + } + if (n->m_len > len) + n->m_len = len; + + /* + * free the region which has been overwritten. + * copying data from old mbufs if requested. 
+ */ + if (flags & M_COPYBACK0_PRESERVE) + datap = mtod(n, char *); + else + datap = NULL; + eatlen = n->m_len; + VERIFY(off == 0 || eatlen >= mlen); + if (off > 0) { + VERIFY(len >= mlen); + m->m_len = off; + m->m_next = n; + if (datap) { + m_copydata(m, off, mlen, datap); + datap += mlen; + } + eatlen -= mlen; + mp = &m->m_next; + m = m->m_next; + } + while (m != NULL && m_mclhasreference(m) && + n->m_type == m->m_type && eatlen > 0) { + mlen = MIN(eatlen, m->m_len); + if (datap) { + m_copydata(m, 0, mlen, datap); + datap += mlen; + } + m->m_data += mlen; + m->m_len -= mlen; + eatlen -= mlen; + if (m->m_len == 0) + *mp = m = m_free(m); + } + if (eatlen > 0) + n->m_len -= eatlen; + n->m_next = m; + *mp = m = n; + continue; + } + mlen = MIN(mlen, len); + if (flags & M_COPYBACK0_COPYBACK) { + bcopy(cp, mtod(m, caddr_t) + off, (unsigned)mlen); + cp += mlen; + } len -= mlen; mlen += off; off = 0; @@ -4474,23 +4927,27 @@ m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) if (len == 0) break; if (m->m_next == NULL) { - n = _M_GET(M_DONTWAIT, m->m_type); - if (n == NULL) - break; - n->m_len = MIN(MLEN, len); - m->m_next = n; + goto extend; } + mp = &m->m_next; m = m->m_next; } out: - if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) { + VERIFY(flags & M_COPYBACK0_EXTEND); m->m_pkthdr.len = totlen; + } + + return (0); + +enobufs: + return (ENOBUFS); } char * mcl_to_paddr(char *addr) { - int base_phys; + vm_offset_t base_phys; if (!MBUF_IN_MAP(addr)) return (NULL); @@ -4498,7 +4955,7 @@ mcl_to_paddr(char *addr) if (base_phys == 0) return (NULL); - return ((char *)((int)base_phys | ((int)addr & PGOFSET))); + return ((char *)((uintptr_t)base_phys | ((uintptr_t)addr & PGOFSET))); } /* @@ -4715,7 +5172,7 @@ m_mtod(struct mbuf *m) struct mbuf * m_dtom(void *x) { - return ((struct mbuf *)((u_long)(x) & ~(MSIZE-1))); + return ((struct mbuf *)((uintptr_t)(x) & ~(MSIZE-1))); } void @@ 
-5039,7 +5496,7 @@ slab_inrange(mcl_slab_t *sp, void *buf) (uintptr_t)buf < ((uintptr_t)sp->sl_base + sp->sl_len)); } -#undef panic(...) +#undef panic static void slab_nextptr_panic(mcl_slab_t *sp, void *addr) diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c index d5ea69c86..49727ff3a 100644 --- a/bsd/kern/uipc_mbuf2.c +++ b/bsd/kern/uipc_mbuf2.c @@ -373,10 +373,9 @@ m_tag_alloc(u_int32_t id, u_int16_t type, int len, int wait) if (len < 0) return NULL; -#ifndef __APPLE__ - t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait); +#if CONFIG_MBUF_TAGS_MALLOC + t = _MALLOC(len + sizeof (struct m_tag), M_TEMP, wait); #else - /*MALLOC(t, struct m_tag *, len + sizeof(struct m_tag), M_TEMP, M_WAITOK);*/ if (len + sizeof(struct m_tag) <= MLEN) { struct mbuf *m = m_get(wait, MT_TAG); if (m == NULL) @@ -406,10 +405,9 @@ m_tag_free(struct m_tag *t) t->m_tag_type == KERNEL_TAG_TYPE_MACLABEL) mac_mbuf_tag_destroy(t); #endif -#ifndef __APPLE__ - free(t, M_PACKET_TAGS); +#if CONFIG_MBUF_TAGS_MALLOC + _FREE(t, M_TEMP); #else - /* FREE(t, M_TEMP); */ if (t == NULL) return; if (t->m_tag_len + sizeof(struct m_tag) <= MLEN) { @@ -548,6 +546,9 @@ void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); +#if PF_PKTHDR + bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag)); +#endif } /* Get first tag in chain. 
*/ diff --git a/bsd/kern/uipc_proto.c b/bsd/kern/uipc_proto.c index a77e126d9..f04b308bf 100644 --- a/bsd/kern/uipc_proto.c +++ b/bsd/kern/uipc_proto.c @@ -82,9 +82,12 @@ static struct protosw localsw[] = { { .pr_type = SOCK_STREAM, .pr_domain = &localdomain, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|PR_PCBLOCK, .pr_ctloutput = uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs, + .pr_lock = unp_lock, + .pr_unlock = unp_unlock, + .pr_getlock = unp_getlock }, { .pr_type = SOCK_DGRAM, @@ -92,6 +95,9 @@ static struct protosw localsw[] = { .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, .pr_ctloutput = uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs, + .pr_lock = unp_lock, + .pr_unlock = unp_unlock, + .pr_getlock = unp_getlock }, { .pr_ctlinput = raw_ctlinput, @@ -104,7 +110,6 @@ int local_proto_count = (sizeof (localsw) / sizeof (struct protosw)); static void pre_unp_init(void) { - static int localdomain_initted = 0; int i; struct protosw *pr; struct domain *dp = &localdomain; diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index 57dff6de9..fa8ae828f 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2007 Apple Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,25 +100,21 @@ #include #include #include +#include #if CONFIG_MACF #include #include #endif /* MAC */ -/* how a timeval looks to a 64-bit process */ -struct timeval64 { - int64_t tv_sec; - int32_t tv_usec; -}; - int so_cache_hw = 0; int so_cache_timeouts = 0; int so_cache_max_freed = 0; int cached_sock_count = 0; +__private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS; struct socket *socket_cache_head = 0; struct socket *socket_cache_tail = 0; -u_long so_cache_time = 0; +u_int32_t so_cache_time = 0; int so_cache_init_done = 0; struct zone *so_cache_zone; @@ -133,7 +129,6 @@ static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); -static int filt_solisten(struct knote *kn, long hint); static int sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p); @@ -141,12 +136,16 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p); static int sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p); -static struct filterops solisten_filtops = - { 1, NULL, filt_sordetach, filt_solisten }; -static struct filterops soread_filtops = - { 1, NULL, filt_sordetach, filt_soread }; -static struct filterops sowrite_filtops = - { 1, NULL, filt_sowdetach, filt_sowrite }; +static struct filterops soread_filtops = { + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_soread, +}; +static struct filterops sowrite_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_sowrite, +}; #define EVEN_MORE_LOCKING_DEBUG 0 int socket_debug = 0; @@ -233,6 +232,7 @@ static void cached_sock_free(struct socket *); static void so_cache_timer(void *); void soclose_wait_locked(struct socket *so); +int so_isdstlocal(struct socket *so); void @@ -286,7 +286,7 @@ static void cached_sock_alloc(struct socket **so, int waitok) { caddr_t 
temp; - register u_long offset; + register uintptr_t offset; lck_mtx_lock(so_cache_mtx); @@ -333,18 +333,16 @@ cached_sock_alloc(struct socket **so, int waitok) * Define offsets for extra structures into our single block of * memory. Align extra structures on longword boundaries. */ - offset = (u_long) *so; + + offset = (uintptr_t) *so; offset += sizeof (struct socket); - if (offset & 0x3) { - offset += 4; - offset &= 0xfffffffc; - } + + offset = ALIGN(offset); + (*so)->so_saved_pcb = (caddr_t)offset; offset += get_inpcb_str_size(); - if (offset & 0x3) { - offset += 4; - offset &= 0xfffffffc; - } + + offset = ALIGN(offset); ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t)offset; @@ -364,7 +362,7 @@ cached_sock_free(struct socket *so) lck_mtx_lock(so_cache_mtx); - if (++cached_sock_count > MAX_CACHED_SOCKETS) { + if (++cached_sock_count > max_cached_sock_count) { --cached_sock_count; lck_mtx_unlock(so_cache_mtx); #if TEMPDEBUG @@ -507,7 +505,7 @@ socreate(int dom, struct socket **aso, int type, int proto) } if (prp->pr_type != type) return (EPROTOTYPE); - so = soalloc(p != 0, dom, type); + so = soalloc(1, dom, type); if (so == 0) return (ENOBUFS); @@ -515,11 +513,10 @@ socreate(int dom, struct socket **aso, int type, int proto) TAILQ_INIT(&so->so_comp); so->so_type = type; - if (p != 0) { - so->so_uid = kauth_cred_getuid(kauth_cred_get()); - if (!suser(kauth_cred_get(), NULL)) - so->so_state = SS_PRIV; - } + so->so_uid = kauth_cred_getuid(kauth_cred_get()); + if (!suser(kauth_cred_get(), NULL)) + so->so_state = SS_PRIV; + so->so_proto = prp; #ifdef __APPLE__ so->so_rcv.sb_flags |= SB_RECV; /* XXX */ @@ -1087,13 +1084,17 @@ soacceptfilter(struct socket *so) struct socket *head = so->so_head; /* - * There's no need to hold the lock; this socket + * Hold the lock even if this socket * has not been made visible to the filter(s). 
+ * For sockets with global locks, this protect against the + * head or peer going away */ - if ((sock_getaddr(so, &remote, 1) != 0) || - sock_getaddr(so, &local, 0) != 0) { + socket_lock(so, 1); + if (sogetaddr_locked(so, &remote, 1) != 0 || + sogetaddr_locked(so, &local, 0) != 0) { so->so_state &= ~(SS_NOFDREF | SS_COMP); so->so_head = NULL; + socket_unlock(so, 1); soclose(so); /* Out of resources; try it again next time */ error = ECONNABORTED; @@ -1105,7 +1106,6 @@ soacceptfilter(struct socket *so) * so we know it won't be going away. Do the same for the newly * accepted socket while we invoke the accept callback routine. */ - socket_lock(so, 1); for (filter = so->so_filt; filter != NULL && error == 0; filter = filter->sfe_next_onsocket) { if (filter->sfe_filter->sf_filter.sf_accept != NULL) { @@ -1330,11 +1330,11 @@ sodisconnect(struct socket *so) * [so_error]:??? */ static int -sosendcheck(struct socket *so, struct sockaddr *addr, long resid, long clen, - long atomic, int flags, int *sblocked) +sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid, int32_t clen, + int32_t atomic, int flags, int *sblocked) { - int error = 0; - long space; + int error = 0; + int32_t space; int assumelock = 0; restart: @@ -1392,7 +1392,7 @@ sosendcheck(struct socket *so, struct sockaddr *addr, long resid, long clen, clen > so->so_snd.sb_hiwat) return (EMSGSIZE); if (space < resid + clen && - (atomic || space < (long)so->so_snd.sb_lowat || space < clen)) { + (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) { return (EWOULDBLOCK); @@ -1474,7 +1474,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, { struct mbuf **mp; register struct mbuf *m, *freelist = NULL; - register long space, len, resid; + register int32_t space, len, resid; int clen = 0, error, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; int sblocked = 0; @@ -1515,8 +1515,7 @@ 
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); - if (p) - OSIncrementAtomic(&p->p_stats->p_ru.ru_msgsnd); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd); if (control) clen = control->m_len; @@ -1547,7 +1546,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, int bytes_to_copy; boolean_t jumbocl; - bytes_to_copy = min(resid, space); + bytes_to_copy = imin(resid, space); if (sosendminchain > 0) { chainlength = 0; @@ -1684,16 +1683,15 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, MHLEN - m_leadingspace(m); else mlen = MLEN; - len = min(mlen, bytes_to_copy); + len = imin(mlen, bytes_to_copy); chainlength += len; space -= len; error = uiomove(mtod(m, caddr_t), - (int)len, uio); + len, uio); - // LP64todo - fix this! resid = uio_resid(uio); m->m_len = len; @@ -1900,7 +1898,6 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; - // LP64todo - fix this! int orig_resid = uio_resid(uio); struct mbuf *free_list; int delayed_copy_len; @@ -1967,9 +1964,8 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, goto bad; socket_unlock(so, 0); do { - // LP64todo - fix this! error = uiomove(mtod(m, caddr_t), - (int)min(uio_resid(uio), m->m_len), uio); + imin(uio_resid(uio), m->m_len), uio); m = m_free(m); } while (uio_resid(uio) && error == 0 && m); socket_lock(so, 0); @@ -2063,7 +2059,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, * which could put the application in a bad state. 
*/ if (m == NULL && so->so_rcv.sb_cc != 0) - panic("soreceive corrupted so_rcv: m %p cc %lu", + panic("soreceive corrupted so_rcv: m %p cc %u", m, so->so_rcv.sb_cc); if (so->so_error) { @@ -2122,19 +2118,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, goto restart; } dontblock: -#ifndef __APPLE__ - if (uio->uio_procp) - uio->uio_procp->p_stats->p_ru.ru_msgrcv++; -#else /* __APPLE__ */ - /* - * 2207985 - * This should be uio->uio-procp; however, some callers of this - * function use auto variables with stack garbage, and fail to - * fill out the uio structure properly. - */ - if (p) - OSIncrementAtomic(&p->p_stats->p_ru.ru_msgrcv); -#endif /* __APPLE__ */ + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; @@ -2387,7 +2371,6 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, flags |= MSG_OOB; } so->so_state &= ~SS_RCVATMARK; - // LP64todo - fix this! len = uio_resid(uio) - delayed_copy_len; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; @@ -2825,18 +2808,7 @@ sorflush(struct socket *so) if (asb.sb_flags & SB_UNIX) sb->sb_flags |= SB_UNIX; if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) { - boolean_t unp = (pr->pr_domain->dom_dispose == unp_dispose); - /* - * Currently AF_UNIX domain uses a global domain mutex; - * unp_dispose() may end up calling soclose() on another - * AF_UNIX socket and therefore the lock must not be held - * across the call. 
- */ - if (unp) - socket_unlock(so, 0); (*pr->pr_domain->dom_dispose)(asb.sb_mb); - if (unp) - socket_lock(so, 0); } sbrelease(&asb); } @@ -2868,7 +2840,7 @@ sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) if (valsize > len) sopt->sopt_valsize = valsize = len; - if (sopt->sopt_p != 0) + if (sopt->sopt_p != kernproc) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize); @@ -2886,17 +2858,21 @@ static int sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p) { int error; - + if (proc_is64bit(sopt->sopt_p)) { - struct timeval64 tv64; + struct user64_timeval tv64; if (sopt->sopt_valsize < sizeof(tv64)) { return (EINVAL); } sopt->sopt_valsize = sizeof(tv64); - error = copyin(sopt->sopt_val, &tv64, sizeof(tv64)); - if (error != 0) { - return (error); + if (sopt->sopt_p != kernproc) { + error = copyin(sopt->sopt_val, &tv64, sizeof(tv64)); + if (error != 0) + return (error); + } else { + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64, + sizeof(tv64)); } if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX || tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) { @@ -2905,23 +2881,29 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p) tv_p->tv_sec = tv64.tv_sec; tv_p->tv_usec = tv64.tv_usec; } else { - if (sopt->sopt_valsize < sizeof(*tv_p)) { + struct user32_timeval tv32; + + if (sopt->sopt_valsize < sizeof(tv32)) { return (EINVAL); } - sopt->sopt_valsize = sizeof(*tv_p); - if (sopt->sopt_p != 0) { - error = copyin(sopt->sopt_val, tv_p, sizeof(*tv_p)); + sopt->sopt_valsize = sizeof(tv32); + if (sopt->sopt_p != kernproc) { + error = copyin(sopt->sopt_val, &tv32, sizeof(tv32)); if (error != 0) { return (error); } } else { - bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), tv_p, - sizeof(*tv_p)); + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32, + sizeof(tv32)); } - if (tv_p->tv_sec < 0 || tv_p->tv_sec > LONG_MAX - || tv_p->tv_usec < 0 || tv_p->tv_usec >= 1000000) { +#ifndef __LP64__ // K64todo 
"comparison is always false due to limited range of data type" + if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX + || tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) { return (EDOM); } +#endif + tv_p->tv_sec = tv32.tv_sec; + tv_p->tv_usec = tv32.tv_usec; } return (0); } @@ -2960,7 +2942,8 @@ sosetopt(struct socket *so, struct sockopt *sopt) socket_lock(so, 1); if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) - == (SS_CANTRCVMORE | SS_CANTSENDMORE)) { + == (SS_CANTRCVMORE | SS_CANTSENDMORE) && + (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) { /* the socket has been shutdown, no more sockopt's */ error = EINVAL; goto bad; @@ -3066,7 +3049,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, - (u_long) optval) == 0) { + (u_int32_t) optval) == 0) { error = ENOBUFS; goto bad; } @@ -3212,6 +3195,39 @@ sosetopt(struct socket *so, struct sockopt *sopt) break; #endif + case SO_RANDOMPORT: + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); + if (error) + goto bad; + if (optval) + so->so_flags |= SOF_BINDRANDOMPORT; + else + so->so_flags &= ~SOF_BINDRANDOMPORT; + break; + + case SO_NP_EXTENSIONS: { + struct so_np_extensions sonpx; + + error = sooptcopyin(sopt, &sonpx, sizeof(sonpx), sizeof(sonpx)); + if (error) + goto bad; + if (sonpx.npx_mask & ~SONPX_MASK_VALID) { + error = EINVAL; + goto bad; + } + /* + * Only one bit defined for now + */ + if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) { + if ((sonpx.npx_flags & SONPX_SETOPTSHUT)) + so->so_flags |= SOF_NPX_SETOPTSHUT; + else + so->so_flags &= ~SOF_NPX_SETOPTSHUT; + } + break; + } + default: error = ENOPROTOOPT; break; @@ -3246,7 +3262,7 @@ sooptcopyout(struct sockopt *sopt, void *buf, size_t len) valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != USER_ADDR_NULL) { - if (sopt->sopt_p != 0) + if (sopt->sopt_p != kernproc) error = copyout(buf, sopt->sopt_val, valsize); else 
bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); @@ -3259,24 +3275,27 @@ sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p) { int error; size_t len; - struct timeval64 tv64; + struct user64_timeval tv64; + struct user32_timeval tv32; const void * val; size_t valsize; - + error = 0; if (proc_is64bit(sopt->sopt_p)) { - len = sizeof(struct timeval64); + len = sizeof(tv64); tv64.tv_sec = tv_p->tv_sec; tv64.tv_usec = tv_p->tv_usec; val = &tv64; } else { - len = sizeof(struct timeval); - val = tv_p; + len = sizeof(tv32); + tv32.tv_sec = tv_p->tv_sec; + tv32.tv_usec = tv_p->tv_usec; + val = &tv32; } valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != USER_ADDR_NULL) { - if (sopt->sopt_p != 0) + if (sopt->sopt_p != kernproc) error = copyout(val, sopt->sopt_val, valsize); else bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); @@ -3485,7 +3504,19 @@ sogetopt(struct socket *so, struct sockopt *sopt) optval = (so->so_flags & SOF_UPCALLCLOSEWAIT); goto integer; #endif + case SO_RANDOMPORT: + optval = (so->so_flags & SOF_BINDRANDOMPORT); + goto integer; + + case SO_NP_EXTENSIONS: { + struct so_np_extensions sonpx; + + sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? SONPX_SETOPTSHUT : 0; + sonpx.npx_mask = SONPX_MASK_VALID; + error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions)); + break; + } default: error = ENOPROTOOPT; break; @@ -3501,15 +3532,17 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; + int how; if (sopt_size > MAX_SOOPTGETM_SIZE) return (EMSGSIZE); - MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); + how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT; + MGET(m, how, MT_DATA); if (m == 0) return (ENOBUFS); if (sopt_size > MLEN) { - MCLGET(m, sopt->sopt_p ? 
M_WAIT : M_DONTWAIT); + MCLGET(m, how); if ((m->m_flags & M_EXT) == 0) { m_free(m); return (ENOBUFS); @@ -3523,13 +3556,13 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp) m_prev = m; while (sopt_size) { - MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); + MGET(m, how, MT_DATA); if (m == 0) { m_freem(*mp); return (ENOBUFS); } if (sopt_size > MLEN) { - MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); + MCLGET(m, how); if ((m->m_flags & M_EXT) == 0) { m_freem(*mp); return (ENOBUFS); @@ -3554,7 +3587,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) if (sopt->sopt_val == USER_ADDR_NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { - if (sopt->sopt_p != NULL) { + if (sopt->sopt_p != kernproc) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), @@ -3586,7 +3619,7 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) if (sopt->sopt_val == USER_ADDR_NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { - if (sopt->sopt_p != NULL) { + if (sopt->sopt_p != kernproc) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, @@ -3686,10 +3719,7 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, switch (kn->kn_filter) { case EVFILT_READ: - if (so->so_options & SO_ACCEPTCONN) - kn->kn_fop = &solisten_filtops; - else - kn->kn_fop = &soread_filtops; + kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: @@ -3728,6 +3758,25 @@ filt_soread(struct knote *kn, long hint) if ((hint & SO_FILT_HINT_LOCKED) == 0) socket_lock(so, 1); + if (so->so_options & SO_ACCEPTCONN) { + int isempty; + + /* Radar 6615193 handle the listen case dynamically + * for kqueue read filter. This allows to call listen() after registering + * the kqueue EVFILT_READ. + */ + + kn->kn_data = so->so_qlen; + isempty = ! 
TAILQ_EMPTY(&so->so_comp); + + if ((hint & SO_FILT_HINT_LOCKED) == 0) + socket_unlock(so, 1); + + return (isempty); + } + + /* socket isn't a listener */ + kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; if (so->so_oobmark) { @@ -3824,29 +3873,29 @@ filt_sowrite(struct knote *kn, long hint) return (kn->kn_data >= so->so_snd.sb_lowat); } -/*ARGSUSED*/ -static int -filt_solisten(struct knote *kn, long hint) +#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + sizeof(void *) + 1) + 1) + +__private_extern__ const char * solockhistory_nr(struct socket *so) { - struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; - int isempty; + size_t n = 0; + int i; + static char lock_history_str[SO_LOCK_HISTORY_STR_LEN]; - if ((hint & SO_FILT_HINT_LOCKED) == 0) - socket_lock(so, 1); - kn->kn_data = so->so_qlen; - isempty = ! TAILQ_EMPTY(&so->so_comp); - if ((hint & SO_FILT_HINT_LOCKED) == 0) - socket_unlock(so, 1); - return (isempty); + for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) { + n += snprintf(lock_history_str + n, SO_LOCK_HISTORY_STR_LEN - n, "%lx:%lx ", + (uintptr_t) so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX], + (uintptr_t) so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]); + } + return lock_history_str; } - int socket_lock(struct socket *so, int refcount) { - int error = 0, lr_saved; + int error = 0; + void *lr_saved; - lr_saved = (unsigned int) __builtin_return_address(0); + lr_saved = __builtin_return_address(0); if (so->so_proto->pr_lock) { error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); @@ -3858,7 +3907,7 @@ socket_lock(struct socket *so, int refcount) lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); if (refcount) so->so_usecount++; - so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved; + so->lock_lr[so->next_lock_lr] = lr_saved; so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; } @@ -3868,10 +3917,11 @@ socket_lock(struct socket *so, int refcount) int socket_unlock(struct socket *so, int refcount) { - int error = 0, 
lr_saved; + int error = 0; + void *lr_saved; lck_mtx_t *mutex_held; - lr_saved = (unsigned int) __builtin_return_address(0); + lr_saved = __builtin_return_address(0); if (so->so_proto == NULL) panic("socket_unlock null so_proto so=%p\n", so); @@ -3883,13 +3933,16 @@ socket_unlock(struct socket *so, int refcount) #ifdef MORE_LOCKING_DEBUG lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); #endif - so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved; + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; if (refcount) { if (so->so_usecount <= 0) - panic("socket_unlock: bad refcount so=%p " - "value=%d\n", so, so->so_usecount); + panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s", + so->so_usecount, so, so->so_proto->pr_domain->dom_family, + so->so_type, so->so_proto->pr_protocol, + solockhistory_nr(so)); + so->so_usecount--; if (so->so_usecount == 0) { sofreelastref(so, 1); @@ -3943,3 +3996,16 @@ somultipages(struct socket *so, boolean_t set) else so->so_flags &= ~SOF_MULTIPAGES; } + +int +so_isdstlocal(struct socket *so) { + + struct inpcb *inp = (struct inpcb *)so->so_pcb; + + if (so->so_proto->pr_domain->dom_family == AF_INET) { + return inaddr_local(inp->inp_faddr); + } else if (so->so_proto->pr_domain->dom_family == AF_INET6) { + return in6addr_local(&inp->in6p_faddr); + } + return 0; +} diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index 41a606ca3..c53150e51 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2007 Apple Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,9 +113,15 @@ static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *, static int soqlimitcompat = 1; static int soqlencomp = 0; -u_long sb_max = SB_MAX; /* XXX should be static */ +/* Based on the number of mbuf clusters configured, high_sb_max and sb_max can get + * scaled up or down to suit that memory configuration. high_sb_max is a higher + * limit on sb_max that is checked when sb_max gets set through sysctl. + */ + +u_int32_t sb_max = SB_MAX; /* XXX should be static */ +u_int32_t high_sb_max = SB_MAX; -static u_long sb_efficiency = 8; /* parameter for sbreserve() */ +static u_int32_t sb_efficiency = 8; /* parameter for sbreserve() */ __private_extern__ unsigned int total_mb_cnt = 0; __private_extern__ unsigned int total_cl_cnt = 0; __private_extern__ int sbspace_factor = 8; @@ -260,7 +266,7 @@ sonewconn_internal(struct socket *head, int connstatus) if (so_qlen >= (soqlimitcompat ? head->so_qlimit : (3 * head->so_qlimit / 2))) return ((struct socket *)0); - so = soalloc(M_NOWAIT, head->so_proto->pr_domain->dom_family, + so = soalloc(1, head->so_proto->pr_domain->dom_family, head->so_type); if (so == NULL) return ((struct socket *)0); @@ -279,7 +285,13 @@ sonewconn_internal(struct socket *head, int connstatus) so->so_timeo = head->so_timeo; so->so_pgid = head->so_pgid; so->so_uid = head->so_uid; - so->so_flags = head->so_flags & (SOF_REUSESHAREUID|SOF_NOTIFYCONFLICT); /* inherit SO_REUSESHAREUID and SO_NOTIFYCONFLICT ocket options */ + /* inherit socket options stored in so_flags */ + so->so_flags = head->so_flags & (SOF_NOSIGPIPE | + SOF_NOADDRAVAIL | + SOF_REUSESHAREUID | + SOF_NOTIFYCONFLICT | + SOF_BINDRANDOMPORT | + SOF_NPX_SETOPTSHUT); so->so_usecount = 1; so->next_lock_lr = 0; so->next_unlock_lr = 0; @@ -411,12 +423,13 @@ socantrcvmore(struct socket *so) int sbwait(struct sockbuf *sb) { - int error = 0, lr_saved; + int error = 0; + uintptr_t lr_saved; struct socket *so = sb->sb_so; 
lck_mtx_t *mutex_held; struct timespec ts; - lr_saved = (unsigned int) __builtin_return_address(0); + lr_saved = (uintptr_t) __builtin_return_address(0); if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); @@ -564,7 +577,7 @@ sowakeup(struct socket *so, struct sockbuf *sb) * ENOBUFS */ int -soreserve(struct socket *so, u_long sndcc, u_long rcvcc) +soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc) { if (sbreserve(&so->so_snd, sndcc) == 0) @@ -593,7 +606,7 @@ soreserve(struct socket *so, u_long sndcc, u_long rcvcc) * if buffering efficiency is near the normal case. */ int -sbreserve(struct sockbuf *sb, u_long cc) +sbreserve(struct sockbuf *sb, u_int32_t cc) { if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) return (0); @@ -724,7 +737,7 @@ sbcheck(struct sockbuf *sb) { struct mbuf *m; struct mbuf *n = 0; - u_long len = 0, mbcnt = 0; + u_int32_t len = 0, mbcnt = 0; lck_mtx_t *mutex_held; if (sb->sb_so->so_proto->pr_getlock != NULL) @@ -1215,7 +1228,7 @@ sb_empty_assert(struct sockbuf *sb, const char *where) { if (!(sb->sb_cc == 0 && sb->sb_mb == NULL && sb->sb_mbcnt == 0 && sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) { - panic("%s: sb %p so %p cc %ld mbcnt %ld mb %p mbtail %p " + panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p " "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc, sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, sb->sb_lastrecord); /* NOTREACHED */ @@ -1436,7 +1449,7 @@ pru_connect2_notsupp(__unused struct socket *so1, __unused struct socket *so2) } int -pru_control_notsupp(__unused struct socket *so, __unused u_long cmd, +pru_control_notsupp(__unused struct socket *so, __unused u_long cmd, __unused caddr_t data, __unused struct ifnet *ifp, __unused struct proc *p) { return (EOPNOTSUPP); @@ -1570,13 +1583,18 @@ sb_notify(struct sockbuf *sb) * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? 
* This is problematical if the fields are unsigned, as the space might * still be negative (cc > hiwat or mbcnt > mbmax). Should detect - * overflow and return 0. Should use "lmin" but it doesn't exist now. + * overflow and return 0. */ -long +int sbspace(struct sockbuf *sb) { - return ((long)imin((int)(sb->sb_hiwat - sb->sb_cc), - (int)(sb->sb_mbmax - sb->sb_mbcnt))); + int space = + imin((int)(sb->sb_hiwat - sb->sb_cc), + (int)(sb->sb_mbmax - sb->sb_mbcnt)); + if (space < 0) + space = 0; + + return space; } /* do we have to send all at once on a socket? */ @@ -1600,7 +1618,7 @@ soreadable(struct socket *so) int sowriteable(struct socket *so) { - return ((sbspace(&(so)->so_snd) >= (long)(so)->so_snd.sb_lowat && + return ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && ((so->so_state&SS_ISCONNECTED) || (so->so_proto->pr_flags&PR_CONNREQUIRED) == 0)) || (so->so_state & SS_CANTSENDMORE) || @@ -1623,7 +1641,7 @@ sballoc(struct sockbuf *sb, struct mbuf *m) sb->sb_mbcnt += m->m_ext.ext_size; cnt += m->m_ext.ext_size / MSIZE ; } - OSAddAtomic(cnt, (SInt32*)&total_mb_cnt); + OSAddAtomic(cnt, &total_mb_cnt); } /* adjust counters in sb reflecting freeing of m */ @@ -1640,7 +1658,7 @@ sbfree(struct sockbuf *sb, struct mbuf *m) sb->sb_mbcnt -= m->m_ext.ext_size; cnt -= m->m_ext.ext_size / MSIZE ; } - OSAddAtomic(cnt, (SInt32*)&total_mb_cnt); + OSAddAtomic(cnt, &total_mb_cnt); } /* @@ -1670,36 +1688,37 @@ void sbunlock(struct sockbuf *sb, int keeplocked) { struct socket *so = sb->sb_so; - int lr_saved; + void *lr_saved; lck_mtx_t *mutex_held; - lr_saved = (unsigned int) __builtin_return_address(0); + lr_saved = __builtin_return_address(0); sb->sb_flags &= ~SB_LOCK; if (sb->sb_flags & SB_WANT) { sb->sb_flags &= ~SB_WANT; - if (so->so_usecount < 0) - panic("sbunlock: b4 wakeup so=%p ref=%d lr=%x " - "sb_flags=%x\n", sb->sb_so, so->so_usecount, - lr_saved, sb->sb_flags); - + if (so->so_usecount < 0) { + panic("sbunlock: b4 wakeup so=%p ref=%d lr=%p " + "sb_flags=%x lrh= 
%s\n", sb->sb_so, so->so_usecount, + lr_saved, sb->sb_flags, solockhistory_nr(so)); + /* NOTREACHED */ + } wakeup((caddr_t)&(sb)->sb_flags); } if (keeplocked == 0) { /* unlock on exit */ - if (so->so_proto->pr_getlock != NULL) + if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); - else + else mutex_held = so->so_proto->pr_domain->dom_mtx; - + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); so->so_usecount--; if (so->so_usecount < 0) - panic("sbunlock: unlock on exit so=%p ref=%d lr=%x " - "sb_flags=%x\n", so, so->so_usecount, lr_saved, - sb->sb_flags); - so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved; + panic("sbunlock: unlock on exit so=%p ref=%d lr=%p " + "sb_flags=%x lrh= %s\n", so, so->so_usecount, lr_saved, + sb->sb_flags, solockhistory_nr(so)); + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); } @@ -1747,12 +1766,12 @@ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof (*xso); - xso->xso_so = so; + xso->xso_so = (_XSOCKET_PTR(struct socket *))(uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; - xso->so_pcb = so->so_pcb; + xso->so_pcb = (_XSOCKET_PTR(caddr_t))(uintptr_t)so->so_pcb; if (so->so_proto) { xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; @@ -1771,6 +1790,39 @@ sotoxsocket(struct socket *so, struct xsocket *xso) xso->so_uid = so->so_uid; } + +#if !CONFIG_EMBEDDED + +void +sotoxsocket64(struct socket *so, struct xsocket64 *xso) +{ + xso->xso_len = sizeof (*xso); + xso->xso_so = (u_int64_t)(uintptr_t)so; + xso->so_type = so->so_type; + xso->so_options = so->so_options; + xso->so_linger = so->so_linger; + xso->so_state = so->so_state; + xso->so_pcb = (u_int64_t)(uintptr_t)so->so_pcb; + if (so->so_proto) { + xso->xso_protocol = so->so_proto->pr_protocol; 
+ xso->xso_family = so->so_proto->pr_domain->dom_family; + } else { + xso->xso_protocol = xso->xso_family = 0; + } + xso->so_qlen = so->so_qlen; + xso->so_incqlen = so->so_incqlen; + xso->so_qlimit = so->so_qlimit; + xso->so_timeo = so->so_timeo; + xso->so_error = so->so_error; + xso->so_pgid = so->so_pgid; + xso->so_oobmark = so->so_oobmark; + sbtoxsockbuf(&so->so_snd, &xso->so_snd); + sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); + xso->so_uid = so->so_uid; +} + +#endif /* !CONFIG_EMBEDDED */ + /* * This does the same for sockbufs. Note that the xsockbuf structure, * since it is always embedded in a socket, does not include a self @@ -1786,7 +1838,7 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; - xsb->sb_timeo = (u_long) + xsb->sb_timeo = (short) (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) xsb->sb_timeo = 1; @@ -1798,12 +1850,30 @@ sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPC"); -/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. 
*/ -static int dummy; -SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); +/* Check that the maximum socket buffer size is within a range */ + +static int +sysctl_sb_max(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + u_int32_t new_value; + int changed = 0; + int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t), &new_value, + &changed); + if (!error && changed) { + if (new_value > LOW_SB_MAX && + new_value <= high_sb_max ) { + sb_max = new_value; + } else { + error = ERANGE; + } + } + return error; +} + +SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW, + &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size"); -SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, - &sb_max, 0, "Maximum socket buffer size"); SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, "Maximum number of sockets avaliable"); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 7e9cafa35..94baa5bbc 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -87,7 +87,7 @@ #include #include -#include +#include #include #include @@ -133,9 +133,9 @@ static pid_t last_pid_4056224 = 0; int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, - register_t *); + int32_t *); static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, - register_t *); + int32_t *); static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t, size_t, boolean_t); static int getsockaddr_s(struct socket *, struct sockaddr_storage *, @@ -166,7 +166,7 @@ extern struct fileops socketops; * socreate:??? 
[other protocol families, IPSEC] */ int -socket(struct proc *p, struct socket_args *uap, register_t *retval) +socket(struct proc *p, struct socket_args *uap, int32_t *retval) { struct socket *so; struct fileproc *fp; @@ -230,7 +230,7 @@ socket(struct proc *p, struct socket_args *uap, register_t *retval) */ /* ARGSUSED */ int -bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval) +bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval) { struct sockaddr_storage ss; struct sockaddr *sa = NULL; @@ -287,7 +287,7 @@ bind(__unused proc_t p, struct bind_args *uap, __unused register_t *retval) */ int listen(__unused struct proc *p, struct listen_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { int error; struct socket *so; @@ -332,7 +332,7 @@ listen(__unused struct proc *p, struct listen_args *uap, */ int accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, - register_t *retval) + int32_t *retval) { struct fileproc *fp; struct sockaddr *sa = NULL; @@ -498,8 +498,6 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, namelen = 0; if (uap->name) goto gotnoname; - if (dosocklock) - socket_unlock(so, 1); error = 0; goto releasefd; } @@ -521,6 +519,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, } FREE(sa, M_SONAME); +releasefd: /* * If the socket has been marked as inactive by soacceptfilter(), * disallow further operations on it. 
We explicitly call shutdown @@ -537,7 +536,6 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, if (dosocklock) socket_unlock(so, 1); -releasefd: proc_fdlock(p); procfdtbl_releasefd(p, newfd, NULL); fp_drop(p, newfd, fp, 1); @@ -549,7 +547,7 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, } int -accept(struct proc *p, struct accept_args *uap, register_t *retval) +accept(struct proc *p, struct accept_args *uap, int32_t *retval) { __pthread_testcancel(1); return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval)); @@ -579,14 +577,14 @@ accept(struct proc *p, struct accept_args *uap, register_t *retval) */ /* ARGSUSED */ int -connect(struct proc *p, struct connect_args *uap, register_t *retval) +connect(struct proc *p, struct connect_args *uap, int32_t *retval) { __pthread_testcancel(1); return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval)); } int -connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused register_t *retval) +connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval) { struct socket *so; struct sockaddr_storage ss; @@ -700,7 +698,7 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused */ int socketpair(struct proc *p, struct socketpair_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct fileproc *fp1, *fp2; struct socket *so1, *so2; @@ -810,7 +808,7 @@ socketpair(struct proc *p, struct socketpair_args *uap, */ static int sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, - int flags, register_t *retval) + int flags, int32_t *retval) { struct mbuf *control = NULL; struct sockaddr_storage ss; @@ -898,14 +896,14 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, * write:??? 
[4056224: applicable for pipes] */ int -sendto(struct proc *p, struct sendto_args *uap, register_t *retval) +sendto(struct proc *p, struct sendto_args *uap, int32_t *retval) { __pthread_testcancel(1); return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); } int -sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *retval) +sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval) { struct user_msghdr msg; int error; @@ -984,34 +982,32 @@ sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, register_t *re * sendit:??? [see sendit definition in this file] */ int -sendmsg(struct proc *p, struct sendmsg_args *uap, register_t *retval) +sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); } int -sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t *retval) +sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval) { - struct msghdr msg; + struct user32_msghdr msg32; + struct user64_msghdr msg64; struct user_msghdr user_msg; caddr_t msghdrp; int size_of_msghdr; int error; - int size_of_iovec; uio_t auio = NULL; struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); if (IS_64BIT_PROCESS(p)) { - msghdrp = (caddr_t)&user_msg; - size_of_msghdr = sizeof (user_msg); - size_of_iovec = sizeof (struct user_iovec); + msghdrp = (caddr_t)&msg64; + size_of_msghdr = sizeof (msg64); } else { - msghdrp = (caddr_t)&msg; - size_of_msghdr = sizeof (msg); - size_of_iovec = sizeof (struct iovec); + msghdrp = (caddr_t)&msg32; + size_of_msghdr = sizeof (msg32); } error = copyin(uap->msg, msghdrp, size_of_msghdr); if (error) { @@ -1019,15 +1015,22 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t * return (error); } - /* only need to copy if user process is not 
64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_msg.msg_flags = msg.msg_flags; - user_msg.msg_controllen = msg.msg_controllen; - user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); - user_msg.msg_iovlen = msg.msg_iovlen; - user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); - user_msg.msg_namelen = msg.msg_namelen; - user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg64.msg_flags; + user_msg.msg_controllen = msg64.msg_controllen; + user_msg.msg_control = msg64.msg_control; + user_msg.msg_iovlen = msg64.msg_iovlen; + user_msg.msg_iov = msg64.msg_iov; + user_msg.msg_namelen = msg64.msg_namelen; + user_msg.msg_name = msg64.msg_name; + } else { + user_msg.msg_flags = msg32.msg_flags; + user_msg.msg_controllen = msg32.msg_controllen; + user_msg.msg_control = msg32.msg_control; + user_msg.msg_iovlen = msg32.msg_iovlen; + user_msg.msg_iov = msg32.msg_iov; + user_msg.msg_namelen = msg32.msg_namelen; + user_msg.msg_name = msg32.msg_name; } if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { @@ -1055,8 +1058,9 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t * error = ENOBUFS; goto done; } - error = copyin(user_msg.msg_iov, (caddr_t)iovp, - (user_msg.msg_iovlen * size_of_iovec)); + error = copyin_user_iovec_array(user_msg.msg_iov, + IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32, + user_msg.msg_iovlen, iovp); if (error) goto done; user_msg.msg_iov = CAST_USER_ADDR_T(iovp); @@ -1104,7 +1108,7 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, register_t * */ static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, - user_addr_t namelenp, register_t *retval) + user_addr_t namelenp, int32_t *retval) { int len, error; struct mbuf *m, *control = 0; @@ -1155,7 +1159,9 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop, (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, &mp->msg_flags); - AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), fromsa); + if (fromsa) + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), + fromsa); if (error) { if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -1198,19 +1204,62 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, while (m && len > 0) { unsigned int tocopy; + struct cmsghdr *cp = mtod(m, struct cmsghdr *); + + /* + * SCM_TIMESTAMP hack because struct timeval has a + * different size for 32 bits and 64 bits processes + */ + if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { + unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; + struct cmsghdr *tmp_cp = (struct cmsghdr *)tmp_buffer; + int tmp_space; + struct timeval *tv = (struct timeval *)CMSG_DATA(cp); + + tmp_cp->cmsg_level = SOL_SOCKET; + tmp_cp->cmsg_type = SCM_TIMESTAMP; + + if (proc_is64bit(p)) { + struct user64_timeval *tv64 = (struct user64_timeval *)CMSG_DATA(tmp_cp); + + tv64->tv_sec = tv->tv_sec; + tv64->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); + } else { + struct user32_timeval *tv32 = (struct user32_timeval *)CMSG_DATA(tmp_cp); + + tv32->tv_sec = 
tv->tv_sec; + tv32->tv_usec = tv->tv_usec; + + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); + tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); + } + if (len >= tmp_space) { + tocopy = tmp_space; + } else { + mp->msg_flags |= MSG_CTRUNC; + tocopy = len; + } + error = copyout(tmp_buffer, ctlbuf, tocopy); + if (error) + goto out; - if (len >= m->m_len) { - tocopy = m->m_len; } else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; + if (len >= m->m_len) { + tocopy = m->m_len; + } else { + mp->msg_flags |= MSG_CTRUNC; + tocopy = len; + } + + error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, + tocopy); + if (error) + goto out; } - error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, - tocopy); - if (error) - goto out; - ctlbuf += tocopy; len -= tocopy; m = m->m_next; @@ -1245,14 +1294,14 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, * the block header for the recvit function. */ int -recvfrom(struct proc *p, struct recvfrom_args *uap, register_t *retval) +recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval) { __pthread_testcancel(1); return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval)); } int -recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t *retval) +recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval) { struct user_msghdr msg; int error; @@ -1342,35 +1391,33 @@ recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, register_t * the block header for the recvit function. 
*/ int -recvmsg(struct proc *p, struct recvmsg_args *uap, register_t *retval) +recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval)); } int -recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t *retval) +recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval) { - struct msghdr msg; + struct user32_msghdr msg32; + struct user64_msghdr msg64; struct user_msghdr user_msg; caddr_t msghdrp; int size_of_msghdr; user_addr_t uiov; int error; - int size_of_iovec; uio_t auio = NULL; struct user_iovec *iovp; KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); AUDIT_ARG(fd, uap->s); if (IS_64BIT_PROCESS(p)) { - msghdrp = (caddr_t)&user_msg; - size_of_msghdr = sizeof (user_msg); - size_of_iovec = sizeof (struct user_iovec); + msghdrp = (caddr_t)&msg64; + size_of_msghdr = sizeof (msg64); } else { - msghdrp = (caddr_t)&msg; - size_of_msghdr = sizeof (msg); - size_of_iovec = sizeof (struct iovec); + msghdrp = (caddr_t)&msg32; + size_of_msghdr = sizeof (msg32); } error = copyin(uap->msg, msghdrp, size_of_msghdr); if (error) { @@ -1379,14 +1426,22 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * } /* only need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_msg.msg_flags = msg.msg_flags; - user_msg.msg_controllen = msg.msg_controllen; - user_msg.msg_control = CAST_USER_ADDR_T(msg.msg_control); - user_msg.msg_iovlen = msg.msg_iovlen; - user_msg.msg_iov = CAST_USER_ADDR_T(msg.msg_iov); - user_msg.msg_namelen = msg.msg_namelen; - user_msg.msg_name = CAST_USER_ADDR_T(msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + user_msg.msg_flags = msg64.msg_flags; + user_msg.msg_controllen = msg64.msg_controllen; + user_msg.msg_control = msg64.msg_control; + user_msg.msg_iovlen = msg64.msg_iovlen; + user_msg.msg_iov = msg64.msg_iov; + user_msg.msg_namelen = 
msg64.msg_namelen; + user_msg.msg_name = msg64.msg_name; + } else { + user_msg.msg_flags = msg32.msg_flags; + user_msg.msg_controllen = msg32.msg_controllen; + user_msg.msg_control = msg32.msg_control; + user_msg.msg_iovlen = msg32.msg_iovlen; + user_msg.msg_iov = msg32.msg_iov; + user_msg.msg_namelen = msg32.msg_namelen; + user_msg.msg_name = msg32.msg_name; } if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { @@ -1417,8 +1472,9 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * } uiov = user_msg.msg_iov; user_msg.msg_iov = CAST_USER_ADDR_T(iovp); - error = copyin(uiov, (caddr_t)iovp, - (user_msg.msg_iovlen * size_of_iovec)); + error = copyin_user_iovec_array(uiov, + IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, + user_msg.msg_iovlen, iovp); if (error) goto done; @@ -1428,18 +1484,22 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * error = recvit(p, uap->s, &user_msg, auio, 0, retval); if (!error) { user_msg.msg_iov = uiov; - /* only need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - // LP64todo - do all these change? if not, then no need to copy all of them! 
- msg.msg_flags = user_msg.msg_flags; - msg.msg_controllen = user_msg.msg_controllen; - msg.msg_control = - CAST_DOWN(caddr_t, user_msg.msg_control); - msg.msg_iovlen = user_msg.msg_iovlen; - msg.msg_iov = (struct iovec *) - CAST_DOWN(caddr_t, user_msg.msg_iov); - msg.msg_namelen = user_msg.msg_namelen; - msg.msg_name = CAST_DOWN(caddr_t, user_msg.msg_name); + if (IS_64BIT_PROCESS(p)) { + msg64.msg_flags = user_msg.msg_flags; + msg64.msg_controllen = user_msg.msg_controllen; + msg64.msg_control = user_msg.msg_control; + msg64.msg_iovlen = user_msg.msg_iovlen; + msg64.msg_iov = user_msg.msg_iov; + msg64.msg_namelen = user_msg.msg_namelen; + msg64.msg_name = user_msg.msg_name; + } else { + msg32.msg_flags = user_msg.msg_flags; + msg32.msg_controllen = user_msg.msg_controllen; + msg32.msg_control = user_msg.msg_control; + msg32.msg_iovlen = user_msg.msg_iovlen; + msg32.msg_iov = user_msg.msg_iov; + msg32.msg_namelen = user_msg.msg_namelen; + msg32.msg_name = user_msg.msg_name; } error = copyout(msghdrp, uap->msg, size_of_msghdr); } @@ -1474,7 +1534,7 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, register_t * /* ARGSUSED */ int shutdown(__unused struct proc *p, struct shutdown_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; int error; @@ -1511,7 +1571,7 @@ shutdown(__unused struct proc *p, struct shutdown_args *uap, /* ARGSUSED */ int setsockopt(struct proc *p, struct setsockopt_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; struct sockopt sopt; @@ -1563,7 +1623,7 @@ setsockopt(struct proc *p, struct setsockopt_args *uap, */ int getsockopt(struct proc *p, struct getsockopt_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { int error; socklen_t valsize; @@ -1627,7 +1687,7 @@ getsockopt(struct proc *p, struct getsockopt_args *uap, /* ARGSUSED */ int getsockname(__unused struct proc *p, struct getsockname_args *uap, - __unused register_t 
*retval) + __unused int32_t *retval) { struct socket *so; struct sockaddr *sa; @@ -1714,7 +1774,7 @@ getsockname(__unused struct proc *p, struct getsockname_args *uap, /* ARGSUSED */ int getpeername(__unused struct proc *p, struct getpeername_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { struct socket *so; struct sockaddr *sa; @@ -1806,22 +1866,31 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) struct mbuf *m; int error; - if ((u_int)buflen > MLEN) { - if (type == MT_SONAME && (u_int)buflen <= 112) - buflen = MLEN; /* unix domain compat. hack */ - else if ((u_int)buflen > MCLBYTES) + int alloc_buflen = buflen; +#ifdef __LP64__ + /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */ + if(type == MT_CONTROL) + alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr); +#endif + if ((u_int)alloc_buflen > MLEN) { + if (type == MT_SONAME && (u_int)alloc_buflen <= 112) + alloc_buflen = MLEN; /* unix domain compat. 
hack */ + else if ((u_int)alloc_buflen > MCLBYTES) return (EINVAL); } m = m_get(M_WAIT, type); if (m == NULL) return (ENOBUFS); - if ((u_int)buflen > MLEN) { + if ((u_int)alloc_buflen > MLEN) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return (ENOBUFS); } } + /* K64: We still copyin the original buflen because it gets expanded later + * and we lie about the size of the mbuf because it only affects unp_* functions + */ m->m_len = buflen; error = copyin(data, mtod(m, caddr_t), (u_int)buflen); if (error) { @@ -2011,18 +2080,22 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) struct socket *so; struct writev_nocancel_args nuap; user_ssize_t writev_retval; - struct sf_hdtr hdtr; struct user_sf_hdtr user_hdtr; + struct user32_sf_hdtr user32_hdtr; + struct user64_sf_hdtr user64_hdtr; off_t off, xfsize; off_t nbytes = 0, sbytes = 0; int error = 0; size_t sizeof_hdtr; - size_t size_of_iovec; off_t file_size; struct vfs_context context = *vfs_context_current(); KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s, 0, 0, 0, 0); + + AUDIT_ARG(fd, uap->fd); + AUDIT_ARG(value32, uap->s); + /* * Do argument checking. Must be a regular file in, stream * type and connected socket out, positive offset. 
@@ -2092,23 +2165,25 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) bzero(&user_hdtr, sizeof (user_hdtr)); if (IS_64BIT_PROCESS(p)) { - hdtrp = (caddr_t)&user_hdtr; - sizeof_hdtr = sizeof (user_hdtr); - size_of_iovec = sizeof (struct user_iovec); + hdtrp = (caddr_t)&user64_hdtr; + sizeof_hdtr = sizeof (user64_hdtr); } else { - hdtrp = (caddr_t)&hdtr; - sizeof_hdtr = sizeof (hdtr); - size_of_iovec = sizeof (struct iovec); + hdtrp = (caddr_t)&user32_hdtr; + sizeof_hdtr = sizeof (user32_hdtr); } error = copyin(uap->hdtr, hdtrp, sizeof_hdtr); if (error) goto done2; - /* need to copy if user process is not 64-bit */ - if (!IS_64BIT_PROCESS(p)) { - user_hdtr.headers = CAST_USER_ADDR_T(hdtr.headers); - user_hdtr.hdr_cnt = hdtr.hdr_cnt; - user_hdtr.trailers = CAST_USER_ADDR_T(hdtr.trailers); - user_hdtr.trl_cnt = hdtr.trl_cnt; + if (IS_64BIT_PROCESS(p)) { + user_hdtr.headers = user64_hdtr.headers; + user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt; + user_hdtr.trailers = user64_hdtr.trailers; + user_hdtr.trl_cnt = user64_hdtr.trl_cnt; + } else { + user_hdtr.headers = user32_hdtr.headers; + user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt; + user_hdtr.trailers = user32_hdtr.trailers; + user_hdtr.trl_cnt = user32_hdtr.trl_cnt; } /* @@ -2203,7 +2278,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) socket_unlock(so, 0); alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl); pktlen = mbuf_pkt_maxlen(m0); - if (pktlen < xfsize) + if (pktlen < (size_t)xfsize) xfsize = pktlen; auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE, @@ -2217,11 +2292,11 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) } for (i = 0, m = m0, uiolen = 0; - i < nbufs && m != NULL && uiolen < xfsize; + i < nbufs && m != NULL && uiolen < (size_t)xfsize; i++, m = mbuf_next(m)) { size_t mlen = mbuf_maxlen(m); - if (mlen + uiolen > xfsize) + if (mlen + uiolen > (size_t)xfsize) mlen = xfsize - uiolen; mbuf_setlen(m, mlen); uio_addiov(auio, 
CAST_USER_ADDR_T(mbuf_datastart(m)), @@ -2264,7 +2339,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) i++, m = mbuf_next(m)) { size_t mlen = mbuf_maxlen(m); - if (rlen + mlen > xfsize) + if (rlen + mlen > (size_t)xfsize) mlen = xfsize - rlen; mbuf_setlen(m, mlen); diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 4c1b6c6eb..26f38c8f5 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -92,9 +92,9 @@ #include #include -#if CONFIG_MACF_SOCKET +#if CONFIG_MACF #include -#endif /* MAC_SOCKET */ +#endif /* CONFIG_MACF */ #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred @@ -110,6 +110,10 @@ static lck_grp_t *unp_mtx_grp; static lck_grp_attr_t *unp_mtx_grp_attr; static lck_rw_t *unp_list_mtx; +static lck_mtx_t *unp_disconnect_lock; +static lck_mtx_t *unp_connect_lock; +static u_int disconnect_in_progress; + extern lck_mtx_t *uipc_lock; static struct unp_head unp_shead, unp_dhead; @@ -139,9 +143,30 @@ static void unp_discard(struct fileglob *); static void unp_discard_fdlocked(struct fileglob *, proc_t); static int unp_internalize(struct mbuf *, proc_t); static int unp_listen(struct unpcb *, proc_t); +static void unpcb_to_compat(struct unpcb *, struct unpcb_compat *); +static void unp_get_locks_in_order(struct socket *so, struct socket *conn_so); + +static void +unp_get_locks_in_order(struct socket *so, struct socket *conn_so) +{ + if (so < conn_so) { + socket_lock(conn_so, 1); + } else { + struct unpcb *unp = sotounpcb(so); + unp->unp_flags |= UNP_DONTDISCONNECT; + unp->rw_thrcount++; + socket_unlock(so, 0); -/* TODO: this should be in header file */ -extern int fdgetf_noref(proc_t, int, struct fileproc **); + /* Get the locks in the correct order */ + socket_lock(conn_so, 1); + socket_lock(so, 0); + 
unp->rw_thrcount--; + if (unp->rw_thrcount == 0) { + unp->unp_flags &= ~UNP_DONTDISCONNECT; + wakeup(unp); + } + } +} static int uipc_abort(struct socket *so) @@ -246,6 +271,7 @@ uipc_detach(struct socket *so) if (unp == 0) return (EINVAL); + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); unp_detach(unp); return (0); } @@ -309,7 +335,9 @@ uipc_rcvd(struct socket *so, __unused int flags) #define snd (&so2->so_snd) if (unp->unp_conn == 0) break; + so2 = unp->unp_conn->unp_socket; + unp_get_locks_in_order(so, so2); /* * Adjust backpressure on sender * and wakeup any waiting to write. @@ -319,6 +347,9 @@ uipc_rcvd(struct socket *so, __unused int flags) snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; unp->unp_cc = rcv->sb_cc; sowwakeup(so2); + + socket_unlock(so2, 1); + #undef snd #undef rcv break; @@ -368,7 +399,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, } if (control) { - /* release global lock to avoid deadlock (4436174) */ + /* release lock to avoid deadlock (4436174) */ socket_unlock(so, 0); error = unp_internalize(control, p); socket_lock(so, 0); @@ -395,7 +426,10 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, break; } } + so2 = unp->unp_conn->unp_socket; + unp_get_locks_in_order(so, so2); + if (unp->unp_addr) from = (struct sockaddr *)unp->unp_addr; else @@ -415,6 +449,9 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, /* A socket filter took control; don't touch it */ control = NULL; } + + socket_unlock(so2, 1); + m = NULL; if (nam) unp_disconnect(unp); @@ -447,7 +484,20 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, } if (unp->unp_conn == 0) panic("uipc_send connected but no connection?"); + so2 = unp->unp_conn->unp_socket; + unp_get_locks_in_order(so, so2); + + /* Check socket state again as we might have unlocked the socket + * while trying to get the locks in order + */ + + if ((so->so_state & SS_CANTSENDMORE)) { + error 
= EPIPE; + socket_unlock(so2, 1); + break; + } + /* * Send to paired receive port, and then reduce send buffer * hiwater marks to maintain backpressure. Wake up readers. @@ -471,6 +521,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, /* A socket filter took control; don't touch it */ control = NULL; } + + socket_unlock(so2, 1); m = NULL; #undef snd #undef rcv @@ -535,7 +587,7 @@ uipc_sense(struct socket *so, void *ub, int isstat64) sb = (struct stat *)ub; sb->st_blksize = blksize; sb->st_dev = NODEV; - sb->st_ino = (ino_t)unp->unp_ino; + sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino; } return (0); @@ -633,10 +685,10 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt) #ifndef PIPSIZ #define PIPSIZ 8192 #endif -static u_long unpst_sendspace = PIPSIZ; -static u_long unpst_recvspace = PIPSIZ; -static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ -static u_long unpdg_recvspace = 4*1024; +static u_int32_t unpst_sendspace = PIPSIZ; +static u_int32_t unpst_recvspace = PIPSIZ; +static u_int32_t unpdg_sendspace = 2*1024; /* really max datagram size */ +static u_int32_t unpdg_recvspace = 4*1024; static int unp_rights; /* file descriptors in flight */ static int unp_disposed; /* discarded file descriptors */ @@ -686,6 +738,13 @@ unp_attach(struct socket *so) if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof (*unp)); + + unp->unp_mtx = lck_mtx_alloc_init(unp_mtx_grp, unp_mtx_attr); + if (unp->unp_mtx == NULL) { + zfree(unp_zone, unp); + return(ENOBUFS); + } + lck_rw_lock_exclusive(unp_list_mtx); LIST_INIT(&unp->unp_refs); unp->unp_socket = so; @@ -693,6 +752,7 @@ unp_attach(struct socket *so) unp_count++; LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead : &unp_shead, unp, unp_link); + lck_rw_done(unp_list_mtx); so->so_pcb = (caddr_t)unp; /* * Mark AF_UNIX socket buffers accordingly so that: @@ -714,7 +774,6 @@ unp_attach(struct socket *so) */ so->so_rcv.sb_flags |= SB_UNIX; so->so_snd.sb_flags |= SB_UNIX; - lck_rw_done(unp_list_mtx); return (0); } @@ -723,41 +782,40 @@ unp_detach(struct unpcb *unp) { lck_rw_lock_exclusive(unp_list_mtx); LIST_REMOVE(unp, unp_link); - unp->unp_gencnt = ++unp_gencnt; lck_rw_done(unp_list_mtx); - --unp_count; if (unp->unp_vnode) { - struct vnode *tvp = unp->unp_vnode; - unp->unp_vnode->v_socket = NULL; - unp->unp_vnode = NULL; - vnode_rele(tvp); /* drop the usecount */ + struct vnode *tvp = NULL; + socket_unlock(unp->unp_socket, 0); + + /* Holding unp_connect_lock will avoid a race between + * a thread closing the listening socket and a thread + * connecting to it. + */ + lck_mtx_lock(unp_connect_lock); + socket_lock(unp->unp_socket, 0); + if (unp->unp_vnode) { + tvp = unp->unp_vnode; + unp->unp_vnode->v_socket = NULL; + unp->unp_vnode = NULL; + } + lck_mtx_unlock(unp_connect_lock); + if (tvp != NULL) + vnode_rele(tvp); /* drop the usecount */ } if (unp->unp_conn) unp_disconnect(unp); - while (unp->unp_refs.lh_first) - unp_drop(unp->unp_refs.lh_first, ECONNRESET); + while (unp->unp_refs.lh_first) { + struct unpcb *unp2 = unp->unp_refs.lh_first; + socket_unlock(unp->unp_socket, 0); + + socket_lock(unp2->unp_socket, 1); + unp_drop(unp2, ECONNRESET); + socket_unlock(unp2->unp_socket, 1); + socket_lock(unp->unp_socket, 0); + } soisdisconnected(unp->unp_socket); /* makes sure we're getting dealloced */ unp->unp_socket->so_flags |= SOF_PCBCLEARING; - unp->unp_socket->so_pcb = NULL; - if (unp_rights) { - /* - * Normally the receive buffer is flushed later, - * in sofree, but if our receive buffer holds references - * to descriptors that are now garbage, we will dispose - * of those descriptor references after the garbage collector - * gets them (resulting in a "panic: closef: 
count < 0"). - */ - sorflush(unp->unp_socket); - - /* Per domain mutex deadlock avoidance */ - socket_unlock(unp->unp_socket, 0); - unp_gc(); - socket_lock(unp->unp_socket, 0); - } - if (unp->unp_addr) - FREE(unp->unp_addr, M_SONAME); - zfree(unp_zone, unp); } /* @@ -783,6 +841,7 @@ unp_bind( vfs_context_t ctx = vfs_context_current(); int error, namelen; struct nameidata nd; + struct socket *so = unp->unp_socket; char buf[SOCK_MAXADDRLEN]; if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) { @@ -795,12 +854,15 @@ unp_bind( if (namelen <= 0) return (EINVAL); + socket_unlock(so, 0); + strlcpy(buf, soun->sun_path, namelen+1); - NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE32, + NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, CAST_USER_ADDR_T(buf), ctx); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) { + socket_lock(so, 0); return (error); } dvp = nd.ni_dvp; @@ -816,6 +878,7 @@ unp_bind( vnode_put(dvp); vnode_put(vp); + socket_lock(so, 0); return (EADDRINUSE); } @@ -823,16 +886,18 @@ unp_bind( VATTR_SET(&va, va_type, VSOCK); VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask)); -#if CONFIG_MACF_SOCKET - /* - * This is #if MAC_SOCKET, because it affects the connection rate - * of Unix domain dockets that is critical for server performance - */ +#if CONFIG_MACF error = mac_vnode_check_create(ctx, nd.ni_dvp, &nd.ni_cnd, &va); if (error == 0) -#endif /* MAC_SOCKET */ +#endif /* CONFIG_MACF */ +#if CONFIG_MACF_SOCKET_SUBSET + error = mac_vnode_check_uipc_bind(ctx, + nd.ni_dvp, &nd.ni_cnd, &va); + + if (error == 0) +#endif /* MAC_SOCKET_SUBSET */ /* authorize before creating */ error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx); @@ -845,9 +910,11 @@ unp_bind( vnode_put(dvp); if (error) { + socket_lock(so, 0); return (error); } vnode_ref(vp); /* gain a longterm reference */ + socket_lock(so, 0); vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = (struct sockaddr_un 
*)dup_sockaddr(nam, 1); @@ -878,7 +945,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) { struct sockaddr_un *soun = (struct sockaddr_un *)nam; struct vnode *vp; - struct socket *so2, *so3; + struct socket *so2, *so3, *list_so=NULL; struct unpcb *unp, *unp2, *unp3; vfs_context_t ctx = vfs_context_current(); int error, len; @@ -889,6 +956,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) return (EAFNOSUPPORT); } + unp = sotounpcb(so); so2 = so3 = NULL; len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); @@ -896,54 +964,93 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) return (EINVAL); strlcpy(buf, soun->sun_path, len+1); + socket_unlock(so, 0); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(buf), ctx); error = namei(&nd); if (error) { + socket_lock(so, 0); return (error); } nameidone(&nd); vp = nd.ni_vp; if (vp->v_type != VSOCK) { error = ENOTSOCK; - goto bad; + socket_lock(so, 0); + goto out; } +#if CONFIG_MACF_SOCKET_SUBSET + error = mac_vnode_check_uipc_connect(ctx, vp); + if (error) { + socket_lock(so, 0); + goto out; + } +#endif /* MAC_SOCKET_SUBSET */ + error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx); - if (error) - goto bad; - so2 = vp->v_socket; - if (so2 == 0 || so2->so_pcb == NULL) { + if (error) { + socket_lock(so, 0); + goto out; + } + + lck_mtx_lock(unp_connect_lock); + + if (vp->v_socket == 0) { + lck_mtx_unlock(unp_connect_lock); error = ECONNREFUSED; - goto bad; + socket_lock(so, 0); + goto out; } - /* make sure the socket can't go away while we're connecting */ - so2->so_usecount++; + socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */ + so2 = vp->v_socket; + lck_mtx_unlock(unp_connect_lock); - if (so->so_type != so2->so_type) { - error = EPROTOTYPE; - goto bad; + + if (so2->so_pcb == NULL) { + error = ECONNREFUSED; + socket_unlock(so2, 1); 
+ socket_lock(so, 0); + goto out; } + if (so < so2) { + socket_unlock(so2, 0); + socket_lock(so, 0); + socket_lock(so2, 0); + } else { + socket_lock(so, 0); + } /* * Check if socket was connected while we were trying to - * acquire the funnel. + * get the socket locks in order. * XXX - probably shouldn't return an error for SOCK_DGRAM */ if ((so->so_state & SS_ISCONNECTED) != 0) { + socket_unlock(so2, 1); error = EISCONN; - goto bad; + goto out; + } + + if (so->so_type != so2->so_type) { + socket_unlock(so2, 1); + error = EPROTOTYPE; + goto out; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + /* Release the incoming socket but keep a reference */ + socket_unlock(so, 0); + if ((so2->so_options & SO_ACCEPTCONN) == 0 || (so3 = sonewconn(so2, 0, nam)) == 0) { error = ECONNREFUSED; - goto bad; + socket_unlock(so2, 1); + socket_lock(so, 0); + goto out; } - unp = sotounpcb(so); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) @@ -968,6 +1075,26 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) */ KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, ("unp_connect: listener without cached peercred")); + + /* Here we need to have both so and so2 locks and so2 + * is already locked. Lock ordering is required. 
+ */ + if (so < so2) { + socket_unlock(so2, 0); + socket_lock(so, 0); + socket_lock(so2, 0); + } else { + socket_lock(so, 0); + } + + /* Check again if the socket state changed when its lock was released */ + if ((so->so_state & SS_ISCONNECTED) != 0) { + error = EISCONN; + socket_unlock(so2, 1); + socket_lock(so3, 0); + sofreelastref(so3, 1); + goto out; + } memcpy(&unp->unp_peercred, &unp2->unp_peercred, sizeof (unp->unp_peercred)); unp->unp_flags |= UNP_HAVEPC; @@ -978,14 +1105,28 @@ unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p) mac_socketpeer_label_associate_socket(so3, so); /* XXXMAC: SOCK_UNLOCK(so); */ #endif /* MAC_SOCKET */ - so2->so_usecount--; /* drop reference taken on so2 */ + + /* Hold the reference on listening socket until the end */ + socket_unlock(so2, 0); + list_so = so2; + + /* Lock ordering doesn't matter because so3 was just created */ + socket_lock(so3, 1); so2 = so3; - so3->so_usecount++; /* make sure we keep it around */ + } + error = unp_connect2(so, so2); -bad: - if (so2 != NULL) - so2->so_usecount--; /* release count on socket */ + if (so2 != NULL) { + socket_unlock(so2, 1); + } + + if (list_so != NULL) { + socket_lock(list_so, 0); + socket_unlock(list_so, 1); + } +out: + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); vnode_put(vp); return (error); } @@ -1003,18 +1144,36 @@ unp_connect2(struct socket *so, struct socket *so2) if (so2->so_type != so->so_type) return (EPROTOTYPE); + unp2 = sotounpcb(so2); + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); + /* Verify both sockets are still opened */ if (unp == 0 || unp2 == 0) return (EINVAL); unp->unp_conn = unp2; + so2->so_usecount++; + switch (so->so_type) { case SOCK_DGRAM: + lck_rw_lock_exclusive(unp_list_mtx); LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); + lck_rw_done(unp_list_mtx); + + + /* Avoid lock order reversals due to drop/acquire in soisconnected. 
*/ + /* Keep an extra reference on so2 that will be dropped + * soon after getting the locks in order + */ + socket_unlock(so2, 0); soisconnected(so); + unp_get_locks_in_order(so, so2); + so2->so_usecount--; + break; case SOCK_STREAM: @@ -1028,24 +1187,108 @@ unp_connect2(struct socket *so, struct socket *so2) unp2->unp_flags |= UNP_HAVEPC; } unp2->unp_conn = unp; - soisconnected(so); + so->so_usecount++; + + /* Avoid lock order reversals due to drop/acquire in soisconnected. */ + socket_unlock(so, 0); soisconnected(so2); + + /* Keep an extra reference on so2, that will be dropped soon after + * getting the locks in order again. + */ + socket_unlock(so2, 0); + + socket_lock(so, 0); + soisconnected(so); + + unp_get_locks_in_order(so, so2); + /* Decrement the extra reference left before */ + so2->so_usecount--; break; default: - panic("unp_connect2"); + panic("unknown socket type %d in unp_connect2", so->so_type); } + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); return (0); } static void unp_disconnect(struct unpcb *unp) { - struct unpcb *unp2 = unp->unp_conn; + struct unpcb *unp2 = NULL; + struct socket *so2 = NULL, *so; + struct socket *waitso; + int so_locked = 1, strdisconn = 0; - if (unp2 == 0) + so = unp->unp_socket; + if (unp->unp_conn == NULL) { return; + } + lck_mtx_lock(unp_disconnect_lock); + while (disconnect_in_progress != 0) { + if (so_locked == 1) { + socket_unlock(so, 0); + so_locked = 0; + } + (void)msleep((caddr_t)&disconnect_in_progress, unp_disconnect_lock, + PSOCK, "disconnect", NULL); + } + disconnect_in_progress = 1; + lck_mtx_unlock(unp_disconnect_lock); + + if (so_locked == 0) { + socket_lock(so, 0); + so_locked = 1; + } + + unp2 = unp->unp_conn; + + if (unp2 == 0 || unp2->unp_socket == NULL) { + goto out; + } + so2 = unp2->unp_socket; + +try_again: + if (so < so2) { + if (so_locked == 0) { + socket_lock(so, 0); + } + socket_lock(so2, 1); + waitso = so2; + } else { + if (so_locked 
== 1) { + socket_unlock(so, 0); + } + socket_lock(so2, 1); + socket_lock(so, 0); + waitso = so; + } + + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(unp2->unp_mtx, LCK_MTX_ASSERT_OWNED); + + /* Check for the UNP_DONTDISCONNECT flag, if it + * is set, release both sockets and go to sleep + */ + + if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) { + socket_unlock(so2, 1); + so_locked = 0; + + (void)msleep(waitso->so_pcb, unp->unp_mtx, + PSOCK | PDROP, "unpdisconnect", NULL); + goto try_again; + } + + if (unp->unp_conn == NULL) { + panic("unp_conn became NULL after sleep"); + } + unp->unp_conn = NULL; + so2->so_usecount--; + switch (unp->unp_socket->so_type) { case SOCK_DGRAM: @@ -1053,24 +1296,82 @@ unp_disconnect(struct unpcb *unp) LIST_REMOVE(unp, unp_reflink); lck_rw_done(unp_list_mtx); unp->unp_socket->so_state &= ~SS_ISCONNECTED; + socket_unlock(so2, 1); break; case SOCK_STREAM: - soisdisconnected(unp->unp_socket); unp2->unp_conn = NULL; - soisdisconnected(unp2->unp_socket); + so->so_usecount--; + + /* Set the socket state correctly but do a wakeup later when + * we release all locks except the socket lock, this will avoid + * a deadlock. 
+ */ + unp->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); + unp->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); + + unp2->unp_socket->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); + unp2->unp_socket->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); /* mirror the state change on the peer socket */ + strdisconn = 1; break; + default: + panic("unknown socket type %d", so->so_type); } -} +out: + lck_mtx_lock(unp_disconnect_lock); + disconnect_in_progress = 0; + wakeup(&disconnect_in_progress); + lck_mtx_unlock(unp_disconnect_lock); -#ifdef notdef -void -unp_abort(struct unpcb *unp) -{ + if (strdisconn) { + socket_unlock(so, 0); + soisdisconnected(so2); + socket_unlock(so2, 1); - unp_detach(unp); + socket_lock(so,0); + soisdisconnected(so); + } + lck_mtx_assert(unp->unp_mtx, LCK_MTX_ASSERT_OWNED); + return; } + +/* + * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format. + * The unpcb_compat data structure is passed to user space and must not change. 
+ */ +static void +unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp) +{ +#if defined(__LP64__) + cp->unp_link.le_next = (u_int32_t)(uintptr_t)up->unp_link.le_next; + cp->unp_link.le_prev = (u_int32_t)(uintptr_t)up->unp_link.le_prev; +#else + cp->unp_link.le_next = (struct unpcb_compat *)up->unp_link.le_next; + cp->unp_link.le_prev = (struct unpcb_compat **)up->unp_link.le_prev; +#endif + cp->unp_socket = (_UNPCB_PTR(struct socket *))(uintptr_t)up->unp_socket; + cp->unp_vnode = (_UNPCB_PTR(struct vnode *))(uintptr_t)up->unp_vnode; + cp->unp_ino = up->unp_ino; + cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *)) + (uintptr_t)up->unp_conn; + cp->unp_refs = (u_int32_t)(uintptr_t)up->unp_refs.lh_first; +#if defined(__LP64__) + cp->unp_reflink.le_next = + (u_int32_t)(uintptr_t)up->unp_reflink.le_next; + cp->unp_reflink.le_prev = + (u_int32_t)(uintptr_t)up->unp_reflink.le_prev; +#else + cp->unp_reflink.le_next = + (struct unpcb_compat *)up->unp_reflink.le_next; + cp->unp_reflink.le_prev = + (struct unpcb_compat **)up->unp_reflink.le_prev; #endif + cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *)) + (uintptr_t)up->unp_addr; + cp->unp_cc = up->unp_cc; + cp->unp_mbcnt = up->unp_mbcnt; + cp->unp_gencnt = up->unp_gencnt; +} static int unp_pcblist SYSCTL_HANDLER_ARGS @@ -1149,7 +1450,8 @@ unp_pcblist SYSCTL_HANDLER_ARGS bzero(&xu, sizeof (xu)); xu.xu_len = sizeof (xu); - xu.xu_unpp = (struct unpcb_compat *)unp; + xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *)) + (uintptr_t)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. 
@@ -1161,7 +1463,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS bcopy(unp->unp_conn->unp_addr, &xu.xu_caddr, unp->unp_conn->unp_addr->sun_len); - bcopy(unp, &xu.xu_unp, sizeof (xu.xu_unp)); + unpcb_to_compat(unp, &xu.xu_unp); sotoxsocket(unp->unp_socket, &xu.xu_socket); error = SYSCTL_OUT(req, &xu, sizeof (xu)); } @@ -1187,20 +1489,168 @@ unp_pcblist SYSCTL_HANDLER_ARGS } SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, - (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", - "List of active local datagram sockets"); + (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", + "List of active local datagram sockets"); SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, - (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", - "List of active local stream sockets"); + (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", + "List of active local stream sockets"); + +#if !CONFIG_EMBEDDED + +static int +unp_pcblist64 SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp,arg2) + int error, i, n; + struct unpcb *unp, **unp_list; + unp_gen_t gencnt; + struct xunpgen xug; + struct unp_head *head; + + lck_rw_lock_shared(unp_list_mtx); + head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); + + /* + * The process of preparing the PCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + if (req->oldptr == USER_ADDR_NULL) { + n = unp_count; + req->oldidx = 2 * sizeof (xug) + (n + n / 8) * + (sizeof (struct xunpcb64)); + lck_rw_done(unp_list_mtx); + return (0); + } + + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(unp_list_mtx); + return (EPERM); + } + + /* + * OK, now we're committed to doing something. 
+ */ + gencnt = unp_gencnt; + n = unp_count; + + bzero(&xug, sizeof (xug)); + xug.xug_len = sizeof (xug); + xug.xug_count = n; + xug.xug_gen = gencnt; + xug.xug_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xug, sizeof (xug)); + if (error) { + lck_rw_done(unp_list_mtx); + return (error); + } + + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(unp_list_mtx); + return (0); + } + + MALLOC(unp_list, struct unpcb **, n * sizeof (*unp_list), + M_TEMP, M_WAITOK); + if (unp_list == 0) { + lck_rw_done(unp_list_mtx); + return (ENOMEM); + } + + for (unp = head->lh_first, i = 0; unp && i < n; + unp = unp->unp_link.le_next) { + if (unp->unp_gencnt <= gencnt) + unp_list[i++] = unp; + } + n = i; /* in case we lost some during malloc */ + + error = 0; + for (i = 0; i < n; i++) { + unp = unp_list[i]; + if (unp->unp_gencnt <= gencnt) { + struct xunpcb64 xu; + size_t xu_len = sizeof(struct xunpcb64); + + bzero(&xu, xu_len); + xu.xu_len = xu_len; + xu.xu_unpp = (u_int64_t)(uintptr_t)unp; + xu.xunp_link.le_next = + (u_int64_t)(uintptr_t)unp->unp_link.le_next; + xu.xunp_link.le_prev = + (u_int64_t)(uintptr_t)unp->unp_link.le_prev; + xu.xunp_socket = (u_int64_t)(uintptr_t)unp->unp_socket; + xu.xunp_vnode = (u_int64_t)(uintptr_t)unp->unp_vnode; + xu.xunp_ino = unp->unp_ino; + xu.xunp_conn = (u_int64_t)(uintptr_t)unp->unp_conn; + xu.xunp_refs = (u_int64_t)(uintptr_t)unp->unp_refs.lh_first; + xu.xunp_reflink.le_next = + (u_int64_t)(uintptr_t)unp->unp_reflink.le_next; + xu.xunp_reflink.le_prev = + (u_int64_t)(uintptr_t)unp->unp_reflink.le_prev; + xu.xunp_cc = unp->unp_cc; + xu.xunp_mbcnt = unp->unp_mbcnt; + xu.xunp_gencnt = unp->unp_gencnt; + + if (unp->unp_socket) + sotoxsocket64(unp->unp_socket, &xu.xu_socket); + + /* + * XXX - need more locking here to protect against + * connect/disconnect races for SMP. 
+ */ + if (unp->unp_addr) + bcopy(unp->unp_addr, &xu.xunp_addr, + unp->unp_addr->sun_len); + if (unp->unp_conn && unp->unp_conn->unp_addr) + bcopy(unp->unp_conn->unp_addr, + &xu.xunp_caddr, + unp->unp_conn->unp_addr->sun_len); + + error = SYSCTL_OUT(req, &xu, xu_len); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. + */ + bzero(&xug, sizeof (xug)); + xug.xug_len = sizeof (xug); + xug.xug_gen = unp_gencnt; + xug.xug_sogen = so_gencnt; + xug.xug_count = unp_count; + error = SYSCTL_OUT(req, &xug, sizeof (xug)); + } + FREE(unp_list, M_TEMP); + lck_rw_done(unp_list_mtx); + return (error); +} + +SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64, CTLFLAG_RD, + (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64", + "List of active local datagram sockets 64 bit"); +SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD, + (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64", + "List of active local stream sockets 64 bit"); + +#endif /* !CONFIG_EMBEDDED */ static void unp_shutdown(struct unpcb *unp) { - struct socket *so; - - if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && - (so = unp->unp_conn->unp_socket)) - socantrcvmore(so); + struct socket *so = unp->unp_socket; + struct socket *so2; + if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) { + so2 = unp->unp_conn->unp_socket; + unp_get_locks_in_order(so, so2); + socantrcvmore(so2); + socket_unlock(so2, 1); + } } static void @@ -1212,14 +1662,6 @@ unp_drop(struct unpcb *unp, int errno) unp_disconnect(unp); } -#ifdef notdef -void -unp_drain() -{ - -} -#endif - /* * Returns: 0 Success * EMSGSIZE The new fd's will not fit @@ -1232,6 +1674,7 @@ unp_externalize(struct mbuf *rights) int i; struct cmsghdr *cm = mtod(rights, struct cmsghdr *); struct fileglob **rp = (struct 
fileglob **)(cm + 1); + int *fds = (int *)(cm + 1); struct fileproc *fp; struct fileglob *fg; int newfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); @@ -1256,7 +1699,8 @@ unp_externalize(struct mbuf *rights) * now change each pointer to an fd in the global table to * an integer that is the index to the local fd table entry * that we set up to point to the global one we are transferring. - * XXX (1) this assumes a pointer and int are the same size...! + * XXX (1) this assumes a pointer and int are the same size, + * XXX or the mbuf can hold the expansion * XXX (2) allocation failures should be non-fatal */ for (i = 0; i < newfds; i++) { @@ -1274,7 +1718,7 @@ unp_externalize(struct mbuf *rights) #endif if (fdalloc(p, 0, &f)) panic("unp_externalize:fdalloc"); - fg = *rp; + fg = rp[i]; MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc), M_FILEPROC, M_WAITOK); if (fp == NULL) @@ -1284,8 +1728,8 @@ unp_externalize(struct mbuf *rights) fp->f_fglob = fg; fg_removeuipc(fg); procfdtbl_releasefd(p, f, fp); - (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights); - *(int *)rp++ = f; + (void) OSAddAtomic(-1, &unp_rights); + fds[i] = f; } proc_fdunlock(p); @@ -1316,6 +1760,13 @@ unp_init(void) unp_mtx_attr)) == NULL) return; /* pretty much dead if this fails... 
*/ + if ((unp_disconnect_lock = lck_mtx_alloc_init(unp_mtx_grp, + unp_mtx_attr)) == NULL) + return; + + if ((unp_connect_lock = lck_mtx_alloc_init(unp_mtx_grp, + unp_mtx_attr)) == NULL) + return; } #ifndef MIN @@ -1331,6 +1782,7 @@ static int unp_internalize(struct mbuf *control, proc_t p) { struct cmsghdr *cm = mtod(control, struct cmsghdr *); + int *fds; struct fileglob **rp; struct fileproc *fp; int i, error; @@ -1338,27 +1790,34 @@ unp_internalize(struct mbuf *control, proc_t p) /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */ if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || - (unsigned long)cm->cmsg_len != (unsigned long)control->m_len) { + (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) { return (EINVAL); } oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); proc_fdlock(p); - rp = (struct fileglob **)(cm + 1); + fds = (int *)(cm + 1); for (i = 0; i < oldfds; i++) { - if ((error = fdgetf_noref(p, *(int *)rp++, NULL)) != 0) { + struct fileproc *tmpfp; + if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) { proc_fdunlock(p); return (error); + } else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) { + proc_fdunlock(p); + return (EINVAL); } } rp = (struct fileglob **)(cm + 1); - for (i = 0; i < oldfds; i++) { - (void) fdgetf_noref(p, *(int *)rp, &fp); + /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd + * and doing them in-order would result in stomping over unprocessed fd's + */ + for (i = (oldfds - 1); i >= 0; i--) { + (void) fdgetf_noref(p, fds[i], &fp); fg_insertuipc(fp->f_fglob); - *rp++ = fp->f_fglob; - (void) OSAddAtomic(1, (volatile SInt32 *)&unp_rights); + rp[i] = fp->f_fglob; + (void) OSAddAtomic(1, &unp_rights); } proc_fdunlock(p); @@ -1388,7 +1847,7 @@ unp_gc(void) struct fileglob *fg, *nextfg; struct socket *so; static struct fileglob **extra_ref; - struct fileglob **fpp; + struct fileglob **fpp; int nunref, i; int need_gcwakeup = 0; @@ -1495,7 +1954,7 @@ unp_gc(void) * message 
buffers. Follow those links and mark them * as accessible too. * - * In case a file is passed onto itself we need to + * In case a file is passed onto itself we need to * release the file lock. */ lck_mtx_unlock(&fg->fg_lock); @@ -1544,6 +2003,8 @@ unp_gc(void) */ extra_ref = _MALLOC(nfiles * sizeof (struct fileglob *), M_FILEGLOB, M_WAITOK); + if (extra_ref == NULL) + goto bail; for (nunref = 0, fg = fmsghead.lh_first, fpp = extra_ref; fg != 0; fg = nextfg) { lck_mtx_lock(&fg->fg_lock); @@ -1581,7 +2042,7 @@ unp_gc(void) so = (struct socket *)(tfg->fg_data); socket_lock(so, 0); - + sorflush(so); socket_unlock(so, 0); @@ -1590,6 +2051,8 @@ unp_gc(void) for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef_locked((struct fileproc *)0, *fpp, (proc_t)NULL); + FREE((caddr_t)extra_ref, M_FILEGLOB); +bail: lck_mtx_lock(uipc_lock); unp_gcing = 0; unp_gcthread = NULL; @@ -1602,7 +2065,6 @@ unp_gc(void) if (need_gcwakeup != 0) wakeup(&unp_gcing); - FREE((caddr_t)extra_ref, M_FILEGLOB); } void @@ -1645,7 +2107,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct fileglob *)) cm->cmsg_type != SCM_RIGHTS) continue; qfds = (cm->cmsg_len - sizeof (*cm)) / - sizeof (struct fileglob *); + sizeof (int); rp = (struct fileglob **)(cm + 1); for (i = 0; i < qfds; i++) (*op)(*rp++); @@ -1678,7 +2140,7 @@ unp_discard(struct fileglob *fg) { proc_t p = current_proc(); /* XXX */ - (void) OSAddAtomic(1, (volatile SInt32 *)&unp_disposed); + (void) OSAddAtomic(1, &unp_disposed); proc_fdlock(p); unp_discard_fdlocked(fg, p); @@ -1689,6 +2151,97 @@ unp_discard_fdlocked(struct fileglob *fg, proc_t p) { fg_removeuipc(fg); - (void) OSAddAtomic(-1, (volatile SInt32 *)&unp_rights); + (void) OSAddAtomic(-1, &unp_rights); (void) closef_locked((struct fileproc *)0, fg, p); } + +int +unp_lock(struct socket *so, int refcount, void * lr) + { + void * lr_saved; + if (lr == 0) + lr_saved = (void *) __builtin_return_address(0); + else lr_saved = lr; + + if (so->so_pcb) { + lck_mtx_lock(((struct unpcb 
*)so->so_pcb)->unp_mtx); + } else { + panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n", + so, lr_saved, so->so_usecount); + } + + if (so->so_usecount < 0) + panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n", + so, so->so_pcb, lr_saved, so->so_usecount); + + if (refcount) + so->so_usecount++; + + so->lock_lr[so->next_lock_lr] = lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; + return (0); +} + +int +unp_unlock(struct socket *so, int refcount, void * lr) +{ + void * lr_saved; + lck_mtx_t * mutex_held = NULL; + struct unpcb *unp = sotounpcb(so); + + if (lr == 0) + lr_saved = (void *) __builtin_return_address(0); + else lr_saved = lr; + + if (refcount) + so->so_usecount--; + + if (so->so_usecount < 0) + panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount); + if (so->so_pcb == NULL) { + panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount); + } else { + mutex_held = ((struct unpcb *)so->so_pcb)->unp_mtx; + } + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; + + if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) { + sofreelastref(so, 1); + + if (unp->unp_addr) + FREE(unp->unp_addr, M_SONAME); + + lck_mtx_unlock(mutex_held); + if (unp->unp_mtx) + lck_mtx_free(unp->unp_mtx, unp_mtx_grp); + + unp->unp_gencnt = ++unp_gencnt; + zfree(unp_zone, unp); + --unp_count; + + unp_gc(); + } else { + lck_mtx_unlock(mutex_held); + } + + return (0); +} + +lck_mtx_t * +unp_getlock(struct socket *so, __unused int locktype) +{ + struct unpcb *unp = (struct unpcb *)so->so_pcb; + + + if (so->so_pcb) { + if (so->so_usecount < 0) + panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount); + return(unp->unp_mtx); + } else { + panic("unp_getlock: so=%p NULL so_pcb\n", so); + return (so->so_proto->pr_domain->dom_mtx); + } +} + diff --git a/bsd/libkern/libkern.h b/bsd/libkern/libkern.h index 81b719c2e..6fd1f7a86 100644 --- 
a/bsd/libkern/libkern.h +++ b/bsd/libkern/libkern.h @@ -123,13 +123,13 @@ min(u_int a, u_int b) { return (a < b ? a : b); } -static inline u_long -ulmax(u_long a, u_long b) +static inline u_int32_t +ulmax(u_int32_t a, u_int32_t b) { return (a > b ? a : b); } -static inline u_long -ulmin(u_long a, u_long b) +static inline u_int32_t +ulmin(u_int32_t a, u_int32_t b) { return (a < b ? a : b); } @@ -139,7 +139,7 @@ ulmin(u_long a, u_long b) /* Prototypes for non-quad routines. */ extern int ffs(int); extern int locc(int, char *, u_int); -extern u_long random(void); +extern u_int32_t random(void); extern char *rindex(const char *, int); extern int scanc(u_int, u_char *, const u_char *, int); extern int skpc(int, int, char *); @@ -156,9 +156,17 @@ int sprintf(char *bufp, const char *, ...) __deprecated; int sscanf(const char *, char const *, ...) __scanflike(2,3); int printf(const char *, ...) __printflike(1,2); +#if KERNEL_PRIVATE +int _consume_printf_args(int, ...); +#endif + #if CONFIG_NO_PRINTF_STRINGS +#if KERNEL_PRIVATE +#define printf(x, ...) _consume_printf_args( 0, ## __VA_ARGS__ ) +#else #define printf(x, ...) do {} while (0) #endif +#endif uint32_t crc32(uint32_t crc, const void *bufp, size_t len); @@ -176,8 +184,13 @@ extern int vsnprintf(char *, size_t, const char *, va_list); /* vsprintf() is being deprecated. Please use vsnprintf() instead. 
*/ extern int vsprintf(char *bufp, const char *, va_list) __deprecated; +#ifdef KERNEL_PRIVATE extern void invalidate_icache(vm_offset_t, unsigned, int); extern void flush_dcache(vm_offset_t, unsigned, int); +#else +extern void invalidate_icache(vm_offset_t, unsigned, int); +extern void flush_dcache(vm_offset_t, unsigned, int); +#endif extern void invalidate_icache64(addr64_t, unsigned, int); extern void flush_dcache64(addr64_t, unsigned, int); diff --git a/bsd/libkern/random.c b/bsd/libkern/random.c index 920ae5c60..070080f90 100644 --- a/bsd/libkern/random.c +++ b/bsd/libkern/random.c @@ -70,11 +70,11 @@ * and whatever else we might use it for. The result is uniform on * [0, 2^31 - 1]. */ -u_long +u_int32_t random(void) { /* Zero all but bottom 31 bits, also works for 64-bit longs */ - u_long mask = (u_long)-1 >> ((sizeof(u_long) * 8) - 31); + u_int32_t mask = (u_int32_t)-1 >> ((sizeof(u_int32_t) * 8) - 31); return (mask & RandomULong()); } diff --git a/bsd/machine/Makefile b/bsd/machine/Makefile index 0c7accd47..83f5ebda9 100644 --- a/bsd/machine/Makefile +++ b/bsd/machine/Makefile @@ -10,14 +10,14 @@ include $(MakeInc_def) DATAFILES = \ byte_order.h endian.h fasttrap_isa.h \ - param.h profile.h \ + limits.h param.h profile.h \ setjmp.h signal.h types.h\ vmparam.h _structs.h _types.h _limits.h _param.h KERNELFILES = \ disklabel.h \ byte_order.h endian.h \ - param.h profile.h \ + limits.h param.h profile.h \ signal.h spl.h types.h \ vmparam.h _structs.h _types.h _limits.h _param.h diff --git a/bsd/machine/cons.h b/bsd/machine/cons.h index bddd7e989..43d364ef0 100644 --- a/bsd/machine/cons.h +++ b/bsd/machine/cons.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,15 +25,59 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _BSD_MACHINE_CONS_H_ -#define _BSD_MACHINE_CONS_H_ - -#if defined (__ppc__) || defined (__ppc64__) -#include -#elif defined (__i386__) || defined(__x86_64__) -#include -#else -#error architecture not supported +/* + * Copyright (c) 1987 NeXT, Inc. + */ + +struct consdev { + char *cn_name; /* name of device in dev_name_list */ + int (*cn_probe)(void); /* probe and fill in consdev info */ + int (*cn_init)(void); /* turn on as console */ + int (*cn_getc)(void); /* kernel getchar interface */ + int (*cn_putc)(void); /* kernel putchar interface */ + struct tty *cn_tp; /* tty structure for console device */ + dev_t cn_dev; /* major/minor of device */ + short cn_pri; /* pecking order; the higher the better */ +}; + +/* values for cn_pri - reflect our policy for console selection */ +#define CN_DEAD 0 /* device doesn't exist */ +#define CN_NORMAL 1 /* device exists but is nothing special */ +#define CN_INTERNAL 2 /* "internal" bit-mapped display */ +#define CN_REMOTE 3 /* serial interface with remote bit set */ + +/* XXX */ +#define CONSMAJOR 0 + +#ifdef KERNEL + +#include +#include + +extern struct consdev constab[]; +extern struct consdev *cn_tab; +extern struct tty *cn_tty; + +extern struct tty *constty; /* current console device */ + +int consopen(dev_t, int, int, struct proc *); +int consclose(dev_t, int, int, struct proc *); +int consread(dev_t, struct uio *, int); +int conswrite(dev_t, struct uio *, int); +int consioctl(dev_t, u_long, caddr_t, int, struct proc *); +int consselect(dev_t, int, void *, struct proc *); + +/* + * These really want their own header file, but this is the only one in + * common, and the km device is the keyboard monitor, so it's technically a + * part of the console. 
+ */ +int kmopen(dev_t, int, int, struct proc *); +int kmclose(dev_t, int, int, struct proc *); +int kmread(dev_t, struct uio *, int); +int kmwrite(dev_t, struct uio *, int); +int kmioctl(dev_t, u_long, caddr_t, int, struct proc *); +int kmputc(dev_t, char); + #endif -#endif /* _BSD_MACHINE_CONS_H_ */ diff --git a/bsd/machine/limits.h b/bsd/machine/limits.h new file mode 100644 index 000000000..0f40842f9 --- /dev/null +++ b/bsd/machine/limits.h @@ -0,0 +1,11 @@ +/* This is the `system' limits.h, independent of any particular + compiler. GCC provides its own limits.h which can be found in + /usr/lib/gcc, although it is not very informative. + This file is public domain. */ +#if defined (__ppc__) || defined (__ppc64__) +#include +#elif defined (__i386__) || defined(__x86_64__) +#include +#else +#error architecture not supported +#endif diff --git a/bsd/machine/spl.h b/bsd/machine/spl.h index 1667c5099..ea806c0ce 100644 --- a/bsd/machine/spl.h +++ b/bsd/machine/spl.h @@ -30,6 +30,8 @@ #ifdef KERNEL #ifndef __ASSEMBLER__ + +#if !defined(__LP64__) || defined(XNU_KERNEL_PRIVATE) /* * Machine-dependent SPL definitions. 
* @@ -55,6 +57,8 @@ extern void splx(unsigned int level); extern void spln(unsigned int level); #define splstatclock() splhigh() +#endif /* !__LP64__ || XNU_KERNEL_PRIVATE */ + #endif /* __ASSEMBLER__ */ #endif /* KERNEL */ diff --git a/bsd/man/man2/Makefile b/bsd/man/man2/Makefile index b8984ac80..12cc26329 100644 --- a/bsd/man/man2/Makefile +++ b/bsd/man/man2/Makefile @@ -7,7 +7,6 @@ include $(MakeInc_cmd) include $(MakeInc_def) DATAFILES = \ - __syscall.2 \ _exit.2 \ EV_SET.2 \ FD_CLR.2 \ @@ -45,6 +44,8 @@ DATAFILES = \ fchmod.2 \ fchown.2 \ fcntl.2 \ + fgetattrlist.2 \ + fsetattrlist.2 \ fgetxattr.2 \ fhopen.2 \ flistxattr.2 \ @@ -54,12 +55,15 @@ DATAFILES = \ fremovexattr.2 \ fsetxattr.2 \ fstat.2 \ + fstat64.2 \ fstatfs.2 \ + fstatfs64.2 \ fsync.2 \ ftruncate.2 \ futimes.2 \ getattrlist.2 \ getaudit.2 \ + getaudit_addr.2 \ getauid.2 \ getdirentries.2 \ getdirentriesattr.2 \ @@ -93,6 +97,7 @@ DATAFILES = \ issetugid.2 \ kill.2 \ kevent.2 \ + kevent64.2 \ kqueue.2 \ lchown.2 \ link.2 \ @@ -100,6 +105,7 @@ DATAFILES = \ listxattr.2 \ lseek.2 \ lstat.2 \ + lstat64.2 \ madvise.2 \ mincore.2 \ minherit.2 \ @@ -122,6 +128,7 @@ DATAFILES = \ posix_madvise.2 \ pread.2 \ profil.2 \ + pthread_setugid_np.2 \ ptrace.2 \ pwrite.2 \ quotactl.2 \ @@ -147,6 +154,7 @@ DATAFILES = \ sendto.2 \ setattrlist.2 \ setaudit.2 \ + setaudit_addr.2 \ setauid.2 \ setegid.2 \ seteuid.2 \ @@ -178,7 +186,9 @@ DATAFILES = \ socket.2 \ socketpair.2 \ stat.2 \ + stat64.2 \ statfs.2 \ + statfs64.2 \ symlink.2 \ sync.2 \ syscall.2 \ diff --git a/bsd/man/man2/__syscall.2 b/bsd/man/man2/__syscall.2 deleted file mode 100644 index 0bf5debf7..000000000 --- a/bsd/man/man2/__syscall.2 +++ /dev/null @@ -1 +0,0 @@ -.so man2/syscall.2 diff --git a/bsd/man/man2/aio_return.2 b/bsd/man/man2/aio_return.2 index 2bf094cea..01cf5349f 100644 --- a/bsd/man/man2/aio_return.2 +++ b/bsd/man/man2/aio_return.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
+.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" Copyright (c) 1999 Softweyr LLC. .\" All rights reserved. .\" @@ -24,7 +47,7 @@ .\" .\" $FreeBSD: src/lib/libc/sys/aio_return.2,v 1.17 2003/01/13 10:37:11 tjr Exp $ .\" -.Dd June 2, 1999 +.Dd September 26, 2008 .Dt AIO_RETURN 2 .Os .Sh NAME @@ -49,10 +72,15 @@ The .Fn aio_return system call should only be called once, to obtain the final status of an asynchronous -I/O operation once +I/O operation (once .Xr aio_error 2 returns something other than -.Er EINPROGRESS . +.Er EINPROGRESS) and to clean up system resources. +However, if +.Fn aio_return +is not called at all, +.Xr aio 4 +will leak resources. .Sh RETURN VALUES If the asynchronous I/O request has completed, the status is returned as described in @@ -78,6 +106,9 @@ The .Fa aiocbp argument does not reference an outstanding asynchronous I/O request. +.\" ========== +.It Bq Er EINPROGRESS +The asynchronous I/O request is still in progress. 
.El .Sh LEGACY SYNOPSIS .Fd #include diff --git a/bsd/man/man2/aio_write.2 b/bsd/man/man2/aio_write.2 index a69d74ae0..9570c2c24 100644 --- a/bsd/man/man2/aio_write.2 +++ b/bsd/man/man2/aio_write.2 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD: src/lib/libc/sys/aio_write.2,v 1.16 2003/01/13 10:37:11 tjr Exp $ .\" -.Dd June 2, 1999 +.Dd September 18, 2008 .Dt AIO_WRITE 2 .Os .Sh NAME @@ -153,11 +153,22 @@ is invalid, or is not opened for writing. .It Bq Er EINVAL The offset .Fa aiocbp->aio_offset -is not valid, the priority specified by +is not valid. +.\" ========== +.It Bq Er EINVAL +The priority specified by .Fa aiocbp->aio_reqprio -is not a valid priority, or the number of bytes specified by +is not a valid priority. +.\" ========== +.It Bq Er EINVAL +The number of bytes specified by .Fa aiocbp->aio_nbytes is not valid. +.\" ========== +.It Bq Er EINVAL +The constant in +.Fa aiocbp->aio_sigevent.sigev_notify +is set to SIGEV_THREAD (SIGEV_THREAD is not supported). .El .Pp If the request is successfully enqueued, but subsequently canceled @@ -181,11 +192,6 @@ is invalid for writing. .It Bq Er ECANCELED The request was explicitly canceled via a call to .Fn aio_cancel . -.\" ========== -.It Bq Er EINVAL -The offset -.Fa aiocbp->aio_offset -would be invalid. .El .Sh SEE ALSO .Xr aio_cancel 2 , diff --git a/bsd/man/man2/audit.2 b/bsd/man/man2/audit.2 index ba7208392..cba172872 100644 --- a/bsd/man/man2/audit.2 +++ b/bsd/man/man2/audit.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
.\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,59 +20,76 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd April 19, 2005 .Dt AUDIT 2 -.Os Darwin +.Os .Sh NAME .Nm audit -.Nd submit a record to the kernel for auditing +.Nd "commit BSM audit record to audit log" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn audit "const void * record" "int length" +.Fn audit "const char *record" "u_int length" .Sh DESCRIPTION The .Fn audit -function submits a record to the kernel for inclusion in the global audit -trail. The record must already be in BSM format. To protect the integrity -of the audit trail, this system call must be made with sufficient privileges. -.Fa Libbsm -can be used to create and manipulate BSM data. -.Fa Length -is the length in bytes of the BSM record and +system call +submits a completed BSM audit record to the system audit log. +.Pp +The .Fa record -points to the data. +argument +is a pointer to the specific event to be recorded and +.Fa length +is the size in bytes of the data to be written. .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std .Sh ERRORS -.Bl -tag -width Er The .Fn audit -system call will fail if: -.\" =========== +system call will fail and the data never written if: +.Bl -tag -width Er +.It Bq Er EFAULT +The +.Fa record +argument is beyond the allocated address space of the process. .It Bq Er EINVAL -.Fa Length -is greater than MAX_AUDIT_RECORD_SIZE, less than zero, greater than the -internal buffer size, or the record fails verification. -.\" =========== -.It Bq Er ENOTSUP -The security auditing service is not available. -.\" =========== +The token ID is invalid or +.Va length +is larger than +.Dv MAXAUDITDATA . .It Bq Er EPERM -The call was made with insufficient privileges to complete. -.\" =========== +The process does not have sufficient permission to complete +the operation. 
.El .Sh SEE ALSO .Xr auditon 2 , -.Xr auditctl 2 , +.Xr getaudit 2 , +.Xr getaudit_addr 2 , .Xr getauid 2 , +.Xr setaudit 2 , +.Xr setaudit_addr 2 , .Xr setauid 2 , -.Xr getaudit 2 , -.Xr setaudit 2 +.Xr libbsm 3 .Sh HISTORY -The -.Fn audit -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Tom Rhodes Aq trhodes@FreeBSD.org . +.Sh BUGS +The kernel does not fully validate that the argument passed is syntactically +valid BSM. Submitting invalid audit records may corrupt the audit log. diff --git a/bsd/man/man2/auditctl.2 b/bsd/man/man2/auditctl.2 index c5982d5f4..40d08ae9f 100644 --- a/bsd/man/man2/auditctl.2 +++ b/bsd/man/man2/auditctl.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
.\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,40 +20,60 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd April 19, 2005 .Dt AUDITCTL 2 -.Os Darwin +.Os .Sh NAME .Nm auditctl -.Nd specify a new file for the audit trail to log to +.Nd "configure system audit parameters" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn auditctl "const char * path" +.Fn auditctl "const char *path" .Sh DESCRIPTION The .Fn auditctl -function directs the kernel to begin writing the global audit trail to the -specified to the file specified by -.Fa path . -Specifying a -.Fa path -of NULL will tell the system to stop writing to the audit trail, without -disabling audit. To begin writing to a file again, simply pass another -path. +system call directs the kernel to open a new audit trail log file. +It requires an appropriate privilege. +The +.Fn auditctl +system call +opens new files, but +.Xr auditon 2 +is used to disable the audit log. .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std +.Sh ERRORS +The +.Fn auditctl +system call will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The path is invalid. +.It Bq Er EPERM +The process does not have sufficient permission to complete the +operation. +.El .Sh SEE ALSO -.Xr audit 2 , .Xr auditon 2 , -.Xr getauid 2 , -.Xr setauid 2 , -.Xr getaudit 2 , -.Xr setaudit 2 +.Xr libbsm 3 , +.Xr auditd 8 .Sh HISTORY -The -.Fn auditctl -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. 
+Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org . diff --git a/bsd/man/man2/auditon.2 b/bsd/man/man2/auditon.2 index bfd9bad2e..bf37e6ab4 100644 --- a/bsd/man/man2/auditon.2 +++ b/bsd/man/man2/auditon.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved. .\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,176 +20,479 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd January 29, 2009 .Dt AUDITON 2 -.Os Darwin +.Os .Sh NAME .Nm auditon -.Nd configure the current audit parameters on the system +.Nd "configure system audit parameters" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn auditon "int cmd" "void * data" "int length" +.Fn auditon "int cmd" "void *data" "u_int length" .Sh DESCRIPTION The .Fn auditon -function manipulates various audit parameters. The -.Fa data -argument points to the appropriate structure from the header file. -.Fa Length -is the size of the -.Fa data -parameter in bytes. It will typically be the sizeof the the structure. -.Sh PARAMETERS -.Bl -tag -width Er -.It A_GETPOLICY -Get the current audit policy. -.Fa Data -should point to a long. The policy is the bitwise OR'ing of the -appropriate flags from -.Fa bsm/audit.h . -If AUDIT_AHLT is set, the system will kernel panic if it cannot write to the -global audit trail. If AUDIT_CNT is not set and the system becomes low on -space, audited events will block until the low space condition is remedied. -Unaudited events are unaffected. The other policy flags are not implemented. -.It A_SETPOLICY -Set the current audit policy. -.Fa Data -should point to a long specifying the desired audit policy, as described in -A_GETPOLICY. 
-.It A_GETKMASK -Get the current value of the audit preselection mask for non-attributable events. -.Fa Data -should point to an -.Fa au_mask_t . +system call is used to manipulate various audit control operations. +The +.Fa data +argument +should point to a structure whose type depends on the command. +The +.Fa length +argument +specifies the size of +.Fa *data +in bytes. +The +.Fa cmd +argument +may be any of the following: +.Bl -tag -width ".It Dv A_GETPINFO_ADDR" +.It Dv A_SETPOLICY +Set audit policy flags. +The +.Fa data +argument +must point to a +.Vt int +value set to one or more the following audit +policy control values bitwise OR'ed together: +.Dv AUDIT_CNT , +.Dv AUDIT_AHLT , +.Dv AUDIT_ARGV , +and +.Dv AUDIT_ARGE . +If +.Dv AUDIT_CNT is set, the system will continue even if it becomes low +on space and discontinue logging events until the low space condition is +remedied. +If it is not set, audited events will block until the low space +condition is remedied. +Unaudited events, however, are unaffected. +If +.Dv AUDIT_AHLT is set, a +.Xr panic 9 +if it cannot write an event to the global audit log file. +If +.Dv AUDIT_ARGV +is set, then the argument list passed to the +.Xr execve 2 +system call will be audited. If +.Dv AUDIT_ARGE +is set, then the environment variables passed to the +.Xr execve 2 +system call will be audited. The default policy is none of the audit policy +control flags set. +.It Dv A_SETKAUDIT +Set the host information. +The +.Fa data +argument +must point to a +.Vt auditinfo_addr_t +structure containing the host IP address information. +After setting, audit records +that are created as a result of kernel events will contain +this information. +.It Dv A_SETKMASK +Set the kernel preselection masks (success and failure). +The +.Fa data +argument +must point to a +.Vt au_mask_t +structure containing the mask values as defined in +.In bsm/audit.h . +These masks are used for non-attributable audit event preselection. 
The field .Fa am_success specifies which classes of successful audit events are to be logged to the audit trail. The field .Fa am_failure specifies which classes of failed audit events are to be logged. The value of -both fields is the bitwise OR'ing of the event classes specified in +both fields is the bitwise OR'ing of the audit event classes specified in .Fa bsm/audit.h . The various audit classes are described more fully in .Xr audit_class 5 . -.It A_SETKMASK -Set the current value of the audit preselection mask for non-attributable events. -.Fa Data -should point to an -.Fa au_mask_t . -The masks are defined as described in A_GETKMASK. -.It A_GETQCTRL -Get the current settings for the audit queue (specifying in kernel buffer size, -percentage of free filesystem blocks, and limits to the number of audit records -allowed). -.Fa Data -should point to an -.Fa au_qctrl_t . -.It A_SETQCTRL -Set the current settings for the audit queue. -.Fa Data -should point to an -.Fa au_qctrl_t . -.\" The following are not yet implemented, but as mentioned in the header file. -.\" .It A_GETCWD -.\" .It A_GETCAR -.\" .It A_GETSTAT -.\" .It A_SETSTAT -.\" .It A_SETUMASK -.\" .It A_SETSMASK -.It A_GETCOND -Gets the current condition of the auditing subsystem. If the value is -AUC_AUDITING, then the audit implementation is currently running. If the -value is AUC_NOAUDIT then the audit implementation is currently turned off. -.Fa Data -should point to a long. -.It A_SETCOND -Sets the condition of the auditing subsystem. If AUC_NOAUDIT is set, then -auditing is temporarily suspended. If AUC_AUDITING is set, auditing is resumed. -If AUC_DISABLED is set, the auditing system will shutdown, draining all audit -records and closing out the audit trail file. -To re-enable auditing, a call to -.Fa auditctl -is required in addition to setting the condition to AUC_AUDITING. -.Fa Data -should point to a long. -.It A_GETCLASS -Returns the audit class for the specified audit event. 
-.Fa Data -should point to a -.Fa au_evclassmap_t . -.It A_SETCLASS -Sets the audit class for the specified audit event. -.Fa Data -should point to a -.Fa au_evclassmap_t . -.It A_GETPINFO -Returns the audit information stored in the credential for the current process. -.Fa Data -should point to a -.Fa auditpinfo_t . -.It A_SETPMASK -Sets the audit settings for a process. The audit user ID, preselection masks -for both success and failure, and terminal IDs must be set. -.Fa Data -should point to a -.Fa auditpinfo_t -struct. -.It A_SETFSIZE -Set the limit on audit trail file size. File size is in bytes. The file size -specified is treated as an advisory limit. The system will make a best effort -attempt to rotate log files before they exceed the requested maximum size, but -makes no guarantees on log file size -.Fa Data -should point to a -.Fa au_fstat_t -struct. The -.Fa af_filesz -field is used to specify the new file size, which must be greater than -MIN_AUDIT_FILE_SIZE. A value of 0 indicates no limit on the audit trail's size. The -.Fa af_currsz -field is ignored. A errno value of EINVAL indicates a maximum file size that is -too small. -.It A_GETFSIZE -Return the maximum allowable size of the audit trail, and the current size of -the audit trail. -.Fa Data -should point to a -.Fa au_fstat_t -struct. -.It A_GETPINFO_ADDR -Not implemented, returns ENOSYS. -.It A_GETKAUDIT -Not implemented, returns ENOSYS. -.It A_SETKAUDIT -Not implemented, returns ENOSYS. +.It Dv A_SETQCTRL +Set kernel audit queue parameters. +The +.Fa data +argument +must point to a +.Vt au_qctrl_t +structure (defined in +.In bsm/audit.h ) +containing the kernel audit queue control settings: +.Fa aq_hiwater , +.Fa aq_lowater , +.Fa aq_bufsz , +.Fa aq_delay , +and +.Fa aq_minfree . +The field +.Fa aq_hiwater +defines the maximum number of audit record entries in the queue used to store +the audit records ready for delivery to disk. 
+New records are inserted at the tail of the queue and removed from the head. +For new records which would exceed the +high water mark, the calling thread is inserted into the wait queue, waiting +for the audit queue to have enough space available as defined with the field +.Fa aq_lowater . +The field +.Fa aq_bufsz +defines the maximum length of the audit record that can be supplied with +.Xr audit 2 . +The field +.Fa aq_delay +is unused. +The field +.Fa aq_minfree +specifies the minimum amount of free blocks on the disk device used to store +audit records. +If the value of free blocks falls below the configured +minimum amount, the kernel informs the audit daemon about low disk space. +The value is to be specified in percent of free file system blocks. +A value of 0 results in a disabling of the check. +The default and maximum values (default/maximum) for the +audit queue control parameters are: +.Pp +.Bl -column aq_hiwater -offset indent -compact +.It aq_hiwater Ta 100/10000 (audit records) +.It aq_lowater Ta 10/aq_hiwater (audit records) +.It aq_bufsz Ta 32767/1048576 (bytes) +.It aq_delay Ta (Not currently used.) +.El +.It Dv A_SETSTAT +Return +.Er ENOSYS . +(Not implemented.) +.It Dv A_SETUMASK +Return +.Er ENOSYS . +(Not implemented.) +.It Dv A_SETSMASK +Return +.Er ENOSYS . +(Not implemented.) +.It Dv A_SETCOND +Set the current auditing condition. +The +.Fa data +argument +must point to a +.Vt int +value containing the new +audit condition, one of +.Dv AUC_AUDITING , +.Dv AUC_NOAUDIT , +or +.Dv AUC_DISABLED . +If +.Dv AUC_NOAUDIT +is set, then auditing is temporarily suspended. If +.Dv AUC_AUDITING +is set, auditing is resumed. If +.Dv AUC_DISABLED +is set, the auditing system will +shutdown, draining all audit records and closing out the audit trail file. +.It Dv A_SETCLASS +Set the event class preselection mask for an audit event. +The +.Fa data +argument +must point to a +.Vt au_evclass_map_t +structure containing the audit event and mask. 
+The field +.Fa ec_number +is the audit event and +.Fa ec_class +is the audit class mask. See +.Xr audit_event 5 +for more information on audit event to class mapping. +.It Dv A_SETPMASK +Set the preselection masks for a process. +The +.Fa data +argument +must point to a +.Vt auditpinfo_t +structure that contains the given process's audit +preselection masks for both success and failure. +The field +.Fa ap_pid +is the process id of the target process. +The field +.Fa ap_mask +must point to a +.Fa au_mask_t +structure which holds the preselection masks as described in the +.Dv A_SETKMASK +section above. +.It Dv A_SETFSIZE +Set the maximum size of the audit log file. +The +.Fa data +argument +must point to a +.Vt au_fstat_t +structure with the +.Va af_filesz +field set to the maximum audit log file size. +A value of 0 +indicates no limit to the size. +.It Dv A_GETCLASS +Return the event to class mapping for the designated audit event. +The +.Fa data +argument +must point to a +.Vt au_evclass_map_t +structure. See the +.Dv A_SETCLASS +section above for more information. +.It Dv A_GETKAUDIT +Get the current host information. +The +.Fa data +argument +must point to a +.Vt auditinfo_addr_t +structure. +.It Dv A_GETPINFO +Return the audit settings for a process. +The +.Fa data +argument +must point to a +.Vt auditpinfo_t +structure which will be set to contain +.Fa ap_auid +(the audit ID), +.Fa ap_mask +(the preselection mask), +.Fa ap_termid +(the terminal ID), and +.Fa ap_asid +(the audit session ID) +of the given target process. +The process ID of the target process is passed +into the kernel using the +.Fa ap_pid +field. +See the section +.Dv A_SETPMASK +above and +.Xr getaudit 2 +for more information. +.It Dv A_GETPINFO_ADDR +Return the extended audit settings for a process. +The +.Fa data +argument +must point to a +.Vt auditpinfo_addr_t +structure which is similar to the +.Vt auditpinfo_t +structure described above.
+The exception is the +.Fa ap_termid +(the terminal ID) field which points to a +.Vt au_tid_addr_t +structure that can hold a much larger terminal address and an address type. +The process ID of the target process is passed into the kernel using the +.Fa ap_pid +field. +See the section +.Dv A_SETPMASK +above and +.Xr getaudit 2 +for more information. +.It Dv A_GETSINFO_ADDR +Return the extended audit settings for a session. +The +.Fa data +argument +must point to a +.Vt auditinfo_addr_t +structure. +The audit session ID of the target session is passed +into the kernel using the +.Fa ai_asid +field. See +.Xr getaudit_addr 2 +for more information about the +.Vt auditinfo_addr_t +structure. +.It Dv A_GETKMASK +Return the current kernel preselection masks. +The +.Fa data +argument +must point to a +.Vt au_mask_t +structure which will be set to +the current kernel preselection masks for non-attributable events. +.It Dv A_GETPOLICY +Return the current audit policy setting. +The +.Fa data +argument +must point to a +.Vt int +value which will be set to +the bitwise OR of the current audit policy flags. +The audit policy flags are +described in the +.Dv A_SETPOLICY +section above. +.It Dv A_GETQCTRL +Return the current kernel audit queue control parameters. +The +.Fa data +argument +must point to a +.Vt au_qctrl_t +structure which will be set to the current +kernel audit queue control parameters. +See the +.Dv A_SETQCTRL +section above for more information. +.It Dv A_GETFSIZE +Returns the maximum size of the audit log file. +The +.Fa data +argument +must point to a +.Vt au_fstat_t +structure. +The +.Va af_filesz +field will be set to the maximum audit log file size. +A value of 0 indicates no limit to the size. +The +.Va af_currsz +field +will be set to the current audit log file size. +.It Dv A_GETCWD +.\" [COMMENTED OUT]: Valid description, not yet implemented. +.\" Return the current working directory as stored in the audit subsystem. +Return +.Er ENOSYS . +(Not implemented.)
+.It Dv A_GETCAR +.\" [COMMENTED OUT]: Valid description, not yet implemented. +.\"Stores and returns the current active root as stored in the audit +.\"subsystem. +Return +.Er ENOSYS . +(Not implemented.) +.It Dv A_GETSTAT +.\" [COMMENTED OUT]: Valid description, not yet implemented. +.\"Return the statistics stored in the audit system. +Return +.Er ENOSYS . +(Not implemented.) +.It Dv A_GETCOND +Return the current auditing condition. +The +.Fa data +argument +must point to a +.Vt int +value which will be set to +the current audit condition, one of +.Dv AUC_AUDITING , +.Dv AUC_NOAUDIT +or +.Dv AUC_DISABLED . +See the +.Dv A_SETCOND +section above for more information. +.It Dv A_SENDTRIGGER +Send a trigger to the audit daemon. +The +.Fa data +argument +must point to a +.Vt int +value set to one of the acceptable +trigger values: +.Dv AUDIT_TRIGGER_LOW_SPACE +(low disk space where the audit log resides), +.Dv AUDIT_TRIGGER_OPEN_NEW +(open a new audit log file), +.Dv AUDIT_TRIGGER_READ_FILE +(read the +.Pa audit_control +file), +.Dv AUDIT_TRIGGER_CLOSE_AND_DIE +(close the current log file and exit), +.Dv AUDIT_TRIGGER_NO_SPACE +(no disk space left for audit log file). +.Dv AUDIT_TRIGGER_ROTATE_USER +(request audit log file rotation). +.Dv AUDIT_TRIGGER_INITIALIZE +(initialize audit subsystem for Mac OS X only). +or +.Dv AUDIT_TRIGGER_EXPIRE_TRAILS +(request audit log file expiration). .El .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std .Sh ERRORS -.Bl -tag -width Er -Errors are specific to the operation requested. In addition, rhe +The .Fn auditon -system call will fail if: -.\" =========== +function will fail if: +.Bl -tag -width Er +.It Bq Er ENOSYS +Returned by options not yet implemented. +.It Bq Er EFAULT +A failure occurred while data transferred to or from +the kernel failed. 
.It Bq Er EINVAL -.Fa Length -is less than or equal to zero, or if it is greater than any of the expected structures. +Illegal argument was passed by a system call. +.It Bq Er EPERM +The process does not have sufficient permission to complete +the operation. .El +.Pp +The +.Dv A_SENDTRIGGER +command is specific to the +.Fx +and Mac OS X implementations, and is not present in Solaris. .Sh SEE ALSO .Xr audit 2 , .Xr auditctl 2 , -.Xr getauid 2 , -.Xr setauid 2 , .Xr getaudit 2 , -.Xr setaudit 2 , .Xr getaudit_addr 2 , +.Xr getauid 2 , +.Xr setaudit 2 , .Xr setaudit_addr 2 , -.Xr audit_class 5 +.Xr setauid 2 , +.Xr libbsm 3 .Sh HISTORY -The -.Fn auditon -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Tom Rhodes Aq trhodes@FreeBSD.org , +.An Robert Watson Aq rwatson@FreeBSD.org , +and +.An Wayne Salamon Aq wsalamon@FreeBSD.org . diff --git a/bsd/man/man2/chdir.2 b/bsd/man/man2/chdir.2 index d8476b8e6..3d688be23 100644 --- a/bsd/man/man2/chdir.2 +++ b/bsd/man/man2/chdir.2 @@ -133,7 +133,8 @@ The argument is not a valid file descriptor. .\" ========== .It Bq Er EINTR -.Fn Fchdir was interrupted by a signal. +.Fn Fchdir +was interrupted by a signal. .\" ========== .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. 
diff --git a/bsd/man/man2/chflags.2 b/bsd/man/man2/chflags.2 index ac7301709..0d16cab19 100644 --- a/bsd/man/man2/chflags.2 +++ b/bsd/man/man2/chflags.2 @@ -105,7 +105,7 @@ Otherwise, -1 is returned and the global variable is set to indicate the error. .Sh ERRORS .Fn Chflags -will fail it: +will fail if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. @@ -133,6 +133,8 @@ points outside the process's allocated address space. An .Tn I/O error occurred while reading from or writing to the file system. +.It Bq Er ENOTSUP +The operation isn't supported by the filesystem. .El .Pp .Fn Fchflags @@ -152,10 +154,13 @@ The file resides on a read-only file system. An .Tn I/O error occurred while reading from or writing to the file system. +.It Bq Er ENOTSUP +The operation isn't supported by the filesystem. .El .Sh SEE ALSO .Xr chflags 1 , .Xr fflagstostr 3 , +.Xr lchflags 3 , .Xr strtofflags 3 , .Xr init 8 .Sh HISTORY diff --git a/bsd/man/man2/connect.2 b/bsd/man/man2/connect.2 index 8b32e6c2c..186b01c20 100644 --- a/bsd/man/man2/connect.2 +++ b/bsd/man/man2/connect.2 @@ -176,6 +176,9 @@ that is bound to the specified peer address. .\" ========== .It Bq Er ETIMEDOUT Connection establishment timed out without establishing a connection. +.\" ========== +.It Bq Er ECONNRESET +Remote host reset the connection request. .El .Pp The following errors are specific to connecting names in the UNIX domain. diff --git a/bsd/man/man2/dup.2 b/bsd/man/man2/dup.2 index 019b13fbf..c13ca0bb5 100644 --- a/bsd/man/man2/dup.2 +++ b/bsd/man/man2/dup.2 @@ -105,7 +105,7 @@ the descriptor is first deallocated as if a .Xr close 2 call had been done first. .Sh RETURN VALUES -Upon successful completion, a value of 0 is returned. +Upon successful completion, the new file descriptor is returned. Otherwise, a value of -1 is returned and the global integer variable .Va errno is set to indicate the error. 
@@ -119,8 +119,6 @@ system calls will fail if: .\" ========== .It Bq Er EBADF .Fa fildes -or -.Fa fildes2 is not an active, valid file descriptor. .\" ========== .It Bq Er EINTR @@ -129,6 +127,16 @@ Execution is interrupted by a signal. .It Bq Er EMFILE Too many file descriptors are active. .El +.Pp +The +.Fn dup2 +system call will fail if: +.Bl -tag -width Er +.\" ========== +.It Bq Er EBADF +.Fa fildes2 +is negative or greater than the maximum allowable number (see getdtablesize(2)). +.El .Sh SEE ALSO .Xr accept 2 , .Xr close 2 , diff --git a/bsd/man/man2/errno.2 b/bsd/man/man2/errno.2 new file mode 100644 index 000000000..ec6df420c --- /dev/null +++ b/bsd/man/man2/errno.2 @@ -0,0 +1 @@ +.so man2/intro.2 diff --git a/bsd/man/man2/fcntl.2 b/bsd/man/man2/fcntl.2 index d228a17c1..d6d1ce8cd 100644 --- a/bsd/man/man2/fcntl.2 +++ b/bsd/man/man2/fcntl.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. 
+.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: fcntl.2,v 1.6 1995/02/27 12:32:29 cgd Exp $ .\" .\" Copyright (c) 1983, 1993 @@ -33,7 +56,7 @@ .\" .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" -.Dd January 12, 1994 +.Dd October 2, 2008 .Dt FCNTL 2 .Os BSD 4.2 .Sh NAME @@ -131,7 +154,9 @@ The argument must be a buffer of size .Sy MAXPATHLEN or greater. .It Dv F_PREALLOCATE -Preallocate file storage space. +Preallocate file storage space. Note: upon success, +the space that is allocated can be the same size or +larger than the space requested. .It Dv F_SETSIZE Truncate a file without zeroing space. The calling process must have root privileges. @@ -253,7 +278,7 @@ as well as remove either type of lock If a shared or exclusive lock cannot be set, .Nm fcntl returns immediately with -.Er EACCES . +.Er EAGAIN . .It Dv F_SETLKW This command is the same as .Dv F_SETLK @@ -486,7 +511,7 @@ The system call will fail if: .Bl -tag -width Er .\" ========== -.It Bq Er EACCES +.It Bq Er EAGAIN The argument .Fa cmd is @@ -502,7 +527,7 @@ exclusive-locked by another process; or the type is an exclusive lock and some portion of the segment of a file to be locked is already shared-locked or exclusive-locked by another process. 
-.Pp +.It Bq Er EACCES The argument .Fa cmd is either diff --git a/bsd/man/man2/fgetattrlist.2 b/bsd/man/man2/fgetattrlist.2 new file mode 100644 index 000000000..7c9a2ee01 --- /dev/null +++ b/bsd/man/man2/fgetattrlist.2 @@ -0,0 +1 @@ +.so man2/getattrlist.2 diff --git a/bsd/man/man2/fsetattrlist.2 b/bsd/man/man2/fsetattrlist.2 new file mode 100644 index 000000000..f823a2cac --- /dev/null +++ b/bsd/man/man2/fsetattrlist.2 @@ -0,0 +1 @@ +.so man2/setattrlist.2 diff --git a/bsd/man/man2/fstat64.2 b/bsd/man/man2/fstat64.2 new file mode 100644 index 000000000..b1a86c195 --- /dev/null +++ b/bsd/man/man2/fstat64.2 @@ -0,0 +1 @@ +.so man2/stat.2 diff --git a/bsd/man/man2/fstatfs64.2 b/bsd/man/man2/fstatfs64.2 new file mode 100644 index 000000000..923d3c0cc --- /dev/null +++ b/bsd/man/man2/fstatfs64.2 @@ -0,0 +1 @@ +.so man2/statfs.2 diff --git a/bsd/man/man2/getattrlist.2 b/bsd/man/man2/getattrlist.2 index d0f718d0d..856f1a110 100644 --- a/bsd/man/man2/getattrlist.2 +++ b/bsd/man/man2/getattrlist.2 @@ -20,7 +20,8 @@ .Dt GETATTRLIST 2 .Os Darwin .Sh NAME -.Nm getattrlist +.Nm getattrlist , +.Nm fgetattrlist .Nd get file system attributes .Sh SYNOPSIS .Fd #include @@ -28,17 +29,25 @@ .Ft int .Fn getattrlist "const char* path" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" . +.Ft int +.Fn fgetattrlist "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" .Sh DESCRIPTION The .Fn getattrlist function returns attributes (that is, metadata) of file system objects. +.Fn getattrlist +works on the file system object named by +.Fa path , +while +.Fn fgetattrlist +works on the provided file descriptor +.Fa fd . You can think of .Fn getattrlist as a seriously enhanced version of .Xr stat 2 . 
-The function returns attributes about the file system object specified by -.Fa path -in the buffer specified by +The functions return attributes about the specified file system object +into the buffer specified by .Fa attrBuf and .Fa attrBufSize . @@ -52,12 +61,14 @@ parameter lets you control specific aspects of the function's behavior. . The .Fn getattrlist -function is only supported by certain volume format implementations. +and +.Fn fgetattrlist +functions are only supported by certain volume format implementations. For maximum compatibility, client programs should use high-level APIs (such as the Carbon File Manager) to access file system attributes. These high-level APIs include logic to emulate file system attributes on volumes that don't support -.Fn getattrlist . +the calls. .Pp . Not all volumes support all attributes. @@ -181,10 +192,10 @@ The initial contents of this buffer are ignored. The .Fa options parameter is a bit set that controls the behaviour of -.Fn getattrlist . +the functions. The following option bits are defined. . -.Bl -tag -width XXXbitmapcount +.Bl -tag -width FSOPT_PACK_INVAL_ATTRS . .It FSOPT_NOFOLLOW If this bit is set, @@ -193,6 +204,21 @@ will not follow a symlink if it occurs as the last component of .Fa path . . +.It FSOPT_REPORT_FULLSIZE +The size of the attributes reported (in the first +.Vt u_int32_t +field in the attribute buffer) will be the size needed to hold all the +requested attributes; if not set, only the attributes actually returned +will be reported. This allows the caller to determine if any truncation +occurred. +. +.It FSOPT_PACK_INVAL_ATTRS +If this bit is set, then all requested attributes, even ones that are +not supported by the object or file system, will be returned. Default values +will be used for the invalid ones. Requires that +.Dv ATTR_CMN_RETURNED_ATTRS +be requested. +. .El . .Sh ATTRIBUTE BUFFER @@ -221,6 +247,8 @@ where the type is given as part of the attribute description (below). 
The attributes are placed into the attribute buffer in the order that they are described below. . +.It +Each attribute is aligned to a 4-byte boundary (including 64-bit data types). .El .Pp . @@ -233,8 +261,8 @@ structure, as defined by . .Bd -literal typedef struct attrreference { - long attr_dataoffset; - size_t attr_length; + int32_t attr_dataoffset; + u_int32_t attr_length; } attrreference_t; .Ed .Pp @@ -249,7 +277,7 @@ field is the offset in bytes from the .Vt attrreference structure to the attribute data. -This offset will always be a multiple of sizeof(unsigned long) bytes, +This offset will always be a multiple of sizeof(u_int32_t) bytes, so you can safely access common data types without fear of alignment exceptions. .Pp @@ -308,6 +336,18 @@ The following common attributes are defined. . .Bl -tag -width ATTR_VOL_ALLOCATIONCLUMP . +.It ATTR_CMN_RETURNED_ATTRS +An +.Vt attribute_set_t +structure which is used to report which of the requested attributes +were actually returned. This attribute, when requested, will always +be the first attribute returned. By default, unsupported attributes +will be skipped (i.e. not packed into the output buffer). This behavior +can be overridden using the FSOPT_PACK_INVAL_ATTRS option flag. Only +.Xr getattrlist 2 supports this attribute ( +.Xr getdirentriesattr 2 and +.Xr searchfs 2 do not support it ). +. .It ATTR_CMN_NAME An .Vt attrreference @@ -584,6 +624,26 @@ See .Xr access 2 for more details. . +.It ATTR_CMN_EXTENDED_SECURITY +A variable-length object (thus an +.Vt attrreference +structure) containing a +.Vt kauth_filesec +structure, of which only the ACL entry is used. +. +.It ATTR_CMN_UUID +A +.Vt guid_t +of the owner of the file system object. Analogous to +.Dv ATTR_CMN_OWNERID . +. +.It ATTR_CMN_GRPUUID +A +.Vt guid_t +of the group to which the file system object belongs. +Analogous to +.Dv ATTR_CMN_GRPID . +. 
.It ATTR_CMN_FILEID A .Vt u_int64_t @@ -594,6 +654,19 @@ A .Vt u_int64_t that identifies the parent directory of the file system object. . +.It ATTR_CMN_FULLPATH +An +.Vt attrreference +structure containing the full path (resolving all symlinks) to +the file system object as +a UTF-8 encoded, null terminated C string. +The attribute data length will not be greater than +.Dv PATH_MAX. +Inconsistent behavior may be observed when this attribute is requested on +hard-linked items, particularly when the file system does not support ATTR_CMN_PARENTID +natively. Callers should be aware of this when requesting the full path of a hard-linked item. +.Pp +. .El . .Sh VOLUME ATTRIBUTES @@ -781,6 +854,12 @@ A structure describing the optional features supported by this volume. See below for a discussion of volume capabilities. . +.It ATTR_VOL_UUID +A +.Vt uuid_t +containing the file system UUID. Typically this will be a +version 5 UUID. +. .It ATTR_VOL_ATTRIBUTES A .Vt vol_attributes_attr_t @@ -800,14 +879,15 @@ A .Vt u_int32_t containing the number of hard links to the directory; this does not include the historical "." and ".." entries. -For filesystems that do not support hard links to directories, +For file systems that do not support hard links to directories, this value will be 1. . .It ATTR_DIR_ENTRYCOUNT A .Vt u_int32_t containing the number of file system objects in the directory, not including -any synthetic items. +any synthetic items. The historical "." and ".." entries are also +excluded from this count. . .It ATTR_DIR_MOUNTSTATUS A @@ -816,10 +896,13 @@ containing flags describing what's mounted on the directory. Currently the only flag defined is .Dv DIR_MNTSTATUS_MNTPOINT, which indicates that there is a file system mounted on this directory. -Due to a bug (r. 3502822), this flag is never set on current system. . .El . +.Pp +Requested directory attributes are not returned for file system objects that +are not directories. +. .Sh FILE ATTRIBUTES . 
The following file attributes are defined. @@ -947,6 +1030,12 @@ See also . .El . +.Pp +File attributes are used for any file system object that is not a directory, +not just ordinary files. +Requested file attributes are not returned for file system objects that +are directories. +. .Sh FORK ATTRIBUTES . Fork attributes relate to the actual data in the file, @@ -1159,13 +1248,46 @@ should not set this bit. Introduced with Darwin 7.0 (Mac OS X version 10.3). . .It VOL_CAP_FMT_2TB_FILESIZE -If this bit is set the volume format supports file -sizes upto 2TB. This bit does not necessarily mean that the file -system does not support file size more than 2TB. -This bit does not mean that the currently available space on the volume is 2TB. +If this bit is set the volume format supports file sizes larger +than 4GB, and potentially up to 2TB; it does not indicate +whether the file system supports files larger than that. .Pp Introduced with Darwin 8.0 (Mac OS X version 10.4). . +.It VOL_CAP_FMT_OPENDENYMODES +If this bit is set, the volume format supports open deny modes +(e.g., "open for read write, deny write"). +. +.It VOL_CAP_FMT_HIDDEN_FILES +If this bit is set, the volume format supports the +.Dv UF_HIDDEN +file flag, and the +.Dv UF_HIDDEN +flag is mapped to that volume's native "hidden" or "invisible" +bit (e.g., the invisible bit from the Finder Info extended attribute). +. +.It VOL_CAP_FMT_PATH_FROM_ID +If this bit is set, the volume format supports the ability to derive a pathname +to the root of the file system given only the ID of an object. This also +implies that object IDs on this file system are persistent and not recycled. +Most file systems will not support this capability. +. +.It VOL_CAP_FMT_NO_VOLUME_SIZES +If this bit is set the volume format does not support +determining values for total data blocks, available blocks, or free blocks, as in +.Fa f_blocks, +.Fa f_bavail, +and +.Fa f_bfree +in the +.Fa struct statfs +returned by +.Xr statfs 2 . 
+Historically, those values were set to 0xFFFFFFFF for volumes +that did not support them. +.Pp +Introduced with Darwin 10.0 (Mac OS X version 10.6). +. .El .Pp . @@ -1207,6 +1329,9 @@ Introduced with Darwin 7.0 (Mac OS X version 10.3). .It VOL_CAP_INT_COPYFILE If this bit is set the volume format implementation supports the (private and undocumented) copyfile() function. +(This is not the +.Xr copyfile 3 +function.) .Pp Introduced with Darwin 7.0 (Mac OS X version 10.3). . @@ -1263,6 +1388,20 @@ ATTR_CMN_USERACCESS attribute. .Pp Introduced with Darwin 8.0 (Mac OS X version 10.4). . +.It VOL_CAP_INT_MANLOCK +If this bit is set, the volume format implementation supports +AFP-style mandatory byte range locks via +.Xr ioctl 2 . +. +.It VOL_CAP_INT_EXTENDED_ATTR +If this bit is set, the volume format implementation supports +native extended attributes (see +.Xr setxattr 2 ). +. +.It VOL_CAP_INT_NAMEDSTREAMS +If this bit is set, the volume format implementation supports +native named streams. +. .El .Pp . @@ -1350,31 +1489,46 @@ you should be careful to support the behaviour specified by this document. . .Sh ERRORS .Fn getattrlist +and +.Fn fgetattrlist will fail if: .Bl -tag -width Er . .It Bq Er ENOTSUP -The volume does not support -.Fn getattrlist . +The volume does not support the query. . .It Bq Er ENOTDIR -A component of the path prefix is not a directory. +A component of the path prefix for +.Fn getattrlist +is not a directory. . .It Bq Er ENAMETOOLONG -A component of a path name exceeded +A component of a path name for +.Fn getattrlist +exceeded .Dv NAME_MAX characters, or an entire path name exceeded .Dv PATH_MAX characters. . .It Bq Er ENOENT -The file system object does not exist. +The file system object for +.Fn getattrlist +does not exist. +. +.It Bq Er EBADF +The file descriptor argument for +.Fn fgetattrlist +is not a valid file descriptor. . .It Bq Er EACCES -Search permission is denied for a component of the path prefix. 
+Search permission is denied for a component of the path prefix for +.Fn getattrlist . . .It Bq Er ELOOP -Too many symbolic links were encountered in translating the pathname. +Too many symbolic links were encountered in translating the pathname +for +.Fn getattrlist . . .It Bq Er EFAULT .Fa path , @@ -1442,6 +1596,19 @@ When ordering attributes, you should always use the order in which they are described above. .Pp . +The +.Vt timespec +structure is 64-bits (two 32-bit elements) in 32-bit code, and +128-bits (two 64-bit elements) in 64-bit code; however, it is aligned +on a 4-byte (32-bit) boundary, even in 64-bit code. +.Pp +. +Inconsistent behavior may be observed when the ATTR_CMN_FULLPATH attribute is requested on +hard-linked items, particularly when the file system does not support ATTR_CMN_PARENTID +natively. Callers should be aware of this when requesting the full path of a hard-linked item, especially +if the full path crosses mount points. +.Pp +. For more caveats, see also the compatibility notes above. . .Sh EXAMPLES @@ -1655,8 +1822,8 @@ static int VolDemo(const char *path) .Pp printf("Volume information for %s:\en", path); - printf("ATTR_VOL_FILECOUNT: %lu\en", attrBuf.fileCount); - printf("ATTR_VOL_DIRCOUNT: %lu\en", attrBuf.dirCount); + printf("ATTR_VOL_FILECOUNT: %u\en", attrBuf.fileCount); + printf("ATTR_VOL_DIRCOUNT: %u\en", attrBuf.dirCount); printf( "ATTR_VOL_MOUNTPOINT: %.*s\en", (int) attrBuf.mountPointRef.attr_length, diff --git a/bsd/man/man2/getaudit.2 b/bsd/man/man2/getaudit.2 index 4b40f6d32..10f84aaf6 100644 --- a/bsd/man/man2/getaudit.2 +++ b/bsd/man/man2/getaudit.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved. 
.\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ .\" @@ -25,37 +25,178 @@ .\" .\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd March 6, 2009 .Dt GETAUDIT 2 -.Os Darwin +.Os .Sh NAME -.Nm getaudit -.Nd get the audit information for the current process +.Nm getaudit , +.Nm getaudit_addr +.Nd "retrieve audit session state" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn getaudit "struct auditinfo * auditinfo" +.Fn getaudit "auditinfo_t *auditinfo" +.Ft int +.Fn getaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length" .Sh DESCRIPTION The .Fn getaudit -function returns a -.Fa struct auditinfo -describing the current user audit settings for the calling process. The -information is returned in the struct pointed at by +system call +retrieves the active audit session state for the current process via the +.Vt auditinfo_t +pointed to by .Fa auditinfo . +The +.Fn getaudit_addr +system call +retrieves extended state via +.Fa auditinfo_addr +and +.Fa length . +.Pp +The +.Fa auditinfo_t +data structure is defined as follows: +.nf +.in +4n + +struct auditinfo { + au_id_t ai_auid; /* Audit user ID */ + au_mask_t ai_mask; /* Audit masks */ + au_tid_t ai_termid; /* Terminal ID */ + au_asid_t ai_asid; /* Audit session ID */ +}; +typedef struct auditinfo auditinfo_t; +.in +.fi +.Pp +The +.Fa ai_auid +variable contains the audit identifier which is recorded in the audit log for +each event the process caused. +.PP + +The +.Fa au_mask_t +data structure defines the bit mask for auditing successful and failed events +out of the predefined list of event classes. It is defined as follows: +.nf +.in +4n + +struct au_mask { + unsigned int am_success; /* success bits */ + unsigned int am_failure; /* failure bits */ +}; +typedef struct au_mask au_mask_t; +.in +.fi +.PP + +The +.Fa au_termid_t +data structure defines the Terminal ID recorded with every event caused by the +process. 
It is defined as follows: +.nf +.in +4n + +struct au_tid { + dev_t port; + u_int32_t machine; +}; +typedef struct au_tid au_tid_t; +.in +.fi +.PP + +The +.Fa ai_asid +variable contains the audit session ID which is recorded with every event +caused by the process. +.Pp +The +.Fn getaudit_addr +system call +uses the expanded +.Fa auditinfo_addr_t +data structure, which supports Terminal IDs with larger addresses such as those used +in IP version 6. It is defined as follows: +.nf +.in +4n + +struct auditinfo_addr { + au_id_t ai_auid; /* Audit user ID. */ + au_mask_t ai_mask; /* Audit masks. */ + au_tid_addr_t ai_termid; /* Terminal ID. */ + au_asid_t ai_asid; /* Audit session ID. */ + u_int64_t ai_flags; /* Audit session flags. */ +}; +typedef struct auditinfo_addr auditinfo_addr_t; +.in +.fi +.Pp + +The +.Fa au_tid_addr_t +data structure includes a larger address storage field and an additional +field with the type of address stored: +.nf +.in +4n + +struct au_tid_addr { + dev_t at_port; + u_int32_t at_type; + u_int32_t at_addr[4]; +}; +typedef struct au_tid_addr au_tid_addr_t; +.in +.fi +.Pp +Without appropriate privilege the audit mask fields will be set to all +ones. .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std getaudit getaudit_addr +.Sh ERRORS +The +.Fn getaudit +function will fail if: +.Bl -tag -width Er +.It Bq Er EFAULT +A failure occurred while data was being transferred to or from +the kernel. +.It Bq Er EINVAL +Illegal argument was passed by a system call. +.It Bq Er EOVERFLOW +The +.Fa length +argument indicates an overflow condition will occur. +.It Bq Er ERANGE +The address is too big and, therefore, +.Fn getaudit_addr +should be used instead. 
+.El .Sh SEE ALSO .Xr audit 2 , .Xr auditon 2 , -.Xr auditctl 2 , .Xr getauid 2 , +.Xr setaudit 2 , .Xr setauid 2 , -.Xr setaudit 2 +.Xr libbsm 3 .Sh HISTORY -The -.Fn getaudit -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org . diff --git a/bsd/man/man2/getaudit_addr.2 b/bsd/man/man2/getaudit_addr.2 new file mode 100644 index 000000000..25e765cd5 --- /dev/null +++ b/bsd/man/man2/getaudit_addr.2 @@ -0,0 +1 @@ +.so man2/getaudit.2 diff --git a/bsd/man/man2/getauid.2 b/bsd/man/man2/getauid.2 index eb61c0f21..a89d98aae 100644 --- a/bsd/man/man2/getauid.2 +++ b/bsd/man/man2/getauid.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved. .\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,36 +20,60 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd March 5, 2009 .Dt GETAUID 2 -.Os Darwin +.Os .Sh NAME .Nm getauid -.Nd get the audit user ID of the current process +.Nd "retrieve audit session ID" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn getauid "au_id_t * auid" +.Fn getauid "au_id_t *auid" .Sh DESCRIPTION The .Fn getauid -function returns the audit user ID for the calling process. The variable pointed -at by -.Fa auid -holds the ID returned by the system. 
+system call +retrieves the active audit session ID for the current process via the +.Vt au_id_t +pointed to by +.Fa auid . .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std +.Sh ERRORS +The +.Fn getauid +function will fail if: +.Bl -tag -width Er +.It Bq Er EFAULT +A failure occurred while data transferred from +the kernel failed. +.El .Sh SEE ALSO .Xr audit 2 , .Xr auditon 2 , -.Xr auditctl 2 , -.Xr setauid 2 , .Xr getaudit 2 , -.Xr setaudit 2 +.Xr getaudit_addr 2 , +.Xr setaudit 2 , +.Xr setaudit_addr 2 , +.Xr setauid 2 , +.Xr libbsm 3 .Sh HISTORY -The -.Fn getauid -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org . diff --git a/bsd/man/man2/getdirentries.2 b/bsd/man/man2/getdirentries.2 index b0fedddaa..a77a5d8b6 100644 --- a/bsd/man/man2/getdirentries.2 +++ b/bsd/man/man2/getdirentries.2 @@ -40,6 +40,7 @@ .Nm getdirentries .Nd "get directory entries in a filesystem independent format" .Sh SYNOPSIS +.Fd #include .Fd #include .Fd #include .Ft int @@ -144,6 +145,28 @@ The current position pointer should only be set to a value returned by a value returned in the location pointed to by .Fa basep , or zero. 
+.Sh NOTES +.Fn getdirentries +should rarely be used directly; instead, +.Xr opendir 3 +and +.Xr readdir 3 +should be used. +.Pp +As of Mac OS X 10.6, +.Fn getdirentries +is deprecated, and it is recommended that applications +use +.Xr readdir 3 +rather than using +.Fn getdirentries +directly. Due to limitations with the system call, +.Fn getdirentries +will not work +with 64-bit inodes; in order to use +.Fn getdirentries , +.Dv _DARWIN_NO_64_BIT_INODE +must be defined. .Sh RETURN VALUES If successful, the number of bytes actually transferred is returned. Otherwise, -1 is returned and the global variable @@ -169,7 +192,9 @@ error occurred while reading from or writing to the file system. .El .Sh SEE ALSO .Xr lseek 2 , -.Xr open 2 +.Xr open 2 , +.Xr opendir 3 , +.Xr readdir 3 .Sh HISTORY The .Fn getdirentries diff --git a/bsd/man/man2/getdirentriesattr.2 b/bsd/man/man2/getdirentriesattr.2 index 9c59e22ae..a2cc333ff 100644 --- a/bsd/man/man2/getdirentriesattr.2 +++ b/bsd/man/man2/getdirentriesattr.2 @@ -25,8 +25,14 @@ .Sh SYNOPSIS .Fd #include .Fd #include +.Pp +.Fd #if __LP64__ +.Ft int +.Fn getdirentriesattr "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned int * count" "unsigned int * basep" "unsigned int * newState" "unsigned int options" +.Fd #else .Ft int .Fn getdirentriesattr "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long * count" "unsigned long * basep" "unsigned long * newState" "unsigned long options" +.Fd #endif . . .Sh DESCRIPTION @@ -37,6 +43,15 @@ You can think of it as a combination of .Xr getdirentries 2 and .Xr getattrlist 2 . +.Fn getdirentriesattr +iterates over the items in a directory like +.Xr getdirentries 2 , +and returns information about each directory entry like +.Xr getattrlist 2 . +Note: when +.Fn getdirentriesattr +returns information about a symbolic link, the information returned is about the link itself, not the target of the link. 
+.Pp The function reads directory entries from the directory referenced by the file descriptor .Fa fd . @@ -111,7 +126,7 @@ packed in exactly the same way as they are returned from .Xr getattrlist 2 . These groups are then placed into the buffer, one after another. As each group starts with a leading -.Vt unsigned long +.Vt u_int32_t that contains the overall length of the group, you can step from one group to the next by simply adding this length to your pointer. @@ -123,8 +138,10 @@ The initial contents of this buffer are ignored. . The .Fa count -parameter points to a -.Vt unsigned long +parameter points to an +.Vt unsigned long +or +.Vt unsigned int variable. You should initialise this variable to be the number of directory entries for which you wish to get attributes. @@ -142,6 +159,11 @@ manner identical to You can use this value to reset a directory iteration to a known position using .Xr lseek 2 . +However, since the variable is too small to hold an +.Vt off_t , +you should use +.Xr lseek 2 +to get the directory's current position instead of using this parameter. The initial value of the variable is ignored. .Pp . @@ -289,7 +311,7 @@ typedef struct attrlist attrlist_t; .Pp . 
struct FInfoAttrBuf { - unsigned long length; + u_int32_t length; attrreference_t name; fsobj_type_t objType; char finderInfo[32]; @@ -308,12 +330,20 @@ static int FInfoDemo(const char *dirPath) int junk; int dirFD; attrlist_t attrList; +#ifdef __LP64__ + unsigned int index; + unsigned int count; + unsigned int junkBaseP; + unsigned int oldState; + unsigned int newState; +#else unsigned long index; unsigned long count; unsigned long junkBaseP; - bool oldStateValid; unsigned long oldState; unsigned long newState; +#endif + bool oldStateValid; bool done; FInfoAttrBuf * thisEntry; char attrBuf[kEntriesPerCall * (sizeof(FInfoAttrBuf) + 64)]; diff --git a/bsd/man/man2/getfsstat.2 b/bsd/man/man2/getfsstat.2 index cdc2e2586..47e792b60 100644 --- a/bsd/man/man2/getfsstat.2 +++ b/bsd/man/man2/getfsstat.2 @@ -33,7 +33,7 @@ .\" .\" @(#)getfsstat.2 8.1 (Berkeley) 6/9/93 .\" -.Dd June 9, 1993 +.Dd Oct 28, 2008 .Dt GETFSSTAT 2 .Os .Sh NAME @@ -45,20 +45,100 @@ .Fd #include .Ft int .Fn getfsstat "struct statfs *buf" "int bufsize" "int flags" +.Sh TRANSITIONAL SYNOPSIS (NOW DEPRECATED) +.Ft int +.br +.Fn getfsstat64 "struct statfs64 *buf" "int bufsize" "int flags" ; .Sh DESCRIPTION -.Fn Getfsstat -returns information about all mounted file systems. -.Fa Buf -is a pointer to an array of +The +.Fn getfsstat +function returns information about all mounted file systems. +The +.Fa buf +argument is a pointer to an array of .Xr statfs -structures defined as follows: +structures. +.Pp +As of Mac OS X 10.6, the default size of the +.Ft ino_t +type is 64 bits (the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +will be defined). +While there is no +.Ft ino_t +type used in the +.Xr statfs +structure, the changes to +.Fn getfsstat +are grouped together with the 64-bit inode changes. +The string fields in the +.Xr statfs +structure are larger and the variant symbol +.Li _getfsstat$INODE64 +will be automatically used. 
+The +.Xr statfs +structure is defined as: .Bd -literal typedef struct { int32_t val[2]; } fsid_t; -#define MFSNAMELEN 15 /* length of fs type name, not inc. nul */ -#define MNAMELEN 90 /* length of buffer for returned name */ +#define MFSTYPENAMELEN 16 /* length of fs type name including null */ +#define MAXPATHLEN 1024 +#define MNAMELEN MAXPATHLEN + +struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */ + uint32_t f_bsize; /* fundamental file system block size */ + int32_t f_iosize; /* optimal transfer block size */ + uint64_t f_blocks; /* total data blocks in file system */ + uint64_t f_bfree; /* free blocks in fs */ + uint64_t f_bavail; /* free blocks avail to non-superuser */ + uint64_t f_files; /* total file nodes in file system */ + uint64_t f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + uint32_t f_type; /* type of filesystem */ + uint32_t f_flags; /* copy of mount exported flags */ + uint32_t f_fssubtype; /* fs sub-type (flavor) */ + char f_fstypename[MFSTYPENAMELEN]; /* fs type name */ + char f_mntonname[MAXPATHLEN]; /* directory on which mounted */ + char f_mntfromname[MAXPATHLEN]; /* mounted filesystem */ + uint32_t f_reserved[8]; /* For future use */ +}; +.Ed +.Pp +(In 10.5, 64-bit +.Ft ino_t , +larger +.Xr statfs +structure and variant symbol were available if the macro +.Dv _DARWIN_USE_64_BIT_INODE +is defined before any header files are included; this macro is optional in +10.6.) +.Pp +If the macro +.Dv _DARWIN_NO_64_BIT_INODE +is defined before any header files are included, or if the deployment target +is less than 10.6, the legacy +.Xr statfs +structure will be in effect. +The +.Ft ino_t +type will be 32 bits (the +.Dv _DARWIN_FEATURE_64_BIT_INODE +macro will not be defined), the strings in the +.Xr statfs +structure will be their smaller legacy size (and long mount paths may no longer +fit) and the undecorated symbol +.Li _getfsstat +will be used. 
+This legacy +.Fa statfs +structure is defined as: +.Bd -literal +#define MFSNAMELEN 15 /* length of fs type name, not inc. nul */ +#define MNAMELEN 90 /* length of buffer for returned name */ -struct statfs { +struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */ short f_otype; /* type of file system (reserved: zero) */ short f_oflags; /* copy of mount flags (reserved: zero) */ long f_bsize; /* fundamental file system block size */ @@ -68,7 +148,7 @@ struct statfs { long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in file system */ long f_ffree; /* free file nodes in fs */ - fsid_t f_fsid; /* file system id (super-user only) */ + fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the file system */ short f_reserved1; /* reserved for future use */ short f_type; /* type of file system (reserved) */ @@ -107,6 +187,8 @@ Some of the information returned may be out of date, however; if .Fa flags is set to .Dv MNT_WAIT +or +.Dv MNT_DWAIT instead, .Fn getfsstat will request updated information from each mounted filesystem before @@ -123,13 +205,23 @@ is set to indicate the error. fails if one or more of the following are true: .Bl -tag -width Er .It Bq Er EFAULT -.Fa Buf -points to an invalid address. +The +.Fa buf +argument points to an invalid address. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .El +.Sh TRANSITIONAL DESCRIPTION (NOW DEPRECATED) +The +.Fn getfsstat64 +routine is equivalent to the default +.Fn getfsstat +(when +.Dv _DARWIN_FEATURE_64_BIT_INODE +is defined), so there is no longer any reason to use it (it will be removed +in the future). .Sh SEE ALSO .Xr statfs 2 , .Xr fstab 5 , diff --git a/bsd/man/man2/getgroups.2 b/bsd/man/man2/getgroups.2 index 160022ca3..f2a9e995d 100644 --- a/bsd/man/man2/getgroups.2 +++ b/bsd/man/man2/getgroups.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
+.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: getgroups.2,v 1.8 1995/02/27 12:32:57 cgd Exp $ .\" .\" Copyright (c) 1983, 1991, 1993 @@ -33,7 +56,7 @@ .\" .\" @(#)getgroups.2 8.2 (Berkeley) 4/16/94 .\" -.Dd April 16, 1994 +.Dd October 2, 2008 .Dt GETGROUPS 2 .Os BSD 4.2 .Sh NAME @@ -47,7 +70,7 @@ .Fa "gid_t grouplist[]" .Fc .Sh DESCRIPTION -.Fn Getgroups +.Fn getgroups gets the current group access list of the current user process and stores it in the array .Fa grouplist[] . @@ -55,14 +78,12 @@ The parameter .Fa gidsetsize indicates the number of entries that may be placed in .Fa grouplist[] . -.Fn Getgroups +.Fn getgroups returns the actual number of groups returned in .Fa grouplist[] . -No more than +However, no more than .Dv {NGROUPS_MAX} -will ever -be returned. -If +will be returned. If .Fa gidsetsize is 0, .Fn getgroups @@ -91,6 +112,12 @@ The argument although non-zero, is smaller than the number of groups in the group set. .El +.Sh LEGACY DESCRIPTION +If _DARWIN_C_SOURCE is defined, +.Fn getgroups +can return more than +.Dv {NGROUPS_MAX} +groups. 
.Sh LEGACY SYNOPSIS .Fd #include .Fd #include diff --git a/bsd/man/man2/getpeername.2 b/bsd/man/man2/getpeername.2 index 88edc5d55..1db123b2a 100644 --- a/bsd/man/man2/getpeername.2 +++ b/bsd/man/man2/getpeername.2 @@ -38,7 +38,7 @@ .Os BSD 4.2 .Sh NAME .Nm getpeername -.Nd get name of connected peer +.Nd get address of connected peer .Sh SYNOPSIS .Fd #include .Ft int @@ -48,18 +48,20 @@ .Fa "socklen_t *restrict address_len" .Fc .Sh DESCRIPTION -.Fn Getpeername -returns the name of the peer connected to -socket -.Fa socket . +The +.Fn getpeername +function returns the address of the peer connected to the specified +socket. +.Pp The .Fa address_len parameter should be initialized to indicate the amount of space pointed to by -.Fa name . -On return it contains the actual size of the name +.Fa address . +On return it contains the actual size of the address returned (in bytes). -The name is truncated if the buffer provided is too small. +.Pp +The address is truncated if the buffer provided is too small. .Sh RETURN VALUES .Rv -std getpeername .Sh ERRORS diff --git a/bsd/man/man2/getsockname.2 b/bsd/man/man2/getsockname.2 index 45241e307..4ca0f72e0 100644 --- a/bsd/man/man2/getsockname.2 +++ b/bsd/man/man2/getsockname.2 @@ -48,16 +48,21 @@ .Fa "socklen_t *restrict address_len" .Fc .Sh DESCRIPTION -.Fn Getsockname -returns the current +The +.Fn getsockname +function returns the current .Fa address -for the specified socket. The +for the specified socket. +.Pp +The .Fa address_len parameter should be initialized to indicate the amount of space pointed to by .Fa address . -On return it contains the actual size of the name +On return it contains the actual size of the address returned (in bytes). +.Pp +The address is truncated if the buffer provided is too small. 
.Sh RETURN VALUES .Rv -std getsockname .Sh ERRORS @@ -101,7 +106,7 @@ is not supported for the protocol in use by .Sh BUGS Names bound to sockets in the UNIX domain are inaccessible; .Fn getsockname -returns a zero-length name. +returns a zero-length address. .Sh HISTORY The .Fn getsockname diff --git a/bsd/man/man2/getsockopt.2 b/bsd/man/man2/getsockopt.2 index 9e3bef41f..e0408c272 100644 --- a/bsd/man/man2/getsockopt.2 +++ b/bsd/man/man2/getsockopt.2 @@ -166,15 +166,20 @@ and set with .It Dv SO_TYPE Ta "get the type of the socket (get only)" .It Dv SO_ERROR Ta "get and clear error on the socket (get only)" .It Dv SO_NOSIGPIPE Ta "do not generate SIGPIPE, instead return EPIPE" +.It Dv SO_NREAD Ta "number of bytes to be read (get only)" +.It Dv SO_NWRITE Ta "number of bytes written not yet sent by the protocol (get only)" +.It Dv SO_LINGER_SEC Ta "linger on close if data present with timeout in seconds" .El .Pp .Dv SO_DEBUG enables debugging in the underlying protocol modules. +.Pp .Dv SO_REUSEADDR indicates that the rules used in validating addresses supplied in a .Xr bind 2 call should allow reuse of local addresses. +.Pp .Dv SO_REUSEPORT allows completely duplicate bindings by multiple processes if they all set @@ -182,6 +187,7 @@ if they all set before binding the port. This option permits multiple instances of a program to each receive UDP/IP multicast or broadcast datagrams destined for the bound port. +.Pp .Dv SO_KEEPALIVE enables the periodic transmission of messages on a connected socket. Should the @@ -189,6 +195,7 @@ connected party fail to respond to these messages, the connection is considered broken and processes using the socket are notified via a .Dv SIGPIPE signal when attempting to send data. +.Pp .Dv SO_DONTROUTE indicates that outgoing messages should bypass the standard routing facilities. 
Instead, messages are directed @@ -218,11 +225,18 @@ is disabled and a is issued, the system will process the close in a manner that allows the process to continue as quickly as possible. .Pp +.Dv SO_LINGER_SEC +is the same option as +.Dv SO_LINGER +except the linger time is in seconds for +.Dv SO_LINGER_SEC . +.Pp The option .Dv SO_BROADCAST requests permission to send broadcast datagrams on the socket. Broadcast was a privileged operation in earlier versions of the system. +.Pp With protocols that support out-of-band data, the .Dv SO_OOBINLINE option @@ -235,6 +249,7 @@ calls without the .Dv MSG_OOB flag. Some protocols always behave as if this option is set. +.Pp .Dv SO_SNDBUF and .Dv SO_RCVBUF @@ -323,20 +338,36 @@ instead, the write to the socket returns with the error when there is no reader. .Pp Finally, -.Dv SO_TYPE -and -.Dv SO_ERROR +.Dv SO_TYPE , +.Dv SO_ERROR , +.Dv SO_NREAD , and +.Dv SO_NWRITE are options used only with .Fn getsockopt . +.Pp .Dv SO_TYPE returns the type of the socket, such as .Dv SOCK_STREAM ; it is useful for servers that inherit sockets on startup. +.Pp .Dv SO_ERROR returns any pending error on the socket and clears the error status. It may be used to check for asynchronous errors on connected datagram sockets or for other asynchronous errors. +.Pp +.Dv SO_NREAD +returns the amount of data in the input buffer that is available to be received. +For datagram oriented sockets, +.Dv SO_NREAD +returns the size of the first packet -- this differs from the +.Fn ioctl +command +.Dv FIONREAD +that returns the total amount of data available. +.Pp +.Dv SO_NWRITE +returns the amount of data in the output buffer not yet sent by the protocol. .Sh RETURN VALUES .Rv -std .Sh ERRORS @@ -366,7 +397,10 @@ is not in a valid part of the process address space. The option is invalid at the level indicated. .\" ========== .It Bq Er ENOBUFS -Insufficient memory buffers are available. +Insufficient system resources available for the call to complete. 
+.\" ========== +.It Bq Er ENOMEM +Insufficient memory available for the system call to complete. .\" ========== .It Bq Er ENOPROTOOPT The option is unknown at the level indicated. @@ -392,6 +426,9 @@ is out of bounds. is already connected and a specified option cannot be set while this is the case. +.\" ========== +.It Bq Er EINVAL +The socket has been shut down. .El .Sh LEGACY SYNOPSIS .Fd #include @@ -401,8 +438,9 @@ The include file .In sys/types.h is necessary. .Sh SEE ALSO -.Xr ioctl 2 , .Xr socket 2 , +.Xr bind 2 , +.Xr ioctl 2 , .Xr getprotoent 3 , .Xr protocols 5 .Sh BUGS diff --git a/bsd/man/man2/gettimeofday.2 b/bsd/man/man2/gettimeofday.2 index 67f2760c6..96659f100 100644 --- a/bsd/man/man2/gettimeofday.2 +++ b/bsd/man/man2/gettimeofday.2 @@ -33,7 +33,7 @@ .\" .\" @(#)gettimeofday.2 8.2 (Berkeley) 5/26/95 .\" -.Dd May 26, 1995 +.Dd August 5, 2008 .Dt GETTIMEOFDAY 2 .Os BSD 4 .Sh NAME @@ -53,9 +53,6 @@ .Fa "const struct timezone *tzp" .Fc .Sh DESCRIPTION -.Bf -symbolic -Note: timezone is no longer used; this information is kept outside -the kernel. .Ef .Pp The system's notion of the current Greenwich time and the current time @@ -68,10 +65,24 @@ since midnight (0 hour), January 1, 1970. The resolution of the system clock is hardware dependent, and the time may be updated continuously or in ``ticks.'' If .Fa tp -or +is NULL +and .Fa tzp -is NULL, the associated time -information will not be returned or set. +is non-NULL, +.Fn gettimeofday +will populate the timezone struct in +.Fa tzp . +If +.Fa tp +is non-NULL and +.Fa tzp +is NULL, then only the timeval struct in +.Fa tp +is populated. If both +.Fa tp +and +.Fa tzp +are NULL, nothing is returned. .Pp The structures pointed to by .Fa tp @@ -82,6 +93,7 @@ are defined in as: .Pp .Bd -literal + struct timeval { time_t tv_sec; /* seconds since Jan. 
1, 1970 */ suseconds_t tv_usec; /* and microseconds */ @@ -91,6 +103,7 @@ struct timezone { int tz_minuteswest; /* of Greenwich */ int tz_dsttime; /* type of dst correction to apply */ }; + .Ed .Pp The diff --git a/bsd/man/man2/getxattr.2 b/bsd/man/man2/getxattr.2 index 581d746e3..645a7b782 100644 --- a/bsd/man/man2/getxattr.2 +++ b/bsd/man/man2/getxattr.2 @@ -83,6 +83,16 @@ normally returns information from the target of if it is a symbolic link. With this option, .Fn getxattr will return extended attribute data from the symbolic link instead. +.It Dv XATTR_SHOWCOMPRESSION +.Fn getxattr +and +.Fn fgetxattr +will return HFS Plus Compression extended attribute +.Fa name +(if present) for the file referred to by +.Fa path +or +.Fa fd . .El .Pp .Fn fgetxattr diff --git a/bsd/man/man2/kevent64.2 b/bsd/man/man2/kevent64.2 new file mode 100644 index 000000000..9f491e699 --- /dev/null +++ b/bsd/man/man2/kevent64.2 @@ -0,0 +1 @@ +.so man2/kqueue.2 diff --git a/bsd/man/man2/kqueue.2 b/bsd/man/man2/kqueue.2 index 684ee806c..f7a12d523 100644 --- a/bsd/man/man2/kqueue.2 +++ b/bsd/man/man2/kqueue.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" Copyright (c) 2000 Jonathan Lemon .\" All rights reserved. .\" @@ -24,12 +47,14 @@ .\" .\" $FreeBSD: src/lib/libc/sys/kqueue.2,v 1.32 2002/12/19 09:40:25 ru Exp $ .\" -.Dd April 14, 2000 +.Dd October 21, 2008 .Dt KQUEUE 2 .Os .Sh NAME .Nm kqueue , -.Nm kevent +.Nm kevent , +and +.Nm kevent64 .Nd kernel event notification mechanism .Sh LIBRARY .Lb libc @@ -41,7 +66,10 @@ .Fn kqueue "void" .Ft int .Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout" +.Ft int +.Fn kevent64 "int kq" "const struct kevent64_s *changelist" "int nchanges" "struct kevent64_s *eventlist" "int nevents" "unsigned int flags" "const struct timespec *timeout" .Fn EV_SET "&kev" ident filter flags fflags data udata +.Fn EV_SET64 "&kev" ident filter flags fflags data udata "ext[0]" "ext[1]" .Sh DESCRIPTION The .Fn kqueue @@ -51,7 +79,7 @@ event (kevent) happens or a condition holds, based on the results of small pieces of kernel code termed filters. A kevent is identified by an (ident, filter) pair and specifies the interesting conditions to be notified about for that pair. -An (ident, filter) pair can only appear once is a given kqueue. +An (ident, filter) pair can only appear once in a given kqueue. Subsequent attempts to register the same pair for a given kqueue will result in the replacement of the conditions being watched, not an addition. @@ -85,14 +113,18 @@ The queue is not inherited by a child created with .Pp The .Fn kevent -system call -is used to register events with the queue, and return any pending +and +.Fn kevent64 +system calls +are used to register events with the queue, and return any pending events to the user. 
The .Fa changelist argument is a pointer to an array of .Va kevent +or +.Va kevent64_s structures, as defined in .Aq Pa sys/event.h . All changes contained in the @@ -106,7 +138,11 @@ gives the size of The .Fa eventlist argument -is a pointer to an array of kevent structures. +is a pointer to an array of +.Va kevent +or +.Va kevent64_s +structures. The .Fa nevents argument @@ -117,9 +153,11 @@ If is a non-NULL pointer, it specifies a maximum interval to wait for an event, which will be interpreted as a struct timespec. If .Fa timeout -is a NULL pointer, +is a NULL pointer, both .Fn kevent -waits indefinitely. To effect a poll, the +and +.Fn kevent64 +wait indefinitely. To effect a poll, the .Fa timeout argument should be non-NULL, pointing to a zero-valued .Va timespec @@ -131,24 +169,46 @@ and The .Fn EV_SET macro is provided for ease of initializing a -kevent structure. +.Va kevent +structure. Similarly, +.Fn EV_SET64 +initializes a +.Va kevent64_s +structure. .Pp The .Va kevent -structure is defined as: +and +.Va kevent64_s +structures are defined as: .Bd -literal struct kevent { - uintptr_t ident; /* identifier for this event */ - short filter; /* filter for event */ - u_short flags; /* action flags for kqueue */ - u_int fflags; /* filter flag value */ - intptr_t data; /* filter data value */ - void *udata; /* opaque user data identifier */ + uintptr_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ + intptr_t data; /* filter-specific data */ + void *udata; /* opaque user data identifier */ +}; + + +struct kevent64_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t udata; /* opaque user data identifier */ + uint64_t ext[2]; /* filter-specific extensions */ }; .Ed .Pp 
+---- +.Pp The fields of .Fa struct kevent +and +.Fa struct kevent64_s are: .Bl -tag -width XXXfilter .It ident @@ -168,6 +228,17 @@ Filter-specific data value. Opaque user-defined value passed through the kernel unchanged. .El .Pp +In addition, +.Fa struct kevent64_s +contains: +.Bl -tag -width XXXfilter +.It ext[2] +This field stores extensions for the event's filter. What type of extension depends on +what type of filter is being used. +.El +.Pp +---- +.Pp The .Va flags field can contain the following values: @@ -180,10 +251,14 @@ unless overridden by the EV_DISABLE flag. .It EV_ENABLE Permit .Fn kevent +and +.Fn kevent64 to return the event if it is triggered. .It EV_DISABLE Disable the event so .Fn kevent +and +.Fn kevent64 will not return it. The filter itself is not disabled. .It EV_DELETE Removes the event from the kqueue. Events which are attached to @@ -212,13 +287,19 @@ See below. .El .Pp +---- +.Pp The predefined system filters are listed below. Arguments may be passed to and from the filter via the .Va fflags and .Va data -fields in the kevent structure. -.Bl -tag -width EVFILT_SIGNAL +fields in the +.Va kevent +or +.Va kevent64_s +structure. +.Bl -tag -width EVFILT_MACHPORT .It EVFILT_READ Takes a file descriptor as the identifier, and returns whenever there is data available to read. @@ -349,7 +430,7 @@ in and returns when the process performs one or more of the requested events. If a process can normally see another process, it can attach an event to it. The events to monitor are: -.Bl -tag -width +.Bl -tag -width NOTE_SIGNAL .It NOTE_EXIT The process has exited. .It NOTE_FORK @@ -375,33 +456,114 @@ On return, contains the events which triggered the filter. .It EVFILT_SIGNAL Takes the signal number to monitor as the identifier and returns -when the given signal is delivered to the process. +when the given signal is generated for the process. This coexists with the .Fn signal and .Fn sigaction -facilities, and has a lower precedence. 
The filter will record +facilities, and has a lower precedence. Only signals sent to the process, +not to a particular thread, will trigger the filter. The filter will record all attempts to deliver a signal to a process, even if the signal has -been marked as SIG_IGN. Event notification happens after normal +been marked as SIG_IGN. Event notification happens before normal signal delivery processing. .Va data -returns the number of times the signal has occurred since the last call to +returns the number of times the signal has been generated since the last call to .Fn kevent . This filter automatically sets the EV_CLEAR flag internally. +.It EVFILT_MACHPORT +Takes the name of a mach port, or port set, in +.Va ident +and waits until a message is received on the port or port set. When a message +is received, the size of the message is returned in +.Va data +and if +.Va fflags +is set to MACH_RCV_MSG, a pointer to the message is returned in ext[0]. .It EVFILT_TIMER -This filter is currently unsupported. -.\"Establishes an arbitrary timer identified by -.\".Va ident . -.\"When adding a timer, -.\".Va data -.\"specifies the timeout period in milliseconds. -.\"The timer will be periodic unless EV_ONESHOT is specified. -.\"On return, -.\".Va data -.\"contains the number of times the timeout has expired since the last call to -.\".Fn kevent . -.\"This filter automatically sets the EV_CLEAR flag internally. +Establishes an interval timer with the data +timer identified by +.Va ident . +When adding a timer, +.Va data +specifies the timeout period and +.Va fflags +can be set to one of the following: +.Bl -tag -width NOTE_ABSOLUTE +.It NOTE_SECONDS +data is in seconds +.It NOTE_USECONDS +data is in microseconds +.It NOTE_NSECONDS +data is in nanoseconds +.It NOTE_ABSOLUTE +data is an absolute timeout +.El +.Pp +If fflags is not set, the default is milliseconds. The timer will be periodic unless EV_ONESHOT is specified. 
+On return, +.Va data +contains the number of times the timeout has expired since the last call to +.Fn kevent +or +.Fn kevent64 . +This filter automatically sets the EV_CLEAR flag internally. +.It EVFILT_SESSION +Takes the audit session ID to monitor as the identifier and the events to watch for in +.Va fflags , +and returns when one or more of the requested session events occurs. +To monitor for events for any audit session the value AS_ANY_ASID +should be used as the identifier. With AS_ANY_ASID, as new audit +sessions are created they are included as if they were added +individually. The events to monitor are: +.Bl -tag -width NOTE_AS_UPDATE +.It NOTE_AS_START +A new audit session has started. +.It NOTE_AS_END +All the processes in the audit session have exited. +.It NOTE_AS_CLOSE +This audit session is no longer valid in the kernel. In other words, it +is now safe to dispose of any cached information about this session or +reuse its session ID for a new audit session. +.It NOTE_AS_UPDATE +The audit session information was updated. The audit session information is +considered immutable once initially set. If this becomes enforced in +the kernel then this event may no longer be needed and may become +obsolete. +.It NOTE_AS_ERR +This flag is returned if the system was unable to attach an event to a +new session when the audit session ID of AS_ANY_ASID +is used. This is usually due to resource limitations. .El +.Pp +On return, +.Va fflags +contains the events which triggered the filter, +.Va ident +contains the audit session ID, and +.Va data +contains the audit user ID. +This filter automatically sets the EV_CLEAR flag internally. +.El +.Pp +---- +.Pp +In the +.Va ext[2] +field of the +.Va kevent64_s +structure, +.Va ext[0] +is only used with the EVFILT_MACHPORT filter. +With other filters, +.Va ext[0] +is passed through +.Fn kevent64 +much like +.Va udata . +.Va ext[1] +can always be used like +.Va udata . 
+For the use of ext[0], see the EVFILT_MACHPORT filter above. .Sh RETURN VALUES The .Fn kqueue @@ -412,8 +574,10 @@ returned and errno set. .Pp The .Fn kevent -system call -returns the number of events placed in the +and +.Fn kevent64 +system calls +return the number of events placed in the .Fa eventlist , up to the value given by .Fa nevents . @@ -436,7 +600,9 @@ will be returned, and will be set to indicate the error condition. If the time limit expires, then .Fn kevent -returns 0. +and +.Fn kevent64 +return 0. .Sh ERRORS The .Fn kqueue @@ -452,13 +618,17 @@ The system file table is full. .Pp The .Fn kevent -system call fails if: +and +.Fn kevent64 +system calls fail if: .Bl -tag -width Er .It Bq Er EACCES The process does not have permission to register a filter. .It Bq Er EFAULT There was an error reading or writing the .Va kevent +or +.Va kevent64_s structure. .It Bq Er EBADF The specified descriptor is invalid. diff --git a/bsd/man/man2/link.2 b/bsd/man/man2/link.2 index 6392e7cea..470164057 100644 --- a/bsd/man/man2/link.2 +++ b/bsd/man/man2/link.2 @@ -33,7 +33,7 @@ .\" .\" @(#)link.2 8.3 (Berkeley) 1/12/94 .\" -.Dd January 12, 1994 +.Dd October 29, 2008 .Dt LINK 2 .Os BSD 4 .Sh NAME @@ -81,6 +81,21 @@ must be in the same file system. As mandated by POSIX.1, .Fa path1 may not be a directory. +.Pp +.Fn link +will resolve and follow symbolic links contained within both +.Fa path1 +and +.Fa path2 . +If the last component of +.Fa path1 +is a symbolic link, +.Fn link +will point the hard link, +.Fa path2 , +to the underlying object pointed to by +.Fa path1 , +not to the symbolic link itself. .Sh RETURN VALUES Upon successful completion, a value of 0 is returned. Otherwise, a value of -1 is returned and @@ -135,12 +150,12 @@ characters, or an entire path name exceeded characters. .\" ========== .It Bq Er ENOENT -A component of either path prefix does not exist. +A component of either path prefix does not exist, or is a dangling symbolic link. 
.\" ========== .It Bq Er ENOENT The file named by .Fa path1 -does not exist. +does not exist, or is a dangling symbolic link. .\" ========== .It Bq Er ENOSPC The directory in which the entry for the new link is being placed diff --git a/bsd/man/man2/listxattr.2 b/bsd/man/man2/listxattr.2 index 499def160..87fc5a575 100644 --- a/bsd/man/man2/listxattr.2 +++ b/bsd/man/man2/listxattr.2 @@ -26,7 +26,7 @@ .Sh NAME .Nm listxattr, .Nm flistxattr -.Nd list an extended attribute value +.Nd list extended attribute names .Sh SYNOPSIS .Fd #include .Ft ssize_t @@ -66,6 +66,14 @@ normally lists attributes of the target of if it is a symbolic link. With this option, .Fn listxattr will list attributes of the link itself. +.It Dv XATTR_SHOWCOMPRESSION +.Fn listxattr +and +.Fn flistxattr +will list HFS Plus Compression extended attribute(s) (if present) for the file referred to by +.Fa path +or +.Fa fd . .El .Pp If diff --git a/bsd/man/man2/lstat64.2 b/bsd/man/man2/lstat64.2 new file mode 100644 index 000000000..4fe4fb441 --- /dev/null +++ b/bsd/man/man2/lstat64.2 @@ -0,0 +1,2 @@ +.so man2/stat.2 + diff --git a/bsd/man/man2/madvise.2 b/bsd/man/man2/madvise.2 index b285f4f2d..a4b4d415d 100644 --- a/bsd/man/man2/madvise.2 +++ b/bsd/man/man2/madvise.2 @@ -124,6 +124,14 @@ The address range will remain valid. This is used with .Fn madvise system call. +.It Dv MADV_ZERO_WIRED_PAGES +Indicates that the application would like the wired pages in this address +range to be zeroed out if the address range is deallocated without first +unwiring the pages (i.e. a munmap(2) without a preceding munlock(2) or the application +quits). +This is used with +.Fn madvise +system call. .El .Pp The diff --git a/bsd/man/man2/mincore.2 b/bsd/man/man2/mincore.2 index 05fb3d6eb..d76b49931 100644 --- a/bsd/man/man2/mincore.2 +++ b/bsd/man/man2/mincore.2 @@ -50,16 +50,39 @@ The system call allows a process to obtain information about whether pages are core resident. 
-Here the current core residency of the pages is returned +The current core residency of the pages is returned in the character array +.Fa vec . +For each entry in the character array .Fa vec , -with a value of 1 meaning that the page is in-core. +the following bit values may be set +depending upon the state of the page: +.Bd -literal -offset indent +.nf +MINCORE_INCORE 0x1 /* Page is incore */ +MINCORE_REFERENCED 0x2 /* Page has been referenced by us */ +MINCORE_MODIFIED 0x4 /* Page has been modified by us */ +MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */ +MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */ +.fi +.Ed .Sh RETURN VALUES Upon successful completion, a value of 0 is returned. Otherwise, a value of -1 is returned and .Va errno is set to indicate the error. +.Sh ERRORS +.Bl -tag -width Er +.\" =========== +.It Bq Er EINVAL +.Fa addr +is not a valid address for user mode. +.\" =========== +.It Bq Er EFAULT +an error occurred trying to copy to the output character array +.Fa vec . +.El .Sh SEE ALSO .Xr madvise 2 , .Xr minherit 2 , diff --git a/bsd/man/man2/mkdir.2 b/bsd/man/man2/mkdir.2 index 4df68194d..5fba3bf5a 100644 --- a/bsd/man/man2/mkdir.2 +++ b/bsd/man/man2/mkdir.2 @@ -53,11 +53,22 @@ is created with the access permissions specified by .Fa mode and restricted by the .Xr umask 2 -of the calling process. +of the calling process. See +.Xr chmod 2 +for the possible permission bit masks for +.Fa mode . .Pp The directory's owner ID is set to the process's effective user ID. The directory's group ID is set to that of the parent directory in which it is created. +.Pp +Note: the behavior of +.Fn mkdir +is undefined when mode bits other than the low 9 bits are used. Use +.Xr chmod 2 +after +.Fn mkdir +to explicitly set the other bits (See example below). .Sh RETURN VALUES A 0 return value indicates success. 
A -1 return value indicates an error, and an error code is stored in @@ -127,6 +138,21 @@ A component of the path prefix is not a directory. .It Bq Er EROFS The parent directory resides on a read-only file system. .El +.Sh EXAMPLE +.Bd -literal -offset indent + +int main (int argc, const char * argv[]) +{ + /* The behavior of mkdir is undefined for anything other than the "permission" bits */ + if (mkdir("/tmp/blah", 0777)) + perror("/tmp/blah"); + + /* So we need to set the sticky/executable bits explicitly with chmod after calling mkdir */ + if (chmod("/tmp/blah", 07777)) + perror("/tmp/blah"); +} + +.Ed .Sh LEGACY SYNOPSIS .Fd #include .Fd #include diff --git a/bsd/man/man2/mmap.2 b/bsd/man/man2/mmap.2 index 648b09c7a..af9de8c04 100644 --- a/bsd/man/man2/mmap.2 +++ b/bsd/man/man2/mmap.2 @@ -1,5 +1,3 @@ -.\" $NetBSD: mmap.2,v 1.5 1995/06/24 10:48:59 cgd Exp $ -.\" .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -11,10 +9,6 @@ .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. @@ -31,34 +25,30 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.\" @(#)mmap.2 8.1 (Berkeley) 6/4/93 +.\" @(#)mmap.2 8.4 (Berkeley) 5/11/95 +.\" $FreeBSD: src/lib/libc/sys/mmap.2,v 1.56 2007/01/09 00:28:15 imp Exp $ .\" -.Dd June 4, 1993 +.Dd April 21, 2006 .Dt MMAP 2 -.Os BSD 4 +.Os .Sh NAME .Nm mmap -.Nd map files or devices into memory +.Nd allocate memory, or map files or devices into memory +.Sh LIBRARY +.Lb libc .Sh SYNOPSIS -.Fd #include +.In sys/mman.h .Ft void * -.Fo mmap -.Fa "void *addr" -.Fa "size_t len" -.Fa "int prot" -.Fa "int flags" -.Fa "int fildes" -.Fa "off_t offset" -.Fc +.Fn mmap "void *addr" "size_t len" "int prot" "int flags" "int fd" "off_t offset" .Sh DESCRIPTION The -.Nm mmap -function causes the pages starting at +.Fn mmap +system call causes the pages starting at .Fa addr and continuing for at most .Fa len bytes to be mapped from the object described by -.Fa fildes , +.Fa fd , starting at byte offset .Fa offset . If @@ -67,19 +57,42 @@ or .Fa len is not a multiple of the pagesize, the mapped region may extend past the specified range. +Any extension beyond the end of the mapped object will be zero-filled. .Pp -If +The +.Fa addr +argument is used by the system to determine the starting address of the mapping, +and its interpretation is dependent on the setting of the MAP_FIXED flag. +If MAP_FIXED is specified in +.Fa flags , +the system will try to place the mapping at the specified address, +possibly removing a +mapping that already exists at that location. +If MAP_FIXED is not specified, +then the system will attempt to use the range of addresses starting at .Fa addr -is non-zero, it is used as a hint to the system. -(As a convenience to the system, the actual address of the region may differ -from the address supplied.) +if they do not overlap any existing mappings, +including memory allocated by malloc(3) and other such allocators. 
+Otherwise, +the system will choose an alternate address for the mapping (using an implementation +dependent algorithm) +that does not overlap any existing +mappings. +In other words, +without MAP_FIXED the system will attempt to find an empty location in the address space if the specified address +range has already been mapped by something else. If .Fa addr -is zero, an address will be selected by the system. -The actual starting address of the region is returned. -A successful +is zero and MAP_FIXED is not specified, +then an address will be selected by the system so as not to overlap +any existing mappings in the address space. +In all cases, +the actual starting address of the region is returned. +If MAP_FIXED is specified, +a successful .Fa mmap deletes any previous mapping in the allocated address range. +Previous mappings are never deleted if MAP_FIXED is not specified. .Pp The protections (region accessibility) are specified in the .Fa prot @@ -87,30 +100,37 @@ argument by .Em or Ns 'ing the following values: .Pp -.Bl -tag -width MAP_FIXEDX -.It Dv PROT_EXEC -Pages may be executed. +.Bl -tag -width PROT_WRITE -compact +.It Dv PROT_NONE +Pages may not be accessed. .It Dv PROT_READ Pages may be read. .It Dv PROT_WRITE Pages may be written. +.It Dv PROT_EXEC +Pages may be executed. .El .Pp +Note that, due to hardware limitations, on some platforms PROT_WRITE may +imply PROT_READ, and PROT_READ may imply PROT_EXEC. Portable programs +should not rely on these flags being separately enforceable. +.Pp The .Fa flags -parameter specifies the type of the mapped object, mapping options, -and whether modifications made to the mapped copy of the page -are private to the process (copy-on-write) -or are to be shared with other references.
-Sharing, mapping type, and options are specified in the +argument specifies the type of the mapped object, mapping options and +whether modifications made to the mapped copy of the page are private +to the process (copy-on-write) or are to be shared with other references. +Sharing, mapping type and options are specified in the .Fa flags argument by .Em or Ns 'ing the following values: -.Pp -.Bl -tag -width MAP_HASSEMAPHOREX +.Bl -tag -width MAP_HASSEMAPHORE .It Dv MAP_ANON Map anonymous memory not associated with any specific file. +The +.Fa offset +argument is ignored. Mac OS X specific: the file descriptor used for creating .Dv MAP_ANON regions can be used to pass some Mach VM flags, and can @@ -122,23 +142,36 @@ to are: .Pp VM_FLAGS_PURGABLE to create Mach purgable (i.e. volatile) memory -VM_MAKE_TAG(tag) to associate an 8-bit tag with the region .Pp +VM_MAKE_TAG(tag) to associate an 8-bit tag with the region +.br defines some preset tags (with a VM_MEMORY_ prefix). Users are encouraged to use tags between 240 and 255. Tags are used by tools such as vmmap(1) to help identify specific memory regions. .It Dv MAP_FILE -Mapped from a regular file or character-special device memory. (This is +Mapped from a regular file. (This is the default mapping type, and need not be specified.) .It Dv MAP_FIXED Do not permit the system to select a different address than the one specified. If the specified address cannot be used, -.Nm mmap +.Fn mmap will fail. -If MAP_FIXED is specified, +If +.Dv MAP_FIXED +is specified, .Fa addr must be a multiple of the pagesize. +If a +.Dv MAP_FIXED +request is successful, the mapping established by +.Fn mmap +replaces any previous mappings for the process' pages in the range from +.Fa addr +to +.Fa addr ++ +.Fa len . Use of this option is discouraged. .It Dv MAP_HASSEMAPHORE Notify the kernel that the region may contain semaphores and that special @@ -158,9 +191,9 @@ again in the near future. 
.Pp Conforming applications must specify either MAP_PRIVATE or MAP_SHARED. .Pp -The +The .Xr close 2 -function does not unmap pages, see +system call does not unmap pages, see .Xr munmap 2 for further information. .Pp @@ -173,89 +206,84 @@ the file descriptor argument specifies a file or device to which swapping should be done. .Sh RETURN VALUES Upon successful completion, -.Nm mmap +.Fn mmap returns a pointer to the mapped region. -Otherwise, a value of -1 is returned and +Otherwise, a value of +.Dv MAP_FAILED +is returned and .Va errno is set to indicate the error. .Sh ERRORS -.Fn Mmap +The +.Fn mmap +system call will fail if: .Bl -tag -width Er -.\" =========== -.It Bq Er EACCES -.Fa Fildes -is not open for reading. -.\" =========== .It Bq Er EACCES +The flag +.Dv PROT_READ +was specified as part of the +.Fa prot +argument and +.Fa fd +was not open for reading. The flags -.Dv PROT_WRITE -and .Dv MAP_SHARED -are specified as part of the +and +.Dv PROT_WRITE +were specified as part of the .Fa flags and .Fa prot -parameters and -.Fa fildes -is not open for writing. -.\" =========== +argument and +.Fa fd +was not open for writing. .It Bq Er EBADF -.Fa fildes -is not a valid file descriptor for an open file. +The +.Fa fd +argument +is not a valid open file descriptor. .It Bq Er EINVAL .Dv MAP_FIXED -is specified and the -.I addr -parameter is not page aligned. -.\" =========== -.It Bq Er EINVAL -.Fa fildes -does not reference a regular or character special file. -.\" =========== +was specified and the +.Fa addr +argument was not page aligned, or part of the desired address space +resides out of the valid address space for a user process. .It Bq Er EINVAL .Fa flags does not include either MAP_PRIVATE or MAP_SHARED. -.\" =========== .It Bq Er EINVAL +The .Fa len -is not greater than zero. -.\" =========== +argument +was negative. .It Bq Er EINVAL +The .Fa offset -is not a multiple of the page size, -as returned by -.Xr sysconf 3 . 
-.\" =========== -.It Bq Er EMFILE -The limit on mapped regions (per process or system) is exceeded. -.\" =========== +argument +was not page-aligned based on the page size as returned by getpagesize(3). .It Bq Er ENODEV -The file type for -.Fa fildes -is not supported for mapping. -.\" =========== -.It Bq Er ENOMEM -.Dv MAP_FIXED -is specified and the address range specified -exceeds the address space limit for the process. -.\" =========== +.Dv MAP_ANON +has not been specified and the file +.Fa fd +refers to does not support mapping. .It Bq Er ENOMEM .Dv MAP_FIXED -is specified and the address specified by the +was specified and the .Fa addr -parameter isn't available. -.\" =========== -.It Bq Er ENOMEM +argument was not available. +.Dv MAP_FIXED +was specified and the address range specified exceeds the address space +limit for the process. .Dv MAP_ANON -is specified and insufficient memory is available. -.\" =========== +was specified and insufficient memory was available. .It Bq Er ENXIO -Addresses in the specified range are invalid for fildes. -.\" =========== +Addresses in the specified range are invalid for +.Fa fd . .It Bq Er EOVERFLOW Addresses in the specified range exceed the maximum offset -set for fildes. +set for +.Fa fd . .El .Sh LEGACY SYNOPSIS .Fd #include @@ -286,13 +314,14 @@ parameter must be a multiple of pagesize, as returned by .Fn sysconf . .El -.Sh "SEE ALSO" -.Xr getpagesize 2 , +.Sh SEE ALSO .Xr madvise 2 , .Xr mincore 2 , +.Xr minherit 2 , .Xr mlock 2 , .Xr mprotect 2 , .Xr msync 2 , +.Xr munlock 2 , .Xr munmap 2 , -.Xr sysconf 3 , -.Xr compat 5 +.Xr shmat 2 , +.Xr getpagesize 3 diff --git a/bsd/man/man2/mprotect.2 b/bsd/man/man2/mprotect.2 index e92b2a69b..5c2df2a29 100644 --- a/bsd/man/man2/mprotect.2 +++ b/bsd/man/man2/mprotect.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
+.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: mprotect.2,v 1.6 1995/10/12 15:41:08 jtc Exp $ .\" .\" Copyright (c) 1991, 1993 @@ -32,8 +55,9 @@ .\" SUCH DAMAGE. .\" .\" @(#)mprotect.2 8.1 (Berkeley) 6/9/93 +.\" $FreeBSD: src/lib/libc/sys/mprotect.2,v 1.18 2007/01/09 00:28:15 imp Exp $ .\" -.Dd June 9, 1993 +.Dd October 16, 2008 .Dt MPROTECT 2 .Os .Sh NAME @@ -53,8 +77,25 @@ The system call changes the specified pages to have protection .Fa prot . -Not all implementations will guarantee protection on a page basis; -the granularity of protection changes may be as large as an entire region. +Not all implementations will guarantee protection on a page basis but Mac OS X's +current implementation does. +.Pp +When a program violates the protections of a page, it gets a SIGBUS or SIGSEGV signal. +.Pp +Currently +.Fa prot +can be one or more of the following: +.Pp +.Bl -tag -width ".Dv PROT_WRITE" -compact +.It Dv PROT_NONE +No permissions at all. +.It Dv PROT_READ +The pages can be read. +.It Dv PROT_WRITE +The pages can be written. 
+.It Dv PROT_EXEC +The pages can be executed. +.El .Sh RETURN VALUES Upon successful completion, a value of 0 is returned. @@ -77,7 +118,9 @@ on the specified address range. .\" =========== .It Bq Er EINVAL .Fa addr -is not a multiple of the page size. +is not a multiple of the page size (i.e. +.Fa addr +is not page-aligned). .\" =========== .\" .It Bq Er ENOMEM .\" The specified address range is outside of the address range diff --git a/bsd/man/man2/munmap.2 b/bsd/man/man2/munmap.2 index b32a122f2..73a929e08 100644 --- a/bsd/man/man2/munmap.2 +++ b/bsd/man/man2/munmap.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. 
+.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: munmap.2,v 1.5 1995/02/27 12:35:03 cgd Exp $ .\" .\" Copyright (c) 1991, 1993 @@ -33,7 +56,7 @@ .\" .\" @(#)munmap.2 8.2 (Berkeley) 4/15/94 .\" -.Dd April 15, 1994 +.Dd October 16, 2008 .Dt MUNMAP 2 .Os .Sh NAME @@ -53,6 +76,32 @@ system call deletes the mappings for the specified address range, causing further references to addresses within the range to generate invalid memory references. +.Sh DIRTY PAGE HANDLING +How +.Fn munmap +handles a dirty page, depends on what type of memory is being unmapped: +.Pp +.Bl -tag -width "[System V Shared]" +.It Bq Anonymous +If the memory is anonymous memory and if the last reference is going +away, then the contents are discarded by definition of anonymous memory. +.It Bq System V Shared +If the memory mapping was created using System V shared memory, then +the contents persist until the System V memory region is destroyed or +the system is rebooted. +.It Bq File mapping +If the mapping maps data from a file (MAP_SHARED), then the memory will +eventually be written back to disk if it's dirty. This will happen +automatically at some point in the future (implementation dependent). +Note: to force the memory to be written back to the disk, use +.Xr msync 2 . +.El +.Pp +If there are still other references to the memory when the munmap is +done, then nothing is done to the memory itself and it may be swapped +out if need be. The memory will continue to persist until the last +reference goes away (except for System V shared memory in which case, +see above). .Sh RETURN VALUES Upon successful completion, .Nm munmap diff --git a/bsd/man/man2/open.2 b/bsd/man/man2/open.2 index e12aa2e15..2d121402d 100644 --- a/bsd/man/man2/open.2 +++ b/bsd/man/man2/open.2 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. 
+.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: open.2,v 1.8 1995/02/27 12:35:14 cgd Exp $ .\" .\" Copyright (c) 1980, 1991, 1993 @@ -33,7 +56,7 @@ .\" .\" @(#)open.2 8.2 (Berkeley) 11/16/93 .\" -.Dd November 16, 1993 +.Dd October 7, 2008 .Dt OPEN 2 .Os BSD 4 .Sh NAME @@ -90,6 +113,7 @@ O_SHLOCK atomically obtain a shared lock O_EXLOCK atomically obtain an exclusive lock O_NOFOLLOW do not follow symlinks O_SYMLINK allow open of symlinks +O_EVTONLY descriptor requested for event notifications only .Ed .Pp Opening a file with @@ -154,6 +178,12 @@ is a symbolic link then the .Fn open will be for the symbolic link itself, not what it links to. .Pp +The +.Dv O_EVTONLY +flag is only intended for monitoring a file for changes (e.g. kqueue). Note: when +this flag is used, the opened file will not prevent an unmount +of the volume that contains the file. +.Pp If successful, .Fn open returns a non-negative integer, termed a file descriptor. 
diff --git a/bsd/man/man2/posix_spawn.2 b/bsd/man/man2/posix_spawn.2 index 70cac7bfc..76bfa055b 100644 --- a/bsd/man/man2/posix_spawn.2 +++ b/bsd/man/man2/posix_spawn.2 @@ -337,7 +337,7 @@ of a super-user as well. .Xr sysctl 3 , .Xr environ 7 , .Xr posix_spawnattr_init 3 , -.Xr posix_file_actions_init 3 +.Xr posix_spawn_file_actions_init 3 , .Sh STANDARDS .St -susv3 [SPN] .Sh HISTORY diff --git a/bsd/man/man2/profil.2 b/bsd/man/man2/profil.2 index dd31dfff1..40ee9a463 100644 --- a/bsd/man/man2/profil.2 +++ b/bsd/man/man2/profil.2 @@ -36,7 +36,7 @@ .\" .\" @(#)profil.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd September 26, 2008 .Dt PROFIL 2 .Os .Sh NAME @@ -49,6 +49,13 @@ .Ft int .Fn profil "char *samples" "size_t size" "u_long offset" "u_int scale" .Sh DESCRIPTION +.Pp +.Fd -- This function is now deprecated. It will always return EINVAL. -- +.Pp +The intended replacements are the user-level developer tools, like CHUD and dtrace. +.Pp +------ +.Pp The .Fn profil function enables or disables diff --git a/bsd/man/man2/pthread_setugid_np.2 b/bsd/man/man2/pthread_setugid_np.2 new file mode 100644 index 000000000..44597d4af --- /dev/null +++ b/bsd/man/man2/pthread_setugid_np.2 @@ -0,0 +1,86 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. 
+.\" .\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.Dd October 1, 2008 +.Dt PTHREAD_SETUGID_NP 2 +.Os +.Sh NAME +.Nm pthread_setugid_np +.Nd Set the per-thread userid and single groupid. +.Sh SYNOPSIS +.Fd #include +.Fd #include +.Ft int +.Fn pthread_setugid_np "uid_t uid" "gid_t gid" +.Sh DESCRIPTION +.Fn pthread_setugid_np +changes the current thread's effective, real, and saved userid and groupid to the requested +userid and groupid ( +.Fa uid +and +.Fa gid +, respectively) and clears all other groupids. +.Pp +.Fa uid +can be the current real userid, KAUTH_UID_NONE, or, if the caller is privileged, any userid. +.Fa gid +can be the current real groupid or, if the caller is privileged, any single groupid. +.Pp +Setting +.Fa uid +to KAUTH_UID_NONE means to "revert to the per process credential". +.Sh CAVEATS +Temporarily restoring root privileges for a non-privileged process is only possible on a per-process basis and not a per-thread basis. +.Pp +.Fn pthread_setugid_np +is not intended as a privilege escalation mechanism. +.Pp +Do not use +.Fn pthread_setugid_np +in a security sensitive situation. +.Sh RETURN VALUES +Upon successful completion, a value of 0 is returned. +Otherwise, -1 is returned and the global variable +.Va errno +is set to indicate the error. +.Sh ERRORS +.Fn pthread_setugid_np +fails if one or more of the following are true: +.\" ======== +.Bl -tag -width Er +.It Bq EPERM +The calling process does not have the correct credentials to set the override identity (i.e.
The current credentials do not imply "super-user"). +.\" ======== +.It Bq EPERM +If +.Fa uid +is set to KAUTH_UID_NONE, +the current thread must already be assuming another identity in order to revert back. +.\" ======== +.It Bq EPERM +The current thread cannot already be assuming another identity. +.El +.Sh SEE ALSO +.Xr setuid 2 +.Xr setgid 2 +.Xr seteuid 2 +.Xr setegid 2 diff --git a/bsd/man/man2/recv.2 b/bsd/man/man2/recv.2 index 433bca52a..b777106b3 100644 --- a/bsd/man/man2/recv.2 +++ b/bsd/man/man2/recv.2 @@ -196,6 +196,7 @@ and specify the destination address if the socket is unconnected; .Fa msg_name may be given as a null pointer if no names are desired or required. +.Pp The .Fa msg_iov and @@ -203,6 +204,18 @@ and arguments describe scatter gather locations, as discussed in .Xr read 2 . +.Fa msg_iovlen +shall be set to the dimension of this array. In each +.Fa iovec +structure, the +.Fa iov_base +field specifies a storage area and the +.Fa iov_len +field gives its size in bytes. Each storage area indicated by +.Fa msg_iov +is filled with received data in turn until all of the received data +is stored or all of the areas have been filled. +.Pp The .Fa msg_control argument, @@ -332,7 +345,12 @@ call may also fail if: .Bl -tag -width Er .\" =========== .It Bq Er EMSGSIZE -The requested message size is invalid. +The +.Fa msg_iovlen +member of the +.Fa msghdr +structure pointed to by message is less than or equal to 0, or is greater than +.Dv IOV_MAX . .\" =========== .It Bq Er ENOMEM Insufficient memory is available. diff --git a/bsd/man/man2/removexattr.2 b/bsd/man/man2/removexattr.2 index 0e0366b1a..bcb455474 100644 --- a/bsd/man/man2/removexattr.2 +++ b/bsd/man/man2/removexattr.2 @@ -59,6 +59,16 @@ acts on the target of if it is a symbolic link. With this option, .Fn removexattr will act on the link itself. 
+.It Dv XATTR_SHOWCOMPRESSION +.Fn removexattr +and +.Fn fremovexattr +will remove HFS Plus Compression extended attribute +.Fa name +(if present) for the file referred to by +.Fa path +or +.Fa fd . .El .Pp .Fn fremovexattr diff --git a/bsd/man/man2/rename.2 b/bsd/man/man2/rename.2 index 39487e6c2..cefb57810 100644 --- a/bsd/man/man2/rename.2 +++ b/bsd/man/man2/rename.2 @@ -33,7 +33,7 @@ .\" .\" @(#)rename.2 8.1 (Berkeley) 6/4/93 .\" -.Dd June 4, 1993 +.Dd September 18, 2008 .Dt RENAME 2 .Os BSD 4.2 .Sh NAME @@ -76,7 +76,7 @@ If the final component of is a symbolic link, the symbolic link is renamed, not the file or directory to which it points. -.Sh CAVEAT +.Sh CAVEATS The system can deadlock if a loop is present in the file system graph. This loop takes the form of an entry in directory .Ql Pa a , @@ -107,6 +107,12 @@ the underlying filesystem implementation. It is recommended that any hard links to directories in an underlying filesystem should be replaced by symbolic links by the system administrator to avoid the possibility of deadlocks. +.Pp +Moving or renaming a file or directory into a directory with inheritable ACLs does not result in ACLs being set on the file or directory. Use +.Xr acl 3 +in conjunction with +.Fn rename +to set ACLs on the file or directory. .Sh RETURN VALUES A 0 value is returned if the operation succeeds, otherwise .Fn rename diff --git a/bsd/man/man2/revoke.2 b/bsd/man/man2/revoke.2 index 00d099171..580c2ce75 100644 --- a/bsd/man/man2/revoke.2 +++ b/bsd/man/man2/revoke.2 @@ -98,6 +98,10 @@ The named file is neither a character special or block special file. .It Bq Er EPERM The caller is neither the owner of the file nor the super user. +.It Bq Er ENOTSUP +The path does not represent a block or character device. +.It Bq Er EBUSY +The path represents a block device which is providing the backing for a mounted volume. 
.El .Sh SEE ALSO .Xr close 2 diff --git a/bsd/man/man2/searchfs.2 b/bsd/man/man2/searchfs.2 index c3b602b4d..663f38ac0 100644 --- a/bsd/man/man2/searchfs.2 +++ b/bsd/man/man2/searchfs.2 @@ -16,7 +16,7 @@ .\" .\" @(#)searchfs.2 . -.Dd December 15, 2003 +.Dd October 13, 2008 .Dt SEARCHFS 2 .Os Darwin .Sh NAME @@ -26,7 +26,7 @@ .Fd #include .Fd #include .Ft int -.Fn searchfs "const char * path" "struct fssearchblock * searchBlock" "unsigned long * numMatches" "unsigned long scriptCode" "unsigned long options" "struct searchstate * state" +.Fn searchfs "const char* path" "struct fssearchblock* searchBlock" "unsigned int* numMatches" "unsigned int scriptCode" "unsigned int options" "struct searchstate* state" . .Sh DESCRIPTION The @@ -79,7 +79,7 @@ struct fssearchblock { struct attrlist * returnattrs; void * returnbuffer; size_t returnbuffersize; - unsigned long maxmatches; + unsigned int maxmatches; struct timeval timelimit; void * searchparams1; size_t sizeofsearchparams1; @@ -105,7 +105,11 @@ structure are defined as follows. . .It returnattrs .Fn searchfs -can return arbitrary attributes of the file system objects that it finds. +can return arbitrary attributes of the file system objects that meet the designated +search criteria passed in via +.Vt searchparams1 +and +.Vt searchparams2. This field must point to an .Vt attrlist structure that specifies the attributes that you want returned. @@ -142,7 +146,7 @@ to run. . If you're implementing a volume format, you should impose your own internal limit on the duration of this call to prevent a malicious user program -from monopolising kernel resources. +from monopolizing kernel resources. .Pp . .It searchparams1 @@ -196,7 +200,7 @@ This is discussed in detail below. The .Fa numMatches parameter points to an -.Vt unsigned long +.Vt unsigned int variable. The initial value of this variable is ignored. On return, this variable contains the number of matching file system objects found. @@ -266,13 +270,15 @@ or both. 
.It SRCHFS_SKIPLINKS If this bit is set, .Fn searchfs -will only return one reference for a hard linked file, rather that a reference +will only return one reference for a hard linked file, rather than a reference for each hard link to the file. .Pp This option is not recommended for general development. Its primary client is the .Xr quotacheck 2 -utility. +utility. Note that not all filesystems that support +.Fn searchfs +support this option and may return EINVAL if it is requested. .Pp . This option is privileged (the caller's effective UID must be 0) and cannot @@ -443,8 +449,9 @@ The format of each of these buffers is determined by the attributes that you're The values are packed in exactly the same way as they would be returned from .Xr getattrlist 2 , including the leading -.Vt unsigned long -length value. +.Vt u_int32_t +length value. Note that the size of these buffers must be bounded by SEARCHFS_MAX_SEARCHPARMS bytes, +which is defined in . .Pp . The attribute values in the first and second search buffers form a lower and upper bound for @@ -571,7 +578,9 @@ One of the pointer parameters points to an invalid address. The .Fa options parameter contains an invalid flag or sizeofsearchparams1/2 is greater than -SEARCHFS_MAX_SEARCHPARMS (see attr.h). +SEARCHFS_MAX_SEARCHPARMS (see attr.h). Additionally, filesystems that do +not support SRCHFS_SKIPLINKS may return EINVAL if this search option +is requested. . .It Bq Er EAGAIN The search terminated with partial results, either because @@ -599,9 +608,7 @@ An I/O error occurred while reading from or writing to the file system. .Pp . .Sh CAVEATS -Not all attributes can be searched for using -.Fn searchfs . -The list currently includes: +The list of searchable attributes currently includes: .Pp . .Bl -item -compact @@ -667,14 +674,14 @@ typedef struct searchstate searchstate_t; .Pp . 
struct SearchAttrBuf { - unsigned long length; + u_int32_t length; char finderInfo[32]; }; typedef struct SearchAttrBuf SearchAttrBuf; .Pp . struct ResultAttrBuf { - unsigned long length; + u_int32_t length; attrreference_t name; fsobj_id_t parObjID; }; @@ -697,9 +704,9 @@ static int SearchFSDemo( SearchAttrBuf lower; SearchAttrBuf upper; static const unsigned char kAllOnes[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; - unsigned long matchCount; - unsigned long matchIndex; - unsigned long options; + unsigned int matchCount; + unsigned int matchIndex; + unsigned int options; searchstate_t state; ResultAttrBuf * thisEntry; attrlist_t returnAttrList; diff --git a/bsd/man/man2/send.2 b/bsd/man/man2/send.2 index 9b6346372..3a4e1e652 100644 --- a/bsd/man/man2/send.2 +++ b/bsd/man/man2/send.2 @@ -53,7 +53,7 @@ .Ft ssize_t .Fo sendmsg .Fa "int socket" -.Fa "const struct msghdr *buffer" +.Fa "const struct msghdr *message" .Fa "int flags" .Fc .Ft ssize_t @@ -127,9 +127,33 @@ data. .Dv MSG_DONTROUTE is usually used only by diagnostic or routing programs. .Pp +The +.Fn sendmsg +system call uses a +.Fa msghdr +structure to minimize the number of directly supplied arguments. +The +.Fa msg_iov +and +.Fa msg_iovlen +fields of message specify zero or more buffers +containing the data to be sent. +.Fa msg_iov +points to an array of iovec structures; +.Fa msg_iovlen +shall be set to the dimension of this array. +In each iovec structure, the +.Fa iov_base +field specifies a storage area and +the +.Fa iov_len +field gives its size in bytes. Some of these sizes can be zero. +The data from each storage area indicated by +.Fa msg_iov +is sent in turn. See .Xr recv 2 -for a description of the +for a complete description of the .Fa msghdr structure. .Sh RETURN VALUES @@ -174,6 +198,7 @@ before any data is transmitted. .It Bq Er EMSGSIZE The socket requires that message be sent atomically, and the size of the message to be sent makes this impossible. +.Dv IOV_MAX . 
.\" =========== .It Bq Er ENETDOWN The local network interface used to reach the destination is down. @@ -259,6 +284,13 @@ system call will fail if: .\" =========== .It Bq Er EINVAL The sum of the iov_len values overflows an ssize_t. +.\" =========== +.It Bq Er EMSGSIZE +The socket requires that message be sent atomically, +and the size of the message to be sent makes this impossible, +or the msg_iovlen member of the msghdr structure pointed to by message +is less than or equal to 0 or is greater than +.Dv IOV_MAX . .El .Sh LEGACY SYNOPSIS .Fd #include diff --git a/bsd/man/man2/sendfile.2 b/bsd/man/man2/sendfile.2 index 8e31581e2..d2919e3d2 100644 --- a/bsd/man/man2/sendfile.2 +++ b/bsd/man/man2/sendfile.2 @@ -66,7 +66,8 @@ argument specifies how many bytes should be sent with 0 having the special meaning to send until the end of file has been reached. On return the value pointed to by the .Fa len -argument indicates how many bytes have been sent. +argument indicates how many bytes have been sent, except when a header or +trailer is specified as shown below. The .Fa len pointer may not be NULL. @@ -103,6 +104,15 @@ arrays is specified by and .Fa trl_cnt . .Pp +When a header or trailer is specified the value of +.Fa len +returned will include the size of header or trailer sent. The user should +provide sufficiently large value of +.Fa len +as argument including the size of header or trailer, +otherwise only part of file data will be sent +following the header. +.Pp The .Fa flags parameter is reserved for future expansion and must be set to 0. 
Any other value diff --git a/bsd/man/man2/setattrlist.2 b/bsd/man/man2/setattrlist.2 index d2fbb6b53..c0e393ee6 100644 --- a/bsd/man/man2/setattrlist.2 +++ b/bsd/man/man2/setattrlist.2 @@ -20,26 +20,38 @@ .Dt SETATTRLIST 2 .Os Darwin .Sh NAME -.Nm setattrlist +.Nm setattrlist , +.Nm fsetattrlist .Nd set file system attributes .Sh SYNOPSIS .Fd #include .Fd #include .Ft int .Fn setattrlist "const char* path" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" +.Ft int +.Fn fsetattrlist "int fd" "struct attrlist * attrList" "void * attrBuf" "size_t attrBufSize" "unsigned long options" . .Sh DESCRIPTION The .Fn setattrlist -function sets attributes (that is, metadata) of file system objects. -It is the logical opposite of +and +.Fn fsetattrlist +functions set attributes (that is, metadata) of file system objects. +They are the logical opposite of .Xr getattrlist 2 . -The function sets attributes about the file system object specified by +The +.Fn setattrlist +function sets attributes about the file system object specified by .Fa path from the values in the buffer specified by .Fa attrBuf and -.Fa attrBufSize . +.Fa attrBufSize ; +the +.Fn fsetattrlist +function does the same for the +.Fa fd +file descriptor. The .Fa attrList parameter determines what attributes are set. @@ -49,22 +61,30 @@ parameter lets you control specific aspects of the function's behaviour. .Pp . The -.Fn setattrlist -function is only supported by certain volume format implementations. +functions are only supported by certain volume format implementations. For maximum compatibility, client programs should use high-level APIs (such as the Carbon File Manager) to access file system attributes. These high-level APIs include logic to emulate file system attributes on volumes that don't support -.Fn setattrlist . +.Fn setattrlist +and +.Fn fsetattrlist . .Pp . .\" path parameter . The .Fa path -parameter must reference a valid file system object. 
+parameter for +.Fn setattrlist +must reference a valid file system object. All directories listed in the path name leading to the object must be searchable. +The +.Fa fd +parameter for +.Fn fsetattrlist +must be a valid file descriptor for the calling process. You must own the file system object in order to set any of the following attributes: .Pp @@ -121,7 +141,7 @@ parameters specify a buffer that contains the attribute values to set. Attributes are packed in exactly the same way as they are returned from .Xr getattrlist 2 except that, when setting attributes, the buffer does not include the leading -.Vt unsigned long +.Vt u_int32_t length value. .Pp . @@ -178,34 +198,48 @@ you should be careful to support the behaviour specified by this document. . .Sh ERRORS .Fn setattrlist +and +.Fn fsetattrlist will fail if: .Bl -tag -width Er . .It Bq Er ENOTSUP -The volume does not support -.Fn setattrlist . +The call is not supported by the volume. . .It Bq Er ENOTDIR -A component of the path prefix is not a directory. +A component of the path prefix for +.Fn setattrlist +is not a directory. . .It Bq Er ENAMETOOLONG -A component of a path name exceeded +A component of a path name for +.Fn setattrlist +exceeded .Dv NAME_MAX characters, or an entire path name exceeded .Dv PATH_MAX characters. . .It Bq Er ENOENT -The file system object does not exist. +The file system object for +.Fn setattrlist +does not exist. +. +.It Bq Er EBADF +The file descriptor argument for +.Fn fsetattrlist +is not a valid file descriptor. . .It Bq Er EROFS The volume is read-only. . .It Bq Er EACCES -Search permission is denied for a component of the path prefix. +Search permission is denied for a component of the path prefix for +.Fn setattrlist . . .It Bq Er ELOOP -Too many symbolic links were encountered in translating the pathname. +Too many symbolic links were encountered in translating the pathname for +.Fn setattrlist . . 
.It Bq Er EFAULT .Fa path , @@ -290,8 +324,8 @@ This assumes that the target volume supports the required attributes typedef struct attrlist attrlist_t; .Pp . -struct FInfoAttrBuf - unsigned long length; +struct FInfoAttrBuf { + u_int32_t length; fsobj_type_t objType; char finderInfo[32]; }; diff --git a/bsd/man/man2/setaudit.2 b/bsd/man/man2/setaudit.2 index c0e739cc7..6b1979f5d 100644 --- a/bsd/man/man2/setaudit.2 +++ b/bsd/man/man2/setaudit.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved. .\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,37 +20,223 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd March 23, 2009 .Dt SETAUDIT 2 -.Os Darwin +.Os .Sh NAME -.Nm setaudit -.Nd set the audit information for the current process +.Nm setaudit , +.Nm setaudit_addr +.Nd "set audit session state" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn setaudit "const struct auditinfo * auditinfo" +.Fn setaudit "auditinfo_t *auditinfo" +.Ft int +.Fn setaudit_addr "auditinfo_addr_t *auditinfo_addr" "u_int length" .Sh DESCRIPTION The .Fn setaudit -function sets the audit information for the current process. -.Fa auditinfo -should point at a -.Fa struct auditinfo -describing the requested user audit settings. +system call +sets the active audit session state for the current process via the +.Vt auditinfo_t +pointed to by +.Fa auditinfo . +The +.Fn setaudit_addr +system call +sets extended state via +.Fa auditinfo_addr +and +.Fa length . 
+.Pp +The +.Fa auditinfo_t +data structure is defined as follows: +.nf +.in +4n + +struct auditinfo { + au_id_t ai_auid; /* Audit user ID */ + au_mask_t ai_mask; /* Audit masks */ + au_tid_t ai_termid; /* Terminal ID */ + au_asid_t ai_asid; /* Audit session ID */ +}; +typedef struct auditinfo auditinfo_t; +.in +.fi +.Pp +The +.Fa ai_auid +variable contains the audit identifier which is recorded in the audit log for +each event the process caused. +The value of AU_DEFAUDITID (-1) should not be used. +The exception is if the value of audit identifier is not known at the +start of the session but will be determined and set later. +Until +.Fa ai_auid +is set to something other than AU_DEFAUDITID any audit events +generated by the system will be filtered by the non-attributed audit +mask. +.PP + +The +.Fa au_mask_t +data structure defines the bit mask for auditing successful and failed events +out of the predefined list of event classes. It is defined as follows: +.nf +.in +4n + +struct au_mask { + unsigned int am_success; /* success bits */ + unsigned int am_failure; /* failure bits */ +}; +typedef struct au_mask au_mask_t; +.in +.fi +.PP + +The +.Fa au_tid_t +data structure defines the Terminal ID recorded with every event caused by the +process. It is defined as follows: +.nf +.in +4n + +struct au_tid { + dev_t port; + u_int32_t machine; +}; +typedef struct au_tid au_tid_t; + +.in +.fi +.PP +The +.Fa ai_asid +variable contains the audit session ID which is recorded with every event +caused by the process. It can be any value in the range 1 to PID_MAX (99999). +If the value of AU_ASSIGN_ASID is used for +.Fa ai_asid +a unique session ID will be generated by the kernel. +The audit session ID will be returned in +.Fa ai_asid +field on success. +.Pp +The +.Fn setaudit_addr +system call +uses the expanded +.Fa auditinfo_addr_t +data structure, which supports Terminal IDs with larger addresses such as those used +in IP version 6. 
It is defined as follows: +.nf +.in +4n + +struct auditinfo_addr { + au_id_t ai_auid; /* Audit user ID. */ + au_mask_t ai_mask; /* Audit masks. */ + au_tid_addr_t ai_termid; /* Terminal ID. */ + au_asid_t ai_asid; /* Audit session ID. */ + u_int64_t ai_flags; /* Audit session flags */ +}; +typedef struct auditinfo_addr auditinfo_addr_t; +.in +.fi +.Pp +The +.Fa au_tid_addr_t +data structure includes a larger address storage field and an additional +field with the type of address stored: +.nf +.in +4n + +struct au_tid_addr { + dev_t at_port; + u_int32_t at_type; + u_int32_t at_addr[4]; +}; +typedef struct au_tid_addr au_tid_addr_t; +.in +.fi +.Pp +The +.Fa ai_flags +field is opaque to the kernel and can be used to store user +defined session flags. +.Pp +These system calls require an appropriate privilege to complete. +.Pp +These system calls should only be called once at the start of a new +session and not again during the same session to update the session +information. +There are some exceptions, however. +The +.Fa ai_auid +field may be updated later if initially set to the value of +AU_DEFAUDITID (-1). +Likewise, the +.Fa ai_termid +fields may be updated later if the +.Fa at_type +field in +.Fa au_tid_addr +is set to AU_IPv4 and the other +.Fa au_tid_addr +fields are all set to zero. +The +.Fa ai_flags +field can only be set when a new session is initially created. +Creating a new session is done by setting the +.Fa ai_asid +field to a unique session value or AU_ASSIGN_ASID. +These system calls will fail when attempting to change the +.Fa ai_auid , +.Fa ai_termid , +or +.Fa ai_flags +fields once set to something other than the default values. +The audit preselection masks may be changed at any time +but are usually updated with +.Xr auditon 2 +using the A_SETPMASK command. .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. 
+.Rv -std setaudit setaudit_addr +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er EFAULT +A failure occurred while data was being transferred to or from +the kernel. +.It Bq Er EINVAL +An illegal argument was passed to the system call. +.It Bq Er EPERM +The process does not have sufficient permission to complete +the operation. +.El .Sh SEE ALSO .Xr audit 2 , .Xr auditon 2 , -.Xr auditctl 2 , +.Xr getaudit 2 , .Xr getauid 2 , .Xr setauid 2 , -.Xr getaudit 2 +.Xr libbsm 3 .Sh HISTORY -The -.Fn setaudit -function call first appeared in Mac OS X 10.3 (Panther). +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org +and +.An Stacey Son Aq sson@FreeBSD.org . diff --git a/bsd/man/man2/setaudit_addr.2 b/bsd/man/man2/setaudit_addr.2 new file mode 100644 index 000000000..f11b4169f --- /dev/null +++ b/bsd/man/man2/setaudit_addr.2 @@ -0,0 +1 @@ +.so man2/setaudit.2 diff --git a/bsd/man/man2/setauid.2 b/bsd/man/man2/setauid.2 index 0fe8e9df4..299196718 100644 --- a/bsd/man/man2/setauid.2 +++ b/bsd/man/man2/setauid.2 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2007 Apple Inc. All rights reserved. +.\" Copyright (c) 2008-2009 Apple Inc. All rights reserved. 
.\" .\" @APPLE_LICENSE_HEADER_START@ .\" @@ -20,35 +20,86 @@ .\" .\" @APPLE_LICENSE_HEADER_END@ .\" -.Dd July 30, 2007 +.Dd March 21, 2009 .Dt SETAUID 2 -.Os Darwin +.Os .Sh NAME .Nm setauid -.Nd set the audit user ID for the calling process +.Nd "set audit identifier" .Sh SYNOPSIS -.Fd #include +.In bsm/audit.h .Ft int -.Fn setauid "const au_id_t * auid" +.Fn setauid "au_id_t *auid" .Sh DESCRIPTION The .Fn setauid -function sets the audit user ID for the calling process. The ID is set to the -value pointed at by +system call +sets the active audit identifier for the current process from the +.Vt au_id_t +pointed to by .Fa auid . +The value of AU_DEFAUDITID (-1) should not be used for +.Fa auid . +.Pp +This system call requires an appropriate privilege to complete. +.Pp +Once the audit identifier is set to a value other than +AU_DEFAUDITID (-1), it is immutable. +This system call may be used to set the audit identifier after +initially setting up the audit session with the +.Xr setaudit 2 , +or +.Xr setaudit_addr 2 +system call. .Sh RETURN VALUES -Upon successful completion a value of 0 is returned. -Otherwise, a value of -1 is returned and -.Va errno -is set to indicate the error. +.Rv -std +.Sh ERRORS +The +.Fn setauid +function will fail if: +.Bl -tag -width Er +.It Bq Er EFAULT +A failure occurred while data was being transferred to +the kernel. +.It Bq Er EINVAL +An invalid value for +.Fa auid +was used. +.Fa auid +can be any value of type +.Vt au_id_t +except AU_DEFAUDITID. +.It Bq Er EPERM +The process does not have sufficient permission to complete +the operation. +.El .Sh SEE ALSO .Xr audit 2 , .Xr auditon 2 , -.Xr auditctl 2 , -.Xr getauid 2 , .Xr getaudit 2 , -.Xr setaudit 2 +.Xr getaudit_addr 2 , +.Xr getauid 2 , +.Xr setaudit 2 , +.Xr setaudit_addr 2 , +.Xr libbsm 3 .Sh HISTORY -The -.Fn setauid -function call first appeared in Mac OS X 10.3 (Panther). 
+The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org +and +.An Stacey Son Aq sson@FreeBSD.org . diff --git a/bsd/man/man2/shmget.2 b/bsd/man/man2/shmget.2 index d6a135dce..9ae8ae131 100644 --- a/bsd/man/man2/shmget.2 +++ b/bsd/man/man2/shmget.2 @@ -91,6 +91,10 @@ is set to the current time. .Va shm_segsz is set to the value of .Fa size . +.It +The +.Xr ftok 3 +function may be used to generate a key from a pathname. .El .Sh RETURN VALUES Upon successful completion a positive shared memory segment identifier is returned. @@ -156,6 +160,7 @@ The type of .Fa size has changed. 
.Sh SEE ALSO +.Xr ftok 3 , .Xr shmat 2 , .Xr shmctl 2 , .Xr shmdt 2 , diff --git a/bsd/man/man2/sigaction.2 b/bsd/man/man2/sigaction.2 index 01feeb7e3..069197f90 100644 --- a/bsd/man/man2/sigaction.2 +++ b/bsd/man/man2/sigaction.2 @@ -32,7 +32,7 @@ .\" From: @(#)sigaction.2 8.2 (Berkeley) 4/3/94 .\" $FreeBSD: src/lib/libc/sys/sigaction.2,v 1.48 2003/03/24 16:07:19 charnier Exp $ .\" -.Dd April 3, 1994 +.Dd September 18, 2008 .Dt SIGACTION 2 .Os .Sh NAME @@ -43,17 +43,22 @@ .Sh SYNOPSIS .In signal.h .Bd -literal + struct sigaction { - union { - void (*__sa_handler)(int); - void (*__sa_sigaction)(int, struct __siginfo *, void *); - } __sigaction_u; /* signal handler */ - int sa_flags; /* see signal options below */ + union __sigaction_u __sigaction_u; /* signal handler */ sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +union __sigaction_u { + void (*__sa_handler)(int); + void (*__sa_sigaction)(int, struct __siginfo *, + void *); }; #define sa_handler __sigaction_u.__sa_handler #define sa_sigaction __sigaction_u.__sa_sigaction + .Ed .Ft int .Fo sigaction @@ -416,17 +421,11 @@ Base Interfaces: Realtime Interfaces: .Pp .Fn aio_error , -.Fn clock_gettime , .Fn sigpause , -.Fn timer_getoverrun , .Fn aio_return , -.Fn fdatasync , -.Fn sigqueue , -.Fn timer_gettime , .Fn aio_suspend , .Fn sem_post , -.Fn sigset , -.Fn timer_settime . +.Fn sigset . 
.Pp ANSI C Interfaces: .Pp diff --git a/bsd/man/man2/socket.2 b/bsd/man/man2/socket.2 index c8d59a4a2..8652a2a21 100644 --- a/bsd/man/man2/socket.2 +++ b/bsd/man/man2/socket.2 @@ -35,7 +35,7 @@ .\" .Dd June 4, 1993 .Dt SOCKET 2 -.Os BSD 4.2 +.Os .Sh NAME .Nm socket .Nd create an endpoint for communication @@ -61,11 +61,14 @@ These families are defined in the include file The currently understood formats are .Pp .Bd -literal -offset indent -compact -AF_UNIX (UNIX internal protocols), -AF_INET (ARPA Internet protocols), -AF_ISO (ISO protocols), -AF_NS (Xerox Network Systems protocols), and -AF_IMPLINK (IMP \*(lqhost at IMP\*(rq link layer). +PF_LOCAL Host-internal protocols, formerly called PF_UNIX, +PF_UNIX Host-internal protocols, deprecated, use PF_LOCAL, +PF_INET Internet version 4 protocols, +PF_ROUTE Internal Routing protocol, +PF_KEY Internal key-management function, +PF_INET6 Internet version 6 protocols, +PF_SYSTEM System domain, +PF_NDRV Raw access to network device .Ed .Pp The socket has the indicated diff --git a/bsd/man/man2/socketpair.2 b/bsd/man/man2/socketpair.2 index 62a33306e..6664ade4b 100644 --- a/bsd/man/man2/socketpair.2 +++ b/bsd/man/man2/socketpair.2 @@ -99,6 +99,10 @@ The specified protocol is not supported on this machine. .\" =========== .It Bq Er EPROTOTYPE The socket type is not supported by the protocol. +.\" =========== +.It Bq Er EACCES +The process does not have appropriate privileges to create a socket of the +specified type and/or protocol. .El .Sh LEGACY SYNOPSIS .Fd #include @@ -108,7 +112,7 @@ The include file .In sys/types.h is necessary. 
.Sh SEE ALSO -.Xr pipe 2 , +.Xr socket 2 , .Xr read 2 , .Xr write 2 , .Xr compat 5 diff --git a/bsd/man/man2/stat.2 b/bsd/man/man2/stat.2 index 53704ece8..76cca02f1 100644 --- a/bsd/man/man2/stat.2 +++ b/bsd/man/man2/stat.2 @@ -33,13 +33,16 @@ .\" .\" @(#)stat.2 8.3 (Berkeley) 4/19/94 .\" -.Dd April 19, 1994 +.Dd May 15, 2008 .Dt STAT 2 .Os BSD 4 .Sh NAME .Nm fstat , +.Nm fstat64 , .Nm lstat , -.Nm stat +.Nm lstat64 , +.Nm stat , +.Nm stat64 .Nd get file status .Sh SYNOPSIS .Fd #include @@ -58,10 +61,29 @@ .Fa "const char *restrict path" .Fa "struct stat *restrict buf" .Fc +.Sh TRANSITIONAL SYNOPSIS (NOW DEPRECATED) +.Ft int +.br +.Fo fstat64 +.Fa "int fildes" +.Fa "struct stat64 *buf" +.Fc ; +.sp +.Ft int +.br +.Fo lstat64 +.Fa "const char *restrict path" +.Fa "struct stat64 *restrict buf" +.Fc ; +.sp +.Ft int +.br +.Fo stat64 +.Fa "const char *restrict path" +.Fa "struct stat64 *restrict buf" +.Fc ; .Sh DESCRIPTION -The -.Fn stat -family of functions obtain information about a file. The +The .Fn stat function obtains information about the file pointed to by .Fa path . @@ -69,21 +91,22 @@ Read, write or execute permission of the named file is not required, but all directories listed in the path name leading to the file must be searchable. .Pp -.Fn Lstat +The +.Fn lstat +function is like .Fn stat -except in the case where the named file is a symbolic link, -in which case +except in the case where the named file is a symbolic link; .Fn lstat returns information about the link, while .Fn stat returns information about the file the link references. Unlike other filesystem objects, -symbolic links do not have an owner, group, access mode, times, etc. -Instead, these attributes are taken from the directory that +symbolic links may not have an owner, group, access mode, times, etc. +Instead, these attributes may be taken from the directory that contains the link. 
-The only attributes returned from an +In this case, the only attributes returned from an .Fn lstat that refer to the symbolic link itself are the file type (S_IFLNK), size, blocks, and link count (always 1). @@ -102,8 +125,50 @@ structure as defined by .Aq Pa sys/stat.h and into which information is placed concerning the file. +When the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is not defined (the +.Ft ino_t +type is 32-bits), the +.Fa stat +structure is defined as: +.Bd -literal +struct stat { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */ + dev_t st_dev; /* device inode resides on */ + ino_t st_ino; /* inode's number */ + mode_t st_mode; /* inode protection mode */ + nlink_t st_nlink; /* number of hard links to the file */ + uid_t st_uid; /* user-id of owner */ + gid_t st_gid; /* group-id of owner */ + dev_t st_rdev; /* device type, for special file inode */ + struct timespec st_atimespec; /* time of last access */ + struct timespec st_mtimespec; /* time of last data modification */ + struct timespec st_ctimespec; /* time of last file status change */ + off_t st_size; /* file size, in bytes */ + quad_t st_blocks; /* blocks allocated for file */ + u_long st_blksize;/* optimal file sys I/O ops blocksize */ + u_long st_flags; /* user defined flags for file */ + u_long st_gen; /* file generation number */ +}; +.Ed +.Pp +However, when the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is defined, the +.Ft ino_t +type will be 64-bits (force 64-bit inode mode by defining the +.Dv _DARWIN_USE_64_BIT_INODE +macro before including header files). +This will cause symbol variants of the +.Fa stat +family, with the +.Fa $INODE64 +suffixes, to be automatically linked in. 
+In addition, the +.Fa stat +structure will now be defined as: .Bd -literal -struct stat { +struct stat { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */ dev_t st_dev; /* ID of device containing file */ mode_t st_mode; /* Mode of file (see below) */ nlink_t st_nlink; /* Number of hard links */ @@ -123,8 +188,6 @@ struct stat { int32_t st_lspare; /* RESERVED: DO NOT USE! */ int64_t st_qspare[2]; /* RESERVED: DO NOT USE! */ }; - - .Ed .Pp The time-related fields of @@ -161,8 +224,9 @@ and .Xr write 2 system calls. .It st_birthtime -Time of file creation. Only set once when the file is created. -On filesystems where birthtime is not available, this field holds the +Time of file creation. Only set once when the file is created. This field is +only available in the 64 bit inode variants. On filesystems where birthtime is +not available, this field holds the .Fa ctime instead. .El @@ -300,6 +364,35 @@ in the structure pointed to by The file generation number, .Fa st_gen , is only available to the super-user. +.br +The fields in the stat structure currently marked +.Fa st_spare1 , +.Fa st_spare2 , +and +.Fa st_spare3 +are present in preparation for inode time stamps expanding +to 64 bits. This, however, can break certain programs that +depend on the time stamps being contiguous (in calls to +.Xr utimes 2 ) . +.Sh TRANSITIONAL DESCRIPTION (NOW DEPRECATED) +The +.Fa fstat64 , +.Fa lstat64 +and +.Fa stat64 +routines are equivalent to their corresponding non-64-suffixed routine, +when 64-bit inodes are in effect. +They were added before there was support for the symbol variants, and so are +now deprecated. +Instead of using these, set the +.Dv _DARWIN_USE_64_BIT_INODE +macro before including header files to force 64-bit inode support. +.Pp +The +.Fa stat64 +structure used by these deprecated routines is the same as the +.Fa stat +structure when 64-bit inodes are in effect (see above). 
.Sh SEE ALSO .Xr chflags 2 , .Xr chmod 2 , @@ -326,3 +419,10 @@ An .Fn lstat function call appeared in .Bx 4.2 . +The +.Fn stat64 , +.Fn fstat64 , +and +.Fn lstat64 +system calls first appeared in Mac OS X 10.5 (Leopard) and are now deprecated +in favor of the corresponding symbol variants. diff --git a/bsd/man/man2/stat64.2 b/bsd/man/man2/stat64.2 new file mode 100644 index 000000000..b1a86c195 --- /dev/null +++ b/bsd/man/man2/stat64.2 @@ -0,0 +1 @@ +.so man2/stat.2 diff --git a/bsd/man/man2/statfs.2 b/bsd/man/man2/statfs.2 index 4b6a3db35..16e80f5d4 100644 --- a/bsd/man/man2/statfs.2 +++ b/bsd/man/man2/statfs.2 @@ -33,12 +33,14 @@ .\" .\" @(#)statfs.2 8.3 (Berkeley) 2/11/94 .\" -.Dd February 11, 1994 +.Dd August 14, 2008 .Dt STATFS 2 .Os .Sh NAME .Nm statfs, -.Nm fstatfs +.Nm statfs64, +.Nm fstatfs, +.Nm fstatfs64 .Nd get file system statistics .Sh SYNOPSIS .Fd #include @@ -47,44 +49,105 @@ .Fn statfs "const char *path" "struct statfs *buf" .Ft int .Fn fstatfs "int fd" "struct statfs *buf" +.Sh TRANSITIONAL SYNOPSIS (NOW DEPRECATED) +.Ft int +.br +.Fn statfs64 "const char *path" "struct statfs64 *buf" ; +.sp +.Ft int +.br +.Fn fstatfs64 "int fd" "struct statfs64 *buf" ; .Sh DESCRIPTION -.Fn Statfs -returns information about a mounted file system. -.Fa Path -is the path name of any file within the mounted file system. -.Fa Buf -is a pointer to a +The +.Fn statfs +routine returns information about a mounted file system. +The +.Fa path +argument is the path name of any file or directory within the mounted file system. +The +.Fa buf +argument is a pointer to a .Fa statfs -structure defined as follows: +structure. +When the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is not defined (the +.Ft ino_t +type is 32-bits), that structure is defined as: .Bd -literal typedef struct { int32_t val[2]; } fsid_t; +#define MFSNAMELEN 15 /* length of fs type name, not inc. 
nul */ +#define MNAMELEN 90 /* length of buffer for returned name */ + +struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */ + short f_otype; /* type of file system (reserved: zero) */ + short f_oflags; /* copy of mount flags (reserved: zero) */ + long f_bsize; /* fundamental file system block size */ + long f_iosize; /* optimal transfer block size */ + long f_blocks; /* total data blocks in file system */ + long f_bfree; /* free blocks in fs */ + long f_bavail; /* free blocks avail to non-superuser */ + long f_files; /* total file nodes in file system */ + long f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the file system */ + short f_reserved1; /* reserved for future use */ + short f_type; /* type of file system (reserved) */ + long f_flags; /* copy of mount flags (reserved) */ + long f_reserved2[2]; /* reserved for future use */ + char f_fstypename[MFSNAMELEN]; /* fs type name */ + char f_mntonname[MNAMELEN]; /* directory on which mounted */ + char f_mntfromname[MNAMELEN]; /* mounted file system */ + char f_reserved3; /* reserved for future use */ + long f_reserved4[4]; /* reserved for future use */ +}; +.Ed +.Pp +However, when the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is defined, the +.Ft ino_t +type will be 64-bits (force 64-bit inode mode by defining the +.Dv _DARWIN_USE_64_BIT_INODE +macro before including header files). +This will cause symbol variants of the +.Fa statfs +family, with the +.Fa $INODE64 +suffixes, to be automatically linked in. 
+In addition, the +.Fa statfs +structure will now be defined as: +.Bd -literal #define MFSTYPENAMELEN 16 /* length of fs type name including null */ #define MAXPATHLEN 1024 +#define MNAMELEN MAXPATHLEN -struct statfs { - uint32_t f_bsize; /* fundamental file system block size */ - int32_t f_iosize; /* optimal transfer block size */ - uint64_t f_blocks; /* total data blocks in file system */ - uint64_t f_bfree; /* free blocks in fs */ - uint64_t f_bavail; /* free blocks avail to non-superuser */ - uint64_t f_files; /* total file nodes in file system */ - uint64_t f_ffree; /* free file nodes in fs */ - fsid_t f_fsid; /* file system id */ - uid_t f_owner; /* user that mounted the filesystem */ - uint32_t f_type; /* type of filesystem */ - uint32_t f_flags; /* copy of mount exported flags */ - uint32_t f_fssubtype; /* fs sub-type (flavor) */ - char f_fstypename[MFSTYPENAMELEN]; /* fs type name */ - char f_mntonname[MAXPATHLEN]; /* directory on which mounted */ - char f_mntfromname[MAXPATHLEN]; /* mounted filesystem */ - uint32_t f_reserved[8]; /* For future use */ +struct statfs { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */ + uint32_t f_bsize; /* fundamental file system block size */ + int32_t f_iosize; /* optimal transfer block size */ + uint64_t f_blocks; /* total data blocks in file system */ + uint64_t f_bfree; /* free blocks in fs */ + uint64_t f_bavail; /* free blocks avail to non-superuser */ + uint64_t f_files; /* total file nodes in file system */ + uint64_t f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + uint32_t f_type; /* type of filesystem */ + uint32_t f_flags; /* copy of mount exported flags */ + uint32_t f_fssubtype; /* fs sub-type (flavor) */ + char f_fstypename[MFSTYPENAMELEN]; /* fs type name */ + char f_mntonname[MAXPATHLEN]; /* directory on which mounted */ + char f_mntfromname[MAXPATHLEN]; /* mounted filesystem */ + uint32_t f_reserved[8]; /* For future use 
*/ }; .Ed .Pp Fields that are undefined for a particular file system are set to -1. -.Fn Fstatfs -returns the same information about an open file referenced by descriptor +The +.Fn fstatfs +routine returns the same information about an open file referenced by descriptor .Fa fd . .Sh FLAGS .Bl -tag -width MNT_UNKOWNPERMISSIONS @@ -105,7 +168,7 @@ Union with underlying filesysten File system written to asynchronously .It Dv MNT_EXPORTED File system is exported -.it Dv MNT_LOCAL +.It Dv MNT_LOCAL File system is stored locally .It Dv MNT_QUOTA Quotas are enabled on this file system @@ -135,8 +198,9 @@ Otherwise, -1 is returned and the global variable .Va errno is set to indicate the error. .Sh ERRORS -.Fn Statfs -fails if one or more of the following are true: +The +.Fn statfs +routine fails if one or more of the following are true: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix of @@ -153,7 +217,7 @@ exceeds .Dv {PATH_MAX} characters. .It Bq Er ENOENT -The file referred to by +The file or directory referred to by .Fa path does not exist. .It Bq Er EACCES @@ -173,8 +237,9 @@ An error occurred while reading from or writing to the file system. .El .Pp -.Fn Fstatfs -fails if one or more of the following are true: +The +.Fn fstatfs +routine fails if one or more of the following are true: .Bl -tag -width Er .It Bq Er EBADF .Fa fd @@ -187,7 +252,30 @@ An .Tn I/O error occurred while reading from or writing to the file system. .El +.Sh TRANSITIONAL DESCRIPTION (NOW DEPRECATED) +The +.Fa statfs64 +and +.Fa fstatfs64 +routines are equivalent to their corresponding non-64-suffixed routine, +when 64-bit inodes are in effect. +They were added before there was support for the symbol variants, and so are +now deprecated. +Instead of using these, set the +.Dv _DARWIN_USE_64_BIT_INODE +macro before including header files to force 64-bit inode support. 
+.Pp +The +.Fa statfs64 +structure used by these deprecated routines is the same as the +.Fa statfs +structure when 64-bit inodes are in effect (see above). .Sh HISTORY The .Fn statfs -function first appeared in 4.4BSD. +function first appeared in 4.4BSD. The +.Fn statfs64 +and +.Fn fstatfs64 +first appeared in Mac OS X 10.5 (Leopard) and are now deprecated +in favor of the corresponding symbol variants. diff --git a/bsd/man/man2/statfs64.2 b/bsd/man/man2/statfs64.2 new file mode 100644 index 000000000..3a64852e4 --- /dev/null +++ b/bsd/man/man2/statfs64.2 @@ -0,0 +1,3 @@ +.so man2/statfs.2 + + diff --git a/bsd/man/man2/syscall.2 b/bsd/man/man2/syscall.2 index 9de1e3922..be9b5dd17 100644 --- a/bsd/man/man2/syscall.2 +++ b/bsd/man/man2/syscall.2 @@ -33,20 +33,17 @@ .\" .\" @(#)syscall.2 8.1 (Berkeley) 6/16/93 .\" -.Dd June 16, 1993 +.Dd June 23, 2008 .Dt SYSCALL 2 .Os BSD 4 .Sh NAME -.Nm syscall , -.Nm __syscall +.Nm syscall .Nd indirect system call .Sh SYNOPSIS .Fd #include .Fd #include .Ft int .Fn syscall "int number" "..." -.Ft int -.Fn __syscall "quad_t number" "..." .Sh DESCRIPTION .Fn Syscall performs the system call whose assembly language @@ -55,11 +52,7 @@ interface has the specified with the specified arguments. Symbolic constants for system calls can be found in the header file .Ao Pa sys/syscall.h Ac . -The -.Nm __syscall -form should be used when one or more of the parameters is a -64-bit argument to ensure that argument alignment is correct. -This system call is useful for testing new system calls that +This function is useful for testing new system calls that do not have entries in the C library. .Sh RETURN VALUES The return values are defined by the system call being invoked.
diff --git a/bsd/man/man2/truncate.2 b/bsd/man/man2/truncate.2 index 660045be5..b82853d56 100644 --- a/bsd/man/man2/truncate.2 +++ b/bsd/man/man2/truncate.2 @@ -53,20 +53,29 @@ .Fa "off_t length" .Fc .Sh DESCRIPTION -.Fn Truncate -causes the file named by -.Fa path +.Fn ftruncate +and +.Fn truncate +cause the file named by +.Fa path , or referenced by -.Fa fildes -to be truncated or extended to +.Fa fildes , +to be truncated (or extended) to .Fa length -bytes in size. If the file previously -was larger than this size, the extra data -is lost. If the file was smaller than this size, it will be extended as -if by writing bytes with the value zero. -With -.Fn ftruncate , -the file must be open for writing. +bytes in size. If the file size exceeds +.Fa length , +any extra data is discarded. If the file size is smaller than +.Fa length , +the file is extended and filled with zeros to the indicated length. +The +.Fn ftruncate +form requires the file to be open for writing. +.Pp +Note: +.Fn ftruncate +and +.Fn truncate +do not modify the current file offset for any open file descriptions associated with the file. .Sh RETURN VALUES A value of 0 is returned if the call succeeds. If the call fails a -1 is returned, and the global variable diff --git a/bsd/man/man2/umask.2 b/bsd/man/man2/umask.2 index faad5f1b5..a9e156109 100644 --- a/bsd/man/man2/umask.2 +++ b/bsd/man/man2/umask.2 @@ -66,7 +66,7 @@ requested in file mode. This clearing allows each user to restrict the default access to his files. .Pp -The default mask value is S_IWGRP|S_IWOTH (022, write access for the +The default mask value is S_IWGRP | S_IWOTH (022, write access for the owner only). Child processes inherit the mask of the calling process. 
.Sh RETURN VALUES diff --git a/bsd/man/man3/posix_spawnattr_setflags.3 b/bsd/man/man3/posix_spawnattr_setflags.3 index 78cd67398..8828a83df 100644 --- a/bsd/man/man3/posix_spawnattr_setflags.3 +++ b/bsd/man/man3/posix_spawnattr_setflags.3 @@ -67,13 +67,12 @@ The argument is either 0 or a logical OR of one or more of the following flags: .Bl -tag -width POSIX_SPAWN_START_SUSPENDED .It Dv POSIX_SPAWN_RESETIDS -If the set group bit is set on the process image being spawned, this -bit has no effect; otherwise, if not set, the child process will -inherit the effective group ID of the parent process, and if set, the -child process will inherit the real group ID of the parent process. +If this bit is set, the child process will inherit the real (rather than the effective) user and +group ID of the parent process. NOTE: This flag has no effect when the set-group bit is set on the +process image being spawned. .It Dv POSIX_SPAWN_SETPGROUP If this bit is not set, then the child process inherits the parent -process group; if set, then the child process shall behave as if the +process group; if it is set, then the child process will behave as if the .Xr setpgid 2 function had been called with a .Fa pid diff --git a/bsd/man/man4/Makefile b/bsd/man/man4/Makefile index ab182ec48..a0f336945 100644 --- a/bsd/man/man4/Makefile +++ b/bsd/man/man4/Makefile @@ -9,6 +9,8 @@ include $(MakeInc_def) DATAFILES = \ aio.4 \ arp.4 \ + audit.4 \ + auditpipe.4 \ bpf.4 \ divert.4 \ dummynet.4 \ diff --git a/bsd/man/man4/audit.4 b/bsd/man/man4/audit.4 new file mode 100644 index 000000000..f919bd59e --- /dev/null +++ b/bsd/man/man4/audit.4 @@ -0,0 +1,160 @@ +.\" Copyright (c) 2006 Robert N. M. Watson +.\" Copyright (c) 2009 Apple, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/share/man/man4/audit.4,v 1.10 2006/12/14 16:40:57 mpp Exp $ +.\" +.Dd March 23, 2009 +.Os +.Dt AUDIT 4 +.Sh NAME +.Nm audit +.Nd Security Event Audit +.Sh SYNOPSIS +.\" .Cd "options AUDIT" +.Fd #include +.Fd #include +.Fd #include +.Sh DESCRIPTION +Security Event Audit is a facility to provide fine-grained, configurable +logging of security-relevant events, and is intended to meet the requirements +of the Common Criteria (CC) Common Access Protection Profile (CAPP) +evaluation. +The +.Fx +and Mac OS X +.Nm +facility implements the de facto industry standard BSM API, file +formats, and command line interface, first found in the Solaris operating +system. +Information on the user space implementation can be found in +.Xr libbsm 3 . 
+.Pp +Audit support is enabled at boot, if present in the kernel, using an +.Xr rc.conf 5 +flag or, on Mac OS X, by editing the +.Pa /System/Library/LaunchDaemons/com.apple.auditd.plist +launchd plist file and removing the disabled key or changing its value +to false. +The audit daemon, +.Xr auditd 8 , +is responsible for configuring the kernel to perform +.Nm , +pushing +configuration data from the various audit configuration files into the +kernel. +.Ss Audit Special Device +The +.Fx +kernel +.Nm +facility provides a special device, +.Pa /dev/audit , +which is used by +.Xr auditd 8 +to monitor for +.Nm +events, such as requests to cycle the log, low disk +space conditions, and requests to terminate auditing. +This device is not intended for use by applications. +Mac OS X provides this same functionality using Mach IPC and +a host special port. +.Ss Audit Pipe Special Devices +Audit pipe special devices, discussed in +.Xr auditpipe 4 , +provide a configurable live tracking mechanism to allow applications to +tee the audit trail, as well as to configure custom preselection parameters +to track users and events in a fine-grained manner. +.Sh SEE ALSO +.Xr auditreduce 1 , +.Xr praudit 1 , +.Xr audit 2 , +.Xr auditctl 2 , +.Xr auditon 2 , +.Xr getaudit 2 , +.Xr getauid 2 , +.Xr poll 2 , +.Xr select 2 , +.Xr setaudit 2 , +.Xr setauid 2 , +.Xr libbsm 3 , +.Xr auditpipe 4 , +.Xr audit_class 5 , +.Xr audit_control 5 , +.Xr audit_event 5 , +.Xr audit.log 5 , +.Xr audit_user 5 , +.Xr audit_warn 5 , +.Xr launchd.plist 5 , +.Xr rc.conf 5 , +.Xr audit 8 , +.Xr auditd 8 +.Sh HISTORY +The +.Tn OpenBSM +implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Pp +Support for kernel +.Nm +first appeared in Mac OS X 10.3 and +.Fx 6.2 . 
+.Sh AUTHORS +.An -nosplit +This software was created by McAfee Research, the security research division +of McAfee, Inc., under contract to Apple Computer Inc. +Additional authors include +.An Wayne Salamon , +.An Stacey Son , +.An Robert Watson , +and SPARTA Inc. +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Pp +This manual page was written by +.An Robert Watson Aq rwatson@FreeBSD.org . +.Sh BUGS +The +.Nm +facility in +.Fx +is considered experimental, and production deployment should occur only after +careful consideration of the risks of deploying experimental software. +.Pp +The Mac OS X and +.Fx +kernel do not fully validate that audit records submitted by user +applications are syntactically valid BSM; as submission of records is limited +to privileged processes, this is not a critical bug. +.Pp +Instrumentation of auditable events in the kernel is not complete, as some +system calls do not generate audit records, or generate audit records with +incomplete argument information. +.Pp +Mandatory Access Control (MAC) labels, as provided by the +.Xr mac 4 +facility, are not audited as part of records involving MAC decisions. diff --git a/bsd/man/man4/auditpipe.4 b/bsd/man/man4/auditpipe.4 new file mode 100644 index 000000000..7e0d7cc3e --- /dev/null +++ b/bsd/man/man4/auditpipe.4 @@ -0,0 +1,258 @@ +.\" Copyright (c) 2006 Robert N. M. Watson +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: src/share/man/man4/auditpipe.4,v 1.6 2008/05/02 17:36:22 rwatson Exp $ +.\" +.Dd May 5, 2006 +.Os +.Dt AUDITPIPE 4 +.Sh NAME +.Nm auditpipe +.Nd "pseudo-device for live audit event tracking" +.Sh SYNOPSIS +.Cd "pseudo-device auditpipe" +.Pp +.Fd #include +.Sh DESCRIPTION +While audit trail files +generated with +.Xr audit 4 +and maintained by +.Xr auditd 8 +provide a reliable long-term store for audit log information, current log +files are owned by the audit daemon until terminated making them somewhat +unwieldy for live monitoring applications such as host-based intrusion +detection. +For example, the log may be cycled and new records written to a new file +without notice to applications that may be accessing the file. +.Pp +The audit facility provides an audit pipe facility for applications requiring +direct access to live BSM audit data for the purposes of real-time +monitoring. +Audit pipes are available via a clonable special device, +.Pa /dev/auditpipe , +subject to the permissions on the device node, and provide a +.Qq tee +of the audit event stream. 
+As the device is clonable, more than one instance of the device may be opened +at a time; each device instance will provide independent access to all +records. +.Pp +The audit pipe device provides discrete BSM audit records; if the read buffer +passed by the application is too small to hold the next record in the +sequence, it will be dropped. +Unlike audit data written to the audit trail, the reliability of record +delivery is not guaranteed. +In particular, when an audit pipe queue fills, records will be dropped. +Audit pipe devices are blocking by default, but support non-blocking I/O, +asynchronous I/O using +.Dv SIGIO , +and polled operation via +.Xr select 2 +and +.Xr poll 2 . +.Pp +Applications may choose to track the global audit trail, or configure local +preselection parameters independent of the global audit trail parameters. +.Ss Audit Pipe Queue Ioctls +The following ioctls retrieve and set various audit pipe record queue +properties: +.Bl -tag -width ".Dv AUDITPIPE_GET_MAXAUDITDATA" +.It Dv AUDITPIPE_GET_QLEN +Query the current number of records available for reading on the pipe. +.It Dv AUDITPIPE_GET_QLIMIT +Retrieve the current maximum number of records that may be queued for reading +on the pipe. +.It Dv AUDITPIPE_SET_QLIMIT +Set the current maximum number of records that may be queued for reading on +the pipe. +The new limit must fall between the queue limit minimum and queue limit +maximum queryable using the following two ioctls. +.It Dv AUDITPIPE_GET_QLIMIT_MIN +Query the lowest possible maximum number of records that may be queued for +reading on the pipe. +.It Dv AUDITPIPE_GET_QLIMIT_MAX +Query the highest possible maximum number of records that may be queued for +reading on the pipe. +.It Dv AUDITPIPE_FLUSH +Flush all outstanding records on the audit pipe; useful after setting initial +preselection properties to delete records queued during the configuration +process which may not match the interests of the user process. 
+.It Dv AUDITPIPE_GET_MAXAUDITDATA +Query the maximum size of an audit record, which is a useful minimum size for +a user space buffer intended to hold audit records read from the audit pipe. +.El +.Ss Audit Pipe Preselection Mode Ioctls +By default, the audit pipe facility configures pipes to present records +matched by the system-wide audit trail, configured by +.Xr auditd 8 . +However, the preselection mechanism for audit pipes can be configured using +alternative criteria, including pipe-local flags and naflags settings, as +well as auid-specific selection masks. +This allows applications to track events not captured in the global audit +trail, as well as limit records presented to those of specific interest to +the application. +.Pp +The following ioctls configure the preselection mode on an audit pipe: +.Bl -tag -width ".Dv AUDITPIPE_GET_PRESELECT_MODE" +.It Dv AUDITPIPE_GET_PRESELECT_MODE +Return the current preselect mode on the audit pipe. +The ioctl argument should be of type +.Vt int . +.It Dv AUDITPIPE_SET_PRESELECT_MODE +Set the current preselection mode on the audit pipe. +The ioctl argument should be of type +.Vt int . +.El +.Pp +Possible preselection mode values are: +.Bl -tag -width ".Dv AUDITPIPE_PRESELECT_MODE_TRAIL" +.It Dv AUDITPIPE_PRESELECT_MODE_TRAIL +Use the global audit trail preselection parameters to select records for the +audit pipe. +.It Dv AUDITPIPE_PRESELECT_MODE_LOCAL +Use local audit pipe preselection; this model is similar to the global audit +trail configuration model, consisting of global flags and naflags parameters, +as well as a set of per-auid masks. +These parameters are configured using further ioctls. +.El +.Pp +After changing the audit pipe preselection mode, records selected under +earlier preselection configuration may still be in the audit pipe queue. +The application may flush the current record queue after changing the +configuration to remove possibly undesired records. 
+.Ss Audit Pipe Local Preselection Mode Ioctls +The following ioctls configure the preselection parameters used when an audit +pipe is configured for the +.Dv AUDITPIPE_PRESELECT_MODE_LOCAL +preselection mode. +.Bl -tag -width ".Dv AUDITPIPE_GET_PRESELECT_NAFLAGS" +.It Dv AUDITPIPE_GET_PRESELECT_FLAGS +Retrieve the current default preselection flags for attributable events on +the pipe. +These flags correspond to the +.Va flags +field in +.Xr audit_control 5 . +The ioctl argument should be of type +.Vt u_int . +.It Dv AUDITPIPE_SET_PRESELECT_FLAGS +Set the current default preselection flags for attributable events on the +pipe. +These flags correspond to the +.Va flags +field in +.Xr audit_control 5 . +The ioctl argument should be of type +.Vt u_int . +.It Dv AUDITPIPE_GET_PRESELECT_NAFLAGS +Retrieve the current default preselection flags for non-attributable events +on the pipe. +These flags correspond to the +.Va naflags +field in +.Xr audit_control 5 . +The ioctl argument should be of type +.Vt u_int . +.It Dv AUDITPIPE_SET_PRESELECT_NAFLAGS +Set the current default preselection flags for non-attributable events on the +pipe. +These flags correspond to the +.Va naflags +field in +.Xr audit_control 5 . +The ioctl argument should be of type +.Vt u_int . +.It Dv AUDITPIPE_GET_PRESELECT_AUID +Query the current preselection masks for a specific auid on the pipe. +The ioctl argument should be of type +.Vt "struct auditpipe_ioctl_preselect" . +The auid to query is specified via the +.Va ap_auid +field of type +.Vt au_id_t ; +the mask will be returned via +.Va ap_mask +of type +.Vt au_mask_t . +.It Dv AUDITPIPE_SET_PRESELECT_AUID +Set the current preselection masks for a specific auid on the pipe. +Arguments are identical to +.Dv AUDITPIPE_GET_PRESELECT_AUID , +except that the caller should properly initialize the +.Va ap_mask +field to hold the desired preselection mask. 
+.It Dv AUDITPIPE_DELETE_PRESELECT_AUID +Delete the current preselection mask for a specific auid on the pipe. +Once called, events associated with the specified auid will use the default +flags mask. +The ioctl argument should be of type +.Vt au_id_t . +.It Dv AUDITPIPE_FLUSH_PRESELECT_AUID +Delete all auid specific preselection specifications. +.El +.Sh EXAMPLES +The +.Xr praudit 1 +utility +may be directly executed on +.Pa /dev/auditpipe +to review the default audit trail. +.Sh SEE ALSO +.Xr poll 2 , +.Xr select 2 , +.Xr audit 4 , +.Xr audit_control 5 , +.Xr audit 8 , +.Xr auditd 8 +.Sh HISTORY +The OpenBSM implementation was created by McAfee Research, the security +division of McAfee Inc., under contract to Apple Computer Inc.\& in 2004. +It was subsequently adopted by the TrustedBSD Project as the foundation for +the OpenBSM distribution. +.Pp +Support for kernel audit first appeared in Mac OS X 10.3 and +.Fx 6.2 . +.Sh AUTHORS +The audit pipe facility was designed and implemented by +.An Robert Watson Aq rwatson@FreeBSD.org . +.Pp +The Basic Security Module (BSM) interface to audit records and audit event +stream format were defined by Sun Microsystems. +.Sh BUGS +See the +.Xr audit 4 +manual page for information on audit-related bugs and limitations. +.Pp +The configurable preselection mechanism mirrors the selection model present +for the global audit trail. +It might be desirable to provide a more flexible selection model. +.Pp +The per-pipe audit event queue is fifo, with drops occurring if either the +user thread provides insufficient buffer space for the record on the queue head, or on +enqueue if there is insufficient room. +It might be desirable to support partial reads of records, which would be +more compatible with buffered I/O as implemented in system libraries, and to +allow applications to select which records are dropped, possibly in the style +of preselection.
diff --git a/bsd/man/man4/bpf.4 b/bsd/man/man4/bpf.4 index 17b9876c3..289a59071 100644 --- a/bsd/man/man4/bpf.4 +++ b/bsd/man/man4/bpf.4 @@ -82,10 +82,6 @@ and drivers have been modified to interact with .Nm . .Pp -Since packet data is in network byte order, applications should use the -.Xr byteorder 3 -macros to extract multi-byte values. -.Pp A packet can be sent out on the network by writing to a .Nm file descriptor. The writes are unbuffered, meaning only one @@ -155,6 +151,32 @@ The device types, prefixed with .Dq Li DLT_ , are defined in .Aq Pa net/bpf.h . +.It Dv BIOCSDLT +.Pq Li u_int +Used to specify the type of data link layer of the interface +attached to the bpf descriptor. If the current interface is +not of the given type then the descriptor will be reattached +to an interface of the given type. If the descriptor has +promiscuous mode set, the new interface will be moved to +promiscuous mode. +.Er EINVAL +is returned if no interface has been specified. +The device types, prefixed with +.Dq Li DLT_ , +are defined in +.Aq Pa net/bpf.h . +.It Dv BIOCGDLTLIST +.Pq Li struct bpf_dltlist +Returns a list of data link types of the given interface. +A user allocated buffer to hold the list and length of expected +list should be provided in struct bpf_dltlist, defined in +.Aq Pa net/bpf.h . +.Er EINVAL +is returned if no interface has been specified. +The device types, prefixed with +.Dq Li DLT_ , +are defined in +.Aq Pa net/bpf.h . .It Dv BIOCPROMISC Forces the interface into promiscuous mode. All packets, not just those destined for the local host, are processed. @@ -295,6 +317,12 @@ interface should be returned by BPF. Set to zero to see only incoming packets on the interface. Set to one to see packets originating locally and remotely on the interface. This flag is initialized to one by default. +.It Dv BIOCGRSIG +.Pq Li u_int +Returns the signal that will be sent to a process waiting on the bpf descriptor upon packet reception. The default is SIGIO.
+.It Dv BIOCSRSIG +.Pq Li u_int +Sets the signal that should be sent to a process waiting on bpf descriptor upon packet reception. The default is SIGIO. .El .Sh BPF HEADER The following structure is prepended to each packet returned by @@ -694,9 +722,7 @@ struct bpf_insn insns[] = { .Ed .Sh SEE ALSO .Xr tcpdump 1 , -.Xr ioctl 2 , -.Xr byteorder 3 , -.Xr ng_bpf 4 +.Xr ioctl 2 .Rs .%A McCanne, S. .%A Jacobson V. diff --git a/bsd/man/man4/route.4 b/bsd/man/man4/route.4 index 06e142b1a..34088e51a 100644 --- a/bsd/man/man4/route.4 +++ b/bsd/man/man4/route.4 @@ -194,11 +194,11 @@ Messages include: A message header consists of: .Bd -literal struct rt_msghdr { - u_short rmt_msglen; /* to skip over non-understood messages */ + u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ - u_short rmt_index; /* index for associated ifp */ - pid_t rmt_pid; /* identify sender */ + u_short rtm_index; /* index for associated ifp or interface scope */ + pid_t rtm_pid; /* identify sender */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ @@ -241,6 +241,7 @@ Flags include the values: #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag #1 */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag #2 */ +#define RTF_IFSCOPE 0x1000000 /* has valid interface scope */ .Ed .Pp Specifiers for metric values in rmx_locks and rtm_inits are: diff --git a/bsd/man/man4/tcp.4 b/bsd/man/man4/tcp.4 index 18a5f98bb..f7ecb4087 100644 --- a/bsd/man/man4/tcp.4 +++ b/bsd/man/man4/tcp.4 @@ -33,81 +33,92 @@ .\" .\" @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" -.Dd June 5, 1993 +.Dd February 28, 2007 .Dt TCP 4 .Os BSD 4.2 .Sh NAME .Nm tcp .Nd Internet Transmission Control Protocol .Sh SYNOPSIS -.Fd #include -.Fd #include +.In sys/types.h +.In 
sys/socket.h +.In netinet/in.h .Ft int .Fn socket AF_INET SOCK_STREAM 0 .Sh DESCRIPTION The .Tn TCP protocol provides reliable, flow-controlled, two-way -transmission of data. It is a byte-stream protocol used to +transmission of data. +It is a byte-stream protocol used to support the .Dv SOCK_STREAM -abstraction. TCP uses the standard +abstraction. +.Tn TCP +uses the standard Internet address format and, in addition, provides a per-host collection of -.Dq port addresses . +.Dq "port addresses" . Thus, each address is composed -of an Internet address specifying the host and network, with -a specific +of an Internet address specifying the host and network, +with a specific .Tn TCP port on the host identifying the peer entity. .Pp -Sockets utilizing the tcp protocol are either +Sockets utilizing the +.Tn TCP +protocol are either .Dq active or .Dq passive . Active sockets initiate connections to passive -sockets. By default +sockets. +By default, .Tn TCP sockets are created active; to create a -passive socket the +passive socket, the .Xr listen 2 system call must be used after binding the socket with the .Xr bind 2 -system call. Only -passive sockets may use the +system call. +Only passive sockets may use the .Xr accept 2 -call to accept incoming connections. Only active sockets may -use the +call to accept incoming connections. +Only active sockets may use the .Xr connect 2 call to initiate connections. .Pp Passive sockets may .Dq underspecify their location to match -incoming connection requests from multiple networks. This -technique, termed -.Dq wildcard addressing , +incoming connection requests from multiple networks. +This technique, termed +.Dq "wildcard addressing" , allows a single server to provide service to clients on multiple networks. To create a socket which listens on all networks, the Internet address .Dv INADDR_ANY -must be bound. The +must be bound. 
+The .Tn TCP port may still be specified -at this time; if the port is not specified the system will assign one. -Once a connection has been established the socket's address is -fixed by the peer entity's location. The address assigned the +at this time; if the port is not specified, the system will assign one. +Once a connection has been established, the socket's address is +fixed by the peer entity's location. +The address assigned to the socket is the address associated with the network interface -through which packets are being transmitted and received. Normally -this address corresponds to the peer entity's network. +through which packets are being transmitted and received. +Normally, this address corresponds to the peer entity's network. .Pp .Tn TCP -supports one socket option which is set with +supports a number of socket options which can be set with .Xr setsockopt 2 and tested with -.Xr getsockopt 2 . +.Xr getsockopt 2 : +.Bl -tag -width ".Dv TCP_CONNECTIONTIMEOUT" +.It Dv TCP_NODELAY Under most circumstances, .Tn TCP sends data when it is presented; @@ -117,19 +128,75 @@ an acknowledgement is received. For a small number of clients, such as window systems that send a stream of mouse events which receive no replies, this packetization may cause significant delays. -Therefore, -.Tn TCP -provides a boolean option, +The boolean option .Dv TCP_NODELAY -(from -.Aq Pa netinet/tcp.h , -to defeat this algorithm. +defeats this algorithm. +.It Dv TCP_MAXSEG +By default, a sender- and +.No receiver- Ns Tn TCP +will negotiate among themselves to determine the maximum segment size +to be used for each connection. +The +.Dv TCP_MAXSEG +option allows the user to determine the result of this negotiation, +and to reduce it if desired. +.It Dv TCP_NOOPT +.Tn TCP +usually sends a number of options in each packet, corresponding to +various +.Tn TCP +extensions which are provided in this implementation. 
+The boolean option +.Dv TCP_NOOPT +is provided to disable +.Tn TCP +option use on a per-connection basis. +.It Dv TCP_NOPUSH +By convention, the +.No sender- Ns Tn TCP +will set the +.Dq push +bit, and begin transmission immediately (if permitted) at the end of +every user call to +.Xr write 2 +or +.Xr writev 2 . +When this option is set to a non-zero value, +.Tn TCP +will delay sending any data at all until either the socket is closed, +or the internal send buffer is filled. +.It Dv TCP_KEEPALIVE +The +.Dv TCP_KEEPALIVE +option specifies the amount of time, in seconds, that the +connection must be idle before keepalive probes (if enabled) are sent. +The default value is specified by the +.Tn MIB +variable +.Va net.inet.tcp.keepidle . +.It Dv TCP_CONNECTIONTIMEOUT +The +.Dv TCP_CONNECTIONTIMEOUT +option specifies the timeout, in seconds, for new, non-established +.Tn TCP +connections. This option can be useful for both active and passive +.Tn TCP +connections. The default value is specified by the +.Tn MIB +variable +.Va net.inet.tcp.keepinit . +.El +.Pp The option level for the -.Xr setsockopt +.Xr setsockopt 2 call is the protocol number for .Tn TCP , available from -.Xr getprotobyname 3 . +.Xr getprotobyname 3 , +or +.Dv IPPROTO_TCP . +All options are declared in +.In netinet/tcp.h . .Pp Options at the .Tn IP @@ -139,9 +206,36 @@ see .Xr ip 4 . Incoming connection requests that are source-routed are noted, and the reverse source route is used in responding. +.Ss "Non-blocking connect" +.Pp +When a +.Tn TCP +socket is set non-blocking, and the connection cannot be established immediately, +.Xr connect 2 +returns with the error +.Dv EINPROGRESS , +and the connection is established asynchronously. +.Pp +When the asynchronous connection completes successfully, +.Xr select 2 +or +.Xr poll 2 +or +.Xr kqueue 2 +will indicate the file descriptor is ready for writing.
+If the connection encounters an error, the file descriptor +is marked ready for both reading and writing, and the pending error +can be retrieved via the socket option +.Dv SO_ERROR . +.Pp +Note that even if the socket is non-blocking, it is possible for the connection +to be established immediately. In that case +.Xr connect 2 +does not return with +.Dv EINPROGRESS . .Sh DIAGNOSTICS A socket operation may fail with one of the following errors returned: -.Bl -tag -width [EADDRNOTAVAIL] +.Bl -tag -width Er .It Bq Er EISCONN when trying to establish a connection on a socket which already has one; @@ -163,18 +257,43 @@ when an attempt is made to create a socket with a port which has already been allocated; .It Bq Er EADDRNOTAVAIL -when an attempt is made to create a +when an attempt is made to create a socket with a network address for which no network interface -exists. +exists; +.It Bq Er EAFNOSUPPORT +when an attempt is made to bind or connect a socket to a multicast +address; +.It Bq Er EINPROGRESS +returned by +.Xr connect 2 +when the socket is set nonblocking, and the connection cannot be +immediately established; +.It Bq Er EALREADY +returned by +.Xr connect 2 +when a connection request is already in progress for the specified socket. +. .El .Sh SEE ALSO +.Xr connect 2 , .Xr getsockopt 2 , +.Xr kqueue 2 , +.Xr poll 2 , +.Xr select 2 , .Xr socket 2 , +.Xr sysctl 3 , .Xr inet 4 , -.Xr intro 4 , -.Xr ip 4 +.Xr inet6 4 , +.Xr ip 4 , +.Xr ip6 4 , +.Xr netintro 4 , +.Xr setkey 8 .Sh HISTORY The -.Nm -protocol stack appeared in +.Tn TCP +protocol appeared in .Bx 4.2 . +.Pp +The socket option +.Dv TCP_CONNECTIONTIMEOUT +first appeared in Mac OS X 10.6. diff --git a/bsd/man/man5/core.5 b/bsd/man/man5/core.5 index dae3c4c51..a45898f47 100644 --- a/bsd/man/man5/core.5 +++ b/bsd/man/man5/core.5 @@ -1,4 +1,4 @@ -.\""Copyright (c) 2001 Apple Computer, Inc. All Rights Reserved. +.\""Copyright (c) 2001-2007 Apple Inc. All Rights Reserved.
.\"The contents of this file constitute Original Code as defined in and are .\"subject to the Apple Public Source License Version 1.2 (the 'License'). .\"You may not use this file except in compliance with the @@ -12,7 +12,7 @@ .\"WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, .\"QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the .\"specific language governing rights and limitations under the License." -.Dd March 18, 2002 +.Dd June 26, 2008 .Dt CORE 5 .Os .Sh NAME @@ -52,17 +52,30 @@ file consists of various sections described in the header. .Sh NOTE Core dumps are disabled by default under Darwin/Mac OS X. To re-enable -core dumps, a privileged user must edit -.Pa /etc/hostconfig -to contain the line: -.Bd -literal -COREDUMPS=-YES- -.Ed +core dumps, a privileged user must do one of the following +.Pp +* Edit +.Pa /etc/launchd.conf +or +.Pa $HOME/.launchd.conf +and add a line specifying the limit +.Pa limit core unlimited +.Pp +* A privileged user can also enable cores with +.Pa launchctl limit core unlimited +.Pp +* A privileged user can also enable core files by using +.Xr ulimit 1 +or +.Xr limit 1 +depending upon the shell. .Sh SEE ALSO .Xr gdb 1 , .Xr setrlimit 2 , .Xr sigaction 2 , .Xr Mach-O 5 , +.Xr launchd.conf 5 , +.Xr launchd.plist 5 , .Xr sysctl 8 .Sh HISTORY A diff --git a/bsd/man/man5/dir.5 b/bsd/man/man5/dir.5 index 891e5db37..6f2eacb60 100644 --- a/bsd/man/man5/dir.5 +++ b/bsd/man/man5/dir.5 @@ -84,7 +84,14 @@ and The directory entry format is defined in the file .Aq sys/dirent.h and further in the file -.Aq dirent.h : +.Aq dirent.h . +When the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is not defined (the +.Ft ino_t +type is 32-bits), the +.Fa dirent +structure is defined as: .Bd -literal /*** Excerpt from ***/ /* @@ -94,21 +101,54 @@ and further in the file * inode number, the length of the entry, and the length of the name * contained in the entry. 
These are followed by the name padded to a 4 * byte boundary with null bytes. All names are guaranteed null terminated. - * The maximum length of a name in a directory is MAXPATHLEN. + * The maximum length of a name in a directory is 255. */ -#ifndef _SYS_DIRENT_H -#define _SYS_DIRENT_H - -struct dirent { - ino_t d_ino; /* file number of entry */ - u_int64_t d_seekoff; /* length of this record */ - u_int16_t d_reclen; /* length of this record */ - u_int16_t d_namlen; /* length of string in d_name */ - u_int8_t d_type; /* file type, see below */ - char d_name[MAXPATHLEN]; /* name must be no longer than this */ +struct dirent { /* when _DARWIN_FEATURE_64_BIT_INODE is NOT defined */ + ino_t d_ino; /* file number of entry */ + __uint16_t d_reclen; /* length of this record */ + __uint8_t d_type; /* file type, see below */ + __uint8_t d_namlen; /* length of string in d_name */ + char d_name[255 + 1]; /* name must be no longer than this */ }; +.Ed +.Pp +However, when the macro +.Dv _DARWIN_FEATURE_64_BIT_INODE +is defined, the +.Ft ino_t +type will be 64-bits (force 64-bit inode mode by defining the +.Dv _DARWIN_USE_64_BIT_INODE +macro before including header files). +This will cause symbol variants of the directory routines, with the +.Fa $INODE64 +suffixes, to be automatically linked in. +In addition, the +.Fa dirent +structure will now be defined as: +.Bd -literal +/* + * The dirent structure defines the format of directory entries. + * + * A directory entry has a struct dirent at the front of it, containing its + * inode number, the length of the entry, and the length of the name + * contained in the entry. These are followed by the name padded to a 4 + * byte boundary with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is 1023. 
+ */ +struct dirent { /* when _DARWIN_FEATURE_64_BIT_INODE is defined */ + ino_t d_fileno; /* file number of entry */ + __uint16_t d_seekoff; /* seek offset (optional, used by servers) */ + __uint16_t d_reclen; /* length of this record */ + __uint16_t d_namlen; /* length of string in d_name */ + __uint8_t d_type; /* file type, see below */ + char d_name[1024]; /* name must be no longer than this */ +}; +.Ed +.Pp +In addition: +.Bd -literal /* * File types */ @@ -122,15 +162,12 @@ struct dirent { #define DT_SOCK 12 #define DT_WHT 14 -#endif /* !_SYS_DIRENT_H_ */ - .Ed ----------------------------------------- .Bd -literal /*** Excerpt from ***/ -#ifndef _DIRENT_H -#define _DIRENT_H +#define d_fileno d_ino /* backward compatibility */ /* definitions for library routines operating on directories. */ #define DIRBLKSIZ 1024 @@ -138,28 +175,26 @@ struct dirent { struct _telldir; /* see telldir.h */ /* structure describing an open directory. */ -typedef struct { - int __dd_fd; /* file descriptor associated with directory */ - long __dd_loc; /* offset in current buffer */ - long __dd_size; /* amount of data returned by getdirentries */ - char *__dd_buf; /* data buffer */ - int __dd_len; /* size of data buffer */ - long __dd_seek; /* magic cookie returned by getdirentries */ - long __dd_rewind; /* magic cookie for rewinding */ - int __dd_flags; /* flags for readdir */ +typedef struct _dirdesc { + int __dd_fd; /* file descriptor associated with directory */ + long __dd_loc; /* offset in current buffer */ + long __dd_size; /* amount of data returned by getdirentries */ + char *__dd_buf; /* data buffer */ + int __dd_len; /* size of data buffer */ + long __dd_seek; /* magic cookie returned by getdirentries */ + long __dd_rewind; /* magic cookie for rewinding */ + int __dd_flags; /* flags for readdir */ pthread_mutex_t __dd_lock; /* for thread locking */ struct _telldir *__dd_td; /* telldir position recording */ } DIR; -#define dirfd(dirp) ((dirp)->__dd_fd) +#define dirfd(dirp) 
((dirp)->__dd_fd) /* flags for opendir2 */ #define DTF_HIDEW 0x0001 /* hide whiteout entries */ #define DTF_NODUP 0x0002 /* don't return duplicate names */ #define DTF_REWIND 0x0004 /* rewind after reading union stack */ #define __DTF_READALL 0x0008 /* everything has been read */ - -#endif /* !_DIRENT_H_ */ .Ed .Sh SEE ALSO .Xr fs 5 , diff --git a/bsd/man/man5/types.5 b/bsd/man/man5/types.5 index 9f030a865..27f61cfd7 100644 --- a/bsd/man/man5/types.5 +++ b/bsd/man/man5/types.5 @@ -33,7 +33,7 @@ .\" .\" @(#)types.5 8.1 (Berkeley) 6/5/93 .\" -.Dd AUGUST 2, 2001 +.Dd May 15, 2008 .Dt TYPES 5 .Os Darwin .Sh NAME @@ -85,21 +85,23 @@ typedef _TIME_T_ time_t; #undef _TIME_T_ #endif -#ifndef _POSIX_SOURCE -typedef struct _uquad { unsigned long val[2]; } u_quad; -typedef struct _quad { long val[2]; } quad; -#endif -typedef long * qaddr_t; /* should be typedef quad * qaddr_t; */ +typedef u_int64_t u_quad_t; +typedef int64_t quad_t; +typedef quad_t * qaddr_t; /* should be typedef quad * qaddr_t; */ typedef long daddr_t; typedef char * caddr_t; -typedef u_int64_t ino_t; +#ifdef _DARWIN_FEATURE_64_BIT_INODE +typedef u_int64_t ino_t; +#else /* !_DARWIN_FEATURE_64_BIT_INODE */ +typedef u_int ino_t; +#endif /* _DARWIN_FEATURE_64_BIT_INODE */ typedef long swblk_t; typedef long segsz_t; -typedef long off_t; -typedef u_short uid_t; -typedef u_short gid_t; -typedef short pid_t; +typedef int64_t off_t; +typedef u_int uid_t; +typedef u_int gid_t; +typedef int pid_t; typedef u_short nlink_t; typedef u_short mode_t; typedef u_long fixpt_t; diff --git a/bsd/man/man9/copy.9 b/bsd/man/man9/copy.9 index 13c2fe044..5577a1200 100644 --- a/bsd/man/man9/copy.9 +++ b/bsd/man/man9/copy.9 @@ -1,3 +1,26 @@ +.\" +.\" Copyright (c) 2008 Apple Inc. All rights reserved. +.\" +.\" @APPLE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License').
You may not use this file except in +.\" compliance with the License. Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this +.\" file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_LICENSE_HEADER_END@ +.\" +.\" .\" $NetBSD: copy.9,v 1.2 1996/01/09 03:23:04 thorpej Exp $ .\" .\" Copyright (c) 1996 Jason R. Thorpe. @@ -34,7 +57,7 @@ .\" .\" $FreeBSD: src/share/man/man9/copy.9,v 1.6.2.5 2001/12/17 11:30:18 ru Exp $ .\" -.Dd January 7, 1996 +.Dd October 2, 2008 .Dt COPY 9 .Os .Sh NAME @@ -144,21 +167,47 @@ NUL, is returned in .Sh RETURN VALUES The .Nm -functions return 0 on success or -.Er EFAULT -if a bad address is encountered. -In addition, the -.Fn copystr , +functions return 0 on success or the following error on failure: +.\" ======== +.Bl -tag -width Er +.It Bq EFAULT +If a bad address is encountered. When this error is returned, the contents of the destination buffer ( +.Fa *kaddr +for +.Fn copyin , +.Fn copyinstr , and +.Fn copystr ; +.Fa *uaddr +for +.Fn copyout ) +are undefined. For .Fn copyinstr +and +.Fn copystr , +the contents of the +.Fa *done +parameter are also undefined on a return of EFAULT. +.El +.Pp +In addition to EFAULT, +.\" .Fn copystr , .\" .Fn copyinstr , .\" and .\" .Fn copyoutstr -functions return -.Er ENAMETOLONG -if the string is longer than +.Fn copystr +and +.Fn copyinstr +on failure will return: +.\" ======== +.Bl -tag -width Er +.It Bq ENAMETOOLONG +When the string is longer than .Pa len -bytes. +bytes.
On this error return, the destination buffer is not null-terminated, but the +.Fa *done +parameter is maintained. +.El .Sh SEE ALSO .Xr fetch 9 , .Xr store 9 diff --git a/bsd/miscfs/Makefile b/bsd/miscfs/Makefile index 0aade1967..009da4c3f 100644 --- a/bsd/miscfs/Makefile +++ b/bsd/miscfs/Makefile @@ -9,7 +9,7 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ devfs \ - fdesc \ + fifofs \ specfs \ union @@ -19,9 +19,8 @@ INSTINC_SUBDIRS_I386 = \ EXPINC_SUBDIRS = \ devfs \ - fdesc \ - specfs \ - union + fifofs \ + specfs EXPINC_SUBDIRS_PPC = \ diff --git a/bsd/miscfs/devfs/Makefile b/bsd/miscfs/devfs/Makefile index 2996cf090..bb2e43304 100644 --- a/bsd/miscfs/devfs/Makefile +++ b/bsd/miscfs/devfs/Makefile @@ -20,15 +20,17 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - devfs.h devfs_proto.h devfsdefs.h - -PRIVATE_DATAFILES = + devfs.h INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = miscfs/devfs -EXPORT_MI_LIST = ${DATAFILES} +INSTALL_KF_MI_LIST = ${DATAFILES} + +INSTALL_KF_MI_LCL_LIST = ${DATAFILES} devfs_proto.h devfsdefs.h + +EXPORT_MI_LIST = ${DATAFILES} fdesc.h devfs_proto.h devfsdefs.h EXPORT_MI_DIR = miscfs/devfs diff --git a/bsd/miscfs/fdesc/fdesc_vnops.c b/bsd/miscfs/devfs/devfs_fdesc_support.c similarity index 76% rename from bsd/miscfs/fdesc/fdesc_vnops.c rename to bsd/miscfs/devfs/devfs_fdesc_support.c index 68ff35dfa..c15f3df53 100644 --- a/bsd/miscfs/fdesc/fdesc_vnops.c +++ b/bsd/miscfs/devfs/devfs_fdesc_support.c @@ -89,9 +89,11 @@ #include #include #include -#include #include #include +#include +#include +#include /* XXX should be prototyped in header for here, kern_descrip.c */ extern int soo_stat(struct socket *so, void *ub, int isstat64); @@ -114,22 +116,96 @@ u_long fdhash; static int fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context); +lck_mtx_t fdesc_mtx; +lck_grp_t *fdesc_lckgrp; + +static void +fdesc_lock(void) +{ + lck_mtx_lock(&fdesc_mtx); +} + +static void +fdesc_unlock(void) +{ + lck_mtx_unlock(&fdesc_mtx); +}
+ /* - * Initialise cache headers + * Initialise cache headers, create the devfs node */ int -fdesc_init(__unused struct vfsconf *vfsp) +devfs_fdesc_init() { - + int error = 0; + devnode_t *rootdir = dev_root->de_dnp; + devdirent_t *direntp; + + /* XXX Make sure you have the right path... */ fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); + fdesc_lckgrp = lck_grp_alloc_init("fdesc", NULL); + lck_mtx_init(&fdesc_mtx, fdesc_lckgrp, NULL); - return( 0 ); + DEVFS_LOCK(); + dev_add_entry("fd", rootdir, DEV_DEVFD, NULL, NULL, NULL, &direntp); + devfs_fdesc_makelinks(); + DEVFS_UNLOCK(); + + return(error); } +/* + * Called during early startup, no need to synchronize + */ int -fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, - enum vtype vtype) +devfs_fdesc_makelinks() +{ + int error = 0; + devdirent_t *stdin_ent = NULL, *stdout_ent = NULL, *stderr_ent = NULL; + devnode_t *root_devnode = dev_root->de_dnp; + + /* We do this ugliness to get around some "const" warnings */ + char in[] = "stdin"; + char out[] = "stdout"; + char err[] = "stderr"; + char zero[] = "fd/0"; + char one[] = "fd/1"; + char two[] = "fd/2"; + + if ((error = devfs_make_symlink(root_devnode, in, 0555, zero, &stdin_ent))) { + printf("Couldn't make stdin, err %d.\n", error); + goto bad; + } + + if ((error = devfs_make_symlink(root_devnode, out, 0555, one, &stdout_ent))) { + printf("Couldn't make stdout, err %d.\n", error); + goto bad; + } + + if ((error = devfs_make_symlink(root_devnode, err, 0555, two, &stderr_ent))) { + printf("Couldn't make stderr, err %d.\n", error); + goto bad; + } + + return 0; + +bad: + if (stdin_ent) { + dev_free_name(stdin_ent); + } + if (stdout_ent) { + dev_free_name(stdout_ent); + } + if (stderr_ent) { + dev_free_name(stderr_ent); + } + + return error; +} + +int +fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, enum vtype vtype, int fdno) { struct fdhashhead *fc; struct fdescnode *fd; @@ -137,22 +213,18 @@ 
fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, int vid = 0; struct vnode_fsparam vfsp; + fdesc_lock(); + fc = FD_NHASH(ix); loop: for (fd = fc->lh_first; fd != 0; fd = fd->fd_hash.le_next) { if (fd->fd_ix == ix && vnode_mount(fd->fd_vnode) == mp) { - /* - * doing a vnode_getwithvid isn't technically - * necessary since fdesc is an unsafe filesystem - * and we're running behind a funnel at this point - * however, vnode_get always succeeds, which isn't - * what we want if this vnode is in the process of - * being terminated - */ vid = vnode_vid(fd->fd_vnode); + fdesc_unlock(); if (vnode_getwithvid(fd->fd_vnode, vid)) goto loop; + *vpp = fd->fd_vnode; (*vpp)->v_type = vtype; @@ -160,16 +232,15 @@ fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, } } - /* - * otherwise lock the array while we call getnewvnode - * since that can block. - */ + /* Only one thread can add to the hash at a time */ if (fdcache_lock & FDL_LOCKED) { fdcache_lock |= FDL_WANT; - sleep((caddr_t) &fdcache_lock, PINOD); + msleep((caddr_t) &fdcache_lock, &fdesc_mtx, PINOD, "fdesc_allocvp", NULL); goto loop; } + fdcache_lock |= FDL_LOCKED; + fdesc_unlock(); MALLOC(fd, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK); @@ -184,31 +255,36 @@ fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, vfsp.vnfs_filesize = 0; vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; vfsp.vnfs_marksystem = 0; - if (ftype == Froot) - vfsp.vnfs_markroot = 1; - else - vfsp.vnfs_markroot = 0; + vfsp.vnfs_markroot = 0; error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vpp); if (error) { FREE(fd, M_TEMP); + fdesc_lock(); goto out; } + (*vpp)->v_tag = VT_FDESC; fd->fd_vnode = *vpp; fd->fd_type = ftype; fd->fd_fd = -1; fd->fd_link = NULL; fd->fd_ix = ix; - LIST_INSERT_HEAD(fc, fd, fd_hash); + fd->fd_fd = fdno; + + fdesc_lock(); + LIST_INSERT_HEAD(fc, fd, fd_hash); out: + /* Hold the lock when we get here */ fdcache_lock &= ~FDL_LOCKED; if 
(fdcache_lock & FDL_WANT) { fdcache_lock &= ~FDL_WANT; wakeup((caddr_t) &fdcache_lock); } + + fdesc_unlock(); return (error); } @@ -216,9 +292,11 @@ fdesc_allocvp(fdntype ftype, int ix, struct mount *mp, struct vnode **vpp, /* * vp is the current namei directory * ndp is the name to locate in that directory... + * + * This vnop should only be called on the special directory /dev/fd. */ int -fdesc_lookup(struct vnop_lookup_args *ap) +devfs_devfd_lookup(struct vnop_lookup_args *ap) { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; @@ -229,7 +307,6 @@ fdesc_lookup(struct vnop_lookup_args *ap) int fd; int error; struct vnode *fvp; - const char *ln; if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; @@ -240,92 +317,32 @@ fdesc_lookup(struct vnop_lookup_args *ap) return (0); } - switch (VTOFDESC(dvp)->fd_type) { - default: - case Flink: - case Fdesc: - /* should never happen */ - error = ENOTDIR; - goto bad; - - case Froot: - if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) { - error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp, VDIR); - if (error) - goto bad; - *vpp = fvp; - return (0); - } - - ln = NULL; - switch (cnp->cn_namelen) { - case 5: - if (bcmp(pname, "stdin", 5) == 0) { - ln = "fd/0"; - fd = FD_STDIN; - } + fd = 0; + while (*pname >= '0' && *pname <= '9') { + fd = 10 * fd + *pname++ - '0'; + if (fd >= numfiles) break; - case 6: - if (bcmp(pname, "stdout", 6) == 0) { - ln = "fd/1"; - fd = FD_STDOUT; - } else - if (bcmp(pname, "stderr", 6) == 0) { - ln = "fd/2"; - fd = FD_STDERR; - } - break; - } - - if (ln) { - error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp, VLNK); - if (error) - goto bad; - VTOFDESC(fvp)->fd_link = ln; - *vpp = fvp; - return (0); - } else { - error = ENOENT; - goto bad; - } - - /* FALL THROUGH */ - - case Fdevfd: - if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) { - if ((error = fdesc_root(dvp->v_mount, vpp, ap->a_context))) - goto bad; - return (0); - } - - fd = 0; - while (*pname >= '0' && 
*pname <= '9') { - fd = 10 * fd + *pname++ - '0'; - if (fd >= numfiles) - break; - } - - if (*pname != '\0') { - error = ENOENT; - goto bad; - } + } - if (fd < 0 || fd >= numfiles || - *fdfile(p, fd) == NULL || - (*fdflags(p, fd) & UF_RESERVED)) { - error = EBADF; - goto bad; - } + if (*pname != '\0') { + error = ENOENT; + goto bad; + } - error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, VNON); - if (error) - goto bad; - VTOFDESC(fvp)->fd_fd = fd; - *vpp = fvp; - return (0); + if (fd < 0 || fd >= numfiles || + *fdfile(p, fd) == NULL || + (*fdflags(p, fd) & UF_RESERVED)) { + error = EBADF; + goto bad; } -bad:; + error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, VNON, fd); + if (error) + goto bad; + *vpp = fvp; + return (0); + +bad: *vpp = NULL; return (error); } @@ -356,7 +373,8 @@ fdesc_open(struct vnop_open_args *ap) uu->uu_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ error = ENODEV; break; - default: /* Froot / Fdevfd / Flink */ + default: + panic("Invalid type for fdesc node!"); break; } @@ -402,7 +420,7 @@ fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context) error = soo_stat((struct socket *)fp->f_fglob->fg_data, (void *)&stb, 0); else #endif /* SOCKETS */ - error = pipe_stat((struct pipe *)fp->f_fglob->fg_data, (void *)&stb, 0); + error = pipe_stat((struct pipe *)fp->f_fglob->fg_data, (void *)&stb, 0); if (error == 0) { if (fp->f_fglob->fg_type == DTYPE_SOCKET) @@ -443,56 +461,27 @@ fdesc_getattr(struct vnop_getattr_args *ap) struct vnode_attr *vap = ap->a_vap; unsigned fd; int error = 0; - struct timespec ts; switch (VTOFDESC(vp)->fd_type) { - case Froot: - case Fdevfd: - case Flink: - VATTR_RETURN(vap, va_fileid, VTOFDESC(vp)->fd_ix); - VATTR_RETURN(vap, va_uid, 0); - VATTR_RETURN(vap, va_gid, 0); - VATTR_RETURN(vap, va_fsid, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); - VATTR_RETURN(vap, va_iosize, DEV_BSIZE); - ts.tv_sec = boottime_sec(); - ts.tv_nsec = 0; - VATTR_RETURN(vap, va_access_time, ts); - VATTR_RETURN(vap, 
va_modify_time, ts); - VATTR_RETURN(vap, va_change_time, ts); - VATTR_RETURN(vap, va_gen, 0); - VATTR_RETURN(vap, va_flags, 0); - VATTR_RETURN(vap, va_rdev, 0); - VATTR_RETURN(vap, va_acl, NULL); - - switch (VTOFDESC(vp)->fd_type) { - case Flink: - VATTR_RETURN(vap, va_mode, S_IRUSR|S_IRGRP|S_IROTH); - VATTR_RETURN(vap, va_type, VLNK); /* not strictly required */ - VATTR_RETURN(vap, va_nlink, 1); - VATTR_RETURN(vap, va_data_size, strlen(VTOFDESC(vp)->fd_link)); - break; - - default: - VATTR_RETURN(vap, va_mode, S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); - VATTR_RETURN(vap, va_type, VDIR); - VATTR_RETURN(vap, va_nlink, 2); - VATTR_RETURN(vap, va_data_size, DEV_BSIZE); - break; - } - break; - case Fdesc: fd = VTOFDESC(vp)->fd_fd; error = fdesc_attr(fd, vap, ap->a_context); break; default: - return (EBADF); + panic("Invalid type for an fdesc node!\n"); break; } + /* + * Yes, we do this without locking, but this value is always just + * a snapshot. + */ if (error == 0) { vp->v_type = vap->va_type; + + /* We need an inactive to reset type to VNON */ + vnode_setneedinactive(vp); } return (error); @@ -512,8 +501,8 @@ fdesc_setattr(struct vnop_setattr_args *ap) switch (VTOFDESC(ap->a_vp)->fd_type) { case Fdesc: break; - default: + panic("Invalid type for an fdesc node!\n"); return (EACCES); } @@ -551,8 +540,9 @@ fdesc_setattr(struct vnop_setattr_args *ap) #define UIO_MX 16 +/* static struct dirtmp { - u_long d_fileno; + u_int32_t d_fileno; u_short d_reclen; u_short d_namlen; char d_name[8]; @@ -563,9 +553,11 @@ static struct dirtmp { { FD_STDERR, UIO_MX, 6, "stderr" }, { 0, 0, 0, "" } }; +*/ +/* Only called on /dev/fd */ int -fdesc_readdir(struct vnop_readdir_args *ap) +devfs_devfd_readdir(struct vnop_readdir_args *ap) { struct uio *uio = ap->a_uio; struct proc *p = current_proc(); @@ -578,58 +570,6 @@ fdesc_readdir(struct vnop_readdir_args *ap) if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) return (EINVAL); - switch 
(VTOFDESC(ap->a_vp)->fd_type) { - case Fdesc: - return (ENOTDIR); - - default: - break; - } - - if (VTOFDESC(ap->a_vp)->fd_type == Froot) { - struct dirent d; - struct dirent *dp = &d; - struct dirtmp *dt; - int fd; - - i = uio->uio_offset / UIO_MX; - error = 0; - - while (uio_resid(uio) > 0) { - dt = &rootent[i]; - if (dt->d_fileno == 0) { - /**eofflagp = 1;*/ - break; - } - i++; - - switch (dt->d_fileno) { - case FD_STDIN: - case FD_STDOUT: - case FD_STDERR: - fd = dt->d_fileno - FD_STDIN; - if (fd >= p->p_fd->fd_nfiles) - continue; - if (*fdfile(p, fd) == NULL && - !(*fdflags(p, fd) & - UF_RESERVED)) - continue; - break; - } - bzero((caddr_t) dp, UIO_MX); - dp->d_fileno = dt->d_fileno; - dp->d_namlen = dt->d_namlen; - dp->d_type = DT_UNKNOWN; - dp->d_reclen = dt->d_reclen; - bcopy(dt->d_name, dp->d_name, dp->d_namlen+1); - error = uiomove((caddr_t) dp, UIO_MX, uio); - if (error) - break; - } - uio->uio_offset = i * UIO_MX; - return (error); - } - i = uio->uio_offset / UIO_MX; error = 0; while (uio_resid(uio) > 0) { @@ -661,29 +601,9 @@ fdesc_readdir(struct vnop_readdir_args *ap) return (error); } -int -fdesc_readlink(struct vnop_readlink_args *ap) -{ - struct vnode *vp = ap->a_vp; - int error; - - if (vp->v_type != VLNK) - return (EPERM); - - if (VTOFDESC(vp)->fd_type == Flink) { - const char *ln = VTOFDESC(vp)->fd_link; - error = uiomove(ln, strlen(ln), ap->a_uio); - } else { - error = ENOTSUP; - } - - return (error); -} - int fdesc_read(__unused struct vnop_read_args *ap) { - return (ENOTSUP); } @@ -715,6 +635,7 @@ fdesc_inactive(struct vnop_inactive_args *ap) * nasty things happening in vgone(). 
*/ vp->v_type = VNON; + return (0); } @@ -724,9 +645,13 @@ fdesc_reclaim(struct vnop_reclaim_args *ap) struct vnode *vp = ap->a_vp; struct fdescnode *fd = VTOFDESC(vp); + fdesc_lock(); + LIST_REMOVE(fd, fd_hash); FREE(vp->v_data, M_TEMP); vp->v_data = NULL; + + fdesc_unlock(); return (0); } @@ -797,9 +722,9 @@ fdesc_badop(void) #define fdesc_blockmap (int (*) (struct vnop_blockmap_args *))eopnotsupp int (**fdesc_vnodeop_p)(void *); -struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { +struct vnodeopv_entry_desc devfs_fdesc_vnodeop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)fdesc_lookup }, /* lookup */ + { &vnop_lookup_desc, (VOPFUNC)vn_default_error}, /* lookup */ { &vnop_create_desc, (VOPFUNC)fdesc_create }, /* create */ { &vnop_mknod_desc, (VOPFUNC)fdesc_mknod }, /* mknod */ { &vnop_open_desc, (VOPFUNC)fdesc_open }, /* open */ @@ -820,8 +745,8 @@ struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { { &vnop_mkdir_desc, (VOPFUNC)fdesc_mkdir }, /* mkdir */ { &vnop_rmdir_desc, (VOPFUNC)fdesc_rmdir }, /* rmdir */ { &vnop_symlink_desc, (VOPFUNC)fdesc_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)fdesc_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)fdesc_readlink },/* readlink */ + { &vnop_readdir_desc, (VOPFUNC)vn_default_error},/* readdir */ + { &vnop_readlink_desc, (VOPFUNC)err_readlink}, /* readlink */ { &vnop_inactive_desc, (VOPFUNC)fdesc_inactive },/* inactive */ { &vnop_reclaim_desc, (VOPFUNC)fdesc_reclaim }, /* reclaim */ { &vnop_strategy_desc, (VOPFUNC)fdesc_strategy }, /* strategy */ @@ -836,5 +761,7 @@ struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = { { &vnop_blockmap_desc, (VOPFUNC)fdesc_blockmap }, /* blockmap */ { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; -struct vnodeopv_desc fdesc_vnodeop_opv_desc = - { &fdesc_vnodeop_p, fdesc_vnodeop_entries }; + +struct vnodeopv_desc devfs_fdesc_vnodeop_opv_desc = + { &fdesc_vnodeop_p, devfs_fdesc_vnodeop_entries }; + diff 
--git a/bsd/miscfs/devfs/devfs_proto.h b/bsd/miscfs/devfs/devfs_proto.h index dc6cab891..0485fb52d 100644 --- a/bsd/miscfs/devfs/devfs_proto.h +++ b/bsd/miscfs/devfs/devfs_proto.h @@ -31,6 +31,7 @@ #include +__BEGIN_DECLS #ifdef __APPLE_API_PRIVATE int devfs_sinit(void); devdirent_t * dev_findname(devnode_t * dir, const char *name); @@ -48,8 +49,9 @@ int dev_add_entry(const char *name, devnode_t * parent, int type, devnode_type_t int devfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); int devfs_kernel_mount(char * mntname); - #endif /* __APPLE_API_PRIVATE */ +__END_DECLS + #endif /* __DEVFS_DEVFS_PROTO_H__ */ /* THIS FILE PRODUCED AUTOMATICALLY */ /* DO NOT EDIT (see reproto.sh) */ diff --git a/bsd/miscfs/devfs/devfs_tree.c b/bsd/miscfs/devfs/devfs_tree.c index 28ee35c2d..d8f3ae088 100644 --- a/bsd/miscfs/devfs/devfs_tree.c +++ b/bsd/miscfs/devfs/devfs_tree.c @@ -107,11 +107,41 @@ #include #endif -static void devfs_release_busy(devnode_t *); +#if FDESC +#include "fdesc.h" +#endif + +typedef struct devfs_vnode_event { + vnode_t dve_vp; + uint32_t dve_vid; + uint32_t dve_events; +} *devfs_vnode_event_t; + +/* + * Size of stack buffer (fast path) for notifications. If + * the number of mounts is small, no need to malloc a buffer. 
+ */ +#define NUM_STACK_ENTRIES 5 + +typedef struct devfs_event_log { + size_t del_max; + size_t del_used; + devfs_vnode_event_t del_entries; +} *devfs_event_log_t; + + static void dev_free_hier(devdirent_t *); -static int devfs_propogate(devdirent_t *, devdirent_t *); -static int dev_finddir(const char *, devnode_t *, int, devnode_t **); +static int devfs_propogate(devdirent_t *, devdirent_t *, devfs_event_log_t); +static int dev_finddir(const char *, devnode_t *, int, devnode_t **, devfs_event_log_t); static int dev_dup_entry(devnode_t *, devdirent_t *, devdirent_t **, struct devfsmount *); +void devfs_ref_node(devnode_t *); +void devfs_rele_node(devnode_t *); +static void devfs_record_event(devfs_event_log_t, devnode_t*, uint32_t); +static int devfs_init_event_log(devfs_event_log_t, uint32_t, devfs_vnode_event_t); +static void devfs_release_event_log(devfs_event_log_t, int); +static void devfs_bulk_notify(devfs_event_log_t); +static devdirent_t *devfs_make_node_internal(dev_t, devfstype_t type, uid_t, gid_t, int, + int (*clone)(dev_t dev, int action), const char *fmt, va_list ap); lck_grp_t * devfs_lck_grp; @@ -122,11 +152,14 @@ lck_mtx_t devfs_mutex; devdirent_t * dev_root = NULL; /* root of backing tree */ struct devfs_stats devfs_stats; /* hold stats */ +static ino_t devfs_unique_fileno = 0; + #ifdef HIDDEN_MOUNTPOINT static struct mount *devfs_hidden_mount; #endif /* HIDDEN_MOINTPOINT */ static int devfs_ready = 0; +static uint32_t devfs_nmountplanes = 0; /* The first plane is not used for a mount */ #define DEVFS_NOCREATE FALSE #define DEVFS_CREATE TRUE @@ -249,7 +282,8 @@ static int dev_finddir(const char * path, devnode_t * dirnode, int create, - devnode_t * * dn_pp) + devnode_t * * dn_pp, + devfs_event_log_t delp) { devnode_t * dnp = NULL; int error = 0; @@ -320,7 +354,7 @@ dev_finddir(const char * path, strlen(dirnode->dn_typeinfo.Dir.myname->de_name), dnp, fullpath); #endif - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p); + 
devfs_propogate(dirnode->dn_typeinfo.Dir.myname, dirent_p, delp); } dirnode = dnp; /* continue relative to this directory */ } @@ -542,6 +576,9 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, #endif } dnp->dn_dvm = dvm; + dnp->dn_refcount = 0; + dnp->dn_ino = devfs_unique_fileno; + devfs_unique_fileno++; /* * fill out the dev node according to type @@ -598,6 +635,15 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, dnp->dn_ops = &devfs_spec_vnodeop_p; dnp->dn_typeinfo.dev = typeinfo->dev; break; + + #if FDESC + /* /dev/fd is special */ + case DEV_DEVFD: + dnp->dn_ops = &devfs_devfd_vnodeop_p; + dnp->dn_mode |= 0555; /* default perms */ + break; + + #endif /* FDESC */ default: return EINVAL; } @@ -614,10 +660,6 @@ dev_add_node(int entrytype, devnode_type_t * typeinfo, devnode_t * proto, void devnode_free(devnode_t * dnp) { - if (dnp->dn_lflags & DN_BUSY) { - dnp->dn_lflags |= DN_DELETE; - return; - } #if CONFIG_MACF mac_devfs_label_destroy(dnp); #endif @@ -645,11 +687,13 @@ devfs_dn_free(devnode_t * dnp) dnp->dn_nextsibling->dn_prevsiblingp = prevp; } - if (dnp->dn_vn == NULL) { - devnode_free(dnp); /* no accesses/references */ + + /* Can only free if there are no references; otherwise, wait for last vnode to be reclaimed */ + if (dnp->dn_refcount == 0) { + devnode_free(dnp); } else { - dnp->dn_delete = TRUE; + dnp->dn_lflags |= DN_DELETE; } } } @@ -674,7 +718,7 @@ devfs_dn_free(devnode_t * dnp) * called with DEVFS_LOCK held ***********************************************************************/ static int -devfs_propogate(devdirent_t * parent,devdirent_t * child) +devfs_propogate(devdirent_t * parent,devdirent_t * child, devfs_event_log_t delp) { int error; devdirent_t * newnmp; @@ -682,6 +726,12 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) devnode_t * pdnp = parent->de_dnp; devnode_t * adnp = parent->de_dnp; int type = child->de_dnp->dn_type; + uint32_t events; + + events = (dnp->dn_type == 
DEV_DIR ? VNODE_EVENT_DIR_CREATED : VNODE_EVENT_FILE_CREATED); + if (delp != NULL) { + devfs_record_event(delp, pdnp, events); + } /*********************************************** * Find the other instances of the parent node @@ -699,11 +749,45 @@ devfs_propogate(devdirent_t * parent,devdirent_t * child) NULL, dnp, adnp->dn_dvm, &newnmp)) != 0) { printf("duplicating %s failed\n",child->de_name); + } else { + if (delp != NULL) { + devfs_record_event(delp, adnp, events); + + /* + * Slightly subtle. We're guaranteed that there will + * only be a vnode hooked into this devnode if we're creating + * a new link to an existing node; otherwise, the devnode is new + * and no one can have looked it up yet. If we're making a link, + * then the buffer is large enough for two nodes in each + * plane; otherwise, there's no vnode and this call will + * do nothing. + */ + devfs_record_event(delp, newnmp->de_dnp, VNODE_EVENT_LINK); + } } } return 0; /* for now always succeed */ } +static uint32_t +remove_notify_count(devnode_t *dnp) +{ + uint32_t notify_count = 0; + devnode_t *dnp2; + + /* + * Could need to notify for one removed node on each mount and + * one parent for each such node. + */ + notify_count = devfs_nmountplanes; + notify_count += dnp->dn_links; + for (dnp2 = dnp->dn_nextsibling; dnp2 != dnp; dnp2 = dnp2->dn_nextsibling) { + notify_count += dnp2->dn_links; + } + + return notify_count; + +} /*********************************************************************** * remove all instances of this devicename [for backing nodes..] 
@@ -721,7 +805,12 @@ devfs_remove(void *dirent_p) devnode_t * dnp = ((devdirent_t *)dirent_p)->de_dnp; devnode_t * dnp2; boolean_t lastlink; - + struct devfs_event_log event_log; + uint32_t log_count = 0; + int do_notify = 0; + int need_free = 0; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; + DEVFS_LOCK(); if (!devfs_ready) { @@ -729,6 +818,37 @@ devfs_remove(void *dirent_p) goto out; } + log_count = remove_notify_count(dnp); + + if (log_count > NUM_STACK_ENTRIES) { + uint32_t new_count; +wrongsize: + DEVFS_UNLOCK(); + if (devfs_init_event_log(&event_log, log_count, NULL) == 0) { + do_notify = 1; + need_free = 1; + } + DEVFS_LOCK(); + + new_count = remove_notify_count(dnp); + if (need_free && (new_count > log_count)) { + devfs_release_event_log(&event_log, 1); + need_free = 0; + do_notify = 0; + log_count = log_count * 2; + goto wrongsize; + } + } else { + if (devfs_init_event_log(&event_log, NUM_STACK_ENTRIES, &stackbuf[0]) == 0) { + do_notify = 1; + } + } + + /* This file has been deleted */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp, VNODE_EVENT_DELETE); + } + /* keep removing the next sibling till only we exist. 
*/ while ((dnp2 = dnp->dn_nextsibling) != dnp) { @@ -739,9 +859,19 @@ devfs_remove(void *dirent_p) dnp->dn_nextsibling->dn_prevsiblingp = &(dnp->dn_nextsibling); dnp2->dn_nextsibling = dnp2; dnp2->dn_prevsiblingp = &(dnp2->dn_nextsibling); + + /* This file has been deleted in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2, VNODE_EVENT_DELETE); + } + if (dnp2->dn_linklist) { do { lastlink = (1 == dnp2->dn_links); + /* Each parent of a link to this file has lost a child in this plane */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp2->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } dev_free_name(dnp2->dn_linklist); } while (!lastlink); } @@ -755,11 +885,19 @@ devfs_remove(void *dirent_p) if (dnp->dn_linklist) { do { lastlink = (1 == dnp->dn_links); + /* Each parent of a link to this file has lost a child */ + if (do_notify != 0) { + devfs_record_event(&event_log, dnp->dn_linklist->de_parent, VNODE_EVENT_FILE_REMOVED); + } dev_free_name(dnp->dn_linklist); } while (!lastlink); } out: DEVFS_UNLOCK(); + if (do_notify != 0) { + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); + } return ; } @@ -783,6 +921,7 @@ dev_dup_plane(struct devfsmount *devfs_mp_p) if ((error = dev_dup_entry(NULL, dev_root, &new, devfs_mp_p))) return error; devfs_mp_p->plane_root = new; + devfs_nmountplanes++; return error; } @@ -804,6 +943,11 @@ devfs_free_plane(struct devfsmount *devfs_mp_p) dev_free_name(dirent_p); } devfs_mp_p->plane_root = NULL; + devfs_nmountplanes--; + + if (devfs_nmountplanes > (devfs_nmountplanes+1)) { + panic("plane count wrapped around.\n"); + } } @@ -970,24 +1114,38 @@ dev_free_hier(devdirent_t * dirent_p) * associated, or get a new one and associate it with the dev_node * * called with DEVFS_LOCK held - ***************************************************************/ + * + * If an error is returned, then the dnp may have been freed (we + * raced with a delete and lost). 
A devnode should not be accessed + * after devfs_dntovn() fails. + ****************************************************************/ int devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) { struct vnode *vn_p; - struct vnode ** vnptr; int error = 0; struct vnode_fsparam vfsp; enum vtype vtype = 0; int markroot = 0; int n_minor = DEVFS_CLONE_ALLOC; /* new minor number for clone device */ + + /* + * We should never come in and find that our devnode has been marked for delete. + * The lookup should have held the lock from entry until now; it should not have + * been able to find a removed entry. Any other pathway would have just created + * the devnode and come here without dropping the devfs lock, so no one would + * have a chance to delete. + */ + if (dnp->dn_lflags & DN_DELETE) { + panic("devfs_dntovn: DN_DELETE set on a devnode upon entry."); + } + + devfs_ref_node(dnp); retry: *vn_pp = NULL; vn_p = dnp->dn_vn; - dnp->dn_lflags |= DN_BUSY; - if (vn_p) { /* already has a vnode */ uint32_t vid; @@ -1012,21 +1170,26 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) */ vnode_put(vn_p); } - /* - * set the error to EAGAIN - * which will cause devfs_lookup - * to retry this node + + /* + * This entry is no longer in the namespace. This is only + * possible for lookup: no other path would not find an existing + * vnode. Therefore, ENOENT is a valid result. */ - error = EAGAIN; + error = ENOENT; } if ( !error) *vn_pp = vn_p; - devfs_release_busy(dnp); - - return error; + goto out; } + /* + * If we get here, then we've beaten any deletes; + * if someone sets DN_DELETE during a subsequent drop + * of the devfs lock, we'll still vend a vnode. + */ + if (dnp->dn_lflags & DN_CREATE) { dnp->dn_lflags |= DN_CREATEWAIT; msleep(&dnp->dn_lflags, &devfs_mutex, PRIBIO, 0 , 0); @@ -1049,6 +1212,11 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) case DEV_CDEV: vtype = (dnp->dn_type == DEV_BDEV) ? 
VBLK : VCHR; break; +#if FDESC + case DEV_DEVFD: + vtype = VDIR; + break; +#endif /* FDESC */ } vfsp.vnfs_mp = dnp->dn_dvm->mount; vfsp.vnfs_vtype = vtype; @@ -1069,8 +1237,8 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) n_minor = (*dnp->dn_clone)(dnp->dn_typeinfo.dev, DEVFS_CLONE_ALLOC); if (n_minor == -1) { - devfs_release_busy(dnp); - return ENOMEM; + error = ENOMEM; + goto out; } vfsp.vnfs_rdev = makedev(n_major, n_minor);; @@ -1088,28 +1256,38 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) DEVFS_UNLOCK(); - if (dnp->dn_clone == NULL) - vnptr = &dnp->dn_vn; - else - vnptr = &vn_p; - error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, vnptr); + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vn_p); + + /* Do this before grabbing the lock */ + if (error == 0) { + vnode_setneedinactive(vn_p); + } DEVFS_LOCK(); if (error == 0) { + vnode_settag(vn_p, VT_DEVFS); + if ((dnp->dn_clone != NULL) && (dnp->dn_vn != NULLVP) ) - panic("devnode already has a vnode?"); - /* - * Don't cache the vnode for the next open, if the - * device is a cloning device (each open gets it's - * own per-device instance vnode). + panic("devfs_dntovn: cloning device with a vnode?\n"); + + *vn_pp = vn_p; + + /* + * Another vnode that has this devnode as its v_data. + * This reference, unlike the one taken at the start + * of the function, persists until a VNOP_RECLAIM + * comes through for this vnode. */ - if (dnp->dn_clone == NULL) { - *vn_pp = dnp->dn_vn; - } else { - *vn_pp = vn_p; - } + devfs_ref_node(dnp); + /* + * A cloned vnode is not hooked into the devnode; every lookup + * gets a new vnode. 
+ */ + if (dnp->dn_clone == NULL) { + dnp->dn_vn = vn_p; + } } else if (n_minor != DEVFS_CLONE_ALLOC) { /* * If we failed the create, we need to release the cloned minor @@ -1129,22 +1307,41 @@ devfs_dntovn(devnode_t * dnp, struct vnode **vn_pp, __unused struct proc * p) wakeup(&dnp->dn_lflags); } - devfs_release_busy(dnp); +out: + /* + * Release the reference we took to prevent deletion while we weren't holding the lock. + * If not returning success, then dropping this reference could delete the devnode; + * no one should access a devnode after a call to devfs_dntovn fails. + */ + devfs_rele_node(dnp); return error; } +/* + * Increment refcount on a devnode; prevents free of the node + * while the devfs lock is not held. + */ +void +devfs_ref_node(devnode_t *dnp) +{ + dnp->dn_refcount++; +} -/*********************************************************************** - * called with DEVFS_LOCK held - ***********************************************************************/ -static void -devfs_release_busy(devnode_t *dnp) { - - dnp->dn_lflags &= ~DN_BUSY; +/* + * Release a reference on a devnode. If the devnode is marked for + * free and the refcount is dropped to zero, do the free. 
+ */ +void +devfs_rele_node(devnode_t *dnp) +{ + dnp->dn_refcount--; + if (dnp->dn_refcount < 0) { + panic("devfs_rele_node: devnode with a negative refcount!\n"); + } else if ((dnp->dn_refcount == 0) && (dnp->dn_lflags & DN_DELETE)) { + devnode_free(dnp); + } - if (dnp->dn_lflags & DN_DELETE) - devnode_free(dnp); } /*********************************************************************** @@ -1177,6 +1374,69 @@ dev_add_entry(const char *name, devnode_t * parent, int type, devnode_type_t * t return error; } +static void +devfs_bulk_notify(devfs_event_log_t delp) +{ + uint32_t i; + for (i = 0; i < delp->del_used; i++) { + devfs_vnode_event_t dvep = &delp->del_entries[i]; + if (vnode_getwithvid(dvep->dve_vp, dvep->dve_vid) == 0) { + vnode_notify(dvep->dve_vp, dvep->dve_events, NULL); + vnode_put(dvep->dve_vp); + } + } +} + +static void +devfs_record_event(devfs_event_log_t delp, devnode_t *dnp, uint32_t events) +{ + if (delp->del_used >= delp->del_max) { + panic("devfs event log overflowed.\n"); + } + + /* Can only notify for nodes that have an associated vnode */ + if (dnp->dn_vn != NULLVP && vnode_ismonitored(dnp->dn_vn)) { + devfs_vnode_event_t dvep = &delp->del_entries[delp->del_used]; + dvep->dve_vp = dnp->dn_vn; + dvep->dve_vid = vnode_vid(dnp->dn_vn); + dvep->dve_events = events; + delp->del_used++; + } +} + +static int +devfs_init_event_log(devfs_event_log_t delp, uint32_t count, devfs_vnode_event_t buf) +{ + devfs_vnode_event_t dvearr; + + if (buf == NULL) { + MALLOC(dvearr, devfs_vnode_event_t, count * sizeof(struct devfs_vnode_event), M_TEMP, M_WAITOK | M_ZERO); + if (dvearr == NULL) { + return ENOMEM; + } + } else { + dvearr = buf; + } + + delp->del_max = count; + delp->del_used = 0; + delp->del_entries = dvearr; + return 0; +} + +static void +devfs_release_event_log(devfs_event_log_t delp, int need_free) +{ + if (delp->del_entries == NULL) { + panic("Free of devfs notify info that has not been intialized.\n"); + } + + if (need_free) { + 
FREE(delp->del_entries, M_TEMP); + } + + delp->del_entries = NULL; +} /* * Function: devfs_make_node @@ -1200,64 +1460,24 @@ devfs_make_node_clone(dev_t dev, int chrblk, uid_t uid, const char *fmt, ...) { devdirent_t * new_dev = NULL; - devnode_t * dnp; /* devnode for parent directory */ - devnode_type_t typeinfo; - - char *name, buf[256]; /* XXX */ - const char *path; - int i; + devfstype_t type; va_list ap; - - DEVFS_LOCK(); - - if (!devfs_ready) { - printf("devfs_make_node: not ready for devices!\n"); - goto out; + switch (chrblk) { + case DEVFS_CHAR: + type = DEV_CDEV; + break; + case DEVFS_BLOCK: + type = DEV_BDEV; + break; + default: + goto out; } - if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) - goto out; - - DEVFS_UNLOCK(); va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, clone, fmt, ap); va_end(ap); - - name = NULL; - - for(i=strlen(buf); i>0; i--) - if(buf[i] == '/') { - name=&buf[i]; - buf[i]=0; - break; - } - - if (name) { - *name++ = '\0'; - path = buf; - } else { - name = buf; - path = "/"; - } - DEVFS_LOCK(); - - /* find/create directory path ie. mkdir -p */ - if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp) == 0) { - typeinfo.dev = dev; - if (dev_add_entry(name, dnp, - (chrblk == DEVFS_CHAR) ? DEV_CDEV : DEV_BDEV, - &typeinfo, NULL, NULL, &new_dev) == 0) { - new_dev->de_dnp->dn_gid = gid; - new_dev->de_dnp->dn_uid = uid; - new_dev->de_dnp->dn_mode |= perms; - new_dev->de_dnp->dn_clone = clone; - devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev); - } - } out: - DEVFS_UNLOCK(); - return new_dev; } @@ -1282,32 +1502,42 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, gid_t gid, int perms, const char *fmt, ...) 
{ devdirent_t * new_dev = NULL; - devnode_t * dnp; /* devnode for parent directory */ - devnode_type_t typeinfo; - - char *name, buf[256]; /* XXX */ - const char *path; -#if CONFIG_MACF - char buff[sizeof(buf)]; -#endif - int i; + devfstype_t type; va_list ap; - - DEVFS_LOCK(); - - if (!devfs_ready) { - printf("devfs_make_node: not ready for devices!\n"); - goto out; - } if (chrblk != DEVFS_CHAR && chrblk != DEVFS_BLOCK) goto out; - DEVFS_UNLOCK(); + type = (chrblk == DEVFS_BLOCK ? DEV_BDEV : DEV_CDEV); va_start(ap, fmt); - vsnprintf(buf, sizeof(buf), fmt, ap); + new_dev = devfs_make_node_internal(dev, type, uid, gid, perms, NULL, fmt, ap); va_end(ap); + +out: + return new_dev; +} + +static devdirent_t * +devfs_make_node_internal(dev_t dev, devfstype_t type, uid_t uid, + gid_t gid, int perms, int (*clone)(dev_t dev, int action), const char *fmt, va_list ap) +{ + devdirent_t * new_dev = NULL; + devnode_t * dnp; + devnode_type_t typeinfo; + + char *name, buf[256]; /* XXX */ + const char *path; +#if CONFIG_MACF + char buff[sizeof(buf)]; +#endif + int i; + uint32_t log_count; + struct devfs_event_log event_log; + struct devfs_vnode_event stackbuf[NUM_STACK_ENTRIES]; + int need_free = 0; + + vsnprintf(buf, sizeof(buf), fmt, ap); #if CONFIG_MACF bcopy(buf, buff, sizeof(buff)); @@ -1329,28 +1559,55 @@ devfs_make_node(dev_t dev, int chrblk, uid_t uid, name = buf; path = "/"; } + + log_count = devfs_nmountplanes; + if (log_count > NUM_STACK_ENTRIES) { +wrongsize: + need_free = 1; + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + return NULL; + } + } else { + need_free = 0; + log_count = NUM_STACK_ENTRIES; + if (devfs_init_event_log(&event_log, log_count, &stackbuf[0]) != 0) { + return NULL; + } + } + DEVFS_LOCK(); + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, need_free); + log_count = log_count * 2; + goto wrongsize; + } + + if (!devfs_ready) { + printf("devfs_make_node: not ready for devices!\n"); + goto 
out; + } /* find/create directory path ie. mkdir -p */ - if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp) == 0) { + if (dev_finddir(path, NULL, DEVFS_CREATE, &dnp, &event_log) == 0) { typeinfo.dev = dev; - if (dev_add_entry(name, dnp, - (chrblk == DEVFS_CHAR) ? DEV_CDEV : DEV_BDEV, - &typeinfo, NULL, NULL, &new_dev) == 0) { + if (dev_add_entry(name, dnp, type, &typeinfo, NULL, NULL, &new_dev) == 0) { new_dev->de_dnp->dn_gid = gid; new_dev->de_dnp->dn_uid = uid; new_dev->de_dnp->dn_mode |= perms; - new_dev->de_dnp->dn_clone = NULL; - + new_dev->de_dnp->dn_clone = clone; #if CONFIG_MACF mac_devfs_label_associate_device(dev, new_dev->de_dnp, buff); #endif - devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev); + devfs_propogate(dnp->dn_typeinfo.Dir.myname, new_dev, &event_log); } } + out: DEVFS_UNLOCK(); + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, need_free); return new_dev; } @@ -1369,6 +1626,8 @@ devfs_make_link(void *original, char *fmt, ...) devdirent_t * new_dev = NULL; devdirent_t * orig = (devdirent_t *) original; devnode_t * dirnode; /* devnode for parent directory */ + struct devfs_event_log event_log; + uint32_t log_count; va_list ap; char *p, buf[256]; /* XXX */ @@ -1377,8 +1636,9 @@ devfs_make_link(void *original, char *fmt, ...) DEVFS_LOCK(); if (!devfs_ready) { + DEVFS_UNLOCK(); printf("devfs_make_link: not ready for devices!\n"); - goto out; + return -1; } DEVFS_UNLOCK(); @@ -1395,23 +1655,43 @@ devfs_make_link(void *original, char *fmt, ...) 
break; } } + + /* + * One slot for each directory, one for each devnode + * whose link count changes + */ + log_count = devfs_nmountplanes * 2; +wrongsize: + if (devfs_init_event_log(&event_log, log_count, NULL) != 0) { + /* No lock held, no allocations done, can just return */ + return -1; + } + DEVFS_LOCK(); + if (log_count < devfs_nmountplanes) { + DEVFS_UNLOCK(); + devfs_release_event_log(&event_log, 1); + log_count = log_count * 2; + goto wrongsize; + } + if (p) { *p++ = '\0'; - if (dev_finddir(buf, NULL, DEVFS_CREATE, &dirnode) + if (dev_finddir(buf, NULL, DEVFS_CREATE, &dirnode, &event_log) || dev_add_name(p, dirnode, NULL, orig->de_dnp, &new_dev)) goto fail; } else { - if (dev_finddir("", NULL, DEVFS_CREATE, &dirnode) + if (dev_finddir("", NULL, DEVFS_CREATE, &dirnode, &event_log) || dev_add_name(buf, dirnode, NULL, orig->de_dnp, &new_dev)) goto fail; } - devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev); + devfs_propogate(dirnode->dn_typeinfo.Dir.myname, new_dev, &event_log); fail: -out: DEVFS_UNLOCK(); + devfs_bulk_notify(&event_log); + devfs_release_event_log(&event_log, 1); return ((new_dev != NULL) ? 0 : -1); } diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index fc9b55f24..c5875bd55 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -85,10 +85,14 @@ #include "devfs.h" #include "devfsdefs.h" +#if FDESC +#include "fdesc.h" +#endif /* FDESC */ + + static int devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, vfs_context_t ctx); static int devfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx); -static struct vfstable * devfs_vfsp = 0; extern int setup_kmem; __private_extern__ void devfs_setup_kmem(void); @@ -102,10 +106,8 @@ __private_extern__ void devfs_setup_kmem(void); * devices from devfs get sync'd. 
*/ static int -devfs_init(struct vfsconf *vfsp) +devfs_init(__unused struct vfsconf *vfsp) { - devfs_vfsp = (struct vfstable *)vfsp; /* remember this for devfs_kernel_mount below */ - if (devfs_sinit()) return (ENOTSUP); devfs_make_node(makedev(0, 0), DEVFS_CHAR, @@ -121,6 +123,11 @@ devfs_init(struct vfsconf *vfsp) UID_ROOT, GID_WHEEL, 0666, "zero"); devfs_make_node(makedev(6, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600, "klog"); + +#if FDESC + devfs_fdesc_init(); +#endif + return 0; } @@ -182,7 +189,7 @@ devfs_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data, * Fill out some fields */ mp->mnt_data = (qaddr_t)devfs_mp_p; - mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(void *)devfs_mp_p; + mp->mnt_vfsstat.f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); mp->mnt_flag |= MNT_LOCAL; @@ -286,7 +293,7 @@ devfs_statfs( struct mount *mp, struct vfsstatfs *sbp, __unused vfs_context_t ct sbp->f_bavail = 0; sbp->f_files = devfs_stats.nodes; sbp->f_ffree = 0; - sbp->f_fsid.val[0] = (int32_t)(void *)devfs_mp_p; + sbp->f_fsid.val[0] = (int32_t)(uintptr_t)devfs_mp_p; sbp->f_fsid.val[1] = vfs_typenum(mp); return 0; @@ -315,6 +322,108 @@ devfs_vfs_getattr(__unused mount_t mp, struct vfs_attr *fsap, __unused vfs_conte VFSATTR_RETURN(fsap, f_ffree, 0); VFSATTR_RETURN(fsap, f_fssubtype, 0); + if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_HIDDEN_FILES; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_ATTRLIST ; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] = 0; + + fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = + 
VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_SYMBOLICLINKS | + VOL_CAP_FMT_HARDLINKS | + VOL_CAP_FMT_JOURNAL | + VOL_CAP_FMT_JOURNAL_ACTIVE | + VOL_CAP_FMT_NO_ROOT_TIMES | + VOL_CAP_FMT_SPARSE_FILES | + VOL_CAP_FMT_ZERO_RUNS | + VOL_CAP_FMT_CASE_SENSITIVE | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_2TB_FILESIZE | + VOL_CAP_FMT_OPENDENYMODES | + VOL_CAP_FMT_HIDDEN_FILES | + VOL_CAP_FMT_PATH_FROM_ID | + VOL_CAP_FMT_NO_VOLUME_SIZES; + fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = + VOL_CAP_INT_SEARCHFS | + VOL_CAP_INT_ATTRLIST | + VOL_CAP_INT_NFSEXPORT | + VOL_CAP_INT_READDIRATTR | + VOL_CAP_INT_EXCHANGEDATA | + VOL_CAP_INT_COPYFILE | + VOL_CAP_INT_ALLOCATE | + VOL_CAP_INT_VOL_RENAME | + VOL_CAP_INT_ADVLOCK | + VOL_CAP_INT_FLOCK | + VOL_CAP_INT_EXTENDED_SECURITY | + VOL_CAP_INT_USERACCESS | + VOL_CAP_INT_MANLOCK | + VOL_CAP_INT_EXTENDED_ATTR | + VOL_CAP_INT_NAMEDSTREAMS; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; + fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_capabilities); + } + + if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { + fsap->f_attributes.validattr.commonattr = + ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | + ATTR_CMN_OBJTYPE | ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | + ATTR_CMN_PAROBJID | + ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | ATTR_CMN_ACCTIME | + ATTR_CMN_OWNERID | ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | + ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS | ATTR_CMN_FILEID; + fsap->f_attributes.validattr.volattr = + ATTR_VOL_FSTYPE | ATTR_VOL_SIZE | ATTR_VOL_SPACEFREE | + ATTR_VOL_SPACEAVAIL | ATTR_VOL_MINALLOCATION | + ATTR_VOL_OBJCOUNT | ATTR_VOL_MAXOBJCOUNT | + ATTR_VOL_MOUNTPOINT | ATTR_VOL_MOUNTFLAGS | + ATTR_VOL_MOUNTEDDEVICE | ATTR_VOL_CAPABILITIES | + ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.validattr.dirattr = + ATTR_DIR_LINKCOUNT | ATTR_DIR_MOUNTSTATUS; + fsap->f_attributes.validattr.fileattr = + ATTR_FILE_LINKCOUNT | ATTR_FILE_TOTALSIZE | + 
ATTR_FILE_IOBLOCKSIZE | ATTR_FILE_DEVTYPE | + ATTR_FILE_DATALENGTH; + fsap->f_attributes.validattr.forkattr = 0; + + fsap->f_attributes.nativeattr.commonattr = + ATTR_CMN_NAME | ATTR_CMN_DEVID | ATTR_CMN_FSID | + ATTR_CMN_OBJTYPE | ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | + ATTR_CMN_PAROBJID | + ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | ATTR_CMN_ACCTIME | + ATTR_CMN_OWNERID | ATTR_CMN_GRPID | ATTR_CMN_ACCESSMASK | + ATTR_CMN_FLAGS | ATTR_CMN_USERACCESS | ATTR_CMN_FILEID; + fsap->f_attributes.nativeattr.volattr = + ATTR_VOL_FSTYPE | ATTR_VOL_SIZE | ATTR_VOL_SPACEFREE | + ATTR_VOL_SPACEAVAIL | ATTR_VOL_MINALLOCATION | + ATTR_VOL_OBJCOUNT | ATTR_VOL_MAXOBJCOUNT | + ATTR_VOL_MOUNTPOINT | ATTR_VOL_MOUNTFLAGS | + ATTR_VOL_MOUNTEDDEVICE | ATTR_VOL_CAPABILITIES | + ATTR_VOL_ATTRIBUTES; + fsap->f_attributes.nativeattr.dirattr = + ATTR_DIR_MOUNTSTATUS; + fsap->f_attributes.nativeattr.fileattr = + ATTR_FILE_LINKCOUNT | ATTR_FILE_TOTALSIZE | + ATTR_FILE_IOBLOCKSIZE | ATTR_FILE_DEVTYPE | + ATTR_FILE_DATALENGTH; + fsap->f_attributes.nativeattr.forkattr = 0; + + VFSATTR_SET_SUPPORTED(fsap, f_attributes); + } + return 0; } @@ -372,16 +481,21 @@ devfs_kernel_mount(char * mntname) struct nameidata nd; struct vnode * vp; vfs_context_t ctx = vfs_context_kernel(); + struct vfstable *vfsp; - if (devfs_vfsp == NULL) { - printf("devfs_kernel_mount: devfs_vfsp is NULL\n"); - return (EINVAL); - } + /* Find our vfstable entry */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name))) + break; + + if (!vfsp) { + panic("Could not find entry in vfsconf for devfs.\n"); + } /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(mntname), ctx); if ((error = namei(&nd))) { printf("devfs_kernel_mount: failed to find directory '%s', %d", @@ -414,9 +528,9 @@ devfs_kernel_mount(char * mntname) /* * Allocate and initialize the filesystem. 
*/ - MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), + MALLOC_ZONE(mp, struct mount *, sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); + bzero((char *)mp, sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; @@ -431,14 +545,11 @@ devfs_kernel_mount(char * mntname) TAILQ_INIT(&mp->mnt_newvnodes); (void)vfs_busy(mp, LK_NOWAIT); - mp->mnt_op = devfs_vfsp->vfc_vfsops; - mp->mnt_vtable = devfs_vfsp; - devfs_vfsp->vfc_refcount++; - devfs_vfsp->vfc_threadsafe = TRUE; - devfs_vfsp->vfc_64bitready = TRUE; + mp->mnt_op = &devfs_vfsops; + mp->mnt_vtable = vfsp; mp->mnt_flag = 0; - mp->mnt_flag |= devfs_vfsp->vfc_flags & MNT_VISFLAGMASK; - strlcpy(mp->mnt_vfsstat.f_fstypename, devfs_vfsp->vfc_name, MFSTYPENAMELEN); + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); diff --git a/bsd/miscfs/devfs/devfs_vnops.c b/bsd/miscfs/devfs/devfs_vnops.c index 6c7f5af9d..58746bb73 100644 --- a/bsd/miscfs/devfs/devfs_vnops.c +++ b/bsd/miscfs/devfs/devfs_vnops.c @@ -104,9 +104,15 @@ #endif #include "devfsdefs.h" +#include "devfs.h" + +#if FDESC +#include "fdesc.h" +#endif /* FDESC */ static int devfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify); +void devfs_rele_node(devnode_t *); /* @@ -360,7 +366,10 @@ devfs_getattr(struct vnop_getattr_args *ap) switch (file_node->dn_type) { case DEV_DIR: - VATTR_RETURN(vap, va_rdev, (dev_t)file_node->dn_dvm); +#if FDESC + case DEV_DEVFD: /* Like a directory */ +#endif /* FDESC */ + VATTR_RETURN(vap, va_rdev, 0); vap->va_mode |= (S_IFDIR); break; case DEV_CDEV: @@ -383,7 +392,7 @@ devfs_getattr(struct vnop_getattr_args *ap) VATTR_RETURN(vap, va_uid, file_node->dn_uid); VATTR_RETURN(vap, va_gid, file_node->dn_gid); 
VATTR_RETURN(vap, va_fsid, (uintptr_t)file_node->dn_dvm); - VATTR_RETURN(vap, va_fileid, (uintptr_t)file_node); + VATTR_RETURN(vap, va_fileid, (uintptr_t)file_node->dn_ino); VATTR_RETURN(vap, va_data_size, file_node->dn_len); /* return an override block size (advisory) */ @@ -406,10 +415,16 @@ devfs_getattr(struct vnop_getattr_args *ap) VATTR_RETURN(vap, va_modify_time, file_node->dn_mtime); VATTR_RETURN(vap, va_access_time, file_node->dn_atime); VATTR_RETURN(vap, va_gen, 0); - VATTR_RETURN(vap, va_flags, 0); VATTR_RETURN(vap, va_filerev, 0); VATTR_RETURN(vap, va_acl, NULL); + /* Hide the root so Finder doesn't display it */ + if (vnode_isvroot(vp)) { + VATTR_RETURN(vap, va_flags, UF_HIDDEN); + } else { + VATTR_RETURN(vap, va_flags, 0); + } + DEVFS_UNLOCK(); return 0; @@ -565,12 +580,8 @@ devfsspec_close(struct vnop_close_args *ap) struct vnode * vp = ap->a_vp; register devnode_t * dnp; struct timeval now; - int ref = 1; - - if (vp->v_type == VBLK) - ref = 0; - if (vnode_isinuse(vp, ref)) { + if (vnode_isinuse(vp, 0)) { DEVFS_LOCK(); microtime(&now); dnp = VTODN(vp); @@ -636,8 +647,12 @@ devfs_write(struct vnop_write_args *ap) return 0; /* not reached */ } +/* + * Deviates from UFS naming convention because there is a KPI function + * called devfs_remove(). 
+ */ static int -devfs_remove(struct vnop_remove_args *ap) +devfs_vnop_remove(struct vnop_remove_args *ap) /* struct vnop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; @@ -1017,7 +1032,7 @@ devfs_rmdir(struct vnop_rmdir_args *ap) ra.a_flags = 0; /* XXX */ ra.a_context = ap->a_context; - return devfs_remove(&ra); + return devfs_vnop_remove(&ra); } @@ -1032,25 +1047,35 @@ devfs_symlink(struct vnop_symlink_args *ap) vfs_context_t a_context; } */ { - struct componentname * cnp = ap->a_cnp; - vfs_context_t ctx = cnp->cn_context; - struct proc *p = vfs_context_proc(ctx); + int error; + devdirent_t *newent; + + DEVFS_LOCK(); + error = devfs_make_symlink(VTODN(ap->a_dvp), ap->a_cnp->cn_nameptr, ap->a_vap->va_mode, ap->a_target, &newent); + + if (error == 0) { + error = devfs_dntovn(newent->de_dnp, ap->a_vpp, vfs_context_proc(ap->a_context)); + } + + DEVFS_UNLOCK(); + + return error; + +} + +/* Called with devfs locked */ +int +devfs_make_symlink(devnode_t *dir_p, char *name, int mode, char *target, devdirent_t **newent) +{ int error = 0; - devnode_t * dir_p; devnode_type_t typeinfo; devdirent_t * nm_p; devnode_t * dev_p; - struct vnode_attr * vap = ap->a_vap; - struct vnode * * vpp = ap->a_vpp; - typeinfo.Slnk.name = ap->a_target; - typeinfo.Slnk.namelen = strlen(ap->a_target); + typeinfo.Slnk.name = target; + typeinfo.Slnk.namelen = strlen(target); - DEVFS_LOCK(); - - dir_p = VTODN(ap->a_dvp); - - error = dev_add_entry(cnp->cn_nameptr, dir_p, DEV_SLNK, + error = dev_add_entry(name, dir_p, DEV_SLNK, &typeinfo, NULL, NULL, &nm_p); if (error) { goto failure; @@ -1058,12 +1083,14 @@ devfs_symlink(struct vnop_symlink_args *ap) dev_p = nm_p->de_dnp; dev_p->dn_uid = dir_p->dn_uid; dev_p->dn_gid = dir_p->dn_gid; - dev_p->dn_mode = vap->va_mode; + dev_p->dn_mode = mode; dn_copy_times(dev_p, dir_p); - error = devfs_dntovn(dev_p, vpp, p); + if (newent) { + *newent = nm_p; + } + failure: - DEVFS_UNLOCK(); return error; } @@ -1172,23 +1199,22 @@ devfs_readdir(struct 
vnop_readdir_args *ap) switch(nodenumber) { case 0: - dirent.d_fileno = (int32_t)(void *)dir_node; + dirent.d_fileno = dir_node->dn_ino; name = "."; dirent.d_namlen = 1; dirent.d_type = DT_DIR; break; case 1: if(dir_node->dn_typeinfo.Dir.parent) - dirent.d_fileno - = (int32_t)dir_node->dn_typeinfo.Dir.parent; + dirent.d_fileno = dir_node->dn_typeinfo.Dir.parent->dn_ino; else - dirent.d_fileno = (u_int32_t)dir_node; + dirent.d_fileno = dir_node->dn_ino; name = ".."; dirent.d_namlen = 2; dirent.d_type = DT_DIR; break; default: - dirent.d_fileno = (int32_t)(void *)name_node->de_dnp; + dirent.d_fileno = name_node->de_dnp->dn_ino; dirent.d_namlen = strlen(name_node->de_name); name = name_node->de_name; switch(name_node->de_dnp->dn_type) { @@ -1276,16 +1302,12 @@ devfs_reclaim(struct vnop_reclaim_args *ap) dnp = VTODN(vp); if (dnp) { - /* - * do the same as devfs_inactive in case it is not called - * before us (can that ever happen?) - */ + /* If this is a cloning device, it didn't have a dn_vn anyway */ dnp->dn_vn = NULL; - vp->v_data = NULL; + vnode_clearfsnode(vp); - if (dnp->dn_delete) { - devnode_free(dnp); - } + /* This could delete the node, if we are the last vnode */ + devfs_rele_node(dnp); } DEVFS_UNLOCK(); @@ -1352,7 +1374,18 @@ devs_vnop_pathconf( static int devfs_inactive(__unused struct vnop_inactive_args *ap) { - return (0); + vnode_t vp = ap->a_vp; + devnode_t *dnp = VTODN(vp); + + /* + * Cloned vnodes are not linked in anywhere, so they + * can just be recycled. 
+ */ + if (dnp->dn_clone != NULL) { + vnode_recycle(vp); + } + + return (0); } /* @@ -1399,7 +1432,7 @@ static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = { { &vnop_revoke_desc, (VOPFUNC)err_revoke }, /* revoke */ { &vnop_mmap_desc, (VOPFUNC)err_mmap }, /* mmap */ { &vnop_fsync_desc, (VOPFUNC)nop_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ + { &vnop_remove_desc, (VOPFUNC)devfs_vnop_remove }, /* remove */ { &vnop_link_desc, (VOPFUNC)devfs_link }, /* link */ { &vnop_rename_desc, (VOPFUNC)devfs_rename }, /* rename */ { &vnop_mkdir_desc, (VOPFUNC)devfs_mkdir }, /* mkdir */ @@ -1445,7 +1478,7 @@ static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = { { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ { &vnop_fsync_desc, (VOPFUNC)spec_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)devfs_remove }, /* remove */ + { &vnop_remove_desc, (VOPFUNC)devfs_vnop_remove }, /* remove */ { &vnop_link_desc, (VOPFUNC)devfs_link }, /* link */ { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ @@ -1472,3 +1505,30 @@ static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = { }; struct vnodeopv_desc devfs_spec_vnodeop_opv_desc = { &devfs_spec_vnodeop_p, devfs_spec_vnodeop_entries }; + + +#if FDESC +int (**devfs_devfd_vnodeop_p)(void*); +static struct vnodeopv_entry_desc devfs_devfd_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC)vn_default_error }, + { &vnop_lookup_desc, (VOPFUNC)devfs_devfd_lookup}, /* lookup */ + { &vnop_open_desc, (VOPFUNC)nop_open }, /* open */ + { &vnop_close_desc, (VOPFUNC)devfs_close }, /* close */ + { &vnop_getattr_desc, (VOPFUNC)devfs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC)devfs_setattr }, /* setattr */ + { &vnop_revoke_desc, (VOPFUNC)err_revoke }, /* revoke */ + { &vnop_fsync_desc, (VOPFUNC)nop_fsync }, /* fsync */ + { &vnop_readdir_desc, 
(VOPFUNC)devfs_devfd_readdir}, /* readdir */ + { &vnop_inactive_desc, (VOPFUNC)devfs_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC)devfs_reclaim }, /* reclaim */ + { &vnop_pathconf_desc, (VOPFUNC)devs_vnop_pathconf }, /* pathconf */ +#if CONFIG_MACF + { &vnop_setlabel_desc, (VOPFUNC)devfs_setlabel }, /* setlabel */ +#endif + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc devfs_devfd_vnodeop_opv_desc = + { &devfs_devfd_vnodeop_p, devfs_devfd_vnodeop_entries}; +#endif /* FDESC */ + + diff --git a/bsd/miscfs/devfs/devfsdefs.h b/bsd/miscfs/devfs/devfsdefs.h index 701b8f43d..ce85cf853 100644 --- a/bsd/miscfs/devfs/devfsdefs.h +++ b/bsd/miscfs/devfs/devfsdefs.h @@ -74,6 +74,7 @@ #include +__BEGIN_DECLS #ifdef __APPLE_API_PRIVATE #define DEVMAXNAMESIZE 32 /* XXX */ #define DEVMAXPATHSIZE 128 /* XXX */ @@ -82,7 +83,10 @@ typedef enum { DEV_DIR, DEV_BDEV, DEV_CDEV, - DEV_SLNK + DEV_SLNK, +#if FDESC + DEV_DEVFD +#endif /* FDESC */ } devfstype_t; extern int (**devfs_vnodeop_p)(void *); /* our own vector array for dirs */ @@ -118,7 +122,18 @@ union devnode_type { struct devnode { devfstype_t dn_type; - int dn_flags; + /* + * Number of vnodes that point to this devnode. Note, we do not + * add another reference for a lookup which finds an existing + * vnode; a reference is added when a vnode is created and removed + * when a vnode is reclaimed. A devnode will not be freed while + * there are outstanding references. A refcount can be added to + * prevent the free of a devnode in situations where there is not + * guaranteed to be a vnode holding a ref, but it is important to + * make sure that a deferred delete eventually happens if it is + * blocked behind that reference. 
+ */ + int dn_refcount; u_short dn_mode; uid_t dn_uid; gid_t dn_gid; @@ -134,19 +149,18 @@ struct devnode devnode_t * dn_nextsibling; /* the list of equivalent nodes */ devnode_t * * dn_prevsiblingp;/* backpointer for the above */ devnode_type_t dn_typeinfo; - int dn_delete; /* mark for deletion */ int dn_change; int dn_update; int dn_access; - int dn_lflags; + int dn_lflags; + ino_t dn_ino; int (*dn_clone)(dev_t dev, int action); /* get minor # */ struct label * dn_label; /* security label */ }; -#define DN_BUSY 0x01 #define DN_DELETE 0x02 #define DN_CREATE 0x04 -#define DN_CREATEWAIT 0x08 +#define DN_CREATEWAIT 0x08 struct devdirent @@ -210,49 +224,49 @@ struct devfsmount static __inline__ void DEVFS_INCR_ENTRIES(void) { - OSAddAtomic(1, (SInt32 *)&devfs_stats.entries); + OSAddAtomic(1, &devfs_stats.entries); } static __inline__ void DEVFS_DECR_ENTRIES(void) { - OSAddAtomic(-1, (SInt32 *)&devfs_stats.entries); + OSAddAtomic(-1, &devfs_stats.entries); } static __inline__ void DEVFS_INCR_NODES(void) { - OSAddAtomic(1, (SInt32 *)&devfs_stats.nodes); + OSAddAtomic(1, &devfs_stats.nodes); } static __inline__ void DEVFS_DECR_NODES(void) { - OSAddAtomic(-1, (SInt32 *)&devfs_stats.nodes); + OSAddAtomic(-1, &devfs_stats.nodes); } static __inline__ void DEVFS_INCR_MOUNTS(void) { - OSAddAtomic(1, (SInt32 *)&devfs_stats.mounts); + OSAddAtomic(1, &devfs_stats.mounts); } static __inline__ void DEVFS_DECR_MOUNTS(void) { - OSAddAtomic(-1, (SInt32 *)&devfs_stats.mounts); + OSAddAtomic(-1, &devfs_stats.mounts); } static __inline__ void DEVFS_INCR_STRINGSPACE(int space) { - OSAddAtomic(space, (SInt32 *)&devfs_stats.stringspace); + OSAddAtomic(space, &devfs_stats.stringspace); } static __inline__ void DEVFS_DECR_STRINGSPACE(int space) { - OSAddAtomic(-space, (SInt32 *)&devfs_stats.stringspace); + OSAddAtomic(-space, &devfs_stats.stringspace); } static __inline__ void @@ -285,5 +299,13 @@ dn_copy_times(devnode_t * target, devnode_t * source) target->dn_ctime = source->dn_ctime; 
return; } + +#ifdef BSD_KERNEL_PRIVATE +int devfs_make_symlink(devnode_t *dir_p, char *name, int mode, char *target, devdirent_t **newent); +#endif /* BSD_KERNEL_PRIVATE */ + #endif /* __APPLE_API_PRIVATE */ + +__END_DECLS + #endif /* __DEVFS_DEVFSDEFS_H__ */ diff --git a/bsd/miscfs/fdesc/fdesc.h b/bsd/miscfs/devfs/fdesc.h similarity index 93% rename from bsd/miscfs/fdesc/fdesc.h rename to bsd/miscfs/devfs/fdesc.h index 8cb66f68b..f4ff749ef 100644 --- a/bsd/miscfs/fdesc/fdesc.h +++ b/bsd/miscfs/devfs/fdesc.h @@ -72,9 +72,6 @@ #ifdef __APPLE_API_PRIVATE #ifdef KERNEL -struct fdescmount { - struct vnode *f_root; /* Root node */ -}; #define FD_ROOT 2 #define FD_DEVFD 3 @@ -85,17 +82,14 @@ struct fdescmount { #define FD_MAX 12 typedef enum { - Froot, - Fdevfd, Fdesc, - Flink } fdntype; struct fdescnode { LIST_ENTRY(fdescnode) fd_hash; /* Hash list */ struct vnode *fd_vnode; /* Back ptr to vnode */ fdntype fd_type; /* Type of this node */ - unsigned fd_fd; /* Fd to be dup'ed */ + long fd_fd; /* Fd to be dup'ed */ const char *fd_link; /* Link to fd/n */ int fd_ix; /* filesystem index */ }; @@ -103,13 +97,16 @@ struct fdescnode { #define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data)) #define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) -extern int fdesc_allocvp(fdntype, int, struct mount *, struct vnode **, enum vtype); +__BEGIN_DECLS +extern int fdesc_allocvp(fdntype, int, struct mount *, struct vnode **, enum vtype, int); extern int fdesc_badop(void); extern int fdesc_getattr(struct vnop_getattr_args *ap); extern int fdesc_inactive(struct vnop_inactive_args *ap); -extern int fdesc_init(struct vfsconf *); +extern int devfs_fdesc_init(void); +extern int devfs_fdesc_makelinks(void); extern int fdesc_ioctl(struct vnop_ioctl_args *ap); -extern int fdesc_lookup(struct vnop_lookup_args *ap); +extern int devfs_devfd_lookup(struct vnop_lookup_args *ap); +extern int devfs_devfd_readdir(struct vnop_readdir_args *ap); extern int fdesc_open(struct vnop_open_args *ap); 
extern int fdesc_pathconf(struct vnop_pathconf_args *ap); extern int fdesc_read(struct vnop_read_args *ap); @@ -122,7 +119,10 @@ extern int fdesc_setattr(struct vnop_setattr_args *ap); extern int fdesc_write(struct vnop_write_args *ap); extern int (**fdesc_vnodeop_p)(void *); +extern int (**devfs_devfd_vnodeop_p)(void*); extern struct vfsops fdesc_vfsops; +__END_DECLS + #endif /* KERNEL */ #endif /* __APPLE_API_PRIVATE */ #endif /* __FDESC_FDESC_H__ */ diff --git a/bsd/miscfs/devfs/index.html b/bsd/miscfs/devfs/index.html deleted file mode 100644 index a201382c6..000000000 --- a/bsd/miscfs/devfs/index.html +++ /dev/null @@ -1,22 +0,0 @@ - -FTP Menu at ftp2.FreeBSD.ORG - -

FTP Menu

-
- -[TXT] README
- -[TXT] devfs_proto.h
- -[TXT] devfs_tree.c
- -[TXT] devfs_vfsops.c
- -[TXT] devfs_vnops.c
- -[TXT] devfsdefs.h
- -[TXT] reproto.sh
-

-http-gw version 3.2 / 0 - (17.254.0.77) diff --git a/bsd/miscfs/fdesc/fdesc_vfsops.c b/bsd/miscfs/fdesc/fdesc_vfsops.c deleted file mode 100644 index 9d419783c..000000000 --- a/bsd/miscfs/fdesc/fdesc_vfsops.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1992, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software donated to Berkeley by - * Jan-Simon Pendry. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)fdesc_vfsops.c 8.10 (Berkeley) 5/14/95 - * - */ -/* -* /dev/fd Filesystem - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Mount the per-process file descriptors (/dev/fd) - */ -static int -fdesc_mount(struct mount *mp, __unused vnode_t devvp, __unused user_addr_t data, __unused vfs_context_t context) -{ - int error = 0; - struct fdescmount *fmp; - struct vnode *rvp; - - /* - * Update is a no-op - */ - if (mp->mnt_flag & MNT_UPDATE) - return (ENOTSUP); - - error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp, VDIR); - if (error) - return (error); - - MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount), - M_UFSMNT, M_WAITOK); /* XXX */ - - vnode_setnoflush(rvp); - vnode_ref(rvp); - vnode_put(rvp); - - fmp->f_root = rvp; - /* XXX -- don't mark as local to work around fts() problems */ - /*mp->mnt_flag |= MNT_LOCAL;*/ - mp->mnt_data = (qaddr_t) fmp; - vfs_getnewfsid(mp); - - bzero(mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN); - bcopy("fdesc", mp->mnt_vfsstat.f_mntfromname, sizeof("fdesc")); - return (0); -} - -static int -fdesc_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) -{ - return (0); -} - -static int -fdesc_unmount(struct mount *mp, int mntflags, __unused vfs_context_t context) -{ - int error; - int flags = 0; - int force = 0; - struct vnode *rvp = VFSTOFDESC(mp)->f_root; - - if (mntflags & MNT_FORCE) { - flags |= FORCECLOSE; - force = 1; - } - - if ( vnode_isinuse(rvp, 1) && !force ) - return (EBUSY); - if ( (error = vflush(mp, rvp, flags|SKIPSYSTEM)) && !force ) - return (error); - - /* - * And mark for recycle after we drop its reference; it away for future re-use - */ - vnode_recycle(rvp); - /* - * Release reference on underlying root vnode - */ - vnode_rele(rvp); - /* - * Finally, throw away the fdescmount structure - */ - _FREE(mp->mnt_data, M_UFSMNT); /* XXX */ - mp->mnt_data = NULL; - - return (0); -} - -int 
-fdesc_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context) -{ - struct vnode *vp; - - /* - * Return locked reference to root. - */ - vp = VFSTOFDESC(mp)->f_root; - vnode_get(vp); - *vpp = vp; - return (0); -} - -#if 0 -/* - * XXX commented out in mount.h - */ -int -fdesc_statfs(__unused struct mount *mp, struct vfsstatfs *sbp, vfs_context_t context) -{ - proc_t p = vfs_context_proc(context); - struct filedesc *fdp; - int lim; - int i; - int last; - int freefd; - - /* - * Compute number of free file descriptors. - * [ Strange results will ensue if the open file - * limit is ever reduced below the current number - * of open files... ] - */ - lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; - fdp = p->p_fd; - last = min(fdp->fd_nfiles, lim); - freefd = 0; - for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL && - !(fdp->fd_ofileflags[i] & UF_RESERVED)) - freefd++; - - /* - * Adjust for the fact that the fdesc array may not - * have been fully allocated yet. - */ - if (fdp->fd_nfiles < lim) - freefd += (lim - fdp->fd_nfiles); - - sbp->f_flags = 0; - sbp->f_bsize = DEV_BSIZE; - sbp->f_iosize = DEV_BSIZE; - sbp->f_blocks = (uint64_t)2; /* 1K to keep df happy */ - sbp->f_bfree = 0; - sbp->f_bavail = 0; - sbp->f_files = (uint64_t)((unsigned long)(lim + 1)); /* Allow for "." 
*/ - sbp->f_ffree = (uint64_t)((unsigned long)freefd); /* See comments above */ - - return (0); -} -#endif /* 0 */ - -static int -fdesc_vfs_getattr(__unused mount_t mp, struct vfs_attr *fsap, vfs_context_t context) -{ - VFSATTR_RETURN(fsap, f_bsize, DEV_BSIZE); - VFSATTR_RETURN(fsap, f_iosize, DEV_BSIZE); - VFSATTR_RETURN(fsap, f_blocks, 2); - VFSATTR_RETURN(fsap, f_bfree, 0); - VFSATTR_RETURN(fsap, f_bavail, 0); - VFSATTR_RETURN(fsap, f_fssubtype, 0); - - if (VFSATTR_IS_ACTIVE(fsap, f_objcount) || - VFSATTR_IS_ACTIVE(fsap, f_maxobjcount) || - VFSATTR_IS_ACTIVE(fsap, f_files) || - VFSATTR_IS_ACTIVE(fsap, f_ffree)) - { - proc_t p = vfs_context_proc(context); - struct filedesc *fdp; - int lim; - int i; - int last; - int freefd; - - /* - * Compute number of free file descriptors. - * [ Strange results will ensue if the open file - * limit is ever reduced below the current number - * of open files... ] - */ - lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; - fdp = p->p_fd; - last = min(fdp->fd_nfiles, lim); - freefd = 0; - for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL && - !(fdp->fd_ofileflags[i] & UF_RESERVED)) - freefd++; - - /* - * Adjust for the fact that the fdesc array may not - * have been fully allocated yet. 
- */ - if (fdp->fd_nfiles < lim) - freefd += (lim - fdp->fd_nfiles); - - VFSATTR_RETURN(fsap, f_objcount, lim+1); - VFSATTR_RETURN(fsap, f_maxobjcount, lim+1); - VFSATTR_RETURN(fsap, f_files, lim+1); - VFSATTR_RETURN(fsap, f_ffree, freefd); - } - - return 0; -} - -static int -fdesc_sync(__unused struct mount *mp, __unused int waitfor, __unused vfs_context_t context) -{ - - return (0); -} - -#define fdesc_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp -#define fdesc_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp -#define fdesc_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp -#define fdesc_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp - -struct vfsops fdesc_vfsops = { - fdesc_mount, - fdesc_start, - fdesc_unmount, - fdesc_root, - NULL, /* quotactl */ - fdesc_vfs_getattr, -/* fdesc_statfs, XXX commented out in mount.h */ - fdesc_sync, - fdesc_vget, - fdesc_fhtovp, - fdesc_vptofh, - fdesc_init, - fdesc_sysctl, - NULL, - {NULL} -}; diff --git a/bsd/isofs/Makefile b/bsd/miscfs/fifofs/Makefile similarity index 84% rename from bsd/isofs/Makefile rename to bsd/miscfs/fifofs/Makefile index fc780d6f1..ff18c9388 100644 --- a/bsd/isofs/Makefile +++ b/bsd/miscfs/fifofs/Makefile @@ -8,25 +8,23 @@ include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = \ - cd9660 INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ EXPINC_SUBDIRS = \ - cd9660 EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ -SETUP_SUBDIRS = \ +DATAFILES = \ + fifo.h -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ +EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_DIR = miscfs/fifofs include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/miscfs/fifofs/fifo.h b/bsd/miscfs/fifofs/fifo.h index 97cb8eb34..d60c05874 100644 --- a/bsd/miscfs/fifofs/fifo.h +++ b/bsd/miscfs/fifofs/fifo.h @@ -63,6 +63,8 @@ #ifndef __FIFOFS_FOFO_H__ #define __FIFOFS_FOFO_H__ +__BEGIN_DECLS + #ifdef 
BSD_KERNEL_PRIVATE @@ -83,25 +85,22 @@ struct fifoinfo { #define FIFO_CREATEWAIT 2 #define FIFO_CREATED 4 +int fifo_close_internal (vnode_t, int, vfs_context_t, int); +int fifo_freespace(struct vnode *vp, long *count); +int fifo_charcount(struct vnode *vp, int *count); +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef KERNEL /* * Prototypes for fifo operations on vnodes. */ int fifo_ebadf(void *); -int fifo_lookup (struct vnop_lookup_args *); #define fifo_create (int (*) (struct vnop_create_args *))err_create #define fifo_mknod (int (*) (struct vnop_mknod_args *))err_mknod -int fifo_open (struct vnop_open_args *); -int fifo_close (struct vnop_close_args *); -int fifo_close_internal (vnode_t, int, vfs_context_t, int); #define fifo_access (int (*) (struct vnop_access_args *))fifo_ebadf #define fifo_getattr (int (*) (struct vnop_getattr_args *))fifo_ebadf #define fifo_setattr (int (*) (struct vnop_setattr_args *))fifo_ebadf -int fifo_read (struct vnop_read_args *); -int fifo_write (struct vnop_write_args *); -int fifo_ioctl (struct vnop_ioctl_args *); -int fifo_select (struct vnop_select_args *); #define fifo_revoke nop_revoke #define fifo_mmap (int (*) (struct vnop_mmap_args *))err_mmap #define fifo_fsync (int (*) (struct vnop_fsync_args *))nullop @@ -113,16 +112,26 @@ int fifo_select (struct vnop_select_args *); #define fifo_symlink (int (*) (struct vnop_symlink_args *))err_symlink #define fifo_readdir (int (*) (struct vnop_readdir_args *))err_readdir #define fifo_readlink (int (*) (struct vnop_readlink_args *))err_readlink -int fifo_inactive (struct vnop_inactive_args *); #define fifo_reclaim (int (*) (struct vnop_reclaim_args *))nullop #define fifo_strategy (int (*) (struct vnop_strategy_args *))err_strategy -int fifo_pathconf (struct vnop_pathconf_args *); -int fifo_advlock (struct vnop_advlock_args *); #define fifo_valloc (int (*) (struct vnop_valloc_args *))err_valloc #define fifo_vfree (int (*) (struct vnop_vfree_args *))err_vfree #define fifo_bwrite (int (*) (struct 
vnop_bwrite_args *))nullop #define fifo_blktooff (int (*) (struct vnop_blktooff_args *))err_blktooff -#endif /* BSD_KERNEL_PRIVATE */ +int fifo_lookup (struct vnop_lookup_args *); +int fifo_open (struct vnop_open_args *); +int fifo_close (struct vnop_close_args *); +int fifo_read (struct vnop_read_args *); +int fifo_write (struct vnop_write_args *); +int fifo_ioctl (struct vnop_ioctl_args *); +int fifo_select (struct vnop_select_args *); +int fifo_inactive (struct vnop_inactive_args *); +int fifo_pathconf (struct vnop_pathconf_args *); +int fifo_advlock (struct vnop_advlock_args *); + +#endif /* KERNEL */ + +__END_DECLS #endif /* __FIFOFS_FOFO_H__ */ diff --git a/bsd/miscfs/fifofs/fifo_vnops.c b/bsd/miscfs/fifofs/fifo_vnops.c index 40fb3727a..c1af000e6 100644 --- a/bsd/miscfs/fifofs/fifo_vnops.c +++ b/bsd/miscfs/fifofs/fifo_vnops.c @@ -174,13 +174,11 @@ fifo_open(struct vnop_open_args *ap) if ( (error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0)) ) { goto bad1; } - fip->fi_readsock = rso; if ( (error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0)) ) { (void)soclose(rso); goto bad1; } - fip->fi_writesock = wso; if ( (error = soconnect2(wso, rso)) ) { (void)soclose(wso); @@ -189,22 +187,22 @@ fifo_open(struct vnop_open_args *ap) } fip->fi_readers = fip->fi_writers = 0; - socket_lock(wso, 1); + /* Lock ordering between wso and rso does not matter here + * because they are just created and no one has a reference to them + */ + socket_lock(wso, 1); wso->so_state |= SS_CANTRCVMORE; wso->so_snd.sb_lowat = PIPE_BUF; -#if 0 - /* Because all the unp is protected by single mutex - * doing it in two step may actually cause problems - * as it opens up window between the drop and acquire - */ - socket_unlock(wso, 1); + socket_unlock(wso, 1); - socket_lock(rso, 1); -#endif + socket_lock(rso, 1); rso->so_state |= SS_CANTSENDMORE; - socket_unlock(wso, 1); + socket_unlock(rso, 1); vnode_lock(vp); + fip->fi_readsock = rso; + fip->fi_writesock = wso; + fip->fi_flags |= FIFO_CREATED; 
fip->fi_flags &= ~FIFO_INCREATE; @@ -301,7 +299,8 @@ fifo_read(struct vnop_read_args *ap) { struct uio *uio = ap->a_uio; struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock; - int error, startresid; + user_ssize_t startresid; + int error; int rflags; #if DIAGNOSTIC @@ -313,7 +312,6 @@ fifo_read(struct vnop_read_args *ap) rflags = (ap->a_ioflag & IO_NDELAY) ? MSG_NBIO : 0; - // LP64todo - fix this! startresid = uio_resid(uio); /* fifo conformance - if we have a reader open on the fifo but no @@ -500,7 +498,6 @@ fifo_close_internal(vnode_t vp, int fflag, __unused vfs_context_t context, int l return (error2); } -#if !CONFIG_NO_PRINTF_STRINGS /* * Print out internal contents of a fifo vnode. */ @@ -512,7 +509,6 @@ fifo_printinfo(struct vnode *vp) printf(", fifo with %ld readers and %ld writers", fip->fi_readers, fip->fi_writers); } -#endif /* !CONFIG_NO_PRINTF_STRINGS */ /* * Return POSIX pathconf information applicable to fifo's. @@ -556,3 +552,26 @@ fifo_advlock(__unused struct vnop_advlock_args *ap) return (ENOTSUP); } + +/* You'd certainly better have an iocount on the vnode! 
*/ +int +fifo_freespace(struct vnode *vp, long *count) +{ + struct socket *rsock; + rsock = vp->v_fifoinfo->fi_readsock; + socket_lock(rsock, 1); + *count = sbspace(&rsock->so_rcv); + socket_unlock(rsock, 1); + return 0; +} + +int +fifo_charcount(struct vnode *vp, int *count) +{ + int mcount; + int err = sock_ioctl(vp->v_fifoinfo->fi_readsock, FIONREAD, (void*)&mcount); + if (err == 0) { + *count = mcount; + } + return err; +} diff --git a/bsd/miscfs/nullfs/null_subr.c b/bsd/miscfs/nullfs/null_subr.c index c97977a48..d061bb77f 100644 --- a/bsd/miscfs/nullfs/null_subr.c +++ b/bsd/miscfs/nullfs/null_subr.c @@ -90,7 +90,7 @@ */ #define NULL_NHASH(vp) \ - (&null_node_hashtbl[(((u_long)vp)>>LOG2_SIZEVNODE) & null_node_hash]) + (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash]) LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; u_long null_node_hash; @@ -274,9 +274,9 @@ null_checkvp(vp, fil, lno) #endif if (a->null_lowervp == NULL) { /* Should never happen */ - int i; u_long *p; + int i; uint32_t *p; printf("vp = %x, ZERO ptr\n", vp); - for (p = (u_long *) a, i = 0; i < 8; i++) + for (p = (uint32_t *) a, i = 0; i < 8; i++) printf(" %x", p[i]); printf("\n"); /* wait for debugger */ @@ -284,9 +284,9 @@ null_checkvp(vp, fil, lno) panic("null_checkvp"); } if (a->null_lowervp->v_usecount < 1) { - int i; u_long *p; + int i; uint32_t *p; printf("vp = %x, unref'ed lowervp\n", vp); - for (p = (u_long *) a, i = 0; i < 8; i++) + for (p = (uint32_t *) a, i = 0; i < 8; i++) printf(" %x", p[i]); printf("\n"); /* wait for debugger */ diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 6c26b1799..cbd0de6d9 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -86,9 +86,10 @@ #include /* XXX following three prototypes should be in a header file somewhere */ -extern int isdisk(dev_t dev, int type); extern dev_t chrtoblk(dev_t dev); extern int iskmemdev(dev_t dev); +extern int bpfkqfilter(dev_t 
dev, struct knote *kn); +extern int ptsd_kqfilter(dev_t dev, struct knote *kn); struct vnode *speclisth[SPECHSZ]; @@ -379,7 +380,6 @@ spec_read(struct vnop_read_args *ap) buf_brelse(bp); return (error); } - // LP64todo - fix this! n = min((unsigned)(n - on), uio_resid(uio)); error = uiomove((char *)0 + buf_dataptr(bp) + on, n, uio); @@ -409,7 +409,6 @@ spec_write(struct vnop_write_args *ap) daddr64_t bn; int bsize, blkmask, bscale; int io_sync; - int io_size; int devBlockSize=0; int n, on; int error = 0; @@ -436,8 +435,6 @@ spec_write(struct vnop_write_args *ap) return (EINVAL); io_sync = (ap->a_ioflag & IO_SYNC); - // LP64todo - fix this! - io_size = uio_resid(uio); dev = (vp->v_rdev); @@ -454,7 +451,6 @@ spec_write(struct vnop_write_args *ap) bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); on = uio->uio_offset % bsize; - // LP64todo - fix this! n = min((unsigned)(bsize - on), uio_resid(uio)); /* @@ -522,28 +518,31 @@ spec_ioctl(struct vnop_ioctl_args *ap) { proc_t p = vfs_context_proc(ap->a_context); dev_t dev = ap->a_vp->v_rdev; + int retval = 0; + + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_START, + (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, (unsigned int)ap->a_vp->v_type, 0); switch (ap->a_vp->v_type) { case VCHR: - return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, p)); + retval = (*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, + ap->a_fflag, p); + break; case VBLK: - if (ap->a_command == 0 && (unsigned int)ap->a_data == B_TAPE) { - if (bdevsw[major(dev)].d_type == D_TAPE) - return (0); - else - return (1); - } - return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, - ap->a_fflag, p)); + retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, + ap->a_fflag, p); + break; default: panic("spec_ioctl"); /* NOTREACHED */ } - return (0); + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_END, + (unsigned int)dev, 
(unsigned int)ap->a_command, (unsigned int)ap->a_fflag, retval, 0); + + return (retval); } int @@ -563,6 +562,29 @@ spec_select(struct vnop_select_args *ap) } } +int +spec_kqfilter(vnode_t vp, struct knote *kn) +{ + dev_t dev; + int err = EINVAL; + + /* + * For a few special kinds of devices, we can attach knotes. + * Each filter function must check whether the dev type matches it. + */ + dev = vnode_specrdev(vp); + + if (vnode_istty(vp)) { + /* We can hook into the slave side of a tty */ + err = ptsd_kqfilter(dev, kn); + } else { + /* Try a bpf device, as defined in bsd/net/bpf.c */ + err = bpfkqfilter(dev, kn); + } + + return err; +} + /* * Synch buffers associated with a block device */ @@ -574,7 +596,7 @@ spec_fsync_internal(vnode_t vp, int waitfor, __unused vfs_context_t context) /* * Flush all dirty buffers associated with a block device. */ - buf_flushdirtyblks(vp, waitfor == MNT_WAIT, 0, "spec_fsync"); + buf_flushdirtyblks(vp, (waitfor == MNT_WAIT || waitfor == MNT_DWAIT), 0, "spec_fsync"); return (0); } @@ -598,10 +620,14 @@ void IOSleep(int); #define LOWPRI_MAX_WAITING_MSECS 200 #define LOWPRI_SLEEP_INTERVAL 5 + struct _throttle_io_info_t { struct timeval last_normal_IO_timestamp; - struct timeval last_IO_timestamp; + struct timeval last_IO_timestamp; SInt32 numthreads_throttling; + SInt32 refcnt; + SInt32 alloc; + }; struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; @@ -610,44 +636,183 @@ int lowpri_IO_window_msecs_inc = LOWPRI_WINDOW_MSECS_INC; int lowpri_max_window_msecs = LOWPRI_MAX_WINDOW_MSECS; int lowpri_max_waiting_msecs = LOWPRI_MAX_WAITING_MSECS; +#if 0 +#define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...) \ + do { \ + if ((debug_info)->alloc) \ + printf("%s: "format, __FUNCTION__, ## args); \ + } while(0) + +#else +#define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...) 
+#endif + SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_initial_window_msecs, CTLFLAG_RW, &lowpri_IO_initial_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, ""); SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW, &lowpri_IO_window_msecs_inc, LOWPRI_INITIAL_WINDOW_MSECS, ""); SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, ""); SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, ""); +/* + * Release the reference and if the item was allocated and this is the last + * reference then free it. + * + * This routine always returns the old value. + */ +static int +throttle_info_rel(struct _throttle_io_info_t *info) +{ + SInt32 oldValue = OSDecrementAtomic(&info->refcnt); + + DEBUG_ALLOC_THROTTLE_INFO("refcnt = %d info = %p\n", + info, (int)(oldValue -1), info ); + + /* The reference count just went negative, very bad */ + if (oldValue == 0) + panic("throttle info ref cnt went negative!"); + + /* + * Once reference count is zero, no one else should be able to take a + * reference + */ + if ((info->refcnt == 0) && (info->alloc)) { + DEBUG_ALLOC_THROTTLE_INFO("Freeing info = %p\n", info, info ); + FREE(info, M_TEMP); + } + return oldValue; +} + +/* + * Just take a reference on the throttle info structure. + * + * This routine always returns the old value. 
+ */ +static SInt32 +throttle_info_ref(struct _throttle_io_info_t *info) +{ + SInt32 oldValue = OSIncrementAtomic(&info->refcnt); + + DEBUG_ALLOC_THROTTLE_INFO("refcnt = %d info = %p\n", + info, (int)(oldValue -1), info ); + /* Allocated items should never have a reference of zero */ + if (info->alloc && (oldValue == 0)) + panic("Taking a reference without calling create throttle info!\n"); + + return oldValue; +} + +/* + * KPI routine + * + * Create and take a reference on a throttle info structure and return a + * pointer for the file system to use when calling throttle_info_update. + * Calling file system must have a matching release for every create. + */ +void * +throttle_info_create(void) +{ + struct _throttle_io_info_t *info; + + MALLOC(info, struct _throttle_io_info_t *, sizeof(*info), M_TEMP, M_ZERO | M_WAITOK); + /* Should never happen but just in case */ + if (info == NULL) + return NULL; + /* Mark that this one was allocated and needs to be freed */ + DEBUG_ALLOC_THROTTLE_INFO("Creating info = %p\n", info, info ); + info->alloc = TRUE; + /* Take a reference */ + OSIncrementAtomic(&info->refcnt); + return info; +} + +/* + * KPI routine + * + * Release the throttle info pointer if all the reference are gone. Should be + * called to release reference taken by throttle_info_create + */ +void +throttle_info_release(void *throttle_info) +{ + DEBUG_ALLOC_THROTTLE_INFO("Releaseing info = %p\n", + (struct _throttle_io_info_t *)throttle_info, + (struct _throttle_io_info_t *)throttle_info); + if (throttle_info) /* Just to be careful */ + throttle_info_rel(throttle_info); +} + +/* + * KPI routine + * + * File Systems that create an info structure, need to call this routine in + * their mount routine (used by cluster code). File Systems that call this in + * their mount routines must call throttle_info_mount_rel in their unmount + * routines. 
+ */ +void +throttle_info_mount_ref(mount_t mp, void *throttle_info) +{ + if ((throttle_info == NULL) || (mp == NULL)) + return; + throttle_info_ref(throttle_info); + /* We already have a reference release it before adding the new one */ + if (mp->mnt_throttle_info) + throttle_info_rel(mp->mnt_throttle_info); + mp->mnt_throttle_info = throttle_info; +} + +/* + * KPI routine + * + * File Systems that throttle_info_mount_ref, must call this routine in their + * umount routine. + */ +void +throttle_info_mount_rel(mount_t mp) +{ + if (mp->mnt_throttle_info) + throttle_info_rel(mp->mnt_throttle_info); + mp->mnt_throttle_info = NULL; +} + void throttle_info_get_last_io_time(mount_t mp, struct timeval *tv) { - size_t devbsdunit; - - devbsdunit = mp->mnt_devbsdunit; + struct _throttle_io_info_t *info; - if (devbsdunit < LOWPRI_MAX_NUM_DEV) { - *tv = _throttle_io_info[devbsdunit].last_IO_timestamp; - } else { - memset(tv, 0, sizeof(*tv)); - } + if (mp == NULL) + info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; + else if (mp->mnt_throttle_info == NULL) + info = &_throttle_io_info[mp->mnt_devbsdunit]; + else + info = mp->mnt_throttle_info; + + *tv = info->last_IO_timestamp; } void update_last_io_time(mount_t mp) { - size_t devbsdunit; + struct _throttle_io_info_t *info; - devbsdunit = mp->mnt_devbsdunit; + if (mp == NULL) + info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; + else if (mp->mnt_throttle_info == NULL) + info = &_throttle_io_info[mp->mnt_devbsdunit]; + else + info = mp->mnt_throttle_info; - if (devbsdunit < LOWPRI_MAX_NUM_DEV) { - microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp); - } + microuptime(&info->last_IO_timestamp); } -int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit) +static int +throttle_io_will_be_throttled_internal(int lowpri_window_msecs, void * throttle_info) { + struct _throttle_io_info_t *info = throttle_info; struct timeval elapsed; int elapsed_msecs; microuptime(&elapsed); - timevalsub(&elapsed, 
&_throttle_io_info[devbsdunit].last_normal_IO_timestamp); + timevalsub(&elapsed, &info->last_normal_IO_timestamp); elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000; if (lowpri_window_msecs == -1) // use the max waiting time @@ -656,26 +821,49 @@ int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit) return elapsed_msecs < lowpri_window_msecs; } +/* + * If we have a mount point and it has a throttle info pointer then + * use it to do the check, otherwise use the device unit number to find + * the correct throttle info array element. + */ +int +throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp) +{ + void *info; + + /* Should we just return zero if no mount point */ + if (mp == NULL) + info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; + else if (mp->mnt_throttle_info == NULL) + info = &_throttle_io_info[mp->mnt_devbsdunit]; + else + info = mp->mnt_throttle_info; + return throttle_io_will_be_throttled_internal(lowpri_window_msecs, info); +} + void throttle_lowpri_io(boolean_t ok_to_sleep) { int i; int max_try_num; struct uthread *ut; + struct _throttle_io_info_t *info; ut = get_bsdthread_info(current_thread()); - if (ut->uu_lowpri_window == 0) - return; - - max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, _throttle_io_info[ut->uu_devbsdunit].numthreads_throttling); + if ((ut->uu_lowpri_window == 0) || (ut->uu_throttle_info == NULL)) + goto done; + info = ut->uu_throttle_info; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START, - ut->uu_lowpri_window, 0, 0, 0, 0); + ut->uu_lowpri_window, ok_to_sleep, 0, 0, 0); if (ok_to_sleep == TRUE) { + max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, info->numthreads_throttling); + for (i=0; iuu_lowpri_window, ut->uu_devbsdunit)) { + if (throttle_io_will_be_throttled_internal(ut->uu_lowpri_window, info)) { IOSleep(LOWPRI_SLEEP_INTERVAL); + DEBUG_ALLOC_THROTTLE_INFO("sleeping because of info = %p\n", info, info ); } else { break; 
} @@ -684,12 +872,16 @@ void throttle_lowpri_io(boolean_t ok_to_sleep) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END, ut->uu_lowpri_window, i*5, 0, 0, 0); SInt32 oldValue; - oldValue = OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling); - ut->uu_lowpri_window = 0; + oldValue = OSDecrementAtomic(&info->numthreads_throttling); if (oldValue <= 0) { panic("%s: numthreads negative", __func__); } +done: + ut->uu_lowpri_window = 0; + if (ut->uu_throttle_info) + throttle_info_rel(ut->uu_throttle_info); + ut->uu_throttle_info = NULL; } int throttle_get_io_policy(struct uthread **ut) @@ -711,16 +903,103 @@ int throttle_get_io_policy(struct uthread **ut) return policy; } +void throttle_info_update(void *throttle_info, int flags) +{ + struct _throttle_io_info_t *info = throttle_info; + struct uthread *ut; + int policy; + int is_throttleable_io = 0; + int is_passive_io = 0; + SInt32 oldValue; + + if (!lowpri_IO_initial_window_msecs || (info == NULL)) + return; + policy = throttle_get_io_policy(&ut); + + switch (policy) { + case IOPOL_DEFAULT: + case IOPOL_NORMAL: + break; + case IOPOL_THROTTLE: + is_throttleable_io = 1; + break; + case IOPOL_PASSIVE: + is_passive_io = 1; + break; + default: + printf("unknown I/O policy %d", policy); + break; + } + + if (!is_throttleable_io && ISSET(flags, B_PASSIVE)) + is_passive_io |= 1; + + if (!is_throttleable_io) { + if (!is_passive_io){ + microuptime(&info->last_normal_IO_timestamp); + } + } else if (ut) { + /* + * I'd really like to do the IOSleep here, but + * we may be holding all kinds of filesystem related locks + * and the pages for this I/O marked 'busy'... + * we don't want to cause a normal task to block on + * one of these locks while we're throttling a task marked + * for low priority I/O... 
we'll mark the uthread and + * do the delay just before we return from the system + * call that triggered this I/O or from vnode_pagein + */ + if (ut->uu_lowpri_window == 0) { + ut->uu_throttle_info = info; + throttle_info_ref(ut->uu_throttle_info); + DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info ); + + oldValue = OSIncrementAtomic(&info->numthreads_throttling); + if (oldValue < 0) { + panic("%s: numthreads negative", __func__); + } + ut->uu_lowpri_window = lowpri_IO_initial_window_msecs; + ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue; + } else { + /* The thread sends I/Os to different devices within the same system call */ + if (ut->uu_throttle_info != info) { + struct _throttle_io_info_t *old_info = ut->uu_throttle_info; + + // keep track of the numthreads in the right device + OSDecrementAtomic(&old_info->numthreads_throttling); + OSIncrementAtomic(&info->numthreads_throttling); + + DEBUG_ALLOC_THROTTLE_INFO("switching from info = %p\n", old_info, old_info ); + DEBUG_ALLOC_THROTTLE_INFO("switching to info = %p\n", info, info ); + /* This thread no longer needs a reference on that throttle info */ + throttle_info_rel(ut->uu_throttle_info); + ut->uu_throttle_info = info; + /* Need to take a reference on this throttle info */ + throttle_info_ref(ut->uu_throttle_info); + } + int numthreads = MAX(1, info->numthreads_throttling); + ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads; + if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads) + ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads; + } + } +} + int spec_strategy(struct vnop_strategy_args *ap) { buf_t bp; int bflags; + int policy; dev_t bdev; + uthread_t ut; + size_t devbsdunit; + mount_t mp; bp = ap->a_bp; bdev = buf_device(bp); bflags = buf_flags(bp); + mp = buf_vnode(bp)->v_mount; if (kdebug_enable) { int code = 0; @@ -736,96 +1015,36 @@ spec_strategy(struct vnop_strategy_args *ap) code |= DKIO_PAGING; 
KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - (unsigned int)bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); + bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); } - if (((bflags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && - (buf_vnode(bp)->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) + if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && + mp && (mp->mnt_kern_flag & MNTK_ROOTDEV)) hard_throttle_on_root = 1; - if (lowpri_IO_initial_window_msecs) { - struct uthread *ut; - int policy; - int is_throttleable_io = 0; - int is_passive_io = 0; - size_t devbsdunit; - SInt32 oldValue; - - policy = throttle_get_io_policy(&ut); - switch (policy) { - case IOPOL_DEFAULT: - case IOPOL_NORMAL: - break; - case IOPOL_THROTTLE: - is_throttleable_io = 1; - break; - case IOPOL_PASSIVE: - is_passive_io = 1; - break; - default: - printf("unknown I/O policy %d", policy); - break; - } - - if (!is_throttleable_io && ISSET(bflags, B_PASSIVE)) - is_passive_io |= 1; + if (mp != NULL) + devbsdunit = mp->mnt_devbsdunit; + else + devbsdunit = LOWPRI_MAX_NUM_DEV - 1; - if (buf_vnode(bp)->v_mount != NULL) - devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit; - else - devbsdunit = LOWPRI_MAX_NUM_DEV - 1; - if (!is_throttleable_io) { - if (!is_passive_io){ - microuptime(&_throttle_io_info[devbsdunit].last_normal_IO_timestamp); - } - } else { - /* - * I'd really like to do the IOSleep here, but - * we may be holding all kinds of filesystem related locks - * and the pages for this I/O marked 'busy'... - * we don't want to cause a normal task to block on - * one of these locks while we're throttling a task marked - * for low priority I/O... 
we'll mark the uthread and - * do the delay just before we return from the system - * call that triggered this I/O or from vnode_pagein - */ - if (ut->uu_lowpri_window == 0) { - ut->uu_devbsdunit = devbsdunit; - oldValue = OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling); - if (oldValue < 0) { - panic("%s: numthreads negative", __func__); - } - ut->uu_lowpri_window = lowpri_IO_initial_window_msecs; - ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue; - } else { - if (ut->uu_devbsdunit != devbsdunit) { // the thread sends I/Os to different devices within the same system call - // keep track of the numthreads in the right device - OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling); - OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling); - ut->uu_devbsdunit = devbsdunit; - } - int numthreads = MAX(1, _throttle_io_info[devbsdunit].numthreads_throttling); - ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads; - if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads) - ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads; - } - } + throttle_info_update(&_throttle_io_info[devbsdunit], bflags); + if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) { + bp->b_flags |= B_THROTTLED_IO; } - if ((bflags & B_READ) == 0) { - size_t devbsdunit; - if (buf_vnode(bp)->v_mount != NULL) - devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit; - else - devbsdunit = LOWPRI_MAX_NUM_DEV - 1; - + if ((bflags & B_READ) == 0) { microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp); + if (mp) { + INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size); + } + } else if (mp) { + INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size); } - (*bdevsw[major(bdev)].d_strategy)(bp); - - return (0); + (*bdevsw[major(bdev)].d_strategy)(bp); + + return (0); } @@ -862,17 +1081,17 @@ spec_close(struct vnop_close_args *ap) * We cannot easily tell that a character device is * a 
controlling terminal, unless it is the closing * process' controlling terminal. In that case, - * if the reference count is 2 (this last descriptor - * plus the session), release the reference from the session. + * if the reference count is 1 (this is the very + * last close) */ sessp = proc_session(p); if (sessp != SESSION_NULL) { - if ((vcount(vp) == 2) && + if ((vcount(vp) == 1) && (vp == sessp->s_ttyvp)) { session_lock(sessp); sessp->s_ttyvp = NULL; sessp->s_ttyvid = 0; - sessp->s_ttyp = NULL; + sessp->s_ttyp = TTY_NULL; sessp->s_ttypgrpid = NO_PID; session_unlock(sessp); vnode_rele(vp); @@ -887,39 +1106,17 @@ spec_close(struct vnop_close_args *ap) */ if ((flags & IO_REVOKE) != 0) break; - if (vcount(vp) > 1) + if (vcount(vp) > 0) return (0); break; case VBLK: -#ifdef DEVFS_IMPLEMENTS_LOCKING - /* - * On last close of a block device (that isn't mounted) - * we must invalidate any in core blocks, so that - * we can, for instance, change floppy disks. - */ - if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context))) - return (error); - - error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); - if (error) - return (error); /* * Since every use (buffer, vnode, swap, blockmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased - * vnodes descends to one, we are on last close. - */ - if (vcount(vp) > 0) - return (0); -#else /* DEVFS_IMPLEMENTS_LOCKING */ - /* - * Since every use (buffer, vnode, swap, blockmap) - * holds a reference to the vnode, and because we mark - * any other vnodes that alias this device, when the - * sum of the reference counts on all the aliased - * vnodes descends to one, we are on last close. + * vnodes descends to zero, we are on last close. 
*/ if (vcount(vp) > 0) return (0); @@ -935,7 +1132,7 @@ spec_close(struct vnop_close_args *ap) error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); if (error) return (error); -#endif /* DEVFS_IMPLEMENTS_LOCKING */ + devclose = bdevsw[major(dev)].d_close; mode = S_IFBLK; break; diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h index 052085f17..dfe9c9945 100644 --- a/bsd/miscfs/specfs/specdev.h +++ b/bsd/miscfs/specfs/specdev.h @@ -98,6 +98,7 @@ struct specinfo { * Flags for specinfo */ #define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ +#define SI_ALIASED 0x0002 /* multiple active vnodes refer to this device */ /* * Special device management @@ -121,6 +122,15 @@ struct flock; struct buf; struct uio; +__BEGIN_DECLS +#ifdef BSD_KERNEL_PRIVATE +int spec_blktooff (struct vnop_blktooff_args *); +int spec_offtoblk (struct vnop_offtoblk_args *); +int spec_fsync_internal (vnode_t, int, vfs_context_t); +int spec_blockmap (struct vnop_blockmap_args *); +int spec_kqfilter (vnode_t vp, struct knote *kn); +#endif /* BSD_KERNEL_PRIVATE */ + int spec_ebadf(void *); int spec_lookup (struct vnop_lookup_args *); @@ -138,7 +148,6 @@ int spec_select (struct vnop_select_args *); #define spec_revoke (int (*) (struct vnop_access_args *))nop_revoke #define spec_mmap (int (*) (struct vnop_access_args *))err_mmap int spec_fsync (struct vnop_fsync_args *); -int spec_fsync_internal (vnode_t, int, vfs_context_t); #define spec_remove (int (*) (struct vnop_access_args *))err_remove #define spec_link (int (*) (struct vnop_access_args *))err_link #define spec_rename (int (*) (struct vnop_access_args *))err_rename @@ -159,9 +168,7 @@ int spec_pathconf (struct vnop_pathconf_args *); #define spec_valloc (int (*) (struct vnop_access_args *))err_valloc #define spec_vfree (int (*) (struct vnop_access_args *))err_vfree #define spec_bwrite (int (*) (struct vnop_bwrite_args *))nop_bwrite -int spec_blktooff (struct vnop_blktooff_args *); -int spec_offtoblk (struct 
vnop_offtoblk_args *); -int spec_blockmap (struct vnop_blockmap_args *); +__END_DECLS #endif /* __APPLE_API_PRIVATE */ #endif /* _MISCFS_SPECFS_SPECDEV_H_ */ diff --git a/bsd/miscfs/synthfs/synthfs.h b/bsd/miscfs/synthfs/synthfs.h deleted file mode 100644 index b76e2d802..000000000 --- a/bsd/miscfs/synthfs/synthfs.h +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1998, Apple Computer, Inc. All rights reserved. */ -/* - * Header file for synthfs data structures - * - * Change History: - * - * 17-Aug-1999 Pat Dirks New today. 
- * - */ - -#ifndef __SYNTHFS_H__ -#define __SYNTHFS_H__ - -#include - -#ifdef __APPLE_API_PRIVATE -#include -#include -#include -#include - - -#if DEBUG -extern void Debugger(const char *message); /* Private to pexpert... */ -#endif -__END_DECLS - -/* XXX Get rid of this as soon as sys/malloc.h can be updated to define a real M_SYNTHFS */ -#define M_SYNTHFS M_TEMP - -/* XXX Get rid of this as soon as sys/vnode.h can be updated to define a real VT_SYNTHFS */ -#define VT_SYNTHFS (VT_OTHER+1) - - -struct synthfs_mntdata -{ - struct mount *synthfs_mp; /* filesystem vfs structure */ - struct vnode *synthfs_rootvp; - dev_t synthfs_mounteddev; - unsigned long synthfs_nextid; - unsigned long synthfs_filecount; - unsigned long synthfs_dircount; - unsigned long synthfs_encodingsused; - LIST_HEAD(synthfs_fsvnodelist, vnode) synthfs_fsvnodes; -}; - -/* - * Various sorts of synthfs vnodes: - */ -enum synthfsnodetype { - SYNTHFS_DIRECTORY = 1, - SYNTHFS_FILE, - SYNTHFS_SYMLINK -}; - -struct synthfs_dir_node { - unsigned long d_entrycount; - TAILQ_HEAD(synthfs_d_subnodelist, synthfsnode) d_subnodes; - -}; - -struct synthfs_file_node { - off_t f_size; -}; - -struct synthfs_symlink_node { - int s_length; - char *s_symlinktarget; /* Dynamically allocated */ -}; - - -struct synthfsnode -{ - TAILQ_ENTRY(synthfsnode) s_sibling; /* synthfsnodes in a given directory */ - enum synthfsnodetype s_type; - struct synthfsnode *s_parent; - struct vnode *s_vp; - char *s_name; - unsigned long s_nodeflags; /* Internal synthfs flags: IN_CHANGED, IN_MODIFIED, etc. */ - unsigned long s_pflags; /* File system flags: IMMUTABLE, etc. */ - unsigned long s_nodeid; - unsigned long s_generation; - mode_t s_mode; - short s_linkcount; - uid_t s_uid; - gid_t s_gid; - dev_t s_rdev; - struct timeval s_createtime; - struct timeval s_accesstime; - struct timeval s_modificationtime; - struct timeval s_changetime; - struct timeval s_backuptime; - unsigned long s_flags; /* inode flags: IMMUTABLE, APPEND, etc. 
*/ - unsigned long s_script; - unsigned long s_finderInfo[8]; - union { - struct synthfs_dir_node d; - struct synthfs_file_node f; - struct synthfs_symlink_node s; - } s_u; -}; - -#define ROOT_DIRID 2 -#define FIRST_SYNTHFS_ID 0x10 - -/* These flags are kept in flags. */ -#define IN_ACCESS 0x0001 /* Access time update request. */ -#define IN_CHANGE 0x0002 /* Change time update request. */ -#define IN_UPDATE 0x0004 /* Modification time update request. */ -#define IN_MODIFIED 0x0008 /* Node has been modified. */ -#define IN_RENAME 0x0010 /* Node is being renamed. */ -//#define IN_SHLOCK 0x0020 /* File has shared lock. */ -//#define IN_EXLOCK 0x0040 /* File has exclusive lock. */ -//#define IN_ALLOCATING 0x1000 /* vnode is in transit, wait or ignore */ -//#define IN_WANT 0x2000 /* Its being waited for */ - -#define SYNTHFSTIMES(sp, t1, t2) { \ - if ((sp)->s_nodeflags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ - (sp)->s_nodeflags |= IN_MODIFIED; \ - if ((sp)->s_nodeflags & IN_ACCESS) { \ - (sp)->s_accesstime = *(t1); \ - }; \ - if ((sp)->s_nodeflags & IN_UPDATE) { \ - (sp)->s_modificationtime = *(t2); \ - } \ - if ((sp)->s_nodeflags & IN_CHANGE) { \ - struct timeval _tv; \ - \ - microtime(&_tv); \ - (sp)->s_changetime = _tv; \ - }; \ - (sp)->s_nodeflags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ - } \ -} - -#define ATTR_REF_DATA(attrrefptr) (((char *)(attrrefptr)) + ((attrrefptr)->attr_dataoffset)) - -#define STOV(SP) ((SP)->s_vp) - -#define VTOS(VP) ((struct synthfsnode *)((VP)->v_data)) - -#define VTOVFS(VP) ((VP)->v_mount) -#define STOVFS(HP) ((SP)->s_vp->v_mount) -#define SFSTOVFS(SFSMP) ((SFSMP)->sfs_mp) - -#define VTOSFS(VP) ((struct synthfs_mntdata *)((VP)->v_mount->mnt_data)) -#define STOTFS(SP) ((struct synthfs_mntdata *)(SP)->s_vp->v_mount->mnt_data) -#define VFSTOSFS(MP) ((struct synthfs_mntdata *)(MP)->mnt_data) - -#if DEBUG -#define DBG_TRACE(P) printf P; -#define DBG_INIT(P) printf P; -#define DBG_VOP(P) printf P; -//#define DBG_ASSERT(a) { if (!(a)) { 
panic("File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); } } - #define DBG_ASSERT(a) { if (!(a)) { Debugger("Oops - File __FILE__ , line __LINE__: assertion '"#a"' failed."); } } -#else -#define DBG_TRACE(P) -#define DBG_INIT(P) -#define DBG_VOP(P) -#define DBG_ASSERT(a) -#endif - -extern int (**synthfs_vnodeop_p)(void *); - -__BEGIN_DECLS -int synthfs_mount (struct mount *, vnode_t, user_addr_t, vfs_context_t context); -int synthfs_start (struct mount *, int, vfs_context_t context); -int synthfs_unmount (struct mount *, int, vfs_context_t context); -int synthfs_root (struct mount *, struct vnode **, vfs_context_t context); -int synthfs_vfs_getattr (mount_t mp, struct vfs_attr *fsap, vfs_context_t context); -int synthfs_sync (struct mount *, int, vfs_context_t context); -int synthfs_vget (struct mount *, ino64_t ino, struct vnode **, vfs_context_t context); -int synthfs_fhtovp (struct mount *, int, unsigned char *, struct vnode **, vfs_context_t context); -int synthfs_vptofh (struct vnode *, int *, unsigned char *, vfs_context_t context); -int synthfs_init (struct vfsconf *); -int synthfs_sysctl (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t context); - -int synthfs_create (struct vnop_create_args *); -int synthfs_open (struct vnop_open_args *); -int synthfs_mmap (struct vnop_mmap_args *); -int synthfs_getattr (struct vnop_getattr_args *); -int synthfs_setattr (struct vnop_setattr_args *); -int synthfs_rename (struct vnop_rename_args *); -int synthfs_select (struct vnop_select_args *); -int synthfs_remove (struct vnop_remove_args *); -int synthfs_mkdir (struct vnop_mkdir_args *); -int synthfs_rmdir (struct vnop_rmdir_args *); -int synthfs_symlink (struct vnop_symlink_args *); -int synthfs_readlink (struct vnop_readlink_args *); -int synthfs_readdir (struct vnop_readdir_args *); -int synthfs_cached_lookup (struct vnop_lookup_args *); -int synthfs_lookup (struct vnop_lookup_args *); -int synthfs_pathconf (struct 
vnop_pathconf_args *); - - -int synthfs_inactive (struct vnop_inactive_args*); -int synthfs_reclaim (struct vnop_reclaim_args*); - -void synthfs_setupuio (struct iovec *iov, struct uio *uio, void *buffer, size_t bufsize, enum uio_seg space, enum uio_rw direction, proc_t p); -int synthfs_new_directory (mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, mode_t mode, proc_t p, vnode_t *vpp); -int synthfs_new_symlink (mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, char *targetstring, proc_t p, vnode_t *vpp); -long synthfs_adddirentry (u_int32_t fileno, u_int8_t type, const char *name, struct uio *uio); -int synthfs_remove_entry (struct vnode *vp); -int synthfs_remove_directory (struct vnode *vp); -int synthfs_remove_symlink (struct vnode *vp); -int synthfs_move_rename_entry (struct vnode *source_vp, struct vnode *newparent_vp, char *newname); -int synthfs_derive_vnode_path (struct vnode *vp, char *vnpath, size_t pathbuffersize); -int synthfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor); - -#endif /* __APPLE_API_PRIVATE */ -#endif /* __SYNTHFS_H__ */ diff --git a/bsd/miscfs/synthfs/synthfs_util.c b/bsd/miscfs/synthfs/synthfs_util.c deleted file mode 100644 index 3f93e3f0e..000000000 --- a/bsd/miscfs/synthfs/synthfs_util.c +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1998, Apple Computer, Inc. All rights reserved. */ -/* - * Change History: - * - * 17-Aug-1999 Pat Dirks New today. 
- * - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "synthfs.h" - -struct synthfs_direntry_head { - u_int32_t d_fileno; /* file number of entry */ - u_int16_t d_reclen; /* length of this record */ - u_int8_t d_type; /* file type, see below */ - u_int8_t d_namlen; /* length of string in d_name */ -}; - - -#define PATHSEPARATOR '/' -#define ROOTDIRID 2 - - -static int synthfs_insertnode(struct synthfsnode *newnode_sp, struct synthfsnode *parent_sp) { - struct timeval now; - - DBG_ASSERT(parent_sp->s_type == SYNTHFS_DIRECTORY); - - TAILQ_INSERT_TAIL(&parent_sp->s_u.d.d_subnodes, newnode_sp, s_sibling); - ++parent_sp->s_u.d.d_entrycount; - newnode_sp->s_parent = parent_sp; - - parent_sp->s_nodeflags |= IN_CHANGE | IN_MODIFIED; - microtime(&now); - synthfs_update(STOV(parent_sp), &now, &now, 0); - - return 0; -} - - - -static int -synthfs_newnode(mount_t mp, vnode_t dp, const char *name, unsigned long nodeid, - mode_t mode, __unused proc_t p, enum vtype vtype, vnode_t *vpp) -{ - int result; - struct synthfsnode *sp; - struct vnode *vp; - struct timeval now; - char *nodename; - struct vnode_fsparam vfsp; - - MALLOC(sp, struct synthfsnode *, sizeof(struct synthfsnode), M_SYNTHFS, M_WAITOK); - - if (name == NULL) { - MALLOC(nodename, char *, 1, M_TEMP, M_WAITOK); - nodename[0] = 0; - } else { - MALLOC(nodename, char *, strlen(name) + 1, M_TEMP, M_WAITOK); - strlcpy(nodename, name, strlen(name) + 1); - }; - - /* Initialize the relevant synthfsnode fields: */ - bzero(sp, sizeof(*sp)); - sp->s_nodeid = nodeid; - - /* Initialize all times from a consistent snapshot of the clock: */ - microtime(&now); - sp->s_createtime = now; - sp->s_accesstime = now; - sp->s_modificationtime = now; - sp->s_changetime = now; - sp->s_name = nodename; - sp->s_mode = mode; - - - //bzero(&vfsp, sizeof(struct vnode_fsparam)); - 
vfsp.vnfs_mp = mp; - vfsp.vnfs_vtype = vtype; - vfsp.vnfs_str = "synthfs"; - vfsp.vnfs_dvp = 0; - vfsp.vnfs_fsnode = sp; - vfsp.vnfs_cnp = 0; - vfsp.vnfs_vops = synthfs_vnodeop_p; - vfsp.vnfs_rdev = 0; - vfsp.vnfs_filesize = 0; - vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE; - vfsp.vnfs_marksystem = 0; - vfsp.vnfs_markroot = 0; - - result = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp); - if (result != 0) { - DBG_VOP(("getnewvnode failed with error code %d\n", result)); - FREE(nodename, M_TEMP); - FREE(sp, M_TEMP); - return result; - } - vnode_ref(vp); - - sp->s_vp = vp; - - /* If there's a parent directory, update its subnode structures to insert this new node: */ - if (dp) { - result = synthfs_insertnode(sp, VTOS(dp)); - }; - - *vpp = vp; - - return result; -} - - - -int synthfs_remove_entry(struct vnode *vp) { - struct synthfsnode *sp = VTOS(vp); - struct synthfsnode *psp = sp->s_parent; - struct timeval now; - - if (psp) { - TAILQ_REMOVE(&psp->s_u.d.d_subnodes, sp, s_sibling); - --psp->s_u.d.d_entrycount; - - psp->s_nodeflags |= IN_CHANGE | IN_MODIFIED; - microtime(&now); - synthfs_update(STOV(psp), &now, &now, 0); - }; - - return 0; -} - - - -int synthfs_move_rename_entry(struct vnode *source_vp, struct vnode *newparent_vp, char *new_name) { - struct synthfsnode *source_sp = VTOS(source_vp); - struct synthfsnode *parent_sp = VTOS(newparent_vp); - char *new_name_ptr; - int result = 0; - - /* Unlink the entry from its current place: */ - result = synthfs_remove_entry(source_vp); - if (result) goto err_exit; - - /* Change the name as necessary: */ - if (new_name) { - FREE(source_sp->s_name, M_TEMP); - MALLOC(new_name_ptr, char *, strlen(new_name) + 1, M_TEMP, M_WAITOK); - strlcpy(new_name_ptr, new_name, strlen(new_name) + 1); - source_sp->s_name = new_name_ptr; - }; - - /* Insert the entry in its new home: */ - result = synthfs_insertnode(source_sp, parent_sp); - -err_exit: - return result; -} - - - -int synthfs_new_directory(struct mount *mp, struct 
vnode *dp, const char *name, unsigned long nodeid, mode_t mode, struct proc *p, struct vnode **vpp) { - int result; - struct vnode *vp; - struct synthfsnode *sp; - - result = synthfs_newnode(mp, dp, name, nodeid, mode, p, VDIR, &vp); - if (result) { - return result; - }; - sp = VTOS(vp); - sp->s_linkcount = 2; - - if (dp) { - ++VTOS(dp)->s_linkcount; /* Account for the [fictitious] ".." link */ - }; - - /* Set up the directory-specific fields: */ - sp->s_type = SYNTHFS_DIRECTORY; - sp->s_u.d.d_entrycount = 0; /* No entries in this directory yet */ - TAILQ_INIT(&sp->s_u.d.d_subnodes); /* No subnodes of this directory yet */ - - *vpp = vp; - - return 0; -} - - - -int synthfs_remove_directory(struct vnode *vp) { - struct synthfsnode *sp = VTOS(vp); - struct synthfsnode *psp = sp->s_parent; - - if (psp && (sp->s_type == SYNTHFS_DIRECTORY) && (psp != sp)) { - --psp->s_linkcount; /* account for the [fictitious] ".." link now removed */ - }; - vnode_rele(vp); - - /* Do the standard cleanup involved in pruning an entry from the filesystem: */ - return synthfs_remove_entry(vp); /* Do whatever standard cleanup is required */ -} - - - -int -synthfs_new_symlink( - struct mount *mp, - struct vnode *dp, - const char *name, - unsigned long nodeid, - char *targetstring, - struct proc *p, - struct vnode **vpp) -{ - int result; - struct vnode *vp; - struct synthfsnode *sp; - - result = synthfs_newnode(mp, dp, name, nodeid, 0, p, VLNK, &vp); - if (result) - return result; - sp = VTOS(vp); - sp->s_linkcount = 1; - - /* Set up the symlink-specific fields: */ - sp->s_type = SYNTHFS_SYMLINK; - sp->s_u.s.s_length = strlen(targetstring); - MALLOC(sp->s_u.s.s_symlinktarget, char *, sp->s_u.s.s_length + 1, - M_TEMP, M_WAITOK); - strlcpy(sp->s_u.s.s_symlinktarget, targetstring, - sp->s_u.s.s_lenghth + 1); - - *vpp = vp; - - return 0; -} - - - -int synthfs_remove_symlink(struct vnode *vp) { - struct synthfsnode *sp = VTOS(vp); - - FREE(sp->s_u.s.s_symlinktarget, M_TEMP); - vnode_rele(vp); - - 
/* Do the standard cleanup involved in pruning an entry from the filesystem: */ - return synthfs_remove_entry(vp); /* Do whatever standard cleanup is required */ -} - - - - - - -long synthfs_adddirentry(u_int32_t fileno, u_int8_t type, const char *name, struct uio *uio) { - struct synthfs_direntry_head direntry; - long namelength; - int padding; - long padtext = 0; - unsigned short direntrylength; - - namelength = ((name == NULL) ? 0 : strlen(name) + 1); - padding = (4 - (namelength & 3)) & 3; - direntrylength = sizeof(struct synthfs_direntry_head) + namelength + padding; - - direntry.d_fileno = fileno; - direntry.d_reclen = direntrylength; - direntry.d_type = type; - direntry.d_namlen = namelength; - - if (uio_resid(uio) < direntry.d_reclen) { - direntrylength = 0; - } else { - uiomove((caddr_t)(&direntry), sizeof(direntry), uio); - if (name != NULL) { - uiomove((caddr_t)name, namelength, uio); - }; - if (padding > 0) { - uiomove((caddr_t)&padtext, padding, uio); - }; - }; - - return direntrylength; -} - - diff --git a/bsd/miscfs/synthfs/synthfs_vfsops.c b/bsd/miscfs/synthfs/synthfs_vfsops.c deleted file mode 100644 index 2db55aa2c..000000000 --- a/bsd/miscfs/synthfs/synthfs_vfsops.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1998 Apple Computer, Inc. All Rights Reserved */ -/* - * Change History: - * - * 17-Aug-1999 Pat Dirks New today. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "synthfs.h" - -#define LOADABLE_FS 0 - -typedef int (*PFI)(); - -struct vfsops synthfs_vfsops = { - synthfs_mount, - synthfs_start, - synthfs_unmount, - synthfs_root, - NULL, /* quotactl */ - synthfs_vfs_getattr, - synthfs_sync, - synthfs_vget, - synthfs_fhtovp, - synthfs_vptofh, - synthfs_init, - synthfs_sysctl -}; - -#define ROOTMPMODE 0755 -#define ROOTPLACEHOLDERMODE 0700 -static char synthfs_fs_name[MFSTYPENAMELEN] = "synthfs"; -static char synthfs_fake_mntfromname[] = ""; - - -extern struct vnodeopv_desc synthfs_vnodeop_opv_desc; - -/* The following refer to kernel global variables used in the loading/initialization: */ -extern int maxvfsslots; /* Total number of slots in the system's vfsconf table */ -extern int maxvfsconf; /* The highest fs type number [old-style ID] in use [dispite its name] */ -extern int vfs_opv_numops; /* The total number of defined vnode operations */ - -int vn_mkdir(struct proc *p, char *path, int mode); -int vn_symlink(struct proc *p, char *path, char 
*link); - - - - -#if LOADABLE_FS -void -synthfs_load(int loadArgument) { - /* Should use vfs_fsadd kpi */ -} - - - -int synthfs_unload(void) { - - /* should use fs_fsremove kpi */ - return 0; -} -#endif - - - -/* - * VFS Operations. - * - * mount system call - */ -int -synthfs_mount_fs(struct mount *mp, vnode_t devvp, __unused user_addr_t data, struct proc *p) -{ - struct synthfs_mntdata *priv_mnt_data; - int error; - size_t size; - - DBG_VOP(("synthfs_mount_fs called.\n")); - MALLOC(priv_mnt_data, struct synthfs_mntdata *, sizeof(struct synthfs_mntdata), M_SYNTHFS, M_WAITOK); - DBG_VOP(("MALLOC succeeded...\n")); - - strlcpy(mp->mnt_vfsstat.f_fstypename, synthfs_fs_name, sizeof(mp->mnt_vfsstat.f_fstypename)); - strlcpy(mp->mnt_vfsstat.f_mntfromname, synthfs_fake_mntfromname, sizeof(mp->mnt_vfsstat.f_mntfromname)); - priv_mnt_data->synthfs_mounteddev = (dev_t)0; - priv_mnt_data->synthfs_nextid = FIRST_SYNTHFS_ID; - priv_mnt_data->synthfs_filecount = 0; - priv_mnt_data->synthfs_dircount = 0; - priv_mnt_data->synthfs_encodingsused = 0x00000001; - - /* - Set up the root vnode for fast reference in the future. - Note that synthfs_new_directory() returns the vnode with a refcount of +2. - The root vnode's refcount is maintained unlocked but with a pos. ref count until unmount. 
- */ - error = synthfs_new_directory(mp, NULL, "", ROOT_DIRID, (S_IRWXU|S_IRWXG|S_IROTH|S_IXOTH), p, &priv_mnt_data->synthfs_rootvp); - if (error) { - DBG_VOP(("Attempt to create root directory failed with error %d.\n", error)); - return error; - }; - priv_mnt_data->synthfs_rootvp->v_flag |= VROOT; - - priv_mnt_data->synthfs_mp = mp; - mp->mnt_data = (void *)priv_mnt_data; - - /* Drop the freshly acquired reference on the root, leaving v_usecount=1 to prevent - the vnode from beeing freed: */ - vnode_put(priv_mnt_data->synthfs_rootvp); - - return (0); -} - - - -int -synthfs_mount(mp, devvp, data, context) - register struct mount *mp; - vnode_t devvp; - user_addr_t data; - vfs_context_t context; -{ - size_t size; - - return (synthfs_mount_fs(mp, devvp, data, vfs_context_proc(context))); -} - - - - - - -/* - * Initialize the filesystem - */ -int -synthfs_init(vfsp) - struct vfsconf *vfsp; -{ - DBG_VOP(("synthfs_init called.\n")); - return 0; -} - -int -synthfs_start(mp, flags, context) -struct mount * mp; -int flags; -vfs_context_t context; -{ - DBG_VOP(("synthfs_start called.\n")); - return 0; -} - -/* - * Return the root of a filesystem. - */ -int -synthfs_root(mp, vpp, context) - struct mount *mp; - struct vnode **vpp; - vfs_context_t context; -{ - unsigned long root_nodeid = ROOT_DIRID; - - DBG_VOP(("synthfs_root called.\n")); - - *vpp = VFSTOSFS(mp)->synthfs_rootvp; - return vnode_get(VFSTOSFS(mp)->synthfs_rootvp); -} - -/* - * unmount system call - */ -int -synthfs_unmount(mp, mntflags, context) - struct mount *mp; - int mntflags; - vfs_context_t context; -{ - struct synthfs_mntdata *synth; - struct vnode *root_vp; - int retval; - - DBG_VOP(("synthfs_unmount called.\n")); - synth = (struct synthfs_mntdata *)mp->mnt_data; - - root_vp = synth->synthfs_rootvp; - retval = vflush(mp, root_vp, (mntflags & MNT_FORCE) ? FORCECLOSE : 0); - if (retval && ((mntflags & MNT_FORCE) == 0)) goto Err_Exit; - - /* Free the root vnode. - the ref. 
count has been maintained at +1 ever since mount time. */ - if (root_vp) { - if ((mntflags & MNT_FORCE) == 0) { - if (retval) goto Err_Exit; - - if (root_vp->v_usecount > 1) { - DBG_VOP(("synthfs ERROR: root vnode = %x, usecount = %d\n", (int)root_vp, synth->synthfs_rootvp->v_usecount)); - retval = EBUSY; - goto Err_Exit; - }; - }; - - synth->synthfs_rootvp = NULL; - - if (retval == 0) { - vnode_get(root_vp); - vnode_rele(root_vp); - vnode_recycle(root_vp); - vnode_put(root_vp); /* This drops synthfs's own refcount */ - }; - }; - - /* All vnodes should be gone, and no errors, clean up the last */ - - mp->mnt_data = NULL; - FREE(synth, M_SYNTHFS); - -Err_Exit: - - if (mntflags & MNT_FORCE) retval = 0; - - return(retval); -} - -/* - * Get file system statistics. - */ -int -synthfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) -{ - struct synthfs_mntdata *synthfs_mp = VFSTOSFS(mp); - DBG_VOP(("synthfs_vfs_getattr called.\n")); - - VFSATTR_RETURN(fsap, f_bsize, 512); - VFSATTR_RETURN(fsap, f_iosize, 512); - VFSATTR_RETURN(fsap, f_blocks, 1024); - VFSATTR_RETURN(fsap, f_bfree, 0); - VFSATTR_RETURN(fsap, f_bavail, 0); - VFSATTR_RETURN(fsap, f_bused, 1024); - VFSATTR_RETURN(fsap, f_files, synthfs_mp->synthfs_filecount + synthfs_mp->synthfs_dircount); - VFSATTR_RETURN(fsap, f_ffree, 0); - VFSATTR_RETURN(fsap, f_fssubtype, 0); - - return 0; -} - -/* - * synthfs doesn't have any data or backing store and you can't write into any of the synthfs - * structures, so don't do anything - */ -int -synthfs_sync(mp, waitfor, context) - struct mount *mp; - int waitfor; - vfs_context_t context; -{ -// DBG_VOP(("synthfs_sync called\n")); - return 0; -} -/* - * Look up a synthfs node by node number. 
- */ -int -synthfs_vget(mp, ino, vpp, context) - struct mount *mp; - ino64_t ino; - struct vnode **vpp; - vfs_context_t context; -{ - struct vnode *vp; - int vid = 0; - -// DBG_VOP(("synthfs_vget called\n")); - - /* Check for unmount in progress */ - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - *vpp = NULL; - return (EPERM); - } - -loop: - TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { - if (VTOS(vp)->s_nodeid == (unsigned long)ino) { - /* - * doing a vnode_getwithvid isn't technically - * necessary since synthfs is an unsafe filesystem - * and we're running behind a funnel at this point - * however, vnode_get always succeeds, which isn't - * what we want if this vnode is in the process of - * being terminated - */ - vid = vnode_vid(vp); - - if (vnode_getwithvid(vp, vid) != 0) { - goto loop; - }; - *vpp = vp; - return 0; - }; - }; - *vpp = NULL; - return -1; -} - -/* - * fast filesystem related variables. - */ -int -synthfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, vfs_context_t context) -{ - DBG_VOP(("synthfs_sysctl called.\n")); - return (ENOTSUP); -} - -/* - * File handle to vnode - * - */ -int -synthfs_fhtovp(mp, fhlen, fhp, vpp, context) - register struct mount *mp; - int fhlen; - unsigned char *fhp; - struct vnode **vpp; - vfs_context_t context; -{ - DBG_VOP(("synthfs_fhtovp called.\n")); - return ENOTSUP; -} - -/* - * Vnode pointer to File handle - */ -/* ARGSUSED */ -int -synthfs_vptofh(vp, fhlenp, fhp, context) - struct vnode *vp; - int *fhlenp; - unsigned char *fhp; - vfs_context_t context; -{ - DBG_VOP(("synthfs_vptofh called.\n")); - return ENOTSUP; -} - - - - - - -int -vn_mkdir(struct proc *p, char *path, int mode) -{ - struct nameidata nd; - struct vnode *vp; - struct vnode_attr va; - vfs_context_t ctx = vfs_context_kernel(); - int error; - - - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx); - error = namei(&nd); - if (error) { - DBG_VOP(("vn_mkdir: error 
from namei, error = %d.\n", error)); - return (error); - }; - vp = nd.ni_vp; - - if (vp == NULL) { - VATTR_INIT(&va); - VATTR_SET(&va, va_type, VDIR); - VATTR_SET(&va, va_mode, (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask); - - error = vn_create(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, 0, ctx); - if (error) - DBG_VOP(("vn_mkdir: error from vnop_mkdir (%d).\n", error)); - } else { - DBG_VOP(("vn_mkdir: target already exists; returning EEXIST.\n")); - error = EEXIST; - } - vnode_put(nd.ni_dvp); - if (nd.ni_vp) - vnode_put(nd.ni_vp); - nameidone(&nd); - - return (error); -} - - - -int -vn_symlink(struct proc *p, char *path, char *link) { - struct nameidata nd; - struct vnode_attr va; - vfs_context_t ctx = vfs_context_kernel(); - int error; - - NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE32, CAST_USER_ADDR_T(link), ctx); - if ((error = namei(&nd))) { - return error; - } - - if (nd.ni_vp == NULL) { - VATTR_INIT(&va); - VATTR_SET(&va, va_type, VLNK); - VATTR_SET(&va, va_mode, ACCESSPERMS &~ p->p_fd->fd_cmask); - - error = VNOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &va, path, ctx); - } else - error = EEXIST; - - vnode_put(nd.ni_dvp); - if (nd.ni_vp) - vnode_put(nd.ni_vp); - nameidone(&nd); - - return (error); -} - - diff --git a/bsd/miscfs/synthfs/synthfs_vnops.c b/bsd/miscfs/synthfs/synthfs_vnops.c deleted file mode 100644 index d409494e8..000000000 --- a/bsd/miscfs/synthfs/synthfs_vnops.c +++ /dev/null @@ -1,1327 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998-1999 Apple Computer, Inc. All Rights Reserved. - * - * Modification History: - * - * 02-Feb-2000 Clark Warner Added copyfile to table - * 17-Aug-1999 Pat Dirks New today. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "synthfs.h" - -#define RWSUPPORT 0 - -#if RWSUPPORT -#error NOT PORTED FOR UBC -#include -#endif - -static int synthfs_remove_internal(struct vnode *dvp, struct vnode *vp, - struct componentname *cnp, vfs_context_t context); - - -#define VOPFUNC int (*)(void *) - -/* Global vfs data structures for synthfs. 
*/ -int (**synthfs_vnodeop_p) (void *); -struct vnodeopv_entry_desc synthfs_vnodeop_entries[] = { - {&vnop_default_desc, (VOPFUNC)vn_default_error}, - {&vnop_strategy_desc, (VOPFUNC)err_strategy}, /* strategy - not supported */ - {&vnop_bwrite_desc, (VOPFUNC)err_bwrite}, /* bwrite - not supported */ - {&vnop_lookup_desc, (VOPFUNC)synthfs_cached_lookup}, /* cached lookup */ - {&vnop_create_desc, (VOPFUNC)synthfs_create}, /* create - DEBUGGER */ - {&vnop_whiteout_desc, (VOPFUNC)err_whiteout}, /* whiteout - not supported */ - {&vnop_mknod_desc, (VOPFUNC)err_mknod}, /* mknod - not supported */ - {&vnop_open_desc, (VOPFUNC)synthfs_open}, /* open - DEBUGGER */ - {&vnop_close_desc, (VOPFUNC)nop_close}, /* close - NOP */ - {&vnop_getattr_desc, (VOPFUNC)synthfs_getattr}, /* getattr */ - {&vnop_setattr_desc, (VOPFUNC)synthfs_setattr}, /* setattr */ - {&vnop_getattrlist_desc, (VOPFUNC)err_getattrlist}, /* getattrlist - not supported */ - {&vnop_setattrlist_desc, (VOPFUNC)err_setattrlist}, /* setattrlist - not supported */ - {&vnop_read_desc, (VOPFUNC)err_read}, /* read - not supported */ - {&vnop_write_desc, (VOPFUNC)err_write}, /* write - not supported */ - {&vnop_ioctl_desc, (VOPFUNC)err_ioctl}, /* ioctl - not supported */ - {&vnop_select_desc, (VOPFUNC)synthfs_select}, /* select */ - {&vnop_exchange_desc, (VOPFUNC)err_exchange}, /* exchange - not supported */ - {&vnop_revoke_desc, (VOPFUNC)nop_revoke}, /* revoke - NOP */ - {&vnop_mmap_desc, (VOPFUNC)synthfs_mmap}, /* mmap - DEBUGGER */ - {&vnop_fsync_desc, (VOPFUNC)nop_fsync}, /* fsync - NOP */ - {&vnop_remove_desc, (VOPFUNC)synthfs_remove}, /* remove */ - {&vnop_link_desc, (VOPFUNC)err_link}, /* link - not supported */ - {&vnop_rename_desc, (VOPFUNC)synthfs_rename}, /* rename */ - {&vnop_mkdir_desc, (VOPFUNC)synthfs_mkdir}, /* mkdir */ - {&vnop_rmdir_desc, (VOPFUNC)synthfs_rmdir}, /* rmdir */ - {&vnop_symlink_desc, (VOPFUNC)synthfs_symlink}, /* symlink */ - {&vnop_readdir_desc, (VOPFUNC)synthfs_readdir}, /* readdir */ - 
{&vnop_readdirattr_desc, (VOPFUNC)err_readdirattr}, /* readdirattr - not supported */ - {&vnop_readlink_desc, (VOPFUNC)synthfs_readlink}, /* readlink */ - {&vnop_inactive_desc, (VOPFUNC)synthfs_inactive}, /* inactive */ - {&vnop_reclaim_desc, (VOPFUNC)synthfs_reclaim}, /* reclaim */ - {&vnop_pathconf_desc, (VOPFUNC)synthfs_pathconf}, /* pathconf */ - {&vnop_advlock_desc, (VOPFUNC)err_advlock}, /* advlock - not supported */ - {&vnop_allocate_desc, (VOPFUNC)err_allocate}, /* allocate - not supported */ - {&vnop_pagein_desc, (VOPFUNC)err_pagein}, /* pagein - not supported */ - {&vnop_pageout_desc, (VOPFUNC)err_pageout}, /* pageout - not supported */ - {&vnop_searchfs_desc, (VOPFUNC)err_searchfs}, /* searchfs - not supported */ - {&vnop_copyfile_desc, (VOPFUNC)err_copyfile}, /* copyfile - not supported */ - { &vnop_blktooff_desc, (VOPFUNC)err_blktooff }, /* blktooff not supported */ - { &vnop_offtoblk_desc, (VOPFUNC)err_offtoblk }, /* offtoblk not supported */ - { &vnop_blockmap_desc, (VOPFUNC)err_blockmap }, /* blockmap not supported */ - {(struct vnodeop_desc *) NULL, (int (*) ()) NULL} -}; - -/* - * Oh what a tangled web we weave. This structure will be used by - * bsd/vfs/vfs_conf.c to actually do the initialization of synthfs_vnodeop_p - */ -struct vnodeopv_desc synthfs_vnodeop_opv_desc = -{&synthfs_vnodeop_p, synthfs_vnodeop_entries}; - - - -/* - * Create a regular file -#% create dvp L U U -#% create vpp - L - -# - vnop_create { - IN WILLRELE struct vnode *dvp; - OUT struct vnode **vpp; - IN struct componentname *cnp; - IN struct vnode_attr *vap; - - We are responsible for freeing the namei buffer, it is done in hfs_makenode(), unless there is - a previous error. 
- -*/ - -int -synthfs_create(ap) -struct vnop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; -} */ *ap; -{ -#if DEBUG - struct vnode *dvp = ap->a_dvp; - char debugmsg[255]; - - sprintf(debugmsg, "synthfs_create: attempt to create '%s' in '%s' ?!", ap->a_cnp->cn_nameptr, VTOS(dvp)->s_name); - Debugger(debugmsg); -#endif - - return err_create(ap); -} - - - -/* - * Open called. -#% open vp L L L -# - vnop_open { - IN struct vnode *vp; - IN int mode; - IN vfs_context_t a_context; - */ - -int -synthfs_open(ap) -struct vnop_open_args /* { - struct vnode *a_vp; - int a_mode; - vfs_context_t a_context; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - - if (vp->v_type == VDIR) { - return 0; - } else { -#if DEBUG - struct synthfsnode *sp = VTOS(vp); - char debugmsg[255]; - - sprintf(debugmsg, "synthfs_open: attempt to open '/%s' ?!", sp->s_name); - Debugger(debugmsg); -#endif - }; - - return 0; -} - - - -/* - * Mmap a file - * - * NB Currently unsupported. 
-# XXX - not used -# - vnop_mmap { - IN struct vnode *vp; - IN int fflags; - IN kauth_cred_t cred; - IN struct proc *p; - - */ - -/* ARGSUSED */ - -int -synthfs_mmap(__unused struct vnop_mmap_args *ap) -{ - return EINVAL; -} - - - -/* -#% getattr vp = = = -# - vnop_getattr { - IN struct vnode *vp; - IN struct vnode_attr *vap; - IN vfs_context_t context; - -*/ -int -synthfs_getattr(ap) -struct vnop_getattr_args /* { - struct vnode *a_vp; - struct vnode_attr *a_vap; - vfs_context_t a_context; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct vnode_attr *vap = ap->a_vap; - struct synthfsnode *sp = VTOS(vp); - - VATTR_RETURN(vap, va_type, vp->v_type); - VATTR_RETURN(vap, va_mode, sp->s_mode); - VATTR_RETURN(vap, va_nlink, sp->s_linkcount); - VATTR_RETURN(vap, va_uid, sp->s_uid); - VATTR_RETURN(vap, va_gid, sp->s_gid); - VATTR_RETURN(vap, va_fsid, VTOVFS(vp)->mnt_vfsstat.f_fsid.val[0]); - VATTR_RETURN(vap, va_fileid, sp->s_nodeid); - switch (vp->v_type) { - case VDIR: - VATTR_RETURN(vap, va_data_size, (sp->s_u.d.d_entrycount + 2) * sizeof(struct dirent)); - break; - - case VREG: - VATTR_RETURN(vap, va_data_size, sp->s_u.f.f_size); - break; - - case VLNK: - VATTR_RETURN(vap, va_data_size, sp->s_u.s.s_length); - break; - - default: - VATTR_RETURN(vap, va_data_size, 0); - }; - VATTR_RETURN(vap, va_iosize, 512); - vap->va_access_time.tv_sec = sp->s_accesstime.tv_sec; - vap->va_access_time.tv_nsec = sp->s_accesstime.tv_usec * 1000; - VATTR_SET_SUPPORTED(vap, va_access_time); - vap->va_modify_time.tv_sec = sp->s_modificationtime.tv_sec; - vap->va_modify_time.tv_nsec = sp->s_modificationtime.tv_usec * 1000; - VATTR_SET_SUPPORTED(vap, va_modify_time); - vap->va_change_time.tv_sec = sp->s_changetime.tv_sec; - vap->va_change_time.tv_nsec = sp->s_changetime.tv_usec * 1000; - VATTR_SET_SUPPORTED(vap, va_change_time); - VATTR_RETURN(vap, va_gen, sp->s_generation); - VATTR_RETURN(vap, va_flags, sp->s_flags); - VATTR_RETURN(vap, va_rdev, sp->s_rdev); - VATTR_RETURN(vap, 
va_filerev, 0); - VATTR_RETURN(vap, va_acl, NULL); - - return (0); -} - - - -/* - * Change the mode on a file or directory. - * vnode vp must be locked on entry. - */ -int synthfs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) -{ - struct synthfsnode *sp = VTOS(vp); - int result; - - sp->s_mode &= ~ALLPERMS; - sp->s_mode |= (mode & ALLPERMS); - sp->s_nodeflags |= IN_CHANGE; -#if RWSUPPORT - if ((vp->v_flag & VTEXT) && (sp->s_mode & S_ISTXT) == 0) (void) vnode_uncache(vp); -#endif - - return 0; -} - - - -/* - * Change the flags on a file or directory. - * vnode vp must be locked on entry. - */ -int synthfs_chflags(struct vnode *vp, u_long flags, kauth_cred_t cred, struct proc *p) -{ - struct synthfsnode *sp = VTOS(vp); - - sp->s_flags = flags; - sp->s_nodeflags |= IN_CHANGE; - - return 0; -} - - - -/* - * Perform chown operation on vnode vp; - * vnode vp must be locked on entry. - */ -int synthfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, struct proc *p) -{ - struct synthfsnode *sp = VTOS(vp); - uid_t ouid; - gid_t ogid; - int result = 0; - int is_member; - - if (uid == (uid_t)VNOVAL) uid = sp->s_uid; - if (gid == (gid_t)VNOVAL) gid = sp->s_gid; - - ogid = sp->s_gid; - ouid = sp->s_uid; - - sp->s_gid = gid; - sp->s_uid = uid; - - if (ouid != uid || ogid != gid) sp->s_nodeflags |= IN_CHANGE; - if (ouid != uid && suser(cred, NULL)) sp->s_mode &= ~S_ISUID; - if (ogid != gid && suser(cred, NULL)) sp->s_mode &= ~S_ISGID; - - return 0; -} - - - -/* - * Set attribute vnode op. 
called from several syscalls -#% setattr vp L L L -# - vnop_setattr { - IN struct vnode *vp; - IN struct vnode_attr *vap; - IN vfs_context_t context; - */ - -int -synthfs_setattr(ap) -struct vnop_setattr_args /* { -struct vnode *a_vp; -struct vnode_attr *a_vap; -vfs_context_t a_context; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct synthfsnode *sp = VTOS(vp); - struct vnode_attr *vap = ap->a_vap; - kauth_cred_t cred = vfs_context_ucred(ap->a_context); - struct proc *p = vfs_context_proc(ap->a_context); - struct timeval atimeval, mtimeval; - uid_t nuid; - gid_t ngid; - int result; - - result = 0; - - if (VATTR_IS_ACTIVE(vap, va_flags)) { - if ((result = synthfs_chflags(vp, vap->va_flags, cred, p))) { - goto Err_Exit; - } - } - VATTR_SET_SUPPORTED(vap, va_flags); - - nuid = (uid_t)ngid = (gid_t)VNOVAL; - if (VATTR_IS_ACTIVE(vap, va_uid)) - nuid = vap->va_uid; - if (VATTR_IS_ACTIVE(vap, va_gid)) - ngid = vap->va_gid; - if (nuid != (uid_t)VNOVAL || ngid != (gid_t)VNOVAL) { - if ((result = synthfs_chown(vp, nuid, ngid, cred, p))) { - goto Err_Exit; - } - } - VATTR_SET_SUPPORTED(vap, va_uid); - VATTR_SET_SUPPORTED(vap, va_gid); - - if (VATTR_IS_ACTIVE(vap, va_data_size)) { -#if RWSUPPORT - if ((result = vnode_setsize(vp, vap->va_data_size, 0, ap->a_context))) { - goto Err_Exit; - }; - VATTR_SET_SUPPORTED(vap, va_data_size); -#else - result = EINVAL; - goto Err_Exit; -#endif - } - - sp = VTOS(vp); - if (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time)) { - if (VATTR_IS_ACTIVE(vap, va_access_time)) { - sp->s_nodeflags |= IN_ACCESS; - atimeval.tv_sec = vap->va_access_time.tv_sec; - atimeval.tv_usec = vap->va_access_time.tv_nsec / 1000; - } - if (VATTR_IS_ACTIVE(vap, va_modify_time)) { - sp->s_nodeflags |= IN_CHANGE | IN_UPDATE; - mtimeval.tv_sec = vap->va_modify_time.tv_sec; - mtimeval.tv_usec = vap->va_modify_time.tv_nsec / 1000; - } - if ((result = synthfs_update(vp, &atimeval, &mtimeval, 1))) { - goto Err_Exit; - } - } - 
VATTR_SET_SUPPORTED(vap, va_access_time); - VATTR_SET_SUPPORTED(vap, va_modify_time); - - if (VATTR_IS_ACTIVE(vap, va_mode)) - result = synthfs_chmod(vp, (int)vap->va_mode, cred, p); - VATTR_SET_SUPPORTED(vap, va_mode); - - Err_Exit: - - DBG_VOP(("synthfs_setattr: returning %d...\n", result)); - - return (result); -} - - - -/* - -#% rename sourcePar_vp U U U -#% rename source_vp U U U -#% rename targetPar_vp L U U -#% rename target_vp X U U -# - vnop_rename { - IN WILLRELE struct vnode *sourcePar_vp; - IN WILLRELE struct vnode *source_vp; - IN struct componentname *source_cnp; - IN WILLRELE struct vnode *targetPar_vp; - IN WILLRELE struct vnode *target_vp; - IN struct componentname *target_cnp; - - - */ - -/* - * On entry: - * source's parent directory is unlocked - * source file or directory is unlocked - * destination's parent directory is locked - * destination file or directory is locked if it exists - * - * On exit: - * all denodes should be released - * - */ - -int -synthfs_rename(ap) -struct vnop_rename_args /* { - struct vnode *a_fdvp; - struct vnode *a_fvp; - struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; - struct componentname *a_tcnp; - vfs_context_t a_context; -} */ *ap; -{ - struct vnode *target_vp = ap->a_tvp; - struct vnode *targetPar_vp = ap->a_tdvp; - struct vnode *source_vp = ap->a_fvp; - struct vnode *sourcePar_vp = ap->a_fdvp; - struct componentname *target_cnp = ap->a_tcnp; - struct componentname *source_cnp = ap->a_fcnp; - struct synthfsnode *target_sp, *targetPar_sp, *source_sp, *sourcePar_sp; - u_short doingdirectory = 0, oldparent = 0, newparent = 0; - int retval = 0; - struct timeval tv; - -#if SYNTHFS_DIAGNOSTIC - if ((target_cnp->cn_flags & HASBUF) == 0 || - (source_cnp->cn_flags & HASBUF) == 0) - panic("synthfs_rename: no name"); -#endif - - DBG_ASSERT((ap->a_fdvp->v_type == VDIR) && (ap->a_tdvp->v_type == VDIR)); - target_sp = targetPar_sp = source_sp = sourcePar_sp = NULL; - - - sourcePar_sp = 
VTOS(sourcePar_vp); - source_sp = VTOS(source_vp); - oldparent = sourcePar_sp->s_nodeid; - - /* - * Be sure we are not renaming ".", "..", or an alias of ".". This - * leads to a crippled directory tree. It's pretty tough to do a - * "ls" or "pwd" with the "." directory entry missing, and "cd .." - * doesn't work if the ".." entry is missing. - */ - if (source_sp->s_type == SYNTHFS_DIRECTORY) { - if ((source_cnp->cn_namelen == 1 && source_cnp->cn_nameptr[0] == '.') - || sourcePar_sp == source_sp - || (source_cnp->cn_flags & ISDOTDOT) - || (source_sp->s_nodeflags & IN_RENAME)) { - retval = EINVAL; - goto abortit; - } - source_sp->s_nodeflags |= IN_RENAME; - doingdirectory = TRUE; - } - - /* Transit between abort and bad */ - - targetPar_sp = VTOS(targetPar_vp); - target_sp = target_vp ? VTOS(target_vp) : NULL; - newparent = targetPar_sp->s_nodeid; - - - /* - * If the destination exists, then be sure its type (file or dir) - * matches that of the source. And, if it is a directory make sure - * it is empty. Then delete the destination. - */ - if (target_vp) { - -#if RWSUPPORT - if (target_vp->v_type == VREG) { - (void) vnode_uncache(target_vp); - }; -#endif - cache_purge(target_vp); - - retval = synthfs_remove_internal(targetPar_vp, target_vp, target_cnp, ap->a_context); - - target_vp = NULL; - target_sp = NULL; - - if (retval) goto bad; - }; - - - /* remove the existing entry from the namei cache: */ - if (source_vp->v_type == VREG) cache_purge(source_vp); - - retval = synthfs_move_rename_entry( source_vp, targetPar_vp, target_cnp->cn_nameptr); - - if (retval) goto bad; - - source_sp->s_nodeflags &= ~IN_RENAME; - - /* - * Timestamp both parent directories. - * Note that if this is a rename within the same directory, - * (where targetPar_hp == sourcePar_hp) - * the code below is still safe and correct. 
- */ - targetPar_sp->s_nodeflags |= IN_UPDATE; - sourcePar_sp->s_nodeflags |= IN_UPDATE; - - microtime(&tv); - SYNTHFSTIMES(targetPar_sp, &tv, &tv); - SYNTHFSTIMES(sourcePar_sp, &tv, &tv); - - return (retval); - -bad:; - if (retval && doingdirectory) - source_sp->s_nodeflags &= ~IN_RENAME; - - return (retval); - -abortit:; - return (retval); -} - - - -/* - * Mkdir system call - -#% mkdir dvp L U U -#% mkdir vpp - L - -# - vnop_mkdir { - IN WILLRELE struct vnode *dvp; - OUT struct vnode **vpp; - IN struct componentname *cnp; - IN struct vnode_attr *vap; - IN vfs_context_t context; - - We are responsible for freeing the namei buffer, it is done in synthfs_makenode(), unless there is - a previous error. - -*/ - -int -synthfs_mkdir(ap) -struct vnop_mkdir_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; -} */ *ap; -{ - int retval; - struct vnode *dvp = ap->a_dvp; - struct componentname *cnp = ap->a_cnp; - int mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); - struct vnode *vp = NULL; - - *ap->a_vpp = NULL; - - retval = synthfs_new_directory(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, mode, vfs_context_proc(cnp->cn_context), &vp); - if (retval) goto Error_Exit; - - *ap->a_vpp = vp; - - retval = vnode_setattr(vp, ap->a_vap, ap->a_context); - if (retval != 0) goto Error_Exit; - - Error_Exit:; - if (retval != 0) { - if (vp) synthfs_remove_directory(vp); - } - - return retval; -} - - - -/* - -#% remove dvp L U U -#% remove vp L U U -# - vnop_remove { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; - IN struct componentname *cnp; - IN vfs_context_t context; - - */ - -int -synthfs_remove(ap) -struct vnop_remove_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - vfs_context_t a_context; -} */ *ap; -{ - return synthfs_remove_internal(ap->a_dvp, ap->a_vp, ap->a_cnp, ap->a_context); -} - -static int 
-synthfs_remove_internal(struct vnode *dvp, struct vnode *vp, - __unused struct componentname *cnp, - __unused vfs_context_t context) -{ - struct synthfsnode *sp = VTOS(vp); - struct timeval tv; - int retval = 0; - - /* This is sort of silly right now but someday it may make sense... */ - if (sp->s_nodeflags & IN_MODIFIED) { - microtime(&tv); - synthfs_update(vp, &tv, &tv, 0); - }; - - /* remove the entry from the namei cache: */ - cache_purge(vp); - - /* remove entry from tree and reclaim any resources consumed: */ - switch (sp->s_type) { - case SYNTHFS_DIRECTORY: - synthfs_remove_directory(vp); - break; - - - case SYNTHFS_SYMLINK: - synthfs_remove_symlink(vp); - break; - - case SYNTHFS_FILE: - /* Fall through to default case */ - - default: - synthfs_remove_entry(vp); - }; - -out: - - if (! retval) - VTOS(dvp)->s_nodeflags |= IN_CHANGE | IN_UPDATE; - - return (retval); -} - - - -/* -#% rmdir dvp L U U -#% rmdir vp L U U -# - vnop_rmdir { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; - IN struct componentname *cnp; - IN vfs_context_t context; - - */ - -int -synthfs_rmdir(ap) - struct vnop_rmdir_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - vfs_context_t a_context; -} */ *ap; -{ - return synthfs_remove((struct vnop_remove_args *)ap); -} - - - -/* - * synthfs_select - just say OK. Only possible op is readdir - * - * Locking policy: ignore - */ -int -synthfs_select(__unused -struct vnop_select_args /* { - struct vnode *a_vp; - int a_which; - int a_fflags; - kauth_cred_t a_cred; - void *a_wql; - struct proc *a_p; -} */ *ap) -{ - DBG_VOP(("synthfs_select called\n")); - - return (1); -} - -/* -# -#% symlink dvp L U U -#% symlink vpp - U - -# -# XXX - note that the return vnode has already been vnode_put'ed -# by the filesystem layer. To use it you must use vnode_get, -# possibly with a further namei. 
-# - vnop_symlink { - IN WILLRELE struct vnode *dvp; - OUT WILLRELE struct vnode **vpp; - IN struct componentname *cnp; - IN struct vnode_attr *vap; - IN char *target; - - We are responsible for freeing the namei buffer, it is done in synthfs_makenode(), unless there is - a previous error. - - -*/ - -int -synthfs_symlink(ap) - struct vnop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - char *a_target; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *dvp = ap->a_dvp; - struct vnode **vpp = ap->a_vpp; - struct componentname *cnp = ap->a_cnp; - int retval; - - *vpp = NULL; - - retval = synthfs_new_symlink(VTOVFS(dvp), dvp, cnp->cn_nameptr, VTOSFS(dvp)->synthfs_nextid++, ap->a_target, vfs_context_proc(cnp->cn_context), vpp); - - return (retval); -} - - - -/* -# -#% readlink vp L L L -# - vnop_readlink { - IN struct vnode *vp; - INOUT struct uio *uio; - IN kauth_cred_t cred; - */ - -int -synthfs_readlink(ap) -struct vnop_readlink_args /* { - struct vnode *a_vp; - struct uio *a_uio; - vfs_context_t a_context; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct synthfsnode *sp = VTOS(vp); - struct uio *uio = ap->a_uio; - int retval; - unsigned long count; - - if (ap->a_uio->uio_offset > sp->s_u.s.s_length) { - return 0; - }; - - // LP64todo - fix this! - if (uio->uio_offset + uio_resid(uio) <= sp->s_u.s.s_length) { - count = uio_resid(uio); - } else { - count = sp->s_u.s.s_length - uio->uio_offset; - }; - retval = uiomove((void *)((unsigned char *)sp->s_u.s.s_symlinktarget + uio->uio_offset), count, uio); - return (retval); - -} - - - - - - -/* - * Read directory entries. 
- */ -int -synthfs_readdir(ap) -struct vnop_readdir_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_flags; - int *a_eofflag; - int *a_numdirent; - vfs_context_t a_context; -} */ *ap; -{ - struct synthfsnode *sp = VTOS(ap->a_vp); - register struct uio *uio = ap->a_uio; - off_t diroffset; /* Offset into simulated directory file */ - struct synthfsnode *entry; - - DBG_VOP(("\tuio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); - - if (ap->a_flags & (VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF)) - return (EINVAL); - - /* We assume it's all one big buffer... */ - if (uio->uio_iovcnt > 1) { - DBG_VOP(("\tuio->uio_iovcnt = %d?\n", uio->uio_iovcnt)); - return EINVAL; - }; - - diroffset = 0; - - /* - * We must synthesize . and .. - */ - DBG_VOP(("\tstarting ... uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); - if (uio->uio_offset == diroffset) - { - DBG_VOP(("\tAdding .\n")); - diroffset += synthfs_adddirentry(sp->s_nodeid, DT_DIR, ".", uio); - DBG_VOP(("\t after adding ., uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); - } - if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { - /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ - return EINVAL; - }; - - if (uio->uio_offset == diroffset) - { - DBG_VOP(("\tAdding ..\n")); - if (sp->s_parent != NULL) { - diroffset += synthfs_adddirentry(sp->s_parent->s_nodeid, DT_DIR, "..", uio); - } else { - diroffset += synthfs_adddirentry(sp->s_nodeid, DT_DIR, "..", uio); - } - DBG_VOP(("\t after adding .., uio_offset = %d, uio_resid = %lld\n", (int) uio->uio_offset, uio_resid(uio))); - } - if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { - /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ - return EINVAL; - }; - - /* OK, so much for the fakes. 
Now for the "real thing": */ - TAILQ_FOREACH(entry, &sp->s_u.d.d_subnodes, s_sibling) { - if (diroffset == uio->uio_offset) { - /* Return this entry */ - diroffset += synthfs_adddirentry(entry->s_nodeid, VTTOIF(STOV(entry)->v_type), entry->s_name, uio); - }; - if ((uio_resid(uio) > 0) && (diroffset > uio->uio_offset)) { - /* Oops - we skipped over a partial entry: at best, diroffset should've just matched uio->uio_offset */ - return EINVAL; - }; - }; - - if (ap->a_eofflag) - *ap->a_eofflag = (entry == NULL); /* If we ran all the way through the list, there is no more */ - - return 0; -} - - - -/* - -#% lookup dvp L ? ? -#% lookup vpp - L - - - */ - -int -synthfs_cached_lookup(ap) - struct vnop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - struct vnode *dp = ap->a_dvp; - struct componentname *cnp = ap->a_cnp; - u_long nameiop = cnp->cn_nameiop; - u_long flags = cnp->cn_flags; - struct vnode **vpp = ap->a_vpp; - int result = 0; - - DBG_VOP(("synthfs_cached_lookup called, name = %s, namelen = %ld\n", ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen)); -#if DEBUG - if (flags & ISLASTCN) DBG_VOP(("\tISLASTCN is set\n")); -#endif - - *vpp = NULL; - - /* - * Look up an entry in the namei cache - */ - - result = cache_lookup(dp, vpp, cnp); - if (result == 0) { - /* There was no entry in the cache for this parent vnode/name pair: - do the full-blown pathname lookup - */ - return synthfs_lookup(ap); - }; - if (result == ENOENT) return result; - - /* An entry matching the parent vnode/name was found in the cache: */ - - return (0); - -Err_Exit:; - return result; -} - - - -int -synthfs_lookup(ap) - struct vnop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *dp = ap->a_dvp; - struct synthfsnode *dsp = VTOS(dp); - struct componentname *cnp = ap->a_cnp; - u_long nameiop = cnp->cn_nameiop; -// char *nameptr = 
cnp->cn_nameptr; - u_long flags = cnp->cn_flags; - long namelen = cnp->cn_namelen; -// struct proc *p = cnp->cn_proc; - vfs_context_t ctx = cnp->cn_context; - kauth_cred_t cred = vfs_context_ucred(ctx); - struct synthfsnode *entry; - struct vnode *target_vp = NULL; - int result = 0; - boolean_t found = FALSE; - boolean_t isDot = FALSE; - boolean_t isDotDot = FALSE; - struct vnode *starting_parent = dp; - - DBG_VOP(("synthfs_lookup called, name = %s, namelen = %ld\n", ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen)); -#if DEBUG - if (flags & LOCKPARENT) DBG_VOP(("\tLOCKPARENT is set\n")); - if (flags & ISLASTCN) DBG_VOP(("\tISLASTCN is set\n")); -#endif - - *ap->a_vpp = NULL; - - /* first check for "." and ".." */ - if (cnp->cn_nameptr[0] == '.') { - if (namelen == 1) { - /* - "." requested - */ - isDot = TRUE; - found = TRUE; - - target_vp = dp; - vnode_get(target_vp); - - result = 0; - - goto Std_Exit; - } else if ((namelen == 2) && (cnp->cn_nameptr[1] == '.')) { - /* - ".." requested - */ - isDotDot = TRUE; - found = TRUE; - - if ((dsp->s_parent != NULL) && (dsp->s_parent != VTOS(dp))) { - target_vp = STOV(dsp->s_parent); - /* - * Special case for ".." to prevent deadlock: - * always release the parent vnode BEFORE trying to acquire - * ITS parent. This avoids deadlocking with another lookup - * starting from the target_vp trying to vnode_get() this directory. - */ - result = vnode_get(target_vp); - - } else { - target_vp = dp; - /* dp is alread locked and ref'ed */ - result = 0; - } - - goto Std_Exit; - } - } - - /* finally, just look for entries by name (making sure the entry's length - matches the cnp's namelen... */ - TAILQ_FOREACH(entry, &dsp->s_u.d.d_subnodes, s_sibling) { - if ((bcmp(cnp->cn_nameptr, entry->s_name, (unsigned)namelen) == 0) && - (*(entry->s_name + namelen) == (char)0)) { - found = TRUE; - target_vp = STOV(entry); - result = vnode_getwithref(target_vp); /* refcount is always > 0 for any vnode in this list... 
*/ - if (result != 0) { - goto Err_Exit; - }; - - /* The specified entry was found and successfully acquired: */ - goto Std_Exit; - }; - }; - - found = FALSE; - -Std_Exit:; - if (found) { - if ((nameiop == DELETE) && (flags & ISLASTCN)) { - - /* - * If the parent directory is "sticky" then the user must own - * the directory, or the file in it, in order to be allowed to - * delete it (unless the user is root). This implements - * append-only directories - */ - if ((dsp->s_mode & S_ISVTX) && - suser(cred, NULL) && - (kauth_cred_getuid(cred) != dsp->s_uid) && - (target_vp != NULL) && - (target_vp->v_type != VLNK) && - (VTOS(target_vp)->s_uid != kauth_cred_getuid(cred))) { - vnode_put(target_vp); - result = EPERM; - goto Err_Exit; - }; - }; - - if ((nameiop == RENAME) && (flags & WANTPARENT) && (flags * ISLASTCN)) { - - if (isDot) { - vnode_put(target_vp); - result = EISDIR; - goto Err_Exit; - }; - }; - } else { - /* The specified entry wasn't found: */ - result = ENOENT; - - if ((flags & ISLASTCN) && - ((nameiop == CREATE) || - (nameiop == RENAME) || - ((nameiop == DELETE) && (flags & DOWHITEOUT) && (flags & ISWHITEOUT)))) { - /* create a new entry */ - result = EJUSTRETURN; - } - }; - - *ap->a_vpp = target_vp; - -Err_Exit:; - DBG_VOP(("synthfs_lookup: result = %d.\n", result)); - if (found) { - if (target_vp) { - DBG_VOP(("synthfs_lookup: target_vp = 0x%08X \n", (u_long)target_vp)); - } else { - DBG_VOP(("synthfs_lookup: found = true but target_vp = NULL?\n")); - }; - } else { - DBG_VOP(("synthf_lookup: target not found.\n")); - }; - DBG_VOP(("synthfs_lookup: dp = %08X; starting_parent = 0x%08X .\n", (u_long)dp, (u_long)starting_parent)); - - return result; -} - - - -/* - -#% pathconf vp L L L -# - vnop_pathconf { - IN struct vnode *vp; - IN int name; - OUT register_t *retval; -*/ -int -synthfs_pathconf(ap) -struct vnop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - int *a_retval; - vfs_context_t a_context; -} */ *ap; -{ - DBG_VOP(("synthfs_pathconf 
called\n")); - - switch (ap->a_name) - { - case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; - return (0); - case _PC_NAME_MAX: - *ap->a_retval = NAME_MAX; - return (0); - case _PC_PATH_MAX: - *ap->a_retval = PATH_MAX; - return (0); - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - return (0); - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 200112; /* _POSIX_CHOWN_RESTRICTED */ - return (0); - case _PC_NO_TRUNC: - *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ - return (0); - default: - return (EINVAL); - } - /* NOTREACHED */ -} - - -/* - * Update the access, modified, and node change times as specified by the - * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is - * used to specify that the node needs to be updated but that the times have - * already been set. The access and modified times are taken from the second - * and third parameters; the node change time is always taken from the current - * time. If waitfor is set, then wait for the disk write of the node to - * complete. 
- */ - -int -synthfs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, __unused int waitfor) -{ - struct synthfsnode *sp = VTOS(vp); - struct timeval tv; - - DBG_ASSERT(sp != NULL); - - if (((sp->s_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0) && - !(VTOVFS(vp)->mnt_flag & MNT_RDONLY)) { - if (sp->s_nodeflags & IN_ACCESS) sp->s_accesstime = *access; - if (sp->s_nodeflags & IN_UPDATE) sp->s_modificationtime = *modify; - if (sp->s_nodeflags & IN_CHANGE) { - - microtime(&tv); - sp->s_changetime = tv; - } - }; - - /* After the updates are finished, clear the flags */ - sp->s_nodeflags &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); - - return 0; -} - - - -/******************************************************************************************* - - Utility/housekeeping vnode operations: - - ******************************************************************************************/ - - -/* -# -#% inactive vp L U U -# - vnop_inactive { - IN struct vnode *vp; - IN struct proc *p; - -*/ - -int -synthfs_inactive(ap) -struct vnop_inactive_args /* { - struct vnode *a_vp; - vfs_context_t a_context; -} */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct synthfsnode *sp = VTOS(vp); - struct timeval tv; - -#if DEBUG - if (vp->v_usecount != 0) - DBG_VOP(("synthfs_inactive: bad usecount = %d\n", vp->v_usecount )); -#endif - - /* - * Ignore nodes related to stale file handles. - */ - if (vp->v_type == VNON) - goto out; - - /* This is sort of silly but might make sense in the future: */ - if (sp->s_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { - microtime(&tv); - synthfs_update(vp, &tv, &tv, 0); - } - -out: - /* - * If we are done with the inode, reclaim it - * so that it can be reused immediately. - */ - if (vp->v_type == VNON) { - vnode_recycle(vp); - }; - - return 0; -} - - - -/* - * synthfs_reclaim - Reclaim a vnode so that it can be used for other purposes. 
- * - * Locking policy: ignored - */ -int -synthfs_reclaim(ap) - struct vnop_reclaim_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct synthfsnode *sp = VTOS(vp); - void *name = sp->s_name; - - sp->s_name = NULL; - FREE(name, M_TEMP); - - vp->v_data = NULL; - FREE((void *)sp, M_SYNTHFS); - - return (0); -} diff --git a/bsd/miscfs/union/union_subr.c b/bsd/miscfs/union/union_subr.c index b5b7b43a7..34dbe14f3 100644 --- a/bsd/miscfs/union/union_subr.c +++ b/bsd/miscfs/union/union_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -102,7 +102,7 @@ static int union_vn_close(struct vnode *vp, int fmode, vfs_context_t ctx); /* unsigned int ... */ #define UNION_HASH(u, l) \ - (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1)) + (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) static LIST_HEAD(unhead, union_node) unhead[NHASH]; static int unvplock[NHASH]; @@ -763,8 +763,8 @@ int union_copyfile(struct vnode *fvp, struct vnode *tvp, vfs_context_t context) { char *bufp; - struct uio uio; - struct iovec_32 iov; + struct uio *auio; + char uio_buf [ UIO_SIZEOF(1) ]; int error = 0; /* @@ -775,43 +775,34 @@ union_copyfile(struct vnode *fvp, struct vnode *tvp, vfs_context_t context) * give up at the first sign of trouble. */ - -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif - uio.uio_offset = 0; + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, + UIO_READ /* will change */, &uio_buf, sizeof(uio_buf)); bufp = _MALLOC(MAXPHYSIO, M_TEMP, M_WAITOK); + if (bufp == NULL) { + return ENOMEM; + } /* ugly loop follows... 
*/ do { - off_t offset = uio.uio_offset; + off_t offset = uio_offset(auio); - uio.uio_iovs.iov32p = &iov; - uio.uio_iovcnt = 1; - iov.iov_base = (uintptr_t)bufp; - iov.iov_len = MAXPHYSIO; - uio_setresid(&uio, iov.iov_len); - uio.uio_rw = UIO_READ; - error = VNOP_READ(fvp, &uio, 0, context); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, (uintptr_t)bufp, MAXPHYSIO); + error = VNOP_READ(fvp, auio, 0, context); if (error == 0) { - uio.uio_iovs.iov32p = &iov; - uio.uio_iovcnt = 1; - iov.iov_base = (uintptr_t)bufp; - iov.iov_len = MAXPHYSIO - uio_resid(&uio); - uio.uio_offset = offset; - uio.uio_rw = UIO_WRITE; - uio_setresid(&uio, iov.iov_len); - - if (uio_resid(&uio) == 0) + user_ssize_t resid = uio_resid(auio); + + uio_reset(auio, offset, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, (uintptr_t)bufp, MAXPHYSIO - resid); + + if (uio_resid(auio) == 0) break; do { - error = VNOP_WRITE(tvp, &uio, 0, context); - } while ((uio_resid(&uio) > 0) && (error == 0)); + error = VNOP_WRITE(tvp, auio, 0, context); + } while ((uio_resid(auio) > 0) && (error == 0)); } } while (error == 0); @@ -1467,6 +1458,9 @@ union_dircache(struct vnode *vp, __unused vfs_context_t context) dircache = (struct vnode **) _MALLOC(count * sizeof(struct vnode *), M_TEMP, M_WAITOK); + if (dircache == NULL) { + goto out; + } newdircache = dircache; alloced = 1; vpp = dircache; diff --git a/bsd/miscfs/union/union_vfsops.c b/bsd/miscfs/union/union_vfsops.c index 05b4dfa9e..6924e2f67 100644 --- a/bsd/miscfs/union/union_vfsops.c +++ b/bsd/miscfs/union/union_vfsops.c @@ -280,9 +280,6 @@ union_mount(mount_t mp, __unused vnode_t devvp, user_addr_t data, vfs_context_t (void) copyinstr(args.target, vcp, len - 1, (size_t *)&size); bzero(vcp + size, len - size); - /* mark the filesystem thred safe */ - mp->mnt_vtable->vfc_threadsafe = TRUE; - #ifdef UNION_DIAGNOSTIC printf("union_mount: from %s, on %s\n", mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); diff --git 
a/bsd/miscfs/union/union_vnops.c b/bsd/miscfs/union/union_vnops.c index 240ab9f40..ddc374dea 100644 --- a/bsd/miscfs/union/union_vnops.c +++ b/bsd/miscfs/union/union_vnops.c @@ -1550,7 +1550,7 @@ union_pagein(struct vnop_pagein_args *ap) struct vnop_pagein_args { struct vnode *a_vp, upl_t a_pl, - vm_offset_t a_pl_offset, + upl_offset_t a_pl_offset, off_t a_f_offset, size_t a_size, int a_flags diff --git a/bsd/net/Makefile b/bsd/net/Makefile index 1c5ed5df9..920fbe064 100644 --- a/bsd/net/Makefile +++ b/bsd/net/Makefile @@ -34,7 +34,7 @@ KERNELFILES= \ PRIVATE_DATAFILES = \ if_atm.h if_vlan_var.h if_ppp.h firewire.h \ ppp_defs.h radix.h if_bond_var.h lacp.h ndrv_var.h \ - raw_cb.h etherdefs.h iso88025.h + raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h PRIVATE_KERNELFILES = ${KERNELFILES} \ bpfdesc.h dlil_pvt.h ppp_comp.h \ diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index 5880506f1..74ac08569 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -94,6 +94,9 @@ #include #include #include +#include +#include +#include #if defined(sparc) && BSD < 199103 #include @@ -199,7 +202,8 @@ static void catchpacket(struct bpf_d *, u_char *, u_int, u_int, void (*)(const void *, void *, size_t)); static void reset_d(struct bpf_d *); static int bpf_setf(struct bpf_d *, u_int bf_len, user_addr_t bf_insns); -static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); +static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *, + struct proc *); static int bpf_setdlt(struct bpf_d *, u_int); /*static void *bpf_devfs_token[MAXBPFILTER];*/ @@ -306,7 +310,12 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc sa_family = AF_UNSPEC; hlen = sizeof(struct firewire_header); break; - + + case DLT_IEEE802_11: /* IEEE 802.11 wireless */ + sa_family = AF_IEEE80211; + hlen = 0; + break; + default: return (EIO); } @@ -461,6 +470,20 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) bp->bif_dlist = d; if (first) { + bpf_tap_mode tap_mode; + + switch ((d->bd_oflags & (FREAD | FWRITE))) { + case FREAD: + tap_mode = BPF_TAP_INPUT; + break; + case FWRITE: + tap_mode = BPF_TAP_OUTPUT; + break; + default: + tap_mode = BPF_TAP_INPUT_OUTPUT; + break; + } + /* Find the default bpf entry for this ifp */ if (bp->bif_ifp->if_bpf == NULL) { struct bpf_if *primary; @@ -474,10 +497,10 @@ bpf_attachd(struct bpf_d *d, struct bpf_if *bp) /* Only call dlil_set_bpf_tap for primary dlt */ if (bp->bif_ifp->if_bpf == bp) - dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT, bpf_tap_callback); + dlil_set_bpf_tap(bp->bif_ifp, tap_mode, bpf_tap_callback); if (bp->bif_tap) - error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, BPF_TAP_INPUT_OUTPUT); + error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt, tap_mode); } return error; @@ -548,7 +571,7 @@ bpf_detachd(struct bpf_d *d) */ /* ARGSUSED */ int -bpfopen(dev_t dev, __unused int flags, __unused int fmt, +bpfopen(dev_t dev, int flags, 
__unused int fmt, __unused struct proc *p) { struct bpf_d *d; @@ -603,6 +626,7 @@ bpfopen(dev_t dev, __unused int flags, __unused int fmt, d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; d->bd_seesent = 1; + d->bd_oflags = flags; #if CONFIG_MACF_NET mac_bpfdesc_label_init(d); mac_bpfdesc_label_associate(kauth_cred_get(), d); @@ -701,8 +725,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) * Restrict application to use a buffer the same size as * as kernel buffers. */ - // LP64todo - fix this - if (uio->uio_resid != d->bd_bufsize) { + if (uio_resid(uio) != d->bd_bufsize) { lck_mtx_unlock(bpf_mlock); return (EINVAL); } @@ -733,12 +756,12 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) lck_mtx_unlock(bpf_mlock); return (ENXIO); } - - if (ioflag & IO_NDELAY) - error = EWOULDBLOCK; - else - error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", - d->bd_rtout); + if (ioflag & IO_NDELAY) { + lck_mtx_unlock(bpf_mlock); + return (EWOULDBLOCK); + } + error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", + d->bd_rtout); /* * Make sure device is still opened */ @@ -804,6 +827,7 @@ bpf_wakeup(struct bpf_d *d) #if BSD >= 199103 selwakeup(&d->bd_sel); + KNOTE(&d->bd_sel.si_note, 1); #ifndef __APPLE__ /* XXX */ d->bd_sel.si_pid = 0; @@ -828,7 +852,7 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) struct mbuf *m = NULL; int error; char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN]; - int datlen; + int datlen = 0; lck_mtx_lock(bpf_mlock); @@ -844,7 +868,7 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) ifp = d->bd_bif->bif_ifp; - if (uio->uio_resid == 0) { + if (uio_resid(uio) == 0) { lck_mtx_unlock(bpf_mlock); return (0); } @@ -934,7 +958,7 @@ reset_d(struct bpf_d *d) /* ARGSUSED */ int bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, - __unused struct proc *p) + struct proc *p) { struct bpf_d *d; int error = 0; @@ -1012,24 +1036,19 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, /* * Set link layer read filter. 
*/ - case BIOCSETF64: - case BIOCSETF: { - if (proc_is64bit(current_proc())) { - struct bpf_program64 * prg64; - - prg64 = (struct bpf_program64 *)addr; - error = bpf_setf(d, prg64->bf_len, - prg64->bf_insns); - } - else { - struct bpf_program * prg; - - prg = (struct bpf_program *)addr; - error = bpf_setf(d, prg->bf_len, - CAST_USER_ADDR_T(prg->bf_insns)); - } + case BIOCSETF32: { + struct bpf_program32 *prg32 = (struct bpf_program32 *)addr; + error = bpf_setf(d, prg32->bf_len, + CAST_USER_ADDR_T(prg32->bf_insns)); break; } + + case BIOCSETF64: { + struct bpf_program64 *prg64 = (struct bpf_program64 *)addr; + error = bpf_setf(d, prg64->bf_len, prg64->bf_insns); + break; + } + /* * Flush read packet buffer. */ @@ -1071,11 +1090,13 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, * Get a list of supported data link types. */ case BIOCGDLTLIST: - if (d->bd_bif == NULL) - error = EINVAL; - else - error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); - break; + if (d->bd_bif == NULL) { + error = EINVAL; + } else { + error = bpf_getdltlist(d, + (struct bpf_dltlist *)addr, p); + } + break; /* * Set data link type. @@ -1120,14 +1141,18 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, */ case BIOCSRTIMEOUT: { - struct timeval *tv = (struct timeval *)addr; + struct BPF_TIMEVAL *_tv = (struct BPF_TIMEVAL *)addr; + struct timeval tv; + + tv.tv_sec = _tv->tv_sec; + tv.tv_usec = _tv->tv_usec; /* * Subtract 1 tick from tvtohz() since this isn't * a one-shot timer. 
*/ - if ((error = itimerfix(tv)) == 0) - d->bd_rtout = tvtohz(tv) - 1; + if ((error = itimerfix(&tv)) == 0) + d->bd_rtout = tvtohz(&tv) - 1; break; } @@ -1136,7 +1161,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, */ case BIOCGRTIMEOUT: { - struct timeval *tv = (struct timeval *)addr; + struct BPF_TIMEVAL *tv = (struct BPF_TIMEVAL *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; @@ -1242,7 +1267,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, } lck_mtx_unlock(bpf_mlock); - + return (error); } @@ -1347,35 +1372,33 @@ bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt) * Get a list of available data link type of the interface. */ static int -bpf_getdltlist( - struct bpf_d *d, - struct bpf_dltlist *bfl) +bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl, struct proc *p) { - u_int n; - int error; + u_int n; + int error; struct ifnet *ifp; struct bpf_if *bp; - user_addr_t dlist; - - if (IS_64BIT_PROCESS(current_proc())) { - dlist = CAST_USER_ADDR_T(bfl->bfl_u.bflu_pad); - } - else { + user_addr_t dlist; + + if (proc_is64bit(p)) { + dlist = (user_addr_t)bfl->bfl_u.bflu_pad; + } else { dlist = CAST_USER_ADDR_T(bfl->bfl_u.bflu_list); } - + ifp = d->bd_bif->bif_ifp; n = 0; error = 0; for (bp = bpf_iflist; bp; bp = bp->bif_next) { if (bp->bif_ifp != ifp) continue; - if (dlist != 0) { + if (dlist != USER_ADDR_NULL) { if (n >= bfl->bfl_len) { return (ENOMEM); } - error = copyout(&bp->bif_dlt, dlist, sizeof(bp->bif_dlt)); - dlist += sizeof(bp->bif_dlt); + error = copyout(&bp->bif_dlt, dlist, + sizeof (bp->bif_dlt)); + dlist += sizeof (bp->bif_dlt); } n++; } @@ -1427,7 +1450,7 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt) } /* - * Support for select() and poll() system calls + * Support for select() * * Return true iff the specific operation will not block indefinitely. * Otherwise, return false but make a note that a selwakeup() must be done. 
@@ -1465,6 +1488,92 @@ bpfpoll(dev_t dev, int events, void * wql, struct proc *p) return (revents); } +/* + * Support for kevent() system call. Register EVFILT_READ filters and + * reject all others. + */ +int bpfkqfilter(dev_t dev, struct knote *kn); +static void filt_bpfdetach(struct knote *); +static int filt_bpfread(struct knote *, long); + +static struct filterops bpfread_filtops = { + .f_isfd = 1, + .f_detach = filt_bpfdetach, + .f_event = filt_bpfread, +}; + +int +bpfkqfilter(dev_t dev, struct knote *kn) +{ + struct bpf_d *d; + + /* + * Is this device a bpf? + */ + if (major(dev) != CDEV_MAJOR) { + return (EINVAL); + } + + if (kn->kn_filter != EVFILT_READ) { + return (EINVAL); + } + + lck_mtx_lock(bpf_mlock); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } + + /* + * An imitation of the FIONREAD ioctl code. + */ + if (d->bd_bif == NULL) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } + + kn->kn_hook = d; + kn->kn_fop = &bpfread_filtops; + KNOTE_ATTACH(&d->bd_sel.si_note, kn); + lck_mtx_unlock(bpf_mlock); + return 0; +} + +static void +filt_bpfdetach(struct knote *kn) +{ + struct bpf_d *d = (struct bpf_d *)kn->kn_hook; + + lck_mtx_lock(bpf_mlock); + KNOTE_DETACH(&d->bd_sel.si_note, kn); + lck_mtx_unlock(bpf_mlock); +} + +static int +filt_bpfread(struct knote *kn, long hint) +{ + struct bpf_d *d = (struct bpf_d *)kn->kn_hook; + int ready = 0; + + if (hint == 0) + lck_mtx_lock(bpf_mlock); + + if (d->bd_immediate) { + kn->kn_data = (d->bd_hlen == 0 ? d->bd_slen : d->bd_hlen); + ready = (kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? + kn->kn_sdata : 1)); + } else { + kn->kn_data = d->bd_hlen; + ready = (kn->kn_data > 0); + } + + if (hint == 0) + lck_mtx_unlock(bpf_mlock); + return (ready); +} + static inline void* _cast_non_const(const void * ptr) { union { @@ -1670,7 +1779,10 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen, * Append the bpf header. 
*/ hp = (struct bpf_hdr *)(d->bd_sbuf + curlen); - microtime(&hp->bh_tstamp); + struct timeval tv; + microtime(&tv); + hp->bh_tstamp.tv_sec = tv.tv_sec; + hp->bh_tstamp.tv_usec = tv.tv_usec; hp->bh_datalen = pktlen; hp->bh_hdrlen = hdrlen; /* diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h index 321284b23..a07cfe28e 100644 --- a/bsd/net/bpf.h +++ b/bsd/net/bpf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -117,10 +117,14 @@ struct bpf_program { */ struct bpf_program64 { u_int bf_len; - user_addr_t bf_insns __attribute__((aligned(8))); + user64_addr_t bf_insns __attribute__((aligned(8))); }; -#endif // KERNEL_PRIVATE +struct bpf_program32 { + u_int bf_len; + user32_addr_t bf_insns; +}; +#endif /* KERNEL_PRIVATE */ /* * Struct returned by BIOCGSTATS. @@ -151,7 +155,7 @@ struct bpf_version { #define BPF_TIMEVAL timeval32 #else #define BPF_TIMEVAL timeval -#endif +#endif /* __LP64__ */ /* Current version number of filter architecture. */ #define BPF_MAJOR_VERSION 1 #define BPF_MINOR_VERSION 1 @@ -161,7 +165,8 @@ struct bpf_version { #define BIOCSETF _IOW('B',103, struct bpf_program) #ifdef KERNEL_PRIVATE #define BIOCSETF64 _IOW('B',103, struct bpf_program64) -#endif // KERNEL_PRIVATE +#define BIOCSETF32 _IOW('B',103, struct bpf_program32) +#endif /* KERNEL_PRIVATE */ #define BIOCFLUSH _IO('B',104) #define BIOCPROMISC _IO('B',105) #define BIOCGDLT _IOR('B',106, u_int) @@ -217,7 +222,6 @@ struct bpf_hdr { #define DLT_FDDI 10 /* FDDI */ #define DLT_ATM_RFC1483 11 /* LLC/SNAP encapsulated atm */ #define DLT_RAW 12 /* raw IP */ -#define DLT_APPLE_IP_OVER_IEEE1394 138 /* * These are values from BSD/OS's "bpf.h". 
@@ -235,6 +239,7 @@ struct bpf_hdr { #define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */ #define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */ +#define DLT_PFSYNC 18 /* Packet filter state syncing */ #define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */ /* @@ -301,6 +306,50 @@ struct bpf_hdr { */ #define DLT_LINUX_SLL 113 +/* + * For use in capture-file headers as a link-layer type corresponding + * to OpenBSD PF (Packet Filter) log. + */ +#define DLT_PFLOG 117 + +/* + * BSD header for 802.11 plus a number of bits of link-layer information + * including radio information. + */ +#ifndef DLT_IEEE802_11_RADIO +#define DLT_IEEE802_11_RADIO 127 +#endif + +/* + * Apple IP-over-IEEE 1394, as per a request from Dieter Siegmund + * . The header that's presented is an Ethernet-like + * header: + * + * #define FIREWIRE_EUI64_LEN 8 + * struct firewire_header { + * u_char firewire_dhost[FIREWIRE_EUI64_LEN]; + * u_char firewire_shost[FIREWIRE_EUI64_LEN]; + * u_short firewire_type; + * }; + * + * with "firewire_type" being an Ethernet type value, rather than, + * for example, raw GASP frames being handed up. + */ +#define DLT_APPLE_IP_OVER_IEEE1394 138 + +/* + * For future use with 802.11 captures - defined by AbsoluteValue + * Systems to store a number of bits of link-layer information + * including radio information: + * + * http://www.shaftnet.org/~pizza/software/capturefrm.txt + * + * but it might be used by some non-AVS drivers now or in the + * future. + */ +#define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS radio header */ + + /* * The instruction encodings. */ @@ -379,9 +428,9 @@ struct bpf_insn { * Structure to retrieve available DLTs for the interface. 
*/ struct bpf_dltlist { - u_int32_t bfl_len; /* number of bfd_list array */ + u_int32_t bfl_len; /* number of bfd_list array */ union { - u_int32_t *bflu_list; /* array of DLTs */ + u_int32_t *bflu_list; /* array of DLTs */ u_int64_t bflu_pad; } bfl_u; }; @@ -394,11 +443,10 @@ struct bpf_dltlist { struct ifnet; struct mbuf; -int bpf_validate(const struct bpf_insn *, int); -void bpfdetach(struct ifnet *); -void bpfilterattach(int); -u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); - +extern int bpf_validate(const struct bpf_insn *, int); +extern void bpfdetach(struct ifnet *); +extern void bpfilterattach(int); +extern u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); #endif /* KERNEL_PRIVATE */ #ifdef KERNEL @@ -414,10 +462,10 @@ u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int); */ enum { - BPF_MODE_DISABLED = 0, - BPF_MODE_INPUT = 1, - BPF_MODE_OUTPUT = 2, - BPF_MODE_INPUT_OUTPUT = 3 + BPF_MODE_DISABLED = 0, + BPF_MODE_INPUT = 1, + BPF_MODE_OUTPUT = 2, + BPF_MODE_INPUT_OUTPUT = 3 }; /*! @typedef bpf_tap_mode @@ -468,7 +516,8 @@ typedef errno_t (*bpf_tap_func)(ifnet_t interface, u_int32_t data_link_type, DLT_* defines in bpf.h. @param header_length The length, in bytes, of the data link header. */ -void bpfattach(ifnet_t interface, u_int data_link_type, u_int header_length); +extern void bpfattach(ifnet_t interface, u_int data_link_type, + u_int header_length); /*! @function bpf_attach @@ -486,9 +535,8 @@ void bpfattach(ifnet_t interface, u_int data_link_type, u_int header_length); DLT_* defines in bpf.h. @param header_length The length, in bytes, of the data link header. */ -errno_t bpf_attach(ifnet_t interface, u_int32_t data_link_type, - u_int32_t header_length, bpf_send_func send, - bpf_tap_func tap); +extern errno_t bpf_attach(ifnet_t interface, u_int32_t data_link_type, + u_int32_t header_length, bpf_send_func send, bpf_tap_func tap); /*! 
@function bpf_tap_in @@ -501,8 +549,8 @@ errno_t bpf_attach(ifnet_t interface, u_int32_t data_link_type, @param header An optional pointer to a header that will be prepended. @param headerlen If the header was specified, the length of the header. */ -void bpf_tap_in(ifnet_t interface, u_int32_t dlt, mbuf_t packet, - void* header, size_t header_len); +extern void bpf_tap_in(ifnet_t interface, u_int32_t dlt, mbuf_t packet, + void *header, size_t header_len); /*! @function bpf_tap_out @@ -515,8 +563,8 @@ void bpf_tap_in(ifnet_t interface, u_int32_t dlt, mbuf_t packet, @param header An optional pointer to a header that will be prepended. @param headerlen If the header was specified, the length of the header. */ -void bpf_tap_out(ifnet_t interface, u_int32_t dlt, mbuf_t packet, - void* header, size_t header_len); +extern void bpf_tap_out(ifnet_t interface, u_int32_t dlt, mbuf_t packet, + void *header, size_t header_len); #endif /* KERNEL */ @@ -525,4 +573,4 @@ void bpf_tap_out(ifnet_t interface, u_int32_t dlt, mbuf_t packet, */ #define BPF_MEMWORDS 16 -#endif +#endif /* _NET_BPF_H_ */ diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h index 9f514f89c..2a5cd1aaf 100644 --- a/bsd/net/bpfdesc.h +++ b/bsd/net/bpfdesc.h @@ -100,10 +100,10 @@ struct bpf_d { int bd_bufsize; /* absolute length of buffers */ struct bpf_if * bd_bif; /* interface descriptor */ - u_long bd_rtout; /* Read timeout in 'ticks' */ + u_int32_t bd_rtout; /* Read timeout in 'ticks' */ struct bpf_insn *bd_filter; /* filter code */ - u_long bd_rcount; /* number of packets received */ - u_long bd_dcount; /* number of packets dropped */ + u_int32_t bd_rcount; /* number of packets received */ + u_int32_t bd_dcount; /* number of packets dropped */ u_char bd_promisc; /* true if listening promiscuously */ u_char bd_state; /* idle, waiting, or timed out */ @@ -126,6 +126,7 @@ struct bpf_d { #endif int bd_hdrcmplt; /* false to fill in src lladdr automatically */ int bd_seesent; /* true if bpf should see sent packets 
*/ + int bd_oflags; /* device open flags */ #if CONFIG_MACF_NET struct label * bd_label; /* MAC label for descriptor */ #endif diff --git a/bsd/net/bridge.c b/bsd/net/bridge.c index 74d6a18d2..01d3cb7f5 100644 --- a/bsd/net/bridge.c +++ b/bsd/net/bridge.c @@ -133,7 +133,7 @@ quad_t ticks; DDB(ticks = rdtsc();) ... interesting code ... - DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) + DDB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ;) * */ @@ -411,31 +411,33 @@ static void bdg_timeout(void *dummy) { static int slowtimer = 0 ; - - if (do_bridge) { - static int age_index = 0 ; /* index of table position to age */ - int l = age_index + HASH_SIZE/4 ; - /* - * age entries in the forwarding table. - */ - if (l > HASH_SIZE) - l = HASH_SIZE ; - for (; age_index < l ; age_index++) - if (bdg_table[age_index].used) - bdg_table[age_index].used = 0 ; - else if (bdg_table[age_index].name) { - /* printf("xx flushing stale entry %d\n", age_index); */ - bdg_table[age_index].name = NULL ; - } - if (age_index >= HASH_SIZE) - age_index = 0 ; - - if (--slowtimer <= 0 ) { - slowtimer = 5 ; - - bdg_promisc_on() ; /* we just need unmute, really */ - bdg_loops = 0 ; - } + + if (bdg_inted == 0) { + bdg_init2(0); + } else if (do_bridge) { + static int age_index = 0 ; /* index of table position to age */ + int l = age_index + HASH_SIZE/4 ; + /* + * age entries in the forwarding table. + */ + if (l > HASH_SIZE) + l = HASH_SIZE ; + for (; age_index < l ; age_index++) + if (bdg_table[age_index].used) + bdg_table[age_index].used = 0 ; + else if (bdg_table[age_index].name) { + /* printf("xx flushing stale entry %d\n", age_index); */ + bdg_table[age_index].name = NULL ; + } + if (age_index >= HASH_SIZE) + age_index = 0 ; + + if (--slowtimer <= 0 ) { + slowtimer = 5 ; + + bdg_promisc_on() ; /* we just need unmute, really */ + bdg_loops = 0 ; + } } timeout(bdg_timeout, (void *)0, 2*hz ); } @@ -451,24 +453,47 @@ int bdg_ports ; * initialization of bridge code. 
This needs to be done after all * interfaces have been configured. */ + +static int bdg_inited = 0; + static void -bdginit(void *dummy) +bdg_init2(void) { + if (bdg_inited != 0) + return; + + if (bdg_table == NULL) { + bdg_table = (struct hash_table *) + _MALLOC(HASH_SIZE * sizeof(struct hash_table), + M_IFADDR, M_WAITOK); + if (bdg_table == NULL) + return; + + flush_table(); + } - if (bdg_table == NULL) - bdg_table = (struct hash_table *) - _MALLOC(HASH_SIZE * sizeof(struct hash_table), - M_IFADDR, M_WAITOK); - flush_table(); - - ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), - M_IFADDR, M_WAITOK ); - bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); + if (ifp2sc == NULL) { + ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), + M_IFADDR, M_WAITOK ); + if (ifp2sc == NULL) + return; + + bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); + bdgtakeifaces(); + } + + bdg_inited = 1; +} +static void +bdginit(void *dummy) +{ + /* Initialize first what can't fail */ bzero(&bdg_stats, sizeof(bdg_stats) ); - bdgtakeifaces(); - bdg_timeout(0); do_bridge=0; + + /* Attempt to initialize the rest and start the timer */ + bdg_timeout(0); } void @@ -875,7 +900,7 @@ bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) if (ifp == NULL) once = 1 ; } - DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; + DEB(bdg_fw_ticks += (u_int32_t)(rdtsc() - ticks) ; bdg_fw_count++ ; if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) return m0 ; } diff --git a/bsd/net/bridge.h b/bsd/net/bridge.h index 967a87df2..faeff4283 100644 --- a/bsd/net/bridge.h +++ b/bsd/net/bridge.h @@ -83,7 +83,7 @@ struct bdg_softc { #define IFF_MUTE 0x0002 /* mute this if for bridging. */ #define IFF_USED 0x0004 /* use this if for bridging. 
*/ short cluster_id ; /* in network format */ - u_long magic; + uint32_t magic; } ; extern struct bdg_softc *ifp2sc; @@ -154,8 +154,8 @@ struct mbuf *bdg_forward(struct mbuf *m0, struct ether_header *eh, struct ifnet #define STAT_MAX (int)BDG_FORWARD struct bdg_port_stat { char name[16]; - u_long collisions; - u_long p_in[STAT_MAX+1]; + uint32_t collisions; + uint32_t p_in[STAT_MAX+1]; } ; struct bdg_stats { diff --git a/bsd/net/bsd_comp.c b/bsd/net/bsd_comp.c index 14fb57111..3dd6734c0 100644 --- a/bsd/net/bsd_comp.c +++ b/bsd/net/bsd_comp.c @@ -465,10 +465,10 @@ bsd_init_comp_db(db, options, opt_len, unit, hdrlen, mru, debug, decomp) db->unit = unit; db->hdrlen = hdrlen; db->mru = mru; -#ifndef DEBUG +#if !DEBUG if (debug) #endif - db->debug = 1; + db->debug = 1; bsd_reset(db); @@ -1004,7 +1004,7 @@ bsd_decompress(state, cmp, dmpp) m_freem(mret); if (db->debug) { printf("bsd_decomp%d: ran out of mru\n", db->unit); -#ifdef DEBUG +#if DEBUG while ((cmp = cmp->m_next) != NULL) len += cmp->m_len; printf(" len=%d, finchar=0x%x, codelen=%d, explen=%d\n", @@ -1045,7 +1045,7 @@ bsd_decompress(state, cmp, dmpp) p = (wptr += codelen); while (finchar > LAST) { dictp = &db->dict[db->dict[finchar].cptr]; -#ifdef DEBUG +#if DEBUG if (--codelen <= 0 || dictp->codem1 != finchar-1) goto bad; #endif @@ -1054,7 +1054,7 @@ bsd_decompress(state, cmp, dmpp) } *--p = finchar; -#ifdef DEBUG +#if DEBUG if (--codelen != 0) printf("bsd_decomp%d: short by %d after code 0x%x, max_ent=0x%x\n", db->unit, codelen, incode, max_ent); @@ -1134,7 +1134,7 @@ bsd_decompress(state, cmp, dmpp) *dmpp = mret; return DECOMP_OK; -#ifdef DEBUG +#if DEBUG bad: if (codelen <= 0) { printf("bsd_decomp%d: fell off end of chain ", db->unit); diff --git a/bsd/net/devtimer.c b/bsd/net/devtimer.c index a7cd37bd0..d0b55d251 100644 --- a/bsd/net/devtimer.c +++ b/bsd/net/devtimer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004,2007 Apple Inc. All rights reserved. + * Copyright (c) 2004,2007-2008 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,12 +46,12 @@ #ifdef DEVTIMER_DEBUG #define _devtimer_printf printf -#else DEVTIMER_DEBUG +#else /* !DEVTIMER_DEBUG */ static __inline__ void _devtimer_printf(__unused const char * fmt, ...) { } -#endif DEVTIMER_DEBUG +#endif /* !DEVTIMER_DEBUG */ struct devtimer_s { void * dt_callout; @@ -102,7 +102,7 @@ devtimer_valid(devtimer_ref timer) __private_extern__ void devtimer_retain(devtimer_ref timer) { - OSIncrementAtomic((SInt32 *)&timer->dt_retain_count); + OSIncrementAtomic(&timer->dt_retain_count); return; } @@ -123,7 +123,7 @@ devtimer_release(devtimer_ref timer) { UInt32 old_retain_count; - old_retain_count = OSDecrementAtomic((SInt32 *)&timer->dt_retain_count); + old_retain_count = OSDecrementAtomic(&timer->dt_retain_count); switch (old_retain_count) { case 0: panic("devtimer_release: retain count is 0\n"); @@ -142,7 +142,7 @@ devtimer_release(devtimer_ref timer) static void devtimer_process(void * param0, void * param1) { - int generation = (int)param1; + int generation = *(int*)param1; devtimer_process_func process_func; devtimer_timeout_func timeout_func; devtimer_ref timer = (devtimer_ref)param0; @@ -215,7 +215,7 @@ devtimer_set_absolute(devtimer_ref timer, timer->dt_generation++; devtimer_retain(timer); thread_call_enter1_delayed(timer->dt_callout, - (thread_call_param_t)timer->dt_generation, + &timer->dt_generation, timeval_to_absolutetime(abs_time)); return; } @@ -272,8 +272,8 @@ __private_extern__ struct timeval devtimer_current_time(void) { struct timeval tv; - uint32_t sec; - uint32_t usec; + clock_sec_t sec; + clock_usec_t usec; clock_get_system_microtime(&sec, &usec); tv.tv_sec = sec; diff --git a/bsd/net/devtimer.h b/bsd/net/devtimer.h index f9c48a30a..9504f22e8 100644 --- a/bsd/net/devtimer.h +++ b/bsd/net/devtimer.h @@ -92,4 +92,4 @@ devtimer_current_time(void); int32_t devtimer_current_secs(void); -#endif _NET_DEVTIMER_H +#endif /* _NET_DEVTIMER_H */ diff --git 
a/bsd/net/dlil.c b/bsd/net/dlil.c index d38346b0c..d679efc8b 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,6 +74,10 @@ #include #endif /* MAC_NET */ +#if PF +#include +#endif /* PF */ + #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) @@ -168,8 +172,9 @@ static lck_attr_t *ifnet_lock_attr; static lck_rw_t *ifnet_head_mutex; static lck_mtx_t *dlil_ifnet_mutex; static lck_mtx_t *dlil_mutex; -static unsigned long dlil_read_count = 0; -static unsigned long dlil_detach_waiting = 0; +static u_int32_t dlil_read_count = 0; +static u_int32_t dlil_detach_waiting = 0; +u_int32_t dlil_filter_count = 0; extern u_int32_t ipv4_ll_arp_aware; static struct dlil_threading_info dlil_lo_thread; @@ -194,6 +199,12 @@ static __inline__ void dlil_read_end(void); static int dlil_write_begin(void); static void dlil_write_end(void); +#if DEBUG +__private_extern__ int dlil_verbose = 1; +#else +__private_extern__ int dlil_verbose = 0; +#endif /* DEBUG */ + unsigned int net_affinity = 1; static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); @@ -233,8 +244,8 @@ _cast_non_const(const void * ptr) { static void dlil_read_begin(void) { - unsigned long new_value; - unsigned long old_value; + u_int32_t new_value; + u_int32_t old_value; struct uthread *uth = get_bsdthread_info(current_thread()); if (uth->dlil_incremented_read == dlil_writer_waiting) @@ -261,7 +272,7 @@ dlil_read_end(void) { struct uthread *uth = get_bsdthread_info(current_thread()); - OSDecrementAtomic((SInt32*)&dlil_read_count); + OSDecrementAtomic(&dlil_read_count); uth->dlil_incremented_read--; if (dlil_read_count == dlil_writer_waiting) wakeup(_cast_non_const(&dlil_writer_waiting)); @@ -276,7 +287,7 
@@ dlil_write_begin(void) return EDEADLK; } lck_mtx_lock(dlil_mutex); - OSBitOrAtomic((UInt32)dlil_writer_waiting, (UInt32*)&dlil_read_count); + OSBitOrAtomic((UInt32)dlil_writer_waiting, &dlil_read_count); again: if (dlil_read_count == dlil_writer_waiting) { uth->dlil_incremented_read = dlil_writer_waiting; @@ -295,7 +306,7 @@ dlil_write_end(void) if (uth->dlil_incremented_read != dlil_writer_waiting) panic("dlil_write_end - thread is not a writer"); - OSBitAndAtomic((UInt32)~dlil_writer_waiting, (UInt32*)&dlil_read_count); + OSBitAndAtomic((UInt32)~dlil_writer_waiting, &dlil_read_count); lck_mtx_unlock(dlil_mutex); uth->dlil_incremented_read = 0; wakeup(&dlil_read_count); @@ -308,7 +319,7 @@ dlil_write_end(void) */ static int -proto_hash_value(u_long protocol_family) +proto_hash_value(u_int32_t protocol_family) { /* * dlil_proto_unplumb_all() depends on the mapping between @@ -330,10 +341,10 @@ proto_hash_value(u_long protocol_family) } static struct if_proto* -find_attached_proto(struct ifnet *ifp, u_long protocol_family) +find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family) { struct if_proto *proto = NULL; - u_long i = proto_hash_value(protocol_family); + u_int32_t i = proto_hash_value(protocol_family); if (ifp->if_proto_hash) { proto = SLIST_FIRST(&ifp->if_proto_hash[i]); } @@ -348,13 +359,13 @@ find_attached_proto(struct ifnet *ifp, u_long protocol_family) static void if_proto_ref(struct if_proto *proto) { - OSAddAtomic(1, (SInt32*)&proto->refcount); + OSAddAtomic(1, &proto->refcount); } static void if_proto_free(struct if_proto *proto) { - int oldval = OSAddAtomic(-1, (SInt32*)&proto->refcount); + int oldval = OSAddAtomic(-1, &proto->refcount); if (oldval == 1) { /* This was the last reference */ FREE(proto, M_IFADDR); @@ -450,8 +461,8 @@ static int dlil_ifp_proto_count(struct ifnet * ifp) } __private_extern__ void -dlil_post_msg(struct ifnet *ifp, u_long event_subclass, u_long event_code, - struct net_event_data *event_data, u_long 
event_data_len) +dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, u_int32_t event_code, + struct net_event_data *event_data, u_int32_t event_data_len) { struct net_event_data ev_data; struct kev_msg ev_msg; @@ -474,7 +485,7 @@ dlil_post_msg(struct ifnet *ifp, u_long event_subclass, u_long event_code, strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); event_data->if_family = ifp->if_family; - event_data->if_unit = (unsigned long) ifp->if_unit; + event_data->if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_length = event_data_len; ev_msg.dv[0].data_ptr = event_data; @@ -525,7 +536,7 @@ dlil_create_input_thread( } else { panic("dlil_create_input_thread: couldn't create thread\n"); } - OSAddAtomic(1, (SInt32*)&cur_dlil_input_threads); + OSAddAtomic(1, &cur_dlil_input_threads); #if DLIL_DEBUG printf("dlil_create_input_thread: threadinfo: %p input_thread=%p threads: cur=%d max=%d\n", inputthread, inputthread->input_thread, dlil_multithreaded_input, cur_dlil_input_threads); @@ -536,7 +547,7 @@ __private_extern__ void dlil_terminate_input_thread( struct dlil_threading_info *inputthread) { - OSAddAtomic(-1, (SInt32*)&cur_dlil_input_threads); + OSAddAtomic(-1, &cur_dlil_input_threads); lck_mtx_unlock(inputthread->input_lck); lck_mtx_free(inputthread->input_lck, inputthread->lck_grp); @@ -564,6 +575,8 @@ dlil_affinity_set(struct thread *tp, u_int32_t tag) void dlil_init(void) { + thread_t thread = THREAD_NULL; + PE_parse_boot_argn("net_affinity", &net_affinity, sizeof (net_affinity)); TAILQ_INIT(&dlil_ifnet_head); @@ -595,7 +608,12 @@ dlil_init(void) */ dlil_create_input_thread(0, dlil_lo_thread_ptr); - (void) kernel_thread(kernel_task, dlil_call_delayed_detach_thread); + (void) kernel_thread_start((thread_continue_t)dlil_call_delayed_detach_thread, NULL, &thread); + thread_deallocate(thread); +#if PF + /* Initialize the packet filter */ + pfinit(); +#endif /* PF */ } __private_extern__ int @@ -631,6 +649,15 @@ dlil_attach_filter( 
TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next); dlil_write_end(); *filter_ref = filter; + + /* + * Bump filter count and route_generation ID to let TCP + * know it shouldn't do TSO on this connection + */ + OSAddAtomic(1, &dlil_filter_count); + if (use_routegenid) + routegenid_update(); + return retval; } @@ -696,6 +723,14 @@ dlil_detach_filter_internal( /* Free the filter */ FREE(filter, M_NKE); + /* + * Decrease filter count and route_generation ID to let TCP + * know it should reevaluate doing TSO or not + */ + OSAddAtomic(-1, &dlil_filter_count); + if (use_routegenid) + routegenid_update(); + return retval; } @@ -1366,8 +1401,9 @@ dlil_output_list( else { KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); retval = ifp->if_output(ifp, m); - if (retval) { - printf("dlil_output: output error retval = %x\n", retval); + if (retval && dlil_verbose) { + printf("dlil_output: output error on %s%d retval = %d\n", + ifp->if_name, ifp->if_unit, retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); } @@ -1384,8 +1420,9 @@ dlil_output_list( if (send_head) { KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); retval = ifp->if_output(ifp, send_head); - if (retval) { - printf("dlil_output: output error retval = %x\n", retval); + if (retval && dlil_verbose) { + printf("dlil_output: output error on %s%d retval = %d\n", + ifp->if_name, ifp->if_unit, retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); } @@ -1547,6 +1584,21 @@ dlil_output( goto next; } + /* + * If this is a TSO packet, make sure the interface still advertises TSO capability + */ + + if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && !(ifp->if_hwassist & IFNET_TSO_IPV4)) { + retval = EMSGSIZE; + m_freem(m); + goto cleanup; + } + + if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && !(ifp->if_hwassist & IFNET_TSO_IPV6)) { + retval = EMSGSIZE; + m_freem(m); + goto cleanup; + } /* * Finally, call the driver.
*/ @@ -1558,8 +1610,9 @@ dlil_output( else { KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); retval = ifp->if_output(ifp, m); - if (retval) { - printf("dlil_output: output error retval = %x\n", retval); + if (retval && dlil_verbose) { + printf("dlil_output: output error on %s%d retval = %d\n", + ifp->if_name, ifp->if_unit, retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); } @@ -1576,8 +1629,9 @@ dlil_output( if (send_head) { KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); retval = ifp->if_output(ifp, send_head); - if (retval) { - printf("dlil_output: output error retval = %x\n", retval); + if (retval && dlil_verbose) { + printf("dlil_output: output error on %s%d retval = %d\n", + ifp->if_name, ifp->if_unit, retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); } @@ -1597,7 +1651,7 @@ errno_t ifnet_ioctl( ifnet_t ifp, protocol_family_t proto_fam, - u_int32_t ioctl_code, + u_long ioctl_code, void *ioctl_arg) { struct ifnet_filter *filter; @@ -1851,11 +1905,10 @@ dlil_send_arp( if ((ifp_list[ifp_on]->if_eflags & IFEF_ARPLL) == 0) { continue; } - - source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead); - + /* Find the source IP address */ ifnet_lock_shared(ifp_list[ifp_on]); + source_hw = TAILQ_FIRST(&ifp_list[ifp_on]->if_addrhead); TAILQ_FOREACH(source_ip, &ifp_list[ifp_on]->if_addrhead, ifa_link) { if (source_ip->ifa_addr && @@ -1872,7 +1925,7 @@ dlil_send_arp( /* Copy the source IP address */ source_ip_copy = *(struct sockaddr_in*)source_ip->ifa_addr; - + ifaref(source_hw); ifnet_lock_done(ifp_list[ifp_on]); /* Send the ARP */ @@ -1880,7 +1933,8 @@ dlil_send_arp( (struct sockaddr_dl*)source_hw->ifa_addr, (struct sockaddr*)&source_ip_copy, NULL, target_proto); - + + ifafree(source_hw); if (result == ENOTSUP) { result = new_result; } @@ -1957,10 +2011,10 @@ ifp_use_reached_zero( ifma->ifma_ifp = NULL; ifma_release(ifma); } - ifnet_head_done(); - + ifp->if_eflags &= ~IFEF_DETACHING; // clear the 
detaching flag ifnet_lock_done(ifp); + ifnet_head_done(); free_func = ifp->if_free; dlil_read_end(); @@ -1975,7 +2029,7 @@ ifp_unuse( struct ifnet *ifp) { int oldval; - oldval = OSDecrementAtomic((SInt32*)&ifp->if_usecnt); + oldval = OSDecrementAtomic(&ifp->if_usecnt); if (oldval == 0) panic("ifp_unuse: ifp(%s%d)->if_usecnt was zero\n", ifp->if_name, ifp->if_unit); @@ -1999,7 +2053,7 @@ dlil_attach_protocol_internal( struct kev_dl_proto_data ev_pr_data; struct ifnet *ifp = proto->ifp; int retval = 0; - u_long hash_value = proto_hash_value(proto->protocol_family); + u_int32_t hash_value = proto_hash_value(proto->protocol_family); /* setup some of the common values */ { @@ -2165,7 +2219,7 @@ dlil_detach_protocol_internal( struct if_proto *proto) { struct ifnet *ifp = proto->ifp; - u_long proto_family = proto->protocol_family; + u_int32_t proto_family = proto->protocol_family; struct kev_dl_proto_data ev_pr_data; if (proto->proto_kpi == kProtoKPI_v1) { @@ -2401,7 +2455,7 @@ ifnet_attach( ifnet_t ifp, const struct sockaddr_dl *ll_addr) { - u_long interface_family; + u_int32_t interface_family; struct ifnet *tmp_if; struct proto_hash_entry *new_proto_list = NULL; int locked = 0; @@ -2439,13 +2493,29 @@ ifnet_attach( return ENOMEM; } + if (!(ifp->if_eflags & IFEF_REUSE) || ifp->if_fwd_route_lock == NULL) { + if (ifp->if_fwd_route_lock == NULL) + ifp->if_fwd_route_lock = lck_mtx_alloc_init( + ifnet_lock_group, ifnet_lock_attr); + + if (ifp->if_fwd_route_lock == NULL) { +#if IFNET_RW_LOCK + lck_rw_free(ifp->if_lock, ifnet_lock_group); +#else + lck_mtx_free(ifp->if_lock, ifnet_lock_group); +#endif + ifp->if_lock = NULL; + return (ENOMEM); + } + } + /* - * Allow interfaces withouth protocol families to attach + * Allow interfaces without protocol families to attach * only if they have the necessary fields filled out. 
*/ if (ifp->if_add_proto == 0 || ifp->if_del_proto == 0) { - DLIL_PRINTF("dlil Attempt to attach interface without family module - %ld\n", + DLIL_PRINTF("dlil Attempt to attach interface without family module - %d\n", interface_family); return ENODEV; } @@ -2490,17 +2560,27 @@ ifnet_attach( ifnet_head_lock_exclusive(); ifnet_lock_exclusive(ifp); - if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) - ifp->if_index = if_next_index(); - else + if ((ifp->if_eflags & IFEF_REUSE) == 0 || ifp->if_index == 0) { + int idx = if_next_index(); + + if (idx == -1) { + ifnet_lock_done(ifp); + ifnet_head_done(); + ifp_unuse(ifp); + dlil_write_end(); + + return ENOBUFS; + } + ifp->if_index = idx; + } else { ifa = TAILQ_FIRST(&ifp->if_addrhead); - + } namelen = snprintf(workbuf, sizeof(workbuf), "%s%d", ifp->if_name, ifp->if_unit); -#define _offsetof(t, m) ((int)((caddr_t)&((t *)0)->m)) +#define _offsetof(t, m) ((uintptr_t)((caddr_t)&((t *)0)->m)) masklen = _offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; socksize = masklen + ifp->if_addrlen; #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof(long) - 1))) - if ((u_long)socksize < sizeof(struct sockaddr_dl)) + if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) socksize = sizeof(struct sockaddr_dl); socksize = ROUNDUP(socksize); ifasize = sizeof(struct ifaddr) + 2 * socksize; @@ -2519,6 +2599,7 @@ ifnet_attach( struct sockaddr_dl *sdl = (struct sockaddr_dl *)(ifa + 1); ifnet_addrs[ifp->if_index - 1] = ifa; bzero(ifa, ifasize); + ifa->ifa_debug |= IFD_ALLOC; sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(workbuf, sdl->sdl_data, namelen); @@ -2550,7 +2631,7 @@ ifnet_attach( * this address to be first on the list. 
*/ ifaref(ifa); - ifa->ifa_debug |= IFA_ATTACHED; + ifa->ifa_debug |= IFD_ATTACHED; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); } #if CONFIG_MACF_NET @@ -2559,19 +2640,17 @@ ifnet_attach( TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link); ifindex2ifnet[ifp->if_index] = ifp; - - ifnet_head_done(); } /* - * A specific dlil input thread is created per Ethernet interface. + * A specific dlil input thread is created per Ethernet/PDP interface. * pseudo interfaces or other types of interfaces use the main ("loopback") thread. * If the sysctl "net.link.generic.system.multi_threaded_input" is set to zero, all packets will * be handled by the main loopback thread, reverting to 10.4.x behaviour. * */ - if (ifp->if_type == IFT_ETHER) { + if (ifp->if_type == IFT_ETHER || ifp->if_type == IFT_PDP) { int err; if (dlil_multithreaded_input > 0) { @@ -2579,15 +2658,22 @@ ifnet_attach( if (ifp->if_input_thread == NULL) panic("ifnet_attach ifp=%p couldn't alloc threading\n", ifp); if ((err = dlil_create_input_thread(ifp, ifp->if_input_thread)) != 0) - panic("ifnet_attach ifp=%p couldn't get a thread. err=%x\n", ifp, err); + panic("ifnet_attach ifp=%p couldn't get a thread. err=%d\n", ifp, err); #ifdef DLIL_DEBUG - printf("ifnet_attach: dlil thread for ifp=%p if_index=%x\n", ifp, ifp->if_index); + printf("ifnet_attach: dlil thread for ifp=%p if_index=%d\n", ifp, ifp->if_index); #endif } } - dlil_write_end(); ifnet_lock_done(ifp); - + ifnet_head_done(); +#if PF + /* + * Attach packet filter to this interface, if enabled. + */ + pf_ifnet_hook(ifp, 1); +#endif /* PF */ + dlil_write_end(); + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0); return 0; @@ -2640,7 +2726,14 @@ ifnet_detach( } return retval; } - + +#if PF + /* + * Detach this interface from packet filter, if enabled.
+ */ + pf_ifnet_hook(ifp, 0); +#endif /* PF */ + /* Steal the list of interface filters */ fhead = ifp->if_flt_head; TAILQ_INIT(&ifp->if_flt_head); @@ -2700,6 +2793,13 @@ ifnet_detach( lck_mtx_unlock(inputthread->input_lck); } } + /* last chance to clean up IPv4 forwarding cached route */ + lck_mtx_lock(ifp->if_fwd_route_lock); + if (ifp->if_fwd_route.ro_rt != NULL) { + rtfree(ifp->if_fwd_route.ro_rt); + ifp->if_fwd_route.ro_rt = NULL; + } + lck_mtx_unlock(ifp->if_fwd_route_lock); dlil_write_end(); for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) { @@ -2717,7 +2817,7 @@ ifnet_detach( static errno_t dlil_recycle_ioctl( __unused ifnet_t ifnet_ptr, - __unused u_int32_t ioctl_code, + __unused u_long ioctl_code, __unused void *ioctl_arg) { return EOPNOTSUPP; @@ -2750,7 +2850,7 @@ dlil_recycle_set_bpf_tap( __private_extern__ int dlil_if_acquire( - u_long family, + u_int32_t family, const void *uniqueid, size_t uniqueid_len, struct ifnet **ifp) diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h index 6e3872b79..c35478082 100644 --- a/bsd/net/dlil.h +++ b/bsd/net/dlil.h @@ -160,20 +160,20 @@ dlil_send_arp( int dlil_attach_filter(ifnet_t ifp, const struct iff_filter *if_filter, interface_filter_t *filter_ref); void dlil_detach_filter(interface_filter_t filter); -int dlil_detach_protocol(ifnet_t ifp, u_long protocol); +int dlil_detach_protocol(ifnet_t ifp, u_int32_t protocol); extern void dlil_proto_unplumb_all(ifnet_t); #endif /* BSD_KERNEL_PRIVATE */ void -dlil_post_msg(struct ifnet *ifp,u_long event_subclass, u_long event_code, - struct net_event_data *event_data, u_long event_data_len); +dlil_post_msg(struct ifnet *ifp,u_int32_t event_subclass, u_int32_t event_code, + struct net_event_data *event_data, u_int32_t event_data_len); /* * dlil_if_acquire is obsolete. Use ifnet_allocate. 
*/ -int dlil_if_acquire(u_long family, const void *uniqueid, size_t uniqueid_len, +int dlil_if_acquire(u_int32_t family, const void *uniqueid, size_t uniqueid_len, struct ifnet **ifp); diff --git a/bsd/net/dlil_pvt.h b/bsd/net/dlil_pvt.h index 05cae784f..192b2726d 100644 --- a/bsd/net/dlil_pvt.h +++ b/bsd/net/dlil_pvt.h @@ -38,9 +38,9 @@ struct dlil_family_mod_str { char *interface_family; int (*add_if)(struct ifnet_ptr *ifp); int (*del_if)(struct ifnet *ifp); - int (*add_proto)(struct ifnet *ifp, u_long protocol_family, + int (*add_proto)(struct ifnet *ifp, uint32_t protocol_family, struct ddesc_head_str *demux_desc_head); - int (*del_proto)(struct ifnet *ifp, u_long proto_family); + int (*del_proto)(struct ifnet *ifp, uint32_t proto_family); } #endif /* KERNEL_PRIVATE */ diff --git a/bsd/net/ether_if_module.c b/bsd/net/ether_if_module.c index c2607645a..42e0a67a7 100644 --- a/bsd/net/ether_if_module.c +++ b/bsd/net/ether_if_module.c @@ -72,6 +72,7 @@ #include +#define etherbroadcastaddr fugly #include #include #include @@ -82,6 +83,7 @@ #include /* For M_LOOP */ #include #include +#undef etherbroadcastaddr /* #if INET @@ -123,8 +125,8 @@ SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Ethernet struct en_desc { u_int16_t type; /* Type of protocol stored in data */ - u_long protocol_family; /* Protocol family */ - u_long data[2]; /* Protocol data */ + u_int32_t protocol_family; /* Protocol family */ + u_int32_t data[2]; /* Protocol data */ }; /* descriptors are allocated in blocks of ETHER_DESC_BLK_SIZE */ @@ -139,9 +141,9 @@ struct en_desc { */ struct ether_desc_blk_str { - u_long n_max_used; - u_long n_count; - u_long n_used; + u_int32_t n_max_used; + u_int32_t n_count; + u_int32_t n_used; struct en_desc block_ptr[1]; }; /* Size of the above struct before the array of struct en_desc */ @@ -174,7 +176,7 @@ ether_del_proto( protocol_family_t protocol_family) { struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str 
*)ifp->family_cookie; - u_long current = 0; + u_int32_t current = 0; int found = 0; if (desc_blk == NULL) @@ -265,9 +267,9 @@ ether_add_proto_internal( // Check for case where all of the descriptor blocks are in use if (desc_blk == NULL || desc_blk->n_used == desc_blk->n_count) { struct ether_desc_blk_str *tmp; - u_long new_count = ETHER_DESC_BLK_SIZE; - u_long new_size; - u_long old_size = 0; + u_int32_t new_count = ETHER_DESC_BLK_SIZE; + u_int32_t new_size; + u_int32_t old_size = 0; i = 0; @@ -293,7 +295,7 @@ ether_add_proto_internal( FREE(desc_blk, M_IFADDR); } desc_blk = tmp; - ifp->family_cookie = (u_long)desc_blk; + ifp->family_cookie = (uintptr_t)desc_blk; desc_blk->n_count = new_count; } else { @@ -373,9 +375,9 @@ ether_demux( u_short ether_type = eh->ether_type; u_int16_t type; u_int8_t *data; - u_long i = 0; + u_int32_t i = 0; struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->family_cookie; - u_long maxd = desc_blk ? desc_blk->n_max_used : 0; + u_int32_t maxd = desc_blk ? desc_blk->n_max_used : 0; struct en_desc *ed = desc_blk ? desc_blk->block_ptr : NULL; u_int32_t extProto1 = 0; u_int32_t extProto2 = 0; diff --git a/bsd/net/ether_if_module.h b/bsd/net/ether_if_module.h index dd20bcba9..9c1a58281 100644 --- a/bsd/net/ether_if_module.h +++ b/bsd/net/ether_if_module.h @@ -38,4 +38,4 @@ extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family); extern errno_t ether_attach_at(struct ifnet *ifp, protocol_family_t proto_family); extern void ether_detach_at(struct ifnet *ifp, protocol_family_t proto_family); -#endif _NET_ETHER_IF_MODULE_H +#endif /* _NET_ETHER_IF_MODULE_H */ diff --git a/bsd/net/ether_inet6_pr_module.c b/bsd/net/ether_inet6_pr_module.c index 979239c66..52fd39229 100644 --- a/bsd/net/ether_inet6_pr_module.c +++ b/bsd/net/ether_inet6_pr_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -199,10 +199,10 @@ ether_inet6_resolve_multi( static errno_t ether_inet6_prmod_ioctl( - ifnet_t ifp, + ifnet_t ifp, __unused protocol_family_t protocol_family, - u_int32_t command, - void* data) + u_long command, + void *data) { struct ifreq *ifr = (struct ifreq *) data; int error = 0; diff --git a/bsd/net/ether_inet_pr_module.c b/bsd/net/ether_inet_pr_module.c index f0b0f5b54..177631c4b 100644 --- a/bsd/net/ether_inet_pr_module.c +++ b/bsd/net/ether_inet_pr_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -302,10 +302,10 @@ ether_inet_resolve_multi( static errno_t ether_inet_prmod_ioctl( - ifnet_t ifp, + ifnet_t ifp, __unused protocol_family_t protocol_family, - u_int32_t command, - void* data) + u_long command, + void *data) { ifaddr_t ifa = data; struct ifreq *ifr = data; @@ -420,7 +420,7 @@ ether_inet_arp( /* Move the data pointer in the mbuf to the end, aligned to 4 bytes */ datap = mbuf_datastart(m); datap += mbuf_trailingspace(m); - datap -= (((u_long)datap) & 0x3); + datap -= (((uintptr_t)datap) & 0x3); mbuf_setdata(m, datap, sizeof(*ea)); ea = mbuf_data(m); diff --git a/bsd/net/ieee8023ad.h b/bsd/net/ieee8023ad.h index 944e48875..7a2494d23 100644 --- a/bsd/net/ieee8023ad.h +++ b/bsd/net/ieee8023ad.h @@ -50,4 +50,4 @@ #define IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL 2 #define IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_START 3 #define IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END 10 -#endif _NET_IEEE8023AD_H_ +#endif /* _NET_IEEE8023AD_H_ */ diff --git a/bsd/net/if.c b/bsd/net/if.c index 2d3d48ae7..43b96dfff 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -82,6 +82,8 @@ #include #include +#include + #include #include #include @@ -110,8 +112,6 @@ #endif #endif -extern u_long route_generation; -extern int use_routegenid; extern int dlil_multithreaded_input; extern 
struct dlil_threading_info *dlil_lo_thread_ptr; @@ -119,6 +119,7 @@ extern struct dlil_threading_info *dlil_lo_thread_ptr; #include #endif + /* * System initialization */ @@ -132,9 +133,9 @@ static int if_rtmtu(struct radix_node *, void *); static void if_rtmtu_update(struct ifnet *); static struct if_clone *if_clone_lookup(const char *, int *); -#ifdef IF_CLONE_LIST +#if IF_CLONE_LIST static int if_clone_list(int count, int * total, user_addr_t dst); -#endif +#endif /* IF_CLONE_LIST */ MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); @@ -175,11 +176,11 @@ if_attach_ifa( struct ifaddr *ifa) { ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED); - if (ifa->ifa_debug & IFA_ATTACHED) { + if (ifa->ifa_debug & IFD_ATTACHED) { panic("if_attach_ifa: Attempted to attach address that's already attached!\n"); } ifaref(ifa); - ifa->ifa_debug |= IFA_ATTACHED; + ifa->ifa_debug |= IFD_ATTACHED; TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); } @@ -191,8 +192,8 @@ if_detach_ifa( ifnet_lock_assert(ifp, LCK_MTX_ASSERT_OWNED); #if 1 /* Debugging code */ - if ((ifa->ifa_debug & IFA_ATTACHED) == 0) { - printf("if_detach_ifa: ifa is not attached to any interface! flags=%lu\n", ifa->ifa_debug); + if ((ifa->ifa_debug & IFD_ATTACHED) == 0) { + printf("if_detach_ifa: ifa is not attached to any interface! 
flags=%u\n", ifa->ifa_debug); return; } else { @@ -207,7 +208,7 @@ if_detach_ifa( } #endif TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); - ifa->ifa_debug &= ~IFA_ATTACHED; + ifa->ifa_debug &= ~IFD_ATTACHED; ifafree(ifa); } @@ -252,6 +253,11 @@ if_next_index(void) /* allocate space for the larger arrays */ n = (2 * new_if_indexlim + 1) * sizeof(caddr_t); new_ifnet_addrs = _MALLOC(n, M_IFADDR, M_WAITOK); + if (new_ifnet_addrs == NULL) { + --if_index; + return -1; + } + new_ifindex2ifnet = new_ifnet_addrs + new_if_indexlim * sizeof(caddr_t); bzero(new_ifnet_addrs, n); @@ -432,7 +438,7 @@ if_clone_lookup(const char *name, int *unitp) /* * Register a network interface cloner. */ -void +int if_clone_attach(struct if_clone *ifc) { int bytoff, bitoff; @@ -452,6 +458,8 @@ if_clone_attach(struct if_clone *ifc) if ((len << 3) < maxclone) len++; ifc->ifc_units = _MALLOC(len, M_CLONE, M_WAITOK | M_ZERO); + if (ifc->ifc_units == NULL) + return ENOBUFS; bzero(ifc->ifc_units, len); ifc->ifc_bmlen = len; @@ -469,6 +477,8 @@ if_clone_attach(struct if_clone *ifc) bitoff = unit - (bytoff << 3); ifc->ifc_units[bytoff] |= (1 << bitoff); } + + return 0; } /* @@ -483,7 +493,7 @@ if_clone_detach(struct if_clone *ifc) if_cloners_count--; } -#ifdef IF_CLONE_LIST +#if IF_CLONE_LIST /* * Provide list of interface cloners to userspace. */ @@ -515,36 +525,38 @@ if_clone_list(int count, int * total, user_addr_t dst) return (error); } -#endif IF_CLONE_LIST +#endif /* IF_CLONE_LIST */ -__private_extern__ int -ifa_foraddr( - unsigned int addr) +/* + * Similar to ifa_ifwithaddr, except that this is IPv4 specific + * and that it matches only the local (not broadcast) address. 
+ */ +__private_extern__ struct in_ifaddr * +ifa_foraddr(unsigned int addr) { - struct ifnet *ifp; - struct ifaddr *ifa; - unsigned int addr2; - int result = 0; - - ifnet_head_lock_shared(); - for (ifp = ifnet_head.tqh_first; ifp && !result; ifp = ifp->if_link.tqe_next) { - ifnet_lock_shared(ifp); - for (ifa = ifp->if_addrhead.tqh_first; ifa; - ifa = ifa->ifa_link.tqe_next) { - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - addr2 = IA_SIN(ifa)->sin_addr.s_addr; - - if (addr == addr2) { - result = 1; - break; - } - } - ifnet_lock_done(ifp); + return (ifa_foraddr_scoped(addr, IFSCOPE_NONE)); +} + +/* + * Similar to ifa_foraddr, except with the added interface scope + * constraint (unless the caller passes in IFSCOPE_NONE in which + * case there is no scope restriction). + */ +__private_extern__ struct in_ifaddr * +ifa_foraddr_scoped(unsigned int addr, unsigned int scope) +{ + struct in_ifaddr *ia = NULL; + + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(addr), ia_hash) { + if (ia->ia_addr.sin_addr.s_addr == addr && + (scope == IFSCOPE_NONE || ia->ia_ifp->if_index == scope)) + break; } - ifnet_head_done(); - - return result; + if (ia != NULL) + ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + return (ia); } /* @@ -669,7 +681,8 @@ ifa_ifwithaddr_scoped(const struct sockaddr *addr, unsigned int ifscope) /* * This is suboptimal; there should be a better way - * to search for a given address of an interface. + * to search for a given address of an interface + * for any given address family. */ ifnet_lock_shared(ifp); for (ifa = ifp->if_addrhead.tqh_first; ifa != NULL; @@ -790,18 +803,6 @@ next: continue; ifp->if_index != ifscope) continue; - /* - * if we have a special address handler, - * then use it instead of the generic one. - */ - if (ifa->ifa_claim_addr) { - if (ifa->ifa_claim_addr(ifa, addr)) { - break; - } else { - continue; - } - } - /* * Scan all the bits in the ifa's address. 
* If a bit dissagrees with what we are @@ -895,7 +896,7 @@ ifaof_ifpforaddr( continue; } if (ifp->if_flags & IFF_POINTOPOINT) { - if (equal(addr, ifa->ifa_dstaddr)) + if (ifa->ifa_dstaddr && equal(addr, ifa->ifa_dstaddr)) break; } else { if (equal(addr, ifa->ifa_addr)) { @@ -945,6 +946,9 @@ link_rtrequest(int cmd, struct rtentry *rt, struct sockaddr *sa) struct sockaddr *dst; struct ifnet *ifp; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) || ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0)) return; @@ -1161,18 +1165,19 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) struct net_event_data ev_data; switch (cmd) { - case SIOCGIFCONF: - case OSIOCGIFCONF: + case OSIOCGIFCONF32: + case SIOCGIFCONF32: { + struct ifconf32 *ifc = (struct ifconf32 *)data; + return (ifconf(cmd, CAST_USER_ADDR_T(ifc->ifc_req), + &ifc->ifc_len)); + /* NOTREACHED */ + } case SIOCGIFCONF64: - { - struct ifconf64 * ifc = (struct ifconf64 *)data; - user_addr_t user_addr; - - user_addr = proc_is64bit(p) - ? ifc->ifc_req64 : CAST_USER_ADDR_T(ifc->ifc_req); - return (ifconf(cmd, user_addr, &ifc->ifc_len)); - } - break; + case OSIOCGIFCONF64: { + struct ifconf64 *ifc = (struct ifconf64 *)data; + return (ifconf(cmd, ifc->ifc_req, &ifc->ifc_len)); + /* NOTREACHED */ + } } ifr = (struct ifreq *)data; switch (cmd) { @@ -1185,17 +1190,20 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name)) : if_clone_destroy(ifr->ifr_name)); #if IF_CLONE_LIST - case SIOCIFGCLONERS: - case SIOCIFGCLONERS64: - { - struct if_clonereq64 * ifcr = (struct if_clonereq64 *)data; - user_addr = proc_is64bit(p) - ? 
ifcr->ifcr_ifcru.ifcru_buffer64 - : CAST_USER_ADDR_T(ifcr->ifcr_ifcru.ifcru_buffer32); + case SIOCIFGCLONERS32: { + struct if_clonereq32 *ifcr = (struct if_clonereq32 *)data; + return (if_clone_list(ifcr->ifcr_count, &ifcr->ifcr_total, + CAST_USER_ADDR_T(ifcr->ifcru_buffer))); + /* NOTREACHED */ + + } + case SIOCIFGCLONERS64: { + struct if_clonereq64 *ifcr = (struct if_clonereq64 *)data; return (if_clone_list(ifcr->ifcr_count, &ifcr->ifcr_total, - user_data)); + ifcr->ifcru_buffer)); + /* NOTREACHED */ } -#endif IF_CLONE_LIST +#endif /* IF_CLONE_LIST */ } ifp = ifunit(ifr->ifr_name); @@ -1244,20 +1252,20 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, cmd, data); - if (error == 0) { - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_DL_SUBCLASS; - - ev_msg.event_code = KEV_DL_SIFFLAGS; - strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); - ev_data.if_family = ifp->if_family; - ev_data.if_unit = (unsigned long) ifp->if_unit; - ev_msg.dv[0].data_length = sizeof(struct net_event_data); - ev_msg.dv[0].data_ptr = &ev_data; - ev_msg.dv[1].data_length = 0; - kev_post_msg(&ev_msg); - } + /* Send the event even upon error from the driver because we changed the flags */ + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_DL_SUBCLASS; + + ev_msg.event_code = KEV_DL_SIFFLAGS; + strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); + ev_data.if_family = ifp->if_family; + ev_data.if_unit = (u_int32_t) ifp->if_unit; + ev_msg.dv[0].data_length = sizeof(struct net_event_data); + ev_msg.dv[0].data_ptr = &ev_data; + ev_msg.dv[1].data_length = 0; + kev_post_msg(&ev_msg); + ifnet_touch_lastchange(ifp); break; @@ -1282,7 +1290,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) ev_msg.event_code = KEV_DL_SIFMETRICS; strlcpy(&ev_data.if_name[0], ifp->if_name, 
IFNAMSIZ); ev_data.if_family = ifp->if_family; - ev_data.if_unit = (unsigned long) ifp->if_unit; + ev_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_length = sizeof(struct net_event_data); ev_msg.dv[0].data_ptr = &ev_data; @@ -1308,7 +1316,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) ev_msg.event_code = KEV_DL_SIFPHYS; strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); ev_data.if_family = ifp->if_family; - ev_data.if_unit = (unsigned long) ifp->if_unit; + ev_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_length = sizeof(struct net_event_data); ev_msg.dv[0].data_ptr = &ev_data; ev_msg.dv[1].data_length = 0; @@ -1320,7 +1328,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCSIFMTU: { - u_long oldmtu = ifp->if_mtu; + u_int32_t oldmtu = ifp->if_mtu; error = proc_suser(p); if (error) @@ -1341,7 +1349,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) ev_msg.event_code = KEV_DL_SIFMTU; strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); ev_data.if_family = ifp->if_family; - ev_data.if_unit = (unsigned long) ifp->if_unit; + ev_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_length = sizeof(struct net_event_data); ev_msg.dv[0].data_ptr = &ev_data; ev_msg.dv[1].data_length = 0; @@ -1394,7 +1402,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ); ev_data.if_family = ifp->if_family; - ev_data.if_unit = (unsigned long) ifp->if_unit; + ev_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_length = sizeof(struct net_event_data); ev_msg.dv[0].data_ptr = &ev_data; ev_msg.dv[1].data_length = 0; @@ -1407,7 +1415,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #if INET6 - case SIOCSIFPHYADDR_IN6: + case SIOCSIFPHYADDR_IN6_32: + case SIOCSIFPHYADDR_IN6_64: #endif case SIOCSLIFPHYADDR: case SIOCSIFMEDIA: @@ 
-1434,7 +1443,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGLIFPHYADDR: - case SIOCGIFMEDIA: + case SIOCGIFMEDIA32: + case SIOCGIFMEDIA64: case SIOCGIFGENERIC: case SIOCGIFDEVMTU: return ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, @@ -1444,6 +1454,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) return ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, cmd, data); + case SIOCGIFWAKEFLAGS: + ifnet_lock_shared(ifp); + ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp); + ifnet_lock_done(ifp); + break; + default: oif_flags = ifp->if_flags; if (so->so_proto == 0) @@ -1629,7 +1645,7 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) continue; #endif addrs++; - if (cmd == OSIOCGIFCONF) { + if (cmd == OSIOCGIFCONF32 || cmd == OSIOCGIFCONF64) { struct osockaddr *osa = (struct osockaddr *)&ifr.ifr_addr; ifr.ifr_addr = *sa; @@ -1713,7 +1729,7 @@ void ifma_reference( struct ifmultiaddr *ifma) { - if (OSIncrementAtomic((SInt32 *)&ifma->ifma_refcount) <= 0) + if (OSIncrementAtomic(&ifma->ifma_refcount) <= 0) panic("ifma_reference: ifma already released or invalid\n"); } @@ -1723,7 +1739,7 @@ ifma_release( { while (ifma) { struct ifmultiaddr *next; - int32_t prevValue = OSDecrementAtomic((SInt32 *)&ifma->ifma_refcount); + int32_t prevValue = OSDecrementAtomic(&ifma->ifma_refcount); if (prevValue < 1) panic("ifma_release: ifma already released or invalid\n"); if (prevValue != 1) @@ -2154,48 +2170,53 @@ if_down_all(void) * */ static int -if_rtdel( - struct radix_node *rn, - void *arg) +if_rtdel(struct radix_node *rn, void *arg) { struct rtentry *rt = (struct rtentry *)rn; struct ifnet *ifp = arg; int err; - if (rt != NULL && rt->rt_ifp == ifp) { - + if (rt == NULL) + return (0); + /* + * Checking against RTF_UP protects against walktree + * recursion problems with cloned routes. 
+ */ + RT_LOCK(rt); + if (rt->rt_ifp == ifp && (rt->rt_flags & RTF_UP)) { /* - * Protect (sorta) against walktree recursion problems - * with cloned routes + * Safe to drop rt_lock and use rt_key, rt_gateway, + * since holding rnh_lock here prevents another thread + * from calling rt_setgate() on this route. */ - if ((rt->rt_flags & RTF_UP) == 0) - return (0); - + RT_UNLOCK(rt); err = rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags, - (struct rtentry **) NULL); + rt_mask(rt), rt->rt_flags, NULL); if (err) { log(LOG_WARNING, "if_rtdel: error %d\n", err); } + } else { + RT_UNLOCK(rt); } - return (0); } /* - * Removes routing table reference to a given interfacei + * Removes routing table reference to a given interface * for a given protocol family */ -void if_rtproto_del(struct ifnet *ifp, int protocol) +void +if_rtproto_del(struct ifnet *ifp, int protocol) { struct radix_node_head *rnh; - if (use_routegenid) - route_generation++; + + if (use_routegenid) + routegenid_update(); if ((protocol <= AF_MAX) && (protocol >= 0) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) { - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); } } @@ -2205,6 +2226,7 @@ if_rtmtu(struct radix_node *rn, void *arg) struct rtentry *rt = (struct rtentry *)rn; struct ifnet *ifp = arg; + RT_LOCK(rt); if (rt->rt_ifp == ifp) { /* * Update the MTU of this entry only if the MTU @@ -2214,6 +2236,7 @@ if_rtmtu(struct radix_node *rn, void *arg) if (!(rt->rt_rmx.rmx_locks & RTV_MTU) && rt->rt_rmx.rmx_mtu) rt->rt_rmx.rmx_mtu = ifp->if_mtu; } + RT_UNLOCK(rt); return (0); } @@ -2233,13 +2256,13 @@ void if_rtmtu_update(struct ifnet *ifp) if ((rnh = rt_tables[p]) == NULL) continue; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); (void) rnh->rnh_walktree(rnh, if_rtmtu, ifp); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); } if (use_routegenid) - 
route_generation++; + routegenid_update(); } __private_extern__ void @@ -2285,7 +2308,8 @@ if_data_internal_to_if_data( COPYFIELD32(ifi_noproto); COPYFIELD32(ifi_recvtiming); COPYFIELD32(ifi_xmittiming); - COPYFIELD(ifi_lastchange); + if_data->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec; + if_data->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec; lck_mtx_unlock(thread->input_lck); #if IF_LASTCHANGEUPTIME @@ -2337,7 +2361,8 @@ if_data_internal_to_if_data64( COPYFIELD(ifi_noproto); COPYFIELD(ifi_recvtiming); COPYFIELD(ifi_xmittiming); - COPYFIELD(ifi_lastchange); + if_data64->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec; + if_data64->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec; lck_mtx_unlock(thread->input_lck); #if IF_LASTCHANGEUPTIME @@ -2346,3 +2371,44 @@ if_data_internal_to_if_data64( #undef COPYFIELD } + +void +ifafree(struct ifaddr *ifa) +{ + int oldval; + + oldval = OSAddAtomic(-1, &ifa->ifa_refcnt); + if (oldval >= 1 && ifa->ifa_trace != NULL) + (*ifa->ifa_trace)(ifa, FALSE); + if (oldval == 0) { + panic("%s: ifa %p negative refcnt\n", __func__, ifa); + } else if (oldval == 1) { + if (ifa->ifa_debug & IFD_ATTACHED) + panic("ifa %p attached to ifp is being freed\n", ifa); + /* + * Some interface addresses are allocated either statically + * or carved out of a larger block; e.g. AppleTalk addresses. + * Only free it if it was allocated via MALLOC or via the + * corresponding per-address family allocator. Otherwise, + * leave it alone. 
+ */ + if (ifa->ifa_debug & IFD_ALLOC) { + if (ifa->ifa_free == NULL) + FREE(ifa, M_IFADDR); + else + (*ifa->ifa_free)(ifa); + } + } +} + +void +ifaref(struct ifaddr *ifa) +{ + int oldval; + + oldval = OSAddAtomic(1, &ifa->ifa_refcnt); + if (oldval < 0) + panic("%s: ifa %p negative refcnt\n", __func__, ifa); + else if (ifa->ifa_trace != NULL) + (*ifa->ifa_trace)(ifa, TRUE); +} diff --git a/bsd/net/if.h b/bsd/net/if.h index 7e82691c2..20f360037 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -94,6 +94,7 @@ #define KEV_DL_PROTO_ATTACHED 14 #define KEV_DL_PROTO_DETACHED 15 #define KEV_DL_LINK_ADDRESS_CHANGED 16 +#define KEV_DL_WAKEFLAGS_CHANGED 17 #include #include @@ -106,19 +107,18 @@ struct if_clonereq { char *ifcr_buffer; /* buffer for cloner names */ }; -/* in-kernel, LP64-aware version of if_clonereq. all pointers - * grow when we're dealing with a 64-bit process. 
- * WARNING - keep in sync with if_clonereq - */ struct if_clonereq64 { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ - union { - u_int64_t ifcru_buffer64; - char * ifcru_buffer32; - } ifcr_ifcru; + user64_addr_t ifcru_buffer __attribute__((aligned(8))); +}; + +struct if_clonereq32 { + int ifcr_total; /* total cloners (out) */ + int ifcr_count; /* room for this many in user buffer */ + user32_addr_t ifcru_buffer; }; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #define IFF_UP 0x1 /* interface is up */ #define IFF_BROADCAST 0x2 /* broadcast address valid */ @@ -148,8 +148,8 @@ struct if_clonereq64 { #define IFEF_VLAN 0x200 /* interface has one or more vlans */ #define IFEF_BOND 0x400 /* interface is part of bond */ #define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses on this port */ -#define IFEF_NOWINDOWSCALE 0x1000 /* TCP window scale disabled on this interface, see 5933937 & 5959897*/ -#define IFEF_NOTIMESTAMPS IFEF_NOWINDOWSCALE /* We don't actualy disable timestamps, just window scale see 5959897 */ +#define IFEF_NOWINDOWSCALE 0x1000 /* Don't scale TCP window on iface */ +#define IFEF_NOAUTOIPV6LL 0x2000 /* Interface IPv6 LinkLocal address not provided by kernel */ #define IFEF_SENDLIST 0x10000000 /* Interface supports sending a list of packets */ #define IFEF_REUSE 0x20000000 /* DLIL ifnet recycler, ifnet is not new */ #define IFEF_INUSE 0x40000000 /* DLIL ifnet recycler, ifnet in use */ @@ -288,6 +288,13 @@ struct ifkpi { } ifk_data; }; +/* Wake capabilities of a interface */ +#define IF_WAKE_ON_MAGIC_PACKET 0x01 +#ifdef KERNEL_PRIVATE +#define IF_WAKE_VALID_FLAGS IF_WAKE_ON_MAGIC_PACKET +#endif /* KERNEL_PRIVATE */ + + #pragma pack() /* @@ -314,9 +321,10 @@ struct ifreq { caddr_t ifru_data; #ifdef KERNEL_PRIVATE u_int64_t ifru_data64; /* 64-bit ifru_data */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ struct ifdevmtu ifru_devmtu; struct ifkpi ifru_kpi; + u_int32_t ifru_wake_flags; } ifr_ifru; 
#define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ @@ -336,8 +344,9 @@ struct ifreq { #define ifr_intval ifr_ifru.ifru_intval /* integer value */ #ifdef KERNEL_PRIVATE #define ifr_data64 ifr_ifru.ifru_data64 /* 64-bit pointer */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #define ifr_kpi ifr_ifru.ifru_kpi +#define ifr_wake_flags ifr_ifru.ifru_wake_flags /* wake capabilities of devive */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ @@ -357,6 +366,7 @@ struct rslvmulti_req { struct sockaddr **llsa; }; +#if !defined(KERNEL) || defined(KERNEL_PRIVATE) #pragma pack(4) struct ifmediareq { @@ -370,12 +380,10 @@ struct ifmediareq { }; #pragma pack() +#endif /* !KERNEL || KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE -/* LP64 version of ifmediareq. all pointers - * grow when we're dealing with a 64-bit process. - * WARNING - keep in sync with ifmediareq - */ +#pragma pack(4) struct ifmediareq64 { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ @@ -383,12 +391,20 @@ struct ifmediareq64 { int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ - union { /* media words */ - int * ifmu_ulist32; /* 32-bit pointer */ - u_int64_t ifmu_ulist64; /* 64-bit pointer */ - } ifm_ifmu; + user64_addr_t ifmu_ulist __attribute__((aligned(8))); }; -#endif // KERNEL_PRIVATE + +struct ifmediareq32 { + char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + int ifm_current; /* current media options */ + int ifm_mask; /* don't care mask */ + int ifm_status; /* media status */ + int ifm_active; /* active options */ + int ifm_count; /* # entries in ifm_ulist array */ + user32_addr_t ifmu_ulist; /* 32-bit pointer */ +}; +#pragma pack() +#endif /* KERNEL_PRIVATE */ /* * Structure used to retrieve aux status data from interfaces. 
@@ -403,14 +419,14 @@ struct ifstat { char ascii[IFSTATMAX + 1]; }; -#pragma pack(4) - +#if !defined(KERNEL) || defined(KERNEL_PRIVATE) /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ +#pragma pack(4) struct ifconf { int ifc_len; /* size of associated buffer */ union { @@ -418,25 +434,28 @@ struct ifconf { struct ifreq *ifcu_req; } ifc_ifcu; }; +#pragma pack() #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ +#endif /* !KERNEL || KERNEL_PRIVATE */ -#pragma pack() +#if defined(KERNEL_PRIVATE) +#pragma pack(4) +struct ifconf32 { + int ifc_len; /* size of associated buffer */ + struct { + user32_addr_t ifcu_req; + } ifc_ifcu; +}; -#ifdef KERNEL_PRIVATE -/* LP64 version of ifconf. all pointers - * grow when we're dealing with a 64-bit process. - * WARNING - keep in sync with ifconf - */ struct ifconf64 { int ifc_len; /* size of associated buffer */ - union { - struct ifreq * ifcu_req; - u_int64_t ifcu_req64; + struct { + user64_addr_t ifcu_req __attribute__((aligned(8))); } ifc_ifcu; }; -#define ifc_req64 ifc_ifcu.ifcu_req64 -#endif // KERNEL_PRIVATE +#pragma pack() +#endif /* KERNEL_PRIVATE */ /* * DLIL KEV_DL_PROTO_ATTACHED/DETACHED structure diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c index fd632b8a4..bdf0b42cd 100644 --- a/bsd/net/if_bond.c +++ b/bsd/net/if_bond.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -515,7 +515,7 @@ packet_buffer_allocate(int length) size = length + sizeof(struct ether_header); if (size > (int)MHLEN) { /* XXX doesn't handle large payloads */ - printf("bond: packet_buffer_allocate size %d > max %lu\n", size, MHLEN); + printf("bond: packet_buffer_allocate size %d > max %u\n", size, MHLEN); return (NULL); } m = m_gethdr(M_WAITOK, MT_DATA); @@ -651,7 +651,7 @@ static void bond_clone_destroy(struct ifnet *); static int bond_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m, char *frame_header); static int bond_output(struct ifnet *ifp, struct mbuf *m); -static int bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * addr); +static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr); static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func); static int bond_attach_protocol(struct ifnet *ifp); @@ -666,7 +666,7 @@ static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME, bond_clone_destroy, 0, BOND_MAXUNIT); -static void interface_link_event(struct ifnet * ifp, u_long event_code); +static void interface_link_event(struct ifnet * ifp, u_int32_t event_code); static int siocsifmtu(struct ifnet * ifp, int mtu) @@ -873,33 +873,6 @@ interface_media_info(struct ifnet * ifp) return (mi); } -/** - ** interface utility functions - **/ -static __inline__ struct ifaddr * -ifindex_get_ifaddr(int i) -{ - if (i > if_index || i == 0) { - return (NULL); - } - return (ifnet_addrs[i - 1]); -} - -static __inline__ struct ifaddr * -ifp_get_ifaddr(struct ifnet * ifp) -{ - return (ifindex_get_ifaddr(ifnet_index(ifp))); -} - -static __inline__ struct sockaddr_dl * -ifp_get_sdl(struct ifnet * ifp) -{ - struct ifaddr * ifa; - - ifa = ifp_get_ifaddr(ifp); - return ((struct sockaddr_dl *)(ifa->ifa_addr)); -} - static int if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p) { @@ -915,7 +888,7 @@ if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p) #if 0 
snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", ifnet_name(ifp), ifnet_unit(ifp)); -#endif 0 +#endif return (ifnet_ioctl(ifp, 0, SIOCSIFLLADDR, &ifr)); } @@ -938,7 +911,7 @@ bond_globals_create(lacp_system_priority sys_pri, b->system_priority = sys_pri; #if 0 b->verbose = 1; -#endif 0 +#endif return (b); } @@ -1108,12 +1081,15 @@ bond_setmulti(struct ifnet * ifp) return (result); } -static void +static int bond_clone_attach(void) { - if_clone_attach(&bond_cloner); + int error; + + if ((error = if_clone_attach(&bond_cloner)) != 0) + return error; bond_lock_init(); - return; + return 0; } static int @@ -1165,7 +1141,7 @@ bond_clone_create(struct if_clone * ifc, int unit) ifb->ifb_key = unit + 1; /* use the interface name as the unique id for ifp recycle */ - if ((u_long)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d", + if ((u_int32_t)snprintf(ifb->ifb_name, sizeof(ifb->ifb_name), "%s%d", ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) { ifbond_release(ifb); return (EINVAL); @@ -1336,7 +1312,7 @@ ether_header_hash(struct ether_header * eh_p) } static struct mbuf * -S_mbuf_skip_to_offset(struct mbuf * m, long * offset) +S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset) { int len; @@ -1369,7 +1345,7 @@ make_uint32(u_char c0, u_char c1, u_char c2, u_char c3) #endif /* BYTE_ORDER == LITTLE_ENDIAN */ static int -S_mbuf_copy_uint32(struct mbuf * m, long offset, uint32_t * val) +S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val) { struct mbuf * current; u_char * current_data; @@ -1419,7 +1395,7 @@ ip_header_hash(struct mbuf * m) struct in_addr ip_dst; struct in_addr ip_src; u_char ip_p; - long offset; + int32_t offset; struct mbuf * orig_m = m; /* find the IP protocol field relative to the start of the packet */ @@ -1460,7 +1436,7 @@ ipv6_header_hash(struct mbuf * m) { u_char * data; int i; - long offset; + int32_t offset; struct mbuf * orig_m = m; uint32_t * scan; uint32_t val; @@ -1874,7 +1850,7 @@ bondport_create(struct ifnet * port_ifp, 
lacp_port_priority priority, } bzero(p, sizeof(*p)); multicast_list_init(&p->po_multicast); - if ((u_long)snprintf(p->po_name, sizeof(p->po_name), "%s%d", + if ((u_int32_t)snprintf(p->po_name, sizeof(p->po_name), "%s%d", ifnet_name(port_ifp), ifnet_unit(port_ifp)) >= sizeof(p->po_name)) { printf("if_bond: name too large\n"); @@ -2612,7 +2588,7 @@ bond_set_promisc(__unused struct ifnet *ifp) ifb->ifb_flags &= ~IFBF_PROMISC; } } -#endif 0 +#endif return (error); } @@ -2729,14 +2705,14 @@ bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu) } static int -bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * data) +bond_ioctl(struct ifnet *ifp, u_long cmd, void * data) { int error = 0; struct if_bond_req ibr; struct ifaddr * ifa; ifbond_ref ifb; struct ifreq * ifr; - struct ifmediareq64 *ifmr; + struct ifmediareq *ifmr; struct ifnet * port_ifp = NULL; user_addr_t user_addr; @@ -2751,15 +2727,15 @@ bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * data) ifnet_set_flags(ifp, IFF_UP, IFF_UP); break; + case SIOCGIFMEDIA32: case SIOCGIFMEDIA64: - case SIOCGIFMEDIA: bond_lock(); ifb = (ifbond_ref)ifnet_softc(ifp); if (ifb == NULL || ifbond_flags_if_detaching(ifb)) { bond_unlock(); return (ifb == NULL ? EOPNOTSUPP : EBUSY); } - ifmr = (struct ifmediareq64 *)data; + ifmr = (struct ifmediareq *)data; ifmr->ifm_current = IFM_ETHER; ifmr->ifm_mask = 0; ifmr->ifm_status = IFM_AVALID; @@ -2777,9 +2753,9 @@ bond_ioctl(struct ifnet *ifp, u_int32_t cmd, void * data) ifmr->ifm_status |= IFM_ACTIVE; } bond_unlock(); - user_addr = proc_is64bit(current_proc()) - ? ifmr->ifm_ifmu.ifmu_ulist64 - : CAST_USER_ADDR_T(ifmr->ifm_ifmu.ifmu_ulist32); + user_addr = (cmd == SIOCGIFMEDIA64) ? 
+ ((struct ifmediareq64 *)ifmr)->ifmu_ulist : + CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist); if (user_addr != USER_ADDR_NULL) { error = copyout(&ifmr->ifm_current, user_addr, @@ -3067,11 +3043,11 @@ bond_event(struct ifnet * port_ifp, __unused protocol_family_t protocol, } static void -interface_link_event(struct ifnet * ifp, u_long event_code) +interface_link_event(struct ifnet * ifp, u_int32_t event_code) { struct { struct kern_event_msg header; - u_long unit; + u_int32_t unit; char if_name[IFNAMSIZ]; } event; @@ -3081,7 +3057,7 @@ interface_link_event(struct ifnet * ifp, u_long event_code) event.header.kev_subclass = KEV_DL_SUBCLASS; event.header.event_code = event_code; event.header.event_data[0] = ifnet_family(ifp); - event.unit = (u_long) ifnet_unit(ifp); + event.unit = (u_int32_t) ifnet_unit(ifp); strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); ifnet_event(ifp, &event.header); return; @@ -3166,6 +3142,7 @@ bond_family_init(void) goto done; } #endif +#if NETAT error = proto_register_plumber(PF_APPLETALK, APPLE_IF_FAM_BOND, ether_attach_at, ether_detach_at); @@ -3174,7 +3151,13 @@ bond_family_init(void) error); goto done; } - bond_clone_attach(); +#endif + error = bond_clone_attach(); + if (error != 0) { + printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n", + error); + goto done; + } done: return (error); @@ -3435,7 +3418,7 @@ ifbond_set_max_active(ifbond_ref bond, int max_active) } return; } -#endif 0 +#endif static int ifbond_all_ports_ready(ifbond_ref bond) @@ -4382,7 +4365,7 @@ bondport_periodic_transmit_machine(bondport_ref p, LAEvent event, **/ static int bondport_can_transmit(bondport_ref p, int32_t current_secs, - long * next_secs) + __darwin_time_t * next_secs) { if (p->po_last_transmit_secs != current_secs) { p->po_last_transmit_secs = current_secs; diff --git a/bsd/net/if_bond_var.h b/bsd/net/if_bond_var.h index 87f9d5bd5..fb17c9a90 100644 --- a/bsd/net/if_bond_var.h +++ b/bsd/net/if_bond_var.h @@ -97,6 
+97,6 @@ struct if_bond_req { #ifdef KERNEL_PRIVATE int bond_family_init(void) __attribute__((section("__TEXT, initcode"))); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_BOND_VAR_H_ */ diff --git a/bsd/net/if_disc.c b/bsd/net/if_disc.c index 00531fdff..229e281f6 100644 --- a/bsd/net/if_disc.c +++ b/bsd/net/if_disc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -174,8 +174,10 @@ discrtrequest(cmd, rt, sa) struct rtentry *rt; struct sockaddr *sa; { - if (rt) + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); rt->rt_rmx.rmx_mtu = DSMTU; + } } /* diff --git a/bsd/net/if_dummy.c b/bsd/net/if_dummy.c index 50dac08da..68dac9c9d 100644 --- a/bsd/net/if_dummy.c +++ b/bsd/net/if_dummy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -105,7 +105,7 @@ #include #include #include -#endif NETATALK +#endif /* NETATALK */ #include "bpfilter.h" @@ -171,7 +171,6 @@ dummyoutput(ifp, m, dst, rt) struct sockaddr *dst; register struct rtentry *rt; { - if ((m->m_flags & M_PKTHDR) == 0) panic("dummyoutput no HDR"); #if NBPFILTER > 0 @@ -192,10 +191,13 @@ dummyoutput(ifp, m, dst, rt) #endif m->m_pkthdr.rcvif = ifp; - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + if (rt != NULL) { + u_int32_t rt_flags = rt->rt_flags; + if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { + m_freem(m); + return ((rt_flags & RTF_BLACKHOLE) ? 0 : + (rt_flags & RTF_HOST) ? 
EHOSTUNREACH : ENETUNREACH); + } } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; @@ -212,7 +214,8 @@ dummyrtrequest(cmd, rt, sa) struct rtentry *rt; struct sockaddr *sa; { - if (rt) { + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers diff --git a/bsd/net/if_ether.h b/bsd/net/if_ether.h index 44e73fa71..eb29560d2 100644 --- a/bsd/net/if_ether.h +++ b/bsd/net/if_ether.h @@ -56,5 +56,5 @@ errno_t ether_check_multi(ifnet_t ifp, const struct sockaddr *multicast); __END_DECLS -#endif KERNEL -#endif _NET_IF_ETHER_H +#endif /* KERNEL */ +#endif /* _NET_IF_ETHER_H */ diff --git a/bsd/net/if_ethersubr.c b/bsd/net/if_ethersubr.c index df6d5c284..8d82c530d 100644 --- a/bsd/net/if_ethersubr.c +++ b/bsd/net/if_ethersubr.c @@ -154,6 +154,8 @@ ether_resolvemulti( return EADDRNOTAVAIL; MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR, M_WAITOK); + if (sdl == NULL) + return ENOBUFS; sdl->sdl_len = sizeof *sdl; sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; @@ -181,6 +183,8 @@ ether_resolvemulti( } MALLOC(sdl, struct sockaddr_dl *, sizeof *sdl, M_IFMADDR, M_WAITOK); + if (sdl == NULL) + return ENOBUFS; sdl->sdl_len = sizeof *sdl; sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index 1381fc6fd..38e876d6d 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -127,7 +127,7 @@ static int gif_encapcheck(const struct mbuf*, int, int, void*); static errno_t gif_output(ifnet_t ifp, mbuf_t m); static errno_t gif_input(ifnet_t ifp, protocol_family_t protocol_family, mbuf_t m, char *frame_header); -static errno_t gif_ioctl(ifnet_t ifp, u_int32_t cmd, void *data); +static errno_t gif_ioctl(ifnet_t ifp, u_long cmd, void *data); int ngif = 0; /* number of interfaces */ #endif @@ -397,7 +397,7 @@ gif_encapcheck( return 0; } - mbuf_copydata(m, 0, sizeof(ip), &ip); + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip), &ip); switch (ip.ip_v) { #if INET @@ -515,7 +515,7 @@ gif_input( static errno_t gif_ioctl( ifnet_t ifp, - u_int32_t cmd, + u_long cmd, void *data) { struct gif_softc *sc = ifnet_softc(ifp); @@ -525,11 +525,11 @@ gif_ioctl( struct sockaddr *sa; struct ifnet *ifp2; struct gif_softc *sc2; - + switch (cmd) { case SIOCSIFADDR: break; - + case SIOCSIFDSTADDR: break; @@ -543,7 +543,7 @@ gif_ioctl( case SIOCSIFMTU: { - u_long mtu; + u_int32_t mtu; mtu = ifr->ifr_mtu; if (mtu < GIF_MTU_MIN || mtu > GIF_MTU_MAX) { return (EINVAL); @@ -555,7 +555,8 @@ gif_ioctl( case SIOCSIFPHYADDR: #if INET6 - case SIOCSIFPHYADDR_IN6: + case SIOCSIFPHYADDR_IN6_32: + case SIOCSIFPHYADDR_IN6_64: #endif /* INET6 */ case SIOCSLIFPHYADDR: switch (cmd) { @@ -568,12 +569,23 @@ gif_ioctl( break; #endif #if INET6 - case SIOCSIFPHYADDR_IN6: - src = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_dstaddr); + case SIOCSIFPHYADDR_IN6_32: { + struct in6_aliasreq_32 *ifra_32 = + (struct in6_aliasreq_32 *)data; + + src = (struct sockaddr *)&ifra_32->ifra_addr; + dst = (struct sockaddr *)&ifra_32->ifra_dstaddr; + break; + } + + case SIOCSIFPHYADDR_IN6_64: { + struct in6_aliasreq_64 *ifra_64 = + (struct in6_aliasreq_64 *)data; + + src = (struct sockaddr *)&ifra_64->ifra_addr; + dst = (struct sockaddr *)&ifra_64->ifra_dstaddr; 
break; + } #endif case SIOCSLIFPHYADDR: src = (struct sockaddr *) @@ -627,7 +639,8 @@ gif_ioctl( break; return EAFNOSUPPORT; #if INET6 - case SIOCSIFPHYADDR_IN6: + case SIOCSIFPHYADDR_IN6_32: + case SIOCSIFPHYADDR_IN6_64: if (src->sa_family == AF_INET6) break; return EAFNOSUPPORT; @@ -688,12 +701,16 @@ gif_ioctl( if (sc->gif_psrc) FREE((caddr_t)sc->gif_psrc, M_IFADDR); sa = (struct sockaddr *)_MALLOC(src->sa_len, M_IFADDR, M_WAITOK); + if (sa == NULL) + return ENOBUFS; bcopy((caddr_t)src, (caddr_t)sa, src->sa_len); sc->gif_psrc = sa; if (sc->gif_pdst) FREE((caddr_t)sc->gif_pdst, M_IFADDR); sa = (struct sockaddr *)_MALLOC(dst->sa_len, M_IFADDR, M_WAITOK); + if (sa == NULL) + return ENOBUFS; bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len); sc->gif_pdst = sa; diff --git a/bsd/net/if_loop.c b/bsd/net/if_loop.c index 67e5f1f81..f62bdc362 100644 --- a/bsd/net/if_loop.c +++ b/bsd/net/if_loop.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -169,8 +169,10 @@ lo_framer( struct loopback_header *header; M_PREPEND(*m, sizeof(struct loopback_header), M_WAITOK); + if (*m == NULL) + return EJUSTRETURN; /* Tell caller not to try to free passed-in mbuf */ header = mtod(*m, struct loopback_header*); - header->protocol = *(const u_long*)frame_type; + header->protocol = *(const u_int32_t*)frame_type; return 0; } @@ -263,15 +265,19 @@ lo_pre_output( if (((*m)->m_flags & M_PKTHDR) == 0) panic("looutput no HDR"); - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - if (rt->rt_flags & RTF_BLACKHOLE) { - m_freem(*m); - return EJUSTRETURN; + if (rt != NULL) { + u_int32_t rt_flags = rt->rt_flags; + if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { + if (rt_flags & RTF_BLACKHOLE) { + m_freem(*m); + return EJUSTRETURN; + } else { + return ((rt_flags & RTF_HOST) ? + EHOSTUNREACH : ENETUNREACH); + } } - else - return ((rt->rt_flags & RTF_HOST) ? 
EHOSTUNREACH : ENETUNREACH); } - + *(protocol_family_t*)frame_type = protocol_family; return 0; @@ -302,7 +308,8 @@ lortrequest( struct rtentry *rt, __unused struct sockaddr *sa) { - if (rt) { + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers @@ -320,7 +327,7 @@ lortrequest( static errno_t loioctl( ifnet_t ifp, - u_int32_t cmd, + u_long cmd, void* data) { register struct ifaddr *ifa; diff --git a/bsd/net/if_media.c b/bsd/net/if_media.c index d96442ada..7c2e4be3f 100644 --- a/bsd/net/if_media.c +++ b/bsd/net/if_media.c @@ -331,6 +331,8 @@ ifmedia_ioctl( if (ifmr->ifm_count != 0) { kptr = (int *) _MALLOC(ifmr->ifm_count * sizeof(int), M_TEMP, M_WAITOK); + if (kptr == NULL) + return ENOBUFS; /* * Get the media words from the interface's list. diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h index 6c8ac892f..51be28e3c 100644 --- a/bsd/net/if_media.h +++ b/bsd/net/if_media.h @@ -126,7 +126,7 @@ void ifmedia_set(struct ifmedia *ifm, int mword); /* Common ioctl function for getting/setting media, called by driver. 
*/ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, - struct ifmedia *ifm, u_long cmd); + struct ifmedia *ifm, uint32_t cmd); #endif /* KERNEL_PRIVATE */ @@ -166,6 +166,8 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, #define IFM_HPNA_1 17 /* HomePNA 1.0 (1Mb/s) */ #define IFM_10G_SR 18 /* 10GbaseSR - multi-mode fiber */ #define IFM_10G_LR 19 /* 10GbaseLR - single-mode fiber */ +#define IFM_10G_CX4 20 /* 10GbaseCX4 - copper */ +#define IFM_10G_T 21 /* 10GbaseT - 4 pair cat 6 */ /* * Token ring @@ -295,6 +297,8 @@ struct ifmedia_description { { IFM_HPNA_1, "HomePNA1" }, \ { IFM_10G_SR, "10GbaseSR" }, \ { IFM_10G_LR, "10GbaseLR" }, \ + { IFM_10G_CX4, "10GbaseCX4" }, \ + { IFM_10G_T, "10GbaseT" }, \ { 0, NULL }, \ } @@ -321,6 +325,8 @@ struct ifmedia_description { { IFM_HPNA_1, "HPNA1" }, \ { IFM_10G_SR, "10GSR" }, \ { IFM_10G_LR, "10GLR" }, \ + { IFM_10G_CX4, "10GCX4" }, \ + { IFM_10G_T, "10GT" }, \ { 0, NULL }, \ } diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c index 17e1433bc..b4abc7f16 100644 --- a/bsd/net/if_mib.c +++ b/bsd/net/if_mib.c @@ -116,6 +116,11 @@ SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check, CTLFLAG_ &dlil_input_sanity_check , 0, "Turn on sanity checking in DLIL input"); #endif +extern int dlil_verbose; +SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, CTLFLAG_RW, + &dlil_verbose, 0, "Log DLIL error messages"); + + static int make_ifmibdata(struct ifnet *, int *, struct sysctl_req *); @@ -197,10 +202,13 @@ sysctl_ifdata SYSCTL_HANDLER_ARGS /* XXX bad syntax! 
*/ if (namelen != 2) return EINVAL; - + ifnet_head_lock_shared(); if (name[0] <= 0 || name[0] > if_index || - (ifp = ifindex2ifnet[name[0]]) == NULL) + (ifp = ifindex2ifnet[name[0]]) == NULL) { + ifnet_head_done(); return ENOENT; + } + ifnet_head_done(); ifnet_lock_shared(ifp); diff --git a/bsd/net/if_mib.h b/bsd/net/if_mib.h index 5b0d38a14..7af1bf714 100644 --- a/bsd/net/if_mib.h +++ b/bsd/net/if_mib.h @@ -198,4 +198,16 @@ enum { * Put other types of interface MIBs here, or in interface-specific * header files if convenient ones already exist. */ + +/* + * Structure for interface family ID table + */ + +struct if_family_id { + u_int32_t iffmid_len; + u_int32_t iffmid_id; + char iffmid_str[1]; /* variable length string */ +}; + + #endif /* _NET_IF_MIB_H */ diff --git a/bsd/net/if_pflog.c b/bsd/net/if_pflog.c new file mode 100644 index 000000000..278a7b198 --- /dev/null +++ b/bsd/net/if_pflog.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: if_pflog.c,v 1.4 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: if_pflog.c,v 1.22 2006/12/15 09:31:20 otto Exp $ */ +/* + * The authors of this code are John Ioannidis (ji@tla.org), + * Angelos D. Keromytis (kermit@csd.uch.gr) and + * Niels Provos (provos@physnet.uni-hamburg.de). + * + * This code was written by John Ioannidis for BSD/OS in Athens, Greece, + * in November 1995. + * + * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, + * by Angelos D. Keromytis. + * + * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis + * and Niels Provos. + * + * Copyright (C) 1995, 1996, 1997, 1998 by John Ioannidis, Angelos D. Keromytis + * and Niels Provos. + * Copyright (c) 2001, Angelos D. Keromytis, Niels Provos. + * + * Permission to use, copy, and modify this software with or without fee + * is hereby granted, provided that this entire notice is included in + * all copies of any software which is or includes a copy or + * modification of this software. + * You may use this code under the GNU public license if you so wish. Please + * contribute changes back to the authors under this freer than GPL license + * so that we may further the use of strong encryption without limitations to + * all. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTY. 
IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE + * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR + * PURPOSE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if INET +#include +#include +#include +#include +#endif + +#if INET6 +#if !INET +#include +#endif +#include +#endif /* INET6 */ + +#include +#include + +#define PFLOGNAME "pflog" +#define PFLOGMTU (32768 + MHLEN + MLEN) + +#ifdef PFLOGDEBUG +#define DPRINTF(x) do { if (pflogdebug) printf x ; } while (0) +#else +#define DPRINTF(x) +#endif + +static int pflog_create_dev(void); +static errno_t pflogoutput(struct ifnet *, struct mbuf *); +static errno_t pflogioctl(struct ifnet *, unsigned long, void *); +static errno_t pflogdemux(struct ifnet *, struct mbuf *, char *, + protocol_family_t *); +static errno_t pflogaddproto(struct ifnet *, protocol_family_t, + const struct ifnet_demux_desc *, u_int32_t); +static errno_t pflogdelproto(struct ifnet *, protocol_family_t); + +static LIST_HEAD(, pflog_softc) pflogif_list; + +struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ +static int npflog; +static lck_attr_t *pflog_lock_attr; +static lck_grp_t *pflog_lock_grp; +static lck_grp_attr_t *pflog_lock_grp_attr; +static lck_mtx_t *pflog_lock; + +void +pfloginit(void) +{ + int i; + + if (pflog_lock != NULL) + return; + + pflog_lock_grp_attr = lck_grp_attr_alloc_init(); + pflog_lock_grp = lck_grp_alloc_init("pflog", pflog_lock_grp_attr); + pflog_lock_attr = lck_attr_alloc_init(); + pflog_lock = lck_mtx_alloc_init(pflog_lock_grp, pflog_lock_attr); + if (pflog_lock == NULL) { + panic("%s: unable to allocate lock", __func__); + /* NOTREACHED */ + } + LIST_INIT(&pflogif_list); + for (i = 0; i < PFLOGIFS_MAX; i++) + pflogifs[i] = NULL; + + pflog_create_dev(); +} + +static int +pflog_create_dev(void) +{ + struct pflog_softc *pflogif; + struct ifnet_init_params pf_init; + int error = 
0; + + lck_mtx_lock(pflog_lock); + if (npflog >= PFLOGIFS_MAX) { + error = EINVAL; + goto done; + } + + if ((pflogif = _MALLOC(sizeof (*pflogif), + M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) { + error = ENOMEM; + goto done; + } + + bzero(&pf_init, sizeof (pf_init)); + pf_init.name = PFLOGNAME; + pf_init.unit = npflog; + pf_init.type = IFT_PFLOG; + pf_init.family = IFNET_FAMILY_LOOPBACK; + pf_init.output = pflogoutput; + pf_init.demux = pflogdemux; + pf_init.add_proto = pflogaddproto; + pf_init.del_proto = pflogdelproto; + pf_init.softc = pflogif; + pf_init.ioctl = pflogioctl; + + bzero(pflogif, sizeof (*pflogif)); + pflogif->sc_unit = npflog; + + error = ifnet_allocate(&pf_init, &pflogif->sc_if); + if (error != 0) { + printf("%s: ifnet_allocate failed - %d\n", __func__, error); + _FREE(pflogif, M_DEVBUF); + goto done; + } + + ifnet_set_mtu(pflogif->sc_if, PFLOGMTU); + ifnet_set_flags(pflogif->sc_if, IFF_UP, IFF_UP); + + error = ifnet_attach(pflogif->sc_if, NULL); + if (error != 0) { + printf("%s: ifnet_attach failed - %d\n", __func__, error); + ifnet_release(pflogif->sc_if); + _FREE(pflogif, M_DEVBUF); + goto done; + } + +#if NBPFILTER > 0 + bpfattach(pflogif->sc_if, DLT_PFLOG, PFLOG_HDRLEN); +#endif + + LIST_INSERT_HEAD(&pflogif_list, pflogif, sc_list); + pflogifs[npflog] = pflogif->sc_if; + ++npflog; +done: + lck_mtx_unlock(pflog_lock); + + return (error); +} + +#if 0 +int +pflog_destroy_dev(struct ifnet *ifp) +{ + struct pflog_softc *pflogif = ifp->if_softc; + + lck_mtx_lock(pflog_lock); + pflogifs[pflogif->sc_unit] = NULL; + LIST_REMOVE(pflogif, sc_list); + lck_mtx_unlock(pflog_lock); + +#if NBPFILTER > 0 + bpfdetach(ifp); +#endif + if_detach(ifp); + _FREE(pflogif, M_DEVBUF); + return (0); +} +#endif + +static errno_t +pflogoutput(struct ifnet *ifp, struct mbuf *m) +{ + printf("%s: freeing data for %s%d\n", __func__, ifp->if_name, + ifp->if_unit); + m_freem(m); + return (ENOTSUP); +} + +static errno_t +pflogioctl(struct ifnet *ifp, unsigned long cmd, void *data) +{ 
+#pragma unused(data) + switch (cmd) { + case SIOCSIFADDR: + case SIOCAIFADDR: + case SIOCSIFDSTADDR: + case SIOCSIFFLAGS: + if (ifnet_flags(ifp) & IFF_UP) + ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING); + else + ifnet_set_flags(ifp, 0, IFF_RUNNING); + break; + default: + return (ENOTTY); + } + + return (0); +} + +static errno_t +pflogdemux(struct ifnet *ifp, struct mbuf *m, char *h, protocol_family_t *ppf) +{ +#pragma unused(h, ppf) + printf("%s: freeing data for %s%d\n", __func__, ifp->if_name, + ifp->if_unit); + m_freem(m); + return (EJUSTRETURN); +} + +static errno_t +pflogaddproto(struct ifnet *ifp, protocol_family_t pf, + const struct ifnet_demux_desc *d, u_int32_t cnt) +{ +#pragma unused(ifp, pf, d, cnt) + return (0); +} + +static errno_t +pflogdelproto(struct ifnet *ifp, protocol_family_t pf) +{ +#pragma unused(ifp, pf) + return (0); +} + +int +pflog_packet(struct pfi_kif *kif, struct mbuf *m, sa_family_t af, u_int8_t dir, + u_int8_t reason, struct pf_rule *rm, struct pf_rule *am, + struct pf_ruleset *ruleset, struct pf_pdesc *pd) +{ +#if NBPFILTER > 0 + struct ifnet *ifn; + struct pfloghdr hdr; + + if (kif == NULL || m == NULL || rm == NULL || pd == NULL) + return (-1); + + if (rm->logif >= PFLOGIFS_MAX || + (ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) { + return (0); + } + + bzero(&hdr, sizeof (hdr)); + hdr.length = PFLOG_REAL_HDRLEN; + hdr.af = af; + hdr.action = rm->action; + hdr.reason = reason; + memcpy(hdr.ifname, kif->pfik_name, sizeof (hdr.ifname)); + + if (am == NULL) { + hdr.rulenr = htonl(rm->nr); + hdr.subrulenr = -1; + } else { + hdr.rulenr = htonl(am->nr); + hdr.subrulenr = htonl(rm->nr); + if (ruleset != NULL && ruleset->anchor != NULL) + strlcpy(hdr.ruleset, ruleset->anchor->name, + sizeof (hdr.ruleset)); + } + if (rm->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done) + pd->lookup.done = pf_socket_lookup(dir, pd); + if (pd->lookup.done > 0) { + hdr.uid = pd->lookup.uid; + hdr.pid = pd->lookup.pid; + } else { + hdr.uid = UID_MAX; + 
hdr.pid = NO_PID; + } + hdr.rule_uid = rm->cuid; + hdr.rule_pid = rm->cpid; + hdr.dir = dir; + +#if INET + if (af == AF_INET && dir == PF_OUT) { + struct ip *ip; + + ip = mtod(m, struct ip *); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + } +#endif /* INET */ + + ifn->if_opackets++; + ifn->if_obytes += m->m_pkthdr.len; + + switch (dir) { + case PF_IN: + bpf_tap_in(ifn, DLT_PFLOG, m, &hdr, PFLOG_HDRLEN); + break; + + case PF_OUT: + bpf_tap_out(ifn, DLT_PFLOG, m, &hdr, PFLOG_HDRLEN); + break; + + default: + break; + } +#endif /* NBPFILTER > 0 */ + return (0); +} diff --git a/bsd/net/if_pflog.h b/bsd/net/if_pflog.h new file mode 100644 index 000000000..f2e5d4cad --- /dev/null +++ b/bsd/net/if_pflog.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: if_pflog.h,v 1.3 2007/08/13 22:18:33 jhw Exp $ */ +/* $OpenBSD: if_pflog.h,v 1.14 2006/10/25 11:27:01 henning Exp $ */ +/* + * Copyright 2001 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _NET_IF_PFLOG_H_ +#define _NET_IF_PFLOG_H_ + +#if PF || !defined(KERNEL) + +#ifdef __cplusplus +extern "C" { +#endif + +#define PFLOGIFS_MAX 1 + +#if KERNEL_PRIVATE +struct pflog_softc { + struct ifnet *sc_if; /* back ptr to interface */ + int sc_unit; + LIST_ENTRY(pflog_softc) sc_list; +}; +#endif /* KERNEL_PRIVATE */ + +#define PFLOG_RULESET_NAME_SIZE 16 + +struct pfloghdr { + u_int8_t length; + sa_family_t af; + u_int8_t action; + u_int8_t reason; + char ifname[IFNAMSIZ]; + char ruleset[PFLOG_RULESET_NAME_SIZE]; + u_int32_t rulenr; + u_int32_t subrulenr; + uid_t uid; + pid_t pid; + uid_t rule_uid; + pid_t rule_pid; + u_int8_t dir; + u_int8_t pad[3]; +}; + +#define PFLOG_HDRLEN sizeof(struct pfloghdr) +/* minus pad, also used as a signature */ +#define PFLOG_REAL_HDRLEN offsetof(struct pfloghdr, pad) + +#ifdef KERNEL_PRIVATE + +#if PFLOG +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) pflog_packet(i,a,b,c,d,e,f,g,h) +#else +#define PFLOG_PACKET(i,x,a,b,c,d,e,f,g,h) ((void)0) +#endif /* PFLOG */ + +__private_extern__ void pfloginit(void); +#endif /* KERNEL_PRIVATE */ + +#ifdef __cplusplus +} +#endif +#endif /* PF || !KERNEL */ +#endif /* _NET_IF_PFLOG_H_ */ diff --git a/bsd/net/if_pppvar.h b/bsd/net/if_pppvar.h index c717c0d88..a48282763 100644 --- a/bsd/net/if_pppvar.h +++ b/bsd/net/if_pppvar.h @@ -120,7 +120,7 @@ struct ppp_softc { /* Device-dependent part for async lines. */ ext_accm sc_asyncmap; /* async control character map */ - u_long sc_rasyncmap; /* receive async control char map */ + u_int32_t sc_rasyncmap; /* receive async control char map */ struct mbuf *sc_outm; /* mbuf chain currently being output */ struct mbuf *sc_m; /* pointer to input mbuf chain */ struct mbuf *sc_mc; /* pointer to current input mbuf */ diff --git a/bsd/net/if_stf.c b/bsd/net/if_stf.c index 5f86780c0..96b9664b5 100644 --- a/bsd/net/if_stf.c +++ b/bsd/net/if_stf.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/net/if_stf.c,v 1.1.2.6 2001/07/24 19:10:18 brooks Exp $ */ /* $KAME: if_stf.c,v 1.62 2001/06/07 22:32:16 itojun Exp $ */ @@ -125,7 +153,7 @@ struct stf_softc { ifnet_t sc_if; /* common area */ - u_long sc_protocol_family; /* dlil protocol attached */ + u_int32_t sc_protocol_family; /* dlil protocol attached */ union { struct route __sc_ro4; struct route_in6 __sc_ro6; /* just for safety */ @@ -161,7 +189,7 @@ static int stf_checkaddr4(struct stf_softc *, const struct in_addr *, static int stf_checkaddr6(struct stf_softc *, struct in6_addr *, struct ifnet *); static void stf_rtrequest(int, struct rtentry *, struct sockaddr *); -static errno_t stf_ioctl(ifnet_t ifp, u_int32_t cmd, void *data); +static errno_t stf_ioctl(ifnet_t ifp, u_long cmd, void *data); static errno_t stf_output(ifnet_t ifp, mbuf_t m); /* @@ -362,8 +390,7 @@ stf_encapcheck( if (proto != IPPROTO_IPV6) return 0; - /* LINTED const cast */ - mbuf_copydata(m, 0, sizeof(ip), &ip); + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip), &ip); if (ip.ip_v != 4) return 0; @@ -378,9 +405,10 @@ stf_encapcheck( * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:... */ if (bcmp(GET_V4(&ia6->ia_addr.sin6_addr), &ip.ip_dst, - sizeof(ip.ip_dst)) != 0) + sizeof(ip.ip_dst)) != 0) { + ifafree(&ia6->ia_ifa); return 0; - + } /* * check if IPv4 src matches the IPv4 address derived from the * local 6to4 address masked by prefixmask. 
@@ -392,10 +420,12 @@ stf_encapcheck( a.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; b = ip.ip_src; b.s_addr &= GET_V4(&ia6->ia_prefixmask.sin6_addr)->s_addr; - if (a.s_addr != b.s_addr) + if (a.s_addr != b.s_addr) { + ifafree(&ia6->ia_ifa); return 0; - + } /* stf interface makes single side match only */ + ifafree(&ia6->ia_ifa); return 32; } @@ -421,7 +451,7 @@ stf_getsrcifa6(struct ifnet *ifp) continue; bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) @@ -429,10 +459,11 @@ stf_getsrcifa6(struct ifnet *ifp) if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); if (ia4 == NULL) continue; + ifaref(ia); ifnet_lock_done(ifp); return (struct in6_ifaddr *)ia; } @@ -485,6 +516,7 @@ stf_pre_output( m = m_pullup(m, sizeof(*ip6)); if (!m) { *m0 = NULL; /* makes sure this won't be double freed */ + ifafree(&ia6->ia_ifa); return ENOBUFS; } } @@ -500,6 +532,7 @@ stf_pre_output( else if (IN6_IS_ADDR_6TO4(&dst6->sin6_addr)) in4 = GET_V4(&dst6->sin6_addr); else { + ifafree(&ia6->ia_ifa); return ENETUNREACH; } @@ -515,6 +548,7 @@ stf_pre_output( m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { *m0 = NULL; + ifafree(&ia6->ia_ifa); return ENOBUFS; } ip = mtod(m, struct ip *); @@ -549,6 +583,7 @@ stf_pre_output( if (sc->sc_ro.ro_rt == NULL) { rtalloc(&sc->sc_ro); if (sc->sc_ro.ro_rt == NULL) { + ifafree(&ia6->ia_ifa); return ENETUNREACH; } } @@ -559,6 +594,7 @@ stf_pre_output( if (result == 0) result = EJUSTRETURN; *m0 = NULL; + ifafree(&ia6->ia_ifa); return result; } static errno_t @@ -594,7 +630,7 @@ stf_checkaddr4( /* * reject packets with broadcast */ - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) @@ -602,11 +638,11 @@ stf_checkaddr4( if ((ia4->ia_ifa.ifa_ifp->if_flags & 
IFF_BROADCAST) == 0) continue; if (in->s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return -1; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); /* * perform ingress filter @@ -619,17 +655,22 @@ stf_checkaddr4( sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = *in; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); - if (!rt || rt->rt_ifp != inifp) { + rt = rtalloc1((struct sockaddr *)&sin, 0, 0); + if (rt != NULL) + RT_LOCK(rt); + if (rt == NULL || rt->rt_ifp != inifp) { #if 1 log(LOG_WARNING, "%s: packet from 0x%x dropped " "due to ingress filter\n", if_name(sc->sc_if), (u_int32_t)ntohl(sin.sin_addr.s_addr)); #endif - if (rt) + if (rt != NULL) { + RT_UNLOCK(rt); rtfree(rt); + } return -1; } + RT_UNLOCK(rt); rtfree(rt); } @@ -758,15 +799,16 @@ stf_rtrequest( struct rtentry *rt, __unused struct sockaddr *sa) { - - if (rt) + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); rt->rt_rmx.rmx_mtu = IPV6_MMTU; + } } static errno_t stf_ioctl( ifnet_t ifp, - u_int32_t cmd, + u_long cmd, void *data) { struct ifaddr *ifa; diff --git a/bsd/net/if_types.h b/bsd/net/if_types.h index 41b50c4f0..a8b580130 100644 --- a/bsd/net/if_types.h +++ b/bsd/net/if_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -141,4 +141,12 @@ #define IFT_GIF 0x37 /*0xf0*/ #define IFT_FAITH 0x38 /*0xf2*/ #define IFT_STF 0x39 /*0xf3*/ + +#define IFT_ENC 0xf4 /* Encapsulation */ +#define IFT_PFLOG 0xf5 /* Packet filter logging */ +#define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ +#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ + +#define IFT_PDP 0xff /* GPRS Packet Data Protocol */ + #endif diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c new file mode 100644 index 000000000..adf36bdf0 --- /dev/null +++ b/bsd/net/if_utun.c @@ -0,0 +1,732 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + + +/* ---------------------------------------------------------------------------------- +Application of kernel control for interface creation + +Theory of operation: +utun (user tunnel) acts as glue between kernel control sockets and network interfaces. +This kernel control will register an interface for every client that connects. +---------------------------------------------------------------------------------- */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Kernel Control functions */ +static errno_t utun_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo); +static errno_t utun_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, + void *unitinfo); +static errno_t utun_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, + void *unitinfo, mbuf_t m, int flags); +static errno_t utun_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t *len); +static errno_t utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t len); + +/* Network Interface functions */ +static errno_t utun_output(ifnet_t interface, mbuf_t data); +static errno_t utun_demux(ifnet_t interface, mbuf_t data, char *frame_header, + protocol_family_t *protocol); +static errno_t utun_framer(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *desk_linkaddr, + const char *frame_type); +static errno_t utun_add_proto(ifnet_t interface, protocol_family_t protocol, + const struct ifnet_demux_desc *demux_array, + u_int32_t demux_count); +static errno_t utun_del_proto(ifnet_t interface, protocol_family_t protocol); +static errno_t utun_ioctl(ifnet_t interface, u_long cmd, void *data); +static void utun_detached(ifnet_t interface); + +/* Protocol handlers */ +static errno_t 
utun_attach_proto(ifnet_t interface, protocol_family_t proto); +static errno_t utun_proto_input(ifnet_t interface, protocol_family_t protocol, + mbuf_t m, char *frame_header); +static errno_t utun_proto_pre_output(ifnet_t interface, protocol_family_t protocol, + mbuf_t *packet, const struct sockaddr *dest, void *route, + char *frame_type, char *link_layer_dest); + +/* Control block allocated for each kernel control connection */ +struct utun_pcb { + kern_ctl_ref ctlref; + u_int32_t unit; + ifnet_t ifp; + u_int32_t flags; +}; + +static kern_ctl_ref utun_kctlref; +static u_int32_t utun_family; +static OSMallocTag utun_malloc_tag; +static SInt32 utun_ifcount = 0; + +/* Prepend length */ +static void* +utun_alloc(size_t size) +{ + size_t *mem = OSMalloc(size + sizeof(size_t), utun_malloc_tag); + + if (mem) { + *mem = size + sizeof(size_t); + mem++; + } + + return (void*)mem; +} + +static void +utun_free(void *ptr) +{ + size_t *size = ptr; + size--; + OSFree(size, *size, utun_malloc_tag); +} + +errno_t +utun_register_control(void) +{ + struct kern_ctl_reg kern_ctl; + errno_t result = 0; + + /* Create a tag to allocate memory */ + utun_malloc_tag = OSMalloc_Tagalloc(UTUN_CONTROL_NAME, OSMT_DEFAULT); + + /* Find a unique value for our interface family */ + result = mbuf_tag_id_find(UTUN_CONTROL_NAME, &utun_family); + if (result != 0) { + printf("utun_register_control - mbuf_tag_id_find_internal failed: %d\n", result); + return result; + } + + bzero(&kern_ctl, sizeof(kern_ctl)); + strncpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); + kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ + kern_ctl.ctl_connect = utun_ctl_connect; + kern_ctl.ctl_disconnect = utun_ctl_disconnect; + kern_ctl.ctl_send = utun_ctl_send; + kern_ctl.ctl_setopt = utun_ctl_setopt; + kern_ctl.ctl_getopt = utun_ctl_getopt; + + result = ctl_register(&kern_ctl, &utun_kctlref); + if (result != 0) { + 
printf("utun_register_control - ctl_register failed: %d\n", result); + return result; + } + + /* Register the protocol plumbers */ + if ((result = proto_register_plumber(PF_INET, utun_family, + utun_attach_proto, NULL)) != 0) { + printf("utun_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n", + utun_family, result); + ctl_deregister(utun_kctlref); + return result; + } + + /* Register the protocol plumbers */ + if ((result = proto_register_plumber(PF_INET6, utun_family, + utun_attach_proto, NULL)) != 0) { + proto_unregister_plumber(PF_INET, utun_family); + ctl_deregister(utun_kctlref); + printf("utun_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n", + utun_family, result); + return result; + } + + return 0; +} + +/* Kernel control functions */ + +static errno_t +utun_ctl_connect( + kern_ctl_ref kctlref, + struct sockaddr_ctl *sac, + void **unitinfo) +{ + struct ifnet_init_params utun_init; + struct utun_pcb *pcb; + errno_t result; + + /* kernel control allocates, interface frees */ + pcb = utun_alloc(sizeof(*pcb)); + if (pcb == NULL) + return ENOMEM; + + /* Setup the protocol control block */ + bzero(pcb, sizeof(*pcb)); + *unitinfo = pcb; + pcb->ctlref = kctlref; + pcb->unit = sac->sc_unit; + + printf("utun_ctl_connect: creating interface utun%d\n", pcb->unit - 1); + + /* Create the interface */ + bzero(&utun_init, sizeof(utun_init)); + utun_init.name = "utun"; + utun_init.unit = pcb->unit - 1; + utun_init.family = utun_family; + utun_init.type = IFT_OTHER; + utun_init.output = utun_output; + utun_init.demux = utun_demux; + utun_init.framer = utun_framer; + utun_init.add_proto = utun_add_proto; + utun_init.del_proto = utun_del_proto; + utun_init.softc = pcb; + utun_init.ioctl = utun_ioctl; + utun_init.detach = utun_detached; + + result = ifnet_allocate(&utun_init, &pcb->ifp); + if (result != 0) { + printf("utun_ctl_connect - ifnet_allocate failed: %d\n", result); + utun_free(pcb); + return result; + } + 
OSIncrementAtomic(&utun_ifcount); + + /* Set flags and additional information. */ + ifnet_set_mtu(pcb->ifp, 1500); + ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff); + + /* The interface must generate its own IPv6 LinkLocal address, + * if possible following the recommendation of RFC2472 to the 64bit interface ID + */ + ifnet_set_eflags(pcb->ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL); + + /* Attach the interface */ + result = ifnet_attach(pcb->ifp, NULL); + if (result != 0) { + printf("utun_ctl_connect - ifnet_allocate failed: %d\n", result); + ifnet_release(pcb->ifp); + utun_free(pcb); + } + + /* Attach to bpf */ + if (result == 0) + bpfattach(pcb->ifp, DLT_NULL, 4); + + return result; +} + +static errno_t +utun_detach_ip( + ifnet_t interface, + protocol_family_t protocol, + socket_t pf_socket) +{ + errno_t result = EPROTONOSUPPORT; + + /* Attempt a detach */ + if (protocol == PF_INET) { + struct ifreq ifr; + + bzero(&ifr, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + + result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr); + } + else if (protocol == PF_INET6) { + struct in6_ifreq ifr6; + + bzero(&ifr6, sizeof(ifr6)); + snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + + result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6); + } + + return result; +} + +static void +utun_remove_address( + ifnet_t interface, + protocol_family_t protocol, + ifaddr_t address, + socket_t pf_socket) +{ + errno_t result = 0; + + /* Attempt a detach */ + if (protocol == PF_INET) { + struct ifreq ifr; + + bzero(&ifr, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr)); + if (result != 0) { + printf("utun_remove_address - ifaddr_address failed: %d", result); + } + else { + result = 
sock_ioctl(pf_socket, SIOCDIFADDR, &ifr); + if (result != 0) { + printf("utun_remove_address - SIOCDIFADDR failed: %d", result); + } + } + } + else if (protocol == PF_INET6) { + struct in6_ifreq ifr6; + + bzero(&ifr6, sizeof(ifr6)); + snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr, + sizeof(ifr6.ifr_addr)); + if (result != 0) { + printf("utun_remove_address - ifaddr_address failed (v6): %d", + result); + } + else { + result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6); + if (result != 0) { + printf("utun_remove_address - SIOCDIFADDR_IN6 failed: %d", + result); + } + } + } +} + +static void +utun_cleanup_family( + ifnet_t interface, + protocol_family_t protocol) +{ + errno_t result = 0; + socket_t pf_socket = NULL; + ifaddr_t *addresses = NULL; + int i; + + if (protocol != PF_INET && protocol != PF_INET6) { + printf("utun_cleanup_family - invalid protocol family %d\n", protocol); + return; + } + + /* Create a socket for removing addresses and detaching the protocol */ + result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket); + if (result != 0) { + if (result != EAFNOSUPPORT) + printf("utun_cleanup_family - failed to create %s socket: %d\n", + protocol == PF_INET ? "IP" : "IPv6", result); + goto cleanup; + } + + result = utun_detach_ip(interface, protocol, pf_socket); + if (result == 0 || result == ENXIO) { + /* We are done! We either detached or weren't attached. */ + goto cleanup; + } + else if (result != EBUSY) { + /* Uh, not really sure what happened here... */ + printf("utun_cleanup_family - utun_detach_ip failed: %d\n", result); + goto cleanup; + } + + /* + * At this point, we received an EBUSY error. This means there are + * addresses attached. We should detach them and then try again. 
+ */ + result = ifnet_get_address_list_family(interface, &addresses, protocol); + if (result != 0) { + printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n", + ifnet_name(interface), ifnet_unit(interface), + protocol == PF_INET ? "PF_INET" : "PF_INET6", result); + goto cleanup; + } + + for (i = 0; addresses[i] != 0; i++) { + utun_remove_address(interface, protocol, addresses[i], pf_socket); + } + ifnet_free_address_list(addresses); + addresses = NULL; + + /* + * The addresses should be gone, we should try the remove again. + */ + result = utun_detach_ip(interface, protocol, pf_socket); + if (result != 0 && result != ENXIO) { + printf("utun_cleanup_family - utun_detach_ip failed: %d\n", result); + } + +cleanup: + if (pf_socket != NULL) + sock_close(pf_socket); + + if (addresses != NULL) + ifnet_free_address_list(addresses); +} + +static errno_t +utun_ctl_disconnect( + __unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo) +{ + struct utun_pcb *pcb = unitinfo; + ifnet_t ifp = pcb->ifp; + errno_t result = 0; + + pcb->ctlref = NULL; + pcb->unit = 0; + + /* + * We want to do everything in our power to ensure that the interface + * really goes away when the socket is closed. We must remove IP/IPv6 + * addresses and detach the protocols. Finally, we can remove and + * release the interface. 
+ */ + utun_cleanup_family(ifp, AF_INET); + utun_cleanup_family(ifp, AF_INET6); + + if ((result = ifnet_detach(ifp)) != 0) { + printf("utun_ctl_disconnect - ifnet_detach failed: %d\n", result); + } + + if ((result = ifnet_release(ifp)) != 0) { + printf("utun_ctl_disconnect - ifnet_release failed: %d\n", result); + } + + return 0; +} + +static errno_t +utun_ctl_send( + __unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo, + mbuf_t m, + __unused int flags) +{ + struct utun_pcb *pcb = unitinfo; + struct ifnet_stat_increment_param incs; + errno_t result; + + mbuf_pkthdr_setrcvif(m, pcb->ifp); + + bpf_tap_in(pcb->ifp, DLT_NULL, m, 0, 0); + + if (pcb->flags & UTUN_FLAGS_NO_INPUT) { + /* flush data */ + mbuf_freem(m); + return 0; + } + + bzero(&incs, sizeof(incs)); + incs.packets_in = 1; + incs.bytes_in = mbuf_pkthdr_len(m); + result = ifnet_input(pcb->ifp, m, &incs); + if (result != 0) { + ifnet_stat_increment_in(pcb->ifp, 0, 0, 1); + printf("utun_ctl_send - ifnet_input failed: %d\n", result); + mbuf_freem(m); + } + + return 0; +} + +static errno_t +utun_ctl_setopt( + __unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo, + int opt, + void *data, + size_t len) +{ + struct utun_pcb *pcb = unitinfo; + errno_t result = 0; + + /* check for privileges for privileged options */ + switch (opt) { + case UTUN_OPT_FLAGS: + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + return EPERM; + } + break; + } + + switch (opt) { + case UTUN_OPT_FLAGS: + if (len != sizeof(u_int32_t)) + result = EMSGSIZE; + else + pcb->flags = *(u_int32_t *)data; + break; + default: + result = ENOPROTOOPT; + break; + } + + return result; +} + +static errno_t +utun_ctl_getopt( + __unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo, + int opt, + void *data, + size_t *len) +{ + struct utun_pcb *pcb = unitinfo; + errno_t result = 0; + + switch (opt) { + case UTUN_OPT_FLAGS: + if (*len != sizeof(u_int32_t)) + result = EMSGSIZE; + else + 
*(u_int32_t *)data = pcb->flags; + break; + case UTUN_OPT_IFNAME: + *len = snprintf(data, *len, "%s%d", ifnet_name(pcb->ifp), ifnet_unit(pcb->ifp)) + 1; + break; + default: + result = ENOPROTOOPT; + break; + } + + return result; +} + +/* Network Interface functions */ +static errno_t +utun_output( + ifnet_t interface, + mbuf_t data) +{ + struct utun_pcb *pcb = ifnet_softc(interface); + errno_t result; + + bpf_tap_out(pcb->ifp, DLT_NULL, data, 0, 0); + + if (pcb->flags & UTUN_FLAGS_NO_OUTPUT) { + /* flush data */ + mbuf_freem(data); + return 0; + } + + if (pcb->ctlref) { + int length = mbuf_pkthdr_len(data); + result = ctl_enqueuembuf(pcb->ctlref, pcb->unit, data, CTL_DATA_EOR); + if (result != 0) { + mbuf_freem(data); + printf("utun_output - ctl_enqueuembuf failed: %d\n", result); + ifnet_stat_increment_out(interface, 0, 0, 1); + } + else { + ifnet_stat_increment_out(interface, 1, length, 0); + } + } + else + mbuf_freem(data); + + return 0; +} + +/* Network Interface functions */ +static errno_t +utun_demux( + __unused ifnet_t interface, + mbuf_t data, + __unused char *frame_header, + protocol_family_t *protocol) +{ + + while (data != NULL && mbuf_len(data) < 1) { + data = mbuf_next(data); + } + + if (data == NULL) + return ENOENT; + + *protocol = ntohl(*(u_int32_t *)mbuf_data(data)); + return 0; +} + +static errno_t +utun_framer( + __unused ifnet_t interface, + mbuf_t *packet, + __unused const struct sockaddr *dest, + __unused const char *desk_linkaddr, + const char *frame_type) +{ + + if (mbuf_prepend(packet, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) { + printf("utun_framer - ifnet_output prepend failed\n"); + ifnet_stat_increment_out(interface, 0, 0, 1); + // just return, because the buffer was freed in mbuf_prepend + return EJUSTRETURN; + } + + // place protocol number at the beginning of the mbuf + *(protocol_family_t *)mbuf_data(*packet) = htonl(*(protocol_family_t *)(uintptr_t)(size_t)frame_type); + + return 0; +} + +static errno_t +utun_add_proto( + 
__unused ifnet_t interface, + protocol_family_t protocol, + __unused const struct ifnet_demux_desc *demux_array, + __unused u_int32_t demux_count) +{ + switch(protocol) { + case PF_INET: + return 0; + case PF_INET6: + return 0; + default: + break; + } + + return ENOPROTOOPT; +} + +static errno_t +utun_del_proto( + __unused ifnet_t interface, + __unused protocol_family_t protocol) +{ + return 0; +} + +static errno_t +utun_ioctl( + __unused ifnet_t interface, + __unused u_long command, + void *data) +{ + errno_t result = 0; + struct ifaddr *ifa = (struct ifaddr *)data; + + switch(command) { + case SIOCSIFMTU: + ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu); + break; + + case SIOCSIFADDR: + case SIOCAIFADDR: + /* This will be called for called for IPv6 Address additions */ + if (ifa->ifa_addr->sa_family == AF_INET6) + break; + /* Fall though for other families like IPv4 */ + + default: + result = EOPNOTSUPP; + } + + return result; +} + +static void +utun_detached( + ifnet_t interface) +{ + struct utun_pcb *pcb = ifnet_softc(interface); + + utun_free(pcb); + + OSDecrementAtomic(&utun_ifcount); +} + +/* Protocol Handlers */ + +static errno_t +utun_proto_input( + __unused ifnet_t interface, + protocol_family_t protocol, + mbuf_t m, + __unused char *frame_header) +{ + + // remove protocol family first + mbuf_adj(m, sizeof(u_int32_t)); + + proto_input(protocol, m); + + return 0; +} + +static errno_t +utun_proto_pre_output( + __unused ifnet_t interface, + protocol_family_t protocol, + __unused mbuf_t *packet, + __unused const struct sockaddr *dest, + __unused void *route, + __unused char *frame_type, + __unused char *link_layer_dest) +{ + + *(protocol_family_t *)(void *)frame_type = protocol; + return 0; +} + +static errno_t +utun_attach_proto( + ifnet_t interface, + protocol_family_t protocol) +{ + struct ifnet_attach_proto_param proto; + errno_t result; + + bzero(&proto, sizeof(proto)); + proto.input = utun_proto_input; + proto.pre_output = 
utun_proto_pre_output; + + result = ifnet_attach_protocol(interface, protocol, &proto); + if (result != 0 && result != EEXIST) { + printf("utun_attach_inet - ifnet_attach_protocol %d failed: %d\n", + protocol, result); + } + + return result; +} + diff --git a/bsd/net/if_utun.h b/bsd/net/if_utun.h new file mode 100644 index 000000000..a5dc22cfe --- /dev/null +++ b/bsd/net/if_utun.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#ifndef _NET_IF_UTUN_H_ +#define _NET_IF_UTUN_H_ + +#ifdef KERNEL_PRIVATE + +errno_t utun_register_control(void); + +#endif + +#ifdef PRIVATE + +#define UTUN_CONTROL_NAME "com.apple.net.utun_control" +#define UTUN_OPT_FLAGS 1 +#define UTUN_OPT_IFNAME 2 + +enum { + UTUN_FLAGS_NO_OUTPUT = 0x1, + UTUN_FLAGS_NO_INPUT = 0x2, +}; + +#endif +#endif diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index f26aebe00..67d52d0a2 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -65,16 +65,20 @@ #define _NET_IF_VAR_H_ #include +#include #include #include #include /* get TAILQ macros */ #ifdef KERNEL_PRIVATE #include #endif /* KERNEL_PRIVATE */ +#ifdef PRIVATE +#include +#endif #ifdef KERNEL #include -#endif KERNEL +#endif /* KERNEL */ #ifdef __APPLE__ #define APPLE_IF_FAM_LOOPBACK 1 @@ -257,6 +261,8 @@ struct if_data_internal { #define IF_LASTCHANGEUPTIME 1 /* lastchange: 1-uptime 0-calendar time */ struct timeval ifi_lastchange; /* time of last administrative change */ u_int32_t ifi_hwassist; /* HW offload capabilities */ + u_int32_t ifi_tso_v4_mtu; /* TCP Segment Offload IPv4 maximum segment size */ + u_int32_t ifi_tso_v6_mtu; /* TCP Segment Offload IPv6 maximum segment size */ }; #define if_mtu if_data.ifi_mtu @@ -283,6 +289,8 @@ struct if_data_internal { #define if_recvquota if_data.ifi_recvquota #define if_xmitquota if_data.ifi_xmitquota #define if_iflags if_data.ifi_iflags +#define if_tso_v4_mtu if_data.ifi_tso_v4_mtu +#define if_tso_v6_mtu if_data.ifi_tso_v6_mtu struct mbuf; struct ifaddr; @@ -306,19 +314,33 @@ struct ifnet_filter; TAILQ_HEAD(ifnet_filter_head, ifnet_filter); TAILQ_HEAD(ddesc_head_name, dlil_demux_desc); -/* bottom 16 bits reserved for hardware checksum */ -#define IF_HWASSIST_CSUM_IP 0x0001 /* will csum IP */ -#define IF_HWASSIST_CSUM_TCP 0x0002 /* will csum TCP */ -#define IF_HWASSIST_CSUM_UDP 0x0004 /* will csum UDP */ -#define IF_HWASSIST_CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ 
-#define IF_HWASSIST_CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ -#define IF_HWASSIST_CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation */ +/* All of the following IF_HWASSIST_* flags are defined + * in kpi_inteface.h as IFNET_* flags. These are redefined + * here as constants to avoid failures to build user level + * programs that can not include kpi_interface.h. It is + * important to keep this in sync with the definitions in + * kpi_interface.h. The corresponding constant for each + * definition is mentioned in the comment. + * + * Bottom 16 bits reserved for hardware checksum + */ +#define IF_HWASSIST_CSUM_IP 0x0001 /* will csum IP, IFNET_CSUM_IP */ +#define IF_HWASSIST_CSUM_TCP 0x0002 /* will csum TCP, IFNET_CSUM_TCP */ +#define IF_HWASSIST_CSUM_UDP 0x0004 /* will csum UDP, IFNET_CSUM_UDP */ +#define IF_HWASSIST_CSUM_IP_FRAGS 0x0008 /* will csum IP fragments, IFNET_CSUM_FRAGMENT */ +#define IF_HWASSIST_CSUM_FRAGMENT 0x0010 /* will do IP fragmentation, IFNET_IP_FRAGMENT */ +#define IF_HWASSIST_CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation, IFNET_CSUM_SUM16 */ #define IF_HWASSIST_CSUM_MASK 0xffff #define IF_HWASSIST_CSUM_FLAGS(hwassist) ((hwassist) & IF_HWASSIST_CSUM_MASK) /* VLAN support */ -#define IF_HWASSIST_VLAN_TAGGING 0x10000 /* supports VLAN tagging */ -#define IF_HWASSIST_VLAN_MTU 0x20000 /* supports VLAN MTU-sized packet (for software VLAN) */ +#define IF_HWASSIST_VLAN_TAGGING 0x00010000 /* supports VLAN tagging, IFNET_VLAN_TAGGING */ +#define IF_HWASSIST_VLAN_MTU 0x00020000 /* supports VLAN MTU-sized packet (for software VLAN), IFNET_VLAN_MTU */ + +/* TCP Segment Offloading support */ + +#define IF_HWASSIST_TSO_V4 0x00200000 /* will do TCP Segment offload for IPv4, IFNET_TSO_IPV4 */ +#define IF_HWASSIST_TSO_V6 0x00400000 /* will do TCP Segment offload for IPv6, IFNET_TSO_IPV6 */ #define IFNET_RW_LOCK 1 @@ -340,6 +362,9 @@ struct ddesc_head_str; struct proto_hash_entry; struct kev_msg; struct dlil_threading_info; +#if PF +struct 
pfi_kif; +#endif /* PF */ /* * Structure defining a network interface. @@ -351,12 +376,12 @@ struct ifnet { const char *if_name; /* name, e.g. ``en'' or ``lo'' */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ - u_long if_refcnt; + u_int32_t if_refcnt; #ifdef __KPI_INTERFACE__ ifnet_check_multi if_check_multi; #else void* if_check_multi; -#endif __KPI_INTERFACE__ +#endif /* __KPI_INTERFACE__ */ int if_pcount; /* number of promiscuous listeners */ struct bpf_if *if_bpf; /* packet filter structure */ u_short if_index; /* numeric abbreviation for this if */ @@ -382,7 +407,7 @@ struct ifnet { ifnet_demux_func if_demux; ifnet_event_func if_event; ifnet_framer_func if_framer; - ifnet_family_t if_family; /* ulong assigned by Apple */ + ifnet_family_t if_family; /* value assigned by Apple */ #else void* if_output; void* if_ioctl; @@ -391,14 +416,14 @@ struct ifnet { void* if_demux; void* if_event; void* if_framer; - u_long if_family; /* ulong assigned by Apple */ + u_int32_t if_family; /* value assigned by Apple */ #endif struct ifnet_filter_head if_flt_head; /* End DLIL specific */ - u_long if_delayed_detach; /* need to perform delayed detach */ + u_int32_t if_delayed_detach; /* need to perform delayed detach */ void *if_private; /* private to interface */ long if_eflags; /* autoaddr, autoaddr done, etc. 
*/ @@ -408,10 +433,10 @@ struct ifnet { #ifdef __KPI_INTERFACE__ ifnet_add_proto_func if_add_proto; ifnet_del_proto_func if_del_proto; -#else __KPI_INTERFACE__ +#else /* !__KPI_INTERFACE__ */ void* if_add_proto; void* if_del_proto; -#endif __KPI_INTERFACE__ +#endif /* !__KPI_INTERFACE__ */ struct proto_hash_entry *if_proto_hash; void *if_kpi_storage; #if 0 @@ -422,9 +447,9 @@ struct ifnet { void *unused_was_resolvemulti; struct ifqueue if_snd; - u_long unused_2[1]; + u_int32_t unused_2[1]; #ifdef __APPLE__ - u_long family_cookie; + uintptr_t family_cookie; struct ifprefixhead if_prefixhead; /* list of prefixes per if */ #ifdef _KERN_LOCKS_H_ @@ -441,7 +466,7 @@ struct ifnet { struct ifprefixhead if_prefixhead; /* list of prefixes per if */ #endif /* __APPLE__ */ struct { - u_long length; + u_int32_t length; union { u_char buffer[8]; u_char *ptr; @@ -450,13 +475,25 @@ struct ifnet { #if CONFIG_MACF_NET struct label *if_label; /* interface MAC label */ #endif + + u_int32_t if_wake_properties; +#if PF + struct thread *if_pf_curthread; + struct pfi_kif *if_pf_kif; +#endif /* PF */ +#ifdef _KERN_LOCKS_H_ + lck_mtx_t *if_fwd_route_lock; +#else + void *if_fwd_route_lock; +#endif + struct route if_fwd_route; /* cached IPv4 forwarding route */ }; #ifndef __APPLE__ /* for compatibility with other BSDs */ #define if_addrlist if_addrhead #define if_list if_link -#endif !__APPLE__ +#endif /* !__APPLE__ */ #endif /* PRIVATE */ @@ -544,9 +581,9 @@ if_enq_drop(struct ifqueue *ifq, struct mbuf *m) #ifdef MT_HEADER int if_enq_drop(struct ifqueue *, struct mbuf *); -#endif MT_HEADER +#endif /* MT_HEADER */ -#endif defined(__GNUC__) && defined(MT_HEADER) +#endif /* defined(__GNUC__) && defined(MT_HEADER) */ #endif /* KERNEL_PRIVATE */ @@ -565,21 +602,29 @@ struct ifaddr { struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ - void (*ifa_rtrequest) /* check or clean 
routes (+ or -)'d */ - (int, struct rtentry *, struct sockaddr *); - u_short ifa_flags; /* mostly rt_flags for cloning */ - int ifa_refcnt;/* 32bit ref count, use ifaref, ifafree */ - int ifa_metric; /* cost of going out this interface */ -#ifdef notdef - struct rtentry *ifa_rt; /* XXXX for ROUTETOIF ????? */ -#endif - int (*ifa_claim_addr) /* check if an addr goes to this if */ - (struct ifaddr *, const struct sockaddr *); - u_long ifa_debug; /* debug flags */ + void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ + (int, struct rtentry *, struct sockaddr *); + uint32_t ifa_flags; /* mostly rt_flags for cloning */ + int32_t ifa_refcnt; /* ref count, use ifaref, ifafree */ + int32_t ifa_metric; /* cost of going out this interface */ + void (*ifa_free)(struct ifaddr *); /* callback fn for freeing */ + void (*ifa_trace) /* callback fn for tracing refs */ + (struct ifaddr *, int); + uint32_t ifa_debug; /* debug flags */ }; + +/* + * Valid values for ifa_flags + */ #define IFA_ROUTE RTF_UP /* route installed (0x1) */ #define IFA_CLONING RTF_CLONING /* (0x100) */ -#define IFA_ATTACHED 0x1 /* ifa_debug: IFA is attached to an interface */ + +/* + * Valid values for ifa_debug + */ +#define IFD_ATTACHED 0x1 /* attached to an interface */ +#define IFD_ALLOC 0x2 /* dynamically allocated */ +#define IFD_DEBUG 0x4 /* has debugging info */ #endif /* PRIVATE */ @@ -653,7 +698,7 @@ int ifioctllocked(struct socket *, u_long, caddr_t, struct proc *); struct ifnet *ifunit(const char *); struct ifnet *if_withname(struct sockaddr *); -void if_clone_attach(struct if_clone *); +int if_clone_attach(struct if_clone *); void if_clone_detach(struct if_clone *); void ifnet_lock_assert(struct ifnet *ifp, int what); @@ -687,7 +732,8 @@ void ifaref(struct ifaddr *); struct ifmultiaddr *ifmaof_ifpforaddr(const struct sockaddr *, struct ifnet *); -int ifa_foraddr(unsigned int addr); +extern struct in_ifaddr *ifa_foraddr(unsigned int); +extern struct in_ifaddr *ifa_foraddr_scoped(unsigned 
int, unsigned int); #ifdef BSD_KERNEL_PRIVATE enum { diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 4a783b6d8..e1be1efd0 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -350,7 +350,7 @@ ifvlan_flags_set_detaching(ifvlan_ref ifv) SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IEEE 802.1Q VLAN"); SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "for consistency"); -#endif 0 +#endif #define M_VLAN M_DEVBUF @@ -359,7 +359,7 @@ static void vlan_clone_destroy(struct ifnet *); static int vlan_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t m, char *frame_header); static int vlan_output(struct ifnet *ifp, struct mbuf *m); -static int vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * addr); +static int vlan_ioctl(ifnet_t ifp, u_long cmd, void * addr); static int vlan_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func func); static int vlan_attach_protocol(struct ifnet *ifp); @@ -377,9 +377,9 @@ static struct if_clone vlan_cloner = IF_CLONE_INITIALIZER(VLANNAME, vlan_clone_destroy, 0, IF_MAXUNIT); -static void interface_link_event(struct ifnet * ifp, u_long event_code); +static void interface_link_event(struct ifnet * ifp, u_int32_t event_code); static void vlan_parent_link_event(vlan_parent_ref vlp, - u_long event_code); + u_int32_t event_code); extern void dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m); static int @@ -771,12 +771,16 @@ vlan_parent_remove_vlan(__unused vlan_parent_ref vlp, ifvlan_ref ifv) return; } -static void +static int vlan_clone_attach(void) { - if_clone_attach(&vlan_cloner); + int error; + + error = if_clone_attach(&vlan_cloner); + if (error != 0) + return error; vlan_lock_init(); - return; + return 0; } static int @@ -792,6 +796,8 @@ 
vlan_clone_create(struct if_clone *ifc, int unit) return (error); } ifv = _MALLOC(sizeof(struct ifvlan), M_VLAN, M_WAITOK); + if (ifv == NULL) + return ENOBUFS; bzero(ifv, sizeof(struct ifvlan)); multicast_list_init(&ifv->ifv_multicast); @@ -832,7 +838,7 @@ vlan_clone_create(struct if_clone *ifc, int unit) /* NB: flags are not set here */ ifnet_set_link_mib_data(ifp, &ifv->ifv_mib, sizeof ifv->ifv_mib); /* NB: mtu is not set here */ -#endif 0 +#endif ifnet_set_offload(ifp, 0); ifnet_set_addrlen(ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */ @@ -1126,6 +1132,7 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) ifvlan_ref ifv = NULL; vlan_parent_ref new_vlp = NULL; int need_vlp_release = 0; + ifnet_offload_t offload; u_int16_t parent_flags; u_int32_t progress = 0; vlan_parent_ref vlp = NULL; @@ -1253,14 +1260,9 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) ifnet_set_flags(ifp, parent_flags, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX); - /* - * If the parent interface can do hardware-assisted - * VLAN encapsulation, then propagate its hardware- - * assisted checksumming flags. - */ - if (ifnet_offload(p) & IF_HWASSIST_VLAN_TAGGING) { - ifnet_set_offload(ifp, IF_HWASSIST_CSUM_FLAGS(ifnet_offload(p))); - } + /* use hwassist bits from parent interface, but exclude VLAN bits */ + offload = ifnet_offload(p) & ~(IFNET_VLAN_TAGGING | IFNET_VLAN_MTU); + ifnet_set_offload(ifp, offload); ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING); ifvlan_flags_set_ready(ifv); @@ -1312,7 +1314,7 @@ vlan_link_event(struct ifnet * ifp, struct ifnet * p) "%s%d", ifnet_name(p), ifnet_unit(p)); if (ifnet_ioctl(p, 0, SIOCGIFMEDIA, &ifmr) == 0 && ifmr.ifm_count > 0 && ifmr.ifm_status & IFM_AVALID) { - u_long event; + u_int32_t event; event = (ifmr.ifm_status & IFM_ACTIVE) ? 
KEV_DL_LINK_ON : KEV_DL_LINK_OFF; @@ -1546,12 +1548,12 @@ vlan_set_mtu(struct ifnet * ifp, int mtu) } static int -vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * data) +vlan_ioctl(ifnet_t ifp, u_long cmd, void * data) { struct ifdevmtu * devmtu_p; int error = 0; struct ifaddr * ifa; - struct ifmediareq64 * ifmr; + struct ifmediareq *ifmr; struct ifreq * ifr; ifvlan_ref ifv; struct ifnet * p; @@ -1571,8 +1573,8 @@ vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * data) ifnet_set_flags(ifp, IFF_UP, IFF_UP); break; + case SIOCGIFMEDIA32: case SIOCGIFMEDIA64: - case SIOCGIFMEDIA: vlan_lock(); ifv = (ifvlan_ref)ifnet_softc(ifp); if (ifv == NULL || ifvlan_flags_detaching(ifv)) { @@ -1581,12 +1583,12 @@ vlan_ioctl(ifnet_t ifp, u_int32_t cmd, void * data) } p = (ifv->ifv_vlp == NULL) ? NULL : ifv->ifv_vlp->vlp_ifp; vlan_unlock(); - ifmr = (struct ifmediareq64 *)data; - user_addr = proc_is64bit(current_proc()) - ? ifmr->ifm_ifmu.ifmu_ulist64 - : CAST_USER_ADDR_T(ifmr->ifm_ifmu.ifmu_ulist32); + ifmr = (struct ifmediareq *)data; + user_addr = (cmd == SIOCGIFMEDIA64) ? 
+ ((struct ifmediareq64 *)ifmr)->ifmu_ulist : + CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist); if (p != NULL) { - struct ifmediareq64 p_ifmr; + struct ifmediareq p_ifmr; bzero(&p_ifmr, sizeof(p_ifmr)); error = ifnet_ioctl(p, 0, SIOCGIFMEDIA, &p_ifmr); @@ -1808,11 +1810,11 @@ vlan_event(struct ifnet * p, __unused protocol_family_t protocol, } static void -interface_link_event(struct ifnet * ifp, u_long event_code) +interface_link_event(struct ifnet * ifp, u_int32_t event_code) { struct { struct kern_event_msg header; - u_long unit; + u_int32_t unit; char if_name[IFNAMSIZ]; } event; @@ -1822,14 +1824,14 @@ interface_link_event(struct ifnet * ifp, u_long event_code) event.header.kev_subclass = KEV_DL_SUBCLASS; event.header.event_code = event_code; event.header.event_data[0] = ifnet_family(ifp); - event.unit = (u_long) ifnet_unit(ifp); + event.unit = (u_int32_t) ifnet_unit(ifp); strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); ifnet_event(ifp, &event.header); return; } static void -vlan_parent_link_event(vlan_parent_ref vlp, u_long event_code) +vlan_parent_link_event(vlan_parent_ref vlp, u_int32_t event_code) { ifvlan_ref ifv; @@ -1919,6 +1921,7 @@ vlan_detach_inet6(struct ifnet *ifp, protocol_family_t protocol_family) } #endif /* INET6 */ +#if NETAT static errno_t vlan_attach_at(struct ifnet *ifp, protocol_family_t protocol_family) { @@ -1930,6 +1933,7 @@ vlan_detach_at(struct ifnet *ifp, protocol_family_t protocol_family) { ether_detach_at(ifp, protocol_family); } +#endif /* NETAT */ __private_extern__ int vlan_family_init(void) @@ -1952,6 +1956,7 @@ vlan_family_init(void) goto done; } #endif +#if NETAT error = proto_register_plumber(PF_APPLETALK, IFNET_FAMILY_VLAN, vlan_attach_at, vlan_detach_at); if (error != 0) { @@ -1959,7 +1964,14 @@ vlan_family_init(void) error); goto done; } - vlan_clone_attach(); +#endif /* NETAT */ + error = vlan_clone_attach(); + if (error != 0) { + printf("proto_register_plumber failed vlan_clone_attach error=%d\n", + 
error); + goto done; + } + done: return (error); diff --git a/bsd/net/if_vlan_var.h b/bsd/net/if_vlan_var.h index df7c97a5a..069a81d92 100644 --- a/bsd/net/if_vlan_var.h +++ b/bsd/net/if_vlan_var.h @@ -76,7 +76,7 @@ struct ether_vlan_header { /* sysctl(3) tags, for compatibility purposes */ #define VLANCTL_PROTO 1 #define VLANCTL_MAX 2 -#endif 0 +#endif /* * Configuration structure for SIOCSETVLAN and SIOCGETVLAN ioctls. @@ -88,5 +88,5 @@ struct vlanreq { #ifdef KERNEL_PRIVATE int vlan_family_init(void) __attribute__((section("__TEXT, initcode"))); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/bsd/net/init.c b/bsd/net/init.c index 0c2286f82..85464da74 100644 --- a/bsd/net/init.c +++ b/bsd/net/init.c @@ -73,8 +73,7 @@ net_init_add( kfree(entry, sizeof(*entry)); return EALREADY; } - } while(!OSCompareAndSwap((UInt32)entry->next, (UInt32)entry, - (UInt32*)&list_head)); + } while(!OSCompareAndSwapPtr(entry->next, entry, &list_head)); return 0; } @@ -92,8 +91,7 @@ net_init_run(void) */ do { backward_head = list_head; - } while (!OSCompareAndSwap((UInt32)backward_head, (UInt32)LIST_RAN, - (UInt32*)&list_head)); + } while (!OSCompareAndSwapPtr(backward_head, LIST_RAN, &list_head)); /* Reverse the order of the list */ while (backward_head != 0) { diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index d9dfca3f3..652f63aad 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -47,6 +47,8 @@ #include #include +#include "net/net_str_id.h" + #if IF_LASTCHANGEUPTIME #define TOUCHLASTCHANGE(__if_lastchange) microuptime(__if_lastchange) #else @@ -189,7 +191,7 @@ ifnet_reference( if (ifp == NULL) return EINVAL; - oldval = OSIncrementAtomic((SInt32 *)&ifp->if_refcnt); + oldval = OSIncrementAtomic(&ifp->if_refcnt); return 0; } @@ -202,13 +204,22 @@ ifnet_release( if (ifp == NULL) return EINVAL; - oldval = OSDecrementAtomic((SInt32*)&ifp->if_refcnt); + oldval = OSDecrementAtomic(&ifp->if_refcnt); if 
(oldval == 0) panic("ifnet_release - refcount decremented past zero!"); return 0; } +errno_t +ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id) +{ + if (module_string == NULL || family_id == NULL) + return EINVAL; + return net_str_id_find_internal(module_string, family_id, NSI_IF_FAM_ID, 1); + +} + void* ifnet_softc( ifnet_t interface) @@ -303,7 +314,7 @@ ifnet_eflags( static const ifnet_offload_t offload_mask = IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING | IFNET_VLAN_MTU | - IFNET_MULTIPAGES; + IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6; errno_t ifnet_set_offload( @@ -329,6 +340,127 @@ ifnet_offload( return interface == NULL ? 0 : (interface->if_hwassist & offload_mask); } +errno_t +ifnet_set_tso_mtu( + ifnet_t interface, + sa_family_t family, + u_int32_t mtuLen) +{ + errno_t error = 0; + + if (interface == NULL) return EINVAL; + + if (mtuLen < interface->if_mtu) + return EINVAL; + + + switch (family) { + + case AF_INET: + if (interface->if_hwassist & IFNET_TSO_IPV4) + interface->if_tso_v4_mtu = mtuLen; + else + error = EINVAL; + break; + + case AF_INET6: + if (interface->if_hwassist & IFNET_TSO_IPV6) + interface->if_tso_v6_mtu = mtuLen; + else + error = EINVAL; + break; + + default: + error = EPROTONOSUPPORT; + } + + return error; +} + +errno_t +ifnet_get_tso_mtu( + ifnet_t interface, + sa_family_t family, + u_int32_t *mtuLen) +{ + errno_t error = 0; + + if (interface == NULL || mtuLen == NULL) return EINVAL; + + switch (family) { + + case AF_INET: + if (interface->if_hwassist & IFNET_TSO_IPV4) + *mtuLen = interface->if_tso_v4_mtu; + else + error = EINVAL; + break; + + case AF_INET6: + if (interface->if_hwassist & IFNET_TSO_IPV6) + *mtuLen = interface->if_tso_v6_mtu; + else + error = EINVAL; + break; + default: + error = EPROTONOSUPPORT; + } + + return error; +} + +errno_t +ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, 
u_int32_t mask) +{ + int lock; + struct kev_msg ev_msg; + struct net_event_data ev_data; + + if (interface == NULL) + return EINVAL; + + /* Do not accept wacky values */ + if ((properties & mask) & ~IF_WAKE_VALID_FLAGS) + return EINVAL; + + lock = (interface->if_lock != 0); + + if (lock) + ifnet_lock_exclusive(interface); + + interface->if_wake_properties = (properties & mask) | (interface->if_wake_properties & ~mask); + + if (lock) + ifnet_lock_done(interface); + + (void) ifnet_touch_lastchange(interface); + + /* Notify application of the change */ + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_DL_SUBCLASS; + + ev_msg.event_code = KEV_DL_WAKEFLAGS_CHANGED; + strlcpy(&ev_data.if_name[0], interface->if_name, IFNAMSIZ); + ev_data.if_family = interface->if_family; + ev_data.if_unit = (u_int32_t) interface->if_unit; + ev_msg.dv[0].data_length = sizeof(struct net_event_data); + ev_msg.dv[0].data_ptr = &ev_data; + ev_msg.dv[1].data_length = 0; + kev_post_msg(&ev_msg); + + return 0; +} + +u_int32_t +ifnet_get_wake_flags(ifnet_t interface) +{ + return interface == NULL ? 0 : interface->if_wake_properties; +} + + + + /* * Should MIB data store a copy? */ diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index dd3101b4a..3b1ddd671 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -69,21 +69,21 @@ struct ifnet_demux_desc; */ enum { - IFNET_FAMILY_ANY = 0, - IFNET_FAMILY_LOOPBACK = 1, - IFNET_FAMILY_ETHERNET = 2, - IFNET_FAMILY_SLIP = 3, - IFNET_FAMILY_TUN = 4, - IFNET_FAMILY_VLAN = 5, - IFNET_FAMILY_PPP = 6, - IFNET_FAMILY_PVC = 7, - IFNET_FAMILY_DISC = 8, - IFNET_FAMILY_MDECAP = 9, - IFNET_FAMILY_GIF = 10, - IFNET_FAMILY_FAITH = 11, - IFNET_FAMILY_STF = 12, - IFNET_FAMILY_FIREWIRE = 13, - IFNET_FAMILY_BOND = 14 + IFNET_FAMILY_ANY = 0, + IFNET_FAMILY_LOOPBACK = 1, + IFNET_FAMILY_ETHERNET = 2, + IFNET_FAMILY_SLIP = 3, + IFNET_FAMILY_TUN = 4, + IFNET_FAMILY_VLAN = 5, + IFNET_FAMILY_PPP = 6, + IFNET_FAMILY_PVC = 7, + IFNET_FAMILY_DISC = 8, + IFNET_FAMILY_MDECAP = 9, + IFNET_FAMILY_GIF = 10, + IFNET_FAMILY_FAITH = 11, + IFNET_FAMILY_STF = 12, + IFNET_FAMILY_FIREWIRE = 13, + IFNET_FAMILY_BOND = 14 }; /*! @typedef ifnet_family_t @@ -103,10 +103,10 @@ typedef u_int32_t ifnet_family_t; */ enum { - BPF_MODE_DISABLED = 0, - BPF_MODE_INPUT = 1, - BPF_MODE_OUTPUT = 2, - BPF_MODE_INPUT_OUTPUT = 3 + BPF_MODE_DISABLED = 0, + BPF_MODE_INPUT = 1, + BPF_MODE_OUTPUT = 2, + BPF_MODE_INPUT_OUTPUT = 3 }; /*! @typedef bpf_tap_mode @@ -143,20 +143,28 @@ typedef u_int32_t protocol_family_t; with the getPhysicalSegmentsWithCoalesce interfaces and enumerate the list of vectors should set this flag for possible gain in performance during bulk data transfer. + @constant IFNET_TSO_IPV4 Hardware supports IPv4 TCP Segment Offloading. + If the Interface driver sets this flag, TCP will send larger frames (up to 64KB) as one + frame to the adapter which will perform the final packetization. The maximum TSO segment + supported by the interface can be set with "ifnet_set_tso_mtu". To retrieve the real MTU + for the TCP connection the function "mbuf_get_tso_requested" is used by the driver. + @constant IFNET_TSO_IPV6 Hardware supports IPv6 TCP Segment Offloading. 
*/ enum { - IFNET_CSUM_IP = 0x00000001, - IFNET_CSUM_TCP = 0x00000002, - IFNET_CSUM_UDP = 0x00000004, - IFNET_CSUM_FRAGMENT = 0x00000008, - IFNET_IP_FRAGMENT = 0x00000010, + IFNET_CSUM_IP = 0x00000001, + IFNET_CSUM_TCP = 0x00000002, + IFNET_CSUM_UDP = 0x00000004, + IFNET_CSUM_FRAGMENT = 0x00000008, + IFNET_IP_FRAGMENT = 0x00000010, #ifdef KERNEL_PRIVATE - IFNET_CSUM_SUM16 = 0x00001000, -#endif - IFNET_VLAN_TAGGING = 0x00010000, - IFNET_VLAN_MTU = 0x00020000, - IFNET_MULTIPAGES = 0x00100000, + IFNET_CSUM_SUM16 = 0x00001000, +#endif /* KERNEL_PRIVATE */ + IFNET_VLAN_TAGGING = 0x00010000, + IFNET_VLAN_MTU = 0x00020000, + IFNET_MULTIPAGES = 0x00100000, + IFNET_TSO_IPV4 = 0x00200000, + IFNET_TSO_IPV6 = 0x00400000, }; /*! @typedef ifnet_offload_t @@ -171,7 +179,7 @@ typedef u_int32_t ifnet_offload_t; */ /*! @typedef bpf_packet_func - + @discussion bpf_packet_func The bpf_packet_func is used to intercept inbound and outbound packets. The tap function will never free the mbuf. The tap function will only copy the mbuf in to various @@ -185,7 +193,7 @@ typedef errno_t (*bpf_packet_func)(ifnet_t interface, mbuf_t data); /*! @typedef ifnet_output_func - + @discussion ifnet_output_func is used to transmit packets. The stack will pass fully formed packets, including frame header, to the ifnet_output function for an interface. The driver is @@ -200,7 +208,7 @@ typedef errno_t (*ifnet_output_func)(ifnet_t interface, mbuf_t data); @typedef ifnet_ioctl_func @discussion ifnet_ioctl_func is used to communicate ioctls from the stack to the driver. - + All undefined ioctls are reserved for future use by Apple. If you need to communicate with your kext using an ioctl, please use SIOCSIFKPI and SIOCGIFKPI. @@ -210,7 +218,8 @@ typedef errno_t (*ifnet_output_func)(ifnet_t interface, mbuf_t data); @param cmd The ioctl command. @param data A pointer to any data related to the ioctl. 
*/ -typedef errno_t (*ifnet_ioctl_func)(ifnet_t interface, u_int32_t cmd, void *data); +typedef errno_t (*ifnet_ioctl_func)(ifnet_t interface, unsigned long cmd, + void *data); /*! @typedef ifnet_set_bpf_tap @@ -218,7 +227,7 @@ typedef errno_t (*ifnet_ioctl_func)(ifnet_t interface, u_int32_t cmd, void *data for all packets. */ typedef errno_t (*ifnet_set_bpf_tap)(ifnet_t interface, bpf_tap_mode mode, - bpf_packet_func callback); + bpf_packet_func callback); /*! @typedef ifnet_detached_func @@ -239,12 +248,13 @@ typedef void (*ifnet_detached_func)(ifnet_t interface); /*! @typedef ifnet_demux_func - @discussion ifnet_demux_func is called for each inbound packet to determine - which protocol family the packet belongs to. This information is then - used by the stack to determine which protocol to pass the packet to. - This function may return protocol families for protocols that are - not attached. If the protocol family has not been attached to the - interface, the packet will be discarded. + @discussion ifnet_demux_func is called for each inbound packet to + determine which protocol family the packet belongs to. This + information is then used by the stack to determine which + protocol to pass the packet to. This function may return + protocol families for protocols that are not attached. If the + protocol family has not been attached to the interface, the + packet will be discarded. @param interface The interface the packet was received on. @param packet The mbuf containing the packet. @param frame_header A pointer to the frame header. @@ -252,12 +262,13 @@ typedef void (*ifnet_detached_func)(ifnet_t interface); packet should be stored here. @result If the result is zero, processing will continue normally. - If the result is EJUSTRETURN, processing will stop but the packet will not be freed. - If the result is anything else, the processing will stop and the packet will be freed. + If the result is EJUSTRETURN, processing will stop but the + packet will not be freed. 
+ If the result is anything else, the processing will stop and + the packet will be freed. */ typedef errno_t (*ifnet_demux_func)(ifnet_t interface, mbuf_t packet, - char *frame_header, - protocol_family_t *protocol_family); + char *frame_header, protocol_family_t *protocol_family); /*! @typedef ifnet_event_func @@ -286,13 +297,14 @@ typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg); pre-output function. @result If the result is zero, processing will continue normally. - If the result is EJUSTRETURN, processing will stop but the packet will not be freed. - If the result is anything else, the processing will stop and the packet will be freed. + If the result is EJUSTRETURN, processing will stop but the + packet will not be freed. + If the result is anything else, the processing will stop and + the packet will be freed. */ typedef errno_t (*ifnet_framer_func)(ifnet_t interface, mbuf_t *packet, - const struct sockaddr *dest, - const char *desk_linkaddr, - const char *frame_type); + const struct sockaddr *dest, const char *desk_linkaddr, + const char *frame_type); /*! @typedef ifnet_add_proto_func @@ -308,12 +320,12 @@ typedef errno_t (*ifnet_framer_func)(ifnet_t interface, mbuf_t *packet, @param demux_count The number of demux descriptors in the array. @result If the result is zero, processing will continue normally. - If the result is anything else, the add protocol will be aborted. + If the result is anything else, the add protocol will be + aborted. */ typedef errno_t (*ifnet_add_proto_func)(ifnet_t interface, - protocol_family_t protocol_family, - const struct ifnet_demux_desc *demux_array, - u_int32_t demux_count); + protocol_family_t protocol_family, + const struct ifnet_demux_desc *demux_array, u_int32_t demux_count); /*! @typedef if_del_proto_func @@ -329,7 +341,7 @@ typedef errno_t (*ifnet_add_proto_func)(ifnet_t interface, and the error will be returned to the caller. 
*/ typedef errno_t (*ifnet_del_proto_func)(ifnet_t interface, - protocol_family_t protocol_family); + protocol_family_t protocol_family); /*! @typedef ifnet_check_multi @@ -337,7 +349,7 @@ typedef errno_t (*ifnet_del_proto_func)(ifnet_t interface, added to an interface. This gives the interface an opportunity to reject invalid multicast addresses before they are attached to the interface. - + To prevent an address from being added to your multicast list, return EADDRNOTAVAIL. If you don't know how to parse/translate the address, return EOPNOTSUPP. @@ -348,7 +360,7 @@ typedef errno_t (*ifnet_del_proto_func)(ifnet_t interface, EOPNOTSUPP for addresses the interface does not understand. */ typedef errno_t (*ifnet_check_multi)(ifnet_t interface, - const struct sockaddr* mcast); + const struct sockaddr *mcast); /*! @typedef proto_media_input @@ -360,33 +372,34 @@ typedef errno_t (*ifnet_check_multi)(ifnet_t interface, @param packet The packet being input. @param header The frame header. @result - If the result is zero, the caller will assume the packet was passed - to the protocol. - If the result is non-zero and not EJUSTRETURN, the caller will free - the packet. + If the result is zero, the caller will assume the packet was + passed to the protocol. + If the result is non-zero and not EJUSTRETURN, the caller will + free the packet. */ typedef errno_t (*proto_media_input)(ifnet_t ifp, protocol_family_t protocol, - mbuf_t packet, char* header); + mbuf_t packet, char *header); /*! @typedef proto_media_input_v2 @discussion proto_media_input_v2 is called for all inbound packets for a specific protocol on a specific interface. This function is registered on an interface using ifnet_attach_protocolv2. - proto_media_input_v2 differs from proto_media_input in that it will - be called for a list of packets instead of once for each individual - packet. The frame header can be retrieved using mbuf_pkthdr_header. 
+ proto_media_input_v2 differs from proto_media_input in that it + will be called for a list of packets instead of once for each + individual packet. The frame header can be retrieved using + mbuf_pkthdr_header. @param ifp The interface the packet was received on. @param protocol_family The protocol of the packet received. @param packet The packet being input. @result - If the result is zero, the caller will assume the packets were passed - to the protocol. - If the result is non-zero and not EJUSTRETURN, the caller will free - the packets. + If the result is zero, the caller will assume the packets were + passed to the protocol. + If the result is non-zero and not EJUSTRETURN, the caller will + free the packets. */ typedef errno_t (*proto_media_input_v2)(ifnet_t ifp, protocol_family_t protocol, - mbuf_t packet); + mbuf_t packet); /*! @typedef proto_media_preout @@ -409,8 +422,8 @@ typedef errno_t (*proto_media_input_v2)(ifnet_t ifp, protocol_family_t protocol, caller. */ typedef errno_t (*proto_media_preout)(ifnet_t ifp, protocol_family_t protocol, - mbuf_t *packet, const struct sockaddr *dest, - void *route, char *frame_type, char *link_layer_dest); + mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type, + char *link_layer_dest); /*! @typedef proto_media_event @@ -421,7 +434,7 @@ typedef errno_t (*proto_media_preout)(ifnet_t ifp, protocol_family_t protocol, @param kev_msg The event. */ typedef void (*proto_media_event)(ifnet_t ifp, protocol_family_t protocol, - const struct kev_msg *event); + const struct kev_msg *event); /*! @typedef proto_media_ioctl @@ -433,7 +446,7 @@ typedef void (*proto_media_event)(ifnet_t ifp, protocol_family_t protocol, EOPNOTSUPP, other parts of the stack may not get an opportunity to process the ioctl. If you return EJUSTRETURN, processing will stop and a result of zero will be returned to the caller. - + All undefined ioctls are reserved for future use by Apple. 
If you need to communicate with your kext using an ioctl, please use SIOCSIFKPI and SIOCGIFKPI. @@ -445,7 +458,7 @@ typedef void (*proto_media_event)(ifnet_t ifp, protocol_family_t protocol, See the discussion. */ typedef errno_t (*proto_media_ioctl)(ifnet_t ifp, protocol_family_t protocol, - u_int32_t command, void* argument); + unsigned long command, void *argument); /*! @typedef proto_media_detached @@ -458,7 +471,6 @@ typedef errno_t (*proto_media_ioctl)(ifnet_t ifp, protocol_family_t protocol, */ typedef errno_t (*proto_media_detached)(ifnet_t ifp, protocol_family_t protocol); - /*! @typedef proto_media_resolve_multi @discussion proto_media_resolve_multi is called to resolve a @@ -471,8 +483,8 @@ typedef errno_t (*proto_media_detached)(ifnet_t ifp, protocol_family_t protocol) @result Return zero on success or an errno error value on failure. */ typedef errno_t (*proto_media_resolve_multi)(ifnet_t ifp, - const struct sockaddr *proto_addr, - struct sockaddr_dl *out_ll, size_t ll_len); + const struct sockaddr *proto_addr, struct sockaddr_dl *out_ll, + size_t ll_len); /*! @typedef proto_media_send_arp @@ -500,12 +512,9 @@ typedef errno_t (*proto_media_resolve_multi)(ifnet_t ifp, NULL. @result Return zero on success or an errno error value on failure. */ -typedef errno_t (*proto_media_send_arp)(ifnet_t ifp, - u_short arpop, - const struct sockaddr_dl* sender_hw, - const struct sockaddr* sender_proto, - const struct sockaddr_dl* target_hw, - const struct sockaddr* target_proto); +typedef errno_t (*proto_media_send_arp)(ifnet_t ifp, u_short arpop, + const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto, + const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto); /*! @struct ifnet_stat_increment_param @@ -520,16 +529,15 @@ typedef errno_t (*proto_media_send_arp)(ifnet_t ifp, @field collisions The number of collisions seen by this interface. @field dropped The number of packets dropped. 
*/ - struct ifnet_stat_increment_param { u_int32_t packets_in; u_int32_t bytes_in; u_int32_t errors_in; - + u_int32_t packets_out; u_int32_t bytes_out; u_int32_t errors_out; - + u_int32_t collisions; u_int32_t dropped; }; @@ -549,46 +557,50 @@ struct ifnet_stat_increment_param { @field type The interface type (see sys/if_types.h). Must be less than 256. For new types, use IFT_OTHER. @field output The output function for the interface. Every packet the - stack attempts to send through this interface will go out through - this function. + stack attempts to send through this interface will go out + through this function. @field demux The function used to determine the protocol family of an incoming packet. - @field add_proto The function used to attach a protocol to this interface. - @field del_proto The function used to remove a protocol from this interface. + @field add_proto The function used to attach a protocol to this + interface. + @field del_proto The function used to remove a protocol from this + interface. @field framer The function used to frame outbound packets, may be NULL. - @field softc Driver specific storage. This value can be retrieved from the - ifnet using the ifnet_softc function. + @field softc Driver specific storage. This value can be retrieved from + the ifnet using the ifnet_softc function. @field ioctl The function used to handle ioctls. @field set_bpf_tap The function used to set the bpf_tap function. - @field detach The function called to let the driver know the interface has been detached. - @field event The function to notify the interface of various interface specific kernel events. - @field broadcast_addr The link-layer broadcast address for this interface. + @field detach The function called to let the driver know the interface + has been detached. + @field event The function to notify the interface of various interface + specific kernel events. + @field broadcast_addr The link-layer broadcast address for this + interface. 
@field broadcast_len The length of the link-layer broadcast address. */ - struct ifnet_init_params { /* used to match recycled interface */ - const void* uniqueid; /* optional */ - u_int32_t uniqueid_len; /* optional */ - + const void *uniqueid; /* optional */ + u_int32_t uniqueid_len; /* optional */ + /* used to fill out initial values for interface */ - const char* name; /* required */ - u_int32_t unit; /* required */ - ifnet_family_t family; /* required */ - u_int32_t type; /* required */ - ifnet_output_func output; /* required */ - ifnet_demux_func demux; /* required */ + const char *name; /* required */ + u_int32_t unit; /* required */ + ifnet_family_t family; /* required */ + u_int32_t type; /* required */ + ifnet_output_func output; /* required */ + ifnet_demux_func demux; /* required */ ifnet_add_proto_func add_proto; /* required */ ifnet_del_proto_func del_proto; /* required */ - ifnet_check_multi check_multi; /* required for non point-to-point interfaces */ - ifnet_framer_func framer; /* optional */ - void* softc; /* optional */ - ifnet_ioctl_func ioctl; /* optional */ - ifnet_set_bpf_tap set_bpf_tap; /* deprecated */ - ifnet_detached_func detach; /* optional */ - ifnet_event_func event; /* optional */ - const void *broadcast_addr;/* required for non point-to-point interfaces */ - u_int32_t broadcast_len; /* required for non point-to-point interfaces */ + ifnet_check_multi check_multi; /* required for non point-to-point interfaces */ + ifnet_framer_func framer; /* optional */ + void *softc; /* optional */ + ifnet_ioctl_func ioctl; /* optional */ + ifnet_set_bpf_tap set_bpf_tap; /* deprecated */ + ifnet_detached_func detach; /* optional */ + ifnet_event_func event; /* optional */ + const void *broadcast_addr; /* required for non point-to-point interfaces */ + u_int32_t broadcast_len; /* required for non point-to-point interfaces */ }; /*! @@ -604,18 +616,17 @@ struct ifnet_init_params { @field collisions The number of collisions seen by this interface. 
@field dropped The number of packets dropped. */ - struct ifnet_stats_param { u_int64_t packets_in; u_int64_t bytes_in; u_int64_t multicasts_in; u_int64_t errors_in; - + u_int64_t packets_out; u_int64_t bytes_out; u_int64_t multicasts_out; u_int64_t errors_out; - + u_int64_t collisions; u_int64_t dropped; u_int64_t no_protocol; @@ -636,10 +647,9 @@ struct ifnet_stats_param { @field datalen The number of bytes of data used to describe the packet. */ - struct ifnet_demux_desc { u_int32_t type; - void* data; + void *data; u_int32_t datalen; }; @@ -663,26 +673,26 @@ struct ifnet_demux_desc { #endif /* KERNEL_PRIVATE */ struct ifnet_attach_proto_param { - struct ifnet_demux_desc *demux_array; /* interface may/may not require */ - u_int32_t demux_count; /* interface may/may not require */ - - proto_media_input input; /* required */ - proto_media_preout pre_output; /* required */ - proto_media_event event; /* optional */ - proto_media_ioctl ioctl; /* optional */ + struct ifnet_demux_desc *demux_array; /* interface may/may not require */ + u_int32_t demux_count; /* interface may/may not require */ + + proto_media_input input; /* required */ + proto_media_preout pre_output; /* required */ + proto_media_event event; /* optional */ + proto_media_ioctl ioctl; /* optional */ proto_media_detached detached; /* optional */ proto_media_resolve_multi resolve; /* optional */ proto_media_send_arp send_arp; /* optional */ }; struct ifnet_attach_proto_param_v2 { - struct ifnet_demux_desc *demux_array; /* interface may/may not require */ - u_int32_t demux_count; /* interface may/may not require */ - + struct ifnet_demux_desc *demux_array; /* interface may/may not require */ + u_int32_t demux_count; /* interface may/may not require */ + proto_media_input_v2 input; /* required */ - proto_media_preout pre_output; /* required */ - proto_media_event event; /* optional */ - proto_media_ioctl ioctl; /* optional */ + proto_media_preout pre_output; /* required */ + proto_media_event event; /* 
optional */ + proto_media_ioctl ioctl; /* optional */ proto_media_detached detached; /* optional */ proto_media_resolve_multi resolve; /* optional */ proto_media_send_arp send_arp; /* optional */ @@ -716,7 +726,8 @@ __BEGIN_DECLS if an interface with the same uniqueid and family has already been allocated and is in use. */ -errno_t ifnet_allocate(const struct ifnet_init_params *init, ifnet_t *interface); +extern errno_t ifnet_allocate(const struct ifnet_init_params *init, + ifnet_t *interface); /*! @function ifnet_reference @@ -726,7 +737,7 @@ errno_t ifnet_allocate(const struct ifnet_init_params *init, ifnet_t *interface) @param interface The interface to increment the reference count of. @result May return EINVAL if the interface is not valid. */ -errno_t ifnet_reference(ifnet_t interface); +extern errno_t ifnet_reference(ifnet_t interface); /*! @function ifnet_release @@ -736,7 +747,7 @@ errno_t ifnet_reference(ifnet_t interface); and possibly free. @result May return EINVAL if the interface is not valid. */ -errno_t ifnet_release(ifnet_t interface); +extern errno_t ifnet_release(ifnet_t interface); /*! @function ifnet_attach @@ -744,7 +755,7 @@ errno_t ifnet_release(ifnet_t interface); interface must be setup properly before calling attach. The stack will take a reference on the interface and hold it until ifnet_detach is called. - + This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. @@ -756,34 +767,54 @@ errno_t ifnet_release(ifnet_t interface); @result Will return an error if there is anything wrong with the interface. */ -errno_t ifnet_attach(ifnet_t interface, const struct sockaddr_dl *ll_addr); +extern errno_t ifnet_attach(ifnet_t interface, + const struct sockaddr_dl *ll_addr); /*! @function ifnet_detach @discussion Detaches the interface. - + Call this to indicate this interface is no longer valid (i.e. PC Card was removed). 
This function will begin the process of removing knowledge of this interface from the stack. - + The function will return before the interface is detached. The functions you supplied in to the interface may continue to be called. When the detach has been completed, your detached function will be called. Your kext must not unload until the detached function has been called. The interface will be properly freed when the reference count reaches zero. - + An interface may not be attached again. You must call ifnet_allocate to create a new interface to attach. - + This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. @param interface The interface to detach. @result 0 on success, otherwise errno error. */ -errno_t ifnet_detach(ifnet_t interface); - +extern errno_t ifnet_detach(ifnet_t interface); + +/*! + @function ifnet_interface_family_find + @discussion Look up the interface family identifier for a string. + If there is no interface family identifier assigned for this string + a new interface family identifier is created and assigned. + It is recommended to use the bundle id of the KEXT as the string + to avoid collisions with other KEXTs. + The lookup operation is not optimized so a module should call this + function once during startup and cache the interface family identifier. + The interface family identifier for a string will not be re-assigned until + the system reboots. + @param module_string A unique string identifying your interface family + @param family_id Upon return, a unique interface family identifier for use with + ifnet_* functions. This identifier is valid until the system + is rebooted. + @result 0 on success, otherwise errno error. +*/ +extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id); + /* * Interface manipulation. 
*/ @@ -794,7 +825,7 @@ errno_t ifnet_detach(ifnet_t interface); @param interface Interface to retrieve the storage from. @result Driver's private storage. */ -void* ifnet_softc(ifnet_t interface); +extern void *ifnet_softc(ifnet_t interface); /*! @function ifnet_name @@ -802,7 +833,7 @@ void* ifnet_softc(ifnet_t interface); @param interface Interface to retrieve the name from. @result Pointer to the name. */ -const char* ifnet_name(ifnet_t interface); +extern const char *ifnet_name(ifnet_t interface); /*! @function ifnet_family @@ -810,7 +841,7 @@ const char* ifnet_name(ifnet_t interface); @param interface Interface to retrieve the unit number from. @result Unit number. */ -ifnet_family_t ifnet_family(ifnet_t interface); +extern ifnet_family_t ifnet_family(ifnet_t interface); /*! @function ifnet_unit @@ -818,7 +849,7 @@ ifnet_family_t ifnet_family(ifnet_t interface); @param interface Interface to retrieve the unit number from. @result Unit number. */ -u_int32_t ifnet_unit(ifnet_t interface); +extern u_int32_t ifnet_unit(ifnet_t interface); /*! @function ifnet_index @@ -830,7 +861,7 @@ u_int32_t ifnet_unit(ifnet_t interface); @param interface Interface to retrieve the index of. @result Index. */ -u_int32_t ifnet_index(ifnet_t interface); +extern u_int32_t ifnet_index(ifnet_t interface); /*! @function ifnet_set_flags @@ -844,7 +875,8 @@ u_int32_t ifnet_index(ifnet_t interface); flags are defined in net/if.h @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask); +extern errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, + u_int16_t mask); /*! @function ifnet_flags @@ -852,7 +884,7 @@ errno_t ifnet_set_flags(ifnet_t interface, u_int16_t new_flags, u_int16_t mask); @param interface Interface to retrieve the flags from. @result Flags. 
These flags are defined in net/if.h */ -u_int16_t ifnet_flags(ifnet_t interface); +extern u_int16_t ifnet_flags(ifnet_t interface); #ifdef KERNEL_PRIVATE @@ -869,7 +901,8 @@ u_int16_t ifnet_flags(ifnet_t interface); @param mask The mask of flags to be modified. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask); +extern errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, + u_int32_t mask); /*! @function ifnet_eflags @@ -877,8 +910,8 @@ errno_t ifnet_set_eflags(ifnet_t interface, u_int32_t new_flags, u_int32_t mask) @param interface Interface to retrieve the flags from. @result Extended flags. These flags are defined in net/if.h */ -u_int32_t ifnet_eflags(ifnet_t interface); -#endif +extern u_int32_t ifnet_eflags(ifnet_t interface); +#endif /* KERNEL_PRIVATE */ /*! @function ifnet_set_offload @@ -892,7 +925,7 @@ u_int32_t ifnet_eflags(ifnet_t interface); @param mask The mask of flags to be modified. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload); +extern errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload); /*! @function ifnet_offload @@ -901,7 +934,62 @@ errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload); @param interface Interface to retrieve the offload from. @result Abilities flags, see ifnet_offload_t. */ -ifnet_offload_t ifnet_offload(ifnet_t interface); +extern ifnet_offload_t ifnet_offload(ifnet_t interface); + +/*! + @function ifnet_set_tso_mtu + @discussion Sets maximum TCP Segmentation Offload segment size for + the interface + @param interface The interface. + @param family The family for which the offload MTU is provided for + (AF_INET or AF_INET6) + @param mtuLen Maximum segment size supported by the interface + @result 0 on success otherwise the errno error. 
+*/ +extern errno_t ifnet_set_tso_mtu(ifnet_t interface, sa_family_t family, + u_int32_t mtuLen); + +/*! + @function ifnet_get_tso_mtu + @discussion Returns maximum TCP Segmentation Offload segment size for + the interface + @param interface The interface. + @param family The family for which the offload MTU is provided for + (AF_INET or AF_INET6) + @param mtuLen Value of the maximum MTU supported for the interface + and family provided. + @result 0 on success otherwise the errno error. + */ +extern errno_t ifnet_get_tso_mtu(ifnet_t interface, sa_family_t family, + u_int32_t *mtuLen); + +/*! + @enum Interface wake properties + @abstract Constants defining Interface wake properties. + @constant IFNET_WAKE_ON_MAGIC_PACKET Wake on Magic Packet. +*/ +enum { + IFNET_WAKE_ON_MAGIC_PACKET = 0x01 +}; + +/*! + @function ifnet_set_wake_flags + @discussion Sets the wake properties of the underlying hardware. These are + typically set by the driver. + @param interface The interface. + @param properties Properties to set or unset. + @param mask Mask of the properties to set or unset. + @result 0 on success otherwise the errno error. +*/ +extern errno_t ifnet_set_wake_flags(ifnet_t interface, u_int32_t properties, u_int32_t mask); + +/*! + @function ifnet_get_wake_flags + @discussion Returns the wake properties set on the interface. + @param interface The interface. + @result The wake properties +*/ +extern u_int32_t ifnet_get_wake_flags(ifnet_t interface); /*! @function ifnet_set_link_mib_data @@ -910,7 +998,7 @@ ifnet_offload_t ifnet_offload(ifnet_t interface); whenever the sysctl for getting interface specific MIB data is used. Since the ifnet_t stores a pointer to your data instead of a copy, you may update the data at the address at any time. - + This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. 
@@ -918,8 +1006,9 @@ ifnet_offload_t ifnet_offload(ifnet_t interface); @param mibData A pointer to the data. @param mibLen Length of data pointed to. @result 0 on success otherwise the errno error. - */ -errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData, u_int32_t mibLen); +*/ +extern errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData, + u_int32_t mibLen); /*! @function ifnet_get_link_mib_data @@ -935,7 +1024,8 @@ errno_t ifnet_set_link_mib_data(ifnet_t interface, void *mibData, u_int32_t mibL @result Returns an error if the buffer size is too small or there is no data. */ -errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData, u_int32_t *mibLen); +extern errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData, + u_int32_t *mibLen); /*! @function ifnet_get_link_mib_data_length @@ -944,7 +1034,7 @@ errno_t ifnet_get_link_mib_data(ifnet_t interface, void *mibData, u_int32_t *mib @result Returns the number of bytes of mib data associated with the interface. */ -u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface); +extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface); /*! @function ifnet_attach_protocol @@ -955,22 +1045,24 @@ u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface); @param proto_details Details of the protocol being attached. @result 0 on success otherwise the errno error. */ -errno_t ifnet_attach_protocol(ifnet_t interface, protocol_family_t protocol_family, - const struct ifnet_attach_proto_param *proto_details); +extern errno_t ifnet_attach_protocol(ifnet_t interface, + protocol_family_t protocol_family, + const struct ifnet_attach_proto_param *proto_details); /*! @function ifnet_attach_protocol_v2 - @discussion Attaches a protocol to an interface using the newer version 2 - style interface. So far the only difference is support for packet - chains which improve performance. + @discussion Attaches a protocol to an interface using the newer + version 2 style interface. 
So far the only difference is support + for packet chains which improve performance. @param interface The interface. @param protocol_family The protocol family being attached (PF_INET/PF_APPLETALK/etc...). @param proto_details Details of the protocol being attached. @result 0 on success otherwise the errno error. */ -errno_t ifnet_attach_protocol_v2(ifnet_t interface, protocol_family_t protocol_family, - const struct ifnet_attach_proto_param_v2 *proto_details); +extern errno_t ifnet_attach_protocol_v2(ifnet_t interface, + protocol_family_t protocol_family, + const struct ifnet_attach_proto_param_v2 *proto_details); /*! @function ifnet_detach_protocol @@ -980,7 +1072,8 @@ errno_t ifnet_attach_protocol_v2(ifnet_t interface, protocol_family_t protocol_f detach. @result 0 on success otherwise the errno error. */ -errno_t ifnet_detach_protocol(ifnet_t interface, protocol_family_t protocol_family); +extern errno_t ifnet_detach_protocol(ifnet_t interface, + protocol_family_t protocol_family); /*! @function ifnet_output @@ -1004,8 +1097,9 @@ errno_t ifnet_detach_protocol(ifnet_t interface, protocol_family_t protocol_fami function does not require this value, you may pass NULL. @result 0 on success otherwise the errno error. */ -errno_t ifnet_output(ifnet_t interface, protocol_family_t protocol_family, mbuf_t packet, - void* route, const struct sockaddr *dest); +extern errno_t ifnet_output(ifnet_t interface, + protocol_family_t protocol_family, mbuf_t packet, void *route, + const struct sockaddr *dest); /*! @function ifnet_output_raw @@ -1024,7 +1118,8 @@ errno_t ifnet_output(ifnet_t interface, protocol_family_t protocol_family, mbuf_ @param packet The fully formed packet to be transmitted. @result 0 on success otherwise the errno error. */ -errno_t ifnet_output_raw(ifnet_t interface, protocol_family_t protocol_family, mbuf_t packet); +extern errno_t ifnet_output_raw(ifnet_t interface, + protocol_family_t protocol_family, mbuf_t packet); /*! 
@function ifnet_input @@ -1040,14 +1135,14 @@ errno_t ifnet_output_raw(ifnet_t interface, protocol_family_t protocol_family, m stats. This parameter may be NULL. @result 0 on success otherwise the errno error. */ -errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet, - const struct ifnet_stat_increment_param *stats); +extern errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet, + const struct ifnet_stat_increment_param *stats); /*! @function ifnet_ioctl @discussion Calls the interface's ioctl function with the parameters passed. - + All undefined ioctls are reserved for future use by Apple. If you need to communicate with your kext using an ioctl, please use SIOCSIFKPI and SIOCGIFKPI. @@ -1059,8 +1154,8 @@ errno_t ifnet_input(ifnet_t interface, mbuf_t first_packet, @param ioctl_arg Any parameters to the ioctl. @result 0 on success otherwise the errno error. */ -errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol, - u_int32_t ioctl_code, void *ioctl_arg); +extern errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol, + unsigned long ioctl_code, void *ioctl_arg); /*! @function ifnet_event @@ -1070,14 +1165,14 @@ errno_t ifnet_ioctl(ifnet_t interface, protocol_family_t protocol, event. @result 0 on success otherwise the errno error. */ -errno_t ifnet_event(ifnet_t interface, struct kern_event_msg* event_ptr); +extern errno_t ifnet_event(ifnet_t interface, struct kern_event_msg *event_ptr); /*! @function ifnet_set_mtu @discussion Sets the value of the MTU in the interface structure. Calling this function will not notify the driver that the MTU should be changed. Use the appropriate ioctl. - + This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. @@ -1085,21 +1180,21 @@ errno_t ifnet_event(ifnet_t interface, struct kern_event_msg* event_ptr); @param mtu The new MTU. @result 0 on success otherwise the errno error. 
*/ -errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu); +extern errno_t ifnet_set_mtu(ifnet_t interface, u_int32_t mtu); /*! @function ifnet_mtu @param interface The interface. @result The MTU. */ -u_int32_t ifnet_mtu(ifnet_t interface); +extern u_int32_t ifnet_mtu(ifnet_t interface); /*! @function ifnet_type @param interface The interface. @result The type. See net/if_types.h. */ -u_int8_t ifnet_type(ifnet_t interface); +extern u_int8_t ifnet_type(ifnet_t interface); /*! @function ifnet_set_addrlen @@ -1111,14 +1206,14 @@ u_int8_t ifnet_type(ifnet_t interface); @param addrlen The new address length. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen); +extern errno_t ifnet_set_addrlen(ifnet_t interface, u_int8_t addrlen); /*! @function ifnet_addrlen @param interface The interface. @result The address length. */ -u_int8_t ifnet_addrlen(ifnet_t interface); +extern u_int8_t ifnet_addrlen(ifnet_t interface); /*! @function ifnet_set_hdrlen @@ -1130,14 +1225,14 @@ u_int8_t ifnet_addrlen(ifnet_t interface); @param hdrlen The new header length. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen); +extern errno_t ifnet_set_hdrlen(ifnet_t interface, u_int8_t hdrlen); /*! @function ifnet_hdrlen @param interface The interface. @result The header length. */ -u_int8_t ifnet_hdrlen(ifnet_t interface); +extern u_int8_t ifnet_hdrlen(ifnet_t interface); /*! @function ifnet_set_metric @@ -1149,14 +1244,14 @@ u_int8_t ifnet_hdrlen(ifnet_t interface); @param metric The new metric. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric); +extern errno_t ifnet_set_metric(ifnet_t interface, u_int32_t metric); /*! @function ifnet_metric @param interface The interface. @result The metric. */ -u_int32_t ifnet_metric(ifnet_t interface); +extern u_int32_t ifnet_metric(ifnet_t interface); /*! 
@function ifnet_set_baudrate @@ -1168,14 +1263,14 @@ u_int32_t ifnet_metric(ifnet_t interface); @param baudrate The new baudrate. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate); +extern errno_t ifnet_set_baudrate(ifnet_t interface, u_int64_t baudrate); /*! @function ifnet_baudrate @param interface The interface. @result The baudrate. */ -u_int64_t ifnet_baudrate(ifnet_t interface); +extern u_int64_t ifnet_baudrate(ifnet_t interface); /*! @function ifnet_stat_increment @@ -1189,8 +1284,8 @@ u_int64_t ifnet_baudrate(ifnet_t interface); ifnet_counter_increment structure are handled in the stack. @result 0 on success otherwise the errno error. */ -errno_t ifnet_stat_increment(ifnet_t interface, - const struct ifnet_stat_increment_param *counts); +extern errno_t ifnet_stat_increment(ifnet_t interface, + const struct ifnet_stat_increment_param *counts); /*! @function ifnet_stat_increment_in @@ -1199,7 +1294,7 @@ errno_t ifnet_stat_increment(ifnet_t interface, function allows a driver to update the inbound interface counts. The most efficient time to update these counts is when calling ifnet_input. - + A lock protects the counts, this makes the increment functions expensive. The increment function will update the lastchanged value. @@ -1209,16 +1304,16 @@ errno_t ifnet_stat_increment(ifnet_t interface, @param errors_in The number of additional receive errors. @result 0 on success otherwise the errno error. */ -errno_t ifnet_stat_increment_in(ifnet_t interface, - u_int32_t packets_in, u_int32_t bytes_in, - u_int32_t errors_in); +extern errno_t ifnet_stat_increment_in(ifnet_t interface, + u_int32_t packets_in, u_int32_t bytes_in, u_int32_t errors_in); /*! @function ifnet_stat_increment_out @discussion This function is intended to be called by the driver. This - function allows a driver to update the outbound interface counts. - + function allows a driver to update the outbound interface + counts. 
+ A lock protects the counts, this makes the increment functions expensive. The increment function will update the lastchanged value. @@ -1228,9 +1323,8 @@ errno_t ifnet_stat_increment_in(ifnet_t interface, @param errors_out The number of additional send errors. @result 0 on success otherwise the errno error. */ -errno_t ifnet_stat_increment_out(ifnet_t interface, - u_int32_t packets_out, u_int32_t bytes_out, - u_int32_t errors_out); +extern errno_t ifnet_stat_increment_out(ifnet_t interface, +u_int32_t packets_out, u_int32_t bytes_out, u_int32_t errors_out); /*! @function ifnet_set_stat @@ -1238,15 +1332,15 @@ errno_t ifnet_stat_increment_out(ifnet_t interface, This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. - + The one exception would be the case where a kext wants to zero all of the counters. @param interface The interface. @param counts The new stats values. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_stat(ifnet_t interface, - const struct ifnet_stats_param *stats); +extern errno_t ifnet_set_stat(ifnet_t interface, + const struct ifnet_stats_param *stats); /*! @function ifnet_stat @@ -1254,8 +1348,8 @@ errno_t ifnet_set_stat(ifnet_t interface, @param out_stats Storage for the values. @result 0 on success otherwise the errno error. */ -errno_t ifnet_stat(ifnet_t interface, - struct ifnet_stats_param *out_stats); +extern errno_t ifnet_stat(ifnet_t interface, + struct ifnet_stats_param *out_stats); /*! @function ifnet_set_promiscuous @@ -1272,7 +1366,7 @@ errno_t ifnet_stat(ifnet_t interface, zero, promiscuous mode will be disabled. @result 0 on success otherwise the errno error. */ -errno_t ifnet_set_promiscuous(ifnet_t interface, int on); +extern errno_t ifnet_set_promiscuous(ifnet_t interface, int on); /*! @function ifnet_touch_lastchange @@ -1280,7 +1374,7 @@ errno_t ifnet_set_promiscuous(ifnet_t interface, int on); @param interface The interface. 
@result 0 on success otherwise the errno error. */ -errno_t ifnet_touch_lastchange(ifnet_t interface); +extern errno_t ifnet_touch_lastchange(ifnet_t interface); /*! @function ifnet_lastchange @@ -1288,7 +1382,7 @@ errno_t ifnet_touch_lastchange(ifnet_t interface); @param last_change A timeval struct to copy the last time changed in to. */ -errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change); +extern errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change); /*! @function ifnet_get_address_list @@ -1303,7 +1397,7 @@ errno_t ifnet_lastchange(ifnet_t interface, struct timeval *last_change); @param addresses A pointer to a NULL terminated array of ifaddr_ts. @result 0 on success otherwise the errno error. */ -errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses); +extern errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses); /*! @function ifnet_get_address_list_family @@ -1320,7 +1414,8 @@ errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses); @param addresses A pointer to a NULL terminated array of ifaddr_ts. @result 0 on success otherwise the errno error. */ -errno_t ifnet_get_address_list_family(ifnet_t interface, ifaddr_t **addresses, sa_family_t family); +extern errno_t ifnet_get_address_list_family(ifnet_t interface, + ifaddr_t **addresses, sa_family_t family); /*! @function ifnet_free_address_list @@ -1329,7 +1424,7 @@ errno_t ifnet_get_address_list_family(ifnet_t interface, ifaddr_t **addresses, s memory used for the array of references. @param addresses An array of ifaddr_ts. */ -void ifnet_free_address_list(ifaddr_t *addresses); +extern void ifnet_free_address_list(ifaddr_t *addresses); /*! @function ifnet_set_lladdr @@ -1340,7 +1435,8 @@ void ifnet_free_address_list(ifaddr_t *addresses); the 6 byte ethernet address for ethernet). @param lladdr_len The length, in bytes, of the link layer address. 
*/ -errno_t ifnet_set_lladdr(ifnet_t interface, const void* lladdr, size_t lladdr_len); +extern errno_t ifnet_set_lladdr(ifnet_t interface, const void *lladdr, + size_t lladdr_len); /*! @function ifnet_lladdr_copy_bytes @@ -1351,7 +1447,8 @@ errno_t ifnet_set_lladdr(ifnet_t interface, const void* lladdr, size_t lladdr_le @param length The length of the buffer. This value must match the length of the link-layer address. */ -errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void* lladdr, size_t length); +extern errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr, + size_t length); #ifdef KERNEL_PRIVATE /*! @@ -1359,8 +1456,8 @@ errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void* lladdr, size_t length); @discussion Returns a pointer to the link-layer address. @param interface The interface the link-layer address is on. */ -void* ifnet_lladdr(ifnet_t interface); -#endif KERNEL_PRIVATE +extern void *ifnet_lladdr(ifnet_t interface); +#endif /* KERNEL_PRIVATE */ /*! @function ifnet_llbroadcast_copy_bytes @@ -1372,8 +1469,8 @@ void* ifnet_lladdr(ifnet_t interface); @param addr_len On return, the length of the broadcast address. @param lladdr_len The length, in bytes, of the link layer address. */ -errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void* addr, - size_t bufferlen, size_t* addr_len); +extern errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr, + size_t bufferlen, size_t *addr_len); #ifdef KERNEL_PRIVATE /*! @@ -1388,8 +1485,9 @@ errno_t ifnet_llbroadcast_copy_bytes(ifnet_t interface, void* addr, @param lladdr_len The length, in bytes, of the link layer address. @param type The link-layer address type. */ -errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void* lladdr, size_t length, u_char type); -#endif KERNEL_PRIVATE +extern errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void *lladdr, + size_t length, u_char type); +#endif /* KERNEL_PRIVATE */ /*! 
@function ifnet_resolve_multicast @@ -1405,8 +1503,8 @@ errno_t ifnet_set_lladdr_and_type(ifnet_t interface, const void* lladdr, size_t not supported or could not be translated. Other errors may indicate other failures. */ -errno_t ifnet_resolve_multicast(ifnet_t ifp, const struct sockaddr *proto_addr, - struct sockaddr *ll_addr, size_t ll_len); +extern errno_t ifnet_resolve_multicast(ifnet_t ifp, + const struct sockaddr *proto_addr, struct sockaddr *ll_addr, size_t ll_len); /*! @function ifnet_add_multicast @@ -1422,8 +1520,8 @@ errno_t ifnet_resolve_multicast(ifnet_t ifp, const struct sockaddr *proto_addr, @param multicast The resulting ifmultiaddr_t multicast address. @result 0 on success otherwise the errno error. */ -errno_t ifnet_add_multicast(ifnet_t interface, const struct sockaddr *maddr, - ifmultiaddr_t *multicast); +extern errno_t ifnet_add_multicast(ifnet_t interface, + const struct sockaddr *maddr, ifmultiaddr_t *multicast); /*! @function ifnet_remove_multicast @@ -1433,23 +1531,23 @@ errno_t ifnet_add_multicast(ifnet_t interface, const struct sockaddr *maddr, removed when the number of times ifnet_remove_multicast has been called matches the number of times ifnet_add_multicast has been called. - + The memory for the multicast address is not actually freed until the separate reference count has reached zero. Some parts of the stack may keep a pointer to the multicast even after that multicast has been removed from the interface. - + When an interface is detached, all of the multicasts are removed. If the interface of the multicast passed in is no longer attached, this function will gracefully return, performing no work. - + It is the callers responsibility to release the multicast address after calling this function. @param multicast The multicast to be removed. @result 0 on success otherwise the errno error. */ -errno_t ifnet_remove_multicast(ifmultiaddr_t multicast); +extern errno_t ifnet_remove_multicast(ifmultiaddr_t multicast); /*! 
@function ifnet_get_multicast_list @@ -1464,7 +1562,8 @@ errno_t ifnet_remove_multicast(ifmultiaddr_t multicast); to the multicast addresses. @result 0 on success otherwise the errno error. */ -errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses); +extern errno_t ifnet_get_multicast_list(ifnet_t interface, + ifmultiaddr_t **addresses); /*! @function ifnet_free_multicast_list @@ -1474,7 +1573,7 @@ errno_t ifnet_get_multicast_list(ifnet_t interface, ifmultiaddr_t **addresses); @param multicasts An array of references to the multicast addresses. @result 0 on success otherwise the errno error. */ -void ifnet_free_multicast_list(ifmultiaddr_t *multicasts); +extern void ifnet_free_multicast_list(ifmultiaddr_t *multicasts); /*! @function ifnet_find_by_name @@ -1487,7 +1586,7 @@ void ifnet_free_multicast_list(ifmultiaddr_t *multicasts); filled in if a matching interface is found. @result 0 on success otherwise the errno error. */ -errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface); +extern errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface); /*! @function ifnet_list_get @@ -1503,7 +1602,8 @@ errno_t ifnet_find_by_name(const char *ifname, ifnet_t *interface); matching interfaces in the array. @result 0 on success otherwise the errno error. */ -errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count); +extern errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, + u_int32_t *count); #ifdef KERNEL_PRIVATE /*! @@ -1521,7 +1621,8 @@ errno_t ifnet_list_get(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *c matching interfaces in the array. @result 0 on success otherwise the errno error. */ -errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, u_int32_t *count); +extern errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, + u_int32_t *count); #endif /* KERNEL_PRIVATE */ /*! 
@@ -1533,11 +1634,11 @@ errno_t ifnet_list_get_all(ifnet_family_t family, ifnet_t **interfaces, u_int32_ ifnet_list_free. @param interfaces An array of interface references from ifnet_list_get. */ -void ifnet_list_free(ifnet_t *interfaces); +extern void ifnet_list_free(ifnet_t *interfaces); -/********************************************************************************************/ -/* ifaddr_t accessors */ -/********************************************************************************************/ +/******************************************************************************/ +/* ifaddr_t accessors */ +/******************************************************************************/ /*! @function ifaddr_reference @@ -1546,7 +1647,7 @@ void ifnet_list_free(ifnet_t *interfaces); @param ifaddr The interface address. @result 0 upon success */ -errno_t ifaddr_reference(ifaddr_t ifaddr); +extern errno_t ifaddr_reference(ifaddr_t ifaddr); /*! @function ifaddr_release @@ -1555,7 +1656,7 @@ errno_t ifaddr_reference(ifaddr_t ifaddr); @param ifaddr The interface address. @result 0 upon success */ -errno_t ifaddr_release(ifaddr_t ifaddr); +extern errno_t ifaddr_release(ifaddr_t ifaddr); /*! @function ifaddr_address @@ -1565,7 +1666,8 @@ errno_t ifaddr_release(ifaddr_t ifaddr); @param addr_size The size of the storage for the address. @result 0 upon success */ -errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr, u_int32_t addr_size); +extern errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr, + u_int32_t addr_size); /*! @function ifaddr_address @@ -1573,7 +1675,7 @@ errno_t ifaddr_address(ifaddr_t ifaddr, struct sockaddr *out_addr, u_int32_t add @param ifaddr The interface address. @result 0 on failure, address family on success. */ -sa_family_t ifaddr_address_family(ifaddr_t ifaddr); +extern sa_family_t ifaddr_address_family(ifaddr_t ifaddr); /*! 
@function ifaddr_dstaddress @@ -1583,7 +1685,8 @@ sa_family_t ifaddr_address_family(ifaddr_t ifaddr); @param dstaddr_size The size of the storage for the destination address. @result 0 upon success */ -errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr, u_int32_t dstaddr_size); +extern errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr, + u_int32_t dstaddr_size); /*! @function ifaddr_netmask @@ -1593,7 +1696,8 @@ errno_t ifaddr_dstaddress(ifaddr_t ifaddr, struct sockaddr *out_dstaddr, u_int32 @param netmask_size The size of the storage for the netmask. @result 0 upon success */ -errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask, u_int32_t netmask_size); +extern errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask, + u_int32_t netmask_size); /*! @function ifaddr_ifnet @@ -1605,7 +1709,7 @@ errno_t ifaddr_netmask(ifaddr_t ifaddr, struct sockaddr *out_netmask, u_int32_t @param ifaddr The interface address. @result A reference to the interface the address is attached to. */ -ifnet_t ifaddr_ifnet(ifaddr_t ifaddr); +extern ifnet_t ifaddr_ifnet(ifaddr_t ifaddr); /*! @function ifaddr_withaddr @@ -1615,7 +1719,7 @@ ifnet_t ifaddr_ifnet(ifaddr_t ifaddr); @param address The address to search for. @result A reference to the interface address. */ -ifaddr_t ifaddr_withaddr(const struct sockaddr* address); +extern ifaddr_t ifaddr_withaddr(const struct sockaddr *address); /*! @function ifaddr_withdstaddr @@ -1626,7 +1730,7 @@ ifaddr_t ifaddr_withaddr(const struct sockaddr* address); @param destination The destination to search for. @result A reference to the interface address. */ -ifaddr_t ifaddr_withdstaddr(const struct sockaddr* destination); +extern ifaddr_t ifaddr_withdstaddr(const struct sockaddr *destination); /*! @function ifaddr_withnet @@ -1637,7 +1741,7 @@ ifaddr_t ifaddr_withdstaddr(const struct sockaddr* destination); @param net The network to search for. 
@result A reference to the interface address. */ -ifaddr_t ifaddr_withnet(const struct sockaddr* net); +extern ifaddr_t ifaddr_withnet(const struct sockaddr *net); /*! @function ifaddr_withroute @@ -1650,8 +1754,8 @@ ifaddr_t ifaddr_withnet(const struct sockaddr* net); @param gateway A gateway to search for. @result A reference to the interface address. */ -ifaddr_t ifaddr_withroute(int flags, const struct sockaddr* destination, - const struct sockaddr* gateway); +extern ifaddr_t ifaddr_withroute(int flags, const struct sockaddr *destination, + const struct sockaddr *gateway); /*! @function ifaddr_findbestforaddr @@ -1663,11 +1767,12 @@ ifaddr_t ifaddr_withroute(int flags, const struct sockaddr* destination, @param interface The local interface. @result A reference to the interface address. */ -ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr, ifnet_t interface); +extern ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr, + ifnet_t interface); -/********************************************************************************************/ -/* ifmultiaddr_t accessors */ -/********************************************************************************************/ +/******************************************************************************/ +/* ifmultiaddr_t accessors */ +/******************************************************************************/ /*! @function ifmaddr_reference @@ -1676,7 +1781,7 @@ ifaddr_t ifaddr_findbestforaddr(const struct sockaddr *addr, ifnet_t interface); @param ifmaddr The interface multicast address. @result 0 on success. Only error will be EINVAL if ifmaddr is not valid. */ -errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr); +extern errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr); /*! @function ifmaddr_release @@ -1687,7 +1792,7 @@ errno_t ifmaddr_reference(ifmultiaddr_t ifmaddr); @param ifmaddr The interface multicast address. @result 0 on success. Only error will be EINVAL if ifmaddr is not valid. 
*/ -errno_t ifmaddr_release(ifmultiaddr_t ifmaddr); +extern errno_t ifmaddr_release(ifmultiaddr_t ifmaddr); /*! @function ifmaddr_address @@ -1696,7 +1801,8 @@ errno_t ifmaddr_release(ifmultiaddr_t ifmaddr); @param addr_size Size of the storage. @result 0 on success. */ -errno_t ifmaddr_address(ifmultiaddr_t ifmaddr, struct sockaddr *out_multicast, u_int32_t addr_size); +extern errno_t ifmaddr_address(ifmultiaddr_t ifmaddr, + struct sockaddr *out_multicast, u_int32_t addr_size); /*! @function ifmaddr_lladdress @@ -1706,8 +1812,8 @@ errno_t ifmaddr_address(ifmultiaddr_t ifmaddr, struct sockaddr *out_multicast, u @param addr_size Size of the storage. @result 0 on success. */ -errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr, struct sockaddr *out_link_layer_multicast, - u_int32_t addr_size); +extern errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr, + struct sockaddr *out_link_layer_multicast, u_int32_t addr_size); /*! @function ifmaddr_ifnet @@ -1720,8 +1826,8 @@ errno_t ifmaddr_lladdress(ifmultiaddr_t ifmaddr, struct sockaddr *out_link_layer @param ifmaddr The interface multicast address. @result A reference to the interface. */ -ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr); +extern ifnet_t ifmaddr_ifnet(ifmultiaddr_t ifmaddr); __END_DECLS -#endif +#endif /* __KPI_INTERFACE__ */ diff --git a/bsd/net/kpi_interfacefilter.h b/bsd/net/kpi_interfacefilter.h index e5c17d896..e1ea99aef 100644 --- a/bsd/net/kpi_interfacefilter.h +++ b/bsd/net/kpi_interfacefilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,7 +45,7 @@ __BEGIN_DECLS /*! @typedef iff_input_func - + @discussion iff_input_func is used to filter incoming packets. The interface is only valid for the duration of the filter call. 
If you need to keep a reference to the interface, be sure to call @@ -55,7 +55,7 @@ __BEGIN_DECLS passed in separately from the rest of the packet. The outbound data filters is passed the whole packet including the frame header. - + The frame header usually preceeds the data in the mbuf. This ensures that the frame header will be a valid pointer as long as the mbuf is not freed. If you need to change the frame header to @@ -74,16 +74,19 @@ __BEGIN_DECLS frame header length can be found by inspecting the interface's frame header length (ifnet_hdrlen). @result Return: - 0 - The caller will continue with normal processing of the packet. - EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. - Anything Else - The caller will free the packet and stop processing. + 0 - The caller will continue with normal processing of the + packet. + EJUSTRETURN - The caller will stop processing the packet, + the packet will not be freed. + Anything Else - The caller will free the packet and stop + processing. */ -typedef errno_t (*iff_input_func)(void* cookie, ifnet_t interface, protocol_family_t protocol, - mbuf_t *data, char **frame_ptr); +typedef errno_t (*iff_input_func)(void *cookie, ifnet_t interface, + protocol_family_t protocol, mbuf_t *data, char **frame_ptr); /*! @typedef iff_output_func - + @discussion iff_output_func is used to filter fully formed outbound packets. The interface is only valid for the duration of the filter call. If you need to keep a reference to the interface, @@ -94,16 +97,19 @@ typedef errno_t (*iff_input_func)(void* cookie, ifnet_t interface, protocol_fami The frame header is already included. The filter function may modify the packet or return a different mbuf chain. @result Return: - 0 - The caller will continue with normal processing of the packet. - EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. - Anything Else - The caller will free the packet and stop processing. 
+ 0 - The caller will continue with normal processing of the + packet. + EJUSTRETURN - The caller will stop processing the packet, + the packet will not be freed. + Anything Else - The caller will free the packet and stop + processing. */ -typedef errno_t (*iff_output_func)(void* cookie, ifnet_t interface, protocol_family_t protocol, - mbuf_t *data); +typedef errno_t (*iff_output_func)(void *cookie, ifnet_t interface, + protocol_family_t protocol, mbuf_t *data); /*! @typedef iff_event_func - + @discussion iff_event_func is used to filter interface specific events. The interface is only valid for the duration of the filter call. If you need to keep a reference to the interface, @@ -112,17 +118,17 @@ typedef errno_t (*iff_output_func)(void* cookie, ifnet_t interface, protocol_fam @param interface The interface the packet is being transmitted on. @param event_msg The kernel event, may not be changed. */ -typedef void (*iff_event_func)(void* cookie, ifnet_t interface, protocol_family_t protocol, - const struct kev_msg *event_msg); +typedef void (*iff_event_func)(void *cookie, ifnet_t interface, + protocol_family_t protocol, const struct kev_msg *event_msg); /*! @typedef iff_ioctl_func - + @discussion iff_ioctl_func is used to filter ioctls sent to an interface. The interface is only valid for the duration of the filter call. If you need to keep a reference to the interface, be sure to call ifnet_reference and ifnet_release. - + All undefined ioctls are reserved for future use by Apple. If you need to communicate with your kext using an ioctl, please use SIOCSIFKPI and SIOCGIFKPI. @@ -132,16 +138,19 @@ typedef void (*iff_event_func)(void* cookie, ifnet_t interface, protocol_family_ @param ioctl_arg A pointer to the ioctl argument. @result Return: 0 - This filter function handled the ioctl. - EOPNOTSUPP - This filter function does not understand/did not handle this ioctl. - EJUSTRETURN - This filter function handled the ioctl, processing should stop. 
- Anything Else - Processing will stop, the error will be returned. + EOPNOTSUPP - This filter function does not understand/did not + handle this ioctl. + EJUSTRETURN - This filter function handled the ioctl, + processing should stop. + Anything Else - Processing will stop, the error will be + returned. */ -typedef errno_t (*iff_ioctl_func)(void* cookie, ifnet_t interface, protocol_family_t protocol, - u_long ioctl_cmd, void* ioctl_arg); +typedef errno_t (*iff_ioctl_func)(void *cookie, ifnet_t interface, + protocol_family_t protocol, unsigned long ioctl_cmd, void *ioctl_arg); /*! @typedef iff_detached_func - + @discussion iff_detached_func is called to notify the filter that it has been detached from an interface. This is the last call to the filter that will be made. A filter may be detached if the @@ -152,7 +161,7 @@ typedef errno_t (*iff_ioctl_func)(void* cookie, ifnet_t interface, protocol_fami @param cookie The cookie specified when this filter was attached. @param interface The interface this filter was detached from. */ -typedef void (*iff_detached_func)(void* cookie, ifnet_t interface); +typedef void (*iff_detached_func)(void *cookie, ifnet_t interface); /*! @struct iff_filter @@ -177,8 +186,8 @@ typedef void (*iff_detached_func)(void* cookie, ifnet_t interface); */ struct iff_filter { - void* iff_cookie; - const char* iff_name; + void *iff_cookie; + const char *iff_name; protocol_family_t iff_protocol; iff_input_func iff_input; iff_output_func iff_output; @@ -195,15 +204,15 @@ struct iff_filter { @param filter_ref A reference to the filter used to detach. @result 0 on success otherwise the errno error. */ -errno_t iflt_attach(ifnet_t interface, const struct iff_filter* filter, - interface_filter_t *filter_ref); +extern errno_t iflt_attach(ifnet_t interface, const struct iff_filter *filter, + interface_filter_t *filter_ref); /*! @function iflt_detach @discussion Detaches an interface filter from an interface. 
@param filter_ref The reference to the filter from iflt_attach. */ -void iflt_detach(interface_filter_t filter_ref); +extern void iflt_detach(interface_filter_t filter_ref); __END_DECLS -#endif +#endif /* __KPI_INTERFACEFILTER__ */ diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c index 9b63ec840..a48cd249a 100644 --- a/bsd/net/kpi_protocol.c +++ b/bsd/net/kpi_protocol.c @@ -40,8 +40,8 @@ void proto_input_run(void); -typedef int (*attach_t)(struct ifnet *ifp, u_long protocol_family); -typedef int (*detach_t)(struct ifnet *ifp, u_long protocol_family); +typedef int (*attach_t)(struct ifnet *ifp, uint32_t protocol_family); +typedef int (*detach_t)(struct ifnet *ifp, uint32_t protocol_family); struct proto_input_entry { struct proto_input_entry *next; diff --git a/bsd/net/kpi_protocol.h b/bsd/net/kpi_protocol.h index 60801e824..46877b8be 100644 --- a/bsd/net/kpi_protocol.h +++ b/bsd/net/kpi_protocol.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,9 +42,9 @@ __BEGIN_DECLS -/****************************************************************************/ -/* Protocol input/inject */ -/****************************************************************************/ +/******************************************************************************/ +/* Protocol input/inject */ +/******************************************************************************/ #ifdef KERNEL_PRIVATE /*! @@ -77,8 +77,9 @@ typedef void (*proto_input_detached_handler)(protocol_family_t protocol); @param chains Input function supports packet chains. @result A errno error on failure. 
*/ -errno_t proto_register_input(protocol_family_t protocol, proto_input_handler input, - proto_input_detached_handler detached, int chains); +extern errno_t proto_register_input(protocol_family_t protocol, + proto_input_handler input, proto_input_detached_handler detached, + int chains); /*! @function proto_unregister_input @@ -95,8 +96,8 @@ errno_t proto_register_input(protocol_family_t protocol, proto_input_handler inp on the normal input path). @result A errno error on failure. */ -void proto_unregister_input(protocol_family_t protocol); -#endif +extern void proto_unregister_input(protocol_family_t protocol); +#endif /* KERNEL_PRIVATE */ /*! @function proto_input @@ -107,7 +108,7 @@ void proto_unregister_input(protocol_family_t protocol); @result A errno error on failure. Unless proto_input returns zero, the caller is responsible for freeing the mbuf. */ -errno_t proto_input(protocol_family_t protocol, mbuf_t packet); +extern errno_t proto_input(protocol_family_t protocol, mbuf_t packet); /*! @function proto_inject @@ -119,12 +120,12 @@ errno_t proto_input(protocol_family_t protocol, mbuf_t packet); @result A errno error on failure. Unless proto_inject returns zero, the caller is responsible for freeing the mbuf. */ -errno_t proto_inject(protocol_family_t protocol, mbuf_t packet); +extern errno_t proto_inject(protocol_family_t protocol, mbuf_t packet); -/****************************************************************************/ -/* Protocol plumbing */ -/****************************************************************************/ +/******************************************************************************/ +/* Protocol plumbing */ +/******************************************************************************/ /*! @typedef proto_plumb_handler @@ -152,9 +153,9 @@ typedef void (*proto_unplumb_handler)(ifnet_t ifp, protocol_family_t protocol); /*! 
@function proto_register_plumber - @discussion Allows the caller to specify the functions called when a protocol - is attached to an interface belonging to the specified family and when - that protocol is detached. + @discussion Allows the caller to specify the functions called when a + protocol is attached to an interface belonging to the specified + family and when that protocol is detached. @param proto_fam The protocol family these plumbing functions will handle. @param if_fam The interface family these plumbing functions will @@ -166,8 +167,9 @@ typedef void (*proto_unplumb_handler)(ifnet_t ifp, protocol_family_t protocol); be used to detach the protocol. @result A non-zero value of the attach failed. */ -errno_t proto_register_plumber(protocol_family_t proto_fam, ifnet_family_t if_fam, - proto_plumb_handler plumb, proto_unplumb_handler unplumb); +extern errno_t proto_register_plumber(protocol_family_t proto_fam, + ifnet_family_t if_fam, proto_plumb_handler plumb, + proto_unplumb_handler unplumb); /*! @function proto_unregister_plumber @@ -176,72 +178,43 @@ errno_t proto_register_plumber(protocol_family_t proto_fam, ifnet_family_t if_fa handle. @param if_fam The interface family these plumbing functions handle. */ -void proto_unregister_plumber(protocol_family_t proto_fam, ifnet_family_t if_fam); +extern void proto_unregister_plumber(protocol_family_t proto_fam, + ifnet_family_t if_fam); #ifdef KERNEL_PRIVATE - -/* - -Function : proto_plumb - - proto_plumb() will plumb a protocol to an actual interface. - This will find a registered protocol module and call its attach function. - The module will typically call dlil_attach_protocol with the appropriate parameters. - -Parameters : - 'protocol_family' is PF_INET, PF_INET6, ... - 'ifp' is the interface to plumb the protocol to. - -Return code : - -0 : - - No error. - -ENOENT: - - No module was registered. 
- -other: - - Error returned by the attach_proto function - +/* + @function proto_plumb + @discussion Plumbs a protocol to an actual interface. This will find + a registered protocol module and call its attach function. + The module will typically call dlil_attach_protocol() with the + appropriate parameters. + @param protocol_family The protocol family. + @param ifp The interface to plumb the protocol to. + @result 0: No error. + ENOENT: No module was registered. + Other: Error returned by the attach_proto function */ -errno_t proto_plumb(protocol_family_t protocol_family, ifnet_t ifp); - -/* - -Function : proto_unplumb - - proto_unplumb() will unplumb a protocol from an interface. - This will find a registered protocol module and call its detach function. - The module will typically call dlil_detach_protocol with the appropriate parameters. - If no module is found, this function will call dlil_detach_protocol directly. - -Parameters : - 'protocol_family' is PF_INET, PF_INET6, ... - 'ifp' is APPLE_IF_FAM_ETHERNET, APPLE_IF_FAM_PPP, ... - -Return code : - -0 : - - No error. - -ENOENT: - - No module was registered. - -other: - - Error returned by the attach_proto function +extern errno_t proto_plumb(protocol_family_t protocol_family, ifnet_t ifp); +/* + @function proto_unplumb + @discussion Unplumbs a protocol from an interface. This will find + a registered protocol module and call its detach function. + The module will typically call dlil_detach_protocol() with + the appropriate parameters. If no module is found, this + function will call dlil_detach_protocol() directly. + @param protocol_family The protocol family. + @param ifp The interface to unplumb the protocol from. + @result 0: No error. + ENOENT: No module was registered. 
+ Other: Error returned by the attach_proto function */ -errno_t proto_unplumb(protocol_family_t protocol_family, ifnet_t ifp); +extern errno_t proto_unplumb(protocol_family_t protocol_family, ifnet_t ifp); -__private_extern__ void proto_kpi_init(void) __attribute__((section("__TEXT, initcode"))); +__private_extern__ void +proto_kpi_init(void) __attribute__((section("__TEXT, initcode"))); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ __END_DECLS -#endif +#endif /* __KPI_PROTOCOL__ */ diff --git a/bsd/net/multicast_list.c b/bsd/net/multicast_list.c index 0f9604b42..68fbf23b0 100644 --- a/bsd/net/multicast_list.c +++ b/bsd/net/multicast_list.c @@ -130,6 +130,10 @@ multicast_list_program(struct multicast_list * mc_list, continue; } mc = _MALLOC(sizeof(struct multicast_entry), M_DEVBUF, M_WAITOK); + if (mc == NULL) { + error = ENOBUFS; + break; + } bcopy(LLADDR(&source_sdl), LLADDR(&target_sdl), alen); error = ifnet_add_multicast(target_ifp, (struct sockaddr *)&target_sdl, &mc->mc_ifma); diff --git a/bsd/net/multicast_list.h b/bsd/net/multicast_list.h index c63c4abeb..267fb3b07 100644 --- a/bsd/net/multicast_list.h +++ b/bsd/net/multicast_list.h @@ -56,4 +56,4 @@ multicast_list_program(struct multicast_list * mc_list, int multicast_list_remove(struct multicast_list * mc_list); -#endif _NET_MULTICAST_LIST_H +#endif /* _NET_MULTICAST_LIST_H */ diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c index adc642bc3..1797d16f4 100644 --- a/bsd/net/ndrv.c +++ b/bsd/net/ndrv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2007 Apple Inc. All rights reserved. + * Copyright (c) 1997-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,8 +43,7 @@ * * In addition to the former use, when combined with socket NKEs, * PF_NDRV permits a fairly flexible mechanism for implementing - * strange protocol support. One of the main ones will be the - * BlueBox/Classic Shared IP Address support. + * strange protocol support. 
*/ #include @@ -82,19 +81,19 @@ static int ndrv_do_detach(struct ndrv_cb *); static int ndrv_do_disconnect(struct ndrv_cb *); -static struct ndrv_cb *ndrv_find_inbound(struct ifnet *ifp, u_long protocol_family); +static struct ndrv_cb *ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol_family); static int ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt); static int ndrv_delspec(struct ndrv_cb *); static int ndrv_to_ifnet_demux(struct ndrv_demux_desc* ndrv, struct ifnet_demux_desc* ifdemux); -static void ndrv_handle_ifp_detach(u_long family, short unit); +static void ndrv_handle_ifp_detach(u_int32_t family, short unit); static int ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt); static int ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt); static struct ndrv_multiaddr* ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* addr); static void ndrv_remove_all_multicast(struct ndrv_cb *np); static void ndrv_dominit(void) __attribute__((section("__TEXT, initcode"))); -unsigned long ndrv_sendspace = NDRVSNDQ; -unsigned long ndrv_recvspace = NDRVRCVQ; +u_int32_t ndrv_sendspace = NDRVSNDQ; +u_int32_t ndrv_recvspace = NDRVRCVQ; TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl); extern struct domain ndrvdomain; @@ -268,7 +267,6 @@ static int ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) { struct ndrv_cb *np = sotondrvcb(so); - int result = 0; if (np == 0) return EINVAL; @@ -276,13 +274,11 @@ ndrv_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) if (np->nd_faddr) return EISCONN; - /* Allocate memory to store the remote address */ - MALLOC(np->nd_faddr, struct sockaddr_ndrv*, + /* Allocate memory to store the remote address */ + MALLOC(np->nd_faddr, struct sockaddr_ndrv*, nam->sa_len, M_IFADDR, M_WAITOK); - if (result != 0) - return result; - if (np->nd_faddr == NULL) - return ENOMEM; + if (np->nd_faddr == NULL) + return ENOMEM; bcopy((caddr_t) nam, (caddr_t) 
np->nd_faddr, nam->sa_len); soisconnected(so); @@ -543,7 +539,7 @@ ndrv_do_detach(struct ndrv_cb *np) /* Remove from the linked list of control blocks */ TAILQ_REMOVE(&ndrvl, np, nd_next); if (ifp != NULL) { - u_long proto_family = np->nd_proto_family; + u_int32_t proto_family = np->nd_proto_family; if (proto_family != PF_NDRV && proto_family != 0) { socket_unlock(so, 0); @@ -669,7 +665,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) return EINVAL; /* Copy the ndrvSpec */ - if (proc_is64bit(current_proc())) { + if (proc_is64bit(sopt->sopt_p)) { struct ndrv_protocol_desc64 ndrvSpec64; if (sopt->sopt_valsize != sizeof(ndrvSpec64)) @@ -686,14 +682,20 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) user_addr = ndrvSpec64.demux_list; } else { - if (sopt->sopt_valsize != sizeof(ndrvSpec)) + struct ndrv_protocol_desc32 ndrvSpec32; + + if (sopt->sopt_valsize != sizeof(ndrvSpec32)) return EINVAL; - error = sooptcopyin(sopt, &ndrvSpec, sizeof(ndrvSpec), sizeof(ndrvSpec)); + error = sooptcopyin(sopt, &ndrvSpec32, sizeof(ndrvSpec32), sizeof(ndrvSpec32)); if (error != 0) return error; - user_addr = CAST_USER_ADDR_T(ndrvSpec.demux_list); + ndrvSpec.version = ndrvSpec32.version; + ndrvSpec.protocol_family = ndrvSpec32.protocol_family; + ndrvSpec.demux_count = ndrvSpec32.demux_count; + + user_addr = CAST_USER_ADDR_T(ndrvSpec32.demux_list); } /* Verify the parameter */ @@ -729,7 +731,7 @@ ndrv_setspec(struct ndrv_cb *np, struct sockopt *sopt) if (error == 0) { /* At this point, we've at least got enough bytes to start looking around */ - u_long demuxOn = 0; + u_int32_t demuxOn = 0; proto_param.demux_count = ndrvSpec.demux_count; proto_param.input = ndrv_input; @@ -806,7 +808,7 @@ ndrv_delspec(struct ndrv_cb *np) } struct ndrv_cb * -ndrv_find_inbound(struct ifnet *ifp, u_long protocol) +ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol) { struct ndrv_cb* np; @@ -832,7 +834,7 @@ static void ndrv_dominit(void) } static void -ndrv_handle_ifp_detach(u_long 
family, short unit) +ndrv_handle_ifp_detach(u_int32_t family, short unit) { struct ndrv_cb* np; struct ifnet *ifp = NULL; diff --git a/bsd/net/ndrv.h b/bsd/net/ndrv.h index 5a1feb1f5..6f61df9f5 100644 --- a/bsd/net/ndrv.h +++ b/bsd/net/ndrv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -107,17 +107,17 @@ struct ndrv_demux_desc * Used to "bind" an NDRV socket so that packets that match * given protocol demux descriptions can be received: * Field: - * version : must be NDRV_PROTOCOL_DESC_VERS - * protocol_family : unique identifier for this protocol - * demux_count : number of demux_list descriptors in demux_list - * demux_list : pointer to array of demux descriptors + * version : must be NDRV_PROTOCOL_DESC_VERS + * protocol_family : unique identifier for this protocol + * demux_count : number of demux_list descriptors in demux_list + * demux_list : pointer to array of demux descriptors */ struct ndrv_protocol_desc { u_int32_t version; u_int32_t protocol_family; u_int32_t demux_count; - struct ndrv_demux_desc* demux_list; + struct ndrv_demux_desc *demux_list; }; #ifdef KERNEL_PRIVATE @@ -129,10 +129,16 @@ struct ndrv_protocol_desc64 { u_int32_t version; u_int32_t protocol_family; u_int32_t demux_count; - user_addr_t demux_list __attribute__((aligned(8))); + user64_addr_t demux_list __attribute__((aligned(8))); }; -#endif // KERNEL_PRIVATE +struct ndrv_protocol_desc32 { + u_int32_t version; + u_int32_t protocol_family; + u_int32_t demux_count; + user32_addr_t demux_list; +}; +#endif /* KERNEL_PRIVATE */ #define SOL_NDRVPROTO NDRVPROTO_NDRV /* Use this socket level */ #define NDRV_DELDMXSPEC 0x02 /* Delete the registered protocol */ diff --git a/bsd/net/ndrv_var.h b/bsd/net/ndrv_var.h index ba6bf781c..e12a0e0ef 100644 --- a/bsd/net/ndrv_var.h +++ b/bsd/net/ndrv_var.h @@ -63,8 +63,8 @@ struct ndrv_cb int nd_descrcnt; 
/* # elements in nd_dlist - Obsolete */ TAILQ_HEAD(dlist, dlil_demux_desc) nd_dlist; /* Descr. list */ struct ifnet *nd_if; /* obsolete, maintained for binary compatibility */ - u_long nd_proto_family; - u_long nd_family; + u_int32_t nd_proto_family; + u_int32_t nd_family; struct ndrv_multiaddr* nd_multiaddrs; short nd_unit; }; diff --git a/bsd/net/net_str_id.c b/bsd/net/net_str_id.c new file mode 100644 index 000000000..7f4fcd52f --- /dev/null +++ b/bsd/net/net_str_id.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "net/net_str_id.h" + +#define NET_ID_STR_ENTRY_SIZE(__str) \ + ((size_t)&(((struct net_str_id_entry*)0)->nsi_string[0]) + \ + strlen(__str) + 1) + +#define FIRST_NET_STR_ID 1000 +static SLIST_HEAD(,net_str_id_entry) net_str_id_list = {NULL}; +static lck_mtx_t *net_str_id_lock = NULL; + +static u_int32_t nsi_kind_next[NSI_MAX_KIND] = { FIRST_NET_STR_ID, FIRST_NET_STR_ID, FIRST_NET_STR_ID }; +static u_int32_t nsi_next_id = FIRST_NET_STR_ID; + +#if NETMIBS + +extern int sysctl_if_family_ids SYSCTL_HANDLER_ARGS; + +SYSCTL_DECL(_net_link_generic_system); + +SYSCTL_PROC(_net_link_generic_system, OID_AUTO, if_family_ids, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, sysctl_if_family_ids, "S, if_family_id", "Interface Family ID table"); + +#endif /* NETMIBS */ + + +__private_extern__ void +net_str_id_init(void) +{ + lck_grp_attr_t *grp_attrib = NULL; + lck_attr_t *lck_attrb = NULL; + lck_grp_t *lck_group = NULL; + + grp_attrib = lck_grp_attr_alloc_init(); + lck_group = lck_grp_alloc_init("mbuf_tag_allocate_id", grp_attrib); + lck_grp_attr_free(grp_attrib); + lck_attrb = lck_attr_alloc_init(); + + net_str_id_lock = lck_mtx_alloc_init(lck_group, lck_attrb); + + lck_grp_free(lck_group); + lck_attr_free(lck_attrb); +} + +__private_extern__ void +net_str_id_first_last(u_int32_t *first, u_int32_t *last, u_int32_t kind) +{ + *first = FIRST_NET_STR_ID; + + switch (kind) { + case NSI_MBUF_TAG: + case NSI_VENDOR_CODE: + case NSI_IF_FAM_ID: + *last = nsi_kind_next[kind] - 1; + break; + default: + *last = FIRST_NET_STR_ID - 1; + break; + } +} + +__private_extern__ errno_t +net_str_id_find_internal(const char *string, u_int32_t *out_id, + u_int32_t kind, int create) +{ + struct net_str_id_entry *entry = NULL; + + + if (string == NULL || out_id == NULL || kind >= NSI_MAX_KIND) + return EINVAL; + 
+ *out_id = 0; + + /* Look for an existing entry */ + lck_mtx_lock(net_str_id_lock); + SLIST_FOREACH(entry, &net_str_id_list, nsi_next) { + if (strcmp(string, entry->nsi_string) == 0) { + break; + } + } + + if (entry == NULL) { + if (create == 0) { + lck_mtx_unlock(net_str_id_lock); + return ENOENT; + } + + entry = kalloc(NET_ID_STR_ENTRY_SIZE(string)); + if (entry == NULL) { + lck_mtx_unlock(net_str_id_lock); + return ENOMEM; + } + + strlcpy(entry->nsi_string, string, strlen(string) + 1); + entry->nsi_flags = (1 << kind); + entry->nsi_id = nsi_next_id++; + nsi_kind_next[kind] = nsi_next_id; + SLIST_INSERT_HEAD(&net_str_id_list, entry, nsi_next); + } else if ((entry->nsi_flags & (1 << kind)) == 0) { + if (create == 0) { + lck_mtx_unlock(net_str_id_lock); + return ENOENT; + } + entry->nsi_flags |= (1 << kind); + if (entry->nsi_id >= nsi_kind_next[kind]) + nsi_kind_next[kind] = entry->nsi_id + 1; + } + lck_mtx_unlock(net_str_id_lock); + + *out_id = entry->nsi_id; + + return 0; +} + + +#if NETMIBS + +#define ROUNDUP32(a) \ + ((a) > 0 ? (1 + (((a) - 1) | (sizeof(uint32_t) - 1))) : sizeof(uint32_t)) + +int +sysctl_if_family_ids SYSCTL_HANDLER_ARGS /* XXX bad syntax! 
*/ +{ +#pragma unused(oidp) +#pragma unused(arg1) +#pragma unused(arg2) + errno_t error = 0; + struct net_str_id_entry *entry = NULL; + struct if_family_id *iffmid = NULL; + size_t max_size = 0; + + lck_mtx_lock(net_str_id_lock); + SLIST_FOREACH(entry, &net_str_id_list, nsi_next) { + size_t str_size; + size_t iffmid_size; + + if ((entry->nsi_flags & (1 << NSI_IF_FAM_ID)) == 0) + continue; + + str_size = strlen(entry->nsi_string) + 1; + iffmid_size = ROUNDUP32(offsetof(struct net_str_id_entry, nsi_string) + str_size); + + if (iffmid_size > max_size) { + if (iffmid) + _FREE(iffmid, M_TEMP); + iffmid = _MALLOC(iffmid_size, M_TEMP, M_WAITOK); + if (iffmid == NULL) { + lck_mtx_unlock(net_str_id_lock); + error = ENOMEM; + goto done; + } + max_size = iffmid_size; + } + + bzero(iffmid, iffmid_size); + iffmid->iffmid_len = iffmid_size; + iffmid->iffmid_id = entry->nsi_id; + strlcpy(iffmid->iffmid_str, entry->nsi_string, str_size); + error = SYSCTL_OUT(req, iffmid, iffmid_size); + if (error) { + lck_mtx_unlock(net_str_id_lock); + goto done; + } + + } + lck_mtx_unlock(net_str_id_lock); + +done: + if (iffmid) + _FREE(iffmid, M_TEMP); + return error; +} + +#endif /* NETMIBS */ + diff --git a/libsa/misc.c b/bsd/net/net_str_id.h similarity index 64% rename from libsa/misc.c rename to bsd/net/net_str_id.h index f158145b4..35deea089 100644 --- a/libsa/misc.c +++ b/bsd/net/net_str_id.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,28 +25,37 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include -#include -#include -__private_extern__ -const char *kld_basefile_name = "(memory-resident kernel)"; +#ifndef __NSI_STR_ID__ +#define __NSI_STR_ID__ +#ifdef KERNEL_PRIVATE -/* from osfmk/kern/printf.c */ -extern void _doprnt( - register const char *fmt, - va_list *argp, - void (*putc)(char), - int radix); +#include +#include +#include -/* from osfmk/kern/printf.c */ -extern void conslog_putc(char c); +struct net_str_id_entry { + SLIST_ENTRY(net_str_id_entry) nsi_next; + u_int32_t nsi_flags; + u_int32_t nsi_id; + char nsi_string[1]; /* variable length string */ +}; -__private_extern__ -void kld_error_vprintf(const char *format, va_list ap) { - _doprnt(format, &ap, &conslog_putc, 10); - return; -} +enum { + NSI_MBUF_TAG = 0, + NSI_VENDOR_CODE = 1, + NSI_IF_FAM_ID = 2, + NSI_MAX_KIND +}; + +extern void net_str_id_first_last(u_int32_t * , u_int32_t *, u_int32_t); + +extern errno_t net_str_id_find_internal(const char * , u_int32_t *, u_int32_t, int); + +extern void net_str_id_init(void); + +#endif /* KERNEL_PRIVATE */ + +#endif /* __NSI_STR_ID__ */ diff --git a/bsd/net/pf.c b/bsd/net/pf.c new file mode 100644 index 000000000..cbc32f35d --- /dev/null +++ b/bsd/net/pf.c @@ -0,0 +1,9455 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf.c,v 1.37 2008/12/05 23:10:20 jhw Exp $ */ +/* $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if NPFSYNC +#include +#endif /* NPFSYNC */ + +#if INET6 +#include +#include +#include +#include +#include +#endif /* INET6 */ + +#ifndef NO_APPLE_EXTENSIONS +#define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0)) +#else +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x +#endif + +/* XXX: should be in header somewhere */ +#define satosin(sa) ((struct sockaddr_in *)(sa)) +#define sintosa(sin) ((struct sockaddr *)(sin)) + +/* + * On Mac OS X, the rtableid value is treated as the interface scope + * value that is equivalent to the interface index used for scoped + * routing. A valid scope value is anything but IFSCOPE_NONE (0), + * as per definition of ifindex which is a positive, non-zero number. 
+ * The other BSDs treat a negative rtableid value as invalid, hence + * the test against INT_MAX to handle userland apps which initialize + * the field with a negative number. + */ +#define PF_RTABLEID_IS_VALID(r) \ + ((r) > IFSCOPE_NONE && (r) <= INT_MAX) + +/* + * Global variables + */ +lck_mtx_t *pf_lock; +lck_rw_t *pf_perim_lock; + +/* state tables */ +struct pf_state_tree_lan_ext pf_statetbl_lan_ext; +struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; + +struct pf_palist pf_pabuf; +struct pf_status pf_status; + +#if ALTQ +struct pf_altqqueue pf_altqs[2]; +struct pf_altqqueue *pf_altqs_active; +struct pf_altqqueue *pf_altqs_inactive; +u_int32_t ticket_altqs_active; +u_int32_t ticket_altqs_inactive; +int altqs_inactive_open; +#endif /* ALTQ */ +u_int32_t ticket_pabuf; + +static MD5_CTX pf_tcp_secret_ctx; +static u_char pf_tcp_secret[16]; +static int pf_tcp_secret_init; +static int pf_tcp_iss_off; + +static struct pf_anchor_stackframe { + struct pf_ruleset *rs; + struct pf_rule *r; + struct pf_anchor_node *parent; + struct pf_anchor *child; +} pf_anchor_stack[64]; + +struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; +struct pool pf_state_pl, pf_state_key_pl; +#if ALTQ +struct pool pf_altq_pl; +#endif /* ALTQ */ + +#ifndef NO_APPLE_EXTENSIONS +typedef void (*hook_fn_t)(void *); + +struct hook_desc { + TAILQ_ENTRY(hook_desc) hd_list; + hook_fn_t hd_fn; + void *hd_arg; +}; + +#define HOOK_REMOVE 0x01 +#define HOOK_FREE 0x02 +#define HOOK_ABORT 0x04 + +static void *hook_establish(struct hook_desc_head *, int, + hook_fn_t, void *); +static void hook_runloop(struct hook_desc_head *, int flags); + +struct pool pf_app_state_pl; +static void pf_print_addr(struct pf_addr *addr, sa_family_t af); +static void pf_print_sk_host(struct pf_state_host *, u_int8_t, int, + u_int8_t); +#endif + +static void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); + +static void pf_init_threshold(struct pf_threshold *, u_int32_t, + u_int32_t); +static void pf_add_threshold(struct 
pf_threshold *); +static int pf_check_threshold(struct pf_threshold *); + +static void pf_change_ap(int, struct mbuf *, struct pf_addr *, + u_int16_t *, u_int16_t *, u_int16_t *, + struct pf_addr *, u_int16_t, u_int8_t, sa_family_t); +static int pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *, + struct tcphdr *, struct pf_state_peer *); +#if INET6 +static void pf_change_a6(struct pf_addr *, u_int16_t *, + struct pf_addr *, u_int8_t); +#endif /* INET6 */ +static void pf_change_icmp(struct pf_addr *, u_int16_t *, + struct pf_addr *, struct pf_addr *, u_int16_t, + u_int16_t *, u_int16_t *, u_int16_t *, + u_int16_t *, u_int8_t, sa_family_t); +static void pf_send_tcp(const struct pf_rule *, sa_family_t, + const struct pf_addr *, const struct pf_addr *, + u_int16_t, u_int16_t, u_int32_t, u_int32_t, + u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, + u_int16_t, struct ether_header *, struct ifnet *); +static void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, + sa_family_t, struct pf_rule *); +#ifndef NO_APPLE_EXTENSIONS +static struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_addr *, + union pf_state_xport *, struct pf_addr *, + union pf_state_xport *, int); +static struct pf_rule *pf_get_translation_aux(struct pf_pdesc *, + struct mbuf *, int, int, struct pfi_kif *, + struct pf_src_node **, struct pf_addr *, + union pf_state_xport *, struct pf_addr *, + union pf_state_xport *, struct pf_addr *, + union pf_state_xport *); +#else +struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, + struct pf_addr *, u_int16_t, struct pf_addr *, + u_int16_t, int); +struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, + int, int, struct pfi_kif *, struct pf_src_node **, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t *); +#endif +static void pf_attach_state(struct pf_state_key *, + struct pf_state *, int); +static 
void pf_detach_state(struct pf_state *, int); +static u_int32_t pf_tcp_iss(struct pf_pdesc *); +static int pf_test_rule(struct pf_rule **, struct pf_state **, + int, struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **, struct ifqueue *); +static int pf_test_fragment(struct pf_rule **, int, + struct pfi_kif *, struct mbuf *, void *, + struct pf_pdesc *, struct pf_rule **, + struct pf_ruleset **); +static int pf_test_state_tcp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_udp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *); +static int pf_test_state_icmp(struct pf_state **, int, + struct pfi_kif *, struct mbuf *, int, + void *, struct pf_pdesc *, u_short *); +static int pf_test_state_other(struct pf_state **, int, + struct pfi_kif *, struct pf_pdesc *); +static int pf_match_tag(struct mbuf *, struct pf_rule *, + struct pf_mtag *, int *); +static void pf_step_into_anchor(int *, struct pf_ruleset **, int, + struct pf_rule **, struct pf_rule **, int *); +static int pf_step_out_of_anchor(int *, struct pf_ruleset **, + int, struct pf_rule **, struct pf_rule **, + int *); +static void pf_hash(struct pf_addr *, struct pf_addr *, + struct pf_poolhashkey *, sa_family_t); +static int pf_map_addr(u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_src_node **); +#ifndef NO_APPLE_EXTENSIONS +static int pf_get_sport(struct pf_pdesc *, struct pfi_kif *, + struct pf_rule *, struct pf_addr *, + union pf_state_xport *, struct pf_addr *, + union pf_state_xport *, struct pf_addr *, + union pf_state_xport *, struct pf_src_node **); +#else +int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, + struct pf_addr *, struct pf_addr *, u_int16_t, + struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t, + struct pf_src_node **); +#endif +static void 
pf_route(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#if INET6 +static void pf_route6(struct mbuf **, struct pf_rule *, int, + struct ifnet *, struct pf_state *, + struct pf_pdesc *); +#endif /* INET6 */ +static u_int8_t pf_get_wscale(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_get_mss(struct mbuf *, int, u_int16_t, + sa_family_t); +static u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, + u_int16_t); +static void pf_set_rt_ifp(struct pf_state *, + struct pf_addr *); +static int pf_check_proto_cksum(struct mbuf *, int, int, + u_int8_t, sa_family_t); +static int pf_addr_wrap_neq(struct pf_addr_wrap *, + struct pf_addr_wrap *); +static struct pf_state *pf_find_state(struct pfi_kif *, + struct pf_state_key_cmp *, u_int); +static int pf_src_connlimit(struct pf_state **); +static void pf_stateins_err(const char *, struct pf_state *, + struct pfi_kif *); +static int pf_check_congestion(struct ifqueue *); + +#ifndef NO_APPLE_EXTENSIONS +#if 0 +static const char *pf_pptp_ctrl_type_name(u_int16_t code); +#endif +static void pf_pptp_handler(struct pf_state *, int, int, + struct pf_pdesc *, struct pfi_kif *); +static void pf_pptp_unlink(struct pf_state *); +static int pf_test_state_grev1(struct pf_state **, int, + struct pfi_kif *, int, struct pf_pdesc *); +static int pf_ike_compare(struct pf_app_state *, + struct pf_app_state *); +static int pf_test_state_esp(struct pf_state **, int, + struct pfi_kif *, int, struct pf_pdesc *); +#endif + +extern struct pool pfr_ktable_pl; +extern struct pool pfr_kentry_pl; +extern int path_mtu_discovery; + +struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { + { &pf_state_pl, PFSTATE_HIWAT }, + { &pf_app_state_pl, PFAPPSTATE_HIWAT }, + { &pf_src_tree_pl, PFSNODE_HIWAT }, + { &pf_frent_pl, PFFRAG_FRENT_HIWAT }, + { &pfr_ktable_pl, PFR_KTABLE_HIWAT }, + { &pfr_kentry_pl, PFR_KENTRY_HIWAT } +}; + +#ifndef NO_APPLE_EXTENSIONS +struct mbuf * 
+pf_lazy_makewritable(struct pf_pdesc *pd, struct mbuf *m, int len) +{ + if (pd->lmw < 0) + return (0); + + VERIFY(m == pd->mp); + + if (len > pd->lmw) { + if (m_makewritable(&m, 0, len, M_DONTWAIT)) + len = -1; + pd->lmw = len; + if (len >= 0 && m != pd->mp) { + pd->mp = m; + + switch (pd->af) { + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + pd->src = (struct pf_addr *)&h->ip_src; + pd->dst = (struct pf_addr *)&h->ip_dst; + pd->ip_sum = &h->ip_sum; + break; + } +#if INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + pd->src = (struct pf_addr *)&h->ip6_src; + pd->dst = (struct pf_addr *)&h->ip6_dst; + break; + } +#endif /* INET6 */ + } + } + } + + return (len < 0 ? 0 : m); +} + +static const int * +pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif, + int direction, int *action) +{ + if (*state == NULL || (*state)->timeout == PFTM_PURGE) { + *action = PF_DROP; + return (action); + } + + if (direction == PF_OUT && + (((*state)->rule.ptr->rt == PF_ROUTETO && + (*state)->rule.ptr->direction == PF_OUT) || + ((*state)->rule.ptr->rt == PF_REPLYTO && + (*state)->rule.ptr->direction == PF_IN)) && + (*state)->rt_kif != NULL && (*state)->rt_kif != kif) { + *action = PF_PASS; + return (action); + } + + return (0); +} + +#define STATE_LOOKUP() \ + do { \ + int action; \ + *state = pf_find_state(kif, &key, direction); \ + if (pf_state_lookup_aux(state, kif, direction, &action)) \ + return (action); \ + } while (0) + +#define STATE_ADDR_TRANSLATE(sk) \ + (sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \ + ((sk)->af == AF_INET6 && \ + ((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \ + (sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \ + (sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) + +#define STATE_TRANSLATE(sk) \ + (STATE_ADDR_TRANSLATE(sk) || \ + (sk)->lan.xport.port != (sk)->gwy.xport.port) + +#define STATE_GRE_TRANSLATE(sk) \ + (STATE_ADDR_TRANSLATE(sk) || \ + (sk)->lan.xport.call_id != 
/*
 * Increment the `states' counter of every rule the state references: the
 * matching rule unconditionally, the anchor and NAT rules only when their
 * pointers are set.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * `*statep') expands correctly.  It is evaluated more than once, so it must
 * not have side effects.
 */
#define	STATE_INC_COUNTERS(s)					\
	do {							\
		(s)->rule.ptr->states++;			\
		if ((s)->anchor.ptr != NULL)			\
			(s)->anchor.ptr->states++;		\
		if ((s)->nat_rule.ptr != NULL)			\
			(s)->nat_rule.ptr->states++;		\
	} while (0)

/*
 * Inverse of STATE_INC_COUNTERS(): decrement the same counters, visiting
 * the rules in the opposite order (NAT rule, anchor, matching rule).
 * The same single-evaluation caveat applies to the argument.
 */
#define	STATE_DEC_COUNTERS(s)					\
	do {							\
		if ((s)->nat_rule.ptr != NULL)			\
			(s)->nat_rule.ptr->states--;		\
		if ((s)->anchor.ptr != NULL)			\
			(s)->anchor.ptr->states--;		\
		(s)->rule.ptr->states--;			\
	} while (0)
/*
 * Leading header of a PPTP message as laid out on the wire: three fields,
 * eight bytes in total.
 *
 * NOTE(review): the field meanings below follow the PPTP specification
 * (RFC 2637) rather than anything visible in this file.  The values are
 * presumably kept in network byte order, since PF_PPTP_MAGIC_NUMBER is
 * built with htonl() -- confirm against the code that parses this header.
 */
struct pf_pptp_hdr {
	u_int16_t	length;	/* presumably the whole message length */
	u_int16_t	type;	/* presumably the PPTP message type */
	u_int32_t	magic;	/* presumably checked vs PF_PPTP_MAGIC_NUMBER */
};
/*
 * Body of PPTP control message type 8.  It is one member of
 * union pf_pptp_ctrl_msg_union and one of the sizes folded into
 * PF_PPTP_CTRL_MSG_MINSIZE.
 *
 * NOTE(review): the per-field notes follow the Outgoing-Call-Reply layout
 * in RFC 2637 and are not established by this file -- confirm.
 */
#define PF_PPTP_CTRL_TYPE_CALL_OUT_RPY	8
struct pf_pptp_ctrl_call_out_rpy {
	u_int16_t	call_id;	/* presumably the sender's call id */
	u_int16_t	peer_call_id;	/* presumably the id from the request */
	u_int8_t	result_code;
	u_int8_t	error_code;
	u_int16_t	cause_code;
	u_int32_t	connect_speed;
	u_int16_t	rxwindow_size;
	u_int16_t	proc_delay;
	u_int32_t	phy_channel_id;
};
/*
 * Map a PPTP control message type, given in network byte order, to a
 * printable name.
 *
 * Types 1 through 15 (PF_PPTP_CTRL_TYPE_START_REQ through
 * PF_PPTP_CTRL_TYPE_SET_LINKINFO) index a constant name table.  Any other
 * value is rendered as "reserved-<hex>" into a static buffer; that result is
 * overwritten by the next such call, so the function is not reentrant.
 *
 * The static buffer is sized for the widest 16-bit value and is filled with
 * snprintf().  The previous "reserved-00" buffer was overrun by sprintf()
 * for any code above 0xff.
 */
static const char *
pf_pptp_ctrl_type_name(u_int16_t code)
{
	static const char *const name[] = {
		"start_req", "start_rpy", "stop_req", "stop_rpy",
		"echo_req", "echo_rpy", "call_out_req", "call_out_rpy",
		"call_in_1st", "call_in_2nd", "call_in_3rd",
		"call_clr", "call_disc", "error", "set_linkinfo"
	};
	/* Room for "reserved-", four hex digits and the terminating NUL. */
	static char reserved[sizeof ("reserved-ffff")];
	const size_t ntypes = sizeof (name) / sizeof (name[0]);

	code = ntohs(code);

	/* Table slot i holds the name of type i + 1. */
	if (code >= 1 && code <= ntypes)
		return (name[code - 1]);

	snprintf(reserved, sizeof (reserved), "reserved-%02x",
	    (unsigned int)code);
	return (reserved);
}
/*
 * Fixed header at the start of an IKE packet: 28 bytes of fields.
 * PF_IKE_PACKET_MINSIZE is defined as the size of this structure.
 *
 * NOTE(review): exchange_type presumably takes the PF_IKEv1_EXCHTYPE_ and
 * PF_IKEv2_EXCHTYPE_ values and flags the PF_IKEv1_FLAG_ / PF_IKEv2_FLAG_
 * bits defined next to this structure; byte order of the multi-byte fields
 * is not visible here -- confirm against the code that reads them.
 */
struct pf_ike_hdr {
	/* two 64-bit cookies, initiator's first */
	u_int64_t initiator_cookie, responder_cookie;
	/* four single-byte fields */
	u_int8_t next_payload, version, exchange_type, flags;
	/* two 32-bit fields */
	u_int32_t message_id, length;
};
b->addr.addr32[3]) + return (1); + if (a->addr.addr32[3] < b->addr.addr32[3]) + return (-1); + if (a->addr.addr32[2] > b->addr.addr32[2]) + return (1); + if (a->addr.addr32[2] < b->addr.addr32[2]) + return (-1); + if (a->addr.addr32[1] > b->addr.addr32[1]) + return (1); + if (a->addr.addr32[1] < b->addr.addr32[1]) + return (-1); + if (a->addr.addr32[0] > b->addr.addr32[0]) + return (1); + if (a->addr.addr32[0] < b->addr.addr32[0]) + return (-1); + break; +#endif /* INET6 */ + } + return (0); +} + +static __inline int +pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) +{ + int diff; +#ifndef NO_APPLE_EXTENSIONS + int extfilter; +#endif + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + +#ifndef NO_APPLE_EXTENSIONS + extfilter = PF_EXTFILTER_APD; + + switch (a->proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) + return (diff); + break; + + case IPPROTO_TCP: + if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) + return (diff); + if ((diff = a->ext.xport.port - b->ext.xport.port) != 0) + return (diff); + break; + + case IPPROTO_UDP: + if ((diff = a->proto_variant - b->proto_variant)) + return (diff); + extfilter = a->proto_variant; + if ((diff = a->lan.xport.port - b->lan.xport.port) != 0) + return (diff); + if ((extfilter < PF_EXTFILTER_AD) && + (diff = a->ext.xport.port - b->ext.xport.port) != 0) + return (diff); + break; + + case IPPROTO_GRE: + if (a->proto_variant == PF_GRE_PPTP_VARIANT && + a->proto_variant == b->proto_variant) { + if (!!(diff = a->ext.xport.call_id - + b->ext.xport.call_id)) + return (diff); + } + break; + + case IPPROTO_ESP: + if (!!(diff = a->ext.xport.spi - b->ext.xport.spi)) + return (diff); + break; + + default: + break; + } +#endif + + switch (a->af) { +#if INET + case AF_INET: + if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) + return (1); + if (a->lan.addr.addr32[0] < 
b->lan.addr.addr32[0]) + return (-1); +#ifndef NO_APPLE_EXTENSIONS + if (extfilter < PF_EXTFILTER_EI) { + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + } +#else + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); +#endif + break; +#endif /* INET */ +#if INET6 + case AF_INET6: +#ifndef NO_APPLE_EXTENSIONS + if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) + return (1); + if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) + return (-1); + if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) + return (1); + if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) + return (-1); + if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) + return (1); + if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) + return (-1); + if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) + return (1); + if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) + return (-1); + if (extfilter < PF_EXTFILTER_EI || + !PF_AZERO(&b->ext.addr, AF_INET6)) { + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + } +#else + if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) + return (1); + if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) + return (1); + if 
(a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) + return (1); + if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) + return (1); + if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); +#endif + break; +#endif /* INET6 */ + } + +#ifndef NO_APPLE_EXTENSIONS + if (a->app_state && b->app_state) { + if (a->app_state->compare_lan_ext && + b->app_state->compare_lan_ext) { + diff = (const char *)b->app_state->compare_lan_ext - + (const char *)a->app_state->compare_lan_ext; + if (diff != 0) + return (diff); + diff = a->app_state->compare_lan_ext(a->app_state, + b->app_state); + if (diff != 0) + return (diff); + } + } +#else + if ((diff = a->lan.port - b->lan.port) != 0) + return (diff); + if ((diff = a->ext.port - b->ext.port) != 0) + return (diff); +#endif + + return (0); +} + +static __inline int +pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) +{ + int diff; +#ifndef NO_APPLE_EXTENSIONS + int extfilter; +#endif + + if ((diff = a->proto - b->proto) != 0) + return (diff); + + if ((diff = a->af - b->af) != 0) + return (diff); + +#ifndef NO_APPLE_EXTENSIONS + extfilter = PF_EXTFILTER_APD; + + switch (a->proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) + return (diff); + break; + + case IPPROTO_TCP: + if ((diff = a->ext.xport.port - b->ext.xport.port) != 0) + return (diff); + if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) + return 
(diff); + break; + + case IPPROTO_UDP: + if ((diff = a->proto_variant - b->proto_variant)) + return (diff); + extfilter = a->proto_variant; + if ((diff = a->gwy.xport.port - b->gwy.xport.port) != 0) + return (diff); + if ((extfilter < PF_EXTFILTER_AD) && + (diff = a->ext.xport.port - b->ext.xport.port) != 0) + return (diff); + break; + + case IPPROTO_GRE: + if (a->proto_variant == PF_GRE_PPTP_VARIANT && + a->proto_variant == b->proto_variant) { + if (!!(diff = a->gwy.xport.call_id - + b->gwy.xport.call_id)) + return (diff); + } + break; + + case IPPROTO_ESP: + if (!!(diff = a->gwy.xport.spi - b->gwy.xport.spi)) + return (diff); + break; + + default: + break; + } +#endif + + switch (a->af) { +#if INET + case AF_INET: +#ifndef NO_APPLE_EXTENSIONS + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); + if (extfilter < PF_EXTFILTER_EI) { + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + } +#else + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); +#endif + break; +#endif /* INET */ +#if INET6 + case AF_INET6: +#ifndef NO_APPLE_EXTENSIONS + if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) + return (1); + if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) + return (-1); + if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) + return (1); + if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) + return (-1); + if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) + return (1); + if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) + return (-1); + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); + if (extfilter < 
PF_EXTFILTER_EI || + !PF_AZERO(&b->ext.addr, AF_INET6)) { + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + } +#else + if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) + return (1); + if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) + return (-1); + if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) + return (1); + if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) + return (-1); + if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) + return (1); + if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) + return (-1); + if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) + return (1); + if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) + return (-1); + if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) + return (1); + if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) + return (-1); + if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) + return (1); + if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) + return (-1); + if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) + return (1); + if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) + return (-1); + if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) + return (1); + if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) + return (-1); +#endif + break; +#endif /* INET6 */ + } + +#ifndef NO_APPLE_EXTENSIONS + if (a->app_state && b->app_state) { + if (a->app_state->compare_ext_gwy && + b->app_state->compare_ext_gwy) { + diff = (const char *)b->app_state->compare_ext_gwy - + (const char *)a->app_state->compare_ext_gwy; + if (diff != 0) + 
return (diff); + diff = a->app_state->compare_ext_gwy(a->app_state, + b->app_state); + if (diff != 0) + return (diff); + } + } +#else + if ((diff = a->ext.port - b->ext.port) != 0) + return (diff); + if ((diff = a->gwy.port - b->gwy.port) != 0) + return (diff); +#endif + + return (0); +} + +static __inline int +pf_state_compare_id(struct pf_state *a, struct pf_state *b) +{ + if (a->id > b->id) + return (1); + if (a->id < b->id) + return (-1); + if (a->creatorid > b->creatorid) + return (1); + if (a->creatorid < b->creatorid) + return (-1); + + return (0); +} + +#if INET6 +void +pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: + dst->addr32[0] = src->addr32[0]; + break; +#endif /* INET */ + case AF_INET6: + dst->addr32[0] = src->addr32[0]; + dst->addr32[1] = src->addr32[1]; + dst->addr32[2] = src->addr32[2]; + dst->addr32[3] = src->addr32[3]; + break; + } +} +#endif /* INET6 */ + +struct pf_state * +pf_find_state_byid(struct pf_state_cmp *key) +{ + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); +} + +static struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) +{ + struct pf_state_key *sk = NULL; + struct pf_state *s; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + switch (dir) { + case PF_OUT: + sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, + (struct pf_state_key *)key); + break; + case PF_IN: + sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy, + (struct pf_state_key *)key); + break; + default: + panic("pf_find_state"); + } + + /* list is sorted, if-bound states before floating ones */ + if (sk != NULL) + TAILQ_FOREACH(s, &sk->states, next) + if (s->kif == pfi_all || s->kif == kif) + return (s); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_state_key *sk = NULL; + struct pf_state *s, *ret = 
NULL; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + switch (dir) { + case PF_OUT: + sk = RB_FIND(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, (struct pf_state_key *)key); + break; + case PF_IN: + sk = RB_FIND(pf_state_tree_ext_gwy, + &pf_statetbl_ext_gwy, (struct pf_state_key *)key); + break; + default: + panic("pf_find_state_all"); + } + + if (sk != NULL) { + ret = TAILQ_FIRST(&sk->states); + if (more == NULL) + return (ret); + + TAILQ_FOREACH(s, &sk->states, next) + (*more)++; + } + + return (ret); +} + +static void +pf_init_threshold(struct pf_threshold *threshold, + u_int32_t limit, u_int32_t seconds) +{ + threshold->limit = limit * PF_THRESHOLD_MULT; + threshold->seconds = seconds; + threshold->count = 0; + threshold->last = pf_time_second(); +} + +static void +pf_add_threshold(struct pf_threshold *threshold) +{ + u_int32_t t = pf_time_second(), diff = t - threshold->last; + + if (diff >= threshold->seconds) + threshold->count = 0; + else + threshold->count -= threshold->count * diff / + threshold->seconds; + threshold->count += PF_THRESHOLD_MULT; + threshold->last = t; +} + +static int +pf_check_threshold(struct pf_threshold *threshold) +{ + return (threshold->count > threshold->limit); +} + +static int +pf_src_connlimit(struct pf_state **state) +{ + int bad = 0; + + (*state)->src_node->conn++; + (*state)->src.tcp_est = 1; + pf_add_threshold(&(*state)->src_node->conn_rate); + + if ((*state)->rule.ptr->max_src_conn && + (*state)->rule.ptr->max_src_conn < + (*state)->src_node->conn) { + pf_status.lcounters[LCNT_SRCCONN]++; + bad++; + } + + if ((*state)->rule.ptr->max_src_conn_rate.limit && + pf_check_threshold(&(*state)->src_node->conn_rate)) { + pf_status.lcounters[LCNT_SRCCONNRATE]++; + bad++; + } + + if (!bad) + return (0); + + if ((*state)->rule.ptr->overload_tbl) { + struct pfr_addr p; + u_int32_t killed = 0; + + pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_src_connlimit: blocking address "); + 
pf_print_host(&(*state)->src_node->addr, 0, + (*state)->state_key->af); + } + + bzero(&p, sizeof (p)); + p.pfra_af = (*state)->state_key->af; + switch ((*state)->state_key->af) { +#if INET + case AF_INET: + p.pfra_net = 32; + p.pfra_ip4addr = (*state)->src_node->addr.v4; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + p.pfra_net = 128; + p.pfra_ip6addr = (*state)->src_node->addr.v6; + break; +#endif /* INET6 */ + } + + pfr_insert_kentry((*state)->rule.ptr->overload_tbl, + &p, pf_time_second()); + + /* kill existing states if that's required. */ + if ((*state)->rule.ptr->flush) { + struct pf_state_key *sk; + struct pf_state *st; + + pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; + RB_FOREACH(st, pf_state_tree_id, &tree_id) { + sk = st->state_key; + /* + * Kill states from this source. (Only those + * from the same rule if PF_FLUSH_GLOBAL is not + * set) + */ + if (sk->af == + (*state)->state_key->af && + (((*state)->state_key->direction == + PF_OUT && + PF_AEQ(&(*state)->src_node->addr, + &sk->lan.addr, sk->af)) || + ((*state)->state_key->direction == PF_IN && + PF_AEQ(&(*state)->src_node->addr, + &sk->ext.addr, sk->af))) && + ((*state)->rule.ptr->flush & + PF_FLUSH_GLOBAL || + (*state)->rule.ptr == st->rule.ptr)) { + st->timeout = PFTM_PURGE; + st->src.state = st->dst.state = + TCPS_CLOSED; + killed++; + } + } + if (pf_status.debug >= PF_DEBUG_MISC) + printf(", %u states killed", killed); + } + if (pf_status.debug >= PF_DEBUG_MISC) + printf("\n"); + } + + /* kill this state */ + (*state)->timeout = PFTM_PURGE; + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + return (1); +} + +int +pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, + struct pf_addr *src, sa_family_t af) +{ + struct pf_src_node k; + + if (*sn == NULL) { + k.af = af; + PF_ACPY(&k.addr, src, af); + if (rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR) + k.rule.ptr = rule; + else + k.rule.ptr = NULL; + 
pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); + } + if (*sn == NULL) { + if (!rule->max_src_nodes || + rule->src_nodes < rule->max_src_nodes) + (*sn) = pool_get(&pf_src_tree_pl, PR_WAITOK); + else + pf_status.lcounters[LCNT_SRCNODES]++; + if ((*sn) == NULL) + return (-1); + bzero(*sn, sizeof (struct pf_src_node)); + + pf_init_threshold(&(*sn)->conn_rate, + rule->max_src_conn_rate.limit, + rule->max_src_conn_rate.seconds); + + (*sn)->af = af; + if (rule->rule_flag & PFRULE_RULESRCTRACK || + rule->rpool.opts & PF_POOL_STICKYADDR) + (*sn)->rule.ptr = rule; + else + (*sn)->rule.ptr = NULL; + PF_ACPY(&(*sn)->addr, src, af); + if (RB_INSERT(pf_src_tree, + &tree_src_tracking, *sn) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: src_tree insert failed: "); + pf_print_host(&(*sn)->addr, 0, af); + printf("\n"); + } + pool_put(&pf_src_tree_pl, *sn); + return (-1); + } + (*sn)->creation = pf_time_second(); + (*sn)->ruletype = rule->action; + if ((*sn)->rule.ptr != NULL) + (*sn)->rule.ptr->src_nodes++; + pf_status.scounters[SCNT_SRC_NODE_INSERT]++; + pf_status.src_nodes++; + } else { + if (rule->max_src_states && + (*sn)->states >= rule->max_src_states) { + pf_status.lcounters[LCNT_SRCSTATES]++; + return (-1); + } + } + return (0); +} + +static void +pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif) +{ + struct pf_state_key *sk = s->state_key; + + if (pf_status.debug >= PF_DEBUG_MISC) { +#ifndef NO_APPLE_EXTENSIONS + printf("pf: state insert failed: %s %s ", tree, kif->pfik_name); + switch (sk->proto) { + case IPPROTO_TCP: + printf("TCP"); + break; + case IPPROTO_UDP: + printf("UDP"); + break; + case IPPROTO_ICMP: + printf("ICMP4"); + break; + case IPPROTO_ICMPV6: + printf("ICMP6"); + break; + default: + printf("PROTO=%u", sk->proto); + break; + } + printf(" lan: "); + pf_print_sk_host(&sk->lan, sk->af, sk->proto, + sk->proto_variant); + printf(" gwy: "); + 
pf_print_sk_host(&sk->gwy, sk->af, sk->proto, + sk->proto_variant); + printf(" ext: "); + pf_print_sk_host(&sk->ext, sk->af, sk->proto, + sk->proto_variant); +#else + printf("pf: state insert failed: %s %s", tree, kif->pfik_name); + printf(" lan: "); + pf_print_host(&sk->lan.addr, sk->lan.port, + sk->af); + printf(" gwy: "); + pf_print_host(&sk->gwy.addr, sk->gwy.port, + sk->af); + printf(" ext: "); + pf_print_host(&sk->ext.addr, sk->ext.port, + sk->af); +#endif + if (s->sync_flags & PFSTATE_FROMSYNC) + printf(" (from sync)"); + printf("\n"); + } +} + +int +pf_insert_state(struct pfi_kif *kif, struct pf_state *s) +{ + struct pf_state_key *cur; + struct pf_state *sp; + + VERIFY(s->state_key != NULL); + s->kif = kif; + + if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, + s->state_key)) != NULL) { + /* key exists. check for same kif, if none, add to key */ + TAILQ_FOREACH(sp, &cur->states, next) + if (sp->kif == kif) { /* collision! */ + pf_stateins_err("tree_lan_ext", s, kif); + pf_detach_state(s, + PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); + return (-1); + } + pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); + pf_attach_state(cur, s, kif == pfi_all ? 1 : 0); + } + + /* if cur != NULL, we already found a state key and attached to it */ + if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy, + &pf_statetbl_ext_gwy, s->state_key)) != NULL) { + /* must not happen. we must have found the sk above! 
*/ + pf_stateins_err("tree_ext_gwy", s, kif); + pf_detach_state(s, PF_DT_SKIP_EXTGWY); + return (-1); + } + + if (s->id == 0 && s->creatorid == 0) { + s->id = htobe64(pf_status.stateid++); + s->creatorid = pf_status.hostid; + } + if (RB_INSERT(pf_state_tree_id, &tree_id, s) != NULL) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state insert failed: " + "id: %016llx creatorid: %08x", + be64toh(s->id), ntohl(s->creatorid)); + if (s->sync_flags & PFSTATE_FROMSYNC) + printf(" (from sync)"); + printf("\n"); + } + pf_detach_state(s, 0); + return (-1); + } + TAILQ_INSERT_TAIL(&state_list, s, entry_list); + pf_status.fcounters[FCNT_STATE_INSERT]++; + pf_status.states++; + pfi_kif_ref(kif, PFI_KIF_REF_STATE); +#if NPFSYNC + pfsync_insert_state(s); +#endif + return (0); +} + +void +pf_purge_thread_fn(void *v, wait_result_t w) +{ +#pragma unused(v, w) + u_int32_t nloops = 0; + int t = 0; + + for (;;) { + (void) tsleep(pf_purge_thread_fn, PWAIT, "pftm", t * hz); + + lck_rw_lock_shared(pf_perim_lock); + lck_mtx_lock(pf_lock); + + /* purge everything if not running */ + if (!pf_status.running) { + pf_purge_expired_states(pf_status.states); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + + /* terminate thread (we don't currently do this) */ + if (pf_purge_thread == NULL) { + lck_mtx_unlock(pf_lock); + lck_rw_done(pf_perim_lock); + + thread_deallocate(current_thread()); + thread_terminate(current_thread()); + /* NOTREACHED */ + return; + } else { + /* if there's nothing left, sleep w/o timeout */ + if (pf_status.states == 0 && + pf_normalize_isempty() && + RB_EMPTY(&tree_src_tracking)) + t = 0; + + lck_mtx_unlock(pf_lock); + lck_rw_done(pf_perim_lock); + continue; + } + } else if (t == 0) { + /* Set timeout to 1 second */ + t = 1; + } + + /* process a fraction of the state table every second */ + pf_purge_expired_states(1 + (pf_status.states + / pf_default_rule.timeout[PFTM_INTERVAL])); + + /* purge other expired types every PFTM_INTERVAL seconds */ + 
if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + nloops = 0; + } + + lck_mtx_unlock(pf_lock); + lck_rw_done(pf_perim_lock); + } +} + +u_int64_t +pf_state_expires(const struct pf_state *state) +{ + u_int32_t t; + u_int32_t start; + u_int32_t end; + u_int32_t states; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + /* handle all PFTM_* > PFTM_MAX here */ + if (state->timeout == PFTM_PURGE) + return (pf_time_second()); + if (state->timeout == PFTM_UNTIL_PACKET) + return (0); + VERIFY(state->timeout != PFTM_UNLINKED); + VERIFY(state->timeout < PFTM_MAX); + t = state->rule.ptr->timeout[state->timeout]; + if (!t) + t = pf_default_rule.timeout[state->timeout]; + start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; + if (start) { + end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; + states = state->rule.ptr->states; + } else { + start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; + end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; + states = pf_status.states; + } + if (end && states > start && start < end) { + if (states < end) + return (state->expire + t * (end - states) / + (end - start)); + else + return (pf_time_second()); + } + return (state->expire + t); +} + +void +pf_purge_expired_src_nodes(void) +{ + struct pf_src_node *cur, *next; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); + + if (cur->states <= 0 && cur->expire <= pf_time_second()) { + if (cur->rule.ptr != NULL) { + cur->rule.ptr->src_nodes--; + if (cur->rule.ptr->states <= 0 && + cur->rule.ptr->max_src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + } + RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, cur); + } + } +} + +void +pf_src_tree_remove_state(struct pf_state *s) +{ + u_int32_t t; 
+ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (s->src_node != NULL) { + if (s->src.tcp_est) + --s->src_node->conn; + if (--s->src_node->states <= 0) { + t = s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!t) + t = pf_default_rule.timeout[PFTM_SRC_NODE]; + s->src_node->expire = pf_time_second() + t; + } + } + if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) { + if (--s->nat_src_node->states <= 0) { + t = s->rule.ptr->timeout[PFTM_SRC_NODE]; + if (!t) + t = pf_default_rule.timeout[PFTM_SRC_NODE]; + s->nat_src_node->expire = pf_time_second() + t; + } + } + s->src_node = s->nat_src_node = NULL; +} + +void +pf_unlink_state(struct pf_state *cur) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + +#ifndef NO_APPLE_EXTENSIONS + if (cur->src.state == PF_TCPS_PROXY_DST) { + pf_send_tcp(cur->rule.ptr, cur->state_key->af, + &cur->state_key->ext.addr, &cur->state_key->lan.addr, + cur->state_key->ext.xport.port, + cur->state_key->lan.xport.port, + cur->src.seqhi, cur->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } + + hook_runloop(&cur->unlink_hooks, HOOK_REMOVE|HOOK_FREE); +#else + if (cur->src.state == PF_TCPS_PROXY_DST) { + pf_send_tcp(cur->rule.ptr, cur->state_key->af, + &cur->state_key->ext.addr, &cur->state_key->lan.addr, + cur->state_key->ext.port, cur->state_key->lan.port, + cur->src.seqhi, cur->src.seqlo + 1, + TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); + } +#endif + RB_REMOVE(pf_state_tree_id, &tree_id, cur); +#if NPFSYNC + if (cur->creatorid == pf_status.hostid) + pfsync_delete_state(cur); +#endif + cur->timeout = PFTM_UNLINKED; + pf_src_tree_remove_state(cur); + pf_detach_state(cur, 0); +} + +/* callers should be at splpf and hold the + * write_lock on pf_consistency_lock */ +void +pf_free_state(struct pf_state *cur) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); +#if NPFSYNC + if (pfsyncif != NULL && + (pfsyncif->sc_bulk_send_next == cur || + pfsyncif->sc_bulk_terminator == cur)) + return; +#endif + 
VERIFY(cur->timeout == PFTM_UNLINKED); + if (--cur->rule.ptr->states <= 0 && + cur->rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + if (cur->nat_rule.ptr != NULL) + if (--cur->nat_rule.ptr->states <= 0 && + cur->nat_rule.ptr->src_nodes <= 0) + pf_rm_rule(NULL, cur->nat_rule.ptr); + if (cur->anchor.ptr != NULL) + if (--cur->anchor.ptr->states <= 0) + pf_rm_rule(NULL, cur->anchor.ptr); + pf_normalize_tcp_cleanup(cur); + pfi_kif_unref(cur->kif, PFI_KIF_REF_STATE); + TAILQ_REMOVE(&state_list, cur, entry_list); + if (cur->tag) + pf_tag_unref(cur->tag); + pool_put(&pf_state_pl, cur); + pf_status.fcounters[FCNT_STATE_REMOVALS]++; + pf_status.states--; +} + +void +pf_purge_expired_states(u_int32_t maxcheck) +{ + static struct pf_state *cur = NULL; + struct pf_state *next; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + while (maxcheck--) { + /* wrap to start of list when we hit the end */ + if (cur == NULL) { + cur = TAILQ_FIRST(&state_list); + if (cur == NULL) + break; /* list empty */ + } + + /* get next state, as cur may get deleted */ + next = TAILQ_NEXT(cur, entry_list); + + if (cur->timeout == PFTM_UNLINKED) { + pf_free_state(cur); + } else if (pf_state_expires(cur) <= pf_time_second()) { + /* unlink and free expired state */ + pf_unlink_state(cur); + pf_free_state(cur); + } + cur = next; + } +} + +int +pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (aw->type != PF_ADDR_TABLE) + return (0); + if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname)) == NULL) + return (1); + return (0); +} + +void +pf_tbladdr_remove(struct pf_addr_wrap *aw) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) + return; + pfr_detach_table(aw->p.tbl); + aw->p.tbl = NULL; +} + +void +pf_tbladdr_copyout(struct pf_addr_wrap *aw) +{ + struct pfr_ktable *kt = aw->p.tbl; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (aw->type != 
PF_ADDR_TABLE || kt == NULL) + return; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + aw->p.tbl = NULL; + aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? + kt->pfrkt_cnt : -1; +} + +#ifndef NO_APPLE_EXTENSIONS +static void +pf_print_addr(struct pf_addr *addr, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart = 255, curend = 0, + maxstart = 0, maxend = 0; + for (i = 0; i < 8; i++) { + if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + else + curend = i; + } else { + if (curstart) { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + curstart = 255; + } + } + } + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (maxend != 7) { + if (i == maxstart) + printf(":"); + } else { + if (i == maxend) + printf(":"); + } + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + break; + } +#endif /* INET6 */ + } +} + +static void +pf_print_sk_host(struct pf_state_host *sh, sa_family_t af, int proto, + u_int8_t proto_variant) +{ + pf_print_addr(&sh->addr, af); + + switch (proto) { + case IPPROTO_ESP: + if (sh->xport.spi) + printf("[%08x]", ntohl(sh->xport.spi)); + break; + + case IPPROTO_GRE: + if (proto_variant == PF_GRE_PPTP_VARIANT) + printf("[%u]", ntohs(sh->xport.call_id)); + break; + + case IPPROTO_TCP: + case IPPROTO_UDP: + printf("[%u]", ntohs(sh->xport.port)); + break; + + default: + break; + } +} +#endif + +static void +pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) +{ +#ifndef NO_APPLE_EXTENSIONS + pf_print_addr(addr, af); + if (p) + printf("[%u]", ntohs(p)); +#else + switch (af) { +#if INET + case AF_INET: { + u_int32_t a = ntohl(addr->addr32[0]); + 
printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, + (a>>8)&255, a&255); + if (p) { + p = ntohs(p); + printf(":%u", p); + } + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + u_int16_t b; + u_int8_t i, curstart = 255, curend = 0, + maxstart = 0, maxend = 0; + for (i = 0; i < 8; i++) { + if (!addr->addr16[i]) { + if (curstart == 255) + curstart = i; + else + curend = i; + } else { + if (curstart) { + if ((curend - curstart) > + (maxend - maxstart)) { + maxstart = curstart; + maxend = curend; + curstart = 255; + } + } + } + } + for (i = 0; i < 8; i++) { + if (i >= maxstart && i <= maxend) { + if (maxend != 7) { + if (i == maxstart) + printf(":"); + } else { + if (i == maxend) + printf(":"); + } + } else { + b = ntohs(addr->addr16[i]); + printf("%x", b); + if (i < 7) + printf(":"); + } + } + if (p) { + p = ntohs(p); + printf("[%u]", p); + } + break; + } +#endif /* INET6 */ + } +#endif +} + +void +pf_print_state(struct pf_state *s) +{ + struct pf_state_key *sk = s->state_key; + switch (sk->proto) { +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_ESP: + printf("ESP "); + break; + case IPPROTO_GRE: + printf("GRE%u ", sk->proto_variant); + break; +#endif + case IPPROTO_TCP: + printf("TCP "); + break; + case IPPROTO_UDP: + printf("UDP "); + break; + case IPPROTO_ICMP: + printf("ICMP "); + break; + case IPPROTO_ICMPV6: + printf("ICMPV6 "); + break; + default: + printf("%u ", sk->proto); + break; + } +#ifndef NO_APPLE_EXTENSIONS + pf_print_sk_host(&sk->lan, sk->af, sk->proto, sk->proto_variant); + printf(" "); + pf_print_sk_host(&sk->gwy, sk->af, sk->proto, sk->proto_variant); + printf(" "); + pf_print_sk_host(&sk->ext, sk->af, sk->proto, sk->proto_variant); +#else + pf_print_host(&sk->lan.addr, sk->lan.port, sk->af); + printf(" "); + pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af); + printf(" "); + pf_print_host(&sk->ext.addr, sk->ext.port, sk->af); +#endif + printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, + s->src.seqhi, s->src.max_win, 
s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, + s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" %u:%u", s->src.state, s->dst.state); +} + +void +pf_print_flags(u_int8_t f) +{ + if (f) + printf(" "); + if (f & TH_FIN) + printf("F"); + if (f & TH_SYN) + printf("S"); + if (f & TH_RST) + printf("R"); + if (f & TH_PUSH) + printf("P"); + if (f & TH_ACK) + printf("A"); + if (f & TH_URG) + printf("U"); + if (f & TH_ECE) + printf("E"); + if (f & TH_CWR) + printf("W"); +} + +#define PF_SET_SKIP_STEPS(i) \ + do { \ + while (head[i] != cur) { \ + head[i]->skip[i].ptr = cur; \ + head[i] = TAILQ_NEXT(head[i], entries); \ + } \ + } while (0) + +void +pf_calc_skip_steps(struct pf_rulequeue *rules) +{ + struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; + int i; + + cur = TAILQ_FIRST(rules); + prev = cur; + for (i = 0; i < PF_SKIP_COUNT; ++i) + head[i] = cur; + while (cur != NULL) { + + if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) + PF_SET_SKIP_STEPS(PF_SKIP_IFP); + if (cur->direction != prev->direction) + PF_SET_SKIP_STEPS(PF_SKIP_DIR); + if (cur->af != prev->af) + PF_SET_SKIP_STEPS(PF_SKIP_AF); + if (cur->proto != prev->proto) + PF_SET_SKIP_STEPS(PF_SKIP_PROTO); + if (cur->src.neg != prev->src.neg || + pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); +#ifndef NO_APPLE_EXTENSIONS + { + union pf_rule_xport *cx = &cur->src.xport; + union pf_rule_xport *px = &prev->src.xport; + + switch (cur->proto) { + case IPPROTO_GRE: + case IPPROTO_ESP: + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + break; + default: + if (prev->proto == IPPROTO_GRE || + prev->proto == IPPROTO_ESP || + cx->range.op != px->range.op || + cx->range.port[0] != px->range.port[0] || + cx->range.port[1] != 
px->range.port[1]) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); + break; + } + } +#else + if (cur->src.port[0] != prev->src.port[0] || + cur->src.port[1] != prev->src.port[1] || + cur->src.port_op != prev->src.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); +#endif + if (cur->dst.neg != prev->dst.neg || + pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) + PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); +#ifndef NO_APPLE_EXTENSIONS + { + union pf_rule_xport *cx = &cur->dst.xport; + union pf_rule_xport *px = &prev->dst.xport; + + switch (cur->proto) { + case IPPROTO_GRE: + if (cur->proto != prev->proto || + cx->call_id != px->call_id) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + break; + case IPPROTO_ESP: + if (cur->proto != prev->proto || + cx->spi != px->spi) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + break; + default: + if (prev->proto == IPPROTO_GRE || + prev->proto == IPPROTO_ESP || + cx->range.op != px->range.op || + cx->range.port[0] != px->range.port[0] || + cx->range.port[1] != px->range.port[1]) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); + break; + } + } +#else + if (cur->dst.port[0] != prev->dst.port[0] || + cur->dst.port[1] != prev->dst.port[1] || + cur->dst.port_op != prev->dst.port_op) + PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); +#endif + + prev = cur; + cur = TAILQ_NEXT(cur, entries); + } + for (i = 0; i < PF_SKIP_COUNT; ++i) + PF_SET_SKIP_STEPS(i); +} + +static int +pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) +{ + if (aw1->type != aw2->type) + return (1); + switch (aw1->type) { + case PF_ADDR_ADDRMASK: + case PF_ADDR_RANGE: + if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, 0)) + return (1); + if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, 0)) + return (1); + return (0); + case PF_ADDR_DYNIFTL: + return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); + case PF_ADDR_NOROUTE: + case PF_ADDR_URPFFAILED: + return (0); + case PF_ADDR_TABLE: + return (aw1->p.tbl != aw2->p.tbl); + case PF_ADDR_RTLABEL: + return (aw1->v.rtlabel != aw2->v.rtlabel); + default: + 
printf("invalid address type: %d\n", aw1->type); + return (1); + } +} + +u_int16_t +pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp) +{ + u_int32_t l; + + if (udp && !cksum) + return (0); + l = cksum + old - new; + l = (l >> 16) + (l & 0xffff); + l = l & 0xffff; + if (udp && !l) + return (0xffff); + return (l); +} + +static void +pf_change_ap(int dir, struct mbuf *m, struct pf_addr *a, u_int16_t *p, + u_int16_t *ic, u_int16_t *pc, struct pf_addr *an, u_int16_t pn, + u_int8_t u, sa_family_t af) +{ + struct pf_addr ao; + u_int16_t po = *p; + + PF_ACPY(&ao, a, af); + PF_ACPY(a, an, af); + + *p = pn; + + switch (af) { +#if INET + case AF_INET: + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + ao.addr16[0], an->addr16[0], 0), + ao.addr16[1], an->addr16[1], 0); + *p = pn; + /* + * If the packet is originated from an ALG on the NAT gateway + * (source address is loopback or local), in which case the + * TCP/UDP checksum field contains the pseudo header checksum + * that's not yet complemented. 
+ */ + if (dir == PF_OUT && m != NULL && + (m->m_flags & M_PKTHDR) && + (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))) { + /* Pseudo-header checksum does not include ports */ + *pc = ~pf_cksum_fixup(pf_cksum_fixup(~*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u); + } else { + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + po, pn, u); + } + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + *pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u), + po, pn, u); + break; +#endif /* INET6 */ + } +} + + +/* Changes a u_int32_t. 
Uses a void * so there are no align restrictions */ +void +pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u) +{ + u_int32_t ao; + + memcpy(&ao, a, sizeof (ao)); + memcpy(a, &an, sizeof (u_int32_t)); + *c = pf_cksum_fixup(pf_cksum_fixup(*c, ao / 65536, an / 65536, u), + ao % 65536, an % 65536, u); +} + +#if INET6 +static void +pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) +{ + struct pf_addr ao; + + PF_ACPY(&ao, a, AF_INET6); + PF_ACPY(a, an, AF_INET6); + + *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*c, + ao.addr16[0], an->addr16[0], u), + ao.addr16[1], an->addr16[1], u), + ao.addr16[2], an->addr16[2], u), + ao.addr16[3], an->addr16[3], u), + ao.addr16[4], an->addr16[4], u), + ao.addr16[5], an->addr16[5], u), + ao.addr16[6], an->addr16[6], u), + ao.addr16[7], an->addr16[7], u); +} +#endif /* INET6 */ + +static void +pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, + struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c, + u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af) +{ + struct pf_addr oia, ooa; + + PF_ACPY(&oia, ia, af); + PF_ACPY(&ooa, oa, af); + + /* Change inner protocol port, fix inner protocol checksum. */ + if (ip != NULL) { + u_int16_t oip = *ip; + u_int32_t opc = 0; + + if (pc != NULL) + opc = *pc; + *ip = np; + if (pc != NULL) + *pc = pf_cksum_fixup(*pc, oip, *ip, u); + *ic = pf_cksum_fixup(*ic, oip, *ip, 0); + if (pc != NULL) + *ic = pf_cksum_fixup(*ic, opc, *pc, 0); + } + /* Change inner ip address, fix inner ip and icmp checksums. 
*/ + PF_ACPY(ia, na, af); + switch (af) { +#if INET + case AF_INET: { + u_int32_t oh2c = *h2c; + + *h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], 0), + oia.addr16[1], ia->addr16[1], 0); + *ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0); + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + oia.addr16[0], ia->addr16[0], u), + oia.addr16[1], ia->addr16[1], u), + oia.addr16[2], ia->addr16[2], u), + oia.addr16[3], ia->addr16[3], u), + oia.addr16[4], ia->addr16[4], u), + oia.addr16[5], ia->addr16[5], u), + oia.addr16[6], ia->addr16[6], u), + oia.addr16[7], ia->addr16[7], u); + break; +#endif /* INET6 */ + } + /* Change outer ip address, fix outer ip or icmpv6 checksum. */ + PF_ACPY(oa, na, af); + switch (af) { +#if INET + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; +#endif /* INET6 */ + } +} + + +/* + * Need to modulate the sequence numbers in the TCP SACK option + * (credits to Krzysztof Pfaff for report and patch) + */ +static int +pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *dst) +{ + int hlen = (th->th_off << 2) - sizeof (*th), thoptlen = hlen; + u_int8_t 
opts[MAX_TCPOPTLEN], *opt = opts; + int copyback = 0, i, olen; + struct sackblk sack; + +#define TCPOLEN_SACKLEN (TCPOLEN_SACK + 2) + if (hlen < TCPOLEN_SACKLEN || + !pf_pull_hdr(m, off + sizeof (*th), opts, hlen, NULL, NULL, pd->af)) + return (0); + + while (hlen >= TCPOLEN_SACKLEN) { + olen = opt[1]; + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_SACK: + if (olen > hlen) + olen = hlen; + if (olen >= TCPOLEN_SACKLEN) { + for (i = 2; i + TCPOLEN_SACK <= olen; + i += TCPOLEN_SACK) { + memcpy(&sack, &opt[i], sizeof (sack)); + pf_change_a(&sack.start, &th->th_sum, + htonl(ntohl(sack.start) - + dst->seqdiff), 0); + pf_change_a(&sack.end, &th->th_sum, + htonl(ntohl(sack.end) - + dst->seqdiff), 0); + memcpy(&opt[i], &sack, sizeof (sack)); + } +#ifndef NO_APPLE_EXTENSIONS + copyback = off + sizeof (*th) + thoptlen; +#else + copyback = 1; +#endif + } + /* FALLTHROUGH */ + default: + if (olen < 2) + olen = 2; + hlen -= olen; + opt += olen; + } + } + +#ifndef NO_APPLE_EXTENSIONS + if (copyback) { + m = pf_lazy_makewritable(pd, m, copyback); + if (!m) + return (-1); + m_copyback(m, off + sizeof (*th), thoptlen, opts); + } +#else + if (copyback) + m_copyback(m, off + sizeof (*th), thoptlen, opts); +#endif + return (copyback); +} + +static void +pf_send_tcp(const struct pf_rule *r, sa_family_t af, + const struct pf_addr *saddr, const struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, + u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, + u_int16_t rtag, struct ether_header *eh, struct ifnet *ifp) +{ +#pragma unused(eh, ifp) + struct mbuf *m; + int len, tlen; +#if INET + struct ip *h = NULL; +#endif /* INET */ +#if INET6 + struct ip6_hdr *h6 = NULL; +#endif /* INET6 */ + struct tcphdr *th = NULL; + char *opt; + struct pf_mtag *pf_mtag; + + /* maximum segment size tcp option */ + tlen = sizeof (struct tcphdr); + if (mss) + tlen += 4; + + switch (af) { +#if 
INET + case AF_INET: + len = sizeof (struct ip) + tlen; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + len = sizeof (struct ip6_hdr) + tlen; + break; +#endif /* INET6 */ + default: + panic("pf_send_tcp: not AF_INET or AF_INET6!"); + return; + } + + /* create outgoing mbuf */ + m = m_gethdr(M_DONTWAIT, MT_HEADER); + if (m == NULL) + return; + + if ((pf_mtag = pf_get_mtag(m)) == NULL) { + m_free(m); + return; + } + + if (tag) + pf_mtag->flags |= PF_TAG_GENERATED; + pf_mtag->tag = rtag; + + if (r != NULL && PF_RTABLEID_IS_VALID(r->rtableid)) + pf_mtag->rtableid = r->rtableid; + +#if ALTQ + if (r != NULL && r->qid) { + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->hdr = mtod(m, struct ip *); + } +#endif /* ALTQ */ + m->m_data += max_linkhdr; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + switch (af) { +#if INET + case AF_INET: + h = mtod(m, struct ip *); + + /* IP header fields included in the TCP checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(tlen); + h->ip_src.s_addr = saddr->v4.s_addr; + h->ip_dst.s_addr = daddr->v4.s_addr; + + th = (struct tcphdr *)((caddr_t)h + sizeof (struct ip)); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + + /* IP header fields included in the TCP checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(tlen); + memcpy(&h6->ip6_src, &saddr->v6, sizeof (struct in6_addr)); + memcpy(&h6->ip6_dst, &daddr->v6, sizeof (struct in6_addr)); + + th = (struct tcphdr *)((caddr_t)h6 + sizeof (struct ip6_hdr)); + break; +#endif /* INET6 */ + } + + /* TCP header */ + th->th_sport = sport; + th->th_dport = dport; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_off = tlen >> 2; + th->th_flags = flags; + th->th_win = htons(win); + + if (mss) { + opt = (char *)(th + 1); + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; +#if BYTE_ORDER != BIG_ENDIAN + HTONS(mss); +#endif + bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2); + } + + 
switch (af) { +#if INET + case AF_INET: { + struct route ro; + + /* TCP checksum */ + th->th_sum = in_cksum(m, len); + + /* Finish the IP header */ + h->ip_v = 4; + h->ip_hl = sizeof (*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + /* + * ip_output() expects ip_len and ip_off to be in host order. + */ + h->ip_len = len; + h->ip_off = (path_mtu_discovery ? IP_DF : 0); + h->ip_ttl = ttl ? ttl : ip_defttl; + h->ip_sum = 0; + + bzero(&ro, sizeof (ro)); + ip_output(m, NULL, &ro, 0, NULL, NULL); + if (ro.ro_rt != NULL) + rtfree(ro.ro_rt); + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + struct route_in6 ro6; + + /* TCP checksum */ + th->th_sum = in6_cksum(m, IPPROTO_TCP, + sizeof (struct ip6_hdr), tlen); + + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + + bzero(&ro6, sizeof (ro6)); + ip6_output(m, NULL, &ro6, 0, NULL, NULL, 0); + if (ro6.ro_rt != NULL) + rtfree(ro6.ro_rt); + break; + } +#endif /* INET6 */ + } +} + +static void +pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, + struct pf_rule *r) +{ + struct mbuf *m0; + struct pf_mtag *pf_mtag; + + m0 = m_copy(m, 0, M_COPYALL); + if (m0 == NULL) + return; + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) + return; + + pf_mtag->flags |= PF_TAG_GENERATED; + + if (PF_RTABLEID_IS_VALID(r->rtableid)) + pf_mtag->rtableid = r->rtableid; + +#if ALTQ + if (r->qid) { + pf_mtag->qid = r->qid; + /* add hints for ecn */ + pf_mtag->hdr = mtod(m0, struct ip *); + } +#endif /* ALTQ */ + switch (af) { +#if INET + case AF_INET: + icmp_error(m0, type, code, 0, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + icmp6_error(m0, type, code, 0); + break; +#endif /* INET6 */ + } +} + +/* + * Return 1 if the addresses a and b match (with mask m), otherwise return 0. + * If n is 0, they match if they are equal. If n is != 0, they match if they + * are different. 
+ */ +int +pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, + struct pf_addr *b, sa_family_t af) +{ + int match = 0; + + switch (af) { +#if INET + case AF_INET: + if ((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) + match++; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (((a->addr32[0] & m->addr32[0]) == + (b->addr32[0] & m->addr32[0])) && + ((a->addr32[1] & m->addr32[1]) == + (b->addr32[1] & m->addr32[1])) && + ((a->addr32[2] & m->addr32[2]) == + (b->addr32[2] & m->addr32[2])) && + ((a->addr32[3] & m->addr32[3]) == + (b->addr32[3] & m->addr32[3]))) + match++; + break; +#endif /* INET6 */ + } + if (match) { + if (n) + return (0); + else + return (1); + } else { + if (n) + return (1); + else + return (0); + } +} + +/* + * Return 1 if b <= a <= e, otherwise return 0. + */ +int +pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, + struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: + if ((a->addr32[0] < b->addr32[0]) || + (a->addr32[0] > e->addr32[0])) + return (0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: { + int i; + + /* check a >= b */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] > b->addr32[i]) + break; + else if (a->addr32[i] < b->addr32[i]) + return (0); + /* check a <= e */ + for (i = 0; i < 4; ++i) + if (a->addr32[i] < e->addr32[i]) + break; + else if (a->addr32[i] > e->addr32[i]) + return (0); + break; + } +#endif /* INET6 */ + } + return (1); +} + +int +pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) +{ + switch (op) { + case PF_OP_IRG: + return ((p > a1) && (p < a2)); + case PF_OP_XRG: + return ((p < a1) || (p > a2)); + case PF_OP_RRG: + return ((p >= a1) && (p <= a2)); + case PF_OP_EQ: + return (p == a1); + case PF_OP_NE: + return (p != a1); + case PF_OP_LT: + return (p < a1); + case PF_OP_LE: + return (p <= a1); + case PF_OP_GT: + return (p > a1); + case PF_OP_GE: + return (p >= a1); + } + return (0); /* never reached */ +} + +int 
+pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) +{ +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(a1); + NTOHS(a2); + NTOHS(p); +#endif + return (pf_match(op, a1, a2, p)); +} + +#ifndef NO_APPLE_EXTENSIONS +int +pf_match_xport(u_int8_t proto, u_int8_t proto_variant, union pf_rule_xport *rx, + union pf_state_xport *sx) +{ + int d = !0; + + if (sx) { + switch (proto) { + case IPPROTO_GRE: + if (proto_variant == PF_GRE_PPTP_VARIANT) + d = (rx->call_id == sx->call_id); + break; + + case IPPROTO_ESP: + d = (rx->spi == sx->spi); + break; + + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + if (rx->range.op) + d = pf_match_port(rx->range.op, + rx->range.port[0], rx->range.port[1], + sx->port); + break; + + default: + break; + } + } + + return (d); +} +#endif + +int +pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) +{ + if (u == UID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, u)); +} + +int +pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) +{ + if (g == GID_MAX && op != PF_OP_EQ && op != PF_OP_NE) + return (0); + return (pf_match(op, a1, a2, g)); +} + +static int +pf_match_tag(struct mbuf *m, struct pf_rule *r, struct pf_mtag *pf_mtag, + int *tag) +{ +#pragma unused(m) + if (*tag == -1) + *tag = pf_mtag->tag; + + return ((!r->match_tag_not && r->match_tag == *tag) || + (r->match_tag_not && r->match_tag != *tag)); +} + +int +pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, + unsigned int rtableid) +{ + if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid)) + return (0); + + if (pf_mtag == NULL && (pf_mtag = pf_get_mtag(m)) == NULL) + return (1); + + if (tag > 0) + pf_mtag->tag = tag; + if (PF_RTABLEID_IS_VALID(rtableid)) + pf_mtag->rtableid = rtableid; + + return (0); +} + +static void +pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + + (*r)->anchor->match = 0; 
+ if (match) + *match = 0; + if (*depth >= (int)sizeof (pf_anchor_stack) / + (int)sizeof (pf_anchor_stack[0])) { + printf("pf_step_into_anchor: stack overflow\n"); + *r = TAILQ_NEXT(*r, entries); + return; + } else if (*depth == 0 && a != NULL) + *a = *r; + f = pf_anchor_stack + (*depth)++; + f->rs = *rs; + f->r = *r; + if ((*r)->anchor_wildcard) { + f->parent = &(*r)->anchor->children; + if ((f->child = RB_MIN(pf_anchor_node, f->parent)) == + NULL) { + *r = NULL; + return; + } + *rs = &f->child->ruleset; + } else { + f->parent = NULL; + f->child = NULL; + *rs = &(*r)->anchor->ruleset; + } + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); +} + +static int +pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, + struct pf_rule **r, struct pf_rule **a, int *match) +{ + struct pf_anchor_stackframe *f; + int quick = 0; + + do { + if (*depth <= 0) + break; + f = pf_anchor_stack + *depth - 1; + if (f->parent != NULL && f->child != NULL) { + if (f->child->match || + (match != NULL && *match)) { + f->r->anchor->match = 1; + *match = 0; + } + f->child = RB_NEXT(pf_anchor_node, f->parent, f->child); + if (f->child != NULL) { + *rs = &f->child->ruleset; + *r = TAILQ_FIRST((*rs)->rules[n].active.ptr); + if (*r == NULL) + continue; + else + break; + } + } + (*depth)--; + if (*depth == 0 && a != NULL) + *a = NULL; + *rs = f->rs; + if (f->r->anchor->match || (match != NULL && *match)) + quick = f->r->quick; + *r = TAILQ_NEXT(f->r, entries); + } while (*r == NULL); + + return (quick); +} + +#if INET6 +void +pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, + struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]); + break; +#endif /* INET */ + case AF_INET6: + naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | + ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]); + naddr->addr32[1] = 
(raddr->addr32[1] & rmask->addr32[1]) | + ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]); + naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | + ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]); + naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | + ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]); + break; + } +} + +void +pf_addr_inc(struct pf_addr *addr, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: + addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); + break; +#endif /* INET */ + case AF_INET6: + if (addr->addr32[3] == 0xffffffff) { + addr->addr32[3] = 0; + if (addr->addr32[2] == 0xffffffff) { + addr->addr32[2] = 0; + if (addr->addr32[1] == 0xffffffff) { + addr->addr32[1] = 0; + addr->addr32[0] = + htonl(ntohl(addr->addr32[0]) + 1); + } else + addr->addr32[1] = + htonl(ntohl(addr->addr32[1]) + 1); + } else + addr->addr32[2] = + htonl(ntohl(addr->addr32[2]) + 1); + } else + addr->addr32[3] = + htonl(ntohl(addr->addr32[3]) + 1); + break; + } +} +#endif /* INET6 */ + +#define mix(a, b, c) \ + do { \ + a -= b; a -= c; a ^= (c >> 13); \ + b -= c; b -= a; b ^= (a << 8); \ + c -= a; c -= b; c ^= (b >> 13); \ + a -= b; a -= c; a ^= (c >> 12); \ + b -= c; b -= a; b ^= (a << 16); \ + c -= a; c -= b; c ^= (b >> 5); \ + a -= b; a -= c; a ^= (c >> 3); \ + b -= c; b -= a; b ^= (a << 10); \ + c -= a; c -= b; c ^= (b >> 15); \ + } while (0) + +/* + * hash function based on bridge_hash in if_bridge.c + */ +static void +pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, + struct pf_poolhashkey *key, sa_family_t af) +{ + u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; + + switch (af) { +#if INET + case AF_INET: + a += inaddr->addr32[0]; + b += key->key32[1]; + mix(a, b, c); + hash->addr32[0] = c + key->key32[2]; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + a += inaddr->addr32[0]; + b += inaddr->addr32[2]; + mix(a, b, c); + hash->addr32[0] = c; + a += inaddr->addr32[1]; + b += inaddr->addr32[3]; + c += 
key->key32[1]; + mix(a, b, c); + hash->addr32[1] = c; + a += inaddr->addr32[2]; + b += inaddr->addr32[1]; + c += key->key32[2]; + mix(a, b, c); + hash->addr32[2] = c; + a += inaddr->addr32[3]; + b += inaddr->addr32[0]; + c += key->key32[3]; + mix(a, b, c); + hash->addr32[3] = c; + break; +#endif /* INET6 */ + } +} + +static int +pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, + struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn) +{ + unsigned char hash[16]; + struct pf_pool *rpool = &r->rpool; + struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; + struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; + struct pf_pooladdr *acur = rpool->cur; + struct pf_src_node k; + + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + k.af = af; + PF_ACPY(&k.addr, saddr, af); + if (r->rule_flag & PFRULE_RULESRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) + k.rule.ptr = r; + else + k.rule.ptr = NULL; + pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; + *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); + if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { + PF_ACPY(naddr, &(*sn)->raddr, af); + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf_map_addr: src tracking maps "); + pf_print_host(&k.addr, 0, af); + printf(" to "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + return (0); + } + } + + if (rpool->cur->addr.type == PF_ADDR_NOROUTE) + return (1); + if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + switch (af) { +#if INET + case AF_INET: + if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = &rpool->cur->addr.p.dyn->pfid_addr4; + rmask = &rpool->cur->addr.p.dyn->pfid_mask4; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && + (rpool->opts & PF_POOL_TYPEMASK) != + PF_POOL_ROUNDROBIN) + return (1); + raddr = 
&rpool->cur->addr.p.dyn->pfid_addr6; + rmask = &rpool->cur->addr.p.dyn->pfid_mask6; + break; +#endif /* INET6 */ + } + } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) + return (1); /* unsupported */ + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + } + + switch (rpool->opts & PF_POOL_TYPEMASK) { + case PF_POOL_NONE: + PF_ACPY(naddr, raddr, af); + break; + case PF_POOL_BITMASK: + PF_POOLMASK(naddr, raddr, rmask, saddr, af); + break; + case PF_POOL_RANDOM: + if (init_addr != NULL && PF_AZERO(init_addr, af)) { + switch (af) { +#if INET + case AF_INET: + rpool->counter.addr32[0] = htonl(random()); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (rmask->addr32[3] != 0xffffffff) + rpool->counter.addr32[3] = + htonl(random()); + else + break; + if (rmask->addr32[2] != 0xffffffff) + rpool->counter.addr32[2] = + htonl(random()); + else + break; + if (rmask->addr32[1] != 0xffffffff) + rpool->counter.addr32[1] = + htonl(random()); + else + break; + if (rmask->addr32[0] != 0xffffffff) + rpool->counter.addr32[0] = + htonl(random()); + break; +#endif /* INET6 */ + } + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + PF_ACPY(init_addr, naddr, af); + + } else { + PF_AINC(&rpool->counter, af); + PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + } + break; + case PF_POOL_SRCHASH: + pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); + PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); + break; + case PF_POOL_ROUNDROBIN: + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + if (!pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) + goto get_addr; + } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) + 
goto get_addr; + + try_next: + if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) + rpool->cur = TAILQ_FIRST(&rpool->list); + if (rpool->cur->addr.type == PF_ADDR_TABLE) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.tbl, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + rpool->tblidx = -1; + if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + &rpool->tblidx, &rpool->counter, + &raddr, &rmask, af)) { + /* table contains no address of type 'af' */ + if (rpool->cur != acur) + goto try_next; + return (1); + } + } else { + raddr = &rpool->cur->addr.v.a.addr; + rmask = &rpool->cur->addr.v.a.mask; + PF_ACPY(&rpool->counter, raddr, af); + } + + get_addr: + PF_ACPY(naddr, &rpool->counter, af); + if (init_addr != NULL && PF_AZERO(init_addr, af)) + PF_ACPY(init_addr, naddr, af); + PF_AINC(&rpool->counter, af); + break; + } + if (*sn != NULL) + PF_ACPY(&(*sn)->raddr, naddr, af); + + if (pf_status.debug >= PF_DEBUG_MISC && + (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { + printf("pf_map_addr: selected address "); + pf_print_host(naddr, 0, af); + printf("\n"); + } + + return (0); +} + +#ifndef NO_APPLE_EXTENSIONS +static int +pf_get_sport(struct pf_pdesc *pd, struct pfi_kif *kif, struct pf_rule *r, + struct pf_addr *saddr, union pf_state_xport *sxport, struct pf_addr *daddr, + union pf_state_xport *dxport, struct pf_addr *naddr, + union pf_state_xport *nxport, struct pf_src_node **sn) +#else +int +pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, + struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, + struct pf_src_node **sn) +#endif +{ +#pragma unused(kif) + struct pf_state_key_cmp key; + struct pf_addr init_addr; +#ifndef NO_APPLE_EXTENSIONS + unsigned int cut; + sa_family_t 
af = pd->af; + u_int8_t proto = pd->proto; + unsigned int low = ntohs(r->rpool.proxy_port[0]); + unsigned int high = ntohs(r->rpool.proxy_port[1]); +#else + u_int16_t cut; +#endif + + bzero(&init_addr, sizeof (init_addr)); + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + + if (proto == IPPROTO_ICMP) { + low = 1; + high = 65535; + } + +#ifndef NO_APPLE_EXTENSIONS + if (!nxport) + return (0); /* No output necessary. */ + + /*--- Special mapping rules for UDP ---*/ + if (proto == IPPROTO_UDP) { + + /*--- Never float IKE source port ---*/ + if (sxport->port == PF_IKE_PORT) { + nxport->port = sxport->port; + return (0); + } + + /*--- Apply exterior mapping options ---*/ + if (r->extmap > PF_EXTMAP_APD) { + struct pf_state *s; + + TAILQ_FOREACH(s, &state_list, entry_list) { + struct pf_state_key *sk = s->state_key; + if (!sk) + continue; + if (s->nat_rule.ptr != r) + continue; + if (sk->proto != IPPROTO_UDP || sk->af != af) + continue; + if (sk->lan.xport.port != sxport->port) + continue; + if (PF_ANEQ(&sk->lan.addr, saddr, af)) + continue; + if (r->extmap < PF_EXTMAP_EI && + PF_ANEQ(&sk->ext.addr, daddr, af)) + continue; + + nxport->port = sk->gwy.xport.port; + return (0); + } + } + } +#endif + + do { + key.af = af; + key.proto = proto; + PF_ACPY(&key.ext.addr, daddr, key.af); + PF_ACPY(&key.gwy.addr, naddr, key.af); +#ifndef NO_APPLE_EXTENSIONS + switch (proto) { + case IPPROTO_UDP: + key.proto_variant = r->extfilter; + break; + default: + key.proto_variant = 0; + break; + } + if (dxport) + key.ext.xport = *dxport; + else + memset(&key.ext.xport, 0, sizeof (key.ext.xport)); +#else + key.ext.port = dport; +#endif + + /* + * port search; start random, step; + * similar 2 portloop in in_pcbbind + */ + if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || + proto == IPPROTO_ICMP)) { +#ifndef NO_APPLE_EXTENSIONS + if (dxport) + key.gwy.xport = *dxport; + else + memset(&key.gwy.xport, 0, + sizeof (key.ext.xport)); +#else + key.gwy.port = dport; +#endif 
+ if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == 0 && high == 0) { +#ifndef NO_APPLE_EXTENSIONS + key.gwy.xport = *nxport; +#else + key.gwy.port = *nport; +#endif + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) + return (0); + } else if (low == high) { +#ifndef NO_APPLE_EXTENSIONS + key.gwy.xport.port = htons(low); + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + nxport->port = htons(low); + return (0); + } +#else + key.gwy.port = htons(low); + if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { + *nport = htons(low); + return (0); + } +#endif + } else { +#ifndef NO_APPLE_EXTENSIONS + unsigned int tmp; +#else + u_int16_t tmp; +#endif + if (low > high) { + tmp = low; + low = high; + high = tmp; + } + /* low < high */ + cut = htonl(random()) % (1 + high - low) + low; + /* low <= cut <= high */ + for (tmp = cut; tmp <= high; ++(tmp)) { +#ifndef NO_APPLE_EXTENSIONS + key.gwy.xport.port = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + nxport->port = htons(tmp); + return (0); + } +#else + key.gwy.port = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } +#endif + } + for (tmp = cut - 1; tmp >= low; --(tmp)) { +#ifndef NO_APPLE_EXTENSIONS + key.gwy.xport.port = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + nxport->port = htons(tmp); + return (0); + } +#else + key.gwy.port = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } +#endif + } + } + + switch (r->rpool.opts & PF_POOL_TYPEMASK) { + case PF_POOL_RANDOM: + case PF_POOL_ROUNDROBIN: + if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) + return (1); + break; + case PF_POOL_NONE: + case PF_POOL_SRCHASH: + case PF_POOL_BITMASK: + default: + return (1); + } + } while (!PF_AEQ(&init_addr, naddr, af)); + + return (1); /* none available */ +} + +#ifndef NO_APPLE_EXTENSIONS +static struct pf_rule * 
+pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, + union pf_state_xport *sxport, struct pf_addr *daddr, + union pf_state_xport *dxport, int rs_num) +#else +struct pf_rule * +pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, int rs_num) +#endif +{ + struct pf_rule *r, *rm = NULL; + struct pf_ruleset *ruleset = NULL; + int tag = -1; + unsigned int rtableid = IFSCOPE_NONE; + int asd = 0; + + r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr); + while (r && rm == NULL) { + struct pf_rule_addr *src = NULL, *dst = NULL; + struct pf_addr_wrap *xdst = NULL; +#ifndef NO_APPLE_EXTENSIONS + struct pf_addr_wrap *xsrc = NULL; +#endif + + if (r->action == PF_BINAT && direction == PF_IN) { + src = &r->dst; + if (r->rpool.cur != NULL) + xdst = &r->rpool.cur->addr; +#ifndef NO_APPLE_EXTENSIONS + } else if (r->action == PF_RDR && direction == PF_OUT) { + dst = &r->src; + src = &r->dst; + if (r->rpool.cur != NULL) + xsrc = &r->rpool.cur->addr; +#endif + } else { + src = &r->src; + dst = &r->dst; + } + + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != pd->af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; +#ifndef NO_APPLE_EXTENSIONS + else if (xsrc && PF_MISMATCHAW(xsrc, saddr, pd->af, 0, NULL)) + r = TAILQ_NEXT(r, entries); + else if (!xsrc && PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) + r = r->skip[src == &r->src ? 
PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (!pf_match_xport(r->proto, r->proto_variant, &src->xport, + sxport)) +#else + else if (PF_MISMATCHAW(&src->addr, saddr, pd->af, + src->neg, kif)) + r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : + PF_SKIP_DST_ADDR].ptr; + else if (src->port_op && !pf_match_port(src->port_op, + src->port[0], src->port[1], sport)) +#endif + r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT : + PF_SKIP_DST_PORT].ptr; + else if (dst != NULL && + PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af, + 0, NULL)) + r = TAILQ_NEXT(r, entries); +#ifndef NO_APPLE_EXTENSIONS + else if (dst && !pf_match_xport(r->proto, r->proto_variant, + &dst->xport, dxport)) +#else + else if (dst != NULL && dst->port_op && + !pf_match_port(dst->port_op, dst->port[0], + dst->port[1], dport)) +#endif + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto != + IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m, + off, pd->hdr.tcp), r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (PF_RTABLEID_IS_VALID(r->rtableid)) + rtableid = r->rtableid; + if (r->anchor == NULL) { + rm = r; + } else + pf_step_into_anchor(&asd, &ruleset, rs_num, + &r, NULL, NULL); + } + if (r == NULL) + pf_step_out_of_anchor(&asd, &ruleset, rs_num, &r, + NULL, NULL); + } + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) + return (NULL); + if (rm != NULL && (rm->action == PF_NONAT || + rm->action == PF_NORDR || rm->action == PF_NOBINAT)) + return (NULL); + return (rm); +} + +#ifndef NO_APPLE_EXTENSIONS +static struct pf_rule * +pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, + int direction, struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_addr *saddr, union pf_state_xport 
*sxport, struct pf_addr *daddr, + union pf_state_xport *dxport, struct pf_addr *naddr, + union pf_state_xport *nxport) +#else +struct pf_rule * +pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, + struct pfi_kif *kif, struct pf_src_node **sn, + struct pf_addr *saddr, u_int16_t sport, + struct pf_addr *daddr, u_int16_t dport, + struct pf_addr *naddr, u_int16_t *nport) +#endif +{ + struct pf_rule *r = NULL; + +#ifndef NO_APPLE_EXTENSIONS + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sxport, daddr, dxport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sxport, daddr, dxport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sxport, daddr, dxport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sxport, daddr, dxport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sxport, daddr, dxport, PF_RULESET_BINAT); + } +#else + if (direction == PF_OUT) { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_BINAT); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_NAT); + } else { + r = pf_match_translation(pd, m, off, direction, kif, saddr, + sport, daddr, dport, PF_RULESET_RDR); + if (r == NULL) + r = pf_match_translation(pd, m, off, direction, kif, + saddr, sport, daddr, dport, PF_RULESET_BINAT); + } +#endif + + if (r != NULL) { + switch (r->action) { + case PF_NONAT: + case PF_NOBINAT: + case PF_NORDR: + return (NULL); + case PF_NAT: +#ifndef NO_APPLE_EXTENSIONS + if (pf_get_sport(pd, kif, r, saddr, sxport, daddr, + dxport, naddr, nxport, sn)) { +#else + if (pf_get_sport(pd->af, pd->proto, r, saddr, + daddr, dport, naddr, nport, r->rpool.proxy_port[0], + r->rpool.proxy_port[1], sn)) { +#endif + 
DPFPRINTF(PF_DEBUG_MISC, + ("pf: NAT proxy port allocation " + "(%u-%u) failed\n", + r->rpool.proxy_port[0], + r->rpool.proxy_port[1])); + return (NULL); + } + break; + case PF_BINAT: + switch (direction) { + case PF_OUT: + if (r->rpool.cur->addr.type == + PF_ADDR_DYNIFTL) { + switch (pd->af) { +#if INET + case AF_INET: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr4, + &r->rpool.cur->addr.p.dyn-> + pfid_mask4, + saddr, AF_INET); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (r->rpool.cur->addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->rpool.cur->addr.p.dyn-> + pfid_addr6, + &r->rpool.cur->addr.p.dyn-> + pfid_mask6, + saddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else { + PF_POOLMASK(naddr, + &r->rpool.cur->addr.v.a.addr, + &r->rpool.cur->addr.v.a.mask, + saddr, pd->af); + } + break; + case PF_IN: + if (r->src.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#if INET + case AF_INET: + if (r->src.addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr4, + &r->src.addr.p.dyn-> + pfid_mask4, + daddr, AF_INET); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (r->src.addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->src.addr.p.dyn-> + pfid_addr6, + &r->src.addr.p.dyn-> + pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else + PF_POOLMASK(naddr, + &r->src.addr.v.a.addr, + &r->src.addr.v.a.mask, daddr, + pd->af); + break; + } + break; + case PF_RDR: { +#ifndef NO_APPLE_EXTENSIONS + switch (direction) { + case PF_OUT: + if (r->dst.addr.type == PF_ADDR_DYNIFTL) { + switch (pd->af) { +#if INET + case AF_INET: + if (r->dst.addr.p.dyn-> + pfid_acnt4 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->dst.addr.p.dyn-> + pfid_addr4, + &r->dst.addr.p.dyn-> + pfid_mask4, + daddr, AF_INET); + break; +#endif /* INET */ +#if INET6 + case 
AF_INET6: + if (r->dst.addr.p.dyn-> + pfid_acnt6 < 1) + return (NULL); + PF_POOLMASK(naddr, + &r->dst.addr.p.dyn-> + pfid_addr6, + &r->dst.addr.p.dyn-> + pfid_mask6, + daddr, AF_INET6); + break; +#endif /* INET6 */ + } + } else { + PF_POOLMASK(naddr, + &r->dst.addr.v.a.addr, + &r->dst.addr.v.a.mask, + daddr, pd->af); + } + if (nxport && dxport) + *nxport = *sxport; + break; + case PF_IN: + if (pf_map_addr(pd->af, r, saddr, + naddr, NULL, sn)) + return (NULL); + if ((r->rpool.opts & PF_POOL_TYPEMASK) == + PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, + &r->rpool.cur->addr.v.a.mask, daddr, + pd->af); + + if (nxport && dxport) { + if (r->rpool.proxy_port[1]) { + u_int32_t tmp_nport; + + tmp_nport = + ((ntohs(dxport->port) - + ntohs(r->dst.xport.range. + port[0])) % + (r->rpool.proxy_port[1] - + r->rpool.proxy_port[0] + + 1)) + r->rpool.proxy_port[0]; + + /* wrap around if necessary */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + nxport->port = + htons((u_int16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) { + nxport->port = htons(r->rpool. 
+ proxy_port[0]); + } + } + break; + } +#else + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, sn)) + return (NULL); + if ((r->rpool.opts & PF_POOL_TYPEMASK) == + PF_POOL_BITMASK) + PF_POOLMASK(naddr, naddr, + &r->rpool.cur->addr.v.a.mask, daddr, + pd->af); + + if (r->rpool.proxy_port[1]) { + u_int32_t tmp_nport; + + tmp_nport = ((ntohs(dport) - + ntohs(r->dst.port[0])) % + (r->rpool.proxy_port[1] - + r->rpool.proxy_port[0] + 1)) + + r->rpool.proxy_port[0]; + + /* wrap around if necessary */ + if (tmp_nport > 65535) + tmp_nport -= 65535; + *nport = htons((u_int16_t)tmp_nport); + } else if (r->rpool.proxy_port[0]) + *nport = htons(r->rpool.proxy_port[0]); +#endif + break; + } + default: + return (NULL); + } + } + + return (r); +} + +int +pf_socket_lookup(int direction, struct pf_pdesc *pd) +{ + struct pf_addr *saddr, *daddr; + u_int16_t sport, dport; + struct inpcbinfo *pi; + struct inpcb *inp = NULL; + + if (pd == NULL) + return (-1); + pd->lookup.uid = UID_MAX; + pd->lookup.gid = GID_MAX; + pd->lookup.pid = NO_PID; + + switch (pd->proto) { + case IPPROTO_TCP: + if (pd->hdr.tcp == NULL) + return (-1); + sport = pd->hdr.tcp->th_sport; + dport = pd->hdr.tcp->th_dport; + pi = &tcbinfo; + break; + case IPPROTO_UDP: + if (pd->hdr.udp == NULL) + return (-1); + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; + pi = &udbinfo; + break; + default: + return (-1); + } + if (direction == PF_IN) { + saddr = pd->src; + daddr = pd->dst; + } else { + u_int16_t p; + + p = sport; + sport = dport; + dport = p; + saddr = pd->dst; + daddr = pd->src; + } + switch (pd->af) { +#if INET + case AF_INET: + inp = in_pcblookup_hash(pi, saddr->v4, sport, daddr->v4, dport, + 0, NULL); + if (inp == NULL) { + inp = in_pcblookup_hash(pi, saddr->v4, sport, + daddr->v4, dport, INPLOOKUP_WILDCARD, NULL); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + inp = in6_pcblookup_hash(pi, &saddr->v6, sport, &daddr->v6, + dport, 0, NULL); + if (inp 
== NULL) { + inp = in6_pcblookup_hash(pi, &saddr->v6, sport, + &daddr->v6, dport, INPLOOKUP_WILDCARD, NULL); + if (inp == NULL) + return (-1); + } + break; +#endif /* INET6 */ + + default: + return (-1); + } + + if (inp != NULL) + in_pcb_checkstate(inp, WNT_RELEASE, 0); + + return (1); +} + +static u_int8_t +pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int8_t wscale = 0; + + hlen = th_off << 2; /* hlen <= sizeof (hdr) */ + if (hlen <= (int)sizeof (struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof (struct tcphdr); + hlen -= sizeof (struct tcphdr); + while (hlen >= 3) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_WINDOW: + wscale = opt[2]; + if (wscale > TCP_MAX_WINSHIFT) + wscale = TCP_MAX_WINSHIFT; + wscale |= PF_WSCALE_FLAG; + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (wscale); +} + +static u_int16_t +pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af) +{ + int hlen; + u_int8_t hdr[60]; + u_int8_t *opt, optlen; + u_int16_t mss = tcp_mssdflt; + + hlen = th_off << 2; /* hlen <= sizeof (hdr) */ + if (hlen <= (int)sizeof (struct tcphdr)) + return (0); + if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af)) + return (0); + opt = hdr + sizeof (struct tcphdr); + hlen -= sizeof (struct tcphdr); + while (hlen >= TCPOLEN_MAXSEG) { + switch (*opt) { + case TCPOPT_EOL: + case TCPOPT_NOP: + ++opt; + --hlen; + break; + case TCPOPT_MAXSEG: + bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2); +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(mss); +#endif + /* FALLTHROUGH */ + default: + optlen = opt[1]; + if (optlen < 2) + optlen = 2; + hlen -= optlen; + opt += optlen; + break; + } + } + return (mss); +} + +static u_int16_t +pf_calc_mss(struct pf_addr *addr, sa_family_t 
af, u_int16_t offer) +{ +#if INET + struct sockaddr_in *dst; + struct route ro; +#endif /* INET */ +#if INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro6; +#endif /* INET6 */ + struct rtentry *rt = NULL; + int hlen; + u_int16_t mss = tcp_mssdflt; + + switch (af) { +#if INET + case AF_INET: + hlen = sizeof (struct ip); + bzero(&ro, sizeof (ro)); + dst = (struct sockaddr_in *)&ro.ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof (*dst); + dst->sin_addr = addr->v4; + rtalloc(&ro); + rt = ro.ro_rt; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + hlen = sizeof (struct ip6_hdr); + bzero(&ro6, sizeof (ro6)); + dst6 = (struct sockaddr_in6 *)&ro6.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof (*dst6); + dst6->sin6_addr = addr->v6; + rtalloc((struct route *)&ro); + rt = ro6.ro_rt; + break; +#endif /* INET6 */ + default: + panic("pf_calc_mss: not AF_INET or AF_INET6!"); + return (0); + } + + if (rt && rt->rt_ifp) { + mss = rt->rt_ifp->if_mtu - hlen - sizeof (struct tcphdr); + mss = max(tcp_mssdflt, mss); + RTFREE(rt); + } + mss = min(mss, offer); + mss = max(mss, 64); /* sanity - at least max opt space */ + return (mss); +} + +static void +pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) +{ + struct pf_rule *r = s->rule.ptr; + + s->rt_kif = NULL; + if (!r->rt || r->rt == PF_FASTROUTE) + return; + switch (s->state_key->af) { +#if INET + case AF_INET: + pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, + &s->nat_src_node); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + pf_map_addr(AF_INET6, r, saddr, &s->rt_addr, NULL, + &s->nat_src_node); + s->rt_kif = r->rpool.cur->kif; + break; +#endif /* INET6 */ + } +} + +static void +pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail) +{ + s->state_key = sk; + sk->refcnt++; + + /* list is sorted, if-bound states before floating */ + if (tail) + TAILQ_INSERT_TAIL(&sk->states, s, next); + else + 
TAILQ_INSERT_HEAD(&sk->states, s, next); +} + +static void +pf_detach_state(struct pf_state *s, int flags) +{ + struct pf_state_key *sk = s->state_key; + + if (sk == NULL) + return; + + s->state_key = NULL; + TAILQ_REMOVE(&sk->states, s, next); + if (--sk->refcnt == 0) { + if (!(flags & PF_DT_SKIP_EXTGWY)) + RB_REMOVE(pf_state_tree_ext_gwy, + &pf_statetbl_ext_gwy, sk); + if (!(flags & PF_DT_SKIP_LANEXT)) + RB_REMOVE(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, sk); +#ifndef NO_APPLE_EXTENSIONS + if (sk->app_state) + pool_put(&pf_app_state_pl, sk->app_state); +#endif + pool_put(&pf_state_key_pl, sk); + } +} + +struct pf_state_key * +pf_alloc_state_key(struct pf_state *s) +{ + struct pf_state_key *sk; + + if ((sk = pool_get(&pf_state_key_pl, PR_WAITOK)) == NULL) + return (NULL); + bzero(sk, sizeof (*sk)); + TAILQ_INIT(&sk->states); + pf_attach_state(sk, s, 0); + + return (sk); +} + +static u_int32_t +pf_tcp_iss(struct pf_pdesc *pd) +{ + MD5_CTX ctx; + u_int32_t digest[4]; + + if (pf_tcp_secret_init == 0) { + read_random(pf_tcp_secret, sizeof (pf_tcp_secret)); + MD5Init(&pf_tcp_secret_ctx); + MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, + sizeof (pf_tcp_secret)); + pf_tcp_secret_init = 1; + } + ctx = pf_tcp_secret_ctx; + + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof (u_short)); + MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof (u_short)); + if (pd->af == AF_INET6) { + MD5Update(&ctx, (char *)&pd->src->v6, sizeof (struct in6_addr)); + MD5Update(&ctx, (char *)&pd->dst->v6, sizeof (struct in6_addr)); + } else { + MD5Update(&ctx, (char *)&pd->src->v4, sizeof (struct in_addr)); + MD5Update(&ctx, (char *)&pd->dst->v4, sizeof (struct in_addr)); + } + MD5Final((u_char *)digest, &ctx); + pf_tcp_iss_off += 4096; + return (digest[0] + random() + pf_tcp_iss_off); +} + +static int +pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, + struct pfi_kif *kif, struct mbuf *m, int off, void *h, + struct pf_pdesc *pd, struct pf_rule **am, struct 
pf_ruleset **rsm, + struct ifqueue *ifq) +{ +#pragma unused(h) + struct pf_rule *nr = NULL; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; +#ifdef NO_APPLE_EXTENSIONS + u_int16_t bport, nport = 0; +#endif + sa_family_t af = pd->af; + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_src_node *nsn = NULL; + struct tcphdr *th = pd->hdr.tcp; + u_short reason; + int rewrite = 0, hdrlen = 0; + int tag = -1; + unsigned int rtableid = IFSCOPE_NONE; + int asd = 0; + int match = 0; + int state_icmp = 0; + u_int16_t mss = tcp_mssdflt; +#ifdef NO_APPLE_EXTENSIONS + u_int16_t sport, dport; +#endif + u_int8_t icmptype = 0, icmpcode = 0; + +#ifndef NO_APPLE_EXTENSIONS + struct pf_grev1_hdr *grev1 = pd->hdr.grev1; + union pf_state_xport bxport, nxport, sxport, dxport; +#endif + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (direction == PF_IN && pf_check_congestion(ifq)) { + REASON_SET(&reason, PFRES_CONGEST); + return (PF_DROP); + } + +#ifndef NO_APPLE_EXTENSIONS + hdrlen = 0; + sxport.spi = 0; + dxport.spi = 0; + nxport.spi = 0; +#else + sport = dport = hdrlen = 0; +#endif + + switch (pd->proto) { + case IPPROTO_TCP: +#ifndef NO_APPLE_EXTENSIONS + sxport.port = th->th_sport; + dxport.port = th->th_dport; +#else + sport = th->th_sport; + dport = th->th_dport; +#endif + hdrlen = sizeof (*th); + break; + case IPPROTO_UDP: +#ifndef NO_APPLE_EXTENSIONS + sxport.port = pd->hdr.udp->uh_sport; + dxport.port = pd->hdr.udp->uh_dport; +#else + sport = pd->hdr.udp->uh_sport; + dport = pd->hdr.udp->uh_dport; +#endif + hdrlen = sizeof (*pd->hdr.udp); + break; +#if INET + case IPPROTO_ICMP: + if (pd->af != AF_INET) + break; +#ifndef NO_APPLE_EXTENSIONS + sxport.port = dxport.port = pd->hdr.icmp->icmp_id; + hdrlen = ICMP_MINLEN; +#else + sport = dport = pd->hdr.icmp->icmp_id; +#endif + icmptype = pd->hdr.icmp->icmp_type; + icmpcode = pd->hdr.icmp->icmp_code; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == 
ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#if INET6 + case IPPROTO_ICMPV6: + if (pd->af != AF_INET6) + break; +#ifndef NO_APPLE_EXTENSIONS + sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id; +#else + sport = dport = pd->hdr.icmp6->icmp6_id; +#endif + hdrlen = sizeof (*pd->hdr.icmp6); + icmptype = pd->hdr.icmp6->icmp6_type; + icmpcode = pd->hdr.icmp6->icmp6_code; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + if (pd->proto_variant == PF_GRE_PPTP_VARIANT) { + sxport.call_id = dxport.call_id = + pd->hdr.grev1->call_id; + hdrlen = sizeof (*pd->hdr.grev1); + } + break; + case IPPROTO_ESP: + sxport.spi = 0; + dxport.spi = pd->hdr.esp->spi; + hdrlen = sizeof (*pd->hdr.esp); + break; +#endif + } + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + + if (direction == PF_OUT) { +#ifndef NO_APPLE_EXTENSIONS + bxport = nxport = sxport; + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation_aux(pd, m, off, PF_OUT, kif, &nsn, + saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) != + NULL) { +#else + bport = nport = sport; + /* check outgoing packet for BINAT/NAT */ + if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, + saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { +#endif + PF_ACPY(&pd->baddr, saddr, af); + switch (pd->proto) { + case IPPROTO_TCP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, saddr, + &th->th_sport, pd->ip_sum, &th->th_sum, + &pd->naddr, nxport.port, 0, af); + sxport.port = th->th_sport; +#else + pf_change_ap(saddr, &th->th_sport, pd->ip_sum, + &th->th_sum, &pd->naddr, nport, 0, af); + sport = th->th_sport; +#endif + rewrite++; + break; + case IPPROTO_UDP: +#ifndef NO_APPLE_EXTENSIONS + 
pf_change_ap(direction, pd->mp, saddr, + &pd->hdr.udp->uh_sport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->naddr, + nxport.port, 1, af); + sxport.port = pd->hdr.udp->uh_sport; +#else + pf_change_ap(saddr, &pd->hdr.udp->uh_sport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &pd->naddr, nport, 1, af); + sport = pd->hdr.udp->uh_sport; +#endif + rewrite++; + break; +#if INET + case IPPROTO_ICMP: + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + pd->naddr.v4.s_addr, 0); +#ifndef NO_APPLE_EXTENSIONS + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sxport.port, + nxport.port, 0); + pd->hdr.icmp->icmp_id = nxport.port; + ++rewrite; +#else + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sport, nport, 0); + pd->hdr.icmp->icmp_id = nport; + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); +#endif + break; +#endif /* INET */ +#if INET6 + case IPPROTO_ICMPV6: + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &pd->naddr, 0); + rewrite++; + break; +#endif /* INET */ +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + switch (af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->naddr, AF_INET6); + break; +#endif /* INET6 */ + } + ++rewrite; + break; + case IPPROTO_ESP: + bxport.spi = 0; + switch (af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->naddr, AF_INET6); + break; +#endif /* INET6 */ + } + break; +#endif + default: + switch (af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->naddr, af); + break; +#endif /* INET */ + } + break; + } + + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } else { +#ifndef NO_APPLE_EXTENSIONS + bxport.port = 
nxport.port = dxport.port; + /* check incoming packet for BINAT/RDR */ + if ((nr = pf_get_translation_aux(pd, m, off, PF_IN, kif, &nsn, + saddr, &sxport, daddr, &dxport, &pd->naddr, &nxport)) != + NULL) { +#else + bport = nport = dport; + /* check incoming packet for BINAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, + saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { +#endif + PF_ACPY(&pd->baddr, daddr, af); + switch (pd->proto) { + case IPPROTO_TCP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, daddr, + &th->th_dport, pd->ip_sum, &th->th_sum, + &pd->naddr, nxport.port, 0, af); + dxport.port = th->th_dport; +#else + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->naddr, nport, 0, af); + dport = th->th_dport; +#endif + rewrite++; + break; + case IPPROTO_UDP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, daddr, + &pd->hdr.udp->uh_dport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->naddr, + nxport.port, 1, af); + dxport.port = pd->hdr.udp->uh_dport; +#else + pf_change_ap(direction, daddr, + &pd->hdr.udp->uh_dport, + pd->ip_sum, &pd->hdr.udp->uh_sum, + &pd->naddr, nport, 1, af); + dport = pd->hdr.udp->uh_dport; +#endif + rewrite++; + break; +#if INET + case IPPROTO_ICMP: + pf_change_a(&daddr->v4.s_addr, pd->ip_sum, + pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case IPPROTO_ICMPV6: + pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, + &pd->naddr, 0); + rewrite++; + break; +#endif /* INET6 */ +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + if (pd->proto_variant == PF_GRE_PPTP_VARIANT) + grev1->call_id = nxport.call_id; + + switch (af) { +#if INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->naddr, AF_INET6); + break; +#endif /* INET6 */ + } + ++rewrite; + break; + case IPPROTO_ESP: + switch (af) { +#if INET + case AF_INET: + 
pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->naddr, AF_INET6); + break; +#endif /* INET6 */ + } + break; +#endif + default: + switch (af) { +#if INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, pd->naddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->naddr, af); + break; +#endif /* INET */ + } + break; + } + + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; + } + } + +#ifndef NO_APPLE_EXTENSIONS + if (nr && nr->tag > 0) + tag = nr->tag; +#endif + + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, saddr, af, + r->src.neg, kif)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + /* tcp/udp only. port_op always 0 in other cases */ +#ifndef NO_APPLE_EXTENSIONS + else if (r->proto == pd->proto && + (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) && + r->src.xport.range.op && + !pf_match_port(r->src.xport.range.op, + r->src.xport.range.port[0], r->src.xport.range.port[1], + th->th_sport)) +#else + else if (r->src.port_op && !pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) +#endif + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, daddr, af, + r->dst.neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + /* tcp/udp only. 
port_op always 0 in other cases */ +#ifndef NO_APPLE_EXTENSIONS + else if (r->proto == pd->proto && + (r->proto == IPPROTO_TCP || r->proto == IPPROTO_UDP) && + r->dst.xport.range.op && + !pf_match_port(r->dst.xport.range.op, + r->dst.xport.range.port[0], r->dst.xport.range.port[1], + th->th_dport)) +#else + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) +#endif + r = r->skip[PF_SKIP_DST_PORT].ptr; + /* icmp only. type always 0 in other cases */ + else if (r->type && r->type != icmptype + 1) + r = TAILQ_NEXT(r, entries); + /* icmp only. type always 0 in other cases */ + else if (r->code && r->code != icmpcode + 1) + r = TAILQ_NEXT(r, entries); + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + else if (r->rule_flag & PFRULE_FRAGMENT) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->flagset & th->th_flags) != r->flags) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. uid.op always 0 in other cases */ + else if (r->uid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && + !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1], + pd->lookup.uid)) + r = TAILQ_NEXT(r, entries); + /* tcp/udp only. 
gid.op always 0 in other cases */ + else if (r->gid.op && (pd->lookup.done || (pd->lookup.done = + pf_socket_lookup(direction, pd), 1)) && + !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], + pd->lookup.gid)) + r = TAILQ_NEXT(r, entries); + else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) + r = TAILQ_NEXT(r, entries); + else if (r->os_fingerprint != PF_OSFP_ANY && + (pd->proto != IPPROTO_TCP || !pf_osfp_match( + pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint))) + r = TAILQ_NEXT(r, entries); + else { + if (r->tag) + tag = r->tag; + if (PF_RTABLEID_IS_VALID(r->rtableid)) + rtableid = r->rtableid; + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log || (nr != NULL && nr->log)) { +#ifndef NO_APPLE_EXTENSIONS + if (rewrite > 0) { + if (rewrite < off + hdrlen) + rewrite = off + hdrlen; + + m = pf_lazy_makewritable(pd, m, rewrite); + if (!m) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + m_copyback(m, off, hdrlen, pd->hdr.any); + } +#else + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); +#endif + PFLOG_PACKET(kif, h, m, af, direction, reason, r->log ? 
r : nr, + a, ruleset, pd); + } + + if ((r->action == PF_DROP) && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURNICMP) || + (r->rule_flag & PFRULE_RETURN))) { + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + if (direction == PF_OUT) { + switch (pd->proto) { + case IPPROTO_TCP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, saddr, + &th->th_sport, pd->ip_sum, + &th->th_sum, &pd->baddr, + bxport.port, 0, af); + sxport.port = th->th_sport; +#else + pf_change_ap(saddr, &th->th_sport, + pd->ip_sum, &th->th_sum, + &pd->baddr, bport, 0, af); + sport = th->th_sport; +#endif + rewrite++; + break; + case IPPROTO_UDP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, saddr, + &pd->hdr.udp->uh_sport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->baddr, + bxport.port, 1, af); + sxport.port = pd->hdr.udp->uh_sport; +#else + pf_change_ap(saddr, + &pd->hdr.udp->uh_sport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->baddr, + bport, 1, af); + sport = pd->hdr.udp->uh_sport; +#endif + rewrite++; + break; + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif + /* nothing! 
*/ + break; +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + PF_ACPY(&pd->baddr, saddr, af); + ++rewrite; + switch (af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->baddr, + AF_INET6); + break; +#endif /* INET6 */ + } + break; + case IPPROTO_ESP: + PF_ACPY(&pd->baddr, saddr, af); + switch (af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(saddr, &pd->baddr, + AF_INET6); + break; +#endif /* INET6 */ + } + break; +#endif + default: + switch (af) { + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; + case AF_INET6: + PF_ACPY(saddr, &pd->baddr, af); + break; + } + } + } else { + switch (pd->proto) { + case IPPROTO_TCP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, daddr, + &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->baddr, + bxport.port, 0, af); + dxport.port = th->th_dport; +#else + pf_change_ap(daddr, &th->th_dport, + pd->ip_sum, &th->th_sum, + &pd->baddr, bport, 0, af); + dport = th->th_dport; +#endif + rewrite++; + break; + case IPPROTO_UDP: +#ifndef NO_APPLE_EXTENSIONS + pf_change_ap(direction, pd->mp, daddr, + &pd->hdr.udp->uh_dport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->baddr, + bxport.port, 1, af); + dxport.port = pd->hdr.udp->uh_dport; +#else + pf_change_ap(daddr, + &pd->hdr.udp->uh_dport, pd->ip_sum, + &pd->hdr.udp->uh_sum, &pd->baddr, + bport, 1, af); + dport = pd->hdr.udp->uh_dport; +#endif + rewrite++; + break; + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif + /* nothing! 
*/ + break; +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + if (pd->proto_variant == + PF_GRE_PPTP_VARIANT) + grev1->call_id = bxport.call_id; + ++rewrite; + switch (af) { +#if INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->baddr, + AF_INET6); + break; +#endif /* INET6 */ + } + break; + case IPPROTO_ESP: + switch (af) { +#if INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->baddr, + AF_INET6); + break; +#endif /* INET6 */ + } + break; +#endif + default: + switch (af) { + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + pd->baddr.v4.s_addr, 0); + break; +#if INET6 + case AF_INET6: + PF_ACPY(daddr, &pd->baddr, af); + break; +#endif /* INET6 */ + } + } + } + } + if (pd->proto == IPPROTO_TCP && + ((r->rule_flag & PFRULE_RETURNRST) || + (r->rule_flag & PFRULE_RETURN)) && + !(th->th_flags & TH_RST)) { + u_int32_t ack = ntohl(th->th_seq) + pd->p_len; + int len = 0; + struct ip *h4; +#if INET6 + struct ip6_hdr *h6; +#endif /* INET6 */ + + switch (af) { + case AF_INET: + h4 = mtod(m, struct ip *); + len = ntohs(h4->ip_len) - off; + break; +#if INET6 + case AF_INET6: + h6 = mtod(m, struct ip6_hdr *); + len = ntohs(h6->ip6_plen) - + (off - sizeof (*h6)); + break; +#endif /* INET6 */ + } + + if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af)) + REASON_SET(&reason, PFRES_PROTCKSUM); + else { + if (th->th_flags & TH_SYN) + ack++; + if (th->th_flags & TH_FIN) + ack++; + pf_send_tcp(r, af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0, + r->return_ttl, 1, 0, pd->eh, kif->pfik_ifp); + } + } else if (pd->proto != IPPROTO_ICMP && af == AF_INET && +#ifndef NO_APPLE_EXTENSIONS + pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH && +#endif + r->return_icmp) + 
pf_send_icmp(m, r->return_icmp >> 8, + r->return_icmp & 255, af, r); + else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 && +#ifndef NO_APPLE_EXTENSIONS + pd->proto != IPPROTO_ESP && pd->proto != IPPROTO_AH && +#endif + r->return_icmp6) + pf_send_icmp(m, r->return_icmp6 >> 8, + r->return_icmp6 & 255, af, r); + } + + if (r->action == PF_DROP) + return (PF_DROP); + + if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + if (!state_icmp && (r->keep_state || nr != NULL || + (pd->flags & PFDESC_TCP_NORM))) { + /* create new state */ + struct pf_state *s = NULL; + struct pf_state_key *sk = NULL; + struct pf_src_node *sn = NULL; +#ifndef NO_APPLE_EXTENSIONS + struct pf_ike_hdr ike; + + if (pd->proto == IPPROTO_UDP) { + struct udphdr *uh = pd->hdr.udp; + size_t plen = m->m_pkthdr.len - off - sizeof (*uh); + + if (uh->uh_sport == PF_IKE_PORT && + uh->uh_dport == PF_IKE_PORT && + plen >= PF_IKE_PACKET_MINSIZE) { + if (plen > PF_IKE_PACKET_MINSIZE) + plen = PF_IKE_PACKET_MINSIZE; + m_copydata(m, off + sizeof (*uh), plen, &ike); + } + } + + if (nr != NULL && pd->proto == IPPROTO_ESP && + direction == PF_OUT) { + struct pf_state_key_cmp sk0; + struct pf_state *s0; + + /* + * + * This squelches state creation if the external + * address matches an existing incomplete state with a + * different internal address. Only one 'blocking' + * partial state is allowed for each external address. 
+ */ + memset(&sk0, 0, sizeof (sk0)); + sk0.af = pd->af; + sk0.proto = IPPROTO_ESP; + PF_ACPY(&sk0.gwy.addr, saddr, sk0.af); + PF_ACPY(&sk0.ext.addr, daddr, sk0.af); + s0 = pf_find_state(kif, &sk0, PF_IN); + + if (s0 && PF_ANEQ(&s0->state_key->lan.addr, + pd->src, pd->af)) { + nsn = 0; + goto cleanup; + } + } +#endif + + /* check maximums */ + if (r->max_states && (r->states >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); + goto cleanup; + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, saddr, af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto cleanup; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + ((direction == PF_OUT && +#ifndef NO_APPLE_EXTENSIONS + nr->action != PF_RDR && +#endif + pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || + (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto cleanup; + } + s = pool_get(&pf_state_pl, PR_WAITOK); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); +cleanup: + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && + nsn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); + } + if (sk != NULL) { +#ifndef NO_APPLE_EXTENSIONS + if (sk->app_state) + pool_put(&pf_app_state_pl, + sk->app_state); +#endif + pool_put(&pf_state_key_pl, sk); + } + return (PF_DROP); + } + bzero(s, sizeof (*s)); +#ifndef NO_APPLE_EXTENSIONS + TAILQ_INIT(&s->unlink_hooks); +#endif + s->rule.ptr = r; + s->nat_rule.ptr = nr; + if (nr && nr->action == PF_RDR 
&& direction == PF_OUT) + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + s->allow_opts = r->allow_opts; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + pd->p_len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + if ((s->src.seqdiff = pf_tcp_iss(pd) - + s->src.seqlo) == 0) + s->src.seqdiff = 1; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + rewrite = off + sizeof (*th); + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, + th->th_off, af); + } + s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); + } + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->timeout = PFTM_TCP_FIRST_PACKET; + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_GRE: + s->src.state = PFGRE1S_INITIATING; + s->dst.state = PFGRE1S_NO_TRAFFIC; + s->timeout = PFTM_GREv1_INITIATING; + break; + case IPPROTO_ESP: + s->src.state = PFESPS_INITIATING; + s->dst.state = PFESPS_NO_TRAFFIC; + s->timeout = PFTM_ESP_FIRST_PACKET; + break; +#endif + default: + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->timeout = PFTM_OTHER_FIRST_PACKET; + } + + s->creation = pf_time_second(); + 
s->expire = pf_time_second(); + + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + PF_ACPY(&nsn->raddr, &pd->naddr, af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pd->proto == IPPROTO_TCP) { + if ((pd->flags & PFDESC_TCP_NORM) && + pf_normalize_tcp_init(m, off, pd, th, &s->src, + &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, + th, s, &s->src, &s->dst, &rewrite)) { + /* This really shouldn't happen!!! */ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on " + "first pkt")); + pf_normalize_tcp_cleanup(s); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } + } + + if ((sk = pf_alloc_state_key(s)) == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; + } + + sk->proto = pd->proto; + sk->direction = direction; + sk->af = af; +#ifndef NO_APPLE_EXTENSIONS + if (pd->proto == IPPROTO_UDP) { + if (pd->hdr.udp->uh_sport == PF_IKE_PORT && + pd->hdr.udp->uh_dport == PF_IKE_PORT) { + sk->proto_variant = PF_EXTFILTER_APD; + } else { + sk->proto_variant = nr ? 
nr->extfilter : + r->extfilter; + if (sk->proto_variant < PF_EXTFILTER_APD) + sk->proto_variant = PF_EXTFILTER_APD; + } + } else if (pd->proto == IPPROTO_GRE) { + sk->proto_variant = pd->proto_variant; + } +#endif + if (direction == PF_OUT) { + PF_ACPY(&sk->gwy.addr, saddr, af); + PF_ACPY(&sk->ext.addr, daddr, af); + switch (pd->proto) { +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_UDP: + sk->gwy.xport = sxport; + sk->ext.xport = dxport; + break; + case IPPROTO_ESP: + sk->gwy.xport.spi = 0; + sk->ext.xport.spi = pd->hdr.esp->spi; + break; +#endif + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif +#ifndef NO_APPLE_EXTENSIONS + sk->gwy.xport.port = nxport.port; + sk->ext.xport.spi = 0; +#else + sk->gwy.port = nport; + sk->ext.port = 0; +#endif + break; + default: +#ifndef NO_APPLE_EXTENSIONS + sk->gwy.xport = sxport; + sk->ext.xport = dxport; + break; +#else + sk->gwy.port = sport; + sk->ext.port = dport; +#endif + } +#ifndef NO_APPLE_EXTENSIONS + if (nr != NULL) { + PF_ACPY(&sk->lan.addr, &pd->baddr, af); + sk->lan.xport = bxport; + } else { + PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af); + sk->lan.xport = sk->gwy.xport; + } +#else + if (nr != NULL) { + PF_ACPY(&sk->lan.addr, &pd->baddr, af); + sk->lan.port = bport; + } else { + PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af); + sk->lan.port = sk->gwy.port; + } +#endif + } else { + PF_ACPY(&sk->lan.addr, daddr, af); + PF_ACPY(&sk->ext.addr, saddr, af); + switch (pd->proto) { + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif +#ifndef NO_APPLE_EXTENSIONS + sk->lan.xport = nxport; + sk->ext.xport.spi = 0; +#else + sk->lan.port = nport; + sk->ext.port = 0; +#endif + break; +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_ESP: + sk->ext.xport.spi = 0; + sk->lan.xport.spi = pd->hdr.esp->spi; + break; + default: + sk->lan.xport = dxport; + sk->ext.xport = sxport; + break; +#else + default: + sk->lan.port = dport; + sk->ext.port = sport; +#endif + } +#ifndef NO_APPLE_EXTENSIONS + if (nr != NULL) { + 
PF_ACPY(&sk->gwy.addr, &pd->baddr, af); + sk->gwy.xport = bxport; + } else { + PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af); + sk->gwy.xport = sk->lan.xport; + } + } +#else + if (nr != NULL) { + PF_ACPY(&sk->gwy.addr, &pd->baddr, af); + sk->gwy.port = bport; + } else { + PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af); + sk->gwy.port = sk->lan.port; + } + } +#endif + + pf_set_rt_ifp(s, saddr); /* needs s->state_key set */ + +#ifndef NO_APPLE_EXTENSIONS + m = pd->mp; + /* Attach per-application state to the new key, for translated flows only (nr != NULL): a TCP flow whose destination port is PF_PPTP_PORT gets pf_pptp_handler and an unlink hook; a UDP flow with PF_IKE_PORT on both ends gets the pf_ike_compare comparators and the initiator cookie. */ + if (sk->app_state == 0) { + switch (pd->proto) { + case IPPROTO_TCP: { + u_int16_t dport = (direction == PF_OUT) ? + sk->ext.xport.port : sk->gwy.xport.port; + + if (nr != NULL && dport == PF_PPTP_PORT) { + struct pf_app_state *as; + + as = pool_get(&pf_app_state_pl, + PR_WAITOK); + if (!as) { + REASON_SET(&reason, + PFRES_MEMORY); + /* NOTE(review): the cleanup label releases sn, nsn and sk but, unlike the other failure paths in this function, does not call STATE_DEC_COUNTERS(s) or pool_put(&pf_state_pl, s); confirm s is not leaked here */ goto cleanup; + } + + bzero(as, sizeof (*as)); + as->handler = pf_pptp_handler; + as->compare_lan_ext = 0; + as->compare_ext_gwy = 0; + as->u.pptp.grev1_state = 0; + sk->app_state = as; + (void) hook_establish(&s->unlink_hooks, + 0, (hook_fn_t) pf_pptp_unlink, s); + } + break; + } + + case IPPROTO_UDP: { + struct udphdr *uh = pd->hdr.udp; + + if (nr != NULL && uh->uh_sport == PF_IKE_PORT && + uh->uh_dport == PF_IKE_PORT) { + struct pf_app_state *as; + + as = pool_get(&pf_app_state_pl, + PR_WAITOK); + if (!as) { + REASON_SET(&reason, + PFRES_MEMORY); + goto cleanup; + } + + bzero(as, sizeof (*as)); + as->compare_lan_ext = pf_ike_compare; + as->compare_ext_gwy = pf_ike_compare; + /* NOTE(review): ike is filled by m_copydata() earlier in this function only when the UDP payload is at least PF_IKE_PACKET_MINSIZE; this read repeats the port test but not that length test, so confirm ike cannot be read uninitialized */ as->u.ike.cookie = ike.initiator_cookie; + sk->app_state = as; + } + break; + } + + default: + break; + } + } +#endif + /* if the state cannot be inserted, undo the per-state bookkeeping, release s and drop the packet */ + if (pf_insert_state(BOUND_IFACE(r, kif), s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } + if (pd->proto == IPPROTO_TCP && + (th->th_flags & 
(TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + if (nr != NULL) { /* put the pre-translation address and port (pd->baddr with bxport or bport) back into the TCP header */ +#ifndef NO_APPLE_EXTENSIONS + if (direction == PF_OUT) { + pf_change_ap(direction, pd->mp, saddr, + &th->th_sport, pd->ip_sum, + &th->th_sum, &pd->baddr, + bxport.port, 0, af); + sxport.port = th->th_sport; + } else { + pf_change_ap(direction, pd->mp, daddr, + &th->th_dport, pd->ip_sum, + &th->th_sum, &pd->baddr, + bxport.port, 0, af); + /* NOTE(review): this branch rewrites th_dport yet records it in sxport rather than dxport; confirm this is intended */ sxport.port = th->th_dport; + } +#else + if (direction == PF_OUT) { + pf_change_ap(saddr, &th->th_sport, + pd->ip_sum, &th->th_sum, &pd->baddr, + bport, 0, af); + sport = th->th_sport; + } else { + pf_change_ap(daddr, &th->th_dport, + pd->ip_sum, &th->th_sum, &pd->baddr, + bport, 0, af); + sport = th->th_dport; + } +#endif + } + s->src.seqhi = htonl(random()); + /* Find mss option */ + mss = pf_get_mss(m, off, th->th_off, af); + mss = pf_calc_mss(saddr, af, mss); + mss = pf_calc_mss(daddr, af, mss); + s->src.mss = mss; + /* answer the SYN with a SYN|ACK sent from daddr to saddr, then report PF_SYNPROXY_DROP to the caller */ pf_send_tcp(r, af, daddr, saddr, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + +#ifndef NO_APPLE_EXTENSIONS + /* give the application-layer handler, if one was attached to the key, a look at the first packet */ if (sk->app_state && sk->app_state->handler) { + int offx = off; + + switch (pd->proto) { + case IPPROTO_TCP: + offx += th->th_off << 2; + break; + case IPPROTO_UDP: + /* NOTE(review): uh_ulen is used as read from the header (no ntohs) and shifted by 2 like th_off; confirm this yields the intended payload offset */ offx += pd->hdr.udp->uh_ulen << 2; + break; + default: + /* ALG handlers only apply to TCP and UDP rules */ + break; + } + /* the handler runs only when a transport header length was added above */ + if (offx > off) { + sk->app_state->handler(s, direction, offx, + pd, kif); + if (pd->lmw < 0) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + m = pd->mp; + } + } +#endif + } + + /* copy back packet headers if we performed NAT operations */ +#ifndef NO_APPLE_EXTENSIONS + if (rewrite) { + if (rewrite < off + hdrlen) + rewrite = off + hdrlen; + + m = pf_lazy_makewritable(pd, pd->mp, rewrite); + if (!m) { + REASON_SET(&reason, 
PFRES_MEMORY); + return (PF_DROP); + } + + m_copyback(m, off, hdrlen, pd->hdr.any); + } +#else + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); +#endif + + return (PF_PASS); +} + /* pf_test_fragment: walk the active filter ruleset for a packet using only interface, direction, address family, protocol, source and destination address, tos, probability and tag criteria. Rules that name an OS fingerprint, TCP or UDP port ranges, TCP flags, or an ICMP type or code are stepped over without matching. The last matching rule (or the first matching rule marked quick) is stored through rm, with its anchor and ruleset stored through am and rsm. The packet is logged when that rule has log set. Returns PF_PASS when the rule's action is PF_PASS and pf_tag_packet() succeeds, otherwise PF_DROP. The parameter h is marked unused. */ +static int +pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, + struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am, + struct pf_ruleset **rsm) +{ +#pragma unused(h) + struct pf_rule *r, *a = NULL; + struct pf_ruleset *ruleset = NULL; + sa_family_t af = pd->af; + u_short reason; + int tag = -1; + int asd = 0; + int match = 0; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != direction) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->tos && !(r->tos == pd->tos)) + r = TAILQ_NEXT(r, entries); + /* from here on, a rule that sets any of these criteria is stepped over rather than evaluated */ else if (r->os_fingerprint != PF_OSFP_ANY) + r = TAILQ_NEXT(r, entries); +#ifndef NO_APPLE_EXTENSIONS + else if (pd->proto == IPPROTO_UDP && + (r->src.xport.range.op || r->dst.xport.range.op)) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->src.xport.range.op || r->dst.xport.range.op || + r->flagset)) + r = TAILQ_NEXT(r, entries); +#else + else if (pd->proto == IPPROTO_UDP && + (r->src.port_op || r->dst.port_op)) + r = TAILQ_NEXT(r, entries); + else if (pd->proto == IPPROTO_TCP && + (r->src.port_op || r->dst.port_op || r->flagset)) + r = TAILQ_NEXT(r, entries); +#endif + else if ((pd->proto == IPPROTO_ICMP || + pd->proto == IPPROTO_ICMPV6) && + (r->type || r->code)) + r = TAILQ_NEXT(r, 
entries); + else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1)) + r = TAILQ_NEXT(r, entries); + else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) + r = TAILQ_NEXT(r, entries); + else { + if (r->anchor == NULL) { + match = 1; + *rm = r; + *am = a; + *rsm = ruleset; + if ((*rm)->quick) + break; + r = TAILQ_NEXT(r, entries); + } else + pf_step_into_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match); + } + if (r == NULL && pf_step_out_of_anchor(&asd, &ruleset, + PF_RULESET_FILTER, &r, &a, &match)) + break; + } + r = *rm; + a = *am; + ruleset = *rsm; + + REASON_SET(&reason, PFRES_MATCH); + + if (r->log) + PFLOG_PACKET(kif, h, m, af, direction, reason, r, a, ruleset, + pd); + + if (r->action != PF_PASS) + return (PF_DROP); + + if (pf_tag_packet(m, pd->pf_mtag, tag, -1)) { + REASON_SET(&reason, PFRES_MEMORY); + return (PF_DROP); + } + + return (PF_PASS); +} + +#ifndef NO_APPLE_EXTENSIONS +static void +pf_pptp_handler(struct pf_state *s, int direction, int off, + struct pf_pdesc *pd, struct pfi_kif *kif) +{ +#pragma unused(direction) + struct tcphdr *th; + struct pf_pptp_state *as; + struct pf_pptp_ctrl_msg cm; + size_t plen; + struct pf_state *gs; + u_int16_t ct; + u_int16_t *pac_call_id; + u_int16_t *pns_call_id; + u_int16_t *spoof_call_id; + u_int8_t *pac_state; + u_int8_t *pns_state; + enum { PF_PPTP_PASS, PF_PPTP_INSERT_GRE, PF_PPTP_REMOVE_GRE } op; + struct mbuf *m; + struct pf_state_key *sk; + struct pf_state_key *gsk; + + m = pd->mp; + plen = min(sizeof (cm), m->m_pkthdr.len - off); + if (plen < PF_PPTP_CTRL_MSG_MINSIZE) + return; + + as = &s->state_key->app_state->u.pptp; + m_copydata(m, off, plen, &cm); + + if (cm.hdr.magic != PF_PPTP_MAGIC_NUMBER) + return; + if (cm.hdr.type != htons(1)) + return; + + sk = s->state_key; + gs = as->grev1_state; + if (!gs) { + gs = pool_get(&pf_state_pl, PR_WAITOK); + if (!gs) + return; + + memcpy(gs, s, sizeof (*gs)); + + memset(&gs->entry_id, 0, sizeof (gs->entry_id)); + 
memset(&gs->entry_list, 0, sizeof (gs->entry_list)); + + TAILQ_INIT(&gs->unlink_hooks); + gs->rt_kif = NULL; + gs->creation = 0; + gs->pfsync_time = 0; + gs->packets[0] = gs->packets[1] = 0; + gs->bytes[0] = gs->bytes[1] = 0; + gs->timeout = PFTM_UNLINKED; + gs->id = gs->creatorid = 0; + gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC; + gs->src.scrub = gs->dst.scrub = 0; + + gsk = pf_alloc_state_key(gs); + if (!gsk) { + pool_put(&pf_state_pl, gs); + return; + } + + memcpy(&gsk->lan, &sk->lan, sizeof (gsk->lan)); + memcpy(&gsk->gwy, &sk->gwy, sizeof (gsk->gwy)); + memcpy(&gsk->ext, &sk->ext, sizeof (gsk->ext)); + gsk->af = sk->af; + gsk->proto = IPPROTO_GRE; + gsk->proto_variant = PF_GRE_PPTP_VARIANT; + gsk->app_state = 0; + gsk->lan.xport.call_id = 0; + gsk->gwy.xport.call_id = 0; + gsk->ext.xport.call_id = 0; + + as->grev1_state = gs; + } else { + gsk = gs->state_key; + } + + switch (sk->direction) { + case PF_IN: + pns_call_id = &gsk->ext.xport.call_id; + pns_state = &gs->dst.state; + pac_call_id = &gsk->lan.xport.call_id; + pac_state = &gs->src.state; + break; + + case PF_OUT: + pns_call_id = &gsk->lan.xport.call_id; + pns_state = &gs->src.state; + pac_call_id = &gsk->ext.xport.call_id; + pac_state = &gs->dst.state; + break; + + default: + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_pptp_handler: bad directional!\n")); + return; + } + + spoof_call_id = 0; + op = PF_PPTP_PASS; + + ct = ntohs(cm.ctrl.type); + + switch (ct) { + case PF_PPTP_CTRL_TYPE_CALL_OUT_REQ: + *pns_call_id = cm.msg.call_out_req.call_id; + *pns_state = PFGRE1S_INITIATING; + if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) + spoof_call_id = &cm.msg.call_out_req.call_id; + break; + + case PF_PPTP_CTRL_TYPE_CALL_OUT_RPY: + *pac_call_id = cm.msg.call_out_rpy.call_id; + if (s->nat_rule.ptr) + spoof_call_id = + (pac_call_id == &gsk->lan.xport.call_id) ? 
+ &cm.msg.call_out_rpy.call_id : + &cm.msg.call_out_rpy.peer_call_id; + if (gs->timeout == PFTM_UNLINKED) { + *pac_state = PFGRE1S_INITIATING; + op = PF_PPTP_INSERT_GRE; + } + break; + + case PF_PPTP_CTRL_TYPE_CALL_IN_1ST: + *pns_call_id = cm.msg.call_in_1st.call_id; + *pns_state = PFGRE1S_INITIATING; + if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) + spoof_call_id = &cm.msg.call_in_1st.call_id; + break; + + case PF_PPTP_CTRL_TYPE_CALL_IN_2ND: + *pac_call_id = cm.msg.call_in_2nd.call_id; + *pac_state = PFGRE1S_INITIATING; + if (s->nat_rule.ptr) + spoof_call_id = + (pac_call_id == &gsk->lan.xport.call_id) ? + &cm.msg.call_in_2nd.call_id : + &cm.msg.call_in_2nd.peer_call_id; + break; + + case PF_PPTP_CTRL_TYPE_CALL_IN_3RD: + if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) + spoof_call_id = &cm.msg.call_in_3rd.call_id; + if (cm.msg.call_in_3rd.call_id != *pns_call_id) { + break; + } + if (gs->timeout == PFTM_UNLINKED) + op = PF_PPTP_INSERT_GRE; + break; + + case PF_PPTP_CTRL_TYPE_CALL_CLR: + if (cm.msg.call_clr.call_id != *pns_call_id) + op = PF_PPTP_REMOVE_GRE; + break; + + case PF_PPTP_CTRL_TYPE_CALL_DISC: + if (cm.msg.call_clr.call_id != *pac_call_id) + op = PF_PPTP_REMOVE_GRE; + break; + + case PF_PPTP_CTRL_TYPE_ERROR: + if (s->nat_rule.ptr && pns_call_id == &gsk->lan.xport.call_id) + spoof_call_id = &cm.msg.error.peer_call_id; + break; + + case PF_PPTP_CTRL_TYPE_SET_LINKINFO: + if (s->nat_rule.ptr && pac_call_id == &gsk->lan.xport.call_id) + spoof_call_id = &cm.msg.set_linkinfo.peer_call_id; + break; + + default: + op = PF_PPTP_PASS; + break; + } + + if (!gsk->gwy.xport.call_id && gsk->lan.xport.call_id) { + gsk->gwy.xport.call_id = gsk->lan.xport.call_id; + if (spoof_call_id) { + u_int16_t call_id = 0; + int n = 0; + struct pf_state_key_cmp key; + + key.af = gsk->af; + key.proto = IPPROTO_GRE; + key.proto_variant = PF_GRE_PPTP_VARIANT; + PF_ACPY(&key.gwy.addr, &gsk->gwy.addr, key.af); + PF_ACPY(&key.ext.addr, &gsk->ext.addr, 
key.af); + key.gwy.xport.call_id = gsk->gwy.xport.call_id; + key.ext.xport.call_id = gsk->ext.xport.call_id; + do { + call_id = htonl(random()); + } while (!call_id); + + while (pf_find_state_all(&key, PF_IN, 0)) { + call_id = ntohs(call_id); + --call_id; + if (--call_id == 0) call_id = 0xffff; + call_id = htons(call_id); + + key.gwy.xport.call_id = call_id; + + if (++n > 65535) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_pptp_handler: failed to spoof " + "call id\n")); + key.gwy.xport.call_id = 0; + break; + } + } + + gsk->gwy.xport.call_id = call_id; + } + } + + th = pd->hdr.tcp; + + if (spoof_call_id && gsk->lan.xport.call_id != gsk->gwy.xport.call_id) { + if (*spoof_call_id == gsk->gwy.xport.call_id) { + *spoof_call_id = gsk->lan.xport.call_id; + th->th_sum = pf_cksum_fixup(th->th_sum, + gsk->gwy.xport.call_id, gsk->lan.xport.call_id, 0); + } else { + *spoof_call_id = gsk->gwy.xport.call_id; + th->th_sum = pf_cksum_fixup(th->th_sum, + gsk->lan.xport.call_id, gsk->gwy.xport.call_id, 0); + } + + m = pf_lazy_makewritable(pd, m, off + plen); + if (!m) + return; + m_copyback(m, off, plen, &cm); + } + + switch (op) { + case PF_PPTP_REMOVE_GRE: + gs->timeout = PFTM_PURGE; + gs->src.state = gs->dst.state = PFGRE1S_NO_TRAFFIC; + gsk->lan.xport.call_id = 0; + gsk->gwy.xport.call_id = 0; + gsk->ext.xport.call_id = 0; + gs->id = gs->creatorid = 0; + break; + + case PF_PPTP_INSERT_GRE: + gs->creation = pf_time_second(); + gs->expire = pf_time_second(); + gs->timeout = PFTM_GREv1_FIRST_PACKET; + if (gs->src_node) ++gs->src_node->states; + if (gs->nat_src_node) ++gs->nat_src_node->states; + pf_set_rt_ifp(gs, &sk->lan.addr); + if (pf_insert_state(BOUND_IFACE(s->rule.ptr, kif), gs)) { + + /* + * + * FIX ME: insertion can fail when multiple PNS + * behind the same NAT open calls to the same PAC + * simultaneously because spoofed call ID numbers + * are chosen before states are inserted. 
This is + * hard to fix and happens infrequently enough that + * users will normally try again and this ALG will + * succeed. Failures are expected to be rare enough + * that fixing this is a low priority. + */ + + pf_src_tree_remove_state(gs); + STATE_DEC_COUNTERS(gs); + pool_put(&pf_state_pl, gs); + DPFPRINTF(PF_DEBUG_URGENT, ("pf_pptp_handler: error " + "inserting GREv1 state.\n")); + } + break; + + default: + break; + } +} + +static void +pf_pptp_unlink(struct pf_state *s) +{ + struct pf_app_state *as = s->state_key->app_state; + struct pf_state *gs = as->u.pptp.grev1_state; + + if (gs) { + if (gs->timeout < PFTM_MAX) + gs->timeout = PFTM_PURGE; + as->u.pptp.grev1_state = 0; + } +} + +static int +pf_ike_compare(struct pf_app_state *a, struct pf_app_state *b) +{ + int64_t d = a->u.ike.cookie - b->u.ike.cookie; + return ((d > 0) ? 1 : ((d < 0) ? -1 : 0)); +} +#endif + +static int +pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + u_short *reason) +{ +#pragma unused(h) + struct pf_state_key_cmp key; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t win = ntohs(th->th_win); + u_int32_t ack, end, seq, orig_seq; + u_int8_t sws, dws; + int ackskew; + int copyback = 0; + struct pf_state_peer *src, *dst; + +#ifndef NO_APPLE_EXTENSIONS + key.app_state = 0; +#endif + key.af = pd->af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = th->th_sport; + key.gwy.xport.port = th->th_dport; +#else + key.ext.port = th->th_sport; + key.gwy.port = th->th_dport; +#endif + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = th->th_sport; + key.ext.xport.port = th->th_dport; +#else + key.lan.port = th->th_sport; + key.ext.port = th->th_dport; +#endif + } + + 
STATE_LOOKUP(); + + if (direction == (*state)->state_key->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { + if (direction != (*state)->state_key->direction) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + if (th->th_flags & TH_SYN) { + if (ntohl(th->th_seq) != (*state)->src.seqlo) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, + 0, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else + (*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + struct pf_state_host *psrc, *pdst; + + if (direction == PF_OUT) { + psrc = &(*state)->state_key->gwy; + pdst = &(*state)->state_key->ext; + } else { + psrc = &(*state)->state_key->ext; + pdst = &(*state)->state_key->lan; + } + if (direction == (*state)->state_key->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = htonl(random()); + pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr, +#ifndef NO_APPLE_EXTENSIONS + &pdst->addr, psrc->xport.port, pdst->xport.port, +#else + &pdst->addr, 
psrc->port, pdst->port, +#endif + (*state)->dst.seqhi, 0, TH_SYN, 0, + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else { + (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0, 0, + (*state)->tag, NULL, NULL); + pf_send_tcp((*state)->rule.ptr, pd->af, &psrc->addr, +#ifndef NO_APPLE_EXTENSIONS + &pdst->addr, psrc->xport.port, pdst->xport.port, +#else + &pdst->addr, psrc->port, pdst->port, +#endif + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0, 1, + 0, NULL, NULL); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + } + + if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && + dst->state >= TCPS_FIN_WAIT_2 && + src->state >= TCPS_FIN_WAIT_2) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state reuse "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf("\n"); + } + /* XXX make sure it's the same direction ?? 
*/ + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + pf_unlink_state(*state); + *state = NULL; + return (PF_DROP); + } + + if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { + sws = src->wscale & PF_WSCALE_MASK; + dws = dst->wscale & PF_WSCALE_MASK; + } else + sws = dws = 0; + + /* + * Sequence tracking algorithm from Guido van Rooij's paper: + * http://www.madison-gurkha.com/publications/tcp_filtering/ + * tcp_filtering.ps + */ + + orig_seq = seq = ntohl(th->th_seq); + if (src->seqlo == 0) { + /* First packet from this end. Set its state */ + + if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) && + src->scrub == NULL) { + if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + } + + /* Deferred generation of sequence number modulator */ + if (dst->seqdiff && !src->seqdiff) { + /* use random iss for the TCP server */ + while ((src->seqdiff = random() - seq) == 0) + ; + ack = ntohl(th->th_ack) - dst->seqdiff; + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = off + sizeof (*th); + } else { + ack = ntohl(th->th_ack); + } + + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) { + end++; + if (dst->wscale & PF_WSCALE_FLAG) { + src->wscale = pf_get_wscale(m, off, th->th_off, + pd->af); + if (src->wscale & PF_WSCALE_FLAG) { + /* + * Remove scale factor from initial + * window + */ + sws = src->wscale & PF_WSCALE_MASK; + win = ((u_int32_t)win + (1 << sws) - 1) + >> sws; + dws = dst->wscale & PF_WSCALE_MASK; + } else { + /* fixup other window */ + dst->max_win <<= dst->wscale & + PF_WSCALE_MASK; + /* in case of a retrans SYN|ACK */ + dst->wscale = 0; + } + } + } + if (th->th_flags & TH_FIN) + end++; + + src->seqlo = seq; + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + + /* + * May need to slide the window (seqhi may have been set by + * the crappy stack check or if we picked up the 
connection + * after establishment) + */ + if (src->seqhi == 1 || + SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) + src->seqhi = end + MAX(1, dst->max_win << dws); + if (win > src->max_win) + src->max_win = win; + + } else { + ack = ntohl(th->th_ack) - dst->seqdiff; + if (src->seqdiff) { + /* Modulate sequence numbers */ + pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + + src->seqdiff), 0); + pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); + copyback = off+ sizeof (*th); + } + end = seq + pd->p_len; + if (th->th_flags & TH_SYN) + end++; + if (th->th_flags & TH_FIN) + end++; + } + + if ((th->th_flags & TH_ACK) == 0) { + /* Let it pass through the ack skew check */ + ack = dst->seqlo; + } else if ((ack == 0 && + (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || + /* broken tcp stacks do not set ack */ + (dst->state < TCPS_SYN_SENT)) { + /* + * Many stacks (ours included) will set the ACK number in an + * FIN|ACK if the SYN times out -- no sequence to ACK. + */ + ack = dst->seqlo; + } + + if (seq == end) { + /* Ease sequencing restrictions on no data packets */ + seq = src->seqlo; + end = seq; + } + + ackskew = dst->seqlo - ack; + + + /* + * Need to demodulate the sequence numbers in any TCP SACK options + * (Selective ACK). We could optionally validate the SACK values + * against the current ACK window, either forwards or backwards, but + * I'm not confident that SACK has been implemented properly + * everywhere. It wouldn't surprise me if several stacks accidently + * SACK too far backwards of previously ACKed data. There really aren't + * any security implications of bad SACKing unless the target stack + * doesn't validate the option length correctly. Someone trying to + * spoof into a TCP connection won't bother blindly sending SACK + * options anyway. 
+ */ + if (dst->seqdiff && (th->th_off << 2) > (int)sizeof (struct tcphdr)) { +#ifndef NO_APPLE_EXTENSIONS + copyback = pf_modulate_sack(m, off, pd, th, dst); + if (copyback == -1) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + + m = pd->mp; +#else + if (pf_modulate_sack(m, off, pd, th, dst)) + copyback = 1; +#endif + } + + +#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ + if (SEQ_GEQ(src->seqhi, end) && + /* Last octet inside other's window space */ + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && + /* Retrans: not more than one window back */ + (ackskew >= -MAXACKWINDOW) && + /* Acking not more than one reassembled fragment backwards */ + (ackskew <= (MAXACKWINDOW << sws)) && + /* Acking not more than one window forward */ + ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || + (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || + (pd->flags & PFDESC_IP_REAS) == 0)) { + /* Require an exact/+1 sequence match on resets when possible */ + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, &copyback)) + return (PF_DROP); + +#ifndef NO_APPLE_EXTENSIONS + m = pd->mp; +#endif + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + + /* update states */ + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } 
else if (dst->state == TCPS_CLOSING) + dst->state = TCPS_FIN_WAIT_2; + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = pf_time_second(); + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + /* Fall through to PASS packet */ + + } else if ((dst->state < TCPS_SYN_SENT || + dst->state >= TCPS_FIN_WAIT_2 || src->state >= TCPS_FIN_WAIT_2) && + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) && + /* Within a window forward of the originating packet */ + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { + /* Within a window backward of the originating packet */ + + /* + * This currently handles three situations: + * 1) Stupid stacks will shotgun SYNs before their peer + * replies. + * 2) When PF catches an already established stream (the + * firewall rebooted, the state table was flushed, routes + * changed...) + * 3) Packets get funky immediately after the connection + * closes (this should catch Solaris spurious ACK|FINs + * that web servers like to spew after a close) + * + * This must be a little more careful than the above code + * since packet floods will also be caught here. We don't + * update the TTL here to mitigate the damage of a packet + * flood and so the same code can handle awkward establishment + * and a loosened connection close. + * In the establishment case, a correct peer response will + * validate the connection, go through the normal state code + * and keep updating the state TTL. 
+ */ + + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: loose state match: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, + pd->p_len, ackskew, (*state)->packets[0], + (*state)->packets[1], + direction == PF_IN ? "in" : "out", + direction == (*state)->state_key->direction ? + "fwd" : "rev"); + } + + if (dst->scrub || src->scrub) { + if (pf_normalize_tcp_stateful(m, off, pd, reason, th, + *state, src, dst, &copyback)) + return (PF_DROP); +#ifndef NO_APPLE_EXTENSIONS + m = pd->mp; +#endif + } + + /* update max window */ + if (src->max_win < win) + src->max_win = win; + /* synchronize sequencing */ + if (SEQ_GT(end, src->seqlo)) + src->seqlo = end; + /* slide the window of what the other end can send */ + if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) + dst->seqhi = ack + MAX((win << sws), 1); + + /* + * Cannot set dst->seqhi here since this could be a shotgunned + * SYN and not an already established connection. 
+ */ + + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* Fall through to PASS packet */ + + } else { + if ((*state)->dst.state == TCPS_SYN_SENT && + (*state)->src.state == TCPS_SYN_SENT) { + /* Send RST for state mismatches during handshake */ + if (!(th->th_flags & TH_RST)) + pf_send_tcp((*state)->rule.ptr, pd->af, + pd->dst, pd->src, th->th_dport, + th->th_sport, ntohl(th->th_ack), 0, + TH_RST, 0, 0, + (*state)->rule.ptr->return_ttl, 1, 0, + pd->eh, kif->pfik_ifp); + src->seqlo = 0; + src->seqhi = 1; + src->max_win = 1; + } else if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD state: "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " + "pkts=%llu:%llu dir=%s,%s\n", + seq, orig_seq, ack, pd->p_len, ackskew, + (*state)->packets[0], (*state)->packets[1], + direction == PF_IN ? "in" : "out", + direction == (*state)->state_key->direction ? + "fwd" : "rev"); + printf("pf: State failure on: %c %c %c %c | %c %c\n", + SEQ_GEQ(src->seqhi, end) ? ' ' : '1', + SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? + ' ': '2', + (ackskew >= -MAXACKWINDOW) ? ' ' : '3', + (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', + SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5', + SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } + + /* Any packets which have gotten here are to be passed */ + +#ifndef NO_APPLE_EXTENSIONS + if ((*state)->state_key->app_state && + (*state)->state_key->app_state->handler) { + (*state)->state_key->app_state->handler(*state, direction, + off + (th->th_off << 2), pd, kif); + if (pd->lmw < 0) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + m = pd->mp; + } + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) + pf_change_ap(direction, pd->mp, pd->src, &th->th_sport, + pd->ip_sum, &th->th_sum, + &(*state)->state_key->gwy.addr, + (*state)->state_key->gwy.xport.port, 0, pd->af); + else + pf_change_ap(direction, pd->mp, pd->dst, &th->th_dport, + pd->ip_sum, &th->th_sum, + &(*state)->state_key->lan.addr, + (*state)->state_key->lan.xport.port, 0, pd->af); + copyback = off + sizeof (*th); + } + + if (copyback) { + m = pf_lazy_makewritable(pd, m, copyback); + if (!m) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + + /* Copyback sequence modulation or stateful scrub changes */ + m_copyback(m, off, sizeof (*th), th); + } +#else + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, pd->mp, &th->th_sport, pd->ip_sum, + &th->th_sum, &(*state)->state_key->gwy.addr, + (*state)->state_key->gwy.port, 0, pd->af); + else + pf_change_ap(pd->dst, pd->mp, &th->th_dport, pd->ip_sum, + &th->th_sum, &(*state)->state_key->lan.addr, + (*state)->state_key->lan.port, 0, pd->af); + m_copyback(m, off, sizeof (*th), th); + } else if (copyback) { + /* Copyback sequence modulation or stateful scrub changes */ + m_copyback(m, off, sizeof (*th), th); + } +#endif + + return (PF_PASS); +} + +static int +pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, 
struct pf_pdesc *pd) +{ +#pragma unused(h) + struct pf_state_peer *src, *dst; + struct pf_state_key_cmp key; + struct udphdr *uh = pd->hdr.udp; +#ifndef NO_APPLE_EXTENSIONS + struct pf_app_state as; + int dx, action, extfilter; + key.app_state = 0; + key.proto_variant = PF_EXTFILTER_APD; +#endif + + key.af = pd->af; + key.proto = IPPROTO_UDP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = uh->uh_sport; + key.gwy.xport.port = uh->uh_dport; + dx = PF_IN; +#else + key.ext.port = uh->uh_sport; + key.gwy.port = uh->uh_dport; +#endif + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = uh->uh_sport; + key.ext.xport.port = uh->uh_dport; + dx = PF_OUT; +#else + key.lan.port = uh->uh_sport; + key.ext.port = uh->uh_dport; +#endif + } + +#ifndef NO_APPLE_EXTENSIONS + if (uh->uh_sport == PF_IKE_PORT && uh->uh_dport == PF_IKE_PORT) { + struct pf_ike_hdr ike; + size_t plen = m->m_pkthdr.len - off - sizeof (*uh); + if (plen < PF_IKE_PACKET_MINSIZE) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IKE message too small.\n")); + return (PF_DROP); + } + + if (plen > sizeof (ike)) + plen = sizeof (ike); + m_copydata(m, off + sizeof (*uh), plen, &ike); + + if (ike.initiator_cookie) { + key.app_state = &as; + as.compare_lan_ext = pf_ike_compare; + as.compare_ext_gwy = pf_ike_compare; + as.u.ike.cookie = ike.initiator_cookie; + } else { + /* + * + * Support non-standard NAT-T implementations that + * push the ESP packet over the top of the IKE packet. + * Do not drop packet. 
+ */ + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IKE initiator cookie = 0.\n")); + } + } + + *state = pf_find_state(kif, &key, dx); + + if (!key.app_state && *state == 0) { + key.proto_variant = PF_EXTFILTER_AD; + *state = pf_find_state(kif, &key, dx); + } + + if (!key.app_state && *state == 0) { + key.proto_variant = PF_EXTFILTER_EI; + *state = pf_find_state(kif, &key, dx); + } + + if (pf_state_lookup_aux(state, kif, direction, &action)) + return (action); +#else + STATE_LOOKUP(); +#endif + + if (direction == (*state)->state_key->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFUDPS_SINGLE) + src->state = PFUDPS_SINGLE; + if (dst->state == PFUDPS_SINGLE) + dst->state = PFUDPS_MULTIPLE; + + /* update expire time */ + (*state)->expire = pf_time_second(); + if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE) + (*state)->timeout = PFTM_UDP_MULTIPLE; + else + (*state)->timeout = PFTM_UDP_SINGLE; + +#ifndef NO_APPLE_EXTENSIONS + extfilter = (*state)->state_key->proto_variant; + if (extfilter > PF_EXTFILTER_APD) { + (*state)->state_key->ext.xport.port = key.ext.xport.port; + if (extfilter > PF_EXTFILTER_AD) + PF_ACPY(&(*state)->state_key->ext.addr, + &key.ext.addr, key.af); + } + + if ((*state)->state_key->app_state && + (*state)->state_key->app_state->handler) { + (*state)->state_key->app_state->handler(*state, direction, + off + uh->uh_ulen, pd, kif); + m = pd->mp; + } +#endif + + /* translate source/destination address, if necessary */ +#ifndef NO_APPLE_EXTENSIONS + if (STATE_TRANSLATE((*state)->state_key)) { + m = pf_lazy_makewritable(pd, m, off + sizeof (*uh)); + if (!m) + return (PF_DROP); + + if (direction == PF_OUT) + pf_change_ap(direction, pd->mp, pd->src, &uh->uh_sport, + pd->ip_sum, &uh->uh_sum, + &(*state)->state_key->gwy.addr, + (*state)->state_key->gwy.xport.port, 1, pd->af); + else + pf_change_ap(direction, pd->mp, pd->dst, 
&uh->uh_dport, + pd->ip_sum, &uh->uh_sum, + &(*state)->state_key->lan.addr, + (*state)->state_key->lan.xport.port, 1, pd->af); + m_copyback(m, off, sizeof (*uh), uh); + } +#else + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) + pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, + &uh->uh_sum, &(*state)->state_key->gwy.addr, + (*state)->state_key->gwy.port, 1, pd->af); + else + pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, + &uh->uh_sum, &(*state)->state_key->lan.addr, + (*state)->state_key->lan.port, 1, pd->af); + m_copyback(m, off, sizeof (*uh), uh); + } +#endif + + return (PF_PASS); +} + +static int +pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) +{ +#pragma unused(h) + struct pf_addr *saddr = pd->src, *daddr = pd->dst; + u_int16_t icmpid = 0, *icmpsum; + u_int8_t icmptype; + int state_icmp = 0; + struct pf_state_key_cmp key; + +#ifndef NO_APPLE_EXTENSIONS + struct pf_app_state as; + key.app_state = 0; +#endif + + switch (pd->proto) { +#if INET + case IPPROTO_ICMP: + icmptype = pd->hdr.icmp->icmp_type; + icmpid = pd->hdr.icmp->icmp_id; + icmpsum = &pd->hdr.icmp->icmp_cksum; + + if (icmptype == ICMP_UNREACH || + icmptype == ICMP_SOURCEQUENCH || + icmptype == ICMP_REDIRECT || + icmptype == ICMP_TIMXCEED || + icmptype == ICMP_PARAMPROB) + state_icmp++; + break; +#endif /* INET */ +#if INET6 + case IPPROTO_ICMPV6: + icmptype = pd->hdr.icmp6->icmp6_type; + icmpid = pd->hdr.icmp6->icmp6_id; + icmpsum = &pd->hdr.icmp6->icmp6_cksum; + + if (icmptype == ICMP6_DST_UNREACH || + icmptype == ICMP6_PACKET_TOO_BIG || + icmptype == ICMP6_TIME_EXCEEDED || + icmptype == ICMP6_PARAM_PROB) + state_icmp++; + break; +#endif /* INET6 */ + } + + if (!state_icmp) { + + /* + * ICMP query/reply message not related to a TCP/UDP packet. + * Search for an ICMP state. 
+ */ + key.af = pd->af; + key.proto = pd->proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = 0; + key.gwy.xport.port = icmpid; +#else + key.ext.port = 0; + key.gwy.port = icmpid; +#endif + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = icmpid; + key.ext.xport.port = 0; +#else + key.lan.port = icmpid; + key.ext.port = 0; +#endif + } + + STATE_LOOKUP(); + + (*state)->expire = pf_time_second(); + (*state)->timeout = PFTM_ICMP_ERROR_REPLY; + + /* translate source/destination address, if necessary */ + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + (*state)->state_key->gwy.addr.v4.s_addr, 0); +#ifndef NO_APPLE_EXTENSIONS + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->state_key->gwy.xport.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->state_key->gwy.xport.port; + m = pf_lazy_makewritable(pd, m, + off + ICMP_MINLEN); + if (!m) + return (PF_DROP); +#else + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->state_key->gwy.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->state_key->gwy.port; +#endif + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->state_key->gwy.addr, 0); +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, + off + sizeof (struct icmp6_hdr)); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } else { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&daddr->v4.s_addr, + pd->ip_sum, + 
(*state)->state_key->lan.addr.v4.s_addr, 0); +#ifndef NO_APPLE_EXTENSIONS + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->state_key->lan.xport.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->state_key->lan.xport.port; + m = pf_lazy_makewritable(pd, m, + off + ICMP_MINLEN); + if (!m) + return (PF_DROP); +#else + pd->hdr.icmp->icmp_cksum = + pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, icmpid, + (*state)->state_key->lan.port, 0); + pd->hdr.icmp->icmp_id = + (*state)->state_key->lan.port; +#endif + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + pf_change_a6(daddr, + &pd->hdr.icmp6->icmp6_cksum, + &(*state)->state_key->lan.addr, 0); +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, + off + sizeof (struct icmp6_hdr)); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + break; +#endif /* INET6 */ + } + } + } + + return (PF_PASS); + + } else { + /* + * ICMP error message in response to a TCP/UDP packet. + * Extract the inner TCP/UDP header and search for that state. 
+ */ + + struct pf_pdesc pd2; +#if INET + struct ip h2; +#endif /* INET */ +#if INET6 + struct ip6_hdr h2_6; + int terminal = 0; +#endif /* INET6 */ + int ipoff2 = 0; + int off2 = 0; + + memset(&pd2, 0, sizeof (pd2)); + + pd2.af = pd->af; + switch (pd->af) { +#if INET + case AF_INET: + /* offset of h2 in mbuf chain */ + ipoff2 = off + ICMP_MINLEN; + + if (!pf_pull_hdr(m, ipoff2, &h2, sizeof (h2), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip)\n")); + return (PF_DROP); + } + /* + * ICMP error messages don't refer to non-first + * fragments + */ + if (h2.ip_off & htons(IP_OFFMASK)) { + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + } + + /* offset of protocol header that follows h2 */ + off2 = ipoff2 + (h2.ip_hl << 2); + + pd2.proto = h2.ip_p; + pd2.src = (struct pf_addr *)&h2.ip_src; + pd2.dst = (struct pf_addr *)&h2.ip_dst; + pd2.ip_sum = &h2.ip_sum; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + ipoff2 = off + sizeof (struct icmp6_hdr); + + if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof (h2_6), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(ip6)\n")); + return (PF_DROP); + } + pd2.proto = h2_6.ip6_nxt; + pd2.src = (struct pf_addr *)&h2_6.ip6_src; + pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; + pd2.ip_sum = NULL; + off2 = ipoff2 + sizeof (h2_6); + do { + switch (pd2.proto) { + case IPPROTO_FRAGMENT: + /* + * ICMPv6 error messages for + * non-first fragments + */ + REASON_SET(reason, PFRES_FRAG); + return (PF_DROP); + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off2, &opt6, + sizeof (opt6), NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMPv6 short opt\n")); + return (PF_DROP); + } + if (pd2.proto == IPPROTO_AH) + off2 += (opt6.ip6e_len + 2) * 4; + else + off2 += (opt6.ip6e_len + 1) * 8; + pd2.proto = 
opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + break; +#endif /* INET6 */ + } + + switch (pd2.proto) { + case IPPROTO_TCP: { + struct tcphdr th; + u_int32_t seq; + struct pf_state_peer *src, *dst; + u_int8_t dws; + int copyback = 0; + + /* + * Only the first 8 bytes of the TCP header can be + * expected. Don't access any TCP header fields after + * th_seq, an ackskew test is not possible. + */ + if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason, + pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(tcp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = th.th_dport; + key.gwy.xport.port = th.th_sport; +#else + key.ext.port = th.th_dport; + key.gwy.port = th.th_sport; +#endif + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = th.th_dport; + key.ext.xport.port = th.th_sport; +#else + key.lan.port = th.th_dport; + key.ext.port = th.th_sport; +#endif + } + + STATE_LOOKUP(); + + if (direction == (*state)->state_key->direction) { + src = &(*state)->dst; + dst = &(*state)->src; + } else { + src = &(*state)->src; + dst = &(*state)->dst; + } + + if (src->wscale && dst->wscale) + dws = dst->wscale & PF_WSCALE_MASK; + else + dws = 0; + + /* Demodulate sequence number */ + seq = ntohl(th.th_seq) - src->seqdiff; + if (src->seqdiff) { + pf_change_a(&th.th_seq, icmpsum, + htonl(seq), 0); + copyback = 1; + } + + if (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: BAD ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); 
+ printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } + REASON_SET(reason, PFRES_BADSTATE); + return (PF_DROP); + } + + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &th.th_sport, + daddr, &(*state)->state_key->lan.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->lan.xport.port, NULL, +#else + (*state)->state_key->lan.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, &th.th_dport, + saddr, &(*state)->state_key->gwy.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->gwy.xport.port, NULL, +#else + (*state)->state_key->gwy.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + copyback = 1; + } + + if (copyback) { +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, off2 + 8); + if (!m) + return (PF_DROP); +#endif + switch (pd2.af) { +#if INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof (h2), + &h2); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + m_copyback(m, off, + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof (h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, 8, &th); + } + + return (PF_PASS); + break; + } + case IPPROTO_UDP: { + struct udphdr uh; +#ifndef NO_APPLE_EXTENSIONS + int dx, action; +#endif + if (!pf_pull_hdr(m, off2, &uh, sizeof (uh), + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(udp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_UDP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = uh.uh_dport; + key.gwy.xport.port = uh.uh_sport; + dx = PF_IN; +#else + key.ext.port = uh.uh_dport; + key.gwy.port = uh.uh_sport; +#endif + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); 
+ PF_ACPY(&key.ext.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = uh.uh_dport; + key.ext.xport.port = uh.uh_sport; + dx = PF_OUT; +#else + key.lan.port = uh.uh_dport; + key.ext.port = uh.uh_sport; +#endif + } + +#ifndef NO_APPLE_EXTENSIONS + key.proto_variant = PF_EXTFILTER_APD; + + if (uh.uh_sport == PF_IKE_PORT && + uh.uh_dport == PF_IKE_PORT) { + struct pf_ike_hdr ike; + size_t plen = + m->m_pkthdr.len - off2 - sizeof (uh); + if (direction == PF_IN && + plen < 8 /* PF_IKE_PACKET_MINSIZE */) { + DPFPRINTF(PF_DEBUG_MISC, ("pf: " + "ICMP error, embedded IKE message " + "too small.\n")); + return (PF_DROP); + } + + if (plen > sizeof (ike)) + plen = sizeof (ike); + m_copydata(m, off + sizeof (uh), plen, &ike); + + key.app_state = &as; + as.compare_lan_ext = pf_ike_compare; + as.compare_ext_gwy = pf_ike_compare; + as.u.ike.cookie = ike.initiator_cookie; + } + + *state = pf_find_state(kif, &key, dx); + + if (key.app_state && *state == 0) { + key.app_state = 0; + *state = pf_find_state(kif, &key, dx); + } + + if (*state == 0) { + key.proto_variant = PF_EXTFILTER_AD; + *state = pf_find_state(kif, &key, dx); + } + + if (*state == 0) { + key.proto_variant = PF_EXTFILTER_EI; + *state = pf_find_state(kif, &key, dx); + } + + if (pf_state_lookup_aux(state, kif, direction, &action)) + return (action); +#else + STATE_LOOKUP(); +#endif + + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &uh.uh_sport, + daddr, &(*state)->state_key->lan.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->lan.xport.port, &uh.uh_sum, +#else + (*state)->state_key->lan.port, &uh.uh_sum, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); + } else { + pf_change_icmp(pd2.dst, &uh.uh_dport, + saddr, &(*state)->state_key->gwy.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->gwy.xport.port, &uh.uh_sum, +#else + (*state)->state_key->gwy.port, &uh.uh_sum, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 1, pd2.af); 
+ } +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, + off2 + sizeof (uh)); + if (!m) + return (PF_DROP); +#endif + switch (pd2.af) { +#if INET + case AF_INET: + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof (h2), &h2); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + m_copyback(m, off, + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof (h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + m_copyback(m, off2, sizeof (uh), &uh); + } + + return (PF_PASS); + break; + } +#if INET + case IPPROTO_ICMP: { + struct icmp iih; + + if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN, + NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short i" + "(icmp)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = 0; + key.gwy.xport.port = iih.icmp_id; +#else + key.ext.port = 0; + key.gwy.port = iih.icmp_id; +#endif + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = iih.icmp_id; + key.ext.xport.port = 0; +#else + key.lan.port = iih.icmp_id; + key.ext.port = 0; +#endif + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp_id, + daddr, &(*state)->state_key->lan.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->lan.xport.port, NULL, +#else + (*state)->state_key->lan.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } else { + pf_change_icmp(pd2.dst, &iih.icmp_id, + saddr, &(*state)->state_key->gwy.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->gwy.xport.port, NULL, +#else + (*state)->state_key->gwy.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET); + } 
+#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, off2 + ICMP_MINLEN); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof (h2), &h2); + m_copyback(m, off2, ICMP_MINLEN, &iih); + } + + return (PF_PASS); + break; + } +#endif /* INET */ +#if INET6 + case IPPROTO_ICMPV6: { + struct icmp6_hdr iih; + + if (!pf_pull_hdr(m, off2, &iih, + sizeof (struct icmp6_hdr), NULL, reason, pd2.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: ICMP error message too short " + "(icmp6)\n")); + return (PF_DROP); + } + + key.af = pd2.af; + key.proto = IPPROTO_ICMPV6; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = 0; + key.gwy.xport.port = iih.icmp6_id; +#else + key.ext.port = 0; + key.gwy.port = iih.icmp6_id; +#endif + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = iih.icmp6_id; + key.ext.xport.port = 0; +#else + key.lan.port = iih.icmp6_id; + key.ext.port = 0; +#endif + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, &iih.icmp6_id, + daddr, &(*state)->state_key->lan.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->lan.xport.port, NULL, +#else + (*state)->state_key->lan.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } else { + pf_change_icmp(pd2.dst, &iih.icmp6_id, + saddr, &(*state)->state_key->gwy.addr, +#ifndef NO_APPLE_EXTENSIONS + (*state)->state_key->gwy.xport.port, NULL, +#else + (*state)->state_key->gwy.port, NULL, +#endif + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, AF_INET6); + } +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, off2 + + sizeof (struct icmp6_hdr)); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + 
m_copyback(m, ipoff2, sizeof (h2_6), &h2_6); + m_copyback(m, off2, sizeof (struct icmp6_hdr), + &iih); + } + + return (PF_PASS); + break; + } +#endif /* INET6 */ + default: { + key.af = pd2.af; + key.proto = pd2.proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd2.dst, key.af); + PF_ACPY(&key.gwy.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = 0; + key.gwy.xport.port = 0; +#else + key.ext.port = 0; + key.gwy.port = 0; +#endif + } else { + PF_ACPY(&key.lan.addr, pd2.dst, key.af); + PF_ACPY(&key.ext.addr, pd2.src, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = 0; + key.ext.xport.port = 0; +#else + key.lan.port = 0; + key.ext.port = 0; +#endif + } + + STATE_LOOKUP(); + + if (STATE_TRANSLATE((*state)->state_key)) { + if (direction == PF_IN) { + pf_change_icmp(pd2.src, NULL, + daddr, &(*state)->state_key->lan.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } else { + pf_change_icmp(pd2.dst, NULL, + saddr, &(*state)->state_key->gwy.addr, + 0, NULL, + pd2.ip_sum, icmpsum, + pd->ip_sum, 0, pd2.af); + } + switch (pd2.af) { +#if INET + case AF_INET: +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, + ipoff2 + sizeof (h2)); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + m_copyback(m, ipoff2, sizeof (h2), &h2); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: +#ifndef NO_APPLE_EXTENSIONS + m = pf_lazy_makewritable(pd, m, + ipoff2 + sizeof (h2_6)); + if (!m) + return (PF_DROP); +#endif + m_copyback(m, off, + sizeof (struct icmp6_hdr), + pd->hdr.icmp6); + m_copyback(m, ipoff2, sizeof (h2_6), + &h2_6); + break; +#endif /* INET6 */ + } + } + + return (PF_PASS); + break; + } + } + } +} + +#ifndef NO_APPLE_EXTENSIONS +static int +pf_test_state_grev1(struct pf_state **state, int direction, + struct pfi_kif *kif, int off, struct pf_pdesc *pd) +{ + struct pf_state_peer *src; + struct pf_state_peer *dst; + struct pf_state_key_cmp key; + struct 
pf_grev1_hdr *grev1 = pd->hdr.grev1; + struct mbuf *m; + +#ifndef NO_APPLE_EXTENSIONS + key.app_state = 0; +#endif + key.af = pd->af; + key.proto = IPPROTO_GRE; + key.proto_variant = PF_GRE_PPTP_VARIANT; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.gwy.xport.call_id = grev1->call_id; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.ext.xport.call_id = grev1->call_id; + } + + STATE_LOOKUP(); + + if (direction == (*state)->state_key->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFGRE1S_INITIATING) + src->state = PFGRE1S_INITIATING; + + /* update expire time */ + (*state)->expire = pf_time_second(); + if (src->state >= PFGRE1S_INITIATING && + dst->state >= PFGRE1S_INITIATING) { + (*state)->timeout = PFTM_GREv1_ESTABLISHED; + src->state = PFGRE1S_ESTABLISHED; + dst->state = PFGRE1S_ESTABLISHED; + } else { + (*state)->timeout = PFTM_GREv1_INITIATING; + } + /* translate source/destination address, if necessary */ + if (STATE_GRE_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, + (*state)->state_key->gwy.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->src, &(*state)->state_key->gwy.addr, + pd->af); + break; +#endif /* INET6 */ + } + } else { + grev1->call_id = (*state)->state_key->lan.xport.call_id; + + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, + (*state)->state_key->lan.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->dst, &(*state)->state_key->lan.addr, + pd->af); + break; +#endif /* INET6 */ + } + } + + m = pf_lazy_makewritable(pd, pd->mp, off + sizeof (*grev1)); + if (!m) + return 
(PF_DROP); + m_copyback(m, off, sizeof (*grev1), grev1); + } + + return (PF_PASS); +} + +int +pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif, + int off, struct pf_pdesc *pd) +{ +#pragma unused(off) + struct pf_state_peer *src; + struct pf_state_peer *dst; + struct pf_state_key_cmp key; + struct pf_esp_hdr *esp = pd->hdr.esp; + int action; + + memset(&key, 0, sizeof (key)); + key.af = pd->af; + key.proto = IPPROTO_ESP; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); + key.gwy.xport.spi = esp->spi; + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); + key.ext.xport.spi = esp->spi; + } + + *state = pf_find_state(kif, &key, direction); + + if (*state == 0) { + struct pf_state *s; + + /* + * + * No matching state. Look for a blocking state. If we find + * one, then use that state and move it so that it's keyed to + * the SPI in the current packet. + */ + if (direction == PF_IN) { + key.gwy.xport.spi = 0; + + s = pf_find_state(kif, &key, direction); + if (s) { + struct pf_state_key *sk = s->state_key; + + RB_REMOVE(pf_state_tree_ext_gwy, + &pf_statetbl_ext_gwy, sk); + sk->lan.xport.spi = sk->gwy.xport.spi = + esp->spi; + + if (RB_INSERT(pf_state_tree_ext_gwy, + &pf_statetbl_ext_gwy, sk)) + pf_detach_state(s, PF_DT_SKIP_EXTGWY); + else + *state = s; + } + } else { + key.ext.xport.spi = 0; + + s = pf_find_state(kif, &key, direction); + if (s) { + struct pf_state_key *sk = s->state_key; + + RB_REMOVE(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, sk); + sk->ext.xport.spi = esp->spi; + + if (RB_INSERT(pf_state_tree_lan_ext, + &pf_statetbl_lan_ext, sk)) + pf_detach_state(s, PF_DT_SKIP_LANEXT); + else + *state = s; + } + } + + if (s) { + if (*state == 0) { +#if NPFSYNC + if (s->creatorid == pf_status.hostid) + pfsync_delete_state(s); +#endif + s->timeout = PFTM_UNLINKED; + hook_runloop(&s->unlink_hooks, + HOOK_REMOVE|HOOK_FREE); + 
pf_src_tree_remove_state(s); + pf_free_state(s); + return (PF_DROP); + } + } + } + + if (pf_state_lookup_aux(state, kif, direction, &action)) + return (action); + + if (direction == (*state)->state_key->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFESPS_INITIATING) + src->state = PFESPS_INITIATING; + + /* update expire time */ + (*state)->expire = pf_time_second(); + if (src->state >= PFESPS_INITIATING && + dst->state >= PFESPS_INITIATING) { + (*state)->timeout = PFTM_ESP_ESTABLISHED; + src->state = PFESPS_ESTABLISHED; + dst->state = PFESPS_ESTABLISHED; + } else { + (*state)->timeout = PFTM_ESP_INITIATING; + } + /* translate source/destination address, if necessary */ + if (STATE_ADDR_TRANSLATE((*state)->state_key)) { + if (direction == PF_OUT) { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, + (*state)->state_key->gwy.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->src, &(*state)->state_key->gwy.addr, + pd->af); + break; +#endif /* INET6 */ + } + } else { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, + (*state)->state_key->lan.addr.v4.s_addr, 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->dst, &(*state)->state_key->lan.addr, + pd->af); + break; +#endif /* INET6 */ + } + } + } + + return (PF_PASS); +} +#endif + +static int +pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, + struct pf_pdesc *pd) +{ + struct pf_state_peer *src, *dst; + struct pf_state_key_cmp key; + +#ifndef NO_APPLE_EXTENSIONS + key.app_state = 0; +#endif + key.af = pd->af; + key.proto = pd->proto; + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, pd->src, key.af); + PF_ACPY(&key.gwy.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.ext.xport.port = 0; + key.gwy.xport.port 
= 0; +#else + key.ext.port = 0; + key.gwy.port = 0; +#endif + } else { + PF_ACPY(&key.lan.addr, pd->src, key.af); + PF_ACPY(&key.ext.addr, pd->dst, key.af); +#ifndef NO_APPLE_EXTENSIONS + key.lan.xport.port = 0; + key.ext.xport.port = 0; +#else + key.lan.port = 0; + key.ext.port = 0; +#endif + } + + STATE_LOOKUP(); + + if (direction == (*state)->state_key->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + /* update states */ + if (src->state < PFOTHERS_SINGLE) + src->state = PFOTHERS_SINGLE; + if (dst->state == PFOTHERS_SINGLE) + dst->state = PFOTHERS_MULTIPLE; + + /* update expire time */ + (*state)->expire = pf_time_second(); + if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE) + (*state)->timeout = PFTM_OTHER_MULTIPLE; + else + (*state)->timeout = PFTM_OTHER_SINGLE; + + /* translate source/destination address, if necessary */ +#ifndef NO_APPLE_EXTENSIONS + if (STATE_ADDR_TRANSLATE((*state)->state_key)) { +#else + if (STATE_TRANSLATE((*state)->state_key)) { +#endif + if (direction == PF_OUT) { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->src->v4.s_addr, + pd->ip_sum, + (*state)->state_key->gwy.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->src, + &(*state)->state_key->gwy.addr, pd->af); + break; +#endif /* INET6 */ + } + } else { + switch (pd->af) { +#if INET + case AF_INET: + pf_change_a(&pd->dst->v4.s_addr, + pd->ip_sum, + (*state)->state_key->lan.addr.v4.s_addr, + 0); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + PF_ACPY(pd->dst, + &(*state)->state_key->lan.addr, pd->af); + break; +#endif /* INET6 */ + } + } + } + + return (PF_PASS); +} + +/* + * ipoff and off are measured from the start of the mbuf chain. + * h must be at "ipoff" on the mbuf chain. 
+ */ +void * +pf_pull_hdr(struct mbuf *m, int off, void *p, int len, + u_short *actionp, u_short *reasonp, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + if (fragoff) { + if (fragoff >= len) { + ACTION_SET(actionp, PF_PASS); + } else { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_FRAG); + } + return (NULL); + } + if (m->m_pkthdr.len < off + len || + ntohs(h->ip_len) < off + len) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < off + len || + (ntohs(h->ip6_plen) + sizeof (struct ip6_hdr)) < + (unsigned)(off + len)) { + ACTION_SET(actionp, PF_DROP); + REASON_SET(reasonp, PFRES_SHORT); + return (NULL); + } + break; + } +#endif /* INET6 */ + } + m_copydata(m, off, len, p); + return (p); +} + +int +pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) +{ +#pragma unused(kif) + struct sockaddr_in *dst; + int ret = 1; +#if INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif + + bzero(&ro, sizeof (ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof (*dst); + dst->sin_addr = addr->v4; + break; +#if INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof (*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + /* XXX: IFT_ENC is not currently used by anything*/ + /* Skip checks for ipsec interfaces */ + if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) + goto out; + + rtalloc((struct route *)&ro); + +out: + if (ro.ro_rt != NULL) + RTFREE(ro.ro_rt); + return (ret); +} + +int +pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, 
struct pf_addr_wrap *aw) +{ +#pragma unused(aw) + struct sockaddr_in *dst; +#if INET6 + struct sockaddr_in6 *dst6; + struct route_in6 ro; +#else + struct route ro; +#endif + int ret = 0; + + bzero(&ro, sizeof (ro)); + switch (af) { + case AF_INET: + dst = satosin(&ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof (*dst); + dst->sin_addr = addr->v4; + break; +#if INET6 + case AF_INET6: + dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6->sin6_family = AF_INET6; + dst6->sin6_len = sizeof (*dst6); + dst6->sin6_addr = addr->v6; + break; +#endif /* INET6 */ + default: + return (0); + } + + rtalloc((struct route *)&ro); + + if (ro.ro_rt != NULL) { + RTFREE(ro.ro_rt); + } + + return (ret); +} + +#if INET +static void +pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s, struct pf_pdesc *pd) +{ +#pragma unused(pd) + struct mbuf *m0, *m1; + struct route iproute; + struct route *ro = NULL; + struct sockaddr_in *dst; + struct ip *ip; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + int error = 0; + int sw_csum = 0; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route: invalid parameters"); + + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_copym(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < (int)sizeof (struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof (struct ip)\n")); + goto bad; + } + + ip = mtod(m0, struct ip *); + + ro = &iproute; + bzero((caddr_t)ro, sizeof (*ro)); + dst = satosin(&ro->ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof (*dst); + dst->sin_addr = ip->ip_dst; + + if (r->rt == PF_FASTROUTE) { + rtalloc(ro); + if (ro->ro_rt == 0) { + ipstat.ips_noroute++; + goto bad; + } + + ifp = 
ro->ro_rt->rt_ifp; + ro->ro_rt->rt_use++; + + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + dst = satosin(ro->ro_rt->rt_gateway); + } else { + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } + if (s == NULL) { + pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET)) + dst->sin_addr.s_addr = naddr.v4.s_addr; + ifp = r->rpool.cur->kif ? + r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET)) + dst->sin_addr.s_addr = + s->rt_addr.v4.s_addr; + ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + } + } + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < (int)sizeof (struct ip)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route: m0->m_len < sizeof (struct ip)\n")); + goto bad; + } + ip = mtod(m0, struct ip *); + } + + /* Copied from ip_output. */ + + /* Catch routing changes wrt. hardware checksumming for TCP or UDP. 
*/ + m0->m_pkthdr.csum_flags |= CSUM_IP; + sw_csum = m0->m_pkthdr.csum_flags & + ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + + if (ifp->if_hwassist & CSUM_TCP_SUM16) { + /* + * Special case code for GMACE + * frames that can be checksumed by GMACE SUM16 HW: + * frame >64, no fragments, no UDP + */ + if (apple_hwcksum_tx && (m0->m_pkthdr.csum_flags & CSUM_TCP) && + (ntohs(ip->ip_len) > 50) && + (ntohs(ip->ip_len) <= ifp->if_mtu)) { + /* + * Apple GMAC HW, expects: + * STUFF_OFFSET << 16 | START_OFFSET + */ + /* IP+Enet header length */ + u_short offset = ((ip->ip_hl) << 2) + 14; + u_short csumprev = m0->m_pkthdr.csum_data & 0xffff; + m0->m_pkthdr.csum_flags = CSUM_DATA_VALID | + CSUM_TCP_SUM16; /* for GMAC */ + m0->m_pkthdr.csum_data = (csumprev + offset) << 16 ; + m0->m_pkthdr.csum_data += offset; + /* do IP hdr chksum in software */ + sw_csum = CSUM_DELAY_IP; + } else { + /* let the software handle any UDP or TCP checksums */ + sw_csum |= (CSUM_DELAY_DATA & m0->m_pkthdr.csum_flags); + } + } else if (apple_hwcksum_tx == 0) { + sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) & + m0->m_pkthdr.csum_flags; + } + + if (sw_csum & CSUM_DELAY_DATA) { + in_delayed_cksum(m0); + sw_csum &= ~CSUM_DELAY_DATA; + m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } + + if (apple_hwcksum_tx != 0) { + m0->m_pkthdr.csum_flags &= + IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + } else { + m0->m_pkthdr.csum_flags = 0; + } + + if (ntohs(ip->ip_len) <= ifp->if_mtu || + (ifp->if_hwassist & CSUM_FRAGMENT)) { + ip->ip_sum = 0; + if (sw_csum & CSUM_DELAY_IP) + ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + error = ifnet_output(ifp, PF_INET, m0, ro, sintosa(dst)); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. 
+ */ + if (ip->ip_off & htons(IP_DF)) { + ipstat.ips_cantfrag++; + if (r->rt != PF_DUPTO) { + icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, + ifp->if_mtu); + goto done; + } else + goto bad; + } + + m1 = m0; + error = ip_fragment(m0, ifp, ifp->if_mtu, sw_csum); + if (error) { + m0 = NULL; + goto bad; + } + + for (m0 = m1; m0; m0 = m1) { + m1 = m0->m_nextpkt; + m0->m_nextpkt = 0; + if (error == 0) + error = ifnet_output(ifp, PF_INET, m0, ro, + sintosa(dst)); + else + m_freem(m0); + } + + if (error == 0) + ipstat.ips_fragmented++; + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + if (ro == &iproute && ro->ro_rt) + RTFREE(ro->ro_rt); + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET */ + +#if INET6 +static void +pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, + struct pf_state *s, struct pf_pdesc *pd) +{ +#pragma unused(pd) + struct mbuf *m0; + struct route_in6 ip6route; + struct route_in6 *ro; + struct sockaddr_in6 *dst; + struct ip6_hdr *ip6; + struct ifnet *ifp = NULL; + struct pf_addr naddr; + struct pf_src_node *sn = NULL; + int error = 0; + + if (m == NULL || *m == NULL || r == NULL || + (dir != PF_IN && dir != PF_OUT) || oifp == NULL) + panic("pf_route6: invalid parameters"); + + if (pd->pf_mtag->routed++ > 3) { + m0 = *m; + *m = NULL; + goto bad; + } + + if (r->rt == PF_DUPTO) { + if ((m0 = m_copym(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) + return; + } else { + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + return; + m0 = *m; + } + + if (m0->m_len < (int)sizeof (struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: m0->m_len < sizeof (struct ip6_hdr)\n")); + goto bad; + } + ip6 = mtod(m0, struct ip6_hdr *); + + ro = &ip6route; + bzero((caddr_t)ro, sizeof (*ro)); + dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof (*dst); + dst->sin6_addr = ip6->ip6_dst; + + /* Cheat. XXX why only in the v6 case??? 
*/ + if (r->rt == PF_FASTROUTE) { + struct pf_mtag *pf_mtag; + + if ((pf_mtag = pf_get_mtag(m0)) == NULL) + goto bad; + pf_mtag->flags |= PF_TAG_GENERATED; + ip6_output(m0, NULL, NULL, 0, NULL, NULL, 0); + return; + } + + if (TAILQ_EMPTY(&r->rpool.list)) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_route6: TAILQ_EMPTY(&r->rpool.list)\n")); + goto bad; + } + if (s == NULL) { + pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, + &naddr, NULL, &sn); + if (!PF_AZERO(&naddr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &naddr, AF_INET6); + ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + } else { + if (!PF_AZERO(&s->rt_addr, AF_INET6)) + PF_ACPY((struct pf_addr *)&dst->sin6_addr, + &s->rt_addr, AF_INET6); + ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + } + if (ifp == NULL) + goto bad; + + if (oifp != ifp) { + if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) + goto bad; + else if (m0 == NULL) + goto done; + if (m0->m_len < (int)sizeof (struct ip6_hdr)) { + DPFPRINTF(PF_DEBUG_URGENT, ("pf_route6: m0->m_len " + "< sizeof (struct ip6_hdr)\n")); + goto bad; + } + ip6 = mtod(m0, struct ip6_hdr *); + } + + /* + * If the packet is too large for the outgoing interface, + * send back an icmp6 error. + */ + if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) + dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((unsigned)m0->m_pkthdr.len <= ifp->if_mtu) { + error = nd6_output(ifp, ifp, m0, dst, NULL, 0); + } else { + in6_ifstat_inc(ifp, ifs6_in_toobig); + if (r->rt != PF_DUPTO) + icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); + else + goto bad; + } + +done: + if (r->rt != PF_DUPTO) + *m = NULL; + return; + +bad: + m_freem(m0); + goto done; +} +#endif /* INET6 */ + + +/* + * check protocol (tcp/udp/icmp/icmp6) checksum and set mbuf flag + * off is the offset where the protocol header starts + * len is the total length of protocol header plus payload + * returns 0 when the checksum is valid, otherwise returns 1. 
+ */ +static int +pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, + sa_family_t af) +{ + u_int16_t sum; + + switch (p) { + case IPPROTO_TCP: + case IPPROTO_UDP: + /* + * Optimize for the common case; if the hardware calculated + * value doesn't include pseudo-header checksum, or if it + * is partially-computed (only 16-bit summation), do it in + * software below. + */ + if (apple_hwcksum_rx && (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) && + (m->m_pkthdr.csum_data ^ 0xffff) == 0) { + return (0); + } + break; + case IPPROTO_ICMP: +#if INET6 + case IPPROTO_ICMPV6: +#endif /* INET6 */ + break; + default: + return (1); + } + if (off < (int)sizeof (struct ip) || len < (int)sizeof (struct udphdr)) + return (1); + if (m->m_pkthdr.len < off + len) + return (1); + switch (af) { +#if INET + case AF_INET: + if (p == IPPROTO_ICMP) { + if (m->m_len < off) + return (1); + m->m_data += off; + m->m_len -= off; + sum = in_cksum(m, len); + m->m_data -= off; + m->m_len += off; + } else { + if (m->m_len < (int)sizeof (struct ip)) + return (1); + sum = inet_cksum(m, p, off, len); + } + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (m->m_len < (int)sizeof (struct ip6_hdr)) + return (1); + sum = inet6_cksum(m, p, off, len); + break; +#endif /* INET6 */ + default: + return (1); + } + if (sum) { + switch (p) { + case IPPROTO_TCP: + tcpstat.tcps_rcvbadsum++; + break; + case IPPROTO_UDP: + udpstat.udps_badsum++; + break; + case IPPROTO_ICMP: + icmpstat.icps_checksum++; + break; +#if INET6 + case IPPROTO_ICMPV6: + icmp6stat.icp6s_checksum++; + break; +#endif /* INET6 */ + } + return (1); + } + return (0); +} + +#if INET +#ifndef NO_APPLE_EXTENSIONS +#define PF_APPLE_UPDATE_PDESC_IPv4() \ + do { \ + if (m && pd.mp && m != pd.mp) { \ + m = pd.mp; \ + h = mtod(m, struct ip *); \ + } \ + } while (0) +#else +#define PF_APPLE_UPDATE_PDESC_IPv4() +#endif + +int +pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh) +{ + 
struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0; + struct ip *h = 0; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_state_key *sk = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, dirndx, pqid = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof (pd)); + + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } + + if (pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test: kif == NULL, if_name %s\n", ifp->if_name)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test"); +#endif /* DIAGNOSTIC */ + + if (m->m_pkthdr.len < (int)sizeof (*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip *); + + off = h->ip_hl << 2; + if (off < (int)sizeof (*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + pd.src = (struct pf_addr *)&h->ip_src; + pd.dst = (struct pf_addr *)&h->ip_dst; + PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET); + pd.ip_sum = &h->ip_sum; + pd.proto = h->ip_p; +#ifndef NO_APPLE_EXTENSIONS + pd.proto_variant = 0; + pd.mp = m; + pd.lmw = 0; +#endif + pd.af = AF_INET; + pd.tos = h->ip_tos; + pd.tot_len = ntohs(h->ip_len); + pd.eh = eh; + + /* handle fragments that didn't get reassembled by normalization */ + if (h->ip_off & htons(IP_MF | IP_OFFMASK)) { + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + goto done; + } + + switch (h->ip_p) { + + case IPPROTO_TCP: { + struct tcphdr th; + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof (th), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + if ((th.th_flags & TH_ACK) && pd.p_len == 0) + pqid = 1; + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ipintrq); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof (uh), + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof (struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = 
s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ipintrq); + break; + } + + case IPPROTO_ICMP: { + struct icmp ih; + + pd.hdr.icmp = &ih; + if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN, + &action, &reason, AF_INET)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, + &reason); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ipintrq); + break; + } + +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_ESP: { + struct pf_esp_hdr esp; + + pd.hdr.esp = &esp; + if (!pf_pull_hdr(m, off, &esp, sizeof (esp), &action, &reason, + AF_INET)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_esp(&s, dir, kif, off, &pd); + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ipintrq); + break; + } + + case IPPROTO_GRE: { + struct pf_grev1_hdr grev1; + pd.hdr.grev1 = &grev1; + if (!pf_pull_hdr(m, off, &grev1, sizeof (grev1), &action, + &reason, AF_INET)) { + log = (action != PF_PASS); + goto done; + } + if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 && + ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) { + if (ntohs(grev1.payload_length) > + m->m_pkthdr.len - off) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + pd.proto_variant = PF_GRE_PPTP_VARIANT; + action = pf_test_state_grev1(&s, dir, kif, off, &pd); + if (pd.lmw < 0) goto done; + 
PF_APPLE_UPDATE_PDESC_IPv4(); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + break; + } else if (s == NULL) { + action = pf_test_rule(&r, &s, dir, kif, m, off, + h, &pd, &a, &ruleset, &ipintrq); + if (action == PF_PASS) + break; + } + } + + /* not GREv1/PPTP, so treat as ordinary GRE... */ + } +#endif + + default: + action = pf_test_state_other(&s, dir, kif, &pd); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv4(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, &ipintrq); + break; + } + +done: + PF_APPLE_UPDATE_PDESC_IPv4(); + + if (action == PF_PASS && h->ip_hl > 5 && + !((s && s->allow_opts) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with ip options [hlen=%u]\n", + (unsigned int) h->ip_hl)); + } + + if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid)) + (void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, + r->rtableid); + +#if ALTQ + if (action == PF_PASS && r->qid) { + if (pqid || (pd.tos & IPTOS_LOWDELAY)) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->hdr = h; + } +#endif /* ALTQ */ + + /* + * connections redirected to loopback should not match sockets + * bound specifically to loopback due to security implications, + * see tcp_input() and in_pcblookup_listen(). 
+ */ + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, lr, a, ruleset, + &pd); + } + + kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + sk = s->state_key; + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == sk->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL) { + struct pf_addr *x; + /* + * XXX: we need to make sure that the addresses + * passed to pfr_update_stats() are the same than + * the addresses used during matching (pfr_match) + */ + if (r == &pf_default_rule) { + tr = nr; + x = (sk == NULL || sk->direction == dir) ? + &pd.baddr : &pd.naddr; + } else + x = (sk == NULL || sk->direction == dir) ? 
+ &pd.naddr : &pd.baddr; + if (x == &pd.baddr || s == NULL) { + /* we need to change the address */ + if (dir == PF_OUT) + pd.src = x; + else + pd.dst = x; + } + } + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL || + sk->direction == dir) ? + pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL || + sk->direction == dir) ? pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.neg); + } + +#ifndef NO_APPLE_EXTENSIONS + if (*m0) { + if (pd.lmw < 0) { + m_freem(*m0); + *m0 = NULL; + return (PF_DROP); + } + + *m0 = m; + } +#endif + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route can free the mbuf causing *m0 to become NULL */ + pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + + return (action); +} +#endif /* INET */ + +#if INET6 +#ifndef NO_APPLE_EXTENSIONS +#define PF_APPLE_UPDATE_PDESC_IPv6() \ + do { \ + if (m && pd.mp && m != pd.mp) { \ + if (n == m) \ + n = pd.mp; \ + m = pd.mp; \ + h = mtod(m, struct ip6_hdr *); \ + } \ + } while (0) +#else +#define PF_APPLE_UPDATE_PDESC_IPv6() +#endif + +int +pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, + struct ether_header *eh) +{ + struct pfi_kif *kif; + u_short action, reason = 0, log = 0; + struct mbuf *m = *m0, *n = NULL; + struct ip6_hdr *h; + struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; + struct pf_state *s = NULL; + struct pf_state_key *sk = NULL; + struct pf_ruleset *ruleset = NULL; + struct pf_pdesc pd; + int off, terminal = 0, dirndx, rh_cnt = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (!pf_status.running) + return (PF_PASS); + + memset(&pd, 0, sizeof (pd)); + + if ((pd.pf_mtag = pf_get_mtag(m)) == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: pf_get_mtag returned NULL\n")); + return (PF_DROP); + } 
+ + if (pd.pf_mtag->flags & PF_TAG_GENERATED) + return (PF_PASS); + + kif = (struct pfi_kif *)ifp->if_pf_kif; + + if (kif == NULL) { + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_test6: kif == NULL, if_name %s\n", ifp->if_name)); + return (PF_DROP); + } + if (kif->pfik_flags & PFI_IFLAG_SKIP) + return (PF_PASS); + +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("non-M_PKTHDR is passed to pf_test6"); +#endif /* DIAGNOSTIC */ + + h = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.len < (int)sizeof (*h)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + + /* We do IP header normalization and packet reassembly here */ + if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) { + action = PF_DROP; + goto done; + } + m = *m0; /* pf_normalize messes with m0 */ + h = mtod(m, struct ip6_hdr *); + +#if 1 + /* + * we do not support jumbogram yet. if we keep going, zero ip6_plen + * will do something bad, so drop the packet for now. + */ + if (htons(h->ip6_plen) == 0) { + action = PF_DROP; + REASON_SET(&reason, PFRES_NORM); /*XXX*/ + goto done; + } +#endif + + pd.src = (struct pf_addr *)&h->ip6_src; + pd.dst = (struct pf_addr *)&h->ip6_dst; + PF_ACPY(&pd.baddr, dir == PF_OUT ? 
pd.src : pd.dst, AF_INET6); + pd.ip_sum = NULL; + pd.af = AF_INET6; + pd.tos = 0; + pd.tot_len = ntohs(h->ip6_plen) + sizeof (struct ip6_hdr); + pd.eh = eh; + + off = ((caddr_t)h - m->m_data) + sizeof (struct ip6_hdr); + pd.proto = h->ip6_nxt; +#ifndef NO_APPLE_EXTENSIONS + pd.proto_variant = 0; + pd.mp = m; + pd.lmw = 0; +#endif + do { + switch (pd.proto) { + case IPPROTO_FRAGMENT: + action = pf_test_fragment(&r, dir, kif, m, h, + &pd, &a, &ruleset); + if (action == PF_DROP) + REASON_SET(&reason, PFRES_FRAG); + goto done; + case IPPROTO_ROUTING: { + struct ip6_rthdr rthdr; + + if (rh_cnt++) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 more than one rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + if (!pf_pull_hdr(m, off, &rthdr, sizeof (rthdr), NULL, + &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short rthdr\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + log = 1; + goto done; + } + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 rthdr0\n")); + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + goto done; + } + /* FALLTHROUGH */ + } + case IPPROTO_AH: + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: { + /* get next header and header length */ + struct ip6_ext opt6; + + if (!pf_pull_hdr(m, off, &opt6, sizeof (opt6), + NULL, &reason, pd.af)) { + DPFPRINTF(PF_DEBUG_MISC, + ("pf: IPv6 short opt\n")); + action = PF_DROP; + log = 1; + goto done; + } + if (pd.proto == IPPROTO_AH) + off += (opt6.ip6e_len + 2) * 4; + else + off += (opt6.ip6e_len + 1) * 8; + pd.proto = opt6.ip6e_nxt; + /* goto the next header */ + break; + } + default: + terminal++; + break; + } + } while (!terminal); + + /* if there's no routing header, use unmodified mbuf for checksumming */ + if (!n) + n = m; + + switch (pd.proto) { + + case IPPROTO_TCP: { + struct tcphdr th; + + pd.hdr.tcp = &th; + if (!pf_pull_hdr(m, off, &th, sizeof (th), + &action, &reason, AF_INET6)) 
{ + log = action != PF_PASS; + goto done; + } + pd.p_len = pd.tot_len - off - (th.th_off << 2); + action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd); + if (action == PF_DROP) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); + action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, + &reason); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ip6intrq); + break; + } + + case IPPROTO_UDP: { + struct udphdr uh; + + pd.hdr.udp = &uh; + if (!pf_pull_hdr(m, off, &uh, sizeof (uh), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + if (uh.uh_dport == 0 || + ntohs(uh.uh_ulen) > m->m_pkthdr.len - off || + ntohs(uh.uh_ulen) < sizeof (struct udphdr)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ip6intrq); + break; + } + + case IPPROTO_ICMPV6: { + struct icmp6_hdr ih; + + pd.hdr.icmp6 = &ih; + if (!pf_pull_hdr(m, off, &ih, sizeof (ih), + &action, &reason, AF_INET6)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_icmp(&s, dir, kif, + m, off, h, &pd, &reason); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s 
== NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ip6intrq); + break; + } + +#ifndef NO_APPLE_EXTENSIONS + case IPPROTO_ESP: { + struct pf_esp_hdr esp; + + pd.hdr.esp = &esp; + if (!pf_pull_hdr(m, off, &esp, sizeof (esp), &action, &reason, + AF_INET6)) { + log = action != PF_PASS; + goto done; + } + action = pf_test_state_esp(&s, dir, kif, off, &pd); + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, + m, off, h, &pd, &a, &ruleset, &ip6intrq); + break; + } + + case IPPROTO_GRE: { + struct pf_grev1_hdr grev1; + + pd.hdr.grev1 = &grev1; + if (!pf_pull_hdr(m, off, &grev1, sizeof (grev1), &action, + &reason, AF_INET6)) { + log = (action != PF_PASS); + goto done; + } + if ((ntohs(grev1.flags) & PF_GRE_FLAG_VERSION_MASK) == 1 && + ntohs(grev1.protocol_type) == PF_GRE_PPP_ETHERTYPE) { + if (ntohs(grev1.payload_length) > + m->m_pkthdr.len - off) { + action = PF_DROP; + REASON_SET(&reason, PFRES_SHORT); + goto done; + } + action = pf_test_state_grev1(&s, dir, kif, off, &pd); + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + break; + } else if (s == NULL) { + action = pf_test_rule(&r, &s, dir, kif, m, off, + h, &pd, &a, &ruleset, &ip6intrq); + if (action == PF_PASS) + break; + } + } + + /* not GREv1/PPTP, so treat as ordinary GRE... 
*/ + } +#endif + + default: + action = pf_test_state_other(&s, dir, kif, &pd); +#ifndef NO_APPLE_EXTENSIONS + if (pd.lmw < 0) + goto done; + PF_APPLE_UPDATE_PDESC_IPv6(); +#endif + if (action == PF_PASS) { +#if NPFSYNC + pfsync_update_state(s); +#endif /* NPFSYNC */ + r = s->rule.ptr; + a = s->anchor.ptr; + log = s->log; + } else if (s == NULL) + action = pf_test_rule(&r, &s, dir, kif, m, off, h, + &pd, &a, &ruleset, &ip6intrq); + break; + } + +done: + PF_APPLE_UPDATE_PDESC_IPv6(); + + if (n != m) { + m_freem(n); + n = NULL; + } + + /* handle dangerous IPv6 extension headers. */ + if (action == PF_PASS && rh_cnt && + !((s && s->allow_opts) || r->allow_opts)) { + action = PF_DROP; + REASON_SET(&reason, PFRES_IPOPTIONS); + log = 1; + DPFPRINTF(PF_DEBUG_MISC, + ("pf: dropping packet with dangerous v6 headers\n")); + } + + if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid)) + (void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, + r->rtableid); + +#if ALTQ + if (action == PF_PASS && r->qid) { + if (pd.tos & IPTOS_LOWDELAY) + pd.pf_mtag->qid = r->pqid; + else + pd.pf_mtag->qid = r->qid; + /* add hints for ecn */ + pd.pf_mtag->hdr = h; + } +#endif /* ALTQ */ + + if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || + pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL && + (s->nat_rule.ptr->action == PF_RDR || + s->nat_rule.ptr->action == PF_BINAT) && + IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) + pd.pf_mtag->flags |= PF_TAG_TRANSLATE_LOCALHOST; + + if (log) { + struct pf_rule *lr; + + if (s != NULL && s->nat_rule.ptr != NULL && + s->nat_rule.ptr->log & PF_LOG_ALL) + lr = s->nat_rule.ptr; + else + lr = r; + PFLOG_PACKET(kif, h, m, AF_INET6, dir, reason, lr, a, ruleset, + &pd); + } + + kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len; + kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++; + + if (action == PF_PASS || r->action == PF_DROP) { + dirndx = (dir == PF_OUT); + r->packets[dirndx]++; + r->bytes[dirndx] += pd.tot_len; + 
if (a != NULL) { + a->packets[dirndx]++; + a->bytes[dirndx] += pd.tot_len; + } + if (s != NULL) { + sk = s->state_key; + if (s->nat_rule.ptr != NULL) { + s->nat_rule.ptr->packets[dirndx]++; + s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; + } + if (s->src_node != NULL) { + s->src_node->packets[dirndx]++; + s->src_node->bytes[dirndx] += pd.tot_len; + } + if (s->nat_src_node != NULL) { + s->nat_src_node->packets[dirndx]++; + s->nat_src_node->bytes[dirndx] += pd.tot_len; + } + dirndx = (dir == sk->direction) ? 0 : 1; + s->packets[dirndx]++; + s->bytes[dirndx] += pd.tot_len; + } + tr = r; + nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; + if (nr != NULL) { + struct pf_addr *x; + /* + * XXX: we need to make sure that the addresses + * passed to pfr_update_stats() are the same than + * the addresses used during matching (pfr_match) + */ + if (r == &pf_default_rule) { + tr = nr; + x = (s == NULL || sk->direction == dir) ? + &pd.baddr : &pd.naddr; + } else { + x = (s == NULL || sk->direction == dir) ? + &pd.naddr : &pd.baddr; + } + if (x == &pd.baddr || s == NULL) { + if (dir == PF_OUT) + pd.src = x; + else + pd.dst = x; + } + } + if (tr->src.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL || + sk->direction == dir) ? pd.src : pd.dst, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->src.neg); + if (tr->dst.addr.type == PF_ADDR_TABLE) + pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL || + sk->direction == dir) ? 
pd.dst : pd.src, pd.af, + pd.tot_len, dir == PF_OUT, r->action == PF_PASS, + tr->dst.neg); + } + +#if 0 + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) + /* pf_route6 can free the mbuf causing *m0 to become NULL */ + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); +#else +#ifndef NO_APPLE_EXTENSIONS + if (*m0) { + if (pd.lmw < 0) { + m_freem(*m0); + *m0 = NULL; + return (PF_DROP); + } + + *m0 = m; + } + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } else if (r->rt) { + if (action == PF_PASS) { + m = *m0; + h = mtod(m, struct ip6_hdr *); + } + + /* pf_route6 can free the mbuf causing *m0 to become NULL */ + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + } +#else + if (action != PF_SYNPROXY_DROP && r->rt) + /* pf_route6 can free the mbuf causing *m0 to become NULL */ + pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + + if (action == PF_PASS) { + m = *m0; + h = mtod(m, struct ip6_hdr *); + } + + if (action == PF_SYNPROXY_DROP) { + m_freem(*m0); + *m0 = NULL; + action = PF_PASS; + } +#endif +#endif + + return (action); +} +#endif /* INET6 */ + +static int +pf_check_congestion(struct ifqueue *ifq) +{ +#pragma unused(ifq) + return (0); +} + +void +pool_init(struct pool *pp, size_t size, unsigned int align, unsigned int ioff, + int flags, const char *wchan, void *palloc) +{ +#pragma unused(align, ioff, flags, palloc) + bzero(pp, sizeof (*pp)); + pp->pool_zone = zinit(size, 1024 * size, PAGE_SIZE, wchan); + if (pp->pool_zone != NULL) { + zone_change(pp->pool_zone, Z_EXPAND, TRUE); + pp->pool_hiwat = pp->pool_limit = (unsigned int)-1; + pp->pool_name = wchan; + } +} + +/* Zones cannot be currently destroyed */ +void +pool_destroy(struct pool *pp) +{ +#pragma unused(pp) +} + +void +pool_sethiwat(struct pool *pp, int n) +{ + pp->pool_hiwat = n; /* Currently unused */ +} + +void +pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) +{ +#pragma 
unused(warnmess, ratecap) + pp->pool_limit = n; +} + +/* + * Allocate one element from the pool's zone. pf_lock must be held + * (asserted). Returns NULL, after incrementing pool_fails, when + * pool_count already exceeds pool_limit, and also when + * zalloc_canblock() itself returns NULL. Only the PR_NOWAIT and + * PR_WAITOK bits of flags are passed on to the zone allocator. + */ +void * +pool_get(struct pool *pp, int flags) +{ + void *buf; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (pp->pool_count > pp->pool_limit) { + DPFPRINTF(PF_DEBUG_NOISY, + ("pf: pool %s hard limit reached (%d)\n", + pp->pool_name != NULL ? pp->pool_name : "unknown", + pp->pool_limit)); + pp->pool_fails++; + return (NULL); + } + + buf = zalloc_canblock(pp->pool_zone, (flags & (PR_NOWAIT | PR_WAITOK))); + if (buf != NULL) { + pp->pool_count++; + /* the counter must not have wrapped around to zero */ + VERIFY(pp->pool_count != 0); + } + return (buf); +} + +/* + * Return element v to the pool's zone and decrement pool_count. + * pf_lock must be held (asserted). + */ +void +pool_put(struct pool *pp, void *v) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + zfree(pp->pool_zone, v); + /* guard against decrementing below zero */ + VERIFY(pp->pool_count != 0); + pp->pool_count--; +} + +/* + * Look up the pf tag of mbuf m without allocating one. When PF_PKTHDR + * is not set the tag is located with m_tag_locate() and the pf_mtag + * data directly follows the m_tag header; otherwise the tag is the + * pf_mtag member of the packet header, available only on M_PKTHDR + * mbufs. Returns NULL when no tag is found. + */ +struct pf_mtag * +pf_find_mtag(struct mbuf *m) +{ +#if !PF_PKTHDR + struct m_tag *mtag; + + if ((mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_PF, NULL)) == NULL) + return (NULL); + + return ((struct pf_mtag *)(mtag + 1)); +#else + if (!(m->m_flags & M_PKTHDR)) + return (NULL); + + return (&m->m_pkthdr.pf_mtag); +#endif /* PF_PKTHDR */ +} + +/* + * Return the pf tag of mbuf m. When PF_PKTHDR is not set and no tag + * exists yet, a new one is allocated with M_NOWAIT, zeroed and + * prepended to m; NULL is returned if that allocation fails. When + * PF_PKTHDR is set this is the same as pf_find_mtag(). + */ +struct pf_mtag * +pf_get_mtag(struct mbuf *m) +{ +#if !PF_PKTHDR + struct m_tag *mtag; + + if ((mtag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF, + NULL)) == NULL) { + mtag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_PF, + sizeof (struct pf_mtag), M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof (struct pf_mtag)); + m_tag_prepend(m, mtag); + } + return ((struct pf_mtag *)(mtag + 1)); +#else + return (pf_find_mtag(m)); +#endif /* PF_PKTHDR */ +} + +/* + * Return the tv_sec field of the time reported by microtime(). + */ +uint64_t +pf_time_second(void) +{ + struct timeval t; + + microtime(&t); + return (t.tv_sec); +} + +static void * +hook_establish(struct hook_desc_head *head, int tail, hook_fn_t fn, void *arg) +{ + struct hook_desc *hd; + + hd = _MALLOC(sizeof(*hd), M_DEVBUF, M_WAITOK); + if (hd == NULL) + return (NULL); + + hd->hd_fn = fn; + hd->hd_arg = arg; + if (tail) + TAILQ_INSERT_TAIL(head, hd, 
hd_list); + else + TAILQ_INSERT_HEAD(head, hd, hd_list); + + return (hd); +} + +static void +hook_runloop(struct hook_desc_head *head, int flags) +{ + struct hook_desc *hd; + + if (!(flags & HOOK_REMOVE)) { + if (!(flags & HOOK_ABORT)) + TAILQ_FOREACH(hd, head, hd_list) + hd->hd_fn(hd->hd_arg); + } else { + while (!!(hd = TAILQ_FIRST(head))) { + TAILQ_REMOVE(head, hd, hd_list); + if (!(flags & HOOK_ABORT)) + hd->hd_fn(hd->hd_arg); + if (flags & HOOK_FREE) + _FREE(hd, M_DEVBUF); + } + } +} diff --git a/bsd/net/pf_if.c b/bsd/net/pf_if.c new file mode 100644 index 000000000..7e187cb94 --- /dev/null +++ b/bsd/net/pf_if.c @@ -0,0 +1,706 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_if.c,v 1.4 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: pf_if.c,v 1.46 2006/12/13 09:01:59 itojun Exp $ */ + +/* + * Copyright 2005 Henning Brauer + * Copyright 2005 Ryan McBride + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2003 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#if INET6 +#include +#endif /* INET6 */ + +#include + +struct pfi_kif *pfi_all = NULL; + +static struct pool pfi_addr_pl; +static struct pfi_ifhead pfi_ifs; +static u_int32_t pfi_update = 1; +static struct pfr_addr *pfi_buffer; +static int pfi_buffer_cnt; +static int pfi_buffer_max; + +__private_extern__ void pfi_kifaddr_update(void *); + +static void pfi_kif_update(struct pfi_kif *); +static void pfi_dynaddr_update(struct pfi_dynaddr *dyn); +static void pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int, int); +static void pfi_instance_add(struct ifnet *, int, int); +static void pfi_address_add(struct sockaddr *, int, int); +static int pfi_if_compare(struct pfi_kif *, struct pfi_kif *); +static int pfi_skip_if(const char *, struct pfi_kif *); +static int pfi_unmask(void *); + +RB_PROTOTYPE_SC(static, pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); +RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare); + +#define PFI_BUFFER_MAX 0x10000 +#define PFI_MTYPE M_IFADDR + +#define IFG_ALL "ALL" + +void +pfi_initialize(void) +{ + if (pfi_all != NULL) /* already initialized */ + return; + + pool_init(&pfi_addr_pl, sizeof (struct pfi_dynaddr), 0, 0, 0, + "pfiaddrpl", NULL); + pfi_buffer_max = 64; + pfi_buffer = _MALLOC(pfi_buffer_max * sizeof (*pfi_buffer), + PFI_MTYPE, M_WAITOK); + + if ((pfi_all = pfi_kif_get(IFG_ALL)) == NULL) + panic("pfi_kif_get for pfi_all failed"); +} + +#if 0 +void +pfi_destroy(void) +{ + pool_destroy(&pfi_addr_pl); + _FREE(pfi_buffer, PFI_MTYPE); +} +#endif + +struct pfi_kif * +pfi_kif_get(const char *kif_name) +{ + struct pfi_kif *kif; + struct pfi_kif_cmp s; + + bzero(&s, sizeof (s)); + strlcpy(s.pfik_name, kif_name, sizeof (s.pfik_name)); + if ((kif = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&s)) != NULL) + return (kif); + + /* create new one 
*/ + if ((kif = _MALLOC(sizeof (*kif), PFI_MTYPE, M_WAITOK|M_ZERO)) == NULL) + return (NULL); + + strlcpy(kif->pfik_name, kif_name, sizeof (kif->pfik_name)); + kif->pfik_tzero = pf_time_second(); + TAILQ_INIT(&kif->pfik_dynaddrs); + + RB_INSERT(pfi_ifhead, &pfi_ifs, kif); + return (kif); +} + +void +pfi_kif_ref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + switch (what) { + case PFI_KIF_REF_RULE: + kif->pfik_rules++; + break; + case PFI_KIF_REF_STATE: + kif->pfik_states++; + break; + default: + panic("pfi_kif_ref with unknown type"); + } +} + +void +pfi_kif_unref(struct pfi_kif *kif, enum pfi_kif_refs what) +{ + if (kif == NULL) + return; + + switch (what) { + case PFI_KIF_REF_NONE: + break; + case PFI_KIF_REF_RULE: + if (kif->pfik_rules <= 0) { + printf("pfi_kif_unref: rules refcount <= 0\n"); + return; + } + kif->pfik_rules--; + break; + case PFI_KIF_REF_STATE: + if (kif->pfik_states <= 0) { + printf("pfi_kif_unref: state refcount <= 0\n"); + return; + } + kif->pfik_states--; + break; + default: + panic("pfi_kif_unref with unknown type"); + } + + if (kif->pfik_ifp != NULL || kif == pfi_all) + return; + + if (kif->pfik_rules || kif->pfik_states) + return; + + RB_REMOVE(pfi_ifhead, &pfi_ifs, kif); + _FREE(kif, PFI_MTYPE); +} + +int +pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif) +{ + + if (rule_kif == NULL || rule_kif == packet_kif) + return (1); + + return (0); +} + +void +pfi_attach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *kif; + char if_name[IFNAMSIZ]; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + pfi_update++; + (void) snprintf(if_name, sizeof (if_name), "%s%d", + ifp->if_name, ifp->if_unit); + if ((kif = pfi_kif_get(if_name)) == NULL) + panic("pfi_kif_get failed"); + + ifnet_lock_exclusive(ifp); + kif->pfik_ifp = ifp; + ifp->if_pf_kif = kif; + ifnet_lock_done(ifp); + + pfi_kif_update(kif); +} + +/* + * Caller holds ifnet lock as writer (exclusive); + */ +void +pfi_detach_ifnet(struct ifnet *ifp) +{ + struct pfi_kif *kif; 
+ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL) + return; + + pfi_update++; + pfi_kif_update(kif); + + ifnet_lock_exclusive(ifp); + kif->pfik_ifp = NULL; + ifp->if_pf_kif = NULL; + ifnet_lock_done(ifp); + + pfi_kif_unref(kif, PFI_KIF_REF_NONE); +} + +int +pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) +{ + switch (af) { +#if INET + case AF_INET: + switch (dyn->pfid_acnt4) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr4, + &dyn->pfid_mask4, a, AF_INET)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); + } + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + switch (dyn->pfid_acnt6) { + case 0: + return (0); + case 1: + return (PF_MATCHA(0, &dyn->pfid_addr6, + &dyn->pfid_mask6, a, AF_INET6)); + default: + return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); + } + break; +#endif /* INET6 */ + default: + return (0); + } +} + +int +pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) +{ + struct pfi_dynaddr *dyn; + char tblname[PF_TABLE_NAME_SIZE]; + struct pf_ruleset *ruleset = NULL; + int rv = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (aw->type != PF_ADDR_DYNIFTL) + return (0); + if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK)) == NULL) + return (1); + bzero(dyn, sizeof (*dyn)); + + if (strcmp(aw->v.ifname, "self") == 0) + dyn->pfid_kif = pfi_kif_get(IFG_ALL); + else + dyn->pfid_kif = pfi_kif_get(aw->v.ifname); + if (dyn->pfid_kif == NULL) { + rv = 1; + goto _bad; + } + pfi_kif_ref(dyn->pfid_kif, PFI_KIF_REF_RULE); + + dyn->pfid_net = pfi_unmask(&aw->v.a.mask); + if (af == AF_INET && dyn->pfid_net == 32) + dyn->pfid_net = 128; + strlcpy(tblname, aw->v.ifname, sizeof (tblname)); + if (aw->iflags & PFI_AFLAG_NETWORK) + strlcat(tblname, ":network", sizeof (tblname)); + if (aw->iflags & PFI_AFLAG_BROADCAST) + strlcat(tblname, ":broadcast", sizeof (tblname)); + if (aw->iflags & PFI_AFLAG_PEER) + strlcat(tblname, 
":peer", sizeof (tblname)); + if (aw->iflags & PFI_AFLAG_NOALIAS) + strlcat(tblname, ":0", sizeof (tblname)); + if (dyn->pfid_net != 128) + snprintf(tblname + strlen(tblname), + sizeof (tblname) - strlen(tblname), "/%d", dyn->pfid_net); + if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) { + rv = 1; + goto _bad; + } + + if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) { + rv = 1; + goto _bad; + } + + dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE; + dyn->pfid_iflags = aw->iflags; + dyn->pfid_af = af; + + TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry); + aw->p.dyn = dyn; + pfi_kif_update(dyn->pfid_kif); + return (0); + +_bad: + if (dyn->pfid_kt != NULL) + pfr_detach_table(dyn->pfid_kt); + if (ruleset != NULL) + pf_remove_if_empty_ruleset(ruleset); + if (dyn->pfid_kif != NULL) + pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE); + pool_put(&pfi_addr_pl, dyn); + return (rv); +} + +void +pfi_kif_update(struct pfi_kif *kif) +{ + struct pfi_dynaddr *p; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + /* update all dynaddr */ + TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry) + pfi_dynaddr_update(p); +} + +void +pfi_dynaddr_update(struct pfi_dynaddr *dyn) +{ + struct pfi_kif *kif; + struct pfr_ktable *kt; + + if (dyn == NULL || dyn->pfid_kif == NULL || dyn->pfid_kt == NULL) + panic("pfi_dynaddr_update"); + + kif = dyn->pfid_kif; + kt = dyn->pfid_kt; + + if (kt->pfrkt_larg != pfi_update) { + /* this table needs to be brought up-to-date */ + pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags); + kt->pfrkt_larg = pfi_update; + } + pfr_dynaddr_update(kt, dyn); +} + +void +pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags) +{ + int e, size2 = 0; + + pfi_buffer_cnt = 0; + + if (kif->pfik_ifp != NULL) + pfi_instance_add(kif->pfik_ifp, net, flags); + + if ((e = pfr_set_addrs(&kt->pfrkt_t, pfi_buffer, pfi_buffer_cnt, &size2, + NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK))) + printf("pfi_table_update: 
cannot set %d new addresses " + "into table %s: %d\n", pfi_buffer_cnt, kt->pfrkt_name, e); +} + +void +pfi_instance_add(struct ifnet *ifp, int net, int flags) +{ + struct ifaddr *ia; + int got4 = 0, got6 = 0; + int net2, af; + + if (ifp == NULL) + return; + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { + if (ia->ifa_addr == NULL) + continue; + af = ia->ifa_addr->sa_family; + if (af != AF_INET && af != AF_INET6) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6) + continue; + if ((flags & PFI_AFLAG_BROADCAST) && + !(ifp->if_flags & IFF_BROADCAST)) + continue; + if ((flags & PFI_AFLAG_PEER) && + !(ifp->if_flags & IFF_POINTOPOINT)) + continue; + if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && + IN6_IS_ADDR_LINKLOCAL( + &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) + continue; + if (flags & PFI_AFLAG_NOALIAS) { + if (af == AF_INET && got4) + continue; + if (af == AF_INET6 && got6) + continue; + } + if (af == AF_INET) + got4 = 1; + else if (af == AF_INET6) + got6 = 1; + net2 = net; + if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) { + if (af == AF_INET) + net2 = pfi_unmask(&((struct sockaddr_in *) + ia->ifa_netmask)->sin_addr); + else if (af == AF_INET6) + net2 = pfi_unmask(&((struct sockaddr_in6 *) + ia->ifa_netmask)->sin6_addr); + } + if (af == AF_INET && net2 > 32) + net2 = 32; + if (flags & PFI_AFLAG_BROADCAST) + pfi_address_add(ia->ifa_broadaddr, af, net2); + else if (flags & PFI_AFLAG_PEER) + pfi_address_add(ia->ifa_dstaddr, af, net2); + else + pfi_address_add(ia->ifa_addr, af, net2); + } + ifnet_lock_done(ifp); +} + +void +pfi_address_add(struct sockaddr *sa, int af, int net) +{ + struct pfr_addr *p; + int i; + + if (pfi_buffer_cnt >= pfi_buffer_max) { + int new_max = pfi_buffer_max * 2; + + if (new_max > PFI_BUFFER_MAX) { + printf("pfi_address_add: address buffer full (%d/%d)\n", + pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + p = _MALLOC(new_max * sizeof (*pfi_buffer), PFI_MTYPE, + M_WAITOK); + if (p == 
NULL) { + printf("pfi_address_add: no memory to grow buffer " + "(%d/%d)\n", pfi_buffer_cnt, PFI_BUFFER_MAX); + return; + } + /* + * Carry the existing entries over into the new, larger buffer: + * p is the destination, the old pfi_buffer (freed below) is + * the source. + */ + memcpy(p, pfi_buffer, pfi_buffer_cnt * sizeof (*pfi_buffer)); + /* no need to zero buffer */ + _FREE(pfi_buffer, PFI_MTYPE); + pfi_buffer = p; + pfi_buffer_max = new_max; + } + if (af == AF_INET && net > 32) + net = 128; + p = pfi_buffer + pfi_buffer_cnt++; + bzero(p, sizeof (*p)); + p->pfra_af = af; + p->pfra_net = net; + if (af == AF_INET) + p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr; + else if (af == AF_INET6) { + p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr; + if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr)) + p->pfra_ip6addr.s6_addr16[1] = 0; + } + /* mask network address bits */ + if (net < 128) + ((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8)); + for (i = (p->pfra_net+7)/8; i < (int)sizeof (p->pfra_u); i++) + ((caddr_t)p)[i] = 0; +} + +/* + * Tear down the dynamic interface address attached to aw: unlink it from + * its kif, drop the kif rule reference, detach its table and return it to + * pfi_addr_pl. Does nothing unless aw is a PF_ADDR_DYNIFTL wrapper whose + * dyn, kif and table pointers are all set. + */ +void +pfi_dynaddr_remove(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || + aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL) + return; + + TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry); + pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE); + aw->p.dyn->pfid_kif = NULL; + pfr_detach_table(aw->p.dyn->pfid_kt); + aw->p.dyn->pfid_kt = NULL; + pool_put(&pfi_addr_pl, aw->p.dyn); + aw->p.dyn = NULL; +} + +/* + * For a PF_ADDR_DYNIFTL wrapper that has a kif, store the sum of the IPv4 + * and IPv6 address counts in aw->p.dyncnt. + */ +void +pfi_dynaddr_copyout(struct pf_addr_wrap *aw) +{ + if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL || + aw->p.dyn->pfid_kif == NULL) + return; + aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6; +} + +/* + * Bump pfi_update and refresh every dynaddr of the kif passed as v. + * Caller must hold pf_lock. + */ +void +pfi_kifaddr_update(void *v) +{ + struct pfi_kif *kif = (struct pfi_kif *)v; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + pfi_update++; + pfi_kif_update(kif); +} + +/* + * Compare two kifs by name, looking at no more than IFNAMSIZ bytes. + */ +int +pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q) +{ + return (strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ)); +} + +void +pfi_update_status(const char *name, struct pf_status *pfs) +{ + struct pfi_kif *p; +
struct pfi_kif_cmp key; + int i, j, k; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + strlcpy(key.pfik_name, name, sizeof (key.pfik_name)); + p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key); + if (p == NULL) + return; + + if (pfs) { + bzero(pfs->pcounters, sizeof (pfs->pcounters)); + bzero(pfs->bcounters, sizeof (pfs->bcounters)); + } + /* just clear statistics */ + if (pfs == NULL) { + bzero(p->pfik_packets, sizeof (p->pfik_packets)); + bzero(p->pfik_bytes, sizeof (p->pfik_bytes)); + p->pfik_tzero = pf_time_second(); + /* nothing to report into; the loop below dereferences pfs */ + return; + } + /* accumulate this interface's counters into the caller's status */ + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) { + pfs->pcounters[i][j][k] += + p->pfik_packets[i][j][k]; + pfs->bcounters[i][j] += + p->pfik_bytes[i][j][k]; + } +} + +/* + * Copy the kifs that pass the name filter out to the user buffer buf, + * at most *size of them. On return *size holds the number of matching + * interfaces, which may exceed the number actually copied. Returns + * EFAULT if a copyout fails, 0 otherwise. Caller must hold pf_lock. + */ +int +pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size) +{ + struct pfi_kif *p, *nextp; + int n = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) { + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + if (pfi_skip_if(name, p)) + continue; + if (*size > n++) { + if (!p->pfik_tzero) + p->pfik_tzero = pf_time_second(); + pfi_kif_ref(p, PFI_KIF_REF_RULE); + if (copyout(p, CAST_USER_ADDR_T(buf), sizeof (*buf))) { + pfi_kif_unref(p, PFI_KIF_REF_RULE); + return (EFAULT); + } + /* advance only after the current slot has been filled */ + buf++; + nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p); + pfi_kif_unref(p, PFI_KIF_REF_RULE); + } + } + *size = n; + return (0); +} + +/* + * Return 0 if p should be kept for the given filter, non-zero if it should + * be skipped. An empty or NULL filter keeps everything; otherwise p is kept + * on an exact name match, or when the filter does not end in a digit and + * p's name is that prefix immediately followed by a digit. + */ +int +pfi_skip_if(const char *filter, struct pfi_kif *p) +{ + int n; + + if (filter == NULL || !*filter) + return (0); + if (strcmp(p->pfik_name, filter) == 0) + return (0); /* exact match */ + n = strlen(filter); + if (n < 1 || n >= IFNAMSIZ) + return (1); /* sanity check */ + if (filter[n-1] >= '0' && filter[n-1] <= '9') + return (1); /* only do exact match in that case */ + if (strncmp(p->pfik_name, filter, n)) + return (1); /* prefix doesn't match */ + return (p->pfik_name[n] < '0' || p->pfik_name[n] > '9'); +} + +int +pfi_set_flags(const char *name, int flags) +{ +
struct pfi_kif *p; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags |= flags; + } + return (0); +} + +int +pfi_clear_flags(const char *name, int flags) +{ + struct pfi_kif *p; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + RB_FOREACH(p, pfi_ifhead, &pfi_ifs) { + if (pfi_skip_if(name, p)) + continue; + p->pfik_flags &= ~flags; + } + return (0); +} + +/* from pf_print_state.c */ +int +pfi_unmask(void *addr) +{ + struct pf_addr *m = addr; + int i = 31, j = 0, b = 0; + u_int32_t tmp; + + while (j < 4 && m->addr32[j] == 0xffffffff) { + b += 32; + j++; + } + if (j < 4) { + tmp = ntohl(m->addr32[j]); + for (i = 31; tmp & (1 << i); --i) + b++; + } + return (b); +} diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c new file mode 100644 index 000000000..8145fed94 --- /dev/null +++ b/bsd/net/pf_ioctl.c @@ -0,0 +1,3489 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_ioctl.c,v 1.16 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.175 2007/02/26 22:47:43 deraadt Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#if NPFSYNC +#include +#endif /* NPFSYNC */ + +#if PFLOG +#include +#endif /* PFLOG */ + +#if INET6 +#include +#include +#endif /* INET6 */ + +#if ALTQ +#include +#endif /* ALTQ */ + +#if 0 +static void pfdetach(void); +#endif +static int pfopen(dev_t, int, int, struct proc *); +static int pfclose(dev_t, int, int, struct proc *); +static int pfioctl(dev_t, u_long, caddr_t, int, struct proc *); +static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, + u_int8_t, u_int8_t, u_int8_t); + +static void pf_mv_pool(struct pf_palist *, struct pf_palist *); +static void pf_empty_pool(struct pf_palist *); +#if ALTQ +static int pf_begin_altq(u_int32_t *); +static int pf_rollback_altq(u_int32_t); +static int pf_commit_altq(u_int32_t); +static int pf_enable_altq(struct pf_altq *); +static int pf_disable_altq(struct pf_altq *); +#endif /* ALTQ */ +static int pf_begin_rules(u_int32_t *, int, const char *); 
+static int pf_rollback_rules(u_int32_t, int, char *); +static int pf_setup_pfsync_matching(struct pf_ruleset *); +static void pf_hash_rule(MD5_CTX *, struct pf_rule *); +#ifndef NO_APPLE_EXTENSIONS +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *, u_int8_t); +#else +static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); +#endif +static int pf_commit_rules(u_int32_t, int, char *); +static void pf_state_export(struct pfsync_state *, struct pf_state_key *, + struct pf_state *); +static void pf_state_import(struct pfsync_state *, struct pf_state_key *, + struct pf_state *); + +#define PF_CDEV_MAJOR (-1) + +static struct cdevsw pf_cdevsw = { + /* open */ pfopen, + /* close */ pfclose, + /* read */ eno_rdwrt, + /* write */ eno_rdwrt, + /* ioctl */ pfioctl, + /* stop */ eno_stop, + /* reset */ eno_reset, + /* tty */ NULL, + /* select */ eno_select, + /* mmap */ eno_mmap, + /* strategy */ eno_strat, + /* getc */ eno_getc, + /* putc */ eno_putc, + /* type */ 0 +}; + +static void pf_attach_hooks(void); +static void pf_detach_hooks(void); +static int pf_hooks_attached = 0; + +struct pf_rule pf_default_rule; +#if ALTQ +static int pf_altq_running; +#endif /* ALTQ */ + +#define TAGID_MAX 50000 +static TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = + TAILQ_HEAD_INITIALIZER(pf_tags); +#if ALTQ +static TAILQ_HEAD(pf_tags, pf_tagname) pf_qids = + TAILQ_HEAD_INITIALIZER(pf_qids); +#endif /* ALTQ */ + +#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) +#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE +#endif +static u_int16_t tagname2tag(struct pf_tags *, char *); +static void tag2tagname(struct pf_tags *, u_int16_t, char *); +static void tag_unref(struct pf_tags *, u_int16_t); +static int pf_rtlabel_add(struct pf_addr_wrap *); +static void pf_rtlabel_remove(struct pf_addr_wrap *); +static void pf_rtlabel_copyout(struct pf_addr_wrap *); + +#if INET +static int pf_inet_hook(struct ifnet *, struct mbuf **, int); +#endif /* INET */ +#if INET6 +static int 
pf_inet6_hook(struct ifnet *, struct mbuf **, int); +#endif /* INET6 */ + +#define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x + +static lck_attr_t *pf_perim_lock_attr; +static lck_grp_t *pf_perim_lock_grp; +static lck_grp_attr_t *pf_perim_lock_grp_attr; + +static lck_attr_t *pf_lock_attr; +static lck_grp_t *pf_lock_grp; +static lck_grp_attr_t *pf_lock_grp_attr; + +struct thread *pf_purge_thread; + +extern void pfi_kifaddr_update(void *); + +void +pfinit(void) +{ + u_int32_t *t = pf_default_rule.timeout; + int maj; + + pf_perim_lock_grp_attr = lck_grp_attr_alloc_init(); + pf_perim_lock_grp = lck_grp_alloc_init("pf_perim", + pf_perim_lock_grp_attr); + pf_perim_lock_attr = lck_attr_alloc_init(); + pf_perim_lock = lck_rw_alloc_init(pf_perim_lock_grp, + pf_perim_lock_attr); + + pf_lock_grp_attr = lck_grp_attr_alloc_init(); + pf_lock_grp = lck_grp_alloc_init("pf", pf_lock_grp_attr); + pf_lock_attr = lck_attr_alloc_init(); + pf_lock = lck_mtx_alloc_init(pf_lock_grp, pf_lock_attr); + + pool_init(&pf_rule_pl, sizeof (struct pf_rule), 0, 0, 0, "pfrulepl", + NULL); + pool_init(&pf_src_tree_pl, sizeof (struct pf_src_node), 0, 0, 0, + "pfsrctrpl", NULL); + pool_init(&pf_state_pl, sizeof (struct pf_state), 0, 0, 0, "pfstatepl", + NULL); + pool_init(&pf_state_key_pl, sizeof (struct pf_state_key), 0, 0, 0, + "pfstatekeypl", NULL); +#ifndef NO_APPLE_EXTENSIONS + pool_init(&pf_app_state_pl, sizeof (struct pf_app_state), 0, 0, 0, + "pfappstatepl", NULL); +#endif +#if ALTQ + pool_init(&pf_altq_pl, sizeof (struct pf_altq), 0, 0, 0, "pfaltqpl", + NULL); +#endif /* ALTQ */ + pool_init(&pf_pooladdr_pl, sizeof (struct pf_pooladdr), 0, 0, 0, + "pfpooladdrpl", NULL); + pfr_initialize(); + pfi_initialize(); + pf_osfp_initialize(); + + pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, + pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); + + if (max_mem <= 256*1024*1024) + pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = + PFR_KENTRY_HIWAT_SMALL; + + RB_INIT(&tree_src_tracking); + 
RB_INIT(&pf_anchors); + pf_init_ruleset(&pf_main_ruleset); + TAILQ_INIT(&pf_pabuf); + TAILQ_INIT(&state_list); +#if ALTQ + TAILQ_INIT(&pf_altqs[0]); + TAILQ_INIT(&pf_altqs[1]); + pf_altqs_active = &pf_altqs[0]; + pf_altqs_inactive = &pf_altqs[1]; +#endif /* ALTQ */ + + /* default rule should never be garbage collected */ + pf_default_rule.entries.tqe_prev = &pf_default_rule.entries.tqe_next; + pf_default_rule.action = PF_PASS; + pf_default_rule.nr = -1; + pf_default_rule.rtableid = IFSCOPE_NONE; + + /* initialize default timeouts */ + t[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL; + t[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL; + t[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL; + t[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL; + t[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL; + t[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL; + t[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL; + t[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL; + t[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL; + t[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL; + t[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL; +#ifndef NO_APPLE_EXTENSIONS + t[PFTM_GREv1_FIRST_PACKET] = PFTM_GREv1_FIRST_PACKET_VAL; + t[PFTM_GREv1_INITIATING] = PFTM_GREv1_INITIATING_VAL; + t[PFTM_GREv1_ESTABLISHED] = PFTM_GREv1_ESTABLISHED_VAL; + t[PFTM_ESP_FIRST_PACKET] = PFTM_ESP_FIRST_PACKET_VAL; + t[PFTM_ESP_INITIATING] = PFTM_ESP_INITIATING_VAL; + t[PFTM_ESP_ESTABLISHED] = PFTM_ESP_ESTABLISHED_VAL; +#endif + t[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL; + t[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL; + t[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL; + t[PFTM_FRAG] = PFTM_FRAG_VAL; + t[PFTM_INTERVAL] = PFTM_INTERVAL_VAL; + t[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL; + t[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL; + t[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START; + t[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END; + + pf_normalize_init(); + bzero(&pf_status, sizeof (pf_status)); + pf_status.debug = PF_DEBUG_URGENT; + + /* XXX do our best to 
avoid a conflict */ + pf_status.hostid = random(); + + /* without the purge thread there is no point in exposing the device */ + if (kernel_thread_start(pf_purge_thread_fn, NULL, + &pf_purge_thread) != 0) { + printf("%s: unable to start purge thread!\n", __func__); + return; + } + + /* register the character device and publish it as "pf" in devfs */ + maj = cdevsw_add(PF_CDEV_MAJOR, &pf_cdevsw); + if (maj == -1) { + printf("%s: failed to allocate major number!\n", __func__); + return; + } + (void) devfs_make_node(makedev(maj, 0), DEVFS_CHAR, + UID_ROOT, GID_WHEEL, 0600, "pf", 0); +} + +#if 0 +static void +pfdetach(void) +{ + struct pf_anchor *anchor; + struct pf_state *state; + struct pf_src_node *node; + struct pfioc_table pt; + u_int32_t ticket; + int i; + char r = '\0'; + + pf_status.running = 0; + wakeup(pf_purge_thread_fn); + + /* clear the rulesets */ + for (i = 0; i < PF_RULESET_MAX; i++) + if (pf_begin_rules(&ticket, i, &r) == 0) + pf_commit_rules(ticket, i, &r); +#if ALTQ + if (pf_begin_altq(&ticket) == 0) + pf_commit_altq(ticket); +#endif /* ALTQ */ + + /* clear states */ + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + state->timeout = PFTM_PURGE; +#if NPFSYNC + state->sync_flags = PFSTATE_NOSYNC; +#endif + } + pf_purge_expired_states(pf_status.states); + +#if NPFSYNC + pfsync_clear_states(pf_status.hostid, NULL); +#endif + + /* clear source nodes */ + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + state->src_node = NULL; + state->nat_src_node = NULL; + } + RB_FOREACH(node, pf_src_tree, &tree_src_tracking) { + node->expire = 1; + node->states = 0; + } + pf_purge_expired_src_nodes(); + + /* clear tables */ + memset(&pt, '\0', sizeof (pt)); + pfr_clr_tables(&pt.pfrio_table, &pt.pfrio_ndel, pt.pfrio_flags); + + /* destroy anchors */ + while ((anchor = RB_MIN(pf_anchor_global, &pf_anchors)) != NULL) { + for (i = 0; i < PF_RULESET_MAX; i++) + if (pf_begin_rules(&ticket, i, anchor->name) == 0) + pf_commit_rules(ticket, i, anchor->name); + } + + /* destroy main ruleset */ + pf_remove_if_empty_ruleset(&pf_main_ruleset); + + /* destroy the pools */ + pool_destroy(&pf_pooladdr_pl); +#if ALTQ +
pool_destroy(&pf_altq_pl); +#endif /* ALTQ */ + pool_destroy(&pf_state_pl); + pool_destroy(&pf_rule_pl); + pool_destroy(&pf_src_tree_pl); + + /* destroy subsystems */ + pf_normalize_destroy(); + pf_osfp_destroy(); + pfr_destroy(); + pfi_destroy(); +} +#endif + +static int +pfopen(dev_t dev, int flags, int fmt, struct proc *p) +{ +#pragma unused(flags, fmt, p) + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +static int +pfclose(dev_t dev, int flags, int fmt, struct proc *p) +{ +#pragma unused(flags, fmt, p) + if (minor(dev) >= 1) + return (ENXIO); + return (0); +} + +static struct pf_pool * +pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, + u_int32_t rule_number, u_int8_t r_last, u_int8_t active, + u_int8_t check_ticket) +{ + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num; + + ruleset = pf_find_ruleset(anchor); + if (ruleset == NULL) + return (NULL); + rs_num = pf_get_ruleset_number(rule_action); + if (rs_num >= PF_RULESET_MAX) + return (NULL); + if (active) { + if (check_ticket && ticket != + ruleset->rules[rs_num].active.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + } else { + if (check_ticket && ticket != + ruleset->rules[rs_num].inactive.ticket) + return (NULL); + if (r_last) + rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + else + rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); + } + if (!r_last) { + while ((rule != NULL) && (rule->nr != rule_number)) + rule = TAILQ_NEXT(rule, entries); + } + if (rule == NULL) + return (NULL); + + return (&rule->rpool); +} + +static void +pf_mv_pool(struct pf_palist *poola, struct pf_palist *poolb) +{ + struct pf_pooladdr *mv_pool_pa; + + while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { + TAILQ_REMOVE(poola, mv_pool_pa, entries); + TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); + } +} + +static void 
+pf_empty_pool(struct pf_palist *poola) +{ + struct pf_pooladdr *empty_pool_pa; + + while ((empty_pool_pa = TAILQ_FIRST(poola)) != NULL) { + pfi_dynaddr_remove(&empty_pool_pa->addr); + pf_tbladdr_remove(&empty_pool_pa->addr); + pfi_kif_unref(empty_pool_pa->kif, PFI_KIF_REF_RULE); + TAILQ_REMOVE(poola, empty_pool_pa, entries); + pool_put(&pf_pooladdr_pl, empty_pool_pa); + } +} + +void +pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) +{ + if (rulequeue != NULL) { + if (rule->states <= 0) { + /* + * XXX - we need to remove the table *before* detaching + * the rule to make sure the table code does not delete + * the anchor under our feet. + */ + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + } + TAILQ_REMOVE(rulequeue, rule, entries); + rule->entries.tqe_prev = NULL; + rule->nr = -1; + } + + if (rule->states > 0 || rule->src_nodes > 0 || + rule->entries.tqe_prev != NULL) + return; + pf_tag_unref(rule->tag); + pf_tag_unref(rule->match_tag); +#if ALTQ + if (rule->pqid != rule->qid) + pf_qid_unref(rule->pqid); + pf_qid_unref(rule->qid); +#endif /* ALTQ */ + pf_rtlabel_remove(&rule->src.addr); + pf_rtlabel_remove(&rule->dst.addr); + pfi_dynaddr_remove(&rule->src.addr); + pfi_dynaddr_remove(&rule->dst.addr); + if (rulequeue == NULL) { + pf_tbladdr_remove(&rule->src.addr); + pf_tbladdr_remove(&rule->dst.addr); + if (rule->overload_tbl) + pfr_detach_table(rule->overload_tbl); + } + pfi_kif_unref(rule->kif, PFI_KIF_REF_RULE); + pf_anchor_remove(rule); + pf_empty_pool(&rule->rpool.list); + pool_put(&pf_rule_pl, rule); +} + +static u_int16_t +tagname2tag(struct pf_tags *head, char *tagname) +{ + struct pf_tagname *tag, *p = NULL; + u_int16_t new_tagid = 1; + + TAILQ_FOREACH(tag, head, entries) + if (strcmp(tagname, tag->name) == 0) { + tag->ref++; + return (tag->tag); + } + + /* + * to avoid fragmentation, we do a linear search from the beginning + * and take the first 
free slot we find. if there is none or the list + * is empty, append a new entry at the end. + */ + + /* new entry */ + if (!TAILQ_EMPTY(head)) + for (p = TAILQ_FIRST(head); p != NULL && + p->tag == new_tagid; p = TAILQ_NEXT(p, entries)) + new_tagid = p->tag + 1; + + if (new_tagid > TAGID_MAX) + return (0); + + /* allocate and fill new struct pf_tagname */ + tag = _MALLOC(sizeof (*tag), M_TEMP, M_WAITOK|M_ZERO); + if (tag == NULL) + return (0); + strlcpy(tag->name, tagname, sizeof (tag->name)); + tag->tag = new_tagid; + tag->ref++; + + if (p != NULL) /* insert new entry before p */ + TAILQ_INSERT_BEFORE(p, tag, entries); + else /* either list empty or no free slot in between */ + TAILQ_INSERT_TAIL(head, tag, entries); + + return (tag->tag); +} + +static void +tag2tagname(struct pf_tags *head, u_int16_t tagid, char *p) +{ + struct pf_tagname *tag; + + TAILQ_FOREACH(tag, head, entries) + if (tag->tag == tagid) { + strlcpy(p, tag->name, PF_TAG_NAME_SIZE); + return; + } +} + +static void +tag_unref(struct pf_tags *head, u_int16_t tag) +{ + struct pf_tagname *p, *next; + + if (tag == 0) + return; + + for (p = TAILQ_FIRST(head); p != NULL; p = next) { + next = TAILQ_NEXT(p, entries); + if (tag == p->tag) { + if (--p->ref == 0) { + TAILQ_REMOVE(head, p, entries); + _FREE(p, M_TEMP); + } + break; + } + } +} + +u_int16_t +pf_tagname2tag(char *tagname) +{ + return (tagname2tag(&pf_tags, tagname)); +} + +void +pf_tag2tagname(u_int16_t tagid, char *p) +{ + tag2tagname(&pf_tags, tagid, p); +} + +void +pf_tag_ref(u_int16_t tag) +{ + struct pf_tagname *t; + + TAILQ_FOREACH(t, &pf_tags, entries) + if (t->tag == tag) + break; + if (t != NULL) + t->ref++; +} + +void +pf_tag_unref(u_int16_t tag) +{ + tag_unref(&pf_tags, tag); +} + +static int +pf_rtlabel_add(struct pf_addr_wrap *a) +{ +#pragma unused(a) + return (0); +} + +static void +pf_rtlabel_remove(struct pf_addr_wrap *a) +{ +#pragma unused(a) +} + +static void +pf_rtlabel_copyout(struct pf_addr_wrap *a) +{ +#pragma unused(a) +} 
+/*
+ * ALTQ queue-id helpers (compiled only when ALTQ is set).
+ *
+ * pf_qname2qid() forwards the queue name to tagname2tag() on the pf_qids
+ * list and widens the result to u_int32_t.  The DIOCADDRULE and
+ * DIOCCHANGERULE handlers in pfioctl() treat a return of 0 as failure
+ * and map it to EBUSY.
+ */
+#if ALTQ
+u_int32_t
+pf_qname2qid(char *qname)
+{
+	return ((u_int32_t)tagname2tag(&pf_qids, qname));
+}
+/*
+ * Forwards to tag2tagname() on pf_qids; qid is narrowed to 16 bits first.
+ * Presumably the queue name is written into p.  No buffer size is passed,
+ * so the capacity p must provide has to be confirmed against
+ * tag2tagname().
+ */
+void
+pf_qid2qname(u_int32_t qid, char *p)
+{
+	tag2tagname(&pf_qids, (u_int16_t)qid, p);
+}
+/* Forwards to tag_unref() on pf_qids; qid is narrowed to 16 bits first. */
+void
+pf_qid_unref(u_int32_t qid)
+{
+	tag_unref(&pf_qids, (u_int16_t)qid);
+}
+/*
+ * Start an ALTQ transaction: empty the inactive altq list, then hand out
+ * a new ticket through *ticket and mark the inactive list open.
+ *
+ * Entries with an empty qname are passed to altq_remove(); all others
+ * only drop their queue-id reference.  Every entry is returned to
+ * pf_altq_pl.
+ *
+ * Returns 0 on success.  `error' is overwritten by each altq_remove()
+ * call, so only the status of the last such call is reported; when it is
+ * non-zero the list has already been purged, but no ticket is issued and
+ * the open flag is left untouched.
+ */
+static int
+pf_begin_altq(u_int32_t *ticket)
+{
+	struct pf_altq *altq;
+	int error = 0;
+
+	/* Purge the old altq list */
+	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+		if (altq->qname[0] == 0) {
+			/* detach and destroy the discipline */
+			error = altq_remove(altq);
+		} else
+			pf_qid_unref(altq->qid);
+		pool_put(&pf_altq_pl, altq);
+	}
+	if (error)
+		return (error);
+	*ticket = ++ticket_altqs_inactive;
+	altqs_inactive_open = 1;
+	return (0);
+}
+/*
+ * Abort an ALTQ transaction.  A no-op returning 0 when no transaction is
+ * open or `ticket' is not the current inactive ticket.  Otherwise purges
+ * the inactive list the same way pf_begin_altq() does, clears the open
+ * flag and returns the status of the last altq_remove() call (0 if none
+ * ran).
+ */
+static int
+pf_rollback_altq(u_int32_t ticket)
+{
+	struct pf_altq *altq;
+	int error = 0;
+
+	if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
+		return (0);
+	/* Purge the old altq list */
+	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+		if (altq->qname[0] == 0) {
+			/* detach and destroy the discipline */
+			error = altq_remove(altq);
+		} else
+			pf_qid_unref(altq->qid);
+		pool_put(&pf_altq_pl, altq);
+	}
+	altqs_inactive_open = 0;
+	return (error);
+}
+/*
+ * Commit an ALTQ transaction.
+ *
+ * Returns EBUSY when no transaction is open or `ticket' is stale.
+ * Otherwise, between splnet() and splx(): swaps the active and inactive
+ * list pointers, copies the inactive ticket to the active one, attaches
+ * every discipline entry (empty qname) on the new active list -- also
+ * enabling it when pf_altq_running is set -- and finally tears down and
+ * frees the entries of the previous active list.
+ *
+ * NOTE(review): an attach/enable failure returns right after splx(),
+ * with the lists already swapped, earlier entries already attached and
+ * altqs_inactive_open still set.  Confirm that callers roll back.
+ *
+ * NOTE(review): in the purge loop pf_disable_altq() assigns `error'
+ * unconditionally, so it can replace a failure recorded for an earlier
+ * entry; the altq_pfdetach() and altq_remove() results are only kept
+ * while `error' is still 0.
+ */
+static int
+pf_commit_altq(u_int32_t ticket)
+{
+	struct pf_altqqueue *old_altqs;
+	struct pf_altq *altq;
+	int s, err, error = 0;
+
+	if (!altqs_inactive_open || ticket != ticket_altqs_inactive)
+		return (EBUSY);
+
+	/* swap altqs, keep the old. */
+	s = splnet();
+	old_altqs = pf_altqs_active;
+	pf_altqs_active = pf_altqs_inactive;
+	pf_altqs_inactive = old_altqs;
+	ticket_altqs_active = ticket_altqs_inactive;
+
+	/* Attach new disciplines */
+	TAILQ_FOREACH(altq, pf_altqs_active, entries) {
+		if (altq->qname[0] == 0) {
+			/* attach the discipline */
+			error = altq_pfattach(altq);
+			if (error == 0 && pf_altq_running)
+				error = pf_enable_altq(altq);
+			if (error != 0) {
+				splx(s);
+				return (error);
+			}
+		}
+	}
+
+	/* Purge the old altq list */
+	while ((altq = TAILQ_FIRST(pf_altqs_inactive)) != NULL) {
+		TAILQ_REMOVE(pf_altqs_inactive, altq, entries);
+		if (altq->qname[0] == 0) {
+			/* detach and destroy the discipline */
+			if (pf_altq_running)
+				error = pf_disable_altq(altq);
+			err = altq_pfdetach(altq);
+			if (err != 0 && error == 0)
+				error = err;
+			err = altq_remove(altq);
+			if (err != 0 && error == 0)
+				error = err;
+		} else
+			pf_qid_unref(altq->qid);
+		pool_put(&pf_altq_pl, altq);
+	}
+	splx(s);
+
+	altqs_inactive_open = 0;
+	return (error);
+}
+/*
+ * Enable queueing on the interface named by altq->ifname.
+ *
+ * Returns EINVAL when ifunit() cannot find the interface.  altq_enable()
+ * is called only when the interface send queue has a discipline type
+ * other than ALTQT_NONE.  If no error occurred and the queue reports
+ * ALTQ_IS_ENABLED, the token bucket regulator is programmed with rate
+ * altq->ifbandwidth and depth altq->tbrsize under splnet().
+ *
+ * The `ifp != NULL' test below is always true: the NULL case has already
+ * returned.
+ */
+static int
+pf_enable_altq(struct pf_altq *altq)
+{
+	struct ifnet *ifp;
+	struct tb_profile tb;
+	int s, error = 0;
+
+	if ((ifp = ifunit(altq->ifname)) == NULL)
+		return (EINVAL);
+
+	if (ifp->if_snd.altq_type != ALTQT_NONE)
+		error = altq_enable(&ifp->if_snd);
+
+	/* set tokenbucket regulator */
+	if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+		tb.rate = altq->ifbandwidth;
+		tb.depth = altq->tbrsize;
+		s = splnet();
+		error = tbr_set(&ifp->if_snd, &tb);
+		splx(s);
+	}
+
+	return (error);
+}
+/*
+ * Disable queueing on the interface named by altq->ifname.
+ *
+ * Returns EINVAL when the interface is not found, and 0 without touching
+ * anything when the interface's current discipline is not this altq's.
+ * After a successful altq_disable() the token bucket regulator is reset
+ * by calling tbr_set() with rate 0.
+ *
+ * NOTE(review): tb.depth is never initialised on that path; presumably
+ * tbr_set() ignores depth when rate is 0 -- confirm.
+ */
+static int
+pf_disable_altq(struct pf_altq *altq)
+{
+	struct ifnet *ifp;
+	struct tb_profile tb;
+	int s, error;
+
+	if ((ifp = ifunit(altq->ifname)) == NULL)
+		return (EINVAL);
+
+	/*
+	 * when the discipline is no longer referenced, it was overridden
+	 * by a new one. if so, just return.
+	 */
+	if (altq->altq_disc != ifp->if_snd.altq_disc)
+		return (0);
+
+	error = altq_disable(&ifp->if_snd);
+
+	if (error == 0) {
+		/* clear tokenbucket regulator */
+		tb.rate = 0;
+		s = splnet();
+		error = tbr_set(&ifp->if_snd, &tb);
+		splx(s);
+	}
+
+	return (error);
+}
+#endif /* ALTQ */
+/*
+ * Start a rule transaction on ruleset type `rs_num' of `anchor'.
+ *
+ * Returns EINVAL for an out-of-range rs_num or when
+ * pf_find_or_create_ruleset() returns NULL.  Otherwise removes every rule
+ * from the inactive list (decrementing rcount for each one), increments
+ * the inactive ticket, stores it in *ticket and marks the list open.
+ */
+static int
+pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
+{
+	struct pf_ruleset *rs;
+	struct pf_rule *rule;
+
+	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+		return (EINVAL);
+	rs = pf_find_or_create_ruleset(anchor);
+	if (rs == NULL)
+		return (EINVAL);
+	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+		pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule);
+		rs->rules[rs_num].inactive.rcount--;
+	}
+	*ticket = ++rs->rules[rs_num].inactive.ticket;
+	rs->rules[rs_num].inactive.open = 1;
+	return (0);
+}
+/*
+ * Abort a rule transaction.  Returns EINVAL for an out-of-range rs_num.
+ * Returns 0 without doing anything when the ruleset does not exist, the
+ * inactive list is not open, or `ticket' does not match.  Otherwise
+ * empties the inactive list and clears the open flag.
+ */
+static int
+pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+	struct pf_ruleset *rs;
+	struct pf_rule *rule;
+
+	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+		return (EINVAL);
+	rs = pf_find_ruleset(anchor);
+	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+	    rs->rules[rs_num].inactive.ticket != ticket)
+		return (0);
+	while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
+		pf_rm_rule(rs->rules[rs_num].inactive.ptr, rule);
+		rs->rules[rs_num].inactive.rcount--;
+	}
+	rs->rules[rs_num].inactive.open = 0;
+	return (0);
+}
+/*
+ * MD5 helpers for the ruleset checksum.  Each one expands to MD5Update()
+ * on a variable named `ctx', which must be in scope at the point of use.
+ *
+ * PF_MD5_UPD        hashes the raw bytes of (st)->elm
+ * PF_MD5_UPD_STR    hashes strlen() bytes of (st)->elm (no terminator)
+ * PF_MD5_UPD_HTONL  stores htonl((st)->elm) in `stor', then hashes
+ *                   sizeof (u_int32_t) bytes of it
+ * PF_MD5_UPD_HTONS  stores htons((st)->elm) in `stor', then hashes
+ *                   sizeof (u_int16_t) bytes of it
+ */
+#define PF_MD5_UPD(st, elm) \
+	MD5Update(ctx, (u_int8_t *)&(st)->elm, sizeof ((st)->elm))
+
+#define PF_MD5_UPD_STR(st, elm) \
+	MD5Update(ctx, (u_int8_t *)(st)->elm, strlen((st)->elm))
+
+#define PF_MD5_UPD_HTONL(st, elm, stor) do { \
+	(stor) = htonl((st)->elm); \
+	MD5Update(ctx, (u_int8_t *)&(stor), sizeof (u_int32_t)); \
+} while (0)
+
+#define PF_MD5_UPD_HTONS(st, elm, stor) do { \
+	(stor) = htons((st)->elm); \
+	MD5Update(ctx, (u_int8_t *)&(stor), sizeof (u_int16_t)); \
+} while (0)
+/*
+ * Feed one rule address into the MD5 context: the address type first,
+ * then the fields selected by that type (types not listed in the switch
+ * contribute nothing more).  With the Apple extensions the port range
+ * and operator are hashed only for TCP and UDP; `neg' is hashed in both
+ * variants.
+ */
+#ifndef NO_APPLE_EXTENSIONS
+static void
+pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr, u_int8_t proto)
+#else
+static void
+pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
+#endif
+{
+	PF_MD5_UPD(pfr, addr.type);
+	switch (pfr->addr.type) {
+	case PF_ADDR_DYNIFTL:
+		PF_MD5_UPD(pfr, addr.v.ifname);
+		PF_MD5_UPD(pfr, addr.iflags);
+		break;
+	case PF_ADDR_TABLE:
+		PF_MD5_UPD(pfr, addr.v.tblname);
+		break;
+	case PF_ADDR_ADDRMASK:
+		/* XXX ignore af? */
+		PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
+		PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
+		break;
+	case PF_ADDR_RTLABEL:
+		PF_MD5_UPD(pfr, addr.v.rtlabelname);
+		break;
+	}
+
+#ifndef NO_APPLE_EXTENSIONS
+	switch (proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		PF_MD5_UPD(pfr, xport.range.port[0]);
+		PF_MD5_UPD(pfr, xport.range.port[1]);
+		PF_MD5_UPD(pfr, xport.range.op);
+		break;
+
+	default:
+		break;
+	}
+
+	PF_MD5_UPD(pfr, neg);
+#else
+	PF_MD5_UPD(pfr, port[0]);
+	PF_MD5_UPD(pfr, port[1]);
+	PF_MD5_UPD(pfr, neg);
+	PF_MD5_UPD(pfr, port_op);
+#endif
+}
+/*
+ * Feed the fields of `rule' that take part in the ruleset checksum into
+ * the MD5 context.  x and y are scratch storage for the byte-swapped
+ * 16-bit and 32-bit values used by the HTONS/HTONL helpers.
+ */
+static void
+pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
+{
+	u_int16_t x;
+	u_int32_t y;
+
+#ifndef NO_APPLE_EXTENSIONS
+	pf_hash_rule_addr(ctx, &rule->src, rule->proto);
+	pf_hash_rule_addr(ctx, &rule->dst, rule->proto);
+#else
+	pf_hash_rule_addr(ctx, &rule->src);
+	pf_hash_rule_addr(ctx, &rule->dst);
+#endif
+	PF_MD5_UPD_STR(rule, label);
+	PF_MD5_UPD_STR(rule, ifname);
+	PF_MD5_UPD_STR(rule, match_tagname);
+	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
+	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
+	PF_MD5_UPD_HTONL(rule, prob, y);
+	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
+	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
+	PF_MD5_UPD(rule, uid.op);
+	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
+	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
+	PF_MD5_UPD(rule, gid.op);
+	PF_MD5_UPD_HTONL(rule, rule_flag, y);
+	PF_MD5_UPD(rule, action);
+	PF_MD5_UPD(rule, direction);
+	PF_MD5_UPD(rule, af);
+	PF_MD5_UPD(rule, quick);
+	PF_MD5_UPD(rule, ifnot);
+	PF_MD5_UPD(rule, match_tag_not);
+	PF_MD5_UPD(rule, natpass);
+	PF_MD5_UPD(rule, keep_state);
+	PF_MD5_UPD(rule, proto);
+	PF_MD5_UPD(rule, type);
+	PF_MD5_UPD(rule, code);
+	PF_MD5_UPD(rule, flags);
+	PF_MD5_UPD(rule, flagset);
+	PF_MD5_UPD(rule, allow_opts);
+	PF_MD5_UPD(rule, rt);
+	PF_MD5_UPD(rule, tos);
+}
+/*
+ * Commit a rule transaction: make the inactive list of ruleset type
+ * `rs_num' in `anchor' the active one.  pf_lock must be held (asserted).
+ *
+ * Returns EINVAL for an out-of-range rs_num and EBUSY when the ruleset is
+ * missing, not open, or `ticket' is stale.  For the main ruleset
+ * pf_setup_pfsync_matching() runs first and its error is returned as is.
+ *
+ * After the swap the old rules sit on the inactive side; they are
+ * removed, the old pointer array is freed, and the inactive count and
+ * open flag are cleared.  pf_remove_if_empty_ruleset() is called last
+ * (presumably it releases the ruleset when nothing is left in it).
+ */
+static int
+pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
+{
+	struct pf_ruleset *rs;
+	struct pf_rule *rule, **old_array;
+	struct pf_rulequeue *old_rules;
+	int error;
+	u_int32_t old_rcount;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
+		return (EINVAL);
+	rs = pf_find_ruleset(anchor);
+	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
+	    ticket != rs->rules[rs_num].inactive.ticket)
+		return (EBUSY);
+
+	/* Calculate checksum for the main ruleset */
+	if (rs == &pf_main_ruleset) {
+		error = pf_setup_pfsync_matching(rs);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Swap rules, keep the old. */
+	old_rules = rs->rules[rs_num].active.ptr;
+	old_rcount = rs->rules[rs_num].active.rcount;
+	old_array = rs->rules[rs_num].active.ptr_array;
+
+	rs->rules[rs_num].active.ptr =
+	    rs->rules[rs_num].inactive.ptr;
+	rs->rules[rs_num].active.ptr_array =
+	    rs->rules[rs_num].inactive.ptr_array;
+	rs->rules[rs_num].active.rcount =
+	    rs->rules[rs_num].inactive.rcount;
+	rs->rules[rs_num].inactive.ptr = old_rules;
+	rs->rules[rs_num].inactive.ptr_array = old_array;
+	rs->rules[rs_num].inactive.rcount = old_rcount;
+
+	rs->rules[rs_num].active.ticket =
+	    rs->rules[rs_num].inactive.ticket;
+	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);
+
+
+	/* Purge the old rule list. */
+	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
+		pf_rm_rule(old_rules, rule);
+	if (rs->rules[rs_num].inactive.ptr_array)
+		_FREE(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
+	rs->rules[rs_num].inactive.ptr_array = NULL;
+	rs->rules[rs_num].inactive.rcount = 0;
+	rs->rules[rs_num].inactive.open = 0;
+	pf_remove_if_empty_ruleset(rs);
+	return (0);
+}
+/*
+ * Fill the exported representation `sp' from state `s' and its key `sk'.
+ * sp is zeroed first.
+ *
+ * creation is exported as an age (now - s->creation) and expire as the
+ * number of seconds left (pf_state_expires(s) - now, clamped to 0).
+ * nat_rule and anchor are exported as (unsigned)-1 when unset.
+ * s->rule.ptr and s->kif are dereferenced unconditionally.
+ */
+static void
+pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk,
+    struct pf_state *s)
+{
+	uint64_t secs = pf_time_second();
+	bzero(sp, sizeof (struct pfsync_state));
+
+	/* copy from state key */
+#ifndef NO_APPLE_EXTENSIONS
+	sp->lan.addr = sk->lan.addr;
+	sp->lan.xport = sk->lan.xport;
+	sp->gwy.addr = sk->gwy.addr;
+	sp->gwy.xport = sk->gwy.xport;
+	sp->ext.addr = sk->ext.addr;
+	sp->ext.xport = sk->ext.xport;
+	sp->proto_variant = sk->proto_variant;
+	sp->tag = s->tag;
+#else
+	sp->lan.addr = sk->lan.addr;
+	sp->lan.port = sk->lan.port;
+	sp->gwy.addr = sk->gwy.addr;
+	sp->gwy.port = sk->gwy.port;
+	sp->ext.addr = sk->ext.addr;
+	sp->ext.port = sk->ext.port;
+#endif
+	sp->proto = sk->proto;
+	sp->af = sk->af;
+	sp->direction = sk->direction;
+
+	/* copy from state */
+	memcpy(&sp->id, &s->id, sizeof (sp->id));
+	sp->creatorid = s->creatorid;
+	strlcpy(sp->ifname, s->kif->pfik_name, sizeof (sp->ifname));
+	pf_state_peer_to_pfsync(&s->src, &sp->src);
+	pf_state_peer_to_pfsync(&s->dst, &sp->dst);
+
+	sp->rule = s->rule.ptr->nr;
+	sp->nat_rule = (s->nat_rule.ptr == NULL) ?
+	    (unsigned)-1 : s->nat_rule.ptr->nr;
+	sp->anchor = (s->anchor.ptr == NULL) ?
+	    (unsigned)-1 : s->anchor.ptr->nr;
+
+	pf_state_counter_to_pfsync(s->bytes[0], sp->bytes[0]);
+	pf_state_counter_to_pfsync(s->bytes[1], sp->bytes[1]);
+	pf_state_counter_to_pfsync(s->packets[0], sp->packets[0]);
+	pf_state_counter_to_pfsync(s->packets[1], sp->packets[1]);
+	sp->creation = secs - s->creation;
+	sp->expire = pf_state_expires(s);
+	sp->log = s->log;
+	sp->allow_opts = s->allow_opts;
+	sp->timeout = s->timeout;
+
+	if (s->src_node)
+		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
+	if (s->nat_src_node)
+		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
+
+	if (sp->expire > secs)
+		sp->expire -= secs;
+	else
+		sp->expire = 0;
+
+}
+/*
+ * Fill state `s' and key `sk' from the exported representation `sp'.
+ *
+ * The state is bound to pf_default_rule, creation is set to the current
+ * time and the packet/byte counters start at 0.  When sp->expire is
+ * positive, s->expire is moved back from "now" by the default timeout
+ * for sp->timeout minus sp->expire.
+ *
+ * NOTE(review): sp->timeout indexes pf_default_rule.timeout[] without a
+ * range check here.  The DIOCADDSTATE handler in pfioctl() lets
+ * PFTM_UNTIL_PACKET through in addition to values below PFTM_MAX; if
+ * that constant is not a valid index this is an out-of-bounds read --
+ * confirm against the timeout[] declaration.
+ */
+static void
+pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk,
+    struct pf_state *s)
+{
+	/* copy to state key */
+#ifndef NO_APPLE_EXTENSIONS
+	sk->lan.addr = sp->lan.addr;
+	sk->lan.xport = sp->lan.xport;
+	sk->gwy.addr = sp->gwy.addr;
+	sk->gwy.xport = sp->gwy.xport;
+	sk->ext.addr = sp->ext.addr;
+	sk->ext.xport = sp->ext.xport;
+	sk->proto_variant = sp->proto_variant;
+	s->tag = sp->tag;
+#else
+	sk->lan.addr = sp->lan.addr;
+	sk->lan.port = sp->lan.port;
+	sk->gwy.addr = sp->gwy.addr;
+	sk->gwy.port = sp->gwy.port;
+	sk->ext.addr = sp->ext.addr;
+	sk->ext.port = sp->ext.port;
+#endif
+	sk->proto = sp->proto;
+	sk->af = sp->af;
+	sk->direction = sp->direction;
+
+	/* copy to state */
+	memcpy(&s->id, &sp->id, sizeof (sp->id));
+	s->creatorid = sp->creatorid;
+	pf_state_peer_from_pfsync(&sp->src, &s->src);
+	pf_state_peer_from_pfsync(&sp->dst, &s->dst);
+
+	s->rule.ptr = &pf_default_rule;
+	s->nat_rule.ptr = NULL;
+	s->anchor.ptr = NULL;
+	s->rt_kif = NULL;
+	s->creation = pf_time_second();
+	s->expire = pf_time_second();
+	if (sp->expire > 0)
+		s->expire -= pf_default_rule.timeout[sp->timeout] - sp->expire;
+	s->pfsync_time = 0;
+	s->packets[0] = s->packets[1] = 0;
+	s->bytes[0] = s->bytes[1] = 0;
+}
+/*
+ * Compute the MD5 checksum over the inactive rules of every ruleset type
+ * in `rs' except PF_RULESET_SCRUB and store it in pf_status.pf_chksum.
+ * As a side effect the inactive ptr_array of each processed type is
+ * freed and rebuilt so that ptr_array[rule->nr] points at the rule.
+ *
+ * Returns 0, or ENOMEM if an array cannot be allocated.
+ *
+ * NOTE(review): the array has inactive.rcount slots and is indexed by
+ * rule->nr with no bounds check; when rcount is 0 the array stays NULL.
+ * This assumes rcount matches the list length and nr < rcount -- confirm.
+ */
+static int
+pf_setup_pfsync_matching(struct pf_ruleset *rs)
+{
+	MD5_CTX ctx;
+	struct pf_rule *rule;
+	int rs_cnt;
+	u_int8_t digest[PF_MD5_DIGEST_LENGTH];
+
+	MD5Init(&ctx);
+	for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
+		/* XXX PF_RULESET_SCRUB as well? */
+		if (rs_cnt == PF_RULESET_SCRUB)
+			continue;
+
+		if (rs->rules[rs_cnt].inactive.ptr_array)
+			_FREE(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
+		rs->rules[rs_cnt].inactive.ptr_array = NULL;
+
+		if (rs->rules[rs_cnt].inactive.rcount) {
+			rs->rules[rs_cnt].inactive.ptr_array =
+			    _MALLOC(sizeof (caddr_t) *
+			    rs->rules[rs_cnt].inactive.rcount,
+			    M_TEMP, M_WAITOK);
+
+			if (!rs->rules[rs_cnt].inactive.ptr_array)
+				return (ENOMEM);
+		}
+
+		TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
+		    entries) {
+			pf_hash_rule(&ctx, rule);
+			(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
+		}
+	}
+
+	MD5Final(digest, &ctx);
+	memcpy(pf_status.pf_chksum, digest, sizeof (pf_status.pf_chksum));
+	return (0);
+}
+ +static int +pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) +{ +#pragma unused(dev) + struct pf_pooladdr *pa = NULL; + struct pf_pool *pool = NULL; + int error = 0; + + if (kauth_cred_issuser(kauth_cred_get()) == 0) + return (EPERM); + + /* XXX keep in sync with switch() below */ + if (securelevel > 1) + switch (cmd) { + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCSETSTATUSIF: + case DIOCGETSTATUS: + case DIOCCLRSTATUS: + case DIOCNATLOOK: + case DIOCSETDEBUG: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCCLRRULECTRS: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case
DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRCLRASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCCLRSRCNODES: + case DIOCIGETIFACES: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) + break; /* dummy operation ok */ + return (EPERM); + default: + return (EPERM); + } + + if (!(flags & FWRITE)) + switch (cmd) { + case DIOCSTART: + case DIOCSTOP: + case DIOCGETRULES: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCGETSTATE: + case DIOCGETSTATUS: + case DIOCGETSTATES: + case DIOCGETTIMEOUT: + case DIOCGETLIMIT: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCGETQSTATS: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCNATLOOK: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRTSTADDRS: + case DIOCOSFPGET: + case DIOCGETSRCNODES: + case DIOCIGETIFACES: + break; + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRSETTFLAGS: + if (((struct pfioc_table *)addr)->pfrio_flags & + PFR_FLAG_DUMMY) { + flags |= FWRITE; /* need write lock for dummy */ + break; /* dummy operation ok */ + } + return (EACCES); + case DIOCGETRULE: + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) + return (EACCES); + break; + default: + return (EACCES); + } + + if (flags & FWRITE) + lck_rw_lock_exclusive(pf_perim_lock); + else + lck_rw_lock_shared(pf_perim_lock); + + lck_mtx_lock(pf_lock); + + switch (cmd) { + + case DIOCSTART: + if (pf_status.running) { + error = EEXIST; + } else if (pf_purge_thread == NULL) { + error = ENOMEM; + } else { + pf_status.running = 1; + pf_status.since = pf_time_second(); + if 
(pf_status.stateid == 0) { + pf_status.stateid = pf_time_second(); + pf_status.stateid = pf_status.stateid << 32; + } + mbuf_growth_aggressive(); + pf_attach_hooks(); + wakeup(pf_purge_thread_fn); + DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n")); + } + break; + + case DIOCSTOP: + if (!pf_status.running) { + error = ENOENT; + } else { + mbuf_growth_normal(); + pf_detach_hooks(); + pf_status.running = 0; + pf_status.since = pf_time_second(); + wakeup(pf_purge_thread_fn); + DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n")); + } + break; + + case DIOCADDRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule, *tail; + struct pf_pooladdr *apa; + int rs_num; + + pr->anchor[sizeof (pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].inactive.ticket) { + error = EBUSY; + break; + } + if (pr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + rule = pool_get(&pf_rule_pl, PR_WAITOK); + if (rule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pr->rule, rule, sizeof (struct pf_rule)); + rule->cuid = kauth_cred_getuid(p->p_ucred); + rule->cpid = p->p_pid; + rule->anchor = NULL; + rule->kif = NULL; + TAILQ_INIT(&rule->rpool.list); + /* initialize refcounting */ + rule->states = 0; + rule->src_nodes = 0; + rule->entries.tqe_prev = NULL; +#if !INET + if (rule->af == AF_INET) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#if !INET6 + if (rule->af == AF_INET6) { + pool_put(&pf_rule_pl, rule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, + pf_rulequeue); + if (tail) + rule->nr = tail->nr + 1; + else + 
rule->nr = 0; + if (rule->ifname[0]) { + rule->kif = pfi_kif_get(rule->ifname); + if (rule->kif == NULL) { + pool_put(&pf_rule_pl, rule); + error = EINVAL; + break; + } + pfi_kif_ref(rule->kif, PFI_KIF_REF_RULE); + } + +#if ALTQ + /* set queue IDs */ + if (rule->qname[0] != 0) { + if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + error = EBUSY; + else if (rule->pqname[0] != 0) { + if ((rule->pqid = + pf_qname2qid(rule->pqname)) == 0) + error = EBUSY; + } else + rule->pqid = rule->qid; + } +#endif /* ALTQ */ + if (rule->tagname[0]) + if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) + error = EBUSY; + if (rule->match_tagname[0]) + if ((rule->match_tag = + pf_tagname2tag(rule->match_tagname)) == 0) + error = EBUSY; + if (rule->rt && !rule->direction) + error = EINVAL; +#if PFLOG + if (!rule->log) + rule->logif = 0; + if (rule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif /* PFLOG */ + if (pf_rtlabel_add(&rule->src.addr) || + pf_rtlabel_add(&rule->dst.addr)) + error = EBUSY; + if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) + error = EINVAL; + if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + error = EINVAL; + if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(apa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &apa->addr)) + error = EINVAL; + + if (rule->overload_tblname[0]) { + if ((rule->overload_tbl = pfr_attach_table(ruleset, + rule->overload_tblname)) == NULL) + error = EINVAL; + else + rule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&pf_pabuf, &rule->rpool.list); + if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) || + (rule->action == PF_BINAT)) && rule->anchor == NULL) || + (rule->rt > PF_FASTROUTE)) && + (TAILQ_FIRST(&rule->rpool.list) == NULL)) + error = EINVAL; + + if (error) { + pf_rm_rule(NULL, rule); + break; + } + rule->rpool.cur 
= TAILQ_FIRST(&rule->rpool.list); + rule->evaluations = rule->packets[0] = rule->packets[1] = + rule->bytes[0] = rule->bytes[1] = 0; + TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, + rule, entries); + ruleset->rules[rs_num].inactive.rcount++; + break; + } + + case DIOCGETRULES: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *tail; + int rs_num; + + pr->anchor[sizeof (pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, + pf_rulequeue); + if (tail) + pr->nr = tail->nr + 1; + else + pr->nr = 0; + pr->ticket = ruleset->rules[rs_num].active.ticket; + break; + } + + case DIOCGETRULE: { + struct pfioc_rule *pr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *rule; + int rs_num, i; + + pr->anchor[sizeof (pr->anchor) - 1] = 0; + ruleset = pf_find_ruleset(pr->anchor); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + if (pr->ticket != ruleset->rules[rs_num].active.ticket) { + error = EBUSY; + break; + } + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != pr->nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + error = EBUSY; + break; + } + bcopy(rule, &pr->rule, sizeof (struct pf_rule)); + if (pf_anchor_copyout(ruleset, rule, pr)) { + error = EBUSY; + break; + } + pfi_dynaddr_copyout(&pr->rule.src.addr); + pfi_dynaddr_copyout(&pr->rule.dst.addr); + pf_tbladdr_copyout(&pr->rule.src.addr); + pf_tbladdr_copyout(&pr->rule.dst.addr); + pf_rtlabel_copyout(&pr->rule.src.addr); + pf_rtlabel_copyout(&pr->rule.dst.addr); + for (i = 0; i < PF_SKIP_COUNT; ++i) + if (rule->skip[i].ptr 
== NULL) + pr->rule.skip[i].nr = -1; + else + pr->rule.skip[i].nr = + rule->skip[i].ptr->nr; + + if (pr->action == PF_GET_CLR_CNTR) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } + break; + } + + case DIOCCHANGERULE: { + struct pfioc_rule *pcr = (struct pfioc_rule *)addr; + struct pf_ruleset *ruleset; + struct pf_rule *oldrule = NULL, *newrule = NULL; + u_int32_t nr = 0; + int rs_num; + + if (!(pcr->action == PF_CHANGE_REMOVE || + pcr->action == PF_CHANGE_GET_TICKET) && + pcr->pool_ticket != ticket_pabuf) { + error = EBUSY; + break; + } + + if (pcr->action < PF_CHANGE_ADD_HEAD || + pcr->action > PF_CHANGE_GET_TICKET) { + error = EINVAL; + break; + } + ruleset = pf_find_ruleset(pcr->anchor); + if (ruleset == NULL) { + error = EINVAL; + break; + } + rs_num = pf_get_ruleset_number(pcr->rule.action); + if (rs_num >= PF_RULESET_MAX) { + error = EINVAL; + break; + } + + if (pcr->action == PF_CHANGE_GET_TICKET) { + pcr->ticket = ++ruleset->rules[rs_num].active.ticket; + break; + } else { + if (pcr->ticket != + ruleset->rules[rs_num].active.ticket) { + error = EINVAL; + break; + } + if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { + error = EINVAL; + break; + } + } + + if (pcr->action != PF_CHANGE_REMOVE) { + newrule = pool_get(&pf_rule_pl, PR_WAITOK); + if (newrule == NULL) { + error = ENOMEM; + break; + } + bcopy(&pcr->rule, newrule, sizeof (struct pf_rule)); + newrule->cuid = kauth_cred_getuid(p->p_ucred); + newrule->cpid = p->p_pid; + TAILQ_INIT(&newrule->rpool.list); + /* initialize refcounting */ + newrule->states = 0; + newrule->entries.tqe_prev = NULL; +#if !INET + if (newrule->af == AF_INET) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#if !INET6 + if (newrule->af == AF_INET6) { + pool_put(&pf_rule_pl, newrule); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newrule->ifname[0]) { + newrule->kif = pfi_kif_get(newrule->ifname); + if 
(newrule->kif == NULL) { + pool_put(&pf_rule_pl, newrule); + error = EINVAL; + break; + } + pfi_kif_ref(newrule->kif, PFI_KIF_REF_RULE); + } else + newrule->kif = NULL; + +#if ALTQ + /* set queue IDs */ + if (newrule->qname[0] != 0) { + if ((newrule->qid = + pf_qname2qid(newrule->qname)) == 0) + error = EBUSY; + else if (newrule->pqname[0] != 0) { + if ((newrule->pqid = + pf_qname2qid(newrule->pqname)) == 0) + error = EBUSY; + } else + newrule->pqid = newrule->qid; + } +#endif /* ALTQ */ + if (newrule->tagname[0]) + if ((newrule->tag = + pf_tagname2tag(newrule->tagname)) == 0) + error = EBUSY; + if (newrule->match_tagname[0]) + if ((newrule->match_tag = pf_tagname2tag( + newrule->match_tagname)) == 0) + error = EBUSY; + if (newrule->rt && !newrule->direction) + error = EINVAL; +#if PFLOG + if (!newrule->log) + newrule->logif = 0; + if (newrule->logif >= PFLOGIFS_MAX) + error = EINVAL; +#endif /* PFLOG */ + if (pf_rtlabel_add(&newrule->src.addr) || + pf_rtlabel_add(&newrule->dst.addr)) + error = EBUSY; + if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) + error = EINVAL; + if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) + error = EINVAL; + if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + error = EINVAL; + if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) + error = EINVAL; + TAILQ_FOREACH(pa, &pf_pabuf, entries) + if (pf_tbladdr_setup(ruleset, &pa->addr)) + error = EINVAL; + + if (newrule->overload_tblname[0]) { + if ((newrule->overload_tbl = pfr_attach_table( + ruleset, newrule->overload_tblname)) == + NULL) + error = EINVAL; + else + newrule->overload_tbl->pfrkt_flags |= + PFR_TFLAG_ACTIVE; + } + + pf_mv_pool(&pf_pabuf, &newrule->rpool.list); + if (((((newrule->action == PF_NAT) || + (newrule->action == PF_RDR) || + (newrule->action == PF_BINAT) || + (newrule->rt > PF_FASTROUTE)) && + !newrule->anchor)) && + (TAILQ_FIRST(&newrule->rpool.list) == NULL)) + error = EINVAL; 
+ + if (error) { + pf_rm_rule(NULL, newrule); + break; + } + newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list); + newrule->evaluations = 0; + newrule->packets[0] = newrule->packets[1] = 0; + newrule->bytes[0] = newrule->bytes[1] = 0; + } + pf_empty_pool(&pf_pabuf); + + if (pcr->action == PF_CHANGE_ADD_HEAD) + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + else if (pcr->action == PF_CHANGE_ADD_TAIL) + oldrule = TAILQ_LAST( + ruleset->rules[rs_num].active.ptr, pf_rulequeue); + else { + oldrule = TAILQ_FIRST( + ruleset->rules[rs_num].active.ptr); + while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) + oldrule = TAILQ_NEXT(oldrule, entries); + if (oldrule == NULL) { + if (newrule != NULL) + pf_rm_rule(NULL, newrule); + error = EINVAL; + break; + } + } + + if (pcr->action == PF_CHANGE_REMOVE) { + pf_rm_rule(ruleset->rules[rs_num].active.ptr, oldrule); + ruleset->rules[rs_num].active.rcount--; + } else { + if (oldrule == NULL) + TAILQ_INSERT_TAIL( + ruleset->rules[rs_num].active.ptr, + newrule, entries); + else if (pcr->action == PF_CHANGE_ADD_HEAD || + pcr->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldrule, newrule, entries); + else + TAILQ_INSERT_AFTER( + ruleset->rules[rs_num].active.ptr, + oldrule, newrule, entries); + ruleset->rules[rs_num].active.rcount++; + } + + nr = 0; + TAILQ_FOREACH(oldrule, + ruleset->rules[rs_num].active.ptr, entries) + oldrule->nr = nr++; + + ruleset->rules[rs_num].active.ticket++; + + pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); + pf_remove_if_empty_ruleset(ruleset); + + break; + } + + case DIOCCLRSTATES: { + struct pf_state *s, *nexts; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + int killed = 0; + + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); + + if (!psk->psk_ifname[0] || strcmp(psk->psk_ifname, + s->kif->pfik_name) == 0) { +#if NPFSYNC + /* don't send out individual delete messages */ + s->sync_flags = 
PFSTATE_NOSYNC; +#endif + pf_unlink_state(s); + killed++; + } + } + psk->psk_af = killed; +#if NPFSYNC + pfsync_clear_states(pf_status.hostid, psk->psk_ifname); +#endif + break; + } + + case DIOCKILLSTATES: { + struct pf_state *s, *nexts; + struct pf_state_key *sk; + struct pf_state_host *src, *dst; + struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; + int killed = 0; + + for (s = RB_MIN(pf_state_tree_id, &tree_id); s; + s = nexts) { + nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); + sk = s->state_key; + + if (sk->direction == PF_OUT) { + src = &sk->lan; + dst = &sk->ext; + } else { + src = &sk->ext; + dst = &sk->lan; + } + if ((!psk->psk_af || sk->af == psk->psk_af) && + (!psk->psk_proto || psk->psk_proto == sk->proto) && + PF_MATCHA(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, + &psk->psk_src.addr.v.a.mask, + &src->addr, sk->af) && + PF_MATCHA(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, + &psk->psk_dst.addr.v.a.mask, + &dst->addr, sk->af) && +#ifndef NO_APPLE_EXTENSIONS + (pf_match_xport(psk->psk_proto, + psk->psk_proto_variant, &psk->psk_src.xport, + &src->xport)) && + (pf_match_xport(psk->psk_proto, + psk->psk_proto_variant, &psk->psk_dst.xport, + &dst->xport)) && +#else + (psk->psk_src.port_op == 0 || + pf_match_port(psk->psk_src.port_op, + psk->psk_src.port[0], psk->psk_src.port[1], + src->port)) && + (psk->psk_dst.port_op == 0 || + pf_match_port(psk->psk_dst.port_op, + psk->psk_dst.port[0], psk->psk_dst.port[1], + dst->port)) && +#endif + (!psk->psk_ifname[0] || strcmp(psk->psk_ifname, + s->kif->pfik_name) == 0)) { +#if NPFSYNC + /* send immediate delete of state */ + pfsync_delete_state(s); + s->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(s); + killed++; + } + } + psk->psk_af = killed; + break; + } + + case DIOCADDSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pfsync_state *sp = &ps->state; + struct pf_state *s; + struct pf_state_key *sk; + struct pfi_kif *kif; + + if (sp->timeout >= PFTM_MAX && 
+ sp->timeout != PFTM_UNTIL_PACKET) { + error = EINVAL; + break; + } + s = pool_get(&pf_state_pl, PR_WAITOK); + if (s == NULL) { + error = ENOMEM; + break; + } + bzero(s, sizeof (struct pf_state)); + if ((sk = pf_alloc_state_key(s)) == NULL) { + pool_put(&pf_state_pl, s); + error = ENOMEM; + break; + } + pf_state_import(sp, sk, s); + kif = pfi_kif_get(sp->ifname); + if (kif == NULL) { + pool_put(&pf_state_pl, s); + pool_put(&pf_state_key_pl, sk); + error = ENOENT; + break; + } +#ifndef NO_APPLE_EXTENSIONS + TAILQ_INIT(&s->unlink_hooks); + s->state_key->app_state = 0; +#endif + if (pf_insert_state(kif, s)) { + pfi_kif_unref(kif, PFI_KIF_REF_NONE); + pool_put(&pf_state_pl, s); + error = EEXIST; + break; + } + pf_default_rule.states++; + break; + } + + case DIOCGETSTATE: { + struct pfioc_state *ps = (struct pfioc_state *)addr; + struct pf_state *s; + struct pf_state_cmp id_key; + + bcopy(ps->state.id, &id_key.id, sizeof (id_key.id)); + id_key.creatorid = ps->state.creatorid; + + s = pf_find_state_byid(&id_key); + if (s == NULL) { + error = ENOENT; + break; + } + + pf_state_export(&ps->state, s->state_key, s); + break; + } + + case DIOCGETSTATES: { + struct pfioc_states *ps = (struct pfioc_states *)addr; + struct pf_state *state; + struct pfsync_state *y, *pstore; + u_int32_t nr = 0; + + if (ps->ps_len == 0) { + nr = pf_status.states; + ps->ps_len = sizeof (struct pfsync_state) * nr; + break; + } + + pstore = _MALLOC(sizeof (*pstore), M_TEMP, M_WAITOK); + + y = ps->ps_states; + + state = TAILQ_FIRST(&state_list); + while (state) { + if (state->timeout != PFTM_UNLINKED) { + if ((nr+1) * sizeof (*y) > (unsigned)ps->ps_len) + break; + + pf_state_export(pstore, + state->state_key, state); + error = copyout(pstore, CAST_USER_ADDR_T(y), + sizeof (*y)); + if (error) { + _FREE(pstore, M_TEMP); + goto fail; + } + y++; + nr++; + } + state = TAILQ_NEXT(state, entry_list); + } + + ps->ps_len = sizeof (struct pfsync_state) * nr; + + _FREE(pstore, M_TEMP); + break; + } + + case 
DIOCGETSTATUS: { + struct pf_status *s = (struct pf_status *)addr; + bcopy(&pf_status, s, sizeof (struct pf_status)); + pfi_update_status(s->ifname, s); + break; + } + + case DIOCSETSTATUSIF: { + struct pfioc_if *pi = (struct pfioc_if *)addr; + + if (pi->ifname[0] == 0) { + bzero(pf_status.ifname, IFNAMSIZ); + break; + } + strlcpy(pf_status.ifname, pi->ifname, IFNAMSIZ); + break; + } + + case DIOCCLRSTATUS: { + bzero(pf_status.counters, sizeof (pf_status.counters)); + bzero(pf_status.fcounters, sizeof (pf_status.fcounters)); + bzero(pf_status.scounters, sizeof (pf_status.scounters)); + pf_status.since = pf_time_second(); + if (*pf_status.ifname) + pfi_update_status(pf_status.ifname, NULL); + break; + } + + case DIOCNATLOOK: { + struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; + struct pf_state_key *sk; + struct pf_state *state; + struct pf_state_key_cmp key; + int m = 0, direction = pnl->direction; + + key.af = pnl->af; + key.proto = pnl->proto; + +#ifndef NO_APPLE_EXTENSIONS + key.proto_variant = pnl->proto_variant; +#endif + + if (!pnl->proto || + PF_AZERO(&pnl->saddr, pnl->af) || + PF_AZERO(&pnl->daddr, pnl->af) || + ((pnl->proto == IPPROTO_TCP || + pnl->proto == IPPROTO_UDP) && +#ifndef NO_APPLE_EXTENSIONS + (!pnl->dxport.port || !pnl->sxport.port))) +#else + (!pnl->dport || !pnl->sport))) +#endif + error = EINVAL; + else { + /* + * userland gives us source and dest of connection, + * reverse the lookup so we ask for what happens with + * the return traffic, enabling us to find it in the + * state tree. 
+ */ + if (direction == PF_IN) { + PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&key.ext.xport, &pnl->dxport, + sizeof (key.ext.xport)); +#else + key.ext.port = pnl->dport; +#endif + PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&key.gwy.xport, &pnl->sxport, + sizeof (key.gwy.xport)); +#else + key.gwy.port = pnl->sport; +#endif + state = pf_find_state_all(&key, PF_IN, &m); + } else { + PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&key.lan.xport, &pnl->dxport, + sizeof (key.lan.xport)); +#else + key.lan.port = pnl->dport; +#endif + PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&key.ext.xport, &pnl->sxport, + sizeof (key.ext.xport)); +#else + key.ext.port = pnl->sport; +#endif + state = pf_find_state_all(&key, PF_OUT, &m); + } + if (m > 1) + error = E2BIG; /* more than one state */ + else if (state != NULL) { + sk = state->state_key; + if (direction == PF_IN) { + PF_ACPY(&pnl->rsaddr, &sk->lan.addr, + sk->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&pnl->rsxport, &sk->lan.xport, + sizeof (pnl->rsxport)); +#else + pnl->rsport = sk->lan.port; +#endif + PF_ACPY(&pnl->rdaddr, &pnl->daddr, + pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&pnl->rdxport, &pnl->dxport, + sizeof (pnl->rdxport)); +#else + pnl->rdport = pnl->dport; +#endif + } else { + PF_ACPY(&pnl->rdaddr, &sk->gwy.addr, + sk->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&pnl->rdxport, &sk->gwy.xport, + sizeof (pnl->rdxport)); +#else + pnl->rdport = sk->gwy.port; +#endif + PF_ACPY(&pnl->rsaddr, &pnl->saddr, + pnl->af); +#ifndef NO_APPLE_EXTENSIONS + memcpy(&pnl->rsxport, &pnl->sxport, + sizeof (pnl->rsxport)); +#else + pnl->rsport = pnl->sport; +#endif + } + } else + error = ENOENT; + } + break; + } + + case DIOCSETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + int old; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX || + pt->seconds < 0) { + 
error = EINVAL; + goto fail; + } + old = pf_default_rule.timeout[pt->timeout]; + if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0) + pt->seconds = 1; + pf_default_rule.timeout[pt->timeout] = pt->seconds; + if (pt->timeout == PFTM_INTERVAL && pt->seconds < old) + wakeup(pf_purge_thread_fn); + pt->seconds = old; + break; + } + + case DIOCGETTIMEOUT: { + struct pfioc_tm *pt = (struct pfioc_tm *)addr; + + if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) { + error = EINVAL; + goto fail; + } + pt->seconds = pf_default_rule.timeout[pt->timeout]; + break; + } + + case DIOCGETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) { + error = EINVAL; + goto fail; + } + pl->limit = pf_pool_limits[pl->index].limit; + break; + } + + case DIOCSETLIMIT: { + struct pfioc_limit *pl = (struct pfioc_limit *)addr; + int old_limit; + + if (pl->index < 0 || pl->index >= PF_LIMIT_MAX || + pf_pool_limits[pl->index].pp == NULL) { + error = EINVAL; + goto fail; + } + pool_sethardlimit(pf_pool_limits[pl->index].pp, + pl->limit, NULL, 0); + old_limit = pf_pool_limits[pl->index].limit; + pf_pool_limits[pl->index].limit = pl->limit; + pl->limit = old_limit; + break; + } + + case DIOCSETDEBUG: { + u_int32_t *level = (u_int32_t *)addr; + + pf_status.debug = *level; + break; + } + + case DIOCCLRRULECTRS: { + /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ + struct pf_ruleset *ruleset = &pf_main_ruleset; + struct pf_rule *rule; + + TAILQ_FOREACH(rule, + ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { + rule->evaluations = 0; + rule->packets[0] = rule->packets[1] = 0; + rule->bytes[0] = rule->bytes[1] = 0; + } + break; + } + +#if ALTQ + case DIOCSTARTALTQ: { + struct pf_altq *altq; + + /* enable all altq interfaces on active list */ + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + error = pf_enable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + pf_altq_running = 1; 
+ DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n")); + break; + } + + case DIOCSTOPALTQ: { + struct pf_altq *altq; + + /* disable all altq interfaces on active list */ + TAILQ_FOREACH(altq, pf_altqs_active, entries) { + if (altq->qname[0] == 0) { + error = pf_disable_altq(altq); + if (error != 0) + break; + } + } + if (error == 0) + pf_altq_running = 0; + DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n")); + break; + } + + case DIOCADDALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq, *a; + + if (pa->ticket != ticket_altqs_inactive) { + error = EBUSY; + break; + } + altq = pool_get(&pf_altq_pl, PR_WAITOK); + if (altq == NULL) { + error = ENOMEM; + break; + } + bcopy(&pa->altq, altq, sizeof (struct pf_altq)); + + /* + * if this is for a queue, find the discipline and + * copy the necessary fields + */ + if (altq->qname[0] != 0) { + if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + error = EBUSY; + pool_put(&pf_altq_pl, altq); + break; + } + altq->altq_disc = NULL; + TAILQ_FOREACH(a, pf_altqs_inactive, entries) { + if (strncmp(a->ifname, altq->ifname, + IFNAMSIZ) == 0 && a->qname[0] == 0) { + altq->altq_disc = a->altq_disc; + break; + } + } + } + + error = altq_add(altq); + if (error) { + pool_put(&pf_altq_pl, altq); + break; + } + + TAILQ_INSERT_TAIL(pf_altqs_inactive, altq, entries); + bcopy(altq, &pa->altq, sizeof (struct pf_altq)); + break; + } + + case DIOCGETALTQS: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + + pa->nr = 0; + TAILQ_FOREACH(altq, pf_altqs_active, entries) + pa->nr++; + pa->ticket = ticket_altqs_active; + break; + } + + case DIOCGETALTQ: { + struct pfioc_altq *pa = (struct pfioc_altq *)addr; + struct pf_altq *altq; + u_int32_t nr; + + if (pa->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nr = 0; + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pa->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + 
break; + } + bcopy(altq, &pa->altq, sizeof (struct pf_altq)); + break; + } + + case DIOCCHANGEALTQ: + /* CHANGEALTQ not supported yet! */ + error = ENODEV; + break; + + case DIOCGETQSTATS: { + struct pfioc_qstats *pq = (struct pfioc_qstats *)addr; + struct pf_altq *altq; + u_int32_t nr; + int nbytes; + + if (pq->ticket != ticket_altqs_active) { + error = EBUSY; + break; + } + nbytes = pq->nbytes; + nr = 0; + altq = TAILQ_FIRST(pf_altqs_active); + while ((altq != NULL) && (nr < pq->nr)) { + altq = TAILQ_NEXT(altq, entries); + nr++; + } + if (altq == NULL) { + error = EBUSY; + break; + } + error = altq_getqstats(altq, pq->buf, &nbytes); + if (error == 0) { + pq->scheduler = altq->scheduler; + pq->nbytes = nbytes; + } + break; + } +#endif /* ALTQ */ + + case DIOCBEGINADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pf_empty_pool(&pf_pabuf); + pp->ticket = ++ticket_pabuf; + break; + } + + case DIOCADDADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + if (pp->ticket != ticket_pabuf) { + error = EBUSY; + break; + } +#if !INET + if (pp->af == AF_INET) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#if !INET6 + if (pp->af == AF_INET6) { + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (pp->addr.addr.type != PF_ADDR_ADDRMASK && + pp->addr.addr.type != PF_ADDR_DYNIFTL && + pp->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + pa = pool_get(&pf_pooladdr_pl, PR_WAITOK); + if (pa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pp->addr, pa, sizeof (struct pf_pooladdr)); + if (pa->ifname[0]) { + pa->kif = pfi_kif_get(pa->ifname); + if (pa->kif == NULL) { + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); + } + if (pfi_dynaddr_setup(&pa->addr, pp->af)) { + pfi_dynaddr_remove(&pa->addr); + pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); + pool_put(&pf_pooladdr_pl, pa); + error = EINVAL; + break; + } + TAILQ_INSERT_TAIL(&pf_pabuf, pa, 
entries); + break; + } + + case DIOCGETADDRS: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + + pp->nr = 0; + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 0); + if (pool == NULL) { + error = EBUSY; + break; + } + TAILQ_FOREACH(pa, &pool->list, entries) + pp->nr++; + break; + } + + case DIOCGETADDR: { + struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; + u_int32_t nr = 0; + + pool = pf_get_pool(pp->anchor, pp->ticket, pp->r_action, + pp->r_num, 0, 1, 1); + if (pool == NULL) { + error = EBUSY; + break; + } + pa = TAILQ_FIRST(&pool->list); + while ((pa != NULL) && (nr < pp->nr)) { + pa = TAILQ_NEXT(pa, entries); + nr++; + } + if (pa == NULL) { + error = EBUSY; + break; + } + bcopy(pa, &pp->addr, sizeof (struct pf_pooladdr)); + pfi_dynaddr_copyout(&pp->addr.addr); + pf_tbladdr_copyout(&pp->addr.addr); + pf_rtlabel_copyout(&pp->addr.addr); + break; + } + + case DIOCCHANGEADDR: { + struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; + struct pf_pooladdr *oldpa = NULL, *newpa = NULL; + struct pf_ruleset *ruleset; + + if (pca->action < PF_CHANGE_ADD_HEAD || + pca->action > PF_CHANGE_REMOVE) { + error = EINVAL; + break; + } + if (pca->addr.addr.type != PF_ADDR_ADDRMASK && + pca->addr.addr.type != PF_ADDR_DYNIFTL && + pca->addr.addr.type != PF_ADDR_TABLE) { + error = EINVAL; + break; + } + + ruleset = pf_find_ruleset(pca->anchor); + if (ruleset == NULL) { + error = EBUSY; + break; + } + pool = pf_get_pool(pca->anchor, pca->ticket, pca->r_action, + pca->r_num, pca->r_last, 1, 1); + if (pool == NULL) { + error = EBUSY; + break; + } + if (pca->action != PF_CHANGE_REMOVE) { + newpa = pool_get(&pf_pooladdr_pl, PR_WAITOK); + if (newpa == NULL) { + error = ENOMEM; + break; + } + bcopy(&pca->addr, newpa, sizeof (struct pf_pooladdr)); +#if !INET + if (pca->af == AF_INET) { + pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET */ +#if !INET6 + if (pca->af == AF_INET6) { + 
pool_put(&pf_pooladdr_pl, newpa); + error = EAFNOSUPPORT; + break; + } +#endif /* INET6 */ + if (newpa->ifname[0]) { + newpa->kif = pfi_kif_get(newpa->ifname); + if (newpa->kif == NULL) { + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); + } else + newpa->kif = NULL; + if (pfi_dynaddr_setup(&newpa->addr, pca->af) || + pf_tbladdr_setup(ruleset, &newpa->addr)) { + pfi_dynaddr_remove(&newpa->addr); + pfi_kif_unref(newpa->kif, PFI_KIF_REF_RULE); + pool_put(&pf_pooladdr_pl, newpa); + error = EINVAL; + break; + } + } + + if (pca->action == PF_CHANGE_ADD_HEAD) + oldpa = TAILQ_FIRST(&pool->list); + else if (pca->action == PF_CHANGE_ADD_TAIL) + oldpa = TAILQ_LAST(&pool->list, pf_palist); + else { + int i = 0; + + oldpa = TAILQ_FIRST(&pool->list); + while ((oldpa != NULL) && (i < (int)pca->nr)) { + oldpa = TAILQ_NEXT(oldpa, entries); + i++; + } + if (oldpa == NULL) { + error = EINVAL; + break; + } + } + + if (pca->action == PF_CHANGE_REMOVE) { + TAILQ_REMOVE(&pool->list, oldpa, entries); + pfi_dynaddr_remove(&oldpa->addr); + pf_tbladdr_remove(&oldpa->addr); + pfi_kif_unref(oldpa->kif, PFI_KIF_REF_RULE); + pool_put(&pf_pooladdr_pl, oldpa); + } else { + if (oldpa == NULL) + TAILQ_INSERT_TAIL(&pool->list, newpa, entries); + else if (pca->action == PF_CHANGE_ADD_HEAD || + pca->action == PF_CHANGE_ADD_BEFORE) + TAILQ_INSERT_BEFORE(oldpa, newpa, entries); + else + TAILQ_INSERT_AFTER(&pool->list, oldpa, + newpa, entries); + } + + pool->cur = TAILQ_FIRST(&pool->list); + PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, + pca->af); + break; + } + + case DIOCGETRULESETS: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + + pr->path[sizeof (pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + error = EINVAL; + break; + } + pr->nr = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, 
pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL) + pr->nr++; + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + pr->nr++; + } + break; + } + + case DIOCGETRULESET: { + struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor; + u_int32_t nr = 0; + + pr->path[sizeof (pr->path) - 1] = 0; + if ((ruleset = pf_find_ruleset(pr->path)) == NULL) { + error = EINVAL; + break; + } + pr->name[0] = 0; + if (ruleset->anchor == NULL) { + /* XXX kludge for pf_main_ruleset */ + RB_FOREACH(anchor, pf_anchor_global, &pf_anchors) + if (anchor->parent == NULL && nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof (pr->name)); + break; + } + } else { + RB_FOREACH(anchor, pf_anchor_node, + &ruleset->anchor->children) + if (nr++ == pr->nr) { + strlcpy(pr->name, anchor->name, + sizeof (pr->name)); + break; + } + } + if (!pr->name[0]) + error = EBUSY; + break; + } + + case DIOCRCLRTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRADDTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_add_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRDELTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_del_tables(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETTABLES: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_table)) { + error = ENODEV; + break; + } + error = 
pfr_get_tables(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_tstats)) { + error = ENODEV; + break; + } + error = pfr_get_tstats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRTSTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_clr_tstats(io->pfrio_buffer, io->pfrio_size, + &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRSETTFLAGS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_table)) { + error = ENODEV; + break; + } + error = pfr_set_tflags(io->pfrio_buffer, io->pfrio_size, + io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, + &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != 0) { + error = ENODEV; + break; + } + error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, + io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRADDADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_add_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRDELADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_del_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRSETADDRS: { + 
struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_set_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, + &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | + PFR_FLAG_USERIOCTL, 0); + break; + } + + case DIOCRGETADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_get_addrs(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRGETASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_astats)) { + error = ENODEV; + break; + } + error = pfr_get_astats(&io->pfrio_table, io->pfrio_buffer, + &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRCLRASTATS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_clr_astats(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRTSTADDRS: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_tst_addrs(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | + PFR_FLAG_USERIOCTL); + break; + } + + case DIOCRINADEFINE: { + struct pfioc_table *io = (struct pfioc_table *)addr; + + if (io->pfrio_esize != sizeof (struct pfr_addr)) { + error = ENODEV; + break; + } + error = pfr_ina_define(&io->pfrio_table, io->pfrio_buffer, + io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, + io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); + break; + } + + case DIOCOSFPADD: { + struct pf_osfp_ioctl *io = (struct 
pf_osfp_ioctl *)addr; + error = pf_osfp_add(io); + break; + } + + case DIOCOSFPGET: { + struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; + error = pf_osfp_get(io); + break; + } + + case DIOCXBEGIN: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; + + if (io->esize != sizeof (*ioe)) { + error = ENODEV; + goto fail; + } + ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK); + table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK); + for (i = 0; i < io->size; i++) { + if (copyin(CAST_USER_ADDR_T(io->array+i), ioe, + sizeof (*ioe))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EFAULT; + goto fail; + } + switch (ioe->rs_num) { + case PF_RULESET_ALTQ: +#if ALTQ + if (ioe->anchor[0]) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_begin_altq(&ioe->ticket))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; + } +#endif /* ALTQ */ + break; + case PF_RULESET_TABLE: + bzero(table, sizeof (*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof (table->pfrt_anchor)); + if ((error = pfr_ina_begin(table, + &ioe->ticket, NULL, 0))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; + } + break; + default: + if ((error = pf_begin_rules(&ioe->ticket, + ioe->rs_num, ioe->anchor))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; + } + break; + } + if (copyout(ioe, CAST_USER_ADDR_T(io->array+i), + sizeof (io->array[i]))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EFAULT; + goto fail; + } + } + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + break; + } + + case DIOCXROLLBACK: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + int i; + + if (io->esize != sizeof (*ioe)) { + error = ENODEV; + goto fail; + } + ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK); + table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK); + for (i = 0; i < io->size; 
i++) { + if (copyin(CAST_USER_ADDR_T(io->array+i), ioe, + sizeof (*ioe))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EFAULT; + goto fail; + } + switch (ioe->rs_num) { + case PF_RULESET_ALTQ: +#if ALTQ + if (ioe->anchor[0]) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EINVAL; + goto fail; + } + if ((error = pf_rollback_altq(ioe->ticket))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } +#endif /* ALTQ */ + break; + case PF_RULESET_TABLE: + bzero(table, sizeof (*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof (table->pfrt_anchor)); + if ((error = pfr_ina_rollback(table, + ioe->ticket, NULL, 0))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } + break; + default: + if ((error = pf_rollback_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + break; + } + + case DIOCXCOMMIT: { + struct pfioc_trans *io = (struct pfioc_trans *)addr; + struct pfioc_trans_e *ioe; + struct pfr_table *table; + struct pf_ruleset *rs; + int i; + + if (io->esize != sizeof (*ioe)) { + error = ENODEV; + goto fail; + } + ioe = _MALLOC(sizeof (*ioe), M_TEMP, M_WAITOK); + table = _MALLOC(sizeof (*table), M_TEMP, M_WAITOK); + /* first makes sure everything will succeed */ + for (i = 0; i < io->size; i++) { + if (copyin(CAST_USER_ADDR_T(io->array+i), ioe, + sizeof (*ioe))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EFAULT; + goto fail; + } + switch (ioe->rs_num) { + case PF_RULESET_ALTQ: +#if ALTQ + if (ioe->anchor[0]) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EINVAL; + goto fail; + } + if (!altqs_inactive_open || ioe->ticket != + ticket_altqs_inactive) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EBUSY; + goto fail; + } +#endif /* ALTQ */ + break; + case PF_RULESET_TABLE: + rs = pf_find_ruleset(ioe->anchor); + 
if (rs == NULL || !rs->topen || ioe->ticket != + rs->tticket) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EBUSY; + goto fail; + } + break; + default: + if (ioe->rs_num < 0 || ioe->rs_num >= + PF_RULESET_MAX) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EINVAL; + goto fail; + } + rs = pf_find_ruleset(ioe->anchor); + if (rs == NULL || + !rs->rules[ioe->rs_num].inactive.open || + rs->rules[ioe->rs_num].inactive.ticket != + ioe->ticket) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EBUSY; + goto fail; + } + break; + } + } + /* now do the commit - no errors should happen here */ + for (i = 0; i < io->size; i++) { + if (copyin(CAST_USER_ADDR_T(io->array+i), ioe, + sizeof (*ioe))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + error = EFAULT; + goto fail; + } + switch (ioe->rs_num) { + case PF_RULESET_ALTQ: +#if ALTQ + if ((error = pf_commit_altq(ioe->ticket))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } +#endif /* ALTQ */ + break; + case PF_RULESET_TABLE: + bzero(table, sizeof (*table)); + strlcpy(table->pfrt_anchor, ioe->anchor, + sizeof (table->pfrt_anchor)); + if ((error = pfr_ina_commit(table, ioe->ticket, + NULL, NULL, 0))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } + break; + default: + if ((error = pf_commit_rules(ioe->ticket, + ioe->rs_num, ioe->anchor))) { + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + goto fail; /* really bad */ + } + break; + } + } + _FREE(table, M_TEMP); + _FREE(ioe, M_TEMP); + break; + } + + case DIOCGETSRCNODES: { + struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; + struct pf_src_node *n, *sn, *pstore; + u_int32_t nr = 0; + int space = psn->psn_len; + + if (space == 0) { + RB_FOREACH(n, pf_src_tree, &tree_src_tracking) + nr++; + psn->psn_len = sizeof (struct pf_src_node) * nr; + break; + } + + pstore = _MALLOC(sizeof (*pstore), M_TEMP, M_WAITOK); + + sn = psn->psn_src_nodes; + RB_FOREACH(n, pf_src_tree, 
&tree_src_tracking) { + uint64_t secs = pf_time_second(), diff; + + if ((nr + 1) * sizeof (*sn) > (unsigned)psn->psn_len) + break; + + bcopy(n, pstore, sizeof (*pstore)); + if (n->rule.ptr != NULL) + pstore->rule.nr = n->rule.ptr->nr; + pstore->creation = secs - pstore->creation; + if (pstore->expire > secs) + pstore->expire -= secs; + else + pstore->expire = 0; + + /* adjust the connection rate estimate */ + diff = secs - n->conn_rate.last; + if (diff >= n->conn_rate.seconds) + pstore->conn_rate.count = 0; + else + pstore->conn_rate.count -= + n->conn_rate.count * diff / + n->conn_rate.seconds; + + error = copyout(pstore, CAST_USER_ADDR_T(sn), + sizeof (*sn)); + if (error) { + _FREE(pstore, M_TEMP); + goto fail; + } + sn++; + nr++; + } + psn->psn_len = sizeof (struct pf_src_node) * nr; + + _FREE(pstore, M_TEMP); + break; + } + + case DIOCCLRSRCNODES: { + struct pf_src_node *n; + struct pf_state *state; + + RB_FOREACH(state, pf_state_tree_id, &tree_id) { + state->src_node = NULL; + state->nat_src_node = NULL; + } + RB_FOREACH(n, pf_src_tree, &tree_src_tracking) { + n->expire = 1; + n->states = 0; + } + pf_purge_expired_src_nodes(); + pf_status.src_nodes = 0; + break; + } + + case DIOCKILLSRCNODES: { + struct pf_src_node *sn; + struct pf_state *s; + struct pfioc_src_node_kill *psnk = + (struct pfioc_src_node_kill *)addr; + int killed = 0; + + RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { + if (PF_MATCHA(psnk->psnk_src.neg, + &psnk->psnk_src.addr.v.a.addr, + &psnk->psnk_src.addr.v.a.mask, + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, + &psnk->psnk_dst.addr.v.a.addr, + &psnk->psnk_dst.addr.v.a.mask, + &sn->raddr, sn->af)) { + /* Handle state to src_node linkage */ + if (sn->states != 0) { + RB_FOREACH(s, pf_state_tree_id, + &tree_id) { + if (s->src_node == sn) + s->src_node = NULL; + if (s->nat_src_node == sn) + s->nat_src_node = NULL; + } + sn->states = 0; + } + sn->expire = 1; + killed++; + } + } + + if (killed > 0) + pf_purge_expired_src_nodes(); + + 
psnk->psnk_af = killed; + break; + } + + case DIOCSETHOSTID: { + u_int32_t *hid = (u_int32_t *)addr; + + if (*hid == 0) + pf_status.hostid = random(); + else + pf_status.hostid = *hid; + break; + } + + case DIOCOSFPFLUSH: + pf_osfp_flush(); + break; + + case DIOCIGETIFACES: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + if (io->pfiio_esize != sizeof (struct pfi_kif)) { + error = ENODEV; + break; + } + error = pfi_get_ifaces(io->pfiio_name, io->pfiio_buffer, + &io->pfiio_size); + break; + } + + case DIOCSETIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); + break; + } + + case DIOCCLRIFFLAG: { + struct pfioc_iface *io = (struct pfioc_iface *)addr; + + error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); + break; + } + + default: + error = ENODEV; + break; + } +fail: + lck_mtx_unlock(pf_lock); + lck_rw_done(pf_perim_lock); + + return (error); +} + +int +pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp, + unsigned int af, int input) +{ /* Dispatch the packet *mp to the per-family PF hook selected by af and return that hook's error; returns 0 without touching the packet when hooks are not attached, and 0 for any af that has no case below. */ + int error = 0, reentry; + struct thread *curthread = current_thread(); + struct mbuf *nextpkt; + + reentry = (ifp->if_pf_curthread == curthread); /* nested call from the thread recorded in if_pf_curthread: the locking and unlocking in this function are skipped */ + if (!reentry) { + lck_rw_lock_shared(pf_perim_lock); + if (!pf_hooks_attached) + goto done; + + lck_mtx_lock(pf_lock); + ifp->if_pf_curthread = curthread; + } + + if (mppn != NULL && *mppn != NULL) + VERIFY(*mppn == *mp); + if ((nextpkt = (*mp)->m_nextpkt) != NULL) + (*mp)->m_nextpkt = NULL; /* unlink the rest of the m_nextpkt chain before running the hook; it is re-linked after the switch */ + + switch (af) { +#if INET + case AF_INET: { + error = pf_inet_hook(ifp, mp, input); + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: + error = pf_inet6_hook(ifp, mp, input); + break; +#endif /* INET6 */ + default: + break; + } + + if (nextpkt != NULL) { /* re-link: append the saved chain to the tail of whatever the hook left in *mp, or hand back the saved chain itself when *mp is now NULL */ + if (*mp != NULL) { + struct mbuf *m = *mp; + while (m->m_nextpkt != NULL) + m = m->m_nextpkt; + m->m_nextpkt = nextpkt; + } else { + *mp = nextpkt; + } + } + if (mppn != NULL && *mppn != NULL) + *mppn = *mp; + + if (!reentry) {
+ ifp->if_pf_curthread = NULL; + lck_mtx_unlock(pf_lock); + } +done: + if (!reentry) + lck_rw_done(pf_perim_lock); + + return (error); +} + + +#if INET +static int +pf_inet_hook(struct ifnet *ifp, struct mbuf **mp, int input) +{ /* Run pf_test() on *mp (PF_IN when input is nonzero, else PF_OUT). Returns 0 on PF_PASS; otherwise EHOSTUNREACH after freeing *mp and setting it to NULL, or ENOBUFS when pf_test() already left *mp NULL. */ + struct mbuf *m = *mp; +#if BYTE_ORDER != BIG_ENDIAN + struct ip *ip = mtod(m, struct ip *); +#endif + int error = 0; + + /* + * If the packet is outbound, is originated locally, is flagged for + * delayed UDP/TCP checksum calculation, and is about to be processed + * for an interface that doesn't support the appropriate checksum + * offloading, then calculate the checksum here so that PF can adjust + * it properly. + */ + if (!input && m->m_pkthdr.rcvif == NULL) { + static const int mask = CSUM_DELAY_DATA; + const int flags = m->m_pkthdr.csum_flags & + ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + + if (flags & mask) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~mask; + } + } + +#if BYTE_ORDER != BIG_ENDIAN + HTONS(ip->ip_len); + HTONS(ip->ip_off); /* ip_len and ip_off are byte-swapped around the pf_test() call; the else branch below swaps them back when the packet passes */ +#endif + if (pf_test(input ? PF_IN : PF_OUT, ifp, mp, NULL) != PF_PASS) { + if (*mp != NULL) { + m_freem(*mp); + *mp = NULL; + error = EHOSTUNREACH; + } else { + error = ENOBUFS; + } + } +#if BYTE_ORDER != BIG_ENDIAN + else { /* packet passed: the header pointer is re-derived from *mp rather than reusing the m captured on entry */ + ip = mtod(*mp, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + } +#endif + return (error); +} +#endif /* INET */ + +#if INET6 +int +pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input) +{ /* IPv6 counterpart of pf_inet_hook(): runs pf_test6() on *mp; the delayed-checksum step is compiled out under #if 0. */ + int error = 0; + +#if 0 + /* + * TODO: once we support IPv6 hardware checksum offload + */ + /* + * If the packet is outbound, is originated locally, is flagged for + * delayed UDP/TCP checksum calculation, and is about to be processed + * for an interface that doesn't support the appropriate checksum + * offloading, then calculate the checksum here so that PF can adjust + * it properly.
+ */ + if (!input && (*mp)->m_pkthdr.rcvif == NULL) { + static const int mask = CSUM_DELAY_DATA; + const int flags = (*mp)->m_pkthdr.csum_flags & + ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + + if (flags & mask) { + in6_delayed_cksum(*mp); + (*mp)->m_pkthdr.csum_flags &= ~mask; + } + } +#endif + + if (pf_test6(input ? PF_IN : PF_OUT, ifp, mp, NULL) != PF_PASS) { /* same verdict mapping as the IPv4 hook: free the packet and return EHOSTUNREACH, or ENOBUFS when *mp is already NULL */ + if (*mp != NULL) { + m_freem(*mp); + *mp = NULL; + error = EHOSTUNREACH; + } else { + error = ENOBUFS; + } + } + return (error); +} +#endif /* INET6 */ + +int +pf_ifaddr_hook(struct ifnet *ifp, unsigned long cmd) +{ /* Interface-address ioctl hook: for the SIOC*IFADDR commands listed below, calls pfi_kifaddr_update() on ifp's PF kif (if it has one); panics on any other cmd. Does nothing while hooks are not attached. Always returns 0. */ + lck_rw_lock_shared(pf_perim_lock); + if (!pf_hooks_attached) + goto done; + + lck_mtx_lock(pf_lock); + + switch (cmd) { + case SIOCSIFADDR: + case SIOCAIFADDR: + case SIOCDIFADDR: +#if INET6 + case SIOCAIFADDR_IN6: + case SIOCDIFADDR_IN6: +#endif /* INET6 */ + if (ifp->if_pf_kif != NULL) + pfi_kifaddr_update(ifp->if_pf_kif); + break; + default: + panic("%s: unexpected ioctl %lu", __func__, cmd); + /* NOTREACHED */ + } + + lck_mtx_unlock(pf_lock); +done: + lck_rw_done(pf_perim_lock); + return (0); +} + +/* + * Caller acquires dlil lock as writer (exclusive) + */ +void +pf_ifnet_hook(struct ifnet *ifp, int attach) +{ /* attach != 0 calls pfi_attach_ifnet(ifp), attach == 0 calls pfi_detach_ifnet(ifp), both under pf_lock; does nothing while hooks are not attached. */ + lck_rw_lock_shared(pf_perim_lock); + if (!pf_hooks_attached) + goto done; + + lck_mtx_lock(pf_lock); + if (attach) + pfi_attach_ifnet(ifp); + else + pfi_detach_ifnet(ifp); + lck_mtx_unlock(pf_lock); +done: + lck_rw_done(pf_perim_lock); +} + +static void +pf_attach_hooks(void) +{ /* Calls pfi_attach_ifnet() for every non-NULL entry of ifindex2ifnet[0..if_index] (inclusive) under the shared ifnet head lock, then sets pf_hooks_attached; no-op when already attached. */ + int i; + + if (pf_hooks_attached) + return; + + ifnet_head_lock_shared(); + for (i = 0; i <= if_index; i++) { + struct ifnet *ifp = ifindex2ifnet[i]; + if (ifp != NULL) { + pfi_attach_ifnet(ifp); + } + } + ifnet_head_done(); + pf_hooks_attached = 1; +} + +static void +pf_detach_hooks(void) +{ /* Inverse of pf_attach_hooks(): walks the same index range but only detaches interfaces whose if_pf_kif is set; no-op when hooks are not attached. */ + int i; + + if (!pf_hooks_attached) + return; + + ifnet_head_lock_shared(); + for (i = 0; i <= if_index; i++) { + struct ifnet *ifp = ifindex2ifnet[i]; + if (ifp != NULL && ifp->if_pf_kif != NULL) {
pfi_detach_ifnet(ifp); + } + } + ifnet_head_done(); + pf_hooks_attached = 0; +} diff --git a/bsd/kern/preload.h b/bsd/net/pf_mtag.h similarity index 64% rename from bsd/kern/preload.h rename to bsd/net/pf_mtag.h index 283db76d2..218ca4e8e 100644 --- a/bsd/kern/preload.h +++ b/bsd/net/pf_mtag.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,35 +22,39 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _BSD_KERN_PRELOAD_H_ -#define _BSD_KERN_PRELOAD_H_ -#if KERNEL_PRIVATE +#ifndef _NET_PF_MTAG_H_ +#define _NET_PF_MTAG_H_ -#if PRELOAD +#if PF +#if KERNEL_PRIVATE -#define PRELOAD_MAGIC 0x1395 -#define LC_PRELOAD 0x20 +#ifdef __cplusplus +extern "C" { +#endif -struct preload_command{ - unsigned long preload_cmd; /* LC_PRELOAD */ - unsigned long preload_cmdsize;/* includes entries */ - short preload_magic; /* Magic number */ - short preload_rev; /* Rev of preload header */ - int preload_hdr_size;/* Size of preload header */ - int preload_entries;/* Number of preload entries */ +#define PF_TAG_GENERATED 0x01 +#define PF_TAG_FRAGCACHE 0x02 +#define PF_TAG_TRANSLATE_LOCALHOST 0x04 + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + unsigned int rtableid; /* alternate routing table id */ + u_int32_t qid; /* queue id */ + u_int16_t tag; /* tag id */ + u_int8_t flags; + u_int8_t routed; }; -struct preload_entry{ - vm_offset_t preload_vaddr; /* Address of page */ - vm_size_t preload_size; /* size */ -}; -#endif +__private_extern__ struct pf_mtag *pf_find_mtag(struct mbuf *); +__private_extern__ struct pf_mtag *pf_get_mtag(struct mbuf *); +#ifdef __cplusplus +} #endif - - -#endif /* _BSD_KERN_PRELOAD_H_ */ +#endif /* KERNEL_PRIVATE */ +#endif /* PF */ +#endif /* _NET_PF_MTAG_H_ */ diff --git a/bsd/net/pf_norm.c b/bsd/net/pf_norm.c new file mode 100644 index 000000000..8f21362d5 --- /dev/null +++ b/bsd/net/pf_norm.c @@ -0,0 +1,2085 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_norm.c,v 1.10 2008/08/28 19:10:53 jhw Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.107 2006/04/16 00:59:52 pascoe Exp $ */ + +/* + * Copyright 2001 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if INET6 +#include +#endif /* INET6 */ + +#include + +struct pf_frent { + LIST_ENTRY(pf_frent) fr_next; + struct ip *fr_ip; + struct mbuf *fr_m; +}; + +struct pf_frcache { + LIST_ENTRY(pf_frcache) fr_next; + uint16_t fr_off; + uint16_t fr_end; +}; + +#define PFFRAG_SEENLAST 0x0001 /* Seen the last fragment for this */ +#define PFFRAG_NOBUFFER 0x0002 /* Non-buffering fragment cache */ +#define PFFRAG_DROP 0x0004 /* Drop all fragments */ +#define BUFFER_FRAGMENTS(fr) (!((fr)->fr_flags & PFFRAG_NOBUFFER)) + +struct pf_fragment { + RB_ENTRY(pf_fragment) fr_entry; + TAILQ_ENTRY(pf_fragment) frag_next; + struct in_addr fr_src; + struct in_addr fr_dst; + u_int8_t fr_p; /* protocol of this fragment */ + u_int8_t fr_flags; /* status flags */ + u_int16_t fr_id; /* fragment id for reassemble */ + u_int16_t fr_max; /* fragment data max */ + u_int32_t fr_timeout; +#define fr_queue fr_u.fru_queue +#define fr_cache fr_u.fru_cache + union { + LIST_HEAD(pf_fragq, pf_frent) fru_queue; /* buffering */ + LIST_HEAD(pf_cacheq, pf_frcache) fru_cache; /* non-buf */ + } fr_u; +}; + +static TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; +static TAILQ_HEAD(pf_cachequeue, pf_fragment) pf_cachequeue; + +static __inline 
int pf_frag_compare(struct pf_fragment *, + struct pf_fragment *); +static RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; +RB_PROTOTYPE_SC(__private_extern__, pf_frag_tree, pf_fragment, fr_entry, + pf_frag_compare); +RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); + +/* Private prototypes */ +static void pf_ip2key(struct pf_fragment *, struct ip *); +static void pf_remove_fragment(struct pf_fragment *); +static void pf_flush_fragments(void); +static void pf_free_fragment(struct pf_fragment *); +static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *); +static struct mbuf *pf_reassemble(struct mbuf **, struct pf_fragment **, + struct pf_frent *, int); +static struct mbuf *pf_fragcache(struct mbuf **, struct ip *, + struct pf_fragment **, int, int, int *); +#ifndef NO_APPLE_MODIFICATIONS +static int pf_normalize_tcpopt(struct pf_rule *, int, struct pfi_kif *, + struct pf_pdesc *, struct mbuf *, struct tcphdr *, int, int *); +#else +static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, + struct tcphdr *, int, sa_family_t); +#endif + +#define DPFPRINTF(x) do { \ + if (pf_status.debug >= PF_DEBUG_MISC) { \ + printf("%s: ", __func__); \ + printf x ; \ + } \ +} while (0) + +/* Globals */ +struct pool pf_frent_pl, pf_frag_pl; +static struct pool pf_cache_pl, pf_cent_pl; +struct pool pf_state_scrub_pl; + +static int pf_nfrents, pf_ncache; + +void +pf_normalize_init(void) +{ + pool_init(&pf_frent_pl, sizeof (struct pf_frent), 0, 0, 0, "pffrent", + NULL); + pool_init(&pf_frag_pl, sizeof (struct pf_fragment), 0, 0, 0, "pffrag", + NULL); + pool_init(&pf_cache_pl, sizeof (struct pf_fragment), 0, 0, 0, + "pffrcache", NULL); + pool_init(&pf_cent_pl, sizeof (struct pf_frcache), 0, 0, 0, "pffrcent", + NULL); + pool_init(&pf_state_scrub_pl, sizeof (struct pf_state_scrub), 0, 0, 0, + "pfstscr", NULL); + + pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); + pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); + 
pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0); + pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0); + + TAILQ_INIT(&pf_fragqueue); + TAILQ_INIT(&pf_cachequeue); +} +/* + * Teardown counterpart of pf_normalize_init(): destroys the five pools + * created there. Currently compiled out. + */ +#if 0 +void +pf_normalize_destroy(void) +{ + pool_destroy(&pf_state_scrub_pl); + pool_destroy(&pf_cent_pl); + pool_destroy(&pf_cache_pl); + pool_destroy(&pf_frag_pl); + pool_destroy(&pf_frent_pl); +} +#endif +/* + * Returns non-zero when neither the buffered-fragment queue nor the + * fragment-cache queue holds a descriptor. + */ +int +pf_normalize_isempty(void) +{ + return (TAILQ_EMPTY(&pf_fragqueue) && TAILQ_EMPTY(&pf_cachequeue)); +} +/* + * Ordering function for the fragment red-black trees. Compares the IP + * id, then the protocol, then the source and destination addresses, and + * returns a negative, zero or positive value. fr_id and fr_p are + * narrower than int, so the subtractions cannot overflow. + */ +static __inline int +pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) +{ + int diff; + + if ((diff = a->fr_id - b->fr_id)) + return (diff); + else if ((diff = a->fr_p - b->fr_p)) + return (diff); + else if (a->fr_src.s_addr < b->fr_src.s_addr) + return (-1); + else if (a->fr_src.s_addr > b->fr_src.s_addr) + return (1); + else if (a->fr_dst.s_addr < b->fr_dst.s_addr) + return (-1); + else if (a->fr_dst.s_addr > b->fr_dst.s_addr) + return (1); + return (0); +} +/* + * Frees every descriptor, on both queues, whose fr_timeout is not later + * than the current pf time minus the PFTM_FRAG timeout of the default + * rule. Each queue is scanned from its tail and the scan stops at the + * first descriptor that has not expired yet. + */ +void +pf_purge_expired_fragments(void) +{ + struct pf_fragment *frag; + u_int32_t expire = pf_time_second() - + pf_default_rule.timeout[PFTM_FRAG]; + /* Buffering (full reassembly) descriptors */ + while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { + VERIFY(BUFFER_FRAGMENTS(frag)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + } + /* Non-buffering fragment-cache descriptors */ + while ((frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue)) != NULL) { + VERIFY(!BUFFER_FRAGMENTS(frag)); + if (frag->fr_timeout > expire) + break; + + DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag)); + pf_free_fragment(frag); + VERIFY(TAILQ_EMPTY(&pf_cachequeue) || + TAILQ_LAST(&pf_cachequeue, pf_cachequeue) != frag); + } +} + +/* + * Try to flush old fragments to make space for new ones: descriptors + * are freed from the tail of a queue until the matching entry counter + * has dropped to at most 9/10 (integer arithmetic) of its value on + * entry, or the queue runs empty. + */ + +static void +pf_flush_fragments(void) +{ + struct pf_fragment *frag; + int goal; + + goal = pf_nfrents * 9 / 10; + DPFPRINTF(("trying to free > %d frents\n", + pf_nfrents - goal)); + while (goal < pf_nfrents) { +
frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); + if (frag == NULL) + break; + pf_free_fragment(frag); + } + + /* Same again for the fragment-cache entries */ + goal = pf_ncache * 9 / 10; + DPFPRINTF(("trying to free > %d cache entries\n", + pf_ncache - goal)); + while (goal < pf_ncache) { + frag = TAILQ_LAST(&pf_cachequeue, pf_cachequeue); + if (frag == NULL) + break; + pf_free_fragment(frag); + } +} + +/* + * Frees the fragments and all associated entries. For a buffering + * descriptor every queued pf_frent is released together with its mbuf; + * for a cache descriptor every pf_frcache range is released. The + * matching counter (pf_nfrents or pf_ncache) is decremented per entry. + * The descriptor itself is then handed to pf_remove_fragment(), so frag + * must not be used after this call. + */ + +static void +pf_free_fragment(struct pf_fragment *frag) +{ + struct pf_frent *frent; + struct pf_frcache *frcache; + + /* Free all fragments */ + if (BUFFER_FRAGMENTS(frag)) { + for (frent = LIST_FIRST(&frag->fr_queue); frent; + frent = LIST_FIRST(&frag->fr_queue)) { + LIST_REMOVE(frent, fr_next); + + m_freem(frent->fr_m); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + } + } else { + for (frcache = LIST_FIRST(&frag->fr_cache); frcache; + frcache = LIST_FIRST(&frag->fr_cache)) { + LIST_REMOVE(frcache, fr_next); + /* Any remaining range must start past the one just removed */ + VERIFY(LIST_EMPTY(&frag->fr_cache) || + LIST_FIRST(&frag->fr_cache)->fr_off > + frcache->fr_end); + + pool_put(&pf_cent_pl, frcache); + pf_ncache--; + } + } + + pf_remove_fragment(frag); +} +/* + * Copies the fields that identify a fragmented datagram (protocol, id, + * source and destination address) from an IPv4 header into key. No + * other member of key is written. + */ +static void +pf_ip2key(struct pf_fragment *key, struct ip *ip) +{ + key->fr_p = ip->ip_p; + key->fr_id = ip->ip_id; + key->fr_src.s_addr = ip->ip_src.s_addr; + key->fr_dst.s_addr = ip->ip_dst.s_addr; +} +/* + * Looks up the descriptor in tree that matches the IPv4 header ip. + * Returns the descriptor, or NULL when there is none. On a hit the + * descriptor's fr_timeout is set to the current pf time and the + * descriptor is moved to the head of the queue it belongs to. + */ +static struct pf_fragment * +pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree) +{ + struct pf_fragment key; + struct pf_fragment *frag; + + pf_ip2key(&key, ip); + + frag = RB_FIND(pf_frag_tree, tree, &key); + if (frag != NULL) { + /* XXX Are we sure we want to update the timeout?
*/ + frag->fr_timeout = pf_time_second(); + if (BUFFER_FRAGMENTS(frag)) { + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); + } else { + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + TAILQ_INSERT_HEAD(&pf_cachequeue, frag, frag_next); + } + } + + return (frag); +} + +/* + * Removes a fragment from the fragment queue and frees the fragment: + * the descriptor is unlinked from its red-black tree and from its queue + * and returned to its pool. Entries still linked on fr_queue or + * fr_cache are not touched here. + */ + +static void +pf_remove_fragment(struct pf_fragment *frag) +{ + if (BUFFER_FRAGMENTS(frag)) { + RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); + TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); + pool_put(&pf_frag_pl, frag); + } else { + RB_REMOVE(pf_frag_tree, &pf_cache_tree, frag); + TAILQ_REMOVE(&pf_cachequeue, frag, frag_next); + pool_put(&pf_cache_pl, frag); + } +} +/* + * FR_IP_OFF(fr) is the fragment offset of a queued entry, in bytes. + * + * pf_reassemble() adds the fragment described by frent (its mbuf is + * *m0) to the reassembly queue *frag, creating that queue first when + * *frag is NULL. mff is non-zero while more fragments follow. + * Returns the reassembled packet once the last fragment has been seen + * and no data is missing, and NULL otherwise. On the drop path frent + * is returned to its pool and the mbuf is freed. *frag is reset to + * NULL whenever the descriptor has been released. + */ +#define FR_IP_OFF(fr) ((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3) +static struct mbuf * +pf_reassemble(struct mbuf **m0, struct pf_fragment **frag, + struct pf_frent *frent, int mff) +{ + struct mbuf *m = *m0, *m2; + struct pf_frent *frea, *next; + struct pf_frent *frep = NULL; + struct ip *ip = frent->fr_ip; + int hlen = ip->ip_hl << 2; + u_int16_t off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + u_int16_t ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4; + u_int16_t fr_max = ip_len + off; + + VERIFY(*frag == NULL || BUFFER_FRAGMENTS(*frag)); + + /* Strip off ip header */ + m->m_data += hlen; + m->m_len -= hlen; + + /* Create a new reassembly queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_frag_pl, PR_NOWAIT); + if (*frag == NULL) + goto drop_fragment; + } + + (*frag)->fr_flags = 0; + (*frag)->fr_max = 0; + (*frag)->fr_src = frent->fr_ip->ip_src; + (*frag)->fr_dst = frent->fr_ip->ip_dst; + (*frag)->fr_p = frent->fr_ip->ip_p; + (*frag)->fr_id = frent->fr_ip->ip_id; + (*frag)->fr_timeout = pf_time_second(); + LIST_INIT(&(*frag)->fr_queue); + + RB_INSERT(pf_frag_tree, &pf_frag_tree, *frag); +
TAILQ_INSERT_HEAD(&pf_fragqueue, *frag, frag_next); + + /* We do not have a previous fragment */ + frep = NULL; + goto insert; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. + */ + LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) { + if (FR_IP_OFF(frea) > off) + break; + frep = frea; + } + + VERIFY(frep != NULL || frea != NULL); + + if (frep != NULL && + FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * + 4 > off) { + u_int16_t precut; + + precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - + frep->fr_ip->ip_hl * 4 - off; + if (precut >= ip_len) + goto drop_fragment; + m_adj(frent->fr_m, precut); + DPFPRINTF(("overlap -%d\n", precut)); + /* Enforce 8 byte boundaries */ + ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3)); + off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; + ip_len -= precut; + ip->ip_len = htons(ip_len); + } + + for (; frea != NULL && ip_len + off > FR_IP_OFF(frea); + frea = next) { + u_int16_t aftercut; + + aftercut = ip_len + off - FR_IP_OFF(frea); + DPFPRINTF(("adjust overlap %d\n", aftercut)); + if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl + * 4) { + frea->fr_ip->ip_len = + htons(ntohs(frea->fr_ip->ip_len) - aftercut); + frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) + + (aftercut >> 3)); + m_adj(frea->fr_m, aftercut); + break; + } + + /* This fragment is completely overlapped, lose it */ + next = LIST_NEXT(frea, fr_next); + m_freem(frea->fr_m); + LIST_REMOVE(frea, fr_next); + pool_put(&pf_frent_pl, frea); + pf_nfrents--; + } + +insert: + /* Update maximum data size */ + if ((*frag)->fr_max < fr_max) + (*frag)->fr_max = fr_max; + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (frep == NULL) + LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next); + else + LIST_INSERT_AFTER(frep, frent, fr_next); + + /* Check if we are completely reassembled */ + if (!((*frag)->fr_flags & PFFRAG_SEENLAST)) + return (NULL); + 
+ /* Check if we have all the data */ + off = 0; + for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) { + next = LIST_NEXT(frep, fr_next); + + off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4; + if (off < (*frag)->fr_max && + (next == NULL || FR_IP_OFF(next) != off)) { + DPFPRINTF(("missing fragment at %d, next %d, max %d\n", + off, next == NULL ? -1 : FR_IP_OFF(next), + (*frag)->fr_max)); + return (NULL); + } + } + DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max)); + if (off < (*frag)->fr_max) + return (NULL); + + /* We have all the data */ + frent = LIST_FIRST(&(*frag)->fr_queue); + VERIFY(frent != NULL); + if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) { + DPFPRINTF(("drop: too big: %d\n", off)); + pf_free_fragment(*frag); + *frag = NULL; + return (NULL); + } + next = LIST_NEXT(frent, fr_next); + + /* Magic from ip_input */ + ip = frent->fr_ip; + m = frent->fr_m; + m2 = m->m_next; + m->m_next = NULL; + m_cat(m, m2); + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + for (frent = next; frent != NULL; frent = next) { + next = LIST_NEXT(frent, fr_next); + + m2 = frent->fr_m; + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_cat(m, m2); + } + + ip->ip_src = (*frag)->fr_src; + ip->ip_dst = (*frag)->fr_dst; + + /* Remove from fragment queue */ + pf_remove_fragment(*frag); + *frag = NULL; + + hlen = ip->ip_hl << 2; + ip->ip_len = htons(off + hlen); + m->m_len += hlen; + m->m_data -= hlen; + + /* some debugging cruft by sklower, below, will go away soon */ + /* XXX this should be done elsewhere */ + if (m->m_flags & M_PKTHDR) { + int plen = 0; + for (m2 = m; m2; m2 = m2->m_next) + plen += m2->m_len; + m->m_pkthdr.len = plen; + } + + DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + return (m); + +drop_fragment: + /* Oops - fail safe - drop packet */ + pool_put(&pf_frent_pl, frent); + pf_nfrents--; + m_freem(m); + return (NULL); +} + +static struct mbuf * +pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int 
mff, + int drop, int *nomem) +{ + struct mbuf *m = *m0; + struct pf_frcache *frp, *fra, *cur = NULL; + int ip_len = ntohs(h->ip_len) - (h->ip_hl << 2); + u_int16_t off = ntohs(h->ip_off) << 3; + u_int16_t fr_max = ip_len + off; + int hosed = 0; + + VERIFY(*frag == NULL || !BUFFER_FRAGMENTS(*frag)); + + /* Create a new range queue for this packet */ + if (*frag == NULL) { + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) { + pf_flush_fragments(); + *frag = pool_get(&pf_cache_pl, PR_NOWAIT); + if (*frag == NULL) + goto no_mem; + } + + /* Get an entry for the queue */ + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) { + pool_put(&pf_cache_pl, *frag); + *frag = NULL; + goto no_mem; + } + pf_ncache++; + + (*frag)->fr_flags = PFFRAG_NOBUFFER; + (*frag)->fr_max = 0; + (*frag)->fr_src = h->ip_src; + (*frag)->fr_dst = h->ip_dst; + (*frag)->fr_p = h->ip_p; + (*frag)->fr_id = h->ip_id; + (*frag)->fr_timeout = pf_time_second(); + + cur->fr_off = off; + cur->fr_end = fr_max; + LIST_INIT(&(*frag)->fr_cache); + LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next); + + RB_INSERT(pf_frag_tree, &pf_cache_tree, *frag); + TAILQ_INSERT_HEAD(&pf_cachequeue, *frag, frag_next); + + DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, + fr_max)); + + goto pass; + } + + /* + * Find a fragment after the current one: + * - off contains the real shifted offset. + */ + frp = NULL; + LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) { + if (fra->fr_off > off) + break; + frp = fra; + } + + VERIFY(frp != NULL || fra != NULL); + + if (frp != NULL) { + int precut; + + precut = frp->fr_end - off; + if (precut >= ip_len) { + /* Fragment is entirely a duplicate */ + DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, fr_max)); + goto drop_fragment; + } + if (precut == 0) { + /* They are adjacent. 
Fixup cache entry */ + DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n", + h->ip_id, frp->fr_off, frp->fr_end, off, fr_max)); + frp->fr_end = fr_max; + } else if (precut > 0) { + /* + * The first part of this payload overlaps with a + * fragment that has already been passed. + * Need to trim off the first part of the payload. + * But to do so easily, we need to create another + * mbuf to throw the original header into. + */ + + DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n", + h->ip_id, precut, frp->fr_off, frp->fr_end, off, + fr_max)); + + off += precut; + fr_max -= precut; + /* Update the previous frag to encompass this one */ + frp->fr_end = fr_max; + + if (!drop) { + /* + * XXX Optimization opportunity + * This is a very heavy way to trim the payload. + * we could do it much faster by diddling mbuf + * internals but that would be even less legible + * than this mbuf magic. For my next trick, + * I'll pull a rabbit out of my laptop. + */ + *m0 = m_copym(m, 0, h->ip_hl << 2, M_NOWAIT); + if (*m0 == NULL) + goto no_mem; + VERIFY((*m0)->m_next == NULL); + m_adj(m, precut + (h->ip_hl << 2)); + m_cat(*m0, m); + m = *m0; + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + + + h = mtod(m, struct ip *); + + + VERIFY((int)m->m_len == + ntohs(h->ip_len) - precut); + h->ip_off = htons(ntohs(h->ip_off) + + (precut >> 3)); + h->ip_len = htons(ntohs(h->ip_len) - precut); + } else { + hosed++; + } + } else { + /* There is a gap between fragments */ + + DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n", + h->ip_id, -precut, frp->fr_off, frp->fr_end, off, + fr_max)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) + goto no_mem; + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = fr_max; + LIST_INSERT_AFTER(frp, cur, fr_next); + } + } + + if (fra != NULL) { + int aftercut; + int merge = 0; + + aftercut = fr_max - fra->fr_off; + if (aftercut == 0) { + /* 
Adjacent fragments */ + DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n", + h->ip_id, off, fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = off; + merge = 1; + } else if (aftercut > 0) { + /* Need to chop off the tail of this fragment */ + DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n", + h->ip_id, aftercut, off, fr_max, fra->fr_off, + fra->fr_end)); + fra->fr_off = off; + fr_max -= aftercut; + + merge = 1; + + if (!drop) { + m_adj(m, -aftercut); + if (m->m_flags & M_PKTHDR) { + int plen = 0; + struct mbuf *t; + for (t = m; t; t = t->m_next) + plen += t->m_len; + m->m_pkthdr.len = plen; + } + h = mtod(m, struct ip *); + VERIFY((int)m->m_len == + ntohs(h->ip_len) - aftercut); + h->ip_len = htons(ntohs(h->ip_len) - aftercut); + } else { + hosed++; + } + } else if (frp == NULL) { + /* There is a gap between fragments */ + DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n", + h->ip_id, -aftercut, off, fr_max, fra->fr_off, + fra->fr_end)); + + cur = pool_get(&pf_cent_pl, PR_NOWAIT); + if (cur == NULL) + goto no_mem; + pf_ncache++; + + cur->fr_off = off; + cur->fr_end = fr_max; + LIST_INSERT_BEFORE(fra, cur, fr_next); + } + + + /* Need to glue together two separate fragment descriptors */ + if (merge) { + if (cur && fra->fr_off <= cur->fr_end) { + /* Need to merge in a previous 'cur' */ + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, cur->fr_off, cur->fr_end, off, + fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = cur->fr_off; + LIST_REMOVE(cur, fr_next); + pool_put(&pf_cent_pl, cur); + pf_ncache--; + cur = NULL; + + } else if (frp && fra->fr_off <= frp->fr_end) { + /* Need to merge in a modified 'frp' */ + VERIFY(cur == NULL); + DPFPRINTF(("fragcache[%d]: adjacent(merge " + "%d-%d) %d-%d (%d-%d)\n", + h->ip_id, frp->fr_off, frp->fr_end, off, + fr_max, fra->fr_off, fra->fr_end)); + fra->fr_off = frp->fr_off; + LIST_REMOVE(frp, fr_next); + pool_put(&pf_cent_pl, frp); + pf_ncache--; + frp = NULL; + + } + } + } + + if 
(hosed) { + /* + * We must keep tracking the overall fragment even when + * we're going to drop it anyway so that we know when to + * free the overall descriptor. Thus we drop the frag late. + */ + goto drop_fragment; + } + + +pass: + /* Update maximum data size */ + if ((*frag)->fr_max < fr_max) + (*frag)->fr_max = fr_max; + + /* This is the last segment */ + if (!mff) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + /* Check if we are completely reassembled */ + if (((*frag)->fr_flags & PFFRAG_SEENLAST) && + LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 && + LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) { + /* Remove from fragment queue */ + DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id, + (*frag)->fr_max)); + pf_free_fragment(*frag); + *frag = NULL; + } + + return (m); + +no_mem: + *nomem = 1; + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + m_freem(m); + return (NULL); + +drop_fragment: + + /* Still need to pay attention to !IP_MF */ + if (!mff && *frag != NULL) + (*frag)->fr_flags |= PFFRAG_SEENLAST; + + if (drop) { + /* This fragment has been deemed bad. 
Don't reass */ + if (((*frag)->fr_flags & PFFRAG_DROP) == 0) + DPFPRINTF(("fragcache[%d]: dropping overall fragment\n", + h->ip_id)); + (*frag)->fr_flags |= PFFRAG_DROP; + } + + m_freem(m); + return (NULL); +} + +int +pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, + struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct pf_frent *frent; + struct pf_fragment *frag = NULL; + struct ip *h = mtod(m, struct ip *); + int mff = (ntohs(h->ip_off) & IP_MF); + int hlen = h->ip_hl << 2; + u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + u_int16_t fr_max; + int ip_len; + int ip_off; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != h->ip_p) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip_src.s_addr, AF_INET, + r->src.neg, kif)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, + r->dst.neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if (hlen < (int)sizeof (struct ip)) + goto drop; + + if (hlen > ntohs(h->ip_len)) + goto drop; + + /* Clear IP_DF if the rule uses the no-df option */ + if (r->rule_flag & PFRULE_NODF && h->ip_off & htons(IP_DF)) { + u_int16_t ipoff = h->ip_off; + + h->ip_off &= htons(~IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0); + } + + /* We will need other tests here */ + if (!fragoff && !mff) + goto no_fragment; + + /* 
+ * We're dealing with a fragment now. Don't allow fragments + * with IP_DF to enter the cache. If the flag was cleared by + * no-df above, fine. Otherwise drop it. + */ + if (h->ip_off & htons(IP_DF)) { + DPFPRINTF(("IP_DF\n")); + goto bad; + } + + ip_len = ntohs(h->ip_len) - hlen; + ip_off = (ntohs(h->ip_off) & IP_OFFMASK) << 3; + + /* All fragments are 8 byte aligned */ + if (mff && (ip_len & 0x7)) { + DPFPRINTF(("mff and %d\n", ip_len)); + goto bad; + } + + /* Respect maximum length */ + if (fragoff + ip_len > IP_MAXPACKET) { + DPFPRINTF(("max packet %d\n", fragoff + ip_len)); + goto bad; + } + fr_max = fragoff + ip_len; + + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { + /* Fully buffer all of the fragments */ + + frag = pf_find_fragment(h, &pf_frag_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + fr_max > frag->fr_max) + goto bad; + + /* Get an entry for the fragment queue */ + frent = pool_get(&pf_frent_pl, PR_NOWAIT); + if (frent == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + pf_nfrents++; + frent->fr_ip = h; + frent->fr_m = m; + + /* Might return a completely reassembled mbuf, or NULL */ + DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, + fr_max)); + *m0 = m = pf_reassemble(m0, &frag, frent, mff); + + if (m == NULL) + return (PF_DROP); + + /* use mtag from concatenated mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(1)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + + h = mtod(m, struct ip *); + } else { + /* non-buffering fragment cache (drops or masks overlaps) */ + int nomem = 0; + + if (dir == PF_OUT && (pd->pf_mtag->flags & PF_TAG_FRAGCACHE)) { + /* + * Already passed the fragment cache in the + * input 
direction. If we continued, it would + * appear to be a dup and would be dropped. + */ + goto fragment_pass; + } + + frag = pf_find_fragment(h, &pf_cache_tree); + + /* Check if we saw the last fragment already */ + if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) && + fr_max > frag->fr_max) { + if (r->rule_flag & PFRULE_FRAGDROP) + frag->fr_flags |= PFFRAG_DROP; + goto bad; + } + + *m0 = m = pf_fragcache(m0, h, &frag, mff, + (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem); + if (m == NULL) { + if (nomem) + goto no_mem; + goto drop; + } + + /* use mtag from copied and trimmed mbuf chain */ + pd->pf_mtag = pf_find_mtag(m); +#ifdef DIAGNOSTIC + if (pd->pf_mtag == NULL) { + printf("%s: pf_find_mtag returned NULL(2)\n", __func__); + if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { + m_freem(m); + *m0 = NULL; + goto no_mem; + } + } +#endif + if (dir == PF_IN) + pd->pf_mtag->flags |= PF_TAG_FRAGCACHE; + + if (frag != NULL && (frag->fr_flags & PFFRAG_DROP)) + goto drop; + goto fragment_pass; + } + +no_fragment: + /* At this point, only IP_DF is allowed in ip_off */ + if (h->ip_off & ~htons(IP_DF)) { + u_int16_t ipoff = h->ip_off; + + h->ip_off &= htons(IP_DF); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ipoff, h->ip_off, 0); + } + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = r->min_ttl; + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } +#if RANDOM_IP_ID + if (r->rule_flag & PFRULE_RANDOMID) { + u_int16_t ip_id = h->ip_id; + + h->ip_id = ip_randomid(); + h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + } +#endif /* RANDOM_IP_ID */ + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; + + return (PF_PASS); + +fragment_pass: + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip_ttl < r->min_ttl) { + u_int16_t ip_ttl = h->ip_ttl; + + h->ip_ttl = r->min_ttl; + 
h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); + } + if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) + pd->flags |= PFDESC_IP_REAS; + return (PF_PASS); + +no_mem: + REASON_SET(reason, PFRES_MEMORY); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, + NULL, NULL, pd); + return (PF_DROP); + +drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, + NULL, NULL, pd); + return (PF_DROP); + +bad: + DPFPRINTF(("dropping bad fragment\n")); + + /* Free associated fragments */ + if (frag != NULL) + pf_free_fragment(frag); + + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r, NULL, NULL, pd); + + return (PF_DROP); +} + +#if INET6 +int +pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif, + u_short *reason, struct pf_pdesc *pd) +{ + struct mbuf *m = *m0; + struct pf_rule *r; + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + int off; + struct ip6_ext ext; +/* adi XXX */ +#if 0 + struct ip6_opt opt; + struct ip6_opt_jumbo jumbo; + int optend; + int ooff; +#endif + struct ip6_frag frag; + u_int32_t jumbolen = 0, plen; + u_int16_t fragoff = 0; + u_int8_t proto; + int terminal; + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != AF_INET6) + r = r->skip[PF_SKIP_AF].ptr; +#if 0 /* header chain! 
*/ + else if (r->proto && r->proto != h->ip6_nxt) + r = r->skip[PF_SKIP_PROTO].ptr; +#endif + else if (PF_MISMATCHAW(&r->src.addr, + (struct pf_addr *)&h->ip6_src, AF_INET6, + r->src.neg, kif)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, + (struct pf_addr *)&h->ip6_dst, AF_INET6, + r->dst.neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; + else + break; + } + + if (r == NULL || r->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + /* Check for illegal packets */ + if ((int)(sizeof (struct ip6_hdr) + IPV6_MAXPACKET) < m->m_pkthdr.len) + goto drop; + + off = sizeof (struct ip6_hdr); + proto = h->ip6_nxt; + terminal = 0; + do { + switch (proto) { + case IPPROTO_FRAGMENT: + goto fragment; + break; + case IPPROTO_AH: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL, + NULL, AF_INET6)) + goto shortpkt; +#ifndef NO_APPLE_EXTENSIONS + /* + * + * Routing header type zero considered harmful. 
+ */ + if (proto == IPPROTO_ROUTING) { + /* + * struct ip6_ext carries only the next-header and + * length bytes, so reading ip6r_type through a cast + * of &ext would look past the pulled data. Pull + * the fixed part of the routing header instead. + */ + struct ip6_rthdr rthdr; + + if (!pf_pull_hdr(m, off, &rthdr, sizeof (rthdr), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) + goto drop; + } +#endif + /* + * Step over this header: AH counts its length in 4-byte + * units (minus 2); routing and destination options count + * 8-byte units (minus 1). A permitted routing header + * must advance off as well, or later headers are parsed + * at the wrong position. + */ + if (proto == IPPROTO_AH) + off += (ext.ip6e_len + 2) * 4; + else + off += (ext.ip6e_len + 1) * 8; + proto = ext.ip6e_nxt; + break; + case IPPROTO_HOPOPTS: +/* adi XXX */ +#if 0 + if (!pf_pull_hdr(m, off, &ext, sizeof (ext), NULL, + NULL, AF_INET6)) + goto shortpkt; + optend = off + (ext.ip6e_len + 1) * 8; + ooff = off + sizeof (ext); + do { + if (!pf_pull_hdr(m, ooff, &opt.ip6o_type, + sizeof (opt.ip6o_type), NULL, NULL, + AF_INET6)) + goto shortpkt; + if (opt.ip6o_type == IP6OPT_PAD1) { + ooff++; + continue; + } + if (!pf_pull_hdr(m, ooff, &opt, sizeof (opt), + NULL, NULL, AF_INET6)) + goto shortpkt; + if (ooff + sizeof (opt) + opt.ip6o_len > optend) + goto drop; + switch (opt.ip6o_type) { + case IP6OPT_JUMBO: + if (h->ip6_plen != 0) + goto drop; + if (!pf_pull_hdr(m, ooff, &jumbo, + sizeof (jumbo), NULL, NULL, + AF_INET6)) + goto shortpkt; + memcpy(&jumbolen, jumbo.ip6oj_jumbo_len, + sizeof (jumbolen)); + jumbolen = ntohl(jumbolen); + if (jumbolen <= IPV6_MAXPACKET) + goto drop; + if (sizeof (struct ip6_hdr) + + jumbolen != m->m_pkthdr.len) + goto drop; + break; + default: + break; + } + ooff += sizeof (opt) + opt.ip6o_len; + } while (ooff < optend); + + off = optend; + proto = ext.ip6e_nxt; + break; +#endif + default: + terminal = 1; + break; + } + } while (!terminal); + + /* jumbo payload option must be present, or plen > 0 */ + if (ntohs(h->ip6_plen) == 0) + plen = jumbolen; + else + plen = ntohs(h->ip6_plen); + if (plen == 0) + goto drop; + if ((int)(sizeof (struct ip6_hdr) + plen) > m->m_pkthdr.len) + goto shortpkt; + + /* Enforce a minimum ttl, may cause endless packet loops */ + if (r->min_ttl && h->ip6_hlim < r->min_ttl) + h->ip6_hlim = r->min_ttl; + + return (PF_PASS); + +fragment: + if (ntohs(h->ip6_plen) == 0 || jumbolen) + goto drop; + plen = ntohs(h->ip6_plen); + + if
(!pf_pull_hdr(m, off, &frag, sizeof (frag), NULL, NULL, AF_INET6)) + goto shortpkt; + fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK); + if (fragoff + (plen - off - sizeof (frag)) > IPV6_MAXPACKET) + goto badfrag; + + /* do something about it */ + /* remember to set pd->flags |= PFDESC_IP_REAS */ + return (PF_PASS); + +shortpkt: + REASON_SET(reason, PFRES_SHORT); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, + NULL, NULL, pd); + return (PF_DROP); + +drop: + REASON_SET(reason, PFRES_NORM); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, + NULL, NULL, pd); + return (PF_DROP); + +badfrag: + REASON_SET(reason, PFRES_FRAG); + if (r != NULL && r->log) + PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r, + NULL, NULL, pd); + return (PF_DROP); +} +#endif /* INET6 */ + +int +pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff, + int off, void *h, struct pf_pdesc *pd) +{ +#pragma unused(ipoff, h) + struct pf_rule *r, *rm = NULL; + struct tcphdr *th = pd->hdr.tcp; + int rewrite = 0; + u_short reason; + u_int8_t flags; + sa_family_t af = pd->af; +#ifndef NO_APPLE_EXTENSIONS + union pf_state_xport sxport, dxport; + + sxport.port = th->th_sport; + dxport.port = th->th_dport; +#endif + + r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); + while (r != NULL) { + r->evaluations++; + if (pfi_kif_match(r->kif, kif) == r->ifnot) + r = r->skip[PF_SKIP_IFP].ptr; + else if (r->direction && r->direction != dir) + r = r->skip[PF_SKIP_DIR].ptr; + else if (r->af && r->af != af) + r = r->skip[PF_SKIP_AF].ptr; + else if (r->proto && r->proto != pd->proto) + r = r->skip[PF_SKIP_PROTO].ptr; + else if (PF_MISMATCHAW(&r->src.addr, pd->src, af, + r->src.neg, kif)) + r = r->skip[PF_SKIP_SRC_ADDR].ptr; +#ifndef NO_APPLE_EXTENSIONS + else if (r->src.xport.range.op && + !pf_match_xport(r->src.xport.range.op, r->proto_variant, + &r->src.xport, &sxport)) +#else + else if (r->src.port_op && 
!pf_match_port(r->src.port_op, + r->src.port[0], r->src.port[1], th->th_sport)) +#endif + r = r->skip[PF_SKIP_SRC_PORT].ptr; + else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af, + r->dst.neg, NULL)) + r = r->skip[PF_SKIP_DST_ADDR].ptr; +#ifndef NO_APPLE_EXTENSIONS + else if (r->dst.xport.range.op && + !pf_match_xport(r->dst.xport.range.op, r->proto_variant, + &r->dst.xport, &dxport)) +#else + else if (r->dst.port_op && !pf_match_port(r->dst.port_op, + r->dst.port[0], r->dst.port[1], th->th_dport)) +#endif + r = r->skip[PF_SKIP_DST_PORT].ptr; + else if (r->os_fingerprint != PF_OSFP_ANY && + !pf_osfp_match(pf_osfp_fingerprint(pd, m, off, th), + r->os_fingerprint)) + r = TAILQ_NEXT(r, entries); + else { + rm = r; + break; + } + } + + if (rm == NULL || rm->action == PF_NOSCRUB) + return (PF_PASS); + else { + r->packets[dir == PF_OUT]++; + r->bytes[dir == PF_OUT] += pd->tot_len; + } + + if (rm->rule_flag & PFRULE_REASSEMBLE_TCP) + pd->flags |= PFDESC_TCP_NORM; + + flags = th->th_flags; + if (flags & TH_SYN) { + /* Illegal packet */ + if (flags & TH_RST) + goto tcp_drop; + + if (flags & TH_FIN) + flags &= ~TH_FIN; + } else { + /* Illegal packet */ + if (!(flags & (TH_ACK|TH_RST))) + goto tcp_drop; + } + + if (!(flags & TH_ACK)) { + /* These flags are only valid if ACK is set */ + if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) + goto tcp_drop; + } + + /* Check for illegal header length */ + if (th->th_off < (sizeof (struct tcphdr) >> 2)) + goto tcp_drop; + + /* If flags changed, or reserved data set, then adjust */ + if (flags != th->th_flags || th->th_x2 != 0) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)(&th->th_ack + 1); + th->th_flags = flags; + th->th_x2 = 0; + nv = *(u_int16_t *)(&th->th_ack + 1); + + th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); + rewrite = 1; + } + + /* Remove urgent pointer, if TH_URG is not set */ + if (!(flags & TH_URG) && th->th_urp) { + th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); + th->th_urp = 0; + rewrite 
= 1; + } + + /* copy back packet headers if we sanitized */ +#ifndef NO_APPLE_EXTENSIONS + /* Process options; m is reloaded from pd->mp afterwards */ + if (r->max_mss) { + int rv = pf_normalize_tcpopt(r, dir, kif, pd, m, th, off, + &rewrite); + if (rv == PF_DROP) + return rv; + m = pd->mp; + } + + if (rewrite) { + struct mbuf *mw = pf_lazy_makewritable(pd, m, + off + sizeof (*th)); + if (!mw) { + REASON_SET(&reason, PFRES_MEMORY); + if (r->log) + PFLOG_PACKET(kif, h, m, af, dir, reason, + r, 0, 0, pd); + return PF_DROP; + } + + m_copyback(mw, off, sizeof (*th), th); + } +#else + /* Process options */ + if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af)) + rewrite = 1; + + if (rewrite) + m_copyback(m, off, sizeof (*th), th); +#endif + + return (PF_PASS); + /* Drop path: log under the packet's own address family (af) so that IPv6 drops are not recorded as AF_INET */ +tcp_drop: + REASON_SET(&reason, PFRES_NORM); + if (rm != NULL && r->log) + PFLOG_PACKET(kif, h, m, af, dir, reason, r, NULL, NULL, pd); + return (PF_DROP); +} + /* Allocate and seed the scrub state of peer src: records the initial TTL / hop limit and, for a SYN carrying a TCP timestamp option, the timestamp values and a random modulator. Returns 1 if the pool allocation fails, otherwise 0. */ +int +pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd, + struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst) +{ +#pragma unused(dst) + u_int32_t tsval, tsecr; + u_int8_t hdr[60]; + u_int8_t *opt; + + VERIFY(src->scrub == NULL); + + src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + if (src->scrub == NULL) + return (1); + bzero(src->scrub, sizeof (*src->scrub)); + + switch (pd->af) { +#if INET + case AF_INET: { + struct ip *h = mtod(m, struct ip *); + src->scrub->pfss_ttl = h->ip_ttl; + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + src->scrub->pfss_ttl = h->ip6_hlim; + break; + } +#endif /* INET6 */ + } + + + /* + * All normalizations below are only begun if we see the start of + * the connections.
They must all set an enabled bit in pfss_flags + */ + if ((th->th_flags & TH_SYN) == 0) + return (0); + + + if (th->th_off > (sizeof (struct tcphdr) >> 2) && src->scrub && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof (struct tcphdr); + hlen = (th->th_off << 2) - sizeof (struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + if (opt[1] >= TCPOLEN_TIMESTAMP) { + src->scrub->pfss_flags |= + PFSS_TIMESTAMP; + src->scrub->pfss_ts_mod = + htonl(random()); + + /* note PFSS_PAWS not set yet */ + memcpy(&tsval, &opt[2], + sizeof (u_int32_t)); + memcpy(&tsecr, &opt[6], + sizeof (u_int32_t)); + src->scrub->pfss_tsval0 = ntohl(tsval); + src->scrub->pfss_tsval = ntohl(tsval); + src->scrub->pfss_tsecr = ntohl(tsecr); + getmicrouptime(&src->scrub->pfss_last); + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + } + + return (0); +} + +void +pf_normalize_tcp_cleanup(struct pf_state *state) +{ + if (state->src.scrub) + pool_put(&pf_state_scrub_pl, state->src.scrub); + if (state->dst.scrub) + pool_put(&pf_state_scrub_pl, state->dst.scrub); + + /* Someday... flush the TCP segment reassembly descriptors. */ +} + +int +pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd, + u_short *reason, struct tcphdr *th, struct pf_state *state, + struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback) +{ + struct timeval uptime; + u_int32_t tsval, tsecr; + u_int tsval_from_last; + u_int8_t hdr[60]; + u_int8_t *opt; + int copyback = 0; + int got_ts = 0; + + VERIFY(src->scrub || dst->scrub); + + /* + * Enforce the minimum TTL seen for this connection. Negate a common + * technique to evade an intrusion detection system and confuse + * firewall state code. 
+ */ + switch (pd->af) { +#if INET + case AF_INET: { + if (src->scrub) { + struct ip *h = mtod(m, struct ip *); + if (h->ip_ttl > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip_ttl; + h->ip_ttl = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET */ +#if INET6 + case AF_INET6: { + if (src->scrub) { + struct ip6_hdr *h = mtod(m, struct ip6_hdr *); + if (h->ip6_hlim > src->scrub->pfss_ttl) + src->scrub->pfss_ttl = h->ip6_hlim; + h->ip6_hlim = src->scrub->pfss_ttl; + } + break; + } +#endif /* INET6 */ + } + + if (th->th_off > (sizeof (struct tcphdr) >> 2) && + ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || + (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && + pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) { + /* Diddle with TCP options */ + int hlen; + opt = hdr + sizeof (struct tcphdr); + hlen = (th->th_off << 2) - sizeof (struct tcphdr); + while (hlen >= TCPOLEN_TIMESTAMP) { + switch (*opt) { + case TCPOPT_EOL: /* FALLTHROUGH */ + case TCPOPT_NOP: + opt++; + hlen--; + break; + case TCPOPT_TIMESTAMP: + /* + * Modulate the timestamps. Can be used for + * NAT detection, OS uptime determination or + * reboot detection. + */ + + if (got_ts) { + /* Huh? Multiple timestamps!? 
*/ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("multiple TS??")); + pf_print_state(state); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + if (opt[1] >= TCPOLEN_TIMESTAMP) { + memcpy(&tsval, &opt[2], + sizeof (u_int32_t)); + if (tsval && src->scrub && + (src->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsval = ntohl(tsval); + pf_change_a(&opt[2], + &th->th_sum, + htonl(tsval + + src->scrub->pfss_ts_mod), + 0); + copyback = 1; + } + + /* Modulate the TS echo reply (tsecr) only if it is valid (non-zero) */ + memcpy(&tsecr, &opt[6], + sizeof (u_int32_t)); + if (tsecr && dst->scrub && + (dst->scrub->pfss_flags & + PFSS_TIMESTAMP)) { + tsecr = ntohl(tsecr) + - dst->scrub->pfss_ts_mod; + pf_change_a(&opt[6], + &th->th_sum, htonl(tsecr), + 0); + copyback = 1; + } + got_ts = 1; + } + /* FALLTHROUGH */ + default: + hlen -= MAX(opt[1], 2); + opt += MAX(opt[1], 2); + break; + } + } + if (copyback) { + /* Copy back the options; the caller copies back the header */ +#ifndef NO_APPLE_EXTENSIONS + int optoff = off + sizeof (*th); + int optlen = (th->th_off << 2) - sizeof (*th); + m = pf_lazy_makewritable(pd, m, optoff + optlen); + if (!m) { + REASON_SET(reason, PFRES_MEMORY); + return PF_DROP; + } + *writeback = optoff + optlen; + m_copyback(m, optoff, optlen, hdr + sizeof (*th)); +#else + *writeback = 1; + m_copyback(m, off + sizeof (struct tcphdr), + (th->th_off << 2) - sizeof (struct tcphdr), hdr + + sizeof (struct tcphdr)); +#endif + } + } + + + /* + * Must invalidate PAWS checks on connections idle for too long. + * The fastest allowed timestamp clock is 1ms. That turns out to + * be about 24 days before it wraps.
XXX Right now our lowerbound + * TS echo check only works for the first 12 days of a connection + * when the TS has exhausted half its 32bit space + */ +#define TS_MAX_IDLE (24*24*60*60) +#define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ + + getmicrouptime(&uptime); + if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && + (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || + pf_time_second() - state->creation > TS_MAX_CONN)) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("src idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && + uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("dst idled out of PAWS\n")); + pf_print_state(state); + printf("\n"); + } + dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) + | PFSS_PAWS_IDLED; + } + + if (got_ts && src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* + * Validate that the timestamps are "in-window". + * RFC1323 describes TCP Timestamp options that allow + * measurement of RTT (round trip time) and PAWS + * (protection against wrapped sequence numbers). PAWS + * gives us a set of rules for rejecting packets on + * long fat pipes (packets that were somehow delayed + * in transit longer than the time it took to send the + * full TCP sequence space of 4Gb). We can use these + * rules and infer a few others that will let us treat + * the 32bit timestamp and the 32bit echoed timestamp + * as sequence numbers to prevent a blind attacker from + * inserting packets into a connection. + * + * RFC1323 tells us: + * - The timestamp on this packet must be greater than + * or equal to the last value echoed by the other + * endpoint. 
The RFC says those will be discarded + * since it is a dup that has already been acked. + * This gives us a lowerbound on the timestamp. + * timestamp >= other last echoed timestamp + * - The timestamp will be less than or equal to + * the last timestamp plus the time between the + * last packet and now. The RFC defines the max + * clock rate as 1ms. We will allow clocks to be + * up to 10% fast and will allow a total difference + * of 30 seconds due to a route change. And this + * gives us an upperbound on the timestamp. + * timestamp <= last timestamp + max ticks + * We have to be careful here. Windows will send an + * initial timestamp of zero and then initialize it + * to a random value after the 3whs; presumably to + * avoid a DoS by having to call an expensive RNG + * during a SYN flood. Proof MS has at least one + * good security geek. + * + * - The TCP timestamp option must also echo the other + * endpoint's timestamp. The timestamp echoed is the + * one carried on the earliest unacknowledged segment + * on the left edge of the sequence window. The RFC + * states that the host will reject any echoed + * timestamps that were larger than any ever sent. + * This gives us an upperbound on the TS echo. + * tsecr <= largest_tsval + * - The lowerbound on the TS echo is a little more + * tricky to determine. The other endpoint's echoed + * values will not decrease. But there may be + * network conditions that re-order packets and + * cause our view of them to decrease. For now the + * only lowerbound we can safely determine is that + * the TS echo will never be less than the original + * TS. XXX There is probably a better lowerbound. + * Remove TS_MAX_CONN with better lowerbound check. + * tsecr >= other original TS + * + * It is also important to note that the fastest + * timestamp clock of 1ms will wrap its 32bit space in + * 24 days. So we just disable TS checking after 24 + * days of idle time.
We actually must use a 12d + * connection limit until we can come up with a better + * lowerbound to the TS echo check. + */ + struct timeval delta_ts; + int ts_fudge; + + + /* + * PFTM_TS_DIFF is how many seconds of leeway to allow + * a host's timestamp. This can happen if the previous + * packet got delayed in transit for much longer than + * this packet. + */ + if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) + ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; + + + /* Calculate max ticks since the last timestamp */ +#define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ +#define TS_MICROSECS 1000000 /* microseconds per second */ + timersub(&uptime, &src->scrub->pfss_last, &delta_ts); + tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; + tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); + + + if ((src->state >= TCPS_ESTABLISHED && + dst->state >= TCPS_ESTABLISHED) && + (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || + SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || + (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || + SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { + /* + * Bad RFC1323 implementation or an insertion attack. + * + * - Solaris 2.6 and 2.7 are known to send another ACK + * after the FIN,FIN|ACK,ACK closing that carries + * an old timestamp. + */ + + DPFPRINTF(("Timestamp failed %c%c%c%c\n", + SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', + SEQ_GT(tsval, src->scrub->pfss_tsval + + tsval_from_last) ? '1' : ' ', + SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', + SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? 
'3' : ' ')); + DPFPRINTF((" tsval: %u tsecr: %u +ticks: %u " + "idle: %lus %ums\n", + tsval, tsecr, tsval_from_last, delta_ts.tv_sec, + delta_ts.tv_usec / 1000)); + DPFPRINTF((" src->tsval: %u tsecr: %u\n", + src->scrub->pfss_tsval, src->scrub->pfss_tsecr)); + DPFPRINTF((" dst->tsval: %u tsecr: %u tsval0: %u\n", + dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, + dst->scrub->pfss_tsval0)); + if (pf_status.debug >= PF_DEBUG_MISC) { + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + + /* XXX I'd really like to require tsecr but it's optional */ + + } else if (!got_ts && (th->th_flags & TH_RST) == 0 && + ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) + || pd->p_len > 0 || (th->th_flags & TH_SYN)) && + src->scrub && dst->scrub && + (src->scrub->pfss_flags & PFSS_PAWS) && + (dst->scrub->pfss_flags & PFSS_PAWS)) { + /* + * Didn't send a timestamp. Timestamps aren't really useful + * when: + * - connection opening or closing (often not even sent). + * but we must not let an attacker to put a FIN on a + * data packet to sneak it through our ESTABLISHED check. + * - on a TCP reset. RFC suggests not even looking at TS. + * - on an empty ACK. The TS will not be echoed so it will + * probably not help keep the RTT calculation in sync and + * there isn't as much danger when the sequence numbers + * got wrapped. So some stacks don't include TS on empty + * ACKs :-( + * + * To minimize the disruption to mostly RFC1323 conformant + * stacks, we will only require timestamps on data packets. + * + * And what do ya know, we cannot require timestamps on data + * packets. There appear to be devices that do legitimate + * TCP connection hijacking. There are HTTP devices that allow + * a 3whs (with timestamps) and then buffer the HTTP request. + * If the intermediate device has the HTTP response cache, it + * will spoof the response but not bother timestamping its + * packets. 
So we can look for the presence of a timestamp in + * the first data packet and if there, require it in all future + * packets. + */ + + if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { + /* + * Hey! Someone tried to sneak a packet in. Or the + * stack changed its RFC1323 behavior?!?! + */ + if (pf_status.debug >= PF_DEBUG_MISC) { + DPFPRINTF(("Did not receive expected RFC1323 " + "timestamp\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + REASON_SET(reason, PFRES_TS); + return (PF_DROP); + } + } + + + /* + * We will note if a host sends his data packets with or without + * timestamps. And require all data packets to contain a timestamp + * if the first does. PAWS implicitly requires that all data packets be + * timestamped. But I think there are middle-man devices that hijack + * TCP streams immediately after the 3whs and don't timestamp their + * packets (seen in a WWW accelerator or cache). + */ + if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & + (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { + if (got_ts) + src->scrub->pfss_flags |= PFSS_DATA_TS; + else { + src->scrub->pfss_flags |= PFSS_DATA_NOTS; + if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub && + (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { + /* Don't warn if other host rejected RFC1323 */ + DPFPRINTF(("Broken RFC1323 stack did not " + "timestamp data packet. 
Disabled PAWS " + "security.\n")); + pf_print_state(state); + pf_print_flags(th->th_flags); + printf("\n"); + } + } + } + + + /* + * Update PAWS values + */ + if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & + (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { + getmicrouptime(&src->scrub->pfss_last); + if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsval = tsval; + + if (tsecr) { + if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || + (src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_tsecr = tsecr; + + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && + (SEQ_LT(tsval, src->scrub->pfss_tsval0) || + src->scrub->pfss_tsval0 == 0)) { + /* tsval0 MUST be the lowest timestamp */ + src->scrub->pfss_tsval0 = tsval; + } + + /* Only fully initialized after a TS gets echoed */ + if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) + src->scrub->pfss_flags |= PFSS_PAWS; + } + } + + /* I have a dream.... TCP segment reassembly.... 
*/ + return (0); +} + /* Clamp the TCP MSS option to r->max_mss. The TCP options are pulled into a local buffer, an over-large MSS is rewritten there (with the TCP checksum fixed up) and the buffer is copied back into the packet. The Apple variant returns PF_DROP / PF_PASS and reports a rewrite through *rewrptr; the other variant returns the rewrite flag. */ +#ifndef NO_APPLE_EXTENSIONS +static int +pf_normalize_tcpopt(struct pf_rule *r, int dir, struct pfi_kif *kif, + struct pf_pdesc *pd, struct mbuf *m, struct tcphdr *th, int off, + int *rewrptr) +{ +#pragma unused(dir, kif) + sa_family_t af = pd->af; +#else +static int +pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th, + int off, sa_family_t af) +{ +#endif + u_int16_t *mss; + int thoff; + int opt, cnt, optlen = 0; + int rewrite = 0; + u_char opts[MAX_TCPOPTLEN]; + u_char *optp = opts; + + thoff = th->th_off << 2; + cnt = thoff - sizeof (struct tcphdr); + +#ifndef NO_APPLE_MODIFICATIONS + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt, + NULL, NULL, af)) + return PF_DROP; +#else + if (cnt > 0 && !pf_pull_hdr(m, off + sizeof (*th), opts, cnt, + NULL, NULL, af)) + return (rewrite); +#endif + /* Walk the options; stop at EOL or at an option whose length is malformed */ + for (; cnt > 0; cnt -= optlen, optp += optlen) { + opt = optp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = optp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_MAXSEG: /* only touch the MSS value when the option is long enough to hold it; a 2- or 3-byte option would otherwise be read (and written) past its end, possibly past opts[] */ + mss = (u_int16_t *)(optp + 2); + if (optlen >= TCPOLEN_MAXSEG && (ntohs(*mss)) > r->max_mss) { +#ifndef NO_APPLE_MODIFICATIONS + /* + * + * Only do the TCP checksum fixup if delayed + * checksum calculation will not be performed.
+ */ + if (m->m_pkthdr.rcvif || + !(m->m_pkthdr.csum_flags & CSUM_TCP)) + th->th_sum = pf_cksum_fixup(th->th_sum, + *mss, htons(r->max_mss), 0); +#else + th->th_sum = pf_cksum_fixup(th->th_sum, + *mss, htons(r->max_mss), 0); +#endif + *mss = htons(r->max_mss); + rewrite = 1; + } + break; + default: + break; + } + } + +#ifndef NO_APPLE_MODIFICATIONS + if (rewrite) { + struct mbuf *mw; + u_short reason; + + mw = pf_lazy_makewritable(pd, pd->mp, + off + sizeof (*th) + thoff); + if (!mw) { + REASON_SET(&reason, PFRES_MEMORY); + if (r->log) + PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, + r, 0, 0, pd); + return PF_DROP; + } + + *rewrptr = 1; + m_copyback(mw, off + sizeof (*th), thoff - sizeof (*th), opts); + } + + return PF_PASS; +#else + if (rewrite) + m_copyback(m, off + sizeof (*th), thoff - sizeof (*th), opts); + + return (rewrite); +#endif +} diff --git a/bsd/net/pf_osfp.c b/bsd/net/pf_osfp.c new file mode 100644 index 000000000..fcd823cfa --- /dev/null +++ b/bsd/net/pf_osfp.c @@ -0,0 +1,576 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_osfp.c,v 1.4 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.12 2006/12/13 18:14:10 itojun Exp $ */ + +/* + * Copyright (c) 2003 Mike Frantzen + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#if INET6 +#include +#include +#endif /* INET6 */ + +#define DPFPRINTF(format, x...) 
\ + if (pf_status.debug >= PF_DEBUG_NOISY) \ + printf(format, ##x) + +static SLIST_HEAD(pf_osfp_list, pf_os_fingerprint) pf_osfp_list; +static struct pool pf_osfp_entry_pl; +static struct pool pf_osfp_pl; + +static struct pf_os_fingerprint *pf_osfp_find(struct pf_osfp_list *, + struct pf_os_fingerprint *, u_int8_t); +static struct pf_os_fingerprint *pf_osfp_find_exact(struct pf_osfp_list *, + struct pf_os_fingerprint *); +static void pf_osfp_insert(struct pf_osfp_list *, struct pf_os_fingerprint *); + + +/* + * Passively fingerprint the OS of the host (IPv4 or IPv6 TCP SYN packets only). + * Returns the list of possible OSes, or NULL when the packet is not TCP over IPv4/IPv6, its TCP header length is shorter than a struct tcphdr, the header cannot be pulled from the mbuf, or no fingerprint matches. + */ +struct pf_osfp_enlist * +pf_osfp_fingerprint(struct pf_pdesc *pd, struct mbuf *m, int off, + const struct tcphdr *tcp) +{ + struct ip *ip; + struct ip6_hdr *ip6; + char hdr[60]; + + if ((pd->af != PF_INET && pd->af != PF_INET6) || + pd->proto != IPPROTO_TCP || + (tcp->th_off << 2) < (int)sizeof (*tcp)) + return (NULL); + + if (pd->af == PF_INET) { + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)NULL; + } else { + ip = (struct ip *)NULL; + ip6 = mtod(m, struct ip6_hdr *); + } + if (!pf_pull_hdr(m, off, hdr, tcp->th_off << 2, NULL, NULL, + pd->af)) + return (NULL); + + return (pf_osfp_fingerprint_hdr(ip, ip6, (struct tcphdr *)hdr)); +} + /* Build a fingerprint from the IP or IPv6 header (exactly one of ip / ip6 is expected to be non-NULL) plus the TCP header and its options, then look it up in pf_osfp_list. Returns NULL unless the segment is a pure SYN (SYN set, ACK clear), and also for an IPv4 packet with a non-zero fragment offset. */ +struct pf_osfp_enlist * +pf_osfp_fingerprint_hdr(const struct ip *ip, const struct ip6_hdr *ip6, + const struct tcphdr *tcp) +{ +#if !INET6 +#pragma unused(ip6) +#endif /* !INET6 */ + struct pf_os_fingerprint fp, *fpresult; + int cnt, optlen = 0; + const u_int8_t *optp; + char srcname[128]; + + if ((tcp->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) + return (NULL); + if (ip) { + if ((ip->ip_off & htons(IP_OFFMASK)) != 0) + return (NULL); + } + + memset(&fp, 0, sizeof (fp)); + + if (ip) { + fp.fp_psize = ntohs(ip->ip_len); + fp.fp_ttl = ip->ip_ttl; + if (ip->ip_off & htons(IP_DF)) + fp.fp_flags |= PF_OSFP_DF; + (void) inet_ntop(AF_INET, &ip->ip_src, srcname, + (socklen_t)sizeof (srcname)); + } +#if INET6 + else if
(ip6) { + /* jumbo payload? */ + fp.fp_psize = sizeof (struct ip6_hdr) + ntohs(ip6->ip6_plen); + fp.fp_ttl = ip6->ip6_hlim; + fp.fp_flags |= PF_OSFP_DF; + fp.fp_flags |= PF_OSFP_INET6; + (void) inet_ntop(AF_INET6, &ip6->ip6_src, srcname, + (socklen_t)sizeof (srcname)); + } +#endif + else + return (NULL); + fp.fp_wsize = ntohs(tcp->th_win); + + + cnt = (tcp->th_off << 2) - sizeof (*tcp); + optp = (const u_int8_t *)((const char *)tcp + sizeof (*tcp)); + for (; cnt > 0; cnt -= optlen, optp += optlen) { + if (*optp == TCPOPT_EOL) + break; + + fp.fp_optcnt++; + if (*optp == TCPOPT_NOP) { + fp.fp_tcpopts = (fp.fp_tcpopts << PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_NOP; + optlen = 1; + } else { + if (cnt < 2) + return (NULL); + optlen = optp[1]; + if (optlen > cnt || optlen < 2) + return (NULL); + switch (*optp) { + case TCPOPT_MAXSEG: + if (optlen >= TCPOLEN_MAXSEG) + memcpy(&fp.fp_mss, &optp[2], + sizeof (fp.fp_mss)); + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_MSS; +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(fp.fp_mss); +#endif + break; + case TCPOPT_WINDOW: + if (optlen >= TCPOLEN_WINDOW) + memcpy(&fp.fp_wscale, &optp[2], + sizeof (fp.fp_wscale)); +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(fp.fp_wscale); +#endif + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | + PF_OSFP_TCPOPT_WSCALE; + break; + case TCPOPT_SACK_PERMITTED: + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_SACK; + break; + case TCPOPT_TIMESTAMP: + if (optlen >= TCPOLEN_TIMESTAMP) { + u_int32_t ts; + memcpy(&ts, &optp[2], sizeof (ts)); + if (ts == 0) + fp.fp_flags |= PF_OSFP_TS0; + + } + fp.fp_tcpopts = (fp.fp_tcpopts << + PF_OSFP_TCPOPT_BITS) | PF_OSFP_TCPOPT_TS; + break; + default: + return (NULL); + } + } + optlen = MAX(optlen, 1); /* paranoia */ + } + + DPFPRINTF("fingerprinted %s:%d %d:%d:%d:%d:%llx (%d) " + "(TS=%s,M=%s%d,W=%s%d)\n", + srcname, ntohs(tcp->th_sport), + fp.fp_wsize, fp.fp_ttl, (fp.fp_flags & PF_OSFP_DF) != 0, + fp.fp_psize, 
(long long int)fp.fp_tcpopts, fp.fp_optcnt, + (fp.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fp.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fp.fp_mss, + (fp.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fp.fp_flags & PF_OSFP_WSCALE_DC) ? "*" : "", + fp.fp_wscale); + + if ((fpresult = pf_osfp_find(&pf_osfp_list, &fp, + PF_OSFP_MAXTTL_OFFSET))) + return (&fpresult->fp_oses); + return (NULL); +} + +/* Match a fingerprint ID against a list of OSes */ +int +pf_osfp_match(struct pf_osfp_enlist *list, pf_osfp_t os) +{ + struct pf_osfp_entry *entry; + int os_class, os_version, os_subtype; + int en_class, en_version, en_subtype; + + if (os == PF_OSFP_ANY) + return (1); + if (list == NULL) { + DPFPRINTF("osfp no match against %x\n", os); + return (os == PF_OSFP_UNKNOWN); + } + PF_OSFP_UNPACK(os, os_class, os_version, os_subtype); + SLIST_FOREACH(entry, list, fp_entry) { + PF_OSFP_UNPACK(entry->fp_os, en_class, en_version, en_subtype); + if ((os_class == PF_OSFP_ANY || en_class == os_class) && + (os_version == PF_OSFP_ANY || en_version == os_version) && + (os_subtype == PF_OSFP_ANY || en_subtype == os_subtype)) { + DPFPRINTF("osfp matched %s %s %s %x==%x\n", + entry->fp_class_nm, entry->fp_version_nm, + entry->fp_subtype_nm, os, entry->fp_os); + return (1); + } + } + DPFPRINTF("fingerprint 0x%x didn't match\n", os); + return (0); +} + +/* Initialize the OS fingerprint system */ +void +pf_osfp_initialize(void) +{ + pool_init(&pf_osfp_entry_pl, sizeof (struct pf_osfp_entry), 0, 0, 0, + "pfosfpen", NULL); + pool_init(&pf_osfp_pl, sizeof (struct pf_os_fingerprint), 0, 0, 0, + "pfosfp", NULL); + SLIST_INIT(&pf_osfp_list); +} + +#if 0 +void +pf_osfp_destroy(void) +{ + pf_osfp_flush(); + + pool_destroy(&pf_osfp_pl); + pool_destroy(&pf_osfp_entry_pl); +} +#endif + +/* Flush the fingerprint list */ +void +pf_osfp_flush(void) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + + while ((fp = SLIST_FIRST(&pf_osfp_list))) { + 
SLIST_REMOVE_HEAD(&pf_osfp_list, fp_next); + while ((entry = SLIST_FIRST(&fp->fp_oses))) { + SLIST_REMOVE_HEAD(&fp->fp_oses, fp_entry); + pool_put(&pf_osfp_entry_pl, entry); + } + pool_put(&pf_osfp_pl, fp); + } +} + + +/* Add a fingerprint */ +int +pf_osfp_add(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp, fpadd; + struct pf_osfp_entry *entry; + + memset(&fpadd, 0, sizeof (fpadd)); + fpadd.fp_tcpopts = fpioc->fp_tcpopts; + fpadd.fp_wsize = fpioc->fp_wsize; + fpadd.fp_psize = fpioc->fp_psize; + fpadd.fp_mss = fpioc->fp_mss; + fpadd.fp_flags = fpioc->fp_flags; + fpadd.fp_optcnt = fpioc->fp_optcnt; + fpadd.fp_wscale = fpioc->fp_wscale; + fpadd.fp_ttl = fpioc->fp_ttl; + + DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " + "(TS=%s,M=%s%d,W=%s%d) %x\n", + fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, + fpioc->fp_os.fp_subtype_nm, + (fpadd.fp_flags & PF_OSFP_WSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MSS) ? "S" : + (fpadd.fp_flags & PF_OSFP_WSIZE_MTU) ? "T" : + (fpadd.fp_flags & PF_OSFP_WSIZE_DC) ? "*" : "", + fpadd.fp_wsize, + fpadd.fp_ttl, + (fpadd.fp_flags & PF_OSFP_DF) ? 1 : 0, + (fpadd.fp_flags & PF_OSFP_PSIZE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_PSIZE_DC) ? "*" : "", + fpadd.fp_psize, + (long long int)fpadd.fp_tcpopts, fpadd.fp_optcnt, + (fpadd.fp_flags & PF_OSFP_TS0) ? "0" : "", + (fpadd.fp_flags & PF_OSFP_MSS_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_MSS_DC) ? "*" : "", + fpadd.fp_mss, + (fpadd.fp_flags & PF_OSFP_WSCALE_MOD) ? "%" : + (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", + fpadd.fp_wscale, + fpioc->fp_os.fp_os); + + + if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) + return (EEXIST); + } + if ((entry = pool_get(&pf_osfp_entry_pl, PR_WAITOK)) == NULL) + return (ENOMEM); + } else { + if ((fp = pool_get(&pf_osfp_pl, PR_WAITOK)) == NULL) + return (ENOMEM); + memset(fp, 0, sizeof (*fp)); + fp->fp_tcpopts = fpioc->fp_tcpopts; + fp->fp_wsize = fpioc->fp_wsize; + fp->fp_psize = fpioc->fp_psize; + fp->fp_mss = fpioc->fp_mss; + fp->fp_flags = fpioc->fp_flags; + fp->fp_optcnt = fpioc->fp_optcnt; + fp->fp_wscale = fpioc->fp_wscale; + fp->fp_ttl = fpioc->fp_ttl; + SLIST_INIT(&fp->fp_oses); + if ((entry = pool_get(&pf_osfp_entry_pl, PR_WAITOK)) == NULL) { + pool_put(&pf_osfp_pl, fp); + return (ENOMEM); + } + pf_osfp_insert(&pf_osfp_list, fp); + } + memcpy(entry, &fpioc->fp_os, sizeof (*entry)); + + /* Make sure the strings are NUL terminated */ + entry->fp_class_nm[sizeof (entry->fp_class_nm)-1] = '\0'; + entry->fp_version_nm[sizeof (entry->fp_version_nm)-1] = '\0'; + entry->fp_subtype_nm[sizeof (entry->fp_subtype_nm)-1] = '\0'; + + SLIST_INSERT_HEAD(&fp->fp_oses, entry, fp_entry); + +#ifdef PFDEBUG + if ((fp = pf_osfp_validate())) + printf("Invalid fingerprint list\n"); +#endif /* PFDEBUG */ + return (0); +} + + +/* Find a fingerprint in the list */ +struct pf_os_fingerprint * +pf_osfp_find(struct pf_osfp_list *list, struct pf_os_fingerprint *find, + u_int8_t ttldiff) +{ + struct pf_os_fingerprint *f; + +#define MATCH_INT(_MOD, _DC, _field) \ + if ((f->fp_flags & _DC) == 0) { \ + if ((f->fp_flags & _MOD) == 0) { \ + if (f->_field != find->_field) \ + continue; \ + } else { \ + if (f->_field == 0 || find->_field % f->_field) \ + continue; \ + } \ + } + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts != find->fp_tcpopts || + f->fp_optcnt != find->fp_optcnt || + f->fp_ttl < find->fp_ttl || + f->fp_ttl - find->fp_ttl > ttldiff || 
+ (f->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0)) != + (find->fp_flags & (PF_OSFP_DF|PF_OSFP_TS0))) + continue; + + MATCH_INT(PF_OSFP_PSIZE_MOD, PF_OSFP_PSIZE_DC, fp_psize) + MATCH_INT(PF_OSFP_MSS_MOD, PF_OSFP_MSS_DC, fp_mss) + MATCH_INT(PF_OSFP_WSCALE_MOD, PF_OSFP_WSCALE_DC, fp_wscale) + if ((f->fp_flags & PF_OSFP_WSIZE_DC) == 0) { + if (f->fp_flags & PF_OSFP_WSIZE_MSS) { + if (find->fp_mss == 0) + continue; + +/* + * Some "smart" NAT devices and DSL routers will tweak the MSS size and + * will set it to whatever is suitable for the link type. + */ +#define SMART_MSS 1460 + if ((find->fp_wsize % find->fp_mss || + find->fp_wsize / find->fp_mss != + f->fp_wsize) && + (find->fp_wsize % SMART_MSS || + find->fp_wsize / SMART_MSS != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MTU) { + if (find->fp_mss == 0) + continue; + +#define MTUOFF (sizeof (struct ip) + sizeof (struct tcphdr)) +#define SMART_MTU (SMART_MSS + MTUOFF) + if ((find->fp_wsize % (find->fp_mss + MTUOFF) || + find->fp_wsize / (find->fp_mss + MTUOFF) != + f->fp_wsize) && + (find->fp_wsize % SMART_MTU || + find->fp_wsize / SMART_MTU != + f->fp_wsize)) + continue; + } else if (f->fp_flags & PF_OSFP_WSIZE_MOD) { + if (f->fp_wsize == 0 || find->fp_wsize % + f->fp_wsize) + continue; + } else { + if (f->fp_wsize != find->fp_wsize) + continue; + } + } + return (f); + } + + return (NULL); +} + +/* Find an exact fingerprint in the list */ +struct pf_os_fingerprint * +pf_osfp_find_exact(struct pf_osfp_list *list, struct pf_os_fingerprint *find) +{ + struct pf_os_fingerprint *f; + + SLIST_FOREACH(f, list, fp_next) { + if (f->fp_tcpopts == find->fp_tcpopts && + f->fp_wsize == find->fp_wsize && + f->fp_psize == find->fp_psize && + f->fp_mss == find->fp_mss && + f->fp_flags == find->fp_flags && + f->fp_optcnt == find->fp_optcnt && + f->fp_wscale == find->fp_wscale && + f->fp_ttl == find->fp_ttl) + return (f); + } + + return (NULL); +} + +/* Insert a fingerprint into the list */ +void 
+pf_osfp_insert(struct pf_osfp_list *list, struct pf_os_fingerprint *ins) +{ + struct pf_os_fingerprint *f, *prev = NULL; + + /* XXX need to go semi tree based. can key on tcp options */ + + SLIST_FOREACH(f, list, fp_next) + prev = f; + if (prev) + SLIST_INSERT_AFTER(prev, ins, fp_next); + else + SLIST_INSERT_HEAD(list, ins, fp_next); +} + +/* Fill a fingerprint by its number (from an ioctl) */ +int +pf_osfp_get(struct pf_osfp_ioctl *fpioc) +{ + struct pf_os_fingerprint *fp; + struct pf_osfp_entry *entry; + int num = fpioc->fp_getnum; + int i = 0; + + + memset(fpioc, 0, sizeof (*fpioc)); + SLIST_FOREACH(fp, &pf_osfp_list, fp_next) { + SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { + if (i++ == num) { + fpioc->fp_mss = fp->fp_mss; + fpioc->fp_wsize = fp->fp_wsize; + fpioc->fp_flags = fp->fp_flags; + fpioc->fp_psize = fp->fp_psize; + fpioc->fp_ttl = fp->fp_ttl; + fpioc->fp_wscale = fp->fp_wscale; + fpioc->fp_getnum = num; + memcpy(&fpioc->fp_os, entry, + sizeof (fpioc->fp_os)); + return (0); + } + } + } + + return (EBUSY); +} + + +/* Validate that each signature is reachable */ +struct pf_os_fingerprint * +pf_osfp_validate(void) +{ + struct pf_os_fingerprint *f, *f2, find; + + SLIST_FOREACH(f, &pf_osfp_list, fp_next) { + memcpy(&find, f, sizeof (find)); + + /* We do a few MSS/th_win percolations to make things unique */ + if (find.fp_mss == 0) + find.fp_mss = 128; + if (f->fp_flags & PF_OSFP_WSIZE_MSS) + find.fp_wsize *= find.fp_mss, 1; + else if (f->fp_flags & PF_OSFP_WSIZE_MTU) + find.fp_wsize *= (find.fp_mss + 40); + else if (f->fp_flags & PF_OSFP_WSIZE_MOD) + find.fp_wsize *= 2; + if (f != (f2 = pf_osfp_find(&pf_osfp_list, &find, 0))) { + if (f2) + printf("Found \"%s %s %s\" instead of " + "\"%s %s %s\"\n", + SLIST_FIRST(&f2->fp_oses)->fp_class_nm, + SLIST_FIRST(&f2->fp_oses)->fp_version_nm, + SLIST_FIRST(&f2->fp_oses)->fp_subtype_nm, + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + 
else + printf("Couldn't find \"%s %s %s\"\n", + SLIST_FIRST(&f->fp_oses)->fp_class_nm, + SLIST_FIRST(&f->fp_oses)->fp_version_nm, + SLIST_FIRST(&f->fp_oses)->fp_subtype_nm); + return (f); + } + } + return (NULL); +} diff --git a/bsd/net/pf_ruleset.c b/bsd/net/pf_ruleset.c new file mode 100644 index 000000000..260e8da3c --- /dev/null +++ b/bsd/net/pf_ruleset.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_ruleset.c,v 1.2 2007/08/10 03:00:16 jhw Exp $ */ +/* $OpenBSD: pf_ruleset.c,v 1.1 2006/10/27 13:56:51 mcbride Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * Copyright (c) 2002,2003 Henning Brauer + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Effort sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F30602-01-2-0537. + * + */ + +#include +#include +#ifdef KERNEL +#include +#include +#include +#endif /* KERNEL */ +#include + +#include +#include +#include +#include + +#include +#include + +#if INET6 +#include +#endif /* INET6 */ + + +#ifdef KERNEL +#define DPFPRINTF(format, x...) 
/*
 * Local replacement for strrchr(3): locate the last occurrence of `ch'
 * in the NUL-terminated string `c'.
 *
 * As with the standard function, `ch' is converted to char before the
 * comparison, so a byte value such as 0xe9 passed as a positive int is
 * found even where plain char is signed.  Searching for '\0' returns a
 * pointer to the terminator.
 *
 * Returns a pointer to the last match, or NULL if `ch' does not occur.
 */
static char *
_strrchr(const char *c, int ch)
{
	const char want = (char)ch;
	char *last = NULL;
	char *p;

	/* The cast through size_t drops const, as strrchr's contract does. */
	for (p = (char *)(size_t)c; ; ++p) {
		if (*p == want)
			last = p;
		/* Checked after the match so that '\0' itself can be found. */
		if (*p == '\0')
			return (last);
	}
	/* NOTREACHED */
}
-1 : 1) : 0); +} + +int +pf_get_ruleset_number(u_int8_t action) +{ + switch (action) { + case PF_SCRUB: + case PF_NOSCRUB: + return (PF_RULESET_SCRUB); + break; + case PF_PASS: + case PF_DROP: + return (PF_RULESET_FILTER); + break; + case PF_NAT: + case PF_NONAT: + return (PF_RULESET_NAT); + break; + case PF_BINAT: + case PF_NOBINAT: + return (PF_RULESET_BINAT); + break; + case PF_RDR: + case PF_NORDR: + return (PF_RULESET_RDR); + break; + default: + return (PF_RULESET_MAX); + break; + } +} + +void +pf_init_ruleset(struct pf_ruleset *ruleset) +{ + int i; + + memset(ruleset, 0, sizeof (struct pf_ruleset)); + for (i = 0; i < PF_RULESET_MAX; i++) { + TAILQ_INIT(&ruleset->rules[i].queues[0]); + TAILQ_INIT(&ruleset->rules[i].queues[1]); + ruleset->rules[i].active.ptr = &ruleset->rules[i].queues[0]; + ruleset->rules[i].inactive.ptr = &ruleset->rules[i].queues[1]; + } +} + +struct pf_anchor * +pf_find_anchor(const char *path) +{ + struct pf_anchor *key, *found; + + key = (struct pf_anchor *)rs_malloc(sizeof (*key)); + memset(key, 0, sizeof (*key)); + strlcpy(key->path, path, sizeof (key->path)); + found = RB_FIND(pf_anchor_global, &pf_anchors, key); + rs_free(key); + return (found); +} + +struct pf_ruleset * +pf_find_ruleset(const char *path) +{ + struct pf_anchor *anchor; + + while (*path == '/') + path++; + if (!*path) + return (&pf_main_ruleset); + anchor = pf_find_anchor(path); + if (anchor == NULL) + return (NULL); + else + return (&anchor->ruleset); +} + +struct pf_ruleset * +pf_find_or_create_ruleset(const char *path) +{ + char *p, *q, *r; + struct pf_ruleset *ruleset; + struct pf_anchor *anchor = 0, *dup, *parent = NULL; + + if (path[0] == 0) + return (&pf_main_ruleset); + while (*path == '/') + path++; + ruleset = pf_find_ruleset(path); + if (ruleset != NULL) + return (ruleset); + p = (char *)rs_malloc(MAXPATHLEN); + bzero(p, MAXPATHLEN); + strlcpy(p, path, MAXPATHLEN); + while (parent == NULL && (q = strrchr(p, '/')) != NULL) { + *q = 0; + if ((ruleset = 
pf_find_ruleset(p)) != NULL) { + parent = ruleset->anchor; + break; + } + } + if (q == NULL) + q = p; + else + q++; + strlcpy(p, path, MAXPATHLEN); + if (!*q) { + rs_free(p); + return (NULL); + } + while ((r = strchr(q, '/')) != NULL || *q) { + if (r != NULL) + *r = 0; + if (!*q || strlen(q) >= PF_ANCHOR_NAME_SIZE || + (parent != NULL && strlen(parent->path) >= + MAXPATHLEN - PF_ANCHOR_NAME_SIZE - 1)) { + rs_free(p); + return (NULL); + } + anchor = (struct pf_anchor *)rs_malloc(sizeof (*anchor)); + if (anchor == NULL) { + rs_free(p); + return (NULL); + } + memset(anchor, 0, sizeof (*anchor)); + RB_INIT(&anchor->children); + strlcpy(anchor->name, q, sizeof (anchor->name)); + if (parent != NULL) { + strlcpy(anchor->path, parent->path, + sizeof (anchor->path)); + strlcat(anchor->path, "/", sizeof (anchor->path)); + } + strlcat(anchor->path, anchor->name, sizeof (anchor->path)); + if ((dup = RB_INSERT(pf_anchor_global, &pf_anchors, anchor)) != + NULL) { + printf("pf_find_or_create_ruleset: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", + anchor->path, anchor->name, dup->path, dup->name); + rs_free(anchor); + rs_free(p); + return (NULL); + } + if (parent != NULL) { + anchor->parent = parent; + if ((dup = RB_INSERT(pf_anchor_node, &parent->children, + anchor)) != NULL) { + printf("pf_find_or_create_ruleset: " + "RB_INSERT2 '%s' '%s' collides with " + "'%s' '%s'\n", anchor->path, anchor->name, + dup->path, dup->name); + RB_REMOVE(pf_anchor_global, &pf_anchors, + anchor); + rs_free(anchor); + rs_free(p); + return (NULL); + } + } + pf_init_ruleset(&anchor->ruleset); + anchor->ruleset.anchor = anchor; + parent = anchor; + if (r != NULL) + q = r + 1; + else + *q = 0; + } + rs_free(p); + return (anchor ? 
&anchor->ruleset : 0); +} + +void +pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset) +{ + struct pf_anchor *parent; + int i; + + while (ruleset != NULL) { + if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + !RB_EMPTY(&ruleset->anchor->children) || + ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || + ruleset->topen) + return; + for (i = 0; i < PF_RULESET_MAX; ++i) + if (!TAILQ_EMPTY(ruleset->rules[i].active.ptr) || + !TAILQ_EMPTY(ruleset->rules[i].inactive.ptr) || + ruleset->rules[i].inactive.open) + return; + RB_REMOVE(pf_anchor_global, &pf_anchors, ruleset->anchor); + if ((parent = ruleset->anchor->parent) != NULL) + RB_REMOVE(pf_anchor_node, &parent->children, + ruleset->anchor); + rs_free(ruleset->anchor); + if (parent == NULL) + return; + ruleset = &parent->ruleset; + } +} + +int +pf_anchor_setup(struct pf_rule *r, const struct pf_ruleset *s, + const char *name) +{ + char *p, *path; + struct pf_ruleset *ruleset; + + r->anchor = NULL; + r->anchor_relative = 0; + r->anchor_wildcard = 0; + if (!name[0]) + return (0); + path = (char *)rs_malloc(MAXPATHLEN); + bzero(path, MAXPATHLEN); + if (name[0] == '/') + strlcpy(path, name + 1, MAXPATHLEN); + else { + /* relative path */ + r->anchor_relative = 1; + if (s->anchor == NULL || !s->anchor->path[0]) + path[0] = 0; + else + strlcpy(path, s->anchor->path, MAXPATHLEN); + while (name[0] == '.' && name[1] == '.' && name[2] == '/') { + if (!path[0]) { + printf("pf_anchor_setup: .. 
beyond root\n"); + rs_free(path); + return (1); + } + if ((p = strrchr(path, '/')) != NULL) + *p = 0; + else + path[0] = 0; + r->anchor_relative++; + name += 3; + } + if (path[0]) + strlcat(path, "/", MAXPATHLEN); + strlcat(path, name, MAXPATHLEN); + } + if ((p = strrchr(path, '/')) != NULL && strcmp(p, "/*") == 0) { + r->anchor_wildcard = 1; + *p = 0; + } + ruleset = pf_find_or_create_ruleset(path); + rs_free(path); + if (ruleset == NULL || ruleset->anchor == NULL) { + printf("pf_anchor_setup: ruleset\n"); + return (1); + } + r->anchor = ruleset->anchor; + r->anchor->refcnt++; + return (0); +} + +int +pf_anchor_copyout(const struct pf_ruleset *rs, const struct pf_rule *r, + struct pfioc_rule *pr) +{ + pr->anchor_call[0] = 0; + if (r->anchor == NULL) + return (0); + if (!r->anchor_relative) { + strlcpy(pr->anchor_call, "/", sizeof (pr->anchor_call)); + strlcat(pr->anchor_call, r->anchor->path, + sizeof (pr->anchor_call)); + } else { + char *a, *p; + int i; + + a = (char *)rs_malloc(MAXPATHLEN); + bzero(a, MAXPATHLEN); + if (rs->anchor == NULL) + a[0] = 0; + else + strlcpy(a, rs->anchor->path, MAXPATHLEN); + for (i = 1; i < r->anchor_relative; ++i) { + if ((p = strrchr(a, '/')) == NULL) + p = a; + *p = 0; + strlcat(pr->anchor_call, "../", + sizeof (pr->anchor_call)); + } + if (strncmp(a, r->anchor->path, strlen(a))) { + printf("pf_anchor_copyout: '%s' '%s'\n", a, + r->anchor->path); + rs_free(a); + return (1); + } + if (strlen(r->anchor->path) > strlen(a)) + strlcat(pr->anchor_call, r->anchor->path + (a[0] ? + strlen(a) + 1 : 0), sizeof (pr->anchor_call)); + rs_free(a); + } + if (r->anchor_wildcard) + strlcat(pr->anchor_call, pr->anchor_call[0] ? 
"/*" : "*", + sizeof (pr->anchor_call)); + return (0); +} + +void +pf_anchor_remove(struct pf_rule *r) +{ + if (r->anchor == NULL) + return; + if (r->anchor->refcnt <= 0) { + printf("pf_anchor_remove: broken refcount\n"); + r->anchor = NULL; + return; + } + if (!--r->anchor->refcnt) + pf_remove_if_empty_ruleset(&r->anchor->ruleset); + r->anchor = NULL; +} diff --git a/bsd/net/pf_table.c b/bsd/net/pf_table.c new file mode 100644 index 000000000..8b4bf61c6 --- /dev/null +++ b/bsd/net/pf_table.c @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pf_table.c,v 1.4 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: pf_table.c,v 1.68 2006/05/02 10:08:45 dhartmei Exp $ */ + +/* + * Copyright (c) 2002 Cedric Berger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define ACCEPT_FLAGS(flags, oklist) \ + do { \ + if ((flags & ~(oklist)) & \ + PFR_FLAG_ALLMASK) \ + return (EINVAL); \ + } while (0) + +#define COPYIN(from, to, size, flags) \ + ((flags & PFR_FLAG_USERIOCTL) ? 
/*
 * Argument block passed (as the opaque pointer) to the pfr_walktree()
 * callback when a table's radix trees are walked via rnh_walktree.
 */
struct pfr_walktree {
	/* Operation the walk performs on each node. */
	enum pfrw_op {
		PFRW_MARK,
		PFRW_SWEEP,
		PFRW_ENQUEUE,
		PFRW_GET_ADDRS,
		PFRW_GET_ASTATS,
		PFRW_POOL_GET,
		PFRW_DYNADDR_UPDATE
	}	 pfrw_op;
	/*
	 * Per-operation argument; only one member is meaningful for a given
	 * pfrw_op (e.g. pfrw1_addr is set for PFRW_GET_ADDRS and
	 * pfrw1_astats for PFRW_GET_ASTATS).
	 * NOTE(review): pairing of the other members with operations is
	 * presumed from their names -- confirm against pfr_walktree().
	 */
	union {
		struct pfr_addr		*pfrw1_addr;
		struct pfr_astats	*pfrw1_astats;
		struct pfr_kentryworkq	*pfrw1_workq;
		struct pfr_kentry	*pfrw1_kentry;
		struct pfi_dynaddr	*pfrw1_dyn;
	}	 pfrw_1;
	/*
	 * Counter shared with the walker.  The address/statistics getters
	 * preload it with the table's entry count and treat a non-zero
	 * value after the walk as corruption.  Also reachable as pfrw_cnt.
	 */
	int	 pfrw_free;
	/* Caller's PFR_FLAG_* flags, handed through to the walker. */
	int	 pfrw_flags;
};
/* Shorthand accessors for the union members and the counter alias. */
#define	pfrw_addr	pfrw_1.pfrw1_addr
#define	pfrw_astats	pfrw_1.pfrw1_astats
#define	pfrw_workq	pfrw_1.pfrw1_workq
#define	pfrw_kentry	pfrw_1.pfrw1_kentry
#define	pfrw_dyn	pfrw_1.pfrw1_dyn
#define	pfrw_cnt	pfrw_free
sockaddr_in pfr_sin; +static struct sockaddr_in6 pfr_sin6; +static union sockaddr_union pfr_mask; +static struct pf_addr pfr_ffaddr; + +static void pfr_copyout_addr(struct pfr_addr *, struct pfr_kentry *ke); +static int pfr_validate_addr(struct pfr_addr *); +static void pfr_enqueue_addrs(struct pfr_ktable *, struct pfr_kentryworkq *, + int *, int); +static void pfr_mark_addrs(struct pfr_ktable *); +static struct pfr_kentry *pfr_lookup_addr(struct pfr_ktable *, + struct pfr_addr *, int); +static struct pfr_kentry *pfr_create_kentry(struct pfr_addr *, int); +static void pfr_destroy_kentries(struct pfr_kentryworkq *); +static void pfr_destroy_kentry(struct pfr_kentry *); +static void pfr_insert_kentries(struct pfr_ktable *, + struct pfr_kentryworkq *, u_int64_t); +static void pfr_remove_kentries(struct pfr_ktable *, struct pfr_kentryworkq *); +static void pfr_clstats_kentries(struct pfr_kentryworkq *, u_int64_t, int); +static void pfr_reset_feedback(struct pfr_addr *, int, int); +static void pfr_prepare_network(union sockaddr_union *, int, int); +static int pfr_route_kentry(struct pfr_ktable *, struct pfr_kentry *); +static int pfr_unroute_kentry(struct pfr_ktable *, struct pfr_kentry *); +static int pfr_walktree(struct radix_node *, void *); +static int pfr_validate_table(struct pfr_table *, int, int); +static int pfr_fix_anchor(char *); +static void pfr_commit_ktable(struct pfr_ktable *, u_int64_t); +static void pfr_insert_ktables(struct pfr_ktableworkq *); +static void pfr_insert_ktable(struct pfr_ktable *); +static void pfr_setflags_ktables(struct pfr_ktableworkq *); +static void pfr_setflags_ktable(struct pfr_ktable *, int); +static void pfr_clstats_ktables(struct pfr_ktableworkq *, u_int64_t, int); +static void pfr_clstats_ktable(struct pfr_ktable *, u_int64_t, int); +static struct pfr_ktable *pfr_create_ktable(struct pfr_table *, u_int64_t, int); +static void pfr_destroy_ktables(struct pfr_ktableworkq *, int); +static void pfr_destroy_ktable(struct pfr_ktable 
*, int); +static int pfr_ktable_compare(struct pfr_ktable *, struct pfr_ktable *); +static struct pfr_ktable *pfr_lookup_table(struct pfr_table *); +static void pfr_clean_node_mask(struct pfr_ktable *, struct pfr_kentryworkq *); +static int pfr_table_count(struct pfr_table *, int); +static int pfr_skip_table(struct pfr_table *, struct pfr_ktable *, int); +static struct pfr_kentry *pfr_kentry_byidx(struct pfr_ktable *, int, int); + +RB_PROTOTYPE_SC(static, pfr_ktablehead, pfr_ktable, pfrkt_tree, + pfr_ktable_compare); +RB_GENERATE(pfr_ktablehead, pfr_ktable, pfrkt_tree, pfr_ktable_compare); + +static struct pfr_ktablehead pfr_ktables; +static struct pfr_table pfr_nulltable; +static int pfr_ktable_cnt; + +void +pfr_initialize(void) +{ + pool_init(&pfr_ktable_pl, sizeof (struct pfr_ktable), 0, 0, 0, + "pfrktable", NULL); + pool_init(&pfr_kentry_pl, sizeof (struct pfr_kentry), 0, 0, 0, + "pfrkentry", NULL); + pool_init(&pfr_kentry_pl2, sizeof (struct pfr_kentry), 0, 0, 0, + "pfrkentry2", NULL); + + pfr_sin.sin_len = sizeof (pfr_sin); + pfr_sin.sin_family = AF_INET; + pfr_sin6.sin6_len = sizeof (pfr_sin6); + pfr_sin6.sin6_family = AF_INET6; + + memset(&pfr_ffaddr, 0xff, sizeof (pfr_ffaddr)); +} + +#if 0 +void +pfr_destroy(void) +{ + pool_destroy(&pfr_ktable_pl); + pool_destroy(&pfr_kentry_pl); + pool_destroy(&pfr_kentry_pl2); +} +#endif + +int +pfr_clr_addrs(struct pfr_table *tbl, int *ndel, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + pfr_enqueue_addrs(kt, &workq, ndel, 0); + + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_remove_kentries(kt, &workq); + if (kt->pfrkt_cnt) { + printf("pfr_clr_addrs: corruption detected (%d).\n", + kt->pfrkt_cnt); + 
/*
 * Add `size' addresses from the array `addr' to table `tbl'.
 *
 * Accepted flags: PFR_FLAG_ATOMIC, PFR_FLAG_DUMMY, PFR_FLAG_FEEDBACK (any
 * other flag within PFR_FLAG_ALLMASK yields EINVAL).  With
 * PFR_FLAG_FEEDBACK a per-address result code is written back into
 * `addr'; with PFR_FLAG_DUMMY the table is left unchanged and the new
 * entries are destroyed after counting.
 *
 * On success returns 0 and, if `nadd' is not NULL, stores the number of
 * addresses that were (or would have been) added.  Error returns:
 * EINVAL (bad table or address), ESRCH (table missing or inactive),
 * EPERM (constant table), ENOMEM, EFAULT (copy in/out failed).
 */
int
pfr_add_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size,
    int *nadd, int flags)
{
	struct pfr_ktable *kt, *tmpkt;
	struct pfr_kentryworkq workq;
	struct pfr_kentry *p, *q;
	struct pfr_addr ad;
	int i, rv, xadd = 0;
	u_int64_t tzero = pf_time_second();

	ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY |
	    PFR_FLAG_FEEDBACK);
	if (pfr_validate_table(tbl, 0, flags & PFR_FLAG_USERIOCTL))
		return (EINVAL);
	kt = pfr_lookup_table(tbl);
	if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE))
		return (ESRCH);
	if (kt->pfrkt_flags & PFR_TFLAG_CONST)
		return (EPERM);
	/*
	 * Scratch table: new entries are routed here first, so a repeated
	 * address within this request shows up as a hit in tmpkt.
	 */
	tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0);
	if (tmpkt == NULL)
		return (ENOMEM);
	SLIST_INIT(&workq);
	for (i = 0; i < size; i++) {
		if (COPYIN(addr+i, &ad, sizeof (ad), flags))
			senderr(EFAULT);
		if (pfr_validate_addr(&ad))
			senderr(EINVAL);
		/* p: already in the real table; q: seen earlier in this call */
		p = pfr_lookup_addr(kt, &ad, 1);
		q = pfr_lookup_addr(tmpkt, &ad, 1);
		if (flags & PFR_FLAG_FEEDBACK) {
			if (q != NULL)
				ad.pfra_fback = PFR_FB_DUPLICATE;
			else if (p == NULL)
				ad.pfra_fback = PFR_FB_ADDED;
			else if (p->pfrke_not != ad.pfra_not)
				ad.pfra_fback = PFR_FB_CONFLICT;
			else
				ad.pfra_fback = PFR_FB_NONE;
		}
		if (p == NULL && q == NULL) {
			p = pfr_create_kentry(&ad,
			    !(flags & PFR_FLAG_USERIOCTL));
			if (p == NULL)
				senderr(ENOMEM);
			if (pfr_route_kentry(tmpkt, p)) {
				/* could not be routed: drop it, report nothing */
				pfr_destroy_kentry(p);
				ad.pfra_fback = PFR_FB_NONE;
			} else {
				SLIST_INSERT_HEAD(&workq, p, pfrke_workq);
				xadd++;
			}
		}
		if (flags & PFR_FLAG_FEEDBACK)
			if (COPYOUT(&ad, addr+i, sizeof (ad), flags))
				senderr(EFAULT);
	}
	/* Must run before the queued entries are inserted or destroyed. */
	pfr_clean_node_mask(tmpkt, &workq);
	if (!(flags & PFR_FLAG_DUMMY)) {
		pfr_insert_kentries(kt, &workq, tzero);
	} else
		pfr_destroy_kentries(&workq);
	if (nadd != NULL)
		*nadd = xadd;
	pfr_destroy_ktable(tmpkt, 0);
	return (0);
_bad:
	/* senderr() lands here with rv set: undo everything queued so far. */
	pfr_clean_node_mask(tmpkt, &workq);
	pfr_destroy_kentries(&workq);
	if (flags & PFR_FLAG_FEEDBACK)
		pfr_reset_feedback(addr, size, flags);
	pfr_destroy_ktable(tmpkt, 0);
	return (rv);
}
& PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof (ad), flags)) + senderr(EFAULT); + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_remove_kentries(kt, &workq); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + return (rv); +} + +int +pfr_set_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *size2, int *nadd, int *ndel, int *nchange, int flags, + u_int32_t ignore_pfrt_flags) +{ + struct pfr_ktable *kt, *tmpkt; + struct pfr_kentryworkq addq, delq, changeq; + struct pfr_kentry *p, *q; + struct pfr_addr ad; + int i, rv, xadd = 0, xdel = 0, xchange = 0; + u_int64_t tzero = pf_time_second(); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, ignore_pfrt_flags, flags & + PFR_FLAG_USERIOCTL)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_flags & PFR_TFLAG_CONST) + return (EPERM); + tmpkt = pfr_create_ktable(&pfr_nulltable, 0, 0); + if (tmpkt == NULL) + return (ENOMEM); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&delq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof (ad), flags)) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + ad.pfra_fback = PFR_FB_NONE; + p = pfr_lookup_addr(kt, &ad, 1); + if (p != NULL) { + if (p->pfrke_mark) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p->pfrke_mark = 1; + if (p->pfrke_not != ad.pfra_not) { + SLIST_INSERT_HEAD(&changeq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_CHANGED; + xchange++; + } + } else { + q = pfr_lookup_addr(tmpkt, &ad, 1); + if (q != NULL) { + ad.pfra_fback = PFR_FB_DUPLICATE; + goto _skip; + } + p = pfr_create_kentry(&ad, + !(flags & PFR_FLAG_USERIOCTL)); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(tmpkt, p)) { + pfr_destroy_kentry(p); + ad.pfra_fback = 
PFR_FB_NONE; + } else { + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + ad.pfra_fback = PFR_FB_ADDED; + xadd++; + } + } +_skip: + if (flags & PFR_FLAG_FEEDBACK) + if (COPYOUT(&ad, addr+i, sizeof (ad), flags)) + senderr(EFAULT); + } + pfr_enqueue_addrs(kt, &delq, &xdel, ENQUEUE_UNMARKED_ONLY); + if ((flags & PFR_FLAG_FEEDBACK) && *size2) { + if (*size2 < size+xdel) { + *size2 = size+xdel; + senderr(0); + } + i = 0; + SLIST_FOREACH(p, &delq, pfrke_workq) { + pfr_copyout_addr(&ad, p); + ad.pfra_fback = PFR_FB_DELETED; + if (COPYOUT(&ad, addr+size+i, sizeof (ad), flags)) + senderr(EFAULT); + i++; + } + } + pfr_clean_node_mask(tmpkt, &addq); + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + } else + pfr_destroy_kentries(&addq); + if (nadd != NULL) + *nadd = xadd; + if (ndel != NULL) + *ndel = xdel; + if (nchange != NULL) + *nchange = xchange; + if ((flags & PFR_FLAG_FEEDBACK) && size2) + *size2 = size+xdel; + pfr_destroy_ktable(tmpkt, 0); + return (0); +_bad: + pfr_clean_node_mask(tmpkt, &addq); + pfr_destroy_kentries(&addq); + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + pfr_destroy_ktable(tmpkt, 0); + return (rv); +} + +int +pfr_tst_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nmatch, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, xmatch = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_REPLACE); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof (ad), flags)) + return (EFAULT); + if (pfr_validate_addr(&ad)) + return (EINVAL); + if (ADDR_NETWORK(&ad)) + return (EINVAL); + p = pfr_lookup_addr(kt, &ad, 0); + if (flags & PFR_FLAG_REPLACE) + pfr_copyout_addr(&ad, p); + ad.pfra_fback = (p == 
NULL) ? PFR_FB_NONE : + (p->pfrke_not ? PFR_FB_NOTMATCH : PFR_FB_MATCH); + if (p != NULL && !p->pfrke_not) + xmatch++; + if (COPYOUT(&ad, addr+i, sizeof (ad), flags)) + return (EFAULT); + } + if (nmatch != NULL) + *nmatch = xmatch; + return (0); +} + +int +pfr_get_addrs(struct pfr_table *tbl, struct pfr_addr *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + int rv; + + ACCEPT_FLAGS(flags, 0); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof (w)); + w.pfrw_op = PFRW_GET_ADDRS; + w.pfrw_addr = addr; + w.pfrw_free = kt->pfrkt_cnt; + w.pfrw_flags = flags; + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, + pfr_walktree, &w); + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_addrs: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_get_astats(struct pfr_table *tbl, struct pfr_astats *addr, int *size, + int flags) +{ + struct pfr_ktable *kt; + struct pfr_walktree w; + struct pfr_kentryworkq workq; + int rv; + u_int64_t tzero = pf_time_second(); + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + if (kt->pfrkt_cnt > *size) { + *size = kt->pfrkt_cnt; + return (0); + } + + bzero(&w, sizeof (w)); + w.pfrw_op = PFRW_GET_ASTATS; + w.pfrw_astats = addr; + w.pfrw_free = kt->pfrkt_cnt; + w.pfrw_flags = flags; + rv = kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree, &w); + if (!rv) + rv = kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, + pfr_walktree, &w); + if (!rv && (flags & 
PFR_FLAG_CLSTATS)) { + pfr_enqueue_addrs(kt, &workq, NULL, 0); + pfr_clstats_kentries(&workq, tzero, 0); + } + if (rv) + return (rv); + + if (w.pfrw_free) { + printf("pfr_get_astats: corruption detected (%d).\n", + w.pfrw_free); + return (ENOTTY); + } + *size = kt->pfrkt_cnt; + return (0); +} + +int +pfr_clr_astats(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nzero, int flags) +{ + struct pfr_ktable *kt; + struct pfr_kentryworkq workq; + struct pfr_kentry *p; + struct pfr_addr ad; + int i, rv, xzero = 0; + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_FEEDBACK); + if (pfr_validate_table(tbl, 0, 0)) + return (EINVAL); + kt = pfr_lookup_table(tbl); + if (kt == NULL || !(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (ESRCH); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof (ad), flags)) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + p = pfr_lookup_addr(kt, &ad, 1); + if (flags & PFR_FLAG_FEEDBACK) { + ad.pfra_fback = (p != NULL) ? 
+ PFR_FB_CLEARED : PFR_FB_NONE; + if (COPYOUT(&ad, addr+i, sizeof (ad), flags)) + senderr(EFAULT); + } + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + xzero++; + } + } + + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_clstats_kentries(&workq, 0, 0); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +_bad: + if (flags & PFR_FLAG_FEEDBACK) + pfr_reset_feedback(addr, size, flags); + return (rv); +} + +int +pfr_validate_addr(struct pfr_addr *ad) +{ + int i; + + switch (ad->pfra_af) { +#if INET + case AF_INET: + if (ad->pfra_net > 32) + return (-1); + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + if (ad->pfra_net > 128) + return (-1); + break; +#endif /* INET6 */ + default: + return (-1); + } + if (ad->pfra_net < 128 && + (((caddr_t)ad)[ad->pfra_net/8] & (0xFF >> (ad->pfra_net%8)))) + return (-1); + for (i = (ad->pfra_net+7)/8; i < (int)sizeof (ad->pfra_u); i++) + if (((caddr_t)ad)[i]) + return (-1); + if (ad->pfra_not && ad->pfra_not != 1) + return (-1); + if (ad->pfra_fback) + return (-1); + return (0); +} + +void +pfr_enqueue_addrs(struct pfr_ktable *kt, struct pfr_kentryworkq *workq, + int *naddr, int sweep) +{ + struct pfr_walktree w; + + SLIST_INIT(workq); + bzero(&w, sizeof (w)); + w.pfrw_op = sweep ? 
PFRW_SWEEP : PFRW_ENQUEUE;
+	w.pfrw_workq = workq;
+	if (kt->pfrkt_ip4 != NULL)
+		if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4,
+		    pfr_walktree, &w))
+			printf("pfr_enqueue_addrs: IPv4 walktree failed.\n");
+	if (kt->pfrkt_ip6 != NULL)
+		if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6,
+		    pfr_walktree, &w))
+			printf("pfr_enqueue_addrs: IPv6 walktree failed.\n");
+	if (naddr != NULL)
+		*naddr = w.pfrw_cnt;
+}
+
+/*
+ * Run a PFRW_MARK walk over both the IPv4 and the IPv6 tree of `kt'.
+ * A failing walk is only reported with printf; nothing is returned.
+ * Unlike pfr_enqueue_addrs(), the tree heads are dereferenced without a
+ * NULL check.
+ */
+void
+pfr_mark_addrs(struct pfr_ktable *kt)
+{
+	struct pfr_walktree walk;
+
+	bzero(&walk, sizeof (walk));
+	walk.pfrw_op = PFRW_MARK;
+
+	if (kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, pfr_walktree,
+	    &walk) != 0)
+		printf("pfr_mark_addrs: IPv4 walktree failed.\n");
+	if (kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, pfr_walktree,
+	    &walk) != 0)
+		printf("pfr_mark_addrs: IPv6 walktree failed.\n");
+}
+
+
+/*
+ * Find the table entry for address `ad' in `kt'.
+ *
+ * Network addresses (ADDR_NETWORK) are looked up with rn_lookup() using a
+ * mask built from the prefix length; all other addresses use rn_match().
+ * A hit on a radix root node is treated as a miss.  For non-network
+ * addresses with `exact' set, a hit on a network entry is also a miss.
+ *
+ * Returns the entry, or NULL when nothing suitable is found or the
+ * address family is neither AF_INET nor AF_INET6.  pf_lock must be held.
+ */
+struct pfr_kentry *
+pfr_lookup_addr(struct pfr_ktable *kt, struct pfr_addr *ad, int exact)
+{
+	union sockaddr_union key, netmask;
+	struct radix_node_head *tree;
+	struct pfr_kentry *found;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	bzero(&key, sizeof (key));
+	switch (ad->pfra_af) {
+	case AF_INET:
+		FILLIN_SIN(key.sin, ad->pfra_ip4addr);
+		tree = kt->pfrkt_ip4;
+		break;
+	case AF_INET6:
+		FILLIN_SIN6(key.sin6, ad->pfra_ip6addr);
+		tree = kt->pfrkt_ip6;
+		break;
+	default:
+		return (NULL);
+	}
+
+	if (ADDR_NETWORK(ad)) {
+		pfr_prepare_network(&netmask, ad->pfra_af, ad->pfra_net);
+		found = (struct pfr_kentry *)rn_lookup(&key, &netmask, tree);
+		if (found != NULL && KENTRY_RNF_ROOT(found))
+			return (NULL);
+		return (found);
+	}
+
+	found = (struct pfr_kentry *)rn_match(&key, tree);
+	if (found == NULL || KENTRY_RNF_ROOT(found))
+		return (NULL);
+	if (exact && KENTRY_NETWORK(found))
+		return (NULL);
+	return (found);
+}
+
+struct pfr_kentry *
+pfr_create_kentry(struct pfr_addr *ad, int intr)
+{
+	struct pfr_kentry *ke;
+
+	if (intr)
+		ke = pool_get(&pfr_kentry_pl2, PR_WAITOK);
+	else
+		ke = pool_get(&pfr_kentry_pl, PR_WAITOK);
+	if (ke == NULL)
+		return (NULL);
+	bzero(ke, sizeof (*ke));
+
+	if (ad->pfra_af == AF_INET)
+		FILLIN_SIN(ke->pfrke_sa.sin,
ad->pfra_ip4addr);
+	else if (ad->pfra_af == AF_INET6)
+		FILLIN_SIN6(ke->pfrke_sa.sin6, ad->pfra_ip6addr);
+	ke->pfrke_af = ad->pfra_af;
+	ke->pfrke_net = ad->pfra_net;
+	ke->pfrke_not = ad->pfra_not;
+	ke->pfrke_intrpool = intr;
+	return (ke);
+}
+
+/*
+ * Release every entry linked on `workq'.  The list head itself is not
+ * reset, so it must not be walked again afterwards.
+ */
+void
+pfr_destroy_kentries(struct pfr_kentryworkq *workq)
+{
+	struct pfr_kentry *p, *q;
+
+	for (p = SLIST_FIRST(workq); p != NULL; p = q) {
+		/* fetch the successor first: p is released by the call */
+		q = SLIST_NEXT(p, pfrke_workq);
+		pfr_destroy_kentry(p);
+	}
+}
+
+/*
+ * Return one entry to the pool selected by its pfrke_intrpool flag.
+ */
+void
+pfr_destroy_kentry(struct pfr_kentry *ke)
+{
+	if (ke->pfrke_intrpool)
+		pool_put(&pfr_kentry_pl2, ke);
+	else
+		pool_put(&pfr_kentry_pl, ke);
+}
+
+/*
+ * Route every entry of `workq' into `kt' and stamp it with `tzero'.
+ * Processing stops at the first entry that cannot be routed (a message
+ * is printed); pfrkt_cnt grows by the number of entries routed so far.
+ * NOTE(review): the failing entry and the ones after it stay on `workq'
+ * unrouted; no caller in view reclaims them -- confirm.
+ */
+void
+pfr_insert_kentries(struct pfr_ktable *kt,
+    struct pfr_kentryworkq *workq, u_int64_t tzero)
+{
+	struct pfr_kentry *p;
+	int rv, n = 0;
+
+	SLIST_FOREACH(p, workq, pfrke_workq) {
+		rv = pfr_route_kentry(kt, p);
+		if (rv) {
+			printf("pfr_insert_kentries: cannot route entry "
+			    "(code=%d).\n", rv);
+			break;
+		}
+		p->pfrke_tzero = tzero;
+		n++;
+	}
+	kt->pfrkt_cnt += n;
+}
+
+/*
+ * Add the single address `ad' to `kt' unless an exact entry already
+ * exists.
+ *
+ * Returns 0 on success or when the address is already present, EINVAL
+ * when no entry could be allocated, or the pfr_route_kentry() result
+ * when routing fails.
+ */
+int
+pfr_insert_kentry(struct pfr_ktable *kt, struct pfr_addr *ad, u_int64_t tzero)
+{
+	struct pfr_kentry *p;
+	int rv;
+
+	p = pfr_lookup_addr(kt, ad, 1);
+	if (p != NULL)
+		return (0);
+	p = pfr_create_kentry(ad, 1);
+	if (p == NULL)
+		return (EINVAL);
+
+	rv = pfr_route_kentry(kt, p);
+	if (rv) {
+		/*
+		 * The entry never made it into the tree, so this is the
+		 * only reference to it: release it instead of leaking it.
+		 */
+		pfr_destroy_kentry(p);
+		return (rv);
+	}
+
+	p->pfrke_tzero = tzero;
+	kt->pfrkt_cnt++;
+
+	return (0);
+}
+
+/*
+ * Unroute every entry of `workq' from `kt', decrease pfrkt_cnt by the
+ * number of entries on the list, then release the entries.
+ */
+void
+pfr_remove_kentries(struct pfr_ktable *kt,
+    struct pfr_kentryworkq *workq)
+{
+	struct pfr_kentry *p;
+	int n = 0;
+
+	SLIST_FOREACH(p, workq, pfrke_workq) {
+		pfr_unroute_kentry(kt, p);
+		n++;
+	}
+	kt->pfrkt_cnt -= n;
+	pfr_destroy_kentries(workq);
+}
+
+/*
+ * Unroute every entry of `workq' from `kt' without releasing the
+ * entries and without touching pfrkt_cnt.
+ */
+void
+pfr_clean_node_mask(struct pfr_ktable *kt,
+    struct pfr_kentryworkq *workq)
+{
+	struct pfr_kentry *p;
+
+	SLIST_FOREACH(p, workq, pfrke_workq)
+		pfr_unroute_kentry(kt, p);
+}
+
+void
+pfr_clstats_kentries(struct pfr_kentryworkq *workq, u_int64_t tzero,
+    int negchange)
+{
+	struct pfr_kentry *p;
+
+	lck_mtx_assert(pf_lock,
LCK_MTX_ASSERT_OWNED);
+
+	SLIST_FOREACH(p, workq, pfrke_workq) {
+		if (negchange)
+			p->pfrke_not = !p->pfrke_not;
+		bzero(p->pfrke_packets, sizeof (p->pfrke_packets));
+		bzero(p->pfrke_bytes, sizeof (p->pfrke_bytes));
+		p->pfrke_tzero = tzero;
+	}
+}
+
+/*
+ * Reset the feedback field of the first `size' records of `addr' to
+ * PFR_FB_NONE.  Each record is copied in, patched and copied back out;
+ * the loop stops silently at the first copy that fails.
+ */
+void
+pfr_reset_feedback(struct pfr_addr *addr, int size, int flags)
+{
+	struct pfr_addr ad;
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (COPYIN(addr+i, &ad, sizeof (ad), flags))
+			break;
+		ad.pfra_fback = PFR_FB_NONE;
+		if (COPYOUT(&ad, addr+i, sizeof (ad), flags))
+			break;
+	}
+}
+
+/*
+ * Build in `*sa' the netmask sockaddr for a prefix of `net' bits in
+ * address family `af'.
+ *
+ * `*sa' is zeroed first; for a family other than AF_INET / AF_INET6 it
+ * is left all-zero.  `net' is expected to lie in 0..32 for AF_INET and
+ * 0..128 for AF_INET6 -- presumably guaranteed by pfr_validate_addr();
+ * verify against callers.
+ *
+ * The mask word is produced by shifting an unsigned all-ones value:
+ * left-shifting the signed constant -1 is undefined behaviour in C.
+ */
+void
+pfr_prepare_network(union sockaddr_union *sa, int af, int net)
+{
+	int i;
+
+	bzero(sa, sizeof (*sa));
+	if (af == AF_INET) {
+		sa->sin.sin_len = sizeof (sa->sin);
+		sa->sin.sin_family = AF_INET;
+		sa->sin.sin_addr.s_addr = net ?
+		    htonl((u_int32_t)0xffffffff << (32-net)) : 0;
+	} else if (af == AF_INET6) {
+		sa->sin6.sin6_len = sizeof (sa->sin6);
+		sa->sin6.sin6_family = AF_INET6;
+		for (i = 0; i < 4; i++) {
+			if (net <= 32) {
+				/* last, possibly partial, 32-bit word */
+				sa->sin6.sin6_addr.s6_addr32[i] = net ?
+				    htonl((u_int32_t)0xffffffff <<
+				    (32-net)) : 0;
+				break;
+			}
+			sa->sin6.sin6_addr.s6_addr32[i] = 0xFFFFFFFF;
+			net -= 32;
+		}
+	}
+}
+
+int
+pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+	union sockaddr_union mask;
+	struct radix_node *rn;
+	struct radix_node_head *head;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	bzero(ke->pfrke_node, sizeof (ke->pfrke_node));
+	if (ke->pfrke_af == AF_INET)
+		head = kt->pfrkt_ip4;
+	else if (ke->pfrke_af == AF_INET6)
+		head = kt->pfrkt_ip6;
+	else
+		return (-1);
+
+	if (KENTRY_NETWORK(ke)) {
+		pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
+		rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node);
+	} else
+		rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node);
+
+	return (rn == NULL ?
-1 : 0);
+}
+
+/*
+ * Remove entry `ke' from the radix tree of `kt' that matches its
+ * address family.  Network entries are deleted with a mask built from
+ * their prefix length, all others with a NULL mask.
+ *
+ * Returns 0 on success, -1 when the family is unknown or rn_delete()
+ * finds nothing (the latter is also reported with printf).  pf_lock
+ * must be held.
+ */
+int
+pfr_unroute_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke)
+{
+	union sockaddr_union netmask;
+	union sockaddr_union *maskp = NULL;
+	struct radix_node_head *tree;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	switch (ke->pfrke_af) {
+	case AF_INET:
+		tree = kt->pfrkt_ip4;
+		break;
+	case AF_INET6:
+		tree = kt->pfrkt_ip6;
+		break;
+	default:
+		return (-1);
+	}
+
+	if (KENTRY_NETWORK(ke)) {
+		pfr_prepare_network(&netmask, ke->pfrke_af, ke->pfrke_net);
+		maskp = &netmask;
+	}
+
+	if (rn_delete(&ke->pfrke_sa, maskp, tree) == NULL) {
+		printf("pfr_unroute_kentry: delete failed.\n");
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Fill `*ad' from entry `ke': family, prefix length, negation flag and
+ * the address itself.  `*ad' is zeroed first and stays all-zero when
+ * `ke' is NULL.
+ */
+void
+pfr_copyout_addr(struct pfr_addr *ad, struct pfr_kentry *ke)
+{
+	bzero(ad, sizeof (*ad));
+	if (ke != NULL) {
+		ad->pfra_af = ke->pfrke_af;
+		ad->pfra_net = ke->pfrke_net;
+		ad->pfra_not = ke->pfrke_not;
+		switch (ad->pfra_af) {
+		case AF_INET:
+			ad->pfra_ip4addr = ke->pfrke_sa.sin.sin_addr;
+			break;
+		case AF_INET6:
+			ad->pfra_ip6addr = ke->pfrke_sa.sin6.sin6_addr;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+int
+pfr_walktree(struct radix_node *rn, void *arg)
+{
+	struct pfr_kentry *ke = (struct pfr_kentry *)rn;
+	struct pfr_walktree *w = arg;
+	int flags = w->pfrw_flags;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	switch (w->pfrw_op) {
+	case PFRW_MARK:
+		ke->pfrke_mark = 0;
+		break;
+	case PFRW_SWEEP:
+		if (ke->pfrke_mark)
+			break;
+		/* FALLTHROUGH */
+	case PFRW_ENQUEUE:
+		SLIST_INSERT_HEAD(w->pfrw_workq, ke, pfrke_workq);
+		w->pfrw_cnt++;
+		break;
+	case PFRW_GET_ADDRS:
+		if (w->pfrw_free-- > 0) {
+			struct pfr_addr ad;
+
+			pfr_copyout_addr(&ad, ke);
+			if (copyout(&ad,
+			    CAST_USER_ADDR_T(w->pfrw_addr),
+			    sizeof (ad)))
+				return (EFAULT);
+			w->pfrw_addr++;
+		}
+		break;
+	case PFRW_GET_ASTATS:
+		if (w->pfrw_free-- > 0) {
+			struct pfr_astats as;
+
+			pfr_copyout_addr(&as.pfras_a, ke);
+
+			bcopy(ke->pfrke_packets, as.pfras_packets,
+			    sizeof (as.pfras_packets));
+			bcopy(ke->pfrke_bytes, as.pfras_bytes,
+			    sizeof
(as.pfras_bytes)); + as.pfras_tzero = ke->pfrke_tzero; + + if (COPYOUT(&as, w->pfrw_astats, sizeof (as), flags)) + return (EFAULT); + w->pfrw_astats++; + } + break; + case PFRW_POOL_GET: + if (ke->pfrke_not) + break; /* negative entries are ignored */ + if (!w->pfrw_cnt--) { + w->pfrw_kentry = ke; + return (1); /* finish search */ + } + break; + case PFRW_DYNADDR_UPDATE: + if (ke->pfrke_af == AF_INET) { + if (w->pfrw_dyn->pfid_acnt4++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET, ke->pfrke_net); + w->pfrw_dyn->pfid_addr4 = *SUNION2PF( + &ke->pfrke_sa, AF_INET); + w->pfrw_dyn->pfid_mask4 = *SUNION2PF( + &pfr_mask, AF_INET); + } else if (ke->pfrke_af == AF_INET6) { + if (w->pfrw_dyn->pfid_acnt6++ > 0) + break; + pfr_prepare_network(&pfr_mask, AF_INET6, ke->pfrke_net); + w->pfrw_dyn->pfid_addr6 = *SUNION2PF( + &ke->pfrke_sa, AF_INET6); + w->pfrw_dyn->pfid_mask6 = *SUNION2PF( + &pfr_mask, AF_INET6); + } + break; + } + return (0); +} + +int +pfr_clr_tables(struct pfr_table *filter, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + int xdel = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + if (pfr_table_count(filter, flags) < 0) + return (ENOENT); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (strcmp(p->pfrkt_anchor, PF_RESERVED_ANCHOR) == 0) + continue; + if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_add_tables(struct pfr_table *tbl, int size, int *nadd, int flags) +{ + struct pfr_ktableworkq addq, changeq; + struct pfr_ktable *p, *q, *r, key; 
+ int i, rv, xadd = 0; + u_int64_t tzero = pf_time_second(); + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof (key.pfrkt_t), flags)) + senderr(EFAULT); + if (pfr_validate_table(&key.pfrkt_t, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + senderr(EINVAL); + key.pfrkt_flags |= PFR_TFLAG_ACTIVE; + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p == NULL) { + p = pfr_create_ktable(&key.pfrkt_t, tzero, 1); + if (p == NULL) + senderr(ENOMEM); + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(p, q)) + goto _skip; + } + SLIST_INSERT_HEAD(&addq, p, pfrkt_workq); + xadd++; + if (!key.pfrkt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(key.pfrkt_anchor, sizeof (key.pfrkt_anchor)); + r = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (r != NULL) { + p->pfrkt_root = r; + goto _skip; + } + SLIST_FOREACH(q, &addq, pfrkt_workq) { + if (!pfr_ktable_compare(&key, q)) { + p->pfrkt_root = q; + goto _skip; + } + } + key.pfrkt_flags = 0; + r = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (r == NULL) + senderr(ENOMEM); + SLIST_INSERT_HEAD(&addq, r, pfrkt_workq); + p->pfrkt_root = r; + } else if (!(p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &changeq, pfrkt_workq) + if (!pfr_ktable_compare(&key, q)) + goto _skip; + p->pfrkt_nflags = (p->pfrkt_flags & + ~PFR_TFLAG_USRMASK) | key.pfrkt_flags; + SLIST_INSERT_HEAD(&changeq, p, pfrkt_workq); + xadd++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_insert_ktables(&addq); + pfr_setflags_ktables(&changeq); + } else + pfr_destroy_ktables(&addq, 0); + if (nadd != NULL) + *nadd = xadd; + return (0); +_bad: + pfr_destroy_ktables(&addq, 0); + return (rv); +} + +int +pfr_del_tables(struct pfr_table *tbl, int size, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, 
key; + int i, xdel = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof (key.pfrkt_t), flags)) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_ACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } +_skip: + ; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_get_tables(struct pfr_table *filter, struct pfr_table *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + int n, nn; + + ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return (ENOENT); + if (n > *size) { + *size = n; + return (0); + } + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + if (COPYOUT(&p->pfrkt_t, tbl++, sizeof (*tbl), flags)) + return (EFAULT); + } + if (n) { + printf("pfr_get_tables: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_get_tstats(struct pfr_table *filter, struct pfr_tstats *tbl, int *size, + int flags) +{ + struct pfr_ktable *p; + struct pfr_ktableworkq workq; + int n, nn; + u_int64_t tzero = pf_time_second(); + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + /* XXX PFR_FLAG_CLSTATS disabled */ + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_ALLRSETS); + if (pfr_fix_anchor(filter->pfrt_anchor)) + return (EINVAL); + n = nn = pfr_table_count(filter, flags); + if (n < 0) + return 
(ENOENT); + if (n > *size) { + *size = n; + return (0); + } + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (pfr_skip_table(filter, p, flags)) + continue; + if (n-- <= 0) + continue; + if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof (*tbl), flags)) { + return (EFAULT); + } + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + } + if (flags & PFR_FLAG_CLSTATS) + pfr_clstats_ktables(&workq, tzero, + flags & PFR_FLAG_ADDRSTOO); + if (n) { + printf("pfr_get_tstats: corruption detected (%d).\n", n); + return (ENOTTY); + } + *size = nn; + return (0); +} + +int +pfr_clr_tstats(struct pfr_table *tbl, int size, int *nzero, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, key; + int i, xzero = 0; + u_int64_t tzero = pf_time_second(); + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY | + PFR_FLAG_ADDRSTOO); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof (key.pfrkt_t), flags)) + return (EFAULT); + if (pfr_validate_table(&key.pfrkt_t, 0, 0)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL) { + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xzero++; + } + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_clstats_ktables(&workq, tzero, flags & PFR_FLAG_ADDRSTOO); + } + if (nzero != NULL) + *nzero = xzero; + return (0); +} + +int +pfr_set_tflags(struct pfr_table *tbl, int size, int setflag, int clrflag, + int *nchange, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p, *q, key; + int i, xchange = 0, xdel = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + if ((setflag & ~PFR_TFLAG_USRMASK) || + (clrflag & ~PFR_TFLAG_USRMASK) || + (setflag & clrflag)) + return (EINVAL); + SLIST_INIT(&workq); + for (i = 0; i < size; i++) { + if (COPYIN(tbl+i, &key.pfrkt_t, sizeof (key.pfrkt_t), flags)) + return (EFAULT); + if 
(pfr_validate_table(&key.pfrkt_t, 0, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + p = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (p != NULL && (p->pfrkt_flags & PFR_TFLAG_ACTIVE)) { + p->pfrkt_nflags = (p->pfrkt_flags | setflag) & + ~clrflag; + if (p->pfrkt_nflags == p->pfrkt_flags) + goto _skip; + SLIST_FOREACH(q, &workq, pfrkt_workq) + if (!pfr_ktable_compare(p, q)) + goto _skip; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + if ((p->pfrkt_flags & PFR_TFLAG_PERSIST) && + (clrflag & PFR_TFLAG_PERSIST) && + !(p->pfrkt_flags & PFR_TFLAG_REFERENCED)) + xdel++; + else + xchange++; + } +_skip: + ; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + } + if (nchange != NULL) + *nchange = xchange; + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_begin(struct pfr_table *trs, u_int32_t *ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_or_create_ruleset(trs->pfrt_anchor); + if (rs == NULL) + return (ENOMEM); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + if (ticket != NULL) + *ticket = ++rs->tticket; + rs->topen = 1; + } else + pf_remove_if_empty_ruleset(rs); + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_define(struct pfr_table *tbl, struct pfr_addr *addr, int size, + int *nadd, int *naddr, u_int32_t ticket, int flags) +{ + struct pfr_ktableworkq tableq; + struct pfr_kentryworkq addrq; + struct pfr_ktable *kt, *rt, *shadow, key; + struct pfr_kentry *p; + struct pfr_addr ad; + struct pf_ruleset *rs; + int i, rv, xadd = 0, 
xaddr = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY | PFR_FLAG_ADDRSTOO); + if (size && !(flags & PFR_FLAG_ADDRSTOO)) + return (EINVAL); + if (pfr_validate_table(tbl, PFR_TFLAG_USRMASK, + flags & PFR_FLAG_USERIOCTL)) + return (EINVAL); + rs = pf_find_ruleset(tbl->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + tbl->pfrt_flags |= PFR_TFLAG_INACTIVE; + SLIST_INIT(&tableq); + kt = RB_FIND(pfr_ktablehead, &pfr_ktables, (struct pfr_ktable *)tbl); + if (kt == NULL) { + kt = pfr_create_ktable(tbl, 0, 1); + if (kt == NULL) + return (ENOMEM); + SLIST_INSERT_HEAD(&tableq, kt, pfrkt_workq); + xadd++; + if (!tbl->pfrt_anchor[0]) + goto _skip; + + /* find or create root table */ + bzero(&key, sizeof (key)); + strlcpy(key.pfrkt_name, tbl->pfrt_name, + sizeof (key.pfrkt_name)); + rt = RB_FIND(pfr_ktablehead, &pfr_ktables, &key); + if (rt != NULL) { + kt->pfrkt_root = rt; + goto _skip; + } + rt = pfr_create_ktable(&key.pfrkt_t, 0, 1); + if (rt == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INSERT_HEAD(&tableq, rt, pfrkt_workq); + kt->pfrkt_root = rt; + } else if (!(kt->pfrkt_flags & PFR_TFLAG_INACTIVE)) + xadd++; +_skip: + shadow = pfr_create_ktable(tbl, 0, 0); + if (shadow == NULL) { + pfr_destroy_ktables(&tableq, 0); + return (ENOMEM); + } + SLIST_INIT(&addrq); + for (i = 0; i < size; i++) { + if (COPYIN(addr+i, &ad, sizeof (ad), flags)) + senderr(EFAULT); + if (pfr_validate_addr(&ad)) + senderr(EINVAL); + if (pfr_lookup_addr(shadow, &ad, 1) != NULL) + continue; + p = pfr_create_kentry(&ad, 0); + if (p == NULL) + senderr(ENOMEM); + if (pfr_route_kentry(shadow, p)) { + pfr_destroy_kentry(p); + continue; + } + SLIST_INSERT_HEAD(&addrq, p, pfrke_workq); + xaddr++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, 1); + kt->pfrkt_flags |= PFR_TFLAG_INACTIVE; + pfr_insert_ktables(&tableq); + 
shadow->pfrkt_cnt = (flags & PFR_FLAG_ADDRSTOO) ? + xaddr : NO_ADDRESSES; + kt->pfrkt_shadow = shadow; + } else { + pfr_clean_node_mask(shadow, &addrq); + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + } + if (nadd != NULL) + *nadd = xadd; + if (naddr != NULL) + *naddr = xaddr; + return (0); +_bad: + pfr_destroy_ktable(shadow, 0); + pfr_destroy_ktables(&tableq, 0); + pfr_destroy_kentries(&addrq); + return (rv); +} + +int +pfr_ina_rollback(struct pfr_table *trs, u_int32_t ticket, int *ndel, int flags) +{ + struct pfr_ktableworkq workq; + struct pfr_ktable *p; + struct pf_ruleset *rs; + int xdel = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (0); + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + p->pfrkt_nflags = p->pfrkt_flags & ~PFR_TFLAG_INACTIVE; + SLIST_INSERT_HEAD(&workq, p, pfrkt_workq); + xdel++; + } + if (!(flags & PFR_FLAG_DUMMY)) { + pfr_setflags_ktables(&workq); + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (ndel != NULL) + *ndel = xdel; + return (0); +} + +int +pfr_ina_commit(struct pfr_table *trs, u_int32_t ticket, int *nadd, + int *nchange, int flags) +{ + struct pfr_ktable *p, *q; + struct pfr_ktableworkq workq; + struct pf_ruleset *rs; + int xadd = 0, xchange = 0; + u_int64_t tzero = pf_time_second(); + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + ACCEPT_FLAGS(flags, PFR_FLAG_ATOMIC | PFR_FLAG_DUMMY); + rs = pf_find_ruleset(trs->pfrt_anchor); + if (rs == NULL || !rs->topen || ticket != rs->tticket) + return (EBUSY); + + SLIST_INIT(&workq); + RB_FOREACH(p, pfr_ktablehead, &pfr_ktables) { + if (!(p->pfrkt_flags & PFR_TFLAG_INACTIVE) || + pfr_skip_table(trs, p, 0)) + continue; + SLIST_INSERT_HEAD(&workq, p, 
pfrkt_workq); + if (p->pfrkt_flags & PFR_TFLAG_ACTIVE) + xchange++; + else + xadd++; + } + + if (!(flags & PFR_FLAG_DUMMY)) { + for (p = SLIST_FIRST(&workq); p != NULL; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_commit_ktable(p, tzero); + } + rs->topen = 0; + pf_remove_if_empty_ruleset(rs); + } + if (nadd != NULL) + *nadd = xadd; + if (nchange != NULL) + *nchange = xchange; + + return (0); +} + +void +pfr_commit_ktable(struct pfr_ktable *kt, u_int64_t tzero) +{ + struct pfr_ktable *shadow = kt->pfrkt_shadow; + int nflags; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (shadow->pfrkt_cnt == NO_ADDRESSES) { + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + pfr_clstats_ktable(kt, tzero, 1); + } else if (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) { + /* kt might contain addresses */ + struct pfr_kentryworkq addrq, addq, changeq, delq, garbageq; + struct pfr_kentry *p, *q, *next; + struct pfr_addr ad; + + pfr_enqueue_addrs(shadow, &addrq, NULL, 0); + pfr_mark_addrs(kt); + SLIST_INIT(&addq); + SLIST_INIT(&changeq); + SLIST_INIT(&delq); + SLIST_INIT(&garbageq); + pfr_clean_node_mask(shadow, &addrq); + for (p = SLIST_FIRST(&addrq); p != NULL; p = next) { + next = SLIST_NEXT(p, pfrke_workq); /* XXX */ + pfr_copyout_addr(&ad, p); + q = pfr_lookup_addr(kt, &ad, 1); + if (q != NULL) { + if (q->pfrke_not != p->pfrke_not) + SLIST_INSERT_HEAD(&changeq, q, + pfrke_workq); + q->pfrke_mark = 1; + SLIST_INSERT_HEAD(&garbageq, p, pfrke_workq); + } else { + p->pfrke_tzero = tzero; + SLIST_INSERT_HEAD(&addq, p, pfrke_workq); + } + } + pfr_enqueue_addrs(kt, &delq, NULL, ENQUEUE_UNMARKED_ONLY); + pfr_insert_kentries(kt, &addq, tzero); + pfr_remove_kentries(kt, &delq); + pfr_clstats_kentries(&changeq, tzero, INVERT_NEG_FLAG); + pfr_destroy_kentries(&garbageq); + } else { + /* kt cannot contain addresses */ + SWAP(struct radix_node_head *, kt->pfrkt_ip4, + shadow->pfrkt_ip4); + SWAP(struct radix_node_head *, kt->pfrkt_ip6, + shadow->pfrkt_ip6); + SWAP(int, kt->pfrkt_cnt, 
shadow->pfrkt_cnt);
+		pfr_clstats_ktable(kt, tzero, 1);
+	}
+	nflags = ((shadow->pfrkt_flags & PFR_TFLAG_USRMASK) |
+	    (kt->pfrkt_flags & PFR_TFLAG_SETMASK) | PFR_TFLAG_ACTIVE) &
+	    ~PFR_TFLAG_INACTIVE;
+	pfr_destroy_ktable(shadow, 0);
+	kt->pfrkt_shadow = NULL;
+	pfr_setflags_ktable(kt, nflags);
+}
+
+/*
+ * Check a table description.
+ *
+ * Returns 0 when `tbl' is acceptable, -1 otherwise.  Rejected are: an
+ * empty name; the reserved anchor when `no_reserved' is set; a name that
+ * is not NUL-terminated or has non-NUL bytes after its terminator; an
+ * anchor that pfr_fix_anchor() refuses; and flags outside
+ * `allowedflags'.  The anchor is rewritten in place by pfr_fix_anchor()
+ * once the name checks have passed.
+ */
+int
+pfr_validate_table(struct pfr_table *tbl, int allowedflags, int no_reserved)
+{
+	const char *name = tbl->pfrt_name;
+	int pos;
+
+	if (name[0] == '\0')
+		return (-1);
+	if (no_reserved && strcmp(tbl->pfrt_anchor, PF_RESERVED_ANCHOR) == 0)
+		return (-1);
+	if (name[PF_TABLE_NAME_SIZE-1] != '\0')
+		return (-1);
+
+	/* everything behind the terminator has to be zero padding */
+	pos = strlen(name);
+	while (pos < PF_TABLE_NAME_SIZE) {
+		if (name[pos] != '\0')
+			return (-1);
+		pos++;
+	}
+
+	if (pfr_fix_anchor(tbl->pfrt_anchor) != 0)
+		return (-1);
+	if ((tbl->pfrt_flags & ~allowedflags) != 0)
+		return (-1);
+	return (0);
+}
+
+/*
+ * Strip leading slashes from a table's anchor path, in place, and check
+ * the result.  The buffer is treated as MAXPATHLEN bytes long: the
+ * remainder is shifted to the front and the freed tail is zero-filled.
+ * Returns 0 when the buffer is NUL-terminated and zero-padded after the
+ * terminator, -1 otherwise.
+ */
+int
+pfr_fix_anchor(char *anchor)
+{
+	size_t siz = MAXPATHLEN;
+	int off, pos;
+
+	/* count the leading slashes */
+	for (off = 0; anchor[off] == '/'; off++)
+		continue;
+	if (off > 0) {
+		bcopy(anchor + off, anchor, siz - off);
+		memset(anchor + siz - off, 0, off);
+	}
+
+	if (anchor[siz - 1] != '\0')
+		return (-1);
+	for (pos = strlen(anchor); pos < (int)siz; pos++) {
+		if (anchor[pos] != '\0')
+			return (-1);
+	}
+	return (0);
+}
+
+int
+pfr_table_count(struct pfr_table *filter, int flags)
+{
+	struct pf_ruleset *rs;
+
+	if (flags & PFR_FLAG_ALLRSETS)
+		return (pfr_ktable_cnt);
+	if (filter->pfrt_anchor[0]) {
+		rs = pf_find_ruleset(filter->pfrt_anchor);
+		return ((rs != NULL) ?
rs->tables : -1);
+	}
+	return (pf_main_ruleset.tables);
+}
+
+/*
+ * Decide whether table `kt' is filtered out by `filter'.
+ * Returns 0 (keep) when PFR_FLAG_ALLRSETS is set or the anchors are
+ * equal, 1 (skip) when the anchors differ.
+ */
+int
+pfr_skip_table(struct pfr_table *filter, struct pfr_ktable *kt, int flags)
+{
+	if (flags & PFR_FLAG_ALLRSETS)
+		return (0);
+	return (strcmp(filter->pfrt_anchor, kt->pfrkt_anchor) != 0);
+}
+
+/*
+ * Insert every table linked on `workq' into the global table tree.
+ * pf_lock must be held.
+ */
+void
+pfr_insert_ktables(struct pfr_ktableworkq *workq)
+{
+	struct pfr_ktable *kt;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	SLIST_FOREACH(kt, workq, pfrkt_workq) {
+		pfr_insert_ktable(kt);
+	}
+}
+
+/*
+ * Insert `kt' into the global table tree and bump the table counter.
+ * When `kt' has a root table, the root's anchor reference count is
+ * incremented; on the 0 -> 1 transition the root additionally gets
+ * PFR_TFLAG_REFDANCHOR set.  pf_lock must be held.
+ */
+void
+pfr_insert_ktable(struct pfr_ktable *kt)
+{
+	struct pfr_ktable *root;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	RB_INSERT(pfr_ktablehead, &pfr_ktables, kt);
+	pfr_ktable_cnt++;
+
+	root = kt->pfrkt_root;
+	if (root == NULL)
+		return;
+	if (root->pfrkt_refcnt[PFR_REFCNT_ANCHOR]++ == 0)
+		pfr_setflags_ktable(root,
+		    root->pfrkt_flags|PFR_TFLAG_REFDANCHOR);
+}
+
+/*
+ * Apply the pending flags (pfrkt_nflags) of every table on `workq'.
+ * The successor is read before each call because pfr_setflags_ktable()
+ * may destroy the table it is given.  pf_lock must be held.
+ */
+void
+pfr_setflags_ktables(struct pfr_ktableworkq *workq)
+{
+	struct pfr_ktable *cur, *next;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	cur = SLIST_FIRST(workq);
+	while (cur != NULL) {
+		next = SLIST_NEXT(cur, pfrkt_workq);
+		pfr_setflags_ktable(cur, cur->pfrkt_nflags);
+		cur = next;
+	}
+}
+
+void
+pfr_setflags_ktable(struct pfr_ktable *kt, int newf)
+{
+	struct pfr_kentryworkq addrq;
+
+	lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED);
+
+	if (!(newf & PFR_TFLAG_REFERENCED) &&
+	    !(newf & PFR_TFLAG_PERSIST))
+		newf &= ~PFR_TFLAG_ACTIVE;
+	if (!(newf & PFR_TFLAG_ACTIVE))
+		newf &= ~PFR_TFLAG_USRMASK;
+	if (!(newf & PFR_TFLAG_SETMASK)) {
+		RB_REMOVE(pfr_ktablehead, &pfr_ktables, kt);
+		if (kt->pfrkt_root != NULL)
+			if (!--kt->pfrkt_root->pfrkt_refcnt[PFR_REFCNT_ANCHOR])
+				pfr_setflags_ktable(kt->pfrkt_root,
+				    kt->pfrkt_root->pfrkt_flags &
+				    ~PFR_TFLAG_REFDANCHOR);
+		pfr_destroy_ktable(kt, 1);
+		pfr_ktable_cnt--;
+		return;
+	}
+	if (!(newf & PFR_TFLAG_ACTIVE) && kt->pfrkt_cnt) {
+		pfr_enqueue_addrs(kt, &addrq, NULL, 0);
+		pfr_remove_kentries(kt, &addrq);
+	}
+	if (!(newf & PFR_TFLAG_INACTIVE) && kt->pfrkt_shadow != NULL) {
+		pfr_destroy_ktable(kt->pfrkt_shadow, 1);
+
kt->pfrkt_shadow = NULL; + } + kt->pfrkt_flags = newf; +} + +void +pfr_clstats_ktables(struct pfr_ktableworkq *workq, u_int64_t tzero, int recurse) +{ + struct pfr_ktable *p; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + SLIST_FOREACH(p, workq, pfrkt_workq) + pfr_clstats_ktable(p, tzero, recurse); +} + +void +pfr_clstats_ktable(struct pfr_ktable *kt, u_int64_t tzero, int recurse) +{ + struct pfr_kentryworkq addrq; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (recurse) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clstats_kentries(&addrq, tzero, 0); + } + bzero(kt->pfrkt_packets, sizeof (kt->pfrkt_packets)); + bzero(kt->pfrkt_bytes, sizeof (kt->pfrkt_bytes)); + kt->pfrkt_match = kt->pfrkt_nomatch = 0; + kt->pfrkt_tzero = tzero; +} + +struct pfr_ktable * +pfr_create_ktable(struct pfr_table *tbl, u_int64_t tzero, int attachruleset) +{ + struct pfr_ktable *kt; + struct pf_ruleset *rs; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + kt = pool_get(&pfr_ktable_pl, PR_WAITOK); + if (kt == NULL) + return (NULL); + bzero(kt, sizeof (*kt)); + kt->pfrkt_t = *tbl; + + if (attachruleset) { + rs = pf_find_or_create_ruleset(tbl->pfrt_anchor); + if (!rs) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_rs = rs; + rs->tables++; + } + + if (!rn_inithead((void **)&kt->pfrkt_ip4, + offsetof(struct sockaddr_in, sin_addr) * 8) || + !rn_inithead((void **)&kt->pfrkt_ip6, + offsetof(struct sockaddr_in6, sin6_addr) * 8)) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + kt->pfrkt_tzero = tzero; + + return (kt); +} + +void +pfr_destroy_ktables(struct pfr_ktableworkq *workq, int flushaddr) +{ + struct pfr_ktable *p, *q; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + for (p = SLIST_FIRST(workq); p; p = q) { + q = SLIST_NEXT(p, pfrkt_workq); + pfr_destroy_ktable(p, flushaddr); + } +} + +void +pfr_destroy_ktable(struct pfr_ktable *kt, int flushaddr) +{ + struct pfr_kentryworkq addrq; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + 
if (flushaddr) { + pfr_enqueue_addrs(kt, &addrq, NULL, 0); + pfr_clean_node_mask(kt, &addrq); + pfr_destroy_kentries(&addrq); + } + if (kt->pfrkt_ip4 != NULL) + _FREE((caddr_t)kt->pfrkt_ip4, M_RTABLE); + if (kt->pfrkt_ip6 != NULL) + _FREE((caddr_t)kt->pfrkt_ip6, M_RTABLE); + if (kt->pfrkt_shadow != NULL) + pfr_destroy_ktable(kt->pfrkt_shadow, flushaddr); + if (kt->pfrkt_rs != NULL) { + kt->pfrkt_rs->tables--; + pf_remove_if_empty_ruleset(kt->pfrkt_rs); + } + pool_put(&pfr_ktable_pl, kt); +} + +int +pfr_ktable_compare(struct pfr_ktable *p, struct pfr_ktable *q) +{ + int d; + + if ((d = strncmp(p->pfrkt_name, q->pfrkt_name, PF_TABLE_NAME_SIZE))) + return (d); + return (strcmp(p->pfrkt_anchor, q->pfrkt_anchor)); +} + +struct pfr_ktable * +pfr_lookup_table(struct pfr_table *tbl) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + /* struct pfr_ktable start like a struct pfr_table */ + return (RB_FIND(pfr_ktablehead, &pfr_ktables, + (struct pfr_ktable *)tbl)); +} + +int +pfr_match_addr(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af) +{ + struct pfr_kentry *ke = NULL; + int match; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (0); + + switch (af) { +#if INET + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof (pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; +#endif /* INET6 */ + } + match = (ke && !ke->pfrke_not); + if (match) + kt->pfrkt_match++; + else + kt->pfrkt_nomatch++; + return (match); +} + +void +pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, + u_int64_t len, int 
dir_out, int op_pass, int notrule) +{ + struct pfr_kentry *ke = NULL; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return; + + switch (af) { +#if INET + case AF_INET: + pfr_sin.sin_addr.s_addr = a->addr32[0]; + ke = (struct pfr_kentry *)rn_match(&pfr_sin, kt->pfrkt_ip4); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; +#endif /* INET */ +#if INET6 + case AF_INET6: + bcopy(a, &pfr_sin6.sin6_addr, sizeof (pfr_sin6.sin6_addr)); + ke = (struct pfr_kentry *)rn_match(&pfr_sin6, kt->pfrkt_ip6); + if (ke && KENTRY_RNF_ROOT(ke)) + ke = NULL; + break; +#endif /* INET6 */ + default: + ; + } + if ((ke == NULL || ke->pfrke_not) != notrule) { + if (op_pass != PFR_OP_PASS) + printf("pfr_update_stats: assertion failed.\n"); + op_pass = PFR_OP_XPASS; + } + kt->pfrkt_packets[dir_out][op_pass]++; + kt->pfrkt_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS) { + ke->pfrke_packets[dir_out][op_pass]++; + ke->pfrke_bytes[dir_out][op_pass] += len; + } +} + +struct pfr_ktable * +pfr_attach_table(struct pf_ruleset *rs, char *name) +{ + struct pfr_ktable *kt, *rt; + struct pfr_table tbl; + struct pf_anchor *ac = rs->anchor; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + bzero(&tbl, sizeof (tbl)); + strlcpy(tbl.pfrt_name, name, sizeof (tbl.pfrt_name)); + if (ac != NULL) + strlcpy(tbl.pfrt_anchor, ac->path, sizeof (tbl.pfrt_anchor)); + kt = pfr_lookup_table(&tbl); + if (kt == NULL) { + kt = pfr_create_ktable(&tbl, pf_time_second(), 1); + if (kt == NULL) + return (NULL); + if (ac != NULL) { + bzero(tbl.pfrt_anchor, sizeof (tbl.pfrt_anchor)); + rt = pfr_lookup_table(&tbl); + if (rt == NULL) { + rt = pfr_create_ktable(&tbl, 0, 1); + if (rt == NULL) { + pfr_destroy_ktable(kt, 0); + return (NULL); + } + pfr_insert_ktable(rt); + } + kt->pfrkt_root = rt; + } + pfr_insert_ktable(kt); + } + if 
(!kt->pfrkt_refcnt[PFR_REFCNT_RULE]++) + pfr_setflags_ktable(kt, kt->pfrkt_flags|PFR_TFLAG_REFERENCED); + return (kt); +} + +/* + * Drop one rule reference on kt. A count that is already <= 0 is only + * logged; when the last reference goes away PFR_TFLAG_REFERENCED is cleared. + */ +void +pfr_detach_table(struct pfr_ktable *kt) +{ + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (kt->pfrkt_refcnt[PFR_REFCNT_RULE] <= 0) + printf("pfr_detach_table: refcount = %d.\n", + kt->pfrkt_refcnt[PFR_REFCNT_RULE]); + else if (!--kt->pfrkt_refcnt[PFR_REFCNT_RULE]) + pfr_setflags_ktable(kt, kt->pfrkt_flags&~PFR_TFLAG_REFERENCED); +} + +/* + * Pick an address from table kt for address family af. + * + * The search starts at entry index *pidx (0 when pidx is NULL or negative). + * When counter is non-NULL and that index is non-negative, the search resumes + * from counter as long as it still lies inside the entry's block; otherwise + * it starts at the first address of the block. Addresses that the radix + * lookup resolves to a different (nested) entry are stepped over. + * + * Returns 0 on success after storing the entry's address and mask in *raddr + * and *rmask, the chosen address in *counter and the entry index in *pidx, + * and bumping pfrkt_match; counter and pidx are written unconditionally on + * that path, so both must be non-NULL for a call that can succeed. + * Returns 1 (bumping pfrkt_nomatch) when no entry exists at the index, and + * -1 for an unsupported af or a table that is not active. + */ +int +pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, + struct pf_addr **raddr, struct pf_addr **rmask, sa_family_t af) +{ + struct pfr_kentry *ke, *ke2; + struct pf_addr *addr; + union sockaddr_union mask; + int idx = -1, use_counter = 0; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + if (af == AF_INET) + addr = (struct pf_addr *)&pfr_sin.sin_addr; + else if (af == AF_INET6) + addr = (struct pf_addr *)&pfr_sin6.sin6_addr; + else + return (-1); + + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) + kt = kt->pfrkt_root; + if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE)) + return (-1); + + if (pidx != NULL) + idx = *pidx; + if (counter != NULL && idx >= 0) + use_counter = 1; + if (idx < 0) + idx = 0; + +_next_block: + ke = pfr_kentry_byidx(kt, idx, af); + if (ke == NULL) { + kt->pfrkt_nomatch++; + return (1); + } + pfr_prepare_network(&pfr_mask, af, ke->pfrke_net); + *raddr = SUNION2PF(&ke->pfrke_sa, af); + *rmask = SUNION2PF(&pfr_mask, af); + + if (use_counter) { + /* is supplied address within block? 
*/ + if (!PF_MATCHA(0, *raddr, *rmask, counter, af)) { + /* no, go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + PF_ACPY(addr, counter, af); + } else { + /* use first address of block */ + PF_ACPY(addr, *raddr, af); + } + + if (!KENTRY_NETWORK(ke)) { + /* this is a single IP address - no possible nested block */ + PF_ACPY(counter, addr, af); + *pidx = idx; + kt->pfrkt_match++; + return (0); + } + for (;;) { + /* we don't want to use a nested block */ + if (af == AF_INET) + ke2 = (struct pfr_kentry *)rn_match(&pfr_sin, + kt->pfrkt_ip4); + else if (af == AF_INET6) + ke2 = (struct pfr_kentry *)rn_match(&pfr_sin6, + kt->pfrkt_ip6); + else + return (-1); /* never happens */ + /* no need to check KENTRY_RNF_ROOT() here */ + if (ke2 == ke) { + /* lookup returned the same block - perfect */ + PF_ACPY(counter, addr, af); + *pidx = idx; + kt->pfrkt_match++; + return (0); + } + + /* we need to increase the counter past the nested block */ + /* + * Build the nested block's mask for the table's own family: + * it is read back through SUNION2PF(&mask, af), so a mask + * prepared as AF_INET is wrong for IPv6 tables. + */ + pfr_prepare_network(&mask, af, ke2->pfrke_net); + PF_POOLMASK(addr, addr, SUNION2PF(&mask, af), &pfr_ffaddr, af); + PF_AINC(addr, af); + if (!PF_MATCHA(0, *raddr, *rmask, addr, af)) { + /* ok, we reached the end of our main block */ + /* go to next block in table */ + idx++; + use_counter = 0; + goto _next_block; + } + } +} + +/* + * Walk kt's radix tree for af with a PFRW_POOL_GET request whose pfrw_cnt is + * idx and return the entry the walker left in pfrw_kentry; NULL for any + * other address family. + */ +struct pfr_kentry * +pfr_kentry_byidx(struct pfr_ktable *kt, int idx, int af) +{ + struct pfr_walktree w; + + lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + bzero(&w, sizeof (w)); + w.pfrw_op = PFRW_POOL_GET; + w.pfrw_cnt = idx; + + switch (af) { +#if INET + case AF_INET: + (void) kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, + pfr_walktree, &w); + return (w.pfrw_kentry); +#endif /* INET */ +#if INET6 + case AF_INET6: + (void) kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, + pfr_walktree, &w); + return (w.pfrw_kentry); +#endif /* INET6 */ + default: + return (NULL); + } +} + +void +pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn) +{ + struct pfr_walktree w; + + 
lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); + + bzero(&w, sizeof (w)); + w.pfrw_op = PFRW_DYNADDR_UPDATE; + w.pfrw_dyn = dyn; + + dyn->pfid_acnt4 = 0; + dyn->pfid_acnt6 = 0; + if (!dyn->pfid_af || dyn->pfid_af == AF_INET) + (void) kt->pfrkt_ip4->rnh_walktree(kt->pfrkt_ip4, + pfr_walktree, &w); + if (!dyn->pfid_af || dyn->pfid_af == AF_INET6) + (void) kt->pfrkt_ip6->rnh_walktree(kt->pfrkt_ip6, + pfr_walktree, &w); +} diff --git a/bsd/net/pfkeyv2.h b/bsd/net/pfkeyv2.h index ccb5bbaa8..fa89f14c7 100644 --- a/bsd/net/pfkeyv2.h +++ b/bsd/net/pfkeyv2.h @@ -102,7 +102,8 @@ you leave this credit intact on any copies of this file. #define SADB_X_SPDSETIDX 20 #define SADB_X_SPDEXPIRE 21 #define SADB_X_SPDDELETE2 22 /* by policy id */ -#define SADB_MAX 22 +#define SADB_GETSASTAT 23 +#define SADB_MAX 23 struct sadb_msg { u_int8_t sadb_msg_version; @@ -293,6 +294,30 @@ struct sadb_x_ipsecrequest { */ }; +struct sadb_session_id { + u_int16_t sadb_session_id_len; + u_int16_t sadb_session_id_exttype; + /* [0] is an arbitrary handle that means something only for requester + * [1] is a global session id for lookups in the kernel and racoon. + */ + u_int64_t sadb_session_id_v[2]; +} __attribute__ ((aligned(8))); + +struct sastat { + u_int32_t spi; /* SPI Value, network byte order */ + u_int32_t created; /* for lifetime */ + struct sadb_lifetime lft_c; /* CURRENT lifetime. 
*/ +}; // no need to align + +struct sadb_sastat { + u_int16_t sadb_sastat_len; + u_int16_t sadb_sastat_exttype; + u_int32_t sadb_sastat_dir; + u_int32_t sadb_sastat_reserved; + u_int32_t sadb_sastat_list_len; + /* list of struct sastat comes after */ +} __attribute__ ((aligned(8))); + #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 @@ -313,7 +338,9 @@ struct sadb_x_ipsecrequest { #define SADB_X_EXT_KMPRIVATE 17 #define SADB_X_EXT_POLICY 18 #define SADB_X_EXT_SA2 19 -#define SADB_EXT_MAX 19 +#define SADB_EXT_SESSION_ID 20 +#define SADB_EXT_SASTAT 21 +#define SADB_EXT_MAX 21 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h new file mode 100644 index 000000000..60deece57 --- /dev/null +++ b/bsd/net/pfvar.h @@ -0,0 +1,2144 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $apfw: pfvar.h,v 1.12 2008/08/27 00:01:32 jhw Exp $ */ +/* $OpenBSD: pfvar.h,v 1.259 2007/12/02 12:08:04 pascoe Exp $ */ + +/* + * Copyright (c) 2001 Daniel Hartmeier + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NET_PFVAR_H_ +#define _NET_PFVAR_H_ + +#ifdef PRIVATE +#if PF || !defined(KERNEL) + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#include +#include + +#ifdef KERNEL +#include +#include +#include + +#include +#include +#include + +#if BYTE_ORDER == BIG_ENDIAN +#define htobe64(x) (x) +#else /* LITTLE ENDIAN */ +#define htobe64(x) __DARWIN_OSSwapInt64(x) +#endif /* LITTLE_ENDIAN */ + +#define be64toh(x) htobe64(x) + +__private_extern__ lck_rw_t *pf_perim_lock; +__private_extern__ lck_mtx_t *pf_lock; + +struct pool { + struct zone *pool_zone; /* pointer to backend zone */ + const char *pool_name; /* name of pool */ + unsigned int pool_count; /* # of outstanding elements */ + unsigned int pool_hiwat; /* high watermark */ + unsigned int pool_limit; /* hard limit */ + unsigned int pool_fails; /* # of failed allocs due to limit */ +}; + +#define PR_NOWAIT FALSE +#define PR_WAITOK TRUE + +__private_extern__ void pool_init(struct pool *, size_t, unsigned int, + unsigned int, int, const char *, void *); +__private_extern__ void pool_destroy(struct pool *); +__private_extern__ void pool_sethiwat(struct pool *, int); +__private_extern__ void pool_sethardlimit(struct pool *, int, + const char *, int); +__private_extern__ void *pool_get(struct pool *, int); +__private_extern__ void pool_put(struct pool *, void *); +__private_extern__ u_int64_t pf_time_second(void); +#endif /* 
KERNEL */ + +union sockaddr_union { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + +struct ip; +struct ip6_hdr; +struct tcphdr; + +#define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) +#define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) + +#define PF_MD5_DIGEST_LENGTH 16 +#ifdef MD5_DIGEST_LENGTH +#if PF_MD5_DIGEST_LENGTH != MD5_DIGEST_LENGTH +#error +#endif +#endif + +#ifndef NO_APPLE_EXTENSIONS +struct pf_grev1_hdr; +struct pf_esp_hdr; +#endif + +enum { PF_INOUT, PF_IN, PF_OUT }; +enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP }; +enum { PF_RULESET_SCRUB, PF_RULESET_FILTER, PF_RULESET_NAT, + PF_RULESET_BINAT, PF_RULESET_RDR, PF_RULESET_MAX }; +enum { PF_OP_NONE, PF_OP_IRG, PF_OP_EQ, PF_OP_NE, PF_OP_LT, + PF_OP_LE, PF_OP_GT, PF_OP_GE, PF_OP_XRG, PF_OP_RRG }; +enum { PF_DEBUG_NONE, PF_DEBUG_URGENT, PF_DEBUG_MISC, PF_DEBUG_NOISY }; +enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, + PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, + PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; +enum { PF_GET_NONE, PF_GET_CLR_CNTR }; + +/* + * Note about PFTM_*: real indices into pf_rule.timeout[] come before + * PFTM_MAX, special cases afterwards. See pf_state_expires(). 
+ */ +enum { PFTM_TCP_FIRST_PACKET, PFTM_TCP_OPENING, PFTM_TCP_ESTABLISHED, + PFTM_TCP_CLOSING, PFTM_TCP_FIN_WAIT, PFTM_TCP_CLOSED, + PFTM_UDP_FIRST_PACKET, PFTM_UDP_SINGLE, PFTM_UDP_MULTIPLE, + PFTM_ICMP_FIRST_PACKET, PFTM_ICMP_ERROR_REPLY, +#ifndef NO_APPLE_EXTENSIONS + PFTM_GREv1_FIRST_PACKET, PFTM_GREv1_INITIATING, PFTM_GREv1_ESTABLISHED, + PFTM_ESP_FIRST_PACKET, PFTM_ESP_INITIATING, PFTM_ESP_ESTABLISHED, +#endif + PFTM_OTHER_FIRST_PACKET, PFTM_OTHER_SINGLE, + PFTM_OTHER_MULTIPLE, PFTM_FRAG, PFTM_INTERVAL, + PFTM_ADAPTIVE_START, PFTM_ADAPTIVE_END, PFTM_SRC_NODE, + PFTM_TS_DIFF, PFTM_MAX, PFTM_PURGE, PFTM_UNLINKED, + PFTM_UNTIL_PACKET }; + +/* PFTM default values */ +#define PFTM_TCP_FIRST_PACKET_VAL 120 /* First TCP packet */ +#define PFTM_TCP_OPENING_VAL 30 /* No response yet */ +#define PFTM_TCP_ESTABLISHED_VAL (24 * 60 * 60) /* Established */ +#define PFTM_TCP_CLOSING_VAL (15 * 60) /* Half closed */ +#define PFTM_TCP_FIN_WAIT_VAL 45 /* Got both FINs */ +#define PFTM_TCP_CLOSED_VAL 90 /* Got a RST */ +#define PFTM_UDP_FIRST_PACKET_VAL 60 /* First UDP packet */ +#define PFTM_UDP_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_UDP_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_ICMP_FIRST_PACKET_VAL 20 /* First ICMP packet */ +#define PFTM_ICMP_ERROR_REPLY_VAL 10 /* Got error response */ +#ifndef NO_APPLE_EXTENSIONS +#define PFTM_GREv1_FIRST_PACKET_VAL 120 +#define PFTM_GREv1_INITIATING_VAL 30 +#define PFTM_GREv1_ESTABLISHED_VAL 1800 +#define PFTM_ESP_FIRST_PACKET_VAL 120 +#define PFTM_ESP_INITIATING_VAL 30 +#define PFTM_ESP_ESTABLISHED_VAL 900 +#endif +#define PFTM_OTHER_FIRST_PACKET_VAL 60 /* First packet */ +#define PFTM_OTHER_SINGLE_VAL 30 /* Unidirectional */ +#define PFTM_OTHER_MULTIPLE_VAL 60 /* Bidirectional */ +#define PFTM_FRAG_VAL 30 /* Fragment expire */ +#define PFTM_INTERVAL_VAL 10 /* Expire interval */ +#define PFTM_SRC_NODE_VAL 0 /* Source tracking */ +#define PFTM_TS_DIFF_VAL 30 /* Allowed TS diff */ + +enum { PF_NOPFROUTE, PF_FASTROUTE, 
PF_ROUTETO, PF_DUPTO, PF_REPLYTO }; +enum { PF_LIMIT_STATES, +#ifndef NO_APPLE_EXTENSIONS + PF_LIMIT_APP_STATES, +#endif + PF_LIMIT_SRC_NODES, PF_LIMIT_FRAGS, + PF_LIMIT_TABLES, PF_LIMIT_TABLE_ENTRIES, PF_LIMIT_MAX }; +#define PF_POOL_IDMASK 0x0f +enum { PF_POOL_NONE, PF_POOL_BITMASK, PF_POOL_RANDOM, + PF_POOL_SRCHASH, PF_POOL_ROUNDROBIN }; +enum { PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE, PF_ADDR_DYNIFTL, + PF_ADDR_TABLE, PF_ADDR_RTLABEL, PF_ADDR_URPFFAILED, + PF_ADDR_RANGE }; +#define PF_POOL_TYPEMASK 0x0f +#define PF_POOL_STICKYADDR 0x20 +#define PF_WSCALE_FLAG 0x80 +#define PF_WSCALE_MASK 0x0f + +#define PF_LOG 0x01 +#define PF_LOG_ALL 0x02 +#define PF_LOG_SOCKET_LOOKUP 0x04 + +struct pf_addr { + union { + struct in_addr v4; + struct in6_addr v6; + u_int8_t addr8[16]; + u_int16_t addr16[8]; + u_int32_t addr32[4]; + } pfa; /* 128-bit address */ +#define v4 pfa.v4 +#define v6 pfa.v6 +#define addr8 pfa.addr8 +#define addr16 pfa.addr16 +#define addr32 pfa.addr32 +}; + +#define PF_TABLE_NAME_SIZE 32 + +#define PFI_AFLAG_NETWORK 0x01 +#define PFI_AFLAG_BROADCAST 0x02 +#define PFI_AFLAG_PEER 0x04 +#define PFI_AFLAG_MODEMASK 0x07 +#define PFI_AFLAG_NOALIAS 0x08 + +#ifndef RTLABEL_LEN +#define RTLABEL_LEN 32 +#endif + +struct pf_addr_wrap { + union { + struct { + struct pf_addr addr; + struct pf_addr mask; + } a; + char ifname[IFNAMSIZ]; + char tblname[PF_TABLE_NAME_SIZE]; + char rtlabelname[RTLABEL_LEN]; + u_int32_t rtlabel; + } v; + union { + struct pfi_dynaddr *dyn __attribute__((aligned(8))); + struct pfr_ktable *tbl __attribute__((aligned(8))); + int dyncnt __attribute__((aligned(8))); + int tblcnt __attribute__((aligned(8))); + } p __attribute__((aligned(8))); + u_int8_t type; /* PF_ADDR_* */ + u_int8_t iflags; /* PFI_AFLAG_* */ +}; + +#ifndef NO_APPLE_EXTENSIONS +struct pf_port_range { + u_int16_t port[2]; + u_int8_t op; +}; + +union pf_rule_xport { + struct pf_port_range range; + u_int16_t call_id; + u_int32_t spi; +}; +#endif + +#ifdef KERNEL +struct pfi_dynaddr { + 
TAILQ_ENTRY(pfi_dynaddr) entry; + struct pf_addr pfid_addr4; + struct pf_addr pfid_mask4; + struct pf_addr pfid_addr6; + struct pf_addr pfid_mask6; + struct pfr_ktable *pfid_kt; + struct pfi_kif *pfid_kif; + void *pfid_hook_cookie; + int pfid_net; /* mask or 128 */ + int pfid_acnt4; /* address count IPv4 */ + int pfid_acnt6; /* address count IPv6 */ + sa_family_t pfid_af; /* rule af */ + u_int8_t pfid_iflags; /* PFI_AFLAG_* */ +}; + +/* + * Address manipulation macros + */ + +#if INET +#if !INET6 +#define PF_INET_ONLY +#endif /* ! INET6 */ +#endif /* INET */ + +#if INET6 +#if !INET +#define PF_INET6_ONLY +#endif /* ! INET */ +#endif /* INET6 */ + +#if INET +#if INET6 +#define PF_INET_INET6 +#endif /* INET6 */ +#endif /* INET */ + +#else + +#define PF_INET_INET6 + +#endif /* KERNEL */ + +/* Both IPv4 and IPv6 */ +#ifdef PF_INET_INET6 + +#define PF_AEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0])) \ + +#define PF_ANEQ(a, b, c) \ + ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0])) \ + +#define PF_AZERO(a, c) \ + ((c == AF_INET && !(a)->addr32[0]) || \ + (!(a)->addr32[0] && !(a)->addr32[1] && \ + !(a)->addr32[2] && !(a)->addr32[3])) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv6 */ + +#ifdef PF_INET6_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[3] == (b)->addr32[3] && \ + (a)->addr32[2] == (b)->addr32[2] && \ + (a)->addr32[1] == (b)->addr32[1] && \ + (a)->addr32[0] == (b)->addr32[0]) \ + +#define PF_ANEQ(a, b, 
c) \ + ((a)->addr32[3] != (b)->addr32[3] || \ + (a)->addr32[2] != (b)->addr32[2] || \ + (a)->addr32[1] != (b)->addr32[1] || \ + (a)->addr32[0] != (b)->addr32[0]) \ + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0] && \ + !(a)->addr32[1] && \ + !(a)->addr32[2] && \ + !(a)->addr32[3]) \ + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + pf_addrcpy(a, b, f) + +#define PF_AINC(a, f) \ + pf_addr_inc(a, f) + +#define PF_POOLMASK(a, b, c, d, f) \ + pf_poolmask(a, b, c, d, f) + +#else + +/* Just IPv4 */ +#ifdef PF_INET_ONLY + +#define PF_AEQ(a, b, c) \ + ((a)->addr32[0] == (b)->addr32[0]) + +#define PF_ANEQ(a, b, c) \ + ((a)->addr32[0] != (b)->addr32[0]) + +#define PF_AZERO(a, c) \ + (!(a)->addr32[0]) + +#define PF_MATCHA(n, a, m, b, f) \ + pf_match_addr(n, a, m, b, f) + +#define PF_ACPY(a, b, f) \ + (a)->v4.s_addr = (b)->v4.s_addr + +#define PF_AINC(a, f) \ + do { \ + (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ + } while (0) + +#define PF_POOLMASK(a, b, c, d, f) \ + do { \ + (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ + (((c)->addr32[0] ^ 0xffffffff) & (d)->addr32[0]); \ + } while (0) + +#endif /* PF_INET_ONLY */ +#endif /* PF_INET6_ONLY */ +#endif /* PF_INET_INET6 */ + +#define PF_MISMATCHAW(aw, x, af, neg, ifp) \ + ( \ + (((aw)->type == PF_ADDR_NOROUTE && \ + pf_routable((x), (af), NULL)) || \ + (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ + pf_routable((x), (af), (ifp))) || \ + ((aw)->type == PF_ADDR_RTLABEL && \ + !pf_rtlabel_match((x), (af), (aw))) || \ + ((aw)->type == PF_ADDR_TABLE && \ + !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ + ((aw)->type == PF_ADDR_DYNIFTL && \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + ((aw)->type == PF_ADDR_RANGE && \ + !pf_match_addr_range(&(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))) || \ + ((aw)->type == PF_ADDR_ADDRMASK && \ + !PF_AZERO(&(aw)->v.a.mask, (af)) && \ + !PF_MATCHA(0, &(aw)->v.a.addr, \ + &(aw)->v.a.mask, (x), (af))))) != \ + (neg) 
\ + ) + + +struct pf_rule_uid { + uid_t uid[2]; + u_int8_t op; + u_int8_t _pad[3]; +}; + +struct pf_rule_gid { + uid_t gid[2]; + u_int8_t op; + u_int8_t _pad[3]; +}; + +struct pf_rule_addr { + struct pf_addr_wrap addr; +#ifndef NO_APPLE_EXTENSIONS + union pf_rule_xport xport; + u_int8_t neg; +#else + u_int16_t port[2]; + u_int8_t neg; + u_int8_t port_op; +#endif +}; + +struct pf_pooladdr { + struct pf_addr_wrap addr; + TAILQ_ENTRY(pf_pooladdr) entries; +#if !defined(__LP64__) + u_int32_t _pad[2]; +#endif /* !__LP64__ */ + char ifname[IFNAMSIZ]; + struct pfi_kif *kif __attribute__((aligned(8))); +}; + +TAILQ_HEAD(pf_palist, pf_pooladdr); + +struct pf_poolhashkey { + union { + u_int8_t key8[16]; + u_int16_t key16[8]; + u_int32_t key32[4]; + } pfk; /* 128-bit hash key */ +#define key8 pfk.key8 +#define key16 pfk.key16 +#define key32 pfk.key32 +}; + +struct pf_pool { + struct pf_palist list; +#if !defined(__LP64__) + u_int32_t _pad[2]; +#endif /* !__LP64__ */ + struct pf_pooladdr *cur __attribute__((aligned(8))); + struct pf_poolhashkey key __attribute__((aligned(8))); + struct pf_addr counter; + int tblidx; + u_int16_t proxy_port[2]; + u_int8_t port_op; + u_int8_t opts; +}; + + +/* A packed Operating System description for fingerprinting */ +typedef u_int32_t pf_osfp_t; +#define PF_OSFP_ANY ((pf_osfp_t)0) +#define PF_OSFP_UNKNOWN ((pf_osfp_t)-1) +#define PF_OSFP_NOMATCH ((pf_osfp_t)-2) + +struct pf_osfp_entry { + SLIST_ENTRY(pf_osfp_entry) fp_entry; +#if !defined(__LP64__) + u_int32_t _pad; +#endif /* !__LP64__ */ + pf_osfp_t fp_os; + int fp_enflags; +#define PF_OSFP_EXPANDED 0x001 /* expanded entry */ +#define PF_OSFP_GENERIC 0x002 /* generic signature */ +#define PF_OSFP_NODETAIL 0x004 /* no p0f details */ +#define PF_OSFP_LEN 32 + char fp_class_nm[PF_OSFP_LEN]; + char fp_version_nm[PF_OSFP_LEN]; + char fp_subtype_nm[PF_OSFP_LEN]; +}; +#define PF_OSFP_ENTRY_EQ(a, b) \ + ((a)->fp_os == (b)->fp_os && \ + memcmp((a)->fp_class_nm, (b)->fp_class_nm, PF_OSFP_LEN) == 0 && 
\ + memcmp((a)->fp_version_nm, (b)->fp_version_nm, PF_OSFP_LEN) == 0 && \ + memcmp((a)->fp_subtype_nm, (b)->fp_subtype_nm, PF_OSFP_LEN) == 0) + +/* handle pf_osfp_t packing */ +#define _FP_RESERVED_BIT 1 /* For the special negative #defines */ +#define _FP_UNUSED_BITS 1 +#define _FP_CLASS_BITS 10 /* OS Class (Windows, Linux) */ +#define _FP_VERSION_BITS 10 /* OS version (95, 98, NT, 2.4.54, 3.2) */ +#define _FP_SUBTYPE_BITS 10 /* patch level (NT SP4, SP3, ECN patch) */ +#define PF_OSFP_UNPACK(osfp, class, version, subtype) do { \ + (class) = ((osfp) >> (_FP_VERSION_BITS+_FP_SUBTYPE_BITS)) & \ + ((1 << _FP_CLASS_BITS) - 1); \ + (version) = ((osfp) >> _FP_SUBTYPE_BITS) & \ + ((1 << _FP_VERSION_BITS) - 1);\ + (subtype) = (osfp) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while (0) +#define PF_OSFP_PACK(osfp, class, version, subtype) do { \ + (osfp) = ((class) & ((1 << _FP_CLASS_BITS) - 1)) << (_FP_VERSION_BITS \ + + _FP_SUBTYPE_BITS); \ + (osfp) |= ((version) & ((1 << _FP_VERSION_BITS) - 1)) << \ + _FP_SUBTYPE_BITS; \ + (osfp) |= (subtype) & ((1 << _FP_SUBTYPE_BITS) - 1); \ +} while (0) + +/* the fingerprint of an OSes TCP SYN packet */ +typedef u_int64_t pf_tcpopts_t; +struct pf_os_fingerprint { + SLIST_HEAD(pf_osfp_enlist, pf_osfp_entry) fp_oses; /* list of matches */ + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; +#define PF_OSFP_WSIZE_MOD 0x0001 /* Window modulus */ +#define PF_OSFP_WSIZE_DC 0x0002 /* Window don't care */ +#define PF_OSFP_WSIZE_MSS 0x0004 /* Window multiple of MSS */ +#define PF_OSFP_WSIZE_MTU 0x0008 /* Window multiple of MTU */ +#define PF_OSFP_PSIZE_MOD 0x0010 /* packet size modulus */ +#define PF_OSFP_PSIZE_DC 0x0020 /* packet size don't care */ +#define PF_OSFP_WSCALE 0x0040 /* TCP window scaling */ +#define PF_OSFP_WSCALE_MOD 0x0080 /* TCP window scale modulus */ +#define PF_OSFP_WSCALE_DC 0x0100 /* TCP 
window scale dont-care */ +#define PF_OSFP_MSS 0x0200 /* TCP MSS */ +#define PF_OSFP_MSS_MOD 0x0400 /* TCP MSS modulus */ +#define PF_OSFP_MSS_DC 0x0800 /* TCP MSS dont-care */ +#define PF_OSFP_DF 0x1000 /* IPv4 don't fragment bit */ +#define PF_OSFP_TS0 0x2000 /* Zero timestamp */ +#define PF_OSFP_INET6 0x4000 /* IPv6 */ + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ +#define PF_OSFP_MAXTTL_OFFSET 40 +/* TCP options packing */ +#define PF_OSFP_TCPOPT_NOP 0x0 /* TCP NOP option */ +#define PF_OSFP_TCPOPT_WSCALE 0x1 /* TCP window scaling option */ +#define PF_OSFP_TCPOPT_MSS 0x2 /* TCP max segment size opt */ +#define PF_OSFP_TCPOPT_SACK 0x3 /* TCP SACK OK option */ +#define PF_OSFP_TCPOPT_TS 0x4 /* TCP timestamp option */ +#define PF_OSFP_TCPOPT_BITS 3 /* bits used by each option */ +/* + * Number of options that fit in fp_tcpopts at PF_OSFP_TCPOPT_BITS bits each. + * The whole expression is parenthesized so the division cannot re-associate + * with operators at the point of use. + */ +#define PF_OSFP_MAX_OPTS \ + ((sizeof(((struct pf_os_fingerprint *)0)->fp_tcpopts) * 8) \ + / PF_OSFP_TCPOPT_BITS) + + SLIST_ENTRY(pf_os_fingerprint) fp_next; +}; + +struct pf_osfp_ioctl { + struct pf_osfp_entry fp_os; + pf_tcpopts_t fp_tcpopts; /* packed TCP options */ + u_int16_t fp_wsize; /* TCP window size */ + u_int16_t fp_psize; /* ip->ip_len */ + u_int16_t fp_mss; /* TCP MSS */ + u_int16_t fp_flags; + u_int8_t fp_optcnt; /* TCP option count */ + u_int8_t fp_wscale; /* TCP window scaling */ + u_int8_t fp_ttl; /* IPv4 TTL */ + + int fp_getnum; /* DIOCOSFPGET number */ +}; + + +union pf_rule_ptr { + struct pf_rule *ptr __attribute__((aligned(8))); + u_int32_t nr __attribute__((aligned(8))); +} __attribute__((aligned(8))); + +#define PF_ANCHOR_NAME_SIZE 64 + +struct pf_rule { + struct pf_rule_addr src; + struct pf_rule_addr dst; +#define PF_SKIP_IFP 0 +#define PF_SKIP_DIR 1 +#define PF_SKIP_AF 2 +#define PF_SKIP_PROTO 3 +#define PF_SKIP_SRC_ADDR 4 +#define PF_SKIP_SRC_PORT 5 +#define PF_SKIP_DST_ADDR 6 +#define PF_SKIP_DST_PORT 7 +#define PF_SKIP_COUNT 8 + union pf_rule_ptr skip[PF_SKIP_COUNT]; +#define 
PF_RULE_LABEL_SIZE 64 + char label[PF_RULE_LABEL_SIZE]; +#define PF_QNAME_SIZE 64 + char ifname[IFNAMSIZ]; + char qname[PF_QNAME_SIZE]; + char pqname[PF_QNAME_SIZE]; +#define PF_TAG_NAME_SIZE 64 + char tagname[PF_TAG_NAME_SIZE]; + char match_tagname[PF_TAG_NAME_SIZE]; + + char overload_tblname[PF_TABLE_NAME_SIZE]; + + TAILQ_ENTRY(pf_rule) entries; +#if !defined(__LP64__) + u_int32_t _pad[2]; +#endif /* !__LP64__ */ + struct pf_pool rpool; + + u_int64_t evaluations; + u_int64_t packets[2]; + u_int64_t bytes[2]; + + struct pfi_kif *kif __attribute__((aligned(8))); + struct pf_anchor *anchor __attribute__((aligned(8))); + struct pfr_ktable *overload_tbl __attribute__((aligned(8))); + + pf_osfp_t os_fingerprint __attribute__((aligned(8))); + + unsigned int rtableid; + u_int32_t timeout[PFTM_MAX]; + u_int32_t states; + u_int32_t max_states; + u_int32_t src_nodes; + u_int32_t max_src_nodes; + u_int32_t max_src_states; + u_int32_t max_src_conn; + struct { + u_int32_t limit; + u_int32_t seconds; + } max_src_conn_rate; + u_int32_t qid; + u_int32_t pqid; + u_int32_t rt_listid; + u_int32_t nr; + u_int32_t prob; + uid_t cuid; + pid_t cpid; + + u_int16_t return_icmp; + u_int16_t return_icmp6; + u_int16_t max_mss; + u_int16_t tag; + u_int16_t match_tag; + + struct pf_rule_uid uid; + struct pf_rule_gid gid; + + u_int32_t rule_flag; + u_int8_t action; + u_int8_t direction; + u_int8_t log; + u_int8_t logif; + u_int8_t quick; + u_int8_t ifnot; + u_int8_t match_tag_not; + u_int8_t natpass; + +#define PF_STATE_NORMAL 0x1 +#define PF_STATE_MODULATE 0x2 +#define PF_STATE_SYNPROXY 0x3 + u_int8_t keep_state; + sa_family_t af; + u_int8_t proto; + u_int8_t type; + u_int8_t code; + u_int8_t flags; + u_int8_t flagset; + u_int8_t min_ttl; + u_int8_t allow_opts; + u_int8_t rt; + u_int8_t return_ttl; + u_int8_t tos; + u_int8_t anchor_relative; + u_int8_t anchor_wildcard; + +#define PF_FLUSH 0x01 +#define PF_FLUSH_GLOBAL 0x02 + u_int8_t flush; + +#ifndef NO_APPLE_EXTENSIONS + u_int8_t 
proto_variant; + u_int8_t extfilter; /* Filter mode [PF_EXTFILTER_xxx] */ + u_int8_t extmap; /* Mapping mode [PF_EXTMAP_xxx] */ +#endif +}; + +/* rule flags */ +#define PFRULE_DROP 0x0000 +#define PFRULE_RETURNRST 0x0001 +#define PFRULE_FRAGMENT 0x0002 +#define PFRULE_RETURNICMP 0x0004 +#define PFRULE_RETURN 0x0008 +#define PFRULE_NOSYNC 0x0010 +#define PFRULE_SRCTRACK 0x0020 /* track source states */ +#define PFRULE_RULESRCTRACK 0x0040 /* per rule */ + +/* scrub flags */ +#define PFRULE_NODF 0x0100 +#define PFRULE_FRAGCROP 0x0200 /* non-buffering frag cache */ +#define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ +#define PFRULE_RANDOMID 0x0800 +#define PFRULE_REASSEMBLE_TCP 0x1000 + +/* rule flags again */ +#define PFRULE_IFBOUND 0x00010000 /* if-bound */ + +#define PFSTATE_HIWAT 10000 /* default state table size */ +#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ + +#ifndef NO_APPLE_EXTENSIONS +#define PFAPPSTATE_HIWAT 10000 /* default same as state table */ + +enum pf_extmap { + PF_EXTMAP_APD = 1, /* Address-port-dependent mapping */ + PF_EXTMAP_AD, /* Address-dependent mapping */ + PF_EXTMAP_EI /* Endpoint-independent mapping */ +}; + +enum pf_extfilter { + PF_EXTFILTER_APD = 1, /* Address-port-dependent filtering */ + PF_EXTFILTER_AD, /* Address-dependent filtering */ + PF_EXTFILTER_EI /* Endpoint-independent filtering */ +}; +#endif + +struct pf_threshold { + u_int32_t limit; +#define PF_THRESHOLD_MULT 1000 +#define PF_THRESHOLD_MAX 0xffffffff / PF_THRESHOLD_MULT + u_int32_t seconds; + u_int32_t count; + u_int32_t last; +}; + +struct pf_src_node { + RB_ENTRY(pf_src_node) entry; + struct pf_addr addr; + struct pf_addr raddr; + union pf_rule_ptr rule; + struct pfi_kif *kif; + u_int64_t bytes[2]; + u_int64_t packets[2]; + u_int32_t states; + u_int32_t conn; + struct pf_threshold conn_rate; + u_int64_t creation; + u_int64_t expire; + sa_family_t af; + u_int8_t 
ruletype; +}; + +#define PFSNODE_HIWAT 10000 /* default source node table size */ + +struct pf_state_scrub { + struct timeval pfss_last; /* time received last packet */ + u_int32_t pfss_tsecr; /* last echoed timestamp */ + u_int32_t pfss_tsval; /* largest timestamp */ + u_int32_t pfss_tsval0; /* original timestamp */ + u_int16_t pfss_flags; +#define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ +#define PFSS_PAWS 0x0010 /* stricter PAWS checks */ +#define PFSS_PAWS_IDLED 0x0020 /* was idle too long. no PAWS */ +#define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ +#define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ + u_int8_t pfss_ttl; /* stashed TTL */ + u_int8_t pad; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +}; + +#ifndef NO_APPLE_EXTENSIONS +union pf_state_xport { + u_int16_t port; + u_int16_t call_id; + u_int32_t spi; +}; + +struct pf_state_host { + struct pf_addr addr; + union pf_state_xport xport; +}; +#else +struct pf_state_host { + struct pf_addr addr; + u_int16_t port; + u_int16_t pad; +}; +#endif + +struct pf_state_peer { + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ + struct pf_state_scrub *scrub; /* state is scrubbed */ + u_int8_t pad[3]; +}; + +TAILQ_HEAD(pf_state_queue, pf_state); + +#ifndef NO_APPLE_EXTENSIONS +#ifdef KERNEL +struct pf_state; +struct pf_pdesc; +struct pf_app_state; + +typedef void (*pf_app_handler)(struct pf_state *, int, int, struct pf_pdesc *, + struct pfi_kif *); + +typedef int (*pf_app_compare)(struct pf_app_state *, struct pf_app_state *); + +struct pf_pptp_state { + struct pf_state *grev1_state; +}; + +struct pf_ike_state { + u_int64_t 
cookie; +}; + +struct pf_app_state { + pf_app_handler handler; + pf_app_compare compare_lan_ext; + pf_app_compare compare_ext_gwy; + union { + struct pf_pptp_state pptp; + struct pf_ike_state ike; + } u; +}; +#endif /* KERNEL */ +#define PF_GRE_PPTP_VARIANT 0x01 +#endif + +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_key_cmp { + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +#ifndef NO_APPLE_EXTENSIONS + u_int8_t proto_variant; + struct pf_app_state *app_state; +#else + u_int8_t pad; +#endif +}; + +TAILQ_HEAD(pf_statelist, pf_state); + +struct pf_state_key { + struct pf_state_host lan; + struct pf_state_host gwy; + struct pf_state_host ext; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +#ifndef NO_APPLE_EXTENSIONS + u_int8_t proto_variant; + struct pf_app_state *app_state; +#else + u_int8_t pad; +#endif + + RB_ENTRY(pf_state_key) entry_lan_ext; + RB_ENTRY(pf_state_key) entry_ext_gwy; + struct pf_statelist states; + u_short refcnt; /* same size as if_index */ +}; + + +/* keep synced with struct pf_state, used in RB_FIND */ +struct pf_state_cmp { + u_int64_t id; + u_int32_t creatorid; + u_int32_t pad; +}; + +struct hook_desc; +TAILQ_HEAD(hook_desc_head, hook_desc); + +struct pf_state { + u_int64_t id; + u_int32_t creatorid; + u_int32_t pad; + + TAILQ_ENTRY(pf_state) entry_list; + TAILQ_ENTRY(pf_state) next; + RB_ENTRY(pf_state) entry_id; + struct pf_state_peer src; + struct pf_state_peer dst; + union pf_rule_ptr rule; + union pf_rule_ptr anchor; + union pf_rule_ptr nat_rule; + struct pf_addr rt_addr; +#ifndef NO_APPLE_EXTENSIONS + struct hook_desc_head unlink_hooks; +#endif + struct pf_state_key *state_key; + struct pfi_kif *kif; + struct pfi_kif *rt_kif; + struct pf_src_node *src_node; + struct pf_src_node *nat_src_node; + u_int64_t packets[2]; + u_int64_t bytes[2]; + u_int64_t creation; + u_int64_t expire; + u_int64_t pfsync_time; + 
u_int16_t tag; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + u_int8_t sync_flags; +#define PFSTATE_NOSYNC 0x01 +#define PFSTATE_FROMSYNC 0x02 +#define PFSTATE_STALE 0x04 +}; + +#define __packed __attribute__((__packed__)) + +/* + * Unified state structures for pulling states out of the kernel + * used by pfsync(4) and the pf(4) ioctl. + */ +struct pfsync_state_scrub { + u_int16_t pfss_flags; + u_int8_t pfss_ttl; /* stashed TTL */ +#define PFSYNC_SCRUB_FLAG_VALID 0x01 + u_int8_t scrub_flag; + u_int32_t pfss_ts_mod; /* timestamp modulation */ +} __packed; + +struct pfsync_state_host { + struct pf_addr addr; +#ifndef NO_APPLE_EXTENSIONS + union pf_state_xport xport; + u_int16_t pad[2]; +#else + u_int16_t port; + u_int16_t pad[3]; +#endif +} __packed; + +struct pfsync_state_peer { + struct pfsync_state_scrub scrub; /* state is scrubbed */ + u_int32_t seqlo; /* Max sequence number sent */ + u_int32_t seqhi; /* Max the other end ACKd + win */ + u_int32_t seqdiff; /* Sequence number modulator */ + u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ + u_int8_t state; /* active state level */ + u_int8_t wscale; /* window scaling factor */ + u_int8_t pad[6]; +} __packed; + +struct pfsync_state { + u_int32_t id[2]; + char ifname[IFNAMSIZ]; + struct pfsync_state_host lan; + struct pfsync_state_host gwy; + struct pfsync_state_host ext; + struct pfsync_state_peer src; + struct pfsync_state_peer dst; + struct pf_addr rt_addr; +#ifndef NO_APPLE_EXTENSIONS + struct hook_desc_head unlink_hooks; +#if !defined(__LP64__) + u_int32_t _pad[2]; +#endif /* !__LP64__ */ +#endif + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int64_t creation; + u_int64_t expire; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; + u_int32_t creatorid; +#ifndef NO_APPLE_EXTENSIONS + u_int16_t tag; +#endif + sa_family_t af; + u_int8_t proto; + u_int8_t direction; + u_int8_t log; + u_int8_t allow_opts; + u_int8_t timeout; + 
u_int8_t sync_flags; + u_int8_t updates; +#ifndef NO_APPLE_EXTENSIONS + u_int8_t proto_variant; +#endif +} __packed; + +#define PFSYNC_FLAG_COMPRESS 0x01 +#define PFSYNC_FLAG_STALE 0x02 +#define PFSYNC_FLAG_SRCNODE 0x04 +#define PFSYNC_FLAG_NATSRCNODE 0x08 + +/* for copies to/from userland via pf_ioctl() */ +#define pf_state_peer_to_pfsync(s, d) do { \ + (d)->seqlo = (s)->seqlo; \ + (d)->seqhi = (s)->seqhi; \ + (d)->seqdiff = (s)->seqdiff; \ + (d)->max_win = (s)->max_win; \ + (d)->mss = (s)->mss; \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub) { \ + (d)->scrub.pfss_flags = \ + (s)->scrub->pfss_flags & PFSS_TIMESTAMP; \ + (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ + (d)->scrub.pfss_ts_mod = (s)->scrub->pfss_ts_mod; \ + (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ + } \ +} while (0) + +#define pf_state_peer_from_pfsync(s, d) do { \ + (d)->seqlo = (s)->seqlo; \ + (d)->seqhi = (s)->seqhi; \ + (d)->seqdiff = (s)->seqdiff; \ + (d)->max_win = (s)->max_win; \ + (d)->mss = ntohs((s)->mss); \ + (d)->state = (s)->state; \ + (d)->wscale = (s)->wscale; \ + if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ + (d)->scrub != NULL) { \ + (d)->scrub->pfss_flags = \ + ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ + (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ + (d)->scrub->pfss_ts_mod = (s)->scrub.pfss_ts_mod; \ + } \ +} while (0) + +#define pf_state_counter_to_pfsync(s, d) do { \ + d[0] = (s>>32)&0xffffffff; \ + d[1] = s&0xffffffff; \ +} while (0) + +#define pf_state_counter_from_pfsync(s) \ + (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) + + + +TAILQ_HEAD(pf_rulequeue, pf_rule); + +struct pf_anchor; + +struct pf_ruleset { + struct { + struct pf_rulequeue queues[2]; + struct { + struct pf_rulequeue *ptr; + struct pf_rule **ptr_array; + u_int32_t rcount; + u_int32_t ticket; + int open; + } active, inactive; + } rules[PF_RULESET_MAX]; + struct pf_anchor *anchor; + u_int32_t tticket; + int tables; + int topen; +}; + 
+RB_HEAD(pf_anchor_global, pf_anchor); +RB_HEAD(pf_anchor_node, pf_anchor); +struct pf_anchor { + RB_ENTRY(pf_anchor) entry_global; + RB_ENTRY(pf_anchor) entry_node; + struct pf_anchor *parent; + struct pf_anchor_node children; + char name[PF_ANCHOR_NAME_SIZE]; + char path[MAXPATHLEN]; + struct pf_ruleset ruleset; + int refcnt; /* anchor rules */ + int match; +}; +#ifdef KERNEL +RB_PROTOTYPE_SC(__private_extern__, pf_anchor_global, pf_anchor, entry_global, + pf_anchor_compare); +RB_PROTOTYPE_SC(__private_extern__, pf_anchor_node, pf_anchor, entry_node, + pf_anchor_compare); +#else /* !KERNEL */ +RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare); +RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); +#endif /* !KERNEL */ + +#define PF_RESERVED_ANCHOR "_pf" + +#define PFR_TFLAG_PERSIST 0x00000001 +#define PFR_TFLAG_CONST 0x00000002 +#define PFR_TFLAG_ACTIVE 0x00000004 +#define PFR_TFLAG_INACTIVE 0x00000008 +#define PFR_TFLAG_REFERENCED 0x00000010 +#define PFR_TFLAG_REFDANCHOR 0x00000020 +#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_SETMASK 0x0000003C +#define PFR_TFLAG_ALLMASK 0x0000003F + +struct pfr_table { + char pfrt_anchor[MAXPATHLEN]; + char pfrt_name[PF_TABLE_NAME_SIZE]; + u_int32_t pfrt_flags; + u_int8_t pfrt_fback; +}; + +enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, + PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; + +struct pfr_addr { + union { + struct in_addr _pfra_ip4addr; + struct in6_addr _pfra_ip6addr; + } pfra_u; + u_int8_t pfra_af; + u_int8_t pfra_net; + u_int8_t pfra_not; + u_int8_t pfra_fback; +}; +#define pfra_ip4addr pfra_u._pfra_ip4addr +#define pfra_ip6addr pfra_u._pfra_ip6addr + +enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; +enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; +#define PFR_OP_XPASS PFR_OP_ADDR_MAX + +struct pfr_astats { + struct pfr_addr pfras_a; + u_int64_t 
pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfras_tzero; +}; + +enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; + +struct pfr_tstats { + struct pfr_table pfrts_t; + u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; + u_int64_t pfrts_match; + u_int64_t pfrts_nomatch; + u_int64_t pfrts_tzero; + int pfrts_cnt; + int pfrts_refcnt[PFR_REFCNT_MAX]; +}; +#define pfrts_name pfrts_t.pfrt_name +#define pfrts_flags pfrts_t.pfrt_flags + +SLIST_HEAD(pfr_kentryworkq, pfr_kentry); +struct pfr_kentry { + struct radix_node pfrke_node[2]; + union sockaddr_union pfrke_sa; + u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + SLIST_ENTRY(pfr_kentry) pfrke_workq; + u_int64_t pfrke_tzero; + u_int8_t pfrke_af; + u_int8_t pfrke_net; + u_int8_t pfrke_not; + u_int8_t pfrke_mark; + u_int8_t pfrke_intrpool; +}; + +SLIST_HEAD(pfr_ktableworkq, pfr_ktable); +RB_HEAD(pfr_ktablehead, pfr_ktable); +struct pfr_ktable { + struct pfr_tstats pfrkt_ts; + RB_ENTRY(pfr_ktable) pfrkt_tree; + SLIST_ENTRY(pfr_ktable) pfrkt_workq; + struct radix_node_head *pfrkt_ip4; + struct radix_node_head *pfrkt_ip6; + struct pfr_ktable *pfrkt_shadow; + struct pfr_ktable *pfrkt_root; + struct pf_ruleset *pfrkt_rs; + u_int64_t pfrkt_larg; + u_int32_t pfrkt_nflags; +}; +#define pfrkt_t pfrkt_ts.pfrts_t +#define pfrkt_name pfrkt_t.pfrt_name +#define pfrkt_anchor pfrkt_t.pfrt_anchor +#define pfrkt_ruleset pfrkt_t.pfrt_ruleset +#define pfrkt_flags pfrkt_t.pfrt_flags +#define pfrkt_cnt pfrkt_ts.pfrts_cnt +#define pfrkt_refcnt pfrkt_ts.pfrts_refcnt +#define pfrkt_packets pfrkt_ts.pfrts_packets +#define pfrkt_bytes pfrkt_ts.pfrts_bytes +#define pfrkt_match pfrkt_ts.pfrts_match +#define pfrkt_nomatch pfrkt_ts.pfrts_nomatch +#define pfrkt_tzero pfrkt_ts.pfrts_tzero + +RB_HEAD(pf_state_tree_lan_ext, pf_state_key); +#ifdef KERNEL 
+RB_PROTOTYPE_SC(__private_extern__, pf_state_tree_lan_ext, pf_state_key, + entry_lan_ext, pf_state_compare_lan_ext); +#else /* !KERNEL */ +RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state_key, entry_lan_ext, + pf_state_compare_lan_ext); +#endif /* !KERNEL */ + +RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); +#ifdef KERNEL +RB_PROTOTYPE_SC(__private_extern__, pf_state_tree_ext_gwy, pf_state_key, + entry_ext_gwy, pf_state_compare_ext_gwy); +#else /* !KERNEL */ +RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, entry_ext_gwy, + pf_state_compare_ext_gwy); +#endif /* !KERNEL */ + +RB_HEAD(pfi_ifhead, pfi_kif); + +/* state tables */ +#ifdef KERNEL +__private_extern__ struct pf_state_tree_lan_ext pf_statetbl_lan_ext; +__private_extern__ struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; +#else /* !KERNEL */ +extern struct pf_state_tree_lan_ext pf_statetbl_lan_ext; +extern struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; +#endif /* !KERNEL */ + +/* keep synced with pfi_kif, used in RB_FIND */ +struct pfi_kif_cmp { + char pfik_name[IFNAMSIZ]; +}; + +struct pfi_kif { + char pfik_name[IFNAMSIZ]; + RB_ENTRY(pfi_kif) pfik_tree; + u_int64_t pfik_packets[2][2][2]; + u_int64_t pfik_bytes[2][2][2]; + u_int64_t pfik_tzero; + int pfik_flags; + void *pfik_ah_cookie; + struct ifnet *pfik_ifp; + int pfik_states; + int pfik_rules; + TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; +}; + +enum pfi_kif_refs { + PFI_KIF_REF_NONE, + PFI_KIF_REF_STATE, + PFI_KIF_REF_RULE +}; + +#define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ + +#ifdef KERNEL +struct pf_pdesc { + struct { + int done; + uid_t uid; + gid_t gid; + pid_t pid; + } lookup; + u_int64_t tot_len; /* Make Mickey money */ + union { + struct tcphdr *tcp; + struct udphdr *udp; + struct icmp *icmp; +#if INET6 + struct icmp6_hdr *icmp6; +#endif /* INET6 */ +#ifndef NO_APPLE_EXTENSIONS + struct pf_grev1_hdr *grev1; + struct pf_esp_hdr *esp; +#endif + void *any; + } hdr; + struct pf_addr baddr; /* address before translation */ + struct 
pf_addr naddr; /* address after translation */ + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ + struct pf_addr *src; + struct pf_addr *dst; + struct ether_header + *eh; +#ifndef NO_APPLE_EXTENSIONS + struct mbuf *mp; + int lmw; /* lazy writable offset */ +#endif + struct pf_mtag *pf_mtag; + u_int16_t *ip_sum; + u_int32_t p_len; /* total length of payload */ + u_int16_t flags; /* Let SCRUB trigger behavior in */ + /* state code. Easier than tags */ +#define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ +#define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ + sa_family_t af; + u_int8_t proto; + u_int8_t tos; +#ifndef NO_APPLE_EXTENSIONS + u_int8_t proto_variant; +#endif +}; +#endif /* KERNEL */ + +/* flags for RDR options */ +#define PF_DPORT_RANGE 0x01 /* Dest port uses range */ +#define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ + +/* Reasons code for passing/dropping a packet */ +#define PFRES_MATCH 0 /* Explicit match of a rule */ +#define PFRES_BADOFF 1 /* Bad offset for pull_hdr */ +#define PFRES_FRAG 2 /* Dropping following fragment */ +#define PFRES_SHORT 3 /* Dropping short packet */ +#define PFRES_NORM 4 /* Dropping by normalizer */ +#define PFRES_MEMORY 5 /* Dropped due to lacking mem */ +#define PFRES_TS 6 /* Bad TCP Timestamp (RFC1323) */ +#define PFRES_CONGEST 7 /* Congestion (of ipintrq) */ +#define PFRES_IPOPTIONS 8 /* IP option */ +#define PFRES_PROTCKSUM 9 /* Protocol checksum invalid */ +#define PFRES_BADSTATE 10 /* State mismatch */ +#define PFRES_STATEINS 11 /* State insertion failure */ +#define PFRES_MAXSTATES 12 /* State limit */ +#define PFRES_SRCLIMIT 13 /* Source node/conn limit */ +#define PFRES_SYNPROXY 14 /* SYN proxy */ +#define PFRES_MAX 15 /* total+1 */ + +#define PFRES_NAMES { \ + "match", \ + "bad-offset", \ + "fragment", \ + "short", \ + "normalize", \ + "memory", \ + "bad-timestamp", \ + "congestion", \ + "ip-option", \ + "proto-cksum", \ + "state-mismatch", \ + 
"state-insert", \ + "state-limit", \ + "src-limit", \ + "synproxy", \ + NULL \ +} + +/* Counters for other things we want to keep track of */ +#define LCNT_STATES 0 /* states */ +#define LCNT_SRCSTATES 1 /* max-src-states */ +#define LCNT_SRCNODES 2 /* max-src-nodes */ +#define LCNT_SRCCONN 3 /* max-src-conn */ +#define LCNT_SRCCONNRATE 4 /* max-src-conn-rate */ +#define LCNT_OVERLOAD_TABLE 5 /* entry added to overload table */ +#define LCNT_OVERLOAD_FLUSH 6 /* state entries flushed */ +#define LCNT_MAX 7 /* total+1 */ + +#define LCNT_NAMES { \ + "max states per rule", \ + "max-src-states", \ + "max-src-nodes", \ + "max-src-conn", \ + "max-src-conn-rate", \ + "overload table insertion", \ + "overload flush states", \ + NULL \ +} + +/* UDP state enumeration */ +#define PFUDPS_NO_TRAFFIC 0 +#define PFUDPS_SINGLE 1 +#define PFUDPS_MULTIPLE 2 + +#define PFUDPS_NSTATES 3 /* number of state levels */ + +#define PFUDPS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +#ifndef NO_APPLE_EXTENSIONS +/* GREv1 protocol state enumeration */ +#define PFGRE1S_NO_TRAFFIC 0 +#define PFGRE1S_INITIATING 1 +#define PFGRE1S_ESTABLISHED 2 + +#define PFGRE1S_NSTATES 3 /* number of state levels */ + +#define PFGRE1S_NAMES { \ + "NO_TRAFFIC", \ + "INITIATING", \ + "ESTABLISHED", \ + NULL \ +} + +#define PFESPS_NO_TRAFFIC 0 +#define PFESPS_INITIATING 1 +#define PFESPS_ESTABLISHED 2 + +#define PFESPS_NSTATES 3 /* number of state levels */ + +#define PFESPS_NAMES { "NO_TRAFFIC", "INITIATING", "ESTABLISHED", NULL } +#endif + +/* Other protocol state enumeration */ +#define PFOTHERS_NO_TRAFFIC 0 +#define PFOTHERS_SINGLE 1 +#define PFOTHERS_MULTIPLE 2 + +#define PFOTHERS_NSTATES 3 /* number of state levels */ + +#define PFOTHERS_NAMES { \ + "NO_TRAFFIC", \ + "SINGLE", \ + "MULTIPLE", \ + NULL \ +} + +#define FCNT_STATE_SEARCH 0 +#define FCNT_STATE_INSERT 1 +#define FCNT_STATE_REMOVALS 2 +#define FCNT_MAX 3 + +#define SCNT_SRC_NODE_SEARCH 0 +#define SCNT_SRC_NODE_INSERT 1 
+#define SCNT_SRC_NODE_REMOVALS 2 +#define SCNT_MAX 3 + +#define ACTION_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + } while (0) + +#define REASON_SET(a, x) \ + do { \ + if ((a) != NULL) \ + *(a) = (x); \ + if (x < PFRES_MAX) \ + pf_status.counters[x]++; \ + } while (0) + +struct pf_status { + u_int64_t counters[PFRES_MAX]; + u_int64_t lcounters[LCNT_MAX]; /* limit counters */ + u_int64_t fcounters[FCNT_MAX]; + u_int64_t scounters[SCNT_MAX]; + u_int64_t pcounters[2][2][3]; + u_int64_t bcounters[2][2]; + u_int64_t stateid; + u_int32_t running; + u_int32_t states; + u_int32_t src_nodes; + u_int64_t since __attribute__((aligned(8))); + u_int32_t debug; + u_int32_t hostid; + char ifname[IFNAMSIZ]; + u_int8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; +}; + +struct cbq_opts { + u_int minburst; + u_int maxburst; + u_int pktsize; + u_int maxpktsize; + u_int ns_per_byte; + u_int maxidle; + int minidle; + u_int offtime; + int flags; +}; + +struct priq_opts { + int flags; +}; + +struct hfsc_opts { + /* real-time service curve */ + u_int rtsc_m1; /* slope of the 1st segment in bps */ + u_int rtsc_d; /* the x-projection of m1 in msec */ + u_int rtsc_m2; /* slope of the 2nd segment in bps */ + /* link-sharing service curve */ + u_int lssc_m1; + u_int lssc_d; + u_int lssc_m2; + /* upper-limit service curve */ + u_int ulsc_m1; + u_int ulsc_d; + u_int ulsc_m2; + int flags; +}; + +struct pf_altq { + char ifname[IFNAMSIZ]; + + void *altq_disc; /* discipline-specific state */ +#if !defined(__LP64__) + u_int32_t _pad; +#endif /* !__LP64__ */ + TAILQ_ENTRY(pf_altq) entries; +#if !defined(__LP64__) + u_int32_t __pad[2]; +#endif /* !__LP64__ */ + + /* scheduler spec */ + u_int8_t scheduler; /* scheduler type */ + u_int16_t tbrsize; /* tokenbucket regulator size */ + u_int32_t ifbandwidth; /* interface bandwidth */ + + /* queue spec */ + char qname[PF_QNAME_SIZE]; /* queue name */ + char parent[PF_QNAME_SIZE]; /* parent name */ + u_int32_t parent_qid; /* parent queue id */ + 
u_int32_t bandwidth; /* queue bandwidth */ + u_int8_t priority; /* priority */ + u_int16_t qlimit; /* queue size limit */ + u_int16_t flags; /* misc flags */ + union { + struct cbq_opts cbq_opts; + struct priq_opts priq_opts; + struct hfsc_opts hfsc_opts; + } pq_u; + + u_int32_t qid; /* return value */ +}; + +struct pf_tagname { + TAILQ_ENTRY(pf_tagname) entries; + char name[PF_TAG_NAME_SIZE]; + u_int16_t tag; + int ref; +}; + +#define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ +#define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ +#define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ +#define PFFRAG_FRCACHE_HIWAT 10000 /* Number of fragment descriptors */ + +#define PFR_KTABLE_HIWAT 1000 /* Number of tables */ +#define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ +#define PFR_KENTRY_HIWAT_SMALL 100000 /* Number of table entries (tiny hosts) */ + +/* + * ioctl parameter structures + */ + +struct pfioc_pooladdr { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + u_int32_t r_num; + u_int8_t r_action; + u_int8_t r_last; + u_int8_t af; + char anchor[MAXPATHLEN]; + struct pf_pooladdr addr; +}; + +struct pfioc_rule { + u_int32_t action; + u_int32_t ticket; + u_int32_t pool_ticket; + u_int32_t nr; + char anchor[MAXPATHLEN]; + char anchor_call[MAXPATHLEN]; + struct pf_rule rule; +}; + +struct pfioc_natlook { + struct pf_addr saddr; + struct pf_addr daddr; + struct pf_addr rsaddr; + struct pf_addr rdaddr; +#ifndef NO_APPLE_EXTENSIONS + union pf_state_xport sxport; + union pf_state_xport dxport; + union pf_state_xport rsxport; + union pf_state_xport rdxport; + sa_family_t af; + u_int8_t proto; + u_int8_t proto_variant; + u_int8_t direction; +#else + u_int16_t sport; + u_int16_t dport; + u_int16_t rsport; + u_int16_t rdport; + sa_family_t af; + u_int8_t proto; + u_int8_t direction; +#endif +}; + +struct pfioc_state { + struct pfsync_state state; +}; + +struct pfioc_src_node_kill { + /* XXX returns the number of 
src nodes killed in psnk_af */ + sa_family_t psnk_af; + struct pf_rule_addr psnk_src; + struct pf_rule_addr psnk_dst; +}; + +#ifndef NO_APPLE_EXTENSIONS +struct pfioc_state_addr_kill { + struct pf_addr_wrap addr; + u_int8_t reserved_[3]; + u_int8_t neg; + union pf_rule_xport xport; +}; +#endif + +struct pfioc_state_kill { + /* XXX returns the number of states killed in psk_af */ + sa_family_t psk_af; +#ifndef NO_APPLE_EXTENSIONS + u_int8_t psk_proto; + u_int8_t psk_proto_variant; + u_int8_t _pad; + struct pfioc_state_addr_kill psk_src; + struct pfioc_state_addr_kill psk_dst; +#else + int psk_proto; + struct pf_rule_addr psk_src; + struct pf_rule_addr psk_dst; +#endif + char psk_ifname[IFNAMSIZ]; +}; + +struct pfioc_states { + int ps_len; + union { + caddr_t psu_buf; + struct pfsync_state *psu_states; + } ps_u __attribute__((aligned(8))); +#define ps_buf ps_u.psu_buf +#define ps_states ps_u.psu_states +}; + +struct pfioc_src_nodes { + int psn_len; + union { + caddr_t psu_buf; + struct pf_src_node *psu_src_nodes; + } psn_u __attribute__((aligned(8))); +#define psn_buf psn_u.psu_buf +#define psn_src_nodes psn_u.psu_src_nodes +}; + +struct pfioc_if { + char ifname[IFNAMSIZ]; +}; + +struct pfioc_tm { + int timeout; + int seconds; +}; + +struct pfioc_limit { + int index; + unsigned limit; +}; + +struct pfioc_altq { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + struct pf_altq altq __attribute__((aligned(8))); +}; + +struct pfioc_qstats { + u_int32_t ticket; + u_int32_t nr; + void *buf __attribute__((aligned(8))); + int nbytes __attribute__((aligned(8))); + u_int8_t scheduler; +}; + +struct pfioc_ruleset { + u_int32_t nr; + char path[MAXPATHLEN]; + char name[PF_ANCHOR_NAME_SIZE]; +}; + +#define PF_RULESET_ALTQ (PF_RULESET_MAX) +#define PF_RULESET_TABLE (PF_RULESET_MAX+1) +struct pfioc_trans { + int size; /* number of elements */ + int esize; /* size of each element in bytes */ + struct pfioc_trans_e { + int rs_num; + char anchor[MAXPATHLEN]; + u_int32_t ticket; 
+ } *array __attribute__((aligned(8))); +}; + +#define PFR_FLAG_ATOMIC 0x00000001 +#define PFR_FLAG_DUMMY 0x00000002 +#define PFR_FLAG_FEEDBACK 0x00000004 +#define PFR_FLAG_CLSTATS 0x00000008 +#define PFR_FLAG_ADDRSTOO 0x00000010 +#define PFR_FLAG_REPLACE 0x00000020 +#define PFR_FLAG_ALLRSETS 0x00000040 +#define PFR_FLAG_ALLMASK 0x0000007F +#ifdef KERNEL +#define PFR_FLAG_USERIOCTL 0x10000000 +#endif + +struct pfioc_table { + struct pfr_table pfrio_table; + void *pfrio_buffer __attribute__((aligned(8))); + int pfrio_esize __attribute__((aligned(8))); + int pfrio_size; + int pfrio_size2; + int pfrio_nadd; + int pfrio_ndel; + int pfrio_nchange; + int pfrio_flags; + u_int32_t pfrio_ticket; +}; +#define pfrio_exists pfrio_nadd +#define pfrio_nzero pfrio_nadd +#define pfrio_nmatch pfrio_nadd +#define pfrio_naddr pfrio_size2 +#define pfrio_setflag pfrio_size2 +#define pfrio_clrflag pfrio_nadd + +struct pfioc_iface { + char pfiio_name[IFNAMSIZ]; + void *pfiio_buffer __attribute__((aligned(8))); + int pfiio_esize __attribute__((aligned(8))); + int pfiio_size; + int pfiio_nzero; + int pfiio_flags; +}; + + +/* + * ioctl operations + */ + +#define DIOCSTART _IO ('D', 1) +#define DIOCSTOP _IO ('D', 2) +#define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) +#define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) +#define DIOCGETRULE _IOWR('D', 7, struct pfioc_rule) +/* XXX cut 8 - 17 */ +#define DIOCCLRSTATES _IOWR('D', 18, struct pfioc_state_kill) +#define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) +#define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) +#define DIOCGETSTATUS _IOWR('D', 21, struct pf_status) +#define DIOCCLRSTATUS _IO ('D', 22) +#define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) +#define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) +#define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) +#define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) +/* XXX cut 26 - 28 */ +#define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) +#define DIOCGETTIMEOUT 
_IOWR('D', 30, struct pfioc_tm) +#define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) +#define DIOCCLRRULECTRS _IO ('D', 38) +#define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) +#define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) +#define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) +#define DIOCSTARTALTQ _IO ('D', 42) +#define DIOCSTOPALTQ _IO ('D', 43) +#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) +#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) +#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) +#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) +#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) +#define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) +#define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) +#define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) +#define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) +/* XXX cut 55 - 57 */ +#define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) +#define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) +#define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) +#define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) +#define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) +#define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) +#define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) +#define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) +#define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) +#define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) +#define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) +#define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) +#define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) +#define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) +#define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) +#define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) +#define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) +#define 
DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) +#define DIOCOSFPFLUSH _IO('D', 78) +#define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) +#define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) +#define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) +#define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) +#define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) +#define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) +#define DIOCCLRSRCNODES _IO('D', 85) +#define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) +#define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) +#define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) +#define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) +#define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) + +#ifdef KERNEL +RB_HEAD(pf_src_tree, pf_src_node); +RB_PROTOTYPE_SC(__private_extern__, pf_src_tree, pf_src_node, entry, + pf_src_compare); +__private_extern__ struct pf_src_tree tree_src_tracking; + +RB_HEAD(pf_state_tree_id, pf_state); +RB_PROTOTYPE_SC(__private_extern__, pf_state_tree_id, pf_state, + entry_id, pf_state_compare_id); +__private_extern__ struct pf_state_tree_id tree_id; +__private_extern__ struct pf_state_queue state_list; + +TAILQ_HEAD(pf_poolqueue, pf_pool); +__private_extern__ struct pf_poolqueue pf_pools[2]; +__private_extern__ struct pf_palist pf_pabuf; +__private_extern__ u_int32_t ticket_pabuf; +#if ALTQ +TAILQ_HEAD(pf_altqqueue, pf_altq); +__private_extern__ struct pf_altqqueue pf_altqs[2]; +__private_extern__ u_int32_t ticket_altqs_active; +__private_extern__ u_int32_t ticket_altqs_inactive; +__private_extern__ int altqs_inactive_open; +__private_extern__ struct pf_altqqueue *pf_altqs_active; +__private_extern__ struct pf_altqqueue *pf_altqs_inactive; +#endif /* ALTQ */ +__private_extern__ struct pf_poolqueue *pf_pools_active; +__private_extern__ struct pf_poolqueue *pf_pools_inactive; + +__private_extern__ int pf_tbladdr_setup(struct pf_ruleset *, + struct pf_addr_wrap *); 
+__private_extern__ void pf_tbladdr_remove(struct pf_addr_wrap *); +__private_extern__ void pf_tbladdr_copyout(struct pf_addr_wrap *); +__private_extern__ void pf_calc_skip_steps(struct pf_rulequeue *); + +__private_extern__ struct pool pf_src_tree_pl, pf_rule_pl; +__private_extern__ struct pool pf_state_pl, pf_state_key_pl, pf_pooladdr_pl; +__private_extern__ struct pool pf_state_scrub_pl; +#if ALTQ +__private_extern__ struct pool pf_altq_pl; +#endif /* ALTQ */ +#ifndef NO_APPLE_EXTENSIONS +__private_extern__ struct pool pf_app_state_pl; +#endif + +__private_extern__ struct thread *pf_purge_thread; + +__private_extern__ void pfinit(void); +__private_extern__ void pf_purge_thread_fn(void *, wait_result_t); +__private_extern__ void pf_purge_expired_src_nodes(void); +__private_extern__ void pf_purge_expired_states(u_int32_t); +__private_extern__ void pf_unlink_state(struct pf_state *); +__private_extern__ void pf_free_state(struct pf_state *); +__private_extern__ int pf_insert_state(struct pfi_kif *, struct pf_state *); +__private_extern__ int pf_insert_src_node(struct pf_src_node **, + struct pf_rule *, struct pf_addr *, sa_family_t); +__private_extern__ void pf_src_tree_remove_state(struct pf_state *); +__private_extern__ struct pf_state *pf_find_state_byid(struct pf_state_cmp *); +__private_extern__ struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, + u_int, int *); +__private_extern__ void pf_print_state(struct pf_state *); +__private_extern__ void pf_print_flags(u_int8_t); +__private_extern__ u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, + u_int8_t); + +__private_extern__ struct ifnet *sync_ifp; +__private_extern__ struct pf_rule pf_default_rule; +__private_extern__ void pf_addrcpy(struct pf_addr *, struct pf_addr *, + u_int8_t); +__private_extern__ void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); + +#if INET +__private_extern__ int pf_test(int, struct ifnet *, struct mbuf **, + struct ether_header *); +#endif /* INET */ + +#if 
INET6 +__private_extern__ int pf_test6(int, struct ifnet *, struct mbuf **, + struct ether_header *); +__private_extern__ void pf_poolmask(struct pf_addr *, struct pf_addr *, + struct pf_addr *, struct pf_addr *, u_int8_t); +__private_extern__ void pf_addr_inc(struct pf_addr *, sa_family_t); +#endif /* INET6 */ + +#ifndef NO_APPLE_EXTENSIONS +__private_extern__ struct mbuf *pf_lazy_makewritable(struct pf_pdesc *, + struct mbuf *, int); +#endif +__private_extern__ void *pf_pull_hdr(struct mbuf *, int, void *, int, + u_short *, u_short *, sa_family_t); +__private_extern__ void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); +__private_extern__ int pflog_packet(struct pfi_kif *, struct mbuf *, + sa_family_t, u_int8_t, u_int8_t, struct pf_rule *, struct pf_rule *, + struct pf_ruleset *, struct pf_pdesc *); +__private_extern__ int pf_match_addr(u_int8_t, struct pf_addr *, + struct pf_addr *, struct pf_addr *, sa_family_t); +__private_extern__ int pf_match_addr_range(struct pf_addr *, struct pf_addr *, + struct pf_addr *, sa_family_t); +__private_extern__ int pf_match(u_int8_t, u_int32_t, u_int32_t, u_int32_t); +__private_extern__ int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); +#ifndef NO_APPLE_EXTENSIONS +__private_extern__ int pf_match_xport(u_int8_t, u_int8_t, union pf_rule_xport *, + union pf_state_xport *); +#endif +__private_extern__ int pf_match_uid(u_int8_t, uid_t, uid_t, uid_t); +__private_extern__ int pf_match_gid(u_int8_t, gid_t, gid_t, gid_t); + +__private_extern__ void pf_normalize_init(void); +__private_extern__ int pf_normalize_isempty(void); +__private_extern__ int pf_normalize_ip(struct mbuf **, int, struct pfi_kif *, + u_short *, struct pf_pdesc *); +__private_extern__ int pf_normalize_ip6(struct mbuf **, int, struct pfi_kif *, + u_short *, struct pf_pdesc *); +__private_extern__ int pf_normalize_tcp(int, struct pfi_kif *, struct mbuf *, + int, int, void *, struct pf_pdesc *); +__private_extern__ void 
pf_normalize_tcp_cleanup(struct pf_state *); +__private_extern__ int pf_normalize_tcp_init(struct mbuf *, int, + struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *, + struct pf_state_peer *); +__private_extern__ int pf_normalize_tcp_stateful(struct mbuf *, int, + struct pf_pdesc *, u_short *, struct tcphdr *, struct pf_state *, + struct pf_state_peer *, struct pf_state_peer *, int *); +__private_extern__ u_int64_t pf_state_expires(const struct pf_state *); +__private_extern__ void pf_purge_expired_fragments(void); +__private_extern__ int pf_routable(struct pf_addr *addr, sa_family_t af, + struct pfi_kif *); +__private_extern__ int pf_rtlabel_match(struct pf_addr *, sa_family_t, + struct pf_addr_wrap *); +__private_extern__ int pf_socket_lookup(int, struct pf_pdesc *); +__private_extern__ struct pf_state_key *pf_alloc_state_key(struct pf_state *); +__private_extern__ void pfr_initialize(void); +__private_extern__ int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, + sa_family_t); +__private_extern__ void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, + sa_family_t, u_int64_t, int, int, int); +__private_extern__ int pfr_pool_get(struct pfr_ktable *, int *, + struct pf_addr *, struct pf_addr **, struct pf_addr **, sa_family_t); +__private_extern__ void pfr_dynaddr_update(struct pfr_ktable *, + struct pfi_dynaddr *); +__private_extern__ struct pfr_ktable *pfr_attach_table(struct pf_ruleset *, + char *); +__private_extern__ void pfr_detach_table(struct pfr_ktable *); +__private_extern__ int pfr_clr_tables(struct pfr_table *, int *, int); +__private_extern__ int pfr_add_tables(struct pfr_table *, int, int *, int); +__private_extern__ int pfr_del_tables(struct pfr_table *, int, int *, int); +__private_extern__ int pfr_get_tables(struct pfr_table *, struct pfr_table *, + int *, int); +__private_extern__ int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, + int *, int); +__private_extern__ int pfr_clr_tstats(struct pfr_table *, int, int *, 
int); +__private_extern__ int pfr_set_tflags(struct pfr_table *, int, int, int, int *, + int *, int); +__private_extern__ int pfr_clr_addrs(struct pfr_table *, int *, int); +__private_extern__ int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, + u_int64_t); +__private_extern__ int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, + int, int *, int); +__private_extern__ int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, + int, int *, int); +__private_extern__ int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, + int, int *, int *, int *, int *, int, u_int32_t); +__private_extern__ int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, + int *, int); +__private_extern__ int pfr_get_astats(struct pfr_table *, struct pfr_astats *, + int *, int); +__private_extern__ int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, + int, int *, int); +__private_extern__ int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, + int, int *, int); +__private_extern__ int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, + int); +__private_extern__ int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, + int); +__private_extern__ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, + int *, int); +__private_extern__ int pfr_ina_define(struct pfr_table *, struct pfr_addr *, + int, int *, int *, u_int32_t, int); + +__private_extern__ struct pfi_kif *pfi_all; + +__private_extern__ void pfi_initialize(void); +__private_extern__ struct pfi_kif *pfi_kif_get(const char *); +__private_extern__ void pfi_kif_ref(struct pfi_kif *, enum pfi_kif_refs); +__private_extern__ void pfi_kif_unref(struct pfi_kif *, enum pfi_kif_refs); +__private_extern__ int pfi_kif_match(struct pfi_kif *, struct pfi_kif *); +__private_extern__ void pfi_attach_ifnet(struct ifnet *); +__private_extern__ void pfi_detach_ifnet(struct ifnet *); +__private_extern__ int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, + sa_family_t); +__private_extern__ int 
pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); +__private_extern__ void pfi_dynaddr_remove(struct pf_addr_wrap *); +__private_extern__ void pfi_dynaddr_copyout(struct pf_addr_wrap *); +__private_extern__ void pfi_update_status(const char *, struct pf_status *); +__private_extern__ int pfi_get_ifaces(const char *, struct pfi_kif *, int *); +__private_extern__ int pfi_set_flags(const char *, int); +__private_extern__ int pfi_clear_flags(const char *, int); + +__private_extern__ u_int16_t pf_tagname2tag(char *); +__private_extern__ void pf_tag2tagname(u_int16_t, char *); +__private_extern__ void pf_tag_ref(u_int16_t); +__private_extern__ void pf_tag_unref(u_int16_t); +__private_extern__ int pf_tag_packet(struct mbuf *, struct pf_mtag *, + int, unsigned int); +__private_extern__ u_int32_t pf_qname2qid(char *); +__private_extern__ void pf_qid2qname(u_int32_t, char *); +__private_extern__ void pf_qid_unref(u_int32_t); + +__private_extern__ struct pf_status pf_status; +__private_extern__ struct pool pf_frent_pl, pf_frag_pl; + +struct pf_pool_limit { + void *pp; + unsigned limit; +}; +__private_extern__ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; + +__private_extern__ int pf_af_hook(struct ifnet *, struct mbuf **, + struct mbuf **, unsigned int, int); +__private_extern__ int pf_ifaddr_hook(struct ifnet *, unsigned long); +__private_extern__ void pf_ifnet_hook(struct ifnet *, int); + +/* + * The following are defined with "private extern" storage class for + * kernel, and "extern" for user-space. 
+ */ +__private_extern__ struct pf_anchor_global pf_anchors; +__private_extern__ struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +__private_extern__ int pf_get_ruleset_number(u_int8_t); +__private_extern__ void pf_init_ruleset(struct pf_ruleset *); +__private_extern__ int pf_anchor_setup(struct pf_rule *, + const struct pf_ruleset *, const char *); +__private_extern__ int pf_anchor_copyout(const struct pf_ruleset *, + const struct pf_rule *, struct pfioc_rule *); +__private_extern__ void pf_anchor_remove(struct pf_rule *); +__private_extern__ void pf_remove_if_empty_ruleset(struct pf_ruleset *); +__private_extern__ struct pf_anchor *pf_find_anchor(const char *); +__private_extern__ struct pf_ruleset *pf_find_ruleset(const char *); +__private_extern__ struct pf_ruleset *pf_find_or_create_ruleset(const char *); +__private_extern__ void pf_rs_initialize(void); + +__private_extern__ int pf_osfp_add(struct pf_osfp_ioctl *); +__private_extern__ struct pf_osfp_enlist *pf_osfp_fingerprint(struct pf_pdesc *, + struct mbuf *, int, const struct tcphdr *); +__private_extern__ struct pf_osfp_enlist *pf_osfp_fingerprint_hdr( + const struct ip *, const struct ip6_hdr *, const struct tcphdr *); +__private_extern__ void pf_osfp_flush(void); +__private_extern__ int pf_osfp_get(struct pf_osfp_ioctl *); +__private_extern__ void pf_osfp_initialize(void); +__private_extern__ int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); +__private_extern__ struct pf_os_fingerprint *pf_osfp_validate(void); +#else /* !KERNEL */ +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; +#define pf_main_ruleset pf_main_anchor.ruleset + +/* these ruleset functions can be linked into userland programs (pfctl) */ +extern int pf_get_ruleset_number(u_int8_t); +extern void pf_init_ruleset(struct pf_ruleset *); +extern int pf_anchor_setup(struct pf_rule *, const struct 
pf_ruleset *, + const char *); +extern int pf_anchor_copyout(const struct pf_ruleset *, const struct pf_rule *, + struct pfioc_rule *); +extern void pf_anchor_remove(struct pf_rule *); +extern void pf_remove_if_empty_ruleset(struct pf_ruleset *); +extern struct pf_anchor *pf_find_anchor(const char *); +extern struct pf_ruleset *pf_find_ruleset(const char *); +extern struct pf_ruleset *pf_find_or_create_ruleset(const char *); +extern void pf_rs_initialize(void); +#endif /* !KERNEL */ + +#ifdef __cplusplus +} +#endif +#endif /* PF || !KERNEL */ +#endif /* PRIVATE */ +#endif /* _NET_PFVAR_H_ */ diff --git a/bsd/net/radix.c b/bsd/net/radix.c index 876675d54..1213828f7 100644 --- a/bsd/net/radix.c +++ b/bsd/net/radix.c @@ -106,7 +106,7 @@ lck_mtx_t *rn_mutex; #define rn_masktop (mask_rnhead->rnh_treetop) #undef Bcmp #define Bcmp(a, b, l) \ - (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l)) + (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l)) static int rn_lexobetter(void *m_arg, void *n_arg); static struct radix_mask * diff --git a/bsd/net/radix.h b/bsd/net/radix.h index 6fa9f77ca..88ce9732f 100644 --- a/bsd/net/radix.h +++ b/bsd/net/radix.h @@ -191,7 +191,7 @@ struct radix_node_head { #define Bcmp(a, b, n) bcmp(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) #define Bcopy(a, b, n) bcopy(((caddr_t)(a)), ((caddr_t)(b)), (unsigned)(n)) #define Bzero(p, n) bzero((caddr_t)(p), (unsigned)(n)); -#define R_Malloc(p, t, n) (p = (t) _MALLOC((unsigned long)(n), M_RTABLE, M_WAITOK)) +#define R_Malloc(p, t, n) (p = (t) _MALLOC((uint32_t)(n), M_RTABLE, M_WAITOK)) #define R_Free(p) FREE((caddr_t)p, M_RTABLE); #endif /*KERNEL*/ diff --git a/bsd/net/raw_cb.c b/bsd/net/raw_cb.c index dee6205f0..64acd6719 100644 --- a/bsd/net/raw_cb.c +++ b/bsd/net/raw_cb.c @@ -81,8 +81,8 @@ struct rawcb_list_head rawcb_list; -static u_long raw_sendspace = RAWSNDQ; -static u_long raw_recvspace = RAWRCVQ; +static uint32_t raw_sendspace = RAWSNDQ; +static uint32_t raw_recvspace = 
RAWRCVQ; extern lck_mtx_t *raw_mtx; /*### global raw cb mutex for now */ /* @@ -170,6 +170,8 @@ raw_bind(struct socket *so, struct mbuf *nam) return (EADDRNOTAVAIL); rp = sotorawcb(so); nam = m_copym(nam, 0, M_COPYALL, M_WAITOK); + if (nam == NULL) + return ENOBUFS; rp->rcb_laddr = mtod(nam, struct sockaddr *); return (0); } diff --git a/bsd/net/raw_cb.h b/bsd/net/raw_cb.h index 2ad95f527..90655ca98 100644 --- a/bsd/net/raw_cb.h +++ b/bsd/net/raw_cb.h @@ -77,7 +77,7 @@ struct rawcb { struct sockaddr *rcb_faddr; /* destination address */ struct sockaddr *rcb_laddr; /* socket's address */ struct sockproto rcb_proto; /* protocol family, protocol */ - u_long reserved[4]; /* for future use */ + uint32_t reserved[4]; /* for future use */ }; #define sotorawcb(so) ((struct rawcb *)(so)->so_pcb) @@ -101,6 +101,6 @@ void raw_input(struct mbuf *, struct sockproto *, struct sockaddr *, struct sockaddr *); extern struct pr_usrreqs raw_usrreqs; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/net/route.c b/bsd/net/route.c index 4e4fb302e..66082731e 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -87,6 +87,104 @@ #include +/* + * Synchronization notes: + * + * Routing entries fall under two locking domains: the global routing table + * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that + * resides (statically defined) in the rtentry structure. + * + * The locking domains for routing are defined as follows: + * + * The global routing lock is used to serialize all accesses to the radix + * trees defined by rt_tables[], as well as the tree of masks. This includes + * lookups, insertions and removals of nodes to/from the respective tree. + * It is also used to protect certain fields in the route entry that aren't + * often modified and/or require global serialization (more details below.) + * + * The per-route entry lock is used to serialize accesses to several routing + * entry fields (more details below.) 
Acquiring and releasing this lock is + * done via RT_LOCK() and RT_UNLOCK() routines. + * + * In cases where both rnh_lock and rt_lock must be held, the former must be + * acquired first in order to maintain lock ordering. It is not a requirement + * that rnh_lock be acquired first before rt_lock, but in case both must be + * acquired in succession, the correct lock ordering must be followed. + * + * The fields of the rtentry structure are protected in the following way: + * + * rt_nodes[] + * + * - Routing table lock (rnh_lock). + * + * rt_parent, rt_mask, rt_llinfo_free + * + * - Set once during creation and never changes; no locks to read. + * + * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute + * + * - Routing entry lock (rt_lock) for read/write access. + * + * - Some values of rt_flags are either set once at creation time, + * or aren't currently used, and thus checking against them can + * be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC, + * RTF_DONE, RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE, + * RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL, + * RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE. + * + * rt_key, rt_gateway, rt_ifp, rt_ifa + * + * - Always written/modified with both rnh_lock and rt_lock held. + * + * - May be read freely with rnh_lock held, else must hold rt_lock + * for read access; holding both locks for read is also okay. + * + * - In the event rnh_lock is not acquired, or is not possible to be + * acquired across the operation, setting RTF_CONDEMNED on a route + * entry will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa + * from being modified. This is typically done on a route that + * has been chosen for a removal (from the tree) prior to dropping + * the rt_lock, so that those values will remain the same until + * the route is freed. + * + * When rnh_lock is held rt_setgate(), rt_setif(), and rtsetifa() are + * single-threaded, thus exclusive. 
This flag will also prevent the + * route from being looked up via rt_lookup(). + * + * generation_id + * + * - Assumes that 32-bit writes are atomic; no locks. + * + * rt_dlt, rt_output + * + * - Currently unused; no locks. + * + * Operations on a route entry can be described as follows: + * + * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE. + * + * INSERTION of an entry into the radix tree holds the rnh_lock, checks + * for duplicates and then adds the entry. rtrequest returns the entry + * after bumping up the reference count to 1 (for the caller). + * + * LOOKUP of an entry holds the rnh_lock and bumps up the reference count + * before returning; it is valid to also bump up the reference count using + * RT_ADDREF after the lookup has returned an entry. + * + * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the + * entry but does not decrement the reference count. Removal happens when + * the route is explicitly deleted (RTM_DELETE) or when it is in the cached + * state and it expires. The route is said to be "down" when it is no + * longer present in the tree. Freeing the entry will happen on the last + * reference release of such a "down" route. + * + * RT_ADDREF/RT_REMREF operates on the routing entry which increments/ + * decrements the reference count, rt_refcnt, atomically on the rtentry. + * rt_refcnt is modified only using this routine. The general rule is to + * do RT_ADDREF in the function that is passing the entry as an argument, + * in order to prevent the entry from being freed by the callee. 
+ */ + #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) #define SA(p) ((struct sockaddr *)(p)) @@ -97,30 +195,38 @@ struct route_cb route_cb; __private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 }; struct radix_node_head *rt_tables[AF_MAX+1]; -lck_mtx_t *rt_mtx; /*### global routing tables mutex for now */ -lck_attr_t *rt_mtx_attr; -lck_grp_t *rt_mtx_grp; -lck_grp_attr_t *rt_mtx_grp_attr; +lck_mtx_t *rnh_lock; /* global routing tables mutex */ +static lck_attr_t *rnh_lock_attr; +static lck_grp_t *rnh_lock_grp; +static lck_grp_attr_t *rnh_lock_grp_attr; + +/* Lock group and attribute for routing entry locks */ +static lck_attr_t *rte_mtx_attr; +static lck_grp_t *rte_mtx_grp; +static lck_grp_attr_t *rte_mtx_grp_attr; lck_mtx_t *route_domain_mtx; /*### global routing tables mutex for now */ int rttrash = 0; /* routes not in table but not freed */ -static unsigned int rte_debug; +unsigned int rte_debug; /* Possible flags for rte_debug */ #define RTD_DEBUG 0x1 /* enable or disable rtentry debug facility */ -#define RTD_TRACE 0x2 /* trace alloc, free and refcnt */ +#define RTD_TRACE 0x2 /* trace alloc, free, refcnt and lock */ #define RTD_NO_FREE 0x4 /* don't free (good to catch corruptions) */ +#define RTE_NAME "rtentry" /* name for zone and rt_lock */ + static struct zone *rte_zone; /* special zone for rtentry */ #define RTE_ZONE_MAX 65536 /* maximum elements in zone */ -#define RTE_ZONE_NAME "rtentry" /* name of rtentry zone */ +#define RTE_ZONE_NAME RTE_NAME /* name of rtentry zone */ #define RTD_INUSE 0xFEEDFACE /* entry is in use */ #define RTD_FREED 0xDEADBEEF /* entry is freed */ -#define RTD_TRSTACK_SIZE 8 /* depth of stack trace */ -#define RTD_REFHIST_SIZE 4 /* refcnt history size */ +/* For gdb */ +__private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE; +__private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE; /* * Debug variant of rtentry structure. 
@@ -128,66 +234,74 @@ static struct zone *rte_zone; /* special zone for rtentry */ struct rtentry_dbg { struct rtentry rtd_entry; /* rtentry */ struct rtentry rtd_entry_saved; /* saved rtentry */ - u_int32_t rtd_inuse; /* in use pattern */ - u_int16_t rtd_refhold_cnt; /* # of rtref */ - u_int16_t rtd_refrele_cnt; /* # of rtunref */ + uint32_t rtd_inuse; /* in use pattern */ + uint16_t rtd_refhold_cnt; /* # of rtref */ + uint16_t rtd_refrele_cnt; /* # of rtunref */ + uint32_t rtd_lock_cnt; /* # of locks */ + uint32_t rtd_unlock_cnt; /* # of unlocks */ /* - * Thread and PC stack trace up to RTD_TRSTACK_SIZE - * deep during alloc and free. + * Alloc and free callers. */ - struct thread *rtd_alloc_thread; - void *rtd_alloc_stk_pc[RTD_TRSTACK_SIZE]; - struct thread *rtd_free_thread; - void *rtd_free_stk_pc[RTD_TRSTACK_SIZE]; + ctrace_t rtd_alloc; + ctrace_t rtd_free; /* * Circular lists of rtref and rtunref callers. */ - u_int16_t rtd_refhold_next; - u_int16_t rtd_refrele_next; - struct { - struct thread *th; - void *pc[RTD_TRSTACK_SIZE]; - } rtd_refhold[RTD_REFHIST_SIZE]; - struct { - struct thread *th; - void *pc[RTD_TRSTACK_SIZE]; - } rtd_refrele[RTD_REFHIST_SIZE]; + ctrace_t rtd_refhold[CTRACE_HIST_SIZE]; + ctrace_t rtd_refrele[CTRACE_HIST_SIZE]; + /* + * Circular lists of locks and unlocks. 
+ */ + ctrace_t rtd_lock[CTRACE_HIST_SIZE]; + ctrace_t rtd_unlock[CTRACE_HIST_SIZE]; /* * Trash list linkage */ TAILQ_ENTRY(rtentry_dbg) rtd_trash_link; }; -/* List of trash route entries protected by rt_mtx */ +#define atomic_add_16_ov(a, n) \ + ((uint16_t) OSAddAtomic16(n, (volatile SInt16 *)a)) +#define atomic_add_32_ov(a, n) \ + ((uint32_t) OSAddAtomic(n, a)) + +/* List of trash route entries protected by rnh_lock */ static TAILQ_HEAD(, rtentry_dbg) rttrash_head; +static void rte_lock_init(struct rtentry *); +static void rte_lock_destroy(struct rtentry *); static inline struct rtentry *rte_alloc_debug(void); static inline void rte_free_debug(struct rtentry *); +static inline void rte_lock_debug(struct rtentry_dbg *); +static inline void rte_unlock_debug(struct rtentry_dbg *); static void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); static void rtable_init(void **); static inline void rtref_audit(struct rtentry_dbg *); static inline void rtunref_audit(struct rtentry_dbg *); -static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, u_long, +static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t, unsigned int); static int rtrequest_common_locked(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, unsigned int); -static void rtalloc_ign_common_locked(struct route *, u_long, unsigned int); +static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int); static inline void sa_set_ifscope(struct sockaddr *, unsigned int); static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *, unsigned int); static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *, unsigned int); +static struct sockaddr *sa_trim(struct sockaddr *, int); static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *, unsigned int); static struct radix_node *node_lookup_default(void); static int rn_match_ifscope(struct radix_node *, void *); 
static struct ifaddr *ifa_ifwithroute_common_locked(int, const struct sockaddr *, const struct sockaddr *, unsigned int); +static struct rtentry *rte_alloc(void); +static void rte_free(struct rtentry *); +static void rtfree_common(struct rtentry *, boolean_t); -__private_extern__ u_long route_generation = 0; -extern int use_routegenid; +uint32_t route_generation = 0; /* * sockaddr_in with embedded interface scope; this is used internally @@ -243,7 +357,7 @@ static struct sockaddr sin_def = { /* * Interface index (scope) of the primary interface; determined at * the time when the default, non-scoped route gets added, changed - * or deleted. Protected by rt_mtx. + * or deleted. Protected by rnh_lock. */ static unsigned int primary_ifscope = IFSCOPE_NONE; @@ -265,7 +379,7 @@ rt_inet_default(struct rtentry *rt, struct sockaddr *dst) } /* - * Set the ifscope of the primary interface; caller holds rt_mtx. + * Set the ifscope of the primary interface; caller holds rnh_lock. */ void set_primary_ifscope(unsigned int ifscope) @@ -274,7 +388,7 @@ set_primary_ifscope(unsigned int ifscope) } /* - * Return the ifscope of the primary interface; caller holds rt_mtx. + * Return the ifscope of the primary interface; caller holds rnh_lock. */ unsigned int get_primary_ifscope(void) @@ -345,6 +459,94 @@ mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope) return (SA(dst)); } +/* + * Trim trailing zeroes on a sockaddr and update its length. 
+ */ +static struct sockaddr * +sa_trim(struct sockaddr *sa, int skip) +{ + caddr_t cp, base = (caddr_t)sa + skip; + + if (sa->sa_len <= skip) + return (sa); + + for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0;) + cp--; + + sa->sa_len = (cp - base) + skip; + if (sa->sa_len < skip) { + /* Must not happen, and if so, panic */ + panic("%s: broken logic (sa_len %d < skip %d )", __func__, + sa->sa_len, skip); + /* NOTREACHED */ + } else if (sa->sa_len == skip) { + /* If we end up with all zeroes, then there's no mask */ + sa->sa_len = 0; + } + + return (sa); +} + +/* + * Called by rtm_msg{1,2} routines to "scrub" the embedded interface scope + * away from the socket address structure, so that clients of the routing + * socket will not be confused by the presence of the embedded scope, or the + * side effect of the increased length due to that. The source sockaddr is + * not modified; instead, the scrubbing happens on the destination sockaddr + * storage that is passed in by the caller. + */ +struct sockaddr * +rtm_scrub_ifscope(int idx, struct sockaddr *hint, struct sockaddr *sa, + struct sockaddr_storage *ss) +{ + struct sockaddr *ret = sa; + + switch (idx) { + case RTAX_DST: + /* + * If this is for an AF_INET destination address, call + * sin_copy() with IFSCOPE_NONE as it does what we need. + */ + if (sa->sa_family == AF_INET && + SINIFSCOPE(sa)->sin_ifscope != IFSCOPE_NONE) { + bzero(ss, sizeof (*ss)); + ret = sin_copy(SIN(sa), SIN(ss), IFSCOPE_NONE); + } + break; + + case RTAX_NETMASK: { + /* + * If this is for a mask, we can't tell whether or not + * there is an embedded interface scope, as the span of + * bytes between sa_len and the beginning of the mask + * (offset of sin_addr in the case of AF_INET) may be + * filled with all-ones by rn_addmask(), and hence we + * cannot rely on sa_family. 
Because of this, we use + * the sa_family of the hint sockaddr (RTAX_{DST,IFA}) + * as indicator as to whether or not the mask is to be + * treated as one for AF_INET. Clearing the embedded + * scope involves setting it to IFSCOPE_NONE followed + * by calling sa_trim() to trim trailing zeroes from + * the storage sockaddr, which reverses what was done + * earlier by mask_copy() on the source sockaddr. + */ + int skip = offsetof(struct sockaddr_in, sin_addr); + if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss) && + hint != NULL && hint->sa_family == AF_INET) { + bzero(ss, sizeof (*ss)); + bcopy(sa, ss, sa->sa_len); + SINIFSCOPE(ss)->sin_ifscope = IFSCOPE_NONE; + ret = sa_trim(SA(ss), skip); + } + break; + } + default: + break; + } + + return (ret); +} + /* * Callback leaf-matching routine for rn_matchaddr_args used * for looking up an exact match for a scoped route entry. @@ -380,20 +582,22 @@ route_init(void) if (rte_debug != 0) rte_debug |= RTD_DEBUG; - rt_mtx_grp_attr = lck_grp_attr_alloc_init(); - - rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr); - - rt_mtx_attr = lck_attr_alloc_init(); - - if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) { - printf("route_init: can't alloc rt_mtx\n"); + rnh_lock_grp_attr = lck_grp_attr_alloc_init(); + rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr); + rnh_lock_attr = lck_attr_alloc_init(); + if ((rnh_lock = lck_mtx_alloc_init(rnh_lock_grp, + rnh_lock_attr)) == NULL) { + printf("route_init: can't alloc rnh_lock\n"); return; } - lck_mtx_lock(rt_mtx); + rte_mtx_grp_attr = lck_grp_attr_alloc_init(); + rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr); + rte_mtx_attr = lck_attr_alloc_init(); + + lck_mtx_lock(rnh_lock); rn_init(); /* initialize all zeroes, all ones, mask table */ - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); rtable_init((void **)rt_tables); route_domain_mtx = routedomain.dom_mtx; @@ -411,60 +615,87 @@ route_init(void) TAILQ_INIT(&rttrash_head); } +/* + 
* Atomically increment route generation counter + */ +void +routegenid_update(void) +{ + (void) atomic_add_32_ov(&route_generation, 1); +} + /* * Packet routing routines. */ void rtalloc(struct route *ro) { - rtalloc_ign(ro, 0UL); + rtalloc_ign(ro, 0); } void -rtalloc_ign_locked(struct route *ro, u_long ignore) +rtalloc_ign_locked(struct route *ro, uint32_t ignore) { return (rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE)); } void -rtalloc_scoped_ign_locked(struct route *ro, u_long ignore, unsigned int ifscope) +rtalloc_scoped_ign_locked(struct route *ro, uint32_t ignore, + unsigned int ifscope) { return (rtalloc_ign_common_locked(ro, ignore, ifscope)); } static void -rtalloc_ign_common_locked(struct route *ro, u_long ignore, +rtalloc_ign_common_locked(struct route *ro, uint32_t ignore, unsigned int ifscope) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { - if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) + RT_LOCK_SPIN(rt); + if (rt->rt_ifp != NULL && (rt->rt_flags & RTF_UP) && + rt->generation_id == route_generation) { + RT_UNLOCK(rt); return; + } + RT_UNLOCK(rt); rtfree_locked(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope); - if (ro->ro_rt) + if (ro->ro_rt != NULL) { ro->ro_rt->generation_id = route_generation; + RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); + } } + void -rtalloc_ign(struct route *ro, u_long ignore) +rtalloc_ign(struct route *ro, uint32_t ignore) { - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); rtalloc_ign_locked(ro, ignore); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); +} + +void +rtalloc_scoped_ign(struct route *ro, uint32_t ignore, unsigned int ifscope) +{ + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); + rtalloc_scoped_ign_locked(ro, ignore, ifscope); + lck_mtx_unlock(rnh_lock); } struct rtentry * -rtalloc1_locked(struct sockaddr *dst, int report, 
u_long ignflags) +rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags) { return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE)); } struct rtentry * -rtalloc1_scoped_locked(struct sockaddr *dst, int report, u_long ignflags, +rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags, unsigned int ifscope) { return (rtalloc1_common_locked(dst, report, ignflags, ifscope)); @@ -475,13 +706,13 @@ rtalloc1_scoped_locked(struct sockaddr *dst, int report, u_long ignflags, * Or, at least try.. Create a cloned route if needed. */ static struct rtentry * -rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags, +rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags, unsigned int ifscope) { struct radix_node_head *rnh = rt_tables[dst->sa_family]; struct rtentry *rt, *newrt = NULL; struct rt_addrinfo info; - u_long nflags; + uint32_t nflags; int err = 0, msgtype = RTM_MISS; if (rnh == NULL) @@ -495,8 +726,10 @@ rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags, if (rt == NULL) goto unreachable; + RT_LOCK_SPIN(rt); newrt = rt; nflags = rt->rt_flags & ~ignflags; + RT_UNLOCK(rt); if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) { /* * We are apparently adding (report = 0 in delete). 
@@ -554,13 +787,25 @@ rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags, } struct rtentry * -rtalloc1(struct sockaddr *dst, int report, u_long ignflags) +rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags) { struct rtentry * entry; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); entry = rtalloc1_locked(dst, report, ignflags); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); + return (entry); +} + +struct rtentry * +rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags, + unsigned int ifscope) +{ + struct rtentry * entry; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); + entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope); + lck_mtx_unlock(rnh_lock); return (entry); } @@ -571,29 +816,69 @@ rtalloc1(struct sockaddr *dst, int report, u_long ignflags) void rtfree_locked(struct rtentry *rt) { - /* - * find the tree for that address family - * Note: in the case of igmp packets, there might not be an rnh - */ - struct radix_node_head *rnh; + rtfree_common(rt, TRUE); +} - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); +static void +rtfree_common(struct rtentry *rt, boolean_t locked) +{ + struct radix_node_head *rnh; - /* See 3582620 - We hit this during the transition from funnels to locks */ - if (rt == 0) { - printf("rtfree - rt is NULL\n"); + /* + * Atomically decrement the reference count and if it reaches 0, + * and there is a close function defined, call the close function. + */ + RT_LOCK_SPIN(rt); + if (rtunref(rt) > 0) { + RT_UNLOCK(rt); return; } - - rnh = rt_tables[rt_key(rt)->sa_family]; /* - * decrement the reference count by one and if it reaches 0, - * and there is a close function defined, call the close function + * To avoid violating lock ordering, we must drop rt_lock before + * trying to acquire the global rnh_lock. 
If we are called with + * rnh_lock held, then we already have exclusive access; otherwise + * we do the lock dance. */ - rtunref(rt); - if (rt->rt_refcnt > 0) - return; + if (!locked) { + /* + * Note that we check it again below after grabbing rnh_lock, + * since it is possible that another thread doing a lookup wins + * the race, grabs the rnh_lock first, and bumps up the reference + * count in which case the route should be left alone as it is + * still in use. It's also possible that another thread frees + * the route after we drop rt_lock; to prevent the route from + * being freed, we hold an extra reference. + */ + RT_ADDREF_LOCKED(rt); + RT_UNLOCK(rt); + lck_mtx_lock(rnh_lock); + RT_LOCK_SPIN(rt); + RT_REMREF_LOCKED(rt); + if (rt->rt_refcnt > 0) { + /* We've lost the race, so abort */ + RT_UNLOCK(rt); + goto done; + } + } + + /* + * We may be blocked on other lock(s) as part of freeing + * the entry below, so convert from spin to full mutex. + */ + RT_CONVERT_LOCK(rt); + + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + /* Negative refcnt must never happen */ + if (rt->rt_refcnt != 0) + panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt); + + /* + * find the tree for that address family + * Note: in the case of igmp packets, there might not be an rnh + */ + rnh = rt_tables[rt_key(rt)->sa_family]; /* * On last reference give the "close method" a chance to cleanup @@ -603,49 +888,59 @@ rtfree_locked(struct rtentry *rt) * close routine typically issues RTM_DELETE which clears the RTF_UP * flag on the entry so that the code below reclaims the storage. */ - if (rnh && rnh->rnh_close && rt->rt_refcnt == 0) + if (rnh != NULL && rnh->rnh_close != NULL) rnh->rnh_close((struct radix_node *)rt, rnh); /* - * If we are no longer "up" (and ref == 0) - * then we can free the resources associated - * with the route. + * If we are no longer "up" (and ref == 0) then we can free the + * resources associated with the route. 
*/ if (!(rt->rt_flags & RTF_UP)) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtfree 2"); + panic("rt %p freed while in radix tree\n", rt); /* * the rtentry must have been removed from the routing table - * so it is represented in rttrash.. remove that now. + * so it is represented in rttrash; remove that now. */ - (void) OSDecrementAtomic((SInt32 *)&rttrash); + (void) OSDecrementAtomic(&rttrash); if (rte_debug & RTD_DEBUG) { TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt, rtd_trash_link); } -#ifdef DIAGNOSTIC - if (rt->rt_refcnt < 0) { - printf("rtfree: %p not freed (neg refs) cnt=%d\n", - rt, rt->rt_refcnt); - return; - } -#endif + /* + * Route is no longer in the tree and refcnt is 0; + * we have exclusive access, so destroy it. + */ + RT_UNLOCK(rt); /* * release references on items we hold them on.. * e.g other routes and ifaddrs. */ - if (rt->rt_parent) + if (rt->rt_parent != NULL) { rtfree_locked(rt->rt_parent); + rt->rt_parent = NULL; + } - if(rt->rt_ifa) { + if (rt->rt_ifa != NULL) { ifafree(rt->rt_ifa); rt->rt_ifa = NULL; } /* - * The key is separatly alloc'd so free it (see rt_setgate()). + * Now free any attached link-layer info. + */ + if (rt->rt_llinfo != NULL) { + if (rt->rt_llinfo_free != NULL) + (*rt->rt_llinfo_free)(rt->rt_llinfo); + else + R_Free(rt->rt_llinfo); + rt->rt_llinfo = NULL; + } + + /* + * The key is separately alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ @@ -654,17 +949,25 @@ rtfree_locked(struct rtentry *rt) /* * and the rtentry itself of course */ + rte_lock_destroy(rt); rte_free(rt); + } else { + /* + * The "close method" has been called, but the route is + * still in the radix tree with zero refcnt, i.e. "up" + * and in the cached state. 
+ */ + RT_UNLOCK(rt); } +done: + if (!locked) + lck_mtx_unlock(rnh_lock); } void rtfree(struct rtentry *rt) { - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); - rtfree_locked(rt); - lck_mtx_unlock(rt_mtx); + rtfree_common(rt, FALSE); } /* @@ -672,38 +975,34 @@ rtfree(struct rtentry *rt) * the refcount reaches zero. Unless you have really good reason, * use rtfree not rtunref. */ -void +int rtunref(struct rtentry *p) { - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(p); - if (p->rt_refcnt <= 0) - panic("rtunref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p); + if (p->rt_refcnt == 0) + panic("%s(%p) bad refcnt\n", __func__, p); + + --p->rt_refcnt; if (rte_debug & RTD_DEBUG) rtunref_audit((struct rtentry_dbg *)p); - p->rt_refcnt--; + /* Return new value */ + return (p->rt_refcnt); } static inline void rtunref_audit(struct rtentry_dbg *rte) { + uint16_t idx; + if (rte->rtd_inuse != RTD_INUSE) panic("rtunref: on freed rte=%p\n", rte); - rte->rtd_refrele_cnt++; - - if (rte_debug & RTD_TRACE) { - rte->rtd_refrele[rte->rtd_refrele_next].th = current_thread(); - bzero(rte->rtd_refrele[rte->rtd_refrele_next].pc, - sizeof (rte->rtd_refrele[rte->rtd_refrele_next].pc)); - (void) OSBacktrace(rte->rtd_refrele[rte->rtd_refrele_next].pc, - RTD_TRSTACK_SIZE); - - rte->rtd_refrele_next = - (rte->rtd_refrele_next + 1) % RTD_REFHIST_SIZE; - } + idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE; + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_refrele[idx]); } /* @@ -712,42 +1011,34 @@ rtunref_audit(struct rtentry_dbg *rte) void rtref(struct rtentry *p) { - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(p); - if (p->rt_refcnt < 0) - panic("rtref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p); + if (++p->rt_refcnt == 0) + panic("%s(%p) bad refcnt\n", __func__, p); if (rte_debug & RTD_DEBUG) rtref_audit((struct rtentry_dbg *)p); - - p->rt_refcnt++; } static inline void rtref_audit(struct rtentry_dbg 
*rte) { + uint16_t idx; + if (rte->rtd_inuse != RTD_INUSE) panic("rtref_audit: on freed rte=%p\n", rte); - rte->rtd_refhold_cnt++; - - if (rte_debug & RTD_TRACE) { - rte->rtd_refhold[rte->rtd_refhold_next].th = current_thread(); - bzero(rte->rtd_refhold[rte->rtd_refhold_next].pc, - sizeof (rte->rtd_refhold[rte->rtd_refhold_next].pc)); - (void) OSBacktrace(rte->rtd_refhold[rte->rtd_refhold_next].pc, - RTD_TRSTACK_SIZE); - - rte->rtd_refhold_next = - (rte->rtd_refhold_next + 1) % RTD_REFHIST_SIZE; - } + idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE; + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_refhold[idx]); } void rtsetifa(struct rtentry *rt, struct ifaddr* ifa) { - if (rt == NULL) - panic("rtsetifa"); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + RT_LOCK_ASSERT_HELD(rt); if (rt->rt_ifa == ifa) return; @@ -764,34 +1055,6 @@ rtsetifa(struct rtentry *rt, struct ifaddr* ifa) ifaref(rt->rt_ifa); } -void -ifafree(struct ifaddr *ifa) -{ - int oldval; - - if (ifa == NULL) - panic("ifafree"); - - oldval = OSAddAtomic(-1, (SInt32 *)&ifa->ifa_refcnt); - - if (oldval == 0) { - if ((ifa->ifa_debug & IFA_ATTACHED) != 0) { - panic("ifa attached to ifp is being freed\n"); - } - FREE(ifa, M_IFADDR); - } -} - -void -ifaref(struct ifaddr *ifa) -{ - if (ifa == NULL) - panic("ifaref"); - - if (OSAddAtomic(1, (SInt32 *)&ifa->ifa_refcnt) == 0xffffffff) - panic("ifaref - reference count rolled over!"); -} - /* * Force a routing table entry to the specified * destination to go through the given gateway. @@ -811,8 +1074,8 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, unsigned int ifscope = (ifp != NULL) ? 
ifp->if_index : IFSCOPE_NONE; struct sockaddr_in sin; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); /* * Verify the gateway is directly reachable; if scoped routing @@ -826,6 +1089,8 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, /* Lookup route to the destination (from the original IP header) */ rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope); + if (rt != NULL) + RT_LOCK(rt); /* Embed scope in src for comparison against rt_gateway below */ if (ip_doscopedroute && src->sa_family == AF_INET) @@ -837,7 +1102,7 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, * we have a routing loop, perhaps as a result of an interface * going down recently. */ - if (!(flags & RTF_DONE) && rt && + if (!(flags & RTF_DONE) && rt != NULL && (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr, ifa->ifa_addr))) { error = EINVAL; @@ -849,26 +1114,31 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, error = EHOSTUNREACH; } } - + if (ifa) { ifafree(ifa); ifa = NULL; } - - if (error) + + if (error) { + if (rt != NULL) + RT_UNLOCK(rt); goto done; + } + /* * Create a new entry if we just got back a wildcard entry * or the the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ - if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) + if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. 
*/ + RT_LOCK_ASSERT_HELD(rt); if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* @@ -877,7 +1147,9 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, * to net; similar to cloned routes, the newly * created host route is scoped as well. */ - create: +create: + if (rt != NULL) + RT_UNLOCK(rt); flags |= RTF_GATEWAY | RTF_DYNAMIC; error = rtrequest_scoped_locked(RTM_ADD, dst, gateway, netmask, flags, NULL, ifscope); @@ -894,12 +1166,15 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, * add the key and gateway (in one malloc'd chunk). */ error = rt_setgate(rt, rt_key(rt), gateway); + RT_UNLOCK(rt); } } else { + RT_UNLOCK(rt); error = EHOSTUNREACH; } done: - if (rt) { + if (rt != NULL) { + RT_LOCK_ASSERT_NOTHELD(rt); if (rtp && !error) *rtp = rt; else @@ -912,22 +1187,22 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, if (stat != NULL) (*stat)++; if (use_routegenid) - route_generation++; + routegenid_update(); } + lck_mtx_unlock(rnh_lock); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg(RTM_REDIRECT, &info, flags, error); - lck_mtx_unlock(rt_mtx); } /* * Routing table ioctl interface. 
*/ int -rtioctl(int req, caddr_t data, struct proc *p) +rtioctl(unsigned long req, caddr_t data, struct proc *p) { #pragma unused(p) #if INET && MROUTING @@ -945,9 +1220,9 @@ ifa_ifwithroute( { struct ifaddr *ifa; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); ifa = ifa_ifwithroute_locked(flags, dst, gateway); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); return (ifa); } @@ -980,7 +1255,7 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst, struct rtentry *rt = NULL; struct sockaddr_in dst_in, gw_in; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); if (ip_doscopedroute) { /* @@ -1020,12 +1295,14 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst, if (ifa == NULL) { /* Workaround to avoid gcc warning regarding const variable */ rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst, - 0, 0UL, ifscope); + 0, 0, ifscope); if (rt != NULL) { + RT_LOCK_SPIN(rt); ifa = rt->rt_ifa; if (ifa != NULL) ifaref(ifa); - rtunref(rt); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); rt = NULL; } } @@ -1049,13 +1326,15 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst, if ((ifa == NULL || !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) && (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gateway, - 0, 0UL, ifscope)) != NULL) { + 0, 0, ifscope)) != NULL) { if (ifa != NULL) ifafree(ifa); + RT_LOCK_SPIN(rt); ifa = rt->rt_ifa; if (ifa != NULL) ifaref(ifa); - rtunref(rt); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } /* * If an interface scope was specified, the interface index of @@ -1071,10 +1350,8 @@ ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst, return (ifa); } -#define ROUNDUP(a) (a>0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) - -static int rt_fixdelete __P((struct radix_node *, void *)); -static int rt_fixchange __P((struct radix_node *, void *)); +static int rt_fixdelete(struct radix_node *, void *); +static int rt_fixchange(struct radix_node *, void *); struct rtfc_arg { struct rtentry *rt0; @@ -1132,7 +1409,7 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, struct sockaddr_in sin, mask; #define senderr(x) { error = x ; goto bad; } - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); /* * Find the correct routing tree to use for this Address Family */ @@ -1196,17 +1473,25 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * will decrement the reference via rtfree_locked() and then * possibly deallocate it. */ - rtref(rt); + RT_LOCK(rt); + RT_ADDREF_LOCKED(rt); rt->rt_flags &= ~RTF_UP; + /* + * For consistency, in case the caller didn't set the flag. + */ + rt->rt_flags |= RTF_CONDEMNED; + /* * Now search what's left of the subtree for any cloned * routes which might have been formed from this node. */ if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) && rt_mask(rt)) { + RT_UNLOCK(rt); rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), rt_fixdelete, rt); + RT_LOCK(rt); } /* @@ -1214,10 +1499,9 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * This might result in another rtentry being freed if * we held its last reference. */ - if (rt->rt_gwroute) { - rt = rt->rt_gwroute; - rtfree_locked(rt); - (rt = (struct rtentry *)rn)->rt_gwroute = 0; + if (rt->rt_gwroute != NULL) { + rtfree_locked(rt->rt_gwroute); + rt->rt_gwroute = NULL; } /* @@ -1231,7 +1515,7 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * one more rtentry floating around that is not * linked to the routing table. 
*/ - (void) OSIncrementAtomic((SInt32 *)&rttrash); + (void) OSIncrementAtomic(&rttrash); if (rte_debug & RTD_DEBUG) { TAILQ_INSERT_TAIL(&rttrash_head, (struct rtentry_dbg *)rt, rtd_trash_link); @@ -1244,6 +1528,8 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, if (rt_inet_default(rt, rt_key(rt))) set_primary_ifscope(IFSCOPE_NONE); + RT_UNLOCK(rt); + /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be @@ -1261,6 +1547,16 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, case RTM_RESOLVE: if (ret_nrt == 0 || (rt = *ret_nrt) == 0) senderr(EINVAL); + /* + * If cloning, we have the parent route given by the caller + * and will use its rt_gateway, rt_rmx as part of the cloning + * process below. Since rnh_lock is held at this point, the + * parent's rt_ifa and rt_gateway will not change, and its + * relevant rt_flags will not change as well. The only thing + * that could change are the metrics, and thus we hold the + * parent route's rt_lock later on during the actual copying + * of rt_rmx. + */ ifa = rt->rt_ifa; ifaref(ifa); flags = rt->rt_flags & @@ -1315,6 +1611,8 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, if ((rt = rte_alloc()) == NULL) senderr(ENOBUFS); Bzero(rt, sizeof(*rt)); + rte_lock_init(rt); + RT_LOCK(rt); rt->rt_flags = RTF_UP | flags; /* @@ -1322,6 +1620,8 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * also add the rt_gwroute if possible. */ if ((error = rt_setgate(rt, dst, gateway)) != 0) { + RT_UNLOCK(rt); + rte_lock_destroy(rt); rte_free(rt); senderr(error); } @@ -1368,10 +1668,13 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, RTF_CLONING | RTF_PRCLONING); } if (rt2 && rt2->rt_parent) { - rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(rt2), - rt2->rt_gateway, - rt_mask(rt2), rt2->rt_flags, 0); + /* + * rnh_lock is held here, so rt_key and + * rt_gateway of rt2 will not change. 
+ */ + (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), + rt2->rt_gateway, rt_mask(rt2), + rt2->rt_flags, 0); rtfree_locked(rt2); rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, @@ -1387,12 +1690,17 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * then un-make it (this should be a function) */ if (rn == 0) { - if (rt->rt_gwroute) + if (rt->rt_gwroute) { rtfree_locked(rt->rt_gwroute); + rt->rt_gwroute = NULL; + } if (rt->rt_ifa) { ifafree(rt->rt_ifa); + rt->rt_ifa = NULL; } R_Free(rt_key(rt)); + RT_UNLOCK(rt); + rte_lock_destroy(rt); rte_free(rt); senderr(EEXIST); } @@ -1400,16 +1708,22 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, rt->rt_parent = 0; /* - * If we got here from RESOLVE, then we are cloning - * so clone the rest, and note that we - * are a clone (and increment the parent's references) + * If we got here from RESOLVE, then we are cloning so clone + * the rest, and note that we are a clone (and increment the + * parent's references). rnh_lock is still held, which prevents + * a lookup from returning the newly-created route. Hence + * holding and releasing the parent's rt_lock while still + * holding the route's rt_lock is safe since the new route + * is not yet externally visible. */ if (req == RTM_RESOLVE) { + RT_LOCK_SPIN(*ret_nrt); rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { rt->rt_parent = (*ret_nrt); - rtref(*ret_nrt); + RT_ADDREF_LOCKED(*ret_nrt); } + RT_UNLOCK(*ret_nrt); } /* @@ -1421,19 +1735,6 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, ifafree(ifa); ifa = 0; - /* - * We repeat the same procedure from rt_setgate() here because - * it doesn't fire when we call it there because the node - * hasn't been added to the tree yet. 
- */ - if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { - struct rtfc_arg arg; - arg.rnh = rnh; - arg.rt0 = rt; - rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), - rt_fixchange, &arg); - } - /* * If this is the (non-scoped) default route, record * the interface index used for the primary ifscope. @@ -1447,7 +1748,23 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, */ if (ret_nrt) { *ret_nrt = rt; - rtref(rt); + RT_ADDREF_LOCKED(rt); + } + + /* + * We repeat the same procedure from rt_setgate() here because + * it doesn't fire when we call it there because the node + * hasn't been added to the tree yet. + */ + if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) { + struct rtfc_arg arg; + arg.rnh = rnh; + arg.rt0 = rt; + RT_UNLOCK(rt); + rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), + rt_fixchange, &arg); + } else { + RT_UNLOCK(rt); } break; } @@ -1467,10 +1784,10 @@ rtrequest( struct rtentry **ret_nrt) { int error; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); return (error); } /* @@ -1486,14 +1803,21 @@ rt_fixdelete(struct radix_node *rn, void *vp) struct rtentry *rt = (struct rtentry *)rn; struct rtentry *rt0 = vp; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK(rt); if (rt->rt_parent == rt0 && !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { - return rtrequest_locked(RTM_DELETE, rt_key(rt), - (struct sockaddr *)0, rt_mask(rt), - rt->rt_flags, (struct rtentry **)0); + /* + * Safe to drop rt_lock and use rt_key, since holding + * rnh_lock here prevents another thread from calling + * rt_setgate() on this route. 
+ */ + RT_UNLOCK(rt); + return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, + rt_mask(rt), rt->rt_flags, NULL)); } + RT_UNLOCK(rt); return 0; } @@ -1520,11 +1844,15 @@ rt_fixchange(struct radix_node *rn, void *vp) u_char *xk1, *xm1, *xk2, *xmp; int i, len, mlen; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + RT_LOCK(rt); if (!rt->rt_parent || - (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) + (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { + RT_UNLOCK(rt); return (0); + } if (rt->rt_parent == rt0) goto delete_rt; @@ -1542,17 +1870,23 @@ rt_fixchange(struct radix_node *rn, void *vp) /* avoid applying a less specific route */ xmp = (u_char *)rt_mask(rt->rt_parent); mlen = rt_key(rt->rt_parent)->sa_len; - if (mlen > rt_key(rt0)->sa_len) + if (mlen > rt_key(rt0)->sa_len) { + RT_UNLOCK(rt); return (0); + } for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) { - if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) + if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) { + RT_UNLOCK(rt); return (0); + } } for (i = rnh->rnh_treetop->rn_offset; i < len; i++) { - if ((xk2[i] & xm1[i]) != xk1[i]) + if ((xk2[i] & xm1[i]) != xk1[i]) { + RT_UNLOCK(rt); return (0); + } } /* @@ -1560,17 +1894,51 @@ rt_fixchange(struct radix_node *rn, void *vp) * changed/added under the node's mask. So, get rid of it. */ delete_rt: + /* + * Safe to drop rt_lock and use rt_key, since holding rnh_lock here + * prevents another thread from calling rt_setgate() on this route. + */ + RT_UNLOCK(rt); return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), rt->rt_flags, NULL)); } +/* + * Round up sockaddr len to multiples of 32-bytes. This will reduce + * or even eliminate the need to re-allocate the chunk of memory used + * for rt_key and rt_gateway in the event the gateway portion changes. + * Certain code paths (e.g. 
IPSec) are notorious for caching the address + * of rt_gateway; this rounding-up would help ensure that the gateway + * portion never gets deallocated (though it may change contents) and + * thus greatly simplifies things. + */ +#define SA_SIZE(x) (-(-((uintptr_t)(x)) & -(32))) + +/* + * Sets the gateway and/or gateway route portion of a route; may be + * called on an existing route to modify the gateway portion. Both + * rt_key and rt_gateway are allocated out of the same memory chunk. + * Route entry lock must be held by caller; this routine will return + * with the lock held. + */ int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { - int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len); + int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len); struct radix_node_head *rnh = rt_tables[dst->sa_family]; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + + /* + * If this is for a route that is on its way of being removed, + * or is temporarily frozen, reject the modification request. + */ + if (rt->rt_flags & RTF_CONDEMNED) + return (EBUSY); + + /* Add an extra ref for ourselves */ + RT_ADDREF_LOCKED(rt); /* * A host route with the destination equal to the gateway @@ -1584,9 +1952,19 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * The route might already exist if this is an RTM_CHANGE * or a routing redirect, so try to delete it. */ - if (rt_key(rt)) - rtrequest_locked(RTM_DELETE, rt_key(rt), + if (rt_key(rt) != NULL) { + /* + * Safe to drop rt_lock and use rt_key, rt_gateway, + * since holding rnh_lock here prevents another thread + * from calling rt_setgate() on this route. 
+ */ + RT_UNLOCK(rt); + (void) rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + RT_LOCK(rt); + } + /* Release extra ref */ + RT_REMREF_LOCKED(rt); return (EADDRNOTAVAIL); } @@ -1601,7 +1979,11 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) ifscope = (dst->sa_family == AF_INET) ? sa_get_ifscope(dst) : IFSCOPE_NONE; + RT_UNLOCK(rt); gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope); + if (gwrt != NULL) + RT_LOCK_ASSERT_NOTHELD(gwrt); + RT_LOCK(rt); /* * Cloning loop avoidance: @@ -1618,19 +2000,36 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * for the gateway to be referred to by another route. */ if (gwrt == rt) { - rtunref(gwrt); + RT_REMREF_LOCKED(gwrt); + /* Release extra ref */ + RT_REMREF_LOCKED(rt); return (EADDRINUSE); /* failure */ } - /* If scoped, the gateway route must use the same interface */ + /* + * If scoped, the gateway route must use the same interface; + * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt + * should not change and are freely accessible. + */ if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) && gwrt != NULL && gwrt->rt_ifp != NULL && gwrt->rt_ifp->if_index != ifscope) { - rtfree_locked(gwrt); + rtfree_locked(gwrt); /* rt != gwrt, no deadlock */ + /* Release extra ref */ + RT_REMREF_LOCKED(rt); return ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } + /* Check again since we dropped the lock above */ + if (rt->rt_flags & RTF_CONDEMNED) { + if (gwrt != NULL) + rtfree_locked(gwrt); + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + return (EBUSY); + } + if (rt->rt_gwroute != NULL) rtfree_locked(rt->rt_gwroute); rt->rt_gwroute = gwrt; @@ -1666,7 +2065,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * to the right place. Otherwise, malloc a new block and update * the 'dst' address and point rt_gateway to the right place. 
*/ - if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) { + if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) { caddr_t new; /* The underlying allocation is done with M_WAITOK set */ @@ -1675,6 +2074,8 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) if (rt->rt_gwroute != NULL) rtfree_locked(rt->rt_gwroute); rt->rt_gwroute = NULL; + /* Release extra ref */ + RT_REMREF_LOCKED(rt); return (ENOBUFS); } @@ -1683,7 +2084,8 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) * here to initialize a newly allocated route entry, in * which case rt_key(rt) is NULL (and so does rt_gateway). */ - Bcopy(dst, new, dlen); + bzero(new, dlen + glen); + Bcopy(dst, new, dst->sa_len); R_Free(rt_key(rt)); /* free old block; NULL is okay */ rt->rt_nodes->rn_key = new; rt->rt_gateway = (struct sockaddr *)(new + dlen); @@ -1692,7 +2094,7 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) /* * Copy the new gateway value into the memory chunk. */ - Bcopy(gate, rt->rt_gateway, glen); + Bcopy(gate, rt->rt_gateway, gate->sa_len); /* * For consistency between rt_gateway and rt_key(gwrt). 
@@ -1714,13 +2116,19 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) struct rtfc_arg arg; arg.rnh = rnh; arg.rt0 = rt; + RT_UNLOCK(rt); rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), rt_fixchange, &arg); + RT_LOCK(rt); } + /* Release extra ref */ + RT_REMREF_LOCKED(rt); return (0); } +#undef SA_SIZE + static void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) @@ -1819,7 +2227,7 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, struct radix_node *rn0, *rn; boolean_t dontcare = (ifscope == IFSCOPE_NONE); - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); if (!lookup_only) netmask = NULL; @@ -1832,7 +2240,25 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, rn = rnh->rnh_lookup(dst, netmask, rnh); else rn = rnh->rnh_matchaddr(dst, rnh); - goto done; + + /* + * Don't return a root node; also, rnh_matchaddr callback + * would have done the necessary work to clear RTPRF_OURS + * for certain protocol families. + */ + if (rn != NULL && (rn->rn_flags & RNF_ROOT)) + rn = NULL; + if (rn != NULL) { + RT_LOCK_SPIN(RT(rn)); + if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) { + RT_ADDREF_LOCKED(RT(rn)); + RT_UNLOCK(RT(rn)); + } else { + RT_UNLOCK(RT(rn)); + rn = NULL; + } + } + return (RT(rn)); } /* @@ -1851,7 +2277,7 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, * any reason, there is no primary interface, return what we have. */ if (dontcare && (ifscope = get_primary_ifscope()) == IFSCOPE_NONE) - goto validate; + goto done; /* * Keep the original result if either of the following is true: @@ -1861,7 +2287,8 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, * 2) The route uses the loopback interface, in which case the * destination (host/net) is local/loopback. * - * Otherwise, do a more specified search using the scope. 
+ * Otherwise, do a more specified search using the scope; + * we're holding rnh_lock now, so rt_ifp should not change. */ if (rn != NULL) { struct rtentry *rt = RT(rn); @@ -1924,15 +2351,26 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, RT(rn)->rt_ifp->if_index != ifscope) rn = NULL; -validate: - if (rn != NULL && !lookup_only) - (void) in_validate(rn); - done: - if (rn != NULL && (rn->rn_flags & RNF_ROOT)) - rn = NULL; - else if (rn != NULL) - rtref(RT(rn)); + if (rn != NULL) { + /* + * Manually clear RTPRF_OURS using in_validate() and + * bump up the reference count after, and not before; + * we only get here for AF_INET. node_lookup() has + * done the check against RNF_ROOT, so we can be sure + * that we're not returning a root node here. + */ + RT_LOCK_SPIN(RT(rn)); + if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) { + if (!lookup_only) + (void) in_validate(rn); + RT_ADDREF_LOCKED(RT(rn)); + RT_UNLOCK(RT(rn)); + } else { + RT_UNLOCK(RT(rn)); + rn = NULL; + } + } return (RT(rn)); } @@ -1945,10 +2383,10 @@ int rtinit(struct ifaddr *ifa, int cmd, int flags) { int error; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(rt_mtx); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); error = rtinit_locked(ifa, cmd, flags); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); return (error); } @@ -1989,15 +2427,17 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * We set "report" to FALSE so that if it doesn't exist, * it doesn't report an error or clone a route, etc. etc. */ - rt = rtalloc1_locked(dst, 0, 0UL); + rt = rtalloc1_locked(dst, 0, 0); if (rt) { /* * Ok so we found the rtentry. it has an extra reference * for us at this stage. we won't need that so * lop that off now. 
*/ - rtunref(rt); + RT_LOCK_SPIN(rt); if (rt->rt_ifa != ifa) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); /* * If the interface in the rtentry doesn't match * the interface we are using, then we don't @@ -2009,6 +2449,9 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) (void) m_free(m); return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + } else { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } } /* XXX */ @@ -2019,7 +2462,7 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * it doesn't exist, we could just return at this point * with an "ELSE" clause, but apparently not.. */ - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } @@ -2038,11 +2481,13 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) */ if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) { /* - * notify any listenning routing agents of the change + * notify any listening routing agents of the change */ + RT_LOCK(rt); rt_newaddrmsg(cmd, ifa, error, nrt); if (use_routegenid) - route_generation++; + routegenid_update(); + RT_UNLOCK(rt); rtfree_locked(rt); } @@ -2051,6 +2496,7 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * We need to sanity check the result. */ if (cmd == RTM_ADD && error == 0 && (rt = nrt)) { + RT_LOCK(rt); /* * If it came back with an unexpected interface, then it must * have already existed or something. (XXX) @@ -2089,7 +2535,7 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) */ rt_newaddrmsg(cmd, ifa, error, nrt); if (use_routegenid) - route_generation++; + routegenid_update(); /* * We just wanted to add it; we don't actually need a * reference. This will result in a route that's added @@ -2097,12 +2543,68 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * RTM_DELETE code will do the necessary step to adjust * the reference count at deletion time. 
*/ - rtunref(rt); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } return (error); } -struct rtentry * +static void +rte_lock_init(struct rtentry *rt) +{ + lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr); +} + +static void +rte_lock_destroy(struct rtentry *rt) +{ + RT_LOCK_ASSERT_NOTHELD(rt); + lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp); +} + +void +rt_lock(struct rtentry *rt, boolean_t spin) +{ + RT_LOCK_ASSERT_NOTHELD(rt); + if (spin) + lck_mtx_lock_spin(&rt->rt_lock); + else + lck_mtx_lock(&rt->rt_lock); + if (rte_debug & RTD_DEBUG) + rte_lock_debug((struct rtentry_dbg *)rt); +} + +void +rt_unlock(struct rtentry *rt) +{ + RT_LOCK_ASSERT_HELD(rt); + if (rte_debug & RTD_DEBUG) + rte_unlock_debug((struct rtentry_dbg *)rt); + lck_mtx_unlock(&rt->rt_lock); + +} + +static inline void +rte_lock_debug(struct rtentry_dbg *rte) +{ + uint32_t idx; + + idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE; + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_lock[idx]); +} + +static inline void +rte_unlock_debug(struct rtentry_dbg *rte) +{ + uint32_t idx; + + idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE; + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_unlock[idx]); +} + +static struct rtentry * rte_alloc(void) { if (rte_debug & RTD_DEBUG) @@ -2111,7 +2613,7 @@ rte_alloc(void) return ((struct rtentry *)zalloc(rte_zone)); } -void +static void rte_free(struct rtentry *p) { if (rte_debug & RTD_DEBUG) { @@ -2133,11 +2635,8 @@ rte_alloc_debug(void) rte = ((struct rtentry_dbg *)zalloc(rte_zone)); if (rte != NULL) { bzero(rte, sizeof (*rte)); - if (rte_debug & RTD_TRACE) { - rte->rtd_alloc_thread = current_thread(); - (void) OSBacktrace(rte->rtd_alloc_stk_pc, - RTD_TRSTACK_SIZE); - } + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_alloc); rte->rtd_inuse = RTD_INUSE; } return ((struct rtentry *)rte); @@ -2157,15 +2656,22 @@ rte_free_debug(struct rtentry *p) panic("rte_free: corrupted rte=%p\n", rte); bcopy((caddr_t)p, 
(caddr_t)&rte->rtd_entry_saved, sizeof (*p)); - bzero((caddr_t)p, sizeof (*p)); + /* Preserve rt_lock to help catch use-after-free cases */ + bzero((caddr_t)p, offsetof(struct rtentry, rt_lock)); rte->rtd_inuse = RTD_FREED; - if (rte_debug & RTD_TRACE) { - rte->rtd_free_thread = current_thread(); - (void) OSBacktrace(rte->rtd_free_stk_pc, RTD_TRSTACK_SIZE); - } + if (rte_debug & RTD_TRACE) + ctrace_record(&rte->rtd_free); if (!(rte_debug & RTD_NO_FREE)) zfree(rte_zone, p); } + +void +ctrace_record(ctrace_t *tr) +{ + tr->th = current_thread(); + bzero(tr->pc, sizeof (tr->pc)); + (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE); +} diff --git a/bsd/net/route.h b/bsd/net/route.h index cfc95a6aa..b2bcea3b8 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -64,6 +64,7 @@ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ #include +#include #include #include @@ -82,10 +83,14 @@ #ifdef PRIVATE struct rtentry; struct route { - struct rtentry *ro_rt; - struct sockaddr ro_dst; - u_int32_t ro_flags; /* route flags (see below) */ - u_int32_t reserved; /* for future use if needed */ + /* + * N.B: struct route must begin with ro_rt and ro_flags + * because the code does some casts of a 'struct route_in6 *' + * to a 'struct route *'. + */ + struct rtentry *ro_rt; + uint32_t ro_flags; /* route flags (see below) */ + struct sockaddr ro_dst; }; #define ROF_SRCIF_SELECTED 0x1 /* source interface was selected */ @@ -117,10 +122,6 @@ struct rt_metrics { */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ -/* - * XXX kernel function pointer `rt_output' is visible to applications. - */ - /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer @@ -129,51 +130,40 @@ struct rt_metrics { * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. 
*/ -#ifdef PRIVATE +#ifdef KERNEL_PRIVATE +#include #ifndef RNF_NORMAL #include #endif +/* + * Kernel routing entry structure (private). + */ struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ #define rt_key(r) ((struct sockaddr *)((r)->rt_nodes->rn_key)) #define rt_mask(r) ((struct sockaddr *)((r)->rt_nodes->rn_mask)) struct sockaddr *rt_gateway; /* value */ int32_t rt_refcnt; /* # held references */ - u_long rt_flags; /* up/down?, host/net */ + uint32_t rt_flags; /* up/down?, host/net */ struct ifnet *rt_ifp; /* the answer: interface to use */ - u_long rt_dlt; /* DLIL dl_tag */ - struct ifaddr *rt_ifa; /* the answer: interface to use */ + struct ifaddr *rt_ifa; /* the answer: interface addr to use */ struct sockaddr *rt_genmask; /* for generation of cloned routes */ - caddr_t rt_llinfo; /* pointer to link level info cache */ + void *rt_llinfo; /* pointer to link level info cache */ + void (*rt_llinfo_free)(void *); /* link level info free function */ struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ - int (*rt_output)(struct ifnet *, struct mbuf *, - struct sockaddr *, struct rtentry *); - /* output routine for this (rt,if) */ struct rtentry *rt_parent; /* cloning parent of this route */ - u_long generation_id; /* route generation id */ -}; -#endif /* PRIVATE */ - -#ifdef __APPLE_API_OBSOLETE -/* - * Following structure necessary for 4.3 compatibility; - * We should eventually move it to a compat file. - */ -struct ortentry { - u_long rt_hash; /* to speed lookups */ - struct sockaddr rt_dst; /* key */ - struct sockaddr rt_gateway; /* value */ - short rt_flags; /* up/down?, host/net */ - short rt_refcnt; /* # held references */ - u_long rt_use; /* raw # packets forwarded */ - struct ifnet *rt_ifp; /* the answer: interface to use */ + uint32_t generation_id; /* route generation id */ + /* + * See bsd/net/route.c for synchronization notes. 
+ */ + decl_lck_mtx_data(, rt_lock); /* lock for routing entry */ }; -#endif /* __APPLE_API_OBSOLETE */ +#endif /* KERNEL_PRIVATE */ -#ifdef PRIVATE +#ifdef KERNEL_PRIVATE #define rt_use rt_rmx.rmx_pksent -#endif /* PRIVATE */ +#endif /* KERNEL_PRIVATE */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ @@ -200,7 +190,8 @@ struct ortentry { #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ #define RTF_IFSCOPE 0x1000000 /* has valid interface scope */ - /* 0x2000000 and up unassigned */ +#define RTF_CONDEMNED 0x2000000 /* defunct; no longer modifiable */ + /* 0x4000000 and up unassigned */ /* * Routing statistics. @@ -270,7 +261,7 @@ struct rt_msghdr2 { #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #ifdef PRIVATE #define RTM_GET_SILENT 0x11 -#endif PRIVATE +#endif /* PRIVATE */ #define RTM_IFINFO2 0x12 /* */ #define RTM_NEWMADDR2 0x13 /* */ #define RTM_GET2 0x14 /* */ @@ -326,22 +317,115 @@ struct route_cb { int any_count; }; -#ifdef KERNEL_PRIVATE +#ifdef PRIVATE /* * For scoped routing; a zero interface scope value means nil/no scope. */ #define IFSCOPE_NONE 0 +#endif /* PRIVATE */ + +#ifdef KERNEL_PRIVATE +/* + * Generic call trace used by some subsystems (e.g. 
route, ifaddr) + */ +#define CTRACE_STACK_SIZE 8 /* depth of stack trace */ +#define CTRACE_HIST_SIZE 4 /* refcnt history size */ +typedef struct ctrace { + void *th; /* thread ptr */ + void *pc[CTRACE_STACK_SIZE]; /* PC stack trace */ +} ctrace_t; + +extern void ctrace_record(ctrace_t *); + +#define RT_LOCK_ASSERT_HELD(_rt) \ + lck_mtx_assert(&(_rt)->rt_lock, LCK_MTX_ASSERT_OWNED) + +#define RT_LOCK_ASSERT_NOTHELD(_rt) \ + lck_mtx_assert(&(_rt)->rt_lock, LCK_MTX_ASSERT_NOTOWNED) + +#define RT_LOCK(_rt) do { \ + if (!rte_debug) \ + lck_mtx_lock(&(_rt)->rt_lock); \ + else \ + rt_lock(_rt, FALSE); \ +} while (0) + +#define RT_LOCK_SPIN(_rt) do { \ + if (!rte_debug) \ + lck_mtx_lock_spin(&(_rt)->rt_lock); \ + else \ + rt_lock(_rt, TRUE); \ +} while (0) + +#define RT_CONVERT_LOCK(_rt) do { \ + RT_LOCK_ASSERT_HELD(_rt); \ + lck_mtx_convert_spin(&(_rt)->rt_lock); \ +} while (0) + +#define RT_UNLOCK(_rt) do { \ + if (!rte_debug) \ + lck_mtx_unlock(&(_rt)->rt_lock); \ + else \ + rt_unlock(_rt); \ +} while (0) + +#define RT_ADDREF_LOCKED(_rt) do { \ + if (!rte_debug) { \ + RT_LOCK_ASSERT_HELD(_rt); \ + if (++(_rt)->rt_refcnt == 0) \ + panic("RT_ADDREF(%p) bad refcnt\n", _rt); \ + } else { \ + rtref(_rt); \ + } \ +} while (0) + +/* + * Spin variant mutex is used here; caller is responsible for + * converting any previously-held similar lock to full mutex. + */ +#define RT_ADDREF(_rt) do { \ + RT_LOCK_SPIN(_rt); \ + RT_ADDREF_LOCKED(_rt); \ + RT_UNLOCK(_rt); \ +} while (0) + +#define RT_REMREF_LOCKED(_rt) do { \ + if (!rte_debug) { \ + RT_LOCK_ASSERT_HELD(_rt); \ + if ((_rt)->rt_refcnt == 0) \ + panic("RT_REMREF(%p) bad refcnt\n", _rt); \ + --(_rt)->rt_refcnt; \ + } else { \ + (void) rtunref(_rt); \ + } \ +} while (0) + +/* + * Spin variant mutex is used here; caller is responsible for + * converting any previously-held similar lock to full mutex. 
+ */ +#define RT_REMREF(_rt) do { \ + RT_LOCK_SPIN(_rt); \ + RT_REMREF_LOCKED(_rt); \ + RT_UNLOCK(_rt); \ +} while (0) + +#define RTFREE(_rt) rtfree(_rt) +#define RTFREE_LOCKED(_rt) rtfree_locked(_rt) -#define RTFREE(rt) rtfree(rt) extern struct route_cb route_cb; extern struct radix_node_head *rt_tables[AF_MAX+1]; +__private_extern__ lck_mtx_t *rnh_lock; +__private_extern__ int use_routegenid; +__private_extern__ uint32_t route_generation; +__private_extern__ int rttrash; +__private_extern__ unsigned int rte_debug; struct ifmultiaddr; struct proc; -__private_extern__ int rttrash; - extern void route_init(void) __attribute__((section("__TEXT, initcode"))); +extern void routegenid_update(void); extern void rt_ifmsg(struct ifnet *); extern void rt_missmsg(int, struct rt_addrinfo *, int, int); extern void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); @@ -353,13 +437,16 @@ extern boolean_t rt_inet_default(struct rtentry *, struct sockaddr *); extern struct rtentry *rt_lookup(boolean_t, struct sockaddr *, struct sockaddr *, struct radix_node_head *, unsigned int); extern void rtalloc(struct route *); -extern void rtalloc_ign(struct route *, u_long); -extern void rtalloc_ign_locked(struct route *, u_long); -extern void rtalloc_scoped_ign_locked(struct route *, u_long, unsigned int); -extern struct rtentry *rtalloc1(struct sockaddr *, int, u_long); -extern struct rtentry *rtalloc1_locked(struct sockaddr *, int, u_long); +extern void rtalloc_ign(struct route *, uint32_t); +extern void rtalloc_ign_locked(struct route *, uint32_t); +extern void rtalloc_scoped_ign(struct route *, uint32_t, unsigned int); +extern void rtalloc_scoped_ign_locked(struct route *, uint32_t, unsigned int); +extern struct rtentry *rtalloc1(struct sockaddr *, int, uint32_t); +extern struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t); +extern struct rtentry *rtalloc1_scoped(struct sockaddr *, int, uint32_t, + unsigned int); extern struct rtentry 
*rtalloc1_scoped_locked(struct sockaddr *, int, - u_long, unsigned int); + uint32_t, unsigned int); extern void rtfree(struct rtentry *); extern void rtfree_locked(struct rtentry *); extern void rtref(struct rtentry *); @@ -368,11 +455,11 @@ extern void rtref(struct rtentry *); * the refcount has reached zero and the route is not up. * Unless you have good reason to do otherwise, use rtfree. */ -extern void rtunref(struct rtentry *); +extern int rtunref(struct rtentry *); extern void rtsetifa(struct rtentry *, struct ifaddr *); extern int rtinit(struct ifaddr *, int, int); extern int rtinit_locked(struct ifaddr *, int, int); -extern int rtioctl(int, caddr_t, struct proc *); +extern int rtioctl(unsigned long, caddr_t, struct proc *); extern void rtredirect(struct ifnet *, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, struct rtentry **); extern int rtrequest(int, struct sockaddr *, @@ -381,9 +468,11 @@ extern int rtrequest_locked(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); extern int rtrequest_scoped_locked(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, unsigned int); -extern struct rtentry *rte_alloc(void); -extern void rte_free(struct rtentry *); extern unsigned int sa_get_ifscope(struct sockaddr *); -#endif KERNEL_PRIVATE +extern void rt_lock(struct rtentry *, boolean_t); +extern void rt_unlock(struct rtentry *); +extern struct sockaddr *rtm_scrub_ifscope(int, struct sockaddr *, + struct sockaddr *, struct sockaddr_storage *); +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c index b6836fd6c..c7b69413c 100644 --- a/bsd/net/rtsock.c +++ b/bsd/net/rtsock.c @@ -83,13 +83,10 @@ #include extern struct rtstat rtstat; -extern u_long route_generation; -extern int use_routegenid; extern int check_routeselfref; MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); -extern lck_mtx_t *rt_mtx; static struct sockaddr route_dst 
= { 2, PF_ROUTE, { 0, } }; static struct sockaddr route_src = { 2, PF_ROUTE, { 0, } }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, { 0, } }; @@ -102,15 +99,14 @@ struct walkarg { struct sysctl_req *w_req; }; -static struct mbuf * - rt_msg1(int, struct rt_addrinfo *); +static struct mbuf *rt_msg1(int, struct rt_addrinfo *); static int rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *); static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_iflist2(int af, struct walkarg *w); static int route_output(struct mbuf *, struct socket *); -static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *); +static void rt_setmetrics(u_int32_t, struct rt_metrics *, struct rt_metrics *); static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *, struct sockaddr *, unsigned int); @@ -315,14 +311,14 @@ route_output(struct mbuf *m, struct socket *so) #define senderr(e) { error = e; goto flush;} if (m == NULL || - ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == 0)) + ((m->m_len < sizeof(intptr_t)) && (m = m_pullup(m, sizeof(intptr_t))) == 0)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); /* unlock the socket (but keep a reference) it won't be accessed until raw_input appends to it. 
*/ socket_unlock(so, 0); - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || @@ -453,6 +449,7 @@ route_output(struct mbuf *m, struct socket *so) error = rtrequest_scoped_locked(RTM_ADD, dst, gate, netmask, rtm->rtm_flags, &saved_nrt, ifscope); if (error == 0 && saved_nrt) { + RT_LOCK(saved_nrt); #ifdef __APPLE__ /* * If the route request specified an interface with @@ -491,7 +488,8 @@ route_output(struct mbuf *m, struct socket *so) saved_nrt->rt_rmx.rmx_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); saved_nrt->rt_genmask = genmask; - rtunref(saved_nrt); + RT_REMREF_LOCKED(saved_nrt); + RT_UNLOCK(saved_nrt); } break; @@ -500,6 +498,7 @@ route_output(struct mbuf *m, struct socket *so) gate, netmask, rtm->rtm_flags, &saved_nrt, ifscope); if (error == 0) { rt = saved_nrt; + RT_LOCK(rt); goto report; } break; @@ -517,12 +516,15 @@ route_output(struct mbuf *m, struct socket *so) rt = rt_lookup(TRUE, dst, netmask, rnh, ifscope); if (rt == NULL) senderr(ESRCH); + RT_LOCK(rt); switch(rtm->rtm_type) { case RTM_GET: { struct ifaddr *ifa2; report: + ifa2 = NULL; + RT_LOCK_ASSERT_HELD(rt); dst = rt_key(rt); gate = rt->rt_gateway; netmask = rt_mask(rt); @@ -533,6 +535,7 @@ route_output(struct mbuf *m, struct socket *so) ifnet_lock_shared(ifp); ifa2 = ifp->if_addrhead.tqh_first; ifpaddr = ifa2->ifa_addr; + ifaref(ifa2); ifnet_lock_done(ifp); ifaaddr = rt->rt_ifa->ifa_addr; rtm->rtm_index = ifp->if_index; @@ -547,6 +550,9 @@ route_output(struct mbuf *m, struct socket *so) struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == 0) { + RT_UNLOCK(rt); + if (ifa2 != NULL) + ifafree(ifa2); senderr(ENOBUFS); } Bcopy(rtm, new_rtm, rtm->rtm_msglen); @@ -557,13 +563,17 @@ route_output(struct mbuf *m, struct socket *so) rtm->rtm_flags = rt->rt_flags; rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_addrs = info.rti_addrs; + if (ifa2 != NULL) + ifafree(ifa2); } break; case RTM_CHANGE: - if (gate && (error = rt_setgate(rt, 
rt_key(rt), gate))) + if (gate && (error = rt_setgate(rt, + rt_key(rt), gate))) { + RT_UNLOCK(rt); senderr(error); - + } /* * If they tried to change things but didn't specify * the required gateway, then just use the old one. @@ -602,6 +612,7 @@ route_output(struct mbuf *m, struct socket *so) (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); break; } + RT_UNLOCK(rt); break; default: @@ -614,9 +625,11 @@ route_output(struct mbuf *m, struct socket *so) else rtm->rtm_flags |= RTF_DONE; } - if (rt) + if (rt != NULL) { + RT_LOCK_ASSERT_NOTHELD(rt); rtfree_locked(rt); - lck_mtx_unlock(rt_mtx); + } + lck_mtx_unlock(rnh_lock); socket_lock(so, 0); /* relock the socket now */ { struct rawcb *rp = 0; @@ -667,7 +680,7 @@ route_output(struct mbuf *m, struct socket *so) } static void -rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out) +rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rt_metrics *out) { #define metric(f, e) if (which & (f)) out->e = in->e; metric(RTV_RPIPE, rmx_recvpipe); @@ -691,11 +704,20 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, struct ifaddr *ifa = 0; struct ifnet *ifp = 0; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + RT_LOCK_ASSERT_HELD(rt); /* trigger route cache reevaluation */ if (use_routegenid) - route_generation++; + routegenid_update(); + + /* Don't update a defunct route */ + if (rt->rt_flags & RTF_CONDEMNED) + return; + + /* Add an extra ref for ourselves */ + RT_ADDREF_LOCKED(rt); /* * New gateway could require new ifaddr, ifp; flags may also @@ -717,16 +739,34 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, } else { ifnet_lock_shared(ifp); ifa = TAILQ_FIRST(&ifp->if_addrhead); - ifaref(ifa); + if (ifa != NULL) + ifaref(ifa); ifnet_lock_done(ifp); } } else if (Ifaaddr && (ifa = ifa_ifwithaddr_scoped(Ifaaddr, ifscope))) { ifp = ifa->ifa_ifp; - } else if (Gate && - (ifa = 
ifa_ifwithroute_scoped_locked(rt->rt_flags, - rt_key(rt), Gate, ifscope))) { - ifp = ifa->ifa_ifp; + } else if (Gate != NULL) { + /* + * Safe to drop rt_lock and use rt_key, since holding + * rnh_lock here prevents another thread from calling + * rt_setgate() on this route. We cannot hold the + * lock across ifa_ifwithroute since the lookup done + * by that routine may point to the same route. + */ + RT_UNLOCK(rt); + if ((ifa = ifa_ifwithroute_scoped_locked(rt->rt_flags, + rt_key(rt), Gate, ifscope)) != NULL) + ifp = ifa->ifa_ifp; + RT_LOCK(rt); + /* Don't update a defunct route */ + if (rt->rt_flags & RTF_CONDEMNED) { + if (ifa != NULL) + ifafree(ifa); + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + return; + } } } if (ifa) { @@ -745,23 +785,25 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, rt->rt_rmx.rmx_mtu = ifp->if_mtu; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate); - } else { ifafree(ifa); - goto call_ifareq; + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + return; } ifafree(ifa); - return; } -call_ifareq: + /* XXX: to reset gateway to correct value, at RTM_CHANGE */ if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, Gate); -} + /* Release extra ref */ + RT_REMREF_LOCKED(rt); +} -#define ROUNDUP(a) \ - ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) -#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) +#define ROUNDUP32(a) \ + ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(uint32_t) - 1))) : sizeof(uint32_t)) +#define ADVANCE32(x, n) (x += ROUNDUP32((n)->sa_len)) /* @@ -801,20 +843,17 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) /* accept it */ rtinfo->rti_info[i] = sa; - ADVANCE(cp, sa); + ADVANCE32(cp, sa); } return (0); } static struct mbuf * -rt_msg1( - int type, - struct rt_addrinfo *rtinfo) +rt_msg1(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; - struct sockaddr *sa; int len, dlen; switch (type) { @@ -853,10 +892,28 @@ rt_msg1( rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { + struct sockaddr *sa, *hint; + struct sockaddr_storage ss; + if ((sa = rtinfo->rti_info[i]) == NULL) continue; + + switch (i) { + case RTAX_DST: + case RTAX_NETMASK: + if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) + hint = rtinfo->rti_info[RTAX_IFA]; + + /* Scrub away any trace of embedded interface scope */ + sa = rtm_scrub_ifscope(i, hint, sa, &ss); + break; + + default: + break; + } + rtinfo->rti_addrs |= (1 << i); - dlen = ROUNDUP(sa->sa_len); + dlen = ROUNDUP32(sa->sa_len); m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } @@ -914,12 +971,28 @@ rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { - struct sockaddr *sa; + struct sockaddr *sa, *hint; + struct sockaddr_storage ss; if ((sa = rtinfo->rti_info[i]) == 0) continue; + + switch (i) { + case RTAX_DST: + case RTAX_NETMASK: + if ((hint = rtinfo->rti_info[RTAX_DST]) == NULL) + hint = rtinfo->rti_info[RTAX_IFA]; + + /* Scrub away any trace of embedded interface scope */ + sa = rtm_scrub_ifscope(i, hint, sa, &ss); + break; + + default: + break; + } + rtinfo->rti_addrs |= (1 << i); - dlen = ROUNDUP(sa->sa_len); + dlen = ROUNDUP32(sa->sa_len); if (cp) { bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; @@ -968,8 +1041,6 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int 
error) struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); - if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); @@ -1019,7 +1090,7 @@ rt_ifmsg( * copies of it. * * Since this is coming from the interface, it is expected that the - * interface will be locked. + * interface will be locked. Caller must hold rt_lock. */ void rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) @@ -1030,6 +1101,8 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) struct mbuf *m = 0; struct ifnet *ifp = ifa->ifa_ifp; + RT_LOCK_ASSERT_HELD(rt); + if (route_cb.any_count == 0) return; for (pass = 1; pass < 3; pass++) { @@ -1039,12 +1112,17 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) struct ifa_msghdr *ifam; int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; + /* Lock ifp for if_addrhead */ + ifnet_lock_shared(ifp); ifaaddr = sa = ifa->ifa_addr; ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; netmask = ifa->ifa_netmask; brdaddr = ifa->ifa_dstaddr; - if ((m = rt_msg1(ncmd, &info)) == NULL) + if ((m = rt_msg1(ncmd, &info)) == NULL) { + ifnet_lock_done(ifp); continue; + } + ifnet_lock_done(ifp); ifam = mtod(m, struct ifa_msghdr *); ifam->ifam_index = ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; @@ -1091,6 +1169,9 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) bzero((caddr_t)&info, sizeof(info)); ifaaddr = ifma->ifma_addr; + /* Lock ifp for if_addrhead */ + if (ifp != NULL) + ifnet_lock_shared(ifp); if (ifp && ifp->if_addrhead.tqh_first) ifpaddr = ifp->if_addrhead.tqh_first->ifa_addr; else @@ -1100,8 +1181,13 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) * (similarly to how ARP entries, e.g., are presented). 
*/ gate = ifma->ifma_ll->ifma_addr; - if ((m = rt_msg1(cmd, &info)) == NULL) + if ((m = rt_msg1(cmd, &info)) == NULL) { + if (ifp != NULL) + ifnet_lock_done(ifp); return; + } + if (ifp != NULL) + ifnet_lock_done(ifp); ifmam = mtod(m, struct ifma_msghdr *); ifmam->ifmam_index = ifp ? ifp->if_index : 0; ifmam->ifmam_addrs = info.rti_addrs; @@ -1120,8 +1206,11 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) int error = 0, size; struct rt_addrinfo info; - if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) + RT_LOCK(rt); + if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) { + RT_UNLOCK(rt); return 0; + } bzero((caddr_t)&info, sizeof(info)); dst = rt_key(rt); gate = rt->rt_gateway; @@ -1141,6 +1230,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) rtm->rtm_errno = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); + RT_UNLOCK(rt); return (error); } } else { @@ -1160,10 +1250,12 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) rtm->rtm_reserved = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); + RT_UNLOCK(rt); return (error); } } + RT_UNLOCK(rt); return (error); } @@ -1383,13 +1475,13 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS case NET_RT_DUMP: case NET_RT_DUMP2: case NET_RT_FLAGS: - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); for (i = 1; i <= AF_MAX; i++) if ((rnh = rt_tables[i]) && (af == 0 || af == i) && (error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w))) break; - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); break; case NET_RT_IFLIST: error = sysctl_iflist(af, &w); @@ -1415,7 +1507,7 @@ SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); * Definitions of protocols supported in the ROUTE domain. 
*/ -struct domain routedomain; /* or at least forward */ +extern struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, diff --git a/bsd/netat/Makefile b/bsd/netat/Makefile index 501f46d93..9282cd445 100644 --- a/bsd/netat/Makefile +++ b/bsd/netat/Makefile @@ -26,29 +26,28 @@ COMP_SUBDIRS = \ INST_SUBDIRS = \ -DATAFILES = appletalk.h atp.h asp.h aurp.h \ - ddp.h ep.h lap.h nbp.h pap.h zip.h \ - adsp.h at_pat.h at_snmp.h at_aarp.h \ - rtmp.h +DATAFILES = PRIVATE_DATAFILES = \ - debug.h routing_tables.h sysglue.h at_var.h + appletalk.h atp.h asp.h aurp.h \ + ddp.h ep.h lap.h nbp.h pap.h zip.h \ + adsp.h at_pat.h at_snmp.h at_aarp.h \ + rtmp.h debug.h routing_tables.h sysglue.h at_var.h \ + adsp_internal.h at_ddp_brt.h at_pcb.h -PRIVATE_KERNELFILES = \ - adsp_internal.h at_ddp_brt.h at_pcb.h - +PRIVATE_KERNELFILES = -INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_LIST = -INSTALL_MI_DIR = netat +INSTALL_MI_DIR = -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = EXPORT_MI_DIR = netat -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = -INSTALL_KF_MI_LCL_LIST = ${INSTALL_MI_LCL_LIST} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/netat/asp_proto.c b/bsd/netat/asp_proto.c index 7f12b749a..bd471ff5a 100644 --- a/bsd/netat/asp_proto.c +++ b/bsd/netat/asp_proto.c @@ -255,7 +255,7 @@ asp_close(gref) return 0; } /* asp_close */ -#ifdef DEBUG +#if DEBUG static const char *aspStateStr(int); diff --git a/bsd/netat/at_aarp.h b/bsd/netat/at_aarp.h index 28b03bcbe..e98b87ffc 100644 --- a/bsd/netat/at_aarp.h +++ b/bsd/netat/at_aarp.h @@ -30,7 +30,7 @@ #include #ifdef KERNEL_PRIVATE #include -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #ifdef __APPLE_API_OBSOLETE diff --git a/bsd/netat/aurp_aurpd.c b/bsd/netat/aurp_aurpd.c index 674ea01c7..1fed65a63 100644 --- a/bsd/netat/aurp_aurpd.c +++ 
b/bsd/netat/aurp_aurpd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -151,7 +151,7 @@ aurpd_start() sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVBUF; sopt.sopt_dir = SOPT_SET; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; if ((error = sosetopt(so, &sopt)) != 0) goto out; } @@ -171,7 +171,7 @@ aurpd_start() sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_SNDBUF; sopt.sopt_dir = SOPT_SET; - sopt.sopt_p = NULL; + sopt.sopt_p = kernproc; if ((error = sosetopt(so, &sopt)) != 0) goto out; } diff --git a/bsd/netat/ddp_r_rtmp.c b/bsd/netat/ddp_r_rtmp.c index 584508e63..bc21d5b40 100644 --- a/bsd/netat/ddp_r_rtmp.c +++ b/bsd/netat/ddp_r_rtmp.c @@ -1214,7 +1214,7 @@ void rtmp_send_port(ifID) rtmp_send_table(ifID, DestNet, 0xFF, 1, RTMP_SOCKET, 0); } -#ifdef DEBUG +#if DEBUG if (ifID == ifID_home) dPrintf(D_M_RTMP_LOW, D_L_VERBOSE, ("I:%5d O:%5d H:%5d dropped:%d\n", diff --git a/bsd/netat/ddp_usrreq.c b/bsd/netat/ddp_usrreq.c index fd6b8ab92..17bddda8a 100644 --- a/bsd/netat/ddp_usrreq.c +++ b/bsd/netat/ddp_usrreq.c @@ -185,6 +185,8 @@ int ddp_pru_send(struct socket *so, __unused int flags, struct mbuf *m, if (!(pcb->ddp_flags & DDPFLG_HDRINCL)) { /* prepend a DDP header */ M_PREPEND(m, DDP_X_HDR_SIZE, M_WAIT); + if (m == NULL) + return ENOBUFS; ddp = mtod(m, at_ddp_t *); } diff --git a/bsd/netat/debug.h b/bsd/netat/debug.h index 7228eb26d..475fee7b8 100644 --- a/bsd/netat/debug.h +++ b/bsd/netat/debug.h @@ -99,7 +99,7 @@ typedef struct dbgBits { extern dbgBits_t dbgBits; /* macros for debugging */ -#ifdef DEBUG +#if DEBUG #define dPrintf(mod, lev, p) \ if (((mod) & dbgBits.dbgMod) && ((lev) & dbgBits.dbgLev)) {\ printf p; \ diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile index 26195cc07..de3d2890a 100644 --- a/bsd/netinet/Makefile +++ b/bsd/netinet/Makefile @@ -34,7 +34,7 @@ KERNELFILES = \ 
PRIVATE_DATAFILES = \ if_fddi.h if_atm.h ip_dummynet.h \ tcp_debug.h \ - in_gif.h ip_compat.h ip_edgehole.h + in_gif.h ip_compat.h PRIVATE_KERNELFILES = ${KERNELFILES} \ ip_ecn.h ip_encap.h ip_flow.h diff --git a/bsd/netinet/dhcp_options.c b/bsd/netinet/dhcp_options.c index 1992e3af3..29835c7b9 100644 --- a/bsd/netinet/dhcp_options.c +++ b/bsd/netinet/dhcp_options.c @@ -349,6 +349,7 @@ dhcpol_get(dhcpol_t * list, int tag, int * len_p) else { data = my_realloc(data, data_len, data_len + len); } + FIX ME: test data NULL bcopy(option + DHCP_OPTION_OFFSET, data + data_len, len); data_len += len; } @@ -356,7 +357,7 @@ dhcpol_get(dhcpol_t * list, int tag, int * len_p) *len_p = data_len; return (data); } -#endif 0 +#endif /* * Function: dhcpol_parse_packet diff --git a/bsd/netinet/dhcp_options.h b/bsd/netinet/dhcp_options.h index ca6197a92..674416198 100644 --- a/bsd/netinet/dhcp_options.h +++ b/bsd/netinet/dhcp_options.h @@ -196,7 +196,7 @@ const void * dhcpol_find(dhcpol_t * list, int tag, int * len_p, int * start); #if 0 void * dhcpol_get(dhcpol_t * list, int tag, int * len_p); -#endif 0 +#endif boolean_t dhcpol_parse_packet(dhcpol_t * options, const struct dhcp * pkt, int len); void dhcpol_print(dhcpol_t * list); @@ -257,7 +257,7 @@ dhcpoa_add_dhcpmsg(dhcpoa_t * oa_p, dhcp_msgtype_t msgtype); dhcpoa_ret_t dhcpoa_vendor_add(dhcpoa_t * oa_p, dhcpoa_t * vendor_oa_p, dhcptag_t tag, int len, void * option); -#endif 0 +#endif int dhcpoa_used(dhcpoa_t * oa_p); @@ -274,5 +274,5 @@ dhcpoa_freespace(dhcpoa_t * oa_p); int dhcpoa_size(dhcpoa_t * oa_p); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_DHCP_OPTIONS_H */ diff --git a/bsd/netinet/icmp6.h b/bsd/netinet/icmp6.h index 5c1dd0239..e3c559b11 100644 --- a/bsd/netinet/icmp6.h +++ b/bsd/netinet/icmp6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2007 Apple Inc. All rights reserved. + * Copyright (c) 2000,2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,6 +132,12 @@ struct icmp6_hdr { #define ICMP6_MEMBERSHIP_REDUCTION 132 /* group membership termination */ #define MLD6_LISTENER_DONE 132 /* multicast listener done */ +#ifndef KERNEL +#define MLD_LISTENER_QUERY MLD6_LISTENER_QUERY +#define MLD_LISTENER_REPORT MLD6_LISTENER_REPORT +#define MLD_LISTENER_DONE MLD6_LISTENER_DONE +#endif /* !KERNEL */ + #define ND_ROUTER_SOLICIT 133 /* router solicitation */ #define ND_ROUTER_ADVERT 134 /* router advertisment */ #define ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */ @@ -151,6 +157,11 @@ struct icmp6_hdr { #define MLD6_MTRACE_RESP 200 /* mtrace response(to sender) */ #define MLD6_MTRACE 201 /* mtrace messages */ +#ifndef KERNEL +#define MLD_MTRACE_RESP MLD6_MTRACE_RESP +#define MLD_MTRACE MLD6_MTRACE +#endif /* !KERNEL */ + #define ICMP6_HADISCOV_REQUEST 202 /* XXX To be defined */ #define ICMP6_HADISCOV_REPLY 203 /* XXX To be defined */ @@ -731,6 +742,6 @@ do { \ extern int icmp6_rediraccept; /* accept/process redirects */ extern int icmp6_redirtimeout; /* cache time for redirect routes */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* !_NETINET_ICMP6_H_ */ diff --git a/bsd/netinet/icmp_var.h b/bsd/netinet/icmp_var.h index 4b767b901..d4332dcd3 100644 --- a/bsd/netinet/icmp_var.h +++ b/bsd/netinet/icmp_var.h @@ -115,5 +115,7 @@ extern int badport_bandlim(int); #define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ #define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ #define BANDLIM_MAX 4 -#endif KERNEL_PRIVATE + +extern struct icmpstat icmpstat; +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_ICMP_VAR_H_ */ diff --git a/bsd/netinet/if_ether.h b/bsd/netinet/if_ether.h index d0bb1057d..1705a1413 100644 --- a/bsd/netinet/if_ether.h +++ b/bsd/netinet/if_ether.h @@ -148,6 +148,6 @@ extern struct ifqueue arpintrq; int arpresolve(struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *, struct rtentry *); void 
arp_ifinit(struct ifnet *, struct ifaddr *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_IF_ETHER_H_ */ diff --git a/bsd/netinet/if_fddi.h b/bsd/netinet/if_fddi.h index 5aee8f1bf..fb9f81f10 100644 --- a/bsd/netinet/if_fddi.h +++ b/bsd/netinet/if_fddi.h @@ -109,6 +109,6 @@ void fddi_ifattach(struct ifnet *); void fddi_input(struct ifnet *, struct fddi_header *, struct mbuf *); int fddi_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET_IF_FDDI_H_ +#endif /* _NETINET_IF_FDDI_H_ */ diff --git a/bsd/netinet/if_tun.h b/bsd/netinet/if_tun.h index e4d3e7a67..a5a54e238 100644 --- a/bsd/netinet/if_tun.h +++ b/bsd/netinet/if_tun.h @@ -68,7 +68,7 @@ struct tun_softc { #endif }; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ /* ioctl's for get/set debug */ #define TUNSDEBUG _IOW('t', 90, int) diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c index 1889c7125..004ccc9c7 100644 --- a/bsd/netinet/igmp.c +++ b/bsd/netinet/igmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -118,12 +118,12 @@ SYSCTL_STRUCT(_net_inet_igmp, IGMPCTL_STATS, stats, CTLFLAG_RD, &igmpstat, igmpstat, ""); static int igmp_timers_are_running; -static u_long igmp_all_hosts_group; -static u_long igmp_all_rtrs_group; +static uint32_t igmp_all_hosts_group; +static uint32_t igmp_all_rtrs_group; static struct mbuf *router_alert; static struct router_info *Head; -static void igmp_sendpkt(struct in_multi *, int, unsigned long); +static void igmp_sendpkt(struct in_multi *, int, uint32_t); void igmp_init(void) @@ -141,7 +141,7 @@ igmp_init(void) /* * Construct a Router Alert option to use in outgoing packets */ - MGET(router_alert, M_DONTWAIT, MT_DATA); + MGET(router_alert, M_WAIT, MT_DATA); ra = mtod(router_alert, struct ipoption *); ra->ipopt_dst.s_addr = 0; ra->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ @@ -309,7 +309,7 @@ igmp_input( * - Use the value specified in the query message as * the maximum timeout. */ - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); IN_FIRST_MULTI(step, inm); while (inm != NULL) { if (inm->inm_ifp == ifp && @@ -325,7 +325,7 @@ igmp_input( } IN_NEXT_MULTI(step, inm); } - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); break; @@ -338,17 +338,24 @@ igmp_input( * router, so discard reports sourced by me. 
*/ IFP_TO_IA(ifp, ia); - if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr) + if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr) { + ifafree(&ia->ia_ifa); break; + } ++igmpstat.igps_rcv_reports; - if (ifp->if_flags & IFF_LOOPBACK) + if (ifp->if_flags & IFF_LOOPBACK) { + if (ia != NULL) + ifafree(&ia->ia_ifa); break; + } if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr))) { ++igmpstat.igps_rcv_badreports; m_freem(m); + if (ia != NULL) + ifafree(&ia->ia_ifa); return; } @@ -379,6 +386,8 @@ igmp_input( inm->inm_state = IGMP_OTHERMEMBER; } + if (ia != NULL) + ifafree(&ia->ia_ifa); break; } @@ -473,7 +482,7 @@ igmp_slowtimo(void) static struct route igmprt; static void -igmp_sendpkt(struct in_multi *inm, int type, unsigned long addr) +igmp_sendpkt(struct in_multi *inm, int type, uint32_t addr) { struct mbuf *m; struct igmp *igmp; diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c index 3a1c1bc6e..32b8c64f6 100644 --- a/bsd/netinet/in.c +++ b/bsd/netinet/in.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -95,6 +96,9 @@ #include +#if PF +#include +#endif /* PF */ static int in_mask2len(struct in_addr *); static void in_len2mask(struct in_addr *, int); @@ -105,6 +109,22 @@ static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int); +#define IA_HASH_INIT(ia) { \ + (ia)->ia_hash.tqe_next = (void *)(uintptr_t)-1; \ + (ia)->ia_hash.tqe_prev = (void *)(uintptr_t)-1; \ +} + +#define IA_IS_HASHED(ia) \ + (!((ia)->ia_hash.tqe_next == (void *)(uintptr_t)-1 || \ + (ia)->ia_hash.tqe_prev == (void *)(uintptr_t)-1)) + +static void in_iahash_remove(struct in_ifaddr *); +static void in_iahash_insert(struct in_ifaddr *); +static void in_iahash_insert_ptp(struct in_ifaddr *); +static struct in_ifaddr *in_ifaddr_alloc(int); +static void in_ifaddr_free(struct ifaddr *); +static void in_ifaddr_trace(struct ifaddr *, int); + static int subnetsarelocal = 0; SYSCTL_INT(_net_inet_ip, 
OID_AUTO, subnets_are_local, CTLFLAG_RW, &subnetsarelocal, 0, ""); @@ -114,6 +134,30 @@ struct in_multihead in_multihead; /* XXX BSS initialization */ /* Track whether or not the SIOCARPIPLL ioctl has been called */ __private_extern__ u_int32_t ipv4_ll_arp_aware = 0; +struct in_ifaddr_dbg { + struct in_ifaddr inifa; /* in_ifaddr */ + struct in_ifaddr inifa_old; /* saved in_ifaddr */ + u_int16_t inifa_refhold_cnt; /* # of ifaref */ + u_int16_t inifa_refrele_cnt; /* # of ifafree */ + /* + * Alloc and free callers. + */ + ctrace_t inifa_alloc; + ctrace_t inifa_free; + /* + * Circular lists of ifaref and ifafree callers. + */ + ctrace_t inifa_refhold[CTRACE_HIST_SIZE]; + ctrace_t inifa_refrele[CTRACE_HIST_SIZE]; +}; + +static unsigned int inifa_debug; /* debug flags */ +static unsigned int inifa_size; /* size of zone element */ +static struct zone *inifa_zone; /* zone for in_ifaddr */ + +#define INIFA_ZONE_MAX 64 /* maximum elements in zone */ +#define INIFA_ZONE_NAME "in_ifaddr" /* zone name */ + int inaddr_local(struct in_addr in) { @@ -124,12 +168,14 @@ inaddr_local(struct in_addr in) sin.sin_family = AF_INET; sin.sin_len = sizeof (sin); sin.sin_addr = in; - rt = rtalloc1((struct sockaddr *)&sin, 0, 0UL); + rt = rtalloc1((struct sockaddr *)&sin, 0, 0); if (rt != NULL) { + RT_LOCK_SPIN(rt); if (rt->rt_gateway->sa_family == AF_LINK || (rt->rt_ifp->if_flags & IFF_LOOPBACK)) local = 1; + RT_UNLOCK(rt); rtfree(rt); } else { local = in_localaddr(in); @@ -146,27 +192,27 @@ inaddr_local(struct in_addr in) int in_localaddr(struct in_addr in) { - u_long i = ntohl(in.s_addr); + u_int32_t i = ntohl(in.s_addr); struct in_ifaddr *ia; if (subnetsarelocal) { - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) if ((i & ia->ia_netmask) == ia->ia_net) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return (1); } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); } else { - 
lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) if ((i & ia->ia_subnetmask) == ia->ia_subnet) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return (1); } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); } return (0); } @@ -179,8 +225,8 @@ in_localaddr(struct in_addr in) int in_canforward(struct in_addr in) { - u_long i = ntohl(in.s_addr); - u_long net; + u_int32_t i = ntohl(in.s_addr); + u_int32_t net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i)) return (0); @@ -285,11 +331,10 @@ in_control( struct kev_msg ev_msg; struct kev_in_data in_event_data; - switch (cmd) { case SIOCALIFADDR: case SIOCDLIFADDR: - if (p && (error = proc_suser(p)) != 0) + if ((error = proc_suser(p)) != 0) return error; /*fall through*/ case SIOCGLIFADDR: @@ -305,7 +350,7 @@ in_control( * the first one on the interface. */ if (ifp) { - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (iap = in_ifaddrhead.tqh_first; iap; iap = iap->ia_link.tqe_next) if (iap->ia_ifp == ifp) { @@ -319,16 +364,16 @@ in_control( break; } } - /* take a reference on ia before releasing mutex */ + /* take a reference on ia before releasing lock */ if (ia != NULL) { ifaref(&ia->ia_ifa); } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); } switch (cmd) { case SIOCAUTOADDR: case SIOCARPIPLL: - if (p && (error = proc_suser(p)) != 0) { + if ((error = proc_suser(p)) != 0) { goto done; } if (ifp == 0) { @@ -346,18 +391,18 @@ in_control( if (ifra->ifra_addr.sin_family == AF_INET) { struct in_ifaddr *oia; - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (oia = ia; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifp == ifp && ia->ia_addr.sin_addr.s_addr == ifra->ifra_addr.sin_addr.s_addr) break; } - /* take a reference on ia before releasing mutex */ + /* take a reference on ia before releasing lock */ if (ia != NULL && ia != oia) { ifaref(&ia->ia_ifa); } - lck_mtx_unlock(rt_mtx); + 
lck_rw_done(in_ifaddr_rwlock); if (oia != NULL && oia != ia) { ifafree(&oia->ia_ifa); } @@ -395,22 +440,15 @@ in_control( goto done; } if (ia == (struct in_ifaddr *)0) { - ia = (struct in_ifaddr *) - _MALLOC(sizeof *ia, M_IFADDR, M_WAITOK); + ia = in_ifaddr_alloc(M_WAITOK); if (ia == (struct in_ifaddr *)NULL) { error = ENOBUFS; goto done; } - bzero((caddr_t)ia, sizeof *ia); - /* - * Protect from ipintr() traversing address list - * while we're modifying it. - */ - + IA_HASH_INIT(ia); ifa = &ia->ia_ifa; - - ia->ia_addr.sin_family = AF_INET; - ia->ia_addr.sin_len = sizeof (ia->ia_addr); + /* Hold a reference for this routine */ + ifaref(ifa); ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; @@ -423,13 +461,14 @@ in_control( ia->ia_ifp = ifp; if (!(ifp->if_flags & IFF_LOOPBACK)) in_interfaces++; + /* if_attach_ifa() holds a reference for ifa_link */ if_attach_ifa(ifp, ifa); ifnet_lock_done(ifp); - - lck_mtx_lock(rt_mtx); - ifaref(&ia->ia_ifa); + lck_rw_lock_exclusive(in_ifaddr_rwlock); + /* Hold a reference for ia_link */ + ifaref(ifa); TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link); - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); /* Generic protocol plumbing */ @@ -440,13 +479,12 @@ in_control( } error = 0; /*discard error, can be cold with unsupported interfaces */ } - } break; case SIOCPROTOATTACH: case SIOCPROTODETACH: - if (p && (error = proc_suser(p)) != 0) { + if ((error = proc_suser(p)) != 0) { goto done; } if (ifp == 0) { @@ -454,18 +492,12 @@ in_control( goto done; } break; - + case SIOCSIFBRDADDR: -#ifdef __APPLE__ if ((so->so_state & SS_PRIV) == 0) { error = EPERM; goto done; } -#else - if (p && (error = suser(p)) != 0) { - goto done; - } -#endif /* FALLTHROUGH */ case SIOCGIFADDR: @@ -529,6 +561,8 @@ in_control( } oldaddr = ia->ia_dstaddr; ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr; + if (ia->ia_dstaddr.sin_family == 
AF_INET) + ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); error = ifnet_ioctl(ifp, PF_INET, SIOCSIFDSTADDR, ia); if (error == EOPNOTSUPP) { error = 0; @@ -558,7 +592,7 @@ in_control( in_event_data.ia_netbroadcast = ia->ia_netbroadcast; strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof(struct kev_in_data); @@ -603,7 +637,7 @@ in_control( in_event_data.ia_netbroadcast = ia->ia_netbroadcast; strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof(struct kev_in_data); @@ -614,7 +648,16 @@ in_control( break; case SIOCSIFADDR: - error = in_ifinit(ifp, ia, (struct sockaddr_in *) &ifr->ifr_addr, 1); + /* + * If this is a new address, the reference count for the + * hash table has been taken at creation time above. 
+ */ + error = in_ifinit(ifp, ia, + (struct sockaddr_in *)&ifr->ifr_addr, 1); +#if PF + if (!error) + (void) pf_ifaddr_hook(ifp, cmd); +#endif /* PF */ break; case SIOCPROTOATTACH: @@ -662,7 +705,7 @@ in_control( in_event_data.ia_netbroadcast = ia->ia_netbroadcast; strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof(struct kev_in_data); @@ -676,6 +719,7 @@ in_control( maskIsNew = 0; hostIsNew = 1; error = 0; + if (ia->ia_addr.sin_family == AF_INET) { if (ifra->ifra_addr.sin_len == 0) { ifra->ifra_addr = ia->ia_addr; @@ -695,12 +739,17 @@ in_control( (ifra->ifra_dstaddr.sin_family == AF_INET)) { in_ifscrub(ifp, ia, 0); ia->ia_dstaddr = ifra->ifra_dstaddr; + ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); maskIsNew = 1; /* We lie; but the effect's the same */ } if (ifra->ifra_addr.sin_family == AF_INET && (hostIsNew || maskIsNew)) { error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0); } +#if PF + if (!error) + (void) pf_ifaddr_hook(ifp, cmd); +#endif /* PF */ if ((ifp->if_flags & IFF_BROADCAST) && (ifra->ifra_broadaddr.sin_family == AF_INET)) ia->ia_broadaddr = ifra->ifra_broadaddr; @@ -733,7 +782,7 @@ in_control( in_event_data.ia_netbroadcast = ia->ia_netbroadcast; strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof(struct kev_in_data); @@ -772,28 +821,33 @@ in_control( in_event_data.ia_netbroadcast = ia->ia_netbroadcast; strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = 
ifp->if_family; - in_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof(struct kev_in_data); ev_msg.dv[1].data_length = 0; - lck_mtx_lock(rt_mtx); + ifa = &ia->ia_ifa; + lck_rw_lock_exclusive(in_ifaddr_rwlock); + /* Release ia_link reference */ + ifafree(ifa); TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); + if (IA_IS_HASHED(ia)) + in_iahash_remove(ia); + lck_rw_done(in_ifaddr_rwlock); + /* * in_ifscrub kills the interface route. */ - in_ifscrub(ifp, ia, 1); - ifa = &ia->ia_ifa; - lck_mtx_unlock(rt_mtx); + in_ifscrub(ifp, ia, 0); ifnet_lock_exclusive(ifp); + /* if_detach_ifa() releases ifa_link reference */ if_detach_ifa(ifp, ifa); - #ifdef __APPLE__ - /* - * If the interface supports multicast, and no address is left, - * remove the "all hosts" multicast group from that interface. - */ + /* + * If the interface supports multicast, and no address is left, + * remove the "all hosts" multicast group from that interface. + */ if (ifp->if_flags & IFF_MULTICAST) { struct in_addr addr; struct in_multi *inm = NULL; @@ -829,6 +883,9 @@ in_control( /* Release reference from ifa_ifpgetprimary() */ ifafree(ifa); } +#if PF + (void) pf_ifaddr_hook(ifp, cmd); +#endif /* PF */ break; #ifdef __APPLE__ @@ -1124,14 +1181,77 @@ in_ifscrub( if ((ia->ia_flags & IFA_ROUTE) == 0) return; if (!locked) - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); else rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, 0); ia->ia_flags &= ~IFA_ROUTE; if (!locked) - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); +} + +/* + * Caller must hold in_ifaddr_rwlock as writer. 
+ */ +static void +in_iahash_remove(struct in_ifaddr *ia) +{ + if (!IA_IS_HASHED(ia)) + panic("attempt to remove wrong ia %p from hash table\n", ia); + + TAILQ_REMOVE(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); + IA_HASH_INIT(ia); + ifafree(&ia->ia_ifa); +} + +/* + * Caller must hold in_ifaddr_rwlock as writer. + */ +static void +in_iahash_insert(struct in_ifaddr *ia) +{ + if (ia->ia_addr.sin_family != AF_INET) + panic("attempt to insert wrong ia %p into hash table\n", ia); + else if (IA_IS_HASHED(ia)) + panic("attempt to double-insert ia %p into hash table\n", ia); + + TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); + ifaref(&ia->ia_ifa); +} + +/* + * Some point to point interfaces that are tunnels + * borrow the address from an underlying interface (e.g. + * VPN server). In order for source address selection logic to + * find the underlying interface first, we add the address + * of borrowing point to point interfaces at the end of the list. + * (see rdar://6733789) + * + * Caller must hold in_ifaddr_rwlock as writer. + */ +static void +in_iahash_insert_ptp(struct in_ifaddr *ia) +{ + struct in_ifaddr *tmp_ifa; + struct ifnet *tmp_ifp; + + if (ia->ia_addr.sin_family != AF_INET) + panic("attempt to insert wrong ia %p into hash table\n", ia); + else if (IA_IS_HASHED(ia)) + panic("attempt to double-insert ia %p into hash table\n", ia); + + TAILQ_FOREACH(tmp_ifa, INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia_hash) + if (IA_SIN(tmp_ifa)->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) + break; + tmp_ifp = (tmp_ifa == NULL) ? 
NULL : tmp_ifa->ia_ifp; + + if (tmp_ifp == NULL) + TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); + else + TAILQ_INSERT_TAIL(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); + + ifaref(&ia->ia_ifa); } /* @@ -1145,14 +1265,29 @@ in_ifinit( struct sockaddr_in *sin, int scrub) { - u_long i = ntohl(sin->sin_addr.s_addr); + u_int32_t i = ntohl(sin->sin_addr.s_addr); struct sockaddr_in oldaddr; int flags = RTF_UP, error; struct ifaddr *ifa0; unsigned int cmd; + int oldremoved = 0; + + /* Take an extra reference for this routine */ + ifaref(&ia->ia_ifa); + lck_rw_lock_exclusive(in_ifaddr_rwlock); oldaddr = ia->ia_addr; + if (IA_IS_HASHED(ia)) { + oldremoved = 1; + in_iahash_remove(ia); + } ia->ia_addr = *sin; + ia->ia_addr.sin_len = sizeof (*sin); + if ((ifp->if_flags & IFF_POINTOPOINT)) + in_iahash_insert_ptp(ia); + else + in_iahash_insert(ia); + lck_rw_done(in_ifaddr_rwlock); /* * Give the interface a chance to initialize if this is its first @@ -1183,12 +1318,25 @@ in_ifinit( ifafree(ifa0); if (error) { + lck_rw_lock_exclusive(in_ifaddr_rwlock); + if (IA_IS_HASHED(ia)) + in_iahash_remove(ia); ia->ia_addr = oldaddr; + if (oldremoved) { + if ((ifp->if_flags & IFF_POINTOPOINT)) + in_iahash_insert_ptp(ia); + else + in_iahash_insert(ia); + } + lck_rw_done(in_ifaddr_rwlock); + /* Release extra reference taken above */ + ifafree(&ia->ia_ifa); return (error); } + lck_mtx_lock(rnh_lock); if (scrub) { ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr; - in_ifscrub(ifp, ia, 0); + in_ifscrub(ifp, ia, 1); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; } if (IN_CLASSA(i)) @@ -1223,12 +1371,19 @@ in_ifinit( ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr; flags |= RTF_HOST; } else if (ifp->if_flags & IFF_POINTOPOINT) { - if (ia->ia_dstaddr.sin_family != AF_INET) + if (ia->ia_dstaddr.sin_family != AF_INET) { + lck_mtx_unlock(rnh_lock); + /* Release extra reference taken above */ + ifafree(&ia->ia_ifa); return (0); + } + ia->ia_dstaddr.sin_len = 
sizeof (*sin); flags |= RTF_HOST; } - if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0) + if ((error = rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0) ia->ia_flags |= IFA_ROUTE; + lck_mtx_unlock(rnh_lock); + /* XXX check if the subnet route points to the same interface */ if (error == EEXIST) error = 0; @@ -1248,6 +1403,9 @@ in_ifinit( if (inm == 0) in_addmulti(&addr, ifp); } + + /* Release extra reference taken above */ + ifafree(&ia->ia_ifa); return (error); } @@ -1261,7 +1419,7 @@ in_broadcast( struct ifnet *ifp) { struct ifaddr *ifa; - u_long t; + u_int32_t t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) @@ -1292,7 +1450,7 @@ in_broadcast( * only exist when an interface gets a secondary * address. */ - ia->ia_subnetmask != (u_long)0xffffffff) { + ia->ia_subnetmask != (u_int32_t)0xffffffff) { ifnet_lock_done(ifp); return 1; } @@ -1313,9 +1471,9 @@ in_free_inm( * we are leaving the multicast group. */ igmp_leavegroup(inm); - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); LIST_REMOVE(inm, inm_link); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); FREE(inm, M_IPMADDR); } @@ -1363,13 +1521,13 @@ in_addmulti( inm->inm_addr = *ap; inm->inm_ifp = ifp; inm->inm_ifma = ifma; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); if (ifma->ifma_protospec == NULL) { ifma->ifma_protospec = inm; ifma->ifma_free = in_free_inm; LIST_INSERT_HEAD(&in_multihead, inm, inm_link); } - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); if (ifma->ifma_protospec != inm) { _FREE(inm, M_IPMADDR); @@ -1406,17 +1564,17 @@ in_delmulti( { struct in_multi *inm2; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); LIST_FOREACH(inm2, &in_multihead, inm_link) { if (inm2 == *inm) break; } if (inm2 != *inm) { - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); printf("in_delmulti - ignoring invalid inm (%p)\n", *inm); return; } - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); /* We intentionally do this a bit differently than BSD */ if 
((*inm)->inm_ifma) { @@ -1436,7 +1594,7 @@ inet_aton(char * cp, struct in_addr * pin) char * p; for (p = cp, i = 0; i < 4; i++) { - u_long l = strtoul(p, 0, 0); + u_int32_t l = strtoul(p, 0, 0); if (l > 255) return (FALSE); b[i] = l; @@ -1448,3 +1606,85 @@ inet_aton(char * cp, struct in_addr * pin) return (TRUE); } #endif + +/* + * Called as part of ip_init + */ +void +in_ifaddr_init(void) +{ + PE_parse_boot_argn("ifa_debug", &inifa_debug, sizeof (inifa_debug)); + + inifa_size = (inifa_debug == 0) ? sizeof (struct in_ifaddr) : + sizeof (struct in_ifaddr_dbg); + + inifa_zone = zinit(inifa_size, INIFA_ZONE_MAX * inifa_size, + 0, INIFA_ZONE_NAME); + if (inifa_zone == NULL) + panic("%s: failed allocating %s", __func__, INIFA_ZONE_NAME); + + zone_change(inifa_zone, Z_EXPAND, TRUE); +} + +static struct in_ifaddr * +in_ifaddr_alloc(int how) +{ + struct in_ifaddr *inifa; + + inifa = (how == M_WAITOK) ? zalloc(inifa_zone) : + zalloc_noblock(inifa_zone); + if (inifa != NULL) { + bzero(inifa, inifa_size); + inifa->ia_ifa.ifa_free = in_ifaddr_free; + inifa->ia_ifa.ifa_debug |= IFD_ALLOC; + if (inifa_debug != 0) { + struct in_ifaddr_dbg *inifa_dbg = + (struct in_ifaddr_dbg *)inifa; + inifa->ia_ifa.ifa_debug |= IFD_DEBUG; + inifa->ia_ifa.ifa_trace = in_ifaddr_trace; + ctrace_record(&inifa_dbg->inifa_alloc); + } + } + return (inifa); +} + +static void +in_ifaddr_free(struct ifaddr *ifa) +{ + if (ifa->ifa_refcnt != 0) + panic("%s: ifa %p bad ref cnt", __func__, ifa); + if (!(ifa->ifa_debug & IFD_ALLOC)) + panic("%s: ifa %p cannot be freed", __func__, ifa); + + if (ifa->ifa_debug & IFD_DEBUG) { + struct in_ifaddr_dbg *inifa_dbg = (struct in_ifaddr_dbg *)ifa; + ctrace_record(&inifa_dbg->inifa_free); + bcopy(&inifa_dbg->inifa, &inifa_dbg->inifa_old, + sizeof (struct in_ifaddr)); + } + bzero(ifa, sizeof (struct in_ifaddr)); + zfree(inifa_zone, ifa); +} + +static void +in_ifaddr_trace(struct ifaddr *ifa, int refhold) +{ + struct in_ifaddr_dbg *inifa_dbg = (struct in_ifaddr_dbg *)ifa; + 
ctrace_t *tr; + u_int32_t idx; + u_int16_t *cnt; + + if (!(ifa->ifa_debug & IFD_DEBUG)) + panic("%s: ifa %p has no debug structure", __func__, ifa); + + if (refhold) { + cnt = &inifa_dbg->inifa_refhold_cnt; + tr = inifa_dbg->inifa_refhold; + } else { + cnt = &inifa_dbg->inifa_refrele_cnt; + tr = inifa_dbg->inifa_refrele; + } + + idx = OSAddAtomic16(1, (volatile SInt16 *)cnt) % CTRACE_HIST_SIZE; + ctrace_record(&tr[idx]); +} diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index 0fcbd52d1..d3283958f 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -442,7 +442,7 @@ struct ip_opts { #define IP_TRAFFIC_MGT_BACKGROUND 65 /* int*; get background IO flags; set background IO */ #ifdef PRIVATE -#define IP_FORCE_OUT_IFP 69 /* deprecated; use IP_BOUND_IF instead */ +#define IP_FORCE_OUT_IFP 69 /* deprecated; use IP_BOUND_IF instead */ #endif /* Background socket configuration flags */ @@ -612,7 +612,7 @@ extern u_short in_addword(u_short, u_short); extern u_short in_pseudo(u_int, u_int, u_int); extern int in_localaddr(struct in_addr); -extern u_long in_netof(struct in_addr); +extern u_int32_t in_netof(struct in_addr); extern int inaddr_local(struct in_addr); #endif /* KERNEL_PRIVATE */ @@ -622,4 +622,14 @@ extern int inaddr_local(struct in_addr); extern const char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ #endif /* KERNEL */ +#ifndef KERNEL +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +__BEGIN_DECLS +int bindresvport(int, struct sockaddr_in *); +struct sockaddr; +int bindresvport_sa(int, struct sockaddr *); +__END_DECLS +#endif +#endif + #endif /* _NETINET_IN_H_ */ diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c index 940b4bf0d..6897e77ac 100644 --- a/bsd/netinet/in_arp.c +++ b/bsd/netinet/in_arp.c @@ -77,10 +77,13 @@ #include #include #include +#include +#define SA(p) ((struct sockaddr *)(p)) #define SIN(s) ((struct sockaddr_in *)s) #define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen)) #define 
rt_expire rt_rmx.rmx_expire +#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) static const size_t MAX_HW_LEN = 10; @@ -108,12 +111,43 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, CTLFLAG_RW, &apple_hwcksum_rx, 0, ""); struct llinfo_arp { + /* + * The following are protected by rnh_lock + */ LIST_ENTRY(llinfo_arp) la_le; struct rtentry *la_rt; + /* + * The following are protected by rt_lock + */ struct mbuf *la_hold; /* last packet until resolved/timeout */ - long la_asked; /* last time we QUERIED for this addr */ + int32_t la_asked; /* last time we QUERIED for this addr */ }; +/* + * Synchronization notes: + * + * The global list of ARP entries are stored in llinfo_arp; an entry + * gets inserted into the list when the route is created and gets + * removed from the list when it is deleted; this is done as part + * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in arp_rtrequest(). + * + * Because rnh_lock and rt_lock for the entry are held during those + * operations, the same locks (and thus lock ordering) must be used + * elsewhere to access the relevant data structure fields: + * + * la_le.{le_next,le_prev}, la_rt + * + * - Routing lock (rnh_lock) + * + * la_hold, la_asked + * + * - Routing entry lock (rt_lock) + * + * Due to the dependency on rt_lock, llinfo_arp has the same lifetime + * as the route entry itself. When a route is deleted (RTM_DELETE), + * it is simply removed from the global list but the memory is not + * freed until the route itself is freed. 
+ */ static LIST_HEAD(, llinfo_arp) llinfo_arp; static int arp_inuse, arp_allocated; @@ -148,29 +182,97 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes, CTLFLAG_RW, &send_conflicting_probes, 0, "send conflicting link-local arp probes"); +static errno_t arp_lookup_route(const struct in_addr *, int, + int, route_t *, unsigned int); +static void arptimer(void *); +static struct llinfo_arp *arp_llinfo_alloc(void); +static void arp_llinfo_free(void *); + extern u_int32_t ipv4_ll_arp_aware; +static int arpinit_done; + +static struct zone *llinfo_arp_zone; +#define LLINFO_ARP_ZONE_MAX 256 /* maximum elements in zone */ +#define LLINFO_ARP_ZONE_NAME "llinfo_arp" /* name for zone */ + +void +arp_init(void) +{ + if (arpinit_done) { + log(LOG_NOTICE, "arp_init called more than once (ignored)\n"); + return; + } + + LIST_INIT(&llinfo_arp); + + llinfo_arp_zone = zinit(sizeof (struct llinfo_arp), + LLINFO_ARP_ZONE_MAX * sizeof (struct llinfo_arp), 0, + LLINFO_ARP_ZONE_NAME); + if (llinfo_arp_zone == NULL) + panic("%s: failed allocating llinfo_arp_zone", __func__); + + zone_change(llinfo_arp_zone, Z_EXPAND, TRUE); + + arpinit_done = 1; + + /* start timer */ + timeout(arptimer, (caddr_t)0, hz); +} + +static struct llinfo_arp * +arp_llinfo_alloc(void) +{ + return (zalloc(llinfo_arp_zone)); +} + +static void +arp_llinfo_free(void *arg) +{ + struct llinfo_arp *la = arg; + + if (la->la_le.le_next != NULL || la->la_le.le_prev != NULL) { + panic("%s: trying to free %p when it is in use", __func__, la); + /* NOTREACHED */ + } + + /* Just in case there's anything there, free it */ + if (la->la_hold != NULL) { + m_freem(la->la_hold); + la->la_hold = NULL; + } + + zfree(llinfo_arp_zone, la); +} + /* * Free an arp entry. 
*/ static void -arptfree( - struct llinfo_arp *la) +arptfree(struct llinfo_arp *la) { struct rtentry *rt = la->la_rt; struct sockaddr_dl *sdl; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); - if (rt == 0) - panic("arptfree"); + + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && sdl->sdl_family == AF_LINK) { sdl->sdl_alen = 0; la->la_asked = 0; rt->rt_flags &= ~RTF_REJECT; - return; + RT_UNLOCK(rt); + } else { + /* + * Safe to drop rt_lock and use rt_key, since holding + * rnh_lock here prevents another thread from calling + * rt_setgate() on this route. + */ + RT_UNLOCK(rt); + rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), + 0, NULL); } - rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), - 0, (struct rtentry **)0); } /* @@ -184,16 +286,19 @@ arptimer(void *ignored_arg) struct llinfo_arp *la, *ola; struct timeval timenow; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); la = llinfo_arp.lh_first; getmicrotime(&timenow); while ((ola = la) != 0) { struct rtentry *rt = la->la_rt; la = la->la_le.le_next; + RT_LOCK(rt); if (rt->rt_expire && rt->rt_expire <= timenow.tv_sec) arptfree(ola); /* timer has expired, clear */ + else + RT_UNLOCK(rt); } - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); timeout(arptimer, (caddr_t)0, arpt_prune * hz); } @@ -207,17 +312,16 @@ arp_rtrequest( __unused struct sockaddr *sa) { struct sockaddr *gate = rt->rt_gateway; - struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; + struct llinfo_arp *la = rt->rt_llinfo; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}}; - static int arpinit_done; struct timeval timenow; if (!arpinit_done) { - arpinit_done = 1; - LIST_INIT(&llinfo_arp); - timeout(arptimer, (caddr_t)0, hz); + panic("%s: ARP has not been initialized", __func__); + /* NOTREACHED */ } - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, 
LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); if (rt->rt_flags & RTF_GATEWAY) return; @@ -237,23 +341,31 @@ arp_rtrequest( /* * Case 1: This route should come from a route to iface. */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = rt->rt_ifp->if_type; - SDL(gate)->sdl_index = rt->rt_ifp->if_index; - /* In case we're called before 1.0 sec. has elapsed */ - rt->rt_expire = MAX(timenow.tv_sec, 1); + if (rt_setgate(rt, rt_key(rt), + (struct sockaddr *)&null_sdl) == 0) { + gate = rt->rt_gateway; + SDL(gate)->sdl_type = rt->rt_ifp->if_type; + SDL(gate)->sdl_index = rt->rt_ifp->if_index; + /* + * In case we're called before 1.0 sec. + * has elapsed. + */ + rt->rt_expire = MAX(timenow.tv_sec, 1); + } break; } /* Announce a new entry if requested. */ - if (rt->rt_flags & RTF_ANNOUNCE) - dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST, SDL(gate), rt_key(rt), (struct sockaddr_dl *)rt_key(rt), NULL); + if (rt->rt_flags & RTF_ANNOUNCE) { + RT_UNLOCK(rt); + dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST, + SDL(gate), rt_key(rt), NULL, rt_key(rt)); + RT_LOCK(rt); + } /*FALLTHROUGH*/ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { - if (log_arp_warnings) + if (log_arp_warnings) log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n"); break; } @@ -265,42 +377,44 @@ arp_rtrequest( * Case 2: This route may come from cloning, or a manual route * add with a LL address. 
*/ - R_Malloc(la, struct llinfo_arp *, sizeof(*la)); - rt->rt_llinfo = (caddr_t)la; - if (la == 0) { - if ( log_arp_warnings) - log(LOG_DEBUG, "arp_rtrequest: malloc failed\n"); + rt->rt_llinfo = la = arp_llinfo_alloc(); + if (la == NULL) { + if (log_arp_warnings) + log(LOG_DEBUG, "%s: malloc failed\n", __func__); break; } + rt->rt_llinfo_free = arp_llinfo_free; + arp_inuse++, arp_allocated++; Bzero(la, sizeof(*la)); la->la_rt = rt; rt->rt_flags |= RTF_LLINFO; LIST_INSERT_HEAD(&llinfo_arp, la, la_le); -#if INET /* * This keeps the multicast addresses from showing up * in `arp -a' listings as unresolved. It's not actually * functional. Then the same for broadcast. */ if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { - dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate, sizeof(struct sockaddr_dl)); + RT_UNLOCK(rt); + dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate, + sizeof(struct sockaddr_dl)); + RT_LOCK(rt); rt->rt_expire = 0; } else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { struct sockaddr_dl *gate_ll = SDL(gate); size_t broadcast_len; - ifnet_llbroadcast_copy_bytes(rt->rt_ifp, LLADDR(gate_ll), - sizeof(gate_ll->sdl_data), - &broadcast_len); + ifnet_llbroadcast_copy_bytes(rt->rt_ifp, + LLADDR(gate_ll), sizeof(gate_ll->sdl_data), + &broadcast_len); gate_ll->sdl_alen = broadcast_len; gate_ll->sdl_family = AF_LINK; gate_ll->sdl_len = sizeof(struct sockaddr_dl); /* In case we're called before 1.0 sec. has elapsed */ rt->rt_expire = MAX(timenow.tv_sec, 1); } -#endif if (SIN(rt_key(rt))->sin_addr.s_addr == (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) { @@ -326,14 +440,19 @@ arp_rtrequest( if (la == 0) break; arp_inuse--; + /* + * Unchain it but defer the actual freeing until the route + * itself is to be freed. rt->rt_llinfo still points to + * llinfo_arp, and likewise, la->la_rt still points to this + * route entry, except that RTF_LLINFO is now cleared. 
+ */ LIST_REMOVE(la, la_le); - rt->rt_llinfo = NULL; + la->la_le.le_next = NULL; + la->la_le.le_prev = NULL; rt->rt_flags &= ~RTF_LLINFO; - if (la->la_hold) { + if (la->la_hold != NULL) m_freem(la->la_hold); - } la->la_hold = NULL; - R_Free((caddr_t)la); } } @@ -345,7 +464,7 @@ sdl_addr_to_hex(const struct sockaddr_dl *sdl, char * orig_buf, int buflen) { char * buf = orig_buf; int i; - const u_char * lladdr = (u_char *)sdl->sdl_data; + const u_char * lladdr = (u_char *)(size_t)sdl->sdl_data; int maxbytes = buflen / 3; if (maxbytes > sdl->sdl_alen) { @@ -364,73 +483,82 @@ sdl_addr_to_hex(const struct sockaddr_dl *sdl, char * orig_buf, int buflen) /* * arp_lookup_route will lookup the route for a given address. * - * The routing lock must be held. The address must be for a - * host on a local network on this interface. + * The address must be for a host on a local network on this interface. + * If the returned route is non-NULL, the route is locked and the caller + * is responsible for unlocking it and releasing its reference. */ static errno_t -arp_lookup_route( - const struct in_addr *addr, - int create, - int proxy, - route_t *route, - unsigned int ifscope) +arp_lookup_route(const struct in_addr *addr, int create, int proxy, + route_t *route, unsigned int ifscope) { struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0}; const char *why = NULL; errno_t error = 0; - - // Caller is responsible for taking the routing lock - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + route_t rt; + + *route = NULL; sin.sin_addr.s_addr = addr->s_addr; sin.sin_other = proxy ? 
SIN_PROXY : 0; - *route = rtalloc1_scoped_locked((struct sockaddr*)&sin, - create, 0, ifscope); - if (*route == NULL) - return ENETUNREACH; - - rtunref(*route); - - if ((*route)->rt_flags & RTF_GATEWAY) { + rt = rtalloc1_scoped((struct sockaddr*)&sin, create, 0, ifscope); + if (rt == NULL) + return (ENETUNREACH); + + RT_LOCK(rt); + + if (rt->rt_flags & RTF_GATEWAY) { why = "host is not on local network"; - - /* If there are no references to this route, purge it */ - if ((*route)->rt_refcnt <= 0 && ((*route)->rt_flags & RTF_WASCLONED) != 0) { - rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(*route), - (*route)->rt_gateway, rt_mask(*route), - (*route)->rt_flags, 0); - } - *route = NULL; error = ENETUNREACH; - } - else if (((*route)->rt_flags & RTF_LLINFO) == 0) { + } else if (!(rt->rt_flags & RTF_LLINFO)) { why = "could not allocate llinfo"; - *route = NULL; error = ENOMEM; - } - else if ((*route)->rt_gateway->sa_family != AF_LINK) { + } else if (rt->rt_gateway->sa_family != AF_LINK) { why = "gateway route is not ours"; - *route = NULL; error = EPROTONOSUPPORT; } - - if (why && create && log_arp_warnings) { - char tmp[MAX_IPv4_STR_LEN]; - log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n", ifscope, - inet_ntop(AF_INET, addr, tmp, sizeof(tmp)), why); + + if (error != 0) { + if (create && log_arp_warnings) { + char tmp[MAX_IPv4_STR_LEN]; + log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n", + ifscope, inet_ntop(AF_INET, addr, tmp, + sizeof (tmp)), why); + } + + /* + * If there are no references to this route, and it is + * a cloned route, and not static, and ARP had created + * the route, then purge it from the routing table as + * it is probably bogus. + */ + if (rt->rt_refcnt == 1 && + (rt->rt_flags & (RTF_WASCLONED | RTF_STATIC)) == + RTF_WASCLONED) { + /* + * Prevent another thread from modifying rt_key, + * rt_gateway via rt_setgate() after rt_lock is + * dropped by marking the route as defunct.
+ */ + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + rt_mask(rt), rt->rt_flags, 0); + rtfree(rt); + } else { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } + return (error); } - - return error; -} + /* + * Caller releases reference and does RT_UNLOCK(rt). + */ + *route = rt; + return (0); +} -__private_extern__ errno_t -arp_route_to_gateway_route( - const struct sockaddr *net_dest, - route_t hint, - route_t *out_route); /* * arp_route_to_gateway_route will find the gateway route for a given route. * @@ -438,196 +566,356 @@ arp_route_to_gateway_route( * If the route goes through a gateway, get the route to the gateway. * If the gateway route is down, look it up again. * If the route is set to reject, verify it hasn't expired. + * + * If the returned route is non-NULL, the caller is responsible for + * releasing the reference and unlocking the route. */ +#define senderr(e) { error = (e); goto bad; } __private_extern__ errno_t -arp_route_to_gateway_route( - const struct sockaddr *net_dest, - route_t hint, - route_t *out_route) +arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0, + route_t *out_route) { struct timeval timenow; - route_t route = hint; + route_t rt = hint0, hint = hint0; + errno_t error = 0; + *out_route = NULL; - - /* If we got a hint from the higher layers, check it out */ - if (route) { - lck_mtx_lock(rt_mtx); - - if ((route->rt_flags & RTF_UP) == 0) { + + /* + * Next hop determination. Because we may involve the gateway route + * in addition to the original route, locking is rather complicated. + * The general concept is that regardless of whether the route points + * to the original route or to the gateway route, this routine takes + * an extra reference on such a route. This extra reference will be + * released at the end. 
+ * + * Care must be taken to ensure that the "hint0" route never gets freed + * via rtfree(), since the caller may have stored it inside a struct + * route with a reference held for that placeholder. + */ + if (rt != NULL) { + unsigned int ifindex; + + RT_LOCK_SPIN(rt); + ifindex = rt->rt_ifp->if_index; + RT_ADDREF_LOCKED(rt); + if (!(rt->rt_flags & RTF_UP)) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); /* route is down, find a new one */ - hint = route = rtalloc1_scoped_locked(net_dest, - 1, 0, route->rt_ifp->if_index); - if (hint) { - rtunref(hint); - } - else { - /* No route to host */ - lck_mtx_unlock(rt_mtx); - return EHOSTUNREACH; + hint = rt = rtalloc1_scoped((struct sockaddr *) + (size_t)net_dest, 1, 0, ifindex); + if (hint != NULL) { + RT_LOCK_SPIN(rt); + ifindex = rt->rt_ifp->if_index; + } else { + senderr(EHOSTUNREACH); } } - - if (route->rt_flags & RTF_GATEWAY) { + + /* + * We have a reference to "rt" by now; it will either + * be released or freed at the end of this routine. + */ + RT_LOCK_ASSERT_HELD(rt); + if (rt->rt_flags & RTF_GATEWAY) { + struct rtentry *gwrt = rt->rt_gwroute; + struct sockaddr_in gw; + + /* If there's no gateway rt, look it up */ + if (gwrt == NULL) { + gw = *((struct sockaddr_in *)rt->rt_gateway); + RT_UNLOCK(rt); + goto lookup; + } + /* Become a regular mutex */ + RT_CONVERT_LOCK(rt); + /* - * We need the gateway route. If it is NULL or down, - * look it up. + * Take gwrt's lock while holding route's lock; + * this is okay since gwrt never points back + * to "rt", so no lock ordering issues. 
*/ - if (route->rt_gwroute == 0 || - (route->rt_gwroute->rt_flags & RTF_UP) == 0) { - if (route->rt_gwroute != 0) - rtfree_locked(route->rt_gwroute); - - route->rt_gwroute = rtalloc1_scoped_locked( - route->rt_gateway, 1, 0, - route->rt_ifp->if_index); - if (route->rt_gwroute == 0) { - lck_mtx_unlock(rt_mtx); - return EHOSTUNREACH; + RT_LOCK_SPIN(gwrt); + if (!(gwrt->rt_flags & RTF_UP)) { + struct rtentry *ogwrt; + + rt->rt_gwroute = NULL; + RT_UNLOCK(gwrt); + gw = *((struct sockaddr_in *)rt->rt_gateway); + RT_UNLOCK(rt); + rtfree(gwrt); +lookup: + gwrt = rtalloc1_scoped( + (struct sockaddr *)&gw, 1, 0, ifindex); + + RT_LOCK(rt); + /* + * Bail out if the route is down, no route + * to gateway, circular route, or if the + * gateway portion of "rt" has changed. + */ + if (!(rt->rt_flags & RTF_UP) || + gwrt == NULL || gwrt == rt || + !equal(SA(&gw), rt->rt_gateway)) { + if (gwrt == rt) { + RT_REMREF_LOCKED(gwrt); + gwrt = NULL; + } + RT_UNLOCK(rt); + if (gwrt != NULL) + rtfree(gwrt); + senderr(EHOSTUNREACH); } + + /* Remove any existing gwrt */ + ogwrt = rt->rt_gwroute; + if ((rt->rt_gwroute = gwrt) != NULL) + RT_ADDREF(gwrt); + + /* Clean up "rt" now while we can */ + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree(rt); + } + rt = gwrt; + /* Now free the replaced gwrt */ + if (ogwrt != NULL) + rtfree(ogwrt); + /* If still no route to gateway, bail out */ + if (rt == NULL) + senderr(EHOSTUNREACH); + } else { + RT_ADDREF_LOCKED(gwrt); + RT_UNLOCK(gwrt); + /* Clean up "rt" now while we can */ + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree(rt); + } + rt = gwrt; + } + + /* rt == gwrt; if it is now down, give up */ + RT_LOCK_SPIN(rt); + if (!(rt->rt_flags & RTF_UP)) { + RT_UNLOCK(rt); + senderr(EHOSTUNREACH); } - - route = route->rt_gwroute; } - - if (route->rt_flags & RTF_REJECT) { + + if (rt->rt_flags & RTF_REJECT) { getmicrotime(&timenow); - if (route->rt_rmx.rmx_expire 
== 0 || - timenow.tv_sec < route->rt_rmx.rmx_expire) { - lck_mtx_unlock(rt_mtx); - return route == hint ? EHOSTDOWN : EHOSTUNREACH; + if (rt->rt_rmx.rmx_expire == 0 || + timenow.tv_sec < rt->rt_rmx.rmx_expire) { + RT_UNLOCK(rt); + senderr(rt == hint ? EHOSTDOWN : EHOSTUNREACH); } } - - lck_mtx_unlock(rt_mtx); + + /* Become a regular mutex */ + RT_CONVERT_LOCK(rt); + + /* Caller is responsible for cleaning up "rt" */ + *out_route = rt; } - - *out_route = route; - return 0; + return (0); + +bad: + /* Clean up route (either it is "rt" or "gwrt") */ + if (rt != NULL) { + RT_LOCK_SPIN(rt); + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree(rt); + } + } + return (error); } +#undef senderr +/* + * This is the ARP pre-output routine; care must be taken to ensure that + * the "hint" route never gets freed via rtfree(), since the caller may + * have stored it inside a struct route with a reference held for that + * placeholder. + */ errno_t -arp_lookup_ip( - ifnet_t ifp, - const struct sockaddr_in *net_dest, - struct sockaddr_dl *ll_dest, - size_t ll_dest_len, - route_t hint, - mbuf_t packet) +arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, + struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, + mbuf_t packet) { - route_t route = NULL; + route_t route = NULL; /* output route */ errno_t result = 0; struct sockaddr_dl *gateway; struct llinfo_arp *llinfo; struct timeval timenow; - + if (net_dest->sin_family != AF_INET) - return EAFNOSUPPORT; - + return (EAFNOSUPPORT); + if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return ENETDOWN; - + return (ENETDOWN); + /* * If we were given a route, verify the route and grab the gateway */ - if (hint) { - result = arp_route_to_gateway_route((const struct sockaddr*)net_dest, - hint, &route); + if (hint != NULL) { + /* + * Callee holds a reference on the route and returns + * with the route entry locked, upon success. 
+ */ + result = arp_route_to_gateway_route((const struct sockaddr*) + net_dest, hint, &route); if (result != 0) - return result; + return (result); + if (route != NULL) + RT_LOCK_ASSERT_HELD(route); } - + if (packet->m_flags & M_BCAST) { - u_long broadcast_len; + size_t broadcast_len; bzero(ll_dest, ll_dest_len); - result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest), ll_dest_len - - offsetof(struct sockaddr_dl, - sdl_data), &broadcast_len); - if (result != 0) { - return result; + result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest), + ll_dest_len - offsetof(struct sockaddr_dl, sdl_data), + &broadcast_len); + if (result == 0) { + ll_dest->sdl_alen = broadcast_len; + ll_dest->sdl_family = AF_LINK; + ll_dest->sdl_len = sizeof(struct sockaddr_dl); } - - ll_dest->sdl_alen = broadcast_len; - ll_dest->sdl_family = AF_LINK; - ll_dest->sdl_len = sizeof(struct sockaddr_dl); - - return 0; + goto release; } if (packet->m_flags & M_MCAST) { - return dlil_resolve_multi(ifp, (const struct sockaddr*)net_dest, - (struct sockaddr*)ll_dest, ll_dest_len); + if (route != NULL) + RT_UNLOCK(route); + result = dlil_resolve_multi(ifp, + (const struct sockaddr*)net_dest, + (struct sockaddr*)ll_dest, ll_dest_len); + if (route != NULL) + RT_LOCK(route); + goto release; } - - lck_mtx_lock(rt_mtx); - + /* * If we didn't find a route, or the route doesn't have * link layer information, trigger the creation of the * route and link layer information. */ - if (route == NULL || route->rt_llinfo == NULL) + if (route == NULL || route->rt_llinfo == NULL) { + /* Clean up now while we can */ + if (route != NULL) { + if (route == hint) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + } else { + RT_UNLOCK(route); + rtfree(route); + } + } + /* + * Callee holds a reference on the route and returns + * with the route entry locked, upon success. 
+ */ result = arp_lookup_route(&net_dest->sin_addr, 1, 0, &route, ifp->if_index); - + if (result == 0) + RT_LOCK_ASSERT_HELD(route); + } + if (result || route == NULL || route->rt_llinfo == NULL) { char tmp[MAX_IPv4_STR_LEN]; - lck_mtx_unlock(rt_mtx); - if (log_arp_warnings) - log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s\n", - inet_ntop(AF_INET, &net_dest->sin_addr, tmp, sizeof(tmp))); - return result; + + /* In case result is 0 but no route, return an error */ + if (result == 0) + result = EHOSTUNREACH; + + if (log_arp_warnings && + route != NULL && route->rt_llinfo == NULL) + log(LOG_DEBUG, "arpresolve: can't allocate llinfo " + "for %s\n", inet_ntop(AF_INET, &net_dest->sin_addr, + tmp, sizeof(tmp))); + goto release; } - + /* * Now that we have the right route, is it filled in? */ gateway = SDL(route->rt_gateway); getmicrotime(&timenow); - if ((route->rt_rmx.rmx_expire == 0 || route->rt_rmx.rmx_expire > timenow.tv_sec) && - gateway != NULL && gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0) { + if ((route->rt_rmx.rmx_expire == 0 || + route->rt_rmx.rmx_expire > timenow.tv_sec) && gateway != NULL && + gateway->sdl_family == AF_LINK && gateway->sdl_alen != 0) { bcopy(gateway, ll_dest, MIN(gateway->sdl_len, ll_dest_len)); - lck_mtx_unlock(rt_mtx); - return 0; + result = 0; + goto release; } - + + if (ifp->if_flags & IFF_NOARP) { + result = ENOTSUP; + goto release; + } + /* * Route wasn't complete/valid. We need to arp. 
*/ - if (ifp->if_flags & IFF_NOARP) { - lck_mtx_unlock(rt_mtx); - return ENOTSUP; - } - - llinfo = (struct llinfo_arp*)route->rt_llinfo; - if (packet) { - if (llinfo->la_hold) { + llinfo = route->rt_llinfo; + if (packet != NULL) { + if (llinfo->la_hold != NULL) m_freem(llinfo->la_hold); - } llinfo->la_hold = packet; } - + if (route->rt_rmx.rmx_expire) { route->rt_flags &= ~RTF_REJECT; - if (llinfo->la_asked == 0 || route->rt_rmx.rmx_expire != timenow.tv_sec) { + if (llinfo->la_asked == 0 || + route->rt_rmx.rmx_expire != timenow.tv_sec) { route->rt_rmx.rmx_expire = timenow.tv_sec; if (llinfo->la_asked++ < arp_maxtries) { - lck_mtx_unlock(rt_mtx); - dlil_send_arp(ifp, ARPOP_REQUEST, NULL, route->rt_ifa->ifa_addr, - NULL, (const struct sockaddr*)net_dest); - return EJUSTRETURN; - } - else { + struct ifaddr *rt_ifa = route->rt_ifa; + ifaref(rt_ifa); + RT_UNLOCK(route); + dlil_send_arp(ifp, ARPOP_REQUEST, NULL, + rt_ifa->ifa_addr, NULL, + (const struct sockaddr*)net_dest); + ifafree(rt_ifa); + RT_LOCK(route); + result = EJUSTRETURN; + goto release; + } else { route->rt_flags |= RTF_REJECT; route->rt_rmx.rmx_expire += arpt_down; llinfo->la_asked = 0; llinfo->la_hold = NULL; - lck_mtx_unlock(rt_mtx); - return EHOSTUNREACH; + result = EHOSTUNREACH; + goto release; } } } - lck_mtx_unlock(rt_mtx); - - return EJUSTRETURN; + + /* The packet is now held inside la_hold (can "packet" be NULL?) 
*/ + result = EJUSTRETURN; + +release: + if (route != NULL) { + if (route == hint) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + } else { + RT_UNLOCK(route); + rtfree(route); + } + } + return (result); } errno_t @@ -639,7 +927,9 @@ arp_ip_handle_input( const struct sockaddr_in *target_ip) { char ipv4str[MAX_IPv4_STR_LEN]; - struct sockaddr_dl *gateway; + struct sockaddr_dl proxied; + struct sockaddr_dl *gateway, *target_hw = NULL; + struct ifaddr *ifa; struct in_ifaddr *ia; struct in_ifaddr *best_ia = NULL; route_t route = NULL; @@ -647,39 +937,63 @@ arp_ip_handle_input( struct llinfo_arp *llinfo; errno_t error; int created_announcement = 0; - + /* Do not respond to requests for 0.0.0.0 */ - if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST) { - return 0; - } - + if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST) + goto done; + /* * Determine if this ARP is for us */ - lck_mtx_lock(rt_mtx); - for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(target_ip->sin_addr.s_addr), ia_hash) { /* do_bridge should be tested here for bridging */ - if (ia->ia_ifp == ifp) { + if (ia->ia_ifp == ifp && + ia->ia_addr.sin_addr.s_addr == target_ip->sin_addr.s_addr) { best_ia = ia; - if (target_ip->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr || - sender_ip->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) { - break; - } + ifaref(&best_ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + goto match; } } - - /* If we don't have an IP address on this interface, ignore the packet */ - if (best_ia == 0) { - lck_mtx_unlock(rt_mtx); - return 0; + + TAILQ_FOREACH(ia, INADDR_HASH(sender_ip->sin_addr.s_addr), ia_hash) { + /* do_bridge should be tested here for bridging */ + if (ia->ia_ifp == ifp && + ia->ia_addr.sin_addr.s_addr == sender_ip->sin_addr.s_addr) { + best_ia = ia; + ifaref(&best_ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + goto match; + } } - + 
lck_rw_done(in_ifaddr_rwlock); + + /* + * No match, use the first inet address on the receive interface + * as a dummy address for the rest of the function; we may be + * proxying for another address. + */ + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + best_ia = (struct in_ifaddr *)ifa; + ifaref(&best_ia->ia_ifa); + break; + } + ifnet_lock_done(ifp); + + /* If we don't have an IP address on this interface, ignore the packet */ + if (best_ia == NULL) + goto done; + +match: /* If the packet is from this interface, ignore the packet */ if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_len)) { - lck_mtx_unlock(rt_mtx); - return 0; + goto done; } - + /* Check for a conflict */ if (sender_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr) { struct kev_msg ev_msg; @@ -690,7 +1004,7 @@ arp_ip_handle_input( ifp->if_name, ifp->if_unit, inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, sizeof(ipv4str)), sdl_addr_to_hex(sender_hw, buf, sizeof(buf))); - + /* Send a kernel event so anyone can learn of the conflict */ in_collision->link_data.if_family = ifp->if_family; in_collision->link_data.if_unit = ifp->if_unit; @@ -706,17 +1020,23 @@ arp_ip_handle_input( ev_msg.dv[0].data_length = sizeof(struct kev_in_collision) + in_collision->hw_len; ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); - + goto respond; } - + /* * Look up the routing entry. If it doesn't exist and we are the * target, and the sender isn't 0.0.0.0, go ahead and create one. + * Callee holds a reference on the route and returns with the route + * entry locked, upon success. 
*/ error = arp_lookup_route(&sender_ip->sin_addr, (target_ip->sin_addr.s_addr == best_ia->ia_addr.sin_addr.s_addr && sender_ip->sin_addr.s_addr != 0), 0, &route, ifp->if_index); + + if (error == 0) + RT_LOCK_ASSERT_HELD(route); + if (error || route == 0 || route->rt_gateway == 0) { if (arpop != ARPOP_REQUEST) { goto respond; @@ -730,8 +1050,21 @@ arp_ip_handle_input( * Verify this ARP probe doesn't conflict with an IPv4LL we know of * on another interface. */ + if (route != NULL) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; + } + /* + * Callee holds a reference on the route and returns + * with the route entry locked, upon success. + */ error = arp_lookup_route(&target_ip->sin_addr, 0, 0, &route, ifp->if_index); + + if (error == 0) + RT_LOCK_ASSERT_HELD(route); + if (error == 0 && route && route->rt_gateway) { gateway = SDL(route->rt_gateway); if (route->rt_ifp != ifp && gateway->sdl_alen != 0 @@ -757,6 +1090,10 @@ arp_ip_handle_input( sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), ifp->if_name, ifp->if_unit); } + /* We're done with the route */ + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; /* * Send a conservative unicast "ARP probe". * This should force the other device to pick a new number. @@ -764,10 +1101,19 @@ arp_ip_handle_input( * has already assigned that number. * This will not imply to the device that we own that address. */ + ifnet_lock_shared(ifp); + ifa = TAILQ_FIRST(&ifp->if_addrhead); + if (ifa != NULL) + ifaref(ifa); + ifnet_lock_done(ifp); dlil_send_arp_internal(ifp, ARPOP_REQUEST, - (struct sockaddr_dl*)TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr, + ifa != NULL ? 
SDL(ifa->ifa_addr) : NULL, (const struct sockaddr*)sender_ip, sender_hw, (const struct sockaddr*)target_ip); + if (ifa != NULL) { + ifafree(ifa); + ifa = NULL; + } } } goto respond; @@ -776,8 +1122,22 @@ arp_ip_handle_input( /* don't create entry if link-local address and link-local is disabled */ if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) != 0) { + if (route != NULL) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; + } + /* + * Callee holds a reference on the route and + * returns with the route entry locked, upon + * success. + */ error = arp_lookup_route(&sender_ip->sin_addr, 1, 0, &route, ifp->if_index); + + if (error == 0) + RT_LOCK_ASSERT_HELD(route); + if (error == 0 && route != NULL && route->rt_gateway != NULL) { created_announcement = 1; } @@ -789,7 +1149,8 @@ arp_ip_handle_input( goto respond; } } - + + RT_LOCK_ASSERT_HELD(route); gateway = SDL(route->rt_gateway); if (route->rt_ifp != ifp) { if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) { @@ -808,23 +1169,50 @@ arp_ip_handle_input( if (route->rt_rmx.rmx_expire == 0) { goto respond; } - + + /* + * We're about to check and/or change the route's ifp + * and ifa, so do the lock dance: drop rt_lock, hold + * rnh_lock and re-hold rt_lock to avoid violating the + * lock ordering. We have an extra reference on the + * route, so it won't go away while we do this. + */ + RT_UNLOCK(route); + lck_mtx_lock(rnh_lock); + RT_LOCK(route); /* - * Don't change the cloned route away from the parent's interface - * if the address did resolve. + * Don't change the cloned route away from the + * parent's interface if the address did resolve + * or if the route is defunct. rt_ifp on both + * the parent and the clone can now be freely + * accessed now that we have acquired rnh_lock. 
*/ - if (gateway->sdl_alen != 0 && route->rt_parent && - route->rt_parent->rt_ifp == route->rt_ifp) { + gateway = SDL(route->rt_gateway); + if ((gateway->sdl_alen != 0 && route->rt_parent && + route->rt_parent->rt_ifp == route->rt_ifp) || + (route->rt_flags & RTF_CONDEMNED)) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; + lck_mtx_unlock(rnh_lock); goto respond; } - /* Change the interface when the existing route is on */ route->rt_ifp = ifp; rtsetifa(route, &best_ia->ia_ifa); gateway->sdl_index = ifp->if_index; + RT_UNLOCK(route); + lck_mtx_unlock(rnh_lock); + RT_LOCK(route); + /* Don't bother if the route is down */ + if (!(route->rt_flags & RTF_UP)) + goto respond; + /* Refresh gateway pointer */ + gateway = SDL(route->rt_gateway); } + RT_LOCK_ASSERT_HELD(route); } - + if (gateway->sdl_alen && bcmp(LLADDR(gateway), CONST_LLADDR(sender_hw), gateway->sdl_alen)) { if (route->rt_rmx.rmx_expire && log_arp_warnings) { char buf2[3 * MAX_HW_LEN]; @@ -848,11 +1236,11 @@ arp_ip_handle_input( goto respond; } } - + /* Copy the sender hardware address in to the route's gateway address */ gateway->sdl_alen = sender_hw->sdl_alen; bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen); - + /* Update the expire time for the route and clear the reject flag */ if (route->rt_rmx.rmx_expire) { struct timeval timenow; @@ -861,66 +1249,84 @@ arp_ip_handle_input( route->rt_rmx.rmx_expire = timenow.tv_sec + arpt_keep; } route->rt_flags &= ~RTF_REJECT; - + /* update the llinfo, send a queued packet if there is one */ - llinfo = (struct llinfo_arp*)route->rt_llinfo; + llinfo = route->rt_llinfo; llinfo->la_asked = 0; if (llinfo->la_hold) { struct mbuf *m0; m0 = llinfo->la_hold; llinfo->la_hold = 0; - - /* Should we a reference on the route first? 
*/ - lck_mtx_unlock(rt_mtx); + + RT_UNLOCK(route); dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0); - lck_mtx_lock(rt_mtx); + RT_REMREF(route); + route = NULL; } - + respond: - if (arpop != ARPOP_REQUEST) { - lck_mtx_unlock(rt_mtx); - return 0; + if (route != NULL) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; } - + + if (arpop != ARPOP_REQUEST) + goto done; + /* If we are not the target, check if we should proxy */ if (target_ip->sin_addr.s_addr != best_ia->ia_addr.sin_addr.s_addr) { - - /* Find a proxy route */ + /* + * Find a proxy route; callee holds a reference on the + * route and returns with the route entry locked, upon + * success. + */ error = arp_lookup_route(&target_ip->sin_addr, 0, SIN_PROXY, &route, ifp->if_index); - if (error || route == NULL) { - - /* We don't have a route entry indicating we should use proxy */ - /* If we aren't supposed to proxy all, we are done */ - if (!arp_proxyall) { - lck_mtx_unlock(rt_mtx); - return 0; - } - - /* See if we have a route to the target ip before we proxy it */ - route = rtalloc1_scoped_locked( - (const struct sockaddr *)target_ip, 0, 0, - ifp->if_index); - if (!route) { - lck_mtx_unlock(rt_mtx); - return 0; - } - + + if (error == 0) { + RT_LOCK_ASSERT_HELD(route); + proxied = *SDL(route->rt_gateway); + target_hw = &proxied; + } else { + /* + * We don't have a route entry indicating we should + * use proxy. If we aren't supposed to proxy all, + * we are done. + */ + if (!arp_proxyall) + goto done; + + /* + * See if we have a route to the target ip before + * we proxy it. + */ + route = rtalloc1_scoped((struct sockaddr *) + (size_t)target_ip, 0, 0, ifp->if_index); + if (!route) + goto done; + /* * Don't proxy for hosts already on the same interface. 
*/ + RT_LOCK(route); if (route->rt_ifp == ifp) { - rtfree_locked(route); - lck_mtx_unlock(rt_mtx); - return 0; + RT_UNLOCK(route); + rtfree(route); + goto done; } } + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); } - lck_mtx_unlock(rt_mtx); - - dlil_send_arp(ifp, ARPOP_REPLY, NULL, (const struct sockaddr*)target_ip, - sender_hw, (const struct sockaddr*)sender_ip); - + + dlil_send_arp(ifp, ARPOP_REPLY, + target_hw, (const struct sockaddr*)target_ip, + sender_hw, (const struct sockaddr*)sender_ip); + +done: + if (best_ia != NULL) + ifafree(&best_ia->ia_ifa); return 0; } diff --git a/bsd/netinet/in_arp.h b/bsd/netinet/in_arp.h index 08b656637..0bc3d8bce 100644 --- a/bsd/netinet/in_arp.h +++ b/bsd/netinet/in_arp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,6 +30,7 @@ #define _NETINET_IN_ARP_H_ #include +struct sockaddr; struct sockaddr_dl; struct sockaddr_in; @@ -61,15 +62,18 @@ struct sockaddr_in; #ifdef BSD_KERNEL_PRIVATE #define inet_arp_lookup arp_lookup_ip #else -errno_t inet_arp_lookup(ifnet_t interface, const struct sockaddr_in *ip_dest, - struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, - mbuf_t packet); +extern errno_t inet_arp_lookup(ifnet_t interface, + const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest, + size_t ll_dest_len, route_t hint, mbuf_t packet); #endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE +extern void arp_init(void); /* arp_lookup_ip is obsolete, use inet_arp_lookup */ -errno_t arp_lookup_ip(ifnet_t interface, const struct sockaddr_in *ip_dest, - struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, - mbuf_t packet); +extern errno_t arp_lookup_ip(ifnet_t interface, + const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest, + size_t ll_dest_len, route_t hint, mbuf_t packet); +__private_extern__ errno_t arp_route_to_gateway_route(const struct 
sockaddr *, + route_t, route_t *); #endif /* KERNEL_PRIVATE */ /*! @@ -92,14 +96,14 @@ errno_t arp_lookup_ip(ifnet_t interface, const struct sockaddr_in *ip_dest, #ifdef BSD_KERNEL_PRIVATE #define inet_arp_handle_input arp_ip_handle_input #else -errno_t inet_arp_handle_input(ifnet_t ifp, u_int16_t arpop, +extern errno_t inet_arp_handle_input(ifnet_t ifp, u_int16_t arpop, const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip, const struct sockaddr_in *target_ip); #endif /* KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE /* arp_ip_handle_input is obsolete, use inet_arp_handle_input */ -errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, +extern errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip, const struct sockaddr_in *target_ip); @@ -131,7 +135,7 @@ errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, /* inet_arp_init_ifaddr is aliased to arp_ifinit */ #define inet_arp_init_ifaddr arp_ifinit #else -void inet_arp_init_ifaddr(ifnet_t interface, ifaddr_t ipaddr); +extern void inet_arp_init_ifaddr(ifnet_t interface, ifaddr_t ipaddr); #endif -#endif _NETINET_IN_ARP_H_ +#endif /* _NETINET_IN_ARP_H_ */ diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c index c9fc56d86..cf3e3dbca 100644 --- a/bsd/netinet/in_cksum.c +++ b/bsd/netinet/in_cksum.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,6 +66,7 @@ #include #include #include +#include #define DBG_FNC_IN_CKSUM NETDBG_CODE(DBG_NETIP, (3 << 8)) @@ -117,10 +118,6 @@ inet_cksum_simple(struct mbuf *m, int len) return (inet_cksum(m, 0, 0, len)); } -#if defined(__ppc__) - -extern u_short xsum_assym(u_short *p, int len, u_short xsum, int odd); - inline u_short in_addword(u_short a, u_short b) { @@ -144,6 +141,10 @@ in_pseudo(u_int a, u_int b, u_int c) } +#if defined(__ppc__) + +extern u_short xsum_assym(u_short *p, int len, u_short xsum, int odd); + u_int16_t inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, unsigned int len) @@ -210,30 +211,39 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, return (~sum & 0xffff); } -#else +#elif defined(__arm__) && __ARM_ARCH__ >= 6 -inline u_short -in_addword(u_short a, u_short b) +extern int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum); + +u_int16_t +inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, + unsigned int len) { - union l_util l_util; - u_int32_t sum = a + b; + u_int32_t sum = 0; - REDUCE(sum); - return (sum); -} + /* sanity check */ + if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) { + panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n", + m->m_pkthdr.len, skip, len); + } -inline u_short -in_pseudo(u_int a, u_int b, u_int c) -{ - u_int64_t sum; - union q_util q_util; - union l_util l_util; + /* include pseudo header checksum? 
*/ + if (nxt != 0) { + struct ip *iph; - sum = (u_int64_t) a + b + c; - REDUCE16; - return (sum); + if (m->m_len < sizeof (struct ip)) + panic("inet_cksum: bad mbuf chain"); + + iph = mtod(m, struct ip *); + sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, + htonl(len + nxt)); + } + + return (cpu_in_cksum(m, len, skip, sum)); } +#else + u_int16_t inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, unsigned int len) @@ -306,7 +316,7 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, /* * Force to even boundary. */ - if ((1 & (int) w) && (mlen > 0)) { + if ((1 & (uintptr_t) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index 787c688ce..482aef5e4 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,8 +93,6 @@ #include -extern u_long route_generation; - int ip_gif_ttl = GIF_TTL; SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW, &ip_gif_ttl, 0, ""); @@ -196,7 +194,7 @@ in_gif_output( (sc->gif_ro.ro_rt != NULL && (sc->gif_ro.ro_rt->generation_id != route_generation || sc->gif_ro.ro_rt->rt_ifp == ifp))) { - /* cache route doesn't match */ + /* cache route doesn't match or recursive route */ dst->sin_family = sin_dst->sin_family; dst->sin_len = sizeof(struct sockaddr_in); dst->sin_addr = sin_dst->sin_addr; @@ -217,7 +215,9 @@ in_gif_output( } /* if it constitutes infinite encapsulation, punt. 
*/ + RT_LOCK(sc->gif_ro.ro_rt); if (sc->gif_ro.ro_rt->rt_ifp == ifp) { + RT_UNLOCK(sc->gif_ro.ro_rt); m_freem(m); return ENETUNREACH; /*XXX*/ } @@ -225,6 +225,7 @@ in_gif_output( ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu - sizeof(struct ip); #endif + RT_UNLOCK(sc->gif_ro.ro_rt); } error = ip_output(m, NULL, &sc->gif_ro, IP_OUTARGS, NULL, &ipoa); @@ -249,7 +250,7 @@ in_gif_input(m, off) if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { m_freem(m); - OSAddAtomic(1, (SInt32*)&ipstat.ips_nogif); + OSAddAtomic(1, &ipstat.ips_nogif); return; } @@ -297,7 +298,7 @@ in_gif_input(m, off) } #endif /* INET6 */ default: - OSAddAtomic(1, (SInt32*)&ipstat.ips_nogif); + OSAddAtomic(1, &ipstat.ips_nogif); m_freem(m); return; } @@ -345,7 +346,7 @@ gif_encapcheck4( src = (struct sockaddr_in *)sc->gif_psrc; dst = (struct sockaddr_in *)sc->gif_pdst; - mbuf_copydata(m, 0, sizeof(ip), &ip); + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip), &ip); /* check for address match */ addrmatch = 0; @@ -364,18 +365,18 @@ gif_encapcheck4( return 0; } /* reject packets with broadcast on source */ - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; ia4 = TAILQ_NEXT(ia4, ia_link)) { if ((ifnet_flags(ia4->ia_ifa.ifa_ifp) & IFF_BROADCAST) == 0) continue; if (ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return 0; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); /* ingress filters on outer source */ if ((ifnet_flags(sc->gif_if) & IFF_LINK2) == 0 && @@ -387,20 +388,23 @@ gif_encapcheck4( sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = ip.ip_src; - lck_mtx_lock(rt_mtx); - rt = rtalloc1_scoped_locked((struct sockaddr *)&sin, 0, 0, + rt = rtalloc1_scoped((struct sockaddr *)&sin, 0, 0, m->m_pkthdr.rcvif->if_index); - lck_mtx_unlock(rt_mtx); - if (!rt || rt->rt_ifp != m->m_pkthdr.rcvif) { + if (rt != NULL) + RT_LOCK(rt); 
+ if (rt == NULL || rt->rt_ifp != m->m_pkthdr.rcvif) { #if 0 log(LOG_WARNING, "%s: packet from 0x%x dropped " "due to ingress filter\n", if_name(&sc->gif_if), (u_int32_t)ntohl(sin.sin_addr.s_addr)); #endif - if (rt) + if (rt != NULL) { + RT_UNLOCK(rt); rtfree(rt); + } return 0; } + RT_UNLOCK(rt); rtfree(rt); } diff --git a/bsd/netinet/in_gif.h b/bsd/netinet/in_gif.h index 0fb9240b1..c2dfdda44 100644 --- a/bsd/netinet/in_gif.h +++ b/bsd/netinet/in_gif.h @@ -70,6 +70,6 @@ void in_gif_input(struct mbuf *, int); int in_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); int gif_encapcheck4(const struct mbuf *, int, int, void *); -#endif KERNEL_PRIVATE -#endif KERNEL -#endif _NETINET_IN_GIF_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* KERNEL */ +#endif /* _NETINET_IN_GIF_H_ */ diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index af785060a..10efe37f0 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -104,13 +104,12 @@ #endif /* IPSEC */ #include +#include #if IPSEC extern int ipsec_bypass; #endif -extern u_long route_generation; - #define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) #define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) @@ -170,6 +169,9 @@ SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); +extern int udp_use_randomport; +extern int tcp_use_randomport; + /* * in_pcb.c: manage the Protocol Control Blocks. 
* @@ -230,9 +232,6 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc * return (mac_error); } mac_inpcb_label_associate(so, inp); -#endif -#if CONFIG_IP_EDGEHOLE - ip_edgehole_attach(inp); #endif so->so_pcb = (caddr_t)inp; @@ -301,6 +300,8 @@ in_pcblookup_local_and_cleanup( lck_mtx_lock(inp->inpcb_mtx); if (so->so_usecount == 0) { + if (inp->inp_state != INPCB_STATE_DEAD) + in_pcbdetach(inp); in_pcbdispose(inp); inp = NULL; } @@ -352,9 +353,9 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; - u_short lport = 0; + u_short lport = 0, rand_port = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); - int error, conflict = 0; + int error, randomport, conflict = 0; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); @@ -409,12 +410,13 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) struct inpcb *t; /* GROSS */ - if (ntohs(lport) < IPPORT_RESERVED && p && - proc_suser(p)) { +#if !CONFIG_EMBEDDED + if (ntohs(lport) < IPPORT_RESERVED && proc_suser(p)) { lck_rw_done(pcbinfo->mtx); socket_lock(so, 0); return (EACCES); } +#endif if (so->so_uid && !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo, @@ -490,6 +492,9 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) u_short first, last; int count; + randomport = (so->so_flags & SOF_BINDRANDOMPORT) || + (so->so_type == SOCK_STREAM ? 
tcp_use_randomport : udp_use_randomport); + inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { @@ -497,7 +502,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { - if (p && (error = proc_suser(p))) { + if ((error = proc_suser(p)) != 0) { lck_rw_done(pcbinfo->mtx); socket_lock(so, 0); return error; @@ -510,6 +515,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) last = ipport_lastauto; lastport = &pcbinfo->lastport; } + /* No point in randomizing if only one port is available */ + + if (first == last) + randomport = 0; /* * Simple check to ensure all ports are not used up causing * a deadlock here. @@ -521,6 +530,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) /* * counting down */ + if (randomport) { + read_random(&rand_port, sizeof(rand_port)); + *lastport = first - (rand_port % (first - last)); + } count = first - last; do { @@ -540,6 +553,10 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) /* * counting up */ + if (randomport) { + read_random(&rand_port, sizeof(rand_port)); + *lastport = first + (rand_port % (first - last)); + } count = last - first; do { @@ -599,9 +616,8 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); - - lck_mtx_lock(rt_mtx); - + + lck_rw_lock_shared(in_ifaddr_rwlock); if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, @@ -615,10 +631,12 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) if (sin->sin_addr.s_addr == INADDR_ANY) sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; - else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && + else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST && (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 
IFF_BROADCAST)) sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr; } + lck_rw_done(in_ifaddr_rwlock); + if (inp->inp_laddr.s_addr == INADDR_ANY) { struct route *ro; unsigned int ifscope; @@ -633,25 +651,30 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * destination, in case of sharing the cache with IPv6. */ ro = &inp->inp_route; - if (ro->ro_rt && - (ro->ro_dst.sa_family != AF_INET || - satosin(&ro->ro_dst)->sin_addr.s_addr != - sin->sin_addr.s_addr || - inp->inp_socket->so_options & SO_DONTROUTE || + if (ro->ro_rt != NULL) + RT_LOCK_SPIN(ro->ro_rt); + if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET || + satosin(&ro->ro_dst)->sin_addr.s_addr != + sin->sin_addr.s_addr || + inp->inp_socket->so_options & SO_DONTROUTE || ro->ro_rt->generation_id != route_generation)) { - rtfree_locked(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; + RT_UNLOCK(ro->ro_rt); + rtfree(ro->ro_rt); + ro->ro_rt = NULL; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ - (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == 0)) { + (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sin->sin_addr; - rtalloc_scoped_ign_locked(ro, 0UL, ifscope); + rtalloc_scoped_ign(ro, 0, ifscope); + if (ro->ro_rt != NULL) + RT_LOCK_SPIN(ro->ro_rt); } /* * If we found a route, use the address @@ -659,10 +682,14 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * unless it is the loopback (in case a route * to our address on another net goes to loopback). 
*/ - if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { - ia = ifatoia(ro->ro_rt->rt_ifa); - if (ia) - ifaref(&ia->ia_ifa); + if (ro->ro_rt != NULL) { + RT_LOCK_ASSERT_HELD(ro->ro_rt); + if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { + ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia) + ifaref(&ia->ia_ifa); + } + RT_UNLOCK(ro->ro_rt); } if (ia == 0) { u_short fport = sin->sin_port; @@ -675,14 +702,14 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, } sin->sin_port = fport; if (ia == 0) { + lck_rw_lock_shared(in_ifaddr_rwlock); ia = TAILQ_FIRST(&in_ifaddrhead); if (ia) ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); } - if (ia == 0) { - lck_mtx_unlock(rt_mtx); + if (ia == 0) return (EADDRNOTAVAIL); - } } /* * If the destination address is multicast and an outgoing @@ -700,14 +727,16 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, ifp = imo->imo_multicast_ifp; if (ia) ifafree(&ia->ia_ifa); - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { if (ia->ia_ifp == ifp) break; - if (ia == 0) { - lck_mtx_unlock(rt_mtx); - return (EADDRNOTAVAIL); } - ifaref(&ia->ia_ifa); + if (ia) + ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + if (ia == 0) + return (EADDRNOTAVAIL); } } /* @@ -717,7 +746,6 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, *plocal_sin = &ia->ia_addr; ifafree(&ia->ia_ifa); } - lck_mtx_unlock(rt_mtx); return(0); } @@ -834,12 +862,10 @@ in_pcbdetach(struct inpcb *inp) inp->inp_vflag = 0; if (inp->inp_options) (void)m_free(inp->inp_options); - lck_mtx_lock(rt_mtx); if ((rt = inp->inp_route.ro_rt) != NULL) { inp->inp_route.ro_rt = NULL; - rtfree_locked(rt); + rtfree(rt); } - lck_mtx_unlock(rt_mtx); ip_freemoptions(inp->inp_moptions); inp->inp_moptions = NULL; sofreelastref(so, 0); @@ -893,6 +919,15 @@ in_pcbdispose(struct inpcb *inp) #if CONFIG_MACF_NET mac_inpcb_label_destroy(inp); #endif + /* + * In case there a route cached after a 
detach (possible + * in the tcp case), make sure that it is freed before + * we deallocate the structure. + */ + if (inp->inp_route.ro_rt != NULL) { + rtfree(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = NULL; + } if (so->cached_in_sock_layer == 0) { zfree(ipi->ipi_zone, inp); } @@ -1000,44 +1035,6 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, lck_rw_done(pcbinfo->mtx); } -void -in_pcbpurgeif0( - struct inpcb *head, - struct ifnet *ifp) -{ - struct inpcb *inp; - struct ip_moptions *imo; - int i, gap; - - for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { - imo = inp->inp_moptions; - if ((inp->inp_vflag & INP_IPV4) && - imo != NULL) { - /* - * Unselect the outgoing interface if it is being - * detached. - */ - if (imo->imo_multicast_ifp == ifp) - imo->imo_multicast_ifp = NULL; - - /* - * Drop multicast group membership if we joined - * through the interface being detached. - */ - for (i = 0, gap = 0; i < imo->imo_num_memberships; - i++) { - if (imo->imo_membership[i]->inm_ifp == ifp) { - in_delmulti(&imo->imo_membership[i]); - gap++; - } else if (gap != 0) - imo->imo_membership[i - gap] = - imo->imo_membership[i]; - } - imo->imo_num_memberships -= gap; - } - } -} - /* * Check for alternatives when higher level complains * about service problems. 
For now, invalidate cached @@ -1050,29 +1047,36 @@ in_losing(struct inpcb *inp) struct rtentry *rt; struct rt_addrinfo info; - if ((rt = inp->inp_route.ro_rt)) { - lck_mtx_lock(rt_mtx); - /* Check again, this time while holding the lock */ - if ((rt = inp->inp_route.ro_rt) == NULL) { - lck_mtx_unlock(rt_mtx); - return; - } + if ((rt = inp->inp_route.ro_rt) != NULL) { + struct in_ifaddr *ia; + bzero((caddr_t)&info, sizeof(info)); + RT_LOCK(rt); info.rti_info[RTAX_DST] = (struct sockaddr *)&inp->inp_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) - (void) rtrequest_locked(RTM_DELETE, rt_key(rt), + if (rt->rt_flags & RTF_DYNAMIC) { + /* + * Prevent another thread from modifying rt_key, + * rt_gateway via rt_setgate() after rt_lock is + * dropped by marking the route as defunct. + */ + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + (void) rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); + } else { + RT_UNLOCK(rt); + } /* if the address is gone keep the old route in the pcb */ - if ((ifa_foraddr(inp->inp_laddr.s_addr)) != 0) { - inp->inp_route.ro_rt = 0; - rtfree_locked(rt); + if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { + inp->inp_route.ro_rt = NULL; + rtfree(rt); + ifafree(&ia->ia_ifa); } - lck_mtx_unlock(rt_mtx); /* * A new route can be allocated * the next time output is attempted. @@ -1090,17 +1094,14 @@ in_rtchange(struct inpcb *inp, __unused int errno) struct rtentry *rt; if ((rt = inp->inp_route.ro_rt) != NULL) { - if ((ifa_foraddr(inp->inp_laddr.s_addr)) == 0) + struct in_ifaddr *ia; + + if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { return; /* we can't remove the route now. 
not sure if still ok to use src */ - lck_mtx_lock(rt_mtx); - /* Check again, this time while holding the lock */ - if ((rt = inp->inp_route.ro_rt) == NULL) { - lck_mtx_unlock(rt_mtx); - return; } - rtfree_locked(rt); + ifafree(&ia->ia_ifa); + rtfree(rt); inp->inp_route.ro_rt = NULL; - lck_mtx_unlock(rt_mtx); /* * A new route can be allocated the next time * output is attempted. @@ -1333,6 +1334,15 @@ in_pcbinshash(struct inpcb *inp, int locked) struct inpcbport *phd; u_int32_t hashkey_faddr; + if (!locked) { + if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + /*lock inversion issue, mostly with udp multicast packets */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(pcbinfo->mtx); + socket_lock(inp->inp_socket, 0); + } + } + #if INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; @@ -1342,15 +1352,6 @@ in_pcbinshash(struct inpcb *inp, int locked) inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask); - if (!locked) { - if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); - } - } - pcbhash = &pcbinfo->hashbase[inp->hash_element]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, @@ -1434,372 +1435,6 @@ in_pcbremlists(struct inpcb *inp) inp->inp_pcbinfo->ipi_count--; } -static void in_pcb_detach_port( struct inpcb *inp); -int -in_pcb_grab_port (struct inpcbinfo *pcbinfo, - u_short options, - struct in_addr laddr, - u_short *lport, - struct in_addr faddr, - u_short fport, - u_int cookie, - u_char owner_id) -{ - struct inpcb *inp, *pcb; - struct sockaddr_in sin; - struct proc *p = current_proc(); - int stat; - - - pcbinfo->nat_dummy_socket.so_pcb = 0; - pcbinfo->nat_dummy_socket.so_options = 0; - if (*lport) { - /* The grabber wants a particular port */ - - if (faddr.s_addr || fport) { - 
/* - * This is either the second half of an active connect, or - * it's from the acceptance of an incoming connection. - */ - if (laddr.s_addr == 0) { - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; - return EINVAL; - } - - inp = in_pcblookup_hash(pcbinfo, faddr, fport, laddr, *lport, 0, NULL); - if (inp) { - /* pcb was found, its count was upped. need to decrease it here */ - in_pcb_checkstate(inp, WNT_RELEASE, 0); - if (!(IN_MULTICAST(ntohl(laddr.s_addr)))) { - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; - return (EADDRINUSE); - } - } - - stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) { - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; - return stat; - } - pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); - pcb->inp_vflag |= INP_IPV4; - - pcb->inp_lport = *lport; - pcb->inp_laddr.s_addr = laddr.s_addr; - - pcb->inp_faddr = faddr; - pcb->inp_fport = fport; - - lck_rw_lock_exclusive(pcbinfo->mtx); - in_pcbinshash(pcb, 1); - lck_rw_done(pcbinfo->mtx); - } - else { - /* - * This is either a bind for a passive socket, or it's the - * first part of bind-connect sequence (not likely since an - * ephemeral port is usually used in this case). Or, it's - * the result of a connection acceptance when the foreign - * address/port cannot be provided (which requires the SO_REUSEADDR - * flag if laddr is not multicast). 
- */ - - stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) { - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; - return stat; - } - pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); - pcb->inp_vflag |= INP_IPV4; - - pcbinfo->nat_dummy_socket.so_options = options; - bzero(&sin, sizeof(struct sockaddr_in)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = laddr.s_addr; - sin.sin_port = *lport; - - socket_lock(&pcbinfo->nat_dummy_socket, 1); - stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, - (struct sockaddr *) &sin, p); - if (stat) { - socket_unlock(&pcbinfo->nat_dummy_socket, 1); /*detach first */ - in_pcb_detach_port(pcb); /* will restore dummy pcb */ - return stat; - } - socket_unlock(&pcbinfo->nat_dummy_socket, 1); - } - } - else { - /* The grabber wants an ephemeral port */ - - stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) { - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; - return stat; - } - pcb = sotoinpcb(&pcbinfo->nat_dummy_socket); - pcb->inp_vflag |= INP_IPV4; - - bzero(&sin, sizeof(struct sockaddr_in)); - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = laddr.s_addr; - sin.sin_port = 0; - - if (faddr.s_addr || fport) { - /* - * Not sure if this case will be used - could occur when connect - * is called, skipping the bind. 
- */ - - if (laddr.s_addr == 0) { - in_pcb_detach_port(pcb); /* restores dummy pcb */ - return EINVAL; - } - - socket_lock(&pcbinfo->nat_dummy_socket, 1); - stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, - (struct sockaddr *) &sin, p); - if (stat) { - socket_unlock(&pcbinfo->nat_dummy_socket, 1); - in_pcb_detach_port(pcb); /* restores dummy pcb */ - return stat; - } - - socket_unlock(&pcbinfo->nat_dummy_socket, 1); - inp = in_pcblookup_hash(pcbinfo, faddr, fport, - pcb->inp_laddr, pcb->inp_lport, 0, NULL); - if (inp) { - /* pcb was found, its count was upped. need to decrease it here */ - in_pcb_checkstate(inp, WNT_RELEASE, 0); - in_pcb_detach_port(pcb); - return (EADDRINUSE); - } - - lck_rw_lock_exclusive(pcbinfo->mtx); - pcb->inp_faddr = faddr; - pcb->inp_fport = fport; - in_pcbrehash(pcb); - lck_rw_done(pcbinfo->mtx); - } - else { - /* - * This is a simple bind of an ephemeral port. The local addr - * may or may not be defined. - */ - - socket_lock(&pcbinfo->nat_dummy_socket, 1); - stat = in_pcbbind((struct inpcb *) pcbinfo->nat_dummy_socket.so_pcb, - (struct sockaddr *) &sin, p); - if (stat) { - socket_unlock(&pcbinfo->nat_dummy_socket, 1); - in_pcb_detach_port(pcb); - return stat; - } - socket_unlock(&pcbinfo->nat_dummy_socket, 1); - } - *lport = pcb->inp_lport; - } - - - pcb->nat_owner = owner_id; - pcb->nat_cookie = cookie; - pcb->inp_ppcb = (caddr_t) pcbinfo->dummy_cb; - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */ - return 0; -} - -/* 3962035 - in_pcb_letgo_port needs a special case function for detaching */ -static void -in_pcb_detach_port( - struct inpcb *inp) -{ - struct socket *so = inp->inp_socket; - struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; - - if (so != &pcbinfo->nat_dummy_socket) - panic("in_pcb_detach_port: not a dummy_sock: so=%p, inp=%p\n", so, inp); - inp->inp_gencnt = ++pcbinfo->ipi_gencnt; - /*### access ipi in in_pcbremlists */ - in_pcbremlists(inp); - - inp->inp_socket 
= 0; - zfree(pcbinfo->ipi_zone, inp); - pcbinfo->nat_dummy_socket.so_pcb = (caddr_t)pcbinfo->nat_dummy_pcb; /* restores dummypcb */ -} - -int -in_pcb_letgo_port(struct inpcbinfo *pcbinfo, struct in_addr laddr, u_short lport, - struct in_addr faddr, u_short fport, u_char owner_id) -{ - struct inpcbhead *head; - struct inpcb *inp; - - /* - * First look for an exact match. - */ - - lck_rw_lock_exclusive(pcbinfo->mtx); - head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; - for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { - if (inp->inp_faddr.s_addr == faddr.s_addr && - inp->inp_laddr.s_addr == laddr.s_addr && - inp->inp_fport == fport && - inp->inp_lport == lport && - inp->nat_owner == owner_id) { - /* - * Found. - */ - in_pcb_detach_port(inp); - lck_rw_done(pcbinfo->mtx); - return 0; - } - } - - lck_rw_done(pcbinfo->mtx); - return ENOENT; -} - -u_char -in_pcb_get_owner(struct inpcbinfo *pcbinfo, - struct in_addr laddr, u_short lport, - struct in_addr faddr, u_short fport, - u_int *cookie) - -{ - struct inpcb *inp; - u_char owner_id = INPCB_NO_OWNER; - struct inpcbport *phd; - struct inpcbporthead *porthash; - - - if (IN_MULTICAST(laddr.s_addr)) { - /* - * Walk through PCB's looking for registered - * owners. 
- */ - - lck_rw_lock_shared(pcbinfo->mtx); - porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, - pcbinfo->porthashmask)]; - for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { - if (phd->phd_port == lport) - break; - } - - if (phd == 0) { - lck_rw_done(pcbinfo->mtx); - return INPCB_NO_OWNER; - } - - owner_id = INPCB_NO_OWNER; - for (inp = phd->phd_pcblist.lh_first; inp != NULL; - inp = inp->inp_portlist.le_next) { - - if (inp->inp_laddr.s_addr == laddr.s_addr) { - if (inp->nat_owner == 0) - owner_id |= INPCB_OWNED_BY_X; - else - owner_id |= inp->nat_owner; - } - } - - lck_rw_done(pcbinfo->mtx); - return owner_id; - } - else { - inp = in_pcblookup_hash(pcbinfo, faddr, fport, - laddr, lport, 1, NULL); - if (inp) { - /* pcb was found, its count was upped. need to decrease it here */ - /* if we found it, that pcb is already locked by the caller */ - if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) - return(INPCB_NO_OWNER); - - if (inp->nat_owner) { - owner_id = inp->nat_owner; - *cookie = inp->nat_cookie; - } - else { - owner_id = INPCB_OWNED_BY_X; - } - } - else - owner_id = INPCB_NO_OWNER; - - return owner_id; - } -} - -int -in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id) -{ - - int i; - - - for (i=0; i < INPCB_MAX_IDS; i++) { - if ((pcbinfo->all_owners & (1 << i)) == 0) { - pcbinfo->all_owners |= (1 << i); - *owner_id = (1 << i); - return 0; - } - } - - return ENOSPC; -} - -int -in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id) -{ - struct inpcb *inp; - - - lck_rw_lock_exclusive(pcbinfo->mtx); - if (pcbinfo->all_owners & owner_id) { - pcbinfo->all_owners &= ~owner_id; - for (inp = pcbinfo->listhead->lh_first; inp != NULL; inp = inp->inp_list.le_next) { - if (inp->nat_owner & owner_id) { - if (inp->nat_owner == owner_id) - /* - * Deallocate the pcb - */ - in_pcb_detach_port(inp); - else - inp->nat_owner &= ~owner_id; - } - } - } - else { - lck_rw_done(pcbinfo->mtx); - return ENOENT; - } 
- - lck_rw_done(pcbinfo->mtx); - return 0; -} - - - -void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily, - int pfamily, int protocol) -{ - int stat; - struct proc *p = current_proc(); - - bzero(&pcbinfo->nat_dummy_socket, sizeof(struct socket)); -#if CONFIG_MACF_NET - mac_socket_label_init(&pcbinfo->nat_dummy_socket, M_WAITOK); -#endif - pcbinfo->nat_dummy_socket.so_proto = pffindproto_locked(afamily, pfamily, protocol); - pcbinfo->all_owners = 0; - stat = in_pcballoc(&pcbinfo->nat_dummy_socket, pcbinfo, p); - if (stat) - panic("in_pcb_nat_init: can't alloc fakepcb err=%d\n", stat); - pcbinfo->nat_dummy_pcb = (struct inpcb *)pcbinfo->nat_dummy_socket.so_pcb; -} - /* Mechanism used to defer the memory release of PCBs * The pcb list will contain the pcb until the ripper can clean it up if * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING @@ -1900,8 +1535,7 @@ in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) /* * inpcb_to_compat copies specific bits of an inpcb to a inpcb_compat. * The inpcb_compat data structure is passed to user space and must - * not change. We intentionally avoid copying pointers. The socket is - * the one exception, though we probably shouldn't copy that either. + * not change. We intentionally avoid copying pointers. 
*/ void inpcb_to_compat( @@ -1911,7 +1545,6 @@ inpcb_to_compat( bzero(inp_compat, sizeof(*inp_compat)); inp_compat->inp_fport = inp->inp_fport; inp_compat->inp_lport = inp->inp_lport; - inp_compat->inp_socket = inp->inp_socket; inp_compat->nat_owner = inp->nat_owner; inp_compat->nat_cookie = inp->nat_cookie; inp_compat->inp_gencnt = inp->inp_gencnt; @@ -1925,7 +1558,113 @@ inpcb_to_compat( inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; - inp_compat->inp6_ifindex = inp->inp6_ifindex; + inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; } +#if !CONFIG_EMBEDDED + +void +inpcb_to_xinpcb64( + struct inpcb *inp, + struct xinpcb64 *xinp) +{ + xinp->inp_fport = inp->inp_fport; + xinp->inp_lport = inp->inp_lport; + xinp->inp_gencnt = inp->inp_gencnt; + xinp->inp_flags = inp->inp_flags; + xinp->inp_flow = inp->inp_flow; + xinp->inp_vflag = inp->inp_vflag; + xinp->inp_ip_ttl = inp->inp_ip_ttl; + xinp->inp_ip_p = inp->inp_ip_p; + xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; + xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; + xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; + xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; + xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; + xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; + xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; +} + +#endif /* !CONFIG_EMBEDDED */ + +/* + * The following routines implement this scheme: + * + * Callers of ip_output() that intend to cache the route in the inpcb pass + * a local copy of the struct route to ip_output(). 
Using a local copy of + * the cached route significantly simplifies things as IP no longer has to + * worry about having exclusive access to the passed in struct route, since + * it's defined in the caller's stack; in essence, this allows for a lock- + * less operation when updating the struct route at the IP level and below, + * whenever necessary. The scheme works as follows: + * + * Prior to dropping the socket's lock and calling ip_output(), the caller + * copies the struct route from the inpcb into its stack, and adds a reference + * to the cached route entry, if there was any. The socket's lock is then + * dropped and ip_output() is called with a pointer to the copy of struct + * route defined on the stack (not to the one in the inpcb.) + * + * Upon returning from ip_output(), the caller then acquires the socket's + * lock and synchronizes the cache; if there is no route cached in the inpcb, + * it copies the local copy of struct route (which may or may not contain any + * route) back into the cache; otherwise, if the inpcb has a route cached in + * it, the one in the local copy will be freed, if there's any. Trashing the + * cached route in the inpcb can be avoided because ip_output() is single- + * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized + * by the socket/transport layer.) 
+ */ +void +inp_route_copyout(struct inpcb *inp, struct route *dst) +{ + struct route *src = &inp->inp_route; + + lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) + panic("%s: wrong or corrupted route: %p", __func__, src); + + /* Copy everything (rt, dst, flags) from PCB */ + bcopy(src, dst, sizeof (*dst)); + + /* Hold one reference for the local copy of struct route */ + if (dst->ro_rt != NULL) + RT_ADDREF(dst->ro_rt); +} + +void +inp_route_copyin(struct inpcb *inp, struct route *src) +{ + struct route *dst = &inp->inp_route; + + lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) + panic("%s: wrong or corrupted route: %p", __func__, src); + + /* No cached route in the PCB? */ + if (dst->ro_rt == NULL) { + /* + * Copy everything (rt, dst, flags) from ip_output(); + * the reference to the route was held at the time + * it was allocated and is kept intact. + */ + bcopy(src, dst, sizeof (*dst)); + } else if (src->ro_rt != NULL) { + /* + * If the same, update just the ro_flags and ditch the one + * in the local copy. Else ditch the one that is currently + * cached, and cache what we got back from ip_output(). 
+ */ + if (dst->ro_rt == src->ro_rt) { + dst->ro_flags = src->ro_flags; + rtfree(src->ro_rt); + src->ro_rt = NULL; + } else { + rtfree(dst->ro_rt); + bcopy(src, dst, sizeof (*dst)); + } + } +} diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h index f3dec2200..a793f3a12 100644 --- a/bsd/netinet/in_pcb.h +++ b/bsd/netinet/in_pcb.h @@ -197,10 +197,6 @@ struct inpcb { #if CONFIG_MACF_NET struct label *inp_label; /* MAC label */ #endif -#if CONFIG_IP_EDGEHOLE - u_int32_t inpcb_edgehole_flags; - u_int32_t inpcb_edgehole_mask; -#endif }; #endif /* KERNEL_PRIVATE */ @@ -334,6 +330,54 @@ struct xinpcb { u_quad_t xi_alignment_hack; }; +#if !CONFIG_EMBEDDED + +struct inpcb64_list_entry { + u_int64_t le_next; + u_int64_t le_prev; +}; + +struct xinpcb64 { + u_int64_t xi_len; /* length of this structure */ + u_int64_t xi_inpp; + u_short inp_fport; /* foreign port */ + u_short inp_lport; /* local port */ + struct inpcb64_list_entry + inp_list; /* list for all PCBs of this proto */ + u_int64_t inp_ppcb; /* pointer to per-protocol pcb */ + u_int64_t inp_pcbinfo; /* PCB list info */ + struct inpcb64_list_entry + inp_portlist; /* list for this PCB's local port */ + u_int64_t inp_phd; /* head of this list */ + inp_gen_t inp_gencnt; /* generation count of this instance */ + int inp_flags; /* generic IP/datagram flags */ + u_int32_t inp_flow; + u_char inp_vflag; + u_char inp_ip_ttl; /* time to live */ + u_char inp_ip_p; /* protocol */ + union { /* foreign host table entry */ + struct in_addr_4in6 inp46_foreign; + struct in6_addr inp6_foreign; + } inp_dependfaddr; + union { /* local host table entry */ + struct in_addr_4in6 inp46_local; + struct in6_addr inp6_local; + } inp_dependladdr; + struct { + u_char inp4_ip_tos; /* type of service */ + } inp_depend4; + struct { + u_int8_t inp6_hlim; + int inp6_cksum; + u_short inp6_ifindex; + short inp6_hops; + } inp_depend6; + struct xsocket64 xi_socket; + u_quad_t xi_alignment_hack; +}; + +#endif /* !CONFIG_EMBEDDED */ + struct xinpgen { 
u_int32_t xig_len; /* length of this structure */ u_int xig_count; /* number of PCBs at this time */ @@ -367,7 +411,7 @@ struct xinpgen { #define in6p_moptions inp_depend6.inp6_moptions #define in6p_icmp6filt inp_depend6.inp6_icmp6filt #define in6p_cksum inp_depend6.inp6_cksum -#define inp6_ifindex inp_depend6.inp6_ifindex +#define in6p_ifindex inp_depend6.inp6_ifindex #define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ #define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ #define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ @@ -386,11 +430,11 @@ struct inpcbport { struct inpcbinfo { /* XXX documentation, prefixes */ struct inpcbhead *hashbase; #ifdef __APPLE__ - u_long hashsize; /* in elements */ + u_int32_t hashsize; /* in elements */ #endif - u_long hashmask; + u_long hashmask; /* needs to be u_long as expected by hash functions */ struct inpcbporthead *porthashbase; - u_long porthashmask; + u_long porthashmask; /* needs to be u_long as expected by hash functions */ struct inpcbhead *listhead; u_short lastport; u_short lastlow; @@ -399,15 +443,11 @@ struct inpcbinfo { /* XXX documentation, prefixes */ u_int ipi_count; /* number of pcbs in this list */ u_quad_t ipi_gencnt; /* current generation count */ #ifdef __APPLE__ - u_char all_owners; - struct socket nat_dummy_socket; /* fake socket for NAT pcb backpointer */ - struct inpcb *nat_dummy_pcb; /* fake pcb for finding NAT mutex */ - caddr_t dummy_cb; #ifdef _KERN_LOCKS_H_ - lck_attr_t *mtx_attr; /* mutex attributes */ - lck_grp_t *mtx_grp; /* mutex group definition */ + lck_attr_t *mtx_attr; /* mutex attributes */ + lck_grp_t *mtx_grp; /* mutex group definition */ lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ - lck_rw_t *mtx; /* global mutex for the pcblist*/ + lck_rw_t *mtx; /* global mutex for the pcblist*/ #else void *mtx_attr; /* mutex attributes */ void *mtx_grp; /* mutex group definition */ @@ -450,19 +490,20 @@ struct inpcbinfo { /* XXX documentation, 
prefixes */ #define IN6P_HOPLIMIT 0x020000 /* receive hoplimit */ #define IN6P_HOPOPTS 0x040000 /* receive hop-by-hop options */ #define IN6P_DSTOPTS 0x080000 /* receive dst options after rthdr */ -#define IN6P_RTHDR 0x100000 /* receive routing header */ +#define IN6P_RTHDR 0x100000 /* receive routing header */ #define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */ -#define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ +#define IN6P_TCLASS 0x400000 /* receive traffic class value */ +#define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */ #define IN6P_BINDV6ONLY 0x10000000 /* do not grab IPv4 traffic */ #ifdef KERNEL_PRIVATE #define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\ - INP_RECVIF|\ + INP_RECVIF|INP_RECVTTL|\ IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\ IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\ - IN6P_AUTOFLOWLABEL|INP_RECVTTL) + IN6P_TCLASS|IN6P_AUTOFLOWLABEL) #define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\ - IN6P_AUTOFLOWLABEL) + IN6P_TCLASS|IN6P_AUTOFLOWLABEL) /* for KAME src sync over BSD*'s */ #define IN6P_HIGHPORT INP_HIGHPORT @@ -508,70 +549,39 @@ extern int ipport_hilastauto; #define WNT_ACQUIRE 0x1 /* that pcb is being acquired, do not recycle this time */ #define WNT_RELEASE 0x2 /* release acquired mode, can be garbage collected when wantcnt is null */ - -void in_pcbpurgeif0(struct inpcb *, struct ifnet *); -void in_losing(struct inpcb *); -void in_rtchange(struct inpcb *, int); -int in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *); -int in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); -int in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); -void in_pcbdetach(struct inpcb *); -void in_pcbdispose (struct inpcb *); -void in_pcbdisconnect(struct inpcb *); -int in_pcbinshash(struct inpcb *, int); -int in_pcbladdr(struct inpcb *, struct sockaddr *, struct sockaddr_in **); -struct inpcb * - in_pcblookup_local(struct 
inpcbinfo *, struct in_addr, u_int, int); -struct inpcb * - in_pcblookup_local_and_cleanup(struct inpcbinfo *, struct in_addr, u_int, int); -struct inpcb * - in_pcblookup_hash(struct inpcbinfo *, - struct in_addr, u_int, struct in_addr, u_int, - int, struct ifnet *); -void in_pcbnotifyall(struct inpcbinfo *, struct in_addr, - int, void (*)(struct inpcb *, int)); -void in_pcbrehash(struct inpcb *); -int in_setpeeraddr(struct socket *so, struct sockaddr **nam); -int in_setsockaddr(struct socket *so, struct sockaddr **nam); -int in_pcb_checkstate(struct inpcb *pcb, int mode, int locked); - -int -in_pcb_grab_port (struct inpcbinfo *pcbinfo, - u_short options, - struct in_addr laddr, - u_short *lport, - struct in_addr faddr, - u_short fport, - u_int cookie, - u_char owner_id); - -int -in_pcb_letgo_port(struct inpcbinfo *pcbinfo, - struct in_addr laddr, - u_short lport, - struct in_addr faddr, - u_short fport, u_char owner_id); - -u_char -in_pcb_get_owner(struct inpcbinfo *pcbinfo, - struct in_addr laddr, - u_short lport, - struct in_addr faddr, - u_short fport, - u_int *cookie); - -void in_pcb_nat_init(struct inpcbinfo *pcbinfo, int afamily, int pfamily, - int protocol); - -int -in_pcb_new_share_client(struct inpcbinfo *pcbinfo, u_char *owner_id); - -int -in_pcb_rem_share_client(struct inpcbinfo *pcbinfo, u_char owner_id); - -void in_pcbremlists(struct inpcb *inp); -int in_pcb_ckeckstate(struct inpcb *, int, int); -void inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat); +extern void in_losing(struct inpcb *); +extern void in_rtchange(struct inpcb *, int); +extern int in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *); +extern int in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); +extern int in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); +extern void in_pcbdetach(struct inpcb *); +extern void in_pcbdispose (struct inpcb *); +extern void in_pcbdisconnect(struct inpcb *); +extern int in_pcbinshash(struct inpcb *, 
int); +extern int in_pcbladdr(struct inpcb *, struct sockaddr *, + struct sockaddr_in **); +extern struct inpcb *in_pcblookup_local(struct inpcbinfo *, struct in_addr, + u_int, int); +extern struct inpcb *in_pcblookup_local_and_cleanup(struct inpcbinfo *, + struct in_addr, u_int, int); +extern struct inpcb *in_pcblookup_hash(struct inpcbinfo *, struct in_addr, + u_int, struct in_addr, u_int, int, struct ifnet *); +extern void in_pcbnotifyall(struct inpcbinfo *, struct in_addr, int, + void (*)(struct inpcb *, int)); +extern void in_pcbrehash(struct inpcb *); +extern int in_setpeeraddr(struct socket *so, struct sockaddr **nam); +extern int in_setsockaddr(struct socket *so, struct sockaddr **nam); +extern int in_pcb_checkstate(struct inpcb *pcb, int mode, int locked); + +extern void in_pcbremlists(struct inpcb *inp); +extern void inpcb_to_compat(struct inpcb *inp, + struct inpcb_compat *inp_compat); +#if !CONFIG_EMBEDDED +extern void inpcb_to_xinpcb64(struct inpcb *inp, + struct xinpcb64 *xinp); +#endif +extern void inp_route_copyout(struct inpcb *, struct route *); +extern void inp_route_copyin(struct inpcb *, struct route *); #endif /* KERNEL */ #endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c index 419befad8..c7410a0a3 100644 --- a/bsd/netinet/in_rmx.c +++ b/bsd/netinet/in_rmx.c @@ -85,7 +85,6 @@ extern int tvtohz(struct timeval *); extern int in_inithead(void **head, int off); -extern u_long route_generation; #ifdef __APPLE__ static void in_rtqtimo(void *rock); @@ -107,13 +106,16 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); struct radix_node *ret; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + /* * For IP, all unicast non-host routes are automatically cloning. 
*/ - if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) rt->rt_flags |= RTF_MULTICAST; - if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { + if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { rt->rt_flags |= RTF_PRCLONING; } @@ -160,16 +162,25 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt2 = rtalloc1_scoped_locked(rt_key(rt), 0, RTF_CLONING | RTF_PRCLONING, sa_get_ifscope(rt_key(rt))); if (rt2) { - if (rt2->rt_flags & RTF_LLINFO && - rt2->rt_flags & RTF_HOST && - rt2->rt_gateway && - rt2->rt_gateway->sa_family == AF_LINK) { - rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(rt2), - rt2->rt_gateway, - rt_mask(rt2), rt2->rt_flags, 0); + RT_LOCK(rt2); + if ((rt2->rt_flags & RTF_LLINFO) && + (rt2->rt_flags & RTF_HOST) && + rt2->rt_gateway != NULL && + rt2->rt_gateway->sa_family == AF_LINK) { + /* + * Safe to drop rt_lock and use rt_key, + * rt_gateway, since holding rnh_lock here + * prevents another thread from calling + * rt_setgate() on this route. + */ + RT_UNLOCK(rt2); + rtrequest_locked(RTM_DELETE, rt_key(rt2), + rt2->rt_gateway, rt_mask(rt2), + rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); + } else { + RT_UNLOCK(rt2); } rtfree_locked(rt2); } @@ -185,8 +196,10 @@ in_validate(struct radix_node *rn) { struct rtentry *rt = (struct rtentry *)rn; + RT_LOCK_ASSERT_HELD(rt); + /* This is first reference? 
*/ - if (rt != NULL && rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) { + if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) { rt->rt_flags &= ~RTPRF_OURS; rt->rt_rmx.rmx_expire = 0; } @@ -213,7 +226,12 @@ in_matroute_args(void *v_arg, struct radix_node_head *head, { struct radix_node *rn = rn_match_args(v_arg, head, f, w); - return (in_validate(rn)); + if (rn != NULL) { + RT_LOCK_SPIN((struct rtentry *)rn); + in_validate(rn); + RT_UNLOCK((struct rtentry *)rn); + } + return (rn); } static int rtq_reallyold = 60*60; @@ -251,7 +269,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW, &check_routeselfref , 0, ""); #endif -__private_extern__ int use_routegenid = 1; +int use_routegenid = 1; SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW, &use_routegenid , 0, ""); @@ -264,6 +282,9 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ @@ -284,11 +305,18 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) * called when the route's reference count is 0, don't * deallocate it until we return from this routine by * telling rtrequest that we're interested in it. + * Safe to drop rt_lock and use rt_key, rt_gateway since + * holding rnh_lock here prevents another thread from + * calling rt_setgate() on this route. 
*/ + RT_UNLOCK(rt); if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) { /* Now let the caller free it */ - rtunref(rt); + RT_LOCK(rt); + RT_REMREF_LOCKED(rt); + } else { + RT_LOCK(rt); } } else { struct timeval timenow; @@ -322,8 +350,9 @@ in_rtqkill(struct radix_node *rn, void *rock) struct timeval timenow; getmicrotime(&timenow); - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK(rt); if (rt->rt_flags & RTPRF_OURS) { ap->found++; @@ -331,10 +360,18 @@ in_rtqkill(struct radix_node *rn, void *rock) if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + /* + * Delete this route since we're done with it; + * the route may be freed afterwards, so we + * can no longer refer to 'rt' upon returning + * from rtrequest(). Safe to drop rt_lock and + * use rt_key, rt_gateway since holding rnh_lock + * here prevents another thread from calling + * rt_setgate() on this route. 
+ */ + RT_UNLOCK(rt); + err = rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in_rtqkill: error %d\n", err); } else { @@ -349,7 +386,10 @@ in_rtqkill(struct radix_node *rn, void *rock) } ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); + RT_UNLOCK(rt); } + } else { + RT_UNLOCK(rt); } return 0; @@ -373,7 +413,7 @@ in_rtqtimo(void *rock) static time_t last_adjusted_timeout = 0; struct timeval timenow; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); /* Get the timestamp after we acquire the lock for better accuracy */ getmicrotime(&timenow); @@ -411,7 +451,7 @@ in_rtqtimo(void *rock) atv.tv_usec = 0; atv.tv_sec = arg.nextstop - timenow.tv_sec; - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); timeout(in_rtqtimo_funnel, rock, tvtohz(&atv)); } @@ -425,9 +465,9 @@ in_rtqdrain(void) arg.nextstop = 0; arg.draining = 1; arg.updating = 0; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); rnh->rnh_walktree(rnh, in_rtqkill, &arg); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); } /* @@ -481,6 +521,7 @@ in_ifadownkill(struct radix_node *rn, void *xap) struct rtentry *rt = (struct rtentry *)rn; int err; + RT_LOCK(rt); if (rt->rt_ifa == ap->ifa && (ap->del || !(rt->rt_flags & RTF_STATIC))) { /* @@ -489,14 +530,20 @@ in_ifadownkill(struct radix_node *rn, void *xap) * away the pointers that rn_walktree() needs in order * continue our descent. We will end up deleting all * the routes that rtrequest() would have in any case, - * so that behavior is not needed there. + * so that behavior is not needed there. Safe to drop + * rt_lock and use rt_key, rt_gateway, since holding + * rnh_lock here prevents another thread from calling + * rt_setgate() on this route. 
*/ rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); - err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + RT_UNLOCK(rt); + err = rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in_ifadownkill: error %d\n", err); } + } else { + RT_UNLOCK(rt); } return 0; } @@ -507,14 +554,14 @@ in_ifadown(struct ifaddr *ifa, int delete) struct in_ifadown_arg arg; struct radix_node_head *rnh; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); if (ifa->ifa_addr->sa_family != AF_INET) return 1; /* trigger route cache reevaluation */ - if (use_routegenid) - route_generation++; + if (use_routegenid) + routegenid_update(); arg.rnh = rnh = rt_tables[AF_INET]; arg.ifa = ifa; diff --git a/bsd/netinet/in_systm.h b/bsd/netinet/in_systm.h index bbed62112..8a617b0a7 100644 --- a/bsd/netinet/in_systm.h +++ b/bsd/netinet/in_systm.h @@ -86,6 +86,6 @@ typedef __uint32_t n_time; /* ms since 00:00 GMT, byte rev */ #ifdef KERNEL_PRIVATE n_time iptime(void); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h index fe1bc4899..df7a968af 100644 --- a/bsd/netinet/in_var.h +++ b/bsd/netinet/in_var.h @@ -70,7 +70,7 @@ #include #endif -#ifdef PRIVATE +#ifdef KERNEL_PRIVATE #include /* @@ -84,18 +84,19 @@ struct in_ifaddr { #define ia_ifp ia_ifa.ifa_ifp #define ia_flags ia_ifa.ifa_flags /* ia_{,sub}net{,mask} in host order */ - u_long ia_net; /* network number of interface */ - u_long ia_netmask; /* mask of net part */ - u_long ia_subnet; /* subnet number, including net */ - u_long ia_subnetmask; /* mask of subnet part */ + u_int32_t ia_net; /* network number of interface */ + u_int32_t ia_netmask; /* mask of net part */ + u_int32_t ia_subnet; /* subnet number, including net */ + u_int32_t ia_subnetmask; /* mask of subnet part */ struct in_addr ia_netbroadcast; /* to 
recognize net broadcasts */ TAILQ_ENTRY(in_ifaddr) ia_link; /* tailq macro glue */ struct sockaddr_in ia_addr; /* reserve space for interface name */ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ #define ia_broadaddr ia_dstaddr struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ + TAILQ_ENTRY(in_ifaddr) ia_hash; /* hash bucket entry */ }; -#endif /* PRIVATE */ +#endif /* KERNEL_PRIVATE */ struct in_aliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ @@ -155,6 +156,8 @@ struct kev_in_portinuse { #endif #ifdef KERNEL_PRIVATE +#include +#include /* * Given a pointer to an in_ifaddr (ifaddr), * return a pointer to the addr as a sockaddr_in. @@ -165,11 +168,18 @@ struct kev_in_portinuse { #define IN_LNAOF(in, ifa) \ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask)) -extern TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead; +/* + * Hash table for IPv4 addresses. + */ +__private_extern__ TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead; +__private_extern__ TAILQ_HEAD(in_ifaddrhashhead, in_ifaddr) *in_ifaddrhashtbl; +__private_extern__ lck_rw_t *in_ifaddr_rwlock; + +#define INADDR_HASH(x) (&in_ifaddrhashtbl[inaddr_hashval(x)]) + extern struct ifqueue ipintrq; /* ip packet input queue */ extern struct in_addr zeroin_addr; extern u_char inetctlerrmap[]; -extern lck_mtx_t *rt_mtx; extern int apple_hwcksum_tx; extern int apple_hwcksum_rx; @@ -178,36 +188,37 @@ extern int apple_hwcksum_rx; * Macro for finding the interface (ifnet structure) corresponding to one * of our IP addresses. */ -#define INADDR_TO_IFP(addr, ifp) \ - /* struct in_addr addr; */ \ - /* struct ifnet *ifp; */ \ -{ \ - struct in_ifaddr *ia; \ -\ - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \ - lck_mtx_lock(rt_mtx); \ - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) \ - if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \ - break; \ - (ifp) = (ia == NULL) ? 
NULL : ia->ia_ifp; \ - lck_mtx_unlock(rt_mtx); \ +#define INADDR_TO_IFP(addr, ifp) \ + /* struct in_addr addr; */ \ + /* struct ifnet *ifp; */ \ +{ \ + struct in_ifaddr *ia; \ + \ + lck_rw_lock_shared(in_ifaddr_rwlock); \ + TAILQ_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \ + if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \ + break; \ + (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \ + lck_rw_done(in_ifaddr_rwlock); \ } /* * Macro for finding the internet address structure (in_ifaddr) corresponding - * to a given interface (ifnet structure). + * to a given interface (ifnet structure). Caller is responsible for freeing + * the reference. */ -#define IFP_TO_IA(ifp, ia) \ - /* struct ifnet *ifp; */ \ - /* struct in_ifaddr *ia; */ \ -{ \ - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED); \ - lck_mtx_lock(rt_mtx); \ - for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \ - (ia) != NULL && (ia)->ia_ifp != (ifp); \ - (ia) = TAILQ_NEXT((ia), ia_link)) \ - continue; \ - lck_mtx_unlock(rt_mtx); \ +#define IFP_TO_IA(ifp, ia) \ + /* struct ifnet *ifp; */ \ + /* struct in_ifaddr *ia; */ \ +{ \ + lck_rw_lock_shared(in_ifaddr_rwlock); \ + for ((ia) = TAILQ_FIRST(&in_ifaddrhead); \ + (ia) != NULL && (ia)->ia_ifp != (ifp); \ + (ia) = TAILQ_NEXT((ia), ia_link)) \ + continue; \ + if ((ia) != NULL) \ + ifaref(&(ia)->ia_ifa); \ + lck_rw_done(in_ifaddr_rwlock); \ } /* @@ -299,18 +310,23 @@ do { \ } while(0) struct route; -struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); -void in_delmulti(struct in_multi **); -int in_control(struct socket *, u_long, caddr_t, struct ifnet *, - struct proc *); -void in_rtqdrain(void); + +extern void in_ifaddr_init(void); +extern struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); +extern void in_delmulti(struct in_multi **); +extern int in_control(struct socket *, u_long, caddr_t, struct ifnet *, + struct proc *); +extern void in_rtqdrain(void); extern struct radix_node *in_validate(struct radix_node *); -void ip_input(struct mbuf *); 
-int in_ifadown(struct ifaddr *ifa, int); -void in_ifscrub(struct ifnet *, struct in_ifaddr *, int); -int ipflow_fastforward(struct mbuf *); -void ipflow_create(const struct route *, struct mbuf *); -void ipflow_slowtimo(void); +extern void ip_input(struct mbuf *); +extern int in_ifadown(struct ifaddr *ifa, int); +extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int); +extern int ipflow_fastforward(struct mbuf *); +#if IPFLOW +extern void ipflow_create(const struct route *, struct mbuf *); +extern void ipflow_slowtimo(void); +#endif /* IPFLOW */ +extern u_int32_t inaddr_hashval(u_int32_t); #endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/ip.h b/bsd/netinet/ip.h index c21698f61..eea0a00f9 100644 --- a/bsd/netinet/ip.h +++ b/bsd/netinet/ip.h @@ -149,6 +149,17 @@ struct ip { #define IPTOS_PREC_PRIORITY 0x20 #define IPTOS_PREC_ROUTINE 0x00 +#ifdef PRIVATE +/* + * Definitions of traffic class for use within wireless LAN. + * Mainly used by AFP for backup. Not recommended for general use. + */ +#define IP_TCLASS_BE 0x00 /* standard, best effort */ +#define IP_TCLASS_BK 0x20 /* Background, low priority */ +#define IP_TCLASS_VI 0x80 /* Interactive */ +#define IP_TCLASS_VO 0xc0 /* Signalling */ + +#endif /* * Definitions for options. 
*/ diff --git a/bsd/netinet/ip6.h b/bsd/netinet/ip6.h index fcdbd2d08..203e86a64 100644 --- a/bsd/netinet/ip6.h +++ b/bsd/netinet/ip6.h @@ -331,5 +331,5 @@ do { \ } \ } while (0) -#endif KERNEL_PRIVATE -#endif !_NETINET_IP6_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* !_NETINET_IP6_H_ */ diff --git a/bsd/netinet/ip_compat.h b/bsd/netinet/ip_compat.h index a04817ebe..70d0fadf4 100644 --- a/bsd/netinet/ip_compat.h +++ b/bsd/netinet/ip_compat.h @@ -112,7 +112,7 @@ struct ether_addr { # define U_QUAD_T u_quad_t # define QUAD_T quad_t #else /* BSD > 199306 */ -# define U_QUAD_T u_long +# define U_QUAD_T u_int32_t # define QUAD_T long #endif /* BSD > 199306 */ @@ -129,7 +129,7 @@ typedef u_int32_t u_32_t; # if defined(__alpha__) || defined(__alpha) typedef unsigned int u_32_t; # else -typedef unsigned long u_32_t; +typedef u_int32_t u_32_t; # endif #endif /* __NetBSD__ || __OpenBSD__ || __FreeBSD__ || __sgi */ @@ -606,7 +606,7 @@ typedef struct icmp { ip_t idi_ip; /* options and then 64 bits of data */ } id_ip; - u_long id_mask; + u_int32_t id_mask; char id_data[1]; } icmp_dun; # define icmp_otime icmp_dun.id_ts.its_otime diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index f0708780d..815e39266 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,6 +76,7 @@ #include #include +#include #include #include @@ -131,8 +132,8 @@ static struct inpcbhead divcb; static struct inpcbinfo divcbinfo; -static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ -static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ +static u_int32_t div_sendspace = DIVSNDQ; /* XXX sysctl ? */ +static u_int32_t div_recvspace = DIVRCVQ; /* XXX sysctl ? 
*/ /* Optimization: have this preinitialized */ static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, 0, { 0 }, { 0,0,0,0,0,0,0,0 } }; @@ -190,7 +191,7 @@ div_init(void) void div_input(struct mbuf *m, __unused int off) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noproto); + OSAddAtomic(1, &ipstat.ips_noproto); m_freem(m); } @@ -291,8 +292,8 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) socket_unlock(sa, 1); } else { m_freem(m); - OSAddAtomic(1, (SInt32*)&ipstat.ips_noproto); - OSAddAtomic(-1, (SInt32*)&ipstat.ips_delivered); + OSAddAtomic(1, &ipstat.ips_noproto); + OSAddAtomic(-1, &ipstat.ips_delivered); } lck_rw_done(divcbinfo.mtx); } @@ -352,6 +353,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, /* Reinject packet into the system as incoming or outgoing */ if (!sin || sin->sin_addr.s_addr == 0) { struct ip_out_args ipoa = { IFSCOPE_NONE }; + struct route ro; /* * Don't allow both user specified and setsockopt options, @@ -364,24 +366,28 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, } /* Convert fields to host order for ip_output() */ +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); +#endif + + OSAddAtomic(1, &ipstat.ips_rawout); + /* Copy the cached route and take an extra reference */ + inp_route_copyout(inp, &ro); - /* Send packet to output processing */ - OSAddAtomic(1, (SInt32*)&ipstat.ips_rawout); socket_unlock(so, 0); #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(inp, m); #endif -#if CONFIG_IP_EDGEHOLE - ip_edgehole_mbuf_tag(inp, m); -#endif - error = ip_output(m, - inp->inp_options, &inp->inp_route, + /* Send packet to output processing */ + error = ip_output(m, inp->inp_options, &ro, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS, inp->inp_moptions, &ipoa); + socket_lock(so, 0); + /* Synchronize cached PCB route */ + inp_route_copyin(inp, &ro); } else { struct ifaddr *ifa; @@ -426,7 +432,7 @@ div_attach(struct socket *so, 
int proto, struct proc *p) inp = sotoinpcb(so); if (inp) panic("div_attach"); - if (p && (error = proc_suser(p)) != 0) + if ((error = proc_suser(p)) != 0) return error; error = soreserve(so, div_sendspace, div_recvspace); @@ -519,7 +525,7 @@ div_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr /* Packet must have a header (but that's about it) */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_toosmall); + OSAddAtomic(1, &ipstat.ips_toosmall); m_freem(m); return EINVAL; } @@ -528,9 +534,11 @@ div_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr return div_output(so, m, nam, control); } +#if 0 static int div_pcblist SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg1, arg2) int error, i, n; struct inpcb *inp, **inp_list; inp_gen_t gencnt; @@ -622,82 +630,90 @@ div_pcblist SYSCTL_HANDLER_ARGS lck_rw_done(divcbinfo.mtx); return error; } +#endif __private_extern__ int -div_lock(struct socket *so, int refcount, int lr) - { - int lr_saved; - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; - +div_lock(struct socket *so, int refcount, void *lr) +{ + void *lr_saved; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; + #ifdef MORE_DICVLOCK_DEBUG - printf("div_lock: so=%p sopcb=%p lock=%x ref=%x lr=%x\n", - so, - so->so_pcb, - so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0, - so->so_usecount, - lr_saved); + printf("div_lock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", + so, so->so_pcb, so->so_pcb ? + ((struct inpcb *)so->so_pcb)->inpcb_mtx : NULL, + so->so_usecount, lr_saved); #endif if (so->so_pcb) { lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } else { - panic("div_lock: so=%p NO PCB! lr=%x\n", so, lr_saved); - lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + panic("div_lock: so=%p NO PCB! 
lr=%p lrh= lrh= %s\n", + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } - - if (so->so_usecount < 0) - panic("div_lock: so=%p so_pcb=%p lr=%x ref=%x\n", - so, so->so_pcb, lr_saved, so->so_usecount); - + + if (so->so_usecount < 0) { + panic("div_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", + so, so->so_pcb, lr_saved, so->so_usecount, + solockhistory_nr(so)); + /* NOTREACHED */ + } + if (refcount) so->so_usecount++; - so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved; + so->lock_lr[so->next_lock_lr] = lr_saved; so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } __private_extern__ int -div_unlock(struct socket *so, int refcount, int lr) +div_unlock(struct socket *so, int refcount, void *lr) { - int lr_saved; + void *lr_saved; lck_mtx_t * mutex_held; - struct inpcb *inp = sotoinpcb(so); + struct inpcb *inp = sotoinpcb(so); - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; - #ifdef MORE_DICVLOCK_DEBUG - printf("div_unlock: so=%p sopcb=%p lock=%x ref=%x lr=%x\n", - so, - so->so_pcb, - so->so_pcb ? ((struct inpcb *)so->so_pcb)->inpcb_mtx : 0, - so->so_usecount, - lr_saved); + printf("div_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", + so, so->so_pcb, so->so_pcb ? 
+ ((struct inpcb *)so->so_pcb)->inpcb_mtx : NULL, + so->so_usecount, lr_saved); #endif if (refcount) so->so_usecount--; - - if (so->so_usecount < 0) - panic("div_unlock: so=%p usecount=%x\n", so, so->so_usecount); + + if (so->so_usecount < 0) { + panic("div_unlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } if (so->so_pcb == NULL) { - panic("div_unlock: so=%p NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); - mutex_held = so->so_proto->pr_domain->dom_mtx; - } else { - mutex_held = ((struct inpcb *)so->so_pcb)->inpcb_mtx; + panic("div_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", + so, so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } + mutex_held = ((struct inpcb *)so->so_pcb)->inpcb_mtx; if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { lck_rw_lock_exclusive(divcbinfo.mtx); + if (inp->inp_state != INPCB_STATE_DEAD) + in_pcbdetach(inp); in_pcbdispose(inp); lck_rw_done(divcbinfo.mtx); return (0); } lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); - so->unlock_lr[so->next_unlock_lr] = (u_int32_t) lr_saved; + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); return (0); @@ -710,10 +726,12 @@ div_getlock(struct socket *so, __unused int locktype) if (so->so_pcb) { if (so->so_usecount < 0) - panic("div_getlock: so=%p usecount=%x\n", so, so->so_usecount); + panic("div_getlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); return(inpcb->inpcb_mtx); } else { - panic("div_getlock: so=%p NULL so_pcb\n", so); + panic("div_getlock: so=%p NULL NO PCB lrh= %s\n", + so, solockhistory_nr(so)); return (so->so_proto->pr_domain->dom_mtx); } } diff --git a/bsd/netinet/ip_divert.h b/bsd/netinet/ip_divert.h index 3199acbd9..80fe08e93 100644 --- a/bsd/netinet/ip_divert.h +++ b/bsd/netinet/ip_divert.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /*- * Copyright (c) 2003 Sam Leffler, Errno Consulting * All rights reserved. 
@@ -83,8 +111,8 @@ extern void div_init(void) __attribute__((section("__TEXT, initcode"))); extern void div_input(struct mbuf *, int); lck_mtx_t * div_getlock(struct socket *, int ); -int div_unlock(struct socket *, int, int); -int div_lock(struct socket *, int , int ); +int div_unlock(struct socket *, int, void *); +int div_lock(struct socket *, int , void *); extern void divert_packet(struct mbuf *m, int incoming, int port, int rule); extern struct pr_usrreqs div_usrreqs; diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index b146d94d6..090c692bc 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -142,19 +142,37 @@ static int heap_init(struct dn_heap *h, int size) ; static int heap_insert (struct dn_heap *h, dn_key key1, void *p); static void heap_extract(struct dn_heap *h, void *obj); -static void transmit_event(struct dn_pipe *pipe); -static void ready_event(struct dn_flow_queue *q); -static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ -static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */ +static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, + struct mbuf **tail); +static void ready_event(struct dn_flow_queue *q, struct mbuf **head, + struct mbuf **tail); +static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, + struct mbuf **tail); + +/* + * Packets are retrieved from queues in Dummynet in chains instead of + * packet-by-packet. The entire list of packets is first dequeued and + * sent out by the following function. 
+ */ +static void dummynet_send(struct mbuf *m); + +/* Flag to signify the existence of a dequeued packet chain */ +static int serialize = 0; + +#define HASHSIZE 16 +#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) +static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */ +static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */ + #ifdef SYSCTL_NODE SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, - CTLFLAG_RD, &curr_time, 0, "Current tick"); +SYSCTL_QUAD(_net_inet_ip_dummynet, OID_AUTO, curr_time, + CTLFLAG_RD, &curr_time, "Current tick"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, @@ -195,10 +213,10 @@ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug, #define MY_RANDOM (random() & 0x7FFFFFFF) /* dummynet lock */ -lck_grp_t *dn_mutex_grp; -lck_grp_attr_t *dn_mutex_grp_attr; -lck_attr_t *dn_mutex_attr; -lck_mtx_t *dn_mutex; +static lck_grp_t *dn_mutex_grp; +static lck_grp_attr_t *dn_mutex_grp_attr; +static lck_attr_t *dn_mutex_attr; +static lck_mtx_t *dn_mutex; static int config_pipe(struct dn_pipe *p); static int ip_dn_ctl(struct sockopt *sopt); @@ -211,6 +229,19 @@ static void dn_rule_delete(void *); int if_tx_rdy(struct ifnet *ifp); +static void cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp); +static void cp_queue_to_64_user( struct dn_flow_queue *q, struct dn_flow_queue_64 *qp); +static char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp); +static char* dn_copy_set_64(struct dn_flow_set *set, char *bp); +static int cp_pipe_from_user_64( struct sockopt *sopt, struct dn_pipe *p ); + +static void cp_flow_set_to_32_user(struct 
dn_flow_set *set, struct dn_flow_set_32 *fs_bp); +static void cp_queue_to_32_user( struct dn_flow_queue *q, struct dn_flow_queue_32 *qp); +static char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp); +static char* dn_copy_set_32(struct dn_flow_set *set, char *bp); +static int cp_pipe_from_user_32( struct sockopt *sopt, struct dn_pipe *p ); + + /* * Heap management functions. * @@ -230,6 +261,280 @@ int if_tx_rdy(struct ifnet *ifp); #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } #define HEAP_INCREMENT 15 + +int cp_pipe_from_user_32( struct sockopt *sopt, struct dn_pipe *p ) +{ + struct dn_pipe_32 user_pipe_32; + int error=0; + + error = sooptcopyin(sopt, &user_pipe_32, sizeof(struct dn_pipe_32), sizeof(struct dn_pipe_32)); + if ( !error ){ + p->pipe_nr = user_pipe_32.pipe_nr; + p->bandwidth = user_pipe_32.bandwidth; + p->delay = user_pipe_32.delay; + p->V = user_pipe_32.V; + p->sum = user_pipe_32.sum; + p->numbytes = user_pipe_32.numbytes; + p->sched_time = user_pipe_32.sched_time; + bcopy( user_pipe_32.if_name, p->if_name, IFNAMSIZ); + p->ready = user_pipe_32.ready; + + p->fs.fs_nr = user_pipe_32.fs.fs_nr; + p->fs.flags_fs = user_pipe_32.fs.flags_fs; + p->fs.parent_nr = user_pipe_32.fs.parent_nr; + p->fs.weight = user_pipe_32.fs.weight; + p->fs.qsize = user_pipe_32.fs.qsize; + p->fs.plr = user_pipe_32.fs.plr; + p->fs.flow_mask = user_pipe_32.fs.flow_mask; + p->fs.rq_size = user_pipe_32.fs.rq_size; + p->fs.rq_elements = user_pipe_32.fs.rq_elements; + p->fs.last_expired = user_pipe_32.fs.last_expired; + p->fs.backlogged = user_pipe_32.fs.backlogged; + p->fs.w_q = user_pipe_32.fs.w_q; + p->fs.max_th = user_pipe_32.fs.max_th; + p->fs.min_th = user_pipe_32.fs.min_th; + p->fs.max_p = user_pipe_32.fs.max_p; + p->fs.c_1 = user_pipe_32.fs.c_1; + p->fs.c_2 = user_pipe_32.fs.c_2; + p->fs.c_3 = user_pipe_32.fs.c_3; + p->fs.c_4 = user_pipe_32.fs.c_4; + p->fs.lookup_depth = user_pipe_32.fs.lookup_depth; + p->fs.lookup_step = 
user_pipe_32.fs.lookup_step; + p->fs.lookup_weight = user_pipe_32.fs.lookup_weight; + p->fs.avg_pkt_size = user_pipe_32.fs.avg_pkt_size; + p->fs.max_pkt_size = user_pipe_32.fs.max_pkt_size; + } + return error; +} + + +int cp_pipe_from_user_64( struct sockopt *sopt, struct dn_pipe *p ) +{ + struct dn_pipe_64 user_pipe_64; + int error=0; + + error = sooptcopyin(sopt, &user_pipe_64, sizeof(struct dn_pipe_64), sizeof(struct dn_pipe_64)); + if ( !error ){ + p->pipe_nr = user_pipe_64.pipe_nr; + p->bandwidth = user_pipe_64.bandwidth; + p->delay = user_pipe_64.delay; + p->V = user_pipe_64.V; + p->sum = user_pipe_64.sum; + p->numbytes = user_pipe_64.numbytes; + p->sched_time = user_pipe_64.sched_time; + bcopy( user_pipe_64.if_name, p->if_name, IFNAMSIZ); + p->ready = user_pipe_64.ready; + + p->fs.fs_nr = user_pipe_64.fs.fs_nr; + p->fs.flags_fs = user_pipe_64.fs.flags_fs; + p->fs.parent_nr = user_pipe_64.fs.parent_nr; + p->fs.weight = user_pipe_64.fs.weight; + p->fs.qsize = user_pipe_64.fs.qsize; + p->fs.plr = user_pipe_64.fs.plr; + p->fs.flow_mask = user_pipe_64.fs.flow_mask; + p->fs.rq_size = user_pipe_64.fs.rq_size; + p->fs.rq_elements = user_pipe_64.fs.rq_elements; + p->fs.last_expired = user_pipe_64.fs.last_expired; + p->fs.backlogged = user_pipe_64.fs.backlogged; + p->fs.w_q = user_pipe_64.fs.w_q; + p->fs.max_th = user_pipe_64.fs.max_th; + p->fs.min_th = user_pipe_64.fs.min_th; + p->fs.max_p = user_pipe_64.fs.max_p; + p->fs.c_1 = user_pipe_64.fs.c_1; + p->fs.c_2 = user_pipe_64.fs.c_2; + p->fs.c_3 = user_pipe_64.fs.c_3; + p->fs.c_4 = user_pipe_64.fs.c_4; + p->fs.lookup_depth = user_pipe_64.fs.lookup_depth; + p->fs.lookup_step = user_pipe_64.fs.lookup_step; + p->fs.lookup_weight = user_pipe_64.fs.lookup_weight; + p->fs.avg_pkt_size = user_pipe_64.fs.avg_pkt_size; + p->fs.max_pkt_size = user_pipe_64.fs.max_pkt_size; + } + return error; +} + +static void +cp_flow_set_to_32_user(struct dn_flow_set *set, struct dn_flow_set_32 *fs_bp) +{ + fs_bp->fs_nr = set->fs_nr; + 
fs_bp->flags_fs = set->flags_fs ; + fs_bp->parent_nr = set->parent_nr ; + fs_bp->weight = set->weight ; + fs_bp->qsize = set->qsize ; + fs_bp->plr = set->plr ; + fs_bp->flow_mask = set->flow_mask ; + fs_bp->rq_size = set->rq_size ; + fs_bp->rq_elements = set->rq_elements ; + fs_bp->last_expired = set->last_expired ; + fs_bp->backlogged = set->backlogged ; + fs_bp->w_q = set->w_q ; + fs_bp->max_th = set->max_th ; + fs_bp->min_th = set->min_th ; + fs_bp->max_p = set->max_p ; + fs_bp->c_1 = set->c_1 ; + fs_bp->c_2 = set->c_2 ; + fs_bp->c_3 = set->c_3 ; + fs_bp->c_4 = set->c_4 ; + fs_bp->w_q_lookup = CAST_DOWN_EXPLICIT(user32_addr_t, set->w_q_lookup) ; + fs_bp->lookup_depth = set->lookup_depth ; + fs_bp->lookup_step = set->lookup_step ; + fs_bp->lookup_weight = set->lookup_weight ; + fs_bp->avg_pkt_size = set->avg_pkt_size ; + fs_bp->max_pkt_size = set->max_pkt_size ; +} + +static void +cp_flow_set_to_64_user(struct dn_flow_set *set, struct dn_flow_set_64 *fs_bp) +{ + fs_bp->fs_nr = set->fs_nr; + fs_bp->flags_fs = set->flags_fs ; + fs_bp->parent_nr = set->parent_nr ; + fs_bp->weight = set->weight ; + fs_bp->qsize = set->qsize ; + fs_bp->plr = set->plr ; + fs_bp->flow_mask = set->flow_mask ; + fs_bp->rq_size = set->rq_size ; + fs_bp->rq_elements = set->rq_elements ; + fs_bp->last_expired = set->last_expired ; + fs_bp->backlogged = set->backlogged ; + fs_bp->w_q = set->w_q ; + fs_bp->max_th = set->max_th ; + fs_bp->min_th = set->min_th ; + fs_bp->max_p = set->max_p ; + fs_bp->c_1 = set->c_1 ; + fs_bp->c_2 = set->c_2 ; + fs_bp->c_3 = set->c_3 ; + fs_bp->c_4 = set->c_4 ; + fs_bp->w_q_lookup = CAST_DOWN(user64_addr_t, set->w_q_lookup) ; + fs_bp->lookup_depth = set->lookup_depth ; + fs_bp->lookup_step = set->lookup_step ; + fs_bp->lookup_weight = set->lookup_weight ; + fs_bp->avg_pkt_size = set->avg_pkt_size ; + fs_bp->max_pkt_size = set->max_pkt_size ; +} + +static +void cp_queue_to_32_user( struct dn_flow_queue *q, struct dn_flow_queue_32 *qp) +{ + qp->id = q->id; + 
qp->len = q->len; + qp->len_bytes = q->len_bytes; + qp->numbytes = q->numbytes; + qp->tot_pkts = q->tot_pkts; + qp->tot_bytes = q->tot_bytes; + qp->drops = q->drops; + qp->hash_slot = q->hash_slot; + qp->avg = q->avg; + qp->count = q->count; + qp->random = q->random; + qp->q_time = q->q_time; + qp->heap_pos = q->heap_pos; + qp->sched_time = q->sched_time; + qp->S = q->S; + qp->F = q->F; +} + +static +void cp_queue_to_64_user( struct dn_flow_queue *q, struct dn_flow_queue_64 *qp) +{ + qp->id = q->id; + qp->len = q->len; + qp->len_bytes = q->len_bytes; + qp->numbytes = q->numbytes; + qp->tot_pkts = q->tot_pkts; + qp->tot_bytes = q->tot_bytes; + qp->drops = q->drops; + qp->hash_slot = q->hash_slot; + qp->avg = q->avg; + qp->count = q->count; + qp->random = q->random; + qp->q_time = q->q_time; + qp->heap_pos = q->heap_pos; + qp->sched_time = q->sched_time; + qp->S = q->S; + qp->F = q->F; +} + +static +char *cp_pipe_to_32_user(struct dn_pipe *p, struct dn_pipe_32 *pipe_bp) +{ + char *bp; + + pipe_bp->pipe_nr = p->pipe_nr; + pipe_bp->bandwidth = p->bandwidth; + bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_32)); + pipe_bp->scheduler_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->scheduler_heap.p); + bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_32)); + pipe_bp->not_eligible_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->not_eligible_heap.p); + bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_32)); + pipe_bp->idle_heap.p = CAST_DOWN_EXPLICIT(user32_addr_t, pipe_bp->idle_heap.p); + pipe_bp->V = p->V; + pipe_bp->sum = p->sum; + pipe_bp->numbytes = p->numbytes; + pipe_bp->sched_time = p->sched_time; + bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ); + pipe_bp->ifp = CAST_DOWN_EXPLICIT(user32_addr_t, p->ifp); + pipe_bp->ready = p->ready; + + cp_flow_set_to_32_user( &(p->fs), &(pipe_bp->fs)); + + pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; + /* + * XXX the following is 
a hack based on ->next being the + * first field in dn_pipe and dn_flow_set. The correct + * solution would be to move the dn_flow_set to the beginning + * of struct dn_pipe. + */ + pipe_bp->next = CAST_DOWN_EXPLICIT( user32_addr_t, DN_IS_PIPE ); + /* clean pointers */ + pipe_bp->head = pipe_bp->tail = (user32_addr_t) 0 ; + pipe_bp->fs.next = (user32_addr_t)0 ; + pipe_bp->fs.pipe = (user32_addr_t)0 ; + pipe_bp->fs.rq = (user32_addr_t)0 ; + bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_32); + return( dn_copy_set_32( &(p->fs), bp) ); +} + +static +char *cp_pipe_to_64_user(struct dn_pipe *p, struct dn_pipe_64 *pipe_bp) +{ + char *bp; + + pipe_bp->pipe_nr = p->pipe_nr; + pipe_bp->bandwidth = p->bandwidth; + bcopy( &(p->scheduler_heap), &(pipe_bp->scheduler_heap), sizeof(struct dn_heap_64)); + pipe_bp->scheduler_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->scheduler_heap.p); + bcopy( &(p->not_eligible_heap), &(pipe_bp->not_eligible_heap), sizeof(struct dn_heap_64)); + pipe_bp->not_eligible_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->not_eligible_heap.p); + bcopy( &(p->idle_heap), &(pipe_bp->idle_heap), sizeof(struct dn_heap_64)); + pipe_bp->idle_heap.p = CAST_DOWN(user64_addr_t, pipe_bp->idle_heap.p); + pipe_bp->V = p->V; + pipe_bp->sum = p->sum; + pipe_bp->numbytes = p->numbytes; + pipe_bp->sched_time = p->sched_time; + bcopy( p->if_name, pipe_bp->if_name, IFNAMSIZ); + pipe_bp->ifp = CAST_DOWN(user64_addr_t, p->ifp); + pipe_bp->ready = p->ready; + + cp_flow_set_to_64_user( &(p->fs), &(pipe_bp->fs)); + + pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; + /* + * XXX the following is a hack based on ->next being the + * first field in dn_pipe and dn_flow_set. The correct + * solution would be to move the dn_flow_set to the beginning + * of struct dn_pipe. 
+ */ + pipe_bp->next = CAST_DOWN( user64_addr_t, DN_IS_PIPE ); + /* clean pointers */ + pipe_bp->head = pipe_bp->tail = USER_ADDR_NULL ; + pipe_bp->fs.next = USER_ADDR_NULL ; + pipe_bp->fs.pipe = USER_ADDR_NULL ; + pipe_bp->fs.rq = USER_ADDR_NULL ; + bp = ((char *)pipe_bp) + sizeof(struct dn_pipe_64); + return( dn_copy_set_64( &(p->fs), bp) ); +} + static int heap_init(struct dn_heap *h, int new_size) { @@ -453,71 +758,33 @@ dn_tag_get(struct mbuf *m) * invocations of the procedures. */ static void -transmit_event(struct dn_pipe *pipe) +transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail) { struct mbuf *m ; struct dn_pkt_tag *pkt ; - + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); - - while ( (m = pipe->head) ) { - pkt = dn_tag_get(m); - if ( !DN_KEY_LEQ(pkt->output_time, curr_time) ) - break; - /* - * first unlink, then call procedures, since ip_input() can invoke - * ip_output() and viceversa, thus causing nested calls - */ - pipe->head = m->m_nextpkt ; - m->m_nextpkt = NULL; - - /* XXX: drop the lock for now to avoid LOR's */ - lck_mtx_unlock(dn_mutex); - switch (pkt->dn_dir) { - case DN_TO_IP_OUT: { - struct route tmp_rt = pkt->ro; - (void)ip_output(m, NULL, NULL, pkt->flags, NULL, NULL); - if (tmp_rt.ro_rt) { - rtfree(tmp_rt.ro_rt); - tmp_rt.ro_rt = NULL; - } - break ; - } - case DN_TO_IP_IN : - proto_inject(PF_INET, m); - break ; - -#if BRIDGE - case DN_TO_BDG_FWD : - /* - * The bridge requires/assumes the Ethernet header is - * contiguous in the first mbuf header. Insure this is true. - */ - if (BDG_LOADED) { - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/bridge: pullup fail, dropping pkt\n"); - break; - } - m = bdg_forward_ptr(m, pkt->ifp); - } else { - /* somebody unloaded the bridge module. 
Drop pkt */ - /* XXX rate limit */ - printf("dummynet: dropping bridged packet trapped in pipe\n"); - } - if (m) - m_freem(m); + + /* Extract packets only if no pending chain is being currently processed */ + if (serialize == 0) { + while ((m = pipe->head) != NULL) { + pkt = dn_tag_get(m); + if (!DN_KEY_LEQ(pkt->output_time, curr_time)) break; -#endif - default: - printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(m); - break ; + + pipe->head = m->m_nextpkt; + if (*tail != NULL) + (*tail)->m_nextpkt = m; + else + *head = m; + *tail = m; } - lck_mtx_lock(dn_mutex); - } - /* if there are leftover packets, put into the heap for next event */ - if ( (m = pipe->head) ) { + if (*tail != NULL) + (*tail)->m_nextpkt = NULL; + } + + /* if there are leftover packets, put the pipe into the heap for next ready event */ + if ((m = pipe->head) != NULL) { pkt = dn_tag_get(m); /* XXX should check errors on heap_insert, by draining the * whole pipe p and hoping in the future we are more successful @@ -574,7 +841,7 @@ move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, * if there are leftover packets reinsert the pkt in the scheduler. */ static void -ready_event(struct dn_flow_queue *q) +ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) { struct mbuf *pkt; struct dn_pipe *p = q->fs->pipe ; @@ -583,8 +850,8 @@ ready_event(struct dn_flow_queue *q) lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); if (p == NULL) { - printf("dummynet: ready_event- pipe is gone\n"); - return ; + printf("dummynet: ready_event pipe is gone\n"); + return ; } p_was_empty = (p->head == NULL) ; @@ -628,7 +895,7 @@ ready_event(struct dn_flow_queue *q) * Otherwise, the scheduler will take care of it. */ if (p_was_empty) - transmit_event(p); + transmit_event(p, head, tail); } /* @@ -640,16 +907,17 @@ ready_event(struct dn_flow_queue *q) * there is an additional delay. 
*/ static void -ready_event_wfq(struct dn_pipe *p) +ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) { int p_was_empty = (p->head == NULL) ; struct dn_heap *sch = &(p->scheduler_heap); struct dn_heap *neh = &(p->not_eligible_heap) ; + int64_t p_numbytes = p->numbytes; lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); if (p->if_name[0] == 0) /* tx clock is simulated */ - p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; + p_numbytes += ( curr_time - p->sched_time ) * p->bandwidth; else { /* tx clock is for real, the ifq must be empty or this is a NOP */ if (p->ifp && p->ifp->if_snd.ifq_head != NULL) return ; @@ -663,7 +931,7 @@ ready_event_wfq(struct dn_pipe *p) * While we have backlogged traffic AND credit, we need to do * something on the queue. */ - while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { + while ( p_numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { if (sch->elements > 0) { /* have some eligible pkts to send out */ struct dn_flow_queue *q = sch->p[0].object ; struct mbuf *pkt = q->head; @@ -672,7 +940,7 @@ ready_event_wfq(struct dn_pipe *p) int len_scaled = p->bandwidth ? len*8*(hz*10) : 0 ; heap_extract(sch, NULL); /* remove queue from heap */ - p->numbytes -= len_scaled ; + p_numbytes -= len_scaled ; move_pkt(pkt, q, p, len); p->V += (len<sum ; /* update V */ @@ -709,11 +977,11 @@ ready_event_wfq(struct dn_pipe *p) } if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ - p->numbytes = -1 ; /* mark not ready for I/O */ + p_numbytes = -1 ; /* mark not ready for I/O */ break ; } } - if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 + if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 && p->idle_heap.elements > 0) { /* * no traffic and no events scheduled. We can get rid of idle-heap. @@ -735,11 +1003,11 @@ ready_event_wfq(struct dn_pipe *p) * If we are under credit, schedule the next ready event. * Also fix the delivery time of the last packet. 
*/ - if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */ + if (p->if_name[0]==0 && p_numbytes < 0) { /* this implies bandwidth >0 */ dn_key t=0 ; /* number of ticks i have to wait */ if (p->bandwidth > 0) - t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; + t = ( p->bandwidth -1 - p_numbytes) / p->bandwidth ; dn_tag_get(p->tail)->output_time += t ; p->sched_time = curr_time ; heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); @@ -747,12 +1015,22 @@ ready_event_wfq(struct dn_pipe *p) * queue on error hoping next time we are luckier. */ } + + /* Fit (adjust if necessary) 64bit result into 32bit variable. */ + if (p_numbytes > INT_MAX) + p->numbytes = INT_MAX; + else if (p_numbytes < INT_MIN) + p->numbytes = INT_MIN; + else + p->numbytes = p_numbytes; + /* * If the delay line was empty call transmit_event(p) now. * Otherwise, the scheduler will take care of it. */ if (p_was_empty) - transmit_event(p); + transmit_event(p, head, tail); + } /* @@ -765,6 +1043,7 @@ dummynet(__unused void * unused) void *p ; /* generic parameter to handler */ struct dn_heap *h ; struct dn_heap *heaps[3]; + struct mbuf *head = NULL, *tail = NULL; int i; struct dn_pipe *pe ; struct timespec ts; @@ -773,7 +1052,7 @@ dummynet(__unused void * unused) heaps[0] = &ready_heap ; /* fixed-rate queues */ heaps[1] = &wfq_ready_heap ; /* wfq queues */ heaps[2] = &extract_heap ; /* delay line */ - + lck_mtx_lock(dn_mutex); /* make all time measurements in milliseconds (ms) - @@ -782,30 +1061,32 @@ dummynet(__unused void * unused) */ microuptime(&tv); curr_time = (tv.tv_sec * 1000) + (tv.tv_usec / 1000); - + for (i=0; i < 3 ; i++) { h = heaps[i]; while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { if (h->p[0].key > curr_time) - printf("dummynet: warning, heap %d is %d ticks late\n", - i, (int)(curr_time - h->p[0].key)); + printf("dummynet: warning, heap %d is %d ticks late\n", + i, (int)(curr_time - h->p[0].key)); p = h->p[0].object ; /* store a copy before 
heap_extract */ heap_extract(h, NULL); /* need to extract before processing */ if (i == 0) - ready_event(p) ; + ready_event(p, &head, &tail) ; else if (i == 1) { - struct dn_pipe *pipe = p; - if (pipe->if_name[0] != '\0') - printf("dummynet: bad ready_event_wfq for pipe %s\n", - pipe->if_name); - else - ready_event_wfq(p) ; - } else - transmit_event(p); + struct dn_pipe *pipe = p; + if (pipe->if_name[0] != '\0') + printf("dummynet: bad ready_event_wfq for pipe %s\n", + pipe->if_name); + else + ready_event_wfq(p, &head, &tail) ; + } else { + transmit_event(p, &head, &tail); + } } } /* sweep pipes trying to expire idle flow_queues */ - for (pe = all_pipes; pe ; pe = pe->next ) + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(pe, &pipehash[i], next) if (pe->idle_heap.elements > 0 && DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) { struct dn_flow_queue *q = pe->idle_heap.p[0].object ; @@ -829,9 +1110,82 @@ dummynet(__unused void * unused) break; } } - + + /* + * If a packet chain has been dequeued, set serialize=1 so that new + * packets don't get dispatched out of turn + */ + if (head != NULL) + serialize = 1; + lck_mtx_unlock(dn_mutex); + + /* Send out the de-queued list of ready-to-send packets */ + if (head != NULL) { + dummynet_send(head); + lck_mtx_lock(dn_mutex); + serialize = 0; + lck_mtx_unlock(dn_mutex); + } +} + + +static void +dummynet_send(struct mbuf *m) +{ + struct dn_pkt_tag *pkt; + struct mbuf *n; + + for (; m != NULL; m = n) { + n = m->m_nextpkt; + m->m_nextpkt = NULL; + pkt = dn_tag_get(m); + + switch (pkt->dn_dir) { + case DN_TO_IP_OUT: { + struct route tmp_rt = pkt->ro; + (void)ip_output(m, NULL, &tmp_rt, pkt->flags, NULL, NULL); + if (tmp_rt.ro_rt) { + rtfree(tmp_rt.ro_rt); + tmp_rt.ro_rt = NULL; + } + break ; + } + case DN_TO_IP_IN : + proto_inject(PF_INET, m); + break ; + +#if BRIDGE + case DN_TO_BDG_FWD : + /* + * The bridge requires/assumes the Ethernet header is + * contiguous in the first mbuf header. Insure this is true. 
+ */ + if (BDG_LOADED) { + if (m->m_len < ETHER_HDR_LEN && + (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { + printf("dummynet/bridge: pullup fail, dropping pkt\n"); + break; + } + m = bdg_forward_ptr(m, pkt->ifp); + } else { + /* somebody unloaded the bridge module. Drop pkt */ + /* XXX rate limit */ + printf("dummynet: dropping bridged packet trapped in pipe\n"); + } + if (m) + m_freem(m); + break; +#endif + default: + printf("dummynet: bad switch %d!\n", pkt->dn_dir); + m_freem(m); + break ; + } + } } + + /* * called by an interface when tx_rdy occurs. @@ -840,15 +1194,20 @@ int if_tx_rdy(struct ifnet *ifp) { struct dn_pipe *p; - + struct mbuf *head = NULL, *tail = NULL; + int i; + lck_mtx_lock(dn_mutex); - for (p = all_pipes; p ; p = p->next ) - if (p->ifp == ifp) - break ; + + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(p, &pipehash[i], next) + if (p->ifp == ifp) + break ; if (p == NULL) { char buf[32]; snprintf(buf, sizeof(buf), "%s%d",ifp->if_name, ifp->if_unit); - for (p = all_pipes; p ; p = p->next ) + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(p, &pipehash[i], next) if (!strcmp(p->if_name, buf) ) { p->ifp = ifp ; DPRINTF(("dummynet: ++ tx rdy from %s (now found)\n", buf)); @@ -859,9 +1218,14 @@ if_tx_rdy(struct ifnet *ifp) DPRINTF(("dummynet: ++ tx rdy from %s%d - qlen %d\n", ifp->if_name, ifp->if_unit, ifp->if_snd.ifq_len)); p->numbytes = 0 ; /* mark ready for I/O */ - ready_event_wfq(p); + ready_event_wfq(p, &head, &tail); } - lck_mtx_lock(dn_mutex); + lck_mtx_unlock(dn_mutex); + + + /* Send out the de-queued list of ready-to-send packets */ + if (head != NULL) + dummynet_send(head); return 0; } @@ -1105,35 +1469,29 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) static __inline struct dn_flow_set * -locate_flowset(int pipe_nr, struct ip_fw *rule) +locate_flowset(int fs_nr) { struct dn_flow_set *fs; - ipfw_insn *cmd = rule->cmd + rule->act_ofs; + SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next) + if (fs->fs_nr == fs_nr) + 
return fs ; + + return (NULL); +} - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); +static __inline struct dn_pipe * +locate_pipe(int pipe_nr) +{ + struct dn_pipe *pipe; - bcopy(& ((ipfw_insn_pipe *)cmd)->pipe_ptr, &fs, sizeof(fs)); + SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next) + if (pipe->pipe_nr == pipe_nr) + return (pipe); - if (fs != NULL) - return fs; + return (NULL); +} - if (cmd->opcode == O_QUEUE) { - for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) - ; - } - else { - struct dn_pipe *p1; - for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next) - ; - if (p1 != NULL) - fs = &(p1->fs) ; - } - /* record for the future */ - bcopy(&fs, & ((ipfw_insn_pipe *)cmd)->pipe_ptr, sizeof(fs)); - return fs ; -} /* * dummynet hook for packets. Below 'pipe' is a pipe or a queue @@ -1154,9 +1512,10 @@ locate_flowset(int pipe_nr, struct ip_fw *rule) static int dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) { + struct mbuf *head = NULL, *tail = NULL; struct dn_pkt_tag *pkt; struct m_tag *mtag; - struct dn_flow_set *fs; + struct dn_flow_set *fs = NULL; struct dn_pipe *pipe ; u_int64_t len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; @@ -1188,14 +1547,21 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) /* * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. */ - fs = locate_flowset(pipe_nr, fwa->rule); - if (fs == NULL) + if (is_pipe) { + pipe = locate_pipe(pipe_nr); + if (pipe != NULL) + fs = &(pipe->fs); + } else + fs = locate_flowset(pipe_nr); + + + if (fs == NULL){ goto dropit ; /* this queue/pipe does not exist! 
*/ + } pipe = fs->pipe ; if (pipe == NULL) { /* must be a queue, try find a matching pipe */ - for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr; - pipe = pipe->next) - ; + pipe = locate_pipe(fs->parent_nr); + if (pipe != NULL) fs->pipe = pipe ; else { @@ -1245,14 +1611,13 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) * the caller passed a pointer into the stack; dst might also be * a pointer into *ro so it needs to be updated. */ - lck_mtx_lock(rt_mtx); pkt->ro = *(fwa->ro); if (fwa->ro->ro_rt) - rtref(fwa->ro->ro_rt); + RT_ADDREF(fwa->ro->ro_rt); + if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */ fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; - lck_mtx_unlock(rt_mtx); - + pkt->dn_dst = fwa->dst; pkt->flags = fwa->flags; if (fwa->ipoa != NULL) @@ -1282,7 +1647,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) t = SET_TICKS(m, q, pipe); q->sched_time = curr_time ; if (t == 0) /* must process it now */ - ready_event( q ); + ready_event( q , &head, &tail ); else heap_insert(&ready_heap, curr_time + t , q ); } else { @@ -1333,7 +1698,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) DPRINTF(("dummynet: waking up pipe %d at %d\n", pipe->pipe_nr, (int)(q->F >> MY_M))); pipe->sched_time = curr_time ; - ready_event_wfq(pipe); + ready_event_wfq(pipe, &head, &tail); } } } @@ -1347,6 +1712,9 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) } lck_mtx_unlock(dn_mutex); + if (head != NULL) + dummynet_send(head); + return 0; dropit: @@ -1361,17 +1729,17 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) * Below, the rtfree is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ -#define DN_FREE_PKT(_m) do { \ +#define DN_FREE_PKT(_m) do { \ struct m_tag *tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); 
\ - if (tag) { \ + if (tag) { \ struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \ - if (n->ro.ro_rt) { \ - rtfree(n->ro.ro_rt); \ - n->ro.ro_rt = NULL; \ - } \ - } \ - m_tag_delete(_m, tag); \ - m_freem(_m); \ + if (n->ro.ro_rt != NULL) { \ + rtfree(n->ro.ro_rt); \ + n->ro.ro_rt = NULL; \ + } \ + } \ + m_tag_delete(_m, tag); \ + m_freem(_m); \ } while (0) /* @@ -1445,38 +1813,36 @@ purge_pipe(struct dn_pipe *pipe) static void dummynet_flush(void) { - struct dn_pipe *curr_p, *p ; - struct dn_flow_set *fs, *curr_fs; + struct dn_pipe *pipe, *pipe1; + struct dn_flow_set *fs, *fs1; + int i; lck_mtx_lock(dn_mutex); /* remove all references to pipes ...*/ flush_pipe_ptrs(NULL); - /* prevent future matches... */ - p = all_pipes ; - all_pipes = NULL ; - fs = all_flow_sets ; - all_flow_sets = NULL ; - /* and free heaps so we don't have unwanted events */ - heap_free(&ready_heap); - heap_free(&wfq_ready_heap); - heap_free(&extract_heap); + + /* Free heaps so we don't have unwanted events. */ + heap_free(&ready_heap); + heap_free(&wfq_ready_heap); + heap_free(&extract_heap); - /* - * Now purge all queued pkts and delete all pipes - */ - /* scan and purge all flow_sets. */ - for ( ; fs ; ) { - curr_fs = fs ; - fs = fs->next ; - purge_flow_set(curr_fs, 1); - } - for ( ; p ; ) { - purge_pipe(p); - curr_p = p ; - p = p->next ; - FREE(curr_p, M_DUMMYNET); - } + /* + * Now purge all queued pkts and delete all pipes. + * + * XXXGL: can we merge the for(;;) cycles into one or not? 
+ */ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { + SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); + purge_flow_set(fs, 1); + } + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { + SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); + purge_pipe(pipe); + FREE(pipe, M_DUMMYNET); + } lck_mtx_unlock(dn_mutex); } @@ -1508,6 +1874,7 @@ dn_rule_delete(void *r) struct dn_flow_set *fs ; struct dn_pkt_tag *pkt ; struct mbuf *m ; + int i; lck_mtx_lock(dn_mutex); @@ -1516,18 +1883,21 @@ dn_rule_delete(void *r) * the flow set, otherwise scan pipes. Should do either, but doing * both does not harm. */ - for ( fs = all_flow_sets ; fs ; fs = fs->next ) - dn_rule_delete_fs(fs, r); - for ( p = all_pipes ; p ; p = p->next ) { - fs = &(p->fs) ; - dn_rule_delete_fs(fs, r); - for (m = p->head ; m ; m = m->m_nextpkt ) { - pkt = dn_tag_get(m) ; - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule ; + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + dn_rule_delete_fs(fs, r); + + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(p, &pipehash[i], next) { + fs = &(p->fs); + dn_rule_delete_fs(fs, r); + for (m = p->head ; m ; m = m->m_nextpkt ) { + pkt = dn_tag_get(m); + if (pkt->rule == r) + pkt->rule = ip_fw_default_rule; + } } - } - lck_mtx_unlock(dn_mutex); + lck_mtx_unlock(dn_mutex); } /* @@ -1654,13 +2024,13 @@ config_pipe(struct dn_pipe *p) if (p->pipe_nr != 0 && pfs->fs_nr != 0) return EINVAL ; if (p->pipe_nr != 0) { /* this is a pipe */ - struct dn_pipe *x, *a, *b; + struct dn_pipe *x, *b; lck_mtx_lock(dn_mutex); -/* locate pipe */ - for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; - a = b , b = b->next) ; + /* locate pipe */ + b = locate_pipe(p->pipe_nr); + if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ x = _MALLOC(sizeof(struct dn_pipe), M_DUMMYNET, M_DONTWAIT | M_ZERO) ; if (x == NULL) { @@ -1673,7 +2043,7 @@ config_pipe(struct dn_pipe *p) /* 
idle_heap is the only one from which we extract from the middle. */ x->idle_heap.size = x->idle_heap.elements = 0 ; - x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos); + x->idle_heap.offset=offsetof(struct dn_flow_queue, heap_pos); } else { x = b; /* Flush accumulated credit for all queues */ @@ -1697,20 +2067,16 @@ config_pipe(struct dn_pipe *p) FREE(x, M_DUMMYNET); return r ; } - x->next = b ; - if (a == NULL) - all_pipes = x ; - else - a->next = x ; + SLIST_INSERT_HEAD(&pipehash[HASH(x->pipe_nr)], + x, next); } lck_mtx_unlock(dn_mutex); } else { /* config queue */ - struct dn_flow_set *x, *a, *b ; + struct dn_flow_set *x, *b ; lck_mtx_lock(dn_mutex); /* locate flow_set */ - for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ; - a = b , b = b->next) ; + b = locate_flowset(pfs->fs_nr); if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ if (pfs->parent_nr == 0) { /* need link to a pipe */ @@ -1747,11 +2113,8 @@ config_pipe(struct dn_pipe *p) FREE(x, M_DUMMYNET); return r ; } - x->next = b; - if (a == NULL) - all_flow_sets = x; - else - a->next = x; + SLIST_INSERT_HEAD(&flowsethash[HASH(x->fs_nr)], + x, next); } lck_mtx_unlock(dn_mutex); } @@ -1805,6 +2168,7 @@ dummynet_drain(void) struct dn_flow_set *fs; struct dn_pipe *p; struct mbuf *m, *mnext; + int i; lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); @@ -1812,11 +2176,13 @@ dummynet_drain(void) heap_free(&wfq_ready_heap); heap_free(&extract_heap); /* remove all references to this pipe from flow_sets */ - for (fs = all_flow_sets; fs; fs= fs->next ) - purge_flow_set(fs, 0); + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + purge_flow_set(fs, 0); - for (p = all_pipes; p; p= p->next ) { - purge_flow_set(&(p->fs), 0); + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(p, &pipehash[i], next) { + purge_flow_set(&(p->fs), 0); mnext = p->head; while ((m = mnext) != NULL) { @@ -1838,35 +2204,35 @@ delete_pipe(struct dn_pipe *p) if (p->pipe_nr != 0 && p->fs.fs_nr != 0) return 
EINVAL ; if (p->pipe_nr != 0) { /* this is an old-style pipe */ - struct dn_pipe *a, *b; + struct dn_pipe *b; struct dn_flow_set *fs; + int i; lck_mtx_lock(dn_mutex); /* locate pipe */ - for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; - a = b , b = b->next) ; - if (b == NULL || (b->pipe_nr != p->pipe_nr) ) { + b = locate_pipe(p->pipe_nr); + if(b == NULL){ lck_mtx_unlock(dn_mutex); return EINVAL ; /* not found */ } - /* unlink from list of pipes */ - if (a == NULL) - all_pipes = b->next ; - else - a->next = b->next ; + /* Unlink from list of pipes. */ + SLIST_REMOVE(&pipehash[HASH(b->pipe_nr)], b, dn_pipe, next); + /* remove references to this pipe from the ip_fw rules. */ flush_pipe_ptrs(&(b->fs)); - /* remove all references to this pipe from flow_sets */ - for (fs = all_flow_sets; fs; fs= fs->next ) - if (fs->pipe == b) { - printf("dummynet: ++ ref to pipe %d from fs %d\n", - p->pipe_nr, fs->fs_nr); - fs->pipe = NULL ; - purge_flow_set(fs, 0); - } + /* Remove all references to this pipe from flow_sets. 
*/ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + if (fs->pipe == b) { + printf("dummynet: ++ ref to pipe %d from fs %d\n", + p->pipe_nr, fs->fs_nr); + fs->pipe = NULL ; + purge_flow_set(fs, 0); + } fs_remove_from_heap(&ready_heap, &(b->fs)); + purge_pipe(b); /* remove all data associated to this pipe */ /* remove reference to here from extract_heap and wfq_ready_heap */ pipe_remove_from_heap(&extract_heap, b); @@ -1875,24 +2241,22 @@ delete_pipe(struct dn_pipe *p) FREE(b, M_DUMMYNET); } else { /* this is a WF2Q queue (dn_flow_set) */ - struct dn_flow_set *a, *b; + struct dn_flow_set *b; lck_mtx_lock(dn_mutex); /* locate set */ - for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ; - a = b , b = b->next) ; - if (b == NULL || (b->fs_nr != p->fs.fs_nr) ) { + b = locate_flowset(p->fs.fs_nr); + if (b == NULL) { lck_mtx_unlock(dn_mutex); return EINVAL ; /* not found */ } - if (a == NULL) - all_flow_sets = b->next ; - else - a->next = b->next ; /* remove references to this flow_set from the ip_fw rules. */ flush_pipe_ptrs(b); + /* Unlink from list of flowsets. 
*/ + SLIST_REMOVE( &flowsethash[HASH(b->fs_nr)], b, dn_flow_set, next); + if (b->pipe != NULL) { /* Update total weight on parent pipe and cleanup parent heaps */ b->pipe->sum -= b->weight * b->backlogged ; @@ -1911,64 +2275,112 @@ delete_pipe(struct dn_pipe *p) /* * helper function used to copy data from kernel in DUMMYNET_GET */ -static char * -dn_copy_set(struct dn_flow_set *set, char *bp) +static +char* dn_copy_set_32(struct dn_flow_set *set, char *bp) { int i, copied = 0 ; - struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; - + struct dn_flow_queue *q; + struct dn_flow_queue_32 *qp = (struct dn_flow_queue_32 *)bp; + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + + for (i = 0 ; i <= set->rq_size ; i++) + for (q = set->rq[i] ; q ; q = q->next, qp++ ) { + if (q->hash_slot != i) + printf("dummynet: ++ at %d: wrong slot (have %d, " + "should be %d)\n", copied, q->hash_slot, i); + if (q->fs != set) + printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", + i, q->fs, set); + copied++ ; + cp_queue_to_32_user( q, qp ); + /* cleanup pointers */ + qp->next = (user32_addr_t)0 ; + qp->head = qp->tail = (user32_addr_t)0 ; + qp->fs = (user32_addr_t)0 ; + } + if (copied != set->rq_elements) + printf("dummynet: ++ wrong count, have %d should be %d\n", + copied, set->rq_elements); + return (char *)qp ; +} +static +char* dn_copy_set_64(struct dn_flow_set *set, char *bp) +{ + int i, copied = 0 ; + struct dn_flow_queue *q; + struct dn_flow_queue_64 *qp = (struct dn_flow_queue_64 *)bp; + + lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); + for (i = 0 ; i <= set->rq_size ; i++) - for (q = set->rq[i] ; q ; q = q->next, qp++ ) { - if (q->hash_slot != i) - printf("dummynet: ++ at %d: wrong slot (have %d, " - "should be %d)\n", copied, q->hash_slot, i); - if (q->fs != set) - printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", - i, q->fs, set); - copied++ ; - bcopy(q, qp, sizeof(*q)); - /* cleanup pointers */ - qp->next = NULL ; - qp->head = 
qp->tail = NULL ; - qp->fs = NULL ; - } + for (q = set->rq[i] ; q ; q = q->next, qp++ ) { + if (q->hash_slot != i) + printf("dummynet: ++ at %d: wrong slot (have %d, " + "should be %d)\n", copied, q->hash_slot, i); + if (q->fs != set) + printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", + i, q->fs, set); + copied++ ; + //bcopy(q, qp, sizeof(*q)); + cp_queue_to_64_user( q, qp ); + /* cleanup pointers */ + qp->next = USER_ADDR_NULL ; + qp->head = qp->tail = USER_ADDR_NULL ; + qp->fs = USER_ADDR_NULL ; + } if (copied != set->rq_elements) - printf("dummynet: ++ wrong count, have %d should be %d\n", - copied, set->rq_elements); + printf("dummynet: ++ wrong count, have %d should be %d\n", + copied, set->rq_elements); return (char *)qp ; } static size_t -dn_calc_size(void) +dn_calc_size(int is64user) { struct dn_flow_set *set ; struct dn_pipe *p ; - size_t size ; + size_t size = 0 ; + size_t pipesize; + size_t queuesize; + size_t setsize; + int i; lck_mtx_assert(dn_mutex, LCK_MTX_ASSERT_OWNED); - + if ( is64user ){ + pipesize = sizeof(struct dn_pipe_64); + queuesize = sizeof(struct dn_flow_queue_64); + setsize = sizeof(struct dn_flow_set_64); + } + else { + pipesize = sizeof(struct dn_pipe_32); + queuesize = sizeof( struct dn_flow_queue_32 ); + setsize = sizeof(struct dn_flow_set_32); + } /* * compute size of data structures: list of pipes and flow_sets. 
*/ - for (p = all_pipes, size = 0 ; p ; p = p->next ) - size += sizeof(*p) + - p->fs.rq_elements * sizeof(struct dn_flow_queue); - for (set = all_flow_sets ; set ; set = set->next ) - size += sizeof(*set) + - set->rq_elements * sizeof(struct dn_flow_queue); - return size ; + for (i = 0; i < HASHSIZE; i++) { + SLIST_FOREACH(p, &pipehash[i], next) + size += sizeof(*p) + + p->fs.rq_elements * sizeof(struct dn_flow_queue); + SLIST_FOREACH(set, &flowsethash[i], next) + size += sizeof (*set) + + set->rq_elements * sizeof(struct dn_flow_queue); + } + return size; } static int dummynet_get(struct sockopt *sopt) { - char *buf, *bp ; /* bp is the "copy-pointer" */ + char *buf, *bp=NULL; /* bp is the "copy-pointer" */ size_t size ; struct dn_flow_set *set ; struct dn_pipe *p ; int error=0, i ; + int is64user = 0; /* XXX lock held too long */ lck_mtx_lock(dn_mutex); @@ -1976,12 +2388,16 @@ dummynet_get(struct sockopt *sopt) * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we * cannot use this flag while holding a mutex. */ + if (proc_is64bit(sopt->sopt_p)) + is64user = 1; for (i = 0; i < 10; i++) { - size = dn_calc_size(); + size = dn_calc_size(is64user); lck_mtx_unlock(dn_mutex); buf = _MALLOC(size, M_TEMP, M_WAITOK); + if (buf == NULL) + return ENOBUFS; lck_mtx_lock(dn_mutex); - if (size == dn_calc_size()) + if (size == dn_calc_size(is64user)) break; FREE(buf, M_TEMP); buf = NULL; @@ -1990,41 +2406,33 @@ dummynet_get(struct sockopt *sopt) lck_mtx_unlock(dn_mutex); return ENOBUFS ; } - for (p = all_pipes, bp = buf ; p ; p = p->next ) { - struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ; - /* - * copy pipe descriptor into *bp, convert delay back to ms, - * then copy the flow_set descriptor(s) one at a time. - * After each flow_set, copy the queue descriptor it owns. - */ - bcopy(p, bp, sizeof(*p)); - pipe_bp->delay = (pipe_bp->delay * 1000) / (hz*10) ; - /* - * XXX the following is a hack based on ->next being the - * first field in dn_pipe and dn_flow_set. 
The correct - * solution would be to move the dn_flow_set to the beginning - * of struct dn_pipe. - */ - pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ; - /* clean pointers */ - pipe_bp->head = pipe_bp->tail = NULL ; - pipe_bp->fs.next = NULL ; - pipe_bp->fs.pipe = NULL ; - pipe_bp->fs.rq = NULL ; - bp += sizeof(*p); - bp = dn_copy_set( &(p->fs), bp ); + bp = buf; + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(p, &pipehash[i], next) { + /* + * copy pipe descriptor into *bp, convert delay back to ms, + * then copy the flow_set descriptor(s) one at a time. + * After each flow_set, copy the queue descriptor it owns. + */ + if ( is64user ){ + bp = cp_pipe_to_64_user(p, (struct dn_pipe_64 *)bp); + } + else{ + bp = cp_pipe_to_32_user(p, (struct dn_pipe_32 *)bp); + } } - for (set = all_flow_sets ; set ; set = set->next ) { - struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ; - bcopy(set, bp, sizeof(*set)); - /* XXX same hack as above */ - fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ; - fs_bp->pipe = NULL ; - fs_bp->rq = NULL ; - bp += sizeof(*set); - bp = dn_copy_set( set, bp ); + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(set, &flowsethash[i], next) { + struct dn_flow_set_64 *fs_bp = (struct dn_flow_set_64 *)bp ; + cp_flow_set_to_64_user(set, fs_bp); + /* XXX same hack as above */ + fs_bp->next = CAST_DOWN(user64_addr_t, DN_IS_QUEUE); + fs_bp->pipe = USER_ADDR_NULL; + fs_bp->rq = USER_ADDR_NULL ; + bp += sizeof(struct dn_flow_set_64); + bp = dn_copy_set_64( set, bp ); } lck_mtx_unlock(dn_mutex); @@ -2061,7 +2469,11 @@ ip_dn_ctl(struct sockopt *sopt) case IP_DUMMYNET_CONFIGURE : p = &tmp_pipe ; - error = sooptcopyin(sopt, p, sizeof(*p), sizeof(*p)); + if (proc_is64bit(sopt->sopt_p)) + error = cp_pipe_from_user_64( sopt, p ); + else + error = cp_pipe_from_user_32( sopt, p ); + if (error) break ; error = config_pipe(p); @@ -2069,7 +2481,10 @@ ip_dn_ctl(struct sockopt *sopt) case IP_DUMMYNET_DEL : /* remove a pipe or queue */ p = &tmp_pipe ; - error = 
sooptcopyin(sopt, p, sizeof(*p), sizeof(*p)); + if (proc_is64bit(sopt->sopt_p)) + error = cp_pipe_from_user_64( sopt, p ); + else + error = cp_pipe_from_user_32( sopt, p ); if (error) break ; @@ -2092,9 +2507,7 @@ ip_dn_init(void) return; } - all_pipes = NULL ; - all_flow_sets = NULL ; - ready_heap.size = ready_heap.elements = 0 ; + ready_heap.size = ready_heap.elements = 0 ; ready_heap.offset = 0 ; wfq_ready_heap.size = wfq_ready_heap.elements = 0 ; diff --git a/bsd/netinet/ip_dummynet.h b/bsd/netinet/ip_dummynet.h index 1994be1ba..83f38d24e 100644 --- a/bsd/netinet/ip_dummynet.h +++ b/bsd/netinet/ip_dummynet.h @@ -57,8 +57,6 @@ #ifndef _IP_DUMMYNET_H #define _IP_DUMMYNET_H -#if !__LP64__ - #include #ifdef PRIVATE @@ -243,7 +241,7 @@ struct dn_flow_queue { struct mbuf *head, *tail ; /* queue of packets */ u_int len ; u_int len_bytes ; - u_long numbytes ; /* credit for transmission (dynamic queues) */ + u_int32_t numbytes ; /* credit for transmission (dynamic queues) */ u_int64_t tot_pkts ; /* statistics counters */ u_int64_t tot_bytes ; @@ -282,7 +280,7 @@ struct dn_flow_queue { * latter case, the structure is located inside the struct dn_pipe). */ struct dn_flow_set { - struct dn_flow_set *next; /* next flow set in all_flow_sets list */ + SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ u_short fs_nr ; /* flow_set number */ u_short flags_fs; @@ -332,6 +330,8 @@ struct dn_flow_set { int max_pkt_size ; /* max packet size */ } ; +SLIST_HEAD(dn_flow_set_head, dn_flow_set); + /* * Pipe descriptor. Contains global parameters, delay-line queue, * and the flow_set used for fixed-rate queues. @@ -347,7 +347,7 @@ struct dn_flow_set { * */ struct dn_pipe { /* a pipe */ - struct dn_pipe *next ; + SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ int pipe_nr ; /* number */ int bandwidth; /* really, bytes/tick. 
*/ @@ -377,6 +377,8 @@ struct dn_pipe { /* a pipe */ struct dn_flow_set fs ; /* used with fixed-rate flows */ }; +SLIST_HEAD(dn_pipe_head, dn_pipe); + #ifdef KERNEL void ip_dn_init(void); /* called from raw_ip.c:load_ipfw() */ @@ -390,6 +392,257 @@ extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; #define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) +#pragma pack(4) + +struct dn_heap_32 { + int size ; + int elements ; + int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ + user32_addr_t p ; /* really an array of "size" entries */ +} ; + +struct dn_flow_queue_32 { + user32_addr_t next ; + struct ipfw_flow_id id ; + + user32_addr_t head, tail ; /* queue of packets */ + u_int len ; + u_int len_bytes ; + u_int32_t numbytes ; /* credit for transmission (dynamic queues) */ + + u_int64_t tot_pkts ; /* statistics counters */ + u_int64_t tot_bytes ; + u_int32_t drops ; + + int hash_slot ; /* debugging/diagnostic */ + + /* RED parameters */ + int avg ; /* average queue length est. (scaled) */ + int count ; /* arrivals since last RED drop */ + int random ; /* random value (scaled) */ + u_int32_t q_time ; /* start of queue idle time */ + + /* WF2Q+ support */ + user32_addr_t fs ; /* parent flow set */ + int heap_pos ; /* position (index) of struct in heap */ + dn_key sched_time ; /* current time when queue enters ready_heap */ + + dn_key S,F ; /* start time, finish time */ + /* + * Setting F < S means the timestamp is invalid. We only need + * to test this when the queue is empty. 
+ */ +} ; + +struct dn_flow_set_32 { + user32_addr_t next; /* next flow set in all_flow_sets list */ + + u_short fs_nr ; /* flow_set number */ + u_short flags_fs; +#define DN_HAVE_FLOW_MASK 0x0001 +#define DN_IS_RED 0x0002 +#define DN_IS_GENTLE_RED 0x0004 +#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ +#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ +#define DN_IS_PIPE 0x4000 +#define DN_IS_QUEUE 0x8000 + + user32_addr_t pipe ; /* pointer to parent pipe */ + u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ + + int weight ; /* WFQ queue weight */ + int qsize ; /* queue size in slots or bytes */ + int plr ; /* pkt loss rate (2^31-1 means 100%) */ + + struct ipfw_flow_id flow_mask ; + + /* hash table of queues onto this flow_set */ + int rq_size ; /* number of slots */ + int rq_elements ; /* active elements */ + user32_addr_t rq; /* array of rq_size entries */ + + u_int32_t last_expired ; /* do not expire too frequently */ + int backlogged ; /* #active queues for this flowset */ + + /* RED parameters */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ + u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ + u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ + u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ + u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ + user32_addr_t w_q_lookup ; /* lookup table for computing (1-w_q)^t */ + u_int lookup_depth ; /* depth of lookup table */ + int lookup_step ; /* granularity inside the lookup table */ + int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ + int avg_pkt_size ; /* medium packet size */ + int max_pkt_size ; /* max packet size */ +} ; + 
+struct dn_pipe_32 { /* a pipe */ + user32_addr_t next ; + + int pipe_nr ; /* number */ + int bandwidth; /* really, bytes/tick. */ + int delay ; /* really, ticks */ + + user32_addr_t head, tail ; /* packets in delay line */ + + /* WF2Q+ */ + struct dn_heap_32 scheduler_heap ; /* top extract - key Finish time*/ + struct dn_heap_32 not_eligible_heap; /* top extract- key Start time */ + struct dn_heap_32 idle_heap ; /* random extract - key Start=Finish time */ + + dn_key V ; /* virtual time */ + int sum; /* sum of weights of all active sessions */ + int numbytes; /* bits I can transmit (more or less). */ + + dn_key sched_time ; /* time pipe was scheduled in ready_heap */ + + /* + * When the tx clock come from an interface (if_name[0] != '\0'), its name + * is stored below, whereas the ifp is filled when the rule is configured. + */ + char if_name[IFNAMSIZ]; + user32_addr_t ifp ; + int ready ; /* set if ifp != NULL and we got a signal from it */ + + struct dn_flow_set_32 fs ; /* used with fixed-rate flows */ +}; +#pragma pack() + + +struct dn_heap_64 { + int size ; + int elements ; + int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ + user64_addr_t p ; /* really an array of "size" entries */ +} ; + + +struct dn_flow_queue_64 { + user64_addr_t next ; + struct ipfw_flow_id id ; + + user64_addr_t head, tail ; /* queue of packets */ + u_int len ; + u_int len_bytes ; + u_int32_t numbytes ; /* credit for transmission (dynamic queues) */ + + u_int64_t tot_pkts ; /* statistics counters */ + u_int64_t tot_bytes ; + u_int32_t drops ; + + int hash_slot ; /* debugging/diagnostic */ + + /* RED parameters */ + int avg ; /* average queue length est. 
(scaled) */ + int count ; /* arrivals since last RED drop */ + int random ; /* random value (scaled) */ + u_int32_t q_time ; /* start of queue idle time */ + + /* WF2Q+ support */ + user64_addr_t fs ; /* parent flow set */ + int heap_pos ; /* position (index) of struct in heap */ + dn_key sched_time ; /* current time when queue enters ready_heap */ + + dn_key S,F ; /* start time, finish time */ + /* + * Setting F < S means the timestamp is invalid. We only need + * to test this when the queue is empty. + */ +} ; + +struct dn_flow_set_64 { + user64_addr_t next; /* next flow set in all_flow_sets list */ + + u_short fs_nr ; /* flow_set number */ + u_short flags_fs; +#define DN_HAVE_FLOW_MASK 0x0001 +#define DN_IS_RED 0x0002 +#define DN_IS_GENTLE_RED 0x0004 +#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ +#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ +#define DN_IS_PIPE 0x4000 +#define DN_IS_QUEUE 0x8000 + + user64_addr_t pipe ; /* pointer to parent pipe */ + u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ + + int weight ; /* WFQ queue weight */ + int qsize ; /* queue size in slots or bytes */ + int plr ; /* pkt loss rate (2^31-1 means 100%) */ + + struct ipfw_flow_id flow_mask ; + + /* hash table of queues onto this flow_set */ + int rq_size ; /* number of slots */ + int rq_elements ; /* active elements */ + user64_addr_t rq; /* array of rq_size entries */ + + u_int32_t last_expired ; /* do not expire too frequently */ + int backlogged ; /* #active queues for this flowset */ + + /* RED parameters */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ + u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ + 
u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ + u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ + u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ + user64_addr_t w_q_lookup ; /* lookup table for computing (1-w_q)^t */ + u_int lookup_depth ; /* depth of lookup table */ + int lookup_step ; /* granularity inside the lookup table */ + int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ + int avg_pkt_size ; /* medium packet size */ + int max_pkt_size ; /* max packet size */ +} ; + +struct dn_pipe_64 { /* a pipe */ + user64_addr_t next ; + + int pipe_nr ; /* number */ + int bandwidth; /* really, bytes/tick. */ + int delay ; /* really, ticks */ + + user64_addr_t head, tail ; /* packets in delay line */ + + /* WF2Q+ */ + struct dn_heap_64 scheduler_heap ; /* top extract - key Finish time*/ + struct dn_heap_64 not_eligible_heap; /* top extract- key Start time */ + struct dn_heap_64 idle_heap ; /* random extract - key Start=Finish time */ + + dn_key V ; /* virtual time */ + int sum; /* sum of weights of all active sessions */ + int numbytes; /* bits I can transmit (more or less). */ + + dn_key sched_time ; /* time pipe was scheduled in ready_heap */ + + /* + * When the tx clock come from an interface (if_name[0] != '\0'), its name + * is stored below, whereas the ifp is filled when the rule is configured. + */ + char if_name[IFNAMSIZ]; + user64_addr_t ifp ; + int ready ; /* set if ifp != NULL and we got a signal from it */ + + struct dn_flow_set_64 fs ; /* used with fixed-rate flows */ +}; + + + /* * Return the IPFW rule associated with the dummynet tag; if any. * Make sure that the dummynet tag is not reused by lower layers. 
@@ -408,5 +661,4 @@ ip_dn_claim_rule(struct mbuf *m) #endif /* KERNEL */ #endif /* PRIVATE */ -#endif /* !__LP64__ */ #endif /* _IP_DUMMYNET_H */ diff --git a/bsd/netinet/ip_ecn.h b/bsd/netinet/ip_ecn.h index 9b4277ca0..d2245197e 100644 --- a/bsd/netinet/ip_ecn.h +++ b/bsd/netinet/ip_ecn.h @@ -67,4 +67,4 @@ extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/ip_edgehole.c b/bsd/netinet/ip_edgehole.c deleted file mode 100644 index aa56449ea..000000000 --- a/bsd/netinet/ip_edgehole.c +++ /dev/null @@ -1,333 +0,0 @@ -#include -#include -#include -#include -#include -#include // For bzero -#include // for printf -#include // For panic -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ip_edgehole.h" - -enum -{ - kEdgeHoleFlag_BlockInternet = 0x00000001, - kEdgeHoleFlag_BlockVV = 0x00000002 -}; - -struct edgehole_tag -{ - // flags tells us whether or not we should block traffic - u_int32_t eh_flags; - - // These fields are used to help us find the PCB after we block traffic for TCP - struct inpcbinfo *eh_inpinfo; - struct inpcb *eh_inp; -}; - -struct edgehole_delayed_notify -{ - // flags tells us whether or not we should block traffic - struct edgehole_delayed_notify *next; - - // These fields are used to help us find the PCB after we block traffic for TCP - struct inpcbinfo *inpinfo; - struct inpcb *inp; -}; - -static mbuf_tag_id_t edgehole_tag = 0; -static thread_call_t edgehole_callout = NULL; -static OSMallocTag edgehole_mtag = 0; -static struct edgehole_delayed_notify *edgehole_delay_list = NULL; - -#ifndef HAS_COMPARE_AND_SWAP_PTR -// 64bit kernels have an OSCompareAndSwapPtr that does the right thing -static Boolean -OSCompareAndSwapPtr( - void *oldValue, - void *newValue, - volatile void *address) -{ - 
return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, (volatile UInt32*)address); -} -#endif - -static void -ip_edgehole_notify_delayed( - struct inpcb *inp, - struct inpcbinfo *inpinfo) -{ - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) - { - // We've found an inpcb for the packet we're dropping. - struct socket *so = inp->inp_socket; - if (so && so != &inpinfo->nat_dummy_socket) - { - socket_lock(so, 1); - if (in_pcb_checkstate(inp, WNT_RELEASE,1) != WNT_STOPUSING) - { - if (inp->inp_ip_p == IPPROTO_TCP) - { - // Why do we still have caddr_t? Come on! Casting from - // caddr_t to something else causes "cast increases required alignment" - // warnings. warnings are treated as failures. This union does the - // exact same thing without the warning. - union - { - caddr_t caddrt_sucks; - void *void_ptr; - } bite_me; - - bite_me.caddrt_sucks = inp->inp_ppcb; - tcp_drop((struct tcpcb*)bite_me.void_ptr, EPERM); - } - else - { - // Is this enough? - socantsendmore(so); - } - } - socket_unlock(so, 1); - } - } -} - -// Some shortcomings of this strategy: -// 1) an inpcb could be reused for a new socket before we get a chance to notify - -static void -ip_edgehole_process_delayed( - __unused void *unused1, - __unused void *unused2) -{ - struct edgehole_delayed_notify *head; - - while (edgehole_delay_list) - { - // Atomically grab the list - do - { - head = edgehole_delay_list; - } - while (!OSCompareAndSwapPtr(head, NULL, &edgehole_delay_list)); - - if (head == NULL) - { - break; - } - - // Prune duplicates from the list - struct edgehole_delayed_notify *current; - struct edgehole_delayed_notify **current_p; - struct edgehole_delayed_notify *ye_dead; - for (current = head; current && current->next; current = current->next) - { - current_p = &head; - while (*current_p) - { - if ((*current_p)->inp == current->inp) - { - ye_dead = *current_p; - *current_p = ye_dead->next; - OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag); - } - else - { - current_p = 
&(*current_p)->next; - } - } - } - - while (head) - { - struct inpcbinfo *lockedinfo; - - lockedinfo = head->inpinfo; - - // Lock the list - lck_rw_lock_shared(lockedinfo->mtx); - - struct inpcb *inp; - - // Walk the inp list. - LIST_FOREACH(inp, lockedinfo->listhead, inp_list) - { - // Walk the list of notifications - for (current = head; current != NULL; current = current->next) - { - // Found a match, notify - if (current->inpinfo == lockedinfo && current->inp == inp) - { - ip_edgehole_notify_delayed(inp, lockedinfo); - } - } - } - - lck_rw_done(lockedinfo->mtx); - - // Release all the notifications for this inpcbinfo - current_p = &head; - while (*current_p) - { - // Free any items for this inpcbinfo - if ((*current_p)->inpinfo == lockedinfo) - { - ye_dead = *current_p; - *current_p = ye_dead->next; - OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag); - } - else - { - current_p = &(*current_p)->next; - } - } - } - } -} - -static void -ip_edgehole_notify( - struct edgehole_tag *tag) -{ - // Since the lock on the socket may be held while a packet is being transmitted, - // we must allocate storage to keep track of this information and schedule a - // thread to handle the work. 
- - if (tag->eh_inp == NULL || tag->eh_inpinfo == NULL) - return; - - struct edgehole_delayed_notify *delayed = OSMalloc(sizeof(*delayed), edgehole_mtag); - if (delayed) - { - delayed->inp = tag->eh_inp; - delayed->inpinfo = tag->eh_inpinfo; - do - { - delayed->next = edgehole_delay_list; - } - while (!OSCompareAndSwapPtr(delayed->next, delayed, &edgehole_delay_list)); - - thread_call_enter(edgehole_callout); - } -} - -__private_extern__ void -ip_edgehole_attach( - struct inpcb *inp) -{ - inp->inpcb_edgehole_flags = 0; - inp->inpcb_edgehole_mask = 0; - - // TBD: call MAC framework to find out of we are allowed to use EDGE -#ifdef TEST_THE_EVIL_EDGE_HOLE - char pidname[64]; - proc_selfname(pidname, sizeof(pidname)); - pidname[sizeof(pidname) -1] = 0; - if (strcmp(pidname, "MobileSafari") == 0 || - strcmp(pidname, "ping") == 0) - { - inp->inpcb_edgehole_flags = kEdgeHoleFlag_BlockInternet; - inp->inpcb_edgehole_mask = kEdgeHoleFlag_BlockInternet; - } -#endif - - if (inp->inpcb_edgehole_mask != 0) - { - // Allocate a callout - if (edgehole_callout == NULL) - { - thread_call_t tmp_callout = thread_call_allocate(ip_edgehole_process_delayed, NULL); - if (!tmp_callout) panic("ip_edgehole_attach: thread_call_allocate failed"); - if (!OSCompareAndSwapPtr(NULL, tmp_callout, &edgehole_callout)) - thread_call_free(tmp_callout); - } - - // Allocate a malloc tag - if (edgehole_mtag == 0) - { - OSMallocTag mtag = OSMalloc_Tagalloc("com.apple.ip_edgehole", 0); - if (!mtag) panic("ip_edgehole_attach: OSMalloc_Tagalloc failed"); - if (!OSCompareAndSwapPtr(NULL, mtag, &edgehole_mtag)) - OSMalloc_Tagfree(mtag); - } - } -} - -__private_extern__ void -ip_edgehole_mbuf_tag( - struct inpcb *inp, - mbuf_t m) -{ - // Immediately bail if there are no flags on this inpcb - if (inp->inpcb_edgehole_mask == 0) - { - return; - } - - // Allocate a tag_id if we don't have one already - if (edgehole_tag == 0) - mbuf_tag_id_find("com.apple.edgehole", &edgehole_tag); - - struct edgehole_tag *tag; - 
size_t length; - - // Find an existing tag - if (mbuf_tag_find(m, edgehole_tag, 0, &length, (void**)&tag) == 0) - { - if (length != sizeof(*tag)) - panic("ip_edgehole_mbuf_tag - existing tag is wrong size"); - - // add restrictions - tag->eh_flags = (tag->eh_flags & (~inp->inpcb_edgehole_mask)) | - (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask); - } - else if ((inp->inpcb_edgehole_mask & inp->inpcb_edgehole_flags) != 0) - { - // Add the tag - if (mbuf_tag_allocate(m, edgehole_tag, 0, sizeof(*tag), MBUF_WAITOK, (void**)&tag) != 0) - panic("ip_edgehole_mbuf_tag - mbuf_tag_allocate failed"); // ouch - how important is it that we block this stuff? - - tag->eh_flags = (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask); - tag->eh_inp = inp; - tag->eh_inpinfo = inp->inp_pcbinfo; - } -} - -int -ip_edgehole_filter( - mbuf_t *m, - __unused int isVV) -{ - struct edgehole_tag *tag; - size_t length; - - if (mbuf_tag_find(*m, edgehole_tag, 0, &length, (void**)&tag) == 0) - { - if (length != sizeof(*tag)) - panic("ip_edgehole_filter - existing tag is wrong size"); - - if ((tag->eh_flags & kEdgeHoleFlag_BlockInternet) != 0) - { - ip_edgehole_notify(tag); - - mbuf_freem(*m); *m = NULL; - return EPERM; - } - } - - return 0; -} diff --git a/bsd/netinet/ip_edgehole.h b/bsd/netinet/ip_edgehole.h deleted file mode 100644 index 5bfe7a05b..000000000 --- a/bsd/netinet/ip_edgehole.h +++ /dev/null @@ -1,17 +0,0 @@ -#include - -struct inpcb; - -// Tag an mbuf on the way out with the edge flags from the inpcb -extern void ip_edgehole_mbuf_tag(struct inpcb *inp, mbuf_t m); - -// Attach the edge flags to the inpcb -extern void ip_edgehole_attach(struct inpcb *inp); - -// Called by the edge interface to determine if the edge interface -// should drop the packet. Will return 0 if the packet should continue -// to be processed or EPERM if ip_edgehole_filter swallowed the packet. -// When ip_edgehole_filter swallows a packet, it frees it and sets your -// pointer to it to NULL. 
isVV should be set to zero unless the edge -// interface in question is the visual voicemail edge interface. -extern int ip_edgehole_filter(mbuf_t *m, int isVV); diff --git a/bsd/netinet/ip_encap.h b/bsd/netinet/ip_encap.h index 93ba512c2..66dfb2588 100644 --- a/bsd/netinet/ip_encap.h +++ b/bsd/netinet/ip_encap.h @@ -86,6 +86,6 @@ const struct encaptab *encap_attach_func(int, int, const struct protosw *, void *); int encap_detach(const struct encaptab *); void *encap_getarg(struct mbuf *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /*_NETINET_IP_ENCAP_H_*/ diff --git a/bsd/netinet/ip_flow.c b/bsd/netinet/ip_flow.c index 4fb3f8596..be5aa9495 100644 --- a/bsd/netinet/ip_flow.c +++ b/bsd/netinet/ip_flow.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -85,6 +85,8 @@ #include #include +#if IPFLOW + #define IPFLOW_TIMER (5 * PR_SLOWHZ) #define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ #define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) @@ -219,9 +221,9 @@ ipflow_addstats( struct ipflow *ipf) { ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; - OSAddAtomic(ipf->ipf_errors + ipf->ipf_dropped, (SInt32*)&ipstat.ips_cantforward); - OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_forward); - OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_fastforward); + OSAddAtomic(ipf->ipf_errors + ipf->ipf_dropped, &ipstat.ips_cantforward); + OSAddAtomic(ipf->ipf_uses, &ipstat.ips_forward); + OSAddAtomic(ipf->ipf_uses, &ipstat.ips_fastforward); } static void @@ -279,7 +281,6 @@ ipflow_reap( LIST_REMOVE(ipf, ipf_next); ipflow_addstats(ipf); rtfree(ipf->ipf_ro.ro_rt); - ipf->ipf_ro.ro_rt = NULL; return ipf; } /* note: called under the ip_mutex lock */ @@ -299,8 +300,8 @@ ipflow_slowtimo( } else { ipf->ipf_last_uses = ipf->ipf_uses; ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; - OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_forward); 
- OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_fastforward); + OSAddAtomic(ipf->ipf_uses, &ipstat.ips_forward); + OSAddAtomic(ipf->ipf_uses, &ipstat.ips_fastforward); ipstat.ips_forward += ipf->ipf_uses; ipstat.ips_fastforward += ipf->ipf_uses; ipf->ipf_uses = 0; @@ -345,7 +346,6 @@ ipflow_create( LIST_REMOVE(ipf, ipf_next); ipflow_addstats(ipf); rtfree(ipf->ipf_ro.ro_rt); - ipf->ipf_ro.ro_rt = NULL; ipf->ipf_uses = ipf->ipf_last_uses = 0; ipf->ipf_errors = ipf->ipf_dropped = 0; } @@ -353,10 +353,8 @@ ipflow_create( /* * Fill in the updated information. */ - lck_mtx_lock(rt_mtx); ipf->ipf_ro = *ro; - rtref(ro->ro_rt); - lck_mtx_unlock(rt_mtx); + RT_ADDREF(ro->ro_rt); ipf->ipf_dst = ip->ip_dst; ipf->ipf_src = ip->ip_src; ipf->ipf_tos = ip->ip_tos; @@ -367,3 +365,16 @@ ipflow_create( hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); } +#else /* !IPFLOW */ +int +ipflow_fastforward(struct mbuf *m) +{ +#pragma unused(m) + /* + * Since this symbol is exported (albeit unsupported), just return + * false to keep things (e.g. PPP) happy, in case ipflow is not + * compiled in. 
+ */ + return (0); +} +#endif /* !IPFLOW */ diff --git a/bsd/netinet/ip_flow.h b/bsd/netinet/ip_flow.h index 37dcbddee..972d96351 100644 --- a/bsd/netinet/ip_flow.h +++ b/bsd/netinet/ip_flow.h @@ -75,12 +75,12 @@ struct ipflow { u_int8_t ipf_tos; /* type-of-service */ struct route ipf_ro; /* associated route entry */ - u_long ipf_uses; /* number of uses in this period */ + u_int32_t ipf_uses; /* number of uses in this period */ int ipf_timer; /* remaining lifetime of this entry */ - u_long ipf_dropped; /* ENOBUFS returned by if_output */ - u_long ipf_errors; /* other errors returned by if_output */ - u_long ipf_last_uses; /* number of uses in last period */ + u_int32_t ipf_dropped; /* ENOBUFS returned by if_output */ + u_int32_t ipf_errors; /* other errors returned by if_output */ + u_int32_t ipf_last_uses; /* number of uses in last period */ }; #endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/ip_fw.h b/bsd/netinet/ip_fw.h index 174bf8ff1..6755fab56 100644 --- a/bsd/netinet/ip_fw.h +++ b/bsd/netinet/ip_fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,8 +43,6 @@ #ifndef _IP_FW_H #define _IP_FW_H -#if !__LP64__ - #include #ifdef IPFW2 @@ -323,8 +321,7 @@ extern int fw_one_pass; extern int fw_enable; #define IPFW_LOADED (ip_fw_chk_ptr != NULL) extern struct ipfw_flow_id last_pkt ; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif /* !__LP64__ */ -#endif !IPFW2 +#endif /* !IPFW2 */ #endif /* _IP_FW_H */ diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index 400e032b5..9be482912 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * @@ -128,6 +156,9 @@ static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ static void ipfw_kev_post_msg(u_int32_t ); +static int Get32static_len(void); +static int Get64static_len(void); + #ifdef SYSCTL_NODE static int ipfw_sysctl SYSCTL_HANDLER_ARGS; @@ -215,6 +246,8 @@ static u_int32_t dyn_keepalive = 1; /* do send keepalives */ static u_int32_t static_count; /* # of static rules */ static u_int32_t static_len; /* size in bytes of static rules */ +static u_int32_t static_len_32; /* size in bytes of static rules for 32 bit client */ +static u_int32_t static_len_64; /* size in bytes of static rules for 64 bit client */ static u_int32_t dyn_count; /* # of dynamic rules */ static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ @@ -243,6 +276,7 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, &dyn_keepalive, 0, "Enable keepalives for dyn. rules"); + static int ipfw_sysctl SYSCTL_HANDLER_ARGS { @@ -287,6 +321,12 @@ static size_t ipfwstringlen; else log a ; \ } +#define RULESIZE64(rule) (sizeof(struct ip_fw_64) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + +#define RULESIZE32(rule) (sizeof(struct ip_fw_32) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + void ipfwsyslog( int level, const char *format,...) 
{ #define msgsize 100 @@ -350,6 +390,441 @@ is_icmp_query(struct ip *ip) } #undef TT +static int +Get32static_len() +{ + int diff; + int len = static_len_32; + struct ip_fw *rule; + char *useraction; + + for (rule = layer3_chain; rule ; rule = rule->next) { + if (rule->reserved_1 == IPFW_RULE_INACTIVE) { + continue; + } + if ( rule->act_ofs ){ + useraction = (char*)ACTION_PTR( rule ); + if ( ((ipfw_insn*)useraction)->opcode == O_QUEUE || ((ipfw_insn*)useraction)->opcode == O_PIPE){ + diff = sizeof(ipfw_insn_pipe) - sizeof(ipfw_insn_pipe_32); + if (diff) + len -= diff; + } + } + } + return len; +} + +static int +Get64static_len() +{ + int diff; + int len = static_len_64; + struct ip_fw *rule; + char *useraction; + + for (rule = layer3_chain; rule ; rule = rule->next) { + if (rule->reserved_1 == IPFW_RULE_INACTIVE) { + continue; + } + if ( rule->act_ofs ){ + useraction = (char *)ACTION_PTR( rule ); + if ( ((ipfw_insn*)useraction)->opcode == O_QUEUE || ((ipfw_insn*)useraction)->opcode == O_PIPE){ + diff = sizeof(ipfw_insn_pipe_64) - sizeof(ipfw_insn_pipe); + if (diff) + len += diff; + } + } + } + return len; +} + +static void +copyto32fw_insn( struct ip_fw_32 *fw32 , struct ip_fw *user_ip_fw, int cmdsize) +{ + char *end; + char *fw32action; + char *useraction; + int justcmdsize; + int diff=0; + int actioncopysize; + + end = ((char*)user_ip_fw->cmd) + cmdsize; + useraction = (char*)ACTION_PTR( user_ip_fw ); + fw32action = (char*)fw32->cmd + (user_ip_fw->act_ofs * sizeof(uint32_t)); + if ( ( justcmdsize = ( fw32action - (char*)fw32->cmd))) + bcopy( user_ip_fw->cmd, fw32->cmd, justcmdsize); + while ( useraction < end ){ + if ( ((ipfw_insn*)useraction)->opcode == O_QUEUE || ((ipfw_insn*)useraction)->opcode == O_PIPE){ + actioncopysize = sizeof(ipfw_insn_pipe_32); + ((ipfw_insn*)fw32action)->opcode = ((ipfw_insn*)useraction)->opcode; + ((ipfw_insn*)fw32action)->arg1 = ((ipfw_insn*)useraction)->arg1; + ((ipfw_insn*)fw32action)->len = F_INSN_SIZE(ipfw_insn_pipe_32); + diff = 
((ipfw_insn*)useraction)->len - ((ipfw_insn*)fw32action)->len; + if ( diff ){ + fw32->cmd_len -= diff; + } + } else{ + actioncopysize = (F_LEN((ipfw_insn*)useraction) ? (F_LEN((ipfw_insn*)useraction)) : 1 ) * sizeof(uint32_t); + bcopy( useraction, fw32action, actioncopysize ); + } + useraction += (F_LEN((ipfw_insn*)useraction) ? (F_LEN((ipfw_insn*)useraction)) : 1 ) * sizeof(uint32_t); + fw32action += actioncopysize; + } +} + +/* + * Copy the instructions of the native rule user_ip_fw into the 64-bit user + * layout fw64, walking cmdsize bytes of user_ip_fw->cmd; the bytes that + * precede the action are copied verbatim, O_QUEUE and O_PIPE instructions + * found from the action onward are re-emitted with the ipfw_insn_pipe_64 + * length (fw64->cmd_len is adjusted by the difference), and every other + * instruction is copied unchanged + */ +static void +copyto64fw_insn( struct ip_fw_64 *fw64 , struct ip_fw *user_ip_fw, int cmdsize) +{ + char *end; + char *fw64action; + char *useraction; + int justcmdsize; + int diff; + int actioncopysize; + + end = ((char *)user_ip_fw->cmd) + cmdsize; + useraction = (char*)ACTION_PTR( user_ip_fw ); + if ( (justcmdsize = (useraction - (char*)user_ip_fw->cmd))) + bcopy( user_ip_fw->cmd, fw64->cmd, justcmdsize); + fw64action = (char*)fw64->cmd + justcmdsize; + while ( useraction < end ){ + /* test the instruction being walked, not the rule header */ + if ( ((ipfw_insn*)useraction)->opcode == O_QUEUE || ((ipfw_insn*)useraction)->opcode == O_PIPE){ + actioncopysize = sizeof(ipfw_insn_pipe_64); + ((ipfw_insn*)fw64action)->opcode = ((ipfw_insn*)useraction)->opcode; + ((ipfw_insn*)fw64action)->arg1 = ((ipfw_insn*)useraction)->arg1; + ((ipfw_insn*)fw64action)->len = F_INSN_SIZE(ipfw_insn_pipe_64); + diff = ((ipfw_insn*)fw64action)->len - ((ipfw_insn*)useraction)->len; + if (diff) + fw64->cmd_len += diff; + + } else{ + actioncopysize = (F_LEN((ipfw_insn*)useraction) ? (F_LEN((ipfw_insn*)useraction)) : 1 ) * sizeof(uint32_t); + bcopy( useraction, fw64action, actioncopysize ); + } + useraction += (F_LEN((ipfw_insn*)useraction) ? 
(F_LEN((ipfw_insn*)useraction)) : 1 ) * sizeof(uint32_t); + fw64action += actioncopysize; + } +} + +static void +copyto32fw( struct ip_fw *user_ip_fw, struct ip_fw_32 *fw32 , __unused size_t copysize) +{ + size_t rulesize, cmdsize; + + fw32->version = user_ip_fw->version; + fw32->context = CAST_DOWN_EXPLICIT( user32_addr_t, user_ip_fw->context); + fw32->next = CAST_DOWN_EXPLICIT(user32_addr_t, user_ip_fw->next); + fw32->next_rule = CAST_DOWN_EXPLICIT(user32_addr_t, user_ip_fw->next_rule); + fw32->act_ofs = user_ip_fw->act_ofs; + fw32->cmd_len = user_ip_fw->cmd_len; + fw32->rulenum = user_ip_fw->rulenum; + fw32->set = user_ip_fw->set; + fw32->set_masks[0] = user_ip_fw->set_masks[0]; + fw32->set_masks[1] = user_ip_fw->set_masks[1]; + fw32->pcnt = user_ip_fw->pcnt; + fw32->bcnt = user_ip_fw->bcnt; + fw32->timestamp = user_ip_fw->timestamp; + fw32->reserved_1 = user_ip_fw->reserved_1; + fw32->reserved_2 = user_ip_fw->reserved_2; + rulesize = sizeof(struct ip_fw_32) + (user_ip_fw->cmd_len * sizeof(ipfw_insn) - 4); + cmdsize = user_ip_fw->cmd_len * sizeof(u_int32_t); + copyto32fw_insn( fw32, user_ip_fw, cmdsize ); +} + +static void +copyto64fw( struct ip_fw *user_ip_fw, struct ip_fw_64 *fw64, size_t copysize) +{ + size_t rulesize, cmdsize; + + fw64->version = user_ip_fw->version; + fw64->context = CAST_DOWN_EXPLICIT(__uint64_t, user_ip_fw->context); + fw64->next = CAST_DOWN_EXPLICIT(user64_addr_t, user_ip_fw->next); + fw64->next_rule = CAST_DOWN_EXPLICIT(user64_addr_t, user_ip_fw->next_rule); + fw64->act_ofs = user_ip_fw->act_ofs; + fw64->cmd_len = user_ip_fw->cmd_len; + fw64->rulenum = user_ip_fw->rulenum; + fw64->set = user_ip_fw->set; + fw64->set_masks[0] = user_ip_fw->set_masks[0]; + fw64->set_masks[1] = user_ip_fw->set_masks[1]; + fw64->pcnt = user_ip_fw->pcnt; + fw64->bcnt = user_ip_fw->bcnt; + fw64->timestamp = user_ip_fw->timestamp; + fw64->reserved_1 = user_ip_fw->reserved_1; + fw64->reserved_2 = user_ip_fw->reserved_2; + rulesize = sizeof(struct ip_fw_64) + 
(user_ip_fw->cmd_len * sizeof(ipfw_insn) - 4); + if (rulesize > copysize) + cmdsize = copysize - sizeof(struct ip_fw_64) + 4; + else + cmdsize = user_ip_fw->cmd_len * sizeof(u_int32_t); + copyto64fw_insn( fw64, user_ip_fw, cmdsize); +} + +static int +copyfrom32fw_insn( struct ip_fw_32 *fw32 , struct ip_fw *user_ip_fw, int cmdsize) +{ + char *end; + char *fw32action; + char *useraction; + int justcmdsize; + int diff; + int actioncopysize; + + end = ((char*)fw32->cmd) + cmdsize; + fw32action = (char*)ACTION_PTR( fw32 ); + if ((justcmdsize = (fw32action - (char*)fw32->cmd))) + bcopy( fw32->cmd, user_ip_fw->cmd, justcmdsize); + useraction = (char*)user_ip_fw->cmd + justcmdsize; + while ( fw32action < end ){ + if ( ((ipfw_insn*)fw32action)->opcode == O_QUEUE || ((ipfw_insn*)fw32action)->opcode == O_PIPE){ + actioncopysize = sizeof(ipfw_insn_pipe); + ((ipfw_insn*)useraction)->opcode = ((ipfw_insn*)fw32action)->opcode; + ((ipfw_insn*)useraction)->arg1 = ((ipfw_insn*)fw32action)->arg1; + ((ipfw_insn*)useraction)->len = F_INSN_SIZE(ipfw_insn_pipe); + diff = ((ipfw_insn*)useraction)->len - ((ipfw_insn*)fw32action)->len; + if (diff){ + /* readjust the cmd_len */ + user_ip_fw->cmd_len += diff; + } + } else{ + actioncopysize = (F_LEN((ipfw_insn*)fw32action) ? (F_LEN((ipfw_insn*)fw32action)) : 1 ) * sizeof(uint32_t); + bcopy( fw32action, useraction, actioncopysize ); + } + fw32action += (F_LEN((ipfw_insn*)fw32action) ? 
(F_LEN((ipfw_insn*)fw32action)) : 1 ) * sizeof(uint32_t); + useraction += actioncopysize; + } + + return( useraction - (char*)user_ip_fw->cmd ); +} + +static int +copyfrom64fw_insn( struct ip_fw_64 *fw64 , struct ip_fw *user_ip_fw, int cmdsize) +{ + char *end; + char *fw64action; + char *useraction; + int justcmdsize; + int diff; + int actioncopysize; + + end = ((char *)fw64->cmd) + cmdsize ; + fw64action = (char*)ACTION_PTR( fw64 ); + if ( (justcmdsize = (fw64action - (char*)fw64->cmd))) + bcopy( fw64->cmd, user_ip_fw->cmd, justcmdsize); + useraction = (char*)user_ip_fw->cmd + justcmdsize; + while ( fw64action < end ){ + if ( ((ipfw_insn*)fw64action)->opcode == O_QUEUE || ((ipfw_insn*)fw64action)->opcode == O_PIPE){ + actioncopysize = sizeof(ipfw_insn_pipe); + ((ipfw_insn*)useraction)->opcode = ((ipfw_insn*)fw64action)->opcode; + ((ipfw_insn*)useraction)->arg1 = ((ipfw_insn*)fw64action)->arg1; + ((ipfw_insn*)useraction)->len = F_INSN_SIZE(ipfw_insn_pipe); + diff = ((ipfw_insn*)fw64action)->len - ((ipfw_insn*)useraction)->len; + if (diff) { + /* readjust the cmd_len */ + user_ip_fw->cmd_len -= diff; + } + } else{ + actioncopysize = (F_LEN((ipfw_insn*)fw64action) ? (F_LEN((ipfw_insn*)fw64action)) : 1 ) * sizeof(uint32_t); + bcopy( fw64action, useraction, actioncopysize ); + } + fw64action += (F_LEN((ipfw_insn*)fw64action) ? 
(F_LEN((ipfw_insn*)fw64action)) : 1 ) * sizeof(uint32_t); + useraction += actioncopysize; + } + return( useraction - (char*)user_ip_fw->cmd ); +} + +/* + * Convert the 32-bit user layout rule fw32 into the native rule user_ip_fw; + * copysize is the number of valid bytes at fw32, and when the rule described + * by fw32->cmd_len does not fit in copysize only the instruction bytes that + * are present are converted; returns the size in bytes of the resulting + * native rule + */ +static size_t +copyfrom32fw( struct ip_fw_32 *fw32, struct ip_fw *user_ip_fw, size_t copysize) +{ + size_t rulesize, cmdsize; + + user_ip_fw->version = fw32->version; + user_ip_fw->context = CAST_DOWN(void *, fw32->context); + user_ip_fw->next = CAST_DOWN(struct ip_fw*, fw32->next); + user_ip_fw->next_rule = CAST_DOWN_EXPLICIT(struct ip_fw*, fw32->next_rule); + user_ip_fw->act_ofs = fw32->act_ofs; + user_ip_fw->cmd_len = fw32->cmd_len; + user_ip_fw->rulenum = fw32->rulenum; + user_ip_fw->set = fw32->set; + user_ip_fw->set_masks[0] = fw32->set_masks[0]; + user_ip_fw->set_masks[1] = fw32->set_masks[1]; + user_ip_fw->pcnt = fw32->pcnt; + user_ip_fw->bcnt = fw32->bcnt; + user_ip_fw->timestamp = fw32->timestamp; + user_ip_fw->reserved_1 = fw32->reserved_1; + user_ip_fw->reserved_2 = fw32->reserved_2; + rulesize = sizeof(struct ip_fw_32) + (fw32->cmd_len * sizeof(ipfw_insn) - 4); + if ( rulesize > copysize ) + /* the header size already counts the first instruction word (see rulesize), so add it back */ + cmdsize = copysize - sizeof(struct ip_fw_32) + 4; + else + cmdsize = fw32->cmd_len * sizeof(ipfw_insn); + cmdsize = copyfrom32fw_insn( fw32, user_ip_fw, cmdsize); + return( sizeof(struct ip_fw) + cmdsize - 4); +} + +static size_t +copyfrom64fw( struct ip_fw_64 *fw64, struct ip_fw *user_ip_fw, size_t copysize) +{ + size_t rulesize, cmdsize; + + user_ip_fw->version = fw64->version; + user_ip_fw->context = CAST_DOWN_EXPLICIT( void *, fw64->context); + user_ip_fw->next = CAST_DOWN_EXPLICIT(struct ip_fw*, fw64->next); + user_ip_fw->next_rule = CAST_DOWN_EXPLICIT(struct ip_fw*, fw64->next_rule); + user_ip_fw->act_ofs = fw64->act_ofs; + user_ip_fw->cmd_len = fw64->cmd_len; + user_ip_fw->rulenum = fw64->rulenum; + user_ip_fw->set = fw64->set; + user_ip_fw->set_masks[0] = fw64->set_masks[0]; + user_ip_fw->set_masks[1] = fw64->set_masks[1]; + user_ip_fw->pcnt = fw64->pcnt; + user_ip_fw->bcnt = fw64->bcnt; + user_ip_fw->timestamp = 
fw64->timestamp; + user_ip_fw->reserved_1 = fw64->reserved_1; + user_ip_fw->reserved_2 = fw64->reserved_2; + //bcopy( fw64->cmd, user_ip_fw->cmd, fw64->cmd_len * sizeof(ipfw_insn)); + rulesize = sizeof(struct ip_fw_64) + (fw64->cmd_len * sizeof(ipfw_insn) - 4); + if ( rulesize > copysize ) + /* the header size already counts the first instruction word (see rulesize), so add it back */ + cmdsize = copysize - sizeof(struct ip_fw_64) + 4; + else + cmdsize = fw64->cmd_len * sizeof(ipfw_insn); + cmdsize = copyfrom64fw_insn( fw64, user_ip_fw, cmdsize); + return( sizeof(struct ip_fw) + cmdsize - 4); +} + +/* + * Write one ipfw_dyn_rule_compat_32 record for every dynamic rule found in + * ipfw_dyn_v into the array at dyn_rule_vers1, adding the size of each record + * to *len; the next field of the last record written is set to 0 + * NOTE(review): no bound on the output array is checked here - presumably the + * caller sizes it from the dynamic rule count; confirm + */ +static +void cp_dyn_to_comp_32( struct ipfw_dyn_rule_compat_32 *dyn_rule_vers1, int *len) +{ + struct ipfw_dyn_rule_compat_32 *dyn_last=NULL; + ipfw_dyn_rule *p; + int i; + + if (ipfw_dyn_v) { + for (i = 0; i < curr_dyn_buckets; i++) { + for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) { + dyn_rule_vers1->chain = (user32_addr_t)(p->rule->rulenum); + dyn_rule_vers1->id = p->id; + dyn_rule_vers1->mask = p->id; + dyn_rule_vers1->type = p->dyn_type; + dyn_rule_vers1->expire = p->expire; + dyn_rule_vers1->pcnt = p->pcnt; + dyn_rule_vers1->bcnt = p->bcnt; + dyn_rule_vers1->bucket = p->bucket; + dyn_rule_vers1->state = p->state; + + dyn_rule_vers1->next = CAST_DOWN_EXPLICIT( user32_addr_t, p->next); + dyn_last = dyn_rule_vers1; + + *len += sizeof(*dyn_rule_vers1); + dyn_rule_vers1++; + } + } + + if (dyn_last != NULL) { + dyn_last->next = ((user32_addr_t)0); + } + } +} + + +/* + * 64-bit counterpart of cp_dyn_to_comp_32: the same walk over ipfw_dyn_v, + * emitting ipfw_dyn_rule_compat_64 records + */ +static +void cp_dyn_to_comp_64( struct ipfw_dyn_rule_compat_64 *dyn_rule_vers1, int *len) +{ + struct ipfw_dyn_rule_compat_64 *dyn_last=NULL; + ipfw_dyn_rule *p; + int i; + + if (ipfw_dyn_v) { + for (i = 0; i < curr_dyn_buckets; i++) { + for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) { + dyn_rule_vers1->chain = (user64_addr_t) p->rule->rulenum; + dyn_rule_vers1->id = p->id; + dyn_rule_vers1->mask = p->id; + dyn_rule_vers1->type = p->dyn_type; + dyn_rule_vers1->expire = p->expire; + dyn_rule_vers1->pcnt = p->pcnt; + dyn_rule_vers1->bcnt = p->bcnt; + dyn_rule_vers1->bucket = p->bucket; + 
dyn_rule_vers1->state = p->state; + + dyn_rule_vers1->next = CAST_DOWN(user64_addr_t, p->next); + dyn_last = dyn_rule_vers1; + + *len += sizeof(*dyn_rule_vers1); + dyn_rule_vers1++; + } + } + + if (dyn_last != NULL) { + dyn_last->next = CAST_DOWN(user64_addr_t, NULL); + } + } +} + +/* + * Copy the rule carried by the socket option sopt into the native rule + * user_ip_fw; the source is read as struct ip_fw_64 or struct ip_fw_32 + * according to proc_is64bit(sopt->sopt_p) and converted with copyfrom64fw() + * or copyfrom32fw() + * *size, when size is non-NULL and *size is non-zero, caps the number of bytes + * read (sopt->sopt_valsize is clamped to it); on success *size receives the + * value returned by the conversion routine + * Returns 0 on success, EINVAL when sopt_valsize is smaller than the fixed + * rule header, ENOBUFS when the staging buffer cannot be allocated, or the + * error reported by copyin() + */ +static int +sooptcopyin_fw( struct sockopt *sopt, struct ip_fw *user_ip_fw, size_t *size ) +{ + size_t valsize, copyinsize = 0; + int error = 0; + + valsize = sopt->sopt_valsize; + if ( size ) + copyinsize = *size; + if (proc_is64bit(sopt->sopt_p)) { + struct ip_fw_64 *fw64=NULL; + + if ( valsize < sizeof(struct ip_fw_64) ) { + return(EINVAL); + } + if ( !copyinsize ) + copyinsize = sizeof(struct ip_fw_64); + if ( valsize > copyinsize ) + sopt->sopt_valsize = valsize = copyinsize; + + /* the staging buffer is needed for in-kernel callers too, so allocate it before the split */ + fw64 = _MALLOC(copyinsize, M_TEMP, M_WAITOK); + if ( fw64 == NULL ) + return(ENOBUFS); + if ( sopt->sopt_p != 0) { + if ((error = copyin(sopt->sopt_val, fw64, valsize)) != 0){ + _FREE(fw64, M_TEMP); + return error; + } + } + else { + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), fw64, valsize); + } + valsize = copyfrom64fw( fw64, user_ip_fw, valsize ); + _FREE( fw64, M_TEMP); + }else { + struct ip_fw_32 *fw32=NULL; + + if ( valsize < sizeof(struct ip_fw_32) ) { + return(EINVAL); + } + if ( !copyinsize) + copyinsize = sizeof(struct ip_fw_32); + if ( valsize > copyinsize) + sopt->sopt_valsize = valsize = copyinsize; + + /* the staging buffer is needed for in-kernel callers too, so allocate it before the split */ + fw32 = _MALLOC(copyinsize, M_TEMP, M_WAITOK); + if ( fw32 == NULL ) + return(ENOBUFS); + if ( sopt->sopt_p != 0) { + if ( (error = copyin(sopt->sopt_val, fw32, valsize)) != 0){ + _FREE( fw32, M_TEMP); + return( error ); + } + } + else { + bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), fw32, valsize); + } + valsize = copyfrom32fw( fw32, user_ip_fw, valsize); + _FREE( fw32, M_TEMP); + } + if ( size ) + *size = valsize; + return error; +} + /* * The following checks use two arrays of 8 or 16 bits to store the * bits that we want set or clear, respectively. 
They are in the @@ -538,11 +1013,16 @@ verify_rev_path(struct in_addr src, struct ifnet *ifp) rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING); } - - if ((ro.ro_rt == NULL) || (ifp == NULL) || - (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) - return 0; - + if (ro.ro_rt != NULL) + RT_LOCK_SPIN(ro.ro_rt); + else + return 0; /* No route */ + if ((ifp == NULL) || + (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) { + RT_UNLOCK(ro.ro_rt); + return 0; + } + RT_UNLOCK(ro.ro_rt); return 1; } @@ -1228,22 +1708,21 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, } /* - * Transmit a TCP packet, containing either a RST or a keepalive. + * Generate a TCP packet, containing either a RST or a keepalive. * When flags & TH_RST, we are sending a RST packet, because of a * "reset" action matched the packet. * Otherwise we are sending a keepalive, and flags & TH_ */ -static void +static struct mbuf * send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) { struct mbuf *m; struct ip *ip; struct tcphdr *tcp; - struct route sro; /* fake route */ MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ if (m == 0) - return; + return NULL; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); m->m_data += max_linkhdr; @@ -1305,14 +1784,9 @@ send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) */ ip->ip_ttl = ip_defttl; ip->ip_len = m->m_pkthdr.len; - bzero (&sro, sizeof (sro)); - ip_rtaddr(ip->ip_dst, &sro); m->m_flags |= M_SKIP_FIREWALL; - ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL); - if (sro.ro_rt) { - RTFREE(sro.ro_rt); - sro.ro_rt = NULL; - } + + return m; } /* @@ -1335,9 +1809,19 @@ send_reject(struct ip_fw_args *args, int code, int offset, __unused int ip_len) struct tcphdr *const tcp = L3HDR(struct tcphdr, mtod(args->m, struct ip *)); if ( (tcp->th_flags & TH_RST) == 0) { - send_pkt(&(args->f_id), ntohl(tcp->th_seq), + struct mbuf *m; + + m = send_pkt(&(args->f_id), ntohl(tcp->th_seq), 
ntohl(tcp->th_ack), tcp->th_flags | TH_RST); + if (m != NULL) { + struct route sro; /* fake route */ + + bzero (&sro, sizeof (sro)); + ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL); + if (sro.ro_rt) + RTFREE(sro.ro_rt); + } } m_freem(args->m); } else @@ -1727,7 +2211,7 @@ ipfw_chk(struct ip_fw_args *args) dst_ip, htons(dst_port), wildcard, NULL); - if (pcb == NULL || pcb->inp_socket == NULL) + if (pcb == NULL || pcb->inp_socket == NULL) break; #if __FreeBSD_version < 500034 #define socheckuid(a,b) (kauth_cred_getuid((a)->so_cred) != (b)) @@ -1748,6 +2232,8 @@ ipfw_chk(struct ip_fw_args *args) (gid_t)((ipfw_insn_u32 *)cmd)->d[0], &match); } #endif + /* release reference on pcb */ + in_pcb_checkstate(pcb, WNT_RELEASE, 0); } break; @@ -2317,6 +2803,8 @@ add_rule(struct ip_fw **head, struct ip_fw *input_rule) done: static_count++; static_len += l; + static_len_32 += RULESIZE32(input_rule); + static_len_64 += RULESIZE64(input_rule); DEB(printf("ipfw: installed rule %d, static count now %d\n", rule->rulenum, static_count);) return (0); @@ -2345,6 +2833,8 @@ delete_rule(struct ip_fw **head, struct ip_fw *prev, struct ip_fw *rule) prev->next = n; static_count--; static_len -= l; + static_len_32 -= RULESIZE32(rule); + static_len_64 -= RULESIZE64(rule); #if DUMMYNET if (DUMMYNET_LOADED) @@ -2454,6 +2944,8 @@ mark_inactive(struct ip_fw **prev, struct ip_fw **rule) (*rule)->reserved_1 = IPFW_RULE_INACTIVE; static_count--; static_len -= l; + static_len_32 -= RULESIZE32(*rule); + static_len_64 -= RULESIZE64(*rule); timeout(flush_inactive, *rule, 30*hz); /* 30 sec. 
*/ } @@ -2885,7 +3377,9 @@ ipfw_ctl(struct sockopt *sopt) int command; int error; size_t size; + size_t rulesize = RULE_MAXSIZE; struct ip_fw *bp , *buf, *rule; + int is64user = 0; /* copy of orig sopt to send to ipfw_get_command_and_version() */ struct sockopt tmp_sopt = *sopt; @@ -2911,14 +3405,18 @@ ipfw_ctl(struct sockopt *sopt) /* first get the command and version, then do conversion as necessary */ error = ipfw_get_command_and_version(&tmp_sopt, &command, &api_version); - if (error) { /* error getting the version */ return error; } + if (proc_is64bit(sopt->sopt_p)) + is64user = 1; + switch (command) { case IP_FW_GET: + { + size_t dynrulesize; /* * pass up a copy of the current rules. Static rules * come first (the last of which has number IPFW_DEFAULT_RULE), @@ -2926,9 +3424,18 @@ ipfw_ctl(struct sockopt *sopt) * The last dynamic rule has NULL in the "next" field. */ lck_mtx_lock(ipfw_mutex); - size = static_len; /* size of static rules */ - if (ipfw_dyn_v) /* add size of dyn.rules */ - size += (dyn_count * sizeof(ipfw_dyn_rule)); + + if (is64user){ + size = Get64static_len(); + dynrulesize = sizeof(ipfw_dyn_rule_64); + if (ipfw_dyn_v) + size += (dyn_count * dynrulesize); + }else { + size = Get32static_len(); + dynrulesize = sizeof(ipfw_dyn_rule_32); + if (ipfw_dyn_v) + size += (dyn_count * dynrulesize); + } /* * XXX todo: if the user passes a short length just to know @@ -2946,41 +3453,94 @@ ipfw_ctl(struct sockopt *sopt) bp = buf; for (rule = layer3_chain; rule ; rule = rule->next) { - int i = RULESIZE(rule); - + if (rule->reserved_1 == IPFW_RULE_INACTIVE) { continue; } - bcopy(rule, bp, i); - bcopy(&set_disable, &(bp->next_rule), - sizeof(set_disable)); - bp = (struct ip_fw *)((char *)bp + i); + + if (is64user){ + int rulesize_64; + + copyto64fw( rule, (struct ip_fw_64 *)bp, size); + bcopy(&set_disable, &(( (struct ip_fw_64*)bp)->next_rule), sizeof(set_disable)); + /* do not use macro RULESIZE64 since we want RULESIZE for ip_fw_64 */ + rulesize_64 = 
sizeof(struct ip_fw_64) + ((struct ip_fw_64 *)(bp))->cmd_len * 4 - 4; + bp = (struct ip_fw *)((char *)bp + rulesize_64); + }else{ + int rulesize_32; + + copyto32fw( rule, (struct ip_fw_32*)bp, size); + bcopy(&set_disable, &(( (struct ip_fw_32*)bp)->next_rule), sizeof(set_disable)); + /* do not use macro RULESIZE32 since we want RULESIZE for ip_fw_32 */ + rulesize_32 = sizeof(struct ip_fw_32) + ((struct ip_fw_32 *)(bp))->cmd_len * 4 - 4; + bp = (struct ip_fw *)((char *)bp + rulesize_32); + } } if (ipfw_dyn_v) { int i; - ipfw_dyn_rule *p, *dst, *last = NULL; - - dst = (ipfw_dyn_rule *)bp; + ipfw_dyn_rule *p; + char *dst, *last = NULL; + + dst = (char *)bp; for (i = 0 ; i < curr_dyn_buckets ; i++ ) for ( p = ipfw_dyn_v[i] ; p != NULL ; - p = p->next, dst++ ) { - bcopy(p, dst, sizeof *p); - bcopy(&(p->rule->rulenum), &(dst->rule), - sizeof(p->rule->rulenum)); - /* - * store a non-null value in "next". - * The userland code will interpret a - * NULL here as a marker - * for the last dynamic rule. - */ - bcopy(&dst, &dst->next, sizeof(dst)); - last = dst ; - dst->expire = - TIME_LEQ(dst->expire, timenow.tv_sec) ? - 0 : dst->expire - timenow.tv_sec ; + p = p->next, dst += dynrulesize ) { + if ( is64user ){ + ipfw_dyn_rule_64 *ipfw_dyn_dst; + + ipfw_dyn_dst = (ipfw_dyn_rule_64 *)dst; + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + ipfw_dyn_dst->next = CAST_DOWN_EXPLICIT(user64_addr_t, dst); + ipfw_dyn_dst->rule = p->rule->rulenum; + ipfw_dyn_dst->parent = CAST_DOWN(user64_addr_t, p->parent); + ipfw_dyn_dst->pcnt = p->pcnt; + ipfw_dyn_dst->bcnt = p->bcnt; + ipfw_dyn_dst->id = p->id; + ipfw_dyn_dst->expire = + TIME_LEQ(p->expire, timenow.tv_sec) ? 
+ 0 : p->expire - timenow.tv_sec; + ipfw_dyn_dst->bucket = p->bucket; + ipfw_dyn_dst->state = p->state; + ipfw_dyn_dst->ack_fwd = p->ack_fwd; + ipfw_dyn_dst->ack_rev = p->ack_rev; + ipfw_dyn_dst->dyn_type = p->dyn_type; + ipfw_dyn_dst->count = p->count; + last = (char*)&ipfw_dyn_dst->next; + } else { + ipfw_dyn_rule_32 *ipfw_dyn_dst; + + ipfw_dyn_dst = (ipfw_dyn_rule_32 *)dst; + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + ipfw_dyn_dst->next = CAST_DOWN_EXPLICIT(user32_addr_t, dst); + ipfw_dyn_dst->rule = p->rule->rulenum; + ipfw_dyn_dst->parent = CAST_DOWN_EXPLICIT(user32_addr_t, p->parent); + ipfw_dyn_dst->pcnt = p->pcnt; + ipfw_dyn_dst->bcnt = p->bcnt; + ipfw_dyn_dst->id = p->id; + ipfw_dyn_dst->expire = + TIME_LEQ(p->expire, timenow.tv_sec) ? + 0 : p->expire - timenow.tv_sec; + ipfw_dyn_dst->bucket = p->bucket; + ipfw_dyn_dst->state = p->state; + ipfw_dyn_dst->ack_fwd = p->ack_fwd; + ipfw_dyn_dst->ack_rev = p->ack_rev; + ipfw_dyn_dst->dyn_type = p->dyn_type; + ipfw_dyn_dst->count = p->count; + last = (char*)&ipfw_dyn_dst->next; + } } if (last != NULL) /* mark last dynamic rule */ - bzero(&last->next, sizeof(last)); + bzero(last, sizeof(last)); } lck_mtx_unlock(ipfw_mutex); @@ -3003,7 +3563,7 @@ ipfw_ctl(struct sockopt *sopt) for (i = 0; i < static_count; i++) { /* static rules have different sizes */ int j = RULESIZE(bp); - ipfw_convert_from_latest(bp, rule_vers0, api_version); + ipfw_convert_from_latest(bp, rule_vers0, api_version, is64user); bp = (struct ip_fw *)((char *)bp + j); len += sizeof(*rule_vers0); rule_vers0++; @@ -3014,63 +3574,56 @@ ipfw_ctl(struct sockopt *sopt) } } else if (api_version == IP_FW_VERSION_1) { int i, len = 0, buf_size; - struct ip_fw_compat *buf2, *rule_vers1; - struct ipfw_dyn_rule_compat *dyn_rule_vers1, *dyn_last = NULL; - ipfw_dyn_rule *p; + struct ip_fw_compat *buf2; + size_t ipfwcompsize; + size_t ipfwdyncompsize; + char 
*rule_vers1; lck_mtx_lock(ipfw_mutex); - buf_size = static_count * sizeof(struct ip_fw_compat) + - dyn_count * sizeof(struct ipfw_dyn_rule_compat); + if ( is64user ){ + ipfwcompsize = sizeof(struct ip_fw_compat_64); + ipfwdyncompsize = sizeof(struct ipfw_dyn_rule_compat_64); + } else { + ipfwcompsize = sizeof(struct ip_fw_compat_32); + ipfwdyncompsize = sizeof(struct ipfw_dyn_rule_compat_32); + } + + buf_size = static_count * ipfwcompsize + + dyn_count * ipfwdyncompsize; buf2 = _MALLOC(buf_size, M_TEMP, M_WAITOK); if (buf2 == 0) { lck_mtx_unlock(ipfw_mutex); error = ENOBUFS; } - if (!error) { bp = buf; - rule_vers1 = buf2; + rule_vers1 = (char*)buf2; /* first do static rules */ for (i = 0; i < static_count; i++) { /* static rules have different sizes */ - int j = RULESIZE(bp); - ipfw_convert_from_latest(bp, rule_vers1, api_version); - bp = (struct ip_fw *)((char *)bp + j); - len += sizeof(*rule_vers1); - rule_vers1++; - } - - /* now do dynamic rules */ - dyn_rule_vers1 = (struct ipfw_dyn_rule_compat *)rule_vers1; - if (ipfw_dyn_v) { - for (i = 0; i < curr_dyn_buckets; i++) { - for ( p = ipfw_dyn_v[i] ; p != NULL ; p = p->next) { - dyn_rule_vers1->chain = p->rule->rulenum; - dyn_rule_vers1->id = p->id; - dyn_rule_vers1->mask = p->id; - dyn_rule_vers1->type = p->dyn_type; - dyn_rule_vers1->expire = p->expire; - dyn_rule_vers1->pcnt = p->pcnt; - dyn_rule_vers1->bcnt = p->bcnt; - dyn_rule_vers1->bucket = p->bucket; - dyn_rule_vers1->state = p->state; - - dyn_rule_vers1->next = (struct ipfw_dyn_rule *) dyn_rule_vers1; - dyn_last = dyn_rule_vers1; - - len += sizeof(*dyn_rule_vers1); - dyn_rule_vers1++; - } - } - - if (dyn_last != NULL) { - dyn_last->next = NULL; + if ( is64user ){ + int rulesize_64; + ipfw_convert_from_latest(bp, (void *)rule_vers1, api_version, is64user); + rulesize_64 = sizeof(struct ip_fw_64) + ((struct ip_fw_64 *)(bp))->cmd_len * 4 - 4; + bp = (struct ip_fw *)((char *)bp + rulesize_64); + }else { + int rulesize_32; + ipfw_convert_from_latest(bp, 
(void *)rule_vers1, api_version, is64user); + rulesize_32 = sizeof(struct ip_fw_32) + ((struct ip_fw_32 *)(bp))->cmd_len * 4 - 4; + bp = (struct ip_fw *)((char *)bp + rulesize_32); } + len += ipfwcompsize; + rule_vers1 += ipfwcompsize; } + /* now do dynamic rules */ + if ( is64user ) + cp_dyn_to_comp_64( (struct ipfw_dyn_rule_compat_64 *)rule_vers1, &len); + else + cp_dyn_to_comp_32( (struct ipfw_dyn_rule_compat_32 *)rule_vers1, &len); + lck_mtx_unlock(ipfw_mutex); - error = sooptcopyout(sopt, buf2, len); _FREE(buf2, M_TEMP); } @@ -3080,7 +3633,8 @@ ipfw_ctl(struct sockopt *sopt) _FREE(buf, M_TEMP); break; - + } + case IP_FW_FLUSH: /* * Normally we cannot release the lock on each iteration. @@ -3105,6 +3659,8 @@ ipfw_ctl(struct sockopt *sopt) break; case IP_FW_ADD: + { + size_t savedsopt_valsize=0; rule = _MALLOC(RULE_MAXSIZE, M_TEMP, M_WAITOK); if (rule == 0) { error = ENOBUFS; @@ -3114,11 +3670,12 @@ ipfw_ctl(struct sockopt *sopt) bzero(rule, RULE_MAXSIZE); if (api_version != IP_FW_CURRENT_API_VERSION) { - error = ipfw_convert_to_latest(sopt, rule, api_version); + error = ipfw_convert_to_latest(sopt, rule, api_version, is64user); } else { - error = sooptcopyin(sopt, rule, RULE_MAXSIZE, - sizeof(struct ip_fw) ); + savedsopt_valsize = sopt->sopt_valsize; /* it might get modified in sooptcopyin_fw */ + error = sooptcopyin_fw( sopt, rule, &rulesize); + } if (!error) { @@ -3127,8 +3684,9 @@ ipfw_ctl(struct sockopt *sopt) * adjust sopt_valsize to match what would be expected. 
*/ sopt->sopt_valsize = RULESIZE(rule); + rulesize = RULESIZE(rule); } - error = check_ipfw_struct(rule, sopt->sopt_valsize); + error = check_ipfw_struct(rule, rulesize); if (!error) { lck_mtx_lock(ipfw_mutex); error = add_rule(&layer3_chain, rule); @@ -3142,19 +3700,30 @@ ipfw_ctl(struct sockopt *sopt) if (api_version == IP_FW_VERSION_0) { struct ip_old_fw rule_vers0; - ipfw_convert_from_latest(rule, &rule_vers0, api_version); + ipfw_convert_from_latest(rule, &rule_vers0, api_version, is64user); sopt->sopt_valsize = sizeof(struct ip_old_fw); error = sooptcopyout(sopt, &rule_vers0, sizeof(struct ip_old_fw)); } else if (api_version == IP_FW_VERSION_1) { struct ip_fw_compat rule_vers1; - - ipfw_convert_from_latest(rule, &rule_vers1, api_version); + ipfw_convert_from_latest(rule, &rule_vers1, api_version, is64user); sopt->sopt_valsize = sizeof(struct ip_fw_compat); error = sooptcopyout(sopt, &rule_vers1, sizeof(struct ip_fw_compat)); } else { - error = sooptcopyout(sopt, rule, size); + char *userrule; + userrule = _MALLOC(savedsopt_valsize, M_TEMP, M_WAITOK); + if ( userrule == NULL ) + userrule = (char*)rule; + if (proc_is64bit(sopt->sopt_p)){ + copyto64fw( rule, (struct ip_fw_64*)userrule, savedsopt_valsize); + } + else { + copyto32fw( rule, (struct ip_fw_32*)userrule, savedsopt_valsize); + } + error = sooptcopyout(sopt, userrule, savedsopt_valsize); + if ( userrule ) + _FREE(userrule, M_TEMP); } } } @@ -3162,7 +3731,7 @@ ipfw_ctl(struct sockopt *sopt) _FREE(rule, M_TEMP); break; - + } case IP_FW_DEL: { /* @@ -3184,11 +3753,10 @@ ipfw_ctl(struct sockopt *sopt) bzero(&temp_rule, sizeof(struct ip_fw)); if (api_version != IP_FW_CURRENT_API_VERSION) { - error = ipfw_convert_to_latest(sopt, &temp_rule, api_version); + error = ipfw_convert_to_latest(sopt, &temp_rule, api_version, is64user); } else { - error = sooptcopyin(sopt, &temp_rule, sizeof(struct ip_fw), - sizeof(struct ip_fw) ); + error = sooptcopyin_fw(sopt, &temp_rule, 0 ); } if (!error) { @@ -3234,17 +3802,16 @@ 
ipfw_ctl(struct sockopt *sopt) /* there is only a simple rule passed in * (no cmds), so use a temp struct to copy */ - struct ip_fw temp_rule; + struct ip_fw temp_rule; bzero(&temp_rule, sizeof(struct ip_fw)); if (api_version != IP_FW_CURRENT_API_VERSION) { - error = ipfw_convert_to_latest(sopt, &temp_rule, api_version); + error = ipfw_convert_to_latest(sopt, &temp_rule, api_version, is64user); } else { if (sopt->sopt_val != 0) { - error = sooptcopyin(sopt, &temp_rule, sizeof(struct ip_fw), - sizeof(struct ip_fw) ); + error = sooptcopyin_fw( sopt, &temp_rule, 0); } } @@ -3299,16 +3866,25 @@ struct ip_fw *ip_fw_default_rule; static void ipfw_tick(__unused void * unused) { + struct mbuf *m0, *m, *mnext, **mtailp; int i; ipfw_dyn_rule *q; struct timeval timenow; - if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) goto done; getmicrotime(&timenow); + /* + * We make a chain of packets to go out here -- not deferring + * until after we drop the ipfw lock would result + * in a lock order reversal with the normal packet input -> ipfw + * call stack. 
+ */ + m0 = NULL; + mtailp = &m0; + lck_mtx_lock(ipfw_mutex); for (i = 0 ; i < curr_dyn_buckets ; i++) { for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { @@ -3324,11 +3900,27 @@ ipfw_tick(__unused void * unused) if (TIME_LEQ(q->expire, timenow.tv_sec)) continue; /* too late, rule expired */ - send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); - send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0); + *mtailp = send_pkt(&(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); + if (*mtailp != NULL) + mtailp = &(*mtailp)->m_nextpkt; + + *mtailp = send_pkt(&(q->id), q->ack_fwd - 1, q->ack_rev, 0); + if (*mtailp != NULL) + mtailp = &(*mtailp)->m_nextpkt; } } lck_mtx_unlock(ipfw_mutex); + + for (m = mnext = m0; m != NULL; m = mnext) { + struct route sro; /* fake route */ + + mnext = m->m_nextpkt; + m->m_nextpkt = NULL; + bzero (&sro, sizeof (sro)); + ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL); + if (sro.ro_rt) + RTFREE(sro.ro_rt); + } done: timeout(ipfw_tick, NULL, dyn_keepalive_period*hz); } diff --git a/bsd/netinet/ip_fw2.h b/bsd/netinet/ip_fw2.h index 24ef2abe6..0485bcbc2 100644 --- a/bsd/netinet/ip_fw2.h +++ b/bsd/netinet/ip_fw2.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa * @@ -64,8 +92,6 @@ -#if !__LP64__ - /* * The kernel representation of ipfw rules is made of a list of * 'instructions' (for all practical purposes equivalent to BPF @@ -432,6 +458,132 @@ struct _ipfw_dyn_rule { * Main firewall chains definitions and global var's definitions. */ #ifdef KERNEL + +#pragma pack(4) +struct ip_fw_32{ + u_int32_t version; /* Version of this structure. MUST be set */ + /* by clients. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION. */ + user32_addr_t context; /* Context that is usable by user processes to */ + /* identify this rule. */ + user32_addr_t next; /* linked list of rules */ + user32_addr_t next_rule;/* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int8_t set; /* rule set (0..31) */ + u_int32_t set_masks[2]; /* masks for manipulating sets atomically */ +#define RESVD_SET 31 /* set for default and persistent rules */ + u_int8_t _pad; /* padding */ + + /* These fields are present in all rules. 
*/ + u_int64_t pcnt; /* Packet counter */ + u_int64_t bcnt; /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + u_int32_t reserved_1; /* reserved - set to 0 */ + u_int32_t reserved_2; /* reserved - set to 0 */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#pragma pack() + +struct ip_fw_64{ + u_int32_t version; /* Version of this structure. MUST be set */ + /* by clients. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION. */ + __uint64_t context __attribute__((aligned(8))); /* Context that is usable by user processes to */ + /* identify this rule. */ + user64_addr_t next; /* linked list of rules */ + user64_addr_t next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int8_t set; /* rule set (0..31) */ + u_int32_t set_masks[2]; /* masks for manipulating sets atomically */ +#define RESVD_SET 31 /* set for default and persistent rules */ + u_int8_t _pad; /* padding */ + + /* These fields are present in all rules. */ + u_int64_t pcnt __attribute__((aligned(8))); /* Packet counter */ + u_int64_t bcnt __attribute__((aligned(8))); /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + u_int32_t reserved_1; /* reserved - set to 0 */ + u_int32_t reserved_2; /* reserved - set to 0 */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + + +typedef struct _ipfw_dyn_rule_64 ipfw_dyn_rule_64; +typedef struct _ipfw_dyn_rule_32 ipfw_dyn_rule_32; + +#pragma pack(4) +struct _ipfw_dyn_rule_32 { + user32_addr_t next; /* linked list of rules. 
*/ + user32_addr_t rule; /* pointer to rule */ + /* 'rule' is used to pass up the rule number (from the parent) */ + + user32_addr_t parent; /* pointer to parent rule */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + struct ipfw_flow_id id; /* (masked) flow id */ + u_int32_t expire; /* expire time */ + u_int32_t bucket; /* which bucket in hash table */ + u_int32_t state; /* state of this rule (typically a + * combination of TCP flags) + */ + u_int32_t ack_fwd; /* most recent ACKs in forward */ + u_int32_t ack_rev; /* and reverse directions (used */ + /* to generate keepalives) */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ +}; + +#pragma pack() + +struct _ipfw_dyn_rule_64 { + user64_addr_t next; /* linked list of rules. */ + user64_addr_t rule; /* pointer to rule */ + /* 'rule' is used to pass up the rule number (from the parent) */ + + user64_addr_t parent; /* pointer to parent rule */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + struct ipfw_flow_id id; /* (masked) flow id */ + u_int32_t expire; /* expire time */ + u_int32_t bucket; /* which bucket in hash table */ + u_int32_t state; /* state of this rule (typically a + * combination of TCP flags) + */ + u_int32_t ack_fwd; /* most recent ACKs in forward */ + u_int32_t ack_rev; /* and reverse directions (used */ + /* to generate keepalives) */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ +}; + + +typedef struct _ipfw_insn_pipe_64 { + ipfw_insn o; + user64_addr_t pipe_ptr; /* XXX */ +} ipfw_insn_pipe_64; + +typedef struct _ipfw_insn_pipe_32{ + ipfw_insn o; + user32_addr_t pipe_ptr; /* XXX */ +} ipfw_insn_pipe_32; + + +#endif /* KERNEL */ + +#ifdef KERNEL #if IPFIREWALL #define IP_FW_PORT_DYNT_FLAG 0x10000 @@ -453,7 +605,7 @@ struct ip_fw_args { struct route *ro; /* for dummynet */ struct sockaddr_in *dst; /* for dummynet */ int flags; /* for dummynet */ - struct ip_out_args 
*ipoa; /* for dummynet */ + struct ip_out_args *ipoa; /* for dummynet */ struct ipfw_flow_id f_id; /* grabbed from IP header */ u_int16_t divert_rule; /* divert cookie */ @@ -482,5 +634,4 @@ extern int fw_enable; #endif /* IPFIREWALL */ #endif /* KERNEL */ -#endif /* !__LP64__ */ #endif /* _IPFW2_H */ diff --git a/bsd/netinet/ip_fw2_compat.c b/bsd/netinet/ip_fw2_compat.c index eb7eb43fe..766fa8fc8 100644 --- a/bsd/netinet/ip_fw2_compat.c +++ b/bsd/netinet/ip_fw2_compat.c @@ -67,6 +67,8 @@ struct _s_x { #define VERSION_ONE_STR "IP_FW_VERSION_1" #define CURRENT_API_VERSION_STR "IP_FW_CURRENT_API_VERSION" +#if FW2_DEBUG_VERBOSE + static struct _s_x f_tcpflags[] = { { "syn", TH_SYN }, { "fin", TH_FIN }, @@ -90,6 +92,7 @@ static struct _s_x f_tcpopts[] = { { NULL, 0 } }; + /* * IP options span the range 0 to 255 so we need to remap them * (though in fact only the low 5 bits are significant). @@ -123,6 +126,10 @@ static struct _s_x limit_masks[] = { {NULL, 0} }; +#endif /* !FW2_DEBUG_VERBOSE */ + +#if 0 /* version #1 */ + static void ipfw_print_fw_flags(u_int flags) { @@ -257,115 +264,6 @@ print_fw_version(u_int32_t api_version) } } -static void -ipfw_print_vers1_struct(struct ip_fw_compat *vers1_rule) -{ - char ipv4str[MAX_IPv4_STR_LEN]; - print_fw_version(vers1_rule->version); - printf("Rule #%d\n", vers1_rule->fw_number); - - ipfw_print_fw_flags(vers1_rule->fw_flg); - - printf("fw_pcnt: %llu\n", vers1_rule->fw_pcnt); - printf("fw_bcnt: %llu\n", vers1_rule->fw_bcnt); - printf("fw_src: %s\n", - inet_ntop(AF_INET, &vers1_rule->fw_src, ipv4str, sizeof(ipv4str))); - printf("fw_dst: %s\n", - inet_ntop(AF_INET, &vers1_rule->fw_dst, ipv4str, sizeof(ipv4str))); - printf("fw_smsk: %s\n", - inet_ntop(AF_INET, &vers1_rule->fw_smsk, ipv4str, sizeof(ipv4str))); - printf("fw_dmsk: %s\n", - inet_ntop(AF_INET, &vers1_rule->fw_dmsk, ipv4str, sizeof(ipv4str))); - - if (vers1_rule->fw_flg & IP_FW_F_ICMPBIT_COMPAT) { - int type_index; - int first = 1; - - printf(" icmptype"); - - for 
(type_index = 0; - type_index < (IP_FW_ICMPTYPES_DIM_COMPAT * sizeof(unsigned) * 8); - ++type_index) { - if (vers1_rule->fw_uar_compat.fw_icmptypes[type_index / (sizeof(unsigned) * 8)] & - (1U << (type_index % (sizeof(unsigned) * 8)))) { - printf("%c%d", first == 1 ? ' ' : ',', type_index); - first = 0; - } - } - } else { - int i, nsp, ndp; - - nsp = IP_FW_GETNSRCP_COMPAT(vers1_rule); - for (i = 0; i < nsp; i++) { - printf("source ports: fw_uar_compat.fw_pts: %04x", vers1_rule->fw_uar_compat.fw_pts[i]); - if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_SRNG_COMPAT)) - printf("-"); - else if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_SMSK_COMPAT)) - printf(":"); - else - printf(","); - } - - printf("\n"); - - ndp = IP_FW_GETNDSTP_COMPAT(vers1_rule); - for (i = 0; i < ndp; i++) { - printf("source ports: fw_uar_compat.fw_pts: %04x", vers1_rule->fw_uar_compat.fw_pts[nsp+i]); - if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_DRNG_COMPAT)) - printf("-"); - else if (i == 0 && (vers1_rule->fw_flg & IP_FW_F_DMSK_COMPAT)) - printf(":"); - else - printf(","); - } - - printf("\n"); - } - - printf("fw_ipflg: %d\n", vers1_rule->fw_ipflg); - printf("fw_ipopt: %d\n", vers1_rule->fw_ipopt); - printf("fw_ipnopt: %d\n", vers1_rule->fw_ipnopt); - printf("fw_tcpopt: %d\n", vers1_rule->fw_tcpopt); - printf("fw_tcpnopt: %d\n", vers1_rule->fw_tcpnopt); - printf("fw_tcpf: %d\n", vers1_rule->fw_tcpf); - printf("fw_tcpnf: %d\n", vers1_rule->fw_tcpnf); - printf("timestamp: %ld\n", vers1_rule->timestamp); - - if ((vers1_rule->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { - printf("fw_in_if: "); - inet_ntop(AF_INET, &vers1_rule->fw_in_if.fu_via_ip, ipv4str, - sizeof(ipv4str)); - printf("fu_via_ip: %s\n", ipv4str); - printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_in_if.fu_via_if_compat.name); - printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_in_if.fu_via_if_compat.unit); - } else { - if (vers1_rule->fw_flg & IP_FW_F_IIFACE_COMPAT) { - printf("fw_in_if: "); - printf("fu_via_ip: 
%s\n", - inet_ntop(AF_INET, &vers1_rule->fw_in_if.fu_via_ip, ipv4str, - sizeof(ipv4str))); - printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_in_if.fu_via_if_compat.name); - printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_in_if.fu_via_if_compat.unit); - } - if (vers1_rule->fw_flg & IP_FW_F_OIFACE_COMPAT) { - printf("fw_out_if: "); - printf("fu_via_ip: %s\n", - inet_ntop(AF_INET, &vers1_rule->fw_out_if.fu_via_ip, - ipv4str, sizeof(ipv4str))); - printf("fu_via_if_compat.name: %s\n", vers1_rule->fw_out_if.fu_via_if_compat.name); - printf("fu_via_if_compat.unit: %d\n", vers1_rule->fw_out_if.fu_via_if_compat.unit); - } - } - - printf("fw_prot: %d\n", vers1_rule->fw_prot); - printf("fw_nports: %d\n", vers1_rule->fw_nports); - printf("pipe_ptr: %p\n", vers1_rule->pipe_ptr); - printf("next_rule_ptr: %p\n", vers1_rule->next_rule_ptr); - printf("fw_uid: %d\n", vers1_rule->fw_uid); - printf("fw_logamount: %d\n", vers1_rule->fw_logamount); - printf("fw_loghighest: %llu\n", vers1_rule->fw_loghighest); -} - static void print_icmptypes(ipfw_insn_u32 *cmd) { @@ -489,6 +387,9 @@ print_mac(uint8_t *addr, uint8_t *mask) } } +#endif /* !version #1 */ + +#if FW2_DEBUG_VERBOSE static void ipfw_print_vers2_struct(struct ip_fw *vers2_rule) { @@ -862,6 +763,9 @@ ipfw_print_vers2_struct(struct ip_fw *vers2_rule) } /* for */ } +#endif /* !FW2_DEBUG_VERBOSE */ + + /* * helper function, updates the pointer to cmd with the length * of the current command, and also cleans up the first word of @@ -920,7 +824,7 @@ fill_compat_tcpflags(u_int32_t flags) { * sets, sets of addresses, blocks (NOT, OR) */ static void -ipfw_map_from_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) +ipfw_map_from_cmds_32(struct ip_fw_32 *curr_rule, struct ip_fw_compat_32 *compat_rule) { int l; ipfw_insn *cmd; @@ -1200,166 +1104,714 @@ ipfw_map_from_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) } static void -ipfw_map_from_actions(struct ip_fw *curr_rule, struct ip_fw_compat 
*compat_rule) +ipfw_map_from_cmds_64(struct ip_fw_64 *curr_rule, struct ip_fw_compat_64 *compat_rule) { - int l; + int l; ipfw_insn *cmd; - - for (l = curr_rule->cmd_len - curr_rule->act_ofs, cmd = ACTION_PTR(curr_rule); - l > 0 ; - l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + for (l = curr_rule->act_ofs, cmd = curr_rule->cmd ; + l > 0 ; + l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + /* useful alias */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; + switch (cmd->opcode) { - case O_ACCEPT: - compat_rule->fw_flg |= IP_FW_F_ACCEPT_COMPAT; - break; - case O_COUNT: - compat_rule->fw_flg |= IP_FW_F_COUNT_COMPAT; + case O_PROTO: + /* protocol */ + compat_rule->fw_prot = cmd->arg1; break; - case O_PIPE: - compat_rule->fw_flg |= IP_FW_F_PIPE_COMPAT; - compat_rule->fw_divert_port_compat = cmd->arg1; + + case O_IP_SRC_ME: + compat_rule->fw_flg |= IP_FW_F_SME_COMPAT; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } break; - case O_QUEUE: - compat_rule->fw_flg |= IP_FW_F_QUEUE_COMPAT; - compat_rule->fw_divert_port_compat = cmd->arg1; + + case O_IP_SRC_MASK: + { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + compat_rule->fw_src = ip->addr; + compat_rule->fw_smsk = ip->mask; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } break; - case O_SKIPTO: - compat_rule->fw_flg |= IP_FW_F_SKIPTO_COMPAT; - compat_rule->fw_skipto_rule_compat = cmd->arg1; + } + + case O_IP_SRC: + /* one IP */ + /* source - + * for now we only deal with one address + * per rule and ignore sets of addresses + */ + compat_rule->fw_src.s_addr = cmd32->d[0]; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVSRC_COMPAT; + } break; - case O_DIVERT: - compat_rule->fw_flg |= IP_FW_F_DIVERT_COMPAT; - compat_rule->fw_divert_port_compat = cmd->arg1; + + case O_IP_SRCPORT: + { + /* source ports */ + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, j; + + /* copy list of ports */ + for (i = F_LEN(cmd) - 1, j = 
0; i > 0; i--, j++, p += 2) { + if (p[0] != p[1]) { + /* this is a range */ + compat_rule->fw_flg |= IP_FW_F_SRNG_COMPAT; + compat_rule->fw_uar_compat.fw_pts[j++] = p[0]; + compat_rule->fw_uar_compat.fw_pts[j] = p[1]; + } else { + compat_rule->fw_uar_compat.fw_pts[j] = p[0]; + } + } + IP_FW_SETNSRCP_COMPAT(compat_rule, j); + break; - case O_TEE: - compat_rule->fw_flg |= IP_FW_F_TEE_COMPAT; - compat_rule->fw_divert_port_compat = cmd->arg1; + } + + case O_IP_DST_ME: + /* destination */ + compat_rule->fw_flg |= IP_FW_F_DME_COMPAT; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } break; - case O_FORWARD_IP: + + case O_IP_DST_MASK: { - ipfw_insn_sa *p = (ipfw_insn_sa *)cmd; + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; - compat_rule->fw_flg |= IP_FW_F_FWD_COMPAT; - compat_rule->fw_fwd_ip_compat.sin_len = p->sa.sin_len; - compat_rule->fw_fwd_ip_compat.sin_family = p->sa.sin_family; - compat_rule->fw_fwd_ip_compat.sin_port = p->sa.sin_port; - compat_rule->fw_fwd_ip_compat.sin_addr = p->sa.sin_addr; - + compat_rule->fw_dst = ip->addr; + compat_rule->fw_dmsk = ip->mask; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } break; } - case O_DENY: - compat_rule->fw_flg |= IP_FW_F_DENY_COMPAT; + case O_IP_DST: + /* one IP */ + /* dest - + * for now we only deal with one address + * per rule, and ignore sets of addresses + */ + compat_rule->fw_dst.s_addr = cmd32->d[0]; + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_INVDST_COMPAT; + } break; - case O_REJECT: - compat_rule->fw_flg |= IP_FW_F_REJECT_COMPAT; - compat_rule->fw_reject_code_compat = cmd->arg1; + + case O_IP_DSTPORT: + { + /* dest. 
ports */ + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, + j = IP_FW_GETNSRCP_COMPAT(compat_rule); + + /* copy list of ports */ + for (i = F_LEN(cmd) - 1; i > 0; i--, j++, p += 2) { + if (p[0] != p[1]) { + /* this is a range */ + compat_rule->fw_flg |= IP_FW_F_DRNG_COMPAT; + compat_rule->fw_uar_compat.fw_pts[j++] = p[0]; + compat_rule->fw_uar_compat.fw_pts[j] = p[1]; + } else { + compat_rule->fw_uar_compat.fw_pts[j] = p[0]; + } + } + IP_FW_SETNDSTP_COMPAT(compat_rule, (j - IP_FW_GETNSRCP_COMPAT(compat_rule))); + break; - case O_CHECK_STATE: - compat_rule->fw_flg |= IP_FW_F_CHECK_S_COMPAT; + } + + case O_LOG: + { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + + compat_rule->fw_flg |= IP_FW_F_PRN_COMPAT; + compat_rule->fw_logamount = c->max_log; break; - default: + } + case O_UID: + compat_rule->fw_flg |= IP_FW_F_UID_COMPAT; + compat_rule->fw_uid = cmd32->d[0]; break; - } - } -} - -static void -ipfw_version_latest_to_one(struct ip_fw *curr_rule, struct ip_fw_compat *rule_vers1) -{ - if (!rule_vers1) - return; - - bzero(rule_vers1, sizeof(struct ip_fw_compat)); - - rule_vers1->version = IP_FW_VERSION_1; - rule_vers1->context = curr_rule->context; - rule_vers1->fw_number = curr_rule->rulenum; - rule_vers1->fw_pcnt = curr_rule->pcnt; - rule_vers1->fw_bcnt = curr_rule->bcnt; - rule_vers1->timestamp = curr_rule->timestamp; - - /* convert actions */ - ipfw_map_from_actions(curr_rule, rule_vers1); - - /* convert commands */ - ipfw_map_from_cmds(curr_rule, rule_vers1); - -#if FW2_DEBUG_VERBOSE - ipfw_print_vers1_struct(rule_vers1); -#endif -} - -/* first convert to version one then to version zero */ -static void -ipfw_version_latest_to_zero(struct ip_fw *curr_rule, struct ip_old_fw *rule_vers0) -{ - struct ip_fw_compat rule_vers1; - - ipfw_version_latest_to_one(curr_rule, &rule_vers1); - - bzero(rule_vers0, sizeof(struct ip_old_fw)); - bcopy(&rule_vers1.fw_uar_compat, &rule_vers0->fw_uar, sizeof(rule_vers1.fw_uar_compat)); - 
bcopy(&rule_vers1.fw_in_if, &rule_vers0->fw_in_if, sizeof(rule_vers1.fw_in_if)); - bcopy(&rule_vers1.fw_out_if, &rule_vers0->fw_out_if, sizeof(rule_vers1.fw_out_if)); - bcopy(&rule_vers1.fw_un_compat, &rule_vers0->fw_un, sizeof(rule_vers1.fw_un_compat)); - - rule_vers0->fw_pcnt = rule_vers1.fw_pcnt; - rule_vers0->fw_bcnt = rule_vers1.fw_bcnt; - rule_vers0->fw_src = rule_vers1.fw_src; - rule_vers0->fw_dst = rule_vers1.fw_dst; - rule_vers0->fw_smsk = rule_vers1.fw_smsk; - rule_vers0->fw_dmsk = rule_vers1.fw_dmsk; - rule_vers0->fw_number = rule_vers1.fw_number; - rule_vers0->fw_flg = rule_vers1.fw_flg; - rule_vers0->fw_ipopt = rule_vers1.fw_ipopt; - rule_vers0->fw_ipnopt = rule_vers1.fw_ipnopt; - rule_vers0->fw_tcpf = rule_vers1.fw_tcpf; - rule_vers0->fw_tcpnf = rule_vers1.fw_tcpnf; - rule_vers0->timestamp = rule_vers1.timestamp; - rule_vers0->fw_prot = rule_vers1.fw_prot; - rule_vers0->fw_nports = rule_vers1.fw_nports; - rule_vers0->pipe_ptr = rule_vers1.pipe_ptr; - rule_vers0->next_rule_ptr = rule_vers1.next_rule_ptr; - - if (rule_vers1.fw_ipflg && IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB; -} - -void -ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version) -{ - switch (api_version) { - case IP_FW_VERSION_0: - { - struct ip_old_fw *rule_vers0 = old_rule; - ipfw_version_latest_to_zero(curr_rule, rule_vers0); - break; - } - case IP_FW_VERSION_1: - { - struct ip_fw_compat *rule_vers1 = old_rule; + case O_IN: + if (cmd->len & F_NOT) { + compat_rule->fw_flg |= IP_FW_F_OUT_COMPAT; + } else { + compat_rule->fw_flg |= IP_FW_F_IN_COMPAT; + } + break; - ipfw_version_latest_to_one(curr_rule, rule_vers1); - break; - } - case IP_FW_CURRENT_API_VERSION: - /* ipfw2 for now, don't need to do anything */ - break; - - default: - /* unknown version */ - break; - } -} - - -/* ******************************************** - * *********** Convert to Latest ************** - * ********************************************/ + case 
O_KEEP_STATE: + compat_rule->fw_flg |= IP_FW_F_KEEP_S_COMPAT; + break; + + case O_LAYER2: + compat_rule->fw_flg |= IP_FW_BRIDGED_COMPAT; + break; + + case O_XMIT: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + compat_rule->fw_flg |= IP_FW_F_OIFACE_COMPAT; + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->p.ip.s_addr != 0) { + compat_rule->fw_flg |= IP_FW_F_OIFACE_COMPAT; + ifu.fu_via_ip = ifcmd->p.ip; + } else { + compat_rule->fw_flg |= IP_FW_F_OIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } + compat_rule->fw_out_if = ifu; + + break; + } + + case O_RECV: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + compat_rule->fw_flg |= IP_FW_F_IIFACE_COMPAT; + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->p.ip.s_addr != 0) { + compat_rule->fw_flg |= IP_FW_F_IIFACE_COMPAT; + ifu.fu_via_ip = ifcmd->p.ip; + } else { + compat_rule->fw_flg |= IP_FW_F_IIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } + compat_rule->fw_in_if = ifu; + + break; + } + + case O_VIA: + { + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu; + + if ((ifcmd->o.len == 0) && (ifcmd->name[0] == '\0')) { + /* any */ + ifu.fu_via_ip.s_addr = 0; + } + else if (ifcmd->name[0] != '\0') { + compat_rule->fw_flg |= IP_FW_F_IIFNAME_COMPAT; + strncpy(ifu.fu_via_if_compat.name, ifcmd->name, sizeof(ifu.fu_via_if_compat.name)); + ifu.fu_via_if_compat.unit = ifcmd->p.unit; + } else { + ifu.fu_via_ip = ifcmd->p.ip; + } + compat_rule->fw_flg |= IF_FW_F_VIAHACK_COMPAT; + compat_rule->fw_out_if = compat_rule->fw_in_if = ifu; + + break; + } + + case O_FRAG: + compat_rule->fw_flg |= IP_FW_F_FRAG_COMPAT; + 
break; + + case O_IPOPT: + /* IP options */ + compat_rule->fw_ipopt = (cmd->arg1 & 0xff); + compat_rule->fw_ipnopt = ((cmd->arg1 >> 8) & 0xff); + break; + + case O_TCPFLAGS: + /* check for "setup" */ + if ((cmd->arg1 & 0xff) == TH_SYN && + ((cmd->arg1 >> 8) & 0xff) == TH_ACK) { + compat_rule->fw_tcpf = IP_FW_TCPF_SYN_COMPAT; + compat_rule->fw_tcpnf = IP_FW_TCPF_ACK_COMPAT; + } + else { + compat_rule->fw_tcpf = fill_compat_tcpflags(cmd->arg1 & 0xff); + compat_rule->fw_tcpnf = fill_compat_tcpflags((cmd->arg1 >> 8) & 0xff); + } + break; + + case O_TCPOPTS: + /* TCP options */ + compat_rule->fw_tcpopt = (cmd->arg1 & 0xff); + compat_rule->fw_tcpnopt = ((cmd->arg1 >> 8) & 0xff); + break; + + case O_ESTAB: + compat_rule->fw_ipflg |= IP_FW_IF_TCPEST_COMPAT; + break; + + case O_ICMPTYPE: + { + /* ICMP */ + /* XXX: check this */ + int i, type; + + compat_rule->fw_flg |= IP_FW_F_ICMPBIT_COMPAT; + for (i = 0; i < sizeof(uint32_t) ; i++) { + type = cmd32->d[0] & i; + + compat_rule->fw_uar_compat.fw_icmptypes[type / (sizeof(unsigned) * 8)] |= + 1 << (type % (sizeof(unsigned) * 8)); + } + break; + } + default: + break; + } /* switch */ + } /* for */ +} + +static void +ipfw_map_from_actions_32(struct ip_fw_32 *curr_rule, struct ip_fw_compat_32 *compat_rule) +{ + int l; + ipfw_insn *cmd; + + for (l = curr_rule->cmd_len - curr_rule->act_ofs, cmd = ACTION_PTR(curr_rule); + l > 0 ; + l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch (cmd->opcode) { + case O_ACCEPT: + compat_rule->fw_flg |= IP_FW_F_ACCEPT_COMPAT; + break; + case O_COUNT: + compat_rule->fw_flg |= IP_FW_F_COUNT_COMPAT; + break; + case O_PIPE: + compat_rule->fw_flg |= IP_FW_F_PIPE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_QUEUE: + compat_rule->fw_flg |= IP_FW_F_QUEUE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_SKIPTO: + compat_rule->fw_flg |= IP_FW_F_SKIPTO_COMPAT; + compat_rule->fw_skipto_rule_compat = cmd->arg1; + break; + case O_DIVERT: + 
compat_rule->fw_flg |= IP_FW_F_DIVERT_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_TEE: + compat_rule->fw_flg |= IP_FW_F_TEE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_FORWARD_IP: + { + ipfw_insn_sa *p = (ipfw_insn_sa *)cmd; + + compat_rule->fw_flg |= IP_FW_F_FWD_COMPAT; + compat_rule->fw_fwd_ip_compat.sin_len = p->sa.sin_len; + compat_rule->fw_fwd_ip_compat.sin_family = p->sa.sin_family; + compat_rule->fw_fwd_ip_compat.sin_port = p->sa.sin_port; + compat_rule->fw_fwd_ip_compat.sin_addr = p->sa.sin_addr; + + break; + } + case O_DENY: + compat_rule->fw_flg |= IP_FW_F_DENY_COMPAT; + break; + case O_REJECT: + compat_rule->fw_flg |= IP_FW_F_REJECT_COMPAT; + compat_rule->fw_reject_code_compat = cmd->arg1; + break; + case O_CHECK_STATE: + compat_rule->fw_flg |= IP_FW_F_CHECK_S_COMPAT; + break; + default: + break; + } + } +} + +static void +ipfw_map_from_actions_64(struct ip_fw_64 *curr_rule, struct ip_fw_compat_64 *compat_rule) +{ + int l; + ipfw_insn *cmd; + for (l = curr_rule->cmd_len - curr_rule->act_ofs, cmd = ACTION_PTR(curr_rule); + l > 0 ; + l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch (cmd->opcode) { + case O_ACCEPT: + compat_rule->fw_flg |= IP_FW_F_ACCEPT_COMPAT; + break; + case O_COUNT: + compat_rule->fw_flg |= IP_FW_F_COUNT_COMPAT; + break; + case O_PIPE: + compat_rule->fw_flg |= IP_FW_F_PIPE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_QUEUE: + compat_rule->fw_flg |= IP_FW_F_QUEUE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_SKIPTO: + compat_rule->fw_flg |= IP_FW_F_SKIPTO_COMPAT; + compat_rule->fw_skipto_rule_compat = cmd->arg1; + break; + case O_DIVERT: + compat_rule->fw_flg |= IP_FW_F_DIVERT_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_TEE: + compat_rule->fw_flg |= IP_FW_F_TEE_COMPAT; + compat_rule->fw_divert_port_compat = cmd->arg1; + break; + case O_FORWARD_IP: + { + ipfw_insn_sa *p = (ipfw_insn_sa 
*)cmd; + + compat_rule->fw_flg |= IP_FW_F_FWD_COMPAT; + compat_rule->fw_fwd_ip_compat.sin_len = p->sa.sin_len; + compat_rule->fw_fwd_ip_compat.sin_family = p->sa.sin_family; + compat_rule->fw_fwd_ip_compat.sin_port = p->sa.sin_port; + compat_rule->fw_fwd_ip_compat.sin_addr = p->sa.sin_addr; + + break; + } + case O_DENY: + compat_rule->fw_flg |= IP_FW_F_DENY_COMPAT; + break; + case O_REJECT: + compat_rule->fw_flg |= IP_FW_F_REJECT_COMPAT; + compat_rule->fw_reject_code_compat = cmd->arg1; + break; + case O_CHECK_STATE: + compat_rule->fw_flg |= IP_FW_F_CHECK_S_COMPAT; + break; + default: + break; + } + } +} + +static void +ipfw_version_latest_to_one_32(struct ip_fw_32 *curr_rule, struct ip_fw_compat_32 *rule_vers1) +{ + if (!rule_vers1) + return; + + bzero(rule_vers1, sizeof(struct ip_fw_compat)); + + rule_vers1->version = IP_FW_VERSION_1; + rule_vers1->context = CAST_DOWN_EXPLICIT(user32_addr_t,curr_rule->context); + rule_vers1->fw_number = curr_rule->rulenum; + rule_vers1->fw_pcnt = curr_rule->pcnt; + rule_vers1->fw_bcnt = curr_rule->bcnt; + rule_vers1->timestamp = curr_rule->timestamp; + + /* convert actions */ + ipfw_map_from_actions_32(curr_rule, rule_vers1); + + /* convert commands */ + ipfw_map_from_cmds_32(curr_rule, rule_vers1); + +#if FW2_DEBUG_VERBOSE + ipfw_print_vers1_struct_32(rule_vers1); +#endif +} + +static void +ipfw_version_latest_to_one_64(struct ip_fw_64 *curr_rule, struct ip_fw_compat_64 *rule_vers1) +{ + if (!rule_vers1) + return; + + bzero(rule_vers1, sizeof(struct ip_fw_compat)); + + rule_vers1->version = IP_FW_VERSION_1; + rule_vers1->context = CAST_DOWN_EXPLICIT(__uint64_t, curr_rule->context); + rule_vers1->fw_number = curr_rule->rulenum; + rule_vers1->fw_pcnt = curr_rule->pcnt; + rule_vers1->fw_bcnt = curr_rule->bcnt; + rule_vers1->timestamp = curr_rule->timestamp; + + /* convert actions */ + ipfw_map_from_actions_64(curr_rule, rule_vers1); + + /* convert commands */ + ipfw_map_from_cmds_64(curr_rule, rule_vers1); + +#if FW2_DEBUG_VERBOSE 
+ ipfw_print_vers1_struct_64(rule_vers1); +#endif +} + +/* first convert to version one then to version zero */ +static void +ipfw_version_latest_to_zero(struct ip_fw *curr_rule, struct ip_old_fw *rule_vers0, int is64user) +{ + + if ( is64user ){ + struct ip_fw_compat_64 rule_vers1; + ipfw_version_latest_to_one_64((struct ip_fw_64*)curr_rule, &rule_vers1); + bzero(rule_vers0, sizeof(struct ip_old_fw)); + bcopy(&rule_vers1.fw_uar_compat, &rule_vers0->fw_uar, sizeof(rule_vers1.fw_uar_compat)); + bcopy(&rule_vers1.fw_in_if, &rule_vers0->fw_in_if, sizeof(rule_vers1.fw_in_if)); + bcopy(&rule_vers1.fw_out_if, &rule_vers0->fw_out_if, sizeof(rule_vers1.fw_out_if)); + bcopy(&rule_vers1.fw_un_compat, &rule_vers0->fw_un, sizeof(rule_vers1.fw_un_compat)); + rule_vers0->fw_pcnt = rule_vers1.fw_pcnt; + rule_vers0->fw_bcnt = rule_vers1.fw_bcnt; + rule_vers0->fw_src = rule_vers1.fw_src; + rule_vers0->fw_dst = rule_vers1.fw_dst; + rule_vers0->fw_smsk = rule_vers1.fw_smsk; + rule_vers0->fw_dmsk = rule_vers1.fw_dmsk; + rule_vers0->fw_number = rule_vers1.fw_number; + rule_vers0->fw_flg = rule_vers1.fw_flg; + rule_vers0->fw_ipopt = rule_vers1.fw_ipopt; + rule_vers0->fw_ipnopt = rule_vers1.fw_ipnopt; + rule_vers0->fw_tcpf = rule_vers1.fw_tcpf; + rule_vers0->fw_tcpnf = rule_vers1.fw_tcpnf; + rule_vers0->timestamp = rule_vers1.timestamp; + rule_vers0->fw_prot = rule_vers1.fw_prot; + rule_vers0->fw_nports = rule_vers1.fw_nports; + rule_vers0->pipe_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.pipe_ptr); + rule_vers0->next_rule_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.next_rule_ptr); + + if (rule_vers1.fw_ipflg && IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB; + } + else { + struct ip_fw_compat_32 rule_vers1; + ipfw_version_latest_to_one_32( (struct ip_fw_32*)curr_rule, &rule_vers1); + bzero(rule_vers0, sizeof(struct ip_old_fw)); + bcopy(&rule_vers1.fw_uar_compat, &rule_vers0->fw_uar, sizeof(rule_vers1.fw_uar_compat)); + bcopy(&rule_vers1.fw_in_if, 
&rule_vers0->fw_in_if, sizeof(rule_vers1.fw_in_if)); + bcopy(&rule_vers1.fw_out_if, &rule_vers0->fw_out_if, sizeof(rule_vers1.fw_out_if)); + bcopy(&rule_vers1.fw_un_compat, &rule_vers0->fw_un, sizeof(rule_vers1.fw_un_compat)); + rule_vers0->fw_pcnt = rule_vers1.fw_pcnt; + rule_vers0->fw_bcnt = rule_vers1.fw_bcnt; + rule_vers0->fw_src = rule_vers1.fw_src; + rule_vers0->fw_dst = rule_vers1.fw_dst; + rule_vers0->fw_smsk = rule_vers1.fw_smsk; + rule_vers0->fw_dmsk = rule_vers1.fw_dmsk; + rule_vers0->fw_number = rule_vers1.fw_number; + rule_vers0->fw_flg = rule_vers1.fw_flg; + rule_vers0->fw_ipopt = rule_vers1.fw_ipopt; + rule_vers0->fw_ipnopt = rule_vers1.fw_ipnopt; + rule_vers0->fw_tcpf = rule_vers1.fw_tcpf; + rule_vers0->fw_tcpnf = rule_vers1.fw_tcpnf; + rule_vers0->timestamp = rule_vers1.timestamp; + rule_vers0->fw_prot = rule_vers1.fw_prot; + rule_vers0->fw_nports = rule_vers1.fw_nports; + rule_vers0->pipe_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.pipe_ptr); + rule_vers0->next_rule_ptr = CAST_DOWN_EXPLICIT(void*, rule_vers1.next_rule_ptr); + + if (rule_vers1.fw_ipflg && IP_FW_IF_TCPEST_COMPAT) rule_vers0->fw_tcpf |= IP_OLD_FW_TCPF_ESTAB; + } + +} + +void +ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version, int is64user) +{ + switch (api_version) { + case IP_FW_VERSION_0: + { + struct ip_old_fw *rule_vers0 = old_rule; + + ipfw_version_latest_to_zero(curr_rule, rule_vers0, is64user); + break; + } + case IP_FW_VERSION_1: + { + if ( is64user ) + ipfw_version_latest_to_one_64((struct ip_fw_64*)curr_rule, (struct ip_fw_compat_64 *)old_rule); + else + ipfw_version_latest_to_one_32((struct ip_fw_32*)curr_rule, (struct ip_fw_compat_32 *)old_rule); + + break; + } + case IP_FW_CURRENT_API_VERSION: + /* ipfw2 for now, don't need to do anything */ + break; + + default: + /* unknown version */ + break; + } +} + + +/* ******************************************** + * *********** Convert to Latest ************** + * 
********************************************/ /* from ip_fw.c */ static int -ipfw_check_vers1_struct(struct ip_fw_compat *frwl) +ipfw_check_vers1_struct_32(struct ip_fw_compat_32 *frwl) +{ + /* Check for invalid flag bits */ + if ((frwl->fw_flg & ~IP_FW_F_MASK_COMPAT) != 0) { + /* + printf(("%s undefined flag bits set (flags=%x)\n", + err_prefix, frwl->fw_flg)); + */ + return (EINVAL); + } + if (frwl->fw_flg == IP_FW_F_CHECK_S_COMPAT) { + /* check-state */ + return 0 ; + } + /* Must apply to incoming or outgoing (or both) */ + if (!(frwl->fw_flg & (IP_FW_F_IN_COMPAT | IP_FW_F_OUT_COMPAT))) { + /* + printf(("%s neither in nor out\n", err_prefix)); + */ + return (EINVAL); + } + /* Empty interface name is no good */ + if (((frwl->fw_flg & IP_FW_F_IIFNAME_COMPAT) + && !*frwl->fw_in_if.fu_via_if_compat.name) + || ((frwl->fw_flg & IP_FW_F_OIFNAME_COMPAT) + && !*frwl->fw_out_if.fu_via_if_compat.name)) { + /* + printf(("%s empty interface name\n", err_prefix)); + */ + return (EINVAL); + } + /* Sanity check interface matching */ + if ((frwl->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { + ; /* allow "via" backwards compatibility */ + } else if ((frwl->fw_flg & IP_FW_F_IN_COMPAT) + && (frwl->fw_flg & IP_FW_F_OIFACE_COMPAT)) { + /* + printf(("%s outgoing interface check on incoming\n", + err_prefix)); + */ + return (EINVAL); + } + /* Sanity check port ranges */ + if ((frwl->fw_flg & IP_FW_F_SRNG_COMPAT) && IP_FW_GETNSRCP_COMPAT(frwl) < 2) { + /* + printf(("%s src range set but n_src_p=%d\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl))); + */ + return (EINVAL); + } + if ((frwl->fw_flg & IP_FW_F_DRNG_COMPAT) && IP_FW_GETNDSTP_COMPAT(frwl) < 2) { + /* + printf(("%s dst range set but n_dst_p=%d\n", + err_prefix, IP_FW_GETNDSTP_COMPAT(frwl))); + */ + return (EINVAL); + } + if (IP_FW_GETNSRCP_COMPAT(frwl) + IP_FW_GETNDSTP_COMPAT(frwl) > IP_FW_MAX_PORTS_COMPAT) { + /* + printf(("%s too many ports (%d+%d)\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl), 
IP_FW_GETNDSTP_COMPAT(frwl))); + */ + return (EINVAL); + } + /* + * Protocols other than TCP/UDP don't use port range + */ + if ((frwl->fw_prot != IPPROTO_TCP) && + (frwl->fw_prot != IPPROTO_UDP) && + (IP_FW_GETNSRCP_COMPAT(frwl) || IP_FW_GETNDSTP_COMPAT(frwl))) { + /* + printf(("%s port(s) specified for non TCP/UDP rule\n", + err_prefix)); + */ + return (EINVAL); + } + + /* + * Rather than modify the entry to make such entries work, + * we reject this rule and require user level utilities + * to enforce whatever policy they deem appropriate. + */ + if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) || + (frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) { + /* + printf(("%s rule never matches\n", err_prefix)); + */ + return (EINVAL); + } + + if ((frwl->fw_flg & IP_FW_F_FRAG_COMPAT) && + (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) { + if (frwl->fw_nports) { + /* + printf(("%s cannot mix 'frag' and ports\n", err_prefix)); + */ + return (EINVAL); + } + if (frwl->fw_prot == IPPROTO_TCP && + frwl->fw_tcpf != frwl->fw_tcpnf) { + /* + printf(("%s cannot mix 'frag' and TCP flags\n", err_prefix)); + */ + return (EINVAL); + } + } + + /* Check command specific stuff */ + switch (frwl->fw_flg & IP_FW_F_COMMAND_COMPAT) + { + case IP_FW_F_REJECT_COMPAT: + if (frwl->fw_reject_code_compat >= 0x100 + && !(frwl->fw_prot == IPPROTO_TCP + && frwl->fw_reject_code_compat == IP_FW_REJECT_RST_COMPAT)) { + /* + printf(("%s unknown reject code\n", err_prefix)); + */ + return (EINVAL); + } + break; + case IP_FW_F_DIVERT_COMPAT: /* Diverting to port zero is invalid */ + case IP_FW_F_TEE_COMPAT: + case IP_FW_F_PIPE_COMPAT: /* piping through 0 is invalid */ + case IP_FW_F_QUEUE_COMPAT: /* piping through 0 is invalid */ + if (frwl->fw_divert_port_compat == 0) { + /* + printf(("%s can't divert to port 0\n", err_prefix)); + */ + return (EINVAL); + } + break; + case IP_FW_F_DENY_COMPAT: + case IP_FW_F_ACCEPT_COMPAT: + case IP_FW_F_COUNT_COMPAT: + case IP_FW_F_SKIPTO_COMPAT: + 
case IP_FW_F_FWD_COMPAT: + case IP_FW_F_UID_COMPAT: + break; + default: + /* + printf(("%s invalid command\n", err_prefix)); + */ + return (EINVAL); + } + + return 0; +} + +static int +ipfw_check_vers1_struct_64(struct ip_fw_compat_64 *frwl) { /* Check for invalid flag bits */ if ((frwl->fw_flg & ~IP_FW_F_MASK_COMPAT) != 0) { @@ -1367,148 +1819,706 @@ ipfw_check_vers1_struct(struct ip_fw_compat *frwl) printf(("%s undefined flag bits set (flags=%x)\n", err_prefix, frwl->fw_flg)); */ + + return (EINVAL); + } + if (frwl->fw_flg == IP_FW_F_CHECK_S_COMPAT) { + /* check-state */ + return 0 ; + } + /* Must apply to incoming or outgoing (or both) */ + if (!(frwl->fw_flg & (IP_FW_F_IN_COMPAT | IP_FW_F_OUT_COMPAT))) { + /* + printf(("%s neither in nor out\n", err_prefix)); + */ + + return (EINVAL); + } + /* Empty interface name is no good */ + if (((frwl->fw_flg & IP_FW_F_IIFNAME_COMPAT) + && !*frwl->fw_in_if.fu_via_if_compat.name) + || ((frwl->fw_flg & IP_FW_F_OIFNAME_COMPAT) + && !*frwl->fw_out_if.fu_via_if_compat.name)) { + /* + printf(("%s empty interface name\n", err_prefix)); + */ + + return (EINVAL); + } + /* Sanity check interface matching */ + if ((frwl->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { + ; /* allow "via" backwards compatibility */ + } else if ((frwl->fw_flg & IP_FW_F_IN_COMPAT) + && (frwl->fw_flg & IP_FW_F_OIFACE_COMPAT)) { + /* + printf(("%s outgoing interface check on incoming\n", + err_prefix)); + */ + + return (EINVAL); + } + /* Sanity check port ranges */ + if ((frwl->fw_flg & IP_FW_F_SRNG_COMPAT) && IP_FW_GETNSRCP_COMPAT(frwl) < 2) { + /* + printf(("%s src range set but n_src_p=%d\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl))); + */ + + return (EINVAL); + } + if ((frwl->fw_flg & IP_FW_F_DRNG_COMPAT) && IP_FW_GETNDSTP_COMPAT(frwl) < 2) { + /* + printf(("%s dst range set but n_dst_p=%d\n", + err_prefix, IP_FW_GETNDSTP_COMPAT(frwl))); + */ + + return (EINVAL); + } + if (IP_FW_GETNSRCP_COMPAT(frwl) + IP_FW_GETNDSTP_COMPAT(frwl) > 
IP_FW_MAX_PORTS_COMPAT) { + /* + printf(("%s too many ports (%d+%d)\n", + err_prefix, IP_FW_GETNSRCP_COMPAT(frwl), IP_FW_GETNDSTP_COMPAT(frwl))); + */ + + return (EINVAL); + } + /* + * Protocols other than TCP/UDP don't use port range + */ + if ((frwl->fw_prot != IPPROTO_TCP) && + (frwl->fw_prot != IPPROTO_UDP) && + (IP_FW_GETNSRCP_COMPAT(frwl) || IP_FW_GETNDSTP_COMPAT(frwl))) { + /* + printf(("%s port(s) specified for non TCP/UDP rule\n", + err_prefix)); + */ + + return (EINVAL); + } + + /* + * Rather than modify the entry to make such entries work, + * we reject this rule and require user level utilities + * to enforce whatever policy they deem appropriate. + */ + if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) || + (frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) { + /* + printf(("%s rule never matches\n", err_prefix)); + */ + + return (EINVAL); + } + + if ((frwl->fw_flg & IP_FW_F_FRAG_COMPAT) && + (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) { + if (frwl->fw_nports) { + /* + printf(("%s cannot mix 'frag' and ports\n", err_prefix)); + */ + + return (EINVAL); + } + if (frwl->fw_prot == IPPROTO_TCP && + frwl->fw_tcpf != frwl->fw_tcpnf) { + /* + printf(("%s cannot mix 'frag' and TCP flags\n", err_prefix)); + */ + + return (EINVAL); + } + } + + /* Check command specific stuff */ + switch (frwl->fw_flg & IP_FW_F_COMMAND_COMPAT) + { + case IP_FW_F_REJECT_COMPAT: + if (frwl->fw_reject_code_compat >= 0x100 + && !(frwl->fw_prot == IPPROTO_TCP + && frwl->fw_reject_code_compat == IP_FW_REJECT_RST_COMPAT)) { + /* + printf(("%s unknown reject code\n", err_prefix)); + */ + + return (EINVAL); + } + break; + case IP_FW_F_DIVERT_COMPAT: /* Diverting to port zero is invalid */ + case IP_FW_F_TEE_COMPAT: + case IP_FW_F_PIPE_COMPAT: /* piping through 0 is invalid */ + case IP_FW_F_QUEUE_COMPAT: /* piping through 0 is invalid */ + if (frwl->fw_divert_port_compat == 0) { + /* + printf(("%s can't divert to port 0\n", err_prefix)); + */ + + return (EINVAL); + } + 
break; + case IP_FW_F_DENY_COMPAT: + case IP_FW_F_ACCEPT_COMPAT: + case IP_FW_F_COUNT_COMPAT: + case IP_FW_F_SKIPTO_COMPAT: + case IP_FW_F_FWD_COMPAT: + case IP_FW_F_UID_COMPAT: + break; + default: + /* + printf(("%s invalid command\n", err_prefix)); + */ + return (EINVAL); } - if (frwl->fw_flg == IP_FW_F_CHECK_S_COMPAT) { - /* check-state */ - return 0 ; + + return 0; +} + +static void +ipfw_convert_to_cmds_32(struct ip_fw *curr_rule, struct ip_fw_compat_32 *compat_rule) +{ + int k; + uint32_t actbuf[255], cmdbuf[255]; + ipfw_insn *action, *cmd, *src, *dst; + ipfw_insn *have_state = NULL; /* track check-state or keep-state */ + + if (!compat_rule || !curr_rule || !(curr_rule->cmd)) { + return; + } + + /* preemptively check the old ip_fw rule to + * make sure it's valid before starting to copy stuff + */ + if (ipfw_check_vers1_struct_32(compat_rule)) { + /* bad rule */ + return; + } + + bzero(actbuf, sizeof(actbuf)); /* actions go here */ + bzero(cmdbuf, sizeof(cmdbuf)); + + /* fill in action */ + action = (ipfw_insn *)actbuf; + { + u_int flag = compat_rule->fw_flg; + + action->len = 1; /* default */ + + if (flag & IP_FW_F_CHECK_S_COMPAT) { + have_state = action; + action->opcode = O_CHECK_STATE; + } + else { + switch (flag & IP_FW_F_COMMAND_COMPAT) { + case IP_FW_F_ACCEPT_COMPAT: + action->opcode = O_ACCEPT; + break; + case IP_FW_F_COUNT_COMPAT: + action->opcode = O_COUNT; + break; + case IP_FW_F_PIPE_COMPAT: + action->opcode = O_PIPE; + action->len = F_INSN_SIZE(ipfw_insn_pipe); + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_QUEUE_COMPAT: + action->opcode = O_QUEUE; + action->len = F_INSN_SIZE(ipfw_insn_pipe); + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_SKIPTO_COMPAT: + action->opcode = O_SKIPTO; + action->arg1 = compat_rule->fw_skipto_rule_compat; + break; + case IP_FW_F_DIVERT_COMPAT: + action->opcode = O_DIVERT; + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case 
IP_FW_F_TEE_COMPAT: + action->opcode = O_TEE; + action->arg1 = compat_rule->fw_divert_port_compat; + break; + case IP_FW_F_FWD_COMPAT: + { + ipfw_insn_sa *p = (ipfw_insn_sa *)action; + + action->opcode = O_FORWARD_IP; + action->len = F_INSN_SIZE(ipfw_insn_sa); + + p->sa.sin_len = compat_rule->fw_fwd_ip_compat.sin_len; + p->sa.sin_family = compat_rule->fw_fwd_ip_compat.sin_family; + p->sa.sin_port = compat_rule->fw_fwd_ip_compat.sin_port; + p->sa.sin_addr = compat_rule->fw_fwd_ip_compat.sin_addr; + + break; + } + case IP_FW_F_DENY_COMPAT: + action->opcode = O_DENY; + action->arg1 = 0; + break; + case IP_FW_F_REJECT_COMPAT: + action->opcode = O_REJECT; + action->arg1 = compat_rule->fw_reject_code_compat; + break; + default: + action->opcode = O_NOP; + break; + } + } + + /* action is mandatory */ + if (action->opcode == O_NOP) { + return; + } + + action = next_cmd(action); + } /* end actions */ + + cmd = (ipfw_insn *)cmdbuf; + + /* this is O_CHECK_STATE, we're done */ + if (have_state) { + goto done; + } + + { + ipfw_insn *prev = NULL; + u_int flag = compat_rule->fw_flg; + + /* logging */ + if (flag & IP_FW_F_PRN_COMPAT) { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + + cmd->opcode = O_LOG; + cmd->len |= F_INSN_SIZE(ipfw_insn_log); + c->max_log = compat_rule->fw_logamount; + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* protocol */ + if (compat_rule->fw_prot != 0) { + fill_cmd(cmd, O_PROTO, compat_rule->fw_prot); + prev = cmd; + cmd = next_cmd(cmd); + } + + /* source */ + if (flag & IP_FW_F_SME_COMPAT) { + cmd->opcode = O_IP_SRC_ME; + cmd->len |= F_INSN_SIZE(ipfw_insn); + if (flag & IP_FW_F_INVSRC_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (compat_rule->fw_smsk.s_addr != 0) { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + ip->addr = compat_rule->fw_src; + ip->mask = compat_rule->fw_smsk; + cmd->opcode = O_IP_SRC_MASK; + cmd->len |= F_INSN_SIZE(ipfw_insn_ip); /* double check this */ + } 
else { + /* one IP */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + if (compat_rule->fw_src.s_addr == 0) { + /* any */ + cmd32->o.len &= ~F_LEN_MASK; /* zero len */ + } else { + cmd32->d[0] = compat_rule->fw_src.s_addr; + cmd32->o.opcode = O_IP_SRC; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + } + } + + if (flag & IP_FW_F_INVSRC_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + if (F_LEN(cmd) != 0) { /* !any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } + + /* source ports */ + { + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i, j = 0, + nports = IP_FW_GETNSRCP_COMPAT(compat_rule), + have_range = 0; + + cmd->opcode = O_IP_SRCPORT; + for (i = 0; i < nports; i++) { + if (((flag & IP_FW_F_SRNG_COMPAT) || + (flag & IP_FW_F_SMSK_COMPAT)) && !have_range) { + p[0] = compat_rule->fw_uar_compat.fw_pts[i++]; + p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + have_range = 1; + } else { + p[0] = p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + } + p += 2; + j++; + } + + if (j > 0) { + ports->o.len |= j+1; /* leave F_NOT and F_OR untouched */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* destination */ + if (flag & IP_FW_F_DME_COMPAT) { + cmd->opcode = O_IP_DST_ME; + cmd->len |= F_INSN_SIZE(ipfw_insn); + if (flag & IP_FW_F_INVDST_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (compat_rule->fw_dmsk.s_addr != 0) { + /* addr/mask */ + ipfw_insn_ip *ip = (ipfw_insn_ip *)cmd; + + ip->addr = compat_rule->fw_dst; + ip->mask = compat_rule->fw_dmsk; + cmd->opcode = O_IP_DST_MASK; + cmd->len |= F_INSN_SIZE(ipfw_insn_ip); /* double check this */ + } else { + /* one IP */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + if (compat_rule->fw_dst.s_addr == 0) { + /* any */ + cmd32->o.len &= ~F_LEN_MASK; /* zero len */ + } else { + cmd32->d[0] = compat_rule->fw_dst.s_addr; + cmd32->o.opcode = O_IP_DST; + cmd32->o.len |= 
F_INSN_SIZE(ipfw_insn_u32); + } + } + + if (flag & IP_FW_F_INVDST_COMPAT) { + cmd->len ^= F_NOT; /* toggle F_NOT */ + } + + if (F_LEN(cmd) != 0) { /* !any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } + + /* dest. ports */ + { + ipfw_insn_u16 *ports = (ipfw_insn_u16 *)cmd; + uint16_t *p = ports->ports; + int i = IP_FW_GETNSRCP_COMPAT(compat_rule), + j = 0, + nports = (IP_FW_GETNDSTP_COMPAT(compat_rule) + i), + have_range = 0; + + cmd->opcode = O_IP_DSTPORT; + for (; i < nports; i++, p += 2) { + if (((flag & IP_FW_F_DRNG_COMPAT) || + (flag & IP_FW_F_DMSK_COMPAT)) && !have_range) { + /* range */ + p[0] = compat_rule->fw_uar_compat.fw_pts[i++]; + p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + have_range = 1; + } else { + p[0] = p[1] = compat_rule->fw_uar_compat.fw_pts[i]; + } + j++; + } + + if (j > 0) { + ports->o.len |= j+1; /* leave F_NOT and F_OR untouched */ + } + + prev = cmd; + cmd = next_cmd(cmd); } - /* Must apply to incoming or outgoing (or both) */ - if (!(frwl->fw_flg & (IP_FW_F_IN_COMPAT | IP_FW_F_OUT_COMPAT))) { - /* - printf(("%s neither in nor out\n", err_prefix)); - */ - return (EINVAL); + + if (flag & IP_FW_F_UID_COMPAT) { + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + cmd32->o.opcode = O_UID; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + cmd32->d[0] = compat_rule->fw_uid; + + prev = cmd; + cmd = next_cmd(cmd); } - /* Empty interface name is no good */ - if (((frwl->fw_flg & IP_FW_F_IIFNAME_COMPAT) - && !*frwl->fw_in_if.fu_via_if_compat.name) - || ((frwl->fw_flg & IP_FW_F_OIFNAME_COMPAT) - && !*frwl->fw_out_if.fu_via_if_compat.name)) { - /* - printf(("%s empty interface name\n", err_prefix)); - */ - return (EINVAL); + + if (flag & IP_FW_F_KEEP_S_COMPAT) { + have_state = cmd; + fill_cmd(cmd, O_KEEP_STATE, 0); + + prev = cmd; + cmd = next_cmd(cmd); } - /* Sanity check interface matching */ - if ((frwl->fw_flg & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { - ; /* allow "via" backwards compatibility */ - } else if 
((frwl->fw_flg & IP_FW_F_IN_COMPAT) - && (frwl->fw_flg & IP_FW_F_OIFACE_COMPAT)) { - /* - printf(("%s outgoing interface check on incoming\n", - err_prefix)); - */ - return (EINVAL); + if (flag & IP_FW_BRIDGED_COMPAT) { + fill_cmd(cmd, O_LAYER2, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + if ((flag & IF_FW_F_VIAHACK_COMPAT) == IF_FW_F_VIAHACK_COMPAT) { + /* via */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_in_if; + + cmd->opcode = O_VIA; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (compat_rule->fw_flg & IP_FW_F_IIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } else { + if (flag & IP_FW_F_IN_COMPAT) { + fill_cmd(cmd, O_IN, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + if (flag & IP_FW_F_OUT_COMPAT) { + /* if the previous command was O_IN, and this + * is being set as well, it's equivalent to not + * having either command, so let's back up prev + * to the cmd before it and move cmd to prev. 
+ */ + if (prev->opcode == O_IN) { + cmd = prev; + bzero(cmd, sizeof(*cmd)); + } else { + cmd->len ^= F_NOT; /* toggle F_NOT */ + fill_cmd(cmd, O_IN, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + } + if (flag & IP_FW_F_OIFACE_COMPAT) { + /* xmit */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_out_if; + + cmd->opcode = O_XMIT; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (flag & IP_FW_F_OIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } + else if (flag & IP_FW_F_IIFACE_COMPAT) { + /* recv */ + ipfw_insn_if *ifcmd = (ipfw_insn_if *)cmd; + union ip_fw_if_compat ifu = compat_rule->fw_in_if; + + cmd->opcode = O_RECV; + ifcmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + if (ifu.fu_via_ip.s_addr == 0) { + /* "any" */ + ifcmd->name[0] = '\0'; + ifcmd->o.len = 0; + } + else if (flag & IP_FW_F_IIFNAME_COMPAT) { + /* by name */ + strncpy(ifcmd->name, ifu.fu_via_if_compat.name, sizeof(ifcmd->name)); + ifcmd->p.unit = ifu.fu_via_if_compat.unit; + } else { + /* by addr */ + ifcmd->p.ip = ifu.fu_via_ip; + } + + prev = cmd; + cmd = next_cmd(cmd); + } + } + + if (flag & IP_FW_F_FRAG_COMPAT) { + fill_cmd(cmd, O_FRAG, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* IP options */ + if (compat_rule->fw_ipopt != 0 || compat_rule->fw_ipnopt != 0) { + fill_cmd(cmd, O_IPOPT, (compat_rule->fw_ipopt & 0xff) | + (compat_rule->fw_ipnopt & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } + + if (compat_rule->fw_prot == IPPROTO_TCP) { + if (compat_rule->fw_ipflg & IP_FW_IF_TCPEST_COMPAT) { + fill_cmd(cmd, O_ESTAB, 0); + + prev = cmd; + cmd = next_cmd(cmd); + } + + /* TCP options and flags */ + if (compat_rule->fw_tcpf != 0 || 
compat_rule->fw_tcpnf != 0) { + if ((compat_rule->fw_tcpf & IP_FW_TCPF_SYN_COMPAT) && + compat_rule->fw_tcpnf & IP_FW_TCPF_ACK_COMPAT) { + fill_cmd(cmd, O_TCPFLAGS, (TH_SYN) | ( (TH_ACK) & 0xff) <<8); + + prev = cmd; + cmd = next_cmd(cmd); + } + else { + fill_cmd(cmd, O_TCPFLAGS, (compat_rule->fw_tcpf & 0xff) | + (compat_rule->fw_tcpnf & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } + } + if (compat_rule->fw_tcpopt != 0 || compat_rule->fw_tcpnopt != 0) { + fill_cmd(cmd, O_TCPOPTS, (compat_rule->fw_tcpopt & 0xff) | + (compat_rule->fw_tcpnopt & 0xff) << 8); + + prev = cmd; + cmd = next_cmd(cmd); + } } - /* Sanity check port ranges */ - if ((frwl->fw_flg & IP_FW_F_SRNG_COMPAT) && IP_FW_GETNSRCP_COMPAT(frwl) < 2) { - /* - printf(("%s src range set but n_src_p=%d\n", - err_prefix, IP_FW_GETNSRCP_COMPAT(frwl))); - */ - return (EINVAL); + + /* ICMP */ + /* XXX: check this */ + if (flag & IP_FW_F_ICMPBIT_COMPAT) { + int i; + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ + + cmd32->o.opcode = O_ICMPTYPE; + cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); + + for (i = 0; i < IP_FW_ICMPTYPES_DIM_COMPAT; i++) { + cmd32->d[0] |= compat_rule->fw_uar_compat.fw_icmptypes[i]; + } + + prev = cmd; + cmd = next_cmd(cmd); } - if ((frwl->fw_flg & IP_FW_F_DRNG_COMPAT) && IP_FW_GETNDSTP_COMPAT(frwl) < 2) { - /* - printf(("%s dst range set but n_dst_p=%d\n", - err_prefix, IP_FW_GETNDSTP_COMPAT(frwl))); - */ - return (EINVAL); + } /* end commands */ + +done: + /* finally, copy everything into the current + * rule buffer in the right order. 
+ */ + dst = curr_rule->cmd; + + /* first, do match probability */ + if (compat_rule->fw_flg & IP_FW_F_RND_MATCH_COMPAT) { + dst->opcode = O_PROB; + dst->len = 2; + *((int32_t *)(dst+1)) = compat_rule->pipe_ptr; + dst += dst->len; } - if (IP_FW_GETNSRCP_COMPAT(frwl) + IP_FW_GETNDSTP_COMPAT(frwl) > IP_FW_MAX_PORTS_COMPAT) { - /* - printf(("%s too many ports (%d+%d)\n", - err_prefix, IP_FW_GETNSRCP_COMPAT(frwl), IP_FW_GETNDSTP_COMPAT(frwl))); - */ - return (EINVAL); + + /* generate O_PROBE_STATE if necessary */ + if (have_state && have_state->opcode != O_CHECK_STATE) { + fill_cmd(dst, O_PROBE_STATE, 0); + dst = next_cmd(dst); } + /* - * Protocols other than TCP/UDP don't use port range + * copy all commands but O_LOG, O_KEEP_STATE */ - if ((frwl->fw_prot != IPPROTO_TCP) && - (frwl->fw_prot != IPPROTO_UDP) && - (IP_FW_GETNSRCP_COMPAT(frwl) || IP_FW_GETNDSTP_COMPAT(frwl))) { - /* - printf(("%s port(s) specified for non TCP/UDP rule\n", - err_prefix)); - */ - return (EINVAL); + for (src = (ipfw_insn *)cmdbuf; src != cmd; src += k) { + k = F_LEN(src); + + switch (src->opcode) { + case O_LOG: + case O_KEEP_STATE: + break; + default: + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; + } } /* - * Rather than modify the entry to make such entries work, - * we reject this rule and require user level utilities - * to enforce whatever policy they deem appropriate. 
+ * put back the have_state command as last opcode */ - if ((frwl->fw_src.s_addr & (~frwl->fw_smsk.s_addr)) || - (frwl->fw_dst.s_addr & (~frwl->fw_dmsk.s_addr))) { - /* - printf(("%s rule never matches\n", err_prefix)); - */ - return (EINVAL); + if (have_state && have_state->opcode != O_CHECK_STATE) { + k = F_LEN(have_state); + bcopy(have_state, dst, k * sizeof(uint32_t)); + dst += k; } + + /* + * start action section + */ + curr_rule->act_ofs = dst - curr_rule->cmd; - if ((frwl->fw_flg & IP_FW_F_FRAG_COMPAT) && - (frwl->fw_prot == IPPROTO_UDP || frwl->fw_prot == IPPROTO_TCP)) { - if (frwl->fw_nports) { - /* - printf(("%s cannot mix 'frag' and ports\n", err_prefix)); - */ - return (EINVAL); - } - if (frwl->fw_prot == IPPROTO_TCP && - frwl->fw_tcpf != frwl->fw_tcpnf) { - /* - printf(("%s cannot mix 'frag' and TCP flags\n", err_prefix)); - */ - return (EINVAL); - } + /* + * put back O_LOG if necessary + */ + src = (ipfw_insn *)cmdbuf; + if (src->opcode == O_LOG) { + k = F_LEN(src); + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; } - - /* Check command specific stuff */ - switch (frwl->fw_flg & IP_FW_F_COMMAND_COMPAT) - { - case IP_FW_F_REJECT_COMPAT: - if (frwl->fw_reject_code_compat >= 0x100 - && !(frwl->fw_prot == IPPROTO_TCP - && frwl->fw_reject_code_compat == IP_FW_REJECT_RST_COMPAT)) { - /* - printf(("%s unknown reject code\n", err_prefix)); - */ - return (EINVAL); - } - break; - case IP_FW_F_DIVERT_COMPAT: /* Diverting to port zero is invalid */ - case IP_FW_F_TEE_COMPAT: - case IP_FW_F_PIPE_COMPAT: /* piping through 0 is invalid */ - case IP_FW_F_QUEUE_COMPAT: /* piping through 0 is invalid */ - if (frwl->fw_divert_port_compat == 0) { - /* - printf(("%s can't divert to port 0\n", err_prefix)); - */ - return (EINVAL); - } - break; - case IP_FW_F_DENY_COMPAT: - case IP_FW_F_ACCEPT_COMPAT: - case IP_FW_F_COUNT_COMPAT: - case IP_FW_F_SKIPTO_COMPAT: - case IP_FW_F_FWD_COMPAT: - case IP_FW_F_UID_COMPAT: - break; - default: - /* - printf(("%s invalid command\n", 
err_prefix)); - */ - return (EINVAL); + + /* + * copy all other actions + */ + for (src = (ipfw_insn *)actbuf; src != action; src += k) { + k = F_LEN(src); + bcopy(src, dst, k * sizeof(uint32_t)); + dst += k; } - return 0; + curr_rule->cmd_len = (uint32_t *)dst - (uint32_t *)(curr_rule->cmd); + + return; } static void -ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) +ipfw_convert_to_cmds_64(struct ip_fw *curr_rule, struct ip_fw_compat_64 *compat_rule) { int k; uint32_t actbuf[255], cmdbuf[255]; @@ -1522,14 +2532,13 @@ ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) /* preemptively check the old ip_fw rule to * make sure it's valid before starting to copy stuff */ - if (ipfw_check_vers1_struct(compat_rule)) { + if (ipfw_check_vers1_struct_64(compat_rule)) { /* bad rule */ return; } bzero(actbuf, sizeof(actbuf)); /* actions go here */ bzero(cmdbuf, sizeof(cmdbuf)); - /* fill in action */ action = (ipfw_insn *)actbuf; { @@ -1964,7 +2973,6 @@ ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) if (flag & IP_FW_F_ICMPBIT_COMPAT) { int i; ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; /* alias for cmd */ - cmd32->o.opcode = O_ICMPTYPE; cmd32->o.len |= F_INSN_SIZE(ipfw_insn_u32); @@ -1976,7 +2984,6 @@ ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) cmd = next_cmd(cmd); } } /* end commands */ - done: /* finally, copy everything into the current * rule buffer in the right order. 
@@ -2002,7 +3009,6 @@ ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) */ for (src = (ipfw_insn *)cmdbuf; src != cmd; src += k) { k = F_LEN(src); - switch (src->opcode) { case O_LOG: case O_KEEP_STATE: @@ -2047,28 +3053,27 @@ ipfw_convert_to_cmds(struct ip_fw *curr_rule, struct ip_fw_compat *compat_rule) } curr_rule->cmd_len = (uint32_t *)dst - (uint32_t *)(curr_rule->cmd); - return; } static int -ipfw_version_one_to_version_two(struct sockopt *sopt, struct ip_fw *curr_rule, - struct ip_fw_compat *rule_vers1) +ipfw_version_one_to_version_two_32(struct sockopt *sopt, struct ip_fw *curr_rule, + struct ip_fw_compat_32 *rule_vers1) { int err = EINVAL; - struct ip_fw_compat *rule_ptr; - struct ip_fw_compat rule; + struct ip_fw_compat_32 *rule_ptr; + struct ip_fw_compat_32 rule; if (rule_vers1) { rule_ptr = rule_vers1; err = 0; } else { /* do some basic size checking here, more extensive checking later */ - if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw_compat)) + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw_compat_32)) return err; - if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw_compat), - sizeof(struct ip_fw_compat)))) { + if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw_compat_32), + sizeof(struct ip_fw_compat_32)))) { return err; } @@ -2076,10 +3081,51 @@ ipfw_version_one_to_version_two(struct sockopt *sopt, struct ip_fw *curr_rule, } /* deal with commands */ - ipfw_convert_to_cmds(curr_rule, rule_ptr); + ipfw_convert_to_cmds_32(curr_rule, rule_ptr); + + curr_rule->version = IP_FW_CURRENT_API_VERSION; + curr_rule->context = CAST_DOWN_EXPLICIT(void*, rule_ptr->context); + curr_rule->rulenum = rule_ptr->fw_number; + curr_rule->pcnt = rule_ptr->fw_pcnt; + curr_rule->bcnt = rule_ptr->fw_bcnt; + curr_rule->timestamp = rule_ptr->timestamp; + + +#if FW2_DEBUG_VERBOSE + ipfw_print_vers2_struct(curr_rule); +#endif + + return err; +} + +static int +ipfw_version_one_to_version_two_64(struct 
sockopt *sopt, struct ip_fw *curr_rule, + struct ip_fw_compat_64 *rule_vers1) +{ + int err = EINVAL; + struct ip_fw_compat_64 *rule_ptr; + struct ip_fw_compat_64 rule; + + if (rule_vers1) { + rule_ptr = rule_vers1; + err = 0; + } else { + /* do some basic size checking here, more extensive checking later */ + if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw_compat_64)) + return err; + + if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw_compat_64), + sizeof(struct ip_fw_compat_64)))) { + return err; + } + rule_ptr = &rule; + } + + /* deal with commands */ + ipfw_convert_to_cmds_64(curr_rule, rule_ptr); curr_rule->version = IP_FW_CURRENT_API_VERSION; - curr_rule->context = rule_ptr->context; + curr_rule->context = CAST_DOWN_EXPLICIT( void *, rule_ptr->context); curr_rule->rulenum = rule_ptr->fw_number; curr_rule->pcnt = rule_ptr->fw_pcnt; curr_rule->bcnt = rule_ptr->fw_bcnt; @@ -2088,7 +3134,7 @@ ipfw_version_one_to_version_two(struct sockopt *sopt, struct ip_fw *curr_rule, #if FW2_DEBUG_VERBOSE ipfw_print_vers2_struct(curr_rule); -#endif /* FW2_DEBUG_VERBOSE */ +#endif return err; } @@ -2097,7 +3143,22 @@ ipfw_version_one_to_version_two(struct sockopt *sopt, struct ip_fw *curr_rule, * latest version of the firewall is ipfw2. */ static int -ipfw_version_one_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, struct ip_fw_compat *rule_vers1) +ipfw_version_one_to_latest_32(struct sockopt *sopt, struct ip_fw *curr_rule, struct ip_fw_compat_32 *rule_vers1) +{ + int err; + + /* if rule_vers1 is not null then this is coming from + * ipfw_version_zero_to_latest(), so pass that along; + * otherwise let ipfw_version_one_to_version_two() + * get the rule from sopt. 
+ */ + err = ipfw_version_one_to_version_two_32(sopt, curr_rule, rule_vers1); + + return err; +} + +static int +ipfw_version_one_to_latest_64(struct sockopt *sopt, struct ip_fw *curr_rule, struct ip_fw_compat_64 *rule_vers1) { int err; @@ -2106,11 +3167,21 @@ ipfw_version_one_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, struct * otherwise let ipfw_version_one_to_version_two() * get the rule from sopt. */ - err = ipfw_version_one_to_version_two(sopt, curr_rule, rule_vers1); + err = ipfw_version_one_to_version_two_64(sopt, curr_rule, rule_vers1); return err; } + +#if 0 + +/* + * XXX - ipfw_version_zero_to_one + * + * This function is only used in version #1 of ipfw, which is now deprecated. + * + */ + static void ipfw_version_zero_to_one(struct ip_old_fw *rule_vers0, struct ip_fw_compat *rule_vers1) { @@ -2141,46 +3212,13 @@ ipfw_version_zero_to_one(struct ip_old_fw *rule_vers0, struct ip_fw_compat *rule rule_vers1->fw_ipflg = (rule_vers0->fw_tcpf & IP_OLD_FW_TCPF_ESTAB) ? IP_FW_IF_TCPEST_COMPAT : 0; } -/* first convert to version one, then to version two */ -static int -ipfw_version_zero_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule) -{ - int err; - struct ip_old_fw rule_vers0; - struct ip_fw_compat rule_vers1; - - if (sopt->sopt_name == IP_OLD_FW_GET || - sopt->sopt_name == IP_OLD_FW_FLUSH || - sopt->sopt_val == USER_ADDR_NULL) { - /* In the old-style API, it was legal to not pass in a rule - * structure for certain firewall operations (e.g. flush, - * reset log). If that's the situation, we pretend we received - * a blank structure. 
*/ - bzero(curr_rule, sizeof(struct ip_fw)); - curr_rule->version = 10; - } - else { - if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_old_fw)) { - return EINVAL; - } - - err = sooptcopyin(sopt, &rule_vers0, sizeof(struct ip_old_fw), - sizeof(struct ip_old_fw)); - if (err) { - return err; - } - - ipfw_version_zero_to_one(&rule_vers0, &rule_vers1); - } - - return (ipfw_version_one_to_latest(sopt, curr_rule, &rule_vers1)); -} +#endif /* !ipfw_version_zero_to_one */ /* rule is a u_int32_t buffer[255] into which the converted * (if necessary) rules go. */ int -ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, int api_version) +ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, int api_version, int is64user) { int err = 0; @@ -2189,13 +3227,16 @@ ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *curr_rule, int api_ve */ switch (api_version) { case IP_FW_VERSION_0: - /* this is the oldest version we support */ - err = ipfw_version_zero_to_latest(sopt, curr_rule); + /* we're not supporting VERSION 0 */ + err = EOPNOTSUPP; break; case IP_FW_VERSION_1: /* this is the version supported in Panther */ - err = ipfw_version_one_to_latest(sopt, curr_rule, NULL); + if ( is64user ) + err = ipfw_version_one_to_latest_64(sopt, curr_rule, NULL); + else + err = ipfw_version_one_to_latest_32(sopt, curr_rule, NULL); break; case IP_FW_CURRENT_API_VERSION: @@ -2252,13 +3293,17 @@ ipfw_get_command_and_version(struct sockopt *sopt, int *command, u_int32_t *api_ /* working off the fact that the offset * is the same in both structs. 
*/ - struct ip_fw rule; - - if (!sopt->sopt_val || sopt->sopt_valsize < sizeof(struct ip_fw)) - return EINVAL; + struct ip_fw_64 rule; + size_t copyinsize; + + if (proc_is64bit(sopt->sopt_p)) + copyinsize = sizeof(struct ip_fw_64); + else + copyinsize = sizeof(struct ip_fw_32); - if ((err = sooptcopyin(sopt, &rule, sizeof(struct ip_fw), - sizeof(struct ip_fw)))) { + if (!sopt->sopt_val || sopt->sopt_valsize < copyinsize) + return EINVAL; + if ((err = sooptcopyin(sopt, &rule, copyinsize, copyinsize))) { return err; } @@ -2268,7 +3313,6 @@ ipfw_get_command_and_version(struct sockopt *sopt, int *command, u_int32_t *api_ if (command) { *command = cmd; } - if (api_version) { *api_version = vers; } diff --git a/bsd/netinet/ip_fw2_compat.h b/bsd/netinet/ip_fw2_compat.h index a38a7d3f1..bd968d810 100644 --- a/bsd/netinet/ip_fw2_compat.h +++ b/bsd/netinet/ip_fw2_compat.h @@ -3,11 +3,9 @@ #ifndef _IP_FW_COMPAT_H_ #define _IP_FW_COMPAT_H_ -#if !__LP64__ - /* prototypes */ -void ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version); -int ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *rule, int api_version); +void ipfw_convert_from_latest(struct ip_fw *curr_rule, void *old_rule, u_int32_t api_version, int is64user); +int ipfw_convert_to_latest(struct sockopt *sopt, struct ip_fw *rule, int api_version, int is64user); int ipfw_get_command_and_version(struct sockopt *sopt, int *command, u_int32_t *api_version); @@ -47,6 +45,7 @@ union ip_fw_if_compat { * Port numbers are stored in HOST byte order. */ + struct ip_fw_compat { u_int32_t version; /* Version of this structure. Should always be */ /* set to IP_FW_CURRENT_API_VERSION by clients. 
*/ @@ -115,6 +114,7 @@ struct ip_fw_chain_compat { /* * dynamic ipfw rule */ + struct ipfw_dyn_rule_compat { struct ipfw_dyn_rule *next ; @@ -129,6 +129,128 @@ struct ipfw_dyn_rule_compat { /* combination of TCP flags) */ } ; +#ifdef KERNEL +#pragma pack(4) + +struct ip_fw_compat_32 { + u_int32_t version; /* Version of this structure. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION by clients. */ + user32_addr_t context; /* Context that is usable by user processes to */ + /* identify this rule. */ + u_int64_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk;/* Mask for src and dest IP addr */ + u_short fw_number; /* Rule number */ + u_int fw_flg; /* Flags word */ +#define IP_FW_MAX_PORTS_COMPAT 10 /* A reasonable maximum */ + union { + u_short fw_pts[IP_FW_MAX_PORTS_COMPAT]; /* Array of port numbers to match */ +#define IP_FW_ICMPTYPES_MAX_COMPAT 128 +#define IP_FW_ICMPTYPES_DIM_COMPAT (IP_FW_ICMPTYPES_MAX_COMPAT / (sizeof(unsigned) * 8)) + unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM_COMPAT]; /* ICMP types bitmap */ + } fw_uar_compat; + u_int fw_ipflg; /* IP flags word */ + u_char fw_ipopt,fw_ipnopt; /* IP options set/unset */ + u_char fw_tcpopt,fw_tcpnopt; /* TCP options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ + u_int32_t timestamp; /* timestamp (tv_sec) of last match */ + union ip_fw_if_compat fw_in_if, fw_out_if; /* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */ + u_short fu_pipe_nr; /* queue number (option DUMMYNET) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + struct sockaddr_in fu_fwd_ip; + } fw_un_compat; + u_char fw_prot; /* IP protocol */ + /* + * N'of src ports and # of dst ports in ports array (dst ports + * follow src ports; max of 10 ports in all; count of 0 means + * match all 
ports) + */ + u_char fw_nports; + user32_addr_t pipe_ptr; /* flow_set ptr for dummynet pipe */ + user32_addr_t next_rule_ptr ; /* next rule in case of match */ + uid_t fw_uid; /* uid to match */ + int fw_logamount; /* amount to log */ + u_int64_t fw_loghighest; /* highest number packet to log */ +}; +#pragma pack() + +struct ip_fw_compat_64 { + u_int32_t version; /* Version of this structure. Should always be */ + /* set to IP_FW_CURRENT_API_VERSION by clients. */ + user64_addr_t context; /* Context that is usable by user processes to */ + /* identify this rule. */ + u_int64_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in_addr fw_src, fw_dst; /* Source and destination IP addr */ + struct in_addr fw_smsk, fw_dmsk;/* Mask for src and dest IP addr */ + u_short fw_number; /* Rule number */ + u_int fw_flg; /* Flags word */ +#define IP_FW_MAX_PORTS_COMPAT 10 /* A reasonable maximum */ + union { + u_short fw_pts[IP_FW_MAX_PORTS_COMPAT]; /* Array of port numbers to match */ +#define IP_FW_ICMPTYPES_MAX_COMPAT 128 +#define IP_FW_ICMPTYPES_DIM_COMPAT (IP_FW_ICMPTYPES_MAX_COMPAT / (sizeof(unsigned) * 8)) + unsigned fw_icmptypes[IP_FW_ICMPTYPES_DIM_COMPAT]; /* ICMP types bitmap */ + } fw_uar_compat; + u_int fw_ipflg; /* IP flags word */ + u_char fw_ipopt,fw_ipnopt; /* IP options set/unset */ + u_char fw_tcpopt,fw_tcpnopt; /* TCP options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ + u_int64_t timestamp; /* timestamp (tv_sec) of last match */ + union ip_fw_if_compat fw_in_if, fw_out_if; /* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IPDIVERT) */ + u_short fu_pipe_nr; /* queue number (option DUMMYNET) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + struct sockaddr_in fu_fwd_ip; + } fw_un_compat; + u_char fw_prot; /* IP protocol */ + /* + * N'of src ports and # of dst ports in ports array (dst ports + * follow src ports; 
max of 10 ports in all; count of 0 means + * match all ports) + */ + u_char fw_nports; + user64_addr_t pipe_ptr; /* flow_set ptr for dummynet pipe */ + user64_addr_t next_rule_ptr ; /* next rule in case of match */ + uid_t fw_uid; /* uid to match */ + int fw_logamount; /* amount to log */ + u_int64_t fw_loghighest; /* highest number packet to log */ +}; + +struct ipfw_dyn_rule_compat_32 { + user32_addr_t next ; + + struct ipfw_flow_id id ; + struct ipfw_flow_id mask ; + user32_addr_t chain ; /* pointer to parent rule */ + u_int32_t type ; /* rule type */ + u_int32_t expire ; /* expire time */ + u_int64_t pcnt, bcnt; /* match counters */ + u_int32_t bucket ; /* which bucket in hash table */ + u_int32_t state ; /* state of this rule (typ. a */ + /* combination of TCP flags) */ +} ; + +struct ipfw_dyn_rule_compat_64 { + user64_addr_t next ; + + struct ipfw_flow_id id ; + struct ipfw_flow_id mask ; + user64_addr_t chain ; /* pointer to parent rule */ + u_int32_t type ; /* rule type */ + u_int32_t expire ; /* expire time */ + u_int64_t pcnt, bcnt; /* match counters */ + u_int32_t bucket ; /* which bucket in hash table */ + u_int32_t state ; /* state of this rule (typ. a */ + /* combination of TCP flags) */ +} ; +#endif /* KERNEL */ + + #define IP_FW_GETNSRCP_COMPAT(rule) ((rule)->fw_nports & 0x0f) #define IP_FW_SETNSRCP_COMPAT(rule, n) do { \ (rule)->fw_nports &= ~0x0f; \ @@ -372,5 +494,4 @@ struct ip_old_fw { */ #define IP_OLD_FW_TCPF_ESTAB 0x40 -#endif /* !__LP64__ */ #endif /* _IP_FW_COMPAT_H_ */ diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 995ca8346..7f55b2a5f 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -75,6 +75,8 @@ #include #include +#include + #include #include @@ -114,13 +116,14 @@ #include #endif /* MAC_NET */ + /* * ICMP routines: error generation, receive packet processing, and * routines to turnaround packets back to the originator, and * host table maintenance routines. 
*/ -static struct icmpstat icmpstat; +struct icmpstat icmpstat; SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RD, &icmpstat, icmpstat, ""); @@ -173,7 +176,6 @@ int icmpprintfs = 0; static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *); -static int ip_next_mtu(int, int); extern struct protosw inetsw[]; @@ -187,7 +189,7 @@ icmp_error( int type, int code, n_long dest, - struct ifnet *destifp) + u_int32_t nextmtu) { struct ip *oip = mtod(n, struct ip *), *nip; unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2; @@ -257,8 +259,8 @@ icmp_error( icp->icmp_pptr = code; code = 0; } else if (type == ICMP_UNREACH && - code == ICMP_UNREACH_NEEDFRAG && destifp) { - icp->icmp_nextmtu = htons(destifp->if_mtu); + code == ICMP_UNREACH_NEEDFRAG && nextmtu != 0) { + icp->icmp_nextmtu = htons(nextmtu); } } @@ -269,9 +271,10 @@ icmp_error( /* * Convert fields to network representation. */ +#if BYTE_ORDER != BIG_ENDIAN HTONS(nip->ip_len); HTONS(nip->ip_off); - +#endif /* * Now, copy old ip header (without options) * in front of icmp message. @@ -444,7 +447,11 @@ icmp_input(struct mbuf *m, int hlen) icmpstat.icps_badlen++; goto freeit; } + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(icp->icmp_ip.ip_len); +#endif + /* Discard ICMP's in response to multicast packets */ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) goto badcode; @@ -453,52 +460,7 @@ icmp_input(struct mbuf *m, int hlen) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; -#if 1 - /* - * MTU discovery: - * If we got a needfrag and there is a host route to the - * original destination, and the MTU is not locked, then - * set the MTU in the route to the suggested new value - * (if given) and then notify as usual. The ULPs will - * notice that the MTU has changed and adapt accordingly. - * If no new MTU was suggested, then we guess a new one - * less than the current value. 
If the new MTU is - * unreasonably small (defined by sysctl tcp_minmss), then - * we reset the MTU to the interface value and enable the - * lock bit, indicating that we are no longer doing MTU - * discovery. - */ - if (code == PRC_MSGSIZE) { - struct rtentry *rt; - int mtu; - - rt = rtalloc1((struct sockaddr *)&icmpsrc, 0, - RTF_CLONING | RTF_PRCLONING); - if (rt && (rt->rt_flags & RTF_HOST) - && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { - mtu = ntohs(icp->icmp_nextmtu); - if (!mtu) - mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu, - 1); -#if DEBUG_MTUDISC - printf("MTU for %s reduced to %d\n", - inet_ntop(AF_INET, &icmpsrc.sin_addr, ipv4str, - sizeof(ipv4str)), - mtu); -#endif - if (mtu < max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) { - /* rt->rt_rmx.rmx_mtu = - rt->rt_ifp->if_mtu; */ - rt->rt_rmx.rmx_locks |= RTV_MTU; - } else if (rt->rt_rmx.rmx_mtu > mtu) { - rt->rt_rmx.rmx_mtu = mtu; - } - } - if (rt) - rtfree(rt); - } -#endif /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. @@ -598,7 +560,7 @@ icmp_input(struct mbuf *m, int hlen) case ICMP_REDIRECT: if (log_redirect) { - u_long src, dst, gw; + u_int32_t src, dst, gw; src = ntohl(ip->ip_src.s_addr); dst = ntohl(icp->icmp_ip.ip_dst.s_addr); @@ -698,16 +660,25 @@ icmp_reflect(struct mbuf *m) * or anonymous), use the address which corresponds * to the incoming interface. */ - lck_mtx_lock(rt_mtx); - for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) { if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) - break; + goto match; + } + /* + * Slow path; check for broadcast addresses. Find a source + * IP address to use when replying to the broadcast request; + * let IP handle the source interface selection work. 
+ */ + for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifp && (ia->ia_ifp->if_flags & IFF_BROADCAST) && t.s_addr == satosin(&ia->ia_broadaddr)->sin_addr.s_addr) break; } +match: if (ia) ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); icmpdst.sin_addr = t; if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif) ia = (struct in_ifaddr *)ifaof_ifpforaddr( @@ -717,15 +688,16 @@ icmp_reflect(struct mbuf *m) * and was received on an interface with no IP address. */ if (ia == (struct in_ifaddr *)0) { + lck_rw_lock_shared(in_ifaddr_rwlock); ia = in_ifaddrhead.tqh_first; if (ia == (struct in_ifaddr *)0) {/* no address yet, bail out */ + lck_rw_done(in_ifaddr_rwlock); m_freem(m); - lck_mtx_unlock(rt_mtx); goto done; } ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); } - lck_mtx_unlock(rt_mtx); #if CONFIG_MACF_NET mac_netinet_icmp_reply(m); #endif @@ -853,17 +825,15 @@ icmp_send(struct mbuf *m, struct mbuf *opts) #endif bzero(&ro, sizeof ro); (void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa); - if (ro.ro_rt) { + if (ro.ro_rt) rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } } n_time iptime(void) { struct timeval atv; - u_long t; + u_int32_t t; microtime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; @@ -876,7 +846,7 @@ iptime(void) * given current value MTU. If DIR is less than zero, a larger plateau * is returned; otherwise, a smaller value is returned. 
*/ -static int +int ip_next_mtu(int mtu, int dir) { static int mtutab[] = { @@ -999,8 +969,8 @@ badport_bandlim(int which) #include extern struct domain inetdomain; -extern u_long rip_sendspace; -extern u_long rip_recvspace; +extern u_int32_t rip_sendspace; +extern u_int32_t rip_recvspace; extern struct inpcbinfo ripcbinfo; int rip_abort(struct socket *); @@ -1082,11 +1052,11 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt) case IP_RECVTTL: case IP_BOUND_IF: #if CONFIG_FORCE_OUT_IFP - case IP_FORCE_OUT_IFP: + case IP_FORCE_OUT_IFP: #endif error = rip_ctloutput(so, sopt); break; - + default: error = EINVAL; break; @@ -1123,7 +1093,7 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len) goto bad; /* Bogus fragments can tie up peer resources */ - if (ip->ip_off != 0) + if ((ip->ip_off & ~IP_DF) != 0) goto bad; /* Allow only ICMP even for user provided IP header */ if (ip->ip_p != IPPROTO_ICMP) @@ -1131,20 +1101,22 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n /* To prevent spoofing, specified source address must be one of ours */ if (ip->ip_src.s_addr != INADDR_ANY) { socket_unlock(so, 0); - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); if (TAILQ_EMPTY(&in_ifaddrhead)) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto bad; } - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { - if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) { - lck_mtx_unlock(rt_mtx); + TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr), + ia_hash) { + if (IA_SIN(ia)->sin_addr.s_addr == + ip->ip_src.s_addr) { + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto ours; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); socket_lock(so, 0); goto bad; } diff --git a/bsd/netinet/ip_icmp.h b/bsd/netinet/ip_icmp.h index 556fb9c56..44ce3cbf9 100644 --- a/bsd/netinet/ip_icmp.h +++ 
b/bsd/netinet/ip_icmp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -186,8 +186,11 @@ struct icmp { #define ICMP_REDIRECT_HOST 1 /* for host */ #define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ #define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define ICMP_ALTHOSTADDR 6 /* alternate host address */ #define ICMP_ECHO 8 /* echo service */ #define ICMP_ROUTERADVERT 9 /* router advertisement */ +#define ICMP_ROUTERADVERT_NORMAL 0 /* normal advertisement */ +#define ICMP_ROUTERADVERT_NOROUTE_COMMON 16 /* selective routing */ #define ICMP_ROUTERSOLICIT 10 /* router solicitation */ #define ICMP_TIMXCEED 11 /* time exceeded, code: */ #define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ @@ -202,8 +205,20 @@ struct icmp { #define ICMP_IREQREPLY 16 /* information reply */ #define ICMP_MASKREQ 17 /* address mask request */ #define ICMP_MASKREPLY 18 /* address mask reply */ +#define ICMP_TRACEROUTE 30 /* traceroute */ +#define ICMP_DATACONVERR 31 /* data conversion error */ +#define ICMP_MOBILE_REDIRECT 32 /* mobile host redirect */ +#define ICMP_IPV6_WHEREAREYOU 33 /* IPv6 where-are-you */ +#define ICMP_IPV6_IAMHERE 34 /* IPv6 i-am-here */ +#define ICMP_MOBILE_REGREQUEST 35 /* mobile registration req */ +#define ICMP_MOBILE_REGREPLY 36 /* mobile registration reply */ +#define ICMP_SKIP 39 /* SKIP */ +#define ICMP_PHOTURIS 40 /* Photuris */ +#define ICMP_PHOTURIS_UNKNOWN_INDEX 1 /* unknown sec index */ +#define ICMP_PHOTURIS_AUTH_FAILED 2 /* auth failed */ +#define ICMP_PHOTURIS_DECRYPT_FAILED 3 /* decrypt failed */ -#define ICMP_MAXTYPE 18 +#define ICMP_MAXTYPE 40 #define ICMP_INFOTYPE(type) \ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ @@ -213,8 +228,9 @@ struct icmp { (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) #ifdef KERNEL_PRIVATE -void icmp_error(struct mbuf *, int, int, n_long, struct 
ifnet *); +void icmp_error(struct mbuf *, int, int, n_long, u_int32_t); void icmp_input(struct mbuf *, int); -#endif KERNEL_PRIVATE +int ip_next_mtu(int, int); +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet/ip_id.c b/bsd/netinet/ip_id.c index 7f1363a79..7a6fef876 100644 --- a/bsd/netinet/ip_id.c +++ b/bsd/netinet/ip_id.c @@ -83,7 +83,7 @@ static u_int16_t ru_a, ru_b; static u_int16_t ru_g; static u_int16_t ru_counter = 0; static u_int16_t ru_msb = 0; -static long ru_reseed; +static time_t ru_reseed; static u_int32_t tmp; /* Storage for unused random */ static u_int16_t pmod(u_int16_t, u_int16_t, u_int16_t); diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index 7c603ad9f..a6c47e084 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -81,6 +81,8 @@ #include #include +#include + #include #include @@ -95,6 +97,7 @@ #include #include #include +#include #include #include #include @@ -137,18 +140,31 @@ #include #endif +#if PF +#include +#endif /* PF */ + #if IPSEC extern int ipsec_bypass; extern lck_mtx_t *sadb_mutex; + +lck_grp_t *sadb_stat_mutex_grp; +lck_grp_attr_t *sadb_stat_mutex_grp_attr; +lck_attr_t *sadb_stat_mutex_attr; +lck_mtx_t *sadb_stat_mutex; + #endif int rsvp_on = 0; static int ip_rsvp_on; struct socket *ip_rsvpd; +static int sysctl_ipforwarding SYSCTL_HANDLER_ARGS; + int ipforwarding = 0; -SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, - &ipforwarding, 0, "Enable IP forwarding between interfaces"); +SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ipforwarding, 0, + sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces"); static int ipsendredirects = 1; /* XXX */ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, @@ -189,11 +205,7 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW, static int currentfrags = 0; -#if CONFIG_SCOPEDROUTING int ip_doscopedroute = 1; -#else -int ip_doscopedroute = 0; -#endif 
SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RW, &ip_doscopedroute, 0, "Enable IPv4 scoped routing"); @@ -224,7 +236,20 @@ extern struct domain inetdomain; extern struct protosw inetsw[]; struct protosw *ip_protox[IPPROTO_MAX]; static int ipqmaxlen = IFQ_MAXLEN; -struct in_ifaddrhead in_ifaddrhead; /* first inet address */ + +static lck_grp_attr_t *in_ifaddr_rwlock_grp_attr; +static lck_grp_t *in_ifaddr_rwlock_grp; +static lck_attr_t *in_ifaddr_rwlock_attr; +lck_rw_t *in_ifaddr_rwlock; + +/* Protected by in_ifaddr_rwlock */ +struct in_ifaddrhead in_ifaddrhead; /* first inet address */ +struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ + +#define INADDR_NHASH 61 +static u_int32_t inaddr_nhash; /* hash table size */ +static u_int32_t inaddr_hashp; /* next largest prime */ + struct ifqueue ipintrq; SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); @@ -309,10 +334,10 @@ static struct ip_srcrt { struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; } ip_srcrt; - +static void in_ifaddrhashtbl_init(void); static void save_rte(u_char *, struct in_addr); -static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *, struct route *ipforward_rt); -static void ip_forward(struct mbuf *, int, struct sockaddr_in *, struct route *ipforward_rt); +static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *); +static void ip_forward(struct mbuf *, int, struct sockaddr_in *); static void ip_freef(struct ipq *); #if IPDIVERT #ifdef IPDIVERT_44 @@ -325,6 +350,8 @@ static struct mbuf *ip_reass(struct mbuf *, #else static struct mbuf *ip_reass(struct mbuf *, struct ipq *, struct ipq *); #endif +static void ip_fwd_route_copyout(struct ifnet *, struct route *); +static void ip_fwd_route_copyin(struct ifnet *, struct route *); void ipintr(void); void in_dinit(void); @@ -336,7 +363,8 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, &ip_use_randomid, 
0, "Randomize IP packets IDs"); #endif -extern u_long route_generation; +#define satosin(sa) ((struct sockaddr_in *)(sa)) +#define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) /* * IP initialization: fill in IP protocol switch table. @@ -349,17 +377,28 @@ ip_init(void) int i; static int ip_initialized = 0; - if (!ip_initialized) { + in_ifaddr_init(); + + in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init(); + in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock", + in_ifaddr_rwlock_grp_attr); + in_ifaddr_rwlock_attr = lck_attr_alloc_init(); + in_ifaddr_rwlock = lck_rw_alloc_init(in_ifaddr_rwlock_grp, + in_ifaddr_rwlock_attr); + TAILQ_INIT(&in_ifaddrhead); + in_ifaddrhashtbl_init(); + pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr; - for (pr = inetdomain.dom_protosw; pr; pr = pr->pr_next) - { if(!((unsigned int)pr->pr_domain)) continue; /* If uninitialized, skip */ + for (pr = inetdomain.dom_protosw; pr; pr = pr->pr_next) { + if (pr->pr_domain == NULL) + continue; /* If uninitialized, skip */ if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) ip_protox[pr->pr_protocol] = pr; @@ -405,10 +444,64 @@ ip_init(void) } #endif + arp_init(); + ip_initialized = 1; } } +/* + * Initialize IPv4 source address hash table. + */ +static void +in_ifaddrhashtbl_init(void) +{ + int i, k, p; + + if (in_ifaddrhashtbl != NULL) + return; + + PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash, sizeof (inaddr_nhash)); + if (inaddr_nhash == 0) + inaddr_nhash = INADDR_NHASH; + + MALLOC(in_ifaddrhashtbl, struct in_ifaddrhashhead *, + inaddr_nhash * sizeof (*in_ifaddrhashtbl), + M_IFADDR, M_WAITOK | M_ZERO); + if (in_ifaddrhashtbl == NULL) + panic("in_ifaddrhashtbl_init allocation failed"); + + /* + * Generate the next largest prime greater than inaddr_nhash. + */ + k = (inaddr_nhash % 2 == 0) ? 
inaddr_nhash + 1 : inaddr_nhash + 2; + for (;;) { + p = 1; + for (i = 3; i * i <= k; i += 2) { + if (k % i == 0) + p = 0; + } + if (p == 1) + break; + k += 2; + } + inaddr_hashp = k; +} + +u_int32_t +inaddr_hashval(u_int32_t key) +{ + /* + * The hash index is the computed prime times the key modulo + * the hash size, as documented in "Introduction to Algorithms" + * (Cormen, Leiserson, Rivest). + */ + if (inaddr_nhash > 1) + return ((key * inaddr_hashp) % inaddr_nhash); + else + return (0); +} + static void ip_proto_input( protocol_family_t __unused protocol, @@ -521,12 +614,6 @@ ip_proto_dispatch_in( } -/* - * ipforward_rt cleared in in_addroute() - * when a new route is successfully created. - */ -static struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET , 0 , {0}, {0,0,0,0,0,0,0,0} }; - /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. @@ -546,10 +633,7 @@ ip_input(struct mbuf *m) #endif ipfilter_t inject_filter_ref = 0; struct m_tag *tag; - struct route ipforward_rt; - - bzero(&ipforward_rt, sizeof(struct route)); - + #if IPFIREWALL args.eh = NULL; args.oif = NULL; @@ -557,22 +641,30 @@ ip_input(struct mbuf *m) args.divert_rule = 0; /* divert cookie */ args.next_hop = NULL; + /* + * Don't bother searching for tag(s) if there's none. 
+ */ + if (SLIST_EMPTY(&m->m_pkthdr.tags)) + goto ipfw_tags_done; + /* Grab info from mtags prepended to the chain */ #if DUMMYNET - if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { struct dn_pkt_tag *dn_tag; - + dn_tag = (struct dn_pkt_tag *)(tag+1); args.rule = dn_tag->rule; - + m_tag_delete(m, tag); } #endif /* DUMMYNET */ #if IPDIVERT - if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { struct divert_tag *div_tag; - + div_tag = (struct divert_tag *)(tag+1); args.divert_rule = div_tag->cookie; @@ -580,35 +672,35 @@ ip_input(struct mbuf *m) } #endif - if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { + if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { struct ip_fwd_tag *ipfwd_tag; - + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); args.next_hop = ipfwd_tag->next_hop; m_tag_delete(m, tag); } - + #if DIAGNOSTIC if (m == NULL || (m->m_flags & M_PKTHDR) == 0) panic("ip_input no HDR"); #endif -#if DUMMYNET if (args.rule) { /* dummynet already filtered us */ - ip = mtod(m, struct ip *); - hlen = IP_VHL_HL(ip->ip_vhl) << 2; - inject_filter_ref = ipf_get_inject_filter(m); - goto iphack ; + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + inject_filter_ref = ipf_get_inject_filter(m); + goto iphack ; } -#endif /* DUMMYNET */ +ipfw_tags_done: #endif /* IPFIREWALL */ - + /* - * No need to proccess packet twice if we've - * already seen it + * No need to proccess packet twice if we've already seen it. 
*/ - inject_filter_ref = ipf_get_inject_filter(m); + if (!SLIST_EMPTY(&m->m_pkthdr.tags)) + inject_filter_ref = ipf_get_inject_filter(m); if (inject_filter_ref != 0) { ip = mtod(m, struct ip *); hlen = IP_VHL_HL(ip->ip_vhl) << 2; @@ -618,14 +710,14 @@ ip_input(struct mbuf *m) return; } - OSAddAtomic(1, (SInt32*)&ipstat.ips_total); + OSAddAtomic(1, &ipstat.ips_total); if (m->m_pkthdr.len < sizeof(struct ip)) goto tooshort; if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_toosmall); + OSAddAtomic(1, &ipstat.ips_toosmall); return; } ip = mtod(m, struct ip *); @@ -634,18 +726,18 @@ ip_input(struct mbuf *m) ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); if (IP_VHL_V(ip->ip_vhl) != IPVERSION) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badvers); + OSAddAtomic(1, &ipstat.ips_badvers); goto bad; } hlen = IP_VHL_HL(ip->ip_vhl) << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ - OSAddAtomic(1, (SInt32*)&ipstat.ips_badhlen); + OSAddAtomic(1, &ipstat.ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badhlen); + OSAddAtomic(1, &ipstat.ips_badhlen); return; } ip = mtod(m, struct ip *); @@ -655,7 +747,7 @@ ip_input(struct mbuf *m) if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr); + OSAddAtomic(1, &ipstat.ips_badaddr); goto bad; } } @@ -665,7 +757,7 @@ ip_input(struct mbuf *m) IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)))) { ip_linklocal_stat.iplls_in_total++; if (ip->ip_ttl != MAXTTL) { - OSAddAtomic(1, (SInt32*)&ip_linklocal_stat.iplls_in_badttl); + OSAddAtomic(1, &ip_linklocal_stat.iplls_in_badttl); /* Silently drop link local traffic with bad TTL */ if (!ip_linklocal_in_allowbadttl) goto bad; @@ -702,20 +794,25 @@ ip_input(struct 
mbuf *m) m->m_pkthdr.csum_data = 0xffff; } if (sum) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badsum); + OSAddAtomic(1, &ipstat.ips_badsum); goto bad; } /* * Convert fields to host representation. */ +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); +#endif + if (ip->ip_len < hlen) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badlen); + OSAddAtomic(1, &ipstat.ips_badlen); goto bad; } - NTOHS(ip->ip_off); +#if BYTE_ORDER != BIG_ENDIAN + NTOHS(ip->ip_off); +#endif /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. @@ -724,7 +821,7 @@ ip_input(struct mbuf *m) */ if (m->m_pkthdr.len < ip->ip_len) { tooshort: - OSAddAtomic(1, (SInt32*)&ipstat.ips_tooshort); + OSAddAtomic(1, &ipstat.ips_tooshort); goto bad; } if (m->m_pkthdr.len > ip->ip_len) { @@ -755,6 +852,19 @@ ip_input(struct mbuf *m) * - Wrap: fake packet's addr/port * - Encapsulate: put it in another IP and send out. */ +#if PF + /* Invoke inbound packet filter */ + if (pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE) != 0) { + if (m != NULL) { + panic("%s: unexpected packet %p\n", __func__, m); + /* NOTREACHED */ + } + /* Already freed by callee */ + return; + } + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#endif /* PF */ #if IPFIREWALL #if DUMMYNET @@ -833,9 +943,9 @@ ip_input(struct mbuf *m) */ ip_nhops = 0; /* for source routed packets */ #if IPFIREWALL - if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop, &ipforward_rt)) { + if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop)) { #else - if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL, &ipforward_rt)) { + if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) { #endif return; } @@ -892,15 +1002,11 @@ ip_input(struct mbuf *m) ; #endif - lck_mtx_lock(rt_mtx); - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { -#define satosin(sa) ((struct sockaddr_in *)(sa)) - - if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { - lck_mtx_unlock(rt_mtx); - goto 
ours; - } - + /* + * Check for exact addresses in the hash bucket. + */ + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) { /* * If the address matches, verify that the packet * arrived via the correct interface if checking is @@ -908,32 +1014,42 @@ ip_input(struct mbuf *m) */ if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); goto ours; } - /* - * Only accept broadcast packets that arrive via the - * matching interface. Reception of forwarded directed - * broadcasts would be handled via ip_forward() and - * ether_output() with the loopback into the stack for - * SIMPLEX interfaces handled by ether_output(). - */ - if ((!checkif || ia->ia_ifp == m->m_pkthdr.rcvif) && - ia->ia_ifp && ia->ia_ifp->if_flags & IFF_BROADCAST) { + } + lck_rw_done(in_ifaddr_rwlock); + + /* + * Check for broadcast addresses. + * + * Only accept broadcast packets that arrive via the matching + * interface. Reception of forwarded directed broadcasts would be + * handled via ip_forward() and ether_frameout() with the loopback + * into the stack for SIMPLEX interfaces handled by ether_frameout(). 
+ */ + if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { + struct ifaddr *ifa; + struct ifnet *ifp = m->m_pkthdr.rcvif; + + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == + pkt_dst.s_addr || ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) { - lck_mtx_unlock(rt_mtx); - goto ours; - } - if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) { - lck_mtx_unlock(rt_mtx); + ifnet_lock_done(ifp); goto ours; } } + ifnet_lock_done(ifp); } - lck_mtx_unlock(rt_mtx); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; + struct ifnet *ifp = m->m_pkthdr.rcvif; #if MROUTING if (ip_mrouter) { /* @@ -945,9 +1061,8 @@ ip_input(struct mbuf *m) * must be discarded, else it may be accepted below. */ lck_mtx_lock(ip_mutex); - if (ip_mforward && - ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward); + if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { + OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); lck_mtx_unlock(ip_mutex); return; @@ -960,22 +1075,24 @@ ip_input(struct mbuf *m) */ if (ip->ip_p == IPPROTO_IGMP) goto ours; - OSAddAtomic(1, (SInt32*)&ipstat.ips_forward); + OSAddAtomic(1, &ipstat.ips_forward); } #endif /* MROUTING */ /* * See if we belong to the destination multicast group on the * arrival interface. 
*/ - IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); + ifnet_lock_shared(ifp); + IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); + ifnet_lock_done(ifp); if (inm == NULL) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_notmember); + OSAddAtomic(1, &ipstat.ips_notmember); m_freem(m); return; } goto ours; } - if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) + if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; @@ -988,7 +1105,7 @@ ip_input(struct mbuf *m) struct udpiphdr *ui; if (m->m_len < sizeof(struct udpiphdr) && (m = m_pullup(m, sizeof(struct udpiphdr))) == 0) { - OSAddAtomic(1, (SInt32*)&udpstat.udps_hdrops); + OSAddAtomic(1, &udpstat.udps_hdrops); return; } ui = mtod(m, struct udpiphdr *); @@ -1015,18 +1132,14 @@ ip_input(struct mbuf *m) * Not for us; forward if possible and desirable. */ if (ipforwarding == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward); + OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); } else { #if IPFIREWALL - ip_forward(m, 0, args.next_hop, &ipforward_rt); + ip_forward(m, 0, args.next_hop); #else - ip_forward(m, 0, NULL, &ipforward_rt); + ip_forward(m, 0, NULL); #endif - if (ipforward_rt.ro_rt != NULL) { - rtfree(ipforward_rt.ro_rt); - ipforward_rt.ro_rt = NULL; - } } return; @@ -1043,8 +1156,8 @@ ip_input(struct mbuf *m) /* If maxnipq is 0, never accept fragments. 
*/ if (maxnipq == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragments); - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(1, &ipstat.ips_fragments); + OSAddAtomic(1, &ipstat.ips_fragdropped); goto bad; } @@ -1055,7 +1168,7 @@ ip_input(struct mbuf *m) lck_mtx_lock(ip_mutex); if (currentfrags >= maxfrags) { fp = TAILQ_LAST(&ipq_list, ipq_list); - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragtimeout); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); if (ip->ip_id == fp->ipq_id && ip->ip_src.s_addr == fp->ipq_src.s_addr && @@ -1065,7 +1178,7 @@ ip_input(struct mbuf *m) * If we match the fragment queue we were going to * discard, drop this packet too. */ - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(1, &ipstat.ips_fragdropped); ip_freef(fp); lck_mtx_unlock(ip_mutex); goto bad; @@ -1099,7 +1212,7 @@ ip_input(struct mbuf *m) * drop the oldest fragment before proceeding further */ fp = TAILQ_LAST(&ipq_list, ipq_list); - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragtimeout); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); ip_freef(fp); } @@ -1117,7 +1230,7 @@ ip_input(struct mbuf *m) * that's a non-zero multiple of 8 bytes. */ if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_toosmall); + OSAddAtomic(1, &ipstat.ips_toosmall); lck_mtx_unlock(ip_mutex); goto bad; } @@ -1133,11 +1246,11 @@ ip_input(struct mbuf *m) * ip_reass() will return a different mbuf, and update * the divert info in div_info and args.divert_rule. 
*/ - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragments); + OSAddAtomic(1, &ipstat.ips_fragments); m->m_pkthdr.header = ip; #if IPDIVERT - m = ip_reass(m, - fp, &ipq[sum], &div_info, &args.divert_rule); + m = ip_reass(m, fp, &ipq[sum], + (u_int16_t *)&div_info, &args.divert_rule); #else m = ip_reass(m, fp, &ipq[sum]); #endif @@ -1145,20 +1258,29 @@ ip_input(struct mbuf *m) lck_mtx_unlock(ip_mutex); return; } - OSAddAtomic(1, (SInt32*)&ipstat.ips_reassembled); + OSAddAtomic(1, &ipstat.ips_reassembled); ip = mtod(m, struct ip *); /* Get the header length of the reassembled packet */ hlen = IP_VHL_HL(ip->ip_vhl) << 2; + #if IPDIVERT /* Restore original checksum before diverting packet */ if (div_info != 0) { ip->ip_len += hlen; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; ip->ip_sum = in_cksum(m, hlen); + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_off); NTOHS(ip->ip_len); +#endif + ip->ip_len -= hlen; } #endif @@ -1182,11 +1304,13 @@ ip_input(struct mbuf *m) /* Restore packet header fields to original values */ ip->ip_len += hlen; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); - +#endif /* Deliver packet to divert input routine */ - OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered); + OSAddAtomic(1, &ipstat.ips_delivered); divert_packet(m, 1, div_info & 0xffff, args.divert_rule); /* If 'tee', continue with original packet */ @@ -1215,7 +1339,7 @@ ip_input(struct mbuf *m) /* * Switch out to protocol's input routine. 
*/ - OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered); + OSAddAtomic(1, &ipstat.ips_delivered); { #if IPFIREWALL if (args.next_hop && ip->ip_p == IPPROTO_TCP) { @@ -1223,8 +1347,9 @@ ip_input(struct mbuf *m) struct m_tag *fwd_tag; struct ip_fwd_tag *ipfwd_tag; - fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, - sizeof(struct sockaddr_in), M_NOWAIT); + fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag), + M_NOWAIT); if (fwd_tag == NULL) { goto bad; } @@ -1406,7 +1531,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) } nq = q->m_nextpkt; m->m_nextpkt = nq; - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(1, &ipstat.ips_fragdropped); fp->ipq_nfrags--; m_freem(q); } @@ -1445,7 +1570,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { if (GETIP(q)->ip_off != next) { if (fp->ipq_nfrags > maxfragsperpacket) { - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); ip_freef(fp); } return (0); @@ -1455,7 +1580,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) /* Make sure the last packet didn't have the IP_MF flag */ if (p->m_flags & M_FRAG) { if (fp->ipq_nfrags > maxfragsperpacket) { - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); ip_freef(fp); } return (0); @@ -1467,8 +1592,8 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) q = fp->ipq_frags; ip = GETIP(q); if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_toolong); - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(1, &ipstat.ips_toolong); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); ip_freef(fp); return (0); } @@ -1540,7 +1665,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) *divinfo = 0; 
*divcookie = 0; #endif - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(1, &ipstat.ips_fragdropped); if (fp != 0) fp->ipq_nfrags--; m_freem(m); @@ -1584,7 +1709,7 @@ ip_slowtimo(void) --fp->ipq_ttl; fp = fp->next; if (fp->prev->ipq_ttl == 0) { - OSAddAtomic(fp->ipq_nfrags, (SInt32*)&ipstat.ips_fragtimeout); + OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); ip_freef(fp->prev); } } @@ -1598,12 +1723,14 @@ ip_slowtimo(void) for (i = 0; i < IPREASS_NHASH; i++) { while (nipq > maxnipq && (ipq[i].next != &ipq[i])) { - OSAddAtomic(ipq[i].next->ipq_nfrags, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(ipq[i].next->ipq_nfrags, &ipstat.ips_fragdropped); ip_freef(ipq[i].next); } } } +#if IPFLOW ipflow_slowtimo(); +#endif lck_mtx_unlock(ip_mutex); } @@ -1618,7 +1745,7 @@ ip_drain(void) lck_mtx_lock(ip_mutex); for (i = 0; i < IPREASS_NHASH; i++) { while (ipq[i].next != &ipq[i]) { - OSAddAtomic(ipq[i].next->ipq_nfrags, (SInt32*)&ipstat.ips_fragdropped); + OSAddAtomic(ipq[i].next->ipq_nfrags, &ipstat.ips_fragdropped); ip_freef(ipq[i].next); } } @@ -1639,7 +1766,7 @@ ip_drain(void) * 0 if the packet should be processed further. */ static int -ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, struct route *ipforward_rt) +ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) { struct ip *ip = mtod(m, struct ip *); u_char *cp; @@ -1648,6 +1775,8 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; struct in_addr *sin, dst; n_time ntime; + struct sockaddr_in ipaddr = { + sizeof (ipaddr), AF_INET , 0 , { 0 }, { 0, } }; dst = ip->ip_dst; cp = (u_char *)(ip + 1); @@ -1744,7 +1873,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st /* * Not acting as a router, so silently drop. 
*/ - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward); + OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); return (1); } @@ -1763,7 +1892,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st ia = (INA)ifa_ifwithnet((SA)&ipaddr); } } else { - ia = ip_rtaddr(ipaddr.sin_addr, ipforward_rt); + ia = ip_rtaddr(ipaddr.sin_addr); } if (ia == 0) { type = ICMP_UNREACH; @@ -1804,7 +1933,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st * use the incoming interface (should be same). */ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0) { - if ((ia = ip_rtaddr(ipaddr.sin_addr, ipforward_rt)) == 0) { + if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; @@ -1891,18 +2020,14 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st } } if (forward && ipforwarding) { - ip_forward(m, 1, next_hop, ipforward_rt); - if (ipforward_rt->ro_rt != NULL) { - rtfree(ipforward_rt->ro_rt); - ipforward_rt->ro_rt = NULL; - } + ip_forward(m, 1, next_hop); return (1); } return (0); bad: ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2; /* XXX icmp_error adds in hdr length */ icmp_error(m, type, code, 0, 0); - OSAddAtomic(1, (SInt32*)&ipstat.ips_badoptions); + OSAddAtomic(1, &ipstat.ips_badoptions); return (1); } @@ -1911,34 +2036,29 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop, st * return internet address info of interface to be used to get there. 
*/ struct in_ifaddr * -ip_rtaddr(struct in_addr dst, struct route *rt) +ip_rtaddr(struct in_addr dst) { struct sockaddr_in *sin; - - sin = (struct sockaddr_in *)&rt->ro_dst; - - lck_mtx_lock(rt_mtx); - if (rt->ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr || - rt->ro_rt->generation_id != route_generation) { - if (rt->ro_rt) { - rtfree_locked(rt->ro_rt); - rt->ro_rt = 0; - } - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - sin->sin_addr = dst; - - rtalloc_ign_locked(rt, RTF_PRCLONING); - } - if (rt->ro_rt == 0) { - lck_mtx_unlock(rt_mtx); - return ((struct in_ifaddr *)0); - } - - if (rt->ro_rt->rt_ifa) - ifaref(rt->ro_rt->rt_ifa); - lck_mtx_unlock(rt_mtx); - return ((struct in_ifaddr *) rt->ro_rt->rt_ifa); + struct ifaddr *rt_ifa; + struct route ro; + + bzero(&ro, sizeof (ro)); + sin = (struct sockaddr_in *)&ro.ro_dst; + sin->sin_family = AF_INET; + sin->sin_len = sizeof (*sin); + sin->sin_addr = dst; + + rtalloc_ign(&ro, RTF_PRCLONING); + if (ro.ro_rt == NULL) + return (NULL); + + RT_LOCK(ro.ro_rt); + if ((rt_ifa = ro.ro_rt->rt_ifa) != NULL) + ifaref(rt_ifa); + RT_UNLOCK(ro.ro_rt); + rtfree(ro.ro_rt); + + return ((struct in_ifaddr *)rt_ifa); } /* @@ -1996,7 +2116,7 @@ ip_srcroute(void) *(mtod(m, struct in_addr *)) = *p--; #if DIAGNOSTIC if (ipprintfs) - printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr)); + printf(" hops %lx", (u_int32_t)ntohl(mtod(m, struct in_addr *)->s_addr)); #endif /* @@ -2016,7 +2136,7 @@ ip_srcroute(void) while (p >= ip_srcrt.route) { #if DIAGNOSTIC if (ipprintfs) - printf(" %lx", (u_long)ntohl(q->s_addr)); + printf(" %lx", (u_int32_t)ntohl(q->s_addr)); #endif *q++ = *p--; } @@ -2026,7 +2146,7 @@ ip_srcroute(void) *q = ip_srcrt.dst; #if DIAGNOSTIC if (ipprintfs) - printf(" %lx\n", (u_long)ntohl(q->s_addr)); + printf(" %lx\n", (u_int32_t)ntohl(q->s_addr)); #endif return (m); } @@ -2065,6 +2185,100 @@ u_char inetctlerrmap[PRC_NCMDS] = { ENOPROTOOPT, ECONNREFUSED }; +static int +sysctl_ipforwarding 
SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int i, was_ipforwarding = ipforwarding; + + i = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if (i != 0 || req->newptr == USER_ADDR_NULL) + return (i); + + if (was_ipforwarding && !ipforwarding) { + /* clean up IPv4 forwarding cached routes */ + ifnet_head_lock_shared(); + for (i = 0; i <= if_index; i++) { + struct ifnet *ifp = ifindex2ifnet[i]; + if (ifp != NULL) { + lck_mtx_lock(ifp->if_fwd_route_lock); + if (ifp->if_fwd_route.ro_rt != NULL) { + rtfree(ifp->if_fwd_route.ro_rt); + ifp->if_fwd_route.ro_rt = NULL; + } + lck_mtx_unlock(ifp->if_fwd_route_lock); + } + } + ifnet_head_done(); + } + + return (0); +} + +/* + * Similar to inp_route_{copyout,copyin} routines except that these copy + * out the cached IPv4 forwarding route from struct ifnet instead of the + * inpcb. See comments for those routines for explanations. + */ +static void +ip_fwd_route_copyout(struct ifnet *ifp, struct route *dst) +{ + struct route *src = &ifp->if_fwd_route; + + lck_mtx_lock(ifp->if_fwd_route_lock); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) + panic("%s: wrong or corrupted route: %p", __func__, src); + + /* Copy everything (rt, dst, flags) from ifnet */ + bcopy(src, dst, sizeof (*dst)); + + /* Hold one reference for the local copy of struct route */ + if (dst->ro_rt != NULL) + RT_ADDREF(dst->ro_rt); + + lck_mtx_unlock(ifp->if_fwd_route_lock); +} + +static void +ip_fwd_route_copyin(struct ifnet *ifp, struct route *src) +{ + struct route *dst = &ifp->if_fwd_route; + + lck_mtx_lock(ifp->if_fwd_route_lock); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) + panic("%s: wrong or corrupted route: %p", __func__, src); + + /* No cached route in the ifnet? 
*/ + if (dst->ro_rt == NULL) { + /* + * Copy everything (rt, dst, flags) from ip_forward(); + * the reference to the route was held at the time + * it was allocated and is kept intact. + */ + bcopy(src, dst, sizeof (*dst)); + } else if (src->ro_rt != NULL) { + /* + * If the same, update just the ro_flags and ditch the one + * in the local copy. Else ditch the one that is currently + * cached, and cache what we got back from ip_output(). + */ + if (dst->ro_rt == src->ro_rt) { + dst->ro_flags = src->ro_flags; + rtfree(src->ro_rt); + src->ro_rt = NULL; + } else { + rtfree(dst->ro_rt); + bcopy(src, dst, sizeof (*dst)); + } + } + + lck_mtx_unlock(ifp->if_fwd_route_lock); +} + /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful @@ -2080,37 +2294,46 @@ u_char inetctlerrmap[PRC_NCMDS] = { * via a source route. */ static void -ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route *ipforward_rt) +ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) { +#if !IPFIREWALL +#pragma unused(next_hop) +#endif struct ip *ip = mtod(m, struct ip *); struct sockaddr_in *sin; struct rtentry *rt; + struct route fwd_rt; int error, type = 0, code = 0; struct mbuf *mcopy; n_long dest; struct in_addr pkt_dst; - struct ifnet *destifp; -#if IPSEC - struct ifnet dummyifp; -#endif + u_int32_t nextmtu = 0; + struct ip_out_args ipoa = { IFSCOPE_NONE }; + struct ifnet *ifp = m->m_pkthdr.rcvif; +#if PF + struct pf_mtag *pf_mtag; +#endif /* PF */ dest = 0; +#if IPFIREWALL /* * Cache the destination address of the packet; this may be * changed by use of 'ipfw fwd'. */ pkt_dst = next_hop ? 
next_hop->sin_addr : ip->ip_dst; +#else + pkt_dst = ip->ip_dst; +#endif #if DIAGNOSTIC if (ipprintfs) printf("forward: src %lx dst %lx ttl %x\n", - (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr, + (u_int32_t)ip->ip_src.s_addr, (u_int32_t)pkt_dst.s_addr, ip->ip_ttl); #endif - if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward); + OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); return; } @@ -2126,25 +2349,33 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route } #endif - sin = (struct sockaddr_in *)&ipforward_rt->ro_dst; - if ((rt = ipforward_rt->ro_rt) == 0 || - pkt_dst.s_addr != sin->sin_addr.s_addr || - ipforward_rt->ro_rt->generation_id != route_generation) { - if (ipforward_rt->ro_rt) { - rtfree(ipforward_rt->ro_rt); - ipforward_rt->ro_rt = 0; +#if PF + pf_mtag = pf_find_mtag(m); + if (pf_mtag != NULL && pf_mtag->rtableid != IFSCOPE_NONE) + ipoa.ipoa_ifscope = pf_mtag->rtableid; +#endif /* PF */ + + ip_fwd_route_copyout(ifp, &fwd_rt); + + sin = (struct sockaddr_in *)&fwd_rt.ro_dst; + if (fwd_rt.ro_rt == NULL || + fwd_rt.ro_rt->generation_id != route_generation || + pkt_dst.s_addr != sin->sin_addr.s_addr) { + if (fwd_rt.ro_rt != NULL) { + rtfree(fwd_rt.ro_rt); + fwd_rt.ro_rt = NULL; } sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); + sin->sin_len = sizeof (*sin); sin->sin_addr = pkt_dst; - rtalloc_ign(ipforward_rt, RTF_PRCLONING); - if (ipforward_rt->ro_rt == 0) { + rtalloc_scoped_ign(&fwd_rt, RTF_PRCLONING, ipoa.ipoa_ifscope); + if (fwd_rt.ro_rt == NULL) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); - return; + goto done; } - rt = ipforward_rt->ro_rt; } + rt = fwd_rt.ro_rt; /* * Save the IP header and at most 8 bytes of the payload, @@ -2179,13 +2410,13 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route * Also, don't send redirect if forwarding using a default route * or a route modified by a 
redirect. */ -#define satosin(sa) ((struct sockaddr_in *)(sa)) + RT_LOCK_SPIN(rt); if (rt->rt_ifp == m->m_pkthdr.rcvif && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && satosin(rt_key(rt))->sin_addr.s_addr != 0 && ipsendredirects && !srcrt) { #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) - u_long src = ntohl(ip->ip_src.s_addr); + u_int32_t src = ntohl(ip->ip_src.s_addr); if (RTA(rt) && (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { @@ -2198,50 +2429,63 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route code = ICMP_REDIRECT_HOST; #if DIAGNOSTIC if (ipprintfs) - printf("redirect (%d) to %lx\n", code, (u_long)dest); + printf("redirect (%d) to %lx\n", code, (u_int32_t)dest); #endif } } + RT_UNLOCK(rt); - { +#if IPFIREWALL if (next_hop) { /* Pass IPFORWARD info if available */ struct m_tag *tag; struct ip_fwd_tag *ipfwd_tag; - - tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, - sizeof(struct sockaddr_in), M_NOWAIT); + + tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, + sizeof (*ipfwd_tag), M_NOWAIT); if (tag == NULL) { error = ENOBUFS; m_freem(m); - return; + goto done; } - + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); ipfwd_tag->next_hop = next_hop; m_tag_prepend(m, tag); } - error = ip_output_list(m, 0, (struct mbuf *)0, ipforward_rt, - IP_FORWARDING, 0, NULL); - } - if (error) - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantforward); - else { - OSAddAtomic(1, (SInt32*)&ipstat.ips_forward); +#endif + error = ip_output_list(m, 0, NULL, &fwd_rt, + IP_FORWARDING | IP_OUTARGS, 0, &ipoa); + + /* Refresh rt since the route could have changed while in IP */ + rt = fwd_rt.ro_rt; + + if (error) { + OSAddAtomic(1, &ipstat.ips_cantforward); + } else { + OSAddAtomic(1, &ipstat.ips_forward); if (type) - OSAddAtomic(1, (SInt32*)&ipstat.ips_redirectsent); + OSAddAtomic(1, &ipstat.ips_redirectsent); else { if (mcopy) { - ipflow_create(ipforward_rt, mcopy); +#if IPFLOW + ipflow_create(&fwd_rt, mcopy); 
+#endif + /* + * If we didn't have to go thru ipflow and + * the packet was successfully consumed by + * ip_output, the mcopy is rather a waste; + * this could be further optimized. + */ m_freem(mcopy); } - return; + goto done; } } if (mcopy == NULL) - return; - destifp = NULL; + goto done; switch (error) { @@ -2262,8 +2506,12 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; #ifndef IPSEC - if (ipforward_rt->ro_rt) - destifp = ipforward_rt->ro_rt->rt_ifp; + if (rt != NULL) { + RT_LOCK_SPIN(rt); + if (rt->rt_ifp != NULL) + nextmtu = rt->rt_ifp->if_mtu; + RT_UNLOCK(rt); + } #else /* * If the packet is routed over IPsec tunnel, tell the @@ -2271,15 +2519,19 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ - if (ipforward_rt->ro_rt) { + if (rt != NULL) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; + RT_LOCK_SPIN(rt); + if (rt->rt_ifp != NULL) + nextmtu = rt->rt_ifp->if_mtu; + RT_UNLOCK(rt); + if (ipsec_bypass) { - destifp = ipforward_rt->ro_rt->rt_ifp; - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag); + OSAddAtomic(1, &ipstat.ips_cantfrag); break; } sp = ipsec4_getpolicybyaddr(mcopy, @@ -2287,29 +2539,22 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route IP_FORWARDING, &ipsecerror); - if (sp == NULL) - destifp = ipforward_rt->ro_rt->rt_ifp; - else { + if (sp != NULL) { /* count IPsec header size */ ipsechdr = ipsec_hdrsiz(sp); /* * find the correct route for outer IPv4 * header, compute tunnel MTU. - * - * XXX BUG ALERT - * The "dummyifp" code relies upon the fact - * that icmp_error() touches only ifp->if_mtu. 
*/ - /*XXX*/ - destifp = NULL; + nextmtu = 0; if (sp->req != NULL) { if (sp->req->saidx.mode == IPSEC_MODE_TUNNEL) { struct secasindex saidx; struct ip *ipm; struct secasvar *sav; - + ipm = mtod(mcopy, struct ip *); bcopy(&sp->req->saidx, &saidx, sizeof(saidx)); saidx.mode = sp->req->saidx.mode; @@ -2334,11 +2579,13 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route if (sav != NULL) { if (sav->sah != NULL) { ro = &sav->sah->sa_route; - if (ro->ro_rt && ro->ro_rt->rt_ifp) { - dummyifp.if_mtu = - ro->ro_rt->rt_ifp->if_mtu; - dummyifp.if_mtu -= ipsechdr; - destifp = &dummyifp; + if (ro->ro_rt != NULL) { + RT_LOCK(ro->ro_rt); + if (ro->ro_rt->rt_ifp != NULL) { + nextmtu = ro->ro_rt->rt_ifp->if_mtu; + nextmtu -= ipsechdr; + } + RT_UNLOCK(ro->ro_rt); } } key_freesav(sav, KEY_SADB_UNLOCKED); @@ -2349,7 +2596,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route } } #endif /*IPSEC*/ - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag); + OSAddAtomic(1, &ipstat.ips_cantfrag); break; case ENOBUFS: @@ -2359,9 +2606,12 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop, struct route case EACCES: /* ipfw denied packet */ m_freem(mcopy); - return; + goto done; } - icmp_error(mcopy, type, code, dest, destifp); + + icmp_error(mcopy, type, code, dest, nextmtu); +done: + ip_fwd_route_copyin(ifp, &fwd_rt); } void diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c index 939d9dbc4..a5884bd1a 100644 --- a/bsd/netinet/ip_mroute.c +++ b/bsd/netinet/ip_mroute.c @@ -57,6 +57,9 @@ #include #include #include + +#include + #include #include #include @@ -73,22 +76,9 @@ #include #endif -#ifndef NTOHL -#if BYTE_ORDER != BIG_ENDIAN -#define NTOHL(d) ((d) = ntohl((d))) -#define NTOHS(d) ((d) = ntohs((u_short)(d))) -#define HTONL(d) ((d) = htonl((d))) -#define HTONS(d) ((d) = htons((u_short)(d))) -#else -#define NTOHL(d) -#define NTOHS(d) -#define HTONL(d) -#define HTONS(d) -#endif -#endif #ifndef MROUTING 
-extern u_long _ip_mcast_src(int vifi); +extern u_int32_t _ip_mcast_src(int vifi); extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); extern int _ip_mrouter_done(void); @@ -181,9 +171,9 @@ int (*legal_vif_num)(int) = 0; * just in case it does get called, the code a little lower in ip_output * will assign the packet a local address. */ -u_long +u_int32_t _ip_mcast_src(int vifi) { return INADDR_ANY; } -u_long (*ip_mcast_src)(int) = _ip_mcast_src; +u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src; int ip_rsvp_vif_init(so, sopt) @@ -294,10 +284,10 @@ static int have_encap_tunnel = 0; * one-back cache used by ipip_input to locate a tunnel's vif * given a datagram's src ip address. */ -static u_long last_encap_src; +static u_int32_t last_encap_src; static struct vif *last_encap_vif; -static u_long X_ip_mcast_src(int vifi); +static u_int32_t X_ip_mcast_src(int vifi); static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); static int X_ip_mrouter_done(void); static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); @@ -319,7 +309,7 @@ static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); static void phyint_send(struct ip *, struct vif *, struct mbuf *); static void encap_send(struct ip *, struct vif *, struct mbuf *); -static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); +static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t); static void tbf_queue(struct vif *, struct mbuf *); static void tbf_process_q(struct vif *); static void tbf_reprocess_q(void *); @@ -394,7 +384,7 @@ static int pim_assert; (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) #if UPCALL_TIMING -u_long upcall_data[51]; +u_int32_t upcall_data[51]; static void collate(struct timeval *); #endif /* UPCALL_TIMING */ @@ -774,9 +764,9 @@ add_vif(struct vifctl *vifcp) if (mrtdebug) log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate 
%d\n", vifcp->vifc_vifi, - (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), + (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr), (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", - (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), + (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr), vifcp->vifc_threshold, vifcp->vifc_rate_limit); @@ -839,7 +829,7 @@ static int add_mfc(struct mfcctl *mfccp) { struct mfc *rt; - u_long hash; + u_int32_t hash; struct rtdetq *rte; u_short nstl; int i; @@ -850,8 +840,8 @@ add_mfc(struct mfcctl *mfccp) if (rt) { if (mrtdebug & DEBUG_MFC) log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", - (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); rt->mfc_parent = mfccp->mfcc_parent; @@ -873,14 +863,14 @@ add_mfc(struct mfcctl *mfccp) if (nstl++) log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", "multiple kernel entries", - (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, (void *)rt->mfc_stall); if (mrtdebug & DEBUG_MFC) log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", - (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent, (void *)rt->mfc_stall); rt->mfc_origin = mfccp->mfcc_origin; @@ -919,8 +909,8 @@ add_mfc(struct mfcctl *mfccp) if (nstl == 0) { if (mrtdebug & DEBUG_MFC) log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", - hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), + (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) 
{ @@ -979,9 +969,9 @@ add_mfc(struct mfcctl *mfccp) static void collate(struct timeval *t) { - u_long d; + u_int32_t d; struct timeval tp; - u_long delta; + u_int32_t delta; GET_TIME(tp); @@ -1008,7 +998,7 @@ del_mfc(struct mfcctl *mfccp) struct in_addr mcastgrp; struct mfc *rt; struct mfc **nptr; - u_long hash; + u_int32_t hash; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; @@ -1016,7 +1006,7 @@ del_mfc(struct mfcctl *mfccp) if (mrtdebug & DEBUG_MFC) log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", - (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); + (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr)); nptr = &mfctable[hash]; while ((rt = *nptr) != NULL) { @@ -1087,7 +1077,7 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, if (mrtdebug & DEBUG_FORWARD) log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", - (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), + (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr), (void *)ifp); if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || @@ -1104,7 +1094,7 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, if ((srctun++ % 1000) == 0) log(LOG_ERR, "ip_mforward: received source-routed packet from %lx\n", - (u_long)ntohl(ip->ip_src.s_addr)); + (u_int32_t)ntohl(ip->ip_src.s_addr)); return 1; } @@ -1153,7 +1143,7 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct mbuf *mb0; struct rtdetq *rte; - u_long hash; + u_int32_t hash; int hlen = ip->ip_hl << 2; #if UPCALL_TIMING struct timeval tp; @@ -1164,8 +1154,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, mrtstat.mrts_no_route++; if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", - (u_long)ntohl(ip->ip_src.s_addr), - (u_long)ntohl(ip->ip_dst.s_addr)); + (u_int32_t)ntohl(ip->ip_src.s_addr), + (u_int32_t)ntohl(ip->ip_dst.s_addr)); /* * Allocate mbufs early so that we don't do 
extra work if we are @@ -1308,8 +1298,8 @@ expire_upcalls(__unused void *unused) --mfc->mfc_expire == 0) { if (mrtdebug & DEBUG_EXPIRE) log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", - (u_long)ntohl(mfc->mfc_origin.s_addr), - (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); + (u_int32_t)ntohl(mfc->mfc_origin.s_addr), + (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr)); /* * drop all the packets * free the mbuf with the pkt, if, timing info @@ -1392,7 +1382,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, struct igmpmsg *im; int hlen = ip->ip_hl << 2; struct timeval now; - u_long delta; + u_int32_t delta; GET_TIME(now); @@ -1469,7 +1459,7 @@ int (*legal_vif_num)(int) = X_legal_vif_num; /* * Return the local address used by this vif */ -static u_long +static u_int32_t X_ip_mcast_src(int vifi) { if (vifi >= 0 && vifi < numvifs) @@ -1479,7 +1469,7 @@ X_ip_mcast_src(int vifi) } #if !defined(MROUTE_LKM) || !MROUTE_LKM -u_long (*ip_mcast_src)(int) = X_ip_mcast_src; +u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src; #endif static void @@ -1557,8 +1547,12 @@ encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) */ ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); --ip->ip_ttl; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; mb_copy->m_data += sizeof(multicast_encap_iphdr); ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); @@ -1624,7 +1618,7 @@ ipip_input(struct mbuf *m, int iphlen) m_freem(m); if (mrtdebug) log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", - (u_long)ntohl(ip->ip_src.s_addr)); + (u_int32_t)ntohl(ip->ip_src.s_addr)); return; } ifp = vifp->v_ifp; @@ -1645,7 +1639,7 @@ ipip_input(struct mbuf *m, int iphlen) static void tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, - u_long p_len) + u_int32_t p_len) { struct tbf *t = vifp->v_tbf; @@ -1850,7 +1844,7 @@ static void tbf_update_tokens(struct vif *vifp) { struct timeval tp; - u_long tm; + u_int32_t tm; struct tbf *t = 
vifp->v_tbf; GET_TIME(tp); @@ -2081,7 +2075,7 @@ rsvp_input(struct mbuf *m, int iphlen) rsvp_src.sin_addr = ip->ip_src; if (rsvpdebug && m) - printf("rsvp_input: m->m_len = %ld, sbspace() = %ld\n", + printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h index d88c9aca0..71c39440a 100644 --- a/bsd/netinet/ip_mroute.h +++ b/bsd/netinet/ip_mroute.h @@ -95,7 +95,7 @@ #ifdef KERNEL_PRIVATE #define GET_TIME(t) microtime(&t) -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #ifndef CONFIG_MAXVIFS #define CONFIG_MAXVIFS 32 /* 4635538 temp workaround */ @@ -108,7 +108,7 @@ /* * Types and macros for handling bitmaps with one bit per virtual interface. */ -typedef u_long vifbitmap_t; +typedef u_int32_t vifbitmap_t; typedef u_short vifi_t; /* type of a vif index */ #define ALL_VIFS (vifi_t)-1 @@ -203,10 +203,10 @@ struct vif { struct in_addr v_lcl_addr; /* local interface address */ struct in_addr v_rmt_addr; /* remote address (tunnels only) */ struct ifnet *v_ifp; /* pointer to interface */ - u_long v_pkt_in; /* # pkts in on interface */ - u_long v_pkt_out; /* # pkts out on interface */ - u_long v_bytes_in; /* # bytes in on interface */ - u_long v_bytes_out; /* # bytes out on interface */ + u_int32_t v_pkt_in; /* # pkts in on interface */ + u_int32_t v_pkt_out; /* # pkts out on interface */ + u_int32_t v_bytes_in; /* # bytes in on interface */ + u_int32_t v_bytes_out; /* # bytes out on interface */ struct route v_route; /* cached route if this is a tunnel */ u_int v_rsvp_on; /* RSVP listening on this vif */ struct socket *v_rsvpd; /* RSVP daemon socket */ @@ -223,9 +223,9 @@ struct mfc { struct in_addr mfc_mcastgrp; /* multicast group associated*/ vifi_t mfc_parent; /* incoming vif */ u_char mfc_ttls[CONFIG_MAXVIFS]; /* forwarding ttls on vifs */ - u_long mfc_pkt_cnt; /* pkt count for src-grp */ - 
u_long mfc_byte_cnt; /* byte count for src-grp */ - u_long mfc_wrong_if; /* wrong if for src-grp */ + u_int32_t mfc_pkt_cnt; /* pkt count for src-grp */ + u_int32_t mfc_byte_cnt; /* byte count for src-grp */ + u_int32_t mfc_wrong_if; /* wrong if for src-grp */ int mfc_expire; /* time to clean entry up */ struct timeval mfc_last_assert; /* last time I sent an assert*/ struct rtdetq *mfc_stall; /* q of packets awaiting mfc */ @@ -284,9 +284,9 @@ struct rtdetq { struct tbf { struct timeval tbf_last_pkt_t; /* arr. time of last pkt */ - u_long tbf_n_tok; /* no of tokens in bucket */ - u_long tbf_q_len; /* length of queue at this vif */ - u_long tbf_max_q_len; /* max. queue length */ + u_int32_t tbf_n_tok; /* no of tokens in bucket */ + u_int32_t tbf_q_len; /* length of queue at this vif */ + u_int32_t tbf_max_q_len; /* max. queue length */ struct mbuf *tbf_q; /* Packet queue */ struct mbuf *tbf_t; /* tail-insertion pointer */ }; @@ -303,5 +303,5 @@ extern int (*mrt_ioctl)(int, caddr_t); extern int (*mrt_ioctl)(int, caddr_t, struct proc *); #endif -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 047b6b7ce..2a1fef6d4 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -80,6 +80,8 @@ #include #include +#include + #include #include #include @@ -127,6 +129,10 @@ #include #endif +#if PF +#include +#endif /* PF */ + #if IPFIREWALL_FORWARD_DEBUG #define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ (ntohl(a.s_addr)>>16)&0xFF,\ @@ -158,8 +164,6 @@ void in_cksum_offset(struct mbuf* , size_t ); extern int (*fr_checkp)(struct ip *, int, struct ifnet *, int, struct mbuf **); -extern u_long route_generation; - extern struct protosw inetsw[]; extern struct ip_linklocal_stat ip_linklocal_stat; @@ -236,13 +240,13 @@ ip_output_list( struct ip_out_args *ipoa ) { - struct ip *ip, *mhip; + struct ip *ip; struct ifnet *ifp = NULL; - struct mbuf *m = 
m0; + struct mbuf *m = m0, **mppn = NULL; int hlen = sizeof (struct ip); int len = 0, off, error = 0; struct sockaddr_in *dst = NULL; - struct in_ifaddr *ia = NULL; + struct in_ifaddr *ia = NULL, *src_ia = NULL; int isbroadcast, sw_csum; struct in_addr pkt_dst; #if IPSEC @@ -262,7 +266,7 @@ ip_output_list( struct route saved_route; struct ip_out_args saved_ipoa; struct mbuf * packetlist; - int pktcnt = 0; + int pktcnt = 0, tso = 0; unsigned int ifscope; boolean_t select_srcif; @@ -275,33 +279,38 @@ ip_output_list( args.rule = NULL; args.divert_rule = 0; /* divert cookie */ args.ipoa = NULL; - + + if (SLIST_EMPTY(&m0->m_pkthdr.tags)) + goto ipfw_tags_done; + /* Grab info from mtags prepended to the chain */ #if DUMMYNET - if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { struct dn_pkt_tag *dn_tag; - + dn_tag = (struct dn_pkt_tag *)(tag+1); args.rule = dn_tag->rule; opt = NULL; saved_route = dn_tag->ro; ro = &saved_route; - + imo = NULL; dst = dn_tag->dn_dst; ifp = dn_tag->ifp; flags = dn_tag->flags; saved_ipoa = dn_tag->ipoa; ipoa = &saved_ipoa; - + m_tag_delete(m0, tag); } #endif /* DUMMYNET */ #if IPDIVERT - if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { struct divert_tag *div_tag; - + div_tag = (struct divert_tag *)(tag+1); args.divert_rule = div_tag->cookie; @@ -309,18 +318,20 @@ ip_output_list( } #endif /* IPDIVERT */ - if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { + if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { struct ip_fwd_tag *ipfwd_tag; - + ipfwd_tag = (struct ip_fwd_tag *)(tag+1); args.next_hop = ipfwd_tag->next_hop; - + m_tag_delete(m0, tag); } +ipfw_tags_done: #endif /* 
IPFIREWALL */ m = m0; - + #if DIAGNOSTIC if ( !m || (m->m_flags & M_PKTHDR) != 0) panic("ip_output no HDR"); @@ -330,13 +341,13 @@ ip_output_list( #endif /* - * Do not perform source interface selection when forwarding. * At present the IP_OUTARGS flag implies a request for IP to - * perform source interface selection. + * perform source interface selection. In the forwarding case, + * only the ifscope value is used, as source interface selection + * doesn't take place. */ - if (ip_doscopedroute && - (flags & (IP_OUTARGS | IP_FORWARDING)) == IP_OUTARGS) { - select_srcif = TRUE; + if (ip_doscopedroute && (flags & IP_OUTARGS)) { + select_srcif = !(flags & IP_FORWARDING); ifscope = ipoa->ipoa_ifscope; } else { select_srcif = FALSE; @@ -345,21 +356,22 @@ ip_output_list( #if IPFIREWALL if (args.rule != NULL) { /* dummynet already saw us */ - ip = mtod(m, struct ip *); - hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; - lck_mtx_lock(rt_mtx); - if (ro->ro_rt != NULL) - ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; - if (ia) - ifaref(&ia->ia_ifa); - lck_mtx_unlock(rt_mtx); + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; + if (ro->ro_rt != NULL) { + RT_LOCK_SPIN(ro->ro_rt); + ia = (struct in_ifaddr *)ro->ro_rt->rt_ifa; + if (ia) + ifaref(&ia->ia_ifa); + RT_UNLOCK(ro->ro_rt); + } #if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { - so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + so = ipsec_getsocket(m); + (void)ipsec_setsocket(m, NULL); } #endif - goto sendit; + goto sendit; } #endif /* IPFIREWALL */ @@ -374,7 +386,10 @@ ip_output_list( * No need to proccess packet twice if we've * already seen it */ - inject_filter_ref = ipf_get_inject_filter(m); + if (!SLIST_EMPTY(&m->m_pkthdr.tags)) + inject_filter_ref = ipf_get_inject_filter(m); + else + inject_filter_ref = 0; if (opt) { m = ip_insertoptions(m, opt, &len); @@ -398,7 +413,7 @@ ip_output_list( #else ip->ip_id = htons(ip_id++); #endif - 
OSAddAtomic(1, (SInt32*)&ipstat.ips_localout); + OSAddAtomic(1, &ipstat.ips_localout); } else { hlen = IP_VHL_HL(ip->ip_vhl) << 2; } @@ -426,20 +441,27 @@ ip_output_list( * cache with IPv6. */ - lck_mtx_lock(rt_mtx); if (ro->ro_rt != NULL) { if (ro->ro_rt->generation_id != route_generation && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && - (ip->ip_src.s_addr != INADDR_ANY) && - (ifa_foraddr(ip->ip_src.s_addr) == 0)) { - error = EADDRNOTAVAIL; - lck_mtx_unlock(rt_mtx); - goto bad; + (ip->ip_src.s_addr != INADDR_ANY)) { + src_ia = ifa_foraddr(ip->ip_src.s_addr); + if (src_ia == NULL) { + error = EADDRNOTAVAIL; + goto bad; + } + ifafree(&src_ia->ia_ifa); } + /* + * Test rt_flags without holding rt_lock for performance + * reasons; if the route is down it will hopefully be + * caught by the layer below (since it uses this route + * as a hint) or during the next transmit. + */ if ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin_family != AF_INET || dst->sin_addr.s_addr != pkt_dst.s_addr) { - rtfree_locked(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = NULL; } /* @@ -468,9 +490,8 @@ ip_output_list( ifafree(&ia->ia_ifa); if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) { if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + OSAddAtomic(1, &ipstat.ips_noroute); error = ENETUNREACH; - lck_mtx_unlock(rt_mtx); goto bad; } } @@ -488,14 +509,8 @@ ip_output_list( if (ia != NULL) ifafree(&ia->ia_ifa); - /* Could use IFP_TO_IA instead but rt_mtx is already held */ - for (ia = TAILQ_FIRST(&in_ifaddrhead); - ia != NULL && ia->ia_ifp != ifp; - ia = TAILQ_NEXT(ia, ia_link)) - continue; - - if (ia != NULL) - ifaref(&ia->ia_ifa); + /* Macro takes reference on ia */ + IFP_TO_IA(ifp, ia); } else { boolean_t cloneok = FALSE; /* @@ -507,7 +522,7 @@ ip_output_list( * route (for this PCB instance) before. 
*/ if (select_srcif && ip->ip_src.s_addr != INADDR_ANY && - (ro->ro_rt == NULL || + (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) || ro->ro_rt->generation_id != route_generation || !(ro->ro_flags & ROF_SRCIF_SELECTED))) { struct ifaddr *ifa; @@ -525,7 +540,6 @@ ip_output_list( if (ifa == NULL && !(flags & IP_RAWOUTPUT) && ifscope != lo_ifp->if_index) { error = EADDRNOTAVAIL; - lck_mtx_unlock(rt_mtx); goto bad; } @@ -584,18 +598,30 @@ ip_output_list( if (cloneok || dst->sin_addr.s_addr == INADDR_BROADCAST) ign &= ~RTF_PRCLONING; - rtalloc_scoped_ign_locked(ro, ign, ifscope); + /* + * Loosen the route lookup criteria if the ifscope + * corresponds to the loopback interface; this is + * needed to support Application Layer Gateways + * listening on loopback, in conjunction with packet + * filter redirection rules. The final source IP + * address will be rewritten by the packet filter + * prior to the RFC1122 loopback check below. + */ + if (ifscope == lo_ifp->if_index) + rtalloc_ign(ro, ign); + else + rtalloc_scoped_ign(ro, ign, ifscope); } if (ro->ro_rt == NULL) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + OSAddAtomic(1, &ipstat.ips_noroute); error = EHOSTUNREACH; - lck_mtx_unlock(rt_mtx); goto bad; } if (ia) ifafree(&ia->ia_ifa); + RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia(ro->ro_rt->rt_ifa); if (ia) ifaref(&ia->ia_ifa); @@ -607,8 +633,9 @@ ip_output_list( isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); + RT_UNLOCK(ro->ro_rt); } - lck_mtx_unlock(rt_mtx); + if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { struct in_multi *inm; @@ -640,7 +667,7 @@ ip_output_list( */ if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { if ((ifp->if_flags & IFF_MULTICAST) == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + OSAddAtomic(1, &ipstat.ips_noroute); error = ENETUNREACH; goto bad; } @@ -650,15 +677,14 @@ ip_output_list( * of outgoing interface. 
*/ if (ip->ip_src.s_addr == INADDR_ANY) { - register struct in_ifaddr *ia1; - lck_mtx_lock(rt_mtx); + struct in_ifaddr *ia1; + lck_rw_lock_shared(in_ifaddr_rwlock); TAILQ_FOREACH(ia1, &in_ifaddrhead, ia_link) if (ia1->ia_ifp == ifp) { ip->ip_src = IA_SIN(ia1)->sin_addr; - break; } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); if (ip->ip_src.s_addr == INADDR_ANY) { error = ENETUNREACH; goto bad; @@ -690,9 +716,12 @@ ip_output_list( ipf_ref(); /* 4135317 - always pass network byte order to filter */ + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); - +#endif + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { if ((struct ipfilter *)inject_filter_ref == filter) @@ -713,9 +742,12 @@ ip_output_list( /* set back to host byte order */ ip = mtod(m, struct ip *); + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); - +#endif + ipf_unref(); didfilter = 1; } @@ -810,6 +842,29 @@ ip_output_list( } sendit: +#if PF + /* Invoke outbound packet filter */ + if (pf_af_hook(ifp, mppn, &m, AF_INET, FALSE) != 0) { + if (packetlist == m0) { + packetlist = m; + mppn = NULL; + } + if (m != NULL) { + m0 = m; + /* Next packet in the chain */ + goto loopit; + } else if (packetlist != NULL) { + /* No more packet; send down the chain */ + goto sendchain; + } + /* Nothing left; we're done */ + goto done; + } + m0 = m; + ip = mtod(m, struct ip *); + pkt_dst = ip->ip_dst; + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#endif /* PF */ /* * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt */ @@ -824,13 +879,25 @@ ip_output_list( if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) { struct ipfilter *filter; int seen = (inject_filter_ref == 0); - + + /* Check that a TSO frame isn't passed to a filter. + * This could happen if a filter is inserted while + * TCP is sending the TSO packet. 
+ */ + if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) { + error = EMSGSIZE; + goto bad; + } + ipf_ref(); /* 4135317 - always pass network byte order to filter */ + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); - +#endif + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { if ((struct ipfilter *)inject_filter_ref == filter) @@ -851,9 +918,12 @@ ip_output_list( /* set back to host byte order */ ip = mtod(m, struct ip *); + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); - +#endif + ipf_unref(); } @@ -932,8 +1002,11 @@ ip_output_list( m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif error = ipsec4_output(&state, sp, flags); @@ -984,53 +1057,71 @@ ip_output_list( hlen = ip->ip_hl << 2; #endif /* Check that there wasn't a route change and src is still valid */ - - lck_mtx_lock(rt_mtx); - if (ro->ro_rt && ro->ro_rt->generation_id != route_generation) { - if (ifa_foraddr(ip->ip_src.s_addr) == 0 && ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { - error = EADDRNOTAVAIL; - lck_mtx_unlock(rt_mtx); - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 5,0,0,0,0); + if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) { + if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL && + ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { + error = EADDRNOTAVAIL; + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 5,0,0,0,0); goto bad; } - rtfree_locked(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = NULL; + if (src_ia != NULL) + ifafree(&src_ia->ia_ifa); } if (ro->ro_rt == NULL) { if ((flags & IP_ROUTETOIF) == 0) { - printf("ip_output: " - "can't update route after IPsec processing\n"); - error = EHOSTUNREACH; /*XXX*/ - lck_mtx_unlock(rt_mtx); - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 6,0,0,0,0); + printf("ip_output: can't update route after " + "IPsec processing\n"); + error = EHOSTUNREACH; /*XXX*/ + 
KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 6,0,0,0,0); goto bad; } } else { if (ia) ifafree(&ia->ia_ifa); + RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia(ro->ro_rt->rt_ifa); if (ia) ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; + RT_UNLOCK(ro->ro_rt); } - lck_mtx_unlock(rt_mtx); /* make it flipped, again. */ + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); +#endif + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); /* Pass to filters again */ if (!TAILQ_EMPTY(&ipv4_filters)) { struct ipfilter *filter; + /* Check that a TSO frame isn't passed to a filter. + * This could happen if a filter is inserted while + * TCP is sending the TSO packet. + */ + if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) { + error = EMSGSIZE; + goto bad; + } + ipf_ref(); /* 4135317 - always pass network byte order to filter */ + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); - +#endif + TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (filter->ipf_filter.ipf_output) { errno_t result; @@ -1048,9 +1139,12 @@ ip_output_list( /* set back to host byte order */ ip = mtod(m, struct ip *); + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); - +#endif + ipf_unref(); } skip_ipsec: @@ -1153,8 +1247,11 @@ ip_output_list( } /* Restore packet header fields to original values */ + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif /* Deliver packet to divert input routine */ divert_packet(m, 0, off & 0xffff, args.divert_rule); @@ -1203,6 +1300,7 @@ ip_output_list( * as the packet runs through ip_input() as * it is done through a ISR. 
*/ + lck_rw_lock_shared(in_ifaddr_rwlock); TAILQ_FOREACH(ia_fw, &in_ifaddrhead, ia_link) { /* * If the addr to forward to is one @@ -1213,13 +1311,15 @@ ip_output_list( dst->sin_addr.s_addr) break; } - if (ia) { + lck_rw_done(in_ifaddr_rwlock); + if (ia_fw) { /* tell ip_input "dont filter" */ struct m_tag *fwd_tag; struct ip_fwd_tag *ipfwd_tag; - fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, - sizeof(struct sockaddr_in), M_NOWAIT); + fwd_tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, + sizeof (*ipfwd_tag), M_NOWAIT); if (fwd_tag == NULL) { error = ENOBUFS; goto bad; @@ -1248,9 +1348,11 @@ ip_output_list( m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; ip->ip_sum = in_cksum(m, hlen); } + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); - +#endif /* we need to call dlil_output to run filters * and resync to avoid recursion loops. @@ -1271,18 +1373,19 @@ ip_output_list( */ bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); - ro_fwd->ro_rt = 0; - lck_mtx_lock(rt_mtx); - rtalloc_ign_locked(ro_fwd, RTF_PRCLONING); + ro_fwd->ro_rt = NULL; + rtalloc_ign(ro_fwd, RTF_PRCLONING); - if (ro_fwd->ro_rt == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + if (ro_fwd->ro_rt == NULL) { + OSAddAtomic(1, &ipstat.ips_noroute); error = EHOSTUNREACH; - lck_mtx_unlock(rt_mtx); goto bad; } + RT_LOCK_SPIN(ro_fwd->ro_rt); ia_fw = ifatoia(ro_fwd->ro_rt->rt_ifa); + if (ia_fw != NULL) + ifaref(&ia_fw->ia_ifa); ifp = ro_fwd->ro_rt->rt_ifp; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) @@ -1292,18 +1395,21 @@ ip_output_list( (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); else isbroadcast = in_broadcast(dst->sin_addr, ifp); - rtfree_locked(ro->ro_rt); + RT_UNLOCK(ro_fwd->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = ro_fwd->ro_rt; dst = (struct sockaddr_in *)&ro_fwd->ro_dst; - lck_mtx_unlock(rt_mtx); /* * If we added a default src ip earlier, * which would have been gotten from the-then * interface, do it again, from the new 
one. */ - if (fwd_rewrite_src) - ip->ip_src = IA_SIN(ia_fw)->sin_addr; + if (ia_fw != NULL) { + if (fwd_rewrite_src) + ip->ip_src = IA_SIN(ia_fw)->sin_addr; + ifafree(&ia_fw->ia_ifa); + } goto pass ; } #endif /* IPFIREWALL_FORWARD */ @@ -1323,7 +1429,7 @@ ip_output_list( if ((ifp->if_flags & IFF_LOOPBACK) == 0 && ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_badaddr); + OSAddAtomic(1, &ipstat.ips_badaddr); m_freem(m); /* * Do not simply drop the packet just like a firewall -- we want the @@ -1337,6 +1443,8 @@ ip_output_list( } #endif m->m_pkthdr.csum_flags |= CSUM_IP; + tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4); + sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); @@ -1382,12 +1490,17 @@ ip_output_list( * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. 
*/ - if ((u_short)ip->ip_len <= ifp->if_mtu || + if ((u_short)ip->ip_len <= ifp->if_mtu || tso || ifp->if_hwassist & CSUM_FRAGMENT) { - struct rtentry *rte; + if (tso) + m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { ip->ip_sum = in_cksum(m, hlen); @@ -1407,30 +1520,22 @@ ip_output_list( ipsec_delaux(m); #endif if (packetchain == 0) { - lck_mtx_lock(rt_mtx); - if ((rte = ro->ro_rt) != NULL) - rtref(rte); - lck_mtx_unlock(rt_mtx); - error = ifnet_output(ifp, PF_INET, m, rte, + error = ifnet_output(ifp, PF_INET, m, ro->ro_rt, (struct sockaddr *)dst); - if (rte != NULL) - rtfree(rte); goto done; } else { /* packet chaining allows us to reuse the route for all packets */ + mppn = &m->m_nextpkt; m = m->m_nextpkt; if (m == NULL) { +#if PF +sendchain: +#endif /* PF */ if (pktcnt > ip_maxchainsent) ip_maxchainsent = pktcnt; - lck_mtx_lock(rt_mtx); - if ((rte = ro->ro_rt) != NULL) - rtref(rte); - lck_mtx_unlock(rt_mtx); //send error = ifnet_output(ifp, PF_INET, packetlist, - rte, (struct sockaddr *)dst); - if (rte != NULL) - rtfree(rte); + ro->ro_rt, (struct sockaddr *)dst); pktcnt = 0; goto done; @@ -1444,32 +1549,111 @@ ip_output_list( * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ - if (ip->ip_off & IP_DF) { + + if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) { error = EMSGSIZE; /* * This case can happen if the user changed the MTU + * * of an interface after enabling IP on it. Because * most netifs don't keep track of routes pointing to * them, there is no way for one to update all its * routes when the MTU is changed. 
*/ - - lck_mtx_lock(rt_mtx); + RT_LOCK_SPIN(ro->ro_rt); if (ro->ro_rt && (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; } - lck_mtx_unlock(rt_mtx); - OSAddAtomic(1, (SInt32*)&ipstat.ips_cantfrag); + RT_UNLOCK(ro->ro_rt); + OSAddAtomic(1, &ipstat.ips_cantfrag); goto bad; } - len = (ifp->if_mtu - hlen) &~ 7; - if (len < 8) { - error = EMSGSIZE; + + error = ip_fragment(m, ifp, ifp->if_mtu, sw_csum); + if (error != 0) { + m0 = m = NULL; goto bad; } + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; +#if IPSEC + /* clean ipsec history once it goes out of the node */ + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) + ipsec_delaux(m); +#endif + if (error == 0) { +#ifndef __APPLE__ + /* Record statistics for this interface address. */ + if (ia != NULL) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } +#endif + if ((packetchain != 0) && (pktcnt > 0)) + panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist); + error = ifnet_output(ifp, PF_INET, m, ro->ro_rt, + (struct sockaddr *)dst); + } else + m_freem(m); + } + + if (error == 0) + OSAddAtomic(1, &ipstat.ips_fragmented); + +done: + if (ia) { + ifafree(&ia->ia_ifa); + ia = NULL; + } +#if IPSEC + if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + if (ro == &iproute && ro->ro_rt) { + rtfree(ro->ro_rt); + ro->ro_rt = NULL; + } + if (sp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ip_output call free SP:%x\n", sp)); + key_freesp(sp, KEY_SADB_UNLOCKED); + } + } +#endif /* IPSEC */ + + KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0); + return (error); +bad: + m_freem(m0); + goto done; +} + +int +ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) +{ + struct ip *ip, *mhip; + 
int len, hlen, mhlen, firstlen, off, error = 0; + struct mbuf **mnext = &m->m_nextpkt, *m0; + int nfrags = 1; + + ip = mtod(m, struct ip *); +#ifdef _IP_VHL + hlen = IP_VHL_HL(ip->ip_vhl) << 2; +#else + hlen = ip->ip_hl << 2; +#endif + + firstlen = len = (mtu - hlen) &~ 7; + if (len < 8) { + m_freem(m); + return (EMSGSIZE); + } + /* * if the interface will not calculate checksums on * fragmented packets, then do it here. @@ -1480,12 +1664,6 @@ ip_output_list( m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - - { - int mhlen, firstlen = len; - struct mbuf **mnext = &m->m_nextpkt; - int nfrags = 1; - /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. @@ -1496,7 +1674,7 @@ ip_output_list( MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ if (m == 0) { error = ENOBUFS; - OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped); + OSAddAtomic(1, &ipstat.ips_odropped); goto sendorfree; } m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; @@ -1520,7 +1698,7 @@ ip_output_list( if (m->m_next == 0) { (void) m_free(m); error = ENOBUFS; /* ??? 
*/ - OSAddAtomic(1, (SInt32*)&ipstat.ips_odropped); + OSAddAtomic(1, &ipstat.ips_odropped); goto sendorfree; } m->m_pkthdr.len = mhlen + len; @@ -1530,7 +1708,11 @@ ip_output_list( #if CONFIG_MACF_NET mac_netinet_fragment(m0, m); #endif + +#if BYTE_ORDER != BIG_ENDIAN HTONS(mhip->ip_off); +#endif + mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { mhip->ip_sum = in_cksum(m, mhlen); @@ -1539,7 +1721,7 @@ ip_output_list( mnext = &m->m_nextpkt; nfrags++; } - OSAddAtomic(nfrags, (SInt32*)&ipstat.ips_ofragments); + OSAddAtomic(nfrags, &ipstat.ips_ofragments); /* set first/last markers for fragment chain */ m->m_flags |= M_LASTFRAG; @@ -1555,74 +1737,20 @@ ip_output_list( m->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m->m_pkthdr.len); ip->ip_off |= IP_MF; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { ip->ip_sum = in_cksum(m, hlen); } sendorfree: + if (error) + m_freem_list(m0); - KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - - for (m = m0; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = 0; -#if IPSEC - /* clean ipsec history once it goes out of the node */ - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) - ipsec_delaux(m); -#endif - if (error == 0) { - struct rtentry *rte; -#ifndef __APPLE__ - /* Record statistics for this interface address. 
*/ - if (ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; - } -#endif - if ((packetchain != 0) && (pktcnt > 0)) - panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist); - lck_mtx_lock(rt_mtx); - if ((rte = ro->ro_rt) != NULL) - rtref(rte); - lck_mtx_unlock(rt_mtx); - error = ifnet_output(ifp, PF_INET, m, rte, - (struct sockaddr *)dst); - if (rte != NULL) - rtfree(rte); - } else - m_freem(m); - } - - if (error == 0) - OSAddAtomic(1, (SInt32*)&ipstat.ips_fragmented); - } -done: - if (ia) { - ifafree(&ia->ia_ifa); - ia = NULL; - } -#if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { - if (ro == &iproute && ro->ro_rt) { - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - } - if (sp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ip_output call free SP:%x\n", sp)); - key_freesp(sp, KEY_SADB_UNLOCKED); - } - } -#endif /* IPSEC */ - - KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0); return (error); -bad: - m_freem(m0); - goto done; } static void @@ -1661,7 +1789,7 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) /* Sometimes the IP header is not contiguous, yes this can happen! 
*/ if (ip_offset + sizeof(struct ip) > m->m_len) { #if DEBUG - printf("delayed m_pullup, m->len: %ld off: %d\n", + printf("delayed m_pullup, m->len: %d off: %d\n", m->m_len, ip_offset); #endif m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); @@ -1724,7 +1852,7 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) char tmp[2]; #if DEBUG - printf("delayed m_copyback, m->len: %ld off: %d p: %d\n", + printf("delayed m_copyback, m->len: %d off: %d p: %d\n", m->m_len, offset + ip_offset, ip->ip_p); #endif *(u_short *)tmp = csum; @@ -1760,7 +1888,7 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset) if (ip_offset + sizeof(struct ip) > m->m_len) { #if DEBUG - printf("in_cksum_offset - delayed m_pullup, m->len: %ld off: %lu\n", + printf("in_cksum_offset - delayed m_pullup, m->len: %d off: %lu\n", m->m_len, ip_offset); #endif m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); @@ -1824,7 +1952,7 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset) char tmp[2]; #if DEBUG - printf("in_cksum_offset m_copyback, m->len: %lu off: %lu p: %d\n", + printf("in_cksum_offset m_copyback, m->len: %u off: %lu p: %d\n", m->m_len, ip_offset + offsetof(struct ip, ip_sum), ip->ip_p); #endif *(u_short *)tmp = ip->ip_sum; @@ -1960,7 +2088,8 @@ ip_ctloutput(so, sopt) error = EMSGSIZE; break; } - MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER); + MGET(m, sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT, + MT_HEADER); if (m == 0) { error = ENOBUFS; break; @@ -2150,8 +2279,7 @@ ip_ctloutput(so, sopt) break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; - priv = (sopt->sopt_p != NULL && - proc_suser(sopt->sopt_p) != 0) ? 0 : 1; + priv = (proc_suser(sopt->sopt_p) == 0); if (m) { req = mtod(m, caddr_t); len = m->m_len; @@ -2651,7 +2779,7 @@ ip_setmoptions(sopt, imop) * If all options have default values, no need to keep the mbuf. 
*/ if (imo->imo_multicast_ifp == NULL && - imo->imo_multicast_vif == (u_long)-1 && + imo->imo_multicast_vif == (u_int32_t)-1 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { @@ -2699,34 +2827,31 @@ ip_addmembership( struct ifnet *ifp = NULL; int error = 0; int i; - + + bzero((caddr_t)&ro, sizeof(ro)); + if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { error = EINVAL; - return error; + goto done; } /* * If no interface address was provided, use the interface of * the route to the given multicast address. */ if (mreq->imr_interface.s_addr == INADDR_ANY) { - bzero((caddr_t)&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = mreq->imr_multiaddr; - lck_mtx_lock(rt_mtx); - rtalloc_ign_locked(&ro, 0UL); + rtalloc_ign(&ro, 0); if (ro.ro_rt != NULL) { ifp = ro.ro_rt->rt_ifp; - rtfree_locked(ro.ro_rt); - } - else { + } else { /* If there's no default route, try using loopback */ - mreq->imr_interface.s_addr = INADDR_LOOPBACK; + mreq->imr_interface.s_addr = htonl(INADDR_LOOPBACK); } - lck_mtx_unlock(rt_mtx); } - + if (ifp == NULL) { ifp = ip_multicast_if(&mreq->imr_interface, NULL); } @@ -2737,7 +2862,7 @@ ip_addmembership( */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; - return error; + goto done; } /* * See if the membership already exists or if all the @@ -2751,11 +2876,11 @@ ip_addmembership( } if (i < imo->imo_num_memberships) { error = EADDRINUSE; - return error; + goto done; } if (i == IP_MAX_MEMBERSHIPS) { error = ETOOMANYREFS; - return error; + goto done; } /* * Everything looks good; add a new record to the multicast @@ -2764,10 +2889,14 @@ ip_addmembership( if ((imo->imo_membership[i] = in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { error = ENOBUFS; - return error; + goto done; } ++imo->imo_num_memberships; - + +done: + if (ro.ro_rt != NULL) + 
rtfree(ro.ro_rt); + return error; } @@ -2865,6 +2994,8 @@ ip_getmoptions(sopt, imo) IFP_TO_IA(imo->imo_multicast_ifp, ia); addr.s_addr = (ia == NULL) ? INADDR_ANY : IA_SIN(ia)->sin_addr.s_addr; + if (ia != NULL) + ifafree(&ia->ia_ifa); } error = sooptcopyout(sopt, &addr, sizeof addr); break; @@ -2944,8 +3075,12 @@ ip_mloopback(ifp, m, dst, hlen) * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); +#endif + ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, hlen); /* @@ -2983,9 +3118,17 @@ ip_mloopback(ifp, m, dst, hlen) CSUM_IP_CHECKED | CSUM_IP_VALID; copym->m_pkthdr.csum_data = 0xffff; } else { + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); +#endif + in_delayed_cksum(copym); + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); +#endif + } } @@ -3013,26 +3156,29 @@ ip_mloopback(ifp, m, dst, hlen) /* * Given a source IP address (and route, if available), determine the best - * interface to send the packet from. + * interface to send the packet from. Checking for (and updating) the + * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done + * without any locks based on the assumption that ip_output() is single- + * threaded per-pcb, i.e. for any given pcb there can only be one thread + * performing output at the IP layer. 
*/ static struct ifaddr * in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) { struct ifaddr *ifa = NULL; - struct sockaddr src = { sizeof (struct sockaddr_in), AF_INET, { 0, } }; + struct in_addr src = ip->ip_src; + struct in_addr dst = ip->ip_dst; struct ifnet *rt_ifp; - char ip_src[16], ip_dst[16]; + char s_src[16], s_dst[16]; if (ip_select_srcif_debug) { - (void) inet_ntop(AF_INET, &ip->ip_src.s_addr, ip_src, - sizeof (ip_src)); - (void) inet_ntop(AF_INET, &ip->ip_dst.s_addr, ip_dst, - sizeof (ip_dst)); + (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src)); + (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst)); } - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + if (ro->ro_rt != NULL) + RT_LOCK(ro->ro_rt); - ((struct sockaddr_in *)&src)->sin_addr.s_addr = ip->ip_src.s_addr; rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL; /* @@ -3061,18 +3207,18 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) scope = get_primary_ifscope(); } - ifa = ifa_ifwithaddr_scoped(&src, scope); + ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope); if (ip_select_srcif_debug && ifa != NULL) { if (ro->ro_rt != NULL) { printf("%s->%s ifscope %d->%d ifa_if %s%d " - "ro_if %s%d\n", ip_src, ip_dst, ifscope, + "ro_if %s%d\n", s_src, s_dst, ifscope, scope, ifa->ifa_ifp->if_name, ifa->ifa_ifp->if_unit, rt_ifp->if_name, rt_ifp->if_unit); } else { printf("%s->%s ifscope %d->%d ifa_if %s%d\n", - ip_src, ip_dst, ifscope, scope, + s_src, s_dst, ifscope, scope, ifa->ifa_ifp->if_name, ifa->ifa_ifp->if_unit); } @@ -3090,15 +3236,17 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) * found interface. 
*/ if (ifa == NULL && ifscope == IFSCOPE_NONE) { - ifa = ifa_ifwithaddr(&src); + ifa = (struct ifaddr *)ifa_foraddr(src.s_addr); if (ip_select_srcif_debug && ifa != NULL) { printf("%s->%s ifscope %d ifa_if %s%d\n", - ip_src, ip_dst, ifscope, ifa->ifa_ifp->if_name, + s_src, s_dst, ifscope, ifa->ifa_ifp->if_name, ifa->ifa_ifp->if_unit); } } + if (ro->ro_rt != NULL) + RT_LOCK_ASSERT_HELD(ro->ro_rt); /* * If there is a non-loopback route with the wrong interface, or if * there is no interface configured with such an address, blow it @@ -3112,18 +3260,19 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) if (ifa != NULL) { printf("%s->%s ifscope %d ro_if %s%d != " "ifa_if %s%d (cached route cleared)\n", - ip_src, ip_dst, ifscope, rt_ifp->if_name, + s_src, s_dst, ifscope, rt_ifp->if_name, rt_ifp->if_unit, ifa->ifa_ifp->if_name, ifa->ifa_ifp->if_unit); } else { printf("%s->%s ifscope %d ro_if %s%d " "(no ifa_if found)\n", - ip_src, ip_dst, ifscope, rt_ifp->if_name, + s_src, s_dst, ifscope, rt_ifp->if_name, rt_ifp->if_unit); } } - rtfree_locked(ro->ro_rt); + RT_UNLOCK(ro->ro_rt); + rtfree(ro->ro_rt); ro->ro_rt = NULL; ro->ro_flags &= ~ROF_SRCIF_SELECTED; @@ -3137,8 +3286,8 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) * but other shared subnets; for now we explicitly test only * for the former case and save the latter for future. 
*/ - if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) && - !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) && ifa != NULL) { + if (IN_LINKLOCAL(ntohl(dst.s_addr)) && + !IN_LINKLOCAL(ntohl(src.s_addr)) && ifa != NULL) { ifafree(ifa); ifa = NULL; } @@ -3146,7 +3295,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) if (ip_select_srcif_debug && ifa == NULL) { printf("%s->%s ifscope %d (neither ro_if/ifa_if found)\n", - ip_src, ip_dst, ifscope); + s_src, s_dst, ifscope); } /* @@ -3157,13 +3306,16 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) * otherwise we want to come back here again when the route points * to the interface over which the ARP reply arrives on. */ - if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || + if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) || (ro->ro_rt->rt_gateway->sa_family == AF_LINK && SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) { ro->ro_flags |= ROF_SRCIF_SELECTED; ro->ro_rt->generation_id = route_generation; } + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); + return (ifa); } @@ -3188,11 +3340,9 @@ ip_bindif(struct inpcb *inp, unsigned int ifscope) else inp->inp_flags |= INP_BOUND_IF; - lck_mtx_lock(rt_mtx); /* Blow away any cached route in the PCB */ if (inp->inp_route.ro_rt != NULL) { - rtfree_locked(inp->inp_route.ro_rt); + rtfree(inp->inp_route.ro_rt); inp->inp_route.ro_rt = NULL; } - lck_mtx_unlock(rt_mtx); } diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index 0861cf587..c1892feeb 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -98,7 +98,7 @@ struct ipq { u_short ipq_id; /* sequence id for reassembly */ struct mbuf *ipq_frags; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; - u_long ipq_nfrags; + u_int32_t ipq_nfrags; TAILQ_ENTRY(ipq) ipq_list; #if CONFIG_MACF_NET struct label *ipq_label; /* MAC label */ @@ -138,7 +138,7 @@ struct ip_moptions { u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short 
imo_num_memberships; /* no. memberships this socket */ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS]; - u_long imo_multicast_vif; /* vif num outgoing multicasts */ + u_int32_t imo_multicast_vif; /* vif num outgoing multicasts */ struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */ }; @@ -219,7 +219,7 @@ extern struct protosw *ip_protox[]; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */ extern int (*legal_vif_num)(int); -extern u_long (*ip_mcast_src)(int); +extern u_int32_t (*ip_mcast_src)(int); extern int rsvp_on; extern struct pr_usrreqs rip_usrreqs; extern int ip_doscopedroute; @@ -234,8 +234,7 @@ extern int ip_output(struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *, struct ip_out_args *); extern int ip_output_list(struct mbuf *, int, struct mbuf *, struct route *, int, struct ip_moptions *, struct ip_out_args *); -struct in_ifaddr * - ip_rtaddr(struct in_addr, struct route *); +struct in_ifaddr *ip_rtaddr(struct in_addr); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ip_slowtimo(void); @@ -250,8 +249,8 @@ int rip_ctloutput(struct socket *, struct sockopt *); void rip_ctlinput(int, struct sockaddr *, void *); void rip_init(void) __attribute__((section("__TEXT, initcode"))); void rip_input(struct mbuf *, int); -int rip_output(struct mbuf *, struct socket *, u_long); -int rip_unlock(struct socket *, int, int); +int rip_output(struct mbuf *, struct socket *, u_int32_t); +int rip_unlock(struct socket *, int, void *); void ipip_input(struct mbuf *, int); void rsvp_input(struct mbuf *, int); int ip_rsvp_init(struct socket *); @@ -271,5 +270,7 @@ extern void udp_out_cksum_stats(u_int32_t); int rip_send(struct socket *, int , struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); +extern int ip_fragment(struct mbuf *, struct ifnet *, unsigned long, int); + #endif /* KERNEL_PRIVATE */ #endif /* 
!_NETINET_IP_VAR_H_ */ diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c index 11e005de8..6aea8ccf2 100644 --- a/bsd/netinet/kpi_ipfilter.c +++ b/bsd/netinet/kpi_ipfilter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,9 @@ #include #include #include +#include + +#include #define _IP_VHL #include @@ -48,6 +51,7 @@ #include #include + /* * kipf_lock and kipf_ref protect the linkage of the list of IP filters * An IP filter can be removed only when kipf_ref is zero @@ -56,8 +60,9 @@ * kipf_ref eventually goes down to zero, the IP filter is removed */ static lck_mtx_t *kipf_lock = 0; -static unsigned long kipf_ref = 0; -static unsigned long kipf_delayed_remove = 0; +static u_int32_t kipf_ref = 0; +static u_int32_t kipf_delayed_remove = 0; +u_int32_t kipf_count = 0; __private_extern__ struct ipfilter_list ipv4_filters = TAILQ_HEAD_INITIALIZER(ipv4_filters); __private_extern__ struct ipfilter_list ipv6_filters = TAILQ_HEAD_INITIALIZER(ipv6_filters); @@ -122,22 +127,17 @@ ipf_add( new_filter->ipf_filter = *filter; new_filter->ipf_head = head; - /* - * 3957298 - * Make sure third parties have a chance to filter packets before - * SharedIP. Always SharedIP at the end of the list. 
- */ - if (filter->name != NULL && - strcmp(filter->name, "com.apple.nke.SharedIP") == 0) { - TAILQ_INSERT_TAIL(head, new_filter, ipf_link); - } - else { - TAILQ_INSERT_HEAD(head, new_filter, ipf_link); - } + TAILQ_INSERT_HEAD(head, new_filter, ipf_link); lck_mtx_unlock(kipf_lock); *filter_ref = (ipfilter_t)new_filter; + + /* This will force TCP to re-evaluate its use of TSO */ + OSAddAtomic(1, &kipf_count); + if (use_routegenid) + routegenid_update(); + return 0; } @@ -190,6 +190,12 @@ ipf_remove( if (ipf_detach) ipf_detach(cookie); FREE(match, M_IFADDR); + + /* This will force TCP to re-evaluate its use of TSO */ + OSAddAtomic(-1, &kipf_count); + if (use_routegenid) + routegenid_update(); + } return 0; } @@ -309,17 +315,18 @@ ipf_injectv4_out( } /* Put ip_len and ip_off in host byte order, ip_output expects that */ + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); - +#endif + /* Send */ error = ip_output(m, NULL, &ro, IP_ALLOWBROADCAST | IP_RAWOUTPUT, imo, NULL); /* Release the route */ - if (ro.ro_rt) { + if (ro.ro_rt) rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } return error; } @@ -392,10 +399,8 @@ ipf_injectv6_out( error = ip6_output(m, NULL, &ro, 0, im6o, NULL, 0); /* Release the route */ - if (ro.ro_rt) { + if (ro.ro_rt) rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } return error; } diff --git a/bsd/netinet/kpi_ipfilter.h b/bsd/netinet/kpi_ipfilter.h index a247d58dd..3d2aaaac9 100644 --- a/bsd/netinet/kpi_ipfilter.h +++ b/bsd/netinet/kpi_ipfilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -47,18 +47,18 @@ struct ipf_pktopts { u_int32_t ippo_flags; ifnet_t ippo_mcast_ifnet; - int ippo_mcast_loop; + int ippo_mcast_loop; u_int8_t ippo_mcast_ttl; }; #define IPPOF_MCAST_OPTS 0x1 -typedef struct ipf_pktopts* ipf_pktopts_t; +typedef struct ipf_pktopts *ipf_pktopts_t; __BEGIN_DECLS /*! 
@typedef ipf_input_func - + @discussion ipf_input_func is used to filter incoming ip packets. The IP filter is called for packets from all interfaces. The filter is called between when the general IP processing is @@ -78,15 +78,19 @@ __BEGIN_DECLS (udp/tcp/icmp/esp/etc...). @param protocol The protocol type (udp/tcp/icmp/etc...) of the IP packet @result Return: - 0 - The caller will continue with normal processing of the packet. - EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. - Anything Else - The caller will free the packet and stop processing. + 0 - The caller will continue with normal processing of the + packet. + EJUSTRETURN - The caller will stop processing the packet, + the packet will not be freed. + Anything Else - The caller will free the packet and stop + processing. */ -typedef errno_t (*ipf_input_func)(void* cookie, mbuf_t *data, int offset, u_int8_t protocol); +typedef errno_t (*ipf_input_func)(void *cookie, mbuf_t *data, int offset, + u_int8_t protocol); /*! @typedef ipf_output_func - + @discussion ipf_output_func is used to filter outbound ip packets. The IP filter is called for packets to all interfaces. The filter is called before fragmentation and IPSec processing. If @@ -96,20 +100,24 @@ typedef errno_t (*ipf_input_func)(void* cookie, mbuf_t *data, int offset, u_int8 @param data The ip packet, will contain an IP header followed by the rest of the IP packet. @result Return: - 0 - The caller will continue with normal processing of the packet. - EJUSTRETURN - The caller will stop processing the packet, the packet will not be freed. - Anything Else - The caller will free the packet and stop processing. + 0 - The caller will continue with normal processing of the + packet. + EJUSTRETURN - The caller will stop processing the packet, + the packet will not be freed. + Anything Else - The caller will free the packet and stop + processing. 
*/ -typedef errno_t (*ipf_output_func)(void* cookie, mbuf_t *data, ipf_pktopts_t options); +typedef errno_t (*ipf_output_func)(void *cookie, mbuf_t *data, + ipf_pktopts_t options); /*! @typedef ipf_detach_func - + @discussion ipf_detach_func is called to notify your filter that it has been detached. @param cookie The cookie specified when your filter was attached. */ -typedef void (*ipf_detach_func)(void* cookie); +typedef void (*ipf_detach_func)(void *cookie); /*! @typedef ipf_filter @@ -123,15 +131,15 @@ typedef void (*ipf_detach_func)(void* cookie); @field ipf_detach The filter function to notify of a detach. */ struct ipf_filter { - void* cookie; - const char* name; + void *cookie; + const char *name; ipf_input_func ipf_input; ipf_output_func ipf_output; ipf_detach_func ipf_detach; }; struct opaque_ipfilter; -typedef struct opaque_ipfilter* ipfilter_t; +typedef struct opaque_ipfilter *ipfilter_t; /*! @function ipf_addv4 @@ -140,7 +148,8 @@ typedef struct opaque_ipfilter* ipfilter_t; @param filter_ref A reference to the filter used to detach it. @result 0 on success otherwise the errno error. */ -errno_t ipf_addv4(const struct ipf_filter* filter, ipfilter_t *filter_ref); +extern errno_t ipf_addv4(const struct ipf_filter *filter, + ipfilter_t *filter_ref); /*! @function ipf_addv6 @@ -149,7 +158,8 @@ errno_t ipf_addv4(const struct ipf_filter* filter, ipfilter_t *filter_ref); @param filter_ref A reference to the filter used to detach it. @result 0 on success otherwise the errno error. */ -errno_t ipf_addv6(const struct ipf_filter* filter, ipfilter_t *filter_ref); +extern errno_t ipf_addv6(const struct ipf_filter *filter, + ipfilter_t *filter_ref); /*! @function ipf_remove @@ -158,7 +168,7 @@ errno_t ipf_addv6(const struct ipf_filter* filter, ipfilter_t *filter_ref); ipf_addv6. @result 0 on success otherwise the errno error. */ -errno_t ipf_remove(ipfilter_t filter_ref); +extern errno_t ipf_remove(ipfilter_t filter_ref); /*! 
@function ipf_inject_input @@ -170,7 +180,7 @@ errno_t ipf_remove(ipfilter_t filter_ref); getting a chance to process the packet. If the filter modified the packet, it should not specify the filter ref to give other filters a chance to process the new packet. - + Caller is responsible for freeing mbuf chain in the event that ipf_inject_input returns an error. @param data The complete IPv4 or IPv6 packet, receive interface must @@ -178,7 +188,7 @@ errno_t ipf_remove(ipfilter_t filter_ref); @param filter_ref The reference to the filter injecting the data @result 0 on success otherwise the errno error. */ -errno_t ipf_inject_input(mbuf_t data, ipfilter_t filter_ref); +extern errno_t ipf_inject_input(mbuf_t data, ipfilter_t filter_ref); /*! @function ipf_inject_output @@ -196,7 +206,8 @@ errno_t ipf_inject_input(mbuf_t data, ipfilter_t filter_ref); @result 0 on success otherwise the errno error. ipf_inject_output will always free the mbuf. */ -errno_t ipf_inject_output(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options); +extern errno_t ipf_inject_output(mbuf_t data, ipfilter_t filter_ref, + ipf_pktopts_t options); __END_DECLS #endif /* __KPI_IPFILTER__ */ diff --git a/bsd/netinet/kpi_ipfilter_var.h b/bsd/netinet/kpi_ipfilter_var.h index f16c8908f..74f3e9bbc 100644 --- a/bsd/netinet/kpi_ipfilter_var.h +++ b/bsd/netinet/kpi_ipfilter_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -41,7 +41,7 @@ struct ipfilter { TAILQ_ENTRY(ipfilter) ipf_link; - struct ipf_filter ipf_filter; + struct ipf_filter ipf_filter; struct ipfilter_list *ipf_head; TAILQ_ENTRY(ipfilter) ipf_tbr; }; @@ -51,13 +51,12 @@ TAILQ_HEAD(ipfilter_list, ipfilter); extern struct ipfilter_list ipv6_filters; extern struct ipfilter_list ipv4_filters; -ipfilter_t ipf_get_inject_filter(struct mbuf *m); -void ipf_ref(void); -void ipf_unref(void); -int ipf_init(void); - -void ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, ipfilter_t ipfref); - +extern ipfilter_t ipf_get_inject_filter(struct mbuf *m); +extern void ipf_ref(void); +extern void ipf_unref(void); +extern int ipf_init(void); +extern void ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, + ipfilter_t ipfref); #endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index 0cc25b616..1e172e928 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -94,6 +94,10 @@ #include #include +#if INET6 +#include +#endif /* INET6 */ + #include #if IPSEC @@ -120,7 +124,6 @@ int rip_shutdown(struct socket *); extern int ipsec_bypass; #endif -extern u_long route_generation; struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; @@ -271,7 +274,7 @@ rip_input(m, iphlen) if (ipsec4_in_reject_so(m, last->inp_socket)) { m_freem(m); IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - OSAddAtomic(1, (SInt32*)&ipstat.ips_delivered); + OSAddAtomic(1, &ipstat.ips_delivered); /* do not inject data to pcb */ skipit = 1; } @@ -301,8 +304,8 @@ rip_input(m, iphlen) } } else { m_freem(m); - OSAddAtomic(1, (SInt32*)&ipstat.ips_noproto); - OSAddAtomic(-1, (SInt32*)&ipstat.ips_delivered); + OSAddAtomic(1, &ipstat.ips_noproto); + OSAddAtomic(-1, &ipstat.ips_delivered); } } } @@ -315,7 +318,7 @@ int rip_output(m, so, dst) register struct mbuf *m; struct socket *so; - u_long dst; + u_int32_t dst; { register struct ip *ip; register struct inpcb *inp = 
sotoinpcb(so); @@ -337,6 +340,8 @@ rip_output(m, so, dst) return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_WAIT); + if (m == NULL) + return ENOBUFS; ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; ip->ip_off = 0; @@ -368,7 +373,7 @@ rip_output(m, so, dst) #endif /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; - OSAddAtomic(1, (SInt32*)&ipstat.ips_rawout); + OSAddAtomic(1, &ipstat.ips_rawout); } #if IPSEC @@ -378,18 +383,21 @@ rip_output(m, so, dst) } #endif /*IPSEC*/ - if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->generation_id != route_generation) { + if (inp->inp_route.ro_rt != NULL && + inp->inp_route.ro_rt->generation_id != route_generation) { rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = (struct rtentry *)0; + inp->inp_route.ro_rt = NULL; } #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(inp, m); #endif -#if CONFIG_IP_EDGEHOLE - ip_edgehole_mbuf_tag(inp, m); -#endif + /* + * The domain lock is held across ip_output, so it is okay + * to pass the PCB cached route pointer directly to IP and + * the modules beneath it. + */ return (ip_output(m, inp->inp_options, &inp->inp_route, flags, inp->inp_moptions, &ipoa)); } @@ -453,7 +461,7 @@ rip_ctloutput(so, sopt) else error = ENOPROTOOPT; break; -#endif IPFIREWALL +#endif /* IPFIREWALL */ #if DUMMYNET case IP_DUMMYNET_GET: @@ -595,15 +603,19 @@ rip_ctlinput( struct in_ifaddr *ia; struct ifnet *ifp; int err; - int flags; + int flags, done = 0; switch (cmd) { case PRC_IFDOWN: - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { + done = 1; + ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + lck_mtx_lock(rnh_lock); /* * in_ifscrub kills the interface route. */ @@ -615,23 +627,29 @@ rip_ctlinput( * a routing process they will come back. 
*/ in_ifadown(&ia->ia_ifa, 1); + lck_mtx_unlock(rnh_lock); + ifafree(&ia->ia_ifa); break; } } - lck_mtx_unlock(rt_mtx); + if (!done) + lck_rw_done(in_ifaddr_rwlock); break; case PRC_IFUP: - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == 0 || (ia->ia_flags & IFA_ROUTE)) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return; } + ifaref(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; @@ -639,16 +657,16 @@ rip_ctlinput( || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; - err = rtinit_locked(&ia->ia_ifa, RTM_ADD, flags); - lck_mtx_unlock(rt_mtx); + err = rtinit(&ia->ia_ifa, RTM_ADD, flags); if (err == 0) ia->ia_flags |= IFA_ROUTE; + ifafree(&ia->ia_ifa); break; } } -u_long rip_sendspace = RIPSNDQ; -u_long rip_recvspace = RIPRCVQ; +u_int32_t rip_sendspace = RIPSNDQ; +u_int32_t rip_recvspace = RIPRCVQ; SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); @@ -768,7 +786,7 @@ rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr __unused struct mbuf *control, __unused struct proc *p) { struct inpcb *inp = sotoinpcb(so); - register u_long dst; + register u_int32_t dst; if (so->so_state & SS_ISCONNECTED) { if (nam) { @@ -790,29 +808,41 @@ rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr * it will handle the socket dealloc on last reference * */ int -rip_unlock(struct socket *so, int refcount, int debug) +rip_unlock(struct socket *so, int refcount, void *debug) { - int lr_saved; + void *lr_saved; struct inpcb *inp = sotoinpcb(so); - if (debug == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = debug; + if (debug == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = debug; if (refcount) { - if (so->so_usecount <= 0) - 
panic("rip_unlock: bad refoucnt so=%p val=%x\n", so, so->so_usecount); + if (so->so_usecount <= 0) { + panic("rip_unlock: bad refoucnt so=%p val=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } so->so_usecount--; if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { /* cleanup after last reference */ lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); lck_rw_lock_exclusive(ripcbinfo.mtx); + if (inp->inp_state != INPCB_STATE_DEAD) { +#if INET6 + if (INP_CHECK_SOCKAF(so, AF_INET6)) + in6_pcbdetach(inp); + else +#endif /* INET6 */ + in_pcbdetach(inp); + } in_pcbdispose(inp); lck_rw_done(ripcbinfo.mtx); return(0); } } - so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved; + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); return(0); @@ -920,6 +950,111 @@ rip_pcblist SYSCTL_HANDLER_ARGS SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); +#if !CONFIG_EMBEDDED + +static int +rip_pcblist64 SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, i, n; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + lck_rw_lock_exclusive(ripcbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { + n = ripcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xinpcb64); + lck_rw_done(ripcbinfo.mtx); + return 0; + } + + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(ripcbinfo.mtx); + return EPERM; + } + + /* + * OK, now we're committed to doing something. 
+ */ + gencnt = ripcbinfo.ipi_gencnt; + n = ripcbinfo.ipi_count; + + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) { + lck_rw_done(ripcbinfo.mtx); + return error; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(ripcbinfo.mtx); + return 0; + } + + inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) { + lck_rw_done(ripcbinfo.mtx); + return ENOMEM; + } + + for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; + } + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + struct xinpcb64 xi; + + bzero(&xi, sizeof(xi)); + xi.xi_len = sizeof xi; + inpcb_to_xinpcb64(inp, &xi); + if (inp->inp_socket) + sotoxsocket64(inp->inp_socket, &xi.xi_socket); + error = SYSCTL_OUT(req, &xi, sizeof xi); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_gen = ripcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = ripcbinfo.ipi_count; + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + FREE(inp_list, M_TEMP); + lck_rw_done(ripcbinfo.mtx); + return error; +} + +SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0, + rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets"); + +#endif /* !CONFIG_EMBEDDED */ + struct pr_usrreqs rip_usrreqs = { rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h index 3db7ae34e..3b4d8f92f 100644 --- a/bsd/netinet/tcp.h +++ b/bsd/netinet/tcp.h @@ -195,12 +195,13 @@ struct tcphdr { /* * User-settable options (used with setsockopt). */ -#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define TCP_MAXSEG 0x02 /* set maximum segment size */ -#define TCP_NOPUSH 0x04 /* don't push last block of write */ -#define TCP_NOOPT 0x08 /* don't use TCP options */ -#define TCP_KEEPALIVE 0x10 /* idle time used when SO_KEEPALIVE is enabled */ +#define TCP_MAXSEG 0x02 /* set maximum segment size */ +#define TCP_NOPUSH 0x04 /* don't push last block of write */ +#define TCP_NOOPT 0x08 /* don't use TCP options */ +#define TCP_KEEPALIVE 0x10 /* idle time used when SO_KEEPALIVE is enabled */ +#define TCP_CONNECTIONTIMEOUT 0x20 /* connection timeout */ #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ #endif diff --git a/bsd/netinet/tcp_debug.c b/bsd/netinet/tcp_debug.c index ac173feae..58b1141b2 100644 --- a/bsd/netinet/tcp_debug.c +++ b/bsd/netinet/tcp_debug.c @@ -250,9 +250,9 @@ tcp_trace(act, ostate, tp, ipgen, th, req) return; printf( "\trcv_(nxt,wnd,up) (%lx,%lx,%lx) snd_(una,nxt,max) (%lx,%lx,%lx)\n", - (u_long)tp->rcv_nxt, 
tp->rcv_wnd, (u_long)tp->rcv_up, - (u_long)tp->snd_una, (u_long)tp->snd_nxt, (u_long)tp->snd_max); + (uint32_t)tp->rcv_nxt, tp->rcv_wnd, (uint32_t)tp->rcv_up, + (uint32_t)tp->snd_una, (uint32_t)tp->snd_nxt, (uint32_t)tp->snd_max); printf("\tsnd_(wl1,wl2,wnd) (%lx,%lx,%lx)\n", - (u_long)tp->snd_wl1, (u_long)tp->snd_wl2, tp->snd_wnd); + (uint32_t)tp->snd_wl1, (uint32_t)tp->snd_wl2, tp->snd_wnd); #endif /* TCPDEBUG */ } diff --git a/bsd/netinet/tcp_fsm.h b/bsd/netinet/tcp_fsm.h index e9db5611f..b963d865f 100644 --- a/bsd/netinet/tcp_fsm.h +++ b/bsd/netinet/tcp_fsm.h @@ -127,12 +127,12 @@ static u_char tcp_outflags[TCP_NSTATES] = { TH_ACK, /* 10, TIME_WAIT */ }; #endif -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #if KPROF #ifdef KERNEL_PRIVATE int tcp_acounts[TCP_NSTATES][PRU_NREQ]; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif #ifdef TCPSTATES diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index 138bcb3c7..b642c4298 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -81,6 +81,8 @@ #include /* before tcp_seq.h, for tcp_random18() */ +#include + #include #include #include @@ -201,9 +203,20 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW, static int tcp_do_rfc3465 = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW, &tcp_do_rfc3465, 0, ""); + +static int tcp_do_rfc3465_lim2 = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW, + &tcp_do_rfc3465_lim2, 0, "Appropriate bytes counting w/ L=2*SMSS"); + +#if CONFIG_IFEF_NOWINDOWSCALE +int tcp_obey_ifef_nowindowscale = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW, + &tcp_obey_ifef_nowindowscale, 0, ""); +#endif + extern int tcp_TCPTV_MIN; -u_long tcp_now; +u_int32_t tcp_now; struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ @@ -234,7 +247,7 @@ do { \ #define ND6_HINT(tp) #endif -extern u_long *delack_bitmask; +extern u_int32_t *delack_bitmask; extern void 
add_to_time_wait(struct tcpcb *); extern void postevent(struct socket *, struct sockbuf *, int); @@ -544,7 +557,7 @@ tcp_input(m, off0) int dropsocket = 0; int iss = 0; int nosock = 0; - u_long tiwin; + u_int32_t tiwin; struct tcpopt to; /* options in this segment */ struct sockaddr_in *next_hop = NULL; #if TCPDEBUG @@ -567,7 +580,12 @@ tcp_input(m, off0) ifscope = IFSCOPE_NONE; /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ - fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL); + if (!SLIST_EMPTY(&m->m_pkthdr.tags)) { + fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, NULL); + } else { + fwd_tag = NULL; + } if (fwd_tag != NULL) { struct ip_fwd_tag *ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); @@ -658,7 +676,11 @@ tcp_input(m, off0) bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); ipov->ih_len = (u_short)tlen; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ipov->ih_len); +#endif + pseudo = in_cksum(m, sizeof (struct ip)); *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0]; @@ -687,7 +709,11 @@ tcp_input(m, off0) len = sizeof (struct ip) + tlen; bzero(ipov->ih_x1, sizeof(ipov->ih_x1)); ipov->ih_len = (u_short)tlen; + +#if BYTE_ORDER != BIG_ENDIAN HTONS(ipov->ih_len); +#endif + th->th_sum = in_cksum(m, len); *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0]; @@ -773,10 +799,13 @@ tcp_input(m, off0) /* * Convert TCP protocol specific fields to host format. 
*/ + +#if BYTE_ORDER != BIG_ENDIAN NTOHL(th->th_seq); NTOHL(th->th_ack); NTOHS(th->th_win); NTOHS(th->th_urp); +#endif /* * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options, @@ -848,12 +877,16 @@ tcp_input(m, off0) if (isipv6) { if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) { IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); + if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) + inp = NULL; // pretend we didn't find it goto dropnosock; } } else #endif /* INET6 */ if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); + IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); + if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) + inp = NULL; // pretend we didn't find it goto dropnosock; } } @@ -936,26 +969,21 @@ tcp_input(m, off0) } so = inp->inp_socket; if (so == NULL) { - if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) - inp = NULL; // pretend we didn't find it + /* This case shouldn't happen as the socket shouldn't be null + * if inp_state isn't set to INPCB_STATE_DEAD + * But just in case, we pretend we didn't find the socket if we hit this case + * as this isn't cause for a panic (the socket might be leaked however)... + */ + inp = NULL; #if TEMPDEBUG - printf("tcp_input: no more socket for inp=%x\n", inp); + printf("tcp_input: no more socket for inp=%x. 
This shouldn't happen\n", inp); #endif goto dropnosock; } -#ifdef __APPLE__ - /* - * Bogus state when listening port owned by SharedIP with loopback as the - * only configured interface: BlueBox does not filters loopback - */ - if (so == &tcbinfo.nat_dummy_socket) - goto drop; - -#endif - tcp_lock(so, 1, 2); + tcp_lock(so, 1, (void *)2); if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { - tcp_unlock(so, 1, 2); + tcp_unlock(so, 1, (void *)2); inp = NULL; // pretend we didn't find it goto dropnosock; } @@ -1001,7 +1029,6 @@ tcp_input(m, off0) #if INET6 struct inpcb *oinp = sotoinpcb(so); #endif /* INET6 */ - int ogencnt = so->so_gencnt; unsigned int head_ifscope; /* Get listener's bound-to-interface, if any */ @@ -1108,12 +1135,10 @@ tcp_input(m, off0) if (!so2) goto drop; } - /* - * Make sure listening socket did not get closed during socket allocation, - * not only this is incorrect but it is know to cause panic - */ - if (so->so_gencnt != ogencnt) - goto drop; + + /* Point "inp" and "tp" in tandem to new socket */ + inp = (struct inpcb *)so2->so_pcb; + tp = intotcpcb(inp); oso = so; tcp_unlock(so, 0, 0); /* Unlock but keep a reference on listener for now */ @@ -1132,7 +1157,6 @@ tcp_input(m, off0) * we're committed to it below in TCPS_LISTEN. 
*/ dropsocket++; - inp = (struct inpcb *)so->so_pcb; /* * Inherit INP_BOUND_IF from listener; testing if @@ -1237,15 +1261,19 @@ tcp_input(m, off0) printf("tcp_input: could not copy policy\n"); } #endif - tcp_unlock(oso, 1, 0); /* now drop the reference on the listener */ - tp = intotcpcb(inp); + /* inherit states from the listener */ tp->t_state = TCPS_LISTEN; tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY); + tp->t_keepinit = tp0->t_keepinit; tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl; + + /* now drop the reference on the listener */ + tcp_unlock(oso, 1, 0); + /* Compute proper scaling value from buffer space */ if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold) { tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale); - so->so_rcv.sb_hiwat = lmin(TCP_MAXWIN << tp->request_r_scale, (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES); + so->so_rcv.sb_hiwat = imin(TCP_MAXWIN << tp->request_r_scale, (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES); } else { while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -1306,7 +1334,7 @@ tcp_input(m, off0) if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) { char ipstrbuf[MAX_IPv6_STR_LEN]; printf("too many small tcp packets from " - "%s:%u, av. %lubyte/packet, " + "%s:%u, av. %ubyte/packet, " "dropping connection\n", #if INET6 isipv6 ? @@ -1773,19 +1801,18 @@ tcp_input(m, off0) tp->t_flags |= TF_ACKNOW; tp->t_unacksegs = 0; tp->t_state = TCPS_SYN_RECEIVED; - tp->t_timer[TCPT_KEEP] = tcp_keepinit; + tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? 
tp->t_keepinit : tcp_keepinit; dropsocket = 0; /* committed to socket */ tcpstat.tcps_accepts++; if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE | TH_CWR)) { /* ECN-setup SYN */ tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT); } -#ifdef IFEF_NOWINDOWSCALE - if (m->m_pkthdr.rcvif != NULL && - (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE) != 0) - { - // Timestamps are not enabled on this interface - tp->t_flags &= ~(TF_REQ_SCALE); +#if CONFIG_IFEF_NOWINDOWSCALE + if (tcp_obey_ifef_nowindowscale && m->m_pkthdr.rcvif != NULL && + (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE)) { + /* Window scaling is not enabled on this interface */ + tp->t_flags &= ~TF_REQ_SCALE; } #endif goto trimthenstep6; @@ -2447,7 +2474,10 @@ tcp_input(m, off0) tp->t_dupacks = 0; break; } - + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ if (!IN_FASTRECOVERY(tp)) { /* * We were not in fast recovery. Reset the duplicate ack @@ -2469,7 +2499,7 @@ tcp_input(m, off0) } else { if (tcp_do_newreno) { - long ss = tp->snd_max - th->th_ack; + int32_t ss = tp->snd_max - th->th_ack; /* * Complete ack. Inflate the congestion window to @@ -2632,30 +2662,47 @@ tcp_input(m, off0) register u_int cw = tp->snd_cwnd; register u_int incr = tp->t_maxseg; - if (cw >= tp->snd_ssthresh) { - tp->t_bytes_acked += acked; - if (tp->t_bytes_acked >= cw) { + if (tcp_do_rfc3465) { + + if (cw >= tp->snd_ssthresh) { + tp->t_bytes_acked += acked; + if (tp->t_bytes_acked >= cw) { /* Time to increase the window. */ - tp->t_bytes_acked -= cw; - } else { + tp->t_bytes_acked -= cw; + } else { /* No need to increase yet. */ - incr = 0; + incr = 0; + } + } else { + /* + * If the user explicitly enables RFC3465 + * use 2*SMSS for the "L" param. Otherwise + * use the more conservative 1*SMSS. + * + * (See RFC 3465 2.3 Choosing the Limit) + */ + u_int abc_lim; + + abc_lim = (tcp_do_rfc3465_lim2 && + tp->snd_nxt == tp->snd_max) ? 
incr * 2 : incr; + + incr = lmin(acked, abc_lim); } - } else { + } + else { /* - * If the user explicitly enables RFC3465 - * use 2*SMSS for the "L" param. Otherwise - * use the more conservative 1*SMSS. - * - * (See RFC 3465 2.3 Choosing the Limit) + * If the window gives us less than ssthresh packets + * in flight, open exponentially (segsz per packet). + * Otherwise open linearly: segsz per window + * (segsz^2 / cwnd per packet). */ - u_int abc_lim; - - abc_lim = (tcp_do_rfc3465 == 0) ? - incr : incr * 2; - incr = min(acked, abc_lim); + + if (cw >= tp->snd_ssthresh) { + incr = max((incr * incr / cw), 1); + } } + tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<snd_scale); } if (acked > so->so_snd.sb_cc) { @@ -2730,7 +2777,7 @@ tcp_input(m, off0) tcp_canceltimers(tp); /* Shorten TIME_WAIT [RFC-1644, p.28] */ if (tp->cc_recv != 0 && - tp->t_starttime < (u_long)tcp_msl) + tp->t_starttime < (u_int32_t)tcp_msl) tp->t_timer[TCPT_2MSL] = tp->t_rxtcur * TCPTV_TWTRUNC; else @@ -2834,7 +2881,7 @@ tcp_input(m, off0) * but if two URG's are pending at once, some out-of-band * data may creep in... ick. */ - if (th->th_urp <= (u_long)tlen + if (th->th_urp <= (u_int32_t)tlen #if SO_OOBINLINE && (so->so_options & SO_OOBINLINE) == 0 #endif @@ -2990,7 +3037,7 @@ tcp_input(m, off0) tcp_canceltimers(tp); /* Shorten TIME_WAIT [RFC-1644, p.28] */ if (tp->cc_recv != 0 && - tp->t_starttime < (u_long)tcp_msl) { + tp->t_starttime < (u_int32_t)tcp_msl) { tp->t_timer[TCPT_2MSL] = tp->t_rxtcur * TCPTV_TWTRUNC; /* For transaction client, force ACK now. 
*/ @@ -3185,7 +3232,11 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) if (!(th->th_flags & TH_SYN)) continue; bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); + +#if BYTE_ORDER != BIG_ENDIAN NTOHS(mss); +#endif + break; case TCPOPT_WINDOW: @@ -3203,10 +3254,17 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) to->to_flags |= TOF_TS; bcopy((char *)cp + 2, (char *)&to->to_tsval, sizeof(to->to_tsval)); + +#if BYTE_ORDER != BIG_ENDIAN NTOHL(to->to_tsval); +#endif + bcopy((char *)cp + 6, (char *)&to->to_tsecr, sizeof(to->to_tsecr)); + +#if BYTE_ORDER != BIG_ENDIAN NTOHL(to->to_tsecr); +#endif /* * A timestamp received in a SYN makes @@ -3361,6 +3419,7 @@ tcp_maxmtu(struct rtentry *rt) { unsigned int maxmtu; + RT_LOCK_ASSERT_HELD(rt); if (rt->rt_rmx.rmx_mtu == 0) maxmtu = rt->rt_ifp->if_mtu; else @@ -3375,10 +3434,13 @@ tcp_maxmtu6(struct rtentry *rt) { unsigned int maxmtu; + RT_LOCK_ASSERT_HELD(rt); + lck_rw_lock_shared(nd_if_rwlock); if (rt->rt_rmx.rmx_mtu == 0) maxmtu = IN6_LINKMTU(rt->rt_ifp); else maxmtu = MIN(rt->rt_rmx.rmx_mtu, IN6_LINKMTU(rt->rt_ifp)); + lck_rw_done(nd_if_rwlock); return (maxmtu); } @@ -3418,12 +3480,12 @@ tcp_mss(tp, offer, input_ifscope) register struct rtentry *rt; struct ifnet *ifp; register int rtt, mss; - u_long bufsize; + u_int32_t bufsize; struct inpcb *inp; struct socket *so; struct rmxp_tao *taop; int origoffer = offer; - u_long sb_max_corrected; + u_int32_t sb_max_corrected; int isnetlocal = 0; #if INET6 int isipv6; @@ -3438,19 +3500,23 @@ tcp_mss(tp, offer, input_ifscope) #else #define min_protoh (sizeof (struct tcpiphdr)) #endif - lck_mtx_lock(rt_mtx); + #if INET6 if (isipv6) { rt = tcp_rtlookup6(inp); - if (rt && (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) || IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) || rt->rt_gateway->sa_family == AF_LINK)) - isnetlocal = TRUE; + if (rt != NULL && + (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) || + IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) || + rt->rt_gateway->sa_family == AF_LINK)) + isnetlocal = 
TRUE; } else #endif /* INET6 */ { rt = tcp_rtlookup(inp, input_ifscope); - if (rt && (rt->rt_gateway->sa_family == AF_LINK || - rt->rt_ifp->if_flags & IFF_LOOPBACK)) + if (rt != NULL && + (rt->rt_gateway->sa_family == AF_LINK || + rt->rt_ifp->if_flags & IFF_LOOPBACK)) isnetlocal = TRUE; } if (rt == NULL) { @@ -3459,7 +3525,6 @@ tcp_mss(tp, offer, input_ifscope) isipv6 ? tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; - lck_mtx_unlock(rt_mtx); return; } ifp = rt->rt_ifp; @@ -3636,11 +3701,12 @@ tcp_mss(tp, offer, input_ifscope) */ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh); tcpstat.tcps_usedssthresh++; - } - else + } else { tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + } - lck_mtx_unlock(rt_mtx); + /* Route locked during lookup above */ + RT_UNLOCK(rt); } /* @@ -3664,7 +3730,7 @@ tcp_mssopt(tp) #else #define min_protoh (sizeof (struct tcpiphdr)) #endif - lck_mtx_lock(rt_mtx); + #if INET6 if (isipv6) rt = tcp_rtlookup6(tp->t_inpcb); @@ -3672,7 +3738,6 @@ tcp_mssopt(tp) #endif /* INET6 */ rt = tcp_rtlookup(tp->t_inpcb, IFSCOPE_NONE); if (rt == NULL) { - lck_mtx_unlock(rt_mtx); return ( #if INET6 isipv6 ? tcp_v6mssdflt : @@ -3695,7 +3760,8 @@ tcp_mssopt(tp) #else mss = tcp_maxmtu(rt); #endif - lck_mtx_unlock(rt_mtx); + /* Route locked during lookup above */ + RT_UNLOCK(rt); return (mss - min_protoh); } @@ -3711,7 +3777,7 @@ tcp_newreno_partial_ack(tp, th) struct tcphdr *th; { tcp_seq onxt = tp->snd_nxt; - u_long ocwnd = tp->snd_cwnd; + u_int32_t ocwnd = tp->snd_cwnd; tp->t_timer[TCPT_REXMT] = 0; tp->t_rtttime = 0; tp->snd_nxt = th->th_ack; @@ -3803,6 +3869,13 @@ tcp_dropdropablreq(struct socket *head) tcp_unlock(so, 1, 0); } } + else { + /* do not try to lock the inp in in_pcb_checkstate + * because the lock is already held in some other thread. + * Only drop the inp_wntcnt reference. 
+ */ + in_pcb_checkstate(inp, WNT_RELEASE, 1); + } } so = sonext; diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index af6873478..9d37f63fd 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -139,6 +139,11 @@ int tcp_do_newreno = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno, 0, "Enable NewReno Algorithms"); +int tcp_do_tso = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW, + &tcp_do_tso, 0, "Enable TCP Segmentation Offload"); + + int tcp_ecn_outbound = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_initiate_out, CTLFLAG_RW, &tcp_ecn_outbound, 0, "Initiate ECN for outbound connections"); @@ -155,10 +160,9 @@ int tcp_output_unlocked = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, socket_unlocked_on_output, CTLFLAG_RW, &tcp_output_unlocked, 0, "Unlock TCP when sending packets down to IP"); -static long packchain_newlist = 0; -static long packchain_looped = 0; -static long packchain_sent = 0; - +static int32_t packchain_newlist = 0; +static int32_t packchain_looped = 0; +static int32_t packchain_sent = 0; /* temporary: for testing */ #if IPSEC @@ -166,16 +170,20 @@ extern int ipsec_bypass; #endif extern int slowlink_wsize; /* window correction for slow links */ -extern u_long route_generation; #if IPFIREWALL extern int fw_enable; /* firewall check for packet chaining */ extern int fw_bypass; /* firewall check: disable packet chaining if there is rules */ #endif /* IPFIREWALL */ extern vm_size_t so_cache_zone_element_size; +#if RANDOM_IP_ID +extern int ip_use_randomid; +#endif /* RANDOM_IP_ID */ +extern u_int32_t dlil_filter_count; +extern u_int32_t kipf_count; static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int, - struct mbuf *, int); + struct mbuf *, int, int); static __inline__ u_int16_t get_socket_id(struct socket * s) @@ -185,7 +193,7 @@ get_socket_id(struct socket * s) if (so_cache_zone_element_size == 0) { return (0); } - val = (u_int16_t)(((u_int32_t)s) / 
so_cache_zone_element_size); + val = (u_int16_t)(((uintptr_t)s) / so_cache_zone_element_size); if (val == 0) { val = 0xffff; } @@ -215,8 +223,8 @@ int tcp_output(struct tcpcb *tp) { struct socket *so = tp->t_inpcb->inp_socket; - long len, recwin, sendwin; - int off, flags, error; + int32_t len, recwin, sendwin, off; + int flags, error; register struct mbuf *m; struct ip *ip = NULL; register struct ipovly *ipov = NULL; @@ -228,9 +236,12 @@ tcp_output(struct tcpcb *tp) unsigned ipoptlen, optlen, hdrlen; int idle, sendalot, lost = 0; int i, sack_rxmit; + int tso = 0; int sack_bytes_rxmt; struct sackhole *p; - +#ifdef IPSEC + unsigned ipsec_optlen = 0; +#endif int maxburst = TCP_MAXBURST; int last_off = 0; int m_off; @@ -312,14 +323,19 @@ tcp_output(struct tcpcb *tp) * come back before the TCP connection times out). */ rt = tp->t_inpcb->inp_route.ro_rt; - if (rt != NULL && rt->generation_id != route_generation) { + if (rt != NULL && (!(rt->rt_flags & RTF_UP) || + rt->generation_id != route_generation)) { struct ifnet *ifp; + struct in_ifaddr *ia; /* disable multipages at the socket */ somultipages(so, FALSE); + /* Disable TSO for the socket until we know more */ + tp->t_flags &= ~TF_TSO; + /* check that the source address is still valid */ - if (ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr) == 0) { + if ((ia = ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr)) == NULL) { if (tp->t_state >= TCPS_CLOSE_WAIT) { tcp_drop(tp, EADDRNOTAVAIL); @@ -351,16 +367,18 @@ tcp_output(struct tcpcb *tp) else return(0); /* silently ignore, keep data in socket: address may be back */ } + ifafree(&ia->ia_ifa); /* * Address is still valid; check for multipages capability * again in case the outgoing interface has changed. 
*/ - lck_mtx_lock(rt_mtx); - rt = tp->t_inpcb->inp_route.ro_rt; - if (rt != NULL && (ifp = rt->rt_ifp) != NULL) + RT_LOCK(rt); + if ((ifp = rt->rt_ifp) != NULL) { somultipages(so, (ifp->if_hwassist & IFNET_MULTIPAGES)); - if (rt != NULL && rt->generation_id != route_generation) + tcp_set_tso(tp, ifp); + } + if (rt->rt_flags & RTF_UP) rt->generation_id = route_generation; /* * See if we should do MTU discovery. Don't do it if: @@ -376,7 +394,7 @@ tcp_output(struct tcpcb *tp) else tp->t_flags |= TF_PMTUD; - lck_mtx_unlock(rt_mtx); + RT_UNLOCK(rt); } } @@ -411,7 +429,7 @@ tcp_output(struct tcpcb *tp) p = NULL; if (tp->sack_enable && IN_FASTRECOVERY(tp) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { - long cwin; + int32_t cwin; cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt; if (cwin < 0) @@ -434,18 +452,20 @@ tcp_output(struct tcpcb *tp) goto after_sack_rexmit; } else /* Can rexmit part of the current hole */ - len = ((long)ulmin(cwin, + len = ((int32_t)min(cwin, tp->snd_recover - p->rxmit)); } else - len = ((long)ulmin(cwin, p->end - p->rxmit)); - off = p->rxmit - tp->snd_una; + len = ((int32_t)min(cwin, p->end - p->rxmit)); if (len > 0) { + off = p->rxmit - tp->snd_una; /* update off only if we really transmit SACK data */ sack_rxmit = 1; sendalot = 1; tcpstat.tcps_sack_rexmits++; tcpstat.tcps_sack_rexmit_bytes += min(len, tp->t_maxseg); } + else + len = 0; } after_sack_rexmit: /* @@ -507,17 +527,17 @@ tcp_output(struct tcpcb *tp) */ if (sack_rxmit == 0) { if (sack_bytes_rxmt == 0) - len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off); + len = min(so->so_snd.sb_cc, sendwin) - off; else { - long cwin; + int32_t cwin; /* * We are inside of a SACK recovery episode and are * sending new data, having retransmitted all the * data possible in the scoreboard. */ - len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) - - off); + len = min(so->so_snd.sb_cc, tp->snd_wnd) + - off; /* * Don't remove this (len > 0) check ! 
* We explicitly check for len > 0 here (although it @@ -532,8 +552,10 @@ tcp_output(struct tcpcb *tp) sack_bytes_rxmt; if (cwin < 0) cwin = 0; - len = lmin(len, cwin); + len = imin(len, cwin); } + else + len = 0; } } @@ -556,7 +578,7 @@ tcp_output(struct tcpcb *tp) error = tcp_ip_output(so, tp, packetlist, packchain_listadd, tp_inp_options, - (so_options & SO_DONTROUTE)); + (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0))); tp->t_flags &= ~TF_SENDINPROG; } @@ -604,13 +626,53 @@ tcp_output(struct tcpcb *tp) } /* - * len will be >= 0 after this point. Truncate to the maximum - * segment length and ensure that FIN is removed if the length - * no longer contains the last data byte. + * Truncate to the maximum segment length or enable TCP Segmentation + * Offloading (if supported by hardware) and ensure that FIN is removed + * if the length no longer contains the last data byte. + * + * TSO may only be used if we are in a pure bulk sending state. The + * presence of TCP-MD5, SACK retransmits, SACK advertizements, ipfw rules + * and IP options prevent using TSO. With TSO the TCP header is the same + * (except for the sequence number) for all generated packets. This + * makes it impossible to transmit any options which vary per generated + * segment or packet. + * + * The length of TSO bursts is limited to TCP_MAXWIN. That limit and + * removal of FIN (if not already catched here) are handled later after + * the exact length of the TCP options are known. + */ +#if IPSEC + /* + * Pre-calculate here as we save another lookup into the darknesses + * of IPsec that way and can actually decide if TSO is ok. 
*/ + if (ipsec_bypass == 0) + ipsec_optlen = ipsec_hdrsiz_tcp(tp); +#endif + if (len > tp->t_maxseg) { - len = tp->t_maxseg; - sendalot = 1; + if ((tp->t_flags & TF_TSO) && tcp_do_tso && +#if RANDOM_IP_ID + ip_use_randomid && +#endif /* RANDOM_IP_ID */ + kipf_count == 0 && dlil_filter_count == 0 && + tp->rcv_numsacks == 0 && sack_rxmit == 0 && sack_bytes_rxmt == 0 && + tp->t_inpcb->inp_options == NULL && + tp->t_inpcb->in6p_options == NULL +#if IPSEC + && ipsec_optlen == 0 +#endif +#if IPFIREWALL + && (fw_enable == 0 || fw_bypass) +#endif + ) { + tso = 1; + sendalot = 0; + } else { + len = tp->t_maxseg; + sendalot = 1; + tso = 0; + } } if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) @@ -626,7 +688,7 @@ tcp_output(struct tcpcb *tp) * Sender silly window avoidance. We transmit under the following * conditions when len is non-zero: * - * - We have a full segment + * - We have a full segment (or more with TSO) * - This is the last buffer in a write()/send() and we are * either idle or running NODELAY * - we've timed out (e.g. persist timer) @@ -635,7 +697,7 @@ tcp_output(struct tcpcb *tp) * - we need to retransmit */ if (len) { - if (len == tp->t_maxseg) { + if (len >= tp->t_maxseg) { tp->t_flags |= TF_MAXSEGSNT; goto send; } @@ -676,10 +738,10 @@ tcp_output(struct tcpcb *tp) * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. 
*/ - long adv = lmin(recwin, (long)TCP_MAXWIN << tp->rcv_scale) - + int32_t adv = imin(recwin, (int)TCP_MAXWIN << tp->rcv_scale) - (tp->rcv_adv - tp->rcv_nxt); - if (adv >= (long) (2 * tp->t_maxseg)) { + if (adv >= (int32_t) (2 * tp->t_maxseg)) { /* * Update only if the resulting scaled value of the window changed, or @@ -690,7 +752,7 @@ tcp_output(struct tcpcb *tp) if ((tp->last_ack_sent != tp->rcv_nxt) || (((recwin + adv) >> tp->rcv_scale) > recwin)) goto send; } - if (2 * adv >= (long) so->so_rcv.sb_hiwat) + if (2 * adv >= (int32_t) so->so_rcv.sb_hiwat) goto send; } @@ -763,7 +825,7 @@ tcp_output(struct tcpcb *tp) tp->t_flags |= TF_SENDINPROG; error = tcp_ip_output(so, tp, packetlist, packchain_listadd, - tp_inp_options, (so_options & SO_DONTROUTE)); + tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0))); tp->t_flags &= ~TF_SENDINPROG; } @@ -1015,8 +1077,7 @@ tcp_output(struct tcpcb *tp) ipoptlen = 0; } #if IPSEC - if (ipsec_bypass == 0) - ipoptlen += ipsec_hdrsiz_tcp(tp); + ipoptlen += ipsec_optlen; #endif /* @@ -1024,14 +1085,34 @@ tcp_output(struct tcpcb *tp) * bump the packet length beyond the t_maxopd length. * Clear the FIN bit because we cut off the tail of * the segment. + * + * When doing TSO limit a burst to TCP_MAXWIN minus the + * IP, TCP and Options length to keep ip->ip_len from + * overflowing. Prevent the last segment from being + * fractional thus making them all equal sized and set + * the flag to continue sending. TSO is disabled when + * IP options or IPSEC are present. */ if (len + optlen + ipoptlen > tp->t_maxopd) { /* * If there is still more to send, don't close the connection. */ flags &= ~TH_FIN; - len = tp->t_maxopd - optlen - ipoptlen; - sendalot = 1; + if (tso) { + int32_t tso_maxlen; + + tso_maxlen = tp->tso_max_segment_size ? 
tp->tso_max_segment_size : TCP_MAXWIN; + + if (len > tso_maxlen - hdrlen - optlen) { + len = tso_maxlen - hdrlen - optlen; + len = len - (len % (tp->t_maxopd - optlen)); + sendalot = 1; + } else if (tp->t_flags & TF_NEEDFIN) + sendalot = 1; + } else { + len = tp->t_maxopd - optlen - ipoptlen; + sendalot = 1; + } } /*#ifdef DIAGNOSTIC*/ @@ -1052,7 +1133,7 @@ tcp_output(struct tcpcb *tp) if (len) { if (tp->t_force && len == 1) tcpstat.tcps_sndprobe++; - else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { + else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) { tcpstat.tcps_sndrexmitpack++; tcpstat.tcps_sndrexmitbyte += len; } else { @@ -1112,7 +1193,7 @@ tcp_output(struct tcpcb *tp) m->m_len = hdrlen; } /* makes sure we still have data left to be sent at this point */ - if (so->so_snd.sb_mb == NULL || off == -1) { + if (so->so_snd.sb_mb == NULL || off < 0) { if (m != NULL) m_freem(m); error = 0; /* should we return an error? */ goto out; @@ -1139,7 +1220,7 @@ tcp_output(struct tcpcb *tp) * m_copym_with_hdrs to avoid rescanning from the beginning of the socket buffer mbuf list. * setting the mbuf pointer to NULL is sufficient to disable the hint mechanism. 
*/ - if (m_head != so->so_snd.sb_mb || last_off != off) + if (m_head != so->so_snd.sb_mb || sack_rxmit || last_off != off) m_last = NULL; last_off = off + len; m_head = so->so_snd.sb_mb; @@ -1154,7 +1235,7 @@ tcp_output(struct tcpcb *tp) * m_copym_with_hdrs will always return the last mbuf pointer and the offset into it that * it acted on to fullfill the current request, whether a valid 'hint' was passed in or not */ - if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, (int) len, M_DONTWAIT, &m_last, &m_off)) == NULL) { + if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, len, M_DONTWAIT, &m_last, &m_off)) == NULL) { error = ENOBUFS; goto out; } @@ -1199,9 +1280,6 @@ tcp_output(struct tcpcb *tp) #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(tp->t_inpcb, m); #endif -#if CONFIG_IP_EDGEHOLE - ip_edgehole_mbuf_tag(tp->t_inpcb, m); -#endif #if INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); @@ -1264,18 +1342,18 @@ tcp_output(struct tcpcb *tp) * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. 
*/ - if (recwin < (long)(so->so_rcv.sb_hiwat / 4) && recwin < (long)tp->t_maxseg) + if (recwin < (int32_t)(so->so_rcv.sb_hiwat / 4) && recwin < (int)tp->t_maxseg) recwin = 0; - if (recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) - recwin = (long)(tp->rcv_adv - tp->rcv_nxt); + if (recwin < (int32_t)(tp->rcv_adv - tp->rcv_nxt)) + recwin = (int32_t)(tp->rcv_adv - tp->rcv_nxt); if (tp->t_flags & TF_SLOWLINK && slowlink_wsize > 0) { - if (recwin > (long)slowlink_wsize) + if (recwin > (int32_t)slowlink_wsize) recwin = slowlink_wsize; - th->th_win = htons((u_short) (recwin>>tp->rcv_scale)); + th->th_win = htons((u_short) (recwin>>tp->rcv_scale)); } else { - if (recwin > (long)(TCP_MAXWIN << tp->rcv_scale)) - recwin = (long)(TCP_MAXWIN << tp->rcv_scale); + if (recwin > (int32_t)(TCP_MAXWIN << tp->rcv_scale)) + recwin = (int32_t)(TCP_MAXWIN << tp->rcv_scale); th->th_win = htons((u_short) (recwin>>tp->rcv_scale)); } @@ -1326,6 +1404,24 @@ tcp_output(struct tcpcb *tp) htons((u_short)(optlen + len))); } + /* + * Enable TSO and specify the size of the segments. + * The TCP pseudo header checksum is always provided. + * XXX: Fixme: This is currently not the case for IPv6. + */ + if (tso) { +#if INET6 + if (isipv6) + m->m_pkthdr.csum_flags = CSUM_TSO_IPV6; + else +#endif /* INET6 */ + m->m_pkthdr.csum_flags = CSUM_TSO_IPV4; + + m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; + } + else + m->m_pkthdr.tso_segsz = 0; + /* * In transmit state, time the transmission and arrange for * the retransmit. In persist state, just set snd_max. 
@@ -1522,7 +1618,7 @@ tcp_output(struct tcpcb *tp) error = tcp_ip_output(so, tp, packetlist, packchain_listadd, tp_inp_options, - (so_options & SO_DONTROUTE)); + (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0))); tp->t_flags &= ~TF_SENDINPROG; if (error) { @@ -1584,10 +1680,13 @@ tcp_output(struct tcpcb *tp) TCP_PKTLIST_CLEAR(tp); if (error == ENOBUFS) { - if (!tp->t_timer[TCPT_REXMT] && - !tp->t_timer[TCPT_PERSIST]) - tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; - tcp_quench(tp->t_inpcb, 0); + if (!tp->t_timer[TCPT_REXMT] && + !tp->t_timer[TCPT_PERSIST]) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + tp->snd_cwnd = tp->t_maxseg; + tp->t_bytes_acked = 0; + KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return (0); } @@ -1597,7 +1696,16 @@ tcp_output(struct tcpcb *tp) * for us. tcp_mtudisc() will, as its last action, * initiate retransmission, so it is important to * not do so here. + * + * If TSO was active we either got an interface + * without TSO capabilities or TSO was turned off. + * Disable it for this connection as well and + * immediately retry with MSS sized segments generated + * by this function. */ + if (tso) + tp->t_flags &= ~TF_TSO; + tcp_mtudisc(tp->t_inpcb, 0); KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return 0; @@ -1620,7 +1728,7 @@ tcp_output(struct tcpcb *tp) * then remember the size of the advertised window. * Any pending ACK has now been sent.
*/ - if (recwin > 0 && SEQ_GT(tp->rcv_nxt+recwin, tp->rcv_adv)) + if (recwin > 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv)) tp->rcv_adv = tp->rcv_nxt + recwin; tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); @@ -1633,34 +1741,38 @@ tcp_output(struct tcpcb *tp) static int tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, - int cnt, struct mbuf *opt, int flags) + int cnt, struct mbuf *opt, int flags, int sack_in_progress) { int error = 0; boolean_t chain; boolean_t unlocked = FALSE; struct inpcb *inp = tp->t_inpcb; struct ip_out_args ipoa; + struct route ro; /* If socket was bound to an ifindex, tell ip_output about it */ ipoa.ipoa_ifscope = (inp->inp_flags & INP_BOUND_IF) ? inp->inp_boundif : IFSCOPE_NONE; flags |= IP_OUTARGS; - /* Make sure ACK/DELACK conditions are cleared before + /* Copy the cached route and take an extra reference */ + inp_route_copyout(inp, &ro); + + /* + * Make sure ACK/DELACK conditions are cleared before * we unlock the socket. */ - tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); + /* - * If allowed, unlock TCP socket while in IP + * If allowed, unlock TCP socket while in IP * but only if the connection is established and * if we're not sending from an upcall. 
- */ - + */ if (tcp_output_unlocked && ((so->so_flags & SOF_UPCALLINUSE) == 0) && - (tp->t_state == TCPS_ESTABLISHED)) { - unlocked = TRUE; - socket_unlock(so, 0); + (tp->t_state == TCPS_ESTABLISHED) && (sack_in_progress == 0)) { + unlocked = TRUE; + socket_unlock(so, 0); } /* @@ -1698,8 +1810,7 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, */ cnt = 0; } - error = ip_output_list(pkt, cnt, opt, &inp->inp_route, - flags, 0, &ipoa); + error = ip_output_list(pkt, cnt, opt, &ro, flags, 0, &ipoa); if (chain || error) { /* * If we sent down a chain then we are done since @@ -1716,6 +1827,9 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, if (unlocked) socket_lock(so, 0); + /* Synchronize cached PCB route */ + inp_route_copyin(inp, &ro); + return (error); } diff --git a/bsd/netinet/tcp_sack.c b/bsd/netinet/tcp_sack.c index 2426bee5d..5842ad2b8 100644 --- a/bsd/netinet/tcp_sack.c +++ b/bsd/netinet/tcp_sack.c @@ -211,13 +211,8 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end) void tcp_clean_sackreport( struct tcpcb *tp) { -/* - int i; tp->rcv_numsacks = 0; - for (i = 0; i < MAX_SACK_BLKS; i++) - tp->sackblks[i].start = tp->sackblks[i].end=0; -*/ bzero(&tp->sackblks[0], sizeof (struct sackblk) * MAX_SACK_BLKS); } @@ -328,16 +323,20 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) } /* * Append received valid SACK blocks to sack_blocks[]. + * Check that the SACK block range is valid. 
*/ - for (i = 0; i < to->to_nsacks; i++) { - bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack)); - sack.start = ntohl(sack.start); - sack.end = ntohl(sack.end); - if (SEQ_GT(sack.end, sack.start) && - SEQ_GT(sack.start, tp->snd_una) && - SEQ_GT(sack.start, th_ack) && - SEQ_LEQ(sack.end, tp->snd_max)) - sack_blocks[num_sack_blks++] = sack; + for (i = 0; i < to->to_nsacks; i++) { + bcopy((to->to_sacks + i * TCPOLEN_SACK), + &sack, sizeof(sack)); + sack.start = ntohl(sack.start); + sack.end = ntohl(sack.end); + if (SEQ_GT(sack.end, sack.start) && + SEQ_GT(sack.start, tp->snd_una) && + SEQ_GT(sack.start, th_ack) && + SEQ_LT(sack.start, tp->snd_max) && + SEQ_GT(sack.end, tp->snd_una) && + SEQ_LEQ(sack.end, tp->snd_max)) + sack_blocks[num_sack_blks++] = sack; } /* @@ -504,6 +503,8 @@ tcp_free_sackholes(struct tcpcb *tp) while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL) tcp_sackhole_remove(tp, q); tp->sackhint.sack_bytes_rexmit = 0; + tp->sackhint.nexthole = NULL; + tp->sack_newdata = 0; } diff --git a/bsd/netinet/tcp_seq.h b/bsd/netinet/tcp_seq.h index ac032da62..89a16ef79 100644 --- a/bsd/netinet/tcp_seq.h +++ b/bsd/netinet/tcp_seq.h @@ -111,5 +111,5 @@ /* timestamp wrap-around time */ extern tcp_cc tcp_ccgen; /* global connection count */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET_TCP_SEQ_H_ */ diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index f0d78d7b8..cc58735fa 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -88,10 +88,12 @@ #include #include +#define tcp_minmssoverload fring #define _IP_VHL #include #include #include +#include #if INET6 #include #endif @@ -101,6 +103,7 @@ #endif #include #include +#include #if INET6 #include #endif @@ -125,6 +128,8 @@ #endif #endif /*IPSEC*/ +#undef tcp_minmssoverload + #if CONFIG_MACF_NET #include #endif /* MAC_NET */ @@ -189,10 +194,6 @@ static int tcp_do_rfc1644 = 0; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, &tcp_do_rfc1644 , 0, 
"Enable rfc1644 (TTCP) extensions"); -static int tcp_tcbhashsize = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, - &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); - static int do_tcpdrain = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); @@ -219,6 +220,10 @@ int tcp_TCPTV_MIN = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_min, CTLFLAG_RW, &tcp_TCPTV_MIN, 0, "min rtt value allowed"); +__private_extern__ int tcp_use_randomport = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, randomize_ports, CTLFLAG_RW, + &tcp_use_randomport, 0, "Randomize TCP port numbers"); + static void tcp_cleartaocache(void); static void tcp_notify(struct inpcb *, int); struct zone *sack_hole_zone; @@ -241,6 +246,10 @@ extern int path_mtu_discovery; #define TCBHASHSIZE CONFIG_TCBHASHSIZE #endif +__private_extern__ int tcp_tcbhashsize = TCBHASHSIZE; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RD, + &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); + /* * This is the actual shape of what we allocate using the zone * allocator. 
Doing it this way allows us to protect both structures @@ -261,17 +270,13 @@ struct inp_tp { #undef ALIGNMENT #undef ALIGNM1 -static struct tcpcb dummy_tcb; - - extern struct inpcbhead time_wait_slots[]; -extern int cur_tw_slot; -extern u_long *delack_bitmask; -extern u_long route_generation; +extern u_int32_t *delack_bitmask; int get_inpcb_str_size(void); int get_tcp_str_size(void); +static void tcpcb_to_otcpcb(struct tcpcb *, struct otcpcb *); int get_inpcb_str_size(void) { @@ -293,7 +298,6 @@ int tcp_freeq(struct tcpcb *tp); void tcp_init() { - int hashsize = TCBHASHSIZE; vm_size_t str_size; int i; struct inpcbinfo *pcbinfo; @@ -313,14 +317,13 @@ tcp_init() LIST_INIT(&tcb); tcbinfo.listhead = &tcb; pcbinfo = &tcbinfo; - if (!powerof2(hashsize)) { + if (!powerof2(tcp_tcbhashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); - hashsize = 512; /* safe default */ + tcp_tcbhashsize = 512; /* safe default */ } - tcp_tcbhashsize = hashsize; - tcbinfo.hashsize = hashsize; - tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); - tcbinfo.porthashbase = hashinit(hashsize, M_PCB, + tcbinfo.hashsize = tcp_tcbhashsize; + tcbinfo.hashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.hashmask); + tcbinfo.porthashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.porthashmask); str_size = (vm_size_t) sizeof(struct inp_tp); tcbinfo.ipi_zone = (void *) zinit(str_size, 120000*str_size, 8192, "tcpcb"); @@ -337,9 +340,6 @@ tcp_init() if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR - dummy_tcb.t_state = TCP_NSTATES; - dummy_tcb.t_flags = 0; - tcbinfo.dummy_cb = (caddr_t) &dummy_tcb; /* * allocate lock group attribute and group for tcp pcb mutexes @@ -357,10 +357,7 @@ tcp_init() return; /* pretty much dead if this fails... 
*/ } - - in_pcb_nat_init(&tcbinfo, AF_INET, IPPROTO_TCP, SOCK_STREAM); - - delack_bitmask = _MALLOC((4 * hashsize)/32, M_PCB, M_WAITOK); + delack_bitmask = _MALLOC((4 * tcp_tcbhashsize)/32, M_PCB, M_WAITOK); if (delack_bitmask == 0) panic("Delack Memory"); @@ -502,8 +499,8 @@ tcp_respond( if (tp) { if (!(flags & TH_RST)) { win = tcp_sbspace(tp); - if (win > (long)TCP_MAXWIN << tp->rcv_scale) - win = (long)TCP_MAXWIN << tp->rcv_scale; + if (win > (int32_t)TCP_MAXWIN << tp->rcv_scale) + win = (int32_t)TCP_MAXWIN << tp->rcv_scale; } #if INET6 if (isipv6) @@ -603,12 +600,7 @@ tcp_respond( mac_netinet_tcp_reply(m); } #endif - -#if CONFIG_IP_EDGEHOLE - if (tp && tp->t_inpcb) - ip_edgehole_mbuf_tag(tp->t_inpcb, m); -#endif - + nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; @@ -659,11 +651,20 @@ tcp_respond( { struct ip_out_args ipoa = { ifscope }; - (void) ip_output(m, NULL, ro, IP_OUTARGS, NULL, &ipoa); + if (ro != &sro) { + /* Copy the cached route and take an extra reference */ + inp_route_copyout(tp->t_inpcb, &sro); + } + /* + * For consistency, pass a local route copy. + */ + (void) ip_output(m, NULL, &sro, IP_OUTARGS, NULL, &ipoa); - if (ro == &sro && ro->ro_rt) { - rtfree(ro->ro_rt); - ro->ro_rt = NULL; + if (ro != &sro) { + /* Synchronize cached PCB route */ + inp_route_copyin(tp->t_inpcb, &sro); + } else if (sro.ro_rt != NULL) { + rtfree(sro.ro_rt); } } } @@ -769,7 +770,7 @@ tcp_close(tp) #if INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - register struct rtentry *rt; + struct rtentry *rt; int dosavessthresh; if ( inp->inp_ppcb == NULL) /* tcp_close was called previously, bail */ @@ -811,7 +812,14 @@ tcp_close(tp) return (NULL); } - lck_mtx_lock(rt_mtx); +#if INET6 + rt = isipv6 ? inp->in6p_route.ro_rt : inp->inp_route.ro_rt; +#else + rt = inp->inp_route.ro_rt; +#endif + if (rt != NULL) + RT_LOCK_SPIN(rt); + /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. 
@@ -824,13 +832,13 @@ tcp_close(tp) * update anything that the user "locked". */ if (tp->t_rttupdated >= 16) { - register u_long i = 0; + register u_int32_t i = 0; #if INET6 if (isipv6) { struct sockaddr_in6 *sin6; - if ((rt = inp->in6p_route.ro_rt) == NULL) + if (rt == NULL) goto no_valid_rt; sin6 = (struct sockaddr_in6 *)rt_key(rt); if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) @@ -838,16 +846,15 @@ tcp_close(tp) } else #endif /* INET6 */ - rt = inp->inp_route.ro_rt; - if (rt == NULL || - ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr - == INADDR_ANY || rt->generation_id != route_generation) { + if (rt == NULL || !(rt->rt_flags & RTF_UP) || + ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr == + INADDR_ANY || rt->generation_id != route_generation) { if (tp->t_state >= TCPS_CLOSE_WAIT) tp->t_state = TCPS_CLOSING; - goto no_valid_rt; } + RT_LOCK_ASSERT_HELD(rt); if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) { i = tp->t_srtt * (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE)); @@ -903,7 +910,7 @@ tcp_close(tp) i = (i + tp->t_maxseg / 2) / tp->t_maxseg; if (i < 2) i = 2; - i *= (u_long)(tp->t_maxseg + + i *= (u_int32_t)(tp->t_maxseg + #if INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : @@ -921,22 +928,22 @@ tcp_close(tp) tcpstat.tcps_cachedssthresh++; } } - rt = inp->inp_route.ro_rt; - if (rt) { - /* - * mark route for deletion if no information is - * cached. - */ - if ((so->so_flags & SOF_OVERFLOW) && tcp_lq_overflow && - ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0)){ - if (rt->rt_rmx.rmx_rtt == 0) - rt->rt_flags |= RTF_DELCLONE; + + /* + * Mark route for deletion if no information is cached. 
+ */ + if (rt != NULL && (so->so_flags & SOF_OVERFLOW) && tcp_lq_overflow) { + if (!(rt->rt_rmx.rmx_locks & RTV_RTT) && + rt->rt_rmx.rmx_rtt == 0) { + rt->rt_flags |= RTF_DELCLONE; } } - no_valid_rt: - /* free the reassembly queue, if any */ - lck_mtx_unlock(rt_mtx); +no_valid_rt: + if (rt != NULL) + RT_UNLOCK(rt); + + /* free the reassembly queue, if any */ (void) tcp_freeq(tp); tcp_free_sackholes(tp); @@ -1061,6 +1068,69 @@ tcp_notify(inp, error) #endif } +/* + * tcpcb_to_otcpcb copies specific bits of a tcpcb to a otcpcb format. + * The otcpcb data structure is passed to user space and must not change. + */ +static void +tcpcb_to_otcpcb(struct tcpcb *tp, struct otcpcb *otp) +{ + int i; + + otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; + otp->t_dupacks = tp->t_dupacks; + for (i = 0; i < TCPT_NTIMERS; i++) + otp->t_timer[i] = tp->t_timer[i]; + otp->t_inpcb = (_TCPCB_PTR(struct inpcb *))(uintptr_t)tp->t_inpcb; + otp->t_state = tp->t_state; + otp->t_flags = tp->t_flags; + otp->t_force = tp->t_force; + otp->snd_una = tp->snd_una; + otp->snd_max = tp->snd_max; + otp->snd_nxt = tp->snd_nxt; + otp->snd_up = tp->snd_up; + otp->snd_wl1 = tp->snd_wl1; + otp->snd_wl2 = tp->snd_wl2; + otp->iss = tp->iss; + otp->irs = tp->irs; + otp->rcv_nxt = tp->rcv_nxt; + otp->rcv_adv = tp->rcv_adv; + otp->rcv_wnd = tp->rcv_wnd; + otp->rcv_up = tp->rcv_up; + otp->snd_wnd = tp->snd_wnd; + otp->snd_cwnd = tp->snd_cwnd; + otp->snd_ssthresh = tp->snd_ssthresh; + otp->t_maxopd = tp->t_maxopd; + otp->t_rcvtime = tp->t_rcvtime; + otp->t_starttime = tp->t_starttime; + otp->t_rtttime = tp->t_rtttime; + otp->t_rtseq = tp->t_rtseq; + otp->t_rxtcur = tp->t_rxtcur; + otp->t_maxseg = tp->t_maxseg; + otp->t_srtt = tp->t_srtt; + otp->t_rttvar = tp->t_rttvar; + otp->t_rxtshift = tp->t_rxtshift; + otp->t_rttmin = tp->t_rttmin; + otp->t_rttupdated = tp->t_rttupdated; + otp->max_sndwnd = tp->max_sndwnd; + otp->t_softerror = tp->t_softerror; + otp->t_oobflags = tp->t_oobflags; + otp->t_iobc = 
tp->t_iobc; + otp->snd_scale = tp->snd_scale; + otp->rcv_scale = tp->rcv_scale; + otp->request_r_scale = tp->request_r_scale; + otp->requested_s_scale = tp->requested_s_scale; + otp->ts_recent = tp->ts_recent; + otp->ts_recent_age = tp->ts_recent_age; + otp->last_ack_sent = tp->last_ack_sent; + otp->cc_send = tp->cc_send; + otp->cc_recv = tp->cc_recv; + otp->snd_recover = tp->snd_recover; + otp->snd_cwnd_prev = tp->snd_cwnd_prev; + otp->snd_ssthresh_prev = tp->snd_ssthresh_prev; + otp->t_badrxtwin = tp->t_badrxtwin; +} + static int tcp_pcblist SYSCTL_HANDLER_ARGS { @@ -1154,10 +1224,11 @@ tcp_pcblist SYSCTL_HANDLER_ARGS inpcb_to_compat(inp, &xt.xt_inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb != NULL) { - bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); - } - else + tcpcb_to_otcpcb((struct tcpcb *)inp_ppcb, + &xt.xt_tp); + } else { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); + } if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xt.xt_socket); error = SYSCTL_OUT(req, &xt, sizeof xt); @@ -1186,85 +1257,188 @@ tcp_pcblist SYSCTL_HANDLER_ARGS SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); -#ifndef __APPLE__ -static int -tcp_getcred(SYSCTL_HANDLER_ARGS) +#if !CONFIG_EMBEDDED + +static void +tcpcb_to_xtcpcb64(struct tcpcb *tp, struct xtcpcb64 *otp) { - struct sockaddr_in addrs[2]; - struct inpcb *inp; - int error, s; - - error = suser(req->p); - if (error) - return (error); - error = SYSCTL_IN(req, addrs, sizeof(addrs)); - if (error) - return (error); - s = splnet(); - inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, - addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); - if (inp == NULL || inp->inp_socket == NULL) { - error = ENOENT; - goto out; - } - error = SYSCTL_OUT(req, inp->inp_socket->so_cred, sizeof(*(kauth_cred_t)0); -out: - splx(s); - return (error); + int i; + + otp->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; + otp->t_dupacks = tp->t_dupacks; + for (i = 
0; i < TCPT_NTIMERS; i++) + otp->t_timer[i] = tp->t_timer[i]; + otp->t_state = tp->t_state; + otp->t_flags = tp->t_flags; + otp->t_force = tp->t_force; + otp->snd_una = tp->snd_una; + otp->snd_max = tp->snd_max; + otp->snd_nxt = tp->snd_nxt; + otp->snd_up = tp->snd_up; + otp->snd_wl1 = tp->snd_wl1; + otp->snd_wl2 = tp->snd_wl2; + otp->iss = tp->iss; + otp->irs = tp->irs; + otp->rcv_nxt = tp->rcv_nxt; + otp->rcv_adv = tp->rcv_adv; + otp->rcv_wnd = tp->rcv_wnd; + otp->rcv_up = tp->rcv_up; + otp->snd_wnd = tp->snd_wnd; + otp->snd_cwnd = tp->snd_cwnd; + otp->snd_ssthresh = tp->snd_ssthresh; + otp->t_maxopd = tp->t_maxopd; + otp->t_rcvtime = tp->t_rcvtime; + otp->t_starttime = tp->t_starttime; + otp->t_rtttime = tp->t_rtttime; + otp->t_rtseq = tp->t_rtseq; + otp->t_rxtcur = tp->t_rxtcur; + otp->t_maxseg = tp->t_maxseg; + otp->t_srtt = tp->t_srtt; + otp->t_rttvar = tp->t_rttvar; + otp->t_rxtshift = tp->t_rxtshift; + otp->t_rttmin = tp->t_rttmin; + otp->t_rttupdated = tp->t_rttupdated; + otp->max_sndwnd = tp->max_sndwnd; + otp->t_softerror = tp->t_softerror; + otp->t_oobflags = tp->t_oobflags; + otp->t_iobc = tp->t_iobc; + otp->snd_scale = tp->snd_scale; + otp->rcv_scale = tp->rcv_scale; + otp->request_r_scale = tp->request_r_scale; + otp->requested_s_scale = tp->requested_s_scale; + otp->ts_recent = tp->ts_recent; + otp->ts_recent_age = tp->ts_recent_age; + otp->last_ack_sent = tp->last_ack_sent; + otp->cc_send = tp->cc_send; + otp->cc_recv = tp->cc_recv; + otp->snd_recover = tp->snd_recover; + otp->snd_cwnd_prev = tp->snd_cwnd_prev; + otp->snd_ssthresh_prev = tp->snd_ssthresh_prev; + otp->t_badrxtwin = tp->t_badrxtwin; } -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, - 0, 0, tcp_getcred, "S,ucred", "Get the ucred of a TCP connection"); -#if INET6 static int -tcp6_getcred(SYSCTL_HANDLER_ARGS) +tcp_pcblist64 SYSCTL_HANDLER_ARGS { - struct sockaddr_in6 addrs[2]; - struct inpcb *inp; - int error, s, mapped = 0; - - error = suser(req->p); - if 
(error) - return (error); - error = SYSCTL_IN(req, addrs, sizeof(addrs)); - if (error) - return (error); - if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { - if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) - mapped = 1; - else - return (EINVAL); - } - s = splnet(); - if (mapped == 1) - inp = in_pcblookup_hash(&tcbinfo, - *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], - addrs[1].sin6_port, - *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], - addrs[0].sin6_port, - 0, NULL); - else - inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, - addrs[1].sin6_port, - &addrs[0].sin6_addr, addrs[0].sin6_port, - 0, NULL); - if (inp == NULL || inp->inp_socket == NULL) { - error = ENOENT; - goto out; - } - error = SYSCTL_OUT(req, inp->inp_socket->so_cred, - sizeof(*(kauth_cred_t)0); -out: - splx(s); - return (error); -} +#pragma unused(oidp, arg1, arg2) + int error, i, n; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + int slot; -SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, - 0, 0, - tcp6_getcred, "S,ucred", "Get the ucred of a TCP6 connection"); + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + lck_rw_lock_shared(tcbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { + n = tcbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xtcpcb64); + lck_rw_done(tcbinfo.mtx); + return 0; + } + + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(tcbinfo.mtx); + return EPERM; + } + + /* + * OK, now we're committed to doing something. 
+ */ + gencnt = tcbinfo.ipi_gencnt; + n = tcbinfo.ipi_count; + + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) { + lck_rw_done(tcbinfo.mtx); + return error; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(tcbinfo.mtx); + return 0; + } + + inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) { + lck_rw_done(tcbinfo.mtx); + return ENOMEM; + } + + for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; + inp = LIST_NEXT(inp, inp_list)) { +#ifdef __APPLE__ + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) +#else + if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) #endif -#endif /* __APPLE__*/ + inp_list[i++] = inp; + } + + for (slot = 0; slot < N_TIME_WAIT_SLOTS; slot++) { + struct inpcb *inpnxt; + + for (inp = time_wait_slots[slot].lh_first; inp && i < n; inp = inpnxt) { + inpnxt = inp->inp_list.le_next; + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; + } + } + + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + struct xtcpcb64 xt; + + bzero(&xt, sizeof(xt)); + xt.xt_len = sizeof xt; + inpcb_to_xinpcb64(inp, &xt.xt_inpcb); + xt.xt_inpcb.inp_ppcb = (u_int64_t)(uintptr_t)inp->inp_ppcb; + if (inp->inp_ppcb != NULL) + tcpcb_to_xtcpcb64((struct tcpcb *)inp->inp_ppcb, &xt); + if (inp->inp_socket) + sotoxsocket64(inp->inp_socket, &xt.xt_inpcb.xi_socket); + error = SYSCTL_OUT(req, &xt, sizeof xt); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_gen = tcbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = tcbinfo.ipi_count; + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + FREE(inp_list, M_TEMP); + lck_rw_done(tcbinfo.mtx); + return error; +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0, + tcp_pcblist64, "S,xtcpcb64", "List of active TCP connections"); + +#endif /* !CONFIG_EMBEDDED */ void tcp_ctlinput(cmd, sa, vip) @@ -1272,35 +1446,41 @@ tcp_ctlinput(cmd, sa, vip) struct sockaddr *sa; void *vip; { + tcp_seq icmp_tcp_seq; struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; + void (*notify)(struct inpcb *, int) = tcp_notify; - tcp_seq icmp_seq; + + struct icmp *icp; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; - if (cmd == PRC_QUENCH) - notify = tcp_quench; + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc; else if (icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT) && ip) notify = tcp_drop_syn_sent; - else if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc; else if (PRC_IS_REDIRECT(cmd)) { ip = 0; notify = in_rtchange; } else if (cmd == PRC_HOSTDEAD) ip = 0; + /* Source quench is deprecated */ + else if (cmd == PRC_QUENCH) + return; else if ((unsigned)cmd > PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip) { + icp = (struct icmp *)((caddr_t)ip + - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip - + (IP_VHL_HL(ip->ip_vhl) << 2)); + + (IP_VHL_HL(ip->ip_vhl) << 2)); inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, 0, NULL); if (inp != NULL && inp->inp_socket != NULL) { @@ -1309,11 +1489,64 @@ tcp_ctlinput(cmd, sa, vip) tcp_unlock(inp->inp_socket, 1, 0); return; } - icmp_seq = htonl(th->th_seq); + icmp_tcp_seq = htonl(th->th_seq); tp = intotcpcb(inp); - if (SEQ_GEQ(icmp_seq, tp->snd_una) && - SEQ_LT(icmp_seq, 
tp->snd_max)) + if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && + SEQ_LT(icmp_tcp_seq, tp->snd_max)) { + if (cmd == PRC_MSGSIZE) { + + /* + * MTU discovery: + * If we got a needfrag and there is a host route to the + * original destination, and the MTU is not locked, then + * set the MTU in the route to the suggested new value + * (if given) and then notify as usual. The ULPs will + * notice that the MTU has changed and adapt accordingly. + * If no new MTU was suggested, then we guess a new one + * less than the current value. If the new MTU is + * unreasonably small (defined by sysctl tcp_minmss), then + * we reset the MTU to the interface value and enable the + * lock bit, indicating that we are no longer doing MTU + * discovery. + */ + struct rtentry *rt; + int mtu; + struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET, + 0 , { 0 }, { 0,0,0,0,0,0,0,0 } }; + icmpsrc.sin_addr = icp->icmp_ip.ip_dst; + + rt = rtalloc1((struct sockaddr *)&icmpsrc, 0, + RTF_CLONING | RTF_PRCLONING); + if (rt != NULL) { + RT_LOCK(rt); + if ((rt->rt_flags & RTF_HOST) && + !(rt->rt_rmx.rmx_locks & RTV_MTU)) { + mtu = ntohs(icp->icmp_nextmtu); + if (!mtu) + mtu = ip_next_mtu(rt->rt_rmx. 
+ rmx_mtu, 1); +#if DEBUG_MTUDISC + printf("MTU for %s reduced to %d\n", + inet_ntop(AF_INET, + &icmpsrc.sin_addr, ipv4str, + sizeof (ipv4str)), mtu); +#endif + if (mtu < max(296, (tcp_minmss + + sizeof (struct tcpiphdr)))) { + /* rt->rt_rmx.rmx_mtu = + rt->rt_ifp->if_mtu; */ + rt->rt_rmx.rmx_locks |= RTV_MTU; + } else if (rt->rt_rmx.rmx_mtu > mtu) { + rt->rt_rmx.rmx_mtu = mtu; + } + } + RT_UNLOCK(rt); + rtfree(rt); + } + } + (*notify)(inp, inetctlerrmap[cmd]); + } tcp_unlock(inp->inp_socket, 1, 0); } } else @@ -1343,13 +1576,14 @@ tcp6_ctlinput(cmd, sa, d) sa->sa_len != sizeof(struct sockaddr_in6)) return; - if (cmd == PRC_QUENCH) - notify = tcp_quench; - else if (cmd == PRC_MSGSIZE) + if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc; else if (!PRC_IS_REDIRECT(cmd) && ((unsigned)cmd > PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) return; + /* Source quench is deprecated */ + else if (cmd == PRC_QUENCH) + return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { @@ -1381,9 +1615,10 @@ tcp6_ctlinput(cmd, sa, d) in6_pcbnotify(&tcbinfo, sa, th.th_dport, (struct sockaddr *)ip6cp->ip6c_src, th.th_sport, cmd, notify); - } else - in6_pcbnotify(&tcbinfo, sa, 0, (struct sockaddr *)sa6_src, - 0, cmd, notify); + } else { + in6_pcbnotify(&tcbinfo, sa, 0, + (struct sockaddr *)(size_t)sa6_src, 0, cmd, notify); + } } #endif /* INET6 */ @@ -1492,23 +1727,6 @@ tcp_new_isn(tp) return new_isn; } -/* - * When a source quench is received, close congestion window - * to one segment. We will gradually open it again as we proceed. - */ -void -tcp_quench( - struct inpcb *inp, - __unused int errno -) -{ - struct tcpcb *tp = intotcpcb(inp); - - if (tp) { - tp->snd_cwnd = tp->t_maxseg; - tp->t_bytes_acked = 0; - } -} /* * When a specific ICMP unreachable message is received and the @@ -1549,7 +1767,6 @@ tcp_mtudisc( #endif /* INET6 */ if (tp) { - lck_mtx_lock(rt_mtx); #if INET6 if (isipv6) rt = tcp_rtlookup6(inp); @@ -1562,7 +1779,10 @@ tcp_mtudisc( isipv6 ? 
tcp_v6mssdflt : #endif /* INET6 */ tcp_mssdflt; - lck_mtx_unlock(rt_mtx); + + /* Route locked during lookup above */ + if (rt != NULL) + RT_UNLOCK(rt); return; } taop = rmx_taop(rt->rt_rmx); @@ -1578,7 +1798,9 @@ tcp_mtudisc( #endif /* INET6 */ ; - lck_mtx_unlock(rt_mtx); + /* Route locked during lookup above */ + RT_UNLOCK(rt); + if (offered) mss = min(mss, offered); /* @@ -1620,9 +1842,10 @@ tcp_mtudisc( /* * Look-up the routing entry to the peer of this inpcb. If no route - * is found and it cannot be allocated then return NULL. This routine + * is found and it cannot be allocated then return NULL. This routine * is called by TCP routines that access the rmx structure and by tcp_mss - * to get the interface MTU. + * to get the interface MTU. If a route is found, this routine will + * hold the rtentry lock; the caller is responsible for unlocking. */ struct rtentry * tcp_rtlookup(inp, input_ifscope) @@ -1633,14 +1856,14 @@ tcp_rtlookup(inp, input_ifscope) struct rtentry *rt; struct tcpcb *tp; - ro = &inp->inp_route; - if (ro == NULL) - return (NULL); - rt = ro->ro_rt; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + ro = &inp->inp_route; + if ((rt = ro->ro_rt) != NULL) + RT_LOCK(rt); - if (rt == NULL || !(rt->rt_flags & RTF_UP) || rt->generation_id != route_generation) { + if (rt == NULL || !(rt->rt_flags & RTF_UP) || + rt->generation_id != route_generation) { /* No route yet, so try to acquire one */ if (inp->inp_faddr.s_addr != INADDR_ANY) { unsigned int ifscope; @@ -1660,13 +1883,13 @@ tcp_rtlookup(inp, input_ifscope) ifscope = (inp->inp_flags & INP_BOUND_IF) ?
inp->inp_boundif : input_ifscope; - rtalloc_scoped_ign_locked(ro, 0UL, ifscope); - rt = ro->ro_rt; + if (rt != NULL) + RT_UNLOCK(rt); + rtalloc_scoped_ign(ro, 0, ifscope); + if ((rt = ro->ro_rt) != NULL) + RT_LOCK(rt); } } - if (rt != NULL && rt->rt_ifp != NULL) - somultipages(inp->inp_socket, - (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); /* * Update MTU discovery determination. Don't do it if: @@ -1684,15 +1907,24 @@ tcp_rtlookup(inp, input_ifscope) else tp->t_flags |= TF_PMTUD; -#ifdef IFEF_NOWINDOWSCALE - if (tp->t_state == TCPS_SYN_SENT && rt != NULL && rt->rt_ifp != NULL && - (rt->rt_ifp->if_eflags & IFEF_NOWINDOWSCALE) != 0) - { - // Timestamps are not enabled on this interface - tp->t_flags &= ~(TF_REQ_SCALE); +#if CONFIG_IFEF_NOWINDOWSCALE + if (tcp_obey_ifef_nowindowscale && + tp->t_state == TCPS_SYN_SENT && rt != NULL && rt->rt_ifp != NULL && + (rt->rt_ifp->if_eflags & IFEF_NOWINDOWSCALE)) { + /* Window scaling is not enabled on this interface */ + tp->t_flags &= ~TF_REQ_SCALE; } #endif + if (rt != NULL && rt->rt_ifp != NULL) { + somultipages(inp->inp_socket, + (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); + tcp_set_tso(tp, rt->rt_ifp); + } + + /* + * Caller needs to call RT_UNLOCK(rt).
+ */ return rt; } @@ -1705,11 +1937,14 @@ tcp_rtlookup6(inp) struct rtentry *rt; struct tcpcb *tp; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); ro6 = &inp->in6p_route; - rt = ro6->ro_rt; - if (rt == NULL || !(rt->rt_flags & RTF_UP)) { + if ((rt = ro6->ro_rt) != NULL) + RT_LOCK(rt); + + if (rt == NULL || !(rt->rt_flags & RTF_UP) || + rt->generation_id != route_generation) { /* No route yet, so try to acquire one */ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { struct sockaddr_in6 *dst6; @@ -1718,13 +1953,13 @@ tcp_rtlookup6(inp) dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = inp->in6p_faddr; - rtalloc_ign_locked((struct route *)ro6, 0UL); - rt = ro6->ro_rt; + if (rt != NULL) + RT_UNLOCK(rt); + rtalloc_ign((struct route *)ro6, 0); + if ((rt = ro6->ro_rt) != NULL) + RT_LOCK(rt); } } - if (rt != NULL && rt->rt_ifp != NULL) - somultipages(inp->inp_socket, - (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); /* * Update path MTU Discovery determination * while looking up the route: @@ -1750,6 +1985,24 @@ tcp_rtlookup6(inp) else tp->t_flags |= TF_PMTUD; +#if CONFIG_IFEF_NOWINDOWSCALE + if (tcp_obey_ifef_nowindowscale && + tp->t_state == TCPS_SYN_SENT && rt != NULL && rt->rt_ifp != NULL && + (rt->rt_ifp->if_eflags & IFEF_NOWINDOWSCALE)) { + /* Window scaling is not enabled on this interface */ + tp->t_flags &= ~TF_REQ_SCALE; + } +#endif + + if (rt != NULL && rt->rt_ifp != NULL) { + somultipages(inp->inp_socket, + (rt->rt_ifp->if_hwassist & IFNET_MULTIPAGES)); + tcp_set_tso(tp, rt->rt_ifp); + } + + /* + * Caller needs to call RT_UNLOCK(rt). + */ return rt; } #endif /* INET6 */ @@ -1809,7 +2062,6 @@ tcp_gettaocache(inp) struct rtentry *rt; struct rmxp_tao *taop; - lck_mtx_lock(rt_mtx); #if INET6 if ((inp->inp_vflag & INP_IPV6) != 0) rt = tcp_rtlookup6(inp); @@ -1820,12 +2072,15 @@ tcp_gettaocache(inp) /* Make sure this is a host route and is up. 
*/ if (rt == NULL || (rt->rt_flags & (RTF_UP|RTF_HOST)) != (RTF_UP|RTF_HOST)) { - lck_mtx_unlock(rt_mtx); + /* Route locked during lookup above */ + if (rt != NULL) + RT_UNLOCK(rt); return NULL; } taop = rmx_taop(rt->rt_rmx); - lck_mtx_unlock(rt_mtx); + /* Route locked during lookup above */ + RT_UNLOCK(rt); return (taop); } @@ -1843,60 +2098,66 @@ tcp_cleartaocache() } int -tcp_lock(so, refcount, lr) - struct socket *so; - int refcount; - int lr; +tcp_lock(struct socket *so, int refcount, void *lr) { - int lr_saved; - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; + void *lr_saved; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; - if (so->so_pcb) { + if (so->so_pcb != NULL) { lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } else { + panic("tcp_lock: so=%p NO PCB! lr=%p lrh= %s\n", + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } - else { - panic("tcp_lock: so=%p NO PCB! lr=%x\n", so, lr_saved); - lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); - } - - if (so->so_usecount < 0) - panic("tcp_lock: so=%p so_pcb=%p lr=%x ref=%x\n", - so, so->so_pcb, lr_saved, so->so_usecount); + if (so->so_usecount < 0) { + panic("tcp_lock: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", + so, so->so_pcb, lr_saved, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } if (refcount) so->so_usecount++; - so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved; + so->lock_lr[so->next_lock_lr] = lr_saved; so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } int -tcp_unlock(so, refcount, lr) - struct socket *so; - int refcount; - int lr; +tcp_unlock(struct socket *so, int refcount, void *lr) { - int lr_saved; - if (lr == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = lr; + void *lr_saved; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; #ifdef MORE_TCPLOCK_DEBUG - printf("tcp_unlock: so=%p sopcb=%x lock=%x 
ref=%x lr=%x\n", - so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, so->so_usecount, lr_saved); + printf("tcp_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", + so, so->so_pcb, ((struct inpcb *)so->so_pcb)->inpcb_mtx, + so->so_usecount, lr_saved); #endif if (refcount) so->so_usecount--; - if (so->so_usecount < 0) - panic("tcp_unlock: so=%p usecount=%x\n", so, so->so_usecount); - if (so->so_pcb == NULL) - panic("tcp_unlock: so=%p NO PCB usecount=%x lr=%x\n", so, so->so_usecount, lr_saved); - else { - lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); - so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved; + if (so->so_usecount < 0) { + panic("tcp_unlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } + if (so->so_pcb == NULL) { + panic("tcp_unlock: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", + so, so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } else { + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } @@ -1912,22 +2173,27 @@ tcp_getlock( if (so->so_pcb) { if (so->so_usecount < 0) - panic("tcp_getlock: so=%p usecount=%x\n", so, so->so_usecount); + panic("tcp_getlock: so=%p usecount=%x lrh= %s\n", + so, so->so_usecount, solockhistory_nr(so)); return(inp->inpcb_mtx); } else { - panic("tcp_getlock: so=%p NULL so_pcb\n", so); + panic("tcp_getlock: so=%p NULL so_pcb %s\n", + so, solockhistory_nr(so)); return (so->so_proto->pr_domain->dom_mtx); } } -long + +int32_t tcp_sbspace(struct tcpcb *tp) { struct sockbuf *sb = &tp->t_inpcb->inp_socket->so_rcv; - long space, newspace; + int32_t space, newspace; - space = ((long) lmin((sb->sb_hiwat - sb->sb_cc), + space = ((int32_t) imin((sb->sb_hiwat - sb->sb_cc), (sb->sb_mbmax - sb->sb_mbcnt))); + if (space < 0) + space = 0; 
#if TRAFFIC_MGT if (tp->t_inpcb->inp_socket->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND) { @@ -1950,7 +2216,7 @@ tcp_sbspace(struct tcpcb *tp) /* Clip window size for slower link */ if (((tp->t_flags & TF_SLOWLINK) != 0) && slowlink_wsize > 0 ) - return lmin(space, slowlink_wsize); + return imin(space, slowlink_wsize); /* * Check for ressources constraints before over-ajusting the amount of space we can @@ -1959,14 +2225,56 @@ tcp_sbspace(struct tcpcb *tp) if (sbspace_factor && (tp->t_inpcb->inp_pcbinfo->ipi_count < tcp_sockthreshold) && (total_mb_cnt / 8) < (mbstat.m_clusters / sbspace_factor)) { - if (space < (long)(sb->sb_maxused - sb->sb_cc)) {/* make sure we don't constrain the window if we have enough ressources */ - space = (long) lmax((sb->sb_maxused - sb->sb_cc), tp->rcv_maxbyps); + if (space < (int32_t)(sb->sb_maxused - sb->sb_cc)) {/* make sure we don't constrain the window if we have enough ressources */ + space = (int32_t) imax((sb->sb_maxused - sb->sb_cc), tp->rcv_maxbyps); } - newspace = (long) lmax(((long)sb->sb_maxused - sb->sb_cc), (long)tp->rcv_maxbyps); + newspace = (int32_t) imax(((int32_t)sb->sb_maxused - sb->sb_cc), (int32_t)tp->rcv_maxbyps); if (newspace > space) space = newspace; } return space; } +/* + * Checks TCP Segment Offloading capability for a given connection and interface pair. 
+ */ +void +tcp_set_tso(tp, ifp) + struct tcpcb *tp; + struct ifnet *ifp; +{ +#if INET6 + struct inpcb *inp = tp->t_inpcb; + int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; + + if (isipv6) { + /* + * Radar 6921834: Disable TSO IPv6 because there is no support + * for TSO & HW checksum in ip6_output yet + */ +#if 0 + if (ifp && ifp->if_hwassist & IFNET_TSO_IPV6) { + tp->t_flags |= TF_TSO; + if (ifp->if_tso_v6_mtu != 0) + tp->tso_max_segment_size = ifp->if_tso_v6_mtu; + else + tp->tso_max_segment_size = TCP_MAXWIN; + } else + tp->t_flags &= ~TF_TSO; + +#endif + } else +#endif /* INET6 */ + + { + if (ifp && ifp->if_hwassist & IFNET_TSO_IPV4) { + tp->t_flags |= TF_TSO; + if (ifp->if_tso_v4_mtu != 0) + tp->tso_max_segment_size = ifp->if_tso_v4_mtu; + else + tp->tso_max_segment_size = TCP_MAXWIN; + } else + tp->t_flags &= ~TF_TSO; + } +} /* DSEP Review Done pl-20051213-v02 @3253,@3391,@3400 */ diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index 9ad7badac..e77c28f86 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include /* before tcp_seq.h, for tcp_random18() */ @@ -88,6 +89,9 @@ #include #include #include +#if INET6 +#include +#endif #include #if TCPDEBUG #include @@ -151,6 +155,26 @@ static int always_keepalive = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); +/* + * See tcp_syn_backoff[] for interval values between SYN retransmits; + * the value set below defines the number of retransmits, before we + * disable the timestamp and window scaling options during subsequent + * SYN retransmits. Setting it to 0 disables the dropping off of those + * two options. 
+ */ +static int tcp_broken_peer_syn_rxmit_thres = 7; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW, + &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before " + "TCP disables rfc1323 and rfc1644 during the rest of attempts"); + +int tcp_pmtud_black_hole_detect = 1 ; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, CTLFLAG_RW, + &tcp_pmtud_black_hole_detect, 0, "Path MTU Discovery Black Hole Detection"); + +int tcp_pmtud_black_hole_mss = 1200 ; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW, + &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS"); + static int tcp_keepcnt = TCPTV_KEEPCNT; static int tcp_gc_done = FALSE; /* perfromed garbage collection of "used" sockets */ /* max idle probes */ @@ -161,11 +185,12 @@ int tcp_maxidle; struct inpcbhead time_wait_slots[N_TIME_WAIT_SLOTS]; int cur_tw_slot = 0; -u_long *delack_bitmask; +u_int32_t *delack_bitmask; void add_to_time_wait_locked(struct tcpcb *tp); void add_to_time_wait(struct tcpcb *tp) ; +static void tcp_garbage_collect(struct inpcb *, int); void add_to_time_wait_locked(struct tcpcb *tp) { @@ -218,8 +243,9 @@ void add_to_time_wait(struct tcpcb *tp) * Fast timeout routine for processing delayed acks */ void -tcp_fasttimo() +tcp_fasttimo(void *arg) { +#pragma unused(arg) struct inpcb *inp; register struct tcpcb *tp; struct socket *so; @@ -243,9 +269,6 @@ tcp_fasttimo() so = inp->inp_socket; - if (so == &tcbinfo.nat_dummy_socket) - continue; - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) continue; @@ -300,63 +323,96 @@ tcp_fasttimo() timeout(tcp_fasttimo, 0, hz/TCP_RETRANSHZ); } -void -tcp_garbage_collect(inp, istimewait) - struct inpcb *inp; - int istimewait; +static void +tcp_garbage_collect(struct inpcb *inp, int istimewait) { struct socket *so; struct tcpcb *tp; + so = inp->inp_socket; + tp = intotcpcb(inp); - if (inp->inp_socket == &tcbinfo.nat_dummy_socket) - return; - - - 
if (!lck_mtx_try_lock(inp->inpcb_mtx)) /* skip if still in use */ - return; + /* + * Skip if still in use or busy; it would have been more efficient + * if we were to test so_usecount against 0, but this isn't possible + * due to the current implementation of tcp_dropdropablreq() where + * overflow sockets that are eligible for garbage collection have + * their usecounts set to 1. + */ + if (so->so_usecount > 1 || !lck_mtx_try_lock_spin(inp->inpcb_mtx)) + return; - so = inp->inp_socket; - tp = intotcpcb(inp); + /* Check again under the lock */ + if (so->so_usecount > 1) { + lck_mtx_unlock(inp->inpcb_mtx); + return; + } - if ((so->so_usecount == 1) && - (so->so_flags & SOF_OVERFLOW)) { - in_pcbdetach(inp); - so->so_usecount--; - lck_mtx_unlock(inp->inpcb_mtx); - return; - } - else { - if (inp->inp_wantcnt != WNT_STOPUSING) { - lck_mtx_unlock(inp->inpcb_mtx); - return; - } + /* + * Overflowed socket dropped from the listening queue? Do this + * only if we are called to clean up the time wait slots, since + * tcp_dropdropablreq() considers a socket to have been fully + * dropped after add_to_time_wait() is finished. 
+ * Also handle the case of connections getting closed by the peer while in the queue as + * seen with rdar://6422317 + * + */ + if (so->so_usecount == 1 && + ((istimewait && (so->so_flags & SOF_OVERFLOW)) || + ((tp != NULL) && (tp->t_state == TCPS_CLOSED) && (so->so_head != NULL) + && ((so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) == + (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE))))) { + + if (inp->inp_state != INPCB_STATE_DEAD) { + /* Become a regular mutex */ + lck_mtx_convert_spin(inp->inpcb_mtx); +#if INET6 + if (INP_CHECK_SOCKAF(so, AF_INET6)) + in6_pcbdetach(inp); + else +#endif /* INET6 */ + in_pcbdetach(inp); } + so->so_usecount--; + lck_mtx_unlock(inp->inpcb_mtx); + return; + } else if (inp->inp_wantcnt != WNT_STOPUSING) { + lck_mtx_unlock(inp->inpcb_mtx); + return; + } - - if (so->so_usecount == 0) - in_pcbdispose(inp); - else { - /* Special case: - * - Check for embryonic socket stuck on listener queue (4023660) - * - overflowed socket dropped from the listening queue - * and dispose of remaining reference - */ - if ((so->so_usecount == 1) && - (((tp->t_state == TCPS_CLOSED) && (so->so_head != NULL) && (so->so_state & SS_INCOMP)) || - (istimewait && (so->so_flags & SOF_OVERFLOW)))) { - so->so_usecount--; - in_pcbdispose(inp); - } else - lck_mtx_unlock(inp->inpcb_mtx); + /* + * We get here because the PCB is no longer searchable (WNT_STOPUSING); + * detach (if needed) and dispose if it is dead (usecount is 0). This + * covers all cases, including overflow sockets and those that are + * considered as "embryonic", i.e. created by sonewconn() in TCP input + * path, and have not yet been committed. For the former, we reduce + * the usecount to 0 as done by the code above. For the latter, the + * usecount would have reduced to 0 as part calling soabort() when the + * socket is dropped at the end of tcp_input(). 
+ */ + if (so->so_usecount == 0) { + /* Become a regular mutex */ + lck_mtx_convert_spin(inp->inpcb_mtx); + if (inp->inp_state != INPCB_STATE_DEAD) { +#if INET6 + if (INP_CHECK_SOCKAF(so, AF_INET6)) + in6_pcbdetach(inp); + else +#endif /* INET6 */ + in_pcbdetach(inp); } + in_pcbdispose(inp); + } else { + lck_mtx_unlock(inp->inpcb_mtx); + } } static int bg_cnt = 0; #define BG_COUNTER_MAX 3 void -tcp_slowtimo() +tcp_slowtimo(void) { struct inpcb *inp, *nxt; struct tcpcb *tp; @@ -366,7 +422,9 @@ tcp_slowtimo() int ostate; #endif +#if KDEBUG static int tws_checked = 0; +#endif struct inpcbinfo *pcbinfo = &tcbinfo; @@ -381,9 +439,6 @@ tcp_slowtimo() LIST_FOREACH(inp, &tcb, inp_list) { so = inp->inp_socket; - - if (so == &tcbinfo.nat_dummy_socket) - continue; if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) continue; @@ -584,8 +639,8 @@ tcp_timers(tp, timer) { register int rexmt; struct socket *so_tmp; - struct inpcbinfo *pcbinfo = &tcbinfo; struct tcptemp *t_template; + int optlen = 0; #if TCPDEBUG int ostate; @@ -612,7 +667,7 @@ tcp_timers(tp, timer) if (tp->t_state != TCPS_TIME_WAIT && tp->t_state != TCPS_FIN_WAIT_2 && tp->t_rcvtime < tcp_maxidle) { - tp->t_timer[TCPT_2MSL] = (unsigned long)tcp_keepintvl; + tp->t_timer[TCPT_2MSL] = (u_int32_t)tcp_keepintvl; } else { tp = tcp_close(tp); @@ -665,14 +720,60 @@ tcp_timers(tp, timer) tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; /* - * Disable rfc1323 and rfc1644 if we havn't got any response to - * our third SYN to work-around some broken terminal servers - * (most of which have hopefully been retired) that have bad VJ - * header compression code which trashes TCP segments containing - * unknown-to-them TCP options. 
+ * Check for potential Path MTU Discovery Black Hole + */ + + if (tcp_pmtud_black_hole_detect && (tp->t_state == TCPS_ESTABLISHED)) { + if (((tp->t_flags & (TF_PMTUD|TF_MAXSEGSNT)) == (TF_PMTUD|TF_MAXSEGSNT)) && (tp->t_rxtshift == 2)) { + /* + * Enter Path MTU Black-hole Detection mechanism: + * - Disable Path MTU Discovery (IP "DF" bit). + * - Reduce MTU to lower value than what we negociated with peer. + */ + + tp->t_flags &= ~TF_PMTUD; /* Disable Path MTU Discovery for now */ + tp->t_flags |= TF_BLACKHOLE; /* Record that we may have found a black hole */ + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_pmtud_saved_maxopd = tp->t_maxopd; /* Keep track of previous MSS */ + if (tp->t_maxopd > tcp_pmtud_black_hole_mss) + tp->t_maxopd = tcp_pmtud_black_hole_mss; /* Reduce the MSS to intermediary value */ + else { + tp->t_maxopd = /* use the default MSS */ +#if INET6 + isipv6 ? tcp_v6mssdflt : +#endif /* INET6 */ + tcp_mssdflt; + } + tp->t_maxseg = tp->t_maxopd - optlen; + } + /* + * If further retransmissions are still unsuccessful with a lowered MTU, + * maybe this isn't a Black Hole and we restore the previous MSS and + * blackhole detection flags. + */ + else { + + if ((tp->t_flags & TF_BLACKHOLE) && (tp->t_rxtshift > 4)) { + tp->t_flags |= TF_PMTUD; + tp->t_flags &= ~TF_BLACKHOLE; + optlen = tp->t_maxopd - tp->t_maxseg; + tp->t_maxopd = tp->t_pmtud_saved_maxopd; + tp->t_maxseg = tp->t_maxopd - optlen; + } + } + } + + + /* + * Disable rfc1323 and rfc1644 if we haven't got any response to + * our SYN (after we reach the threshold) to work-around some + * broken terminal servers (most of which have hopefully been + * retired) that have bad VJ header compression code which + * trashes TCP segments containing unknown-to-them TCP options. 
*/ - if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) - tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); + if ((tp->t_state == TCPS_SYN_SENT) && + (tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres)) + tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); /* * If losing, let the lower level know and try for * a better route. Also, if we backed off this far, @@ -783,7 +884,7 @@ tcp_timers(tp, timer) if ((always_keepalive || tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) { - if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (unsigned long)tcp_maxidle) + if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + (u_int32_t)tcp_maxidle) goto dropit; /* * Send a packet designed to force a response diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index ef29f41a3..c4ea59c6b 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -152,9 +152,9 @@ static char *tcptimers[] = */ #define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \ (tv) = (value); \ - if ((u_long)(tv) < (u_long)(tvmin)) \ + if ((uint32_t)(tv) < (uint32_t)(tvmin)) \ (tv) = (tvmin); \ - else if ((u_long)(tv) > (u_long)(tvmax)) \ + else if ((uint32_t)(tv) > (uint32_t)(tvmax)) \ (tv) = (tvmax); \ } while(0) diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index 9fcfa87e4..d477b60d5 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -729,7 +729,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, tp->t_flags &= ~TF_MORETOCOME; } } else { - if (sbspace(&so->so_snd) < -512) { + if (sbspace(&so->so_snd) == 0) { + /* if no space is left in sockbuf, + * do not try to squeeze in OOB traffic */ m_freem(m); error = ENOBUFS; goto out; @@ -915,7 +917,7 @@ tcp_connect(tp, nam, p) if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && otp->t_state == TCPS_TIME_WAIT && - otp->t_starttime < (u_long)tcp_msl && + otp->t_starttime < (u_int32_t)tcp_msl && (otp->t_flags & TF_RCVD_CC)) otp = 
tcp_close(otp); else { @@ -966,7 +968,7 @@ tcp_connect(tp, nam, p) soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; - tp->t_timer[TCPT_KEEP] = tcp_keepinit; + tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit; tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1032,7 +1034,7 @@ tcp6_connect(tp, nam, p) if (oinp) { if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && otp->t_state == TCPS_TIME_WAIT && - otp->t_starttime < (u_long)tcp_msl && + otp->t_starttime < (u_int32_t)tcp_msl && (otp->t_flags & TF_RCVD_CC)) otp = tcp_close(otp); else @@ -1061,7 +1063,7 @@ tcp6_connect(tp, nam, p) soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; - tp->t_timer[TCPT_KEEP] = tcp_keepinit; + tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit; tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1179,6 +1181,17 @@ tcp_ctloutput(so, sopt) tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp); /* reset the timer to new value */ } break; + + case TCP_CONNECTIONTIMEOUT: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + break; + if (optval < 0) + error = EINVAL; + else + tp->t_keepinit = optval * TCP_RETRANSHZ; + break; default: error = ENOPROTOOPT; @@ -1203,6 +1216,9 @@ tcp_ctloutput(so, sopt) case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; break; + case TCP_CONNECTIONTIMEOUT: + optval = tp->t_keepinit / TCP_RETRANSHZ; + break; default: error = ENOPROTOOPT; break; @@ -1219,12 +1235,47 @@ tcp_ctloutput(so, sopt) * sizes, respectively. These are obsolescent (this information should * be set by the route). 
*/ -u_long tcp_sendspace = 1448*256; -SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, - &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); -u_long tcp_recvspace = 1448*384; -SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, - &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); +u_int32_t tcp_sendspace = 1448*256; +u_int32_t tcp_recvspace = 1448*384; + +/* During attach, the size of socket buffer allocated is limited to + * sb_max in sbreserve. Disallow setting the tcp send and recv space + * to be more than sb_max because that will cause tcp_attach to fail + * (see radar 5713060) + */ +static int +sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) { + u_int32_t new_value = 0, *space_p = NULL; + int changed = 0, error = 0; + u_quad_t sb_effective_max = (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES; + + switch (oidp->oid_number) { + case TCPCTL_SENDSPACE: + space_p = &tcp_sendspace; + break; + case TCPCTL_RECVSPACE: + space_p = &tcp_recvspace; + break; + default: + return EINVAL; + } + error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), + &new_value, &changed); + if (changed) { + if (new_value > 0 && new_value <= sb_effective_max) { + *space_p = new_value; + } else { + error = ERANGE; + } + } + return error; +} + +SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW, + &tcp_sendspace , 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size"); +SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW, + &tcp_recvspace , 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size"); /* diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index 618fc7fed..0fa518d78 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -170,6 +170,8 @@ struct tcpcb { #define TF_SENDINPROG 0x2000000 /* send is in progress */ #define TF_PMTUD 0x4000000 /* Perform Path MTU Discovery for this 
connection */ #define TF_CLOSING 0x8000000 /* pending tcp close */ +#define TF_TSO 0x10000000 /* TCP Segment Offloading is enable on this connection */ +#define TF_BLACKHOLE 0x20000000 /* Path MTU Discovery Black Hole detection */ int t_force; /* 1 if forcing out a byte */ @@ -187,23 +189,23 @@ struct tcpcb { tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ - u_long rcv_wnd; /* receive window */ + u_int32_t rcv_wnd; /* receive window */ tcp_seq rcv_up; /* receive urgent pointer */ - u_long snd_wnd; /* send window */ - u_long snd_cwnd; /* congestion-controlled window */ - u_long snd_bwnd; /* bandwidth-controlled window */ - u_long snd_ssthresh; /* snd_cwnd size threshold for + u_int32_t snd_wnd; /* send window */ + u_int32_t snd_cwnd; /* congestion-controlled window */ + u_int32_t snd_bwnd; /* bandwidth-controlled window */ + u_int32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ - u_long snd_bandwidth; /* calculated bandwidth or 0 */ + u_int32_t snd_bandwidth; /* calculated bandwidth or 0 */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ u_int t_maxopd; /* mss plus options */ - u_long t_rcvtime; /* inactivity time */ - u_long t_starttime; /* time connection was established */ + u_int32_t t_rcvtime; /* inactivity time */ + u_int32_t t_starttime; /* time connection was established */ int t_rtttime; /* round trip time */ tcp_seq t_rtseq; /* sequence number being timed */ @@ -218,8 +220,8 @@ struct tcpcb { int t_rxtshift; /* log(2) of rexmt exp. 
backoff */ u_int t_rttmin; /* minimum rtt allowed */ u_int t_rttbest; /* best rtt we've seen */ - u_long t_rttupdated; /* number of times rtt sampled */ - u_long max_sndwnd; /* largest window peer has offered */ + u_int32_t t_rttupdated; /* number of times rtt sampled */ + u_int32_t max_sndwnd; /* largest window peer has offered */ int t_softerror; /* possible error not yet reported */ /* out-of-band data */ @@ -232,19 +234,19 @@ struct tcpcb { u_char rcv_scale; /* window scaling for recv window */ u_char request_r_scale; /* pending window scaling */ u_char requested_s_scale; - u_long ts_recent; /* timestamp echo data */ + u_int32_t ts_recent; /* timestamp echo data */ - u_long ts_recent_age; /* when last updated */ + u_int32_t ts_recent_age; /* when last updated */ tcp_seq last_ack_sent; /* RFC 1644 variables */ tcp_cc cc_send; /* send connection count */ tcp_cc cc_recv; /* receive connection count */ /* RFC 3465 variables */ - u_long t_bytes_acked; /* ABC "bytes_acked" parameter */ + u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ /* experimental */ - u_long snd_cwnd_prev; /* cwnd prior to retransmit */ - u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */ - u_long t_badrxtwin; /* window for retransmit recovery */ + u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ + u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ + u_int32_t t_badrxtwin; /* window for retransmit recovery */ int t_keepidle; /* keepalive idle timer (override global if > 0) */ int t_lastchain; /* amount of packets chained last time around */ @@ -252,17 +254,17 @@ struct tcpcb { /* 3529618 MSS overload prevention */ - u_long rcv_reset; - u_long rcv_pps; - u_long rcv_byps; - u_long rcv_maxbyps; + u_int32_t rcv_reset; + u_int32_t rcv_pps; + u_int32_t rcv_byps; + u_int32_t rcv_maxbyps; tcp_seq snd_high; /* for use in NewReno Fast Recovery */ tcp_seq snd_high_prev; /* snd_high prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ 
u_char snd_limited; /* segments limited transmitted */ /* anti DoS counters */ - u_long rcv_second; /* start of interval second */ + u_int32_t rcv_second; /* start of interval second */ /* SACK related state */ int sack_enable; /* enable SACK for this connection */ int snd_numholes; /* number of holes seen by sender */ @@ -290,6 +292,10 @@ struct tcpcb { u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */ struct mbuf *t_pktlist_head; /* First packet in transmit chain */ struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */ + + int t_keepinit; /* connection timeout, i.e. idle time in SYN_SENT or SYN_RECV state */ + u_int32_t tso_max_segment_size; /* TCP Segment Offloading maximum segment unit for NIC */ + u_int t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) @@ -304,15 +310,15 @@ struct tcpcb { * to tcp_dooptions. */ struct tcpopt { - u_long to_flags; /* which options are present */ + u_int32_t to_flags; /* which options are present */ #define TOF_TS 0x0001 /* timestamp */ #define TOF_MSS 0x0010 #define TOF_SCALE 0x0020 #define TOF_SIGNATURE 0x0040 /* signature option present */ #define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */ #define TOF_SACK 0x0100 /* Peer sent SACK option */ - u_long to_tsval; - u_long to_tsecr; + u_int32_t to_tsval; + u_int32_t to_tsecr; u_int16_t to_mss; u_int8_t to_requested_s_scale; u_int8_t to_nsacks; /* number of SACK blocks */ @@ -385,7 +391,11 @@ _TCPCB_LIST_HEAD(tsegqe_head, tseg_qent); struct tcpcb { #endif /* KERNEL_PRIVATE */ +#if defined(KERNEL_PRIVATE) + u_int32_t t_segq; +#else struct tsegqe_head t_segq; +#endif /* KERNEL_PRIVATE */ int t_dupacks; /* consecutive dup acks recd */ u_int32_t unused; /* unused now: was t_template */ @@ -482,6 +492,7 @@ struct tcpcb { u_int32_t t_badrxtwin; /* window for retransmit recovery */ }; + /* * TCP statistics. 
* Many of these should be kept per connection, @@ -596,22 +607,106 @@ struct tcpstat { * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. */ -struct xtcpcb { - u_int32_t xt_len; + +struct xtcpcb { + u_int32_t xt_len; #ifdef KERNEL_PRIVATE - struct inpcb_compat xt_inp; + struct inpcb_compat xt_inp; #else - struct inpcb xt_inp; + struct inpcb xt_inp; #endif #ifdef KERNEL_PRIVATE - struct otcpcb xt_tp; + struct otcpcb xt_tp; #else - struct tcpcb xt_tp; + struct tcpcb xt_tp; #endif - struct xsocket xt_socket; - u_quad_t xt_alignment_hack; + struct xsocket xt_socket; + u_quad_t xt_alignment_hack; +}; + +#if !CONFIG_EMBEDDED + +struct xtcpcb64 { + u_int32_t xt_len; + struct xinpcb64 xt_inpcb; + + u_int64_t t_segq; + int t_dupacks; /* consecutive dup acks recd */ + + int t_timer[TCPT_NTIMERS]; /* tcp timers */ + + int t_state; /* state of this connection */ + u_int t_flags; + + int t_force; /* 1 if forcing out a byte */ + + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + tcp_seq irs; /* initial receive sequence number */ + + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_adv; /* advertised window */ + u_int32_t rcv_wnd; /* receive window */ + tcp_seq rcv_up; /* receive urgent pointer */ + + u_int32_t snd_wnd; /* send window */ + u_int32_t snd_cwnd; /* congestion-controlled window */ + u_int32_t snd_ssthresh; /* snd_cwnd size threshold for + * for slow start exponential to + * linear switch + */ + u_int t_maxopd; /* mss plus options */ + + u_int32_t t_rcvtime; /* inactivity time */ + u_int32_t t_starttime; /* time connection was established */ + int t_rtttime; /* round trip time */ + tcp_seq 
t_rtseq; /* sequence number being timed */ + + int t_rxtcur; /* current retransmit value (ticks) */ + u_int t_maxseg; /* maximum segment size */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + + int t_rxtshift; /* log(2) of rexmt exp. backoff */ + u_int t_rttmin; /* minimum rtt allowed */ + u_int32_t t_rttupdated; /* number of times rtt sampled */ + u_int32_t max_sndwnd; /* largest window peer has offered */ + + int t_softerror; /* possible error not yet reported */ +/* out-of-band data */ + char t_oobflags; /* have some */ + char t_iobc; /* input character */ +/* RFC 1323 variables */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char request_r_scale; /* pending window scaling */ + u_char requested_s_scale; + u_int32_t ts_recent; /* timestamp echo data */ + + u_int32_t ts_recent_age; /* when last updated */ + tcp_seq last_ack_sent; +/* RFC 1644 variables */ + tcp_cc cc_send; /* send connection count */ + tcp_cc cc_recv; /* receive connection count */ + tcp_seq snd_recover; /* for use in fast recovery */ +/* experimental */ + u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ + u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ + u_int32_t t_badrxtwin; /* window for retransmit recovery */ + + u_quad_t xt_alignment_hack; }; +#endif /* !CONFIG_EMBEDDED */ + #pragma pack() /* @@ -669,12 +764,16 @@ extern int tcp_do_newreno; extern int ss_fltsz; extern int ss_fltsz_local; #ifdef __APPLE__ -extern u_long tcp_now; /* for RFC 1323 timestamps */ +extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern int tcp_delack_enabled; #endif /* __APPLE__ */ extern int tcp_do_sack; /* SACK enabled/disabled */ +#if CONFIG_IFEF_NOWINDOWSCALE +extern int tcp_obey_ifef_nowindowscale; +#endif + void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); @@ -683,7 +782,7 @@ int tcp_ctloutput(struct socket *, struct sockopt 
*); struct tcpcb * tcp_drop(struct tcpcb *, int); void tcp_drain(void); -void tcp_fasttimo(void); +void tcp_fasttimo(void *); struct rmxp_tao * tcp_gettaocache(struct inpcb *); void tcp_init(void) __attribute__((section("__TEXT, initcode"))); @@ -695,7 +794,6 @@ void tcp_mtudisc(struct inpcb *, int); struct tcpcb * tcp_newtcpcb(struct inpcb *); int tcp_output(struct tcpcb *); -void tcp_quench(struct inpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int, unsigned int); @@ -716,11 +814,12 @@ void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); void tcp_free_sackholes(struct tcpcb *tp); -long tcp_sbspace(struct tcpcb *tp); +int32_t tcp_sbspace(struct tcpcb *tp); +void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp); -int tcp_lock (struct socket *, int, int); -int tcp_unlock (struct socket *, int, int); +int tcp_lock (struct socket *, int, void *); +int tcp_unlock (struct socket *, int, void *); #ifdef _KERN_LOCKS_H_ lck_mtx_t * tcp_getlock (struct socket *, int); #else @@ -729,8 +828,8 @@ void * tcp_getlock (struct socket *, int); extern struct pr_usrreqs tcp_usrreqs; -extern u_long tcp_sendspace; -extern u_long tcp_recvspace; +extern u_int32_t tcp_sendspace; +extern u_int32_t tcp_recvspace; tcp_seq tcp_new_isn(struct tcpcb *); #endif /* KERNEL_RPIVATE */ diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index ec3ff435e..b661c10ea 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -89,6 +89,7 @@ #include #include #if INET6 +#include #include #endif #include @@ -160,7 +161,6 @@ struct inpcbinfo udbinfo; #endif extern int esp_udp_encap_port; -extern u_long route_generation; extern void ipfwsyslog( int level, const char *format,...); @@ -184,6 +184,10 @@ SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD, SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, 
CTLFLAG_RD, &udbinfo.ipi_count, 0, "Number of active PCBs"); +__private_extern__ int udp_use_randomport = 1; +SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports, CTLFLAG_RW, + &udp_use_randomport, 0, "Randomize UDP port numbers"); + #if INET6 struct udp_in6 { struct sockaddr_in6 uin6_sin; @@ -235,38 +239,10 @@ udp_init() if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) return; /* pretty much dead if this fails... */ - - in_pcb_nat_init(&udbinfo, AF_INET, IPPROTO_UDP, SOCK_DGRAM); #else udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets, ZONE_INTERRUPT, 0); #endif - -#if 0 - /* for pcb sharing testing only */ - stat = in_pcb_new_share_client(&udbinfo, &fake_owner); - kprintf("udp_init in_pcb_new_share_client - stat = %d\n", stat); - - laddr.s_addr = 0x11646464; - faddr.s_addr = 0x11646465; - - lport = 1500; - in_pcb_grab_port(&udbinfo, 0, laddr, &lport, faddr, 1600, 0, fake_owner); - kprintf("udp_init in_pcb_grab_port - stat = %d\n", stat); - - stat = in_pcb_rem_share_client(&udbinfo, fake_owner); - kprintf("udp_init in_pcb_rem_share_client - stat = %d\n", stat); - - stat = in_pcb_new_share_client(&udbinfo, &fake_owner); - kprintf("udp_init in_pcb_new_share_client(2) - stat = %d\n", stat); - - laddr.s_addr = 0x11646464; - faddr.s_addr = 0x11646465; - - lport = 1500; - stat = in_pcb_grab_port(&udbinfo, 0, laddr, &lport, faddr, 1600, 0, fake_owner); - kprintf("udp_init in_pcb_grab_port(2) - stat = %d\n", stat); -#endif } void @@ -393,7 +369,6 @@ udp_input(m, iphlen) in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { int reuse_sock = 0, mcast_delivered = 0; - struct mbuf *n = NULL; lck_rw_lock_shared(pcbinfo->mtx); /* @@ -426,11 +401,6 @@ udp_input(m, iphlen) udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; #endif LIST_FOREACH(inp, &udb, inp_list) { -#ifdef __APPLE__ - /* Ignore nat/SharedIP dummy pcbs */ - if (inp->inp_socket == &udbinfo.nat_dummy_socket) - continue; -#endif if (inp->inp_socket == NULL) continue; if 
(inp != sotoinpcb(inp->inp_socket)) @@ -486,6 +456,8 @@ udp_input(m, iphlen) if (skipit == 0) #endif /*IPSEC*/ { + struct mbuf *n = NULL; + if (reuse_sock) n = m_copy(m, 0, M_COPYALL); #if INET6 @@ -498,6 +470,8 @@ udp_input(m, iphlen) &udp_in); #endif /* INET6 */ mcast_delivered++; + + m = n; } udp_unlock(inp->inp_socket, 1, 0); } @@ -509,8 +483,13 @@ udp_input(m, iphlen) * port. It assumes that an application will never * clear these options after setting them. */ - if (reuse_sock == 0 || ((m = n) == NULL)) + if (reuse_sock == 0 || m == NULL) break; + /* + * Recompute IP and UDP header pointers for new mbuf + */ + ip = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)ip + iphlen); } lck_rw_done(pcbinfo->mtx); @@ -524,8 +503,9 @@ udp_input(m, iphlen) goto bad; } - if (reuse_sock != 0) /* free the extra copy of mbuf */ + if (m != NULL) /* free the extra copy of mbuf or skipped by IPSec */ m_freem(m); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); return; } @@ -975,7 +955,110 @@ udp_pcblist SYSCTL_HANDLER_ARGS SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); +#if !CONFIG_EMBEDDED +static int +udp_pcblist64 SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error, i, n; + struct inpcb *inp, **inp_list; + inp_gen_t gencnt; + struct xinpgen xig; + + /* + * The process of preparing the TCB list is too time-consuming and + * resource-intensive to repeat twice on every request. + */ + lck_rw_lock_shared(udbinfo.mtx); + if (req->oldptr == USER_ADDR_NULL) { + n = udbinfo.ipi_count; + req->oldidx = 2 * (sizeof xig) + + (n + n/8) * sizeof(struct xinpcb64); + lck_rw_done(udbinfo.mtx); + return 0; + } + + if (req->newptr != USER_ADDR_NULL) { + lck_rw_done(udbinfo.mtx); + return EPERM; + } + + /* + * OK, now we're committed to doing something. 
+ */ + gencnt = udbinfo.ipi_gencnt; + n = udbinfo.ipi_count; + + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_count = n; + xig.xig_gen = gencnt; + xig.xig_sogen = so_gencnt; + error = SYSCTL_OUT(req, &xig, sizeof xig); + if (error) { + lck_rw_done(udbinfo.mtx); + return error; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(udbinfo.mtx); + return 0; + } + + inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); + if (inp_list == 0) { + lck_rw_done(udbinfo.mtx); + return ENOMEM; + } + + for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; + inp = LIST_NEXT(inp, inp_list)) { + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; + } + n = i; + + error = 0; + for (i = 0; i < n; i++) { + inp = inp_list[i]; + if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + struct xinpcb64 xi; + + bzero(&xi, sizeof(xi)); + xi.xi_len = sizeof xi; + inpcb_to_xinpcb64(inp, &xi); + if (inp->inp_socket) + sotoxsocket64(inp->inp_socket, &xi.xi_socket); + error = SYSCTL_OUT(req, &xi, sizeof xi); + } + } + if (!error) { + /* + * Give the user an updated idea of our state. + * If the generation differs from what we told + * her before, she knows that something happened + * while we were processing this request, and it + * might be necessary to retry. 
+ */ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; + xig.xig_gen = udbinfo.ipi_gencnt; + xig.xig_sogen = so_gencnt; + xig.xig_count = udbinfo.ipi_count; + error = SYSCTL_OUT(req, &xig, sizeof xig); + } + FREE(inp_list, M_TEMP); + lck_rw_done(udbinfo.mtx); + return error; +} + +SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, CTLFLAG_RD, 0, 0, + udp_pcblist64, "S,xinpcb64", "List of active UDP sockets"); + +#endif /* !CONFIG_EMBEDDED */ static __inline__ u_int16_t get_socket_id(struct socket * s) @@ -985,7 +1068,7 @@ get_socket_id(struct socket * s) if (s == NULL) { return (0); } - val = (u_int16_t)(((u_int32_t)s) / sizeof(struct socket)); + val = (u_int16_t)(((uintptr_t)s) / sizeof(struct socket)); if (val == 0) { val = 0xffff; } @@ -1039,17 +1122,25 @@ udp_output(inp, m, addr, control, p) * that we have a valid source address. * Reacquire a new source address if INADDR_ANY was specified */ - if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->generation_id != route_generation) { - if (ifa_foraddr(inp->inp_laddr.s_addr) == 0) { /* src address is gone */ - if (inp->inp_flags & INP_INADDR_ANY) - inp->inp_laddr.s_addr = INADDR_ANY; /* new src will be set later */ - else { + if (inp->inp_route.ro_rt != NULL && + inp->inp_route.ro_rt->generation_id != route_generation) { + struct in_ifaddr *ia; + + /* src address is gone? 
*/ + if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { + if (inp->inp_flags & INP_INADDR_ANY) { + /* new src will be set later */ + inp->inp_laddr.s_addr = INADDR_ANY; + } else { error = EADDRNOTAVAIL; goto release; } } - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = (struct rtentry *)0; + if (ia != NULL) + ifafree(&ia->ia_ifa); + if (inp->inp_route.ro_rt != NULL) + rtfree(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = NULL; } origladdr= laddr = inp->inp_laddr; @@ -1105,10 +1196,6 @@ udp_output(inp, m, addr, control, p) #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(inp, m); #endif - -#if CONFIG_IP_EDGEHOLE - ip_edgehole_mbuf_tag(inp, m); -#endif /* * Calculate data length and get a mbuf @@ -1164,24 +1251,18 @@ udp_output(inp, m, addr, control, p) soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); mopts = inp->inp_moptions; - /* We don't want to cache the route for non-connected UDP */ - if (udp_dodisconnect) { - bcopy(&inp->inp_route, &ro, sizeof (ro)); - ro.ro_rt = NULL; - } + /* Copy the cached route and take an extra reference */ + inp_route_copyout(inp, &ro); socket_unlock(so, 0); /* XXX jgraessley please look at XXX */ - error = ip_output_list(m, 0, inpopts, - udp_dodisconnect ? &ro : &inp->inp_route, soopts, mopts, &ipoa); + error = ip_output_list(m, 0, inpopts, &ro, soopts, mopts, &ipoa); socket_lock(so, 0); + /* Synchronize PCB cached route */ + inp_route_copyin(inp, &ro); + if (udp_dodisconnect) { - /* Discard the cached route, if there is one */ - if (ro.ro_rt != NULL) { - rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } in_pcbdisconnect(inp); inp->inp_laddr = origladdr; /* XXX rehash? 
*/ } @@ -1200,20 +1281,51 @@ udp_output(inp, m, addr, control, p) return (error); } -u_long udp_sendspace = 9216; /* really max datagram size */ - /* 40 1K datagrams */ -SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW, - &udp_sendspace, 0, "Maximum outgoing UDP datagram size"); - -u_long udp_recvspace = 40 * (1024 + +u_int32_t udp_sendspace = 9216; /* really max datagram size */ +/* 40 1K datagrams */ +u_int32_t udp_recvspace = 40 * (1024 + #if INET6 sizeof(struct sockaddr_in6) #else sizeof(struct sockaddr_in) #endif ); -SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW, - &udp_recvspace, 0, "Maximum incoming UDP datagram size"); + +/* Check that the values of udp send and recv space do not exceed sb_max */ +static int +sysctl_udp_sospace(struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) { + u_int32_t new_value = 0, *space_p = NULL; + int changed = 0, error = 0; + u_quad_t sb_effective_max = (sb_max/ (MSIZE+MCLBYTES)) * MCLBYTES; + + switch (oidp->oid_number) { + case UDPCTL_RECVSPACE: + space_p = &udp_recvspace; + break; + case UDPCTL_MAXDGRAM: + space_p = &udp_sendspace; + break; + default: + return EINVAL; + } + error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), + &new_value, &changed); + if (changed) { + if (new_value > 0 && new_value <= sb_effective_max) { + *space_p = new_value; + } else { + error = ERANGE; + } + } + return error; +} + +SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW, + &udp_recvspace, 0, &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size"); + +SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLTYPE_INT | CTLFLAG_RW, + &udp_sendspace, 0, &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size"); static int udp_abort(struct socket *so) @@ -1351,60 +1463,53 @@ struct pr_usrreqs udp_usrreqs = { int -udp_lock(struct socket *so, int refcount, int debug) +udp_lock(struct socket *so, int refcount, void *debug) { 
- int lr_saved; - if (debug == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = debug; + void *lr_saved; + + if (debug == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = debug; if (so->so_pcb) { - lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(((struct inpcb *)so->so_pcb)->inpcb_mtx); + } else { + panic("udp_lock: so=%p NO PCB! lr=%p lrh= %s\n", + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ } - else - panic("udp_lock: so=%p NO PCB! lr=%x\n", so, lr_saved); - - if (refcount) + if (refcount) so->so_usecount++; - so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved; + so->lock_lr[so->next_lock_lr] = lr_saved; so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; return (0); } int -udp_unlock(struct socket *so, int refcount, int debug) +udp_unlock(struct socket *so, int refcount, void *debug) { - int lr_saved; + void *lr_saved; - if (debug == 0) - lr_saved = (unsigned int) __builtin_return_address(0); - else lr_saved = debug; + if (debug == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = debug; - if (refcount) { + if (refcount) so->so_usecount--; -#if 0 - { - struct inpcb *inp = sotoinpcb(so); - struct inpcbinfo *pcbinfo = &udbinfo; - - if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { - - if (lck_rw_try_lock_exclusive(pcbinfo->mtx)) { - in_pcbdispose(inp); - lck_rw_done(pcbinfo->mtx); - return(0); - } - } - } -#endif - } - if (so->so_pcb == NULL) - panic("udp_unlock: so=%p NO PCB! lr=%x\n", so, lr_saved); - else { - lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); - so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved; + + if (so->so_pcb == NULL) { + panic("udp_unlock: so=%p NO PCB! 
lr=%p lrh= %s\n", + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } else { + lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = lr_saved; so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(((struct inpcb *)so->so_pcb)->inpcb_mtx); } @@ -1422,7 +1527,8 @@ udp_getlock(struct socket *so, __unused int locktype) if (so->so_pcb) return(inp->inpcb_mtx); else { - panic("udp_getlock: so=%p NULL so_pcb\n", so); + panic("udp_getlock: so=%p NULL so_pcb lrh= %s\n", + so, solockhistory_nr(so)); return (so->so_proto->pr_domain->dom_mtx); } } @@ -1446,10 +1552,6 @@ udp_slowtimo() for (inp = udb.lh_first; inp != NULL; inp = inpnxt) { inpnxt = inp->inp_list.le_next; - - /* Ignore nat/SharedIP dummy pcbs */ - if (inp->inp_socket == &udbinfo.nat_dummy_socket) - continue; if (inp->inp_wantcnt != WNT_STOPUSING) continue; @@ -1458,10 +1560,19 @@ udp_slowtimo() if (!lck_mtx_try_lock(inp->inpcb_mtx)) /* skip if busy, no hurry for cleanup... */ continue; - if (so->so_usecount == 0) + if (so->so_usecount == 0) { + if (inp->inp_state != INPCB_STATE_DEAD) { +#if INET6 + if (INP_CHECK_SOCKAF(so, AF_INET6)) + in6_pcbdetach(inp); + else +#endif /* INET6 */ + in_pcbdetach(inp); + } in_pcbdispose(inp); - else + } else { lck_mtx_unlock(inp->inpcb_mtx); + } } lck_rw_done(pcbinfo->mtx); } diff --git a/bsd/netinet/udp_var.h b/bsd/netinet/udp_var.h index 19d4aad60..3a75d1faf 100644 --- a/bsd/netinet/udp_var.h +++ b/bsd/netinet/udp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -131,8 +131,8 @@ SYSCTL_DECL(_net_inet_udp); extern struct pr_usrreqs udp_usrreqs; extern struct inpcbhead udb; extern struct inpcbinfo udbinfo; -extern u_long udp_sendspace; -extern u_long udp_recvspace; +extern u_int32_t udp_sendspace; +extern u_int32_t udp_recvspace; extern struct udpstat udpstat; extern int log_in_vain; @@ -144,8 +144,8 @@ void udp_input(struct mbuf *, int); void udp_notify(struct inpcb *inp, int errno); int udp_shutdown(struct socket *so); -int udp_lock (struct socket *, int, int); -int udp_unlock (struct socket *, int, int); +int udp_lock (struct socket *, int, void *); +int udp_unlock (struct socket *, int, void *); void udp_slowtimo (void); #ifdef _KERN_LOCKS_H_ lck_mtx_t * udp_getlock (struct socket *, int); diff --git a/bsd/netinet6/ah.h b/bsd/netinet6/ah.h index 4b01eb27f..f77826a0d 100644 --- a/bsd/netinet6/ah.h +++ b/bsd/netinet6/ah.h @@ -74,7 +74,7 @@ struct ah_algorithm { void (*result)(struct ah_algorithm_state *, caddr_t); }; -#define AH_MAXSUMSIZE 16 +#define AH_MAXSUMSIZE 64 // sha2-512's output size extern const struct ah_algorithm *ah_algorithm_lookup(int); @@ -86,6 +86,6 @@ extern void ah4_input(struct mbuf *, int); extern int ah4_output(struct mbuf *, struct secasvar *); extern int ah4_calccksum(struct mbuf *, caddr_t, size_t, const struct ah_algorithm *, struct secasvar *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET6_AH_H_ */ diff --git a/bsd/netinet6/ah_core.c b/bsd/netinet6/ah_core.c index 97527725c..042550b78 100644 --- a/bsd/netinet6/ah_core.c +++ b/bsd/netinet6/ah_core.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ah_core.c,v 1.2.2.4 2001/07/03 11:01:49 ume Exp $ */ /* $KAME: ah_core.c,v 1.44 2001/03/12 11:24:39 itojun Exp $ */ @@ -109,16 +137,19 @@ static int ah_hmac_sha1_init(struct ah_algorithm_state *, struct secasvar *); static void ah_hmac_sha1_loop(struct ah_algorithm_state *, caddr_t, size_t); static void ah_hmac_sha1_result(struct ah_algorithm_state *, caddr_t); #if ALLCRYPTO +static int ah_sumsiz_sha2_256(struct secasvar *); static int ah_hmac_sha2_256_mature(struct secasvar *); static int ah_hmac_sha2_256_init(struct ah_algorithm_state *, struct secasvar *); static void ah_hmac_sha2_256_loop(struct ah_algorithm_state *, caddr_t, size_t); static void ah_hmac_sha2_256_result(struct ah_algorithm_state *, caddr_t); +static int ah_sumsiz_sha2_384(struct secasvar *); static int ah_hmac_sha2_384_mature(struct secasvar *); static int ah_hmac_sha2_384_init(struct ah_algorithm_state *, struct secasvar *); static void ah_hmac_sha2_384_loop(struct ah_algorithm_state *, caddr_t, 
size_t); static void ah_hmac_sha2_384_result(struct ah_algorithm_state *, caddr_t); +static int ah_sumsiz_sha2_512(struct secasvar *); static int ah_hmac_sha2_512_mature(struct secasvar *); static int ah_hmac_sha2_512_init(struct ah_algorithm_state *, struct secasvar *); @@ -155,17 +186,17 @@ ah_algorithm_lookup(idx) ah_none_init, ah_none_loop, ah_none_result, }; #if ALLCRYPTO static struct ah_algorithm hmac_sha2_256 = - { ah_sumsiz_1216, ah_hmac_sha2_256_mature, 256, 256, + { ah_sumsiz_sha2_256, ah_hmac_sha2_256_mature, 256, 256, "hmac-sha2-256", ah_hmac_sha2_256_init, ah_hmac_sha2_256_loop, ah_hmac_sha2_256_result, }; static struct ah_algorithm hmac_sha2_384 = - { ah_sumsiz_1216, ah_hmac_sha2_384_mature, 384, 384, + { ah_sumsiz_sha2_384, ah_hmac_sha2_384_mature, 384, 384, "hmac-sha2-384", ah_hmac_sha2_384_init, ah_hmac_sha2_384_loop, ah_hmac_sha2_384_result, }; static struct ah_algorithm hmac_sha2_512 = - { ah_sumsiz_1216, ah_hmac_sha2_512_mature, 512, 512, + { ah_sumsiz_sha2_512, ah_hmac_sha2_512_mature, 512, 512, "hmac-sha2-512", ah_hmac_sha2_512_init, ah_hmac_sha2_512_loop, ah_hmac_sha2_512_result, }; @@ -536,7 +567,7 @@ ah_hmac_md5_init(state, sav) key = &tk[0]; keylen = 16; } else { - key = _KEYBUF(state->sav->key_auth); + key = (u_char *) _KEYBUF(state->sav->key_auth); keylen = _KEYLEN(state->sav->key_auth); } @@ -661,7 +692,7 @@ ah_hmac_sha1_init(state, sav) key = &tk[0]; keylen = SHA1_RESULTLEN; } else { - key = _KEYBUF(state->sav->key_auth); + key = (u_char *) _KEYBUF(state->sav->key_auth); keylen = _KEYLEN(state->sav->key_auth); } @@ -725,6 +756,16 @@ ah_hmac_sha1_result(state, addr) } #if ALLCRYPTO +static int +ah_sumsiz_sha2_256(sav) + struct secasvar *sav; +{ + if (!sav) + return -1; + // return half the output size (in bytes), as per rfc 4868 + return 16; // 256/(8*2) +} + static int ah_hmac_sha2_256_mature(sav) struct secasvar *sav; @@ -786,13 +827,13 @@ ah_hmac_sha2_256_init(state, sav) bzero(tk, sizeof(tk)); bzero(ctxt, sizeof(*ctxt)); 
SHA256_Init(ctxt); - SHA256_Update(ctxt, _KEYBUF(state->sav->key_auth), + SHA256_Update(ctxt, (const u_int8_t *) _KEYBUF(state->sav->key_auth), _KEYLEN(state->sav->key_auth)); SHA256_Final(&tk[0], ctxt); key = &tk[0]; keylen = sizeof(tk) < 64 ? sizeof(tk) : 64; } else { - key = _KEYBUF(state->sav->key_auth); + key = (u_char *) _KEYBUF(state->sav->key_auth); keylen = _KEYLEN(state->sav->key_auth); } @@ -824,7 +865,7 @@ ah_hmac_sha2_256_loop(state, addr, len) panic("ah_hmac_sha2_256_loop: what?"); ctxt = (SHA256_CTX *)(((u_char *)state->foo) + 128); - SHA256_Update(ctxt, (caddr_t)addr, (size_t)len); + SHA256_Update(ctxt, (const u_int8_t *)addr, (size_t)len); } static void @@ -832,7 +873,6 @@ ah_hmac_sha2_256_result(state, addr) struct ah_algorithm_state *state; caddr_t addr; { - u_char digest[SHA256_DIGEST_LENGTH]; u_char *ipad; u_char *opad; SHA256_CTX *ctxt; @@ -844,19 +884,27 @@ ah_hmac_sha2_256_result(state, addr) opad = (u_char *)(ipad + 64); ctxt = (SHA256_CTX *)(opad + 64); - SHA256_Final((caddr_t)&digest[0], ctxt); + SHA256_Final((u_int8_t *)addr, ctxt); bzero(ctxt, sizeof(*ctxt)); SHA256_Init(ctxt); SHA256_Update(ctxt, opad, 64); - SHA256_Update(ctxt, (caddr_t)&digest[0], sizeof(digest)); - SHA256_Final((caddr_t)&digest[0], ctxt); - - bcopy(&digest[0], (void *)addr, HMACSIZE); + SHA256_Update(ctxt, (const u_int8_t *)addr, SHA256_DIGEST_LENGTH); + SHA256_Final((u_int8_t *)addr, ctxt); FREE(state->foo, M_TEMP); } +static int +ah_sumsiz_sha2_384(sav) + struct secasvar *sav; +{ + if (!sav) + return -1; + // return half the output size (in bytes), as per rfc 4868 + return 24; // 384/(8*2) +} + static int ah_hmac_sha2_384_mature(sav) struct secasvar *sav; @@ -904,43 +952,43 @@ ah_hmac_sha2_384_init(state, sav) panic("ah_hmac_sha2_384_init: what?"); state->sav = sav; - state->foo = (void *)_MALLOC(64 + 64 + sizeof(SHA384_CTX), + state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA384_CTX), M_TEMP, M_NOWAIT); if (!state->foo) return ENOBUFS; - bzero(state->foo, 64 + 
64 + sizeof(SHA384_CTX)); + bzero(state->foo, 128 + 128 + sizeof(SHA384_CTX)); ipad = (u_char *)state->foo; - opad = (u_char *)(ipad + 64); - ctxt = (SHA384_CTX *)(opad + 64); + opad = (u_char *)(ipad + 128); + ctxt = (SHA384_CTX *)(opad + 128); /* compress the key if necessery */ - if (64 < _KEYLEN(state->sav->key_auth)) { + if (128 < _KEYLEN(state->sav->key_auth)) { bzero(tk, sizeof(tk)); bzero(ctxt, sizeof(*ctxt)); SHA384_Init(ctxt); - SHA384_Update(ctxt, _KEYBUF(state->sav->key_auth), + SHA384_Update(ctxt, (const u_int8_t *) _KEYBUF(state->sav->key_auth), _KEYLEN(state->sav->key_auth)); SHA384_Final(&tk[0], ctxt); key = &tk[0]; - keylen = sizeof(tk) < 64 ? sizeof(tk) : 64; + keylen = sizeof(tk) < 128 ? sizeof(tk) : 128; } else { - key = _KEYBUF(state->sav->key_auth); + key = (u_char *) _KEYBUF(state->sav->key_auth); keylen = _KEYLEN(state->sav->key_auth); } - bzero(ipad, 64); - bzero(opad, 64); + bzero(ipad, 128); + bzero(opad, 128); bcopy(key, ipad, keylen); bcopy(key, opad, keylen); - for (i = 0; i < 64; i++) { + for (i = 0; i < 128; i++) { ipad[i] ^= 0x36; opad[i] ^= 0x5c; } bzero(ctxt, sizeof(*ctxt)); SHA384_Init(ctxt); - SHA384_Update(ctxt, ipad, 64); + SHA384_Update(ctxt, ipad, 128); return 0; } @@ -956,8 +1004,8 @@ ah_hmac_sha2_384_loop(state, addr, len) if (!state || !state->foo) panic("ah_hmac_sha2_384_loop: what?"); - ctxt = (SHA384_CTX *)(((u_char *)state->foo) + 128); - SHA384_Update(ctxt, (caddr_t)addr, (size_t)len); + ctxt = (SHA384_CTX *)(((u_char *)state->foo) + 256); + SHA384_Update(ctxt, (const u_int8_t *)addr, (size_t)len); } static void @@ -965,7 +1013,6 @@ ah_hmac_sha2_384_result(state, addr) struct ah_algorithm_state *state; caddr_t addr; { - u_char digest[SHA384_DIGEST_LENGTH]; u_char *ipad; u_char *opad; SHA384_CTX *ctxt; @@ -974,22 +1021,30 @@ ah_hmac_sha2_384_result(state, addr) panic("ah_hmac_sha2_384_result: what?"); ipad = (u_char *)state->foo; - opad = (u_char *)(ipad + 64); - ctxt = (SHA384_CTX *)(opad + 64); + opad = (u_char 
*)(ipad + 128); + ctxt = (SHA384_CTX *)(opad + 128); - SHA384_Final((caddr_t)&digest[0], ctxt); + SHA384_Final((u_int8_t *)addr, ctxt); bzero(ctxt, sizeof(*ctxt)); SHA384_Init(ctxt); - SHA384_Update(ctxt, opad, 64); - SHA384_Update(ctxt, (caddr_t)&digest[0], sizeof(digest)); - SHA384_Final((caddr_t)&digest[0], ctxt); - - bcopy(&digest[0], (void *)addr, HMACSIZE); + SHA384_Update(ctxt, opad, 128); + SHA384_Update(ctxt, (const u_int8_t *)addr, SHA384_DIGEST_LENGTH); + SHA384_Final((u_int8_t *)addr, ctxt); FREE(state->foo, M_TEMP); } +static int +ah_sumsiz_sha2_512(sav) + struct secasvar *sav; +{ + if (!sav) + return -1; + // return half the output size (in bytes), as per rfc 4868 + return 32; // 512/(8*2) +} + static int ah_hmac_sha2_512_mature(sav) struct secasvar *sav; @@ -1037,43 +1092,43 @@ ah_hmac_sha2_512_init(state, sav) panic("ah_hmac_sha2_512_init: what?"); state->sav = sav; - state->foo = (void *)_MALLOC(64 + 64 + sizeof(SHA512_CTX), + state->foo = (void *)_MALLOC(128 + 128 + sizeof(SHA512_CTX), M_TEMP, M_NOWAIT); if (!state->foo) return ENOBUFS; - bzero(state->foo, 64 + 64 + sizeof(SHA512_CTX)); + bzero(state->foo, 128 + 128 + sizeof(SHA512_CTX)); ipad = (u_char *)state->foo; - opad = (u_char *)(ipad + 64); - ctxt = (SHA512_CTX *)(opad + 64); + opad = (u_char *)(ipad + 128); + ctxt = (SHA512_CTX *)(opad + 128); /* compress the key if necessery */ - if (64 < _KEYLEN(state->sav->key_auth)) { + if (128 < _KEYLEN(state->sav->key_auth)) { bzero(tk, sizeof(tk)); bzero(ctxt, sizeof(*ctxt)); SHA512_Init(ctxt); - SHA512_Update(ctxt, _KEYBUF(state->sav->key_auth), + SHA512_Update(ctxt, (const u_int8_t *) _KEYBUF(state->sav->key_auth), _KEYLEN(state->sav->key_auth)); SHA512_Final(&tk[0], ctxt); key = &tk[0]; - keylen = sizeof(tk) < 64 ? sizeof(tk) : 64; + keylen = sizeof(tk) < 128 ? 
sizeof(tk) : 128; } else { - key = _KEYBUF(state->sav->key_auth); + key = (u_char *) _KEYBUF(state->sav->key_auth); keylen = _KEYLEN(state->sav->key_auth); } - bzero(ipad, 64); - bzero(opad, 64); + bzero(ipad, 128); + bzero(opad, 128); bcopy(key, ipad, keylen); bcopy(key, opad, keylen); - for (i = 0; i < 64; i++) { + for (i = 0; i < 128; i++) { ipad[i] ^= 0x36; opad[i] ^= 0x5c; } bzero(ctxt, sizeof(*ctxt)); SHA512_Init(ctxt); - SHA512_Update(ctxt, ipad, 64); + SHA512_Update(ctxt, ipad, 128); return 0; } @@ -1089,8 +1144,8 @@ ah_hmac_sha2_512_loop(state, addr, len) if (!state || !state->foo) panic("ah_hmac_sha2_512_loop: what?"); - ctxt = (SHA512_CTX *)(((u_char *)state->foo) + 128); - SHA512_Update(ctxt, (caddr_t)addr, (size_t)len); + ctxt = (SHA512_CTX *)(((u_char *)state->foo) + 256); + SHA512_Update(ctxt, (const u_int8_t *) addr, (size_t)len); } static void @@ -1098,7 +1153,6 @@ ah_hmac_sha2_512_result(state, addr) struct ah_algorithm_state *state; caddr_t addr; { - u_char digest[SHA512_DIGEST_LENGTH]; u_char *ipad; u_char *opad; SHA512_CTX *ctxt; @@ -1107,18 +1161,16 @@ ah_hmac_sha2_512_result(state, addr) panic("ah_hmac_sha2_512_result: what?"); ipad = (u_char *)state->foo; - opad = (u_char *)(ipad + 64); - ctxt = (SHA512_CTX *)(opad + 64); + opad = (u_char *)(ipad + 128); + ctxt = (SHA512_CTX *)(opad + 128); - SHA512_Final((caddr_t)&digest[0], ctxt); + SHA512_Final((u_int8_t *)addr, ctxt); bzero(ctxt, sizeof(*ctxt)); SHA512_Init(ctxt); - SHA512_Update(ctxt, opad, 64); - SHA512_Update(ctxt, (caddr_t)&digest[0], sizeof(digest)); - SHA512_Final((caddr_t)&digest[0], ctxt); - - bcopy(&digest[0], (void *)addr, HMACSIZE); + SHA512_Update(ctxt, opad, 128); + SHA512_Update(ctxt, (const u_int8_t *)addr, SHA512_DIGEST_LENGTH); + SHA512_Final((u_int8_t *)addr, ctxt); FREE(state->foo, M_TEMP); } @@ -1323,8 +1375,9 @@ ah4_calccksum(m, ahdat, len, algo, sav) break; i += l; } + p = mtod(n, u_char *) + sizeof(struct ip); - (algo->update)(&algos, p, hlen - sizeof(struct ip)); 
+ (algo->update)(&algos, (caddr_t)p, hlen - sizeof(struct ip)); m_free(n); n = NULL; @@ -1400,7 +1453,7 @@ ah4_calccksum(m, ahdat, len, algo, sav) goto fail; } - (algo->result)(&algos, &sumbuf[0]); + (algo->result)(&algos, (caddr_t) &sumbuf[0]); bcopy(&sumbuf[0], ahdat, (*algo->sumsiz)(sav)); if (n) @@ -1627,7 +1680,7 @@ ah6_calccksum(m, ahdat, len, algo, sav) goto fail; } - (algo->result)(&algos, &sumbuf[0]); + (algo->result)(&algos, (caddr_t) &sumbuf[0]); bcopy(&sumbuf[0], ahdat, (*algo->sumsiz)(sav)); /* just in case */ diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index b79bf0895..fcffc1ded 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ah_input.c,v 1.1.2.6 2002/04/28 05:40:26 suz Exp $ */ /* $KAME: ah_input.c,v 1.67 2002/01/07 11:39:56 kjc Exp $ */ @@ -209,7 +237,7 @@ ah4_input(struct mbuf *m, int off) if (siz1 < siz) { ipseclog((LOG_NOTICE, "sum length too short in IPv4 AH input " "(%lu, should be at least %lu): %s\n", - (u_long)siz1, (u_long)siz, + (u_int32_t)siz1, (u_int32_t)siz, ipsec4_logpacketstr(ip, spi))); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); goto fail; @@ -217,7 +245,7 @@ ah4_input(struct mbuf *m, int off) if ((ah->ah_len << 2) - sizoff != siz1) { ipseclog((LOG_NOTICE, "sum length mismatch in IPv4 AH input " "(%d should be %lu): %s\n", - (ah->ah_len << 2) - sizoff, (u_long)siz1, + (ah->ah_len << 2) - sizoff, (u_int32_t)siz1, ipsec4_logpacketstr(ip, spi))); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); goto fail; @@ -644,7 +672,7 @@ ah6_input(mp, offp) if (siz1 < siz) { ipseclog((LOG_NOTICE, "sum length too short in IPv6 AH input " "(%lu, should be at least %lu): %s\n", - (u_long)siz1, (u_long)siz, + (u_int32_t)siz1, (u_int32_t)siz, ipsec6_logpacketstr(ip6, spi))); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto fail; @@ -652,7 +680,7 @@ ah6_input(mp, offp) if ((ah->ah_len << 2) - sizoff != siz1) { ipseclog((LOG_NOTICE, "sum length mismatch in IPv6 AH input " "(%d should be %lu): %s\n", - (ah->ah_len << 2) - sizoff, (u_long)siz1, + (ah->ah_len << 2) - sizoff, (u_int32_t)siz1, ipsec6_logpacketstr(ip6, spi))); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto fail; diff --git a/bsd/netinet6/ah_output.c b/bsd/netinet6/ah_output.c index 13c4157b0..918196d8e 100644 --- a/bsd/netinet6/ah_output.c +++ b/bsd/netinet6/ah_output.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ah_output.c,v 1.1.2.3 2001/07/03 11:01:49 ume Exp $ */ /* $KAME: ah_output.c,v 1.30 2001/02/21 00:50:53 itojun Exp $ */ diff --git a/bsd/netinet6/esp.h b/bsd/netinet6/esp.h index e2c59fc81..04aa45077 100644 --- a/bsd/netinet6/esp.h +++ b/bsd/netinet6/esp.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp.h,v 1.2.2.2 2001/07/03 11:01:49 ume Exp $ */ /* $KAME: esp.h,v 1.16 2000/10/18 21:28:00 itojun Exp $ */ diff --git a/bsd/netinet6/esp6.h b/bsd/netinet6/esp6.h index 55c0d8043..e0c40b37f 100644 --- a/bsd/netinet6/esp6.h +++ b/bsd/netinet6/esp6.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp6.h,v 1.2.2.2 2001/07/03 11:01:49 ume Exp $ */ /* $KAME: esp.h,v 1.16 2000/10/18 21:28:00 itojun Exp $ */ @@ -44,6 +72,6 @@ extern int esp6_output(struct mbuf *, u_char *, struct mbuf *, extern int esp6_input(struct mbuf **, int *); extern void esp6_ctlinput(int, struct sockaddr *, void *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_ESP6_H_ +#endif /* _NETINET6_ESP6_H_ */ diff --git a/bsd/netinet6/esp_core.c b/bsd/netinet6/esp_core.c index bd51351c9..3bae5bd18 100644 --- a/bsd/netinet6/esp_core.c +++ b/bsd/netinet6/esp_core.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp_core.c,v 1.1.2.4 2002/03/26 10:12:29 ume Exp $ */ /* $KAME: esp_core.c,v 1.50 2000/11/02 12:27:38 itojun Exp $ */ @@ -250,9 +278,9 @@ esp_schedule(algo, sav) lck_mtx_unlock(sadb_mutex); return 0; } - + sav->schedlen = (*algo->schedlen)(algo); - if (sav->schedlen < 0) { + if ((signed) sav->schedlen < 0) { lck_mtx_unlock(sadb_mutex); return EINVAL; } @@ -504,7 +532,7 @@ esp_blowfish_schedule( lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); BF_set_key((BF_KEY *)sav->sched, _KEYLEN(sav->key_enc), - _KEYBUF(sav->key_enc)); + (u_int8_t *) _KEYBUF(sav->key_enc)); return 0; } @@ -562,7 +590,7 @@ esp_cast128_schedule( struct secasvar *sav) { lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - set_cast128_subkey((u_int32_t *)sav->sched, _KEYBUF(sav->key_enc), + set_cast128_subkey((u_int32_t *)sav->sched, (u_int8_t *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc)); return 0; } @@ -737,7 +765,7 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen) } /* grab iv */ - m_copydata(m, ivoff, ivlen, iv); + m_copydata(m, ivoff, ivlen, (caddr_t) iv); /* extend iv */ if (ivlen == blocklen) @@ -758,7 +786,7 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen) if (m->m_pkthdr.len < bodyoff) { ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n", - algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + algo->name, 
m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } @@ -799,7 +827,7 @@ esp_cbc_decrypt(m, off, sav, algo, ivlen) sp = mtod(s, u_int8_t *) + sn; } else { /* body is non-continuous */ - m_copydata(s, sn, blocklen, sbuf); + m_copydata(s, sn, blocklen, (caddr_t) sbuf); sp = sbuf; } @@ -938,11 +966,11 @@ esp_cbc_encrypt( /* put iv into the packet. if we are in derived mode, use seqno. */ if (derived) - m_copydata(m, ivoff, ivlen, iv); + m_copydata(m, ivoff, ivlen, (caddr_t) iv); else { bcopy(sav->iv, iv, ivlen); /* maybe it is better to overwrite dest, not source */ - m_copyback(m, ivoff, ivlen, iv); + m_copyback(m, ivoff, ivlen, (caddr_t) iv); } /* extend iv */ @@ -964,14 +992,14 @@ esp_cbc_encrypt( if (m->m_pkthdr.len < bodyoff) { ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n", - algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } if ((m->m_pkthdr.len - bodyoff) % blocklen) { ipseclog((LOG_ERR, "esp_cbc_encrypt %s: " "payload length must be multiple of %lu\n", - algo->name, (unsigned long)algo->padbound)); + algo->name, (u_int32_t)algo->padbound)); m_freem(m); return EINVAL; } @@ -1005,7 +1033,7 @@ esp_cbc_encrypt( sp = mtod(s, u_int8_t *) + sn; } else { /* body is non-continuous */ - m_copydata(s, sn, blocklen, sbuf); + m_copydata(s, sn, blocklen, (caddr_t) sbuf); sp = sbuf; } @@ -1136,7 +1164,7 @@ esp_auth(m0, skip, length, sav, sum) if (sizeof(sumbuf) < siz) { ipseclog((LOG_DEBUG, "esp_auth: AH_MAXSUMSIZE is too small: siz=%lu\n", - (u_long)siz)); + (u_int32_t)siz)); KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 4,0,0,0,0); return EINVAL; } @@ -1165,17 +1193,17 @@ esp_auth(m0, skip, length, sav, sum) panic("mbuf chain?"); if (m->m_len - off < length) { - (*algo->update)(&s, mtod(m, u_char *) + off, + (*algo->update)(&s, (caddr_t)(mtod(m, u_char *) + off), m->m_len - off); length -= m->m_len - off; m = m->m_next; off = 0; } else { - (*algo->update)(&s, 
mtod(m, u_char *) + off, length); + (*algo->update)(&s, (caddr_t)(mtod(m, u_char *) + off), length); break; } } - (*algo->result)(&s, sumbuf); + (*algo->result)(&s, (caddr_t) sumbuf); bcopy(sumbuf, sum, siz); /*XXX*/ KERNEL_DEBUG(DBG_FNC_ESPAUTH | DBG_FUNC_END, 6,0,0,0,0); return 0; diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index ef9f0af80..b228fb035 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp_input.c,v 1.1.2.3 2001/07/03 11:01:50 ume Exp $ */ /* $KAME: esp_input.c,v 1.55 2001/03/23 08:08:47 itojun Exp $ */ @@ -233,12 +261,12 @@ esp4_input(m, off) if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", - (u_long)siz)); + (u_int32_t)siz)); IPSEC_STAT_INCREMENT(ipsecstat.in_inval); goto bad; } - m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); + m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", @@ -678,12 +706,12 @@ esp6_input(mp, offp) if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", - (u_long)siz)); + (u_int32_t)siz)); IPSEC_STAT_INCREMENT(ipsec6stat.in_inval); goto bad; } - m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); + m_copydata(m, m->m_pkthdr.len - siz, siz, (caddr_t) &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", diff --git a/bsd/netinet6/esp_output.c b/bsd/netinet6/esp_output.c index 21e449b7d..8d16d2c62 100644 --- a/bsd/netinet6/esp_output.c +++ b/bsd/netinet6/esp_output.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp_output.c,v 1.1.2.3 2002/04/28 05:40:26 suz Exp $ */ /* $KAME: esp_output.c,v 1.44 2001/07/26 06:53:15 jinmei Exp $ */ @@ -176,10 +204,10 @@ esp_hdrsiz(isr) * esp_max_ivlen() = max ivlen for CBC mode * 17 = (maximum padding length without random padding length) * + (Pad Length field) + (Next Header field). - * 16 = maximum ICV we support. + * 64 = maximum ICV we support. * sizeof(struct udphdr) in case NAT traversal is used */ - return sizeof(struct newesp) + esp_max_ivlen() + 17 + 16 + sizeof(struct udphdr); + return sizeof(struct newesp) + esp_max_ivlen() + 17 + AH_MAXSUMSIZE + sizeof(struct udphdr); } /* diff --git a/bsd/netinet6/esp_rijndael.c b/bsd/netinet6/esp_rijndael.c index 0b6df997a..0f3ce7c27 100644 --- a/bsd/netinet6/esp_rijndael.c +++ b/bsd/netinet6/esp_rijndael.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp_rijndael.c,v 1.1.2.1 2001/07/03 11:01:50 ume Exp $ */ /* $KAME: esp_rijndael.c,v 1.4 2001/03/02 05:53:05 itojun Exp $ */ @@ -73,8 +101,8 @@ esp_aes_schedule( lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); aes_ctx *ctx = (aes_ctx*)sav->sched; - aes_decrypt_key(_KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->decrypt); - aes_encrypt_key(_KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->encrypt); + aes_decrypt_key((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->decrypt); + aes_encrypt_key((const unsigned char *) _KEYBUF(sav->key_enc), _KEYLEN(sav->key_enc), &ctx->encrypt); return 0; } @@ -146,7 +174,7 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) if (m->m_pkthdr.len < bodyoff) { ipseclog((LOG_ERR, "esp_cbc_decrypt %s: bad len %d/%lu\n", - algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } @@ -159,7 +187,7 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) } /* grab iv */ - m_copydata(m, ivoff, ivlen, iv); + m_copydata(m, ivoff, ivlen, (caddr_t) iv); s = m; soff = sn = dn = 0; @@ -192,7 +220,7 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) len -= len % AES_BLOCKLEN; // full blocks only } else { /* body is non-continuous */ - m_copydata(s, sn, AES_BLOCKLEN, sbuf); + m_copydata(s, sn, AES_BLOCKLEN, (caddr_t) sbuf); sp = sbuf; len = AES_BLOCKLEN; // 1 block only in sbuf } @@ -305,11 +333,11 @@ esp_cbc_encrypt_aes( /* put iv into the packet */ m_copyback(m, ivoff, ivlen, sav->iv); - ivp = sav->iv; + ivp = (u_int8_t *) sav->iv; if (m->m_pkthdr.len < bodyoff) { ipseclog((LOG_ERR, "esp_cbc_encrypt %s: bad len %d/%lu\n", - algo->name, m->m_pkthdr.len, (unsigned long)bodyoff)); + algo->name, m->m_pkthdr.len, (u_int32_t)bodyoff)); m_freem(m); return EINVAL; } @@ -352,7 +380,7 @@ esp_cbc_encrypt_aes( len -= len % AES_BLOCKLEN; // full blocks only } else { /* body is 
non-continuous */ - m_copydata(s, sn, AES_BLOCKLEN, sbuf); + m_copydata(s, sn, AES_BLOCKLEN, (caddr_t) sbuf); sp = sbuf; len = AES_BLOCKLEN; // 1 block only in sbuf } diff --git a/bsd/netinet6/esp_rijndael.h b/bsd/netinet6/esp_rijndael.h index 9d60216a9..1436da6b2 100644 --- a/bsd/netinet6/esp_rijndael.h +++ b/bsd/netinet6/esp_rijndael.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/esp_rijndael.h,v 1.1.2.1 2001/07/03 11:01:50 ume Exp $ */ /* $KAME: esp_rijndael.h,v 1.1 2000/09/20 18:15:22 itojun Exp $ */ @@ -40,4 +68,4 @@ int esp_cbc_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index aaac2e2b9..ea75e5acb 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */ /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */ @@ -147,8 +175,9 @@ frag6_input(mp, offp) int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ struct ifnet *dstifp; + struct ifaddr *ifa = NULL; #ifdef IN6_IFSTAT_STRICT - static struct route_in6 ro; + struct route_in6 ro; struct sockaddr_in6 *dst; #endif @@ -165,22 +194,23 @@ frag6_input(mp, offp) dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ + bzero(&ro, sizeof (ro)); dst = (struct sockaddr_in6 *)&ro.ro_dst; - if (ro.ro_rt - && ((ro.ro_rt->rt_flags & RTF_UP) == 0 - || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof (struct sockaddr_in6); + dst->sin6_addr = ip6->ip6_dst; + + rtalloc((struct route *)&ro); + if (ro.ro_rt != NULL) { + RT_LOCK(ro.ro_rt); + if ((ifa = ro.ro_rt->rt_ifa) != NULL) { + ifaref(ifa); + dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; + } + RT_UNLOCK(ro.ro_rt); rtfree(ro.ro_rt); - ro.ro_rt = (struct rtentry *)0; + ro.ro_rt = NULL; } - if (ro.ro_rt == NULL) { - bzero(dst, sizeof(*dst)); - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_addr = ip6->ip6_dst; - } - rtalloc((struct route *)&ro); - if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL) - dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; #else /* we are violating the spec, this is not the destination interface */ if ((m->m_flags & M_PKTHDR) != 0) @@ -191,6 +221,8 @@ frag6_input(mp, offp) if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); + if (ifa != NULL) + ifafree(ifa); return IPPROTO_DONE; } @@ -206,6 +238,8 @@ frag6_input(mp, offp) ICMP6_PARAMPROB_HEADER, offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, 
ifs6_reass_fail); + if (ifa != NULL) + ifafree(ifa); return IPPROTO_DONE; } @@ -297,6 +331,8 @@ frag6_input(mp, offp) offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return(IPPROTO_DONE); } } @@ -305,6 +341,8 @@ frag6_input(mp, offp) offset - sizeof(struct ip6_frag) + offsetof(struct ip6_frag, ip6f_offlg)); frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return(IPPROTO_DONE); } /* @@ -462,12 +500,16 @@ frag6_input(mp, offp) af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return IPPROTO_DONE; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return IPPROTO_DONE; } @@ -553,6 +595,8 @@ frag6_input(mp, offp) *offp = offset; frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return nxt; dropfrag: @@ -560,6 +604,8 @@ frag6_input(mp, offp) ip6stat.ip6s_fragdropped++; m_freem(m); frag6_doing_reass = 0; + if (ifa != NULL) + ifafree(ifa); return IPPROTO_DONE; } diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index db0662895..02d19734f 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/icmp6.c,v 1.6.2.6 2001/07/10 09:44:16 ume Exp $ */ /* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ @@ -109,8 +137,8 @@ extern struct domain inet6domain; extern struct ip6protosw inet6sw[]; extern struct ip6protosw *ip6_protox[]; -extern u_long rip_sendspace; -extern u_long rip_recvspace; +extern uint32_t rip_sendspace; +extern uint32_t rip_recvspace; struct icmp6stat icmp6stat; @@ -142,6 +170,10 @@ static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, static int icmp6_notify_error(struct mbuf *, int, int, int); #ifdef COMPAT_RFC1885 +/* + * XXX: Compiled out for now, but if enabled we must use a lock for accesses, + * or easier, define it locally inside icmp6_reflect() and don't cache. 
+ */ static struct route_in6 icmp6_reflect_rt; #endif @@ -436,6 +468,7 @@ icmp6_input(mp, offp) if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); + switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); @@ -466,6 +499,7 @@ icmp6_input(mp, offp) default: goto badcode; } + goto deliver; break; @@ -1095,7 +1129,7 @@ icmp6_notify_error(m, off, icmp6len, code) } return(0); - freeit: +freeit: m_freem(m); return(-1); } @@ -1114,7 +1148,6 @@ icmp6_mtudisc_update(ip6cp, validated) if (!validated) return; - /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned @@ -1124,7 +1157,6 @@ icmp6_mtudisc_update(ip6cp, validated) if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; - bzero(&sin6, sizeof(sin6)); sin6.sin6_family = PF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); @@ -1136,20 +1168,22 @@ icmp6_mtudisc_update(ip6cp, validated) } /* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */ rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING | RTF_PRCLONING); - - if (rt && (rt->rt_flags & RTF_HOST) - && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { - if (mtu < IPV6_MMTU) { - /* xxx */ - rt->rt_rmx.rmx_locks |= RTV_MTU; - } else if (mtu < rt->rt_ifp->if_mtu && - rt->rt_rmx.rmx_mtu > mtu) { - icmp6stat.icp6s_pmtuchg++; - rt->rt_rmx.rmx_mtu = mtu; + if (rt != NULL) { + RT_LOCK(rt); + if ((rt->rt_flags & RTF_HOST) && + !(rt->rt_rmx.rmx_locks & RTV_MTU)) { + if (mtu < IPV6_MMTU) { + /* xxx */ + rt->rt_rmx.rmx_locks |= RTV_MTU; + } else if (mtu < rt->rt_ifp->if_mtu && + rt->rt_rmx.rmx_mtu > mtu) { + icmp6stat.icp6s_pmtuchg++; + rt->rt_rmx.rmx_mtu = mtu; + } } - } - if (rt) + RT_UNLOCK(rt); rtfree(rt); + } } /* @@ -2006,12 +2040,13 @@ icmp6_reflect(m, off) int mtu = IPV6_MMTU; struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst; #endif + u_int32_t oflow; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { 
nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", - (u_long)off, (u_long)sizeof(struct ip6_hdr), + (u_int32_t)off, (u_int32_t)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } @@ -2085,7 +2120,9 @@ icmp6_reflect(m, off) * does not fit in with (return) path MTU, but the description was * removed in the new spec. */ - if (icmp6_reflect_rt.ro_rt == 0 || + if (icmp6_reflect_rt.ro_rt == NULL || + !(icmp6_reflect_rt.ro_rt->rt_flags & RTF_UP) || + icmp6_reflect_rt.ro_rt->generation_id != route_generation || ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) { if (icmp6_reflect_rt.ro_rt) { rtfree(icmp6_reflect_rt.ro_rt); @@ -2103,9 +2140,11 @@ icmp6_reflect(m, off) if (icmp6_reflect_rt.ro_rt == 0) goto bad; + RT_LOCK(icmp6_reflect_rt.ro_rt); if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST) && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu) mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu; + RT_UNLOCK(icmp6_reflect_rt.ro_rt); if (mtu < m->m_pkthdr.len) { plen -= (m->m_pkthdr.len - mtu); @@ -2146,10 +2185,8 @@ icmp6_reflect(m, off) */ bzero(&ro, sizeof(ro)); src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &src_storage, &e); - if (ro.ro_rt) { + if (ro.ro_rt) rtfree(ro.ro_rt); /* XXX: we could use this */ - ro.ro_rt = NULL; - } if (src == NULL) { nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " @@ -2161,16 +2198,23 @@ icmp6_reflect(m, off) ip6->ip6_src = *src; + oflow = ip6->ip6_flow; /* Save for later */ ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; + if (icmp6->icmp6_type == ICMP6_ECHO_REPLY && icmp6->icmp6_code == 0) { + ip6->ip6_flow |= (oflow & htonl(0x0ff00000)); + } ip6->ip6_nxt = IPPROTO_ICMPV6; - if (m->m_pkthdr.rcvif) { + if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim) { /* XXX: This may not be the outgoing interface */ + lck_rw_lock_shared(nd_if_rwlock); ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim; - } else + 
lck_rw_done(nd_if_rwlock); + } else { ip6->ip6_hlim = ip6_defhlim; - + } + /* Use the same traffic class as in the request to match IPv4 */ icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); @@ -2291,14 +2335,16 @@ icmp6_redirect_input(m, off) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); if (rt) { + RT_LOCK(rt); if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + RT_UNLOCK(rt); rtfree(rt); goto bad; } @@ -2311,6 +2357,7 @@ icmp6_redirect_input(m, off) "%s\n", ip6_sprintf(gw6), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); + RT_UNLOCK(rt); rtfree(rt); goto bad; } @@ -2321,6 +2368,7 @@ icmp6_redirect_input(m, off) icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } + RT_UNLOCK(rt); rtfree(rt); rt = NULL; } @@ -2449,6 +2497,9 @@ icmp6_redirect_output(m0, rt) icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); + if (rt != NULL) + RT_LOCK(rt); + /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; @@ -2470,8 +2521,13 @@ icmp6_redirect_output(m0, rt) src_sa.sin6_addr = sip6->ip6_src; /* we don't currently use sin6_scope_id, but eventually use it */ src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src); - if (nd6_is_addr_neighbor(&src_sa, ifp, 0) == 0) + RT_UNLOCK(rt); + if (nd6_is_addr_neighbor(&src_sa, ifp, 0) == 0) { + /* already unlocked */ + rt = NULL; goto fail; + } + RT_LOCK(rt); if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? 
*/ @@ -2558,6 +2614,8 @@ icmp6_redirect_output(m0, rt) bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } + RT_UNLOCK(rt); + rt = NULL; p = (u_char *)(nd_rd + 1); @@ -2572,14 +2630,19 @@ icmp6_redirect_output(m0, rt) struct nd_opt_hdr *nd_opt; char *lladdr; + /* Callee returns a locked route upon success */ rt_router = nd6_lookup(router_ll6, 0, ifp, 0); if (!rt_router) goto nolladdropt; + RT_LOCK_ASSERT_HELD(rt_router); len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ - if (len + (p - (u_char *)ip6) > maxlen) + if (len + (p - (u_char *)ip6) > maxlen) { + RT_REMREF_LOCKED(rt_router); + RT_UNLOCK(rt_router); goto nolladdropt; + } if (!(rt_router->rt_flags & RTF_GATEWAY) && (rt_router->rt_flags & RTF_LLINFO) && (rt_router->rt_gateway->sa_family == AF_LINK) && @@ -2592,6 +2655,8 @@ icmp6_redirect_output(m0, rt) bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen); p += len; } + RT_REMREF_LOCKED(rt_router); + RT_UNLOCK(rt_router); } nolladdropt:; @@ -2713,6 +2778,8 @@ noredhdropt:; return; fail: + if (rt != NULL) + RT_UNLOCK(rt); if (m) m_freem(m); if (m0) @@ -2842,6 +2909,8 @@ icmp6_dgram_ctloutput(struct socket *so, struct sockopt *sopt) case IPV6_LEAVE_GROUP: case IPV6_PORTRANGE: case IPV6_IPSEC_POLICY: + case IPV6_RECVTCLASS: + case IPV6_TCLASS: return ip6_ctloutput(so, sopt); default: @@ -2898,10 +2967,16 @@ icmp6_dgram_send(struct socket *so, __unused int flags, struct mbuf *m, struct s icmp6 = mtod(m, struct icmp6_hdr *); /* - * Allow only to send echo request type 128 with code 0 + * Allow only to send echo request and node information request * See RFC 2463 for Echo Request Message format */ - if (icmp6->icmp6_type != 128 || icmp6->icmp6_code != 0) { + if ((icmp6->icmp6_type == ICMP6_ECHO_REQUEST && icmp6->icmp6_code == 0) || + (icmp6->icmp6_type == ICMP6_NI_QUERY && + (icmp6->icmp6_code == ICMP6_NI_SUBJ_IPV6 || + icmp6->icmp6_code == ICMP6_NI_SUBJ_FQDN))) { + /* Good */ + ; + } else { error = EPERM; 
goto bad; } diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c index c5da17932..5fc80e330 100644 --- a/bsd/netinet6/in6.c +++ b/bsd/netinet6/in6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,7 +106,11 @@ #include #include #include + #include +#include +#include +#include #include #include @@ -137,6 +141,10 @@ #include +#if PF +#include +#endif /* PF */ + #ifndef __APPLE__ MALLOC_DEFINE(M_IPMADDR, "in6_multi", "internet multicast address"); #endif @@ -166,11 +174,41 @@ static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t, static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, struct sockaddr_in6 *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *, int); +static struct in6_ifaddr *in6_ifaddr_alloc(int); +static void in6_ifaddr_free(struct ifaddr *); +static void in6_ifaddr_trace(struct ifaddr *, int); +static struct in6_aliasreq *in6_aliasreq_to_native(void *, int, + struct in6_aliasreq *); struct in6_multihead in6_multihead; /* XXX BSS initialization */ extern lck_mtx_t *nd6_mutex; +extern lck_mtx_t *ip6_mutex; extern int in6_init2done; +struct in6_ifaddr_dbg { + struct in6_ifaddr in6ifa; /* in6_ifaddr */ + struct in6_ifaddr in6ifa_old; /* saved in6_ifaddr */ + u_int16_t in6ifa_refhold_cnt; /* # of ifaref */ + u_int16_t in6ifa_refrele_cnt; /* # of ifafree */ + /* + * Alloc and free callers. + */ + ctrace_t in6ifa_alloc; + ctrace_t in6ifa_free; + /* + * Circular lists of ifaref and ifafree callers. 
+ */ + ctrace_t in6ifa_refhold[CTRACE_HIST_SIZE]; + ctrace_t in6ifa_refrele[CTRACE_HIST_SIZE]; +}; + +static unsigned int in6ifa_debug; /* debug flags */ +static unsigned int in6ifa_size; /* size of zone element */ +static struct zone *in6ifa_zone; /* zone for in6_ifaddr */ + +#define IN6IFA_ZONE_MAX 64 /* maximum elements in zone */ +#define IN6IFA_ZONE_NAME "in6_ifaddr" /* zone name */ + /* * Subroutine for in6_ifaddloop() and in6_ifremloop(). * This routine does actual work. @@ -181,7 +219,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) struct sockaddr_in6 all1_sa; struct rtentry *nrt = NULL; int e; - + bzero(&all1_sa, sizeof(all1_sa)); all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); @@ -195,6 +233,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, * which changes the outgoing interface to the loopback interface. */ + lck_mtx_lock(rnh_lock); e = rtrequest_locked(cmd, ifa->ifa_addr, ifa->ifa_addr, (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); @@ -206,6 +245,8 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) e); } + if (nrt != NULL) + RT_LOCK(nrt); /* * Make sure rt_ifa be equal to IFA, the second argument of the * function. @@ -223,15 +264,18 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) * we end up reporting twice in such a case. Should we rather * omit the second report? */ - if (nrt) { + if (nrt != NULL) { rt_newaddrmsg(cmd, ifa, e, nrt); if (cmd == RTM_DELETE) { + RT_UNLOCK(nrt); rtfree_locked(nrt); } else { /* the cmd must be RTM_ADD here */ - rtunref(nrt); + RT_REMREF_LOCKED(nrt); + RT_UNLOCK(nrt); } } + lck_mtx_unlock(rnh_lock); } /* @@ -246,15 +290,21 @@ in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; - lck_mtx_lock(rt_mtx); /* If there is no loopback entry, allocate one. 
*/ - rt = rtalloc1_locked(ifa->ifa_addr, 0, 0UL); + rt = rtalloc1(ifa->ifa_addr, 0, 0); + if (rt != NULL) + RT_LOCK(rt); if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || - (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) + (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { + if (rt != NULL) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } in6_ifloop_request(RTM_ADD, ifa); - if (rt) - rtunref(rt); - lck_mtx_unlock(rt_mtx); + } else if (rt != NULL) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } } /* @@ -305,17 +355,23 @@ in6_ifremloop(struct ifaddr *ifa, int locked) * a subnet-router anycast address on an interface attahced * to a shared medium. */ - lck_mtx_lock(rt_mtx); - rt = rtalloc1_locked(ifa->ifa_addr, 0, 0UL); - if (rt != NULL && (rt->rt_flags & RTF_HOST) != 0 && - (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { - rtunref(rt); - in6_ifloop_request(RTM_DELETE, ifa); + rt = rtalloc1(ifa->ifa_addr, 0, 0); + if (rt != NULL) { + RT_LOCK(rt); + if ((rt->rt_flags & RTF_HOST) != 0 && + (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + in6_ifloop_request(RTM_DELETE, ifa); + } else { + RT_UNLOCK(rt); + } } - lck_mtx_unlock(rt_mtx); } } +#if 0 +/* Not used */ int in6_ifindex2scopeid(idx) int idx; @@ -324,10 +380,12 @@ in6_ifindex2scopeid(idx) struct ifaddr *ifa; struct sockaddr_in6 *sin6; - if (idx < 0 || if_index < idx) + ifnet_head_lock_shared(); + if (idx <= 0 || if_index < idx) { + ifnet_head_done(); return -1; + } - ifnet_head_lock_shared(); ifp = ifindex2ifnet[idx]; ifnet_head_done(); @@ -338,14 +396,17 @@ in6_ifindex2scopeid(idx) continue; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) { + int scopeid = sin6->sin6_scope_id & 0xffff; ifnet_lock_done(ifp); - return sin6->sin6_scope_id & 0xffff; + return scopeid; } } ifnet_lock_done(ifp); return -1; } +#endif + int in6_mask2len(mask, lim0) @@ -399,68 +460,117 @@ in6_len2mask(mask, len) mask->s6_addr8[i] = (0xff00 >> (len % 8)) & 0xff; } +void 
+in6_aliasreq_64_to_32(struct in6_aliasreq_64 *src, struct in6_aliasreq_32 *dst) +{ + bzero(dst, sizeof (*dst)); + bcopy(src->ifra_name, dst->ifra_name, sizeof (dst->ifra_name)); + dst->ifra_addr = src->ifra_addr; + dst->ifra_dstaddr = src->ifra_dstaddr; + dst->ifra_prefixmask = src->ifra_prefixmask; + dst->ifra_flags = src->ifra_flags; + dst->ifra_lifetime.ia6t_expire = src->ifra_lifetime.ia6t_expire; + dst->ifra_lifetime.ia6t_preferred = src->ifra_lifetime.ia6t_preferred; + dst->ifra_lifetime.ia6t_vltime = src->ifra_lifetime.ia6t_vltime; + dst->ifra_lifetime.ia6t_pltime = src->ifra_lifetime.ia6t_pltime; +} + +void +in6_aliasreq_32_to_64(struct in6_aliasreq_32 *src, struct in6_aliasreq_64 *dst) +{ + bzero(dst, sizeof (*dst)); + bcopy(src->ifra_name, dst->ifra_name, sizeof (dst->ifra_name)); + dst->ifra_addr = src->ifra_addr; + dst->ifra_dstaddr = src->ifra_dstaddr; + dst->ifra_prefixmask = src->ifra_prefixmask; + dst->ifra_flags = src->ifra_flags; + dst->ifra_lifetime.ia6t_expire = src->ifra_lifetime.ia6t_expire; + dst->ifra_lifetime.ia6t_preferred = src->ifra_lifetime.ia6t_preferred; + dst->ifra_lifetime.ia6t_vltime = src->ifra_lifetime.ia6t_vltime; + dst->ifra_lifetime.ia6t_pltime = src->ifra_lifetime.ia6t_pltime; +} + +static struct in6_aliasreq * +in6_aliasreq_to_native(void *data, int data_is_64, struct in6_aliasreq *dst) +{ +#if defined(__LP64__) + if (data_is_64) + dst = data; + else + in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)data, + (struct in6_aliasreq_64 *)dst); +#else + if (data_is_64) + in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)data, + (struct in6_aliasreq_32 *)dst); + else + dst = data; +#endif /* __LP64__ */ + return (dst); +} + #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) int -in6_control(so, cmd, data, ifp, p) - struct socket *so; - u_long cmd; - caddr_t data; - struct ifnet *ifp; - struct proc *p; +in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, + struct proc *p) 
{ struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct in6_ifaddr *ia = NULL; - struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; - int privileged, error = 0; - int index; + struct in6_aliasreq sifra; + struct in6_aliasreq *ifra = NULL; + struct sockaddr_in6 *sa6; + int index, privileged, error = 0; struct timeval timenow; + int p64 = proc_is64bit(p); getmicrotime(&timenow); - privileged = 0; -#ifdef __APPLE__ - if (p == NULL || !proc_suser(p)) -#else - if (p == NULL || !suser(p)) -#endif - privileged++; + privileged = (proc_suser(p) == 0); switch (cmd) { case SIOCGETSGCNT_IN6: - case SIOCGETMIFCNT_IN6: + case SIOCGETMIFCNT_IN6_32: + case SIOCGETMIFCNT_IN6_64: return (mrt6_ioctl(cmd, data)); } if (ifp == NULL) - return(EOPNOTSUPP); + return (EOPNOTSUPP); switch (cmd) { case SIOCAUTOCONF_START: case SIOCAUTOCONF_STOP: - case SIOCLL_START: + case SIOCLL_START_32: + case SIOCLL_START_64: case SIOCLL_STOP: - case SIOCPROTOATTACH_IN6: + case SIOCPROTOATTACH_IN6_32: + case SIOCPROTOATTACH_IN6_64: case SIOCPROTODETACH_IN6: if (!privileged) - return(EPERM); + return (EPERM); break; case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: - case SIOCSDEFIFACE_IN6: + case SIOCSDEFIFACE_IN6_32: + case SIOCSDEFIFACE_IN6_64: case SIOCSIFINFO_FLAGS: if (!privileged) - return(EPERM); + return (EPERM); /* fall through */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: - case SIOCGDRLST_IN6: - case SIOCGPRLST_IN6: - case SIOCGNBRINFO_IN6: - case SIOCGDEFIFACE_IN6: - return(nd6_ioctl(cmd, data, ifp)); + case SIOCGDRLST_IN6_32: + case SIOCGDRLST_IN6_64: + case SIOCGPRLST_IN6_32: + case SIOCGPRLST_IN6_64: + case SIOCGNBRINFO_IN6_32: + case SIOCGNBRINFO_IN6_64: + case SIOCGDEFIFACE_IN6_32: + case SIOCGDEFIFACE_IN6_64: + return (nd6_ioctl(cmd, data, ifp)); } switch (cmd) { @@ -473,21 +583,22 @@ in6_control(so, cmd, data, ifp, p) log(LOG_NOTICE, "prefix ioctls are now invalidated. 
" "please use ifconfig.\n"); - return(EOPNOTSUPP); + return (EOPNOTSUPP); } switch (cmd) { case SIOCSSCOPE6: if (!privileged) - return(EPERM); - return(scope6_set(ifp, ifr->ifr_ifru.ifru_scope_id)); - break; + return (EPERM); + return (scope6_set(ifp, ifr->ifr_ifru.ifru_scope_id)); + /* NOTREACHED */ + case SIOCGSCOPE6: - return(scope6_get(ifp, ifr->ifr_ifru.ifru_scope_id)); - break; + return (scope6_get(ifp, ifr->ifr_ifru.ifru_scope_id)); + /* NOTREACHED */ + case SIOCGSCOPE6DEF: - return(scope6_get_default(ifr->ifr_ifru.ifru_scope_id)); - break; + return (scope6_get_default(ifr->ifr_ifru.ifru_scope_id)); } switch (cmd) { @@ -497,10 +608,52 @@ in6_control(so, cmd, data, ifp, p) return(EPERM); /* fall through */ case SIOCGLIFADDR: - return in6_lifaddr_ioctl(so, cmd, data, ifp, p); + return (in6_lifaddr_ioctl(so, cmd, data, ifp, p)); + } + + /* + * Point ifra and sa6 to the right places depending on the command. + */ + switch (cmd) { + case SIOCLL_START_32: + case SIOCAIFADDR_IN6_32: + /* + * Convert user ifra to the kernel form, when appropriate. + * This allows the conversion between different data models + * to be centralized, so that it can be passed around to other + * routines that are expecting the kernel form. 
+ */ + ifra = in6_aliasreq_to_native(data, 0, &sifra); + sa6 = (struct sockaddr_in6 *)&ifra->ifra_addr; + break; + + case SIOCLL_START_64: + case SIOCAIFADDR_IN6_64: + ifra = in6_aliasreq_to_native(data, 1, &sifra); + sa6 = (struct sockaddr_in6 *)&ifra->ifra_addr; + break; + + case SIOCSIFADDR_IN6: /* deprecated */ + case SIOCGIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: /* deprecated */ + case SIOCSIFNETMASK_IN6: /* deprecated */ + case SIOCGIFDSTADDR_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCDIFADDR_IN6: + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + case SIOCGIFAFLAG_IN6: + case SIOCGIFALIFETIME_IN6: + case SIOCSIFALIFETIME_IN6: + case SIOCGIFSTAT_IN6: + case SIOCGIFSTAT_ICMP6: + sa6 = &ifr->ifr_addr; + break; + + default: + sa6 = NULL; + break; } - -#ifdef __APPLE__ switch (cmd) { @@ -509,119 +662,119 @@ in6_control(so, cmd, data, ifp, p) ifp->if_eflags |= IFEF_ACCEPT_RTADVD; ifnet_lock_done(ifp); return (0); + /* NOTREACHED */ - case SIOCAUTOCONF_STOP: - { - struct in6_ifaddr *nia = NULL; - - ifnet_lock_exclusive(ifp); - ifp->if_eflags &= ~IFEF_ACCEPT_RTADVD; - ifnet_lock_done(ifp); + case SIOCAUTOCONF_STOP: { + struct in6_ifaddr *nia = NULL; - /* nuke prefix list. this may try to remove some ifaddrs as well */ - in6_purgeprefix(ifp); + ifnet_lock_exclusive(ifp); + ifp->if_eflags &= ~IFEF_ACCEPT_RTADVD; + ifnet_lock_done(ifp); - /* removed autoconfigured address from interface */ - lck_mtx_lock(nd6_mutex); - for (ia = in6_ifaddrs; ia != NULL; ia = nia) { - nia = ia->ia_next; - if (ia->ia_ifa.ifa_ifp != ifp) - continue; - if (ia->ia6_flags & IN6_IFF_AUTOCONF) - in6_purgeaddr(&ia->ia_ifa, 1); - } - lck_mtx_unlock(nd6_mutex); - return (0); - } + /* nuke prefix list. 
this may try to remove some ifaddrs as well */ + in6_purgeprefix(ifp); + /* removed autoconfigured address from interface */ + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) + continue; + if (ia->ia6_flags & IN6_IFF_AUTOCONF) + in6_purgeaddr(&ia->ia_ifa, 1); + } + lck_mtx_unlock(nd6_mutex); + return (0); + } - case SIOCLL_START: - - /* NOTE: All the interface specific DLIL attachements should be done here - * They are currently done in in6_ifattach() for the interfaces that need it + case SIOCLL_START_32: + case SIOCLL_START_64: + /* + * NOTE: All the interface specific DLIL attachements should + * be done here. They are currently done in in6_ifattach() + * for the interfaces that need it. */ + if (((ifp->if_type == IFT_PPP) || ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0)) && + ifra->ifra_addr.sin6_family == AF_INET6 && + ifra->ifra_dstaddr.sin6_family == AF_INET6) { + /* some interfaces may provide LinkLocal addresses */ + error = in6_if_up(ifp, ifra); + } else { + error = in6_if_up(ifp, 0); + } + return (error); + /* NOTREACHED */ - if (ifp->if_type == IFT_PPP && ifra->ifra_addr.sin6_family == AF_INET6 && - ifra->ifra_dstaddr.sin6_family == AF_INET6) - in6_if_up(ifp, ifra); /* PPP may provide LinkLocal addresses */ - else - in6_if_up(ifp, 0); - - return(0); + case SIOCLL_STOP: { + struct in6_ifaddr *nia = NULL; - case SIOCLL_STOP: - { - struct in6_ifaddr *nia = NULL; - - /* removed link local addresses from interface */ + /* removed link local addresses from interface */ - lck_mtx_lock(nd6_mutex); - for (ia = in6_ifaddrs; ia != NULL; ia = nia) { - nia = ia->ia_next; - if (ia->ia_ifa.ifa_ifp != ifp) - continue; - if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) - in6_purgeaddr(&ia->ia_ifa, 1); - } - lck_mtx_unlock(nd6_mutex); - return (0); + lck_mtx_lock(nd6_mutex); + for (ia = in6_ifaddrs; ia != NULL; ia = nia) { + nia = ia->ia_next; + if (ia->ia_ifa.ifa_ifp != ifp) + continue; + if 
(IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) + in6_purgeaddr(&ia->ia_ifa, 1); } + lck_mtx_unlock(nd6_mutex); + return (0); + } - - case SIOCPROTOATTACH_IN6: - + case SIOCPROTOATTACH_IN6_32: + case SIOCPROTOATTACH_IN6_64: switch (ifp->if_type) { #if IFT_BRIDGE /*OpenBSD 2.8*/ /* some of the interfaces are inherently not IPv6 capable */ case IFT_BRIDGE: return; + /* NOTREACHED */ #endif default: - if ((error = proto_plumb(PF_INET6, ifp))) - printf("SIOCPROTOATTACH_IN6: %s error=%d\n", - if_name(ifp), error); + printf("SIOCPROTOATTACH_IN6: %s " + "error=%d\n", if_name(ifp), error); break; } return (error); + /* NOTREACHED */ - case SIOCPROTODETACH_IN6: - - in6_purgeif(ifp); /* Cleanup interface routes and addresses */ + /* Cleanup interface routes and addresses */ + in6_purgeif(ifp); if ((error = proto_unplumb(PF_INET6, ifp))) - printf("SIOCPROTODETACH_IN6: %s error=%d\n", - if_name(ifp), error); - return(error); - + printf("SIOCPROTODETACH_IN6: %s error=%d\n", + if_name(ifp), error); + return (error); } -#endif + /* - * Find address for this interface, if it exists. + * Find address for this interface, if it exists; depending + * on the ioctl command, sa6 points to the address in ifra/ifr. */ - if (ifra->ifra_addr.sin6_family == AF_INET6) { /* XXX */ - struct sockaddr_in6 *sa6 = - (struct sockaddr_in6 *)&ifra->ifra_addr; - + if (sa6 != NULL && sa6->sin6_family == AF_INET6) { if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) { if (sa6->sin6_addr.s6_addr16[1] == 0) { /* link ID is not embedded by the user */ sa6->sin6_addr.s6_addr16[1] = - htons(ifp->if_index); + htons(ifp->if_index); } else if (sa6->sin6_addr.s6_addr16[1] != - htons(ifp->if_index)) { - return(EINVAL); /* link ID contradicts */ + htons(ifp->if_index)) { + return (EINVAL); /* link ID contradicts */ } if (sa6->sin6_scope_id) { if (sa6->sin6_scope_id != (u_int32_t)ifp->if_index) - return(EINVAL); + return (EINVAL); sa6->sin6_scope_id = 0; /* XXX: good way? 
*/ } } - ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr); + ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); + } else { + ia = NULL; } switch (cmd) { @@ -649,15 +802,16 @@ in6_control(so, cmd, data, ifp, p) error = EADDRNOTAVAIL; goto ioctl_cleanup; } - /* FALLTHROUGH */ - case SIOCAIFADDR_IN6: + case SIOCAIFADDR_IN6_32: + case SIOCAIFADDR_IN6_64: /* * We always require users to specify a valid IPv6 address for - * the corresponding operation. + * the corresponding operation. Use "sa6" instead of "ifra" + * since SIOCDIFADDR_IN6 falls thru above. */ - if (ifra->ifra_addr.sin6_family != AF_INET6 || - ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { + if (sa6->sin6_family != AF_INET6 || + sa6->sin6_len != sizeof(struct sockaddr_in6)) { error = EAFNOSUPPORT; goto ioctl_cleanup; } @@ -681,10 +835,8 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } break; - case SIOCSIFALIFETIME_IN6: - { - struct in6_addrlifetime *lt; + case SIOCSIFALIFETIME_IN6: if (!privileged) { error = EPERM; goto ioctl_cleanup; @@ -694,23 +846,41 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } /* sanity for overflow - beware unsigned */ - lt = &ifr->ifr_ifru.ifru_lifetime; - if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME - && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; - } - if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME - && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; + if (p64) { + struct in6_addrlifetime_64 *lt; + + lt = (struct in6_addrlifetime_64 *) + &ifr->ifr_ifru.ifru_lifetime; + if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME + && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; + } + if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME + && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; + } + } else { + struct in6_addrlifetime_32 *lt; + + lt = (struct in6_addrlifetime_32 *) + 
&ifr->ifr_ifru.ifru_lifetime; + if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME + && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; + } + if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME + && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { + error = EINVAL; + goto ioctl_cleanup; + } } break; - } } switch (cmd) { - case SIOCGIFADDR_IN6: ifr->ifr_addr = ia->ia_addr; break; @@ -741,13 +911,16 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } index = ifp->if_index; + lck_mtx_lock(ip6_mutex); if (in6_ifstat == NULL || index >= in6_ifstatmax || in6_ifstat[index] == NULL) { /* return EAFNOSUPPORT? */ bzero(&ifr->ifr_ifru.ifru_stat, - sizeof(ifr->ifr_ifru.ifru_stat)); - } else + sizeof (ifr->ifr_ifru.ifru_stat)); + } else { ifr->ifr_ifru.ifru_stat = *in6_ifstat[index]; + } + lck_mtx_unlock(ip6_mutex); break; case SIOCGIFSTAT_ICMP6: @@ -756,22 +929,66 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } index = ifp->if_index; + lck_mtx_lock(ip6_mutex); if (icmp6_ifstat == NULL || index >= icmp6_ifstatmax || icmp6_ifstat[index] == NULL) { /* return EAFNOSUPPORT? 
*/ bzero(&ifr->ifr_ifru.ifru_stat, - sizeof(ifr->ifr_ifru.ifru_icmp6stat)); - } else - ifr->ifr_ifru.ifru_icmp6stat = - *icmp6_ifstat[index]; + sizeof (ifr->ifr_ifru.ifru_icmp6stat)); + } else { + ifr->ifr_ifru.ifru_icmp6stat = *icmp6_ifstat[index]; + } + lck_mtx_unlock(ip6_mutex); break; case SIOCGIFALIFETIME_IN6: - ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; + if (p64) { + struct in6_addrlifetime_64 *lt; + + lt = (struct in6_addrlifetime_64 *) + &ifr->ifr_ifru.ifru_lifetime; + lt->ia6t_expire = ia->ia6_lifetime.ia6t_expire; + lt->ia6t_preferred = ia->ia6_lifetime.ia6t_preferred; + lt->ia6t_vltime = ia->ia6_lifetime.ia6t_vltime; + lt->ia6t_pltime = ia->ia6_lifetime.ia6t_pltime; + } else { + struct in6_addrlifetime_32 *lt; + + lt = (struct in6_addrlifetime_32 *) + &ifr->ifr_ifru.ifru_lifetime; + lt->ia6t_expire = + (uint32_t)ia->ia6_lifetime.ia6t_expire; + lt->ia6t_preferred = + (uint32_t)ia->ia6_lifetime.ia6t_preferred; + lt->ia6t_vltime = + (uint32_t)ia->ia6_lifetime.ia6t_vltime; + lt->ia6t_pltime = + (uint32_t)ia->ia6_lifetime.ia6t_pltime; + } break; case SIOCSIFALIFETIME_IN6: - ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; + if (p64) { + struct in6_addrlifetime_64 *lt; + + lt = (struct in6_addrlifetime_64 *) + &ifr->ifr_ifru.ifru_lifetime; + ia->ia6_lifetime.ia6t_expire = lt->ia6t_expire; + ia->ia6_lifetime.ia6t_preferred = lt->ia6t_preferred; + ia->ia6_lifetime.ia6t_vltime = lt->ia6t_vltime; + ia->ia6_lifetime.ia6t_pltime = lt->ia6t_pltime; + } else { + struct in6_addrlifetime_32 *lt; + + lt = (struct in6_addrlifetime_32 *) + &ifr->ifr_ifru.ifru_lifetime; + ia->ia6_lifetime.ia6t_expire = + (uint32_t)lt->ia6t_expire; + ia->ia6_lifetime.ia6t_preferred = + (uint32_t)lt->ia6t_preferred; + ia->ia6_lifetime.ia6t_vltime = lt->ia6t_vltime; + ia->ia6_lifetime.ia6t_pltime = lt->ia6t_pltime; + } /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = @@ -785,33 +1002,33 @@ in6_control(so, cmd, data, ifp, p) 
ia->ia6_lifetime.ia6t_preferred = 0; break; - case SIOCAIFADDR_IN6: - { + case SIOCAIFADDR_IN6_32: + case SIOCAIFADDR_IN6_64: { int i; struct nd_prefix pr0, *pr; - + /* Attempt to attache the protocol, in case it isn't attached */ error = proto_plumb(PF_INET6, ifp); if (error) { if (error != EEXIST) { - printf("SIOCAIFADDR_IN6: %s can't plumb protocol error=%d\n", - if_name(ifp), error); + printf("SIOCAIFADDR_IN6: %s can't plumb " + "protocol error=%d\n", if_name(ifp), error); goto ioctl_cleanup; } - + /* Ignore, EEXIST */ error = 0; - } - else { + } else { /* PF_INET6 wasn't previously attached */ - in6_if_up(ifp, NULL); + if ((error = in6_if_up(ifp, NULL)) != 0) + goto ioctl_cleanup; } /* * first, make or update the interface address structure, * and link it to the list. */ - if ((error = in6_update_ifa(ifp, ifra, ia)) != 0) + if ((error = in6_update_ifa(ifp, ifra, ia, M_WAITOK)) != 0) goto ioctl_cleanup; /* @@ -866,9 +1083,11 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } } + if (ia != NULL) + ifafree(&ia->ia_ifa); if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { - /* XXX: this should not happen! */ + /* XXX: this should not happen! 
*/ log(LOG_ERR, "in6_control: addition succeeded, but" " no ifaddr\n"); } else { @@ -887,7 +1106,8 @@ in6_control(so, cmd, data, ifp, p) if (ip6_use_tempaddr && pr->ndpr_refcnt == 1) { int e; - if ((e = in6_tmpifadd(ia, 1)) != 0) { + if ((e = in6_tmpifadd(ia, 1, + M_WAITOK)) != 0) { log(LOG_NOTICE, "in6_control: " "failed to create a " "temporary address, " @@ -907,12 +1127,13 @@ in6_control(so, cmd, data, ifp, p) /* Drop use count held above during lookup/add */ ndpr_rele(pr, FALSE); - +#if PF + pf_ifaddr_hook(ifp, cmd); +#endif /* PF */ break; } - case SIOCDIFADDR_IN6: - { + case SIOCDIFADDR_IN6: { int i = 0; struct nd_prefix pr0, *pr; @@ -957,8 +1178,11 @@ in6_control(so, cmd, data, ifp, p) if (pr != NULL) ndpr_rele(pr, FALSE); - purgeaddr: +purgeaddr: in6_purgeaddr(&ia->ia_ifa, 0); +#if PF + pf_ifaddr_hook(ifp, cmd); +#endif /* PF */ break; } @@ -967,7 +1191,9 @@ in6_control(so, cmd, data, ifp, p) goto ioctl_cleanup; } ioctl_cleanup: - return error; + if (ia != NULL) + ifafree(&ia->ia_ifa); + return (error); } /* @@ -977,10 +1203,11 @@ in6_control(so, cmd, data, ifp, p) * XXX: should this be performed under splnet()? */ int -in6_update_ifa(ifp, ifra, ia) +in6_update_ifa(ifp, ifra, ia, how) struct ifnet *ifp; struct in6_aliasreq *ifra; struct in6_ifaddr *ia; + int how; { int error = 0, hostIsNew = 0, plen = -1; struct in6_ifaddr *oia; @@ -1117,17 +1344,14 @@ in6_update_ifa(ifp, ifra, ia) if (ia == NULL) { hostIsNew = 1; /* - * When in6_update_ifa() is called in a process of a received - * RA, it is called under splnet(). So, we should call malloc - * with M_NOWAIT. The exception to this is during init time - * when we know it's okay to do M_WAITOK, hence the check - * against in6_init2done flag to see if it's not yet set. + * in6_update_ifa() may be called in a process of a received + * RA; in such a case, we should call malloc with M_NOWAIT. + * The exception to this is during init time or as part of + * handling an ioctl, when we know it's okay to do M_WAITOK. 
*/ - ia = (struct in6_ifaddr *) _MALLOC(sizeof(*ia), M_IFADDR, - in6_init2done ? M_NOWAIT : M_WAITOK); + ia = in6_ifaddr_alloc(how); if (ia == NULL) return ENOBUFS; - bzero((caddr_t)ia, sizeof(*ia)); /* Initialize the address and masks */ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; ia->ia_addr.sin6_family = AF_INET6; @@ -1146,6 +1370,7 @@ in6_update_ifa(ifp, ifra, ia) = (struct sockaddr *)&ia->ia_prefixmask; ia->ia_ifp = ifp; + ifaref(&ia->ia_ifa); lck_mtx_lock(nd6_mutex); if ((oia = in6_ifaddrs) != NULL) { for ( ; oia->ia_next; oia = oia->ia_next) @@ -1345,6 +1570,8 @@ in6_update_ifa(ifp, ifra, ia) if_name(ifp), error); } } + if (ia_loop != NULL) + ifafree(&ia_loop->ia_ifa); } } @@ -1370,7 +1597,8 @@ in6_update_ifa(ifp, ifra, ia) * issues with interfaces with IPv6 addresses, which have never brought * up. We are assuming that it is safe to nd6_ifattach multiple times. */ - nd6_ifattach(ifp); + if ((error = nd6_ifattach(ifp)) != 0) + return error; /* * Perform DAD, if needed. @@ -1563,16 +1791,13 @@ in6_purgeif(ifp) * address encoding scheme. (see figure on page 8) */ static int -in6_lifaddr_ioctl(so, cmd, data, ifp, p) - struct socket *so; - u_long cmd; - caddr_t data; - struct ifnet *ifp; - struct proc *p; +in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; struct sockaddr *sa; + int p64 = proc_is64bit(p); /* sanity checks */ if (!data || !ifp) { @@ -1633,6 +1858,8 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) return EADDRNOTAVAIL; hostaddr = *IFA_IN6(ifa); hostid_found = 1; + ifafree(ifa); + ifa = NULL; /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) @@ -1650,8 +1877,7 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). 
*/ bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof(ifra.ifra_name)); + bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, ((struct sockaddr *)&iflr->addr)->sa_len); @@ -1678,7 +1904,36 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) in6_len2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; - return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, p); + if (!p64) { +#if defined(__LP64__) + struct in6_aliasreq_32 ifra_32; + /* + * Use 32-bit ioctl and structure for 32-bit process. + */ + in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)&ifra, + &ifra_32); + return (in6_control(so, SIOCAIFADDR_IN6_32, + (caddr_t)&ifra_32, ifp, p)); +#else + return (in6_control(so, SIOCAIFADDR_IN6, + (caddr_t)&ifra, ifp, p)); +#endif /* __LP64__ */ + } else { +#if defined(__LP64__) + return (in6_control(so, SIOCAIFADDR_IN6, + (caddr_t)&ifra, ifp, p)); +#else + struct in6_aliasreq_64 ifra_64; + /* + * Use 64-bit ioctl and structure for 64-bit process. + */ + in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)&ifra, + &ifra_64); + return (in6_control(so, SIOCAIFADDR_IN6_64, + (caddr_t)&ifra_64, ifp, p)); +#endif /* __LP64__ */ + } + /* NOTREACHED */ } case SIOCGLIFADDR: case SIOCDLIFADDR: @@ -1807,8 +2062,46 @@ in6_lifaddr_ioctl(so, cmd, data, ifp, p) ia->ia_prefixmask.sin6_len); ifra.ifra_flags = ia->ia6_flags; - return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, - ifp, p); + if (!p64) { +#if defined(__LP64__) + struct in6_aliasreq_32 ifra_32; + /* + * Use 32-bit structure for 32-bit process. + * SIOCDIFADDR_IN6 is encoded with in6_ifreq, + * so it stays the same since the size does + * not change. The data part of the ioctl, + * however, is of a different structure, i.e. + * in6_aliasreq. 
+ */ + in6_aliasreq_64_to_32( + (struct in6_aliasreq_64 *)&ifra, &ifra_32); + return (in6_control(so, SIOCDIFADDR_IN6, + (caddr_t)&ifra_32, ifp, p)); +#else + return (in6_control(so, SIOCDIFADDR_IN6, + (caddr_t)&ifra, ifp, p)); +#endif /* __LP64__ */ + } else { +#if defined(__LP64__) + return (in6_control(so, SIOCDIFADDR_IN6, + (caddr_t)&ifra, ifp, p)); +#else + struct in6_aliasreq_64 ifra_64; + /* + * Use 64-bit structure for 64-bit process. + * SIOCDIFADDR_IN6 is encoded with in6_ifreq, + * so it stays the same since the size does + * not change. The data part of the ioctl, + * however, is of a different structure, i.e. + * in6_aliasreq. + */ + in6_aliasreq_32_to_64( + (struct in6_aliasreq_32 *)&ifra, &ifra_64); + return (in6_control(so, SIOCDIFADDR_IN6, + (caddr_t)&ifra_64, ifp, p)); +#endif /* __LP64__ */ + } + /* NOTREACHED */ } } } @@ -2008,6 +2301,8 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags) break; } } + if (ifa != NULL) + ifaref(ifa); ifnet_lock_done(ifp); return((struct in6_ifaddr *)ifa); @@ -2033,6 +2328,8 @@ in6ifa_ifpwithaddr(ifp, addr) if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) break; } + if (ifa != NULL) + ifaref(ifa); ifnet_lock_done(ifp); return((struct in6_ifaddr *)ifa); @@ -2105,11 +2402,13 @@ in6addr_local(struct in6_addr *in6) sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof (sin6); bcopy(in6, &sin6.sin6_addr, sizeof (*in6)); - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); if (rt != NULL) { + RT_LOCK_SPIN(rt); if (rt->rt_gateway->sa_family == AF_LINK) local = 1; + RT_UNLOCK(rt); rtfree(rt); } else { local = in6_localaddr(in6); @@ -2558,8 +2857,12 @@ in6_ifawithifp( if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { - if (ip6_use_deprecated) + if (ip6_use_deprecated) { + if (dep[0] != NULL) + ifafree(&dep[0]->ia_ifa); dep[0] = (struct in6_ifaddr *)ifa; + ifaref(ifa); + } continue; } @@ -2580,7 +2883,10 @@ 
in6_ifawithifp( } } if (besta) { + ifaref(&besta->ia_ifa); ifnet_lock_done(ifp); + if (dep[0] != NULL) + ifafree(&dep[0]->ia_ifa); return(besta); } @@ -2595,19 +2901,31 @@ in6_ifawithifp( if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { - if (ip6_use_deprecated) + if (ip6_use_deprecated) { + if (dep[1] != NULL) + ifafree(&dep[1]->ia_ifa); dep[1] = (struct in6_ifaddr *)ifa; + ifaref(ifa); + } continue; } - + if (ifa != NULL) + ifaref(ifa); ifnet_lock_done(ifp); + if (dep[0] != NULL) + ifafree(&dep[0]->ia_ifa); + if (dep[1] != NULL) + ifafree(&dep[1]->ia_ifa); return (struct in6_ifaddr *)ifa; } ifnet_lock_done(ifp); /* use the last-resort values, that are, deprecated addresses */ - if (dep[0]) + if (dep[0]) { + if (dep[1] != NULL) + ifafree(&dep[1]->ia_ifa); return dep[0]; + } if (dep[1]) return dep[1]; @@ -2617,7 +2935,7 @@ in6_ifawithifp( /* * perform DAD when interface becomes IFF_UP. */ -void +int in6_if_up( struct ifnet *ifp, struct in6_aliasreq *ifra) @@ -2625,14 +2943,17 @@ in6_if_up( struct ifaddr *ifa; struct in6_ifaddr *ia; int dad_delay; /* delay ticks before DAD output */ + int error; if (!in6_init2done) - return; + return ENXIO; /* * special cases, like 6to4, are handled in in6_ifattach */ - in6_ifattach(ifp, NULL, ifra); + error = in6_ifattach(ifp, NULL, ifra); + if (error != 0) + return error; dad_delay = 0; ifnet_lock_exclusive(ifp); @@ -2645,6 +2966,8 @@ in6_if_up( nd6_dad_start(ifa, &dad_delay); } ifnet_lock_done(ifp); + + return 0; } int @@ -2691,14 +3014,16 @@ in6if_do_dad( void in6_setmaxmtu() { - unsigned long maxmtu = 0; + u_int32_t maxmtu = 0; struct ifnet *ifp; ifnet_head_lock_shared(); TAILQ_FOREACH(ifp, &ifnet_head, if_list) { + lck_rw_lock_shared(nd_if_rwlock); if ((ifp->if_flags & IFF_LOOPBACK) == 0 && IN6_LINKMTU(ifp) > maxmtu) maxmtu = IN6_LINKMTU(ifp); + lck_rw_done(nd_if_rwlock); } ifnet_head_done(); if (maxmtu) /* update only when maxmtu is 
positive */ @@ -2750,7 +3075,7 @@ in6_sin6_2_sin_in_sock(struct sockaddr *nam) } /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */ -void +int in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) { struct sockaddr_in *sin_p; @@ -2758,18 +3083,29 @@ in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); + if (sin6_p == NULL) + return ENOBUFS; sin_p = (struct sockaddr_in *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); FREE(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; + + return 0; } -/* Posts in6_event_data message kernel events */ +/* + * Posts in6_event_data message kernel events. + * + * To get the same size of kev_in6_data between ILP32 and LP64 data models + * we are using a special version of the in6_addrlifetime structure that + * uses only 32 bits fields to be compatible with Leopard, and that + * are large enough to span 68 years. + */ void -in6_post_msg(struct ifnet *ifp, u_long event_code, struct in6_ifaddr *ifa) +in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa) { struct kev_msg ev_msg; - struct kev_in6_data in6_event_data; + struct kev_in6_data in6_event_data; ev_msg.vendor_code = KEV_VENDOR_APPLE; ev_msg.kev_class = KEV_NETWORK_CLASS; @@ -2782,17 +3118,109 @@ in6_post_msg(struct ifnet *ifp, u_long event_code, struct in6_ifaddr *ifa) in6_event_data.ia_prefixmask = ifa->ia_prefixmask; in6_event_data.ia_plen = ifa->ia_plen; in6_event_data.ia6_flags = (u_int32_t)ifa->ia6_flags; - in6_event_data.ia_lifetime = ifa->ia6_lifetime; + + in6_event_data.ia_lifetime.ia6t_expire = + ifa->ia6_lifetime.ia6t_expire; + in6_event_data.ia_lifetime.ia6t_preferred = + ifa->ia6_lifetime.ia6t_preferred; + in6_event_data.ia_lifetime.ia6t_vltime = + ifa->ia6_lifetime.ia6t_vltime; + in6_event_data.ia_lifetime.ia6t_pltime = + ifa->ia6_lifetime.ia6t_pltime; if (ifp != NULL) { - strncpy(&in6_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); + 
strncpy(&in6_event_data.link_data.if_name[0], + ifp->if_name, IFNAMSIZ); in6_event_data.link_data.if_family = ifp->if_family; - in6_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; + in6_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; } ev_msg.dv[0].data_ptr = &in6_event_data; - ev_msg.dv[0].data_length = sizeof(struct kev_in6_data); + ev_msg.dv[0].data_length = sizeof (in6_event_data); ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); } + +/* + * Called as part of ip6_init + */ +void +in6_ifaddr_init(void) +{ + PE_parse_boot_argn("ifa_debug", &in6ifa_debug, sizeof (in6ifa_debug)); + + in6ifa_size = (in6ifa_debug == 0) ? sizeof (struct in6_ifaddr) : + sizeof (struct in6_ifaddr_dbg); + + in6ifa_zone = zinit(in6ifa_size, IN6IFA_ZONE_MAX * in6ifa_size, + 0, IN6IFA_ZONE_NAME); + if (in6ifa_zone == NULL) + panic("%s: failed allocating %s", __func__, IN6IFA_ZONE_NAME); + + zone_change(in6ifa_zone, Z_EXPAND, TRUE); +} + +static struct in6_ifaddr * +in6_ifaddr_alloc(int how) +{ + struct in6_ifaddr *in6ifa; + + in6ifa = (how == M_WAITOK) ? 
zalloc(in6ifa_zone) : + zalloc_noblock(in6ifa_zone); + if (in6ifa != NULL) { + bzero(in6ifa, in6ifa_size); + in6ifa->ia_ifa.ifa_free = in6_ifaddr_free; + in6ifa->ia_ifa.ifa_debug |= IFD_ALLOC; + if (in6ifa_debug != 0) { + struct in6_ifaddr_dbg *in6ifa_dbg = + (struct in6_ifaddr_dbg *)in6ifa; + in6ifa->ia_ifa.ifa_debug |= IFD_DEBUG; + in6ifa->ia_ifa.ifa_trace = in6_ifaddr_trace; + ctrace_record(&in6ifa_dbg->in6ifa_alloc); + } + } + return (in6ifa); +} + +static void +in6_ifaddr_free(struct ifaddr *ifa) +{ + if (ifa->ifa_refcnt != 0) + panic("%s: ifa %p bad ref cnt", __func__, ifa); + if (!(ifa->ifa_debug & IFD_ALLOC)) + panic("%s: ifa %p cannot be freed", __func__, ifa); + + if (ifa->ifa_debug & IFD_DEBUG) { + struct in6_ifaddr_dbg *in6ifa_dbg = + (struct in6_ifaddr_dbg *)ifa; + ctrace_record(&in6ifa_dbg->in6ifa_free); + bcopy(&in6ifa_dbg->in6ifa, &in6ifa_dbg->in6ifa_old, + sizeof (struct in6_ifaddr)); + } + bzero(ifa, sizeof (struct in6_ifaddr)); + zfree(in6ifa_zone, ifa); +} + +static void +in6_ifaddr_trace(struct ifaddr *ifa, int refhold) +{ + struct in6_ifaddr_dbg *in6ifa_dbg = (struct in6_ifaddr_dbg *)ifa; + ctrace_t *tr; + u_int32_t idx; + u_int16_t *cnt; + + if (!(ifa->ifa_debug & IFD_DEBUG)) + panic("%s: ifa %p has no debug structure", __func__, ifa); + + if (refhold) { + cnt = &in6ifa_dbg->in6ifa_refhold_cnt; + tr = in6ifa_dbg->in6ifa_refhold; + } else { + cnt = &in6ifa_dbg->in6ifa_refrele_cnt; + tr = in6ifa_dbg->in6ifa_refrele; + } + + idx = OSAddAtomic16(1, (volatile SInt16 *)cnt) % CTRACE_HIST_SIZE; + ctrace_record(&tr[idx]); +} diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index a84715b19..fb0479fde 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/in6.h,v 1.7.2.4 2001/07/04 09:45:23 ume Exp $ */ /* $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */ @@ -159,10 +187,10 @@ struct sockaddr_in6 { __uint32_t sin6_scope_id; /* scope zone index */ }; +#ifdef KERNEL /*XXX nonstandard*/ /* * Local definition for masks */ -#ifdef KERNEL /*XXX nonstandard*/ #define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} #define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} @@ -184,11 +212,10 @@ extern const struct in6_addr in6mask96; extern const struct in6_addr in6mask128; #endif /* KERNEL_PRIVATE */ - +#ifdef KERNEL /*XXX nonstandard*/ /* * Macros started with IPV6_ADDR is KAME local */ -#ifdef KERNEL /*XXX nonstandard*/ #if BYTE_ORDER == BIG_ENDIAN #define IPV6_ADDR_INT32_ONE 1 #define IPV6_ADDR_INT32_TWO 2 @@ -305,6 +332,7 @@ extern const struct in6_addr in6addr_linklocal_allrouters; #ifdef KERNEL /*XXX nonstandard*/ #define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 #define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 #define IPV6_ADDR_SCOPE_SITELOCAL 0x05 #define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ @@ -344,6 +372,9 @@ extern const struct in6_addr in6addr_linklocal_allrouters; #define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) +#define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ + (IN6_IS_ADDR_MULTICAST(a) && \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) #define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) @@ -382,6 +413,11 @@ extern const struct in6_addr in6addr_linklocal_allrouters; ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a))) +#define IN6_IS_SCOPE_EMBED(a) \ + ((IN6_IS_ADDR_LINKLOCAL(a)) || \ + (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ + 
(IN6_IS_ADDR_MC_INTFACELOCAL(a))) + #define IFA6_IS_DEPRECATED(a) \ ((a)->ia6_lifetime.ia6t_preferred != 0 && \ (a)->ia6_lifetime.ia6t_preferred < timenow.tv_sec) @@ -396,8 +432,14 @@ extern const struct in6_addr in6addr_linklocal_allrouters; #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #ifdef PRIVATE struct route_in6 { - struct rtentry *ro_rt; - struct sockaddr_in6 ro_dst; + /* + * N.B: struct route_in6 must begin with ro_rt and ro_flags + * because the code does some casts of a 'struct route_in6 *' + * to a 'struct route *'. + */ + struct rtentry *ro_rt; + __uint32_t ro_flags; /* route flags */ + struct sockaddr_in6 ro_dst; }; #endif /* PRIVATE */ #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ @@ -458,6 +500,9 @@ struct route_in6 { #define IPV6_FW_GET 34 /* get entire firewall rule chain */ #endif /* 1 */ +#define IPV6_RECVTCLASS 35 /* bool; recv traffic class values */ +#define IPV6_TCLASS 36 /* int; send traffic class value */ + /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. 
XXX old spec */ @@ -623,14 +668,14 @@ extern int in6_localaddr(struct in6_addr *); extern int in6_addrscope(struct in6_addr *); extern struct in6_ifaddr *in6_ifawithscope(struct ifnet *, struct in6_addr *); extern struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); -extern void in6_if_up(struct ifnet *, struct in6_aliasreq *); +extern int in6_if_up(struct ifnet *, struct in6_aliasreq *); struct sockaddr; extern void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); extern void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); extern void in6_sin6_2_sin_in_sock(struct sockaddr *nam); -extern void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); +extern int in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) diff --git a/bsd/netinet6/in6_cksum.c b/bsd/netinet6/in6_cksum.c index fd3e143dc..d964493dc 100644 --- a/bsd/netinet6/in6_cksum.c +++ b/bsd/netinet6/in6_cksum.c @@ -1,5 +1,30 @@ -/* $FreeBSD: src/sys/netinet6/in6_cksum.c,v 1.1.2.3 2001/07/03 11:01:52 ume Exp $ */ -/* $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ */ +/* + * Copyright (c) 2009 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -65,6 +90,35 @@ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 */ +/*- + * Copyright (c) 2008 Joerg Sonnenberger . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #include #include #include @@ -74,6 +128,95 @@ #include +#include + + +#if defined(__arm__) && __ARM_ARCH__ >= 6 +extern int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum); + +u_int16_t +inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, + unsigned int len) +{ + union { + uint16_t words[16]; + struct { + struct in6_addr ip6_src; + struct in6_addr ip6_dst; + } addrs; + } u; + const struct in6_addr *in6_src; + const struct in6_addr *in6_dst; + const struct ip6_hdr *ip6; + uint32_t sum; + const uint16_t *w; + const char *cp; + + if (off < sizeof (struct ip6_hdr)) + panic("inet6_cksum: offset too short for IPv6 header"); + if (m->m_len < sizeof (struct ip6_hdr)) + panic("inet6_cksum: mbuf too short for IPv6 header"); + + if (nxt == 0) + return (cpu_in_cksum(m, len, off, 0)); + + /* + * Compute the equivalent of: + * struct ip6_hdr_pseudo ip6; + * + * bzero(sizeof (*ip6)); + * ip6.ip6ph_nxt = nxt; + * ip6.ip6ph_len = htonl(len); + * ipv6.ip6ph_src = mtod(m, struct ip6_hdr *)->ip6_src; + * in6_clearscope(&ip6->ip6ph_src); + * ipv6.ip6ph_dst = mtod(m, struct ip6_hdr *)->ip6_dst; + * in6_clearscope(&ip6->ip6ph_dst); + * sum = one_add(&ip6); + */ + +#if BYTE_ORDER == LITTLE_ENDIAN + sum = ((len & 0xffff) + ((len >> 16) & 0xffff) + nxt) << 8; +#else + sum = (len & 0xffff) + ((len >> 16) & 0xffff) + nxt; +#endif + cp = mtod(m, const char *); + w = (const uint16_t *)(cp + offsetof(struct ip6_hdr, 
ip6_src)); + ip6 = (const void *)cp; + if ((uintptr_t)w % 2 == 0) { + in6_src = &ip6->ip6_src; + in6_dst = &ip6->ip6_dst; + } else { + memcpy(&u, &ip6->ip6_src, 32); + w = u.words; + in6_src = &u.addrs.ip6_src; + in6_dst = &u.addrs.ip6_dst; + } + + sum += w[0]; + if (!IN6_IS_SCOPE_EMBED(in6_src)) + sum += w[1]; + sum += w[2]; + sum += w[3]; + sum += w[4]; + sum += w[5]; + sum += w[6]; + sum += w[7]; + w += 8; + sum += w[0]; + if (!IN6_IS_SCOPE_EMBED(in6_dst)) + sum += w[1]; + sum += w[2]; + sum += w[3]; + sum += w[4]; + sum += w[5]; + sum += w[6]; + sum += w[7]; + + return (cpu_in_cksum(m, len, off, sum)); +} + +#else + /* * Checksum routine for Internet Protocol family headers (Portable Version). * @@ -118,7 +261,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, } l_util; /* sanity check */ - if (m->m_pkthdr.len < off + len) { + if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < off + len) { panic("inet6_cksum: mbuf len (%d) < off+len (%d+%d)\n", m->m_pkthdr.len, off, len); } @@ -169,7 +312,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, /* * Force to even boundary. */ - if ((1 & (long) w) && (mlen > 0)) { + if ((1 & (intptr_t) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; @@ -245,7 +388,7 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, /* * Force to even boundary. */ - if ((1 & (long) w) && (mlen > 0)) { + if ((1 & (intptr_t) w) && (mlen > 0)) { REDUCE; sum <<= 8; s_util.c[0] = *(u_char *)w; @@ -301,3 +444,5 @@ inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, REDUCE; return (~sum & 0xffff); } + +#endif diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c index 029dd8810..332271e88 100644 --- a/bsd/netinet6/in6_gif.c +++ b/bsd/netinet6/in6_gif.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.2.2.3 2001/07/03 11:01:52 ume Exp $ */ /* $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */ @@ -67,8 +95,6 @@ #include -extern u_long route_generation; - static __inline__ void* _cast_non_const(const void * ptr) { union { @@ -178,7 +204,7 @@ in6_gif_output( (sc->gif_ro6.ro_rt != NULL && (sc->gif_ro6.ro_rt->generation_id != route_generation || sc->gif_ro6.ro_rt->rt_ifp == ifp))) { - /* cache route doesn't match */ + /* cache route doesn't match or recursive route */ bzero(dst, sizeof(*dst)); dst->sin6_family = sin6_dst->sin6_family; dst->sin6_len = sizeof(struct sockaddr_in6); @@ -198,9 +224,10 @@ in6_gif_output( m_freem(m); return ENETUNREACH; } - + RT_LOCK(sc->gif_ro6.ro_rt); /* if it constitutes infinite encapsulation, punt. */ if (sc->gif_ro6.ro_rt->rt_ifp == ifp) { + RT_UNLOCK(sc->gif_ro6.ro_rt); m_freem(m); return ENETUNREACH; /*XXX*/ } @@ -208,6 +235,7 @@ in6_gif_output( ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu - sizeof(struct ip6_hdr); #endif + RT_UNLOCK(sc->gif_ro6.ro_rt); } #if IPV6_MINMTU @@ -336,17 +364,22 @@ gif_validate6( sin6.sin6_scope_id = 0; /* XXX */ #endif - rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); + if (rt != NULL) + RT_LOCK(rt); if (!rt || rt->rt_ifp != ifp) { #if 0 log(LOG_WARNING, "%s: packet from %s dropped " "due to ingress filter\n", if_name(&sc->gif_if), ip6_sprintf(&sin6.sin6_addr)); #endif - if (rt) + if (rt != NULL) { + RT_UNLOCK(rt); rtfree(rt); + } return 0; } + RT_UNLOCK(rt); rtfree(rt); } @@ -372,7 +405,7 @@ gif_encapcheck6( /* sanity check done in caller */ sc = (struct gif_softc *)arg; - mbuf_copydata(m, 0, sizeof(ip6), &ip6); + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip6), &ip6); ifp = ((m->m_flags & M_PKTHDR) != 0) ? 
m->m_pkthdr.rcvif : NULL; return gif_validate6(&ip6, sc, ifp); diff --git a/bsd/netinet6/in6_gif.h b/bsd/netinet6/in6_gif.h index 6e292cd5d..8383c6b4e 100644 --- a/bsd/netinet6/in6_gif.h +++ b/bsd/netinet6/in6_gif.h @@ -40,6 +40,6 @@ int in6_gif_input(struct mbuf **, int *); int in6_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); int gif_encapcheck6(const struct mbuf *, int, int, void *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_IN6_GIF_H_ +#endif /* _NETINET6_IN6_GIF_H_ */ diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c index b711892ab..dff06569f 100644 --- a/bsd/netinet6/in6_ifattach.c +++ b/bsd/netinet6/in6_ifattach.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,8 +95,9 @@ struct in6_ifstat **in6_ifstat = NULL; struct icmp6_ifstat **icmp6_ifstat = NULL; size_t in6_ifstatmax = 0; size_t icmp6_ifstatmax = 0; -unsigned long in6_maxmtu = 0; +u_int32_t in6_maxmtu = 0; extern lck_mtx_t *nd6_mutex; +extern lck_mtx_t *inet6_domain_mutex; #if IP6_AUTO_LINKLOCAL int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; @@ -104,10 +105,9 @@ int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; int ip6_auto_linklocal = 1; /* enable by default */ #endif -int loopattach6_done = 0; - extern struct inpcbinfo udbinfo; extern struct inpcbinfo ripcbinfo; +extern lck_mtx_t *ip6_mutex; static int get_rand_ifid(struct ifnet *, struct in6_addr *); static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *); @@ -487,7 +487,8 @@ in6_ifattach_linklocal( */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); - if (ifp->if_type == IFT_PPP && ifra_passed != NULL) /* PPP provided both addresses for us */ + if (((ifp->if_type == IFT_PPP) || ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0)) && + ifra_passed != NULL) /* PPP provided both addresses for us */ 
bcopy(&ifra_passed->ifra_addr, &(ifra.ifra_addr), sizeof(struct sockaddr_in6)); else { ifra.ifra_addr.sin6_family = AF_INET6; @@ -538,7 +539,7 @@ in6_ifattach_linklocal( * we know there's no other link-local address on the interface * and therefore we are adding one (instead of updating one). */ - if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { + if ((error = in6_update_ifa(ifp, &ifra, NULL, M_WAITOK)) != 0) { /* * XXX: When the interface does not support IPv6, this call * would fail in the SIOCSIFADDR ioctl. I believe the @@ -604,11 +605,17 @@ in6_ifattach_linklocal( * valid with referring to the old link-local address. */ if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { - if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) + if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) { + printf("in6_ifattach_linklocal: nd6_prelist_add failed %d\n", error); + ifafree(&ia->ia_ifa); return(error); + } } - in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia); + if (ia != NULL) { + in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia); + ifafree(&ia->ia_ifa); + } /* Drop use count held above during lookup/add */ if (pr != NULL) @@ -662,7 +669,7 @@ in6_ifattach_loopback( * We are sure that this is a newly assigned address, so we can set * NULL to the 3rd arg. */ - if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { + if ((error = in6_update_ifa(ifp, &ifra, NULL, M_WAITOK)) != 0) { log(LOG_ERR, "in6_ifattach_loopback: failed to configure " "the loopback address on %s (errno=%d)\n", if_name(ifp), error); @@ -791,7 +798,7 @@ in6_nigroup_detach( * nodelocal address needs to be configured onto only one of them. * XXX multiple link-local address case */ -void +int in6_ifattach( struct ifnet *ifp, struct ifnet *altifp, /* secondary EUI64 source */ @@ -799,7 +806,7 @@ in6_ifattach( { static size_t if_indexlim = 8; struct in6_ifaddr *ia; - struct in6_addr in6; + int error; /* * We have some arrays that should be indexed by if_index. 
@@ -809,44 +816,85 @@ in6_ifattach( */ if (in6_ifstat == NULL || icmp6_ifstat == NULL || if_index >= if_indexlim) { - size_t n; - caddr_t q; - size_t olim; - - olim = if_indexlim; while (if_index >= if_indexlim) if_indexlim <<= 1; - - /* grow in6_ifstat */ + } + + lck_mtx_lock(ip6_mutex); + /* grow in6_ifstat */ + if (in6_ifstatmax < if_indexlim) { + size_t n; + caddr_t q; + n = if_indexlim * sizeof(struct in6_ifstat *); q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK); + if (q == NULL) { + lck_mtx_unlock(ip6_mutex); + return ENOBUFS; + } bzero(q, n); if (in6_ifstat) { bcopy((caddr_t)in6_ifstat, q, - olim * sizeof(struct in6_ifstat *)); + in6_ifstatmax * sizeof(struct in6_ifstat *)); FREE((caddr_t)in6_ifstat, M_IFADDR); } in6_ifstat = (struct in6_ifstat **)q; in6_ifstatmax = if_indexlim; + } + + if (in6_ifstat[ifp->if_index] == NULL) { + in6_ifstat[ifp->if_index] = (struct in6_ifstat *) + _MALLOC(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK); + if (in6_ifstat[ifp->if_index] == NULL) { + lck_mtx_unlock(ip6_mutex); + return ENOBUFS; + } + bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat)); + } + lck_mtx_unlock(ip6_mutex); - /* grow icmp6_ifstat */ + /* grow icmp6_ifstat, use inet6_domain_mutex as that is used in + * icmp6 routines + */ + lck_mtx_lock(inet6_domain_mutex); + if (icmp6_ifstatmax < if_indexlim) { + size_t n; + caddr_t q; + n = if_indexlim * sizeof(struct icmp6_ifstat *); q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK); + if (q == NULL) { + lck_mtx_unlock(inet6_domain_mutex); + return ENOBUFS; + } bzero(q, n); if (icmp6_ifstat) { bcopy((caddr_t)icmp6_ifstat, q, - olim * sizeof(struct icmp6_ifstat *)); + icmp6_ifstatmax * sizeof(struct icmp6_ifstat *)); FREE((caddr_t)icmp6_ifstat, M_IFADDR); } icmp6_ifstat = (struct icmp6_ifstat **)q; icmp6_ifstatmax = if_indexlim; } + if (icmp6_ifstat[ifp->if_index] == NULL) { + icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *) + _MALLOC(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK); + if 
(icmp6_ifstat[ifp->if_index] == NULL) { + lck_mtx_unlock(inet6_domain_mutex); + return ENOBUFS; + } + bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat)); + } + lck_mtx_unlock(inet6_domain_mutex); + /* initialize NDP variables */ - nd6_ifattach(ifp); + if ((error = nd6_ifattach(ifp)) != 0) + return error; /* initialize scope identifiers */ - scope6_ifattach(ifp); + if ((error = scope6_ifattach(ifp)) != 0) + return error; /* * quirks based on interface type @@ -873,7 +921,7 @@ in6_ifattach( log(LOG_INFO, "in6_ifattach: " "%s is not multicast capable, IPv6 not enabled\n", if_name(ifp)); - return; + return EINVAL; } /* @@ -881,19 +929,8 @@ in6_ifattach( * XXX multiple loopback interface case. */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - struct in6_ifaddr *ia6 = NULL; - if (!OSCompareAndSwap(0, 1, (UInt32 *)&loopattach6_done)) { - in6 = in6addr_loopback; - if ((ia6 = in6ifa_ifpwithaddr(ifp, &in6)) == NULL) { - if (in6_ifattach_loopback(ifp) != 0) { - OSCompareAndSwap(1, 0, (UInt32 *)&loopattach6_done); - return; - } - } - else { - ifafree(&ia6->ia_ifa); - } - } + if (in6_ifattach_loopback(ifp) != 0) + printf("in6_ifattach: in6_ifattach_loopback failed\n"); } /* @@ -905,11 +942,13 @@ in6_ifattach( if (in6_ifattach_linklocal(ifp, altifp, ifra) == 0) { /* linklocal address assigned */ } else { - log(LOG_INFO, "in6_ifattach: " - "%s failed to attach a linklocal address.\n", - if_name(ifp)); + log(LOG_INFO, "in6_ifattach: %s failed to " + "attach a linklocal address.\n", + if_name(ifp)); /* failed to assign linklocal address. bark? 
*/ } + } else { + ifafree(&ia->ia_ifa); } } @@ -921,16 +960,7 @@ in6_ifattach( if (in6_maxmtu < ifp->if_mtu) in6_maxmtu = ifp->if_mtu; - if (in6_ifstat[ifp->if_index] == NULL) { - in6_ifstat[ifp->if_index] = (struct in6_ifstat *) - _MALLOC(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK); - bzero(in6_ifstat[ifp->if_index], sizeof(struct in6_ifstat)); - } - if (icmp6_ifstat[ifp->if_index] == NULL) { - icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *) - _MALLOC(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK); - bzero(icmp6_ifstat[ifp->if_index], sizeof(struct icmp6_ifstat)); - } + return 0; } /* @@ -945,7 +975,6 @@ in6_ifdetach( struct in6_ifaddr *ia, *oia, *nia; struct ifaddr *ifa, *next; struct rtentry *rt; - short rtflags; struct sockaddr_in6 sin6; /* nuke prefix list. this may try to remove some of ifaddrs as well */ @@ -981,22 +1010,18 @@ in6_ifdetach( ia = (struct in6_ifaddr *)ifa; /* remove from the routing table */ - lck_mtx_lock(rt_mtx); if ((ia->ia_flags & IFA_ROUTE) && - (rt = rtalloc1_locked((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { - rtflags = rt->rt_flags; - rtfree_locked(rt); - rtrequest_locked(RTM_DELETE, + (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0))) { + (void) rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, - rtflags, (struct rtentry **)0); + rt->rt_flags, (struct rtentry **)0); + rtfree(rt); } - lck_mtx_unlock(rt_mtx); /* remove from the linked list */ if_detach_ifa(ifp, &ia->ia_ifa); - ifafree(&ia->ia_ifa); /* also remove from the IPv6 address chain(itojun&jinmei) */ oia = ia; @@ -1036,16 +1061,24 @@ in6_ifdetach( sin6.sin6_family = AF_INET6; sin6.sin6_addr = in6addr_linklocal_allnodes; sin6.sin6_addr.s6_addr16[1] = htons(ifp->if_index); - lck_mtx_lock(rt_mtx); - rt = rtalloc1_locked((struct sockaddr *)&sin6, 0, 0UL); + rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); if (rt != NULL) { + RT_LOCK(rt); if (rt->rt_ifp == ifp) { - rtrequest_locked(RTM_DELETE, 
(struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); + /* + * Prevent another thread from modifying rt_key, + * rt_gateway via rt_setgate() after the rt_lock + * is dropped by marking the route as defunct. + */ + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + (void) rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, + rt_mask(rt), rt->rt_flags, 0); + } else { + RT_UNLOCK(rt); } - rtfree_locked(rt); + rtfree(rt); } - lck_mtx_unlock(rt_mtx); } void @@ -1056,8 +1089,10 @@ in6_get_tmpifid( int generate) { u_int8_t nullbuf[8]; - struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; + struct nd_ifinfo *ndi; + lck_rw_lock_shared(nd_if_rwlock); + ndi = &nd_ifinfo[ifp->if_index]; bzero(nullbuf, sizeof(nullbuf)); if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) { /* we've never created a random ID. Create a new one. */ @@ -1072,9 +1107,9 @@ in6_get_tmpifid( ndi->randomid); } bcopy(ndi->randomid, retbuf, 8); + lck_rw_done(nd_if_rwlock); } -extern size_t nd_ifinfo_indexlim; void in6_tmpaddrtimer( __unused void *ignored_arg) @@ -1088,7 +1123,7 @@ in6_tmpaddrtimer( ip6_temp_regen_advance) * hz); if (ip6_use_tempaddr) { - + lck_rw_lock_shared(nd_if_rwlock); bzero(nullbuf, sizeof(nullbuf)); for (i = 1; i < nd_ifinfo_indexlim + 1; i++) { ndi = &nd_ifinfo[i]; @@ -1104,5 +1139,6 @@ in6_tmpaddrtimer( ndi->randomid); } } + lck_rw_done(nd_if_rwlock); } } diff --git a/bsd/netinet6/in6_ifattach.h b/bsd/netinet6/in6_ifattach.h index f0b7d2d09..7fa627f2d 100644 --- a/bsd/netinet6/in6_ifattach.h +++ b/bsd/netinet6/in6_ifattach.h @@ -36,11 +36,11 @@ #ifdef KERNEL_PRIVATE void in6_nigroup_attach(const char *, int); void in6_nigroup_detach(const char *, int); -void in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *); +int in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *); void in6_ifdetach(struct ifnet *); void in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int); void in6_tmpaddrtimer(void *); int 
in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_IN6_IFATTACH_H_ +#endif /* _NETINET6_IN6_IFATTACH_H_ */ diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 1b481b21f..6d2c98b71 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -163,6 +163,8 @@ in6_pcblookup_local_and_cleanup( lck_mtx_lock(inp->inpcb_mtx); if (so->so_usecount == 0) { + if (inp->inp_state != INPCB_STATE_DEAD) + in6_pcbdetach(inp); in_pcbdispose(inp); inp = NULL; } @@ -260,7 +262,7 @@ in6_pcbbind( struct inpcb *t; /* GROSS */ - if (ntohs(lport) < IPV6PORT_RESERVED && p && + if (ntohs(lport) < IPV6PORT_RESERVED && ((so->so_state & SS_PRIV) == 0)) { lck_rw_done(pcbinfo->mtx); socket_lock(so, 0); @@ -423,6 +425,7 @@ in6_pcbladdr( */ } + /* XXX: what is the point in doing this? */ if (inp->in6p_route.ro_rt) ifp = inp->in6p_route.ro_rt->rt_ifp; @@ -491,227 +494,6 @@ in6_pcbconnect(inp, nam, p) return (0); } -#if 0 -/* - * Return an IPv6 address, which is the most appropriate for given - * destination and user specified options. - * If necessary, this function lookups the routing table and return - * an entry to the caller for later use. - */ -struct in6_addr * -in6_selectsrc( - struct sockaddr_in6 *dstsock, - struct ip6_pktopts *opts, - struct ip6_moptions *mopts, - struct route_in6 *ro, - struct in6_addr *laddr, - struct in6_addr *src_storage, - int *errorp) -{ - struct in6_addr *dst; - struct in6_ifaddr *ia6 = 0; - struct in6_pktinfo *pi = NULL; - - dst = &dstsock->sin6_addr; - *errorp = 0; - - /* - * If the source address is explicitly specified by the caller, - * use it. 
- */ - if (opts && (pi = opts->ip6po_pktinfo) && - !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) - return(&pi->ipi6_addr); - - /* - * If the source address is not specified but the socket(if any) - * is already bound, use the bound address. - */ - if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) - return(laddr); - - /* - * If the caller doesn't specify the source address but - * the outgoing interface, use an address associated with - * the interface. - */ - if (pi && pi->ipi6_ifindex) { - /* XXX boundary check is assumed to be already done. */ - ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex], - dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - - /* - * If the destination address is a link-local unicast address or - * a multicast address, and if the outgoing interface is specified - * by the sin6_scope_id filed, use an address associated with the - * interface. - * XXX: We're now trying to define more specific semantics of - * sin6_scope_id field, so this part will be rewritten in - * the near future. - */ - if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && - dstsock->sin6_scope_id) { - /* - * I'm not sure if boundary check for scope_id is done - * somewhere... - */ - if (dstsock->sin6_scope_id < 0 || - if_index < dstsock->sin6_scope_id) { - *errorp = ENXIO; /* XXX: better error? */ - return(0); - } - ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id], - dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - - /* - * If the destination address is a multicast address and - * the outgoing interface for the address is specified - * by the caller, use an address associated with the interface. 
- * There is a sanity check here; if the destination has node-local - * scope, the outgoing interfacde should be a loopback address. - * Even if the outgoing interface is not specified, we also - * choose a loopback interface as the outgoing interface. - */ - if (IN6_IS_ADDR_MULTICAST(dst)) { - struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; - - if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { - ifp = lo_ifp; - } - - if (ifp) { - ia6 = in6_ifawithscope(ifp, dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = ia6->ia_addr.sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - } - - /* - * If the next hop address for the packet is specified - * by caller, use an address associated with the route - * to the next hop. - */ - { - struct sockaddr_in6 *sin6_next; - struct rtentry *rt; - - if (opts && opts->ip6po_nexthop) { - sin6_next = satosin6(opts->ip6po_nexthop); - rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0); - if (rt) { - ia6 = in6_ifawithscope(rt->rt_ifp, dst); - if (ia6 == 0) { - ifaref(&rt->rt_ifa); - ia6 = ifatoia6(rt->rt_ifa); - } - } - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifaref(&rt->rt_ifa); - return(src_storage); - } - } - - /* - * If route is known or can be allocated now, - * our src addr is taken from the i/f, else punt. 
- */ - if (ro) { - if (ro->ro_rt && - !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) { - rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; - } - if (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == (struct ifnet *)0) { - struct sockaddr_in6 *dst6; - - /* No route yet, so try to acquire one */ - bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); - dst6 = (struct sockaddr_in6 *)&ro->ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(struct sockaddr_in6); - dst6->sin6_addr = *dst; - if (IN6_IS_ADDR_MULTICAST(dst)) { - ro->ro_rt = - rtalloc1(&((struct route *)ro)->ro_dst, 0, 0UL); - } else { - rtalloc((struct route *)ro); - } - } - - /* - * in_pcbconnect() checks out IFF_LOOPBACK to skip using - * the address. But we don't know why it does so. - * It is necessary to ensure the scope even for lo0 - * so doesn't check out IFF_LOOPBACK. - */ - - if (ro->ro_rt) { - ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); - if (ia6 == 0) { /* xxx scope error ?*/ - ifaref(ro->ro_rt->rt_ifa); - ia6 = ifatoia6(ro->ro_rt->rt_ifa); - } - } - if (ia6 == 0) { - *errorp = EHOSTUNREACH; /* no route */ - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifaref(&rt->rt_ifa); - return(src_storage); - } - - *errorp = EADDRNOTAVAIL; - return(0); -} - -/* - * Default hop limit selection. The precedence is as follows: - * 1. Hoplimit valued specified via ioctl. - * 2. (If the outgoing interface is detected) the current - * hop limit of the interface specified by router advertisement. - * 3. The system default hoplimit. 
-*/ -int -in6_selecthlim( - struct in6pcb *in6p, - struct ifnet *ifp) -{ - if (in6p && in6p->in6p_hops >= 0) - return(in6p->in6p_hops); - else if (ifp) - return(nd_ifinfo[ifp->if_index].chlim); - else - return(ip6_defhlim); -} -#endif - void in6_pcbdisconnect(inp) struct inpcb *inp; @@ -779,6 +561,8 @@ in6_sockaddr(port, addr_p) struct sockaddr_in6 *sin6; MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); + if (sin6 == NULL) + return NULL; bzero(sin6, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); @@ -810,6 +594,8 @@ in6_v4mapsin6_sockaddr(port, addr_p) MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); + if (sin6_p == NULL) + return NULL; in6_sin_2_v4mapsin6(&sin, sin6_p); return (struct sockaddr *)sin6_p; @@ -842,6 +628,8 @@ in6_setsockaddr(so, nam) addr = inp->in6p_laddr; *nam = in6_sockaddr(port, &addr); + if (*nam == NULL) + return ENOBUFS; return 0; } @@ -862,6 +650,8 @@ in6_setpeeraddr(so, nam) addr = inp->in6p_faddr; *nam = in6_sockaddr(port, &addr); + if (*nam == NULL) + return ENOBUFS; return 0; } @@ -876,11 +666,11 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) if (inp->inp_vflag & INP_IPV4) { error = in_setsockaddr(so, nam); if (error == 0) - in6_sin_2_v4mapsin6_in_sock(nam); - } else - /* scope issues will be handled in in6_setsockaddr(). */ - error = in6_setsockaddr(so, nam); - + error = in6_sin_2_v4mapsin6_in_sock(nam); + } else { + /* scope issues will be handled in in6_setsockaddr(). */ + error = in6_setsockaddr(so, nam); + } return error; } @@ -895,11 +685,11 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) if (inp->inp_vflag & INP_IPV4) { error = in_setpeeraddr(so, nam); if (error == 0) - in6_sin_2_v4mapsin6_in_sock(nam); - } else - /* scope issues will be handled in in6_setpeeraddr(). */ - error = in6_setpeeraddr(so, nam); - + error = in6_sin_2_v4mapsin6_in_sock(nam); + } else { + /* scope issues will be handled in in6_setpeeraddr(). 
*/ + error = in6_setpeeraddr(so, nam); + } return error; } @@ -1149,25 +939,32 @@ in6_losing(in6p) struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { - in6p->in6p_route.ro_rt = 0; + in6p->in6p_route.ro_rt = NULL; + RT_LOCK(rt); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&in6p->in6p_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); - lck_mtx_lock(rt_mtx); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) - (void)rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, - (struct rtentry **)0); - else + if (rt->rt_flags & RTF_DYNAMIC) { + /* + * Prevent another thread from modifying rt_key, + * rt_gateway via rt_setgate() after the rt_lock + * is dropped by marking the route as defunct. + */ + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + (void) rtrequest(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + } else { + RT_UNLOCK(rt); + } /* * A new route can be allocated * the next time output is attempted. */ - rtfree_locked(rt); - lck_mtx_unlock(rt_mtx); + rtfree(rt); } } diff --git a/bsd/netinet6/in6_pcb.h b/bsd/netinet6/in6_pcb.h index 2bae22c14..58476cc5e 100644 --- a/bsd/netinet6/in6_pcb.h +++ b/bsd/netinet6/in6_pcb.h @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
@@ -72,45 +99,33 @@ #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) -#ifndef APPLE -//void in6_pcbpurgeif0(struct in6pcb *, struct ifnet *); -#endif -void in6_losing(struct inpcb *); -int in6_pcballoc(struct socket *, struct inpcbinfo *, struct proc *); -int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); -int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); -void in6_pcbdetach(struct inpcb *); -void in6_pcbdisconnect(struct inpcb *); -int in6_pcbladdr(struct inpcb *, struct sockaddr *, - struct in6_addr *); -struct inpcb * - in6_pcblookup_local(struct inpcbinfo *, - struct in6_addr *, u_int, int); -struct inpcb * - in6_pcblookup_hash(struct inpcbinfo *, - struct in6_addr *, u_int, struct in6_addr *, - u_int, int, struct ifnet *); -void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *, - u_int, const struct sockaddr *, u_int, int, - void (*)(struct inpcb *, int)); -void - in6_rtchange(struct inpcb *, int); -struct sockaddr * - in6_sockaddr(in_port_t port, struct in6_addr *addr_p); -struct sockaddr * - in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p); -int in6_setpeeraddr(struct socket *so, struct sockaddr **nam); -int in6_setsockaddr(struct socket *so, struct sockaddr **nam); -int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); -int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); -struct in6_addr *in6_selectsrc(struct sockaddr_in6 *, - struct ip6_pktopts *, - struct ip6_moptions *, - struct route_in6 *, struct in6_addr *, - struct in6_addr *, int *); -int in6_selecthlim(struct in6pcb *, struct ifnet *); -int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct proc *, int); -void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m); -#endif KERNEL_PRIVATE +extern void in6_losing(struct inpcb *); +extern int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); +extern int in6_pcbconnect(struct inpcb *, struct sockaddr *, 
struct proc *); +extern void in6_pcbdetach(struct inpcb *); +extern void in6_pcbdisconnect(struct inpcb *); +extern int in6_pcbladdr(struct inpcb *, struct sockaddr *, struct in6_addr *); +extern struct inpcb *in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *, + u_int, int); +extern struct inpcb *in6_pcblookup_hash(struct inpcbinfo *, struct in6_addr *, + u_int, struct in6_addr *, u_int, int, struct ifnet *); +extern void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *, u_int, + const struct sockaddr *, u_int, int, void (*)(struct inpcb *, int)); +extern void in6_rtchange(struct inpcb *, int); +extern struct sockaddr *in6_sockaddr(in_port_t port, struct in6_addr *addr_p); +extern struct sockaddr *in6_v4mapsin6_sockaddr(in_port_t port, + struct in_addr *addr_p); +extern int in6_setpeeraddr(struct socket *so, struct sockaddr **nam); +extern int in6_setsockaddr(struct socket *so, struct sockaddr **nam); +extern int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); +extern int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); +extern struct in6_addr *in6_selectsrc(struct sockaddr_in6 *, + struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, + struct in6_addr *, struct in6_addr *, int *); +extern int in6_selecthlim(struct in6pcb *, struct ifnet *); +extern int in6_pcbsetport(struct in6_addr *, struct inpcb *, + struct proc *, int); +extern void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m); +#endif /* KERNEL_PRIVATE */ -#endif !_NETINET6_IN6_PCB_H_ +#endif /* !_NETINET6_IN6_PCB_H_ */ diff --git a/bsd/netinet6/in6_prefix.c b/bsd/netinet6/in6_prefix.c index 2ff2db707..891917965 100644 --- a/bsd/netinet6/in6_prefix.c +++ b/bsd/netinet6/in6_prefix.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $KAME: in6_prefix.c,v 1.27 2000/03/29 23:13:13 itojun Exp $ */ /* @@ -173,6 +201,7 @@ in6_prefixwithifp(struct ifnet *ifp, int plen, struct in6_addr *dst) return (ifpr); } +#if 0 /* * Search prefix which matches arg prefix as specified in * draft-ietf-ipngwg-router-renum-08.txt @@ -239,7 +268,7 @@ search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr) * Return 1 if anything matched, and 0 if nothing matched. 
*/ static int -mark_matched_prefixes(u_long cmd, struct ifnet *ifp, struct in6_rrenumreq *irr) +mark_matched_prefixes(u_int32_t cmd, struct ifnet *ifp, struct in6_rrenumreq *irr) { struct ifprefix *ifpr; struct ifaddr *ifa; @@ -340,6 +369,7 @@ unmark_prefixes(struct ifnet *ifp) } ifnet_lock_done(ifp); } +#endif static void init_prefix_ltimes(struct rr_prefix *rpp) @@ -602,7 +632,8 @@ add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap) { struct in6_ifaddr *ia6; struct in6_aliasreq ifra; - int error; + struct proc *p = current_proc(); + int error, p64 = proc_is64bit(p); /* init ifra */ bzero(&ifra, sizeof(ifra)); @@ -638,16 +669,16 @@ add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap) /* link this addr and the prefix each other */ if (rap->ra_addr) ifafree(&rap->ra_addr->ia_ifa); + /* Reference held in in6ifa_ifpwithaddr() */ rap->ra_addr = ia6; - ifaref(&rap->ra_addr->ia_ifa); ia6->ia6_ifpr = rp2ifpr(rpp); return; } if (ia6->ia6_ifpr == rp2ifpr(rpp)) { if (rap->ra_addr) ifafree(&rap->ra_addr->ia_ifa); + /* Reference held in in6ifa_ifpwithaddr() */ rap->ra_addr = ia6; - ifaref(&rap->ra_addr->ia_ifa); return; } /* @@ -666,13 +697,43 @@ add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap) ip6_sprintf(&ifra.ifra_addr.sin6_addr), rpp->rp_plen, ip6_sprintf(IA6_IN6(ia6)), in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL)); + ifafree(&ia6->ia_ifa); return; } /* propagate ANYCAST flag if it is set for ancestor addr */ if (rap->ra_flags.anycast != 0) ifra.ifra_flags |= IN6_IFF_ANYCAST; - error = in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, rpp->rp_ifp, - current_proc()); + + if (!p64) { +#if defined(__LP64__) + struct in6_aliasreq_32 ifra_32; + /* + * Use 32-bit ioctl and structure for 32-bit process. 
+ */ + in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)&ifra, + &ifra_32); + error = in6_control(so, SIOCAIFADDR_IN6_32, (caddr_t)&ifra_32, + rpp->rp_ifp, p); +#else + error = in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, + rpp->rp_ifp, p); +#endif /* __LP64__ */ + } else { +#if defined(__LP64__) + error = in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, + rpp->rp_ifp, p); +#else + struct in6_aliasreq_64 ifra_64; + /* + * Use 64-bit ioctl and structure for 64-bit process. + */ + in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)&ifra, + &ifra_64); + error = in6_control(so, SIOCAIFADDR_IN6_64, (caddr_t)&ifra_64, + rpp->rp_ifp, p); +#endif /* __LP64__ */ + } + if (error != 0) { log(LOG_ERR, "in6_prefix.c: add_each_addr: addition of an addr" "%s/%d failed because in6_control failed for error %d\n", @@ -867,7 +928,7 @@ create_ra_entry(struct rp_addr **rapp) *rapp = (struct rp_addr *)_MALLOC(sizeof(struct rp_addr), M_RR_ADDR, M_NOWAIT); if (*rapp == NULL) { - log(LOG_ERR, "in6_prefix.c: init_newprefix:%d: ENOBUFS" + log(LOG_ERR, "in6_prefix.c:%d: ENOBUFS" "for rp_addr\n", __LINE__); return ENOBUFS; } @@ -876,6 +937,7 @@ create_ra_entry(struct rp_addr **rapp) return 0; } +#if 0 static int init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr, struct rr_prefix *rpp) @@ -927,6 +989,7 @@ init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr, return 0; } +#endif static void free_rp_entries(struct rr_prefix *rpp) @@ -949,6 +1012,7 @@ free_rp_entries(struct rr_prefix *rpp) lck_mtx_unlock(prefix6_mutex); } +#if 0 static int add_useprefixes(struct socket *so, struct ifnet *ifp, struct in6_rrenumreq *irr) @@ -977,6 +1041,7 @@ add_useprefixes(struct socket *so, struct ifnet *ifp, return error; } +#endif static void unprefer_prefix(struct rr_prefix *rpp) @@ -1031,6 +1096,7 @@ delete_each_prefix(struct rr_prefix *rpp, u_char origin) return error; } +#if 0 static void delete_prefixes(struct ifnet *ifp, u_char origin) { @@ -1049,6 +1115,7 @@ delete_prefixes(struct 
ifnet *ifp, u_char origin) } ifnet_lock_done(ifp); } +#endif static int link_stray_ia6s(struct rr_prefix *rpp) @@ -1088,6 +1155,7 @@ link_stray_ia6s(struct rr_prefix *rpp) return 0; } +#if 0 /* XXX assumes that permission is already checked by the caller */ int in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, @@ -1118,7 +1186,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, log(LOG_NOTICE, "in6_prefix_ioctl: preferred lifetime" "(%ld) is greater than valid lifetime(%ld)\n", - (u_long)irr->irr_pltime, (u_long)irr->irr_vltime); + (u_int32_t)irr->irr_pltime, (u_int32_t)irr->irr_vltime); error = EINVAL; break; } @@ -1153,7 +1221,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, log(LOG_NOTICE, "in6_prefix_ioctl: preferred lifetime" "(%ld) is greater than valid lifetime(%ld)\n", - (u_long)ipr->ipr_pltime, (u_long)ipr->ipr_vltime); + (u_int32_t)ipr->ipr_pltime, (u_int32_t)ipr->ipr_vltime); error = EINVAL; break; } @@ -1222,6 +1290,7 @@ in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, bad: return error; } +#endif void in6_rr_timer(__unused void *ignored_arg) diff --git a/bsd/netinet6/in6_prefix.h b/bsd/netinet6/in6_prefix.h index d235a069b..f69562ae4 100644 --- a/bsd/netinet6/in6_prefix.h +++ b/bsd/netinet6/in6_prefix.h @@ -88,4 +88,4 @@ extern struct rr_prhead rr_prefix; void in6_rr_timer(void *); int delete_each_prefix (struct rr_prefix *rpp, u_char origin); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index a937bbf35..a197c6c6b 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.19 2002/10/16 02:25:05 sam Exp $ */ /* $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */ @@ -378,10 +406,10 @@ int ip6_rr_prune = 5; /* router renumbering prefix * walk list every 5 sec. 
*/ int ip6_v6only = 0; /* Mapped addresses on by default - Radar 3347718 */ -int ip6_neighborgcthresh = 2048; /* Threshold # of NDP entries for GC */ +int ip6_neighborgcthresh = 1024; /* Threshold # of NDP entries for GC */ int ip6_maxifprefixes = 16; /* Max acceptable prefixes via RA per IF */ int ip6_maxifdefrouters = 16; /* Max acceptable def routers via RA */ -int ip6_maxdynroutes = 4096; /* Max # of routes created via redirect */ +int ip6_maxdynroutes = 1024; /* Max # of routes created via redirect */ u_int32_t ip6_id = 0UL; int ip6_keepfaith = 0; @@ -403,13 +431,13 @@ int pmtu_probe = 60*2; #define RIPV6SNDQ 8192 #define RIPV6RCVQ 8192 -u_long rip6_sendspace = RIPV6SNDQ; -u_long rip6_recvspace = RIPV6RCVQ; +u_int32_t rip6_sendspace = RIPV6SNDQ; +u_int32_t rip6_recvspace = RIPV6RCVQ; /* ICMPV6 parameters */ int icmp6_rediraccept = 1; /* accept and process redirects */ int icmp6_redirtimeout = 10 * 60; /* 10 minutes */ -int icmp6errppslim = 100; /* 100pps */ +int icmp6errppslim = 500; /* 500 packets per second */ int icmp6_nodeinfo = 3; /* enable/disable NI response */ /* UDP on IP6 parameters */ @@ -436,6 +464,7 @@ SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPSE static int sysctl_ip6_temppltime SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg2) int error = 0; int old; @@ -455,6 +484,7 @@ sysctl_ip6_temppltime SYSCTL_HANDLER_ARGS static int sysctl_ip6_tempvltime SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg2) int error = 0; int old; @@ -499,7 +529,7 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, &ip6_gif_hlim, 0, ""); SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, - kame_version, CTLFLAG_RD, __KAME_VERSION, 0, ""); + kame_version, CTLFLAG_RD, (void *)((uintptr_t)(__KAME_VERSION)), 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated, CTLFLAG_RW, &ip6_use_deprecated, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, diff --git 
a/bsd/netinet6/in6_rmx.c b/bsd/netinet6/in6_rmx.c index 178dd14d1..34f2fd320 100644 --- a/bsd/netinet6/in6_rmx.c +++ b/bsd/netinet6/in6_rmx.c @@ -138,6 +138,12 @@ static struct radix_node *in6_matroute_args(void *, struct radix_node_head *, #define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ +/* + * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), during + * which the routing lock (rnh_lock) is held and thus protects the variable. + */ +static int in6dynroutes; + /* * Do what we need to do when inserting a route. */ @@ -149,6 +155,21 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); struct radix_node *ret; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + + /* + * If this is a dynamic route (which is created via Redirect) and + * we already have the maximum acceptable number of such route entries, + * reject creating a new one. We could initiate garbage collection to + * make available space right now, but the benefit would probably not + * be worth the cleaning overhead; we only have to endure a slightly + * suboptimal path even without the redirected route. + */ + if ((rt->rt_flags & RTF_DYNAMIC) != 0 && + ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes) + return (NULL); + /* * For IPv6, all unicast non-host routes are automatically cloning. */ @@ -186,7 +207,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL && rt->rt_flags & RTF_HOST) { + if (ret == NULL && (rt->rt_flags & RTF_HOST)) { struct rtentry *rt2; /* * We are trying to add a host route, but can't. 
@@ -196,20 +217,29 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { - if (rt2->rt_flags & RTF_LLINFO && - rt2->rt_flags & RTF_HOST && - rt2->rt_gateway && - rt2->rt_gateway->sa_family == AF_LINK) { - rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(rt2), - rt2->rt_gateway, - rt_mask(rt2), rt2->rt_flags, 0); + RT_LOCK(rt2); + if ((rt2->rt_flags & RTF_LLINFO) && + (rt2->rt_flags & RTF_HOST) && + rt2->rt_gateway != NULL && + rt2->rt_gateway->sa_family == AF_LINK) { + /* + * Safe to drop rt_lock and use rt_key, + * rt_gateway, since holding rnh_lock here + * prevents another thread from calling + * rt_setgate() on this route. + */ + RT_UNLOCK(rt2); + (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), + rt2->rt_gateway, rt_mask(rt2), + rt2->rt_flags, 0); ret = rn_addroute(v_arg, n_arg, head, treenodes); + } else { + RT_UNLOCK(rt2); } rtfree_locked(rt2); } - } else if (ret == NULL && rt->rt_flags & RTF_CLONING) { + } else if (ret == NULL && (rt->rt_flags & RTF_CLONING)) { struct rtentry *rt2; /* * We are trying to add a net route, but can't. 
@@ -226,6 +256,7 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt2) { + RT_LOCK(rt2); if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) == RTF_CLONING && rt2->rt_gateway @@ -233,12 +264,36 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, && rt2->rt_ifp == rt->rt_ifp) { ret = rt2->rt_nodes; } + RT_UNLOCK(rt2); rtfree_locked(rt2); } } + + if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC) != 0) + in6dynroutes++; + return ret; } +static struct radix_node * +in6_deleteroute(void * v_arg, void *netmask_arg, struct radix_node_head *head) +{ + struct radix_node *rn; + + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + rn = rn_delete(v_arg, netmask_arg, head); + if (rn != NULL) { + struct rtentry *rt = (struct rtentry *)rn; + RT_LOCK_SPIN(rt); + if ((rt->rt_flags & RTF_DYNAMIC) != 0) + in6dynroutes--; + RT_UNLOCK(rt); + } + + return (rn); +} + /* * Similar to in6_matroute_args except without the leaf-matching parameters. */ @@ -260,11 +315,14 @@ in6_matroute_args(void *v_arg, struct radix_node_head *head, struct radix_node *rn = rn_match_args(v_arg, head, f, w); struct rtentry *rt = (struct rtentry *)rn; - if (rt && rt->rt_refcnt == 0) { /* this is first reference */ - if (rt->rt_flags & RTPRF_OURS) { + /* This is first reference? 
*/ + if (rt != NULL) { + RT_LOCK_SPIN(rt); + if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) { rt->rt_flags &= ~RTPRF_OURS; rt->rt_rmx.rmx_expire = 0; } + RT_UNLOCK(rt); } return (rn); } @@ -275,17 +333,17 @@ static int rtq_reallyold = 60*60; /* one hour is ``really old'' */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, &rtq_reallyold , 0, ""); - + static int rtq_minreallyold = 10; /* never automatically crank down to less */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, &rtq_minreallyold , 0, ""); - + static int rtq_toomany = 128; /* 128 cached routes is ``too many'' */ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW, &rtq_toomany , 0, ""); - + /* * On last reference drop, mark the route as belong to us so that it can be @@ -296,13 +354,19 @@ in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) { struct rtentry *rt = (struct rtentry *)rn; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); + if (!(rt->rt_flags & RTF_UP)) return; /* prophylactic measures */ if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) return; - if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) + if (rt->rt_flags & RTPRF_OURS) + return; + + if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) return; /* @@ -316,11 +380,18 @@ in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) * called when the route's reference count is 0, don't * deallocate it until we return from this routine by * telling rtrequest that we're interested in it. + * Safe to drop rt_lock and use rt_key, rt_gateway, + * since holding rnh_lock here prevents another thread + * from calling rt_setgate() on this route. 
*/ - if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), + RT_UNLOCK(rt); + if (rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) { /* Now let the caller free it */ - rtunref(rt); + RT_LOCK(rt); + RT_REMREF_LOCKED(rt); + } else { + RT_LOCK(rt); } } else { struct timeval timenow; @@ -343,8 +414,10 @@ struct rtqk_arg { /* * Get rid of old routes. When draining, this deletes everything, even when - * the timeout is not expired yet. When updating, this makes sure that - * nothing has a timeout longer than the current value of rtq_reallyold. + * the timeout is not expired yet. This also applies if the route is dynamic + * and there is a sufficiently large number of such routes (more than half of + * the maximum). When updating, this makes sure that nothing has a timeout longer + * than the current value of rtq_reallyold. */ static int in6_rtqkill(struct radix_node *rn, void *rock) @@ -355,19 +428,31 @@ in6_rtqkill(struct radix_node *rn, void *rock) struct timeval timenow; getmicrotime(&timenow); - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK(rt); if (rt->rt_flags & RTPRF_OURS) { ap->found++; - if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) { + if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec || + ((rt->rt_flags & RTF_DYNAMIC) != 0 && + ip6_maxdynroutes >= 0 && + in6dynroutes > ip6_maxdynroutes / 2)) { if (rt->rt_refcnt > 0) panic("rtqkill route really not free"); - err = rtrequest_locked(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags, 0); + /* + * Delete this route since we're done with it; + * the route may be freed afterwards, so we + * can no longer refer to 'rt' upon returning + * from rtrequest(). Safe to drop rt_lock and + * use rt_key, rt_gateway, since holding rnh_lock + * here prevents another thread from calling + * rt_setgate() on this route.
+ */ + RT_UNLOCK(rt); + err = rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); if (err) { log(LOG_WARNING, "in6_rtqkill: error %d", err); } else { @@ -382,7 +467,10 @@ in6_rtqkill(struct radix_node *rn, void *rock) } ap->nextstop = lmin(ap->nextstop, rt->rt_rmx.rmx_expire); + RT_UNLOCK(rt); } + } else { + RT_UNLOCK(rt); } return 0; @@ -400,7 +488,7 @@ in6_rtqtimo(void *rock) static time_t last_adjusted_timeout = 0; struct timeval timenow; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); /* Get the timestamp after we acquire the lock for better accuracy */ getmicrotime(&timenow); @@ -438,7 +526,7 @@ in6_rtqtimo(void *rock) atv.tv_usec = 0; atv.tv_sec = arg.nextstop - timenow.tv_sec; - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); timeout(in6_rtqtimo, rock, tvtohz(&atv)); } @@ -463,6 +551,7 @@ in6_mtuexpire(struct radix_node *rn, void *rock) if (!rt) panic("rt == NULL in in6_mtuexpire"); + RT_LOCK(rt); if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) { if (rt->rt_rmx.rmx_expire <= timenow.tv_sec) { rt->rt_flags |= RTF_PROBEMTU; @@ -471,6 +560,7 @@ in6_mtuexpire(struct radix_node *rn, void *rock) rt->rt_rmx.rmx_expire); } } + RT_UNLOCK(rt); return 0; } @@ -489,7 +579,7 @@ in6_mtutimo(void *rock) arg.rnh = rnh; arg.nextstop = timenow.tv_sec + MTUTIMO_DEFAULT; - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); atv.tv_usec = 0; @@ -501,7 +591,7 @@ in6_mtutimo(void *rock) arg.nextstop = timenow.tv_sec + 30; /*last resort*/ } atv.tv_sec -= timenow.tv_sec; - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); timeout(in6_mtutimo, rock, tvtohz(&atv)); } @@ -539,6 +629,7 @@ in6_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in6_addroute; + rnh->rnh_deladdr = in6_deleteroute; rnh->rnh_matchaddr = in6_matroute; rnh->rnh_matchaddr_args = in6_matroute_args; rnh->rnh_close = in6_clsroute; diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 
6a6782cc5..71441847b 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */ /* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */ @@ -143,14 +171,25 @@ in6_selectsrc( * the interface. */ if (pi && pi->ipi6_ifindex) { + ifnet_t out_ifp = NULL; + ifnet_head_lock_shared(); + if (pi->ipi6_ifindex > if_index) { + ifnet_head_done(); + *errorp = EADDRNOTAVAIL; + return(0); + } else { + out_ifp = ifindex2ifnet[pi->ipi6_ifindex]; + } + ifnet_head_done(); + /* XXX boundary check is assumed to be already done. 
*/ - ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex], - dst); + ia6 = in6_ifawithscope(out_ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); return src_storage; } @@ -168,19 +207,27 @@ in6_selectsrc( /* * I'm not sure if boundary check for scope_id is done * somewhere... + * + * Since sin6_scope_id is unsigned, we only need to check against if_index. */ - if (dstsock->sin6_scope_id < 0 || - if_index < dstsock->sin6_scope_id) { + ifnet_t out_ifp = NULL; + ifnet_head_lock_shared(); + if (if_index < dstsock->sin6_scope_id) { *errorp = ENXIO; /* XXX: better error? */ + ifnet_head_done(); return(0); + } else { + out_ifp = ifindex2ifnet[dstsock->sin6_scope_id]; } - ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id], - dst); + ifnet_head_done(); + + ia6 = in6_ifawithscope(out_ifp, dst); if (ia6 == 0) { *errorp = EADDRNOTAVAIL; return(0); } *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); return src_storage; } @@ -207,6 +254,7 @@ in6_selectsrc( return(0); } *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); return src_storage; } } @@ -223,16 +271,27 @@ in6_selectsrc( if (opts && opts->ip6po_nexthop) { sin6_next = satosin6(opts->ip6po_nexthop); rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0); - if (rt) { + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); ia6 = in6_ifawithscope(rt->rt_ifp, dst); - if (ia6 == 0) + if (ia6 == 0) { ia6 = ifatoia6(rt->rt_ifa); + if (ia6 != NULL) + ifaref(&ia6->ia_ifa); + } } if (ia6 == 0) { *errorp = EADDRNOTAVAIL; + if (rt != NULL) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } return(0); } *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); return src_storage; } } @@ -242,19 +301,23 @@ in6_selectsrc( * our src addr is taken from the i/f, else punt. 
*/ if (ro) { - lck_mtx_lock(rt_mtx); - if (ro->ro_rt && + if (ro->ro_rt != NULL) + RT_LOCK(ro->ro_rt); + if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || - satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || + satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || + ro->ro_rt->generation_id != route_generation || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { - rtfree_locked(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; + RT_UNLOCK(ro->ro_rt); + rtfree(ro->ro_rt); + ro->ro_rt = NULL; } - if (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == 0) { + if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL) { struct sockaddr_in6 *sa6; + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); /* No route yet, so try to acquire one */ bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; @@ -265,13 +328,14 @@ in6_selectsrc( sa6->sin6_scope_id = dstsock->sin6_scope_id; #endif if (IN6_IS_ADDR_MULTICAST(dst)) { - ro->ro_rt = rtalloc1_locked( - &((struct route *)ro)->ro_dst, 0, 0UL); + ro->ro_rt = rtalloc1( + &((struct route *)ro)->ro_dst, 0, 0); } else { - rtalloc_ign_locked((struct route *)ro, 0UL); + rtalloc_ign((struct route *)ro, 0); } + if (ro->ro_rt != NULL) + RT_LOCK(ro->ro_rt); } - lck_mtx_unlock(rt_mtx); /* * in_pcbconnect() checks out IFF_LOOPBACK to skip using @@ -279,17 +343,15 @@ in6_selectsrc( * It is necessary to ensure the scope even for lo0 * so doesn't check out IFF_LOOPBACK. 
*/ - - if (ro->ro_rt) { + if (ro->ro_rt != NULL) { + RT_LOCK_ASSERT_HELD(ro->ro_rt); ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); if (ia6 == 0) { ia6 = ifatoia6(ro->ro_rt->rt_ifa); if (ia6) ifaref(&ia6->ia_ifa); } - else { - ifaref(&ia6->ia_ifa); - } + RT_UNLOCK(ro->ro_rt); } #if 0 /* @@ -308,6 +370,7 @@ in6_selectsrc( if (ia6 == 0) return(0); *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; + ifafree(&ia6->ia_ifa); return src_storage; } #endif /* 0 */ @@ -336,12 +399,19 @@ in6_selecthlim( struct in6pcb *in6p, struct ifnet *ifp) { - if (in6p && in6p->in6p_hops >= 0) + if (in6p && in6p->in6p_hops >= 0) { return(in6p->in6p_hops); - else if (ifp) - return(nd_ifinfo[ifp->if_index].chlim); - else - return(ip6_defhlim); + } else { + lck_rw_lock_shared(nd_if_rwlock); + if (ifp && ifp->if_index < nd_ifinfo_indexlim) { + u_int8_t chlim = nd_ifinfo[ifp->if_index].chlim; + lck_rw_done(nd_if_rwlock); + return (chlim); + } else { + lck_rw_done(nd_if_rwlock); + return(ip6_defhlim); + } + } } /* @@ -378,7 +448,7 @@ in6_pcbsetport( last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { - if (p && (error = proc_suser(p))) { + if ((error = proc_suser(p)) != 0) { if (!locked) lck_rw_done(pcbinfo->mtx); return error; @@ -513,6 +583,7 @@ in6_embedscope( * KAME assumption: link id == interface id */ + ifnet_head_lock_shared(); if (in6p && in6p->in6p_outputopts && (pi = in6p->in6p_outputopts->ip6po_pktinfo) && pi->ipi6_ifindex) { @@ -524,13 +595,20 @@ in6_embedscope( ifp = in6p->in6p_moptions->im6o_multicast_ifp; in6->s6_addr16[1] = htons(ifp->if_index); } else if (scopeid) { - /* boundary check */ - if (scopeid < 0 || if_index < scopeid) + /* + * Since scopeid is unsigned, we only have to check it + * against if_index + */ + if (if_index < scopeid) { + ifnet_head_done(); return ENXIO; /* XXX EINVAL? 
*/ + + } ifp = ifindex2ifnet[scopeid]; /*XXX assignment to 16bit from 32bit variable */ in6->s6_addr16[1] = htons(scopeid & 0xffff); } + ifnet_head_done(); if (ifpp) *ifpp = ifp; @@ -572,8 +650,13 @@ in6_recoverscope( */ scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]); if (scopeid) { - /* sanity check */ - if (scopeid < 0 || if_index < scopeid) + /* + * sanity check + * + * Since scopeid is unsigned, we only have to check it + * against if_index + */ + if (if_index < scopeid) return ENXIO; if (ifp && ifp->if_index != scopeid) return ENXIO; diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index b6b232f68..f995b623f 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.3.2.2 2001/07/03 11:01:52 ume Exp $ */ /* $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ */ @@ -73,13 +101,6 @@ #include #endif -/* - * Interface address, Internet version. One of these structures - * is allocated for each interface with an Internet address. - * The ifaddr structure contains the protocol-independent part - * of the structure and is assumed to be first. - */ - /* * pltime/vltime are just for future reference (required to implements 2 * hour rule for hosts). they should never be modified by nd6_timeout or @@ -95,7 +116,27 @@ struct in6_addrlifetime { u_int32_t ia6t_pltime; /* prefix lifetime */ }; -#ifdef PRIVATE +#if defined(KERNEL_PRIVATE) +struct in6_addrlifetime_32 { + u_int32_t ia6t_expire; + u_int32_t ia6t_preferred; + u_int32_t ia6t_vltime; + u_int32_t ia6t_pltime; +}; + +struct in6_addrlifetime_64 { + time_t ia6t_expire; + time_t ia6t_preferred __attribute__((aligned(8))); + u_int32_t ia6t_vltime __attribute__((aligned(8))); + u_int32_t ia6t_pltime; +}; + +/* + * Interface address, IPv6 version. One of these structures + * is allocated for each interface with an IPv6 address. + * The ifaddr structure contains the protocol-independent part + * of the structure and is assumed to be first. + */ struct in6_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp @@ -115,8 +156,8 @@ struct in6_ifaddr { * (for autoconfigured addresses only) */ }; +#endif /* KERNEL_PRIVATE */ -#endif /* PRIVATE */ /* * IPv6 interface statistics, as defined in RFC2465 Ipv6IfStatsEntry (p12). 
*/ @@ -261,6 +302,26 @@ struct in6_aliasreq { struct in6_addrlifetime ifra_lifetime; }; +#if defined(KERNEL_PRIVATE) +struct in6_aliasreq_32 { + char ifra_name[IFNAMSIZ]; + struct sockaddr_in6 ifra_addr; + struct sockaddr_in6 ifra_dstaddr; + struct sockaddr_in6 ifra_prefixmask; + int ifra_flags; + struct in6_addrlifetime_32 ifra_lifetime; +}; + +struct in6_aliasreq_64 { + char ifra_name[IFNAMSIZ]; + struct sockaddr_in6 ifra_addr; + struct sockaddr_in6 ifra_dstaddr; + struct sockaddr_in6 ifra_prefixmask; + int ifra_flags; + struct in6_addrlifetime_64 ifra_lifetime; +}; +#endif /* KERNEL_PRIVATE */ + /* prefix type macro */ #define IN6_PREFIX_ND 1 #define IN6_PREFIX_RR 2 @@ -357,12 +418,19 @@ struct in6_rrenumreq { #define IFA_DSTIN6(x) (&((struct sockaddr_in6 *)((x)->ifa_dstaddr))->sin6_addr) #define IFPR_IN6(x) (&((struct sockaddr_in6 *)((x)->ifpr_prefix))->sin6_addr) -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ /* * Event data, internet6 style. */ +struct kev_in6_addrlifetime { + u_int32_t ia6t_expire; + u_int32_t ia6t_preferred; + u_int32_t ia6t_vltime; + u_int32_t ia6t_pltime; +}; + struct kev_in6_data { struct net_event_data link_data; struct sockaddr_in6 ia_addr; /* interface address */ @@ -371,10 +439,9 @@ struct kev_in6_data { struct sockaddr_in6 ia_prefixmask; /* prefix mask */ u_int32_t ia_plen; /* prefix length */ u_int32_t ia6_flags; /* address flags from in6_ifaddr */ - struct in6_addrlifetime ia_lifetime; /* address life info */ + struct kev_in6_addrlifetime ia_lifetime; /* address life info */ }; - /* * Define inet6 event subclass and specific inet6 events. 
*/ @@ -390,8 +457,8 @@ struct kev_in6_data { #ifdef KERNEL_PRIVATE /* Utility function used inside netinet6 kernel code for generating events */ -void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); -#endif KERNEL_PRIVATE +void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *); +#endif /* KERNEL_PRIVATE */ #define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ @@ -414,17 +481,37 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq) #define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq) +#ifdef KERNEL_PRIVATE +#define SIOCAIFADDR_IN6_32 _IOW('i', 26, struct in6_aliasreq_32) +#define SIOCAIFADDR_IN6_64 _IOW('i', 26, struct in6_aliasreq_64) +#endif /* KERNEL_PRIVATE */ #define SIOCSIFPHYADDR_IN6 _IOW('i', 62, struct in6_aliasreq) +#ifdef KERNEL_PRIVATE +#define SIOCSIFPHYADDR_IN6_32 _IOW('i', 62, struct in6_aliasreq_32) +#define SIOCSIFPHYADDR_IN6_64 _IOW('i', 62, struct in6_aliasreq_64) +#endif /* KERNEL_PRIVATE */ #define SIOCGIFPSRCADDR_IN6 _IOWR('i', 63, struct in6_ifreq) #define SIOCGIFPDSTADDR_IN6 _IOWR('i', 64, struct in6_ifreq) #define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) #define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) +#ifdef KERNEL_PRIVATE +#define SIOCGDRLST_IN6_32 _IOWR('i', 74, struct in6_drlist_32) +#define SIOCGDRLST_IN6_64 _IOWR('i', 74, struct in6_drlist_64) +#endif /* KERNEL_PRIVATE */ #define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_prlist) +#ifdef KERNEL_PRIVATE +#define SIOCGPRLST_IN6_32 _IOWR('i', 75, struct in6_prlist_32) +#define SIOCGPRLST_IN6_64 _IOWR('i', 75, struct in6_prlist_64) +#endif /* KERNEL_PRIVATE */ #define OSIOCGIFINFO_IN6 _IOWR('i', 108, struct in6_ondireq) #define SIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) #define SIOCSNDFLUSH_IN6 _IOWR('i', 77, struct in6_ifreq) #define SIOCGNBRINFO_IN6 _IOWR('i', 78, struct in6_nbrinfo) +#ifdef 
KERNEL_PRIVATE +#define SIOCGNBRINFO_IN6_32 _IOWR('i', 78, struct in6_nbrinfo_32) +#define SIOCGNBRINFO_IN6_64 _IOWR('i', 78, struct in6_nbrinfo_64) +#endif /* KERNEL_PRIVATE */ #define SIOCSPFXFLUSH_IN6 _IOWR('i', 79, struct in6_ifreq) #define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) @@ -435,6 +522,12 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #define SIOCSDEFIFACE_IN6 _IOWR('i', 85, struct in6_ndifreq) #define SIOCGDEFIFACE_IN6 _IOWR('i', 86, struct in6_ndifreq) +#ifdef KERNEL_PRIVATE +#define SIOCSDEFIFACE_IN6_32 _IOWR('i', 85, struct in6_ndifreq_32) +#define SIOCSDEFIFACE_IN6_64 _IOWR('i', 85, struct in6_ndifreq_64) +#define SIOCGDEFIFACE_IN6_32 _IOWR('i', 86, struct in6_ndifreq_32) +#define SIOCGDEFIFACE_IN6_64 _IOWR('i', 86, struct in6_ndifreq_64) +#endif /* KERNEL_PRIVATE */ #define SIOCSIFINFO_FLAGS _IOWR('i', 87, struct in6_ndireq) /* XXX */ @@ -455,15 +548,27 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); struct sioc_sg_req6) /* get s,g pkt cnt */ #define SIOCGETMIFCNT_IN6 _IOWR('u', 107, \ struct sioc_mif_req6) /* get pkt cnt per if */ +#ifdef KERNEL_PRIVATE +#define SIOCGETMIFCNT_IN6_32 _IOWR('u', 107, struct sioc_mif_req6_32) +#define SIOCGETMIFCNT_IN6_64 _IOWR('u', 107, struct sioc_mif_req6_64) +#endif /* KERNEL_PRIVATE */ #ifdef PRIVATE /* * temporary control calls to attach/detach IP to/from an ethernet interface */ #define SIOCPROTOATTACH_IN6 _IOWR('i', 110, struct in6_aliasreq) /* attach proto to interface */ +#ifdef KERNEL_PRIVATE +#define SIOCPROTOATTACH_IN6_32 _IOWR('i', 110, struct in6_aliasreq_32) +#define SIOCPROTOATTACH_IN6_64 _IOWR('i', 110, struct in6_aliasreq_64) +#endif /* KERNEL_PRIVATE */ #define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq) /* detach proto from interface */ #define SIOCLL_START _IOWR('i', 130, struct in6_aliasreq) /* start aquiring linklocal on interface */ +#ifdef KERNEL_PRIVATE +#define SIOCLL_START_32 _IOWR('i', 130, struct in6_aliasreq_32) +#define 
SIOCLL_START_64 _IOWR('i', 130, struct in6_aliasreq_64) +#endif /* KERNEL_PRIVATE */ #define SIOCLL_STOP _IOWR('i', 131, struct in6_ifreq) /* deconfigure linklocal from interface */ #define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */ #define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */ @@ -489,7 +594,7 @@ void in6_post_msg(struct ifnet *, u_long, struct in6_ifaddr *); #ifdef KERNEL #define IN6_ARE_SCOPE_CMP(a,b) ((a)-(b)) #define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b)) -#endif +#endif /* KERNEL */ #ifdef KERNEL_PRIVATE extern struct in6_ifaddr *in6_ifaddrs; @@ -500,22 +605,22 @@ extern struct icmp6stat icmp6stat; extern struct icmp6_ifstat **icmp6_ifstat; extern size_t icmp6_ifstatmax; #define in6_ifstat_inc(ifp, tag) \ -do { \ - int _z_index = ifp ? ifp->if_index : 0; \ +do { \ + int _z_index = ifp ? ifp->if_index : 0; \ if ((_z_index) && _z_index <= if_index \ - && _z_index < in6_ifstatmax \ + && _z_index < (signed)in6_ifstatmax \ && in6_ifstat && in6_ifstat[_z_index]) { \ in6_ifstat[_z_index]->tag++; \ - } \ + } \ } while (0) extern struct ifqueue ip6intrq; /* IP6 packet input queue */ extern struct in6_addr zeroin6_addr; extern u_char inet6ctlerrmap[]; -extern unsigned long in6_maxmtu; +extern u_int32_t in6_maxmtu; #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPMADDR); -#endif MALLOC_DECLARE +#endif /* MALLOC_DECLARE */ /* * Macro for finding the internet address structure (in6_ifaddr) corresponding @@ -611,45 +716,48 @@ do { \ IN6_NEXT_MULTI((step), (in6m)); \ } while(0) -struct in6_multi *in6_addmulti __P((struct in6_addr *, struct ifnet *, - int *, int)); -void in6_delmulti __P((struct in6_multi *, int)); -extern int in6_ifindex2scopeid __P((int)); -extern int in6_mask2len __P((struct in6_addr *, u_char *)); -extern void in6_len2mask __P((struct in6_addr *, int)); -int in6_control __P((struct socket *, - u_long, caddr_t, struct ifnet *, struct proc *)); -int 
in6_update_ifa __P((struct ifnet *, struct in6_aliasreq *, - struct in6_ifaddr *)); -void in6_purgeaddr __P((struct ifaddr *, int)); -int in6if_do_dad __P((struct ifnet *)); -void in6_purgeif __P((struct ifnet *)); -void in6_savemkludge __P((struct in6_ifaddr *)); -void in6_setmaxmtu __P((void)); -void in6_restoremkludge __P((struct in6_ifaddr *, struct ifnet *)); -void in6_purgemkludge __P((struct ifnet *)); -struct in6_ifaddr *in6ifa_ifpforlinklocal __P((struct ifnet *, int)); -struct in6_ifaddr *in6ifa_ifpwithaddr __P((struct ifnet *, - struct in6_addr *)); -char *ip6_sprintf __P((const struct in6_addr *)); -int in6_addr2scopeid __P((struct ifnet *, struct in6_addr *)); -int in6_matchlen __P((struct in6_addr *, struct in6_addr *)); -int in6_are_prefix_equal __P((struct in6_addr *p1, struct in6_addr *p2, - int len)); -void in6_prefixlen2mask __P((struct in6_addr *maskp, int len)); -int in6_prefix_ioctl __P((struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp)); -int in6_prefix_add_ifid __P((int iilen, struct in6_ifaddr *ia)); -void in6_prefix_remove_ifid __P((int iilen, struct in6_ifaddr *ia)); -void in6_purgeprefix __P((struct ifnet *)); - -int in6_is_addr_deprecated __P((struct sockaddr_in6 *)); +extern struct in6_multi *in6_addmulti(struct in6_addr *, struct ifnet *, + int *, int); +extern void in6_delmulti(struct in6_multi *, int); +extern int in6_ifindex2scopeid(int); +extern int in6_mask2len(struct in6_addr *, u_char *); +extern void in6_len2mask(struct in6_addr *, int); +extern int in6_control(struct socket *, u_long, caddr_t, struct ifnet *, + struct proc *); +extern int in6_update_ifa(struct ifnet *, struct in6_aliasreq *, + struct in6_ifaddr *, int); +extern void in6_purgeaddr(struct ifaddr *, int); +extern int in6if_do_dad(struct ifnet *); +extern void in6_purgeif(struct ifnet *); +extern void in6_savemkludge(struct in6_ifaddr *); +extern void in6_setmaxmtu(void); +extern void in6_restoremkludge(struct in6_ifaddr *, struct ifnet *); 
+extern void in6_purgemkludge(struct ifnet *); +extern struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int); +extern struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *); +extern char *ip6_sprintf(const struct in6_addr *); +extern int in6_addr2scopeid(struct ifnet *, struct in6_addr *); +extern int in6_matchlen(struct in6_addr *, struct in6_addr *); +extern int in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, + int len); +extern void in6_prefixlen2mask(struct in6_addr *maskp, int len); +extern int in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia); +extern void in6_prefix_remove_ifid(int iilen, struct in6_ifaddr *ia); +extern void in6_purgeprefix(struct ifnet *); + +extern int in6_is_addr_deprecated(struct sockaddr_in6 *); + struct inpcb; -int in6_embedscope __P((struct in6_addr *, const struct sockaddr_in6 *, - struct inpcb *, struct ifnet **)); -int in6_recoverscope __P((struct sockaddr_in6 *, const struct in6_addr *, - struct ifnet *)); -void in6_clearscope __P((struct in6_addr *)); -#endif /* KERNEL_PRIVATE */ +extern int in6_embedscope(struct in6_addr *, const struct sockaddr_in6 *, + struct inpcb *, struct ifnet **); +extern int in6_recoverscope(struct sockaddr_in6 *, const struct in6_addr *, + struct ifnet *); +extern void in6_clearscope(struct in6_addr *); +extern void in6_aliasreq_64_to_32(struct in6_aliasreq_64 *, + struct in6_aliasreq_32 *); +extern void in6_aliasreq_32_to_64(struct in6_aliasreq_32 *, + struct in6_aliasreq_64 *); +extern void in6_ifaddr_init(void); +#endif /* KERNEL_PRIVATE */ #endif /* _NETINET6_IN6_VAR_H_ */ diff --git a/bsd/netinet6/ip6_ecn.h b/bsd/netinet6/ip6_ecn.h index 6e926018a..125a95581 100644 --- a/bsd/netinet6/ip6_ecn.h +++ b/bsd/netinet6/ip6_ecn.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ip6_ecn.h,v 1.2.2.2 2001/07/03 11:01:53 ume Exp $ */ /* $KAME: ip_ecn.h,v 1.5 2000/03/27 04:58:38 sumikawa Exp $ */ @@ -39,4 +67,4 @@ #ifdef KERNEL_PRIVATE extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); extern void ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c index 9f6f70b5f..c9f7075f4 100644 --- a/bsd/netinet6/ip6_forward.c +++ b/bsd/netinet6/ip6_forward.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ip6_forward.c,v 1.16 2002/10/16 02:25:05 sam Exp $ */ /* $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */ @@ -73,6 +101,10 @@ extern lck_mtx_t *ip6_mutex; #include +#if PF +#include +#endif /* PF */ + /* * Forward a packet. If some error occurs return the sender * an icmp packet. 
Note we can't always generate a meaningful @@ -95,7 +127,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, struct rtentry *rt; int error, type = 0, code = 0; struct mbuf *mcopy = NULL; - struct ifnet *origifp; /* maybe unnecessary */ + struct ifnet *ifp, *origifp; /* maybe unnecessary */ #if IPSEC struct secpolicy *sp = NULL; #endif @@ -302,23 +334,46 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, skip_ipsec: #endif /* IPSEC */ + /* + * If "locked", ip6forward_rt points to the globally defined + * struct route cache which requires ip6_mutex, e.g. when this + * is called from ip6_input(). Else the caller is responsible + * for the struct route and its serialization (if needed), e.g. + * when this is called from ip6_rthdr0(). + */ + if (locked) + lck_mtx_assert(ip6_mutex, LCK_MTX_ASSERT_OWNED); dst = (struct sockaddr_in6 *)&ip6forward_rt->ro_dst; + if ((rt = ip6forward_rt->ro_rt) != NULL) { + RT_LOCK(rt); + /* Take an extra ref for ourselves */ + RT_ADDREF_LOCKED(rt); + } + if (!srcrt) { /* * ip6forward_rt->ro_dst.sin6_addr is equal to ip6->ip6_dst */ - if (ip6forward_rt->ro_rt == 0 || - (ip6forward_rt->ro_rt->rt_flags & RTF_UP) == 0) { - if (ip6forward_rt->ro_rt) { - rtfree(ip6forward_rt->ro_rt); - ip6forward_rt->ro_rt = 0; + if (rt == NULL || !(rt->rt_flags & RTF_UP) || + rt->generation_id != route_generation) { + if (rt != NULL) { + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + rtfree(rt); + ip6forward_rt->ro_rt = NULL; } /* this probably fails but give it a try again */ rtalloc_ign((struct route *)ip6forward_rt, - RTF_PRCLONING); + RTF_PRCLONING); + if ((rt = ip6forward_rt->ro_rt) != NULL) { + RT_LOCK(rt); + /* Take an extra ref for ourselves */ + RT_ADDREF_LOCKED(rt); + } } - if (ip6forward_rt->ro_rt == 0) { + if (rt == NULL) { ip6stat.ip6s_noroute++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { @@ -332,34 +387,41 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, m_freem(m); 
return; } - } else if ((rt = ip6forward_rt->ro_rt) == 0 || - !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr)) { - if (ip6forward_rt->ro_rt) { - rtfree(ip6forward_rt->ro_rt); - ip6forward_rt->ro_rt = 0; + RT_LOCK_ASSERT_HELD(rt); + } else if (rt == NULL || !(rt->rt_flags & RTF_UP) || + !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr) || + rt->generation_id != route_generation) { + if (rt != NULL) { + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + rtfree(rt); + ip6forward_rt->ro_rt = NULL; } bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; - rtalloc_ign((struct route *)ip6forward_rt, RTF_PRCLONING); - if (ip6forward_rt->ro_rt == 0) { + rtalloc_ign((struct route *)ip6forward_rt, RTF_PRCLONING); + if ((rt = ip6forward_rt->ro_rt) == NULL) { ip6stat.ip6s_noroute++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute); if (mcopy) { if (locked) lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_NOROUTE, 0); + ICMP6_DST_UNREACH_NOROUTE, 0); if (locked) lck_mtx_lock(ip6_mutex); } m_freem(m); return; } + RT_LOCK(rt); + /* Take an extra ref for ourselves */ + RT_ADDREF_LOCKED(rt); } - rt = ip6forward_rt->ro_rt; /* * Scope check: if a packet can't be delivered to its destination @@ -384,6 +446,9 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp)); } + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); if (mcopy) { if (locked) lck_mtx_unlock(ip6_mutex); @@ -399,7 +464,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); if (mcopy) { - u_long mtu; + uint32_t mtu; #if IPSEC struct secpolicy *sp2; int ipsecerror; @@ -431,11 +496,18 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, if (mtu < IPV6_MMTU) mtu = IPV6_MMTU; #endif + /* Release extra 
ref */ + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); if (locked) lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); if (locked) lck_mtx_lock(ip6_mutex); + } else { + /* Release extra ref */ + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } m_freem(m); return; @@ -466,6 +538,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, * type/code is based on suggestion by Rich Draves. * not sure if it is the best pick. */ + RT_REMREF_LOCKED(rt); /* Release extra ref */ + RT_UNLOCK(rt); if (locked) lck_mtx_unlock(ip6_mutex); icmp6_error(mcopy, ICMP6_DST_UNREACH, @@ -483,13 +557,19 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; + ifp = rt->rt_ifp; + /* Drop the lock but retain the extra ref */ + RT_UNLOCK(rt); /* If ipfw says divert, we have to just drop packet */ - if (ip6_fw_chk_ptr(&ip6, rt->rt_ifp, &port, &m)) { + if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) { m_freem(m); goto freecopy; } - if (!m) + if (!m) { goto freecopy; + } + /* We still have the extra ref on rt */ + RT_LOCK(rt); } /* @@ -538,28 +618,61 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, in6_clearscope(&ip6->ip6_dst); #endif - error = nd6_output(rt->rt_ifp, origifp, m, dst, rt, locked); + ifp = rt->rt_ifp; + /* Drop the lock but retain the extra ref */ + RT_UNLOCK(rt); + +#if PF + if (locked) + lck_mtx_unlock(ip6_mutex); + + /* Invoke outbound packet filter */ + error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE); + + if (locked) + lck_mtx_lock(ip6_mutex); + + if (error) { + if (m != NULL) { + panic("%s: unexpected packet %p\n", __func__, m); + /* NOTREACHED */ + } + /* Already freed by callee */ + goto senderr; + } + ip6 = mtod(m, struct ip6_hdr *); +#endif /* PF */ + + error = nd6_output(ifp, origifp, m, dst, rt, locked); if (error) { - in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard); + in6_ifstat_inc(ifp, ifs6_out_discard); ip6stat.ip6s_cantforward++; } else { ip6stat.ip6s_forward++; - 
in6_ifstat_inc(rt->rt_ifp, ifs6_out_forward); + in6_ifstat_inc(ifp, ifs6_out_forward); if (type) ip6stat.ip6s_redirectsent++; else { - if (mcopy) + if (mcopy) { goto freecopy; + } } } - if (mcopy == NULL) +#if PF +senderr: +#endif /* PF */ + if (mcopy == NULL) { + /* Release extra ref */ + RT_REMREF(rt); return; - + } switch (error) { case 0: #if 1 if (type == ND_REDIRECT) { icmp6_redirect_output(mcopy, rt); + /* Release extra ref */ + RT_REMREF(rt); return; } #endif @@ -587,9 +700,13 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, icmp6_error(mcopy, type, code, 0); if (locked) lck_mtx_lock(ip6_mutex); + /* Release extra ref */ + RT_REMREF(rt); return; freecopy: m_freem(mcopy); + /* Release extra ref */ + RT_REMREF(rt); return; } diff --git a/bsd/netinet6/ip6_fw.c b/bsd/netinet6/ip6_fw.c index 6abcb4cf2..f1b9f0508 100644 --- a/bsd/netinet6/ip6_fw.c +++ b/bsd/netinet6/ip6_fw.c @@ -196,6 +196,10 @@ static void ip6fw_report __P((struct ip6_fw *f, struct ip6_hdr *ip6, static int ip6_fw_chk __P((struct ip6_hdr **pip6, struct ifnet *oif, u_int16_t *cookie, struct mbuf **m)); static int ip6_fw_ctl __P((struct sockopt *)); +static void cp_to_user_64( struct ip6_fw_64 *userrule_64, struct ip6_fw *rule); +static void cp_from_user_64( struct ip6_fw_64 *userrule_64, struct ip6_fw *rule); +static void cp_to_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule); +static void cp_from_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule); static char err_prefix[] = "ip6_fw_ctl:"; extern lck_mtx_t *ip6_mutex; @@ -554,7 +558,7 @@ ip6_fw_chk(struct ip6_hdr **pip6, struct ip6_fw_chain *chain; struct ip6_fw *rule = NULL; struct ip6_hdr *ip6 = *pip6; - struct ifnet *const rif = (*m)->m_pkthdr.rcvif; + struct ifnet *const rif = ((*m)->m_flags & M_LOOP) ? 
ifunit("lo0") : (*m)->m_pkthdr.rcvif; u_short offset = 0; int off = sizeof(struct ip6_hdr), nxt = ip6->ip6_nxt; u_short src_port, dst_port; @@ -1137,124 +1141,6 @@ check_ip6fw_struct(struct ip6_fw *frwl) return frwl; } -/*#####*/ -#if 0 -static int -ip6_fw_ctl(int stage, struct mbuf **mm) -{ - int error; - struct mbuf *m; - - if (stage == IPV6_FW_GET) { - struct ip6_fw_chain *fcp = ip6_fw_chain.lh_first; - *mm = m = m_get(M_WAIT, MT_DATA); /* XXX */ - if (!m) - return(ENOBUFS); - if (sizeof *(fcp->rule) > MLEN) { - MCLGET(m, M_WAIT); - if ((m->m_flags & M_EXT) == 0) { - m_free(m); - return(ENOBUFS); - } - } - for (; fcp; fcp = fcp->chain.le_next) { - bcopy(fcp->rule, m->m_data, sizeof *(fcp->rule)); - m->m_len = sizeof *(fcp->rule); - m->m_next = m_get(M_WAIT, MT_DATA); /* XXX */ - if (!m->m_next) { - m_freem(*mm); - return(ENOBUFS); - } - m = m->m_next; - if (sizeof *(fcp->rule) > MLEN) { - MCLGET(m, M_WAIT); - if ((m->m_flags & M_EXT) == 0) { - m_freem(*mm); - return(ENOBUFS); - } - } - m->m_len = 0; - } - return (0); - } - m = *mm; - /* only allow get calls if secure mode > 2 */ - if (securelevel > 2) { - if (m) { - (void)m_freem(m); - *mm = 0; - } - return(EPERM); - } - if (stage == IPV6_FW_FLUSH) { - while (ip6_fw_chain.lh_first != NULL && - ip6_fw_chain.lh_first->rule->fw_number != (u_short)-1) { - struct ip6_fw_chain *fcp = ip6_fw_chain.lh_first; - int s = splnet(); - LIST_REMOVE(ip6_fw_chain.lh_first, chain); - splx(s); - FREE(fcp->rule, M_IP6FW); - FREE(fcp, M_IP6FW); - } - if (m) { - (void)m_freem(m); - *mm = 0; - } - return (0); - } - if (stage == IPV6_FW_ZERO) { - error = zero_entry6(m); - if (m) { - (void)m_freem(m); - *mm = 0; - } - return (error); - } - if (m == NULL) { - printf("%s NULL mbuf ptr\n", err_prefix); - return (EINVAL); - } - - if (stage == IPV6_FW_ADD) { - struct ip6_fw *frwl = check_ip6fw_mbuf(m); - - if (!frwl) - error = EINVAL; - else - error = add_entry6(&ip6_fw_chain, frwl); - if (m) { - (void)m_freem(m); - *mm = 0; - } - return 
error; - } - if (stage == IPV6_FW_DEL) { - if (m->m_len != sizeof(struct ip6_fw)) { - dprintf(("%s len=%ld, want %lu\n", err_prefix, m->m_len, - sizeof(struct ip6_fw))); - error = EINVAL; - } else if (mtod(m, struct ip6_fw *)->fw_number == (u_short)-1) { - dprintf(("%s can't delete rule 65535\n", err_prefix)); - error = EINVAL; - } else - error = del_entry6(&ip6_fw_chain, - mtod(m, struct ip6_fw *)->fw_number); - if (m) { - (void)m_freem(m); - *mm = 0; - } - return error; - } - - dprintf(("%s unknown request %d\n", err_prefix, stage)); - if (m) { - (void)m_freem(m); - *mm = 0; - } - return (EINVAL); -} -#endif - static void ip6fw_kev_post_msg(u_int32_t event_code) { @@ -1272,6 +1158,121 @@ ip6fw_kev_post_msg(u_int32_t event_code) } +static void +cp_to_user_64( struct ip6_fw_64 *userrule_64, struct ip6_fw *rule) +{ + userrule_64->version = rule->version; + userrule_64->context = CAST_USER_ADDR_T(rule->context); + userrule_64->fw_pcnt = rule->fw_pcnt; + userrule_64->fw_bcnt = rule->fw_bcnt; + userrule_64->fw_src = rule->fw_src; + userrule_64->fw_dst = rule->fw_dst; + userrule_64->fw_smsk = rule->fw_smsk; + userrule_64->fw_dmsk = rule->fw_dmsk; + userrule_64->fw_number = rule->fw_number; + userrule_64->fw_flg = rule->fw_flg; + userrule_64->fw_ipflg = rule->fw_ipflg; + bcopy( rule->fw_pts, userrule_64->fw_pts, IPV6_FW_MAX_PORTS); + userrule_64->fw_ip6opt= rule->fw_ip6opt; + userrule_64->fw_ip6nopt = rule->fw_ip6nopt; + userrule_64->fw_tcpf = rule->fw_tcpf; + userrule_64->fw_tcpnf = rule->fw_tcpnf; + bcopy( rule->fw_icmp6types, userrule_64->fw_icmp6types, sizeof(userrule_64->fw_icmp6types)); + userrule_64->fw_in_if = rule->fw_in_if; + userrule_64->fw_out_if = rule->fw_out_if; + userrule_64->timestamp = rule->timestamp; + userrule_64->fw_un.fu_divert_port = rule->fw_un.fu_divert_port; + userrule_64->fw_prot = rule->fw_prot; + userrule_64->fw_nports = rule->fw_nports; +} + + +static void +cp_from_user_64( struct ip6_fw_64 *userrule_64, struct ip6_fw *rule) +{ + 
rule->version = userrule_64->version; + rule->context = CAST_DOWN(void *, userrule_64->context); + rule->fw_pcnt = userrule_64->fw_pcnt; + rule->fw_bcnt = userrule_64->fw_bcnt; + rule->fw_src = userrule_64->fw_src; + rule->fw_dst = userrule_64->fw_dst; + rule->fw_smsk = userrule_64->fw_smsk; + rule->fw_dmsk = userrule_64->fw_dmsk; + rule->fw_number = userrule_64->fw_number; + rule->fw_flg = userrule_64->fw_flg; + rule->fw_ipflg = userrule_64->fw_ipflg; + bcopy( userrule_64->fw_pts, rule->fw_pts, IPV6_FW_MAX_PORTS); + rule->fw_ip6opt = userrule_64->fw_ip6opt; + rule->fw_ip6nopt = userrule_64->fw_ip6nopt; + rule->fw_tcpf = userrule_64->fw_tcpf; + rule->fw_tcpnf = userrule_64->fw_tcpnf; + bcopy( userrule_64->fw_icmp6types, rule->fw_icmp6types, sizeof(userrule_64->fw_icmp6types)); + rule->fw_in_if = userrule_64->fw_in_if; + rule->fw_out_if = userrule_64->fw_out_if; + rule->timestamp = CAST_DOWN( long, userrule_64->timestamp); + rule->fw_un.fu_divert_port = userrule_64->fw_un.fu_divert_port; + rule->fw_prot = userrule_64->fw_prot; + rule->fw_nports = userrule_64->fw_nports; +} + + +static void +cp_to_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule) +{ + userrule_32->version = rule->version; + userrule_32->context = CAST_DOWN_EXPLICIT( user32_addr_t, rule->context); + userrule_32->fw_pcnt = rule->fw_pcnt; + userrule_32->fw_bcnt = rule->fw_bcnt; + userrule_32->fw_src = rule->fw_src; + userrule_32->fw_dst = rule->fw_dst; + userrule_32->fw_smsk = rule->fw_smsk; + userrule_32->fw_dmsk = rule->fw_dmsk; + userrule_32->fw_number = rule->fw_number; + userrule_32->fw_flg = rule->fw_flg; + userrule_32->fw_ipflg = rule->fw_ipflg; + bcopy( rule->fw_pts, userrule_32->fw_pts, IPV6_FW_MAX_PORTS); + userrule_32->fw_ip6opt = rule->fw_ip6opt ; + userrule_32->fw_ip6nopt = rule->fw_ip6nopt; + userrule_32->fw_tcpf = rule->fw_tcpf; + userrule_32->fw_tcpnf = rule->fw_tcpnf; + bcopy( rule->fw_icmp6types, userrule_32->fw_icmp6types, sizeof(rule->fw_icmp6types)); + 
userrule_32->fw_in_if = rule->fw_in_if; + userrule_32->fw_out_if = rule->fw_out_if; + userrule_32->timestamp = rule->timestamp; + userrule_32->fw_un.fu_divert_port = rule->fw_un.fu_divert_port; + userrule_32->fw_prot = rule->fw_prot; + userrule_32->fw_nports = rule->fw_nports; +} + + +static void +cp_from_user_32( struct ip6_fw_32 *userrule_32, struct ip6_fw *rule) +{ + rule->version = userrule_32->version; + rule->context = CAST_DOWN(void *, userrule_32->context); + rule->fw_pcnt = userrule_32->fw_pcnt; + rule->fw_bcnt = userrule_32->fw_bcnt; + rule->fw_src = userrule_32->fw_src; + rule->fw_dst = userrule_32->fw_dst; + rule->fw_smsk = userrule_32->fw_smsk; + rule->fw_dmsk = userrule_32->fw_dmsk; + rule->fw_number = userrule_32->fw_number; + rule->fw_flg = userrule_32->fw_flg; + rule->fw_ipflg = userrule_32->fw_ipflg; + bcopy( userrule_32->fw_pts, rule->fw_pts, IPV6_FW_MAX_PORTS); + rule->fw_ip6opt = userrule_32->fw_ip6opt; + rule->fw_ip6nopt = userrule_32->fw_ip6nopt; + rule->fw_tcpf = userrule_32->fw_tcpf; + rule->fw_tcpnf = userrule_32->fw_tcpnf; + bcopy( userrule_32->fw_icmp6types, rule->fw_icmp6types, sizeof(userrule_32->fw_icmp6types)); + rule->fw_in_if = userrule_32->fw_in_if; + rule->fw_out_if = userrule_32->fw_out_if; + rule->timestamp = CAST_DOWN(long, userrule_32->timestamp); + rule->fw_un.fu_divert_port = userrule_32->fw_un.fu_divert_port; + rule->fw_prot = userrule_32->fw_prot; + rule->fw_nports = userrule_32->fw_nports; +} + static int ip6_fw_ctl(struct sockopt *sopt) { @@ -1279,22 +1280,45 @@ ip6_fw_ctl(struct sockopt *sopt) int spl; int valsize; struct ip6_fw rule; + int is64user=0; + size_t userrulesize; if (securelevel >= 3 && (sopt->sopt_dir != SOPT_GET || sopt->sopt_name != IPV6_FW_GET)) return (EPERM); + if ( proc_is64bit(sopt->sopt_p) ){ + is64user = 1; + userrulesize = sizeof( struct ip6_fw_64 ); + } else + userrulesize = sizeof( struct ip6_fw_32 ); + /* We ALWAYS expect the client to pass in a rule structure so that we can * check the 
version of the API that they are using. In the case of a * IPV6_FW_GET operation, the first rule of the output buffer passed to us * must have the version set. */ - if (!sopt->sopt_val || sopt->sopt_valsize < sizeof rule) return EINVAL; + if (!sopt->sopt_val || sopt->sopt_valsize < userrulesize) return EINVAL; /* save sopt->sopt_valsize */ valsize = sopt->sopt_valsize; - if ((error = sooptcopyin(sopt, &rule, sizeof(rule), sizeof(rule)))) - return error; - + + if (is64user){ + struct ip6_fw_64 userrule_64; + + if ((error = sooptcopyin(sopt, &userrule_64, userrulesize, userrulesize))) + return error; + + cp_from_user_64( &userrule_64, &rule ); + } + else { + struct ip6_fw_32 userrule_32; + + if ((error = sooptcopyin(sopt, &userrule_32, userrulesize, userrulesize))) + return error; + + cp_from_user_32( &userrule_32, &rule ); + } + if (rule.version != IPV6_FW_CURRENT_API_VERSION) return EINVAL; rule.version = 0xFFFFFFFF; /* version is meaningless once rules "make it in the door". */ @@ -1305,21 +1329,38 @@ ip6_fw_ctl(struct sockopt *sopt) struct ip6_fw_chain *fcp; struct ip6_fw *buf; size_t size = 0; + size_t rulesize = 0; spl = splnet(); + + if ( is64user ) + rulesize = sizeof(struct ip6_fw_64 ); + else + rulesize = sizeof(struct ip6_fw_32 ); + LIST_FOREACH(fcp, &ip6_fw_chain, chain) - size += sizeof *buf; + size += rulesize; buf = _MALLOC(size, M_TEMP, M_WAITOK); if (!buf) error = ENOBUFS; else { - struct ip6_fw *bp = buf; + //struct ip6_fw *bp = buf; + caddr_t bp = (caddr_t)buf; + LIST_FOREACH(fcp, &ip6_fw_chain, chain) { - bcopy(fcp->rule, bp, sizeof *bp); - bp->version = IPV6_FW_CURRENT_API_VERSION; - bp++; + //bcopy(fcp->rule, bp, sizeof *bp); + if ( is64user ){ + cp_to_user_64( (struct ip6_fw_64*)bp, fcp->rule); + } + else { + cp_to_user_32( (struct ip6_fw_32*)bp, fcp->rule); + } + + ( (struct ip6_fw*)bp)->version = IPV6_FW_CURRENT_API_VERSION; + //bp++; + bp += rulesize; } } diff --git a/bsd/netinet6/ip6_fw.h b/bsd/netinet6/ip6_fw.h index 1d996fef1..1cfa5e116 
100644 --- a/bsd/netinet6/ip6_fw.h +++ b/bsd/netinet6/ip6_fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,6 +43,8 @@ #ifndef _IP6_FW_H #define _IP6_FW_H +#include + /* * Define IPv6 Firewall event subclass, and associated events. */ @@ -78,11 +80,6 @@ #define KEV_IP6FW_ENABLE 4 - -#if !__LP64__ - -#include - #include #define IPV6_FW_CURRENT_API_VERSION 20 /* Version of this API */ @@ -119,12 +116,13 @@ union ip6_fw_if { * Warning: setsockopt() will fail if sizeof(struct ip_fw) > MLEN (108) */ + struct ip6_fw { u_int32_t version; /* Version of this structure. Should always be */ /* set to IP6_FW_CURRENT_API_VERSION by clients. */ void *context; /* Context that is usable by user processes to */ /* identify this rule. */ - u_long fw_pcnt,fw_bcnt; /* Packet and byte counters */ + u_int32_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ struct in6_addr fw_src, fw_dst; /* Source and destination IPv6 addr */ struct in6_addr fw_smsk, fw_dmsk; /* Mask for src and dest IPv6 addr */ u_short fw_number; /* Rule number */ @@ -150,6 +148,76 @@ struct ip6_fw { /* count of 0 means match all ports) */ }; +#if defined(KERNEL_PRIVATE) +#pragma pack(4) + +struct ip6_fw_32 { + u_int32_t version; /* Version of this structure. Should always be */ + /* set to IP6_FW_CURRENT_API_VERSION by clients. */ + user32_addr_t context; /* Context that is usable by user processes to */ + /* identify this rule. 
*/ + u_int32_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in6_addr fw_src, fw_dst; /* Source and destination IPv6 addr */ + struct in6_addr fw_smsk, fw_dmsk; /* Mask for src and dest IPv6 addr */ + u_short fw_number; /* Rule number */ + u_short fw_flg; /* Flags word */ +#define IPV6_FW_MAX_PORTS 10 /* A reasonable maximum */ + u_int fw_ipflg; /* IP flags word */ + u_short fw_pts[IPV6_FW_MAX_PORTS]; /* Array of port numbers to match */ + u_char fw_ip6opt,fw_ip6nopt; /* IPv6 options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ +#define IPV6_FW_ICMPTYPES_DIM (256 / (sizeof(unsigned) * 8)) + unsigned fw_icmp6types[IPV6_FW_ICMPTYPES_DIM]; /* ICMP types bitmap */ + user32_time_t timestamp; /* timestamp (tv_sec) of last match */ + union ip6_fw_if fw_in_if, fw_out_if;/* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IP6DIVERT) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + } fw_un; + u_char fw_prot; /* IPv6 protocol */ + u_char fw_nports; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ +}; + +#pragma pack() + +struct ip6_fw_64 { + u_int32_t version; /* Version of this structure. Should always be */ + /* set to IP6_FW_CURRENT_API_VERSION by clients. */ + __uint64_t context __attribute__((aligned(8))); /* Context that is usable by user processes to */ + /* identify this rule. 
*/ + u_int32_t fw_pcnt,fw_bcnt; /* Packet and byte counters */ + struct in6_addr fw_src, fw_dst; /* Source and destination IPv6 addr */ + struct in6_addr fw_smsk, fw_dmsk; /* Mask for src and dest IPv6 addr */ + u_short fw_number; /* Rule number */ + u_short fw_flg; /* Flags word */ +#define IPV6_FW_MAX_PORTS 10 /* A reasonable maximum */ + u_int fw_ipflg; /* IP flags word */ + u_short fw_pts[IPV6_FW_MAX_PORTS]; /* Array of port numbers to match */ + u_char fw_ip6opt,fw_ip6nopt; /* IPv6 options set/unset */ + u_char fw_tcpf,fw_tcpnf; /* TCP flags set/unset */ +#define IPV6_FW_ICMPTYPES_DIM (256 / (sizeof(unsigned) * 8)) + unsigned fw_icmp6types[IPV6_FW_ICMPTYPES_DIM]; /* ICMP types bitmap */ + user64_time_t timestamp; /* timestamp (tv_sec) of last match */ + union ip6_fw_if fw_in_if, fw_out_if;/* Incoming and outgoing interfaces */ + union { + u_short fu_divert_port; /* Divert/tee port (options IP6DIVERT) */ + u_short fu_skipto_rule; /* SKIPTO command rule number */ + u_short fu_reject_code; /* REJECT response code */ + } fw_un; + u_char fw_prot; /* IPv6 protocol */ + u_char fw_nports; /* N'of src ports and # of dst ports */ + /* in ports array (dst ports follow */ + /* src ports; max of 10 ports in all; */ + /* count of 0 means match all ports) */ +}; + + +#endif /* KERNEL_PRIVATE */ + #define IPV6_FW_GETNSRCP(rule) ((rule)->fw_nports & 0x0f) #define IPV6_FW_SETNSRCP(rule, n) do { \ (rule)->fw_nports &= ~0x0f; \ @@ -274,5 +342,4 @@ extern int ip6_fw_enable; #endif /* KERNEL_PRIVATE */ -#endif /* !__LP64__ */ #endif /* _IP6_FW_H */ diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index 1c08cc09f..390be10d1 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -150,6 +150,10 @@ extern int ipsec_bypass; #include +#if PF +#include +#endif /* PF */ + extern struct domain inet6domain; extern struct ip6protosw inet6sw[]; @@ -178,10 +182,11 @@ struct ifqueue ip6intrq; lck_mtx_t *ip6_mutex; lck_mtx_t *dad6_mutex; lck_mtx_t *nd6_mutex; -lck_mtx_t *prefix6_mutex; +lck_mtx_t *prefix6_mutex; +lck_mtx_t *scope6_mutex; lck_attr_t *ip6_mutex_attr; -lck_grp_t *ip6_mutex_grp; -lck_grp_attr_t *ip6_mutex_grp_attr; +lck_grp_t *ip6_mutex_grp; +lck_grp_attr_t *ip6_mutex_grp_attr; extern lck_mtx_t *inet6_domain_mutex; #endif extern int loopattach_done; @@ -232,7 +237,7 @@ ip6_init() for (i = 0; i < IPPROTO_MAX; i++) ip6_protox[i] = pr; for (pr = (struct ip6protosw*)inet6domain.dom_protosw; pr; pr = pr->pr_next) { - if(!((unsigned int)pr->pr_domain)) continue; /* If uninitialized, skip */ + if(!(pr->pr_domain)) continue; /* If uninitialized, skip */ if (pr->pr_domain->dom_family == PF_INET6 && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { ip6_protox[pr->pr_protocol] = pr; @@ -245,26 +250,28 @@ ip6_init() ip6_mutex_attr = lck_attr_alloc_init(); if ((ip6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { - printf("ip6_init: can't alloc ip6_mutex\n"); - return; + panic("ip6_init: can't alloc ip6_mutex\n"); } if ((dad6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { - printf("ip6_init: can't alloc dad6_mutex\n"); - return; + panic("ip6_init: can't alloc dad6_mutex\n"); } if ((nd6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { - printf("ip6_init: can't alloc nd6_mutex\n"); - return; + panic("ip6_init: can't alloc nd6_mutex\n"); } if ((prefix6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { - printf("ip6_init: can't alloc prefix6_mutex\n"); - return; + panic("ip6_init: can't alloc prefix6_mutex\n"); + } + + if ((scope6_mutex = lck_mtx_alloc_init(ip6_mutex_grp, ip6_mutex_attr)) == NULL) { + panic("ip6_init: 
can't alloc scope6_mutex\n"); } + inet6domain.dom_flags = DOM_REENTRANT; ip6intrq.ifq_maxlen = ip6qmaxlen; + in6_ifaddr_init(); nd6_init(); frag6_init(); icmp6_init(); @@ -295,7 +302,7 @@ ip6_init2( timeout(ip6_init2, (caddr_t)0, 1 * hz); return; } - in6_ifattach(lo_ifp, NULL, NULL); + (void) in6_ifattach(lo_ifp, NULL, NULL); #ifdef __APPLE__ /* nd6_timer_init */ @@ -545,6 +552,21 @@ ip6_input(m) } #endif +#if PF + /* Invoke inbound packet filter */ + lck_mtx_unlock(ip6_mutex); + if (pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE) != 0) { + if (m != NULL) { + panic("%s: unexpected packet %p\n", __func__, m); + /* NOTREACHED */ + } + /* Already freed by callee */ + return; + } + ip6 = mtod(m, struct ip6_hdr *); + lck_mtx_lock(ip6_mutex); +#endif /* PF */ + /* drop packets if interface ID portion is already filled */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) && @@ -598,42 +620,49 @@ ip6_input(m) * Multicast check */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - struct in6_multi *in6m = 0; + struct in6_multi *in6m = 0; + struct ifnet *ifp = m->m_pkthdr.rcvif; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); + in6_ifstat_inc(ifp, ifs6_in_mcast); /* * See if we belong to the destination multicast group on the * arrival interface. 
*/ - IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m); + ifnet_lock_shared(ifp); + IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); + ifnet_lock_done(ifp); if (in6m) ours = 1; else if (!ip6_mrouter) { ip6stat.ip6s_notmember++; ip6stat.ip6s_cantforward++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc(ifp, ifs6_in_discard); goto bad; } - deliverifp = m->m_pkthdr.rcvif; + deliverifp = ifp; goto hbhcheck; } + if (ip6_forward_rt.ro_rt != NULL) + RT_LOCK(ip6_forward_rt.ro_rt); /* * Unicast check */ if (ip6_forward_rt.ro_rt != NULL && - (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 && + (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr)) + &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr) && + ip6_forward_rt.ro_rt->generation_id == route_generation) { ip6stat.ip6s_forward_cachehit++; - else { + } else { struct sockaddr_in6 *dst6; - if (ip6_forward_rt.ro_rt) { - /* route is down or destination is different */ + if (ip6_forward_rt.ro_rt != NULL) { + /* route is down/stale or destination is different */ ip6stat.ip6s_forward_cachemiss++; + RT_UNLOCK(ip6_forward_rt.ro_rt); rtfree(ip6_forward_rt.ro_rt); - ip6_forward_rt.ro_rt = 0; + ip6_forward_rt.ro_rt = NULL; } bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6)); @@ -647,6 +676,8 @@ ip6_input(m) #endif rtalloc_ign((struct route *)&ip6_forward_rt, RTF_PRCLONING); + if (ip6_forward_rt.ro_rt != NULL) + RT_LOCK(ip6_forward_rt.ro_rt); } #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) @@ -671,15 +702,12 @@ ip6_input(m) * while it would be less efficient. Or, should we rather install a * reject route for such a case? 
*/ - if (ip6_forward_rt.ro_rt && + if (ip6_forward_rt.ro_rt != NULL && (ip6_forward_rt.ro_rt->rt_flags & (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && #if RTF_WASCLONED !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) && #endif -#if RTF_CLONED - !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) && -#endif #if 0 /* * The check below is redundant since the comparison of @@ -712,8 +740,10 @@ ip6_input(m) ia6->ia_ifa.if_ipackets++; ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; #endif + RT_UNLOCK(ip6_forward_rt.ro_rt); goto hbhcheck; } else { + RT_UNLOCK(ip6_forward_rt.ro_rt); /* address is not ready, so discard the packet. */ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", @@ -733,10 +763,13 @@ ip6_input(m) /* XXX do we need more sanity checks? */ ours = 1; deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */ + RT_UNLOCK(ip6_forward_rt.ro_rt); goto hbhcheck; } } #endif + if (ip6_forward_rt.ro_rt != NULL) + RT_UNLOCK(ip6_forward_rt.ro_rt); /* * Now there is no reason to process the packet if it's not our own @@ -767,6 +800,7 @@ ip6_input(m) * to the upper layers. */ } + ifafree(&ia6->ia_ifa); } } @@ -1343,6 +1377,20 @@ ip6_savecontrol(in6p, mp, ip6, m) mp = &(*mp)->m_next; } + if ((in6p->in6p_flags & IN6P_TCLASS) != 0) { + u_int32_t flowinfo; + int tclass; + + flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK); + flowinfo >>= 20; + + tclass = flowinfo & 0xff; + *mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass), + IPV6_TCLASS, IPPROTO_IPV6); + if (*mp) + mp = &(*mp)->m_next; + } + /* * IPV6_HOPOPTS socket option. 
Recall that we required super-user * privilege for the option (see ip6_ctloutput), but it might be too @@ -1774,8 +1822,8 @@ ip6_addaux( if (tag == NULL) { /* Allocate a tag */ tag = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_INET6, - sizeof(*tag), M_DONTWAIT); - + sizeof (struct ip6aux), M_DONTWAIT); + /* Attach it to the mbuf */ if (tag) { m_tag_prepend(m, tag); diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c index e3e214ed6..3f0735c63 100644 --- a/bsd/netinet6/ip6_mroute.c +++ b/bsd/netinet6/ip6_mroute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -243,12 +243,12 @@ static int pim6; #if UPCALL_TIMING #define UPCALL_MAX 50 -u_long upcall_data[UPCALL_MAX + 1]; +u_int32_t upcall_data[UPCALL_MAX + 1]; static void collate(); #endif /* UPCALL_TIMING */ static int get_sg_cnt(struct sioc_sg_req6 *); -static int get_mif6_cnt(struct sioc_mif_req6 *); +static int get_mif6_cnt(void *, int); static int ip6_mrouter_init(struct socket *, int, int); static int add_m6if(struct mif6ctl *); static int del_m6if(mifi_t *); @@ -352,24 +352,25 @@ ip6_mrouter_get(so, sopt) * Handle ioctl commands to obtain information from the cache */ int -mrt6_ioctl(cmd, data) - int cmd; - caddr_t data; +mrt6_ioctl(u_long cmd, caddr_t data) { int error = 0; switch (cmd) { case SIOCGETSGCNT_IN6: - return(get_sg_cnt((struct sioc_sg_req6 *)data)); - break; /* for safety */ - case SIOCGETMIFCNT_IN6: - return(get_mif6_cnt((struct sioc_mif_req6 *)data)); - break; /* for safety */ + return (get_sg_cnt((struct sioc_sg_req6 *)data)); + /* NOTREACHED */ + + case SIOCGETMIFCNT_IN6_32: + case SIOCGETMIFCNT_IN6_64: + return (get_mif6_cnt(data, cmd == SIOCGETMIFCNT_IN6_64)); + /* NOTREACHED */ + default: - return (EINVAL); + error = EINVAL; break; } - return error; + return (error); } /* @@ -399,20 +400,34 @@ get_sg_cnt(req) * returns the input 
and output packet and byte counts on the mif provided */ static int -get_mif6_cnt(req) - struct sioc_mif_req6 *req; +get_mif6_cnt(void *data, int p64) { - mifi_t mifi = req->mifi; + if (p64) { + struct sioc_mif_req6_64 *req = data; - if (mifi >= nummifs) - return EINVAL; + mifi_t mifi = req->mifi; - req->icount = mif6table[mifi].m6_pkt_in; - req->ocount = mif6table[mifi].m6_pkt_out; - req->ibytes = mif6table[mifi].m6_bytes_in; - req->obytes = mif6table[mifi].m6_bytes_out; + if (mifi >= nummifs) + return (EINVAL); - return 0; + req->icount = mif6table[mifi].m6_pkt_in; + req->ocount = mif6table[mifi].m6_pkt_out; + req->ibytes = mif6table[mifi].m6_bytes_in; + req->obytes = mif6table[mifi].m6_bytes_out; + } else { + struct sioc_mif_req6_32 *req = data; + + mifi_t mifi = req->mifi; + + if (mifi >= nummifs) + return (EINVAL); + + req->icount = mif6table[mifi].m6_pkt_in; + req->ocount = mif6table[mifi].m6_pkt_out; + req->ibytes = mif6table[mifi].m6_bytes_in; + req->obytes = mif6table[mifi].m6_bytes_out; + } + return (0); } static int @@ -600,10 +615,18 @@ add_m6if(mifcp) mifp = mif6table + mifcp->mif6c_mifi; if (mifp->m6_ifp) return EADDRINUSE; /* XXX: is it appropriate? 
*/ - if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index) + + ifnet_head_lock_shared(); + if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > if_index) { + ifnet_head_done(); return ENXIO; + } ifp = ifindex2ifnet[mifcp->mif6c_pifi]; + ifnet_head_done(); + if (ifp == NULL) { + return ENXIO; + } if (mifcp->mif6c_flags & MIFF_REGISTER) { if (reg_mif_num == (mifi_t)-1) { multicast_register_if.if_name = "register_mif"; @@ -708,7 +731,7 @@ add_m6fc(mfccp) struct mf6cctl *mfccp; { struct mf6c *rt; - u_long hash; + u_int32_t hash; struct rtdetq *rte; u_short nstl; @@ -857,9 +880,9 @@ static void collate(t) struct timeval *t; { - u_long d; + u_int32_t d; struct timeval tp; - u_long delta; + u_int32_t delta; GET_TIME(tp); @@ -887,7 +910,7 @@ del_m6fc(mfccp) struct sockaddr_in6 mcastgrp; struct mf6c *rt; struct mf6c **nptr; - u_long hash; + u_int32_t hash; origin = mfccp->mf6cc_origin; mcastgrp = mfccp->mf6cc_mcastgrp; @@ -1021,7 +1044,7 @@ ip6_mforward(ip6, ifp, m) struct mbuf *mb0; struct rtdetq *rte; - u_long hash; + u_int32_t hash; /* int i, npkts;*/ #if UPCALL_TIMING struct timeval tp; @@ -1498,7 +1521,9 @@ phyint_send(ip6, mifp, m) * on the outgoing interface, loop back a copy. */ dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + ifnet_lock_shared(ifp); IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); + ifnet_lock_done(ifp); if (in6m != NULL) { dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h index f38b57753..dd50d46bd 100644 --- a/bsd/netinet6/ip6_mroute.h +++ b/bsd/netinet6/ip6_mroute.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.2.2.2 2001/07/03 11:01:53 ume Exp $ */ /* $KAME: ip6_mroute.h,v 1.17 2001/02/10 02:05:52 itojun Exp $ */ @@ -80,7 +108,7 @@ typedef u_int32_t if_mask; #define NIFBITS (sizeof(if_mask) * NBBY) /* bits per mask */ #ifndef howmany -#define howmany(x, y) (((x) + ((y) - 1)) / (y)) +#define howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) #endif typedef struct if_set { @@ -144,7 +172,7 @@ struct mrt6stat { * XXX old version, superseded by mrt6msg. */ struct omrt6msg { - u_long unused1; + u_int32_t unused1; u_char im6_msgtype; /* what type of message */ #if 0 #define MRT6MSG_NOCACHE 1 @@ -157,7 +185,7 @@ struct omrt6msg { struct in6_addr im6_src, im6_dst; }; #endif -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ /* * Structure used to communicate from kernel to multicast router. 
@@ -201,6 +229,24 @@ struct sioc_mif_req6 { u_quad_t obytes; /* Output byte count on mif */ }; +#if defined(KERNEL_PRIVATE) +struct sioc_mif_req6_32 { + mifi_t mifi; + u_quad_t icount; + u_quad_t ocount; + u_quad_t ibytes; + u_quad_t obytes; +} __attribute__((aligned(4), packed)); + +struct sioc_mif_req6_64 { + mifi_t mifi; + u_quad_t icount __attribute__((aligned(8))); + u_quad_t ocount; + u_quad_t ibytes; + u_quad_t obytes; +} __attribute__((aligned(8))); +#endif /* KERNEL_PRIVATE */ + #ifdef PRIVATE /* * The kernel's multicast-interface structure. @@ -267,14 +313,14 @@ struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ }; #endif /* _NETINET_IP_MROUTE_H_ */ -#ifdef KERNEL +#ifdef KERNEL_PRIVATE extern struct mrt6stat mrt6stat; -int ip6_mrouter_set(struct socket *so, struct sockopt *sopt); -int ip6_mrouter_get(struct socket *so, struct sockopt *sopt); -int ip6_mrouter_done(void); -int mrt6_ioctl(int, caddr_t); -#endif /* KERNEL */ +extern int ip6_mrouter_set(struct socket *, struct sockopt *); +extern int ip6_mrouter_get(struct socket *, struct sockopt *); +extern int ip6_mrouter_done(void); +extern int mrt6_ioctl(u_long, caddr_t); +#endif /* KERNEL_PRIVATE */ #endif /* PRIVATE */ #endif /* !_NETINET6_IP6_MROUTE_H_ */ diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index b4c6491de..2827c1ca4 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */ /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ @@ -117,13 +145,14 @@ extern lck_mtx_t *nd6_mutex; #include +#if PF +#include +#endif /* PF */ + #ifndef __APPLE__ static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); #endif - -extern u_long route_generation; - struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; @@ -134,6 +163,9 @@ struct ip6_exthdrs { static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *sopt); +static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt); +static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt); +static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt); static int ip6_setmoptions(int, struct inpcb *, struct mbuf *); static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **); static int 
ip6_copyexthdr(struct mbuf **, caddr_t, int); @@ -154,8 +186,8 @@ extern lck_mtx_t *ip6_mutex; * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * - * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and - * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, + * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and + * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one, * which is rt_rmx.rmx_mtu. */ int @@ -176,7 +208,7 @@ ip6_output( struct sockaddr_in6 *dst; int error = 0; struct in6_ifaddr *ia = NULL; - u_long mtu; + u_int32_t mtu; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; @@ -550,18 +582,21 @@ skip_ipsec2:; ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; /* - * If there is a cached route, - * check that it is to the same destination - * and is still up. If not, free it and try again. + * If there is a cached route, check that it is to the same + * destination and is still up. If not, free it and try again. + * Test rt_flags without holding rt_lock for performance reasons; + * if the route is down it will hopefully be caught by the layer + * below (since it uses this route as a hint) or during the + * next transmit. 
*/ - if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - dst->sin6_family != AF_INET6 || - !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) || - ro->ro_rt->generation_id != route_generation)) { + if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || + dst->sin6_family != AF_INET6 || + !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) || + ro->ro_rt->generation_id != route_generation)) { rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; + ro->ro_rt = NULL; } - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); @@ -636,27 +671,28 @@ skip_ipsec2:; * if an interface is specified from an upper layer, * ifp must point it. */ - lck_mtx_lock(rt_mtx); - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { /* * non-bsdi always clone routes, if parent is * PRF_CLONING. */ - rtalloc_ign_locked((struct route *)ro, 0UL); + rtalloc_ign((struct route *)ro, 0); } - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; - lck_mtx_unlock(rt_mtx); /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } + RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia6(ro->ro_rt->rt_ifa); + if (ia != NULL) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; - lck_mtx_unlock(rt_mtx); + RT_UNLOCK(ro->ro_rt); m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); @@ -679,6 +715,22 @@ skip_ipsec2:; } } + /* + * if specified, try to fill in the traffic class field. + * do not override if a non-zero value is already set. + * we check the diffserv field and the ecn field separately. 
+ */ + if (opt && opt->ip6po_tclass >= 0) { + int mask = 0; + + if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) + mask |= 0xfc; + if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) + mask |= 0x03; + if (mask != 0) + ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); + } + if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; } else { @@ -703,8 +755,14 @@ skip_ipsec2:; * as an ancillary data. * Boundary check for ifindex is assumed to be already done. */ - if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) - ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex]; + if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) { + unsigned int index = opt->ip6po_pktinfo->ipi6_ifindex; + ifnet_head_lock_shared(); + if (index > 0 && index <= if_index) { + ifp = ifindex2ifnet[index]; + } + ifnet_head_done(); + } /* * If the destination is a node-local scope multicast, @@ -726,6 +784,22 @@ skip_ipsec2:; } } + /* + * if specified, try to fill in the traffic class field. + * do not override if a non-zero value is already set. + * we check the diffserv field and the ecn field separately. + */ + if (opt && opt->ip6po_tclass >= 0) { + int mask = 0; + + if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) + mask |= 0xfc; + if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) + mask |= 0x03; + if (mask != 0) + ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); + } + if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; @@ -737,22 +811,23 @@ skip_ipsec2:; * ``net'' ff00::/8). 
*/ if (ifp == NULL) { - lck_mtx_lock(rt_mtx); - if (ro->ro_rt == 0) { - ro->ro_rt = rtalloc1_locked( - (struct sockaddr *)&ro->ro_dst, 0, 0UL); + if (ro->ro_rt == NULL) { + ro->ro_rt = rtalloc1( + (struct sockaddr *)&ro->ro_dst, 0, 0); } - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { ip6stat.ip6s_noroute++; - lck_mtx_unlock(rt_mtx); error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; } + RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia6(ro->ro_rt->rt_ifa); + if (ia != NULL) + ifaref(&ia->ia_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; - lck_mtx_unlock(rt_mtx); + RT_UNLOCK(ro->ro_rt); } if ((flags & IPV6_FORWARDING) == 0) @@ -827,13 +902,14 @@ skip_ipsec2:; /* The first hop and the final destination may differ. */ struct sockaddr_in6 *sin6_fin = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, - &finaldst))) { + if (ro_pmtu->ro_rt != NULL && + (!(ro_pmtu->ro_rt->rt_flags & RTF_UP) || + ro_pmtu->ro_rt->generation_id != route_generation || + !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) { rtfree(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)0; + ro_pmtu->ro_rt = NULL; } - if (ro_pmtu->ro_rt == 0) { + if (ro_pmtu->ro_rt == NULL) { bzero(sin6_fin, sizeof(*sin6_fin)); sin6_fin->sin6_family = AF_INET6; sin6_fin->sin6_len = sizeof(struct sockaddr_in6); @@ -843,8 +919,13 @@ skip_ipsec2:; } } if (ro_pmtu->ro_rt != NULL) { - u_int32_t ifmtu = IN6_LINKMTU(ifp); + u_int32_t ifmtu; + lck_rw_lock_shared(nd_if_rwlock); + ifmtu = IN6_LINKMTU(ifp); + lck_rw_done(nd_if_rwlock); + + RT_LOCK_SPIN(ro_pmtu->ro_rt); mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu || mtu == 0) { /* @@ -862,8 +943,11 @@ skip_ipsec2:; if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ } + RT_UNLOCK(ro_pmtu->ro_rt); } else { + lck_rw_lock_shared(nd_if_rwlock); mtu = IN6_LINKMTU(ifp); + 
lck_rw_done(nd_if_rwlock); } /* @@ -888,11 +972,17 @@ skip_ipsec2:; * of source and destination, which should already be assured. * Larger scopes than link will be supported in the future. */ + u_short index = 0; origifp = NULL; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) - origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])]; + index = ntohs(ip6->ip6_src.s6_addr16[1]); else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) - origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])]; + index = ntohs(ip6->ip6_dst.s6_addr16[1]); + ifnet_head_lock_shared(); + if (index > 0 && index <= if_index) { + origifp = ifindex2ifnet[index]; + } + ifnet_head_done(); /* * XXX: origifp can be NULL even in those two cases above. * For example, if we remove the (only) link-local address @@ -971,6 +1061,25 @@ skip_ipsec2:; m->m_pkthdr.rcvif = NULL; } +#if PF + lck_mtx_unlock(ip6_mutex); + + /* Invoke outbound packet filter */ + error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE); + + lck_mtx_lock(ip6_mutex); + + if (error) { + if (m != NULL) { + panic("%s: unexpected packet %p\n", __func__, m); + /* NOTREACHED */ + } + /* Already freed by callee */ + goto done; + } + ip6 = mtod(m, struct ip6_hdr *); +#endif /* PF */ + /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. 
@@ -1154,10 +1263,8 @@ skip_ipsec2:; lck_mtx_unlock(ip6_mutex); if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */ rtfree(ro->ro_rt); - ro->ro_rt = NULL; } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { rtfree(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = NULL; } #if IPSEC @@ -1165,6 +1272,8 @@ skip_ipsec2:; key_freesp(sp, KEY_SADB_UNLOCKED); #endif /* IPSEC */ + if (ia != NULL) + ifafree(&ia->ia_ifa); return(error); freehdrs: @@ -1363,23 +1472,22 @@ ip6_ctloutput(so, sopt) { int privileged; struct inpcb *in6p = sotoinpcb(so); - int error, optval; + int error = 0, optval = 0; int level, op = -1, optname = 0; int optlen = 0; - struct proc *p = NULL; + struct proc *p; - level = error = optval = 0; - if (sopt == NULL) + if (sopt == NULL) { panic("ip6_ctloutput: arg soopt is NULL"); - else { - level = sopt->sopt_level; - op = sopt->sopt_dir; - optname = sopt->sopt_name; - optlen = sopt->sopt_valsize; - p = sopt->sopt_p; + /* NOTREACHED */ } + level = sopt->sopt_level; + op = sopt->sopt_dir; + optname = sopt->sopt_name; + optlen = sopt->sopt_valsize; + p = sopt->sopt_p; - privileged = (p == 0 || proc_suser(p)) ? 
0 : 1; + privileged = (proc_suser(p) == 0); if (level == IPPROTO_IPV6) { switch (op) { @@ -1423,6 +1531,7 @@ ip6_ctloutput(so, sopt) case IPV6_CHECKSUM: case IPV6_FAITH: + case IPV6_RECVTCLASS: case IPV6_V6ONLY: if (optlen != sizeof(int)) { error = EINVAL; @@ -1481,6 +1590,10 @@ do { \ else in6p->in6p_vflag |= INP_IPV4; break; + case IPV6_RECVTCLASS: + /* cannot mix with RFC2292 XXX */ + OPTSET(IN6P_TCLASS); + break; } break; @@ -1526,6 +1639,17 @@ do { \ break; #undef OPTSET + case IPV6_TCLASS: + if (optlen != sizeof(optval)) { + error = EINVAL; + break; + } + error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); + if (error) + break; + error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), &in6p->in6p_outputopts); + break; + case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: @@ -1538,7 +1662,8 @@ do { \ break; } /* XXX */ - MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER); + MGET(m, sopt->sopt_p != kernproc ? + M_WAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; @@ -1633,6 +1758,10 @@ do { \ struct mbuf *m; m = m_copym(in6p->in6p_options, 0, M_COPYALL, M_WAIT); + if (m == NULL) { + error = ENOBUFS; + break; + } error = soopt_mcopyout(sopt, m); if (error == 0) m_freem(m); @@ -1646,6 +1775,7 @@ do { \ case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: + case IPV6_RECVTCLASS: switch (optname) { case IPV6_UNICAST_HOPS: @@ -1676,6 +1806,10 @@ do { \ optval = 0; break; } + case IPV6_RECVTCLASS: + optval = OPTBIT(IN6P_TCLASS); + break; + } error = sooptcopyout(sopt, &optval, sizeof optval); @@ -1715,6 +1849,10 @@ do { \ sizeof optval); break; + case IPV6_TCLASS: + error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); + break; + case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: @@ -1727,7 +1865,8 @@ do { \ if (error == 0) error = sooptcopyout(sopt, mtod(m, char *), m->m_len); - m_freem(m); + if (m != NULL) + m_freem(m); } break; @@ -1797,9 +1936,8 @@ ip6_pcbopts( 
struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; - int error = 0; + int error = 0, priv; struct proc *p = sopt->sopt_p; - int priv = 0; /* turn off any old options. */ if (opt) { @@ -1810,8 +1948,11 @@ ip6_pcbopts( printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, 1, -1); - } else + } else { opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); + if (opt == NULL) + return ENOBUFS; + } *pktopt = NULL; if (!m || m->m_len == 0) { @@ -1824,9 +1965,9 @@ ip6_pcbopts( return(0); } + priv = (proc_suser(p) == 0); + /* set options specified by user. */ - if (p && !proc_suser(p)) - priv = 1; if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) { ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */ FREE(opt, M_IP6OPT); @@ -1836,17 +1977,84 @@ ip6_pcbopts( return(0); } +static int +ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt) +{ + struct ip6_pktopts *opt; + + opt = *pktopt; + if (opt == NULL) { + opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); + ip6_initpktopts(opt); + *pktopt = opt; + } + + return (ip6_setpktopt(optname, buf, len, opt)); +} + +static int +ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) +{ + void *optdata = NULL; + int optdatalen = 0; + int deftclass = 0; + int error = 0; + + switch (optname) { + case IPV6_TCLASS: + if (pktopt && pktopt->ip6po_tclass >= 0) + optdata = &pktopt->ip6po_tclass; + else + optdata = &deftclass; + optdatalen = sizeof(int); + break; + default: /* should not happen */ +#ifdef DIAGNOSTIC + panic("ip6_getpcbopt: unexpected option\n"); +#endif + return (ENOPROTOOPT); + } + + error = sooptcopyout(sopt, optdata, optdatalen); + return (error); +} + +static int +ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt) +{ + switch (optname) { + case IPV6_TCLASS: + { + int tclass; + + if (len != sizeof(int)) + return (EINVAL); + tclass = *(int *)buf; + if (tclass < -1 || tclass > 255) + return (EINVAL); + + 
opt->ip6po_tclass = tclass; + break; + } + + default: + return (ENOPROTOOPT); + } /* end of switch */ + + return (0); +} + /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void -init_ip6pktopts(opt) +ip6_initpktopts(opt) struct ip6_pktopts *opt; { - bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ + opt->ip6po_tclass = -1; /* -1 means default traffic class */ } void @@ -1864,6 +2072,8 @@ ip6_clearpktopts(pktopt, needfree, optname) } if (optname == -1) pktopt->ip6po_hlim = -1; + if (optname == -1) + pktopt->ip6po_tclass = -1; if (optname == -1) { if (needfree && pktopt->ip6po_nexthop) FREE(pktopt->ip6po_nexthop, M_IP6OPT); @@ -1925,6 +2135,7 @@ ip6_copypktopts(src, canwait) bzero(dst, sizeof(*dst)); dst->ip6po_hlim = src->ip6po_hlim; + dst->ip6po_tclass = src->ip6po_tclass; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); @@ -2030,11 +2241,16 @@ ip6_setmoptions( break; } bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); - if (ifindex < 0 || if_index < ifindex) { + + ifnet_head_lock_shared(); + /* Don't need to check is ifindex is < 0 since it's unsigned */ + if (if_index < ifindex) { error = ENXIO; /* XXX EINVAL? */ + ifnet_head_done(); break; } ifp = ifindex2ifnet[ifindex]; + ifnet_head_done(); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; @@ -2096,13 +2312,18 @@ ip6_setmoptions( mreq = mtod(m, struct ipv6_mreq *); /* * If the interface is specified, validate it. + * + * Don't need to check if it's < 0, since it's unsigned */ - if (mreq->ipv6mr_interface < 0 - || if_index < mreq->ipv6mr_interface) { + ifnet_head_lock_shared(); + if (if_index < mreq->ipv6mr_interface) { + ifnet_head_done(); error = ENXIO; /* XXX EINVAL? 
*/ break; } - + ifp = ifindex2ifnet[mreq->ipv6mr_interface]; + ifnet_head_done(); + if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { /* * We use the unspecified address to specify to accept @@ -2124,16 +2345,14 @@ ip6_setmoptions( if (mreq->ipv6mr_interface != 0) { struct in_ifaddr *ifa; - ifp = ifindex2ifnet[mreq->ipv6mr_interface]; - - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) { if (ifa->ia_ifp == ifp) { v4req.imr_interface = IA_SIN(ifa)->sin_addr; break; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); if (v4req.imr_multiaddr.s_addr == 0) { /* Interface has no IPv4 address. */ @@ -2178,8 +2397,7 @@ ip6_setmoptions( rtfree(ro.ro_rt); ro.ro_rt = NULL; } - } else - ifp = ifindex2ifnet[mreq->ipv6mr_interface]; + } /* * See if we found an interface, and confirm that it @@ -2245,13 +2463,17 @@ ip6_setmoptions( /* * If an interface address was specified, get a pointer * to its ifnet structure. + * + * Don't need to check if it's < 0, since it's unsigned. */ - if (mreq->ipv6mr_interface < 0 - || if_index < mreq->ipv6mr_interface) { + ifnet_head_lock_shared(); + if (if_index < mreq->ipv6mr_interface) { + ifnet_head_done(); error = ENXIO; /* XXX EINVAL? 
*/ break; } ifp = ifindex2ifnet[mreq->ipv6mr_interface]; + ifnet_head_done(); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { if (suser(kauth_cred_get(), 0)) { @@ -2267,14 +2489,14 @@ ip6_setmoptions( if (ifp != NULL) { struct in_ifaddr *ifa; - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); TAILQ_FOREACH(ifa, &in_ifaddrhead, ia_link) { if (ifa->ia_ifp == ifp) { v4req.imr_interface = IA_SIN(ifa)->sin_addr; break; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); } error = ip_dropmembership(imo, &v4req); @@ -2360,6 +2582,8 @@ ip6_getmoptions(optname, im6o, mp) u_int *hlim, *loop, *ifindex; *mp = m_get(M_WAIT, MT_HEADER); /*XXX*/ + if (*mp == NULL) + return ENOBUFS; switch (optname) { @@ -2432,7 +2656,7 @@ ip6_setpktoptions(control, opt, priv, needcopy) if (control == 0 || opt == 0) return(EINVAL); - init_ip6pktopts(opt); + ip6_initpktopts(opt); /* * XXX: Currently, we assume all the optional information is stored @@ -2461,6 +2685,8 @@ ip6_setpktoptions(control, opt, priv, needcopy) opt->ip6po_pktinfo = _MALLOC(sizeof(struct in6_pktinfo), M_IP6OPT, M_WAITOK); + if (opt->ip6po_pktinfo == NULL) + return ENOBUFS; bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo, sizeof(struct in6_pktinfo)); } else @@ -2471,8 +2697,7 @@ ip6_setpktoptions(control, opt, priv, needcopy) opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] = htons(opt->ip6po_pktinfo->ipi6_ifindex); - if (opt->ip6po_pktinfo->ipi6_ifindex > if_index - || opt->ip6po_pktinfo->ipi6_ifindex < 0) { + if (opt->ip6po_pktinfo->ipi6_ifindex > if_index) { return(ENXIO); } @@ -2511,6 +2736,15 @@ ip6_setpktoptions(control, opt, priv, needcopy) return(EINVAL); break; + case IPV6_TCLASS: + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) + return(EINVAL); + + opt->ip6po_tclass = *(int *)CMSG_DATA(cm); + if (opt->ip6po_tclass < -1 || opt->ip6po_tclass > 255) + return (EINVAL); + break; + case IPV6_NEXTHOP: if (!priv) return(EPERM); @@ -2524,6 +2758,8 @@ ip6_setpktoptions(control, opt, priv, needcopy) 
opt->ip6po_nexthop = _MALLOC(*CMSG_DATA(cm), M_IP6OPT, M_WAITOK); + if (opt->ip6po_nexthop == NULL) + return ENOBUFS; bcopy(CMSG_DATA(cm), opt->ip6po_nexthop, *CMSG_DATA(cm)); @@ -2547,6 +2783,8 @@ ip6_setpktoptions(control, opt, priv, needcopy) if (needcopy) { opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_WAITOK); + if (opt->ip6po_hbh == NULL) + return ENOBUFS; bcopy(hbh, opt->ip6po_hbh, hbhlen); } else opt->ip6po_hbh = hbh; @@ -2585,6 +2823,8 @@ ip6_setpktoptions(control, opt, priv, needcopy) if (needcopy) { *newdest = _MALLOC(destlen, M_IP6OPT, M_WAITOK); + if (*newdest == NULL) + return ENOBUFS; bcopy(dest, *newdest, destlen); } else *newdest = dest; @@ -2622,6 +2862,8 @@ ip6_setpktoptions(control, opt, priv, needcopy) if (needcopy) { opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_WAITOK); + if (opt->ip6po_rthdr == NULL) + return ENOBUFS; bcopy(rth, opt->ip6po_rthdr, rthlen); } else opt->ip6po_rthdr = rth; diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index f42089272..a895cad31 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.2.2.2 2001/07/03 11:01:54 ume Exp $ */ /* $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */ @@ -149,6 +177,8 @@ struct ip6_pktopts { /* Destination options header (after a routing header) */ struct ip6_dest *ip6po_dest2; + + int ip6po_tclass; /* traffic class */ }; /* @@ -309,7 +339,6 @@ int icmp6_dgram_attach(struct socket *, int , struct proc *); struct in6_ifaddr; void ip6_init(void); -void ip6intr(void); void ip6_input(struct mbuf *); struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *); void ip6_freepcbopts(struct ip6_pktopts *); @@ -328,10 +357,6 @@ int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, u_int32_t *); void ip6_savecontrol(struct inpcb *, struct mbuf **, struct ip6_hdr *, struct mbuf *); -void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, - u_int32_t *); -int ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t); - void ip6_forward(struct mbuf *, struct route_in6 *, int, int); void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); @@ -340,7 +365,7 @@ int ip6_output(struct mbuf *, struct ip6_pktopts *, int, struct ip6_moptions *, struct ifnet **, int locked); int ip6_ctloutput(struct socket *, struct sockopt *sopt); -void init_ip6pktopts(struct ip6_pktopts *); +void ip6_initpktopts(struct ip6_pktopts *); int ip6_setpktoptions(struct mbuf *, struct ip6_pktopts *, int, int); void ip6_clearpktopts(struct ip6_pktopts *, int, int); 
struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); @@ -353,16 +378,12 @@ int frag6_input(struct mbuf **, int *); void frag6_slowtimo(void); void frag6_drain(void); -void rip6_init(void); int rip6_input(struct mbuf **mp, int *offset); void rip6_ctlinput(int, struct sockaddr *, void *); int rip6_ctloutput(struct socket *so, struct sockopt *sopt); int rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *); -int rip6_usrreq(struct socket *, - int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *); int dest6_input(struct mbuf **, int *); -int none_input(struct mbuf **, int *); #endif /* KERNEL */ #endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet6/ip6protosw.h b/bsd/netinet6/ip6protosw.h index cc4b14191..303f61964 100644 --- a/bsd/netinet6/ip6protosw.h +++ b/bsd/netinet6/ip6protosw.h @@ -1,4 +1,32 @@ /* $FreeBSD: src/sys/netinet6/ip6protosw.h,v 1.2.2.3 2001/07/03 11:01:54 ume Exp $ */ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $KAME: ip6protosw.h,v 1.22 2001/02/08 18:02:08 itojun Exp $ */ /* @@ -120,6 +148,9 @@ struct ip6ctlparam { u_int8_t ip6c_nxt; /* final next header field */ }; +#ifdef __LP64__ // K64todo: might also make sense for the generic case +#pragma pack(4) +#endif struct ip6protosw { short pr_type; /* socket type used for */ struct domain *pr_domain; /* domain protocol a member of */ @@ -155,8 +186,8 @@ struct ip6protosw { struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ #ifdef __APPLE__ - int (*pr_lock) (struct socket *so, int locktype, int debug); /* lock function for protocol */ - int (*pr_unlock) (struct socket *so, int locktype, int debug); /* unlock for protocol */ + int (*pr_lock) (struct socket *so, int locktype, void *debug); /* lock function for protocol */ + int (*pr_unlock) (struct socket *so, int locktype, void *debug); /* unlock for protocol */ #ifdef _KERN_LOCKS_H_ lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); /* unlock for protocol */ #else @@ -165,9 +196,12 @@ struct ip6protosw { /* Filter hooks */ TAILQ_HEAD(pr6_sfilter, NFDescriptor) pr_sfilter; struct ip6protosw *pr_next; /* Chain for domain */ - u_long reserved[1]; + u_int32_t reserved[1]; #endif }; +#ifdef __LP64__ // K64todo: might also make sense for the generic case +#pragma pack() +#endif -#endif KERNEL_PRIVATE -#endif _NETINET6_IP6PROTOSW_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET6_IP6PROTOSW_H_ */ diff --git a/bsd/netinet6/ipcomp6.h 
b/bsd/netinet6/ipcomp6.h index fa72e314e..8fd6fdba9 100644 --- a/bsd/netinet6/ipcomp6.h +++ b/bsd/netinet6/ipcomp6.h @@ -43,6 +43,6 @@ extern int ipcomp6_input(struct mbuf **, int *); extern int ipcomp6_output(struct mbuf *, u_char *, struct mbuf *, struct secasvar *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #endif /*_NETINET6_IPCOMP6_H_*/ diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 97dd1761e..b65d9a5ef 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/ipsec.c,v 1.3.2.7 2001/07/19 06:37:23 kris Exp $ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ @@ -130,6 +158,7 @@ int ip4_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ int ip4_esp_randpad = -1; int esp_udp_encap_port = 0; static int sysctl_def_policy SYSCTL_HANDLER_ARGS; +extern int natt_keepalive_interval; extern u_int32_t natt_now; struct ipsec_tag; @@ -245,7 +274,7 @@ static int ipsec64_encapsulate(struct mbuf *, struct secasvar *); static struct ipsec_tag *ipsec_addaux(struct mbuf *); static struct ipsec_tag *ipsec_findaux(struct mbuf *); static void ipsec_optaux(struct mbuf *, struct ipsec_tag *); -void ipsec_send_natt_keepalive(struct secasvar *sav); +int ipsec_send_natt_keepalive(struct secasvar *sav); static int sysctl_def_policy SYSCTL_HANDLER_ARGS @@ -253,6 +282,8 @@ sysctl_def_policy SYSCTL_HANDLER_ARGS int old_policy = ip4_def_policy.policy; int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); +#pragma unused(arg1, arg2) + if (ip4_def_policy.policy != IPSEC_POLICY_NONE && ip4_def_policy.policy != IPSEC_POLICY_DISCARD) { ip4_def_policy.policy = old_policy; @@ -2089,7 +2120,7 @@ ipsec4_hdrsiz(m, dir, inp) KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("ipsec4_hdrsiz: size:%lu.\n", (unsigned long)size)); + printf("ipsec4_hdrsiz: size:%lu.\n", (u_int32_t)size)); key_freesp(sp, KEY_SADB_UNLOCKED); return size; @@ -2129,7 +2160,7 @@ ipsec6_hdrsiz(m, dir, in6p) KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("ipsec6_hdrsiz: size:%lu.\n", (unsigned long)size)); + printf("ipsec6_hdrsiz: size:%lu.\n", (u_int32_t)size)); key_freesp(sp, KEY_SADB_UNLOCKED); return size; @@ -2933,9 +2964,10 @@ ipsec4_output( state->ro = &sav->sah->sa_route; state->dst = (struct sockaddr 
*)&state->ro->ro_dst; dst4 = (struct sockaddr_in *)state->dst; - if (state->ro->ro_rt - && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 - || dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { + if (state->ro->ro_rt != NULL && + (state->ro->ro_rt->generation_id != route_generation || + !(state->ro->ro_rt->rt_flags & RTF_UP) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { rtfree(state->ro->ro_rt); state->ro->ro_rt = NULL; } @@ -2946,12 +2978,20 @@ ipsec4_output( rtalloc(state->ro); } if (state->ro->ro_rt == 0) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + OSAddAtomic(1, &ipstat.ips_noroute); error = EHOSTUNREACH; goto bad; } - /* adjust state->dst if tunnel endpoint is offlink */ + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. + */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst4 = (struct sockaddr_in *)state->dst; @@ -3367,9 +3407,10 @@ ipsec6_output_tunnel( ro4 = &sav->sah->sa_route; dst4 = (struct sockaddr_in *)&ro4->ro_dst; - if (ro4->ro_rt - && ((ro4->ro_rt->rt_flags & RTF_UP) == 0 - || dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { + if (ro4->ro_rt != NULL && + (ro4->ro_rt->generation_id != route_generation || + !(ro4->ro_rt->rt_flags & RTF_UP) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { rtfree(ro4->ro_rt); ro4->ro_rt = NULL; } @@ -3380,7 +3421,7 @@ ipsec6_output_tunnel( rtalloc(ro4); } if (ro4->ro_rt == NULL) { - OSAddAtomic(1, (SInt32*)&ipstat.ips_noroute); + OSAddAtomic(1, &ipstat.ips_noroute); error = EHOSTUNREACH; goto bad; } @@ -3448,9 +3489,10 @@ ipsec6_output_tunnel( state->ro = &sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; - if (state->ro->ro_rt - && 
((state->ro->ro_rt->rt_flags & RTF_UP) == 0 - || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { + if (state->ro->ro_rt != NULL && + (state->ro->ro_rt->generation_id != route_generation || + !(state->ro->ro_rt->rt_flags & RTF_UP) || + !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { rtfree(state->ro->ro_rt); state->ro->ro_rt = NULL; } @@ -3468,7 +3510,15 @@ ipsec6_output_tunnel( goto bad; } - /* adjust state->dst if tunnel endpoint is offlink */ + /* + * adjust state->dst if tunnel endpoint is offlink + * + * XXX: caching rt_gateway value in the state is + * not really good, since it may point elsewhere + * when the gateway gets modified to a larger + * sockaddr via rt_setgate(). This is currently + * addressed by SA_SIZE roundup in that routine. + */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; @@ -3832,9 +3882,7 @@ ipsec_copypkt(m) MGETHDR(mnew, M_DONTWAIT, MT_HEADER); /* MAC-OK */ if (mnew == NULL) goto fail; - mnew->m_pkthdr = n->m_pkthdr; M_COPY_PKTHDR(mnew, n); - mnew->m_flags = n->m_flags & M_COPYFLAGS; } else { MGET(mnew, M_DONTWAIT, MT_DATA); @@ -4076,7 +4124,7 @@ ipsec_clearhist( ipsec_optaux(m, itag); } -__private_extern__ void +__private_extern__ int ipsec_send_natt_keepalive( struct secasvar *sav) { @@ -4087,10 +4135,13 @@ ipsec_send_natt_keepalive( lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - if ((esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0) return; - + if ((esp_udp_encap_port & 0xFFFF) == 0 || sav->remote_ike_port == 0) return FALSE; + + // natt timestamp may have changed... reverify + if ((natt_now - sav->natt_last_activity) < natt_keepalive_interval) return FALSE; + m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) return; + if (m == NULL) return FALSE; /* * Create a UDP packet complete with IP header. 
@@ -4108,8 +4159,13 @@ ipsec_send_natt_keepalive( ip->ip_len = m->m_len; ip->ip_ttl = ip_defttl; ip->ip_p = IPPROTO_UDP; - ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; - ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; + if (sav->sah->dir != IPSEC_DIR_INBOUND) { + ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; + ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; + } else { + ip->ip_src = ((struct sockaddr_in*)&sav->sah->saidx.dst)->sin_addr; + ip->ip_dst = ((struct sockaddr_in*)&sav->sah->saidx.src)->sin_addr; + } uh->uh_sport = htons((u_short)esp_udp_encap_port); uh->uh_dport = htons(sav->remote_ike_port); uh->uh_ulen = htons(1 + sizeof(struct udphdr)); @@ -4117,7 +4173,9 @@ ipsec_send_natt_keepalive( *(u_int8_t*)((char*)m_mtod(m) + sizeof(struct ip) + sizeof(struct udphdr)) = 0xFF; error = ip_output(m, NULL, &sav->sah->sa_route, IP_NOIPSEC, NULL, NULL); - if (error == 0) + if (error == 0) { sav->natt_last_activity = natt_now; - + return TRUE; + } + return FALSE; } diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index 0fba00274..8bb0feace 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -36,6 +36,7 @@ #ifndef _NETINET6_IPSEC_H_ #define _NETINET6_IPSEC_H_ +#include #include #include @@ -43,10 +44,10 @@ #include /* lock for IPSec stats */ -lck_grp_t *sadb_stat_mutex_grp; -lck_grp_attr_t *sadb_stat_mutex_grp_attr; -lck_attr_t *sadb_stat_mutex_attr; -lck_mtx_t *sadb_stat_mutex; +extern lck_grp_t *sadb_stat_mutex_grp; +extern lck_grp_attr_t *sadb_stat_mutex_grp_attr; +extern lck_attr_t *sadb_stat_mutex_attr; +extern lck_mtx_t *sadb_stat_mutex; #define IPSEC_STAT_INCREMENT(x) \ @@ -346,15 +347,17 @@ extern struct socket *ipsec_getsocket(struct mbuf *); extern int ipsec_addhist(struct mbuf *, int, u_int32_t); extern struct ipsec_history *ipsec_gethist(struct mbuf *, int *); extern void ipsec_clearhist(struct mbuf *); -#endif KERNEL -#endif KERNEL_PRIVATE +#endif /* 
KERNEL */ +#endif /* KERNEL_PRIVATE */ #ifndef KERNEL +__BEGIN_DECLS extern caddr_t ipsec_set_policy(char *, int); extern int ipsec_get_policylen(caddr_t); extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); -#endif KERNEL +__END_DECLS +#endif /* KERNEL */ #endif /* _NETINET6_IPSEC_H_ */ diff --git a/bsd/netinet6/ipsec6.h b/bsd/netinet6/ipsec6.h index 319670313..7a4a59050 100644 --- a/bsd/netinet6/ipsec6.h +++ b/bsd/netinet6/ipsec6.h @@ -79,6 +79,6 @@ extern int ipsec6_output_tunnel(struct ipsec_output_state *, struct secpolicy *, int, int*); extern int ipsec6_tunnel_validate(struct mbuf *, int, u_int, struct secasvar *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_IPSEC6_H_ +#endif /* _NETINET6_IPSEC6_H_ */ diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c index afc28c6fd..36e09c8b8 100644 --- a/bsd/netinet6/mld6.c +++ b/bsd/netinet6/mld6.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/mld6.c,v 1.4.2.2 2001/07/03 11:01:54 ume Exp $ */ /* $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ */ @@ -142,7 +170,7 @@ mld6_init() hbh_buf[5] = IP6OPT_RTALERT_LEN - 2; bcopy((caddr_t)&rtalert_code, &hbh_buf[6], sizeof(u_int16_t)); - init_ip6pktopts(&ip6_opts); + ip6_initpktopts(&ip6_opts); ip6_opts.ip6po_hbh = hbh; } @@ -414,11 +442,14 @@ mld6_sendpkt( * it is more convenient when inserting the hop-by-hop option later. */ MGETHDR(mh, M_DONTWAIT, MT_HEADER); - if (mh == NULL) + if (mh == NULL) { + ifafree(&ia->ia_ifa); return; + } MGET(md, M_DONTWAIT, MT_DATA); if (md == NULL) { m_free(mh); + ifafree(&ia->ia_ifa); return; } mh->m_next = md; @@ -488,5 +519,6 @@ mld6_sendpkt( break; } } + ifafree(&ia->ia_ifa); } diff --git a/bsd/netinet6/mld6_var.h b/bsd/netinet6/mld6_var.h index cd583fef1..bbeda1ff9 100644 --- a/bsd/netinet6/mld6_var.h +++ b/bsd/netinet6/mld6_var.h @@ -49,6 +49,6 @@ void mld6_input(struct mbuf *, int); void mld6_start_listening(struct in6_multi *); void mld6_stop_listening(struct in6_multi *); void mld6_fasttimeo(void); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_MLD6_VAR_H_ +#endif /* _NETINET6_MLD6_VAR_H_ */ diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index 66f4cd2b9..5f2c2abcd 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/nd6.c,v 1.20 2002/08/02 20:49:14 rwatson Exp $ */ /* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ @@ -50,8 +78,9 @@ #include #include #include +#include #include -#include +#include #define DONT_WARN_OBSOLETE #include @@ -62,6 +91,7 @@ #include #include +#include #include #include #include @@ -78,8 +108,10 @@ #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ +#define SA(p) ((struct sockaddr *)(p)) #define SIN6(s) ((struct sockaddr_in6 *)s) #define SDL(s) ((struct sockaddr_dl *)s) +#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) /* timer values */ int nd6_prune = 1; /* walk list every 1 seconds */ @@ -103,9 +135,45 @@ int nd6_debug = 0; /* for debugging? */ static int nd6_inuse, nd6_allocated; -struct llinfo_nd6 llinfo_nd6 = {&llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0 }; +/* + * Synchronization notes: + * + * The global list of ND entries are stored in llinfo_nd6; an entry + * gets inserted into the list when the route is created and gets + * removed from the list when it is deleted; this is done as part + * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest(). + * + * Because rnh_lock and rt_lock for the entry are held during those + * operations, the same locks (and thus lock ordering) must be used + * elsewhere to access the relevant data structure fields: + * + * ln_next, ln_prev, ln_rt + * + * - Routing lock (rnh_lock) + * + * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags + * + * - Routing entry lock (rt_lock) + * + * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime + * as the route entry itself. When a route is deleted (RTM_DELETE), + * it is simply removed from the global list but the memory is not + * freed until the route itself is freed. 
+ */ +struct llinfo_nd6 llinfo_nd6 = { + &llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0 +}; + +/* Protected by nd_if_rwlock */ size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */ struct nd_ifinfo *nd_ifinfo = NULL; + +static lck_grp_attr_t *nd_if_rwlock_grp_attr; +static lck_grp_t *nd_if_rwlock_grp; +static lck_attr_t *nd_if_rwlock_attr; +lck_rw_t *nd_if_rwlock; + +/* Protected by nd6_mutex */ struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; @@ -113,12 +181,41 @@ int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; static struct sockaddr_in6 all1_sa; static int regen_tmpaddr(struct in6_ifaddr *); -extern lck_mtx_t *rt_mtx; extern lck_mtx_t *ip6_mutex; extern lck_mtx_t *nd6_mutex; static void nd6_slowtimo(void *ignored_arg); +static struct llinfo_nd6 *nd6_llinfo_alloc(void); +static void nd6_llinfo_free(void *); +static void nd6_siocgdrlst(void *, int); +static void nd6_siocgprlst(void *, int); + +/* + * Insertion and removal from llinfo_nd6 must be done with rnh_lock held. 
+ */ +#define LN_DEQUEUE(_ln) do { \ + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \ + RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \ + (_ln)->ln_next->ln_prev = (_ln)->ln_prev; \ + (_ln)->ln_prev->ln_next = (_ln)->ln_next; \ + (_ln)->ln_prev = (_ln)->ln_next = NULL; \ + (_ln)->ln_flags &= ~ND6_LNF_IN_USE; \ +} while (0) + +#define LN_INSERTHEAD(_ln) do { \ + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \ + RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \ + (_ln)->ln_next = llinfo_nd6.ln_next; \ + llinfo_nd6.ln_next = (_ln); \ + (_ln)->ln_prev = &llinfo_nd6; \ + (_ln)->ln_next->ln_prev = (_ln); \ + (_ln)->ln_flags |= ND6_LNF_IN_USE; \ +} while (0) + +static struct zone *llinfo_nd6_zone; +#define LLINFO_ND6_ZONE_MAX 256 /* maximum elements in zone */ +#define LLINFO_ND6_ZONE_NAME "llinfo_nd6" /* name for zone */ void nd6_init() @@ -127,7 +224,7 @@ nd6_init() int i; if (nd6_init_done) { - log(LOG_NOTICE, "nd6_init called more than once(ignored)\n"); + log(LOG_NOTICE, "nd6_init called more than once (ignored)\n"); return; } @@ -139,44 +236,88 @@ nd6_init() /* initialization of the default router list */ TAILQ_INIT(&nd_defrouter); + nd_if_rwlock_grp_attr = lck_grp_attr_alloc_init(); + nd_if_rwlock_grp = lck_grp_alloc_init("nd_if_rwlock", + nd_if_rwlock_grp_attr); + nd_if_rwlock_attr = lck_attr_alloc_init(); + nd_if_rwlock = lck_rw_alloc_init(nd_if_rwlock_grp, nd_if_rwlock_attr); + + llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6), + LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0, + LLINFO_ND6_ZONE_NAME); + if (llinfo_nd6_zone == NULL) + panic("%s: failed allocating llinfo_nd6_zone", __func__); + + zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE); + nd6_init_done = 1; /* start timer */ timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); } -void -nd6_ifattach( - struct ifnet *ifp) +static struct llinfo_nd6 * +nd6_llinfo_alloc(void) +{ + return (zalloc(llinfo_nd6_zone)); +} + +static void +nd6_llinfo_free(void *arg) +{ + struct llinfo_nd6 *ln = arg; + + if (ln->ln_next 
!= NULL || ln->ln_prev != NULL) { + panic("%s: trying to free %p when it is in use", __func__, ln); + /* NOTREACHED */ + } + + /* Just in case there's anything there, free it */ + if (ln->ln_hold != NULL) { + m_freem(ln->ln_hold); + ln->ln_hold = NULL; + } + + zfree(llinfo_nd6_zone, ln); +} + +int +nd6_ifattach(struct ifnet *ifp) { /* * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. */ + lck_rw_lock_exclusive(nd_if_rwlock); if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) { size_t n; caddr_t q; + size_t newlim = nd_ifinfo_indexlim; - while (if_index >= nd_ifinfo_indexlim) - nd_ifinfo_indexlim <<= 1; + while (if_index >= newlim) + newlim <<= 1; /* grow nd_ifinfo */ - n = nd_ifinfo_indexlim * sizeof(struct nd_ifinfo); + n = newlim * sizeof(struct nd_ifinfo); q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK); + if (q == NULL) { + lck_rw_done(nd_if_rwlock); + return ENOBUFS; + } bzero(q, n); + nd_ifinfo_indexlim = newlim; if (nd_ifinfo) { bcopy((caddr_t)nd_ifinfo, q, n/2); - /* Radar 5589193: - * SU fix purposely leaks the old nd_ifinfo array - * if we grow the arraw to more than 32 interfaces - * Fix for future release is to use proper locking. - + /* + * We might want to pattern fill the old + * array to catch use-after-free cases. + */ FREE((caddr_t)nd_ifinfo, M_IP6NDP); - */ } nd_ifinfo = (struct nd_ifinfo *)q; } + lck_rw_done(nd_if_rwlock); #define ND nd_ifinfo[ifp->if_index] @@ -187,18 +328,23 @@ nd6_ifattach( * the linkmtu member, which was not suitable because it could be * initialized via "ifconfig mtu". 
*/ - if (ND.basereachable) - return; - - ND.linkmtu = ifindex2ifnet[ifp->if_index]->if_mtu; + lck_rw_lock_shared(nd_if_rwlock); + if (ND.basereachable) { + lck_rw_done(nd_if_rwlock); + return 0; + } + ND.linkmtu = ifp->if_mtu; ND.chlim = IPV6_DEFHLIM; ND.basereachable = REACHABLE_TIME; ND.reachable = ND_COMPUTE_RTIME(ND.basereachable); ND.retrans = RETRANS_TIMER; ND.receivedra = 0; ND.flags = ND6_IFF_PERFORMNUD; + lck_rw_done(nd_if_rwlock); nd6_setmtu(ifp); #undef ND + + return 0; } /* @@ -209,14 +355,15 @@ void nd6_setmtu(struct ifnet *ifp) { struct nd_ifinfo *ndi; - u_long oldmaxmtu; + u_int32_t oldmaxmtu, maxmtu; /* * Make sure IPv6 is enabled for the interface first, * because this can be called directly from SIOCSIFMTU for IPv4 */ - + lck_rw_lock_shared(nd_if_rwlock); if (ifp->if_index >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); return; /* we're out of bound for nd_ifinfo */ } @@ -234,7 +381,7 @@ nd6_setmtu(struct ifnet *ifp) * ND level maxmtu and linkmtu (the latter obtained via RA) are done * via IN6_LINKMTU() which does further checking against if_mtu. */ - ndi->maxmtu = ifp->if_mtu; + maxmtu = ndi->maxmtu = ifp->if_mtu; /* * Decreasing the interface MTU under IPV6 minimum MTU may cause @@ -244,12 +391,13 @@ nd6_setmtu(struct ifnet *ifp) */ if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu: " - "new link MTU on %s%d (%lu) is too small for IPv6\n", - ifp->if_name, ifp->if_unit, (unsigned long)ndi->maxmtu); + "new link MTU on %s%d (%u) is too small for IPv6\n", + ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu); } + lck_rw_done(nd_if_rwlock); /* also adjust in6_maxmtu if necessary. 
*/ - if (ndi->maxmtu > in6_maxmtu) + if (maxmtu > in6_maxmtu) in6_setmaxmtu(); } @@ -416,151 +564,208 @@ nd6_timer( struct in6_ifaddr *ia6, *nia6; struct in6_addrlifetime *lt6; struct timeval timenow; - int count = 0; getmicrotime(&timenow); - - +again: + /* + * The global list llinfo_nd6 is modified by nd6_rtrequest() and is + * therefore protected by rnh_lock. For obvious reasons, we cannot + * hold rnh_lock across calls that might lead to code paths which + * attempt to acquire rnh_lock, else we deadlock. Hence for such + * cases we drop rt_lock and rnh_lock, make the calls, and repeat the + * loop. To ensure that we don't process the same entry more than + * once in a single timeout, we mark the "already-seen" entries with + * ND6_LNF_TIMER_SKIP flag. At the end of the loop, we do a second + * pass thru the entries and clear the flag so they can be processed + * during the next timeout. + */ + lck_mtx_lock(rnh_lock); ln = llinfo_nd6.ln_next; - while (ln && ln != &llinfo_nd6) { + while (ln != NULL && ln != &llinfo_nd6) { struct rtentry *rt; struct sockaddr_in6 *dst; - struct llinfo_nd6 *next = ln->ln_next; - /* XXX: used for the DELAY case only: */ - struct nd_ifinfo *ndi = NULL; + struct llinfo_nd6 *next; + struct nd_ifinfo ndi; + + /* ln_next/prev/rt is protected by rnh_lock */ + next = ln->ln_next; + rt = ln->ln_rt; + RT_LOCK(rt); - if ((rt = ln->ln_rt) == NULL) { + /* We've seen this already; skip it */ + if (ln->ln_flags & ND6_LNF_TIMER_SKIP) { + RT_UNLOCK(rt); ln = next; continue; } + + /* rt->rt_ifp should never be NULL */ if ((ifp = rt->rt_ifp) == NULL) { - ln = next; - continue; + panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__, + ln, rt); + /* NOTREACHED */ } - ndi = &nd_ifinfo[ifp->if_index]; - dst = (struct sockaddr_in6 *)rt_key(rt); - - count++; - - if (ln->ln_expire > timenow.tv_sec) { - - /* Radar 6871508 Check if we have too many cache entries. - * In that case purge 20% of the table to make space - * for the new entries.
- * This is a bit crude but keeps the deletion in timer - * thread only. - */ - - if ((ip6_neighborgcthresh >= 0 && - nd6_inuse >= ip6_neighborgcthresh) && - ((count % 5) == 0)) { - - if (ln->ln_state > ND6_LLINFO_INCOMPLETE) - ln->ln_state = ND6_LLINFO_STALE; - else - ln->ln_state = ND6_LLINFO_PURGE; - ln->ln_expire = timenow.tv_sec; - /* fallthrough and call nd6_free() */ - } + /* rt_llinfo must always be equal to ln */ + if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) { + panic("%s: rt_llinfo(%p) is not equal to ln(%p)", + __func__, rt->rt_llinfo, ln); + /* NOTREACHED */ + } - else { - ln = next; - continue; - } + /* rt_key should never be NULL */ + dst = (struct sockaddr_in6 *)rt_key(rt); + if (dst == NULL) { + panic("%s: rt(%p) key is NULL ln(%p)", __func__, + rt, ln); + /* NOTREACHED */ } - /* sanity check */ - if (!rt) { - printf("rt=0 in nd6_timer(ln=%p)\n", ln); + /* Set the flag in case we jump to "again" */ + ln->ln_flags |= ND6_LNF_TIMER_SKIP; + + if (ln->ln_expire > timenow.tv_sec) { + RT_UNLOCK(rt); ln = next; continue; } - if (rt->rt_llinfo && (struct llinfo_nd6 *)rt->rt_llinfo != ln) { - printf("rt_llinfo(%p) is not equal to ln(%p)\n", - rt->rt_llinfo, ln); - ln = next; - continue; - } - if (!dst) { - printf("dst=0 in nd6_timer(ln=%p)\n", ln); + + /* Make a copy (we're using it read-only anyway) */ + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); + RT_UNLOCK(rt); ln = next; continue; } + ndi = nd_ifinfo[ifp->if_index]; + lck_rw_done(nd_if_rwlock); + + RT_LOCK_ASSERT_HELD(rt); switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->ln_asked < nd6_mmaxtries) { ln->ln_asked++; ln->ln_expire = timenow.tv_sec + - nd_ifinfo[ifp->if_index].retrans / 1000; + ndi.retrans / 1000; + RT_ADDREF_LOCKED(rt); + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, 0); + RT_REMREF(rt); } else { struct mbuf *m = ln->ln_hold; ln->ln_hold = NULL; - if (m) { - if 
(rt->rt_ifp) { - /* - * Fake rcvif to make ICMP error - * more helpful in diagnosing - * for the receiver. - * XXX: should we consider - * older rcvif? - */ - m->m_pkthdr.rcvif = rt->rt_ifp; - } + if (m != NULL) { + /* + * Fake rcvif to make ICMP error + * more helpful in diagnosing + * for the receiver. + * XXX: should we consider + * older rcvif? + */ + m->m_pkthdr.rcvif = ifp; + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); - ln->ln_hold = NULL; + } else { + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); } - next = nd6_free(rt); + nd6_free(rt); } - break; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + goto again; + case ND6_LLINFO_REACHABLE: if (ln->ln_expire) { ln->ln_state = ND6_LLINFO_STALE; ln->ln_expire = timenow.tv_sec + nd6_gctimer; } + RT_UNLOCK(rt); break; case ND6_LLINFO_STALE: case ND6_LLINFO_PURGE: /* Garbage Collection(RFC 2461 5.3) */ - if (ln->ln_expire) - next = nd6_free(rt); + if (ln->ln_expire) { + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); + nd6_free(rt); + lck_mtx_assert(rnh_lock, + LCK_MTX_ASSERT_NOTOWNED); + goto again; + } else { + RT_UNLOCK(rt); + } break; case ND6_LLINFO_DELAY: - if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { + if ((ndi.flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->ln_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; ln->ln_expire = timenow.tv_sec + - ndi->retrans / 1000; + ndi.retrans / 1000; + RT_ADDREF_LOCKED(rt); + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); nd6_ns_output(ifp, &dst->sin6_addr, - &dst->sin6_addr, - ln, 0, 0); - } else { - ln->ln_state = ND6_LLINFO_STALE; /* XXX */ - ln->ln_expire = timenow.tv_sec + nd6_gctimer; + &dst->sin6_addr, ln, 0, 0); + lck_mtx_assert(rnh_lock, + LCK_MTX_ASSERT_NOTOWNED); + RT_REMREF(rt); + goto again; } + ln->ln_state = ND6_LLINFO_STALE; /* XXX */ + ln->ln_expire = timenow.tv_sec + nd6_gctimer; + RT_UNLOCK(rt); break; + case ND6_LLINFO_PROBE: if (ln->ln_asked < nd6_umaxtries) { ln->ln_asked++; 
ln->ln_expire = timenow.tv_sec + - nd_ifinfo[ifp->if_index].retrans / 1000; + ndi.retrans / 1000; + RT_ADDREF_LOCKED(rt); + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); nd6_ns_output(ifp, &dst->sin6_addr, - &dst->sin6_addr, ln, 0, 0); + &dst->sin6_addr, ln, 0, 0); + RT_REMREF(rt); } else { - next = nd6_free(rt); + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); + nd6_free(rt); } + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + goto again; + + default: + RT_UNLOCK(rt); break; } ln = next; } - + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + /* Now clear the flag from all entries */ + ln = llinfo_nd6.ln_next; + while (ln != NULL && ln != &llinfo_nd6) { + struct rtentry *rt = ln->ln_rt; + struct llinfo_nd6 *next = ln->ln_next; + + RT_LOCK_SPIN(rt); + if (ln->ln_flags & ND6_LNF_TIMER_SKIP) + ln->ln_flags &= ~ND6_LNF_TIMER_SKIP; + RT_UNLOCK(rt); + ln = next; + } + lck_mtx_unlock(rnh_lock); + /* expire default router list */ lck_mtx_lock(nd6_mutex); dr = TAILQ_FIRST(&nd_defrouter); @@ -589,6 +794,14 @@ nd6_timer( if (IFA6_IS_INVALID(ia6)) { int regen = 0; + /* + * Extra reference for ourselves; it's no-op if + * we don't have to regenerate temporary address, + * otherwise it protects the address from going + * away since we drop nd6_mutex below. + */ + ifaref(&ia6->ia_ifa); + /* * If the expiring address is temporary, try * regenerating a new one. 
This would be useful when @@ -615,6 +828,9 @@ nd6_timer( in6_purgeaddr(&ia6->ia_ifa, 1); + /* Release extra reference taken above */ + ifafree(&ia6->ia_ifa); + if (regen) goto addrloop; /* XXX: see below */ } @@ -744,7 +960,7 @@ regen_tmpaddr( if (public_ifa6 != NULL) { int e; - if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) { + if ((e = in6_tmpifadd(public_ifa6, 0, M_WAITOK)) != 0) { log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return(-1); @@ -763,7 +979,7 @@ void nd6_purge( struct ifnet *ifp) { - struct llinfo_nd6 *ln, *nln; + struct llinfo_nd6 *ln; struct nd_defrouter *dr, *ndr, drany; struct nd_prefix *pr, *npr; @@ -801,8 +1017,14 @@ nd6_purge( } /* cancel default outgoing interface setting */ - if (nd6_defifindex == ifp->if_index) + if (nd6_defifindex == ifp->if_index) { + /* Release nd6_mutex as it will be acquired + * during nd6_setdefaultiface again + */ + lck_mtx_unlock(nd6_mutex); nd6_setdefaultiface(0); + lck_mtx_lock(nd6_mutex); + } if (!ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { /* refresh default router list */ @@ -818,23 +1040,48 @@ nd6_purge( * due to KAME goto ours hack. See RTM_RESOLVE case in * nd6_rtrequest(), and ip6_input(). 
*/ +again: + lck_mtx_lock(rnh_lock); ln = llinfo_nd6.ln_next; - while (ln && ln != &llinfo_nd6) { + while (ln != NULL && ln != &llinfo_nd6) { struct rtentry *rt; - struct sockaddr_dl *sdl; + struct llinfo_nd6 *nln; nln = ln->ln_next; rt = ln->ln_rt; - if (rt && rt->rt_gateway && - rt->rt_gateway->sa_family == AF_LINK) { - sdl = (struct sockaddr_dl *)rt->rt_gateway; - if (sdl->sdl_index == ifp->if_index) - nln = nd6_free(rt); + RT_LOCK(rt); + if (rt->rt_gateway != NULL && + rt->rt_gateway->sa_family == AF_LINK && + SDL(rt->rt_gateway)->sdl_index == ifp->if_index) { + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); + /* + * See comments on nd6_timer() for reasons why + * this loop is repeated; we bite the costs of + * going thru the same llinfo_nd6 more than once + * here, since this purge happens during detach, + * and that unlike the timer case, it's possible + * there's more than one purges happening at the + * same time (thus a flag wouldn't buy anything). + */ + nd6_free(rt); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + goto again; + } else { + RT_UNLOCK(rt); } ln = nln; } + lck_mtx_unlock(rnh_lock); } +/* + * Upon success, the returned route will be locked and the caller is + * responsible for releasing the reference and doing RT_UNLOCK(rt). + * This routine does not require rnh_lock to be held by the caller, + * although it needs to be indicated of such a case in order to call + * the correct variant of the relevant routing routines. + */ struct rtentry * nd6_lookup( struct in6_addr *addr6, @@ -852,23 +1099,34 @@ nd6_lookup( #if SCOPEDROUTING sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6); #endif - if (!rt_locked) - lck_mtx_lock(rt_mtx); - rt = rtalloc1_locked((struct sockaddr *)&sin6, create, 0UL); - if (rt && (rt->rt_flags & RTF_LLINFO) == 0) { - /* - * This is the case for the default route. - * If we want to create a neighbor cache for the address, we - * should free the route for the destination and allocate an - * interface route. 
- */ - if (create) { - rtfree_locked(rt); - rt = 0; + if (rt_locked) + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + rt = rt_locked ? rtalloc1_locked((struct sockaddr *)&sin6, create, 0) : + rtalloc1((struct sockaddr *)&sin6, create, 0); + + if (rt != NULL) { + RT_LOCK(rt); + if ((rt->rt_flags & RTF_LLINFO) == 0) { + /* + * This is the case for the default route. If we + * want to create a neighbor cache for the address, + * we should free the route for the destination and + * allocate an interface route. + */ + if (create) { + RT_UNLOCK(rt); + if (rt_locked) + rtfree_locked(rt); + else + rtfree(rt); + rt = NULL; + } } } - if (!rt) { + if (rt == NULL) { if (create && ifp) { + struct ifaddr *ifa; int e; /* @@ -878,13 +1136,9 @@ nd6_lookup( * This hack is necessary for a neighbor which can't * be covered by our own prefix. */ - struct ifaddr *ifa = - ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); - if (ifa == NULL) { - if (!rt_locked) - lck_mtx_unlock(rt_mtx); + ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); + if (ifa == NULL) return(NULL); - } /* * Create a new route. RTF_LLINFO is necessary @@ -892,37 +1146,34 @@ nd6_lookup( * destination in nd6_rtrequest which will be * called in rtrequest via ifa->ifa_rtrequest. 
*/ - if ((e = rtrequest_locked(RTM_ADD, (struct sockaddr *)&sin6, - ifa->ifa_addr, - (struct sockaddr *)&all1_sa, - (ifa->ifa_flags | - RTF_HOST | RTF_LLINFO) & - ~RTF_CLONING, - &rt)) != 0) { + if (!rt_locked) + lck_mtx_lock(rnh_lock); + if ((e = rtrequest_locked(RTM_ADD, + (struct sockaddr *)&sin6, ifa->ifa_addr, + (struct sockaddr *)&all1_sa, + (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) & + ~RTF_CLONING, &rt)) != 0) { if (e != EEXIST) - log(LOG_ERR, - "nd6_lookup: failed to add route for a " - "neighbor(%s), errno=%d\n", - ip6_sprintf(addr6), e); + log(LOG_ERR, "%s: failed to add route " + "for a neighbor(%s), errno=%d\n", + __func__, ip6_sprintf(addr6), e); } + if (!rt_locked) + lck_mtx_unlock(rnh_lock); ifafree(ifa); - if (rt == NULL) { - if (!rt_locked) - lck_mtx_unlock(rt_mtx); + if (rt == NULL) return(NULL); - } + + RT_LOCK(rt); if (rt->rt_llinfo) { - struct llinfo_nd6 *ln = - (struct llinfo_nd6 *)rt->rt_llinfo; + struct llinfo_nd6 *ln = rt->rt_llinfo; ln->ln_state = ND6_LLINFO_NOSTATE; } } else { - if (!rt_locked) - lck_mtx_unlock(rt_mtx); return(NULL); } } - rtunref(rt); + RT_LOCK_ASSERT_HELD(rt); /* * Validation for the entry. * Note that the check for rt_llinfo is necessary because a cloned @@ -935,20 +1186,23 @@ nd6_lookup( * use rt->rt_ifa->ifa_ifp, which would specify the REAL * interface. */ - if ((ifp && ifp->if_type !=IFT_PPP) && ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || + if (((ifp && (ifp->if_type != IFT_PPP)) && ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) == 0)) && + ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || (ifp && rt->rt_ifa->ifa_ifp != ifp))) { - if (!rt_locked) - lck_mtx_unlock(rt_mtx); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); if (create) { - log(LOG_DEBUG, "nd6_lookup: failed to lookup %s (if = %s)\n", - ip6_sprintf(addr6), ifp ? 
if_name(ifp) : "unspec"); + log(LOG_DEBUG, "%s: failed to lookup %s " + "(if = %s)\n", __func__, ip6_sprintf(addr6), + ifp ? if_name(ifp) : "unspec"); /* xxx more logs... kazu */ } return(NULL); - } - if (!rt_locked) - lck_mtx_unlock(rt_mtx); + } + /* + * Caller needs to release reference and call RT_UNLOCK(rt). + */ return(rt); } @@ -963,6 +1217,7 @@ nd6_is_addr_neighbor( int rt_locked) { struct ifaddr *ifa; + struct rtentry *rt; int i; #define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr) @@ -1002,10 +1257,15 @@ nd6_is_addr_neighbor( /* * Even if the address matches none of our addresses, it might be - * in the neighbor cache. + * in the neighbor cache. Callee returns a locked route upon + * success. */ - if (nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked) != NULL) + if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) { + RT_LOCK_ASSERT_HELD(rt); + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); return(1); + } return(0); #undef IFADDR6 @@ -1015,31 +1275,50 @@ nd6_is_addr_neighbor( /* * Free an nd6 llinfo entry. */ -struct llinfo_nd6 * +void nd6_free( struct rtentry *rt) { - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next; - struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + struct llinfo_nd6 *ln; + struct in6_addr in6; struct nd_defrouter *dr; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + RT_LOCK_ASSERT_NOTHELD(rt); + lck_mtx_lock(nd6_mutex); + + RT_LOCK(rt); + RT_ADDREF_LOCKED(rt); /* Extra ref */ + ln = rt->rt_llinfo; + in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; + + /* + * Prevent another thread from modifying rt_key, rt_gateway + * via rt_setgate() after the rt_lock is dropped by marking + * the route as defunct. + */ + rt->rt_flags |= RTF_CONDEMNED; + /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. 
*/ - if (!ip6_forwarding && (ip6_accept_rtadv || (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { - lck_mtx_lock(nd6_mutex); - dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, - rt->rt_ifp); + if (!ip6_forwarding && (ip6_accept_rtadv || + (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))-> + sin6_addr, rt->rt_ifp); - if ((ln && ln->ln_router) || dr) { + if (ln->ln_router || dr) { /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ + RT_UNLOCK(rt); rt6_flush(&in6, rt->rt_ifp); + } else { + RT_UNLOCK(rt); } if (dr) { @@ -1056,6 +1335,7 @@ nd6_free( * Below the state will be set correctly, * or the entry itself will be deleted. */ + RT_LOCK_SPIN(rt); ln->ln_state = ND6_LLINFO_INCOMPLETE; /* @@ -1064,6 +1344,7 @@ nd6_free( * before the default router selection, we perform * the check now. */ + RT_UNLOCK(rt); pfxlist_onlink_check(1); if (dr == TAILQ_FIRST(&nd_defrouter)) { @@ -1080,29 +1361,22 @@ nd6_free( defrouter_select(); } } - lck_mtx_unlock(nd6_mutex); + RT_LOCK_ASSERT_NOTHELD(rt); + } else { + RT_UNLOCK(rt); } - /* - * Before deleting the entry, remember the next entry as the - * return value. We need this because pfxlist_onlink_check() above - * might have freed other entries (particularly the old next entry) as - * a side effect (XXX). - */ - if (ln) - next = ln->ln_next; - else - next = 0; - + lck_mtx_unlock(nd6_mutex); /* * Detach the route from the routing tree and the list of neighbor * caches, and disable the route entry not to be used in already * cached routes. 
*/ - rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, + (void) rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), 0, (struct rtentry **)0); - return(next); + /* Extra ref held above; now free it */ + rtfree(rt); } /* @@ -1128,8 +1402,13 @@ nd6_nud_hint( if (!rt) { if (!dst6) return; - if (!(rt = nd6_lookup(dst6, 0, NULL, 0))) + /* Callee returns a locked route upon success */ + if ((rt = nd6_lookup(dst6, 0, NULL, 0)) == NULL) return; + RT_LOCK_ASSERT_HELD(rt); + } else { + RT_LOCK(rt); + RT_ADDREF_LOCKED(rt); } if ((rt->rt_flags & RTF_GATEWAY) != 0 || @@ -1137,12 +1416,12 @@ nd6_nud_hint( !rt->rt_llinfo || !rt->rt_gateway || rt->rt_gateway->sa_family != AF_LINK) { /* This is not a host route. */ - return; + goto done; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; if (ln->ln_state < ND6_LLINFO_REACHABLE) - return; + goto done; /* * if we get upper-layer reachability confirmation many times, @@ -1151,13 +1430,19 @@ nd6_nud_hint( if (!force) { ln->ln_byhint++; if (ln->ln_byhint > nd6_maxnudhint) - return; + goto done; } ln->ln_state = ND6_LLINFO_REACHABLE; - if (ln->ln_expire) + if (ln->ln_expire) { + lck_rw_lock_shared(nd_if_rwlock); ln->ln_expire = timenow.tv_sec + nd_ifinfo[rt->rt_ifp->if_index].reachable; + lck_rw_done(nd_if_rwlock); + } +done: + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } void @@ -1167,13 +1452,15 @@ nd6_rtrequest( __unused struct sockaddr *sa) { struct sockaddr *gate = rt->rt_gateway; - struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo; + struct llinfo_nd6 *ln = rt->rt_llinfo; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0,0,0,0,0,0,0,0,0,0,0,0,} }; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; struct timeval timenow; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); if ((rt->rt_flags & RTF_GATEWAY)) return; @@ -1189,9 +1476,27 @@ nd6_rtrequest( return; } - if (req == RTM_RESOLVE && - (nd6_need_cache(ifp) == 0 || /* stf case 
*/ - !nd6_is_addr_neighbor((struct sockaddr_in6 *)rt_key(rt), ifp, 1))) { + if (req == RTM_RESOLVE) { + int no_nd_cache; + + if (!nd6_need_cache(ifp)) { /* stf case */ + no_nd_cache = 1; + } else { + /* + * nd6_is_addr_neighbor() may call nd6_lookup(), + * therefore we drop rt_lock to avoid deadlock + * during the lookup. Using rt_key(rt) is still + * safe because it won't change while rnh_lock + * is held. + */ + RT_ADDREF_LOCKED(rt); + RT_UNLOCK(rt); + no_nd_cache = !nd6_is_addr_neighbor( + (struct sockaddr_in6 *)rt_key(rt), ifp, 1); + RT_LOCK(rt); + RT_REMREF_LOCKED(rt); + } + /* * FreeBSD and BSD/OS often make a cloned host route based * on a less-specific route (e.g. the default route). @@ -1206,8 +1511,10 @@ nd6_rtrequest( * so that ndp(8) will not try to get the neighbor information * of the destination. */ - rt->rt_flags &= ~RTF_LLINFO; - return; + if (no_nd_cache) { + rt->rt_flags &= ~RTF_LLINFO; + return; + } } getmicrotime(&timenow); @@ -1227,23 +1534,18 @@ nd6_rtrequest( * for a host route whose destination should be * treated as on-link. */ - rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl); - gate = rt->rt_gateway; - SDL(gate)->sdl_type = ifp->if_type; - SDL(gate)->sdl_index = ifp->if_index; - if (ln) - ln->ln_expire = timenow.tv_sec; -#if 1 - if (ln && ln->ln_expire == 0) { - /* kludge for desktops */ -#if 0 - printf("nd6_rtequest: time.tv_sec is zero; " - "treat it as 1\n"); -#endif - ln->ln_expire = 1; + if (rt_setgate(rt, rt_key(rt), + (struct sockaddr *)&null_sdl) == 0) { + gate = rt->rt_gateway; + SDL(gate)->sdl_type = ifp->if_type; + SDL(gate)->sdl_index = ifp->if_index; + /* + * In case we're called before 1.0 sec. + * has elapsed. + */ + if (ln != NULL) + ln->ln_expire = MAX(timenow.tv_sec, 1); } -#endif if ((rt->rt_flags & RTF_CLONING)) break; } @@ -1297,12 +1599,13 @@ nd6_rtrequest( * Case 2: This route may come from cloning, or a manual route * add with a LL address. 
*/ - R_Malloc(ln, struct llinfo_nd6 *, sizeof(*ln)); - rt->rt_llinfo = (caddr_t)ln; - if (!ln) { + rt->rt_llinfo = ln = nd6_llinfo_alloc(); + if (ln == NULL) { log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); break; } + rt->rt_llinfo_free = nd6_llinfo_free; + nd6_inuse++; nd6_allocated++; Bzero(ln, sizeof(*ln)); @@ -1322,13 +1625,44 @@ nd6_rtrequest( * initialized in rtrequest(), so rt_expire is 0. */ ln->ln_state = ND6_LLINFO_NOSTATE; - ln->ln_expire = timenow.tv_sec; + /* In case we're called before 1.0 sec. has elapsed */ + ln->ln_expire = MAX(timenow.tv_sec, 1); } rt->rt_flags |= RTF_LLINFO; - ln->ln_next = llinfo_nd6.ln_next; - llinfo_nd6.ln_next = ln; - ln->ln_prev = &llinfo_nd6; - ln->ln_next->ln_prev = ln; + LN_INSERTHEAD(ln); + + /* + * If we have too many cache entries, initiate immediate + * purging for some "less recently used" entries. Note that + * we cannot directly call nd6_free() here because it would + * cause re-entering rtable related routines triggering an LOR + * problem. 
+ */ + if (ip6_neighborgcthresh >= 0 && + nd6_inuse >= ip6_neighborgcthresh) { + int i; + + for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) { + struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev; + struct rtentry *rt_end = ln_end->ln_rt; + + /* Move this entry to the head */ + RT_LOCK(rt_end); + LN_DEQUEUE(ln_end); + LN_INSERTHEAD(ln_end); + + if (ln_end->ln_expire == 0) { + RT_UNLOCK(rt_end); + continue; + } + if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE) + ln_end->ln_state = ND6_LLINFO_STALE; + else + ln_end->ln_state = ND6_LLINFO_PURGE; + ln_end->ln_expire = timenow.tv_sec; + RT_UNLOCK(rt_end); + } + } /* * check if rt_key(rt) is one of my address assigned @@ -1359,6 +1693,7 @@ nd6_rtrequest( rtsetifa(rt, ifa); } } + ifafree(ifa); } else if (rt->rt_flags & RTF_ANNOUNCE) { ln->ln_expire = 0; ln->ln_state = ND6_LLINFO_REACHABLE; @@ -1408,166 +1743,281 @@ nd6_rtrequest( in6_delmulti(in6m, 0); } nd6_inuse--; - ln->ln_next->ln_prev = ln->ln_prev; - ln->ln_prev->ln_next = ln->ln_next; - ln->ln_prev = NULL; - rt->rt_llinfo = 0; + /* + * Unchain it but defer the actual freeing until the route + * itself is to be freed. rt->rt_llinfo still points to + * llinfo_nd6, and likewise, ln->ln_rt stil points to this + * route entry, except that RTF_LLINFO is now cleared. 
+ */ + if (ln->ln_flags & ND6_LNF_IN_USE) + LN_DEQUEUE(ln); rt->rt_flags &= ~RTF_LLINFO; - if (ln->ln_hold) + if (ln->ln_hold != NULL) m_freem(ln->ln_hold); ln->ln_hold = NULL; - R_Free((caddr_t)ln); } } -int -nd6_ioctl( - u_long cmd, - caddr_t data, - struct ifnet *ifp) +static void +nd6_siocgdrlst(void *data, int data_is_64) { - struct in6_drlist *drl = (struct in6_drlist *)data; - struct in6_prlist *prl = (struct in6_prlist *)data; - struct in6_ndireq *ndi = (struct in6_ndireq *)data; - struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; - struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; - struct nd_defrouter *dr, any; - struct nd_prefix *pr; - struct rtentry *rt; - int i = 0, error = 0; + struct in6_drlist_64 *drl_64 = (struct in6_drlist_64 *)data; + struct in6_drlist_32 *drl_32 = (struct in6_drlist_32 *)data; + struct nd_defrouter *dr; + int i = 0; - switch (cmd) { - case SIOCGDRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - */ - lck_mtx_lock(nd6_mutex); - bzero(drl, sizeof(*drl)); - dr = TAILQ_FIRST(&nd_defrouter); + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + + bzero(data, data_is_64 ? 
sizeof (*drl_64) : sizeof (*drl_32)); + dr = TAILQ_FIRST(&nd_defrouter); + if (data_is_64) { + /* For 64-bit process */ while (dr && i < DRLSTSIZ) { - drl->defrouter[i].rtaddr = dr->rtaddr; - if (IN6_IS_ADDR_LINKLOCAL(&drl->defrouter[i].rtaddr)) { + drl_64->defrouter[i].rtaddr = dr->rtaddr; + if (IN6_IS_ADDR_LINKLOCAL(&drl_64->defrouter[i].rtaddr)) { /* XXX: need to this hack for KAME stack */ - drl->defrouter[i].rtaddr.s6_addr16[1] = 0; - } else + drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0; + } else { log(LOG_ERR, "default router list contains a " "non-linklocal address(%s)\n", - ip6_sprintf(&drl->defrouter[i].rtaddr)); - - drl->defrouter[i].flags = dr->flags; - drl->defrouter[i].rtlifetime = dr->rtlifetime; - drl->defrouter[i].expire = dr->expire; - drl->defrouter[i].if_index = dr->ifp->if_index; + ip6_sprintf(&drl_64->defrouter[i].rtaddr)); + } + drl_64->defrouter[i].flags = dr->flags; + drl_64->defrouter[i].rtlifetime = dr->rtlifetime; + drl_64->defrouter[i].expire = dr->expire; + drl_64->defrouter[i].if_index = dr->ifp->if_index; i++; dr = TAILQ_NEXT(dr, dr_entry); } - lck_mtx_unlock(nd6_mutex); - break; - case SIOCGPRLST_IN6: - /* - * obsolete API, use sysctl under net.inet6.icmp6 - */ - /* - * XXX meaning of fields, especialy "raflags", is very - * differnet between RA prefix list and RR/static prefix list. - * how about separating ioctls into two? 
- */ - bzero(prl, sizeof(*prl)); - lck_mtx_lock(nd6_mutex); - pr = nd_prefix.lh_first; + return; + } + /* For 32-bit process */ + while (dr && i < DRLSTSIZ) { + drl_32->defrouter[i].rtaddr = dr->rtaddr; + if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) { + /* XXX: need to this hack for KAME stack */ + drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0; + } else { + log(LOG_ERR, + "default router list contains a " + "non-linklocal address(%s)\n", + ip6_sprintf(&drl_32->defrouter[i].rtaddr)); + } + drl_32->defrouter[i].flags = dr->flags; + drl_32->defrouter[i].rtlifetime = dr->rtlifetime; + drl_32->defrouter[i].expire = dr->expire; + drl_32->defrouter[i].if_index = dr->ifp->if_index; + i++; + dr = TAILQ_NEXT(dr, dr_entry); + } +} + +static void +nd6_siocgprlst(void *data, int data_is_64) +{ + struct in6_prlist_64 *prl_64 = (struct in6_prlist_64 *)data; + struct in6_prlist_32 *prl_32 = (struct in6_prlist_32 *)data; + struct nd_prefix *pr; + struct rr_prefix *rpp; + int i = 0; + + lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + /* + * XXX meaning of fields, especialy "raflags", is very + * differnet between RA prefix list and RR/static prefix list. + * how about separating ioctls into two? + */ + bzero(data, data_is_64 ? 
sizeof (*prl_64) : sizeof (*prl_32)); + pr = nd_prefix.lh_first; + if (data_is_64) { + /* For 64-bit process */ while (pr && i < PRLSTSIZ) { struct nd_pfxrouter *pfr; int j; - (void)in6_embedscope(&prl->prefix[i].prefix, + (void) in6_embedscope(&prl_64->prefix[i].prefix, &pr->ndpr_prefix, NULL, NULL); - prl->prefix[i].raflags = pr->ndpr_raf; - prl->prefix[i].prefixlen = pr->ndpr_plen; - prl->prefix[i].vltime = pr->ndpr_vltime; - prl->prefix[i].pltime = pr->ndpr_pltime; - prl->prefix[i].if_index = pr->ndpr_ifp->if_index; - prl->prefix[i].expire = pr->ndpr_expire; + prl_64->prefix[i].raflags = pr->ndpr_raf; + prl_64->prefix[i].prefixlen = pr->ndpr_plen; + prl_64->prefix[i].vltime = pr->ndpr_vltime; + prl_64->prefix[i].pltime = pr->ndpr_pltime; + prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index; + prl_64->prefix[i].expire = pr->ndpr_expire; pfr = pr->ndpr_advrtrs.lh_first; j = 0; while (pfr) { if (j < DRLSTSIZ) { -#define RTRADDR prl->prefix[i].advrtr[j] +#define RTRADDR prl_64->prefix[i].advrtr[j] RTRADDR = pfr->router->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) { /* XXX: hack for KAME */ RTRADDR.s6_addr16[1] = 0; - } else + } else { log(LOG_ERR, "a router(%s) advertises " "a prefix with " "non-link local address\n", ip6_sprintf(&RTRADDR)); + } #undef RTRADDR } j++; pfr = pfr->pfr_next; } - prl->prefix[i].advrtrs = j; - prl->prefix[i].origin = PR_ORIG_RA; + prl_64->prefix[i].advrtrs = j; + prl_64->prefix[i].origin = PR_ORIG_RA; i++; pr = pr->ndpr_next; } - { - struct rr_prefix *rpp; for (rpp = LIST_FIRST(&rr_prefix); rpp; rpp = LIST_NEXT(rpp, rp_entry)) { if (i >= PRLSTSIZ) break; - (void)in6_embedscope(&prl->prefix[i].prefix, + (void) in6_embedscope(&prl_64->prefix[i].prefix, &pr->ndpr_prefix, NULL, NULL); - prl->prefix[i].raflags = rpp->rp_raf; - prl->prefix[i].prefixlen = rpp->rp_plen; - prl->prefix[i].vltime = rpp->rp_vltime; - prl->prefix[i].pltime = rpp->rp_pltime; - prl->prefix[i].if_index = rpp->rp_ifp->if_index; - prl->prefix[i].expire = rpp->rp_expire; 
- prl->prefix[i].advrtrs = 0; - prl->prefix[i].origin = rpp->rp_origin; + prl_64->prefix[i].raflags = rpp->rp_raf; + prl_64->prefix[i].prefixlen = rpp->rp_plen; + prl_64->prefix[i].vltime = rpp->rp_vltime; + prl_64->prefix[i].pltime = rpp->rp_pltime; + prl_64->prefix[i].if_index = rpp->rp_ifp->if_index; + prl_64->prefix[i].expire = rpp->rp_expire; + prl_64->prefix[i].advrtrs = 0; + prl_64->prefix[i].origin = rpp->rp_origin; i++; } - } + return; + } + /* For 32-bit process */ + while (pr && i < PRLSTSIZ) { + struct nd_pfxrouter *pfr; + int j; + + (void) in6_embedscope(&prl_32->prefix[i].prefix, + &pr->ndpr_prefix, NULL, NULL); + prl_32->prefix[i].raflags = pr->ndpr_raf; + prl_32->prefix[i].prefixlen = pr->ndpr_plen; + prl_32->prefix[i].vltime = pr->ndpr_vltime; + prl_32->prefix[i].pltime = pr->ndpr_pltime; + prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index; + prl_32->prefix[i].expire = pr->ndpr_expire; + + pfr = pr->ndpr_advrtrs.lh_first; + j = 0; + while (pfr) { + if (j < DRLSTSIZ) { +#define RTRADDR prl_32->prefix[i].advrtr[j] + RTRADDR = pfr->router->rtaddr; + if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) { + /* XXX: hack for KAME */ + RTRADDR.s6_addr16[1] = 0; + } else { + log(LOG_ERR, + "a router(%s) advertises " + "a prefix with " + "non-link local address\n", + ip6_sprintf(&RTRADDR)); + } +#undef RTRADDR + } + j++; + pfr = pfr->pfr_next; + } + prl_32->prefix[i].advrtrs = j; + prl_32->prefix[i].origin = PR_ORIG_RA; + + i++; + pr = pr->ndpr_next; + } + + for (rpp = LIST_FIRST(&rr_prefix); rpp; + rpp = LIST_NEXT(rpp, rp_entry)) { + if (i >= PRLSTSIZ) + break; + (void) in6_embedscope(&prl_32->prefix[i].prefix, + &pr->ndpr_prefix, NULL, NULL); + prl_32->prefix[i].raflags = rpp->rp_raf; + prl_32->prefix[i].prefixlen = rpp->rp_plen; + prl_32->prefix[i].vltime = rpp->rp_vltime; + prl_32->prefix[i].pltime = rpp->rp_pltime; + prl_32->prefix[i].if_index = rpp->rp_ifp->if_index; + prl_32->prefix[i].expire = rpp->rp_expire; + prl_32->prefix[i].advrtrs = 0; + 
prl_32->prefix[i].origin = rpp->rp_origin; + i++; + } +} + +int +nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) +{ + struct in6_ndireq *ndi = (struct in6_ndireq *)data; + struct in6_ondireq *ondi = (struct in6_ondireq *)data; + struct nd_defrouter *dr, any; + struct nd_prefix *pr; + struct rtentry *rt; + int i = ifp->if_index, error = 0; + + switch (cmd) { + case SIOCGDRLST_IN6_32: + case SIOCGDRLST_IN6_64: + /* + * obsolete API, use sysctl under net.inet6.icmp6 + */ + lck_mtx_lock(nd6_mutex); + nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64); lck_mtx_unlock(nd6_mutex); break; - case OSIOCGIFINFO_IN6: - if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { - error = EINVAL; - break; - } - ndi->ndi.linkmtu = IN6_LINKMTU(ifp); - ndi->ndi.maxmtu = nd_ifinfo[ifp->if_index].maxmtu; - ndi->ndi.basereachable = - nd_ifinfo[ifp->if_index].basereachable; - ndi->ndi.reachable = nd_ifinfo[ifp->if_index].reachable; - ndi->ndi.retrans = nd_ifinfo[ifp->if_index].retrans; - ndi->ndi.flags = nd_ifinfo[ifp->if_index].flags; - ndi->ndi.recalctm = nd_ifinfo[ifp->if_index].recalctm; - ndi->ndi.chlim = nd_ifinfo[ifp->if_index].chlim; - ndi->ndi.receivedra = nd_ifinfo[ifp->if_index].receivedra; + + case SIOCGPRLST_IN6_32: + case SIOCGPRLST_IN6_64: + /* + * obsolete API, use sysctl under net.inet6.icmp6 + */ + lck_mtx_lock(nd6_mutex); + nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64); + lck_mtx_unlock(nd6_mutex); break; + + case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: + /* + * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq + * instead of in6_ndireq, so we treat it as such. 
+ */ + lck_rw_lock_shared(nd_if_rwlock); if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); error = EINVAL; break; } - ndi->ndi = nd_ifinfo[ifp->if_index]; + ondi->ndi.linkmtu = IN6_LINKMTU(ifp); + ondi->ndi.maxmtu = nd_ifinfo[i].maxmtu; + ondi->ndi.basereachable = nd_ifinfo[i].basereachable; + ondi->ndi.reachable = nd_ifinfo[i].reachable; + ondi->ndi.retrans = nd_ifinfo[i].retrans; + ondi->ndi.flags = nd_ifinfo[i].flags; + ondi->ndi.recalctm = nd_ifinfo[i].recalctm; + ondi->ndi.chlim = nd_ifinfo[i].chlim; + ondi->ndi.receivedra = nd_ifinfo[i].receivedra; + lck_rw_done(nd_if_rwlock); break; + case SIOCSIFINFO_FLAGS: /* XXX: almost all other fields of ndi->ndi is unused */ + lck_rw_lock_shared(nd_if_rwlock); if (!nd_ifinfo || i >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); error = EINVAL; break; } - nd_ifinfo[ifp->if_index].flags = ndi->ndi.flags; + nd_ifinfo[i].flags = ndi->ndi.flags; + lck_rw_done(nd_if_rwlock); break; + case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* flush default router list */ /* @@ -1581,8 +2031,8 @@ nd6_ioctl( lck_mtx_unlock(nd6_mutex); /* xxx sumikawa: flush prefix list */ break; - case SIOCSPFXFLUSH_IN6: - { + + case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct nd_prefix *next; lck_mtx_lock(nd6_mutex); @@ -1610,9 +2060,9 @@ nd6_ioctl( } lck_mtx_unlock(nd6_mutex); break; - } - case SIOCSRTRFLUSH_IN6: - { + } + + case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *next; @@ -1630,51 +2080,106 @@ nd6_ioctl( } lck_mtx_unlock(nd6_mutex); break; - } - case SIOCGNBRINFO_IN6: - { + } + + case SIOCGNBRINFO_IN6_32: { struct llinfo_nd6 *ln; - struct in6_addr nb_addr = nbi->addr; /* make local for safety */ + struct in6_nbrinfo_32 *nbi_32 = (struct in6_nbrinfo_32 *)data; + /* make local for safety */ + struct in6_addr nb_addr = nbi_32->addr; /* * XXX: KAME specific hack for scoped addresses * XXXX: for other scopes than link-local? 
*/ - if (IN6_IS_ADDR_LINKLOCAL(&nbi->addr) || - IN6_IS_ADDR_MC_LINKLOCAL(&nbi->addr)) { + if (IN6_IS_ADDR_LINKLOCAL(&nbi_32->addr) || + IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32->addr)) { u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2]; if (*idp == 0) *idp = htons(ifp->if_index); } + /* Callee returns a locked route upon success */ if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) { error = EINVAL; break; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - nbi->state = ln->ln_state; - nbi->asked = ln->ln_asked; - nbi->isrouter = ln->ln_router; - nbi->expire = ln->ln_expire; - + RT_LOCK_ASSERT_HELD(rt); + ln = rt->rt_llinfo; + nbi_32->state = ln->ln_state; + nbi_32->asked = ln->ln_asked; + nbi_32->isrouter = ln->ln_router; + nbi_32->expire = ln->ln_expire; + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); break; - } - case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ - ndif->ifindex = nd6_defifindex; + } + + case SIOCGNBRINFO_IN6_64: { + struct llinfo_nd6 *ln; + struct in6_nbrinfo_64 *nbi_64 = (struct in6_nbrinfo_64 *)data; + /* make local for safety */ + struct in6_addr nb_addr = nbi_64->addr; + + /* + * XXX: KAME specific hack for scoped addresses + * XXXX: for other scopes than link-local? + */ + if (IN6_IS_ADDR_LINKLOCAL(&nbi_64->addr) || + IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64->addr)) { + u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2]; + + if (*idp == 0) + *idp = htons(ifp->if_index); + } + + /* Callee returns a locked route upon success */ + if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) { + error = EINVAL; + break; + } + RT_LOCK_ASSERT_HELD(rt); + ln = rt->rt_llinfo; + nbi_64->state = ln->ln_state; + nbi_64->asked = ln->ln_asked; + nbi_64->isrouter = ln->ln_router; + nbi_64->expire = ln->ln_expire; + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); break; - case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ - return(nd6_setdefaultiface(ndif->ifindex)); + } + + case SIOCGDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? 
*/ + case SIOCGDEFIFACE_IN6_64: { + struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data; + struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data; + + if (cmd == SIOCGDEFIFACE_IN6_64) + ndif_64->ifindex = nd6_defifindex; + else + ndif_32->ifindex = nd6_defifindex; break; } - return(error); + + case SIOCSDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */ + case SIOCSDEFIFACE_IN6_64: { + struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data; + struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data; + + return (nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ? + ndif_64->ifindex : ndif_32->ifindex)); + /* NOTREACHED */ + } + } + return (error); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) */ -struct rtentry * +void nd6_cache_lladdr( struct ifnet *ifp, struct in6_addr *from, @@ -1700,7 +2205,7 @@ nd6_cache_lladdr( /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) - return NULL; + return; /* * Validation about ifp->if_addrlen and lladdrlen must be done in @@ -1713,36 +2218,37 @@ nd6_cache_lladdr( */ getmicrotime(&timenow); - lck_mtx_lock(rt_mtx); - rt = nd6_lookup(from, 0, ifp, 1); - if (!rt) { + rt = nd6_lookup(from, 0, ifp, 0); + if (rt == NULL) { #if 0 /* nothing must be done if there's no lladdr */ if (!lladdr || !lladdrlen) - return NULL; + return; #endif - rt = nd6_lookup(from, 1, ifp, 1); + if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL) + return; + RT_LOCK_ASSERT_HELD(rt); is_newentry = 1; } else { + RT_LOCK_ASSERT_HELD(rt); /* do nothing if static ndp is set */ if (rt->rt_flags & RTF_STATIC) { - lck_mtx_unlock(rt_mtx); - return NULL; + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + return; } is_newentry = 0; } - lck_mtx_unlock(rt_mtx); - - if (!rt) - return NULL; if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) { fail: - (void)nd6_free(rt); - return NULL; + RT_UNLOCK(rt); + nd6_free(rt); + 
rtfree(rt); + return; } - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + ln = rt->rt_llinfo; if (!ln) goto fail; if (!rt->rt_gateway) @@ -1802,6 +2308,7 @@ nd6_cache_lladdr( ln->ln_state = newstate; if (ln->ln_state == ND6_LLINFO_STALE) { + struct mbuf *m = ln->ln_hold; /* * XXX: since nd6_output() below will cause * state tansition to DELAY and reset the timer, @@ -1809,16 +2316,17 @@ nd6_cache_lladdr( * meaningless. */ ln->ln_expire = timenow.tv_sec + nd6_gctimer; + ln->ln_hold = NULL; - if (ln->ln_hold) { + if (m != NULL) { /* * we assume ifp is not a p2p here, so just * set the 2nd argument as the 1st one. */ - nd6_output(ifp, ifp, ln->ln_hold, - (struct sockaddr_in6 *)rt_key(rt), - rt, 0); - ln->ln_hold = NULL; + RT_UNLOCK(rt); + nd6_output(ifp, ifp, m, + (struct sockaddr_in6 *)rt_key(rt), rt, 0); + RT_LOCK(rt); } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ @@ -1906,13 +2414,17 @@ nd6_cache_lladdr( * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. 
*/ - if (do_update && ln->ln_router && !ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + if (do_update && ln->ln_router && !ip6_forwarding && + (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); lck_mtx_lock(nd6_mutex); defrouter_select(); lck_mtx_unlock(nd6_mutex); + } else { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } - - return rt; } static void @@ -1922,10 +2434,10 @@ nd6_slowtimo( int i; struct nd_ifinfo *nd6if; - lck_mtx_lock(nd6_mutex); + lck_rw_lock_shared(nd_if_rwlock); for (i = 1; i < if_index + 1; i++) { if (!nd_ifinfo || i >= nd_ifinfo_indexlim) - continue; + break; nd6if = &nd_ifinfo[i]; if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { @@ -1939,58 +2451,85 @@ nd6_slowtimo( nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } - lck_mtx_unlock(nd6_mutex); + lck_rw_done(nd_if_rwlock); timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); } - #define senderr(e) { error = (e); goto bad;} int -nd6_output( - struct ifnet *ifp, - struct ifnet *origifp, - struct mbuf *m0, - struct sockaddr_in6 *dst, - struct rtentry *rt0, - int locked) +nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, + struct sockaddr_in6 *dst, struct rtentry *hint0, int locked) { struct mbuf *m = m0; - struct rtentry *rt = rt0; - struct sockaddr_in6 *gw6 = NULL; + struct rtentry *rt = hint0, *hint = hint0; struct llinfo_nd6 *ln = NULL; int error = 0; struct timeval timenow; + struct rtentry *rtrele = NULL; - if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) - goto sendpkt; + if (rt != NULL) { + RT_LOCK_SPIN(rt); + RT_ADDREF_LOCKED(rt); + } - if (nd6_need_cache(ifp) == 0) + if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) { + if (rt != NULL) + RT_UNLOCK(rt); goto sendpkt; + } /* - * next hop determination. This routine is derived from ether_outpout. + * Next hop determination. 
Because we may involve the gateway route + * in addition to the original route, locking is rather complicated. + * The general concept is that regardless of whether the route points + * to the original route or to the gateway route, this routine takes + * an extra reference on such a route. This extra reference will be + * released at the end. + * + * Care must be taken to ensure that the "hint0" route never gets freed + * via rtfree(), since the caller may have stored it inside a struct + * route with a reference held for that placeholder. + * + * This logic is similar to, though not exactly the same as the one + * used by arp_route_to_gateway_route(). */ - lck_mtx_lock(rt_mtx); - if (rt) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1_locked((struct sockaddr *)dst, 1, 0UL)) != - NULL) - { - rtunref(rt); + if (rt != NULL) { + /* + * We have a reference to "rt" by now (or below via rtalloc1), + * which will either be released or freed at the end of this + * routine. + */ + RT_LOCK_ASSERT_HELD(rt); + if (!(rt->rt_flags & RTF_UP)) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + if ((hint = rt = rtalloc1((struct sockaddr *)dst, + 1, 0)) != NULL) { + RT_LOCK_SPIN(rt); if (rt->rt_ifp != ifp) { /* XXX: loop care? */ - lck_mtx_unlock(rt_mtx); - return nd6_output(ifp, origifp, m0, - dst, rt, locked); + RT_UNLOCK(rt); + error = nd6_output(ifp, origifp, m0, + dst, rt, locked); + rtfree(rt); + return (error); } } else { - lck_mtx_unlock(rt_mtx); senderr(EHOSTUNREACH); } } if (rt->rt_flags & RTF_GATEWAY) { - gw6 = (struct sockaddr_in6 *)rt->rt_gateway; + struct rtentry *gwrt; + struct in6_ifaddr *ia6 = NULL; + struct sockaddr_in6 gw6; + + gw6 = *((struct sockaddr_in6 *)rt->rt_gateway); + /* + * Must drop rt_lock since nd6_is_addr_neighbor() + * calls nd6_lookup() and acquires rnh_lock. 
+ */ + RT_UNLOCK(rt); /* * We skip link-layer address resolution and NUD @@ -2000,33 +2539,119 @@ nd6_output( * if the gateway is our own address, which is * sometimes used to install a route to a p2p link. */ - if (!nd6_is_addr_neighbor(gw6, ifp, 1) || - in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { + if (!nd6_is_addr_neighbor(&gw6, ifp, 0) || + (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) { /* * We allow this kind of tricky route only * when the outgoing interface is p2p. * XXX: we may need a more generic rule here. */ - lck_mtx_unlock(rt_mtx); + if (ia6 != NULL) + ifafree(&ia6->ia_ifa); if ((ifp->if_flags & IFF_POINTOPOINT) == 0) senderr(EHOSTUNREACH); - goto sendpkt; } - if (rt->rt_gwroute == 0) + RT_LOCK_SPIN(rt); + gw6 = *((struct sockaddr_in6 *)rt->rt_gateway); + + /* If hint is now down, give up */ + if (!(rt->rt_flags & RTF_UP)) { + RT_UNLOCK(rt); + senderr(EHOSTUNREACH); + } + + /* If there's no gateway route, look it up */ + if ((gwrt = rt->rt_gwroute) == NULL) { + RT_UNLOCK(rt); goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree_locked(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1_locked(rt->rt_gateway, 1, 0UL); - if ((rt = rt->rt_gwroute) == 0) { - lck_mtx_unlock(rt_mtx); + } + /* Become a regular mutex */ + RT_CONVERT_LOCK(rt); + + /* + * Take gwrt's lock while holding route's lock; + * this is okay since gwrt never points back + * to rt, so no lock ordering issues. + */ + RT_LOCK_SPIN(gwrt); + if (!(gwrt->rt_flags & RTF_UP)) { + struct rtentry *ogwrt; + + rt->rt_gwroute = NULL; + RT_UNLOCK(gwrt); + RT_UNLOCK(rt); + rtfree(gwrt); +lookup: + gwrt = rtalloc1((struct sockaddr *)&gw6, 1, 0); + + RT_LOCK(rt); + /* + * Bail out if the route is down, no route + * to gateway, circular route, or if the + * gateway portion of "rt" has changed. 
+ */ + if (!(rt->rt_flags & RTF_UP) || + gwrt == NULL || gwrt == rt || + !equal(SA(&gw6), rt->rt_gateway)) { + if (gwrt == rt) { + RT_REMREF_LOCKED(gwrt); + gwrt = NULL; + } + RT_UNLOCK(rt); + if (gwrt != NULL) + rtfree(gwrt); + senderr(EHOSTUNREACH); + } + + /* Remove any existing gwrt */ + ogwrt = rt->rt_gwroute; + if ((rt->rt_gwroute = gwrt) != NULL) + RT_ADDREF(gwrt); + + RT_UNLOCK(rt); + /* Now free the replaced gwrt */ + if (ogwrt != NULL) + rtfree(ogwrt); + /* If still no route to gateway, bail out */ + if (gwrt == NULL) + senderr(EHOSTUNREACH); + /* Remember to release/free "rt" at the end */ + rtrele = rt; + rt = gwrt; + RT_LOCK_SPIN(rt); + /* If gwrt is now down, give up */ + if (!(rt->rt_flags & RTF_UP)) { + RT_UNLOCK(rt); + rtfree(rt); + rt = NULL; + /* "rtrele" == original "rt" */ + senderr(EHOSTUNREACH); + } + } else { + RT_ADDREF_LOCKED(gwrt); + RT_UNLOCK(gwrt); + RT_UNLOCK(rt); + RT_LOCK_SPIN(gwrt); + /* If gwrt is now down, give up */ + if (!(gwrt->rt_flags & RTF_UP)) { + RT_UNLOCK(gwrt); + rtfree(gwrt); senderr(EHOSTUNREACH); } + /* Remember to release/free "rt" at the end */ + rtrele = rt; + rt = gwrt; } } + /* Become a regular mutex */ + RT_CONVERT_LOCK(rt); } + if (rt != NULL) + RT_LOCK_ASSERT_HELD(rt); + /* * Address resolution or Neighbor Unreachability Detection * for the next hop. @@ -2035,28 +2660,52 @@ nd6_output( */ /* Look up the neighbor cache for the nexthop */ - if (rt && (rt->rt_flags & RTF_LLINFO) != 0) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; - else { + if (rt && (rt->rt_flags & RTF_LLINFO) != 0) { + ln = rt->rt_llinfo; + } else { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. + * Must drop rt_lock since nd6_is_addr_neighbor() calls + * nd6_lookup() and acquires rnh_lock. 
*/ - if (nd6_is_addr_neighbor(dst, ifp, 1) && - (rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 1)) != NULL) - ln = (struct llinfo_nd6 *)rt->rt_llinfo; + if (rt != NULL) + RT_UNLOCK(rt); + if (nd6_is_addr_neighbor(dst, ifp, 0)) { + /* "rtrele" may have been used, so clean up "rt" now */ + if (rt != NULL) { + /* Don't free "hint0" */ + if (rt == hint0) + RT_REMREF(rt); + else + rtfree(rt); + } + /* Callee returns a locked route upon success */ + rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0); + if (rt != NULL) { + RT_LOCK_ASSERT_HELD(rt); + ln = rt->rt_llinfo; + } + } else if (rt != NULL) { + RT_LOCK(rt); + } } - lck_mtx_unlock(rt_mtx); + if (!ln || !rt) { + if (rt != NULL) + RT_UNLOCK(rt); + lck_rw_lock_shared(nd_if_rwlock); if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) { + lck_rw_done(nd_if_rwlock); log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p, rt=%p)\n", ip6_sprintf(&dst->sin6_addr), ln, rt); senderr(EIO); /* XXX: good error? */ } + lck_rw_done(nd_if_rwlock); goto sendpkt; /* send anyway */ } @@ -2088,8 +2737,23 @@ nd6_output( * (i.e. its link-layer address is already resolved), just * send the packet. */ - if (ln->ln_state > ND6_LLINFO_INCOMPLETE) + if (ln->ln_state > ND6_LLINFO_INCOMPLETE) { + RT_UNLOCK(rt); + /* + * Move this entry to the head of the queue so that it is + * less likely for this entry to be a target of forced + * garbage collection (see nd6_rtrequest()). 
+ */ + lck_mtx_lock(rnh_lock); + RT_LOCK_SPIN(rt); + if (ln->ln_flags & ND6_LNF_IN_USE) { + LN_DEQUEUE(ln); + LN_INSERTHEAD(ln); + } + RT_UNLOCK(rt); + lck_mtx_unlock(rnh_lock); goto sendpkt; + } /* * There is a neighbor cache entry, but no ethernet address @@ -2105,45 +2769,75 @@ nd6_output( if (ln->ln_hold) m_freem(ln->ln_hold); ln->ln_hold = m; - if (ln->ln_expire) { - if (ln->ln_asked < nd6_mmaxtries && - ln->ln_expire < timenow.tv_sec) { - ln->ln_asked++; - ln->ln_expire = timenow.tv_sec + - nd_ifinfo[ifp->if_index].retrans / 1000; - nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, locked); - } + if (ln->ln_expire && ln->ln_asked < nd6_mmaxtries && + ln->ln_expire < timenow.tv_sec) { + ln->ln_asked++; + lck_rw_lock_shared(nd_if_rwlock); + ln->ln_expire = timenow.tv_sec + + nd_ifinfo[ifp->if_index].retrans / 1000; + lck_rw_done(nd_if_rwlock); + RT_UNLOCK(rt); + /* We still have a reference on rt (for ln) */ + nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, locked); + } else { + RT_UNLOCK(rt); } - return(0); - - sendpkt: -#ifdef __APPLE__ + /* + * Move this entry to the head of the queue so that it is + * less likely for this entry to be a target of forced + * garbage collection (see nd6_rtrequest()). 
+ */ + lck_mtx_lock(rnh_lock); + RT_LOCK_SPIN(rt); + if (ln->ln_flags & ND6_LNF_IN_USE) { + LN_DEQUEUE(ln); + LN_INSERTHEAD(ln); + } + /* Clean up "rt" now while we can */ + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree_locked(rt); + } + rt = NULL; /* "rt" has been taken care of */ + lck_mtx_unlock(rnh_lock); - /* Make sure the HW checksum flags are cleaned before sending the packet */ + error = 0; + goto release; + +sendpkt: + if (rt != NULL) + RT_LOCK_ASSERT_NOTHELD(rt); + /* Clean up HW checksum flags before sending the packet */ m->m_pkthdr.csum_data = 0; m->m_pkthdr.csum_flags = 0; if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - m->m_pkthdr.rcvif = origifp; /* forwarding rules require the original scope_id */ + /* forwarding rules require the original scope_id */ + m->m_pkthdr.rcvif = origifp; if (locked) lck_mtx_unlock(ip6_mutex); - error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, (struct sockaddr *)dst, 0); + error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, + (struct sockaddr *)dst, 0); if (locked) lck_mtx_lock(ip6_mutex); - return error; + goto release; } else { /* Do not allow loopback address to wind up on a wire */ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - + if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { ip6stat.ip6s_badscope++; - /* - * Do not simply drop the packet just like a firewall -- we want the - * the application to feel the pain. - * Return ENETUNREACH like ip6_output does in some similar cases. - * This can startle the otherwise clueless process that specifies + /* + * Do not simply drop the packet just like a + * firewall -- we want the the application to feel + * the pain. Return ENETUNREACH like ip6_output + * does in some similar cases. This can startle + * the otherwise clueless process that specifies * loopback as the source address. 
*/ error = ENETUNREACH; @@ -2151,26 +2845,44 @@ nd6_output( } } - m->m_pkthdr.rcvif = 0; + m->m_pkthdr.rcvif = NULL; if (locked) lck_mtx_unlock(ip6_mutex); - error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, (struct sockaddr *)dst, 0); + error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, + (struct sockaddr *)dst, 0); if (locked) lck_mtx_lock(ip6_mutex); - return(error); -#else - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - return((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, - rt)); - } - return((*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt)); -#endif - - bad: - if (m) + goto release; + +bad: + if (m != NULL) m_freem(m); + +release: + /* Clean up "rt" unless it's already been done */ + if (rt != NULL) { + RT_LOCK_SPIN(rt); + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree(rt); + } + } + /* And now clean up "rtrele" if there is any */ + if (rtrele != NULL) { + RT_LOCK_SPIN(rtrele); + if (rtrele == hint0) { + RT_REMREF_LOCKED(rtrele); + RT_UNLOCK(rtrele); + } else { + RT_UNLOCK(rtrele); + rtfree(rtrele); + } + } return (error); -} +} #undef senderr int @@ -2240,206 +2952,330 @@ nd6_storelladdr( /* this could happen, if we could not allocate memory */ return(0); /* caller will free mbuf */ } + RT_LOCK(rt); if (rt->rt_gateway->sa_family != AF_LINK) { printf("nd6_storelladdr: something odd happens\n"); + RT_UNLOCK(rt); return(0); /* caller will free mbuf */ } sdl = SDL(rt->rt_gateway); if (sdl->sdl_alen == 0) { /* this should be impossible, but we bark here for debugging */ printf("nd6_storelladdr: sdl_alen == 0\n"); + RT_UNLOCK(rt); return(0); /* caller will free mbuf */ } bcopy(LLADDR(sdl), desten, sdl->sdl_alen); + RT_UNLOCK(rt); return(1); } -extern errno_t arp_route_to_gateway_route(const struct sockaddr *net_dest, - route_t hint, route_t *out_route); - +/* + * This is the ND pre-output routine; care must be taken to ensure that + * the "hint" route never gets freed via rtfree(), since the caller may 
+ * have stored it inside a struct route with a reference held for that + * placeholder. + */ errno_t -nd6_lookup_ipv6( - ifnet_t ifp, - const struct sockaddr_in6 *ip6_dest, - struct sockaddr_dl *ll_dest, - size_t ll_dest_len, - route_t hint, - mbuf_t packet) +nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest, + struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, + mbuf_t packet) { route_t route = hint; errno_t result = 0; struct sockaddr_dl *sdl = NULL; size_t copy_len; - + if (ip6_dest->sin6_family != AF_INET6) - return EAFNOSUPPORT; - + return (EAFNOSUPPORT); + if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return ENETDOWN; - - if (hint) { - result = arp_route_to_gateway_route((const struct sockaddr*)ip6_dest, hint, &route); + return (ENETDOWN); + + if (hint != NULL) { + /* + * Callee holds a reference on the route and returns + * with the route entry locked, upon success. + */ + result = arp_route_to_gateway_route( + (const struct sockaddr*)ip6_dest, hint, &route); if (result != 0) - return result; + return (result); + if (route != NULL) + RT_LOCK_ASSERT_HELD(route); } - + if ((packet->m_flags & M_MCAST) != 0) { - return dlil_resolve_multi(ifp, (const struct sockaddr*)ip6_dest, - (struct sockaddr *)ll_dest, ll_dest_len); + if (route != NULL) + RT_UNLOCK(route); + result = dlil_resolve_multi(ifp, + (const struct sockaddr*)ip6_dest, + (struct sockaddr *)ll_dest, ll_dest_len); + if (route != NULL) + RT_LOCK(route); + goto release; } - + if (route == NULL) { - /* this could happen, if we could not allocate memory */ - return ENOBUFS; + /* + * This could happen, if we could not allocate memory or + * if arp_route_to_gateway_route() didn't return a route. 
+ */ + result = ENOBUFS; + goto release; } - - lck_mtx_lock(rt_mtx); - + if (route->rt_gateway->sa_family != AF_LINK) { printf("nd6_lookup_ipv6: gateway address not AF_LINK\n"); result = EADDRNOTAVAIL; - goto done; + goto release; } - + sdl = SDL(route->rt_gateway); if (sdl->sdl_alen == 0) { /* this should be impossible, but we bark here for debugging */ - printf("nd6_storelladdr: sdl_alen == 0\n"); + printf("nd6_lookup_ipv6: sdl_alen == 0\n"); result = EHOSTUNREACH; + goto release; } - + copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len; bcopy(sdl, ll_dest, copy_len); -done: - lck_mtx_unlock(rt_mtx); - return result; +release: + if (route != NULL) { + if (route == hint) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + } else { + RT_UNLOCK(route); + rtfree(route); + } + } + return (result); } SYSCTL_DECL(_net_inet6_icmp6); static int -nd6_sysctl_drlist SYSCTL_HANDLER_ARGS +nd6_sysctl_drlist SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error; + int error = 0; char buf[1024]; - struct in6_defrouter *d, *de; struct nd_defrouter *dr; + int p64 = proc_is64bit(req->p); if (req->newptr) - return EPERM; - error = 0; + return (EPERM); lck_mtx_lock(nd6_mutex); - for (dr = TAILQ_FIRST(&nd_defrouter); - dr; - dr = TAILQ_NEXT(dr, dr_entry)) { - d = (struct in6_defrouter *)buf; - de = (struct in6_defrouter *)(buf + sizeof(buf)); - - if (d + 1 <= de) { - bzero(d, sizeof(*d)); - d->rtaddr.sin6_family = AF_INET6; - d->rtaddr.sin6_len = sizeof(d->rtaddr); - if (in6_recoverscope(&d->rtaddr, &dr->rtaddr, - dr->ifp) != 0) - log(LOG_ERR, - "scope error in " - "default router list (%s)\n", - ip6_sprintf(&dr->rtaddr)); - d->flags = dr->flags; - d->rtlifetime = dr->rtlifetime; - d->expire = dr->expire; - d->if_index = dr->ifp->if_index; - } else - panic("buffer too short"); - - error = SYSCTL_OUT(req, buf, sizeof(*d)); - if (error) - break; + if (p64) { + struct in6_defrouter_64 *d, *de; + + for (dr = TAILQ_FIRST(&nd_defrouter); + dr; + dr = 
TAILQ_NEXT(dr, dr_entry)) { + d = (struct in6_defrouter_64 *)buf; + de = (struct in6_defrouter_64 *)(buf + sizeof (buf)); + + if (d + 1 <= de) { + bzero(d, sizeof (*d)); + d->rtaddr.sin6_family = AF_INET6; + d->rtaddr.sin6_len = sizeof (d->rtaddr); + if (in6_recoverscope(&d->rtaddr, &dr->rtaddr, + dr->ifp) != 0) + log(LOG_ERR, + "scope error in " + "default router list (%s)\n", + ip6_sprintf(&dr->rtaddr)); + d->flags = dr->flags; + d->rtlifetime = dr->rtlifetime; + d->expire = dr->expire; + d->if_index = dr->ifp->if_index; + } else { + panic("buffer too short"); + } + error = SYSCTL_OUT(req, buf, sizeof (*d)); + if (error) + break; + } + } else { + struct in6_defrouter_32 *d_32, *de_32; + + for (dr = TAILQ_FIRST(&nd_defrouter); + dr; + dr = TAILQ_NEXT(dr, dr_entry)) { + d_32 = (struct in6_defrouter_32 *)buf; + de_32 = (struct in6_defrouter_32 *)(buf + sizeof (buf)); + + if (d_32 + 1 <= de_32) { + bzero(d_32, sizeof (*d_32)); + d_32->rtaddr.sin6_family = AF_INET6; + d_32->rtaddr.sin6_len = sizeof (d_32->rtaddr); + if (in6_recoverscope(&d_32->rtaddr, &dr->rtaddr, + dr->ifp) != 0) + log(LOG_ERR, + "scope error in " + "default router list (%s)\n", + ip6_sprintf(&dr->rtaddr)); + d_32->flags = dr->flags; + d_32->rtlifetime = dr->rtlifetime; + d_32->expire = dr->expire; + d_32->if_index = dr->ifp->if_index; + } else { + panic("buffer too short"); + } + error = SYSCTL_OUT(req, buf, sizeof (*d_32)); + if (error) + break; + } } lck_mtx_unlock(nd6_mutex); - return error; + return (error); } static int -nd6_sysctl_prlist SYSCTL_HANDLER_ARGS +nd6_sysctl_prlist SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error; + int error = 0; char buf[1024]; - struct in6_prefix *p, *pe; struct nd_prefix *pr; + int p64 = proc_is64bit(req->p); if (req->newptr) - return EPERM; - error = 0; + return (EPERM); lck_mtx_lock(nd6_mutex); + if (p64) { + struct in6_prefix_64 *p, *pe; - for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { - u_short advrtrs = 0; - size_t advance; - 
struct sockaddr_in6 *sin6, *s6; - struct nd_pfxrouter *pfr; + for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + u_short advrtrs = 0; + size_t advance; + struct sockaddr_in6 *sin6, *s6; + struct nd_pfxrouter *pfr; - p = (struct in6_prefix *)buf; - pe = (struct in6_prefix *)(buf + sizeof(buf)); + p = (struct in6_prefix_64 *)buf; + pe = (struct in6_prefix_64 *)(buf + sizeof (buf)); - if (p + 1 <= pe) { - bzero(p, sizeof(*p)); - sin6 = (struct sockaddr_in6 *)(p + 1); + if (p + 1 <= pe) { + bzero(p, sizeof (*p)); + sin6 = (struct sockaddr_in6 *)(p + 1); - p->prefix = pr->ndpr_prefix; - if (in6_recoverscope(&p->prefix, - &p->prefix.sin6_addr, pr->ndpr_ifp) != 0) - log(LOG_ERR, - "scope error in prefix list (%s)\n", - ip6_sprintf(&p->prefix.sin6_addr)); - p->raflags = pr->ndpr_raf; - p->prefixlen = pr->ndpr_plen; - p->vltime = pr->ndpr_vltime; - p->pltime = pr->ndpr_pltime; - p->if_index = pr->ndpr_ifp->if_index; - p->expire = pr->ndpr_expire; - p->refcnt = pr->ndpr_refcnt; - p->flags = pr->ndpr_stateflags; - p->origin = PR_ORIG_RA; - advrtrs = 0; - for (pfr = pr->ndpr_advrtrs.lh_first; - pfr; - pfr = pfr->pfr_next) { - if ((void *)&sin6[advrtrs + 1] > - (void *)pe) { + p->prefix = pr->ndpr_prefix; + if (in6_recoverscope(&p->prefix, + &p->prefix.sin6_addr, pr->ndpr_ifp) != 0) + log(LOG_ERR, + "scope error in prefix list (%s)\n", + ip6_sprintf(&p->prefix.sin6_addr)); + p->raflags = pr->ndpr_raf; + p->prefixlen = pr->ndpr_plen; + p->vltime = pr->ndpr_vltime; + p->pltime = pr->ndpr_pltime; + p->if_index = pr->ndpr_ifp->if_index; + p->expire = pr->ndpr_expire; + p->refcnt = pr->ndpr_refcnt; + p->flags = pr->ndpr_stateflags; + p->origin = PR_ORIG_RA; + advrtrs = 0; + for (pfr = pr->ndpr_advrtrs.lh_first; + pfr; + pfr = pfr->pfr_next) { + if ((void *)&sin6[advrtrs + 1] > + (void *)pe) { + advrtrs++; + continue; + } + s6 = &sin6[advrtrs]; + bzero(s6, sizeof (*s6)); + s6->sin6_family = AF_INET6; + s6->sin6_len = sizeof (*sin6); + if (in6_recoverscope(s6, + 
&pfr->router->rtaddr, + pfr->router->ifp) != 0) + log(LOG_ERR, "scope error in " + "prefix list (%s)\n", + ip6_sprintf(&pfr->router-> + rtaddr)); advrtrs++; - continue; } - s6 = &sin6[advrtrs]; - bzero(s6, sizeof(*s6)); - s6->sin6_family = AF_INET6; - s6->sin6_len = sizeof(*sin6); - if (in6_recoverscope(s6, - &pfr->router->rtaddr, - pfr->router->ifp) != 0) - log(LOG_ERR, - "scope error in " - "prefix list (%s)\n", - ip6_sprintf(&pfr->router->rtaddr)); - advrtrs++; + p->advrtrs = advrtrs; + } else { + panic("buffer too short"); } - p->advrtrs = advrtrs; - } else - panic("buffer too short"); + advance = sizeof (*p) + sizeof (*sin6) * advrtrs; + error = SYSCTL_OUT(req, buf, advance); + if (error) + break; + } + } else { + struct in6_prefix_32 *p_32, *pe_32; - advance = sizeof(*p) + sizeof(*sin6) * advrtrs; - error = SYSCTL_OUT(req, buf, advance); - if (error) - break; + for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + u_short advrtrs = 0; + size_t advance; + struct sockaddr_in6 *sin6, *s6; + struct nd_pfxrouter *pfr; + + p_32 = (struct in6_prefix_32 *)buf; + pe_32 = (struct in6_prefix_32 *)(buf + sizeof (buf)); + + if (p_32 + 1 <= pe_32) { + bzero(p_32, sizeof (*p_32)); + sin6 = (struct sockaddr_in6 *)(p_32 + 1); + + p_32->prefix = pr->ndpr_prefix; + if (in6_recoverscope(&p_32->prefix, + &p_32->prefix.sin6_addr, pr->ndpr_ifp) != 0) + log(LOG_ERR, "scope error in prefix " + "list (%s)\n", ip6_sprintf(&p_32-> + prefix.sin6_addr)); + p_32->raflags = pr->ndpr_raf; + p_32->prefixlen = pr->ndpr_plen; + p_32->vltime = pr->ndpr_vltime; + p_32->pltime = pr->ndpr_pltime; + p_32->if_index = pr->ndpr_ifp->if_index; + p_32->expire = pr->ndpr_expire; + p_32->refcnt = pr->ndpr_refcnt; + p_32->flags = pr->ndpr_stateflags; + p_32->origin = PR_ORIG_RA; + advrtrs = 0; + for (pfr = pr->ndpr_advrtrs.lh_first; + pfr; + pfr = pfr->pfr_next) { + if ((void *)&sin6[advrtrs + 1] > + (void *)pe_32) { + advrtrs++; + continue; + } + s6 = &sin6[advrtrs]; + bzero(s6, sizeof (*s6)); + 
s6->sin6_family = AF_INET6; + s6->sin6_len = sizeof (*sin6); + if (in6_recoverscope(s6, + &pfr->router->rtaddr, + pfr->router->ifp) != 0) + log(LOG_ERR, "scope error in " + "prefix list (%s)\n", + ip6_sprintf(&pfr->router-> + rtaddr)); + advrtrs++; + } + p_32->advrtrs = advrtrs; + } else { + panic("buffer too short"); + } + advance = sizeof (*p_32) + sizeof (*sin6) * advrtrs; + error = SYSCTL_OUT(req, buf, advance); + if (error) + break; + } } lck_mtx_unlock(nd6_mutex); - return error; + return (error); } SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter",""); diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h index 4b66e6be6..dc0cfec20 100644 --- a/bsd/netinet6/nd6.h +++ b/bsd/netinet6/nd6.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/nd6.h,v 1.2.2.3 2001/08/13 01:10:49 simokawa Exp $ */ /* $KAME: nd6.h,v 1.55 2001/04/27 15:09:49 itojun Exp $ */ @@ -42,17 +70,30 @@ #include #ifdef KERNEL_PRIVATE +#include + struct llinfo_nd6 { + /* + * The following are protected by rnh_lock + */ struct llinfo_nd6 *ln_next; struct llinfo_nd6 *ln_prev; struct rtentry *ln_rt; + /* + * The following are protected by rt_lock + */ struct mbuf *ln_hold; /* last packet until resolved/timeout */ long ln_asked; /* number of queries already sent for this addr */ - u_long ln_expire; /* lifetime for NDP state transition */ + u_int32_t ln_expire; /* lifetime for NDP state transition */ short ln_state; /* reachability state */ short ln_router; /* 2^0: ND6 router bit */ int ln_byhint; /* # of times we made it reachable by UL hint */ + u_int32_t ln_flags; /* flags; see below */ }; + +/* Values for ln_flags */ +#define ND6_LNF_TIMER_SKIP 0x1 /* modified by nd6_timer() */ +#define ND6_LNF_IN_USE 0x2 /* currently in llinfo_nd6 list */ #endif /* KERNEL_PRIVATE */ #define ND6_LLINFO_PURGE -3 @@ -71,8 +112,32 @@ struct llinfo_nd6 { #define ND6_LLINFO_DELAY 3 #define ND6_LLINFO_PROBE 4 +#ifdef KERNEL_PRIVATE #define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) +#endif /* KERNEL_PRIVATE */ + +#if !defined(KERNEL_PRIVATE) +struct nd_ifinfo { +#else +/* For binary compatibility, this structure must not change */ +struct nd_ifinfo_compat { +#endif /* !KERNEL_PRIVATE */ + u_int32_t linkmtu; /* LinkMTU */ + u_int32_t maxmtu; /* Upper bound of LinkMTU */ + u_int32_t basereachable; /* BaseReachableTime */ + u_int32_t reachable; /* Reachable Time */ + u_int32_t retrans; /* Retrans Timer */ + u_int32_t flags; /* Flags */ + int recalctm; /* BaseReacable re-calculation timer */ + u_int8_t chlim; /* CurHopLimit */ + u_int8_t receivedra; 
+ /* the following 3 members are for privacy extension for addrconf */ + u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */ + u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */ + u_int8_t randomid[8]; /* current random ID */ +}; +#if defined(KERNEL_PRIVATE) struct nd_ifinfo { u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ @@ -91,6 +156,7 @@ struct nd_ifinfo { int32_t nprefixes; int32_t ndefrouters; }; +#endif /* KERNEL_PRIVATE */ #define ND6_IFF_PERFORMNUD 0x1 @@ -103,8 +169,29 @@ struct in6_nbrinfo { int expire; /* lifetime for NDP state transition */ }; +#if defined(KERNEL_PRIVATE) +struct in6_nbrinfo_32 { + char ifname[IFNAMSIZ]; + struct in6_addr addr; + u_int32_t asked; + int isrouter; + int state; + int expire; +}; + +struct in6_nbrinfo_64 { + char ifname[IFNAMSIZ]; + struct in6_addr addr; + long asked; + int isrouter __attribute__((aligned(8))); + int state; + int expire; +} __attribute__((aligned(8))); +#endif /* KERNEL_PRIVATE */ + #define DRLSTSIZ 10 #define PRLSTSIZ 10 + struct in6_drlist { char ifname[IFNAMSIZ]; struct { @@ -116,6 +203,30 @@ struct in6_drlist { } defrouter[DRLSTSIZ]; }; +#if defined(KERNEL_PRIVATE) +struct in6_drlist_32 { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr rtaddr; + u_char flags; + u_short rtlifetime; + u_int32_t expire; + u_short if_index; + } defrouter[DRLSTSIZ]; +}; + +struct in6_drlist_64 { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr rtaddr; + u_char flags; + u_short rtlifetime; + u_long expire __attribute__((aligned(8))); + u_short if_index __attribute__((aligned(8))); + } defrouter[DRLSTSIZ] __attribute__((aligned(8))); +}; +#endif /* KERNEL_PRIVATE */ + struct in6_defrouter { struct sockaddr_in6 rtaddr; u_char flags; @@ -124,6 +235,24 @@ struct in6_defrouter { u_short if_index; }; +#if defined(KERNEL_PRIVATE) +struct in6_defrouter_32 { + struct sockaddr_in6 rtaddr; + u_char flags; + u_short rtlifetime; + u_int32_t expire; + u_short if_index; 
+}; + +struct in6_defrouter_64 { + struct sockaddr_in6 rtaddr; + u_char flags; + u_short rtlifetime; + u_long expire __attribute__((aligned(8))); + u_short if_index __attribute__((aligned(8))); +} __attribute__((aligned(8))); +#endif /* KERNEL_PRIVATE */ + struct in6_prlist { char ifname[IFNAMSIZ]; struct { @@ -140,6 +269,41 @@ struct in6_prlist { } prefix[PRLSTSIZ]; }; +#if defined(KERNEL_PRIVATE) +struct in6_prlist_32 { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_int32_t vltime; + u_int32_t pltime; + u_int32_t expire; + u_short if_index; + u_short advrtrs; + struct in6_addr advrtr[DRLSTSIZ]; + } prefix[PRLSTSIZ]; +}; + +struct in6_prlist_64 { + char ifname[IFNAMSIZ]; + struct { + struct in6_addr prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_long vltime __attribute__((aligned(8))); + u_long pltime __attribute__((aligned(8))); + u_long expire __attribute__((aligned(8))); + u_short if_index; + u_short advrtrs; + u_int32_t pad; + struct in6_addr advrtr[DRLSTSIZ]; + } prefix[PRLSTSIZ]; +}; +#endif /* KERNEL_PRIVATE */ + struct in6_prefix { struct sockaddr_in6 prefix; struct prf_ra raflags; @@ -155,6 +319,38 @@ struct in6_prefix { /* struct sockaddr_in6 advrtr[] */ }; +#if defined(KERNEL_PRIVATE) +struct in6_prefix_32 { + struct sockaddr_in6 prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_int32_t vltime; + u_int32_t pltime; + u_int32_t expire; + u_int32_t flags; + int refcnt; + u_short if_index; + u_short advrtrs; + /* struct sockaddr_in6 advrtr[] */ +}; + +struct in6_prefix_64 { + struct sockaddr_in6 prefix; + struct prf_ra raflags; + u_char prefixlen; + u_char origin; + u_long vltime __attribute__((aligned(8))); + u_long pltime __attribute__((aligned(8))); + u_long expire __attribute__((aligned(8))); + u_int32_t flags __attribute__((aligned(8))); + int refcnt; + u_short if_index; + u_short advrtrs; + /* struct sockaddr_in6 advrtr[] 
*/ +}; +#endif /* KERNEL_PRIVATE */ + struct in6_ondireq { char ifname[IFNAMSIZ]; struct { @@ -170,16 +366,35 @@ struct in6_ondireq { } ndi; }; +#if !defined(KERNEL_PRIVATE) struct in6_ndireq { char ifname[IFNAMSIZ]; struct nd_ifinfo ndi; }; +#else +struct in6_ndireq { + char ifname[IFNAMSIZ]; + struct nd_ifinfo_compat ndi; +}; +#endif /* !KERNEL_PRIVATE */ struct in6_ndifreq { char ifname[IFNAMSIZ]; u_long ifindex; }; +#if defined(KERNEL_PRIVATE) +struct in6_ndifreq_32 { + char ifname[IFNAMSIZ]; + u_int32_t ifindex; +}; + +struct in6_ndifreq_64 { + char ifname[IFNAMSIZ]; + u_long ifindex __attribute__((aligned(8))); +}; +#endif /* KERNEL_PRIVATE */ + /* Prefix status */ #define NDPRF_ONLINK 0x1 #define NDPRF_DETACHED 0x2 @@ -192,8 +407,14 @@ struct in6_ndifreq { #define ND6_INFINITE_LIFETIME 0xffffffff #ifdef KERNEL_PRIVATE +/* + * Protects nd_ifinfo[] + */ +__private_extern__ lck_rw_t *nd_if_rwlock; + #define ND_IFINFO(ifp) \ - (&nd_ifinfo[(ifp)->if_index]) + ((ifp)->if_index < nd_ifinfo_indexlim ? &nd_ifinfo[(ifp)->if_index] : NULL) + /* * In a more readable form, we derive linkmtu based on: * @@ -205,10 +426,11 @@ struct in6_ndifreq { * linkmtu = ifp->if_mtu; */ #define IN6_LINKMTU(ifp) \ + (ND_IFINFO(ifp) == NULL ? (ifp)->if_mtu : \ ((ND_IFINFO(ifp)->linkmtu && \ ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) ? ND_IFINFO(ifp)->linkmtu : \ ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) ? 
\ - ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu)) + ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))) /* node constants */ #define MAX_REACHABLE_TIME 3600000 /* msec */ @@ -230,9 +452,9 @@ struct nd_defrouter { struct in6_addr rtaddr; u_char flags; /* flags on RA message */ u_short rtlifetime; - u_long expire; - u_long advint; /* Mobile IPv6 addition (milliseconds) */ - u_long advint_expire; /* Mobile IPv6 addition */ + u_int32_t expire; + u_int32_t advint; /* Mobile IPv6 addition (milliseconds) */ + u_int32_t advint_expire; /* Mobile IPv6 addition */ int advints_lost; /* Mobile IPv6 addition */ struct ifnet *ifp; }; @@ -277,10 +499,10 @@ struct inet6_ndpr_msghdr { u_char inpm_version; /* future binary compatibility */ u_char inpm_type; /* message type */ struct in6_addr inpm_prefix; - u_long prm_vltim; - u_long prm_pltime; - u_long prm_expire; - u_long prm_preferred; + u_int32_t prm_vltim; + u_int32_t prm_pltime; + u_int32_t prm_expire; + u_int32_t prm_preferred; struct in6_prflags prm_flags; u_short prm_index; /* index for associated ifp */ u_char prm_plen; /* length of prefix in bits */ @@ -318,6 +540,7 @@ extern struct nd_ifinfo *nd_ifinfo; extern struct nd_drhead nd_defrouter; extern struct nd_prhead nd_prefix; extern int nd6_debug; +extern size_t nd_ifinfo_indexlim; #define nd6log(x) do { if (nd6_debug) log x; } while (0) @@ -362,67 +585,66 @@ union nd_opts { /* XXX: need nd6_var.h?? 
*/ /* nd6.c */ -void nd6_init(void); -void nd6_ifattach(struct ifnet *); -int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int); -void nd6_option_init(void *, int, union nd_opts *); -struct nd_opt_hdr *nd6_option(union nd_opts *); -int nd6_options(union nd_opts *); -struct rtentry *nd6_lookup(struct in6_addr *, int, struct ifnet *, int); -void nd6_setmtu(struct ifnet *); -void nd6_timer(void *); -void nd6_purge(struct ifnet *); -struct llinfo_nd6 *nd6_free(struct rtentry *); -void nd6_nud_hint(struct rtentry *, struct in6_addr *, int); -int nd6_resolve(struct ifnet *, struct rtentry *, - struct mbuf *, struct sockaddr *, u_char *); -void nd6_rtrequest(int, struct rtentry *, struct sockaddr *); -int nd6_ioctl(u_long, caddr_t, struct ifnet *); -struct rtentry *nd6_cache_lladdr(struct ifnet *, struct in6_addr *, - char *, int, int, int); -int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, - struct sockaddr_in6 *, struct rtentry *, int); -int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, - struct sockaddr *, u_char *); -int nd6_need_cache(struct ifnet *); +extern void nd6_init(void); +extern int nd6_ifattach(struct ifnet *); +extern int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int); +extern void nd6_option_init(void *, int, union nd_opts *); +extern struct nd_opt_hdr *nd6_option(union nd_opts *); +extern int nd6_options(union nd_opts *); +extern struct rtentry *nd6_lookup(struct in6_addr *, int, struct ifnet *, int); +extern void nd6_setmtu(struct ifnet *); +extern void nd6_timer(void *); +extern void nd6_purge(struct ifnet *); +extern void nd6_free(struct rtentry *); +extern void nd6_nud_hint(struct rtentry *, struct in6_addr *, int); +extern int nd6_resolve(struct ifnet *, struct rtentry *, + struct mbuf *, struct sockaddr *, u_char *); +extern void nd6_rtrequest(int, struct rtentry *, struct sockaddr *); +extern int nd6_ioctl(u_long, caddr_t, struct ifnet *); +extern void nd6_cache_lladdr(struct ifnet 
*, struct in6_addr *, + char *, int, int, int); +extern int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, struct rtentry *, int); +extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, + struct sockaddr *, u_char *); +extern int nd6_need_cache(struct ifnet *); /* nd6_nbr.c */ -void nd6_na_input(struct mbuf *, int, int); -void nd6_na_output(struct ifnet *, const struct in6_addr *, - const struct in6_addr *, u_long, int, struct sockaddr *); -void nd6_ns_input(struct mbuf *, int, int); -void nd6_ns_output(struct ifnet *, const struct in6_addr *, - const struct in6_addr *, struct llinfo_nd6 *, int, int); -caddr_t nd6_ifptomac(struct ifnet *); -void nd6_dad_start(struct ifaddr *, int *); -void nd6_dad_stop(struct ifaddr *); -void nd6_dad_duplicated(struct ifaddr *); +extern void nd6_na_input(struct mbuf *, int, int); +extern void nd6_na_output(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, u_int32_t, int, struct sockaddr *); +extern void nd6_ns_input(struct mbuf *, int, int); +extern void nd6_ns_output(struct ifnet *, const struct in6_addr *, + const struct in6_addr *, struct llinfo_nd6 *, int, int); +extern caddr_t nd6_ifptomac(struct ifnet *); +extern void nd6_dad_start(struct ifaddr *, int *); +extern void nd6_dad_stop(struct ifaddr *); +extern void nd6_dad_duplicated(struct ifaddr *); /* nd6_rtr.c */ -void nd6_rs_input(struct mbuf *, int, int); -void nd6_ra_input(struct mbuf *, int, int); -void prelist_del(struct nd_prefix *); -void defrouter_addreq(struct nd_defrouter *); -void defrouter_delreq(struct nd_defrouter *, int); -void defrouter_select(void); -void defrtrlist_del(struct nd_defrouter *, int); -void prelist_remove(struct nd_prefix *, int); -int prelist_update(struct nd_prefix *, struct nd_defrouter *, - struct mbuf *); -int nd6_prelist_add(struct nd_prefix *, struct nd_defrouter *, - struct nd_prefix **); -int nd6_prefix_onlink(struct nd_prefix *, int, int); -int 
nd6_prefix_offlink(struct nd_prefix *); -void pfxlist_onlink_check(int); -struct nd_defrouter *defrouter_lookup(struct in6_addr *, - struct ifnet *); -struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *); -int in6_init_prefix_ltimes(struct nd_prefix *ndpr); -void rt6_flush(struct in6_addr *, struct ifnet *); -int nd6_setdefaultiface(int); -int in6_tmpifadd(const struct in6_ifaddr *, int); -void ndpr_hold(struct nd_prefix *, boolean_t); -void ndpr_rele(struct nd_prefix *, boolean_t); +extern void nd6_rs_input(struct mbuf *, int, int); +extern void nd6_ra_input(struct mbuf *, int, int); +extern void prelist_del(struct nd_prefix *); +extern void defrouter_addreq(struct nd_defrouter *); +extern void defrouter_delreq(struct nd_defrouter *, int); +extern void defrouter_select(void); +extern void defrtrlist_del(struct nd_defrouter *, int); +extern void prelist_remove(struct nd_prefix *, int); +extern int prelist_update(struct nd_prefix *, struct nd_defrouter *, + struct mbuf *); +extern int nd6_prelist_add(struct nd_prefix *, struct nd_defrouter *, + struct nd_prefix **); +extern int nd6_prefix_onlink(struct nd_prefix *, int, int); +extern int nd6_prefix_offlink(struct nd_prefix *); +extern void pfxlist_onlink_check(int); +extern struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); +extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *); +extern int in6_init_prefix_ltimes(struct nd_prefix *ndpr); +extern void rt6_flush(struct in6_addr *, struct ifnet *); +extern int nd6_setdefaultiface(int); +extern int in6_tmpifadd(const struct in6_ifaddr *, int, int); +extern void ndpr_hold(struct nd_prefix *, boolean_t); +extern void ndpr_rele(struct nd_prefix *, boolean_t); #endif /* KERNEL_PRIVATE */ #ifdef KERNEL @@ -432,7 +654,7 @@ void ndpr_rele(struct nd_prefix *, boolean_t); @discussion This function will check the routing table for a cached neighbor discovery entry or trigger an neighbor discovery query to resolve the IPv6 address to a link-layer 
address. - + nd entries are stored in the routing table. This function will lookup the IPv6 destination in the routing table. If the destination requires forwarding to a gateway, the route of the @@ -451,9 +673,9 @@ void ndpr_rele(struct nd_prefix *, boolean_t); value is returned, the caller is responsible for disposing of the packet. */ -errno_t nd6_lookup_ipv6(ifnet_t interface, const struct sockaddr_in6 *ip6_dest, - struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, - mbuf_t packet); +extern errno_t nd6_lookup_ipv6(ifnet_t interface, + const struct sockaddr_in6 *ip6_dest, struct sockaddr_dl *ll_dest, + size_t ll_dest_len, route_t hint, mbuf_t packet); -#endif KERNEL +#endif /* KERNEL */ #endif /* _NETINET6_ND6_H_ */ diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c index c8686c533..8efbdd2c5 100644 --- a/bsd/netinet6/nd6_nbr.c +++ b/bsd/netinet6/nd6_nbr.c @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ /* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.4.2.4 2001/07/06 05:32:25 sumikawa Exp $ */ /* $KAME: nd6_nbr.c,v 1.64 2001/05/17 03:48:30 itojun Exp $ */ @@ -105,12 +132,12 @@ nd6_ns_input( struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; union nd_opts ndopts; - struct sockaddr_dl *proxydl = NULL; + struct sockaddr_dl proxydl; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len, return); @@ -216,21 +243,24 @@ nd6_ns_input( tsin6.sin6_family = AF_INET6; tsin6.sin6_addr = taddr6; - rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0UL); - if (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && - rt->rt_gateway->sa_family == AF_LINK) { - /* - * proxy NDP for single entry - */ - ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, - IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); - if (ifa) { - proxy = 1; - proxydl = SDL(rt->rt_gateway); + rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0); + if (rt != NULL) { + RT_LOCK(rt); + if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && + rt->rt_gateway->sa_family == AF_LINK) { + /* + * proxy NDP for single entry + */ + ifa = (struct ifaddr *)in6ifa_ifpforlinklocal( + ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); + if (ifa) { + proxy = 1; + proxydl = *SDL(rt->rt_gateway); + } } - } - if (rt) + RT_UNLOCK(rt); rtfree(rt); + } } if (!ifa) { /* @@ -302,7 +332,7 @@ nd6_ns_input( ((anycast || proxy || !tlladdr) ? 
0 : ND_NA_FLAG_OVERRIDE) | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), - tlladdr, (struct sockaddr *)proxydl); + tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); goto freeit; } @@ -312,9 +342,11 @@ nd6_ns_input( ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED, - tlladdr, (struct sockaddr *)proxydl); + tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL); freeit: m_freem(m); + if (ifa != NULL) + ifafree(ifa); return; bad: @@ -323,6 +355,8 @@ nd6_ns_input( nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(&taddr6))); icmp6stat.icp6s_badns++; m_freem(m); + if (ifa != NULL) + ifafree(ifa); } /* @@ -333,6 +367,9 @@ nd6_ns_input( * * Based on RFC 2461 * Based on RFC 2462 (duplicated address detection) + * + * Caller must bump up ln->ln_rt refcnt to make sure 'ln' doesn't go + * away if there is a llinfo_nd6 passed in. */ void nd6_ns_output( @@ -438,31 +475,54 @@ nd6_ns_output( * - saddr6 belongs to the outgoing interface. * Otherwise, we perform a scope-wise match. */ - struct ip6_hdr *hip6; /* hold ip6 */ - struct in6_addr *saddr6; - - if (ln && ln->ln_hold) { - hip6 = mtod(ln->ln_hold, struct ip6_hdr *); - /* XXX pullup? */ - if (sizeof(*hip6) < ln->ln_hold->m_len) - saddr6 = &hip6->ip6_src; - else - saddr6 = NULL; - } else - saddr6 = NULL; - if (saddr6 && in6ifa_ifpwithaddr(ifp, saddr6)) - bcopy(saddr6, &ip6->ip6_src, sizeof(*saddr6)); - else { + struct ip6_hdr *hip6 = NULL; /* hold ip6 */ + struct in6_addr saddr6; + + /* Caller holds ref on this route */ + if (ln != NULL) { + RT_LOCK(ln->ln_rt); + if (ln->ln_hold != NULL) { + hip6 = mtod(ln->ln_hold, struct ip6_hdr *); + /* XXX pullup? */ + if (sizeof (*hip6) < ln->ln_hold->m_len) + saddr6 = hip6->ip6_src; + else + hip6 = NULL; + } + /* + * hip6 is used only to indicate whether or + * not there is a valid source address from + * the held packet in ln_hold. 
For obvious + * reasons we should not dereference it after + * releasing the lock though we can simply + * test if it's non-NULL. + */ + RT_UNLOCK(ln->ln_rt); + } + + if (ia != NULL) + ifafree(&ia->ia_ifa); + if (hip6 != NULL && (ia = in6ifa_ifpwithaddr(ifp, &saddr6))) { + bcopy(&saddr6, &ip6->ip6_src, sizeof (saddr6)); + } else { ia = in6_ifawithifp(ifp, &ip6->ip6_dst); if (ia == NULL) { - if (ln && ln->ln_hold) - m_freem(ln->ln_hold); - ln->ln_hold = NULL; + if (ln != NULL) { + RT_LOCK(ln->ln_rt); + if (ln->ln_hold != NULL) + m_freem(ln->ln_hold); + ln->ln_hold = NULL; + RT_UNLOCK(ln->ln_rt); + } m_freem(m); return; } ip6->ip6_src = ia->ia_addr.sin6_addr; } + if (ia != NULL) { + ifafree(&ia->ia_ifa); + ia = NULL; + } #endif } else { /* @@ -555,7 +615,7 @@ nd6_na_input( int is_override; char *lladdr = NULL; int lladdrlen = 0; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; struct llinfo_nd6 *ln; struct rtentry *rt; struct sockaddr_dl *sdl; @@ -651,12 +711,18 @@ nd6_na_input( /* * If no neighbor cache entry is found, NA SHOULD silently be discarded. + * Callee returns a locked route upon success. */ - rt = nd6_lookup(&taddr6, 0, ifp, 0); - if ((rt == NULL) || - ((ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) || - ((sdl = SDL(rt->rt_gateway)) == NULL)) + if ((rt = nd6_lookup(&taddr6, 0, ifp, 0)) == NULL) + goto freeit; + + RT_LOCK_ASSERT_HELD(rt); + if ((ln = rt->rt_llinfo) == NULL || + (sdl = SDL(rt->rt_gateway)) == NULL) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); goto freeit; + } getmicrotime(&timenow); if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { @@ -664,8 +730,11 @@ nd6_na_input( * If the link-layer has address, and no lladdr option came, * discard the packet. */ - if (ifp->if_addrlen && !lladdr) + if (ifp->if_addrlen && !lladdr) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); goto freeit; + } /* * Record link-layer address, and update the state. 
@@ -675,9 +744,12 @@ nd6_na_input( if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; - if (ln->ln_expire) + if (ln->ln_expire) { + lck_rw_lock_shared(nd_if_rwlock); ln->ln_expire = timenow.tv_sec + nd_ifinfo[rt->rt_ifp->if_index].reachable; + lck_rw_done(nd_if_rwlock); + } } else { ln->ln_state = ND6_LLINFO_STALE; ln->ln_expire = timenow.tv_sec + nd6_gctimer; @@ -688,7 +760,9 @@ nd6_na_input( * non-reachable to probably reachable, and might * affect the status of associated prefixes.. */ + RT_UNLOCK(rt); pfxlist_onlink_check(0); + RT_LOCK(rt); } } else { int llchange; @@ -736,6 +810,8 @@ nd6_na_input( ln->ln_state = ND6_LLINFO_STALE; ln->ln_expire = timenow.tv_sec + nd6_gctimer; } + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr && !llchange)) /* (2b) */ @@ -757,8 +833,10 @@ nd6_na_input( ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (ln->ln_expire) { + lck_rw_lock_shared(nd_if_rwlock); ln->ln_expire = timenow.tv_sec + nd_ifinfo[ifp->if_index].reachable; + lck_rw_done(nd_if_rwlock); } } else { if (lladdr && llchange) { @@ -776,6 +854,7 @@ nd6_na_input( */ struct nd_defrouter *dr; struct in6_addr *in6; + struct ifnet *rt_ifp = rt->rt_ifp; in6 = &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr; @@ -785,15 +864,16 @@ nd6_na_input( * is only called under the network software interrupt * context. However, we keep it just for safety. */ + RT_UNLOCK(rt); lck_mtx_lock(nd6_mutex); - dr = defrouter_lookup(in6, rt->rt_ifp); + dr = defrouter_lookup(in6, rt_ifp); if (dr) { defrtrlist_del(dr, 1); lck_mtx_unlock(nd6_mutex); } else { lck_mtx_unlock(nd6_mutex); - if (!ip6_forwarding && (ip6_accept_rtadv || (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + if (!ip6_forwarding && (ip6_accept_rtadv || (rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used @@ -801,31 +881,42 @@ nd6_na_input( * (e.g. 
redirect case). So we must * call rt6_flush explicitly. */ - rt6_flush(&ip6->ip6_src, rt->rt_ifp); + rt6_flush(&ip6->ip6_src, rt_ifp); } } + RT_LOCK(rt); } ln->ln_router = is_router; } + RT_LOCK_ASSERT_HELD(rt); rt->rt_flags &= ~RTF_REJECT; ln->ln_asked = 0; - if (ln->ln_hold) { + if (ln->ln_hold != NULL) { + struct mbuf *n = ln->ln_hold; + ln->ln_hold = NULL; /* * we assume ifp is not a loopback here, so just set the 2nd * argument as the 1st one. */ - nd6_output(ifp, ifp, ln->ln_hold, - (struct sockaddr_in6 *)rt_key(rt), rt, 0); - ln->ln_hold = 0; + RT_UNLOCK(rt); + nd6_output(ifp, ifp, n, (struct sockaddr_in6 *)rt_key(rt), + rt, 0); + RT_LOCK_SPIN(rt); } + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); freeit: m_freem(m); + if (ifa != NULL) + ifafree(ifa); return; bad: icmp6stat.icp6s_badna++; m_freem(m); + if (ifa != NULL) + ifafree(ifa); } /* @@ -842,7 +933,7 @@ nd6_na_output( struct ifnet *ifp, const struct in6_addr *daddr6, const struct in6_addr *taddr6, - u_long flags, + uint32_t flags, int tlladdr, /* 1 if include target link-layer address */ struct sockaddr *sdl0) /* sockaddr_dl (= proxy NA) or NULL */ { @@ -917,6 +1008,9 @@ nd6_na_output( return; } ip6->ip6_src = ia->ia_addr.sin6_addr; + ifafree(&ia->ia_ifa); + ia = NULL; + nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; @@ -1121,9 +1215,12 @@ nd6_dad_start( dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; if (tick_delay == NULL) { + u_int32_t retrans; nd6_dad_ns_output(dp, ifa); - timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, - nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000); + lck_rw_lock_shared(nd_if_rwlock); + retrans = nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000; + lck_rw_done(nd_if_rwlock); + timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans); } else { int ntick; @@ -1213,12 +1310,15 @@ nd6_dad_timer( /* Need more checks? 
*/ if (dp->dad_ns_ocount < dp->dad_count) { + u_int32_t retrans; /* * We have more NS to go. Send NS packet for DAD. */ nd6_dad_ns_output(dp, ifa); - timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, - nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000); + lck_rw_lock_shared(nd_if_rwlock); + retrans = nd_ifinfo[ifa->ifa_ifp->if_index].retrans * hz / 1000; + lck_rw_done(nd_if_rwlock); + timeout((void (*)(void *))nd6_dad_timer, (void *)ifa, retrans); } else { /* * We have transmitted sufficient number of DAD packets. diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c index 5af29d31e..10e965185 100644 --- a/bsd/netinet6/nd6_rtr.c +++ b/bsd/netinet6/nd6_rtr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -124,7 +124,6 @@ static int ip6_temp_valid_lifetime = 1800; */ int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; -extern lck_mtx_t *rt_mtx; extern lck_mtx_t *nd6_mutex; /* @@ -235,7 +234,7 @@ nd6_ra_input( int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; - struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; + struct nd_ifinfo *ndi = NULL; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_router_advert *nd_ra; struct in6_addr saddr6 = ip6->ip6_src; @@ -293,6 +292,12 @@ nd6_ra_input( struct nd_defrouter dr0; u_int32_t advreachable = nd_ra->nd_ra_reachable; + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); + goto freeit; + } + ndi = &nd_ifinfo[ifp->if_index]; dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); @@ -315,6 +320,8 @@ nd6_ra_input( ndi->retrans = ntohl(nd_ra->nd_ra_retransmit); if (nd_ra->nd_ra_curhoplimit) ndi->chlim = nd_ra->nd_ra_curhoplimit; + lck_rw_done(nd_if_rwlock); + ndi = NULL; dr = defrtrlist_update(&dr0); } @@ -406,12 +413,19 @@ nd6_ra_input( goto skip; 
} + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); + goto freeit; + } + ndi = &nd_ifinfo[ifp->if_index]; /* upper bound */ if (ndi->maxmtu) { if (mtu <= ndi->maxmtu) { int change = (ndi->linkmtu != mtu); ndi->linkmtu = mtu; + lck_rw_done(nd_if_rwlock); if (change) /* in6_maxmtu may change */ in6_setmaxmtu(); } else { @@ -420,13 +434,16 @@ nd6_ra_input( "exceeds maxmtu %d, ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src), ndi->maxmtu)); + lck_rw_done(nd_if_rwlock); } } else { + lck_rw_done(nd_if_rwlock); nd6log((LOG_INFO, "nd6_ra_input: mtu option " "mtu=%d sent from %s; maxmtu unknown, " "ignoring\n", mtu, ip6_sprintf(&ip6->ip6_src))); } + ndi = NULL; } skip: @@ -481,18 +498,22 @@ nd6_rtmsg(cmd, rt) struct rtentry *rt; { struct rt_addrinfo info; + struct ifnet *ifp = rt->rt_ifp; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + RT_LOCK_ASSERT_HELD(rt); bzero((caddr_t)&info, sizeof(info)); + /* Lock ifp for if_addrlist */ + ifnet_lock_shared(ifp); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_IFP] = - TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr; + TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; rt_missmsg(cmd, &info, rt->rt_flags, 0); + ifnet_lock_done(ifp); } void @@ -511,15 +532,15 @@ defrouter_addreq( def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; - lck_mtx_lock(rt_mtx); - (void)rtrequest_locked(RTM_ADD, (struct sockaddr *)&def, - (struct sockaddr *)&gate, (struct sockaddr *)&mask, - RTF_GATEWAY, &newrt); + (void) rtrequest(RTM_ADD, (struct sockaddr *)&def, + (struct sockaddr *)&gate, (struct sockaddr *)&mask, + RTF_GATEWAY, &newrt); if (newrt) { + RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ - rtunref(newrt); + RT_REMREF_LOCKED(newrt); + RT_UNLOCK(newrt); } - lck_mtx_unlock(rt_mtx); return; } 
@@ -532,7 +553,7 @@ defrouter_addifreq( struct ifaddr *ifa = NULL; struct rtentry *newrt = NULL; int error; - u_long flags; + u_int32_t flags; bzero(&def, sizeof(def)); bzero(&mask, sizeof(mask)); @@ -552,26 +573,23 @@ defrouter_addifreq( return; } - lck_mtx_lock(rt_mtx); flags = ifa->ifa_flags; - error = rtrequest_locked(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, - (struct sockaddr *)&mask, flags, &newrt); + error = rtrequest(RTM_ADD, (struct sockaddr *)&def, ifa->ifa_addr, + (struct sockaddr *)&mask, flags, &newrt); if (error != 0) { nd6log((LOG_ERR, "defrouter_addifreq: failed to install a route to " "interface %s (errno = %d)\n", if_name(ifp), error)); - - if (newrt) /* maybe unnecessary, but do it for safety */ - rtunref(newrt); } else { if (newrt) { + RT_LOCK(newrt); nd6_rtmsg(RTM_ADD, newrt); - rtunref(newrt); + RT_REMREF_LOCKED(newrt); + RT_UNLOCK(newrt); } in6_post_msg(ifp, KEV_INET6_DEFROUTER, (struct in6_ifaddr *)ifa); } - lck_mtx_unlock(rt_mtx); ifafree(ifa); } @@ -611,19 +629,18 @@ defrouter_delreq( def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; - lck_mtx_lock(rt_mtx); - rtrequest_locked(RTM_DELETE, (struct sockaddr *)&def, - (struct sockaddr *)&gate, - (struct sockaddr *)&mask, - RTF_GATEWAY, &oldrt); + (void) rtrequest(RTM_DELETE, (struct sockaddr *)&def, + (struct sockaddr *)&gate, (struct sockaddr *)&mask, + RTF_GATEWAY, &oldrt); if (oldrt) { + RT_LOCK(oldrt); nd6_rtmsg(RTM_DELETE, oldrt); - rtfree_locked(oldrt); + RT_UNLOCK(oldrt); + rtfree(oldrt); } if (dofree) /* XXX: necessary? */ FREE(dr, M_IP6NDP); - lck_mtx_unlock(rt_mtx); } void @@ -631,16 +648,17 @@ defrtrlist_del( struct nd_defrouter *dr, int nd6locked) { struct nd_defrouter *deldr = NULL; - struct nd_ifinfo *ndi = &nd_ifinfo[dr->ifp->if_index]; struct nd_prefix *pr; + struct ifnet *ifp = dr->ifp; /* * Flush all the routing table entries that use the router * as a next hop. 
*/ - if (!ip6_forwarding && (ip6_accept_rtadv || (dr->ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { + if (!ip6_forwarding && + (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) { /* above is a good condition? */ - rt6_flush(&dr->rtaddr, dr->ifp); + rt6_flush(&dr->rtaddr, ifp); } if (nd6locked == 0) @@ -668,11 +686,16 @@ defrtrlist_del( if (deldr) defrouter_select(); - ndi->ndefrouters--; - if (ndi->ndefrouters < 0) { - log(LOG_WARNING, "defrtrlist_del: negative count on %s\n", - if_name(dr->ifp)); + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index < nd_ifinfo_indexlim) { + struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; + ndi->ndefrouters--; + if (ndi->ndefrouters < 0) { + log(LOG_WARNING, "defrtrlist_del: negative " + "count on %s\n", if_name(ifp)); + } } + lck_rw_done(nd_if_rwlock); if (nd6locked == 0) lck_mtx_unlock(nd6_mutex); @@ -704,13 +727,20 @@ defrouter_select() for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = TAILQ_NEXT(dr, dr_entry)) { - if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp, 0)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln)) { - /* Got it, and move it to the head */ - TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); - TAILQ_INSERT_HEAD(&nd_defrouter, dr, dr_entry); - break; + /* Callee returns a locked route upon success */ + if ((rt = nd6_lookup(&dr->rtaddr, 0, dr->ifp, 0)) != NULL) { + RT_LOCK_ASSERT_HELD(rt); + if ((ln = rt->rt_llinfo) != NULL && + ND6_IS_LLINFO_PROBREACH(ln)) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + /* Got it, and move it to the head */ + TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); + TAILQ_INSERT_HEAD(&nd_defrouter, dr, dr_entry); + break; + } + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); } } @@ -767,10 +797,11 @@ defrtrlist_update( struct nd_defrouter *new) { struct nd_defrouter *dr, *n; - struct nd_ifinfo *ndi = &nd_ifinfo[new->ifp->if_index]; + struct ifnet *ifp = new->ifp; + struct nd_ifinfo *ndi; lck_mtx_lock(nd6_mutex); - if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) 
{ + if ((dr = defrouter_lookup(&new->rtaddr, ifp)) != NULL) { /* entry exists */ if (new->rtlifetime == 0) { defrtrlist_del(dr, 1); @@ -791,17 +822,27 @@ defrtrlist_update( return(NULL); } + n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT); + if (n == NULL) { + lck_mtx_unlock(nd6_mutex); + return(NULL); + } + + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) + goto freeit; + ndi = &nd_ifinfo[ifp->if_index]; if (ip6_maxifdefrouters >= 0 && ndi->ndefrouters >= ip6_maxifdefrouters) { +freeit: + lck_rw_done(nd_if_rwlock); lck_mtx_unlock(nd6_mutex); + FREE(n, M_IP6NDP); return (NULL); } + ndi->ndefrouters++; + lck_rw_done(nd_if_rwlock); - n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT); - if (n == NULL) { - lck_mtx_unlock(nd6_mutex); - return(NULL); - } bzero(n, sizeof(*n)); *n = *new; @@ -813,9 +854,7 @@ defrtrlist_update( TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry); if (TAILQ_FIRST(&nd_defrouter) == n) defrouter_select(); - - ndi->ndefrouters++; - + lck_mtx_unlock(nd6_mutex); return(n); } @@ -927,7 +966,7 @@ purge_detached(struct ifnet *ifp) struct nd_prefix *pr, *pr_next; struct in6_ifaddr *ia; struct ifaddr *ifa, *ifa_next; - + lck_mtx_lock(nd6_mutex); for (pr = nd_prefix.lh_first; pr; pr = pr_next) { @@ -937,7 +976,8 @@ purge_detached(struct ifnet *ifp) ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && !LIST_EMPTY(&pr->ndpr_advrtrs))) continue; - +repeat: + ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa_next) { ifa_next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6) @@ -945,9 +985,19 @@ purge_detached(struct ifnet *ifp) ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == IN6_IFF_AUTOCONF && ia->ia6_ndpr == pr) { + ifaref(ifa); + /* + * Purging the address requires writer access + * to the address list, so drop the ifnet lock + * now and repeat from beginning. 
+ */ + ifnet_lock_done(ifp); in6_purgeaddr(ifa, 1); + ifafree(ifa); + goto repeat; } } + ifnet_lock_done(ifp); if (pr->ndpr_refcnt == 0) prelist_remove(pr, 1); } @@ -962,14 +1012,33 @@ nd6_prelist_add( struct nd_prefix **newp) { struct nd_prefix *new = NULL; + struct ifnet *ifp = pr->ndpr_ifp; + struct nd_ifinfo *ndi = NULL; int i; - struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index]; if (ip6_maxifprefixes >= 0) { - if (ndi->nprefixes >= ip6_maxifprefixes / 2) - purge_detached(pr->ndpr_ifp); - if (ndi->nprefixes >= ip6_maxifprefixes) + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) { + lck_rw_done(nd_if_rwlock); + return (EINVAL); + } + ndi = &nd_ifinfo[ifp->if_index]; + if (ndi->nprefixes >= ip6_maxifprefixes / 2) { + lck_rw_done(nd_if_rwlock); + purge_detached(ifp); + lck_rw_lock_shared(nd_if_rwlock); + /* + * Refresh pointer since nd_ifinfo[] may have grown; + * repeating the bounds check against nd_ifinfo_indexlim + * isn't necessary since the array never shrinks. + */ + ndi = &nd_ifinfo[ifp->if_index]; + } + if (ndi->nprefixes >= ip6_maxifprefixes) { + lck_rw_done(nd_if_rwlock); return(ENOMEM); + } + lck_rw_done(nd_if_rwlock); } new = (struct nd_prefix *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT); @@ -1003,7 +1072,7 @@ nd6_prelist_add( nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + pr->ndpr_plen, if_name(ifp), e)); /* proceed anyway. XXX: is it correct? */ } } @@ -1012,7 +1081,15 @@ nd6_prelist_add( pfxrtr_add(new, dr); } + lck_rw_lock_shared(nd_if_rwlock); + /* + * Refresh pointer since nd_ifinfo[] may have grown; + * repeating the bounds check against nd_ifinfo_indexlim + * isn't necessary since the array never shrinks. 
+ */ + ndi = &nd_ifinfo[ifp->if_index]; ndi->nprefixes++; + lck_rw_done(nd_if_rwlock); lck_mtx_unlock(nd6_mutex); @@ -1024,8 +1101,8 @@ prelist_remove( struct nd_prefix *pr, int nd6locked) { struct nd_pfxrouter *pfr, *next; + struct ifnet *ifp = pr->ndpr_ifp; int e; - struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index]; /* make sure to invalidate the prefix until it is really freed. */ pr->ndpr_vltime = 0; @@ -1043,7 +1120,7 @@ prelist_remove( nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " "on %s, errno=%d\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + pr->ndpr_plen, if_name(ifp), e)); /* what should we do? */ } @@ -1063,11 +1140,16 @@ prelist_remove( FREE(pfr, M_IP6NDP); } - ndi->nprefixes--; - if (ndi->nprefixes < 0) { - log(LOG_WARNING, "prelist_remove: negative count on %s\n", - if_name(pr->ndpr_ifp)); + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index < nd_ifinfo_indexlim) { + struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; + ndi->nprefixes--; + if (ndi->nprefixes < 0) { + log(LOG_WARNING, "prelist_remove: negative " + "count on %s\n", if_name(ifp)); + } } + lck_rw_done(nd_if_rwlock); FREE(pr, M_IP6NDP); @@ -1344,13 +1426,15 @@ prelist_update( */ if (ip6_use_tempaddr) { int e; - if ((e = in6_tmpifadd(ia6, 1)) != 0) { + if ((e = in6_tmpifadd(ia6, 1, M_NOWAIT)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", e)); } } + ifafree(&ia6->ia_ifa); + ia6 = NULL; /* * A newly added address might affect the status @@ -1390,11 +1474,19 @@ find_pfxlist_reachable_router( for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { + /* Callee returns a locked route upon success */ if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, - pfxrtr->router->ifp, 0)) && - (ln = (struct llinfo_nd6 *)rt->rt_llinfo) && - ND6_IS_LLINFO_PROBREACH(ln)) - break; /* found */ + pfxrtr->router->ifp, 0)) != NULL) { + 
RT_LOCK_ASSERT_HELD(rt); + if ((ln = rt->rt_llinfo) != NULL && + ND6_IS_LLINFO_PROBREACH(ln)) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + break; /* found */ + } + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } } return(pfxrtr); @@ -1569,7 +1661,7 @@ nd6_prefix_onlink( struct ifnet *ifp = pr->ndpr_ifp; struct sockaddr_in6 mask6; struct nd_prefix *opr; - u_long rtflags; + u_int32_t rtflags; int error = 0; struct rtentry *rt = NULL; @@ -1626,6 +1718,8 @@ nd6_prefix_onlink( if (ifa->ifa_addr->sa_family == AF_INET6) break; } + if (ifa != NULL) + ifaref(ifa); ifnet_lock_done(ifp); /* should we care about ia6_flags? */ } @@ -1653,7 +1747,7 @@ nd6_prefix_onlink( mask6.sin6_addr = pr->ndpr_mask; if (rtlocked == 0) - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP; if (nd6_need_cache(ifp)) { @@ -1669,8 +1763,11 @@ nd6_prefix_onlink( ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt); if (error == 0) { - if (rt != NULL) /* this should be non NULL, though */ + if (rt != NULL) { /* this should be non NULL, though */ + RT_LOCK(rt); nd6_rtmsg(RTM_ADD, rt); + RT_UNLOCK(rt); + } pr->ndpr_stateflags |= NDPRF_ONLINK; } else { @@ -1684,10 +1781,13 @@ nd6_prefix_onlink( } if (rt != NULL) - rtunref(rt); + RT_REMREF(rt); if (rtlocked == 0) - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); + + ifafree(ifa); + return(error); } @@ -1718,15 +1818,18 @@ nd6_prefix_offlink( mask6.sin6_family = AF_INET6; mask6.sin6_len = sizeof(sa6); bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); error = rtrequest_locked(RTM_DELETE, (struct sockaddr *)&sa6, NULL, (struct sockaddr *)&mask6, 0, &rt); if (error == 0) { pr->ndpr_stateflags &= ~NDPRF_ONLINK; /* report the route deletion to the routing socket. 
*/ - if (rt != NULL) + if (rt != NULL) { + RT_LOCK(rt); nd6_rtmsg(RTM_DELETE, rt); + RT_UNLOCK(rt); + } /* * There might be the same prefix on another interface, @@ -1780,7 +1883,7 @@ nd6_prefix_offlink( if (rt != NULL) rtfree_locked(rt); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); return(error); } @@ -1834,6 +1937,7 @@ in6_ifadd( /* if link-local address is not eligible, do not autoconfigure. */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) { printf("in6_ifadd: link-local address not ready\n"); + ifafree(ifa); return NULL; } #endif @@ -1844,6 +1948,7 @@ in6_ifadd( nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s " "(prefix=%d ifid=%d)\n", if_name(ifp), prefixlen, 128 - plen0)); + ifafree(ifa); return NULL; } @@ -1908,8 +2013,11 @@ in6_ifadd( */ pr->ndpr_addr = ifra.ifra_addr.sin6_addr; + ifafree(ifa); + ifa = NULL; + /* allocate ifaddr structure, link into chain, etc. */ - if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) { + if ((error = in6_update_ifa(ifp, &ifra, NULL, M_NOWAIT)) != 0) { nd6log((LOG_ERR, "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n", ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp), @@ -1927,10 +2035,11 @@ in6_ifadd( int in6_tmpifadd( const struct in6_ifaddr *ia0, /* corresponding public address */ - int forcegen) + int forcegen, + int how) { struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; - struct in6_ifaddr *newia; + struct in6_ifaddr *ia, *newia; struct in6_aliasreq ifra; int i, error; int trylimit = 3; /* XXX: adhoc value */ @@ -1966,7 +2075,8 @@ in6_tmpifadd( * interface identifier and repeat this step. * RFC 3041 3.3 (4). 
*/ - if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) { + if ((ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr)) != NULL) { + ifafree(&ia->ia_ifa); if (trylimit-- == 0) { nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find " "a unique random IFID\n")); @@ -2014,7 +2124,7 @@ in6_tmpifadd( ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY); /* allocate ifaddr structure, link into chain, etc. */ - if ((error = in6_update_ifa(ifp, &ifra, NULL)) != 0) + if ((error = in6_update_ifa(ifp, &ifra, NULL, how)) != 0) return(error); newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); @@ -2027,7 +2137,6 @@ in6_tmpifadd( lck_mtx_lock(nd6_mutex); newia->ia6_ndpr = ia0->ia6_ndpr; newia->ia6_ndpr->ndpr_refcnt++; - /* * A newly added address might affect the status of other addresses. * XXX: when the temporary address is generated with a new public @@ -2038,6 +2147,7 @@ in6_tmpifadd( */ pfxlist_onlink_check(1); lck_mtx_unlock(nd6_mutex); + ifafree(&newia->ia_ifa); return(0); } @@ -2107,12 +2217,12 @@ rt6_flush( if (!IN6_IS_ADDR_LINKLOCAL(gateway)) { return; } - lck_mtx_lock(rt_mtx); + lck_mtx_lock(rnh_lock); /* XXX: hack for KAME's link-local address kludge */ gateway->s6_addr16[1] = htons(ifp->if_index); rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway); - lck_mtx_unlock(rt_mtx); + lck_mtx_unlock(rnh_lock); } static int @@ -2124,31 +2234,44 @@ rt6_deleteroute( struct rtentry *rt = (struct rtentry *)rn; struct in6_addr *gate = (struct in6_addr *)arg; - lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); - if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) + RT_LOCK(rt); + if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { + RT_UNLOCK(rt); return(0); + } - if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) + if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) { + RT_UNLOCK(rt); return(0); - + } /* * Do not delete a static route. 
* XXX: this seems to be a bit ad-hoc. Should we consider the * 'cloned' bit instead? */ - if ((rt->rt_flags & RTF_STATIC) != 0) + if ((rt->rt_flags & RTF_STATIC) != 0) { + RT_UNLOCK(rt); return(0); - + } /* * We delete only host route. This means, in particular, we don't * delete default route. */ - if ((rt->rt_flags & RTF_HOST) == 0) + if ((rt->rt_flags & RTF_HOST) == 0) { + RT_UNLOCK(rt); return(0); + } - return(rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0)); + /* + * Safe to drop rt_lock and use rt_key, rt_gateway, since holding + * rnh_lock here prevents another thread from calling rt_setgate() + * on this route. + */ + RT_UNLOCK(rt); + return (rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway, + rt_mask(rt), rt->rt_flags, 0)); #undef SIN6 } @@ -2157,15 +2280,21 @@ nd6_setdefaultiface( int ifindex) { int error = 0; + ifnet_t def_ifp = NULL; - if (ifindex < 0 || if_index < ifindex) + ifnet_head_lock_shared(); + if (ifindex < 0 || if_index < ifindex) { + ifnet_head_done(); return(EINVAL); + } + def_ifp = ifindex2ifnet[ifindex]; + ifnet_head_done(); lck_mtx_lock(nd6_mutex); if (nd6_defifindex != ifindex) { nd6_defifindex = ifindex; if (nd6_defifindex > 0) - nd6_defifp = ifindex2ifnet[nd6_defifindex]; + nd6_defifp = def_ifp; else nd6_defifp = NULL; diff --git a/bsd/netinet6/pim6_var.h b/bsd/netinet6/pim6_var.h index c90b9bdde..050f8e4c8 100644 --- a/bsd/netinet6/pim6_var.h +++ b/bsd/netinet6/pim6_var.h @@ -68,5 +68,5 @@ struct pim6stat { int pim6_input(struct mbuf **, int*); -#endif KERNEL_PRIVATE -#endif _NETINET6_PIM6_VAR_H_ +#endif /* KERNEL_PRIVATE */ +#endif /* _NETINET6_PIM6_VAR_H_ */ diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c index e84171ae6..8fb3931cb 100644 --- a/bsd/netinet6/raw_ip6.c +++ b/bsd/netinet6/raw_ip6.c @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -109,9 +136,8 @@ extern int ipsec_bypass; extern struct inpcbhead ripcb; extern struct inpcbinfo ripcbinfo; -extern u_long rip_sendspace; -extern u_long rip_recvspace; -extern u_long route_generation; +extern u_int32_t rip_sendspace; +extern u_int32_t rip_recvspace; struct rip6stat rip6stat; @@ -330,6 +356,10 @@ rip6_output( } M_PREPEND(m, sizeof(*ip6), M_WAIT); + if (m == NULL) { + error = ENOBUFS; + goto bad; + } ip6 = mtod(m, struct ip6_hdr *); /* @@ -350,6 +380,7 @@ rip6_output( * XXX Boundary check is assumed to be already done in * ip6_setpktoptions(). 
*/ + ifnet_head_lock_shared(); if (optp && (pi = optp->ip6po_pktinfo) && pi->ipi6_ifindex) { ip6->ip6_dst.s6_addr16[1] = htons(pi->ipi6_ifindex); oifp = ifindex2ifnet[pi->ipi6_ifindex]; @@ -359,15 +390,21 @@ rip6_output( oifp = in6p->in6p_moptions->im6o_multicast_ifp; ip6->ip6_dst.s6_addr16[1] = htons(oifp->if_index); } else if (dstsock->sin6_scope_id) { - /* boundary check */ - if (dstsock->sin6_scope_id < 0 - || if_index < dstsock->sin6_scope_id) { + /* + * boundary check + * + * Sinced stsock->sin6_scope_id is unsigned, we don't + * need to check if it's < 0 + */ + if (if_index < dstsock->sin6_scope_id) { error = ENXIO; /* XXX EINVAL? */ + ifnet_head_done(); goto bad; } ip6->ip6_dst.s6_addr16[1] = htons(dstsock->sin6_scope_id & 0xffff);/*XXX*/ } + ifnet_head_done(); } /* @@ -376,7 +413,7 @@ rip6_output( { struct in6_addr *in6a; struct in6_addr storage; - + u_short index = 0; if ((in6a = in6_selectsrc(dstsock, optp, in6p->in6p_moptions, &in6p->in6p_route, @@ -387,8 +424,18 @@ rip6_output( goto bad; } ip6->ip6_src = *in6a; - if (in6p->in6p_route.ro_rt) - oifp = ifindex2ifnet[in6p->in6p_route.ro_rt->rt_ifp->if_index]; + if (in6p->in6p_route.ro_rt != NULL) { + RT_LOCK(in6p->in6p_route.ro_rt); + if (in6p->in6p_route.ro_rt->rt_ifp != NULL) + index = in6p->in6p_route.ro_rt->rt_ifp->if_index; + RT_UNLOCK(in6p->in6p_route.ro_rt); + ifnet_head_lock_shared(); + if (index == 0 || if_index < index) { + panic("bad if_index on interface from route"); + } + oifp = ifindex2ifnet[index]; + ifnet_head_done(); + } } ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK); @@ -434,9 +481,10 @@ rip6_output( } #endif /*IPSEC*/ - if (in6p->in6p_route.ro_rt && in6p->in6p_route.ro_rt->generation_id != route_generation) { + if (in6p->in6p_route.ro_rt != NULL && + in6p->in6p_route.ro_rt->generation_id != route_generation) { rtfree(in6p->in6p_route.ro_rt); - in6p->in6p_route.ro_rt = (struct rtentry *)0; + in6p->in6p_route.ro_rt = NULL; } error = 
ip6_output(m, optp, &in6p->in6p_route, 0, @@ -556,7 +604,7 @@ rip6_ctloutput( } static int -rip6_attach(struct socket *so, int proto, __unused struct proc *p) +rip6_attach(struct socket *so, int proto, struct proc *p) { struct inpcb *inp; int error; @@ -564,7 +612,7 @@ rip6_attach(struct socket *so, int proto, __unused struct proc *p) inp = sotoinpcb(so); if (inp) panic("rip6_attach"); - if (p && (error = proc_suser(p)) != 0) + if ((error = proc_suser(p)) != 0) return error; error = soreserve(so, rip_sendspace, rip_recvspace); @@ -575,7 +623,7 @@ rip6_attach(struct socket *so, int proto, __unused struct proc *p) return error; inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; - inp->in6p_ip6_nxt = (long)proto; + inp->in6p_ip6_nxt = (char)proto; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; MALLOC(inp->in6p_icmp6filt, struct icmp6_filter *, diff --git a/bsd/netinet6/raw_ip6.h b/bsd/netinet6/raw_ip6.h index 30cf70e60..608e1366c 100644 --- a/bsd/netinet6/raw_ip6.h +++ b/bsd/netinet6/raw_ip6.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netinet6/raw_ip6.h,v 1.1.2.1 2001/07/03 11:01:55 ume Exp $ */ /* $KAME: raw_ip6.h,v 1.2 2001/05/27 13:28:35 itojun Exp $ */ @@ -48,6 +76,7 @@ struct rip6stat { u_quad_t rip6s_opackets; /* total output packets */ }; +#ifdef KERNEL_PRIVATE extern struct rip6stat rip6stat; - +#endif /* KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet6/scope6.c b/bsd/netinet6/scope6.c index 76640cc32..70e90dfa9 100644 --- a/bsd/netinet6/scope6.c +++ b/bsd/netinet6/scope6.c @@ -45,6 +45,8 @@ #include #include +extern lck_mtx_t *scope6_mutex; + struct scope6_id { /* * 16 is correspondent to 4bit multicast scope field. @@ -52,10 +54,10 @@ struct scope6_id { */ u_int32_t s6id_list[16]; }; -static size_t if_indexlim = 8; +static size_t if_scope_indexlim = 8; struct scope6_id *scope6_ids = NULL; -void +int scope6_ifattach( struct ifnet *ifp) { @@ -63,17 +65,24 @@ scope6_ifattach( * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. */ - if (scope6_ids == NULL || if_index >= if_indexlim) { + lck_mtx_lock(scope6_mutex); + if (scope6_ids == NULL || if_index >= if_scope_indexlim) { size_t n; caddr_t q; + int newlim = if_scope_indexlim; - while (if_index >= if_indexlim) - if_indexlim <<= 1; + while (if_index >= newlim) + newlim <<= 1; /* grow scope index array */ - n = if_indexlim * sizeof(struct scope6_id); + n = newlim * sizeof(struct scope6_id); /* XXX: need new malloc type? 
*/ q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK); + if (q == NULL) { + lck_mtx_unlock(scope6_mutex); + return ENOBUFS; + } + if_scope_indexlim = newlim; bzero(q, n); if (scope6_ids) { bcopy((caddr_t)scope6_ids, q, n/2); @@ -86,7 +95,8 @@ scope6_ifattach( /* don't initialize if called twice */ if (SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]) { - return; + lck_mtx_unlock(scope6_mutex); + return 0; } /* @@ -100,6 +110,9 @@ scope6_ifattach( SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1; #endif #undef SID + lck_mtx_unlock(scope6_mutex); + + return 0; } int @@ -123,6 +136,7 @@ scope6_set( * interface addresses, routing table entries, PCB entries... */ + lck_mtx_lock(scope6_mutex); for (i = 0; i < 16; i++) { if (idlist[i] && idlist[i] != scope6_ids[ifp->if_index].s6id_list[i]) { @@ -134,6 +148,7 @@ scope6_set( * IDs, but we check the consistency for * safety in later use. */ + lck_mtx_unlock(scope6_mutex); return(EINVAL); } @@ -145,6 +160,7 @@ scope6_set( scope6_ids[ifp->if_index].s6id_list[i] = idlist[i]; } } + lck_mtx_unlock(scope6_mutex); return(error); } @@ -157,8 +173,10 @@ scope6_get( if (scope6_ids == NULL) /* paranoid? */ return(EINVAL); + lck_mtx_lock(scope6_mutex); bcopy(scope6_ids[ifp->if_index].s6id_list, idlist, sizeof(scope6_ids[ifp->if_index].s6id_list)); + lck_mtx_unlock(scope6_mutex); return(0); } @@ -230,30 +248,38 @@ in6_addr2scopeid( { int scope = in6_addrscope(addr); int index = ifp->if_index; + int retid = 0; if (scope6_ids == NULL) /* paranoid? */ return(0); /* XXX */ - if (index >= if_indexlim) + + lck_mtx_lock(scope6_mutex); + if (index >= if_scope_indexlim) { + lck_mtx_unlock(scope6_mutex); return(0); /* XXX */ + } #define SID scope6_ids[index] switch(scope) { case IPV6_ADDR_SCOPE_NODELOCAL: - return(-1); /* XXX: is this an appropriate value? */ - + retid = -1; /* XXX: is this an appropriate value? 
*/ + break; case IPV6_ADDR_SCOPE_LINKLOCAL: - return(SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]); - + retid=SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; + break; case IPV6_ADDR_SCOPE_SITELOCAL: - return(SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]); - + retid=SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; + break; case IPV6_ADDR_SCOPE_ORGLOCAL: - return(SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]); - + retid=SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; + break; default: - return(0); /* XXX: treat as global. */ + break; /* XXX: value 0, treat as global. */ } #undef SID + + lck_mtx_unlock(scope6_mutex); + return retid; } void @@ -266,12 +292,14 @@ scope6_setdefault( * We might eventually have to separate the notion of "link" from * "interface" and provide a user interface to set the default. */ + lck_mtx_lock(scope6_mutex); if (ifp) { scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index; } else scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = 0; + lck_mtx_unlock(scope6_mutex); } int @@ -281,8 +309,10 @@ scope6_get_default( if (scope6_ids == NULL) /* paranoid? 
*/ return(EINVAL); + lck_mtx_lock(scope6_mutex); bcopy(scope6_ids[0].s6id_list, idlist, sizeof(scope6_ids[0].s6id_list)); + lck_mtx_unlock(scope6_mutex); return(0); } @@ -291,5 +321,10 @@ u_int32_t scope6_addr2default( struct in6_addr *addr) { - return(scope6_ids[0].s6id_list[in6_addrscope(addr)]); + u_int32_t id = 0; + int index = in6_addrscope(addr); + lck_mtx_lock(scope6_mutex); + id = scope6_ids[0].s6id_list[index]; + lck_mtx_unlock(scope6_mutex); + return (id); } diff --git a/bsd/netinet6/scope6_var.h b/bsd/netinet6/scope6_var.h index d7fd15e77..2b3a9954a 100644 --- a/bsd/netinet6/scope6_var.h +++ b/bsd/netinet6/scope6_var.h @@ -35,13 +35,13 @@ #include #ifdef KERNEL_PRIVATE -void scope6_ifattach(struct ifnet *); +int scope6_ifattach(struct ifnet *); int scope6_set(struct ifnet *, u_int32_t *); int scope6_get(struct ifnet *, u_int32_t *); void scope6_setdefault(struct ifnet *); int scope6_get_default(u_int32_t *); u_int32_t scope6_in6_addrscope(struct in6_addr *); u_int32_t scope6_addr2default(struct in6_addr *); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_SCOPE6_VAR_H_ +#endif /* _NETINET6_SCOPE6_VAR_H_ */ diff --git a/bsd/netinet6/tcp6_var.h b/bsd/netinet6/tcp6_var.h index 5b535dda5..9d7c44968 100644 --- a/bsd/netinet6/tcp6_var.h +++ b/bsd/netinet6/tcp6_var.h @@ -84,6 +84,6 @@ struct rtentry *tcp_rtlookup6(struct inpcb *); extern struct pr_usrreqs tcp6_usrreqs; -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET_TCP6_VAR_H_ +#endif /* _NETINET_TCP6_VAR_H_ */ diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c index 226b199c0..191896a47 100644 --- a/bsd/netinet6/udp6_output.c +++ b/bsd/netinet6/udp6_output.c @@ -1,3 +1,32 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + /* $FreeBSD: src/sys/netinet6/udp6_output.c,v 1.1.2.3 2001/08/31 13:49:58 jlemon Exp $ */ /* $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ @@ -126,7 +155,7 @@ get_socket_id(struct socket * s) if (s == NULL) { return (0); } - val = (u_int16_t)(((u_int32_t)s) / sizeof(struct socket)); + val = (u_int16_t)(((uintptr_t)s) / sizeof(struct socket)); if (val == 0) { val = 0xffff; } @@ -155,13 +184,8 @@ udp6_output(in6p, m, addr6, control, p) struct sockaddr_in6 tmp; struct in6_addr storage; - priv = 0; -#ifdef __APPLE__ - if (p && !proc_suser(p)) -#else - if (p && !suser(p)) -#endif - priv = 1; + priv = (proc_suser(p) == 0); + if (control) { if ((error = ip6_setpktoptions(control, &opt, priv, 0)) != 0) goto release; diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index b97874db6..fed294b90 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -359,6 +359,11 @@ udp6_input( */ if (reuse_sock == 0 || ((m = n) == NULL)) break; + /* + * Recompute IP and UDP header pointers for new mbuf + */ + ip6 = mtod(m, struct ip6_hdr *); + uh = (struct udphdr *)((caddr_t)ip6 + off); } lck_rw_done(pcbinfo->mtx); diff --git a/bsd/netinet6/udp6_var.h b/bsd/netinet6/udp6_var.h index 49e35cc55..18274d10f 100644 --- a/bsd/netinet6/udp6_var.h +++ b/bsd/netinet6/udp6_var.h @@ -76,6 +76,6 @@ int udp6_input(struct mbuf **, int *); int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct proc *p); -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ -#endif _NETINET6_UDP6_VAR_H_ +#endif /* _NETINET6_UDP6_VAR_H_ */ diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index 75e405b3c..73a605869 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* $FreeBSD: src/sys/netkey/key.c,v 1.16.2.13 2002/07/24 18:17:40 ume Exp $ */ /* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ @@ -148,7 +176,7 @@ static u_int key_larval_lifetime = 30; /* interval to expire acquiring, 30(s)*/ static int key_blockacq_count = 10; /* counter for blocking SADB_ACQUIRE.*/ static int key_blockacq_lifetime = 20; /* lifetime for blocking SADB_ACQUIRE.*/ static int key_preferred_oldsa = 0; /* preferred old sa rather than new sa.*/ -static int natt_keepalive_interval = 20; /* interval between natt keepalives.*/ +__private_extern__ int natt_keepalive_interval = 20; /* interval between natt keepalives.*/ static int ipsec_policy_count = 0; static int ipsec_sav_count = 0; @@ -209,6 +237,8 @@ static const int minsize[] = { 0, /* SADB_X_EXT_KMPRIVATE */ sizeof(struct sadb_x_policy), /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ + sizeof(struct sadb_session_id), /* SADB_EXT_SESSION_ID */ + sizeof(struct sadb_sastat), /* SADB_EXT_SASTAT */ }; static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ @@ -231,6 +261,8 @@ static const int maxsize[] = { 0, /* SADB_X_EXT_KMPRIVATE */ 0, /* SADB_X_EXT_POLICY */ sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ + 0, /* SADB_EXT_SESSION_ID */ + 0, /* SADB_EXT_SASTAT */ }; static int ipsec_esp_keymin = 256; @@ -333,21 +365,21 @@ do { \ #if 1 #define KMALLOC_WAIT(p, t, n) \ - ((p) = (t) _MALLOC((unsigned long)(n), M_SECA, M_WAITOK)) + ((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)) #define KMALLOC_NOWAIT(p, t, n) \ - ((p) = (t) _MALLOC((unsigned long)(n), M_SECA, M_NOWAIT)) + ((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)) #define KFREE(p) \ _FREE((caddr_t)(p), M_SECA); #else #define KMALLOC_WAIT(p, t, n) \ do { \ - ((p) = (t)_MALLOC((unsigned long)(n), M_SECA, M_WAITOK)); \ + ((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)); \ printf("%s %d: %p <- KMALLOC_WAIT(%s, %d)\n", \ __FILE__, 
__LINE__, (p), #t, n); \ } while (0) #define KMALLOC_NOWAIT(p, t, n) \ do { \ - ((p) = (t)_MALLOC((unsigned long)(n), M_SECA, M_NOWAIT)); \ + ((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)); \ printf("%s %d: %p <- KMALLOC_NOWAIT(%s, %d)\n", \ __FILE__, __LINE__, (p), #t, n); \ } while (0) @@ -384,13 +416,13 @@ do { \ (idx)->proto = (p); \ (idx)->mode = (m); \ (idx)->reqid = (r); \ - bcopy((s), &(idx)->src, ((struct sockaddr *)(s))->sa_len); \ - bcopy((d), &(idx)->dst, ((struct sockaddr *)(d))->sa_len); \ + bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ + bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* key statistics */ struct _keystat { - u_long getspi_count; /* the avarage of count to try to get new SPI */ + u_int32_t getspi_count; /* the avarage of count to try to get new SPI */ } keystat; struct sadb_msghdr { @@ -425,7 +457,7 @@ static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); static u_int key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); -static struct secashead *key_newsah(struct secasindex *); +static struct secashead *key_newsah(struct secasindex *, u_int8_t); static void key_delsah(struct secashead *); static struct secasvar *key_newsav(struct mbuf *, const struct sadb_msghdr *, struct secashead *, int *); @@ -455,6 +487,7 @@ static void *key_newbuf(const void *, u_int); #if INET6 static int key_ismyaddr6(struct sockaddr_in6 *); #endif +static void key_update_natt_keepalive_timestamp(struct secasvar *, struct secasvar *); /* flags for key_cmpsaidx() */ #define CMP_HEAD 0x1 /* protocol, addresses. 
*/ @@ -521,14 +554,14 @@ static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static void key_sa_chgstate(struct secasvar *, u_int8_t); static struct mbuf *key_alloc_mbuf(int); +static int key_getsastat (struct socket *, struct mbuf *, const struct sadb_msghdr *); extern int ipsec_bypass; -void ipsec_send_natt_keepalive(struct secasvar *sav); +extern int esp_udp_encap_port; +int ipsec_send_natt_keepalive(struct secasvar *sav); void key_init(void); -static errno_t ipsecif_register_control(void); - /* @@ -564,9 +597,6 @@ key_init(void) LIST_INIT(&spihash[i]); raw_init(); - - /* register ip_if application of kernel control */ - ipsecif_register_control(); } @@ -2939,8 +2969,9 @@ key_spdexpire(sp) * others : pointer to new SA head. */ static struct secashead * -key_newsah(saidx) +key_newsah(saidx, dir) struct secasindex *saidx; + u_int8_t dir; { struct secashead *newsah; @@ -2976,6 +3007,7 @@ key_newsah(saidx) break; } + newsah->dir = dir; /* add to saidxtree */ newsah->state = SADB_SASTATE_MATURE; LIST_INSERT_HEAD(&sahtree, newsah, chain); @@ -3390,12 +3422,12 @@ key_setsaval(sav, m, mhp) */ if ((sav->flags & SADB_X_EXT_NATT) != 0) { if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa_2) || - ((struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) { + ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) { ipseclog((LOG_DEBUG, "key_setsaval: natt port not set.\n")); error = EINVAL; goto fail; } - sav->remote_ike_port = ((struct sadb_sa_2*)(sa0))->sadb_sa_natt_port; + sav->remote_ike_port = ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port; } /* @@ -3671,7 +3703,9 @@ key_mature(sav) switch (sav->sah->saidx.proto) { case IPPROTO_ESP: case IPPROTO_AH: - if (ntohl(sav->spi) >= 0 && ntohl(sav->spi) <= 255) { + + /* No reason to test if this is >= 0, because ntohl(sav->spi) is unsigned. 
*/ + if (ntohl(sav->spi) <= 255) { ipseclog((LOG_DEBUG, "key_mature: illegal range of SPI %u.\n", (u_int32_t)ntohl(sav->spi))); @@ -4109,6 +4143,76 @@ key_setsadbaddr(exttype, saddr, prefixlen, ul_proto) return m; } +/* + * set data into sadb_session_id + */ +static struct mbuf * +key_setsadbsession_id (u_int64_t session_ids[]) +{ + struct mbuf *m; + struct sadb_session_id *p; + size_t len; + + len = PFKEY_ALIGN8(sizeof(*p)); + m = key_alloc_mbuf(len); + if (!m || m->m_next) { /*XXX*/ + if (m) + m_freem(m); + return NULL; + } + + p = mtod(m, __typeof__(p)); + + bzero(p, len); + p->sadb_session_id_len = PFKEY_UNIT64(len); + p->sadb_session_id_exttype = SADB_EXT_SESSION_ID; + p->sadb_session_id_v[0] = session_ids[0]; + p->sadb_session_id_v[1] = session_ids[1]; + + return m; +} + +/* + * copy stats data into sadb_sastat type. + */ +static struct mbuf * +key_setsadbsastat (u_int32_t dir, + struct sastat *stats, + u_int32_t max_stats) +{ + struct mbuf *m; + struct sadb_sastat *p; + int list_len, len; + + if (!stats) { + return NULL; + } + + list_len = sizeof(*stats) * max_stats; + len = PFKEY_ALIGN8(sizeof(*p)) + PFKEY_ALIGN8(list_len); + m = key_alloc_mbuf(len); + if (!m || m->m_next) { /*XXX*/ + if (m) + m_freem(m); + return NULL; + } + + p = mtod(m, __typeof__(p)); + + bzero(p, len); + p->sadb_sastat_len = PFKEY_UNIT64(len); + p->sadb_sastat_exttype = SADB_EXT_SASTAT; + p->sadb_sastat_dir = dir; + p->sadb_sastat_list_len = max_stats; + if (list_len) { + bcopy(stats, + mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(*p)), + list_len); + } + + return m; +} + #if 0 /* * set data into sadb_ident. 
@@ -4263,7 +4367,7 @@ key_ismyaddr(sa) switch (sa->sa_family) { #if INET case AF_INET: - lck_mtx_lock(rt_mtx); + lck_rw_lock_shared(in_ifaddr_rwlock); sin = (struct sockaddr_in *)sa; for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) @@ -4272,11 +4376,11 @@ key_ismyaddr(sa) sin->sin_len == ia->ia_addr.sin_len && sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) { - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); return 1; } } - lck_mtx_unlock(rt_mtx); + lck_rw_done(in_ifaddr_rwlock); break; #endif #if INET6 @@ -4319,7 +4423,9 @@ key_ismyaddr6(sin6) * XXX scope */ in6m = NULL; + ifnet_lock_shared(ia->ia_ifp); IN6_LOOKUP_MULTI(sin6->sin6_addr, ia->ia_ifp, in6m); + ifnet_lock_done(ia->ia_ifp); if (in6m) { lck_mtx_unlock(nd6_mutex); return 1; @@ -4731,7 +4837,7 @@ key_timehandler(void) empty_sah_count++; continue; } - + /* if LARVAL entry doesn't become MATURE, delete it. */ for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]); sav != NULL; @@ -4758,9 +4864,7 @@ key_timehandler(void) */ if (savkabuf && savkacount < savbufcount) { sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]); //%%% should we check dying list if this is empty??? - if (natt_keepalive_interval && sav && (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0 && - (natt_now - sav->natt_last_activity) >= natt_keepalive_interval) { - //ipsec_send_natt_keepalive(sav); + if (natt_keepalive_interval && sav && (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0) { sav->refcnt++; *savkaptr++ = sav; savkacount++; @@ -4832,7 +4936,7 @@ key_timehandler(void) savexcount++; } } - + /* check DYING entry to change status to DEAD. 
*/ for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]); sav != NULL; @@ -4997,9 +5101,20 @@ key_timehandler(void) key_spdexpire(*(--spptr)); } if (savkabuf && savkacount > 0) { - cnt = savkacount; - while (cnt--) - ipsec_send_natt_keepalive(*(--savkaptr)); + struct secasvar **savkaptr_sav = savkaptr; + int cnt_send = savkacount; + + while (cnt_send--) { + if (ipsec_send_natt_keepalive(*(--savkaptr))) { + // iterate (all over again) and update timestamps + struct secasvar **savkaptr_update = savkaptr_sav; + int cnt_update = savkacount; + while (cnt_update--) { + key_update_natt_keepalive_timestamp(*savkaptr, + *(--savkaptr_update)); + } + } + } } if (savexbuf && savexcount > 0) { cnt = savexcount; @@ -5055,10 +5170,10 @@ key_srandom() return; } -u_long +u_int32_t key_random() { - u_long value; + u_int32_t value; key_randomfill(&value, sizeof(value)); return value; @@ -5074,7 +5189,7 @@ key_randomfill(p, l) read_random(p, (u_int)l); #else size_t n; - u_long v; + u_int32_t v; static int warn = 1; n = 0; @@ -5255,8 +5370,8 @@ key_getspi(so, m, mhp) /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA index */ - if ((newsah = key_newsah(&saidx)) == NULL) { + /* create a new SA index: key_addspi is always used for inbound spi */ + if ((newsah = key_newsah(&saidx, IPSEC_DIR_INBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_getspi: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -5409,14 +5524,14 @@ key_do_getnewspi(spirange, saidx) } else { - u_long range = keymax - keymin + 1; /* overflow value of zero means full range */ + u_int32_t range = keymax - keymin + 1; /* overflow value of zero means full range */ /* init SPI */ newspi = 0; /* when requesting to allocate spi ranged */ while (count--) { - u_long rand_val = key_random(); + u_int32_t rand_val = key_random(); /* generate pseudo-random SPI value ranged. */ newspi = (range == 0 ? 
rand_val : keymin + (rand_val % range)); @@ -5741,8 +5856,8 @@ key_add(so, m, mhp) /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA header */ - if ((newsah = key_newsah(&saidx)) == NULL) { + /* create a new SA header: key_addspi is always used for outbound spi */ + if ((newsah = key_newsah(&saidx, IPSEC_DIR_OUTBOUND)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_add: No more memory.\n")); return key_senderror(so, m, ENOBUFS); @@ -6234,6 +6349,82 @@ key_get(so, m, mhp) } } +/* + * get SA stats by spi. + * OUT: -1 : not found + * 0 : found, arg pointer to a SA stats is updated. + */ +static int +key_getsastatbyspi_one (u_int32_t spi, + struct sastat *stat) +{ + struct secashead *sah; + struct secasvar *sav = NULL; + + if ((void *)stat == NULL) { + return -1; + } + + lck_mtx_lock(sadb_mutex); + + /* get a SA header */ + LIST_FOREACH(sah, &sahtree, chain) { + if (sah->state == SADB_SASTATE_DEAD) + continue; + + /* get a SA with SPI. */ + sav = key_getsavbyspi(sah, spi); + if (sav) { + stat->spi = sav->spi; + stat->created = sav->created; + if (sav->lft_c) { + bcopy(sav->lft_c,&stat->lft_c, sizeof(stat->lft_c)); + } else { + bzero(&stat->lft_c, sizeof(stat->lft_c)); + } + lck_mtx_unlock(sadb_mutex); + return 0; + } + } + + lck_mtx_unlock(sadb_mutex); + + return -1; +} + +/* + * get SA stats collection by indices. + * OUT: -1 : not found + * 0 : found, arg pointers to a SA stats and 'maximum stats' are updated. 
+ */ +static int +key_getsastatbyspi (struct sastat *stat_arg, + u_int32_t max_stat_arg, + struct sastat *stat_res, + u_int32_t *max_stat_res) +{ + int cur, found = 0; + + if (stat_arg == NULL || + stat_res == NULL || + max_stat_res == NULL) { + return -1; + } + + for (cur = 0; cur < max_stat_arg; cur++) { + if (key_getsastatbyspi_one(stat_arg[cur].spi, + &stat_res[found]) == 0) { + found++; + } + } + *max_stat_res = found; + + if (found) { + return 0; + } + return -1; +} + /* XXX make it sysctl-configurable? */ static void key_getcomb_setlifetime(comb) @@ -6899,10 +7090,11 @@ key_acquire2(so, m, mhp) return key_senderror(so, m, EINVAL); } - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + src0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + dst0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; /* XXX boundary check against sa_len */ + /* cast warnings */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); /* get a SA index */ @@ -7564,6 +7756,7 @@ static int (*key_typesw[])(struct socket *, struct mbuf *, key_spdadd, /* SADB_X_SPDSETIDX */ NULL, /* SADB_X_SPDEXPIRE */ key_spddelete2, /* SADB_X_SPDDELETE2 */ + key_getsastat, /* SADB_GETSASTAT */ }; /* @@ -7908,6 +8101,8 @@ key_align(m, mhp) case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: + case SADB_EXT_SESSION_ID: + case SADB_EXT_SASTAT: /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -7992,7 +8187,7 @@ key_validate_ext(ext, len) break; case SADB_EXT_IDENTITY_SRC: case SADB_EXT_IDENTITY_DST: - if (((struct sadb_ident *)ext)->sadb_ident_type == + if (((const struct sadb_ident *)ext)->sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); checktype = ADDR; @@ -8008,7 +8203,8 @@ key_validate_ext(ext, len) case NONE: break; case ADDR: - sa = (struct sockaddr *)((caddr_t)ext + baselen); + sa = (struct 
sockaddr *)((caddr_t)(uintptr_t)ext + baselen); + if (len < baselen + sal) return EINVAL; if (baselen + PFKEY_ALIGN8(sa->sa_len) != len) @@ -8239,654 +8435,197 @@ key_alloc_mbuf(l) return m; } +static struct mbuf * +key_setdumpsastats (u_int32_t dir, + struct sastat *stats, + u_int32_t max_stats, + u_int64_t session_ids[], + u_int32_t seq, + u_int32_t pid) +{ + struct mbuf *result = NULL, *m = NULL; -/* ---------------------------------------------------------------------------------- -Application of kernel control for interface creation + m = key_setsadbmsg(SADB_GETSASTAT, 0, 0, seq, pid, 0); + if (!m) { + goto fail; + } + result = m; -Theory of operation: -ipsecif acts as glue between kernel control sockets and ipsec network interfaces. This -kernel control will register an interface for every client that connects. -ipsec interface do not send or receive packets, an they are intercepted by ipsec before -they reach the interface. ipsec needs interface to attach tunnel ip addresses. -In the future, we may want to change the control mechanism to use PF_KEY to create -interfaces for ipsec ----------------------------------------------------------------------------------- */ + m = key_setsadbsession_id(session_ids); + if (!m) { + goto fail; + } + m_cat(result, m); -#include -//#include "if_ip.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include /* Until leopard, our ugly bpf protocol prepend will need this */ -#include -#include -#include + m = key_setsadbsastat(dir, + stats, + max_stats); + if (!m) { + goto fail; + } + m_cat(result, m); -/* -*/ - -#define IPSECIF_CONTROL_NAME "com.apple.net.ipsecif_control" - -/* Kernel Control functions */ -static errno_t ipsecif_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, - void **unitinfo); -static errno_t ipsecif_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, - void *unitinfo); -static errno_t ipsecif_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, - void *unitinfo, 
mbuf_t m, int flags); - -/* Network Interface functions */ -static errno_t ipsecif_output(ifnet_t interface, mbuf_t data); -static errno_t ipsecif_demux(ifnet_t interface, mbuf_t data, char *frame_header, - protocol_family_t *protocol); -static errno_t ipsecif_add_proto(ifnet_t interface, protocol_family_t protocol, - const struct ifnet_demux_desc *demux_array, - u_int32_t demux_count); -static errno_t ipsecif_del_proto(ifnet_t interface, protocol_family_t protocol); -static errno_t ipsecif_ioctl(ifnet_t interface, u_int32_t cmd, void *data); -static errno_t ipsecif_settap(ifnet_t interface, bpf_tap_mode mode, - bpf_packet_func callback); -static void ipsecif_detached(ifnet_t interface); - -/* Protocol handlers */ -static errno_t ipsecif_attach_proto(ifnet_t interface, protocol_family_t proto); -static errno_t ipsecif_proto_input(ifnet_t interface, protocol_family_t protocol, - mbuf_t m, char *frame_header); - -/* Control block allocated for each kernel control connection */ -struct ipsecif_pcb { - kern_ctl_ref ctlref; - u_int32_t unit; - ifnet_t ifp; - bpf_tap_mode mode; - bpf_packet_func tap; -}; + if ((result->m_flags & M_PKTHDR) == 0) { + goto fail; + } -static kern_ctl_ref ipsecif_kctlref; -static u_int32_t ipsecif_family; -static OSMallocTag ipsecif_malloc_tag; -static SInt32 ipsecif_ifcount = 0; + if (result->m_len < sizeof(struct sadb_msg)) { + result = m_pullup(result, sizeof(struct sadb_msg)); + if (result == NULL) { + goto fail; + } + } -/* Prepend length */ -static void* -ipsecif_alloc(size_t size) -{ - size_t *mem = OSMalloc(size + sizeof(size_t), ipsecif_malloc_tag); - - if (mem) { - *mem = size + sizeof(size_t); - mem++; + result->m_pkthdr.len = 0; + for (m = result; m; m = m->m_next) { + result->m_pkthdr.len += m->m_len; } - - return (void*)mem; -} -static void -ipsecif_free(void *ptr) -{ - size_t *size = ptr; - size--; - OSFree(size, *size, ipsecif_malloc_tag); -} + mtod(result, struct sadb_msg *)->sadb_msg_len = + 
PFKEY_UNIT64(result->m_pkthdr.len); -static errno_t -ipsecif_register_control(void) -{ - struct kern_ctl_reg kern_ctl; - errno_t result = 0; - - /* Create a tag to allocate memory */ - ipsecif_malloc_tag = OSMalloc_Tagalloc(IPSECIF_CONTROL_NAME, OSMT_DEFAULT); - - /* Find a unique value for our interface family */ - result = mbuf_tag_id_find(IPSECIF_CONTROL_NAME, &ipsecif_family); - if (result != 0) { - printf("ipsecif_register_control - mbuf_tag_id_find_internal failed: %d\n", result); - return result; - } - - bzero(&kern_ctl, sizeof(kern_ctl)); - strncpy(kern_ctl.ctl_name, IPSECIF_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); - kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; - kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ - kern_ctl.ctl_connect = ipsecif_ctl_connect; - kern_ctl.ctl_disconnect = ipsecif_ctl_disconnect; - kern_ctl.ctl_send = ipsecif_ctl_send; - - result = ctl_register(&kern_ctl, &ipsecif_kctlref); - if (result != 0) { - printf("ipsecif_register_control - ctl_register failed: %d\n", result); - return result; - } - - /* Register the protocol plumbers */ - if ((result = proto_register_plumber(PF_INET, ipsecif_family, - ipsecif_attach_proto, NULL)) != 0) { - printf("ipsecif_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n", - ipsecif_family, result); - ctl_deregister(ipsecif_kctlref); - return result; - } - - /* Register the protocol plumbers */ - if ((result = proto_register_plumber(PF_INET6, ipsecif_family, - ipsecif_attach_proto, NULL)) != 0) { - proto_unregister_plumber(PF_INET, ipsecif_family); - ctl_deregister(ipsecif_kctlref); - printf("ipsecif_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n", - ipsecif_family, result); - return result; - } - - return 0; -} - -/* Kernel control functions */ + return result; -static errno_t -ipsecif_ctl_connect( - kern_ctl_ref kctlref, - struct sockaddr_ctl *sac, - void **unitinfo) -{ - struct ifnet_init_params ipsecif_init; - struct ipsecif_pcb *pcb; - 
errno_t result; - - /* kernel control allocates, interface frees */ - pcb = ipsecif_alloc(sizeof(*pcb)); - if (pcb == NULL) - return ENOMEM; - - /* Setup the protocol control block */ - bzero(pcb, sizeof(*pcb)); - *unitinfo = pcb; - pcb->ctlref = kctlref; - pcb->unit = sac->sc_unit; - printf("ipsecif_ctl_connect: creating unit ip%d\n", pcb->unit); - - /* Create the interface */ - bzero(&ipsecif_init, sizeof(ipsecif_init)); - ipsecif_init.name = "ipsec"; - ipsecif_init.unit = pcb->unit; - ipsecif_init.family = ipsecif_family; - ipsecif_init.type = IFT_OTHER; - ipsecif_init.output = ipsecif_output; - ipsecif_init.demux = ipsecif_demux; - ipsecif_init.add_proto = ipsecif_add_proto; - ipsecif_init.del_proto = ipsecif_del_proto; - ipsecif_init.softc = pcb; - ipsecif_init.ioctl = ipsecif_ioctl; - ipsecif_init.set_bpf_tap = ipsecif_settap; - ipsecif_init.detach = ipsecif_detached; - - result = ifnet_allocate(&ipsecif_init, &pcb->ifp); - if (result != 0) { - printf("ipsecif_ctl_connect - ifnet_allocate failed: %d\n", result); - ipsecif_free(pcb); - return result; - } - OSIncrementAtomic(&ipsecif_ifcount); - - /* Set flags and additional information. */ - ifnet_set_mtu(pcb->ifp, 1280); - ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_BROADCAST, 0xffff); -// ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff); - - /* Attach the interface */ - result = ifnet_attach(pcb->ifp, NULL); - if (result != 0) { - printf("ipsecif_ctl_connect - ifnet_allocate failed: %d\n", result); - ifnet_release(pcb->ifp); - ipsecif_free(pcb); + fail: + if (result) { + m_freem(result); } - - /* Attach to bpf */ - if (result == 0) - bpfattach(pcb->ifp, DLT_NULL, 4); - - return result; + return NULL; } /* - * These defines are marked private but it's impossible to remove an interface - * without them. + * SADB_GETSASTAT processing + * dump all stats for matching entries in SAD. + * + * m will always be freed. 
*/ -#ifndef SIOCPROTODETACH -#define SIOCPROTODETACH _IOWR('i', 81, struct ifreq) /* detach proto from interface */ -#endif /* SIOCPROTODETACH */ - -#ifndef SIOCPROTODETACH_IN6 -#define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq) /* detach proto from interface */ -#endif /* SIOCPROTODETACH */ + +static int +key_getsastat (struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + struct sadb_session_id *session_id; + u_int32_t bufsize, arg_count, res_count; + struct sadb_sastat *sa_stats_arg; + struct sastat *sa_stats_sav = NULL; + struct mbuf *n; + int error = 0; + /* sanity check */ + if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) + panic("%s: NULL pointer is passed.\n", __FUNCTION__); -static errno_t -ipsecif_detach_ip( - ifnet_t interface, - protocol_family_t protocol, - socket_t pf_socket) -{ - errno_t result = EPROTONOSUPPORT; - - /* Attempt a detach */ - if (protocol == PF_INET) { - struct ifreq ifr; - - bzero(&ifr, sizeof(ifr)); - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", - ifnet_name(interface), ifnet_unit(interface)); - - result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr); - } - else if (protocol == PF_INET6) { - struct in6_ifreq ifr6; - - bzero(&ifr6, sizeof(ifr6)); - snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", - ifnet_name(interface), ifnet_unit(interface)); - - result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6); - } - - return result; -} + if (mhp->ext[SADB_EXT_SESSION_ID] == NULL) { + printf("%s: invalid message is passed. missing session-id.\n", __FUNCTION__); + return key_senderror(so, m, EINVAL); + } + if (mhp->extlen[SADB_EXT_SESSION_ID] < sizeof(struct sadb_session_id)) { + printf("%s: invalid message is passed. short session-id.\n", __FUNCTION__); + return key_senderror(so, m, EINVAL); + } + if (mhp->ext[SADB_EXT_SASTAT] == NULL) { + printf("%s: invalid message is passed. 
missing stat args.\n", __FUNCTION__); + return key_senderror(so, m, EINVAL); + } + if (mhp->extlen[SADB_EXT_SASTAT] < sizeof(*sa_stats_arg)) { + printf("%s: invalid message is passed. short stat args.\n", __FUNCTION__); + return key_senderror(so, m, EINVAL); + } -static void -ipsecif_remove_address( - ifnet_t interface, - protocol_family_t protocol, - ifaddr_t address, - socket_t pf_socket) -{ - errno_t result = 0; - - /* Attempt a detach */ - if (protocol == PF_INET) { - struct ifreq ifr; - - bzero(&ifr, sizeof(ifr)); - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", - ifnet_name(interface), ifnet_unit(interface)); - result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr)); - if (result != 0) { - printf("ipsecif_remove_address - ifaddr_address failed: %d", result); - } - else { - result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr); - if (result != 0) { - printf("ipsecif_remove_address - SIOCDIFADDR failed: %d", result); - } - } - } - else if (protocol == PF_INET6) { - struct in6_ifreq ifr6; - - bzero(&ifr6, sizeof(ifr6)); - snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", - ifnet_name(interface), ifnet_unit(interface)); - result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr, - sizeof(ifr6.ifr_addr)); - if (result != 0) { - printf("ipsecif_remove_address - ifaddr_address failed (v6): %d", - result); - } - else { - result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6); - if (result != 0) { - printf("ipsecif_remove_address - SIOCDIFADDR_IN6 failed: %d", - result); - } - } - } -} + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); -static void -ipsecif_cleanup_family( - ifnet_t interface, - protocol_family_t protocol) -{ - errno_t result = 0; - socket_t pf_socket = NULL; - ifaddr_t *addresses = NULL; - int i; - - if (protocol != PF_INET && protocol != PF_INET6) { - printf("ipsecif_cleanup_family - invalid protocol family %d\n", protocol); - return; - } - - /* Create a socket for removing addresses and detaching the protocol */ - 
result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket); - if (result != 0) { - if (result != EAFNOSUPPORT) - printf("ipsecif_cleanup_family - failed to create %s socket: %d\n", - protocol == PF_INET ? "IP" : "IPv6", result); - goto cleanup; - } - - result = ipsecif_detach_ip(interface, protocol, pf_socket); - if (result == 0 || result == ENXIO) { - /* We are done! We either detached or weren't attached. */ - goto cleanup; - } - else if (result != EBUSY) { - /* Uh, not really sure what happened here... */ - printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result); - goto cleanup; - } - - /* - * At this point, we received an EBUSY error. This means there are - * addresses attached. We should detach them and then try again. - */ - result = ifnet_get_address_list_family(interface, &addresses, protocol); - if (result != 0) { - printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n", - ifnet_name(interface), ifnet_unit(interface), - protocol == PF_INET ? "PF_INET" : "PF_INET6", result); - goto cleanup; - } - - for (i = 0; addresses[i] != 0; i++) { - ipsecif_remove_address(interface, protocol, addresses[i], pf_socket); - } - ifnet_free_address_list(addresses); - addresses = NULL; - - /* - * The addresses should be gone, we should try the remove again. 
- */ - result = ipsecif_detach_ip(interface, protocol, pf_socket); - if (result != 0 && result != ENXIO) { - printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result); + // exit early if there are no active SAs + if (ipsec_sav_count <= 0) { + printf("%s: No active SAs.\n", __FUNCTION__); + error = ENOENT; + goto end; } - -cleanup: - if (pf_socket != NULL) - sock_close(pf_socket); - - if (addresses != NULL) - ifnet_free_address_list(addresses); -} + bufsize = (ipsec_sav_count + 1) * sizeof(*sa_stats_sav); -static errno_t -ipsecif_ctl_disconnect( - __unused kern_ctl_ref kctlref, - __unused u_int32_t unit, - void *unitinfo) -{ - struct ipsecif_pcb *pcb = unitinfo; - ifnet_t ifp = pcb->ifp; - errno_t result = 0; - - pcb->ctlref = NULL; - pcb->unit = 0; - - /* - * We want to do everything in our power to ensure that the interface - * really goes away when the socket is closed. We must remove IP/IPv6 - * addresses and detach the protocols. Finally, we can remove and - * release the interface. 
- */ - ipsecif_cleanup_family(ifp, AF_INET); - ipsecif_cleanup_family(ifp, AF_INET6); - - if ((result = ifnet_detach(ifp)) != 0) { - printf("ipsecif_ctl_disconnect - ifnet_detach failed: %d\n", result); - } - - if ((result = ifnet_release(ifp)) != 0) { - printf("ipsecif_ctl_disconnect - ifnet_release failed: %d\n", result); + KMALLOC_WAIT(sa_stats_sav, __typeof__(sa_stats_sav), bufsize); + if (sa_stats_sav == NULL) { + printf("%s: No more memory.\n", __FUNCTION__); + error = ENOMEM; + goto end; } - - return 0; -} + bzero(sa_stats_sav, bufsize); -static inline void -call_bpf_tap( - ifnet_t ifp, - bpf_packet_func tap, - mbuf_t m) -{ - struct m_hdr hack_hdr; - struct mbuf *n; - int af; - - if (!tap) - return; - - af = (((*(char*)(mbuf_data(m))) & 0xf0) >> 4); // 4 or 6 - if(af == 4) { - af = AF_INET; - } - else if (af == 6) { - af = AF_INET6; - } - else { - /* Uh...this ain't right */ - af = 0; + sa_stats_arg = (__typeof__(sa_stats_arg))mhp->ext[SADB_EXT_SASTAT]; + arg_count = sa_stats_arg->sadb_sastat_list_len; + // exit early if there are no requested SAs + if (arg_count == 0) { + printf("%s: No SAs requested.\n", __FUNCTION__); + error = ENOENT; + goto end; } - - hack_hdr.mh_next = (struct mbuf*)m; - hack_hdr.mh_nextpkt = NULL; - hack_hdr.mh_len = 4; - hack_hdr.mh_data = (char *)⁡ - hack_hdr.mh_type = ((struct mbuf*)m)->m_type; - hack_hdr.mh_flags = 0; - - n = (struct mbuf*)&hack_hdr; - - tap(ifp, (mbuf_t)n); -} - + res_count = 0; -static errno_t -ipsecif_ctl_send( - __unused kern_ctl_ref kctlref, - __unused u_int32_t unit, - void *unitinfo, - mbuf_t m, - __unused int flags) -{ - struct ipsecif_pcb *pcb = unitinfo; - struct ifnet_stat_increment_param incs; - errno_t result; - - bzero(&incs, sizeof(incs)); - - mbuf_pkthdr_setrcvif(m, pcb->ifp); - - if (pcb->mode & BPF_MODE_INPUT) { - call_bpf_tap(pcb->ifp, pcb->tap, m); + if (key_getsastatbyspi((struct sastat *)(sa_stats_arg + 1), + arg_count, + sa_stats_sav, + &res_count)) { + printf("%s: Error finding SAs.\n", 
__FUNCTION__); + error = ENOENT; + goto end; } - - incs.packets_in = 1; - incs.bytes_in = mbuf_pkthdr_len(m); - result = ifnet_input(pcb->ifp, m, &incs); - if (result != 0) { - ifnet_stat_increment_in(pcb->ifp, 0, 0, 1); - printf("ipsecif_ctl_send - ifnet_input failed: %d\n", result); - mbuf_freem(m); + if (!res_count) { + printf("%s: No SAs found.\n", __FUNCTION__); + error = ENOENT; + goto end; } - - return 0; -} -/* Network Interface functions */ -static errno_t -ipsecif_output( - ifnet_t interface, - mbuf_t data) -{ - struct ipsecif_pcb *pcb = ifnet_softc(interface); - errno_t result; - - if (pcb->mode & BPF_MODE_OUTPUT) { - call_bpf_tap(interface, pcb->tap, data); - } - - // no packet should go to the ipsec interface - mbuf_freem(data); - -#if 0 - if (pcb->ctlref) { - int length = mbuf_pkthdr_len(data); - result = ctl_enqueuembuf(pcb->ctlref, pcb->unit, data, CTL_DATA_EOR); - if (result != 0) { - mbuf_freem(data); - printf("ipsecif_output - ctl_enqueuembuf failed: %d\n", result); - ifnet_stat_increment_out(interface, 0, 0, 1); - } - else { - ifnet_stat_increment_out(interface, 1, length, 0); - } - } - else - mbuf_freem(data); -#endif - - return 0; -} + session_id = (__typeof__(session_id))mhp->ext[SADB_EXT_SESSION_ID]; -/* Network Interface functions */ -static errno_t -ipsecif_demux( - __unused ifnet_t interface, - mbuf_t data, - __unused char *frame_header, - protocol_family_t *protocol) -{ - u_int8_t *vers; - - while (data != NULL && mbuf_len(data) < 1) { - data = mbuf_next(data); - } - - if (data != NULL) { - vers = mbuf_data(data); - switch(((*vers) & 0xf0) >> 4) { - case 4: - *protocol = PF_INET; - return 0; - - case 6: - *protocol = PF_INET6; - return 0; - } + /* send this to the userland. 
*/ + n = key_setdumpsastats(sa_stats_arg->sadb_sastat_dir, + sa_stats_sav, + res_count, + session_id->sadb_session_id_v, + mhp->msg->sadb_msg_seq, + mhp->msg->sadb_msg_pid); + if (!n) { + printf("%s: No bufs to dump stats.\n", __FUNCTION__); + error = ENOBUFS; + goto end; } - - return ENOENT; -} -static errno_t -ipsecif_add_proto( - __unused ifnet_t interface, - protocol_family_t protocol, - __unused const struct ifnet_demux_desc *demux_array, - __unused u_int32_t demux_count) -{ - switch(protocol) { - case PF_INET: - return 0; - case PF_INET6: - return 0; - default: - break; + key_sendup_mbuf(so, n, KEY_SENDUP_ALL); +end: + if (sa_stats_sav) { + KFREE(sa_stats_sav); } - - return ENOPROTOOPT; -} -static errno_t -ipsecif_del_proto( - __unused ifnet_t interface, - __unused protocol_family_t protocol) -{ - return 0; -} - -static errno_t -ipsecif_ioctl( - __unused ifnet_t interface, - __unused u_int32_t command, - __unused void *data) -{ - errno_t result = 0; - - switch(command) { - case SIOCSIFMTU: - ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu); - break; - - default: - result = EOPNOTSUPP; - } - - return result; -} + if (error) + return key_senderror(so, m, error); -static errno_t -ipsecif_settap( - ifnet_t interface, - bpf_tap_mode mode, - bpf_packet_func callback) -{ - struct ipsecif_pcb *pcb = ifnet_softc(interface); - - pcb->mode = mode; - pcb->tap = callback; - + m_freem(m); return 0; } static void -ipsecif_detached( - ifnet_t interface) +key_update_natt_keepalive_timestamp (struct secasvar *sav_sent, + struct secasvar *sav_update) { - struct ipsecif_pcb *pcb = ifnet_softc(interface); - - ipsecif_free(pcb); - - OSDecrementAtomic(&ipsecif_ifcount); -} + struct secasindex saidx_swap_sent_addr; -/* Protocol Handlers */ + // exit early if two SAs are identical, or if sav_update is current + if (sav_sent == sav_update || + sav_update->natt_last_activity == natt_now) { + return; + } -static errno_t -ipsecif_proto_input( - __unused ifnet_t interface, - 
protocol_family_t protocol, - mbuf_t m, - __unused char *frame_header) -{ - proto_input(protocol, m); - - return 0; -} + // assuming that (sav_update->remote_ike_port != 0 && (esp_udp_encap_port & 0xFFFF) != 0) -static errno_t -ipsecif_attach_proto( - ifnet_t interface, - protocol_family_t protocol) -{ - struct ifnet_attach_proto_param proto; - errno_t result; - - bzero(&proto, sizeof(proto)); - proto.input = ipsecif_proto_input; - - result = ifnet_attach_protocol(interface, protocol, &proto); - if (result != 0 && result != EEXIST) { - printf("ipsecif_attach_inet - ifnet_attach_protocol %d failed: %d\n", - protocol, result); + bzero(&saidx_swap_sent_addr, sizeof(saidx_swap_sent_addr)); + memcpy(&saidx_swap_sent_addr.src, &sav_sent->sah->saidx.dst, sizeof(saidx_swap_sent_addr.src)); + memcpy(&saidx_swap_sent_addr.dst, &sav_sent->sah->saidx.src, sizeof(saidx_swap_sent_addr.dst)); + saidx_swap_sent_addr.proto = sav_sent->sah->saidx.proto; + saidx_swap_sent_addr.mode = sav_sent->sah->saidx.mode; + // we ignore reqid for split-tunnel setups + + if (key_cmpsaidx(&sav_sent->sah->saidx, &sav_update->sah->saidx, CMP_MODE | CMP_PORT) || + key_cmpsaidx(&saidx_swap_sent_addr, &sav_update->sah->saidx, CMP_MODE | CMP_PORT)) { + sav_update->natt_last_activity = natt_now; } - - return result; } - diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h index 985b347fe..3dda20469 100644 --- a/bsd/netkey/key.h +++ b/bsd/netkey/key.h @@ -67,7 +67,7 @@ extern struct mbuf *key_sp2msg(struct secpolicy *); extern int key_ismyaddr(struct sockaddr *); extern int key_spdacquire(struct secpolicy *); extern void key_timehandler(void); -extern u_long key_random(void); +extern u_int32_t key_random(void); extern void key_randomfill(void *, size_t); extern void key_freereg(struct socket *); extern int key_parse(struct mbuf *, struct socket *); diff --git a/bsd/netkey/key_debug.c b/bsd/netkey/key_debug.c index 054edecb1..1d7522054 100644 --- a/bsd/netkey/key_debug.c +++ b/bsd/netkey/key_debug.c @@ -149,6 
+149,12 @@ kdebug_sadb(base) case SADB_X_EXT_SA2: kdebug_sadb_x_sa2(ext); break; + case SADB_EXT_SESSION_ID: + kdebug_sadb_session_id(ext); + break; + case SADB_EXT_SASTAT: + kdebug_sadb_sastat(ext); + break; default: printf("kdebug_sadb: invalid ext_type %u was passed.\n", ext->sadb_ext_type); @@ -197,15 +203,15 @@ kdebug_sadb_prop(ext) "soft_bytes=%lu hard_bytes=%lu\n", comb->sadb_comb_soft_allocations, comb->sadb_comb_hard_allocations, - (unsigned long)comb->sadb_comb_soft_bytes, - (unsigned long)comb->sadb_comb_hard_bytes); + (u_int32_t)comb->sadb_comb_soft_bytes, + (u_int32_t)comb->sadb_comb_hard_bytes); printf(" soft_alloc=%lu hard_alloc=%lu " "soft_bytes=%lu hard_bytes=%lu }\n", - (unsigned long)comb->sadb_comb_soft_addtime, - (unsigned long)comb->sadb_comb_hard_addtime, - (unsigned long)comb->sadb_comb_soft_usetime, - (unsigned long)comb->sadb_comb_hard_usetime); + (u_int32_t)comb->sadb_comb_soft_addtime, + (u_int32_t)comb->sadb_comb_hard_addtime, + (u_int32_t)comb->sadb_comb_soft_usetime, + (u_int32_t)comb->sadb_comb_hard_usetime); comb++; } printf("}\n"); @@ -230,7 +236,7 @@ kdebug_sadb_identity(ext) switch (id->sadb_ident_type) { default: printf(" type=%d id=%lu", - id->sadb_ident_type, (u_long)id->sadb_ident_id); + id->sadb_ident_type, (u_int32_t)id->sadb_ident_id); if (len) { #ifdef KERNEL ipsec_hexdump((caddr_t)(id + 1), len); /*XXX cast ?*/ @@ -389,6 +395,47 @@ kdebug_sadb_x_sa2(ext) return; } +static void +kdebug_sadb_session_id(ext) + struct sadb_ext *ext; +{ + struct sadb_session_id *p = (__typeof__(p))ext; + + /* sanity check */ + if (ext == NULL) + panic("kdebug_sadb_session_id: NULL pointer was passed.\n"); + + printf("sadb_session_id{ id0=%llx, id1=%llx}\n", + p->sadb_session_id_v[0], + p->sadb_session_id_v[1]); + + return; +} + +static void +kdebug_sadb_sastat(ext) + struct sadb_ext *ext; +{ + struct sadb_sastat *p = (__typeof__(p))ext; + struct sastat *stats; + int i; + + /* sanity check */ + if (ext == NULL) + panic("kdebug_sadb_sastat: 
NULL pointer was passed.\n"); + + printf("sadb_sastat{ dir=%u num=%u\n", + p->sadb_sastat_dir, p->sadb_sastat_list_len); + stats = (__typeof__(stats))(p + 1); + for (i = 0; i < p->sadb_sastat_list_len; i++) { + printf(" spi=%x,\n", + stats[i].spi); + } + printf("}\n"); + + return; +} + void kdebug_sadb_x_policy(ext) struct sadb_ext *ext; diff --git a/bsd/netkey/key_debug.h b/bsd/netkey/key_debug.h index 761571725..b2e94a3ad 100644 --- a/bsd/netkey/key_debug.h +++ b/bsd/netkey/key_debug.h @@ -79,7 +79,7 @@ extern void kdebug_secasindex(struct secasindex *); extern void kdebug_secasv(struct secasvar *); extern void kdebug_mbufhdr(struct mbuf *); extern void kdebug_mbuf(struct mbuf *); -#endif KERNEL +#endif /* KERNEL */ struct sockaddr; extern void kdebug_sockaddr(struct sockaddr *); diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h index 53cddffce..e304c336f 100644 --- a/bsd/netkey/keydb.h +++ b/bsd/netkey/keydb.h @@ -59,6 +59,7 @@ struct secashead { struct sadb_ident *identd; /* destination identity */ /* XXX I don't know how to use them. */ + u_int8_t dir; /* IPSEC_DIR_INBOUND or IPSEC_DIR_OUTBOUND */ u_int8_t state; /* MATURE or DEAD. */ LIST_HEAD(_satree, secasvar) savtree[SADB_SASTATE_MAX+1]; /* SA chain */ diff --git a/bsd/nfs/Makefile b/bsd/nfs/Makefile index 9464a92ae..10e246402 100644 --- a/bsd/nfs/Makefile +++ b/bsd/nfs/Makefile @@ -29,7 +29,7 @@ INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = nfs -EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_LIST = EXPORT_MI_DIR = nfs diff --git a/bsd/nfs/krpc_subr.c b/bsd/nfs/krpc_subr.c index b1d597105..8ded0f04b 100644 --- a/bsd/nfs/krpc_subr.c +++ b/bsd/nfs/krpc_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,7 +76,6 @@ #include #include #include -#include #include #include @@ -336,7 +335,7 @@ krpc_call(sa, sotype, prog, vers, func, data, from_p) */ if (sotype == SOCK_STREAM) { /* first, fill in RPC record marker */ - u_long *recmark = mbuf_data(mhead); + u_int32_t *recmark = mbuf_data(mhead); *recmark = htonl(0x80000000 | (mbuf_pkthdr_len(mhead) - 4)); call = (struct rpc_call *)(recmark + 1); } else { @@ -401,11 +400,11 @@ krpc_call(sa, sotype, prog, vers, func, data, from_p) } if (sotype == SOCK_STREAM) { int maxretries = 60; - struct iovec_32 aio; - aio.iov_base = (uintptr_t) &len; - aio.iov_len = sizeof(u_long); + struct iovec aio; + aio.iov_base = &len; + aio.iov_len = sizeof(u_int32_t); bzero(&msg, sizeof(msg)); - msg.msg_iov = (struct iovec *) &aio; + msg.msg_iov = &aio; msg.msg_iovlen = 1; do { error = sock_receive(so, &msg, MSG_WAITALL, &readlen); @@ -416,7 +415,7 @@ krpc_call(sa, sotype, prog, vers, func, data, from_p) /* only log a message if we got a partial word */ if (readlen != 0) printf("short receive (%ld/%ld) from server " IP_FORMAT "\n", - readlen, sizeof(u_long), IP_LIST(&(sin->sin_addr.s_addr))); + readlen, sizeof(u_int32_t), IP_LIST(&(sin->sin_addr.s_addr))); error = EPIPE; } if (error) diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index 4ff6d43f7..821af8e5b 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,7 +97,7 @@ __private_extern__ int nfs_ticks; #define NFS_RSIZE NFS_RWSIZE /* Def. read data size <= 32K */ #define NFS_DGRAM_WSIZE 8192 /* UDP Def. write data size <= 8K */ #define NFS_DGRAM_RSIZE 8192 /* UDP Def. read data size <= 8K */ -#define NFS_READDIRSIZE 8192 /* Def. readdir size */ +#define NFS_READDIRSIZE 32768 /* Def. readdir size */ #define NFS_DEFRAHEAD 16 /* Def. 
read ahead # blocks */ #define NFS_MAXRAHEAD 128 /* Max. read ahead # blocks */ #define NFS_DEFMAXASYNCWRITES 128 /* Def. max # concurrent async write RPCs */ @@ -111,7 +111,7 @@ __private_extern__ int nfs_ticks; #ifndef NFSRV_WGATHERDELAY #define NFSRV_WGATHERDELAY 1 /* Default write gather delay (msec) */ #endif -#define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ +#define NFS_DIRBLKSIZ 4096 /* size of NFS directory buffers */ #if defined(KERNEL) && !defined(DIRBLKSIZ) #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ /* can't be larger than NFS_FABLKSIZE */ @@ -126,9 +126,9 @@ __private_extern__ int nfs_ticks; */ #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) -#define NFS_SRVMAXDATA(n) \ +#define NFSRV_NDMAXDATA(n) \ (((n)->nd_vers == NFS_VER3) ? (((n)->nd_nam2) ? \ - NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) + NFS_MAXDGRAMDATA : NFSRV_MAXDATA) : NFS_V2MAXDATA) /* * The IO_METASYNC flag should be implemented for local file systems. @@ -148,21 +148,29 @@ __private_extern__ int nfs_ticks; * becomes bunk!). * Note that some of these structures come out of there own nfs zones. 
*/ -#define NFS_NODEALLOC 512 -#define NFS_MNTALLOC 512 -#define NFS_SVCALLOC 256 +#define NFS_NODEALLOC 1024 +#define NFS_MNTALLOC 1024 +#define NFS_SVCALLOC 512 /* * Arguments to mount NFS */ -#define NFS_ARGSVERSION 5 /* change when nfs_args changes */ +#define NFS_ARGSVERSION 6 /* change when nfs_args changes */ struct nfs_args { int version; /* args structure version number */ +#ifdef KERNEL + user32_addr_t addr; /* file server address */ +#else struct sockaddr *addr; /* file server address */ +#endif int addrlen; /* length of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ +#ifdef KERNEL + user32_addr_t fh; /* File handle to be mounted */ +#else u_char *fh; /* File handle to be mounted */ +#endif int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ @@ -174,7 +182,52 @@ struct nfs_args { int readahead; /* # of blocks to readahead */ int leaseterm; /* obsolete: Term (sec) of lease */ int deadthresh; /* obsolete: Retrans threshold */ +#ifdef KERNEL + user32_addr_t hostname; /* server's name */ +#else char *hostname; /* server's name */ +#endif + /* NFS_ARGSVERSION 3 ends here */ + int acregmin; /* reg file min attr cache timeout */ + int acregmax; /* reg file max attr cache timeout */ + int acdirmin; /* dir min attr cache timeout */ + int acdirmax; /* dir max attr cache timeout */ + /* NFS_ARGSVERSION 4 ends here */ + uint32_t auth; /* security mechanism flavor */ + /* NFS_ARGSVERSION 5 ends here */ + uint32_t deadtimeout; /* secs until unresponsive mount considered dead */ +}; +struct nfs_args5 { + int version; /* args structure version number */ +#ifdef KERNEL + user32_addr_t addr; /* file server address */ +#else + struct sockaddr *addr; /* file server address */ +#endif + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ +#ifdef KERNEL + user32_addr_t fh; /* File handle to be mounted */ +#else + u_char *fh; /* File handle to be mounted */ 
+#endif + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ +#ifdef KERNEL + user32_addr_t hostname; /* server's name */ +#else + char *hostname; /* server's name */ +#endif /* NFS_ARGSVERSION 3 ends here */ int acregmin; /* reg file min attr cache timeout */ int acregmax; /* reg file max attr cache timeout */ @@ -185,11 +238,19 @@ struct nfs_args { }; struct nfs_args4 { int version; /* args structure version number */ +#ifdef KERNEL + user32_addr_t addr; /* file server address */ +#else struct sockaddr *addr; /* file server address */ +#endif int addrlen; /* length of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ +#ifdef KERNEL + user32_addr_t fh; /* File handle to be mounted */ +#else u_char *fh; /* File handle to be mounted */ +#endif int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ @@ -201,7 +262,11 @@ struct nfs_args4 { int readahead; /* # of blocks to readahead */ int leaseterm; /* obsolete: Term (sec) of lease */ int deadthresh; /* obsolete: Retrans threshold */ +#ifdef KERNEL + user32_addr_t hostname; /* server's name */ +#else char *hostname; /* server's name */ +#endif /* NFS_ARGSVERSION 3 ends here */ int acregmin; /* reg file min attr cache timeout */ int acregmax; /* reg file max attr cache timeout */ @@ -211,11 +276,19 @@ struct nfs_args4 { struct nfs_args3 { int version; /* args structure version number */ +#ifdef KERNEL + user32_addr_t addr; /* file server address */ +#else struct sockaddr *addr; /* file server address */ +#endif int addrlen; /* length 
of address */ int sotype; /* Socket type */ int proto; /* and Protocol */ +#ifdef KERNEL + user32_addr_t fh; /* File handle to be mounted */ +#else u_char *fh; /* File handle to be mounted */ +#endif int fhsize; /* Size, in bytes, of fh */ int flags; /* flags */ int wsize; /* write size in bytes */ @@ -227,7 +300,11 @@ struct nfs_args3 { int readahead; /* # of blocks to readahead */ int leaseterm; /* obsolete: Term (sec) of lease */ int deadthresh; /* obsolete: Retrans threshold */ +#ifdef KERNEL + user32_addr_t hostname; /* server's name */ +#else char *hostname; /* server's name */ +#endif }; #ifdef KERNEL @@ -261,6 +338,35 @@ struct user_nfs_args { int acdirmax; /* dir max attr cache timeout */ /* NFS_ARGSVERSION 4 ends here */ uint32_t auth; /* security mechanism flavor */ + /* NFS_ARGSVERSION 5 ends here */ + uint32_t deadtimeout; /* secs until unresponsive mount considered dead */ +}; +struct user_nfs_args5 { + int version; /* args structure version number */ + user_addr_t addr __attribute((aligned(8))); /* file server address */ + int addrlen; /* length of address */ + int sotype; /* Socket type */ + int proto; /* and Protocol */ + user_addr_t fh __attribute((aligned(8))); /* File handle to be mounted */ + int fhsize; /* Size, in bytes, of fh */ + int flags; /* flags */ + int wsize; /* write size in bytes */ + int rsize; /* read size in bytes */ + int readdirsize; /* readdir size in bytes */ + int timeo; /* initial timeout in .1 secs */ + int retrans; /* times to retry send */ + int maxgrouplist; /* Max. 
size of group list */ + int readahead; /* # of blocks to readahead */ + int leaseterm; /* obsolete: Term (sec) of lease */ + int deadthresh; /* obsolete: Retrans threshold */ + user_addr_t hostname __attribute((aligned(8))); /* server's name */ + /* NFS_ARGSVERSION 3 ends here */ + int acregmin; /* reg file min attr cache timeout */ + int acregmax; /* reg file max attr cache timeout */ + int acdirmin; /* dir min attr cache timeout */ + int acdirmax; /* dir max attr cache timeout */ + /* NFS_ARGSVERSION 4 ends here */ + uint32_t auth; /* security mechanism flavor */ }; struct user_nfs_args4 { int version; /* args structure version number */ @@ -325,9 +431,9 @@ struct user_nfs_args3 { #define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */ #define NFSMNT_NFSV4 0x00000400 /* Use NFS Version 4 protocol */ #define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */ -// #define NFSMNT_UNUSED 0x00001000 /* unused */ +#define NFSMNT_DEADTIMEOUT 0x00001000 /* unmount after a period of unresponsiveness */ #define NFSMNT_READAHEAD 0x00002000 /* set read ahead */ -// #define NFSMNT_UNUSED 0x00004000 /* unused */ +#define NFSMNT_CALLUMNT 0x00004000 /* call MOUNTPROC_UMNT on unmount */ #define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */ #define NFSMNT_RDIRPLUS 0x00010000 /* Use Readdirplus for V3 */ #define NFSMNT_READDIRSIZE 0x00020000 /* Set readdir size */ @@ -338,7 +444,8 @@ struct user_nfs_args3 { #define NFSMNT_ACDIRMIN 0x00400000 /* dir min attr cache timeout */ #define NFSMNT_ACDIRMAX 0x00800000 /* dir max attr cache timeout */ #define NFSMNT_SECFLAVOR 0x01000000 /* Use security flavor */ -#define NFSMNT_SECGIVEN 0x02000000 /* A sec= mount option was given */ +#define NFSMNT_SECSYSOK 0x02000000 /* Server can support auth sys */ +#define NFSMNT_MUTEJUKEBOX 0x04000000 /* don't treat jukebox errors as unresponsive */ /* * Structures for the nfssvc(2) syscall. 
Not that anyone but nfsd @@ -346,7 +453,11 @@ struct user_nfs_args3 { */ struct nfsd_args { int sock; /* Socket to serve */ +#ifdef KERNEL + user32_addr_t name; /* Client addr for connection based sockets */ +#else caddr_t name; /* Client addr for connection based sockets */ +#endif int namelen; /* Length of name */ }; @@ -418,11 +529,20 @@ struct nfs_export_net_args { struct nfs_export_args { uint32_t nxa_fsid; /* export FS ID */ uint32_t nxa_expid; /* export ID */ +#ifdef KERNEL + user32_addr_t nxa_fspath; /* export FS path */ + user32_addr_t nxa_exppath; /* export sub-path */ +#else char *nxa_fspath; /* export FS path */ char *nxa_exppath; /* export sub-path */ +#endif uint32_t nxa_flags; /* export arg flags */ uint32_t nxa_netcount; /* #entries in ex_nets array */ +#ifdef KERNEL + user32_addr_t nxa_nets; /* array of net args */ +#else struct nfs_export_net_args *nxa_nets; /* array of net args */ +#endif }; #ifdef KERNEL @@ -446,6 +566,7 @@ struct user_nfs_export_args { #define NXA_REPLACE 0x0003 /* delete and add the specified export(s) */ #define NXA_DELETE_ALL 0x0004 /* delete all exports */ #define NXA_OFFLINE 0x0008 /* export is offline */ +#define NXA_CHECK 0x0010 /* check if exportable */ /* export option flags */ #define NX_READONLY 0x0001 /* exported read-only */ @@ -545,9 +666,9 @@ struct nfs_export_stat_counters { #define NFSStatAdd64(PTR, VAL) \ do { \ uint32_t NFSSA_OldValue = \ - OSAddAtomic((VAL), (SInt32*)&(PTR)->lo); \ + OSAddAtomic((VAL), &(PTR)->lo); \ if ((NFSSA_OldValue + (VAL)) < NFSSA_OldValue) \ - OSAddAtomic(1, (SInt32*)&(PTR)->hi); \ + OSAddAtomic(1, &(PTR)->hi); \ } while (0) /* Some defines for dealing with active user list stats */ @@ -622,6 +743,7 @@ struct nfs_exportfs { __private_extern__ LIST_HEAD(nfsrv_expfs_list, nfs_exportfs) nfsrv_exports; __private_extern__ lck_rw_t nfsrv_export_rwlock; // lock for export data structures +#define NFSRVEXPHASHSZ 64 #define NFSRVEXPHASHVAL(FSID, EXPID) \ (((FSID) >> 24) ^ ((FSID) >> 16) ^ 
((FSID) >> 8) ^ (EXPID)) #define NFSRVEXPHASH(FSID, EXPID) \ @@ -629,6 +751,7 @@ __private_extern__ lck_rw_t nfsrv_export_rwlock; // lock for export data struct __private_extern__ LIST_HEAD(nfsrv_export_hashhead, nfs_export) *nfsrv_export_hashtbl; __private_extern__ u_long nfsrv_export_hash; +#if CONFIG_FSE /* * NFS server file mod fsevents */ @@ -640,13 +763,16 @@ struct nfsrv_fmod { }; #define NFSRVFMODHASHSZ 128 -#define NFSRVFMODHASH(vp) (((u_long) vp) & nfsrv_fmod_hash) +#define NFSRVFMODHASH(vp) (((uintptr_t) vp) & nfsrv_fmod_hash) __private_extern__ LIST_HEAD(nfsrv_fmod_hashhead, nfsrv_fmod) *nfsrv_fmod_hashtbl; __private_extern__ u_long nfsrv_fmod_hash; __private_extern__ lck_mtx_t *nfsrv_fmod_mutex; __private_extern__ int nfsrv_fmod_pending, nfsrv_fsevents_enabled; -__private_extern__ int nfsrv_async, nfsrv_reqcache_size, nfsrv_sock_max_rec_queue_length; +#endif +__private_extern__ int nfsrv_async, nfsrv_export_hash_size, + nfsrv_reqcache_size, nfsrv_sock_max_rec_queue_length; +__private_extern__ uint32_t nfsrv_gss_context_ttl; __private_extern__ struct nfsstats nfsstats; #endif // KERNEL @@ -745,12 +871,16 @@ MALLOC_DECLARE(M_NFSD); MALLOC_DECLARE(M_NFSBIGFH); #endif -struct uio; struct vnode_attr; struct nameidata; struct dqblk; /* XXX */ +struct vnode_attr; struct nameidata; struct dqblk; struct sockaddr_in; /* XXX */ struct nfsbuf; struct nfs_vattr; struct nfs_fsattr; struct nfsnode; typedef struct nfsnode * nfsnode_t; +struct nfs_open_owner; +struct nfs_open_file; +struct nfs_lock_owner; +struct nfs_file_lock; struct nfsreq; /* @@ -793,7 +923,7 @@ struct gss_seq { struct nfsreq_cbinfo { void (*rcb_func)(struct nfsreq *); /* async request callback function */ struct nfsbuf *rcb_bp; /* buffer I/O RPC is for */ - uint32_t rcb_args[2]; /* additional callback args */ + uint32_t rcb_args[3]; /* additional callback args */ }; /* @@ -812,7 +942,7 @@ struct nfsreq { struct nfsmount *r_nmp; /* NFS mount point */ uint64_t r_xid; /* RPC transaction ID */ uint32_t 
r_procnum; /* NFS procedure number */ - u_long r_mreqlen; /* request length */ + uint32_t r_mreqlen; /* request length */ int r_flags; /* flags on request, see below */ int r_lflags; /* flags protected by list mutex, see below */ int r_refs; /* # outstanding references */ @@ -822,9 +952,9 @@ struct nfsreq { int r_rtt; /* RTT for rpc */ thread_t r_thread; /* thread that did I/O system call */ kauth_cred_t r_cred; /* credential used for request */ - long r_start; /* request start time */ - long r_lastmsg; /* time of last tprintf */ - long r_resendtime; /* time of next jukebox error resend */ + time_t r_start; /* request start time */ + time_t r_lastmsg; /* time of last tprintf */ + time_t r_resendtime; /* time of next jukebox error resend */ struct nfs_gss_clnt_ctx *r_gss_ctx; /* RPCSEC_GSS context */ SLIST_HEAD(, gss_seq) r_gss_seqlist; /* RPCSEC_GSS sequence numbers */ uint32_t r_gss_argoff; /* RPCSEC_GSS offset to args */ @@ -845,33 +975,35 @@ __private_extern__ lck_grp_t *nfs_request_grp; #define NFSREQNOLIST ((struct nfsreq *)0xdeadbeef) /* sentinel value for nfsreq lists */ /* Flag values for r_flags */ -#define R_TIMING 0x0001 /* timing request (in mntp) */ -#define R_CWND 0x0002 /* request accounted for in congestion window */ -#define R_SOFTTERM 0x0004 /* request terminated (e.g. soft mnt) */ -#define R_RESTART 0x0008 /* RPC should be restarted. */ -#define R_INITTED 0x0010 /* request has been initialized */ -#define R_TPRINTFMSG 0x0020 /* Did a tprintf msg. 
*/ -#define R_MUSTRESEND 0x0040 /* Must resend request */ -#define R_ALLOCATED 0x0080 /* request was allocated */ -#define R_SENT 0x0100 /* request has been sent */ -#define R_WAITSENT 0x0200 /* someone is waiting for request to be sent */ -#define R_RESENDERR 0x0400 /* resend failed */ -#define R_JBTPRINTFMSG 0x0800 /* Did a tprintf msg for jukebox error */ -#define R_ASYNC 0x1000 /* async request */ -#define R_ASYNCWAIT 0x2000 /* async request now being waited on */ -#define R_RESENDQ 0x4000 /* async request currently on resendq */ - -#define R_SETUP 0x8000 /* a setup RPC - during (re)connection */ -#define R_OPTMASK 0x8000 /* mask of all RPC option flags */ +#define R_TIMING 0x00000001 /* timing request (in mntp) */ +#define R_CWND 0x00000002 /* request accounted for in congestion window */ +#define R_SOFTTERM 0x00000004 /* request terminated (e.g. soft mnt) */ +#define R_RESTART 0x00000008 /* RPC should be restarted. */ +#define R_INITTED 0x00000010 /* request has been initialized */ +#define R_TPRINTFMSG 0x00000020 /* Did a tprintf msg. */ +#define R_MUSTRESEND 0x00000040 /* Must resend request */ +#define R_ALLOCATED 0x00000080 /* request was allocated */ +#define R_SENT 0x00000100 /* request has been sent */ +#define R_WAITSENT 0x00000200 /* someone is waiting for request to be sent */ +#define R_RESENDERR 0x00000400 /* resend failed */ +#define R_JBTPRINTFMSG 0x00000800 /* Did a tprintf msg for jukebox error */ +#define R_ASYNC 0x00001000 /* async request */ +#define R_ASYNCWAIT 0x00002000 /* async request now being waited on */ +#define R_RESENDQ 0x00004000 /* async request currently on resendq */ +#define R_SENDING 0x00008000 /* request currently being sent */ + +#define R_RECOVER 0x40000000 /* a state recovery RPC - during NFSSTA_RECOVER */ +#define R_SETUP 0x80000000 /* a setup RPC - during (re)connection */ +#define R_OPTMASK 0xc0000000 /* mask of all RPC option flags */ /* Flag values for r_lflags */ #define RL_BUSY 0x0001 /* Locked. 
*/ #define RL_WAITING 0x0002 /* Someone waiting for lock. */ #define RL_QUEUED 0x0004 /* request is on the queue */ -__private_extern__ u_long nfs_xid, nfs_xidwrap; -__private_extern__ int nfs_iosize, nfs_access_cache_timeout, nfs_allow_async, nfs_statfs_rate_limit; -__private_extern__ int nfs_lockd_mounts, nfs_lockd_request_sent; +__private_extern__ u_int32_t nfs_xid, nfs_xidwrap; +__private_extern__ int nfs_iosize, nfs_access_cache_timeout, nfs_access_delete, nfs_allow_async, nfs_statfs_rate_limit; +__private_extern__ int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des; __private_extern__ int nfs_tprintf_initial_delay, nfs_tprintf_delay; __private_extern__ int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes; @@ -890,7 +1022,7 @@ struct nfs_dulookup { * Network address hash list element */ union nethostaddr { - u_long had_inetaddr; + u_int32_t had_inetaddr; mbuf_t had_nam; }; @@ -915,7 +1047,7 @@ struct nfsrv_sock { int ns_cc; int ns_reclen; int ns_reccnt; - u_long ns_sref; + u_int32_t ns_sref; time_t ns_timestamp; /* socket timestamp */ lck_mtx_t ns_wgmutex; /* mutex for write gather fields */ u_quad_t ns_wgtime; /* next Write deadline (usec) */ @@ -991,7 +1123,7 @@ struct nfsrv_descript { int nd_vers; /* NFS version */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ - u_long nd_retxid; /* Reply xid */ + u_int32_t nd_retxid; /* Reply xid */ struct timeval nd_starttime; /* Time RPC initiated */ struct nfs_filehandle nd_fh; /* File handle */ uint32_t nd_sec; /* Security flavor */ @@ -1011,11 +1143,21 @@ __private_extern__ int nfsd_thread_count, nfsd_thread_max; __private_extern__ lck_mtx_t *nfs_request_mutex; __private_extern__ int nfs_request_timer_on; +/* mutex for nfs client globals */ +__private_extern__ lck_mtx_t *nfs_global_mutex; + +/* NFSv4 callback globals */ +__private_extern__ int nfs4_callback_timer_on; +__private_extern__ in_port_t nfs4_cb_port; + /* nfs timer call structures */ __private_extern__ 
thread_call_t nfs_request_timer_call; __private_extern__ thread_call_t nfs_buf_timer_call; +__private_extern__ thread_call_t nfs4_callback_timer_call; __private_extern__ thread_call_t nfsrv_deadsock_timer_call; +#if CONFIG_FSE __private_extern__ thread_call_t nfsrv_fmod_timer_call; +#endif __BEGIN_DECLS @@ -1029,20 +1171,31 @@ void nfs_nhinit(void); void nfs_nhinit_finish(void); u_long nfs_hash(u_char *, int); +int nfs4_init_clientid(struct nfsmount *); int nfs4_setclientid(struct nfsmount *); +int nfs4_renew(struct nfsmount *, int); void nfs4_renew_timer(void *, void *); -int nfs_connect(struct nfsmount *); +void nfs4_mount_callback_setup(struct nfsmount *); +void nfs4_mount_callback_shutdown(struct nfsmount *); +void nfs4_cb_accept(socket_t, void *, int); +void nfs4_cb_rcv(socket_t, void *, int); +void nfs4_callback_timer(void *, void *); + +int nfs_connect(struct nfsmount *, int); void nfs_disconnect(struct nfsmount *); +void nfs_need_reconnect(struct nfsmount *); void nfs_mount_sock_thread_wake(struct nfsmount *); +void nfs_mount_check_dead_timeout(struct nfsmount *); int nfs_getattr(nfsnode_t, struct nfs_vattr *, vfs_context_t, int); -int nfs_getattrcache(nfsnode_t, struct nfs_vattr *, int); +int nfs_getattrcache(nfsnode_t, struct nfs_vattr *); int nfs_loadattrcache(nfsnode_t, struct nfs_vattr *, u_int64_t *, int); int nfs_attrcachetimeout(nfsnode_t); int nfs_buf_page_inval(vnode_t vp, off_t offset); int nfs_vinvalbuf(vnode_t, int, vfs_context_t, int); int nfs_vinvalbuf2(vnode_t, int, thread_t, kauth_cred_t, int); +int nfs_vinvalbuf_internal(nfsnode_t, int, thread_t, kauth_cred_t, int, int); int nfs_request_create(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq **); void nfs_request_destroy(struct nfsreq *); @@ -1054,10 +1207,13 @@ void nfs_request_wait(struct nfsreq *); int nfs_request_finish(struct nfsreq *, struct nfsm_chain *, int *); int nfs_request(nfsnode_t, mount_t, struct nfsm_chain *, int, vfs_context_t, struct 
nfsm_chain *, u_int64_t *, int *); int nfs_request2(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, int, struct nfsm_chain *, u_int64_t *, int *); +int nfs_request_gss(mount_t, struct nfsm_chain *, thread_t, kauth_cred_t, int, struct nfs_gss_clnt_ctx *, struct nfsm_chain *, int *); int nfs_request_async(nfsnode_t, mount_t, struct nfsm_chain *, int, thread_t, kauth_cred_t, struct nfsreq_cbinfo *cb, struct nfsreq **); int nfs_request_async_finish(struct nfsreq *, struct nfsm_chain *, u_int64_t *, int *); void nfs_request_async_cancel(struct nfsreq *); void nfs_request_timer(void *, void *); +int nfs_aux_request(struct nfsmount *, thread_t, struct sockaddr_in *, mbuf_t, uint32_t, int, int, struct nfsm_chain *); +void nfs_get_xid(uint64_t *); int nfs_sigintr(struct nfsmount *, struct nfsreq *, thread_t, int); int nfs_noremotehang(thread_t); @@ -1066,16 +1222,70 @@ int nfs_sndlock(struct nfsreq *); void nfs_sndunlock(struct nfsreq *); int nfs_lookitup(nfsnode_t, char *, int, vfs_context_t, nfsnode_t *); -void nfs_dulookup_init(struct nfs_dulookup *, nfsnode_t, const char *, int); +void nfs_dulookup_init(struct nfs_dulookup *, nfsnode_t, const char *, int, vfs_context_t); void nfs_dulookup_start(struct nfs_dulookup *, nfsnode_t, vfs_context_t); void nfs_dulookup_finish(struct nfs_dulookup *, nfsnode_t, vfs_context_t); +int nfs_dir_buf_cache_lookup(nfsnode_t, nfsnode_t *, struct componentname *, vfs_context_t, int); +int nfs_dir_buf_search(struct nfsbuf *, struct componentname *, fhandle_t *, struct nfs_vattr *, uint64_t *, time_t *, daddr64_t *, int); +void nfs_name_cache_purge(nfsnode_t, nfsnode_t, struct componentname *, vfs_context_t); int nfs_parsefattr(struct nfsm_chain *, int, struct nfs_vattr *); int nfs4_parsefattr(struct nfsm_chain *, struct nfs_fsattr *, struct nfs_vattr *, fhandle_t *, struct dqblk *); void nfs_vattr_set_supported(uint32_t *, struct vnode_attr *); void nfs3_pathconf_cache(struct nfsmount *, struct nfs_fsattr *); +void 
nfs3_umount_rpc(struct nfsmount *, vfs_context_t, int); int nfs_node_mode_slot(nfsnode_t, uid_t, int); +void nfs_avoid_needless_id_setting_on_create(nfsnode_t, struct vnode_attr *, vfs_context_t); +int nfs4_create_rpc(vfs_context_t, nfsnode_t, struct componentname *, struct vnode_attr *, int, char *, nfsnode_t *); +int nfs_open_state_set_busy(nfsnode_t, vfs_context_t); +void nfs_open_state_clear_busy(nfsnode_t); +struct nfs_open_owner *nfs_open_owner_find(struct nfsmount *, kauth_cred_t, int); +void nfs_open_owner_destroy(struct nfs_open_owner *); +void nfs_open_owner_ref(struct nfs_open_owner *); +void nfs_open_owner_rele(struct nfs_open_owner *); +int nfs_open_owner_set_busy(struct nfs_open_owner *, thread_t); +void nfs_open_owner_clear_busy(struct nfs_open_owner *); +void nfs_owner_seqid_increment(struct nfs_open_owner *, struct nfs_lock_owner *, int); +int nfs_open_file_find(nfsnode_t, struct nfs_open_owner *, struct nfs_open_file **, uint32_t, uint32_t, int); +void nfs_open_file_destroy(struct nfs_open_file *); +int nfs_open_file_set_busy(struct nfs_open_file *, thread_t); +void nfs_open_file_clear_busy(struct nfs_open_file *); +void nfs_get_stateid(nfsnode_t, thread_t, kauth_cred_t, nfs_stateid *); +int nfs4_open(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t); +int nfs4_close(nfsnode_t, struct nfs_open_file *, uint32_t, uint32_t, vfs_context_t); +int nfs4_check_for_locks(struct nfs_open_owner *, struct nfs_open_file *); +void nfs4_reopen(struct nfs_open_file *, thread_t); +int nfs4_open_rpc(struct nfs_open_file *, vfs_context_t, struct componentname *, struct vnode_attr *, vnode_t, vnode_t *, int, int, int); +int nfs4_open_rpc_internal(struct nfs_open_file *, vfs_context_t, thread_t, kauth_cred_t, struct componentname *, struct vnode_attr *, vnode_t, vnode_t *, int, int, int); +int nfs4_open_reopen_rpc(struct nfs_open_file *, thread_t, kauth_cred_t, struct componentname *, vnode_t, vnode_t *, int, int); +int nfs4_open_reclaim_rpc(struct 
nfs_open_file *, int, int); +int nfs4_open_downgrade_rpc(nfsnode_t, struct nfs_open_file *, vfs_context_t); +int nfs4_close_rpc(nfsnode_t, struct nfs_open_file *, thread_t, kauth_cred_t, int); +int nfs4_delegreturn_rpc(struct nfsmount *, u_char *, int, struct nfs_stateid *, thread_t, kauth_cred_t); +struct nfs_lock_owner *nfs_lock_owner_find(nfsnode_t, proc_t, int); +void nfs_lock_owner_destroy(struct nfs_lock_owner *); +void nfs_lock_owner_ref(struct nfs_lock_owner *); +void nfs_lock_owner_rele(struct nfs_lock_owner *); +int nfs_lock_owner_set_busy(struct nfs_lock_owner *, thread_t); +void nfs_lock_owner_clear_busy(struct nfs_lock_owner *); +void nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *, struct nfs_file_lock *); +struct nfs_file_lock *nfs_file_lock_alloc(struct nfs_lock_owner *); +void nfs_file_lock_destroy(struct nfs_file_lock *); +int nfs_file_lock_conflict(struct nfs_file_lock *, struct nfs_file_lock *, int *); +int nfs4_lock_rpc(nfsnode_t, struct nfs_open_file *, struct nfs_file_lock *, int, thread_t, kauth_cred_t); +int nfs4_unlock_rpc(nfsnode_t, struct nfs_lock_owner *, int, uint64_t, uint64_t, vfs_context_t); +int nfs4_getlock(nfsnode_t, struct nfs_lock_owner *, struct flock *, uint64_t, uint64_t, vfs_context_t); +int nfs4_setlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, int, uint64_t, uint64_t, int, short, vfs_context_t); +int nfs4_unlock(nfsnode_t, struct nfs_open_file *, struct nfs_lock_owner *, uint64_t, uint64_t, int, vfs_context_t); + +int nfs_mount_state_in_use_start(struct nfsmount *); +int nfs_mount_state_in_use_end(struct nfsmount *, int); +int nfs_mount_state_error_should_restart(int); +uint nfs_mount_state_max_restarts(struct nfsmount *); +int nfs_mount_state_wait_for_recovery(struct nfsmount *); +void nfs4_recover(struct nfsmount *); + int nfs_vnop_access(struct vnop_access_args *); int nfs3_vnop_open(struct vnop_open_args *); @@ -1085,34 +1295,36 @@ int nfs4_vnop_create(struct vnop_create_args *); int 
nfs4_vnop_mknod(struct vnop_mknod_args *); int nfs4_vnop_open(struct vnop_open_args *); int nfs4_vnop_close(struct vnop_close_args *); +int nfs4_vnop_mmap(struct vnop_mmap_args *); +int nfs4_vnop_mnomap(struct vnop_mnomap_args *); int nfs4_vnop_getattr(struct vnop_getattr_args *); +int nfs4_vnop_read(struct vnop_read_args *); int nfs4_vnop_link(struct vnop_link_args *); int nfs4_vnop_mkdir(struct vnop_mkdir_args *); int nfs4_vnop_rmdir(struct vnop_rmdir_args *); int nfs4_vnop_symlink(struct vnop_symlink_args *); int nfs4_vnop_advlock(struct vnop_advlock_args *ap); -int nfs_read_rpc(nfsnode_t, struct uio *, vfs_context_t); -int nfs_write_rpc(nfsnode_t, struct uio *, vfs_context_t, int *, uint64_t *); -int nfs_write_rpc2(nfsnode_t, struct uio *, thread_t, kauth_cred_t, int *, uint64_t *); +int nfs_read_rpc(nfsnode_t, uio_t, vfs_context_t); +int nfs_write_rpc(nfsnode_t, uio_t, vfs_context_t, int *, uint64_t *); +int nfs_write_rpc2(nfsnode_t, uio_t, thread_t, kauth_cred_t, int *, uint64_t *); -int nfs3_access_rpc(nfsnode_t, u_long *, vfs_context_t); -int nfs4_access_rpc(nfsnode_t, u_long *, vfs_context_t); +int nfs3_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t); +int nfs4_access_rpc(nfsnode_t, u_int32_t *, vfs_context_t); int nfs3_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *); int nfs4_getattr_rpc(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *); -int nfs3_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t, int); -int nfs4_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t, int); +int nfs3_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t); +int nfs4_setattr_rpc(nfsnode_t, struct vnode_attr *, vfs_context_t); int nfs3_read_rpc_async(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **); int nfs4_read_rpc_async(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **); -int 
nfs3_read_rpc_async_finish(nfsnode_t, struct nfsreq *, struct uio *, size_t *, int *); -int nfs4_read_rpc_async_finish(nfsnode_t, struct nfsreq *, struct uio *, size_t *, int *); -int nfs3_write_rpc_async(nfsnode_t, struct uio *, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); -int nfs4_write_rpc_async(nfsnode_t, struct uio *, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); +int nfs3_read_rpc_async_finish(nfsnode_t, struct nfsreq *, uio_t, size_t *, int *); +int nfs4_read_rpc_async_finish(nfsnode_t, struct nfsreq *, uio_t, size_t *, int *); +int nfs3_write_rpc_async(nfsnode_t, uio_t, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); +int nfs4_write_rpc_async(nfsnode_t, uio_t, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); int nfs3_write_rpc_async_finish(nfsnode_t, struct nfsreq *, int *, size_t *, uint64_t *); int nfs4_write_rpc_async_finish(nfsnode_t, struct nfsreq *, int *, size_t *, uint64_t *); -int nfs3_readdir_rpc(nfsnode_t, struct uio *, vfs_context_t); -int nfs3_readdirplus_rpc(nfsnode_t, struct uio *, vfs_context_t); -int nfs4_readdir_rpc(nfsnode_t, struct uio *, vfs_context_t); +int nfs3_readdir_rpc(nfsnode_t, struct nfsbuf *, vfs_context_t); +int nfs4_readdir_rpc(nfsnode_t, struct nfsbuf *, vfs_context_t); int nfs3_readlink_rpc(nfsnode_t, char *, uint32_t *, vfs_context_t); int nfs4_readlink_rpc(nfsnode_t, char *, uint32_t *, vfs_context_t); int nfs3_commit_rpc(nfsnode_t, u_int64_t, u_int64_t, kauth_cred_t); @@ -1140,7 +1352,9 @@ int nfsrv_export(struct user_nfs_export_args *, vfs_context_t); int nfsrv_fhmatch(struct nfs_filehandle *, struct nfs_filehandle *); int nfsrv_fhtovp(struct nfs_filehandle *, struct nfsrv_descript *, vnode_t *, struct nfs_export **, struct nfs_export_options **); +#if CONFIG_FSE void nfsrv_fmod_timer(void *, void *); +#endif int nfsrv_getcache(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *); void 
nfsrv_group_sort(gid_t *, int); void nfsrv_init(void); @@ -1189,6 +1403,7 @@ int nfsrv_write(struct nfsrv_descript *, struct nfsrv_sock *, vfs_context_t, mbu void nfs_interval_timer_start(thread_call_t, int); void nfs_up(struct nfsmount *, thread_t, int, const char *); void nfs_down(struct nfsmount *, thread_t, int, int, const char *); +int nfs_msg(thread_t, const char *, const char *, int); int nfs_mountroot(void); struct nfs_diskless; diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c index 977748800..6b1786cda 100644 --- a/bsd/nfs/nfs4_subs.c +++ b/bsd/nfs/nfs4_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2007 Apple Inc. All rights reserved. + * Copyright (c) 2006-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -72,71 +71,187 @@ /* - * NFSv4 SETCLIENTID + * Create the unique client ID to use for this mount. + * + * Format: unique ID + en0_address + server_address + mntfromname + mntonname + * + * We could possibly use one client ID for all mounts of the same server; + * however, that would complicate some aspects of state management. + * + * Each mount socket connection sends a SETCLIENTID. If the ID is the same but + * the verifier (mounttime) changes, then all previous (mounts') state gets dropped. + * + * State is typically managed per-mount and in order to keep it that way + * each mount needs to use a separate client ID. However, we also need to + * make sure that each mount uses the same client ID each time. + * + * In an attempt to differentiate mounts we include the mntfromname and mntonname + * strings to the client ID (as long as they fit). We also make sure that the + * value does not conflict with any existing values in use. 
*/ int -nfs4_setclientid(struct nfsmount *nmp) +nfs4_init_clientid(struct nfsmount *nmp) { + struct nfs_client_id *ncip, *ncip2; struct sockaddr *saddr; - uint64_t verifier; - char id[128]; - int idlen, len, error = 0, status, numops; - u_int64_t xid; - vfs_context_t ctx; - thread_t thd; - kauth_cred_t cred; - struct nfsm_chain nmreq, nmrep; + int error, len, len2, cmp; + struct vfsstatfs *vsfs; static uint8_t en0addr[6]; static uint8_t en0addr_set = 0; - lck_mtx_lock(nfs_request_mutex); + lck_mtx_lock(nfs_global_mutex); if (!en0addr_set) { ifnet_t interface = NULL; error = ifnet_find_by_name("en0", &interface); if (!error) error = ifnet_lladdr_copy_bytes(interface, en0addr, sizeof(en0addr)); if (error) - printf("nfs4_setclientid: error getting en0 address, %d\n", error); + printf("nfs4_init_clientid: error getting en0 address, %d\n", error); if (!error) en0addr_set = 1; - error = 0; if (interface) ifnet_release(interface); } - lck_mtx_unlock(nfs_request_mutex); + lck_mtx_unlock(nfs_global_mutex); - ctx = vfs_context_kernel(); /* XXX */ - thd = vfs_context_thread(ctx); - cred = vfs_context_ucred(ctx); + MALLOC(ncip, struct nfs_client_id *, sizeof(struct nfs_client_id), M_TEMP, M_WAITOK); + if (!ncip) + return (ENOMEM); + + vsfs = vfs_statfs(nmp->nm_mountp); + saddr = mbuf_data(nmp->nm_nam); + ncip->nci_idlen = sizeof(uint32_t) + sizeof(en0addr) + saddr->sa_len + + strlen(vsfs->f_mntfromname) + 1 + strlen(vsfs->f_mntonname) + 1; + if (ncip->nci_idlen > NFS4_OPAQUE_LIMIT) + ncip->nci_idlen = NFS4_OPAQUE_LIMIT; + MALLOC(ncip->nci_id, char *, ncip->nci_idlen, M_TEMP, M_WAITOK); + if (!ncip->nci_id) { + FREE(ncip, M_TEMP); + return (ENOMEM); + } + + *(uint32_t*)ncip->nci_id = 0; + len = sizeof(uint32_t); + len2 = min(sizeof(en0addr), ncip->nci_idlen-len); + bcopy(en0addr, &ncip->nci_id[len], len2); + len += sizeof(en0addr); + len2 = min(saddr->sa_len, ncip->nci_idlen-len); + bcopy(saddr, &ncip->nci_id[len], len2); + len += len2; + if (len < ncip->nci_idlen) { + len2 = 
strlcpy(&ncip->nci_id[len], vsfs->f_mntfromname, ncip->nci_idlen-len); + if (len2 < (ncip->nci_idlen - len)) + len += len2 + 1; + else + len = ncip->nci_idlen; + } + if (len < ncip->nci_idlen) { + len2 = strlcpy(&ncip->nci_id[len], vsfs->f_mntonname, ncip->nci_idlen-len); + if (len2 < (ncip->nci_idlen - len)) + len += len2 + 1; + else + len = ncip->nci_idlen; + } + + /* make sure the ID is unique, and add it to the sorted list */ + lck_mtx_lock(nfs_global_mutex); + TAILQ_FOREACH(ncip2, &nfsclientids, nci_link) { + if (ncip->nci_idlen > ncip2->nci_idlen) + continue; + if (ncip->nci_idlen < ncip2->nci_idlen) + break; + cmp = bcmp(ncip->nci_id + sizeof(uint32_t), + ncip2->nci_id + sizeof(uint32_t), + ncip->nci_idlen - sizeof(uint32_t)); + if (cmp > 0) + continue; + if (cmp < 0) + break; + if (*(uint32_t*)ncip->nci_id > *(uint32_t*)ncip2->nci_id) + continue; + if (*(uint32_t*)ncip->nci_id < *(uint32_t*)ncip2->nci_id) + break; + *(uint32_t*)ncip->nci_id += 1; + } + if (*(uint32_t*)ncip->nci_id) + printf("nfs client ID collision (%d) for %s on %s\n", *(uint32_t*)ncip->nci_id, + vsfs->f_mntfromname, vsfs->f_mntonname); + if (ncip2) + TAILQ_INSERT_BEFORE(ncip2, ncip, nci_link); + else + TAILQ_INSERT_TAIL(&nfsclientids, ncip, nci_link); + nmp->nm_longid = ncip; + lck_mtx_unlock(nfs_global_mutex); + + return (0); +} + +/* + * NFSv4 SETCLIENTID + */ +int +nfs4_setclientid(struct nfsmount *nmp) +{ + uint64_t verifier, xid; + int error = 0, status, numops; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN]; + thread_t thd; + kauth_cred_t cred; + struct nfsm_chain nmreq, nmrep; + struct sockaddr_in sin; + uint8_t *addr; + char raddr[32]; + int ralen = 0; + + thd = current_thread(); + cred = IS_VALID_CRED(nmp->nm_mcred) ? 
nmp->nm_mcred : vfs_context_ucred(vfs_context_kernel()); + kauth_cred_ref(cred); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - /* ID: en0_address + server_address */ - idlen = len = sizeof(en0addr); - bcopy(en0addr, &id[0], len); - saddr = mbuf_data(nmp->nm_nam); - len = min(saddr->sa_len, sizeof(id)-idlen); - bcopy(saddr, &id[idlen], len); - idlen += len; + if (!nmp->nm_longid) + error = nfs4_init_clientid(nmp); // SETCLIENTID numops = 1; - nfsm_chain_build_alloc_init(error, &nmreq, 14 * NFSX_UNSIGNED + idlen); - nfsm_chain_add_compound_header(error, &nmreq, "setclientid", numops); + nfsm_chain_build_alloc_init(error, &nmreq, 14 * NFSX_UNSIGNED + nmp->nm_longid->nci_idlen); + nfsm_chain_add_compound_header(error, &nmreq, "setclid", numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID); /* nfs_client_id4 client; */ nfsm_chain_add_64(error, &nmreq, nmp->nm_mounttime); - nfsm_chain_add_32(error, &nmreq, idlen); - nfsm_chain_add_opaque(error, &nmreq, id, idlen); + nfsm_chain_add_32(error, &nmreq, nmp->nm_longid->nci_idlen); + nfsm_chain_add_opaque(error, &nmreq, nmp->nm_longid->nci_id, nmp->nm_longid->nci_idlen); /* cb_client4 callback; */ - /* We don't provide callback info yet */ - nfsm_chain_add_32(error, &nmreq, 0); /* callback program */ - nfsm_chain_add_string(error, &nmreq, "", 0); /* callback r_netid */ - nfsm_chain_add_string(error, &nmreq, "", 0); /* callback r_addr */ - nfsm_chain_add_32(error, &nmreq, 0); /* callback_ident */ + if (nmp->nm_cbid && nfs4_cb_port && + !(error = sock_getsockname(nmp->nm_so, (struct sockaddr*)&sin, sizeof(sin)))) { + /* assemble r_addr = h1.h2.h3.h4.p1.p2 */ + /* h = source address of nmp->nm_so */ + /* p = nfs4_cb_port */ + addr = (uint8_t*)&sin.sin_addr.s_addr; + ralen = snprintf(raddr, sizeof(raddr), "%d.%d.%d.%d.%d.%d", + addr[0], addr[1], addr[2], addr[3], + ((nfs4_cb_port >> 8) & 0xff), + (nfs4_cb_port & 0xff)); + /* make sure it fit, give up if it didn't */ + if (ralen >= (int)sizeof(raddr)) + 
ralen = 0; + } + if (ralen > 0) { + /* add callback info */ + nfsm_chain_add_32(error, &nmreq, NFS4_CALLBACK_PROG); /* callback program */ + nfsm_chain_add_string(error, &nmreq, "tcp", 3); /* callback r_netid */ + nfsm_chain_add_string(error, &nmreq, raddr, ralen); /* callback r_addr */ + nfsm_chain_add_32(error, &nmreq, nmp->nm_cbid); /* callback_ident */ + } else { + /* don't provide valid callback info */ + nfsm_chain_add_32(error, &nmreq, 0); /* callback program */ + nfsm_chain_add_string(error, &nmreq, "", 0); /* callback r_netid */ + nfsm_chain_add_string(error, &nmreq, "", 0); /* callback r_addr */ + nfsm_chain_add_32(error, &nmreq, 0); /* callback_ident */ + } nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); @@ -152,14 +267,25 @@ nfs4_setclientid(struct nfsmount *nmp) nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - // SETCLIENTID_CONFIRM - numops = 1; - nfsm_chain_build_alloc_init(error, &nmreq, 13 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "setclientid_confirm", numops); + // SETCLIENTID_CONFIRM, PUTFH, GETATTR(FS) + numops = nmp->nm_dnp ? 
3 : 1; + nfsm_chain_build_alloc_init(error, &nmreq, 28 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "setclid_conf", numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETCLIENTID_CONFIRM); nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); nfsm_chain_add_64(error, &nmreq, verifier); + if (nmp->nm_dnp) { + /* refresh fs attributes too */ + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, nmp->nm_dnp->n_fhp, nmp->nm_dnp->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + NFS_CLEAR_ATTRIBUTES(bitmap); + NFS4_PER_FS_ATTRIBUTES(bitmap); + nfsm_chain_add_bitmap(error, &nmreq, bitmap, NFS_ATTR_BITMAP_LEN); + } nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); @@ -169,27 +295,37 @@ nfs4_setclientid(struct nfsmount *nmp) nfsm_chain_op_check(error, &nmrep, NFS_OP_SETCLIENTID_CONFIRM); if (error) printf("nfs4_setclientid: confirm error %d\n", error); + if (nmp->nm_dnp) { + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsmout_if(error); + lck_mtx_lock(&nmp->nm_lock); + error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, NULL, NULL, NULL); + lck_mtx_unlock(&nmp->nm_lock); + } + nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + kauth_cred_unref(&cred); if (error) printf("nfs4_setclientid failed, %d\n", error); return (error); } /* - * periodic timer to renew lease state on server + * renew/check lease state on server */ -void -nfs4_renew_timer(void *param0, __unused void *param1) +int +nfs4_renew(struct nfsmount *nmp, int rpcflag) { - struct nfsmount *nmp = param0; - int error = 0, status, numops, interval; + int error = 0, status, numops; u_int64_t xid; - vfs_context_t ctx; struct nfsm_chain nmreq, nmrep; + kauth_cred_t cred; - ctx = vfs_context_kernel(); /* XXX */ + cred = IS_VALID_CRED(nmp->nm_mcred) ? 
nmp->nm_mcred : vfs_context_ucred(vfs_context_kernel()); + kauth_cred_ref(cred); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -204,19 +340,55 @@ nfs4_renew_timer(void *param0, __unused void *param1) nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - error = nfs_request(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, + current_thread(), cred, rpcflag, &nmrep, &xid, &status); nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_RENEW); nfsmout: - if (error) - printf("nfs4_renew_timer: error %d\n", error); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + kauth_cred_unref(&cred); + return (error); +} + + +/* + * periodic timer to renew lease state on server + */ +void +nfs4_renew_timer(void *param0, __unused void *param1) +{ + struct nfsmount *nmp = param0; + u_int64_t clientid; + int error = 0, interval; + + lck_mtx_lock(&nmp->nm_lock); + clientid = nmp->nm_clientid; + if ((nmp->nm_state & NFSSTA_RECOVER) || !(nmp->nm_sockflags & NMSOCK_READY)) { + lck_mtx_unlock(&nmp->nm_lock); + goto out; + } + lck_mtx_unlock(&nmp->nm_lock); + + error = nfs4_renew(nmp, R_RECOVER); +out: + if (error == ETIMEDOUT) + nfs_need_reconnect(nmp); + else if (error) + printf("nfs4_renew_timer: error %d\n", error); + lck_mtx_lock(&nmp->nm_lock); + if (error && (error != ETIMEDOUT) && + (nmp->nm_clientid == clientid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs4_renew_timer: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } interval = nmp->nm_fsattr.nfsa_lease / (error ? 
4 : 2); - if (interval < 1) + if ((interval < 1) || (nmp->nm_state & NFSSTA_RECOVER)) interval = 1; + lck_mtx_unlock(&nmp->nm_lock); nfs_interval_timer_start(nmp->nm_renew_timer, interval * 1000); } @@ -327,7 +499,7 @@ nfs4_parsefattr( if (val & ~0xff) printf("nfs: warning unknown fh type: 0x%x\n", val); nfsap->nfsa_flags &= ~NFS_FSFLAG_FHTYPE_MASK; - nfsap->nfsa_flags |= val << 24; + nfsap->nfsa_flags |= val << NFS_FSFLAG_FHTYPE_SHIFT; attrbytes -= NFSX_UNSIGNED; } if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_CHANGE)) { @@ -551,7 +723,8 @@ nfs4_parsefattr( nvap->nva_nlink = val; attrbytes -= NFSX_UNSIGNED; } - if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) { /* XXX ugly hack for now */ + if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) { + /* XXX Need ID mapping infrastructure - use ugly hack for now */ nfsm_chain_get_32(error, nmc, len); nfsm_chain_get_opaque_pointer(error, nmc, len, s); attrbytes -= NFSX_UNSIGNED + nfsm_rndup(len); @@ -565,7 +738,8 @@ nfs4_parsefattr( else nvap->nva_uid = 99; /* unknown */ } - if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) { /* XXX ugly hack for now */ + if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) { + /* XXX Need ID mapping infrastructure - use ugly hack for now */ nfsm_chain_get_32(error, nmc, len); nfsm_chain_get_opaque_pointer(error, nmc, len, s); attrbytes -= NFSX_UNSIGNED + nfsm_rndup(len); @@ -746,22 +920,34 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n attrbytes += NFSX_UNSIGNED; } if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER)) { - slen = snprintf(s, sizeof(s), "%d", vap->va_uid); + /* XXX Need ID mapping infrastructure - use ugly hack for now */ + if (vap->va_uid == 0) + slen = snprintf(s, sizeof(s), "root@localdomain"); + else if (vap->va_uid == (uid_t)-2) + slen = snprintf(s, sizeof(s), "nobody@localdomain"); + else + slen = snprintf(s, sizeof(s), "%d", vap->va_uid); nfsm_chain_add_string(error, nmc, s, slen); attrbytes += NFSX_UNSIGNED + nfsm_rndup(slen); } if 
(NFS_BITMAP_ISSET(bitmap, NFS_FATTR_OWNER_GROUP)) { - slen = snprintf(s, sizeof(s), "%d", vap->va_gid); + /* XXX Need ID mapping infrastructure - use ugly hack for now */ + if (vap->va_gid == 0) + slen = snprintf(s, sizeof(s), "root@localdomain"); + else if (vap->va_gid == (gid_t)-2) + slen = snprintf(s, sizeof(s), "nobody@localdomain"); + else + slen = snprintf(s, sizeof(s), "%d", vap->va_gid); nfsm_chain_add_string(error, nmc, s, slen); attrbytes += NFSX_UNSIGNED + nfsm_rndup(slen); } // NFS_BITMAP_SET(bitmap, NFS_FATTR_SYSTEM) if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_TIME_ACCESS_SET)) { if (vap->va_vaflags & VA_UTIMES_NULL) { - nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER); + nfsm_chain_add_32(error, nmc, NFS4_TIME_SET_TO_SERVER); attrbytes += NFSX_UNSIGNED; } else { - nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT); + nfsm_chain_add_32(error, nmc, NFS4_TIME_SET_TO_CLIENT); nfsm_chain_add_64(error, nmc, vap->va_access_time.tv_sec); nfsm_chain_add_32(error, nmc, vap->va_access_time.tv_nsec); attrbytes += 4*NFSX_UNSIGNED; @@ -779,10 +965,10 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n } if (NFS_BITMAP_ISSET(bitmap, NFS_FATTR_TIME_MODIFY_SET)) { if (vap->va_vaflags & VA_UTIMES_NULL) { - nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_SERVER); + nfsm_chain_add_32(error, nmc, NFS4_TIME_SET_TO_SERVER); attrbytes += NFSX_UNSIGNED; } else { - nfsm_chain_add_32(error, nmc, NFS_TIME_SET_TO_CLIENT); + nfsm_chain_add_32(error, nmc, NFS4_TIME_SET_TO_CLIENT); nfsm_chain_add_64(error, nmc, vap->va_modify_time.tv_sec); nfsm_chain_add_32(error, nmc, vap->va_modify_time.tv_nsec); attrbytes += 4*NFSX_UNSIGNED; @@ -795,3 +981,204 @@ nfsm_chain_add_fattr4_f(struct nfsm_chain *nmc, struct vnode_attr *vap, struct n return (error); } +/* + * Recover state for an NFS mount. + * + * Iterates over all open files, reclaiming opens and lock state. 
+ */ +void +nfs4_recover(struct nfsmount *nmp) +{ + struct timespec ts = { 1, 0 }; + int error, lost, reopen; + struct nfs_open_owner *noop; + struct nfs_open_file *nofp; + struct nfs_file_lock *nflp, *nextnflp; + struct nfs_lock_owner *nlop; + thread_t thd = current_thread(); + +restart: + error = 0; + lck_mtx_lock(&nmp->nm_lock); + /* + * First, wait for the state inuse count to go to zero so + * we know there are no state operations in progress. + */ + do { + if ((error = nfs_sigintr(nmp, NULL, NULL, 1))) + break; + if (!(nmp->nm_sockflags & NMSOCK_READY)) + error = EPIPE; + if (nmp->nm_state & NFSSTA_FORCE) + error = ENXIO; + if (nmp->nm_sockflags & NMSOCK_UNMOUNT) + error = ENXIO; + if (error) + break; + if (nmp->nm_stateinuse) + msleep(&nmp->nm_stateinuse, &nmp->nm_lock, (PZERO-1), "nfsrecoverstartwait", &ts); + } while (nmp->nm_stateinuse); + if (error) { + if (error == EPIPE) + printf("nfs recovery reconnecting\n"); + else + printf("nfs recovery aborted\n"); + lck_mtx_unlock(&nmp->nm_lock); + return; + } + + printf("nfs recovery started\n"); + if (++nmp->nm_stategenid == 0) + ++nmp->nm_stategenid; + lck_mtx_unlock(&nmp->nm_lock); + + /* for each open owner... */ + TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) { + /* for each of its opens... 
*/ + TAILQ_FOREACH(nofp, &noop->noo_opens, nof_oolink) { + if (!nofp->nof_access || (nofp->nof_flags & NFS_OPEN_FILE_LOST)) + continue; + lost = reopen = 0; + if (nofp->nof_rw_drw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_BOTH); + if (!error && nofp->nof_w_drw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_BOTH); + if (!error && nofp->nof_r_drw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_BOTH); + if (!error && nofp->nof_rw_dw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_WRITE); + if (!error && nofp->nof_w_dw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_WRITE); + if (!error && nofp->nof_r_dw) + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_WRITE); + /* + * deny-none opens with no locks can just be reopened (later) if reclaim fails. + */ + if (!error && nofp->nof_rw) { + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE); + if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) + reopen = 1; + } + if (!error && nofp->nof_w) { + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE); + if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) + reopen = 1; + } + if (!error && nofp->nof_r) { + error = nfs4_open_reclaim_rpc(nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE); + if ((error == NFSERR_ADMIN_REVOKED) || (error == NFSERR_EXPIRED) || (error == NFSERR_NO_GRACE)) + reopen = 1; + } + + if (error) { + /* restart recovery? 
*/ + if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) { + if (error == ETIMEDOUT) + nfs_need_reconnect(nmp); + tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + printf("nfs recovery restarting %d\n", error); + goto restart; + } + if (reopen && (nfs4_check_for_locks(noop, nofp) == 0)) { + /* just reopen the file on next access */ + const char *vname = vnode_getname(NFSTOV(nofp->nof_np)); + printf("nfs4_recover: %d, need reopen for %s\n", error, vname ? vname : "???"); + vnode_putname(vname); + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags |= NFS_OPEN_FILE_REOPEN; + lck_mtx_unlock(&nofp->nof_lock); + error = 0; + } else { + /* open file state lost */ + lost = 1; + error = 0; + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + lck_mtx_unlock(&nofp->nof_lock); + } + } else { + /* no error, so make sure the reopen flag isn't set */ + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + lck_mtx_unlock(&nofp->nof_lock); + } + /* + * Scan this node's lock owner list for entries with this open owner, + * then walk the lock owner's held lock list recovering each lock. + */ +rescanlocks: + TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) { + if (nlop->nlo_open_owner != noop) + continue; + TAILQ_FOREACH_SAFE(nflp, &nlop->nlo_locks, nfl_lolink, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_DEAD|NFS_FILE_LOCK_BLOCKED)) + continue; + if (!lost) { + error = nfs4_lock_rpc(nofp->nof_np, nofp, nflp, 1, thd, noop->noo_cred); + if (!error) + continue; + /* restart recovery? 
*/ + if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) { + if (error == ETIMEDOUT) + nfs_need_reconnect(nmp); + tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + printf("nfs recovery restarting %d\n", error); + goto restart; + } + /* lock state lost - attempt to close file */ + lost = 1; + error = nfs4_close_rpc(nofp->nof_np, nofp, NULL, noop->noo_cred, R_RECOVER); + if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) { + if (error == ETIMEDOUT) + nfs_need_reconnect(nmp); + tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + printf("nfs recovery restarting %d\n", error); + goto restart; + } + error = 0; + /* rescan locks so we can drop them all */ + goto rescanlocks; + } + if (lost) { + /* kill/remove the lock */ + lck_mtx_lock(&nofp->nof_np->n_openlock); + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + nextnflp = TAILQ_NEXT(nflp, nfl_lolink); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&nofp->nof_np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + lck_mtx_unlock(&nofp->nof_np->n_openlock); + } + } + } + if (lost) { + /* revoke open file state */ + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags |= NFS_OPEN_FILE_LOST; + lck_mtx_unlock(&nofp->nof_lock); + const char *vname = vnode_getname(NFSTOV(nofp->nof_np)); + printf("nfs4_recover: state lost for %s\n", vname ? 
vname : "???"); + vnode_putname(vname); + } + } + } + + if (!error) { + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state &= ~NFSSTA_RECOVER; + wakeup(&nmp->nm_state); + printf("nfs recovery completed\n"); + lck_mtx_unlock(&nmp->nm_lock); + } else { + printf("nfs recovery failed %d\n", error); + } +} + diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c index d8247eafc..ffd12d88f 100644 --- a/bsd/nfs/nfs4_vnops.c +++ b/bsd/nfs/nfs4_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2007 Apple Inc. All rights reserved. + * Copyright (c) 2006-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include @@ -77,15 +77,14 @@ #include #include - int -nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) +nfs4_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx) { - int error = 0, status, numops, slot; + int error = 0, lockerror = ENOENT, status, numops, slot; u_int64_t xid; struct nfsm_chain nmreq, nmrep; struct timeval now; - uint32_t access, supported = 0, missing; + uint32_t access = 0, supported = 0, missing; struct nfsmount *nmp = NFSTONMP(np); int nfsvers = nmp->nm_vers; uid_t uid; @@ -93,7 +92,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 3; // PUTFH + ACCESS + GETATTR + // PUTFH, ACCESS, GETATTR + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 17 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "access", numops); numops--; @@ -111,6 +111,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(np))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -130,6 +132,9 @@ nfs4_access_rpc(nfsnode_t np, u_long 
*mode, vfs_context_t ctx) access |= NFS_ACCESS_DELETE; } } + /* Some servers report DELETE support but erroneously give a denied answer. */ + if ((*mode & NFS_ACCESS_DELETE) && nfs_access_delete && !(access & NFS_ACCESS_DELETE)) + access |= NFS_ACCESS_DELETE; nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); nfsmout_if(error); @@ -144,6 +149,8 @@ nfs4_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) /* pass back the mode returned with this request */ *mode = np->n_mode[slot]; nfsmout: + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -170,7 +177,8 @@ nfs4_getattr_rpc( nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 2; // PUTFH + GETATTR + // PUTFH, GETATTR + numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 15 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "getattr", numops); numops--; @@ -213,7 +221,8 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - numops = 3; // PUTFH + GETATTR + READLINK + // PUTFH, GETATTR, READLINK + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "readlink", numops); numops--; @@ -230,7 +239,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -251,7 +260,7 @@ nfs4_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) *buflenp = len; nfsmout: if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return 
(error); @@ -269,6 +278,7 @@ nfs4_read_rpc_async( { struct nfsmount *nmp; int error = 0, nfsvers, numops; + nfs_stateid stateid; struct nfsm_chain nmreq; nmp = NFSTONMP(np); @@ -278,7 +288,7 @@ nfs4_read_rpc_async( nfsm_chain_null(&nmreq); - // PUTFH + READ + GETATTR + // PUTFH, READ, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 22 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "read", numops); @@ -287,13 +297,8 @@ nfs4_read_rpc_async( nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_READ); - - /* XXX use special stateid for now */ - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - + nfs_get_stateid(np, thd, cred, &stateid); + nfsm_chain_add_stateid(error, &nmreq, &stateid); nfsm_chain_add_64(error, &nmreq, offset); nfsm_chain_add_32(error, &nmreq, len); numops--; @@ -313,7 +318,7 @@ int nfs4_read_rpc_async_finish( nfsnode_t np, struct nfsreq *req, - struct uio *uiop, + uio_t uio, size_t *lenp, int *eofp) { @@ -336,7 +341,7 @@ nfs4_read_rpc_async_finish( if (error == EINPROGRESS) /* async request restarted */ return (error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -346,12 +351,12 @@ nfs4_read_rpc_async_finish( nfsm_chain_get_32(error, &nmrep, retlen); if (!error) { *lenp = MIN(retlen, *lenp); - error = nfsm_chain_get_uio(&nmrep, *lenp, uiop); + error = nfsm_chain_get_uio(&nmrep, *lenp, uio); } nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (eofp) { if (!eof && !retlen) eof = 1; @@ -364,7 +369,7 @@ nfs4_read_rpc_async_finish( int 
nfs4_write_rpc_async( nfsnode_t np, - struct uio *uiop, + uio_t uio, size_t len, thread_t thd, kauth_cred_t cred, @@ -374,7 +379,7 @@ nfs4_write_rpc_async( { struct nfsmount *nmp; int error = 0, nfsvers, numops; - off_t offset; + nfs_stateid stateid; struct nfsm_chain nmreq; nmp = NFSTONMP(np); @@ -382,11 +387,9 @@ nfs4_write_rpc_async( return (ENXIO); nfsvers = nmp->nm_vers; - offset = uiop->uio_offset; - nfsm_chain_null(&nmreq); - // PUTFH + WRITE + GETATTR + // PUTFH, WRITE, GETATTR numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 25 * NFSX_UNSIGNED + len); nfsm_chain_add_compound_header(error, &nmreq, "write", numops); @@ -395,18 +398,13 @@ nfs4_write_rpc_async( nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_WRITE); - - /* XXX use special stateid for now */ - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - nfsm_chain_add_32(error, &nmreq, 0xffffffff); - - nfsm_chain_add_64(error, &nmreq, uiop->uio_offset); + nfs_get_stateid(np, thd, cred, &stateid); + nfsm_chain_add_stateid(error, &nmreq, &stateid); + nfsm_chain_add_64(error, &nmreq, uio_offset(uio)); nfsm_chain_add_32(error, &nmreq, iomode); nfsm_chain_add_32(error, &nmreq, len); if (!error) - error = nfsm_chain_add_uio(&nmreq, uiop, len); + error = nfsm_chain_add_uio(&nmreq, uio, len); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, @@ -452,7 +450,7 @@ nfs4_write_rpc_async_finish( nmp = NFSTONMP(np); if (!nmp) error = ENXIO; - if (!error && (lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if (!error && (lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -480,7 +478,7 @@ nfs4_write_rpc_async_finish( nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); nfsmout: if 
(!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_cleanup(&nmrep); if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async && ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) @@ -497,7 +495,7 @@ nfs4_remove_rpc( thread_t thd, kauth_cred_t cred) { - int error = 0, remove_error = 0, status; + int error = 0, lockerror = ENOENT, remove_error = 0, status; struct nfsmount *nmp; int nfsvers, numops; u_int64_t xid; @@ -507,7 +505,7 @@ nfs4_remove_rpc( if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; - +restart: nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -531,6 +529,8 @@ nfs4_remove_rpc( error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -539,13 +539,20 @@ nfs4_remove_rpc( nfsm_chain_check_change_info(error, &nmrep, dnp); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(dnp); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - dnp->n_flag |= NMODIFIED; + if (!lockerror) { + dnp->n_flag |= NMODIFIED; + nfs_node_unlock(dnp); + } + if (error == NFSERR_GRACE) { + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + goto restart; + } return (remove_error); } @@ -560,7 +567,7 @@ nfs4_rename_rpc( int tnamelen, vfs_context_t ctx) { - int error = 0, status, nfsvers, numops; + int error = 0, lockerror = ENOENT, status, nfsvers, numops; struct nfsmount *nmp; u_int64_t xid, savedxid; struct nfsm_chain nmreq, nmrep; @@ -605,6 +612,8 @@ nfs4_rename_rpc( error = nfs_request(fdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock2(fdnp, tdnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); 
nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -617,19 +626,22 @@ nfs4_rename_rpc( nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); savedxid = xid; nfsm_chain_loadattr(error, &nmrep, tdnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(tdnp); nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); xid = savedxid; nfsm_chain_loadattr(error, &nmrep, fdnp, nfsvers, NULL, &xid); - if (error) + if (error && !lockerror) NATTRINVALIDATE(fdnp); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - fdnp->n_flag |= NMODIFIED; - tdnp->n_flag |= NMODIFIED; + if (!lockerror) { + fdnp->n_flag |= NMODIFIED; + tdnp->n_flag |= NMODIFIED; + nfs_node_unlock2(fdnp, tdnp); + } /* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ if (error == EEXIST) error = 0; @@ -639,59 +651,48 @@ nfs4_rename_rpc( /* * NFS V4 readdir RPC. */ -#define DIRHDSIZ ((int)(sizeof(struct dirent) - (MAXNAMLEN + 1))) int -nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) -{ - size_t len, tlen, skiplen, left; - struct dirent *dp = NULL; - vnode_t newvp; - nfsuint64 *cookiep; - struct componentname cn, *cnp = &cn; - nfsuint64 cookie; +nfs4_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) +{ struct nfsmount *nmp; - nfsnode_t np; - int error = 0, lockerror, status, more_entries = 1, blksiz = 0, bigenough = 1; - int nfsvers, rdirplus, nmreaddirsize, nmrsize, eof, i, numops; - u_int64_t xid, savexid; - struct nfs_vattr nvattr; - struct nfsm_chain nmreq, nmrep; - char *cp; + int error = 0, lockerror, nfsvers, rdirplus, bigcookies, numops; + int i, status, more_entries = 1, eof, bp_dropped = 0; + uint32_t nmreaddirsize, nmrsize; + uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed; + uint64_t cookie, lastcookie, xid, savedxid; + struct nfsm_chain nmreq, nmrep, nmrepsave; + fhandle_t fh; + struct nfs_vattr nvattr, *nvattrp; + struct 
nfs_dir_buf_header *ndbhp; + struct direntry *dp; + char *padstart, padlen; const char *tag; uint32_t entry_attrs[NFS_ATTR_BITMAP_LEN]; - fhandle_t fh; + struct timeval now; -#if DIAGNOSTIC - /* XXX limitation based on need to adjust uio */ - if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || - (uio_uio_resid(uiop) & (DIRBLKSIZ - 1))) - panic("nfs4_readdir_rpc: bad uio"); -#endif nmp = NFSTONMP(dnp); if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; nmreaddirsize = nmp->nm_readdirsize; nmrsize = nmp->nm_rsize; - rdirplus = (nmp->nm_flag & NFSMNT_RDIRPLUS) ? 1 : 0; - - bzero(cnp, sizeof(*cnp)); - newvp = NULLVP; + bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES; + rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0; /* * Set up attribute request for entries. * For READDIRPLUS functionality, get everything. - * Otherwise, just get what we need for struct dirent. + * Otherwise, just get what we need for struct direntry. */ if (rdirplus) { - tag = "READDIRPLUS"; + tag = "readdirplus"; for (i=0; i < NFS_ATTR_BITMAP_LEN; i++) entry_attrs[i] = nfs_getattr_bitmap[i] & nmp->nm_fsattr.nfsa_supp_attr[i]; NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEHANDLE); } else { - tag = "READDIR"; + tag = "readdir"; NFS_CLEAR_ATTRIBUTES(entry_attrs); NFS_BITMAP_SET(entry_attrs, NFS_FATTR_TYPE); NFS_BITMAP_SET(entry_attrs, NFS_FATTR_FILEID); @@ -699,78 +700,89 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) /* XXX NFS_BITMAP_SET(entry_attrs, NFS_FATTR_MOUNTED_ON_FILEID); */ NFS_BITMAP_SET(entry_attrs, NFS_FATTR_RDATTR_ERROR); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + /* lock to protect access to cookie verifier */ + if ((lockerror = nfs_node_lock(dnp))) return (lockerror); - /* - * If there is no cookie, assume directory was stale. 
- */ - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) - cookie = *cookiep; - else { - nfs_unlock(dnp); - return (NFSERR_BAD_COOKIE); + /* determine cookie to use, and move dp to the right offset */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + if (ndbhp->ndbh_count) { + for (i=0; i < ndbhp->ndbh_count-1; i++) + dp = NFS_DIRENTRY_NEXT(dp); + cookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + } else { + cookie = bp->nb_lblkno; + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } + lastcookie = cookie; /* - * The NFS client is responsible for the "." and ".." - * entries in the directory. So, we put them at the top. + * The NFS client is responsible for the "." and ".." entries in the + * directory. So, we put them at the start of the first buffer. */ - if ((uiop->uio_offset == 0) && - ((2*(4 + DIRHDSIZ)) <= uio_uio_resid(uiop))) { - /* add "." entry */ - len = 2; - tlen = nfsm_rndup(len); - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + if ((bp->nb_lblkno == 0) && (ndbhp->ndbh_count == 0)) { + fh.fh_len = 0; + fhlen = rdirplus ? fh.fh_len + 1 : 0; + xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0; + /* "." */ + namlen = 1; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + if (xlen) + bzero(&dp->d_name[namlen+1], xlen); + dp->d_namlen = namlen; + strlcpy(dp->d_name, ".", namlen+1); dp->d_fileno = dnp->n_vattr.nva_fileid; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_DIR; - strlcpy(dp->d_name, ".", len); - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ + tlen; - uio_iov_base_add(uiop, DIRHDSIZ + tlen); - uio_uio_resid_add(uiop, -(DIRHDSIZ + tlen)); - uio_iov_len_add(uiop, -(DIRHDSIZ + tlen)); - /* add ".." entry */ - len = 3; - tlen = nfsm_rndup(len); - // LP64todo - fix this! 
- dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); + dp->d_reclen = reclen; + dp->d_seekoff = 1; + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + dp = NFS_DIRENTRY_NEXT(dp); + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + if (rdirplus) /* zero out attributes */ + bzero(NFS_DIR_BUF_NVATTR(bp, 0), sizeof(struct nfs_vattr)); + + /* ".." */ + namlen = 2; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + if (xlen) + bzero(&dp->d_name[namlen+1], xlen); + dp->d_namlen = namlen; + strlcpy(dp->d_name, "..", namlen+1); if (dnp->n_parent) dp->d_fileno = VTONFS(dnp->n_parent)->n_vattr.nva_fileid; else dp->d_fileno = dnp->n_vattr.nva_fileid; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_DIR; - strlcpy(dp->d_name, "..", len); - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ + tlen; - uio_iov_base_add(uiop, DIRHDSIZ + tlen); - uio_uio_resid_add(uiop, -(DIRHDSIZ + tlen)); - uio_iov_len_add(uiop, -(DIRHDSIZ + tlen)); - cookie.nfsuquad[0] = 0; - cookie.nfsuquad[1] = 2; + dp->d_reclen = reclen; + dp->d_seekoff = 2; + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + dp = NFS_DIRENTRY_NEXT(dp); + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + if (rdirplus) /* zero out attributes */ + bzero(NFS_DIR_BUF_NVATTR(bp, 1), sizeof(struct nfs_vattr)); + + ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data; + ndbhp->ndbh_count = 2; } /* - * Loop around doing readdir rpc's of size nm_readdirsize - * truncated to a multiple of DIRBLKSIZ. - * The stopping criteria is EOF or buffer full. + * Loop around doing readdir(plus) RPCs of size nm_readdirsize until + * the buffer is full (or we hit EOF). Then put the remainder of the + * results in the next buffer(s). 
*/ - while (more_entries && bigenough) { - nfsm_chain_null(&nmreq); - nfsm_chain_null(&nmrep); - nfsm_assert(error, NFSTONMP(dnp), ENXIO); + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) { - numops = 3; // PUTFH + GETATTR + READDIR + // PUTFH, GETATTR, READDIR + numops = 3; nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, tag, numops); numops--; @@ -782,195 +794,203 @@ nfs4_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_READDIR); - /* opaque values don't need swapping, but as long */ - /* as we are consistent about it, it should be ok */ - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]); - if ((cookie.nfsuquad[0] == 0) && (cookie.nfsuquad[1] <= 2)) - nfsm_chain_add_32(error, &nmreq, 0); - else - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]); + nfsm_chain_add_64(error, &nmreq, (cookie <= 2) ? 
0 : cookie); + nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf); nfsm_chain_add_32(error, &nmreq, nmreaddirsize); nfsm_chain_add_32(error, &nmreq, nmrsize); nfsm_chain_add_bitmap(error, &nmreq, entry_attrs, NFS_ATTR_BITMAP_LEN); nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); - nfs_unlock(dnp); + nfs_node_unlock(dnp); nfsmout_if(error); error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; - savexid = xid; + + savedxid = xid; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); nfsm_chain_op_check(error, &nmrep, NFS_OP_READDIR); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]); + nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf); nfsm_chain_get_32(error, &nmrep, more_entries); - nfs_unlock(dnp); + + if (!lockerror) { + nfs_node_unlock(dnp); + lockerror = ENOENT; + } nfsmout_if(error); - /* Loop through the entries, massaging them into "dirent" form. */ - /* If READDIRPLUS, also create the vnodes. 
*/ - while (more_entries && bigenough) { + if (rdirplus) + microuptime(&now); + + /* loop through the entries packing them into the buffer */ + while (more_entries) { /* Entry: COOKIE, NAME, FATTR */ - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]); - nfsm_chain_get_32(error, &nmrep, len); + nfsm_chain_get_64(error, &nmrep, cookie); + nfsm_chain_get_32(error, &nmrep, namlen); nfsmout_if(error); - /* Note: NFS supports longer names, but struct dirent doesn't */ - /* so we just truncate the names to fit */ - if (len <= 0) { + if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) { + /* we've got a big cookie, make sure flag is set */ + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= NFSSTA_BIGCOOKIES; + lck_mtx_unlock(&nmp->nm_lock); + bigcookies = 1; + } + /* just truncate names that don't fit in direntry.d_name */ + if (namlen <= 0) { error = EBADRPC; goto nfsmout; } - if (len > MAXNAMLEN) { - skiplen = len - MAXNAMLEN; - len = MAXNAMLEN; + if (namlen > (sizeof(dp->d_name)-1)) { + skiplen = namlen - sizeof(dp->d_name) + 1; + namlen = sizeof(dp->d_name) - 1; } else { skiplen = 0; } - tlen = nfsm_rndup(len); - if (tlen == len) - tlen += 4; /* To ensure null termination */ - left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - blksiz = 0; - } - if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop)) { - bigenough = 0; - break; + /* guess that fh size will be same as parent */ + fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0; + xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0; + attrlen = rdirplus ? 
sizeof(struct nfs_vattr) : 0; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + if (space_needed > space_free) { + /* + * We still have entries to pack, but we've + * run out of room in the current buffer. + * So we need to move to the next buffer. + * The block# for the next buffer is the + * last cookie in the current buffer. + */ +nextbuffer: + ndbhp->ndbh_flags |= NDB_FULL; + nfs_buf_release(bp, 0); + bp_dropped = 1; + bp = NULL; + error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp); + nfsmout_if(error); + /* initialize buffer */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + ndbhp->ndbh_flags = 0; + ndbhp->ndbh_count = 0; + ndbhp->ndbh_entry_end = sizeof(*ndbhp); + ndbhp->ndbh_ncgen = dnp->n_ncgen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); - dp->d_fileno = 0; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; + nmrepsave = nmrep; + dp->d_fileno = cookie; /* placeholder */ + dp->d_seekoff = cookie; + dp->d_namlen = namlen; + dp->d_reclen = reclen; dp->d_type = DT_UNKNOWN; - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ; -#if LP64KERN - uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); -#else - uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int)DIRHDSIZ)); -#endif - uio_iov_base_add(uiop, DIRHDSIZ); - // LP64todo - fix this! 
- cnp->cn_nameptr = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - cnp->cn_namelen = len; - error = nfsm_chain_get_uio(&nmrep, len, uiop); + nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name); + nfsmout_if(error); + dp->d_name[namlen] = '\0'; if (skiplen) nfsm_chain_adv(error, &nmrep, - nfsm_rndup(len + skiplen) - nfsm_rndup(len)); + nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen)); nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); - if (error && NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_RDATTR_ERROR)) { + nvattrp = rdirplus ? NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count) : &nvattr; + NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, nvattrp, &fh, NULL); + if (error && NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_RDATTR_ERROR)) { /* OK, we didn't get attributes, whatever... */ - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + if (rdirplus) /* mark the attributes invalid */ + bzero(nvattrp, sizeof(struct nfs_vattr)); + else + NFS_CLEAR_ATTRIBUTES(nvattrp->nva_bitmap); error = 0; } + /* check for more entries after this one */ nfsm_chain_get_32(error, &nmrep, more_entries); nfsmout_if(error); - cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - tlen -= len; - *cp = '\0'; - uio_iov_base_add(uiop, tlen); - uio_iov_len_add(uiop, -tlen); - uiop->uio_offset += tlen; - uio_uio_resid_add(uiop, -tlen); - - /* - * Skip any "." and ".." entries returned from server. - * (Actually, just leave it in place with d_fileno == 0.) - */ - if ((cnp->cn_nameptr[0] == '.') && - ((len == 1) || ((len == 2) && (cnp->cn_nameptr[1] == '.')))) { - /* clear the name too */ - dp->d_namlen = 0; - dp->d_name[0] = '\0'; + /* Skip any "." and ".." entries returned from server. 
*/ + if ((dp->d_name[0] == '.') && ((namlen == 1) || ((namlen == 2) && (dp->d_name[1] == '.')))) { + lastcookie = cookie; continue; } - if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_TYPE)) - dp->d_type = IFTODT(VTTOIF(nvattr.nva_type)); - if (NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEID)) - dp->d_fileno = (int)nvattr.nva_fileid; - if (rdirplus && NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE) && - !NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) { - cnp->cn_hash = 0; - error = nfs_nget(NFSTOMP(dnp), dnp, cnp, - fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np); - if (!error) { - nfs_unlock(np); - vnode_put(NFSTOV(np)); + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_TYPE)) + dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type)); + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEID)) + dp->d_fileno = nvattrp->nva_fileid; + if (rdirplus) { + /* fileid is already in d_fileno, so stash xid in attrs */ + nvattrp->nva_fileid = savedxid; + if (NFS_BITMAP_ISSET(nvattrp->nva_bitmap, NFS_FATTR_FILEHANDLE)) { + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + if (space_needed > space_free) { + /* didn't actually have the room... 
move on to next buffer */ + nmrep = nmrepsave; + goto nextbuffer; + } + /* pack the file handle into the record */ + dp->d_name[dp->d_namlen+1] = fh.fh_len; + bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len); + } else { + /* mark the file handle invalid */ + fh.fh_len = 0; + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + bzero(&dp->d_name[dp->d_namlen+1], fhlen); } + *(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec; + dp->d_reclen = reclen; } - nfsmout_if(error); + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + ndbhp->ndbh_count++; + lastcookie = cookie; + + /* advance to next direntry in buffer */ + dp = NFS_DIRENTRY_NEXT(dp); + ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data; + /* zero out the pad bytes */ + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + } + /* Finally, get the eof boolean */ + nfsm_chain_get_32(error, &nmrep, eof); + nfsmout_if(error); + if (eof) { + ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF); + nfs_node_lock_force(dnp); + dnp->n_eofcookie = lastcookie; + nfs_node_unlock(dnp); + } else { + more_entries = 1; } - /* If at end of rpc data, get the eof boolean */ - if (!more_entries) { - nfsm_chain_get_32(error, &nmrep, eof); - if (!error) - more_entries = (eof == 0); + if (bp_dropped) { + nfs_buf_release(bp, 0); + bp = NULL; + break; } - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; nfsmout_if(error); nfsm_chain_cleanup(&nmrep); + nfsm_chain_null(&nmreq); } - nfs_unlock(dnp); - /* - * Fill last record, iff any, out to a multiple of DIRBLKSIZ - * by increasing d_reclen for the last record. 
- */ - if (blksiz > 0) { - left = DIRBLKSIZ - blksiz; - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - } - - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); - - /* - * We are now either at the end of the directory or have filled the - * block. - */ - if (bigenough) - dnp->n_direofoffset = uiop->uio_offset; - else { - if (uio_uio_resid(uiop) > 0) - printf("EEK! nfs4_readdir_rpc resid > 0\n"); - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - if (cookiep) - *cookiep = cookie; - } - - nfs_unlock(dnp); nfsmout: + if (bp_dropped && bp) + nfs_buf_release(bp, 0); + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - return (error); + return (bp_dropped ? NFSERR_DIRBUFDROPPED : error); } int @@ -1041,7 +1061,7 @@ nfs4_lookup_rpc_async_finish( fhandle_t *fhp, struct nfs_vattr *nvap) { - int error = 0, status, nfsvers, numops; + int error = 0, lockerror = ENOENT, status, nfsvers, numops; uint32_t val = 0; u_int64_t xid; struct nfsmount *nmp; @@ -1054,6 +1074,8 @@ nfs4_lookup_rpc_async_finish( error = nfs_request_async_finish(req, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -1078,6 +1100,8 @@ nfs4_lookup_rpc_async_finish( goto nfsmout; } nfsmout: + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmrep); return (error); } @@ -1132,7 +1156,7 @@ nfs4_commit_rpc( error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, current_thread(), cred, 0, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -1142,7 +1166,7 @@ 
nfs4_commit_rpc( nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsmout_if(error); lck_mtx_lock(&nmp->nm_lock); if (nmp->nm_verf != wverf) { @@ -1177,7 +1201,8 @@ nfs4_pathconf_rpc( nfsm_chain_null(&nmrep); /* NFSv4: fetch "pathconf" info for this node */ - numops = 2; // PUTFH + GETATTR + // PUTFH, GETATTR + numops = 2; nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); nfsm_chain_add_compound_header(error, &nmreq, "pathconf", numops); numops--; @@ -1207,11 +1232,12 @@ nfs4_pathconf_rpc( NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); error = nfs4_parsefattr(&nmrep, nfsap, &nvattr, NULL, NULL); nfsmout_if(error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; - nfs_loadattrcache(np, &nvattr, &xid, 0); + if (!error) + nfs_loadattrcache(np, &nvattr, &xid, 0); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); @@ -1231,7 +1257,7 @@ nfs4_vnop_getattr( struct nfs_vattr nva; int error; - error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, 0); + error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED); if (error) return (error); @@ -1308,14 +1334,14 @@ int nfs4_setattr_rpc( nfsnode_t np, struct vnode_attr *vap, - vfs_context_t ctx, - int alreadylocked) + vfs_context_t ctx) { struct nfsmount *nmp = NFSTONMP(np); int error = 0, lockerror = ENOENT, status, nfsvers, numops; - u_int64_t xid; + u_int64_t xid, nextxid; struct nfsm_chain nmreq, nmrep; - uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen, stateid; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + nfs_stateid stateid; if (!nmp) return (ENXIO); @@ -1342,13 +1368,10 @@ nfs4_setattr_rpc( numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_SETATTR); if (VATTR_IS_ACTIVE(vap, va_data_size)) - stateid = 0xffffffff; /* XXX use the special stateid 
for now */ + nfs_get_stateid(np, vfs_context_thread(ctx), vfs_context_ucred(ctx), &stateid); else - stateid = 0; - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); - nfsm_chain_add_32(error, &nmreq, stateid); + stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0; + nfsm_chain_add_stateid(error, &nmreq, &stateid); nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); @@ -1359,7 +1382,7 @@ nfs4_setattr_rpc( nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); - if (!alreadylocked && ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); @@ -1373,333 +1396,3618 @@ nfs4_setattr_rpc( nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); if (error) NATTRINVALIDATE(np); + /* + * We just changed the attributes and we want to make sure that we + * see the latest attributes. Get the next XID. If it's not the + * next XID after the SETATTR XID, then it's possible that another + * RPC was in flight at the same time and it might put stale attributes + * in the cache. In that case, we invalidate the attributes and set + * the attribute cache XID to guarantee that newer attributes will + * get loaded next. + */ + nextxid = 0; + nfs_get_xid(&nextxid); + if (nextxid != (xid + 1)) { + np->n_xid = nextxid; + NATTRINVALIDATE(np); + } nfsmout: - if (!alreadylocked && !lockerror) - nfs_unlock(np); + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); } +/* + * Wait for any pending recovery to complete. 
+ */
 int
-nfs4_vnop_open(struct vnop_open_args *ap)
+nfs_mount_state_wait_for_recovery(struct nfsmount *nmp)
 {
-	return nfs3_vnop_open(ap);
+	struct timespec ts = { 1, 0 };	/* msleep() timeout: re-check NFSSTA_RECOVER at least once a second */
+	int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0;	/* NOTE(review): no NULL check on nmp in this function - confirm callers guarantee it */
+
+	lck_mtx_lock(&nmp->nm_lock);
+	while (nmp->nm_state & NFSSTA_RECOVER) {
+		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
+			break;	/* stop waiting; the nfs_sigintr() error is what we return */
+		nfs_mount_sock_thread_wake(nmp);
+		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
+	}
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	return (error);
 }
 
+/*
+ * We're about to use/manipulate NFS mount's open/lock state.
+ * Wait for any pending state recovery to complete, then mark the state
+ * as being in use by incrementing nm_stateinuse (which will hold off
+ * the recovery thread until we're done). Returns 0 on success, ENXIO
+ * if nmp is NULL, or the error reported by nfs_sigintr() while waiting.
+ */
 int
-nfs4_vnop_close(struct vnop_close_args *ap)
+nfs_mount_state_in_use_start(struct nfsmount *nmp)
 {
-	return nfs3_vnop_close(ap);
+	struct timespec ts = { 1, 0 };	/* msleep() timeout: re-check NFSSTA_RECOVER at least once a second */
+	int error = 0, slpflag = (nmp && (nmp->nm_flag & NFSMNT_INT)) ? PCATCH : 0;	/* nmp may be NULL here: don't touch it before the check below */
+
+	if (!nmp)
+		return (ENXIO);
+	lck_mtx_lock(&nmp->nm_lock);
+	while (nmp->nm_state & NFSSTA_RECOVER) {
+		if ((error = nfs_sigintr(nmp, NULL, current_thread(), 1)))
+			break;	/* stop waiting; state is NOT marked in use on this path */
+		nfs_mount_sock_thread_wake(nmp);
+		msleep(&nmp->nm_state, &nmp->nm_lock, slpflag|(PZERO-1), "nfsrecoverwait", &ts);
+	}
+	if (!error)
+		nmp->nm_stateinuse++;	/* only count ourselves when we did not bail out above */
+	lck_mtx_unlock(&nmp->nm_lock);
+
+	return (error);
 }
 
+/*
+ * We're done using/manipulating the NFS mount's open/lock
+ * state. If the given error indicates that recovery should
+ * be performed, we'll initiate recovery.
+ */ int -nfs4_vnop_advlock(__unused struct vnop_advlock_args *ap) +nfs_mount_state_in_use_end(struct nfsmount *nmp, int error) { - return (ENOSYS); + int restart = nfs_mount_state_error_should_restart(error); + + if (!nmp) + return (restart); + lck_mtx_lock(&nmp->nm_lock); + if (restart && (error != NFSERR_OLD_STATEID) && (error != NFSERR_GRACE)) { + if (!(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_mount_state_in_use_end: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + } + if (nmp->nm_stateinuse > 0) + nmp->nm_stateinuse--; + else + panic("NFS mount state in use count underrun"); + if (!nmp->nm_stateinuse && (nmp->nm_state & NFSSTA_RECOVER)) + wakeup(&nmp->nm_stateinuse); + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + + return (restart); } /* - * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. - * Files are created using the NFSv4 OPEN RPC. So we must open the - * file to create it and then close it immediately. + * Does the error mean we should restart/redo a state-related operation? 
*/ int -nfs4_vnop_create( - struct vnop_create_args /* { - struct vnodeop_desc *a_desc; - vnode_t a_dvp; - vnode_t *a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; - } */ *ap) +nfs_mount_state_error_should_restart(int error) { - vfs_context_t ctx = ap->a_context; - struct componentname *cnp = ap->a_cnp; - struct vnode_attr *vap = ap->a_vap; - vnode_t dvp = ap->a_dvp; - vnode_t *vpp = ap->a_vpp; - struct nfsmount *nmp; - struct nfs_vattr nvattr, dnvattr; - int error = 0, create_error = EIO, lockerror = ENOENT, status; - int nfsvers, numops; - u_int64_t xid, savedxid = 0; - nfsnode_t dnp = VTONFS(dvp); - nfsnode_t np = NULL; - vnode_t newvp = NULL; - struct nfsm_chain nmreq, nmrep; - uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; - uint32_t seqid, stateid[4], rflags, delegation, val; - fhandle_t fh; - struct nfsreq *req = NULL; - struct nfs_dulookup dul; + switch (error) { + case NFSERR_STALE_STATEID: + case NFSERR_STALE_CLIENTID: + case NFSERR_ADMIN_REVOKED: + case NFSERR_EXPIRED: + case NFSERR_OLD_STATEID: + case NFSERR_BAD_STATEID: + case NFSERR_GRACE: + return (1); + } + return (0); +} - static uint32_t nfs4_open_owner_hack = 0; +/* + * In some cases we may want to limit how many times we restart a + * state-related operation - e.g. we're repeatedly getting NFSERR_GRACE. + * Base the limit on the lease (as long as it's not too short). + */ +uint +nfs_mount_state_max_restarts(struct nfsmount *nmp) +{ + return (MAX(nmp->nm_fsattr.nfsa_lease, 60)); +} - nmp = VTONMP(dvp); + +/* + * Mark an NFS node's open state as busy. + */ +int +nfs_open_state_set_busy(nfsnode_t np, vfs_context_t ctx) +{ + struct nfsmount *nmp; + thread_t thd = vfs_context_thread(ctx); + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = NFSTONMP(np); if (!nmp) return (ENXIO); - nfsvers = nmp->nm_vers; + slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; - seqid = stateid[0] = stateid[1] = stateid[2] = stateid[3] = 0; - rflags = 0; + lck_mtx_lock(&np->n_openlock); + while (np->n_openflags & N_OPENBUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + np->n_openflags |= N_OPENWANT; + msleep(&np->n_openflags, &np->n_openlock, slpflag, "nfs_open_state_set_busy", &ts); + } + if (!error) + np->n_openflags |= N_OPENBUSY; + lck_mtx_unlock(&np->n_openlock); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + return (error); +} - nfsm_chain_null(&nmreq); - nfsm_chain_null(&nmrep); +/* + * Clear an NFS node's open state busy flag and wake up + * anyone wanting it. + */ +void +nfs_open_state_clear_busy(nfsnode_t np) +{ + int wanted; + + lck_mtx_lock(&np->n_openlock); + if (!(np->n_openflags & N_OPENBUSY)) + panic("nfs_open_state_clear_busy"); + wanted = (np->n_openflags & N_OPENWANT); + np->n_openflags &= ~(N_OPENBUSY|N_OPENWANT); + lck_mtx_unlock(&np->n_openlock); + if (wanted) + wakeup(&np->n_openflags); +} - // PUTFH, SAVEFH, OPEN(CREATE), GETATTR(FH), RESTOREFH, GETATTR - numops = 6; - nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen); - nfsm_chain_add_compound_header(error, &nmreq, "create", numops); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - nfsm_chain_add_32(error, &nmreq, NFS_OPEN_SHARE_ACCESS_BOTH); - nfsm_chain_add_32(error, &nmreq, NFS_OPEN_SHARE_DENY_NONE); - nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid - OSAddAtomic(1, (SInt32*)&nfs4_open_owner_hack); - nfsm_chain_add_32(error, &nmreq, sizeof(nfs4_open_owner_hack)); - nfsm_chain_add_opaque(error, &nmreq, &nfs4_open_owner_hack, sizeof(nfs4_open_owner_hack)); // open_owner4.owner - // openflag4 - 
nfsm_chain_add_32(error, &nmreq, NFS_OPEN_CREATE); - nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED); // XXX exclusive/guarded - nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); - // open_claim4 - nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL); - nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); - NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); - nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - nfsm_chain_build_done(error, &nmreq); - nfsm_assert(error, (numops == 0), EPROTO); - nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); +/* + * Search a mount's open owner list for the owner for this credential. + * If not found and "alloc" is set, then allocate a new one. 
+ */ +struct nfs_open_owner * +nfs_open_owner_find(struct nfsmount *nmp, kauth_cred_t cred, int alloc) +{ + uid_t uid = kauth_cred_getuid(cred); + struct nfs_open_owner *noop, *newnoop = NULL; - error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, - vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); - if (!error) { - nfs_dulookup_start(&dul, dnp, ctx); - error = nfs_request_async_finish(req, &nmrep, &xid, &status); +tryagain: + lck_mtx_lock(&nmp->nm_lock); + TAILQ_FOREACH(noop, &nmp->nm_open_owners, noo_link) { + if (kauth_cred_getuid(noop->noo_cred) == uid) + break; } - savedxid = xid; - nfsm_chain_skip_tag(error, &nmrep); - nfsm_chain_get_32(error, &nmrep, numops); - nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - nfsm_chain_check_change_info(error, &nmrep, dnp); - nfsm_chain_get_32(error, &nmrep, rflags); - bmlen = NFS_ATTR_BITMAP_LEN; - nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); - nfsm_chain_get_32(error, &nmrep, delegation); - if (!error) - switch (delegation) { - case NFS_OPEN_DELEGATE_NONE: - break; - case NFS_OPEN_DELEGATE_READ: - printf("nfs4_vnop_create: read delegation?\n"); - nfsm_chain_adv(error, &nmrep, 5*NFSX_UNSIGNED); - // ACE: - nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); - nfsm_chain_get_32(error, &nmrep, val); /* string length */ - nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); - break; - case NFS_OPEN_DELEGATE_WRITE: - printf("nfs4_vnop_create: write delegation?\n"); - nfsm_chain_adv(error, &nmrep, 5*NFSX_UNSIGNED); - nfsm_chain_adv(error, &nmrep, 3*NFSX_UNSIGNED); - // ACE: - nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); - nfsm_chain_get_32(error, &nmrep, val); /* string length */ - nfsm_chain_adv(error, 
&nmrep, nfsm_rndup(val)); - break; - default: - error = EBADRPC; - break; - } - /* At this point if we have no error, the object was created. */ - /* if we don't get attributes, then we should lookitup. */ - create_error = error; - nfsmout_if(error); - nfs_vattr_set_supported(bitmap, vap); - nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); - nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); - nfsmout_if(error); - if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { - printf("nfs: open/create didn't return filehandle?\n"); + if (!noop && !newnoop && alloc) { + lck_mtx_unlock(&nmp->nm_lock); + MALLOC(newnoop, struct nfs_open_owner *, sizeof(struct nfs_open_owner), M_TEMP, M_WAITOK); + if (!newnoop) + return (NULL); + bzero(newnoop, sizeof(*newnoop)); + lck_mtx_init(&newnoop->noo_lock, nfs_open_grp, LCK_ATTR_NULL); + newnoop->noo_mount = nmp; + kauth_cred_ref(cred); + newnoop->noo_cred = cred; + newnoop->noo_name = OSAddAtomic(1, &nfs_open_owner_seqnum); + TAILQ_INIT(&newnoop->noo_opens); + goto tryagain; + } + if (!noop && newnoop) { + newnoop->noo_flags |= NFS_OPEN_OWNER_LINK; + TAILQ_INSERT_HEAD(&nmp->nm_open_owners, newnoop, noo_link); + noop = newnoop; + } + lck_mtx_unlock(&nmp->nm_lock); + + if (newnoop && (noop != newnoop)) + nfs_open_owner_destroy(newnoop); + + if (noop) + nfs_open_owner_ref(noop); + + return (noop); +} + +/* + * destroy an open owner that's no longer needed + */ +void +nfs_open_owner_destroy(struct nfs_open_owner *noop) +{ + if (noop->noo_cred) + kauth_cred_unref(&noop->noo_cred); + lck_mtx_destroy(&noop->noo_lock, nfs_open_grp); + FREE(noop, M_TEMP); +} + +/* + * acquire a reference count on an open owner + */ +void +nfs_open_owner_ref(struct nfs_open_owner *noop) +{ + lck_mtx_lock(&noop->noo_lock); + noop->noo_refcnt++; + lck_mtx_unlock(&noop->noo_lock); +} + +/* + * drop a reference count on an open owner and destroy it if + * it is no longer 
referenced and no longer on the mount's list. + */ +void +nfs_open_owner_rele(struct nfs_open_owner *noop) +{ + lck_mtx_lock(&noop->noo_lock); + if (noop->noo_refcnt < 1) + panic("nfs_open_owner_rele: no refcnt"); + noop->noo_refcnt--; + if (!noop->noo_refcnt && (noop->noo_flags & NFS_OPEN_OWNER_BUSY)) + panic("nfs_open_owner_rele: busy"); + /* XXX we may potentially want to clean up idle/unused open owner structures */ + if (noop->noo_refcnt || (noop->noo_flags & NFS_OPEN_OWNER_LINK)) { + lck_mtx_unlock(&noop->noo_lock); + return; + } + /* owner is no longer referenced or linked to mount, so destroy it */ + lck_mtx_unlock(&noop->noo_lock); + nfs_open_owner_destroy(noop); +} + +/* + * Mark an open owner as busy because we are about to + * start an operation that uses and updates open owner state. + */ +int +nfs_open_owner_set_busy(struct nfs_open_owner *noop, thread_t thd) +{ + struct nfsmount *nmp; + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = noop->noo_mount; + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + lck_mtx_lock(&noop->noo_lock); + while (noop->noo_flags & NFS_OPEN_OWNER_BUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + noop->noo_flags |= NFS_OPEN_OWNER_WANT; + msleep(noop, &noop->noo_lock, slpflag, "nfs_open_owner_set_busy", &ts); + } + if (!error) + noop->noo_flags |= NFS_OPEN_OWNER_BUSY; + lck_mtx_unlock(&noop->noo_lock); + + return (error); +} + +/* + * Clear the busy flag on an open owner and wake up anyone waiting + * to mark it busy. 
+ */ +void +nfs_open_owner_clear_busy(struct nfs_open_owner *noop) +{ + int wanted; + + lck_mtx_lock(&noop->noo_lock); + if (!(noop->noo_flags & NFS_OPEN_OWNER_BUSY)) + panic("nfs_open_owner_clear_busy"); + wanted = (noop->noo_flags & NFS_OPEN_OWNER_WANT); + noop->noo_flags &= ~(NFS_OPEN_OWNER_BUSY|NFS_OPEN_OWNER_WANT); + lck_mtx_unlock(&noop->noo_lock); + if (wanted) + wakeup(noop); +} + +/* + * Given an open/lock owner and an error code, increment the + * sequence ID if appropriate. + */ +void +nfs_owner_seqid_increment(struct nfs_open_owner *noop, struct nfs_lock_owner *nlop, int error) +{ + switch (error) { + case NFSERR_STALE_CLIENTID: + case NFSERR_STALE_STATEID: + case NFSERR_OLD_STATEID: + case NFSERR_BAD_STATEID: + case NFSERR_BAD_SEQID: + case NFSERR_BADXDR: + case NFSERR_RESOURCE: + case NFSERR_NOFILEHANDLE: + /* do not increment the open seqid on these errors */ + return; + } + if (noop) + noop->noo_seqid++; + if (nlop) + nlop->nlo_seqid++; +} + +/* + * Search a node's open file list for any conflicts with this request. + * Also find this open owner's open file structure. + * If not found and "alloc" is set, then allocate one. + */ +int +nfs_open_file_find( + nfsnode_t np, + struct nfs_open_owner *noop, + struct nfs_open_file **nofpp, + uint32_t accessMode, + uint32_t denyMode, + int alloc) +{ + struct nfs_open_file *nofp = NULL, *nofp2, *newnofp = NULL; + + if (!np) + goto alloc; +tryagain: + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nofp2, &np->n_opens, nof_link) { + if (nofp2->nof_owner == noop) { + nofp = nofp2; + if (!accessMode) + break; + } + if ((accessMode & nofp2->nof_deny) || (denyMode & nofp2->nof_access)) { + /* This request conflicts with an existing open on this client. */ + lck_mtx_unlock(&np->n_openlock); + *nofpp = NULL; + return (EACCES); + } + } + + /* + * If this open owner doesn't have an open + * file structure yet, we create one for it. 
+ */ + if (!nofp && !newnofp && alloc) { + lck_mtx_unlock(&np->n_openlock); +alloc: + MALLOC(newnofp, struct nfs_open_file *, sizeof(struct nfs_open_file), M_TEMP, M_WAITOK); + if (!newnofp) { + *nofpp = NULL; + return (ENOMEM); + } + bzero(newnofp, sizeof(*newnofp)); + lck_mtx_init(&newnofp->nof_lock, nfs_open_grp, LCK_ATTR_NULL); + newnofp->nof_owner = noop; + nfs_open_owner_ref(noop); + newnofp->nof_np = np; + lck_mtx_lock(&noop->noo_lock); + TAILQ_INSERT_HEAD(&noop->noo_opens, newnofp, nof_oolink); + lck_mtx_unlock(&noop->noo_lock); + if (np) + goto tryagain; + } + if (!nofp && newnofp) { + if (np) + TAILQ_INSERT_HEAD(&np->n_opens, newnofp, nof_link); + nofp = newnofp; + } + if (np) + lck_mtx_unlock(&np->n_openlock); + + if (newnofp && (nofp != newnofp)) + nfs_open_file_destroy(newnofp); + + *nofpp = nofp; + return (nofp ? 0 : ESRCH); +} + +/* + * Destroy an open file structure. + */ +void +nfs_open_file_destroy(struct nfs_open_file *nofp) +{ + lck_mtx_lock(&nofp->nof_owner->noo_lock); + TAILQ_REMOVE(&nofp->nof_owner->noo_opens, nofp, nof_oolink); + lck_mtx_unlock(&nofp->nof_owner->noo_lock); + nfs_open_owner_rele(nofp->nof_owner); + lck_mtx_destroy(&nofp->nof_lock, nfs_open_grp); + FREE(nofp, M_TEMP); +} + +/* + * Mark an open file as busy because we are about to + * start an operation that uses and updates open file state. + */ +int +nfs_open_file_set_busy(struct nfs_open_file *nofp, thread_t thd) +{ + struct nfsmount *nmp; + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = nofp->nof_owner->noo_mount; + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; + + lck_mtx_lock(&nofp->nof_lock); + while (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + nofp->nof_flags |= NFS_OPEN_FILE_WANT; + msleep(nofp, &nofp->nof_lock, slpflag, "nfs_open_file_set_busy", &ts); + } + if (!error) + nofp->nof_flags |= NFS_OPEN_FILE_BUSY; + lck_mtx_unlock(&nofp->nof_lock); + + return (error); +} + +/* + * Clear the busy flag on an open file and wake up anyone waiting + * to mark it busy. + */ +void +nfs_open_file_clear_busy(struct nfs_open_file *nofp) +{ + int wanted; + + lck_mtx_lock(&nofp->nof_lock); + if (!(nofp->nof_flags & NFS_OPEN_FILE_BUSY)) + panic("nfs_open_file_clear_busy"); + wanted = (nofp->nof_flags & NFS_OPEN_FILE_WANT); + nofp->nof_flags &= ~(NFS_OPEN_FILE_BUSY|NFS_OPEN_FILE_WANT); + lck_mtx_unlock(&nofp->nof_lock); + if (wanted) + wakeup(nofp); +} + +/* + * Get the current (delegation, lock, open, default) stateid for this node. + * If node has a delegation, use that stateid. + * If pid has a lock, use the lockowner's stateid. + * Or use the open file's stateid. + * If no open file, use a default stateid of all ones. + */ +void +nfs_get_stateid(nfsnode_t np, thread_t thd, kauth_cred_t cred, nfs_stateid *sid) +{ + struct nfsmount *nmp = NFSTONMP(np); + proc_t p = thd ? 
get_bsdthreadtask_info(thd) : current_thread(); // XXX async I/O requests don't have a thread + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + struct nfs_lock_owner *nlop = NULL; + nfs_stateid *s = NULL; + + if (np->n_openflags & N_DELEG_MASK) + s = &np->n_dstateid; + else if (p) + nlop = nfs_lock_owner_find(np, p, 0); + if (nlop && !TAILQ_EMPTY(&nlop->nlo_locks)) { + /* we hold locks, use lock stateid */ + s = &nlop->nlo_stateid; + } else if (((noop = nfs_open_owner_find(nmp, cred, 0))) && + (nfs_open_file_find(np, noop, &nofp, 0, 0, 0) == 0) && + !(nofp->nof_flags & NFS_OPEN_FILE_LOST) && + nofp->nof_access) { + /* we (should) have the file open, use open stateid */ + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) + nfs4_reopen(nofp, thd); + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + s = &nofp->nof_stateid; + } + + if (s) { + sid->seqid = s->seqid; + sid->other[0] = s->other[0]; + sid->other[1] = s->other[1]; + sid->other[2] = s->other[2]; + } else { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_get_stateid: no stateid for %s\n", vname ? vname : "???"); + vnode_putname(vname); + sid->seqid = sid->other[0] = sid->other[1] = sid->other[2] = 0xffffffff; + } + if (nlop) + nfs_lock_owner_rele(nlop); + if (noop) + nfs_open_owner_rele(noop); +} + +/* + * We always send the open RPC even if this open's mode is a subset of all + * the existing opens. This makes sure that we will always be able to do a + * downgrade to any of the open modes. + * + * Note: local conflicts should have already been checked. 
(nfs_open_file_find) + */ +int +nfs4_open( + nfsnode_t np, + struct nfs_open_file *nofp, + uint32_t accessMode, + uint32_t denyMode, + vfs_context_t ctx) +{ + vnode_t vp = NFSTOV(np); + vnode_t dvp = NULL; + struct componentname cn; + const char *vname = NULL; + size_t namelen; + char smallname[128]; + char *filename = NULL; + int error = 0, readtoo = 0; + + dvp = vnode_getparent(vp); + vname = vnode_getname(vp); + if (!dvp || !vname) { + error = EIO; + goto out; + } + filename = &smallname[0]; + namelen = snprintf(filename, sizeof(smallname), "%s", vname); + if (namelen >= sizeof(smallname)) { + namelen++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK); + if (!filename) { + error = ENOMEM; + goto out; + } + snprintf(filename, namelen, "%s", vname); + } + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = filename; + cn.cn_namelen = namelen; + + if (!(accessMode & NFS_OPEN_SHARE_ACCESS_READ)) { + /* + * Try to open it for read access too, + * so the buffer cache can read data. 
+ */ + readtoo = 1; + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + } +tryagain: + error = nfs4_open_rpc(nofp, ctx, &cn, NULL, dvp, &vp, NFS_OPEN_NOCREATE, accessMode, denyMode); + if (error) { + if (!nfs_mount_state_error_should_restart(error) && readtoo) { + /* try again without the extra read access */ + accessMode &= ~NFS_OPEN_SHARE_ACCESS_READ; + readtoo = 0; + goto tryagain; + } + goto out; + } + nofp->nof_access |= accessMode; + nofp->nof_deny |= denyMode; + + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw++; + } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r_dw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w_dw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw_dw++; + } else { /* NFS_OPEN_SHARE_DENY_BOTH */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) + nofp->nof_r_drw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) + nofp->nof_w_drw++; + else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) + nofp->nof_rw_drw++; + } + nofp->nof_opencnt++; +out: + if (filename && (filename != &smallname[0])) + FREE(filename, M_TEMP); + if (vname) + vnode_putname(vname); + if (dvp != NULLVP) + vnode_put(dvp); + return (error); +} + + +int +nfs4_vnop_open( + struct vnop_open_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_mode; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + int error, accessMode, denyMode, opened = 0; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + if (!(ap->a_mode & (FREAD|FWRITE))) + return (EINVAL); + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* First, call the common code */ 
+ if ((error = nfs3_vnop_open(ap))) + return (error); + + if (!vnode_isreg(vp)) { + /* Just mark that it was opened */ + lck_mtx_lock(&np->n_openlock); + np->n_openrefcnt++; + lck_mtx_unlock(&np->n_openlock); + return (0); + } + + /* mode contains some combination of: FREAD, FWRITE, O_SHLOCK, O_EXLOCK */ + accessMode = 0; + if (ap->a_mode & FREAD) + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + if (ap->a_mode & FWRITE) + accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE; + if (ap->a_mode & O_EXLOCK) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else if (ap->a_mode & O_SHLOCK) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else + denyMode = NFS_OPEN_SHARE_DENY_NONE; + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + + error = nfs_open_file_find(np, noop, &nofp, accessMode, denyMode, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_open: LOST %s\n", vname); + vnode_putname(vname); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } + + /* + * If we just created the file and the modes match, then we simply use + * the open performed in the create. Otherwise, send the request. 
+ */ + if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && + (nofp->nof_creator == current_thread()) && + (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) && + (denyMode == NFS_OPEN_SHARE_DENY_NONE)) { + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + nofp->nof_creator = NULL; + } else { + if (!opened) + error = nfs4_open(np, nofp, accessMode, denyMode, ctx); + if ((error == EACCES) && (nofp->nof_flags & NFS_OPEN_FILE_CREATE) && + (nofp->nof_creator == current_thread())) { + /* + * Ugh. This can happen if we just created the file with read-only + * perms and we're trying to open it for real with different modes + * (e.g. write-only or with a deny mode) and the server decides to + * not allow the second open because of the read-only perms. + * The best we can do is to just use the create's open. + * We may have access we don't need or we may not have a requested + * deny mode. We may log complaints later, but we'll try to avoid it. + */ + if (denyMode != NFS_OPEN_SHARE_DENY_NONE) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_vnop_open: deny mode foregone on create, %s\n", vname); + vnode_putname(vname); + } + nofp->nof_creator = NULL; + error = 0; + } + if (error) + goto out; + opened = 1; + /* + * If we had just created the file, we already had it open. + * If the actual open mode is less than what we grabbed at + * create time, then we'll downgrade the open here. 
+ */ + if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && + (nofp->nof_creator == current_thread())) { + error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_open: create close error %d, %s\n", error, vname); + vnode_putname(vname); + } + if (!nfs_mount_state_error_should_restart(error)) { + error = 0; + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + } + } + } + +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_open: error %d, %s\n", error, vname); + vnode_putname(vname); + } + return (error); +} + +int +nfs4_close( + nfsnode_t np, + struct nfs_open_file *nofp, + uint32_t accessMode, + uint32_t denyMode, + vfs_context_t ctx) +{ + struct nfs_lock_owner *nlop; + int error = 0, changed = 0, closed = 0; + uint32_t newAccessMode, newDenyMode; + + /* warn if modes don't match current state */ + if (((accessMode & nofp->nof_access) != accessMode) || ((denyMode & nofp->nof_deny) != denyMode)) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_close: mode mismatch %d %d, current %d %d, %s\n", + accessMode, denyMode, nofp->nof_access, nofp->nof_deny, vname); + vnode_putname(vname); + } + + /* + * If we're closing a write-only open, we may not have a write-only count + * if we also grabbed read access. So, check the read-write count. 
+ */ + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) { + if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) && + (nofp->nof_w == 0) && nofp->nof_rw) + accessMode = NFS_OPEN_SHARE_ACCESS_BOTH; + } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) { + if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) && + (nofp->nof_w_dw == 0) && nofp->nof_rw_dw) + accessMode = NFS_OPEN_SHARE_ACCESS_BOTH; + } else { /* NFS_OPEN_SHARE_DENY_BOTH */ + if ((accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) && + (nofp->nof_w_drw == 0) && nofp->nof_rw_drw) + accessMode = NFS_OPEN_SHARE_ACCESS_BOTH; + } + + /* + * Calculate new modes: a mode bit gets removed when there's only + * one count in all the corresponding counts + */ + newAccessMode = nofp->nof_access; + newDenyMode = nofp->nof_deny; + if ((accessMode & NFS_OPEN_SHARE_ACCESS_READ) && + (newAccessMode & NFS_OPEN_SHARE_ACCESS_READ) && + ((nofp->nof_r + nofp->nof_rw + nofp->nof_r_dw + + nofp->nof_rw_dw + nofp->nof_r_drw + nofp->nof_rw_dw) == 1)) { + newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_READ; + changed = 1; + } + if ((accessMode & NFS_OPEN_SHARE_ACCESS_WRITE) && + (newAccessMode & NFS_OPEN_SHARE_ACCESS_WRITE) && + ((nofp->nof_w + nofp->nof_rw + nofp->nof_w_dw + + nofp->nof_rw_dw + nofp->nof_w_drw + nofp->nof_rw_dw) == 1)) { + newAccessMode &= ~NFS_OPEN_SHARE_ACCESS_WRITE; + changed = 1; + } + if ((denyMode & NFS_OPEN_SHARE_DENY_READ) && + (newDenyMode & NFS_OPEN_SHARE_DENY_READ) && + ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw) == 1)) { + newDenyMode &= ~NFS_OPEN_SHARE_DENY_READ; + changed = 1; + } + if ((denyMode & NFS_OPEN_SHARE_DENY_WRITE) && + (newDenyMode & NFS_OPEN_SHARE_DENY_WRITE) && + ((nofp->nof_r_drw + nofp->nof_w_drw + nofp->nof_rw_drw + + nofp->nof_r_dw + nofp->nof_w_dw + nofp->nof_rw_dw) == 1)) { + newDenyMode &= ~NFS_OPEN_SHARE_DENY_WRITE; + changed = 1; + } + + + if ((newAccessMode == 0) || (nofp->nof_opencnt == 1)) { + /* + * No more access after this close, so clean up and close it. 
+ */ + closed = 1; + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0); + if (error == NFSERR_LOCKS_HELD) { + /* + * Hmm... the server says we have locks we need to release first + * Find the lock owner and try to unlock everything. + */ + nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), 0); + if (nlop) { + nfs4_unlock_rpc(np, nlop, F_WRLCK, 0, UINT64_MAX, ctx); + nfs_lock_owner_rele(nlop); + } + error = nfs4_close_rpc(np, nofp, vfs_context_thread(ctx), vfs_context_ucred(ctx), 0); + } + } else if (changed) { + /* + * File is still open but with less access, so downgrade the open. + */ + if (!(nofp->nof_flags & NFS_OPEN_FILE_LOST)) + error = nfs4_open_downgrade_rpc(np, nofp, ctx); + } + + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_close: error %d, %s\n", error, vname); + vnode_putname(vname); + return (error); + } + + /* Decrement the corresponding open access/deny mode counter. 
*/ + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r == 0) + printf("nfs4_close: open(R) count underrun\n"); + else + nofp->nof_r--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w == 0) + printf("nfs4_close: open(W) count underrun\n"); + else + nofp->nof_w--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw == 0) + printf("nfs4_close: open(RW) count underrun\n"); + else + nofp->nof_rw--; + } + } else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) { + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r_dw == 0) + printf("nfs4_close: open(R,DW) count underrun\n"); + else + nofp->nof_r_dw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w_dw == 0) + printf("nfs4_close: open(W,DW) count underrun\n"); + else + nofp->nof_w_dw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw_dw == 0) + printf("nfs4_close: open(RW,DW) count underrun\n"); + else + nofp->nof_rw_dw--; + } + } else { /* NFS_OPEN_SHARE_DENY_BOTH */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (nofp->nof_r_drw == 0) + printf("nfs4_close: open(R,DRW) count underrun\n"); + else + nofp->nof_r_drw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_WRITE) { + if (nofp->nof_w_drw == 0) + printf("nfs4_close: open(W,DRW) count underrun\n"); + else + nofp->nof_w_drw--; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw_drw == 0) + printf("nfs4_close: open(RW,DRW) count underrun\n"); + else + nofp->nof_rw_drw--; + } + } + /* update the modes */ + nofp->nof_access = newAccessMode; + nofp->nof_deny = newDenyMode; + if (closed) { + if (nofp->nof_r || nofp->nof_w || + (nofp->nof_rw && !((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && !nofp->nof_creator && (nofp->nof_rw == 1))) || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) + 
printf("nfs4_close: unexpected count: %u %u %u dw %u %u %u drw %u %u %u flags 0x%x\n", + nofp->nof_r, nofp->nof_w, nofp->nof_rw, + nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw, + nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw, + nofp->nof_flags); + /* clear out all open info, just to be safe */ + nofp->nof_access = nofp->nof_deny = 0; + nofp->nof_mmap_access = nofp->nof_mmap_deny = 0; + nofp->nof_r = nofp->nof_w = nofp->nof_rw = 0; + nofp->nof_r_dw = nofp->nof_w_dw = nofp->nof_rw_dw = 0; + nofp->nof_r_drw = nofp->nof_w_drw = nofp->nof_rw_drw = 0; + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + /* XXX we may potentially want to clean up idle/unused open file structures */ + } + nofp->nof_opencnt--; + if (nofp->nof_flags & NFS_OPEN_FILE_LOST) { + error = EIO; + if (!nofp->nof_opencnt) + nofp->nof_flags &= ~NFS_OPEN_FILE_LOST; + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_close: LOST%s, %s\n", !(nofp->nof_flags & NFS_OPEN_FILE_LOST) ? " (last)" : "", vname); + vnode_putname(vname); + } + return (error); +} + +int +nfs4_vnop_close( + struct vnop_close_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflag; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + int fflag = ap->a_fflag; + int error, common_error, accessMode, denyMode; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* First, call the common code */ + common_error = nfs3_vnop_close(ap); + + if (!vnode_isreg(vp)) { + /* Just mark that it was closed */ + lck_mtx_lock(&np->n_openlock); + np->n_openrefcnt--; + lck_mtx_unlock(&np->n_openlock); + return (common_error); + } + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0); + if (!noop) { + printf("nfs4_vnop_close: can't get open owner!\n"); + return (EIO); + } + +restart: + error = nfs_mount_state_in_use_start(nmp); + if 
(error) { + nfs_open_owner_rele(noop); + return (error); + } + + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs4_vnop_close: no open file for owner %d, %s\n", error, vname); + vnode_putname(vname); + error = EBADF; + goto out; + } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } + + /* fflag contains some combination of: FREAD, FWRITE, FHASLOCK */ + accessMode = 0; + if (fflag & FREAD) + accessMode |= NFS_OPEN_SHARE_ACCESS_READ; + if (fflag & FWRITE) + accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE; +// XXX It would be nice if we still had the O_EXLOCK/O_SHLOCK flags that were on the open +// if (fflag & O_EXLOCK) +// denyMode = NFS_OPEN_SHARE_DENY_BOTH; +// else if (fflag & O_SHLOCK) +// denyMode = NFS_OPEN_SHARE_DENY_WRITE; +// else +// denyMode = NFS_OPEN_SHARE_DENY_NONE; + if (fflag & FHASLOCK) { + /* XXX assume FHASLOCK is for the deny mode and not flock */ + /* FHASLOCK flock will be unlocked in the close path, but the flag is not cleared. 
*/ + if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_READ) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else if (nofp->nof_deny & NFS_OPEN_SHARE_DENY_WRITE) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else + denyMode = NFS_OPEN_SHARE_DENY_NONE; + } else { + denyMode = NFS_OPEN_SHARE_DENY_NONE; + } + + if (!accessMode) { + error = EINVAL; + goto out; + } + + error = nfs4_close(np, nofp, accessMode, denyMode, ctx); + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_close: close error %d, %s\n", error, vname); + vnode_putname(vname); + } + +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_close: error %d, %s\n", error, vname); + vnode_putname(vname); + } + if (!error) + error = common_error; + return (error); +} + +int +nfs4_vnop_mmap( + struct vnop_mmap_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + int a_fflags; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(vp); + int error = 0, accessMode, denyMode; + struct nfsmount *nmp; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + if (!vnode_isreg(vp) || !(ap->a_fflags & (PROT_READ|PROT_WRITE))) + return (EINVAL); + + /* + * fflags contains some combination of: PROT_READ, PROT_WRITE + * Since it's not possible to mmap() without having the file open for reading, + * read access is always there (regardless if PROT_READ is not set). 
+ */ + accessMode = NFS_OPEN_SHARE_ACCESS_READ; + if (ap->a_fflags & PROT_WRITE) + accessMode |= NFS_OPEN_SHARE_ACCESS_WRITE; + denyMode = NFS_OPEN_SHARE_DENY_NONE; + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0); + if (!noop) { + printf("nfs4_vnop_mmap: no open owner\n"); + return (EPERM); + } + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1); + if (error || (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST))) { + printf("nfs4_vnop_mmap: no open file for owner %d\n", error); + error = EPERM; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } + + /* + * The open reference for mmap must mirror an existing open because + * we may need to reclaim it after the file is closed. + * So grab another open count matching the accessMode passed in. + * If we already had an mmap open, prefer read/write without deny mode. + * This means we may have to drop the current mmap open first. 
+ */ + + /* determine deny mode for open */ + if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (nofp->nof_rw) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_rw_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_rw_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else + error = EPERM; + } else { /* NFS_OPEN_SHARE_ACCESS_READ */ + if (nofp->nof_r) + denyMode = NFS_OPEN_SHARE_DENY_NONE; + else if (nofp->nof_r_dw) + denyMode = NFS_OPEN_SHARE_DENY_WRITE; + else if (nofp->nof_r_drw) + denyMode = NFS_OPEN_SHARE_DENY_BOTH; + else + error = EPERM; + } + if (error) /* mmap mode without proper open mode */ + goto out; + + /* + * If the existing mmap access is more than the new access OR the + * existing access is the same and the existing deny mode is less, + * then we'll stick with the existing mmap open mode. + */ + if ((nofp->nof_mmap_access > accessMode) || + ((nofp->nof_mmap_access == accessMode) && (nofp->nof_mmap_deny <= denyMode))) + goto out; + + /* update mmap open mode */ + if (nofp->nof_mmap_access) { + error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx); + if (error) { + if (!nfs_mount_state_error_should_restart(error)) + printf("nfs_vnop_mmap: close of previous mmap mode failed: %d\n", error); + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_mmap: update, close error %d, %s\n", error, vname); + vnode_putname(vname); + goto out; + } + nofp->nof_mmap_access = nofp->nof_mmap_deny = 0; + } + + if (accessMode == NFS_OPEN_SHARE_ACCESS_BOTH) { + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) + nofp->nof_rw++; + else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) + nofp->nof_rw_dw++; + else /* NFS_OPEN_SHARE_DENY_BOTH */ + nofp->nof_rw_drw++; + } else if (accessMode == NFS_OPEN_SHARE_ACCESS_READ) { + if (denyMode == NFS_OPEN_SHARE_DENY_NONE) + nofp->nof_r++; + else if (denyMode == NFS_OPEN_SHARE_DENY_WRITE) + nofp->nof_r_dw++; + else /* NFS_OPEN_SHARE_DENY_BOTH */ + nofp->nof_r_drw++; + } + 
nofp->nof_mmap_access = accessMode; + nofp->nof_mmap_deny = denyMode; + nofp->nof_opencnt++; + +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); + return (error); +} + + +int +nfs4_vnop_mnomap( + struct vnop_mnomap_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(vp); + struct nfsmount *nmp; + struct nfs_open_file *nofp = NULL; + int error; + + nmp = VTONMP(vp); + if (!nmp) + return (ENXIO); + + /* walk all open files and close all mmap opens */ +loop: + error = nfs_mount_state_in_use_start(nmp); + if (error) + return (error); + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nofp, &np->n_opens, nof_link) { + if (!nofp->nof_mmap_access) + continue; + lck_mtx_unlock(&np->n_openlock); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto loop; + } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + lck_mtx_lock(&np->n_openlock); + break; + } + if (nofp->nof_mmap_access) { + error = nfs4_close(np, nofp, nofp->nof_mmap_access, nofp->nof_mmap_deny, ctx); + if (!nfs_mount_state_error_should_restart(error)) { + if (error) /* not a state-operation-restarting error, so just clear the access */ + printf("nfs_vnop_mnomap: close of mmap mode failed: %d\n", error); + nofp->nof_mmap_access = nofp->nof_mmap_deny = 0; + } + if (error) { + const char *vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_mnomap: error %d, %s\n", error, vname); + vnode_putname(vname); + } + } + nfs_open_file_clear_busy(nofp); + nfs_mount_state_in_use_end(nmp, error); + goto loop; + } + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, error); + return (error); +} + +/* + * Search a node's lock owner list for the 
owner for this process. + * If not found and "alloc" is set, then allocate a new one. + */ +struct nfs_lock_owner * +nfs_lock_owner_find(nfsnode_t np, proc_t p, int alloc) +{ + pid_t pid = proc_pid(p); + struct nfs_lock_owner *nlop, *newnlop = NULL; + +tryagain: + lck_mtx_lock(&np->n_openlock); + TAILQ_FOREACH(nlop, &np->n_lock_owners, nlo_link) { + if (nlop->nlo_pid != pid) + continue; + if (timevalcmp(&nlop->nlo_pid_start, &p->p_start, ==)) + break; + /* stale lock owner... reuse it if we can */ + if (nlop->nlo_refcnt) { + TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link); + nlop->nlo_flags &= ~NFS_LOCK_OWNER_LINK; + lck_mtx_unlock(&np->n_openlock); + goto tryagain; + } + nlop->nlo_pid_start = p->p_start; + nlop->nlo_seqid = 0; + nlop->nlo_stategenid = 0; + break; + } + + if (!nlop && !newnlop && alloc) { + lck_mtx_unlock(&np->n_openlock); + MALLOC(newnlop, struct nfs_lock_owner *, sizeof(struct nfs_lock_owner), M_TEMP, M_WAITOK); + if (!newnlop) + return (NULL); + bzero(newnlop, sizeof(*newnlop)); + lck_mtx_init(&newnlop->nlo_lock, nfs_open_grp, LCK_ATTR_NULL); + newnlop->nlo_pid = pid; + newnlop->nlo_pid_start = p->p_start; + newnlop->nlo_name = OSAddAtomic(1, &nfs_lock_owner_seqnum); + TAILQ_INIT(&newnlop->nlo_locks); + goto tryagain; + } + if (!nlop && newnlop) { + newnlop->nlo_flags |= NFS_LOCK_OWNER_LINK; + TAILQ_INSERT_HEAD(&np->n_lock_owners, newnlop, nlo_link); + nlop = newnlop; + } + lck_mtx_unlock(&np->n_openlock); + + if (newnlop && (nlop != newnlop)) + nfs_lock_owner_destroy(newnlop); + + if (nlop) + nfs_lock_owner_ref(nlop); + + return (nlop); +} + +/* + * destroy a lock owner that's no longer needed + */ +void +nfs_lock_owner_destroy(struct nfs_lock_owner *nlop) +{ + if (nlop->nlo_open_owner) { + nfs_open_owner_rele(nlop->nlo_open_owner); + nlop->nlo_open_owner = NULL; + } + lck_mtx_destroy(&nlop->nlo_lock, nfs_open_grp); + FREE(nlop, M_TEMP); +} + +/* + * acquire a reference count on a lock owner + */ +void +nfs_lock_owner_ref(struct nfs_lock_owner 
*nlop) +{ + lck_mtx_lock(&nlop->nlo_lock); + nlop->nlo_refcnt++; + lck_mtx_unlock(&nlop->nlo_lock); +} + +/* + * drop a reference count on a lock owner and destroy it if + * it is no longer referenced and no longer on the mount's list. + */ +void +nfs_lock_owner_rele(struct nfs_lock_owner *nlop) +{ + lck_mtx_lock(&nlop->nlo_lock); + if (nlop->nlo_refcnt < 1) + panic("nfs_lock_owner_rele: no refcnt"); + nlop->nlo_refcnt--; + if (!nlop->nlo_refcnt && (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY)) + panic("nfs_lock_owner_rele: busy"); + /* XXX we may potentially want to clean up idle/unused lock owner structures */ + if (nlop->nlo_refcnt || (nlop->nlo_flags & NFS_LOCK_OWNER_LINK)) { + lck_mtx_unlock(&nlop->nlo_lock); + return; + } + /* owner is no longer referenced or linked to mount, so destroy it */ + lck_mtx_unlock(&nlop->nlo_lock); + nfs_lock_owner_destroy(nlop); +} + +/* + * Mark a lock owner as busy because we are about to + * start an operation that uses and updates lock owner state. + */ +int +nfs_lock_owner_set_busy(struct nfs_lock_owner *nlop, thread_t thd) +{ + struct nfsmount *nmp; + struct timespec ts = {2, 0}; + int error = 0, slpflag; + + nmp = nlop->nlo_open_owner->noo_mount; + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + lck_mtx_lock(&nlop->nlo_lock); + while (nlop->nlo_flags & NFS_LOCK_OWNER_BUSY) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + nlop->nlo_flags |= NFS_LOCK_OWNER_WANT; + msleep(nlop, &nlop->nlo_lock, slpflag, "nfs_lock_owner_set_busy", &ts); + } + if (!error) + nlop->nlo_flags |= NFS_LOCK_OWNER_BUSY; + lck_mtx_unlock(&nlop->nlo_lock); + + return (error); +} + +/* + * Clear the busy flag on a lock owner and wake up anyone waiting + * to mark it busy. 
+ */ +void +nfs_lock_owner_clear_busy(struct nfs_lock_owner *nlop) +{ + int wanted; + + lck_mtx_lock(&nlop->nlo_lock); + if (!(nlop->nlo_flags & NFS_LOCK_OWNER_BUSY)) + panic("nfs_lock_owner_clear_busy"); + wanted = (nlop->nlo_flags & NFS_LOCK_OWNER_WANT); + nlop->nlo_flags &= ~(NFS_LOCK_OWNER_BUSY|NFS_LOCK_OWNER_WANT); + lck_mtx_unlock(&nlop->nlo_lock); + if (wanted) + wakeup(nlop); +} + +/* + * Insert a held lock into a lock owner's sorted list. + * (flock locks are always inserted at the head the list) + */ +void +nfs_lock_owner_insert_held_lock(struct nfs_lock_owner *nlop, struct nfs_file_lock *newnflp) +{ + struct nfs_file_lock *nflp; + + /* insert new lock in lock owner's held lock list */ + lck_mtx_lock(&nlop->nlo_lock); + if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) { + TAILQ_INSERT_HEAD(&nlop->nlo_locks, newnflp, nfl_lolink); + } else { + TAILQ_FOREACH(nflp, &nlop->nlo_locks, nfl_lolink) { + if (newnflp->nfl_start < nflp->nfl_start) + break; + } + if (nflp) + TAILQ_INSERT_BEFORE(nflp, newnflp, nfl_lolink); + else + TAILQ_INSERT_TAIL(&nlop->nlo_locks, newnflp, nfl_lolink); + } + lck_mtx_unlock(&nlop->nlo_lock); +} + +/* + * Get a file lock structure for this lock owner. 
+ */ +struct nfs_file_lock * +nfs_file_lock_alloc(struct nfs_lock_owner *nlop) +{ + struct nfs_file_lock *nflp = NULL; + + lck_mtx_lock(&nlop->nlo_lock); + if (!nlop->nlo_alock.nfl_owner) { + nflp = &nlop->nlo_alock; + nflp->nfl_owner = nlop; + } + lck_mtx_unlock(&nlop->nlo_lock); + if (!nflp) { + MALLOC(nflp, struct nfs_file_lock *, sizeof(struct nfs_file_lock), M_TEMP, M_WAITOK); + if (!nflp) + return (NULL); + bzero(nflp, sizeof(*nflp)); + nflp->nfl_flags |= NFS_FILE_LOCK_ALLOC; + nflp->nfl_owner = nlop; + } + nfs_lock_owner_ref(nlop); + return (nflp); +} + +/* + * destroy the given NFS file lock structure + */ +void +nfs_file_lock_destroy(struct nfs_file_lock *nflp) +{ + struct nfs_lock_owner *nlop = nflp->nfl_owner; + + if (nflp->nfl_flags & NFS_FILE_LOCK_ALLOC) { + nflp->nfl_owner = NULL; + FREE(nflp, M_TEMP); + } else { + lck_mtx_lock(&nlop->nlo_lock); + bzero(nflp, sizeof(nflp)); + lck_mtx_unlock(&nlop->nlo_lock); + } + nfs_lock_owner_rele(nlop); +} + +/* + * Check if one file lock conflicts with another. + * (nflp1 is the new lock. nflp2 is the existing lock.) 
+ */ +int +nfs_file_lock_conflict(struct nfs_file_lock *nflp1, struct nfs_file_lock *nflp2, int *willsplit) +{ + /* no conflict if lock is dead */ + if ((nflp1->nfl_flags & NFS_FILE_LOCK_DEAD) || (nflp2->nfl_flags & NFS_FILE_LOCK_DEAD)) + return (0); + /* no conflict if it's ours - unless the lock style doesn't match */ + if ((nflp1->nfl_owner == nflp2->nfl_owner) && + ((nflp1->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == (nflp2->nfl_flags & NFS_FILE_LOCK_STYLE_MASK))) { + if (willsplit && (nflp1->nfl_type != nflp2->nfl_type) && + (nflp1->nfl_start > nflp2->nfl_start) && + (nflp1->nfl_end < nflp2->nfl_end)) + *willsplit = 1; + return (0); + } + /* no conflict if ranges don't overlap */ + if ((nflp1->nfl_start > nflp2->nfl_end) || (nflp1->nfl_end < nflp2->nfl_start)) + return (0); + /* no conflict if neither lock is exclusive */ + if ((nflp1->nfl_type != F_WRLCK) && (nflp2->nfl_type != F_WRLCK)) + return (0); + /* conflict */ + return (1); +} + +/* + * Send an NFSv4 LOCK RPC to the server. + */ +int +nfs4_lock_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_file_lock *nflp, + int reclaim, + thread_t thd, + kauth_cred_t cred) +{ + struct nfs_lock_owner *nlop = nflp->nfl_owner; + struct nfsmount *nmp; + struct nfsm_chain nmreq, nmrep; + uint64_t xid; + uint32_t locktype; + int error = 0, lockerror = ENOENT, newlocker, numops, status; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + newlocker = (nlop->nlo_stategenid != nmp->nm_stategenid); + locktype = (nflp->nfl_flags & NFS_FILE_LOCK_WAIT) ? + ((nflp->nfl_type == F_WRLCK) ? + NFS_LOCK_TYPE_WRITEW : + NFS_LOCK_TYPE_READW) : + ((nflp->nfl_type == F_WRLCK) ? 
+ NFS_LOCK_TYPE_WRITE : + NFS_LOCK_TYPE_READ); + if (newlocker) { + error = nfs_open_file_set_busy(nofp, thd); + if (error) + return (error); + error = nfs_open_owner_set_busy(nofp->nof_owner, thd); + if (error) { + nfs_open_file_clear_busy(nofp); + return (error); + } + if (!nlop->nlo_open_owner) { + nfs_open_owner_ref(nofp->nof_owner); + nlop->nlo_open_owner = nofp->nof_owner; + } + } + error = nfs_lock_owner_set_busy(nlop, thd); + if (error) { + if (newlocker) { + nfs_open_owner_clear_busy(nofp->nof_owner); + nfs_open_file_clear_busy(nofp); + } + return (error); + } + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, GETATTR, LOCK + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 33 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "lock", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCK); + nfsm_chain_add_32(error, &nmreq, locktype); + nfsm_chain_add_32(error, &nmreq, reclaim); + nfsm_chain_add_64(error, &nmreq, nflp->nfl_start); + nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(nflp->nfl_start, nflp->nfl_end)); + nfsm_chain_add_32(error, &nmreq, newlocker); + if (newlocker) { + nfsm_chain_add_32(error, &nmreq, nofp->nof_owner->noo_seqid); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid); + nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop); + } else { + nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid); + nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid); + } + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request2(np, NULL, &nmreq, 
NFSPROC4_COMPOUND, thd, cred, (reclaim ? R_RECOVER : 0), &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCK); + nfs_owner_seqid_increment(newlocker ? nofp->nof_owner : NULL, nlop, error); + nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid); + + /* Update the lock owner's stategenid once it appears the server has state for it. */ + /* We determine this by noting the request was successful (we got a stateid). */ + if (newlocker && !error) + nlop->nlo_stategenid = nmp->nm_stategenid; +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_lock_owner_clear_busy(nlop); + if (newlocker) { + nfs_open_owner_clear_busy(nofp->nof_owner); + nfs_open_file_clear_busy(nofp); + } + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + +/* + * Send an NFSv4 LOCKU RPC to the server. 
/*
 * Send an NFSv4 LOCKU RPC (PUTFH, GETATTR, LOCKU compound) to the server
 * to release the byte range [start, end] held by lock owner "nlop".
 *
 * "type" selects the lock type sent in the request: F_WRLCK is sent as
 * NFS_LOCK_TYPE_WRITE, anything else as NFS_LOCK_TYPE_READ.
 *
 * The lock owner is marked busy for the duration of the call.  The lock
 * owner's stateid is updated from the reply.
 *
 * Returns 0 or an error (ENXIO if the node has no mount; the error from
 * nfs_lock_owner_set_busy() if the owner could not be marked busy).
 */
int
nfs4_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	int type,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	struct nfsm_chain nmreq, nmrep;
	uint64_t xid;
	/* lockerror starts non-zero so nfsmout only unlocks the node if we locked it */
	int error = 0, lockerror = ENOENT, numops, status;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	error = nfs_lock_owner_set_busy(nlop, vfs_context_thread(ctx));
	if (error)
		return (error);

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	// PUTFH, GETATTR, LOCKU
	numops = 3;
	nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED);
	nfsm_chain_add_compound_header(error, &nmreq, "unlock", numops);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH);
	nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize);
	numops--;
	nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR);
	nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap,
		NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr);
	numops--;
	/* LOCKU arguments, in order: lock type, seqid, stateid, offset, length */
	nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKU);
	nfsm_chain_add_32(error, &nmreq, (type == F_WRLCK) ? NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ);
	nfsm_chain_add_32(error, &nmreq, nlop->nlo_seqid);
	nfsm_chain_add_stateid(error, &nmreq, &nlop->nlo_stateid);
	nfsm_chain_add_64(error, &nmreq, start);
	nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end));
	nfsm_chain_build_done(error, &nmreq);
	/* every op counted in the compound header must have been added */
	nfsm_assert(error, (numops == 0), EPROTO);
	nfsmout_if(error);

	error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status);

	if ((lockerror = nfs_node_lock(np)))
		error = lockerror;
	nfsm_chain_skip_tag(error, &nmrep);
	nfsm_chain_get_32(error, &nmrep, numops);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR);
	nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid);
	nfsmout_if(error);
	nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKU);
	/* no open owner is involved in LOCKU, so only the lock owner is passed */
	nfs_owner_seqid_increment(NULL, nlop, error);
	nfsm_chain_get_stateid(error, &nmrep, &nlop->nlo_stateid);
nfsmout:
	if (!lockerror)
		nfs_node_unlock(np);
	nfs_lock_owner_clear_busy(nlop);
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return (error);
}
+ */ +int +nfs4_getlock( + nfsnode_t np, + struct nfs_lock_owner *nlop, + struct flock *fl, + uint64_t start, + uint64_t end, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *nflp; + struct nfsm_chain nmreq, nmrep; + uint64_t xid, val64 = 0; + uint32_t val = 0; + int error = 0, lockerror = ENOENT, numops, status; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + lck_mtx_lock(&np->n_openlock); + /* scan currently held locks for conflict */ + TAILQ_FOREACH(nflp, &np->n_locks, nfl_link) { + if (nflp->nfl_flags & NFS_FILE_LOCK_BLOCKED) + continue; + if ((start <= nflp->nfl_end) && (end >= nflp->nfl_start) && + ((fl->l_type == F_WRLCK) || (nflp->nfl_type == F_WRLCK))) + break; + } + if (nflp) { + /* found a conflicting lock */ + fl->l_type = nflp->nfl_type; + fl->l_pid = (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_FLOCK) ? -1 : nflp->nfl_owner->nlo_pid; + fl->l_start = nflp->nfl_start; + fl->l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end); + fl->l_whence = SEEK_SET; + } + lck_mtx_unlock(&np->n_openlock); + if (nflp) + return (0); + + /* no conflict found locally, so ask the server */ + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, GETATTR, LOCKT + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 26 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "locktest", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, NFS_VER4, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_LOCKT); + nfsm_chain_add_32(error, &nmreq, (fl->l_type == F_WRLCK) ? 
NFS_LOCK_TYPE_WRITE : NFS_LOCK_TYPE_READ); + nfsm_chain_add_64(error, &nmreq, start); + nfsm_chain_add_64(error, &nmreq, NFS_LOCK_LENGTH(start, end)); + nfsm_chain_add_lock_owner4(error, &nmreq, nmp, nlop); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, NFS_VER4, NULL, &xid); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_LOCKT); + if (error == NFSERR_DENIED) { + error = 0; + nfsm_chain_get_64(error, &nmrep, fl->l_start); + nfsm_chain_get_64(error, &nmrep, val64); + fl->l_len = (val64 == UINT64_MAX) ? 0 : val64; + nfsm_chain_get_32(error, &nmrep, val); + fl->l_type = (val == NFS_LOCK_TYPE_WRITE) ? F_WRLCK : F_RDLCK; + fl->l_pid = 0; + fl->l_whence = SEEK_SET; + } else if (!error) { + fl->l_type = F_UNLCK; + } +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + +/* + * Acquire a file lock for the given range. + * + * Add the lock (request) to the lock queue. + * Scan the lock queue for any conflicting locks. + * If a conflict is found, block or return an error. + * Once end of queue is reached, send request to the server. + * If the server grants the lock, scan the lock queue and + * update any existing locks. Then (optionally) scan the + * queue again to coalesce any locks adjacent to the new one. 
+ */ +int +nfs4_setlock( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_lock_owner *nlop, + int op, + uint64_t start, + uint64_t end, + int style, + short type, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *newnflp, *nflp, *nflp2 = NULL, *nextnflp, *flocknflp = NULL; + struct nfs_file_lock *coalnflp; + int error = 0, error2, willsplit = 0, delay, slpflag, busy = 0, inuse = 0, restart, inqueue = 0; + struct timespec ts = {1, 0}; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + + /* allocate a new lock */ + newnflp = nfs_file_lock_alloc(nlop); + if (!newnflp) + return (ENOLCK); + newnflp->nfl_start = start; + newnflp->nfl_end = end; + newnflp->nfl_type = type; + if (op == F_SETLKW) + newnflp->nfl_flags |= NFS_FILE_LOCK_WAIT; + newnflp->nfl_flags |= style; + newnflp->nfl_flags |= NFS_FILE_LOCK_BLOCKED; + + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && (type == F_WRLCK)) { + /* + * For exclusive flock-style locks, if we block waiting for the + * lock, we need to first release any currently held shared + * flock-style lock. So, the first thing we do is check if we + * have a shared flock-style lock. 
+ */ + nflp = TAILQ_FIRST(&nlop->nlo_locks); + if (nflp && ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_FLOCK)) + nflp = NULL; + if (nflp && (nflp->nfl_type != F_RDLCK)) + nflp = NULL; + flocknflp = nflp; + } + +restart: + restart = 0; + error = nfs_mount_state_in_use_start(nmp); + if (error) + goto error_out; + inuse = 1; + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + + lck_mtx_lock(&np->n_openlock); + if (!inqueue) { + /* insert new lock at beginning of list */ + TAILQ_INSERT_HEAD(&np->n_locks, newnflp, nfl_link); + inqueue = 1; + } + + /* scan current list of locks (held and pending) for conflicts */ + for (nflp = TAILQ_NEXT(newnflp, nfl_link); nflp; nflp = TAILQ_NEXT(nflp, nfl_link)) { + if (!nfs_file_lock_conflict(newnflp, nflp, &willsplit)) + continue; + /* Conflict */ + if (!(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) { + error = EAGAIN; + break; + } + /* Block until this lock is no longer held. */ + if (nflp->nfl_blockcnt == UINT_MAX) { + error = ENOLCK; + break; + } + nflp->nfl_blockcnt++; + do { + if (flocknflp) { + /* release any currently held shared lock before sleeping */ + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + error = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx); + flocknflp = NULL; + if (!error) + error = nfs_mount_state_in_use_start(nmp); + if (error) { + lck_mtx_lock(&np->n_openlock); + break; + } + inuse = 1; + lck_mtx_lock(&np->n_openlock); + /* no need to block/sleep if the conflict is gone */ + if (!nfs_file_lock_conflict(newnflp, nflp, NULL)) + break; + } + msleep(nflp, &np->n_openlock, slpflag, "nfs4_setlock_blocked", &ts); + error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0); + if (!error && (nmp->nm_state & NFSSTA_RECOVER)) { + /* looks like we have a recover pending... 
restart */ + restart = 1; + lck_mtx_unlock(&np->n_openlock); + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + lck_mtx_lock(&np->n_openlock); + break; + } + } while (!error && nfs_file_lock_conflict(newnflp, nflp, NULL)); + nflp->nfl_blockcnt--; + if ((nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !nflp->nfl_blockcnt) { + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + if (error || restart) + break; + } + lck_mtx_unlock(&np->n_openlock); + if (restart) + goto restart; + if (error) + goto error_out; + + if (willsplit) { + /* + * It looks like this operation is splitting a lock. + * We allocate a new lock now so we don't have to worry + * about the allocation failing after we've updated some state. + */ + nflp2 = nfs_file_lock_alloc(nlop); + if (!nflp2) { + error = ENOLCK; + goto error_out; + } + } + + /* once scan for local conflicts is clear, send request to server */ + if ((error = nfs_open_state_set_busy(np, ctx))) + goto error_out; + busy = 1; + delay = 0; + do { + error = nfs4_lock_rpc(np, nofp, newnflp, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx)); + if (!error || ((error != NFSERR_DENIED) && (error != NFSERR_GRACE))) + break; + /* request was denied due to either conflict or grace period */ + if ((error != NFSERR_GRACE) && !(newnflp->nfl_flags & NFS_FILE_LOCK_WAIT)) { + error = EAGAIN; + break; + } + if (flocknflp) { + /* release any currently held shared lock before sleeping */ + nfs_open_state_clear_busy(np); + busy = 0; + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + error2 = nfs4_unlock(np, nofp, nlop, 0, UINT64_MAX, NFS_FILE_LOCK_STYLE_FLOCK, ctx); + flocknflp = NULL; + if (!error2) + error2 = nfs_mount_state_in_use_start(nmp); + if (!error2) { + inuse = 1; + error2 = nfs_open_state_set_busy(np, ctx); + } + if (error2) { + error = error2; + break; + } + busy = 1; + } + /* wait a little bit and send the request again */ + if (error == NFSERR_GRACE) + delay = 4; + if (delay < 4) + delay++; + tsleep(newnflp, 
slpflag, "nfs4_setlock_delay", delay * (hz/2)); + error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0); + if (!error && (nmp->nm_state & NFSSTA_RECOVER)) { + /* looks like we have a recover pending... restart */ + nfs_open_state_clear_busy(np); + busy = 0; + nfs_mount_state_in_use_end(nmp, 0); + inuse = 0; + goto restart; + } + } while (!error); + +error_out: + if (nfs_mount_state_error_should_restart(error)) { + /* looks like we need to restart this operation */ + if (busy) { + nfs_open_state_clear_busy(np); + busy = 0; + } + if (inuse) { + nfs_mount_state_in_use_end(nmp, error); + inuse = 0; + } + goto restart; + } + lck_mtx_lock(&np->n_openlock); + newnflp->nfl_flags &= ~NFS_FILE_LOCK_BLOCKED; + if (error) { + newnflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + if (newnflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(newnflp); + } else { + /* remove newnflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, newnflp, nfl_link); + nfs_file_lock_destroy(newnflp); + } + lck_mtx_unlock(&np->n_openlock); + if (busy) + nfs_open_state_clear_busy(np); + if (inuse) + nfs_mount_state_in_use_end(nmp, error); + if (nflp2) + nfs_file_lock_destroy(nflp2); + return (error); + } + + /* server granted the lock */ + + /* + * Scan for locks to update. + * + * Locks completely covered are killed. + * At most two locks may need to be clipped. + * It's possible that a single lock may need to be split. 
+ */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp == newnflp) + continue; + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((newnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK)) + continue; + if ((newnflp->nfl_start > nflp->nfl_end) || (newnflp->nfl_end < nflp->nfl_start)) + continue; + /* here's one to update */ + if ((newnflp->nfl_start <= nflp->nfl_start) && (newnflp->nfl_end >= nflp->nfl_end)) { + /* The entire lock is being replaced. */ + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + /* lock will be destroyed below, if no waiters */ + } else if ((newnflp->nfl_start > nflp->nfl_start) && (newnflp->nfl_end < nflp->nfl_end)) { + /* We're replacing a range in the middle of a lock. */ + /* The current lock will be split into two locks. */ + /* Update locks and insert new lock after current lock. */ + nflp2->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK); + nflp2->nfl_type = nflp->nfl_type; + nflp2->nfl_start = newnflp->nfl_end + 1; + nflp2->nfl_end = nflp->nfl_end; + nflp->nfl_end = newnflp->nfl_start - 1; + TAILQ_INSERT_AFTER(&np->n_locks, nflp, nflp2, nfl_link); + nfs_lock_owner_insert_held_lock(nlop, nflp2); + nextnflp = nflp2; + nflp2 = NULL; + } else if (newnflp->nfl_start > nflp->nfl_start) { + /* We're replacing the end of a lock. */ + nflp->nfl_end = newnflp->nfl_start - 1; + } else if (newnflp->nfl_end < nflp->nfl_end) { + /* We're replacing the start of a lock. 
*/ + nflp->nfl_start = newnflp->nfl_end + 1; + } + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } + + nfs_lock_owner_insert_held_lock(nlop, newnflp); + + /* + * POSIX locks should be coalesced when possible. + */ + if ((style == NFS_FILE_LOCK_STYLE_POSIX) && (nofp->nof_flags & NFS_OPEN_FILE_POSIXLOCK)) { + /* + * Walk through the lock queue and check each of our held locks with + * the previous and next locks in the lock owner's "held lock list". + * If the two locks can be coalesced, we merge the current lock into + * the other (previous or next) lock. Merging this way makes sure that + * lock ranges are always merged forward in the lock queue. This is + * important because anyone blocked on the lock being "merged away" + * will still need to block on that range and it will simply continue + * checking locks that are further down the list. 
+ */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != NFS_FILE_LOCK_STYLE_POSIX) + continue; + if (((coalnflp = TAILQ_PREV(nflp, nfs_file_lock_queue, nfl_lolink))) && + ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) && + (coalnflp->nfl_type == nflp->nfl_type) && + (coalnflp->nfl_end == (nflp->nfl_start - 1))) { + coalnflp->nfl_end = nflp->nfl_end; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + } else if (((coalnflp = TAILQ_NEXT(nflp, nfl_lolink))) && + ((coalnflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) && + (coalnflp->nfl_type == nflp->nfl_type) && + (coalnflp->nfl_start == (nflp->nfl_end + 1))) { + coalnflp->nfl_start = nflp->nfl_start; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + } + if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } + } + + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + + if (nflp2) + nfs_file_lock_destroy(nflp2); + return (error); +} + +int +nfs4_unlock( + nfsnode_t np, + struct nfs_open_file *nofp, + struct nfs_lock_owner *nlop, + uint64_t start, + uint64_t end, + int style, + vfs_context_t ctx) +{ + struct nfsmount *nmp; + struct nfs_file_lock *nflp, *nextnflp, *newnflp = NULL; + int error = 0, willsplit = 0, send_unlock_rpcs = 1; + + nmp = NFSTONMP(np); + if (!nmp) + 
return (ENXIO); + +restart: + if ((error = nfs_mount_state_in_use_start(nmp))) + return (error); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + if ((error = nfs_open_state_set_busy(np, ctx))) { + nfs_mount_state_in_use_end(nmp, error); + return (error); + } + + lck_mtx_lock(&np->n_openlock); + if ((start > 0) && (end < UINT64_MAX) && !willsplit) { + /* + * We may need to allocate a new lock if an existing lock gets split. + * So, we first scan the list to check for a split, and if there's + * going to be one, we'll allocate one now. + */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style) + continue; + if ((start > nflp->nfl_end) || (end < nflp->nfl_start)) + continue; + if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) { + willsplit = 1; + break; + } + } + if (willsplit) { + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, 0); + newnflp = nfs_file_lock_alloc(nlop); + if (!newnflp) + return (ENOMEM); + goto restart; + } + } + + /* + * Free all of our locks in the given range. + * + * Note that this process requires sending requests to the server. + * Because of this, we will release the n_openlock while performing + * the unlock RPCs. The N_OPENBUSY state keeps the state of *held* + * locks from changing underneath us. However, other entries in the + * list may be removed. So we need to be careful walking the list. + */ + + /* + * Don't unlock ranges that are held by other-style locks. + * If style is posix, don't send any unlock rpcs if flock is held. + * If we unlock an flock, don't send unlock rpcs for any posix-style + * ranges held - instead send unlocks for the ranges not held. 
+ */ + if ((style == NFS_FILE_LOCK_STYLE_POSIX) && + ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK)) + send_unlock_rpcs = 0; + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && + ((nflp = TAILQ_FIRST(&nlop->nlo_locks))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_FLOCK) && + ((nflp = TAILQ_NEXT(nflp, nfl_lolink))) && + ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX)) { + uint64_t s = 0; + int type = TAILQ_FIRST(&nlop->nlo_locks)->nfl_type; + while (nflp) { + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) == NFS_FILE_LOCK_STYLE_POSIX) { + /* unlock the range preceding this lock */ + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, type, s, nflp->nfl_start-1, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + if (error) + goto out; + s = nflp->nfl_end+1; + } + nflp = TAILQ_NEXT(nflp, nfl_lolink); + } + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, type, s, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + if (error) + goto out; + send_unlock_rpcs = 0; + } + + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED|NFS_FILE_LOCK_DEAD)) + continue; + if (nflp->nfl_owner != nlop) + continue; + if ((nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK) != style) + continue; + if ((start > nflp->nfl_end) || (end < nflp->nfl_start)) + continue; + /* here's one to unlock */ + if ((start <= nflp->nfl_start) && (end >= nflp->nfl_end)) { + /* The entire lock is being unlocked. 
*/ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, nflp->nfl_end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_flags |= NFS_FILE_LOCK_DEAD; + lck_mtx_lock(&nlop->nlo_lock); + TAILQ_REMOVE(&nlop->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nlop->nlo_lock); + /* lock will be destroyed below, if no waiters */ + } else if ((start > nflp->nfl_start) && (end < nflp->nfl_end)) { + /* We're unlocking a range in the middle of a lock. */ + /* The current lock will be split into two locks. */ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + if (error) + break; + /* update locks and insert new lock after current lock */ + newnflp->nfl_flags |= (nflp->nfl_flags & NFS_FILE_LOCK_STYLE_MASK); + newnflp->nfl_type = nflp->nfl_type; + newnflp->nfl_start = end + 1; + newnflp->nfl_end = nflp->nfl_end; + nflp->nfl_end = start - 1; + TAILQ_INSERT_AFTER(&np->n_locks, nflp, newnflp, nfl_link); + nfs_lock_owner_insert_held_lock(nlop, newnflp); + nextnflp = newnflp; + newnflp = NULL; + } else if (start > nflp->nfl_start) { + /* We're unlocking the end of a lock. 
*/ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, start, nflp->nfl_end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_end = start - 1; + } else if (end < nflp->nfl_end) { + /* We're unlocking the start of a lock. */ + if (send_unlock_rpcs) { + lck_mtx_unlock(&np->n_openlock); + error = nfs4_unlock_rpc(np, nlop, nflp->nfl_type, nflp->nfl_start, end, ctx); + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + lck_mtx_lock(&np->n_openlock); + } + nextnflp = TAILQ_NEXT(nflp, nfl_link); + if (error) + break; + nflp->nfl_start = end + 1; + } + if (nflp->nfl_blockcnt) { + /* wake up anyone blocked on this lock */ + wakeup(nflp); + } else if (nflp->nfl_flags & NFS_FILE_LOCK_DEAD) { + /* remove nflp from lock list and destroy */ + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + } +out: + lck_mtx_unlock(&np->n_openlock); + nfs_open_state_clear_busy(np); + nfs_mount_state_in_use_end(nmp, 0); + + if (newnflp) + nfs_file_lock_destroy(newnflp); + return (error); +} + +/* + * NFSv4 advisory file locking + */ +int +nfs4_vnop_advlock( + struct vnop_advlock_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + vfs_context_t a_context; + } */ *ap) +{ + vnode_t vp = ap->a_vp; + nfsnode_t np = VTONFS(ap->a_vp); + struct flock *fl = ap->a_fl; + int op = ap->a_op; + int flags = ap->a_flags; + vfs_context_t ctx = ap->a_context; + struct nfsmount *nmp; + struct nfs_vattr nvattr; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + struct nfs_lock_owner *nlop = NULL; + off_t lstart; + 
uint64_t start, end; + int error = 0, modified, style; +#define OFF_MAX QUAD_MAX + + nmp = VTONMP(ap->a_vp); + if (!nmp) + return (ENXIO); + + switch (fl->l_whence) { + case SEEK_SET: + case SEEK_CUR: + /* + * Caller is responsible for adding any necessary offset + * to fl->l_start when SEEK_CUR is used. + */ + lstart = fl->l_start; + break; + case SEEK_END: + /* need to flush, and refetch attributes to make */ + /* sure we have the correct end of file offset */ + if ((error = nfs_node_lock(np))) + return (error); + modified = (np->n_flag & NMODIFIED); + nfs_node_unlock(np); + if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)))) + return (error); + if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED))) + return (error); + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); + if ((np->n_size > OFF_MAX) || + ((fl->l_start > 0) && (np->n_size > (u_quad_t)(OFF_MAX - fl->l_start)))) + error = EOVERFLOW; + lstart = np->n_size + fl->l_start; + nfs_data_unlock(np); + if (error) + return (error); + break; + default: + return (EINVAL); + } + if (lstart < 0) + return (EINVAL); + start = lstart; + if (fl->l_len == 0) { + end = UINT64_MAX; + } else if (fl->l_len > 0) { + if ((fl->l_len - 1) > (OFF_MAX - lstart)) + return (EOVERFLOW); + end = start - 1 + fl->l_len; + } else { /* l_len is negative */ + if ((lstart + fl->l_len) < 0) + return (EINVAL); + end = start - 1; + start += fl->l_len; + } + if (error) + return (error); + + style = (flags & F_FLOCK) ? NFS_FILE_LOCK_STYLE_FLOCK : NFS_FILE_LOCK_STYLE_POSIX; + if ((style == NFS_FILE_LOCK_STYLE_FLOCK) && ((start != 0) || (end != UINT64_MAX))) + return (EINVAL); + + /* find the lock owner, alloc if not unlock */ + nlop = nfs_lock_owner_find(np, vfs_context_proc(ctx), (op != F_UNLCK)); + if (!nlop) { + error = (op == F_UNLCK) ? 
0 : ENOMEM; + if (error) + printf("nfs4_vnop_advlock: no lock owner %d\n", error); + goto out; + } + + if (op == F_GETLK) { + error = nfs4_getlock(np, nlop, fl, start, end, ctx); + } else { + /* find the open owner */ + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 0); + if (!noop) { + printf("nfs4_vnop_advlock: no open owner\n"); + error = EPERM; + goto out; + } + /* find the open file */ +restart: + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 0); + if (error) + error = EBADF; + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_advlock: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + printf("nfs4_vnop_advlock: no open file %d\n", error); + goto out; + } + if (op == F_UNLCK) { + error = nfs4_unlock(np, nofp, nlop, start, end, style, ctx); + } else if ((op == F_SETLK) || (op == F_SETLKW)) { + if ((op == F_SETLK) && (flags & F_WAIT)) + op = F_SETLKW; + error = nfs4_setlock(np, nofp, nlop, op, start, end, style, fl->l_type, ctx); + } else { + /* not getlk, unlock or lock? */ + error = EINVAL; + } + } + +out: + if (nlop) + nfs_lock_owner_rele(nlop); + if (noop) + nfs_open_owner_rele(noop); + return (error); +} + +/* + * Check if an open owner holds any locks on a file. + */ +int +nfs4_check_for_locks(struct nfs_open_owner *noop, struct nfs_open_file *nofp) +{ + struct nfs_lock_owner *nlop; + + TAILQ_FOREACH(nlop, &nofp->nof_np->n_lock_owners, nlo_link) { + if (nlop->nlo_open_owner != noop) + continue; + if (!TAILQ_EMPTY(&nlop->nlo_locks)) + break; + } + return (nlop ? 1 : 0); +} + +/* + * Reopen simple (no deny, no locks) open state that was lost. 
+ */ +void +nfs4_reopen(struct nfs_open_file *nofp, thread_t thd) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfsmount *nmp = NFSTONMP(nofp->nof_np); + vnode_t vp = NFSTOV(nofp->nof_np); + vnode_t dvp = NULL; + struct componentname cn; + const char *vname = NULL; + size_t namelen; + char smallname[128]; + char *filename = NULL; + int error = 0, done = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? PCATCH : 0; + struct timespec ts = { 1, 0 }; + + lck_mtx_lock(&nofp->nof_lock); + while (nofp->nof_flags & NFS_OPEN_FILE_REOPENING) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + msleep(&nofp->nof_flags, &nofp->nof_lock, slpflag|(PZERO-1), "nfsreopenwait", &ts); + } + if (!(nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + lck_mtx_unlock(&nofp->nof_lock); + return; + } + nofp->nof_flags |= NFS_OPEN_FILE_REOPENING; + lck_mtx_unlock(&nofp->nof_lock); + + dvp = vnode_getparent(vp); + vname = vnode_getname(vp); + if (!dvp || !vname) { + error = EIO; + goto out; + } + filename = &smallname[0]; + namelen = snprintf(filename, sizeof(smallname), "%s", vname); + if (namelen >= sizeof(smallname)) { + namelen++; /* snprintf result doesn't include '\0' */ + MALLOC(filename, char *, namelen, M_TEMP, M_WAITOK); + if (!filename) { + error = ENOMEM; + goto out; + } + snprintf(filename, namelen, "%s", vname); + } + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = filename; + cn.cn_namelen = namelen; + +restart: + done = 0; + if ((error = nfs_mount_state_in_use_start(nmp))) + goto out; + + if (nofp->nof_rw) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE); + if (!error && nofp->nof_w) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE); + if (!error && nofp->nof_r) + error = nfs4_open_reopen_rpc(nofp, thd, noop->noo_cred, &cn, dvp, &vp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE); + + if 
(nfs_mount_state_in_use_end(nmp, error)) { + if (error == NFSERR_GRACE) + goto restart; + error = 0; + goto out; + } + done = 1; +out: + lck_mtx_lock(&nofp->nof_lock); + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPENING; + if (error) + nofp->nof_flags |= NFS_OPEN_FILE_LOST; + if (done) + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + else + printf("nfs4_reopen: failed, error %d, lost %d\n", error, (nofp->nof_flags & NFS_OPEN_FILE_LOST) ? 1 : 0); + lck_mtx_unlock(&nofp->nof_lock); + if (filename && (filename != &smallname[0])) + FREE(filename, M_TEMP); + if (vname) + vnode_putname(vname); + if (dvp != NULLVP) + vnode_put(dvp); +} + +/* + * Send a normal OPEN RPC to open/create a file. + */ +int +nfs4_open_rpc( + struct nfs_open_file *nofp, + vfs_context_t ctx, + struct componentname *cnp, + struct vnode_attr *vap, + vnode_t dvp, + vnode_t *vpp, + int create, + int share_access, + int share_deny) +{ + return (nfs4_open_rpc_internal(nofp, ctx, vfs_context_thread(ctx), vfs_context_ucred(ctx), + cnp, vap, dvp, vpp, create, share_access, share_deny)); +} + +/* + * Send an OPEN RPC to reopen a file. + */ +int +nfs4_open_reopen_rpc( + struct nfs_open_file *nofp, + thread_t thd, + kauth_cred_t cred, + struct componentname *cnp, + vnode_t dvp, + vnode_t *vpp, + int share_access, + int share_deny) +{ + return (nfs4_open_rpc_internal(nofp, NULL, thd, cred, cnp, NULL, dvp, vpp, 0, share_access, share_deny)); +} + +/* + * common OPEN RPC code + * + * If create is set, ctx must be passed in. 
+ */ +int +nfs4_open_rpc_internal( + struct nfs_open_file *nofp, + vfs_context_t ctx, + thread_t thd, + kauth_cred_t cred, + struct componentname *cnp, + struct vnode_attr *vap, + vnode_t dvp, + vnode_t *vpp, + int create, + int share_access, + int share_deny) +{ + struct nfsmount *nmp; + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfs_vattr nvattr, dnvattr; + int error = 0, open_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status; + int nfsvers, numops, exclusive = 0, gotuid, gotgid; + u_int64_t xid, savedxid = 0; + nfsnode_t dnp = VTONFS(dvp); + nfsnode_t np, newnp = NULL; + vnode_t newvp = NULL; + struct nfsm_chain nmreq, nmrep; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + uint32_t rflags, delegation = 0, recall = 0, val; + struct nfs_stateid stateid, dstateid, *sid; + fhandle_t fh; + struct nfsreq *req = NULL; + struct nfs_dulookup dul; + + if (create && !ctx) + return (EINVAL); + + nmp = VTONMP(dvp); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + np = *vpp ? VTONFS(*vpp) : NULL; + if (create && vap) { + exclusive = (vap->va_vaflags & VA_EXCLUSIVE); + nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx); + gotuid = VATTR_IS_ACTIVE(vap, va_uid); + gotgid = VATTR_IS_ACTIVE(vap, va_gid); + } else { + exclusive = gotuid = gotgid = 0; + } + if (nofp) { + sid = &nofp->nof_stateid; + } else { + stateid.seqid = stateid.other[0] = stateid.other[1] = stateid.other[2] = 0; + sid = &stateid; + } + + if ((error = nfs_open_owner_set_busy(noop, thd))) + return (error); +again: + rflags = 0; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, SAVEFH, OPEN(CREATE?), GETATTR(FH), RESTOREFH, GETATTR + numops = 6; + nfsm_chain_build_alloc_init(error, &nmreq, 53 * NFSX_UNSIGNED + cnp->cn_namelen); + nfsm_chain_add_compound_header(error, &nmreq, create ? 
"create" : "open", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_SAVEFH); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, share_access); + nfsm_chain_add_32(error, &nmreq, share_deny); + + // open owner: clientid + uid + nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid + nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED); + nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner + + // openflag4 + nfsm_chain_add_32(error, &nmreq, create); + if (create) { + if (exclusive) { + static uint32_t create_verf; // XXX need a better verifier + create_verf++; + nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE); + /* insert 64 bit verifier */ + nfsm_chain_add_32(error, &nmreq, create_verf); + nfsm_chain_add_32(error, &nmreq, create_verf); + } else { + nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED); + nfsm_chain_add_fattr4(error, &nmreq, vap, nmp); + } + } + + // open_claim4 + nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_NULL); + nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); + NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); + nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_RESTOREFH); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + if (!error) + error = busyerror = nfs_node_set_busy(dnp, thd); + 
nfsmout_if(error); + + if (create) + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); + + error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, NULL, &req); + if (!error) { + if (create) + nfs_dulookup_start(&dul, dnp, ctx); + error = nfs_request_async_finish(req, &nmrep, &xid, &status); + savedxid = xid; + } + + if (create) + nfs_dulookup_finish(&dul, dnp, ctx); + + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsm_chain_op_check(error, &nmrep, NFS_OP_SAVEFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, sid); + nfsm_chain_check_change_info(error, &nmrep, dnp); + nfsm_chain_get_32(error, &nmrep, rflags); + bmlen = NFS_ATTR_BITMAP_LEN; + nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); + nfsm_chain_get_32(error, &nmrep, delegation); + if (!error) + switch (delegation) { + case NFS_OPEN_DELEGATE_NONE: + break; + case NFS_OPEN_DELEGATE_READ: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + break; + case NFS_OPEN_DELEGATE_WRITE: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // space (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + break; + default: + error = EBADRPC; + break; + } + /* At this point if we have no error, the object was created/opened. 
*/ + /* if we don't get attributes, then we should lookitup. */ + open_error = error; + nfsmout_if(error); + if (create && !exclusive) + nfs_vattr_set_supported(bitmap, vap); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsmout_if(error); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); + nfsmout_if(error); + if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { + printf("nfs: open/create didn't return filehandle?\n"); error = EBADRPC; goto nfsmout; } + if (!create && np && !NFS_CMPFH(np, fh.fh_data, fh.fh_len)) { + // XXX for the open case, what if fh doesn't match the vnode we think we're opening? + printf("nfs4_open_rpc: warning: file handle mismatch\n"); + } /* directory attributes: if we don't get them, make sure to invalidate */ nfsm_chain_op_check(error, &nmrep, NFS_OP_RESTOREFH); nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); nfsm_chain_loadattr(error, &nmrep, dnp, nfsvers, NULL, &xid); if (error) NATTRINVALIDATE(dnp); + nfsmout_if(error); + + if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX) + nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK; + + if (rflags & NFS_OPEN_RESULT_CONFIRM) { + nfs_node_unlock(dnp); + lockerror = ENOENT; + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + // PUTFH, OPEN_CONFIRM, GETATTR + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_confirm", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM); + nfsm_chain_add_stateid(error, &nmreq, sid); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); 
+ nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request2(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, 0, &nmrep, &xid, &status); + + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, sid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsmout_if(error); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, NULL, NULL); + nfsmout_if(error); + savedxid = xid; + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; + } + +nfsmout: + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + + if (!lockerror && create) { + if (!open_error && (dnp->n_flag & NNEGNCENTRIES)) { + dnp->n_flag &= ~NNEGNCENTRIES; + cache_purge_negatives(dvp); + } + dnp->n_flag |= NMODIFIED; + nfs_node_unlock(dnp); + lockerror = ENOENT; + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); + } + if (!lockerror) + nfs_node_unlock(dnp); + if (!error && create && fh.fh_len) { + /* create the vnode with the filehandle and attributes */ + xid = savedxid; + error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &newnp); + if (!error) + newvp = NFSTOV(newnp); + } + if (!busyerror) + nfs_node_clear_busy(dnp); + if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) { + if (!np) + np = newnp; + if (!error && np && !recall) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= ((delegation == NFS_OPEN_DELEGATE_READ) ? 
N_DELEG_READ : N_DELEG_WRITE); + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + if (recall) { + nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, cred); + if (np) { + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + } + } + } + if (error) { + if (exclusive && (error == NFSERR_NOTSUPP)) { + exclusive = 0; + goto again; + } + if (newvp) { + nfs_node_unlock(newnp); + vnode_put(newvp); + } + } else if (create) { + nfs_node_unlock(newnp); + if (exclusive) { + error = nfs4_setattr_rpc(newnp, vap, ctx); + if (error && (gotuid || gotgid)) { + /* it's possible the server didn't like our attempt to set IDs. */ + /* so, let's try it again without those */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + error = nfs4_setattr_rpc(newnp, vap, ctx); + } + } + if (error) + vnode_put(newvp); + else + *vpp = newvp; + } + nfs_open_owner_clear_busy(noop); + return (error); +} + +/* + * Send an OPEN RPC to reclaim an open file. 
+ */ +int +nfs4_open_reclaim_rpc( + struct nfs_open_file *nofp, + int share_access, + int share_deny) +{ + struct nfsmount *nmp; + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfs_vattr nvattr; + int error = 0, lockerror = ENOENT, status; + int nfsvers, numops; + u_int64_t xid; + nfsnode_t np = nofp->nof_np; + struct nfsm_chain nmreq, nmrep; + uint32_t bitmap[NFS_ATTR_BITMAP_LEN], bmlen; + uint32_t rflags = 0, delegation, recall = 0, val; + fhandle_t fh; + struct nfs_stateid dstateid; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, current_thread()))) + return (error); + + delegation = NFS_OPEN_DELEGATE_NONE; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, OPEN, GETATTR(FH) + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 48 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_reclaim", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, share_access); + nfsm_chain_add_32(error, &nmreq, share_deny); + // open owner: clientid + uid + nfsm_chain_add_64(error, &nmreq, nmp->nm_clientid); // open_owner4.clientid + nfsm_chain_add_32(error, &nmreq, NFSX_UNSIGNED); + nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(noop->noo_cred)); // open_owner4.owner + // openflag4 + nfsm_chain_add_32(error, &nmreq, NFS_OPEN_NOCREATE); + // open_claim4 + nfsm_chain_add_32(error, &nmreq, NFS_CLAIM_PREVIOUS); + delegation = (np->n_openflags & N_DELEG_READ) ? NFS_OPEN_DELEGATE_READ : + (np->n_openflags & N_DELEG_WRITE) ? 
NFS_OPEN_DELEGATE_WRITE : + NFS_OPEN_DELEGATE_NONE; + nfsm_chain_add_32(error, &nmreq, delegation); + delegation = NFS_OPEN_DELEGATE_NONE; + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + NFS_COPY_ATTRIBUTES(nfs_getattr_bitmap, bitmap); + NFS_BITMAP_SET(bitmap, NFS_FATTR_FILEHANDLE); + nfsm_chain_add_bitmap_masked(error, &nmreq, bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + + error = nfs_request2(np, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, current_thread(), noop->noo_cred, R_RECOVER, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error = lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_check_change_info(error, &nmrep, np); + nfsm_chain_get_32(error, &nmrep, rflags); + bmlen = NFS_ATTR_BITMAP_LEN; + nfsm_chain_get_bitmap(error, &nmrep, bitmap, bmlen); + nfsm_chain_get_32(error, &nmrep, delegation); + if (!error) + switch (delegation) { + case NFS_OPEN_DELEGATE_NONE: + break; + case NFS_OPEN_DELEGATE_READ: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + nfsm_chain_get_32(error, &nmrep, recall); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + if (!error) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= N_DELEG_READ; + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + break; + case NFS_OPEN_DELEGATE_WRITE: + nfsm_chain_get_stateid(error, &nmrep, &dstateid); + 
nfsm_chain_get_32(error, &nmrep, recall); + // space (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + // ACE: (skip) XXX + nfsm_chain_adv(error, &nmrep, 3 * NFSX_UNSIGNED); + nfsm_chain_get_32(error, &nmrep, val); /* string length */ + nfsm_chain_adv(error, &nmrep, nfsm_rndup(val)); + if (!error) { + /* stuff the delegation state in the node */ + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + np->n_openflags |= N_DELEG_WRITE; + np->n_dstateid = dstateid; + lck_mtx_unlock(&np->n_openlock); + } + break; + default: + error = EBADRPC; + break; + } + nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); + error = nfs4_parsefattr(&nmrep, NULL, &nvattr, &fh, NULL); + nfsmout_if(error); + if (!NFS_BITMAP_ISSET(nvattr.nva_bitmap, NFS_FATTR_FILEHANDLE)) { + printf("nfs: open reclaim didn't return filehandle?\n"); + error = EBADRPC; + goto nfsmout; + } + if (!NFS_CMPFH(np, fh.fh_data, fh.fh_len)) { + // XXX what if fh doesn't match the vnode we think we're re-opening? 
+ printf("nfs4_open_reclaim_rpc: warning: file handle mismatch\n"); + } + error = nfs_loadattrcache(np, &nvattr, &xid, 1); + nfsmout_if(error); + if (rflags & NFS_OPEN_RESULT_LOCKTYPE_POSIX) + nofp->nof_flags |= NFS_OPEN_FILE_POSIXLOCK; +nfsmout: + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); + if ((delegation == NFS_OPEN_DELEGATE_READ) || (delegation == NFS_OPEN_DELEGATE_WRITE)) { + if (recall) { + nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, current_thread(), noop->noo_cred); + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + } + } + return (error); +} - if (rflags & NFS_OPEN_RESULT_CONFIRM) { - nfsm_chain_cleanup(&nmreq); - nfsm_chain_cleanup(&nmrep); - // PUTFH, OPEN_CONFIRM, GETATTR - numops = 3; - nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "create_confirm", numops); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_CONFIRM); - nfsm_chain_add_32(error, &nmreq, stateid[0]); - nfsm_chain_add_32(error, &nmreq, stateid[1]); - nfsm_chain_add_32(error, &nmreq, stateid[2]); - nfsm_chain_add_32(error, &nmreq, stateid[3]); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - numops--; - nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); - nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, - NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); - nfsm_chain_build_done(error, &nmreq); - nfsm_assert(error, (numops == 0), EPROTO); - nfsmout_if(error); - error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); +int +nfs4_open_downgrade_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + vfs_context_t ctx) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct 
nfsmount *nmp; + int error, lockerror = ENOENT, status, nfsvers, numops; + struct nfsm_chain nmreq, nmrep; + u_int64_t xid; - nfsm_chain_skip_tag(error, &nmrep); - nfsm_chain_get_32(error, &nmrep, numops); - nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); - nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_CONFIRM); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); - nfsmout_if(error); - NFS_CLEAR_ATTRIBUTES(nvattr.nva_bitmap); - error = nfs4_parsefattr(&nmrep, NULL, &nvattr, NULL, NULL); - nfsmout_if(error); - savedxid = xid; - } + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, vfs_context_thread(ctx)))) + return (error); + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, OPEN_DOWNGRADE, GETATTR + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "open_downgrd", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_OPEN_DOWNGRADE); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + nfsm_chain_add_32(error, &nmreq, noop->noo_seqid); + nfsm_chain_add_32(error, &nmreq, nofp->nof_access); + nfsm_chain_add_32(error, &nmreq, nofp->nof_deny); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request(np, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock(np))) + error 
= lockerror; + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); nfsmout_if(error); + nfsm_chain_op_check(error, &nmrep, NFS_OP_OPEN_DOWNGRADE); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + return (error); +} - // PUTFH, CLOSE - numops = 2; - nfsm_chain_build_alloc_init(error, &nmreq, 19 * NFSX_UNSIGNED); - nfsm_chain_add_compound_header(error, &nmreq, "create_close", numops); +int +nfs4_close_rpc( + nfsnode_t np, + struct nfs_open_file *nofp, + thread_t thd, + kauth_cred_t cred, + int flag) +{ + struct nfs_open_owner *noop = nofp->nof_owner; + struct nfsmount *nmp; + int error, lockerror = ENOENT, status, nfsvers, numops; + struct nfsm_chain nmreq, nmrep; + u_int64_t xid; + + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + nfsvers = nmp->nm_vers; + + if ((error = nfs_open_owner_set_busy(noop, thd))) + return (error); + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + // PUTFH, CLOSE, GETFH + numops = 3; + nfsm_chain_build_alloc_init(error, &nmreq, 23 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "close", numops); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); - nfsm_chain_add_fh(error, &nmreq, nfsvers, fh.fh_data, fh.fh_len); + nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); numops--; nfsm_chain_add_32(error, &nmreq, NFS_OP_CLOSE); - nfsm_chain_add_32(error, &nmreq, seqid); - seqid++; - nfsm_chain_add_32(error, &nmreq, stateid[0]); - nfsm_chain_add_32(error, &nmreq, stateid[1]); - nfsm_chain_add_32(error, &nmreq, stateid[2]); - nfsm_chain_add_32(error, &nmreq, stateid[3]); + nfsm_chain_add_32(error, 
&nmreq, noop->noo_seqid); + nfsm_chain_add_stateid(error, &nmreq, &nofp->nof_stateid); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_GETATTR); + nfsm_chain_add_bitmap_masked(error, &nmreq, nfs_getattr_bitmap, + NFS_ATTR_BITMAP_LEN, nmp->nm_fsattr.nfsa_supp_attr); nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - error = nfs_request(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + error = nfs_request2(np, NULL, &nmreq, NFSPROC4_COMPOUND, thd, cred, flag, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(np))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsmout_if(error); nfsm_chain_op_check(error, &nmrep, NFS_OP_CLOSE); - nfsm_chain_get_32(error, &nmrep, stateid[0]); - nfsm_chain_get_32(error, &nmrep, stateid[1]); - nfsm_chain_get_32(error, &nmrep, stateid[2]); - nfsm_chain_get_32(error, &nmrep, stateid[3]); - if (error) - printf("nfs4_vnop_create: close error %d\n", error); + nfs_owner_seqid_increment(noop, NULL, error); + nfsm_chain_get_stateid(error, &nmrep, &nofp->nof_stateid); + nfsm_chain_op_check(error, &nmrep, NFS_OP_GETATTR); + nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); +nfsmout: + if (!lockerror) + nfs_node_unlock(np); + nfs_open_owner_clear_busy(noop); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + return (error); +} + + +int +nfs4_delegreturn_rpc(struct nfsmount *nmp, u_char *fhp, int fhlen, struct nfs_stateid *sid, thread_t thd, kauth_cred_t cred) +{ + int error = 0, status, numops; + uint64_t xid; + struct nfsm_chain nmreq, nmrep; + + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + // PUTFH, DELEGRETURN + numops = 2; + nfsm_chain_build_alloc_init(error, &nmreq, 16 * NFSX_UNSIGNED); + nfsm_chain_add_compound_header(error, &nmreq, "delegreturn", numops); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_PUTFH); + 
nfsm_chain_add_fh(error, &nmreq, nmp->nm_vers, fhp, fhlen); + numops--; + nfsm_chain_add_32(error, &nmreq, NFS_OP_DELEGRETURN); + nfsm_chain_add_stateid(error, &nmreq, sid); + nfsm_chain_build_done(error, &nmreq); + nfsm_assert(error, (numops == 0), EPROTO); + nfsmout_if(error); + error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC4_COMPOUND, thd, cred, R_RECOVER, &nmrep, &xid, &status); + nfsm_chain_skip_tag(error, &nmrep); + nfsm_chain_get_32(error, &nmrep, numops); + nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); + nfsm_chain_op_check(error, &nmrep, NFS_OP_DELEGRETURN); nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); + return (error); +} - if (!lockerror) { - if (!create_error && (dnp->n_flag & NNEGNCENTRIES)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge_negatives(dvp); + +/* + * NFSv4 read call. + * Just call nfs_bioread() to do the work. + * + * Note: the exec code paths have a tendency to call VNOP_READ (and VNOP_MMAP) + * without first calling VNOP_OPEN, so we make sure the file is open here. 
+ */ +int +nfs4_vnop_read( + struct vnop_read_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + struct uio *a_uio; + int a_ioflag; + vfs_context_t a_context; + } */ *ap) +{ + vnode_t vp = ap->a_vp; + vfs_context_t ctx = ap->a_context; + nfsnode_t np; + struct nfsmount *nmp; + struct nfs_open_owner *noop; + struct nfs_open_file *nofp; + int error; + + if (vnode_vtype(ap->a_vp) != VREG) + return (EPERM); + + np = VTONFS(vp); + nmp = NFSTONMP(np); + if (!nmp) + return (ENXIO); + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); +restart: + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_read: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + if (!nofp->nof_access) { + /* we don't have the file open, so open it for read access */ + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); } - dnp->n_flag |= NMODIFIED; - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) + nofp = NULL; + if (!error) + error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (!error) + nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE; + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; } } + nfs_open_owner_rele(noop); + if (error) + return (error); + return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context)); +} - if (!error && fh.fh_len) { - /* create the vnode with the filehandle 
and attributes */ - xid = savedxid; - error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np); - if (!error) - newvp = NFSTOV(np); +/* + * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. + * Files are created using the NFSv4 OPEN RPC. So we must open the + * file to create it and then close it. + */ +int +nfs4_vnop_create( + struct vnop_create_args /* { + struct vnodeop_desc *a_desc; + vnode_t a_dvp; + vnode_t *a_vpp; + struct componentname *a_cnp; + struct vnode_attr *a_vap; + vfs_context_t a_context; + } */ *ap) +{ + vfs_context_t ctx = ap->a_context; + struct componentname *cnp = ap->a_cnp; + struct vnode_attr *vap = ap->a_vap; + vnode_t dvp = ap->a_dvp; + vnode_t *vpp = ap->a_vpp; + struct nfsmount *nmp; + nfsnode_t np; + int error = 0; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + + nmp = VTONMP(dvp); + if (!nmp) + return (ENXIO); + + nfs_avoid_needless_id_setting_on_create(VTONFS(dvp), vap, ctx); + + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); + +restart: + error = nfs_mount_state_in_use_start(nmp); + if (error) { + nfs_open_owner_rele(noop); + return (error); } - nfs_dulookup_finish(&dul, dnp, ctx); + error = nfs_open_file_find(NULL, noop, &nofp, 0, 0, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + printf("nfs_vnop_create: LOST\n"); + error = EIO; + } + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto restart; + } + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nofp = NULL; + goto out; + } - /* - * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry - * if we can succeed in looking up the object. 
- */ - if ((create_error == EEXIST) || (!create_error && !newvp)) { - error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np); - if (!error) { - newvp = NFSTOV(np); - if (vnode_vtype(newvp) != VLNK) - error = EEXIST; - } + nofp->nof_opencnt++; + nofp->nof_access = NFS_OPEN_SHARE_ACCESS_BOTH; + nofp->nof_deny = NFS_OPEN_SHARE_DENY_NONE; + nofp->nof_rw++; + + error = nfs4_open_rpc(nofp, ctx, cnp, vap, dvp, vpp, NFS_OPEN_CREATE, + NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE); + if (!error && !*vpp) { + printf("nfs4_open_rpc returned without a node?\n"); + /* Hmmm... with no node, we have no filehandle and can't close it */ + error = EIO; } - if (!lockerror) - nfs_unlock(dnp); if (error) { - if (newvp) { - nfs_unlock(np); - vnode_put(newvp); + nofp->nof_rw--; + nofp->nof_access = 0; + nofp->nof_deny = 0; + nofp->nof_opencnt--; + } + if (*vpp) { + nofp->nof_np = np = VTONFS(*vpp); + /* insert nofp onto np's open list */ + TAILQ_INSERT_HEAD(&np->n_opens, nofp, nof_link); + if (!error) { + nofp->nof_flags |= NFS_OPEN_FILE_CREATE; + nofp->nof_creator = current_thread(); } - } else { - nfs_unlock(np); - *vpp = newvp; } +out: + if (nofp) + nfs_open_file_clear_busy(nofp); + if (nfs_mount_state_in_use_end(nmp, error)) { + nofp = NULL; + goto restart; + } + if (noop) + nfs_open_owner_rele(noop); return (error); } +void +nfs_avoid_needless_id_setting_on_create(nfsnode_t dnp, struct vnode_attr *vap, vfs_context_t ctx) +{ + /* + * Don't bother setting UID if it's the same as the credential performing the create. + * Don't bother setting GID if it's the same as the directory or credential. 
+ */ + if (VATTR_IS_ACTIVE(vap, va_uid)) { + if (kauth_cred_getuid(vfs_context_ucred(ctx)) == vap->va_uid) + VATTR_CLEAR_ACTIVE(vap, va_uid); + } + if (VATTR_IS_ACTIVE(vap, va_gid)) { + if ((vap->va_gid == dnp->n_vattr.nva_gid) || + (kauth_cred_getgid(vfs_context_ucred(ctx)) == vap->va_gid)) + VATTR_CLEAR_ACTIVE(vap, va_gid); + } +} + /* * Note: the NFSv4 CREATE RPC is for everything EXCEPT regular files. */ -static int +int nfs4_create_rpc( vfs_context_t ctx, nfsnode_t dnp, @@ -1711,7 +5019,7 @@ nfs4_create_rpc( { struct nfsmount *nmp; struct nfs_vattr nvattr, dnvattr; - int error = 0, create_error = EIO, lockerror = ENOENT, status; + int error = 0, create_error = EIO, lockerror = ENOENT, busyerror = ENOENT, status; int nfsvers, numops; u_int64_t xid, savedxid = 0; nfsnode_t np = NULL; @@ -1754,7 +5062,10 @@ nfs4_create_rpc( return (EINVAL); } - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx); + + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -1794,9 +5105,6 @@ nfs4_create_rpc( nfsm_chain_build_done(error, &nmreq); nfsm_assert(error, (numops == 0), EPROTO); nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC4_COMPOUND, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); @@ -1805,6 +5113,8 @@ nfs4_create_rpc( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -1847,13 +5157,9 @@ nfs4_create_rpc( cache_purge_negatives(NFSTOV(dnp)); } dnp->n_flag |= NMODIFIED; - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if 
(NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(NFSTOV(dnp)); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); } if (!error && fh.fh_len) { @@ -1878,15 +5184,15 @@ nfs4_create_rpc( error = EEXIST; } } - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (error) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else { - nfs_unlock(np); + nfs_node_unlock(np); *npp = np; } return (error); @@ -1987,7 +5293,7 @@ nfs4_vnop_link( vnode_t vp = ap->a_vp; vnode_t tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - int error = 0, status; + int error = 0, lockerror = ENOENT, status; struct nfsmount *nmp; nfsnode_t np = VTONFS(vp); nfsnode_t tdnp = VTONFS(tdvp); @@ -2010,8 +5316,7 @@ nfs4_vnop_link( */ nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR); - error = nfs_lock2(tdnp, np, NFS_NODE_LOCK_EXCLUSIVE); - if (error) + if ((error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx)))) return (error); nfsm_chain_null(&nmreq); @@ -2047,6 +5352,10 @@ nfs4_vnop_link( nfsmout_if(error); error = nfs_request(tdnp, NULL, &nmreq, NFSPROC4_COMPOUND, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock2(tdnp, np))) { + error = lockerror; + goto nfsmout; + } nfsm_chain_skip_tag(error, &nmrep); nfsm_chain_get_32(error, &nmrep, numops); nfsm_chain_op_check(error, &nmrep, NFS_OP_PUTFH); @@ -2070,7 +5379,8 @@ nfs4_vnop_link( nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - tdnp->n_flag |= NMODIFIED; + if (!lockerror) + tdnp->n_flag |= NMODIFIED; /* Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; @@ -2078,7 +5388,9 @@ nfs4_vnop_link( tdnp->n_flag &= ~NNEGNCENTRIES; cache_purge_negatives(tdvp); } - nfs_unlock2(tdnp, np); + if (!lockerror) + nfs_node_unlock2(tdnp, np); + nfs_node_clear_busy2(tdnp, np); return (error); } @@ -2105,27 +5417,20 @@ nfs4_vnop_rmdir( if (vnode_vtype(vp) != VDIR) return (EINVAL); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); - - if ((error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)))) return (error); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfs_dulookup_start(&dul, dnp, ctx); error = nfs4_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen, vfs_context_thread(ctx), vfs_context_ucred(ctx)); - cache_purge(vp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(NFS_VER4, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(NFS_VER4, dnp, &dnvattr); - } - } - + nfs_name_cache_purge(dnp, np, cnp, ctx); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, NGA_CACHED); nfs_dulookup_finish(&dul, dnp, ctx); - nfs_unlock2(dnp, np); + nfs_node_clear_busy2(dnp, np); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index b1dccb036..1c1c19123 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -364,11 +364,20 @@ nfs_buf_page_inval(vnode_t vp, off_t offset) */ if (bp->nb_dirtyend > 0) { int start = offset - NBOFF(bp); - if (bp->nb_dirtyend <= start || - bp->nb_dirtyoff >= (start + PAGE_SIZE)) - error = 0; - else + if ((bp->nb_dirtyend > start) && + (bp->nb_dirtyoff < (start + PAGE_SIZE))) { + /* + * Before returning the bad news, move the + * buffer to the start of the delwri list and + * give the list a push to try to flush the + * buffer out. + */ error = EBUSY; + nfs_buf_remfree(bp); + TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free); + nfsbufdelwricnt++; + nfs_buf_delwri_push(1); + } } out: lck_mtx_unlock(nfs_buf_mutex); @@ -481,7 +490,7 @@ nfs_buf_map(struct nfsbuf *bp) if (!ISSET(bp->nb_flags, NB_PAGELIST)) return (EINVAL); - kret = ubc_upl_map(bp->nb_pagelist, (vm_address_t *)&(bp->nb_data)); + kret = ubc_upl_map(bp->nb_pagelist, (vm_offset_t *)&(bp->nb_data)); if (kret != KERN_SUCCESS) panic("nfs_buf_map: ubc_upl_map() failed with (%d)", kret); if (bp->nb_data == 0) @@ -523,7 +532,7 @@ nfs_buf_normalize_valid_range(nfsnode_t np, struct nfsbuf *bp) * process some entries on the delayed write queue * (must be called with nfs_buf_mutex held) */ -static void +void nfs_buf_delwri_service(void) { struct nfsbuf *bp; @@ -565,7 +574,7 @@ nfs_buf_delwri_service(void) /* * thread to service the delayed write queue when asked */ -static void +void nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr) { struct timespec ts = { 30, 0 }; @@ -585,7 +594,7 @@ nfs_buf_delwri_thread(__unused void *arg, __unused wait_result_t wr) * try to push out some delayed/uncommitted writes * ("locked" indicates whether nfs_buf_mutex is already held) */ -static void +void nfs_buf_delwri_push(int locked) { if (TAILQ_EMPTY(&nfsbufdelwri)) @@ -627,7 +636,7 @@ int nfs_buf_get( nfsnode_t np, daddr64_t blkno, - int size, + uint32_t size, thread_t thd, int flags, struct nfsbuf **bpp) @@ -635,7 +644,7 @@ nfs_buf_get( vnode_t 
vp = NFSTOV(np); struct nfsmount *nmp = VTONMP(vp); struct nfsbuf *bp; - int bufsize; + uint32_t bufsize; int slpflag = PCATCH; int operation = (flags & NBLK_OPMASK); int error = 0; @@ -655,7 +664,7 @@ nfs_buf_get( if (!UBCINFOEXISTS(vp)) { operation = NBLK_META; - } else if (bufsize < nmp->nm_biosize) { + } else if (bufsize < (uint32_t)nmp->nm_biosize) { /* reg files should always have biosize blocks */ bufsize = nmp->nm_biosize; } @@ -855,8 +864,8 @@ nfs_buf_get( } } - /* setup nfsbuf */ - bp->nb_lflags = NBL_BUSY; + /* set up nfsbuf */ + SET(bp->nb_lflags, NBL_BUSY); bp->nb_flags = 0; bp->nb_lblkno = blkno; /* insert buf in hash */ @@ -969,9 +978,9 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) vp = np ? NFSTOV(np) : NULL; if (vp && UBCINFOEXISTS(vp) && bp->nb_bufsize) { - int upl_flags; + int upl_flags, rv; upl_t upl; - int i, rv; + uint32_t i; if (!ISSET(bp->nb_flags, NB_PAGELIST) && !ISSET(bp->nb_flags, NB_INVAL)) { rv = nfs_buf_upl_setup(bp); @@ -1012,6 +1021,10 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) upl_flags = UPL_COMMIT_SET_DIRTY; else upl_flags = UPL_COMMIT_CLEAR_DIRTY; + + if (!ISSET(bp->nb_flags, (NB_NEEDCOMMIT | NB_DELWRI))) + upl_flags |= UPL_COMMIT_CLEAR_PRECIOUS; + ubc_upl_commit_range(upl, i*PAGE_SIZE, PAGE_SIZE, upl_flags | @@ -1020,12 +1033,13 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) } } pagelist_cleanup_done: - /* was this the last buffer in the file? 
*/ + /* invalidate any pages past EOF */ if (NBOFF(bp) + bp->nb_bufsize > (off_t)(np->n_size)) { - /* if so, invalidate all pages of last buffer past EOF */ off_t start, end; start = trunc_page_64(np->n_size) + PAGE_SIZE_64; end = trunc_page_64(NBOFF(bp) + bp->nb_bufsize); + if (start < NBOFF(bp)) + start = NBOFF(bp); if (end > start) { if (!(rv = ubc_sync_range(vp, start, end, UBC_INVALIDATE))) printf("nfs_buf_release(): ubc_sync_range failed!\n"); @@ -1172,6 +1186,9 @@ nfs_buf_iodone(struct nfsbuf *bp) * any throttled write operations */ vnode_writedone(NFSTOV(bp->nb_np)); + nfs_node_lock_force(bp->nb_np); + bp->nb_np->n_numoutput--; + nfs_node_unlock(bp->nb_np); } if (ISSET(bp->nb_flags, NB_ASYNC)) { /* if async, release it */ SET(bp->nb_flags, NB_DONE); /* note that it's done */ @@ -1219,9 +1236,9 @@ nfs_buf_write_delayed(struct nfsbuf *bp) vnode_waitforwrites(NFSTOV(np), VNODE_ASYNC_THROTTLE, 0, 0, "nfs_buf_write_delayed"); /* the file is in a modified state, so make sure the flag's set */ - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); /* * If we have too many delayed write buffers, @@ -1266,10 +1283,10 @@ nfs_buf_check_write_verifier(nfsnode_t np, struct nfsbuf *bp) /* write verifier changed, clear commit/wverf flags */ CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_STALEWVERF)); bp->nb_verf = 0; - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); - nfs_unlock(np); + nfs_node_unlock(np); } /* @@ -1303,7 +1320,7 @@ nfs_buf_acquire(struct nfsbuf *bp, int flags, int slpflag, int slptimeo) if (ISSET(bp->nb_lflags, NBL_BUSY)) { /* - * since the mutex_lock may block, the buffer + * since the lck_mtx_lock may block, the buffer * may become BUSY, so we need to recheck for * a NOWAIT request */ @@ -1442,7 +1459,7 @@ nfs_buf_read(struct nfsbuf *bp) NFS_BUF_MAP(bp); - OSAddAtomic(1, (SInt32 *)&nfsstats.read_bios); + OSAddAtomic(1, 
&nfsstats.read_bios); error = nfs_buf_read_rpc(bp, thd, cred); /* @@ -1468,7 +1485,7 @@ nfs_buf_read_finish(struct nfsbuf *bp) /* update valid range */ bp->nb_validoff = 0; bp->nb_validend = bp->nb_endio; - if (bp->nb_endio < bp->nb_bufsize) { + if (bp->nb_endio < (int)bp->nb_bufsize) { /* * The read may be short because we have unflushed writes * that are extending the file size and the reads hit the @@ -1506,7 +1523,8 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) struct nfsmount *nmp; nfsnode_t np = bp->nb_np; int error = 0, nfsvers, async; - int offset, length, nmrsize, nrpcs, len; + int offset, nrpcs; + uint32_t nmrsize, length, len; off_t boff; struct nfsreq *req; struct nfsreq_cbinfo cb; @@ -1557,6 +1575,8 @@ nfs_buf_read_rpc(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) len = (length > nmrsize) ? nmrsize : length; cb.rcb_args[0] = offset; cb.rcb_args[1] = len; + if (nmp->nm_vers >= NFS_VER4) + cb.rcb_args[2] = nmp->nm_stategenid; req = NULL; error = nmp->nm_funcs->nf_read_rpc_async(np, boff + offset, len, thd, cred, &cb, &req); if (error) @@ -1618,8 +1638,8 @@ nfs_buf_read_rpc_finish(struct nfsreq *req) nfsnode_t np; thread_t thd; kauth_cred_t cred; - struct uio uio; - struct iovec_32 io; + uio_t auio; + char uio_buf [ UIO_SIZEOF(1) ]; finish: np = req->r_np; @@ -1645,28 +1665,33 @@ nfs_buf_read_rpc_finish(struct nfsreq *req) offset = cb.rcb_args[0]; rlen = length = cb.rcb_args[1]; - uio.uio_iovs.iov32p = &io; - uio.uio_iovcnt = 1; - uio.uio_rw = UIO_READ; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif - io.iov_len = length; - uio_uio_resid_set(&uio, io.iov_len); - uio.uio_offset = NBOFF(bp) + offset; - io.iov_base = (uintptr_t) bp->nb_data + offset; + auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE, + UIO_READ, &uio_buf, sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), 
length); /* finish the RPC */ - error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, &uio, &rlen, &eof); + error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, auio, &rlen, &eof); if ((error == EINPROGRESS) && cb.rcb_func) { /* async request restarted */ if (IS_VALID_CRED(cred)) kauth_cred_unref(&cred); return; } - + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) { + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_buf_read_rpc_finish: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + if (!(error = nfs_mount_state_wait_for_recovery(nmp))) { + rlen = 0; + goto readagain; + } + } if (error) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error; @@ -1692,17 +1717,21 @@ nfs_buf_read_rpc_finish(struct nfsreq *req) * requested, so we need to issue another read for the rest. * (Don't bother if the buffer already hit an error.) */ +readagain: offset += rlen; length -= rlen; cb.rcb_args[0] = offset; cb.rcb_args[1] = length; - error = nmp->nm_funcs->nf_read_rpc_async(np, offset, length, thd, cred, &cb, &rreq); + if (nmp->nm_vers >= NFS_VER4) + cb.rcb_args[2] = nmp->nm_stategenid; + error = nmp->nm_funcs->nf_read_rpc_async(np, NBOFF(bp) + offset, length, thd, cred, &cb, &rreq); if (!error) { if (IS_VALID_CRED(cred)) kauth_cred_unref(&cred); if (!cb.rcb_func) { /* if !async we'll need to wait for this RPC to finish */ req = rreq; + rreq = NULL; goto finish; } /* @@ -1752,7 +1781,7 @@ nfs_buf_read_rpc_finish(struct nfsreq *req) * Do buffer readahead. * Initiate async I/O to read buffers not in cache. 
*/ -static int +int nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn, thread_t thd, kauth_cred_t cred) { struct nfsmount *nmp = NFSTONMP(np); @@ -1768,9 +1797,17 @@ nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn for (nra = 0; (nra < nmp->nm_readahead) && (*rabnp <= lastrabn); nra++, *rabnp = *rabnp + 1) { /* check if block exists and is valid. */ + if ((*rabnp * nmp->nm_biosize) >= (off_t)np->n_size) { + /* stop reading ahead if we're beyond EOF */ + *rabnp = lastrabn; + break; + } error = nfs_buf_get(np, *rabnp, nmp->nm_biosize, thd, NBLK_READ|NBLK_NOWAIT, &bp); if (error) break; + nfs_node_lock_force(np); + np->n_lastrahead = *rabnp; + nfs_node_unlock(np); if (!bp) continue; if ((ioflag & IO_NOCACHE) && ISSET(bp->nb_flags, NB_CACHE) && @@ -1798,209 +1835,156 @@ nfs_buf_readahead(nfsnode_t np, int ioflag, daddr64_t *rabnp, daddr64_t lastrabn } /* - * NFS buffer I/O for reading files/directories. + * NFS buffer I/O for reading files. 
*/ int -nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context_t ctx) +nfs_bioread(nfsnode_t np, uio_t uio, int ioflag, vfs_context_t ctx) { vnode_t vp = NFSTOV(np); struct nfsbuf *bp = NULL; struct nfs_vattr nvattr; struct nfsmount *nmp = VTONMP(vp); - daddr64_t lbn, rabn = 0, lastrabn, maxrabn = -1, tlbn; + daddr64_t lbn, rabn = 0, lastrabn, maxrabn = -1; off_t diff; int error = 0, n = 0, on = 0; - int nfsvers, biosize; - caddr_t dp; - struct dirent *direntp = NULL; - enum vtype vtype; + int nfsvers, biosize, modified, readaheads = 0; thread_t thd; kauth_cred_t cred; + int64_t io_resid; - FSDBG_TOP(514, np, uio->uio_offset, uio_uio_resid(uio), ioflag); - - if (uio_uio_resid(uio) == 0) { - FSDBG_BOT(514, np, 0xd1e0001, 0, 0); - return (0); - } - if (uio->uio_offset < 0) { - FSDBG_BOT(514, np, 0xd1e0002, 0, EINVAL); - return (EINVAL); - } + FSDBG_TOP(514, np, uio_offset(uio), uio_resid(uio), ioflag); nfsvers = nmp->nm_vers; biosize = nmp->nm_biosize; thd = vfs_context_thread(ctx); cred = vfs_context_ucred(ctx); - vtype = vnode_vtype(vp); - if ((vtype != VREG) && (vtype != VDIR)) { - printf("nfs_bioread: type %x unexpected\n", vtype); + if (vnode_vtype(vp) != VREG) { + printf("nfs_bioread: type %x unexpected\n", vnode_vtype(vp)); FSDBG_BOT(514, np, 0xd1e0016, 0, EINVAL); return (EINVAL); } /* - * For nfs, cache consistency can only be maintained approximately. + * For NFS, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. - * For nfs: - * If the file's modify time on the server has changed since the - * last read rpc or you have written to the file, - * you may have lost data cache consistency with the - * server, so flush all of the file's data out of the cache. - * Then force a getattr rpc to ensure that you have up to date - * attributes. 
+ * + * If the file has changed since the last read RPC or you have + * written to the file, you may have lost data cache consistency + * with the server. So, check for a change, and flush all of the + * file's data out of the cache. * NB: This implies that cache data can be read when up to - * NFS_MAXATTRTIMEO seconds out of date. If you find that you need - * current attributes this could be forced by calling - * NATTRINVALIDATE() before the nfs_getattr() call. + * NFS_MAXATTRTIMO seconds out of date. If you find that you + * need current attributes, nfs_getattr() can be forced to fetch + * new attributes (via NATTRINVALIDATE() or NGA_UNCACHED). */ if (ISSET(np->n_flag, NUPDATESIZE)) nfs_data_update_size(np, 0); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + if ((error = nfs_node_lock(np))) { FSDBG_BOT(514, np, 0xd1e0222, 0, error); return (error); } if (np->n_flag & NNEEDINVALIDATE) { np->n_flag &= ~NNEEDINVALIDATE; - nfs_unlock(np); - nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + nfs_node_unlock(np); + error = nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); + if (!error) + error = nfs_node_lock(np); + if (error) { FSDBG_BOT(514, np, 0xd1e0322, 0, error); return (error); } } - if (np->n_flag & NMODIFIED) { - if (vtype == VDIR) { - nfs_invaldir(np); - nfs_unlock(np); - error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); - if (!error) - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (error) { - FSDBG_BOT(514, np, 0xd1e0003, 0, error); - return (error); - } - } - NATTRINVALIDATE(np); - error = nfs_getattr(np, &nvattr, ctx, 1); - if (error) { - nfs_unlock(np); - FSDBG_BOT(514, np, 0xd1e0004, 0, error); - return (error); - } - if (vtype == VDIR) { - /* if directory changed, purge any name cache entries */ - if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) - cache_purge(vp); - NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr); - } - NFS_CHANGED_UPDATE(nfsvers, np, &nvattr); - } else { - error = 
nfs_getattr(np, &nvattr, ctx, 1); - if (error) { - nfs_unlock(np); - FSDBG_BOT(514, np, 0xd1e0005, 0, error); - return (error); - } - if (NFS_CHANGED(nfsvers, np, &nvattr)) { - if (vtype == VDIR) { - nfs_invaldir(np); - /* purge name cache entries */ - if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) - cache_purge(vp); - } - nfs_unlock(np); - error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); - if (!error) - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (error) { - FSDBG_BOT(514, np, 0xd1e0006, 0, error); - return (error); - } - if (vtype == VDIR) - NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr); - NFS_CHANGED_UPDATE(nfsvers, np, &nvattr); - } + modified = (np->n_flag & NMODIFIED); + nfs_node_unlock(np); + /* nfs_getattr() will check changed and purge caches */ + error = nfs_getattr(np, &nvattr, ctx, modified ? NGA_UNCACHED : NGA_CACHED); + if (error) { + FSDBG_BOT(514, np, 0xd1e0004, 0, error); + return (error); } - nfs_unlock(np); + if (uio_resid(uio) == 0) { + FSDBG_BOT(514, np, 0xd1e0001, 0, 0); + return (0); + } + if (uio_offset(uio) < 0) { + FSDBG_BOT(514, np, 0xd1e0002, 0, EINVAL); + return (EINVAL); + } - if (vtype == VREG) { - if ((ioflag & IO_NOCACHE) && (uio_uio_resid(uio) < (2*biosize))) { - /* We have only a block or so to read, just do the rpc directly. 
*/ - error = nfs_read_rpc(np, uio, ctx); - FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), error); - return (error); - } - /* - * set up readahead - which may be limited by: - * + current request length (for IO_NOCACHE) - * + readahead setting - * + file size - */ - if (nmp->nm_readahead > 0) { - off_t end = uio->uio_offset + uio_uio_resid(uio); - if (end > (off_t)np->n_size) - end = np->n_size; - rabn = uio->uio_offset / biosize; - maxrabn = (end - 1) / biosize; - if (!(ioflag & IO_NOCACHE) && - (!rabn || (rabn == np->n_lastread) || (rabn == (np->n_lastread+1)))) { - maxrabn += nmp->nm_readahead; - if ((maxrabn * biosize) >= (off_t)np->n_size) - maxrabn = ((off_t)np->n_size - 1)/biosize; - } - } else { - rabn = maxrabn = 0; - } + /* + * set up readahead - which may be limited by: + * + current request length (for IO_NOCACHE) + * + readahead setting + * + file size + */ + if (nmp->nm_readahead > 0) { + off_t end = uio_offset(uio) + uio_resid(uio); + if (end > (off_t)np->n_size) + end = np->n_size; + rabn = uio_offset(uio) / biosize; + maxrabn = (end - 1) / biosize; + nfs_node_lock_force(np); + if (!(ioflag & IO_NOCACHE) && + (!rabn || (rabn == np->n_lastread) || (rabn == (np->n_lastread+1)))) { + maxrabn += nmp->nm_readahead; + if ((maxrabn * biosize) >= (off_t)np->n_size) + maxrabn = ((off_t)np->n_size - 1)/biosize; + } + if (maxrabn < np->n_lastrahead) + np->n_lastrahead = -1; + if (rabn < np->n_lastrahead) + rabn = np->n_lastrahead + 1; + nfs_node_unlock(np); + } else { + rabn = maxrabn = 0; } do { - if (vtype == VREG) { - nfs_data_lock(np, NFS_NODE_LOCK_SHARED); - lbn = uio->uio_offset / biosize; + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); + lbn = uio_offset(uio) / biosize; /* * Copy directly from any cached pages without grabbing the bufs. - * - * Note: for "nocache" reads, we don't copy directly from UBC - * because any cached pages will be for readahead buffers that - * need to be invalidated anyway before we finish this request. 
+ * (If we are NOCACHE and we've issued readahead requests, we need + * to grab the NB_NCRDAHEAD bufs to drop them.) */ - if (!(ioflag & IO_NOCACHE) && - (uio->uio_segflg == UIO_USERSPACE32 || - uio->uio_segflg == UIO_USERSPACE64 || - uio->uio_segflg == UIO_USERSPACE)) { - // LP64todo - fix this! - int io_resid = uio_uio_resid(uio); - diff = np->n_size - uio->uio_offset; + if ((!(ioflag & IO_NOCACHE) || !readaheads) && + ((uio->uio_segflg == UIO_USERSPACE32 || + uio->uio_segflg == UIO_USERSPACE64 || + uio->uio_segflg == UIO_USERSPACE))) { + io_resid = uio_resid(uio); + diff = np->n_size - uio_offset(uio); if (diff < io_resid) io_resid = diff; if (io_resid > 0) { - error = cluster_copy_ubc_data(vp, uio, &io_resid, 0); + int count = (io_resid > INT_MAX) ? INT_MAX : io_resid; + error = cluster_copy_ubc_data(vp, uio, &count, 0); if (error) { nfs_data_unlock(np); - FSDBG_BOT(514, np, uio->uio_offset, 0xcacefeed, error); + FSDBG_BOT(514, np, uio_offset(uio), 0xcacefeed, error); return (error); } } /* count any biocache reads that we just copied directly */ - if (lbn != (uio->uio_offset / biosize)) { - OSAddAtomic((uio->uio_offset / biosize) - lbn, (SInt32*)&nfsstats.biocache_reads); - FSDBG(514, np, 0xcacefeed, uio->uio_offset, error); + if (lbn != (uio_offset(uio)/biosize)) { + OSAddAtomic((uio_offset(uio)/biosize) - lbn, &nfsstats.biocache_reads); + FSDBG(514, np, 0xcacefeed, uio_offset(uio), error); } } - lbn = uio->uio_offset / biosize; - on = uio->uio_offset % biosize; - np->n_lastread = (uio->uio_offset - 1) / biosize; + lbn = uio_offset(uio) / biosize; + on = uio_offset(uio) % biosize; + nfs_node_lock_force(np); + np->n_lastread = (uio_offset(uio) - 1) / biosize; + nfs_node_unlock(np); /* adjust readahead block number, if necessary */ if (rabn < lbn) @@ -2013,15 +1997,16 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context FSDBG_BOT(514, np, 0xd1e000b, 1, error); return (error); } + readaheads = 1; } - if ((uio_uio_resid(uio) <= 0) 
|| (uio->uio_offset >= (off_t)np->n_size)) { + if ((uio_resid(uio) <= 0) || (uio_offset(uio) >= (off_t)np->n_size)) { nfs_data_unlock(np); - FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), 0xaaaaaaaa); + FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), 0xaaaaaaaa); return (0); } - OSAddAtomic(1, (SInt32*)&nfsstats.biocache_reads); + OSAddAtomic(1, &nfsstats.biocache_reads); /* * If the block is in the cache and has the required data @@ -2030,9 +2015,9 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context * as required. */ again: - // LP64todo - fix this! - n = min((unsigned)(biosize - on), uio_uio_resid(uio)); - diff = np->n_size - uio->uio_offset; + io_resid = uio_resid(uio); + n = (io_resid > (biosize - on)) ? (biosize - on) : io_resid; + diff = np->n_size - uio_offset(uio); if (diff < n) n = diff; @@ -2055,11 +2040,9 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context SET(bp->nb_flags, NB_NOCACHE); goto flushbuffer; } - if (!ISSET(bp->nb_flags, NB_NCRDAHEAD)) { - CLR(bp->nb_flags, NB_CACHE); - bp->nb_valid = 0; - } else { + if (ISSET(bp->nb_flags, NB_NCRDAHEAD)) { CLR(bp->nb_flags, NB_NCRDAHEAD); + SET(bp->nb_flags, NB_NOCACHE); } } @@ -2127,7 +2110,7 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context if (!auio) { error = ENOMEM; } else { - uio_addiov(auio, CAST_USER_ADDR_T((bp->nb_data + firstpg * PAGE_SIZE)), + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + (firstpg * PAGE_SIZE)), ((lastpg - firstpg + 1) * PAGE_SIZE)); error = nfs_read_rpc(np, auio, ctx); } @@ -2162,6 +2145,8 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int ioflag, int *eofflag, vfs_context SET(bp->nb_flags, NB_READ); CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL)); error = nfs_buf_read(bp); + if (ioflag & IO_NOCACHE) + SET(bp->nb_flags, NB_NOCACHE); if (error) { nfs_data_unlock(np); nfs_buf_release(bp, 1); @@ -2176,154 +2161,25 @@ nfs_bioread(nfsnode_t np, struct uio *uio, int 
ioflag, int *eofflag, vfs_context if (diff < n) n = diff; } - if (n > 0) - NFS_BUF_MAP(bp); - } else if (vtype == VDIR) { - OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readdirs); - error = nfs_lock(np, NFS_NODE_LOCK_SHARED); - if (error || (np->n_direofoffset && (uio->uio_offset >= np->n_direofoffset))) { - if (!error) - nfs_unlock(np); - if (eofflag) - *eofflag = 1; - FSDBG_BOT(514, np, 0xde0f0001, 0, 0); - return (0); - } - nfs_unlock(np); - lbn = uio->uio_offset / NFS_DIRBLKSIZ; - on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); - error = nfs_buf_get(np, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp); - if (error) { - FSDBG_BOT(514, np, 0xd1e0012, 0, error); - return (error); - } - if (!ISSET(bp->nb_flags, NB_CACHE)) { - SET(bp->nb_flags, NB_READ); - error = nfs_buf_readdir(bp, ctx); - if (error) - nfs_buf_release(bp, 1); - while (error == NFSERR_BAD_COOKIE) { - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (!error) { - nfs_invaldir(np); - nfs_unlock(np); - } - error = nfs_vinvalbuf(vp, 0, ctx, 1); - /* - * Yuck! The directory has been modified on the - * server. The only way to get the block is by - * reading from the beginning to get all the - * offset cookies. - */ - for (tlbn = 0; tlbn <= lbn && !error; tlbn++) { - if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED))) - break; - if (np->n_direofoffset - && (tlbn * NFS_DIRBLKSIZ) >= np->n_direofoffset) { - nfs_unlock(np); - if (eofflag) - *eofflag = 1; - FSDBG_BOT(514, np, 0xde0f0002, 0, 0); - return (0); - } - nfs_unlock(np); - error = nfs_buf_get(np, tlbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp); - if (error) { - FSDBG_BOT(514, np, 0xd1e0013, 0, error); - return (error); - } - if (!ISSET(bp->nb_flags, NB_CACHE)) { - SET(bp->nb_flags, NB_READ); - error = nfs_buf_readdir(bp, ctx); - /* - * no error + NB_INVAL == directory EOF, - * use the block. 
- */ - if (error == 0 && ISSET(bp->nb_flags, NB_INVAL)) { - if (eofflag) - *eofflag = 1; - break; - } - } - /* - * An error will throw away the block and the - * for loop will break out. If no error and this - * is not the block we want, we throw away the - * block and go for the next one via the for loop. - */ - if (error || (tlbn < lbn)) - nfs_buf_release(bp, 1); - } - } - /* - * The above while is repeated if we hit another cookie - * error. If we hit an error and it wasn't a cookie error, - * we give up. - */ - if (error) { - FSDBG_BOT(514, np, 0xd1e0014, 0, error); - return (error); - } - } - /* - * Make sure we use a signed variant of min() since - * the second term may be negative. - */ - // LP64todo - fix this! - n = lmin(uio_uio_resid(uio), bp->nb_validend - on); - /* - * We keep track of the directory eof in - * np->n_direofoffset and chop it off as an - * extra step right here. - */ - if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED))) { - FSDBG_BOT(514, np, 0xd1e0115, 0, error); - return (error); - } - if (np->n_direofoffset && - n > np->n_direofoffset - uio->uio_offset) - n = np->n_direofoffset - uio->uio_offset; - nfs_unlock(np); - /* - * Make sure that we return an integral number of entries so - * that any subsequent calls will start copying from the start - * of the next entry. - * - * If the current value of n has the last entry cut short, - * set n to copy everything up to the last entry instead. 
- */ if (n > 0) { - dp = bp->nb_data + on; - while (dp < (bp->nb_data + on + n)) { - direntp = (struct dirent *)dp; - dp += direntp->d_reclen; - } - if (dp > (bp->nb_data + on + n)) - n = (dp - direntp->d_reclen) - (bp->nb_data + on); + NFS_BUF_MAP(bp); + error = uiomove(bp->nb_data + on, n, uio); } - } - - if (n > 0) - error = uiomove(bp->nb_data + on, (int)n, uio); - if (vtype == VREG) { - if (ioflag & IO_NOCACHE) - SET(bp->nb_flags, NB_NOCACHE); nfs_buf_release(bp, 1); nfs_data_unlock(np); - np->n_lastread = (uio->uio_offset - 1) / biosize; - } else { - nfs_buf_release(bp, 1); - } - } while (error == 0 && uio_uio_resid(uio) > 0 && n > 0); - FSDBG_BOT(514, np, uio->uio_offset, uio_uio_resid(uio), error); + nfs_node_lock_force(np); + np->n_lastread = (uio_offset(uio) - 1) / biosize; + nfs_node_unlock(np); + } while (error == 0 && uio_resid(uio) > 0 && n > 0); + FSDBG_BOT(514, np, uio_offset(uio), uio_resid(uio), error); return (error); } /* * limit the number of outstanding async I/O writes */ -static int +int nfs_async_write_start(struct nfsmount *nmp) { int error = 0, slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; @@ -2343,7 +2199,7 @@ nfs_async_write_start(struct nfsmount *nmp) lck_mtx_unlock(&nmp->nm_lock); return (error); } -static void +void nfs_async_write_done(struct nfsmount *nmp) { if (nmp->nm_asyncwrites <= 0) @@ -2402,10 +2258,13 @@ nfs_buf_write(struct nfsbuf *bp) LIST_INSERT_HEAD(&np->n_cleanblkhd, bp, nb_vnbufs); lck_mtx_unlock(nfs_buf_mutex); } + nfs_node_lock_force(np); + np->n_numoutput++; + nfs_node_unlock(np); vnode_startwrite(NFSTOV(np)); if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); cred = bp->nb_wcred; if (!IS_VALID_CRED(cred) && ISSET(bp->nb_flags, NB_READ)) @@ -2454,10 +2313,10 @@ nfs_buf_write(struct nfsbuf *bp) } bp->nb_dirtyoff = bp->nb_dirtyend = 0; CLR(bp->nb_flags, NB_NEEDCOMMIT); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); - nfs_unlock(np); + nfs_node_unlock(np); } if (!error && (bp->nb_dirtyend > 0)) { /* sanity check the dirty range */ @@ -2486,7 +2345,7 @@ nfs_buf_write(struct nfsbuf *bp) dend = round_page_32(dend); /* try to expand write range to include trailing dirty pages */ if (!(dend & PAGE_MASK)) - while ((dend < bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE)) + while ((dend < (int)bp->nb_bufsize) && NBPGDIRTY(bp, dend / PAGE_SIZE)) dend += PAGE_SIZE; /* make sure to keep dend clipped to EOF */ if ((NBOFF(bp) + dend) > (off_t) np->n_size) @@ -2513,7 +2372,7 @@ nfs_buf_write(struct nfsbuf *bp) bp->nb_offio = doff; bp->nb_endio = dend; - OSAddAtomic(1, (SInt32 *)&nfsstats.write_bios); + OSAddAtomic(1, &nfsstats.write_bios); SET(bp->nb_flags, NB_WRITEINPROG); error = nfs_buf_write_rpc(bp, iomode, thd, cred); @@ -2546,12 +2405,12 @@ nfs_buf_write(struct nfsbuf *bp) if ((np->n_flag & NNEEDINVALIDATE) && !(np->n_bflag & (NBINVALINPROG|NBFLUSHINPROG))) { int invalidate = 0; - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (np->n_flag & NNEEDINVALIDATE) { 
invalidate = 1; np->n_flag &= ~NNEEDINVALIDATE; } - nfs_unlock(np); + nfs_node_unlock(np); if (invalidate) { /* * There was a write error and we need to @@ -2603,19 +2462,19 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) /* manage needcommit state */ if (!error && (bp->nb_commitlevel == NFS_WRITE_UNSTABLE)) { if (!ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_needcommitcnt++; - nfs_unlock(np); + nfs_node_unlock(np); SET(bp->nb_flags, NB_NEEDCOMMIT); } /* make sure nb_dirtyoff/nb_dirtyend reflect actual range written */ bp->nb_dirtyoff = bp->nb_offio; bp->nb_dirtyend = bp->nb_endio; } else if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); - nfs_unlock(np); + nfs_node_unlock(np); CLR(bp->nb_flags, NB_NEEDCOMMIT); } @@ -2664,11 +2523,11 @@ nfs_buf_write_finish(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) * buffer busy. Set a flag to do it after releasing * the buffer. 
*/ - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_error = error; np->n_flag |= (NWRITEERR | NNEEDINVALIDATE); NATTRINVALIDATE(np); - nfs_unlock(np); + nfs_node_unlock(np); } /* clear the dirty range */ bp->nb_dirtyoff = bp->nb_dirtyend = 0; @@ -2694,27 +2553,21 @@ nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) int error = 0, commit, iomode, iomode2, len, pg, count, npages, off; uint32_t dirty = bp->nb_dirty; uint64_t wverf; - struct uio uio; - struct iovec_32 io; + uio_t auio; + char uio_buf [ UIO_SIZEOF(1) ]; if (!bp->nb_dirty) return (0); /* there are pages marked dirty that need to be written out */ - OSAddAtomic(1, (SInt32 *)&nfsstats.write_bios); + OSAddAtomic(1, &nfsstats.write_bios); NFS_BUF_MAP(bp); SET(bp->nb_flags, NB_WRITEINPROG); npages = bp->nb_bufsize / PAGE_SIZE; iomode = NFS_WRITE_UNSTABLE; - uio.uio_iovs.iov32p = &io; - uio.uio_iovcnt = 1; - uio.uio_rw = UIO_WRITE; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, + &uio_buf, sizeof(uio_buf)); again: dirty = bp->nb_dirty; @@ -2734,11 +2587,9 @@ nfs_buf_write_dirty_pages(struct nfsbuf *bp, thread_t thd, kauth_cred_t cred) len -= (NBOFF(bp) + off + len) - np->n_size; if (len > 0) { iomode2 = iomode; - io.iov_len = len; - uio_uio_resid_set(&uio, io.iov_len); - uio.uio_offset = NBOFF(bp) + off; - io.iov_base = (uintptr_t) bp->nb_data + off; - error = nfs_write_rpc2(np, &uio, thd, cred, &iomode2, &bp->nb_verf); + uio_reset(auio, NBOFF(bp) + off, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + off), len); + error = nfs_write_rpc2(np, auio, thd, cred, &iomode2, &bp->nb_verf); if (error) break; if (iomode2 < commit) /* Retain the lowest commitment level returned. 
*/ @@ -2784,11 +2635,12 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred struct nfsmount *nmp; nfsnode_t np = bp->nb_np; int error = 0, nfsvers, async; - int offset, length, nmwsize, nrpcs, len; + int offset, nrpcs; + uint32_t nmwsize, length, len; struct nfsreq *req; struct nfsreq_cbinfo cb; - struct uio uio; - struct iovec_32 io; + uio_t auio; + char uio_buf [ UIO_SIZEOF(1) ]; nmp = NFSTONMP(np); if (!nmp) { @@ -2816,18 +2668,9 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred return (error); } - uio.uio_iovs.iov32p = &io; - uio.uio_iovcnt = 1; - uio.uio_rw = UIO_WRITE; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif - io.iov_len = length; - uio_uio_resid_set(&uio, io.iov_len); - uio.uio_offset = NBOFF(bp) + offset; - io.iov_base = (uintptr_t) bp->nb_data + offset; + auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE, + UIO_WRITE, &uio_buf, sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length); bp->nb_rpcs = nrpcs = (length + nmwsize - 1) / nmwsize; if (async && (nrpcs > 1)) { @@ -2844,10 +2687,12 @@ nfs_buf_write_rpc(struct nfsbuf *bp, int iomode, thread_t thd, kauth_cred_t cred len = (length > nmwsize) ? 
nmwsize : length; cb.rcb_args[0] = offset; cb.rcb_args[1] = len; + if (nmp->nm_vers >= NFS_VER4) + cb.rcb_args[2] = nmp->nm_stategenid; if (async && ((error = nfs_async_write_start(nmp)))) break; req = NULL; - error = nmp->nm_funcs->nf_write_rpc_async(np, &uio, len, thd, cred, + error = nmp->nm_funcs->nf_write_rpc_async(np, auio, len, thd, cred, iomode, &cb, &req); if (error) { if (async) @@ -2909,8 +2754,8 @@ nfs_buf_write_rpc_finish(struct nfsreq *req) nfsnode_t np; thread_t thd; kauth_cred_t cred; - struct uio uio; - struct iovec_32 io; + uio_t auio; + char uio_buf [ UIO_SIZEOF(1) ]; finish: np = req->r_np; @@ -2944,7 +2789,21 @@ nfs_buf_write_rpc_finish(struct nfsreq *req) kauth_cred_unref(&cred); return; } - + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && !ISSET(bp->nb_flags, NB_ERROR)) { + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (cb.rcb_args[2] == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_buf_write_rpc_finish: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + if (!(error = nfs_mount_state_wait_for_recovery(nmp))) { + rlen = 0; + goto writeagain; + } + } if (error) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error; @@ -2979,26 +2838,21 @@ nfs_buf_write_rpc_finish(struct nfsreq *req) * (Don't bother if the buffer hit an error or stale wverf.) 
*/ if (((int)rlen < length) && !(bp->nb_flags & (NB_STALEWVERF|NB_ERROR))) { +writeagain: offset += rlen; length -= rlen; - uio.uio_iovs.iov32p = &io; - uio.uio_iovcnt = 1; - uio.uio_rw = UIO_WRITE; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif - io.iov_len = length; - uio_uio_resid_set(&uio, io.iov_len); - uio.uio_offset = NBOFF(bp) + offset; - io.iov_base = (uintptr_t) bp->nb_data + offset; + auio = uio_createwithbuffer(1, NBOFF(bp) + offset, UIO_SYSSPACE, + UIO_WRITE, &uio_buf, sizeof(uio_buf)); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + offset), length); cb.rcb_args[0] = offset; cb.rcb_args[1] = length; + if (nmp->nm_vers >= NFS_VER4) + cb.rcb_args[2] = nmp->nm_stategenid; - error = nmp->nm_funcs->nf_write_rpc_async(np, &uio, length, thd, cred, + // XXX iomode should really match the original request + error = nmp->nm_funcs->nf_write_rpc_async(np, auio, length, thd, cred, NFS_WRITE_FILESYNC, &cb, &wreq); if (!error) { if (IS_VALID_CRED(cred)) @@ -3006,6 +2860,7 @@ nfs_buf_write_rpc_finish(struct nfsreq *req) if (!cb.rcb_func) { /* if !async we'll need to wait for this RPC to finish */ req = wreq; + wreq = NULL; goto finish; } /* @@ -3059,7 +2914,7 @@ int nfs_flushcommits(nfsnode_t np, int nowait) { struct nfsmount *nmp; - struct nfsbuf *bp; + struct nfsbuf *bp, *prevlbp, *lbp; struct nfsbuflists blist, commitlist; int error = 0, retv, wcred_set, flags, dirty; u_quad_t off, endoff, toff; @@ -3075,11 +2930,11 @@ nfs_flushcommits(nfsnode_t np, int nowait) * and the commit rpc is done. 
*/ if (!LIST_EMPTY(&np->n_dirtyblkhd)) { - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_lock(np); if (error) goto done; np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); } off = (u_quad_t)-1; @@ -3116,40 +2971,13 @@ nfs_flushcommits(nfsnode_t np, int nowait) continue; } nfs_buf_remfree(bp); - lck_mtx_unlock(nfs_buf_mutex); - /* - * we need a upl to see if the page has been - * dirtied (think mmap) since the unstable write, and - * also to prevent vm from paging it during our commit rpc - */ - if (!ISSET(bp->nb_flags, NB_PAGELIST)) { - retv = nfs_buf_upl_setup(bp); - if (retv) { - /* unable to create upl */ - /* vm object must no longer exist */ - /* this could be fatal if we need */ - /* to write the data again, we'll see... */ - printf("nfs_flushcommits: upl create failed %d\n", retv); - bp->nb_valid = bp->nb_dirty = 0; - } - } - nfs_buf_upl_check(bp); - lck_mtx_lock(nfs_buf_mutex); + + /* buffer UPLs will be grabbed *in order* below */ FSDBG(557, bp, bp->nb_flags, bp->nb_valid, bp->nb_dirty); FSDBG(557, bp->nb_validoff, bp->nb_validend, bp->nb_dirtyoff, bp->nb_dirtyend); - /* - * We used to check for dirty pages here; if there were any - * we'd abort the commit and force the entire buffer to be - * written again. - * - * Instead of doing that, we now go ahead and commit the dirty - * range, and then leave the buffer around with dirty pages - * that will be written out later. - */ - /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. @@ -3168,14 +2996,23 @@ nfs_flushcommits(nfsnode_t np, int nowait) SET(bp->nb_flags, NB_WRITEINPROG); /* - * A list of these buffers is kept so that the - * second loop knows which buffers have actually - * been committed. This is necessary, since there - * may be a race between the commit rpc and new - * uncommitted writes on the file. + * Add this buffer to the list of buffers we are committing. 
+ * Buffers are inserted into the list in ascending order so that + * we can take the UPLs in order after the list is complete. */ + prevlbp = NULL; + LIST_FOREACH(lbp, &commitlist, nb_vnbufs) { + if (bp->nb_lblkno < lbp->nb_lblkno) + break; + prevlbp = lbp; + } LIST_REMOVE(bp, nb_vnbufs); - LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs); + if (prevlbp) + LIST_INSERT_AFTER(prevlbp, bp, nb_vnbufs); + else + LIST_INSERT_HEAD(&commitlist, bp, nb_vnbufs); + + /* update commit range start, end */ toff = NBOFF(bp) + bp->nb_dirtyoff; if (toff < off) off = toff; @@ -3192,6 +3029,28 @@ nfs_flushcommits(nfsnode_t np, int nowait) goto done; } + /* + * We need a UPL to prevent others from accessing the buffers during + * our commit RPC(s). + * + * We used to also check for dirty pages here; if there were any we'd + * abort the commit and force the entire buffer to be written again. + * Instead of doing that, we just go ahead and commit the dirty range, + * and then leave the buffer around with dirty pages that will be + * written out later. + */ + LIST_FOREACH(bp, &commitlist, nb_vnbufs) { + if (!ISSET(bp->nb_flags, NB_PAGELIST)) { + retv = nfs_buf_upl_setup(bp); + if (retv) { + /* Unable to create the UPL, the VM object probably no longer exists. */ + printf("nfs_flushcommits: upl create failed %d\n", retv); + bp->nb_valid = bp->nb_dirty = 0; + } + } + nfs_buf_upl_check(bp); + } + /* * Commit data on the server, as required. 
* If all bufs are using the same wcred, then use that with @@ -3227,11 +3086,11 @@ nfs_flushcommits(nfsnode_t np, int nowait) while ((bp = LIST_FIRST(&commitlist))) { LIST_REMOVE(bp, nb_vnbufs); FSDBG(557, bp, retv, bp->nb_flags, bp->nb_dirty); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); CLR(bp->nb_flags, (NB_NEEDCOMMIT | NB_WRITEINPROG)); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); - nfs_unlock(np); + nfs_node_unlock(np); if (retv) { /* move back to dirty list */ @@ -3242,6 +3101,9 @@ nfs_flushcommits(nfsnode_t np, int nowait) continue; } + nfs_node_lock_force(np); + np->n_numoutput++; + nfs_node_unlock(np); vnode_startwrite(NFSTOV(np)); if (ISSET(bp->nb_flags, NB_DELWRI)) { lck_mtx_lock(nfs_buf_mutex); @@ -3303,16 +3165,16 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) slpflag = PCATCH; if (!LIST_EMPTY(&np->n_dirtyblkhd)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); } lck_mtx_lock(nfs_buf_mutex); while (np->n_bflag & NBFLUSHINPROG) { np->n_bflag |= NBFLUSHWANT; - error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL); - if (error) { + msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_flush", NULL); + if ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0))) { lck_mtx_unlock(nfs_buf_mutex); goto out; } @@ -3339,7 +3201,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) while ((bp = LIST_FIRST(&blist))) { LIST_REMOVE(bp, nb_vnbufs); LIST_INSERT_HEAD(&np->n_dirtyblkhd, bp, nb_vnbufs); - flags = (passone || (waitfor != MNT_WAIT)) ? NBAC_NOWAIT : 0; + flags = (passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) ? 
NBAC_NOWAIT : 0; if (flags != NBAC_NOWAIT) nfs_buf_refget(bp); while ((error = nfs_buf_acquire(bp, flags, slpflag, slptimeo))) { @@ -3379,7 +3241,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) continue; } FSDBG(525, bp, passone, bp->nb_lflags, bp->nb_flags); - if ((passone || (waitfor != MNT_WAIT)) && + if ((passone || !(waitfor == MNT_WAIT || waitfor == MNT_DWAIT)) && ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { nfs_buf_drop(bp); continue; @@ -3387,10 +3249,10 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) nfs_buf_remfree(bp); lck_mtx_unlock(nfs_buf_mutex); if (ISSET(bp->nb_flags, NB_ERROR)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_error = bp->nb_error ? bp->nb_error : EIO; np->n_flag |= NWRITEERR; - nfs_unlock(np); + nfs_node_unlock(np); nfs_buf_release(bp, 1); lck_mtx_lock(nfs_buf_mutex); continue; @@ -3407,7 +3269,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) } lck_mtx_unlock(nfs_buf_mutex); - if (waitfor == MNT_WAIT) { + if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) { while ((error = vnode_waitforwrites(NFSTOV(np), 0, slpflag, slptimeo, "nfsflush"))) { error2 = nfs_sigintr(NFSTONMP(np), NULL, thd, 0); if (error2) { @@ -3432,30 +3294,45 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) if (passone) { passone = 0; if (!LIST_EMPTY(&np->n_dirtyblkhd)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); } lck_mtx_lock(nfs_buf_mutex); goto again; } - if (waitfor == MNT_WAIT) { + if (waitfor == MNT_WAIT || waitfor == MNT_DWAIT) { if (!LIST_EMPTY(&np->n_dirtyblkhd)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); } lck_mtx_lock(nfs_buf_mutex); if (!LIST_EMPTY(&np->n_dirtyblkhd)) goto again; lck_mtx_unlock(nfs_buf_mutex); - nfs_lock(np, NFS_NODE_LOCK_FORCE); - /* if we 
have no dirty blocks, we can clear the modified flag */ - if (!np->n_wrbusy) + nfs_node_lock_force(np); + /* + * OK, it looks like there are no dirty blocks. If we have no + * writes in flight and no one in the write code, we can clear + * the modified flag. In order to make sure we see the latest + * attributes and size, we also invalidate the attributes and + * advance the attribute cache XID to guarantee that attributes + * newer than our clearing of NMODIFIED will get loaded next. + * (If we don't do this, it's possible for the flush's final + * write/commit (xid1) to be executed in parallel with a subsequent + * getattr request (xid2). The getattr could return attributes + * from *before* the write/commit completed but the stale attributes + * would be preferred because of the xid ordering.) + */ + if (!np->n_wrbusy && !np->n_numoutput) { np->n_flag &= ~NMODIFIED; + NATTRINVALIDATE(np); + nfs_get_xid(&np->n_xid); + } } else { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); } FSDBG(526, np->n_flag, np->n_error, 0, 0); @@ -3463,7 +3340,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) error = np->n_error; np->n_flag &= ~NWRITEERR; } - nfs_unlock(np); + nfs_node_unlock(np); done: lck_mtx_lock(nfs_buf_mutex); flags = np->n_bflag; @@ -3480,7 +3357,7 @@ nfs_flush(nfsnode_t np, int waitfor, thread_t thd, int ignore_writeerr) * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ -static int +int nfs_vinvalbuf_internal( nfsnode_t np, int flags, @@ -3580,10 +3457,9 @@ nfs_vinvalbuf_internal( // Note: bp has been released if (error) { FSDBG(554, bp, 0xd00dee, 0xbad, error); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_error = error; np->n_flag |= NWRITEERR; - nfs_unlock(np); /* * There was a write error and we need to * invalidate attrs to sync with server. 
@@ -3591,6 +3467,18 @@ nfs_vinvalbuf_internal( * we may no longer know the correct size) */ NATTRINVALIDATE(np); + nfs_node_unlock(np); + if (error == EINTR) { + /* + * Abort on EINTR. If we don't, we could + * be stuck in this loop forever because + * the buffer will continue to stay dirty. + */ + lck_mtx_lock(nfs_buf_mutex); + nfs_buf_itercomplete(np, &blist, list); + lck_mtx_unlock(nfs_buf_mutex); + return (error); + } error = 0; } lck_mtx_lock(nfs_buf_mutex); @@ -3607,11 +3495,12 @@ nfs_vinvalbuf_internal( if (!LIST_EMPTY(&(np)->n_dirtyblkhd) || !LIST_EMPTY(&(np)->n_cleanblkhd)) panic("nfs_vinvalbuf: flush/inval failed"); lck_mtx_unlock(nfs_buf_mutex); - if (!(flags & V_SAVE)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); + if (!(flags & V_SAVE)) np->n_flag &= ~NMODIFIED; - nfs_unlock(np); - } + if (vnode_vtype(NFSTOV(np)) == VREG) + np->n_lastrahead = -1; + nfs_node_unlock(np); NFS_BUF_FREEUP(); return (0); } @@ -3651,8 +3540,8 @@ nfs_vinvalbuf2(vnode_t vp, int flags, thread_t thd, kauth_cred_t cred, int intrf lck_mtx_lock(nfs_buf_mutex); while (np->n_bflag & NBINVALINPROG) { np->n_bflag |= NBINVALWANT; - error = msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", NULL); - if (error) { + msleep(&np->n_bflag, nfs_buf_mutex, slpflag, "nfs_vinvalbuf", NULL); + if ((error = nfs_sigintr(VTONMP(vp), NULL, thd, 0))) { lck_mtx_unlock(nfs_buf_mutex); return (error); } @@ -3763,7 +3652,7 @@ nfs_asyncio_resend(struct nfsreq *req) return; nfs_gss_clnt_rpcdone(req); lck_mtx_lock(&nmp->nm_lock); - if (req->r_rchain.tqe_next == NFSREQNOLIST) { + if (!(req->r_flags & R_RESENDQ)) { TAILQ_INSERT_TAIL(&nmp->nm_resendq, req, r_rchain); req->r_flags |= R_RESENDQ; } @@ -3772,64 +3661,29 @@ nfs_asyncio_resend(struct nfsreq *req) } /* - * Read an NFS buffer for a directory. + * Read directory data into a buffer. + * + * Buffer will be filled (unless EOF is hit). + * Buffers after this one may also be completely/partially filled. 
*/ int nfs_buf_readdir(struct nfsbuf *bp, vfs_context_t ctx) { - nfsnode_t np; - vnode_t vp; - struct nfsmount *nmp; - int error = 0, nfsvers; - struct uio uio; - struct iovec_32 io; + nfsnode_t np = bp->nb_np; + struct nfsmount *nmp = NFSTONMP(np); + int error = 0; - np = bp->nb_np; - vp = NFSTOV(np); - nmp = VTONMP(vp); - nfsvers = nmp->nm_vers; - uio.uio_iovs.iov32p = &io; - uio.uio_iovcnt = 1; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - uio.uio_segflg = UIO_SYSSPACE; -#else - uio.uio_segflg = UIO_SYSSPACE32; -#endif - - /* sanity check */ - if (ISSET(bp->nb_flags, NB_DONE)) - CLR(bp->nb_flags, NB_DONE); + if (!nmp) + return (ENXIO); - uio.uio_rw = UIO_READ; - io.iov_len = bp->nb_bufsize; - uio_uio_resid_set(&uio, io.iov_len); - io.iov_base = (uintptr_t) bp->nb_data; - uio.uio_offset = NBOFF(bp); - - OSAddAtomic(1, (SInt32*)&nfsstats.readdir_bios); - if (nfsvers < NFS_VER4) { - if (nmp->nm_flag & NFSMNT_RDIRPLUS) { - error = nfs3_readdirplus_rpc(np, &uio, ctx); - if (error == NFSERR_NOTSUPP) { - lck_mtx_lock(&nmp->nm_lock); - nmp->nm_flag &= ~NFSMNT_RDIRPLUS; - lck_mtx_unlock(&nmp->nm_lock); - } - } - if (!(nmp->nm_flag & NFSMNT_RDIRPLUS)) - error = nfs3_readdir_rpc(np, &uio, ctx); - } else { - error = nfs4_readdir_rpc(np, &uio, ctx); - } - if (error) { + if (nmp->nm_vers < NFS_VER4) + error = nfs3_readdir_rpc(np, bp, ctx); + else + error = nfs4_readdir_rpc(np, bp, ctx); + + if (error && (error != NFSERR_DIRBUFDROPPED)) { SET(bp->nb_flags, NB_ERROR); bp->nb_error = error; - } else { - bp->nb_validoff = 0; - bp->nb_validend = uio.uio_offset - NBOFF(bp); - bp->nb_valid = (1 << (round_page_32(bp->nb_validend)/PAGE_SIZE)) - 1; } - - nfs_buf_iodone(bp); return (error); } diff --git a/bsd/nfs/nfs_boot.c b/bsd/nfs/nfs_boot.c index cfe0d5df6..33bc25128 100644 --- a/bsd/nfs/nfs_boot.c +++ b/bsd/nfs/nfs_boot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -167,7 +167,7 @@ static int bp_getfile(struct sockaddr_in *bpsin, const char *key, /* mountd RPC */ static int md_mount(struct sockaddr_in *mdsin, char *path, int v3, int sotype, - u_char *fhp, u_long *fhlenp); + u_char *fhp, u_int32_t *fhlenp); /* other helpers */ static int get_file_handle(struct nfs_dlmount *ndmntp); @@ -216,11 +216,17 @@ nfs_boot_init(struct nfs_diskless *nd) error = ENOMEM; goto failed; } + MALLOC_ZONE(nd->nd_root.ndm_mntfrom, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!nd->nd_root.ndm_mntfrom) { + printf("nfs_boot: can't allocate root mntfrom buffer\n"); + error = ENOMEM; + goto failed; + } sin_p = &nd->nd_root.ndm_saddr; bzero((caddr_t)sin_p, sizeof(*sin_p)); sin_p->sin_len = sizeof(*sin_p); sin_p->sin_family = AF_INET; - if (netboot_rootpath(&sin_p->sin_addr, nd->nd_root.ndm_host, + if (netboot_rootpath(&sin_p->sin_addr, nd->nd_root.ndm_host, sizeof(nd->nd_root.ndm_host), nd->nd_root.ndm_path, MAXPATHLEN) == TRUE) { do_bpgetfile = FALSE; @@ -270,6 +276,12 @@ nfs_boot_init(struct nfs_diskless *nd) error = ENOMEM; goto failed; } + MALLOC_ZONE(nd->nd_private.ndm_mntfrom, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (!nd->nd_private.ndm_mntfrom) { + printf("nfs_boot: can't allocate private host buffer\n"); + error = ENOMEM; + goto failed; + } error = bp_getfile(&bp_sin, "private", &nd->nd_private.ndm_saddr, nd->nd_private.ndm_host, @@ -357,10 +369,12 @@ get_file_handle(ndmntp) return (error); /* Construct remote path (for getmntinfo(3)) */ - dp = ndmntp->ndm_host; - endp = dp + MNAMELEN - 1; - dp += strlen(dp); - *dp++ = ':'; + dp = ndmntp->ndm_mntfrom; + endp = dp + MAXPATHLEN - 1; + for (sp = ndmntp->ndm_host; *sp && dp < endp;) + *dp++ = *sp++; + if (dp < endp) + *dp++ = ':'; for (sp = ndmntp->ndm_path; *sp && dp < endp;) *dp++ = *sp++; *dp = '\0'; @@ -399,7 +413,7 @@ mbuf_get_with_len(size_t msg_len, mbuf_t *m) * String representation for RPC. 
*/ struct rpc_string { - u_long len; /* length without null or padding */ + u_int32_t len; /* length without null or padding */ u_char data[4]; /* data (longer, of course) */ /* data is padded to a long-word boundary */ }; @@ -408,11 +422,11 @@ struct rpc_string { /* * Inet address in RPC messages - * (Note, really four longs, NOT chars. Blech.) + * (Note, really four 32-bit ints, NOT chars. Blech.) */ struct bp_inaddr { - u_long atype; - long addr[4]; + u_int32_t atype; + int32_t addr[4]; }; @@ -439,10 +453,10 @@ bp_whoami(bpsin, my_ip, gw_ip) { /* RPC structures for PMAPPROC_CALLIT */ struct whoami_call { - u_long call_prog; - u_long call_vers; - u_long call_proc; - u_long call_arglen; + u_int32_t call_prog; + u_int32_t call_vers; + u_int32_t call_proc; + u_int32_t call_arglen; struct bp_inaddr call_ia; } *call; @@ -453,7 +467,7 @@ bp_whoami(bpsin, my_ip, gw_ip) int error; size_t msg_len, cn_len, dn_len; u_char *p; - long *lp; + int32_t *lp; /* * Get message buffer of sufficient size. 
@@ -629,7 +643,7 @@ bp_getfile(bpsin, key, md_sin, serv_name, pathname) sn_len = ntohl(str->len); if (msg_len < sn_len) goto bad; - if (sn_len >= MNAMELEN) + if (sn_len >= MAXHOSTNAMELEN) goto bad; bcopy(str->data, serv_name, sn_len); serv_name[sn_len] = '\0'; @@ -689,13 +703,13 @@ md_mount(mdsin, path, v3, sotype, fhp, fhlenp) int v3; int sotype; u_char *fhp; - u_long *fhlenp; + u_int32_t *fhlenp; { /* The RPC structures */ struct rpc_string *str; struct rdata { - u_long errno; - u_char data[NFSX_V3FHMAX + sizeof(u_long)]; + u_int32_t errno; + u_char data[NFSX_V3FHMAX + sizeof(u_int32_t)]; } *rdata; mbuf_t m; int error, mlen, slen; @@ -738,25 +752,25 @@ md_mount(mdsin, path, v3, sotype, fhp, fhlenp) * + a v3 filehandle length + a v3 filehandle */ mlen = mbuf_len(m); - if (mlen < (int)sizeof(u_long)) + if (mlen < (int)sizeof(u_int32_t)) goto bad; rdata = mbuf_data(m); error = ntohl(rdata->errno); if (error) goto out; if (v3) { - u_long fhlen; + u_int32_t fhlen; u_char *fh; - if (mlen < (int)sizeof(u_long)*2) + if (mlen < (int)sizeof(u_int32_t)*2) goto bad; - fhlen = ntohl(*(u_long*)rdata->data); - fh = rdata->data + sizeof(u_long); - if (mlen < (int)(sizeof(u_long)*2 + fhlen)) + fhlen = ntohl(*(u_int32_t*)rdata->data); + fh = rdata->data + sizeof(u_int32_t); + if (mlen < (int)(sizeof(u_int32_t)*2 + fhlen)) goto bad; bcopy(fh, fhp, fhlen); *fhlenp = fhlen; } else { - if (mlen < ((int)sizeof(u_long) + NFSX_V2FH)) + if (mlen < ((int)sizeof(u_int32_t) + NFSX_V2FH)) goto bad; bcopy(rdata->data, fhp, NFSX_V2FH); *fhlenp = NFSX_V2FH; diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c index 62cc01a47..b8dbbb4a2 100644 --- a/bsd/nfs/nfs_gss.c +++ b/bsd/nfs/nfs_gss.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -108,17 +108,39 @@ #include #include +#include "nfs_gss_crypto.h" + #define NFS_GSS_MACH_MAX_RETRIES 3 +typedef struct { + int type; + union { + MD5_DESCBC_CTX m_ctx; + HMAC_SHA1_DES3KD_CTX h_ctx; + }; +} GSS_DIGEST_CTX; + +#define MAX_DIGEST SHA_DIGEST_LENGTH +#ifdef NFS_KERNEL_DEBUG +#define HASHLEN(ki) (((ki)->hash_len > MAX_DIGEST) ? \ + (panic("nfs_gss.c:%d ki->hash_len is invalid = %d\n", __LINE__, (ki)->hash_len), MAX_DIGEST) : (ki)->hash_len) +#else +#define HASHLEN(ki) (((ki)->hash_len > MAX_DIGEST) ? \ + (printf("nfs_gss.c:%d ki->hash_len is invalid = %d\n", __LINE__, (ki)->hash_len), MAX_DIGEST) : (ki)->hash_len) +#endif + #if NFSSERVER u_long nfs_gss_svc_ctx_hash; struct nfs_gss_svc_ctx_hashhead *nfs_gss_svc_ctx_hashtbl; lck_mtx_t *nfs_gss_svc_ctx_mutex; lck_grp_t *nfs_gss_svc_grp; +uint32_t nfsrv_gss_context_ttl = GSS_CTX_EXPIRE; +#define GSS_SVC_CTX_TTL ((uint64_t)max(2*GSS_CTX_PEND, nfsrv_gss_context_ttl) * NSEC_PER_SEC) #endif /* NFSSERVER */ #if NFSCLIENT lck_grp_t *nfs_gss_clnt_grp; +int nfs_single_des; #endif /* NFSCLIENT */ /* @@ -128,9 +150,14 @@ lck_grp_t *nfs_gss_clnt_grp; static u_char krb5_tokhead[] = { 0x60, 0x23 }; static u_char krb5_mech[] = { 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x01, 0x02, 0x02 }; static u_char krb5_mic[] = { 0x01, 0x01, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff }; +static u_char krb5_mic3[] = { 0x01, 0x01, 0x04, 0x00, 0xff, 0xff, 0xff, 0xff }; static u_char krb5_wrap[] = { 0x02, 0x01, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }; +static u_char krb5_wrap3[] = { 0x02, 0x01, 0x04, 0x00, 0x02, 0x00, 0xff, 0xff }; static u_char iv0[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; // DES MAC Initialization Vector +#define ALG_MIC(ki) (((ki)->type == NFS_GSS_1DES) ? krb5_mic : krb5_mic3) +#define ALG_WRAP(ki) (((ki)->type == NFS_GSS_1DES) ? krb5_wrap : krb5_wrap3) + /* * The size of the Kerberos v5 ASN.1 token * in the verifier. 
@@ -148,10 +175,10 @@ static u_char iv0[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; // DES #define KRB5_SZ_MECH sizeof(krb5_mech) #define KRB5_SZ_ALG sizeof(krb5_mic) // 8 - same as krb5_wrap #define KRB5_SZ_SEQ 8 -#define KRB5_SZ_CKSUM 8 #define KRB5_SZ_EXTRA 3 // a wrap token may be longer by up to this many octets -#define KRB5_SZ_TOKEN (KRB5_SZ_TOKHEAD + KRB5_SZ_MECH + KRB5_SZ_ALG + KRB5_SZ_SEQ + KRB5_SZ_CKSUM) -#define KRB5_SZ_TOKMAX (KRB5_SZ_TOKEN + KRB5_SZ_EXTRA) +#define KRB5_SZ_TOKEN_NOSUM (KRB5_SZ_TOKHEAD + KRB5_SZ_MECH + KRB5_SZ_ALG + KRB5_SZ_SEQ) +#define KRB5_SZ_TOKEN(cksumlen) ((cksumlen) + KRB5_SZ_TOKEN_NOSUM) +#define KRB5_SZ_TOKMAX(cksumlen) (KRB5_SZ_TOKEN(cksumlen) + KRB5_SZ_EXTRA) #if NFSCLIENT static int nfs_gss_clnt_ctx_find(struct nfsreq *); @@ -176,22 +203,26 @@ static void task_release_special_port(mach_port_t); static mach_port_t task_copy_special_port(mach_port_t); static void nfs_gss_mach_alloc_buffer(u_char *, uint32_t, vm_map_copy_t *); static int nfs_gss_mach_vmcopyout(vm_map_copy_t, uint32_t, u_char *); -static int nfs_gss_token_get(des_key_schedule, u_char *, u_char *, int, uint32_t *, u_char *); -static int nfs_gss_token_put(des_key_schedule, u_char *, u_char *, int, int, u_char *); +static int nfs_gss_token_get(gss_key_info *ki, u_char *, u_char *, int, uint32_t *, u_char *); +static int nfs_gss_token_put(gss_key_info *ki, u_char *, u_char *, int, int, u_char *); static int nfs_gss_der_length_size(int); static void nfs_gss_der_length_put(u_char **, int); static int nfs_gss_der_length_get(u_char **); static int nfs_gss_mchain_length(mbuf_t); static int nfs_gss_append_chain(struct nfsm_chain *, mbuf_t); static void nfs_gss_nfsm_chain(struct nfsm_chain *, mbuf_t); -static void nfs_gss_cksum_mchain(des_key_schedule, mbuf_t, u_char *, int, int, u_char *); -static void nfs_gss_cksum_chain(des_key_schedule, struct nfsm_chain *, u_char *, int, int, u_char *); -static void nfs_gss_cksum_rep(des_key_schedule, uint32_t, u_char *); 
-static void nfs_gss_encrypt_mchain(u_char *, mbuf_t, int, int, int); -static void nfs_gss_encrypt_chain(u_char *, struct nfsm_chain *, int, int, int); -static DES_LONG des_cbc_cksum(des_cblock *, des_cblock *, long, des_key_schedule, des_cblock *); -static void des_cbc_encrypt(des_cblock *, des_cblock *, long, des_key_schedule, - des_cblock *, des_cblock *, int); +static void nfs_gss_cksum_mchain(gss_key_info *, mbuf_t, u_char *, int, int, u_char *); +static void nfs_gss_cksum_chain(gss_key_info *, struct nfsm_chain *, u_char *, int, int, u_char *); +static void nfs_gss_cksum_rep(gss_key_info *, uint32_t, u_char *); +static void nfs_gss_encrypt_mchain(gss_key_info *, mbuf_t, int, int, int); +static void nfs_gss_encrypt_chain(gss_key_info *, struct nfsm_chain *, int, int, int); + +static void gss_digest_Init(GSS_DIGEST_CTX *, gss_key_info *); +static void gss_digest_Update(GSS_DIGEST_CTX *, void *, size_t); +static void gss_digest_Final(GSS_DIGEST_CTX *, void *); +static void gss_des_crypt(gss_key_info *, des_cblock *, des_cblock *, + int32_t, des_cblock *, des_cblock *, int, int); +static int gss_key_init(gss_key_info *, uint32_t); #if NFSSERVER thread_call_t nfs_gss_svc_ctx_timer_call; @@ -240,7 +271,6 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) int error = 0; int retrycnt = 0; -retry: lck_mtx_lock(&nmp->nm_lock); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { if (cp->gss_clnt_uid == uid) { @@ -280,17 +310,16 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) * to failover to sec=sys. 
*/ if (req->r_thread == NULL) { - if ((nmp->nm_flag & NFSMNT_SECGIVEN) == 0) { + if (nmp->nm_flag & NFSMNT_SECSYSOK) { error = nfs_gss_clnt_ctx_failover(req); } else { printf("nfs_gss_clnt_ctx_find: no context for async\n"); - error = EAUTH; + error = NFSERR_EAUTH; } lck_mtx_unlock(&nmp->nm_lock); return (error); } - MALLOC(cp, struct nfs_gss_clnt_ctx *, sizeof(*cp), M_TEMP, M_WAITOK|M_ZERO); if (cp == NULL) { @@ -305,23 +334,41 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) TAILQ_INSERT_TAIL(&nmp->nm_gsscl, cp, gss_clnt_entries); lck_mtx_unlock(&nmp->nm_lock); +retry: error = nfs_gss_clnt_ctx_init(req, cp); - if (error) - nfs_gss_clnt_ctx_unref(req); - if (error == ENEEDAUTH) { error = nfs_gss_clnt_ctx_delay(req, &retrycnt); if (!error) goto retry; + + /* Giving up on this context */ + cp->gss_clnt_flags |= GSS_CTX_INVAL; + + /* + * Wake any threads waiting to use the context + */ + lck_mtx_lock(cp->gss_clnt_mtx); + cp->gss_clnt_thread = NULL; + if (cp->gss_clnt_flags & GSS_NEEDCTX) { + cp->gss_clnt_flags &= ~GSS_NEEDCTX; + wakeup(cp); + } + lck_mtx_unlock(cp->gss_clnt_mtx); + } + if (error) + nfs_gss_clnt_ctx_unref(req); + /* * If we failed to set up a Kerberos context for this - * user and no sec= mount option was given then set + * user and no sec= mount option was given, but the + * server indicated that it could support AUTH_SYS, then set * up a dummy context that allows this user to attempt * sec=sys calls. 
*/ - if (error && (nmp->nm_flag & NFSMNT_SECGIVEN) == 0) { + if (error && (nmp->nm_flag & NFSMNT_SECSYSOK) && + (error != ENXIO) && (error != ETIMEDOUT)) { lck_mtx_lock(&nmp->nm_lock); error = nfs_gss_clnt_ctx_failover(req); lck_mtx_unlock(&nmp->nm_lock); @@ -371,19 +418,24 @@ nfs_gss_clnt_ctx_failover(struct nfsreq *req) int nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) { - struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp; uint32_t seqnum = 0; int error = 0; - int slpflag = 0; + int slpflag, recordmark = 0; int start, len, offset = 0; int pad, toklen; struct nfsm_chain nmc_tmp; struct gss_seq *gsp; - u_char tokbuf[KRB5_SZ_TOKMAX]; - u_char cksum[8]; + u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; + u_char cksum[MAX_DIGEST]; struct timeval now; - + gss_key_info *ki; + + slpflag = (PZERO-1); + if (req->r_nmp) { + slpflag |= ((req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0; + recordmark = (req->r_nmp->nm_sotype == SOCK_STREAM); + } retry: if (req->r_gss_ctx == NULL) { /* @@ -430,15 +482,15 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) lck_mtx_lock(cp->gss_clnt_mtx); if (cp->gss_clnt_thread && cp->gss_clnt_thread != current_thread()) { cp->gss_clnt_flags |= GSS_NEEDCTX; - slpflag = (PZERO-1) | PDROP | (((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0); - msleep(cp, cp->gss_clnt_mtx, slpflag, "ctxwait", NULL); - if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) + msleep(cp, cp->gss_clnt_mtx, slpflag | PDROP, "ctxwait", NULL); + if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) return (error); nfs_gss_clnt_ctx_unref(req); goto retry; } lck_mtx_unlock(cp->gss_clnt_mtx); + ki = &cp->gss_clnt_kinfo; if (cp->gss_clnt_flags & GSS_CTX_COMPLETE) { /* * Get a sequence number for this request. 
@@ -451,9 +503,8 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) while (win_getbit(cp->gss_clnt_seqbits, ((cp->gss_clnt_seqnum - cp->gss_clnt_seqwin) + 1) % cp->gss_clnt_seqwin)) { cp->gss_clnt_flags |= GSS_NEEDSEQ; - slpflag = (PZERO-1) | (((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? PCATCH : 0); msleep(cp, cp->gss_clnt_mtx, slpflag, "seqwin", NULL); - if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) { + if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) { lck_mtx_unlock(cp->gss_clnt_mtx); return (error); } @@ -483,8 +534,13 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) nfsm_chain_add_32(error, nmc, seqnum); nfsm_chain_add_32(error, nmc, cp->gss_clnt_service); nfsm_chain_add_32(error, nmc, cp->gss_clnt_handle_len); - nfsm_chain_add_opaque(error, nmc, cp->gss_clnt_handle, cp->gss_clnt_handle_len); - + if (cp->gss_clnt_handle_len > 0) { + if (cp->gss_clnt_handle == NULL) + return (EBADRPC); + nfsm_chain_add_opaque(error, nmc, cp->gss_clnt_handle, cp->gss_clnt_handle_len); + } + if (error) + return(error); /* * Now add the verifier */ @@ -502,11 +558,11 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) return (error); } - offset = nmp->nm_sotype == SOCK_STREAM ? NFSX_UNSIGNED : 0; // record mark + offset = recordmark ? 
NFSX_UNSIGNED : 0; // record mark nfsm_chain_build_done(error, nmc); - nfs_gss_cksum_chain(cp->gss_clnt_sched, nmc, krb5_mic, offset, 0, cksum); + nfs_gss_cksum_chain(ki, nmc, ALG_MIC(ki), offset, 0, cksum); - toklen = nfs_gss_token_put(cp->gss_clnt_sched, krb5_mic, tokbuf, 1, 0, cksum); + toklen = nfs_gss_token_put(ki, ALG_MIC(ki), tokbuf, 1, 0, cksum); nfsm_chain_add_32(error, nmc, RPCSEC_GSS); // flavor nfsm_chain_add_32(error, nmc, toklen); // length nfsm_chain_add_opaque(error, nmc, tokbuf, toklen); @@ -536,10 +592,10 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) nfs_gss_append_chain(nmc, args); // Append the args mbufs /* Now compute a checksum over the seqnum + args */ - nfs_gss_cksum_chain(cp->gss_clnt_sched, nmc, krb5_mic, start, len, cksum); + nfs_gss_cksum_chain(ki, nmc, ALG_MIC(ki), start, len, cksum); /* Insert it into a token and append to the request */ - toklen = nfs_gss_token_put(cp->gss_clnt_sched, krb5_mic, tokbuf, 1, 0, cksum); + toklen = nfs_gss_token_put(ki, ALG_MIC(ki), tokbuf, 1, 0, cksum); nfsm_chain_finish_mbuf(error, nmc); // force checksum into new mbuf nfsm_chain_add_32(error, nmc, toklen); nfsm_chain_add_opaque(error, nmc, tokbuf, toklen); @@ -577,10 +633,10 @@ nfs_gss_clnt_cred_put(struct nfsreq *req, struct nfsm_chain *nmc, mbuf_t args) nfsm_chain_build_done(error, &nmc_tmp); /* Now compute a checksum over the confounder + seqnum + args */ - nfs_gss_cksum_chain(cp->gss_clnt_sched, &nmc_tmp, krb5_wrap, 0, len, cksum); + nfs_gss_cksum_chain(ki, &nmc_tmp, ALG_WRAP(ki), 0, len, cksum); /* Insert it into a token */ - toklen = nfs_gss_token_put(cp->gss_clnt_sched, krb5_wrap, tokbuf, 1, len, cksum); + toklen = nfs_gss_token_put(ki, ALG_WRAP(ki), tokbuf, 1, len, cksum); nfsm_chain_add_32(error, nmc, toklen + len); // token + args length nfsm_chain_add_opaque_nopad(error, nmc, tokbuf, toklen); req->r_gss_argoff = nfsm_chain_offset(nmc); // Stash offset @@ -590,7 +646,7 @@ nfs_gss_clnt_cred_put(struct nfsreq 
*req, struct nfsm_chain *nmc, mbuf_t args) nfs_gss_append_chain(nmc, nmc_tmp.nmc_mhead); // Append the args mbufs /* Finally, encrypt the args */ - nfs_gss_encrypt_chain(cp->gss_clnt_skey, &nmc_tmp, 0, len, DES_ENCRYPT); + nfs_gss_encrypt_chain(ki, &nmc_tmp, 0, len, DES_ENCRYPT); /* Add null XDR pad if the ASN.1 token misaligned the data */ pad = nfsm_pad(toklen + len); @@ -620,20 +676,21 @@ nfs_gss_clnt_verf_get( uint32_t verflen, uint32_t *accepted_statusp) { - u_char tokbuf[KRB5_SZ_TOKMAX]; - u_char cksum1[8], cksum2[8]; + u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; + u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST]; uint32_t seqnum = 0; struct nfs_gss_clnt_ctx *cp = req->r_gss_ctx; struct nfsm_chain nmc_tmp; struct gss_seq *gsp; uint32_t reslen, start, cksumlen, toklen; int error = 0; + gss_key_info *ki = &cp->gss_clnt_kinfo; reslen = cksumlen = 0; *accepted_statusp = 0; if (cp == NULL) - return (EAUTH); + return (NFSERR_EAUTH); /* * If it's not an RPCSEC_GSS verifier, then it has to * be a null verifier that resulted from either @@ -643,19 +700,16 @@ nfs_gss_clnt_verf_get( */ if (verftype != RPCSEC_GSS) { if (verftype != RPCAUTH_NULL) - return (EAUTH); + return (NFSERR_EAUTH); if (cp->gss_clnt_flags & GSS_CTX_COMPLETE && cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS) - return (EAUTH); + return (NFSERR_EAUTH); if (verflen > 0) nfsm_chain_adv(error, nmc, nfsm_rndup(verflen)); nfsm_chain_get_32(error, nmc, *accepted_statusp); return (error); } - if (verflen != KRB5_SZ_TOKEN) - return (EAUTH); - /* * If we received an RPCSEC_GSS verifier but the * context isn't yet complete, then it must be @@ -675,6 +729,9 @@ nfs_gss_clnt_verf_get( return (error); } + if (verflen != KRB5_SZ_TOKEN(ki->hash_len)) + return (NFSERR_EAUTH); + /* * Get the 8 octet sequence number * checksum out of the verifier token. 
@@ -682,7 +739,7 @@ nfs_gss_clnt_verf_get( nfsm_chain_get_opaque(error, nmc, verflen, tokbuf); if (error) goto nfsmout; - error = nfs_gss_token_get(cp->gss_clnt_sched, krb5_mic, tokbuf, 0, NULL, cksum1); + error = nfs_gss_token_get(ki, ALG_MIC(ki), tokbuf, 0, NULL, cksum1); if (error) goto nfsmout; @@ -692,12 +749,12 @@ nfs_gss_clnt_verf_get( * the one in the verifier returned by the server. */ SLIST_FOREACH(gsp, &req->r_gss_seqlist, gss_seqnext) { - nfs_gss_cksum_rep(cp->gss_clnt_sched, gsp->gss_seqnum, cksum2); - if (bcmp(cksum1, cksum2, 8) == 0) + nfs_gss_cksum_rep(ki, gsp->gss_seqnum, cksum2); + if (bcmp(cksum1, cksum2, HASHLEN(ki)) == 0) break; } if (gsp == NULL) - return (EAUTH); + return (NFSERR_EAUTH); /* * Get the RPC accepted status @@ -732,7 +789,7 @@ nfs_gss_clnt_verf_get( /* Compute a checksum over the sequence number + results */ start = nfsm_chain_offset(nmc); - nfs_gss_cksum_chain(cp->gss_clnt_sched, nmc, krb5_mic, start, reslen, cksum1); + nfs_gss_cksum_chain(ki, nmc, ALG_MIC(ki), start, reslen, cksum1); /* * Get the sequence number prepended to the results @@ -756,20 +813,19 @@ nfs_gss_clnt_verf_get( reslen -= NFSX_UNSIGNED; // already skipped seqnum nfsm_chain_adv(error, &nmc_tmp, reslen); // skip over the results nfsm_chain_get_32(error, &nmc_tmp, cksumlen); // length of checksum - if (cksumlen != KRB5_SZ_TOKEN) { + if (cksumlen != KRB5_SZ_TOKEN(ki->hash_len)) { error = EBADRPC; goto nfsmout; } nfsm_chain_get_opaque(error, &nmc_tmp, cksumlen, tokbuf); if (error) goto nfsmout; - error = nfs_gss_token_get(cp->gss_clnt_sched, krb5_mic, tokbuf, 0, - NULL, cksum2); + error = nfs_gss_token_get(ki, ALG_MIC(ki), tokbuf, 0, NULL, cksum2); if (error) goto nfsmout; /* Verify that the checksums are the same */ - if (bcmp(cksum1, cksum2, 8) != 0) { + if (bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { error = EBADRPC; goto nfsmout; } @@ -791,10 +847,10 @@ nfs_gss_clnt_verf_get( } /* Get the token that prepends the encrypted results */ - 
nfsm_chain_get_opaque(error, nmc, KRB5_SZ_TOKMAX, tokbuf); + nfsm_chain_get_opaque(error, nmc, KRB5_SZ_TOKMAX(ki->hash_len), tokbuf); if (error) goto nfsmout; - error = nfs_gss_token_get(cp->gss_clnt_sched, krb5_wrap, tokbuf, 0, + error = nfs_gss_token_get(ki, ALG_WRAP(ki), tokbuf, 0, &toklen, cksum1); if (error) goto nfsmout; @@ -803,13 +859,13 @@ nfs_gss_clnt_verf_get( /* decrypt the confounder + sequence number + results */ start = nfsm_chain_offset(nmc); - nfs_gss_encrypt_chain(cp->gss_clnt_skey, nmc, start, reslen, DES_DECRYPT); + nfs_gss_encrypt_chain(ki, nmc, start, reslen, DES_DECRYPT); /* Compute a checksum over the confounder + sequence number + results */ - nfs_gss_cksum_chain(cp->gss_clnt_sched, nmc, krb5_wrap, start, reslen, cksum2); + nfs_gss_cksum_chain(ki, nmc, ALG_WRAP(ki), start, reslen, cksum2); /* Verify that the checksums are the same */ - if (bcmp(cksum1, cksum2, 8) != 0) { + if (bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { error = EBADRPC; goto nfsmout; } @@ -856,7 +912,7 @@ nfs_gss_clnt_args_restore(struct nfsreq *req) int len, error = 0; if (cp == NULL) - return (EAUTH); + return (NFSERR_EAUTH); if ((cp->gss_clnt_flags & GSS_CTX_COMPLETE) == 0) return (ENEEDAUTH); @@ -894,8 +950,8 @@ nfs_gss_clnt_args_restore(struct nfsreq *req) */ len = req->r_gss_arglen; len += len % 8 > 0 ? 
4 : 8; // add DES padding length - nfs_gss_encrypt_chain(cp->gss_clnt_skey, nmc, - req->r_gss_argoff, len, DES_DECRYPT); + nfs_gss_encrypt_chain(&cp->gss_clnt_kinfo, nmc, + req->r_gss_argoff, len, DES_DECRYPT); nfsm_chain_adv(error, nmc, req->r_gss_arglen); if (error) return (error); @@ -923,17 +979,19 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) struct nfsmount *nmp = req->r_nmp; int client_complete = 0; int server_complete = 0; - u_char cksum1[8], cksum2[8]; + u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST]; int error = 0; struct timeval now; + gss_key_info *ki = &cp->gss_clnt_kinfo; /* Initialize a new client context */ cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp); if (cp->gss_clnt_svcname == NULL) { - error = EAUTH; + error = NFSERR_EAUTH; goto nfsmout; } + cp->gss_clnt_proc = RPCSEC_GSS_INIT; cp->gss_clnt_service = @@ -941,6 +999,7 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) nmp->nm_auth == RPCAUTH_KRB5I ? RPCSEC_GSS_SVC_INTEGRITY : nmp->nm_auth == RPCAUTH_KRB5P ? RPCSEC_GSS_SVC_PRIVACY : 0; + cp->gss_clnt_gssd_flags = (nfs_single_des ? GSSD_NFS_1DES : 0); /* * Now loop around alternating gss_init_sec_context and * gss_accept_sec_context upcalls to the gssd on the client @@ -948,6 +1007,7 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) */ for (;;) { +retry: /* Upcall to the gss_init_sec_context in the gssd */ error = nfs_gss_clnt_gssd_upcall(req, cp); if (error) @@ -958,7 +1018,7 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) if (server_complete) break; } else if (cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { - error = EAUTH; + error = NFSERR_EAUTH; goto nfsmout; } @@ -966,15 +1026,26 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) * Pass the token to the server. 
*/ error = nfs_gss_clnt_ctx_callserver(req, cp); - if (error) + if (error) { + if (cp->gss_clnt_proc == RPCSEC_GSS_INIT && + (cp->gss_clnt_gssd_flags & (GSSD_RESTART | GSSD_NFS_1DES)) == 0) { + cp->gss_clnt_gssd_flags = (GSSD_RESTART | GSSD_NFS_1DES); + if (cp->gss_clnt_token) + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + cp->gss_clnt_tokenlen = 0; + goto retry; + } + // Reset flags, if error = ENEEDAUTH we will try 3des again + cp->gss_clnt_gssd_flags = 0; goto nfsmout; - + } if (cp->gss_clnt_major == GSS_S_COMPLETE) { server_complete = 1; if (client_complete) break; } else if (cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { - error = EAUTH; + error = NFSERR_EAUTH; goto nfsmout; } @@ -989,31 +1060,23 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) microuptime(&now); cp->gss_clnt_ctime = now.tv_sec; // time stamp - /* - * Construct a key schedule from our shiny new session key - */ - error = des_key_sched((des_cblock *) cp->gss_clnt_skey, cp->gss_clnt_sched); - if (error) { - error = EAUTH; - goto nfsmout; - } /* * Compute checksum of the server's window */ - nfs_gss_cksum_rep(cp->gss_clnt_sched, cp->gss_clnt_seqwin, cksum1); + nfs_gss_cksum_rep(ki, cp->gss_clnt_seqwin, cksum1); /* * and see if it matches the one in the * verifier the server returned. 
*/ - error = nfs_gss_token_get(cp->gss_clnt_sched, krb5_mic, cp->gss_clnt_verf, 0, + error = nfs_gss_token_get(ki, ALG_MIC(ki), cp->gss_clnt_verf, 0, NULL, cksum2); FREE(cp->gss_clnt_verf, M_TEMP); cp->gss_clnt_verf = NULL; - if (error || bcmp(cksum1, cksum2, 8) != 0) { - error = EAUTH; + if (error || bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { + error = NFSERR_EAUTH; goto nfsmout; } @@ -1032,8 +1095,16 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) MALLOC(cp->gss_clnt_seqbits, uint32_t *, nfsm_rndup((cp->gss_clnt_seqwin + 7) / 8), M_TEMP, M_WAITOK|M_ZERO); if (cp->gss_clnt_seqbits == NULL) - error = EAUTH; + error = NFSERR_EAUTH; nfsmout: + /* + * If the error is ENEEDAUTH we're not done, so no need + * to wake up other threads again. This thread will retry in + * the find or renew routines. + */ + if (error == ENEEDAUTH) + return (error); + /* * If there's an error, just mark it as invalid. * It will be removed when the reference count @@ -1065,25 +1136,26 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) static int nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) { - struct nfsmount *nmp = req->r_nmp; struct nfsm_chain nmreq, nmrep; int error = 0, status; - u_int64_t xid; int sz; + if (!req->r_nmp) + return (ENXIO); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); sz = NFSX_UNSIGNED + nfsm_rndup(cp->gss_clnt_tokenlen); nfsm_chain_build_alloc_init(error, &nmreq, sz); nfsm_chain_add_32(error, &nmreq, cp->gss_clnt_tokenlen); - nfsm_chain_add_opaque(error, &nmreq, cp->gss_clnt_token, cp->gss_clnt_tokenlen); + if (cp->gss_clnt_tokenlen > 0) + nfsm_chain_add_opaque(error, &nmreq, cp->gss_clnt_token, cp->gss_clnt_tokenlen); nfsm_chain_build_done(error, &nmreq); if (error) goto nfsmout; /* Call the server */ - error = nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL, - req->r_thread, req->r_cred, 0, &nmrep, &xid, &status); + error = nfs_request_gss(req->r_nmp->nm_mountp, &nmreq, 
req->r_thread, req->r_cred, + (req->r_flags & R_OPTMASK), cp, &nmrep, &status); if (cp->gss_clnt_token != NULL) { FREE(cp->gss_clnt_token, M_TEMP); cp->gss_clnt_token = NULL; @@ -1096,8 +1168,10 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) /* Get the server's reply */ nfsm_chain_get_32(error, &nmrep, cp->gss_clnt_handle_len); - if (cp->gss_clnt_handle != NULL) + if (cp->gss_clnt_handle != NULL) { FREE(cp->gss_clnt_handle, M_TEMP); + cp->gss_clnt_handle = NULL; + } if (cp->gss_clnt_handle_len > 0) { MALLOC(cp->gss_clnt_handle, u_char *, cp->gss_clnt_handle_len, M_TEMP, M_WAITOK); if (cp->gss_clnt_handle == NULL) { @@ -1127,10 +1201,12 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) if (cp->gss_clnt_major != GSS_S_COMPLETE && cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { char who[] = "server"; + char unknown[] = ""; (void) mach_gss_log_error( cp->gss_clnt_mport, - vfs_statfs(nmp->nm_mountp)->f_mntfromname, + !req->r_nmp ? 
unknown : + vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname, cp->gss_clnt_uid, who, cp->gss_clnt_major, @@ -1154,10 +1230,12 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) static char * nfs_gss_clnt_svcname(struct nfsmount *nmp) { - char *svcname, *d; - char* mntfromhere = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0]; + char *svcname, *d, *mntfromhere; int len; + if (!nmp) + return (NULL); + mntfromhere = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0]; len = strlen(mntfromhere) + 5; /* "nfs/" plus null */ MALLOC(svcname, char *, len, M_TEMP, M_NOWAIT); if (svcname == NULL) @@ -1187,8 +1265,10 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) int retry_cnt = 0; vm_map_copy_t itoken = NULL; byte_buffer otoken = NULL; + mach_msg_type_number_t otokenlen; int error = 0; char uprinc[1]; + uint32_t ret_flags; /* * NFS currently only supports default principals or @@ -1202,13 +1282,13 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) if (cp->gss_clnt_mport == NULL) { kr = task_get_gssd_port(get_threadtask(req->r_thread), &cp->gss_clnt_mport); if (kr != KERN_SUCCESS) { - printf("nfs_gss_clnt_gssd_upcall: can't get gssd port, status %d\n", kr); - return (EAUTH); + printf("nfs_gss_clnt_gssd_upcall: can't get gssd port, status %x (%d)\n", kr, kr); + goto out; } if (!IPC_PORT_VALID(cp->gss_clnt_mport)) { printf("nfs_gss_clnt_gssd_upcall: gssd port not valid\n"); cp->gss_clnt_mport = NULL; - return (EAUTH); + goto out; } } @@ -1223,23 +1303,29 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) cp->gss_clnt_uid, uprinc, cp->gss_clnt_svcname, - GSSD_MUTUAL_FLAG | GSSD_NO_UI, - &cp->gss_clnt_gssd_verf, + GSSD_MUTUAL_FLAG, + cp->gss_clnt_gssd_flags, &cp->gss_clnt_context, &cp->gss_clnt_cred_handle, + &ret_flags, &okey, (mach_msg_type_number_t *) &skeylen, - &otoken, (mach_msg_type_number_t *) &cp->gss_clnt_tokenlen, + &otoken, &otokenlen, &cp->gss_clnt_major, 
&cp->gss_clnt_minor); - if (kr != 0) { - printf("nfs_gss_clnt_gssd_upcall: mach_gss_init_sec_context failed: %x\n", kr); + cp->gss_clnt_gssd_flags &= ~GSSD_RESTART; + + if (kr != KERN_SUCCESS) { + printf("nfs_gss_clnt_gssd_upcall: mach_gss_init_sec_context failed: %x (%d)\n", kr, kr); if (kr == MIG_SERVER_DIED && cp->gss_clnt_cred_handle == 0 && - retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES) + retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES) { + if (cp->gss_clnt_tokenlen > 0) + nfs_gss_mach_alloc_buffer(cp->gss_clnt_token, cp->gss_clnt_tokenlen, &itoken); goto retry; + } task_release_special_port(cp->gss_clnt_mport); cp->gss_clnt_mport = NULL; - return (EAUTH); + goto out; } /* @@ -1248,9 +1334,11 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) if (cp->gss_clnt_major != GSS_S_COMPLETE && cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { char who[] = "client"; + char unknown[] = ""; (void) mach_gss_log_error( cp->gss_clnt_mport, + !req->r_nmp ? unknown : vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname, cp->gss_clnt_uid, who, @@ -1259,26 +1347,56 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) } if (skeylen > 0) { - if (skeylen != SKEYLEN) { + if (skeylen != SKEYLEN && skeylen != SKEYLEN3) { printf("nfs_gss_clnt_gssd_upcall: bad key length (%d)\n", skeylen); - return (EAUTH); + vm_map_copy_discard((vm_map_copy_t) okey); + vm_map_copy_discard((vm_map_copy_t) otoken); + goto out; + } + error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, + cp->gss_clnt_kinfo.skey); + if (error) { + vm_map_copy_discard((vm_map_copy_t) otoken); + goto out; } - error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, cp->gss_clnt_skey); + + error = gss_key_init(&cp->gss_clnt_kinfo, skeylen); if (error) - return (EAUTH); + goto out; } - if (cp->gss_clnt_tokenlen > 0) { - MALLOC(cp->gss_clnt_token, u_char *, cp->gss_clnt_tokenlen, M_TEMP, M_WAITOK); - if (cp->gss_clnt_token == NULL) + /* Free context token used as input */ + if 
(cp->gss_clnt_token) + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + cp->gss_clnt_tokenlen = 0; + + if (otokenlen > 0) { + /* Set context token to gss output token */ + MALLOC(cp->gss_clnt_token, u_char *, otokenlen, M_TEMP, M_WAITOK); + if (cp->gss_clnt_token == NULL) { + printf("nfs_gss_clnt_gssd_upcall: could not allocate %d bytes\n", otokenlen); + vm_map_copy_discard((vm_map_copy_t) otoken); return (ENOMEM); - error = nfs_gss_mach_vmcopyout((vm_map_copy_t) otoken, cp->gss_clnt_tokenlen, - cp->gss_clnt_token); - if (error) - return (EAUTH); + } + error = nfs_gss_mach_vmcopyout((vm_map_copy_t) otoken, otokenlen, cp->gss_clnt_token); + if (error) { + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + return (NFSERR_EAUTH); + } + cp->gss_clnt_tokenlen = otokenlen; } return (0); + +out: + if (cp->gss_clnt_token) + FREE(cp->gss_clnt_token, M_TEMP); + cp->gss_clnt_token = NULL; + cp->gss_clnt_tokenlen = 0; + + return (NFSERR_EAUTH); } /* @@ -1425,7 +1543,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) mach_port_t saved_mport; int retrycnt = 0; - if (cp == NULL || !(cp->gss_clnt_flags & GSS_CTX_COMPLETE)) + if (cp == NULL) return (0); lck_mtx_lock(cp->gss_clnt_mtx); @@ -1438,9 +1556,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) saved_mport = task_copy_special_port(cp->gss_clnt_mport); /* Remove the old context */ - lck_mtx_lock(&nmp->nm_lock); cp->gss_clnt_flags |= GSS_CTX_INVAL; - lck_mtx_unlock(&nmp->nm_lock); /* * If there's a thread waiting @@ -1452,14 +1568,14 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) } lck_mtx_unlock(cp->gss_clnt_mtx); -retry: /* * Create a new context */ MALLOC(ncp, struct nfs_gss_clnt_ctx *, sizeof(*ncp), M_TEMP, M_WAITOK|M_ZERO); if (ncp == NULL) { - return (ENOMEM); + error = ENOMEM; + goto out; } ncp->gss_clnt_uid = saved_uid; @@ -1474,13 +1590,14 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) nfs_gss_clnt_ctx_unref(req); nfs_gss_clnt_ctx_ref(req, ncp); +retry: error = nfs_gss_clnt_ctx_init(req, ncp); // 
Initialize new context if (error == ENEEDAUTH) { error = nfs_gss_clnt_ctx_delay(req, &retrycnt); if (!error) goto retry; } - +out: task_release_special_port(saved_mport); if (error) nfs_gss_clnt_ctx_unref(req); @@ -1499,7 +1616,6 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp, int mntflags) struct ucred temp_cred; kauth_cred_t cred; struct nfsm_chain nmreq, nmrep; - u_int64_t xid; int error, status; struct nfsreq req; @@ -1532,8 +1648,8 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp, int mntflags) nfsm_chain_build_alloc_init(error, &nmreq, 0); nfsm_chain_build_done(error, &nmreq); if (!error) - nfs_request2(NULL, nmp->nm_mountp, &nmreq, NFSPROC_NULL, - current_thread(), cred, 0, &nmrep, &xid, &status); + nfs_request_gss(nmp->nm_mountp, &nmreq, + current_thread(), cred, 0, cp, &nmrep, &status); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); kauth_cred_unref(&cred); @@ -1564,6 +1680,8 @@ nfs_gss_clnt_ctx_delay(struct nfsreq *req, int *retry) struct timeval now; time_t waituntil; + if (!nmp) + return (ENXIO); if ((nmp->nm_flag & NFSMNT_SOFT) && *retry > nmp->nm_retry) return (ETIMEDOUT); if (timeo > 60) @@ -1573,7 +1691,7 @@ nfs_gss_clnt_ctx_delay(struct nfsreq *req, int *retry) waituntil = now.tv_sec + timeo; while (now.tv_sec < waituntil) { tsleep(&lbolt, PSOCK, "nfs_gss_clnt_ctx_delay", 0); - error = nfs_sigintr(nmp, req, current_thread(), 0); + error = nfs_sigintr(req->r_nmp, req, current_thread(), 0); if (error) break; microuptime(&now); @@ -1602,13 +1720,38 @@ nfs_gss_svc_ctx_find(uint32_t handle) { struct nfs_gss_svc_ctx_hashhead *head; struct nfs_gss_svc_ctx *cp; - + uint64_t timenow; + + if (handle == 0) + return (NULL); + head = &nfs_gss_svc_ctx_hashtbl[SVC_CTX_HASH(handle)]; + /* + * Don't return a context that is going to expire in GSS_CTX_PEND seconds + */ + clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC, &timenow); lck_mtx_lock(nfs_gss_svc_ctx_mutex); + LIST_FOREACH(cp, head, gss_svc_entries) - if (cp->gss_svc_handle == handle) + if 
(cp->gss_svc_handle == handle) { + if (timenow > cp->gss_svc_incarnation + GSS_SVC_CTX_TTL) { + /* + * Context has or is about to expire. Don't use. + * We'll return null and the client will have to create + * a new context. + */ + cp->gss_svc_handle = 0; + /* + * Make sure though that we stay around for GSS_CTC_PEND seconds + * for other threads that might be using the context. + */ + cp->gss_svc_incarnation = timenow; + cp = NULL; + } break; + } + lck_mtx_unlock(nfs_gss_svc_ctx_mutex); return (cp); @@ -1631,9 +1774,11 @@ nfs_gss_svc_ctx_insert(struct nfs_gss_svc_ctx *cp) if (!nfs_gss_timer_on) { nfs_gss_timer_on = 1; + nfs_interval_timer_start(nfs_gss_svc_ctx_timer_call, - GSS_TIMER_PERIOD * MSECS_PER_SEC); + min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, GSS_SVC_CTX_TTL)) * MSECS_PER_SEC); } + lck_mtx_unlock(nfs_gss_svc_ctx_mutex); } @@ -1667,7 +1812,8 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2) for (cp = LIST_FIRST(head); cp; cp = next) { contexts++; next = LIST_NEXT(cp, gss_svc_entries); - if (timenow > cp->gss_svc_expiretime) { + if (timenow > cp->gss_svc_incarnation + + (cp->gss_svc_handle ? 
GSS_SVC_CTX_TTL : 0)) { /* * A stale context - remove it */ @@ -1690,7 +1836,7 @@ nfs_gss_svc_ctx_timer(__unused void *param1, __unused void *param2) nfs_gss_timer_on = nfs_gss_ctx_count > 0; if (nfs_gss_timer_on) nfs_interval_timer_start(nfs_gss_svc_ctx_timer_call, - GSS_TIMER_PERIOD * MSECS_PER_SEC); + min(GSS_TIMER_PERIOD, max(GSS_CTX_TTL_MIN, GSS_SVC_CTX_TTL)) * MSECS_PER_SEC); lck_mtx_unlock(nfs_gss_svc_ctx_mutex); } @@ -1714,10 +1860,11 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) uint32_t flavor = 0, verflen = 0; int error = 0; uint32_t arglen, start, toklen, cksumlen; - u_char tokbuf[KRB5_SZ_TOKMAX]; - u_char cksum1[8], cksum2[8]; + u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; + u_char cksum1[MAX_DIGEST], cksum2[MAX_DIGEST]; struct nfsm_chain nmc_tmp; - + gss_key_info *ki; + vers = proc = seqnum = service = handle_len = 0; arglen = cksumlen = 0; @@ -1794,6 +1941,7 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) } cp->gss_svc_proc = proc; + ki = &cp->gss_svc_kinfo; if (proc == RPCSEC_GSS_DATA || proc == RPCSEC_GSS_DESTROY) { struct ucred temp_cred; @@ -1815,7 +1963,7 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) } /* Now compute the client's call header checksum */ - nfs_gss_cksum_chain(cp->gss_svc_sched, nmc, krb5_mic, 0, 0, cksum1); + nfs_gss_cksum_chain(ki, nmc, ALG_MIC(ki), 0, 0, cksum1); /* * Validate the verifier. 
@@ -1827,19 +1975,19 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) */ nfsm_chain_get_32(error, nmc, flavor); nfsm_chain_get_32(error, nmc, verflen); - if (flavor != RPCSEC_GSS || verflen != KRB5_SZ_TOKEN) + if (flavor != RPCSEC_GSS || verflen != KRB5_SZ_TOKEN(ki->hash_len)) error = NFSERR_AUTHERR | AUTH_BADVERF; nfsm_chain_get_opaque(error, nmc, verflen, tokbuf); if (error) goto nfsmout; /* Get the checksum from the token inside the verifier */ - error = nfs_gss_token_get(cp->gss_svc_sched, krb5_mic, tokbuf, 1, + error = nfs_gss_token_get(ki, ALG_MIC(ki), tokbuf, 1, NULL, cksum2); if (error) goto nfsmout; - if (bcmp(cksum1, cksum2, 8) != 0) { + if (bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { error = NFSERR_AUTHERR | RPCSEC_GSS_CTXPROBLEM; goto nfsmout; } @@ -1860,8 +2008,7 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) error = ENOMEM; goto nfsmout; } - clock_interval_to_deadline(GSS_CTX_EXPIRE, NSEC_PER_SEC, - &cp->gss_svc_expiretime); + clock_get_uptime(&cp->gss_svc_incarnation); /* * If the call arguments are integrity or privacy protected @@ -1889,7 +2036,7 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) /* Compute the checksum over the call args */ start = nfsm_chain_offset(nmc); - nfs_gss_cksum_chain(cp->gss_svc_sched, nmc, krb5_mic, start, arglen, cksum1); + nfs_gss_cksum_chain(ki, nmc, ALG_MIC(ki), start, arglen, cksum1); /* * Get the sequence number prepended to the args @@ -1910,18 +2057,18 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) arglen -= NFSX_UNSIGNED; // skipped seqnum nfsm_chain_adv(error, &nmc_tmp, arglen); // skip args nfsm_chain_get_32(error, &nmc_tmp, cksumlen); // length of checksum - if (cksumlen != KRB5_SZ_TOKEN) { + if (cksumlen != KRB5_SZ_TOKEN(ki->hash_len)) { error = EBADRPC; goto nfsmout; } nfsm_chain_get_opaque(error, &nmc_tmp, cksumlen, tokbuf); if (error) goto nfsmout; - error = nfs_gss_token_get(cp->gss_svc_sched, 
krb5_mic, tokbuf, 1, + error = nfs_gss_token_get(ki, ALG_MIC(ki), tokbuf, 1, NULL, cksum2); /* Verify that the checksums are the same */ - if (error || bcmp(cksum1, cksum2, 8) != 0) { + if (error || bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { error = EBADRPC; goto nfsmout; } @@ -1943,11 +2090,11 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) } /* Get the token that prepends the encrypted args */ - nfsm_chain_get_opaque(error, nmc, KRB5_SZ_TOKMAX, tokbuf); + nfsm_chain_get_opaque(error, nmc, KRB5_SZ_TOKMAX(ki->hash_len), tokbuf); if (error) goto nfsmout; - error = nfs_gss_token_get(cp->gss_svc_sched, krb5_wrap, tokbuf, 1, - &toklen, cksum1); + error = nfs_gss_token_get(ki, ALG_WRAP(ki), tokbuf, 1, + &toklen, cksum1); if (error) goto nfsmout; nfsm_chain_reverse(nmc, nfsm_pad(toklen)); @@ -1955,13 +2102,13 @@ nfs_gss_svc_cred_get(struct nfsrv_descript *nd, struct nfsm_chain *nmc) /* decrypt the 8 byte confounder + seqnum + args */ start = nfsm_chain_offset(nmc); arglen -= toklen; - nfs_gss_encrypt_chain(cp->gss_svc_skey, nmc, start, arglen, DES_DECRYPT); + nfs_gss_encrypt_chain(ki, nmc, start, arglen, DES_DECRYPT); /* Compute a checksum over the sequence number + results */ - nfs_gss_cksum_chain(cp->gss_svc_sched, nmc, krb5_wrap, start, arglen, cksum2); + nfs_gss_cksum_chain(ki, nmc, ALG_WRAP(ki), start, arglen, cksum2); /* Verify that the checksums are the same */ - if (bcmp(cksum1, cksum2, 8) != 0) { + if (bcmp(cksum1, cksum2, HASHLEN(ki)) != 0) { error = EBADRPC; goto nfsmout; } @@ -2008,12 +2155,14 @@ nfs_gss_svc_verf_put(struct nfsrv_descript *nd, struct nfsm_chain *nmc) { struct nfs_gss_svc_ctx *cp; int error = 0; - u_char tokbuf[KRB5_SZ_TOKEN]; + u_char tokbuf[KRB5_SZ_TOKEN(MAX_DIGEST)]; int toklen; - u_char cksum[8]; + u_char cksum[MAX_DIGEST]; + gss_key_info *ki; cp = nd->nd_gss_context; - + ki = &cp->gss_svc_kinfo; + if (cp->gss_svc_major != GSS_S_COMPLETE) { /* * If the context isn't yet complete @@ -2032,14 +2181,14 @@ 
nfs_gss_svc_verf_put(struct nfsrv_descript *nd, struct nfsm_chain *nmc) */ if (cp->gss_svc_proc == RPCSEC_GSS_INIT || cp->gss_svc_proc == RPCSEC_GSS_CONTINUE_INIT) - nfs_gss_cksum_rep(cp->gss_svc_sched, cp->gss_svc_seqwin, cksum); + nfs_gss_cksum_rep(ki, cp->gss_svc_seqwin, cksum); else - nfs_gss_cksum_rep(cp->gss_svc_sched, nd->nd_gss_seqnum, cksum); + nfs_gss_cksum_rep(ki, nd->nd_gss_seqnum, cksum); /* * Now wrap it in a token and add * the verifier to the reply. */ - toklen = nfs_gss_token_put(cp->gss_svc_sched, krb5_mic, tokbuf, 0, 0, cksum); + toklen = nfs_gss_token_put(ki, ALG_MIC(ki), tokbuf, 0, 0, cksum); nfsm_chain_add_32(error, nmc, RPCSEC_GSS); nfsm_chain_add_32(error, nmc, toklen); nfsm_chain_add_opaque(error, nmc, tokbuf, toklen); @@ -2097,10 +2246,11 @@ nfs_gss_svc_protect_reply(struct nfsrv_descript *nd, mbuf_t mrep) struct nfsm_chain nmrep_pre, *nmc_pre = &nmrep_pre; mbuf_t mb, results; uint32_t reslen; - u_char tokbuf[KRB5_SZ_TOKMAX]; + u_char tokbuf[KRB5_SZ_TOKMAX(MAX_DIGEST)]; int pad, toklen; - u_char cksum[8]; + u_char cksum[MAX_DIGEST]; int error = 0; + gss_key_info *ki = &cp->gss_svc_kinfo; /* * Using a reference to the mbuf where we previously split the reply @@ -2127,10 +2277,10 @@ nfs_gss_svc_protect_reply(struct nfsrv_descript *nd, mbuf_t mrep) nfs_gss_append_chain(nmc_pre, results); // Append the results mbufs /* Now compute the checksum over the results data */ - nfs_gss_cksum_mchain(cp->gss_svc_sched, results, krb5_mic, 0, reslen, cksum); + nfs_gss_cksum_mchain(ki, results, ALG_MIC(ki), 0, reslen, cksum); /* Put it into a token and append to the request */ - toklen = nfs_gss_token_put(cp->gss_svc_sched, krb5_mic, tokbuf, 0, 0, cksum); + toklen = nfs_gss_token_put(ki, ALG_MIC(ki), tokbuf, 0, 0, cksum); nfsm_chain_add_32(error, nmc_res, toklen); nfsm_chain_add_opaque(error, nmc_res, tokbuf, toklen); nfsm_chain_build_done(error, nmc_res); @@ -2152,10 +2302,10 @@ nfs_gss_svc_protect_reply(struct nfsrv_descript *nd, mbuf_t mrep) 
nfsm_chain_build_done(error, nmc_res); /* Now compute the checksum over the results data */ - nfs_gss_cksum_mchain(cp->gss_svc_sched, results, krb5_wrap, 0, reslen, cksum); + nfs_gss_cksum_mchain(ki, results, ALG_WRAP(ki), 0, reslen, cksum); /* Put it into a token and insert in the reply */ - toklen = nfs_gss_token_put(cp->gss_svc_sched, krb5_wrap, tokbuf, 0, reslen, cksum); + toklen = nfs_gss_token_put(ki, ALG_WRAP(ki), tokbuf, 0, reslen, cksum); nfsm_chain_add_32(error, nmc_pre, toklen + reslen); nfsm_chain_add_opaque_nopad(error, nmc_pre, tokbuf, toklen); nfsm_chain_build_done(error, nmc_pre); @@ -2164,7 +2314,7 @@ nfs_gss_svc_protect_reply(struct nfsrv_descript *nd, mbuf_t mrep) nfs_gss_append_chain(nmc_pre, results); // Append the results mbufs /* Encrypt the confounder + seqnum + results */ - nfs_gss_encrypt_mchain(cp->gss_svc_skey, results, 0, reslen, DES_ENCRYPT); + nfs_gss_encrypt_mchain(ki, results, 0, reslen, DES_ENCRYPT); /* Add null XDR pad if the ASN.1 token misaligned the data */ pad = nfsm_pad(toklen + reslen); @@ -2217,7 +2367,7 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * cp->gss_svc_handle = handle; cp->gss_svc_mtx = lck_mtx_alloc_init(nfs_gss_svc_grp, LCK_ATTR_NULL); clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC, - &cp->gss_svc_expiretime); + &cp->gss_svc_incarnation); nfs_gss_svc_ctx_insert(cp); @@ -2241,7 +2391,7 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * error = nfs_gss_svc_gssd_upcall(cp); if (error) { autherr = RPCSEC_GSS_CREDPROBLEM; - if (error == EAUTH) + if (error == NFSERR_EAUTH) error = 0; break; } @@ -2257,8 +2407,8 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * * Now the server context is complete. * Finish setup. 
*/ - clock_interval_to_deadline(GSS_CTX_EXPIRE, NSEC_PER_SEC, - &cp->gss_svc_expiretime); + clock_get_uptime(&cp->gss_svc_incarnation); + cp->gss_svc_seqwin = GSS_SVC_SEQWINDOW; MALLOC(cp->gss_svc_seqbits, uint32_t *, nfsm_rndup((cp->gss_svc_seqwin + 7) / 8), M_TEMP, M_WAITOK|M_ZERO); @@ -2266,16 +2416,6 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * autherr = RPCSEC_GSS_CREDPROBLEM; break; } - - /* - * Generate a key schedule from our shiny new DES key - */ - error = des_key_sched((des_cblock *) cp->gss_svc_skey, cp->gss_svc_sched); - if (error) { - autherr = RPCSEC_GSS_CREDPROBLEM; - error = 0; - break; - } break; case RPCSEC_GSS_DATA: @@ -2294,7 +2434,7 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * cp->gss_svc_handle = 0; // so it can't be found lck_mtx_lock(cp->gss_svc_mtx); clock_interval_to_deadline(GSS_CTX_PEND, NSEC_PER_SEC, - &cp->gss_svc_expiretime); + &cp->gss_svc_incarnation); lck_mtx_unlock(cp->gss_svc_mtx); } break; @@ -2323,8 +2463,8 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * nfsm_chain_add_32(error, &nmrep, cp->gss_svc_seqwin); nfsm_chain_add_32(error, &nmrep, cp->gss_svc_tokenlen); - nfsm_chain_add_opaque(error, &nmrep, cp->gss_svc_token, cp->gss_svc_tokenlen); if (cp->gss_svc_token != NULL) { + nfsm_chain_add_opaque(error, &nmrep, cp->gss_svc_token, cp->gss_svc_tokenlen); FREE(cp->gss_svc_token, M_TEMP); cp->gss_svc_token = NULL; } @@ -2332,6 +2472,7 @@ nfs_gss_svc_ctx_init(struct nfsrv_descript *nd, struct nfsrv_sock *slp, mbuf_t * nfsmout: if (autherr != 0) { + nd->nd_gss_context = NULL; LIST_REMOVE(cp, gss_svc_entries); if (cp->gss_svc_seqbits != NULL) FREE(cp->gss_svc_seqbits, M_TEMP); @@ -2363,19 +2504,21 @@ nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *cp) int retry_cnt = 0; byte_buffer okey = NULL; uint32_t skeylen = 0; + uint32_t ret_flags; vm_map_copy_t itoken = NULL; byte_buffer otoken = NULL; + mach_msg_type_number_t 
otokenlen; int error = 0; char svcname[] = "nfs"; kr = task_get_gssd_port(get_threadtask(current_thread()), &mp); if (kr != KERN_SUCCESS) { - printf("nfs_gss_svc_gssd_upcall: can't get gssd port, status 0x%08x\n", kr); - return (EAUTH); + printf("nfs_gss_svc_gssd_upcall: can't get gssd port, status %x (%d)\n", kr, kr); + goto out; } if (!IPC_PORT_VALID(mp)) { printf("nfs_gss_svc_gssd_upcall: gssd port not valid\n"); - return (EAUTH); + goto out; } if (cp->gss_svc_tokenlen > 0) @@ -2387,48 +2530,80 @@ nfs_gss_svc_gssd_upcall(struct nfs_gss_svc_ctx *cp) (byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_svc_tokenlen, svcname, 0, - &cp->gss_svc_gssd_verf, &cp->gss_svc_context, &cp->gss_svc_cred_handle, + &ret_flags, &cp->gss_svc_uid, cp->gss_svc_gids, &cp->gss_svc_ngroups, &okey, (mach_msg_type_number_t *) &skeylen, - &otoken, (mach_msg_type_number_t *) &cp->gss_svc_tokenlen, + &otoken, &otokenlen, &cp->gss_svc_major, &cp->gss_svc_minor); if (kr != KERN_SUCCESS) { - printf("nfs_gss_svc_gssd_upcall failed: %d\n", kr); + printf("nfs_gss_svc_gssd_upcall failed: %x (%d)\n", kr, kr); if (kr == MIG_SERVER_DIED && cp->gss_svc_context == 0 && - retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES) + retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES) { + if (cp->gss_svc_tokenlen > 0) + nfs_gss_mach_alloc_buffer(cp->gss_svc_token, cp->gss_svc_tokenlen, &itoken); goto retry; + } task_release_special_port(mp); - return (EAUTH); + goto out; } task_release_special_port(mp); + if (skeylen > 0) { - if (skeylen != SKEYLEN) { + if (skeylen != SKEYLEN && skeylen != SKEYLEN3) { printf("nfs_gss_svc_gssd_upcall: bad key length (%d)\n", skeylen); - return (EAUTH); + vm_map_copy_discard((vm_map_copy_t) okey); + vm_map_copy_discard((vm_map_copy_t) otoken); + goto out; } - error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, cp->gss_svc_skey); + error = nfs_gss_mach_vmcopyout((vm_map_copy_t) okey, skeylen, cp->gss_svc_kinfo.skey); + if (error) { + vm_map_copy_discard((vm_map_copy_t) otoken); + goto out; + 
} + error = gss_key_init(&cp->gss_svc_kinfo, skeylen); if (error) - return (EAUTH); + goto out; + } - if (cp->gss_svc_tokenlen > 0) { - MALLOC(cp->gss_svc_token, u_char *, cp->gss_svc_tokenlen, M_TEMP, M_WAITOK); - if (cp->gss_svc_token == NULL) + /* Free context token used as input */ + if (cp->gss_svc_token) + FREE(cp->gss_svc_token, M_TEMP); + cp->gss_svc_token = NULL; + cp->gss_svc_tokenlen = 0; + + if (otokenlen > 0) { + /* Set context token to gss output token */ + MALLOC(cp->gss_svc_token, u_char *, otokenlen, M_TEMP, M_WAITOK); + if (cp->gss_svc_token == NULL) { + printf("nfs_gss_svc_gssd_upcall: could not allocate %d bytes\n", otokenlen); + vm_map_copy_discard((vm_map_copy_t) otoken); return (ENOMEM); - error = nfs_gss_mach_vmcopyout((vm_map_copy_t) otoken, cp->gss_svc_tokenlen, - cp->gss_svc_token); - if (error) - return (EAUTH); + } + error = nfs_gss_mach_vmcopyout((vm_map_copy_t) otoken, otokenlen, cp->gss_svc_token); + if (error) { + FREE(cp->gss_svc_token, M_TEMP); + cp->gss_svc_token = NULL; + return (NFSERR_EAUTH); + } + cp->gss_svc_tokenlen = otokenlen; } - return (kr); + return (0); + +out: + FREE(cp->gss_svc_token, M_TEMP); + cp->gss_svc_tokenlen = 0; + cp->gss_svc_token = NULL; + + return (NFSERR_EAUTH); } /* @@ -2580,9 +2755,15 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(kmem_buf), vm_map_round_page(kmem_buf + tbuflen), VM_PROT_READ|VM_PROT_WRITE, FALSE); - + if (kr != 0) { + printf("nfs_gss_mach_alloc_buffer: vm_map_wire failed\n"); + return; + } + bcopy(buf, (void *) kmem_buf, buflen); - + // Shouldn't need to bzero below since vm_allocate returns zeroed pages + // bzero(kmem_buf + buflen, tbuflen - buflen); + kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(kmem_buf), vm_map_round_page(kmem_buf + tbuflen), FALSE); if (kr != 0) { @@ -2596,9 +2777,6 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) 
printf("nfs_gss_mach_alloc_buffer: vm_map_copyin failed\n"); return; } - - if (buflen != tbuflen) - kmem_free(ipc_kernel_map, kmem_buf + buflen, tbuflen - buflen); } /* @@ -2632,7 +2810,7 @@ nfs_gss_mach_vmcopyout(vm_map_copy_t in, uint32_t len, u_char *out) */ static int nfs_gss_token_put( - des_key_schedule sched, + gss_key_info *ki, u_char *alg, u_char *p, int initiator, @@ -2651,7 +2829,7 @@ nfs_gss_token_put( * MIC token, or 35 + encrypted octets for a wrap token; */ *p++ = 0x060; - toklen = KRB5_SZ_MECH + KRB5_SZ_ALG + KRB5_SZ_SEQ + KRB5_SZ_CKSUM; + toklen = KRB5_SZ_MECH + KRB5_SZ_ALG + KRB5_SZ_SEQ + HASHLEN(ki); nfs_gss_der_length_put(&p, toklen + datalen); /* @@ -2693,18 +2871,18 @@ nfs_gss_token_put( plain[i] = (u_char) ((seqnum >> (i * 8)) & 0xff); for (i = 4; i < 8; i++) plain[i] = initiator ? 0x00 : 0xff; - des_cbc_encrypt((des_cblock *) plain, (des_cblock *) p, 8, - sched, (des_cblock *) cksum, NULL, DES_ENCRYPT); + gss_des_crypt(ki, (des_cblock *) plain, (des_cblock *) p, 8, + (des_cblock *) cksum, NULL, DES_ENCRYPT, KG_USAGE_SEQ); p += 8; /* - * Finally, append 8 octets of DES MAC MD5 + * Finally, append the octets of the * checksum of the alg + plaintext data. * The plaintext could be an RPC call header, * the window value, or a sequence number. */ - bcopy(cksum, p, 8); - p += 8; + bcopy(cksum, p, HASHLEN(ki)); + p += HASHLEN(ki); return (p - psave); } @@ -2771,7 +2949,7 @@ nfs_gss_der_length_get(u_char **pp) */ static int nfs_gss_token_get( - des_key_schedule sched, + gss_key_info *ki, u_char *alg, u_char *p, int initiator, @@ -2806,15 +2984,15 @@ nfs_gss_token_get( /* * Now decrypt the sequence number. - * Note that the DES CBC decryption uses the first 8 octets + * Note that the gss decryption uses the first 8 octets * of the checksum field as an initialization vector (p + 8). 
* Per RFC 2203 section 5.2.2 we don't check the sequence number * in the ASN.1 token because the RPCSEC_GSS protocol has its * own sequence number described in section 5.3.3.1 */ seqnum = 0; - des_cbc_encrypt((des_cblock *) p, (des_cblock *) plain, 8, - sched, (des_cblock *) (p + 8), NULL, DES_DECRYPT); + gss_des_crypt(ki, (des_cblock *)p, (des_cblock *) plain, 8, + (des_cblock *) (p + 8), NULL, DES_DECRYPT, KG_USAGE_SEQ); p += 8; for (i = 0; i < 4; i++) seqnum |= plain[i] << (i * 8); @@ -2831,8 +3009,8 @@ nfs_gss_token_get( /* * Finally, get the checksum */ - bcopy(p, cksum, 8); - p += 8; + bcopy(p, cksum, HASHLEN(ki)); + p += HASHLEN(ki); if (len != NULL) *len = p - psave; @@ -2911,26 +3089,25 @@ nfs_gss_nfsm_chain(struct nfsm_chain *nmc, mbuf_t mc) */ static void nfs_gss_cksum_mchain( - des_key_schedule sched, + gss_key_info *ki, mbuf_t mhead, u_char *alg, int offset, int len, - u_char *cksum) + u_char *digest) { mbuf_t mb; u_char *ptr; int left, bytes; - MD5_CTX context; - u_char digest[16]; + GSS_DIGEST_CTX context; - MD5Init(&context); + gss_digest_Init(&context, ki); /* * Logically prepend the first 8 bytes of the algorithm * field as required by RFC 1964, section 1.2.1.1 */ - MD5Update(&context, alg, KRB5_SZ_ALG); + gss_digest_Update(&context, alg, KRB5_SZ_ALG); /* * Move down the mbuf chain until we reach the given @@ -2953,17 +3130,11 @@ nfs_gss_cksum_mchain( bytes = left < len ? left : len; if (bytes > 0) - MD5Update(&context, ptr, bytes); + gss_digest_Update(&context, ptr, bytes); len -= bytes; } - MD5Final(digest, &context); - - /* - * Now get the DES CBC checksum for the digest. 
- */ - (void) des_cbc_cksum((des_cblock *) digest, (des_cblock *) cksum, - sizeof(digest), sched, (des_cblock *) iv0); + gss_digest_Final(&context, digest); } /* @@ -2975,7 +3146,7 @@ nfs_gss_cksum_mchain( */ static void nfs_gss_cksum_chain( - des_key_schedule sched, + gss_key_info *ki, struct nfsm_chain *nmc, u_char *alg, int offset, @@ -2990,7 +3161,7 @@ nfs_gss_cksum_chain( if (len == 0) len = nfsm_chain_offset(nmc) - offset; - return (nfs_gss_cksum_mchain(sched, nmc->nmc_mhead, alg, offset, len, cksum)); + return (nfs_gss_cksum_mchain(ki, nmc->nmc_mhead, alg, offset, len, cksum)); } /* @@ -2998,31 +3169,24 @@ nfs_gss_cksum_chain( * of an RPCSEC_GSS reply. */ static void -nfs_gss_cksum_rep(des_key_schedule sched, uint32_t seqnum, u_char *cksum) +nfs_gss_cksum_rep(gss_key_info *ki, uint32_t seqnum, u_char *cksum) { - MD5_CTX context; - u_char digest[16]; + GSS_DIGEST_CTX context; uint32_t val = htonl(seqnum); - MD5Init(&context); + gss_digest_Init(&context, ki); /* * Logically prepend the first 8 bytes of the MIC * token as required by RFC 1964, section 1.2.1.1 */ - MD5Update(&context, krb5_mic, KRB5_SZ_ALG); + gss_digest_Update(&context, ALG_MIC(ki), KRB5_SZ_ALG); /* * Compute the digest of the seqnum in network order */ - MD5Update(&context, (u_char *) &val, 4); - MD5Final(digest, &context); - - /* - * Now get the DES CBC checksum for the digest. 
- */ - (void) des_cbc_cksum((des_cblock *) digest, (des_cblock *) cksum, - sizeof(digest), sched, (des_cblock *) iv0); + gss_digest_Update(&context, &val, 4); + gss_digest_Final(&context, cksum); } /* @@ -3030,26 +3194,19 @@ nfs_gss_cksum_rep(des_key_schedule sched, uint32_t seqnum, u_char *cksum) */ static void nfs_gss_encrypt_mchain( - u_char *key, + gss_key_info *ki, mbuf_t mhead, int offset, int len, int encrypt) { - des_key_schedule sched; mbuf_t mb, mbn; u_char *ptr, *nptr; u_char tmp[8], ivec[8]; - int i, left, left8, remain; + int left, left8, remain; - /* - * Make the key schedule per RFC 1964 section 1.2.2.3 - */ - for (i = 0; i < 8; i++) - tmp[i] = key[i] ^ 0xf0; - bzero(ivec, 8); - (void) des_key_sched((des_cblock *) tmp, sched); + bzero(ivec, 8); /* * Move down the mbuf chain until we reach the given @@ -3072,7 +3229,7 @@ nfs_gss_encrypt_mchain( offset = 0; /* - * DES CBC has to encrypt 8 bytes at a time. + * DES or DES3 CBC has to encrypt 8 bytes at a time. * If the number of bytes to be encrypted in this * mbuf isn't some multiple of 8 bytes, encrypt all * the 8 byte blocks, then combine the remaining @@ -3084,8 +3241,8 @@ nfs_gss_encrypt_mchain( left8 = left - remain; left = left8 < len ? 
left8 : len; if (left > 0) { - des_cbc_encrypt((des_cblock *) ptr, (des_cblock *) ptr, left, sched, - (des_cblock *) ivec, (des_cblock *) ivec, encrypt); + gss_des_crypt(ki, (des_cblock *) ptr, (des_cblock *) ptr, + left, &ivec, &ivec, encrypt, KG_USAGE_SEAL); len -= left; } @@ -3094,8 +3251,8 @@ nfs_gss_encrypt_mchain( offset = 8 - remain; bcopy(ptr + left, tmp, remain); // grab from this mbuf bcopy(nptr, tmp + remain, offset); // grab from next mbuf - des_cbc_encrypt((des_cblock *) tmp, (des_cblock *) tmp, 8, sched, - (des_cblock *) ivec, (des_cblock *) ivec, encrypt); + gss_des_crypt(ki, (des_cblock *) tmp, (des_cblock *) tmp, 8, + &ivec, &ivec, encrypt, KG_USAGE_SEAL); bcopy(tmp, ptr + left, remain); // return to this mbuf bcopy(tmp + remain, nptr, offset); // return to next mbuf len -= 8; @@ -3108,7 +3265,7 @@ nfs_gss_encrypt_mchain( */ static void nfs_gss_encrypt_chain( - u_char *key, + gss_key_info *ki, struct nfsm_chain *nmc, int offset, int len, @@ -3122,134 +3279,127 @@ nfs_gss_encrypt_chain( if (len == 0) len = nfsm_chain_offset(nmc) - offset; - return (nfs_gss_encrypt_mchain(key, nmc->nmc_mhead, offset, len, encrypt)); + return (nfs_gss_encrypt_mchain(ki, nmc->nmc_mhead, offset, len, encrypt)); } /* - * XXX This function borrowed from OpenBSD. - * It will likely be moved into kernel crypto. + * The routines that follow provide abstractions for doing digests and crypto. 
*/ -static DES_LONG -des_cbc_cksum(input, output, length, schedule, ivec) - des_cblock (*input); - des_cblock (*output); - long length; - des_key_schedule schedule; - des_cblock (*ivec); + +static void +gss_digest_Init(GSS_DIGEST_CTX *ctx, gss_key_info *ki) { - register unsigned long tout0,tout1,tin0,tin1; - register long l=length; - unsigned long tin[2]; - unsigned char *in,*out,*iv; - - in=(unsigned char *)input; - out=(unsigned char *)output; - iv=(unsigned char *)ivec; - - c2l(iv,tout0); - c2l(iv,tout1); - for (; l>0; l-=8) { - if (l >= 8) { - c2l(in,tin0); - c2l(in,tin1); - } else - c2ln(in,tin0,tin1,l); - - tin0^=tout0; tin[0]=tin0; - tin1^=tout1; tin[1]=tin1; - des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); - /* fix 15/10/91 eay - thanks to keithr@sco.COM */ - tout0=tin[0]; - tout1=tin[1]; - } - if (out != NULL) { - l2c(tout0,out); - l2c(tout1,out); - } - tout0=tin0=tin1=tin[0]=tin[1]=0; - return(tout1); + ctx->type = ki->type; + switch (ki->type) { + case NFS_GSS_1DES: MD5_DESCBC_Init(&ctx->m_ctx, &ki->ks_u.des.gss_sched); + break; + case NFS_GSS_3DES: HMAC_SHA1_DES3KD_Init(&ctx->h_ctx, ki->ks_u.des3.ckey, 0); + break; + default: + printf("gss_digest_Init: Unknown key info type %d\n", ki->type); + } } -/* - * XXX This function borrowed from OpenBSD. - * It will likely be moved into kernel crypto. 
- */ static void -des_cbc_encrypt(input, output, length, schedule, ivec, retvec, encrypt) - des_cblock (*input); - des_cblock (*output); - long length; - des_key_schedule schedule; - des_cblock (*ivec); - des_cblock (*retvec); - int encrypt; +gss_digest_Update(GSS_DIGEST_CTX *ctx, void *data, size_t len) { - register unsigned long tin0,tin1; - register unsigned long tout0,tout1,xor0,xor1; - register unsigned char *in,*out,*retval; - register long l=length; - unsigned long tin[2]; - unsigned char *iv; - tin0 = tin1 = 0; - - in=(unsigned char *)input; - out=(unsigned char *)output; - retval=(unsigned char *)retvec; - iv=(unsigned char *)ivec; - - if (encrypt) { - c2l(iv,tout0); - c2l(iv,tout1); - for (l-=8; l>=0; l-=8) { - c2l(in,tin0); - c2l(in,tin1); - tin0^=tout0; tin[0]=tin0; - tin1^=tout1; tin[1]=tin1; - des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); - tout0=tin[0]; l2c(tout0,out); - tout1=tin[1]; l2c(tout1,out); - } - if (l != -8) { - c2ln(in,tin0,tin1,l+8); - tin0^=tout0; tin[0]=tin0; - tin1^=tout1; tin[1]=tin1; - des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); - tout0=tin[0]; l2c(tout0,out); - tout1=tin[1]; l2c(tout1,out); - } - if (retval) { - l2c(tout0,retval); - l2c(tout1,retval); - } - } else { - c2l(iv,xor0); - c2l(iv,xor1); - for (l-=8; l>=0; l-=8) { - c2l(in,tin0); tin[0]=tin0; - c2l(in,tin1); tin[1]=tin1; - des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT); - tout0=tin[0]^xor0; - tout1=tin[1]^xor1; - l2c(tout0,out); - l2c(tout1,out); - xor0=tin0; - xor1=tin1; - } - if (l != -8) { - c2l(in,tin0); tin[0]=tin0; - c2l(in,tin1); tin[1]=tin1; - des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT); - tout0=tin[0]^xor0; - tout1=tin[1]^xor1; - l2cn(tout0,tout1,out,l+8); - /* xor0=tin0; - xor1=tin1; */ - } - if (retval) { - l2c(tin0,retval); - l2c(tin1,retval); - } + switch (ctx->type) { + case NFS_GSS_1DES: MD5_DESCBC_Update(&ctx->m_ctx, data, len); + break; + case NFS_GSS_3DES: HMAC_SHA1_DES3KD_Update(&ctx->h_ctx, data, len); + break; + } +} + +static 
void +gss_digest_Final(GSS_DIGEST_CTX *ctx, void *digest) +{ + switch (ctx->type) { + case NFS_GSS_1DES: MD5_DESCBC_Final(digest, &ctx->m_ctx); + break; + case NFS_GSS_3DES: HMAC_SHA1_DES3KD_Final(digest, &ctx->h_ctx); + break; + } +} + +static void +gss_des_crypt(gss_key_info *ki, des_cblock *in, des_cblock *out, + int32_t len, des_cblock *iv, des_cblock *retiv, int encrypt, int usage) +{ + switch (ki->type) { + case NFS_GSS_1DES: + { + des_key_schedule *sched = ((usage == KG_USAGE_SEAL) ? + &ki->ks_u.des.gss_sched_Ke : + &ki->ks_u.des.gss_sched); + des_cbc_encrypt(in, out, len, *sched, iv, retiv, encrypt); + } + break; + case NFS_GSS_3DES: + + des3_cbc_encrypt(in, out, len, ki->ks_u.des3.gss_sched, iv, retiv, encrypt); + break; + } +} + +static int +gss_key_init(gss_key_info *ki, uint32_t skeylen) +{ + size_t i; + int rc; + des_cblock k[3]; + + ki->keybytes = skeylen; + switch (skeylen) { + case sizeof(des_cblock): + ki->type = NFS_GSS_1DES; + ki->hash_len = MD5_DESCBC_DIGEST_LENGTH; + ki->ks_u.des.key = (des_cblock *)ki->skey; + rc = des_key_sched(ki->ks_u.des.key, ki->ks_u.des.gss_sched); + if (rc) + return (rc); + for (i = 0; i < ki->keybytes; i++) + k[0][i] = 0xf0 ^ (*ki->ks_u.des.key)[i]; + rc = des_key_sched(&k[0], ki->ks_u.des.gss_sched_Ke); + break; + case 3*sizeof(des_cblock): + ki->type = NFS_GSS_3DES; + ki->hash_len = SHA_DIGEST_LENGTH; + ki->ks_u.des3.key = (des_cblock (*)[3])ki->skey; + des3_derive_key(*ki->ks_u.des3.key, ki->ks_u.des3.ckey, + KEY_USAGE_DES3_SIGN, KEY_USAGE_LEN); + rc = des3_key_sched(*ki->ks_u.des3.key, ki->ks_u.des3.gss_sched); + if (rc) + return (rc); + break; + default: + printf("gss_key_init: Invalid key length %d\n", skeylen); + rc = EINVAL; + break; + } + + return (rc); +} + +#if 0 +#define DISPLAYLEN 16 +#define MAXDISPLAYLEN 256 + +static void +hexdump(const char *msg, void *data, size_t len) +{ + size_t i, j; + u_char *d = data; + char *p, disbuf[3*DISPLAYLEN+1]; + + printf("NFS DEBUG %s len=%d:\n", msg, (uint32_t)len); + 
if (len > MAXDISPLAYLEN) + len = MAXDISPLAYLEN; + + for (i = 0; i < len; i += DISPLAYLEN) { + for (p = disbuf, j = 0; (j + i) < len && j < DISPLAYLEN; j++, p += 3) + snprintf(p, 4, "%02x ", d[i + j]); + printf("\t%s\n", disbuf); } - tin0=tin1=tout0=tout1=xor0=xor1=0; - tin[0]=tin[1]=0; } +#endif diff --git a/bsd/nfs/nfs_gss.h b/bsd/nfs/nfs_gss.h index a3536bd29..aa6d55e96 100644 --- a/bsd/nfs/nfs_gss.h +++ b/bsd/nfs/nfs_gss.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,9 +30,8 @@ #define _NFS_NFS_GSS_H_ #include -#include -#include #include +#include #define RPCSEC_GSS 6 #define RPCSEC_GSS_VERS_1 1 @@ -63,7 +62,33 @@ enum rpcsec_gss_service { #define GSS_CLNT_SEQLISTMAX 32 // Max length of req seq num list #define GSS_CLNT_SYS_VALID 300 // Valid time (sec) for failover ctx + #define SKEYLEN 8 // length of DES key +#define SKEYLEN3 24 // length of DES3 key +#define MAX_SKEYLEN SKEYLEN3 + +typedef struct { + uint32_t type; // See defines below + uint32_t keybytes; // Session key length bytes; + uint32_t hash_len; + u_char skey[MAX_SKEYLEN]; // Session key; + union { + struct { + des_cblock *key; + des_key_schedule gss_sched; + des_key_schedule gss_sched_Ke; + } des; + struct { + des_cblock (*key)[3]; + des_cblock ckey[3]; + des_key_schedule gss_sched[3]; + } des3; + } ks_u; +} gss_key_info; + +#define NFS_GSS_0DES 0 // Not DES or uninitialized +#define NFS_GSS_1DES 1 // Single DES with DES_MAC_MD5 +#define NFS_GSS_3DES 2 // Triple EDE DES KD with SHA1 /* * The client's RPCSEC_GSS context information @@ -85,14 +110,13 @@ struct nfs_gss_clnt_ctx { uint32_t *gss_clnt_seqbits; // Bitmap to track seq numbers in use mach_port_t gss_clnt_mport; // Mach port for gssd upcall u_char *gss_clnt_verf; // RPC verifier from server - uint64_t gss_clnt_gssd_verf; // Verifier from gssd char *gss_clnt_svcname; // Service name e.g. 
"nfs/big.apple.com" - uint32_t gss_clnt_cred_handle; // Opaque cred handle from gssd - uint32_t gss_clnt_context; // Opaque context handle from gssd + gss_cred gss_clnt_cred_handle; // Opaque cred handle from gssd + gss_ctx gss_clnt_context; // Opaque context handle from gssd u_char *gss_clnt_token; // GSS token exchanged via gssd & server uint32_t gss_clnt_tokenlen; // Length of token - u_char gss_clnt_skey[SKEYLEN]; // Context session key (DES) - des_key_schedule gss_clnt_sched; // Schedule derived from key + gss_key_info gss_clnt_kinfo; // GSS key info + uint32_t gss_clnt_gssd_flags; // Special flag bits to gssd uint32_t gss_clnt_major; // GSS major result from gssd or server uint32_t gss_clnt_minor; // GSS minor result from gssd or server }; @@ -116,17 +140,15 @@ struct nfs_gss_svc_ctx { uid_t gss_svc_uid; // UID of this user gid_t gss_svc_gids[NGROUPS]; // GIDs of this user uint32_t gss_svc_ngroups; // Count of gids - uint64_t gss_svc_expiretime; // Delete ctx if we exceed this + uint64_t gss_svc_incarnation; // Delete ctx if we exceed this + ttl value uint32_t gss_svc_seqmax; // Current max GSS sequence number uint32_t gss_svc_seqwin; // GSS sequence number window uint32_t *gss_svc_seqbits; // Bitmap to track seq numbers - uint64_t gss_svc_gssd_verf; // Verifier from gssd - uint32_t gss_svc_cred_handle; // Opaque cred handle from gssd - uint32_t gss_svc_context; // Opaque context handle from gssd + gss_cred gss_svc_cred_handle; // Opaque cred handle from gssd + gss_ctx gss_svc_context; // Opaque context handle from gssd u_char *gss_svc_token; // GSS token exchanged via gssd & client uint32_t gss_svc_tokenlen; // Length of token - u_char gss_svc_skey[SKEYLEN]; // Context session key (DES) - des_key_schedule gss_svc_sched; // Schedule derived from key + gss_key_info gss_svc_kinfo; // Session key info uint32_t gss_svc_major; // GSS major result from gssd uint32_t gss_svc_minor; // GSS minor result from gssd }; @@ -147,6 +169,7 @@ 
LIST_HEAD(nfs_gss_svc_ctx_hashhead, nfs_gss_svc_ctx); */ #define GSS_CTX_PEND 5 // seconds #define GSS_CTX_EXPIRE (8 * 3600) // seconds +#define GSS_CTX_TTL_MIN 1 // seconds #define GSS_TIMER_PERIOD 300 // seconds #define MSECS_PER_SEC 1000 diff --git a/bsd/nfs/nfs_gss_crypto.c b/bsd/nfs/nfs_gss_crypto.c new file mode 100644 index 000000000..1d275ba8f --- /dev/null +++ b/bsd/nfs/nfs_gss_crypto.c @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Copyright (C) 1998 by the FundsXpress, INC. + * + * All rights reserved. + * + * Export of this software from the United States of America may require + * a specific license from the United States Government. 
It is the + * responsibility of any person or organization contemplating export to + * obtain such a license before exporting. + * + * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and + * distribute this software and its documentation for any purpose and + * without fee is hereby granted, provided that the above copyright + * notice appear in all copies and that both that copyright notice and + * this permission notice appear in supporting documentation, and that + * the name of FundsXpress. not be used in advertising or publicity pertaining + * to distribution of the software without specific, written prior + * permission. FundsXpress makes no representations about the suitability of + * this software for any purpose. It is provided "as is" without express + * or implied warranty. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include "nfs_gss_crypto.h" + + +/* +n-fold(k-bits): + l = lcm(n,k) + r = l/k + s = k-bits | k-bits rot 13 | k-bits rot 13*2 | ... | k-bits rot 13*(r-1) + compute the 1's complement sum: + n-fold = s[0..n-1]+s[n..2n-1]+s[2n..3n-1]+..+s[(k-1)*n..k*n-1] +*/ + +/* representation: msb first, assume n and k are multiples of 8, and + that k>=16. this is the case of all the cryptosystems which are + likely to be used. this function can be replaced if that + assumption ever fails. 
*/ + +/* krb5_nfold: n-fold the inbits-bit string at in into outbits bits at out (algorithm described above). Both lengths are given in bits and reduced to byte counts below; out[0 .. outbits/8 - 1] is overwritten. */ + +void +krb5_nfold(unsigned int inbits, const unsigned char *in, unsigned int outbits, + unsigned char *out) +{ + int a,b,c,lcm; + int byte, i, msbit; /* byte holds the running sum for one output byte plus the carry into the next */ + + /* the code below is more readable if I make these bytes + instead of bits */ + + inbits >>= 3; + outbits >>= 3; + + /* first compute lcm(n,k) */ + + a = outbits; + b = inbits; + + while(b != 0) { /* Euclid's algorithm: leaves gcd(outbits, inbits) in a */ + c = b; + b = a%b; + a = c; + } + + lcm = outbits*inbits/a; /* NOTE(review): a is 0 here when both lengths are below 8 bits, so this divides by zero; callers must pass non-zero byte lengths */ + + /* now do the real work */ + + memset(out, 0, outbits); + byte = 0; + + /* this will end up cycling through k lcm(k,n)/k times, which + is correct */ + for (i=lcm-1; i>=0; i--) { /* walk the bytes last to first; the carry left in byte is added at the next position visited */ + /* compute the msbit in k which gets added into this byte */ + msbit = (/* first, start with the msbit in the first, unrotated + byte */ + ((inbits<<3)-1) + /* then, for each byte, shift to the right for each + repetition */ + +(((inbits<<3)+13)*(i/inbits)) + /* last, pick out the correct byte within that + shifted repetition */ + +((inbits-(i%inbits))<<3) + )%(inbits<<3); + + /* pull out the byte value itself */ + byte += (((in[((inbits-1)-(msbit>>3))%inbits]<<8)| + (in[((inbits)-(msbit>>3))%inbits])) + >>((msbit&7)+1))&0xff; + + /* do the addition */ + byte += out[i%outbits]; + out[i%outbits] = byte&0xff; + +#if 0 + printf("msbit[%d] = %d\tbyte = %02x\tsum = %03x\n", i, msbit, + (((in[((inbits-1)-(msbit>>3))%inbits]<<8)| + (in[((inbits)-(msbit>>3))%inbits])) + >>((msbit&7)+1))&0xff, byte); +#endif + + /* keep around the carry bit, if any */ + byte >>= 8; + +#if 0 + printf("carry=%d\n", byte); +#endif + } + + /* if there's a carry bit left over, add it back in */ + if (byte) { /* end-around carry of the ones' complement sum */ + for (i=outbits-1; i>=0; i--) { + /* do the addition */ + byte += out[i]; + out[i] = byte&0xff; + + /* keep around the carry bit, if any */ + byte >>= 8; + } + } +} + +/* + * Given 21 bytes of random bits, make a triple DES key.
+ */ + +void +des3_make_key(const unsigned char randombits[21], des_cblock key[3]) +{ + int i; + + for (i = 0; i < 3; i++) { + memcpy(&key[i], &randombits[i*7], 7); /* bytes 0..6 of key[i] are the i-th 7-byte slice of randombits */ + key[i][7] = (((key[i][0] & 1) << 1) | + ((key[i][1] & 1) << 2) | + ((key[i][2] & 1) << 3) | + ((key[i][3] & 1) << 4) | + ((key[i][4] & 1) << 5) | + ((key[i][5] & 1) << 6) | + ((key[i][6] & 1) << 7)); /* byte 7 gathers bit 0 of bytes 0..6 into its bits 1..7 */ + des_fixup_key_parity(&key[i]); /* NOTE(review): presumably overwrites bit 0 of every byte with DES parity, which is why those bits were saved in byte 7 first; confirm against des_fixup_key_parity */ + } +} + +/* + * Make a triple des key schedule, from a triple des key. Returns the OR of the three des_key_sched() results. + */ + +int +des3_key_sched(des_cblock key[3], des_key_schedule sched[3]) +{ + int i; + int rc = 0; + + for (i = 0; i < 3; i++) + rc |= des_key_sched(&key[i], sched[i]); + + return (rc); +} + +/* + * Triple DES cipher block chaining mode encryption. Encrypts when encrypt is non-zero and decrypts otherwise; length is in bytes. ivec is only read, never written back. If retvec is non-NULL it is written with the last ciphertext block (for length > 0). + */ + +void +des3_cbc_encrypt(des_cblock *input, des_cblock *output, int32_t length, + des_key_schedule schedule[3], des_cblock *ivec, des_cblock *retvec, int encrypt) +{ + register DES_LONG tin0,tin1; + register DES_LONG tout0,tout1,xor0,xor1; + register unsigned char *in,*out,*retval; + register int32_t l=length; + DES_LONG tin[2]; + unsigned char *iv; + tin0 = tin1 = 0; + + in=(unsigned char *)input; + out=(unsigned char *)output; + retval=(unsigned char *)retvec; + iv=(unsigned char *)ivec; + + if (encrypt) { + c2l(iv,tout0); + c2l(iv,tout1); + for (l-=8; l>=0; l-=8) { /* one pass per whole 8-byte block */ + c2l(in,tin0); + c2l(in,tin1); + tin0^=tout0; tin[0]=tin0; + tin1^=tout1; tin[1]=tin1; + des_encrypt3((DES_LONG *)tin,schedule[0], schedule[1], schedule[2]); + tout0=tin[0]; l2c(tout0,out); + tout1=tin[1]; l2c(tout1,out); + } + if (l != -8) { /* l+8 bytes (1..7) remain as a trailing partial block */ + c2ln(in,tin0,tin1,l+8); + tin0^=tout0; tin[0]=tin0; + tin1^=tout1; tin[1]=tin1; + des_encrypt3((DES_LONG *)tin,schedule[0], schedule[1], schedule[2]); + tout0=tin[0]; l2c(tout0,out); + tout1=tin[1]; l2c(tout1,out); + } + if (retval) { + l2c(tout0,retval); + l2c(tout1,retval); + } + } else { + c2l(iv,xor0); + c2l(iv,xor1); + for (l-=8; l>=0; l-=8) { + c2l(in,tin0); tin[0]=tin0; + c2l(in,tin1); tin[1]=tin1; + des_decrypt3((DES_LONG
*)tin,schedule[0],schedule[1],schedule[2]); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2c(tout0,out); + l2c(tout1,out); + xor0=tin0; /* this ciphertext block is the XOR mask for the next one */ + xor1=tin1; + } + if (l != -8) { /* trailing partial block: NOTE(review) both c2l loads are still issued on in, while only l+8 bytes are stored via l2cn; confirm callers always supply a full final ciphertext block */ + c2l(in,tin0); tin[0]=tin0; + c2l(in,tin1); tin[1]=tin1; + des_decrypt3((DES_LONG *)tin,schedule[0],schedule[1],schedule[2]); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2cn(tout0,tout1,out,l+8); + /* xor0=tin0; + xor1=tin1; */ + } + if (retval) { + l2c(tin0,retval); + l2c(tin1,retval); + } + } + tin0=tin1=tout0=tout1=xor0=xor1=0; /* clear block values held in locals before returning */ + tin[0]=tin[1]=0; +} + +/* + * Key derivation for triple DES. + * Given the session key in inkey, produce a new key in outkey using + * the supplied constant (clen bytes). Always returns 0. + */ + +int +des3_derive_key(des_cblock inkey[3], des_cblock outkey[3], + const unsigned char *constant, int clen) +{ + des_cblock inblock, outblock, ivec; + des_key_schedule sched[3]; + unsigned char rawkey[21]; + size_t n, keybytes = sizeof(rawkey); + + /* initialize the input block */ + + if (clen == sizeof(des_cblock)) { /* constant is already exactly one block: use it as is */ + memcpy(inblock, constant, clen); + } else { /* otherwise n-fold it to one block */ + krb5_nfold(clen*8, constant, sizeof(des_cblock)*8, inblock); + } + + /* loop encrypting the blocks until enough key bytes are generated */ + + bzero(ivec, sizeof(ivec)); + des3_key_sched(inkey, sched); /* NOTE(review): return value is ignored */ + for (n = 0; n < sizeof(rawkey); n += sizeof(des_cblock)) { + des3_cbc_encrypt(&inblock, &outblock, sizeof(outblock), sched, &ivec, NULL, 1); /* ivec stays all-zero: des3_cbc_encrypt does not write it back */ + if ((keybytes - n) <= sizeof (des_cblock)) { /* final pass: copy only the bytes still missing from rawkey */ + memcpy(rawkey+n, outblock, (keybytes - n)); + break; + } + memcpy(rawkey+n, outblock, sizeof(des_cblock)); + memcpy(inblock, outblock, sizeof(des_cblock)); /* feed this output back in as the next input block */ + } + + /* postprocess the key */ + des3_make_key(rawkey, outkey); + + /* clean memory, free resources and exit */ + + bzero(inblock, sizeof (des_cblock)); + bzero(outblock, sizeof (des_cblock)); + bzero(rawkey, keybytes); + bzero(sched, sizeof (sched)); + + return(0); +} + +/* + * Initialize a context for HMAC SHA1 + * if derive is non-zero we derive a new key (stored in ctx->dk) from key + * using KEY_USAGE_DES3_SIGN; otherwise key is copied to ctx->dk unchanged + */ + +void
+HMAC_SHA1_DES3KD_Init(HMAC_SHA1_DES3KD_CTX *ctx, des_cblock key[3], int derive) +{ + unsigned char ipad[64]; + size_t i, j; + + SHA1Init(&ctx->sha1_ctx); + if (derive) + des3_derive_key(key, ctx->dk, KEY_USAGE_DES3_SIGN, KEY_USAGE_LEN); + else + memcpy(ctx->dk, key, 3*sizeof(des_cblock)); + memset(ipad, 0x36, sizeof(ipad)); /* inner pad: 0x36 in every byte, then the key is XORed over the first 3*sizeof(des_cblock) bytes */ + for (i = 0; i < 3; i++) + for (j = 0; j < sizeof(des_cblock); j++) + ipad[j + i * sizeof(des_cblock)] ^= ctx->dk[i][j]; + SHA1Update(&ctx->sha1_ctx, ipad, sizeof(ipad)); /* NOTE(review): ipad still holds key-derived bytes on return; des3_derive_key bzero()s its temporaries, consider doing the same here */ +} + +/* + * Update the HMAC SHA1 context with the supplied data. + */ +void +HMAC_SHA1_DES3KD_Update(HMAC_SHA1_DES3KD_CTX *ctx, void *data, size_t len) +{ + SHA1Update(&ctx->sha1_ctx, data, len); +} + +/* + * Finish the context and produce the HMAC SHA1 digest. digest must have room for SHA1_RESULTLEN bytes; it holds the inner hash temporarily and the final HMAC on return. + */ + +void +HMAC_SHA1_DES3KD_Final(void *digest, HMAC_SHA1_DES3KD_CTX *ctx) +{ + unsigned char opad[64]; + size_t i, j; + + SHA1Final(digest, &ctx->sha1_ctx); /* inner hash over (key ^ ipad) followed by the data */ + memset(opad, 0x5c, sizeof(opad)); + for (i = 0; i < 3; i++) + for (j = 0; j < sizeof(des_cblock); j++) + opad[j + i * sizeof(des_cblock)] ^= ctx->dk[i][j]; + SHA1Init(&ctx->sha1_ctx); /* the same SHA1 context is reused for the outer hash */ + SHA1Update(&ctx->sha1_ctx, opad, sizeof(opad)); + SHA1Update(&ctx->sha1_ctx, digest, SHA1_RESULTLEN); + SHA1Final(digest, &ctx->sha1_ctx); /* NOTE(review): opad and ctx->dk are left holding key material */ +} + +/* + * XXX This function borrowed from OpenBSD. + * It will likely be moved into kernel crypto.
+ */ +DES_LONG +des_cbc_cksum(des_cblock *input, des_cblock *output, + int32_t length, des_key_schedule schedule, des_cblock *ivec) +{ /* Chains single-DES encryption over length bytes of input starting from ivec and keeps only the final block: it is stored to output when output is non-NULL, and its second word (tout1) is the return value. */ + register DES_LONG tout0,tout1,tin0,tin1; + register int32_t l=length; + DES_LONG tin[2]; + unsigned char *in,*out,*iv; + + in=(unsigned char *)input; + out=(unsigned char *)output; + iv=(unsigned char *)ivec; + + c2l(iv,tout0); + c2l(iv,tout1); + for (; l>0; l-=8) { + if (l >= 8) { + c2l(in,tin0); + c2l(in,tin1); + } else + c2ln(in,tin0,tin1,l); /* short final block: c2ln is handed only the l remaining bytes */ + + tin0^=tout0; tin[0]=tin0; + tin1^=tout1; tin[1]=tin1; + des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); + /* fix 15/10/91 eay - thanks to keithr@sco.COM */ + tout0=tin[0]; + tout1=tin[1]; + } + if (out != NULL) { + l2c(tout0,out); + l2c(tout1,out); + } + tout0=tin0=tin1=tin[0]=tin[1]=0; /* tout1 is deliberately not cleared: it is the return value */ + return(tout1); +} + +/* + * XXX This function borrowed from OpenBSD. + * It will likely be moved into kernel crypto. + * Single-DES CBC: same structure as des3_cbc_encrypt, with des_encrypt1 and one key schedule. Encrypts when encrypt is non-zero, decrypts otherwise; ivec is only read; retvec, if non-NULL, is written with the last ciphertext block (for length > 0). + */ +void +des_cbc_encrypt(des_cblock *input, des_cblock *output, int32_t length, + des_key_schedule schedule, des_cblock *ivec, des_cblock *retvec, int encrypt) +{ + register DES_LONG tin0,tin1; + register DES_LONG tout0,tout1,xor0,xor1; + register unsigned char *in,*out,*retval; + register int32_t l=length; + DES_LONG tin[2]; + unsigned char *iv; + tin0 = tin1 = 0; + + in=(unsigned char *)input; + out=(unsigned char *)output; + retval=(unsigned char *)retvec; + iv=(unsigned char *)ivec; + + if (encrypt) { + c2l(iv,tout0); + c2l(iv,tout1); + for (l-=8; l>=0; l-=8) { /* one pass per whole 8-byte block */ + c2l(in,tin0); + c2l(in,tin1); + tin0^=tout0; tin[0]=tin0; + tin1^=tout1; tin[1]=tin1; + des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); + tout0=tin[0]; l2c(tout0,out); + tout1=tin[1]; l2c(tout1,out); + } + if (l != -8) { /* l+8 bytes (1..7) remain as a trailing partial block */ + c2ln(in,tin0,tin1,l+8); + tin0^=tout0; tin[0]=tin0; + tin1^=tout1; tin[1]=tin1; + des_encrypt1((DES_LONG *)tin,schedule,DES_ENCRYPT); + tout0=tin[0]; l2c(tout0,out); + tout1=tin[1]; l2c(tout1,out); + } + if (retval) { + l2c(tout0,retval); + l2c(tout1,retval); + } + } else { + c2l(iv,xor0); +
c2l(iv,xor1); + for (l-=8; l>=0; l-=8) { + c2l(in,tin0); tin[0]=tin0; + c2l(in,tin1); tin[1]=tin1; + des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2c(tout0,out); + l2c(tout1,out); + xor0=tin0; /* this ciphertext block is the XOR mask for the next one */ + xor1=tin1; + } + if (l != -8) { /* trailing partial block: only l+8 bytes are stored via l2cn */ + c2l(in,tin0); tin[0]=tin0; + c2l(in,tin1); tin[1]=tin1; + des_encrypt1((DES_LONG *)tin,schedule,DES_DECRYPT); + tout0=tin[0]^xor0; + tout1=tin[1]^xor1; + l2cn(tout0,tout1,out,l+8); + /* xor0=tin0; + xor1=tin1; */ + } + if (retval) { + l2c(tin0,retval); + l2c(tin1,retval); + } + } + tin0=tin1=tout0=tout1=xor0=xor1=0; /* clear block values held in locals before returning */ + tin[0]=tin[1]=0; +} + +/* + * Initialize an MD5 DES CBC context with a schedule. + */ + +void MD5_DESCBC_Init(MD5_DESCBC_CTX *ctx, des_key_schedule *sched) +{ + MD5Init(&ctx->md5_ctx); + ctx->sched = sched; /* only the pointer is stored; it is dereferenced in MD5_DESCBC_Final, so the schedule must stay valid until then */ +} + +/* + * Update MD5 DES CBC context with the supplied data. + */ + +void MD5_DESCBC_Update(MD5_DESCBC_CTX *ctx, void *data, size_t len) +{ + MD5Update(&ctx->md5_ctx, data, len); +} + +/* + * Finalize the context and extract the digest. + */ + +void MD5_DESCBC_Final(void *digest, MD5_DESCBC_CTX *ctx) +{ + des_cblock iv0; + unsigned char md5_digest[MD5_DIGEST_LENGTH]; + + MD5Final(md5_digest, &ctx->md5_ctx); + + /* + * Now get the DES CBC checksum for the digest. + */ + bzero(iv0, sizeof (iv0)); /* checksum is chained from an all-zero IV */ + (void) des_cbc_cksum((des_cblock *) md5_digest, (des_cblock *)digest, + sizeof (md5_digest), *ctx->sched, &iv0); /* digest receives the final CBC block; the DES_LONG return value is not needed */ +} + diff --git a/bsd/nfs/nfs_gss_crypto.h b/bsd/nfs/nfs_gss_crypto.h new file mode 100644 index 000000000..677647f16 --- /dev/null +++ b/bsd/nfs/nfs_gss_crypto.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License.
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#ifndef _NFS_NFS_GSS_CRYPTO_H_ +#define _NFS_NFS_GSS_CRYPTO_H_ +#include +#include +#include +#include + +#define KG_USAGE_SEAL 22 +#define KG_USAGE_SIGN 23 +#define KG_USAGE_SEQ 24 + +#define KEY_USAGE_DES3_SEAL (const unsigned char *)"\x00\x00\x00\x16\xaa" +#define KEY_USAGE_DES3_SIGN (const unsigned char *)"\x00\x00\x00\x17\x99" +#define KEY_USAGE_DES3_SEQ (const unsigned char *)"\x00\x00\x00\x18\x55" +#define KEY_USAGE_LEN 5 + +typedef struct { + SHA1_CTX sha1_ctx; + des_cblock dk[3]; +} HMAC_SHA1_DES3KD_CTX; + +typedef struct { + MD5_CTX md5_ctx; + des_key_schedule *sched; +} MD5_DESCBC_CTX; + +#define MD5_DESCBC_DIGEST_LENGTH 8 + +__BEGIN_DECLS + +void krb5_nfold(unsigned int, const unsigned char *, unsigned int, unsigned char *); +void des3_make_key(const unsigned char[21], des_cblock[3]); +int des3_key_sched(des_cblock[3], des_key_schedule[3]); +void des3_cbc_encrypt(des_cblock *, des_cblock *, int32_t, + des_key_schedule[3], des_cblock *, des_cblock *, int); +int 
des3_derive_key(des_cblock[3], des_cblock[3], const unsigned char *, int); +void HMAC_SHA1_DES3KD_Init(HMAC_SHA1_DES3KD_CTX *, des_cblock[3], int); +void HMAC_SHA1_DES3KD_Update(HMAC_SHA1_DES3KD_CTX *, void *, size_t); +void HMAC_SHA1_DES3KD_Final(void *, HMAC_SHA1_DES3KD_CTX *); +DES_LONG des_cbc_cksum(des_cblock *, des_cblock *, int32_t, des_key_schedule, des_cblock *); +void des_cbc_encrypt(des_cblock *, des_cblock *, int32_t, des_key_schedule, + des_cblock *, des_cblock *, int); + +void MD5_DESCBC_Init(MD5_DESCBC_CTX *, des_key_schedule *); +void MD5_DESCBC_Update(MD5_DESCBC_CTX *, void *, size_t); +void MD5_DESCBC_Final(void *, MD5_DESCBC_CTX *); + +__END_DECLS + +#endif /* _NFS_NFS_GSS_CRYPTO_H_ */ diff --git a/bsd/nfs/nfs_lock.c b/bsd/nfs/nfs_lock.c index 68125dd26..590a70619 100644 --- a/bsd/nfs/nfs_lock.c +++ b/bsd/nfs/nfs_lock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2007 Apple Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -124,11 +124,20 @@ struct nfs_lock_pid { (&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash]) static LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl; static TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru; -static u_long nfs_lock_pid_hash, nfs_lock_pid_hash_trusted; +static u_long nfs_lock_pid_hash; +static uint32_t nfs_lock_pid_hash_trusted; static lck_grp_t *nfs_lock_lck_grp; static lck_mtx_t *nfs_lock_mutex; +void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *); +void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *); +int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *); +LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *); +LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t); +uint64_t nfs_lockxid_get(void); +int nfs_lock_pid_check(proc_t, int); +int nfs_lockd_send_request(LOCKD_MSG *, int); /* * initialize global nfs lock state @@ -195,7 +204,7 @@ nfs_lockd_mount_change(int i) * insert a lock request message into the 
pending queue * (nfs_lock_mutex must be held) */ -static inline void +inline void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) { LOCKD_MSG_REQUEST *mr; @@ -221,7 +230,7 @@ nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq) * remove a lock request message from the pending queue * (nfs_lock_mutex must be held) */ -static inline void +inline void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) { TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next); @@ -239,7 +248,7 @@ nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq) * * (nfs_lock_mutex must be held) */ -static inline LOCKD_MSG_REQUEST * +inline LOCKD_MSG_REQUEST * nfs_lockdmsg_find_by_xid(uint64_t lockxid) { LOCKD_MSG_REQUEST *mr; @@ -265,7 +274,7 @@ nfs_lockdmsg_find_by_xid(uint64_t lockxid) * * returns 0 on equality and 1 if different */ -static inline int +inline int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp) { if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO)) @@ -298,7 +307,7 @@ nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp * * (nfs_lock_mutex must be held) */ -static inline LOCKD_MSG_REQUEST * +inline LOCKD_MSG_REQUEST * nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) { LOCKD_MSG_REQUEST *mr; @@ -316,7 +325,7 @@ nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp) * return the next unique lock request transaction ID * (nfs_lock_mutex must be held) */ -static inline uint64_t +inline uint64_t nfs_lockxid_get(void) { LOCKD_MSG_REQUEST *mr; @@ -358,7 +367,7 @@ nfs_lockxid_get(void) * (Also, if adding, try to clean up some stale entries.) 
* (nfs_lock_mutex must be held) */ -static int +int nfs_lock_pid_check(proc_t p, int addflag) { struct nfs_lock_pid *lp, *lplru, *lplru_next, *mlp; @@ -489,8 +498,8 @@ nfs_lock_pid_check(proc_t p, int addflag) #define MACH_MAX_TRIES 3 -static int -send_request(LOCKD_MSG *msg, int interruptable) +int +nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable) { kern_return_t kr; int retries = 0; @@ -560,7 +569,7 @@ nfs3_vnop_advlock( vnode_t vp; nfsnode_t np; int error, error2; - int interruptable; + int interruptable, modified; struct flock *fl; struct nfsmount *nmp; struct nfs_vattr nvattr; @@ -642,28 +651,16 @@ nfs3_vnop_advlock( case SEEK_END: /* need to flush, and refetch attributes to make */ /* sure we have the correct end of file offset */ - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (error) + if ((error = nfs_node_lock(np))) return (error); - NATTRINVALIDATE(np); - if (np->n_flag & NMODIFIED) { - nfs_unlock(np); - error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); - if (error) - return (error); - } else - nfs_unlock(np); - - error = nfs_getattr(np, &nvattr, ctx, 0); - nfs_data_lock(np, NFS_NODE_LOCK_SHARED); - if (!error) - error = nfs_lock(np, NFS_NODE_LOCK_SHARED); - if (error) { - nfs_data_unlock(np); + modified = (np->n_flag & NMODIFIED); + nfs_node_unlock(np); + if (modified && ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)))) return (error); - } + if ((error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED))) + return (error); + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); start = np->n_size + fl->l_start; - nfs_unlock(np); nfs_data_unlock(np); break; default: @@ -738,9 +735,9 @@ nfs3_vnop_advlock( for (;;) { nfs_lockd_request_sent = 1; - /* need to drop nfs_lock_mutex while calling send_request() */ + /* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */ lck_mtx_unlock(nfs_lock_mutex); - error = send_request(msg, interruptable); + error = nfs_lockd_send_request(msg, interruptable); lck_mtx_lock(nfs_lock_mutex); if (error && error != EAGAIN) 
break; @@ -863,6 +860,7 @@ nfs3_vnop_advlock( /* warn if we're not getting any response */ microuptime(&now); if ((msgreq.lmr_errno != EINPROGRESS) && + !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) && (nmp->nm_tprintf_initial_delay != 0) && ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) { lck_mtx_unlock(&nmp->nm_lock); @@ -895,6 +893,23 @@ nfs3_vnop_advlock( /* send cancel then resend request */ continue; } + + if (msg->lm_flags & LOCKD_MSG_DENIED_GRACE) { + /* + * Time to resend a request previously denied due to a grace period. + */ + msg->lm_flags &= ~LOCKD_MSG_DENIED_GRACE; + nfs_lockdmsg_dequeue(&msgreq); + msg->lm_xid = nfs_lockxid_get(); + nfs_lockdmsg_enqueue(&msgreq); + msgreq.lmr_saved_errno = 0; + msgreq.lmr_errno = 0; + msgreq.lmr_answered = 0; + timeo = 2; + /* resend request */ + continue; + } + /* * We timed out, so we will resend the request. */ @@ -910,6 +925,17 @@ nfs3_vnop_advlock( wentdown ? "lockd alive again" : NULL); wentdown = 0; + if (msgreq.lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) { + /* + * The lock request was denied because the server lockd is + * still in its grace period. So, we need to try the + * request again in a little bit. + */ + timeo = 4; + msgreq.lmr_answered = 0; + goto wait_for_granted; + } + if (msgreq.lmr_errno == EINPROGRESS) { /* got NLM_BLOCKED response */ /* need to wait for NLM_GRANTED */ @@ -1069,6 +1095,8 @@ nfslockdans(proc_t p, struct lockd_ans *ansp) msgreq->lmr_msg.lm_fl.l_type = F_UNLCK; } } + if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE) + msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE; msgreq->lmr_answered = 1; lck_mtx_unlock(nfs_lock_mutex); diff --git a/bsd/nfs/nfs_lock.h b/bsd/nfs/nfs_lock.h index 0737615c1..7bd4e91a8 100644 --- a/bsd/nfs/nfs_lock.h +++ b/bsd/nfs/nfs_lock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2007 Apple Inc. All rights reserved. + * Copyright (c) 2002-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,6 +90,7 @@ typedef struct nfs_lock_msg { #define LOCKD_MSG_TEST 0x0002 /* just a lock test */ #define LOCKD_MSG_NFSV3 0x0004 /* NFSv3 request */ #define LOCKD_MSG_CANCEL 0x0008 /* cancelling blocked request */ +#define LOCKD_MSG_DENIED_GRACE 0x0010 /* lock denied due to grace period */ /* The structure used to maintain the pending request queue */ typedef struct nfs_lock_msg_request { @@ -124,6 +125,7 @@ struct lockd_ans { #define LOCKD_ANS_GRANTED 0x0001 /* NLM_GRANTED request */ #define LOCKD_ANS_LOCK_INFO 0x0002 /* lock info valid */ #define LOCKD_ANS_LOCK_EXCL 0x0004 /* lock is exclusive */ +#define LOCKD_ANS_DENIED_GRACE 0x0008 /* lock denied due to grace period */ #ifdef KERNEL diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index e42d5022d..7d1926787 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,6 +67,7 @@ #include +#include #include #include #include @@ -89,6 +90,7 @@ static u_long nfsnodehash; static lck_grp_t *nfs_node_hash_lck_grp; static lck_grp_t *nfs_node_lck_grp; +static lck_grp_t *nfs_data_lck_grp; lck_mtx_t *nfs_node_hash_mutex; /* @@ -101,6 +103,7 @@ nfs_nhinit(void) nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", LCK_GRP_ATTR_NULL); nfs_node_hash_mutex = lck_mtx_alloc_init(nfs_node_hash_lck_grp, LCK_ATTR_NULL); nfs_node_lck_grp = lck_grp_alloc_init("nfs_node", LCK_GRP_ATTR_NULL); + nfs_data_lck_grp = lck_grp_alloc_init("nfs_data", LCK_GRP_ATTR_NULL); } void @@ -174,7 +177,7 @@ nfs_nget( continue; FSDBG(263, dnp, np, np->n_flag, 0xcace0000); /* if the node is locked, sleep on it */ - if (np->n_hflag & NHLOCKED) { + if ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE)) { np->n_hflag |= NHLOCKWANT; FSDBG(263, dnp, np, np->n_flag, 0xcace2222); msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL); @@ -192,17 +195,23 @@ nfs_nget( FSDBG_BOT(263, dnp, *npp, 0xcace0d1e, error); return (error); } - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + if ((error = nfs_node_lock(np))) { /* this only fails if the node is now unhashed */ /* so let's see if we can find/create it again */ FSDBG(263, dnp, *npp, 0xcaced1e2, error); vnode_put(vp); + if (flags & NG_NOCREATE) { + *npp = 0; + FSDBG_BOT(263, dnp, *npp, 0xcaced1e0, ENOENT); + return (ENOENT); + } goto loop; } /* update attributes */ - error = nfs_loadattrcache(np, nvap, xidp, 0); + if (nvap) + error = nfs_loadattrcache(np, nvap, xidp, 0); if (error) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(vp); } else { if (dnp && cnp && (flags & NG_MAKEENTRY)) @@ -215,6 +224,13 @@ nfs_nget( FSDBG(263, mp, dnp, npp, 0xaaaaaaaa); + if (flags & NG_NOCREATE) { + lck_mtx_unlock(nfs_node_hash_mutex); + *npp = 0; + FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT); + return (ENOENT); + } + /* * allocate and initialize nfsnode and 
stick it in the hash * before calling getnewvnode(). Anyone finding it in the @@ -230,6 +246,10 @@ nfs_nget( bzero(np, sizeof *np); np->n_hflag |= (NHINIT | NHLOCKED); np->n_mount = mp; + TAILQ_INIT(&np->n_opens); + TAILQ_INIT(&np->n_lock_owners); + TAILQ_INIT(&np->n_locks); + np->n_dlink.tqe_next = NFSNOLIST; if (dnp && cnp && ((cnp->cn_namelen != 2) || (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) { @@ -264,9 +284,10 @@ nfs_nget( FSDBG(266, 0, np, np->n_flag, np->n_hflag); /* lock the new nfsnode */ - lck_rw_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL); - lck_rw_init(&np->n_datalock, nfs_node_lck_grp, LCK_ATTR_NULL); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + lck_mtx_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL); + lck_rw_init(&np->n_datalock, nfs_data_lck_grp, LCK_ATTR_NULL); + lck_mtx_init(&np->n_openlock, nfs_open_grp, LCK_ATTR_NULL); + lck_mtx_lock(&np->n_lock); /* release lock on hash table */ lck_mtx_unlock(nfs_node_hash_mutex); @@ -275,7 +296,7 @@ nfs_nget( error = nfs_loadattrcache(np, nvap, xidp, 1); if (error) { FSDBG(266, 0, np, np->n_flag, 0xb1eb1e); - nfs_unlock(np); + nfs_node_unlock(np); lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED); @@ -291,8 +312,9 @@ nfs_nget( } np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); if (np->n_fhsize > NFS_SMALLFH) FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); FREE_ZONE(np, sizeof *np, M_NFSNODE); @@ -344,7 +366,7 @@ nfs_nget( error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode); if (error) { FSDBG(266, 0, np, np->n_flag, 0xb1eb1e); - nfs_unlock(np); + nfs_node_unlock(np); lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED); @@ -360,8 +382,9 @@ 
nfs_nget( } np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); if (np->n_fhsize > NFS_SMALLFH) FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); FREE_ZONE(np, sizeof *np, M_NFSNODE); @@ -397,22 +420,143 @@ nfs_vnop_inactive(ap) vfs_context_t a_context; } */ *ap; { - vnode_t vp; - nfsnode_t np; + vnode_t vp = ap->a_vp; + vfs_context_t ctx = ap->a_context; + nfsnode_t np = VTONFS(ap->a_vp); struct nfs_sillyrename *nsp; struct nfs_vattr nvattr; - int unhash, attrerr; + int unhash, attrerr, busyerror, error, inuse, busied; + struct nfs_open_file *nofp; + const char *vname = NULL; + struct componentname cn; + struct nfsmount *nmp = NFSTONMP(np); + +restart: + error = 0; + inuse = ((nmp->nm_vers >= NFS_VER4) && (nfs_mount_state_in_use_start(nmp) == 0)); + + /* There shouldn't be any open or lock state at this point */ + lck_mtx_lock(&np->n_openlock); + if (np->n_openrefcnt) { + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: still open: %d %s\n", np->n_openrefcnt, vname ? vname : "//"); + } + TAILQ_FOREACH(nofp, &np->n_opens, nof_link) { + lck_mtx_lock(&nofp->nof_lock); + if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: open file busy: %s\n", vname ? vname : "//"); + busied = 0; + } else { + nofp->nof_flags |= NFS_OPEN_FILE_BUSY; + busied = 1; + } + lck_mtx_unlock(&nofp->nof_lock); + /* + * If we just created the file, we already had it open in + * anticipation of getting a subsequent open call. If the + * node has gone inactive without being open, we need to + * clean up (close) the open done in the create. 
+ */ + if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && nofp->nof_creator) { + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + lck_mtx_unlock(&np->n_openlock); + if (busied) + nfs_open_file_clear_busy(nofp); + if (inuse) + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + lck_mtx_unlock(&np->n_openlock); + error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (error) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: create close error: %d, %s\n", error, vname); + nofp->nof_flags |= NFS_OPEN_FILE_CREATE; + } + if (busied) + nfs_open_file_clear_busy(nofp); + if (inuse) + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) { + /* + * If the file is marked as needing reopen, but this was the only + * open on the file, just drop the open. + */ + nofp->nof_flags &= ~NFS_OPEN_FILE_NEEDCLOSE; + if ((nofp->nof_flags & NFS_OPEN_FILE_REOPEN) && (nofp->nof_opencnt == 1)) { + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + nofp->nof_r--; + nofp->nof_opencnt--; + nofp->nof_access = 0; + } else { + lck_mtx_unlock(&np->n_openlock); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + if (busied) + nfs_open_file_clear_busy(nofp); + if (inuse) + nfs_mount_state_in_use_end(nmp, 0); + nfs4_reopen(nofp, vfs_context_thread(ctx)); + goto restart; + } + error = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (error) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: need close error: %d, %s\n", error, vname); + nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE; + } + if (busied) + nfs_open_file_clear_busy(nofp); + if (inuse) + nfs_mount_state_in_use_end(nmp, error); + goto restart; + } + } + if (nofp->nof_opencnt) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: file still open: %d %s\n", 
nofp->nof_opencnt, vname ? vname : "//"); + } + if (nofp->nof_access || nofp->nof_deny || + nofp->nof_mmap_access || nofp->nof_mmap_deny || + nofp->nof_r || nofp->nof_w || nofp->nof_rw || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_inactive: non-zero access: %d %d %d %d # %u %u %u dw %u %u %u drw %u %u %u %s\n", + nofp->nof_access, nofp->nof_deny, + nofp->nof_mmap_access, nofp->nof_mmap_deny, + nofp->nof_r, nofp->nof_w, nofp->nof_rw, + nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw, + nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw, + vname ? vname : "//"); + } + if (busied) + nfs_open_file_clear_busy(nofp); + } + lck_mtx_unlock(&np->n_openlock); + if (vname) + vnode_putname(vname); - vp = ap->a_vp; - np = VTONFS(ap->a_vp); + if (inuse && nfs_mount_state_in_use_end(nmp, error)) + goto restart; - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (vnode_vtype(vp) != VDIR) { - nsp = np->n_sillyrename; + nsp = np->n_sillyrename; np->n_sillyrename = NULL; - } else + } else { nsp = NULL; + } FSDBG_TOP(264, vp, np, np->n_flag, nsp); @@ -420,22 +564,19 @@ nfs_vnop_inactive(ap) /* no silly file to clean up... */ /* clear all flags other than these */ np->n_flag &= (NMODIFIED); - nfs_unlock(np); + nfs_node_unlock(np); FSDBG_BOT(264, vp, np, np->n_flag, 0); return (0); } + nfs_node_unlock(np); /* Remove the silly file that was rename'd earlier */ /* flush all the buffers */ - nfs_unlock(np); - nfs_vinvalbuf2(vp, V_SAVE, vfs_context_thread(ap->a_context), nsp->nsr_cred, 1); - - /* purge the name cache to deter others from finding it */ - cache_purge(vp); + nfs_vinvalbuf2(vp, V_SAVE, vfs_context_thread(ctx), nsp->nsr_cred, 1); /* try to get the latest attributes */ - attrerr = nfs_getattr(np, &nvattr, ap->a_context, 0); + attrerr = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED); /* Check if we should remove it from the node hash. 
*/ /* Leave it if inuse or it has multiple hard links. */ @@ -446,8 +587,8 @@ nfs_vnop_inactive(ap) ubc_setsize(vp, 0); } - /* grab node lock on this node and the directory */ - nfs_lock2(nsp->nsr_dnp, np, NFS_NODE_LOCK_FORCE); + /* mark this node and the directory busy while we do the remove */ + busyerror = nfs_node_set_busy2(nsp->nsr_dnp, np, vfs_context_thread(ctx)); /* lock the node while we remove the silly file */ lck_mtx_lock(nfs_node_hash_mutex); @@ -458,8 +599,11 @@ nfs_vnop_inactive(ap) np->n_hflag |= NHLOCKED; lck_mtx_unlock(nfs_node_hash_mutex); - /* purge again in case it was looked up while we were locking */ - cache_purge(vp); + /* purge the name cache to deter others from finding it */ + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = nsp->nsr_name; + cn.cn_namelen = nsp->nsr_namlen; + nfs_name_cache_purge(nsp->nsr_dnp, np, &cn, ctx); FSDBG(264, np, np->n_size, np->n_vattr.nva_size, 0xf00d00f1); @@ -467,8 +611,12 @@ nfs_vnop_inactive(ap) nfs_removeit(nsp); /* clear all flags other than these */ + nfs_node_lock_force(np); np->n_flag &= (NMODIFIED); - nfs_unlock2(nsp->nsr_dnp, np); + nfs_node_unlock(np); + + if (!busyerror) + nfs_node_clear_busy2(nsp->nsr_dnp, np); if (unhash && vnode_isinuse(vp, 0)) { /* vnode now inuse after silly remove? */ @@ -521,14 +669,126 @@ nfs_vnop_reclaim(ap) { vnode_t vp = ap->a_vp; nfsnode_t np = VTONFS(vp); - struct nfsdmap *dp, *dp2; + vfs_context_t ctx = ap->a_context; + struct nfs_open_file *nofp, *nextnofp; + struct nfs_file_lock *nflp, *nextnflp; + struct nfs_lock_owner *nlop, *nextnlop; + const char *vname = NULL; + struct nfsmount *nmp = np->n_mount ? 
VFSTONFS(np->n_mount) : NFSTONMP(np); FSDBG_TOP(265, vp, np, np->n_flag, 0); + /* There shouldn't be any open or lock state at this point */ + lck_mtx_lock(&np->n_openlock); + + if (nmp && (nmp->nm_vers >= NFS_VER4)) { + /* need to drop a delegation */ + if (np->n_dlink.tqe_next != NFSNOLIST) { + /* remove this node from the recall list */ + lck_mtx_lock(&nmp->nm_lock); + if (np->n_dlink.tqe_next != NFSNOLIST) { + TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink); + np->n_dlink.tqe_next = NFSNOLIST; + } + lck_mtx_unlock(&nmp->nm_lock); + } + if (np->n_openflags & N_DELEG_MASK) { + np->n_openflags &= ~N_DELEG_MASK; + nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid, + vfs_context_thread(ctx), vfs_context_ucred(ctx)); + } + } + + /* clean up file locks */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD)) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: lock 0x%llx 0x%llx 0x%x (bc %d) %s\n", + nflp->nfl_start, nflp->nfl_end, nflp->nfl_flags, + nflp->nfl_blockcnt, vname ? vname : "//"); + } + if (!(nflp->nfl_flags & NFS_FILE_LOCK_BLOCKED)) { + lck_mtx_lock(&nflp->nfl_owner->nlo_lock); + TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nflp->nfl_owner->nlo_lock); + } + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + /* clean up lock owners */ + TAILQ_FOREACH_SAFE(nlop, &np->n_lock_owners, nlo_link, nextnlop) { + if (!TAILQ_EMPTY(&nlop->nlo_locks)) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: lock owner with locks %s\n", + vname ? vname : "//"); + } + TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link); + nfs_lock_owner_destroy(nlop); + } + /* clean up open state */ + if (np->n_openrefcnt) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: still open: %d %s\n", + np->n_openrefcnt, vname ? 
vname : "//"); + } + TAILQ_FOREACH_SAFE(nofp, &np->n_opens, nof_link, nextnofp) { + if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: open file busy: %s\n", + vname ? vname : "//"); + } + if (nofp->nof_opencnt) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: file still open: %d %s\n", + nofp->nof_opencnt, vname ? vname : "//"); + } + if (nofp->nof_access || nofp->nof_deny || + nofp->nof_mmap_access || nofp->nof_mmap_deny || + nofp->nof_r || nofp->nof_w || nofp->nof_rw || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_vnop_reclaim: non-zero access: %d %d %d %d # %u %u %u dw %u %u %u drw %u %u %u %s\n", + nofp->nof_access, nofp->nof_deny, + nofp->nof_mmap_access, nofp->nof_mmap_deny, + nofp->nof_r, nofp->nof_w, nofp->nof_rw, + nofp->nof_r_dw, nofp->nof_w_dw, nofp->nof_rw_dw, + nofp->nof_r_drw, nofp->nof_w_drw, nofp->nof_rw_drw, + vname ? vname : "//"); + } + TAILQ_REMOVE(&np->n_opens, nofp, nof_link); + nfs_open_file_destroy(nofp); + } + lck_mtx_unlock(&np->n_openlock); + + lck_mtx_lock(nfs_buf_mutex); + if (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd)) { + if (!vname) + vname = vnode_getname(vp); + printf("nfs_reclaim: dropping %s buffers for file %s\n", + (!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean"), + (vname ? 
vname : "//")); + } + lck_mtx_unlock(nfs_buf_mutex); + if (vname) + vnode_putname(vname); + nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ap->a_context, 0); + lck_mtx_lock(nfs_node_hash_mutex); - if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) + if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) { printf("nfs_reclaim: leaving unlinked file %s\n", np->n_sillyrename->nsr_name); + if (np->n_sillyrename->nsr_cred != NOCRED) + kauth_cred_unref(&np->n_sillyrename->nsr_cred); + vnode_rele(NFSTOV(np->n_sillyrename->nsr_dnp)); + FREE_ZONE(np->n_sillyrename, sizeof(*np->n_sillyrename), M_NFSREQ); + } vnode_removefsref(vp); @@ -540,25 +800,15 @@ nfs_vnop_reclaim(ap) lck_mtx_unlock(nfs_node_hash_mutex); /* - * Free up any directory cookie structures and - * large file handle structures that might be associated with - * this nfs node. + * Free up any directory cookie structures and large file handle + * structures that might be associated with this nfs node. */ - nfs_lock(np, NFS_NODE_LOCK_FORCE); - if (vnode_vtype(vp) == VDIR) { - dp = np->n_cookies.lh_first; - while (dp) { - dp2 = dp; - dp = dp->ndm_list.le_next; - FREE_ZONE((caddr_t)dp2, - sizeof (struct nfsdmap), M_NFSDIROFF); - } - } - if (np->n_fhsize > NFS_SMALLFH) { + nfs_node_lock_force(np); + if ((vnode_vtype(vp) == VDIR) && np->n_cookiecache) + FREE_ZONE(np->n_cookiecache, sizeof(struct nfsdmap), M_NFSDIROFF); + if (np->n_fhsize > NFS_SMALLFH) FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); - } - - nfs_unlock(np); + nfs_node_unlock(np); vnode_clearfsnode(vp); if (np->n_parent) { @@ -569,8 +819,9 @@ nfs_vnop_reclaim(ap) np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); FSDBG_BOT(265, vp, np, np->n_flag, 0xd1ed1e); FREE_ZONE(np, sizeof(struct nfsnode), M_NFSNODE); @@ -580,157 +831,191 @@ 
nfs_vnop_reclaim(ap) /* * Acquire an NFS node lock */ + int -nfs_lock(nfsnode_t np, int locktype) +nfs_node_lock_internal(nfsnode_t np, int force) { - FSDBG_TOP(268, np, locktype, np->n_lockowner, 0); - if (locktype == NFS_NODE_LOCK_SHARED) { - lck_rw_lock_shared(&np->n_lock); - } else { - lck_rw_lock_exclusive(&np->n_lock); - np->n_lockowner = current_thread(); - } - if ((locktype != NFS_NODE_LOCK_FORCE) && !(np->n_hflag && NHHASHED)) { - FSDBG_BOT(268, np, 0xdead, np->n_lockowner, 0); - nfs_unlock(np); + FSDBG_TOP(268, np, force, 0, 0); + lck_mtx_lock(&np->n_lock); + if (!force && !(np->n_hflag && NHHASHED)) { + FSDBG_BOT(268, np, 0xdead, 0, 0); + lck_mtx_unlock(&np->n_lock); return (ENOENT); } - FSDBG_BOT(268, np, locktype, np->n_lockowner, 0); + FSDBG_BOT(268, np, force, 0, 0); return (0); } +int +nfs_node_lock(nfsnode_t np) +{ + return nfs_node_lock_internal(np, 0); +} + +void +nfs_node_lock_force(nfsnode_t np) +{ + nfs_node_lock_internal(np, 1); +} + /* * Release an NFS node lock */ void -nfs_unlock(nfsnode_t np) +nfs_node_unlock(nfsnode_t np) { - FSDBG(269, np, np->n_lockowner, current_thread(), 0); - np->n_lockowner = NULL; - lck_rw_done(&np->n_lock); + FSDBG(269, np, current_thread(), 0, 0); + lck_mtx_unlock(&np->n_lock); } /* * Acquire 2 NFS node locks - * - locks taken in order given (assumed to be parent-child order) + * - locks taken in reverse address order * - both or neither of the locks are taken * - only one lock taken per node (dup nodes are skipped) */ int -nfs_lock2(nfsnode_t np1, nfsnode_t np2, int locktype) +nfs_node_lock2(nfsnode_t np1, nfsnode_t np2) { + nfsnode_t first, second; int error; - if ((error = nfs_lock(np1, locktype))) + first = (np1 > np2) ? np1 : np2; + second = (np1 > np2) ? 
np2 : np1; + if ((error = nfs_node_lock(first))) return (error); if (np1 == np2) return (error); - if ((error = nfs_lock(np2, locktype))) - nfs_unlock(np1); + if ((error = nfs_node_lock(second))) + nfs_node_unlock(first); return (error); } -/* - * Unlock a couple of NFS nodes - */ void -nfs_unlock2(nfsnode_t np1, nfsnode_t np2) +nfs_node_unlock2(nfsnode_t np1, nfsnode_t np2) { - nfs_unlock(np1); + nfs_node_unlock(np1); if (np1 != np2) - nfs_unlock(np2); + nfs_node_unlock(np2); } /* - * Acquire 4 NFS node locks - * - fdnp/fnp and tdnp/tnp locks taken in order given - * - otherwise locks taken in node address order. - * - all or none of the locks are taken - * - only one lock taken per node (dup nodes are skipped) - * - some of the node pointers may be null + * Manage NFS node busy state. + * (Similar to NFS node locks above) */ int -nfs_lock4(nfsnode_t fdnp, nfsnode_t fnp, nfsnode_t tdnp, nfsnode_t tnp, int locktype) +nfs_node_set_busy(nfsnode_t np, thread_t thd) { - nfsnode_t list[4]; - int i, lcnt = 0, error; - - if (fdnp == tdnp) { - list[lcnt++] = fdnp; - } else if (fdnp->n_parent && (tdnp == VTONFS(fdnp->n_parent))) { - list[lcnt++] = tdnp; - list[lcnt++] = fdnp; - } else if (tdnp->n_parent && (fdnp == VTONFS(tdnp->n_parent))) { - list[lcnt++] = fdnp; - list[lcnt++] = tdnp; - } else if (fdnp < tdnp) { - list[lcnt++] = fdnp; - list[lcnt++] = tdnp; - } else { - list[lcnt++] = tdnp; - list[lcnt++] = fdnp; + struct timespec ts = { 2, 0 }; + int error; + + if ((error = nfs_node_lock(np))) + return (error); + while (ISSET(np->n_flag, NBUSY)) { + SET(np->n_flag, NBUSYWANT); + msleep(np, &np->n_lock, PZERO-1, "nfsbusywant", &ts); + if ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0))) + break; } + if (!error) + SET(np->n_flag, NBUSY); + nfs_node_unlock(np); + return (error); +} - if (!tnp || (fnp == tnp) || (tnp == fdnp)) { - list[lcnt++] = fnp; - } else if (fnp < tnp) { - list[lcnt++] = fnp; - list[lcnt++] = tnp; - } else { - list[lcnt++] = tnp; - list[lcnt++] = fnp; 
+void +nfs_node_clear_busy(nfsnode_t np) +{ + int wanted; + + nfs_node_lock_force(np); + wanted = ISSET(np->n_flag, NBUSYWANT); + CLR(np->n_flag, NBUSY|NBUSYWANT); + nfs_node_unlock(np); + if (wanted) + wakeup(np); +} + +int +nfs_node_set_busy2(nfsnode_t np1, nfsnode_t np2, thread_t thd) +{ + nfsnode_t first, second; + int error; + + first = (np1 > np2) ? np1 : np2; + second = (np1 > np2) ? np2 : np1; + if ((error = nfs_node_set_busy(first, thd))) + return (error); + if (np1 == np2) + return (error); + if ((error = nfs_node_set_busy(second, thd))) + nfs_node_clear_busy(first); + return (error); +} + +void +nfs_node_clear_busy2(nfsnode_t np1, nfsnode_t np2) +{ + nfs_node_clear_busy(np1); + if (np1 != np2) + nfs_node_clear_busy(np2); +} + +/* helper function to sort four nodes in reverse address order (no dupes) */ +static void +nfs_node_sort4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, nfsnode_t *list, int *lcntp) +{ + nfsnode_t na[2], nb[2]; + int a, b, i, lcnt; + + /* sort pairs then merge */ + na[0] = (np1 > np2) ? np1 : np2; + na[1] = (np1 > np2) ? np2 : np1; + nb[0] = (np3 > np4) ? np3 : np4; + nb[1] = (np3 > np4) ? np4 : np3; + for (a = b = i = lcnt = 0; i < 4; i++) { + if (a >= 2) + list[lcnt] = nb[b++]; + else if ((b >= 2) || (na[a] >= nb[b])) + list[lcnt] = na[a++]; + else + list[lcnt] = nb[b++]; + if ((lcnt <= 0) || (list[lcnt] != list[lcnt-1])) + lcnt++; /* omit dups */ } + if (list[lcnt-1] == NULL) + lcnt--; + *lcntp = lcnt; +} + +int +nfs_node_set_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, thread_t thd) +{ + nfsnode_t list[4]; + int i, lcnt, error; + + nfs_node_sort4(np1, np2, np3, np4, list, &lcnt); /* Now we can lock using list[0 - lcnt-1] */ - for (i = 0; i < lcnt; ++i) { - if (list[i]) - if ((error = nfs_lock(list[i], locktype))) { - /* Drop any locks we acquired. 
*/ - while (--i >= 0) { - if (list[i]) - nfs_unlock(list[i]); - } - return (error); - } - } + for (i = 0; i < lcnt; ++i) + if ((error = nfs_node_set_busy(list[i], thd))) { + /* Drop any locks we acquired. */ + while (--i >= 0) + nfs_node_clear_busy(list[i]); + return (error); + } return (0); } -/* - * Unlock a group of NFS nodes - */ void -nfs_unlock4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4) +nfs_node_clear_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4) { nfsnode_t list[4]; - int i, k = 0; - - if (np1) { - nfs_unlock(np1); - list[k++] = np1; - } - if (np2) { - for (i = 0; i < k; ++i) - if (list[i] == np2) - goto skip2; - nfs_unlock(np2); - list[k++] = np2; - } -skip2: - if (np3) { - for (i = 0; i < k; ++i) - if (list[i] == np3) - goto skip3; - nfs_unlock(np3); - list[k++] = np3; - } -skip3: - if (np4) { - for (i = 0; i < k; ++i) - if (list[i] == np4) - return; - nfs_unlock(np4); - } + int lcnt; + + nfs_node_sort4(np1, np2, np3, np4, list, &lcnt); + while (--lcnt >= 0) + nfs_node_clear_busy(list[lcnt]); } /* @@ -739,13 +1024,18 @@ nfs_unlock4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4) void nfs_data_lock(nfsnode_t np, int locktype) { - nfs_data_lock2(np, locktype, 1); + nfs_data_lock_internal(np, locktype, 1); +} +void +nfs_data_lock_noupdate(nfsnode_t np, int locktype) +{ + nfs_data_lock_internal(np, locktype, 0); } void -nfs_data_lock2(nfsnode_t np, int locktype, int updatesize) +nfs_data_lock_internal(nfsnode_t np, int locktype, int updatesize) { FSDBG_TOP(270, np, locktype, np->n_datalockowner, 0); - if (locktype == NFS_NODE_LOCK_SHARED) { + if (locktype == NFS_DATA_LOCK_SHARED) { if (updatesize && ISSET(np->n_flag, NUPDATESIZE)) nfs_data_update_size(np, 0); lck_rw_lock_shared(&np->n_datalock); @@ -764,10 +1054,15 @@ nfs_data_lock2(nfsnode_t np, int locktype, int updatesize) void nfs_data_unlock(nfsnode_t np) { - nfs_data_unlock2(np, 1); + nfs_data_unlock_internal(np, 1); +} +void 
+nfs_data_unlock_noupdate(nfsnode_t np) +{ + nfs_data_unlock_internal(np, 0); } void -nfs_data_unlock2(nfsnode_t np, int updatesize) +nfs_data_unlock_internal(nfsnode_t np, int updatesize) { int mine = (np->n_datalockowner == current_thread()); FSDBG_TOP(271, np, np->n_datalockowner, current_thread(), 0); @@ -791,16 +1086,16 @@ nfs_data_update_size(nfsnode_t np, int datalocked) FSDBG_TOP(272, np, np->n_flag, np->n_size, np->n_newsize); if (!datalocked) { - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); /* grabbing data lock will automatically update size */ nfs_data_unlock(np); FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); return; } - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_lock(np); if (error || !ISSET(np->n_flag, NUPDATESIZE)) { if (!error) - nfs_unlock(np); + nfs_node_unlock(np); FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); return; } @@ -808,7 +1103,7 @@ nfs_data_update_size(nfsnode_t np, int datalocked) np->n_size = np->n_newsize; /* make sure we invalidate buffers the next chance we get */ SET(np->n_flag, NNEEDINVALIDATE); - nfs_unlock(np); + nfs_node_unlock(np); ubc_setsize(NFSTOV(np), (off_t)np->n_size); /* XXX error? */ FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); } diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index b7be99468..e224a921d 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -117,22 +117,26 @@ struct nfsrv_sock *nfsrv_udpsock = NULL; /* NFS exports */ struct nfsrv_expfs_list nfsrv_exports; -struct nfsrv_export_hashhead *nfsrv_export_hashtbl; +struct nfsrv_export_hashhead *nfsrv_export_hashtbl = NULL; +int nfsrv_export_hash_size = NFSRVEXPHASHSZ; u_long nfsrv_export_hash; lck_grp_t *nfsrv_export_rwlock_group; lck_rw_t nfsrv_export_rwlock; +#if CONFIG_FSE /* NFS server file modification event generator */ struct nfsrv_fmod_hashhead *nfsrv_fmod_hashtbl; u_long nfsrv_fmod_hash; lck_grp_t *nfsrv_fmod_grp; lck_mtx_t *nfsrv_fmod_mutex; static int nfsrv_fmod_timer_on = 0; - int nfsrv_fsevents_enabled = 1; +#endif /* NFS server timers */ +#if CONFIG_FSE thread_call_t nfsrv_fmod_timer_call; +#endif thread_call_t nfsrv_deadsock_timer_call; thread_call_t nfsrv_wg_timer_call; int nfsrv_wg_timer_on; @@ -149,10 +153,12 @@ int nfsrv_wg_delay_v3 = 0; int nfsrv_async = 0; -static int nfsrv_authorize(vnode_t,vnode_t,kauth_action_t,vfs_context_t,struct nfs_export_options*,int); -static int nfsrv_wg_coalesce(struct nfsrv_descript *, struct nfsrv_descript *); +int nfsrv_authorize(vnode_t,vnode_t,kauth_action_t,vfs_context_t,struct nfs_export_options*,int); +int nfsrv_wg_coalesce(struct nfsrv_descript *, struct nfsrv_descript *); +void nfsrv_modified(vnode_t, vfs_context_t); extern void IOSleep(int); +extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path); /* * Initialize the data structures for the server. 
@@ -180,10 +186,8 @@ nfsrv_init(void) return; } - if (sizeof (struct nfsrv_sock) > NFS_SVCALLOC) { + if (sizeof (struct nfsrv_sock) > NFS_SVCALLOC) printf("struct nfsrv_sock bloated (> %dbytes)\n",NFS_SVCALLOC); - printf("Try reducing NFS_UIDHASHSIZ\n"); - } /* init nfsd mutex */ nfsd_lck_grp = lck_grp_alloc_init("nfsd", LCK_GRP_ATTR_NULL); @@ -194,7 +198,6 @@ nfsrv_init(void) nfsrv_slp_mutex_group = lck_grp_alloc_init("nfsrv-slp-mutex", LCK_GRP_ATTR_NULL); /* init export data structures */ - nfsrv_export_hashtbl = hashinit(8, M_TEMP, &nfsrv_export_hash); LIST_INIT(&nfsrv_exports); nfsrv_export_rwlock_group = lck_grp_alloc_init("nfsrv-export-rwlock", LCK_GRP_ATTR_NULL); lck_rw_init(&nfsrv_export_rwlock, nfsrv_export_rwlock_group, LCK_ATTR_NULL); @@ -206,13 +209,17 @@ nfsrv_init(void) nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", LCK_GRP_ATTR_NULL); nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, LCK_ATTR_NULL); +#if CONFIG_FSE /* init NFS server file modified event generation */ nfsrv_fmod_hashtbl = hashinit(NFSRVFMODHASHSZ, M_TEMP, &nfsrv_fmod_hash); nfsrv_fmod_grp = lck_grp_alloc_init("nfsrv_fmod", LCK_GRP_ATTR_NULL); nfsrv_fmod_mutex = lck_mtx_alloc_init(nfsrv_fmod_grp, LCK_ATTR_NULL); +#endif /* initialize NFS server timer callouts */ +#if CONFIG_FSE nfsrv_fmod_timer_call = thread_call_allocate(nfsrv_fmod_timer, NULL); +#endif nfsrv_deadsock_timer_call = thread_call_allocate(nfsrv_deadsock_timer, NULL); nfsrv_wg_timer_call = thread_call_allocate(nfsrv_wg_timer, NULL); @@ -268,7 +275,7 @@ nfsrv_access( int error, attrerr; struct vnode_attr vattr; struct nfs_filehandle nfh; - u_long nfsmode; + u_int32_t nfsmode; kauth_action_t testaction; struct nfs_export *nx; struct nfs_export_options *nxo; @@ -587,13 +594,13 @@ nfsrv_lookup( vfs_context_t ctx, mbuf_t *mrepp) { - struct nameidata ni, *nip = ∋ + struct nameidata ni; vnode_t vp, dirp = NULL; struct nfs_filehandle dnfh, nfh; struct nfs_export *nx = NULL; struct nfs_export_options 
*nxo; int error, attrerr, dirattrerr, isdotdot; - uint32_t len; + uint32_t len = 0; uid_t saved_uid; struct vnode_attr va, dirattr, *vap = &va; struct nfsm_chain *nmreq, nmrep; @@ -635,7 +642,7 @@ nfsrv_lookup( nameidone(&ni); - vp = nip->ni_vp; + vp = ni.ni_vp; error = nfsrv_vptofh(nx, nd->nd_vers, (isdotdot ? &dnfh : NULL), vp, ctx, &nfh); if (!error) { nfsm_srv_vattr_init(vap, nd->nd_vers); @@ -689,7 +696,7 @@ nfsrv_readlink( struct nfs_export_options *nxo; struct nfsm_chain *nmreq, nmrep; mbuf_t mpath, mp; - uio_t uiop = NULL; + uio_t auio = NULL; char uio_buf[ UIO_SIZEOF(4) ]; char *uio_bufp = &uio_buf[0]; int uio_buflen = UIO_SIZEOF(4); @@ -715,13 +722,13 @@ nfsrv_readlink( error = ENOMEM; nfsmerr_if(error); } - uiop = uio_createwithbuffer(mpcnt, 0, UIO_SYSSPACE, UIO_READ, uio_bufp, uio_buflen); - if (!uiop) + auio = uio_createwithbuffer(mpcnt, 0, UIO_SYSSPACE, UIO_READ, uio_bufp, uio_buflen); + if (!auio) error = ENOMEM; nfsmerr_if(error); for (mp = mpath; mp; mp = mbuf_next(mp)) - uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), mbuf_len(mp)); + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), mbuf_len(mp)); error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); nfsmerr_if(error); @@ -745,7 +752,7 @@ nfsrv_readlink( if (!error) error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx, nxo, 0); if (!error) - error = VNOP_READLINK(vp, uiop, ctx); + error = VNOP_READLINK(vp, auio, ctx); if (vp) { if (nd->nd_vers == NFS_VER3) { nfsm_srv_vattr_init(&vattr, NFS_VER3); @@ -772,9 +779,8 @@ nfsrv_readlink( nfsm_chain_build_done(error, &nmrep); goto nfsmout; } - if (uiop && (uio_resid(uiop) > 0)) { - // LP64todo - fix this - len -= uio_resid(uiop); + if (auio && (uio_resid(auio) > 0)) { + len -= uio_resid(auio); tlen = nfsm_rndup(len); nfsm_adj(mpath, NFS_MAXPATHLEN-tlen, tlen-len); } @@ -815,7 +821,7 @@ nfsrv_read( struct nfs_filehandle nfh; struct nfs_export *nx; struct nfs_export_options *nxo; - uio_t uiop = NULL; + uio_t auio = NULL; char 
*uio_bufp = NULL; struct vnode_attr vattr, *vap = &vattr; off_t off; @@ -839,7 +845,7 @@ nfsrv_read( else nfsm_chain_get_32(error, nmreq, off); nfsm_chain_get_32(error, nmreq, reqlen); - maxlen = NFS_SRVMAXDATA(nd); + maxlen = NFSRV_NDMAXDATA(nd); if (reqlen > maxlen) reqlen = maxlen; nfsmerr_if(error); @@ -883,18 +889,18 @@ nfsrv_read( nfsmerr_if(error); MALLOC(uio_bufp, char *, UIO_SIZEOF(mreadcnt), M_TEMP, M_WAITOK); if (uio_bufp) - uiop = uio_createwithbuffer(mreadcnt, off, UIO_SYSSPACE, + auio = uio_createwithbuffer(mreadcnt, off, UIO_SYSSPACE, UIO_READ, uio_bufp, UIO_SIZEOF(mreadcnt)); - if (!uio_bufp || !uiop) { + if (!uio_bufp || !auio) { error = ENOMEM; goto errorexit; } for (m = mread; m; m = mbuf_next(m)) - uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mbuf_len(m)); - error = VNOP_READ(vp, uiop, IO_NODELOCKED, ctx); + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mbuf_len(m)); + error = VNOP_READ(vp, auio, IO_NODELOCKED, ctx); } else { - uiop = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); - if (!uiop) { + auio = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + if (!auio) { error = ENOMEM; goto errorexit; } @@ -913,8 +919,7 @@ nfsrv_read( vp = NULL; /* trim off any data not actually read */ - // LP64todo - fix this - len -= uio_resid(uiop); + len -= uio_resid(auio); tlen = nfsm_rndup(len); if (count != tlen || tlen != len) nfsm_adj(mread, count - tlen, tlen - len); @@ -964,6 +969,7 @@ nfsrv_read( return (error); } +#if CONFIG_FSE /* * NFS File modification reporting * @@ -989,13 +995,14 @@ int nfsrv_fmod_min_interval = 100; /* msec min interval between callbacks */ void nfsrv_fmod_timer(__unused void *param0, __unused void *param1) { - struct nfsrv_fmod_hashhead *head; - struct nfsrv_fmod *fp, *nfp; + struct nfsrv_fmod_hashhead *headp, firehead; + struct nfsrv_fmod *fp, *nfp, *pfp; uint64_t timenow, next_deadline; - int interval = 0; - int i; + int interval = 
0, i, fmod_fire; + LIST_INIT(&firehead); lck_mtx_lock(nfsrv_fmod_mutex); +again: clock_get_uptime(&timenow); clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000, &next_deadline); @@ -1003,13 +1010,14 @@ nfsrv_fmod_timer(__unused void *param0, __unused void *param1) /* * Scan all the hash chains */ + fmod_fire = 0; for (i = 0; i < NFSRVFMODHASHSZ; i++) { /* * For each hash chain, look for an entry * that has exceeded the deadline. */ - head = &nfsrv_fmod_hashtbl[i]; - LIST_FOREACH(fp, head, fm_link) { + headp = &nfsrv_fmod_hashtbl[i]; + LIST_FOREACH(fp, headp, fm_link) { if (timenow >= fp->fm_deadline) break; if (fp->fm_deadline < next_deadline) @@ -1022,25 +1030,40 @@ nfsrv_fmod_timer(__unused void *param0, __unused void *param1) * following entries in the chain, since they're * sorted in time order. */ + pfp = NULL; while (fp) { - /* - * Fire off the content modified fsevent for each - * entry, remove it from the list, and free it. - */ -#if CONFIG_FSE + /* move each entry to the fire list */ + nfp = LIST_NEXT(fp, fm_link); + LIST_REMOVE(fp, fm_link); + fmod_fire++; + if (pfp) + LIST_INSERT_AFTER(pfp, fp, fm_link); + else + LIST_INSERT_HEAD(&firehead, fp, fm_link); + pfp = fp; + fp = nfp; + } + } + + if (fmod_fire) { + lck_mtx_unlock(nfsrv_fmod_mutex); + /* + * Fire off the content modified fsevent for each + * entry and free it. 
+ */ + LIST_FOREACH_SAFE(fp, &firehead, fm_link, nfp) { if (nfsrv_fsevents_enabled) add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, FSE_ARG_VNODE, fp->fm_vp, FSE_ARG_DONE); -#endif vnode_put(fp->fm_vp); kauth_cred_unref(&fp->fm_context.vc_ucred); - nfp = LIST_NEXT(fp, fm_link); LIST_REMOVE(fp, fm_link); FREE(fp, M_TEMP); - nfsrv_fmod_pending--; - fp = nfp; } + lck_mtx_lock(nfsrv_fmod_mutex); + nfsrv_fmod_pending -= fmod_fire; + goto again; } /* @@ -1062,14 +1085,13 @@ nfsrv_fmod_timer(__unused void *param0, __unused void *param1) lck_mtx_unlock(nfsrv_fmod_mutex); } -#if CONFIG_FSE /* * When a vnode has been written to, enter it in the hash * table of vnodes pending creation of an fsevent. If the * callout timer isn't already running, schedule a callback * for nfsrv_fmod_pendtime msec from now. */ -static void +void nfsrv_modified(vnode_t vp, vfs_context_t ctx) { uint64_t deadline; @@ -1156,7 +1178,7 @@ nfsrv_write( struct nfs_filehandle nfh; struct nfs_export *nx; struct nfs_export_options *nxo; - uio_t uiop = NULL; + uio_t auio = NULL; char *uio_bufp = NULL; off_t off; uid_t saved_uid; @@ -1204,7 +1226,7 @@ nfsrv_write( } else { mlen = 0; } - if ((len > NFS_MAXDATA) || (len < 0) || (mlen < len)) { + if ((len > NFSRV_MAXDATA) || (len < 0) || (mlen < len)) { error = EIO; goto nfsmerr; } @@ -1237,13 +1259,13 @@ nfsrv_write( mcount++; MALLOC(uio_bufp, char *, UIO_SIZEOF(mcount), M_TEMP, M_WAITOK); if (uio_bufp) - uiop = uio_createwithbuffer(mcount, off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(mcount)); - if (!uio_bufp || !uiop) + auio = uio_createwithbuffer(mcount, off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(mcount)); + if (!uio_bufp || !auio) error = ENOMEM; nfsmerr_if(error); for (m = nmreq->nmc_mcur; m; m = mbuf_next(m)) if ((mlen = mbuf_len(m)) > 0) - uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mlen); + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mlen); /* * XXX The IO_METASYNC flag indicates that all metadata (and not 
just * enough to ensure data integrity) mus be written to stable storage @@ -1256,8 +1278,8 @@ nfsrv_write( else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); - error = VNOP_WRITE(vp, uiop, ioflags, ctx); - OSAddAtomic(1, (SInt32*)&nfsstats.srvvop_writes); + error = VNOP_WRITE(vp, auio, ioflags, ctx); + OSAddAtomic(1, &nfsstats.srvvop_writes); /* update export stats */ NFSStatAdd64(&nx->nx_stats.bytes_written, len); @@ -1324,7 +1346,7 @@ nfsrv_write( */ #define NWDELAYHASH(sock, f) \ - (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) + (&(sock)->ns_wdelayhashtbl[(*((u_int32_t *)(f))) % NFS_WDELAYHASHSIZ]) /* These macros compare nfsrv_descript structures. */ #define NFSW_CONTIG(o, n) \ (((o)->nd_eoff >= (n)->nd_off) && nfsrv_fhmatch(&(o)->nd_fh, &(n)->nd_fh)) @@ -1354,7 +1376,7 @@ nfsrv_writegather( int preattrerr, postattrerr; vnode_t vp; mbuf_t m; - uio_t uiop = NULL; + uio_t auio = NULL; char *uio_bufp = NULL; u_quad_t cur_usec; struct timeval now; @@ -1404,7 +1426,7 @@ nfsrv_writegather( mlen = 0; } - if ((nd->nd_len > NFS_MAXDATA) || (nd->nd_len < 0) || (mlen < nd->nd_len)) { + if ((nd->nd_len > NFSRV_MAXDATA) || (nd->nd_len < 0) || (mlen < nd->nd_len)) { error = EIO; nfsmerr: nd->nd_repstat = error; @@ -1527,16 +1549,16 @@ nfsrv_writegather( MALLOC(uio_bufp, char *, UIO_SIZEOF(i), M_TEMP, M_WAITOK); if (uio_bufp) - uiop = uio_createwithbuffer(i, nd->nd_off, UIO_SYSSPACE, + auio = uio_createwithbuffer(i, nd->nd_off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(i)); - if (!uio_bufp || !uiop) + if (!uio_bufp || !auio) error = ENOMEM; if (!error) { for (m = nmreq->nmc_mhead; m; m = mbuf_next(m)) if ((tlen = mbuf_len(m)) > 0) - uio_addiov(uiop, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), tlen); - error = VNOP_WRITE(vp, uiop, ioflags, ctx); - OSAddAtomic(1, (SInt32*)&nfsstats.srvvop_writes); + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), tlen); + error = VNOP_WRITE(vp, auio, ioflags, ctx); + OSAddAtomic(1, 
&nfsstats.srvvop_writes); /* update export stats */ NFSStatAdd64(&nx->nx_stats.bytes_written, nd->nd_len); @@ -1657,7 +1679,7 @@ nfsrv_writegather( * - update the nd_eoff and nd_stable for owp * - put nd on owp's nd_coalesce list */ -static int +int nfsrv_wg_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nd) { int overlap, error; @@ -1788,7 +1810,7 @@ nfsrv_create( struct nameidata ni; int error, rdev, dpreattrerr, dpostattrerr, postattrerr; int how, exclusive_flag; - uint32_t len; + uint32_t len = 0, cnflags; vnode_t vp, dvp, dirp; struct nfs_filehandle nfh; struct nfs_export *nx = NULL; @@ -2006,7 +2028,12 @@ nfsrv_create( ni.ni_cnd.cn_context = ctx; ni.ni_startdir = dvp; ni.ni_usedvp = dvp; - error = lookup(&ni); + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; + } if (!error) { if (ni.ni_cnd.cn_flags & ISSYMLINK) error = EINVAL; @@ -2123,8 +2150,8 @@ nfsrv_mknod( struct vnode_attr va, *vap = &va; struct nameidata ni; int error, dpreattrerr, dpostattrerr, postattrerr; - uint32_t len; - u_long major, minor; + uint32_t len = 0, cnflags; + u_int32_t major = 0, minor = 0; enum vtype vtyp; vnode_t vp, dvp, dirp; struct nfs_filehandle nfh; @@ -2272,7 +2299,12 @@ nfsrv_mknod( ni.ni_cnd.cn_context = vfs_context_current(); ni.ni_startdir = dvp; ni.ni_usedvp = dvp; - error = lookup(&ni); + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; + } if (!error) { vp = ni.ni_vp; if (ni.ni_cnd.cn_flags & ISSYMLINK) @@ -2362,7 +2394,7 @@ nfsrv_remove( { struct nameidata ni; int error, dpreattrerr, dpostattrerr; - uint32_t len; + uint32_t len = 0; uid_t saved_uid; vnode_t vp, dvp, dirp = 
NULL; struct vnode_attr dpreattr, dpostattr; @@ -2515,7 +2547,7 @@ nfsrv_rename( struct nfsm_chain *nmreq, nmrep; char *from_name, *to_name; #if CONFIG_FSE - int from_len, to_len; + int from_len=0, to_len=0; fse_info from_finfo, to_finfo; #endif u_char didstats = 0; @@ -2525,6 +2557,7 @@ nfsrv_rename( fdpreattrerr = fdpostattrerr = ENOENT; tdpreattrerr = tdpostattrerr = ENOENT; saved_uid = kauth_cred_getuid(nd->nd_cr); + fromlen = tolen = 0; frompath = topath = NULL; fdirp = tdirp = NULL; nmreq = &nd->nd_nmreq; @@ -2912,50 +2945,26 @@ nfsrv_rename( */ #if CONFIG_FSE if (nfsrv_fsevents_enabled && need_fsevent(FSE_RENAME, fvp)) { + int from_truncated = 0, to_truncated = 0; + get_fse_info(fvp, &from_finfo, ctx); if (tvp) get_fse_info(tvp, &to_finfo, ctx); from_name = get_pathbuff(); - from_len = MAXPATHLEN; - if (from_name && vn_getpath(fdvp, from_name, &from_len)) { - release_pathbuff(from_name); - from_name = NULL; - } else if ((from_len + 1 + fromni.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - // if the path is not just "/", then append a "/" - if (from_len > 2) { - from_name[from_len-1] = '/'; - } else { - from_len--; - } - strlcpy(&from_name[from_len], fromni.ni_cnd.cn_nameptr, MAXPATHLEN-from_len); - from_len += fromni.ni_cnd.cn_namelen + 1; - from_name[from_len] = '\0'; + if (from_name) { + from_len = safe_getpath(fdvp, fromni.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated); } - + to_name = from_name ? 
get_pathbuff() : NULL; - to_len = MAXPATHLEN; + if (to_name) { + to_len = safe_getpath(tdvp, toni.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated); + } - if (!to_name) { - if (from_name) { - release_pathbuff(from_name); - from_name = NULL; - } - } else if (vn_getpath(tdvp, to_name, &to_len)) { - release_pathbuff(from_name); - release_pathbuff(to_name); - from_name = to_name = NULL; - } else if ((to_len + 1 + toni.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - // if the path is not just "/", then append a "/" - if (to_len > 2) { - to_name[to_len-1] = '/'; - } else { - to_len--; - } - strlcpy(&to_name[to_len], toni.ni_cnd.cn_nameptr, MAXPATHLEN-to_len); - to_len += toni.ni_cnd.cn_namelen + 1; - to_name[to_len] = '\0'; + if (from_truncated || to_truncated) { + from_finfo.mode |= FSE_TRUNCATED_PATH; } + } else { from_name = NULL; to_name = NULL; @@ -3197,25 +3206,26 @@ nfsrv_link( #if CONFIG_FSE if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CREATE_FILE, dvp)) { char *target_path = NULL; - int plen; + int plen, truncated=0; fse_info finfo; /* build the path to the new link file */ - plen = MAXPATHLEN; - if ((target_path = get_pathbuff()) && !vn_getpath(dvp, target_path, &plen)) { - if ((plen + 1 + ni.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - target_path[plen-1] = '/'; - strlcpy(&target_path[plen], ni.ni_cnd.cn_nameptr, MAXPATHLEN-plen); - plen += ni.ni_cnd.cn_namelen; - } - if (get_fse_info(vp, &finfo, ctx) == 0) + target_path = get_pathbuff(); + if (target_path) { + plen = safe_getpath(dvp, ni.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated); + + if (get_fse_info(vp, &finfo, ctx) == 0) { + if (truncated) { + finfo.mode |= FSE_TRUNCATED_PATH; + } add_fsevent(FSE_CREATE_FILE, ctx, FSE_ARG_STRING, plen, target_path, FSE_ARG_FINFO, &finfo, FSE_ARG_DONE); - } - if (target_path) + } + release_pathbuff(target_path); + } } #endif @@ -3279,7 +3289,7 @@ nfsrv_symlink( struct vnode_attr va, *vap = &va; struct nameidata ni; int error, dpreattrerr, dpostattrerr, 
postattrerr; - uint32_t len, linkdatalen; + uint32_t len = 0, linkdatalen, cnflags; uid_t saved_uid; char *linkdata; vnode_t vp, dvp, dirp; @@ -3408,7 +3418,12 @@ nfsrv_symlink( ni.ni_cnd.cn_context = ctx; ni.ni_startdir = dvp; ni.ni_usedvp = dvp; - error = lookup(&ni); + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; + } if (!error) vp = ni.ni_vp; } @@ -3504,7 +3519,7 @@ nfsrv_mkdir( struct vnode_attr va, *vap = &va; struct nameidata ni; int error, dpreattrerr, dpostattrerr, postattrerr; - uint32_t len; + uint32_t len = 0; vnode_t vp, dvp, dirp; struct nfs_filehandle nfh; struct nfs_export *nx = NULL; @@ -3716,7 +3731,7 @@ nfsrv_rmdir( mbuf_t *mrepp) { int error, dpreattrerr, dpostattrerr; - uint32_t len; + uint32_t len = 0; uid_t saved_uid; vnode_t vp, dvp, dirp; struct vnode_attr dpreattr, dpostattr; @@ -3903,7 +3918,7 @@ nfsrv_readdir( error = 0; attrerr = ENOENT; - nentries = 0; + count = nentries = 0; nmreq = &nd->nd_nmreq; nfsm_chain_null(&nmrep); rbuf = NULL; @@ -3923,7 +3938,7 @@ nfsrv_readdir( off = toff; siz = ((count + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); - xfer = NFS_SRVMAXDATA(nd); + xfer = NFSRV_NDMAXDATA(nd); if (siz > xfer) siz = xfer; fullsiz = siz; @@ -3974,7 +3989,6 @@ nfsrv_readdir( nfsmerr_if(error); if (uio_resid(auio) != 0) { - // LP64todo - fix this siz -= uio_resid(auio); /* If nothing read, return empty reply with eof set */ @@ -4139,7 +4153,7 @@ nfsrv_readdirplus( nfsmerr_if(error); off = toff; - xfer = NFS_SRVMAXDATA(nd); + xfer = NFSRV_NDMAXDATA(nd); dircount = ((dircount + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); if (dircount > xfer) dircount = xfer; @@ -4191,7 +4205,6 @@ nfsrv_readdirplus( nfsmerr_if(error); if (uio_resid(auio) != 0) { - // LP64todo - fix this siz -= uio_resid(auio); /* If nothing read, return empty reply with eof set */ @@ 
-4574,7 +4587,7 @@ nfsrv_fsinfo( maxsize = NFS_MAXDGRAMDATA; prefsize = NFS_PREFDGRAMDATA; } else - maxsize = prefsize = NFS_MAXDATA; + maxsize = prefsize = NFSRV_MAXDATA; nfsm_chain_add_32(error, &nmrep, maxsize); nfsm_chain_add_32(error, &nmrep, prefsize); @@ -4797,7 +4810,7 @@ int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, * will return EPERM instead of EACCESS. EPERM is always an error. */ -static int +int nfsrv_authorize( vnode_t vp, vnode_t dvp, diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index 44536c331..e630eb95c 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,7 +83,6 @@ #include #include #include -#include #include #include @@ -113,19 +112,25 @@ kern_return_t thread_terminate(thread_t); #if NFSSERVER int nfsrv_sock_max_rec_queue_length = 128; /* max # RPC records queued on (UDP) socket */ -static int nfsrv_getstream(struct nfsrv_sock *,int); -static int nfsrv_getreq(struct nfsrv_descript *); +int nfsrv_getstream(struct nfsrv_sock *,int); +int nfsrv_getreq(struct nfsrv_descript *); extern int nfsv3_procid[NFS_NPROCS]; #endif /* NFSSERVER */ #if NFSCLIENT -static int nfs_connect_setup(struct nfsmount *); -static void nfs_reqdequeue(struct nfsreq *); -static void nfs_udp_rcv(socket_t, void*, int); -static void nfs_tcp_rcv(socket_t, void*, int); -static void nfs_request_match_reply(struct nfsmount *, mbuf_t); -static void nfs_softterm(struct nfsreq *); +int nfs_reconnect(struct nfsmount *); +int nfs_connect_setup(struct nfsmount *); +void nfs_mount_sock_thread(void *, wait_result_t); +void nfs_udp_rcv(socket_t, void*, int); +void nfs_tcp_rcv(socket_t, void*, int); +void nfs_sock_poke(struct nfsmount *); +void nfs_request_match_reply(struct nfsmount *, mbuf_t); +void nfs_reqdequeue(struct nfsreq *); +void nfs_reqbusy(struct nfsreq *); 
+struct nfsreq *nfs_reqnext(struct nfsreq *); +int nfs_wait_reply(struct nfsreq *); +void nfs_softterm(struct nfsreq *); #ifdef NFS_SOCKET_DEBUGGING #define NFS_SOCK_DBG(X) printf X @@ -186,7 +191,7 @@ static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; * Initialize socket state and perform setup for a new NFS connection. */ int -nfs_connect(struct nfsmount *nmp) +nfs_connect(struct nfsmount *nmp, int verbose) { socket_t so; int error, on = 1, proto; @@ -194,7 +199,6 @@ nfs_connect(struct nfsmount *nmp) struct sockaddr *saddr; struct sockaddr_in sin; struct timeval timeo; - u_short tport; lck_mtx_lock(&nmp->nm_lock); nmp->nm_sockflags |= NMSOCK_CONNECTING; @@ -212,18 +216,19 @@ nfs_connect(struct nfsmount *nmp) * Some servers require that the client port be a reserved port number. */ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { - lck_mtx_unlock(&nmp->nm_lock); - sin.sin_len = sizeof (struct sockaddr_in); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = INADDR_ANY; - tport = IPPORT_RESERVED - 1; - sin.sin_port = htons(tport); - while (((error = sock_bind(so, (struct sockaddr *) &sin)) == EADDRINUSE) && - (--tport > IPPORT_RESERVED / 2)) - sin.sin_port = htons(tport); - if (error) + int portrange = IP_PORTRANGE_LOW; + error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange)); + if (!error) { /* bind now to check for failure */ + sin.sin_len = sizeof (struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = 0; + error = sock_bind(so, (struct sockaddr *) &sin); + } + if (error) { + lck_mtx_unlock(&nmp->nm_lock); goto bad; - lck_mtx_lock(&nmp->nm_lock); + } } /* @@ -238,7 +243,7 @@ nfs_connect(struct nfsmount *nmp) } } else { int tocnt = 0, optlen = sizeof(error); - struct timespec ts = { 2, 0 }; + struct timespec ts = { 1, 0 }; lck_mtx_unlock(&nmp->nm_lock); error = sock_connect(so, mbuf_data(nmp->nm_nam), MSG_DONTWAIT); @@ -246,18 +251,20 @@ 
nfs_connect(struct nfsmount *nmp) goto bad; lck_mtx_lock(&nmp->nm_lock); while (!sock_isconnected(so)) { - if (tocnt++ == 15) /* log a warning if connect is taking a while */ + nfs_mount_check_dead_timeout(nmp); + if ((tocnt++ == 30) && verbose) /* log a warning if connect is taking a while */ log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); /* check for error on socket */ sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &error, &optlen); if (error) { - log(LOG_INFO, "nfs_connect: socket error %d for %s\n", - error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); + if (verbose) + log(LOG_INFO, "nfs_connect: socket error %d for %s\n", + error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); break; } - if (tocnt > 60) { - /* abort if this is taking too long */ + /* abort if this is taking too long or we're unmounting */ + if ((tocnt > 120) || (nmp->nm_sockflags & NMSOCK_UNMOUNT)) { error = ENOTCONN; break; } @@ -265,7 +272,7 @@ nfs_connect(struct nfsmount *nmp) break; msleep(&nmp->nm_so, &nmp->nm_lock, PSOCK, "nfs_socket_connect", &ts); } - if (tocnt > 15) + if ((tocnt > 30) && verbose) log(LOG_INFO, "nfs_connect: socket connect %s for %s\n", error ? 
"aborted" : "completed", vfs_statfs(nmp->nm_mountp)->f_mntfromname); @@ -351,13 +358,11 @@ nfs_connect(struct nfsmount *nmp) wakeup(&nmp->nm_sockflags); } lck_mtx_unlock(&nmp->nm_lock); - if (error) - nfs_disconnect(nmp); return (error); } /* setup & confirm socket connection is functional */ -static int +int nfs_connect_setup(struct nfsmount *nmp) { struct nfsm_chain nmreq, nmrep; @@ -366,6 +371,17 @@ nfs_connect_setup(struct nfsmount *nmp) if (nmp->nm_vers >= NFS_VER4) { error = nfs4_setclientid(nmp); + if (error) + return (error); + error = nfs4_renew(nmp, R_SETUP); + if ((error == NFSERR_ADMIN_REVOKED) || + (error == NFSERR_EXPIRED) || + (error == NFSERR_LEASE_MOVED) || + (error == NFSERR_STALE_CLIENTID)) { + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= NFSSTA_RECOVER; + lck_mtx_unlock(&nmp->nm_lock); + } } else { /* verify connection's OK by sending a NULL request */ nfsm_chain_null(&nmreq); @@ -392,20 +408,23 @@ nfs_connect_setup(struct nfsmount *nmp) * - set R_MUSTRESEND for all outstanding requests on mount point * If this fails the mount point is DEAD! 
*/ -static int +int nfs_reconnect(struct nfsmount *nmp) { struct nfsreq *rq; struct timeval now; thread_t thd = current_thread(); - int error, lastmsg, wentdown = 0; + int error, wentdown = 0, verbose = 1; + time_t lastmsg; microuptime(&now); lastmsg = now.tv_sec - (nmp->nm_tprintf_delay - nmp->nm_tprintf_initial_delay); nfs_disconnect(nmp); - while ((error = nfs_connect(nmp))) { + while ((error = nfs_connect(nmp, verbose))) { + verbose = 0; + nfs_disconnect(nmp); if (error == EINTR || error == ERESTART) return (EINTR); if (error == EIO) @@ -423,6 +442,7 @@ nfs_reconnect(struct nfsmount *nmp) lck_mtx_unlock(&nmp->nm_lock); return (error); } + nfs_mount_check_dead_timeout(nmp); if ((error = nfs_sigintr(nmp, NULL, thd, 1))) { lck_mtx_unlock(&nmp->nm_lock); return (error); @@ -449,7 +469,7 @@ nfs_reconnect(struct nfsmount *nmp) rq->r_flags |= R_MUSTRESEND; rq->r_rtt = -1; wakeup(rq); - if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC) + if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(rq); } lck_mtx_unlock(&rq->r_mtx); @@ -486,7 +506,7 @@ nfs_disconnect(struct nfsmount *nmp) /* * mark an NFS mount as needing a reconnect/resends. 
*/ -static void +void nfs_need_reconnect(struct nfsmount *nmp) { struct nfsreq *rq; @@ -507,7 +527,7 @@ nfs_need_reconnect(struct nfsmount *nmp) rq->r_flags |= R_MUSTRESEND; rq->r_rtt = -1; wakeup(rq); - if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC) + if ((rq->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(rq); } lck_mtx_unlock(&rq->r_mtx); @@ -519,7 +539,7 @@ nfs_need_reconnect(struct nfsmount *nmp) /* * thread to handle miscellaneous async NFS socket work (reconnects/resends) */ -static void +void nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) { struct nfsmount *nmp = arg; @@ -528,10 +548,18 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) struct nfsreq *req; struct timeval now; int error, dofinish, force; + nfsnode_t np; + fhandle_t fh; + nfs_stateid dstateid; lck_mtx_lock(&nmp->nm_lock); - while (!(nmp->nm_sockflags & NMSOCK_READY) || !TAILQ_EMPTY(&nmp->nm_resendq)) { + while (!(nmp->nm_sockflags & NMSOCK_READY) || + !TAILQ_EMPTY(&nmp->nm_resendq) || + nmp->nm_deadto_start || + ((nmp->nm_vers >= NFS_VER4) && + ((nmp->nm_state & NFSSTA_RECOVER) || !TAILQ_EMPTY(&nmp->nm_recallq)))) + { if (nmp->nm_sockflags & NMSOCK_UNMOUNT) break; force = (nmp->nm_state & NFSSTA_FORCE); @@ -543,15 +571,41 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) } lck_mtx_unlock(&nmp->nm_lock); NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname)); - if ((error = nfs_reconnect(nmp))) - printf("nfs_reconnect failed %d for %s\n", error, - vfs_statfs(nmp->nm_mountp)->f_mntfromname); - else + if (nfs_reconnect(nmp) == 0) nmp->nm_reconnect_start = 0; lck_mtx_lock(&nmp->nm_lock); } + if ((nmp->nm_sockflags & NMSOCK_READY) && + (nmp->nm_state & NFSSTA_RECOVER) && + !(nmp->nm_sockflags & NMSOCK_UNMOUNT) && !force) { + /* perform state recovery */ + lck_mtx_unlock(&nmp->nm_lock); + nfs4_recover(nmp); + lck_mtx_lock(&nmp->nm_lock); + } + /* handle NFSv4 delegation recalls */ + while ((nmp->nm_vers 
>= NFS_VER4) && !force && + (nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER) && + ((np = TAILQ_FIRST(&nmp->nm_recallq)))) { + TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink); + np->n_dlink.tqe_next = NFSNOLIST; + lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_lock(&np->n_openlock); + dstateid = np->n_dstateid; + if (np->n_openflags & N_DELEG_MASK) { + fh.fh_len = np->n_fhsize; + bcopy(np->n_fhp, &fh.fh_data, fh.fh_len); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + nfs4_delegreturn_rpc(nmp, fh.fh_data, fh.fh_len, &dstateid, thd, nmp->nm_mcred); + } else { + lck_mtx_unlock(&np->n_openlock); + } + lck_mtx_lock(&nmp->nm_lock); + } /* do resends, if necessary/possible */ - while (((nmp->nm_sockflags & NMSOCK_READY) || force) && ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { + while ((((nmp->nm_sockflags & NMSOCK_READY) && !(nmp->nm_state & NFSSTA_RECOVER)) || force) && + ((req = TAILQ_FIRST(&nmp->nm_resendq)))) { if (req->r_resendtime) microuptime(&now); while (req && !force && req->r_resendtime && (now.tv_sec < req->r_resendtime)) @@ -595,7 +649,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) if (!error) error = nfs_request_send(req, 0); lck_mtx_lock(&req->r_mtx); - if (req->r_rchain.tqe_next == NFSREQNOLIST) + if (req->r_flags & R_RESENDQ) req->r_flags &= ~R_RESENDQ; if (error) req->r_error = error; @@ -614,11 +668,12 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) if (!error) error = nfs_sigintr(nmp, req, req->r_thread, 0); if (!error) { + req->r_flags |= R_SENDING; lck_mtx_unlock(&req->r_mtx); error = nfs_send(req, 0); lck_mtx_lock(&req->r_mtx); if (!error) { - if (req->r_rchain.tqe_next == NFSREQNOLIST) + if (req->r_flags & R_RESENDQ) req->r_flags &= ~R_RESENDQ; wakeup(req); lck_mtx_unlock(&req->r_mtx); @@ -627,7 +682,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) } } req->r_error = error; - if (req->r_rchain.tqe_next == NFSREQNOLIST) + if (req->r_flags & R_RESENDQ) req->r_flags 
&= ~R_RESENDQ; wakeup(req); dofinish = req->r_callback.rcb_func && !(req->r_flags & R_WAITSENT); @@ -636,11 +691,27 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr) nfs_asyncio_finish(req); lck_mtx_lock(&nmp->nm_lock); } - if (nmp->nm_sockflags & NMSOCK_READY) { - ts.tv_sec = TAILQ_EMPTY(&nmp->nm_resendq) ? 30 : 1; - msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts); - } else if (force) + if (nmp->nm_deadto_start) + nfs_mount_check_dead_timeout(nmp); + if (force || (nmp->nm_state & NFSSTA_DEAD)) break; + if ((nmp->nm_sockflags & NMSOCK_READY) || (nmp->nm_state & NFSSTA_RECOVER)) { + if (nmp->nm_deadto_start || !TAILQ_EMPTY(&nmp->nm_resendq) || + (nmp->nm_state & NFSSTA_RECOVER)) + ts.tv_sec = 1; + else + ts.tv_sec = 30; + msleep(&nmp->nm_sockthd, &nmp->nm_lock, PSOCK, "nfssockthread", &ts); + } + } + + /* If we're unmounting, send the unmount RPC, if requested/appropriate. */ + if ((nmp->nm_sockflags & NMSOCK_UNMOUNT) && (nmp->nm_flag & NFSMNT_CALLUMNT) && + (nmp->nm_vers < NFS_VER4) && !(nmp->nm_state & (NFSSTA_FORCE|NFSSTA_DEAD))) { + lck_mtx_unlock(&nmp->nm_lock); + nfs3_umount_rpc(nmp, vfs_context_kernel(), + (nmp->nm_sockflags & NMSOCK_READY) ? 6 : 2); + lck_mtx_lock(&nmp->nm_lock); } if (nmp->nm_sockthd == thd) @@ -660,6 +731,825 @@ nfs_mount_sock_thread_wake(struct nfsmount *nmp) thread_deallocate(nmp->nm_sockthd); } +/* + * Check if we should mark the mount dead because the + * unresponsive mount has reached the dead timeout. 
+ * (must be called with nmp locked) + */ +void +nfs_mount_check_dead_timeout(struct nfsmount *nmp) +{ + struct timeval now; + + if (!(nmp->nm_flag & NFSMNT_DEADTIMEOUT)) /* dead timeout is not enabled for this mount */ + return; + if (nmp->nm_deadto_start == 0) /* no dead-timeout start time is recorded */ + return; + if (nmp->nm_state & NFSSTA_DEAD) /* mount is already marked dead */ + return; + microuptime(&now); + if ((now.tv_sec - nmp->nm_deadto_start) < nmp->nm_deadtimeout) /* timeout interval has not elapsed yet */ + return; + printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); /* timeout reached: log it, then mark the mount dead */ + nmp->nm_state |= NFSSTA_DEAD; + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_DEAD, 0); /* post a VQ_DEAD event for this file system */ +} + +/* + * RPC record marker parsing state + */ +struct nfs_rpc_record_state +{ + uint16_t nrrs_lastfrag; /* last fragment of record */ + uint16_t nrrs_markerleft; /* marker bytes remaining */ + uint32_t nrrs_fragleft; /* fragment bytes remaining */ + uint32_t nrrs_reclen; /* length of RPC record */ + mbuf_t nrrs_m; /* mbufs for current record */ + mbuf_t nrrs_mlast; /* NOTE(review): presumably the tail of the nrrs_m chain - confirm */ +}; +int nfs_rpc_record_read(socket_t, struct nfs_rpc_record_state *, int *, mbuf_t *); + +/* + * NFS callback channel socket state + */ +struct nfs_callback_socket +{ + TAILQ_ENTRY(nfs_callback_socket) ncbs_link; /* linkage on the nfs4_cb_socks list */ + socket_t ncbs_so; /* the socket */ + struct sockaddr_in ncbs_sin; /* socket address */ + struct nfs_rpc_record_state ncbs_rrs; /* RPC record parsing state */ + time_t ncbs_stamp; /* last accessed at (uptime seconds; 0 once the socket is marked dead by the receive upcall) */ + uint32_t ncbs_flags; /* see below */ +}; +#define NCBSOCK_UPCALL 0x0001 /* a receive upcall is currently processing this socket */ +#define NCBSOCK_UPCALLWANT 0x0002 /* another upcall is waiting for the current one to finish */ +#define NCBSOCK_DEAD 0x0004 /* socket is unwanted or closed; left for the cleanup timer to reap */ + +/* + * NFS callback channel state + * + * One listening socket for accepting socket connections from servers and + * a list of connected sockets to handle callback requests on. + * Mounts registered with the callback channel are assigned IDs and + * put on a list so that the callback request handling code can match + * the requests up with mounts. 
+ */ +socket_t nfs4_cb_so = NULL; /* the listening socket (NULL when none exists) */ +in_port_t nfs4_cb_port = 0; /* port the listening socket is bound to, in host byte order */ +uint32_t nfs4_cb_id = 0; /* next callback ID to hand out; 0 means the lists are not yet initialized */ +uint32_t nfs4_cb_so_usecount = 0; /* number of mounts registered with the callback channel */ +TAILQ_HEAD(nfs4_cb_sock_list,nfs_callback_socket) nfs4_cb_socks; +TAILQ_HEAD(nfs4_cb_mount_list,nfsmount) nfs4_cb_mounts; + +int nfs4_cb_handler(struct nfs_callback_socket *, mbuf_t); + +/* + * Set up the callback channel for the NFS mount. + * + * Initializes the callback channel socket state and + * assigns a callback ID to the mount. + * + * The mount is always given an ID, counted in nfs4_cb_so_usecount and + * put on nfs4_cb_mounts, even if creating the listening socket fails; + * socket setup failures are only logged (the function returns void). + */ +void +nfs4_mount_callback_setup(struct nfsmount *nmp) +{ + struct sockaddr_in sin; + socket_t so = NULL; + struct timeval timeo; + int error, on = 1; + + lck_mtx_lock(nfs_global_mutex); + if (nfs4_cb_id == 0) { /* first use: initialize both lists and skip callback ID 0 */ + TAILQ_INIT(&nfs4_cb_mounts); + TAILQ_INIT(&nfs4_cb_socks); + nfs4_cb_id++; + } + nmp->nm_cbid = nfs4_cb_id++; + if (nmp->nm_cbid == 0) /* counter wrapped around: 0 is never handed out as an ID */ + nmp->nm_cbid = nfs4_cb_id++; + nfs4_cb_so_usecount++; + TAILQ_INSERT_HEAD(&nfs4_cb_mounts, nmp, nm_cblink); + + if (nfs4_cb_so) { /* listening socket already exists: nothing more to set up */ + lck_mtx_unlock(nfs_global_mutex); + return; + } + + error = sock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP, nfs4_cb_accept, NULL, &nfs4_cb_so); + if (error) { + log(LOG_INFO, "nfs callback setup: error %d creating listening socket\n", error); + goto fail; + } + so = nfs4_cb_so; + + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = 0; /* port 0 in the bind request; the port actually assigned is read back below */ + error = sock_bind(so, (struct sockaddr *)&sin); + if (error) { + log(LOG_INFO, "nfs callback setup: error %d binding listening socket\n", error); + goto fail; + } + error = sock_getsockname(so, (struct sockaddr *)&sin, sin.sin_len); + if (error) { + log(LOG_INFO, "nfs callback setup: error %d getting listening socket port\n", error); + goto fail; + } + nfs4_cb_port = ntohs(sin.sin_port); + + error = sock_listen(so, 32); + if (error) { + log(LOG_INFO, "nfs callback setup: error %d on listen\n", error); + goto fail; + } + + /* receive timeout shouldn't matter. 
If timeout on send, we'll want to drop the socket */ + timeo.tv_usec = 0; + timeo.tv_sec = 60; + error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + if (error) + log(LOG_INFO, "nfs callback setup: error %d setting socket rx timeout\n", error); + error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (error) + log(LOG_INFO, "nfs callback setup: error %d setting socket tx timeout\n", error); + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); + sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); + sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); + error = 0; /* socket option failures above are logged or ignored, never fatal */ + +fail: + if (error) { /* forget the listening socket under the mutex, then close it after the mutex is dropped */ + nfs4_cb_so = NULL; + lck_mtx_unlock(nfs_global_mutex); + if (so) { + sock_shutdown(so, SHUT_RDWR); + sock_close(so); + } + } else { + lck_mtx_unlock(nfs_global_mutex); + } +} + +/* + * Shut down the callback channel for the NFS mount. + * + * Clears the mount's callback ID and releases the mounts + * reference on the callback socket. Last reference dropped + * will also shut down the callback socket(s). 
+ */ +void +nfs4_mount_callback_shutdown(struct nfsmount *nmp) +{ + struct nfs_callback_socket *ncbsp; + socket_t so; + struct nfs4_cb_sock_list cb_socks; + struct timespec ts = {1,0}; + + lck_mtx_lock(nfs_global_mutex); + TAILQ_REMOVE(&nfs4_cb_mounts, nmp, nm_cblink); /* unregister first so no new callback request can match this mount */ + /* wait for any callbacks in progress to complete */ + while (nmp->nm_cbrefs) + msleep(&nmp->nm_cbrefs, nfs_global_mutex, PSOCK, "cbshutwait", &ts); + if (--nfs4_cb_so_usecount) { /* other mounts still use the callback channel */ + lck_mtx_unlock(nfs_global_mutex); + return; + } + so = nfs4_cb_so; /* last user: detach the listening socket and the connected-socket list under the mutex... */ + nfs4_cb_so = NULL; + TAILQ_INIT(&cb_socks); + TAILQ_CONCAT(&cb_socks, &nfs4_cb_socks, ncbs_link); + lck_mtx_unlock(nfs_global_mutex); /* ...and close everything only after the mutex has been dropped */ + if (so) { + sock_shutdown(so, SHUT_RDWR); + sock_close(so); + } + while ((ncbsp = TAILQ_FIRST(&cb_socks))) { + TAILQ_REMOVE(&cb_socks, ncbsp, ncbs_link); + sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); + sock_close(ncbsp->ncbs_so); + FREE(ncbsp, M_TEMP); + } +} + +/* + * Check periodically for stale/unused nfs callback sockets + */ +#define NFS4_CB_TIMER_PERIOD 30 /* multiplied by 1000 when handed to nfs_interval_timer_start() below */ +#define NFS4_CB_IDLE_MAX 300 /* added to ncbs_stamp and compared against now.tv_sec, i.e. seconds */ +void +nfs4_callback_timer(__unused void *param0, __unused void *param1) +{ + struct nfs_callback_socket *ncbsp, *nextncbsp; + struct timeval now; + +loop: + lck_mtx_lock(nfs_global_mutex); + if (TAILQ_EMPTY(&nfs4_cb_socks)) { /* nothing left to watch: record that the timer is off and do not re-arm it */ + nfs4_callback_timer_on = 0; + lck_mtx_unlock(nfs_global_mutex); + return; + } + microuptime(&now); + TAILQ_FOREACH_SAFE(ncbsp, &nfs4_cb_socks, ncbs_link, nextncbsp) { + if (!(ncbsp->ncbs_flags & NCBSOCK_DEAD) && + (now.tv_sec < (ncbsp->ncbs_stamp + NFS4_CB_IDLE_MAX))) + continue; /* not marked dead and used recently enough: keep it */ + TAILQ_REMOVE(&nfs4_cb_socks, ncbsp, ncbs_link); + lck_mtx_unlock(nfs_global_mutex); + sock_shutdown(ncbsp->ncbs_so, SHUT_RDWR); + sock_close(ncbsp->ncbs_so); + FREE(ncbsp, M_TEMP); + goto loop; /* the mutex was dropped, so retake it and rescan the list from the head */ + } + nfs4_callback_timer_on = 1; /* sockets remain: re-arm at the normal period */ + nfs_interval_timer_start(nfs4_callback_timer_call, + NFS4_CB_TIMER_PERIOD * 1000); + lck_mtx_unlock(nfs_global_mutex); +} + +/* + * Accept a new callback socket. 
+ */ +void +nfs4_cb_accept(socket_t so, __unused void *arg, __unused int waitflag) +{ + socket_t newso = NULL; + struct nfs_callback_socket *ncbsp; + struct nfsmount *nmp; + struct timeval timeo, now; + struct sockaddr_in *saddr; + int error, on = 1; + + if (so != nfs4_cb_so) /* only handle upcalls for the current listening socket */ + return; + + /* allocate/initialize a new nfs_callback_socket */ + MALLOC(ncbsp, struct nfs_callback_socket *, sizeof(struct nfs_callback_socket), M_TEMP, M_WAITOK); + if (!ncbsp) { + log(LOG_ERR, "nfs callback accept: no memory for new socket\n"); + return; + } + bzero(ncbsp, sizeof(*ncbsp)); + ncbsp->ncbs_sin.sin_len = sizeof(struct sockaddr_in); + ncbsp->ncbs_rrs.nrrs_markerleft = sizeof(ncbsp->ncbs_rrs.nrrs_fragleft); /* record parser starts out expecting a whole record marker */ + + /* accept a new socket */ + error = sock_accept(so, (struct sockaddr*)&ncbsp->ncbs_sin, + ncbsp->ncbs_sin.sin_len, MSG_DONTWAIT, + nfs4_cb_rcv, ncbsp, &newso); /* nfs4_cb_rcv becomes the new socket's upcall, with ncbsp as its argument */ + if (error) { + log(LOG_INFO, "nfs callback accept: error %d accepting socket\n", error); + FREE(ncbsp, M_TEMP); + return; + } + + /* set up the new socket */ + /* receive timeout shouldn't matter. 
If timeout on send, we'll want to drop the socket */ + timeo.tv_usec = 0; + timeo.tv_sec = 60; + error = sock_setsockopt(newso, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + if (error) + log(LOG_INFO, "nfs callback socket: error %d setting socket rx timeout\n", error); + error = sock_setsockopt(newso, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (error) + log(LOG_INFO, "nfs callback socket: error %d setting socket tx timeout\n", error); + sock_setsockopt(newso, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); + sock_setsockopt(newso, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on)); + sock_setsockopt(newso, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on)); + + ncbsp->ncbs_so = newso; + microuptime(&now); + ncbsp->ncbs_stamp = now.tv_sec; /* start the idle clock checked by nfs4_callback_timer() */ + + lck_mtx_lock(nfs_global_mutex); + + /* add it to the list */ + TAILQ_INSERT_HEAD(&nfs4_cb_socks, ncbsp, ncbs_link); + + /* verify it's from a host we have mounted */ + TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) { + /* check socket's source address matches this mount's server address */ + saddr = mbuf_data(nmp->nm_nam); + if ((ncbsp->ncbs_sin.sin_len == saddr->sin_len) && + (ncbsp->ncbs_sin.sin_family == saddr->sin_family) && + (ncbsp->ncbs_sin.sin_addr.s_addr == saddr->sin_addr.s_addr)) + break; + } + if (!nmp) /* we don't want this socket, mark it dead */ + ncbsp->ncbs_flags |= NCBSOCK_DEAD; + + /* make sure the callback socket cleanup timer is running */ + /* (shorten the timer if we've got a socket we don't want) */ + /* nfs4_callback_timer_on: 0 = not running, 1 = armed at the normal period, 2 = re-armed with the short delay */ + if (!nfs4_callback_timer_on) { + nfs4_callback_timer_on = 1; + nfs_interval_timer_start(nfs4_callback_timer_call, + !nmp ? 500 : (NFS4_CB_TIMER_PERIOD * 1000)); + } else if (!nmp && (nfs4_callback_timer_on < 2)) { + nfs4_callback_timer_on = 2; + thread_call_cancel(nfs4_callback_timer_call); + nfs_interval_timer_start(nfs4_callback_timer_call, 500); + } + + lck_mtx_unlock(nfs_global_mutex); +} + +/* + * Receive mbufs from callback sockets into RPC records and process each record. 
+ * Detect connection has been closed and shut down. + */ +void +nfs4_cb_rcv(socket_t so, void *arg, __unused int waitflag) +{ + struct nfs_callback_socket *ncbsp = arg; + struct timespec ts = {1,0}; + struct timeval now; + mbuf_t m; /* NOTE(review): not initialized here; the if (m) test below relies on nfs_rpc_record_read() always storing to it - confirm */ + int error = 0, recv = 1; + + lck_mtx_lock(nfs_global_mutex); + while (ncbsp->ncbs_flags & NCBSOCK_UPCALL) { + /* wait if upcall is already in progress */ + ncbsp->ncbs_flags |= NCBSOCK_UPCALLWANT; + msleep(ncbsp, nfs_global_mutex, PSOCK, "cbupcall", &ts); + } + ncbsp->ncbs_flags |= NCBSOCK_UPCALL; /* claim the socket so only one upcall processes it at a time */ + lck_mtx_unlock(nfs_global_mutex); + + /* loop while we make error-free progress */ + while (!error && recv) { + error = nfs_rpc_record_read(so, &ncbsp->ncbs_rrs, &recv, &m); + if (m) /* handle the request */ + error = nfs4_cb_handler(ncbsp, m); + } + + /* note: no error and no data indicates server closed its end */ + if ((error != EWOULDBLOCK) && (error || !recv)) { + /* + * Socket is either being closed or should be. + * We can't close the socket in the context of the upcall. + * So we mark it as dead and leave it for the cleanup timer to reap. + */ + ncbsp->ncbs_stamp = 0; + ncbsp->ncbs_flags |= NCBSOCK_DEAD; /* NOTE(review): flags are modified here without nfs_global_mutex held - confirm this is safe */ + } else { + microuptime(&now); + ncbsp->ncbs_stamp = now.tv_sec; /* socket still in use: refresh the last-accessed time */ + } + + lck_mtx_lock(nfs_global_mutex); + ncbsp->ncbs_flags &= ~NCBSOCK_UPCALL; /* release the socket; NCBSOCK_UPCALLWANT is not cleared here */ + lck_mtx_unlock(nfs_global_mutex); + wakeup(ncbsp); /* wake any upcall sleeping on ncbsp in the wait loop above */ +} + +/* + * Handle an NFS callback channel request. 
+ */
+int
+nfs4_cb_handler(struct nfs_callback_socket *ncbsp, mbuf_t mreq)
+{
+	socket_t so = ncbsp->ncbs_so;
+	struct nfsm_chain nmreq, nmrep;
+	mbuf_t mhead = NULL, mrest = NULL, m;
+	struct sockaddr_in *saddr;
+	struct msghdr msg;
+	struct nfsmount *nmp;
+	fhandle_t fh;
+	nfsnode_t np;
+	nfs_stateid stateid;
+	uint32_t bitmap[NFS_ATTR_BITMAP_LEN], rbitmap[NFS_ATTR_BITMAP_LEN], bmlen, truncate, attrbytes;
+	uint32_t val, xid, procnum, taglen, cbid, numops, op, status;
+	uint32_t auth_type, auth_len;
+	uint32_t numres, *pnumres;
+	int error = 0, replen, len;
+	size_t sentlen = 0;
+
+	/*
+	 * Overview:
+	 *  - parse and validate the RPC call header found in mreq;
+	 *  - for CB_COMPOUND, build the per-operation results in "mrest";
+	 *  - at nfsmout, build the RPC reply header in "mhead", chain mrest
+	 *    behind it and send the whole reply on the callback socket.
+	 *
+	 * "status" is the value reported to the peer inside the reply, while
+	 * "error" is the local result that is returned to the caller.
+	 * Every path leaves through "out", where mreq is freed.
+	 */
+	xid = numops = op = status = procnum = taglen = cbid = 0;
+
+	nfsm_chain_dissect_init(error, &nmreq, mreq);
+	nfsm_chain_get_32(error, &nmreq, xid);		// RPC XID
+	nfsm_chain_get_32(error, &nmreq, val);		// RPC Call
+	nfsm_assert(error, (val == RPC_CALL), EBADRPC);
+	nfsm_chain_get_32(error, &nmreq, val);		// RPC Version
+	nfsm_assert(error, (val == RPC_VER2), ERPCMISMATCH);
+	nfsm_chain_get_32(error, &nmreq, val);		// RPC Program Number
+	nfsm_assert(error, (val == NFS4_CALLBACK_PROG), EPROGUNAVAIL);
+	nfsm_chain_get_32(error, &nmreq, val);		// NFS Callback Program Version Number
+	nfsm_assert(error, (val == NFS4_CALLBACK_PROG_VERSION), EPROGMISMATCH);
+	nfsm_chain_get_32(error, &nmreq, procnum);	// NFS Callback Procedure Number
+	nfsm_assert(error, (procnum <= NFSPROC4_CB_COMPOUND), EPROCUNAVAIL);
+
+	/* Handle authentication */
+	/* XXX just ignore auth for now - handling kerberos may be tricky */
+	nfsm_chain_get_32(error, &nmreq, auth_type);	// RPC Auth Flavor
+	nfsm_chain_get_32(error, &nmreq, auth_len);	// RPC Auth Length
+	nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
+	if (!error && (auth_len > 0))
+		nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
+	nfsm_chain_adv(error, &nmreq, NFSX_UNSIGNED);	// verifier flavor (should be AUTH_NONE)
+	nfsm_chain_get_32(error, &nmreq, auth_len);	// verifier length
+	nfsm_assert(error, (auth_len <= RPCAUTH_MAXSIZ), EBADRPC);
+	if (!error && (auth_len > 0))
+		nfsm_chain_adv(error, &nmreq, nfsm_rndup(auth_len));
+	if (error) {
+		/* bad header: report the problem to the peer in the reply */
+		status = error;
+		error = 0;
+		goto nfsmout;
+	}
+
+	switch (procnum) {
+	case NFSPROC4_CB_NULL:
+		/* the reply code below emits no status word for NFSERR_RETVOID */
+		status = NFSERR_RETVOID;
+		break;
+	case NFSPROC4_CB_COMPOUND:
+		/* tag, minorversion, cb ident, numops, op array */
+		nfsm_chain_get_32(error, &nmreq, taglen);	/* tag length */
+		/*
+		 * Bound the tag length that was just read.  (This used to test
+		 * "val", which at this point still holds the already-validated
+		 * program version, so the limit was never enforced.)
+		 */
+		nfsm_assert(error, (taglen <= NFS4_OPAQUE_LIMIT), EBADRPC);
+		if (error) {
+			/*
+			 * taglen sizes the allocation and the copy loop below, so
+			 * stop here; same handling as a bad RPC header above.
+			 */
+			status = error;
+			error = 0;
+			goto nfsmout;
+		}
+
+		/* start building the body of the response */
+		nfsm_mbuf_get(error, &mrest, nfsm_rndup(taglen) + 5*NFSX_UNSIGNED);
+		nfsm_chain_init(&nmrep, mrest);
+
+		/* copy tag from request to response */
+		nfsm_chain_add_32(error, &nmrep, taglen);	/* tag length */
+		for (len = (int)taglen; !error && (len > 0); len -= NFSX_UNSIGNED) {
+			nfsm_chain_get_32(error, &nmreq, val);
+			nfsm_chain_add_32(error, &nmrep, val);
+		}
+
+		/* insert number of results placeholder */
+		numres = 0;
+		nfsm_chain_add_32(error, &nmrep, numres);
+		/* remember where the placeholder lives so it can be patched later */
+		pnumres = (uint32_t*)(nmrep.nmc_ptr - NFSX_UNSIGNED);
+
+		nfsm_chain_get_32(error, &nmreq, val);		/* minorversion */
+		nfsm_assert(error, (val == 0), NFSERR_MINOR_VERS_MISMATCH);
+		nfsm_chain_get_32(error, &nmreq, cbid);		/* callback ID */
+		nfsm_chain_get_32(error, &nmreq, numops);	/* number of operations */
+		if (error) {
+			/* translate the local error into a status for the reply */
+			if ((error == EBADRPC) || (error == NFSERR_MINOR_VERS_MISMATCH))
+				status = error;
+			else if ((error == ENOBUFS) || (error == ENOMEM))
+				status = NFSERR_RESOURCE;
+			else
+				status = NFSERR_SERVERFAULT;
+			error = 0;
+			nfsm_chain_null(&nmrep);
+			goto nfsmout;
+		}
+		/* match the callback ID to a registered mount */
+		lck_mtx_lock(nfs_global_mutex);
+		TAILQ_FOREACH(nmp, &nfs4_cb_mounts, nm_cblink) {
+			if (nmp->nm_cbid != cbid)
+				continue;
+			/* verify socket's source address matches this mount's server address */
+			saddr = mbuf_data(nmp->nm_nam);
+			if ((ncbsp->ncbs_sin.sin_len != saddr->sin_len) ||
+			    (ncbsp->ncbs_sin.sin_family != saddr->sin_family) ||
+			    (ncbsp->ncbs_sin.sin_addr.s_addr != saddr->sin_addr.s_addr))
+				continue;
+			break;
+		}
+		/* mark the NFS mount as busy */
+		if (nmp)
+			nmp->nm_cbrefs++;
+		lck_mtx_unlock(nfs_global_mutex);
+		if (!nmp) {
+			/* if no mount match, just drop socket. */
+			error = EPERM;
+			nfsm_chain_null(&nmrep);
+			goto out;
+		}
+
+		/* process ops, adding results to mrest */
+		while (numops > 0) {
+			numops--;
+			nfsm_chain_get_32(error, &nmreq, op);
+			if (error)
+				break;
+			switch (op) {
+			case NFS_OP_CB_GETATTR:
+				// (FH, BITMAP) -> (STATUS, BITMAP, ATTRS)
+				np = NULL;
+				nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
+				bmlen = NFS_ATTR_BITMAP_LEN;
+				nfsm_chain_get_bitmap(error, &nmreq, bitmap, bmlen);
+				if (error) {
+					status = error;
+					error = 0;
+					numops = 0; /* don't process any more ops */
+				} else {
+					/* find the node for the file handle */
+					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
+					if (error || !np) {
+						status = NFSERR_BADHANDLE;
+						error = 0;
+						np = NULL;
+						numops = 0; /* don't process any more ops */
+					}
+				}
+				nfsm_chain_add_32(error, &nmrep, op);
+				nfsm_chain_add_32(error, &nmrep, status);
+				if (!error && (status == EBADRPC))
+					error = status;
+				if (np) {
+					/* only allow returning size, change, and mtime attrs */
+					NFS_CLEAR_ATTRIBUTES(&rbitmap);
+					attrbytes = 0;
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE)) {
+						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_CHANGE);
+						attrbytes += 2 * NFSX_UNSIGNED;
+					}
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE)) {
+						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_SIZE);
+						attrbytes += 2 * NFSX_UNSIGNED;
+					}
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
+						NFS_BITMAP_SET(&rbitmap, NFS_FATTR_TIME_MODIFY);
+						attrbytes += 3 * NFSX_UNSIGNED;
+					}
+					nfsm_chain_add_bitmap(error, &nmrep, rbitmap, NFS_ATTR_BITMAP_LEN);
+					nfsm_chain_add_32(error, &nmrep, attrbytes);
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_CHANGE))
+						nfsm_chain_add_64(error, &nmrep,
+							np->n_vattr.nva_change + ((np->n_flag & NMODIFIED) ? 1 : 0));
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_SIZE))
+						nfsm_chain_add_64(error, &nmrep, np->n_size);
+					if (NFS_BITMAP_ISSET(&bitmap, NFS_FATTR_TIME_MODIFY)) {
+						nfsm_chain_add_64(error, &nmrep, np->n_vattr.nva_timesec[NFSTIME_MODIFY]);
+						nfsm_chain_add_32(error, &nmrep, np->n_vattr.nva_timensec[NFSTIME_MODIFY]);
+					}
+					nfs_node_unlock(np);
+					vnode_put(NFSTOV(np));
+					np = NULL;
+				}
+				/*
+				 * If we hit an error building the reply, we can't easily back up.
+				 * So we'll just update the status and hope the server ignores the
+				 * extra garbage.
+				 */
+				break;
+			case NFS_OP_CB_RECALL:
+				// (STATEID, TRUNCATE, FH) -> (STATUS)
+				np = NULL;
+				nfsm_chain_get_stateid(error, &nmreq, &stateid);
+				nfsm_chain_get_32(error, &nmreq, truncate);
+				nfsm_chain_get_fh(error, &nmreq, NFS_VER4, &fh);
+				if (error) {
+					status = error;
+					error = 0;
+					numops = 0; /* don't process any more ops */
+				} else {
+					/* find the node for the file handle */
+					error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, NULL, NULL, NG_NOCREATE, &np);
+					if (error || !np) {
+						status = NFSERR_BADHANDLE;
+						error = 0;
+						np = NULL;
+						numops = 0; /* don't process any more ops */
+					} else if (!(np->n_openflags & N_DELEG_MASK) ||
+						    bcmp(&np->n_dstateid, &stateid, sizeof(stateid))) {
+						/* delegation stateid state doesn't match */
+						status = NFSERR_BAD_STATEID;
+						numops = 0; /* don't process any more ops */
+					}
+					if (!status) {
+						/* add node to recall queue, and wake socket thread */
+						lck_mtx_lock(&nmp->nm_lock);
+						if (np->n_dlink.tqe_next == NFSNOLIST)
+							TAILQ_INSERT_TAIL(&nmp->nm_recallq, np, n_dlink);
+						nfs_mount_sock_thread_wake(nmp);
+						lck_mtx_unlock(&nmp->nm_lock);
+					}
+					if (np) {
+						nfs_node_unlock(np);
+						vnode_put(NFSTOV(np));
+					}
+				}
+				nfsm_chain_add_32(error, &nmrep, op);
+				nfsm_chain_add_32(error, &nmrep, status);
+				if (!error && (status == EBADRPC))
+					error = status;
+				break;
+			case NFS_OP_CB_ILLEGAL:
+			default:
+				nfsm_chain_add_32(error, &nmrep, NFS_OP_CB_ILLEGAL);
+				status = NFSERR_OP_ILLEGAL;
+				nfsm_chain_add_32(error, &nmrep, status);
+				numops = 0; /* don't process any more ops */
+				break;
+			}
+			numres++;
+		}
+
+		if (!status && error) {
+			if (error == EBADRPC)
+				status = error;
+			else if ((error == ENOBUFS) || (error == ENOMEM))
+				status = NFSERR_RESOURCE;
+			else
+				status = NFSERR_SERVERFAULT;
+			error = 0;
+		}
+
+		/* Now, set the numres field */
+		*pnumres = txdr_unsigned(numres);
+		nfsm_chain_build_done(error, &nmrep);
+		nfsm_chain_null(&nmrep);
+
+		/* drop the callback reference on the mount */
+		lck_mtx_lock(nfs_global_mutex);
+		nmp->nm_cbrefs--;
+		if (!nmp->nm_cbid)
+			wakeup(&nmp->nm_cbrefs);
+		lck_mtx_unlock(nfs_global_mutex);
+		break;
+	}
+
+nfsmout:
+	if (status == EBADRPC)
+		OSAddAtomic(1, &nfsstats.rpcinvalid);
+
+	/* build reply header */
+	error = mbuf_gethdr(MBUF_WAITOK, MBUF_TYPE_DATA, &mhead);
+	nfsm_chain_init(&nmrep, mhead);
+	nfsm_chain_add_32(error, &nmrep, 0); /* insert space for an RPC record mark */
+	nfsm_chain_add_32(error, &nmrep, xid);
+	nfsm_chain_add_32(error, &nmrep, RPC_REPLY);
+	if ((status == ERPCMISMATCH) || (status & NFSERR_AUTHERR)) {
+		nfsm_chain_add_32(error, &nmrep, RPC_MSGDENIED);
+		if (status & NFSERR_AUTHERR) {
+			nfsm_chain_add_32(error, &nmrep, RPC_AUTHERR);
+			nfsm_chain_add_32(error, &nmrep, (status & ~NFSERR_AUTHERR));
+		} else {
+			nfsm_chain_add_32(error, &nmrep, RPC_MISMATCH);
+			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
+			nfsm_chain_add_32(error, &nmrep, RPC_VER2);
+		}
+	} else {
+		/* reply status */
+		nfsm_chain_add_32(error, &nmrep, RPC_MSGACCEPTED);
+		/* XXX RPCAUTH_NULL verifier */
+		nfsm_chain_add_32(error, &nmrep, RPCAUTH_NULL);
+		nfsm_chain_add_32(error, &nmrep, 0);
+		/* accepted status */
+		switch (status) {
+		case EPROGUNAVAIL:
+			nfsm_chain_add_32(error, &nmrep, RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			nfsm_chain_add_32(error, &nmrep, RPC_PROGMISMATCH);
+			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
+			nfsm_chain_add_32(error, &nmrep, NFS4_CALLBACK_PROG_VERSION);
+			break;
+		case EPROCUNAVAIL:
+			nfsm_chain_add_32(error, &nmrep, RPC_PROCUNAVAIL);
+			break;
+		case EBADRPC:
+			nfsm_chain_add_32(error, &nmrep, RPC_GARBAGE);
+			break;
+		default:
+			nfsm_chain_add_32(error, &nmrep, RPC_SUCCESS);
+			if (status != NFSERR_RETVOID)
+				nfsm_chain_add_32(error, &nmrep, status);
+			break;
+		}
+	}
+	nfsm_chain_build_done(error, &nmrep);
+	if (error) {
+		nfsm_chain_null(&nmrep);
+		goto out;
+	}
+	/* chain the compound results (if any) behind the reply header */
+	error = mbuf_setnext(nmrep.nmc_mcur, mrest);
+	if (error) {
+		printf("nfs cb: mbuf_setnext failed %d\n", error);
+		goto out;
+	}
+	mrest = NULL;
+	/* Calculate the size of the reply */
+	replen = 0;
+	for (m = nmrep.nmc_mhead; m; m = mbuf_next(m))
+		replen += mbuf_len(m);
+	mbuf_pkthdr_setlen(mhead, replen);
+	error = mbuf_pkthdr_setrcvif(mhead, NULL);
+	/* record mark: length excluding the mark itself, high bit set */
+	nfsm_chain_set_recmark(error, &nmrep, (replen - NFSX_UNSIGNED) | 0x80000000);
+	nfsm_chain_null(&nmrep);
+
+	/* send the reply */
+	bzero(&msg, sizeof(msg));
+	error = sock_sendmbuf(so, &msg, mhead, 0, &sentlen);
+	mhead = NULL;
+	if (!error && ((int)sentlen != replen))
+		error = EWOULDBLOCK;
+	if (error == EWOULDBLOCK) /* inability to send response is considered fatal */
+		error = ETIMEDOUT;
+out:
+	if (error)
+		nfsm_chain_cleanup(&nmrep);
+	if (mhead)
+		mbuf_freem(mhead);
+	if (mrest)
+		mbuf_freem(mrest);
+	if (mreq)
+		mbuf_freem(mreq);
+	return (error);
+}
+
+
+/*
+ * Read the next (marked) RPC record from the socket.
+ *
+ * *recvp returns if any data was received.
+ * *mp returns the next complete RPC record
+ *
+ * All socket reads use MSG_DONTWAIT and the parsing progress is kept in
+ * *nrrsp between calls:
+ *   nrrs_markerleft - bytes of the record marker still to be read
+ *   nrrs_fragleft   - marker bytes while the marker is being read, then
+ *                     the number of fragment bytes still to be read
+ *   nrrs_lastfrag   - set when the marker's high bit was set
+ *   nrrs_reclen     - sum of the fragment lengths seen for this record
+ *   nrrs_m/_mlast   - head/tail of the mbuf chain collected so far
+ *
+ * Returns 0, the error from the socket/mbuf call that failed, or EFBIG
+ * when the accumulated record length exceeds NFS_MAXPACKET.
+ */
+int
+nfs_rpc_record_read(socket_t so, struct nfs_rpc_record_state *nrrsp, int *recvp, mbuf_t *mp)
+{
+	struct iovec aio;
+	struct msghdr msg;
+	size_t rcvlen;
+	int error = 0;
+	mbuf_t m;
+
+	*recvp = 0;
+	*mp = NULL;
+
+	/* read the TCP RPC record marker */
+	while (!error && nrrsp->nrrs_markerleft) {
+		/* receive into the not-yet-filled tail bytes of nrrs_fragleft */
+		aio.iov_base = ((char*)&nrrsp->nrrs_fragleft +
+				sizeof(nrrsp->nrrs_fragleft) - nrrsp->nrrs_markerleft);
+		aio.iov_len = nrrsp->nrrs_markerleft;
+		bzero(&msg, sizeof(msg));
+		msg.msg_iov = &aio;
+		msg.msg_iovlen = 1;
+		error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen);
+		if (error || !rcvlen)
+			break;
+		*recvp = 1;
+		nrrsp->nrrs_markerleft -= rcvlen;
+		if (nrrsp->nrrs_markerleft)
+			continue;
+		/* record marker complete */
+		nrrsp->nrrs_fragleft = ntohl(nrrsp->nrrs_fragleft);
+		/* high bit flags the last fragment; the rest is the fragment length */
+		if (nrrsp->nrrs_fragleft & 0x80000000) {
+			nrrsp->nrrs_lastfrag = 1;
+			nrrsp->nrrs_fragleft &= ~0x80000000;
+		}
+		nrrsp->nrrs_reclen += nrrsp->nrrs_fragleft;
+		if (nrrsp->nrrs_reclen > NFS_MAXPACKET) {
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			log(LOG_ERR, "impossible RPC record length (%d) on callback", nrrsp->nrrs_reclen);
+			error = EFBIG;
+		}
+	}
+
+	/* read the TCP RPC record fragment */
+	while (!error && !nrrsp->nrrs_markerleft && nrrsp->nrrs_fragleft) {
+		m = NULL;
+		/* ask for at most what is left of this fragment */
+		rcvlen = nrrsp->nrrs_fragleft;
+		error = sock_receivembuf(so, NULL, &m, MSG_DONTWAIT, &rcvlen);
+		if (error || !rcvlen || !m)
+			break;
+		*recvp = 1;
+		/* append mbufs to list */
+		nrrsp->nrrs_fragleft -= rcvlen;
+		if (!nrrsp->nrrs_m) {
+			nrrsp->nrrs_m = m;
+		} else {
+			error = mbuf_setnext(nrrsp->nrrs_mlast, m);
+			if (error) {
+				printf("nfs tcp rcv: mbuf_setnext failed %d\n", error);
+				mbuf_freem(m);
+				break;
+			}
+		}
+		/* walk to the end of what was just received to find the new tail */
+		while (mbuf_next(m))
+			m = mbuf_next(m);
+		nrrsp->nrrs_mlast = m;
+	}
+
+	/* done reading fragment? */
+	if (!error && !nrrsp->nrrs_markerleft && !nrrsp->nrrs_fragleft) {
+		/* reset socket fragment parsing state */
+		nrrsp->nrrs_markerleft = sizeof(nrrsp->nrrs_fragleft);
+		if (nrrsp->nrrs_lastfrag) {
+			/* RPC record complete */
+			*mp = nrrsp->nrrs_m;
+			/* reset socket record parsing state */
+			nrrsp->nrrs_reclen = 0;
+			nrrsp->nrrs_m = nrrsp->nrrs_mlast = NULL;
+			nrrsp->nrrs_lastfrag = 0;
+		}
+	}
+
+	return (error);
+}
+
+
+
 /* * The NFS client send routine. * * @@ -667,7 +1557,7 @@ nfs_mount_sock_thread_wake(struct nfsmount *nmp) * Holds nfs_sndlock() for the duration of this call. * * - check for request termination (sigintr) - * - perform reconnect, if necessary + * - wait for reconnect, if necessary * - UDP: check the congestion window * - make a copy of the request to send * - UDP: update the congestion window @@ -699,21 +1589,39 @@ nfs_send(struct nfsreq *req, int wait) again: error = nfs_sndlock(req); - if (error) + if (error) { + lck_mtx_lock(&req->r_mtx); + req->r_error = error; + req->r_flags &= ~R_SENDING; + lck_mtx_unlock(&req->r_mtx); return (error); + } error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0); if (error) { nfs_sndunlock(req); + lck_mtx_lock(&req->r_mtx); + req->r_error = error; + req->r_flags &= ~R_SENDING; + lck_mtx_unlock(&req->r_mtx); return (error); } nmp = req->r_nmp; sotype = nmp->nm_sotype; - if ((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) { - /* a setup RPC but we're not in SETUP... must need reconnect */ + /* + * If it's a setup RPC but we're not in SETUP... must need reconnect. + * If it's a recovery RPC but the socket's not ready... must need reconnect.
+ */ + if (((req->r_flags & R_SETUP) && !(nmp->nm_sockflags & NMSOCK_SETUP)) || + ((req->r_flags & R_RECOVER) && !(nmp->nm_sockflags & NMSOCK_READY))) { + error = ETIMEDOUT; nfs_sndunlock(req); - return (EPIPE); + lck_mtx_lock(&req->r_mtx); + req->r_error = error; + req->r_flags &= ~R_SENDING; + lck_mtx_unlock(&req->r_mtx); + return (error); } /* If the socket needs reconnection, do that now. */ @@ -727,6 +1635,7 @@ nfs_send(struct nfsreq *req, int wait) nfs_sndunlock(req); if (!wait) { lck_mtx_lock(&req->r_mtx); + req->r_flags &= ~R_SENDING; req->r_flags |= R_MUSTRESEND; req->r_rtt = 0; lck_mtx_unlock(&req->r_mtx); @@ -744,6 +1653,17 @@ nfs_send(struct nfsreq *req, int wait) error = EIO; break; } + if ((nmp->nm_flag & NFSMNT_SOFT) && (nmp->nm_reconnect_start > 0)) { + struct timeval now; + microuptime(&now); + if ((now.tv_sec - nmp->nm_reconnect_start) >= 8) { + /* soft mount in reconnect for a while... terminate ASAP */ + OSAddAtomic(1, &nfsstats.rpctimeouts); + req->r_flags |= R_SOFTTERM; + req->r_error = error = ETIMEDOUT; + break; + } + } /* make sure socket thread is running, then wait */ nfs_mount_sock_thread_wake(nmp); if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 1))) @@ -752,8 +1672,13 @@ nfs_send(struct nfsreq *req, int wait) slpflag = 0; } lck_mtx_unlock(&nmp->nm_lock); - if (error) + if (error) { + lck_mtx_lock(&req->r_mtx); + req->r_error = error; + req->r_flags &= ~R_SENDING; + lck_mtx_unlock(&req->r_mtx); return (error); + } goto again; } so = nmp->nm_so; @@ -761,6 +1686,7 @@ nfs_send(struct nfsreq *req, int wait) if (!so) { nfs_sndunlock(req); lck_mtx_lock(&req->r_mtx); + req->r_flags &= ~R_SENDING; req->r_flags |= R_MUSTRESEND; req->r_rtt = 0; lck_mtx_unlock(&req->r_mtx); @@ -777,6 +1703,7 @@ nfs_send(struct nfsreq *req, int wait) slpflag = ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) ? 
PCATCH : 0; lck_mtx_unlock(&nmp->nm_lock); nfs_sndunlock(req); + req->r_flags &= ~R_SENDING; req->r_flags |= R_MUSTRESEND; lck_mtx_unlock(&req->r_mtx); if (!wait) { @@ -829,6 +1756,7 @@ nfs_send(struct nfsreq *req, int wait) log(LOG_INFO, "nfs_send: mbuf copy failed %d\n", error); nfs_sndunlock(req); lck_mtx_lock(&req->r_mtx); + req->r_flags &= ~R_SENDING; req->r_flags |= R_MUSTRESEND; req->r_rtt = 0; lck_mtx_unlock(&req->r_mtx); @@ -853,6 +1781,7 @@ nfs_send(struct nfsreq *req, int wait) needrecon = ((sotype == SOCK_STREAM) && sentlen && (sentlen != req->r_mreqlen)); lck_mtx_lock(&req->r_mtx); + req->r_flags &= ~R_SENDING; req->r_rtt = 0; if (rexmit && (++req->r_rexmit > NFS_MAXREXMIT)) req->r_rexmit = NFS_MAXREXMIT; @@ -861,7 +1790,7 @@ nfs_send(struct nfsreq *req, int wait) /* SUCCESS */ req->r_flags &= ~R_RESENDERR; if (rexmit) - OSAddAtomic(1, (SInt32*)&nfsstats.rpcretries); + OSAddAtomic(1, &nfsstats.rpcretries); req->r_flags |= R_SENT; if (req->r_flags & R_WAITSENT) { req->r_flags &= ~R_WAITSENT; @@ -969,7 +1898,7 @@ nfs_send(struct nfsreq *req, int wait) */ /* NFS client UDP socket upcall */ -static void +void nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag) { struct nfsmount *nmp = arg; @@ -1002,11 +1931,11 @@ nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag) } /* NFS client TCP socket upcall */ -static void +void nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) { struct nfsmount *nmp = arg; - struct iovec_32 aio; + struct iovec aio; struct msghdr msg; size_t rcvlen; mbuf_t m; @@ -1035,11 +1964,11 @@ nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) /* read the TCP RPC record marker */ while (!error && nmp->nm_markerleft) { - aio.iov_base = (uintptr_t)((char*)&nmp->nm_fragleft + + aio.iov_base = ((char*)&nmp->nm_fragleft + sizeof(nmp->nm_fragleft) - nmp->nm_markerleft); aio.iov_len = nmp->nm_markerleft; bzero(&msg, sizeof(msg)); - msg.msg_iov = (struct iovec *) &aio; + msg.msg_iov = &aio; msg.msg_iovlen = 1; 
lck_mtx_unlock(&nmp->nm_lock); error = sock_receive(so, &msg, MSG_DONTWAIT, &rcvlen); @@ -1138,10 +2067,10 @@ nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) /* * "poke" a socket to try to provoke any pending errors */ -static void +void nfs_sock_poke(struct nfsmount *nmp) { - struct iovec_32 aio; + struct iovec aio; struct msghdr msg; size_t len; int error = 0; @@ -1153,11 +2082,11 @@ nfs_sock_poke(struct nfsmount *nmp) return; } lck_mtx_unlock(&nmp->nm_lock); - aio.iov_base = (uintptr_t)&dummy; + aio.iov_base = &dummy; aio.iov_len = 0; len = 0; bzero(&msg, sizeof(msg)); - msg.msg_iov = (struct iovec *) &aio; + msg.msg_iov = &aio; msg.msg_iovlen = 1; error = sock_send(nmp->nm_so, &msg, MSG_DONTWAIT, &len); NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error)); @@ -1166,21 +2095,20 @@ nfs_sock_poke(struct nfsmount *nmp) /* * Match an RPC reply with the corresponding request */ -static void +void nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep) { struct nfsreq *req; struct nfsm_chain nmrep; - u_long reply = 0, rxid = 0; - long t1; - int error = 0, asyncioq, asyncgss; + u_int32_t reply = 0, rxid = 0; + int error = 0, asyncioq, t1; /* Get the xid and check that it is an rpc reply */ nfsm_chain_dissect_init(error, &nmrep, mrep); nfsm_chain_get_32(error, &nmrep, rxid); nfsm_chain_get_32(error, &nmrep, reply); if (error || (reply != RPC_REPLY)) { - OSAddAtomic(1, (SInt32*)&nfsstats.rpcinvalid); + OSAddAtomic(1, &nfsstats.rpcinvalid); mbuf_freem(mrep); return; } @@ -1255,14 +2183,10 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep) /* signal anyone waiting on this request */ wakeup(req); asyncioq = (req->r_callback.rcb_func != NULL); - if ((asyncgss = ((req->r_gss_ctx != NULL) && ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_ALLOCATED)) == (R_ASYNC|R_ALLOCATED))))) - nfs_request_ref(req, 1); + if (req->r_gss_ctx != NULL) + nfs_gss_clnt_rpcdone(req); lck_mtx_unlock(&req->r_mtx); lck_mtx_unlock(nfs_request_mutex); - if (asyncgss) { - 
nfs_gss_clnt_rpcdone(req); - nfs_request_rele(req); - } /* if it's an async RPC with a callback, queue it up */ if (asyncioq) nfs_asyncio_finish(req); @@ -1272,7 +2196,7 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep) if (!req) { /* not matched to a request, so drop it. */ lck_mtx_unlock(nfs_request_mutex); - OSAddAtomic(1, (SInt32*)&nfsstats.rpcunexpected); + OSAddAtomic(1, &nfsstats.rpcunexpected); mbuf_freem(mrep); } } @@ -1281,21 +2205,20 @@ nfs_request_match_reply(struct nfsmount *nmp, mbuf_t mrep) * Wait for the reply for a given request... * ...potentially resending the request if necessary. */ -static int +int nfs_wait_reply(struct nfsreq *req) { - struct nfsmount *nmp = req->r_nmp; - struct timespec ts = { 30, 0 }; + struct timespec ts = { 2, 0 }; int error = 0, slpflag; - if ((nmp->nm_flag & NFSMNT_INT) && req->r_thread) + if (req->r_nmp && (req->r_nmp->nm_flag & NFSMNT_INT) && req->r_thread) slpflag = PCATCH; else slpflag = 0; lck_mtx_lock(&req->r_mtx); while (!req->r_nmrep.nmc_mhead) { - if ((error = nfs_sigintr(nmp, req, req->r_thread, 0))) + if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) break; if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) break; @@ -1303,6 +2226,7 @@ nfs_wait_reply(struct nfsreq *req) if (req->r_flags & R_MUSTRESEND) { NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n", req->r_procnum, req->r_xid, req->r_flags, req->r_rtt)); + req->r_flags |= R_SENDING; lck_mtx_unlock(&req->r_mtx); if (req->r_gss_ctx) { /* @@ -1311,6 +2235,9 @@ nfs_wait_reply(struct nfsreq *req) * without bumping the cred sequence number. * Go back and re-build the request. 
*/ + lck_mtx_lock(&req->r_mtx); + req->r_flags &= ~R_SENDING; + lck_mtx_unlock(&req->r_mtx); return (EAGAIN); } error = nfs_send(req, 1); @@ -1405,7 +2332,7 @@ nfs_request_create( } if ((nmp->nm_vers != NFS_VER4) && (procnum >= 0) && (procnum < NFS_NPROCS)) - OSAddAtomic(1, (SInt32*)&nfsstats.rpccnt[procnum]); + OSAddAtomic(1, &nfsstats.rpccnt[procnum]); if ((nmp->nm_vers == NFS_VER4) && (procnum != NFSPROC4_COMPOUND) && (procnum != NFSPROC4_NULL)) panic("nfs_request: invalid NFSv4 RPC request %d\n", procnum); @@ -1448,6 +2375,7 @@ nfs_request_destroy(struct nfsreq *req) struct nfsmount *nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; struct gss_seq *gsp, *ngsp; struct timespec ts = { 1, 0 }; + int clearjbtimeo = 0; if (!req || !(req->r_flags & R_INITTED)) return; @@ -1463,23 +2391,31 @@ nfs_request_destroy(struct nfsreq *req) } lck_mtx_unlock(nfsiod_mutex); } + lck_mtx_lock(&req->r_mtx); if (nmp) { lck_mtx_lock(&nmp->nm_lock); if (req->r_rchain.tqe_next != NFSREQNOLIST) { TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); req->r_rchain.tqe_next = NFSREQNOLIST; - req->r_flags &= ~R_RESENDQ; + if (req->r_flags & R_RESENDQ) + req->r_flags &= ~R_RESENDQ; } if (req->r_cchain.tqe_next != NFSREQNOLIST) { TAILQ_REMOVE(&nmp->nm_cwndq, req, r_cchain); req->r_cchain.tqe_next = NFSREQNOLIST; } + if (req->r_flags & R_JBTPRINTFMSG) { + req->r_flags &= ~R_JBTPRINTFMSG; + nmp->nm_jbreqs--; + clearjbtimeo = (nmp->nm_jbreqs == 0) ? 
NFSSTA_JUKEBOXTIMEO : 0; + } lck_mtx_unlock(&nmp->nm_lock); } - lck_mtx_lock(&req->r_mtx); while (req->r_flags & R_RESENDQ) msleep(req, &req->r_mtx, (PZERO - 1), "nfsresendqwait", &ts); lck_mtx_unlock(&req->r_mtx); + if (clearjbtimeo) + nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); if (req->r_mhead) mbuf_freem(req->r_mhead); else if (req->r_mrest) @@ -1594,6 +2530,10 @@ nfs_request_send(struct nfsreq *req, int wait) struct nfsmount *nmp; struct timeval now; + lck_mtx_lock(&req->r_mtx); + req->r_flags |= R_SENDING; + lck_mtx_unlock(&req->r_mtx); + lck_mtx_lock(nfs_request_mutex); nmp = req->r_np ? NFSTONMP(req->r_np) : req->r_nmp; @@ -1609,7 +2549,7 @@ nfs_request_send(struct nfsreq *req, int wait) ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay)); } - OSAddAtomic(1, (SInt32*)&nfsstats.rpcrequests); + OSAddAtomic(1, &nfsstats.rpcrequests); /* * Chain request into list of outstanding requests. Be sure @@ -1658,7 +2598,7 @@ nfs_request_finish( uint32_t auth_status = 0; uint32_t accepted_status = 0; struct nfsm_chain nmrep; - int error, auth; + int error, auth, clearjbtimeo; error = req->r_error; @@ -1675,7 +2615,7 @@ nfs_request_finish( /* * Decrement the outstanding request count. 
*/ - if (req->r_flags & R_CWND) { + if ((req->r_flags & R_CWND) && nmp) { req->r_flags &= ~R_CWND; lck_mtx_lock(&nmp->nm_lock); FSDBG(273, R_XID32(req->r_xid), req, nmp->nm_sent, nmp->nm_cwnd); @@ -1762,7 +2702,7 @@ nfs_request_finish( error = nfs_gss_clnt_args_restore(req); if (error && error != ENEEDAUTH) break; - + if (!error) { error = nfs_gss_clnt_ctx_renew(req); if (error) @@ -1827,7 +2767,7 @@ nfs_request_finish( if ((req->r_delay >= 30) && !(nmp->nm_state & NFSSTA_MOUNTED)) { /* we're not yet completely mounted and */ /* we can't complete an RPC, so we fail */ - OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts); + OSAddAtomic(1, &nfsstats.rpctimeouts); nfs_softterm(req); error = req->r_error; goto nfsmout; @@ -1835,10 +2775,22 @@ nfs_request_finish( req->r_delay = !req->r_delay ? NFS_TRYLATERDEL : (req->r_delay * 2); if (req->r_delay > 30) req->r_delay = 30; - if (nmp->nm_tprintf_initial_delay && (req->r_delay == 30)) { + if (nmp->nm_tprintf_initial_delay && (req->r_delay >= nmp->nm_tprintf_initial_delay)) { + if (!(req->r_flags & R_JBTPRINTFMSG)) { + req->r_flags |= R_JBTPRINTFMSG; + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_jbreqs++; + lck_mtx_unlock(&nmp->nm_lock); + } nfs_down(req->r_nmp, req->r_thread, 0, NFSSTA_JUKEBOXTIMEO, "resource temporarily unavailable (jukebox)"); - req->r_flags |= R_JBTPRINTFMSG; + } + if ((nmp->nm_flag & NFSMNT_SOFT) && (req->r_delay == 30)) { + /* for soft mounts, just give up after a short while */ + OSAddAtomic(1, &nfsstats.rpctimeouts); + nfs_softterm(req); + error = req->r_error; + goto nfsmout; } delay = req->r_delay; if (req->r_callback.rcb_func) { @@ -1848,7 +2800,7 @@ nfs_request_finish( } else { do { if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) - return (error); + goto nfsmout; tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0); } while (--delay > 0); } @@ -1859,8 +2811,14 @@ nfs_request_finish( return (0); } - if (req->r_flags & R_JBTPRINTFMSG) - nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, 
"resource available again"); + if (req->r_flags & R_JBTPRINTFMSG) { + req->r_flags &= ~R_JBTPRINTFMSG; + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_jbreqs--; + clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; + lck_mtx_unlock(&nmp->nm_lock); + nfs_up(nmp, req->r_thread, clearjbtimeo, "resource available again"); + } if (*status == NFS_OK) { /* @@ -1903,8 +2861,15 @@ nfs_request_finish( break; } nfsmout: - if (!error && (req->r_flags & R_JBTPRINTFMSG)) - nfs_up(nmp, req->r_thread, NFSSTA_JUKEBOXTIMEO, NULL); + if (req->r_flags & R_JBTPRINTFMSG) { + req->r_flags &= ~R_JBTPRINTFMSG; + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_jbreqs--; + clearjbtimeo = (nmp->nm_jbreqs == 0) ? NFSSTA_JUKEBOXTIMEO : 0; + lck_mtx_unlock(&nmp->nm_lock); + if (clearjbtimeo) + nfs_up(nmp, req->r_thread, clearjbtimeo, NULL); + } FSDBG(273, R_XID32(req->r_xid), nmp, req, (!error && (*status == NFS_OK)) ? 0xf0f0f0f0 : error); return (error); @@ -1971,6 +2936,57 @@ nfs_request2( return (error); } + +/* + * Set up a new null proc request to exchange GSS context tokens with the + * server. Associate the context that we are setting up with the request that we + * are sending. 
+ */
+
+int
+nfs_request_gss(
+	mount_t mp,
+	struct nfsm_chain *nmrest,
+	thread_t thd,
+	kauth_cred_t cred,
+	int flags,
+	struct nfs_gss_clnt_ctx *cp,   /* Set to gss context to renew or setup */
+	struct nfsm_chain *nmrepp,
+	int *status)
+{
+	struct nfsreq reqstore, *req = &reqstore;
+	int error;
+
+	/* set up a NULL-procedure request; nothing to undo if that fails */
+	error = nfs_request_create(NULL, mp, nmrest, NFSPROC_NULL, thd, cred, &req);
+	if (error)
+		return (error);
+	req->r_flags |= (flags & R_OPTMASK);
+
+	/* a context is mandatory: release the request and report an auth error */
+	if (!cp) {
+		printf("nfs_request_gss request has no context\n");
+		nfs_request_rele(req);
+		return (NFSERR_EAUTH);
+	}
+	nfs_gss_clnt_ctx_ref(req, cp);
+
+	FSDBG_TOP(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, 0);
+	/* header/send/wait/finish, repeated for as long as R_RESTART is set */
+	for (;;) {
+		req->r_error = 0;
+		req->r_flags &= ~R_RESTART;
+
+		error = nfs_request_add_header(req);
+		if (error)
+			break;
+
+		error = nfs_request_send(req, 1);
+		if (error)
+			break;
+
+		nfs_request_wait(req);
+
+		error = nfs_request_finish(req, nmrepp, status);
+		if (error)
+			break;
+
+		if (!(req->r_flags & R_RESTART))
+			break;
+	}
+
+	FSDBG_BOT(273, R_XID32(req->r_xid), NULL, NFSPROC_NULL, error);
+	nfs_request_rele(req);
+	return (error);
+}
+
 /* * Create and start an asynchronous NFS request. */ @@ -2094,7 +3110,7 @@ nfs_request_async_cancel(struct nfsreq *req) /* * Flag a request as being terminated. */ -static void +void nfs_softterm(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; @@ -2120,7 +3136,7 @@ nfs_softterm(struct nfsreq *req) /* * Ensure req isn't in use by the timer, then dequeue it. */ -static void +void nfs_reqdequeue(struct nfsreq *req) { lck_mtx_lock(nfs_request_mutex); @@ -2139,7 +3155,7 @@ nfs_reqdequeue(struct nfsreq *req) * Busy (lock) a nfsreq, used by the nfs timer to make sure it's not * free()'d out from under it. */ -static void +void nfs_reqbusy(struct nfsreq *req) { if (req->r_lflags & RL_BUSY) @@ -2150,7 +3166,7 @@ nfs_reqbusy(struct nfsreq *req) /* * Unbusy the nfsreq passed in, return the next nfsreq in the chain busied.
*/ -static struct nfsreq * +struct nfsreq * nfs_reqnext(struct nfsreq *req) { struct nfsreq * nextreq; @@ -2238,7 +3254,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) lck_mtx_unlock(&nmp->nm_lock); /* we're not yet completely mounted and */ /* we can't complete an RPC, so we fail */ - OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts); + OSAddAtomic(1, &nfsstats.rpctimeouts); nfs_softterm(req); finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT)); wakeup(req); @@ -2256,7 +3272,8 @@ nfs_request_timer(__unused void *param0, __unused void *param1) */ if (!(nmp->nm_flag & NFSMNT_SOFT)) maxtime = NFS_MAXTIMEO; - else if ((req->r_flags & R_SETUP) || ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8))) + else if ((req->r_flags & (R_SETUP|R_RECOVER)) || + ((nmp->nm_reconnect_start <= 0) || ((now.tv_sec - nmp->nm_reconnect_start) < 8))) maxtime = (NFS_MAXTIMEO / (nmp->nm_timeouts+1))/2; else maxtime = NFS_MINTIMEO/4; @@ -2299,6 +3316,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) (now.tv_sec - req->r_start)*NFS_HZ, maxtime)); if (nmp->nm_timeouts < 8) nmp->nm_timeouts++; + nfs_mount_check_dead_timeout(nmp); /* if it's been a few seconds, try poking the socket */ if ((nmp->nm_sotype == SOCK_STREAM) && ((now.tv_sec - req->r_start) >= 3) && @@ -2309,11 +3327,11 @@ nfs_request_timer(__unused void *param0, __unused void *param1) lck_mtx_unlock(&nmp->nm_lock); } - /* For soft mounts (& SETUPs), check for too many retransmits/timeout. */ - if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & R_SETUP)) && + /* For soft mounts (& SETUPs/RECOVERs), check for too many retransmits/timeout. 
*/ + if (((nmp->nm_flag & NFSMNT_SOFT) || (req->r_flags & (R_SETUP|R_RECOVER))) && ((req->r_rexmit >= req->r_retry) || /* too many */ ((now.tv_sec - req->r_start)*NFS_HZ > maxtime))) { /* too long */ - OSAddAtomic(1, (SInt32*)&nfsstats.rpctimeouts); + OSAddAtomic(1, &nfsstats.rpctimeouts); lck_mtx_lock(&nmp->nm_lock); if (!(nmp->nm_state & NFSSTA_TIMEO)) { lck_mtx_unlock(&nmp->nm_lock); @@ -2360,7 +3378,7 @@ nfs_request_timer(__unused void *param0, __unused void *param1) req->r_flags |= R_MUSTRESEND; req->r_rtt = -1; wakeup(req); - if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT)) == R_ASYNC) + if ((req->r_flags & (R_ASYNC|R_ASYNCWAIT|R_SENDING)) == R_ASYNC) nfs_asyncio_resend(req); lck_mtx_unlock(&req->r_mtx); } @@ -2425,6 +3443,10 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke nmp->nm_state |= NFSSTA_FORCE; } + /* Check if the mount is marked dead. */ + if (!error && (nmp->nm_state & NFSSTA_DEAD)) + error = ENXIO; + /* * If the mount is hung and we've requested not to hang * on remote filesystems, then bail now. 
@@ -2517,6 +3539,221 @@ nfs_sndunlock(struct nfsreq *req) wakeup(statep); } +int +nfs_aux_request( + struct nfsmount *nmp, + thread_t thd, + struct sockaddr_in *saddr, + mbuf_t mreq, + uint32_t xid, + int bindresv, + int timeo, + struct nfsm_chain *nmrep) +{ + int error = 0, on = 1, try, sendat = 2; + socket_t so = NULL; + struct sockaddr_in sin; + struct timeval tv = { 1, 0 }; + mbuf_t m, mrep = NULL; + struct msghdr msg; + uint32_t rxid = 0, reply = 0, reply_status, rejected_status; + uint32_t verf_type, verf_len, accepted_status; + size_t readlen; + + /* create socket and set options */ + if (((error = sock_socket(saddr->sin_family, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, &so))) || + ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || + ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) || + ((error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) + goto nfsmout; + if (bindresv) { + int portrange = IP_PORTRANGE_LOW; + error = sock_setsockopt(so, IPPROTO_IP, IP_PORTRANGE, &portrange, sizeof(portrange)); + nfsmout_if(error); + /* bind now to check for failure */ + sin.sin_len = sizeof (struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = INADDR_ANY; + sin.sin_port = 0; + error = sock_bind(so, (struct sockaddr *) &sin); + nfsmout_if(error); + } + + for (try=0; try < timeo; try++) { + if ((error = nfs_sigintr(nmp, NULL, thd, 0))) + break; + if (!try || (try == sendat)) { + /* send the request (resending periodically) */ + if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) + goto nfsmout; + bzero(&msg, sizeof(msg)); + msg.msg_name = saddr; + msg.msg_namelen = saddr->sin_len; + if ((error = sock_sendmbuf(so, &msg, m, 0, NULL))) + goto nfsmout; + sendat *= 2; + if (sendat > 30) + sendat = 30; + } + /* wait for the response */ + readlen = 1<<18; + bzero(&msg, sizeof(msg)); + error = sock_receivembuf(so, &msg, &mrep, 0, &readlen); + if (error == EWOULDBLOCK) + continue; + 
nfsmout_if(error); + /* parse the response */ + nfsm_chain_dissect_init(error, nmrep, mrep); + nfsm_chain_get_32(error, nmrep, rxid); + nfsm_chain_get_32(error, nmrep, reply); + nfsmout_if(error); + if ((rxid != xid) || (reply != RPC_REPLY)) + error = EBADRPC; + nfsm_chain_get_32(error, nmrep, reply_status); + nfsmout_if(error); + if (reply_status == RPC_MSGDENIED) { + nfsm_chain_get_32(error, nmrep, rejected_status); + nfsmout_if(error); + error = (rejected_status == RPC_MISMATCH) ? ENOTSUP : EACCES; + goto nfsmout; + } + nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */ + nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */ + nfsmout_if(error); + if (verf_len) + nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len)); + nfsm_chain_get_32(error, nmrep, accepted_status); + nfsm_assert(error, (accepted_status == RPC_SUCCESS), EIO); + break; + } +nfsmout: + if (so) { + sock_shutdown(so, SHUT_RDWR); + sock_close(so); + } + mbuf_freem(mreq); + return (error); +} + +int +nfs_msg(thread_t thd, + const char *server, + const char *msg, + int error) +{ + proc_t p = thd ? 
get_bsdthreadtask_info(thd) : NULL; + tpr_t tpr; + + if (p) + tpr = tprintf_open(p); + else + tpr = NULL; + if (error) + tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error); + else + tprintf(tpr, "nfs server %s: %s\n", server, msg); + tprintf_close(tpr); + return (0); +} + +void +nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg) +{ + int timeoutmask, wasunresponsive, unresponsive, softnobrowse; + uint32_t do_vfs_signal; + struct timeval now; + + if (nmp == NULL) + return; + + lck_mtx_lock(&nmp->nm_lock); + + timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; + if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */ + timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; + wasunresponsive = (nmp->nm_state & timeoutmask); + + /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ + softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); + + if ((flags & NFSSTA_TIMEO) && !(nmp->nm_state & NFSSTA_TIMEO)) + nmp->nm_state |= NFSSTA_TIMEO; + if ((flags & NFSSTA_LOCKTIMEO) && !(nmp->nm_state & NFSSTA_LOCKTIMEO)) + nmp->nm_state |= NFSSTA_LOCKTIMEO; + if ((flags & NFSSTA_JUKEBOXTIMEO) && !(nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) + nmp->nm_state |= NFSSTA_JUKEBOXTIMEO; + + unresponsive = (nmp->nm_state & timeoutmask); + + if (unresponsive && (nmp->nm_flag & NFSMNT_DEADTIMEOUT)) { + microuptime(&now); + if (!wasunresponsive) { + nmp->nm_deadto_start = now.tv_sec; + nfs_mount_sock_thread_wake(nmp); + } else if ((now.tv_sec - nmp->nm_deadto_start) > nmp->nm_deadtimeout) { + if (!(nmp->nm_state & NFSSTA_DEAD)) + printf("nfs server %s: dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname); + nmp->nm_state |= NFSSTA_DEAD; + } + } + lck_mtx_unlock(&nmp->nm_lock); + + if (nmp->nm_state & NFSSTA_DEAD) + do_vfs_signal = VQ_DEAD; + else if (softnobrowse || wasunresponsive || !unresponsive) + do_vfs_signal = 0; + else + 
do_vfs_signal = VQ_NOTRESP; + if (do_vfs_signal) + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, do_vfs_signal, 0); + + nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); +} + +void +nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) +{ + int timeoutmask, wasunresponsive, unresponsive, softnobrowse; + int do_vfs_signal; + + if (nmp == NULL) + return; + + if (msg) + nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); + + lck_mtx_lock(&nmp->nm_lock); + + timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; + if (nmp->nm_flag & NFSMNT_MUTEJUKEBOX) /* jukebox timeouts don't count as unresponsive if muted */ + timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; + wasunresponsive = (nmp->nm_state & timeoutmask); + + /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ + softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); + + if ((flags & NFSSTA_TIMEO) && (nmp->nm_state & NFSSTA_TIMEO)) + nmp->nm_state &= ~NFSSTA_TIMEO; + if ((flags & NFSSTA_LOCKTIMEO) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) + nmp->nm_state &= ~NFSSTA_LOCKTIMEO; + if ((flags & NFSSTA_JUKEBOXTIMEO) && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO)) + nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO; + + unresponsive = (nmp->nm_state & timeoutmask); + + if (nmp->nm_deadto_start) + nmp->nm_deadto_start = 0; + lck_mtx_unlock(&nmp->nm_lock); + + if (softnobrowse) + do_vfs_signal = 0; + else + do_vfs_signal = (wasunresponsive && !unresponsive); + if (do_vfs_signal) + vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); +} + + #endif /* NFSCLIENT */ #if NFSSERVER @@ -2533,7 +3770,7 @@ nfsrv_rephead( size_t siz) { mbuf_t mrep; - u_long *tl; + u_int32_t *tl; struct nfsm_chain nmrep; int err, error; @@ -2629,7 +3866,7 @@ nfsrv_rephead( *nmrepp = nmrep; if ((err != 0) && (err != NFSERR_RETVOID)) - OSAddAtomic(1, (SInt32*)&nfsstats.srvrpc_errs); + OSAddAtomic(1, 
&nfsstats.srvrpc_errs); return (0); } @@ -2823,14 +4060,14 @@ nfsrv_rcv_locked(socket_t so, struct nfsrv_sock *slp, int waitflag) * stream socket. The "waitflag" argument indicates whether or not it * can sleep. */ -static int +int nfsrv_getstream(struct nfsrv_sock *slp, int waitflag) { mbuf_t m; char *cp1, *cp2, *mdata; int len, mlen, error; mbuf_t om, m2, recm; - u_long recmark; + u_int32_t recmark; if (slp->ns_flag & SLP_GETSTREAM) panic("nfs getstream"); @@ -2870,7 +4107,7 @@ nfsrv_getstream(struct nfsrv_sock *slp, int waitflag) slp->ns_flag |= SLP_LASTFRAG; else slp->ns_flag &= ~SLP_LASTFRAG; - if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { + if (slp->ns_reclen <= 0 || slp->ns_reclen > NFS_MAXPACKET) { slp->ns_flag &= ~SLP_GETSTREAM; return (EPERM); } @@ -3026,12 +4263,12 @@ nfsrv_dorec( * - verify it * - fill in the cred struct. */ -static int +int nfsrv_getreq(struct nfsrv_descript *nd) { struct nfsm_chain *nmreq; int len, i; - u_long nfsvers, auth_type; + u_int32_t nfsvers, auth_type; int error = 0; uid_t user_id; gid_t group_id; @@ -3196,85 +4433,3 @@ nfsrv_wakenfsd(struct nfsrv_sock *slp) #endif /* NFSSERVER */ -static int -nfs_msg(thread_t thd, - const char *server, - const char *msg, - int error) -{ - proc_t p = thd ? 
get_bsdthreadtask_info(thd) : NULL; - tpr_t tpr; - - if (p) - tpr = tprintf_open(p); - else - tpr = NULL; - if (error) - tprintf(tpr, "nfs server %s: %s, error %d\n", server, msg, error); - else - tprintf(tpr, "nfs server %s: %s\n", server, msg); - tprintf_close(tpr); - return (0); -} - -void -nfs_down(struct nfsmount *nmp, thread_t thd, int error, int flags, const char *msg) -{ - int ostate, do_vfs_signal; - - if (nmp == NULL) - return; - - lck_mtx_lock(&nmp->nm_lock); - ostate = nmp->nm_state; - if ((flags & NFSSTA_TIMEO) && !(ostate & NFSSTA_TIMEO)) - nmp->nm_state |= NFSSTA_TIMEO; - if ((flags & NFSSTA_LOCKTIMEO) && !(ostate & NFSSTA_LOCKTIMEO)) - nmp->nm_state |= NFSSTA_LOCKTIMEO; - if ((flags & NFSSTA_JUKEBOXTIMEO) && !(ostate & NFSSTA_JUKEBOXTIMEO)) - nmp->nm_state |= NFSSTA_JUKEBOXTIMEO; - lck_mtx_unlock(&nmp->nm_lock); - - /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ - if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)) - do_vfs_signal = 0; - else - do_vfs_signal = !(ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)); - if (do_vfs_signal) - vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 0); - - nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, error); -} - -void -nfs_up(struct nfsmount *nmp, thread_t thd, int flags, const char *msg) -{ - int ostate, state, do_vfs_signal; - - if (nmp == NULL) - return; - - if (msg) - nfs_msg(thd, vfs_statfs(nmp->nm_mountp)->f_mntfromname, msg, 0); - - lck_mtx_lock(&nmp->nm_lock); - ostate = nmp->nm_state; - if ((flags & NFSSTA_TIMEO) && (ostate & NFSSTA_TIMEO)) - nmp->nm_state &= ~NFSSTA_TIMEO; - if ((flags & NFSSTA_LOCKTIMEO) && (ostate & NFSSTA_LOCKTIMEO)) - nmp->nm_state &= ~NFSSTA_LOCKTIMEO; - if ((flags & NFSSTA_JUKEBOXTIMEO) && (ostate & NFSSTA_JUKEBOXTIMEO)) - nmp->nm_state &= ~NFSSTA_JUKEBOXTIMEO; - state = nmp->nm_state; - lck_mtx_unlock(&nmp->nm_lock); - - /* XXX don't allow users to know 
about/disconnect unresponsive, soft, nobrowse mounts */ - if ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)) - do_vfs_signal = 0; - else - do_vfs_signal = (ostate & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)) && - !(state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO|NFSSTA_JUKEBOXTIMEO)); - if (do_vfs_signal) - vfs_event_signal(&vfs_statfs(nmp->nm_mountp)->f_fsid, VQ_NOTRESP, 1); -} - diff --git a/bsd/nfs/nfs_srvcache.c b/bsd/nfs/nfs_srvcache.c index 1b28bdcfb..db1c6e6a7 100644 --- a/bsd/nfs/nfs_srvcache.c +++ b/bsd/nfs/nfs_srvcache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -247,10 +247,10 @@ nfsrv_getcache( if (rp->rc_state == RC_UNUSED) panic("nfsrv cache"); if (rp->rc_state == RC_INPROG) { - OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_inproghits); + OSAddAtomic(1, &nfsstats.srvcache_inproghits); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { - OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits); + OSAddAtomic(1, &nfsstats.srvcache_nonidemdonehits); nd->nd_repstat = rp->rc_status; error = nfsrv_rephead(nd, slp, &nmrep, 0); if (error) { @@ -262,7 +262,7 @@ nfsrv_getcache( *mrepp = nmrep.nmc_mhead; } } else if (rp->rc_flag & RC_REPMBUF) { - OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_nonidemdonehits); + OSAddAtomic(1, &nfsstats.srvcache_nonidemdonehits); error = mbuf_copym(rp->rc_reply, 0, MBUF_COPYALL, MBUF_WAITOK, mrepp); if (error) { printf("nfsrv cache: reply copym failed for nonidem request hit\n"); @@ -271,7 +271,7 @@ nfsrv_getcache( ret = RC_REPLY; } } else { - OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_idemdonehits); + OSAddAtomic(1, &nfsstats.srvcache_idemdonehits); rp->rc_state = RC_INPROG; ret = RC_DOIT; } @@ -284,7 +284,7 @@ nfsrv_getcache( return (ret); } } - OSAddAtomic(1, (SInt32*)&nfsstats.srvcache_misses); + OSAddAtomic(1, 
&nfsstats.srvcache_misses); if (nfsrv_reqcache_count < nfsrv_reqcache_size) { /* try to allocate a new entry */ MALLOC(rp, struct nfsrvcache *, sizeof *rp, M_NFSD, M_WAITOK); diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index f0fd596a9..40b55e86e 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,7 +84,7 @@ #include #include #include -#include +#include #include #include #include @@ -185,9 +185,23 @@ nfstov_type(nfstype nvtype, int nfsvers) int vtonfsv2_mode(enum vtype vtype, mode_t m) { - if (vtype == VFIFO) + switch (vtype) { + case VNON: + case VREG: + case VDIR: + case VBLK: + case VCHR: + case VLNK: + case VSOCK: + return vnode_makeimode(vtype, m); + case VFIFO: return vnode_makeimode(VCHR, m); - return vnode_makeimode(vtype, m); + case VBAD: + case VSTR: + case VCPLX: + default: + return vnode_makeimode(VNON, m); + } } #if NFSSERVER @@ -425,7 +439,7 @@ nfsm_chain_add_opaque_nopad_f(struct nfsm_chain *nmc, const u_char *buf, uint32_ * Add "len" bytes of data from "uio" to the given chain. */ int -nfsm_chain_add_uio(struct nfsm_chain *nmc, struct uio *uiop, uint32_t len) +nfsm_chain_add_uio(struct nfsm_chain *nmc, uio_t uio, uint32_t len) { uint32_t paddedlen, tlen; int error; @@ -443,7 +457,7 @@ nfsm_chain_add_uio(struct nfsm_chain *nmc, struct uio *uiop, uint32_t len) if (len) { if (tlen > len) tlen = len; - uiomove(nmc->nmc_ptr, tlen, uiop); + uiomove(nmc->nmc_ptr, tlen, uio); } else { bzero(nmc->nmc_ptr, tlen); } @@ -739,7 +753,7 @@ nfsm_chain_get_opaque_f(struct nfsm_chain *nmc, uint32_t len, u_char *buf) * The nfsm_chain is advanced by nfsm_rndup("len") bytes. 
*/ int -nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, struct uio *uiop) +nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, uio_t uio) { uint32_t cplen, padlen; int error = 0; @@ -751,7 +765,7 @@ nfsm_chain_get_uio(struct nfsm_chain *nmc, uint32_t len, struct uio *uiop) /* copy as much as we need/can */ cplen = MIN(nmc->nmc_left, len); if (cplen) { - error = uiomove(nmc->nmc_ptr, cplen, uiop); + error = uiomove(nmc->nmc_ptr, cplen, uio); if (error) return (error); nmc->nmc_ptr += cplen; @@ -933,6 +947,36 @@ nfsm_chain_get_wcc_data_f( return (error); } +/* + * Get the next RPC transaction ID (XID) + */ +void +nfs_get_xid(uint64_t *xidp) +{ + struct timeval tv; + + lck_mtx_lock(nfs_request_mutex); + if (!nfs_xid) { + /* + * Derive initial xid from system time. + * + * Note: it's OK if this code inits nfs_xid to 0 (for example, + * due to a broken clock) because we immediately increment it + * and we guarantee to never use xid 0. So, nfs_xid should only + * ever be 0 the first time this function is called. + */ + microtime(&tv); + nfs_xid = tv.tv_sec << 12; + } + if (++nfs_xid == 0) { + /* Skip zero xid if it should ever happen. */ + nfs_xidwrap++; + nfs_xid++; + } + *xidp = nfs_xid + ((uint64_t)nfs_xidwrap << 32); + lck_mtx_unlock(nfs_request_mutex); +} + /* * Build the RPC header and fill in the authorization info. * Returns the head of the mbuf list and the xid. @@ -962,7 +1006,6 @@ nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_ mbuf_t mreq, mb; int error, i, grpsiz, authsiz, reqlen; size_t headlen; - struct timeval tv; struct nfsm_chain nmreq; /* allocate the packet */ @@ -992,33 +1035,11 @@ nfsm_rpchead2(int sotype, int prog, int vers, int proc, int auth_type, int auth_ * it may be a higher-level resend with a GSSAPI credential. * Otherwise, allocate a new one. */ - if (*xidp == 0) { - lck_mtx_lock(nfs_request_mutex); - if (!nfs_xid) { - /* - * Derive initial xid from system time. 
- * - * Note: it's OK if this code inits nfs_xid to 0 (for example, - * due to a broken clock) because we immediately increment it - * and we guarantee to never use xid 0. So, nfs_xid should only - * ever be 0 the first time this function is called. - */ - microtime(&tv); - nfs_xid = tv.tv_sec << 12; - } - if (++nfs_xid == 0) { - /* Skip zero xid if it should ever happen. */ - nfs_xidwrap++; - nfs_xid++; - } - *xidp = nfs_xid + ((u_int64_t)nfs_xidwrap << 32); - lck_mtx_unlock(nfs_request_mutex); - } + if (*xidp == 0) + nfs_get_xid(xidp); /* build the header(s) */ - nmreq.nmc_mcur = nmreq.nmc_mhead = mreq; - nmreq.nmc_ptr = mbuf_data(nmreq.nmc_mcur); - nmreq.nmc_left = mbuf_trailingspace(nmreq.nmc_mcur); + nfsm_chain_init(&nmreq, mreq); /* First, if it's a TCP stream insert space for an RPC record mark */ if (sotype == SOCK_STREAM) @@ -1288,29 +1309,29 @@ nfs_loadattrcache( npnvap = &np->n_vattr; bcopy((caddr_t)nvap, (caddr_t)npnvap, sizeof(*nvap)); - if (nvap->nva_size != np->n_size) { - /* - * n_size is protected by the data lock, so we need to - * defer updating it until it's safe. We save the new size - * and set a flag and it'll get updated the next time we get/drop - * the data lock or the next time we do a getattr. - */ - np->n_newsize = nvap->nva_size; + if (!vp || (nvap->nva_type != VREG)) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { FSDBG(527, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 
6 : 4)); - SET(np->n_flag, NUPDATESIZE); - if (vp && (nvap->nva_type == VREG)) { - if (!UBCINFOEXISTS(vp) || (dontshrink && (np->n_newsize < np->n_size))) { - /* asked not to shrink, so stick with current size */ - FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); - nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); - NATTRINVALIDATE(np); - } else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { - /* if we've modified, use larger size */ - FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); - nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); - } + if (!UBCINFOEXISTS(vp) || (dontshrink && (nvap->nva_size < np->n_size))) { + /* asked not to shrink, so stick with current size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); + nvap->nva_size = np->n_size; + NATTRINVALIDATE(np); + } else if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ + FSDBG(527, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); + nvap->nva_size = np->n_size; + npnvap->nva_size = np->n_size; + } else { + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); } } @@ -1351,7 +1372,7 @@ nfs_attrcachetimeout(nfsnode_t np) /* Note that if the client and server clocks are way out of sync, */ /* timeout will probably get clamped to a min or max value */ microtime(&now); - timeo = (now.tv_sec - (np)->n_mtime.tv_sec) / 10; + timeo = (now.tv_sec - (np)->n_vattr.nva_timesec[NFSTIME_MODIFY]) / 10; if (isdir) { if (timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; @@ -1369,28 +1390,21 @@ nfs_attrcachetimeout(nfsnode_t np) } /* - * Check the time stamp + * Check the attribute cache time stamp. 
* If the cache is valid, copy contents to *nvaper and return 0 - * otherwise return an error + * otherwise return an error. + * Must be called with the node locked. */ int -nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int alreadylocked) +nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper) { struct nfs_vattr *nvap; struct timeval nowup; int32_t timeo; - if (!alreadylocked && nfs_lock(np, NFS_NODE_LOCK_SHARED)) { - FSDBG(528, np, 0, 0xffffff00, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); - return (ENOENT); - } - if (!NATTRVALID(np)) { - if (!alreadylocked) - nfs_unlock(np); FSDBG(528, np, 0, 0xffffff01, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); + OSAddAtomic(1, &nfsstats.attrcache_misses); return (ENOENT); } @@ -1398,37 +1412,31 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int alreadylocked) microuptime(&nowup); if ((nowup.tv_sec - np->n_attrstamp) >= timeo) { - if (!alreadylocked) - nfs_unlock(np); FSDBG(528, np, 0, 0xffffff02, ENOENT); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_misses); + OSAddAtomic(1, &nfsstats.attrcache_misses); return (ENOENT); } nvap = &np->n_vattr; FSDBG(528, np, nvap->nva_size, np->n_size, 0xcace); - OSAddAtomic(1, (SInt32*)&nfsstats.attrcache_hits); + OSAddAtomic(1, &nfsstats.attrcache_hits); - if (nvap->nva_size != np->n_size) { - /* - * n_size is protected by the data lock, so we need to - * defer updating it until it's safe. We save the new size - * and set a flag and it'll get updated the next time we get/drop - * the data lock or the next time we do a getattr. 
- */ - if (!alreadylocked) { - /* need to upgrade shared lock to exclusive */ - if (lck_rw_lock_shared_to_exclusive(&np->n_lock) == FALSE) - lck_rw_lock_exclusive(&np->n_lock); - } - np->n_newsize = nvap->nva_size; + if (nvap->nva_type != VREG) { + np->n_size = nvap->nva_size; + } else if (nvap->nva_size != np->n_size) { FSDBG(528, np, nvap->nva_size, np->n_size, (nvap->nva_type == VREG) | (np->n_flag & NMODIFIED ? 6 : 4)); - SET(np->n_flag, NUPDATESIZE); - if ((nvap->nva_type == VREG) && (np->n_flag & NMODIFIED) && - (nvap->nva_size < np->n_size)) { - /* if we've modified, use larger size */ + if ((np->n_flag & NMODIFIED) && (nvap->nva_size < np->n_size)) { + /* if we've modified, stick with larger size */ nvap->nva_size = np->n_size; - CLR(np->n_flag, NUPDATESIZE); + } else { + /* + * n_size is protected by the data lock, so we need to + * defer updating it until it's safe. We save the new size + * and set a flag and it'll get updated the next time we get/drop + * the data lock or the next time we do a getattr. + */ + np->n_newsize = nvap->nva_size; + SET(np->n_flag, NUPDATESIZE); } } @@ -1443,85 +1451,9 @@ nfs_getattrcache(nfsnode_t np, struct nfs_vattr *nvaper, int alreadylocked) nvaper->nva_timensec[NFSTIME_MODIFY] = np->n_mtim.tv_nsec; } } - if (!alreadylocked) - nfs_unlock(np); return (0); } - -static nfsuint64 nfs_nullcookie = { { 0, 0 } }; -/* - * This function finds the directory cookie that corresponds to the - * logical byte offset given. 
- */ -nfsuint64 * -nfs_getcookie(nfsnode_t dnp, off_t off, int add) -{ - struct nfsdmap *dp, *dp2; - int pos; - - pos = off / NFS_DIRBLKSIZ; - if (pos == 0) - return (&nfs_nullcookie); - pos--; - dp = dnp->n_cookies.lh_first; - if (!dp) { - if (add) { - MALLOC_ZONE(dp, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp) - return ((nfsuint64 *)0); - dp->ndm_eocookie = 0; - LIST_INSERT_HEAD(&dnp->n_cookies, dp, ndm_list); - } else - return ((nfsuint64 *)0); - } - while (pos >= NFSNUMCOOKIES) { - pos -= NFSNUMCOOKIES; - if (dp->ndm_list.le_next) { - if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && - pos >= dp->ndm_eocookie) - return ((nfsuint64 *)0); - dp = dp->ndm_list.le_next; - } else if (add) { - MALLOC_ZONE(dp2, struct nfsdmap *, sizeof(struct nfsdmap), - M_NFSDIROFF, M_WAITOK); - if (!dp2) - return ((nfsuint64 *)0); - dp2->ndm_eocookie = 0; - LIST_INSERT_AFTER(dp, dp2, ndm_list); - dp = dp2; - } else - return ((nfsuint64 *)0); - } - if (pos >= dp->ndm_eocookie) { - if (add) - dp->ndm_eocookie = pos + 1; - else - return ((nfsuint64 *)0); - } - return (&dp->ndm_cookies[pos]); -} - -/* - * Invalidate cached directory information, except for the actual directory - * blocks (which are invalidated separately). - * Done mainly to avoid the use of stale offset cookies. 
- */ -void -nfs_invaldir(nfsnode_t dnp) -{ - if (vnode_vtype(NFSTOV(dnp)) != VDIR) { - printf("nfs: invaldir not dir\n"); - return; - } - dnp->n_direofoffset = 0; - dnp->n_cookieverf.nfsuquad[0] = 0; - dnp->n_cookieverf.nfsuquad[1] = 0; - if (dnp->n_cookies.lh_first) - dnp->n_cookies.lh_first->ndm_eocookie = 0; -} - #endif /* NFSCLIENT */ /* @@ -1540,8 +1472,16 @@ nfs_interval_timer_start(thread_call_t call, int interval) #if NFSSERVER -static void nfsrv_init_user_list(struct nfs_active_user_list *); -static void nfsrv_free_user_list(struct nfs_active_user_list *); +int nfsrv_cmp_secflavs(struct nfs_sec *, struct nfs_sec *); +int nfsrv_hang_addrlist(struct nfs_export *, struct user_nfs_export_args *); +int nfsrv_free_netopt(struct radix_node *, void *); +int nfsrv_free_addrlist(struct nfs_export *, struct user_nfs_export_args *); +struct nfs_export_options *nfsrv_export_lookup(struct nfs_export *, mbuf_t); +struct nfs_export *nfsrv_fhtoexport(struct nfs_filehandle *); +int nfsrv_cmp_sockaddr(struct sockaddr_storage *, struct sockaddr_storage *); +struct nfs_user_stat_node *nfsrv_get_user_stat_node(struct nfs_active_user_list *, struct sockaddr_storage *, uid_t); +void nfsrv_init_user_list(struct nfs_active_user_list *); +void nfsrv_free_user_list(struct nfs_active_user_list *); /* * add NFSv3 WCC data to an mbuf chain @@ -1634,6 +1574,7 @@ nfsrv_namei( vnode_t dp; int error; struct componentname *cnp = &nip->ni_cnd; + uint32_t cnflags; char *tmppn; *retdirp = NULL; @@ -1664,16 +1605,23 @@ nfsrv_namei( /* * And call lookup() to do the real work */ - error = lookup(nip); + cnflags = nip->ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(nip)) == ERECYCLE) { + nip->ni_cnd.cn_flags = cnflags; + cnp->cn_nameptr = cnp->cn_pnbuf; + nip->ni_usedvp = nip->ni_dvp = nip->ni_startdir = dp; + } if (error) goto out; /* Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) 
{ cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(nip->ni_dvp, NULL); } +#endif /* __LP64__ */ if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) vnode_put(nip->ni_dvp); if (nip->ni_vp) { @@ -1693,7 +1641,7 @@ nfsrv_namei( } /* - * A fiddled version of m_adj() that ensures null fill to a long + * A fiddled version of m_adj() that ensures null fill to a 4-byte * boundary and only trims off the back end */ void @@ -1861,7 +1809,7 @@ nfsm_chain_get_sattr( struct nfsm_chain *nmc, struct vnode_attr *vap) { - int error = 0, nullflag = 0; + int error = 0; uint32_t val = 0; uint64_t val64; struct timespec now; @@ -1932,10 +1880,11 @@ nfsm_chain_get_sattr( vap->va_access_time.tv_sec, vap->va_access_time.tv_nsec); VATTR_SET_ACTIVE(vap, va_access_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; break; case NFS_TIME_SET_TO_SERVER: VATTR_SET(vap, va_access_time, now); - nullflag = VA_UTIMES_NULL; + vap->va_vaflags |= VA_UTIMES_NULL; break; } nfsm_chain_get_32(error, nmc, val); @@ -1945,10 +1894,12 @@ nfsm_chain_get_sattr( vap->va_modify_time.tv_sec, vap->va_modify_time.tv_nsec); VATTR_SET_ACTIVE(vap, va_modify_time); + vap->va_vaflags &= ~VA_UTIMES_NULL; break; case NFS_TIME_SET_TO_SERVER: VATTR_SET(vap, va_modify_time, now); - vap->va_vaflags |= nullflag; + if (!VATTR_IS_ACTIVE(vap, va_access_time)) + vap->va_vaflags |= VA_UTIMES_NULL; break; } @@ -1958,7 +1909,7 @@ nfsm_chain_get_sattr( /* * Compare two security flavor structs */ -static int +int nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2) { int i; @@ -1975,7 +1926,7 @@ nfsrv_cmp_secflavs(struct nfs_sec *sf1, struct nfs_sec *sf2) * Build hash lists of net addresses and hang them off the NFS export. * Called by nfsrv_export() to set up the lists of export addresses. 
*/ -static int +int nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) { struct nfs_export_net_args nxna; @@ -2116,7 +2067,7 @@ struct nfsrv_free_netopt_arg { struct radix_node_head *rnh; }; -static int +int nfsrv_free_netopt(struct radix_node *rn, void *w) { struct nfsrv_free_netopt_arg *fna = (struct nfsrv_free_netopt_arg *)w; @@ -2135,7 +2086,7 @@ nfsrv_free_netopt(struct radix_node *rn, void *w) /* * Free the net address hash lists that are hanging off the mount points. */ -static int +int nfsrv_free_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) { struct nfs_export_net_args nxna; @@ -2226,6 +2177,39 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) char path[MAXPATHLEN]; int expisroot; + if (unxa->nxa_flags == NXA_CHECK) { + /* just check if the path is an NFS-exportable file system */ + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen); + if (error) + return (error); + NDINIT(&mnd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx); + error = namei(&mnd); + if (error) + return (error); + mvp = mnd.ni_vp; + mp = vnode_mount(mvp); + /* make sure it's the root of a file system */ + if (!vnode_isvroot(mvp)) + error = EINVAL; + /* make sure the file system is NFS-exportable */ + if (!error) { + nfh.nfh_len = NFSV3_MAX_FID_SIZE; + error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); + } + if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) + error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; + vnode_put(mvp); + nameidone(&mnd); + return (error); + } + + /* all other operations: must be super user */ + if ((error = vfs_context_suser(ctx))) + return (error); + if (unxa->nxa_flags & NXA_DELETE_ALL) { /* delete all exports on all file systems */ lck_rw_lock_exclusive(&nfsrv_export_rwlock); @@ -2252,6 +2236,11 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t 
ctx) FREE(nxfs->nxfs_path, M_TEMP); FREE(nxfs, M_TEMP); } + if (nfsrv_export_hashtbl) { + /* all exports deleted, clean up export hash table */ + FREE(nfsrv_export_hashtbl, M_TEMP); + nfsrv_export_hashtbl = NULL; + } lck_rw_done(&nfsrv_export_rwlock); return (0); } @@ -2262,6 +2251,13 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) lck_rw_lock_exclusive(&nfsrv_export_rwlock); + /* init export hash table if not already */ + if (!nfsrv_export_hashtbl) { + if (nfsrv_export_hash_size <= 0) + nfsrv_export_hash_size = NFSRVEXPHASHSZ; + nfsrv_export_hashtbl = hashinit(nfsrv_export_hash_size, M_TEMP, &nfsrv_export_hash); + } + // first check if we've already got an exportfs with the given ID LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { if (nxfs->nxfs_id == unxa->nxa_fsid) @@ -2328,6 +2324,8 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) error = VFS_VPTOFH(mvp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); if (!error && (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)) error = EIO; + if (!error && !(mp->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED)) + error = EISDIR; if (error) goto out; } @@ -2491,7 +2489,11 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) xnd.ni_startdir = mvp; xnd.ni_usedvp = mvp; xnd.ni_cnd.cn_context = ctx; - error = lookup(&xnd); + while ((error = lookup(&xnd)) == ERECYCLE) { + xnd.ni_cnd.cn_flags = LOCKLEAF; + xnd.ni_cnd.cn_nameptr = xnd.ni_cnd.cn_pnbuf; + xnd.ni_usedvp = xnd.ni_dvp = xnd.ni_startdir = mvp; + } if (error) goto out1; xvp = xnd.ni_vp; @@ -2602,7 +2604,7 @@ nfsrv_export(struct user_nfs_export_args *unxa, vfs_context_t ctx) return (error); } -static struct nfs_export_options * +struct nfs_export_options * nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) { struct nfs_export_options *nxo = NULL; @@ -2630,13 +2632,15 @@ nfsrv_export_lookup(struct nfs_export *nx, mbuf_t nam) } /* find an export for the given handle */ -static struct nfs_export * +struct nfs_export * 
nfsrv_fhtoexport(struct nfs_filehandle *nfhp) { struct nfs_exphandle *nxh = (struct nfs_exphandle*)nfhp->nfh_fhp; struct nfs_export *nx; uint32_t fsid, expid; + if (!nfsrv_export_hashtbl) + return (NULL); fsid = ntohl(nxh->nxh_fsid); expid = ntohl(nxh->nxh_expid); nx = NFSRVEXPHASH(fsid, expid)->lh_first; @@ -2647,7 +2651,7 @@ nfsrv_fhtoexport(struct nfs_filehandle *nfhp) continue; break; } - return nx; + return (nx); } /* @@ -2728,7 +2732,7 @@ nfsrv_fhtovp( } if (nxo && (nxo->nxo_flags & NX_OFFLINE)) - return ((nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); /* find mount structure */ mp = vfs_getvfs_by_mntonname((*nxp)->nx_fs->nxfs_path); @@ -2737,7 +2741,7 @@ nfsrv_fhtovp( * We have an export, but no mount? * Perhaps the export just hasn't been marked offline yet. */ - return ((nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); + return ((nd == NULL || nd->nd_vers == NFS_VER2) ? ESTALE : NFSERR_TRYLATER); } fidp = nfhp->nfh_fhp + sizeof(*nxh); @@ -2863,7 +2867,7 @@ nfsrv_fhmatch(struct nfs_filehandle *fh1, struct nfs_filehandle *fh2) * Compare address fields of two sockaddr_storage structures. * Returns zero if they match. */ -static int +int nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock2) { struct sockaddr_in *ipv4_sock1, *ipv4_sock2; @@ -2908,7 +2912,7 @@ nfsrv_cmp_sockaddr(struct sockaddr_storage *sock1, struct sockaddr_storage *sock * * The list's user_mutex lock MUST be held. 
*/ -static struct nfs_user_stat_node * +struct nfs_user_stat_node * nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_storage *sock, uid_t uid) { struct nfs_user_stat_node *unode; @@ -2944,7 +2948,7 @@ nfsrv_get_user_stat_node(struct nfs_active_user_list *list, struct sockaddr_stor return NULL; /* increment node count */ - OSAddAtomic(1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(1, &nfsrv_user_stat_node_count); list->node_count++; } else { /* reuse the oldest node in the lru list */ @@ -3014,7 +3018,7 @@ nfsrv_update_user_stat(struct nfs_export *nx, struct nfsrv_descript *nd, uid_t u } /* initialize an active user list */ -static void +void nfsrv_init_user_list(struct nfs_active_user_list *ulist) { uint i; @@ -3031,7 +3035,7 @@ nfsrv_init_user_list(struct nfs_active_user_list *ulist) } /* Free all nodes in an active user list */ -static void +void nfsrv_free_user_list(struct nfs_active_user_list *ulist) { struct nfs_user_stat_node *unode; @@ -3046,7 +3050,7 @@ nfsrv_free_user_list(struct nfs_active_user_list *ulist) FREE(unode, M_TEMP); /* decrement node count */ - OSAddAtomic(-1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(-1, &nfsrv_user_stat_node_count); } ulist->node_count = 0; @@ -3090,7 +3094,7 @@ nfsrv_active_user_list_reclaim(void) LIST_INSERT_HEAD(&oldlist, unode, hash_link); /* decrement node count */ - OSAddAtomic(-1, (SInt32*)&nfsrv_user_stat_node_count); + OSAddAtomic(-1, &nfsrv_user_stat_node_count); ulist->node_count--; } /* can unlock this export's list now */ diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 9d4d4f309..c28eac76c 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -101,7 +101,7 @@ #include #include -#include +#include #include #include @@ -133,13 +133,12 @@ extern int nfsrv_wg_delay_v3; static int nfsrv_require_resv_port = 0; static int nfsrv_deadsock_timer_on = 0; -static int nfssvc_addsock(socket_t, mbuf_t); -static int nfssvc_nfsd(void); -static int nfssvc_export(user_addr_t); - -static void nfsrv_zapsock(struct nfsrv_sock *slp); -static void nfsrv_slpderef(struct nfsrv_sock *); -static void nfsrv_slpfree(struct nfsrv_sock *); +int nfssvc_export(user_addr_t argp); +int nfssvc_nfsd(void); +int nfssvc_addsock(socket_t, mbuf_t); +void nfsrv_zapsock(struct nfsrv_sock *); +void nfsrv_slpderef(struct nfsrv_sock *); +void nfsrv_slpfree(struct nfsrv_sock *); #endif /* NFSSERVER */ @@ -161,6 +160,8 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nf SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW, &nfs_single_des, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW, &nfs_access_delete, 0, ""); #endif /* NFSCLIENT */ #if NFSSERVER @@ -169,10 +170,14 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_de SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW, &nfsrv_export_hash_size, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, 
CTLFLAG_RW, &nfsrv_reqcache_size, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, ""); +SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW, &nfsrv_gss_context_ttl, 0, ""); +#if CONFIG_FSE SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, ""); +#endif SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, ""); SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, ""); #endif /* NFSSERVER */ @@ -206,7 +211,6 @@ nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) * Async requests will pull the next struct nfsiod from the head of the free list, * put it on the work queue, and wake whatever thread is waiting on that struct. */ -static int nfsiod_continue(int); /* * nfsiod thread exit routine @@ -214,7 +218,7 @@ static int nfsiod_continue(int); * Must be called with nfsiod_mutex held so that the * decision to terminate is atomic with the termination. 
*/ -static void +void nfsiod_terminate(struct nfsiod *niod) { nfsiod_thread_count--; @@ -228,7 +232,7 @@ nfsiod_terminate(struct nfsiod *niod) } /* nfsiod thread startup routine */ -static void +void nfsiod_thread(void) { struct nfsiod *niod; @@ -238,6 +242,7 @@ nfsiod_thread(void) if (!niod) { lck_mtx_lock(nfsiod_mutex); nfsiod_thread_count--; + wakeup(current_thread()); lck_mtx_unlock(nfsiod_mutex); thread_terminate(current_thread()); /*NOTREACHED*/ @@ -263,7 +268,7 @@ nfsiod_thread(void) int nfsiod_start(void) { - thread_t thd; + thread_t thd = THREAD_NULL; lck_mtx_lock(nfsiod_mutex); if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { @@ -271,9 +276,13 @@ nfsiod_start(void) return (EBUSY); } nfsiod_thread_count++; - thd = kernel_thread(kernel_task, nfsiod_thread); + if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { + lck_mtx_unlock(nfsiod_mutex); + return (EBUSY); + } /* wait for the thread to complete startup */ msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); + thread_deallocate(thd); return (0); } @@ -282,7 +291,7 @@ nfsiod_start(void) * * Grab an nfsiod struct to work on, do some work, then drop it */ -static int +int nfsiod_continue(int error) { struct nfsiod *niod; @@ -295,8 +304,6 @@ nfsiod_continue(int error) niod = TAILQ_FIRST(&nfsiodwork); if (!niod) { /* there's no work queued up */ - if (error != EWOULDBLOCK) - printf("nfsiod: error %d work %p\n", error, niod); /* remove an old nfsiod struct and terminate */ if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) TAILQ_REMOVE(&nfsiodfree, niod, niod_link); @@ -474,7 +481,7 @@ extern struct fileops vnops; int fhopen( proc_t p, struct fhopen_args *uap, - register_t *retval) + int32_t *retval) { vnode_t vp; struct nfs_filehandle nfh; @@ -629,10 +636,9 @@ nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) AUDIT_ARG(cmd, uap->flag); /* - * Must be super user + * Must be super user for most operations (export ops checked later). 
*/ - error = proc_suser(p); - if (error) + if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) return (error); #if CONFIG_MACF error = mac_system_check_nfsd(kauth_cred_get()); @@ -695,7 +701,7 @@ nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) /* * Adds a socket to the list for servicing by nfsds. */ -static int +int nfssvc_addsock(socket_t so, mbuf_t mynam) { struct nfsrv_sock *slp; @@ -830,7 +836,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam) * have any work are simply dropped from the queue. * */ -static int +int nfssvc_nfsd(void) { mbuf_t m, mrep; @@ -1049,7 +1055,7 @@ nfssvc_nfsd(void) } if (error) { - OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs); + OSAddAtomic(1, &nfsstats.srv_errs); nfsrv_updatecache(nd, FALSE, mrep); if (nd->nd_nam2) { mbuf_freem(nd->nd_nam2); @@ -1057,7 +1063,7 @@ nfssvc_nfsd(void) } break; } - OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]); + OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]); nfsrv_updatecache(nd, TRUE, mrep); /* FALLTHRU */ @@ -1098,7 +1104,7 @@ nfssvc_nfsd(void) if (slp->ns_sotype == SOCK_STREAM) { error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); if (!error) - *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz); + *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); } if (!error) { if (slp->ns_flag & SLP_VALID) { @@ -1189,7 +1195,7 @@ nfssvc_nfsd(void) return (error); } -static int +int nfssvc_export(user_addr_t argp) { int error = 0, is_64bit; @@ -1230,7 +1236,7 @@ nfssvc_export(user_addr_t argp) * will stop using it and clear ns_flag at the end so that it will not be * reassigned during cleanup. */ -static void +void nfsrv_zapsock(struct nfsrv_sock *slp) { socket_t so; @@ -1258,7 +1264,7 @@ nfsrv_zapsock(struct nfsrv_sock *slp) /* * cleanup and release a server socket structure. 
*/ -static void +void nfsrv_slpfree(struct nfsrv_sock *slp) { struct nfsrv_descript *nwp, *nnwp; @@ -1403,8 +1409,10 @@ nfsrv_cleanup(void) { struct nfsrv_sock *slp, *nslp; struct timeval now; +#if CONFIG_FSE struct nfsrv_fmod *fp, *nfp; int i; +#endif microuptime(&now); for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { @@ -1436,6 +1444,7 @@ nfsrv_cleanup(void) } } +#if CONFIG_FSE /* * Flush pending file write fsevents */ @@ -1446,12 +1455,10 @@ nfsrv_cleanup(void) * Fire off the content modified fsevent for each * entry, remove it from the list, and free it. */ -#if CONFIG_FSE if (nfsrv_fsevents_enabled) add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, FSE_ARG_VNODE, fp->fm_vp, FSE_ARG_DONE); -#endif vnode_put(fp->fm_vp); kauth_cred_unref(&fp->fm_context.vc_ucred); nfp = LIST_NEXT(fp, fm_link); @@ -1461,6 +1468,7 @@ nfsrv_cleanup(void) } nfsrv_fmod_pending = 0; lck_mtx_unlock(nfsrv_fmod_mutex); +#endif nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index d7e3d0c3c..e92c58cdf 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -123,10 +123,12 @@ */ int nfs_ticks; -static lck_grp_t *nfs_mount_grp; +static lck_grp_t *nfs_global_grp, *nfs_mount_grp; +lck_mtx_t *nfs_global_mutex; uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN]; uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN]; uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN]; +struct nfsclientidlist nfsclientids; /* NFS requests */ struct nfs_reqqhead nfs_reqq; @@ -134,11 +136,18 @@ lck_grp_t *nfs_request_grp; lck_mtx_t *nfs_request_mutex; thread_call_t nfs_request_timer_call; int nfs_request_timer_on; -u_long nfs_xid = 0; -u_long nfs_xidwrap = 0; /* to build a (non-wrapping) 64 bit xid */ +u_int32_t nfs_xid = 0; +u_int32_t nfs_xidwrap = 0; /* to build a (non-wrapping) 64 bit xid */ thread_call_t nfs_buf_timer_call; +/* NFSv4 */ +lck_grp_t *nfs_open_grp; +uint32_t nfs_open_owner_seqnum = 0; +uint32_t nfs_lock_owner_seqnum = 0; +thread_call_t nfs4_callback_timer_call; +int nfs4_callback_timer_on = 0; + /* nfsiod */ lck_grp_t *nfsiod_lck_grp; lck_mtx_t *nfsiod_mutex; @@ -150,6 +159,7 @@ int nfs_max_async_writes = NFS_DEFMAXASYNCWRITES; int nfs_iosize = NFS_IOSIZE; int nfs_access_cache_timeout = NFS_MAXATTRTIMO; +int nfs_access_delete = 0; int nfs_allow_async = 0; int nfs_statfs_rate_limit = NFS_DEFSTATFSRATELIMIT; int nfs_lockd_mounts = 0; @@ -159,7 +169,7 @@ int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; int nfs_tprintf_delay = NFS_TPRINTF_DELAY; -static int mountnfs(struct user_nfs_args *,mount_t,mbuf_t,vfs_context_t,vnode_t *); +int mountnfs(struct user_nfs_args *,mount_t,mbuf_t,vfs_context_t,vnode_t *); static int nfs_mount_diskless(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t); #if !defined(NO_MOUNT_PRIVATE) static int nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, vnode_t *, mount_t *, vfs_context_t); @@ -168,18 +178,18 @@ static int nfs_mount_diskless_private(struct nfs_dlmount *, const char *, int, v /* * NFS VFS 
operations. */ -static int nfs_vfs_mount(mount_t, vnode_t, user_addr_t, vfs_context_t); -static int nfs_vfs_start(mount_t, int, vfs_context_t); -static int nfs_vfs_unmount(mount_t, int, vfs_context_t); -static int nfs_vfs_root(mount_t, vnode_t *, vfs_context_t); -static int nfs_vfs_quotactl(mount_t, int, uid_t, caddr_t, vfs_context_t); -static int nfs_vfs_getattr(mount_t, struct vfs_attr *, vfs_context_t); -static int nfs_vfs_sync(mount_t, int, vfs_context_t); -static int nfs_vfs_vget(mount_t, ino64_t, vnode_t *, vfs_context_t); -static int nfs_vfs_vptofh(vnode_t, int *, unsigned char *, vfs_context_t); -static int nfs_vfs_fhtovp(mount_t, int, unsigned char *, vnode_t *, vfs_context_t); -static int nfs_vfs_init(struct vfsconf *); -static int nfs_vfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); +int nfs_vfs_mount(mount_t, vnode_t, user_addr_t, vfs_context_t); +int nfs_vfs_start(mount_t, int, vfs_context_t); +int nfs_vfs_unmount(mount_t, int, vfs_context_t); +int nfs_vfs_root(mount_t, vnode_t *, vfs_context_t); +int nfs_vfs_quotactl(mount_t, int, uid_t, caddr_t, vfs_context_t); +int nfs_vfs_getattr(mount_t, struct vfs_attr *, vfs_context_t); +int nfs_vfs_sync(mount_t, int, vfs_context_t); +int nfs_vfs_vget(mount_t, ino64_t, vnode_t *, vfs_context_t); +int nfs_vfs_vptofh(vnode_t, int *, unsigned char *, vfs_context_t); +int nfs_vfs_fhtovp(mount_t, int, unsigned char *, vnode_t *, vfs_context_t); +int nfs_vfs_init(struct vfsconf *); +int nfs_vfs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); struct vfsops nfs_vfsops = { nfs_vfs_mount, @@ -208,16 +218,17 @@ struct vfsops nfs_vfsops = { /* * version-specific NFS functions */ -static int nfs3_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *); -static int nfs4_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *); -static int nfs3_update_statfs(struct nfsmount *, vfs_context_t); -static int 
nfs4_update_statfs(struct nfsmount *, vfs_context_t); +int nfs3_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *); +int nfs4_mount(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *); +int nfs3_fsinfo(struct nfsmount *, nfsnode_t, vfs_context_t); +int nfs3_update_statfs(struct nfsmount *, vfs_context_t); +int nfs4_update_statfs(struct nfsmount *, vfs_context_t); #if !QUOTA #define nfs3_getquota NULL #define nfs4_getquota NULL #else -static int nfs3_getquota(struct nfsmount *, vfs_context_t, u_long, int, struct dqblk *); -static int nfs4_getquota(struct nfsmount *, vfs_context_t, u_long, int, struct dqblk *); +int nfs3_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *); +int nfs4_getquota(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *); #endif struct nfs_funcs nfs3_funcs = { @@ -260,8 +271,8 @@ struct nfs_funcs nfs4_funcs = { /* * Called once to initialize data structures... */ -static int -nfs_vfs_init(struct vfsconf *vfsp) +int +nfs_vfs_init(__unused struct vfsconf *vfsp) { int i; @@ -272,10 +283,8 @@ nfs_vfs_init(struct vfsconf *vfsp) printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); printf("Try reducing NFS_SMALLFH\n"); } - if (sizeof (struct nfsmount) > NFS_MNTALLOC) { + if (sizeof (struct nfsmount) > NFS_MNTALLOC) printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); - printf("Try reducing NFS_MUIDHASHSIZ\n"); - } nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) @@ -288,8 +297,12 @@ nfs_vfs_init(struct vfsconf *vfsp) nfsiod_lck_grp = lck_grp_alloc_init("nfsiod", LCK_GRP_ATTR_NULL); nfsiod_mutex = lck_mtx_alloc_init(nfsiod_lck_grp, LCK_ATTR_NULL); - /* init mount lock group */ + /* init lock groups, etc. 
*/ nfs_mount_grp = lck_grp_alloc_init("nfs_mount", LCK_GRP_ATTR_NULL); + nfs_open_grp = lck_grp_alloc_init("nfs_open", LCK_GRP_ATTR_NULL); + nfs_global_grp = lck_grp_alloc_init("nfs_global", LCK_GRP_ATTR_NULL); + + nfs_global_mutex = lck_mtx_alloc_init(nfs_global_grp, LCK_ATTR_NULL); /* init request list mutex */ nfs_request_grp = lck_grp_alloc_init("nfs_request", LCK_GRP_ATTR_NULL); @@ -309,31 +322,34 @@ nfs_vfs_init(struct vfsconf *vfsp) NFS4_DEFAULT_ATTRIBUTES(nfs_getattr_bitmap); for (i=0; i < NFS_ATTR_BITMAP_LEN; i++) nfs_getattr_bitmap[i] &= nfs_object_attr_bitmap[i]; + TAILQ_INIT(&nfsclientids); /* initialize NFS timer callouts */ nfs_request_timer_call = thread_call_allocate(nfs_request_timer, NULL); nfs_buf_timer_call = thread_call_allocate(nfs_buf_timer, NULL); + nfs4_callback_timer_call = thread_call_allocate(nfs4_callback_timer, NULL); - vfsp->vfc_refcount++; /* make us non-unloadable */ return (0); } /* * nfs statfs call */ -static int +int nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) { nfsnode_t np; int error = 0, lockerror, status, nfsvers; u_int64_t xid; struct nfsm_chain nmreq, nmrep; - uint32_t val; + uint32_t val = 0; nfsvers = nmp->nm_vers; np = nmp->nm_dnp; + if (!np) + return (ENXIO); if ((error = vnode_get(NFSTOV(np)))) - return(error); + return (error); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -344,12 +360,12 @@ nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_FSSTAT, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (!error) error = status; nfsm_assert(error, NFSTONMP(np), ENXIO); @@ -393,7 +409,7 @@ nfs3_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) return (error); } -static int +int 
nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) { nfsnode_t np; @@ -405,8 +421,10 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) nfsvers = nmp->nm_vers; np = nmp->nm_dnp; + if (!np) + return (ENXIO); if ((error = vnode_get(NFSTOV(np)))) - return(error); + return (error); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -439,12 +457,12 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) error = nfs4_parsefattr(&nmrep, &nmp->nm_fsattr, &nvattr, NULL, NULL); lck_mtx_unlock(&nmp->nm_lock); nfsmout_if(error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; if (!error) nfs_loadattrcache(np, &nvattr, &xid, 0); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_assert(error, NFSTONMP(np), ENXIO); nfsmout_if(error); nmp->nm_fsattr.nfsa_bsize = NFS_FABLKSIZE; @@ -461,7 +479,7 @@ nfs4_update_statfs(struct nfsmount *nmp, vfs_context_t ctx) * using the nf_update_statfs() function, and other attributes are cobbled * together from whatever sources we can (getattr, fsinfo, pathconf). 
*/ -static int +int nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx) { struct nfsmount *nmp; @@ -534,12 +552,11 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx) if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { u_int32_t caps, valid; - nfsnode_t np; + nfsnode_t np = nmp->nm_dnp; - nfsm_assert(error, VFSTONFS(mp), ENXIO); + nfsm_assert(error, VFSTONFS(mp) && np, ENXIO); if (error) return (error); - np = nmp->nm_dnp; lck_mtx_lock(&nmp->nm_lock); /* @@ -717,12 +734,12 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx) /* * nfs version 3 fsinfo rpc call */ -static int +int nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx) { - int error = 0, lockerror, status, prefsize, maxsize, nmlocked = 0; + int error = 0, lockerror, status, nmlocked = 0; u_int64_t xid; - uint32_t val; + uint32_t val, prefsize, maxsize; struct nfsm_chain nmreq, nmrep; nfsm_chain_null(&nmreq); @@ -734,11 +751,11 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_FSINFO, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (!error) error = status; nfsmout_if(error); @@ -862,9 +879,15 @@ nfs_mountroot(void) error = nfs_boot_getfh(&nd, v3, sotype); if (error) { if (error == EHOSTDOWN || error == EHOSTUNREACH) { + if (nd.nd_root.ndm_mntfrom) + FREE_ZONE(nd.nd_root.ndm_mntfrom, + MAXPATHLEN, M_NAMEI); if (nd.nd_root.ndm_path) FREE_ZONE(nd.nd_root.ndm_path, MAXPATHLEN, M_NAMEI); + if (nd.nd_private.ndm_mntfrom) + FREE_ZONE(nd.nd_private.ndm_mntfrom, + MAXPATHLEN, M_NAMEI); if (nd.nd_private.ndm_path) FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI); @@ -936,7 +959,7 @@ nfs_mountroot(void) panic("nfs_mount_diskless(v2,UDP) root failed 
with %d: %s\n", error, PE_boot_args()); } } - printf("root on %s\n", (char *)&nd.nd_root.ndm_host); + printf("root on %s\n", nd.nd_root.ndm_mntfrom); vfs_unbusy(mp); mount_list_add(mp); @@ -949,7 +972,7 @@ nfs_mountroot(void) if (error) { panic("nfs_mount_diskless private failed with %d\n", error); } - printf("private on %s\n", (char *)&nd.nd_private.ndm_host); + printf("private on %s\n", nd.nd_private.ndm_mntfrom); vfs_unbusy(mppriv); mount_list_add(mppriv); @@ -957,13 +980,17 @@ nfs_mountroot(void) #endif /* NO_MOUNT_PRIVATE */ + if (nd.nd_root.ndm_mntfrom) + FREE_ZONE(nd.nd_root.ndm_mntfrom, MAXPATHLEN, M_NAMEI); if (nd.nd_root.ndm_path) FREE_ZONE(nd.nd_root.ndm_path, MAXPATHLEN, M_NAMEI); + if (nd.nd_private.ndm_mntfrom) + FREE_ZONE(nd.nd_private.ndm_mntfrom, MAXPATHLEN, M_NAMEI); if (nd.nd_private.ndm_path) FREE_ZONE(nd.nd_private.ndm_path, MAXPATHLEN, M_NAMEI); /* Get root attributes (for the time). */ - error = nfs_getattr(VTONFS(vp), &nvattr, ctx, 0); + error = nfs_getattr(VTONFS(vp), &nvattr, ctx, NGA_UNCACHED); if (error) panic("nfs_mountroot: getattr for root"); return (0); } @@ -985,7 +1012,7 @@ nfs_mount_diskless( mbuf_t m; int error; - if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_host, &mp))) { + if ((error = vfs_rootmountalloc("nfs", ndmntp->ndm_mntfrom, &mp))) { printf("nfs_mount_diskless: NFS not configured"); return (error); } @@ -1001,7 +1028,7 @@ nfs_mount_diskless( args.sotype = ndmntp->ndm_sotype; args.fh = CAST_USER_ADDR_T(&ndmntp->ndm_fh[0]); args.fhsize = ndmntp->ndm_fhlen; - args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_host); + args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_mntfrom); args.flags = NFSMNT_RESVPORT; if (ndmntp->ndm_nfsv3) args.flags |= NFSMNT_NFSV3; @@ -1080,7 +1107,7 @@ nfs_mount_diskless_private( /* * Get vnode to be covered */ - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(mntname), ctx); if ((error = namei(&nd))) { printf("nfs_mountroot: 
private namei failed!\n"); @@ -1121,14 +1148,14 @@ nfs_mount_diskless_private( /* * Allocate and initialize the filesystem. */ - mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + mp = _MALLOC_ZONE((u_int32_t)sizeof(struct mount), M_MOUNT, M_WAITOK); if (!mp) { printf("nfs_mountroot: unable to allocate mount structure\n"); vnode_put(vp); error = ENOMEM; goto out; } - bzero((char *)mp, (u_long)sizeof(struct mount)); + bzero((char *)mp, sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; @@ -1155,8 +1182,8 @@ nfs_mount_diskless_private( vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get()); - (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MNAMELEN - 1, 0); - (void) copystr(ndmntp->ndm_host, mp->mnt_vfsstat.f_mntfromname, MNAMELEN - 1, 0); + (void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0); + (void) copystr(ndmntp->ndm_mntfrom, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0); #if CONFIG_MACF mac_mount_label_init(mp); mac_mount_label_associate(ctx, mp); @@ -1169,7 +1196,7 @@ nfs_mount_diskless_private( args.sotype = ndmntp->ndm_sotype; args.fh = CAST_USER_ADDR_T(ndmntp->ndm_fh); args.fhsize = ndmntp->ndm_fhlen; - args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_host); + args.hostname = CAST_USER_ADDR_T(ndmntp->ndm_mntfrom); args.flags = NFSMNT_RESVPORT; if (ndmntp->ndm_nfsv3) args.flags |= NFSMNT_NFSV3; @@ -1207,7 +1234,7 @@ nfs_mount_diskless_private( * * mount system call */ -static int +int nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx) { int error, argsvers; @@ -1236,6 +1263,12 @@ nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx) error = copyin(data, (caddr_t)&tempargs, sizeof (struct nfs_args4)); break; case 5: + if (vfs_context_is64bit(ctx)) + error = copyin(data, (caddr_t)&args, sizeof (struct user_nfs_args5)); + else + error = copyin(data, 
(caddr_t)&tempargs, sizeof (struct nfs_args5)); + break; + case 6: if (vfs_context_is64bit(ctx)) error = copyin(data, (caddr_t)&args, sizeof (args)); else @@ -1274,6 +1307,8 @@ nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx) } if (argsvers >= 5) args.auth = tempargs.auth; + if (argsvers >= 6) + args.deadtimeout = tempargs.deadtimeout; } if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) @@ -1304,7 +1339,7 @@ nfs_vfs_mount(mount_t mp, vnode_t vp, user_addr_t data, vfs_context_t ctx) * Common code for mount and mountroot */ -static int +int nfs3_mount( struct nfsmount *nmp, vfs_context_t ctx, @@ -1322,7 +1357,6 @@ nfs3_mount( * Get file attributes for the mountpoint. These are needed * in order to properly create the root vnode. */ - // LP64todo - fix CAST_DOWN of argp->fh fhp = CAST_DOWN(u_char *, argp->fh); error = nfs3_getattr_rpc(NULL, nmp->nm_mountp, fhp, argp->fhsize, ctx, &nvattr, &xid); @@ -1332,7 +1366,7 @@ nfs3_mount( error = nfs_nget(nmp->nm_mountp, NULL, NULL, fhp, argp->fhsize, &nvattr, &xid, NG_MARKROOT, npp); if (*npp) - nfs_unlock(*npp); + nfs_node_unlock(*npp); if (error) goto out; @@ -1363,12 +1397,13 @@ nfs3_mount( out: if (*npp && error) { vnode_put(NFSTOV(*npp)); + vnode_recycle(NFSTOV(*npp)); *npp = NULL; } return (error); } -static int +int nfs4_mount( struct nfsmount *nmp, vfs_context_t ctx, @@ -1382,12 +1417,12 @@ nfs4_mount( fhandle_t fh; struct nfs_vattr nvattr; u_int64_t xid; - struct timeval now; *npp = NULL; fh.fh_len = 0; - microtime(&now); - nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; + TAILQ_INIT(&nmp->nm_open_owners); + TAILQ_INIT(&nmp->nm_recallq); + nmp->nm_stategenid = 1; /* look up path to get fh and attrs for mount point root */ numops = 2; // PUTROOTFH + LOOKUP* + GETATTR @@ -1462,9 +1497,6 @@ nfs4_mount( error = nfs_nget(nmp->nm_mountp, NULL, NULL, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MARKROOT, npp); nfsmout_if(error); - /* XXX local locking for now */ - 
vfs_setlocklocal(nmp->nm_mountp); - /* adjust I/O sizes to server limits */ if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXREAD)) { if (nmp->nm_fsattr.nfsa_maxread < (uint64_t)nmp->nm_rsize) { @@ -1490,11 +1522,11 @@ nfs4_mount( nfsmout: if (*npp) - nfs_unlock(*npp); + nfs_node_unlock(*npp); return (error); } -static int +int mountnfs( struct user_nfs_args *argp, mount_t mp, @@ -1504,7 +1536,8 @@ mountnfs( { struct nfsmount *nmp; nfsnode_t np; - int error, maxio, iosize; + int error; + uint32_t maxio, iosize; struct vfsstatfs *sbp; struct timespec ts = { 1, 0 }; @@ -1563,6 +1596,7 @@ mountnfs( nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; nmp->nm_auth = RPCAUTH_SYS; + nmp->nm_deadtimeout = 0; vfs_getnewfsid(mp); nmp->nm_mountp = mp; @@ -1682,6 +1716,12 @@ mountnfs( } } } + if (argp->version >= 6) { + if (argp->flags & NFSMNT_DEADTIMEOUT) + nmp->nm_deadtimeout = argp->deadtimeout; + } + if ((nmp->nm_flag & NFSMNT_DEADTIMEOUT) && (nmp->nm_deadtimeout <= 0)) + nmp->nm_flag &= ~NFSMNT_DEADTIMEOUT; /* set up the version-specific function tables */ if (nmp->nm_vers < NFS_VER4) @@ -1701,11 +1741,18 @@ mountnfs( if (!nfs_mbuf_mhlen) nfs_mbuf_init(); - /* NFS does its own node locking */ - mp->mnt_vtable->vfc_threadsafe = TRUE; + if (nmp->nm_vers >= NFS_VER4) { + struct timeval now; + microtime(&now); + nmp->nm_mounttime = ((uint64_t)now.tv_sec << 32) | now.tv_usec; + nmp->nm_mcred = vfs_context_ucred(ctx); + if (IS_VALID_CRED(nmp->nm_mcred)) + kauth_cred_ref(nmp->nm_mcred); + nfs4_mount_callback_setup(nmp); + } /* set up the socket */ - if ((error = nfs_connect(nmp))) + if ((error = nfs_connect(nmp, 1))) goto bad; /* @@ -1727,14 +1774,22 @@ mountnfs( /* get usecount and drop iocount */ error = vnode_ref(*vpp); vnode_put(*vpp); - if (error) + if (error) { + vnode_recycle(*vpp); goto bad; + } /* * Do statfs to ensure static info gets set to reasonable values. 
*/ - if ((error = nmp->nm_funcs->nf_update_statfs(nmp, ctx))) + if ((error = nmp->nm_funcs->nf_update_statfs(nmp, ctx))) { + int error2 = vnode_getwithref(*vpp); + vnode_rele(*vpp); + if (!error2) + vnode_put(*vpp); + vnode_recycle(*vpp); goto bad; + } sbp = vfs_statfs(mp); sbp->f_bsize = nmp->nm_fsattr.nfsa_bsize; sbp->f_blocks = nmp->nm_fsattr.nfsa_space_total / sbp->f_bsize; @@ -1794,9 +1849,24 @@ mountnfs( /* tear down the socket */ lck_mtx_unlock(&nmp->nm_lock); nfs_disconnect(nmp); - if (nmp->nm_renew_timer) { - thread_call_cancel(nmp->nm_renew_timer); - thread_call_free(nmp->nm_renew_timer); + if (nmp->nm_vers >= NFS_VER4) { + if (nmp->nm_cbid) + nfs4_mount_callback_shutdown(nmp); + if (nmp->nm_renew_timer) { + thread_call_cancel(nmp->nm_renew_timer); + thread_call_free(nmp->nm_renew_timer); + } + if (nmp->nm_longid) { + /* remove/deallocate the client ID data */ + lck_mtx_lock(nfs_global_mutex); + TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link); + if (nmp->nm_longid->nci_id) + FREE(nmp->nm_longid->nci_id, M_TEMP); + FREE(nmp->nm_longid, M_TEMP); + lck_mtx_unlock(nfs_global_mutex); + } + if (IS_VALID_CRED(nmp->nm_mcred)) + kauth_cred_unref(&nmp->nm_mcred); } lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp); FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); @@ -1804,11 +1874,74 @@ mountnfs( return (error); } +void +nfs3_umount_rpc(struct nfsmount *nmp, vfs_context_t ctx, int timeo) +{ + int error = 0, auth_len, slen; + thread_t thd = vfs_context_thread(ctx); + kauth_cred_t cred = vfs_context_ucred(ctx); + char *path; + uint64_t xid = 0; + struct nfsm_chain nmreq, nmrep; + mbuf_t mreq; + uint32_t mntport = 0; + struct sockaddr *nam = mbuf_data(nmp->nm_nam); + struct sockaddr_in saddr; + + bcopy(nam, &saddr, min(sizeof(saddr), nam->sa_len)); + auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 
+ nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + + 5 * NFSX_UNSIGNED; + nfsm_chain_null(&nmreq); + nfsm_chain_null(&nmrep); + + /* send portmap request to get mountd port */ + saddr.sin_port = htons(PMAPPORT); + nfsm_chain_build_alloc_init(error, &nmreq, 4*NFSX_UNSIGNED); + nfsm_chain_add_32(error, &nmreq, RPCPROG_MNT); + nfsm_chain_add_32(error, &nmreq, RPCMNT_VER1); + nfsm_chain_add_32(error, &nmreq, IPPROTO_UDP); + nfsm_chain_add_32(error, &nmreq, 0); + nfsm_chain_build_done(error, &nmreq); + nfsmout_if(error); + error = nfsm_rpchead2(SOCK_DGRAM, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, + RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq); + nfsmout_if(error); + nmreq.nmc_mhead = NULL; + error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep); + nfsmout_if(error); + + /* grab mountd port from portmap response */ + nfsm_chain_get_32(error, &nmrep, mntport); + nfsmout_if(error); + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); + xid = 0; + + /* MOUNT protocol UNMOUNT request */ + saddr.sin_port = htons(mntport); + path = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0]; + while (*path && (*path != '/')) + path++; + slen = strlen(path); + nfsm_chain_build_alloc_init(error, &nmreq, NFSX_UNSIGNED + nfsm_rndup(slen)); + nfsm_chain_add_string(error, &nmreq, path, slen); + nfsm_chain_build_done(error, &nmreq); + nfsmout_if(error); + error = nfsm_rpchead2(SOCK_DGRAM, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_UMOUNT, + RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq); + nfsmout_if(error); + nmreq.nmc_mhead = NULL; + error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 1, timeo, &nmrep); +nfsmout: + nfsm_chain_cleanup(&nmreq); + nfsm_chain_cleanup(&nmrep); +} /* * unmount system call */ -static int +int nfs_vfs_unmount( mount_t mp, int mntflags, @@ -1820,6 +1953,8 @@ nfs_vfs_unmount( struct nfsreq *req, *treq; struct nfs_reqqhead iodq; struct timespec ts = { 1, 0 }; + struct nfs_open_owner *noop, *nextnoop; + 
nfsnode_t np; nmp = VFSTONFS(mp); lck_mtx_lock(&nmp->nm_lock); @@ -1843,7 +1978,7 @@ nfs_vfs_unmount( */ vp = NFSTOV(nmp->nm_dnp); lck_mtx_unlock(&nmp->nm_lock); - + /* * vflush will check for busy vnodes on mountpoint. * Will do the right thing for MNT_FORCE. That is, we should @@ -1862,12 +1997,16 @@ nfs_vfs_unmount( lck_mtx_lock(&nmp->nm_lock); nmp->nm_state &= ~NFSSTA_MOUNTED; + nmp->nm_dnp = NULL; lck_mtx_unlock(&nmp->nm_lock); /* * Release the root vnode reference held by mountnfs() */ + error = vnode_get(vp); vnode_rele(vp); + if (!error) + vnode_put(vp); vflush(mp, NULLVP, FORCECLOSE); @@ -1877,29 +2016,47 @@ nfs_vfs_unmount( if (!TAILQ_EMPTY(&nmp->nm_gsscl)) nfs_gss_clnt_ctx_unmount(nmp, mntflags); - vfs_setfsprivate(mp, 0); /* don't want to end up using stale vp */ - /* mark the socket for termination */ lck_mtx_lock(&nmp->nm_lock); nmp->nm_sockflags |= NMSOCK_UNMOUNT; + /* stop callbacks */ + if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_cbid) + nfs4_mount_callback_shutdown(nmp); + /* wait for any socket poking to complete */ while (nmp->nm_sockflags & NMSOCK_POKE) msleep(&nmp->nm_sockflags, &nmp->nm_lock, PZERO-1, "nfswaitpoke", &ts); + /* Have the socket thread send the unmount RPC, if requested/appropriate. 
*/ + if ((nmp->nm_vers < NFS_VER4) && !(mntflags & MNT_FORCE) && (nmp->nm_flag & NFSMNT_CALLUMNT)) + nfs_mount_sock_thread_wake(nmp); + /* wait for the socket thread to terminate */ while (nmp->nm_sockthd) { wakeup(&nmp->nm_sockthd); msleep(&nmp->nm_sockthd, &nmp->nm_lock, PZERO-1, "nfswaitsockthd", &ts); } - /* tear down the socket */ lck_mtx_unlock(&nmp->nm_lock); + + /* tear down the socket */ nfs_disconnect(nmp); + + vfs_setfsprivate(mp, NULL); + lck_mtx_lock(&nmp->nm_lock); + if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_cbid) { + /* clear out any pending recall requests */ + while ((np = TAILQ_FIRST(&nmp->nm_recallq))) { + TAILQ_REMOVE(&nmp->nm_recallq, np, n_dlink); + np->n_dlink.tqe_next = NFSNOLIST; + } + } + /* cancel any renew timer */ - if (nmp->nm_renew_timer) { + if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_renew_timer) { thread_call_cancel(nmp->nm_renew_timer); thread_call_free(nmp->nm_renew_timer); } @@ -1907,9 +2064,19 @@ nfs_vfs_unmount( mbuf_freem(nmp->nm_nam); lck_mtx_unlock(&nmp->nm_lock); - if (!(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS))) + if ((nmp->nm_vers < NFS_VER4) && !(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS))) nfs_lockd_mount_change(-1); + if ((nmp->nm_vers >= NFS_VER4) && nmp->nm_longid) { + /* remove/deallocate the client ID data */ + lck_mtx_lock(nfs_global_mutex); + TAILQ_REMOVE(&nfsclientids, nmp->nm_longid, nci_link); + if (nmp->nm_longid->nci_id) + FREE(nmp->nm_longid->nci_id, M_TEMP); + FREE(nmp->nm_longid, M_TEMP); + lck_mtx_unlock(nfs_global_mutex); + } + /* * Loop through outstanding request list and remove dangling * references to defunct nfsmount struct @@ -1928,13 +2095,17 @@ nfs_vfs_unmount( TAILQ_INSERT_TAIL(&iodq, req, r_achain); lck_mtx_unlock(nfsiod_mutex); } + lck_mtx_lock(&req->r_mtx); lck_mtx_lock(&nmp->nm_lock); - if (req->r_rchain.tqe_next != NFSREQNOLIST) { - TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); - req->r_rchain.tqe_next = NFSREQNOLIST; + if (req->r_flags & R_RESENDQ) { + if 
(req->r_rchain.tqe_next != NFSREQNOLIST) { + TAILQ_REMOVE(&nmp->nm_resendq, req, r_rchain); + req->r_rchain.tqe_next = NFSREQNOLIST; + } req->r_flags &= ~R_RESENDQ; } lck_mtx_unlock(&nmp->nm_lock); + lck_mtx_unlock(&req->r_mtx); wakeup(req); } } @@ -1955,6 +2126,21 @@ nfs_vfs_unmount( req->r_callback.rcb_func(req); } + /* clean up open owner list */ + if (nmp->nm_vers >= NFS_VER4) { + lck_mtx_lock(&nmp->nm_lock); + TAILQ_FOREACH_SAFE(noop, &nmp->nm_open_owners, noo_link, nextnoop) { + TAILQ_REMOVE(&nmp->nm_open_owners, noop, noo_link); + noop->noo_flags &= ~NFS_OPEN_OWNER_LINK; + if (noop->noo_refcnt) + continue; + nfs_open_owner_destroy(noop); + } + lck_mtx_unlock(&nmp->nm_lock); + if (IS_VALID_CRED(nmp->nm_mcred)) + kauth_cred_unref(&nmp->nm_mcred); + } + lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp); FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT); return (0); @@ -1963,15 +2149,17 @@ nfs_vfs_unmount( /* * Return root of a filesystem */ -static int +int nfs_vfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t ctx) { vnode_t vp; struct nfsmount *nmp; int error; - u_long vpid; + u_int32_t vpid; nmp = VFSTONFS(mp); + if (!nmp || !nmp->nm_dnp) + return (ENXIO); vp = NFSTOV(nmp->nm_dnp); vpid = vnode_vid(vp); while ((error = vnode_getwithvid(vp, vpid))) { @@ -1989,7 +2177,7 @@ nfs_vfs_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t ctx) * Do operations associated with quotas */ #if !QUOTA -static int +int nfs_vfs_quotactl( __unused mount_t mp, __unused int cmds, @@ -2000,90 +2188,9 @@ nfs_vfs_quotactl( return (ENOTSUP); } #else -static int -nfs_aux_request( - struct nfsmount *nmp, - thread_t thd, - struct sockaddr_in *saddr, - mbuf_t mreq, - uint32_t xid, - int timeo, - struct nfsm_chain *nmrep) -{ - int error = 0, on = 1, try, sendat = 2; - socket_t so = NULL; - struct timeval tv = { 1, 0 }; - mbuf_t m, mrep = NULL; - struct msghdr msg; - uint32_t rxid, reply, reply_status, rejected_status; - uint32_t verf_type, verf_len, accepted_status; - 
size_t readlen; - - /* create socket and set options */ - if (((error = sock_socket(saddr->sin_family, SOCK_DGRAM, IPPROTO_UDP, NULL, NULL, &so))) || - ((error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))) || - ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)))) || - ((error = sock_setsockopt(so, SOL_SOCKET, SO_NOADDRERR, &on, sizeof(on))))) - goto nfsmout; - - for (try=0; try < timeo; try++) { - if ((error = nfs_sigintr(nmp, NULL, thd, 0))) - break; - if (!try || (try == sendat)) { - /* send the request (resending periodically) */ - if ((error = mbuf_copym(mreq, 0, MBUF_COPYALL, MBUF_WAITOK, &m))) - goto nfsmout; - bzero(&msg, sizeof(msg)); - msg.msg_name = saddr; - msg.msg_namelen = saddr->sin_len; - if ((error = sock_sendmbuf(so, &msg, m, 0, NULL))) - goto nfsmout; - sendat *= 2; - if (sendat > 30) - sendat = 30; - } - /* wait for the response */ - readlen = 1<<18; - bzero(&msg, sizeof(msg)); - error = sock_receivembuf(so, &msg, &mrep, 0, &readlen); - if (error == EWOULDBLOCK) - continue; - nfsmout_if(error); - /* parse the response */ - nfsm_chain_dissect_init(error, nmrep, mrep); - nfsm_chain_get_32(error, nmrep, rxid); - nfsm_chain_get_32(error, nmrep, reply); - nfsmout_if(error); - if ((rxid != xid) || (reply != RPC_REPLY)) - error = EBADRPC; - nfsm_chain_get_32(error, nmrep, reply_status); - nfsmout_if(error); - if (reply_status == RPC_MSGDENIED) { - nfsm_chain_get_32(error, nmrep, rejected_status); - nfsmout_if(error); - error = (rejected_status == RPC_MISMATCH) ? 
ENOTSUP : EACCES; - goto nfsmout; - } - nfsm_chain_get_32(error, nmrep, verf_type); /* verifier flavor */ - nfsm_chain_get_32(error, nmrep, verf_len); /* verifier length */ - nfsmout_if(error); - if (verf_len) - nfsm_chain_adv(error, nmrep, nfsm_rndup(verf_len)); - nfsm_chain_get_32(error, nmrep, accepted_status); - nfsm_assert(error, (accepted_status == RPC_SUCCESS), EIO); - break; - } -nfsmout: - if (so) { - sock_shutdown(so, SHUT_RDWR); - sock_close(so); - } - mbuf_freem(mreq); - return (error); -} -static int -nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, struct dqblk *dqb) +int +nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb) { int error = 0, auth_len, slen, timeo; int rqvers = (type == GRPQUOTA) ? RPCRQUOTA_EXT_VER : RPCRQUOTA_VER; @@ -2093,7 +2200,7 @@ nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru uint64_t xid = 0; struct nfsm_chain nmreq, nmrep; mbuf_t mreq; - uint32_t val = 0, bsize; + uint32_t val = 0, bsize = 0; struct sockaddr *nam = mbuf_data(nmp->nm_nam); struct sockaddr_in saddr; struct timeval now; @@ -2126,7 +2233,7 @@ nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq); nfsmout_if(error); nmreq.nmc_mhead = NULL; - error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), timeo, &nmrep); + error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep); nfsmout_if(error); /* grab rquota port from portmap response */ @@ -2157,7 +2264,7 @@ nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru RPCAUTH_SYS, auth_len, cred, NULL, nmreq.nmc_mhead, &xid, &mreq); nfsmout_if(error); nmreq.nmc_mhead = NULL; - error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), timeo, &nmrep); + error = nfs_aux_request(nmp, thd, &saddr, mreq, R_XID32(xid), 0, timeo, &nmrep); nfsmout_if(error); /* parse rquota response 
*/ @@ -2194,8 +2301,8 @@ nfs3_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru return (error); } -static int -nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, struct dqblk *dqb) +int +nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, uid_t id, int type, struct dqblk *dqb) { nfsnode_t np; int error = 0, status, nfsvers, numops; @@ -2233,7 +2340,9 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru nfsvers = nmp->nm_vers; np = nmp->nm_dnp; - if ((error = vnode_get(NFSTOV(np)))) { + if (!np) + error = ENXIO; + if (error || ((error = vnode_get(NFSTOV(np))))) { kauth_cred_unref(&cred); return(error); } @@ -2277,7 +2386,7 @@ nfs4_getquota(struct nfsmount *nmp, vfs_context_t ctx, u_long id, int type, stru return (error); } -static int +int nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t ctx) { struct nfsmount *nmp; @@ -2326,6 +2435,7 @@ nfs_vfs_quotactl(mount_t mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t c /* * Flush out the buffer cache */ +int nfs_sync_callout(vnode_t, void *); struct nfs_sync_cargs { thread_t thd; @@ -2333,7 +2443,7 @@ struct nfs_sync_cargs { int error; }; -static int +int nfs_sync_callout(vnode_t vp, void *arg) { struct nfs_sync_cargs *cargs = (struct nfs_sync_cargs*)arg; @@ -2353,7 +2463,7 @@ nfs_sync_callout(vnode_t vp, void *arg) return (VNODE_RETURNED); } -static int +int nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx) { struct nfs_sync_cargs cargs; @@ -2372,7 +2482,7 @@ nfs_vfs_sync(mount_t mp, int waitfor, vfs_context_t ctx) * Currently unsupported. 
*/ /*ARGSUSED*/ -static int +int nfs_vfs_vget( __unused mount_t mp, __unused ino64_t ino, @@ -2387,7 +2497,7 @@ nfs_vfs_vget( * At this point, this should never happen */ /*ARGSUSED*/ -static int +int nfs_vfs_fhtovp( __unused mount_t mp, __unused int fhlen, @@ -2403,7 +2513,7 @@ nfs_vfs_fhtovp( * Vnode pointer to File handle, should never happen either */ /*ARGSUSED*/ -static int +int nfs_vfs_vptofh( __unused vnode_t vp, __unused int *fhlenp, @@ -2418,7 +2528,7 @@ nfs_vfs_vptofh( * Vfs start routine, a no-op. */ /*ARGSUSED*/ -static int +int nfs_vfs_start( __unused mount_t mp, __unused int flags, @@ -2431,14 +2541,13 @@ nfs_vfs_start( /* * Do that sysctl thang... */ -static int +int nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, vfs_context_t ctx) { int error = 0, val, softnobrowse; struct sysctl_req *req = NULL; - struct vfsidctl vc; - struct user_vfsidctl user_vc; + union union_vfsidctl vc; mount_t mp; struct nfsmount *nmp = NULL; struct vfsquery vq; @@ -2471,17 +2580,10 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, case VFS_CTL_QUERY: case VFS_CTL_NOLOCKS: req = CAST_DOWN(struct sysctl_req *, oldp); - if (is_64_bit) { - error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); - if (error) - return (error); - mp = vfs_getvfs(&user_vc.vc_fsid); - } else { - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) - return (error); - mp = vfs_getvfs(&vc.vc_fsid); - } + error = SYSCTL_IN(req, &vc, is_64_bit? 
sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) + return (error); + mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */ if (mp == NULL) return (ENOENT); nmp = VFSTONFS(mp); @@ -2490,11 +2592,11 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, bzero(&vq, sizeof(vq)); req->newidx = 0; if (is_64_bit) { - req->newptr = user_vc.vc_ptr; - req->newlen = (size_t)user_vc.vc_len; + req->newptr = vc.vc64.vc_ptr; + req->newlen = (size_t)vc.vc64.vc_len; } else { - req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); - req->newlen = vc.vc_len; + req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr); + req->newlen = vc.vc32.vc_len; } } @@ -2795,11 +2897,16 @@ nfs_vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, lck_mtx_lock(&nmp->nm_lock); /* XXX don't allow users to know about/disconnect unresponsive, soft, nobrowse mounts */ softnobrowse = ((nmp->nm_flag & NFSMNT_SOFT) && (vfs_flags(nmp->nm_mountp) & MNT_DONTBROWSE)); - if (!softnobrowse && (nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_JUKEBOXTIMEO))) + if (!softnobrowse && (nmp->nm_state & NFSSTA_TIMEO)) + vq.vq_flags |= VQ_NOTRESP; + if (!softnobrowse && (nmp->nm_state & NFSSTA_JUKEBOXTIMEO) && + !(nmp->nm_flag & NFSMNT_MUTEJUKEBOX)) vq.vq_flags |= VQ_NOTRESP; - if (!softnobrowse && !(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS)) && - (nmp->nm_state & NFSSTA_LOCKTIMEO)) + if (!softnobrowse && (nmp->nm_state & NFSSTA_LOCKTIMEO) && + !(nmp->nm_flag & (NFSMNT_NOLOCKS|NFSMNT_LOCALLOCKS))) vq.vq_flags |= VQ_NOTRESP; + if (nmp->nm_state & NFSSTA_DEAD) + vq.vq_flags |= VQ_DEAD; lck_mtx_unlock(&nmp->nm_lock); error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index 47c461076..00199a6df 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -112,48 +112,50 @@ #include #include #include + #include +#include #include #include -#include /* * NFS vnode ops */ -static int nfs_vnop_lookup(struct vnop_lookup_args *); -static int nfsspec_vnop_read(struct vnop_read_args *); -static int nfsspec_vnop_write(struct vnop_write_args *); -static int nfsspec_vnop_close(struct vnop_close_args *); +int nfs_vnop_lookup(struct vnop_lookup_args *); +int nfsspec_vnop_read(struct vnop_read_args *); +int nfsspec_vnop_write(struct vnop_write_args *); +int nfsspec_vnop_close(struct vnop_close_args *); #if FIFO -static int nfsfifo_vnop_read(struct vnop_read_args *); -static int nfsfifo_vnop_write(struct vnop_write_args *); -static int nfsfifo_vnop_close(struct vnop_close_args *); +int nfsfifo_vnop_read(struct vnop_read_args *); +int nfsfifo_vnop_write(struct vnop_write_args *); +int nfsfifo_vnop_close(struct vnop_close_args *); #endif -static int nfs_vnop_ioctl(struct vnop_ioctl_args *); -static int nfs_vnop_select(struct vnop_select_args *); -static int nfs_vnop_setattr(struct vnop_setattr_args *); -static int nfs_vnop_read(struct vnop_read_args *); -static int nfs_vnop_mmap(struct vnop_mmap_args *); -static int nfs_vnop_fsync(struct vnop_fsync_args *); -static int nfs_vnop_remove(struct vnop_remove_args *); -static int nfs_vnop_rename(struct vnop_rename_args *); -static int nfs_vnop_readdir(struct vnop_readdir_args *); -static int nfs_vnop_readlink(struct vnop_readlink_args *); -static int nfs_vnop_pathconf(struct vnop_pathconf_args *); -static int nfs_vnop_pagein(struct vnop_pagein_args *); -static int nfs_vnop_pageout(struct vnop_pageout_args *); -static int nfs_vnop_blktooff(struct vnop_blktooff_args *); -static int nfs_vnop_offtoblk(struct vnop_offtoblk_args *); -static int nfs_vnop_blockmap(struct vnop_blockmap_args *); - -static int nfs3_vnop_create(struct vnop_create_args *); -static int nfs3_vnop_mknod(struct vnop_mknod_args *); -static int nfs3_vnop_getattr(struct 
vnop_getattr_args *); -static int nfs3_vnop_link(struct vnop_link_args *); -static int nfs3_vnop_mkdir(struct vnop_mkdir_args *); -static int nfs3_vnop_rmdir(struct vnop_rmdir_args *); -static int nfs3_vnop_symlink(struct vnop_symlink_args *); +int nfs_vnop_ioctl(struct vnop_ioctl_args *); +int nfs_vnop_select(struct vnop_select_args *); +int nfs_vnop_setattr(struct vnop_setattr_args *); +int nfs_vnop_read(struct vnop_read_args *); +int nfs_vnop_write(struct vnop_write_args *); +int nfs_vnop_mmap(struct vnop_mmap_args *); +int nfs_vnop_fsync(struct vnop_fsync_args *); +int nfs_vnop_remove(struct vnop_remove_args *); +int nfs_vnop_rename(struct vnop_rename_args *); +int nfs_vnop_readdir(struct vnop_readdir_args *); +int nfs_vnop_readlink(struct vnop_readlink_args *); +int nfs_vnop_pathconf(struct vnop_pathconf_args *); +int nfs_vnop_pagein(struct vnop_pagein_args *); +int nfs_vnop_pageout(struct vnop_pageout_args *); +int nfs_vnop_blktooff(struct vnop_blktooff_args *); +int nfs_vnop_offtoblk(struct vnop_offtoblk_args *); +int nfs_vnop_blockmap(struct vnop_blockmap_args *); + +int nfs3_vnop_create(struct vnop_create_args *); +int nfs3_vnop_mknod(struct vnop_mknod_args *); +int nfs3_vnop_getattr(struct vnop_getattr_args *); +int nfs3_vnop_link(struct vnop_link_args *); +int nfs3_vnop_mkdir(struct vnop_mkdir_args *); +int nfs3_vnop_rmdir(struct vnop_rmdir_args *); +int nfs3_vnop_symlink(struct vnop_symlink_args *); vnop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { @@ -209,12 +211,13 @@ static struct vnodeopv_entry_desc nfsv4_vnodeop_entries[] = { { &vnop_access_desc, (vnop_t *)nfs_vnop_access }, /* access */ { &vnop_getattr_desc, (vnop_t *)nfs4_vnop_getattr }, /* getattr */ { &vnop_setattr_desc, (vnop_t *)nfs_vnop_setattr }, /* setattr */ - { &vnop_read_desc, (vnop_t *)nfs_vnop_read }, /* read */ + { &vnop_read_desc, (vnop_t *)nfs4_vnop_read }, /* read */ { &vnop_write_desc, (vnop_t *)nfs_vnop_write }, /* write */ { 
&vnop_ioctl_desc, (vnop_t *)nfs_vnop_ioctl }, /* ioctl */ { &vnop_select_desc, (vnop_t *)nfs_vnop_select }, /* select */ { &vnop_revoke_desc, (vnop_t *)nfs_vnop_revoke }, /* revoke */ - { &vnop_mmap_desc, (vnop_t *)nfs_vnop_mmap }, /* mmap */ + { &vnop_mmap_desc, (vnop_t *)nfs4_vnop_mmap }, /* mmap */ + { &vnop_mnomap_desc, (vnop_t *)nfs4_vnop_mnomap }, /* mnomap */ { &vnop_fsync_desc, (vnop_t *)nfs_vnop_fsync }, /* fsync */ { &vnop_remove_desc, (vnop_t *)nfs_vnop_remove }, /* remove */ { &vnop_link_desc, (vnop_t *)nfs4_vnop_link }, /* link */ @@ -410,7 +413,7 @@ struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc = #endif /* FIFO */ -static int nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t); +int nfs_sillyrename(nfsnode_t,nfsnode_t,struct componentname *,vfs_context_t); /* * Find the slot in the access cache for this UID. @@ -435,9 +438,9 @@ nfs_node_mode_slot(nfsnode_t np, uid_t uid, int add) } int -nfs3_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) +nfs3_access_rpc(nfsnode_t np, u_int32_t *mode, vfs_context_t ctx) { - int error = 0, status, slot; + int error = 0, lockerror = ENOENT, status, slot; uint32_t access = 0; u_int64_t xid; struct nfsm_chain nmreq, nmrep; @@ -454,6 +457,8 @@ nfs3_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_ACCESS, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(np))) + error = lockerror; nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); if (!error) error = status; @@ -480,6 +485,8 @@ nfs3_access_rpc(nfsnode_t np, u_long *mode, vfs_context_t ctx) /* pass back the mode returned with this request */ *mode = np->n_mode[slot]; nfsmout: + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -503,7 +510,7 @@ nfs_vnop_access( vfs_context_t ctx = ap->a_context; vnode_t vp = ap->a_vp; int error = 0, slot, dorpc; - u_long mode, wmode; + u_int32_t mode, 
wmode; nfsnode_t np = VTONFS(vp); struct nfsmount *nmp; int nfsvers; @@ -583,7 +590,7 @@ nfs_vnop_access( wmode = mode; } - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); /* @@ -593,23 +600,28 @@ nfs_vnop_access( uid = kauth_cred_getuid(vfs_context_ucred(ctx)); slot = nfs_node_mode_slot(np, uid, 0); dorpc = 1; - if (NMODEVALID(np, slot)) { + if (mode == 0) { + /* not asking for any rights understood by NFS, so don't bother doing an RPC */ + /* OSAddAtomic(1, &nfsstats.accesscache_hits); */ + dorpc = 0; + wmode = 0; + } else if (NMODEVALID(np, slot)) { microuptime(&now); if ((now.tv_sec < (np->n_modestamp[slot] + nfs_access_cache_timeout)) && ((np->n_mode[slot] & mode) == mode)) { - /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_hits); */ + /* OSAddAtomic(1, &nfsstats.accesscache_hits); */ dorpc = 0; wmode = np->n_mode[slot]; } } + nfs_node_unlock(np); if (dorpc) { /* Either a no, or a don't know. Go to the wire. */ - /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */ + /* OSAddAtomic(1, &nfsstats.accesscache_misses); */ error = nmp->nm_funcs->nf_access_rpc(np, &wmode, ctx); } if (!error && ((wmode & mode) != mode)) error = EACCES; - nfs_unlock(np); return (error); } @@ -632,79 +644,41 @@ nfs3_vnop_open( struct nfsmount *nmp; struct nfs_vattr nvattr; enum vtype vtype; - int error, nfsvers; + int error; nmp = VTONMP(vp); if (!nmp) return (ENXIO); - nfsvers = nmp->nm_vers; vtype = vnode_vtype(vp); if ((vtype != VREG) && (vtype != VDIR) && (vtype != VLNK)) return (EACCES); if (ISSET(np->n_flag, NUPDATESIZE)) nfs_data_update_size(np, 0); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); if (np->n_flag & NNEEDINVALIDATE) { np->n_flag &= ~NNEEDINVALIDATE; - nfs_unlock(np); + if (vtype == VDIR) + nfs_invaldir(np); + nfs_node_unlock(np); nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if 
((error = nfs_node_lock(np))) return (error); } + if (vnode_vtype(NFSTOV(np)) == VREG) + np->n_lastrahead = -1; if (np->n_flag & NMODIFIED) { - nfs_unlock(np); - if ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)) == EINTR) - return (error); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) - return (error); if (vtype == VDIR) - np->n_direofoffset = 0; - NATTRINVALIDATE(np); /* For Open/Close consistency */ - error = nfs_getattr(np, &nvattr, ctx, 1); - if (error) { - nfs_unlock(np); + nfs_invaldir(np); + nfs_node_unlock(np); + if ((error = nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1))) return (error); - } - if (vtype == VDIR) { - /* if directory changed, purge any name cache entries */ - if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) { - np->n_flag &= ~NNEGNCENTRIES; - cache_purge(vp); - } - NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr); - } - NFS_CHANGED_UPDATE(nfsvers, np, &nvattr); } else { - NATTRINVALIDATE(np); /* For Open/Close consistency */ - error = nfs_getattr(np, &nvattr, ctx, 1); - if (error) { - nfs_unlock(np); - return (error); - } - if (NFS_CHANGED(nfsvers, np, &nvattr)) { - if (vtype == VDIR) { - np->n_direofoffset = 0; - nfs_invaldir(np); - /* purge name cache entries */ - if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) { - np->n_flag &= ~NNEGNCENTRIES; - cache_purge(vp); - } - } - nfs_unlock(np); - if ((error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1)) == EINTR) - return (error); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) - return (error); - if (vtype == VDIR) - NFS_CHANGED_UPDATE_NC(nfsvers, np, &nvattr); - NFS_CHANGED_UPDATE(nfsvers, np, &nvattr); - } + nfs_node_unlock(np); } - nfs_unlock(np); - return (0); + /* nfs_getattr() will check changed and purge caches */ + return (nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED)); } /* @@ -754,31 +728,31 @@ nfs3_vnop_close( if (ISSET(np->n_flag, NUPDATESIZE)) nfs_data_update_size(np, 0); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); if (np->n_flag 
& NNEEDINVALIDATE) { np->n_flag &= ~NNEEDINVALIDATE; - nfs_unlock(np); + nfs_node_unlock(np); nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); } if (np->n_flag & NMODIFIED) { - nfs_unlock(np); + nfs_node_unlock(np); if (nfsvers != NFS_VER2) error = nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), 0); else error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); if (error) return (error); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); NATTRINVALIDATE(np); } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } - nfs_unlock(np); + nfs_node_unlock(np); return (error); } @@ -824,40 +798,60 @@ nfs3_getattr_rpc( int -nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int alreadylocked) +nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int uncached) { struct nfsmount *nmp; - int error = 0, lockerror = ENOENT, nfsvers, avoidfloods; + int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods; + struct timespec ts = { 2, 0 }; u_int64_t xid; FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag); + if (!(nmp = NFSTONMP(np))) + return (ENXIO); + nfsvers = nmp->nm_vers; + /* Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) { - if (!alreadylocked) - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NCHG; - if (!alreadylocked) - nfs_unlock(np); + nfs_node_unlock(np); } /* Update size, if necessary */ - if (!alreadylocked && ISSET(np->n_flag, NUPDATESIZE)) + if (ISSET(np->n_flag, NUPDATESIZE)) nfs_data_update_size(np, 0); - /* - * First look in the cache. 
- */ - if ((error = nfs_getattrcache(np, nvap, alreadylocked)) == 0) - goto nfsmout; - if (error != ENOENT) - goto nfsmout; + error = nfs_node_lock(np); + nfsmout_if(error); + if (!uncached) { + while (1) { + error = nfs_getattrcache(np, nvap); + if (!error || (error != ENOENT)) { + nfs_node_unlock(np); + goto nfsmout; + } + if (!ISSET(np->n_flag, NGETATTRINPROG)) + break; + SET(np->n_flag, NGETATTRWANT); + msleep(np, &np->n_lock, PZERO-1, "nfsgetattrwant", &ts); + if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) { + nfs_node_unlock(np); + goto nfsmout; + } + } + SET(np->n_flag, NGETATTRINPROG); + inprogset = 1; + } else if (!ISSET(np->n_flag, NGETATTRINPROG)) { + SET(np->n_flag, NGETATTRINPROG); + inprogset = 1; + } + nfs_node_unlock(np); nmp = NFSTONMP(np); if (!nmp) { error = ENXIO; goto nfsmout; } - nfsvers = nmp->nm_vers; /* * Try to get both the attributes and access info by making an @@ -866,69 +860,79 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int already * attributes returned wouldn't be cached. */ if ((nfsvers != NFS_VER2) && (nfs_access_cache_timeout > 0)) { - if (!alreadylocked && ((error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) - goto nfsmout; if (nfs_attrcachetimeout(np) > 0) { - /* OSAddAtomic(1, (SInt32*)&nfsstats.accesscache_misses); */ - u_long mode = NFS_ACCESS_ALL; + /* OSAddAtomic(1, &nfsstats.accesscache_misses); */ + u_int32_t mode = NFS_ACCESS_ALL; error = nmp->nm_funcs->nf_access_rpc(np, &mode, ctx); if (error) goto nfsmout; - if ((error = nfs_getattrcache(np, nvap, 1)) == 0) - goto nfsmout; - if (error != ENOENT) + nfs_node_lock_force(np); + error = nfs_getattrcache(np, nvap); + nfs_node_unlock(np); + if (!error || (error != ENOENT)) goto nfsmout; + /* Well, that didn't work... just do a getattr... 
*/ error = 0; } - } else if (!alreadylocked) { - error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - nfsmout_if(error); } + avoidfloods = 0; tryagain: error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, ctx, nvap, &xid); - nfsmout_if(error); - error = nfs_loadattrcache(np, nvap, &xid, 0); + if (!error) { + nfs_node_lock_force(np); + error = nfs_loadattrcache(np, nvap, &xid, 0); + nfs_node_unlock(np); + } nfsmout_if(error); if (!xid) { /* out-of-order rpc - attributes were dropped */ FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid); - if (avoidfloods++ < 100) + if (avoidfloods++ < 20) goto tryagain; - /* avoidfloods>1 is bizarre. at 100 pull the plug */ - panic("nfs_getattr: getattr flood\n"); + /* avoidfloods>1 is bizarre. at 20 pull the plug */ + /* just return the last attributes we got */ + } +nfsmout: + nfs_node_lock_force(np); + if (inprogset) { + wanted = ISSET(np->n_flag, NGETATTRWANT); + CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT)); } - if (NFS_CHANGED(nfsvers, np, nvap)) { + if (!error) { + /* check if the node changed on us */ vnode_t vp = NFSTOV(np); enum vtype vtype = vnode_vtype(vp); - FSDBG(513, -1, np, -1, np); - if (vtype == VDIR) { - nfs_invaldir(np); - /* purge name cache entries */ - if (NFS_CHANGED_NC(nfsvers, np, nvap)) { - np->n_flag &= ~NNEGNCENTRIES; - cache_purge(vp); - } + if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) { + FSDBG(513, -1, np, 0, np); + np->n_flag &= ~NNEGNCENTRIES; + cache_purge(vp); + np->n_ncgen++; + NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap); } - if (!alreadylocked) { - nfs_unlock(np); - lockerror = ENOENT; + if (NFS_CHANGED(nfsvers, np, nvap)) { + FSDBG(513, -1, np, -1, np); + if (vtype == VDIR) + nfs_invaldir(np); + nfs_node_unlock(np); + if (wanted) + wakeup(np); error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); FSDBG(513, -1, np, -2, error); - if (!error) - error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); if (!error) { - if (vtype == VDIR) - NFS_CHANGED_UPDATE_NC(nfsvers, 
np, nvap); + nfs_node_lock_force(np); NFS_CHANGED_UPDATE(nfsvers, np, nvap); + nfs_node_unlock(np); } } else { - /* invalidate later */ - np->n_flag |= NNEEDINVALIDATE; + nfs_node_unlock(np); + if (wanted) + wakeup(np); } + } else { + nfs_node_unlock(np); + if (wanted) + wakeup(np); } -nfsmout: - if (!lockerror) - nfs_unlock(np); FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag); return (error); } @@ -936,7 +940,7 @@ nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int already /* * NFS getattr call from vfs. */ -static int +int nfs3_vnop_getattr( struct vnop_getattr_args /* { struct vnodeop_desc *a_desc; @@ -950,7 +954,7 @@ nfs3_vnop_getattr( struct vnode_attr *vap = ap->a_vap; dev_t rdev; - error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, 0); + error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED); if (error) return (error); @@ -983,7 +987,7 @@ nfs3_vnop_getattr( /* * NFS setattr call. */ -static int +int nfs_vnop_setattr( struct vnop_setattr_args /* { struct vnodeop_desc *a_desc; @@ -1004,6 +1008,9 @@ nfs_vnop_setattr( nfsnode_t dnp = NULL; vnode_t dvp = NULL; const char *vname = NULL; + struct nfs_open_owner *noop = NULL; + struct nfs_open_file *nofp = NULL; + struct nfs_vattr nvattr; nmp = VTONMP(vp); if (!nmp) @@ -1043,11 +1050,11 @@ nfs_vnop_setattr( FSDBG_TOP(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, np->n_flag); /* clear NNEEDINVALIDATE, if set */ - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); if (np->n_flag & NNEEDINVALIDATE) np->n_flag &= ~NNEEDINVALIDATE; - nfs_unlock(np); + nfs_node_unlock(np); /* flush everything */ error = nfs_vinvalbuf(vp, (vap->va_data_size ? 
V_SAVE : 0) , ctx, 1); if (error) { @@ -1055,7 +1062,45 @@ nfs_vnop_setattr( FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1); return (error); } - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + if (nfsvers >= NFS_VER4) { + /* setting file size requires having the file open for write access */ + noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1); + if (!noop) + return (ENOMEM); +retryopen: + error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1); + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) + error = EIO; + if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) { + nfs4_reopen(nofp, vfs_context_thread(ctx)); + nofp = NULL; + goto retryopen; + } + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) { + /* we don't have the file open for write access, so open it */ + error = nfs_mount_state_in_use_start(nmp); + if (!error) + error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx)); + if (error) { + nfs_open_owner_rele(noop); + return (error); + } + error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (!error) + nofp->nof_flags |= NFS_OPEN_FILE_SETATTR; + if (nfs_mount_state_error_should_restart(error)) { + nfs_open_file_clear_busy(nofp); + nofp = NULL; + } + if (nfs_mount_state_in_use_end(nmp, error)) + goto retryopen; + } + } + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); if (np->n_size > vap->va_data_size) { /* shrinking? 
*/ daddr64_t obn, bn; int neweofoff, mustwrite; @@ -1111,7 +1156,7 @@ nfs_vnop_setattr( // Note: bp has been released if (error) { FSDBG(512, bp, 0xd00dee, 0xbad, error); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_error = error; np->n_flag |= NWRITEERR; /* @@ -1122,10 +1167,10 @@ nfs_vnop_setattr( * we may no longer know the correct size) */ NATTRINVALIDATE(np); - nfs_unlock(np); + nfs_node_unlock(np); nfs_data_unlock(np); nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); error = 0; } } @@ -1134,57 +1179,69 @@ nfs_vnop_setattr( ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */ origsize = np->n_size; np->n_size = np->n_vattr.nva_size = vap->va_data_size; + nfs_node_lock_force(np); CLR(np->n_flag, NUPDATESIZE); + nfs_node_unlock(np); FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); } } else if (VATTR_IS_ACTIVE(vap, va_modify_time) || VATTR_IS_ACTIVE(vap, va_access_time) || (vap->va_vaflags & VA_UTIMES_NULL)) { - if ((error = nfs_lock(np, NFS_NODE_LOCK_SHARED))) + if ((error = nfs_node_lock(np))) return (error); if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) { - nfs_unlock(np); + nfs_node_unlock(np); error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); if (error == EINTR) return (error); } else { - nfs_unlock(np); + nfs_node_unlock(np); } } if (VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + if ((error = nfs_node_lock(np))) { if (VATTR_IS_ACTIVE(vap, va_data_size)) nfs_data_unlock(np); return (error); } NMODEINVALIDATE(np); - nfs_unlock(np); + nfs_node_unlock(np); dvp = vnode_getparent(vp); vname = vnode_getname(vp); dnp = (dvp && vname) ? 
VTONFS(dvp) : NULL; if (dnp) { - error = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); if (error) { dnp = NULL; error = 0; } } if (dnp) { - nfs_dulookup_init(&dul, dnp, vname, strlen(vname)); + nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx); nfs_dulookup_start(&dul, dnp, ctx); } } - error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx, 0); +retrysetattr: + if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4)) + error = nfs_mount_state_in_use_start(nmp); + + if (!error) { + error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx); + + if (VATTR_IS_ACTIVE(vap, va_data_size) && (nfsvers >= NFS_VER4)) + if (nfs_mount_state_in_use_end(nmp, error)) + goto retrysetattr; + } if (VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid)) { if (dnp) { nfs_dulookup_finish(&dul, dnp, ctx); - nfs_unlock(dnp); + nfs_node_clear_busy(dnp); } if (dvp != NULLVP) vnode_put(dvp); @@ -1198,15 +1255,45 @@ nfs_vnop_setattr( /* make every effort to resync file size w/ server... */ int err; /* preserve "error" for return */ np->n_size = np->n_vattr.nva_size = origsize; + nfs_node_lock_force(np); CLR(np->n_flag, NUPDATESIZE); + nfs_node_unlock(np); FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002); ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ vap->va_data_size = origsize; - err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx, 0); + err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx); if (err) printf("nfs_vnop_setattr: nfs%d_setattr_rpc %d %d\n", nfsvers, error, err); } + nfs_node_lock_force(np); + /* + * The size was just set. If the size is already marked for update, don't + * trust the newsize (it may have been set while the setattr was in progress). + * Clear the update flag and make sure we fetch new attributes so we are sure + * we have the latest size. 
+ */ + if (ISSET(np->n_flag, NUPDATESIZE)) { + CLR(np->n_flag, NUPDATESIZE); + NATTRINVALIDATE(np); + nfs_node_unlock(np); + nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED); + } else { + nfs_node_unlock(np); + } nfs_data_unlock(np); + if (nfsvers >= NFS_VER4) { + if (nofp->nof_flags & NFS_OPEN_FILE_SETATTR) { + int err = nfs4_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (err) { + vname = vnode_getname(NFSTOV(np)); + printf("nfs_vnop_setattr: close error: %d, %s\n", err, vname); + vnode_putname(vname); + } + nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR; + nfs_open_file_clear_busy(nofp); + } + nfs_open_owner_rele(noop); + } } return (error); } @@ -1218,12 +1305,11 @@ int nfs3_setattr_rpc( nfsnode_t np, struct vnode_attr *vap, - vfs_context_t ctx, - int alreadylocked) + vfs_context_t ctx) { struct nfsmount *nmp = NFSTONMP(np); int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers; - u_int64_t xid; + u_int64_t xid, nextxid; struct nfsm_chain nmreq, nmrep; if (!nmp) @@ -1330,7 +1416,7 @@ nfs3_setattr_rpc( nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, &nmrep, &xid, &status); - if (!alreadylocked && ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) { struct timespec premtime = { 0, 0 }; @@ -1351,9 +1437,24 @@ nfs3_setattr_rpc( error = status; nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); } + /* + * We just changed the attributes and we want to make sure that we + * see the latest attributes. Get the next XID. If it's not the + * next XID after the SETATTR XID, then it's possible that another + * RPC was in flight at the same time and it might put stale attributes + * in the cache. In that case, we invalidate the attributes and set + * the attribute cache XID to guarantee that newer attributes will + * get loaded next. 
+ */ + nextxid = 0; + nfs_get_xid(&nextxid); + if (nextxid != (xid + 1)) { + np->n_xid = nextxid; + NATTRINVALIDATE(np); + } nfsmout: - if (!alreadylocked && !lockerror) - nfs_unlock(np); + if (!lockerror) + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -1364,7 +1465,7 @@ nfs3_setattr_rpc( * First look in cache * If not found, unlock the directory nfsnode and do the RPC */ -static int +int nfs_vnop_lookup( struct vnop_lookup_args /* { struct vnodeop_desc *a_desc; @@ -1383,7 +1484,7 @@ nfs_vnop_lookup( nfsnode_t dnp, np; struct nfsmount *nmp; mount_t mp; - int nfsvers, error, lockerror = ENOENT, isdot, isdotdot, negnamecache; + int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache; u_int64_t xid; struct nfs_vattr nvattr; int ngflags; @@ -1404,20 +1505,11 @@ nfs_vnop_lookup( nfsvers = nmp->nm_vers; negnamecache = !(nmp->nm_flag & NFSMNT_NONEGNAMECACHE); - error = lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE); - if (!error) - error = nfs_getattr(dnp, &nvattr, ctx, 1); - if (error) + if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) + goto error_return; + /* nfs_getattr() will check changed and purge caches */ + if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED))) goto error_return; - if (NFS_CHANGED_NC(nfsvers, dnp, &nvattr)) { - /* - * This directory has changed on us. - * Purge any name cache entries. 
- */ - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &nvattr); - } error = cache_lookup(dvp, vpp, cnp); switch (error) { @@ -1426,13 +1518,23 @@ nfs_vnop_lookup( goto error_return; case 0: /* cache miss */ - break; + if ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) { + /* if rdirplus, try dir buf cache lookup */ + error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0); + if (!error && np) { + /* dir buf cache hit */ + *vpp = NFSTOV(np); + error = -1; + } + } + if (error != -1) /* cache miss */ + break; + /* FALLTHROUGH */ case -1: /* cache hit, not really an error */ - OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_hits); + OSAddAtomic(1, &nfsstats.lookupcache_hits); - nfs_unlock(dnp); - lockerror = ENOENT; + nfs_node_clear_busy(dnp); /* check for directory access */ naa.a_vp = dvp; @@ -1467,7 +1569,7 @@ nfs_vnop_lookup( goto error_return; } if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) && - (cnp->cn_namelen > (long)nmp->nm_fsattr.nfsa_maxname)) { + (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) { error = ENAMETOOLONG; goto error_return; } @@ -1475,7 +1577,7 @@ nfs_vnop_lookup( error = 0; newvp = NULLVP; - OSAddAtomic(1, (SInt32*)&nfsstats.lookupcache_misses); + OSAddAtomic(1, &nfsstats.lookupcache_misses); error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req); nfsmout_if(error); @@ -1486,7 +1588,6 @@ nfs_vnop_lookup( isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len); found: - if (flags & ISLASTCN) { switch (cnp->cn_nameiop) { case DELETE: @@ -1503,8 +1604,6 @@ nfs_vnop_lookup( } if (isdotdot) { - nfs_unlock(dnp); - lockerror = ENOENT; newvp = vnode_getparent(dvp); if (!newvp) { error = ENOENT; @@ -1515,15 +1614,17 @@ nfs_vnop_lookup( if (error) goto error_return; newvp = dvp; + nfs_node_lock_force(dnp); if (fh.fh_len && (dnp->n_xid <= xid)) nfs_loadattrcache(dnp, &nvattr, &xid, 0); + nfs_node_unlock(dnp); } else { ngflags = (cnp->cn_flags & 
MAKEENTRY) ? NG_MAKEENTRY : 0; error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, ngflags, &np); if (error) goto error_return; newvp = NFSTOV(np); - nfs_unlock(np); + nfs_node_unlock(np); } *vpp = newvp; @@ -1540,12 +1641,14 @@ nfs_vnop_lookup( if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != CREATE) && negnamecache) { /* add a negative entry in the name cache */ + nfs_node_lock_force(dnp); cache_enter(dvp, NULL, cnp); dnp->n_flag |= NNEGNCENTRIES; + nfs_node_unlock(dnp); } error_return: - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (error && *vpp) { vnode_put(*vpp); *vpp = NULLVP; @@ -1557,7 +1660,7 @@ nfs_vnop_lookup( * NFS read call. * Just call nfs_bioread() to do the work. */ -static int +int nfs_vnop_read( struct vnop_read_args /* { struct vnodeop_desc *a_desc; @@ -1569,14 +1672,14 @@ nfs_vnop_read( { if (vnode_vtype(ap->a_vp) != VREG) return (EPERM); - return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, NULL, ap->a_context)); + return (nfs_bioread(VTONFS(ap->a_vp), ap->a_uio, ap->a_ioflag, ap->a_context)); } /* * NFS readlink call */ -static int +int nfs_vnop_readlink( struct vnop_readlink_args /* { struct vnodeop_desc *a_desc; @@ -1588,18 +1691,18 @@ nfs_vnop_readlink( vfs_context_t ctx = ap->a_context; nfsnode_t np = VTONFS(ap->a_vp); struct nfsmount *nmp; - int error = 0, lockerror, nfsvers, changed = 0, n; + int error = 0, nfsvers; uint32_t buflen; - struct uio *uio = ap->a_uio; + uio_t uio = ap->a_uio; struct nfs_vattr nvattr; struct nfsbuf *bp = NULL; if (vnode_vtype(ap->a_vp) != VLNK) return (EPERM); - if (uio_uio_resid(uio) == 0) + if (uio_resid(uio) == 0) return (0); - if (uio->uio_offset < 0) + if (uio_offset(uio) < 0) return (EINVAL); nmp = VTONMP(ap->a_vp); @@ -1607,34 +1710,20 @@ nfs_vnop_readlink( return (ENXIO); nfsvers = nmp->nm_vers; - error = lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (!error) - error = nfs_getattr(np, &nvattr, ctx, 
1); - if (error) { - if (!lockerror) - nfs_unlock(np); + /* nfs_getattr() will check changed and purge caches */ + if ((error = nfs_getattr(np, &nvattr, ctx, NGA_CACHED))) { FSDBG(531, np, 0xd1e0001, 0, error); return (error); } - if (NFS_CHANGED(nfsvers, np, &nvattr)) { - /* link changed, so just ignore NB_CACHE */ - changed = 1; - NFS_CHANGED_UPDATE(nfsvers, np, &nvattr); - } - nfs_unlock(np); - OSAddAtomic(1, (SInt32*)&nfsstats.biocache_readlinks); + OSAddAtomic(1, &nfsstats.biocache_readlinks); error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_READ, &bp); if (error) { FSDBG(531, np, 0xd1e0002, 0, error); return (error); } - if (changed) - CLR(bp->nb_flags, NB_CACHE); if (!ISSET(bp->nb_flags, NB_CACHE)) { - SET(bp->nb_flags, NB_READ); - CLR(bp->nb_flags, NB_DONE); - OSAddAtomic(1, (SInt32*)&nfsstats.readlink_bios); + OSAddAtomic(1, &nfsstats.readlink_bios); buflen = bp->nb_bufsize; error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx); if (error) { @@ -1644,14 +1733,9 @@ nfs_vnop_readlink( bp->nb_validoff = 0; bp->nb_validend = buflen; } - nfs_buf_iodone(bp); - } - if (!error) { - // LP64todo - fix this! 
- n = min(uio_uio_resid(uio), bp->nb_validend); - if (n > 0) - error = uiomove(bp->nb_data, n, uio); } + if (!error && (bp->nb_validend > 0)) + error = uiomove(bp->nb_data, bp->nb_validend, uio); FSDBG(531, np, bp->nb_validend, 0, error); nfs_buf_release(bp, 1); return (error); @@ -1682,7 +1766,7 @@ nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); @@ -1705,7 +1789,7 @@ nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) *buflenp = len; nfsmout: if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -1716,37 +1800,53 @@ nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx) * Ditto above */ int -nfs_read_rpc(nfsnode_t np, struct uio *uiop, vfs_context_t ctx) +nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx) { struct nfsmount *nmp; int error = 0, nfsvers, eof = 0; - size_t nmrsize, len, retlen, tsiz; + size_t nmrsize, len, retlen; + user_ssize_t tsiz; off_t txoffset; struct nfsreq rq, *req = &rq; + uint32_t stategenid = 0, restart = 0; - FSDBG_TOP(536, np, uiop->uio_offset, uio_uio_resid(uiop), 0); + FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0); nmp = NFSTONMP(np); if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; nmrsize = nmp->nm_rsize; - // LP64todo - fix this - tsiz = uio_uio_resid(uiop); - if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && (nfsvers == NFS_VER2)) { - FSDBG_BOT(536, np, uiop->uio_offset, uio_uio_resid(uiop), EFBIG); + txoffset = uio_offset(uio); + tsiz = uio_resid(uio); + if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) { + 
FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG); return (EFBIG); } - txoffset = uiop->uio_offset; - while (tsiz > 0) { - len = retlen = (tsiz > nmrsize) ? nmrsize : tsiz; + len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz; FSDBG(536, np, txoffset, len, 0); + if (nmp->nm_vers >= NFS_VER4) + stategenid = nmp->nm_stategenid; error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); if (!error) - error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uiop, &retlen, &eof); + error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof); + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && + (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */ + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_read_rpc: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + if (!(error = nfs_mount_state_wait_for_recovery(nmp))) + continue; + } if (error) break; txoffset += retlen; @@ -1758,7 +1858,7 @@ nfs_read_rpc(nfsnode_t np, struct uio *uiop, vfs_context_t ctx) tsiz = 0; } - FSDBG_BOT(536, np, eof, uio_uio_resid(uiop), error); + FSDBG_BOT(536, np, eof, uio_resid(uio), error); return (error); } @@ -1804,7 +1904,7 @@ int nfs3_read_rpc_async_finish( nfsnode_t np, struct nfsreq *req, - struct uio *uiop, + uio_t uio, size_t *lenp, int *eofp) { @@ -1827,7 +1927,7 @@ nfs3_read_rpc_async_finish( if (error == EINPROGRESS) /* async request restarted */ return (error); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) nfsm_chain_postop_attr_update(error, 
&nmrep, np, &xid); @@ -1840,12 +1940,12 @@ nfs3_read_rpc_async_finish( nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid); } if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); nfsm_chain_get_32(error, &nmrep, retlen); if ((nfsvers == NFS_VER2) && (retlen > *lenp)) error = EBADRPC; nfsmout_if(error); - error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uiop); + error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio); if (eofp) { if (nfsvers == NFS_VER3) { if (!eof && !retlen) @@ -1875,7 +1975,7 @@ nfs_vnop_write( } */ *ap) { vfs_context_t ctx = ap->a_context; - struct uio *uio = ap->a_uio; + uio_t uio = ap->a_uio; vnode_t vp = ap->a_vp; nfsnode_t np = VTONFS(vp); int ioflag = ap->a_ioflag; @@ -1886,26 +1986,26 @@ nfs_vnop_write( int biosize; int n, on, error = 0; off_t boff, start, end; - struct iovec_32 iov; - struct uio auio; + uio_t auio; + char auio_buf [ UIO_SIZEOF(1) ]; thread_t thd; kauth_cred_t cred; - FSDBG_TOP(515, np, uio->uio_offset, uio_uio_resid(uio), ioflag); + FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag); if (vnode_vtype(vp) != VREG) { - FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), EIO); + FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO); return (EIO); } thd = vfs_context_thread(ctx); cred = vfs_context_ucred(ctx); - nfs_data_lock(np, NFS_NODE_LOCK_SHARED); + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + if ((error = nfs_node_lock(np))) { nfs_data_unlock(np); - FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), error); + FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error); return (error); } np->n_wrbusy++; @@ -1916,71 +2016,67 @@ nfs_vnop_write( } if (np->n_flag & NNEEDINVALIDATE) { np->n_flag &= ~NNEEDINVALIDATE; - nfs_unlock(np); + nfs_node_unlock(np); nfs_data_unlock(np); nfs_vinvalbuf(vp, V_SAVE|V_IGNORE_WRITEERR, ctx, 1); - nfs_data_lock(np, NFS_NODE_LOCK_SHARED); - if (error || ((error = nfs_lock(np, 
NFS_NODE_LOCK_EXCLUSIVE)))) - goto out; + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); + } else { + nfs_node_unlock(np); } - if (error) { - nfs_unlock(np); + if (error) goto out; - } biosize = nmp->nm_biosize; if (ioflag & (IO_APPEND | IO_SYNC)) { + nfs_node_lock_force(np); if (np->n_flag & NMODIFIED) { NATTRINVALIDATE(np); - nfs_unlock(np); + nfs_node_unlock(np); nfs_data_unlock(np); error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); - nfs_data_lock(np, NFS_NODE_LOCK_SHARED); - if (error || ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) { - FSDBG(515, np, uio->uio_offset, 0x10bad01, error); + nfs_data_lock(np, NFS_DATA_LOCK_SHARED); + if (error) { + FSDBG(515, np, uio_offset(uio), 0x10bad01, error); goto out; } + } else { + nfs_node_unlock(np); } if (ioflag & IO_APPEND) { - NATTRINVALIDATE(np); - nfs_unlock(np); nfs_data_unlock(np); - error = nfs_getattr(np, &nvattr, ctx, 0); + /* nfs_getattr() will check changed and purge caches */ + error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED); /* we'll be extending the file, so take the data lock exclusive */ - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); - if (error || ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE)))) { - FSDBG(515, np, uio->uio_offset, 0x10bad02, error); + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); + if (error) { + FSDBG(515, np, uio_offset(uio), 0x10bad02, error); goto out; } - uio->uio_offset = np->n_size; + uio_setoffset(uio, np->n_size); } } - if (uio->uio_offset < 0) { - nfs_unlock(np); + if (uio_offset(uio) < 0) { error = EINVAL; - FSDBG_BOT(515, np, uio->uio_offset, 0xbad0ff, error); + FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error); goto out; } - if (uio_uio_resid(uio) == 0) { - nfs_unlock(np); + if (uio_resid(uio) == 0) goto out; - } - - nfs_unlock(np); - if (((uio->uio_offset + uio_uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) { + if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) { /* it looks like we'll be extending the file, so take the 
data lock exclusive */ nfs_data_unlock(np); - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); } do { - OSAddAtomic(1, (SInt32*)&nfsstats.biocache_writes); - lbn = uio->uio_offset / biosize; - on = uio->uio_offset % biosize; - // LP64todo - fix this - n = min((unsigned)(biosize - on), uio_uio_resid(uio)); + OSAddAtomic(1, &nfsstats.biocache_writes); + lbn = uio_offset(uio) / biosize; + on = uio_offset(uio) % biosize; + n = biosize - on; + if (uio_resid(uio) < n) + n = uio_resid(uio); again: /* * Get a cache block for writing. The range to be written is @@ -2017,7 +2113,7 @@ nfs_vnop_write( */ if (bp->nb_dirtyend > 0) { if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) { - FSDBG(515, np, uio->uio_offset, bp, 0xd15c001); + FSDBG(515, np, uio_offset(uio), bp, 0xd15c001); /* write/commit buffer "synchronously" */ /* (NB_STABLE indicates that data writes should be FILESYNC) */ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL)); @@ -2036,7 +2132,7 @@ nfs_vnop_write( pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1); /* check if there are dirty pages outside the write range */ if (bp->nb_dirty & ~pagemask) { - FSDBG(515, np, uio->uio_offset, bp, 0xd15c002); + FSDBG(515, np, uio_offset(uio), bp, 0xd15c002); /* write/commit buffer "synchronously" */ /* (NB_STABLE indicates that data writes should be FILESYNC) */ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL)); @@ -2049,7 +2145,7 @@ nfs_vnop_write( /* if the first or last pages are already dirty */ /* make sure that the dirty range encompasses those pages */ if (NBPGDIRTY(bp,firstpg) || NBPGDIRTY(bp,lastpg)) { - FSDBG(515, np, uio->uio_offset, bp, 0xd15c003); + FSDBG(515, np, uio_offset(uio), bp, 0xd15c003); bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE); if (NBPGDIRTY(bp,lastpg)) { bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE; @@ -2070,13 +2166,13 @@ nfs_vnop_write( * If there was a partial buf at the old eof, validate * and zero the new bytes. 
*/ - if ((uio->uio_offset + n) > (off_t)np->n_size) { + if ((uio_offset(uio) + n) > (off_t)np->n_size) { struct nfsbuf *eofbp = NULL; daddr64_t eofbn = np->n_size / biosize; int eofoff = np->n_size % biosize; - int neweofoff = (uio->uio_offset + n) % biosize; + int neweofoff = (uio_offset(uio) + n) % biosize; - FSDBG(515, 0xb1ffa000, uio->uio_offset + n, eofoff, neweofoff); + FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff); if (eofoff && (eofbn < lbn) && ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp)))) @@ -2111,11 +2207,11 @@ nfs_vnop_write( } } } - np->n_size = uio->uio_offset + n; - nfs_lock(np, NFS_NODE_LOCK_FORCE); + np->n_size = uio_offset(uio) + n; + nfs_node_lock_force(np); CLR(np->n_flag, NUPDATESIZE); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001); ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */ if (eofbp) { @@ -2222,29 +2318,20 @@ nfs_vnop_write( NFS_BUF_MAP(bp); /* setup uio for read(s) */ boff = NBOFF(bp); - auio.uio_iovs.iov32p = &iov; - auio.uio_iovcnt = 1; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - auio.uio_segflg = UIO_SYSSPACE; -#else - auio.uio_segflg = UIO_SYSSPACE32; -#endif - auio.uio_rw = UIO_READ; + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &auio_buf, sizeof(auio_buf)); if (dirtypg <= (end-1)/PAGE_SIZE) { /* there's a dirty page in the way, so just do two reads */ /* we'll read the preceding data here */ - auio.uio_offset = boff + start; - iov.iov_len = on - start; - uio_uio_resid_set(&auio, iov.iov_len); - iov.iov_base = (uintptr_t) bp->nb_data + start; - error = nfs_read_rpc(np, &auio, ctx); + uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start); + error = nfs_read_rpc(np, auio, ctx); if (error) /* couldn't read the data, so treat buffer as NOCACHE */ SET(bp->nb_flags, 
(NB_NOCACHE|NB_STABLE)); - if (uio_uio_resid(&auio) > 0) { - FSDBG(516, bp, (caddr_t)iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee01); - // LP64todo - fix this - bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio)); + if (uio_resid(auio) > 0) { + FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01); + bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio)); } if (!error) { /* update validoff/validend if necessary */ @@ -2285,17 +2372,14 @@ nfs_vnop_write( error = 0; } else if (!ISSET(bp->nb_flags, NB_NOCACHE)) { /* now we'll read the (rest of the) data */ - auio.uio_offset = boff + start; - iov.iov_len = end - start; - uio_uio_resid_set(&auio, iov.iov_len); - iov.iov_base = (uintptr_t) (bp->nb_data + start); - error = nfs_read_rpc(np, &auio, ctx); + uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start); + error = nfs_read_rpc(np, auio, ctx); if (error) /* couldn't read the data, so treat buffer as NOCACHE */ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE)); - if (uio_uio_resid(&auio) > 0) { - FSDBG(516, bp, (caddr_t)iov.iov_base - bp->nb_data, uio_uio_resid(&auio), 0xd00dee02); - // LP64todo - fix this - bzero((caddr_t)iov.iov_base, uio_uio_resid(&auio)); + if (uio_resid(auio) > 0) { + FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02); + bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio)); } } if (!error) { @@ -2307,10 +2391,10 @@ nfs_vnop_write( if ((off_t)np->n_size > boff + bp->nb_validend) bp->nb_validend = min(np->n_size - (boff + start), biosize); /* validate any pages before the write offset's page */ - for (; start < trunc_page_32(on); start+=PAGE_SIZE) + for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE) NBPGVALID_SET(bp, start/PAGE_SIZE); /* validate any pages after the range of pages being written to */ - for (; (end - 1) > round_page_32(on+n-1); end-=PAGE_SIZE) + for (; 
(end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE) NBPGVALID_SET(bp, (end-1)/PAGE_SIZE); } /* Note: pages being written to will be validated when written */ @@ -2323,9 +2407,9 @@ nfs_vnop_write( goto out; } - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_flag |= NMODIFIED; - nfs_unlock(np); + nfs_node_unlock(np); NFS_BUF_MAP(bp); error = uiomove((char *)bp->nb_data + on, n, uio); @@ -2372,20 +2456,21 @@ nfs_vnop_write( * again and not just committed. */ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); } CLR(bp->nb_flags, NB_NEEDCOMMIT); - nfs_unlock(np); + nfs_node_unlock(np); } if (ioflag & IO_SYNC) { error = nfs_buf_write(bp); if (error) goto out; - } else if (((n + on) == biosize) || (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) { + } else if (((n + on) == biosize) || (ioflag & IO_APPEND) || + (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) { SET(bp->nb_flags, NB_ASYNC); error = nfs_buf_write(bp); if (error) @@ -2395,21 +2480,21 @@ nfs_vnop_write( if (!ISSET(bp->nb_flags, NB_DELWRI)) { proc_t p = vfs_context_proc(ctx); if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); } nfs_buf_write_delayed(bp); } if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) nfs_flushcommits(np, 1); - } while (uio_uio_resid(uio) > 0 && n > 0); + } while (uio_resid(uio) > 0 && n > 0); out: - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); np->n_wrbusy--; - nfs_unlock(np); + nfs_node_unlock(np); nfs_data_unlock(np); - FSDBG_BOT(515, np, uio->uio_offset, uio_uio_resid(uio), error); + FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error); return (error); } @@ -2420,61 +2505,77 @@ nfs_vnop_write( int nfs_write_rpc( nfsnode_t np, - struct uio *uiop, + uio_t uio, vfs_context_t ctx, int *iomodep, uint64_t 
*wverfp) { - return nfs_write_rpc2(np, uiop, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp); + return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp); } int nfs_write_rpc2( nfsnode_t np, - struct uio *uiop, + uio_t uio, thread_t thd, kauth_cred_t cred, int *iomodep, uint64_t *wverfp) { struct nfsmount *nmp; - int error = 0, nfsvers, restart; + int error = 0, nfsvers; int backup, wverfset, commit, committed; uint64_t wverf = 0, wverf2; size_t nmwsize, totalsize, tsiz, len, rlen; struct nfsreq rq, *req = &rq; + uint32_t stategenid = 0, vrestart = 0, restart = 0; #if DIAGNOSTIC /* XXX limitation based on need to back up uio on short write */ - if (uiop->uio_iovcnt != 1) + if (uio_iovcnt(uio) != 1) panic("nfs3_write_rpc: iovcnt > 1"); #endif - FSDBG_TOP(537, np, uiop->uio_offset, uio_uio_resid(uiop), *iomodep); + FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep); nmp = NFSTONMP(np); if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; nmwsize = nmp->nm_wsize; - restart = wverfset = 0; + wverfset = 0; committed = NFS_WRITE_FILESYNC; - // LP64todo - fix this - totalsize = tsiz = uio_uio_resid(uiop); - if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && (nfsvers == NFS_VER2)) { - FSDBG_BOT(537, np, uiop->uio_offset, uio_uio_resid(uiop), EFBIG); + totalsize = tsiz = uio_resid(uio); + if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) { + FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG); return (EFBIG); } while (tsiz > 0) { len = (tsiz > nmwsize) ? 
nmwsize : tsiz; - FSDBG(537, np, uiop->uio_offset, len, 0); - error = nmp->nm_funcs->nf_write_rpc_async(np, uiop, len, thd, cred, *iomodep, NULL, &req); + FSDBG(537, np, uio_offset(uio), len, 0); + if (nmp->nm_vers >= NFS_VER4) + stategenid = nmp->nm_stategenid; + error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req); if (!error) error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2); nmp = NFSTONMP(np); if (!nmp) error = ENXIO; + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) && + (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */ + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_write_rpc: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + if (!(error = nfs_mount_state_wait_for_recovery(nmp))) + continue; + } if (error) break; if (nfsvers == NFS_VER2) { @@ -2485,10 +2586,7 @@ nfs_write_rpc2( /* check for a short write */ if (rlen < len) { backup = len - rlen; - uio_iov_base_add(uiop, -backup); - uio_iov_len_add(uiop, backup); - uiop->uio_offset -= backup; - uio_uio_resid_add(uiop, backup); + uio_pushback(uio, backup); len = rlen; } @@ -2504,16 +2602,13 @@ nfs_write_rpc2( wverfset = 1; } else if (wverf != wverf2) { /* verifier changed, so we need to restart all the writes */ - if (++restart > 10) { + if (++vrestart > 100) { /* give up after too many restarts */ error = EIO; break; } backup = totalsize - tsiz; - uio_iov_base_add(uiop, -backup); - uio_iov_len_add(uiop, backup); - uiop->uio_offset -= backup; - uio_uio_resid_add(uiop, backup); + uio_pushback(uio, backup); committed = NFS_WRITE_FILESYNC; wverfset = 0; tsiz = totalsize; @@ -2523,15 +2618,15 @@ 
nfs_write_rpc2( *wverfp = wverf; *iomodep = committed; if (error) - uio_uio_resid_set(uiop, tsiz); - FSDBG_BOT(537, np, committed, uio_uio_resid(uiop), error); + uio_setresid(uio, tsiz); + FSDBG_BOT(537, np, committed, uio_resid(uio), error); return (error); } int nfs3_write_rpc_async( nfsnode_t np, - struct uio *uiop, + uio_t uio, size_t len, thread_t thd, kauth_cred_t cred, @@ -2541,7 +2636,6 @@ nfs3_write_rpc_async( { struct nfsmount *nmp; int error = 0, nfsvers; - off_t offset; struct nfsm_chain nmreq; nmp = NFSTONMP(np); @@ -2549,24 +2643,22 @@ nfs3_write_rpc_async( return (ENXIO); nfsvers = nmp->nm_vers; - offset = uiop->uio_offset; - nfsm_chain_null(&nmreq); nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize); if (nfsvers == NFS_VER3) { - nfsm_chain_add_64(error, &nmreq, offset); + nfsm_chain_add_64(error, &nmreq, uio_offset(uio)); nfsm_chain_add_32(error, &nmreq, len); nfsm_chain_add_32(error, &nmreq, iomode); } else { nfsm_chain_add_32(error, &nmreq, 0); - nfsm_chain_add_32(error, &nmreq, offset); + nfsm_chain_add_32(error, &nmreq, uio_offset(uio)); nfsm_chain_add_32(error, &nmreq, 0); } nfsm_chain_add_32(error, &nmreq, len); nfsmout_if(error); - error = nfsm_chain_add_uio(&nmreq, uiop, len); + error = nfsm_chain_add_uio(&nmreq, uio, len); nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp); @@ -2605,7 +2697,7 @@ nfs3_write_rpc_async_finish( nmp = NFSTONMP(np); if (!nmp) error = ENXIO; - if (!error && (lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if (!error && (lockerror = nfs_node_lock(np))) error = lockerror; if (nfsvers == NFS_VER3) { struct timespec premtime = { 0, 0 }; @@ -2642,7 +2734,7 @@ nfs3_write_rpc_async_finish( NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr); nfsmout: if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); 
nfsm_chain_cleanup(&nmrep); if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async && ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) @@ -2657,7 +2749,7 @@ nfs3_write_rpc_async_finish( * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ -static int +int nfs3_vnop_mknod( struct vnop_mknod_args /* { struct vnodeop_desc *a_desc; @@ -2679,9 +2771,9 @@ nfs3_vnop_mknod( nfsnode_t dnp = VTONFS(dvp); struct nfs_vattr nvattr, dnvattr; fhandle_t fh; - int error = 0, lockerror = ENOENT, status, wccpostattr = 0; + int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; - u_long rdev; + u_int32_t rdev; u_int64_t xid, dxid; int nfsvers, gotuid, gotgid; struct nfsm_chain nmreq, nmrep; @@ -2733,13 +2825,14 @@ nfs3_vnop_mknod( nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev); } nfsm_chain_build_done(error, &nmreq); - nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; + if (!error) + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); nfsmout_if(error); error = nfs_request(dnp, NULL, &nmreq, NFSPROC_MKNOD, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; /* XXX no EEXIST kludge here? */ dxid = xid; if (!error && !status) { @@ -2762,15 +2855,9 @@ nfs3_vnop_mknod( /* if directory hadn't changed, update namecache mtime */ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(dnp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? 
NGA_CACHED : NGA_UNCACHED); } if (!error && fh.fh_len) @@ -2779,11 +2866,11 @@ nfs3_vnop_mknod( error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np); if (!error && np) newvp = NFSTOV(np); - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (!error && (gotuid || gotgid) && - (!newvp || nfs_getattrcache(np, &nvattr, 1) || + (!newvp || nfs_getattrcache(np, &nvattr) || (gotuid && (nvattr.nva_uid != vap->va_uid)) || (gotgid && (nvattr.nva_gid != vap->va_gid)))) { /* clear ID bits if server didn't use them (or we can't tell) */ @@ -2792,21 +2879,21 @@ nfs3_vnop_mknod( } if (error) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else { *vpp = newvp; - nfs_unlock(np); + nfs_node_unlock(np); } return (error); } -static u_long create_verf; +static uint32_t create_verf; /* * NFS file create call */ -static int +int nfs3_vnop_create( struct vnop_create_args /* { struct vnodeop_desc *a_desc; @@ -2827,7 +2914,7 @@ nfs3_vnop_create( struct nfsmount *nmp; nfsnode_t dnp = VTONFS(dvp); vnode_t newvp = NULL; - int error = 0, lockerror = ENOENT, status, wccpostattr = 0, fmode = 0; + int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0; struct timespec premtime = { 0, 0 }; int nfsvers, gotuid, gotgid; u_int64_t xid, dxid; @@ -2858,7 +2945,8 @@ nfs3_vnop_create( again: req = NULL; - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -2871,10 +2959,12 @@ nfs3_vnop_create( if (nfsvers == NFS_VER3) { if (fmode & O_EXCL) { nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE); + lck_rw_lock_shared(in_ifaddr_rwlock); if (!TAILQ_EMPTY(&in_ifaddrhead)) val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else val = create_verf; + lck_rw_done(in_ifaddr_rwlock); 
nfsm_chain_add_32(error, &nmreq, val); ++create_verf; nfsm_chain_add_32(error, &nmreq, create_verf); @@ -2887,9 +2977,6 @@ nfs3_vnop_create( } nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); @@ -2898,6 +2985,8 @@ nfs3_vnop_create( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; dxid = xid; if (!error && !status) { if (dnp->n_flag & NNEGNCENTRIES) { @@ -2919,15 +3008,9 @@ nfs3_vnop_create( /* if directory hadn't changed, update namecache mtime */ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(dnp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED); } if (!error && fh.fh_len) @@ -2938,8 +3021,8 @@ nfs3_vnop_create( newvp = NFSTOV(np); nfs_dulookup_finish(&dul, dnp, ctx); - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (error) { if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) { @@ -2947,27 +3030,28 @@ nfs3_vnop_create( goto again; } if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) { - error = nfs3_setattr_rpc(np, vap, ctx, 1); + nfs_node_unlock(np); + error = nfs3_setattr_rpc(np, vap, ctx); if (error && (gotuid || gotgid)) { /* it's possible the server didn't like our attempt to set IDs. 
*/ /* so, let's try it again without those */ VATTR_CLEAR_ACTIVE(vap, va_uid); VATTR_CLEAR_ACTIVE(vap, va_gid); - error = nfs3_setattr_rpc(np, vap, ctx, 1); + error = nfs3_setattr_rpc(np, vap, ctx); } - if (error) { - nfs_unlock(np); + if (error) vnode_put(newvp); - } + else + nfs_node_lock_force(np); } if (!error) *ap->a_vpp = newvp; if (!error && (gotuid || gotgid) && - (!newvp || nfs_getattrcache(np, &nvattr, 1) || + (!newvp || nfs_getattrcache(np, &nvattr) || (gotuid && (nvattr.nva_uid != vap->va_uid)) || (gotgid && (nvattr.nva_gid != vap->va_gid)))) { /* clear ID bits if server didn't use them (or we can't tell) */ @@ -2975,7 +3059,7 @@ nfs3_vnop_create( VATTR_CLEAR_SUPPORTED(vap, va_gid); } if (!error) - nfs_unlock(np); + nfs_node_unlock(np); return (error); } @@ -2990,7 +3074,7 @@ nfs3_vnop_create( * else * do the remove RPC */ -static int +int nfs_vnop_remove( struct vnop_remove_args /* { struct vnodeop_desc *a_desc; @@ -3020,7 +3104,7 @@ nfs_vnop_remove( nfsvers = nmp->nm_vers; again_relock: - error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)); if (error) return (error); @@ -3033,7 +3117,7 @@ nfs_vnop_remove( np->n_hflag |= NHLOCKED; lck_mtx_unlock(nfs_node_hash_mutex); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); again: inuse = vnode_isinuse(vp, 0); if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) { @@ -3042,7 +3126,7 @@ nfs_vnop_remove( goto out; } if (inuse && !gotattr) { - if (nfs_getattr(np, &nvattr, ctx, 1)) + if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED)) nvattr.nva_nlink = 1; gotattr = 1; goto again; @@ -3058,25 +3142,33 @@ nfs_vnop_remove( wakeup(np); } lck_mtx_unlock(nfs_node_hash_mutex); - nfs_unlock2(dnp, np); + nfs_node_clear_busy2(dnp, np); error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1); FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011); flushed = 1; if (error == EINTR) { - 
nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); NATTRINVALIDATE(np); - nfs_unlock(np); + nfs_node_unlock(np); return (error); } goto again_relock; } + if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) { + lck_mtx_lock(&np->n_openlock); + np->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&np->n_openlock); + nfs4_delegreturn_rpc(nmp, np->n_fhp, np->n_fhsize, &np->n_dstateid, + vfs_context_thread(ctx), vfs_context_ucred(ctx)); + } + /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. */ - cache_purge(vp); + nfs_name_cache_purge(dnp, np, cnp, ctx); nfs_dulookup_start(&dul, dnp, ctx); @@ -3109,29 +3201,32 @@ nfs_vnop_remove( lck_mtx_unlock(nfs_node_hash_mutex); /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */ /* clear all flags other than these */ + nfs_node_lock_force(np); np->n_flag &= (NMODIFIED); - vnode_recycle(vp); NATTRINVALIDATE(np); + nfs_node_unlock(np); + vnode_recycle(vp); setsize = 1; } else { + nfs_node_lock_force(np); NATTRINVALIDATE(np); + nfs_node_unlock(np); } } else if (!np->n_sillyrename) { nfs_dulookup_start(&dul, dnp, ctx); error = nfs_sillyrename(dnp, np, cnp, ctx); + nfs_node_lock_force(np); NATTRINVALIDATE(np); + nfs_node_unlock(np); } else { + nfs_node_lock_force(np); NATTRINVALIDATE(np); + nfs_node_unlock(np); nfs_dulookup_start(&dul, dnp, ctx); } - if (!nfs_getattr(dnp, &nvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &nvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &nvattr); - } - } + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &nvattr, ctx, NGA_CACHED); nfs_dulookup_finish(&dul, dnp, ctx); out: /* unlock the node */ @@ -3142,7 +3237,7 @@ nfs_vnop_remove( wakeup(np); } lck_mtx_unlock(nfs_node_hash_mutex); - nfs_unlock2(dnp, np); + nfs_node_clear_busy2(dnp, np); if (setsize) ubc_setsize(vp, 0); return (error); @@ -3171,7 
+3266,7 @@ nfs3_remove_rpc( thread_t thd, kauth_cred_t cred) { - int error = 0, status, wccpostattr = 0; + int error = 0, lockerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; struct nfsmount *nmp; int nfsvers; @@ -3197,8 +3292,11 @@ nfs3_remove_rpc( error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, 0, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; if (nfsvers == NFS_VER3) nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid); + nfsmout_if(error); dnp->n_flag |= NMODIFIED; /* if directory hadn't changed, update namecache mtime */ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) @@ -3208,6 +3306,8 @@ nfs3_remove_rpc( if (!error) error = status; nfsmout: + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); return (error); @@ -3216,7 +3316,7 @@ nfs3_remove_rpc( /* * NFS file rename call */ -static int +int nfs_vnop_rename( struct vnop_rename_args /* { struct vnodeop_desc *a_desc; @@ -3241,7 +3341,6 @@ nfs_vnop_rename( mount_t fmp, tdmp, tmp; struct nfs_vattr nvattr; struct nfsmount *nmp; - struct nfs_dulookup fdul, tdul; fdnp = VTONFS(fdvp); fnp = VTONFS(fvp); @@ -3253,7 +3352,7 @@ nfs_vnop_rename( return (ENXIO); nfsvers = nmp->nm_vers; - error = nfs_lock4(fdnp, fnp, tdnp, tnp, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx)); if (error) return (error); @@ -3269,9 +3368,6 @@ nfs_vnop_rename( locked = 1; } - nfs_dulookup_init(&fdul, fdnp, fcnp->cn_nameptr, fcnp->cn_namelen); - nfs_dulookup_init(&tdul, tdnp, tcnp->cn_nameptr, tcnp->cn_namelen); - /* Check for cross-device rename */ fmp = vnode_mount(fvp); tmp = tvp ? 
vnode_mount(tvp) : NULL; @@ -3301,11 +3397,14 @@ nfs_vnop_rename( /* sillyrename succeeded.*/ tvp = NULL; } + } else if (tvp && (nmp->nm_vers >= NFS_VER4) && (tnp->n_openflags & N_DELEG_MASK)) { + lck_mtx_lock(&tnp->n_openlock); + tnp->n_openflags &= ~N_DELEG_MASK; + lck_mtx_unlock(&tnp->n_openlock); + nfs4_delegreturn_rpc(nmp, tnp->n_fhp, tnp->n_fhsize, &tnp->n_dstateid, + vfs_context_thread(ctx), vfs_context_ucred(ctx)); } - nfs_dulookup_start(&fdul, fdnp, ctx); - nfs_dulookup_start(&tdul, tdnp, ctx); - error = nmp->nm_funcs->nf_rename_rpc(fdnp, fcnp->cn_nameptr, fcnp->cn_namelen, tdnp, tcnp->cn_nameptr, tcnp->cn_namelen, ctx); @@ -3316,8 +3415,10 @@ nfs_vnop_rename( error = 0; if (tvp && (tvp != fvp) && !tnp->n_sillyrename) { + nfs_node_lock_force(tnp); tvprecycle = (!error && !vnode_isinuse(tvp, 0) && - (nfs_getattrcache(tnp, &nvattr, 1) || (nvattr.nva_nlink == 1))); + (nfs_getattrcache(tnp, &nvattr) || (nvattr.nva_nlink == 1))); + nfs_node_unlock(tnp); lck_mtx_lock(nfs_node_hash_mutex); if (tvprecycle && (tnp->n_hflag & NHHASHED)) { /* @@ -3333,21 +3434,26 @@ nfs_vnop_rename( } /* purge the old name cache entries and enter the new one */ - cache_purge(fvp); + nfs_name_cache_purge(fdnp, fnp, fcnp, ctx); if (tvp) { - cache_purge(tvp); + nfs_name_cache_purge(tdnp, tnp, tcnp, ctx); if (tvprecycle) { /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */ /* clear all flags other than these */ + nfs_node_lock_force(tnp); tnp->n_flag &= (NMODIFIED); + nfs_node_unlock(tnp); vnode_recycle(tvp); } } if (!error) { + nfs_node_lock_force(tdnp); if (tdnp->n_flag & NNEGNCENTRIES) { tdnp->n_flag &= ~NNEGNCENTRIES; cache_purge_negatives(tdvp); } + nfs_node_unlock(tdnp); + nfs_node_lock_force(fnp); cache_enter(tdvp, fvp, tcnp); if (tdvp != fdvp) { /* update parent pointer */ if (fnp->n_parent && !vnode_get(fnp->n_parent)) { @@ -3364,24 +3470,12 @@ nfs_vnop_rename( fnp->n_parent = NULL; } } + nfs_node_unlock(fnp); } out: - if (!nfs_getattr(fdnp, &nvattr, ctx, 1)) 
{ - if (NFS_CHANGED_NC(nfsvers, fdnp, &nvattr)) { - fdnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(fdvp); - NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &nvattr); - } - } - if (!nfs_getattr(tdnp, &nvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, tdnp, &nvattr)) { - tdnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(tdvp); - NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &nvattr); - } - } - nfs_dulookup_finish(&fdul, fdnp, ctx); - nfs_dulookup_finish(&tdul, tdnp, ctx); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(fdnp, &nvattr, ctx, NGA_CACHED); + nfs_getattr(tdnp, &nvattr, ctx, NGA_CACHED); if (locked) { /* unlock node */ lck_mtx_lock(nfs_node_hash_mutex); @@ -3392,7 +3486,7 @@ nfs_vnop_rename( } lck_mtx_unlock(nfs_node_hash_mutex); } - nfs_unlock4(fdnp, fnp, tdnp, tnp); + nfs_node_clear_busy4(fdnp, fnp, tdnp, tnp); return (error); } @@ -3409,7 +3503,7 @@ nfs3_rename_rpc( int tnamelen, vfs_context_t ctx) { - int error = 0, status, fwccpostattr = 0, twccpostattr = 0; + int error = 0, lockerror = ENOENT, status, fwccpostattr = 0, twccpostattr = 0; struct timespec fpremtime = { 0, 0 }, tpremtime = { 0, 0 }; struct nfsmount *nmp; int nfsvers; @@ -3439,6 +3533,8 @@ nfs3_rename_rpc( error = nfs_request(fdnp, NULL, &nmreq, NFSPROC_RENAME, ctx, &nmrep, &xid, &status); + if ((lockerror = nfs_node_lock2(fdnp, tdnp))) + error = lockerror; if (nfsvers == NFS_VER3) { txid = xid; nfsm_chain_get_wcc_data(error, &nmrep, fdnp, &fpremtime, &fwccpostattr, &xid); @@ -3449,25 +3545,28 @@ nfs3_rename_rpc( nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - fdnp->n_flag |= NMODIFIED; - /* if directory hadn't changed, update namecache mtime */ - if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==)) - NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr); - if (!fwccpostattr) - NATTRINVALIDATE(fdnp); - tdnp->n_flag |= NMODIFIED; - /* if directory hadn't changed, update namecache mtime */ - if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==)) - 
NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr); - if (!twccpostattr) - NATTRINVALIDATE(tdnp); + if (!lockerror) { + fdnp->n_flag |= NMODIFIED; + /* if directory hadn't changed, update namecache mtime */ + if (nfstimespeccmp(&fdnp->n_ncmtime, &fpremtime, ==)) + NFS_CHANGED_UPDATE_NC(nfsvers, fdnp, &fdnp->n_vattr); + if (!fwccpostattr) + NATTRINVALIDATE(fdnp); + tdnp->n_flag |= NMODIFIED; + /* if directory hadn't changed, update namecache mtime */ + if (nfstimespeccmp(&tdnp->n_ncmtime, &tpremtime, ==)) + NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr); + if (!twccpostattr) + NATTRINVALIDATE(tdnp); + nfs_node_unlock2(fdnp, tdnp); + } return (error); } /* * NFS hard link create call */ -static int +int nfs3_vnop_link( struct vnop_link_args /* { struct vnodeop_desc *a_desc; @@ -3481,7 +3580,7 @@ nfs3_vnop_link( vnode_t vp = ap->a_vp; vnode_t tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; - int error = 0, status, wccpostattr = 0, attrflag = 0; + int error = 0, lockerror = ENOENT, status, wccpostattr = 0, attrflag = 0; struct timespec premtime = { 0, 0 }; struct nfsmount *nmp; nfsnode_t np = VTONFS(vp); @@ -3507,7 +3606,7 @@ nfs3_vnop_link( */ nfs_flush(np, MNT_WAIT, vfs_context_thread(ctx), V_IGNORE_WRITEERR); - error = nfs_lock2(tdnp, np, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_set_busy2(tdnp, np, vfs_context_thread(ctx)); if (error) return (error); @@ -3523,6 +3622,11 @@ nfs3_vnop_link( nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_LINK, ctx, &nmrep, &xid, &status); + + if ((lockerror = nfs_node_lock2(tdnp, np))) { + error = lockerror; + goto nfsmout; + } if (nfsvers == NFS_VER3) { txid = xid; nfsm_chain_postop_attr_update_flag(error, &nmrep, np, attrflag, &xid); @@ -3533,19 +3637,22 @@ nfs3_vnop_link( nfsmout: nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - tdnp->n_flag |= NMODIFIED; - if (!attrflag) - NATTRINVALIDATE(np); - /* if directory hadn't changed, update namecache mtime */ - if 
(nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==)) - NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(tdnp); - if (!error && (tdnp->n_flag & NNEGNCENTRIES)) { - tdnp->n_flag &= ~NNEGNCENTRIES; - cache_purge_negatives(tdvp); + if (!lockerror) { + if (!attrflag) + NATTRINVALIDATE(np); + tdnp->n_flag |= NMODIFIED; + /* if directory hadn't changed, update namecache mtime */ + if (nfstimespeccmp(&tdnp->n_ncmtime, &premtime, ==)) + NFS_CHANGED_UPDATE_NC(nfsvers, tdnp, &tdnp->n_vattr); + if (!wccpostattr) + NATTRINVALIDATE(tdnp); + if (!error && (tdnp->n_flag & NNEGNCENTRIES)) { + tdnp->n_flag &= ~NNEGNCENTRIES; + cache_purge_negatives(tdvp); + } + nfs_node_unlock2(tdnp, np); } - nfs_unlock2(tdnp, np); + nfs_node_clear_busy2(tdnp, np); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ @@ -3557,7 +3664,7 @@ nfs3_vnop_link( /* * NFS symbolic link create call */ -static int +int nfs3_vnop_symlink( struct vnop_symlink_args /* { struct vnodeop_desc *a_desc; @@ -3575,7 +3682,7 @@ nfs3_vnop_symlink( struct componentname *cnp = ap->a_cnp; struct nfs_vattr nvattr, dnvattr; fhandle_t fh; - int slen, error = 0, lockerror = ENOENT, status, wccpostattr = 0; + int slen, error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; vnode_t newvp = NULL; int nfsvers, gotuid, gotgid; @@ -3606,7 +3713,8 @@ nfs3_vnop_symlink( gotuid = VATTR_IS_ACTIVE(vap, va_uid); gotgid = VATTR_IS_ACTIVE(vap, va_gid); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -3623,9 +3731,6 @@ nfs3_vnop_symlink( nfsm_chain_add_v2sattr(error, &nmreq, vap, -1); nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; 
- nfsmout_if(error); error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_SYMLINK, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); @@ -3634,6 +3739,8 @@ nfs3_vnop_symlink( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; dxid = xid; if (!error && !status) { if (dnp->n_flag & NNEGNCENTRIES) { @@ -3658,15 +3765,9 @@ nfs3_vnop_symlink( /* if directory hadn't changed, update namecache mtime */ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(dnp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED); } if (!error && fh.fh_len) @@ -3682,7 +3783,7 @@ nfs3_vnop_symlink( */ if ((error == EEXIST) || (!error && !newvp)) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); newvp = NULL; } @@ -3693,10 +3794,10 @@ nfs3_vnop_symlink( error = EEXIST; } } - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (!error && (gotuid || gotgid) && - (!newvp || nfs_getattrcache(np, &nvattr, 1) || + (!newvp || nfs_getattrcache(np, &nvattr) || (gotuid && (nvattr.nva_uid != vap->va_uid)) || (gotgid && (nvattr.nva_gid != vap->va_gid)))) { /* clear ID bits if server didn't use them (or we can't tell) */ @@ -3705,11 +3806,11 @@ nfs3_vnop_symlink( } if (error) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else { - nfs_unlock(np); + nfs_node_unlock(np); *ap->a_vpp = newvp; } return (error); @@ -3718,7 +3819,7 @@ nfs3_vnop_symlink( /* * NFS make dir call */ -static int +int nfs3_vnop_mkdir( struct vnop_mkdir_args /* { struct 
vnodeop_desc *a_desc; @@ -3738,7 +3839,7 @@ nfs3_vnop_mkdir( struct nfsmount *nmp; nfsnode_t dnp = VTONFS(dvp); vnode_t newvp = NULL; - int error = 0, lockerror = ENOENT, status, wccpostattr = 0; + int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; int nfsvers, gotuid, gotgid; u_int64_t xid, dxid; @@ -3763,7 +3864,8 @@ nfs3_vnop_mkdir( gotuid = VATTR_IS_ACTIVE(vap, va_uid); gotgid = VATTR_IS_ACTIVE(vap, va_gid); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); + error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ -3779,9 +3881,6 @@ nfs3_vnop_mkdir( nfsm_chain_add_v2sattr(error, &nmreq, vap, -1); nfsm_chain_build_done(error, &nmreq); nfsmout_if(error); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKDIR, vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req); @@ -3790,6 +3889,8 @@ nfs3_vnop_mkdir( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; dxid = xid; if (!error && !status) { if (dnp->n_flag & NNEGNCENTRIES) { @@ -3811,15 +3912,9 @@ nfs3_vnop_mkdir( /* if directory hadn't changed, update namecache mtime */ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(dnp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } - } + nfs_node_unlock(dnp); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? 
NGA_CACHED : NGA_UNCACHED); } if (!error && fh.fh_len) @@ -3833,9 +3928,9 @@ nfs3_vnop_mkdir( * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ - if (error == EEXIST || (!error && !newvp)) { + if ((error == EEXIST) || (!error && !newvp)) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); newvp = NULL; } @@ -3846,10 +3941,10 @@ nfs3_vnop_mkdir( error = EEXIST; } } - if (!lockerror) - nfs_unlock(dnp); + if (!busyerror) + nfs_node_clear_busy(dnp); if (!error && (gotuid || gotgid) && - (!newvp || nfs_getattrcache(np, &nvattr, 1) || + (!newvp || nfs_getattrcache(np, &nvattr) || (gotuid && (nvattr.nva_uid != vap->va_uid)) || (gotgid && (nvattr.nva_gid != vap->va_gid)))) { /* clear ID bits if server didn't use them (or we can't tell) */ @@ -3858,11 +3953,11 @@ nfs3_vnop_mkdir( } if (error) { if (newvp) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(newvp); } } else { - nfs_unlock(np); + nfs_node_unlock(np); *ap->a_vpp = newvp; } return (error); @@ -3871,7 +3966,7 @@ nfs3_vnop_mkdir( /* * NFS remove directory call */ -static int +int nfs3_vnop_rmdir( struct vnop_rmdir_args /* { struct vnodeop_desc *a_desc; @@ -3885,7 +3980,7 @@ nfs3_vnop_rmdir( vnode_t vp = ap->a_vp; vnode_t dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; - int error = 0, status, wccpostattr = 0; + int error = 0, lockerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; struct nfsmount *nmp; nfsnode_t np = VTONFS(vp); @@ -3904,11 +3999,11 @@ nfs3_vnop_rmdir( if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) return (ENAMETOOLONG); - nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen); - - if ((error = nfs_lock2(dnp, np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx)))) return (error); + nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx); + nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); @@ 
-3926,6 +4021,8 @@ nfs3_vnop_rmdir( error = nfs_request_async_finish(req, &nmrep, &xid, &status); } + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; if (nfsvers == NFS_VER3) nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid); if (!error) @@ -3934,22 +4031,18 @@ nfs3_vnop_rmdir( nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - dnp->n_flag |= NMODIFIED; - /* if directory hadn't changed, update namecache mtime */ - if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); - if (!wccpostattr) - NATTRINVALIDATE(dnp); - cache_purge(vp); - if (!nfs_getattr(dnp, &dnvattr, ctx, 1)) { - if (NFS_CHANGED_NC(nfsvers, dnp, &dnvattr)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge(dvp); - NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnvattr); - } + if (!lockerror) { + dnp->n_flag |= NMODIFIED; + /* if directory hadn't changed, update namecache mtime */ + if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) + NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr); + nfs_node_unlock(dnp); + nfs_name_cache_purge(dnp, np, cnp, ctx); + /* nfs_getattr() will check changed and purge caches */ + nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED); } nfs_dulookup_finish(&dul, dnp, ctx); - nfs_unlock2(dnp, np); + nfs_node_clear_busy2(dnp, np); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. @@ -3975,609 +4068,976 @@ nfs3_vnop_rmdir( /* * NFS readdir call + * + * The incoming "offset" is a directory cookie indicating where in the + * directory entries should be read from. A zero cookie means start at + * the beginning of the directory. Any other cookie will be a cookie + * returned from the server. + * + * Using that cookie, determine which buffer (and where in that buffer) + * to start returning entries from. Buffer logical block numbers are + * the cookies they start at. If a buffer is found that is not full, + * call into the bio/RPC code to fill it. 
The RPC code will probably + * fill several buffers (dropping the first, requiring a re-get). + * + * When done copying entries to the buffer, set the offset to the current + * entry's cookie and enter that cookie in the cookie cache. + * + * Note: because the getdirentries(2) API returns a long-typed offset, + * the incoming offset is a potentially truncated cookie (ptc). + * The cookie matching code is aware of this and will fall back to + * matching only 32 bits of the cookie. */ -static int +int nfs_vnop_readdir( struct vnop_readdir_args /* { struct vnodeop_desc *a_desc; vnode_t a_vp; struct uio *a_uio; + int a_flags; int *a_eofflag; - int *a_ncookies; - u_long **a_cookies; + int *a_numdirent; vfs_context_t a_context; } */ *ap) { vfs_context_t ctx = ap->a_context; - vnode_t vp = ap->a_vp; - nfsnode_t np = VTONFS(vp); + vnode_t dvp = ap->a_vp; + nfsnode_t dnp = VTONFS(dvp); struct nfsmount *nmp; - struct uio *uio = ap->a_uio; - int tresid, error, nfsvers; + uio_t uio = ap->a_uio; + int error, nfsvers, extended, numdirent, bigcookies, ptc, done; struct nfs_vattr nvattr; + uint16_t i, iptc, rlen, nlen; + uint64_t cookie, nextcookie, lbn = 0; + struct nfsbuf *bp = NULL; + struct nfs_dir_buf_header *ndbhp; + struct direntry *dp, *dpptc; + struct dirent dent; + char *cp = NULL; + thread_t thd; - if (vnode_vtype(vp) != VDIR) - return (EPERM); - - nmp = VTONMP(vp); + nmp = VTONMP(dvp); if (!nmp) return (ENXIO); nfsvers = nmp->nm_vers; + bigcookies = (nmp->nm_state & NFSSTA_BIGCOOKIES); + extended = (ap->a_flags & VNODE_READDIR_EXTENDED); - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) - return (error); + if (vnode_vtype(dvp) != VDIR) + return (EPERM); + + if (ap->a_eofflag) + *ap->a_eofflag = 0; + + if (uio_resid(uio) == 0) + return (0); + + thd = vfs_context_thread(ctx); + numdirent = done = 0; + nextcookie = uio_offset(uio); + ptc = bigcookies && NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(nextcookie); + + if ((error = nfs_node_lock(dnp))) + goto out; + + if 
(dnp->n_flag & NNEEDINVALIDATE) { + dnp->n_flag &= ~NNEEDINVALIDATE; + nfs_invaldir(dnp); + nfs_node_unlock(dnp); + error = nfs_vinvalbuf(dvp, 0, ctx, 1); + if (!error) + error = nfs_node_lock(dnp); + if (error) + goto out; + } /* - * First, check for hit on the EOF offset cache + * check for need to invalidate when (re)starting at beginning */ - if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && - (np->n_flag & NMODIFIED) == 0) { - if (!nfs_getattr(np, &nvattr, ctx, 1)) { - if (!NFS_CHANGED(nfsvers, np, &nvattr)) { - nfs_unlock(np); - OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_hits); + if (!nextcookie) { + if (dnp->n_flag & NMODIFIED) { + nfs_invaldir(dnp); + nfs_node_unlock(dnp); + if ((error = nfs_vinvalbuf(dvp, 0, ctx, 1))) + goto out; + } else { + nfs_node_unlock(dnp); + } + /* nfs_getattr() will check changed and purge caches */ + if ((error = nfs_getattr(dnp, &nvattr, ctx, NGA_UNCACHED))) + goto out; + } else { + nfs_node_unlock(dnp); + } + + error = nfs_dir_cookie_to_lbn(dnp, nextcookie, &ptc, &lbn); + if (error) { + if (error < 0) { /* just hit EOF cookie */ + done = 1; + error = 0; + } + if (ap->a_eofflag) + *ap->a_eofflag = 1; + } + + while (!error && !done) { + OSAddAtomic(1, &nfsstats.biocache_readdirs); + cookie = nextcookie; +getbuffer: + error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ, &bp); + if (error) + goto out; + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + if (!ISSET(bp->nb_flags, NB_CACHE) || !ISSET(ndbhp->ndbh_flags, NDB_FULL)) { + if (!ISSET(bp->nb_flags, NB_CACHE)) { /* initialize the buffer */ + ndbhp->ndbh_flags = 0; + ndbhp->ndbh_count = 0; + ndbhp->ndbh_entry_end = sizeof(*ndbhp); + ndbhp->ndbh_ncgen = dnp->n_ncgen; + } + error = nfs_buf_readdir(bp, ctx); + if (error == NFSERR_DIRBUFDROPPED) + goto getbuffer; + if (error) + nfs_buf_release(bp, 1); + if (error && (error != ENXIO) && (error != ETIMEDOUT) && (error != EINTR) && (error != ERESTART)) { + if (!nfs_node_lock(dnp)) { + 
nfs_invaldir(dnp); + nfs_node_unlock(dnp); + } + nfs_vinvalbuf(dvp, 0, ctx, 1); + if (error == NFSERR_BAD_COOKIE) + error = ENOENT; + } + if (error) + goto out; + } + + /* find next entry to return */ + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + i = 0; + if ((lbn != cookie) && !(ptc && NFS_DIR_COOKIE_SAME32(lbn, cookie))) { + dpptc = NULL; + iptc = 0; + for (; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) { + if (ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) { + iptc = i; + dpptc = dp; + } + nextcookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + } + if ((i == ndbhp->ndbh_count) && dpptc) { + i = iptc; + dp = dpptc; + } + if (i < ndbhp->ndbh_count) { + nextcookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + i++; + } + } + ptc = 0; /* only have to deal with ptc on first cookie */ + + /* return as many entries as we can */ + for (; i < ndbhp->ndbh_count; i++) { + if (extended) { + rlen = dp->d_reclen; + cp = (char*)dp; + } else { + if (!cp) { + cp = (char*)&dent; + bzero(cp, sizeof(dent)); + } + if (dp->d_namlen > (sizeof(dent.d_name) - 1)) + nlen = sizeof(dent.d_name) - 1; + else + nlen = dp->d_namlen; + rlen = NFS_DIRENT_LEN(nlen); + dent.d_reclen = rlen; + dent.d_ino = dp->d_ino; + dent.d_type = dp->d_type; + dent.d_namlen = nlen; + strlcpy(dent.d_name, dp->d_name, nlen + 1); + } + /* check that the record fits */ + if (rlen > uio_resid(uio)) { + done = 1; + break; + } + if ((error = uiomove(cp, rlen, uio))) + break; + numdirent++; + nextcookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + } + + if (i == ndbhp->ndbh_count) { + /* hit end of buffer, move to next buffer */ + lbn = nextcookie; + /* if we also hit EOF, we're done */ + if (ISSET(ndbhp->ndbh_flags, NDB_EOF)) { + done = 1; if (ap->a_eofflag) *ap->a_eofflag = 1; - return (0); - } - if (NFS_CHANGED_NC(nfsvers, np, &nvattr)) { - /* directory changed, purge any name cache entries */ - np->n_flag &= ~NNEGNCENTRIES; - cache_purge(vp); } } + if (!error) + uio_setoffset(uio, 
nextcookie); + if (!error && !done && (nextcookie == cookie)) { + printf("nfs readdir cookie didn't change 0x%llx, %d/%d\n", cookie, i, ndbhp->ndbh_count); + error = EIO; + } + nfs_buf_release(bp, 1); } - nfs_unlock(np); - if (ap->a_eofflag) - *ap->a_eofflag = 0; - /* - * Call nfs_bioread() to do the real work. - */ - // LP64todo - fix this - tresid = uio_uio_resid(uio); - error = nfs_bioread(np, uio, 0, ap->a_eofflag, ctx); + if (!error) + nfs_dir_cookie_cache(dnp, nextcookie, lbn); - if (!error && uio_uio_resid(uio) == tresid) - OSAddAtomic(1, (SInt32*)&nfsstats.direofcache_misses); + if (ap->a_numdirent) + *ap->a_numdirent = numdirent; +out: return (error); } + /* - * Readdir RPC call. - * Called from below the buffer cache by nfs_buf_readdir(). + * Invalidate cached directory information, except for the actual directory + * blocks (which are invalidated separately). */ -#define DIRHDSIZ ((int)(sizeof(struct dirent) - (MAXNAMLEN + 1))) -int -nfs3_readdir_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) +void +nfs_invaldir(nfsnode_t dnp) { - int len, skiplen, left; - struct dirent *dp = NULL; - nfsuint64 *cookiep; - nfsuint64 cookie; - struct nfsmount *nmp; - u_quad_t fileno; - int error = 0, lockerror, status, tlen, more_dirs = 1, blksiz = 0, bigenough = 1, eof; - int nfsvers, nmreaddirsize; - u_int64_t xid; - struct nfsm_chain nmreq, nmrep; - char *cp; + if (vnode_vtype(NFSTOV(dnp)) != VDIR) + return; + dnp->n_eofcookie = 0; + dnp->n_cookieverf = 0; + if (!dnp->n_cookiecache) + return; + dnp->n_cookiecache->free = 0; + dnp->n_cookiecache->mru = -1; + memset(dnp->n_cookiecache->next, -1, NFSNUMCOOKIES); +} -#if DIAGNOSTIC - /* XXX limitation based on need to adjust uio */ - if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || - (uio_uio_resid(uiop) & (DIRBLKSIZ - 1))) - panic("nfs_readdirrpc: bad uio"); -#endif - nmp = NFSTONMP(dnp); - if (!nmp) - return (ENXIO); - nfsvers = nmp->nm_vers; - nmreaddirsize = nmp->nm_readdirsize; +/* + * 
calculate how much space is available for additional directory entries. + */ +uint32_t +nfs_dir_buf_freespace(struct nfsbuf *bp, int rdirplus) +{ + struct nfs_dir_buf_header *ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + uint32_t space; - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) - return (lockerror); + if (!ndbhp) + return (0); + space = bp->nb_bufsize - ndbhp->ndbh_entry_end; + if (rdirplus) + space -= ndbhp->ndbh_count * sizeof(struct nfs_vattr); + return (space); +} - /* - * If there is no cookie, assume directory was stale. - */ - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) - cookie = *cookiep; - else { - nfs_unlock(dnp); - return (NFSERR_BAD_COOKIE); +/* + * add/update a cookie->lbn entry in the directory cookie cache + */ +void +nfs_dir_cookie_cache(nfsnode_t dnp, uint64_t cookie, uint64_t lbn) +{ + struct nfsdmap *ndcc; + int8_t i, prev; + + if (!cookie) + return; + + if (nfs_node_lock(dnp)) + return; + + if (cookie == dnp->n_eofcookie) { /* EOF cookie */ + nfs_node_unlock(dnp); + return; + } + + ndcc = dnp->n_cookiecache; + if (!ndcc) { + /* allocate the cookie cache structure */ + MALLOC_ZONE(dnp->n_cookiecache, struct nfsdmap *, + sizeof(struct nfsdmap), M_NFSDIROFF, M_WAITOK); + if (!dnp->n_cookiecache) { + nfs_node_unlock(dnp); + return; + } + ndcc = dnp->n_cookiecache; + ndcc->free = 0; + ndcc->mru = -1; + memset(ndcc->next, -1, NFSNUMCOOKIES); } /* - * Loop around doing readdir rpc's of size nm_readdirsize - * truncated to a multiple of DIRBLKSIZ. - * The stopping criteria is EOF or buffer full. + * Search the list for this cookie. + * Keep track of previous and last entries. 
*/ - nfsm_chain_null(&nmreq); - nfsm_chain_null(&nmrep); - while (more_dirs && bigenough) { - nfsm_chain_build_alloc_init(error, &nmreq, - NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers)); - nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); - if (nfsvers == NFS_VER3) { - /* opaque values don't need swapping, but as long */ - /* as we are consistent about it, it should be ok */ - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]); - } else { - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]); - } - nfsm_chain_add_32(error, &nmreq, nmreaddirsize); - nfsm_chain_build_done(error, &nmreq); - nfs_unlock(dnp); - lockerror = ENOENT; - nfsmout_if(error); + prev = -1; + i = ndcc->mru; + while ((i != -1) && (cookie != ndcc->cookies[i].key)) { + if (ndcc->next[i] == -1) /* stop on last entry so we can reuse */ + break; + prev = i; + i = ndcc->next[i]; + } + if ((i != -1) && (cookie == ndcc->cookies[i].key)) { + /* found it, remove from list */ + if (prev != -1) + ndcc->next[prev] = ndcc->next[i]; + else + ndcc->mru = ndcc->next[i]; + } else { + /* not found, use next free entry or reuse last entry */ + if (ndcc->free != NFSNUMCOOKIES) + i = ndcc->free++; + else + ndcc->next[prev] = -1; + ndcc->cookies[i].key = cookie; + ndcc->cookies[i].lbn = lbn; + } + /* insert cookie at head of MRU list */ + ndcc->next[i] = ndcc->mru; + ndcc->mru = i; + nfs_node_unlock(dnp); +} - error = nfs_request(dnp, NULL, &nmreq, NFSPROC_READDIR, ctx, - &nmrep, &xid, &status); +/* + * Try to map the given directory cookie to a directory buffer (return lbn). + * If we have a possibly truncated cookie (ptc), check for 32-bit matches too. 
+ */ +int +nfs_dir_cookie_to_lbn(nfsnode_t dnp, uint64_t cookie, int *ptc, uint64_t *lbnp) +{ + struct nfsdmap *ndcc = dnp->n_cookiecache; + int8_t i, eofptc, iptc, found; + struct nfsmount *nmp; + struct nfsbuf *bp, *lastbp; + struct nfsbuflists blist; + struct direntry *dp, *dpptc; + struct nfs_dir_buf_header *ndbhp; + + if (!cookie) { /* initial cookie */ + *lbnp = 0; + *ptc = 0; + return (0); + } - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; + if (nfs_node_lock(dnp)) + return (ENOENT); - if (nfsvers == NFS_VER3) - nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid); - if (!error) - error = status; - if (nfsvers == NFS_VER3) { - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]); - } - nfsm_chain_get_32(error, &nmrep, more_dirs); + if (cookie == dnp->n_eofcookie) { /* EOF cookie */ + nfs_node_unlock(dnp); + OSAddAtomic(1, &nfsstats.direofcache_hits); + *ptc = 0; + return (-1); + } + /* note if cookie is a 32-bit match with the EOF cookie */ + eofptc = *ptc ? NFS_DIR_COOKIE_SAME32(cookie, dnp->n_eofcookie) : 0; + iptc = -1; - if (!lockerror) { - nfs_unlock(dnp); - lockerror = ENOENT; + /* search the list for the cookie */ + for (i = ndcc ? 
ndcc->mru : -1; i >= 0; i = ndcc->next[i]) { + if (ndcc->cookies[i].key == cookie) { + /* found a match for this cookie */ + *lbnp = ndcc->cookies[i].lbn; + nfs_node_unlock(dnp); + OSAddAtomic(1, &nfsstats.direofcache_hits); + *ptc = 0; + return (0); } - nfsmout_if(error); + /* check for 32-bit match */ + if (*ptc && (iptc == -1) && NFS_DIR_COOKIE_SAME32(ndcc->cookies[i].key, cookie)) + iptc = i; + } + /* exact match not found */ + if (eofptc) { + /* but 32-bit match hit the EOF cookie */ + nfs_node_unlock(dnp); + OSAddAtomic(1, &nfsstats.direofcache_hits); + return (-1); + } + if (iptc >= 0) { + /* but 32-bit match got a hit */ + *lbnp = ndcc->cookies[iptc].lbn; + nfs_node_unlock(dnp); + OSAddAtomic(1, &nfsstats.direofcache_hits); + return (0); + } + nfs_node_unlock(dnp); - /* loop thru the dir entries, doctoring them to 4bsd form */ - while (more_dirs && bigenough) { - if (nfsvers == NFS_VER3) - nfsm_chain_get_64(error, &nmrep, fileno); + /* + * No match found in the cookie cache... hmm... + * Let's search the directory's buffers for the cookie. + */ + nmp = NFSTONMP(dnp); + if (!nmp) + return (ENXIO); + dpptc = NULL; + found = 0; + + lck_mtx_lock(nfs_buf_mutex); + /* + * Scan the list of buffers, keeping them in order. + * Note that itercomplete inserts each of the remaining buffers + * into the head of list (thus reversing the elements). So, we + * make sure to iterate through all buffers, inserting them after + * each other, to keep them in order. + * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because + * we don't drop nfs_buf_mutex. 
+ */ + if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) { + lastbp = NULL; + while ((bp = LIST_FIRST(&blist))) { + LIST_REMOVE(bp, nb_vnbufs); + if (!lastbp) + LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs); else - nfsm_chain_get_32(error, &nmrep, fileno); - nfsm_chain_get_32(error, &nmrep, len); - nfsmout_if(error); - /* Note: v3 supports longer names, but struct dirent doesn't */ - /* so we just truncate the names to fit */ - if (len <= 0) { - error = EBADRPC; - goto nfsmout; + LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs); + lastbp = bp; + if (found) + continue; + nfs_buf_refget(bp); + if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) { + /* just skip this buffer */ + nfs_buf_refrele(bp); + continue; } - if (len > MAXNAMLEN) { - skiplen = len - MAXNAMLEN; - len = MAXNAMLEN; - } else { - skiplen = 0; + nfs_buf_refrele(bp); + + /* scan the buffer for the cookie */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + dpptc = NULL; + for (i=0; (i < ndbhp->ndbh_count) && (cookie != dp->d_seekoff); i++) { + if (*ptc && !dpptc && NFS_DIR_COOKIE_SAME32(cookie, dp->d_seekoff)) { + dpptc = dp; + iptc = i; + } + dp = NFS_DIRENTRY_NEXT(dp); } - tlen = nfsm_rndup(len); - if (tlen == len) - tlen += 4; /* To ensure null termination */ - left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - blksiz = 0; + if ((i == ndbhp->ndbh_count) && dpptc) { + /* found only a PTC match */ + dp = dpptc; + i = iptc; + } else if (i < ndbhp->ndbh_count) { + *ptc = 0; } - if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop)) - bigenough = 0; - if (bigenough) { - // LP64todo - fix this! 
- dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); - dp->d_fileno = (int)fileno; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; - dp->d_type = DT_UNKNOWN; - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ; -#if LP64KERN - uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); -#else - uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int)DIRHDSIZ)); -#endif - uio_iov_base_add(uiop, DIRHDSIZ); - error = nfsm_chain_get_uio(&nmrep, len, uiop); - nfsmout_if(error); - // LP64todo - fix this! - cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - tlen -= len; - *cp = '\0'; /* null terminate */ - uio_iov_base_add(uiop, tlen); - uio_iov_len_add(uiop, -tlen); - uiop->uio_offset += tlen; - uio_uio_resid_add(uiop, -tlen); - if (skiplen) - nfsm_chain_adv(error, &nmrep, - nfsm_rndup(len + skiplen) - nfsm_rndup(len)); - } else { - nfsm_chain_adv(error, &nmrep, nfsm_rndup(len + skiplen)); + if (i < (ndbhp->ndbh_count-1)) { + /* next entry is *in* this buffer: return this block */ + *lbnp = bp->nb_lblkno; + found = 1; + } else if (i == (ndbhp->ndbh_count-1)) { + /* next entry refers to *next* buffer: return next block */ + *lbnp = dp->d_seekoff; + found = 1; } - if (bigenough) { - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]); - if (nfsvers == NFS_VER3) - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]); - } else if (nfsvers == NFS_VER3) - nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); - else - nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED); - nfsm_chain_get_32(error, &nmrep, more_dirs); - nfsmout_if(error); - } - /* - * If at end of rpc data, get the eof boolean - */ - if (!more_dirs) { - nfsm_chain_get_32(error, &nmrep, eof); - if (!error) - more_dirs = (eof == 0); + nfs_buf_drop(bp); } - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) - error = lockerror; - nfsmout_if(error); - nfsm_chain_cleanup(&nmrep); - nfsm_chain_null(&nmreq); + 
nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN); } - if (!lockerror) { - nfs_unlock(dnp); - lockerror = ENOENT; + lck_mtx_unlock(nfs_buf_mutex); + if (found) { + OSAddAtomic(1, &nfsstats.direofcache_hits); + return (0); } - /* - * Fill last record, iff any, out to a multiple of DIRBLKSIZ - * by increasing d_reclen for the last record. - */ - if (blksiz > 0) { - left = DIRBLKSIZ - blksiz; - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); + + /* still not found... oh well, just start a new block */ + *lbnp = cookie; + OSAddAtomic(1, &nfsstats.direofcache_misses); + return (0); +} + +/* + * scan a directory buffer for the given name + * Returns: ESRCH if not found, ENOENT if found invalid, 0 if found + * Note: should only be called with RDIRPLUS directory buffers + */ + +#define NDBS_PURGE 1 +#define NDBS_UPDATE 2 + +int +nfs_dir_buf_search( + struct nfsbuf *bp, + struct componentname *cnp, + fhandle_t *fhp, + struct nfs_vattr *nvap, + uint64_t *xidp, + time_t *attrstampp, + daddr64_t *nextlbnp, + int flags) +{ + struct direntry *dp; + struct nfs_dir_buf_header *ndbhp; + struct nfs_vattr *nvattrp; + daddr64_t nextlbn = 0; + int i, error = ESRCH, fhlen; + + /* scan the buffer for the name */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + for (i=0; i < ndbhp->ndbh_count; i++) { + nextlbn = dp->d_seekoff; + if ((cnp->cn_namelen == dp->d_namlen) && !strcmp(cnp->cn_nameptr, dp->d_name)) { + fhlen = dp->d_name[dp->d_namlen+1]; + nvattrp = NFS_DIR_BUF_NVATTR(bp, i); + if ((ndbhp->ndbh_ncgen != bp->nb_np->n_ncgen) || (fhp->fh_len == 0) || + (nvattrp->nva_type == VNON) || (nvattrp->nva_fileid == 0)) { + /* entry is no longer valid */ + error = ENOENT; + break; + } + if (flags == NDBS_PURGE) { + dp->d_fileno = 0; + bzero(nvattrp, sizeof(*nvattrp)); + error = ENOENT; + break; + } + if (flags == NDBS_UPDATE) { + /* update direntry's attrs 
if fh matches */ + if ((fhp->fh_len == fhlen) && !bcmp(&dp->d_name[dp->d_namlen+2], fhp->fh_data, fhlen)) { + bcopy(nvap, nvattrp, sizeof(*nvap)); + dp->d_fileno = nvattrp->nva_fileid; + nvattrp->nva_fileid = *xidp; + *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]) = *attrstampp; + } + error = 0; + break; + } + /* copy out fh, attrs, attrstamp, and xid */ + fhp->fh_len = fhlen; + bcopy(&dp->d_name[dp->d_namlen+2], fhp->fh_data, MAX(fhp->fh_len, (int)sizeof(fhp->fh_data))); + *attrstampp = *(time_t*)(&dp->d_name[dp->d_namlen+2+fhp->fh_len]); + bcopy(nvattrp, nvap, sizeof(*nvap)); + *xidp = nvap->nva_fileid; + nvap->nva_fileid = dp->d_fileno; + error = 0; + break; + } + dp = NFS_DIRENTRY_NEXT(dp); } + if (nextlbnp) + *nextlbnp = nextlbn; + return (error); +} - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); +/* + * Look up a name in a directory's buffers. + * Note: should only be called with RDIRPLUS directory buffers + */ +int +nfs_dir_buf_cache_lookup(nfsnode_t dnp, nfsnode_t *npp, struct componentname *cnp, vfs_context_t ctx, int purge) +{ + nfsnode_t newnp; + struct nfsmount *nmp; + int error = 0, slpflag, slptimeo, i, found = 0, count = 0; + u_int64_t xid; + struct nfs_vattr nvattr; + fhandle_t fh; + time_t attrstamp = 0; + thread_t thd = vfs_context_thread(ctx); + struct nfsbuf *bp, *lastbp, *foundbp; + struct nfsbuflists blist; + daddr64_t lbn, nextlbn; + int dotunder = (cnp->cn_namelen > 2) && (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == '_'); + + if (!(nmp = NFSTONMP(dnp))) + return (ENXIO); + slpflag = (nmp->nm_flag & NFSMNT_INT) ? 
PCATCH : 0; + slptimeo = 0; + if (!purge) + *npp = NULL; + + /* first check most recent buffer (and next one too) */ + lbn = dnp->n_lastdbl; + for (i=0; i < 2; i++) { + if ((error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp))) + return (error); + if (!bp) + break; + count++; + error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, &nextlbn, purge ? NDBS_PURGE : 0); + nfs_buf_release(bp, 0); + if (error == ESRCH) { + error = 0; + } else { + found = 1; + break; + } + lbn = nextlbn; + } + + lck_mtx_lock(nfs_buf_mutex); + if (found) { + dnp->n_lastdbl = lbn; + goto done; + } /* - * We are now either at the end of the directory or have filled the - * block. + * Scan the list of buffers, keeping them in order. + * Note that itercomplete inserts each of the remaining buffers + * into the head of list (thus reversing the elements). So, we + * make sure to iterate through all buffers, inserting them after + * each other, to keep them in order. + * Also note: the LIST_INSERT_AFTER(lastbp) is only safe because + * we don't drop nfs_buf_mutex. */ - if (bigenough) - dnp->n_direofoffset = uiop->uio_offset; - else { - if (uio_uio_resid(uiop) > 0) - printf("EEK! 
readdirrpc resid > 0\n"); - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - if (cookiep) - *cookiep = cookie; + if (!nfs_buf_iterprepare(dnp, &blist, NBI_CLEAN)) { + lastbp = foundbp = NULL; + while ((bp = LIST_FIRST(&blist))) { + LIST_REMOVE(bp, nb_vnbufs); + if (!lastbp) + LIST_INSERT_HEAD(&dnp->n_cleanblkhd, bp, nb_vnbufs); + else + LIST_INSERT_AFTER(lastbp, bp, nb_vnbufs); + lastbp = bp; + if (error || found) + continue; + if (!purge && dotunder && (count > 100)) /* don't waste too much time looking for ._ files */ + continue; + nfs_buf_refget(bp); + lbn = bp->nb_lblkno; + if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) { + /* just skip this buffer */ + nfs_buf_refrele(bp); + continue; + } + nfs_buf_refrele(bp); + count++; + error = nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, purge ? NDBS_PURGE : 0); + if (error == ESRCH) { + error = 0; + } else { + found = 1; + foundbp = bp; + } + nfs_buf_drop(bp); + } + if (found) { + LIST_REMOVE(foundbp, nb_vnbufs); + LIST_INSERT_HEAD(&dnp->n_cleanblkhd, foundbp, nb_vnbufs); + dnp->n_lastdbl = foundbp->nb_lblkno; + } + nfs_buf_itercomplete(dnp, &blist, NBI_CLEAN); + } +done: + lck_mtx_unlock(nfs_buf_mutex); + + if (!error && found && !purge) { + error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, + &nvattr, &xid, NG_MAKEENTRY, &newnp); + if (error) + return (error); + newnp->n_attrstamp = attrstamp; + *npp = newnp; + nfs_node_unlock(newnp); + /* check if the dir buffer's attrs are out of date */ + if (!nfs_getattr(newnp, &nvattr, ctx, NGA_CACHED) && + (newnp->n_attrstamp != attrstamp)) { + /* they are, so update them */ + error = nfs_buf_get(dnp, lbn, NFS_DIRBLKSIZ, thd, NBLK_READ|NBLK_ONLYVALID, &bp); + if (!error && bp) { + attrstamp = newnp->n_attrstamp; + xid = newnp->n_xid; + nfs_dir_buf_search(bp, cnp, &fh, &nvattr, &xid, &attrstamp, NULL, NDBS_UPDATE); + nfs_buf_release(bp, 0); + } + error = 0; + } } -nfsmout: - if (!lockerror) - nfs_unlock(dnp); - nfsm_chain_cleanup(&nmreq); - 
nfsm_chain_cleanup(&nmrep); return (error); } /* - * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). + * Purge name cache entries for the given node. + * For RDIRPLUS, also invalidate the entry in the directory's buffers. + */ +void +nfs_name_cache_purge(nfsnode_t dnp, nfsnode_t np, struct componentname *cnp, vfs_context_t ctx) +{ + struct nfsmount *nmp = NFSTONMP(dnp); + + cache_purge(NFSTOV(np)); + if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) + nfs_dir_buf_cache_lookup(dnp, NULL, cnp, ctx, 1); +} + +/* + * NFS V3 readdir (plus) RPC. */ int -nfs3_readdirplus_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) +nfs3_readdir_rpc(nfsnode_t dnp, struct nfsbuf *bp, vfs_context_t ctx) { - size_t len, tlen, skiplen, left; - struct dirent *dp = NULL; - vnode_t newvp; - nfsuint64 *cookiep; - struct componentname cn, *cnp = &cn; - nfsuint64 cookie; struct nfsmount *nmp; - nfsnode_t np; - u_char *fhp; - u_quad_t fileno; - int error = 0, lockerror, status, more_dirs = 1, blksiz = 0, doit, bigenough = 1; - int nfsvers, nmreaddirsize, nmrsize, attrflag, eof; - size_t fhsize; - u_int64_t xid, savexid; - struct nfs_vattr nvattr; - struct nfsm_chain nmreq, nmrep; - char *cp; + int error = 0, lockerror, nfsvers, rdirplus, bigcookies; + int i, status, attrflag, fhflag, more_entries = 1, eof, bp_dropped = 0; + uint32_t nmreaddirsize, nmrsize; + uint32_t namlen, skiplen, fhlen, xlen, attrlen, reclen, space_free, space_needed; + uint64_t cookie, lastcookie, xid, savedxid, fileno; + struct nfsm_chain nmreq, nmrep, nmrepsave; + fhandle_t fh; + struct nfs_vattr *nvattrp; + struct nfs_dir_buf_header *ndbhp; + struct direntry *dp; + char *padstart, padlen; + struct timeval now; -#if DIAGNOSTIC - /* XXX limitation based on need to adjust uio */ - if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || - (uio_uio_resid(uiop) & (DIRBLKSIZ - 1))) - panic("nfs3_readdirplus_rpc: bad uio"); -#endif nmp = NFSTONMP(dnp); if (!nmp) return 
(ENXIO); nfsvers = nmp->nm_vers; nmreaddirsize = nmp->nm_readdirsize; nmrsize = nmp->nm_rsize; + bigcookies = nmp->nm_state & NFSSTA_BIGCOOKIES; +noplus: + rdirplus = ((nfsvers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) ? 1 : 0; - bzero(cnp, sizeof(*cnp)); - newvp = NULLVP; - - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + if ((lockerror = nfs_node_lock(dnp))) return (lockerror); - /* - * If there is no cookie, assume directory was stale. - */ - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); - if (cookiep) - cookie = *cookiep; - else { - nfs_unlock(dnp); - return (NFSERR_BAD_COOKIE); + /* determine cookie to use, and move dp to the right offset */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + if (ndbhp->ndbh_count) { + for (i=0; i < ndbhp->ndbh_count-1; i++) + dp = NFS_DIRENTRY_NEXT(dp); + cookie = dp->d_seekoff; + dp = NFS_DIRENTRY_NEXT(dp); + } else { + cookie = bp->nb_lblkno; + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } + lastcookie = cookie; /* - * Loop around doing readdir rpc's of size nm_readdirsize - * truncated to a multiple of DIRBLKSIZ. - * The stopping criteria is EOF or buffer full. + * Loop around doing readdir(plus) RPCs of size nm_readdirsize until + * the buffer is full (or we hit EOF). Then put the remainder of the + * results in the next buffer(s). 
*/ nfsm_chain_null(&nmreq); nfsm_chain_null(&nmrep); - while (more_dirs && bigenough) { + while (nfs_dir_buf_freespace(bp, rdirplus) && !(ndbhp->ndbh_flags & NDB_FULL)) { nfsm_chain_build_alloc_init(error, &nmreq, - NFSX_FH(NFS_VER3) + 6 * NFSX_UNSIGNED); + NFSX_FH(nfsvers) + NFSX_READDIR(nfsvers) + NFSX_UNSIGNED); nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize); - /* opaque values don't need swapping, but as long */ - /* as we are consistent about it, it should be ok */ - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, cookie.nfsuquad[1]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_add_32(error, &nmreq, dnp->n_cookieverf.nfsuquad[1]); + if (nfsvers == NFS_VER3) { + /* opaque values don't need swapping, but as long */ + /* as we are consistent about it, it should be ok */ + nfsm_chain_add_64(error, &nmreq, cookie); + nfsm_chain_add_64(error, &nmreq, dnp->n_cookieverf); + } else { + nfsm_chain_add_32(error, &nmreq, cookie); + } nfsm_chain_add_32(error, &nmreq, nmreaddirsize); - nfsm_chain_add_32(error, &nmreq, nmrsize); + if (rdirplus) + nfsm_chain_add_32(error, &nmreq, nmrsize); nfsm_chain_build_done(error, &nmreq); - nfs_unlock(dnp); + nfs_node_unlock(dnp); lockerror = ENOENT; nfsmout_if(error); - error = nfs_request(dnp, NULL, &nmreq, NFSPROC_READDIRPLUS, ctx, - &nmrep, &xid, &status); + error = nfs_request(dnp, NULL, &nmreq, + rdirplus ? 
NFSPROC_READDIRPLUS : NFSPROC_READDIR, + ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; - savexid = xid; - nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid); + savedxid = xid; + if (nfsvers == NFS_VER3) + nfsm_chain_postop_attr_update(error, &nmrep, dnp, &xid); if (!error) error = status; - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, dnp->n_cookieverf.nfsuquad[1]); - nfsm_chain_get_32(error, &nmrep, more_dirs); + if (nfsvers == NFS_VER3) + nfsm_chain_get_64(error, &nmrep, dnp->n_cookieverf); + nfsm_chain_get_32(error, &nmrep, more_entries); if (!lockerror) { - nfs_unlock(dnp); + nfs_node_unlock(dnp); lockerror = ENOENT; } + if (error == NFSERR_NOTSUPP) { + /* oops... it doesn't look like readdirplus is supported */ + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_flag &= ~NFSMNT_RDIRPLUS; + lck_mtx_unlock(&nmp->nm_lock); + goto noplus; + } nfsmout_if(error); - nfsmout_if(error); - /* loop thru the dir entries, doctoring them to 4bsd form */ - while (more_dirs && bigenough) { - nfsm_chain_get_64(error, &nmrep, fileno); - nfsm_chain_get_32(error, &nmrep, len); + if (rdirplus) + microuptime(&now); + + /* loop through the entries packing them into the buffer */ + while (more_entries) { + if (nfsvers == NFS_VER3) + nfsm_chain_get_64(error, &nmrep, fileno); + else + nfsm_chain_get_32(error, &nmrep, fileno); + nfsm_chain_get_32(error, &nmrep, namlen); nfsmout_if(error); - /* Note: v3 supports longer names, but struct dirent doesn't */ - /* so we just truncate the names to fit */ - if (len <= 0) { + /* just truncate names that don't fit in direntry.d_name */ + if (namlen <= 0) { error = EBADRPC; goto nfsmout; } - if (len > MAXNAMLEN) { - skiplen = len - MAXNAMLEN; - len = MAXNAMLEN; + if (namlen > (sizeof(dp->d_name)-1)) { + skiplen = namlen - sizeof(dp->d_name) + 1; + namlen = sizeof(dp->d_name) - 1; } else { skiplen = 
0; } - tlen = nfsm_rndup(len); - if (tlen == len) - tlen += 4; /* To ensure null termination */ - left = DIRBLKSIZ - blksiz; - if ((tlen + DIRHDSIZ) > left) { - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - blksiz = 0; - } - if ((tlen + DIRHDSIZ) > uio_uio_resid(uiop)) - bigenough = 0; - if (bigenough) { - // LP64todo - fix this! - dp = (struct dirent *) CAST_DOWN(caddr_t, uio_iov_base(uiop)); - dp->d_fileno = (int)fileno; - dp->d_namlen = len; - dp->d_reclen = tlen + DIRHDSIZ; - dp->d_type = DT_UNKNOWN; - blksiz += dp->d_reclen; - if (blksiz == DIRBLKSIZ) - blksiz = 0; - uiop->uio_offset += DIRHDSIZ; -#if LP64KERN - uio_uio_resid_add(uiop, -((int64_t)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int64_t)DIRHDSIZ)); -#else - uio_uio_resid_add(uiop, -((int)DIRHDSIZ)); - uio_iov_len_add(uiop, -((int)DIRHDSIZ)); -#endif - uio_iov_base_add(uiop, DIRHDSIZ); - // LP64todo - fix this! - cnp->cn_nameptr = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - cnp->cn_namelen = len; - error = nfsm_chain_get_uio(&nmrep, len, uiop); + /* guess that fh size will be same as parent */ + fhlen = rdirplus ? (1 + dnp->n_fhsize) : 0; + xlen = rdirplus ? (fhlen + sizeof(time_t)) : 0; + attrlen = rdirplus ? sizeof(struct nfs_vattr) : 0; + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + if (space_needed > space_free) { + /* + * We still have entries to pack, but we've + * run out of room in the current buffer. + * So we need to move to the next buffer. + * The block# for the next buffer is the + * last cookie in the current buffer. 
+ */ +nextbuffer: + ndbhp->ndbh_flags |= NDB_FULL; + nfs_buf_release(bp, 0); + bp_dropped = 1; + bp = NULL; + error = nfs_buf_get(dnp, lastcookie, NFS_DIRBLKSIZ, vfs_context_thread(ctx), NBLK_READ, &bp); nfsmout_if(error); - cp = CAST_DOWN(caddr_t, uio_iov_base(uiop)); - tlen -= len; - *cp = '\0'; - uio_iov_base_add(uiop, tlen); - uio_iov_len_add(uiop, -tlen); - uiop->uio_offset += tlen; - uio_uio_resid_add(uiop, -tlen); - if (skiplen) - nfsm_chain_adv(error, &nmrep, - nfsm_rndup(len + skiplen) - nfsm_rndup(len)); - } else { - nfsm_chain_adv(error, &nmrep, nfsm_rndup(len + skiplen)); + /* initialize buffer */ + ndbhp = (struct nfs_dir_buf_header*)bp->nb_data; + ndbhp->ndbh_flags = 0; + ndbhp->ndbh_count = 0; + ndbhp->ndbh_entry_end = sizeof(*ndbhp); + ndbhp->ndbh_ncgen = dnp->n_ncgen; + space_free = nfs_dir_buf_freespace(bp, rdirplus); + dp = NFS_DIR_BUF_FIRST_DIRENTRY(bp); + /* increment with every buffer read */ + OSAddAtomic(1, &nfsstats.readdir_bios); } - if (bigenough) { - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[0]); - nfsm_chain_get_32(error, &nmrep, cookie.nfsuquad[1]); - } else - nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); - - nfsm_chain_get_32(error, &nmrep, attrflag); + nmrepsave = nmrep; + dp->d_fileno = fileno; + dp->d_namlen = namlen; + dp->d_reclen = reclen; + dp->d_type = DT_UNKNOWN; + nfsm_chain_get_opaque(error, &nmrep, namlen, dp->d_name); nfsmout_if(error); - if (attrflag) { - /* grab attributes */ - error = nfs_parsefattr(&nmrep, NFS_VER3, &nvattr); - nfsmout_if(error); - dp->d_type = IFTODT(VTTOIF(nvattr.nva_type)); - /* check for file handle */ - nfsm_chain_get_32(error, &nmrep, doit); - nfsmout_if(error); - if (doit) { - nfsm_chain_get_fh_ptr(error, &nmrep, NFS_VER3, fhp, fhsize); + dp->d_name[namlen] = '\0'; + if (skiplen) + nfsm_chain_adv(error, &nmrep, + nfsm_rndup(namlen + skiplen) - nfsm_rndup(namlen)); + if (nfsvers == NFS_VER3) + nfsm_chain_get_64(error, &nmrep, cookie); + else + nfsm_chain_get_32(error, &nmrep, cookie); 
+ nfsmout_if(error); + dp->d_seekoff = cookie; + if (!bigcookies && (cookie >> 32) && (nmp == NFSTONMP(dnp))) { + /* we've got a big cookie, make sure flag is set */ + lck_mtx_lock(&nmp->nm_lock); + nmp->nm_state |= NFSSTA_BIGCOOKIES; + lck_mtx_unlock(&nmp->nm_lock); + bigcookies = 1; + } + if (rdirplus) { + nvattrp = NFS_DIR_BUF_NVATTR(bp, ndbhp->ndbh_count); + /* check for attributes */ + nfsm_chain_get_32(error, &nmrep, attrflag); nfsmout_if(error); - if (NFS_CMPFH(dnp, fhp, fhsize)) { - error = vnode_ref(NFSTOV(dnp)); - if (error) { - doit = 0; - } else { - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - if (error) { - vnode_rele(NFSTOV(dnp)); - goto nfsmout; + if (attrflag) { + /* grab attributes */ + error = nfs_parsefattr(&nmrep, NFS_VER3, nvattrp); + nfsmout_if(error); + dp->d_type = IFTODT(VTTOIF(nvattrp->nva_type)); + /* fileid is already in d_fileno, so stash xid in attrs */ + nvattrp->nva_fileid = savedxid; + } else { + /* mark the attributes invalid */ + bzero(nvattrp, sizeof(struct nfs_vattr)); + } + /* check for file handle */ + nfsm_chain_get_32(error, &nmrep, fhflag); + nfsmout_if(error); + if (fhflag) { + nfsm_chain_get_fh(error, &nmrep, NFS_VER3, &fh); + nfsmout_if(error); + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + space_needed = reclen + attrlen; + if (space_needed > space_free) { + /* didn't actually have the room... move on to next buffer */ + nmrep = nmrepsave; + goto nextbuffer; } - newvp = NFSTOV(dnp); - np = dnp; - } - } else if (!bigenough || - (cnp->cn_namelen == 2 && - cnp->cn_nameptr[1] == '.' && - cnp->cn_nameptr[0] == '.')) { - /* - * XXXmacko I don't think this ".." thing is a problem anymore. - * don't doit if we can't guarantee - * that this entry is NOT ".." because - * we would have to drop the lock on - * the directory before getting the - * lock on the ".." vnode... 
and we - * don't want to drop the dvp lock in - * the middle of a readdirplus. - */ - doit = 0; + /* pack the file handle into the record */ + dp->d_name[dp->d_namlen+1] = fh.fh_len; + bcopy(fh.fh_data, &dp->d_name[dp->d_namlen+2], fh.fh_len); } else { - cnp->cn_hash = 0; - - error = nfs_nget(NFSTOMP(dnp), dnp, cnp, - fhp, fhsize, &nvattr, &xid, NG_MAKEENTRY, &np); - if (error) - doit = 0; - else - newvp = NFSTOV(np); + /* mark the file handle invalid */ + fh.fh_len = 0; + fhlen = fh.fh_len + 1; + xlen = fhlen + sizeof(time_t); + reclen = NFS_DIRENTRY_LEN(namlen + xlen); + bzero(&dp->d_name[dp->d_namlen+1], fhlen); } - } - /* update attributes if not already updated */ - if (doit && bigenough && (np->n_xid <= savexid)) { - xid = savexid; - nfs_loadattrcache(np, &nvattr, &xid, 0); - /* any error can be ignored */ - } - } else { - /* Just skip over the file handle */ - nfsm_chain_get_32(error, &nmrep, fhsize); - nfsm_chain_adv(error, &nmrep, nfsm_rndup(fhsize)); - } - if (newvp != NULLVP) { - nfs_unlock(np); - if (newvp == NFSTOV(dnp)) - vnode_rele(newvp); - else - vnode_put(newvp); - newvp = NULLVP; + *(time_t*)(&dp->d_name[dp->d_namlen+1+fhlen]) = now.tv_sec; + dp->d_reclen = reclen; } - nfsm_chain_get_32(error, &nmrep, more_dirs); + padstart = dp->d_name + dp->d_namlen + 1 + xlen; + ndbhp->ndbh_count++; + lastcookie = cookie; + /* advance to next direntry in buffer */ + dp = NFS_DIRENTRY_NEXT(dp); + ndbhp->ndbh_entry_end = (char*)dp - bp->nb_data; + /* zero out the pad bytes */ + padlen = (char*)dp - padstart; + if (padlen > 0) + bzero(padstart, padlen); + /* check for more entries */ + nfsm_chain_get_32(error, &nmrep, more_entries); nfsmout_if(error); } - /* - * If at end of rpc data, get the eof boolean - */ - if (!more_dirs) { - nfsm_chain_get_32(error, &nmrep, eof); - if (!error) - more_dirs = (eof == 0); + /* Finally, get the eof boolean */ + nfsm_chain_get_32(error, &nmrep, eof); + nfsmout_if(error); + if (eof) { + ndbhp->ndbh_flags |= (NDB_FULL|NDB_EOF); + 
nfs_node_lock_force(dnp); + dnp->n_eofcookie = lastcookie; + nfs_node_unlock(dnp); + } else { + more_entries = 1; + } + if (bp_dropped) { + nfs_buf_release(bp, 0); + bp = NULL; + break; } - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_SHARED))) + if ((lockerror = nfs_node_lock(dnp))) error = lockerror; nfsmout_if(error); nfsm_chain_cleanup(&nmrep); nfsm_chain_null(&nmreq); } - if (!lockerror) { - nfs_unlock(dnp); - lockerror = ENOENT; - } - /* - * Fill last record, iff any, out to a multiple of DIRBLKSIZ - * by increasing d_reclen for the last record. - */ - if (blksiz > 0) { - left = DIRBLKSIZ - blksiz; - dp->d_reclen += left; - uio_iov_base_add(uiop, left); - uio_iov_len_add(uiop, -left); - uiop->uio_offset += left; - uio_uio_resid_add(uiop, -left); - } - - if ((lockerror = nfs_lock(dnp, NFS_NODE_LOCK_EXCLUSIVE))) - error = lockerror; - nfsmout_if(error); - - /* - * We are now either at the end of the directory or have filled the - * block. - */ - if (bigenough) - dnp->n_direofoffset = uiop->uio_offset; - else { - if (uio_uio_resid(uiop) > 0) - printf("EEK! readdirplus_rpc resid > 0\n"); - cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); - if (cookiep) - *cookiep = cookie; - } - nfsmout: + if (bp_dropped && bp) + nfs_buf_release(bp, 0); if (!lockerror) - nfs_unlock(dnp); + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - return (error); + return (bp_dropped ? 
NFSERR_DIRBUFDROPPED : error); } /* @@ -4595,7 +5055,7 @@ nfs3_readdirplus_rpc(nfsnode_t dnp, struct uio *uiop, vfs_context_t ctx) /* starting from zero isn't silly enough */ static uint32_t nfs_sillyrename_number = 0x20051025; -static int +int nfs_sillyrename( nfsnode_t dnp, nfsnode_t np, @@ -4613,7 +5073,7 @@ nfs_sillyrename( if (!nmp) return (ENXIO); - cache_purge(NFSTOV(np)); + nfs_name_cache_purge(dnp, np, cnp, ctx); MALLOC_ZONE(nsp, struct nfs_sillyrename *, sizeof (struct nfs_sillyrename), M_NFSREQ, M_WAITOK); @@ -4629,7 +5089,7 @@ nfs_sillyrename( /* Fudge together a funny name */ pid = vfs_context_pid(ctx); - num = OSAddAtomic(1, (SInt32*)&nfs_sillyrename_number); + num = OSAddAtomic(1, &nfs_sillyrename_number); nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name), NFS_SILLYNAME_FORMAT, num, (pid & 0xffff)); if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) @@ -4637,7 +5097,7 @@ nfs_sillyrename( /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, NULL) == 0) { - num = OSAddAtomic(1, (SInt32*)&nfs_sillyrename_number); + num = OSAddAtomic(1, &nfs_sillyrename_number); nsp->nsr_namlen = snprintf(nsp->nsr_name, sizeof(nsp->nsr_name), NFS_SILLYNAME_FORMAT, num, (pid & 0xffff)); if (nsp->nsr_namlen >= (int)sizeof(nsp->nsr_name)) @@ -4647,15 +5107,21 @@ nfs_sillyrename( /* now, do the rename */ error = nmp->nm_funcs->nf_rename_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen, dnp, nsp->nsr_name, nsp->nsr_namlen, ctx); - if (!error && (dnp->n_flag & NNEGNCENTRIES)) { - dnp->n_flag &= ~NNEGNCENTRIES; - cache_purge_negatives(NFSTOV(dnp)); + if (!error) { + nfs_node_lock_force(dnp); + if (dnp->n_flag & NNEGNCENTRIES) { + dnp->n_flag &= ~NNEGNCENTRIES; + cache_purge_negatives(NFSTOV(dnp)); + } + nfs_node_unlock(dnp); } FSDBG(267, dnp, np, num, error); if (error) goto bad; error = nfs_lookitup(dnp, nsp->nsr_name, nsp->nsr_namlen, ctx, &np); + nfs_node_lock_force(np); np->n_sillyrename = nsp; + 
nfs_node_unlock(np); return (0); bad: vnode_rele(NFSTOV(dnp)); @@ -4707,7 +5173,7 @@ nfs3_lookup_rpc_async_finish( fhandle_t *fhp, struct nfs_vattr *nvap) { - int error = 0, status, nfsvers, attrflag; + int error = 0, lockerror = ENOENT, status, nfsvers, attrflag; u_int64_t xid; struct nfsmount *nmp; struct nfsm_chain nmrep; @@ -4719,6 +5185,8 @@ nfs3_lookup_rpc_async_finish( error = nfs_request_async_finish(req, &nmrep, xidp, &status); + if ((lockerror = nfs_node_lock(dnp))) + error = lockerror; xid = *xidp; if (error || status) { if (nfsvers == NFS_VER3) @@ -4743,6 +5211,8 @@ nfs3_lookup_rpc_async_finish( error = nfs_parsefattr(&nmrep, nfsvers, nvap); } nfsmout: + if (!lockerror) + nfs_node_unlock(dnp); nfsm_chain_cleanup(&nmrep); return (error); } @@ -4776,7 +5246,7 @@ nfs_lookitup( return (ENXIO); if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) && - (namelen > (long)nmp->nm_fsattr.nfsa_maxname)) + (namelen > (int)nmp->nm_fsattr.nfsa_maxname)) return (ENAMETOOLONG); /* check for lookup of "." */ @@ -4811,12 +5281,16 @@ nfs_lookitup( } bcopy(fh.fh_data, np->n_fhp, fh.fh_len); np->n_fhsize = fh.fh_len; + nfs_node_lock_force(np); error = nfs_loadattrcache(np, &nvattr, &xid, 0); + nfs_node_unlock(np); nfsmout_if(error); newnp = np; } else if (NFS_CMPFH(dnp, fh.fh_data, fh.fh_len)) { + nfs_node_lock_force(dnp); if (dnp->n_xid <= xid) error = nfs_loadattrcache(dnp, &nvattr, &xid, 0); + nfs_node_unlock(dnp); nfsmout_if(error); newnp = dnp; } else { @@ -4841,7 +5315,7 @@ nfs_lookitup( * performing async lookups. 
*/ void -nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen) +nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, int namelen, vfs_context_t ctx) { int error, du_namelen; vnode_t du_vp; @@ -4861,14 +5335,27 @@ nfs_dulookup_init(struct nfs_dulookup *dulp, nfsnode_t dnp, const char *name, in dulp->du_cn.cn_namelen = du_namelen; snprintf(dulp->du_cn.cn_nameptr, du_namelen + 1, "._%s", name); dulp->du_cn.cn_nameptr[du_namelen] = '\0'; + dulp->du_cn.cn_nameiop = LOOKUP; + dulp->du_cn.cn_flags = MAKEENTRY; error = cache_lookup(NFSTOV(dnp), &du_vp, &dulp->du_cn); - if (error == -1) + if (error == -1) { vnode_put(du_vp); - else if (!error) - dulp->du_flags |= NFS_DULOOKUP_DOIT; - else if (dulp->du_cn.cn_nameptr != dulp->du_smallname) - FREE(dulp->du_cn.cn_nameptr, M_TEMP); + } else if (!error) { + struct nfsmount *nmp = NFSTONMP(dnp); + if (nmp && (nmp->nm_vers > NFS_VER2) && (nmp->nm_flag & NFSMNT_RDIRPLUS)) { + /* if rdirplus, try dir buf cache lookup */ + nfsnode_t du_np = NULL; + if (!nfs_dir_buf_cache_lookup(dnp, &du_np, &dulp->du_cn, ctx, 0) && du_np) { + /* dir buf cache hit */ + du_vp = NFSTOV(du_np); + vnode_put(du_vp); + error = -1; + } + } + if (!error) + dulp->du_flags |= NFS_DULOOKUP_DOIT; + } } /* @@ -4907,13 +5394,15 @@ nfs_dulookup_finish(struct nfs_dulookup *dulp, nfsnode_t dnp, vfs_context_t ctx) dulp->du_flags &= ~NFS_DULOOKUP_INPROG; if (error == ENOENT) { /* add a negative entry in the name cache */ + nfs_node_lock_force(dnp); cache_enter(NFSTOV(dnp), NULL, &dulp->du_cn); dnp->n_flag |= NNEGNCENTRIES; + nfs_node_unlock(dnp); } else if (!error) { error = nfs_nget(NFSTOMP(dnp), dnp, &dulp->du_cn, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &du_np); if (!error) { - nfs_unlock(du_np); + nfs_node_unlock(du_np); vnode_put(NFSTOV(du_np)); } } @@ -4966,12 +5455,12 @@ nfs3_commit_rpc( nfsmout_if(error); error = nfs_request2(np, NULL, &nmreq, NFSPROC_COMMIT, current_thread(), cred, 0, 
&nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; /* can we do anything useful with the wcc info? */ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (!error) error = status; nfsm_chain_get_64(error, &nmrep, wverf); @@ -4989,7 +5478,7 @@ nfs3_commit_rpc( } -static int +int nfs_vnop_blockmap( __unused struct vnop_blockmap_args /* { struct vnodeop_desc *a_desc; @@ -5011,7 +5500,7 @@ nfs_vnop_blockmap( * NB Currently unsupported. */ /*ARGSUSED*/ -static int +int nfs_vnop_mmap( __unused struct vnop_mmap_args /* { struct vnodeop_desc *a_desc; @@ -5027,7 +5516,7 @@ nfs_vnop_mmap( * fsync vnode op. Just call nfs_flush(). */ /* ARGSUSED */ -static int +int nfs_vnop_fsync( struct vnop_fsync_args /* { struct vnodeop_desc *a_desc; @@ -5069,11 +5558,11 @@ nfs3_pathconf_rpc( nfsmout_if(error); error = nfs_request(np, NULL, &nmreq, NFSPROC_PATHCONF, ctx, &nmrep, &xid, &status); - if ((lockerror = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((lockerror = nfs_node_lock(np))) error = lockerror; nfsm_chain_postop_attr_update(error, &nmrep, np, &xid); if (!lockerror) - nfs_unlock(np); + nfs_node_unlock(np); if (!error) error = status; nfsm_chain_get_32(error, &nmrep, nfsap->nfsa_maxlink); @@ -5128,13 +5617,13 @@ nfs3_pathconf_cache(struct nfsmount *nmp, struct nfs_fsattr *nfsap) * for V2. */ /* ARGSUSED */ -static int +int nfs_vnop_pathconf( struct vnop_pathconf_args /* { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_name; - register_t *a_retval; + int32_t *a_retval; vfs_context_t a_context; } */ *ap) { @@ -5227,35 +5716,35 @@ nfs_vnop_pathconf( break; case _PC_CHOWN_RESTRICTED: if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CHOWN_RESTRICTED)) - *ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 
200112 /* _POSIX_CHOWN_RESTRICTED */ : 0; + *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CHOWN_RESTRICTED) ? 200112 /* _POSIX_CHOWN_RESTRICTED */ : 0; else error = EINVAL; break; case _PC_NO_TRUNC: if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_NO_TRUNC)) - *ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0; + *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_NO_TRUNC) ? 200112 /* _POSIX_NO_TRUNC */ : 0; else error = EINVAL; break; case _PC_CASE_SENSITIVE: if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) - *ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1; + *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE) ? 0 : 1; else error = EINVAL; break; case _PC_CASE_PRESERVING: if (NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_CASE_PRESERVING)) - *ap->a_retval = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0; + *ap->a_retval = (nfsap->nfsa_flags & NFS_FSFLAG_CASE_PRESERVING) ? 1 : 0; else error = EINVAL; break; case _PC_FILESIZEBITS: - if (!NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) { + if (!NFS_BITMAP_ISSET(nfsap->nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) { *ap->a_retval = 64; error = 0; break; } - maxFileSize = nmp->nm_fsattr.nfsa_maxfilesize; + maxFileSize = nfsap->nfsa_maxfilesize; nbits = 1; if (maxFileSize & 0xffffffff00000000ULL) { nbits += 32; @@ -5294,7 +5783,7 @@ nfs_vnop_pathconf( /* * Read wrapper for special devices. */ -static int +int nfsspec_vnop_read( struct vnop_read_args /* { struct vnodeop_desc *a_desc; @@ -5311,20 +5800,20 @@ nfsspec_vnop_read( /* * Set access flag. 
*/ - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); np->n_flag |= NACC; microtime(&now); np->n_atim.tv_sec = now.tv_sec; np->n_atim.tv_nsec = now.tv_usec * 1000; - nfs_unlock(np); + nfs_node_unlock(np); return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_read), ap)); } /* * Write wrapper for special devices. */ -static int +int nfsspec_vnop_write( struct vnop_write_args /* { struct vnodeop_desc *a_desc; @@ -5341,13 +5830,13 @@ nfsspec_vnop_write( /* * Set update flag. */ - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); np->n_flag |= NUPD; microtime(&now); np->n_mtim.tv_sec = now.tv_sec; np->n_mtim.tv_nsec = now.tv_usec * 1000; - nfs_unlock(np); + nfs_node_unlock(np); return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_write), ap)); } @@ -5356,7 +5845,7 @@ nfsspec_vnop_write( * * Update the times on the nfsnode then do device close. */ -static int +int nfsspec_vnop_close( struct vnop_close_args /* { struct vnodeop_desc *a_desc; @@ -5371,11 +5860,11 @@ nfsspec_vnop_close( mount_t mp; int error; - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; - if (!vnode_isinuse(vp, 1) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) { + if (!vnode_isinuse(vp, 0) && (mp = vnode_mount(vp)) && !vfs_isrdonly(mp)) { VATTR_INIT(&vattr); if (np->n_flag & NACC) { vattr.va_access_time = np->n_atim; @@ -5385,13 +5874,13 @@ nfsspec_vnop_close( vattr.va_modify_time = np->n_mtim; VATTR_SET_ACTIVE(&vattr, va_modify_time); } - nfs_unlock(np); + nfs_node_unlock(np); vnode_setattr(vp, &vattr, ap->a_context); } else { - nfs_unlock(np); + nfs_node_unlock(np); } } else { - nfs_unlock(np); + nfs_node_unlock(np); } return (VOCALL(spec_vnodeop_p, VOFFSET(vnop_close), ap)); } @@ -5402,7 +5891,7 @@ extern vnop_t **fifo_vnodeop_p; /* * Read wrapper for fifos. 
*/ -static int +int nfsfifo_vnop_read( struct vnop_read_args /* { struct vnodeop_desc *a_desc; @@ -5419,20 +5908,20 @@ nfsfifo_vnop_read( /* * Set access flag. */ - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); np->n_flag |= NACC; microtime(&now); np->n_atim.tv_sec = now.tv_sec; np->n_atim.tv_nsec = now.tv_usec * 1000; - nfs_unlock(np); + nfs_node_unlock(np); return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_read), ap)); } /* * Write wrapper for fifos. */ -static int +int nfsfifo_vnop_write( struct vnop_write_args /* { struct vnodeop_desc *a_desc; @@ -5449,13 +5938,13 @@ nfsfifo_vnop_write( /* * Set update flag. */ - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); np->n_flag |= NUPD; microtime(&now); np->n_mtim.tv_sec = now.tv_sec; np->n_mtim.tv_nsec = now.tv_usec * 1000; - nfs_unlock(np); + nfs_node_unlock(np); return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_write), ap)); } @@ -5464,7 +5953,7 @@ nfsfifo_vnop_write( * * Update the times on the nfsnode then do fifo close. 
*/ -static int +int nfsfifo_vnop_close( struct vnop_close_args /* { struct vnodeop_desc *a_desc; @@ -5480,7 +5969,7 @@ nfsfifo_vnop_close( mount_t mp; int error; - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) + if ((error = nfs_node_lock(np))) return (error); if (np->n_flag & (NACC | NUPD)) { microtime(&now); @@ -5503,25 +5992,25 @@ nfsfifo_vnop_close( vattr.va_modify_time = np->n_mtim; VATTR_SET_ACTIVE(&vattr, va_modify_time); } - nfs_unlock(np); + nfs_node_unlock(np); vnode_setattr(vp, &vattr, ap->a_context); } else { - nfs_unlock(np); + nfs_node_unlock(np); } } else { - nfs_unlock(np); + nfs_node_unlock(np); } return (VOCALL(fifo_vnodeop_p, VOFFSET(vnop_close), ap)); } #endif /* FIFO */ /*ARGSUSED*/ -static int +int nfs_vnop_ioctl( __unused struct vnop_ioctl_args /* { struct vnodeop_desc *a_desc; vnode_t a_vp; - u_long a_command; + u_int32_t a_command; caddr_t a_data; int a_fflag; vfs_context_t a_context; @@ -5536,7 +6025,7 @@ nfs_vnop_ioctl( } /*ARGSUSED*/ -static int +int nfs_vnop_select( __unused struct vnop_select_args /* { struct vnodeop_desc *a_desc; @@ -5559,7 +6048,7 @@ nfs_vnop_select( * * No buffer I/O, just RPCs straight into the mapped pages. 
*/ -static int +int nfs_vnop_pagein( struct vnop_pagein_args /* { struct vnodeop_desc *a_desc; @@ -5585,15 +6074,16 @@ nfs_vnop_pagein( off_t txoffset; struct nfsmount *nmp; int error = 0; - vm_offset_t ioaddr; - struct uio auio; - struct iovec_32 aiov; - struct uio * uio = &auio; + vm_offset_t ioaddr, rxaddr; + uio_t uio; + char uio_buf [ UIO_SIZEOF(1) ]; int nofreeupl = flags & UPL_NOCOMMIT; upl_page_info_t *plinfo; #define MAXPAGINGREQS 16 /* max outstanding RPCs for pagein/pageout */ struct nfsreq *req[MAXPAGINGREQS]; int nextsend, nextwait; + uint32_t stategenid = 0, restart = 0; + kern_return_t kret; FSDBG(322, np, f_offset, size, flags); if (pl == (upl_t)NULL) @@ -5617,14 +6107,8 @@ nfs_vnop_pagein( if (!IS_VALID_CRED(cred)) cred = vfs_context_ucred(ap->a_context); - auio.uio_offset = f_offset; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - auio.uio_segflg = UIO_SYSSPACE; -#else - auio.uio_segflg = UIO_SYSSPACE32; -#endif - auio.uio_rw = UIO_READ; - auio.uio_procp = vfs_context_proc(ap->a_context); + uio = uio_createwithbuffer(1, f_offset, UIO_SYSSPACE, UIO_READ, + &uio_buf, sizeof(uio_buf)); nmp = VTONMP(vp); if (!nmp) { @@ -5636,10 +6120,17 @@ nfs_vnop_pagein( nmrsize = nmp->nm_rsize; plinfo = ubc_upl_pageinfo(pl); - ubc_upl_map(pl, &ioaddr); + kret = ubc_upl_map(pl, &ioaddr); + if (kret != KERN_SUCCESS) + panic("nfs_vnop_pagein: ubc_upl_map() failed with (%d)", kret); ioaddr += pl_offset; + +tryagain: + if (nmp->nm_vers >= NFS_VER4) + stategenid = nmp->nm_stategenid; txsize = rxsize = size; txoffset = f_offset; + rxaddr = ioaddr; bzero(req, sizeof(req)); nextsend = nextwait = 0; @@ -5658,49 +6149,67 @@ nfs_vnop_pagein( /* wait while we need to and break out if more requests to send */ while ((rxsize > 0) && req[nextwait]) { iosize = retsize = MIN(nmrsize, rxsize); - aiov.iov_len = iosize; - aiov.iov_base = (uintptr_t)ioaddr; - auio.uio_iovs.iov32p = &aiov; - auio.uio_iovcnt = 1; - uio_uio_resid_set(&auio, iosize); - 
FSDBG(322, uio->uio_offset, uio_uio_resid(uio), ioaddr, rxsize); -#ifdef UPL_DEBUG - upl_ubc_alias_set(pl, current_thread(), 2); + uio_reset(uio, uio_offset(uio), UIO_SYSSPACE, UIO_READ); + uio_addiov(uio, CAST_USER_ADDR_T(rxaddr), iosize); + FSDBG(322, uio_offset(uio), uio_resid(uio), rxaddr, rxsize); +#if UPL_DEBUG + upl_ubc_alias_set(pl, (uintptr_t) current_thread(), (uintptr_t) 2); #endif /* UPL_DEBUG */ - OSAddAtomic(1, (SInt32*)&nfsstats.pageins); + OSAddAtomic(1, &nfsstats.pageins); error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req[nextwait], uio, &retsize, NULL); req[nextwait] = NULL; nextwait = (nextwait + 1) % MAXPAGINGREQS; + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) { + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_vnop_pagein: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + restart++; + goto cancel; + } if (error) { - FSDBG(322, uio->uio_offset, uio_uio_resid(uio), error, -1); + FSDBG(322, uio_offset(uio), uio_resid(uio), error, -1); break; } if (retsize < iosize) { /* Just zero fill the rest of the valid area. 
*/ - // LP64todo - fix this int zcnt = iosize - retsize; - bzero((char *)ioaddr + retsize, zcnt); - FSDBG(324, uio->uio_offset, retsize, zcnt, ioaddr); - uio->uio_offset += zcnt; + bzero((char *)rxaddr + retsize, zcnt); + FSDBG(324, uio_offset(uio), retsize, zcnt, rxaddr); + uio_update(uio, zcnt); } - ioaddr += iosize; + rxaddr += iosize; rxsize -= iosize; if (txsize) break; } } while (!error && (txsize || rxsize)); - ubc_upl_unmap(pl); + restart = 0; if (error) { +cancel: /* cancel any outstanding requests */ while (req[nextwait]) { nfs_request_async_cancel(req[nextwait]); req[nextwait] = NULL; nextwait = (nextwait + 1) % MAXPAGINGREQS; } + if (restart) { + if ((restart <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */ + (!(error = nfs_mount_state_wait_for_recovery(nmp)))) + goto tryagain; + printf("nfs_pagein: too many restarts, aborting.\n"); + } } + ubc_upl_unmap(pl); + if (!nofreeupl) { if (error) ubc_upl_abort_range(pl, pl_offset, size, @@ -5722,6 +6231,7 @@ nfs_vnop_pagein( * are expected to match the same numbers here. If not, our actions maybe * erroneous. */ +char nfs_pageouterrorhandler(int); enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER}; #define NFS_ELAST 88 static u_char errorcount[NFS_ELAST+1]; /* better be zeros when initialized */ @@ -5826,7 +6336,7 @@ static const char errortooutcome[NFS_ELAST+1] = { DUMPANDLOG, /* EBADMACHO 88 Malformed Macho file */ }; -static char +char nfs_pageouterrorhandler(int error) { if (error > NFS_ELAST) @@ -5842,7 +6352,7 @@ nfs_pageouterrorhandler(int error) * No buffer I/O, just RPCs straight from the mapped pages. * File size changes are not permitted in pageout. 
*/ -static int +int nfs_vnop_pageout( struct vnop_pageout_args /* { struct vnodeop_desc *a_desc; @@ -5870,13 +6380,15 @@ nfs_vnop_pageout( int error = 0, iomode; off_t off, txoffset, rxoffset; vm_offset_t ioaddr, txaddr, rxaddr; - struct uio auio; - struct iovec_32 aiov; + uio_t auio; + char uio_buf [ UIO_SIZEOF(1) ]; int nofreeupl = flags & UPL_NOCOMMIT; size_t nmwsize, biosize, iosize, pgsize, txsize, rxsize, xsize, remsize; struct nfsreq *req[MAXPAGINGREQS]; - int nextsend, nextwait, wverfset, commit, restart = 0; + int nextsend, nextwait, wverfset, commit; uint64_t wverf, wverf2; + uint32_t stategenid = 0, vrestart = 0, restart = 0, vrestarts = 0, restarts = 0; + kern_return_t kret; FSDBG(323, f_offset, size, pl, pl_offset); @@ -5898,7 +6410,7 @@ nfs_vnop_pageout( biosize = nmp->nm_biosize; nmwsize = nmp->nm_wsize; - nfs_data_lock2(np, NFS_NODE_LOCK_SHARED, 0); + nfs_data_lock_noupdate(np, NFS_DATA_LOCK_SHARED); /* * Check to see whether the buffer is incore. @@ -5916,7 +6428,7 @@ nfs_vnop_pageout( FSDBG(323, off, bp, bp->nb_lflags, bp->nb_flags); if (nfs_buf_acquire(bp, NBAC_NOWAIT, 0, 0)) { lck_mtx_unlock(nfs_buf_mutex); - nfs_data_unlock2(np, 0); + nfs_data_unlock_noupdate(np); /* no panic. just tell vm we are busy */ if (!nofreeupl) ubc_upl_abort(pl, 0); @@ -5949,11 +6461,22 @@ nfs_vnop_pageout( end -= boff; if ((bp->nb_dirtyoff < start) && (bp->nb_dirtyend > end)) { - /* not gonna be able to clip the dirty region */ + /* + * not gonna be able to clip the dirty region + * + * But before returning the bad news, move the + * buffer to the start of the delwri list and + * give the list a push to try to flush the + * buffer out. 
+ */ FSDBG(323, np, bp, 0xd00deebc, EBUSY); + nfs_buf_remfree(bp); + TAILQ_INSERT_HEAD(&nfsbufdelwri, bp, nb_free); + nfsbufdelwricnt++; nfs_buf_drop(bp); + nfs_buf_delwri_push(1); lck_mtx_unlock(nfs_buf_mutex); - nfs_data_unlock2(np, 0); + nfs_data_unlock_noupdate(np); if (!nofreeupl) ubc_upl_abort(pl, 0); return (EBUSY); @@ -5975,13 +6498,13 @@ nfs_vnop_pageout( nfs_buf_remfree(bp); lck_mtx_unlock(nfs_buf_mutex); SET(bp->nb_flags, NB_INVAL); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) { CLR(bp->nb_flags, NB_NEEDCOMMIT); np->n_needcommitcnt--; CHECK_NEEDCOMMITCNT(np); } - nfs_unlock(np); + nfs_node_unlock(np); nfs_buf_release(bp, 1); } else { lck_mtx_unlock(nfs_buf_mutex); @@ -5993,28 +6516,30 @@ nfs_vnop_pageout( if (!IS_VALID_CRED(cred)) cred = vfs_context_ucred(ap->a_context); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (np->n_flag & NWRITEERR) { error = np->n_error; - nfs_unlock(np); - nfs_data_unlock2(np, 0); + nfs_node_unlock(np); + nfs_data_unlock_noupdate(np); if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY); return (error); } - nfs_unlock(np); + nfs_node_unlock(np); if (f_offset < 0 || f_offset >= (off_t)np->n_size || f_offset & PAGE_MASK_64 || size & PAGE_MASK_64) { - nfs_data_unlock2(np, 0); + nfs_data_unlock_noupdate(np); if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY); return (EINVAL); } - ubc_upl_map(pl, &ioaddr); + kret = ubc_upl_map(pl, &ioaddr); + if (kret != KERN_SUCCESS) + panic("nfs_vnop_pageout: ubc_upl_map() failed with (%d)", kret); ioaddr += pl_offset; if ((u_quad_t)f_offset + size > np->n_size) @@ -6037,17 +6562,14 @@ nfs_vnop_pageout( bzero((caddr_t)(ioaddr + io), size - io); FSDBG(321, np->n_size, f_offset, f_offset + io, size - io); } - nfs_data_unlock2(np, 0); + nfs_data_unlock_noupdate(np); -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - auio.uio_segflg 
= UIO_SYSSPACE; -#else - auio.uio_segflg = UIO_SYSSPACE32; -#endif - auio.uio_rw = UIO_WRITE; - auio.uio_procp = vfs_context_proc(ap->a_context); + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE, + &uio_buf, sizeof(uio_buf)); tryagain: + if (nmp->nm_vers >= NFS_VER4) + stategenid = nmp->nm_stategenid; wverf = wverf2 = wverfset = 0; txsize = rxsize = xsize; txoffset = rxoffset = f_offset; @@ -6060,19 +6582,21 @@ nfs_vnop_pageout( /* send requests while we need to and have available slots */ while ((txsize > 0) && (req[nextsend] == NULL)) { iosize = MIN(nmwsize, txsize); - aiov.iov_len = iosize; - aiov.iov_base = (uintptr_t)txaddr; - auio.uio_iovs.iov32p = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = txoffset; - uio_uio_resid_set(&auio, iosize); - FSDBG(323, auio.uio_offset, iosize, txaddr, txsize); - OSAddAtomic(1, (SInt32*)&nfsstats.pageouts); + uio_reset(auio, txoffset, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, CAST_USER_ADDR_T(txaddr), iosize); + FSDBG(323, uio_offset(auio), iosize, txaddr, txsize); + OSAddAtomic(1, &nfsstats.pageouts); + nfs_node_lock_force(np); + np->n_numoutput++; + nfs_node_unlock(np); vnode_startwrite(vp); iomode = NFS_WRITE_UNSTABLE; - if ((error = nmp->nm_funcs->nf_write_rpc_async(np, &auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) { + if ((error = nmp->nm_funcs->nf_write_rpc_async(np, auio, iosize, thd, cred, iomode, NULL, &req[nextsend]))) { req[nextsend] = NULL; vnode_writedone(vp); + nfs_node_lock_force(np); + np->n_numoutput--; + nfs_node_unlock(np); break; } txaddr += iosize; @@ -6087,6 +6611,22 @@ nfs_vnop_pageout( req[nextwait] = NULL; nextwait = (nextwait + 1) % MAXPAGINGREQS; vnode_writedone(vp); + nfs_node_lock_force(np); + np->n_numoutput--; + nfs_node_unlock(np); + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) { + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + 
printf("nfs_vnop_pageout: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + restart = 1; + goto cancel; + } if (error) { FSDBG(323, rxoffset, rxsize, error, -1); break; @@ -6096,7 +6636,7 @@ nfs_vnop_pageout( wverfset = 1; } else if (wverf != wverf2) { /* verifier changed, so we need to restart all the writes */ - restart++; + vrestart = 1; goto cancel; } /* Retain the lowest commitment level returned. */ @@ -6109,21 +6649,31 @@ nfs_vnop_pageout( if (remsize > 0) { /* need to try sending the remainder */ iosize = remsize; - aiov.iov_len = remsize; - aiov.iov_base = (uintptr_t)rxaddr; - auio.uio_iovs.iov32p = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = rxoffset; - uio_uio_resid_set(&auio, remsize); + uio_reset(auio, rxoffset, UIO_SYSSPACE, UIO_WRITE); + uio_addiov(auio, CAST_USER_ADDR_T(rxaddr), remsize); iomode = NFS_WRITE_UNSTABLE; - error = nfs_write_rpc2(np, &auio, thd, cred, &iomode, &wverf2); + error = nfs_write_rpc2(np, auio, thd, cred, &iomode, &wverf2); + if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error)) { + printf("nfs_vnop_pageout: restart: error %d\n", error); + lck_mtx_lock(&nmp->nm_lock); + if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) { + printf("nfs_vnop_pageout: error %d, initiating recovery\n", error); + nmp->nm_state |= NFSSTA_RECOVER; + nfs_mount_sock_thread_wake(nmp); + } + lck_mtx_unlock(&nmp->nm_lock); + if (error == NFSERR_GRACE) + tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz); + restart = 1; + goto cancel; + } if (error) { FSDBG(323, rxoffset, rxsize, error, -1); break; } if (wverf != wverf2) { /* verifier changed, so we need to restart all the writes */ - restart++; + vrestart = 1; goto cancel; } if (iomode < commit) @@ -6137,12 +6687,12 @@ nfs_vnop_pageout( } } 
while (!error && (txsize || rxsize)); - restart = 0; + vrestart = 0; if (!error && (commit != NFS_WRITE_FILESYNC)) { error = nmp->nm_funcs->nf_commit_rpc(np, f_offset, xsize, cred); if (error == NFSERR_STALEWRITEVERF) { - restart++; + vrestart = 1; error = EIO; } } @@ -6155,9 +6705,19 @@ nfs_vnop_pageout( req[nextwait] = NULL; nextwait = (nextwait + 1) % MAXPAGINGREQS; vnode_writedone(vp); + nfs_node_lock_force(np); + np->n_numoutput--; + nfs_node_unlock(np); + } + if (vrestart) { + if (++vrestarts <= 100) /* guard against no progress */ + goto tryagain; + printf("nfs_pageout: too many restarts, aborting.\n"); + FSDBG(323, f_offset, xsize, ERESTART, -1); } if (restart) { - if (restart <= 10) + if ((restarts <= nfs_mount_state_max_restarts(nmp)) && /* guard against no progress */ + (!(error = nfs_mount_state_wait_for_recovery(nmp)))) goto tryagain; printf("nfs_pageout: too many restarts, aborting.\n"); FSDBG(323, f_offset, xsize, ERESTART, -1); @@ -6233,7 +6793,7 @@ nfs_vnop_pageout( } /* Blktooff derives file offset given a logical block number */ -static int +int nfs_vnop_blktooff( struct vnop_blktooff_args /* { struct vnodeop_desc *a_desc; @@ -6255,7 +6815,7 @@ nfs_vnop_blktooff( return (0); } -static int +int nfs_vnop_offtoblk( struct vnop_offtoblk_args /* { struct vnodeop_desc *a_desc; diff --git a/bsd/nfs/nfsdiskless.h b/bsd/nfs/nfsdiskless.h index fd33d5e20..69f03e73a 100644 --- a/bsd/nfs/nfsdiskless.h +++ b/bsd/nfs/nfsdiskless.h @@ -72,53 +72,15 @@ #include #ifdef __APPLE_API_PRIVATE -/* - * Structure that must be initialized for a diskless nfs client. - * This structure is used by nfs_mountroot() to set up the root and swap - * vnodes plus do a partial ifconfig(8) and route(8) so that the critical net - * interface can communicate with the server. - * The primary bootstrap is expected to fill in the appropriate fields before - * starting the kernel. Whether or not the swap area is nfs mounted is - * determined by the value in swdevt[0]. 
(equal to NODEV --> swap over nfs) - * Currently only works for AF_INET protocols. - * NB: All fields are stored in net byte order to avoid hassles with - * client/server byte ordering differences. - */ - -/* - * I have defined a new structure that can handle an NFS Version 3 file handle - * but the kernel still expects the old Version 2 one to be provided. The - * changes required in nfs_vfsops.c for using the new are documented there in - * comments. (I felt that breaking network booting code by changing this - * structure would not be prudent at this time, since almost all servers are - * still Version 2 anyhow.) - */ -struct nfsv3_diskless { - struct ifaliasreq myif; /* Default interface */ - struct sockaddr_in mygateway; /* Default gateway */ - struct nfs_args swap_args; /* Mount args for swap file */ - int swap_fhsize; /* Size of file handle */ - u_char swap_fh[NFSX_V3FHMAX]; /* Swap file's file handle */ - struct sockaddr_in swap_saddr; /* Address of swap server */ - char swap_hostnam[MNAMELEN]; /* Host name for mount pt */ - int swap_nblks; /* Size of server swap file */ - struct ucred swap_ucred; /* Swap credentials */ - struct nfs_args root_args; /* Mount args for root fs */ - int root_fhsize; /* Size of root file handle */ - u_char root_fh[NFSX_V3FHMAX]; /* File handle of root dir */ - struct sockaddr_in root_saddr; /* Address of root server */ - char root_hostnam[MNAMELEN]; /* Host name for mount pt */ - long root_time; /* Timestamp of root fs */ - char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */ -}; struct nfs_dlmount { struct sockaddr_in ndm_saddr; /* Address of file server */ - char ndm_host[MNAMELEN]; /* Host name for mount pt */ + char ndm_host[MAXHOSTNAMELEN];/* Host name for mount pt */ char *ndm_path; /* path name for mount pt */ - u_long ndm_nfsv3; /* NFSv3 or NFSv2? */ - u_long ndm_sotype; /* SOCK_STREAM or SOCK_DGRAM? 
*/ - u_long ndm_fhlen; /* length of file handle */ + char *ndm_mntfrom; /* mntfromname for mount pt */ + u_int32_t ndm_nfsv3; /* NFSv3 or NFSv2? */ + u_int32_t ndm_sotype; /* SOCK_STREAM or SOCK_DGRAM? */ + u_int32_t ndm_fhlen; /* length of file handle */ u_char ndm_fh[NFSX_V3FHMAX]; /* The file's file handle */ }; diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h index 2daeebc9c..910636f85 100644 --- a/bsd/nfs/nfsm_subs.h +++ b/bsd/nfs/nfsm_subs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,7 +79,7 @@ int nfsm_rpchead2(int, int, int, int, int, int, kauth_cred_t, struct nfsreq *, m int nfsm_chain_new_mbuf(struct nfsm_chain *, size_t); int nfsm_chain_add_opaque_f(struct nfsm_chain *, const u_char *, uint32_t); int nfsm_chain_add_opaque_nopad_f(struct nfsm_chain *, const u_char *, uint32_t); -int nfsm_chain_add_uio(struct nfsm_chain *, struct uio *, uint32_t); +int nfsm_chain_add_uio(struct nfsm_chain *, uio_t, uint32_t); int nfsm_chain_add_fattr4_f(struct nfsm_chain *, struct vnode_attr *, struct nfsmount *); int nfsm_chain_add_v2sattr_f(struct nfsm_chain *, struct vnode_attr *, uint32_t); int nfsm_chain_add_v3sattr_f(struct nfsm_chain *, struct vnode_attr *); @@ -89,7 +89,7 @@ int nfsm_chain_offset(struct nfsm_chain *); int nfsm_chain_reverse(struct nfsm_chain *, uint32_t); int nfsm_chain_get_opaque_pointer_f(struct nfsm_chain *, uint32_t, u_char **); int nfsm_chain_get_opaque_f(struct nfsm_chain *, uint32_t, u_char *); -int nfsm_chain_get_uio(struct nfsm_chain *, uint32_t, struct uio *); +int nfsm_chain_get_uio(struct nfsm_chain *, uint32_t, uio_t); int nfsm_chain_get_fh_attr(struct nfsm_chain *, nfsnode_t, vfs_context_t, int, uint64_t *, fhandle_t *, struct nfs_vattr *); int nfsm_chain_get_wcc_data_f(struct nfsm_chain *, nfsnode_t, struct timespec *, int *, u_int64_t *); @@ -420,12 +420,18 @@ int 
nfsm_chain_trim_data(struct nfsm_chain *, int, int *); } while (0) /* add NFSv4 COMPOUND header */ +#define NFS4_TAG_LENGTH 12 #define nfsm_chain_add_compound_header(E, NMC, TAG, NUMOPS) \ do { \ - if ((TAG) && strlen(TAG)) \ - nfsm_chain_add_string((E), (NMC), (TAG), strlen(TAG)); \ - else \ + if ((TAG) && strlen(TAG)) { \ + /* put tags into a fixed-length space-padded field */ \ + char __nfstag[NFS4_TAG_LENGTH+1]; \ + snprintf(__nfstag, sizeof(__nfstag), "%-*s", NFS4_TAG_LENGTH, (TAG)); \ + nfsm_chain_add_32((E), (NMC), NFS4_TAG_LENGTH); \ + nfsm_chain_add_opaque((E), (NMC), __nfstag, NFS4_TAG_LENGTH); \ + } else { \ nfsm_chain_add_32((E), (NMC), 0); \ + } \ nfsm_chain_add_32((E), (NMC), 0); /*minorversion*/ \ nfsm_chain_add_32((E), (NMC), (NUMOPS)); \ } while (0) @@ -448,6 +454,26 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *); nfsm_chain_add_32((E), (NMC), ((B)[__i] & (MASK)[__i])); \ } while (0) +/* Add an NFSv4 "stateid" structure to an mbuf chain */ +#define nfsm_chain_add_stateid(E, NMC, SID) \ + do { \ + nfsm_chain_add_32((E), (NMC), (SID)->seqid); \ + nfsm_chain_add_32((E), (NMC), (SID)->other[0]); \ + nfsm_chain_add_32((E), (NMC), (SID)->other[1]); \ + nfsm_chain_add_32((E), (NMC), (SID)->other[2]); \ + } while (0) + +/* add an NFSv4 lock owner structure to an mbuf chain */ +#define nfsm_chain_add_lock_owner4(E, NMC, NMP, NLOP) \ + do { \ + nfsm_chain_add_64((E), (NMC), (NMP)->nm_clientid); \ + nfsm_chain_add_32((E), (NMC), 5*NFSX_UNSIGNED); \ + nfsm_chain_add_32((E), (NMC), (NLOP)->nlo_name); \ + nfsm_chain_add_32((E), (NMC), (NLOP)->nlo_pid); \ + nfsm_chain_add_64((E), (NMC), (NLOP)->nlo_pid_start.tv_sec); \ + nfsm_chain_add_32((E), (NMC), (NLOP)->nlo_pid_start.tv_usec); \ + } while (0) + /* * macros for dissecting NFS mbuf chains */ @@ -634,20 +660,31 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *); /* get NFSv4 attr bitmap */ #define nfsm_chain_get_bitmap(E, NMC, B, LEN) \ do { \ - uint32_t __len, __i; \ + uint32_t __len = 
0, __i; \ nfsm_chain_get_32((E), (NMC), __len); \ if (E) break; \ for (__i=0; __i < MIN(__len, (LEN)); __i++) \ nfsm_chain_get_32((E), (NMC), (B)[__i]); \ if (E) break; \ + for (; __i < __len; __i++) \ + nfsm_chain_adv((E), (NMC), NFSX_UNSIGNED); \ for (; __i < (LEN); __i++) \ (B)[__i] = 0; \ (LEN) = __len; \ } while (0) +/* get an NFSv4 "stateid" structure from an mbuf chain */ +#define nfsm_chain_get_stateid(E, NMC, SID) \ + do { \ + nfsm_chain_get_32((E), (NMC), (SID)->seqid); \ + nfsm_chain_get_32((E), (NMC), (SID)->other[0]); \ + nfsm_chain_get_32((E), (NMC), (SID)->other[1]); \ + nfsm_chain_get_32((E), (NMC), (SID)->other[2]); \ + } while (0) + #define nfsm_chain_skip_tag(E, NMC) \ do { \ - uint32_t __val; \ + uint32_t __val = 0; \ nfsm_chain_get_32((E), (NMC), __val); \ nfsm_chain_adv((E), (NMC), nfsm_rndup(__val)); \ } while (0) @@ -669,10 +706,12 @@ int nfsm_chain_trim_data(struct nfsm_chain *, int, int *); nfsm_chain_get_64((E), (NMC), __ci_before); \ nfsm_chain_get_64((E), (NMC), __ci_after); \ if (E) break; \ - if (__ci_atomic && (__ci_before == (DNP)->n_ncchange)) \ + if (__ci_atomic && (__ci_before == (DNP)->n_ncchange)) { \ (DNP)->n_ncchange = __ci_after; \ - else \ + } else { \ cache_purge(NFSTOV(DNP)); \ + (DNP)->n_ncgen++; \ + } \ } while (0) #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index cfbdec71a..742c166c5 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -114,14 +114,14 @@ struct nfs_fsattr { struct nfs_funcs { int (*nf_mount)(struct nfsmount *, vfs_context_t, struct user_nfs_args *, nfsnode_t *); int (*nf_update_statfs)(struct nfsmount *, vfs_context_t); - int (*nf_getquota)(struct nfsmount *, vfs_context_t, u_long, int, struct dqblk *); - int (*nf_access_rpc)(nfsnode_t, u_long *, vfs_context_t); + int (*nf_getquota)(struct nfsmount *, vfs_context_t, uid_t, int, struct dqblk *); + int (*nf_access_rpc)(nfsnode_t, u_int32_t *, vfs_context_t); int (*nf_getattr_rpc)(nfsnode_t, mount_t, u_char *, size_t, vfs_context_t, struct nfs_vattr *, u_int64_t *); - int (*nf_setattr_rpc)(nfsnode_t, struct vnode_attr *, vfs_context_t, int); + int (*nf_setattr_rpc)(nfsnode_t, struct vnode_attr *, vfs_context_t); int (*nf_read_rpc_async)(nfsnode_t, off_t, size_t, thread_t, kauth_cred_t, struct nfsreq_cbinfo *, struct nfsreq **); - int (*nf_read_rpc_async_finish)(nfsnode_t, struct nfsreq *, struct uio *, size_t *, int *); + int (*nf_read_rpc_async_finish)(nfsnode_t, struct nfsreq *, uio_t, size_t *, int *); int (*nf_readlink_rpc)(nfsnode_t, char *, uint32_t *, vfs_context_t); - int (*nf_write_rpc_async)(nfsnode_t, struct uio *, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); + int (*nf_write_rpc_async)(nfsnode_t, uio_t, size_t, thread_t, kauth_cred_t, int, struct nfsreq_cbinfo *, struct nfsreq **); int (*nf_write_rpc_async_finish)(nfsnode_t, struct nfsreq *, int *, size_t *, uint64_t *); int (*nf_commit_rpc)(nfsnode_t, uint64_t, uint64_t, kauth_cred_t); int (*nf_lookup_rpc_async)(nfsnode_t, char *, int, vfs_context_t, struct nfsreq **); @@ -130,6 +130,17 @@ struct nfs_funcs { int (*nf_rename_rpc)(nfsnode_t, char *, int, nfsnode_t, char *, int, vfs_context_t); }; +/* + * The long form of the NFSv4 client ID. 
+ */ +struct nfs_client_id { + TAILQ_ENTRY(nfs_client_id) nci_link; /* list of client IDs */ + char *nci_id; /* client id buffer */ + int nci_idlen; /* length of client id buffer */ +}; +TAILQ_HEAD(nfsclientidlist, nfs_client_id); +__private_extern__ struct nfsclientidlist nfsclientids; + /* * Mount structure. * One allocated on every NFS mount. @@ -147,10 +158,10 @@ struct nfsmount { TAILQ_HEAD(, nfs_gss_clnt_ctx) nm_gsscl; /* GSS user contexts */ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */ int nm_retry; /* Max retries */ - int nm_rsize; /* Max size of read rpc */ - int nm_wsize; /* Max size of write rpc */ - int nm_biosize; /* buffer I/O size */ - int nm_readdirsize; /* Size of a readdir rpc */ + uint32_t nm_rsize; /* Max size of read rpc */ + uint32_t nm_wsize; /* Max size of write rpc */ + uint32_t nm_biosize; /* buffer I/O size */ + uint32_t nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ int nm_acregmin; /* reg file min attr cache timeout */ int nm_acregmax; /* reg file max attr cache timeout */ @@ -167,9 +178,18 @@ struct nfsmount { uint32_t rqportstamp; /* timestamp of rquota port */ } v3; struct { /* v4 specific fields */ - uint64_t clientid; /* client ID */ - uint64_t mounttime; /* mount verifier */ + struct nfs_client_id *longid; /* client ID, long form */ + uint64_t mounttime; /* used as client ID verifier */ + uint64_t clientid; /* client ID, short form */ thread_call_t renew_timer; /* RENEW timer call */ + TAILQ_HEAD(, nfs_open_owner) open_owners; /* list of open owners */ + TAILQ_HEAD(, nfsnode) recallq; /* list of nodes with recalled delegations */ + TAILQ_ENTRY(nfsmount) cblink; /* chain of mounts registered for callbacks */ + uint32_t stateinuse; /* state in use counter */ + uint32_t stategenid; /* state generation counter */ + kauth_cred_t mcred; /* credential used for the mount */ + uint32_t cbid; /* callback channel identifier */ + uint32_t cbrefs; /* # callbacks using this mount */ } v4; } 
nm_un; /* async I/O queue */ @@ -184,12 +204,15 @@ struct nfsmount { mbuf_t nm_nam; /* Address of server */ u_short nm_sockflags; /* socket state flags */ socket_t nm_so; /* RPC socket */ - int nm_reconnect_start; /* reconnect start time */ + time_t nm_deadto_start; /* dead timeout start time */ + time_t nm_reconnect_start; /* reconnect start time */ int nm_tprintf_initial_delay; /* delay first "server down" */ int nm_tprintf_delay; /* delay between "server down" */ + int nm_deadtimeout; /* delay between first "server down" and dead */ int nm_srtt[4]; /* Timers for RPCs */ int nm_sdrtt[4]; int nm_timeouts; /* Request timeouts */ + int nm_jbreqs; /* # R_JBTPRINTFMSG requests */ union { struct { int sent; /* Request send count */ @@ -211,6 +234,7 @@ struct nfsmount { /* * NFS mount state flags (nm_state) */ +#define NFSSTA_BIGCOOKIES 0x00000800 /* have seen >32bit dir cookies */ #define NFSSTA_JUKEBOXTIMEO 0x00001000 /* experienced a jukebox timeout */ #define NFSSTA_LOCKTIMEO 0x00002000 /* experienced a lock req timeout */ #define NFSSTA_MOUNTED 0x00004000 /* completely mounted */ @@ -222,6 +246,8 @@ struct nfsmount { #define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */ #define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */ #define NFSSTA_WANTSND 0x02000000 /* Want above */ +#define NFSSTA_DEAD 0x04000000 /* mount is dead */ +#define NFSSTA_RECOVER 0x08000000 /* mount state needs to be recovered */ /* flags for nm_sockflags */ #define NMSOCK_READY 0x0001 /* socket is ready for use */ @@ -245,9 +271,18 @@ struct nfsmount { /* aliases for version-specific fields */ #define nm_rqport nm_un.v3.rqport #define nm_rqportstamp nm_un.v3.rqportstamp +#define nm_longid nm_un.v4.longid #define nm_clientid nm_un.v4.clientid #define nm_mounttime nm_un.v4.mounttime #define nm_renew_timer nm_un.v4.renew_timer +#define nm_open_owners nm_un.v4.open_owners +#define nm_stateinuse nm_un.v4.stateinuse +#define nm_stategenid nm_un.v4.stategenid +#define nm_mcred nm_un.v4.mcred 
+#define nm_cbid nm_un.v4.cbid +#define nm_cblink nm_un.v4.cblink +#define nm_cbrefs nm_un.v4.cbrefs +#define nm_recallq nm_un.v4.recallq #if defined(KERNEL) /* diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index 794841e72..fa0d5bfc4 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,22 +87,6 @@ struct nfs_sillyrename { char nsr_name[20]; }; -/* - * This structure is used to save the logical directory offset to - * NFS cookie mappings. - * The mappings are stored in a list headed - * by n_cookies, as required. - * There is one mapping for each NFS_DIRBLKSIZ bytes of directory information - * stored in increasing logical offset byte order. - */ -#define NFSNUMCOOKIES 31 - -struct nfsdmap { - LIST_ENTRY(nfsdmap) ndm_list; - int ndm_eocookie; - nfsuint64 ndm_cookies[NFSNUMCOOKIES]; -}; - /* * The nfsbuf is the nfs equivalent to a struct buf. */ @@ -110,10 +94,10 @@ struct nfsbuf { LIST_ENTRY(nfsbuf) nb_hash; /* hash chain */ LIST_ENTRY(nfsbuf) nb_vnbufs; /* nfsnode's nfsbuf chain */ TAILQ_ENTRY(nfsbuf) nb_free; /* free list position if not active. */ - volatile long nb_flags; /* NB_* flags. */ - volatile long nb_lflags; /* NBL_* flags. */ - volatile long nb_refs; /* outstanding references. */ - long nb_bufsize; /* buffer size */ + volatile uint32_t nb_flags; /* NB_* flags. */ + volatile uint32_t nb_lflags; /* NBL_* flags. */ + volatile uint32_t nb_refs; /* outstanding references. */ + uint32_t nb_bufsize; /* buffer size */ daddr64_t nb_lblkno; /* logical block number. 
*/ uint64_t nb_verf; /* V3 write verifier */ int nb_commitlevel; /* lowest write commit level */ @@ -210,7 +194,7 @@ struct nfsbuf { LIST_HEAD(nfsbuflists, nfsbuf); TAILQ_HEAD(nfsbuffreehead, nfsbuf); -#define NFSNOLIST ((struct nfsbuf *)0xdeadbeef) +#define NFSNOLIST ((void*)0xdeadbeef) __private_extern__ lck_mtx_t *nfs_buf_mutex; __private_extern__ int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; @@ -248,6 +232,65 @@ __private_extern__ struct nfsbuffreehead nfsbuffree, nfsbufdelwri; #define NFSBUFCNTCHK() #endif +/* + * NFS directory buffer + * + * Each buffer for a directory consists of: + * + * - a small header + * - a packed list of direntry structures + * (if RDIRPLUS is enabled, a file handle and attrstamp are + * packed after the direntry name.) + * - free/unused space + * - if RDIRPLUS is enabled, an array of attributes + * that is indexed backwards from the end of the buffer. + */ +struct nfs_dir_buf_header { + uint16_t ndbh_flags; /* flags (see below) */ + uint16_t ndbh_count; /* # of entries */ + uint32_t ndbh_entry_end; /* end offset of direntry data */ + uint32_t ndbh_ncgen; /* name cache generation# */ + uint32_t ndbh_pad; /* reserved */ +}; +/* ndbh_flags */ +#define NDB_FULL 0x0001 /* buffer has been filled */ +#define NDB_EOF 0x0002 /* buffer contains EOF */ + +#define NFS_DIR_BUF_FIRST_DIRENTRY(BP) \ + ((struct direntry*)((char*)((BP)->nb_data) + sizeof(struct nfs_dir_buf_header))) +#define NFS_DIR_BUF_NVATTR(BP, IDX) \ + (&((struct nfs_vattr*)((char*)((BP)->nb_data) + (BP)->nb_bufsize))[-((IDX)+1)]) +#define NFS_DIRENTRY_LEN(namlen) \ + ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) +#define NFS_DIRENT_LEN(namlen) \ + ((sizeof(struct dirent) - (NAME_MAX+1)) + (((namlen) + 1 + 3) &~ 3)) +#define NFS_DIRENTRY_NEXT(DP) \ + ((struct direntry*)((char*)(DP) + (DP)->d_reclen)) +#define NFS_DIR_COOKIE_POTENTIALLY_TRUNCATED(C) \ + ((C) && ((((C) >> 32) == 0) || (((C) & 0x80000000ULL) && (((C) >> 32) == 0xffffffff)))) +#define
NFS_DIR_COOKIE_SAME32(C1, C2) \ + (((C1) & 0xffffffffULL) == ((C2) & 0xffffffffULL)) + +/* + * NFS directory cookie cache + * + * This structure is used to cache cookie-to-buffer mappings for + * cookies recently returned from READDIR. The entries are kept in an + * array. The most-recently-used (MRU) list is headed by the entry at + * index "mru". The index of the next entry in the list is kept in the + * "next" array. (An index value of -1 marks an invalid entry.) + */ +#define NFSNUMCOOKIES 14 +struct nfsdmap { + int8_t free; /* next unused slot */ + int8_t mru; /* head of MRU list */ + int8_t next[NFSNUMCOOKIES]; /* MRU list links */ + struct { + uint64_t key; /* cookie */ + uint64_t lbn; /* lbn of buffer */ + } cookies[NFSNUMCOOKIES]; /* MRU list entries */ +}; + /* * NFS vnode attribute structure */ @@ -291,6 +334,10 @@ struct nfs_vattr { #define NFS_FFLAG_HIDDEN 0x0002 #define NFS_FFLAG_NAMED_ATTR 0x0004 /* file has named attributes */ +/* flags for nfs_getattr() */ +#define NGA_CACHED 0 +#define NGA_UNCACHED 1 + /* * macros for detecting node changes * @@ -323,6 +370,135 @@ struct nfs_vattr { NFS_COPY_TIME(&(NP)->n_ncmtime, (NVAP), MODIFY); \ } while (0) + +__private_extern__ lck_grp_t *nfs_open_grp; +__private_extern__ uint32_t nfs_open_owner_seqnum, nfs_lock_owner_seqnum; + +/* + * NFSv4 open owner structure - one per cred per mount + */ +struct nfs_open_owner { + TAILQ_ENTRY(nfs_open_owner) noo_link; /* List of open owners (on mount) */ + lck_mtx_t noo_lock; /* owner mutex */ + struct nfsmount * noo_mount; /* NFS mount */ + uint32_t noo_refcnt; /* # outstanding references */ + uint32_t noo_flags; /* see below */ + kauth_cred_t noo_cred; /* credentials of open owner */ + uint32_t noo_name; /* unique name used otw */ + uint32_t noo_seqid; /* client-side sequence ID */ + TAILQ_HEAD(,nfs_open_file) noo_opens; /* list of open files */ +}; +/* noo_flags */ +#define NFS_OPEN_OWNER_LINK 0x1 /* linked into mount's open owner list */ +#define NFS_OPEN_OWNER_BUSY 
0x2 /* open state-modifying operation in progress */ +#define NFS_OPEN_OWNER_WANT 0x4 /* someone else wants to mark busy */ + +/* + * NFS open file structure - one per open owner per nfsnode + */ +struct nfs_open_file { + lck_mtx_t nof_lock; /* open file mutex */ + TAILQ_ENTRY(nfs_open_file) nof_link; /* list of open files */ + TAILQ_ENTRY(nfs_open_file) nof_oolink; /* list of open owner's open files */ + struct nfs_open_owner * nof_owner; /* open owner */ + nfsnode_t nof_np; /* nfsnode this open is for */ + nfs_stateid nof_stateid; /* open stateid */ + thread_t nof_creator; /* thread that created file */ + uint32_t nof_opencnt; /* open file count */ + uint16_t nof_flags; /* see below */ + uint8_t nof_access:4; /* access mode for this open */ + uint8_t nof_deny:4; /* deny mode for this open */ + uint8_t nof_mmap_access:4; /* mmap open access mode */ + uint8_t nof_mmap_deny:4; /* mmap open deny mode */ + /* counts of access/deny mode open combinations */ + uint32_t nof_r; /* read opens (deny none) */ + uint32_t nof_w; /* write opens (deny none) */ + uint32_t nof_rw; /* read/write opens (deny none) */ + uint32_t nof_r_dw; /* read deny-write opens */ + /* the rest of the counts have a max of 2 (1 for open + 1 for mmap) */ + uint32_t nof_w_dw:4; /* write deny-write opens (max 2) */ + uint32_t nof_rw_dw:4; /* read/write deny-write opens (max 2) */ + uint32_t nof_r_drw:4; /* read deny-read/write opens (max 2) */ + uint32_t nof_w_drw:4; /* write deny-read/write opens (max 2) */ + uint32_t nof_rw_drw:4; /* read/write deny-read/write opens (max 2) */ +}; +/* nof_flags */ +#define NFS_OPEN_FILE_BUSY 0x0001 /* open state-modifying operation in progress */ +#define NFS_OPEN_FILE_WANT 0x0002 /* someone else wants to mark busy */ +#define NFS_OPEN_FILE_CREATE 0x0004 /* has an open(RW) from a VNOP_CREATE call */ +#define NFS_OPEN_FILE_NEEDCLOSE 0x0008 /* has an open(R) from an (unopen) VNOP_READ call */ +#define NFS_OPEN_FILE_SETATTR 0x0020 /* has an open(W) to perform a 
SETATTR(size) */ +#define NFS_OPEN_FILE_POSIXLOCK 0x0040 /* server supports POSIX locking semantics */ +#define NFS_OPEN_FILE_LOST 0x0080 /* open state has been lost */ +#define NFS_OPEN_FILE_REOPEN 0x0100 /* file needs to be reopened */ +#define NFS_OPEN_FILE_REOPENING 0x0200 /* file is being reopened */ + +struct nfs_lock_owner; +/* + * NFS file lock + * + * Each lock request (pending or granted) has an + * nfs_file_lock structure representing its state. + */ +struct nfs_file_lock { + TAILQ_ENTRY(nfs_file_lock) nfl_link; /* List of locks on nfsnode */ + TAILQ_ENTRY(nfs_file_lock) nfl_lolink; /* List of locks held by locker */ + struct nfs_lock_owner * nfl_owner; /* lock owner that holds this lock */ + uint64_t nfl_start; /* starting offset */ + uint64_t nfl_end; /* ending offset (inclusive) */ + uint32_t nfl_blockcnt; /* # locks blocked on this lock */ + uint16_t nfl_flags; /* see below */ + uint8_t nfl_type; /* lock type: read/write */ +}; +/* nfl_flags */ +#define NFS_FILE_LOCK_ALLOC 0x01 /* lock was allocated */ +#define NFS_FILE_LOCK_STYLE_POSIX 0x02 /* POSIX-style fcntl() lock */ +#define NFS_FILE_LOCK_STYLE_FLOCK 0x04 /* flock(2)-style lock */ +#define NFS_FILE_LOCK_STYLE_MASK 0x06 /* lock style mask */ +#define NFS_FILE_LOCK_WAIT 0x08 /* may block on conflicting locks */ +#define NFS_FILE_LOCK_BLOCKED 0x10 /* request is blocked */ +#define NFS_FILE_LOCK_DEAD 0x20 /* lock (request) no longer exists */ + +TAILQ_HEAD(nfs_file_lock_queue, nfs_file_lock); + +/* + * Calculate length of lock range given the endpoints. + * Note that struct flock has "to EOF" reported as 0 but + * the NFSv4 protocol has "to EOF" reported as UINT64_MAX. + */ +#define NFS_FLOCK_LENGTH(S, E) (((E) == UINT64_MAX) ? 0 : ((E) - (S) + 1)) +#define NFS_LOCK_LENGTH(S, E) (((E) == UINT64_MAX) ? UINT64_MAX : ((E) - (S) + 1)) + +/* + * NFSv4 lock owner structure - per open owner per process per nfsnode + * + * A lock owner is a process + an nfsnode. 
+ * + * Note that flock(2) locks technically should have the lock owner be + * an fglob pointer instead of a process. However, implementing that + * correctly would not be trivial. So, for now, flock(2) locks are + * essentially treated like whole-file POSIX locks. + */ +struct nfs_lock_owner { + lck_mtx_t nlo_lock; /* owner mutex */ + TAILQ_ENTRY(nfs_lock_owner) nlo_link; /* List of lock owners (on nfsnode) */ + struct nfs_open_owner * nlo_open_owner; /* corresponding open owner */ + struct nfs_file_lock_queue nlo_locks; /* list of locks held */ + struct nfs_file_lock nlo_alock; /* most lockers will only ever have one */ + struct timeval nlo_pid_start; /* Start time of process id */ + pid_t nlo_pid; /* lock-owning process ID */ + uint32_t nlo_refcnt; /* # outstanding references */ + uint32_t nlo_flags; /* see below */ + uint32_t nlo_name; /* unique name used otw */ + uint32_t nlo_seqid; /* client-side sequence ID */ + uint32_t nlo_stategenid; /* mount state generation ID */ + nfs_stateid nlo_stateid; /* lock stateid */ +}; +/* nlo_flags */ +#define NFS_LOCK_OWNER_LINK 0x1 /* linked into nfsnode's lock owner list */ +#define NFS_LOCK_OWNER_BUSY 0x2 /* lock state-modifying operation in progress */ +#define NFS_LOCK_OWNER_WANT 0x4 /* someone else wants to mark busy */ + /* * The nfsnode is the NFS equivalent of an inode. * There is a unique nfsnode for each NFS vnode.
@@ -334,8 +510,7 @@ struct nfs_vattr { #define NFS_ACCESS_CACHE_SIZE 3 struct nfsnode { - lck_rw_t n_lock; /* nfs node lock */ - void *n_lockowner; /* nfs node lock owner (exclusive) */ + lck_mtx_t n_lock; /* nfs node lock */ lck_rw_t n_datalock; /* nfs node data lock */ void *n_datalockowner;/* nfs node data lock owner (exclusive) */ LIST_ENTRY(nfsnode) n_hash; /* Hash chain */ @@ -363,17 +538,18 @@ struct nfsnode { mount_t n_mount; /* associated mount (NHINIT) */ int n_error; /* Save write error value */ union { - struct timespec nf_atim; /* Special file times */ - nfsuint64 nd_cookieverf; /* Cookie verifier (dir only) */ + struct timespec ns_atim; /* Special file times */ + daddr64_t nf_lastread; /* last block# read from (for readahead) */ + uint64_t nd_cookieverf; /* Cookie verifier (dir only) */ } n_un1; union { - struct timespec nf_mtim; /* Special file times */ - daddr64_t nf_lastread; /* last block# read from (for readahead) */ - off_t nd_direof; /* Dir. EOF offset cache */ + struct timespec ns_mtim; /* Special file times */ + daddr64_t nf_lastrahead; /* last block# read ahead */ + uint64_t nd_eofcookie; /* Dir. 
EOF cookie cache */ } n_un2; union { struct nfs_sillyrename *nf_silly;/* Ptr to silly rename struct */ - LIST_HEAD(, nfsdmap) nd_cook; /* cookies */ + struct nfsdmap *nd_cookiecache; /* dir cookie cache */ } n_un3; u_short n_fhsize; /* size in bytes, of fh */ u_short n_flag; /* node flags */ @@ -382,14 +558,31 @@ struct nfsnode { u_char n_fh[NFS_SMALLFH];/* Small File Handle */ struct nfsbuflists n_cleanblkhd; /* clean blocklist head */ struct nfsbuflists n_dirtyblkhd; /* dirty blocklist head */ - int n_wrbusy; /* # threads in write/fsync */ - int n_needcommitcnt;/* # bufs that need committing */ + union { + int nf_wrbusy; /* # threads in write/fsync */ + uint32_t nd_ncgen; /* dir name cache generation# */ + } n_un5; + union { + int nf_needcommitcnt;/* # bufs that need committing */ + daddr64_t nd_lastdbl; /* last dir buf lookup block# */ + } n_un6; int n_bufiterflags; /* buf iterator flags */ + int n_numoutput; /* I/O in progress */ + /* open state */ + lck_mtx_t n_openlock; /* nfs node open lock */ + uint32_t n_openflags; /* open state flags */ + uint32_t n_openrefcnt; /* # non-file opens */ + TAILQ_HEAD(,nfs_open_file) n_opens; /* list of open files */ + /* lock state */ + TAILQ_HEAD(, nfs_lock_owner) n_lock_owners; /* list of lock owners */ + struct nfs_file_lock_queue n_locks; /* list of locks */ + /* delegation state */ + nfs_stateid n_dstateid; /* delegation stateid */ + TAILQ_ENTRY(nfsnode) n_dlink; /* delegation recall list link */ }; -#define NFS_NODE_LOCK_SHARED 1 -#define NFS_NODE_LOCK_EXCLUSIVE 2 -#define NFS_NODE_LOCK_FORCE 3 +#define NFS_DATA_LOCK_SHARED 1 +#define NFS_DATA_LOCK_EXCLUSIVE 2 #define nfstimespeccmp(tvp, uvp, cmp) \ (((tvp)->tv_sec == (uvp)->tv_sec) ? 
\ @@ -404,13 +597,18 @@ struct nfsnode { } \ } while (0) -#define n_atim n_un1.nf_atim -#define n_mtim n_un2.nf_mtim -#define n_lastread n_un2.nf_lastread +#define n_atim n_un1.ns_atim +#define n_mtim n_un2.ns_mtim +#define n_lastread n_un1.nf_lastread +#define n_lastrahead n_un2.nf_lastrahead #define n_sillyrename n_un3.nf_silly +#define n_wrbusy n_un5.nf_wrbusy +#define n_needcommitcnt n_un6.nf_needcommitcnt #define n_cookieverf n_un1.nd_cookieverf -#define n_direofoffset n_un2.nd_direof -#define n_cookies n_un3.nd_cook +#define n_eofcookie n_un2.nd_eofcookie +#define n_cookiecache n_un3.nd_cookiecache +#define n_ncgen n_un5.nd_ncgen +#define n_lastdbl n_un6.nd_lastdbl #define n_mtime n_un4.v3.n3_mtime #define n_ncmtime n_un4.v3.n3_ncmtime #define n_change n_un4.v4.n4_change @@ -423,10 +621,14 @@ struct nfsnode { #define NMODIFIED 0x0004 /* Might have a modified buffer in bio */ #define NWRITEERR 0x0008 /* Flag write errors so close will know */ #define NNEEDINVALIDATE 0x0010 /* need to call vinvalbuf() */ +#define NGETATTRINPROG 0x0020 /* GETATTR RPC in progress */ +#define NGETATTRWANT 0x0040 /* waiting for GETATTR RPC */ #define NACC 0x0100 /* Special file accessed */ #define NUPD 0x0200 /* Special file updated */ #define NCHG 0x0400 /* Special file times changed */ #define NNEGNCENTRIES 0x0800 /* directory has negative name cache entries */ +#define NBUSY 0x1000 /* node is busy */ +#define NBUSYWANT 0x2000 /* waiting on busy node */ /* * Flags for n_hflag @@ -446,6 +648,16 @@ struct nfsnode { #define NBINVALINPROG 0x0004 /* Avoid multiple calls to nfs_vinvalbuf() */ #define NBINVALWANT 0x0008 /* waiting for nfs_vinvalbuf() to complete */ +/* + * n_openflags + * Note: protected by n_openlock + */ +#define N_OPENBUSY 0x0001 /* open state is busy - being updated */ +#define N_OPENWANT 0x0002 /* someone wants to mark busy */ +#define N_DELEG_READ 0x0004 /* we have a read delegation */ +#define N_DELEG_WRITE 0x0008 /* we have a write delegation */ +#define 
N_DELEG_MASK 0x000c /* delegation mask */ + /* attr/mode timestamp macros */ #define NATTRVALID(np) ((np)->n_attrstamp != ~0) #define NATTRINVALIDATE(np) ((np)->n_attrstamp = ~0) @@ -468,6 +680,7 @@ struct nfsnode { */ #define NG_MARKROOT 0x0001 /* mark vnode as root of FS */ #define NG_MAKEENTRY 0x0002 /* add name cache entry for vnode */ +#define NG_NOCREATE 0x0004 /* don't create a new node, return existing one */ /* * Convert between nfsnode pointers and vnode pointers @@ -504,28 +717,37 @@ extern vnop_t **spec_nfsv4nodeop_p; /* * Prototypes for NFS vnode operations */ -int nfs_vnop_write(struct vnop_write_args *); #define nfs_vnop_revoke nop_revoke int nfs_vnop_inactive(struct vnop_inactive_args *); int nfs_vnop_reclaim(struct vnop_reclaim_args *); -int nfs_lock(nfsnode_t, int); -void nfs_unlock(nfsnode_t); -int nfs_lock2(nfsnode_t, nfsnode_t, int); -void nfs_unlock2(nfsnode_t, nfsnode_t); -int nfs_lock4(nfsnode_t, nfsnode_t, nfsnode_t, nfsnode_t, int); -void nfs_unlock4(nfsnode_t, nfsnode_t, nfsnode_t, nfsnode_t); +int nfs_node_lock(nfsnode_t); +int nfs_node_lock_internal(nfsnode_t, int); +void nfs_node_lock_force(nfsnode_t); +void nfs_node_unlock(nfsnode_t); +int nfs_node_lock2(nfsnode_t, nfsnode_t); +void nfs_node_unlock2(nfsnode_t, nfsnode_t); +int nfs_node_set_busy(nfsnode_t, thread_t); +int nfs_node_set_busy2(nfsnode_t, nfsnode_t, thread_t); +int nfs_node_set_busy4(nfsnode_t, nfsnode_t, nfsnode_t, nfsnode_t, thread_t); +void nfs_node_clear_busy(nfsnode_t); +void nfs_node_clear_busy2(nfsnode_t, nfsnode_t); +void nfs_node_clear_busy4(nfsnode_t, nfsnode_t, nfsnode_t, nfsnode_t); void nfs_data_lock(nfsnode_t, int); -void nfs_data_lock2(nfsnode_t, int, int); +void nfs_data_lock_noupdate(nfsnode_t, int); +void nfs_data_lock_internal(nfsnode_t, int, int); void nfs_data_unlock(nfsnode_t); -void nfs_data_unlock2(nfsnode_t, int); +void nfs_data_unlock_noupdate(nfsnode_t); +void nfs_data_unlock_internal(nfsnode_t, int); void nfs_data_update_size(nfsnode_t, int); /* 
other stuff */ int nfs_removeit(struct nfs_sillyrename *); int nfs_nget(mount_t,nfsnode_t,struct componentname *,u_char *,int,struct nfs_vattr *,u_int64_t *,int,nfsnode_t*); -nfsuint64 *nfs_getcookie(nfsnode_t, off_t, int); +void nfs_dir_cookie_cache(nfsnode_t, uint64_t, uint64_t); +int nfs_dir_cookie_to_lbn(nfsnode_t, uint64_t, int *, uint64_t *); void nfs_invaldir(nfsnode_t); +uint32_t nfs_dir_buf_freespace(struct nfsbuf *, int); /* nfsbuf functions */ void nfs_nbinit(void); @@ -533,7 +755,7 @@ void nfs_buf_timer(void *, void *); void nfs_buf_remfree(struct nfsbuf *); boolean_t nfs_buf_is_incore(nfsnode_t, daddr64_t); struct nfsbuf * nfs_buf_incore(nfsnode_t, daddr64_t); -int nfs_buf_get(nfsnode_t, daddr64_t, int, thread_t, int, struct nfsbuf **); +int nfs_buf_get(nfsnode_t, daddr64_t, uint32_t, thread_t, int, struct nfsbuf **); int nfs_buf_upl_setup(struct nfsbuf *bp); void nfs_buf_upl_check(struct nfsbuf *bp); void nfs_buf_normalize_valid_range(nfsnode_t, struct nfsbuf *); @@ -551,7 +773,8 @@ errno_t nfs_buf_acquire(struct nfsbuf *, int, int, int); int nfs_buf_iterprepare(nfsnode_t, struct nfsbuflists *, int); void nfs_buf_itercomplete(nfsnode_t, struct nfsbuflists *, int); -int nfs_bioread(nfsnode_t, struct uio *, int, int *, vfs_context_t); +int nfs_bioread(nfsnode_t, uio_t, int, vfs_context_t); +int nfs_buf_readahead(nfsnode_t, int, daddr64_t *, daddr64_t, thread_t, kauth_cred_t); int nfs_buf_readdir(struct nfsbuf *, vfs_context_t); int nfs_buf_read(struct nfsbuf *); void nfs_buf_read_finish(struct nfsbuf *); @@ -565,10 +788,18 @@ int nfs_buf_write_dirty_pages(struct nfsbuf *, thread_t, kauth_cred_t); int nfs_flushcommits(nfsnode_t, int); int nfs_flush(nfsnode_t, int, thread_t, int); +void nfs_buf_delwri_push(int); +void nfs_buf_delwri_service(void); +void nfs_buf_delwri_thread(void *, wait_result_t); int nfsiod_start(void); +void nfsiod_terminate(struct nfsiod *); +void nfsiod_thread(void); +int nfsiod_continue(int); void nfs_asyncio_finish(struct nfsreq
*); void nfs_asyncio_resend(struct nfsreq *); +int nfs_async_write_start(struct nfsmount *); +void nfs_async_write_done(struct nfsmount *); #endif /* KERNEL */ diff --git a/bsd/nfs/nfsproto.h b/bsd/nfs/nfsproto.h index 47ba3faf1..9823531f4 100644 --- a/bsd/nfs/nfsproto.h +++ b/bsd/nfs/nfsproto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,14 +90,16 @@ #define NFS_MAXDGRAMDATA 16384 #define NFS_PREFDGRAMDATA 8192 #define NFS_MAXDATA (64*1024) // XXX not ready for >64K +#define NFSRV_MAXDATA (64*1024) // XXX not ready for >64K #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 -#define NFS_MAXPKTHDR 404 -#define NFS_MAXPACKET (NFS_MAXPKTHDR + NFS_MAXDATA) -#define NFS_MINPACKET 20 +#define NFS_MAXPACKET (16*1024*1024) #define NFS_UDPSOCKBUF (224*1024) #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ +#define NFS4_CALLBACK_PROG 0x4E465343 /* "NFSC" */ +#define NFS4_CALLBACK_PROG_VERSION 1 + /* Stat numbers for NFS RPC returns */ #define NFS_OK 0 #define NFSERR_PERM 1 @@ -173,6 +175,14 @@ #define NFSERR_CB_PATH_DOWN 10048 #define NFSERR_STALEWRITEVERF 30001 /* Fake return for nfs_commit() */ +#define NFSERR_DIRBUFDROPPED 30002 /* Fake return for nfs*_readdir_rpc() */ + +/* + * For gss we would like to return EAUTH when we don't have or can't get credentials, + * but some callers don't know what to do with it, so we define our own version + * of EAUTH to be EACCES + */ +#define NFSERR_EAUTH EACCES #define NFSERR_RETVOID 0x20000000 /* Return void, not error */ #define NFSERR_AUTHERR 0x40000000 /* Mark an authentication error */ @@ -291,6 +301,8 @@ #define NFS_TIME_DONT_CHANGE 0 #define NFS_TIME_SET_TO_SERVER 1 #define NFS_TIME_SET_TO_CLIENT 2 +#define NFS4_TIME_SET_TO_SERVER 0 +#define NFS4_TIME_SET_TO_CLIENT 1 /* access() constants */ #define NFS_ACCESS_READ 0x01 @@ -653,6 +665,8 @@ 
__private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN]; /* NFSv4 RPC procedures */ #define NFSPROC4_NULL 0 #define NFSPROC4_COMPOUND 1 +#define NFSPROC4_CB_NULL 0 +#define NFSPROC4_CB_COMPOUND 1 /* NFSv4 opcodes */ #define NFS_OP_ACCESS 3 @@ -750,19 +764,19 @@ __private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN]; /* - * Quads are defined as arrays of 2 longs to ensure dense packing for the - * protocol and to facilitate xdr conversion. + * Quads are defined as arrays of 2 32-bit values to ensure dense packing + * for the protocol and to facilitate xdr conversion. */ struct nfs_uquad { - u_long nfsuquad[2]; + u_int32_t nfsuquad[2]; }; typedef struct nfs_uquad nfsuint64; /* - * Used to convert between two u_longs and a u_quad_t. + * Used to convert between two u_int32_ts and a u_quad_t. */ union nfs_quadconvert { - u_long lval[2]; + u_int32_t lval[2]; u_quad_t qval; }; typedef union nfs_quadconvert nfsquad_t; @@ -785,23 +799,14 @@ struct nfs_fsid { }; typedef struct nfs_fsid nfs_fsid; - /* - * remove these once we're sure nobody's using them + * NFSv4 stateid structure */ -struct nfsv2_time { - u_long nfsv2_sec; - u_long nfsv2_usec; -}; -typedef struct nfsv2_time nfstime2; -struct nfsv2_sattr { - u_long sa_mode; - u_long sa_uid; - u_long sa_gid; - u_long sa_size; - nfstime2 sa_atime; - nfstime2 sa_mtime; +struct nfs_stateid { + uint32_t seqid; + uint32_t other[3]; }; +typedef struct nfs_stateid nfs_stateid; #endif /* __APPLE_API_PRIVATE */ #endif /* _NFS_NFSPROTO_H_ */ diff --git a/bsd/nfs/nfsrvcache.h b/bsd/nfs/nfsrvcache.h index 481dcab81..fa23f1877 100644 --- a/bsd/nfs/nfsrvcache.h +++ b/bsd/nfs/nfsrvcache.h @@ -81,7 +81,7 @@ struct nfsrvcache { TAILQ_ENTRY(nfsrvcache) rc_lru; /* LRU chain */ LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */ - u_long rc_xid; /* rpc id number */ + u_int32_t rc_xid; /* rpc id number */ union { mbuf_t ru_repmb; /* Reply mbuf list OR */ int ru_repstat; /* Reply status */ diff --git a/bsd/nfs/xdr_subs.h 
b/bsd/nfs/xdr_subs.h index be5c6444c..9a399db19 100644 --- a/bsd/nfs/xdr_subs.h +++ b/bsd/nfs/xdr_subs.h @@ -84,16 +84,16 @@ * but we cannot count on their alignment anyway. */ -#define fxdr_unsigned(t, v) ((t)ntohl((long)(v))) -#define txdr_unsigned(v) (htonl((long)(v))) +#define fxdr_unsigned(t, v) ((t)ntohl((uint32_t)(v))) +#define txdr_unsigned(v) (htonl((uint32_t)(v))) #define fxdr_hyper(f, t) { \ - ((long *)(t))[_QUAD_HIGHWORD] = ntohl(((long *)(f))[0]); \ - ((long *)(t))[_QUAD_LOWWORD] = ntohl(((long *)(f))[1]); \ + ((uint32_t *)(t))[_QUAD_HIGHWORD] = ntohl(((uint32_t *)(f))[0]); \ + ((uint32_t *)(t))[_QUAD_LOWWORD] = ntohl(((uint32_t *)(f))[1]); \ } #define txdr_hyper(f, t) { \ - ((long *)(t))[0] = htonl(((long *)(f))[_QUAD_HIGHWORD]); \ - ((long *)(t))[1] = htonl(((long *)(f))[_QUAD_LOWWORD]); \ + ((uint32_t *)(t))[0] = htonl(((uint32_t *)(f))[_QUAD_HIGHWORD]); \ + ((uint32_t *)(t))[1] = htonl(((uint32_t *)(f))[_QUAD_LOWWORD]); \ } #endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/ppc/Makefile b/bsd/ppc/Makefile index 42d55dcc1..21878d7f3 100644 --- a/bsd/ppc/Makefile +++ b/bsd/ppc/Makefile @@ -9,13 +9,13 @@ include $(MakeInc_def) DATAFILES = \ endian.h fasttrap_isa.h param.h profile.h \ - setjmp.h signal.h \ + setjmp.h signal.h limits.h _limits.h \ types.h vmparam.h _structs.h _types.h _param.h KERNELFILES = \ disklabel.h \ endian.h param.h profile.h \ - signal.h \ + signal.h limits.h _limits.h \ types.h vmparam.h _structs.h _types.h _param.h INSTALL_MD_LIST = ${DATAFILES} diff --git a/bsd/ppc/_limits.h b/bsd/ppc/_limits.h new file mode 100644 index 000000000..d512ec411 --- /dev/null +++ b/bsd/ppc/_limits.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. 
Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _PPC__LIMITS_H_ +#define _PPC__LIMITS_H_ + +#define __DARWIN_CLK_TCK 100 /* ticks per second */ + +#endif /* _PPC__LIMITS_H_ */ diff --git a/bsd/ppc/_param.h b/bsd/ppc/_param.h index 7adb76efc..938fc499f 100644 --- a/bsd/ppc/_param.h +++ b/bsd/ppc/_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,9 +37,10 @@ * cast to any desired pointer type. 
*/ #define __DARWIN_ALIGNBYTES (sizeof(__darwin_size_t) - 1) -#define __DARWIN_ALIGN(p) ((__darwin_size_t)((char *)(__darwin_intptr_t)(p) + __DARWIN_ALIGNBYTES) &~ __DARWIN_ALIGNBYTES) +#define __DARWIN_ALIGN(p) ((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES) &~ __DARWIN_ALIGNBYTES) -#define __DARWIN_ALIGNBYTES32 (sizeof(__uint32_t) - 1) -#define __DARWIN_ALIGN32(p) ((__darwin_size_t)((char *)(__darwin_intptr_t)(p) + __DARWIN_ALIGNBYTES32) &~ __DARWIN_ALIGNBYTES32) +#define __DARWIN_ALIGNBYTES32 (sizeof(__uint32_t) - 1) +#define __DARWIN_ALIGN32(p) ((__darwin_size_t)((char *)(__darwin_size_t)(p) + __DARWIN_ALIGNBYTES32) &~ __DARWIN_ALIGNBYTES32) -#endif /* _PPC__PARAM_H_ */ + +#endif /* _PPC__PARAM_H_ */ diff --git a/bsd/ppc/_structs.h b/bsd/ppc/_structs.h index 0d7fd4c9e..c028f7efb 100644 --- a/bsd/ppc/_structs.h +++ b/bsd/ppc/_structs.h @@ -120,7 +120,6 @@ typedef _STRUCT_MCONTEXT64 *mcontext64_t; #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ /* - * LP64todo - Have to decide how to handle this. * For now, just duplicate the 32-bit context as the generic one. */ #ifdef __need_struct_sigcontext diff --git a/EXTERNAL_HEADERS/ppc/limits.h b/bsd/ppc/limits.h similarity index 57% rename from EXTERNAL_HEADERS/ppc/limits.h rename to bsd/ppc/limits.h index f28b43326..8f7decbec 100644 --- a/EXTERNAL_HEADERS/ppc/limits.h +++ b/bsd/ppc/limits.h @@ -1,32 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* Copyright (c) 1988, 1993 + * Copyright (c) 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,8 +12,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. + * This product includes software developed by the University of + * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
@@ -59,24 +32,19 @@ * * @(#)limits.h 8.3 (Berkeley) 1/4/94 */ -/* - * HISTORY - * - * 10-July-97 Umesh Vaishampayan (umeshv@apple.com) - * Fixed conflicts with float.h. Avoid multiple includes. - */ #ifndef _PPC_LIMITS_H_ #define _PPC_LIMITS_H_ +#include #include #define CHAR_BIT 8 /* number of bits in a char */ #define MB_LEN_MAX 6 /* Allow 31 bit UTF2 */ -#ifndef CLK_TCK +#if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) #define CLK_TCK __DARWIN_CLK_TCK /* ticks per second */ -#endif +#endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* * According to ANSI (section 2.2.4.2), the values below must be usable by @@ -103,26 +71,37 @@ #define INT_MAX 2147483647 /* max value for an int */ #define INT_MIN (-2147483647-1) /* min value for an int */ -#define ULONG_MAX 0xffffffff /* max value for an unsigned long */ -#define LONG_MAX 2147483647 /* max value for a long */ -#define LONG_MIN (-2147483647-1) /* min value for a long */ +#ifdef __LP64__ +#define ULONG_MAX 0xffffffffffffffffUL /* max unsigned long */ +#define LONG_MAX 0x7fffffffffffffffL /* max signed long */ +#define LONG_MIN (-0x7fffffffffffffffL-1) /* min signed long */ +#else /* !__LP64__ */ +#define ULONG_MAX 0xffffffffUL /* max unsigned long */ +#define LONG_MAX 2147483647L /* max signed long */ +#define LONG_MIN (-2147483647L-1) /* min signed long */ +#endif /* __LP64__ */ + +#define ULLONG_MAX 0xffffffffffffffffULL /* max unsigned long long */ +#define LLONG_MAX 0x7fffffffffffffffLL /* max signed long long */ +#define LLONG_MIN (-0x7fffffffffffffffLL-1) /* min signed long long */ #if !defined(_ANSI_SOURCE) -#define SSIZE_MAX INT_MAX /* max value for a ssize_t */ +#ifdef __LP64__ +#define LONG_BIT 64 +#else /* !__LP64__ */ +#define LONG_BIT 32 +#endif /* __LP64__ */ +#define SSIZE_MAX LONG_MAX /* max value for a ssize_t */ +#define WORD_BIT 32 -#if !defined(_POSIX_SOURCE) && !defined(_XOPEN_SOURCE) -#define SIZE_T_MAX UINT_MAX /* max value for a 
size_t */ +#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE)) || defined(_DARWIN_C_SOURCE) +#define SIZE_T_MAX ULONG_MAX /* max value for a size_t */ -#define UQUAD_MAX 0xffffffffffffffffULL /* max unsigned quad */ -#define QUAD_MAX 0x7fffffffffffffffLL /* max signed quad */ -#define QUAD_MIN (-0x7fffffffffffffffLL-1) /* min signed quad */ +#define UQUAD_MAX ULLONG_MAX +#define QUAD_MAX LLONG_MAX +#define QUAD_MIN LLONG_MIN -#endif /* !_POSIX_SOURCE && !_XOPEN_SOURCE */ +#endif /* (!_POSIX_C_SOURCE && !_XOPEN_SOURCE) || _DARWIN_C_SOURCE */ #endif /* !_ANSI_SOURCE */ -#if (!defined(_ANSI_SOURCE)&&!defined(_POSIX_SOURCE)) || defined(_XOPEN_SOURCE) -#define LONG_BIT 32 -#define WORD_BIT 32 -#endif /* (!(_ANSI_SOURCE) && !(_POSIX_SOURCE)) || (_XOPEN_SOURCE) */ - #endif /* _PPC_LIMITS_H_ */ diff --git a/bsd/ppc/types.h b/bsd/ppc/types.h index dca303748..21265f8e0 100644 --- a/bsd/ppc/types.h +++ b/bsd/ppc/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,7 +28,7 @@ /* * Copyright 1995 NeXT Computer, Inc. All rights reserved. */ -/*- +/* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * @@ -94,7 +94,7 @@ typedef long long int64_t; #endif typedef unsigned long long u_int64_t; -#if defined(__ppc64__) +#if __LP64__ typedef int64_t register_t; #else typedef int32_t register_t; @@ -110,22 +110,59 @@ typedef unsigned long uintptr_t; #endif #if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) -/* with LP64 support pointers and longs from user address space may vary */ -/* in size depending on the type of process (currently 32 or 64-bit, but */ -/* may change in the future). These types are used for reserving the largest */ -/* possible size. 
*/ -typedef u_int64_t user_addr_t; -typedef u_int64_t user_size_t; -typedef int64_t user_ssize_t; -typedef int64_t user_long_t; -typedef u_int64_t user_ulong_t; -typedef int64_t user_time_t; +/* These types are used for reserving the largest possible size. */ +typedef u_int64_t user_addr_t; +typedef u_int64_t user_size_t; +typedef int64_t user_ssize_t; +typedef int64_t user_long_t; +typedef u_int64_t user_ulong_t; +typedef int64_t user_time_t; +typedef int64_t user_off_t; #define USER_ADDR_NULL ((user_addr_t) 0) #define CAST_USER_ADDR_T(a_ptr) ((user_addr_t)((uintptr_t)(a_ptr))) + +#ifdef KERNEL + +/* + * These types are used when you know the word size of the target + * user process. They can be used to create struct layouts independent + * of the types and alignment requirements of the current running + * kernel. + */ + +/* + * The default ABI for the 32-bit PowerPC userspace is called "Power" + * alignment, and aligns fundamental integral data types to their + * natural boundary, with a maximum alignment of 4, even for 8-byte + * quantities. Power alignment also pads a structure to 8-byte alignment + * if the first field is an 8-byte quantity, which is not handled by + * these typedefs. The default ABI for 64-bit PowerPC userspace is called + * "Natural" alignment, and aligns fundamental integral data types + * to their natural boundaries.
+ */ + +typedef __uint64_t user64_addr_t __attribute__((aligned(8))); +typedef __uint64_t user64_size_t __attribute__((aligned(8))); +typedef __int64_t user64_ssize_t __attribute__((aligned(8))); +typedef __int64_t user64_long_t __attribute__((aligned(8))); +typedef __uint64_t user64_ulong_t __attribute__((aligned(8))); +typedef __int64_t user64_time_t __attribute__((aligned(8))); +typedef __int64_t user64_off_t __attribute__((aligned(8))); + +typedef __uint32_t user32_addr_t; +typedef __uint32_t user32_size_t; +typedef __int32_t user32_ssize_t; +typedef __int32_t user32_long_t; +typedef __uint32_t user32_ulong_t; +typedef __int32_t user32_time_t; +typedef __int64_t user32_off_t __attribute__((aligned(4))); + +#endif /* KERNEL */ + #endif /* !_ANSI_SOURCE && (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* This defines the size of syscall arguments after copying into the kernel: */ -typedef u_int64_t syscall_arg_t; +typedef u_int64_t syscall_arg_t; #ifndef __offsetof #define __offsetof(type, field) ((size_t)(&((type *)0)->field)) diff --git a/bsd/ufs/Makefile b/bsd/security/Makefile similarity index 81% rename from bsd/ufs/Makefile rename to bsd/security/Makefile index e0bf3fa37..b574d2956 100644 --- a/bsd/ufs/Makefile +++ b/bsd/security/Makefile @@ -8,21 +8,27 @@ include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = \ - ffs \ - ufs + audit INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + +INSTINC_SUBDIRS_ARM = \ + EXPINC_SUBDIRS = \ - ffs \ - ufs + audit EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + +EXPINC_SUBDIRS_ARM = \ + SETUP_SUBDIRS = \ COMP_SUBDIRS = \ @@ -32,5 +38,3 @@ INST_SUBDIRS = \ include $(MakeInc_rule) include $(MakeInc_dir) - - diff --git a/bsd/isofs/cd9660/Makefile b/bsd/security/audit/Makefile similarity index 58% rename from bsd/isofs/cd9660/Makefile rename to bsd/security/audit/Makefile index fe4e6d369..660e7c155 100644 --- a/bsd/isofs/cd9660/Makefile +++ 
b/bsd/security/audit/Makefile @@ -13,23 +13,36 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + +INSTINC_SUBDIRS_ARM = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + +EXPINC_SUBDIRS_ARM = \ + DATAFILES = \ - cd9660_mount.h cd9660_node.h cd9660_rrip.h iso.h iso_rrip.h + audit_ioctl.h + +PRIVATE_DATAFILES = INSTALL_MI_LIST = ${DATAFILES} -INSTALL_MI_DIR = isofs/cd9660 +INSTALL_MI_DIR = security/audit + +INSTALL_KF_MI_LIST = ${DATAFILES} -EXPORT_MI_LIST = +EXPORT_MI_LIST = ${DATAFILES} audit.h audit_bsd.h audit_private.h -EXPORT_MI_DIR = +EXPORT_MI_DIR = security/audit +INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/security/audit/audit.c b/bsd/security/audit/audit.c new file mode 100644 index 000000000..c454867bf --- /dev/null +++ b/bsd/security/audit/audit.c @@ -0,0 +1,802 @@ +/*- + * Copyright (c) 1999-2009 Apple Inc. + * Copyright (c) 2006-2007 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * NOTICE: This file was modified by McAfee Research in 2004 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#if CONFIG_AUDIT +MALLOC_DEFINE(M_AUDITDATA, "audit_data", "Audit data storage"); +MALLOC_DEFINE(M_AUDITPATH, "audit_path", "Audit path storage"); +MALLOC_DEFINE(M_AUDITTEXT, "audit_text", "Audit text storage"); + +/* + * Audit control settings that are set/read by system calls and are hence + * non-static. + * + * Define the audit control flags. 
+ */ +int audit_enabled; +int audit_suspended; + +int audit_syscalls; +au_class_t audit_kevent_mask; + +/* + * Flags controlling behavior in low storage situations. Should we panic if + * a write fails? Should we fail stop if we're out of disk space? + */ +int audit_panic_on_write_fail; +int audit_fail_stop; +int audit_argv; +int audit_arge; + +/* + * Are we currently "failing stop" due to out of disk space? + */ +int audit_in_failure; + +/* + * Global audit statistics. + */ +struct audit_fstat audit_fstat; + +/* + * Preselection mask for non-attributable events. + */ +struct au_mask audit_nae_mask; + +/* + * Mutex to protect global variables shared between various threads and + * processes. + */ +struct mtx audit_mtx; + +/* + * Queue of audit records ready for delivery to disk. We insert new records + * at the tail, and remove records from the head. Also, a count of the + * number of records used for checking queue depth. In addition, a counter + * of records that we have allocated but are not yet in the queue, which is + * needed to estimate the total size of the combined set of records + * outstanding in the system. + */ +struct kaudit_queue audit_q; +int audit_q_len; +int audit_pre_q_len; + +/* + * Audit queue control settings (minimum free, low/high water marks, etc.) + */ +struct au_qctrl audit_qctrl; + +/* + * Condition variable to signal to the worker that it has work to do: either + * new records are in the queue, or a log replacement is taking place. + */ +struct cv audit_worker_cv; + +/* + * Condition variable to signal when the worker is done draining the audit + * queue. + */ +struct cv audit_drain_cv; + +/* + * Condition variable to flag when crossing the low watermark, meaning that + * threads blocked due to hitting the high watermark can wake up and continue + * to commit records. + */ +struct cv audit_watermark_cv; + +/* + * Condition variable for auditing threads wait on when in fail-stop mode. 
+ * Threads wait on this CV forever (and ever), never seeing the light of day + * again. + */ +static struct cv audit_fail_cv; + +static zone_t audit_record_zone; + +/* + * Kernel audit information. This will store the current audit address + * or host information that the kernel will use when it's generating + * audit records. This data is modified by the A_GET{SET}KAUDIT auditon(2) + * command. + */ +static struct auditinfo_addr audit_kinfo; +static struct rwlock audit_kinfo_lock; + +#define KINFO_LOCK_INIT() rw_init(&audit_kinfo_lock, \ + "audit_kinfo_lock") +#define KINFO_RLOCK() rw_rlock(&audit_kinfo_lock) +#define KINFO_WLOCK() rw_wlock(&audit_kinfo_lock) +#define KINFO_RUNLOCK() rw_runlock(&audit_kinfo_lock) +#define KINFO_WUNLOCK() rw_wunlock(&audit_kinfo_lock) + +void +audit_set_kinfo(struct auditinfo_addr *ak) +{ + + KASSERT(ak->ai_termid.at_type == AU_IPv4 || + ak->ai_termid.at_type == AU_IPv6, + ("audit_set_kinfo: invalid address type")); + + KINFO_WLOCK(); + bcopy(ak, &audit_kinfo, sizeof(audit_kinfo)); + KINFO_WUNLOCK(); +} + +void +audit_get_kinfo(struct auditinfo_addr *ak) +{ + + KASSERT(audit_kinfo.ai_termid.at_type == AU_IPv4 || + audit_kinfo.ai_termid.at_type == AU_IPv6, + ("audit_get_kinfo: invalid address type")); + + KINFO_RLOCK(); + bcopy(&audit_kinfo, ak, sizeof(*ak)); + KINFO_RUNLOCK(); +} + +/* + * Construct an audit record for the passed thread. + */ +static void +audit_record_ctor(proc_t p, struct kaudit_record *ar) +{ + kauth_cred_t cred; + + bzero(ar, sizeof(*ar)); + ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC; + nanotime(&ar->k_ar.ar_starttime); + + cred = kauth_cred_proc_ref(p); + + /* + * Export the subject credential.
+ */ + cru2x(cred, &ar->k_ar.ar_subj_cred); + ar->k_ar.ar_subj_ruid = cred->cr_ruid; + ar->k_ar.ar_subj_rgid = cred->cr_rgid; + ar->k_ar.ar_subj_egid = cred->cr_groups[0]; + ar->k_ar.ar_subj_pid = p->p_pid; + ar->k_ar.ar_subj_auid = cred->cr_audit.as_aia_p->ai_auid; + ar->k_ar.ar_subj_asid = cred->cr_audit.as_aia_p->ai_asid; + bcopy(&cred->cr_audit.as_mask, &ar->k_ar.ar_subj_amask, + sizeof(struct au_mask)); + bcopy(&cred->cr_audit.as_aia_p->ai_termid, &ar->k_ar.ar_subj_term_addr, + sizeof(struct au_tid_addr)); + kauth_cred_unref(&cred); +} + +static void +audit_record_dtor(struct kaudit_record *ar) +{ + + if (ar->k_ar.ar_arg_upath1 != NULL) + free(ar->k_ar.ar_arg_upath1, M_AUDITPATH); + if (ar->k_ar.ar_arg_upath2 != NULL) + free(ar->k_ar.ar_arg_upath2, M_AUDITPATH); + if (ar->k_ar.ar_arg_kpath1 != NULL) + free(ar->k_ar.ar_arg_kpath1, M_AUDITPATH); + if (ar->k_ar.ar_arg_kpath2 != NULL) + free(ar->k_ar.ar_arg_kpath2, M_AUDITPATH); + if (ar->k_ar.ar_arg_text != NULL) + free(ar->k_ar.ar_arg_text, M_AUDITTEXT); + if (ar->k_ar.ar_arg_opaque != NULL) + free(ar->k_ar.ar_arg_opaque, M_AUDITDATA); + if (ar->k_ar.ar_arg_data != NULL) + free(ar->k_ar.ar_arg_data, M_AUDITDATA); + if (ar->k_udata != NULL) + free(ar->k_udata, M_AUDITDATA); + if (ar->k_ar.ar_arg_argv != NULL) + free(ar->k_ar.ar_arg_argv, M_AUDITTEXT); + if (ar->k_ar.ar_arg_envv != NULL) + free(ar->k_ar.ar_arg_envv, M_AUDITTEXT); +} + +/* + * Initialize the Audit subsystem: configuration state, work queue, + * synchronization primitives, worker thread, and trigger device node. Also + * call into the BSM assembly code to initialize it. + */ +void +audit_init(void) +{ + + audit_enabled = 0; + audit_syscalls = 0; + audit_kevent_mask = 0; + audit_suspended = 0; + audit_panic_on_write_fail = 0; + audit_fail_stop = 0; + audit_in_failure = 0; + audit_argv = 0; + audit_arge = 0; + + audit_fstat.af_filesz = 0; /* '0' means unset, unbounded. 
*/ + audit_fstat.af_currsz = 0; + audit_nae_mask.am_success = 0; + audit_nae_mask.am_failure = 0; + + TAILQ_INIT(&audit_q); + audit_q_len = 0; + audit_pre_q_len = 0; + audit_qctrl.aq_hiwater = AQ_HIWATER; + audit_qctrl.aq_lowater = AQ_LOWATER; + audit_qctrl.aq_bufsz = AQ_BUFSZ; + audit_qctrl.aq_minfree = AU_FS_MINFREE; + + audit_kinfo.ai_termid.at_type = AU_IPv4; + audit_kinfo.ai_termid.at_addr[0] = INADDR_ANY; + + mtx_init(&audit_mtx, "audit_mtx", NULL, MTX_DEF); + KINFO_LOCK_INIT(); + cv_init(&audit_worker_cv, "audit_worker_cv"); + cv_init(&audit_drain_cv, "audit_drain_cv"); + cv_init(&audit_watermark_cv, "audit_watermark_cv"); + cv_init(&audit_fail_cv, "audit_fail_cv"); + + audit_record_zone = zinit(sizeof(struct kaudit_record), + AQ_HIWATER*sizeof(struct kaudit_record), 8192, "audit_zone"); +#if CONFIG_MACF + audit_mac_init(); +#endif + /* Init audit session subsystem. */ + audit_session_init(); + + /* Initialize the BSM audit subsystem. */ + kau_init(); + + /* audit_trigger_init(); */ + + /* Initialize the audit pipe subsystem. */ + (void) audit_pipe_init(); + + /* Start audit worker thread. */ + audit_worker_init(); +} + +/* + * Drain the audit queue and close the log at shutdown. Note that this can + * be called both from the system shutdown path and also from audit + * configuration syscalls, so it takes no arguments. + */ +void +audit_shutdown(void) +{ + + audit_rotate_vnode(NULL, NULL); +} + +/* + * Return the current thread's audit record, if any. + */ +__inline__ struct kaudit_record * +currecord(void) +{ + + return (curthread()->uu_ar); +} + +/* + * XXXAUDIT: There are a number of races present in the code below due to + * release and re-grab of the mutex. The code should be revised to become + * slightly less racy. + * + * XXXAUDIT: Shouldn't there be logic here to sleep waiting on available + * pre_q space, suspending the system call until there is room?
+ */ +struct kaudit_record * +audit_new(int event, proc_t p, __unused struct uthread *uthread) +{ + struct kaudit_record *ar; + int no_record; + + mtx_lock(&audit_mtx); + no_record = (audit_suspended || !audit_enabled); + mtx_unlock(&audit_mtx); + if (no_record) + return (NULL); + + /* + * Initialize the audit record header. + * XXX: We may want to fail-stop if allocation fails. + * + * Note: the number of outstanding uncommitted audit records is + * limited to the number of concurrent threads servicing system calls + * in the kernel. + */ + ar = zalloc(audit_record_zone); + if (ar == NULL) + return NULL; + audit_record_ctor(p, ar); + ar->k_ar.ar_event = event; + +#if CONFIG_MACF + if (audit_mac_new(p, ar) != 0) { + zfree(audit_record_zone, ar); + return (NULL); + } +#endif + + mtx_lock(&audit_mtx); + audit_pre_q_len++; + mtx_unlock(&audit_mtx); + + return (ar); +} + +void +audit_free(struct kaudit_record *ar) +{ + + audit_record_dtor(ar); +#if CONFIG_MACF + audit_mac_free(ar); +#endif + zfree(audit_record_zone, ar); +} + +void +audit_commit(struct kaudit_record *ar, int error, int retval) +{ + au_event_t event; + au_class_t class; + au_id_t auid; + int sorf; + struct au_mask *aumask; + + if (ar == NULL) + return; + + /* + * Decide whether to commit the audit record by checking the error + * value from the system call and using the appropriate audit mask. + */ + if (ar->k_ar.ar_subj_auid == AU_DEFAUDITID) + aumask = &audit_nae_mask; + else + aumask = &ar->k_ar.ar_subj_amask; + + if (error) + sorf = AU_PRS_FAILURE; + else + sorf = AU_PRS_SUCCESS; + + switch(ar->k_ar.ar_event) { + case AUE_OPEN_RWTC: + /* + * The open syscall always writes a AUE_OPEN_RWTC event; + * change it to the proper type of event based on the flags + * and the error value. 
+ */ + ar->k_ar.ar_event = audit_flags_and_error_to_openevent( + ar->k_ar.ar_arg_fflags, error); + break; + + case AUE_OPEN_EXTENDED_RWTC: + /* + * The open_extended syscall always writes a + * AUE_OPEN_EXTENDED_RWTC event; change it to the proper type of + * event based on the flags and the error value. + */ + ar->k_ar.ar_event = audit_flags_and_error_to_openextendedevent( + ar->k_ar.ar_arg_fflags, error); + break; + + case AUE_SYSCTL: + ar->k_ar.ar_event = audit_ctlname_to_sysctlevent( + ar->k_ar.ar_arg_ctlname, ar->k_ar.ar_valid_arg); + break; + + case AUE_AUDITON: + /* Convert the auditon() command to an event. */ + ar->k_ar.ar_event = auditon_command_event(ar->k_ar.ar_arg_cmd); + break; + + case AUE_FCNTL: + /* Convert some fcntl() commands to their own events. */ + ar->k_ar.ar_event = audit_fcntl_command_event( + ar->k_ar.ar_arg_cmd, ar->k_ar.ar_arg_fflags, error); + break; + } + + auid = ar->k_ar.ar_subj_auid; + event = ar->k_ar.ar_event; + class = au_event_class(event); + + ar->k_ar_commit |= AR_COMMIT_KERNEL; + if (au_preselect(event, class, aumask, sorf) != 0) + ar->k_ar_commit |= AR_PRESELECT_TRAIL; + if (audit_pipe_preselect(auid, event, class, sorf, + ar->k_ar_commit & AR_PRESELECT_TRAIL) != 0) + ar->k_ar_commit |= AR_PRESELECT_PIPE; + if ((ar->k_ar_commit & (AR_PRESELECT_TRAIL | AR_PRESELECT_PIPE | + AR_PRESELECT_USER_TRAIL | AR_PRESELECT_USER_PIPE)) == 0) { + mtx_lock(&audit_mtx); + audit_pre_q_len--; + mtx_unlock(&audit_mtx); + audit_free(ar); + return; + } + + ar->k_ar.ar_errno = error; + ar->k_ar.ar_retval = retval; + nanotime(&ar->k_ar.ar_endtime); + + /* + * Note: it could be that some records initiated while audit was + * enabled should still be committed? + */ + mtx_lock(&audit_mtx); + if (audit_suspended || !audit_enabled) { + audit_pre_q_len--; + mtx_unlock(&audit_mtx); + audit_free(ar); + return; + } + + /* + * Constrain the number of committed audit records based on the + * configurable parameter.
+ */ + while (audit_q_len >= audit_qctrl.aq_hiwater) + cv_wait(&audit_watermark_cv, &audit_mtx); + + TAILQ_INSERT_TAIL(&audit_q, ar, k_q); + audit_q_len++; + audit_pre_q_len--; + cv_signal(&audit_worker_cv); + mtx_unlock(&audit_mtx); +} + +/* + * audit_syscall_enter() is called on entry to each system call. It is + * responsible for deciding whether or not to audit the call (preselection), + * and if so, allocating a per-thread audit record. audit_new() will fill in + * basic thread/credential properties. + */ +void +audit_syscall_enter(unsigned int code, proc_t proc, struct uthread *uthread) +{ + struct au_mask *aumask; + au_class_t class; + au_event_t event; + au_id_t auid; + kauth_cred_t cred; + + /* + * In FreeBSD, each ABI has its own system call table, and hence + * mapping of system call codes to audit events. Convert the code to + * an audit event identifier using the process system call table + * reference. In Darwin, there's only one, so we use the global + * symbol for the system call table. No audit record is generated + * for bad system calls, as no operation has been performed. + * + * In Mac OS X, the audit events are stored in a table separate from + * the syscall table(s). This table is generated by makesyscalls.sh + * from syscalls.master and stored in audit_kevents.c. + */ + if (code >= NUM_SYSENT) + return; + event = sys_au_event[code]; + if (event == AUE_NULL) + return; + + KASSERT(uthread->uu_ar == NULL, + ("audit_syscall_enter: uthread->uu_ar != NULL")); + + /* + * Check which audit mask to use; either the kernel non-attributable + * event mask or the process audit mask. + */ + cred = kauth_cred_proc_ref(proc); + auid = cred->cr_audit.as_aia_p->ai_auid; + if (auid == AU_DEFAUDITID) + aumask = &audit_nae_mask; + else + aumask = &cred->cr_audit.as_mask; + + /* + * Allocate an audit record, if preselection allows it, and store in + * the thread for later use.
+ */ + class = au_event_class(event); +#if CONFIG_MACF + /* + * Note: audit_mac_syscall_enter() may call audit_new() and allocate + * memory for the audit record (uu_ar). + */ + if (audit_mac_syscall_enter(code, proc, uthread, cred, event) == 0) + goto out; +#endif + if (au_preselect(event, class, aumask, AU_PRS_BOTH)) { + /* + * If we're out of space and need to suspend unprivileged + * processes, do that here rather than trying to allocate + * another audit record. + * + * Note: we might wish to be able to continue here in the + * future, if the system recovers. That should be possible + * by means of checking the condition in a loop around + * cv_wait(). It might be desirable to reevaluate whether an + * audit record is still required for this event by + * re-calling au_preselect(). + */ + if (audit_in_failure && + suser(cred, &proc->p_acflag) != 0) { + cv_wait(&audit_fail_cv, &audit_mtx); + panic("audit_failing_stop: thread continued"); + } + if (uthread->uu_ar == NULL) + uthread->uu_ar = audit_new(event, proc, uthread); + } else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) { + if (uthread->uu_ar == NULL) + uthread->uu_ar = audit_new(event, proc, uthread); + } + +out: + kauth_cred_unref(&cred); +} + +/* + * audit_syscall_exit() is called from the return of every system call, or in + * the event of exit1(), during the execution of exit1(). It is responsible + * for committing the audit record, if any, along with return condition. + * + * Note: The audit_syscall_exit() parameter list was modified to support + * mac_audit_check_postselect(), which requires the syscall number. 
+ */ +#if CONFIG_MACF +void +audit_syscall_exit(unsigned int code, int error, __unused proc_t proc, + struct uthread *uthread) +#else +void +audit_syscall_exit(int error, __unsed proc_t proc, struct uthread *uthread) +#endif +{ + int retval; + + /* + * Commit the audit record as desired; once we pass the record into + * audit_commit(), the memory is owned by the audit subsystem. The + * return value from the system call is stored on the user thread. + * If there was an error, the return value is set to -1, imitating + * the behavior of the cerror routine. + */ + if (error) + retval = -1; + else + retval = uthread->uu_rval[0]; + +#if CONFIG_MACF + if (audit_mac_syscall_exit(code, uthread, error, retval) != 0) + goto out; +#endif + audit_commit(uthread->uu_ar, error, retval); + +out: + uthread->uu_ar = NULL; +} + +/* + * Calls to set up and tear down audit structures used during Mach system + * calls. + */ +void +audit_mach_syscall_enter(unsigned short event) +{ + struct uthread *uthread; + proc_t proc; + struct au_mask *aumask; + kauth_cred_t cred; + au_class_t class; + au_id_t auid; + + if (event == AUE_NULL) + return; + + uthread = curthread(); + if (uthread == NULL) + return; + + proc = current_proc(); + if (proc == NULL) + return; + + KASSERT(uthread->uu_ar == NULL, + ("audit_mach_syscall_enter: uthread->uu_ar != NULL")); + + cred = kauth_cred_proc_ref(proc); + auid = cred->cr_audit.as_aia_p->ai_auid; + + /* + * Check which audit mask to use; either the kernel non-attributable + * event mask or the process audit mask. + */ + if (auid == AU_DEFAUDITID) + aumask = &audit_nae_mask; + else + aumask = &cred->cr_audit.as_mask; + + /* + * Allocate an audit record, if desired, and store in the BSD thread + * for later use. 
+ */ + class = au_event_class(event); + if (au_preselect(event, class, aumask, AU_PRS_BOTH)) + uthread->uu_ar = audit_new(event, proc, uthread); + else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) + uthread->uu_ar = audit_new(event, proc, uthread); + else + uthread->uu_ar = NULL; + + kauth_cred_unref(&cred); +} + +void +audit_mach_syscall_exit(int retval, struct uthread *uthread) +{ + /* + * The error code from Mach system calls is the same as the + * return value + */ + /* XXX Is the above statement always true? */ + audit_commit(uthread->uu_ar, retval, retval); + uthread->uu_ar = NULL; +} + +/* + * kau_will_audit can be used by a security policy to determine + * if an audit record will be stored, reducing wasted memory allocation + * and string handling. + */ +int +kau_will_audit(void) +{ + + return (audit_enabled && currecord() != NULL); +} + +void +audit_proc_coredump(proc_t proc, char *path, int errcode) +{ + struct kaudit_record *ar; + struct au_mask *aumask; + au_class_t class; + int ret, sorf; + char **pathp; + au_id_t auid; + kauth_cred_t my_cred; + struct uthread *uthread; + + ret = 0; + + /* + * Make sure we are using the correct preselection mask. + */ + my_cred = kauth_cred_proc_ref(proc); + auid = my_cred->cr_audit.as_aia_p->ai_auid; + if (auid == AU_DEFAUDITID) + aumask = &audit_nae_mask; + else + aumask = &my_cred->cr_audit.as_mask; + kauth_cred_unref(&my_cred); + /* + * It's possible for coredump(9) generation to fail. Make sure that + * we handle this case correctly for preselection. + */ + if (errcode != 0) + sorf = AU_PRS_FAILURE; + else + sorf = AU_PRS_SUCCESS; + class = au_event_class(AUE_CORE); + if (au_preselect(AUE_CORE, class, aumask, sorf) == 0 && + audit_pipe_preselect(auid, AUE_CORE, class, sorf, 0) == 0) + return; + /* + * If we are interested in seeing this audit record, allocate it. + * Where possible coredump records should contain a pathname and arg32 + * (signal) tokens. 
+ */ + uthread = curthread(); + ar = audit_new(AUE_CORE, proc, uthread); + if (path != NULL) { + pathp = &ar->k_ar.ar_arg_upath1; + *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); + if (audit_canon_path(vfs_context_cwd(vfs_context_current()), path, + *pathp)) + free(*pathp, M_AUDITPATH); + else + ARG_SET_VALID(ar, ARG_UPATH1); + } + ar->k_ar.ar_arg_signum = proc->p_sigacts->ps_sig; + ARG_SET_VALID(ar, ARG_SIGNUM); + if (errcode != 0) + ret = 1; + audit_commit(ar, errcode, ret); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit.h b/bsd/security/audit/audit.h new file mode 100644 index 000000000..5af1da795 --- /dev/null +++ b/bsd/security/audit/audit.h @@ -0,0 +1,423 @@ +/*- + * Copyright (c) 2004-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +/* + * This header includes function prototypes and type definitions that are + * necessary for the kernel as a whole to interact with the audit subsystem. + */ + +#ifndef _SECURITY_AUDIT_AUDIT_H +#define _SECURITY_AUDIT_AUDIT_H + +#if defined(_KERNEL) || defined(KERNEL) + +#if CONFIG_MACF +#include +#include +#endif + +#include + +#include +#include +#include + +/* + * Audit subsystem condition flags. The audit_enabled flag is set and + * removed automatically as a result of configuring log files, and can be + * observed but should not be directly manipulated. The audit suspension + * flag permits audit to be temporarily disabled without reconfiguring the + * audit target. The audit syscalls flag is set at the first hint that kernel + * events (system and mach calls) need to be audited. It is used for + * performance so an event class map table lookup doesn't have be done for + * every system call if only user events are being audited. + */ +extern int audit_enabled; +extern int audit_suspended; +extern int audit_syscalls; + +/* + * Define the masks for the audited arguments. 
+ * + * XXXRW: These need to remain in audit.h for now because our vnode and name + * lookup audit calls rely on passing in flags to indicate which name or + * vnode is being logged. These should move to audit_private.h when that is + * fixed. + */ +#define ARG_EUID 0x0000000000000001ULL +#define ARG_RUID 0x0000000000000002ULL +#define ARG_SUID 0x0000000000000004ULL +#define ARG_EGID 0x0000000000000008ULL +#define ARG_RGID 0x0000000000000010ULL +#define ARG_SGID 0x0000000000000020ULL +#define ARG_PID 0x0000000000000040ULL +#define ARG_UID 0x0000000000000080ULL +#define ARG_AUID 0x0000000000000100ULL +#define ARG_GID 0x0000000000000200ULL +#define ARG_FD 0x0000000000000400ULL +#define ARG_POSIX_IPC_PERM 0x0000000000000800ULL +#define ARG_FFLAGS 0x0000000000001000ULL +#define ARG_MODE 0x0000000000002000ULL +#define ARG_VALUE32 0x0000000000004000ULL +#define ARG_ADDR32 0x0000000000008000ULL +#define ARG_ADDR ARG_ADDR32 +#define ARG_LEN 0x0000000000010000ULL +#define ARG_MASK 0x0000000000020000ULL +#define ARG_SIGNUM 0x0000000000040000ULL +#define ARG_LOGIN 0x0000000000080000ULL +#define ARG_SADDRINET 0x0000000000100000ULL +#define ARG_SADDRINET6 0x0000000000200000ULL +#define ARG_SADDRUNIX 0x0000000000400000ULL +#define ARG_TERMID_ADDR ARG_SADDRUNIX +#define ARG_KPATH1 0x0000000000800000ULL /* darwin-only */ +#define ARG_KPATH2 0x0000000001000000ULL /* darwin-only */ +#define ARG_UPATH1 0x0000000002000000ULL +#define ARG_UPATH2 0x0000000004000000ULL +#define ARG_TEXT 0x0000000008000000ULL +#define ARG_VNODE1 0x0000000010000000ULL +#define ARG_VNODE2 0x0000000020000000ULL +#define ARG_SVIPC_CMD 0x0000000040000000ULL +#define ARG_SVIPC_PERM 0x0000000080000000ULL +#define ARG_SVIPC_ID 0x0000000100000000ULL +#define ARG_SVIPC_ADDR 0x0000000200000000ULL +#define ARG_GROUPSET 0x0000000400000000ULL +#define ARG_CMD 0x0000000800000000ULL +#define ARG_SOCKINFO 0x0000001000000000ULL +#define ARG_ASID 0x0000002000000000ULL +#define ARG_TERMID 0x0000004000000000ULL +#define 
ARG_AUDITON 0x0000008000000000ULL +#define ARG_VALUE64 0x0000010000000000ULL /* darwin-only */ +#define ARG_AMASK 0x0000020000000000ULL +#define ARG_CTLNAME 0x0000040000000000ULL +#define ARG_PROCESS 0x0000080000000000ULL +#define ARG_MACHPORT1 0x0000100000000000ULL +#define ARG_MACHPORT2 0x0000200000000000ULL +#define ARG_MAC_STRING 0x0000400000000000ULL +#define ARG_EXIT 0x0000800000000000ULL +#define ARG_IOVECSTR 0x0001000000000000ULL +#define ARG_ARGV 0x0002000000000000ULL +#define ARG_ENVV 0x0004000000000000ULL +#define ARG_OPAQUE 0x0008000000000000ULL /* darwin-only */ +#define ARG_DATA 0x0010000000000000ULL /* darwin-only */ +#define ARG_ADDR64 0x0020000000000000ULL /* darwin-only */ +#define ARG_NONE 0x0000000000000000ULL +#define ARG_ALL 0xFFFFFFFFFFFFFFFFULL + +#if CONFIG_MACF + +#define MAC_AUDIT_LABEL_LEN 1024 +#define MAC_AUDIT_DATA_TYPE 0 +#define MAC_AUDIT_TEXT_TYPE 1 + +struct mac_audit_record { + int type; /* one of the types defined above */ + int length; /* byte length of the data field */ + u_char *data; /* the payload */ + LIST_ENTRY(mac_audit_record) records; +}; + +#endif + +struct proc; +struct vnode; +struct componentname; + +int kau_will_audit(void); +void audit_init(void); +void audit_shutdown(void); +void audit_syscall_enter(unsigned int code, + struct proc *proc, struct uthread *uthread); +#if CONFIG_MACF +/* + * The parameter list of audit_syscall_exit() was modified to also take the + * Darwin syscall number, which is required by mac_audit_check_postselect(). 
+ */ +void audit_syscall_exit(unsigned int code, int error, + struct proc *proc, struct uthread *uthread); +#else +void audit_syscall_exit(int error, struct proc *proc, + struct uthread *uthread); +#endif +void audit_mach_syscall_enter(unsigned short audit_event); +void audit_mach_syscall_exit(int retval, struct uthread *uthread); + +extern struct auditinfo_addr audit_default_aia; + +/* + * The remaining kernel functions are conditionally compiled in as they are + * wrapped by a macro, and the macro should be the only place in the source + * tree where these functions are referenced. + */ +#if CONFIG_AUDIT +struct ipc_perm; +struct sockaddr; +union auditon_udata; +void audit_arg_addr(struct kaudit_record *ar, user_addr_t addr); +void audit_arg_exit(struct kaudit_record *ar, int status, int retval); +void audit_arg_len(struct kaudit_record *ar, user_size_t len); +void audit_arg_fd(struct kaudit_record *ar, int fd); +void audit_arg_fflags(struct kaudit_record *ar, int fflags); +void audit_arg_gid(struct kaudit_record *ar, gid_t gid); +void audit_arg_uid(struct kaudit_record *ar, uid_t uid); +void audit_arg_egid(struct kaudit_record *ar, gid_t egid); +void audit_arg_euid(struct kaudit_record *ar, uid_t euid); +void audit_arg_rgid(struct kaudit_record *ar, gid_t rgid); +void audit_arg_ruid(struct kaudit_record *ar, uid_t ruid); +void audit_arg_sgid(struct kaudit_record *ar, gid_t sgid); +void audit_arg_suid(struct kaudit_record *ar, uid_t suid); +void audit_arg_groupset(struct kaudit_record *ar, gid_t *gidset, + u_int gidset_size); +void audit_arg_login(struct kaudit_record *ar, char *login); +void audit_arg_ctlname(struct kaudit_record *ar, int *name, int namelen); +void audit_arg_mask(struct kaudit_record *ar, int mask); +void audit_arg_mode(struct kaudit_record *ar, mode_t mode); +void audit_arg_value32(struct kaudit_record *ar, uint32_t value32); +void audit_arg_value64(struct kaudit_record *ar, uint64_t value64); +void audit_arg_owner(struct kaudit_record *ar, 
uid_t uid, gid_t gid); +void audit_arg_pid(struct kaudit_record *ar, pid_t pid); +void audit_arg_process(struct kaudit_record *ar, proc_t p); +void audit_arg_signum(struct kaudit_record *ar, u_int signum); +void audit_arg_socket(struct kaudit_record *ar, int sodomain, int sotype, + int soprotocol); +void audit_arg_sockaddr(struct kaudit_record *ar, struct vnode *cwd_vp, + struct sockaddr *so); +void audit_arg_auid(struct kaudit_record *ar, uid_t auid); +void audit_arg_auditinfo(struct kaudit_record *ar, + struct auditinfo *au_info); +void audit_arg_auditinfo_addr(struct kaudit_record *ar, + struct auditinfo_addr *au_info); +void audit_arg_upath(struct kaudit_record *ar, struct vnode *cwd_vp, + char *upath, u_int64_t flags); +void audit_arg_vnpath(struct kaudit_record *ar, struct vnode *vp, + u_int64_t flags); +void audit_arg_vnpath_withref(struct kaudit_record *ar, struct vnode *vp, + u_int64_t flags); +void audit_arg_text(struct kaudit_record *ar, char *text); +void audit_arg_opaque(struct kaudit_record *ar, void *data, size_t size); +void audit_arg_data(struct kaudit_record *ar, void *data, size_t size, + size_t number); +void audit_arg_cmd(struct kaudit_record *ar, int cmd); +void audit_arg_svipc_cmd(struct kaudit_record *ar, int cmd); +void audit_arg_svipc_perm(struct kaudit_record *ar, struct ipc_perm *perm); +void audit_arg_svipc_id(struct kaudit_record *ar, int id); +void audit_arg_svipc_addr(struct kaudit_record *ar, user_addr_t addr); +void audit_arg_posix_ipc_perm(struct kaudit_record *ar, uid_t uid, + gid_t gid, mode_t mode); +void audit_arg_auditon(struct kaudit_record *ar, + union auditon_udata *udata); +void audit_arg_file(struct kaudit_record *ar, struct proc *p, + struct fileproc *fp); +void audit_arg_argv(struct kaudit_record *ar, char *argv, int argc, + int length); +void audit_arg_envv(struct kaudit_record *ar, char *envv, int envc, + int length); + +void audit_arg_mach_port1(struct kaudit_record *ar, mach_port_name_t port); +void 
audit_arg_mach_port2(struct kaudit_record *ar, mach_port_name_t port); +void audit_sysclose(struct kaudit_record *ar, struct proc *p, int fd); + +void audit_proc_coredump(proc_t proc, char *path, int errcode); +void audit_proc_init(struct proc *p); +void audit_proc_fork(struct proc *parent, struct proc *child); +void audit_proc_free(struct proc *p); + +#ifndef _KAUTH_CRED_T +#define _KAUTH_CRED_T +struct ucred; +typedef struct ucred *kauth_cred_t; +#endif /* !_KAUTH_CRED_T */ + +void audit_session_ref(kauth_cred_t cred); +void audit_session_unref(kauth_cred_t cred); +void audit_session_procnew(kauth_cred_t cred); +void audit_session_procexit(kauth_cred_t cred); +int audit_session_spawnjoin(proc_t p, ipc_port_t port); + +/* + * Audit session macros. + */ +#define IS_VALID_SESSION(a) ((a) != NULL && (a) != &audit_default_aia) + +#define AUDIT_SESSION_REF(cred) audit_session_ref(cred) +#define AUDIT_SESSION_UNREF(cred) audit_session_unref(cred) + +#define AUDIT_SESSION_PROCNEW(cred) audit_session_procnew(cred) +#define AUDIT_SESSION_PROCEXIT(cred) audit_session_procexit(cred) + +#if CONFIG_MACF +/* + * audit_mac_data() is the MAC Framework's entry point to the audit subsystem. + * It currently creates only text and data audit tokens. + */ +int audit_mac_data(int type, int len, u_char *data); +void audit_arg_mac_string(struct kaudit_record *ar, char *string); + +#endif + +extern au_event_t sys_au_event[]; + +#define AUDIT_RECORD() \ + ((struct uthread*)get_bsdthread_info(current_thread()))->uu_ar + +#ifndef AUDIT_USE_BUILDIN_EXPECT +#define AUDIT_USE_BUILDIN_EXPECT +#endif + +#ifdef AUDIT_USE_BUILTIN_EXPECT +/* + * Use branch prediction for the case of auditing enabled but not + * auditing system calls. 
+ */ +#define AUDIT_SYSCALLS() __builtin_expect(audit_syscalls, 0) +#define AUDIT_ENABLED() __builtin_expect(audit_syscalls && \ + audit_enabled, 0) +#define AUDIT_AUDITING(x) __builtin_expect(NULL != (x), 0) + +#else + +#define AUDIT_SYSCALLS() (audit_syscalls) +#define AUDIT_ENABLED() (audit_syscalls && audit_enabled) +#define AUDIT_AUDITING(x) (NULL != (x)) + +#endif /* AUDIT_USE_BUILTIN_EXPECT */ + +/* + * Define a macro to wrap the audit_arg_* calls by checking the global + * audit_enabled flag before performing the actual call. + */ +#define AUDIT_ARG(op, args...) do { \ + if (AUDIT_SYSCALLS()) { \ + struct kaudit_record *__ar = AUDIT_RECORD(); \ + if (AUDIT_AUDITING(__ar)) \ + audit_arg_ ## op (__ar, args); \ + } \ +} while (0) + +#define AUDIT_SYSCALL_ENTER(args...) do { \ + if (AUDIT_ENABLED()) { \ + audit_syscall_enter(args); \ + } \ +} while (0) + +/* + * Wrap the audit_syscall_exit() function so that it is called only when + * we have a audit record on the thread. Audit records can persist after + * auditing is disabled, so we don't just check audit_enabled here. + */ +#define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \ + if (AUDIT_AUDITING(uthread->uu_ar)) \ + audit_syscall_exit(code, error, proc, uthread); \ +} while (0) + +/* + * Wrap the audit_mach_syscall_enter() and audit_mach_syscall_exit() + * functions in a manner similar to other system call enter/exit functions. + */ +#define AUDIT_MACH_SYSCALL_ENTER(args...) do { \ + if (AUDIT_ENABLED()) { \ + audit_mach_syscall_enter(args); \ + } \ +} while (0) + +#define AUDIT_MACH_SYSCALL_EXIT(retval) do { \ + if (AUDIT_SYSCALLS()) { \ + struct uthread *__uthread = \ + get_bsdthread_info(current_thread()); \ + if (AUDIT_AUDITING(__uthread->uu_ar)) \ + audit_mach_syscall_exit(retval, __uthread); \ + } \ +} while (0) + +/* + * A Macro to wrap the audit_sysclose() function. + */ +#define AUDIT_SYSCLOSE(args...) 
do { \ + if (AUDIT_SYSCALLS()) { \ + struct kaudit_record *__ar = AUDIT_RECORD(); \ + if (AUDIT_AUDITING(__ar)) \ + audit_sysclose(__ar, args); \ + } \ +} while (0) + +#else /* !CONFIG_AUDIT */ + +#define AUDIT_ARG(op, args...) do { \ +} while (0) + +#define AUDIT_SYSCALL_ENTER(args...) do { \ +} while (0) + +#define AUDIT_SYSCALL_EXIT(code, proc, uthread, error) do { \ +} while (0) + +#define AUDIT_MACH_SYSCALL_ENTER(args...) do { \ +} while (0) + +#define AUDIT_MACH_SYSCALL_EXIT(retval) do { \ +} while (0) + +#define AUDIT_SYSCLOSE(op, args...) do { \ +} while (0) + +#define AUDIT_SESSION_REF(cred) do { \ +} while (0) + +#define AUDIT_SESSION_UNREF(cred) do { \ +} while (0) + +#define AUDIT_SESSION_PROCNEW(cred) do { \ +} while (0) + +#define AUDIT_SESSION_PROCEXIT(cred) do { \ +} while (0) + +#define AUDIT_SESSION_REF(cred) do { \ +} while (0) + +#define AUDIT_SESSION_UNREF(cred) do { \ +} while (0) + +#define AUDIT_SESSION_PROCNEW(cred) do { \ +} while (0) + +#define AUDIT_SESSION_PROCEXIT(cred) do { \ +} while (0) + +#endif /* CONFIG_AUDIT */ + +#endif /* KERNEL */ + +#endif /* !_SECURITY_AUDIT_ADUIT_H */ diff --git a/bsd/security/audit/audit_arg.c b/bsd/security/audit/audit_arg.c new file mode 100644 index 000000000..66792758f --- /dev/null +++ b/bsd/security/audit/audit_arg.c @@ -0,0 +1,903 @@ +/*- + * Copyright (c) 1999-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. 
("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ +/* + * NOTICE: This file was modified by McAfee Research in 2004 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if CONFIG_MACF +#include +#include +#include +#include +extern zone_t audit_mac_label_zone; +#endif + +#include + +#include +#include + +#if CONFIG_AUDIT +/* + * Calls to manipulate elements of the audit record structure from system + * call code. Macro wrappers will prevent this functions from being entered + * if auditing is disabled, avoiding the function call cost. 
We check the + * thread audit record pointer anyway, as the audit condition could change, + * and pre-selection may not have allocated an audit record for this event. + * + * XXXAUDIT: Should we assert, in each case, that this field of the record + * hasn't already been filled in? + */ +void +audit_arg_addr(struct kaudit_record *ar, user_addr_t addr) +{ + struct proc *p = current_proc(); + + ar->k_ar.ar_arg_addr = addr; + + /* + * If the process is 64-bit then flag the address as such. + */ + if (proc_is64bit(p)) + ARG_SET_VALID(ar, ARG_ADDR64); + else + ARG_SET_VALID(ar, ARG_ADDR32); +} + +void +audit_arg_exit(struct kaudit_record *ar, int status, int retval) +{ + + ar->k_ar.ar_arg_exitstatus = status; + ar->k_ar.ar_arg_exitretval = retval; + ARG_SET_VALID(ar, ARG_EXIT); +} + +void +audit_arg_len(struct kaudit_record *ar, user_size_t len) +{ + + ar->k_ar.ar_arg_len = len; + ARG_SET_VALID(ar, ARG_LEN); +} + +void +audit_arg_fd(struct kaudit_record *ar, int fd) +{ + + ar->k_ar.ar_arg_fd = fd; + ARG_SET_VALID(ar, ARG_FD); +} + +void +audit_arg_fflags(struct kaudit_record *ar, int fflags) +{ + + ar->k_ar.ar_arg_fflags = fflags; + ARG_SET_VALID(ar, ARG_FFLAGS); +} + +void +audit_arg_gid(struct kaudit_record *ar, gid_t gid) +{ + + ar->k_ar.ar_arg_gid = gid; + ARG_SET_VALID(ar, ARG_GID); +} + +void +audit_arg_uid(struct kaudit_record *ar, uid_t uid) +{ + + ar->k_ar.ar_arg_uid = uid; + ARG_SET_VALID(ar, ARG_UID); +} + +void +audit_arg_egid(struct kaudit_record *ar, gid_t egid) +{ + + ar->k_ar.ar_arg_egid = egid; + ARG_SET_VALID(ar, ARG_EGID); +} + +void +audit_arg_euid(struct kaudit_record *ar, uid_t euid) +{ + + ar->k_ar.ar_arg_euid = euid; + ARG_SET_VALID(ar, ARG_EUID); +} + +void +audit_arg_rgid(struct kaudit_record *ar, gid_t rgid) +{ + + ar->k_ar.ar_arg_rgid = rgid; + ARG_SET_VALID(ar, ARG_RGID); +} + +void +audit_arg_ruid(struct kaudit_record *ar, uid_t ruid) +{ + + ar->k_ar.ar_arg_ruid = ruid; + ARG_SET_VALID(ar, ARG_RUID); +} + +void +audit_arg_sgid(struct 
kaudit_record *ar, gid_t sgid) +{ + + ar->k_ar.ar_arg_sgid = sgid; + ARG_SET_VALID(ar, ARG_SGID); +} + +void +audit_arg_suid(struct kaudit_record *ar, uid_t suid) +{ + + ar->k_ar.ar_arg_suid = suid; + ARG_SET_VALID(ar, ARG_SUID); +} + +void +audit_arg_groupset(struct kaudit_record *ar, gid_t *gidset, u_int gidset_size) +{ + u_int i; + + for (i = 0; i < gidset_size; i++) + ar->k_ar.ar_arg_groups.gidset[i] = gidset[i]; + ar->k_ar.ar_arg_groups.gidset_size = gidset_size; + ARG_SET_VALID(ar, ARG_GROUPSET); +} + +void +audit_arg_login(struct kaudit_record *ar, char *login) +{ + + strlcpy(ar->k_ar.ar_arg_login, login, MAXLOGNAME); + ARG_SET_VALID(ar, ARG_LOGIN); +} + +void +audit_arg_ctlname(struct kaudit_record *ar, int *name, int namelen) +{ + + bcopy(name, &ar->k_ar.ar_arg_ctlname, namelen * sizeof(int)); + ar->k_ar.ar_arg_len = namelen; + ARG_SET_VALID(ar, ARG_CTLNAME | ARG_LEN); +} + +void +audit_arg_mask(struct kaudit_record *ar, int mask) +{ + + ar->k_ar.ar_arg_mask = mask; + ARG_SET_VALID(ar, ARG_MASK); +} + +void +audit_arg_mode(struct kaudit_record *ar, mode_t mode) +{ + + ar->k_ar.ar_arg_mode = mode; + ARG_SET_VALID(ar, ARG_MODE); +} + +void +audit_arg_value32(struct kaudit_record *ar, uint32_t value32) +{ + + ar->k_ar.ar_arg_value32 = value32; + ARG_SET_VALID(ar, ARG_VALUE32); +} + +void +audit_arg_value64(struct kaudit_record *ar, uint64_t value64) +{ + + ar->k_ar.ar_arg_value64 = value64; + ARG_SET_VALID(ar, ARG_VALUE64); +} + +void +audit_arg_owner(struct kaudit_record *ar, uid_t uid, gid_t gid) +{ + + ar->k_ar.ar_arg_uid = uid; + ar->k_ar.ar_arg_gid = gid; + ARG_SET_VALID(ar, ARG_UID | ARG_GID); +} + +void +audit_arg_pid(struct kaudit_record *ar, pid_t pid) +{ + + ar->k_ar.ar_arg_pid = pid; + ARG_SET_VALID(ar, ARG_PID); +} + +void +audit_arg_process(struct kaudit_record *ar, proc_t p) +{ + kauth_cred_t my_cred; + + KASSERT(p != NULL, ("audit_arg_process: p == NULL")); + + if ( p == NULL) + return; + + my_cred = kauth_cred_proc_ref(p); + 
ar->k_ar.ar_arg_auid = my_cred->cr_audit.as_aia_p->ai_auid; + ar->k_ar.ar_arg_asid = my_cred->cr_audit.as_aia_p->ai_asid; + bcopy(&my_cred->cr_audit.as_aia_p->ai_termid, + &ar->k_ar.ar_arg_termid_addr, sizeof(au_tid_addr_t)); + ar->k_ar.ar_arg_euid = my_cred->cr_uid; + ar->k_ar.ar_arg_egid = my_cred->cr_groups[0]; + ar->k_ar.ar_arg_ruid = my_cred->cr_ruid; + ar->k_ar.ar_arg_rgid = my_cred->cr_rgid; + kauth_cred_unref(&my_cred); + ar->k_ar.ar_arg_pid = p->p_pid; + ARG_SET_VALID(ar, ARG_AUID | ARG_EUID | ARG_EGID | ARG_RUID | + ARG_RGID | ARG_ASID | ARG_TERMID_ADDR | ARG_PID | ARG_PROCESS); +} + +void +audit_arg_signum(struct kaudit_record *ar, u_int signum) +{ + + ar->k_ar.ar_arg_signum = signum; + ARG_SET_VALID(ar, ARG_SIGNUM); +} + +void +audit_arg_socket(struct kaudit_record *ar, int sodomain, int sotype, + int soprotocol) +{ + + ar->k_ar.ar_arg_sockinfo.sai_domain = sodomain; + ar->k_ar.ar_arg_sockinfo.sai_type = sotype; + ar->k_ar.ar_arg_sockinfo.sai_protocol = soprotocol; + ARG_SET_VALID(ar, ARG_SOCKINFO); +} + +/* + * Note that the current working directory vp must be supplied at the audit + * call site to permit per thread current working directories, and that it + * must take a upath starting with '/' into account for chroot if the path + * is absolute. This results in the real (non-chroot) path being recorded + * in the audit record. 
+ */ +void +audit_arg_sockaddr(struct kaudit_record *ar, struct vnode *cwd_vp, + struct sockaddr *sa) +{ + int slen; + struct sockaddr_un *sun; + char path[SOCK_MAXADDRLEN - offsetof(struct sockaddr_un, sun_path) + 1]; + + KASSERT(sa != NULL, ("audit_arg_sockaddr: sa == NULL")); + + if (cwd_vp == NULL || sa == NULL) + return; + + bcopy(sa, &ar->k_ar.ar_arg_sockaddr, sa->sa_len); + switch (sa->sa_family) { + case AF_INET: + ARG_SET_VALID(ar, ARG_SADDRINET); + break; + + case AF_INET6: + ARG_SET_VALID(ar, ARG_SADDRINET6); + break; + + case AF_UNIX: + sun = (struct sockaddr_un *)sa; + slen = sun->sun_len - offsetof(struct sockaddr_un, sun_path); + + if (slen >= 0) { + /* + * Make sure the path is NULL-terminated + */ + if (sun->sun_path[slen] != 0) { + bcopy(sun->sun_path, path, slen); + path[slen] = 0; + audit_arg_upath(ar, cwd_vp, path, ARG_UPATH1); + } else { + audit_arg_upath(ar, cwd_vp, sun->sun_path, + ARG_UPATH1); + } + } + ARG_SET_VALID(ar, ARG_SADDRUNIX); + break; + /* XXXAUDIT: default:? 
*/ + } +} + +void +audit_arg_auid(struct kaudit_record *ar, uid_t auid) +{ + + ar->k_ar.ar_arg_auid = auid; + ARG_SET_VALID(ar, ARG_AUID); +} + +void +audit_arg_auditinfo(struct kaudit_record *ar, struct auditinfo *au_info) +{ + + ar->k_ar.ar_arg_auid = au_info->ai_auid; + ar->k_ar.ar_arg_asid = au_info->ai_asid; + ar->k_ar.ar_arg_amask.am_success = au_info->ai_mask.am_success; + ar->k_ar.ar_arg_amask.am_failure = au_info->ai_mask.am_failure; + ar->k_ar.ar_arg_termid.port = au_info->ai_termid.port; + ar->k_ar.ar_arg_termid.machine = au_info->ai_termid.machine; + ARG_SET_VALID(ar, ARG_AUID | ARG_ASID | ARG_AMASK | ARG_TERMID); +} + +void +audit_arg_auditinfo_addr(struct kaudit_record *ar, + struct auditinfo_addr *au_info) +{ + + ar->k_ar.ar_arg_auid = au_info->ai_auid; + ar->k_ar.ar_arg_asid = au_info->ai_asid; + ar->k_ar.ar_arg_amask.am_success = au_info->ai_mask.am_success; + ar->k_ar.ar_arg_amask.am_failure = au_info->ai_mask.am_failure; + ar->k_ar.ar_arg_termid_addr.at_type = au_info->ai_termid.at_type; + ar->k_ar.ar_arg_termid_addr.at_port = au_info->ai_termid.at_port; + ar->k_ar.ar_arg_termid_addr.at_addr[0] = au_info->ai_termid.at_addr[0]; + ar->k_ar.ar_arg_termid_addr.at_addr[1] = au_info->ai_termid.at_addr[1]; + ar->k_ar.ar_arg_termid_addr.at_addr[2] = au_info->ai_termid.at_addr[2]; + ar->k_ar.ar_arg_termid_addr.at_addr[3] = au_info->ai_termid.at_addr[3]; + ARG_SET_VALID(ar, ARG_AUID | ARG_ASID | ARG_AMASK | ARG_TERMID_ADDR); +} + +void +audit_arg_text(struct kaudit_record *ar, char *text) +{ + + KASSERT(text != NULL, ("audit_arg_text: text == NULL")); + + /* Invalidate the text string */ + ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_TEXT); + if (text == NULL) + return; + + if (ar->k_ar.ar_arg_text == NULL) + ar->k_ar.ar_arg_text = malloc(MAXPATHLEN, M_AUDITTEXT, + M_WAITOK); + + strncpy(ar->k_ar.ar_arg_text, text, MAXPATHLEN); + ARG_SET_VALID(ar, ARG_TEXT); +} + +void +audit_arg_opaque(struct kaudit_record *ar, void *data, size_t size) +{ + + KASSERT(data != 
NULL, ("audit_arg_opaque: data == NULL")); + KASSERT(size <= UINT16_MAX, ("audit_arg_opaque: size > UINT16_MAX")); + + if (data == NULL || size > UINT16_MAX) + return; + + if (ar->k_ar.ar_arg_opaque == NULL) + ar->k_ar.ar_arg_opaque = malloc(size, M_AUDITDATA, M_WAITOK); + else + return; + + memcpy(ar->k_ar.ar_arg_opaque, data, size); + ar->k_ar.ar_arg_opq_size = (u_int16_t) size; + ARG_SET_VALID(ar, ARG_OPAQUE); +} + +void +audit_arg_data(struct kaudit_record *ar, void *data, size_t size, size_t number) +{ + size_t sz; + + KASSERT(data != NULL, ("audit_arg_data: data == NULL")); + KASSERT(size >= AUR_BYTE_SIZE && size <= AUR_INT64_SIZE, + ("audit_arg_data: size < AUR_BYTE_SIZE or size > AUR_INT64_SIZE")); + KASSERT(number <= UINT8_MAX, + ("audit_arg_data: number > UINT8_MAX")); + + if (data == NULL || size < AUR_BYTE_SIZE || size > AUR_INT64_SIZE || + number > UINT8_MAX) + return; + + sz = size * number; + + if (ar->k_ar.ar_arg_data == NULL) + ar->k_ar.ar_arg_data = malloc(sz, M_AUDITDATA, M_WAITOK); + else + return; + + memcpy(ar->k_ar.ar_arg_data, data, sz); + + switch(size) { + case AUR_BYTE_SIZE: + ar->k_ar.ar_arg_data_type = AUR_BYTE; + break; + + case AUR_SHORT_SIZE: + ar->k_ar.ar_arg_data_type = AUR_SHORT; + break; + + case AUR_INT32_SIZE: + ar->k_ar.ar_arg_data_type = AUR_INT32; + break; + + case AUR_INT64_SIZE: + ar->k_ar.ar_arg_data_type = AUR_INT64; + break; + + default: + free(ar->k_ar.ar_arg_data, M_AUDITDATA); + ar->k_ar.ar_arg_data = NULL; + return; + } + + ar->k_ar.ar_arg_data_count = (u_char)number; + + ARG_SET_VALID(ar, ARG_DATA); +} + +void +audit_arg_cmd(struct kaudit_record *ar, int cmd) +{ + + ar->k_ar.ar_arg_cmd = cmd; + ARG_SET_VALID(ar, ARG_CMD); +} + +void +audit_arg_svipc_cmd(struct kaudit_record *ar, int cmd) +{ + + ar->k_ar.ar_arg_svipc_cmd = cmd; + ARG_SET_VALID(ar, ARG_SVIPC_CMD); +} + +void +audit_arg_svipc_perm(struct kaudit_record *ar, struct ipc_perm *perm) +{ + + bcopy(perm, &ar->k_ar.ar_arg_svipc_perm, + 
sizeof(ar->k_ar.ar_arg_svipc_perm)); + ARG_SET_VALID(ar, ARG_SVIPC_PERM); +} + +void +audit_arg_svipc_id(struct kaudit_record *ar, int id) +{ + + ar->k_ar.ar_arg_svipc_id = id; + ARG_SET_VALID(ar, ARG_SVIPC_ID); +} + +void +audit_arg_svipc_addr(struct kaudit_record *ar, user_addr_t addr) +{ + + ar->k_ar.ar_arg_svipc_addr = addr; + ARG_SET_VALID(ar, ARG_SVIPC_ADDR); +} + +void +audit_arg_posix_ipc_perm(struct kaudit_record *ar, uid_t uid, gid_t gid, + mode_t mode) +{ + + ar->k_ar.ar_arg_pipc_perm.pipc_uid = uid; + ar->k_ar.ar_arg_pipc_perm.pipc_gid = gid; + ar->k_ar.ar_arg_pipc_perm.pipc_mode = mode; + ARG_SET_VALID(ar, ARG_POSIX_IPC_PERM); +} + +void +audit_arg_auditon(struct kaudit_record *ar, union auditon_udata *udata) +{ + + bcopy((void *)udata, &ar->k_ar.ar_arg_auditon, + sizeof(ar->k_ar.ar_arg_auditon)); + ARG_SET_VALID(ar, ARG_AUDITON); +} + +/* + * Audit information about a file, either the file's vnode info, or its + * socket address info. + */ +void +audit_arg_file(struct kaudit_record *ar, __unused proc_t p, + struct fileproc *fp) +{ + struct socket *so; + struct inpcb *pcb; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + + switch (fp->f_fglob->fg_type) { + case DTYPE_VNODE: + /* case DTYPE_FIFO: */ + audit_arg_vnpath_withref(ar, + (struct vnode *)fp->f_fglob->fg_data, ARG_VNODE1); + break; + + case DTYPE_SOCKET: + so = (struct socket *)fp->f_fglob->fg_data; + if (INP_CHECK_SOCKAF(so, PF_INET)) { + if (so->so_pcb == NULL) + break; + ar->k_ar.ar_arg_sockinfo.sai_type = + so->so_type; + ar->k_ar.ar_arg_sockinfo.sai_domain = + INP_SOCKAF(so); + ar->k_ar.ar_arg_sockinfo.sai_protocol = + so->so_proto->pr_protocol; + pcb = (struct inpcb *)so->so_pcb; + sin = (struct sockaddr_in *) + &ar->k_ar.ar_arg_sockinfo.sai_faddr; + sin->sin_addr.s_addr = pcb->inp_faddr.s_addr; + sin->sin_port = pcb->inp_fport; + sin = (struct sockaddr_in *) + &ar->k_ar.ar_arg_sockinfo.sai_laddr; + sin->sin_addr.s_addr = pcb->inp_laddr.s_addr; + sin->sin_port = pcb->inp_lport; + 
ARG_SET_VALID(ar, ARG_SOCKINFO); + } + if (INP_CHECK_SOCKAF(so, PF_INET6)) { + if (so->so_pcb == NULL) + break; + ar->k_ar.ar_arg_sockinfo.sai_type = + so->so_type; + ar->k_ar.ar_arg_sockinfo.sai_domain = + INP_SOCKAF(so); + ar->k_ar.ar_arg_sockinfo.sai_protocol = + so->so_proto->pr_protocol; + pcb = (struct inpcb *)so->so_pcb; + sin6 = (struct sockaddr_in6 *) + &ar->k_ar.ar_arg_sockinfo.sai_faddr; + sin6->sin6_addr = pcb->in6p_faddr; + sin6->sin6_port = pcb->in6p_fport; + sin6 = (struct sockaddr_in6 *) + &ar->k_ar.ar_arg_sockinfo.sai_laddr; + sin6->sin6_addr = pcb->in6p_laddr; + sin6->sin6_port = pcb->in6p_lport; + ARG_SET_VALID(ar, ARG_SOCKINFO); + } + break; + + default: + /* XXXAUDIT: else? */ + break; + } +} + +/* + * Store a path as given by the user process for auditing into the audit + * record stored on the user thread. This function will allocate the memory + * to store the path info if not already available. This memory will be + * freed when the audit record is freed. + * + * Note that the current working directory vp must be supplied at the audit call + * site to permit per thread current working directories, and that it must take + * a upath starting with '/' into account for chroot if the path is absolute. + * This results in the real (non-chroot) path being recorded in the audit + * record. + * + * XXXAUDIT: Possibly assert that the memory isn't already allocated? 
+ */ +void +audit_arg_upath(struct kaudit_record *ar, struct vnode *cwd_vp, char *upath, u_int64_t flag) +{ + char **pathp; + + KASSERT(upath != NULL, ("audit_arg_upath: upath == NULL")); + KASSERT((flag == ARG_UPATH1) || (flag == ARG_UPATH2), + ("audit_arg_upath: flag %llu", (unsigned long long)flag)); + KASSERT((flag != ARG_UPATH1) || (flag != ARG_UPATH2), + ("audit_arg_upath: flag %llu", (unsigned long long)flag)); + + if (flag == ARG_UPATH1) + pathp = &ar->k_ar.ar_arg_upath1; + else + pathp = &ar->k_ar.ar_arg_upath2; + + if (*pathp == NULL) + *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); + else + return; + + if (audit_canon_path(cwd_vp, upath, *pathp) == 0) + ARG_SET_VALID(ar, flag); + else { + free(*pathp, M_AUDITPATH); + *pathp = NULL; + } +} + +/* + * Function to save the path and vnode attr information into the audit + * record. + * + * It is assumed that the caller will hold any vnode locks necessary to + * perform a VNOP_GETATTR() on the passed vnode. + * + * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always + * provides access to the generation number as we need that to construct the + * BSM file ID. + * + * XXX: We should accept the process argument from the caller, since it's + * very likely they already have a reference. + * + * XXX: Error handling in this function is poor. + * + * XXXAUDIT: Possibly KASSERT the path pointer is NULL? + */ +void +audit_arg_vnpath(struct kaudit_record *ar, struct vnode *vp, u_int64_t flags) +{ + struct vnode_attr va; + int error; + int len; + char **pathp; + struct vnode_au_info *vnp; + proc_t p; +#if CONFIG_MACF + char **vnode_mac_labelp; + struct mac mac; +#endif + + KASSERT(vp != NULL, ("audit_arg_vnpath: vp == NULL")); + KASSERT((flags == ARG_VNODE1) || (flags == ARG_VNODE2), + ("audit_arg_vnpath: flags != ARG_VNODE[1,2]")); + + p = current_proc(); + + /* + * XXXAUDIT: The below clears, and then resets the flags for valid + * arguments. 
Ideally, either the new vnode is used, or the old one + * would be. + */ + if (flags & ARG_VNODE1) { + ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_KPATH1); + ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_VNODE1); + pathp = &ar->k_ar.ar_arg_kpath1; + vnp = &ar->k_ar.ar_arg_vnode1; +#if CONFIG_MACF + vnode_mac_labelp = &ar->k_ar.ar_vnode1_mac_labels; +#endif + } else { + ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_KPATH2); + ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_VNODE2); + pathp = &ar->k_ar.ar_arg_kpath2; + vnp = &ar->k_ar.ar_arg_vnode2; +#if CONFIG_MACF + vnode_mac_labelp = &ar->k_ar.ar_vnode2_mac_labels; +#endif + } + + if (*pathp == NULL) + *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); + else + return; + + /* + * If vn_getpath() succeeds, place it in a string buffer + * attached to the audit record, and set a flag indicating + * it is present. + */ + len = MAXPATHLEN; + if (vn_getpath(vp, *pathp, &len) == 0) { + if (flags & ARG_VNODE1) + ARG_SET_VALID(ar, ARG_KPATH1); + else + ARG_SET_VALID(ar, ARG_KPATH2); + } else { + free(*pathp, M_AUDITPATH); + *pathp = NULL; + } + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_mode); + VATTR_WANTED(&va, va_uid); + VATTR_WANTED(&va, va_gid); + VATTR_WANTED(&va, va_rdev); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_fileid); + VATTR_WANTED(&va, va_gen); + error = vnode_getattr(vp, &va, vfs_context_current()); + if (error) { + /* XXX: How to handle this case? */ + return; + } + +#if CONFIG_MACF + if (*vnode_mac_labelp == NULL && (vp->v_lflag & VL_LABELED) == VL_LABELED) { + *vnode_mac_labelp = (char *)zalloc(audit_mac_label_zone); + if (*vnode_mac_labelp != NULL) { + mac.m_buflen = MAC_AUDIT_LABEL_LEN; + mac.m_string = *vnode_mac_labelp; + mac_vnode_label_externalize_audit(vp, &mac); + } + } +#endif + + /* + * XXX do we want to fall back here when these aren't supported? 
+ */ + vnp->vn_mode = va.va_mode; + vnp->vn_uid = va.va_uid; + vnp->vn_gid = va.va_gid; + vnp->vn_dev = va.va_rdev; + vnp->vn_fsid = va.va_fsid; + vnp->vn_fileid = (u_int32_t)va.va_fileid; + vnp->vn_gen = va.va_gen; + if (flags & ARG_VNODE1) + ARG_SET_VALID(ar, ARG_VNODE1); + else + ARG_SET_VALID(ar, ARG_VNODE2); +} + +void +audit_arg_vnpath_withref(struct kaudit_record *ar, struct vnode *vp, u_int64_t flags) +{ + if (vp == NULL || vnode_getwithref(vp)) + return; + audit_arg_vnpath(ar, vp, flags); + (void)vnode_put(vp); +} + +void +audit_arg_mach_port1(struct kaudit_record *ar, mach_port_name_t port) +{ + + ar->k_ar.ar_arg_mach_port1 = port; + ARG_SET_VALID(ar, ARG_MACHPORT1); +} + +void +audit_arg_mach_port2(struct kaudit_record *ar, mach_port_name_t port) +{ + + ar->k_ar.ar_arg_mach_port2 = port; + ARG_SET_VALID(ar, ARG_MACHPORT2); +} + + +/* + * Audit the argument strings passed to exec. + */ +void +audit_arg_argv(struct kaudit_record *ar, char *argv, int argc, int length) +{ + + if (audit_argv == 0 || argc == 0) + return; + + if (ar->k_ar.ar_arg_argv == NULL) + ar->k_ar.ar_arg_argv = malloc(length, M_AUDITTEXT, M_WAITOK); + bcopy(argv, ar->k_ar.ar_arg_argv, length); + ar->k_ar.ar_arg_argc = argc; + ARG_SET_VALID(ar, ARG_ARGV); +} + +/* + * Audit the environment strings passed to exec. + */ +void +audit_arg_envv(struct kaudit_record *ar, char *envv, int envc, int length) +{ + + if (audit_arge == 0 || envc == 0) + return; + + if (ar->k_ar.ar_arg_envv == NULL) + ar->k_ar.ar_arg_envv = malloc(length, M_AUDITTEXT, M_WAITOK); + bcopy(envv, ar->k_ar.ar_arg_envv, length); + ar->k_ar.ar_arg_envc = envc; + ARG_SET_VALID(ar, ARG_ENVV); +} + +/* + * The close() system call uses it's own audit call to capture the path/vnode + * information because those pieces are not easily obtained within the system + * call itself. 
+ */ +void +audit_sysclose(struct kaudit_record *ar, proc_t p, int fd) +{ + struct fileproc *fp; + struct vnode *vp; + + KASSERT(p != NULL, ("audit_sysclose: p == NULL")); + + audit_arg_fd(ar, fd); + + if (fp_getfvp(p, fd, &fp, &vp) != 0) + return; + + audit_arg_vnpath_withref(ar, (struct vnode *)fp->f_fglob->fg_data, + ARG_VNODE1); + fp_drop(p, fd, fp, 0); +} + +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsd.c b/bsd/security/audit/audit_bsd.c new file mode 100644 index 000000000..fdae0d79d --- /dev/null +++ b/bsd/security/audit/audit_bsd.c @@ -0,0 +1,563 @@ +/*- + * Copyright (c) 2008-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#if CONFIG_AUDIT +struct mhdr { + size_t mh_size; + au_malloc_type_t *mh_type; + u_long mh_magic; + char mh_data[0]; +}; + +#define AUDIT_MHMAGIC 0x4D656C53 + +#if AUDIT_MALLOC_DEBUG +#define AU_MAX_SHORTDESC 20 +#define AU_MAX_LASTCALLER 20 +struct au_malloc_debug_info { + SInt64 md_size; + SInt64 md_maxsize; + SInt32 md_inuse; + SInt32 md_maxused; + unsigned md_type; + unsigned md_magic; + char md_shortdesc[AU_MAX_SHORTDESC]; + char md_lastcaller[AU_MAX_LASTCALLER]; +}; +typedef struct au_malloc_debug_info au_malloc_debug_info_t; + +au_malloc_type_t *audit_malloc_types[NUM_MALLOC_TYPES]; + +static int audit_sysctl_malloc_debug(struct sysctl_oid *oidp, void *arg1, + int arg2, struct sysctl_req *req); + +SYSCTL_PROC(_kern, OID_AUTO, audit_malloc_debug, CTLFLAG_RD, NULL, 0, + audit_sysctl_malloc_debug, "S,audit_malloc_debug", + "Current malloc debug info for auditing."); + +#define AU_MALLOC_DBINFO_SZ \ + (NUM_MALLOC_TYPES * sizeof(au_malloc_debug_info_t)) + +/* + * Copy out the malloc debug info via the sysctl interface. 
The userland code + * is something like the following: + * + * error = sysctlbyname("kern.audit_malloc_debug", buffer_ptr, &buffer_len, + * NULL, 0); + */ +static int +audit_sysctl_malloc_debug(__unused struct sysctl_oid *oidp, __unused void *arg1, + __unused int arg2, struct sysctl_req *req) +{ + int i; + size_t sz; + au_malloc_debug_info_t *amdi_ptr, *nxt_ptr; + int err; + + /* + * This provides a read-only node. + */ + if (req->newptr != USER_ADDR_NULL) + return (EPERM); + + /* + * If just querying then return the space required. + */ + if (req->oldptr == USER_ADDR_NULL) { + req->oldidx = AU_MALLOC_DBINFO_SZ; + return (0); + } + + /* + * Alloc a temporary buffer. + */ + if (req->oldlen < AU_MALLOC_DBINFO_SZ) + return (ENOMEM); + amdi_ptr = (au_malloc_debug_info_t *)kalloc(AU_MALLOC_DBINFO_SZ); + if (amdi_ptr == NULL) + return (ENOMEM); + bzero(amdi_ptr, AU_MALLOC_DBINFO_SZ); + + /* + * Build the record array. + */ + sz = 0; + nxt_ptr = amdi_ptr; + for(i = 0; i < NUM_MALLOC_TYPES; i++) { + if (audit_malloc_types[i] == NULL) + continue; + if (audit_malloc_types[i]->mt_magic != M_MAGIC) { + nxt_ptr->md_magic = audit_malloc_types[i]->mt_magic; + continue; + } + nxt_ptr->md_magic = audit_malloc_types[i]->mt_magic; + nxt_ptr->md_size = audit_malloc_types[i]->mt_size; + nxt_ptr->md_maxsize = audit_malloc_types[i]->mt_maxsize; + nxt_ptr->md_inuse = (int)audit_malloc_types[i]->mt_inuse; + nxt_ptr->md_maxused = (int)audit_malloc_types[i]->mt_maxused; + strlcpy(nxt_ptr->md_shortdesc, + audit_malloc_types[i]->mt_shortdesc, AU_MAX_SHORTDESC - 1); + strlcpy(nxt_ptr->md_lastcaller, + audit_malloc_types[i]->mt_lastcaller, AU_MAX_LASTCALLER-1); + sz += sizeof(au_malloc_debug_info_t); + nxt_ptr++; + } + + req->oldlen = sz; + err = SYSCTL_OUT(req, amdi_ptr, sz); + kfree(amdi_ptr, AU_MALLOC_DBINFO_SZ); + + return (err); +} +#endif /* AUDIT_MALLOC_DEBUG */ + +/* + * BSD malloc() + * + * If the M_NOWAIT flag is set then it may not block and return NULL. 
+ * If the M_ZERO flag is set then zero out the buffer. + */ +void * +#if AUDIT_MALLOC_DEBUG +_audit_malloc(size_t size, au_malloc_type_t *type, int flags, const char *fn) +#else +_audit_malloc(size_t size, au_malloc_type_t *type, int flags) +#endif +{ + union { + struct mhdr hdr; + char mem[size + sizeof (struct mhdr)]; + } *mem; + size_t memsize = sizeof (*mem); + + if (size == 0) + return (NULL); + if (flags & M_NOWAIT) { + mem = (void *)kalloc_noblock(memsize); + } else { + mem = (void *)kalloc(memsize); + if (mem == NULL) + panic("_audit_malloc: kernel memory exhausted"); + } + if (mem == NULL) + return (NULL); + mem->hdr.mh_size = memsize; + mem->hdr.mh_type = type; + mem->hdr.mh_magic = AUDIT_MHMAGIC; + if (flags & M_ZERO) + memset(mem->hdr.mh_data, 0, size); +#if AUDIT_MALLOC_DEBUG + if (type != NULL && type->mt_type < NUM_MALLOC_TYPES) { + OSAddAtomic64(memsize, &type->mt_size); + type->mt_maxsize = max(type->mt_size, type->mt_maxsize); + OSAddAtomic(1, &type->mt_inuse); + type->mt_maxused = max(type->mt_inuse, type->mt_maxused); + type->mt_lastcaller = fn; + audit_malloc_types[type->mt_type] = type; + } +#endif /* AUDIT_MALLOC_DEBUG */ + return (mem->hdr.mh_data); +} + +/* + * BSD free() + */ +void +#if AUDIT_MALLOC_DEBUG +_audit_free(void *addr, au_malloc_type_t *type) +#else +_audit_free(void *addr, __unused au_malloc_type_t *type) +#endif +{ + struct mhdr *hdr; + + if (addr == NULL) + return; + hdr = addr; hdr--; + + KASSERT(hdr->mh_magic == AUDIT_MHMAGIC, + ("_audit_free(): hdr->mh_magic != AUDIT_MHMAGIC")); + +#if AUDIT_MALLOC_DEBUG + if (type != NULL) { + OSAddAtomic64(-hdr->mh_size, &type->mt_size); + OSAddAtomic(-1, &type->mt_inuse); + } +#endif /* AUDIT_MALLOC_DEBUG */ + kfree(hdr, hdr->mh_size); +} + +/* + * Initialize a condition variable. Must be called before use. 
+ */ +void +_audit_cv_init(struct cv *cvp, const char *desc) +{ + + if (desc == NULL) + cvp->cv_description = "UNKNOWN"; + else + cvp->cv_description = desc; + cvp->cv_waiters = 0; +} + +/* + * Destory a condition variable. + */ +void +_audit_cv_destroy(struct cv *cvp) +{ + + cvp->cv_description = NULL; + cvp->cv_waiters = 0; +} + +/* + * Signal a condition variable, wakes up one waiting thread. + */ +void +_audit_cv_signal(struct cv *cvp) +{ + + if (cvp->cv_waiters > 0) { + wakeup_one((caddr_t)cvp); + cvp->cv_waiters--; + } +} + +/* + * Broadcast a signal to a condition variable. + */ +void +_audit_cv_broadcast(struct cv *cvp) +{ + + if (cvp->cv_waiters > 0) { + wakeup((caddr_t)cvp); + cvp->cv_waiters = 0; + } +} + +/* + * Wait on a condition variable. A cv_signal or cv_broadcast on the same + * condition variable will resume the thread. It is recommended that the mutex + * be held when cv_signal or cv_broadcast are called. + */ +void +_audit_cv_wait(struct cv *cvp, lck_mtx_t *mp, const char *desc) +{ + + cvp->cv_waiters++; + (void) msleep(cvp, mp, PZERO, desc, 0); +} + +/* + * Wait on a condition variable, allowing interruption by signals. Return 0 + * if the thread was resumed with cv_signal or cv_broadcast, EINTR or + * ERESTART if a signal was caught. If ERESTART is returned the system call + * should be restarted if possible. + */ +int +_audit_cv_wait_sig(struct cv *cvp, lck_mtx_t *mp, const char *desc) +{ + + cvp->cv_waiters++; + return (msleep(cvp, mp, PSOCK | PCATCH, desc, 0)); +} + +/* + * Simple recursive lock. + */ +void +_audit_rlck_init(struct rlck *lp, const char *grpname) +{ + + lp->rl_grp = lck_grp_alloc_init(grpname, LCK_GRP_ATTR_NULL); + lp->rl_mtx = lck_mtx_alloc_init(lp->rl_grp, LCK_ATTR_NULL); + + lp->rl_thread = 0; + lp->rl_recurse = 0; +} + +/* + * Recursive lock. Allow same thread to recursively lock the same lock. 
+ */ +void +_audit_rlck_lock(struct rlck *lp) +{ + + if (lp->rl_thread == current_thread()) { + OSAddAtomic(1, &lp->rl_recurse); + KASSERT(lp->rl_recurse < 10000, + ("_audit_rlck_lock: lock nested too deep.")); + } else { + lck_mtx_lock(lp->rl_mtx); + lp->rl_thread = current_thread(); + lp->rl_recurse = 1; + } +} + +/* + * Recursive unlock. It should be the same thread that does the unlock. + */ +void +_audit_rlck_unlock(struct rlck *lp) +{ + KASSERT(lp->rl_thread == current_thread(), + ("_audit_rlck_unlock(): Don't own lock.")); + + /* Note: OSAddAtomic returns old value. */ + if (OSAddAtomic(-1, &lp->rl_recurse) == 1) { + lp->rl_thread = 0; + lck_mtx_unlock(lp->rl_mtx); + } +} + +void +_audit_rlck_destroy(struct rlck *lp) +{ + + if (lp->rl_mtx) { + lck_mtx_free(lp->rl_mtx, lp->rl_grp); + lp->rl_mtx = 0; + } + if (lp->rl_grp) { + lck_grp_free(lp->rl_grp); + lp->rl_grp = 0; + } +} + +/* + * Recursive lock assert. + */ +void +_audit_rlck_assert(struct rlck *lp, u_int assert) +{ + thread_t cthd = current_thread(); + + if (assert == LCK_MTX_ASSERT_OWNED && lp->rl_thread == cthd) + panic("recursive lock (%p) not held by this thread (%p).", + lp, cthd); + if (assert == LCK_MTX_ASSERT_NOTOWNED && lp->rl_thread != 0) + panic("recursive lock (%p) held by thread (%p).", + lp, cthd); +} + +/* + * Simple sleep lock. + */ +void +_audit_slck_init(struct slck *lp, const char *grpname) +{ + + lp->sl_grp = lck_grp_alloc_init(grpname, LCK_GRP_ATTR_NULL); + lp->sl_mtx = lck_mtx_alloc_init(lp->sl_grp, LCK_ATTR_NULL); + + lp->sl_locked = 0; + lp->sl_waiting = 0; +} + +/* + * Sleep lock lock. The 'intr' flag determines if the lock is interruptible. + * If 'intr' is true then signals or other events can interrupt the sleep lock. 
+ */ +wait_result_t +_audit_slck_lock(struct slck *lp, int intr) +{ + wait_result_t res = THREAD_AWAKENED; + + lck_mtx_lock(lp->sl_mtx); + while (lp->sl_locked && res == THREAD_AWAKENED) { + lp->sl_waiting = 1; + res = lck_mtx_sleep(lp->sl_mtx, LCK_SLEEP_DEFAULT, + (event_t) lp, (intr) ? THREAD_INTERRUPTIBLE : THREAD_UNINT); + } + if (res == THREAD_AWAKENED) + lp->sl_locked = 1; + lck_mtx_unlock(lp->sl_mtx); + + return (res); +} + +/* + * Sleep lock unlock. Wake up all the threads waiting for this lock. + */ +void +_audit_slck_unlock(struct slck *lp) +{ + + lck_mtx_lock(lp->sl_mtx); + lp->sl_locked = 0; + if (lp->sl_waiting) { + lp->sl_waiting = 0; + + /* Wake up *all* sleeping threads. */ + thread_wakeup_prim((event_t) lp, /*1 thr*/ 0, THREAD_AWAKENED); + } + lck_mtx_unlock(lp->sl_mtx); +} + +/* + * Sleep lock try. Don't sleep if it doesn't get the lock. + */ +int +_audit_slck_trylock(struct slck *lp) +{ + int result; + + lck_mtx_lock(lp->sl_mtx); + result = !lp->sl_locked; + if (result) + lp->sl_locked = 1; + lck_mtx_unlock(lp->sl_mtx); + + return (result); +} + +/* + * Sleep lock assert. + */ +void +_audit_slck_assert(struct slck *lp, u_int assert) +{ + + if (assert == LCK_MTX_ASSERT_OWNED && lp->sl_locked == 0) + panic("sleep lock (%p) not held.", lp); + if (assert == LCK_MTX_ASSERT_NOTOWNED && lp->sl_locked == 1) + panic("sleep lock (%p) held.", lp); +} + +void +_audit_slck_destroy(struct slck *lp) +{ + + if (lp->sl_mtx) { + lck_mtx_free(lp->sl_mtx, lp->sl_grp); + lp->sl_mtx = 0; + } + if (lp->sl_grp) { + lck_grp_free(lp->sl_grp); + lp->sl_grp = 0; + } +} + +/* + * XXXss - This code was taken from bsd/netinet6/icmp6.c. Maybe ppsratecheck() + * should be made global in icmp6.c. 
+ */ +#ifndef timersub +#define timersub(tvp, uvp, vvp) \ + do { \ + (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ + (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ + if ((vvp)->tv_usec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + +/* + * Packets (or events) per second limitation. + */ +int +_audit_ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) +{ + struct timeval tv, delta; + int rv; + + microtime(&tv); + + timersub(&tv, lasttime, &delta); + + /* + * Check for 0,0 so that the message will be seen at least once. + * If more than one second has passed since the last update of + * lasttime, reset the counter. + * + * we do increment *curpps even in *curpps < maxpps case, as some may + * try to use *curpps for stat purposes as well. + */ + if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || + delta.tv_sec >= 1) { + *lasttime = tv; + *curpps = 0; + rv = 1; + } else if (maxpps < 0) + rv = 1; + else if (*curpps < maxpps) + rv = 1; + else + rv = 0; + if (*curpps + 1 > 0) + *curpps = *curpps + 1; + + return (rv); +} + +int +audit_send_trigger(unsigned int trigger) +{ + mach_port_t audit_port; + int error; + + error = host_get_audit_control_port(host_priv_self(), &audit_port); + if (error == KERN_SUCCESS && audit_port != MACH_PORT_NULL) { + audit_triggers(audit_port, trigger); + return (0); + } else { + printf("Cannot get audit control port\n"); + return (error); + } +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsd.h b/bsd/security/audit/audit_bsd.h new file mode 100644 index 000000000..23b61a5df --- /dev/null +++ b/bsd/security/audit/audit_bsd.h @@ -0,0 +1,329 @@ +/*- + * Copyright (c) 2008, Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _SECURITY_AUDIT_AUDIT_BSD_H +#define _SECURITY_AUDIT_AUDIT_BSD_H + +#include +#include + +#if defined(_KERNEL) || defined(KERNEL) + +#if DIAGNOSTIC +#ifdef KASSERT +#undef KASSERT +#endif +#ifdef AUDIT_KASSERT_DEBUG +#define KASSERT(exp, msg) do { \ + if (__builtin_expect(!(exp), 0)) { \ + printf("%s:%d KASSERT failed: ", __FILE__, __LINE__); \ + printf msg; \ + printf("\n"); \ + } \ +} while (0) +#else +#define KASSERT(exp, msg) do { \ + if (__builtin_expect(!(exp), 0)) \ + panic msg; \ +} while (0) +#endif +#endif /* DIAGNOSTIC */ + +#if __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN +#define be16enc(p, d) *(p) = (d) +#define be32enc(p, d) *(p) = (d) +#define be64enc(p, d) *(p) = (d) + +#else /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + +#include + +#define be16enc(p, d) OSWriteSwapInt16(p, 0, d) +#define be32enc(p, d) OSWriteSwapInt32(p, 0, d) +#define be64enc(p, d) OSWriteSwapInt64(p, 0, d) +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ + +/* + * BSD kernel memory allocation. + */ +#define AUDIT_MALLOC_DEBUG 0 /* Change to 1 for malloc debugging. 
*/ + +#define M_AUDITUNKNOWN 0 +#define M_AUDITDATA 1 +#define M_AUDITPATH 2 +#define M_AUDITTEXT 3 +#define M_AUDITBSM 4 +#define M_AUDITEVCLASS 5 +#define M_AUDIT_PIPE 6 +#define M_AUDIT_PIPE_ENTRY 7 +#define M_AUDIT_PIPE_PRESELECT 8 +#define M_AU_SESSION 9 +#define M_AU_EV_PLIST 10 + +#define NUM_MALLOC_TYPES 11 + +#ifdef M_NOWAIT +#undef M_NOWAIT +#endif +#define M_NOWAIT 0x0000 /* do not block */ +#ifdef M_WAITOK +#undef M_WAITOK +#define M_WAITOK 0x0001 /* ok to block */ +#endif +#ifdef M_ZERO +#undef M_ZERO +#endif +#define M_ZERO 0x0004 /* bzero the allocation */ + +#ifdef M_MAGIC +#undef M_MAGIC +#endif +#define M_MAGIC 877983977 + +#ifdef MALLOC_DEFINE +#undef MALLOC_DEFINE +#endif +#if AUDIT_MALLOC_DEBUG +struct au_malloc_type { + SInt64 mt_size; + SInt64 mt_maxsize; + SInt32 mt_inuse; + SInt32 mt_maxused; + unsigned mt_type; + unsigned mt_magic; + const char *mt_shortdesc; + const char *mt_lastcaller; +}; +typedef struct au_malloc_type au_malloc_type_t; + +#define MALLOC_DEFINE(type, shortdesc, longdesc) \ + au_malloc_type_t audit_##type[1] = { \ + { 0, 0, 0, 0, (type < NUM_MALLOC_TYPES) ? 
type :\ + M_AUDITUNKNOWN, M_MAGIC, shortdesc, NULL } \ + } + +extern au_malloc_type_t *audit_malloc_types[]; + +#else + +struct au_malloc_type { + uint32_t mt_magic; + const char *mt_shortdesc; +}; +typedef struct au_malloc_type au_malloc_type_t; + +#define MALLOC_DEFINE(type, shortdesc, longdesc) \ + au_malloc_type_t audit_##type[1] = { \ + {M_MAGIC, shortdesc } \ + } + +#endif /* AUDIT_MALLOC_DEBUG */ + +#ifdef MALLOC_DECLARE +#undef MALLOC_DECLARE +#endif +#define MALLOC_DECLARE(type) \ + extern au_malloc_type_t audit_##type[] + +#if AUDIT_MALLOC_DEBUG +#define malloc(sz, tp, fl) _audit_malloc(sz, audit_##tp, fl, __FUNCTION__) +void *_audit_malloc(size_t size, au_malloc_type_t *type, int flags, + const char *fn); +#else +#define malloc(sz, tp, fl) _audit_malloc(sz, audit_##tp, fl) +void *_audit_malloc(size_t size, au_malloc_type_t *type, int flags); +#endif + +#define free(ad, tp) _audit_free(ad, audit_##tp) +void _audit_free(void *addr, au_malloc_type_t *type); + +/* + * BSD condition variable. + */ +struct cv { + const char *cv_description; + int cv_waiters; +}; + +/* + * BSD mutex. + */ +struct mtx { + lck_mtx_t *mtx_lock; + lck_grp_t *mtx_grp; +}; + +/* + * BSD rw lock. + */ +struct rwlock { + lck_rw_t *rw_lock; + lck_grp_t *rw_grp; +}; + +/* + * Sleep lock. + */ +struct slck { + lck_mtx_t *sl_mtx; + lck_grp_t *sl_grp; + int sl_locked; + int sl_waiting; +}; + +/* + * Recursive lock. + */ +struct rlck { + lck_mtx_t *rl_mtx; + lck_grp_t *rl_grp; + uint32_t rl_recurse; + thread_t rl_thread; +}; + +/* + * BSD condition variables functions. 
+ */ +void _audit_cv_init(struct cv *cvp, const char *desc); +void _audit_cv_destroy(struct cv *cvp); +void _audit_cv_signal(struct cv *cvp); +void _audit_cv_broadcast(struct cv *cvp); +void _audit_cv_wait(struct cv *cvp, lck_mtx_t *mp, const char *desc); +int _audit_cv_wait_sig(struct cv *cvp, lck_mtx_t *mp, const char *desc); +#define cv_init(cvp, desc) _audit_cv_init(cvp, desc) +#define cv_destroy(cvp) _audit_cv_destroy(cvp) +#define cv_signal(cvp) _audit_cv_signal(cvp) +#define cv_broadcast(cvp) _audit_cv_broadcast(cvp) +#define cv_broadcastpri(cvp, pri) _audit_cv_broadcast(cvp) +#define cv_wait(cvp, mp) _audit_cv_wait(cvp, (mp)->mtx_lock, #cvp) +#define cv_wait_sig(cvp, mp) _audit_cv_wait_sig(cvp, (mp)->mtx_lock, #cvp) + +/* + * BSD Mutexes. + */ +#define LOCK_MAX_NAME 64 +#define mtx_init(mp, name, type, opts) do { \ + (mp)->mtx_grp = lck_grp_alloc_init(name, LCK_GRP_ATTR_NULL); \ + (mp)->mtx_lock = lck_mtx_alloc_init((mp)->mtx_grp, \ + LCK_ATTR_NULL); \ +} while(0) +#define mtx_lock(mp) lck_mtx_lock((mp)->mtx_lock) +#define mtx_unlock(mp) lck_mtx_unlock((mp)->mtx_lock) +#define mtx_destroy(mp) do { \ + if ((mp)->mtx_lock) { \ + lck_mtx_free((mp)->mtx_lock, (mp)->mtx_grp); \ + (mp)->mtx_lock = 0; \ + } \ + if ((mp)->mtx_grp) { \ + lck_grp_free((mp)->mtx_grp); \ + (mp)->mtx_grp = 0; \ + } \ +} while (0) + +/* + * Sleep lock functions. + */ +void _audit_slck_init(struct slck *lp, const char *grpname); +wait_result_t _audit_slck_lock(struct slck *lp, int intr); +void _audit_slck_unlock(struct slck *lp); +int _audit_slck_trylock(struct slck *lp); +void _audit_slck_assert(struct slck *lp, u_int assert); +void _audit_slck_destroy(struct slck *lp); +#define slck_init(lp, name) _audit_slck_init((lp), (name)) +#define slck_lock(lp) _audit_slck_lock((lp), 0) +#define slck_lock_sig(lp) (_audit_slck_lock((lp), 1) != THREAD_AWAKENED) +#define slck_unlock(lp) _audit_slck_unlock((lp)) +#define slck_destroy(lp) _audit_slck_destroy((lp)) + +/* + * Recursive lock functions. 
+ */ +void _audit_rlck_init(struct rlck *lp, const char *grpname); +void _audit_rlck_lock(struct rlck *lp); +void _audit_rlck_unlock(struct rlck *lp); +void _audit_rlck_assert(struct rlck *lp, u_int assert); +void _audit_rlck_destroy(struct rlck *lp); +#define rlck_init(lp, name) _audit_rlck_init((lp), (name)) +#define rlck_lock(lp) _audit_rlck_lock((lp)) +#define rlck_unlock(lp) _audit_rlck_unlock((lp)) +#define rlck_destroy(lp) _audit_rlck_destroy((lp)) + +/* + * BSD rw locks. + */ +#define rw_init(lp, name) do { \ + (lp)->rw_grp = lck_grp_alloc_init(name, LCK_GRP_ATTR_NULL); \ + (lp)->rw_lock = lck_rw_alloc_init((lp)->rw_grp, \ + LCK_ATTR_NULL); \ +} while(0) +#define rw_rlock(lp) lck_rw_lock_shared((lp)->rw_lock) +#define rw_runlock(lp) lck_rw_unlock_shared((lp)->rw_lock) +#define rw_wlock(lp) lck_rw_lock_exclusive((lp)->rw_lock) +#define rw_wunlock(lp) lck_rw_unlock_exclusive((lp)->rw_lock) +#define rw_destroy(lp) do { \ + if ((lp)->rw_lock) { \ + lck_rw_free((lp)->rw_lock, (lp)->rw_grp); \ + (lp)->rw_lock = 0; \ + } \ + if ((lp)->rw_grp) { \ + lck_grp_free((lp)->rw_grp); \ + (lp)->rw_grp = 0; \ + } \ +} while (0) + +#define MA_OWNED LCK_MTX_ASSERT_OWNED +#define RA_LOCKED LCK_RW_ASSERT_HELD +#define RA_RLOCKED LCK_RW_ASSERT_SHARED +#define RA_WLOCKED LCK_RW_ASSERT_EXCLUSIVE +#define SA_LOCKED LCK_RW_ASSERT_HELD +#define SA_XLOCKED LCK_RW_ASSERT_EXCLUSIVE +#define SL_OWNED LCK_MTX_ASSERT_OWNED +#define SL_NOTOWNED LCK_MTX_ASSERT_NOTOWNED +#if DIAGNOSTIC +#define mtx_assert(mp, wht) lck_mtx_assert((mp)->mtx_lock, wht) +#define rw_assert(lp, wht) lck_rw_assert((lp)->rw_lock, wht) +#define sx_assert(lp, wht) lck_rw_assert((lp)->sx_lock, wht) +#define rlck_assert(lp, wht) _audit_rlck_assert((lp), wht) +#define slck_assert(lp, wht) _audit_slck_assert((lp), wht) +#else +#define mtx_assert(mp, wht) +#define rw_assert(lp, wht) +#define sx_assert(lp, wht) +#define rlck_assert(lp, wht) +#define slck_assert(lp, wht) +#endif /* DIAGNOSTIC */ + +/* + * BSD (IPv6) event 
rate limiter. + */ +int _audit_ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps); +#define ppsratecheck(tv, cr, mr) _audit_ppsratecheck(tv, cr, mr) + +#endif /* defined(_KERNEL) || defined(KERNEL) */ +#endif /* _SECURITY_AUDIT_AUDIT_BSD_H */ diff --git a/bsd/security/audit/audit_bsm.c b/bsd/security/audit/audit_bsm.c new file mode 100644 index 000000000..0ee35a074 --- /dev/null +++ b/bsd/security/audit/audit_bsm.c @@ -0,0 +1,1960 @@ +/* + * Copyright (c) 1999-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#if CONFIG_AUDIT +MALLOC_DEFINE(M_AUDITBSM, "audit_bsm", "Audit BSM data"); + +#if CONFIG_MACF +#include +#endif + +static void audit_sys_auditon(struct audit_record *ar, + struct au_record *rec); +static void audit_sys_fcntl(struct kaudit_record *kar, + struct au_record *rec); + +/* + * Initialize the BSM auditing subsystem. + */ +void +kau_init(void) +{ + + au_evclassmap_init(); +} + +/* + * This call reserves memory for the audit record. Memory must be guaranteed + * before any auditable event can be generated. The au_record structure + * maintains a reference to the memory allocated above and also the list of + * tokens associated with this record. + */ +static struct au_record * +kau_open(void) +{ + struct au_record *rec; + + rec = malloc(sizeof(*rec), M_AUDITBSM, M_WAITOK); + rec->data = NULL; + TAILQ_INIT(&rec->token_q); + rec->len = 0; + rec->used = 1; + + return (rec); +} + +/* + * Store the token with the record descriptor. 
+ */
+static void
+kau_write(struct au_record *rec, struct au_token *tok)
+{
+
+	KASSERT(tok != NULL, ("kau_write: tok == NULL"));
+
+	TAILQ_INSERT_TAIL(&rec->token_q, tok, tokens);
+	rec->len += tok->len;
+}
+
+/*
+ * Close out the audit record by adding the header token, identifying any
+ * missing tokens.  Write out the tokens to the record memory.
+ *
+ * rec   - record whose queued tokens are serialized; on return rec->data
+ *         points at a newly allocated buffer holding header, body tokens and
+ *         trailer back to back, and rec->len is the total record size.
+ * ctime - event timestamp; converted to a struct timeval (nanoseconds
+ *         truncated to microseconds) for the header token.
+ * event - event identifier passed through to the header token.
+ *
+ * The extended header is used when the kernel audit info returned by
+ * audit_get_kinfo() carries a specific (non-wildcard) IPv4 or IPv6 address;
+ * otherwise the plain 32-bit header is used.  Any other address type panics.
+ */
+static void
+kau_close(struct au_record *rec, struct timespec *ctime, short event)
+{
+	u_char *dptr;
+	size_t tot_rec_size;
+	token_t *cur, *hdr, *trail;
+	struct timeval tm;
+	size_t hdrsize;
+	struct auditinfo_addr ak;
+	struct in6_addr *ap;
+
+	audit_get_kinfo(&ak);
+	hdrsize = 0;
+	switch (ak.ai_termid.at_type) {
+	case AU_IPv4:
+		hdrsize = (ak.ai_termid.at_addr[0] == INADDR_ANY) ?
+		    AUDIT_HEADER_SIZE : AUDIT_HEADER_EX_SIZE(&ak);
+		break;
+	case AU_IPv6:
+		ap = (struct in6_addr *)&ak.ai_termid.at_addr[0];
+		hdrsize = (IN6_IS_ADDR_UNSPECIFIED(ap)) ? AUDIT_HEADER_SIZE :
+		    AUDIT_HEADER_EX_SIZE(&ak);
+		break;
+	default:
+		panic("kau_close: invalid address family");
+	}
+
+	/*
+	 * Size the record with the header that will actually be emitted.
+	 * Using AUDIT_HEADER_SIZE unconditionally under-allocates whenever the
+	 * extended header is selected (it is expected to occupy
+	 * AUDIT_HEADER_EX_SIZE(&ak) bytes), and the copy loop below would then
+	 * write past the end of rec->data.
+	 */
+	tot_rec_size = rec->len + hdrsize + AUDIT_TRAILER_SIZE;
+	rec->data = malloc(tot_rec_size, M_AUDITBSM, M_WAITOK | M_ZERO);
+
+	tm.tv_usec = ctime->tv_nsec / 1000;
+	tm.tv_sec = ctime->tv_sec;
+	if (hdrsize != AUDIT_HEADER_SIZE)
+		hdr = au_to_header32_ex_tm(tot_rec_size, event, 0, tm, &ak);
+	else
+		hdr = au_to_header32_tm(tot_rec_size, event, 0, tm);
+	TAILQ_INSERT_HEAD(&rec->token_q, hdr, tokens);
+
+	trail = au_to_trailer(tot_rec_size);
+	TAILQ_INSERT_TAIL(&rec->token_q, trail, tokens);
+
+	/* Flatten the token queue, in order, into the record buffer. */
+	rec->len = tot_rec_size;
+	dptr = rec->data;
+	TAILQ_FOREACH(cur, &rec->token_q, tokens) {
+		memcpy(dptr, cur->t_data, cur->len);
+		dptr += cur->len;
+	}
+}
+
+/*
+ * Free a BSM audit record by releasing all the tokens and clearing the audit
+ * record information.
+ */
+void
+kau_free(struct au_record *rec)
+{
+	struct au_token *tok;
+
+	/* Free the token list. 
*/ + while ((tok = TAILQ_FIRST(&rec->token_q))) { + TAILQ_REMOVE(&rec->token_q, tok, tokens); + free(tok->t_data, M_AUDITBSM); + free(tok, M_AUDITBSM); + } + + rec->used = 0; + rec->len = 0; + free(rec->data, M_AUDITBSM); + free(rec, M_AUDITBSM); +} + +/* + * XXX: May want turn some (or all) of these macros into functions in order + * to reduce the generated code size. + * + * XXXAUDIT: These macros assume that 'kar', 'ar', 'rec', and 'tok' in the + * caller are OK with this. + */ +#if CONFIG_MACF +#define MAC_VNODE1_LABEL_TOKEN do { \ + if (ar->ar_vnode1_mac_labels != NULL && \ + strlen(ar->ar_vnode1_mac_labels) != 0) { \ + tok = au_to_text(ar->ar_vnode1_mac_labels); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define MAC_VNODE2_LABEL_TOKEN do { \ + if (ar->ar_vnode2_mac_labels != NULL && \ + strlen(ar->ar_vnode2_mac_labels) != 0) { \ + tok = au_to_text(ar->ar_vnode2_mac_labels); \ + kau_write(rec, tok); \ + } \ +} while (0) +#else +#define MAC_VNODE1_LABEL_TOKEN +#define MAC_VNODE2_LABEL_TOKEN +#endif +#define UPATH1_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_UPATH1)) { \ + tok = au_to_path(ar->ar_arg_upath1); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define UPATH2_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_UPATH2)) { \ + tok = au_to_path(ar->ar_arg_upath2); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define VNODE1_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_KPATH1)) { \ + tok = au_to_path(ar->ar_arg_kpath1); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_VNODE1)) { \ + tok = au_to_attr32(&ar->ar_arg_vnode1); \ + kau_write(rec, tok); \ + MAC_VNODE1_LABEL_TOKEN; \ + } \ +} while (0) + +#define UPATH1_VNODE1_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_UPATH1)) { \ + tok = au_to_path(ar->ar_arg_upath1); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_KPATH1)) { \ + tok = au_to_path(ar->ar_arg_kpath1); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_VNODE1)) { \ + tok = au_to_attr32(&ar->ar_arg_vnode1); \ + 
kau_write(rec, tok); \ + MAC_VNODE1_LABEL_TOKEN; \ + } \ +} while (0) + +#define VNODE2_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_VNODE2)) { \ + tok = au_to_attr32(&ar->ar_arg_vnode2); \ + kau_write(rec, tok); \ + MAC_VNODE2_LABEL_TOKEN; \ + } \ +} while (0) + +#define FD_VNODE1_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_VNODE1)) { \ + if (ARG_IS_VALID(kar, ARG_KPATH1)) { \ + tok = au_to_path(ar->ar_arg_kpath1); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_FD)) { \ + tok = au_to_arg32(1, "fd", ar->ar_arg_fd); \ + kau_write(rec, tok); \ + MAC_VNODE1_LABEL_TOKEN; \ + } \ + tok = au_to_attr32(&ar->ar_arg_vnode1); \ + kau_write(rec, tok); \ + } else { \ + if (ARG_IS_VALID(kar, ARG_FD)) { \ + tok = au_to_arg32(1, "fd", \ + ar->ar_arg_fd); \ + kau_write(rec, tok); \ + MAC_VNODE1_LABEL_TOKEN; \ + } \ + } \ +} while (0) + +#define PROCESS_PID_TOKENS(argn) do { \ + if ((ar->ar_arg_pid > 0) /* Reference a single process */ \ + && (ARG_IS_VALID(kar, ARG_PROCESS))) { \ + tok = au_to_process32_ex(ar->ar_arg_auid, \ + ar->ar_arg_euid, ar->ar_arg_egid, \ + ar->ar_arg_ruid, ar->ar_arg_rgid, \ + ar->ar_arg_pid, ar->ar_arg_asid, \ + &ar->ar_arg_termid_addr); \ + kau_write(rec, tok); \ + } else if (ARG_IS_VALID(kar, ARG_PID)) { \ + tok = au_to_arg32(argn, "process", ar->ar_arg_pid); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define EXTATTR_TOKENS do { \ + if (ARG_IS_VALID(kar, ARG_VALUE32)) { \ + switch (ar->ar_arg_value32) { \ + case EXTATTR_NAMESPACE_USER: \ + tok = au_to_text(EXTATTR_NAMESPACE_USER_STRING);\ + break; \ + case EXTATTR_NAMESPACE_SYSTEM: \ + tok = au_to_text(EXTATTR_NAMESPACE_SYSTEM_STRING);\ + break; \ + default: \ + tok = au_to_arg32(3, "attrnamespace", \ + ar->ar_arg_value32); \ + break; \ + } \ + kau_write(rec, tok); \ + } \ + /* attrname is in the text field */ \ + if (ARG_IS_VALID(kar, ARG_TEXT)) { \ + tok = au_to_text(ar->ar_arg_text); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define EXTENDED_TOKENS(n) do { \ + /* ACL data */ \ + 
if (ARG_IS_VALID(kar, ARG_OPAQUE)) { \ + tok = au_to_opaque(ar->ar_arg_opaque, \ + ar->ar_arg_opq_size); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_MODE)) { \ + tok = au_to_arg32(n+2, "mode", ar->ar_arg_mode);\ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_GID)) { \ + tok = au_to_arg32(n+1, "gid", ar->ar_arg_gid); \ + kau_write(rec, tok); \ + } \ + if (ARG_IS_VALID(kar, ARG_UID)) { \ + tok = au_to_arg32(n, "uid", ar->ar_arg_uid); \ + kau_write(rec, tok); \ + } \ +} while (0) + +#define PROCESS_MAC_TOKENS do { \ + if (ar->ar_valid_arg & ARG_MAC_STRING) { \ + tok = au_to_text(ar->ar_arg_mac_string); \ + kau_write(rec, tok); \ + } \ +} while (0) + +/* + * Implement auditing for the auditon() system call. The audit tokens that + * are generated depend on the command that was sent into the auditon() + * system call. + */ +static void +audit_sys_auditon(struct audit_record *ar, struct au_record *rec) +{ + struct au_token *tok; + + switch (ar->ar_arg_cmd) { + case A_OLDSETPOLICY: + if (ar->ar_arg_len > sizeof(int)) { + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg64(2, "policy", + ar->ar_arg_auditon.au_policy64); + kau_write(rec, tok); + break; + } + /* FALLTHROUGH */ + case A_SETPOLICY: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "policy", ar->ar_arg_auditon.au_policy); + kau_write(rec, tok); + break; + + case A_SETKMASK: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setkmask:as_success", + ar->ar_arg_auditon.au_mask.am_success); + kau_write(rec, tok); + tok = au_to_arg32(2, "setkmask:as_failure", + ar->ar_arg_auditon.au_mask.am_failure); + kau_write(rec, tok); + break; + + case A_OLDSETQCTRL: + if (ar->ar_arg_len > sizeof(au_qctrl_t)) { + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg64(2, "setqctrl:aq_hiwater", + 
ar->ar_arg_auditon.au_qctrl64.aq64_hiwater); + kau_write(rec, tok); + tok = au_to_arg64(2, "setqctrl:aq_lowater", + ar->ar_arg_auditon.au_qctrl64.aq64_lowater); + kau_write(rec, tok); + tok = au_to_arg64(2, "setqctrl:aq_bufsz", + ar->ar_arg_auditon.au_qctrl64.aq64_bufsz); + kau_write(rec, tok); + tok = au_to_arg64(2, "setqctrl:aq_delay", + ar->ar_arg_auditon.au_qctrl64.aq64_delay); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_minfree", + ar->ar_arg_auditon.au_qctrl64.aq64_minfree); + kau_write(rec, tok); + break; + } + /* FALLTHROUGH */ + case A_SETQCTRL: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_hiwater", + ar->ar_arg_auditon.au_qctrl.aq_hiwater); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_lowater", + ar->ar_arg_auditon.au_qctrl.aq_lowater); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_bufsz", + ar->ar_arg_auditon.au_qctrl.aq_bufsz); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_delay", + ar->ar_arg_auditon.au_qctrl.aq_delay); + kau_write(rec, tok); + tok = au_to_arg32(2, "setqctrl:aq_minfree", + ar->ar_arg_auditon.au_qctrl.aq_minfree); + kau_write(rec, tok); + break; + + case A_SETUMASK: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setumask:as_success", + ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); + kau_write(rec, tok); + tok = au_to_arg32(2, "setumask:as_failure", + ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); + kau_write(rec, tok); + break; + + case A_SETSMASK: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setsmask:as_success", + ar->ar_arg_auditon.au_auinfo.ai_mask.am_success); + kau_write(rec, tok); + tok = au_to_arg32(2, "setsmask:as_failure", + ar->ar_arg_auditon.au_auinfo.ai_mask.am_failure); + kau_write(rec, tok); + break; + + case A_OLDSETCOND: + if (ar->ar_arg_len > sizeof(int)) { + tok = au_to_arg32(3, "length", 
ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg64(2, "setcond", + ar->ar_arg_auditon.au_cond64); + kau_write(rec, tok); + break; + } + /* FALLTHROUGH */ + case A_SETCOND: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setcond", ar->ar_arg_auditon.au_cond); + kau_write(rec, tok); + break; + + case A_SETCLASS: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setclass:ec_event", + ar->ar_arg_auditon.au_evclass.ec_number); + kau_write(rec, tok); + tok = au_to_arg32(3, "setclass:ec_class", + ar->ar_arg_auditon.au_evclass.ec_class); + kau_write(rec, tok); + break; + + case A_SETPMASK: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setpmask:as_success", + ar->ar_arg_auditon.au_aupinfo.ap_mask.am_success); + kau_write(rec, tok); + tok = au_to_arg32(2, "setpmask:as_failure", + ar->ar_arg_auditon.au_aupinfo.ap_mask.am_failure); + kau_write(rec, tok); + break; + + case A_SETFSIZE: + tok = au_to_arg32(3, "length", ar->ar_arg_len); + kau_write(rec, tok); + tok = au_to_arg32(2, "setfsize:filesize", + ar->ar_arg_auditon.au_fstat.af_filesz); + kau_write(rec, tok); + break; + + default: + break; + } + tok = au_to_arg32(1, "cmd", ar->ar_arg_cmd); + kau_write(rec, tok); +} + +/* + * Implement auditing for the fcntl() system call. The audit tokens that + * are generated depend on the command that was sent into the fcntl() + * system call. 
+ */ +static void +audit_sys_fcntl(struct kaudit_record *kar, struct au_record *rec) +{ + struct au_token *tok; + struct audit_record *ar = &kar->k_ar; + + switch (ar->ar_arg_cmd) { + + case F_DUPFD: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "min fd", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case F_SETFD: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "close-on-exec flag", + ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case F_SETFL: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "fd flags", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case F_SETOWN: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "pid", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + +#ifdef F_SETSIZE + case F_SETSIZE: + if (ARG_IS_VALID(kar, ARG_VALUE64)) { + tok = au_to_arg64(3, "offset", ar->ar_arg_value64); + kau_write(rec, tok); + } + break; +#endif /* F_SETSIZE */ + +#ifdef F_PATHPKG_CHECK + case F_PATHPKG_CHECK: + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + break; +#endif + + default: + break; + } + tok = au_to_arg32(2, "cmd", au_fcntl_cmd_to_bsm(ar->ar_arg_cmd)); + kau_write(rec, tok); +} + +/* + * Convert an internal kernel audit record to a BSM record and return a + * success/failure indicator. The BSM record is passed as an out parameter to + * this function. + * + * Return conditions: + * BSM_SUCCESS: The BSM record is valid + * BSM_FAILURE: Failure; the BSM record is NULL. + * BSM_NOAUDIT: The event is not auditable for BSM; the BSM record is NULL. 
+ */ +int +kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) +{ + struct au_token *tok = NULL, *subj_tok; + struct au_record *rec; + au_tid_t tid; + struct audit_record *ar; + int ctr; + u_int uctr; + + KASSERT(kar != NULL, ("kaudit_to_bsm: kar == NULL")); + + *pau = NULL; + ar = &kar->k_ar; + rec = kau_open(); + + /* + * Create the subject token. + */ + switch (ar->ar_subj_term_addr.at_type) { + case AU_IPv4: + tid.port = ar->ar_subj_term_addr.at_port; + tid.machine = ar->ar_subj_term_addr.at_addr[0]; + subj_tok = au_to_subject32(ar->ar_subj_auid, /* audit ID */ + ar->ar_subj_cred.cr_uid, /* eff uid */ + ar->ar_subj_egid, /* eff group id */ + ar->ar_subj_ruid, /* real uid */ + ar->ar_subj_rgid, /* real group id */ + ar->ar_subj_pid, /* process id */ + ar->ar_subj_asid, /* session ID */ + &tid); + break; + case AU_IPv6: + subj_tok = au_to_subject32_ex(ar->ar_subj_auid, + ar->ar_subj_cred.cr_uid, + ar->ar_subj_egid, + ar->ar_subj_ruid, + ar->ar_subj_rgid, + ar->ar_subj_pid, + ar->ar_subj_asid, + &ar->ar_subj_term_addr); + break; + default: + bzero(&tid, sizeof(tid)); + subj_tok = au_to_subject32(ar->ar_subj_auid, + ar->ar_subj_cred.cr_uid, + ar->ar_subj_egid, + ar->ar_subj_ruid, + ar->ar_subj_rgid, + ar->ar_subj_pid, + ar->ar_subj_asid, + &tid); + } + + /* + * The logic inside each case fills in the tokens required for the + * event, except for the header, trailer, and return tokens. The + * header and trailer tokens are added by the kau_close() function. + * The return token is added outside of the switch statement. + */ + switch(ar->ar_event) { + case AUE_SENDFILE: + /* For sendfile the file and socket descriptor are both saved */ + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(2, "sd", ar->ar_arg_value32); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + case AUE_ACCEPT: + case AUE_BIND: + case AUE_LISTEN: + case AUE_CONNECT: + case AUE_RECVFROM: + case AUE_RECVMSG: + case AUE_SENDMSG: + case AUE_SENDTO: + /* + * Socket-related events. 
+ */ + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_SADDRINET)) { + tok = au_to_sock_inet((struct sockaddr_in *) + &ar->ar_arg_sockaddr); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_SADDRUNIX)) { + tok = au_to_sock_unix((struct sockaddr_un *) + &ar->ar_arg_sockaddr); + kau_write(rec, tok); + UPATH1_TOKENS; + } + if (ARG_IS_VALID(kar, ARG_SADDRINET6)) { + tok = au_to_sock_inet128((struct sockaddr_in6 *) + &ar->ar_arg_sockaddr); + kau_write(rec, tok); + } + break; + + case AUE_SOCKET: + case AUE_SOCKETPAIR: + if (ARG_IS_VALID(kar, ARG_SOCKINFO)) { + tok = au_to_arg32(1,"domain", + au_domain_to_bsm(ar->ar_arg_sockinfo.sai_domain)); + kau_write(rec, tok); + tok = au_to_arg32(2,"type", + au_socket_type_to_bsm(ar->ar_arg_sockinfo.sai_type)); + kau_write(rec, tok); + tok = au_to_arg32(3,"protocol", + ar->ar_arg_sockinfo.sai_protocol); + kau_write(rec, tok); + } + break; + + case AUE_SETSOCKOPT: + case AUE_SHUTDOWN: + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + break; + + case AUE_ACCT: + if (ARG_IS_VALID(kar, (ARG_KPATH1 | ARG_UPATH1))) { + UPATH1_VNODE1_TOKENS; + } else { + tok = au_to_arg32(1, "accounting off", 0); + kau_write(rec, tok); + } + break; + + case AUE_SETAUID: + if (ARG_IS_VALID(kar, ARG_AUID)) { + tok = au_to_arg32(2, "setauid", ar->ar_arg_auid); + kau_write(rec, tok); + } + break; + + case AUE_SETAUDIT: + if (ARG_IS_VALID(kar, ARG_AUID) && + ARG_IS_VALID(kar, ARG_ASID) && + ARG_IS_VALID(kar, ARG_AMASK) && + ARG_IS_VALID(kar, ARG_TERMID)) { + tok = au_to_arg32(1, "setaudit:auid", + ar->ar_arg_auid); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit:port", + ar->ar_arg_termid.port); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit:machine", + ar->ar_arg_termid.machine); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit:as_success", + ar->ar_arg_amask.am_success); + kau_write(rec, 
tok); + tok = au_to_arg32(1, "setaudit:as_failure", + ar->ar_arg_amask.am_failure); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit:asid", + ar->ar_arg_asid); + kau_write(rec, tok); + } + break; + + case AUE_SETAUDIT_ADDR: + if (ARG_IS_VALID(kar, ARG_AUID) && + ARG_IS_VALID(kar, ARG_ASID) && + ARG_IS_VALID(kar, ARG_AMASK) && + ARG_IS_VALID(kar, ARG_TERMID_ADDR)) { + tok = au_to_arg32(1, "setaudit_addr:auid", + ar->ar_arg_auid); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit_addr:as_success", + ar->ar_arg_amask.am_success); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit_addr:as_failure", + ar->ar_arg_amask.am_failure); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit_addr:asid", + ar->ar_arg_asid); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit_addr:type", + ar->ar_arg_termid_addr.at_type); + kau_write(rec, tok); + tok = au_to_arg32(1, "setaudit_addr:port", + ar->ar_arg_termid_addr.at_port); + kau_write(rec, tok); + if (ar->ar_arg_termid_addr.at_type == AU_IPv6) + tok = au_to_in_addr_ex((struct in6_addr *) + &ar->ar_arg_termid_addr.at_addr[0]); + if (ar->ar_arg_termid_addr.at_type == AU_IPv4) + tok = au_to_in_addr((struct in_addr *) + &ar->ar_arg_termid_addr.at_addr[0]); + kau_write(rec, tok); + } + break; + + case AUE_AUDITON: + /* + * For AUDITON commands without own event, audit the cmd. 
+ */ + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(1, "cmd", ar->ar_arg_cmd); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_AUDITON_GETCAR: + case AUE_AUDITON_GETCLASS: + case AUE_AUDITON_GETCOND: + case AUE_AUDITON_GETCWD: + case AUE_AUDITON_GETKMASK: + case AUE_AUDITON_GETSTAT: + case AUE_AUDITON_GPOLICY: + case AUE_AUDITON_GQCTRL: + case AUE_AUDITON_SETCLASS: + case AUE_AUDITON_SETCOND: + case AUE_AUDITON_SETKMASK: + case AUE_AUDITON_SETSMASK: + case AUE_AUDITON_SETSTAT: + case AUE_AUDITON_SETUMASK: + case AUE_AUDITON_SPOLICY: + case AUE_AUDITON_SQCTRL: + if (ARG_IS_VALID(kar, ARG_AUDITON)) + audit_sys_auditon(ar, rec); + break; + + case AUE_AUDITCTL: + UPATH1_VNODE1_TOKENS; + break; + + case AUE_EXIT: + if (ARG_IS_VALID(kar, ARG_EXIT)) { + tok = au_to_exit(ar->ar_arg_exitretval, + ar->ar_arg_exitstatus); + kau_write(rec, tok); + } + break; + + case AUE_ADJTIME: + case AUE_AUDIT: + case AUE_DUP2: + case AUE_GETAUDIT: + case AUE_GETAUDIT_ADDR: + case AUE_GETAUID: + case AUE_GETFSSTAT: + case AUE_KQUEUE: + case AUE_LSEEK: +#if 0 +/* XXXss replace with kext */ + case AUE_MODLOAD: + case AUE_MODUNLOAD: +#endif + case AUE_MAC_GETFSSTAT: + case AUE_PIPE: + case AUE_PROFILE: + case AUE_SEMSYS: + case AUE_SHMSYS: + case AUE_SETPGRP: + case AUE_SETRLIMIT: + case AUE_SETSID: + case AUE_SETTIMEOFDAY: + case AUE_KDEBUGTRACE: + case AUE_PTHREADSIGMASK: + /* + * Header, subject, and return tokens added at end. + */ + break; + + case AUE_MKFIFO: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(2, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_ACCESS_EXTENDED: + /* + * The access_extended() argument vector is stored in an + * opaque token. + */ + if (ARG_IS_VALID(kar, ARG_OPAQUE)) { + tok = au_to_opaque(ar->ar_arg_opaque, + ar->ar_arg_opq_size); + kau_write(rec, tok); + } + /* + * The access_extended() result vector is stored in an arbitrary + * data token. 
+ */ + if (ARG_IS_VALID(kar, ARG_DATA)) { + tok = au_to_data(AUP_DECIMAL, ar->ar_arg_data_type, + ar->ar_arg_data_count, ar->ar_arg_data); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_LSTAT_EXTENDED: + case AUE_STAT_EXTENDED: + case AUE_ACCESS: + case AUE_CHDIR: + case AUE_CHROOT: + case AUE_GETATTRLIST: + case AUE_NFS_GETFH: + case AUE_LSTAT: + case AUE_PATHCONF: + case AUE_READLINK: + case AUE_REVOKE: + case AUE_RMDIR: + case AUE_SEARCHFS: + case AUE_SETATTRLIST: + case AUE_STAT: + case AUE_STATFS: + case AUE_TRUNCATE: + case AUE_UNDELETE: + case AUE_UNLINK: + case AUE_UTIMES: + UPATH1_VNODE1_TOKENS; + break; + + case AUE_FHOPEN: + break; + + case AUE_CHFLAGS: + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_CHMOD: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(2, "new file mode", + ar->ar_arg_mode); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_CHOWN: + case AUE_LCHOWN: + if (ARG_IS_VALID(kar, ARG_UID)) { + tok = au_to_arg32(2, "new file uid", ar->ar_arg_uid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_GID)) { + tok = au_to_arg32(3, "new file gid", ar->ar_arg_gid); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_EXCHANGEDATA: + UPATH1_VNODE1_TOKENS; + UPATH2_TOKENS; + break; + + case AUE_CLOSE: + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(2, "fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_CORE: + if (ARG_IS_VALID(kar, ARG_SIGNUM)) { + tok = au_to_arg32(0, "signal", ar->ar_arg_signum); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_POSIX_SPAWN: + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(0, "child PID", ar->ar_arg_pid); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_EXECVE: + if (ARG_IS_VALID(kar, ARG_ARGV)) { + tok = 
au_to_exec_args(ar->ar_arg_argv, + ar->ar_arg_argc); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_ENVV)) { + tok = au_to_exec_env(ar->ar_arg_envv, + ar->ar_arg_envc); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_FCHMOD_EXTENDED: + EXTENDED_TOKENS(2); + FD_VNODE1_TOKENS; + break; + + case AUE_FCHMOD: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(2, "new file mode", + ar->ar_arg_mode); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + + case AUE_NFS_SVC: + tok = au_to_arg32(1, "request", ar->ar_arg_cmd); + kau_write(rec, tok); + if (ar->ar_valid_arg & (ARG_KPATH1 | ARG_UPATH1)) { + UPATH1_VNODE1_TOKENS; + } + break; + + /* + * XXXRW: Some of these need to handle non-vnode cases as well. + */ + case AUE_FSTAT_EXTENDED: + case AUE_FCHDIR: + case AUE_FPATHCONF: + case AUE_FSTAT: /* XXX Need to handle sockets and shm */ + case AUE_FSTATFS: + case AUE_FSYNC: + case AUE_FTRUNCATE: + case AUE_FUTIMES: + case AUE_GETDIRENTRIES: + case AUE_GETDIRENTRIESATTR: +#if 0 /* XXXss new */ + case AUE_POLL: +#endif + case AUE_READ: + case AUE_READV: + case AUE_PREAD: + case AUE_WRITE: + case AUE_WRITEV: + case AUE_PWRITE: + FD_VNODE1_TOKENS; + break; + + case AUE_FCHOWN: + if (ARG_IS_VALID(kar, ARG_UID)) { + tok = au_to_arg32(2, "new file uid", ar->ar_arg_uid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_GID)) { + tok = au_to_arg32(3, "new file gid", ar->ar_arg_gid); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + + case AUE_FCNTL: + if (ARG_IS_VALID(kar, ARG_CMD)) + audit_sys_fcntl(kar, rec); + FD_VNODE1_TOKENS; + break; + + case AUE_FSCTL: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(4, "options", ar->ar_arg_value32); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "cmd", ar->ar_arg_cmd); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_FFSCTL: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(4, "options", 
ar->ar_arg_value32); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "cmd", ar->ar_arg_cmd); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + + + case AUE_FCHFLAGS: + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + + case AUE_FLOCK: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "operation", ar->ar_arg_cmd); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + + case AUE_FORK: + case AUE_VFORK: + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(0, "child PID", ar->ar_arg_pid); + kau_write(rec, tok); + } + break; + + case AUE_GETLCID: + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); + kau_write(rec, tok); + } + break; + + case AUE_SETLCID: + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(2, "lcid", + (u_int32_t)ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case AUE_IOCTL: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "cmd", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE64)) { + tok = au_to_arg64(2, "cmd", ar->ar_arg_value64); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_ADDR64)) { + tok = au_to_arg64(3, "arg", ar->ar_arg_addr); + kau_write(rec, tok); + } else if (ARG_IS_VALID(kar, ARG_ADDR32)) { + tok = au_to_arg32(3, "arg", + (u_int32_t)ar->ar_arg_addr); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VNODE1)) + FD_VNODE1_TOKENS; + else { + if (ARG_IS_VALID(kar, ARG_SOCKINFO)) { + tok = au_to_socket_ex( + ar->ar_arg_sockinfo.sai_domain, + ar->ar_arg_sockinfo.sai_type, + (struct sockaddr *) + &ar->ar_arg_sockinfo.sai_laddr, + (struct sockaddr *) + &ar->ar_arg_sockinfo.sai_faddr); + kau_write(rec, tok); + } else { + if (ARG_IS_VALID(kar, ARG_FD)) { + tok 
= au_to_arg32(1, "fd", + ar->ar_arg_fd); + kau_write(rec, tok); + } + } + } + break; + + case AUE_KILL: + if (ARG_IS_VALID(kar, ARG_SIGNUM)) { + tok = au_to_arg32(2, "signal", ar->ar_arg_signum); + kau_write(rec, tok); + } + PROCESS_PID_TOKENS(1); + break; + + case AUE_LINK: + case AUE_RENAME: + UPATH1_VNODE1_TOKENS; + UPATH2_TOKENS; + break; + + case AUE_MKDIR_EXTENDED: + case AUE_CHMOD_EXTENDED: + case AUE_MKFIFO_EXTENDED: + EXTENDED_TOKENS(2); + UPATH1_VNODE1_TOKENS; + break; + + case AUE_MKDIR: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(2, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_MKNOD: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(2, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "dev", ar->ar_arg_value32); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_MMAP: + case AUE_MUNMAP: + case AUE_MPROTECT: + case AUE_MLOCK: + case AUE_MUNLOCK: + case AUE_MINHERIT: + if (ARG_IS_VALID(kar, ARG_ADDR64)) { + tok = au_to_arg64(1, "addr", ar->ar_arg_addr); + kau_write(rec, tok); + } else if (ARG_IS_VALID(kar, ARG_ADDR32)) { + tok = au_to_arg32(1, "addr", + (u_int32_t)ar->ar_arg_addr); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_LEN)) { + tok = au_to_arg64(2, "len", ar->ar_arg_len); + kau_write(rec, tok); + } + if (ar->ar_event == AUE_MMAP) + FD_VNODE1_TOKENS; + if (ar->ar_event == AUE_MPROTECT) { + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "protection", + ar->ar_arg_value32); + kau_write(rec, tok); + } + } + if (ar->ar_event == AUE_MINHERIT) { + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "inherit", + ar->ar_arg_value32); + kau_write(rec, tok); + } + } + break; + +#if CONFIG_MACF + case AUE_MAC_MOUNT: + PROCESS_MAC_TOKENS; + /* FALLTHROUGH */ +#endif + case AUE_MOUNT: + /* XXX Need to handle NFS mounts */ + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = 
au_to_arg32(3, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_UMOUNT: + case AUE_UNMOUNT: + UPATH1_VNODE1_TOKENS; + break; + + case AUE_MSGCTL: + ar->ar_event = audit_msgctl_to_event(ar->ar_arg_svipc_cmd); + /* FALLTHROUGH */ + + case AUE_MSGRCV: + case AUE_MSGSND: + tok = au_to_arg32(1, "msg ID", ar->ar_arg_svipc_id); + kau_write(rec, tok); + if (ar->ar_errno != EINVAL) { + tok = au_to_ipc(AT_IPC_MSG, ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + break; + + case AUE_MSGGET: + if (ar->ar_errno == 0) { + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_ipc(AT_IPC_MSG, + ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + } + break; + + case AUE_OPENAT_RC: + case AUE_OPENAT_RTC: + case AUE_OPENAT_RWC: + case AUE_OPENAT_RWTC: + case AUE_OPENAT_WC: + case AUE_OPENAT_WTC: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(3, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(3, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN_EXTENDED_RC: + case AUE_OPEN_EXTENDED_RTC: + case AUE_OPEN_EXTENDED_RWC: + case AUE_OPEN_EXTENDED_RWTC: + case AUE_OPEN_EXTENDED_WC: + case AUE_OPEN_EXTENDED_WTC: + EXTENDED_TOKENS(3); + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN_RC: + case AUE_OPEN_RTC: + case AUE_OPEN_RWC: + case AUE_OPEN_RWTC: + case AUE_OPEN_WC: + case AUE_OPEN_WTC: + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(3, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + 
kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPENAT: + case AUE_OPENAT_R: + case AUE_OPENAT_RT: + case AUE_OPENAT_RW: + case AUE_OPENAT_RWT: + case AUE_OPENAT_W: + case AUE_OPENAT_WT: + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(3, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN_EXTENDED: + case AUE_OPEN_EXTENDED_R: + case AUE_OPEN_EXTENDED_RT: + case AUE_OPEN_EXTENDED_RW: + case AUE_OPEN_EXTENDED_RWT: + case AUE_OPEN_EXTENDED_W: + case AUE_OPEN_EXTENDED_WT: + EXTENDED_TOKENS(3); + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_OPEN: + case AUE_OPEN_R: + case AUE_OPEN_RT: + case AUE_OPEN_RW: + case AUE_OPEN_RWT: + case AUE_OPEN_W: + case AUE_OPEN_WT: + if (ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_UNLINKAT: + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "dir fd", ar->ar_arg_fd); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_PTRACE: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(1, "request", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_ADDR64)) { + tok = au_to_arg64(3, "addr", ar->ar_arg_addr); + kau_write(rec, tok); + } else if (ARG_IS_VALID(kar, ARG_ADDR32)) { + tok = au_to_arg32(3, "addr", + (u_int32_t)ar->ar_arg_addr); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(4, "data", ar->ar_arg_value32); + kau_write(rec, tok); + } + PROCESS_PID_TOKENS(2); + break; + + case AUE_QUOTACTL: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(2, "command", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, 
ARG_UID)) { + tok = au_to_arg32(3, "uid", ar->ar_arg_uid); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_REBOOT: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(1, "howto", ar->ar_arg_cmd); + kau_write(rec, tok); + } + break; + + case AUE_SEMCTL: + ar->ar_event = audit_semctl_to_event(ar->ar_arg_svipc_cmd); + /* FALLTHROUGH */ + + case AUE_SEMOP: + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_arg32(1, "sem ID", ar->ar_arg_svipc_id); + kau_write(rec, tok); + if (ar->ar_errno != EINVAL) { + tok = au_to_ipc(AT_IPC_SEM, + ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + } + break; + + case AUE_SEMGET: + if (ar->ar_errno == 0) { + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_ipc(AT_IPC_SEM, + ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + } + break; + + case AUE_SETEGID: + if (ARG_IS_VALID(kar, ARG_EGID)) { + tok = au_to_arg32(1, "gid", ar->ar_arg_egid); + kau_write(rec, tok); + } + break; + + case AUE_SETEUID: + if (ARG_IS_VALID(kar, ARG_EUID)) { + tok = au_to_arg32(1, "uid", ar->ar_arg_euid); + kau_write(rec, tok); + } + break; + + case AUE_SETREGID: + if (ARG_IS_VALID(kar, ARG_RGID)) { + tok = au_to_arg32(1, "rgid", ar->ar_arg_rgid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_EGID)) { + tok = au_to_arg32(2, "egid", ar->ar_arg_egid); + kau_write(rec, tok); + } + break; + + case AUE_SETREUID: + if (ARG_IS_VALID(kar, ARG_RUID)) { + tok = au_to_arg32(1, "ruid", ar->ar_arg_ruid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_EUID)) { + tok = au_to_arg32(2, "euid", ar->ar_arg_euid); + kau_write(rec, tok); + } + break; + + case AUE_SETGID: + if (ARG_IS_VALID(kar, ARG_GID)) { + tok = au_to_arg32(1, "gid", ar->ar_arg_gid); + kau_write(rec, tok); + } + break; + + case AUE_SETUID: + if (ARG_IS_VALID(kar, ARG_UID)) { + tok = au_to_arg32(1, "uid", ar->ar_arg_uid); + kau_write(rec, tok); + } + break; + + case AUE_SETGROUPS: + if (ARG_IS_VALID(kar, ARG_GROUPSET)) { + for (uctr = 0; uctr < 
ar->ar_arg_groups.gidset_size; + uctr++) { + tok = au_to_arg32(1, "setgroups", + ar->ar_arg_groups.gidset[uctr]); + kau_write(rec, tok); + } + } + break; + + case AUE_SETLOGIN: + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + break; + + case AUE_SETPRIORITY: + if (ARG_IS_VALID(kar, ARG_CMD)) { + tok = au_to_arg32(1, "which", ar->ar_arg_cmd); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_UID)) { + tok = au_to_arg32(2, "who", ar->ar_arg_uid); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(2, "priority", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case AUE_SETPRIVEXEC: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(1, "flag", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + /* AUE_SHMAT, AUE_SHMCTL, AUE_SHMDT and AUE_SHMGET are SysV IPC */ + case AUE_SHMAT: + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_arg32(1, "shmid", ar->ar_arg_svipc_id); + kau_write(rec, tok); + /* XXXAUDIT: Does having the ipc token make sense? */ + tok = au_to_ipc(AT_IPC_SHM, ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_SVIPC_ADDR)) { + tok = au_to_arg64(2, "shmaddr", ar->ar_arg_svipc_addr); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_SVIPC_PERM)) { + tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); + kau_write(rec, tok); + } + break; + + case AUE_SHMCTL: + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_arg32(1, "shmid", ar->ar_arg_svipc_id); + kau_write(rec, tok); + /* XXXAUDIT: Does having the ipc token make sense? 
*/ + tok = au_to_ipc(AT_IPC_SHM, ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + switch (ar->ar_arg_svipc_cmd) { + case IPC_STAT: + ar->ar_event = AUE_SHMCTL_STAT; + break; + case IPC_RMID: + ar->ar_event = AUE_SHMCTL_RMID; + break; + case IPC_SET: + ar->ar_event = AUE_SHMCTL_SET; + if (ARG_IS_VALID(kar, ARG_SVIPC_PERM)) { + tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); + kau_write(rec, tok); + } + break; + default: + break; /* We will audit a bad command */ + } + break; + + case AUE_SHMDT: + if (ARG_IS_VALID(kar, ARG_SVIPC_ADDR)) { + tok = au_to_arg64(1, "shmaddr", + (int)(uintptr_t)ar->ar_arg_svipc_addr); + kau_write(rec, tok); + } + break; + + case AUE_SHMGET: + /* This is unusual; the return value is in an argument token */ + if (ARG_IS_VALID(kar, ARG_SVIPC_ID)) { + tok = au_to_arg32(0, "shmid", ar->ar_arg_svipc_id); + kau_write(rec, tok); + tok = au_to_ipc(AT_IPC_SHM, ar->ar_arg_svipc_id); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_SVIPC_PERM)) { + tok = au_to_ipc_perm(&ar->ar_arg_svipc_perm); + kau_write(rec, tok); + } + break; + + /* AUE_SHMOPEN, AUE_SHMUNLINK, AUE_SEMOPEN, AUE_SEMCLOSE + * and AUE_SEMUNLINK are Posix IPC */ + case AUE_SHMOPEN: + if (ARG_IS_VALID(kar, ARG_SVIPC_ADDR)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(3, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_SHMUNLINK: + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_POSIX_IPC_PERM)) { + struct ipc_perm perm; + + perm.uid = ar->ar_arg_pipc_perm.pipc_uid; + perm.gid = ar->ar_arg_pipc_perm.pipc_gid; + perm.cuid = ar->ar_arg_pipc_perm.pipc_uid; + perm.cgid = ar->ar_arg_pipc_perm.pipc_gid; + perm.mode = ar->ar_arg_pipc_perm.pipc_mode; + perm._seq = 0; + perm._key = 0; + tok = au_to_ipc_perm(&perm); + kau_write(rec, tok); + } + break; + + case AUE_SEMOPEN: + if 
(ARG_IS_VALID(kar, ARG_FFLAGS)) { + tok = au_to_arg32(2, "flags", ar->ar_arg_fflags); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_MODE)) { + tok = au_to_arg32(3, "mode", ar->ar_arg_mode); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(4, "value", ar->ar_arg_value32); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_SEMUNLINK: + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_POSIX_IPC_PERM)) { + struct ipc_perm perm; + + perm.uid = ar->ar_arg_pipc_perm.pipc_uid; + perm.gid = ar->ar_arg_pipc_perm.pipc_gid; + perm.cuid = ar->ar_arg_pipc_perm.pipc_uid; + perm.cgid = ar->ar_arg_pipc_perm.pipc_gid; + perm.mode = ar->ar_arg_pipc_perm.pipc_mode; + perm._seq = 0; + perm._key = 0; + tok = au_to_ipc_perm(&perm); + kau_write(rec, tok); + } + break; + + case AUE_SEMCLOSE: + if (ARG_IS_VALID(kar, ARG_FD)) { + tok = au_to_arg32(1, "sem", ar->ar_arg_fd); + kau_write(rec, tok); + } + break; + + case AUE_SYMLINK: + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_SYSCTL: + case AUE_SYSCTL_NONADMIN: + if (ARG_IS_VALID(kar, ARG_CTLNAME | ARG_LEN)) { + for (ctr = 0; ctr < (int)ar->ar_arg_len; ctr++) { + tok = au_to_arg32(1, "name", + ar->ar_arg_ctlname[ctr]); + kau_write(rec, tok); + } + } + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(5, "newval", ar->ar_arg_value32); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + break; + + case AUE_UMASK_EXTENDED: + /* ACL data */ + if (ARG_IS_VALID(kar, ARG_OPAQUE)) { + tok = au_to_opaque(ar->ar_arg_opaque, + ar->ar_arg_opq_size); + kau_write(rec, tok); + } + /* FALLTHROUGH */ + + case AUE_UMASK: + if (ARG_IS_VALID(kar, ARG_MASK)) { + tok = au_to_arg32(1, "new mask", ar->ar_arg_mask); + kau_write(rec, tok); + } + tok = 
au_to_arg32(0, "prev mask", ar->ar_retval); + kau_write(rec, tok); + break; + + case AUE_WAIT4: +#if 0 /* XXXss - new */ + case AUE_WAITID: +#endif + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(0, "pid", ar->ar_arg_pid); + kau_write(rec, tok); + } + break; + + case AUE_FSGETPATH: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "volfsid", ar->ar_arg_value32); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_VALUE64)) { + tok = au_to_arg64(4, "objid", ar->ar_arg_value64); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_TEXT)) { + tok = au_to_text(ar->ar_arg_text); + kau_write(rec, tok); + } + break; + + /************************ + * Mach system calls * + ************************/ + case AUE_INITPROCESS: + break; + + case AUE_PIDFORTASK: + if (ARG_IS_VALID(kar, ARG_MACHPORT1)) { + tok = au_to_arg32(1, "port", + (u_int32_t)ar->ar_arg_mach_port1); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(2, "pid", (u_int32_t)ar->ar_arg_pid); + kau_write(rec, tok); + } + break; + + case AUE_TASKFORPID: + case AUE_TASKNAMEFORPID: + if (ARG_IS_VALID(kar, ARG_MACHPORT1)) { + tok = au_to_arg32(1, "target port", + (u_int32_t)ar->ar_arg_mach_port1); + kau_write(rec, tok); + } + if (ARG_IS_VALID(kar, ARG_MACHPORT2)) { + tok = au_to_arg32(3, "task port", + (u_int32_t)ar->ar_arg_mach_port2); + kau_write(rec, tok); + } + PROCESS_PID_TOKENS(2); + break; + + case AUE_SWAPON: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(4, "priority", + (u_int32_t)ar->ar_arg_value32); + kau_write(rec, tok); + } + UPATH1_VNODE1_TOKENS; + break; + + case AUE_SWAPOFF: + UPATH1_VNODE1_TOKENS; + break; + + case AUE_MAPFD: + if (ARG_IS_VALID(kar, ARG_ADDR64)) { + tok = au_to_arg64(3, "va", ar->ar_arg_addr); + kau_write(rec, tok); + } else if (ARG_IS_VALID(kar, ARG_ADDR32)) { + tok = au_to_arg32(3, "va", + (u_int32_t)ar->ar_arg_addr); + kau_write(rec, tok); + } + FD_VNODE1_TOKENS; + break; + +#if CONFIG_MACF + case 
AUE_MAC_GET_FILE: + case AUE_MAC_SET_FILE: + case AUE_MAC_GET_LINK: + case AUE_MAC_SET_LINK: + case AUE_MAC_GET_MOUNT: + UPATH1_VNODE1_TOKENS; + PROCESS_MAC_TOKENS; + break; + + case AUE_MAC_GET_FD: + case AUE_MAC_SET_FD: + FD_VNODE1_TOKENS; + PROCESS_MAC_TOKENS; + break; + + case AUE_MAC_SYSCALL: + PROCESS_MAC_TOKENS; + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(3, "call", ar->ar_arg_value32); + kau_write(rec, tok); + } + break; + + case AUE_MAC_EXECVE: + UPATH1_VNODE1_TOKENS; + PROCESS_MAC_TOKENS; + break; + + case AUE_MAC_GET_PID: + if (ARG_IS_VALID(kar, ARG_PID)) { + tok = au_to_arg32(1, "pid", (u_int32_t)ar->ar_arg_pid); + kau_write(rec, tok); + } + PROCESS_MAC_TOKENS; + break; + + case AUE_MAC_GET_LCID: + if (ARG_IS_VALID(kar, ARG_VALUE32)) { + tok = au_to_arg32(1, "lcid", + (u_int32_t)ar->ar_arg_value32); + kau_write(rec, tok); + } + PROCESS_MAC_TOKENS; + break; + + case AUE_MAC_GET_PROC: + case AUE_MAC_SET_PROC: + case AUE_MAC_GET_LCTX: + case AUE_MAC_SET_LCTX: + PROCESS_MAC_TOKENS; + break; +#endif + case AUE_NULL: + default: +#if DIAGNOSTIC + printf("BSM conversion requested for unknown event %d\n", + ar->ar_event); +#endif + + /* + * Write the subject token so it is properly freed here. 
+ */ + kau_write(rec, subj_tok); + kau_free(rec); + return (BSM_NOAUDIT); + } + +#if CONFIG_MACF + do { + /* Convert the audit data from the MAC policies */ + struct mac_audit_record *mar; + + LIST_FOREACH(mar, ar->ar_mac_records, records) { + switch (mar->type) { + case MAC_AUDIT_DATA_TYPE: + tok = au_to_data(AUP_BINARY, AUR_BYTE, + mar->length, + (const char *)mar->data); + break; + case MAC_AUDIT_TEXT_TYPE: + tok = au_to_text((char*) mar->data); + break; + default: + /* + * XXX: we can either continue, + * skipping this particular entry, + * or we can pre-verify the list and + * abort before writing any records + */ + printf("kaudit_to_bsm(): " + "BSM conversion requested for" + "unknown mac_audit data type %d\n", + mar->type); + } + + kau_write(rec, tok); + } + } while (0); +#endif + + kau_write(rec, subj_tok); + +#if CONFIG_MACF + if (ar->ar_cred_mac_labels != NULL && + strlen(ar->ar_cred_mac_labels) != 0) { + tok = au_to_text(ar->ar_cred_mac_labels); + kau_write(rec, tok); + } +#endif + + tok = au_to_return32(au_errno_to_bsm(ar->ar_errno), ar->ar_retval); + kau_write(rec, tok); /* Every record gets a return token */ + + kau_close(rec, &ar->ar_endtime, ar->ar_event); + + *pau = rec; + return (BSM_SUCCESS); +} + +/* + * Verify that a record is a valid BSM record. This verification is simple + * now, but may be expanded on sometime in the future. Return 1 if the + * record is good, 0 otherwise. + */ +int +bsm_rec_verify(void *rec) +{ + char c = *(char *)rec; + + /* + * Check the token ID of the first token; it has to be a header + * token. + * + * XXXAUDIT There needs to be a token structure to map a token. + * XXXAUDIT 'Shouldn't be simply looking at the first char. 
+ */ + if ((c != AUT_HEADER32) && (c != AUT_HEADER32_EX) && + (c != AUT_HEADER64) && (c != AUT_HEADER64_EX)) + return (0); + return (1); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsm_domain.c b/bsd/security/audit/audit_bsm_domain.c new file mode 100644 index 000000000..998f65050 --- /dev/null +++ b/bsd/security/audit/audit_bsm_domain.c @@ -0,0 +1,500 @@ +/*- + * Copyright (c) 2008-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include + +#include + +#include +#include + +#if CONFIG_AUDIT +struct bsm_domain { + u_short bd_bsm_domain; + int bd_local_domain; +}; + +#define PF_NO_LOCAL_MAPPING -600 + +static const struct bsm_domain bsm_domains[] = { + { BSM_PF_UNSPEC, PF_UNSPEC }, + { BSM_PF_LOCAL, PF_LOCAL }, + { BSM_PF_INET, PF_INET }, + { BSM_PF_IMPLINK, +#ifdef PF_IMPLINK + PF_IMPLINK +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_PUP, +#ifdef PF_PUP + PF_PUP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_CHAOS, +#ifdef PF_CHAOS + PF_CHAOS +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NS, +#ifdef PF_NS + PF_NS +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NBS, +#ifdef PF_NBS + PF_NBS +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ECMA, +#ifdef PF_ECMA + PF_ECMA +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_DATAKIT, +#ifdef PF_DATAKIT + PF_DATAKIT +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_CCITT, +#ifdef PF_CCITT + PF_CCITT +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_SNA, PF_SNA }, + { BSM_PF_DECnet, PF_DECnet }, + { BSM_PF_DLI, +#ifdef PF_DLI + PF_DLI +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_LAT, +#ifdef PF_LAT + PF_LAT +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_HYLINK, +#ifdef PF_HYLINK + PF_HYLINK +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_APPLETALK, PF_APPLETALK }, + { BSM_PF_NIT, +#ifdef PF_NIT + PF_NIT +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_802, +#ifdef PF_802 + PF_802 +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_OSI, +#ifdef PF_OSI + PF_OSI +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_X25, +#ifdef PF_X25 + PF_X25 +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_OSINET, +#ifdef PF_OSINET + PF_OSINET +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_GOSIP, +#ifdef PF_GOSIP + PF_GOSIP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_IPX, PF_IPX }, + { BSM_PF_ROUTE, PF_ROUTE }, + { BSM_PF_LINK, +#ifdef PF_LINK + PF_LINK +#else + 
PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_INET6, PF_INET6 }, + { BSM_PF_KEY, PF_KEY }, + { BSM_PF_NCA, +#ifdef PF_NCA + PF_NCA +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_POLICY, +#ifdef PF_POLICY + PF_POLICY +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_INET_OFFLOAD, +#ifdef PF_INET_OFFLOAD + PF_INET_OFFLOAD +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NETBIOS, +#ifdef PF_NETBIOS + PF_NETBIOS +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ISO, +#ifdef PF_ISO + PF_ISO +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_XTP, +#ifdef PF_XTP + PF_XTP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_COIP, +#ifdef PF_COIP + PF_COIP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_CNT, +#ifdef PF_CNT + PF_CNT +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_RTIP, +#ifdef PF_RTIP + PF_RTIP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_SIP, +#ifdef PF_SIP + PF_SIP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_PIP, +#ifdef PF_PIP + PF_PIP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ISDN, +#ifdef PF_ISDN + PF_ISDN +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_E164, +#ifdef PF_E164 + PF_E164 +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NATM, +#ifdef PF_NATM + PF_NATM +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ATM, +#ifdef PF_ATM + PF_ATM +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NETGRAPH, +#ifdef PF_NETGRAPH + PF_NETGRAPH +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_SLOW, +#ifdef PF_SLOW + PF_SLOW +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_SCLUSTER, +#ifdef PF_SCLUSTER + PF_SCLUSTER +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ARP, +#ifdef PF_ARP + PF_ARP +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_BLUETOOTH, +#ifdef PF_BLUETOOTH + PF_BLUETOOTH +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_IEEE80211, +#ifdef PF_IEEE80211 + PF_IEEE80211 +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_AX25, +#ifdef PF_AX25 + PF_AX25 +#else + 
PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ROSE, +#ifdef PF_ROSE + PF_ROSE +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_NETBEUI, +#ifdef PF_NETBEUI + PF_NETBEUI +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_SECURITY, +#ifdef PF_SECURITY + PF_SECURITY +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_PACKET, +#ifdef PF_PACKET + PF_PACKET +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ASH, +#ifdef PF_ASH + PF_ASH +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ECONET, +#ifdef PF_ECONET + PF_ECONET +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_ATMSVC, +#ifdef PF_ATMSVC + PF_ATMSVC +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_IRDA, +#ifdef PF_IRDA + PF_IRDA +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_PPPOX, +#ifdef PF_PPPOX + PF_PPPOX +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_WANPIPE, +#ifdef PF_WANPIPE + PF_WANPIPE +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_LLC, +#ifdef PF_LLC + PF_LLC +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_CAN, +#ifdef PF_CAN + PF_CAN +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_TIPC, +#ifdef PF_TIPC + PF_TIPC +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_IUCV, +#ifdef PF_IUCV + PF_IUCV +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_RXRPC, +#ifdef PF_RXRPC + PF_RXRPC +#else + PF_NO_LOCAL_MAPPING +#endif + }, + { BSM_PF_PHONET, +#ifdef PF_PHONET + PF_PHONET +#else + PF_NO_LOCAL_MAPPING +#endif + }, +}; +static const int bsm_domains_count = sizeof(bsm_domains) / + sizeof(bsm_domains[0]); + +static const struct bsm_domain * +bsm_lookup_local_domain(int local_domain) +{ + int i; + + for (i = 0; i < bsm_domains_count; i++) { + if (bsm_domains[i].bd_local_domain == local_domain) + return (&bsm_domains[i]); + } + return (NULL); +} + +u_short +au_domain_to_bsm(int local_domain) +{ + const struct bsm_domain *bstp; + + bstp = bsm_lookup_local_domain(local_domain); + if (bstp == NULL) + return (BSM_PF_UNKNOWN); + return (bstp->bd_bsm_domain); +} + 
+static const struct bsm_domain * +bsm_lookup_bsm_domain(u_short bsm_domain) +{ + int i; + + for (i = 0; i < bsm_domains_count; i++) { + if (bsm_domains[i].bd_bsm_domain == bsm_domain) + return (&bsm_domains[i]); + } + return (NULL); +} + +int +au_bsm_to_domain(u_short bsm_domain, int *local_domainp) +{ + const struct bsm_domain *bstp; + + bstp = bsm_lookup_bsm_domain(bsm_domain); + if (bstp == NULL || bstp->bd_local_domain) + return (-1); + *local_domainp = bstp->bd_local_domain; + return (0); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsm_errno.c b/bsd/security/audit/audit_bsm_errno.c new file mode 100644 index 000000000..fe24ed2a6 --- /dev/null +++ b/bsd/security/audit/audit_bsm_errno.c @@ -0,0 +1,661 @@ +/*- + * Copyright (c) 2008-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * Different operating systems use different numeric constants for different
 * error numbers, and sometimes error numbers don't exist in more than one
 * operating system.  These routines convert between BSM and local error
 * number spaces, subject to the above realities.  BSM error numbers are
 * stored in a single 8-bit character, so don't have a byte order.
 *
 * Don't include string definitions when this code is compiled into a kernel.
 */
struct bsm_errno {
	int		 be_bsm_errno;		/* BSM_ERRNO_* constant. */
	int		 be_local_errno;	/* Local errno value or sentinel. */
#if !defined(KERNEL) && !defined(_KERNEL)
	const char	*be_strerror;		/* Message text; non-kernel builds only. */
#endif
};

/*
 * Sentinel stored in be_local_errno when the local system has no errno
 * constant corresponding to the BSM error.
 */
#define	ERRNO_NO_LOCAL_MAPPING	-600

/*
 * ES() wraps the message string in each table initializer: it passes the
 * string through in non-kernel builds and expands to nothing in kernel
 * builds, where struct bsm_errno has no be_strerror member.
 */
#if !defined(KERNEL) && !defined(_KERNEL)
#define	ES(x)	x
#else
#define	ES(x)
#endif
+ * + * XXXRW: We currently embed English-language error strings here, but should + * support catalogues; these are only used if the OS doesn't have an error + * string using strerror(3). + */ +static const struct bsm_errno bsm_errnos[] = { + { BSM_ERRNO_ESUCCESS, 0, ES("Success") }, + { BSM_ERRNO_EPERM, EPERM, ES("Operation not permitted") }, + { BSM_ERRNO_ENOENT, ENOENT, ES("No such file or directory") }, + { BSM_ERRNO_ESRCH, ESRCH, ES("No such process") }, + { BSM_ERRNO_EINTR, EINTR, ES("Interrupted system call") }, + { BSM_ERRNO_EIO, EIO, ES("Input/output error") }, + { BSM_ERRNO_ENXIO, ENXIO, ES("Device not configured") }, + { BSM_ERRNO_E2BIG, E2BIG, ES("Argument list too long") }, + { BSM_ERRNO_ENOEXEC, ENOEXEC, ES("Exec format error") }, + { BSM_ERRNO_EBADF, EBADF, ES("Bad file descriptor") }, + { BSM_ERRNO_ECHILD, ECHILD, ES("No child processes") }, + { BSM_ERRNO_EAGAIN, EAGAIN, ES("Resource temporarily unavailable") }, + { BSM_ERRNO_ENOMEM, ENOMEM, ES("Cannot allocate memory") }, + { BSM_ERRNO_EACCES, EACCES, ES("Permission denied") }, + { BSM_ERRNO_EFAULT, EFAULT, ES("Bad address") }, + { BSM_ERRNO_ENOTBLK, ENOTBLK, ES("Block device required") }, + { BSM_ERRNO_EBUSY, EBUSY, ES("Device busy") }, + { BSM_ERRNO_EEXIST, EEXIST, ES("File exists") }, + { BSM_ERRNO_EXDEV, EXDEV, ES("Cross-device link") }, + { BSM_ERRNO_ENODEV, ENODEV, ES("Operation not supported by device") }, + { BSM_ERRNO_ENOTDIR, ENOTDIR, ES("Not a directory") }, + { BSM_ERRNO_EISDIR, EISDIR, ES("Is a directory") }, + { BSM_ERRNO_EINVAL, EINVAL, ES("Invalid argument") }, + { BSM_ERRNO_ENFILE, ENFILE, ES("Too many open files in system") }, + { BSM_ERRNO_EMFILE, EMFILE, ES("Too many open files") }, + { BSM_ERRNO_ENOTTY, ENOTTY, ES("Inappropriate ioctl for device") }, + { BSM_ERRNO_ETXTBSY, ETXTBSY, ES("Text file busy") }, + { BSM_ERRNO_EFBIG, EFBIG, ES("File too large") }, + { BSM_ERRNO_ENOSPC, ENOSPC, ES("No space left on device") }, + { BSM_ERRNO_ESPIPE, ESPIPE, ES("Illegal seek") }, + { 
BSM_ERRNO_EROFS, EROFS, ES("Read-only file system") }, + { BSM_ERRNO_EMLINK, EMLINK, ES("Too many links") }, + { BSM_ERRNO_EPIPE, EPIPE, ES("Broken pipe") }, + { BSM_ERRNO_EDOM, EDOM, ES("Numerical argument out of domain") }, + { BSM_ERRNO_ERANGE, ERANGE, ES("Result too large") }, + { BSM_ERRNO_ENOMSG, ENOMSG, ES("No message of desired type") }, + { BSM_ERRNO_EIDRM, EIDRM, ES("Identifier removed") }, + { BSM_ERRNO_ECHRNG, +#ifdef ECHRNG + ECHRNG, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Channel number out of range") }, + { BSM_ERRNO_EL2NSYNC, +#ifdef EL2NSYNC + EL2NSYNC, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Level 2 not synchronized") }, + { BSM_ERRNO_EL3HLT, +#ifdef EL3HLT + EL3HLT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Level 3 halted") }, + { BSM_ERRNO_EL3RST, +#ifdef EL3RST + EL3RST, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Level 3 reset") }, + { BSM_ERRNO_ELNRNG, +#ifdef ELNRNG + ELNRNG, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Link number out of range") }, + { BSM_ERRNO_EUNATCH, +#ifdef EUNATCH + EUNATCH, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Protocol driver not attached") }, + { BSM_ERRNO_ENOCSI, +#ifdef ENOCSI + ENOCSI, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("No CSI structure available") }, + { BSM_ERRNO_EL2HLT, +#ifdef EL2HLT + EL2HLT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Level 2 halted") }, + { BSM_ERRNO_EDEADLK, EDEADLK, ES("Resource deadlock avoided") }, + { BSM_ERRNO_ENOLCK, ENOLCK, ES("No locks available") }, + { BSM_ERRNO_ECANCELED, ECANCELED, ES("Operation canceled") }, + { BSM_ERRNO_ENOTSUP, ENOTSUP, ES("Operation not supported") }, + { BSM_ERRNO_EDQUOT, EDQUOT, ES("Disc quota exceeded") }, + { BSM_ERRNO_EBADE, +#ifdef EBADE + EBADE, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Invalid exchange") }, + { BSM_ERRNO_EBADR, +#ifdef EBADR + EBADR, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Invalid request descriptor") }, + { BSM_ERRNO_EXFULL, +#ifdef EXFULL + EXFULL, +#else + 
ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Exchange full") }, + { BSM_ERRNO_ENOANO, +#ifdef ENOANO + ENOANO, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("No anode") }, + { BSM_ERRNO_EBADRQC, +#ifdef EBADRQC + EBADRQC, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Invalid request descriptor") }, + { BSM_ERRNO_EBADSLT, +#ifdef EBADSLT + EBADSLT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Invalid slot") }, + { BSM_ERRNO_EDEADLOCK, +#ifdef EDEADLOCK + EDEADLOCK, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Resource deadlock avoided") }, + { BSM_ERRNO_EBFONT, +#ifdef EBFONT + EBFONT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Bad font file format") }, + { BSM_ERRNO_EOWNERDEAD, +#ifdef EOWNERDEAD + EOWNERDEAD, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Process died with the lock") }, + { BSM_ERRNO_ENOTRECOVERABLE, +#ifdef ENOTRECOVERABLE + ENOTRECOVERABLE, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Lock is not recoverable") }, + { BSM_ERRNO_ENOSTR, +#ifdef ENOSTR + ENOSTR, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Device not a stream") }, + { BSM_ERRNO_ENONET, +#ifdef ENONET + ENONET, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Machine is not on the network") }, + { BSM_ERRNO_ENOPKG, +#ifdef ENOPKG + ENOPKG, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Package not installed") }, + { BSM_ERRNO_EREMOTE, EREMOTE, + ES("Too many levels of remote in path") }, + { BSM_ERRNO_ENOLINK, +#ifdef ENOLINK + ENOLINK, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Link has been severed") }, + { BSM_ERRNO_EADV, +#ifdef EADV + EADV, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Advertise error") }, + { BSM_ERRNO_ESRMNT, +#ifdef ESRMNT + ESRMNT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("srmount error") }, + { BSM_ERRNO_ECOMM, +#ifdef ECOMM + ECOMM, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Communication error on send") }, + { BSM_ERRNO_EPROTO, +#ifdef EPROTO + EPROTO, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Protocol error") }, + { 
BSM_ERRNO_ELOCKUNMAPPED, +#ifdef ELOCKUNMAPPED + ELOCKUNMAPPED, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Locked lock was unmapped") }, + { BSM_ERRNO_ENOTACTIVE, +#ifdef ENOTACTIVE + ENOTACTIVE, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Facility is not active") }, + { BSM_ERRNO_EMULTIHOP, +#ifdef EMULTIHOP + EMULTIHOP, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Multihop attempted") }, + { BSM_ERRNO_EBADMSG, +#ifdef EBADMSG + EBADMSG, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Bad message") }, + { BSM_ERRNO_ENAMETOOLONG, ENAMETOOLONG, ES("File name too long") }, + { BSM_ERRNO_EOVERFLOW, EOVERFLOW, + ES("Value too large to be stored in data type") }, + { BSM_ERRNO_ENOTUNIQ, +#ifdef ENOTUNIQ + ENOTUNIQ, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Given log name not unique") }, + { BSM_ERRNO_EBADFD, +#ifdef EBADFD + EBADFD, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Given f.d. invalid for this operation") }, + { BSM_ERRNO_EREMCHG, +#ifdef EREMCHG + EREMCHG, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Remote address changed") }, + { BSM_ERRNO_ELIBACC, +#ifdef ELIBACC + ELIBACC, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Can't access a needed shared lib") }, + { BSM_ERRNO_ELIBBAD, +#ifdef ELIBBAD + ELIBBAD, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Accessing a corrupted shared lib") }, + { BSM_ERRNO_ELIBSCN, +#ifdef ELIBSCN + ELIBSCN, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES(".lib section in a.out corrupted") }, + { BSM_ERRNO_ELIBMAX, +#ifdef ELIBMAX + ELIBMAX, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Attempting to link in too many libs") }, + { BSM_ERRNO_ELIBEXEC, +#ifdef ELIBEXEC + ELIBEXEC, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Attempting to exec a shared library") }, + { BSM_ERRNO_EILSEQ, EILSEQ, ES("Illegal byte sequence") }, + { BSM_ERRNO_ENOSYS, ENOSYS, ES("Function not implemented") }, + { BSM_ERRNO_ELOOP, ELOOP, ES("Too many levels of symbolic links") }, + { BSM_ERRNO_ERESTART, +#ifdef ERESTART + ERESTART, 
+#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Restart syscall") }, + { BSM_ERRNO_ESTRPIPE, +#ifdef ESTRPIPE + ESTRPIPE, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("If pipe/FIFO, don't sleep in stream head") }, + { BSM_ERRNO_ENOTEMPTY, ENOTEMPTY, ES("Directory not empty") }, + { BSM_ERRNO_EUSERS, EUSERS, ES("Too many users") }, + { BSM_ERRNO_ENOTSOCK, ENOTSOCK, + ES("Socket operation on non-socket") }, + { BSM_ERRNO_EDESTADDRREQ, EDESTADDRREQ, + ES("Destination address required") }, + { BSM_ERRNO_EMSGSIZE, EMSGSIZE, ES("Message too long") }, + { BSM_ERRNO_EPROTOTYPE, EPROTOTYPE, + ES("Protocol wrong type for socket") }, + { BSM_ERRNO_ENOPROTOOPT, ENOPROTOOPT, ES("Protocol not available") }, + { BSM_ERRNO_EPROTONOSUPPORT, EPROTONOSUPPORT, + ES("Protocol not supported") }, + { BSM_ERRNO_ESOCKTNOSUPPORT, ESOCKTNOSUPPORT, + ES("Socket type not supported") }, + { BSM_ERRNO_EOPNOTSUPP, EOPNOTSUPP, ES("Operation not supported") }, + { BSM_ERRNO_EPFNOSUPPORT, EPFNOSUPPORT, + ES("Protocol family not supported") }, + { BSM_ERRNO_EAFNOSUPPORT, EAFNOSUPPORT, + ES("Address family not supported by protocol family") }, + { BSM_ERRNO_EADDRINUSE, EADDRINUSE, ES("Address already in use") }, + { BSM_ERRNO_EADDRNOTAVAIL, EADDRNOTAVAIL, + ES("Can't assign requested address") }, + { BSM_ERRNO_ENETDOWN, ENETDOWN, ES("Network is down") }, + { BSM_ERRNO_ENETRESET, ENETRESET, + ES("Network dropped connection on reset") }, + { BSM_ERRNO_ECONNABORTED, ECONNABORTED, + ES("Software caused connection abort") }, + { BSM_ERRNO_ECONNRESET, ECONNRESET, ES("Connection reset by peer") }, + { BSM_ERRNO_ENOBUFS, ENOBUFS, ES("No buffer space available") }, + { BSM_ERRNO_EISCONN, EISCONN, ES("Socket is already connected") }, + { BSM_ERRNO_ENOTCONN, ENOTCONN, ES("Socket is not connected") }, + { BSM_ERRNO_ESHUTDOWN, ESHUTDOWN, + ES("Can't send after socket shutdown") }, + { BSM_ERRNO_ETOOMANYREFS, ETOOMANYREFS, + ES("Too many references: can't splice") }, + { BSM_ERRNO_ETIMEDOUT, ETIMEDOUT, 
ES("Operation timed out") }, + { BSM_ERRNO_ECONNREFUSED, ECONNREFUSED, ES("Connection refused") }, + { BSM_ERRNO_EHOSTDOWN, EHOSTDOWN, ES("Host is down") }, + { BSM_ERRNO_EHOSTUNREACH, EHOSTUNREACH, ES("No route to host") }, + { BSM_ERRNO_EALREADY, EALREADY, ES("Operation already in progress") }, + { BSM_ERRNO_EINPROGRESS, EINPROGRESS, + ES("Operation now in progress") }, + { BSM_ERRNO_ESTALE, ESTALE, ES("Stale NFS file handle") }, + { BSM_ERRNO_EPWROFF, +#ifdef EPWROFF + EPWROFF, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Device power is off") }, + { BSM_ERRNO_EDEVERR, +#ifdef EDEVERR + EDEVERR, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Device error") }, + { BSM_ERRNO_EBADEXEC, +#ifdef EBADEXEC + EBADEXEC, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Bad executable") }, + { BSM_ERRNO_EBADARCH, +#ifdef EBADARCH + EBADARCH, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Bad CPU type in executable") }, + { BSM_ERRNO_ESHLIBVERS, +#ifdef ESHLIBVERS + ESHLIBVERS, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Shared library version mismatch") }, + { BSM_ERRNO_EBADMACHO, +#ifdef EBADMACHO + EBADMACHO, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Malformed Macho file") }, + { BSM_ERRNO_EPOLICY, +#ifdef EPOLICY + EPOLICY, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Operation failed by policy") }, + { BSM_ERRNO_EDOTDOT, +#ifdef EDOTDOT + EDOTDOT, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("RFS specific error") }, + { BSM_ERRNO_EUCLEAN, +#ifdef EUCLEAN + EUCLEAN, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Structure needs cleaning") }, + { BSM_ERRNO_ENOTNAM, +#ifdef ENOTNAM + ENOTNAM, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Not a XENIX named type file") }, + { BSM_ERRNO_ENAVAIL, +#ifdef ENAVAIL + ENAVAIL, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("No XENIX semaphores available") }, + { BSM_ERRNO_EISNAM, +#ifdef EISNAM + EISNAM, +#else + ERRNO_NO_LOCAL_MAPPING, +#endif + ES("Is a named type file") }, + { BSM_ERRNO_EREMOTEIO, +#ifdef EREMOTEIO 
+		EREMOTEIO,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Remote I/O error") },
+	{ BSM_ERRNO_ENOMEDIUM,
+#ifdef ENOMEDIUM
+		ENOMEDIUM,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("No medium found") },
+	{ BSM_ERRNO_EMEDIUMTYPE,
+#ifdef EMEDIUMTYPE
+		EMEDIUMTYPE,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Wrong medium type") },
+	{ BSM_ERRNO_ENOKEY,
+#ifdef ENOKEY
+		ENOKEY,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Required key not available") },
+	{ BSM_ERRNO_EKEYEXPIRED,
+#ifdef EKEYEXPIRED	/* guard was misspelled EKEEXPIRED, so never taken */
+		EKEYEXPIRED,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Key has expired") },
+	{ BSM_ERRNO_EKEYREVOKED,
+#ifdef EKEYREVOKED
+		EKEYREVOKED,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Key has been revoked") },
+	{ BSM_ERRNO_EKEYREJECTED,
+#ifdef EKEYREJECTED	/* guard was misspelled EKEREJECTED, so never taken */
+		EKEYREJECTED,
+#else
+		ERRNO_NO_LOCAL_MAPPING,
+#endif
+		ES("Key was rejected by service") },
+};
+static const int bsm_errnos_count = sizeof(bsm_errnos) / sizeof(bsm_errnos[0]);
+
+/*
+ * Find the first table row whose local error number equals local_errno.
+ * Returns NULL when there is none.
+ */
+static const struct bsm_errno *
+bsm_lookup_errno_local(int local_errno)
+{
+	int i;
+
+	/*
+	 * ERRNO_NO_LOCAL_MAPPING is a table-internal placeholder, not an
+	 * error number; without this check it would "match" the first row
+	 * that has no local equivalent.
+	 */
+	if (local_errno == ERRNO_NO_LOCAL_MAPPING)
+		return (NULL);
+
+	for (i = 0; i < bsm_errnos_count; i++) {
+		if (bsm_errnos[i].be_local_errno == local_errno)
+			return (&bsm_errnos[i]);
+	}
+	return (NULL);
+}
+
+/*
+ * Conversion to the BSM errno space isn't allowed to fail; we simply map to
+ * BSM_ERRNO_UNKNOWN and let the remote endpoint deal with it.
+ */
+u_char
+au_errno_to_bsm(int local_errno)
+{
+	const struct bsm_errno *bsme;
+
+	bsme = bsm_lookup_errno_local(local_errno);
+	if (bsme == NULL)
+		return (BSM_ERRNO_UNKNOWN);
+	return (bsme->be_bsm_errno);
+}
+
+/*
+ * Find the table row for a BSM error number.  Returns NULL when the value
+ * is not in bsm_errnos[].
+ */
+static const struct bsm_errno *
+bsm_lookup_errno_bsm(u_char bsm_errno)
+{
+	int i;
+
+	for (i = 0; i < bsm_errnos_count; i++) {
+		if (bsm_errnos[i].be_bsm_errno == bsm_errno)
+			return (&bsm_errnos[i]);
+	}
+	return (NULL);
+}
+
+/*
+ * Conversion from a BSM error to a local error number may fail if either
+ * OpenBSM doesn't recognize the error on the wire, or because there is no
+ * appropriate local mapping.
+ */
+int
+au_bsm_to_errno(u_char bsm_errno, int *errorp)
+{
+	const struct bsm_errno *row = bsm_lookup_errno_bsm(bsm_errno);
+
+	/* Unknown on the wire: nothing to report back. */
+	if (row == NULL)
+		return (-1);
+
+	/* Known to BSM, but this system has no such error number. */
+	if (row->be_local_errno == ERRNO_NO_LOCAL_MAPPING)
+		return (-1);
+
+	*errorp = row->be_local_errno;
+	return (0);
+}
+
+#if !defined(KERNEL) && !defined(_KERNEL)
+/*
+ * Return a description of a BSM error number: a fixed message when the
+ * value is not in the table, the strerror() text when a local error number
+ * exists, and otherwise the string stored in the table row.
+ */
+const char *
+au_strerror(u_char bsm_errno)
+{
+	const struct bsm_errno *row = bsm_lookup_errno_bsm(bsm_errno);
+
+	if (row == NULL)
+		return ("Unrecognized BSM error");
+	if (row->be_local_errno == ERRNO_NO_LOCAL_MAPPING)
+		return (row->be_strerror);
+	return (strerror(row->be_local_errno));
+}
+#endif
+#endif /* CONFIG_AUDIT */
diff --git a/bsd/security/audit/audit_bsm_fcntl.c b/bsd/security/audit/audit_bsm_fcntl.c
new file mode 100644
index 000000000..c741986bf
--- /dev/null
+++ b/bsd/security/audit/audit_bsm_fcntl.c
@@ -0,0 +1,291 @@
+/*-
+ * Copyright (c) 2008-2009 Apple Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
+ *    its contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $P4: //depot/projects/trustedbsd/openbsm/libbsm/bsm_fcntl.c#2 $
+ */
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+#if CONFIG_AUDIT
+/*
+ * One row of the fcntl command translation table: a BSM command constant
+ * paired with the local fcntl(2) command it stands for.
+ */
+struct bsm_fcntl_cmd {
+	u_short	bfc_bsm_fcntl_cmd;	/* BSM_F_* constant */
+	int	bfc_local_fcntl_cmd;	/* local F_* constant */
+};
+typedef struct bsm_fcntl_cmd	bsm_fcntl_cmd_t;
+
+/*
+ * Translation table.  Rows for commands the local system does not define
+ * are compiled out, so every row present has a usable local value.
+ */
+static const bsm_fcntl_cmd_t bsm_fcntl_cmdtab[] = {
+	{ BSM_F_DUPFD,		F_DUPFD },
+	{ BSM_F_GETFD,		F_GETFD },
+	{ BSM_F_SETFD,		F_SETFD },
+	{ BSM_F_GETFL,		F_GETFL },
+	{ BSM_F_SETFL,		F_SETFL },
+#ifdef F_O_GETLK
+	{ BSM_F_O_GETLK,	F_O_GETLK },
+#endif
+	{ BSM_F_SETLK,		F_SETLK },
+	{ BSM_F_SETLKW,		F_SETLKW },	/* was F_SETLK: F_SETLKW never matched */
+#ifdef F_CHKFL				/* was F_CHFL, which guards nothing used below */
+	{ BSM_F_CHKFL,		F_CHKFL },
+#endif
+#ifdef F_DUP2FD
+	{ BSM_F_DUP2FD,		F_DUP2FD },
+#endif
+#ifdef F_ALLOCSP
+	{ BSM_F_ALLOCSP,	F_ALLOCSP },
+#endif
+#ifdef F_FREESP
+	{ BSM_F_FREESP,		F_FREESP },
+#endif
+#ifdef F_ISSTREAM
+	{ BSM_F_ISSTREAM,	F_ISSTREAM},
+#endif
+	{ BSM_F_GETLK,		F_GETLK },
+#ifdef F_PRIV
+	{ BSM_F_PRIV,		F_PRIV },
+#endif
+#ifdef F_NPRIV
+	{ BSM_F_NPRIV,		F_NPRIV },
+#endif
+#ifdef F_QUOTACTL
+	{ BSM_F_QUOTACTL,	F_QUOTACTL },
+#endif
+#ifdef F_BLOCKS
+	{ BSM_F_BLOCKS,		F_BLOCKS },
+#endif
+#ifdef F_BLKSIZE
+	{ BSM_F_BLKSIZE,	F_BLKSIZE },
+#endif
+	{ BSM_F_GETOWN,		F_GETOWN },
+	{ BSM_F_SETOWN,		F_SETOWN },
+#ifdef F_REVOKE
+	{ BSM_F_REVOKE,		F_REVOKE },
+#endif
+#ifdef F_HASREMOTEBLOCKS
+	{ BSM_F_HASREMOTEBLOCKS,
+				F_HASREMOTEBLOCKS },
+#endif
+#ifdef F_FREESP
+	{ BSM_F_FREESP,		F_FREESP },
+#endif
+#ifdef F_ALLOCSP
+	{ BSM_F_ALLOCSP,
F_ALLOCSP }, +#endif +#ifdef F_FREESP64 + { BSM_F_FREESP64, F_FREESP64 }, +#endif +#ifdef F_ALLOCSP64 + { BSM_F_ALLOCSP64, F_ALLOCSP64 }, +#endif +#ifdef F_GETLK64 + { BSM_F_GETLK64, F_GETLK64 }, +#endif +#ifdef F_SETLK64 + { BSM_F_SETLK64, F_SETLK64 }, +#endif +#ifdef F_SETLKW64 + { BSM_F_SETLKW64, F_SETLKW64 }, +#endif +#ifdef F_SHARE + { BSM_F_SHARE, F_SHARE }, +#endif +#ifdef F_UNSHARE + { BSM_F_UNSHARE, F_UNSHARE }, +#endif +#ifdef F_SETLK_NBMAND + { BSM_F_SETLK_NBMAND, F_SETLK_NBMAND }, +#endif +#ifdef F_SHARE_NBMAND + { BSM_F_SHARE_NBMAND, F_SHARE_NBMAND }, +#endif +#ifdef F_SETLK64_NBMAND + { BSM_F_SETLK64_NBMAND, F_SETLK64_NBMAND }, +#endif +#ifdef F_GETXFL + { BSM_F_GETXFL, F_GETXFL }, +#endif +#ifdef F_BADFD + { BSM_F_BADFD, F_BADFD }, +#endif +#ifdef F_OGETLK + { BSM_F_OGETLK, F_OGETLK }, +#endif +#ifdef F_OSETLK + { BSM_F_OSETLK, F_OSETLK }, +#endif +#ifdef F_OSETLKW + { BSM_F_OSETLKW, F_OSETLKW }, +#endif +#ifdef F_SETLK_REMOTE + { BSM_F_SETLK_REMOTE, F_SETLK_REMOTE }, +#endif + +#ifdef F_SETSIG + { BSM_F_SETSIG, F_SETSIG }, +#endif +#ifdef F_GETSIG + { BSM_F_GETSIG, F_GETSIG }, +#endif + +#ifdef F_CHKCLEAN + { BSM_F_CHKCLEAN, F_CHKCLEAN }, +#endif +#ifdef F_PREALLOCATE + { BSM_F_PREALLOCATE, F_PREALLOCATE }, +#endif +#ifdef F_SETSIZE + { BSM_F_SETSIZE, F_SETSIZE }, +#endif +#ifdef F_RDADVISE + { BSM_F_RDADVISE, F_RDADVISE }, +#endif +#ifdef F_RDAHEAD + { BSM_F_RDAHEAD, F_RDAHEAD }, +#endif +#ifdef F_READBOOTSTRAP + { BSM_F_READBOOTSTRAP, F_READBOOTSTRAP }, +#endif +#ifdef F_WRITEBOOTSTRAP + { BSM_F_WRITEBOOTSTRAP, F_WRITEBOOTSTRAP }, +#endif +#ifdef F_NOCACHE + { BSM_F_NOCACHE, F_NOCACHE }, +#endif +#ifdef F_LOG2PHYS + { BSM_F_LOG2PHYS, F_LOG2PHYS }, +#endif +#ifdef F_GETPATH + { BSM_F_GETPATH, F_GETPATH }, +#endif +#ifdef F_FULLFSYNC + { BSM_F_FULLFSYNC, F_FULLFSYNC }, +#endif +#ifdef F_PATHPKG_CHECK + { BSM_F_PATHPKG_CHECK, F_PATHPKG_CHECK }, +#endif +#ifdef F_FREEZE_FS + { BSM_F_FREEZE_FS, F_FREEZE_FS }, +#endif +#ifdef F_THAW_FS + { 
BSM_F_THAW_FS,		F_THAW_FS },
+#endif
+#ifdef F_GLOBAL_NOCACHE
+	{ BSM_F_GLOBAL_NOCACHE,	F_GLOBAL_NOCACHE },
+#endif
+#ifdef F_OPENFROM
+	{ BSM_F_OPENFROM,	F_OPENFROM },
+#endif
+#ifdef F_UNLINKFROM
+	{ BSM_F_UNLINKFROM,	F_UNLINKFROM },
+#endif
+#ifdef F_CHECK_OPENEVT
+	{ BSM_F_CHECK_OPENEVT,	F_CHECK_OPENEVT },
+#endif
+#ifdef F_ADDSIGS
+	{ BSM_F_ADDSIGS,	F_ADDSIGS },
+#endif
+#ifdef F_MARKDEPENDENCY
+	{ BSM_F_MARKDEPENDENCY,	F_MARKDEPENDENCY },
+#endif
+
+#ifdef FCNTL_FS_SPECIFIC_BASE
+	{ BSM_F_FS_SPECIFIC_0,	FCNTL_FS_SPECIFIC_BASE},
+	{ BSM_F_FS_SPECIFIC_1,	FCNTL_FS_SPECIFIC_BASE + 1},
+	{ BSM_F_FS_SPECIFIC_2,	FCNTL_FS_SPECIFIC_BASE + 2},
+	{ BSM_F_FS_SPECIFIC_3,	FCNTL_FS_SPECIFIC_BASE + 3},
+	{ BSM_F_FS_SPECIFIC_4,	FCNTL_FS_SPECIFIC_BASE + 4},
+	{ BSM_F_FS_SPECIFIC_5,	FCNTL_FS_SPECIFIC_BASE + 5},
+	{ BSM_F_FS_SPECIFIC_6,	FCNTL_FS_SPECIFIC_BASE + 6},
+	{ BSM_F_FS_SPECIFIC_7,	FCNTL_FS_SPECIFIC_BASE + 7},
+	{ BSM_F_FS_SPECIFIC_8,	FCNTL_FS_SPECIFIC_BASE + 8},
+	{ BSM_F_FS_SPECIFIC_9,	FCNTL_FS_SPECIFIC_BASE + 9},
+	{ BSM_F_FS_SPECIFIC_10,	FCNTL_FS_SPECIFIC_BASE + 10},
+	{ BSM_F_FS_SPECIFIC_11,	FCNTL_FS_SPECIFIC_BASE + 11},
+	{ BSM_F_FS_SPECIFIC_12,	FCNTL_FS_SPECIFIC_BASE + 12},
+	{ BSM_F_FS_SPECIFIC_13,	FCNTL_FS_SPECIFIC_BASE + 13},
+	{ BSM_F_FS_SPECIFIC_14,	FCNTL_FS_SPECIFIC_BASE + 14},
+	{ BSM_F_FS_SPECIFIC_15,	FCNTL_FS_SPECIFIC_BASE + 15},
+#endif	/* FCNTL_FS_SPECIFIC_BASE */
+};
+static const int bsm_fcntl_cmd_count = sizeof(bsm_fcntl_cmdtab) /
+	    sizeof(bsm_fcntl_cmdtab[0]);
+
+/*
+ * Scan the table front to back for a row whose local command equals
+ * local_fcntl_cmd; the first such row wins.  NULL when none matches.
+ */
+static const bsm_fcntl_cmd_t *
+bsm_lookup_local_fcntl_cmd(int local_fcntl_cmd)
+{
+	const bsm_fcntl_cmd_t *row = bsm_fcntl_cmdtab;
+	const bsm_fcntl_cmd_t *const stop =
+	    bsm_fcntl_cmdtab + bsm_fcntl_cmd_count;
+
+	for (; row < stop; row++) {
+		if (row->bfc_local_fcntl_cmd == local_fcntl_cmd)
+			return (row);
+	}
+	return (NULL);
+}
+
+/*
+ * Translate a local fcntl command to its BSM constant, or to
+ * BSM_F_UNKNOWN when the table has no row for it.
+ */
+u_short
+au_fcntl_cmd_to_bsm(int local_fcntl_cmd)
+{
+	const bsm_fcntl_cmd_t *row;
+
+	row = bsm_lookup_local_fcntl_cmd(local_fcntl_cmd);
+	return (row != NULL ? row->bfc_bsm_fcntl_cmd : BSM_F_UNKNOWN);
+}
+
+/*
+ * Find the table row for a BSM fcntl command constant.  Returns NULL when
+ * the constant is not in bsm_fcntl_cmdtab[].
+ */
+static const bsm_fcntl_cmd_t *
+bsm_lookup_bsm_fcntl_cmd(u_short bsm_fcntl_cmd)
+{
+	int i;
+
+	for (i = 0; i < bsm_fcntl_cmd_count; i++) {
+		if (bsm_fcntl_cmdtab[i].bfc_bsm_fcntl_cmd ==
+		    bsm_fcntl_cmd)
+			return (&bsm_fcntl_cmdtab[i]);
+	}
+	return (NULL);
+}
+
+/*
+ * Convert a BSM fcntl command to the local command.  On success the local
+ * value is stored through local_fcntl_cmdp and 0 is returned; -1 is
+ * returned when the BSM value is not in the table.
+ *
+ * The earlier test, "|| bfcp->bfc_local_fcntl_cmd", failed for every row
+ * whose local command is non-zero, so only a command equal to 0 could be
+ * converted.  The table compiles out rows the local system lacks, so a
+ * row that is found always carries a usable local value.
+ */
+int
+au_bsm_to_fcntl_cmd(u_short bsm_fcntl_cmd, int *local_fcntl_cmdp)
+{
+	const bsm_fcntl_cmd_t *bfcp;
+
+	bfcp = bsm_lookup_bsm_fcntl_cmd(bsm_fcntl_cmd);
+	if (bfcp == NULL)
+		return (-1);
+	*local_fcntl_cmdp = bfcp->bfc_local_fcntl_cmd;
+	return (0);
+}
+#endif /* CONFIG_AUDIT */
diff --git a/bsd/security/audit/audit_bsm_klib.c b/bsd/security/audit/audit_bsm_klib.c
new file mode 100644
index 000000000..02fd0ead5
--- /dev/null
+++ b/bsd/security/audit/audit_bsm_klib.c
@@ -0,0 +1,733 @@
+/*-
+ * Copyright (c) 1999-2009 Apple Inc.
+ * Copyright (c) 2005 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * @APPLE_BSD_LICENSE_HEADER_START@
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
+ *    its contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @APPLE_BSD_LICENSE_HEADER_END@ + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if CONFIG_AUDIT +/* + * Hash table functions for the audit event number to event class mask + * mapping. + */ +#define EVCLASSMAP_HASH_TABLE_SIZE 251 +struct evclass_elem { + au_event_t event; + au_class_t class; + LIST_ENTRY(evclass_elem) entry; +}; +struct evclass_list { + LIST_HEAD(, evclass_elem) head; +}; + +static MALLOC_DEFINE(M_AUDITEVCLASS, "audit_evclass", "Audit event class"); +static struct rwlock evclass_lock; +static struct evclass_list evclass_hash[EVCLASSMAP_HASH_TABLE_SIZE]; + +#define EVCLASS_LOCK_INIT() rw_init(&evclass_lock, "evclass_lock") +#define EVCLASS_RLOCK() rw_rlock(&evclass_lock) +#define EVCLASS_RUNLOCK() rw_runlock(&evclass_lock) +#define EVCLASS_WLOCK() rw_wlock(&evclass_lock) +#define EVCLASS_WUNLOCK() rw_wunlock(&evclass_lock) + +/* + * Look up the class for an audit event in the class mapping table. 
+ */
+au_class_t
+au_event_class(au_event_t event)
+{
+	struct evclass_list *bucket;
+	struct evclass_elem *elem;
+	au_class_t found;
+
+	EVCLASS_RLOCK();
+	bucket = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE];
+	found = 0;		/* result when the event has no mapping */
+	LIST_FOREACH(elem, &bucket->head, entry) {
+		if (elem->event != event)
+			continue;
+		found = elem->class;
+		break;
+	}
+	EVCLASS_RUNLOCK();
+	return (found);
+}
+
+/*
+ * Insert an event to class mapping.  If the event already exists in the
+ * mapping, its class is overwritten with the new one.
+ *
+ * XXX There are currently no constraints placed on the number of mappings.
+ * May want to either limit to a number, or in terms of memory usage.
+ */
+void
+au_evclassmap_insert(au_event_t event, au_class_t class)
+{
+	struct evclass_list *bucket;
+	struct evclass_elem *cur, *fresh;
+
+	/*
+	 * Events that require auditing a system call contribute their class
+	 * to audit_kevent_mask, which is consulted to decide whether the
+	 * audit syscalls flag must be set when preselection masks are set.
+	 */
+	if (AUE_IS_A_KEVENT(event))
+		audit_kevent_mask |= class;
+
+	/*
+	 * Allocate up front, before taking the write lock; the element is
+	 * released again if the event turns out to be mapped already.
+	 */
+	fresh = malloc(sizeof(*fresh), M_AUDITEVCLASS, M_WAITOK);
+
+	EVCLASS_WLOCK();
+	bucket = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE];
+	LIST_FOREACH(cur, &bucket->head, entry) {
+		if (cur->event != event)
+			continue;
+		/* Existing mapping: update in place, drop the spare. */
+		cur->class = class;
+		EVCLASS_WUNLOCK();
+		free(fresh, M_AUDITEVCLASS);
+		return;
+	}
+	fresh->event = event;
+	fresh->class = class;
+	LIST_INSERT_HEAD(&bucket->head, fresh, entry);
+	EVCLASS_WUNLOCK();
+}
+
+/*
+ * Initialize the lock and the hash buckets, then register every audited
+ * system call event with an empty class.
+ */
+void
+au_evclassmap_init(void)
+{
+	/* Mach system call events; these are not in sys_au_event[]. */
+	static const au_event_t mach_events[] = {
+		AUE_TASKFORPID,
+		AUE_PIDFORTASK,
+		AUE_SWAPON,
+		AUE_SWAPOFF,
+		AUE_MAPFD,
+		AUE_INITPROCESS,
+	};
+	int i;
+
+	EVCLASS_LOCK_INIT();
+	for (i = 0; i < EVCLASSMAP_HASH_TABLE_SIZE; i++)
+		LIST_INIT(&evclass_hash[i].head);
+
+	/*
+	 * Set up the initial event to class mapping for system calls.
+	 */
+	for (i = 0; i < NUM_SYSENT; i++) {
+		if (sys_au_event[i] == AUE_NULL)
+			continue;
+		au_evclassmap_insert(sys_au_event[i], 0);
+	}
+
+	for (i = 0; i < (int)(sizeof(mach_events) / sizeof(mach_events[0]));
+	    i++)
+		au_evclassmap_insert(mach_events[i], 0);
+}
+
+/*
+ * Check whether an event is auditable by comparing the mask of classes this
+ * event is part of against the given mask.  Returns -1 when mask_p is NULL,
+ * 1 when a selected success/failure mask shares a bit with class, else 0.
+ */
+int
+au_preselect(__unused au_event_t event, au_class_t class, au_mask_t *mask_p,
+    int sorf)
+{
+	au_class_t hits;
+
+	if (mask_p == NULL)
+		return (-1);
+
+	/* Collect the class bits selected for the requested outcome(s). */
+	hits = 0;
+	if (sorf & AU_PRS_SUCCESS)
+		hits |= (mask_p->am_success & class);
+	if (sorf & AU_PRS_FAILURE)
+		hits |= (mask_p->am_failure & class);
+
+	return (hits != 0 ? 1 : 0);
+}
+
+/*
+ * Convert sysctl names and present arguments to events.
+ */ +au_event_t +audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg) +{ + + /* can't parse it - so return the worst case */ + if ((valid_arg & (ARG_CTLNAME | ARG_LEN)) != (ARG_CTLNAME | ARG_LEN)) + return (AUE_SYSCTL); + + switch (name[0]) { + /* non-admin "lookups" treat them special */ + case KERN_OSTYPE: + case KERN_OSRELEASE: + case KERN_OSREV: + case KERN_VERSION: + case KERN_ARGMAX: + case KERN_CLOCKRATE: + case KERN_BOOTTIME: + case KERN_POSIX1: + case KERN_NGROUPS: + case KERN_JOB_CONTROL: + case KERN_SAVED_IDS: + case KERN_OSRELDATE: + case KERN_NETBOOT: + case KERN_SYMFILE: + case KERN_SHREG_PRIVATIZABLE: + case KERN_OSVERSION: + return (AUE_SYSCTL_NONADMIN); + + /* only treat the changeable controls as admin */ + case KERN_MAXVNODES: + case KERN_MAXPROC: + case KERN_MAXFILES: + case KERN_MAXPROCPERUID: + case KERN_MAXFILESPERPROC: + case KERN_HOSTID: + case KERN_AIOMAX: + case KERN_AIOPROCMAX: + case KERN_AIOTHREADS: + case KERN_COREDUMP: + case KERN_SUGID_COREDUMP: + case KERN_NX_PROTECTION: + return ((valid_arg & ARG_VALUE32) ? + AUE_SYSCTL : AUE_SYSCTL_NONADMIN); + + default: + return (AUE_SYSCTL); + } + /* NOTREACHED */ +} + +/* + * Convert an open flags specifier into a specific type of open event for + * auditing purposes. + */ +au_event_t +audit_flags_and_error_to_openevent(int oflags, int error) +{ + au_event_t aevent; + + /* + * Need to check only those flags we care about. + */ + oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY); + + /* + * These checks determine what flags are on with the condition that + * ONLY that combination is on, and no other flags are on. 
+ */ + switch (oflags) { + case O_RDONLY: + aevent = AUE_OPEN_R; + break; + + case (O_RDONLY | O_CREAT): + aevent = AUE_OPEN_RC; + break; + + case (O_RDONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_RTC; + break; + + case (O_RDONLY | O_TRUNC): + aevent = AUE_OPEN_RT; + break; + + case O_RDWR: + aevent = AUE_OPEN_RW; + break; + + case (O_RDWR | O_CREAT): + aevent = AUE_OPEN_RWC; + break; + + case (O_RDWR | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_RWTC; + break; + + case (O_RDWR | O_TRUNC): + aevent = AUE_OPEN_RWT; + break; + + case O_WRONLY: + aevent = AUE_OPEN_W; + break; + + case (O_WRONLY | O_CREAT): + aevent = AUE_OPEN_WC; + break; + + case (O_WRONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_WTC; + break; + + case (O_WRONLY | O_TRUNC): + aevent = AUE_OPEN_WT; + break; + + default: + aevent = AUE_OPEN; + break; + } + + /* + * Convert chatty errors to better matching events. Failures to + * find a file are really just attribute events -- so recast them as + * such. + * + * XXXAUDIT: Solaris defines that AUE_OPEN will never be returned, it + * is just a placeholder. However, in Darwin we return that in + * preference to other events. + * + * XXXRW: This behavior differs from FreeBSD, so possibly revise this + * code or this comment. + */ + switch (aevent) { + case AUE_OPEN_R: + case AUE_OPEN_RT: + case AUE_OPEN_RW: + case AUE_OPEN_RWT: + case AUE_OPEN_W: + case AUE_OPEN_WT: + if (error == ENOENT) + aevent = AUE_OPEN; + } + return (aevent); +} + +/* + * Convert an open flags specifier into a specific type of open_extended event + * for auditing purposes. + */ +au_event_t +audit_flags_and_error_to_openextendedevent(int oflags, int error) +{ + au_event_t aevent; + + /* + * Need to check only those flags we care about. + */ + oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY); + + /* + * These checks determine what flags are on with the condition that + * ONLY that combination is on, and no other flags are on. 
+ */ + switch (oflags) { + case O_RDONLY: + aevent = AUE_OPEN_EXTENDED_R; + break; + + case (O_RDONLY | O_CREAT): + aevent = AUE_OPEN_EXTENDED_RC; + break; + + case (O_RDONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_RTC; + break; + + case (O_RDONLY | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_RT; + break; + + case O_RDWR: + aevent = AUE_OPEN_EXTENDED_RW; + break; + + case (O_RDWR | O_CREAT): + aevent = AUE_OPEN_EXTENDED_RWC; + break; + + case (O_RDWR | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_RWTC; + break; + + case (O_RDWR | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_RWT; + break; + + case O_WRONLY: + aevent = AUE_OPEN_EXTENDED_W; + break; + + case (O_WRONLY | O_CREAT): + aevent = AUE_OPEN_EXTENDED_WC; + break; + + case (O_WRONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_WTC; + break; + + case (O_WRONLY | O_TRUNC): + aevent = AUE_OPEN_EXTENDED_WT; + break; + + default: + aevent = AUE_OPEN_EXTENDED; + break; + } + + /* + * Convert chatty errors to better matching events. Failures to + * find a file are really just attribute events -- so recast them as + * such. + * + * XXXAUDIT: Solaris defines that AUE_OPEN will never be returned, it + * is just a placeholder. However, in Darwin we return that in + * preference to other events. + * + * XXXRW: This behavior differs from FreeBSD, so possibly revise this + * code or this comment. + */ + switch (aevent) { + case AUE_OPEN_EXTENDED_R: + case AUE_OPEN_EXTENDED_RT: + case AUE_OPEN_EXTENDED_RW: + case AUE_OPEN_EXTENDED_RWT: + case AUE_OPEN_EXTENDED_W: + case AUE_OPEN_EXTENDED_WT: + if (error == ENOENT) + aevent = AUE_OPEN_EXTENDED; + } + return (aevent); +} + +/* + * Convert a MSGCTL command to a specific event. + */ +au_event_t +audit_msgctl_to_event(int cmd) +{ + + switch (cmd) { + case IPC_RMID: + return (AUE_MSGCTL_RMID); + + case IPC_SET: + return (AUE_MSGCTL_SET); + + case IPC_STAT: + return (AUE_MSGCTL_STAT); + + default: + /* We will audit a bad command. 
*/ + return (AUE_MSGCTL); + } +} + +/* + * Convert a SEMCTL command to a specific event. + */ +au_event_t +audit_semctl_to_event(int cmd) +{ + + switch (cmd) { + case GETALL: + return (AUE_SEMCTL_GETALL); + + case GETNCNT: + return (AUE_SEMCTL_GETNCNT); + + case GETPID: + return (AUE_SEMCTL_GETPID); + + case GETVAL: + return (AUE_SEMCTL_GETVAL); + + case GETZCNT: + return (AUE_SEMCTL_GETZCNT); + + case IPC_RMID: + return (AUE_SEMCTL_RMID); + + case IPC_SET: + return (AUE_SEMCTL_SET); + + case SETALL: + return (AUE_SEMCTL_SETALL); + + case SETVAL: + return (AUE_SEMCTL_SETVAL); + + case IPC_STAT: + return (AUE_SEMCTL_STAT); + + default: + /* We will audit a bad command. */ + return (AUE_SEMCTL); + } +} + +/* + * Convert a command for the auditon() system call to a audit event. + */ +au_event_t +auditon_command_event(int cmd) +{ + + switch(cmd) { + case A_GETPOLICY: + return (AUE_AUDITON_GPOLICY); + + case A_SETPOLICY: + return (AUE_AUDITON_SPOLICY); + + case A_GETKMASK: + return (AUE_AUDITON_GETKMASK); + + case A_SETKMASK: + return (AUE_AUDITON_SETKMASK); + + case A_GETQCTRL: + return (AUE_AUDITON_GQCTRL); + + case A_SETQCTRL: + return (AUE_AUDITON_SQCTRL); + + case A_GETCWD: + return (AUE_AUDITON_GETCWD); + + case A_GETCAR: + return (AUE_AUDITON_GETCAR); + + case A_GETSTAT: + return (AUE_AUDITON_GETSTAT); + + case A_SETSTAT: + return (AUE_AUDITON_SETSTAT); + + case A_SETUMASK: + return (AUE_AUDITON_SETUMASK); + + case A_SETSMASK: + return (AUE_AUDITON_SETSMASK); + + case A_GETCOND: + return (AUE_AUDITON_GETCOND); + + case A_SETCOND: + return (AUE_AUDITON_SETCOND); + + case A_GETCLASS: + return (AUE_AUDITON_GETCLASS); + + case A_SETCLASS: + return (AUE_AUDITON_SETCLASS); + + case A_GETPINFO: + case A_SETPMASK: + case A_SETFSIZE: + case A_GETFSIZE: + case A_GETPINFO_ADDR: + case A_GETKAUDIT: + case A_SETKAUDIT: + case A_GETSINFO_ADDR: + default: + return (AUE_AUDITON); /* No special record */ + } +} + +/* + * For darwin we rewrite events generated by 
fcntl(F_OPENFROM,...) and + * fcntl(F_UNLINKFROM,...) system calls to AUE_OPENAT_* and AUE_UNLINKAT audit + * events. + */ +au_event_t +audit_fcntl_command_event(int cmd, int oflags, int error) +{ + au_event_t aevent; + + switch(cmd) { + case F_OPENFROM: + /* + * Need to check only those flags we care about. + */ + oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | + O_WRONLY); + + /* + * These checks determine what flags are on with the condition + * that ONLY that combination is on, and no other flags are on. + */ + switch (oflags) { + case O_RDONLY: + aevent = AUE_OPENAT_R; + break; + + case (O_RDONLY | O_CREAT): + aevent = AUE_OPENAT_RC; + break; + + case (O_RDONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_RTC; + break; + + case (O_RDONLY | O_TRUNC): + aevent = AUE_OPENAT_RT; + break; + + case O_RDWR: + aevent = AUE_OPENAT_RW; + break; + + case (O_RDWR | O_CREAT): + aevent = AUE_OPENAT_RWC; + break; + + case (O_RDWR | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_RWTC; + break; + + case (O_RDWR | O_TRUNC): + aevent = AUE_OPENAT_RWT; + break; + + case O_WRONLY: + aevent = AUE_OPENAT_W; + break; + + case (O_WRONLY | O_CREAT): + aevent = AUE_OPENAT_WC; + break; + + case (O_WRONLY | O_CREAT | O_TRUNC): + aevent = AUE_OPENAT_WTC; + break; + + case (O_WRONLY | O_TRUNC): + aevent = AUE_OPENAT_WT; + break; + + default: + aevent = AUE_OPENAT; + break; + } + + /* + * Convert chatty errors to better matching events. Failures to + * find a file are really just attribute events -- so recast + * them as such. + */ + switch (aevent) { + case AUE_OPENAT_R: + case AUE_OPENAT_RT: + case AUE_OPENAT_RW: + case AUE_OPENAT_RWT: + case AUE_OPENAT_W: + case AUE_OPENAT_WT: + if (error == ENOENT) + aevent = AUE_OPENAT; + } + + return (aevent); + + case F_UNLINKFROM: + return (AUE_UNLINKAT); + + default: + return (AUE_FCNTL); /* Don't change from AUE_FCNTL. 
*/ + } +} + +/* + * Create a canonical path from given path by prefixing either the root + * directory, or the current working directory. + */ +int +audit_canon_path(struct vnode *cwd_vp, char *path, char *cpath) +{ + int len; + int ret; + char *bufp = path; + + /* + * Convert multiple leading '/' into a single '/' if the cwd_vp is + * NULL (i.e. an absolute path), and strip them entirely if the + * cwd_vp represents a chroot directory (i.e. the caller checked for + * an initial '/' character itself, saw one, and passed fdp->fd_rdir). + * Somewhat complicated, but it places the onus for locking structs + * involved on the caller, and makes proxy operations explicit rather + * than implicit. + */ + if (*(path) == '/') { + while (*(bufp) == '/') + bufp++; /* skip leading '/'s */ + if (cwd_vp == NULL) + bufp--; /* restore one '/' */ + } + if (cwd_vp != NULL) { + len = MAXPATHLEN; + ret = vn_getpath(cwd_vp, cpath, &len); + if (ret != 0) { + cpath[0] = '\0'; + return (ret); + } + if (len < MAXPATHLEN) + cpath[len-1] = '/'; + strlcpy(cpath + len, bufp, MAXPATHLEN - len); + } else { + strlcpy(cpath, bufp, MAXPATHLEN); + } + return (0); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsm_socket_type.c b/bsd/security/audit/audit_bsm_socket_type.c new file mode 100644 index 000000000..6b373b27d --- /dev/null +++ b/bsd/security/audit/audit_bsm_socket_type.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 2008-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#include + +#include +#include + +#if CONFIG_AUDIT +struct bsm_socket_type { + u_short bst_bsm_socket_type; + int bst_local_socket_type; +}; + +#define ST_NO_LOCAL_MAPPING -600 + +static const struct bsm_socket_type bsm_socket_types[] = { + { BSM_SOCK_DGRAM, SOCK_DGRAM }, + { BSM_SOCK_STREAM, SOCK_STREAM }, + { BSM_SOCK_RAW, SOCK_RAW }, + { BSM_SOCK_RDM, SOCK_RDM }, + { BSM_SOCK_SEQPACKET, SOCK_SEQPACKET }, +}; +static const int bsm_socket_types_count = sizeof(bsm_socket_types) / + sizeof(bsm_socket_types[0]); + +static const struct bsm_socket_type * +bsm_lookup_local_socket_type(int local_socket_type) +{ + int i; + + for (i = 0; i < bsm_socket_types_count; i++) { + if (bsm_socket_types[i].bst_local_socket_type == + local_socket_type) + return (&bsm_socket_types[i]); + } + return (NULL); +} + +u_short +au_socket_type_to_bsm(int local_socket_type) +{ + const struct bsm_socket_type *bstp; + + bstp = bsm_lookup_local_socket_type(local_socket_type); + if (bstp == NULL) + return 
(BSM_SOCK_UNKNOWN); + return (bstp->bst_bsm_socket_type); +} + +/* + * Find the table entry whose BSM socket type equals bsm_socket_type. + * Returns NULL when the table has no such entry. + */ +static const struct bsm_socket_type * +bsm_lookup_bsm_socket_type(u_short bsm_socket_type) +{ + int i; + + for (i = 0; i < bsm_socket_types_count; i++) { + if (bsm_socket_types[i].bst_bsm_socket_type == + bsm_socket_type) + return (&bsm_socket_types[i]); + } + return (NULL); +} + +/* + * Translate a BSM socket type into the local socket type. + * + * On success the local value is stored through local_socket_typep and 0 is + * returned. Returns -1, leaving *local_socket_typep untouched, when the BSM + * type is not in the table or its entry is marked ST_NO_LOCAL_MAPPING. + */ +int +au_bsm_to_socket_type(u_short bsm_socket_type, int *local_socket_typep) +{ + const struct bsm_socket_type *bstp; + + bstp = bsm_lookup_bsm_socket_type(bsm_socket_type); + /* + * Reject only entries explicitly flagged as unmapped; testing the + * field for non-zero would reject every valid socket type. + */ + if (bstp == NULL || bstp->bst_local_socket_type == ST_NO_LOCAL_MAPPING) + return (-1); + *local_socket_typep = bstp->bst_local_socket_type; + return (0); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_bsm_token.c b/bsd/security/audit/audit_bsm_token.c new file mode 100644 index 000000000..cf0b781a8 --- /dev/null +++ b/bsd/security/audit/audit_bsm_token.c @@ -0,0 +1,1401 @@ +/*- + * Copyright (c) 2004-2009 Apple Inc. + * Copyright (c) 2005 SPARTA, Inc. + * All rights reserved. + * + * This code was developed in part by Robert N. M. Watson, Senior Principal + * Scientist, SPARTA, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if CONFIG_AUDIT +#define GET_TOKEN_AREA(t, dptr, length) do { \ + t = malloc(sizeof(token_t), M_AUDITBSM, M_WAITOK); \ + t->t_data = malloc(length, M_AUDITBSM, M_WAITOK | M_ZERO); \ + t->len = length; \ + dptr = t->t_data; \ +} while (0) + +/* + * token ID 1 byte + * argument # 1 byte + * argument value 4 bytes/8 bytes (32-bit/64-bit value) + * text length 2 bytes + * text N bytes + 1 terminating NULL byte + */ +token_t * +au_to_arg32(char n, const char *text, u_int32_t v) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t textlen; + + textlen = strlen(text); + textlen += 1; + + GET_TOKEN_AREA(t, dptr, 2 * sizeof(u_char) + sizeof(u_int32_t) + + sizeof(u_int16_t) + textlen); + + ADD_U_CHAR(dptr, AUT_ARG32); + ADD_U_CHAR(dptr, n); + ADD_U_INT32(dptr, v); + ADD_U_INT16(dptr, textlen); + ADD_STRING(dptr, text, textlen); + + return (t); +} + +token_t * +au_to_arg64(char n, const char *text, u_int64_t v) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t textlen; + + textlen = strlen(text); + textlen += 1; 
+ + GET_TOKEN_AREA(t, dptr, 2 * sizeof(u_char) + sizeof(u_int64_t) + + sizeof(u_int16_t) + textlen); + + ADD_U_CHAR(dptr, AUT_ARG64); + ADD_U_CHAR(dptr, n); + ADD_U_INT64(dptr, v); + ADD_U_INT16(dptr, textlen); + ADD_STRING(dptr, text, textlen); + + return (t); +} + +token_t * +au_to_arg(char n, const char *text, u_int32_t v) +{ + + return (au_to_arg32(n, text, v)); +} + +#if defined(_KERNEL) || defined(KERNEL) +/* + * token ID 1 byte + * file access mode 4 bytes + * owner user ID 4 bytes + * owner group ID 4 bytes + * file system ID 4 bytes + * node ID 8 bytes + * device 4 bytes/8 bytes (32-bit/64-bit) + */ +token_t * +au_to_attr32(struct vnode_au_info *vni) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t pad0_16 = 0; + u_int32_t pad0_32 = 0; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(u_int16_t) + + 3 * sizeof(u_int32_t) + sizeof(u_int64_t) + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_ATTR32); + + /* + * Darwin defines the size for the file mode + * as 2 bytes; BSM defines 4 so pad with 0 + */ + ADD_U_INT16(dptr, pad0_16); + ADD_U_INT16(dptr, vni->vn_mode); + + ADD_U_INT32(dptr, vni->vn_uid); + ADD_U_INT32(dptr, vni->vn_gid); + ADD_U_INT32(dptr, vni->vn_fsid); + + /* + * Some systems use 32-bit file ID's, others use 64-bit file IDs. + * Attempt to handle both, and let the compiler sort it out. If we + * could pick this out at compile-time, it would be better, so as to + * avoid the else case below. 
+ */ + if (sizeof(vni->vn_fileid) == sizeof(uint32_t)) { + ADD_U_INT32(dptr, pad0_32); + ADD_U_INT32(dptr, vni->vn_fileid); + } else if (sizeof(vni->vn_fileid) == sizeof(uint64_t)) + ADD_U_INT64(dptr, vni->vn_fileid); + else + ADD_U_INT64(dptr, 0LL); + + ADD_U_INT32(dptr, vni->vn_dev); + + return (t); +} + +/* + * Build an AUT_ATTR64 token from vni. The fields are written in the same + * order as in au_to_attr32(), except that the device is written as 8 bytes. + */ +token_t * +au_to_attr64(struct vnode_au_info *vni) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t pad0_16 = 0; + /* Must be 32 bits wide: it is emitted with ADD_U_INT32() below. */ + u_int32_t pad0_32 = 0; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(u_int16_t) + + 3 * sizeof(u_int32_t) + sizeof(u_int64_t) * 2); + + ADD_U_CHAR(dptr, AUT_ATTR64); + + /* + * Darwin defines the size for the file mode + * as 2 bytes; BSM defines 4 so pad with 0 + */ + ADD_U_INT16(dptr, pad0_16); + ADD_U_INT16(dptr, vni->vn_mode); + + ADD_U_INT32(dptr, vni->vn_uid); + ADD_U_INT32(dptr, vni->vn_gid); + ADD_U_INT32(dptr, vni->vn_fsid); + + /* + * Some systems use 32-bit file IDs, others use 64-bit file IDs. + * Attempt to handle both, and let the compiler sort it out. If we + * could pick this out at compile-time, it would be better, so as to + * avoid the else case below. + */ + if (sizeof(vni->vn_fileid) == sizeof(uint32_t)) { + ADD_U_INT32(dptr, pad0_32); + ADD_U_INT32(dptr, vni->vn_fileid); + } else if (sizeof(vni->vn_fileid) == sizeof(uint64_t)) + ADD_U_INT64(dptr, vni->vn_fileid); + else + ADD_U_INT64(dptr, 0LL); + + ADD_U_INT64(dptr, vni->vn_dev); + + return (t); +} + +/* + * Default attribute token; forwards to au_to_attr32(). + */ +token_t * +au_to_attr(struct vnode_au_info *vni) +{ + + return (au_to_attr32(vni)); +} +#endif /* defined(_KERNEL) || defined(KERNEL) */ + +/* + * token ID 1 byte + * how to print 1 byte + * basic unit 1 byte + * unit count 1 byte + * data items (depends on basic unit) + */ +token_t * +au_to_data(char unit_print, char unit_type, char unit_count, const char *p) +{ + token_t *t; + u_char *dptr = NULL; + size_t datasize, totdata; + + /* Determine the size of the basic unit.
*/ + switch (unit_type) { + case AUR_BYTE: + /* case AUR_CHAR: */ + datasize = AUR_BYTE_SIZE; + break; + + case AUR_SHORT: + datasize = AUR_SHORT_SIZE; + break; + + case AUR_INT32: + /* case AUR_INT: */ + datasize = AUR_INT32_SIZE; + break; + + case AUR_INT64: + datasize = AUR_INT64_SIZE; + break; + + default: + /* For unknown assume byte. */ + datasize = AUR_BYTE_SIZE; + break; + } + + totdata = datasize * (size_t)unit_count; + + GET_TOKEN_AREA(t, dptr, 4 * sizeof(u_char) + totdata); + + ADD_U_CHAR(dptr, AUT_DATA); + ADD_U_CHAR(dptr, unit_print); + ADD_U_CHAR(dptr, unit_type); + ADD_U_CHAR(dptr, unit_count); + ADD_MEM(dptr, p, totdata); + + return (t); +} + +/* + * token ID 1 byte + * status 4 bytes + * return value 4 bytes + */ +token_t * +au_to_exit(int retval, int err) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_EXIT); + ADD_U_INT32(dptr, err); + ADD_U_INT32(dptr, retval); + + return (t); +} + +/* + */ +token_t * +au_to_groups(int *groups) +{ + + return (au_to_newgroups(AUDIT_MAX_GROUPS, (gid_t *)groups)); +} + +/* + * token ID 1 byte + * number groups 2 bytes + * group list count * 4 bytes + */ +token_t * +au_to_newgroups(u_int16_t n, gid_t *groups) +{ + token_t *t; + u_char *dptr = NULL; + int i; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + + n * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_NEWGROUPS); + ADD_U_INT16(dptr, n); + for (i = 0; i < n; i++) + ADD_U_INT32(dptr, groups[i]); + + return (t); +} + +/* + * token ID 1 byte + * internet address 4 bytes + */ +token_t * +au_to_in_addr(struct in_addr *internet_addr) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(uint32_t)); + + ADD_U_CHAR(dptr, AUT_IN_ADDR); + ADD_MEM(dptr, &internet_addr->s_addr, sizeof(uint32_t)); + + return (t); +} + +/* + * token ID 1 byte + * address type/length 4 bytes + * address 16 bytes + */ +token_t * +au_to_in_addr_ex(struct 
in6_addr *internet_addr) +{ + token_t *t; + u_char *dptr = NULL; + u_int32_t type = AU_IPv6; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 5 * sizeof(uint32_t)); + + ADD_U_CHAR(dptr, AUT_IN_ADDR_EX); + ADD_U_INT32(dptr, type); + ADD_MEM(dptr, internet_addr, 4 * sizeof(uint32_t)); + + return (t); +} + +/* + * token ID 1 byte + * ip header 20 bytes + * + * The IP header should be submitted in network byte order. + */ +token_t * +au_to_ip(struct ip *ip) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(struct ip)); + + ADD_U_CHAR(dptr, AUT_IP); + ADD_MEM(dptr, ip, sizeof(struct ip)); + + return (t); +} + +/* + * token ID 1 byte + * object ID type 1 byte + * object ID 4 bytes + */ +token_t * +au_to_ipc(char type, int id) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, 2 * sizeof(u_char) + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_IPC); + ADD_U_CHAR(dptr, type); + ADD_U_INT32(dptr, id); + + return (t); +} + +/* + * token ID 1 byte + * owner user ID 4 bytes + * owner group ID 4 bytes + * creator user ID 4 bytes + * creator group ID 4 bytes + * access mode 4 bytes + * slot sequence # 4 bytes + * key 4 bytes + * + * Returns NULL when perm is NULL. + */ +token_t * +au_to_ipc_perm(struct ipc_perm *perm) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t pad0 = 0; + + if (perm == NULL) + return NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 12 * sizeof(u_int16_t) + + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_IPC_PERM); + + ADD_U_INT32(dptr, perm->uid); + ADD_U_INT32(dptr, perm->gid); + ADD_U_INT32(dptr, perm->cuid); + ADD_U_INT32(dptr, perm->cgid); + + /* + * Darwin defines the size for the file mode + * as 2 bytes; BSM defines 4 so pad with 0 + */ + ADD_U_INT16(dptr, pad0); + ADD_U_INT16(dptr, perm->mode); + + ADD_U_INT16(dptr, pad0); + ADD_U_INT16(dptr, perm->_seq); + + /* + * The key field is 4 bytes in the token; write the whole value + * rather than a zero pad plus its low 16 bits, which would drop + * the upper half of the key. + * NOTE(review): assumes _key is a 32-bit key_t -- confirm. + */ + ADD_U_INT32(dptr, perm->_key); + + return (t); +} + +/* + * token ID 1 byte + * port IP address 2 bytes + */ +token_t * +au_to_iport(u_int16_t
iport) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t)); + + ADD_U_CHAR(dptr, AUT_IPORT); + ADD_U_INT16(dptr, iport); + + return (t); +} + +/* + * token ID 1 byte + * size 2 bytes + * data size bytes + */ +token_t * +au_to_opaque(const char *data, uint16_t bytes) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + bytes); + + ADD_U_CHAR(dptr, AUT_OPAQUE); + ADD_U_INT16(dptr, bytes); + ADD_MEM(dptr, data, bytes); + + return (t); +} + +/* + * token ID 1 byte + * seconds of time 4 bytes + * milliseconds of time 4 bytes + * file name len 2 bytes + * file pathname N bytes + 1 terminating NULL byte + */ +token_t * +au_to_file(const char *file, struct timeval tm) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t filelen; + u_int32_t timems; + + filelen = strlen(file); + filelen += 1; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(u_int32_t) + + sizeof(u_int16_t) + filelen); + + timems = tm.tv_usec/1000; + + ADD_U_CHAR(dptr, AUT_OTHER_FILE32); + ADD_U_INT32(dptr, tm.tv_sec); + ADD_U_INT32(dptr, timems); /* We need time in ms. 
*/ + ADD_U_INT16(dptr, filelen); + ADD_STRING(dptr, file, filelen); + + return (t); +} + +/* + * token ID 1 byte + * text length 2 bytes + * text N bytes + 1 terminating NULL byte + */ +token_t * +au_to_text(const char *text) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t textlen; + + textlen = strlen(text); + textlen += 1; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + textlen); + + ADD_U_CHAR(dptr, AUT_TEXT); + ADD_U_INT16(dptr, textlen); + ADD_STRING(dptr, text, textlen); + + return (t); +} + +/* + * token ID 1 byte + * path length 2 bytes + * path N bytes + 1 terminating NULL byte + */ +token_t * +au_to_path(const char *text) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t textlen; + + textlen = strlen(text); + textlen += 1; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + textlen); + + ADD_U_CHAR(dptr, AUT_PATH); + ADD_U_INT16(dptr, textlen); + ADD_STRING(dptr, text, textlen); + + return (t); +} + +/* + * token ID 1 byte + * audit ID 4 bytes + * effective user ID 4 bytes + * effective group ID 4 bytes + * real user ID 4 bytes + * real group ID 4 bytes + * process ID 4 bytes + * session ID 4 bytes + * terminal ID + * port ID 4 bytes/8 bytes (32-bit/64-bit value) + * machine address 4 bytes + */ +token_t * +au_to_process32(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 9 * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_PROCESS32); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT32(dptr, tid->port); + ADD_MEM(dptr, &tid->machine, sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_process64(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + token_t *t; + u_char *dptr = 
NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 8 * sizeof(u_int32_t) + + sizeof(u_int64_t)); + + ADD_U_CHAR(dptr, AUT_PROCESS64); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT64(dptr, tid->port); + ADD_MEM(dptr, &tid->machine, sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_process(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + + return (au_to_process32(auid, euid, egid, ruid, rgid, pid, sid, + tid)); +} + +/* + * token ID 1 byte + * audit ID 4 bytes + * effective user ID 4 bytes + * effective group ID 4 bytes + * real user ID 4 bytes + * real group ID 4 bytes + * process ID 4 bytes + * session ID 4 bytes + * terminal ID + * port ID 4 bytes/8 bytes (32-bit/64-bit value) + * address type-len 4 bytes + * machine address 4/16 bytes + */ +token_t * +au_to_process32_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + token_t *t; + u_char *dptr = NULL; + + KASSERT((tid->at_type == AU_IPv4) || (tid->at_type == AU_IPv6), + ("au_to_process32_ex: type %u", (unsigned int)tid->at_type)); + if (tid->at_type == AU_IPv6) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 13 * + sizeof(u_int32_t)); + else + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 10 * + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_PROCESS32_EX); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT32(dptr, tid->at_port); + ADD_U_INT32(dptr, tid->at_type); + if (tid->at_type == AU_IPv6) + ADD_MEM(dptr, &tid->at_addr[0], 4 * sizeof(u_int32_t)); + else + ADD_MEM(dptr, &tid->at_addr[0], sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_process64_ex(au_id_t auid, uid_t euid, gid_t 
egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + token_t *t = NULL; + u_char *dptr = NULL; + + if (tid->at_type == AU_IPv4) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 7 * sizeof(u_int32_t) + sizeof(u_int64_t) + + 2 * sizeof(u_int32_t)); + else if (tid->at_type == AU_IPv6) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 7 * sizeof(u_int32_t) + sizeof(u_int64_t) + + 5 * sizeof(u_int32_t)); + else + panic("au_to_process64_ex: invalidate at_type (%d)", + tid->at_type); + + ADD_U_CHAR(dptr, AUT_PROCESS64_EX); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT64(dptr, tid->at_port); + ADD_U_INT32(dptr, tid->at_type); + ADD_MEM(dptr, &tid->at_addr[0], sizeof(u_int32_t)); + if (tid->at_type == AU_IPv6) { + ADD_MEM(dptr, &tid->at_addr[1], sizeof(u_int32_t)); + ADD_MEM(dptr, &tid->at_addr[2], sizeof(u_int32_t)); + ADD_MEM(dptr, &tid->at_addr[3], sizeof(u_int32_t)); + } + + return (t); +} + +token_t * +au_to_process_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + + return (au_to_process32_ex(auid, euid, egid, ruid, rgid, pid, sid, + tid)); +} + +/* + * token ID 1 byte + * error status 1 byte + * return value 4 bytes/8 bytes (32-bit/64-bit value) + */ +token_t * +au_to_return32(char status, u_int32_t ret) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, 2 * sizeof(u_char) + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_RETURN32); + ADD_U_CHAR(dptr, status); + ADD_U_INT32(dptr, ret); + + return (t); +} + +token_t * +au_to_return64(char status, u_int64_t ret) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, 2 * sizeof(u_char) + sizeof(u_int64_t)); + + ADD_U_CHAR(dptr, AUT_RETURN64); + ADD_U_CHAR(dptr, status); + ADD_U_INT64(dptr, ret); + + return (t); +} + +token_t * +au_to_return(char status, 
u_int32_t ret) +{ + + return (au_to_return32(status, ret)); +} + +/* + * token ID 1 byte + * sequence number 4 bytes + */ +token_t * +au_to_seq(long audit_count) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_SEQ); + ADD_U_INT32(dptr, (u_int32_t) audit_count); + + return (t); +} + +/* + * token ID 1 byte + * socket domain 2 bytes + * socket type 2 bytes + * address type 2 bytes + * local port 2 bytes + * local address 4 bytes/16 bytes (IPv4/IPv6 address) + * remote port 2 bytes + * remote address 4 bytes/16 bytes (IPv4/IPv6 address) + */ +token_t * +au_to_socket_ex(u_short so_domain, u_short so_type, + struct sockaddr *sa_local, struct sockaddr *sa_remote) +{ + token_t *t; + u_char *dptr = NULL; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + + if (so_domain == AF_INET) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 5 * sizeof(u_int16_t) + 2 * sizeof(u_int32_t)); + else if (so_domain == AF_INET6) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 5 * sizeof(u_int16_t) + 8 * sizeof(u_int32_t)); + else + return (NULL); + + ADD_U_CHAR(dptr, AUT_SOCKET_EX); + ADD_U_INT16(dptr, au_domain_to_bsm(so_domain)); + ADD_U_INT16(dptr, au_socket_type_to_bsm(so_type)); + if (so_domain == AF_INET) { + ADD_U_INT16(dptr, AU_IPv4); + sin = (struct sockaddr_in *)sa_local; + ADD_MEM(dptr, &sin->sin_port, sizeof(uint16_t)); + ADD_MEM(dptr, &sin->sin_addr.s_addr, sizeof(uint32_t)); + sin = (struct sockaddr_in *)sa_remote; + ADD_MEM(dptr, &sin->sin_port, sizeof(uint16_t)); + ADD_MEM(dptr, &sin->sin_addr.s_addr, sizeof(uint32_t)); + } else /* if (so_domain == AF_INET6) */ { + ADD_U_INT16(dptr, AU_IPv6); + sin6 = (struct sockaddr_in6 *)sa_local; + ADD_MEM(dptr, &sin6->sin6_port, sizeof(uint16_t)); + ADD_MEM(dptr, &sin6->sin6_addr, 4 * sizeof(uint32_t)); + sin6 = (struct sockaddr_in6 *)sa_remote; + ADD_MEM(dptr, &sin6->sin6_port, sizeof(uint16_t)); + ADD_MEM(dptr, &sin6->sin6_addr, 4 * sizeof(uint32_t)); + } 
+ + return (t); +} + +/* + * token ID 1 byte + * socket family 2 bytes + * path (up to) 104 bytes + NULL + */ +token_t * +au_to_sock_unix(struct sockaddr_un *so) +{ + token_t *t; + u_char *dptr; + size_t slen; + + /* + * Please note that sun_len may not be correctly set and sun_path may + * not be NULL terminated. + */ + if (so->sun_len >= offsetof(struct sockaddr_un, sun_path)) + slen = min(so->sun_len - offsetof(struct sockaddr_un, sun_path), + strnlen(so->sun_path, sizeof(so->sun_path))); + else + slen = strnlen(so->sun_path, sizeof(so->sun_path)); + + GET_TOKEN_AREA(t, dptr, 3 * sizeof(u_char) + slen + 1); + + ADD_U_CHAR(dptr, AUT_SOCKUNIX); + /* BSM token has two bytes for family */ + ADD_U_CHAR(dptr, 0); + ADD_U_CHAR(dptr, so->sun_family); + if (slen) + ADD_MEM(dptr, so->sun_path, slen); + ADD_U_CHAR(dptr, '\0'); /* make the path a null-terminated string */ + + return (t); +} + +/* + * token ID 1 byte + * socket family 2 bytes + * local port 2 bytes + * socket address 4 bytes + */ +token_t * +au_to_sock_inet32(struct sockaddr_in *so) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(uint16_t) + + sizeof(uint32_t)); + + ADD_U_CHAR(dptr, AUT_SOCKINET32); + /* + * Convert sin_family to the BSM value. Assume that both the port and + * the address in the sockaddr_in are already in network byte order, + * but family is in local byte order. 
+ */ + ADD_U_INT16(dptr, au_domain_to_bsm(so->sin_family)); + ADD_MEM(dptr, &so->sin_port, sizeof(uint16_t)); + ADD_MEM(dptr, &so->sin_addr.s_addr, sizeof(uint32_t)); + + return (t); +} + +/* + * token ID 1 byte + * socket family 2 bytes + * local port 2 bytes + * socket address 16 bytes + */ +token_t * +au_to_sock_inet128(struct sockaddr_in6 *so) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 2 * sizeof(u_int16_t) + + 4 * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_SOCKINET128); + ADD_U_INT16(dptr, au_domain_to_bsm(so->sin6_family)); + + /* + * As in au_to_sock_inet32(), the port and address are assumed to be + * in network byte order already, so copy them verbatim instead of + * passing the port through an integer-encoding macro. + */ + ADD_MEM(dptr, &so->sin6_port, sizeof(uint16_t)); + ADD_MEM(dptr, &so->sin6_addr, 4 * sizeof(uint32_t)); + + return (t); +} + +/* + * Default inet socket token; forwards to au_to_sock_inet32(). + */ +token_t * +au_to_sock_inet(struct sockaddr_in *so) +{ + + return (au_to_sock_inet32(so)); +} + +/* + * token ID 1 byte + * audit ID 4 bytes + * effective user ID 4 bytes + * effective group ID 4 bytes + * real user ID 4 bytes + * real group ID 4 bytes + * process ID 4 bytes + * session ID 4 bytes + * terminal ID + * port ID 4 bytes/8 bytes (32-bit/64-bit value) + * machine address 4 bytes + */ +token_t * +au_to_subject32(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 9 * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_SUBJECT32); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT32(dptr, tid->port); + ADD_MEM(dptr, &tid->machine, sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_subject64(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + token_t *t; + u_char *dptr = NULL; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 7 * sizeof(u_int32_t) + + sizeof(u_int64_t) + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_SUBJECT64); +
ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT64(dptr, tid->port); + ADD_MEM(dptr, &tid->machine, sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_subject(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, gid_t rgid, + pid_t pid, au_asid_t sid, au_tid_t *tid) +{ + + return (au_to_subject32(auid, euid, egid, ruid, rgid, pid, sid, + tid)); +} + +/* + * token ID 1 byte + * audit ID 4 bytes + * effective user ID 4 bytes + * effective group ID 4 bytes + * real user ID 4 bytes + * real group ID 4 bytes + * process ID 4 bytes + * session ID 4 bytes + * terminal ID + * port ID 4 bytes/8 bytes (32-bit/64-bit value) + * address type/length 4 bytes + * machine address 4/16 bytes + */ +token_t * +au_to_subject32_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + token_t *t; + u_char *dptr = NULL; + + KASSERT((tid->at_type == AU_IPv4) || (tid->at_type == AU_IPv6), + ("au_to_subject32_ex: type %u", (unsigned int)tid->at_type)); + if (tid->at_type == AU_IPv6) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 13 * + sizeof(u_int32_t)); + else + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + 10 * + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_SUBJECT32_EX); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT32(dptr, tid->at_port); + ADD_U_INT32(dptr, tid->at_type); + if (tid->at_type == AU_IPv6) + ADD_MEM(dptr, &tid->at_addr[0], 4 * sizeof(u_int32_t)); + else + ADD_MEM(dptr, &tid->at_addr[0], sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_subject64_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + token_t *t = NULL; + u_char *dptr = NULL; + + if 
(tid->at_type == AU_IPv4) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 7 * sizeof(u_int32_t) + sizeof(u_int64_t) + + 2 * sizeof(u_int32_t)); + else if (tid->at_type == AU_IPv6) + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + + 7 * sizeof(u_int32_t) + sizeof(u_int64_t) + + 5 * sizeof(u_int32_t)); + else + panic("au_to_subject64_ex: invalid at_type (%d)", + tid->at_type); + + ADD_U_CHAR(dptr, AUT_SUBJECT64_EX); + ADD_U_INT32(dptr, auid); + ADD_U_INT32(dptr, euid); + ADD_U_INT32(dptr, egid); + ADD_U_INT32(dptr, ruid); + ADD_U_INT32(dptr, rgid); + ADD_U_INT32(dptr, pid); + ADD_U_INT32(dptr, sid); + ADD_U_INT64(dptr, tid->at_port); + ADD_U_INT32(dptr, tid->at_type); + if (tid->at_type == AU_IPv6) + ADD_MEM(dptr, &tid->at_addr[0], 4 * sizeof(u_int32_t)); + else + ADD_MEM(dptr, &tid->at_addr[0], sizeof(u_int32_t)); + + return (t); +} + +token_t * +au_to_subject_ex(au_id_t auid, uid_t euid, gid_t egid, uid_t ruid, + gid_t rgid, pid_t pid, au_asid_t sid, au_tid_addr_t *tid) +{ + + return (au_to_subject32_ex(auid, euid, egid, ruid, rgid, pid, sid, + tid)); +} + +#if !defined(_KERNEL) && !defined(KERNEL) && defined(HAVE_AUDIT_SYSCALLS) +/* + * Collects audit information for the current process + * and creates a subject token from it + */ +token_t * +au_to_me(void) +{ + auditinfo_t auinfo; + + if (getaudit(&auinfo) != 0) + return (NULL); + + return (au_to_subject32(auinfo.ai_auid, geteuid(), getegid(), + getuid(), getgid(), getpid(), auinfo.ai_asid, &auinfo.ai_termid)); +} +#endif + +#if defined(_KERNEL) || defined(KERNEL) +static token_t * +au_to_exec_strings(const char *strs, int count, u_char type) +{ + token_t *t; + u_char *dptr = NULL; + u_int32_t totlen; + int ctr; + const char *p; + + totlen = 0; + ctr = count; + p = strs; + while (ctr-- > 0) { + totlen += strlen(p) + 1; + p = strs + totlen; + } + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + totlen); + ADD_U_CHAR(dptr, type); + ADD_U_INT32(dptr, count); + ADD_STRING(dptr, strs, totlen); + + return (t); +} + 
+/* + * token ID 1 byte + * count 4 bytes + * text count null-terminated strings + */ +token_t * +au_to_exec_args(char *args, int argc) +{ + + return (au_to_exec_strings(args, argc, AUT_EXEC_ARGS)); +} + +/* + * token ID 1 byte + * count 4 bytes + * text count null-terminated strings + */ +token_t * +au_to_exec_env(char *envs, int envc) +{ + + return (au_to_exec_strings(envs, envc, AUT_EXEC_ENV)); +} +#else +/* + * token ID 1 byte + * count 4 bytes + * text count null-terminated strings + */ +token_t * +au_to_exec_args(char **argv) +{ + token_t *t; + u_char *dptr = NULL; + const char *nextarg; + int i, count = 0; + size_t totlen = 0; + + nextarg = *argv; + + while (nextarg != NULL) { + int nextlen; + + nextlen = strlen(nextarg); + totlen += nextlen + 1; + count++; + nextarg = *(argv + count); + } + + totlen += count * sizeof(char); /* nul terminations. */ + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + totlen); + + ADD_U_CHAR(dptr, AUT_EXEC_ARGS); + ADD_U_INT32(dptr, count); + + for (i = 0; i < count; i++) { + nextarg = *(argv + i); + ADD_MEM(dptr, nextarg, strlen(nextarg) + 1); + } + + return (t); +} + +/* + * token ID 1 byte + * zonename length 2 bytes + * zonename N bytes + 1 terminating NULL byte + */ +token_t * +au_to_zonename(char *zonename) +{ + u_char *dptr = NULL; + u_int16_t textlen; + token_t *t; + + textlen = strlen(zonename); + textlen += 1; + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + textlen); + ADD_U_CHAR(dptr, AUT_ZONENAME); + ADD_U_INT16(dptr, textlen); + ADD_STRING(dptr, zonename, textlen); + return (t); +} + +/* + * token ID 1 byte + * count 4 bytes + * text count null-terminated strings + */ +token_t * +au_to_exec_env(char **envp) +{ + token_t *t; + u_char *dptr = NULL; + int i, count = 0; + size_t totlen = 0; + const char *nextenv; + + nextenv = *envp; + + while (nextenv != NULL) { + int nextlen; + + nextlen = strlen(nextenv); + totlen += nextlen + 1; + count++; + nextenv = *(envp + count); + } + + 
GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + totlen); + + ADD_U_CHAR(dptr, AUT_EXEC_ENV); + ADD_U_INT32(dptr, count); + + for (i = 0; i < count; i++) { + nextenv = *(envp + i); + ADD_MEM(dptr, nextenv, strlen(nextenv) + 1); + } + + return (t); +} +#endif /* !(defined(_KERNEL) || defined(KERNEL)) */ + +/* + * token ID 1 byte + * record byte count 4 bytes + * version # 1 byte + * event type 2 bytes + * event modifier 2 bytes + * address type/length 4 bytes + * machine address 4 bytes/16 bytes (IPv4/IPv6 address) + * seconds of time 4 bytes/8 bytes (32/64-bits) + * milliseconds of time 4 bytes/8 bytes (32/64-bits) + */ +token_t * +au_to_header32_ex_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm, struct auditinfo_addr *aia) +{ + token_t *t; + u_char *dptr = NULL; + u_int32_t timems; + struct au_tid_addr *tid; + + tid = &aia->ai_termid; + KASSERT(tid->at_type == AU_IPv4 || tid->at_type == AU_IPv6, + ("au_to_header32_ex_tm: invalid address family")); + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + + sizeof(u_char) + 2 * sizeof(u_int16_t) + 3 * sizeof(u_int32_t) + + tid->at_type); + + ADD_U_CHAR(dptr, AUT_HEADER32_EX); + ADD_U_INT32(dptr, rec_size); + ADD_U_CHAR(dptr, AUDIT_HEADER_VERSION_OPENBSM); + ADD_U_INT16(dptr, e_type); + ADD_U_INT16(dptr, e_mod); + ADD_U_INT32(dptr, tid->at_type); + if (tid->at_type == AU_IPv6) + ADD_MEM(dptr, &tid->at_addr[0], 4 * sizeof(u_int32_t)); + else + ADD_MEM(dptr, &tid->at_addr[0], sizeof(u_int32_t)); + timems = tm.tv_usec / 1000; + /* Add the timestamp */ + ADD_U_INT32(dptr, tm.tv_sec); + ADD_U_INT32(dptr, timems); /* We need time in ms. 
*/ + return (t); +} + +/* + * token ID 1 byte + * record byte count 4 bytes + * version # 1 byte [2] + * event type 2 bytes + * event modifier 2 bytes + * seconds of time 4 bytes/8 bytes (32-bit/64-bit value) + * milliseconds of time 4 bytes/8 bytes (32-bit/64-bit value) + */ +token_t * +au_to_header32_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm) +{ + token_t *t; + u_char *dptr = NULL; + u_int32_t timems; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + + sizeof(u_char) + 2 * sizeof(u_int16_t) + 2 * sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_HEADER32); + ADD_U_INT32(dptr, rec_size); + ADD_U_CHAR(dptr, AUDIT_HEADER_VERSION_OPENBSM); + ADD_U_INT16(dptr, e_type); + ADD_U_INT16(dptr, e_mod); + + timems = tm.tv_usec/1000; + /* Add the timestamp */ + ADD_U_INT32(dptr, tm.tv_sec); + ADD_U_INT32(dptr, timems); /* We need time in ms. */ + + return (t); +} + +token_t * +au_to_header64_tm(int rec_size, au_event_t e_type, au_emod_t e_mod, + struct timeval tm) +{ + token_t *t; + u_char *dptr = NULL; + u_int32_t timems; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int32_t) + + sizeof(u_char) + 2 * sizeof(u_int16_t) + 2 * sizeof(u_int64_t)); + + ADD_U_CHAR(dptr, AUT_HEADER64); + ADD_U_INT32(dptr, rec_size); + ADD_U_CHAR(dptr, AUDIT_HEADER_VERSION_OPENBSM); + ADD_U_INT16(dptr, e_type); + ADD_U_INT16(dptr, e_mod); + + timems = tm.tv_usec/1000; + /* Add the timestamp */ + ADD_U_INT64(dptr, tm.tv_sec); + ADD_U_INT64(dptr, timems); /* We need time in ms. 
*/ + + return (t); +} + +/* + * token ID 1 byte + * trailer magic number 2 bytes + * record byte count 4 bytes + */ +token_t * +au_to_trailer(int rec_size) +{ + token_t *t; + u_char *dptr = NULL; + u_int16_t magic = AUT_TRAILER_MAGIC; + + GET_TOKEN_AREA(t, dptr, sizeof(u_char) + sizeof(u_int16_t) + + sizeof(u_int32_t)); + + ADD_U_CHAR(dptr, AUT_TRAILER); + ADD_U_INT16(dptr, magic); + ADD_U_INT32(dptr, rec_size); + + return (t); +} +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_ioctl.h b/bsd/security/audit/audit_ioctl.h new file mode 100644 index 000000000..806f8ae93 --- /dev/null +++ b/bsd/security/audit/audit_ioctl.h @@ -0,0 +1,82 @@ +/*- + * Copyright (c) 2006 Robert N. M. Watson + * Copyright (c) 2008 Apple, Inc. + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SECURITY_AUDIT_AUDIT_IOCTL_H_ +#define _SECURITY_AUDIT_AUDIT_IOCTL_H_ + +#define AUDITPIPE_IOBASE 'A' + +/* + * Data structures used for complex ioctl arguments. Do not change existing + * structures, add new revised ones to be used by new ioctls, and keep the + * old structures and ioctls for backwards compatibility. + */ +struct auditpipe_ioctl_preselect { + au_id_t aip_auid; + au_mask_t aip_mask; +}; + +/* + * Possible modes of operation for audit pipe preselection. + */ +#define AUDITPIPE_PRESELECT_MODE_TRAIL 1 /* Global audit trail. */ +#define AUDITPIPE_PRESELECT_MODE_LOCAL 2 /* Local audit trail. */ + +/* + * Ioctls to read and control the behavior of individual audit pipe devices. 
+ */ +#define AUDITPIPE_GET_QLEN _IOR(AUDITPIPE_IOBASE, 1, u_int) +#define AUDITPIPE_GET_QLIMIT _IOR(AUDITPIPE_IOBASE, 2, u_int) +#define AUDITPIPE_SET_QLIMIT _IOW(AUDITPIPE_IOBASE, 3, u_int) +#define AUDITPIPE_GET_QLIMIT_MIN _IOR(AUDITPIPE_IOBASE, 4, u_int) +#define AUDITPIPE_GET_QLIMIT_MAX _IOR(AUDITPIPE_IOBASE, 5, u_int) +#define AUDITPIPE_GET_PRESELECT_FLAGS _IOR(AUDITPIPE_IOBASE, 6, au_mask_t) +#define AUDITPIPE_SET_PRESELECT_FLAGS _IOW(AUDITPIPE_IOBASE, 7, au_mask_t) +#define AUDITPIPE_GET_PRESELECT_NAFLAGS _IOR(AUDITPIPE_IOBASE, 8, au_mask_t) +#define AUDITPIPE_SET_PRESELECT_NAFLAGS _IOW(AUDITPIPE_IOBASE, 9, au_mask_t) +#define AUDITPIPE_GET_PRESELECT_AUID _IOR(AUDITPIPE_IOBASE, 10, \ + struct auditpipe_ioctl_preselect) +#define AUDITPIPE_SET_PRESELECT_AUID _IOW(AUDITPIPE_IOBASE, 11, \ + struct auditpipe_ioctl_preselect) +#define AUDITPIPE_DELETE_PRESELECT_AUID _IOW(AUDITPIPE_IOBASE, 12, au_id_t) +#define AUDITPIPE_FLUSH_PRESELECT_AUID _IO(AUDITPIPE_IOBASE, 13) +#define AUDITPIPE_GET_PRESELECT_MODE _IOR(AUDITPIPE_IOBASE, 14, int) +#define AUDITPIPE_SET_PRESELECT_MODE _IOW(AUDITPIPE_IOBASE, 15, int) +#define AUDITPIPE_FLUSH _IO(AUDITPIPE_IOBASE, 16) +#define AUDITPIPE_GET_MAXAUDITDATA _IOR(AUDITPIPE_IOBASE, 17, u_int) + +/* + * Ioctls to retrieve audit pipe statistics. + */ +#define AUDITPIPE_GET_INSERTS _IOR(AUDITPIPE_IOBASE, 100, u_int64_t) +#define AUDITPIPE_GET_READS _IOR(AUDITPIPE_IOBASE, 101, u_int64_t) +#define AUDITPIPE_GET_DROPS _IOR(AUDITPIPE_IOBASE, 102, u_int64_t) +#define AUDITPIPE_GET_TRUNCATES _IOR(AUDITPIPE_IOBASE, 103, u_int64_t) + +#endif /* _SECURITY_AUDIT_AUDIT_IOCTL_H_ */ diff --git a/bsd/security/audit/audit_mac.c b/bsd/security/audit/audit_mac.c new file mode 100644 index 000000000..edddd58a7 --- /dev/null +++ b/bsd/security/audit/audit_mac.c @@ -0,0 +1,261 @@ +/*- + * Copyright (c) 1999-2008 Apple Inc. + * All rights reserved. 
+ * + * @APPLE_BSD_LICENSE_HEADER_START@ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @APPLE_BSD_LICENSE_HEADER_END@ + */ +/* + * NOTICE: This file was modified by McAfee Research in 2004 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if CONFIG_AUDIT + +#if CONFIG_MACF +#include +#include +#include +#include +#define MAC_ARG_PREFIX "arg: " +#define MAC_ARG_PREFIX_LEN 5 + +zone_t audit_mac_label_zone; +extern zone_t mac_audit_data_zone; + +void +audit_mac_init(void) +{ + /* Assume 3 MAC labels for each audit record: two for vnodes, + * one for creds. + */ + audit_mac_label_zone = zinit(MAC_AUDIT_LABEL_LEN, + AQ_HIWATER * 3*MAC_AUDIT_LABEL_LEN, 8192, "audit_mac_label_zone"); +} + +int +audit_mac_new(proc_t p, struct kaudit_record *ar) +{ + struct mac mac; + + /* + * Retrieve the MAC labels for the process. + */ + ar->k_ar.ar_cred_mac_labels = (char *)zalloc(audit_mac_label_zone); + if (ar->k_ar.ar_cred_mac_labels == NULL) + return (1); + mac.m_buflen = MAC_AUDIT_LABEL_LEN; + mac.m_string = ar->k_ar.ar_cred_mac_labels; + mac_cred_label_externalize_audit(p, &mac); + + /* + * grab space for the reconds. 
+ */ + ar->k_ar.ar_mac_records = (struct mac_audit_record_list_t *) + kalloc(sizeof(*ar->k_ar.ar_mac_records)); + if (ar->k_ar.ar_mac_records == NULL) { + zfree(audit_mac_label_zone, ar->k_ar.ar_cred_mac_labels); + return (1); + } + LIST_INIT(ar->k_ar.ar_mac_records); + ar->k_ar.ar_forced_by_mac = 0; + + return (0); +} + +void +audit_mac_free(struct kaudit_record *ar) +{ + struct mac_audit_record *head, *next; + + if (ar->k_ar.ar_vnode1_mac_labels != NULL) + zfree(audit_mac_label_zone, ar->k_ar.ar_vnode1_mac_labels); + if (ar->k_ar.ar_vnode2_mac_labels != NULL) + zfree(audit_mac_label_zone, ar->k_ar.ar_vnode2_mac_labels); + if (ar->k_ar.ar_cred_mac_labels != NULL) + zfree(audit_mac_label_zone, ar->k_ar.ar_cred_mac_labels); + if (ar->k_ar.ar_arg_mac_string != NULL) + kfree(ar->k_ar.ar_arg_mac_string, + MAC_MAX_LABEL_BUF_LEN + MAC_ARG_PREFIX_LEN); + + /* + * Free the audit data from the MAC policies. + */ + head = LIST_FIRST(ar->k_ar.ar_mac_records); + while (head != NULL) { + next = LIST_NEXT(head, records); + zfree(mac_audit_data_zone, head->data); + kfree(head, sizeof(*head)); + head = next; + } + kfree(ar->k_ar.ar_mac_records, sizeof(*ar->k_ar.ar_mac_records)); +} + +int +audit_mac_syscall_enter(unsigned short code, proc_t p, struct uthread *uthread, + kauth_cred_t my_cred, au_event_t event) +{ + int error; + + error = mac_audit_check_preselect(my_cred, code, + (void *)uthread->uu_arg); + if (error == MAC_AUDIT_YES) { + uthread->uu_ar = audit_new(event, p, uthread); + uthread->uu_ar->k_ar.ar_forced_by_mac = 1; + au_to_text("Forced by a MAC policy"); + return (1); + } else if (error == MAC_AUDIT_NO) { + return (0); + } else if (error == MAC_AUDIT_DEFAULT) { + return (1); + } + + return (0); +} + +int +audit_mac_syscall_exit(unsigned short code, struct uthread *uthread, int error, + int retval) +{ + int mac_error; + + if (uthread->uu_ar == NULL) /* syscall wasn't audited */ + return (1); + + /* + * Note, no other postselect mechanism exists. 
If + * mac_audit_check_postselect returns MAC_AUDIT_NO, the record will be + * suppressed. Other values at this point result in the audit record + * being committed. This suppression behavior will probably go away in + * the port to 10.3.4. + */ + mac_error = mac_audit_check_postselect(kauth_cred_get(), code, + (void *) uthread->uu_arg, error, retval, + uthread->uu_ar->k_ar.ar_forced_by_mac); + + if (mac_error == MAC_AUDIT_YES) + uthread->uu_ar->k_ar_commit |= AR_COMMIT_KERNEL; + else if (mac_error == MAC_AUDIT_NO) { + audit_free(uthread->uu_ar); + return (1); + } + return (0); +} + +/* + * This function is called by the MAC Framework to add audit data + * from a policy to the current audit record. + */ +int +audit_mac_data(int type, int len, u_char *data) { + struct kaudit_record *cur; + struct mac_audit_record *record; + + if (audit_enabled == 0) { + kfree(data, len); + return (ENOTSUP); + } + + cur = currecord(); + if (cur == NULL) { + kfree(data, len); + return (ENOTSUP); + } + + /* + * XXX: Note that we silently drop the audit data if this + * allocation fails - this is consistent with the rest of the + * audit implementation. + */ + record = kalloc(sizeof(*record)); + if (record == NULL) { + kfree(data, len); + return (0); + } + + record->type = type; + record->length = len; + record->data = data; + LIST_INSERT_HEAD(cur->k_ar.ar_mac_records, record, records); + + return (0); +} + +void +audit_arg_mac_string(struct kaudit_record *ar, char *string) +{ + + if (ar->k_ar.ar_arg_mac_string == NULL) + ar->k_ar.ar_arg_mac_string = + kalloc(MAC_MAX_LABEL_BUF_LEN + MAC_ARG_PREFIX_LEN); + + /* + * XXX This should be a rare event. If kalloc() returns NULL, + * the system is low on kernel virtual memory. To be + * consistent with the rest of audit, just return + * (may need to panic if required to for audit). 
+ */ + if (ar->k_ar.ar_arg_mac_string == NULL) + if (ar->k_ar.ar_arg_mac_string == NULL) + return; + + strncpy(ar->k_ar.ar_arg_mac_string, MAC_ARG_PREFIX, + MAC_ARG_PREFIX_LEN); + strncpy(ar->k_ar.ar_arg_mac_string + MAC_ARG_PREFIX_LEN, string, + MAC_MAX_LABEL_BUF_LEN); + ARG_SET_VALID(ar, ARG_MAC_STRING); +} +#endif /* MAC */ + +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_pipe.c b/bsd/security/audit/audit_pipe.c new file mode 100644 index 000000000..9f64bba84 --- /dev/null +++ b/bsd/security/audit/audit_pipe.c @@ -0,0 +1,1140 @@ +/*- + * Copyright (c) 2006 Robert N. M. Watson + * Copyright (c) 2008-2009 Apple, Inc. + * All rights reserved. + * + * This software was developed by Robert Watson for the TrustedBSD Project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if CONFIG_AUDIT +/* + * Implementation of a clonable special device providing a live stream of BSM + * audit data. Consumers receive a "tee" of the system audit trail by + * default, but may also define alternative event selections using ioctls. + * This interface provides unreliable but timely access to audit events. + * Consumers should be very careful to avoid introducing event cycles. + */ + +/* + * Memory types. + */ +static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes"); +static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent", + "Audit pipe entries and buffers"); +static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel", + "Audit pipe preselection structure"); + +/* + * Audit pipe buffer parameters. + */ +#define AUDIT_PIPE_QLIMIT_DEFAULT (128) +#define AUDIT_PIPE_QLIMIT_MIN (1) +#define AUDIT_PIPE_QLIMIT_MAX (1024) + +/* + * Description of an entry in an audit_pipe. + */ +struct audit_pipe_entry { + void *ape_record; + u_int ape_record_len; + TAILQ_ENTRY(audit_pipe_entry) ape_queue; +}; + +/* + * Audit pipes allow processes to express "interest" in the set of records + * that are delivered via the pipe. 
 * They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * We may want to consider a more space/time-efficient data structure once
 * usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	au_id_t					 app_auid;	/* auid this entry is keyed on. */
	au_mask_t				 app_mask;	/* Mask stored for that auid. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* Link in ap_preselect_list. */
};

/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 */
/* Bits for audit_pipe.ap_flags. */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;	/* AUDIT_PIPE_* bits. */

	struct selinfo			 ap_selinfo;	/* Passed to selwakeup(). */
	pid_t				 ap_sigio;	/* Passed to pgsigio(). */

	/*
	 * Per-pipe mutex protecting most fields in this data structure.
	 */
	struct mtx			 ap_mtx;

	/*
	 * Per-pipe sleep lock serializing user-generated reads and flushes.
	 * uiomove() is called to copy out the current head record's data
	 * while the record remains in the queue, so we prevent other threads
	 * from removing it using this lock.
	 */
	struct slck			 ap_sx;

	/*
	 * Condition variable to signal when data has been delivered to a
	 * pipe.
	 */
	struct cv			 ap_cv;

	/*
	 * Various queue-related variables: qlen and qlimit are a count of
	 * records in the queue; qbyteslen is the number of bytes of data
	 * across all records, and qoffset is the amount read so far of the
	 * first record in the queue.  The number of bytes available for
	 * reading in the queue is qbyteslen - qoffset.
	 */
	u_int				 ap_qlen;
	u_int				 ap_qlimit;
	u_int				 ap_qbyteslen;
	u_int				 ap_qoffset;

	/*
	 * Per-pipe operation statistics.
	 */
	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.
	 */
	int				 ap_preselect_mode;	/* AUDITPIPE_PRESELECT_MODE_*. */
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.  Protected by a combination of ap_mtx
	 * and ap_sx.  Note particularly that *both* locks are required to
	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};

/* Wrappers around the per-pipe mutex, ap_mtx. */
#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
					    "audit_pipe_mtx", NULL, MTX_DEF)
#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)

/* Wrappers around the per-pipe sleep lock, ap_sx. */
#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	slck_destroy(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	slck_init(&(ap)->ap_sx, "audit_pipe_sx")
#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	slck_assert(&(ap)->ap_sx, SA_XLOCKED)
#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	slck_lock_sig(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_XUNLOCK(ap)	slck_unlock(&(ap)->ap_sx)


/*
 * Global list of audit pipes, rwlock to protect it.  Individual record
 * queues on pipes are protected by per-pipe locks; these locks synchronize
 * between threads walking the list to deliver to individual pipes and add/
 * remove of pipes, and are mostly acquired for read.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct rwlock		 audit_pipe_lock;

/* Wrappers around the global list lock, audit_pipe_lock. */
#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
					    "audit_pipe_list_lock")
#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
					    RA_WLOCKED)
#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)

/*
 * Cloning related variables and constants.
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
#define	MAX_AUDIT_PIPES		32	/* Also the size of audit_pipe_dtab[]. */
static int audit_pipe_major;

/*
 * dev_t doesn't have a pointer for "softc" data.  So we have to keep track of
 * it with the following global array (indexed by the minor number).
 *
 * XXX We may want to dynamically grow this as needed.
 */
static struct audit_pipe	*audit_pipe_dtab[MAX_AUDIT_PIPES];


/*
 * Special device methods and definition.
 */
static open_close_fcn_t		audit_pipe_open;
static open_close_fcn_t		audit_pipe_close;
static read_write_fcn_t		audit_pipe_read;
static ioctl_fcn_t		audit_pipe_ioctl;
static select_fcn_t		audit_pipe_poll;

/*
 * Character device switch for the pipe device.  Only open, close, read,
 * ioctl and select are wired to audit_pipe_* handlers; the remaining
 * entries use the eno_* stubs.
 */
static struct cdevsw audit_pipe_cdevsw = {
	.d_open = audit_pipe_open,
	.d_close = audit_pipe_close,
	.d_read = audit_pipe_read,
	.d_write = eno_rdwrt,
	.d_ioctl = audit_pipe_ioctl,
	.d_stop = eno_stop,
	.d_reset = eno_reset,
	.d_ttys = NULL,
	.d_select = audit_pipe_poll,
	.d_mmap = eno_mmap,
	.d_strategy = eno_strat,
	.d_type = 0
};

/*
 * Some global statistics on audit pipes.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */

/*
 * Free an audit pipe entry.
+ */ +static void +audit_pipe_entry_free(struct audit_pipe_entry *ape) +{ + + free(ape->ape_record, M_AUDIT_PIPE_ENTRY); + free(ape, M_AUDIT_PIPE_ENTRY); +} + +/* + * Find an audit pipe preselection specification for an auid, if any. + */ +static struct audit_pipe_preselect * +audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid) +{ + struct audit_pipe_preselect *app; + + AUDIT_PIPE_LOCK_ASSERT(ap); + + TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) { + if (app->app_auid == auid) + return (app); + } + return (NULL); +} + +/* + * Query the per-pipe mask for a specific auid. + */ +static int +audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid, + au_mask_t *maskp) +{ + struct audit_pipe_preselect *app; + int error; + + AUDIT_PIPE_LOCK(ap); + app = audit_pipe_preselect_find(ap, auid); + if (app != NULL) { + *maskp = app->app_mask; + error = 0; + } else + error = ENOENT; + AUDIT_PIPE_UNLOCK(ap); + return (error); +} + +/* + * Set the per-pipe mask for a specific auid. Add a new entry if needed; + * otherwise, update the current entry. + */ +static void +audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask) +{ + struct audit_pipe_preselect *app, *app_new; + + /* + * Pessimistically assume that the auid doesn't already have a mask + * set, and allocate. We will free it if it is unneeded. + */ + app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK); + AUDIT_PIPE_LOCK(ap); + app = audit_pipe_preselect_find(ap, auid); + if (app == NULL) { + app = app_new; + app_new = NULL; + app->app_auid = auid; + TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list); + } + app->app_mask = mask; + AUDIT_PIPE_UNLOCK(ap); + if (app_new != NULL) + free(app_new, M_AUDIT_PIPE_PRESELECT); +} + +/* + * Delete a per-auid mask on an audit pipe. 
+ */ +static int +audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid) +{ + struct audit_pipe_preselect *app; + int error; + + AUDIT_PIPE_LOCK(ap); + app = audit_pipe_preselect_find(ap, auid); + if (app != NULL) { + TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list); + error = 0; + } else + error = ENOENT; + AUDIT_PIPE_UNLOCK(ap); + if (app != NULL) + free(app, M_AUDIT_PIPE_PRESELECT); + return (error); +} + +/* + * Delete all per-auid masks on an audit pipe. + */ +static void +audit_pipe_preselect_flush_locked(struct audit_pipe *ap) +{ + struct audit_pipe_preselect *app; + + AUDIT_PIPE_LOCK_ASSERT(ap); + + while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) { + TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list); + free(app, M_AUDIT_PIPE_PRESELECT); + } +} + +static void +audit_pipe_preselect_flush(struct audit_pipe *ap) +{ + + AUDIT_PIPE_LOCK(ap); + audit_pipe_preselect_flush_locked(ap); + AUDIT_PIPE_UNLOCK(ap); +} + +/*- + * Determine whether a specific audit pipe matches a record with these + * properties. Algorithm is as follows: + * + * - If the pipe is configured to track the default trail configuration, then + * use the results of global preselection matching. + * - If not, search for a specifically configured auid entry matching the + * event. If an entry is found, use that. + * - Otherwise, use the default flags or naflags configured for the pipe. 
+ */ +static int +audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid, + au_event_t event, au_class_t class, int sorf, int trail_preselect) +{ + struct audit_pipe_preselect *app; + + AUDIT_PIPE_LOCK_ASSERT(ap); + + switch (ap->ap_preselect_mode) { + case AUDITPIPE_PRESELECT_MODE_TRAIL: + return (trail_preselect); + + case AUDITPIPE_PRESELECT_MODE_LOCAL: + app = audit_pipe_preselect_find(ap, auid); + if (app == NULL) { + if (auid == (uid_t)AU_DEFAUDITID) + return (au_preselect(event, class, + &ap->ap_preselect_naflags, sorf)); + else + return (au_preselect(event, class, + &ap->ap_preselect_flags, sorf)); + } else + return (au_preselect(event, class, &app->app_mask, + sorf)); + + default: + panic("audit_pipe_preselect_check: mode %d", + ap->ap_preselect_mode); + } + + return (0); +} + +/* + * Determine whether there exists a pipe interested in a record with specific + * properties. + */ +int +audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class, + int sorf, int trail_preselect) +{ + struct audit_pipe *ap; + + /* Lockless read to avoid acquiring the global lock if not needed. */ + if (TAILQ_EMPTY(&audit_pipe_list)) + return (0); + + AUDIT_PIPE_LIST_RLOCK(); + TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) { + AUDIT_PIPE_LOCK(ap); + if (audit_pipe_preselect_check(ap, auid, event, class, sorf, + trail_preselect)) { + AUDIT_PIPE_UNLOCK(ap); + AUDIT_PIPE_LIST_RUNLOCK(); + return (1); + } + AUDIT_PIPE_UNLOCK(ap); + } + AUDIT_PIPE_LIST_RUNLOCK(); + return (0); +} + +/* + * Append individual record to a queue -- allocate queue-local buffer, and + * add to the queue. If the queue is full or we can't allocate memory, drop + * the newest record. 
+ */ +static void +audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len) +{ + struct audit_pipe_entry *ape; + + AUDIT_PIPE_LOCK_ASSERT(ap); + + if (ap->ap_qlen >= ap->ap_qlimit) { + ap->ap_drops++; + audit_pipe_drops++; + return; + } + + ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO); + if (ape == NULL) { + ap->ap_drops++; + audit_pipe_drops++; + return; + } + + ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT); + if (ape->ape_record == NULL) { + free(ape, M_AUDIT_PIPE_ENTRY); + ap->ap_drops++; + audit_pipe_drops++; + return; + } + + bcopy(record, ape->ape_record, record_len); + ape->ape_record_len = record_len; + + TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue); + ap->ap_inserts++; + ap->ap_qlen++; + ap->ap_qbyteslen += ape->ape_record_len; + selwakeup(&ap->ap_selinfo); + if (ap->ap_flags & AUDIT_PIPE_ASYNC) + pgsigio(ap->ap_sigio, SIGIO); +#if 0 /* XXX - fix select */ + selwakeuppri(&ap->ap_selinfo, PSOCK); + KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0); + if (ap->ap_flags & AUDIT_PIPE_ASYNC) + pgsigio(&ap->ap_sigio, SIGIO, 0); +#endif + cv_broadcast(&ap->ap_cv); +} + +/* + * audit_pipe_submit(): audit_worker submits audit records via this + * interface, which arranges for them to be delivered to pipe queues. + */ +void +audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf, + int trail_select, void *record, u_int record_len) +{ + struct audit_pipe *ap; + + /* + * Lockless read to avoid lock overhead if pipes are not in use. + */ + if (TAILQ_FIRST(&audit_pipe_list) == NULL) + return; + + AUDIT_PIPE_LIST_RLOCK(); + TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) { + AUDIT_PIPE_LOCK(ap); + if (audit_pipe_preselect_check(ap, auid, event, class, sorf, + trail_select)) + audit_pipe_append(ap, record, record_len); + AUDIT_PIPE_UNLOCK(ap); + } + AUDIT_PIPE_LIST_RUNLOCK(); + + /* Unlocked increment. 
*/ + audit_pipe_records++; +} + +/* + * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that + * since we don't currently have selection information available, it is + * delivered to the pipe unconditionally. + * + * XXXRW: This is a bug. The BSM check routine for submitting a user record + * should parse that information and return it. + */ +void +audit_pipe_submit_user(void *record, u_int record_len) +{ + struct audit_pipe *ap; + + /* + * Lockless read to avoid lock overhead if pipes are not in use. + */ + if (TAILQ_FIRST(&audit_pipe_list) == NULL) + return; + + AUDIT_PIPE_LIST_RLOCK(); + TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) { + AUDIT_PIPE_LOCK(ap); + audit_pipe_append(ap, record, record_len); + AUDIT_PIPE_UNLOCK(ap); + } + AUDIT_PIPE_LIST_RUNLOCK(); + + /* Unlocked increment. */ + audit_pipe_records++; +} + +/* + * Allocate a new audit pipe. Connects the pipe, on success, to the global + * list and updates statistics. + */ +static struct audit_pipe * +audit_pipe_alloc(void) +{ + struct audit_pipe *ap; + + AUDIT_PIPE_LIST_WLOCK_ASSERT(); + + ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO); + if (ap == NULL) + return (NULL); + + ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT; + TAILQ_INIT(&ap->ap_queue); +#ifndef __APPLE__ + knlist_init(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap), NULL, NULL, + NULL); +#endif + AUDIT_PIPE_LOCK_INIT(ap); + AUDIT_PIPE_SX_LOCK_INIT(ap); + cv_init(&ap->ap_cv, "audit_pipe"); + + /* + * Default flags, naflags, and auid-specific preselection settings to + * 0. Initialize the mode to the global trail so that if praudit(1) + * is run on /dev/auditpipe, it sees events associated with the + * default trail. Pipe-aware application can clear the flag, set + * custom masks, and flush the pipe as needed. 
+ */ + bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags)); + bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags)); + TAILQ_INIT(&ap->ap_preselect_list); + ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL; + + /* + * Add to global list and update global statistics. + */ + TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list); + audit_pipe_count++; + audit_pipe_ever++; + + return (ap); +} + +/* + * Flush all records currently present in an audit pipe; assume mutex is held. + */ +static void +audit_pipe_flush(struct audit_pipe *ap) +{ + struct audit_pipe_entry *ape; + + AUDIT_PIPE_LOCK_ASSERT(ap); + + while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) { + TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue); + ap->ap_qbyteslen -= ape->ape_record_len; + audit_pipe_entry_free(ape); + ap->ap_qlen--; + } + ap->ap_qoffset = 0; + + KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen")); + KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen")); +} + +/* + * Free an audit pipe; this means freeing all preselection state and all + * records in the pipe. Assumes global write lock and pipe mutex are held to + * revent any new records from being inserted during the free, and that the + * audit pipe is still on the global list. + */ +static void +audit_pipe_free(struct audit_pipe *ap) +{ + + AUDIT_PIPE_LIST_WLOCK_ASSERT(); + AUDIT_PIPE_LOCK_ASSERT(ap); + + audit_pipe_preselect_flush_locked(ap); + audit_pipe_flush(ap); + cv_destroy(&ap->ap_cv); + AUDIT_PIPE_SX_LOCK_DESTROY(ap); + AUDIT_PIPE_LOCK_DESTROY(ap); +#ifndef __APPLE__ + knlist_destroy(&ap->ap_selinfo.si_note); +#endif + TAILQ_REMOVE(&audit_pipe_list, ap, ap_list); + free(ap, M_AUDIT_PIPE); + audit_pipe_count--; +} + +/* + * Audit pipe clone routine -- provides a new minor number, or to return (-1), + * if one can't be provided. Called with DEVFS_LOCK held. 
+ */ +static int +audit_pipe_clone(__unused dev_t dev, int action) +{ + int i; + + if (action == DEVFS_CLONE_ALLOC) { + for(i = 0; i < MAX_AUDIT_PIPES; i++) + if (audit_pipe_dtab[i] == NULL) + return (i); + + /* + * XXX Should really return -1 here but that seems to hang + * things in devfs. Instead return 0 and let _open() tell + * userland the bad news. + */ + return (0); + } + + return (-1); +} + +/* + * Audit pipe open method. Explicit privilege check isn't used as this + * allows file permissions on the special device to be used to grant audit + * review access. Those file permissions should be managed carefully. + */ +static int +audit_pipe_open(dev_t dev, __unused int flags, __unused int devtype, + __unused proc_t p) +{ + struct audit_pipe *ap; + int u; + + u = minor(dev); + if (u < 0 || u > MAX_AUDIT_PIPES) + return (ENXIO); + + AUDIT_PIPE_LIST_WLOCK(); + ap = audit_pipe_dtab[u]; + if (ap == NULL) { + ap = audit_pipe_alloc(); + if (ap == NULL) { + AUDIT_PIPE_LIST_WUNLOCK(); + return (ENOMEM); + } + audit_pipe_dtab[u] = ap; + } else { + KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open")); + AUDIT_PIPE_LIST_WUNLOCK(); + return (EBUSY); + } + ap->ap_open = 1; + AUDIT_PIPE_LIST_WUNLOCK(); +#ifndef __APPLE__ + fsetown(td->td_proc->p_pid, &ap->ap_sigio); +#endif + return (0); +} + +/* + * Close audit pipe, tear down all records, etc. + */ +static int +audit_pipe_close(dev_t dev, __unused int flags, __unused int devtype, + __unused proc_t p) +{ + struct audit_pipe *ap; + int u; + + u = minor(dev); + ap = audit_pipe_dtab[u]; + KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL")); + KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open")); + +#ifndef __APPLE__ + funsetown(&ap->ap_sigio); +#endif + AUDIT_PIPE_LIST_WLOCK(); + AUDIT_PIPE_LOCK(ap); + ap->ap_open = 0; + audit_pipe_free(ap); + audit_pipe_dtab[u] = NULL; + AUDIT_PIPE_LIST_WUNLOCK(); + return (0); +} + +/* + * Audit pipe ioctl() routine. Handle file descriptor and audit pipe layer + * commands. 
+ */ +static int +audit_pipe_ioctl(dev_t dev, u_long cmd, caddr_t data, + __unused int flag, __unused proc_t p) +{ + struct auditpipe_ioctl_preselect *aip; + struct audit_pipe *ap; + au_mask_t *maskp; + int error, mode; + au_id_t auid; + + ap = audit_pipe_dtab[minor(dev)]; + KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL")); + + /* + * Audit pipe ioctls: first come standard device node ioctls, then + * manipulation of pipe settings, and finally, statistics query + * ioctls. + */ + switch (cmd) { + case FIONBIO: + AUDIT_PIPE_LOCK(ap); + if (*(int *)data) + ap->ap_flags |= AUDIT_PIPE_NBIO; + else + ap->ap_flags &= ~AUDIT_PIPE_NBIO; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case FIONREAD: + AUDIT_PIPE_LOCK(ap); + *(int *)data = ap->ap_qbyteslen - ap->ap_qoffset; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case FIOASYNC: + AUDIT_PIPE_LOCK(ap); + if (*(int *)data) + ap->ap_flags |= AUDIT_PIPE_ASYNC; + else + ap->ap_flags &= ~AUDIT_PIPE_ASYNC; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + +#ifndef __APPLE__ + case FIOSETOWN: + error = fsetown(*(int *)data, &ap->ap_sigio); + break; + + case FIOGETOWN: + *(int *)data = fgetown(&ap->ap_sigio); + error = 0; + break; +#endif /* !__APPLE__ */ + + case AUDITPIPE_GET_QLEN: + *(u_int *)data = ap->ap_qlen; + error = 0; + break; + + case AUDITPIPE_GET_QLIMIT: + *(u_int *)data = ap->ap_qlimit; + error = 0; + break; + + case AUDITPIPE_SET_QLIMIT: + /* Lockless integer write. 
*/ + if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN || + *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) { + ap->ap_qlimit = *(u_int *)data; + error = 0; + } else + error = EINVAL; + break; + + case AUDITPIPE_GET_QLIMIT_MIN: + *(u_int *)data = AUDIT_PIPE_QLIMIT_MIN; + error = 0; + break; + + case AUDITPIPE_GET_QLIMIT_MAX: + *(u_int *)data = AUDIT_PIPE_QLIMIT_MAX; + error = 0; + break; + + case AUDITPIPE_GET_PRESELECT_FLAGS: + AUDIT_PIPE_LOCK(ap); + maskp = (au_mask_t *)data; + *maskp = ap->ap_preselect_flags; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_SET_PRESELECT_FLAGS: + AUDIT_PIPE_LOCK(ap); + maskp = (au_mask_t *)data; + ap->ap_preselect_flags = *maskp; + AUDIT_CHECK_IF_KEVENTS_MASK(ap->ap_preselect_flags); + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_GET_PRESELECT_NAFLAGS: + AUDIT_PIPE_LOCK(ap); + maskp = (au_mask_t *)data; + *maskp = ap->ap_preselect_naflags; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_SET_PRESELECT_NAFLAGS: + AUDIT_PIPE_LOCK(ap); + maskp = (au_mask_t *)data; + ap->ap_preselect_naflags = *maskp; + AUDIT_CHECK_IF_KEVENTS_MASK(ap->ap_preselect_naflags); + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_GET_PRESELECT_AUID: + aip = (struct auditpipe_ioctl_preselect *)data; + error = audit_pipe_preselect_get(ap, aip->aip_auid, + &aip->aip_mask); + break; + + case AUDITPIPE_SET_PRESELECT_AUID: + aip = (struct auditpipe_ioctl_preselect *)data; + audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask); + error = 0; + break; + + case AUDITPIPE_DELETE_PRESELECT_AUID: + auid = *(au_id_t *)data; + error = audit_pipe_preselect_delete(ap, auid); + break; + + case AUDITPIPE_FLUSH_PRESELECT_AUID: + audit_pipe_preselect_flush(ap); + error = 0; + break; + + case AUDITPIPE_GET_PRESELECT_MODE: + AUDIT_PIPE_LOCK(ap); + *(int *)data = ap->ap_preselect_mode; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_SET_PRESELECT_MODE: + mode = *(int *)data; + switch (mode) { + case 
AUDITPIPE_PRESELECT_MODE_TRAIL: + case AUDITPIPE_PRESELECT_MODE_LOCAL: + AUDIT_PIPE_LOCK(ap); + ap->ap_preselect_mode = mode; + AUDIT_PIPE_UNLOCK(ap); + error = 0; + break; + + default: + error = EINVAL; + } + break; + + case AUDITPIPE_FLUSH: + if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0) + return (EINTR); + AUDIT_PIPE_LOCK(ap); + audit_pipe_flush(ap); + AUDIT_PIPE_UNLOCK(ap); + AUDIT_PIPE_SX_XUNLOCK(ap); + error = 0; + break; + + case AUDITPIPE_GET_MAXAUDITDATA: + *(u_int *)data = MAXAUDITDATA; + error = 0; + break; + + case AUDITPIPE_GET_INSERTS: + *(u_int *)data = ap->ap_inserts; + error = 0; + break; + + case AUDITPIPE_GET_READS: + *(u_int *)data = ap->ap_reads; + error = 0; + break; + + case AUDITPIPE_GET_DROPS: + *(u_int *)data = ap->ap_drops; + error = 0; + break; + + case AUDITPIPE_GET_TRUNCATES: + *(u_int *)data = 0; + error = 0; + break; + + default: + error = ENOTTY; + } + return (error); +} + +/* + * Audit pipe read. Read one or more partial or complete records to user + * memory. + */ +static int +audit_pipe_read(dev_t dev, struct uio *uio, __unused int flag) +{ + struct audit_pipe_entry *ape; + struct audit_pipe *ap; + u_int toread; + int error; + + ap = audit_pipe_dtab[minor(dev)]; + KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL")); + + /* + * We hold an sleep lock over read and flush because we rely on the + * stability of a record in the queue during uiomove(9). + */ + if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0) + return (EINTR); + AUDIT_PIPE_LOCK(ap); + while (TAILQ_EMPTY(&ap->ap_queue)) { + if (ap->ap_flags & AUDIT_PIPE_NBIO) { + AUDIT_PIPE_UNLOCK(ap); + AUDIT_PIPE_SX_XUNLOCK(ap); + return (EAGAIN); + } + error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap)); + if (error) { + AUDIT_PIPE_UNLOCK(ap); + AUDIT_PIPE_SX_XUNLOCK(ap); + return (error); + } + } + + /* + * Copy as many remaining bytes from the current record to userspace + * as we can. Keep processing records until we run out of records in + * the queue, or until the user buffer runs out of space. 
+ * + * Note: we rely on the sleep lock to maintain ape's stability here. + */ + ap->ap_reads++; + while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL && + uio_resid(uio) > 0) { + AUDIT_PIPE_LOCK_ASSERT(ap); + + KASSERT(ape->ape_record_len > ap->ap_qoffset, + ("audit_pipe_read: record_len > qoffset (1)")); + toread = MIN(ape->ape_record_len - ap->ap_qoffset, + uio_resid(uio)); + AUDIT_PIPE_UNLOCK(ap); + error = uiomove((char *)ape->ape_record + ap->ap_qoffset, + toread, uio); + if (error) { + AUDIT_PIPE_SX_XUNLOCK(ap); + return (error); + } + + /* + * If the copy succeeded, update book-keeping, and if no + * bytes remain in the current record, free it. + */ + AUDIT_PIPE_LOCK(ap); + KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape, + ("audit_pipe_read: queue out of sync after uiomove")); + ap->ap_qoffset += toread; + KASSERT(ape->ape_record_len >= ap->ap_qoffset, + ("audit_pipe_read: record_len >= qoffset (2)")); + if (ap->ap_qoffset == ape->ape_record_len) { + TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue); + ap->ap_qbyteslen -= ape->ape_record_len; + audit_pipe_entry_free(ape); + ap->ap_qlen--; + ap->ap_qoffset = 0; + } + } + AUDIT_PIPE_UNLOCK(ap); + AUDIT_PIPE_SX_XUNLOCK(ap); + return (0); +} + +/* + * Audit pipe poll. + */ +static int +audit_pipe_poll(dev_t dev, int events, void *wql, struct proc *p) +{ + struct audit_pipe *ap; + int revents; + + revents = 0; + ap = audit_pipe_dtab[minor(dev)]; + KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL")); + + if (events & (POLLIN | POLLRDNORM)) { + AUDIT_PIPE_LOCK(ap); + if (TAILQ_FIRST(&ap->ap_queue) != NULL) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(p, &ap->ap_selinfo, wql); + AUDIT_PIPE_UNLOCK(ap); + } + return (revents); +} + +#ifndef __APPLE__ +/* + * Return true if there are records available for reading on the pipe. 
+ */ +static int +audit_pipe_kqread(struct knote *kn, long hint) +{ + struct audit_pipe *ap; + + ap = (struct audit_pipe *)kn->kn_hook; + KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL")); + AUDIT_PIPE_LOCK_ASSERT(ap); + + if (ap->ap_qlen != 0) { + kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset; + return (1); + } else { + kn->kn_data = 0; + return (0); + } +} + +/* + * Detach kqueue state from audit pipe. + */ +static void +audit_pipe_kqdetach(struct knote *kn) +{ + struct audit_pipe *ap; + + ap = (struct audit_pipe *)kn->kn_hook; + KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL")); + + AUDIT_PIPE_LOCK(ap); + knlist_remove(&ap->ap_selinfo.si_note, kn, 1); + AUDIT_PIPE_UNLOCK(ap); +} +#endif /* !__APPLE__ */ + +static void *devnode; + +int +audit_pipe_init(void) +{ + dev_t dev; + + TAILQ_INIT(&audit_pipe_list); + AUDIT_PIPE_LIST_LOCK_INIT(); + + audit_pipe_major = cdevsw_add(-1, &audit_pipe_cdevsw); + if (audit_pipe_major < 0) + return (KERN_FAILURE); + + dev = makedev(audit_pipe_major, 0); + devnode = devfs_make_node_clone(dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL, + 0600, audit_pipe_clone, "auditpipe", 0); + + if (devnode == NULL) + return (KERN_FAILURE); + + return (KERN_SUCCESS); +} + +int +audit_pipe_shutdown(void) +{ + + /* unwind everything */ + devfs_remove(devnode); + (void) cdevsw_remove(audit_pipe_major, &audit_pipe_cdevsw); + + return (KERN_SUCCESS); +} + +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_private.h b/bsd/security/audit/audit_private.h new file mode 100644 index 000000000..803a2b936 --- /dev/null +++ b/bsd/security/audit/audit_private.h @@ -0,0 +1,456 @@ +/*- + * Copyright (c) 1999-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +/* + * This include file contains function prototypes and type definitions used + * within the audit implementation. 
+ */ + +#ifndef _SECURITY_AUDIT_PRIVATE_H_ +#define _SECURITY_AUDIT_PRIVATE_H_ + +#if defined(_KERNEL) || defined(KERNEL) + +#if CONFIG_MACF +#include +#include +#endif + +#include +#include /* for PID_MAX */ +#include +#include + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_AUDITBSM); +MALLOC_DECLARE(M_AUDITDATA); +MALLOC_DECLARE(M_AUDITPATH); +MALLOC_DECLARE(M_AUDITTEXT); +#endif + +/* + * Audit control variables that are usually set/read via system calls and + * used to control various aspects of auditing. + */ +extern struct au_qctrl audit_qctrl; +extern struct audit_fstat audit_fstat; +extern struct au_mask audit_nae_mask; +extern int audit_panic_on_write_fail; +extern int audit_fail_stop; +extern int audit_argv; +extern int audit_arge; + +/* + * Kernel mask that is used to check to see if system calls need to be audited. + */ +extern au_class_t audit_kevent_mask; + +/* + * The macro used to check to see if the system calls need to be audited. + * This will pessimistically set the audit syscalls flag if the audit kevent + * mask has not been created yet. User code should build the event/class + * mapping table before setting preselection masks to avoid this. + */ +#define AUDIT_CHECK_IF_KEVENTS_MASK(m) do { \ + if ((m).am_success || (m).am_failure) \ + if (!audit_kevent_mask || \ + (audit_kevent_mask & (m).am_success) || \ + (audit_kevent_mask & (m).am_failure)) \ + audit_syscalls = 1; \ +} while (0) + +/* + * Success/failure conditions for the conversion of a kernel audit record to + * BSM format. + */ +#define BSM_SUCCESS 0 +#define BSM_FAILURE 1 +#define BSM_NOAUDIT 2 + +/* + * Defines for the kernel audit record k_ar_commit field. Flags are set to + * indicate what sort of record it is, and which preselection mechanism + * selected it. 
+ */ +#define AR_COMMIT_KERNEL 0x00000001U +#define AR_COMMIT_USER 0x00000010U + +#define AR_PRESELECT_TRAIL 0x00001000U +#define AR_PRESELECT_PIPE 0x00002000U + +#define AR_PRESELECT_USER_TRAIL 0x00004000U +#define AR_PRESELECT_USER_PIPE 0x00008000U + +#define AR_DRAIN_QUEUE 0x80000000U + +/* + * Audit data is generated as a stream of struct audit_record structures, + * linked by struct kaudit_record, and contain storage for possible audit so + * that it will not need to be allocated during the processing of a system + * call, both improving efficiency and avoiding sleeping at untimely moments. + * This structure is converted to BSM format before being written to disk. + */ +struct vnode_au_info { + mode_t vn_mode; + uid_t vn_uid; + gid_t vn_gid; + dev_t vn_dev; + long vn_fsid; + long vn_fileid; + long vn_gen; +}; + +struct groupset { + gid_t gidset[NGROUPS]; + u_int gidset_size; +}; + +struct socket_au_info { + int sai_domain; + int sai_type; + int sai_protocol; + + /* Foreign (remote) address/port. */ + struct sockaddr_storage sai_faddr; + + /* Local address/port. */ + struct sockaddr_storage sai_laddr; +}; + +/* + * The following is used for A_OLDSETQCTRL and A_OLDGETQCTRL and a 64-bit + * userland. + */ +struct au_qctrl64 { + u_int64_t aq64_hiwater; + u_int64_t aq64_lowater; + u_int64_t aq64_bufsz; + u_int64_t aq64_delay; + int64_t aq64_minfree; +}; +typedef struct au_qctrl64 au_qctrl64_t; + +union auditon_udata { + char *au_path; + int au_cond; + int au_policy; + int64_t au_cond64; + int64_t au_policy64; + int au_trigger; + au_evclass_map_t au_evclass; + au_mask_t au_mask; + auditinfo_t au_auinfo; + auditpinfo_t au_aupinfo; + auditpinfo_addr_t au_aupinfo_addr; + au_qctrl_t au_qctrl; + au_qctrl64_t au_qctrl64; + au_stat_t au_stat; + au_fstat_t au_fstat; + auditinfo_addr_t au_kau_info; +}; + +struct posix_ipc_perm { + uid_t pipc_uid; + gid_t pipc_gid; + mode_t pipc_mode; +}; + +struct audit_record { + /* Audit record header. 
*/ + u_int32_t ar_magic; + int ar_event; + int ar_retval; /* value returned to the process */ + int ar_errno; /* return status of system call */ + struct timespec ar_starttime; + struct timespec ar_endtime; + u_int64_t ar_valid_arg; /* Bitmask of valid arguments */ + + /* Audit subject information. */ + struct xucred ar_subj_cred; + uid_t ar_subj_ruid; + gid_t ar_subj_rgid; + gid_t ar_subj_egid; + uid_t ar_subj_auid; /* Audit user ID */ + pid_t ar_subj_asid; /* Audit session ID */ + pid_t ar_subj_pid; + struct au_tid ar_subj_term; + struct au_tid_addr ar_subj_term_addr; + struct au_mask ar_subj_amask; + + /* Operation arguments. */ + uid_t ar_arg_euid; + uid_t ar_arg_ruid; + uid_t ar_arg_suid; + gid_t ar_arg_egid; + gid_t ar_arg_rgid; + gid_t ar_arg_sgid; + pid_t ar_arg_pid; + pid_t ar_arg_asid; + struct au_tid ar_arg_termid; + struct au_tid_addr ar_arg_termid_addr; + uid_t ar_arg_uid; + uid_t ar_arg_auid; + gid_t ar_arg_gid; + struct groupset ar_arg_groups; + int ar_arg_fd; + int ar_arg_fflags; + mode_t ar_arg_mode; + uint32_t ar_arg_value32; + uint64_t ar_arg_value64; + user_addr_t ar_arg_addr; + user_size_t ar_arg_len; + int ar_arg_mask; + u_int ar_arg_signum; + char ar_arg_login[MAXLOGNAME]; + int ar_arg_ctlname[CTL_MAXNAME]; + struct socket_au_info ar_arg_sockinfo; + char *ar_arg_upath1; + char *ar_arg_upath2; + char *ar_arg_kpath1; /* darwin-only */ + char *ar_arg_kpath2; /* darwin-only */ +#if CONFIG_MACF + char *ar_vnode1_mac_labels; + char *ar_vnode2_mac_labels; + char *ar_cred_mac_labels; + char *ar_arg_mac_string; +#endif + char *ar_arg_text; + void *ar_arg_opaque; /* darwin-only */ + void *ar_arg_data; /* darwin-only */ + u_int16_t ar_arg_opq_size; /* darwin-only */ + u_char ar_arg_data_type; /* darwin-only */ + u_char ar_arg_data_count; /* darwin-only */ + struct au_mask ar_arg_amask; + struct vnode_au_info ar_arg_vnode1; + struct vnode_au_info ar_arg_vnode2; + int ar_arg_cmd; + int ar_arg_svipc_cmd; + struct ipc_perm ar_arg_svipc_perm; + int 
ar_arg_svipc_id; + user_addr_t ar_arg_svipc_addr; + struct posix_ipc_perm ar_arg_pipc_perm; + mach_port_name_t ar_arg_mach_port1; /* darwin-only */ + mach_port_name_t ar_arg_mach_port2; /* darwin-only */ + union auditon_udata ar_arg_auditon; + char *ar_arg_argv; + int ar_arg_argc; + char *ar_arg_envv; + int ar_arg_envc; + int ar_arg_exitstatus; + int ar_arg_exitretval; + struct sockaddr_storage ar_arg_sockaddr; + +#if CONFIG_MACF + /* + * MAC security related fields added by MAC policies ar_forced_by_mac + * is 1 if mac_audit_check_preselect() forced this call to be audited, + * 0 otherwise. + */ + LIST_HEAD(mac_audit_record_list_t, mac_audit_record) *ar_mac_records; + int ar_forced_by_mac; +#endif +}; + +/* + * Arguments in the audit record are initially not defined; flags are set to + * indicate if they are present so they can be included in the audit log + * stream only if defined. + */ +#define ARG_IS_VALID(kar, arg) ((kar)->k_ar.ar_valid_arg & (arg)) +#define ARG_SET_VALID(kar, arg) do { \ + (kar)->k_ar.ar_valid_arg |= (arg); \ +} while (0) + +/* + * Current thread macro. get_bsdthread_info() returns a void ptr for some + * reason. + */ +#define curthread() ((struct uthread *)get_bsdthread_info(current_thread())) + +/* + * In-kernel version of audit record; the basic record plus queue meta-data. + * This record can also have a pointer set to some opaque data that will be + * passed through to the audit writing mechanism. + */ +struct kaudit_record { + struct audit_record k_ar; + u_int32_t k_ar_commit; + void *k_udata; /* User data. */ + u_int k_ulen; /* User data length. */ + struct uthread *k_uthread; /* Audited thread. */ + TAILQ_ENTRY(kaudit_record) k_q; +}; +TAILQ_HEAD(kaudit_queue, kaudit_record); + +/* + * Functions to manage the allocation, release, and commit of kernel audit + * records. 
+ */ +void audit_abort(struct kaudit_record *ar); +void audit_commit(struct kaudit_record *ar, int error, + int retval); +struct kaudit_record *audit_new(int event, proc_t p, struct uthread *td); + +/* + * Functions relating to the conversion of internal kernel audit records to + * the BSM file format. + */ +struct au_record; +int kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau); +int bsm_rec_verify(void *rec); + +/* + * Kernel versions of the libbsm audit record functions. + */ +void kau_free(struct au_record *rec); +void kau_init(void); + +/* + * Return values for pre-selection and post-selection decisions. + */ +#define AU_PRS_SUCCESS 1 +#define AU_PRS_FAILURE 2 +#define AU_PRS_BOTH (AU_PRS_SUCCESS|AU_PRS_FAILURE) + +/* + * Data structures relating to the kernel audit queue. Ideally, these might + * be abstracted so that only accessor methods are exposed. + */ +extern struct mtx audit_mtx; +extern struct cv audit_watermark_cv; +extern struct cv audit_worker_cv; +extern struct cv audit_drain_cv; +extern struct kaudit_queue audit_q; +extern int audit_q_len; +extern int audit_pre_q_len; +extern int audit_in_failure; + +/* + * Flags to use on audit files when opening and closing. + */ +#define AUDIT_OPEN_FLAGS (FWRITE | O_APPEND) +#define AUDIT_CLOSE_FLAGS (FWRITE | O_APPEND) + +#include +#include +#include + +/* + * Some of the BSM tokenizer functions take different parameters in the + * kernel implementations in order to save the copying of large kernel data + * structures. The prototypes of these functions are declared here. 
+ */ +token_t *kau_to_socket(struct socket_au_info *soi); + +/* + * audit_klib prototypes + */ +int au_preselect(au_event_t event, au_class_t class, + au_mask_t *mask_p, int sorf); +void au_evclassmap_init(void); +void au_evclassmap_insert(au_event_t event, au_class_t class); +au_class_t au_event_class(au_event_t event); +au_event_t audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg); +au_event_t audit_flags_and_error_to_openevent(int oflags, int error); +au_event_t audit_flags_and_error_to_openextendedevent(int oflags, + int error); +au_event_t audit_msgctl_to_event(int cmd); +au_event_t audit_semctl_to_event(int cmr); +int audit_canon_path(struct vnode *cwd_vp, char *path, + char *cpath); +au_event_t auditon_command_event(int cmd); +au_event_t audit_fcntl_command_event(int cmd, int oflags, int error); + +/* + * Audit trigger events notify user space of kernel audit conditions + * asynchronously. + */ +int audit_send_trigger(unsigned int trigger); + +/* + * Accessor functions to manage global audit state. + */ +void audit_set_kinfo(struct auditinfo_addr *); +void audit_get_kinfo(struct auditinfo_addr *); + +/* + * General audit related functions. + */ +struct kaudit_record *currecord(void); +void audit_free(struct kaudit_record *ar); +void audit_rotate_vnode(struct ucred *cred, + struct vnode *vp); +void audit_worker_init(void); + +/* + * Audit pipe functions. + */ +int audit_pipe_init(void); +int audit_pipe_shutdown(void); +int audit_pipe_preselect(au_id_t auid, au_event_t event, + au_class_t class, int sorf, int trail_select); +void audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, + int sorf, int trail_select, void *record, u_int record_len); +void audit_pipe_submit_user(void *record, u_int record_len); + +/* + * Audit MAC prototypes. 
+ */ +void audit_mac_init(void); +int audit_mac_new(proc_t p, struct kaudit_record *ar); +void audit_mac_free(struct kaudit_record *ar); +int audit_mac_syscall_enter(unsigned short code, proc_t p, + struct uthread *uthread, kauth_cred_t my_cred, au_event_t event); +int audit_mac_syscall_exit(unsigned short code, struct uthread *uthread, + int error, int retval); + +/* + * Audit Session. + */ +void audit_session_init(void); +int audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess); +auditinfo_addr_t *audit_session_update(auditinfo_addr_t *new_aia); +int audit_session_lookup(au_asid_t asid, auditinfo_addr_t *ret_aia); + +/* + * Kernel assigned audit session IDs start at PID_MAX + 1 and ends at + * ASSIGNED_ASID_MAX. + */ +#define ASSIGNED_ASID_MIN (PID_MAX + 1) +#define ASSIGNED_ASID_MAX (0xFFFFFFFF - 1) + +#endif /* defined(KERNEL) || defined(_KERNEL) */ + +#endif /* ! _SECURITY_AUDIT_PRIVATE_H_ */ diff --git a/bsd/security/audit/audit_session.c b/bsd/security/audit/audit_session.c new file mode 100644 index 000000000..8e05f9dcd --- /dev/null +++ b/bsd/security/audit/audit_session.c @@ -0,0 +1,1361 @@ +/*- + * Copyright (c) 2008-2009 Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+kern_return_t ipc_object_copyin(ipc_space_t, mach_port_name_t,
+    mach_msg_type_name_t, ipc_port_t *);
+void ipc_port_release_send(ipc_port_t);
+
+/*
+ * The default auditinfo_addr entry for ucred.
+ */
+struct auditinfo_addr audit_default_aia = {
+	.ai_auid = AU_DEFAUDITID,
+	.ai_asid = AU_DEFAUDITSID,
+	.ai_termid = { .at_type = AU_IPv4, },
+};
+
+#if CONFIG_AUDIT
+
+/*
+ * Currently the hash table is a fixed size.  HASH_ASID() maps an ASID to a
+ * bucket index in the range [0, HASH_TABLE_SIZE).
+ */
+#define	HASH_TABLE_SIZE		97
+#define	HASH_ASID(asid)		(audit_session_hash(asid) % HASH_TABLE_SIZE)
+
+/*
+ * Audit Session Entry.  This is treated as an object with public and private
+ * data.  The se_auinfo field is the only information that is public and
+ * needs to be the first entry.
+ *
+ * Because se_auinfo is the first member, a pointer to it can be cast back to
+ * the enclosing entry; see AU_SENTRY_PTR() below.
+ */
+struct au_sentry {
+	auditinfo_addr_t	se_auinfo;	/* Public audit session data. */
+#define	se_asid			se_auinfo.ai_asid
+#define	se_auid			se_auinfo.ai_auid
+#define	se_mask			se_auinfo.ai_mask
+#define	se_termid		se_auinfo.ai_termid
+#define	se_flags		se_auinfo.ai_flags
+
+	long			se_refcnt;	/* Reference count. */
+	long			se_procnt;	/* Processes in session. */
+	ipc_port_t		se_port;	/* Session port. */
+	struct klist		se_klist;	/* Knotes for session */
+	struct mtx		se_klist_mtx;	/* se_klist mutex */
+	LIST_ENTRY(au_sentry)	se_link;	/* Hash bucket link list (1) */
+};
+typedef struct au_sentry au_sentry_t;
+
+/* Convert a pointer to the public se_auinfo back to its session entry. */
+#define	AU_SENTRY_PTR(aia_p)	((au_sentry_t *)(aia_p))
+
+static struct rwlock	se_entry_lck;	/* (1) lock for se_link above */
+
+/* Hash table: an array of HASH_TABLE_SIZE list heads, one per bucket. */
+LIST_HEAD(au_sentry_head, au_sentry);
+static struct au_sentry_head *au_sentry_bucket = NULL;
+
+/*
+ * Audit Propagation Knote List is a list of kevent knotes that are associated
+ * with an "any ASID" knote.  If the any ASID knote gets modified or deleted
+ * these are modified or deleted as well.
+ */
+struct au_plist {
+	struct knote		*pl_knote;	/* ptr to per-session knote */
+	LIST_ENTRY(au_plist)	 pl_link;	/* list link (2) */
+};
+typedef struct au_plist au_plist_t;
+
+struct au_plisthead {
+	struct rlck		ph_rlck;	/* (2) lock for pl_link list */
+	LIST_HEAD(au_plhead, au_plist)	ph_head; /* list head */
+};
+typedef struct au_plisthead au_plisthead_t;
+
+/*
+ * Private kevent flag: marks a kevent that audit_register_kevents() creates
+ * on behalf of an "any ASID" knote.
+ */
+#define	EV_ANY_ASID	EV_FLAG0
+
+MALLOC_DEFINE(M_AU_SESSION, "audit_session", "Audit session data");
+MALLOC_DEFINE(M_AU_EV_PLIST, "audit_ev_plist", "Audit session event plist");
+
+/*
+ * Kevent filters.
+ */
+static int	audit_filt_sessionattach(struct knote *kn);
+static void	audit_filt_sessiondetach(struct knote *kn);
+static void	audit_filt_sessiontouch(struct knote *kn,
+		    struct kevent64_s *kev, long type);
+static int	audit_filt_session(struct knote *kn, long hint);
+
+static void	audit_register_kevents(uint32_t asid, uint32_t auid);
+
+/* Filter operations table for EVFILT_SESSION. */
+struct filterops audit_session_filtops = {
+	.f_attach	=	audit_filt_sessionattach,
+	.f_detach	=	audit_filt_sessiondetach,
+	.f_touch	=	audit_filt_sessiontouch,
+	.f_event	=	audit_filt_session,
+};
+
+/*
+ * The klist for consumers that are interested in any session (ASID).
This list
+ * is not associated with any data structure but is used for registering
+ * new kevents when sessions are created.  This klist is locked by
+ * anyas_klist_mtx.
+ */
+static struct klist	anyas_klist;
+struct mtx		anyas_klist_mtx;
+
+#define	AUDIT_ANYAS_KLIST_LOCK_INIT()	mtx_init(&anyas_klist_mtx, \
+					"audit anyas_klist_mtx", NULL, MTX_DEF)
+#define	AUDIT_ANYAS_KLIST_LOCK()	mtx_lock(&anyas_klist_mtx)
+#define	AUDIT_ANYAS_KLIST_UNLOCK()	mtx_unlock(&anyas_klist_mtx)
+#define	AUDIT_ANYAS_KLIST_LOCK_ASSERT()	mtx_assert(&anyas_klist_mtx, MA_OWNED)
+
+#define	AUDIT_SENTRY_RWLOCK_INIT()	rw_init(&se_entry_lck, \
+					    "audit se_entry_lck")
+#define	AUDIT_SENTRY_RLOCK()		rw_rlock(&se_entry_lck)
+#define	AUDIT_SENTRY_WLOCK()		rw_wlock(&se_entry_lck)
+#define	AUDIT_SENTRY_RWLOCK_ASSERT()	rw_assert(&se_entry_lck, RA_LOCKED)
+#define	AUDIT_SENTRY_RUNLOCK()		rw_runlock(&se_entry_lck)
+#define	AUDIT_SENTRY_WUNLOCK()		rw_wunlock(&se_entry_lck)
+
+#define	AUDIT_SE_KLIST_LOCK_INIT(se, n)	mtx_init(&(se)->se_klist_mtx, \
+					    n, NULL, MTX_DEF)
+#define	AUDIT_SE_KLIST_LOCK(se)		mtx_lock(&(se)->se_klist_mtx)
+#define	AUDIT_SE_KLIST_UNLOCK(se)	mtx_unlock(&(se)->se_klist_mtx)
+#define	AUDIT_SE_KLIST_LOCK_DESTROY(se)	mtx_destroy(&(se)->se_klist_mtx)
+#define	AUDIT_SE_KLIST_LOCK_ASSERT(se)	mtx_assert(&(se)->se_klist_mtx, \
+					    MA_OWNED)
+
+#define	AUDIT_PLIST_LOCK_INIT(pl)	rlck_init(&(pl)->ph_rlck, \
+					    "audit ph_rlck")
+#define	AUDIT_PLIST_LOCK(pl)		rlck_lock(&(pl)->ph_rlck)
+#define	AUDIT_PLIST_UNLOCK(pl)		rlck_unlock(&(pl)->ph_rlck)
+#define	AUDIT_PLIST_LOCK_DESTROY(pl)	rlck_destroy(&(pl)->ph_rlck)
+
+#if AUDIT_SESSION_DEBUG
+#include
+
+/*
+ * Record exported by the kern.audit_session_debug sysctl.  The members
+ * mirror, in order, the first three members of struct au_sentry, so that a
+ * record can be filled by copying the leading bytes of a session entry.
+ */
+struct au_sentry_debug {
+	auditinfo_addr_t	se_auinfo;
+	long			se_refcnt;
+	long			se_procnt;
+};
+typedef struct au_sentry_debug au_sentry_debug_t;
+
+static int audit_sysctl_session_debug(struct sysctl_oid *oidp, void *arg1,
+    int arg2, struct sysctl_req *req);
+
+SYSCTL_PROC(_kern, OID_AUTO, audit_session_debug, CTLFLAG_RD, NULL, 0,
+    audit_sysctl_session_debug, "S,audit_session_debug",
+    "Current session debug info for auditing.");
+
+/*
+ * Copy out the session debug info via the sysctl interface.  The userland code
+ * is something like the following:
+ *
+ *	error = sysctlbyname("kern.audit_session_debug", buffer_ptr, &buffer_len,
+ *		NULL, 0);
+ *
+ * Returns:
+ *	0	Success (or, for a size query, req->oldidx holds the size).
+ *	EPERM	An attempt was made to write to this read-only node.
+ *	ENOMEM	The user buffer is too small or the temporary buffer could
+ *		not be allocated.
+ *	Otherwise the result of SYSCTL_OUT().
+ */
+static int
+audit_sysctl_session_debug(__unused struct sysctl_oid *oidp,
+    __unused void *arg1, __unused int arg2, struct sysctl_req *req)
+{
+	au_sentry_t *se;
+	au_sentry_debug_t *sed_tab, *next_sed;
+	int i, entry_cnt = 0;
+	size_t sz;
+	int err = 0;
+
+	/*
+	 * This provides a read-only node.
+	 */
+	if (req->newptr != USER_ADDR_NULL)
+		return (EPERM);
+
+	/*
+	 * Walk the audit session hash table to determine the size.
+	 */
+	AUDIT_SENTRY_RLOCK();
+	for(i = 0; i < HASH_TABLE_SIZE; i++)
+		LIST_FOREACH(se, &au_sentry_bucket[i], se_link)
+			if (se != NULL)
+				entry_cnt++;
+
+	/*
+	 * If just querying then return the space required.  There is an
+	 * obvious race condition here so we just fudge this by 3 in case
+	 * the audit session table grows.
+	 */
+	if (req->oldptr == USER_ADDR_NULL) {
+		req->oldidx = (entry_cnt + 3) * sizeof(au_sentry_debug_t);
+		AUDIT_SENTRY_RUNLOCK();
+		return (0);
+	}
+
+	/*
+	 * Make sure the caller's buffer can hold every entry before
+	 * allocating the temporary buffer.
+	 */
+	if (req->oldlen < (entry_cnt * sizeof(au_sentry_debug_t))) {
+		AUDIT_SENTRY_RUNLOCK();
+		return (ENOMEM);
+	}
+	/*
+	 * We hold the lock over the alloc since we don't want the table to
+	 * grow on us.   Therefore, use the non-blocking version of kalloc().
+	 */
+	sed_tab = (au_sentry_debug_t *)kalloc_noblock(entry_cnt *
+	    sizeof(au_sentry_debug_t));
+	if (sed_tab == NULL) {
+		AUDIT_SENTRY_RUNLOCK();
+		return (ENOMEM);
+	}
+	bzero(sed_tab, entry_cnt * sizeof(au_sentry_debug_t));
+
+	/*
+	 * Walk the audit session hash table and build the record array.
+	 */
+	sz = 0;
+	next_sed = sed_tab;
+	for(i = 0; i < HASH_TABLE_SIZE; i++) {
+		LIST_FOREACH(se, &au_sentry_bucket[i], se_link) {
+			if (se != NULL) {
+				/*
+				 * Copy one whole debug record.  The size must
+				 * be that of the record, not of the pointer
+				 * to it; sizeof(next_sed) would copy only
+				 * pointer-size bytes and leave the rest of
+				 * the record zeroed.
+				 */
+				bcopy(se, next_sed, sizeof(*next_sed));
+				next_sed++;
+				sz += sizeof(au_sentry_debug_t);
+			}
+		}
+	}
+	AUDIT_SENTRY_RUNLOCK();
+
+	req->oldlen = sz;
+	err = SYSCTL_OUT(req, sed_tab, sz);
+	kfree(sed_tab, entry_cnt * sizeof(au_sentry_debug_t));
+
+	return (err);
+}
+
+#endif /* AUDIT_SESSION_DEBUG */
+
+/*
+ * Hash the audit session ID using a simple 32-bit mix.
+ */
+static inline uint32_t
+audit_session_hash(au_asid_t asid)
+{
+	uint32_t a = (uint32_t) asid;
+
+	a = (a - (a << 6)) ^ (a >> 17);
+	a = (a - (a << 9)) ^ (a << 4);
+	a = (a - (a << 3)) ^ (a << 10);
+	a = a ^ (a >> 15);
+
+	return (a);
+}
+
+/*
+ * Do a hash lookup and find the session entry for a given ASID.  Return NULL
+ * if not found.  The session entry rwlock must be held by the caller (this
+ * is asserted).
+ */
+static au_sentry_t *
+audit_session_find(au_asid_t asid)
+{
+	uint32_t	 hkey;
+	au_sentry_t	*found_se;
+
+	AUDIT_SENTRY_RWLOCK_ASSERT();
+
+	hkey = HASH_ASID(asid);
+
+	LIST_FOREACH(found_se, &au_sentry_bucket[hkey], se_link)
+		if (found_se->se_asid == asid)
+			return (found_se);
+	return (NULL);
+}
+
+/*
+ * Call kqueue knote while holding the session entry klist lock.
+ */
+static void
+audit_session_knote(au_sentry_t *se, long hint)
+{
+
+	AUDIT_SE_KLIST_LOCK(se);
+	KNOTE(&se->se_klist, hint);
+	AUDIT_SE_KLIST_UNLOCK(se);
+}
+
+/*
+ * Remove the given audit_session entry from the hash table.
+ */
+static void
+audit_session_remove(au_sentry_t *se)
+{
+	uint32_t	 hkey;
+	au_sentry_t	*found_se, *tmp_se;
+
+	KASSERT(se->se_refcnt == 0, ("audit_session_remove: ref count != 0"));
+
+	hkey = HASH_ASID(se->se_asid);
+
+	AUDIT_SENTRY_WLOCK();
+	LIST_FOREACH_SAFE(found_se, &au_sentry_bucket[hkey], se_link, tmp_se) {
+		if (found_se == se) {
+
+			/*
+			 * Post NOTE_AS_CLOSE while the entry is still linked
+			 * and its klist mutex still exists; the mutex is
+			 * destroyed and the entry freed just below.
+			 */
+			audit_session_knote(found_se, NOTE_AS_CLOSE);
+
+			LIST_REMOVE(found_se, se_link);
+			AUDIT_SENTRY_WUNLOCK();
+			AUDIT_SE_KLIST_LOCK_DESTROY(found_se);
+			found_se->se_refcnt = 0;
+			free(found_se, M_AU_SESSION);
+
+			return;
+		}
+	}
+	/* The entry was not in its hash bucket; nothing is freed. */
+	AUDIT_SENTRY_WUNLOCK();
+}
+
+/*
+ * Reference the session by incrementing the sentry ref count.
+ */
+static void
+audit_ref_session(au_sentry_t *se)
+{
+	long old_val;
+
+	/* old_val is used as the count from before the increment. */
+	old_val = OSAddAtomicLong(1, &se->se_refcnt);
+	KASSERT(old_val < 100000,
+	    ("audit_ref_session: Too many references on session."));
+}
+
+/*
+ * Decrement the sentry ref count and remove the session entry if last one.
+ */
+static void
+audit_unref_session(au_sentry_t *se)
+{
+	long old_val;
+
+	old_val = OSAddAtomicLong(-1, &se->se_refcnt);
+	/*
+	 * A previous count of 1 means this was the last reference.  Note that
+	 * se may have been freed by audit_session_remove(); only old_val is
+	 * examined afterwards.
+	 */
+	if (old_val == 1)
+		audit_session_remove(se);
+	KASSERT(old_val > 0,
+	    ("audit_unref_session: Too few references on session."));
+}
+
+/*
+ * Increment the process count in the session.
+ */
+static void
+audit_inc_procount(au_sentry_t *se)
+{
+	long old_val;
+
+	old_val = OSAddAtomicLong(1, &se->se_procnt);
+	KASSERT(old_val <= PID_MAX,
+	    ("audit_inc_procount: proc count > PID_MAX"));
+}
+
+/*
+ * Decrement the process count and add a knote if it is the last process
+ * to exit the session.
+ */
+static void
+audit_dec_procount(au_sentry_t *se)
+{
+	long old_val;
+
+	old_val = OSAddAtomicLong(-1, &se->se_procnt);
+	/* A previous count of 1 means the session now has no processes. */
+	if (old_val == 1)
+		audit_session_knote(se, NOTE_AS_END);
+	KASSERT(old_val >= 1,
+	    ("audit_dec_procount: proc count < 0"));
+}
+
+/*
+ * Update the session entry and check to see if anything was updated.
+ * Returns:
+ *    0    Nothing was updated (We don't care about process preselection masks)
+ *    1    Something was updated.
+ */
+static int
+audit_update_sentry(au_sentry_t *se, auditinfo_addr_t *new_aia)
+{
+	auditinfo_addr_t *aia = &se->se_auinfo;
+	int update;
+
+	KASSERT(new_aia != &audit_default_aia,
+	  ("audit_update_sentry: Trying to update the default aia."));
+
+	/* Only the audit ID, terminal ID and flags are compared. */
+	update = (aia->ai_auid != new_aia->ai_auid ||
+	    bcmp(&aia->ai_termid, &new_aia->ai_termid,
+		sizeof(new_aia->ai_termid)) ||
+	    aia->ai_flags != new_aia->ai_flags);
+
+	/*
+	 * On a change the whole structure is copied, so fields that were not
+	 * compared above (ai_asid, ai_mask) are overwritten as well.
+	 */
+	if (update)
+		bcopy(new_aia, aia, sizeof(*aia));
+
+	return (update);
+}
+
+/*
+ * Return the next session ID.  The range of kernel generated audit session IDs
+ * is ASSIGNED_ASID_MIN to ASSIGNED_ASID_MAX.  The session entry rwlock must be
+ * held by the caller (asserted); it protects the static counter.
+ */
+static uint32_t
+audit_session_nextid(void)
+{
+	static uint32_t next_asid = ASSIGNED_ASID_MIN;
+
+	AUDIT_SENTRY_RWLOCK_ASSERT();
+
+	/* Wrap around once the top of the assigned range has been passed. */
+	if (next_asid > ASSIGNED_ASID_MAX)
+		next_asid = ASSIGNED_ASID_MIN;
+
+	return (next_asid++);
+}
+
+/*
+ * Allocate a new audit_session entry and add it to the hash table.  If the
+ * given ASID is set to AU_ASSIGN_ASID then audit_session_new() will pick an
+ * audit session ID.  Otherwise, it attempts to use the one given.  It creates
+ * a reference to the entry that must be unref'ed.
+ *
+ * If an entry for the requested ASID already exists, that entry is updated
+ * and referenced, and a pointer to its audit info is returned instead.
+ */
+static auditinfo_addr_t *
+audit_session_new(auditinfo_addr_t *new_aia, int newprocess)
+{
+	au_asid_t asid;
+	au_sentry_t *se = NULL;
+	auditinfo_addr_t *aia = NULL;
+	char nm[LOCK_MAX_NAME];
+
+	KASSERT(new_aia != NULL, ("audit_session_new: new_aia == NULL"));
+
+	asid = new_aia->ai_asid;
+
+#if 0  /* XXX this assertion is currently broken by securityd/LoginWindow */
+	KASSERT((asid != AU_ASSIGN_ASID && asid <= PID_MAX),
+	    ("audit_session_new: illegal ASID value: %d", asid));
+#endif
+
+	/*
+	 * Alloc a new session entry now so we don't wait holding the lock.
+	 */
+	se = malloc(sizeof(au_sentry_t), M_AU_SESSION, M_WAITOK | M_ZERO);
+
+	/*
+	 * The mutex name is built from the requested ASID, which may still
+	 * be AU_ASSIGN_ASID at this point.
+	 */
+	snprintf(nm, sizeof(nm), "audit se_klist_mtx %d", asid);
+	AUDIT_SE_KLIST_LOCK_INIT(se, nm);
+
+	/*
+	 * Find an unique session ID, if desired.
+	 */
+	AUDIT_SENTRY_WLOCK();
+	if (asid == AU_ASSIGN_ASID) {
+		do {
+			asid = (au_asid_t)audit_session_nextid();
+		} while(audit_session_find(asid) != NULL);
+	} else {
+		au_sentry_t *found_se = NULL;
+
+		/*
+		 * Check to see if the requested ASID is already in the
+		 * hash table.  If so, update it with the new auditinfo.
+		 */
+		if ((found_se = audit_session_find(asid)) != NULL) {
+			int updated;
+
+			updated = audit_update_sentry(found_se, new_aia);
+			audit_ref_session(found_se);
+
+			/*
+			 * The pre-allocated entry is not needed; release it
+			 * after dropping the hash table lock.
+			 */
+			AUDIT_SENTRY_WUNLOCK();
+			AUDIT_SE_KLIST_LOCK_DESTROY(se);
+			free(se, M_AU_SESSION);
+
+			if (updated)
+				audit_session_knote(found_se, NOTE_AS_UPDATE);
+
+			/*
+			 * If this is a new process joining this session then
+			 * we need to update the proc count.
+			 */
+			if (newprocess)
+				audit_inc_procount(found_se);
+
+			return (&found_se->se_auinfo);
+		}
+	}
+
+	/*
+	 * Start the reference and proc count at 1 to account for the process
+	 * that invoked this via setaudit_addr() (or friends).
+	 */
+	se->se_refcnt = se->se_procnt = 1;
+
+	/*
+	 * Populate the new session entry.  Note that process masks are stored
+	 * in kauth ucred so just zero them here.
+	 *
+	 * NOTE(review): the bzero() below clears the mask in the caller's
+	 * new_aia, not in the new entry (aia).  The entry's mask is already
+	 * zero because se was allocated with M_ZERO.  Confirm whether
+	 * clearing the caller's copy is intended.
+	 */
+	se->se_port = IPC_PORT_NULL;
+	aia = &se->se_auinfo;
+	aia->ai_asid = asid;
+	aia->ai_auid = new_aia->ai_auid;
+	bzero(&new_aia->ai_mask, sizeof(new_aia->ai_mask));
+	bcopy(&new_aia->ai_termid, &aia->ai_termid, sizeof(aia->ai_termid));
+	aia->ai_flags = new_aia->ai_flags;
+
+	/*
+	 * Add it to the hash table.
+	 */
+	LIST_INSERT_HEAD(&au_sentry_bucket[HASH_ASID(asid)], se, se_link);
+	AUDIT_SENTRY_WUNLOCK();
+
+	/*
+	 * Register kevents for consumers wanting events for any ASID
+	 * and knote the event.
+	 */
+	audit_register_kevents(se->se_asid, se->se_auid);
+	audit_session_knote(se, NOTE_AS_START);
+
+	return (aia);
+}
+
+/*
+ * Lookup an existing session.  A copy of the audit session info for a given
+ * ASID is returned in ret_aia (if it is not NULL).
+ *
+ * Returns:
+ *	 0	Success.
+ *	 1	No session with this ASID exists.
+ *	-1	The ASID is larger than ASSIGNED_ASID_MAX.
+ */
+int
+audit_session_lookup(au_asid_t asid, auditinfo_addr_t *ret_aia)
+{
+	au_sentry_t *se = NULL;
+
+	if ((uint32_t)asid > ASSIGNED_ASID_MAX)
+		return (-1);
+	AUDIT_SENTRY_RLOCK();
+	if ((se = audit_session_find(asid)) == NULL) {
+		AUDIT_SENTRY_RUNLOCK();
+		return (1);
+	}
+	/* Copy while the read lock is still held. */
+	if (ret_aia != NULL)
+		bcopy(&se->se_auinfo, ret_aia, sizeof(*ret_aia));
+	AUDIT_SENTRY_RUNLOCK();
+
+	return (0);
+}
+
+/*
+ * Add a reference to the session entry of the given credential.  Nothing is
+ * done if the credential's audit info does not pass IS_VALID_SESSION().
+ */
+void
+audit_session_ref(kauth_cred_t cred)
+{
+	auditinfo_addr_t *aia_p;
+
+	KASSERT(IS_VALID_CRED(cred),
+	    ("audit_session_ref: Invalid kauth_cred."));
+
+	aia_p = cred->cr_audit.as_aia_p;
+
+	if (IS_VALID_SESSION(aia_p))
+		audit_ref_session(AU_SENTRY_PTR(aia_p));
+}
+
+/*
+ * Remove a reference to the session entry of the given credential.  Nothing
+ * is done if the credential's audit info does not pass IS_VALID_SESSION().
+ */
+void
+audit_session_unref(kauth_cred_t cred)
+{
+	auditinfo_addr_t *aia_p;
+
+	KASSERT(IS_VALID_CRED(cred),
+	    ("audit_session_unref: Invalid kauth_cred."));
+
+	aia_p = cred->cr_audit.as_aia_p;
+
+	if (IS_VALID_SESSION(aia_p))
+		audit_unref_session(AU_SENTRY_PTR(aia_p));
+}
+
+/*
+ * Account for a new process in the session of the given credential by
+ * incrementing the session's process count.
+ */
+void
+audit_session_procnew(kauth_cred_t cred)
+{
+	auditinfo_addr_t *aia_p;
+
+	KASSERT(IS_VALID_CRED(cred),
+	    ("audit_session_procnew: Invalid kauth_cred."));
+
+	aia_p = cred->cr_audit.as_aia_p;
+
+	if (IS_VALID_SESSION(aia_p))
+		audit_inc_procount(AU_SENTRY_PTR(aia_p));
+}
+
+/*
+ * Account for a process leaving the session of the given credential by
+ * decrementing the session's process count.
+ */
+void
+audit_session_procexit(kauth_cred_t cred)
+{
+	auditinfo_addr_t *aia_p;
+
+	KASSERT(IS_VALID_CRED(cred),
+	    ("audit_session_procexit: Invalid kauth_cred."));
+
+	aia_p = cred->cr_audit.as_aia_p;
+
+	if (IS_VALID_SESSION(aia_p))
+		audit_dec_procount(AU_SENTRY_PTR(aia_p));
+}
+
+/*
+ * Init the audit session code.
+ */
+void
+audit_session_init(void)
+{
+	int i;
+
+	KASSERT((ASSIGNED_ASID_MAX - ASSIGNED_ASID_MIN) > PID_MAX,
+	    ("audit_session_init: ASSIGNED_ASID_MAX is not large enough."));
+
+	AUDIT_SENTRY_RWLOCK_INIT();
+	AUDIT_ANYAS_KLIST_LOCK_INIT();
+
+	/*
+	 * The hash table is an array of list heads, so size it by the
+	 * element type of au_sentry_bucket rather than by the (much larger)
+	 * session entry structure.
+	 */
+	au_sentry_bucket = malloc(sizeof(*au_sentry_bucket) *
+	    HASH_TABLE_SIZE, M_AU_SESSION, M_WAITOK | M_ZERO);
+
+	for (i = 0; i < HASH_TABLE_SIZE; i++)
+		LIST_INIT(&au_sentry_bucket[i]);
+}
+
+/*
+ * Allocate a new kevent propagation list (plist).  The returned pointer is
+ * an au_plisthead_t with an empty list and an initialized lock.
+ */
+static caddr_t
+audit_new_plist(void)
+{
+	au_plisthead_t *plhead;
+
+	plhead = malloc(sizeof(au_plisthead_t), M_AU_EV_PLIST, M_WAITOK |
+	    M_ZERO);
+
+	LIST_INIT(&plhead->ph_head);
+	AUDIT_PLIST_LOCK_INIT(plhead);
+
+	return ((caddr_t) plhead);
+}
+
+/*
+ * Destroy a kevent propagation list (plist).  The anyas_klist_mtx mutex must be
+ * held by the caller.
+ */
+static void
+audit_destroy_plist(struct knote *anyas_kn)
+{
+	au_plisthead_t *plhead;
+	au_plist_t *plentry, *ple_tmp;
+	struct kevent64_s kev;
+
+	KASSERT(anyas_kn != NULL, ("audit_destroy_plist: anyas = NULL"));
+	plhead = (au_plisthead_t *)anyas_kn->kn_hook;
+	KASSERT(plhead != NULL, ("audit_destroy_plist: plhead = NULL"));
+
+	/*
+	 * Only ident, filter and flags are set below; zero the rest so that
+	 * no uninitialized stack data is handed to kevent_register().
+	 */
+	bzero(&kev, sizeof(kev));
+
+	/*
+	 * Delete everything in the propagation list.
+	 */
+	AUDIT_PLIST_LOCK(plhead);
+	LIST_FOREACH_SAFE(plentry, &plhead->ph_head, pl_link, ple_tmp) {
+		struct kqueue *kq = plentry->pl_knote->kn_kq;
+
+		kev.ident = plentry->pl_knote->kn_id;
+		kev.filter = EVFILT_SESSION;
+		kev.flags = EV_DELETE;
+
+		/*
+		 * The plist entry gets removed in rm_from_plist() which is
+		 * called indirectly by kevent_register().
+		 */
+		kevent_register(kq, &kev, NULL);
+	}
+	AUDIT_PLIST_UNLOCK(plhead);
+
+	/*
+	 * Remove the head.
+	 */
+	AUDIT_PLIST_LOCK_DESTROY(plhead);
+	free(plhead, M_AU_EV_PLIST);
+}
+
+/*
+ * Add a knote pointer entry to the kevent propagation list.
+ */
+static void
+audit_add_to_plist(struct knote *anyas_kn, struct knote *kn)
+{
+	au_plisthead_t *plhead;
+	au_plist_t *plentry;
+
+	KASSERT(anyas_kn != NULL, ("audit_add_to_plist: anyas = NULL"));
+	plhead = (au_plisthead_t *)anyas_kn->kn_hook;
+	KASSERT(plhead != NULL, ("audit_add_to_plist: plhead = NULL"));
+
+	/* Allocate before taking the plist lock; M_WAITOK may sleep. */
+	plentry = malloc(sizeof(au_plist_t), M_AU_EV_PLIST, M_WAITOK | M_ZERO);
+
+	plentry->pl_knote = kn;
+	AUDIT_PLIST_LOCK(plhead);
+	LIST_INSERT_HEAD(&plhead->ph_head, plentry, pl_link);
+	AUDIT_PLIST_UNLOCK(plhead);
+}
+
+/*
+ * Remove a knote pointer entry from the kevent propagation list.  The lock
+ * on the plist may already be held (by audit_destroy_plist() above) so we use
+ * a recursive lock.
+ */
+static void
+audit_rm_from_plist(struct knote *kn)
+{
+	struct knote *anyas_kn;
+	au_plisthead_t *plhd;
+	au_plist_t *plentry, *ple_tmp;
+
+	KASSERT(kn != NULL, ("audit_rm_from_plist: kn = NULL"));
+	/* For a propagated knote, kn_hook points at its "any ASID" knote. */
+	anyas_kn = (struct knote *)kn->kn_hook;
+	KASSERT(anyas_kn != NULL, ("audit_rm_from_plist: anyas = NULL"));
+	plhd = (au_plisthead_t *)anyas_kn->kn_hook;
+
+	AUDIT_PLIST_LOCK(plhd);
+	LIST_FOREACH_SAFE(plentry, &plhd->ph_head, pl_link, ple_tmp) {
+		if (plentry->pl_knote == kn) {
+			LIST_REMOVE(plentry, pl_link);
+			free(plentry, M_AU_EV_PLIST);
+			AUDIT_PLIST_UNLOCK(plhd);
+			return;
+		}
+	}
+	AUDIT_PLIST_UNLOCK(plhd);
+}
+
+/*
+ * The attach filter for EVFILT_SESSION.
+ *
+ * Returns:
+ *	0	The knote was attached.
+ *	ENOTSUP	None of the supported NOTE_AS_* flags was requested.
+ *	EINVAL	The ASID is out of range or no such session exists.
+ */
+static int
+audit_filt_sessionattach(struct knote *kn)
+{
+	au_sentry_t *se = NULL;
+
+	/*
+	 * Check flags for the events we currently support.
+	 */
+	if ((kn->kn_sfflags & (NOTE_AS_START | NOTE_AS_END | NOTE_AS_CLOSE
+		    | NOTE_AS_UPDATE | NOTE_AS_ERR)) == 0)
+		return (ENOTSUP);
+
+	/*
+	 * If the interest is in any session then add to the any ASID knote
+	 * list.  Otherwise, add it to the knote list associated with the
+	 * given session.
+	 */
+	if (kn->kn_id == AS_ANY_ASID) {
+
+		kn->kn_flags |= EV_CLEAR;
+		kn->kn_ptr.p_se = NULL;
+
+		/*
+		 * Attach a kevent propagation list for any kevents that get
+		 * added.
+		 */
+		kn->kn_hook = audit_new_plist();
+
+		AUDIT_ANYAS_KLIST_LOCK();
+		KNOTE_ATTACH(&anyas_klist, kn);
+		AUDIT_ANYAS_KLIST_UNLOCK();
+
+		return (0);
+	} else {
+
+		/*
+		 * NOTE: The anyas klist lock will be held in this
+		 * part of the code when indirectly called from
+		 * audit_register_kevents() below.
+		 */
+
+		/*
+		 * Check to make sure it is a valid ASID.
+		 */
+		if (kn->kn_id > ASSIGNED_ASID_MAX)
+			return (EINVAL);
+
+		AUDIT_SENTRY_RLOCK();
+		se = audit_session_find(kn->kn_id);
+		AUDIT_SENTRY_RUNLOCK();
+		if (se == NULL)
+			return (EINVAL);
+
+		AUDIT_SE_KLIST_LOCK(se);
+		kn->kn_flags |= EV_CLEAR;
+		kn->kn_ptr.p_se = se;
+
+		/*
+		 * If this attach is the result of an "any ASID" (pseudo)
+		 * kevent then attach the any session knote ptr to this knote.
+		 * Also, add this knote to its propagation list.
+		 */
+		if (kn->kn_flags & EV_ANY_ASID) {
+			struct knote *anyas_kn =
+			    (struct knote *)((uintptr_t)kn->kn_kevent.ext[0]);
+			kn->kn_hook = (caddr_t) anyas_kn;
+			kn->kn_flags &= ~EV_ANY_ASID;
+			audit_add_to_plist(anyas_kn, kn);
+		} else
+			kn->kn_hook = NULL;
+		KNOTE_ATTACH(&se->se_klist, kn);
+		AUDIT_SE_KLIST_UNLOCK(se);
+
+		return (0);
+	}
+}
+
+/*
+ * The detach filter for EVFILT_SESSION.
+ */
+static void
+audit_filt_sessiondetach(struct knote *kn)
+{
+	au_sentry_t *se = NULL;
+
+	if (kn->kn_id == AS_ANY_ASID) {
+
+		AUDIT_ANYAS_KLIST_LOCK();
+		audit_destroy_plist(kn);
+		KNOTE_DETACH(&anyas_klist, kn);
+		AUDIT_ANYAS_KLIST_UNLOCK();
+
+	} else {
+		/*
+		 * If this knote was created by any ASID kevent then remove
+		 * from kevent propagation list.
+		 */
+		if (kn->kn_hook != NULL) {
+			audit_rm_from_plist(kn);
+			kn->kn_hook = NULL;
+		}
+
+		/*
+		 * Check to see if already detached.
+		 */
+		se = kn->kn_ptr.p_se;
+		if (se != NULL) {
+			AUDIT_SE_KLIST_LOCK(se);
+			kn->kn_ptr.p_se = NULL;
+			KNOTE_DETACH(&se->se_klist, kn);
+			AUDIT_SE_KLIST_UNLOCK(se);
+		}
+	}
+}
+
+/*
+ * The touch filter for EVFILT_SESSION.  Check for any ASID kevent updates and
+ * propagate the change.
+ */
+static void
+audit_filt_sessiontouch(struct knote *kn, struct kevent64_s *kev, long type)
+{
+	struct knote *ple_kn;
+	struct kqueue *kq;
+	au_sentry_t *se;
+	au_plisthead_t *plhead;
+	au_plist_t *plentry;
+	struct kevent64_s newkev;
+
+	switch (type) {
+	case EVENT_REGISTER:
+		kn->kn_sfflags = kev->fflags;
+		kn->kn_sdata = kev->data;
+		/*
+		 * If an any ASID kevent was updated then we may need to
+		 * propagate the update.
+		 */
+		if (kev->ident == AS_ANY_ASID && kn->kn_hook != NULL) {
+
+			/*
+			 * Propagate the change to each of the session kevents
+			 * that were created by this any ASID kevent.
+			 */
+			plhead = (au_plisthead_t *)kn->kn_hook;
+			AUDIT_PLIST_LOCK(plhead);
+			LIST_FOREACH(plentry, &plhead->ph_head, pl_link) {
+
+				if ((ple_kn = plentry->pl_knote) == NULL)
+					continue;
+				if ((se = ple_kn->kn_ptr.p_se) == NULL)
+					continue;
+				if ((kq = ple_kn->kn_kq) == NULL)
+					continue;
+
+				/*
+				 * Zero the event first so the fields not set
+				 * below (ext[]) do not carry uninitialized
+				 * stack data into kevent_register().
+				 */
+				bzero(&newkev, sizeof(newkev));
+				newkev.ident = plentry->pl_knote->kn_id;
+				newkev.filter = EVFILT_SESSION;
+				newkev.flags = kev->flags;
+				newkev.fflags = kev->fflags;
+				newkev.data = kev->data;
+				newkev.udata = kev->udata;
+				kevent_register(kq, &newkev, NULL);
+			}
+			AUDIT_PLIST_UNLOCK(plhead);
+		}
+		break;
+
+	case EVENT_PROCESS:
+		*kev = kn->kn_kevent;
+		/* EV_CLEAR knotes reset their state once delivered. */
+		if (kn->kn_flags & EV_CLEAR) {
+			kn->kn_data = 0;
+			kn->kn_fflags = 0;
+		}
+		break;
+
+	default:
+		KASSERT((type == EVENT_REGISTER || type == EVENT_PROCESS),
+		    ("filt_sessiontouch(): invalid type (%ld)", type));
+		break;
+	}
+}
+
+/*
+ * Event filter for EVFILT_SESSION.  The AUDIT_SE_KLIST_LOCK should be held
+ * by audit_session_knote().
+ */
+static int
+audit_filt_session(struct knote *kn, long hint)
+{
+	int events = (int)hint;
+	au_sentry_t *se = kn->kn_ptr.p_se;
+
+	if (hint != 0 && se != NULL) {
+
+		/* Record only the events this knote subscribed to. */
+		if (kn->kn_sfflags & events) {
+			kn->kn_fflags |= events;
+			kn->kn_data = se->se_auid;
+		}
+
+		/*
+		 * If this is the last possible event for the knote,
+		 * detach the knote from the audit session before the
+		 * session goes away.
+		 */
+		if (events & NOTE_AS_CLOSE) {
+
+			/*
+			 * If created by any ASID kevent then remove from
+			 * propagation list.
+			 */
+			if (kn->kn_hook != NULL) {
+				audit_rm_from_plist(kn);
+				kn->kn_hook = NULL;
+			}
+			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+			kn->kn_ptr.p_se = NULL;
+			AUDIT_SE_KLIST_LOCK_ASSERT(se);
+			KNOTE_DETACH(&se->se_klist, kn);
+
+			return (1);
+		}
+	}
+	/* The knote is active whenever an event is pending on it. */
+	return (kn->kn_fflags != 0);
+}
+
+/*
+ * For all the consumers wanting events for all sessions, register new
+ * kevents associated with the session for the given ASID.  The actual
+ * attachment is done by the EVFILT_SESSION attach filter above.
+ *
+ * If a registration fails, NOTE_AS_ERR is set on the "any ASID" knote.
+ */
+static void
+audit_register_kevents(uint32_t asid, uint32_t auid)
+{
+	struct knote *kn;
+
+	AUDIT_ANYAS_KLIST_LOCK();
+	SLIST_FOREACH(kn, &anyas_klist, kn_selnext) {
+		struct kqueue *kq = kn->kn_kq;
+		struct kevent64_s kev;
+		int err;
+
+		/*
+		 * Zero the event first: only ext[0] is assigned below, and
+		 * the remaining ext[] slot must not carry uninitialized
+		 * stack data into kevent_register().
+		 */
+		bzero(&kev, sizeof(kev));
+		kev.ident = asid;
+		kev.filter = EVFILT_SESSION;
+		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ANY_ASID;
+		kev.fflags = kn->kn_sfflags;
+		kev.data = auid;
+		kev.udata = kn->kn_kevent.udata;
+
+		/*
+		 * Save the knote ptr for this "any ASID" knote for the attach
+		 * filter.
+		 */
+		kev.ext[0] = (uint64_t)((uintptr_t)kn);
+
+		/*
+		 * XXX kevent_register() may block here alloc'ing a new knote.
+		 * We may want to think about using a lockless linked list or
+		 * at least a sleep rwlock for the anyas_klist.
+		 */
+		err = kevent_register(kq, &kev, NULL);
+		if (err)
+			kn->kn_fflags |= NOTE_AS_ERR;
+	}
+	AUDIT_ANYAS_KLIST_UNLOCK();
+}
+
+/*
+ * Safely update kauth cred of the given process with new the given audit info.
+ * If the newprocess flag is set then we need to account for this process in
+ * the proc count.
+ *
+ * Returns:
+ *	0	Success.
+ *	EINVAL	The request would change an immutable attribute (audit ID,
+ *		terminal ID or flags) of an existing session.
+ */
+int
+audit_session_setaia(proc_t p, auditinfo_addr_t *aia_p, int newprocess)
+{
+	kauth_cred_t my_cred, my_new_cred;
+	struct au_session as;
+	struct au_session tmp_as;
+	auditinfo_addr_t caia;
+
+	/*
+	 * If this is going to modify an existing session then do some
+	 * immutable checks.
+	 */
+	if (audit_session_lookup(aia_p->ai_asid, &caia) == 0) {
+
+		/*
+		 * If the current audit ID is not the default then it is
+		 * immutable.
+		 */
+		if (caia.ai_auid != AU_DEFAUDITID &&
+		    caia.ai_auid != aia_p->ai_auid)
+			return (EINVAL);
+
+		/*
+		 * If the current termid is not the default then it is
+		 * immutable.
+		 */
+		if ((caia.ai_termid.at_type != AU_IPv4 ||
+			caia.ai_termid.at_port != 0 ||
+			caia.ai_termid.at_addr[0] != 0) &&
+		    (caia.ai_termid.at_port != aia_p->ai_termid.at_port ||
+			caia.ai_termid.at_type != aia_p->ai_termid.at_type ||
+			bcmp(&caia.ai_termid.at_addr, &aia_p->ai_termid.at_addr,
+			    sizeof (caia.ai_termid.at_addr) )) )
+			return (EINVAL);
+
+		/* The audit flags are immutable. */
+		if (caia.ai_flags != aia_p->ai_flags)
+			return (EINVAL);
+
+		/* The audit masks are mutable. */
+	}
+
+	my_cred = kauth_cred_proc_ref(p);
+	/*
+	 * Save the requested mask before calling audit_session_new(), which
+	 * zeroes aia_p->ai_mask when it creates a new session entry.
+	 */
+	bcopy(&aia_p->ai_mask, &as.as_mask, sizeof(as.as_mask));
+	as.as_aia_p = audit_session_new(aia_p, newprocess);
+
+	/*
+	 * We are modifying the audit info in a credential so we need a new
+	 * credential (or take another reference on an existing credential that
+	 * matches our new one).  We must do this because the audit info in the
+	 * credential is used as part of our hash key.	Get current credential
+	 * in the target process and take a reference while we muck with it.
+	 */
+	for (;;) {
+
+		/*
+		 * Set the credential with new info.  If there is no change,
+		 * we get back the same credential we passed in; if there is
+		 * a change, we drop the reference on the credential we
+		 * passed in.  The subsequent compare is safe, because it is
+		 * a pointer compare rather than a contents compare.
+		 */
+		bcopy(&as, &tmp_as, sizeof(tmp_as));
+		my_new_cred = kauth_cred_setauditinfo(my_cred, &tmp_as);
+
+		if (my_cred != my_new_cred) {
+			proc_lock(p);
+			/* Need to protect for a race where another thread also
+			 * changed the credential after we took our reference.
+			 * If p_ucred has changed then we should restart this
+			 * again with the new cred.
+			 */
+			if (p->p_ucred != my_cred) {
+				proc_unlock(p);
+				audit_session_unref(my_new_cred);
+				kauth_cred_unref(&my_new_cred);
+				/* try again */
+				my_cred = kauth_cred_proc_ref(p);
+				continue;
+			}
+			p->p_ucred = my_new_cred;
+			proc_unlock(p);
+		}
+		/*
+		 * Drop old proc reference or our extra reference.
+		 */
+		kauth_cred_unref(&my_cred);
+		break;
+	}
+	/*
+	 * Drop one session reference now that the credential holds the
+	 * session pointer.
+	 * NOTE(review): presumably this balances the reference taken in
+	 * audit_session_new() above -- confirm against
+	 * kauth_cred_setauditinfo().
+	 */
+	audit_session_unref(my_new_cred);
+
+	/*
+	 * Propagate the change from the process to the Mach task.
+	 */
+	set_security_token(p);
+
+	return (0);
+}
+
+/*
+ * audit_session_self  (system call)
+ *
+ * Description: Obtain a Mach send right for the current session.
+ *
+ * Parameters:	p		Process calling audit_session_self().
+ *
+ * Returns:	*ret_port	Named Mach send right, which may be
+ *				MACH_PORT_NULL in the failure case.
+ *
+ * Errno:	0		Success
+ *		EINVAL		The calling process' session has not been set.
+ *		ESRCH		Bad process, can't get valid cred for process.
+ *		ENOMEM		Port allocation failed due to no free memory.
+ */
+int
+audit_session_self(proc_t p, __unused struct audit_session_self_args *uap,
+    mach_port_name_t *ret_port)
+{
+	ipc_port_t sendport = IPC_PORT_NULL;
+	kauth_cred_t cred = NULL;
+	auditinfo_addr_t *aia_p;
+	au_sentry_t *se;
+	int err = 0;
+
+	cred = kauth_cred_proc_ref(p);
+	if (!IS_VALID_CRED(cred)) {
+		err = ESRCH;
+		goto done;
+	}
+
+	aia_p = cred->cr_audit.as_aia_p;
+	if (!IS_VALID_SESSION(aia_p)) {
+		err = EINVAL;
+		goto done;
+	}
+
+	se = AU_SENTRY_PTR(aia_p);
+
+	/*
+	 * Processes that join using this mach port will inherit this process'
+	 * pre-selection masks.
+	 */
+	if (se->se_port == IPC_PORT_NULL)
+		bcopy(&cred->cr_audit.as_mask, &se->se_mask,
+		    sizeof(se->se_mask));
+
+	if ((sendport = audit_session_mksend(aia_p, &se->se_port)) == NULL) {
+		/* failed to alloc new port */
+		err = ENOMEM;
+		goto done;
+	}
+
+	/*
+	 * This reference on the session is unref'ed in
+	 * audit_session_portaiadestroy().  This reference is needed so the
+	 * session doesn't get dropped until the session join is done.
+	 */
+	audit_ref_session(se);
+
+
+done:
+	if (cred != NULL)
+		kauth_cred_unref(&cred);
+	if (err == 0)
+		*ret_port = ipc_port_copyout_send(sendport,
+		    get_task_ipcspace(p->task));
+	else
+		*ret_port = MACH_PORT_NULL;
+
+	return (err);
+}
+
+/*
+ * Release the session state tied to a session port: clear the entry's port
+ * and drop the session reference that audit_session_self() took.
+ */
+void
+audit_session_portaiadestroy(struct auditinfo_addr *port_aia_p)
+{
+	au_sentry_t *se;
+
+	KASSERT(port_aia_p != NULL,
+	    ("audit_session_portaiadestroy: port_aia_p = NULL"));
+
+	se = AU_SENTRY_PTR(port_aia_p);
+
+	/*
+	 * Drop the reference added in audit_session_self().
+	 */
+	if (se != NULL) {
+		se->se_port = IPC_PORT_NULL;
+		audit_unref_session(se);
+	}
+
+}
+
+/*
+ * Join process p to the session named by the given port.  The send right on
+ * port is always released before returning.
+ *
+ * Returns:	*new_asid	ASID of the port's session, or AU_DEFAUDITSID
+ *				if the port does not name a session.
+ *
+ * Errno:	0		Success
+ *		EINVAL		The port does not name an audit session, or
+ *				audit_session_setaia() rejected the change.
+ *		ESRCH		Invalid calling process/cred.
+ */
+static int
+audit_session_join_internal(proc_t p, ipc_port_t port, au_asid_t *new_asid)
+{
+	auditinfo_addr_t *port_aia_p, *old_aia_p;
+	kauth_cred_t cred = NULL;
+	au_asid_t old_asid;
+	int err = 0;
+
+	*new_asid = AU_DEFAUDITSID;
+
+	if ((port_aia_p = audit_session_porttoaia(port)) == NULL) {
+		err = EINVAL;
+		goto done;
+	}
+	*new_asid = port_aia_p->ai_asid;
+
+	cred = kauth_cred_proc_ref(p);
+	if (!IS_VALID_CRED(cred)) {
+		/*
+		 * NOTE(review): this unrefs a credential that just failed
+		 * IS_VALID_CRED(); audit_session_self() above only unrefs
+		 * when cred != NULL.  Confirm this call is safe.
+		 */
+		kauth_cred_unref(&cred);
+		err = ESRCH;
+		goto done;
+	}
+	old_aia_p = cred->cr_audit.as_aia_p;
+	old_asid = old_aia_p->ai_asid;
+
+	/*
+	 * Add process in if not already in the session.
+	 */
+	if (*new_asid != old_asid) {
+		err = audit_session_setaia(p, port_aia_p, 1);
+		/*
+		 * If this process was in a valid session before then we
+		 * need to decrement the process count of the session it
+		 * came from.  Only do so when the join succeeded; on failure
+		 * the process is still a member of its old session.
+		 */
+		if (err == 0 && IS_VALID_SESSION(old_aia_p))
+			audit_dec_procount(AU_SENTRY_PTR(old_aia_p));
+	}
+	kauth_cred_unref(&cred);
+
+done:
+	if (port != IPC_PORT_NULL)
+		ipc_port_release_send(port);
+
+	return (err);
+}
+
+/*
+ * audit_session_spawnjoin
+ *
+ * Description: posix_spawn() interface to audit_session_join_internal().
+ *
+ * Returns:	0		Success
+ *		EINVAL		Invalid Mach port name.
+ *		ESRCH		Invalid calling process/cred.
+ */
+int
+audit_session_spawnjoin(proc_t p, ipc_port_t port)
+{
+	au_asid_t new_asid;
+
+	return (audit_session_join_internal(p, port, &new_asid));
+}
+
+/*
+ * audit_session_join  (system call)
+ *
+ * Description: Join the session for a given Mach port send right.
+ *
+ * Parameters:	p		Process calling session join.
+ *		uap->port	A Mach send right.
+ *
+ * Returns:	*ret_asid	Audit session ID of new session, which may
+ *				be AU_DEFAUDITSID in the failure case.
+ *
+ * Errno:	0		Success
+ *		EINVAL		Invalid Mach port name.
+ *		ESRCH		Invalid calling process/cred.
+ */ +int +audit_session_join(proc_t p, struct audit_session_join_args *uap, + au_asid_t *ret_asid) +{ + ipc_port_t port = IPC_PORT_NULL; + mach_port_name_t send = uap->port; + int err = 0; + + + if (ipc_object_copyin(get_task_ipcspace(p->task), send, + MACH_MSG_TYPE_COPY_SEND, &port) != KERN_SUCCESS) { + *ret_asid = AU_DEFAUDITSID; + err = EINVAL; + } else + err = audit_session_join_internal(p, port, ret_asid); + + return (err); +} + +#else + +int +audit_session_self(proc_t p, struct audit_session_self_args *uap, + mach_port_name_t *ret_port) +{ +#pragma unused(p, uap, ret_port) + + return (ENOSYS); +} + +int +audit_session_join(proc_t p, struct audit_session_join_args *uap, + au_asid_t *ret_asid) +{ +#pragma unused(p, uap, ret_asid) + + return (ENOSYS); +} + +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_syscalls.c b/bsd/security/audit/audit_syscalls.c new file mode 100644 index 000000000..0ad24367a --- /dev/null +++ b/bsd/security/audit/audit_syscalls.c @@ -0,0 +1,1177 @@ +/*- + * Copyright (c) 1999-2009, Apple Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/* + * NOTICE: This file was modified by McAfee Research in 2004 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if CONFIG_MACF +#include +#include +#include +#include +#endif + +#include + +#include +#include + +#if CONFIG_AUDIT + +#define IS_NOT_VALID_PID(p) ((p) < 1 || (p) > PID_MAX) + +#ifdef AUDIT_API_WARNINGS +/* + * Macro to warn about auditinfo_addr_t/auditpinfo_addr_t changing sizes + * to encourage the userland code to be recompiled and updated. 
 */
/*
 * WARN_IF_AINFO_ADDR_CHANGED(sz1, sz2, scall, tp)
 *
 *	sz1, sz2	The two sizes to compare (both cast to size_t).
 *	scall		String naming the system call, used in the message.
 *	tp		String naming the structure type, used in the message.
 *
 * When the sizes differ, the name obtained from proc_selfname() is printed
 * together with scall and tp.  Nothing is returned and the caller's control
 * flow is not affected; the macro only logs.
 */
#define	WARN_IF_AINFO_ADDR_CHANGED(sz1, sz2, scall, tp) do {		\
	if ((size_t)(sz1) != (size_t)(sz2)) {				\
		char pn[MAXCOMLEN + 1];					\
									\
		proc_selfname(pn, MAXCOMLEN + 1);			\
		printf("Size of %s used by %s in %s is different from "	\
		    "kernel's. Please recompile %s.\n", (tp),		\
		    (scall), pn, pn);					\
	}								\
} while (0)

/*
 * Macro to warn about using ASID's outside the range [1 to PID_MAX] to
 * encourage userland code changes.
 *
 *	asid		The ASID to check; AU_ASSIGN_ASID is always accepted.
 *			Evaluated more than once, so pass a side-effect-free
 *			expression.
 *	scall		String naming the system call, used in the message.
 *
 * Like WARN_IF_AINFO_ADDR_CHANGED this only logs via printf().
 */
#define	WARN_IF_BAD_ASID(asid, scall) do {				\
	if (((asid) < 1 || (asid) > PID_MAX) &&				\
	     (asid) != AU_ASSIGN_ASID) {				\
		char pn[MAXCOMLEN + 1];					\
									\
		proc_selfname(pn, MAXCOMLEN + 1);			\
		printf("%s in %s is using an ASID (%u) outside the "	\
		    "range [1 to %d]. Please change %s to use an ASID "\
		    "within this range or use AU_ASSIGN_ASID.\n",	\
		    (scall), pn, (uint32_t)(asid), PID_MAX, pn);	\
	}								\
} while (0)

#else /* ! AUDIT_API_WARNINGS */

/*
 * Without AUDIT_API_WARNINGS both macros expand to an empty statement and
 * none of their arguments are evaluated.
 */
#define	WARN_IF_AINFO_ADDR_CHANGED(sz1, sz2, scall, tp) do {		\
} while (0)

#define	WARN_IF_BAD_ASID(asid, scall) do {				\
} while (0)

#endif /* AUDIT_API_WARNINGS */

/*
 * System call to allow a user space application to submit a BSM audit record
 * to the kernel for inclusion in the audit log. This function does little
 * verification on the audit record that is submitted.
 *
 * XXXAUDIT: Audit preselection for user records does not currently work,
 * since we pre-select only based on the AUE_audit event type, not the event
 * type submitted as part of the user audit data.
+ */ +/* ARGSUSED */ +int +audit(proc_t p, struct audit_args *uap, __unused int32_t *retval) +{ + int error; + void * rec; + struct kaudit_record *ar; + struct uthread *uthr; + + error = suser(kauth_cred_get(), &p->p_acflag); + if (error) + return (error); + + mtx_lock(&audit_mtx); + if ((uap->length <= 0) || (uap->length > (int)audit_qctrl.aq_bufsz)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + mtx_unlock(&audit_mtx); + + ar = currecord(); + + /* + * If there's no current audit record (audit() itself not audited) + * commit the user audit record. + */ + if (ar == NULL) { + uthr = curthread(); + if (uthr == NULL) /* can this happen? */ + return (ENOTSUP); + + /* + * This is not very efficient; we're required to allocate a + * complete kernel audit record just so the user record can + * tag along. + */ + uthr->uu_ar = audit_new(AUE_NULL, p, uthr); + if (uthr->uu_ar == NULL) + return (ENOTSUP); + ar = uthr->uu_ar; + } + + if (uap->length > MAX_AUDIT_RECORD_SIZE) + return (EINVAL); + + rec = malloc(uap->length, M_AUDITDATA, M_WAITOK); + + error = copyin(uap->record, rec, uap->length); + if (error) + goto free_out; + +#if CONFIG_MACF + error = mac_system_check_audit(kauth_cred_get(), rec, uap->length); + if (error) + goto free_out; +#endif + + /* Verify the record. */ + if (bsm_rec_verify(rec) == 0) { + error = EINVAL; + goto free_out; + } + + /* + * Attach the user audit record to the kernel audit record. Because + * this system call is an auditable event, we will write the user + * record along with the record for this audit event. + * + * XXXAUDIT: KASSERT appropriate starting values of k_udata, k_ulen, + * k_ar_commit & AR_COMMIT_USER? + */ + ar->k_udata = rec; + ar->k_ulen = uap->length; + ar->k_ar_commit |= AR_COMMIT_USER; + + /* + * Currently we assume that all preselection has been performed in + * userspace. We unconditionally set these masks so that the records + * get committed both to the trail and pipe. 
In the future we will + * want to setup kernel based preselection. + */ + ar->k_ar_commit |= (AR_PRESELECT_USER_TRAIL | AR_PRESELECT_USER_PIPE); + return (0); + +free_out: + /* + * audit_syscall_exit() will free the audit record on the thread even + * if we allocated it above. + */ + free(rec, M_AUDITDATA); + return (error); +} + +/* + * System call to manipulate auditing. + */ +/* ARGSUSED */ +int +auditon(proc_t p, struct auditon_args *uap, __unused int32_t *retval) +{ + kauth_cred_t scred; + int error; + union auditon_udata udata; + proc_t tp = PROC_NULL; + struct auditinfo_addr aia; + + AUDIT_ARG(cmd, uap->cmd); + +#if CONFIG_MACF + error = mac_system_check_auditon(kauth_cred_get(), uap->cmd); + if (error) + return (error); +#endif + + if ((uap->length <= 0) || (uap->length > + (int)sizeof(union auditon_udata))) + return (EINVAL); + + memset((void *)&udata, 0, sizeof(udata)); + + /* + * Some of the GET commands use the arguments too. + */ + switch (uap->cmd) { + case A_SETPOLICY: + case A_OLDSETPOLICY: + case A_SETKMASK: + case A_SETQCTRL: + case A_OLDSETQCTRL: + case A_SETSTAT: + case A_SETUMASK: + case A_SETSMASK: + case A_SETCOND: + case A_OLDSETCOND: + case A_SETCLASS: + case A_SETPMASK: + case A_SETFSIZE: + case A_SETKAUDIT: + case A_GETCLASS: + case A_GETPINFO: + case A_GETPINFO_ADDR: + case A_SENDTRIGGER: + case A_GETSINFO_ADDR: + error = copyin(uap->data, (void *)&udata, uap->length); + if (error) + return (error); + AUDIT_ARG(auditon, &udata); + AUDIT_ARG(len, uap->length); + break; + } + + /* + * A_GETSINFO doesn't require priviledge but only superuser + * gets to see the audit masks. 
+ */ + error = suser(kauth_cred_get(), &p->p_acflag); + if (A_GETSINFO_ADDR == uap->cmd) { + if ((sizeof(udata.au_kau_info) != uap->length) || + (audit_session_lookup(udata.au_kau_info.ai_asid, + &udata.au_kau_info) != 0)) + return (EINVAL); + if (error) { + udata.au_kau_info.ai_mask.am_success = ~0; + udata.au_kau_info.ai_mask.am_failure = ~0; + } + } else + if (error) + return (error); + + /* + * XXX Need to implement these commands by accessing the global + * values associated with the commands. + */ + mtx_lock(&audit_mtx); + switch (uap->cmd) { + case A_OLDGETPOLICY: + case A_GETPOLICY: + if (sizeof(udata.au_policy64) == uap->length) { + if (!audit_fail_stop) + udata.au_policy64 |= AUDIT_CNT; + if (audit_panic_on_write_fail) + udata.au_policy64 |= AUDIT_AHLT; + if (audit_argv) + udata.au_policy64 |= AUDIT_ARGV; + if (audit_arge) + udata.au_policy64 |= AUDIT_ARGE; + break; + } + if (sizeof(udata.au_policy) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if (!audit_fail_stop) + udata.au_policy |= AUDIT_CNT; + if (audit_panic_on_write_fail) + udata.au_policy |= AUDIT_AHLT; + if (audit_argv) + udata.au_policy |= AUDIT_ARGV; + if (audit_arge) + udata.au_policy |= AUDIT_ARGE; + break; + + case A_OLDSETPOLICY: + case A_SETPOLICY: + if (sizeof(udata.au_policy64) == uap->length) { + if (udata.au_policy64 & ~(AUDIT_CNT|AUDIT_AHLT| + AUDIT_ARGV|AUDIT_ARGE)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + audit_fail_stop = ((udata.au_policy64 & AUDIT_CNT) == + 0); + audit_panic_on_write_fail = (udata.au_policy64 & + AUDIT_AHLT); + audit_argv = (udata.au_policy64 & AUDIT_ARGV); + audit_arge = (udata.au_policy64 & AUDIT_ARGE); + + break; + } + if ((sizeof(udata.au_policy) != uap->length) || + (udata.au_policy & ~(AUDIT_CNT|AUDIT_AHLT|AUDIT_ARGV| + AUDIT_ARGE))) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + /* + * XXX - Need to wake up waiters if the policy relaxes? 
+ */ + audit_fail_stop = ((udata.au_policy & AUDIT_CNT) == 0); + audit_panic_on_write_fail = (udata.au_policy & AUDIT_AHLT); + audit_argv = (udata.au_policy & AUDIT_ARGV); + audit_arge = (udata.au_policy & AUDIT_ARGE); + break; + + case A_GETKMASK: + if (sizeof(udata.au_mask) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + udata.au_mask = audit_nae_mask; + break; + + case A_SETKMASK: + if (sizeof(udata.au_mask) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + audit_nae_mask = udata.au_mask; + AUDIT_CHECK_IF_KEVENTS_MASK(audit_nae_mask); + break; + + case A_OLDGETQCTRL: + case A_GETQCTRL: + if (sizeof(udata.au_qctrl64) == uap->length) { + udata.au_qctrl64.aq64_hiwater = + (u_int64_t)audit_qctrl.aq_hiwater; + udata.au_qctrl64.aq64_lowater = + (u_int64_t)audit_qctrl.aq_lowater; + udata.au_qctrl64.aq64_bufsz = + (u_int64_t)audit_qctrl.aq_bufsz; + udata.au_qctrl64.aq64_delay = + (u_int64_t)audit_qctrl.aq_delay; + udata.au_qctrl64.aq64_minfree = + (int64_t)audit_qctrl.aq_minfree; + break; + } + if (sizeof(udata.au_qctrl) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + udata.au_qctrl = audit_qctrl; + break; + + case A_OLDSETQCTRL: + case A_SETQCTRL: + if (sizeof(udata.au_qctrl64) == uap->length) { + if ((udata.au_qctrl64.aq64_hiwater > AQ_MAXHIGH) || + (udata.au_qctrl64.aq64_lowater >= + udata.au_qctrl64.aq64_hiwater) || + (udata.au_qctrl64.aq64_bufsz > AQ_MAXBUFSZ) || + (udata.au_qctrl64.aq64_minfree < 0) || + (udata.au_qctrl64.aq64_minfree > 100)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + audit_qctrl.aq_hiwater = + (int)udata.au_qctrl64.aq64_hiwater; + audit_qctrl.aq_lowater = + (int)udata.au_qctrl64.aq64_lowater; + audit_qctrl.aq_bufsz = + (int)udata.au_qctrl64.aq64_bufsz; + audit_qctrl.aq_minfree = + (int)udata.au_qctrl64.aq64_minfree; + audit_qctrl.aq_delay = -1; /* Not used. 
*/ + + break; + } + if ((sizeof(udata.au_qctrl) != uap->length) || + (udata.au_qctrl.aq_hiwater > AQ_MAXHIGH) || + (udata.au_qctrl.aq_lowater >= udata.au_qctrl.aq_hiwater) || + (udata.au_qctrl.aq_bufsz > AQ_MAXBUFSZ) || + (udata.au_qctrl.aq_minfree < 0) || + (udata.au_qctrl.aq_minfree > 100)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + + audit_qctrl = udata.au_qctrl; + /* XXX The queue delay value isn't used with the kernel. */ + audit_qctrl.aq_delay = -1; + break; + + case A_GETCWD: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_GETCAR: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_GETSTAT: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_SETSTAT: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_SETUMASK: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_SETSMASK: + mtx_unlock(&audit_mtx); + return (ENOSYS); + break; + + case A_OLDGETCOND: + case A_GETCOND: + if (sizeof(udata.au_cond64) == uap->length) { + if (audit_enabled && !audit_suspended) + udata.au_cond64 = AUC_AUDITING; + else + udata.au_cond64 = AUC_NOAUDIT; + + break; + } + if (sizeof(udata.au_cond) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if (audit_enabled && !audit_suspended) + udata.au_cond = AUC_AUDITING; + else + udata.au_cond = AUC_NOAUDIT; + break; + + case A_OLDSETCOND: + case A_SETCOND: + if (sizeof(udata.au_cond64) == uap->length) { + if (udata.au_cond64 == AUC_NOAUDIT) + audit_suspended = 1; + if (udata.au_cond64 == AUC_AUDITING) + audit_suspended = 0; + if (udata.au_cond64 == AUC_DISABLED) { + audit_suspended = 1; + mtx_unlock(&audit_mtx); + audit_shutdown(); + mtx_lock(&audit_mtx); + } + break; + } + if (sizeof(udata.au_cond) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if (udata.au_cond == AUC_NOAUDIT) + audit_suspended = 1; + if (udata.au_cond == AUC_AUDITING) + audit_suspended = 0; + if (udata.au_cond == AUC_DISABLED) { + audit_suspended = 1; + 
mtx_unlock(&audit_mtx); + audit_shutdown(); + mtx_lock(&audit_mtx); + } + break; + + case A_GETCLASS: + if (sizeof(udata.au_evclass) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + udata.au_evclass.ec_class = au_event_class( + udata.au_evclass.ec_number); + break; + + case A_SETCLASS: + if (sizeof(udata.au_evclass) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + au_evclassmap_insert(udata.au_evclass.ec_number, + udata.au_evclass.ec_class); + break; + + case A_GETPINFO: + if ((sizeof(udata.au_aupinfo) != uap->length) || + IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) { + mtx_unlock(&audit_mtx); + return (ESRCH); + } + + mtx_unlock(&audit_mtx); + scred = kauth_cred_proc_ref(tp); + if (scred->cr_audit.as_aia_p->ai_termid.at_type == AU_IPv6) { + kauth_cred_unref(&scred); + proc_rele(tp); + return (EINVAL); + } + + udata.au_aupinfo.ap_auid = + scred->cr_audit.as_aia_p->ai_auid; + udata.au_aupinfo.ap_mask.am_success = + scred->cr_audit.as_mask.am_success; + udata.au_aupinfo.ap_mask.am_failure = + scred->cr_audit.as_mask.am_failure; + udata.au_aupinfo.ap_termid.machine = + scred->cr_audit.as_aia_p->ai_termid.at_addr[0]; + udata.au_aupinfo.ap_termid.port = + scred->cr_audit.as_aia_p->ai_termid.at_port; + udata.au_aupinfo.ap_asid = + scred->cr_audit.as_aia_p->ai_asid; + kauth_cred_unref(&scred); + proc_rele(tp); + tp = PROC_NULL; + mtx_lock(&audit_mtx); + break; + + case A_SETPMASK: + if ((sizeof(udata.au_aupinfo) != uap->length) || + IS_NOT_VALID_PID(udata.au_aupinfo.ap_pid)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) { + mtx_unlock(&audit_mtx); + return (ESRCH); + } + scred = kauth_cred_proc_ref(tp); + bcopy(scred->cr_audit.as_aia_p, &aia, sizeof(aia)); + kauth_cred_unref(&scred); + aia.ai_mask.am_success = + udata.au_aupinfo.ap_mask.am_success; + 
aia.ai_mask.am_failure = + udata.au_aupinfo.ap_mask.am_failure; + AUDIT_CHECK_IF_KEVENTS_MASK(aia.ai_mask); + error = audit_session_setaia(tp, &aia, 0); + mtx_unlock(&audit_mtx); + proc_rele(tp); + tp = PROC_NULL; + if (error) + return (error); + mtx_lock(&audit_mtx); + break; + + case A_SETFSIZE: + if ((sizeof(udata.au_fstat) != uap->length) || + ((udata.au_fstat.af_filesz != 0) && + (udata.au_fstat.af_filesz < MIN_AUDIT_FILE_SIZE))) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + audit_fstat.af_filesz = udata.au_fstat.af_filesz; + break; + + case A_GETFSIZE: + if (sizeof(udata.au_fstat) != uap->length) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + udata.au_fstat.af_filesz = audit_fstat.af_filesz; + udata.au_fstat.af_currsz = audit_fstat.af_currsz; + break; + + case A_GETPINFO_ADDR: + if ((sizeof(udata.au_aupinfo_addr) != uap->length) || + IS_NOT_VALID_PID(udata.au_aupinfo_addr.ap_pid)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + if ((tp = proc_find(udata.au_aupinfo.ap_pid)) == NULL) { + mtx_unlock(&audit_mtx); + return (ESRCH); + } + WARN_IF_AINFO_ADDR_CHANGED(uap->length, + sizeof(auditpinfo_addr_t), "auditon(A_GETPINFO_ADDR,...)", + "auditpinfo_addr_t"); + scred = kauth_cred_proc_ref(tp); + udata.au_aupinfo_addr.ap_auid = + scred->cr_audit.as_aia_p->ai_auid; + udata.au_aupinfo_addr.ap_asid = + scred->cr_audit.as_aia_p->ai_asid; + udata.au_aupinfo_addr.ap_mask.am_success = + scred->cr_audit.as_mask.am_success; + udata.au_aupinfo_addr.ap_mask.am_failure = + scred->cr_audit.as_mask.am_failure; + bcopy(&scred->cr_audit.as_aia_p->ai_termid, + &udata.au_aupinfo_addr.ap_termid, + sizeof(au_tid_addr_t)); + udata.au_aupinfo_addr.ap_flags = + scred->cr_audit.as_aia_p->ai_flags; + kauth_cred_unref(&scred); + proc_rele(tp); + tp = PROC_NULL; + break; + + case A_GETKAUDIT: + mtx_unlock(&audit_mtx); + if (sizeof(udata.au_kau_info) != uap->length) + return (EINVAL); + audit_get_kinfo(&udata.au_kau_info); + mtx_lock(&audit_mtx); + break; + + case 
A_SETKAUDIT: + if ((sizeof(udata.au_kau_info) != uap->length) || + (udata.au_kau_info.ai_termid.at_type != AU_IPv4 && + udata.au_kau_info.ai_termid.at_type != AU_IPv6)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + mtx_unlock(&audit_mtx); + audit_set_kinfo(&udata.au_kau_info); + mtx_lock(&audit_mtx); + break; + + case A_SENDTRIGGER: + if ((sizeof(udata.au_trigger) != uap->length) || + (udata.au_trigger < AUDIT_TRIGGER_MIN) || + (udata.au_trigger > AUDIT_TRIGGER_MAX)) { + mtx_unlock(&audit_mtx); + return (EINVAL); + } + mtx_unlock(&audit_mtx); + return (audit_send_trigger(udata.au_trigger)); + + case A_GETSINFO_ADDR: + /* Handled above before switch(). */ + break; + + default: + mtx_unlock(&audit_mtx); + return (EINVAL); + } + + /* + * Copy data back to userspace for the GET comands. + */ + switch (uap->cmd) { + case A_GETPOLICY: + case A_OLDGETPOLICY: + case A_GETKMASK: + case A_GETQCTRL: + case A_OLDGETQCTRL: + case A_GETCWD: + case A_GETCAR: + case A_GETSTAT: + case A_GETCOND: + case A_OLDGETCOND: + case A_GETCLASS: + case A_GETPINFO: + case A_GETFSIZE: + case A_GETPINFO_ADDR: + case A_GETKAUDIT: + case A_GETSINFO_ADDR: + error = copyout((void *)&udata, uap->data, uap->length); + if (error) { + mtx_unlock(&audit_mtx); + return (ENOSYS); + } + break; + } + + mtx_unlock(&audit_mtx); + return (0); +} + +/* + * System calls to manage the user audit information. 
+ */ +/* ARGSUSED */ +int +getauid(proc_t p, struct getauid_args *uap, __unused int32_t *retval) +{ + au_id_t id; + int error; + kauth_cred_t scred; + +#if CONFIG_MACF + error = mac_proc_check_getauid(p); + if (error) + return (error); +#endif + scred = kauth_cred_proc_ref(p); + id = scred->cr_audit.as_aia_p->ai_auid; + kauth_cred_unref(&scred); + + error = copyout((void *)&id, uap->auid, sizeof(id)); + if (error) + return (error); + + return (0); +} + +/* ARGSUSED */ +int +setauid(proc_t p, struct setauid_args *uap, __unused int32_t *retval) +{ + int error; + au_id_t id; + kauth_cred_t scred; + struct auditinfo_addr aia; + + error = copyin(uap->auid, &id, sizeof(id)); + if (error) + return (error); + AUDIT_ARG(auid, id); + +#if CONFIG_MACF + error = mac_proc_check_setauid(p, id); + if (error) + return (error); +#endif + + scred = kauth_cred_proc_ref(p); + error = suser(scred, &p->p_acflag); + if (error) { + kauth_cred_unref(&scred); + return (error); + } + + bcopy(scred->cr_audit.as_aia_p, &aia, sizeof(aia)); + if (aia.ai_asid == AU_DEFAUDITSID) { + aia.ai_asid = AU_ASSIGN_ASID; + } + bcopy(&scred->cr_audit.as_mask, &aia.ai_mask, sizeof(au_mask_t)); + kauth_cred_unref(&scred); + aia.ai_auid = id; + error = audit_session_setaia(p, &aia, 0); + + return (error); +} + +static int +getaudit_internal(proc_t p, user_addr_t user_addr) +{ + struct auditinfo ai; + kauth_cred_t scred; + + scred = kauth_cred_proc_ref(p); + if (scred->cr_audit.as_aia_p->ai_termid.at_type == AU_IPv6) { + kauth_cred_unref(&scred); + return (ERANGE); + } + + bzero(&ai, sizeof(ai)); + ai.ai_auid = scred->cr_audit.as_aia_p->ai_auid; + ai.ai_asid = scred->cr_audit.as_aia_p->ai_asid; + + /* + * Only superuser gets to see the real mask. 
+ */ + if (suser(scred, &p->p_acflag)) { + ai.ai_mask.am_success = ~0; + ai.ai_mask.am_failure = ~0; + } else { + ai.ai_mask.am_success = scred->cr_audit.as_mask.am_success; + ai.ai_mask.am_failure = scred->cr_audit.as_mask.am_failure; + } + ai.ai_termid.machine = scred->cr_audit.as_aia_p->ai_termid.at_addr[0]; + ai.ai_termid.port = scred->cr_audit.as_aia_p->ai_termid.at_port; + kauth_cred_unref(&scred); + + return (copyout(&ai, user_addr, sizeof (ai))); +} + +/* + * System calls to get and set process audit information. + */ +/* ARGSUSED */ +int +getaudit(proc_t p, struct getaudit_args *uap, __unused int32_t *retval) +{ + int error; + +#if CONFIG_MACF + error = mac_proc_check_getaudit(p); + if (error) + return (error); +#endif + return (getaudit_internal(p, uap->auditinfo)); +} + +/* ARGSUSED */ +int +setaudit(proc_t p, struct setaudit_args *uap, __unused int32_t *retval) +{ + struct auditinfo ai; + struct auditinfo_addr newaia; + kauth_cred_t scred; + int error; + + error = copyin(uap->auditinfo, &ai, sizeof(ai)); + if (error) + return (error); + AUDIT_ARG(auditinfo, &ai); + + if (ai.ai_asid != AU_ASSIGN_ASID && + (uint32_t)ai.ai_asid > ASSIGNED_ASID_MAX) + return (EINVAL); + +#if CONFIG_MACF + { + struct auditinfo_addr aia = { + .ai_auid = ai.ai_auid, + .ai_mask = ai.ai_mask, + .ai_termid = { + .at_port = ai.ai_termid.port, + .at_type = AU_IPv4, + .at_addr = { ai.ai_termid.machine, 0, 0, 0 } }, + .ai_asid = ai.ai_asid, + .ai_flags = 0 }; + error = mac_proc_check_setaudit(p, &aia); + } + if (error) + return (error); +#endif + + bzero(&newaia, sizeof(newaia)); + scred = kauth_cred_proc_ref(p); + error = suser(scred, &p->p_acflag); + if (error) { + kauth_cred_unref(&scred); + return (error); + } + newaia.ai_flags = scred->cr_audit.as_aia_p->ai_flags; + kauth_cred_unref(&scred); + + WARN_IF_BAD_ASID(ai.ai_asid, "setaudit(2)"); + + newaia.ai_auid = ai.ai_auid; + bcopy(&ai.ai_mask, &newaia.ai_mask, sizeof(au_mask_t)); + AUDIT_CHECK_IF_KEVENTS_MASK(ai.ai_mask); + 
newaia.ai_asid = ai.ai_asid; + if (ai.ai_asid == AU_DEFAUDITSID) + newaia.ai_asid = AU_ASSIGN_ASID; + else + newaia.ai_asid = ai.ai_asid; + newaia.ai_termid.at_addr[0] = ai.ai_termid.machine; + newaia.ai_termid.at_port = ai.ai_termid.port; + newaia.ai_termid.at_type = AU_IPv4; + + error = audit_session_setaia(p, &newaia, 0); + if (error) + return (error); + + /* + * If asked to assign an ASID then let the user know what the ASID is + * by copying the auditinfo struct back out. + */ + if (newaia.ai_asid == AU_ASSIGN_ASID) + error = getaudit_internal(p, uap->auditinfo); + + return (error); +} + +static int +getaudit_addr_internal(proc_t p, user_addr_t user_addr, size_t length) +{ + kauth_cred_t scred; + auditinfo_addr_t aia; + + scred = kauth_cred_proc_ref(p); + bcopy(scred->cr_audit.as_aia_p, &aia, sizeof (auditinfo_addr_t)); + /* + * Only superuser gets to see the real mask. + */ + if (suser(scred, &p->p_acflag)) { + aia.ai_mask.am_success = ~0; + aia.ai_mask.am_failure = ~0; + } + kauth_cred_unref(&scred); + + return (copyout(&aia, user_addr, min(sizeof(aia), length))); +} + +/* ARGSUSED */ +int +getaudit_addr(proc_t p, struct getaudit_addr_args *uap, + __unused int32_t *retval) +{ + + WARN_IF_AINFO_ADDR_CHANGED(uap->length, sizeof(auditinfo_addr_t), + "getaudit_addr(2)", "auditinfo_addr_t"); + + return (getaudit_addr_internal(p, uap->auditinfo_addr, uap->length)); +} + +/* ARGSUSED */ +int +setaudit_addr(proc_t p, struct setaudit_addr_args *uap, + __unused int32_t *retval) +{ + struct auditinfo_addr aia; + kauth_cred_t scred; + int error; + + bzero(&aia, sizeof(auditinfo_addr_t)); + error = copyin(uap->auditinfo_addr, &aia, + min(sizeof(aia), uap->length)); + if (error) + return (error); + AUDIT_ARG(auditinfo_addr, &aia); + if (aia.ai_termid.at_type != AU_IPv6 && + aia.ai_termid.at_type != AU_IPv4) + return (EINVAL); + if (aia.ai_asid != AU_ASSIGN_ASID && + (uint32_t)aia.ai_asid > ASSIGNED_ASID_MAX) + return (EINVAL); + +#if CONFIG_MACF + error = 
mac_proc_check_setaudit(p, &aia); + if (error) + return (error); +#endif + + scred = kauth_cred_proc_ref(p); + error = suser(scred, &p->p_acflag); + if (error) { + kauth_cred_unref(&scred); + return (error); + } + + WARN_IF_AINFO_ADDR_CHANGED(uap->length, sizeof(auditinfo_addr_t), + "setaudit_addr(2)", "auditinfo_addr_t"); + WARN_IF_BAD_ASID(aia.ai_asid, "setaudit_addr(2)"); + kauth_cred_unref(&scred); + + AUDIT_CHECK_IF_KEVENTS_MASK(aia.ai_mask); + if (aia.ai_asid == AU_DEFAUDITSID) + aia.ai_asid = AU_ASSIGN_ASID; + + error = audit_session_setaia(p, &aia, 0); + if (error) + return (error); + + /* + * If asked to assign an ASID then let the user know what the ASID is + * by copying the auditinfo_addr struct back out. + */ + if (aia.ai_asid == AU_ASSIGN_ASID) + error = getaudit_addr_internal(p, uap->auditinfo_addr, + uap->length); + + return (error); +} + +/* + * Syscall to manage audit files. + * + */ +/* ARGSUSED */ +int +auditctl(proc_t p, struct auditctl_args *uap, __unused int32_t *retval) +{ + struct nameidata nd; + kauth_cred_t cred; + struct vnode *vp; + int error = 0; + + error = suser(kauth_cred_get(), &p->p_acflag); + if (error) + return (error); + + vp = NULL; + cred = NULL; + + /* + * If a path is specified, open the replacement vnode, perform + * validity checks, and grab another reference to the current + * credential. + * + * XXX Changes API slightly. NULL path no longer disables audit but + * returns EINVAL. + */ + if (uap->path == USER_ADDR_NULL) + return (EINVAL); + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + (IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : + UIO_USERSPACE32), uap->path, vfs_context_current()); + error = vn_open(&nd, AUDIT_OPEN_FLAGS, 0); + if (error) + return (error); + vp = nd.ni_vp; +#if CONFIG_MACF + /* + * Accessibility of the vnode was determined in vn_open; the + * mac_system_check_auditctl should only determine whether that vnode + * is appropriate for storing audit data, or that the caller was + * permitted to control the auditing system at all. For example, a + * confidentiality policy may want to ensure that audit files are + * always high sensitivity. + */ + error = mac_system_check_auditctl(kauth_cred_get(), vp); + if (error) { + vn_close(vp, AUDIT_CLOSE_FLAGS, vfs_context_current()); + vnode_put(vp); + return (error); + } +#endif + if (vp->v_type != VREG) { + vn_close(vp, AUDIT_CLOSE_FLAGS, vfs_context_current()); + vnode_put(vp); + return (EINVAL); + } + mtx_lock(&audit_mtx); + /* + * XXXAUDIT: Should audit_suspended actually be cleared by + * audit_worker? + */ + audit_suspended = 0; + mtx_unlock(&audit_mtx); + + /* + * The following gets unreferenced in audit_rotate_vnode() + * after the rotation and it is no longer needed. 
+ */ + cred = kauth_cred_get_with_ref(); + audit_rotate_vnode(cred, vp); + vnode_put(vp); + + return (error); +} + +#else /* !CONFIG_AUDIT */ + +int +audit(proc_t p, struct audit_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +auditon(proc_t p, struct auditon_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +getauid(proc_t p, struct getauid_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +setauid(proc_t p, struct setauid_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +getaudit(proc_t p, struct getaudit_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +setaudit(proc_t p, struct setaudit_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +getaudit_addr(proc_t p, struct getaudit_addr_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +setaudit_addr(proc_t p, struct setaudit_addr_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +int +auditctl(proc_t p, struct auditctl_args *uap, int32_t *retval) +{ +#pragma unused(p, uap, retval) + + return (ENOSYS); +} + +#endif /* CONFIG_AUDIT */ diff --git a/bsd/security/audit/audit_worker.c b/bsd/security/audit/audit_worker.c new file mode 100644 index 000000000..d307a7eb9 --- /dev/null +++ b/bsd/security/audit/audit_worker.c @@ -0,0 +1,548 @@ +/*- + * Copyright (c) 1999-2008 Apple Inc. + * Copyright (c) 2006-2008 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* + * Worker thread that will schedule disk I/O, etc. + */ +static thread_t audit_thread; + +/* + * audit_ctx and audit_vp are the stored credential and vnode to use for + * active audit trail. They are protected by audit_worker_sl, which will be + * held across all I/O and all rotation to prevent them from being replaced + * (rotated) while in use. 
The audit_file_rotate_wait flag is set when the + * kernel has delivered a trigger to auditd to rotate the trail, and is + * cleared when the next rotation takes place. It is also protected by + * audit_worker_sl. + */ +static int audit_file_rotate_wait; +static struct slck audit_worker_sl; +static struct vfs_context audit_ctx; +static struct vnode *audit_vp; + +#define AUDIT_WORKER_SX_INIT() slck_init(&audit_worker_sl, \ + "audit_worker_sl") +#define AUDIT_WORKER_SX_XLOCK() slck_lock(&audit_worker_sl) +#define AUDIT_WORKER_SX_XUNLOCK() slck_unlock(&audit_worker_sl) +#define AUDIT_WORKER_SX_ASSERT() slck_assert(&audit_worker_sl, SL_OWNED) +#define AUDIT_WORKER_SX_DESTROY() slck_destroy(&audit_worker_sl) + +/* + * The audit_draining flag is set when audit is disabled and the audit + * worker queue is being drained. + */ +static int audit_draining; + +/* + * The special kernel audit record, audit_drain_kar, is used to mark the end of + * the queue when draining it. + */ +static struct kaudit_record audit_drain_kar = { + .k_ar = { + .ar_event = AUE_NULL, + }, + .k_ar_commit = AR_DRAIN_QUEUE, +}; + +/* + * Write an audit record to a file, performed as the last stage after both + * preselection and BSM conversion. Both space management and write failures + * are handled in this function. + * + * No attempt is made to deal with possible failure to deliver a trigger to + * the audit daemon, since the message is asynchronous anyway. + */ +static void +audit_record_write(struct vnode *vp, struct vfs_context *ctx, void *data, + size_t len) +{ + static struct timeval last_lowspace_trigger; + static struct timeval last_fail; + static int cur_lowspace_trigger; + struct vfsstatfs *mnt_stat; + int error; + static int cur_fail; + uint64_t temp; + off_t file_size; + + AUDIT_WORKER_SX_ASSERT(); /* audit_file_rotate_wait. 
*/ + + if (vp == NULL) + return; + + if (vnode_getwithref(vp)) + return /*(ENOENT)*/; + + mnt_stat = &vp->v_mount->mnt_vfsstat; + + /* + * First, gather statistics on the audit log file and file system so + * that we know how we're doing on space. Consider failure of these + * operations to indicate a future inability to write to the file. + */ + error = vfs_update_vfsstat(vp->v_mount, ctx, VFS_KERNEL_EVENT); + if (error) + goto fail; + error = vnode_size(vp, &file_size, ctx); + if (error) + goto fail; + audit_fstat.af_currsz = (u_quad_t)file_size; + + /* + * We handle four different space-related limits: + * + * - A fixed (hard) limit on the minimum free blocks we require on + * the file system, and results in record loss, a trigger, and + * possible fail stop due to violating invariants. + * + * - An administrative (soft) limit, which when fallen below, results + * in the kernel notifying the audit daemon of low space. + * + * - An audit trail size limit, which when gone above, results in the + * kernel notifying the audit daemon that rotation is desired. + * + * - The total depth of the kernel audit record exceeding free space, + * which can lead to possible fail stop (with drain), in order to + * prevent violating invariants. Failure here doesn't halt + * immediately, but prevents new records from being generated. + * + * Possibly, the last of these should be handled differently, always + * allowing a full queue to be lost, rather than trying to prevent + * loss. + * + * First, handle the hard limit, which generates a trigger and may + * fail stop. This is handled in the same manner as ENOSPC from + * VOP_WRITE, and results in record loss. + */ + if (mnt_stat->f_bfree < AUDIT_HARD_LIMIT_FREE_BLOCKS) { + error = ENOSPC; + goto fail_enospc; + } + + /* + * Second, handle falling below the soft limit, if defined; we send + * the daemon a trigger and continue processing the record. Triggers + * are limited to 1/sec. 
+ */ + if (audit_qctrl.aq_minfree != 0) { + temp = mnt_stat->f_blocks / (100 / audit_qctrl.aq_minfree); + if (mnt_stat->f_bfree < temp) { + if (ppsratecheck(&last_lowspace_trigger, + &cur_lowspace_trigger, 1)) { + (void)audit_send_trigger( + AUDIT_TRIGGER_LOW_SPACE); + printf("Warning: audit space low (< %d%% free) " + "on audit log file-system\n", + audit_qctrl.aq_minfree); + } + } + } + + /* + * If the current file is getting full, generate a rotation trigger + * to the daemon. This is only approximate, which is fine as more + * records may be generated before the daemon rotates the file. + */ + if ((audit_fstat.af_filesz != 0) && (audit_file_rotate_wait == 0) && + ((u_quad_t)file_size >= audit_fstat.af_filesz)) { + AUDIT_WORKER_SX_ASSERT(); + + audit_file_rotate_wait = 1; + (void)audit_send_trigger(AUDIT_TRIGGER_ROTATE_KERNEL); + } + + /* + * If the estimated amount of audit data in the audit event queue + * (plus records allocated but not yet queued) has reached the amount + * of free space on the disk, then we need to go into an audit fail + * stop state, in which we do not permit the allocation/committing of + * any new audit records. We continue to process records but don't + * allow any activities that might generate new records. In the + * future, we might want to detect when space is available again and + * allow operation to continue, but this behavior is sufficient to + * meet fail stop requirements in CAPP. + */ + if (audit_fail_stop) { + if ((unsigned long)((audit_q_len + audit_pre_q_len + 1) * + MAX_AUDIT_RECORD_SIZE) / mnt_stat->f_bsize >= + (unsigned long)(mnt_stat->f_bfree)) { + if (ppsratecheck(&last_fail, &cur_fail, 1)) + printf("audit_record_write: free space " + "below size of audit queue, failing " + "stop\n"); + audit_in_failure = 1; + } else if (audit_in_failure) { + /* + * Note: if we want to handle recovery, this is the + * spot to do it: unset audit_in_failure, and issue a + * wakeup on the cv.
+ */ + } + } + + error = vn_rdwr(UIO_WRITE, vp, data, len, (off_t)0, UIO_SYSSPACE, + IO_APPEND|IO_UNIT, vfs_context_ucred(ctx), NULL, + vfs_context_proc(ctx)); + if (error == ENOSPC) + goto fail_enospc; + else if (error) + goto fail; + + /* + * Catch completion of a queue drain here; if we're draining and the + * queue is now empty, fail stop. That audit_fail_stop is implicitly + * true, since audit_in_failure can only be set if audit_fail_stop is + * set. + * + * Note: if we handle recovery from audit_in_failure, then we need to + * make panic here conditional. + */ + if (audit_in_failure) { + if (audit_q_len == 0 && audit_pre_q_len == 0) { + (void)VNOP_FSYNC(vp, MNT_WAIT, ctx); + panic("Audit store overflow; record queue drained."); + } + } + + vnode_put(vp); + return; + +fail_enospc: + /* + * ENOSPC is considered a special case with respect to failures, as + * this can reflect either our preemptive detection of insufficient + * space, or ENOSPC returned by the vnode write call. + */ + if (audit_fail_stop) { + (void)VNOP_FSYNC(vp, MNT_WAIT, ctx); + panic("Audit log space exhausted and fail-stop set."); + } + (void)audit_send_trigger(AUDIT_TRIGGER_NO_SPACE); + audit_suspended = 1; + + /* FALLTHROUGH */ +fail: + /* + * We have failed to write to the file, so the current record is + * lost, which may require an immediate system halt. + */ + if (audit_panic_on_write_fail) { + (void)VNOP_FSYNC(vp, MNT_WAIT, ctx); + panic("audit_worker: write error %d\n", error); + } else if (ppsratecheck(&last_fail, &cur_fail, 1)) + printf("audit_worker: write error %d\n", error); + vnode_put(vp); +} + +/* + * Given a kernel audit record, process as required. Kernel audit records + * are converted to one, or possibly two, BSM records, depending on whether + * there is a user audit record present also. Kernel records need to be + * converted to BSM before they can be written out. Both types will be + * written to disk, and audit pipes.
+ */ +static void +audit_worker_process_record(struct kaudit_record *ar) +{ + struct au_record *bsm; + au_class_t class; + au_event_t event; + au_id_t auid; + int error, sorf; + int trail_locked; + + /* + * We hold the audit_worker_sl lock over both writes, if there are + * two, so that the two records won't be split across a rotation and + * end up in two different trail files. + */ + if (((ar->k_ar_commit & AR_COMMIT_USER) && + (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL)) || + (ar->k_ar_commit & AR_PRESELECT_TRAIL)) { + AUDIT_WORKER_SX_XLOCK(); + trail_locked = 1; + } else + trail_locked = 0; + + /* + * First, handle the user record, if any: commit to the system trail + * and audit pipes as selected. + */ + if ((ar->k_ar_commit & AR_COMMIT_USER) && + (ar->k_ar_commit & AR_PRESELECT_USER_TRAIL)) { + AUDIT_WORKER_SX_ASSERT(); + audit_record_write(audit_vp, &audit_ctx, ar->k_udata, + ar->k_ulen); + } + + if ((ar->k_ar_commit & AR_COMMIT_USER) && + (ar->k_ar_commit & AR_PRESELECT_USER_PIPE)) + audit_pipe_submit_user(ar->k_udata, ar->k_ulen); + + if (!(ar->k_ar_commit & AR_COMMIT_KERNEL) || + ((ar->k_ar_commit & AR_PRESELECT_PIPE) == 0 && + (ar->k_ar_commit & AR_PRESELECT_TRAIL) == 0)) + goto out; + + auid = ar->k_ar.ar_subj_auid; + event = ar->k_ar.ar_event; + class = au_event_class(event); + if (ar->k_ar.ar_errno == 0) + sorf = AU_PRS_SUCCESS; + else + sorf = AU_PRS_FAILURE; + + error = kaudit_to_bsm(ar, &bsm); + switch (error) { + case BSM_NOAUDIT: + goto out; + + case BSM_FAILURE: + printf("audit_worker_process_record: BSM_FAILURE\n"); + goto out; + + case BSM_SUCCESS: + break; + + default: + panic("kaudit_to_bsm returned %d", error); + } + + if (ar->k_ar_commit & AR_PRESELECT_TRAIL) { + AUDIT_WORKER_SX_ASSERT(); + audit_record_write(audit_vp, &audit_ctx, bsm->data, bsm->len); + } + + if (ar->k_ar_commit & AR_PRESELECT_PIPE) + audit_pipe_submit(auid, event, class, sorf, + ar->k_ar_commit & AR_PRESELECT_TRAIL, bsm->data, + bsm->len); + + kau_free(bsm); +out: + if 
(trail_locked) + AUDIT_WORKER_SX_XUNLOCK(); +} + +/* + * The audit_worker thread is responsible for watching the event queue, + * dequeueing records, converting them to BSM format, and committing them to + * disk. In order to minimize lock thrashing, records are dequeued in sets + * to a thread-local work queue. + * + * Note: this means that the effective bound on the size of the pending record + * queue is 2x the length of the global queue. + */ +static void +audit_worker(void) +{ + struct kaudit_queue ar_worklist; + struct kaudit_record *ar; + int lowater_signal; + + audit_ctx.vc_thread = current_thread(); + TAILQ_INIT(&ar_worklist); + mtx_lock(&audit_mtx); + while (1) { + mtx_assert(&audit_mtx, MA_OWNED); + + /* + * Wait for a record. + */ + while (TAILQ_EMPTY(&audit_q)) + cv_wait(&audit_worker_cv, &audit_mtx); + + /* + * If there are records in the global audit record queue, + * transfer them to a thread-local queue and process them + * one by one. If we cross the low watermark threshold, + * signal any waiting processes that they may wake up and + * continue generating records. + */ + lowater_signal = 0; + while ((ar = TAILQ_FIRST(&audit_q))) { + TAILQ_REMOVE(&audit_q, ar, k_q); + audit_q_len--; + if (audit_q_len == audit_qctrl.aq_lowater) + lowater_signal++; + TAILQ_INSERT_TAIL(&ar_worklist, ar, k_q); + } + if (lowater_signal) + cv_broadcast(&audit_watermark_cv); + + mtx_unlock(&audit_mtx); + while ((ar = TAILQ_FIRST(&ar_worklist))) { + TAILQ_REMOVE(&ar_worklist, ar, k_q); + if (ar->k_ar_commit & AR_DRAIN_QUEUE) { + audit_draining = 0; + cv_broadcast(&audit_drain_cv); + } else { + audit_worker_process_record(ar); + audit_free(ar); + } + } + mtx_lock(&audit_mtx); + } +} + +/* + * audit_rotate_vnode() is called by a user or kernel thread to configure or + * de-configure auditing on a vnode. The arguments are the replacement + * credential (referenced) and vnode (referenced and opened) to substitute + * for the current credential and vnode, if any.
If either is set to NULL, + * both should be NULL, and this is used to indicate that audit is being + * disabled. Any previous cred/vnode will be closed and freed. We re-enable + * generating rotation requests to auditd. + */ +void +audit_rotate_vnode(kauth_cred_t cred, struct vnode *vp) +{ + kauth_cred_t old_audit_cred; + struct vnode *old_audit_vp; + int audit_was_enabled; + + KASSERT((cred != NULL && vp != NULL) || (cred == NULL && vp == NULL), + ("audit_rotate_vnode: cred %p vp %p", cred, vp)); + + /* + * Rotate the vnode/cred, and clear the rotate flag so that we will + * send a rotate trigger if the new file fills. + */ + AUDIT_WORKER_SX_XLOCK(); + old_audit_cred = audit_ctx.vc_ucred; + old_audit_vp = audit_vp; + audit_ctx.vc_ucred = cred; + audit_file_rotate_wait = 0; + audit_was_enabled = audit_enabled; + if ((audit_enabled = (NULL != vp))) + audit_vp = vp; + audit_draining = (audit_was_enabled && !audit_enabled); + AUDIT_WORKER_SX_XUNLOCK(); + + /* + * If audit (was enabled and) is now disabled then drain the audit + * record queue and wait until it is done. + */ + mtx_lock(&audit_mtx); + if (audit_draining) { + /* + * Insert the special drain record in the queue. + */ + while (audit_q_len >= audit_qctrl.aq_hiwater) + cv_wait(&audit_watermark_cv, &audit_mtx); + TAILQ_INSERT_TAIL(&audit_q, &audit_drain_kar, k_q); + audit_q_len++; + cv_signal(&audit_worker_cv); + + /* + * Wait for the audit worker thread to signal it is done. + */ + while (audit_draining) + cv_wait(&audit_drain_cv, &audit_mtx); + + audit_vp = NULL; + } + mtx_unlock(&audit_mtx); + + /* + * If there was an old vnode/credential, close and free. 
+ */ + if (old_audit_vp != NULL) { + if (vnode_get(old_audit_vp) == 0) { + vn_close(old_audit_vp, AUDIT_CLOSE_FLAGS, + vfs_context_kernel()); + vnode_put(old_audit_vp); + } else + printf("audit_rotate_vnode: Couldn't close " + "audit file.\n"); + kauth_cred_unref(&old_audit_cred); + } +} + +void +audit_worker_init(void) +{ + + AUDIT_WORKER_SX_INIT(); + kernel_thread_start((thread_continue_t)audit_worker, NULL, + &audit_thread); + if (audit_thread == THREAD_NULL) + panic("audit_worker_init: Couldn't create audit_worker thread"); +} diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 907cc9a64..06fc90203 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -39,11 +39,11 @@ DATAFILES = \ ioccom.h ioctl.h \ ioctl_compat.h ipc.h kernel.h kernel_types.h kern_event.h loadable_fs.h lock.h lockf.h \ kauth.h kdebug.h kern_control.h kern_memorystatus.h lctx.h malloc.h \ - mbuf.h mman.h mount.h msg.h msgbuf.h mtio.h netport.h param.h paths.h pipe.h poll.h \ + mbuf.h mman.h mount.h msg.h msgbuf.h netport.h param.h paths.h pipe.h poll.h \ proc.h proc_info.h ptrace.h queue.h quota.h random.h reboot.h resource.h resourcevar.h \ sbuf.h posix_sem.h posix_shm.h sdt.h \ select.h sem.h semaphore.h shm.h signal.h signalvar.h socket.h socketvar.h sockio.h stat.h \ - syscall.h sysctl.h syslimits.h syslog.h sys_domain.h termios.h time.h \ + sysctl.h syslimits.h syslog.h sys_domain.h termios.h time.h \ timeb.h times.h tprintf.h trace.h tty.h ttychars.h ttycom.h \ ttydefaults.h ttydev.h types.h ubc.h ucontext.h ucred.h uio.h un.h unistd.h unpcb.h \ user.h utfconv.h utsname.h vadvise.h vcmd.h \ @@ -56,10 +56,13 @@ DATAFILES = \ PRIVATE_DATAFILES = \ codesign.h \ disklabel.h \ + fsctl.h \ + fsgetpath.h \ fslog.h \ ipcs.h \ shm_internal.h \ spawn_internal.h \ + tree.h \ ux_exception.h \ proc_info.h \ vnioctl.h @@ -71,12 +74,12 @@ KERNELFILES = \ appleapiopts.h attr.h \ buf.h cdefs.h conf.h \ dir.h dirent.h disk.h disklabel.h dkstat.h \ - errno.h ev.h event.h fcntl.h file.h filedesc.h 
filio.h \ + errno.h ev.h event.h fcntl.h file.h filio.h \ ioccom.h ioctl.h ipc.h \ ioctl_compat.h kernel.h kernel_types.h kern_event.h lctx.h lock.h lockf.h \ - kauth.h kdebug.h md5.h kern_control.h kern_memorystatus.h imgact.h malloc.h namei.h \ - mman.h mbuf.h mount.h mtio.h netport.h param.h paths.h \ - proc.h queue.h quota.h random.h resource.h resourcevar.h \ + kauth.h kdebug.h md5.h kern_callout.h kern_control.h kern_memorystatus.h imgact.h malloc.h namei.h \ + mman.h mbuf.h mount.h netport.h param.h paths.h \ + proc.h queue.h random.h resource.h \ sbuf.h posix_sem.h posix_shm.h sem.h shm.h \ select.h signal.h socket.h socketvar.h sockio.h stat.h \ sysctl.h syslimits.h syslog.h systm.h sys_domain.h time.h \ @@ -85,7 +88,7 @@ KERNELFILES = \ _select.h _structs.h _types.h _endian.h protosw.h domain.h \ kpi_mbuf.h kpi_socket.h kpi_socketfilter.h \ ttycom.h termios.h msg.h \ - pipe.h sysproto.h semaphore.h wait.h \ + wait.h \ spawn.h # The last line was added to export needed headers for the MAC calls # whose source is outside of the xnu/bsd tree. 
@@ -94,35 +97,38 @@ KERNELFILES = \ # Installs header file for Apple internal use for kernel extensions - # $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders PRIVATE_KERNELFILES = \ + decmpfs.h \ disktab.h \ fbt.h \ - file_internal.h \ + fsctl.h \ fslog.h \ mach_swapon.h \ msgbuf.h \ eventvar.h \ - mount_internal.h \ - proc_internal.h \ - vnode_internal.h \ + quota.h \ sem_internal.h \ shm_internal.h \ signalvar.h \ tty.h ttychars.h \ ttydefaults.h ttydev.h \ user.h \ - ubc_internal.h \ - uio_internal.h \ vfs_context.h \ vmmeter.h \ spawn_internal.h - + # /System/Library/Frameworks/System.framework/Headers and /usr/include INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_GEN_LIST = syscall.h + INSTALL_MI_DIR = sys -EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} syscall.h linker_set.h bsdtask_info.h +EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h filedesc.h pipe.h resourcevar.h semaphore.h \ + vnode_internal.h proc_internal.h file_internal.h mount_internal.h \ + uio_internal.h + +EXPORT_MI_GEN_LIST = syscall.h sysproto.h EXPORT_MI_DIR = sys @@ -132,9 +138,23 @@ INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} # /System/Library/Frameworks/Kernel.framework/PrivateHeaders INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_GEN_LIST = sysproto.h + # /System/Library/Frameworks/Kernel.framework/Headers INSTALL_KF_MI_LIST = ${KERNELFILES} +INSTALL_KF_MI_GEN_LIST = + +MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh + +syscall.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) + @echo "Generating bsd/sys/$@ from $<"; + $(_v)$(MAKESYSCALLS) $< header > /dev/null + +sysproto.h: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) + @echo "Generating bsd/sys/$@ from $<"; + $(_v)$(MAKESYSCALLS) $< proto > /dev/null + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/sys/_endian.h b/bsd/sys/_endian.h index e1ec25078..3fb0f062f 100644 --- a/bsd/sys/_endian.h +++ 
b/bsd/sys/_endian.h @@ -107,10 +107,10 @@ __END_DECLS #elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN -#define ntohl(x) (x) -#define ntohs(x) (x) -#define htonl(x) (x) -#define htons(x) (x) +#define ntohl(x) ((__uint32_t)(x)) +#define ntohs(x) ((__uint16_t)(x)) +#define htonl(x) ((__uint32_t)(x)) +#define htons(x) ((__uint16_t)(x)) #if defined(KERNEL) || (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) #define NTOHL(x) (x) diff --git a/bsd/sys/_structs.h b/bsd/sys/_structs.h index e0792b73f..102101f0b 100644 --- a/bsd/sys/_structs.h +++ b/bsd/sys/_structs.h @@ -172,10 +172,95 @@ _STRUCT_UCONTEXT64 _STRUCT_USER_TIMESPEC { user_time_t tv_sec; /* seconds */ - __int64_t tv_nsec __attribute((aligned(8))); /* and nanoseconds */ + user_long_t tv_nsec; /* and nanoseconds */ }; #endif /* _STRUCT_USER_TIMESPEC */ #endif /* __need_struct_user_timespec */ + +#ifdef __need_struct_user64_timespec +#undef __need_struct_user64_timespec +#ifndef _STRUCT_USER64_TIMESPEC +#define _STRUCT_USER64_TIMESPEC struct user64_timespec +_STRUCT_USER64_TIMESPEC +{ + user64_time_t tv_sec; /* seconds */ + user64_long_t tv_nsec; /* and nanoseconds */ +}; +#endif /* _STRUCT_USER64_TIMESPEC */ +#endif /* __need_struct_user64_timespec */ + +#ifdef __need_struct_user32_timespec +#undef __need_struct_user32_timespec +#ifndef _STRUCT_USER32_TIMESPEC +#define _STRUCT_USER32_TIMESPEC struct user32_timespec +_STRUCT_USER32_TIMESPEC +{ + user32_time_t tv_sec; /* seconds */ + user32_long_t tv_nsec; /* and nanoseconds */ +}; +#endif /* _STRUCT_USER32_TIMESPEC */ +#endif /* __need_struct_user32_timespec */ + +#ifdef __need_struct_user_timeval +#undef __need_struct_user_timeval +#ifndef _STRUCT_USER_TIMEVAL +#define _STRUCT_USER_TIMEVAL struct user_timeval +_STRUCT_USER_TIMEVAL +{ + user_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER_TIMEVAL */ +#endif /* __need_struct_user_timeval */ + +#ifdef __need_struct_user64_timeval +#undef 
__need_struct_user64_timeval +#ifndef _STRUCT_USER64_TIMEVAL +#define _STRUCT_USER64_TIMEVAL struct user64_timeval +_STRUCT_USER64_TIMEVAL +{ + user64_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER64_TIMEVAL */ +#endif /* __need_struct_user64_timeval */ + +#ifdef __need_struct_user32_timeval +#undef __need_struct_user32_timeval +#ifndef _STRUCT_USER32_TIMEVAL +#define _STRUCT_USER32_TIMEVAL struct user32_timeval +_STRUCT_USER32_TIMEVAL +{ + user32_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER32_TIMEVAL */ +#endif /* __need_struct_user32_timeval */ + +#ifdef __need_struct_user64_itimerval +#undef __need_struct_user64_itimerval +#ifndef _STRUCT_USER64_ITIMERVAL +#define _STRUCT_USER64_ITIMERVAL struct user64_itimerval +_STRUCT_USER64_ITIMERVAL +{ + _STRUCT_USER64_TIMEVAL it_interval; /* timer interval */ + _STRUCT_USER64_TIMEVAL it_value; /* current value */ +}; +#endif /* _STRUCT_USER64_ITIMERVAL */ +#endif /* __need_struct_user64_itimerval */ + +#ifdef __need_struct_user32_itimerval +#undef __need_struct_user32_itimerval +#ifndef _STRUCT_USER32_ITIMERVAL +#define _STRUCT_USER32_ITIMERVAL struct user32_itimerval +_STRUCT_USER32_ITIMERVAL +{ + _STRUCT_USER32_TIMEVAL it_interval; /* timer interval */ + _STRUCT_USER32_TIMEVAL it_value; /* current value */ +}; +#endif /* _STRUCT_USER32_ITIMERVAL */ +#endif /* __need_struct_user32_itimerval */ + #endif /* KERNEL */ #ifdef __need_fd_set @@ -195,7 +280,7 @@ _STRUCT_USER_TIMESPEC #endif /* FD_SETSIZE */ #define __DARWIN_NBBY 8 /* bits in a byte */ #define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ -#define __DARWIN_howmany(x, y) (((x) + ((y) - 1)) / (y)) /* # y's == x bits? */ +#define __DARWIN_howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) /* # y's == x bits?
*/ __BEGIN_DECLS typedef struct fd_set { @@ -205,7 +290,7 @@ __END_DECLS /* This inline avoids argument side-effect issues with FD_ISSET() */ static __inline int -__darwin_fd_isset(int _n, struct fd_set *_p) +__darwin_fd_isset(int _n, const struct fd_set *_p) { return (_p->fds_bits[_n/__DARWIN_NFDBITS] & (1<<(_n % __DARWIN_NFDBITS))); } diff --git a/bsd/sys/_types.h b/bsd/sys/_types.h index 64dd43578..5d67dafc8 100644 --- a/bsd/sys/_types.h +++ b/bsd/sys/_types.h @@ -133,6 +133,7 @@ typedef __int32_t __darwin_suseconds_t; /* [???] microseconds */ typedef __uint32_t __darwin_uid_t; /* [???] user IDs */ typedef __uint32_t __darwin_useconds_t; /* [???] microseconds */ typedef unsigned char __darwin_uuid_t[16]; +typedef char __darwin_uuid_string_t[37]; #ifdef KERNEL #ifndef offsetof diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h index bb0a7d7c5..d4f7e7a68 100644 --- a/bsd/sys/aio.h +++ b/bsd/sys/aio.h @@ -48,6 +48,10 @@ * In our case, this is limited to struct timespec, off_t and ssize_t. */ #define __need_struct_timespec +#ifdef KERNEL +#define __need_struct_user64_timespec +#define __need_struct_user32_timespec +#endif /* KERNEL */ #include #ifndef _OFF_T @@ -60,6 +64,21 @@ typedef __darwin_off_t off_t; typedef __darwin_ssize_t ssize_t; #endif +/* + * A aio_fsync() options that the calling thread is to continue execution + * while the lio_listio() operation is being performed, and no notification + * is given when the operation is complete + * + * [XSI] from + */ +#ifndef O_SYNC /* allow simultaneous inclusion of */ +#define O_SYNC 0x0080 /* synch I/O file integrity */ +#endif + +#ifndef O_DSYNC /* allow simultaneous inclusion of */ +#define O_DSYNC 0x400000 /* synch I/O data integrity */ +#endif + struct aiocb { int aio_fildes; /* File descriptor */ @@ -75,11 +94,31 @@ struct aiocb { struct user_aiocb { int aio_fildes; /* File descriptor */ - off_t aio_offset __attribute((aligned(8))); /* File offset */ - user_addr_t aio_buf __attribute((aligned(8))); /* Location of 
buffer */ + off_t aio_offset; /* File offset */ + user_addr_t aio_buf; /* Location of buffer */ user_size_t aio_nbytes; /* Length of transfer */ int aio_reqprio; /* Request priority offset */ - struct user_sigevent aio_sigevent __attribute((aligned(8))); /* Signal number and value */ + struct user_sigevent aio_sigevent; /* Signal number and value */ + int aio_lio_opcode; /* Operation to be performed */ +}; + +struct user64_aiocb { + int aio_fildes; /* File descriptor */ + user64_off_t aio_offset; /* File offset */ + user64_addr_t aio_buf; /* Location of buffer */ + user64_size_t aio_nbytes; /* Length of transfer */ + int aio_reqprio; /* Request priority offset */ + struct user64_sigevent aio_sigevent; /* Signal number and value */ + int aio_lio_opcode; /* Operation to be performed */ +}; + +struct user32_aiocb { + int aio_fildes; /* File descriptor */ + user32_off_t aio_offset; /* File offset */ + user32_addr_t aio_buf; /* Location of buffer */ + user32_size_t aio_nbytes; /* Length of transfer */ + int aio_reqprio; /* Request priority offset */ + struct user32_sigevent aio_sigevent; /* Signal number and value */ int aio_lio_opcode; /* Operation to be performed */ }; @@ -137,19 +176,6 @@ struct user_aiocb { */ #define AIO_LISTIO_MAX 16 -/* - * A aio_fsync() options - * that the calling thread is to continue execution while - * the lio_listio() operation is being performed, and no - * notification is given when the operation is complete - */ - -#ifndef O_SYNC /* XXX investigate documentation error */ -#define O_SYNC 0x0080 /* queued IO is completed as if by fsync() */ -#endif -#if 0 /* O_DSYNC - NOT SUPPORTED */ -#define O_DSYNC 0x1 /* queued async IO is completed as if by fdatasync() */ -#endif #ifndef KERNEL /* diff --git a/bsd/sys/aio_kern.h b/bsd/sys/aio_kern.h index 33ae502c9..18a801f98 100644 --- a/bsd/sys/aio_kern.h +++ b/bsd/sys/aio_kern.h @@ -42,35 +42,46 @@ struct aio_workq_entry { - TAILQ_ENTRY( aio_workq_entry ) aio_workq_link; - struct proc *procp; /* 
user proc that queued this request */ - user_addr_t uaiocbp; /* pointer passed in from user land */ - user_addr_t fsyncp; /* not NULL means this request must complete */ - /* before an aio_fsync call can proceed. */ - vm_map_t aio_map; /* user land map we have a reference to */ - user_ssize_t returnval; /* return value from read / write request */ - int errorval; /* error value from read / write request */ - int flags; - long group_tag; /* identifier used to group IO requests */ - struct user_aiocb aiocb; /* copy of aiocb from user land */ + TAILQ_ENTRY( aio_workq_entry ) aio_workq_link; /* Protected by queue lock */ + TAILQ_ENTRY( aio_workq_entry ) aio_proc_link; /* Protected by proc's aio lock */ + + /* Proc lock */ + void *group_tag; /* identifier used to group IO requests */ + + /* Initialized and never changed, safe to access */ + struct proc *procp; /* user proc that queued this request */ + user_addr_t uaiocbp; /* pointer passed in from user land */ + struct user_aiocb aiocb; /* copy of aiocb from user land */ + vm_map_t aio_map; /* user land map we have a reference to */ + + /* Entry lock */ + int aio_refcount; + user_ssize_t returnval; /* return value from read / write request */ + int errorval; /* error value from read / write request */ + int flags; }; typedef struct aio_workq_entry aio_workq_entry; /* * definitions for aio_workq_entry.flags */ -#define AIO_READ 0x00000001 -#define AIO_WRITE 0x00000002 +#define AIO_READ 0x00000001 /* a read */ +#define AIO_WRITE 0x00000002 /* a write */ #define AIO_FSYNC 0x00000004 /* aio_fsync with op = O_SYNC */ #define AIO_DSYNC 0x00000008 /* aio_fsync with op = O_DSYNC (not supported yet) */ #define AIO_LIO 0x00000010 /* lio_listio generated IO */ -#define AIO_DO_FREE 0x00000800 /* entry needs to be freed */ -#define AIO_COMPLETION 0x00001000 /* entry is in completion processing (not freeable yet) */ +#define AIO_DO_FREE 0x00000800 /* entry should be freed when last reference is dropped.
*/ + /* set by aio_return() and _aio_exit() */ #define AIO_DISABLE 0x00002000 /* process is trying to exit or exec and we need */ - /* to disable normal completion notification */ -#define AIO_WAITING 0x00004000 /* process is trying to exit, exec, or close and is */ - /* waiting for one or more active IO requests to */ - /* complete */ + /* to not try to send a signal from do_aio_completion() */ +#define AIO_CLOSE_WAIT 0x00004000 /* process is trying to close and is */ + /* waiting for one or more active IO requests to */ + /* complete */ +#define AIO_EXIT_WAIT 0x00008000 /* process is trying to exit or exec and is */ + /* waiting for one or more active IO requests to */ + /* complete */ + +#define AIO_LIO_NOTIFY 0x00010000 /* wait for list complete */ /* * Prototypes diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index ca87e7052..487696334 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,6 +46,8 @@ #define FSOPT_NOFOLLOW 0x00000001 #define FSOPT_NOINMEMUPDATE 0x00000002 #define FSOPT_REPORT_FULLSIZE 0x00000004 +/* The following option only valid when requesting ATTR_CMN_RETURNED_ATTRS */ +#define FSOPT_PACK_INVAL_ATTRS 0x00000008 /* we currently aren't anywhere near this amount for a valid * fssearchblock.sizeofsearchparams1 or fssearchblock.sizeofsearchparams2 @@ -179,10 +181,9 @@ typedef struct vol_capabilities_attr { * A volume that must always read from disk or always perform a network * transaction should not set this bit. * - * VOL_CAP_FMT_2TB_FILESIZE: When set, the volume format supports file - * size upto 2TB. This bit does not necessarily mean that the file - * system does not support file size more than 2TB. This bit does - * not mean that the currently available space on the volume is 2TB. 
+ * VOL_CAP_FMT_2TB_FILESIZE: If this bit is set the volume format supports + * file sizes larger than 4GB, and potentially up to 2TB; it does not + * indicate whether the filesystem supports files larger than that. * * VOL_CAP_FMT_OPENDENYMODES: When set, the volume supports open deny * modes (e.g. "open for read write, deny write"; effectively, mandatory @@ -200,6 +201,13 @@ typedef struct vol_capabilities_attr { * capability and it is assumed that most file systems will not support * it. Its use is for legacy non-posix APIs like ResolveFileIDRef. * + * VOL_CAP_FMT_NO_VOLUME_SIZES: When set, the volume does not support + * returning values for total data blocks, available blocks, or free blocks + * (as in f_blocks, f_bavail, or f_bfree in "struct statfs"). Historically, + * those values were set to 0xFFFFFFFF for volumes that did not support them. + * + * VOL_CAP_FMT_DECMPFS_COMPRESSION: When set, the volume supports transparent + * decompression of compressed files using decmpfs. */ #define VOL_CAP_FMT_PERSISTENTOBJECTIDS 0x00000001 #define VOL_CAP_FMT_SYMBOLICLINKS 0x00000002 @@ -216,6 +224,8 @@ typedef struct vol_capabilities_attr { #define VOL_CAP_FMT_OPENDENYMODES 0x00001000 #define VOL_CAP_FMT_HIDDEN_FILES 0x00002000 #define VOL_CAP_FMT_PATH_FROM_ID 0x00004000 +#define VOL_CAP_FMT_NO_VOLUME_SIZES 0x00008000 +#define VOL_CAP_FMT_DECMPFS_COMPRESSION 0x00010000 /* @@ -290,6 +300,10 @@ typedef struct vol_capabilities_attr { #define VOL_CAP_INT_MANLOCK 0x00001000 #define VOL_CAP_INT_NAMEDSTREAMS 0x00002000 #define VOL_CAP_INT_EXTENDED_ATTR 0x00004000 +#ifdef PRIVATE +/* Volume supports kqueue notifications for remote events */ +#define VOL_CAP_INT_REMOTE_EVENT 0x00008000 +#endif /* PRIVATE */ typedef struct vol_attributes_attr { attribute_set_t validattr; @@ -315,14 +329,22 @@ typedef struct vol_attributes_attr { #define ATTR_CMN_GRPID 0x00010000 #define ATTR_CMN_ACCESSMASK 0x00020000 #define ATTR_CMN_FLAGS 0x00040000 +/* #define ATTR_CMN_NAMEDATTRCOUNT 
0x00080000 not implemented */ +/* #define ATTR_CMN_NAMEDATTRLIST 0x00100000 not implemented */ #define ATTR_CMN_USERACCESS 0x00200000 #define ATTR_CMN_EXTENDED_SECURITY 0x00400000 #define ATTR_CMN_UUID 0x00800000 #define ATTR_CMN_GRPUUID 0x01000000 #define ATTR_CMN_FILEID 0x02000000 #define ATTR_CMN_PARENTID 0x04000000 +#define ATTR_CMN_FULLPATH 0x08000000 +/* + * ATTR_CMN_RETURNED_ATTRS is only valid with getattrlist(2). + * It is always the first attribute in the return buffer. + */ +#define ATTR_CMN_RETURNED_ATTRS 0x80000000 -#define ATTR_CMN_VALIDMASK 0x07FFFFFF +#define ATTR_CMN_VALIDMASK 0x8FE7FFFF #define ATTR_CMN_SETMASK 0x01C7FF00 #define ATTR_CMN_VOLSETMASK 0x00006700 @@ -344,10 +366,11 @@ typedef struct vol_attributes_attr { #define ATTR_VOL_MOUNTEDDEVICE 0x00008000 #define ATTR_VOL_ENCODINGSUSED 0x00010000 #define ATTR_VOL_CAPABILITIES 0x00020000 +#define ATTR_VOL_UUID 0x00040000 #define ATTR_VOL_ATTRIBUTES 0x40000000 #define ATTR_VOL_INFO 0x80000000 -#define ATTR_VOL_VALIDMASK 0xC003FFFF +#define ATTR_VOL_VALIDMASK 0xC007FFFF #define ATTR_VOL_SETMASK 0x80002000 @@ -372,7 +395,7 @@ typedef struct vol_attributes_attr { #define ATTR_FILE_RSRCLENGTH 0x00001000 #define ATTR_FILE_RSRCALLOCSIZE 0x00002000 -#define ATTR_FILE_VALIDMASK 0x00007FFF +#define ATTR_FILE_VALIDMASK 0x000037FF #define ATTR_FILE_SETMASK 0x00000020 #define ATTR_FORK_TOTALSIZE 0x00000001 @@ -423,21 +446,33 @@ struct fssearchblock { * WARNING - keep in sync with fssearchblock */ -struct user_fssearchblock { - user_addr_t returnattrs; - user_addr_t returnbuffer; - user_size_t returnbuffersize; - user_ulong_t maxmatches; - struct user_timeval timelimit; - user_addr_t searchparams1 __attribute((aligned(8))); - user_size_t sizeofsearchparams1; - user_addr_t searchparams2; - user_size_t sizeofsearchparams2; +struct user64_fssearchblock { + user64_addr_t returnattrs; + user64_addr_t returnbuffer; + user64_size_t returnbuffersize; + user64_ulong_t maxmatches; + struct user64_timeval timelimit; + 
user64_addr_t searchparams1; + user64_size_t sizeofsearchparams1; + user64_addr_t searchparams2; + user64_size_t sizeofsearchparams2; struct attrlist searchattrs; }; +struct user32_fssearchblock { + user32_addr_t returnattrs; + user32_addr_t returnbuffer; + user32_size_t returnbuffersize; + user32_ulong_t maxmatches; + struct user32_timeval timelimit; + user32_addr_t searchparams1; + user32_size_t sizeofsearchparams1; + user32_addr_t searchparams2; + user32_size_t sizeofsearchparams2; + struct attrlist searchattrs; +}; -#endif // KERNEL +#endif /* KERNEL */ struct searchstate { diff --git a/bsd/sys/bsdtask_info.h b/bsd/sys/bsdtask_info.h index 0da659b82..7b7b56610 100644 --- a/bsd/sys/bsdtask_info.h +++ b/bsd/sys/bsdtask_info.h @@ -51,6 +51,7 @@ struct proc_taskinfo_internal { int32_t pti_priority; /* task priority*/ }; +#define MAXTHREADNAMESIZE 64 struct proc_threadinfo_internal { uint64_t pth_user_time; /* user run time */ @@ -63,7 +64,7 @@ struct proc_threadinfo_internal { int32_t pth_curpri; /* cur priority*/ int32_t pth_priority; /* priority*/ int32_t pth_maxpriority; /* max priority*/ - char * pth_name[64]; /* thread name, if any */ + char pth_name[MAXTHREADNAMESIZE]; /* thread name, if any */ }; @@ -99,16 +100,17 @@ struct proc_regioninfo_internal { void vm_map_region_top_walk(vm_map_entry_t entry, vm_region_top_info_t top); void vm_map_region_walk(vm_map_t map, vm_map_offset_t a, vm_map_entry_t entry, vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended); -kern_return_t vnode_pager_get_object_vnode(memory_object_t mem_obj,uint32_t * vnodeaddr, uint32_t * vid); +kern_return_t vnode_pager_get_object_vnode(memory_object_t mem_obj, uintptr_t * vnodeaddr, uint32_t * vid); extern uint32_t vnode_vid(void *vp); #endif /* MACH_KERNEL_PRIVATE */ -extern int fill_procregioninfo(task_t t, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vp, uint32_t *vid); +extern int fill_procregioninfo(task_t t, uint64_t arg, struct 
proc_regioninfo_internal *pinfo, uintptr_t *vp, uint32_t *vid); void fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo); int fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_internal * ptinfo, void *, int *); int fill_taskthreadlist(task_t task, void * buffer, int thcount); int get_numthreads(task_t); +void bsd_getthreadname(void *uth, char* buffer); void bsd_threadcdir(void * uth, void *vptr, int *vidp); #endif /*_SYS_BSDTASK_INFO_H */ diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index 5cb9c7f9c..d2a988d09 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -71,6 +71,7 @@ #include #include +#include #include @@ -87,6 +88,8 @@ #define B_RAW 0x00000200 /* Set by physio for raw transfers. */ #define B_FUA 0x00000400 /* Write-through disk cache(if supported) */ #define B_PASSIVE 0x00000800 /* PASSIVE I/Os are ignored by THROTTLE I/O */ +#define B_IOSTREAMING 0x00001000 /* sequential access pattern detected */ +#define B_THROTTLED_IO 0x00002000 /* low priority I/O */ /* * make sure to check when adding flags that * that the new flags don't overlap the definitions @@ -95,308 +98,559 @@ __BEGIN_DECLS -/* - * mark the buffer associated with buf_t - * as AGED with respect to the LRU cache +/*! + @function buf_markaged + @abstract Mark a buffer as "aged," i.e. as a good candidate to be discarded and reused after buf_brelse(). + @param bp Buffer to mark. */ void buf_markaged(buf_t); -/* - * mark the buffer associated with buf_t - * as invalid... on release, it will go - * directly to the free list +/*! + @function buf_markinvalid + @abstract Mark a buffer as not having valid data and being ready for immediate reuse after buf_brelse(). + @param bp Buffer to mark. */ void buf_markinvalid(buf_t); -/* - * mark the buffer assoicated with buf_t - * as a delayed write... +/*! + @function buf_markdelayed + @abstract Mark a buffer as a delayed write: mark it dirty without actually scheduling I/O. 
+ @discussion Data will be flushed to disk at some later time, not with brelse(). A sync()/fsync() + or pressure necessitating reuse of the buffer will cause it to be written back to disk. + @param bp Buffer to mark. */ void buf_markdelayed(buf_t); -/* - * mark the buffer associated with buf_t - * as having been interrupted... EINTR +/*! + @function buf_markeintr + @abstract Mark a buffer as having been interrupted during I/O. + @discussion Waiters for I/O to complete (buf_biowait()) will return with EINTR when woken up. + buf_markeintr does not itself do a wakeup. + @param bp Buffer to mark. */ void buf_markeintr(buf_t); -/* - * mark the buffer associated with buf_t - * for write through disk cache if device supports +/*! + @function buf_markfua + @abstract Mark a buffer for write through disk cache, if disk supports it. + @param bp Buffer to mark. */ void buf_markfua(buf_t); -/* - * returns 1 if the buffer associated with buf_t - * is set for write through disk cache... 0 if it does not +/*! + @function buf_fua + @abstract Check if a buffer is marked for write through disk caches. + @param bp Buffer to test. + @return Nonzero if buffer is marked for write-through, 0 if not. */ int buf_fua(buf_t); -/* - * returns 1 if the buffer associated with buf_t - * contains valid data... 0 if it does not +/*! + @function buf_valid + @abstract Check if a buffer contains valid data. + @param bp Buffer to test. + @return Nonzero if buffer has valid data, 0 if not. */ int buf_valid(buf_t); -/* - * returns 1 if the buffer was already valid - * in the cache... i.e. no I/O was performed - * returns 0 otherwise +/*! + @function buf_fromcache + @abstract Check if a buffer's data was found in core. + @discussion Will return truth after a buf_getblk that finds a valid buffer in the cache or the relevant + data in core (but not in a buffer). + @param bp Buffer to test. + @return Nonzero if we got this buffer's data without doing I/O, 0 if not. 
*/ int buf_fromcache(buf_t); -/* - * returns the UPL associated with buf_t +/*! + @function buf_upl + @abstract Get the upl (Universal Page List) associated with a buffer. + @discussion Buffers allocated with buf_alloc() are not returned with a upl, and + traditional buffers only have a upl while an I/O is in progress. + @param bp Buffer whose upl to grab. + @return Buffer's upl if it has one, else NULL. */ void * buf_upl(buf_t); -/* - * returns the offset into the UPL - * associated with buf_t which is to be - * used as the base offset for this I/O +/*! + @function buf_uploffset + @abstract Get the offset into a UPL at which this buffer begins. + @discussion This function should only be called on iobufs, i.e. buffers allocated with buf_alloc(). + @param bp Buffer whose uploffset to grab. + @return Buffer's uploffset--does not check whether that value makes sense for this buffer. */ uint32_t buf_uploffset(buf_t); -/* - * returns read credential associated with buf_t - * a reference is taken which must be explicilty dropped +/*! + @function buf_rcred + @abstract Get the credential associated with a buffer for reading. + @discussion No reference is taken; if the credential is to be held on to persistently, an additional + reference must be taken with kauth_cred_ref. + @param bp Buffer whose credential to grab. + @return Credential if it exists, else NULL. + */ +kauth_cred_t buf_rcred(buf_t); + +/*! + @function buf_wcred + @abstract Get the credential associated with a buffer for writing. + @discussion No reference is taken; if the credential is to be held on to persistently, an additional + reference must be taken with kauth_cred_ref. + @param bp Buffer whose credential to grab. + @return Credential if it exists, else NULL. + */ +kauth_cred_t buf_wcred(buf_t); + +/*! + @function buf_proc + @abstract Get the process associated with this buffer. 
+ @discussion buf_proc() will generally return NULL; a process is currently only associated with + a buffer in the event of a physio() call. + @param bp Buffer whose associated process to find. + @return Associated process, possibly NULL. */ -ucred_t buf_rcred(buf_t); +proc_t buf_proc(buf_t); -/* - * returns write credential associated with buf_t - * a reference is taken which must be explicilty dropped +/*! + @function buf_dirtyoff + @abstract Get the starting offset of the dirty region associated with a buffer. + @discussion The dirty offset is zero unless someone explicitly calls buf_setdirtyoff() (which the kernel does not). + @param bp Buffer whose dirty offset to get. + @return Dirty offset (0 if not explicitly changed). */ -ucred_t buf_wcred(buf_t); +uint32_t buf_dirtyoff(buf_t); -/* - * returns process handle associated with buf_t - * i.e identity of task that issued the I/O +/*! + @function buf_dirtyend + @abstract Get the ending offset of the dirty region associated with a buffer. + @discussion If the buffer's data was found incore and dirty, the dirty end is the size of the block; otherwise, unless + someone outside of xnu explicitly changes it by calling buf_setdirtyend(), it will be zero. + @param bp Buffer whose dirty end to get. + @return 0 if buffer is found clean; size of buffer if found dirty. Can be set to any value by callers of buf_setdirtyend(). */ -proc_t buf_proc(buf_t); - -uint32_t buf_dirtyoff(buf_t); uint32_t buf_dirtyend(buf_t); + +/*! + @function buf_setdirtyoff + @abstract Set the starting offset of the dirty region associated with a buffer. + @discussion This value is zero unless someone set it explicitly. + @param bp Buffer whose dirty offset to set. + @return void. + */ void buf_setdirtyoff(buf_t, uint32_t); + +/*! + @function buf_setdirtyend + @abstract Set the ending offset of the dirty region associated with a buffer.
+ @discussion If the buffer's data was found incore and dirty, the dirty end is the size of the block; otherwise, unless + someone outside of xnu explicitly changes it by calling buf_setdirtyend(), it will be zero. + @param bp Buffer whose dirty end to set. + @return void. + */ void buf_setdirtyend(buf_t, uint32_t); -/* - * return the errno value associated with buf_t +/*! + @function buf_error + @abstract Get the error value associated with a buffer. + @discussion Errors are set with buf_seterror(). + @param bp Buffer whose error value to retrieve. + @return Error value, directly. */ errno_t buf_error(buf_t); -/* - * set errno on buf_t +/*! + @function buf_seterror + @abstract Set an error value on a buffer. + @param bp Buffer whose error value to set. + @return void. */ void buf_seterror(buf_t, errno_t); -/* - * set specified flags on buf_t - * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA +/*! + @function buf_setflags + @abstract Set flags on a buffer. + @discussion: buffer_flags |= flags + @param bp Buffer whose flags to set. + @param flags Flags to add to buffer's mask. B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA + @return void. */ void buf_setflags(buf_t, int32_t); -/* - * clear specified flags on buf_t - * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA +/*! + @function buf_clearflags + @abstract Clear flags on a buffer. + @discussion: buffer_flags &= ~flags + @param bp Buffer whose flags to clear. + @param flags Flags to remove from buffer's mask. B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA + @return void. */ void buf_clearflags(buf_t, int32_t); -/* - * return external flags associated with buf_t - * B_CLUSTER/B_PHYS/B_LOCKED/B_DELWRI/B_ASYNC/B_READ/B_WRITE/B_META/B_PAGEIO/B_FUA +/*! + @function buf_flags + @abstract Get flags set on a buffer. + @discussion Valid flags are B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA. + @param bp Buffer whose flags to grab. + @return flags. 
*/ int32_t buf_flags(buf_t); -/* - * clears I/O related flags (both internal and - * external) associated with buf_t and allows - * the following to be set... - * B_READ/B_WRITE/B_ASYNC/B_NOCACHE +/*! + @function buf_reset + @abstract Reset I/O flag state on a buffer. + @discussion Clears current flags on a buffer (internal and external) and allows some new flags to be set. + Used perhaps to prepare an iobuf for reuse. + @param bp Buffer whose flags to reset. + @param flags Flags to set on buffer: B_READ, B_WRITE, B_ASYNC, B_NOCACHE. + @return void. */ void buf_reset(buf_t, int32_t); -/* - * insure that the data storage associated with buf_t - * is addressable +/*! + @function buf_map + @abstract Get virtual mappings for buffer data. + @discussion For buffers created through buf_getblk() (i.e. traditional buffer cache usage), + buf_map() just returns the address at which data was mapped by buf_getblk(). For a B_CLUSTER buffer, i.e. an iobuf + whose upl state is managed manually, there are two possibilities. If the buffer was created + with an underlying "real" buffer through cluster_bp(), the mapping of the "real" buffer is returned. + Otherwise, the buffer was created with buf_alloc() and buf_setupl() was subsequently called; buf_map() + will call ubc_upl_map() to get a mapping for the buffer's upl and return the start of that mapping + plus the buffer's upl offset (set in buf_setupl()). In the last case, buf_unmap() must later be called + to tear down the mapping. NOTE: buf_map() does not set the buffer data pointer; this must be done with buf_setdataptr(). + @param bp Buffer whose mapping to find or create. + @param io_addr Destination for mapping address. + @return 0 for success, ENOMEM if unable to map the buffer. */ errno_t buf_map(buf_t, caddr_t *); -/* - * release our need to have the storage associated - * with buf_t in an addressable state +/*! + @function buf_unmap + @abstract Release mappings for buffer data.
+ @discussion For buffers created through buf_getblk() (i.e. traditional buffer cache usage), + buf_unmap() does nothing; buf_brelse() will take care of unmapping. For a B_CLUSTER buffer, i.e. an iobuf + whose upl state is managed manually, there are two possibilities. If the buffer was created + with an underlying "real" buffer through cluster_bp(), buf_unmap() does nothing; buf_brelse() on the + underlying buffer will tear down the mapping. Otherwise, the buffer was created with buf_alloc() and + buf_setupl() was subsequently called; buf_map() created the mapping. In this case, buf_unmap() will + unmap the buffer. + @param bp Buffer whose mapping to release. + @param io_addr Destination for mapping address. + @return 0 for success, EINVAL if unable to unmap buffer. */ errno_t buf_unmap(buf_t); -/* - * set driver specific data for buf_t +/*! + @function buf_setdrvdata + @abstract Set driver-specific data on a buffer. + @param bp Buffer whose driver-data to set. + @param drvdata Opaque driver data. + @return void. */ void buf_setdrvdata(buf_t, void *); -/* - * retrieve driver specific data associated with buf_t +/*! + @function buf_drvdata + @abstract Get driver-specific data from a buffer. + @param bp Buffer whose driver data to get. + @return Opaque driver data. */ void * buf_drvdata(buf_t); -/* - * set fs specific data for buf_t +/*! + @function buf_setfsprivate + @abstract Set filesystem-specific data on a buffer. + @param bp Buffer whose filesystem data to set. + @param fsprivate Opaque filesystem data. + @return void. */ void buf_setfsprivate(buf_t, void *); -/* - * retrieve driver specific data associated with buf_t +/*! + @function buf_fsprivate + @abstract Get filesystem-specific data from a buffer. + @param bp Buffer whose filesystem data to get. + @return Opaque filesystem data. */ void * buf_fsprivate(buf_t); -/* - * retrieve the phsyical block number associated with buf_t +/*!
+ @function buf_blkno + @abstract Get physical block number associated with a buffer, in the sense of VNOP_BLOCKMAP. + @discussion When a buffer's physical block number is the same as its logical block number, then the physical + block number is considered uninitialized. A physical block number of -1 indicates that there is no valid + physical mapping (e.g. the logical block is invalid or corresponds to a sparse region in a file). Physical + block number is normally set by the cluster layer or by buf_getblk(). + @param bp Buffer whose physical block number to get. + @return Block number. */ daddr64_t buf_blkno(buf_t); -/* - * retrieve the logical block number associated with buf_t - * i.e. the block number derived from the file offset +/*! + @function buf_lblkno + @abstract Get logical block number associated with a buffer. + @discussion Logical block number is set on traditionally-used buffers by an argument passed to buf_getblk(), + for example by buf_bread(). + @param bp Buffer whose logical block number to get. + @return Block number. */ daddr64_t buf_lblkno(buf_t); -/* - * set the phsyical block number associated with buf_t +/*! + @function buf_setblkno + @abstract Set physical block number associated with a buffer. + @discussion Physical block number is generally set by the cluster layer or by buf_getblk(). + @param bp Buffer whose physical block number to set. + @param blkno Block number to set. + @return void. */ void buf_setblkno(buf_t, daddr64_t); -/* - * set the logical block number associated with buf_t - * i.e. the block number derived from the file offset +/*! + @function buf_setlblkno + @abstract Set logical block number associated with a buffer. + @discussion Logical block number is set on traditionally-used buffers by an argument passed to buf_getblk(), + for example by buf_bread(). + @param bp Buffer whose logical block number to set. + @param lblkno Block number to set. + @return void.
*/ void buf_setlblkno(buf_t, daddr64_t); -/* - * retrieve the count of valid bytes associated with buf_t +/*! + @function buf_count + @abstract Get count of valid bytes in a buffer. This may be less than the space allocated to the buffer. + @param bp Buffer whose byte count to get. + @return Byte count. */ uint32_t buf_count(buf_t); -/* - * retrieve the size of the data store assoicated with buf_t +/*! + @function buf_size + @abstract Get size of data region allocated to a buffer. + @discussion May be larger than amount of valid data in buffer. + @param bp Buffer whose size to get. + @return Size. */ uint32_t buf_size(buf_t); -/* - * retrieve the residual I/O count assoicated with buf_t - * i.e. number of bytes that have not yet been completed +/*! + @function buf_resid + @abstract Get a count of bytes which were not consumed by an I/O on a buffer. + @discussion Set when an I/O operations completes. + @param bp Buffer whose outstanding count to get. + @return Count of unwritten/unread bytes. */ uint32_t buf_resid(buf_t); -/* - * set the count of bytes associated with buf_t - * typically used to set the size of the I/O to be performed +/*! + @function buf_setcount + @abstract Set count of valid bytes in a buffer. This may be less than the space allocated to the buffer. + @param bp Buffer whose byte count to set. + @param bcount Count to set. + @return void. */ void buf_setcount(buf_t, uint32_t); -/* - * set the size of the buffer store associated with buf_t - * typically used when providing private storage to buf_t +/*! + @function buf_setsize + @abstract Set size of data region allocated to a buffer. + @discussion May be larger than amount of valid data in buffer. Should be used by + code which is manually providing storage for an iobuf, one allocated with buf_alloc(). + @param bp Buffer whose size to set. + @return void. */ void buf_setsize(buf_t, uint32_t); -/* - * set the size in bytes of the unfinished I/O associated with buf_t +/*! 
+ @function buf_setresid + @abstract Set a count of bytes outstanding for I/O in a buffer. + @discussion Set when an I/O operation completes. Examples: called by IOStorageFamily when I/O + completes, often called on an "original" buffer when using a manipulated buffer to perform I/O + on behalf of the first. + @param bp Buffer whose outstanding count to set. + @return void. */ void buf_setresid(buf_t, uint32_t); -/* - * associate kernel addressable storage with buf_t +/*! + @function buf_setdataptr + @abstract Set the address at which a buffer's data will be stored. + @discussion In traditional buffer use, the data pointer will be set automatically. This routine is + useful with iobufs (allocated with buf_alloc()). + @param bp Buffer whose data pointer to set. + @param data Pointer to data region. + @return void. */ void buf_setdataptr(buf_t, uintptr_t); -/* - * retrieve pointer to buffer associated with buf_t - * if non-null, than guaranteed to be kernel addressable - * size of buffer can be retrieved via buf_size - * size of valid data can be retrieved via buf_count - * if NULL, than use buf_map/buf_unmap to manage access to the underlying storage +/*! + @function buf_dataptr + @abstract Get the address at which a buffer's data is stored; for iobufs, this must + be set with buf_setdataptr(). See buf_map(). + @param bp Buffer whose data pointer to retrieve. + @return Data pointer; NULL if unset. */ uintptr_t buf_dataptr(buf_t); -/* - * return the vnode_t associated with buf_t +/*! + @function buf_vnode + @abstract Get the vnode associated with a buffer. + @discussion Every buffer is associated with a file. Because there is an I/O in flight, + there is an iocount on this vnode; it is returned WITHOUT an extra iocount, and vnode_put() + need NOT be called. + @param bp Buffer whose vnode to retrieve. + @return Buffer's vnode. */ vnode_t buf_vnode(buf_t); -/* - * assign vnode_t to buf_t... the
the - * device currently associated with - * but_t is not changed. +/*! + @function buf_setvnode + @abstract Set the vnode associated with a buffer. + @discussion This call need not be used on traditional buffers; it is for use with iobufs. + @param bp Buffer whose vnode to set. + @param vp The vnode to attach to the buffer. + @return void. */ void buf_setvnode(buf_t, vnode_t); -/* - * return the dev_t associated with buf_t +/*! + @function buf_device + @abstract Get the device ID associated with a buffer. + @discussion In traditional buffer use, this value is NODEV until buf_strategy() is called unless + buf_getblk() was passed a device vnode. It is set on an iobuf if buf_alloc() is passed a device + vnode or if buf_setdevice() is called. + @param bp Buffer whose device ID to retrieve. + @return Device id. */ dev_t buf_device(buf_t); -/* - * assign the dev_t associated with vnode_t - * to buf_t +/*! + @function buf_setdevice + @abstract Set the device associated with a buffer. + @discussion A buffer's device is set in buf_strategy() (or in buf_getblk() if the file is a device). + It is also set on an iobuf if buf_alloc() is passed a device vnode. + @param bp Buffer whose device ID to set. + @param vp Device to set on the buffer. + @return 0 for success, EINVAL if vp is not a device file. */ errno_t buf_setdevice(buf_t, vnode_t); +/*! + @function buf_strategy + @abstract Pass an I/O request for a buffer down to the device layer. + @discussion This is one of the most important routines in the buffer cache layer. For buffers obtained + through buf_getblk, it handles finding physical block numbers for the I/O (with VNOP_BLKTOOFF and + VNOP_BLOCKMAP), packaging the I/O into page-sized chunks, and initiating I/O on the disk by calling + the device's strategy routine. 
If a buffer's UPL has been set manually with buf_setupl(), it assumes + that the request is already correctly configured with a block number and a size divisible by page size + and will just call directly to the device. + @param devvp Device on which to perform I/O + @param ap vnop_strategy_args structure (most importantly, a buffer). + @return 0 for success, or errors from filesystem or device layers. + */ errno_t buf_strategy(vnode_t, void *); -/* - * flags for buf_invalblkno +/* + * Flags for buf_invalblkno() */ #define BUF_WAIT 0x01 +/*! + @function buf_invalblkno + @abstract Invalidate a filesystem logical block in a file. + @discussion buf_invalblkno() tries to make the data for a given block in a file + invalid; if the buffer for that block is found in core and is not busy, we mark it + invalid and call buf_brelse() (see "flags" param for what happens if the buffer is busy). + buf_brelse(), noticing that it is invalid, will + return the buffer to the empty-buffer list and tell the VM subsystem to abandon + the relevant pages. Data will not be written to backing store--it will be cast aside. + Note that this function will only work if the block in question has been + obtained with a buf_getblk(). If data has been read into core without using + traditional buffer cache routines, buf_invalblkno() will not be able to invalidate it--this + includes the use of iobufs. + @param vp vnode whose block to invalidate. + @param lblkno Logical block number. + @param flags BUF_WAIT: wait for busy buffers to become unbusy and invalidate them then. Otherwise, + just return EBUSY for busy blocks. + @return 0 for success, EBUSY if the buffer is busy and BUF_WAIT was not passed. + */ errno_t buf_invalblkno(vnode_t, daddr64_t, int); - -/* - * return the callback function pointer - * if the callback is still valid - * returns NULL if a buffer that was not - * allocated via buf_alloc is specified - * or if a callback has not been set or - * it has already fired... +/*!
+ @function buf_callback + @abstract Get the function set to be called when I/O on a buffer completes. + @discussion A function returned by buf_callback was originally set with buf_setcallback(). + @param bp Buffer whose callback to get. + @return Callback function pointer if one is set and still valid, else NULL. */ void * buf_callback(buf_t); -/* - * assign a one-shot callback function (driven from biodone) - * to a buf_t allocated via buf_alloc... a caller specified - * arg is passed to the callback function +/*! + @function buf_setcallback + @abstract Set a function to be called once when I/O on a buffer completes. + @discussion A one-shot callout set with buf_setcallback() will be called from buf_biodone() + when I/O completes. It will be passed the "transaction" argument as well as the buffer. + buf_setcallback() also marks the buffer as B_ASYNC. + @param bp Buffer whose callback to set. + @param callback function to use as callback. + @param transaction Additional argument to callback function. + @return 0; always succeeds. */ errno_t buf_setcallback(buf_t, void (*)(buf_t, void *), void *); -/* - * add a upl_t to a buffer allocated via buf_alloc - * and set the offset into the upl_t (must be page - * aligned). +/*! + @function buf_setupl + @abstract Set the UPL (Universal Page List), and offset therein, on a buffer. + @discussion buf_setupl() should only be called on buffers allocated with buf_alloc(). + A subsequent call to buf_map() will map the UPL and give back the address at which data + begins. After buf_setupl() is called, a buffer is marked B_CLUSTER; when this is the case, + buf_strategy() assumes that a buffer is correctly configured to be passed to the device + layer without modification. Passing a NULL upl will clear the upl and the B_CLUSTER flag on the + buffer. + @param bp Buffer whose upl to set. + @param upl UPL to set in the buffer. + @param offset Offset within upl at which relevant data begin.
+ @return 0 for success, EINVAL if the buffer was not allocated with buf_alloc(). */ errno_t buf_setupl(buf_t, upl_t, uint32_t); -/* - * allocate a buf_t that is a clone of the buf_t - * passed in, but whose I/O range is a subset... - * if a callback routine is specified, it will - * be called from buf_biodone with the bp and - * arg specified. - * it must be freed via buf_free +/*! + @function buf_clone + @abstract Clone a buffer with a restricted range and an optional callback. + @discussion Generates a buffer which is identical to its "bp" argument except that + it spans a subset of the data of the original. The buffer to be cloned should + have been allocated with buf_alloc(). Checks its arguments to make sure + that the data subset is coherent. Optionally, adds a callback function and argument to it + to be called when I/O completes (as with buf_setcallback(), but B_ASYNC is not set). If the original buffer had + a upl set through buf_setupl(), this upl is copied to the new buffer; otherwise, the original's + data pointer is used raw. The buffer must be released with buf_free(). + @param bp Buffer to clone. + @param io_offset Offset, relative to start of data in original buffer, at which new buffer's data will begin. + @param io_size Size of buffer region in new buffer, in the sense of buf_count(). + @param iodone Callback to be called from buf_biodone() when I/O completes, in the sense of buf_setcallback(). + @param arg Argument to pass to iodone() callback. + @return NULL if io_offset/io_size combination is invalid for the buffer to be cloned; otherwise, the new buffer. */ buf_t buf_clone(buf_t, int, int, void (*)(buf_t, void *), void *); -/* - * allocate a buf_t associated with vnode_t - * that has NO storage associated with it - * but is suitable for use in issuing I/Os - * after storage has been assigned via buf_setdataptr - * or buf_addupl +/*! + @function buf_alloc + @abstract Allocate an uninitialized buffer. 
+ @discussion A buffer returned by buf_alloc() is marked as busy and as an iobuf; it has no storage set up and must be + set up using buf_setdataptr() or buf_setupl()/buf_map(). + @param vp vnode to associate with the buffer: optionally NULL. If vp is a device file, then + the buffer's associated device will be set. If vp is NULL, it can be set later with buf_setvnode(). + @return New buffer. */ buf_t buf_alloc(vnode_t); -/* - * free a buf_t that was allocated via buf_alloc - * any private storage associated with buf_t is the - * responsiblity of the caller to release +/*! + @function buf_free + @abstract Free a buffer that was allocated with buf_alloc(). + @discussion The storage (UPL, data pointer) associated with an iobuf must be freed manually. + @param bp The buffer to free. + @return void. */ void buf_free(buf_t); @@ -406,43 +660,263 @@ void buf_free(buf_t); #define BUF_WRITE_DATA 0x0001 /* write data blocks first */ #define BUF_SKIP_META 0x0002 /* skip over metadata blocks */ +/*! + @function buf_invalidateblks + @abstract Invalidate all the blocks associated with a vnode. + @discussion This function does for all blocks associated with a vnode what buf_invalblkno does for one block. + Again, it will only be able to invalidate data which were populated with traditional buffer cache routines, + i.e. by buf_getblk() and callers thereof. Unlike buf_invalblkno(), it can be made to write dirty data to disk + rather than casting it aside. + @param vp The vnode whose data to invalidate. + @param flags BUF_WRITE_DATA: write dirty data to disk with VNOP_BWRITE() before kicking buffer cache entries out. + BUF_SKIP_META: do not invalidate metadata blocks. + @param slpflag Flags to pass to "msleep" while waiting to acquire busy buffers. + @param slptimeo Timeout in "hz" (1/100 second) to wait for a buffer to become unbusy before waking from sleep + and re-starting the scan. + @return 0 for success, error values from msleep().
+ */ int buf_invalidateblks(vnode_t, int, int, int); + /* * flags for buf_flushdirtyblks and buf_iterate */ #define BUF_SKIP_NONLOCKED 0x01 #define BUF_SKIP_LOCKED 0x02 -#define BUF_SCAN_CLEAN 0x04 /* scan only the clean buffers */ -#define BUF_SCAN_DIRTY 0x08 /* scan only the dirty buffers */ +#define BUF_SCAN_CLEAN 0x04 /* scan the clean buffers */ +#define BUF_SCAN_DIRTY 0x08 /* scan the dirty buffers */ #define BUF_NOTIFY_BUSY 0x10 /* notify the caller about the busy pages during the scan */ -void buf_flushdirtyblks(vnode_t, int, int, const char *); -void buf_iterate(vnode_t, int (*)(buf_t, void *), int, void *); #define BUF_RETURNED 0 #define BUF_RETURNED_DONE 1 #define BUF_CLAIMED 2 #define BUF_CLAIMED_DONE 3 +/*! + @function buf_flushdirtyblks + @abstract Write dirty file blocks to disk. + @param vp The vnode whose blocks to flush. + @param wait Wait for writes to complete before returning. + @param flags Can pass zero, meaning "flush all dirty buffers." + BUF_SKIP_NONLOCKED: Skip buffers which are not busy when we encounter them. + BUF_SKIP_LOCKED: Skip buffers which are busy when we encounter them. + @param msg String to pass to msleep(). + @return void. + */ +void buf_flushdirtyblks(vnode_t, int, int, const char *); -/* - * zero the storage associated with buf_t +/*! + @function buf_iterate + @abstract Perform some operation on all buffers associated with a vnode. + @param vp The vnode whose buffers to scan. + @param callout Function to call on each buffer. Should return one of: + BUF_RETURNED: buf_iterate() should call buf_brelse() on the buffer. + BUF_RETURNED_DONE: buf_iterate() should call buf_brelse() on the buffer and then stop iterating. + BUF_CLAIMED: buf_iterate() should continue iterating (and not call buf_brelse()). + BUF_CLAIMED_DONE: buf_iterate() should stop iterating (and not call buf_brelse()). + @param flag + BUF_SKIP_NONLOCKED: Skip buffers which are not busy when we encounter them. 
BUF_SKIP_LOCKED: Skip buffers which are busy when we encounter them. + BUF_SCAN_CLEAN: Call out on clean buffers. + BUF_SCAN_DIRTY: Call out on dirty buffers. + BUF_NOTIFY_BUSY: If a buffer cannot be acquired, pass a NULL buffer to callout; otherwise, + that buffer will be silently skipped. + @param arg Argument to pass to callout in addition to buffer. + @return void. + */ +void buf_iterate(vnode_t, int (*)(buf_t, void *), int, void *); + +/*! + @function buf_clear + @abstract Zero out the storage associated with a buffer. + @discussion Calls buf_map() to get the buffer's data address; for a B_CLUSTER + buffer (one which has had buf_setupl() called on it), it tries to map the buffer's + UPL into memory; should only be called once during the life cycle of an iobuf (one allocated + with buf_alloc()). + @param bp The buffer to zero out. + @return void. */ void buf_clear(buf_t); +/*! + @function buf_bawrite + @abstract Start an asynchronous write on a buffer. + @discussion Calls VNOP_BWRITE to start the process of propagating an asynchronous write down to the device layer. + Callers can wait for writes to complete at their discretion using buf_biowait(). When this function is called, + data should already have been written to the buffer's data region. + @param bp The buffer on which to initiate I/O. + @param throttle If "throttle" is nonzero and more than VNODE_ASYNC_THROTTLE writes are in progress on this file, + buf_bawrite() will block until the write count drops below VNODE_ASYNC_THROTTLE. If "throttle" is zero and the write + count is high, it will fail with EWOULDBLOCK; the caller can decide whether to make a blocking call or pursue + other opportunities. + @return EWOULDBLOCK if write count is high and "throttle" is zero; otherwise, errors from VNOP_BWRITE. + */ errno_t buf_bawrite(buf_t); + +/*! + @function buf_bdwrite + @abstract Mark a buffer for delayed write.
+ @discussion Marks a buffer as waiting for delayed write and the current I/O as complete; data will be written to backing store + before the buffer is reused, but it will not be queued for I/O immediately. Note that for buffers allocated + with buf_alloc(), there are no such guarantees; you must take care of your own flushing to disk. If + the number of delayed writes pending on the system is greater than an internal limit and the caller has not + requested otherwise [see return_error], buf_bdwrite() will unilaterally launch an asynchronous I/O with buf_bawrite() to keep the pile of + delayed writes from getting too large. + @param bp The buffer to mark for delayed write. + @param return_error If the number of pending delayed writes systemwide is larger than an internal limit, + return EAGAIN rather than doing an asynchronous write. + @return EAGAIN for return_error != 0 case, 0 for success, errors from buf_bawrite. + */ errno_t buf_bdwrite(buf_t); + +/*! + @function buf_bwrite + @abstract Write a buffer's data to backing store. + @discussion Once the data in a buffer has been modified, buf_bwrite() starts sending it to disk by calling + VNOP_STRATEGY. Unless B_ASYNC has been set on the buffer (by buf_setflags() or otherwise), data will have + been written to disk when buf_bwrite() returns. See Bach (p 56). + @param bp The buffer to write to disk. + @return 0 for success; errors from buf_biowait(). + */ errno_t buf_bwrite(buf_t); +/*! + @function buf_biodone + @abstract Mark an I/O as completed. + @discussion buf_biodone() should be called by whosoever decides that an I/O on a buffer is complete; for example, + IOStorageFamily. It clears the dirty flag on a buffer and signals on the vnode that a write has completed + with vnode_writedone(). If a callout or filter has been set on the buffer, that function is called. In the case + of a callout, that function is expected to take care of cleaning up and freeing the buffer.
+ Otherwise, if the buffer is marked B_ASYNC (e.g. it was passed to buf_bawrite()), then buf_biodone() + considers itself justified in calling buf_brelse() to return it to free lists--no one is waiting for it. Finally, + waiters on the bp (e.g. in buf_biowait()) are woken up. + @param bp The buffer to mark as done with I/O. + @return void. + */ void buf_biodone(buf_t); + +/*! + @function buf_biowait + @abstract Wait for I/O on a buffer to complete. + @discussion Waits for I/O on a buffer to finish, as marked by a buf_biodone() call. + @param bp The buffer to wait on. + @return 0 for a successful wait; nonzero if the buffer has been marked as EINTR or had an error set on it. + */ errno_t buf_biowait(buf_t); -void buf_brelse(buf_t); -errno_t buf_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *); -errno_t buf_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *); -errno_t buf_meta_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *); -errno_t buf_meta_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *); +/*! + @function buf_brelse + @abstract Release any claim to a buffer, sending it back to free lists. + @discussion buf_brelse() cleans up buffer state and releases a buffer to the free lists. If the buffer + is not marked invalid and its pages are dirty (e.g. a delayed write was made), its data will be committed + to backing store. If it is marked invalid, its data will be discarded completely. + A valid, cacheable buffer will be put on a list and kept in the buffer hash so it + can be found again; otherwise, it will be dissociated from its vnode and treated as empty. Which list a valid + buffer is placed on depends on the use of buf_markaged(), whether it is metadata, and the B_LOCKED flag. A + B_LOCKED buffer will not be available for reuse by other files, though its data may be paged out. + Note that buf_brelse() is intended for use with traditionally allocated buffers. + @param bp The buffer to release. + @return void.
+ */ +void buf_brelse(buf_t); +/*! + @function buf_bread + @abstract Synchronously read a block of a file. + @discussion buf_bread() is the traditional way to read a single logical block of a file through the buffer cache. + It tries to find the buffer and corresponding page(s) in core, calls VNOP_STRATEGY if necessary to bring the data + into memory, and waits for I/O to complete. It should not be used to read blocks of greater than 4K (one VM page) + in size; use cluster routines for large reads. Indeed, the cluster layer is a more efficient choice for reading DATA + unless you need some finely-tuned semantics that it cannot provide. + @param vp The file from which to read. + @param blkno The logical (filesystem) block number to read. + @param size Size of block; do not use for sizes > 4K. + @param cred Credential to store and use for reading from disk if data are not already in core. + @param bpp Destination pointer for buffer. + @return 0 for success, or an error from buf_biowait(). + */ +errno_t buf_bread(vnode_t, daddr64_t, int, kauth_cred_t, buf_t *); + +/*! + @function buf_breadn + @abstract Read a block from a file with read-ahead. + @discussion buf_breadn() reads one block synchronously in the style of buf_bread() and fires + off a specified set of asynchronous reads to improve the likelihood of future cache hits. + It should not be used to read blocks of greater than 4K (one VM page) in size; use cluster + routines for large reads. Indeed, the cluster layer is a more efficient choice for reading DATA + unless you need some finely-tuned semantics that it cannot provide. + @param vp The file from which to read. + @param blkno The logical (filesystem) block number to read synchronously. + @param size Size of block; do not use for sizes > 4K. + @param rablks Array of logical block numbers for asynchronous read-aheads. + @param rasizes Array of block sizes for asynchronous read-aheads, each index corresponding to same index in "rablks." 
+ @param nrablks Number of entries in read-ahead arrays. + @param cred Credential to store and use for reading from disk if data are not already in core. + @param bpp Destination pointer for buffer. + @return 0 for success, or an error from buf_biowait(). + */ +errno_t buf_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, kauth_cred_t, buf_t *); + +/*! + @function buf_meta_bread + @abstract Synchronously read a metadata block of a file. + @discussion buf_meta_bread() is the traditional way to read a single logical block of a file through the buffer cache. + It tries to find the buffer and corresponding page(s) in core, calls VNOP_STRATEGY if necessary to bring the data + into memory, and waits for I/O to complete. It should not be used to read blocks of greater than 4K (one VM page) + in size; use cluster routines for large reads. Reading meta-data through the traditional buffer cache, unlike + reading data, is efficient and encouraged, especially if the blocks being read are significantly smaller than page size. + @param vp The file from which to read. + @param blkno The logical (filesystem) block number to read. + @param size Size of block; do not use for sizes > 4K. + @param cred Credential to store and use for reading from disk if data are not already in core. + @param bpp Destination pointer for buffer. + @return 0 for success, or an error from buf_biowait(). + */ +errno_t buf_meta_bread(vnode_t, daddr64_t, int, kauth_cred_t, buf_t *); + +/*! + @function buf_meta_breadn + @abstract Read a metadata block from a file with read-ahead. + @discussion buf_meta_breadn() reads one block synchronously in the style of buf_meta_bread() and fires + off a specified set of asynchronous reads to improve the likelihood of future cache hits. + It should not be used to read blocks of greater than 4K (one VM page) in size; use cluster + routines for large reads. + @param vp The file from which to read. 
+ @param blkno The logical (filesystem) block number to read synchronously. + @param size Size of block; do not use for sizes > 4K. + @param rablks Array of logical block numbers for asynchronous read-aheads. + @param rasizes Array of block sizes for asynchronous read-aheads, each index corresponding to same index in "rablks." + @param nrablks Number of entries in read-ahead arrays. + @param cred Credential to store and use for reading from disk if data are not already in core. + @param bpp Destination pointer for buffer. + @return 0 for success, or an error from buf_biowait(). + */ +errno_t buf_meta_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, kauth_cred_t, buf_t *); + +/*! + @function minphys + @abstract Adjust a buffer's count to be no more than maximum physical I/O transfer size for the host architecture. + @discussion physio() takes as a parameter a function to bound transfer sizes for each VNOP_STRATEGY() call. minphys() + is a default implementation. It calls buf_setcount() to make the buffer's count the min() of its current count + and the max I/O size for the host architecture. + @param bp The buffer whose byte count to modify. + @return New byte count. + */ u_int minphys(buf_t bp); + +/*! + @function physio + @abstract Perform I/O on a device to/from target memory described by a uio. + @discussion physio() allows I/O directly from a device to user-space memory. It waits + for all I/O to complete before returning. + @param f_strategy Strategy routine to call to initiate I/O. + @param bp Buffer to configure and pass to strategy routine; can be NULL. + @param dev Device on which to perform I/O. + @param flags B_READ or B_WRITE. + @param f_minphys Function which calls buf_setcount() to set a byte count which is suitably + small for the device in question. Returns byte count that has been set (or unchanged) on the buffer. + @param uio UIO describing the I/O operation. + @param blocksize Logical block size for this vnode. 
+ @return 0 for success; EFAULT for an invalid uio; errors from buf_biowait(). + */ int physio(void (*)(buf_t), buf_t, dev_t, int , u_int (*)(buf_t), struct uio *, int ); @@ -458,9 +932,44 @@ int physio(void (*)(buf_t), buf_t, dev_t, int , u_int (*)(buf_t), struct uio *, */ #define BLK_ONLYVALID 0x80000000 -/* timeout is in msecs */ +/*! + @function buf_getblk + @abstract Traditional buffer cache routine to get a buffer corresponding to a logical block in a file. + @discussion buf_getblk() gets a buffer, not necessarily containing valid data, representing a block in a file. + A metadata buffer will be returned with its own zone-allocated storage, managed by the traditional buffer-cache + layer, whereas data buffers will be returned hooked into backing by the UBC (which in fact controls the caching of data). + buf_getblk() first looks for the buffer header in cache; if the buffer is in-core but busy, buf_getblk() will wait for it to become + unbusy, depending on the slpflag and slptimeo parameters. If the buffer is found unbusy and is a metadata buffer, + it must already contain valid data and will be returned directly; data buffers will have a UPL configured to + prepare for interaction with the underlying UBC. If the buffer is found in core, it will be marked as such + and buf_fromcache() will return truth. A buffer is allocated and initialized (but not filled with data) + if none is found in core. buf_bread(), buf_breadn(), buf_meta_bread(), and buf_meta_breadn() all + return buffers obtained with buf_getblk(). + @param vp File for which to get block. + @param blkno Logical block number. + @param size Size of block. + @param slpflag Flag to pass to msleep() while waiting for buffer to become unbusy. + @param slptimeo Time, in milliseconds, to wait for buffer to become unbusy. 0 means to wait indefinitely. + @param operation BLK_READ: want a read buffer. BLK_WRITE: want a write buffer. BLK_META: want a metadata buffer. 
BLK_ONLYVALID: + only return buffers which are found in core (do not allocate anew), and do not change buffer size. The last remark means + that if a given logical block is found in core with a different size than what is requested, the buffer size will not be modified. + @return Buffer found in core or newly allocated, either containing valid data or ready for I/O. + */ buf_t buf_getblk(vnode_t, daddr64_t, int, int, int, int); + +/*! + @function buf_geteblk + @abstract Get a metadata buffer which is marked invalid and not associated with any vnode. + @discussion A buffer is returned with zone-allocated storage of the specified size, marked B_META and invalid. + It has no vnode and is not visible in the buffer hash. + @param size Size of buffer. + @return Always returns a new buffer. + */ buf_t buf_geteblk(int); +#ifdef KERNEL_PRIVATE +void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void **, void **); +#endif /* KERNEL_PRIVATE */ + __END_DECLS diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index bbe22779d..a11222c07 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,15 +90,15 @@ struct buf { LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ int b_timestamp; /* timestamp for queuing operation */ - long b_whichq; /* the free list the buffer belongs to */ - volatile long b_flags; /* B_* flags. */ - volatile long b_lflags; /* BL_BUSY | BL_WANTED flags... protected by buf_mtx */ + int b_whichq; /* the free list the buffer belongs to */ + volatile uint32_t b_flags; /* B_* flags. */ + volatile uint32_t b_lflags; /* BL_BUSY | BL_WANTED flags... protected by buf_mtx */ int b_error; /* errno value. */ - long b_bufsize; /* Allocated buffer size. 
*/ - long b_bcount; /* Valid bytes in buffer. */ - long b_resid; /* Remaining I/O. */ + int b_bufsize; /* Allocated buffer size. */ + int b_bcount; /* Valid bytes in buffer. */ + int b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ - uintptr_t b_datap; /* Memory, superblocks, indirect etc.*/ + uintptr_t b_datap; /* Memory, superblocks, indirect etc.*/ daddr64_t b_lblkno; /* Logical block number. */ daddr64_t b_blkno; /* Underlying physical block number. */ void (*b_iodone)(buf_t, void *); /* Function to call upon completion. */ @@ -109,7 +109,7 @@ struct buf { buf_t b_real_bp; /* used to track bp generated through cluster_bp */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active */ void * b_drvdata; /* Device driver private use */ - void * b_fsprivate; /* filesystem private use */ + void * b_fsprivate; /* filesystem private use */ void * b_transaction; /* journal private use */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ @@ -120,8 +120,8 @@ struct buf { void * b_owner; int b_tag; void * b_lastbrelse; - int b_stackbrelse[6]; - int b_stackgetblk[6]; + void * b_stackbrelse[6]; + void * b_stackgetblk[6]; #endif }; @@ -141,17 +141,23 @@ struct buf { #define BL_WANTED 0x00000002 /* Process wants this buffer. */ #define BL_IOBUF 0x00000004 /* buffer allocated via 'buf_alloc' */ #define BL_CALLDONE 0x00000008 /* callback routine on B_CALL bp has completed */ +#define BL_WANTDEALLOC 0x00000010 /* buffer should be put on empty list when clean */ + +/* + * Parameters for buffer cache garbage collection + */ +#define BUF_STALE_THRESHHOLD 30 /* Collect if untouched in the last 30 seconds */ /* * mask used by buf_flags... 
these are the readable external flags */ #define BUF_X_RDFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\ - B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE) + B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING | B_THROTTLED_IO) /* * mask used by buf_clearflags/buf_setflags... these are the writable external flags */ #define BUF_X_WRFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\ - B_NOCACHE | B_FUA | B_PASSIVE) + B_NOCACHE | B_FUA | B_PASSIVE | B_IOSTREAMING) /* * These flags are kept in b_flags... access is lockless @@ -181,10 +187,6 @@ struct buf { */ #define B_NEED_IODONE 0x20000000 /* need biodone on the real_bp associated with a cluster_io */ #define B_COMMIT_UPL 0x40000000 /* commit/abort the UPL on I/O success/failure */ -/* - * can we deprecate? - */ -#define B_TAPE 0x80000000 /* Magnetic tape I/O. */ /* Flags to low-level allocation routines. */ @@ -221,8 +223,6 @@ void free_io_buf(buf_t); int allocbuf(struct buf *, int); void bufinit(void) __attribute__((section("__TEXT, initcode"))); -void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void **, void **); - /* * Flags for buf_acquire */ diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 8cd063f27..59e922bea 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -221,7 +221,7 @@ * Apple API extensions in scope. * * Most users will use this compilation environment to avoid - * behavioural differences between 32 and 64 bit code. + * behavioral differences between 32 and 64 bit code. * * LEGACY Defining _NONSTD_SOURCE will get pre-POSIX APIs plus Apple * API extensions in scope. @@ -246,7 +246,7 @@ * * In any compilation environment, for correct symbol resolution to occur, * function prototypes must be in scope. 
It is recommended that all Apple - * tools users add etiher the "-Wall" or "-Wimplicit-function-declaration" + * tools users add either the "-Wall" or "-Wimplicit-function-declaration" * compiler flags to their projects to be warned when a function is being * used without a prototype in scope. */ @@ -256,6 +256,8 @@ #define __DARWIN_ONLY_64_BIT_INO_T 0 #define __DARWIN_ONLY_UNIX_CONFORMANCE 0 #define __DARWIN_ONLY_VERS_1050 0 +#define __DARWIN_SUF_DARWIN10 "_darwin10" +#define __DARWIN10_ALIAS(sym) __asm("_" __STRING(sym) __DARWIN_SUF_DARWIN10) #else /* !KERNEL */ #ifdef PRODUCT_AppleTV /* Product: AppleTV */ @@ -279,19 +281,19 @@ /* * The __DARWIN_ALIAS macros are used to do symbol renaming; they allow - * legacy code to use the old symbol, thus maintiang binary compatability + * legacy code to use the old symbol, thus maintaining binary compatibility * while new code can use a standards compliant version of the same function. * * __DARWIN_ALIAS is used by itself if the function signature has not * changed, it is used along with a #ifdef check for __DARWIN_UNIX03 - * if the signature has changed. Because the __LP64__ enviroment - * only supports UNIX03 sementics it causes __DARWIN_UNIX03 to be + * if the signature has changed. Because the __LP64__ environment + * only supports UNIX03 semantics it causes __DARWIN_UNIX03 to be * defined, but causes __DARWIN_ALIAS to do no symbol mangling. * * As a special case, when XCode is used to target a specific version of the * OS, the manifest constant __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ * will be defined by the compiler, with the digits representing major version - * time 100 + minor version times 10 (e.g. 10.5 := 1050). If we are targetting + * time 100 + minor version times 10 (e.g. 10.5 := 1050). If we are targeting * pre-10.5, and it is the default compilation environment, revert the * compilation environment to pre-__DARWIN_UNIX03. 
*/ @@ -343,8 +345,10 @@ # else /* default */ # if __DARWIN_ONLY_64_BIT_INO_T # define __DARWIN_64_BIT_INO_T 1 -# else /* !__DARWIN_ONLY_64_BIT_INO_T */ +# elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1060) || __DARWIN_UNIX03 == 0 # define __DARWIN_64_BIT_INO_T 0 +# else /* default */ +# define __DARWIN_64_BIT_INO_T 1 # endif /* __DARWIN_ONLY_64_BIT_INO_T */ # endif #endif /* !__DARWIN_64_BIT_INO_T */ @@ -354,10 +358,10 @@ # define __DARWIN_VERS_1050 0 # elif __DARWIN_ONLY_VERS_1050 # define __DARWIN_VERS_1050 1 -# elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1050) -# define __DARWIN_VERS_1050 1 -# else /* default */ +# elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1050) || __DARWIN_UNIX03 == 0 # define __DARWIN_VERS_1050 0 +# else /* default */ +# define __DARWIN_VERS_1050 1 # endif #endif /* !__DARWIN_VERS_1050 */ @@ -431,6 +435,17 @@ #define __DARWIN_EXTSN(sym) __asm("_" __STRING(sym) __DARWIN_SUF_EXTSN) #define __DARWIN_EXTSN_C(sym) __asm("_" __STRING(sym) __DARWIN_SUF_EXTSN __DARWIN_SUF_NON_CANCELABLE) +/* + * symbol release macros + */ +#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1060) +#undef __DARWIN_10_6_AND_LATER +#define __DARWIN_10_6_AND_LATER_ALIAS(x) /* nothing */ +#else /* 10.6 and beyond */ +#define __DARWIN_10_6_AND_LATER +#define __DARWIN_10_6_AND_LATER_ALIAS(x) x +#endif + /* * POSIX.1 requires that the macros we test be defined before any standard @@ -583,4 +598,14 @@ #define _DARWIN_FEATURE_UNIX_CONFORMANCE 3 #endif +/* + * This macro casts away the qualifier from the variable + * + * Note: use at your own risk, removing qualifiers can result in + * catastrophic run-time failures. 
+ */ +#ifndef __CAST_AWAY_QUALIFIER +#define __CAST_AWAY_QUALIFIER(variable, qualifier, type) (type) ((char *)0 + ((qualifier char *)(variable) - (qualifier char *)0) ) +#endif + #endif /* !_CDEFS_H_ */ diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h index f8fd235c9..5f36a2c7a 100644 --- a/bsd/sys/codesign.h +++ b/bsd/sys/codesign.h @@ -45,6 +45,7 @@ #define CS_OPS_MARKKILL 3 /* set KILL flag (sticky) */ #define CS_OPS_PIDPATH 4 /* get executable's pathname */ #define CS_OPS_CDHASH 5 /* get code directory hash */ +#define CS_OPS_PIDOFFSET 6 /* get offset of active Mach-o slice */ #ifndef KERNEL diff --git a/bsd/sys/conf.h b/bsd/sys/conf.h index 319dd704e..4cf53a914 100644 --- a/bsd/sys/conf.h +++ b/bsd/sys/conf.h @@ -109,9 +109,6 @@ typedef int stop_fcn_t(struct tty *tp, int rw); typedef int reset_fcn_t(int uban); typedef int select_fcn_t(dev_t dev, int which, void * wql, struct proc *p); typedef int mmap_fcn_t(void); -typedef int getc_fcn_t(dev_t dev); -typedef int putc_fcn_t(dev_t dev, char c); -typedef int d_poll_t(dev_t dev, int events, struct proc *p); #define d_open_t open_close_fcn_t #define d_close_t open_close_fcn_t @@ -123,8 +120,6 @@ typedef int d_poll_t(dev_t dev, int events, struct proc *p); #define d_select_t select_fcn_t #define d_mmap_t mmap_fcn_t #define d_strategy_t strategy_fcn_t -#define d_getc_t getc_fcn_t -#define d_putc_t putc_fcn_t __BEGIN_DECLS int enodev(void); @@ -144,10 +139,11 @@ __END_DECLS #define eno_stop ((stop_fcn_t *)&enodev) #define eno_reset ((reset_fcn_t *)&enodev) #define eno_mmap ((mmap_fcn_t *)&enodev) -#define eno_getc ((getc_fcn_t *)&enodev) -#define eno_putc ((putc_fcn_t *)&enodev) #define eno_select ((select_fcn_t *)&enodev) +/* For source backward compatibility only! 
*/ +#define eno_getc ((void *)&enodev) +#define eno_putc ((void *)&enodev) /* * Block device switch table @@ -186,16 +182,16 @@ struct cdevsw { open_close_fcn_t *d_close; read_write_fcn_t *d_read; read_write_fcn_t *d_write; - ioctl_fcn_t *d_ioctl; - stop_fcn_t *d_stop; - reset_fcn_t *d_reset; + ioctl_fcn_t *d_ioctl; + stop_fcn_t *d_stop; + reset_fcn_t *d_reset; struct tty **d_ttys; select_fcn_t *d_select; - mmap_fcn_t *d_mmap; + mmap_fcn_t *d_mmap; strategy_fcn_t *d_strategy; - getc_fcn_t *d_getc; - putc_fcn_t *d_putc; - int d_type; + void *d_reserved_1; + void *d_reserved_2; + int d_type; }; @@ -287,6 +283,7 @@ int cdevsw_isfree(int); int cdevsw_add(int, struct cdevsw *); int cdevsw_add_with_bdev(int index, struct cdevsw * csw, int bdev); int cdevsw_remove(int, struct cdevsw *); +int isdisk(dev_t, int); __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/decmpfs.h b/bsd/sys/decmpfs.h new file mode 100644 index 000000000..72e99ee18 --- /dev/null +++ b/bsd/sys/decmpfs.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SYS_DECMPFS_H_ +#define _SYS_DECMPFS_H_ 1 + +#define MAX_DECMPFS_XATTR_SIZE 3802 + +/* + NOTE: decmpfs can only be used by thread-safe filesystems + */ + +#define DECMPFS_MAGIC 0x636d7066 /* cmpf */ + +#define DECMPFS_XATTR_NAME "com.apple.decmpfs" /* extended attribute to use for decmpfs */ + +typedef struct __attribute__((packed)) { + /* this structure represents the xattr on disk; the fields below are little-endian */ + uint32_t compression_magic; + uint32_t compression_type; /* see the enum below */ + uint64_t uncompressed_size; + unsigned char attr_bytes[0]; /* the bytes of the attribute after the header */ +} decmpfs_disk_header; + +typedef struct __attribute__((packed)) { + /* this structure represents the xattr in memory; the fields below are host-endian */ + uint32_t attr_size; + uint32_t compression_magic; + uint32_t compression_type; + uint64_t uncompressed_size; + unsigned char attr_bytes[0]; /* the bytes of the attribute after the header */ +} decmpfs_header; + +/* compression_type values */ +enum { + CMP_Type1 = 1, /* uncompressed data in xattr */ + + /* additional types defined in AppleFSCompression project */ + + CMP_MAX = 255 +}; + +typedef struct { + void *buf; + user_ssize_t size; +} decmpfs_vector; + +#if KERNEL + +#include + +#if defined(__i386__) || defined(__x86_64__) +#define DECMPFS_SUPPORTS_SWAP64 1 +/* otherwise, no OSCompareAndSwap64, so use a mutex */ +#endif + +typedef struct decmpfs_cnode { + uint8_t 
cmp_state; + uint8_t cmp_minimal_xattr; /* if non-zero, this file's com.apple.decmpfs xattr contained only the minimal decmpfs_disk_header */ + uint32_t cmp_type; + uint32_t lockcount; + void *lockowner; /* cnode's lock owner (if a thread is currently holding an exclusive lock) */ + uint64_t uncompressed_size; + lck_rw_t compressed_data_lock; +#if !DECMPFS_SUPPORTS_SWAP64 + /* we need a lock since we can't atomically fetch/set 64 bits */ + lck_mtx_t uncompressed_size_mtx; +#endif /* !DECMPFS_SUPPORTS_SWAP64 */ +} decmpfs_cnode; + +/* return values from decmpfs_file_is_compressed */ +enum { + FILE_TYPE_UNKNOWN = 0, + FILE_IS_NOT_COMPRESSED = 1, + FILE_IS_COMPRESSED = 2, + FILE_IS_CONVERTING = 3 /* file is converting from compressed to decompressed */ +}; + +/* vfs entrypoints */ +extern vfs_context_t decmpfs_ctx; + +/* client filesystem entrypoints */ +void decmpfs_init(void); +void decmpfs_cnode_init(decmpfs_cnode *cp); +void decmpfs_cnode_destroy(decmpfs_cnode *cp); + +int decmpfs_hides_rsrc(vfs_context_t ctx, decmpfs_cnode *cp); +int decmpfs_hides_xattr(vfs_context_t ctx, decmpfs_cnode *cp, const char *xattr); + +boolean_t decmpfs_trylock_compressed_data(decmpfs_cnode *cp, int exclusive); +void decmpfs_lock_compressed_data(decmpfs_cnode *cp, int exclusive); +void decmpfs_unlock_compressed_data(decmpfs_cnode *cp, int exclusive); + +uint32_t decmpfs_cnode_get_vnode_state(decmpfs_cnode *cp); +void decmpfs_cnode_set_vnode_state(decmpfs_cnode *cp, uint32_t state, int skiplock); +uint64_t decmpfs_cnode_get_vnode_cached_size(decmpfs_cnode *cp); + +int decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp); +errno_t decmpfs_validate_compressed_file(vnode_t vp, decmpfs_cnode *cp); +int decmpfs_decompress_file(vnode_t vp, decmpfs_cnode *cp, off_t toSize, int truncate_okay, int skiplock); /* if toSize == -1, decompress the entire file */ +int decmpfs_free_compressed_data(vnode_t vp, decmpfs_cnode *cp); +int decmpfs_update_attributes(vnode_t vp, struct vnode_attr *vap); 
+/* the following two routines will set *is_compressed to 0 if the file was converted from compressed to decompressed before data could be fetched from the decompressor */ +errno_t decmpfs_pagein_compressed(struct vnop_pagein_args *ap, int *is_compressed, decmpfs_cnode *cp); +errno_t decmpfs_read_compressed(struct vnop_read_args *ap, int *is_compressed, decmpfs_cnode *cp); + +/* types shared between the kernel and kexts */ +typedef int (*decmpfs_validate_compressed_file_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr); +typedef void (*decmpfs_adjust_fetch_region_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr, off_t *offset, user_ssize_t *size); +typedef int (*decmpfs_fetch_uncompressed_data_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr, off_t offset, user_ssize_t size, int nvec, decmpfs_vector *vec, uint64_t *bytes_read); +typedef int (*decmpfs_free_compressed_data_func)(vnode_t vp, vfs_context_t ctx, decmpfs_header *hdr); + +#define DECMPFS_REGISTRATION_VERSION 1 +typedef struct { + int decmpfs_registration; + decmpfs_validate_compressed_file_func validate; + decmpfs_adjust_fetch_region_func adjust_fetch; + decmpfs_fetch_uncompressed_data_func fetch; + decmpfs_free_compressed_data_func free_data; +} decmpfs_registration; + +/* hooks for kexts to call */ +errno_t register_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration); +errno_t unregister_decmpfs_decompressor(uint32_t compression_type, decmpfs_registration *registration); + +#endif /* KERNEL */ + +#endif /* _SYS_DECMPFS_H_ */ diff --git a/bsd/sys/dir.h b/bsd/sys/dir.h index 921ce9526..e6410375b 100644 --- a/bsd/sys/dir.h +++ b/bsd/sys/dir.h @@ -88,6 +88,6 @@ */ #undef DIRSIZ #define DIRSIZ(dp) \ - ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) + (((unsigned long)&((struct direct *)0)->d_name + (dp)->d_namlen+1 + 3) & ~3) #endif /* !_SYS_DIR_H_ */ diff --git a/bsd/sys/dirent.h b/bsd/sys/dirent.h index bef3ffb23..a559b8b37 
100644 --- a/bsd/sys/dirent.h +++ b/bsd/sys/dirent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,14 +62,14 @@ */ /* - * The dirent structure defines the format of directory entries returned by - * the getdirentries(2) system call. + * The dirent structure defines the format of directory entries. * * A directory entry has a struct dirent at the front of it, containing its * inode number, the length of the entry, and the length of the name * contained in the entry. These are followed by the name padded to a 4 * byte boundary with null bytes. All names are guaranteed null terminated. - * The maximum length of a name in a directory is MAXNAMLEN. + * The maximum length of a name in a directory is MAXNAMLEN when 32-bit + * ino_t is in effect; (MAXPATHLEN - 1) when 64-bit ino_t is in effect. */ #ifndef _SYS_DIRENT_H diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 348253107..0a3da6952 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -67,6 +67,7 @@ * DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits * * DKIOCGETPHYSICALBLOCKSIZE get device's block size + * DKIOCGETCOMMANDPOOLSIZE get device's queue depth */ typedef struct @@ -132,6 +133,7 @@ typedef struct #define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, uint64_t) #define DKIOCGETPHYSICALBLOCKSIZE _IOR('d', 77, uint32_t) +#define DKIOCGETCOMMANDPOOLSIZE _IOR('d', 78, uint32_t) #ifdef KERNEL #define DK_FEATURE_DISCARD 0x00000010 diff --git a/bsd/sys/domain.h b/bsd/sys/domain.h index 9d4fd75b5..9edf0db5e 100644 --- a/bsd/sys/domain.h +++ b/bsd/sys/domain.h @@ -107,8 +107,8 @@ struct domain { #else void *dom_mtx; /* domain global mutex */ #endif - u_long dom_flags; - u_long reserved[2]; + uint32_t dom_flags; + uint32_t reserved[2]; }; #pragma pack() diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h index ebe0e4f49..6d9c6976a 100644 --- 
a/bsd/sys/dtrace.h +++ b/bsd/sys/dtrace.h @@ -20,14 +20,14 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H -/* #pragma ident "@(#)dtrace.h 1.32 06/08/07 SMI" */ +/* #pragma ident "@(#)dtrace.h 1.37 07/06/05 SMI" */ #ifdef __cplusplus extern "C" { @@ -56,6 +56,16 @@ extern "C" { #include #else /* is Apple Mac OS X */ +#if defined(__LP64__) +#if !defined(_LP64) +#define _LP64 /* Solaris vs. Darwin */ +#endif +#else +#if !defined(_ILP32) +#define _ILP32 /* Solaris vs. Darwin */ +#endif +#endif + #ifdef KERNEL #ifndef _KERNEL #define _KERNEL /* Solaris vs. Darwin */ @@ -90,12 +100,17 @@ extern "C" { #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define CTF_MODEL_ILP32 1 /* object data model is ILP32 */ +#define CTF_MODEL_LP64 2 /* object data model is LP64 */ +#ifdef __LP64__ +#define CTF_MODEL_NATIVE CTF_MODEL_LP64 +#else #define CTF_MODEL_NATIVE CTF_MODEL_ILP32 +#endif typedef uint8_t uchar_t; typedef uint16_t ushort_t; typedef uint32_t uint_t; -typedef uint32_t ulong_t; +typedef unsigned long ulong_t; typedef uint64_t u_longlong_t; typedef int64_t longlong_t; typedef int64_t off64_t; @@ -124,7 +139,8 @@ typedef uint32_t zoneid_t; #include typedef va_list __va_list; -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #endif /* __APPLE__ */ /* @@ -310,6 +326,10 @@ typedef enum dtrace_probespec { #define DIF_VAR_UID 0x011e /* process user ID */ #define DIF_VAR_GID 0x011f /* process group ID */ #define DIF_VAR_ERRNO 0x0120 /* thread errno */ +#if defined(__APPLE__) +#define DIF_VAR_PTHREAD_SELF 0x0200 /* Apple specific PTHREAD_SELF (Not currently supported!) 
*/ +#define DIF_VAR_DISPATCHQADDR 0x0201 /* Apple specific dispatch queue addr */ +#endif /* __APPLE __ */ #define DIF_SUBR_RAND 0 #define DIF_SUBR_MUTEX_OWNED 1 @@ -346,9 +366,23 @@ typedef enum dtrace_probespec { #define DIF_SUBR_SUBSTR 32 #define DIF_SUBR_INDEX 33 #define DIF_SUBR_RINDEX 34 -#define DIF_SUBR_CHUD 35 +#define DIF_SUBR_HTONS 35 +#define DIF_SUBR_HTONL 36 +#define DIF_SUBR_HTONLL 37 +#define DIF_SUBR_NTOHS 38 +#define DIF_SUBR_NTOHL 39 +#define DIF_SUBR_NTOHLL 40 +#define DIF_SUBR_INET_NTOP 41 +#define DIF_SUBR_INET_NTOA 42 +#define DIF_SUBR_INET_NTOA6 43 +#if !defined(__APPLE__) -#define DIF_SUBR_MAX 35 /* max subroutine value */ +#define DIF_SUBR_MAX 43 /* max subroutine value */ +#else +#define DIF_SUBR_COREPROFILE 44 + +#define DIF_SUBR_MAX 44 /* max subroutine value */ +#endif /* __APPLE__ */ typedef uint32_t dif_instr_t; @@ -458,6 +492,10 @@ typedef struct dtrace_difv { #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ +#if defined(__APPLE__) +#define DTRACEACT_APPLEBINARY 50 /* Apple DT perf. tool action */ +#endif /* __APPLE__ */ + #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) #define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) @@ -526,10 +564,17 @@ typedef struct dtrace_difv { #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) +#if !defined(__APPLE__) /* Quiet compiler warning. */ #define DTRACE_QUANTIZE_NBUCKETS \ (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) #define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) +#else +#define DTRACE_QUANTIZE_NBUCKETS \ + (int)(((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) + +#define DTRACE_QUANTIZE_ZEROBUCKET (int64_t)((sizeof (uint64_t) * NBBY) - 1) +#endif /* __APPLE __*/ #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? 
\ @@ -561,6 +606,8 @@ typedef struct dtrace_difv { #define DTRACE_USTACK_ARG(x, y) \ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) +#if !defined(__APPLE__) + #ifndef _LP64 #ifndef _LITTLE_ENDIAN #define DTRACE_PTR(type, name) uint32_t name##pad; type *name @@ -571,6 +618,16 @@ typedef struct dtrace_difv { #define DTRACE_PTR(type, name) type *name #endif +#else + +#ifndef _LP64 +#define DTRACE_PTR(type, name) user_addr_t name +#else +#define DTRACE_PTR(type, name) type *name +#endif + +#endif /* __APPLE__ */ + /* * DTrace Object Format (DOF) * @@ -680,8 +737,12 @@ typedef struct dof_hdr { #define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ #define DOF_VERSION_2 2 /* DOF version 2: Solaris Express 6/06 */ +#if !defined(__APPLE__) +#define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ +#else #define DOF_VERSION_3 3 /* DOF version 3: Minimum version for Leopard */ #define DOF_VERSION DOF_VERSION_3 /* Latest DOF version */ +#endif /* __APPLE__ */ #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ @@ -1036,12 +1097,12 @@ typedef struct dtrace_fmtdesc { #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. 
key position to sort on */ -#if defined(__APPLE__) +#if !defined(__APPLE__) +#define DTRACEOPT_MAX 27 /* number of options */ +#else #define DTRACEOPT_STACKSYMBOLS 27 /* clear to prevent stack symbolication */ #define DTRACEOPT_MAX 28 /* number of options */ -#else -#define DTRACEOPT_MAX 27 /* number of options */ -#endif +#endif /* __APPLE__ */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ @@ -1141,6 +1202,7 @@ typedef struct dtrace_conf { #define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ #define DTRACEFLT_UPRIV 7 /* Illegal user access */ #define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ +#define DTRACEFLT_BADSTACK 9 /* Bad stack */ #define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ @@ -1393,7 +1455,11 @@ typedef struct dof_ioctl_data { #endif #define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ +#if !defined(__APPLE__) +#define DTRACEMNR_HELPER "helper" /* node for helpers */ +#else #define DTRACEMNR_HELPER "dtracehelper" /* node for helpers */ +#endif /* __APPLE__ */ #define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ #define DTRACEMNRN_HELPER 1 /* minor for helpers */ #define DTRACEMNRN_CLONE 2 /* first clone minor */ @@ -2285,6 +2351,12 @@ extern void dtrace_vtime_disable_tnf(void); extern void dtrace_vtime_enable(void); extern void dtrace_vtime_disable(void); +#if !defined(__APPLE__) +struct regs; + +extern int (*dtrace_pid_probe_ptr)(struct regs *); +extern int (*dtrace_return_probe_ptr)(struct regs *); +#else #if defined (__ppc__) || defined (__ppc64__) extern int (*dtrace_pid_probe_ptr)(ppc_saved_state_t *regs); extern int (*dtrace_return_probe_ptr)(ppc_saved_state_t* regs); @@ -2294,7 +2366,7 @@ extern int (*dtrace_return_probe_ptr)(x86_saved_state_t* regs); #else #error architecture not supported #endif - +#endif /* __APPLE__ */ extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); @@ -2334,7 +2406,10 @@ 
extern void dtrace_panic(const char *, ...); extern int dtrace_safe_defer_signal(void); extern void dtrace_safe_synchronous_signal(void); -#if defined(__i386__) || defined(__x86_64__) +extern int dtrace_mach_aframes(void); + +#if !defined(__APPLE__) +#if defined(__i386) || defined(__amd64) extern int dtrace_instr_size(uchar_t *instr); extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t)); @@ -2346,8 +2421,16 @@ extern void dtrace_invop_callsite(void); extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif +#else +#if defined(__i386__) || defined(__x86_64__) +extern int dtrace_instr_size(uchar_t *instr); +extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); +extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t)); +extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t)); +extern void dtrace_invop_callsite(void); +#endif -#if defined(__APPLE__) + #if defined (__ppc__) || defined (__ppc64__) extern void dtrace_invop_add(int (*)(uintptr_t, uintptr_t *, uintptr_t)); extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t)); @@ -2368,6 +2451,17 @@ extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t)); #endif /* _ASM */ +#if !defined(__APPLE__) +#if defined(__i386) || defined(__amd64) + +#define DTRACE_INVOP_PUSHL_EBP 1 +#define DTRACE_INVOP_POPL_EBP 2 +#define DTRACE_INVOP_LEAVE 3 +#define DTRACE_INVOP_NOP 4 +#define DTRACE_INVOP_RET 5 + +#endif +#else #if defined(__i386__) || defined(__x86_64__) #define DTRACE_INVOP_PUSHL_EBP 1 @@ -2378,7 +2472,6 @@ extern void dtrace_invop_remove(int (*)(uintptr_t, uintptr_t *, uintptr_t)); #endif -#if defined(__APPLE__) #if defined (__ppc__) || defined (__ppc64__) #define DTRACE_INVOP_NOP 4 #define DTRACE_INVOP_RET 5 diff --git a/bsd/sys/dtrace_glue.h b/bsd/sys/dtrace_glue.h index b6f9c2cd2..5612fe80c 100644 --- 
a/bsd/sys/dtrace_glue.h +++ b/bsd/sys/dtrace_glue.h @@ -43,12 +43,6 @@ #include #include -#ifdef QUIET_PLEASE - #ifndef NULL - #define NULL ((void *)0) /* quiets many warnings */ - #endif -#endif - /* * cmn_err */ @@ -64,8 +58,8 @@ extern void cmn_err( int, const char *, ... ); * pid/proc */ -typedef struct proc SUN_PROC_T; /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ -#define proc_t SUN_PROC_T /* replace all the original uses of (Solaris) proc_t */ +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #define curproc ((struct proc *)current_proc()) /* Called from probe context, must blacklist */ proc_t* sprlock(pid_t pid); @@ -126,19 +120,18 @@ extern cpu_t *cpu_list; * the structure is sized to avoid false sharing. */ #define CPU_CACHE_COHERENCE_SIZE 64 -#define CPUC_SIZE (sizeof (uint16_t)) -#define CPUC_PADSIZE CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE typedef struct cpu_core { - uint16_t cpuc_dtrace_flags; /* DTrace flags */ - uint8_t cpuc_pad[CPUC_PADSIZE]; /* padding */ uint64_t cpuc_dtrace_illval; /* DTrace illegal value */ lck_mtx_t cpuc_pid_lock; /* DTrace pid provider lock */ + uint16_t cpuc_dtrace_flags; /* DTrace flags */ + uint64_t cpuc_missing_tos; /* Addr. of top most stack frame if missing */ + uint8_t cpuc_pad[CPU_CACHE_COHERENCE_SIZE - sizeof(uint64_t) - sizeof(lck_mtx_t) - sizeof(uint16_t) - sizeof(uint64_t) ]; /* padding */ } cpu_core_t; -extern cpu_core_t *cpu_core; /* XXX TLB lockdown? */ +extern cpu_core_t *cpu_core; extern unsigned int real_ncpus; -extern int cpu_number(void); /* XXX #include . Called from probe context, must blacklist. */ +extern int cpu_number(void); /* From #include . Called from probe context, must blacklist. 
*/ #define CPU (&(cpu_list[cpu_number()])) /* Pointer to current CPU */ #define CPU_ON_INTR(cpup) ml_at_interrupt_context() /* always invoked on current cpu */ @@ -185,11 +178,13 @@ extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *); #endif #define CPU_DTRACE_USTACK_FP 0x0400 /* pid provider hint to ustack() */ #define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ +#define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ #define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ - CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW) + CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ + CPU_DTRACE_BADSTACK) #define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) /* @@ -313,7 +308,7 @@ extern void ddi_soft_state_fini(void **); int ddi_getprop(dev_t dev, dev_info_t *dip, int flags, const char *name, int defvalue); extern int ddi_prop_free(void *); -extern int ddi_prop_lookup_int_array(dev_t, dev_info_t *, uint_t, char *, int **, uint_t *); +extern int ddi_prop_lookup_int_array(dev_t, dev_info_t *, uint_t, const char *, int **, uint_t *); extern int ddi_driver_major(dev_info_t *); @@ -323,7 +318,6 @@ extern void ddi_remove_minor_node(dev_info_t *, char *); extern major_t getemajor(dev_t); extern minor_t getminor(dev_t); -extern int _dtrace_dev; extern dev_t makedevice(major_t, minor_t); /* @@ -383,7 +377,7 @@ extern void *dt_kmem_zalloc_aligned(size_t, size_t, int); extern void dt_kmem_free_aligned(void*, size_t); extern kmem_cache_t * -kmem_cache_create(char *, size_t, size_t, int (*)(void *, void *, int), +kmem_cache_create(const char *, size_t, size_t, int (*)(void *, void *, int), void (*)(void *, void *), void (*)(void *), void *, vmem_t *, int); extern void *kmem_cache_alloc(kmem_cache_t *, int); extern void kmem_cache_free(kmem_cache_t *, void *); @@ -399,7 +393,9 @@ typedef struct _kthread kthread_t; /* For dtrace_vtime_switch(), 
dtrace_panicked * Loadable Modules */ +#if 0 /* kmod_lock has been removed */ decl_simple_lock_data(extern,kmod_lock) +#endif /* 0 */ /* Want to use Darwin's kmod_info in place of the Solaris modctl. Can't typedef since the (many) usages in the code are "struct modctl *" */ @@ -468,9 +464,25 @@ extern void vmem_free(vmem_t *vmp, void *vaddr, size_t size); static inline void atomic_add_32( uint32_t *theValue, int32_t theAmount ) { - (void)OSAddAtomic( theAmount, (SInt32 *)theValue ); + (void)OSAddAtomic( theAmount, theValue ); } +#if defined(__i386__) || defined(__x86_64__) +static inline void atomic_add_64( uint64_t *theValue, int64_t theAmount ) +{ + (void)OSAddAtomic64( theAmount, (SInt64 *)theValue ); +} +#elif defined(__ppc__) +static inline void atomic_add_64( uint64_t *theValue, int64_t theAmount ) +{ + // FIXME + // atomic_add_64() is at present only called from fasttrap.c to increment + // or decrement a 64bit counter. Narrow to 32bits since ppc32 (G4) has + // no convenient 64bit atomic op. + (void)OSAddAtomic( (int32_t)theAmount, &(((SInt32 *)theValue)[1])); +} +#endif + /* * Miscellaneous */ @@ -480,7 +492,6 @@ typedef uintptr_t greg_t; /* For dtrace_impl.h prototype of dtrace_getfp() */ extern struct regs *find_user_regs( thread_t thread); extern vm_offset_t dtrace_get_cpu_int_stack_top(void); extern vm_offset_t max_valid_stack_address(void); /* kern/thread.h */ -extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); /* machine/pmap.h */ extern volatile int panicwait; /* kern/debug.c */ #define panic_quiesce (panicwait) @@ -491,14 +502,27 @@ extern void delay( int ); /* kern/clock.h */ extern int vuprintf(const char *, va_list); -extern boolean_t dtxnu_is_RAM_page(ppnum_t); - extern hrtime_t dtrace_abs_to_nano(uint64_t); -__private_extern__ char * strstr(const char *, const char *); +__private_extern__ const char * strstr(const char *, const char *); #undef proc_t +/* + * Safe counted string compare against a literal string. 
The sizeof() intentionally + * counts the trailing NUL, and so ensures that all the characters in the literal + * can participate in the comparison. + */ +#define LIT_STRNEQL(s1, lit_s2) (0 == strncmp( (s1), (lit_s2), sizeof((lit_s2)) )) + +/* + * Safe counted string compare of a literal against the beginning of a string. Here + * the sizeof() is reduced by 1 so that the trailing null of the literal does not + * participate in the comparison. + */ +#define LIT_STRNSTART(s1, lit_s2) (0 == strncmp( (s1), (lit_s2), sizeof((lit_s2)) - 1 )) + +#define KERNELBASE VM_MIN_KERNEL_ADDRESS #endif /* KERNEL_BUILD */ #endif /* _DTRACE_GLUE_H */ diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h index 4d75ea53d..4ef2ef655 100644 --- a/bsd/sys/dtrace_impl.h +++ b/bsd/sys/dtrace_impl.h @@ -20,14 +20,14 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_DTRACE_IMPL_H #define _SYS_DTRACE_IMPL_H -/* #pragma ident "@(#)dtrace_impl.h 1.21 06/05/19 SMI" */ +/* #pragma ident "@(#)dtrace_impl.h 1.23 07/02/16 SMI" */ #ifdef __cplusplus extern "C" { @@ -910,6 +910,8 @@ typedef struct dtrace_mstate { int dtms_ipl; /* cached interrupt pri lev */ int dtms_fltoffs; /* faulting DIFO offset */ uintptr_t dtms_strtok; /* saved strtok() pointer */ + uint32_t dtms_access; /* memory access rights */ + dtrace_difo_t *dtms_difo; /* current dif object */ } dtrace_mstate_t; #define DTRACE_COND_OWNER 0x1 @@ -918,6 +920,12 @@ typedef struct dtrace_mstate { #define DTRACE_PROBEKEY_MAXDEPTH 8 /* max glob recursion depth */ +/* + * Access flag used by dtrace_mstate.dtms_access. 
+ */ +#define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */ + + /* * DTrace Activity * @@ -970,6 +978,7 @@ typedef enum dtrace_activity { DTRACE_ACTIVITY_KILLED /* killed */ } dtrace_activity_t; +#if defined(__APPLE__) /* * DTrace dof modes * @@ -999,6 +1008,7 @@ typedef enum dtrace_activity { #define DTRACE_DOF_MODE_LAZY_ON 1 #define DTRACE_DOF_MODE_LAZY_OFF 2 #define DTRACE_DOF_MODE_NON_LAZY 3 +#endif /* __APPLE__ */ /* * DTrace Helper Implementation @@ -1222,7 +1232,7 @@ typedef struct dtrace_anon { /* * DTrace Error Debugging */ -#ifdef DEBUG +#if DEBUG #define DTRACE_ERRDEBUG #endif @@ -1259,21 +1269,21 @@ typedef struct dtrace_toxrange { } dtrace_toxrange_t; extern uint64_t dtrace_getarg(int, int); -extern greg_t dtrace_getfp(void); extern int dtrace_getipl(void); extern uintptr_t dtrace_caller(int); extern uint32_t dtrace_cas32(uint32_t *, uint32_t, uint32_t); extern void *dtrace_casptr(void *, void *, void *); #if !defined(__APPLE__) -extern void dtrace_copyin(uintptr_t, uintptr_t, size_t); -extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t); -extern void dtrace_copyout(uintptr_t, uintptr_t, size_t); -extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t); +extern void dtrace_copyin(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, + volatile uint16_t *); #else -extern void dtrace_copyin(user_addr_t, uintptr_t, size_t); -extern void dtrace_copyinstr(user_addr_t, uintptr_t, size_t); -extern void dtrace_copyout(uintptr_t, user_addr_t, size_t); -extern void dtrace_copyoutstr(uintptr_t, user_addr_t, size_t); +extern void dtrace_copyin(user_addr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyinstr(user_addr_t, uintptr_t, size_t, volatile uint16_t *); +extern void dtrace_copyout(uintptr_t, user_addr_t, 
size_t, volatile uint16_t *); +extern void dtrace_copyoutstr(uintptr_t, user_addr_t, size_t, volatile uint16_t *); #endif /* __APPLE__ */ extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); #if !defined(__APPLE__) @@ -1312,7 +1322,7 @@ extern uint_t dtrace_getotherwin(void); extern uint_t dtrace_getfprs(void); #else extern void dtrace_copy(uintptr_t, uintptr_t, size_t); -extern void dtrace_copystr(uintptr_t, uintptr_t, size_t); +extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); #endif /* @@ -1328,7 +1338,7 @@ extern void dtrace_copystr(uintptr_t, uintptr_t, size_t); * ASSERT.) */ #undef ASSERT -#ifdef DEBUG +#if DEBUG #define ASSERT(EX) ((void)((EX) || \ dtrace_assfail(#EX, __FILE__, __LINE__))) #else diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h index ec42fa74d..54d5d0371 100644 --- a/bsd/sys/errno.h +++ b/bsd/sys/errno.h @@ -257,7 +257,13 @@ __END_DECLS /* pseudo-errors returned inside kernel to modify return to process */ #define ERESTART (-1) /* restart syscall */ #define EJUSTRETURN (-2) /* don't modify regs, just return */ + +#ifdef BSD_KERNEL_PRIVATE #define ERECYCLE (-5) /* restart lookup under heavy vnode pressure/recycling */ #define EREDRIVEOPEN (-6) +#else /* BSD_KERNEL_PRIVATE */ +/* -5 and -6 are reserved for kernel internal use */ +#endif /* BSD_KERNEL_PRIVATE */ + #endif #endif /* _SYS_ERRNO_H_ */ diff --git a/bsd/sys/event.h b/bsd/sys/event.h index d08051ca2..abbd60045 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -67,42 +67,59 @@ #define EVFILT_PROC (-5) /* attached to struct proc */ #define EVFILT_SIGNAL (-6) /* attached to struct proc */ #define EVFILT_TIMER (-7) /* timers */ -#define EVFILT_MACHPORT (-8) /* Mach ports */ +#define EVFILT_MACHPORT (-8) /* Mach portsets */ #define EVFILT_FS (-9) /* Filesystem events */ +#define EVFILT_USER (-10) /* User events */ +#define EVFILT_SESSION (-11) /* Audit session events */ -#define EVFILT_SYSCOUNT 9 +#define EVFILT_SYSCOUNT 11 #define EVFILT_THREADMARKER 
EVFILT_SYSCOUNT /* Internal use only */ #pragma pack(4) struct kevent { uintptr_t ident; /* identifier for this event */ - short filter; /* filter for event */ - unsigned short flags; /* general flags */ - unsigned int fflags; /* filter-specific flags */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ intptr_t data; /* filter-specific data */ -#ifdef KERNEL_PRIVATE - user_addr_t udata; /* opaque user data identifier */ -#else void *udata; /* opaque user data identifier */ -#endif }; #ifdef KERNEL_PRIVATE -struct user_kevent { +struct user64_kevent { uint64_t ident; /* identifier for this event */ - short filter; /* filter for event */ - unsigned short flags; /* general flags */ - unsigned int fflags; /* filter-specific flags */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ int64_t data; /* filter-specific data */ user_addr_t udata; /* opaque user data identifier */ }; +struct user32_kevent { + uint32_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ + int32_t data; /* filter-specific data */ + user32_addr_t udata; /* opaque user data identifier */ +}; + #endif #pragma pack() +struct kevent64_s { + uint64_t ident; /* identifier for this event */ + int16_t filter; /* filter for event */ + uint16_t flags; /* general flags */ + uint32_t fflags; /* filter-specific flags */ + int64_t data; /* filter-specific data */ + uint64_t udata; /* opaque user data identifier */ + uint64_t ext[2]; /* filter-specific extensions */ +}; + #define EV_SET(kevp, a, b, c, d, e, f) do { \ struct kevent *__kevp__ = (kevp); \ __kevp__->ident = (a); \ @@ -113,6 +130,18 @@ struct user_kevent { __kevp__->udata = (f); \ } while(0) +#define EV_SET64(kevp, a, b, c, d, e, f, g, h) do { \ + struct kevent64_s *__kevp__ = (kevp); \ 
+ __kevp__->ident = (a); \ + __kevp__->filter = (b); \ + __kevp__->flags = (c); \ + __kevp__->fflags = (d); \ + __kevp__->data = (e); \ + __kevp__->udata = (f); \ + __kevp__->ext[0] = (g); \ + __kevp__->ext[1] = (h); \ +} while(0) + /* actions */ #define EV_ADD 0x0001 /* add event to kq (implies enable) */ #define EV_DELETE 0x0002 /* delete event from kq */ @@ -123,6 +152,7 @@ struct user_kevent { /* flags */ #define EV_ONESHOT 0x0010 /* only report one occurrence */ #define EV_CLEAR 0x0020 /* clear event state after reporting */ +#define EV_DISPATCH 0x0080 /* disable event after reporting */ #define EV_SYSFLAGS 0xF000 /* reserved by system */ #define EV_FLAG0 0x1000 /* filter-specific flag */ @@ -153,6 +183,30 @@ struct user_kevent { #define EV_POLL EV_FLAG0 #define EV_OOBAND EV_FLAG1 +/* + * data/hint fflags for EVFILT_USER, shared with userspace + */ + +/* + * On input, NOTE_TRIGGER causes the event to be triggered for output. + */ +#define NOTE_TRIGGER 0x01000000 +#define EV_TRIGGER 0x0100 /*deprecated--for backwards compatibility only*/ + +/* + * On input, the top two bits of fflags specifies how the lower twenty four + * bits should be applied to the stored value of fflags. + * + * On output, the top two bits will always be set to NOTE_FFNOP and the + * remaining twenty four bits will contain the stored fflags value. 
+ */ +#define NOTE_FFNOP 0x00000000 /* ignore input fflags */ +#define NOTE_FFAND 0x40000000 /* and fflags */ +#define NOTE_FFOR 0x80000000 /* or fflags */ +#define NOTE_FFCOPY 0xc0000000 /* copy fflags */ +#define NOTE_FFCTRLMASK 0xc0000000 /* mask for operations */ +#define NOTE_FFLAGSMASK 0x00ffffff + /* * data/hint fflags for EVFILT_{READ|WRITE}, shared with userspace * @@ -170,6 +224,7 @@ struct user_kevent { #define NOTE_LINK 0x00000010 /* link count changed */ #define NOTE_RENAME 0x00000020 /* vnode was renamed */ #define NOTE_REVOKE 0x00000040 /* vnode access was revoked */ +#define NOTE_NONE 0x00000080 /* No specific vnode event: to test for EVFILT_READ activation*/ /* * data/hint fflags for EVFILT_PROC, shared with userspace @@ -200,7 +255,49 @@ struct user_kevent { #define NOTE_NSECONDS 0x00000004 /* data is nanoseconds */ #define NOTE_ABSOLUTE 0x00000008 /* absolute timeout */ /* ... implicit EV_ONESHOT */ - +/* + * data/hint fflags for EVFILT_MACHPORT, shared with userspace. + * + * Only portsets are supported at this time. + * + * The fflags field can optionally contain the MACH_RCV_MSG, MACH_RCV_LARGE, + * and related trailer receive options as defined in <mach/message.h>. + * The presence of these flags directs the kevent64() call to attempt to receive + * the message during kevent delivery, rather than just indicate that a message exists. + * On setup, the ext[0] field contains the receive buffer pointer and ext[1] contains + * the receive buffer length. Upon event delivery, the actual received message size + * is returned in ext[1]. As with mach_msg(), the buffer must be large enough to + * receive the message and the requested (or default) message trailers. In addition, + * the fflags field contains the return code normally returned by mach_msg(). + * + * If no message receipt options were provided in the fflags field on setup, no + * message is received by this call.
Instead, on output, the data field simply + * contains the name of the actual port detected with a message waiting. + */ + +/* + * data/hint fflags for EVFILT_SESSION, shared with userspace. + * + * The kevent ident field should be set to AS_ANY_ASID if interested + * in events for any session. + * + * NOTE_AS_UPDATE may be going away since struct auditinfo_addr may become + * immutable once initially set. + */ +#define NOTE_AS_START 0x00000001 /* start of new session */ +#define NOTE_AS_END 0x00000002 /* end of session */ +#define NOTE_AS_ERR 0x00000004 /* error tracking new session */ +#define NOTE_AS_CLOSE 0x00000008 /* currently unsupported */ +#define NOTE_AS_UPDATE 0x00000010 /* session data updated */ + +/* + * Kevent ident value for any session. + */ +#define AS_ANY_ASID 0xFFFFFFFF + +struct au_sentry; /* Audit session entry */ + + /* * DEPRECATED!!!!!!!!! * NOTE_TRACK, NOTE_TRACKERR, and NOTE_CHILD are no longer supported as of 10.5 @@ -240,12 +337,14 @@ struct knote { union { struct fileproc *p_fp; /* file data pointer */ struct proc *p_proc; /* proc pointer */ + struct ipc_pset *p_pset; /* pset pointer */ + struct au_sentry *p_se; /* Audit session ptr */ } kn_ptr; struct filterops *kn_fop; int kn_status; /* status bits */ int kn_sfflags; /* saved filter flags */ - struct kevent kn_kevent; - caddr_t kn_hook; + struct kevent64_s kn_kevent; + void *kn_hook; int kn_hookid; int64_t kn_sdata; /* saved data field */ @@ -254,24 +353,36 @@ struct knote { #define KN_DISABLED 0x04 /* event is disabled */ #define KN_DROPPING 0x08 /* knote is being dropped */ #define KN_USEWAIT 0x10 /* wait for knote use */ -#define KN_DROPWAIT 0x20 /* wait for knote drop */ +#define KN_ATTACHING 0x20 /* event is pending attach */ +#define KN_STAYQUEUED 0x40 /* force event to stay on queue */ #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags #define kn_fflags kn_kevent.fflags #define kn_data kn_kevent.data +#define
kn_udata kn_kevent.udata +#define kn_ext kn_kevent.ext #define kn_fp kn_ptr.p_fp }; +/* Hint values for f_touch filter operation */ +#define EVENT_REGISTER 1 +#define EVENT_PROCESS 2 + struct filterops { int f_isfd; /* true if ident == filedescriptor */ int (*f_attach)(struct knote *kn); void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, long hint); + /* Optional f_touch operation, called only if !f_isfd && non-NULL */ + void (*f_touch)(struct knote *kn, struct kevent64_s *kev, long type); + /* Optional f_peek operation, called only if KN_STAYQUEUED is set */ + int (*f_peek)(struct knote *kn); }; struct proc; +struct wait_queue; SLIST_HEAD(klist, knote); extern void knote_init(void) __attribute__((section("__TEXT, initcode"))); @@ -285,6 +396,8 @@ extern void klist_init(struct klist *list); extern void knote(struct klist *list, long hint); extern int knote_attach(struct klist *list, struct knote *kn); extern int knote_detach(struct klist *list, struct knote *kn); +extern int knote_link_wait_queue(struct knote *kn, struct wait_queue *wq); +extern void knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq); extern void knote_fdclose(struct proc *p, int fd); #endif /* !KERNEL_PRIVATE */ @@ -299,18 +412,13 @@ int kqueue(void); int kevent(int kq, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); +int kevent64(int kq, const struct kevent64_s *changelist, + int nchanges, struct kevent64_s *eventlist, + int nevents, unsigned int flags, + const struct timespec *timeout); __END_DECLS -#ifdef PRIVATE -#include - -__BEGIN_DECLS -mach_port_t kqueue_portset_np(int kq); -int kqueue_from_portset_np(mach_port_t portset); -__END_DECLS -#endif /* PRIVATE */ - #endif /* KERNEL */ diff --git a/bsd/sys/eventvar.h b/bsd/sys/eventvar.h index 9694af109..b94187572 100644 --- a/bsd/sys/eventvar.h +++ b/bsd/sys/eventvar.h @@ -64,11 +64,12 @@ #define KQEXTENT 256 /* linear growth by this amount */ 
struct kqueue { - decl_lck_spin_data( ,kq_lock) /* kqueue lock */ + wait_queue_set_t kq_wqs; /* private wait queue set */ + decl_lck_spin_data( ,kq_lock) /* kqueue lock */ int kq_state; - int kq_count; /* number of queued events */ - struct kqtailq kq_head; /* list of queued events */ - struct kqtailq kq_inprocess; /* list of in-process events */ + int kq_count; /* number of queued events */ + uint32_t kq_nprocess; /* atomic counter for kqueue_process */ + struct kqtailq kq_head; /* list of queued events */ struct selinfo kq_sel; /* parent select/kqueue info */ struct proc *kq_p; /* process containing kqueue */ int kq_level; /* nesting level */ @@ -76,16 +77,18 @@ struct kqueue { #define KQ_SEL 0x01 #define KQ_SLEEP 0x02 #define KQ_PROCWAIT 0x04 +#define KQ_KEV32 0x08 +#define KQ_KEV64 0x10 }; extern struct kqueue *kqueue_alloc(struct proc *); extern void kqueue_dealloc(struct kqueue *); -typedef int (*kevent_callback_t)(struct kqueue *, struct kevent *, void *); -typedef void (*kevent_continue_t)(struct kqueue *, void *, int); +typedef int (*kevent_callback_t)(struct kqueue *, struct kevent64_s *, void *); +typedef void (*kqueue_continue_t)(struct kqueue *, void *, int); -extern int kevent_register(struct kqueue *, struct kevent *, struct proc *); -extern int kevent_scan(struct kqueue *, kevent_callback_t, kevent_continue_t, +extern int kevent_register(struct kqueue *, struct kevent64_s *, struct proc *); +extern int kqueue_scan(struct kqueue *, kevent_callback_t, kqueue_continue_t, void *, struct timeval *, struct proc *); #endif /* !_SYS_EVENTVAR_H_ */ diff --git a/bsd/sys/fasttrap.h b/bsd/sys/fasttrap.h index 64f08842e..3aa0db471 100644 --- a/bsd/sys/fasttrap.h +++ b/bsd/sys/fasttrap.h @@ -79,6 +79,11 @@ typedef struct fasttrap_probe_spec { char ftps_func[DTRACE_FUNCNAMELEN]; char ftps_mod[DTRACE_MODNAMELEN]; +#if defined(__APPLE__) +#if !defined(__LP64__) + uint32_t pad; /* Explicit pad to keep ILP32 and LP64 lined up. 
*/ +#endif +#endif uint64_t ftps_pc; uint64_t ftps_size; uint64_t ftps_noffs; diff --git a/bsd/sys/fasttrap_impl.h b/bsd/sys/fasttrap_impl.h index 0570f0c5e..259841c70 100644 --- a/bsd/sys/fasttrap_impl.h +++ b/bsd/sys/fasttrap_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -28,7 +28,7 @@ #define _FASTTRAP_IMPL_H /* - * #pragma ident "@(#)fasttrap_impl.h 1.12 06/06/12 SMI" + * #pragma ident "@(#)fasttrap_impl.h 1.14 08/04/09 SMI" */ #include @@ -38,7 +38,8 @@ #include #include -#define proc_t struct proc +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #ifdef __cplusplus extern "C" { @@ -52,7 +53,9 @@ extern "C" { * providers. Those providers are each represented by a fasttrap_provider_t. * All providers for a given process have a pointer to a shared * fasttrap_proc_t. The fasttrap_proc_t has two states: active or defunct. - * It becomes defunct when the process performs an exit or an exec. + * When the count of active providers goes to zero it becomes defunct; a + * provider drops its active count when it is removed individually or as part + * of a mass removal when a process exits or performs an exec. * * Each probe is represented by a fasttrap_probe_t which has a pointer to * its associated provider as well as a list of fasttrap_id_tp_t structures @@ -61,8 +64,8 @@ extern "C" { * and it contains two lists of fasttrap_id_t structures (to be fired pre- * and post-instruction emulation) that identify the probes attached to the * tracepoint. Tracepoints also have a pointer to the fasttrap_proc_t for the - * process they trace which is used when looking up a tracepoint both at - * probe fire time and when enabling and disabling probes. 
+ * process they trace which is used when looking up a tracepoint both when a + * probe fires and when enabling and disabling probes. * * It's important to note that probes are preallocated with the necessary * number of tracepoints, but that tracepoints can be shared by probes and @@ -79,9 +82,9 @@ extern "C" { typedef struct fasttrap_proc { pid_t ftpc_pid; /* process ID for this proc */ - uint_t ftpc_defunct; /* denotes a lame duck proc */ - uint64_t ftpc_count; /* reference count */ - lck_mtx_t ftpc_mtx; /* proc lock */ + uint64_t ftpc_acount; /* count of active providers */ + uint64_t ftpc_rcount; /* count of extant providers */ + lck_mtx_t ftpc_mtx; /* lock on all but acount */ struct fasttrap_proc *ftpc_next; /* next proc in hash chain */ } fasttrap_proc_t; diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index 98b5c7c42..520d6ce9a 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -126,7 +126,9 @@ typedef __darwin_pid_t pid_t; #endif #define O_NONBLOCK 0x0004 /* no delay */ #define O_APPEND 0x0008 /* set append mode */ -#define O_SYNC 0x0080 /* synchronous writes */ +#ifndef O_SYNC /* allow simultaneous inclusion of */ +#define O_SYNC 0x0080 /* synch I/O file integrity */ +#endif #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define O_SHLOCK 0x0010 /* open with shared file lock */ #define O_EXLOCK 0x0020 /* open with exclusive file lock */ @@ -162,7 +164,9 @@ typedef __darwin_pid_t pid_t; #define O_SYMLINK 0x200000 /* allow open of a symlink */ #endif -//#define O_SYNC /* ??? 
POSIX: Write according to synchronized I/O file integrity completion */ +#ifndef O_DSYNC /* allow simultaneous inclusion of */ +#define O_DSYNC 0x400000 /* synch I/O data integrity */ +#endif #ifdef KERNEL /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ @@ -170,9 +174,9 @@ typedef __darwin_pid_t pid_t; #define OFLAGS(fflags) ((fflags) - 1) /* bits to save after open */ -#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK) +#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FFDSYNC|FNONBLOCK) /* bits settable by fcntl(F_SETFL, ...) */ -#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK) +#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FFDSYNC|FNONBLOCK) #endif /* @@ -184,6 +188,7 @@ typedef __darwin_pid_t pid_t; #define FAPPEND O_APPEND /* kernel/compat */ #define FASYNC O_ASYNC /* kernel/compat */ #define FFSYNC O_FSYNC /* kernel */ +#define FFDSYNC O_DSYNC /* kernel */ #define FNONBLOCK O_NONBLOCK /* kernel */ #define FNDELAY O_NONBLOCK /* compat */ #define O_NDELAY O_NONBLOCK /* compat */ @@ -241,6 +246,8 @@ typedef __darwin_pid_t pid_t; #define F_MARKDEPENDENCY 60 /* this process hosts the device supporting the fs backing this fd */ +#define F_ADDFILESIGS 61 /* add signature from same file (used by dyld for shared libs) */ + // FS-specific fcntl()'s numbers begin at 0x00010000 and go up #define FCNTL_FS_SPECIFIC_BASE 0x00010000 @@ -287,7 +294,6 @@ typedef __darwin_pid_t pid_t; #define S_IFSOCK 0140000 /* [XSI] socket */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define S_IFWHT 0160000 /* whiteout */ -#define S_IFXATTR 0200000 /* extended attribute */ #endif /* File mode */ @@ -350,15 +356,25 @@ struct flock { * advisory file read data type - * information passed by user to system */ + +#ifdef KERNEL +#pragma pack(4) /* prevent structure padding in kernel */ +#endif /* KERNEL */ + struct radvisory { off_t ra_offset; int ra_count; }; +#ifdef KERNEL +#pragma pack() +#endif /* KERNEL */ + /* * detached code 
signatures data type - - * information passed by user to system - * used by F_ADDSIGS + * information passed by user to system used by F_ADDSIGS and F_ADDFILESIGS. + * F_ADDFILESIGS is a shortcut for files that contain their own signature and + * doesn't require mapping of the file in order to load the signature. */ typedef struct fsignatures { off_t fs_file_start; @@ -370,10 +386,19 @@ typedef struct fsignatures { * grow when we're dealing with a 64-bit process. * WARNING - keep in sync with fsignatures */ -typedef struct user_fsignatures { + +typedef struct user32_fsignatures { off_t fs_file_start; - user_addr_t fs_blob_start; - user_size_t fs_blob_size; + user32_addr_t fs_blob_start; + user32_size_t fs_blob_size; +} user32_fsignatures_t; + +typedef struct user_fsignatures { + off_t fs_file_start; /* offset of Mach-O image in FAT file */ + user_addr_t fs_blob_start; /* F_ADDSIGS: mem address of signature*/ + /* F_ADDFILESIGS: offset of signature */ + /* in Mach-O image */ + user_size_t fs_blob_size; /* size of signature blob */ } user_fsignatures_t; #endif /* KERNEL */ @@ -401,13 +426,18 @@ typedef struct fbootstraptransfer { void *fbt_buffer; /* IN: buffer to be read/written */ } fbootstraptransfer_t; - #ifdef KERNEL /* LP64 version of fbootstraptransfer. all pointers * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with fbootstraptransfer */ +typedef struct user32_fbootstraptransfer { + off_t fbt_offset; /* IN: offset to start read/write */ + user32_size_t fbt_length; /* IN: number of bytes to transfer */ + user32_addr_t fbt_buffer; /* IN: buffer to be read/written */ +} user32_fbootstraptransfer_t; + typedef struct user_fbootstraptransfer { off_t fbt_offset; /* IN: offset to start read/write */ user_size_t fbt_length; /* IN: number of bytes to transfer */ @@ -462,6 +492,12 @@ struct fopenfrom { * * WARNING - keep in sync with fopenfrom (above) */ +struct user32_fopenfrom { + unsigned int o_flags; + mode_t o_mode; + user32_addr_t o_pathname; +}; + struct user_fopenfrom { unsigned int o_flags; mode_t o_mode; @@ -509,8 +545,9 @@ filesec_t filesec_init(void); filesec_t filesec_dup(filesec_t); void filesec_free(filesec_t); int filesec_get_property(filesec_t, filesec_property_t, void *); -int filesec_set_property(filesec_t, filesec_property_t, const void *); int filesec_query_property(filesec_t, filesec_property_t, int *); +int filesec_set_property(filesec_t, filesec_property_t, const void *); +int filesec_unset_property(filesec_t, filesec_property_t); #define _FILESEC_UNSET_PROPERTY ((void *)0) #define _FILESEC_REMOVE_ACL ((void *)1) #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ diff --git a/bsd/sys/file.h b/bsd/sys/file.h index 9bf925acb..b236f0840 100644 --- a/bsd/sys/file.h +++ b/bsd/sys/file.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,6 +74,7 @@ #ifdef KERNEL #include #include +#include #endif #ifndef _KAUTH_CRED_T @@ -99,14 +100,18 @@ struct extern_file { #pragma pack() -#ifdef KERNEL __BEGIN_DECLS +#ifdef KERNEL int file_socket(int, socket_t *); int file_vnode(int, vnode_t *); +int file_vnode_withvid(int, vnode_t *, uint32_t *); int file_flags(int, int *); int file_drop(int); -__END_DECLS - #endif /* KERNEL */ +#ifdef KERNEL_PRIVATE +int fd_rdwr(int fd, enum uio_rw, uint64_t base, int64_t len, enum uio_seg, + off_t offset, int io_flg, int64_t *aresid); +#endif /* KERNEL_PRIVATE */ +__END_DECLS #endif /* !_SYS_FILE_H_ */ diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h index ea0813377..6ac9ffb74 100644 --- a/bsd/sys/file_internal.h +++ b/bsd/sys/file_internal.h @@ -99,7 +99,14 @@ struct fileproc { #define FP_INCREATE 0x0001 #define FP_INCLOSE 0x0002 #define FP_INSELECT 0x0004 +/* + * see + */ +#if CONFIG_EMBEDDED +#define FP_INCHRREAD 0x0000 +#else #define FP_INCHRREAD 0x0008 +#endif #define FP_WRITTEN 0x0010 #define FP_CLOSING 0x0020 #define FP_WAITCLOSE 0x0040 @@ -175,6 +182,7 @@ extern struct filelist filehead; /* head of list of open files */ extern struct fmsglist fmsghead; /* head of list of open files */ extern int maxfiles; /* kernel limit on number of open files */ extern int nfiles; /* actual number of open files */ +extern int maxfilesperproc; #endif /* __APPLE_API_PRIVATE */ @@ -216,6 +224,8 @@ void procfdtbl_markclosefd(struct proc * p, int fd); void procfdtbl_releasefd(struct proc * p, int fd, struct fileproc * fp); void procfdtbl_waitfd(struct proc * p, int fd); void procfdtbl_clearfd(struct proc * p, int fd); +boolean_t filetype_issendable(file_type_t type); +extern int fdgetf_noref(proc_t, int, struct fileproc **); __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/filedesc.h b/bsd/sys/filedesc.h index b9839f3e9..7ea50f5a9 100644 --- a/bsd/sys/filedesc.h +++ b/bsd/sys/filedesc.h @@ -66,7 +66,6 
@@ #include -#ifdef __APPLE_API_UNSTABLE /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. @@ -84,6 +83,8 @@ #define NDFILE 25 /* 125 bytes */ #define NDEXTENT 50 /* 250 bytes in 256-byte alloc. */ +#ifdef BSD_KERNEL_PRIVATE + struct klist; struct filedesc { @@ -95,7 +96,7 @@ struct filedesc { int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ - u_long fd_refcnt; /* reference count */ + uint32_t fd_refcnt; /* reference count */ int fd_knlistsize; /* size of knlist */ struct klist *fd_knlist; /* list of attached knotes */ @@ -152,6 +153,6 @@ extern void fdexec(proc_t p); #endif /* KERNEL */ -#endif /* __APPLE_API_UNSTABLE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_SYS_FILEDESC_H_ */ diff --git a/bsd/sys/fsctl.h b/bsd/sys/fsctl.h index 53bdbf31e..b70ba0651 100644 --- a/bsd/sys/fsctl.h +++ b/bsd/sys/fsctl.h @@ -71,12 +71,71 @@ #include +#define FSIOC_SYNC_VOLUME _IOW('A', 1, uint32_t) +#define FSCTL_SYNC_VOLUME IOCBASECMD(FSIOC_SYNC_VOLUME) + +#define FSCTL_SYNC_FULLSYNC (1<<0) /* Flush the data fully to disk, if supported by the filesystem */ +#define FSCTL_SYNC_WAIT (1<<1) /* Wait for the sync to complete */ + + +typedef struct package_ext_info { + const char *strings; + uint32_t num_entries; + uint32_t max_width; +} package_ext_info; + +#define FSIOC_SET_PACKAGE_EXTS _IOW('A', 2, struct package_ext_info) +#define FSCTL_SET_PACKAGE_EXTS IOCBASECMD(FSIOC_SET_PACKAGE_EXTS) + +#define FSIOC_WAIT_FOR_SYNC _IOR('A', 3, int32_t) +#define FSCTL_WAIT_FOR_SYNC IOCBASECMD(FSIOC_WAIT_FOR_SYNC) + + +// +// Spotlight and fseventsd use these fsctl()'s to find out +// the mount time of a volume and the last time it was +// unmounted. Both HFS and ZFS support these calls. +// +// User space code should pass the "_IOC_" macros while the +// kernel should test for the "_FSCTL_" variant of the macro +// in its vnop_ioctl function. 
+// +// NOTE: the values for these defines should _not_ be changed +// or else it will break binary compatibility with mds +// and fseventsd. +// +#define SPOTLIGHT_IOC_GET_MOUNT_TIME _IOR('h', 18, u_int32_t) +#define SPOTLIGHT_FSCTL_GET_MOUNT_TIME IOCBASECMD(SPOTLIGHT_IOC_GET_MOUNT_TIME) +#define SPOTLIGHT_IOC_GET_LAST_MTIME _IOR('h', 19, u_int32_t) +#define SPOTLIGHT_FSCTL_GET_LAST_MTIME IOCBASECMD(SPOTLIGHT_IOC_GET_LAST_MTIME) + + +#ifdef KERNEL + +typedef struct user64_package_ext_info { + user64_addr_t strings; + uint32_t num_entries; + uint32_t max_width; +} user64_package_ext_info; + +typedef struct user32_package_ext_info { + user32_addr_t strings; + uint32_t num_entries; + uint32_t max_width; +} user32_package_ext_info; + +#endif // KERNEL + + #ifndef KERNEL #include __BEGIN_DECLS -int fsctl(const char *, unsigned long, void *, unsigned long); + +int fsctl(const char *,unsigned long,void*,unsigned int); +int ffsctl(int,unsigned long,void*,unsigned int); + __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/fsevents.h b/bsd/sys/fsevents.h index b8508c577..e5cb3ec3c 100644 --- a/bsd/sys/fsevents.h +++ b/bsd/sys/fsevents.h @@ -94,10 +94,11 @@ // #define FSE_MODE_HLINK (1 << 31) // notification is for a hard-link #define FSE_MODE_LAST_HLINK (1 << 30) // link count == 0 on a hard-link delete - +#define FSE_REMOTE_DIR_EVENT (1 << 29) // this is a remotely generated directory-level granularity event +#define FSE_TRUNCATED_PATH (1 << 28) // the path for this item had to be truncated // ioctl's on /dev/fsevents -#if defined(__x86_64__) || defined(__ppc64__) +#if __LP64__ typedef struct fsevent_clone_args { int8_t *event_list; int32_t num_events; @@ -119,11 +120,13 @@ typedef struct fsevent_clone_args { // ioctl's on the cloned fd -#if defined(__x86_64__) || defined(__ppc64__) +#if __LP64__ +#pragma pack(push, 4) typedef struct fsevent_dev_filter_args { uint32_t num_devices; dev_t *devices; } fsevent_dev_filter_args; +#pragma pack(pop) #else typedef struct 
fsevent_dev_filter_args { uint32_t num_devices; @@ -135,6 +138,7 @@ typedef struct fsevent_dev_filter_args { #define FSEVENTS_DEVICE_FILTER _IOW('s', 100, fsevent_dev_filter_args) #define FSEVENTS_WANT_COMPACT_EVENTS _IO('s', 101) #define FSEVENTS_WANT_EXTENDED_INFO _IO('s', 102) +#define FSEVENTS_GET_CURRENT_ID _IOR('s', 103, uint64_t) #ifdef KERNEL @@ -143,6 +147,8 @@ void fsevents_init(void); int need_fsevent(int type, vnode_t vp); int add_fsevent(int type, vfs_context_t, ...); void fsevent_unmount(struct mount *mp); +struct vnode_attr; +void create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap); // misc utility functions for fsevent info and pathbuffers... typedef struct fse_info { diff --git a/libsa/c++rem3.h b/bsd/sys/fsgetpath.h similarity index 66% rename from libsa/c++rem3.h rename to bsd/sys/fsgetpath.h index 31842164a..735553e0c 100644 --- a/libsa/c++rem3.h +++ b/bsd/sys/fsgetpath.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,25 +25,34 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * History: - * 2002-02-26 gvdl Initial implementation of the gcc 2.95 -> gcc 3.x - * symbol remangler. - */ -#include +#ifndef _FSGETPATH_H_ +#define _FSGETPATH_H_ + +#include +#include -typedef enum Rem3Return { - kR3NotRemangled = 0, // Wasn't a 2.95 C++ symbol but otherwise OK - kR3Remangled, // Was sucessfully remangled from 2.95 -> 3.x - kR3InternalNotRemangled, // Symbol is too big to be parsed - kR3BufferTooSmallRemangled, // Is 2.95 symbol but insufficent output space - kR3BadArgument, // One of the pointers are NULL -} Rem3Return; +#ifdef __APPLE_API_PRIVATE +#ifndef KERNEL __BEGIN_DECLS -extern Rem3Return -rem3_remangle_name(char *gcc3, int *gcc3size, const char *gcc295); +#include +#include + +/* + * Obtain the full pathname of a file system object by id. 
+ * + * This is a private SPI used by the File Manager. + * + * ssize_t fsgetpath_np(char *restrict buf, size_t bufsize, fsid_t fsid, uint64_t objid); + */ +#define fsgetpath(buf, bufsize, fsid, objid) \ + (ssize_t)syscall(SYS_fsgetpath, buf, (size_t)bufsize, fsid, (uint64_t)objid) __END_DECLS +#endif /* KERNEL */ + +#endif /* __APPLE_API_PRIVATE */ + +#endif /* !_FSGETPATH_H_ */ diff --git a/bsd/sys/gmon.h b/bsd/sys/gmon.h index 75e478c53..a6576006d 100644 --- a/bsd/sys/gmon.h +++ b/bsd/sys/gmon.h @@ -240,7 +240,7 @@ extern struct gmonparam _gmonparam; void kmstartup(void); void cfreemem(caddr_t, int); /* Currently only a stub function. */ -void mcount(u_long, u_long); +void mcount(uintptr_t, uintptr_t); #endif /* XNU_KERNEL_PRIVATE */ #endif /* GPROF */ diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index 36fb8d7e9..fa9be0460 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -99,12 +99,11 @@ struct image_params { char *ip_p_comm; /* optional alt p->p_comm */ struct vfs_context *ip_vfs_context; /* VFS context */ struct nameidata *ip_ndp; /* current nameidata */ - thread_t ip_vfork_thread; /* thread created, if vfork */ + thread_t ip_new_thread; /* thread for spawn/vfork */ struct label *ip_execlabelp; /* label of the executable */ struct label *ip_scriptlabelp; /* label of the script */ unsigned int ip_csflags; /* code signing flags */ - int ip_no_trans; /* allow suid/sgid transition?*/ void *ip_px_sa; void *ip_px_sfa; void *ip_px_spa; @@ -121,5 +120,6 @@ struct image_params { #endif #define IMGPF_WAS_64BIT 0x00000004 /* exec from a 64Bit binary */ #define IMGPF_IS_64BIT 0x00000008 /* exec to a 64Bit binary */ +#define IMGPF_SPAWN 0x00000010 /* spawn (without setexec) */ #endif /* !_SYS_IMGACT */ diff --git a/bsd/sys/ioccom.h b/bsd/sys/ioccom.h index eebf1e745..cc22148f0 100644 --- a/bsd/sys/ioccom.h +++ b/bsd/sys/ioccom.h @@ -64,6 +64,8 @@ #ifndef _SYS_IOCCOM_H_ #define _SYS_IOCCOM_H_ +#include + /* * Ioctl's have the command encoded in the lower word, 
and the size of * any in or out parameters in the upper word. The high 3 bits of the @@ -76,15 +78,15 @@ #define IOCPARM_MAX (IOCPARM_MASK + 1) /* max size of ioctl args */ /* no parameters */ -#define IOC_VOID (unsigned long)0x20000000 +#define IOC_VOID (__uint32_t)0x20000000 /* copy parameters out */ -#define IOC_OUT (unsigned long)0x40000000 +#define IOC_OUT (__uint32_t)0x40000000 /* copy parameters in */ -#define IOC_IN (unsigned long)0x80000000 +#define IOC_IN (__uint32_t)0x80000000 /* copy paramters in and out */ #define IOC_INOUT (IOC_IN|IOC_OUT) /* mask for IN/OUT/VOID */ -#define IOC_DIRMASK (unsigned long)0xe0000000 +#define IOC_DIRMASK (__uint32_t)0xe0000000 #define _IOC(inout,group,num,len) \ (inout | ((len & IOCPARM_MASK) << 16) | ((group) << 8) | (num)) diff --git a/bsd/sys/ioctl.h b/bsd/sys/ioctl.h index f8fa38926..38aea03bd 100644 --- a/bsd/sys/ioctl.h +++ b/bsd/sys/ioctl.h @@ -105,9 +105,8 @@ __END_DECLS * Compatability with old terminal driver * * Source level -> #define USE_OLD_TTY - * Kernel level -> options COMPAT_SUNOS + * Kernel level -> always on */ -#if defined(USE_OLD_TTY) || COMPAT_43 || defined(COMPAT_SUNOS) || \ - defined(COMPAT_SVR4) || defined(COMPAT_NEXT_3X) || COMPAT_43_TTY +#if defined(USE_OLD_TTY) || BSD_KERNEL_PRIVATE #include #endif /* !_SYS_IOCTL_H_ */ diff --git a/bsd/sys/ipc.h b/bsd/sys/ipc.h index 14bed3fb7..a43cacc55 100644 --- a/bsd/sys/ipc.h +++ b/bsd/sys/ipc.h @@ -105,6 +105,8 @@ typedef __darwin_mode_t mode_t; typedef __int32_t key_t; #endif +#pragma pack(4) + /* * Technically, we should force all code references to the new structure * definition, not in just the standards conformance case, and leave the @@ -148,6 +150,8 @@ struct __ipc_perm_old { }; #endif /* !__DARWIN_UNIX03 */ +#pragma pack() + /* * [XSI] Definitions shall be provided for the following constants: */ @@ -185,7 +189,7 @@ struct __ipc_perm_old { /* Macros to convert between ipc ids and array indices or sequence ids */ #define IPCID_TO_IX(id) ((id) & 
0xffff) #define IPCID_TO_SEQ(id) (((id) >> 16) & 0xffff) -#define IXSEQ_TO_IPCID(ix,perm) (((perm._seq) << 16L) | (ix & 0xffff)) +#define IXSEQ_TO_IPCID(ix,perm) (((perm._seq) << 16L) | ((ix) & 0xffff)) struct ucred; diff --git a/bsd/sys/ipcs.h b/bsd/sys/ipcs.h index 26cf1dcfa..48215be74 100644 --- a/bsd/sys/ipcs.h +++ b/bsd/sys/ipcs.h @@ -67,6 +67,14 @@ struct user_IPCS_command { user_addr_t ipcs_data; /* OP specific data */ }; +struct user32_IPCS_command { + int ipcs_magic; /* Magic number for struct layout */ + int ipcs_op; /* Operation to perform */ + int ipcs_cursor; /* Cursor for iteration functions */ + int ipcs_datalen; /* Length of ipcs_data area */ + user32_addr_t ipcs_data; /* OP specific data */ +}; + #endif /* KERNEL_PRIVATE */ /* diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h index 8b41ac018..33078a1f4 100644 --- a/bsd/sys/kauth.h +++ b/bsd/sys/kauth.h @@ -38,6 +38,7 @@ #include #include #include +#include /* __offsetof() */ #ifdef __APPLE_API_EVOLVING @@ -105,6 +106,12 @@ struct kauth_identity_extlookup { #define KAUTH_EXTLOOKUP_WANT_MEMBERSHIP (1<<12) #define KAUTH_EXTLOOKUP_VALID_MEMBERSHIP (1<<13) #define KAUTH_EXTLOOKUP_ISMEMBER (1<<14) + + __darwin_pid_t el_info_pid; /* request on behalf of PID */ + u_int32_t el_info_reserved_1; /* reserved (APPLE) */ + u_int32_t el_info_reserved_2; /* reserved (APPLE) */ + u_int32_t el_info_reserved_3; /* reserved (APPLE) */ + uid_t el_uid; /* user ID */ guid_t el_uguid; /* user GUID */ u_int32_t el_uguid_valid; /* TTL on translation result (seconds) */ @@ -121,6 +128,7 @@ struct kauth_identity_extlookup { #define KAUTH_EXTLOOKUP_REGISTER (0) #define KAUTH_EXTLOOKUP_RESULT (1<<0) #define KAUTH_EXTLOOKUP_WORKER (1<<1) +#define KAUTH_EXTLOOKUP_DEREGISTER (1<<2) #ifdef KERNEL @@ -169,7 +177,8 @@ struct kauth_cred { int kc_nwhtgroups; /* whiteout group list */ gid_t *kc_whtgroups; - struct auditinfo cr_au; /* user auditing data */ + struct auditinfo cr_au; + struct au_session cr_audit; /* user auditing data */ int 
kc_nsupplement; /* entry count in supplemental data pointer array */ kauth_cred_supplement_t *kc_supplement; @@ -186,30 +195,21 @@ __BEGIN_DECLS extern uid_t kauth_getuid(void); extern uid_t kauth_getruid(void); extern gid_t kauth_getgid(void); -extern gid_t kauth_getrgid(void); extern kauth_cred_t kauth_cred_get(void); extern kauth_cred_t kauth_cred_get_with_ref(void); extern kauth_cred_t kauth_cred_proc_ref(proc_t procp); -extern kauth_cred_t kauth_cred_alloc(void); extern kauth_cred_t kauth_cred_create(kauth_cred_t cred); extern void kauth_cred_ref(kauth_cred_t _cred); +#ifndef __LP64__ /* Use kauth_cred_unref(), not kauth_cred_rele() */ extern void kauth_cred_rele(kauth_cred_t _cred) __deprecated; -extern kauth_cred_t kauth_cred_dup(kauth_cred_t cred); -extern kauth_cred_t kauth_cred_copy_real(kauth_cred_t cred); +#endif extern void kauth_cred_unref(kauth_cred_t *_cred); -extern kauth_cred_t kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid_t gmuid); -extern kauth_cred_t kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid); -extern kauth_cred_t kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); -extern kauth_cred_t kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); -extern kauth_cred_t kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmuid); -struct uthread; -extern void kauth_cred_uthread_update(struct uthread *, proc_t); + #if CONFIG_MACF struct label; extern kauth_cred_t kauth_cred_label_update(kauth_cred_t cred, struct label *label); extern int kauth_proc_label_update(struct proc *p, struct label *label); -extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct label *scriptlabel, struct label *execlabel); #else /* this is a temp hack to cover us when MAC is not built in a kernel configuration. 
* Since we cannot build our export list based on the kernel configuration we need @@ -220,8 +220,6 @@ extern int kauth_proc_label_update(struct proc *p, void *label); #endif extern kauth_cred_t kauth_cred_find(kauth_cred_t cred); -extern int kauth_cred_getgroups(gid_t *_groups, int *_groupcount); -extern int kauth_cred_assume(uid_t _uid); extern uid_t kauth_cred_getuid(kauth_cred_t _cred); extern gid_t kauth_cred_getgid(kauth_cred_t _cred); extern int kauth_cred_guid2uid(guid_t *_guid, uid_t *_uidp); @@ -238,14 +236,8 @@ extern int kauth_cred_gid2ntsid(gid_t _gid, ntsid_t *_sidp); extern int kauth_cred_guid2ntsid(guid_t *_guid, ntsid_t *_sidp); extern int kauth_cred_ismember_gid(kauth_cred_t _cred, gid_t _gid, int *_resultp); extern int kauth_cred_ismember_guid(kauth_cred_t _cred, guid_t *_guidp, int *_resultp); -extern int kauth_cred_gid_subset(kauth_cred_t _cred1, kauth_cred_t _cred2, int *_resultp); -struct auditinfo; -extern kauth_cred_t kauth_cred_setauditinfo(kauth_cred_t, struct auditinfo *); - -extern int kauth_cred_supplementary_register(const char *name, int *ident); -extern int kauth_cred_supplementary_add(kauth_cred_t cred, int ident, const void *data, size_t datasize); -extern int kauth_cred_supplementary_remove(kauth_cred_t cred, int ident); +extern int groupmember(gid_t gid, kauth_cred_t cred); /* currently only exported in unsupported for use by seatbelt */ extern int kauth_cred_issuser(kauth_cred_t _cred); @@ -254,8 +246,11 @@ extern int kauth_cred_issuser(kauth_cred_t _cred); /* GUID, NTSID helpers */ extern guid_t kauth_null_guid; extern int kauth_guid_equal(guid_t *_guid1, guid_t *_guid2); +#ifdef XNU_KERNEL_PRIVATE extern int kauth_ntsid_equal(ntsid_t *_sid1, ntsid_t *_sid2); +#endif /* XNU_KERNEL_PRIVATE */ +#ifdef XNU_KERNEL_PRIVATE extern int kauth_wellknown_guid(guid_t *_guid); #define KAUTH_WKG_NOT 0 /* not a well-known GUID */ #define KAUTH_WKG_OWNER 1 @@ -263,8 +258,31 @@ extern int kauth_wellknown_guid(guid_t *_guid); #define 
KAUTH_WKG_NOBODY 3 #define KAUTH_WKG_EVERYBODY 4 +extern kauth_cred_t kauth_cred_dup(kauth_cred_t cred); +extern gid_t kauth_getrgid(void); +extern kauth_cred_t kauth_cred_alloc(void); extern int cantrace(proc_t cur_procp, kauth_cred_t creds, proc_t traced_procp, int *errp); +extern kauth_cred_t kauth_cred_copy_real(kauth_cred_t cred); +extern kauth_cred_t kauth_cred_setresuid(kauth_cred_t cred, uid_t ruid, uid_t euid, uid_t svuid, uid_t gmuid); +extern kauth_cred_t kauth_cred_setresgid(kauth_cred_t cred, gid_t rgid, gid_t egid, gid_t svgid); +extern kauth_cred_t kauth_cred_setuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); +extern kauth_cred_t kauth_cred_setsvuidgid(kauth_cred_t cred, uid_t uid, gid_t gid); +extern kauth_cred_t kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmuid); +struct uthread; +extern void kauth_cred_uthread_update(struct uthread *, proc_t); +#ifdef CONFIG_MACF +extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct label *scriptlabel, struct label *execlabel); +#endif +extern int kauth_cred_getgroups(gid_t *_groups, int *_groupcount); +extern int kauth_cred_assume(uid_t _uid); +extern int kauth_cred_gid_subset(kauth_cred_t _cred1, kauth_cred_t _cred2, int *_resultp); +struct auditinfo_addr; +extern kauth_cred_t kauth_cred_setauditinfo(kauth_cred_t, au_session_t *); +extern int kauth_cred_supplementary_register(const char *name, int *ident); +extern int kauth_cred_supplementary_add(kauth_cred_t cred, int ident, const void *data, size_t datasize); +extern int kauth_cred_supplementary_remove(kauth_cred_t cred, int ident); +#endif /* XNU_KERNEL_PRIVATE */ __END_DECLS #endif /* KERNEL */ @@ -318,7 +336,7 @@ struct kauth_acl { u_int32_t acl_entrycount; u_int32_t acl_flags; - struct kauth_ace acl_ace[]; + struct kauth_ace acl_ace[1]; }; /* @@ -353,7 +371,7 @@ struct kauth_acl { * entry (Windows treats this as "deny all") from one that merely indicates a * file group 
and/or owner guid values. */ -#define KAUTH_ACL_SIZE(c) (sizeof(struct kauth_acl) + ((u_int32_t)(c) != KAUTH_FILESEC_NOACL ? ((c) * sizeof(struct kauth_ace)) : 0)) +#define KAUTH_ACL_SIZE(c) (__offsetof(struct kauth_acl, acl_ace) + ((u_int32_t)(c) != KAUTH_FILESEC_NOACL ? ((c) * sizeof(struct kauth_ace)) : 0)) #define KAUTH_ACL_COPYSIZE(p) KAUTH_ACL_SIZE((p)->acl_entrycount) @@ -399,10 +417,10 @@ struct kauth_filesec { typedef struct kauth_filesec *kauth_filesec_t; #endif -#define KAUTH_FILESEC_SIZE(c) (sizeof(struct kauth_filesec) + (c) * sizeof(struct kauth_ace)) +#define KAUTH_FILESEC_SIZE(c) (__offsetof(struct kauth_filesec, fsec_acl) + __offsetof(struct kauth_acl, acl_ace) + (c) * sizeof(struct kauth_ace)) #define KAUTH_FILESEC_COPYSIZE(p) KAUTH_FILESEC_SIZE(((p)->fsec_entrycount == KAUTH_FILESEC_NOACL) ? 0 : (p)->fsec_entrycount) -#define KAUTH_FILESEC_COUNT(s) ((s - sizeof(struct kauth_filesec)) / sizeof(struct kauth_ace)) -#define KAUTH_FILESEC_VALID(s) ((s) >= sizeof(struct kauth_filesec) && (((s) - sizeof(struct kauth_filesec)) % sizeof(struct kauth_ace)) == 0) +#define KAUTH_FILESEC_COUNT(s) (((s) - KAUTH_FILESEC_SIZE(0)) / sizeof(struct kauth_ace)) +#define KAUTH_FILESEC_VALID(s) ((s) >= KAUTH_FILESEC_SIZE(0) && (((s) - KAUTH_FILESEC_SIZE(0)) % sizeof(struct kauth_ace)) == 0) #define KAUTH_FILESEC_XATTR "com.apple.system.Security" @@ -415,13 +433,6 @@ typedef struct kauth_filesec *kauth_filesec_t; #ifdef KERNEL -/* KPI */ -__BEGIN_DECLS -kauth_filesec_t kauth_filesec_alloc(int size); -void kauth_filesec_free(kauth_filesec_t fsp); -int kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp); - void kauth_filesec_acl_setendian(int, kauth_filesec_t, kauth_acl_t); -__END_DECLS /* * Scope management. 
@@ -467,18 +478,29 @@ struct kauth_acl_eval { typedef struct kauth_acl_eval *kauth_acl_eval_t; __BEGIN_DECLS +kauth_filesec_t kauth_filesec_alloc(int size); +void kauth_filesec_free(kauth_filesec_t fsp); extern kauth_scope_t kauth_register_scope(const char *_identifier, kauth_scope_callback_t _callback, void *_idata); extern void kauth_deregister_scope(kauth_scope_t _scope); extern kauth_listener_t kauth_listen_scope(const char *_identifier, kauth_scope_callback_t _callback, void *_idata); extern void kauth_unlisten_scope(kauth_listener_t _scope); extern int kauth_authorize_action(kauth_scope_t _scope, kauth_cred_t _credential, kauth_action_t _action, uintptr_t _arg0, uintptr_t _arg1, uintptr_t _arg2, uintptr_t _arg3); -extern int kauth_acl_evaluate(kauth_cred_t _credential, kauth_acl_eval_t _eval); -extern int kauth_acl_inherit(vnode_t _dvp, kauth_acl_t _initial, kauth_acl_t *_product, int _isdir, vfs_context_t _ctx); /* default scope handlers */ extern int kauth_authorize_allow(kauth_cred_t _credential, void *_idata, kauth_action_t _action, uintptr_t _arg0, uintptr_t _arg1, uintptr_t _arg2, uintptr_t _arg3); + + +#ifdef XNU_KERNEL_PRIVATE +void kauth_filesec_acl_setendian(int, kauth_filesec_t, kauth_acl_t); +int kauth_copyinfilesec(user_addr_t xsecurity, kauth_filesec_t *xsecdestpp); +extern int kauth_acl_evaluate(kauth_cred_t _credential, kauth_acl_eval_t _eval); +extern int kauth_acl_inherit(vnode_t _dvp, kauth_acl_t _initial, kauth_acl_t *_product, int _isdir, vfs_context_t _ctx); + +#endif /* XNU_KERNEL_PRIVATE */ + + __END_DECLS /* @@ -489,9 +511,11 @@ __END_DECLS /* Actions */ #define KAUTH_GENERIC_ISSUSER 1 +#ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS extern int kauth_authorize_generic(kauth_cred_t credential, kauth_action_t action); __END_DECLS +#endif /* XNU_KERNEL_PRIVATE */ /* * Process/task scope. 
@@ -558,7 +582,9 @@ __END_DECLS #define KAUTH_FILEOP_CLOSE_MODIFIED (1<<1) __BEGIN_DECLS +#ifdef XNU_KERNEL_PRIVATE extern int kauth_authorize_fileop_has_listeners(void); +#endif /* XNU_KERNEL_PRIVATE */ extern int kauth_authorize_fileop(kauth_cred_t _credential, kauth_action_t _action, uintptr_t _arg0, uintptr_t _arg1); __END_DECLS @@ -722,6 +748,7 @@ void kprintf(const char *fmt, ...); * Initialisation. */ extern lck_grp_t *kauth_lck_grp; +#ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS extern void kauth_init(void) __attribute__((section("__TEXT, initcode"))); extern void kauth_identity_init(void) __attribute__((section("__TEXT, initcode"))); @@ -729,6 +756,8 @@ extern void kauth_groups_init(void) __attribute__((section("__TEXT, initcode"))) extern void kauth_cred_init(void) __attribute__((section("__TEXT, initcode"))); extern void kauth_resolver_init(void) __attribute__((section("__TEXT, initcode"))); __END_DECLS +#endif /* XNU_KERNEL_PRIVATE */ + #endif /* KERNEL */ #endif /* __APPLE_API_EVOLVING */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index a5251673f..009398d14 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -57,6 +57,8 @@ __BEGIN_DECLS #define DBG_CACHE_HIT_FAULT 4 #define DBG_NZF_PAGE_FAULT 5 #define DBG_GUARD_FAULT 6 +#define DBG_PAGEINV_FAULT 7 +#define DBG_PAGEIND_FAULT 8 /* The debug code consists of the following @@ -88,6 +90,7 @@ __BEGIN_DECLS #define DBG_DYLD 31 #define DBG_QT 32 #define DBG_APPS 33 +#define DBG_LAUNCHD 34 #define DBG_MIG 255 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ @@ -120,9 +123,10 @@ __BEGIN_DECLS #define MACH_CALLOUT 0x4 /* callouts */ #define MACH_STACK_DETACH 0x5 #define MACH_MAKE_RUNNABLE 0x6 /* make thread runnable */ -#define MACH_PROMOTE 0x7 /* promoted due to resource */ -#define MACH_DEMOTE 0x8 /* promotion undone */ -#define MACH_IDLE 0x9 /* processor idling */ +#define MACH_PROMOTE 0x7 /* promoted due to resource */ +#define MACH_DEMOTE 0x8 /* promotion undone */ +#define MACH_IDLE 0x9 
/* processor idling */ +#define MACH_STACK_DEPTH 0xa /* stack depth at switch */ /* Codes for pmap (DBG_MACH_PMAP) */ #define PMAP__CREATE 0x0 @@ -171,8 +175,9 @@ __BEGIN_DECLS #define DBG_IOMCURS 5 /* Memory Cursor */ #define DBG_IOMDESC 6 /* Memory Descriptors */ #define DBG_IOPOWER 7 /* Power Managerment */ +#define DBG_IOSERVICE 8 /* Matching etc. */ -/* **** 8-32 reserved for internal IOKit usage **** */ +/* **** 9-32 reserved for internal IOKit usage **** */ #define DBG_IOSTORAGE 32 /* Storage layers */ #define DBG_IONETWORK 33 /* Network layers */ @@ -234,16 +239,24 @@ __BEGIN_DECLS #define DBG_FSVN 3 /* vnode operations (inc. locking/unlocking) */ #define DBG_FSLOOOKUP 4 /* namei and other lookup-related operations */ #define DBG_JOURNAL 5 /* journaling operations */ +#define DBG_IOCTL 6 /* ioctl to the disk */ +#define DBG_BOOTCACHE 7 /* bootcache operations */ /* The Kernel Debug Sub Classes for BSD */ +#define DBG_BSD_PROC 0x01 /* process/signals related */ #define DBG_BSD_EXCP_SC 0x0C /* System Calls */ #define DBG_BSD_AIO 0x0D /* aio (POSIX async IO) */ #define DBG_BSD_SC_EXTENDED_INFO 0x0E /* System Calls, extended info */ #define DBG_BSD_SC_EXTENDED_INFO2 0x0F /* System Calls, extended info */ + +/* The Codes for BSD subcode class DBG_BSD_PROC */ +#define BSD_PROC_EXIT 1 /* process exit */ +#define BSD_PROC_FRCEXIT 2 /* Kernel force termination */ /* The Kernel Debug Sub Classes for DBG_TRACE */ #define DBG_TRACE_DATA 0 #define DBG_TRACE_STRING 1 +#define DBG_TRACE_INFO 2 /* The Kernel Debug Sub Classes for DBG_MISC */ #define DBG_EVENT 0x10 @@ -328,16 +341,18 @@ extern unsigned int kdebug_enable; #if (!defined(NO_KDEBUG)) -#define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) \ -do { \ - if (kdebug_enable) \ - kernel_debug(x,a,b,c,d,e); \ +#define KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e) \ +do { \ + if (kdebug_enable) \ + kernel_debug(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ + (uintptr_t)d,(uintptr_t)e); \ } while(0) -#define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e) \ 
-do { \ - if (kdebug_enable) \ - kernel_debug1(x,a,b,c,d,e); \ +#define KERNEL_DEBUG_CONSTANT1(x,a,b,c,d,e) \ +do { \ + if (kdebug_enable) \ + kernel_debug1(x,(uintptr_t)a,(uintptr_t)b,(uintptr_t)c, \ + (uintptr_t)d,(uintptr_t)e); \ } while(0) #else @@ -348,32 +363,37 @@ do { \ #define __kdebug_constant_only __unused #endif -extern void kernel_debug(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5); - -extern void kernel_debug1(unsigned int debugid, unsigned int arg1, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5); +extern void kernel_debug( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t arg5); -/* - * LP64todo - for some reason these are problematic - */ -struct proc; -extern void kdbg_trace_data(struct proc *proc, long *arg_pid); +extern void kernel_debug1( + uint32_t debugid, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t arg5); -extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4); #if (KDEBUG && (!defined(NO_KDEBUG))) -#define KERNEL_DEBUG(x,a,b,c,d,e) \ -do { \ - if (kdebug_enable) \ - kernel_debug((unsigned int)x, (unsigned int)a, (unsigned int)b, \ - (unsigned int)c, (unsigned int)d, (unsigned int)e); \ +#define KERNEL_DEBUG(x,a,b,c,d,e) \ +do { \ + if (kdebug_enable) \ + kernel_debug((uint32_t)x, (uintptr_t)a, (uintptr_t)b, \ + (uintptr_t)c, (uintptr_t)d, (uintptr_t)e); \ } while(0) -#define KERNEL_DEBUG1(x,a,b,c,d,e) \ -do { \ - if (kdebug_enable) \ - kernel_debug1((unsigned int)x, (unsigned int)a, (unsigned int)b, \ - (unsigned int)c, (unsigned int)d, (unsigned int)e); \ +#define KERNEL_DEBUG1(x,a,b,c,d,e) \ +do { \ + if (kdebug_enable) \ + kernel_debug1((uint32_t)x, (uintptr_t)a, (uintptr_t)b, \ + (uintptr_t)c, (uintptr_t)d, (uintptr_t)e); \ } while(0) #define __kdebug_only @@ -386,7 +406,16 @@ do { \ #define __kdebug_only 
__unused #endif +#ifdef KERNEL_PRIVATE +struct proc; +extern void kdbg_trace_data(struct proc *proc, long *arg_pid); + +extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4); + +extern void kdbg_dump_trace_to_file(const char *); void start_kern_tracing(unsigned int); +#endif /* KERNEL_PRIVATE */ + #endif /* __APPLE_API_UNSTABLE */ __END_DECLS @@ -400,28 +429,90 @@ __END_DECLS typedef struct { uint64_t timestamp; - unsigned int arg1; - unsigned int arg2; - unsigned int arg3; - unsigned int arg4; - unsigned int arg5; /* will hold current thread */ - unsigned int debugid; + uintptr_t arg1; + uintptr_t arg2; + uintptr_t arg3; + uintptr_t arg4; + uintptr_t arg5; /* will hold current thread */ + uint32_t debugid; +#if defined(__LP64__) + uint32_t cpuid; + uintptr_t unused; +#endif } kd_buf; -#define KDBG_TIMESTAMP_MASK 0x00ffffffffffffffULL -#define KDBG_CPU_MASK 0x0f00000000000000ULL -#define KDBG_CPU_SHIFT 56 +#if !defined(__LP64__) +#define KDBG_TIMESTAMP_MASK 0x00ffffffffffffffULL +#define KDBG_CPU_MASK 0x0f00000000000000ULL +#define KDBG_CPU_SHIFT 56 +static inline void +kdbg_set_cpu(kd_buf *kp, int cpu) +{ + kp->timestamp = (kp->timestamp & KDBG_TIMESTAMP_MASK) | + (((uint64_t) cpu) << KDBG_CPU_SHIFT); +} +static inline int +kdbg_get_cpu(kd_buf *kp) +{ + return (int) (((kp)->timestamp & KDBG_CPU_MASK) >> KDBG_CPU_SHIFT); +} +static inline void +kdbg_set_timestamp(kd_buf *kp, uint64_t time) +{ + kp->timestamp = time & KDBG_TIMESTAMP_MASK; +} +static inline uint64_t +kdbg_get_timestamp(kd_buf *kp) +{ + return kp->timestamp & KDBG_TIMESTAMP_MASK; +} +static inline void +kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t time, int cpu) +{ + kp->timestamp = (time & KDBG_TIMESTAMP_MASK) | + (((uint64_t) cpu) << KDBG_CPU_SHIFT); +} +#else +#define KDBG_TIMESTAMP_MASK 0xffffffffffffffffULL +static inline void +kdbg_set_cpu(kd_buf *kp, int cpu) +{ + kp->cpuid = cpu; +} +static inline int +kdbg_get_cpu(kd_buf *kp) +{ + return 
kp->cpuid; +} +static inline void +kdbg_set_timestamp(kd_buf *kp, uint64_t time) +{ + kp->timestamp = time; +} +static inline uint64_t +kdbg_get_timestamp(kd_buf *kp) +{ + return kp->timestamp; +} +static inline void +kdbg_set_timestamp_and_cpu(kd_buf *kp, uint64_t time, int cpu) +{ + kdbg_set_timestamp(kp, time); + kdbg_set_cpu(kp, cpu); +} +#endif /* Debug Flags */ -#define KDBG_INIT 0x1 -#define KDBG_NOWRAP 0x2 -#define KDBG_FREERUN 0x4 -#define KDBG_WRAPPED 0x8 +#define KDBG_INIT 0x001 +#define KDBG_NOWRAP 0x002 +#define KDBG_FREERUN 0x004 +#define KDBG_WRAPPED 0x008 #define KDBG_USERFLAGS (KDBG_FREERUN|KDBG_NOWRAP|KDBG_INIT) -#define KDBG_PIDCHECK 0x10 -#define KDBG_MAPINIT 0x20 -#define KDBG_PIDEXCLUDE 0x40 -#define KDBG_LOCKINIT 0x80 +#define KDBG_PIDCHECK 0x010 +#define KDBG_MAPINIT 0x020 +#define KDBG_PIDEXCLUDE 0x040 +#define KDBG_LOCKINIT 0x080 +#define KDBG_LP64 0x100 typedef struct { unsigned int type; @@ -434,18 +525,17 @@ typedef struct { typedef struct { - int nkdbufs; - int nolog; - int flags; - int nkdthreads; - int bufid; + int nkdbufs; + int nolog; + int flags; + int nkdthreads; + int bufid; } kbufinfo_t; -typedef struct -{ - unsigned int thread; - int valid; - char command[20]; +typedef struct { + uintptr_t thread; + int valid; + char command[20]; } kd_threadmap; #define KDBG_CLASSTYPE 0x10000 diff --git a/bsd/sys/kern_callout.h b/bsd/sys/kern_callout.h new file mode 100644 index 000000000..6ac7642cd --- /dev/null +++ b/bsd/sys/kern_callout.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef KPI_KERN_CALLOUT_H +#define KPI_KERN_CALLOUT_H + +#ifdef KERNEL + +/* + * Default sample threshold for validity + */ +#define MA_SMA_SAMPLES 10 /* simple moving average */ + +/* + * Flag bits for the ma_flags field + */ +#define KCO_MA_F_SMA 0x00000001 /* Simple moving average */ +#define KCO_MA_F_WMA 0x00000002 /* Weighted moving average */ +#define KCO_MA_F_NEEDS_INIT 0x80000000 /* Need initialization */ + +struct kco_moving_average { + int ma_flags; /* flags */ + uint64_t ma_sma; /* simple over MA_SMA_SAMPLES*/ + uint64_t ma_old_sma; /* previous value */ + uint64_t ma_sma_samples[MA_SMA_SAMPLES]; /* sample history */ + int32_t ma_sma_threshold; /* trigger delta (%) */ + int ma_sma_trigger_count; /* number of times triggered */ + uint64_t ma_wma; /* weighted */ + uint64_t ma_old_wma; /* previous value */ + int ma_wma_weight; /* weighting (< 100) */ + int32_t ma_wma_threshold; /* trigger delta (%) */ + int ma_wma_trigger_count; /* number of times triggered */ +}; + +__BEGIN_DECLS +int
kco_ma_addsample(struct kco_moving_average *map, uint64_t sample_time); +void kco_ma_init(struct kco_moving_average *map, int32_t threshold, int kind); +int kco_ma_info(struct kco_moving_average *map, int kind, uint64_t *averagep, uint64_t *old_averagep, int32_t *thresholdp, int *countp); +__END_DECLS + +#endif /* KERNEL */ + +#endif /* KPI_KERN_CALLOUT_H */ diff --git a/bsd/sys/kern_event.h b/bsd/sys/kern_event.h index 7a658ac9a..393638877 100644 --- a/bsd/sys/kern_event.h +++ b/bsd/sys/kern_event.h @@ -94,6 +94,12 @@ */ #define KEV_FIREWALL_CLASS 5 +/*! + @defined KEV_IEEE80211_CLASS + @discussion IEEE 802.11 kernel event class. +*/ +#define KEV_IEEE80211_CLASS 6 + /*! @struct kern_event_msg @discussion This structure is prepended to all kernel events. This structure diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h index f94c4a7a7..3abe336cc 100644 --- a/bsd/sys/kern_memorystatus.h +++ b/bsd/sys/kern_memorystatus.h @@ -33,6 +33,12 @@ #ifndef SYS_KERN_MEMORYSTATUS_H #define SYS_KERN_MEMORYSTATUS_H +#ifndef MACH_KERNEL_PRIVATE + +#include +#include +#include + /* * Define Memory Status event subclass.
* Subclass of KEV_SYSTEM_CLASS @@ -44,6 +50,11 @@ */ #define KEV_MEMORYSTATUS_SUBCLASS 3 +enum { + kMemoryStatusLevelNote = 1, + kMemoryStatusSnapshotNote = 2 +}; + enum { kMemoryStatusLevelAny = -1, kMemoryStatusLevelNormal = 0, @@ -52,6 +63,54 @@ enum { kMemoryStatusLevelCritical = 3 }; +typedef struct jetsam_priority_entry { + pid_t pid; + uint32_t flags; +} jetsam_priority_entry_t; + +/* +** maximum killable processes to keep track of +*/ +#define kMaxPriorityEntries 64 + +typedef struct jetsam_snapshot_entry { + pid_t pid; + char name[MAXCOMLEN+1]; + uint32_t pages; + uint32_t flags; + uint8_t uuid[16]; +} jetsam_snapshot_entry_t; + +/* +** how many processes to snapshot +*/ +#define kMaxSnapshotEntries 128 + +typedef struct jetsam_kernel_stats { + uint32_t free_pages; + uint32_t active_pages; + uint32_t inactive_pages; + uint32_t purgeable_pages; + uint32_t wired_pages; +} jetsam_kernel_stats_t; + +/* +** This is a variable-length struct. +** Allocate a buffer of the size returned by the sysctl, cast to a jetsam_snapshot_t * +*/ + +typedef struct jetsam_snapshot { + jetsam_kernel_stats_t stats; + size_t entry_count; + jetsam_snapshot_entry_t entries[1]; +} jetsam_snapshot_t; + +enum { + kJetsamFlagsFrontmost = (1 << 0), + kJetsamFlagsKilled = (1 << 1) +}; +#endif /* !MACH_KERNEL_PRIVATE */ + #ifdef KERNEL extern void kern_memorystatus_init(void) __attribute__((section("__TEXT, initcode"))); diff --git a/bsd/sys/kernel_types.h b/bsd/sys/kernel_types.h index a285994de..3577f436a 100644 --- a/bsd/sys/kernel_types.h +++ b/bsd/sys/kernel_types.h @@ -23,16 +23,14 @@ typedef struct buf * buf_t; struct file; typedef struct file * file_t; +#ifndef __LP64__ struct ucred; typedef struct ucred * ucred_t; +#endif struct mount; typedef struct mount * mount_t; -#ifdef TBDDDD -typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ -#endif /* TBDDDD */ - struct vnode; typedef struct vnode * vnode_t; @@ -72,7 +70,9 @@ typedef struct __rtentry* route_t; 
typedef struct buf * buf_t; typedef struct file * file_t; +#ifndef __LP64__ typedef struct ucred * ucred_t; +#endif typedef struct mount * mount_t; typedef struct vnode * vnode_t; typedef struct proc * proc_t; diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h index 4e2b57329..e1558fa66 100644 --- a/bsd/sys/kpi_mbuf.h +++ b/bsd/sys/kpi_mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,14 +29,14 @@ @header kpi_mbuf.h This header defines an API for interacting with mbufs. mbufs are the primary method of storing packets in the networking stack. - + mbufs are used to store various items in the networking stack. The most common usage of an mbuf is to store a packet or data on a socket waiting to be sent or received. The mbuf is a contiguous structure with some header followed by some data. To store more data than would fit in an mbuf, external data is used. Most mbufs with external data use clusters to store the external data. - + mbufs can be chained, contiguous data in a packet can be found by following the m_next chain. Packets may be bundled together using m_nextpacket. Many parts of the stack do not properly handle chains @@ -69,13 +69,13 @@ being passed to any interface filters. 
*/ enum { - MBUF_EXT = 0x0001, /* has associated external storage */ - MBUF_PKTHDR = 0x0002, /* start of record */ - MBUF_EOR = 0x0004, /* end of record */ - - MBUF_BCAST = 0x0100, /* send/received as link-level broadcast */ - MBUF_MCAST = 0x0200, /* send/received as link-level multicast */ - MBUF_FRAG = 0x0400, /* packet is a fragment of a larger packet */ + MBUF_EXT = 0x0001, /* has associated external storage */ + MBUF_PKTHDR = 0x0002, /* start of record */ + MBUF_EOR = 0x0004, /* end of record */ + + MBUF_BCAST = 0x0100, /* send/received as link-level broadcast */ + MBUF_MCAST = 0x0200, /* send/received as link-level multicast */ + MBUF_FRAG = 0x0400, /* packet is a fragment of a larger packet */ MBUF_FIRSTFRAG = 0x0800, /* packet is first fragment */ MBUF_LASTFRAG = 0x1000, /* packet is last fragment */ MBUF_PROMISC = 0x2000 /* packet is promiscuous */ @@ -119,7 +119,7 @@ enum { MBUF_TYPE_PCB = 4, /* protocol control block */ MBUF_TYPE_RTABLE = 5, /* routing tables */ MBUF_TYPE_HTABLE = 6, /* IMP host tables */ - MBUF_TYPE_ATABLE = 7, /* address resolution tables */ + MBUF_TYPE_ATABLE = 7, /* address resolution tables */ MBUF_TYPE_SONAME = 8, /* socket name */ MBUF_TYPE_SOOPTS = 10, /* socket options */ MBUF_TYPE_FTABLE = 11, /* fragment reassembly header */ @@ -146,10 +146,16 @@ typedef u_int32_t mbuf_type_t; @constant MBUF_CSUM_REQ_UDP Indicates the UDP checksum has not been calculated yet. 
*/ +enum { + MBUF_TSO_IPV4 = 0x100000, + MBUF_TSO_IPV6 = 0x200000 +}; +typedef u_int32_t mbuf_tso_request_flags_t; + enum { #ifdef KERNEL_PRIVATE MBUF_CSUM_REQ_SUM16 = 0x1000, /* Weird apple hardware checksum */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ MBUF_CSUM_REQ_IP = 0x0001, MBUF_CSUM_REQ_TCP = 0x0002, MBUF_CSUM_REQ_UDP = 0x0004 @@ -181,11 +187,11 @@ typedef u_int32_t mbuf_csum_request_flags_t; */ enum { #ifdef KERNEL_PRIVATE - MBUF_CSUM_TCP_SUM16 = MBUF_CSUM_REQ_SUM16, /* Weird apple hardware checksum */ -#endif - MBUF_CSUM_DID_IP = 0x0100, - MBUF_CSUM_IP_GOOD = 0x0200, - MBUF_CSUM_DID_DATA = 0x0400, + MBUF_CSUM_TCP_SUM16 = MBUF_CSUM_REQ_SUM16, /* Weird apple hardware checksum */ +#endif /* KERNEL_PRIVATE */ + MBUF_CSUM_DID_IP = 0x0100, + MBUF_CSUM_IP_GOOD = 0x0200, + MBUF_CSUM_DID_DATA = 0x0400, MBUF_CSUM_PSEUDO_HDR = 0x0800 }; typedef u_int32_t mbuf_csum_performed_flags_t; @@ -204,8 +210,8 @@ typedef u_int32_t mbuf_csum_performed_flags_t; block, if blocking is necessary fail and return immediately. */ enum { - MBUF_WAITOK = 0, /* Ok to block to get memory */ - MBUF_DONTWAIT = 1 /* Don't block, fail if blocking would be required */ + MBUF_WAITOK = 0, /* Ok to block to get memory */ + MBUF_DONTWAIT = 1 /* Don't block, fail if blocking would be required */ }; typedef u_int32_t mbuf_how_t; @@ -235,23 +241,23 @@ typedef u_int16_t mbuf_tag_type_t; @field bigmclbytes Length of a big mbuf cluster. 
*/ struct mbuf_stat { - u_long mbufs; /* mbufs obtained from page pool */ - u_long clusters; /* clusters obtained from page pool */ - u_long clfree; /* free clusters */ - u_long drops; /* times failed to find space */ - u_long wait; /* times waited for space */ - u_long drain; /* times drained protocols for space */ - u_short mtypes[256]; /* type specific mbuf allocations */ - u_long mcfail; /* times m_copym failed */ - u_long mpfail; /* times m_pullup failed */ - u_long msize; /* length of an mbuf */ - u_long mclbytes; /* length of an mbuf cluster */ - u_long minclsize; /* min length of data to allocate a cluster */ - u_long mlen; /* length of data in an mbuf */ - u_long mhlen; /* length of data in a header mbuf */ - u_long bigclusters; /* number of big clusters */ - u_long bigclfree; /* number of big clustser free */ - u_long bigmclbytes; /* length of data in a big cluster */ + u_int32_t mbufs; /* mbufs obtained from page pool */ + u_int32_t clusters; /* clusters obtained from page pool */ + u_int32_t clfree; /* free clusters */ + u_int32_t drops; /* times failed to find space */ + u_int32_t wait; /* times waited for space */ + u_int32_t drain; /* times drained protocols for space */ + u_short mtypes[256]; /* type specific mbuf allocations */ + u_int32_t mcfail; /* times m_copym failed */ + u_int32_t mpfail; /* times m_pullup failed */ + u_int32_t msize; /* length of an mbuf */ + u_int32_t mclbytes; /* length of an mbuf cluster */ + u_int32_t minclsize; /* min length of data to allocate a cluster */ + u_int32_t mlen; /* length of data in an mbuf */ + u_int32_t mhlen; /* length of data in a header mbuf */ + u_int32_t bigclusters; /* number of big clusters */ + u_int32_t bigclfree; /* number of big clusters free */ + u_int32_t bigmclbytes; /* length of data in a big cluster */ }; /* Parameter for m_copym to copy all bytes */ @@ -277,7 +283,7 @@ __BEGIN_DECLS @param mbuf The mbuf. @result A pointer to the data in the mbuf.
*/ -void* mbuf_data(mbuf_t mbuf); +extern void *mbuf_data(mbuf_t mbuf); /*! @function mbuf_datastart @@ -285,12 +291,12 @@ void* mbuf_data(mbuf_t mbuf); data in an mbuf. An mbuf's data may come from a cluster or be embedded in the mbuf structure itself. The data pointer retrieved by mbuf_data may not be at the start of the data - (mbuf_leadingspace will be non-zero). This function will return to - you a pointer that matches mbuf_data() - mbuf_leadingspace(). + (mbuf_leadingspace will be non-zero). This function will return + a pointer that matches mbuf_data() - mbuf_leadingspace(). @param mbuf The mbuf. @result A pointer to smallest possible value for data. */ -void* mbuf_datastart(mbuf_t mbuf); +extern void *mbuf_datastart(mbuf_t mbuf); /*! @function mbuf_setdata @@ -306,7 +312,7 @@ void* mbuf_datastart(mbuf_t mbuf); @param len The new length of data in the mbuf. @result 0 on success, errno error on failure. */ -errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len); +extern errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len); /*! @function mbuf_align_32 @@ -319,7 +325,7 @@ errno_t mbuf_setdata(mbuf_t mbuf, void *data, size_t len); data location. @result 0 on success, errno error on failure. */ -errno_t mbuf_align_32(mbuf_t mbuf, size_t len); +extern errno_t mbuf_align_32(mbuf_t mbuf, size_t len); /*! @function mbuf_data_to_physical @@ -338,7 +344,7 @@ errno_t mbuf_align_32(mbuf_t mbuf, size_t len); @result The 64 bit physical address of the mbuf data or NULL if ptr does not point to data stored in an mbuf. */ -addr64_t mbuf_data_to_physical(void* ptr); +extern addr64_t mbuf_data_to_physical(void *ptr); /* Allocation */ @@ -351,7 +357,7 @@ addr64_t mbuf_data_to_physical(void* ptr); @param mbuf The mbuf. @result 0 on success, errno error on failure. */ -errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); +extern errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf); /*! 
@function mbuf_gethdr @@ -363,7 +369,7 @@ errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); @param mbuf The mbuf. @result 0 on success, errno error on failure. */ -errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); +extern errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf); /*! @function mbuf_attachcluster @@ -391,9 +397,8 @@ errno_t mbuf_gethdr(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); EINVAL - Invalid parameter ENOMEM - Not enough memory available */ -errno_t -mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf, - caddr_t extbuf, void (*extfree)(caddr_t , u_int, caddr_t), +extern errno_t mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type, + mbuf_t *mbuf, caddr_t extbuf, void (*extfree)(caddr_t , u_int, caddr_t), size_t extsize, caddr_t extarg); /*! @@ -419,7 +424,7 @@ mbuf_attachcluster(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf, In this case, the caller is advised to use 4096 bytes or smaller during subseqent requests. */ -errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr); +extern errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr); /*! @function mbuf_freecluster @@ -430,7 +435,7 @@ errno_t mbuf_alloccluster(mbuf_how_t how, size_t *size, caddr_t *addr); @param addr The address of the cluster. @param size The actual size of the cluster. */ -void mbuf_freecluster(caddr_t addr, size_t size); +extern void mbuf_freecluster(caddr_t addr, size_t size); /*! @function mbuf_getcluster @@ -459,7 +464,8 @@ void mbuf_freecluster(caddr_t addr, size_t size); In this case, the caller is advised to use 4096 bytes or smaller during subsequent requests. */ -errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, mbuf_t* mbuf); +extern errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, + mbuf_t *mbuf); /*! 
@function mbuf_mclget @@ -476,7 +482,7 @@ errno_t mbuf_getcluster(mbuf_how_t how, mbuf_type_t type, size_t size, mbuf_t* will be freed. If you specify an mbuf value in *mbuf, mbuf_mclget will not free it. */ -errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); +extern errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf); /*! @function mbuf_allocpacket @@ -513,7 +519,8 @@ errno_t mbuf_mclget(mbuf_how_t how, mbuf_type_t type, mbuf_t* mbuf); ENOBUFS - Buffers not big enough for the maximum number of chunks requested */ -errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf); +extern errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen, + unsigned int * maxchunks, mbuf_t *mbuf); /*! @function mbuf_allocpacket_list @@ -551,7 +558,8 @@ errno_t mbuf_allocpacket(mbuf_how_t how, size_t packetlen, unsigned int * maxchu ENOBUFS - Buffers not big enough for the maximum number of chunks requested */ -errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how, size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf); +extern errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how, + size_t packetlen, unsigned int * maxchunks, mbuf_t *mbuf); /*! @@ -562,7 +570,7 @@ errno_t mbuf_allocpacket_list(unsigned int numpkts, mbuf_how_t how, size_t packe @param mbuf Upon success, *mbuf will be a reference to the new mbuf. @result 0 on success, errno error on failure. */ -errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t* mbuf); +extern errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t *mbuf); /*! @function mbuf_free @@ -571,14 +579,14 @@ errno_t mbuf_getpacket(mbuf_how_t how, mbuf_t* mbuf); @param mbuf The mbuf to free. @result The next mbuf in the chain. */ -mbuf_t mbuf_free(mbuf_t mbuf); +extern mbuf_t mbuf_free(mbuf_t mbuf); /*! @function mbuf_freem @discussion Frees a chain of mbufs link through mnext. @param mbuf The first mbuf in the chain to free. 
*/ -void mbuf_freem(mbuf_t mbuf); +extern void mbuf_freem(mbuf_t mbuf); /*! @function mbuf_freem_list @@ -587,7 +595,7 @@ void mbuf_freem(mbuf_t mbuf); @param mbuf The first mbuf in the linked list to free. @result The number of mbufs freed. */ -int mbuf_freem_list(mbuf_t mbuf); +extern int mbuf_freem_list(mbuf_t mbuf); /*! @function mbuf_leadingspace @@ -596,7 +604,7 @@ int mbuf_freem_list(mbuf_t mbuf); @param mbuf The mbuf. @result The number of unused bytes at the start of the mbuf. */ -size_t mbuf_leadingspace(const mbuf_t mbuf); +extern size_t mbuf_leadingspace(const mbuf_t mbuf); /*! @function mbuf_trailingspace @@ -605,7 +613,7 @@ size_t mbuf_leadingspace(const mbuf_t mbuf); @param mbuf The mbuf. @result The number of unused bytes following the current data. */ -size_t mbuf_trailingspace(const mbuf_t mbuf); +extern size_t mbuf_trailingspace(const mbuf_t mbuf); /* Manipulation */ @@ -623,8 +631,8 @@ size_t mbuf_trailingspace(const mbuf_t mbuf); @param new_mbuf Upon success, the newly allocated mbuf. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len, - mbuf_how_t how, mbuf_t* new_mbuf); +extern errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len, + mbuf_how_t how, mbuf_t *new_mbuf); /*! @function mbuf_dup @@ -637,7 +645,7 @@ errno_t mbuf_copym(const mbuf_t src, size_t offset, size_t len, @param new_mbuf Upon success, the newly allocated mbuf. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t* new_mbuf); +extern errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t *new_mbuf); /*! @function mbuf_prepend @@ -652,7 +660,7 @@ errno_t mbuf_dup(const mbuf_t src, mbuf_how_t how, mbuf_t* new_mbuf); @param how Blocking or non-blocking. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_prepend(mbuf_t* mbuf, size_t len, mbuf_how_t how); +extern errno_t mbuf_prepend(mbuf_t *mbuf, size_t len, mbuf_how_t how); /*! 
@function mbuf_split @@ -667,8 +675,8 @@ errno_t mbuf_prepend(mbuf_t* mbuf, size_t len, mbuf_how_t how); failure, the original mbuf chain passed in to src will be preserved. */ -errno_t mbuf_split(mbuf_t src, size_t offset, - mbuf_how_t how, mbuf_t* new_mbuf); +extern errno_t mbuf_split(mbuf_t src, size_t offset, mbuf_how_t how, + mbuf_t *new_mbuf); /*! @function mbuf_pullup @@ -681,7 +689,7 @@ errno_t mbuf_split(mbuf_t src, size_t offset, @result 0 upon success otherwise the errno error. In the case of an error, the mbuf chain has been freed. */ -errno_t mbuf_pullup(mbuf_t* mbuf, size_t len); +extern errno_t mbuf_pullup(mbuf_t *mbuf, size_t len); /*! @function mbuf_pulldown @@ -701,7 +709,8 @@ errno_t mbuf_pullup(mbuf_t* mbuf, size_t len); in. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length, mbuf_t *location); +extern errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length, + mbuf_t *location); /*! @function mbuf_adj @@ -712,7 +721,7 @@ errno_t mbuf_pulldown(mbuf_t src, size_t *offset, size_t length, mbuf_t *locati @param mbuf The mbuf chain to trim. @param len The number of bytes to trim from the mbuf chain. */ -void mbuf_adj(mbuf_t mbuf, int len); +extern void mbuf_adj(mbuf_t mbuf, int len); /*! @function mbuf_adjustlen @@ -725,7 +734,26 @@ void mbuf_adj(mbuf_t mbuf, int len); @param amount The number of bytes increment the length by. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_adjustlen(mbuf_t mbuf, int amount); +extern errno_t mbuf_adjustlen(mbuf_t mbuf, int amount); + +/*! + @function mbuf_concatenate + @discussion Concatenate mbuf chain src to dst using m_next and return + a chain which represents the concatenated chain. The routine + does not prevent two chains of different mbuf types to be + concatenated, nor does it modify any packet header in the + destination chain. 
Therefore, it's the responsibility of the + caller to ensure that the resulting concatenated mbuf chain is + correct for further use. + @param dst The destination mbuf chain. + @param src The source mbuf chain. + @result A pointer to the head of the concatenated mbuf chain. This + should be treated as the updated destination mbuf chain; the + caller must no longer refer to the original src or dst mbuf + chain. Otherwise it returns NULL if the original dst mbuf + chain is NULL. + */ +extern mbuf_t mbuf_concatenate(mbuf_t dst, mbuf_t src); /*! @function mbuf_copydata @@ -739,18 +767,19 @@ errno_t mbuf_adjustlen(mbuf_t mbuf, int amount); copied. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length, void* out_data); +extern errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length, + void *out_data); /*! @function mbuf_copyback @discussion Copies data from a buffer to an mbuf chain. mbuf_copyback will grow the chain to fit the specified buffer. - + If mbuf_copydata is unable to allocate enough mbufs to grow the chain, ENOBUFS will be returned. The mbuf chain will be shorter than expected but all of the data up to the end of the mbuf chain will be valid. - + If an offset is specified, mbuf_copyback will skip that many bytes in the mbuf chain before starting to write the buffer in to the chain. If the mbuf chain does not contain this many @@ -763,8 +792,8 @@ errno_t mbuf_copydata(const mbuf_t mbuf, size_t offset, size_t length, void* ou @param how Blocking or non-blocking. @result 0 upon success, EINVAL or ENOBUFS upon failure. */ -errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length, - const void *data, mbuf_how_t how); +extern errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length, + const void *data, mbuf_how_t how); /*!
@function mbuf_mclhasreference @@ -774,7 +803,7 @@ errno_t mbuf_copyback(mbuf_t mbuf, size_t offset, size_t length, @param mbuf The mbuf with the cluster to test. @result 0 if there is no reference by another mbuf, 1 otherwise. */ -int mbuf_mclhasreference(mbuf_t mbuf); +extern int mbuf_mclhasreference(mbuf_t mbuf); /* mbuf header */ @@ -785,7 +814,7 @@ int mbuf_mclhasreference(mbuf_t mbuf); @param mbuf The mbuf. @result The next mbuf in the chain. */ -mbuf_t mbuf_next(const mbuf_t mbuf); +extern mbuf_t mbuf_next(const mbuf_t mbuf); /*! @function mbuf_setnext @@ -794,7 +823,7 @@ mbuf_t mbuf_next(const mbuf_t mbuf); @param next The new next mbuf. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next); +extern errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next); /*! @function mbuf_nextpkt @@ -802,7 +831,7 @@ errno_t mbuf_setnext(mbuf_t mbuf, mbuf_t next); @param mbuf The mbuf. @result The nextpkt. */ -mbuf_t mbuf_nextpkt(const mbuf_t mbuf); +extern mbuf_t mbuf_nextpkt(const mbuf_t mbuf); /*! @function mbuf_setnextpkt @@ -810,7 +839,7 @@ mbuf_t mbuf_nextpkt(const mbuf_t mbuf); @param mbuf The mbuf. @param nextpkt The new next packet. */ -void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt); +extern void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt); /*! @function mbuf_len @@ -818,7 +847,7 @@ void mbuf_setnextpkt(mbuf_t mbuf, mbuf_t nextpkt); @param mbuf The mbuf. @result The length. */ -size_t mbuf_len(const mbuf_t mbuf); +extern size_t mbuf_len(const mbuf_t mbuf); /*! @function mbuf_setlen @@ -828,7 +857,7 @@ size_t mbuf_len(const mbuf_t mbuf); @param len The new length. @result 0 upon success otherwise the errno error. */ -void mbuf_setlen(mbuf_t mbuf, size_t len); +extern void mbuf_setlen(mbuf_t mbuf, size_t len); /*! @function mbuf_maxlen @@ -839,7 +868,7 @@ void mbuf_setlen(mbuf_t mbuf, size_t len); @param mbuf The mbuf. @result The maximum lenght of data for this mbuf. 
*/ -size_t mbuf_maxlen(const mbuf_t mbuf); +extern size_t mbuf_maxlen(const mbuf_t mbuf); /*! @function mbuf_type @@ -847,7 +876,7 @@ size_t mbuf_maxlen(const mbuf_t mbuf); @param mbuf The mbuf. @result The type. */ -mbuf_type_t mbuf_type(const mbuf_t mbuf); +extern mbuf_type_t mbuf_type(const mbuf_t mbuf); /*! @function mbuf_settype @@ -856,7 +885,7 @@ mbuf_type_t mbuf_type(const mbuf_t mbuf); @param new_type The new type. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type); +extern errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type); /*! @function mbuf_flags @@ -864,7 +893,7 @@ errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type); @param mbuf The mbuf. @result The flags. */ -mbuf_flags_t mbuf_flags(const mbuf_t mbuf); +extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf); /*! @function mbuf_setflags @@ -873,7 +902,7 @@ mbuf_flags_t mbuf_flags(const mbuf_t mbuf); @param flags The flags that should be set, all other flags will be cleared. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags); +extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags); /*! @function mbuf_setflags_mask @@ -884,8 +913,8 @@ errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags); @param mask The mask controlling which flags will be modified. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, - mbuf_flags_t mask); +extern errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, + mbuf_flags_t mask); /*! @function mbuf_copy_pkthdr @@ -894,7 +923,7 @@ errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, @param mbuf The mbuf to which the packet header will be copied. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src); +extern errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src); /*! 
@function mbuf_pkthdr_len @@ -903,7 +932,7 @@ errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src); be changed. @result The length, in bytes, of the packet. */ -size_t mbuf_pkthdr_len(const mbuf_t mbuf); +extern size_t mbuf_pkthdr_len(const mbuf_t mbuf); /*! @function mbuf_pkthdr_setlen @@ -911,7 +940,7 @@ size_t mbuf_pkthdr_len(const mbuf_t mbuf); @param mbuf The mbuf containing the packet header. @param len The new length of the packet. */ -void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len); +extern void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len); /*! @function mbuf_pkthdr_adjustlen @@ -920,7 +949,7 @@ void mbuf_pkthdr_setlen(mbuf_t mbuf, size_t len); @param amount The number of bytes to adjust the packet header length field by. */ -void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount); +extern void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount); /*! @function mbuf_pkthdr_rcvif @@ -934,7 +963,7 @@ void mbuf_pkthdr_adjustlen(mbuf_t mbuf, int amount); @param mbuf The mbuf containing the packet header. @result A reference to the interface. */ -ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf); +extern ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf); /*! @function mbuf_pkthdr_setrcvif @@ -943,7 +972,7 @@ ifnet_t mbuf_pkthdr_rcvif(const mbuf_t mbuf); @param ifnet A reference to an interface. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp); +extern errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp); /*! @function mbuf_pkthdr_header @@ -951,7 +980,7 @@ errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifp); @param mbuf The mbuf containing the packet header. @result A pointer to the packet header. */ -void* mbuf_pkthdr_header(const mbuf_t mbuf); +extern void *mbuf_pkthdr_header(const mbuf_t mbuf); /*! @function mbuf_pkthdr_setheader @@ -960,7 +989,7 @@ void* mbuf_pkthdr_header(const mbuf_t mbuf); @param ifnet A pointer to the header. @result 0 upon success otherwise the errno error. 
*/ -void mbuf_pkthdr_setheader(mbuf_t mbuf, void* header); +extern void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header); /* Checksums */ @@ -977,13 +1006,13 @@ void mbuf_pkthdr_setheader(mbuf_t mbuf, void* header); original checksum was valid. @param mbuf The mbuf that has been modified. */ -void mbuf_inbound_modified(mbuf_t mbuf); +extern void mbuf_inbound_modified(mbuf_t mbuf); /*! @function mbuf_outbound_finalize @discussion This function will "finalize" the packet allowing your code to inspect the final packet. - + There are a number of operations that are performed in hardware, such as calculating checksums. This function will perform in software the various opterations that were scheduled to be done @@ -997,10 +1026,10 @@ void mbuf_inbound_modified(mbuf_t mbuf); hardware, you must call this function to get an aproximate final packet. If you plan to modify the packet in any way, you should call this function. - + This function should be called before modifying any outbound packets. - + This function may be called at various levels, in some cases additional headers may have already been prepended, such as the case of a packet seen by an interface filter. To handle this, @@ -1014,8 +1043,8 @@ void mbuf_inbound_modified(mbuf_t mbuf); protocol header. For an IP packet with an ethernet header, this would be the length of an ethernet header. */ -void mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, - size_t protocol_offset); +extern void mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family, + size_t protocol_offset); /*! @function mbuf_set_vlan_tag @@ -1026,7 +1055,7 @@ void mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, @param vlan The protocol family of the aux data to add. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan); +extern errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan); /*! 
@function mbuf_get_vlan_tag @@ -1040,7 +1069,7 @@ errno_t mbuf_set_vlan_tag(mbuf_t mbuf, u_int16_t vlan); @result 0 upon success otherwise the errno error. ENXIO indicates that the vlan tag is not set. */ -errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan); +extern errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan); /*! @function mbuf_clear_vlan_tag @@ -1049,7 +1078,7 @@ errno_t mbuf_get_vlan_tag(mbuf_t mbuf, u_int16_t *vlan); @param mbuf The mbuf containing the packet. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_clear_vlan_tag(mbuf_t mbuf); +extern errno_t mbuf_clear_vlan_tag(mbuf_t mbuf); #ifdef KERNEL_PRIVATE /* @@ -1066,9 +1095,9 @@ errno_t mbuf_clear_vlan_tag(mbuf_t mbuf); @param value This parameter is currently unsupported. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_set_csum_requested(mbuf_t mbuf, - mbuf_csum_request_flags_t request, u_int32_t value); -#endif +extern errno_t mbuf_set_csum_requested(mbuf_t mbuf, + mbuf_csum_request_flags_t request, u_int32_t value); +#endif /* KERNEL_PRIVATE */ /*! @function mbuf_get_csum_requested @@ -1080,8 +1109,21 @@ errno_t mbuf_set_csum_requested(mbuf_t mbuf, @param value This parameter is currently unsupported. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_get_csum_requested(mbuf_t mbuf, - mbuf_csum_request_flags_t *request, u_int32_t *value); +extern errno_t mbuf_get_csum_requested(mbuf_t mbuf, + mbuf_csum_request_flags_t *request, u_int32_t *value); + +/*! + @function mbuf_get_tso_requested + @discussion This function is used by the driver to determine which + TSO (TCP segmentation offload) operations should be performed in hardware. + @param mbuf The mbuf containing the packet. + @param request Flags indicating which values are being requested + for this packet. + @param value The requested value. + @result 0 upon success otherwise the errno error.
+ */ +extern errno_t mbuf_get_tso_requested(mbuf_t mbuf, + mbuf_tso_request_flags_t *request, u_int32_t *value); /*! @function mbuf_clear_csum_requested @@ -1089,7 +1131,7 @@ errno_t mbuf_get_csum_requested(mbuf_t mbuf, @param mbuf The mbuf containing the packet. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_clear_csum_requested(mbuf_t mbuf); +extern errno_t mbuf_clear_csum_requested(mbuf_t mbuf); /*! @function mbuf_set_csum_performed @@ -1103,8 +1145,8 @@ errno_t mbuf_clear_csum_requested(mbuf_t mbuf); hardware. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_set_csum_performed(mbuf_t mbuf, - mbuf_csum_performed_flags_t flags, u_int32_t value); +extern errno_t mbuf_set_csum_performed(mbuf_t mbuf, + mbuf_csum_performed_flags_t flags, u_int32_t value); #ifdef KERNEL_PRIVATE /* @@ -1119,9 +1161,27 @@ errno_t mbuf_set_csum_performed(mbuf_t mbuf, hardware. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_get_csum_performed(mbuf_t mbuf, - mbuf_csum_performed_flags_t *flags, u_int32_t *value); -#endif +extern errno_t mbuf_get_csum_performed(mbuf_t mbuf, + mbuf_csum_performed_flags_t *flags, u_int32_t *value); +#endif /* KERNEL_PRIVATE */ + +/*! + @function mbuf_get_mlen + @discussion This routine returns the number of data bytes in a normal + mbuf, i.e. an mbuf that is not a packet header, nor one with + an external cluster attached to it. This is equivalent to the + legacy MLEN macro. + @result The number of bytes of available data. + */ +extern u_int32_t mbuf_get_mlen(void); + +/*! + @function mbuf_get_mhlen + @discussion This routine returns the number of data bytes in a packet + header mbuf. This is equivalent to the legacy MHLEN macro. + @result The number of bytes of available data. + */ +extern u_int32_t mbuf_get_mhlen(void); /*! @function mbuf_clear_csum_performed @@ -1129,7 +1189,7 @@ errno_t mbuf_get_csum_performed(mbuf_t mbuf, @param mbuf The mbuf containing the packet. 
@result 0 upon success otherwise the errno error. */ -errno_t mbuf_clear_csum_performed(mbuf_t mbuf); +extern errno_t mbuf_clear_csum_performed(mbuf_t mbuf); /*! @function mbuf_inet_cksum @@ -1157,8 +1217,8 @@ errno_t mbuf_clear_csum_performed(mbuf_t mbuf); this variable. The caller must set it to a non-NULL value. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, - u_int32_t length, u_int16_t *csum); +extern errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, + u_int32_t length, u_int16_t *csum); /*! @function mbuf_inet6_cksum @@ -1186,8 +1246,8 @@ errno_t mbuf_inet_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, this variable. The caller must set it to a non-NULL value. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, - u_int32_t length, u_int16_t *csum); +extern errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, + u_int32_t length, u_int16_t *csum); /* mbuf tags */ @@ -1196,12 +1256,12 @@ errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, @discussion Lookup the module id for a string. If there is no module id assigned to this string, a new module id will be assigned. The string should be the bundle id of the kext. In the case of a - tag that will be shared across multiple kexts, a common bundle id - style string should be used. - + tag that will be shared across multiple kexts, a common bundle + id style string should be used. + The lookup operation is not optimized. A module should call this - function once during startup and chache the module id. The module id - will not be resassigned until the machine reboots. + function once during startup and cache the module id. The + module id will not be reassigned until the machine reboots. @param module_string A unique string identifying your module. Example: com.apple.nke.SharedIP.
@param module_id Upon return, a unique identifier for use with @@ -1209,16 +1269,16 @@ errno_t mbuf_inet6_cksum(mbuf_t mbuf, int protocol, u_int32_t offset, is rebooted. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_tag_id_find(const char *module_string, - mbuf_tag_id_t *module_id); +extern errno_t mbuf_tag_id_find(const char *module_string, + mbuf_tag_id_t *module_id); /*! @function mbuf_tag_allocate @discussion Allocate an mbuf tag. Mbuf tags allow various portions of the stack to tag mbufs with data that will travel with the mbuf through the stack. - - Tags may only be added to mbufs with packet headers + + Tags may only be added to mbufs with packet headers (MBUF_PKTHDR flag is set). Mbuf tags are freed when the mbuf is freed or when mbuf_tag_free is called. @param mbuf The mbuf to attach this tag to. @@ -1233,9 +1293,8 @@ errno_t mbuf_tag_id_find(const char *module_string, buffer allocated for the mtag. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id, - mbuf_tag_type_t type, size_t length, - mbuf_how_t how, void** data_p); +extern errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type, size_t length, mbuf_how_t how, void **data_p); /*! @function mbuf_tag_find @@ -1249,8 +1308,8 @@ errno_t mbuf_tag_allocate(mbuf_t mbuf, mbuf_tag_id_t module_id, buffer allocated for the mtag. @result 0 upon success otherwise the errno error. */ -errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id, - mbuf_tag_type_t type, size_t *length, void** data_p); +extern errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type, size_t *length, void **data_p); /*! @function mbuf_tag_free @@ -1259,8 +1318,8 @@ errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id, @param module_id The ID of the tag to free. @param type The type of the tag to free. 
*/ -void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id, - mbuf_tag_type_t type); +extern void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id, + mbuf_tag_type_t type); /* mbuf stats */ @@ -1269,37 +1328,38 @@ void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id, @discussion Get the mbuf statistics. @param stats Storage to copy the stats in to. */ -void mbuf_stats(struct mbuf_stat* stats); - +extern void mbuf_stats(struct mbuf_stat *stats); /* IF_QUEUE interaction */ -#define IF_ENQUEUE_MBUF(ifq, m) { \ - mbuf_setnextpkt((m), 0); \ - if ((ifq)->ifq_tail == 0) \ - (ifq)->ifq_head = (m); \ - else \ - mbuf_setnextpkt((mbuf_t)(ifq)->ifq_tail, (m)); \ - (ifq)->ifq_tail = (m); \ - (ifq)->ifq_len++; \ +#define IF_ENQUEUE_MBUF(ifq, m) { \ + mbuf_setnextpkt((m), 0); \ + if ((ifq)->ifq_tail == 0) \ + (ifq)->ifq_head = (m); \ + else \ + mbuf_setnextpkt((mbuf_t)(ifq)->ifq_tail, (m)); \ + (ifq)->ifq_tail = (m); \ + (ifq)->ifq_len++; \ } -#define IF_PREPEND_MBUF(ifq, m) { \ - mbuf_setnextpkt((m), (ifq)->ifq_head); \ - if ((ifq)->ifq_tail == 0) \ - (ifq)->ifq_tail = (m); \ - (ifq)->ifq_head = (m); \ - (ifq)->ifq_len++; \ + +#define IF_PREPEND_MBUF(ifq, m) { \ + mbuf_setnextpkt((m), (ifq)->ifq_head); \ + if ((ifq)->ifq_tail == 0) \ + (ifq)->ifq_tail = (m); \ + (ifq)->ifq_head = (m); \ + (ifq)->ifq_len++; \ } -#define IF_DEQUEUE_MBUF(ifq, m) { \ - (m) = (ifq)->ifq_head; \ - if (m) { \ - if (((ifq)->ifq_head = mbuf_nextpkt((m))) == 0) \ - (ifq)->ifq_tail = 0; \ - mbuf_setnextpkt((m), 0); \ - (ifq)->ifq_len--; \ - } \ + +#define IF_DEQUEUE_MBUF(ifq, m) { \ + (m) = (ifq)->ifq_head; \ + if (m) { \ + if (((ifq)->ifq_head = mbuf_nextpkt((m))) == 0) \ + (ifq)->ifq_tail = 0; \ + mbuf_setnextpkt((m), 0); \ + (ifq)->ifq_len--; \ + } \ } __END_DECLS -#endif +#endif /* __KPI_MBUF__ */ diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h index 1e63945cf..28cbd28ac 100644 --- a/bsd/sys/kpi_socket.h +++ b/bsd/sys/kpi_socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, 
Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,7 +62,7 @@ struct timeval; @param cookie The cookie passed in when the socket was created. @param waitf Indicates whether or not it's safe to block. */ -typedef void (*sock_upcall)(socket_t so, void* cookie, int waitf); +typedef void (*sock_upcall)(socket_t so, void *cookie, int waitf); /*! @function sock_accept @@ -85,8 +85,8 @@ typedef void (*sock_upcall)(socket_t so, void* cookie, int waitf); socket for tracking the connection. @result 0 on success otherwise the errno error. */ -errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen, - int flags, sock_upcall callback, void* cookie, socket_t *new_so); +extern errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen, + int flags, sock_upcall callback, void *cookie, socket_t *new_so); /*! @function sock_bind @@ -96,7 +96,7 @@ errno_t sock_accept(socket_t so, struct sockaddr *from, int fromlen, @param to The local address the socket should be bound to. @result 0 on success otherwise the errno error. */ -errno_t sock_bind(socket_t so, const struct sockaddr *to); +extern errno_t sock_bind(socket_t so, const struct sockaddr *to); /*! @function sock_connect @@ -112,7 +112,7 @@ errno_t sock_bind(socket_t so, const struct sockaddr *to); @result 0 on success, EINPROGRESS for a non-blocking connect that has not completed, otherwise the errno error. */ -errno_t sock_connect(socket_t so, const struct sockaddr *to, int flags); +extern errno_t sock_connect(socket_t so, const struct sockaddr *to, int flags); #ifdef KERNEL_PRIVATE /* @@ -132,7 +132,7 @@ errno_t sock_connect(socket_t so, const struct sockaddr *to, int flags); returned if the connection did not complete in the timeout specified. */ -errno_t sock_connectwait(socket_t so, const struct timeval *tv); +extern errno_t sock_connectwait(socket_t so, const struct timeval *tv); #endif /* KERNEL_PRIVATE */ /*! 
@@ -144,7 +144,7 @@ errno_t sock_connectwait(socket_t so, const struct timeval *tv); @param peernamelen Length of storage for the peer name. @result 0 on success otherwise the errno error. */ -errno_t sock_getpeername(socket_t so, struct sockaddr *peername, +extern errno_t sock_getpeername(socket_t so, struct sockaddr *peername, int peernamelen); /*! @@ -156,7 +156,7 @@ errno_t sock_getpeername(socket_t so, struct sockaddr *peername, @param socknamelen Length of storage for the socket name. @result 0 on success otherwise the errno error. */ -errno_t sock_getsockname(socket_t so, struct sockaddr *sockname, +extern errno_t sock_getsockname(socket_t so, struct sockaddr *sockname, int socknamelen); /*! @@ -169,8 +169,8 @@ errno_t sock_getsockname(socket_t so, struct sockaddr *sockname, @param optlen The length of optval, returns the actual length. @result 0 on success otherwise the errno error. */ -errno_t sock_getsockopt(socket_t so, int level, int optname, void *optval, - int *optlen); +extern errno_t sock_getsockopt(socket_t so, int level, int optname, + void *optval, int *optlen); /*! @function sock_ioctl @@ -180,7 +180,7 @@ errno_t sock_getsockopt(socket_t so, int level, int optname, void *optval, @param argp The argument. @result 0 on success otherwise the errno error. */ -errno_t sock_ioctl(socket_t so, unsigned long request, void *argp); +extern errno_t sock_ioctl(socket_t so, unsigned long request, void *argp); /*! @function sock_setsockopt @@ -192,8 +192,41 @@ errno_t sock_ioctl(socket_t so, unsigned long request, void *argp); @param optlen The length of optval. @result 0 on success otherwise the errno error. 
*/ -errno_t sock_setsockopt(socket_t so, int level, int optname, const void *optval, - int optlen); +extern errno_t sock_setsockopt(socket_t so, int level, int optname, + const void *optval, int optlen); + +#ifdef KERNEL_PRIVATE +/* + This function was added to support AFP setting the traffic class + for a backup stream within a wireless LAN or over link-local address. + + If you feel you need to use this function, please contact us to + explain why. + + @function sock_settclassopt + @discussion Allows a caller to set the traffic class. + @param so The socket. + @param optval The option value. + @param optlen The length of optval. + @result 0 on success otherwise the errno error. + */ +extern errno_t sock_settclassopt(socket_t so, const void* optval, size_t optlen); + +/* + This function was added to support AFP getting the traffic class + set on a stream. + + This is also a private API; please contact us if you need to use it. + + @function sock_gettclassopt + @discussion Allows a caller to get the traffic class. + @param so The socket. + @param optval The option value. + @param optlen The length of optval, returns the actual length. + @result 0 on success otherwise the errno error. +*/ +extern errno_t sock_gettclassopt(socket_t so, void* optval, size_t* optlen); +#endif /*! @function sock_listen @@ -203,7 +236,7 @@ errno_t sock_setsockopt(socket_t so, int level, int optname, const void *optval, @param backlog The maximum length of the queue of pending connections. @result 0 on success otherwise the errno error. */ -errno_t sock_listen(socket_t so, int backlog); +extern errno_t sock_listen(socket_t so, int backlog); /*! @function sock_receive @@ -217,7 +250,7 @@ errno_t sock_listen(socket_t so, int backlog); @result 0 on success, EWOULDBLOCK if non-blocking and operation would cause the thread to block, otherwise the errno error.
*/ -errno_t sock_receive(socket_t so, struct msghdr *msg, int flags, +extern errno_t sock_receive(socket_t so, struct msghdr *msg, int flags, size_t *recvdlen); /*! @@ -238,7 +271,7 @@ errno_t sock_receive(socket_t so, struct msghdr *msg, int flags, @result 0 on success, EWOULDBLOCK if non-blocking and operation would cause the thread to block, otherwise the errno error. */ -errno_t sock_receivembuf(socket_t so, struct msghdr *msg, mbuf_t *data, +extern errno_t sock_receivembuf(socket_t so, struct msghdr *msg, mbuf_t *data, int flags, size_t *recvlen); /*! @@ -253,7 +286,7 @@ errno_t sock_receivembuf(socket_t so, struct msghdr *msg, mbuf_t *data, @result 0 on success, EWOULDBLOCK if non-blocking and operation would cause the thread to block, otherwise the errno error. */ -errno_t sock_send(socket_t so, const struct msghdr *msg, int flags, +extern errno_t sock_send(socket_t so, const struct msghdr *msg, int flags, size_t *sentlen); /*! @@ -270,7 +303,7 @@ errno_t sock_send(socket_t so, const struct msghdr *msg, int flags, would cause the thread to block, otherwise the errno error. Regardless of return value, the mbuf chain 'data' will be freed. */ -errno_t sock_sendmbuf(socket_t so, const struct msghdr *msg, mbuf_t data, +extern errno_t sock_sendmbuf(socket_t so, const struct msghdr *msg, mbuf_t data, int flags, size_t *sentlen); /*! @@ -278,10 +311,12 @@ errno_t sock_sendmbuf(socket_t so, const struct msghdr *msg, mbuf_t data, @discussion Shutdown one or both directions of a connection. See 'man 2 shutdown' for more information. @param so The socket. - @param how SHUT_RD - shutdown receive. SHUT_WR - shutdown send. SHUT_RDWR - shutdown both. + @param how SHUT_RD - shutdown receive. + SHUT_WR - shutdown send. + SHUT_RDWR - shutdown both. @result 0 on success otherwise the errno error. */ -errno_t sock_shutdown(socket_t so, int how); +extern errno_t sock_shutdown(socket_t so, int how); /*! 
@function sock_socket @@ -297,18 +332,18 @@ errno_t sock_shutdown(socket_t so, int how); @param new_so Upon success, a reference to the new socket. @result 0 on success otherwise the errno error. */ -errno_t sock_socket(int domain, int type, int protocol, sock_upcall callback, - void* cookie, socket_t *new_so); +extern errno_t sock_socket(int domain, int type, int protocol, + sock_upcall callback, void *cookie, socket_t *new_so); /*! @function sock_close @discussion Close the socket. @param so The socket to close. This should only ever be a socket created with sock_socket. Closing a socket created in user space - using sock_close may leave a file descriptor pointing to the closed - socket, resulting in undefined behavior. + using sock_close may leave a file descriptor pointing to the + closed socket, resulting in undefined behavior. */ -void sock_close(socket_t so); +extern void sock_close(socket_t so); #ifdef KERNEL_PRIVATE /* @@ -321,7 +356,7 @@ void sock_close(socket_t so); that socket. It is used in conjunction with sock_release(socket_t so). */ -void sock_retain(socket_t so); +extern void sock_retain(socket_t so); /* @function sock_release @@ -331,7 +366,7 @@ void sock_retain(socket_t so); on a socket acquired with sock_retain. When the last retain count is reached, this will call sock_close to close the socket. */ -void sock_release(socket_t so); +extern void sock_release(socket_t so); #endif /* KERNEL_PRIVATE */ /*! @@ -342,7 +377,7 @@ void sock_release(socket_t so); @param on Indicate whether or not the SS_PRIV flag should be set. @result 0 on success otherwise the errno error. */ -errno_t sock_setpriv(socket_t so, int on); +extern errno_t sock_setpriv(socket_t so, int on); /*! @function sock_isconnected @@ -350,7 +385,7 @@ errno_t sock_setpriv(socket_t so, int on); @param so The socket to check. @result 0 - socket is not connected. 1 - socket is connected. */ -int sock_isconnected(socket_t so); +extern int sock_isconnected(socket_t so); /*! 
@function sock_isnonblocking @@ -363,7 +398,7 @@ int sock_isconnected(socket_t so); If the parameter is non-zero, the socket will not block. @result 0 - socket will block. 1 - socket will not block. */ -int sock_isnonblocking(socket_t so); +extern int sock_isnonblocking(socket_t so); /*! @function sock_gettype @@ -372,12 +407,12 @@ int sock_isnonblocking(socket_t so); parameters following so are NULL, that information is not retrieved. @param so The socket to check. - @param domain The domain of the socket (PF_INET, etc...). May be NULL. - @param type The socket type (SOCK_STREAM, SOCK_DGRAM, etc...). May be NULL. + @param domain The domain of the socket (PF_INET, ...). May be NULL. + @param type The socket type (SOCK_STREAM, SOCK_DGRAM, ...). May be NULL. @param protocol The socket protocol. May be NULL. @result 0 on success otherwise the errno error. */ -errno_t sock_gettype(socket_t so, int *domain, int *type, int *protocol); +extern errno_t sock_gettype(socket_t so, int *domain, int *type, int *protocol); #ifdef KERNEL_PRIVATE /* @@ -388,7 +423,7 @@ errno_t sock_gettype(socket_t so, int *domain, int *type, int *protocol); @param on Indicate whether or not the SB_NOINTR flag should be set. @result 0 on success otherwise the errno error. */ -errno_t sock_nointerrupt(socket_t so, int on); +extern errno_t sock_nointerrupt(socket_t so, int on); /* @function sock_getlistener @@ -410,7 +445,7 @@ errno_t sock_nointerrupt(socket_t so, int on); NULL if the socket is not in the incomplete/completed list of a listener. */ -socket_t sock_getlistener(socket_t so); +extern socket_t sock_getlistener(socket_t so); /* @function sock_getaddr @@ -424,14 +459,15 @@ socket_t sock_getlistener(socket_t so); @param peername 0 for local address, and non-zero for peer address. @result 0 on success otherwise the errno error. 
*/ -errno_t sock_getaddr(socket_t so, struct sockaddr **psockname, int peername); +extern errno_t sock_getaddr(socket_t so, struct sockaddr **psockname, + int peername); /* @function sock_freeaddr @discussion Frees the socket address allocated by sock_getaddr. @param sockname The socket name to be freed. */ -void sock_freeaddr(struct sockaddr *sockname); +extern void sock_freeaddr(struct sockaddr *sockname); #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/kpi_socketfilter.h b/bsd/sys/kpi_socketfilter.h index 5be9ed377..e5ace3c03 100644 --- a/bsd/sys/kpi_socketfilter.h +++ b/bsd/sys/kpi_socketfilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,7 +29,7 @@ @header kpi_socketfilter.h This header defines an API for intercepting communications at the socket layer. - + For the most part, socket filters want to do three things: Filter data in and out, watch for state changes, and intercept a few calls for security. The number of function pointers supplied by a socket @@ -140,7 +140,7 @@ __BEGIN_DECLS /*! @typedef sf_unregistered_func - + @discussion sf_unregistered_func is called to notify the filter it has been unregistered. This is the last function the stack will call and this function will only be called once all other @@ -148,11 +148,11 @@ __BEGIN_DECLS function has been called, your kext may safely unload. @param handle The socket filter handle used to identify this filter. */ -typedef void (*sf_unregistered_func)(sflt_handle handle); +typedef void (*sf_unregistered_func)(sflt_handle handle); /*! @typedef sf_attach_func - + @discussion sf_attach_func is called to notify the filter it has been attached to a socket. The filter may allocate memory for this attachment and use the cookie to track it. This filter is @@ -170,7 +170,7 @@ typedef errno_t (*sf_attach_func)(void **cookie, socket_t so); /*! 
@typedef sf_detach_func - + @discussion sf_detach_func is called to notify the filter it has been detached from a socket. If the filter allocated any memory for this attachment, it should be freed. This function will @@ -181,11 +181,11 @@ typedef errno_t (*sf_attach_func)(void **cookie, socket_t so); @result If you return a non-zero value, your filter will not be attached to this socket. */ -typedef void (*sf_detach_func)(void *cookie, socket_t so); +typedef void (*sf_detach_func)(void *cookie, socket_t so); /*! @typedef sf_notify_func - + @discussion sf_notify_func is called to notify the filter of various state changes and other events occuring on the socket. @param cookie Cookie value specified when the filter attach was @@ -194,12 +194,12 @@ typedef void (*sf_detach_func)(void *cookie, socket_t so); @param event The type of event that has occurred. @param param Additional information about the event. */ -typedef void (*sf_notify_func)(void *cookie, socket_t so, - sflt_event_t event, void *param); +typedef void (*sf_notify_func)(void *cookie, socket_t so, sflt_event_t event, + void *param); /*! @typedef sf_getpeername_func - + @discussion sf_getpeername_func is called to allow a filter to to intercept the getpeername function. When called, sa will point to a pointer to a socket address that was malloced @@ -215,12 +215,12 @@ typedef void (*sf_notify_func)(void *cookie, socket_t so, but a result of zero will be returned to the caller of getpeername. */ -typedef int (*sf_getpeername_func)(void *cookie, socket_t so, - struct sockaddr **sa); +typedef int (*sf_getpeername_func)(void *cookie, socket_t so, + struct sockaddr **sa); /*! @typedef sf_getsockname_func - + @discussion sf_getsockname_func is called to allow a filter to to intercept the getsockname function. 
When called, sa will point to a pointer to a socket address that was malloced @@ -236,18 +236,18 @@ typedef int (*sf_getpeername_func)(void *cookie, socket_t so, but a result of zero will be returned to the caller of getsockname. */ -typedef int (*sf_getsockname_func)(void *cookie, socket_t so, - struct sockaddr **sa); +typedef int (*sf_getsockname_func)(void *cookie, socket_t so, + struct sockaddr **sa); /*! @typedef sf_data_in_func - + @discussion sf_data_in_func is called to filter incoming data. If your - filter intercepts data for later reinjection, it must queue all incoming - data to preserve the order of the data. Use sock_inject_data_in to later - reinject this data if you return EJUSTRETURN. Warning: This filter is on - the data path. Do not spend excesive time. Do not wait for data on - another socket. + filter intercepts data for later reinjection, it must queue + all incoming data to preserve the order of the data. Use + sock_inject_data_in to later reinject this data if you return + EJUSTRETURN. Warning: This filter is on the data path. Do not + spend excesive time. Do not wait for data on another socket. @param cookie Cookie value specified when the filter attach was called. @param so The socket the filter is attached to. @@ -261,16 +261,18 @@ typedef int (*sf_getsockname_func)(void *cookie, socket_t so, record. @result Return: 0 - The caller will continue with normal processing of the data. - EJUSTRETURN - The caller will stop processing the data, the data will not be freed. - Anything Else - The caller will free the data and stop processing. + EJUSTRETURN - The caller will stop processing the data, the + data will not be freed. + Anything Else - The caller will free the data and stop + processing. */ typedef errno_t (*sf_data_in_func)(void *cookie, socket_t so, - const struct sockaddr *from, mbuf_t *data, - mbuf_t *control, sflt_data_flag_t flags); + const struct sockaddr *from, mbuf_t *data, mbuf_t *control, + sflt_data_flag_t flags); /*! 
@typedef sf_data_out_func - + @discussion sf_data_out_func is called to filter outbound data. If your filter intercepts data for later reinjection, it must queue all outbound data to preserve the order of the data when @@ -289,35 +291,39 @@ typedef errno_t (*sf_data_in_func)(void *cookie, socket_t so, record. @result Return: 0 - The caller will continue with normal processing of the data. - EJUSTRETURN - The caller will stop processing the data, the data will not be freed. - Anything Else - The caller will free the data and stop processing. + EJUSTRETURN - The caller will stop processing the data, + the data will not be freed. + Anything Else - The caller will free the data and stop + processing. */ typedef errno_t (*sf_data_out_func)(void *cookie, socket_t so, - const struct sockaddr *to, mbuf_t *data, - mbuf_t *control, sflt_data_flag_t flags); + const struct sockaddr *to, mbuf_t *data, mbuf_t *control, + sflt_data_flag_t flags); /*! @typedef sf_connect_in_func - - @discussion sf_connect_in_func is called to filter inbound connections. A - protocol will call this before accepting an incoming connection and - placing it on the queue of completed connections. Warning: This filter - is on the data path. Do not spend excesive time. Do not wait for data on - another socket. + + @discussion sf_connect_in_func is called to filter inbound connections. + A protocol will call this before accepting an incoming + connection and placing it on the queue of completed connections. + Warning: This filter is on the data path. Do not spend excesive + time. Do not wait for data on another socket. @param cookie Cookie value specified when the filter attach was called. @param so The socket the filter is attached to. @param from The address the incoming connection is from. @result Return: - 0 - The caller will continue with normal processing of the connection. - Anything Else - The caller will rejecting the incoming connection. 
+ 0 - The caller will continue with normal processing of the + connection. + Anything Else - The caller will rejecting the incoming + connection. */ typedef errno_t (*sf_connect_in_func)(void *cookie, socket_t so, - const struct sockaddr *from); + const struct sockaddr *from); /*! @typedef sf_connect_out_func - + @discussion sf_connect_out_func is called to filter outbound connections. A protocol will call this before initiating an outbound connection. @@ -326,15 +332,17 @@ typedef errno_t (*sf_connect_in_func)(void *cookie, socket_t so, @param so The socket the filter is attached to. @param to The remote address of the outbound connection. @result Return: - 0 - The caller will continue with normal processing of the connection. - Anything Else - The caller will rejecting the outbound connection. + 0 - The caller will continue with normal processing of the + connection. + Anything Else - The caller will rejecting the outbound + connection. */ typedef errno_t (*sf_connect_out_func)(void *cookie, socket_t so, - const struct sockaddr *to); + const struct sockaddr *to); /*! @typedef sf_bind_func - + @discussion sf_bind_func is called before performing a bind operation on a socket. @param cookie Cookie value specified when the filter attach was @@ -346,11 +354,11 @@ typedef errno_t (*sf_connect_out_func)(void *cookie, socket_t so, Anything Else - The caller will rejecting the bind. */ typedef errno_t (*sf_bind_func)(void *cookie, socket_t so, - const struct sockaddr *to); + const struct sockaddr *to); /*! @typedef sf_setoption_func - + @discussion sf_setoption_func is called before performing setsockopt on a socket. @param cookie Cookie value specified when the filter attach was @@ -358,15 +366,16 @@ typedef errno_t (*sf_bind_func)(void *cookie, socket_t so, @param so The socket the filter is attached to. @param opt The socket option to set. @result Return: - 0 - The caller will continue with normal processing of the setsockopt. 
- Anything Else - The caller will stop processing and return this error. + 0 - The caller will continue with normal processing of the + setsockopt. + Anything Else - The caller will stop processing and return + this error. */ -typedef errno_t (*sf_setoption_func)(void *cookie, socket_t so, - sockopt_t opt); +typedef errno_t (*sf_setoption_func)(void *cookie, socket_t so, sockopt_t opt); /*! @typedef sf_getoption_func - + @discussion sf_getoption_func is called before performing getsockopt on a socket. @param cookie Cookie value specified when the filter attach was @@ -374,15 +383,16 @@ typedef errno_t (*sf_setoption_func)(void *cookie, socket_t so, @param so The socket the filter is attached to. @param opt The socket option to get. @result Return: - 0 - The caller will continue with normal processing of the getsockopt. - Anything Else - The caller will stop processing and return this error. + 0 - The caller will continue with normal processing of the + getsockopt. + Anything Else - The caller will stop processing and return + this error. */ -typedef errno_t (*sf_getoption_func)(void *cookie, socket_t so, - sockopt_t opt); +typedef errno_t (*sf_getoption_func)(void *cookie, socket_t so, sockopt_t opt); /*! @typedef sf_listen_func - + @discussion sf_listen_func is called before performing listen on a socket. @param cookie Cookie value specified when the filter attach was @@ -390,13 +400,14 @@ typedef errno_t (*sf_getoption_func)(void *cookie, socket_t so, @param so The socket the filter is attached to. @result Return: 0 - The caller will continue with normal processing of listen. - Anything Else - The caller will stop processing and return this error. + Anything Else - The caller will stop processing and return + this error. */ typedef errno_t (*sf_listen_func)(void *cookie, socket_t so); /*! @typedef sf_ioctl_func - + @discussion sf_ioctl_func is called before performing an ioctl on a socket. 
@@ -409,11 +420,13 @@ typedef errno_t (*sf_listen_func)(void *cookie, socket_t so); @param request The ioctl name. @param argp A pointer to the ioctl parameter. @result Return: - 0 - The caller will continue with normal processing of this ioctl. - Anything Else - The caller will stop processing and return this error. + 0 - The caller will continue with normal processing of + this ioctl. + Anything Else - The caller will stop processing and return + this error. */ typedef errno_t (*sf_ioctl_func)(void *cookie, socket_t so, - u_int32_t request, const char* argp); + unsigned long request, const char* argp); /*! @typedef sf_accept_func @@ -533,8 +546,8 @@ struct sflt_filter { @param protocol The protocol these filters will be attached to. @result 0 on success otherwise the errno error. */ -errno_t sflt_register(const struct sflt_filter *filter, int domain, - int type, int protocol); +extern errno_t sflt_register(const struct sflt_filter *filter, int domain, + int type, int protocol); /*! @function sflt_unregister @@ -545,7 +558,7 @@ errno_t sflt_register(const struct sflt_filter *filter, int domain, @param handle The sf_handle of the socket filter to unregister. @result 0 on success otherwise the errno error. */ -errno_t sflt_unregister(sflt_handle handle); +extern errno_t sflt_unregister(sflt_handle handle); /*! @function sflt_attach @@ -555,7 +568,7 @@ errno_t sflt_unregister(sflt_handle handle); @param handle The handle of the registered filter to be attached. @result 0 on success otherwise the errno error. */ -errno_t sflt_attach(socket_t so, sflt_handle); +extern errno_t sflt_attach(socket_t so, sflt_handle); /*! @function sflt_detach @@ -564,7 +577,7 @@ errno_t sflt_attach(socket_t so, sflt_handle); @param handle The handle of the registered filter to be detached. @result 0 on success otherwise the errno error. 
*/ -errno_t sflt_detach(socket_t so, sflt_handle); +extern errno_t sflt_detach(socket_t so, sflt_handle); /* Functions for manipulating sockets */ /* @@ -590,8 +603,8 @@ errno_t sflt_detach(socket_t so, sflt_handle); returns an error, the caller is responsible for freeing the mbuf. */ -errno_t sock_inject_data_in(socket_t so, const struct sockaddr* from, - mbuf_t data, mbuf_t control, sflt_data_flag_t flags); +extern errno_t sock_inject_data_in(socket_t so, const struct sockaddr *from, + mbuf_t data, mbuf_t control, sflt_data_flag_t flags); /*! @function sock_inject_data_out @@ -607,8 +620,8 @@ errno_t sock_inject_data_in(socket_t so, const struct sockaddr* from, @result 0 on success otherwise the errno error. The data and control values are always freed regardless of return value. */ -errno_t sock_inject_data_out(socket_t so, const struct sockaddr* to, - mbuf_t data, mbuf_t control, sflt_data_flag_t flags); +extern errno_t sock_inject_data_out(socket_t so, const struct sockaddr *to, + mbuf_t data, mbuf_t control, sflt_data_flag_t flags); /* @@ -628,7 +641,7 @@ typedef u_int8_t sockopt_dir; @param sopt The socket option. @result sock_opt_get or sock_opt_set. */ -sockopt_dir sockopt_direction(sockopt_t sopt); +extern sockopt_dir sockopt_direction(sockopt_t sopt); /*! @function sockopt_level @@ -636,7 +649,7 @@ sockopt_dir sockopt_direction(sockopt_t sopt); @param sopt The socket option. @result The socket option level. See man 2 setsockopt */ -int sockopt_level(sockopt_t sopt); +extern int sockopt_level(sockopt_t sopt); /*! @function sockopt_name @@ -644,7 +657,7 @@ int sockopt_level(sockopt_t sopt); @param sopt The socket option. @result The socket option name. See man 2 setsockopt */ -int sockopt_name(sockopt_t sopt); +extern int sockopt_name(sockopt_t sopt); /*! @function sockopt_valsize @@ -652,7 +665,7 @@ int sockopt_name(sockopt_t sopt); @param sopt The socket option. @result The length, in bytes, of the data. 
*/ -size_t sockopt_valsize(sockopt_t sopt); +extern size_t sockopt_valsize(sockopt_t sopt); /*! @function sockopt_copyin @@ -662,7 +675,7 @@ size_t sockopt_valsize(sockopt_t sopt); @param length The number of bytes to copy. @result An errno error or zero upon success. */ -errno_t sockopt_copyin(sockopt_t sopt, void *data, size_t length); +extern errno_t sockopt_copyin(sockopt_t sopt, void *data, size_t length); /*! @function sockopt_copyout @@ -672,7 +685,7 @@ errno_t sockopt_copyin(sockopt_t sopt, void *data, size_t length); @param length The number of bytes to copy. @result An errno error or zero upon success. */ -errno_t sockopt_copyout(sockopt_t sopt, void *data, size_t length); +extern errno_t sockopt_copyout(sockopt_t sopt, void *data, size_t length); __END_DECLS -#endif +#endif /* __KPI_SOCKETFILTER__ */ diff --git a/bsd/sys/linker_set.h b/bsd/sys/linker_set.h index 45dd7742d..852f34695 100644 --- a/bsd/sys/linker_set.h +++ b/bsd/sys/linker_set.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -24,8 +24,8 @@ * limitations under the License. * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- + * + * * Copyright (c) 1999 John D. Polstra * All rights reserved. * @@ -71,18 +71,12 @@ */ #ifdef KERNEL -# define MACH_KERNEL 1 -# include "mach-o/loader.h" -typedef int _ls_size_t; -# ifndef _KERN_MACH_HEADER_ -extern void *getsectdatafromheader(struct mach_header *, const char *, const char *, _ls_size_t *); -extern struct mach_header _mh_execute_header; -# endif +# include +# include #else # include # include # include -typedef unsigned long _ls_size_t; #endif @@ -142,7 +136,7 @@ typedef unsigned long _ls_size_t; * Preferred interface to linker_set_object_begin(), takes set name unquoted. * void **LINKER_SET_OBJECT_LIMIT(_object, _set) * Preferred interface to linker_set_object_begin(), takes set name unquoted. 
- * LINKER_SET_OBJECT_FOREACH(_object, (set_member_type **)_pvar, _set) + * LINKER_SET_OBJECT_FOREACH(_object, (set_member_type **)_pvar, _cast, _set) * Iterates over the members of _set within _object. Since the set contains * pointers to its elements, for a set of elements of type etyp, _pvar must * be (etyp **). @@ -151,30 +145,34 @@ typedef unsigned long _ls_size_t; * * void **LINKER_SET_BEGIN(_set) * void **LINKER_SET_LIMINT(_set) - * LINKER_SET_FOREACH((set_member_type **)_pvar, _set) + * LINKER_SET_FOREACH((set_member_type **)_pvar, _cast, _set) * set_member_type **LINKER_SET_ITEM(_set, _i) * These versions implicitly reference the kernel/application object. + * + * Example of _cast: For the _pvar "struct sysctl_oid **oidpp", _cast would be + * "struct sysctl_oid **" + * */ #define LINKER_SET_OBJECT_BEGIN(_object, _set) __linker_set_object_begin(_object, _set) #define LINKER_SET_OBJECT_LIMIT(_object, _set) __linker_set_object_limit(_object, _set) -#define LINKER_SET_OBJECT_FOREACH(_object, _pvar, _set) \ - for ((void **)_pvar = LINKER_SET_OBJECT_BEGIN(_object, _set); \ - (void **)_pvar < LINKER_SET_OBJECT_LIMIT(_object, _set); \ - ((void **)_pvar)++) +#define LINKER_SET_OBJECT_FOREACH(_object, _pvar, _cast, _set) \ + for (_pvar = (_cast) LINKER_SET_OBJECT_BEGIN(_object, _set); \ + _pvar < (_cast) LINKER_SET_OBJECT_LIMIT(_object, _set); \ + _pvar++) #define LINKER_SET_OBJECT_ITEM(_object, _set, _i) \ ((LINKER_SET_OBJECT_BEGIN(_object, _set))[_i]) #define LINKER_SET_BEGIN(_set) \ - LINKER_SET_OBJECT_BEGIN((struct mach_header *)&_mh_execute_header, _set) + LINKER_SET_OBJECT_BEGIN((kernel_mach_header_t *)&_mh_execute_header, _set) #define LINKER_SET_LIMIT(_set) \ - LINKER_SET_OBJECT_LIMIT((struct mach_header *)&_mh_execute_header, _set) -#define LINKER_SET_FOREACH(_pvar, _set) \ - LINKER_SET_OBJECT_FOREACH((struct mach_header *)&_mh_execute_header, _pvar, _set) + LINKER_SET_OBJECT_LIMIT((kernel_mach_header_t *)&_mh_execute_header, _set) +#define 
LINKER_SET_FOREACH(_pvar, _cast, _set) \ + LINKER_SET_OBJECT_FOREACH((kernel_mach_header_t *)&_mh_execute_header, _pvar, _cast, _set) #define LINKER_SET_ITEM(_set, _i) \ - LINKER_SET_OBJECT_ITEM((struct mach_header *)&_mh_execute_header, _set, _i) + LINKER_SET_OBJECT_ITEM((kernel_mach_header_t *)&_mh_execute_header, _set, _i) /* * Implementation. @@ -186,29 +184,30 @@ typedef unsigned long _ls_size_t; */ static __inline void ** -__linker_set_object_begin(struct mach_header *_header, const char *_set) +__linker_set_object_begin(kernel_mach_header_t *_header, const char *_set) __attribute__((__const__)); static __inline void ** -__linker_set_object_begin(struct mach_header *_header, const char *_set) +__linker_set_object_begin(kernel_mach_header_t *_header, const char *_set) { void *_set_begin; - _ls_size_t _size; + unsigned long _size; _set_begin = getsectdatafromheader(_header, "__DATA", _set, &_size); - return((void **)_set_begin); + return( (void **) _set_begin ); } static __inline void ** -__linker_set_object_limit(struct mach_header *_header, const char *_set) +__linker_set_object_limit(kernel_mach_header_t *_header, const char *_set) __attribute__((__const__)); static __inline void ** -__linker_set_object_limit(struct mach_header *_header, const char *_set) +__linker_set_object_limit(kernel_mach_header_t *_header, const char *_set) { void *_set_begin; - _ls_size_t _size; + unsigned long _size; _set_begin = getsectdatafromheader(_header, "__DATA", _set, &_size); - return((void **)((uintptr_t)_set_begin + _size)); + + return ((void **) ((uintptr_t) _set_begin + _size)); } #endif /* !KERNEL || __APPLE_API_PRIVATE */ diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index df4dec9c1..ffa779573 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -90,7 +90,6 @@ struct lockf { off_t lf_start; /* Byte # of the start of the lock */ off_t lf_end; /* Byte # of the end of the lock (-1=EOF) */ caddr_t lf_id; /* Id of the resource holding the lock */ - uint32_t lf_waiters; /* 
count of waiters on this lock */ struct lockf **lf_head; /* Back pointer to the head of the locf list */ struct vnode *lf_vnode; /* Back pointer to the inode */ struct lockf *lf_next; /* Pointer to the next lock on this inode */ @@ -105,6 +104,7 @@ struct lockf { __BEGIN_DECLS +#ifdef KERNEL_PRIVATE int lf_advlock(struct vnop_advlock_args *); int lf_assert(struct vnop_advlock_args *, void **); void lf_commit(void *, int); @@ -113,6 +113,7 @@ void lf_commit(void *, int); void lf_print(char *, struct lockf *); void lf_printlist(char *, struct lockf *); #endif +#endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/lockstat.h b/bsd/sys/lockstat.h index a7c659970..74b5ee6a0 100644 --- a/bsd/sys/lockstat.h +++ b/bsd/sys/lockstat.h @@ -51,18 +51,8 @@ extern "C" { #define LS_LCK_MTX_TRY_LOCK_ACQUIRE 8 #define LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE 9 #define LS_LCK_MTX_UNLOCK_RELEASE 10 -/* - * Although theoretically deprecated, there's still lots of uses of mutex_lock() style locks. - * Block and spin events share code with lck_mtx_lock() and friends, so we only need events - * for the acquire and release events. - */ -#define LS_MUTEX_LOCK_ACQUIRE 11 -#define LS_MUTEX_TRY_LOCK_ACQUIRE 12 -#define LS_MUTEX_TRY_SPIN_ACQUIRE 13 -#define LS_MUTEX_UNLOCK_RELEASE 14 -#define LS_MUTEX_LOCK_SPIN_ACQUIRE 15 -#define LS_MUTEX_CONVERT_SPIN_ACQUIRE 16 +#define LS_LCK_MTX_LOCK_SPIN_ACQUIRE 39 /* * Provide a parallel set for indirect mutexes */ @@ -99,7 +89,7 @@ extern "C" { #define LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE 37 #define LS_LCK_RW_LOCK_EXCL_TO_SHARED_ILK_SPIN 38 -#define LS_NPROBES 39 +#define LS_NPROBES 40 /* * Name the various locking functions... 
@@ -112,12 +102,8 @@ extern "C" { #define LS_LCK_MTX_EXT_LOCK "lck_mtx_ext_lock" #define LS_LCK_MTX_EXT_UNLOCK "lck_mtx_ext_unlock" #define LS_LCK_MTX_EXT_TRY_LOCK "lck_mtx_ext_try_lock" -#define LS_MUTEX_CONVERT_SPIN "mutex_convert_spin" -#define LS_MUTEX_LOCK "mutex_lock" -#define LS_MUTEX_UNLOCK "mutex_unlock" -#define LS_MUTEX_TRY_LOCK "mutex_try_lock" -#define LS_MUTEX_TRY_SPIN "mutex_try_spin" -#define LS_MUTEX_LOCK_SPIN "mutex_lock_spin" +#define LS_LCK_MTX_LOCK_SPIN_LOCK "lck_mtx_lock_spin" + #define LS_LCK_SPIN_LOCK "lck_spin_lock" #define LS_LCK_SPIN_TRY_LOCK "lck_spin_try_lock" #define LS_LCK_SPIN_UNLOCK "lck_spin_unlock" @@ -185,9 +171,6 @@ extern void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t, #ifdef _KERNEL #if CONFIG_DTRACE -extern int lockstat_depth(void); -extern void lockstat_hot_patch(boolean_t); - /* * Macros to record lockstat probes. */ diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h index ea59c270b..dcbaaded7 100644 --- a/bsd/sys/malloc.h +++ b/bsd/sys/malloc.h @@ -192,14 +192,20 @@ #define M_FILEGLOB 99 /* fileglobal */ #define M_KAUTH 100 /* kauth subsystem */ #define M_DUMMYNET 101 /* dummynet */ +#ifndef __LP64__ #define M_UNSAFEFS 102 /* storage for vnode lock state for unsafe FS */ +#endif /* __LP64__ */ #define M_MACPIPELABEL 103 /* MAC pipe labels */ #define M_MACTEMP 104 /* MAC framework */ #define M_SBUF 105 /* string buffers */ #define M_EXTATTR 106 /* extended attribute */ #define M_LCTX 107 /* process login context */ +/* M_TRAFFIC_MGT 108 */ +#if HFS_COMPRESSION +#define M_DECMPFS_CNODE 109 /* decmpfs cnode structures */ +#endif /* HFS_COMPRESSION */ -#define M_LAST 109 /* Must be last type + 1 */ +#define M_LAST 110 /* Must be last type + 1 */ #else /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index 779dbde50..905d4119d 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -92,6 +92,10 @@ #include #include +#if PF_PKTHDR +#include +#endif /* PF_PKTHDR */ + /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size @@ -100,8 +104,21 @@ * at least MINCLSIZE of data must be stored. */ -#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ -#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ +/* + * These macros are mapped to the appropriate KPIs, so that private code + * can be simply recompiled in order to be forward-compatible with future + * changes toward the structure sizes. + */ +#define MLEN mbuf_get_mlen() /* normal data len */ +#define MHLEN mbuf_get_mhlen() /* data len w/pkthdr */ + +/* + * The following _MLEN and _MHLEN macros are private to xnu. Private code + * that is outside of xnu must use the mbuf_get_{mlen,mhlen} routines since + * the sizes of the structures are dependent upon specific xnu configs.
+ */ +#define _MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ +#define _MHLEN (_MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + MLEN) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ @@ -120,7 +137,7 @@ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ - long mh_len; /* amount of data in this mbuf */ + int32_t mh_len; /* amount of data in this mbuf */ caddr_t mh_data; /* location of data */ short mh_type; /* type of data in this mbuf */ short mh_flags; /* flags; see below */ @@ -144,19 +161,21 @@ struct pkthdr { /* variables for ip and tcp reassembly */ void *header; /* pointer to packet header */ /* variables for hardware checksum */ -#ifdef KERNEL_PRIVATE /* Note: csum_flags is used for hardware checksum and VLAN */ -#endif KERNEL_PRIVATE int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ - void *reserved0; /* unused, for future use */ -#ifdef KERNEL_PRIVATE + u_int tso_segsz; /* TSO segment size (actual MSS) */ u_short vlan_tag; /* VLAN tag, host byte order */ u_short socket_id; /* socket id */ -#else KERNEL_PRIVATE - u_int reserved1; /* for future use */ -#endif KERNEL_PRIVATE SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ +#if PF_PKTHDR + /* + * Be careful; {en,dis}abling PF_PKTHDR will require xnu recompile; + * private code outside of xnu must use mbuf_get_mhlen() instead + * of MHLEN. 
+ */ + struct pf_mtag pf_mtag; +#endif /* PF_PKTHDR */ }; @@ -175,6 +194,9 @@ struct m_ext { } *ext_refflags; }; +/* define m_ext to a type since it gets redefined below */ +typedef struct m_ext _m_ext_t; + struct mbuf { struct m_hdr m_hdr; union { @@ -182,10 +204,10 @@ struct mbuf { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ - char MH_databuf[MHLEN]; + char MH_databuf[_MHLEN]; } MH_dat; } MH; - char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ + char M_databuf[_MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; @@ -247,7 +269,11 @@ struct mbuf { /* VLAN tag present */ #define CSUM_VLAN_TAG_VALID 0x10000 /* vlan_tag field is valid */ -#endif KERNEL_PRIVATE + +/* TCP Segment Offloading requested on this mbuf */ +#define CSUM_TSO_IPV4 0x100000 /* This mbuf needs to be segmented by the NIC */ +#define CSUM_TSO_IPV6 0x200000 /* This mbuf needs to be segmented by the NIC */ +#endif /* KERNEL_PRIVATE */ /* mbuf types */ @@ -399,7 +425,8 @@ union m16kcluster { /* compatiblity with 4.3 */ #define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) -#define MBSHIFT 20 /* 1MB */ +#define MBSHIFT 20 /* 1MB */ +#define GBSHIFT 30 /* 1GB */ #endif /* KERNEL_PRIVATE */ @@ -451,6 +478,33 @@ struct ombstat { */ #define MAX_MBUF_CNAME 15 +#if defined(KERNEL_PRIVATE) +/* For backwards compatibility with 32-bit userland process */ +struct omb_class_stat { + char mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */ + u_int32_t mbcl_size; /* buffer size */ + u_int32_t mbcl_total; /* # of buffers created */ + u_int32_t mbcl_active; /* # of active buffers */ + u_int32_t mbcl_infree; /* # of available buffers */ + u_int32_t mbcl_slab_cnt; /* # of available slabs */ + u_int64_t mbcl_alloc_cnt; /* # of times alloc is called */ + u_int64_t mbcl_free_cnt; /* # of times free is called */ + u_int64_t mbcl_notified; /* # of notified wakeups */ + u_int64_t mbcl_purge_cnt; /* # of purges so far */ + u_int64_t mbcl_fail_cnt; /* # of allocation failures */ + u_int32_t 
mbcl_ctotal; /* total only for this class */ + /* + * Cache layer statistics + */ + u_int32_t mbcl_mc_state; /* cache state (see below) */ + u_int32_t mbcl_mc_cached; /* # of cached buffers */ + u_int32_t mbcl_mc_waiter_cnt; /* # waiters on the cache */ + u_int32_t mbcl_mc_wretry_cnt; /* # of wait retries */ + u_int32_t mbcl_mc_nwretry_cnt; /* # of no-wait retry attempts */ + u_int64_t mbcl_reserved[4]; /* for future use */ +} __attribute__((__packed__)); +#endif /* KERNEL_PRIVATE */ + typedef struct mb_class_stat { char mbcl_cname[MAX_MBUF_CNAME + 1]; /* class name */ u_int32_t mbcl_size; /* buffer size */ @@ -458,6 +512,9 @@ typedef struct mb_class_stat { u_int32_t mbcl_active; /* # of active buffers */ u_int32_t mbcl_infree; /* # of available buffers */ u_int32_t mbcl_slab_cnt; /* # of available slabs */ +#if defined(KERNEL) || defined(__LP64__) + u_int32_t mbcl_pad; /* padding */ +#endif /* KERNEL || __LP64__ */ u_int64_t mbcl_alloc_cnt; /* # of times alloc is called */ u_int64_t mbcl_free_cnt; /* # of times free is called */ u_int64_t mbcl_notified; /* # of notified wakeups */ @@ -480,8 +537,19 @@ typedef struct mb_class_stat { #define MCS_PURGING 2 /* cache is being purged */ #define MCS_OFFLINE 3 /* cache is offline (resizing) */ +#if defined(KERNEL_PRIVATE) +/* For backwards compatibility with 32-bit userland process */ +struct omb_stat { + u_int32_t mbs_cnt; /* number of classes */ + struct omb_class_stat mbs_class[1]; /* class array */ +} __attribute__((__packed__)); +#endif /* KERNEL_PRIVATE */ + typedef struct mb_stat { u_int32_t mbs_cnt; /* number of classes */ +#if defined(KERNEL) || defined(__LP64__) + u_int32_t mbs_pad; /* padding */ +#endif /* KERNEL || __LP64__ */ mb_class_stat_t mbs_class[1]; /* class array */ } mb_stat_t; @@ -491,7 +559,7 @@ typedef struct mb_stat { extern union mcluster *mbutl; /* virtual address of mclusters */ extern union mcluster *embutl; /* ending virtual address of mclusters */ extern struct mbstat mbstat; /* statistics 
*/ -extern int nmbclusters; /* number of mapped clusters */ +extern unsigned int nmbclusters; /* number of mapped clusters */ extern int njcl; /* # of clusters for jumbo sizes */ extern int njclbytes; /* size of a jumbo cluster */ extern int max_linkhdr; /* largest link-level header */ @@ -501,6 +569,7 @@ extern int max_datalen; /* MHLEN - max_hdr */ __BEGIN_DECLS /* Not exported */ +__private_extern__ unsigned int mbuf_default_ncl(int, uint64_t); __private_extern__ void mbinit(void); __private_extern__ struct mbuf *m_clattach(struct mbuf *, int, caddr_t, void (*)(caddr_t , u_int, caddr_t), u_int, caddr_t, int); @@ -510,6 +579,8 @@ __private_extern__ struct mbuf *m_mbigget(struct mbuf *, int); __private_extern__ caddr_t m_16kalloc(int); __private_extern__ void m_16kfree(caddr_t, u_int, caddr_t); __private_extern__ struct mbuf *m_m16kget(struct mbuf *, int); +__private_extern__ void mbuf_growth_aggressive(void); +__private_extern__ void mbuf_growth_normal(void); /* Exported */ struct mbuf *m_copym(struct mbuf *, int, int, int); @@ -531,6 +602,7 @@ struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(const void *, v char *mcl_to_paddr(char *); struct mbuf *m_pulldown(struct mbuf*, int, int, int*); +extern struct mbuf *m_getcl(int, int, int); struct mbuf *m_mclget(struct mbuf *, int); caddr_t m_mclalloc(int); void m_mclfree(caddr_t p); @@ -552,8 +624,10 @@ struct mbuf *m_normalize(struct mbuf *m); void m_mchtype(struct mbuf *m, int t); void m_mcheck(struct mbuf*); -void m_copyback(struct mbuf *, int , int , caddr_t); -void m_copydata(struct mbuf *, int , int , caddr_t); +extern void m_copyback(struct mbuf *, int , int , const void *); +extern struct mbuf *m_copyback_cow(struct mbuf *, int, int, const void *, int); +extern int m_makewritable(struct mbuf **, int, int, int); +void m_copydata(struct mbuf *, int , int , void *); struct mbuf* m_dup(struct mbuf *m, int how); void m_cat(struct mbuf *, struct mbuf *); struct mbuf *m_copym_with_hdrs(struct mbuf*, 
int, int, int, struct mbuf**, int*); @@ -604,7 +678,8 @@ enum { KERNEL_TAG_TYPE_MAC_POLICY_LABEL = 6, KERNEL_TAG_TYPE_ENCAP = 8, KERNEL_TAG_TYPE_INET6 = 9, - KERNEL_TAG_TYPE_IPSEC = 10 + KERNEL_TAG_TYPE_IPSEC = 10, + KERNEL_TAG_TYPE_PF = 11 }; /* @@ -645,5 +720,5 @@ __END_DECLS #endif /* KERNEL_PRIVATE */ #ifdef KERNEL #include -#endif +#endif /* KERNEL */ #endif /* !_SYS_MBUF_H_ */ diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h index f3eb0578e..a82aec943 100644 --- a/bsd/sys/mman.h +++ b/bsd/sys/mman.h @@ -178,6 +178,10 @@ typedef __darwin_size_t size_t; #define MADV_WILLNEED POSIX_MADV_WILLNEED #define MADV_DONTNEED POSIX_MADV_DONTNEED #define MADV_FREE 5 /* pages unneeded, discard contents */ +#define MADV_ZERO_WIRED_PAGES 6 /* zero the wired pages that have not been unwired before the entry is deleted */ +#define MADV_FREE_REUSABLE 7 /* pages can be reused (by anyone) */ +#define MADV_FREE_REUSE 8 /* caller wants to reuse those pages */ +#define MADV_CAN_REUSE 9 /* * Return bits from mincore @@ -225,7 +229,7 @@ int minherit(void *, size_t, int); __END_DECLS #else /* KERNEL */ - +#ifdef XNU_KERNEL_PRIVATE void pshm_cache_init(void) __attribute__((section("__TEXT, initcode"))); /* for bsd_init() */ void pshm_lock_init(void); @@ -242,7 +246,8 @@ struct pshmnode; struct stat; int pshm_stat(struct pshmnode *pnode, void *ub, int isstat64); struct fileproc; -int pshm_truncate(struct proc *p, struct fileproc *fp, int fd, off_t length, register_t *retval); +int pshm_truncate(struct proc *p, struct fileproc *fp, int fd, off_t length, int32_t *retval); +#endif /* XNU_KERNEL_PRIVATE */ #endif /* KERNEL */ #endif /* !_SYS_MMAN_H_ */ diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index 9019d694b..d5a3d3272 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -82,6 +82,7 @@ #include #else #include +#include #endif typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ @@ -92,7 +93,12 @@ typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type 
*/ #define MFSNAMELEN 15 /* length of fs type name, not inc. null */ #define MFSTYPENAMELEN 16 /* length of fs type name including null */ -#define MNAMELEN 90 /* length of buffer for returned name */ + +#if __DARWIN_64_BIT_INO_T +#define MNAMELEN MAXPATHLEN /* length of buffer for returned name */ +#else /* ! __DARWIN_64_BIT_INO_T */ +#define MNAMELEN 90 /* length of buffer for returned name */ +#endif /* __DARWIN_64_BIT_INO_T */ #define __DARWIN_STRUCT_STATFS64 { \ uint32_t f_bsize; /* fundamental file system block size */ \ @@ -147,13 +153,8 @@ struct statfs { char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ -#if COMPAT_GETFSSTAT - char f_reserved3[0]; /* For alignment */ - long f_reserved4[0]; /* For future use */ -#else char f_reserved3; /* For alignment */ long f_reserved4[4]; /* For future use */ -#endif }; #endif /* __DARWIN_64_BIT_INO_T */ @@ -221,12 +222,18 @@ struct vfsstatfs { #define VFSATTR_f_vol_name (1LL<< 21) #define VFSATTR_f_signature (1LL<< 22) #define VFSATTR_f_carbon_fsid (1LL<< 23) +#define VFSATTR_f_uuid (1LL<< 24) /* * Argument structure. */ #pragma pack(4) - +/* + * Note: the size of the vfs_attr structure can change. + * A kext should only reference the fields that are + * marked as active; it should not depend on the actual + * size of the structure or attempt to copy it. 
+ */ struct vfs_attr { uint64_t f_supported; uint64_t f_active; @@ -261,6 +268,7 @@ struct vfs_attr { uint16_t f_signature; /* used for ATTR_VOL_SIGNATURE, Carbon's FSVolumeInfo.signature */ uint16_t f_carbon_fsid; /* same as Carbon's FSVolumeInfo.filesystemID */ + uuid_t f_uuid; /* file system UUID (version 3 or 5), available in 10.6 and later */ }; #pragma pack() @@ -359,8 +367,9 @@ struct vfs_attr { * * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_WAIT 1 /* synchronized I/O file integrity completion */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ +#define MNT_DWAIT 4 /* synchronized I/O data integrity completion */ #ifndef KERNEL @@ -370,14 +379,15 @@ struct vnode; typedef struct vnode * vnode_t; #endif +/* Reserved fields preserve binary compatibility */ struct vfsconf { - struct vfsops *vfc_vfsops; /* filesystem operations vector */ + uint32_t vfc_reserved1; /* opaque */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ - int (*vfc_mountroot)(mount_t, vnode_t); /* if != NULL, routine to mount root */ - struct vfsconf *vfc_next; /* next in list */ + uint32_t vfc_reserved2; /* opaque */ + uint32_t vfc_reserved3; /* opaque */ }; struct vfsidctl { @@ -393,21 +403,6 @@ struct vfsidctl { #define VFS_CTL_VERS1 0x01 #ifdef KERNEL - -/* LP64 version of vfsconf. all pointers - * grow when we're dealing with a 64-bit process. 
- * WARNING - keep in sync with vfsconf - */ -struct user_vfsconf { - user_addr_t vfc_vfsops; /* filesystem operations vector */ - char vfc_name[MFSNAMELEN]; /* filesystem type name */ - int vfc_typenum; /* historic filesystem type number */ - int vfc_refcount; /* number mounted of this type */ - int vfc_flags; /* permanent flags */ - user_addr_t vfc_mountroot __attribute((aligned(8))); /* if != NULL, routine to mount root */ - user_addr_t vfc_next; /* next in list */ -}; - struct user_vfsidctl { int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ fsid_t vc_fsid; /* fsid to operate on. */ @@ -416,6 +411,19 @@ struct user_vfsidctl { u_int32_t vc_spare[12]; /* spare (must be zero). */ }; +struct user32_vfsidctl { + int vc_vers; /* should be VFSIDCTL_VERS1 (below) */ + fsid_t vc_fsid; /* fsid to operate on. */ + user32_addr_t vc_ptr; /* pointer to data structure. */ + user32_size_t vc_len; /* sizeof said structure. */ + u_int32_t vc_spare[12]; /* spare (must be zero). */ +}; + +union union_vfsidctl { /* the fields vc_vers and vc_fsid are compatible */ + struct user32_vfsidctl vc32; + struct user_vfsidctl vc64; +}; + #endif /* KERNEL */ /* @@ -444,8 +452,8 @@ struct vfsquery { #define VQ_ASSIST 0x0040 /* filesystem needs assistance from external program */ #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ #define VQ_UPDATE 0x0100 /* filesystem information has changed */ -#define VQ_FLAG0200 0x0200 /* placeholder */ -#define VQ_FLAG0400 0x0400 /* placeholder */ +#define VQ_VERYLOWDISK 0x0200 /* file system has *very* little disk space left */ +#define VQ_SYNCEVENT 0x0400 /* a sync just happened */ #define VQ_FLAG0800 0x0800 /* placeholder */ #define VQ_FLAG1000 0x1000 /* placeholder */ #define VQ_FLAG2000 0x2000 /* placeholder */ @@ -484,6 +492,9 @@ struct vfsioattr { #define VFS_TBLGENERICMNTARGS 0x0200 /* force generic mount args for local fs */ #define VFS_TBLREADDIR_EXTENDED 0x0400 /* fs supports VNODE_READDIR_EXTENDED */ #define VFS_TBLNOMACLABEL 0x1000 
+#define VFS_TBLVNOP_PAGEINV2 0x2000 +#define VFS_TBLVNOP_PAGEOUTV2 0x4000 + struct vfs_fsentry { struct vfsops * vfe_vfsops; /* vfs operations */ @@ -498,20 +509,168 @@ struct vfs_fsentry { struct vfsops { + /*! + @function vfs_mount + @abstract Perform filesystem-specific operations required for mounting. + @discussion Typical operations include setting the mount-specific data with vfs_setfsprivate(). + Note that if a mount call fails, the filesystem must clean up any state it has constructed, because + vfs-level mount code will not clean it up. + @param mp Mount structure for the newly mounted filesystem. + @param devvp Device that the filesystem is mounted from. + @param data Filesystem-specific data passed down from userspace. + @param context Context to authenticate for mount. + @return 0 for success, else an error code. Once success is returned, the filesystem should be ready to go active; + VFS will not ask again. + */ int (*vfs_mount)(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); + + /*! + @function vfs_start + @abstract Mark a mount as ready to be used. + @discussion After receiving this calldown, a filesystem will be hooked into the mount list and should expect + calls down from the VFS layer. + @param mp Mount structure being activated. + @param flags Unused. + @param context Context to authenticate for mount. + @return Return value is ignored. + */ int (*vfs_start)(struct mount *mp, int flags, vfs_context_t context); + + /*! + @function vfs_unmount + @abstract Perform filesystem-specific cleanup as part of unmount. + @discussion If the unmount downcall succeeds, VFS considers itself authorized to destroy all + state related to the mount. + @param mp Mount structure to unmount. + @param mntflags MNT_FORCE indicates that we wish to unmount even if there are active vnodes. + @param context Context to authenticate for unmount. + @return 0 for success, else an error code. 
+ */ int (*vfs_unmount)(struct mount *mp, int mntflags, vfs_context_t context); + + /*! + @function vfs_root + @abstract Get the root vnode of a filesystem. + @discussion Upon success, should return with an iocount held on the root vnode which the caller will + drop with vnode_put(). + @param mp Mount for which to get the root. + @param vpp Destination for root vnode. + @param context Context to authenticate for getting the root. + @return 0 for success, else an error code. + */ int (*vfs_root)(struct mount *mp, struct vnode **vpp, vfs_context_t context); + + /*! + @function vfs_quotactl + @abstract Manipulate quotas for a volume. + @param mp Mount for which to manipulate quotas. + @param cmds Detailed in "quotactl" manual page. + @param uid Detailed in "quotactl" manual page. + @param arg Detailed in "quotactl" manual page. + @param context Context to authenticate for changing quotas. + @return 0 for success, else an error code. + */ int (*vfs_quotactl)(struct mount *mp, int cmds, uid_t uid, caddr_t arg, vfs_context_t context); + + /*! + @function vfs_getattr + @abstract Get filesystem attributes. + @discussion See VFSATTR_RETURN, VFSATTR_ACTIVE, VFSATTR_SET_SUPPORTED, VFSATTR_WANTED macros. + @param mp Mount for which to get parameters. + @param vfa Container for specifying which attributes are desired and which attributes the filesystem + supports, as well as for returning results. + @param ctx Context to authenticate for getting filesystem attributes. + @return 0 for success, else an error code. + */ int (*vfs_getattr)(struct mount *mp, struct vfs_attr *, vfs_context_t context); /* int (*vfs_statfs)(struct mount *mp, struct vfsstatfs *sbp, vfs_context_t context);*/ + + /*! + @function vfs_sync + @abstract Flush all filesystem data to backing store. + @discussion vfs_sync will be called as part of the sync() system call and during unmount. + @param mp Mountpoint to sync. 
+ @param waitfor MNT_WAIT: flush synchronously, waiting for all data to be written before returning. MNT_NOWAIT: start I/O but do not wait for it. + @param ctx Context to authenticate for the sync. + @return 0 for success, else an error code. + */ int (*vfs_sync)(struct mount *mp, int waitfor, vfs_context_t context); + + /*! + @function vfs_vget + @abstract Get a vnode by file id (inode number). + @discussion This routine is chiefly used to build paths to vnodes. Result should be returned with an iocount that the + caller will drop with vnode_put(). + @param mp Mount against which to look up inode number. + @param ino File ID for desired file, as found through a readdir. + @param vpp Destination for vnode. + @return 0 for success, else an error code. + */ int (*vfs_vget)(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); + + /*! + @function vfs_fhtovp + @abstract Get the vnode corresponding to a file handle. + @discussion Filesystems can return handles to files which are independent of their (transient) vnode identities. + vfs_fhtovp converts that persistent handle back to a vnode. The vnode should be returned with an iocount which + the caller will drop with vnode_put(). + @param mp Mount against which to look up file handle. + @param fhlen Size of file handle structure, as returned by vfs_vptofh. + @param fhp Pointer to handle. + @param vpp Destination for vnode. + @param ctx Context against which to authenticate the file-handle conversion. + @return 0 for success, else an error code. + */ int (*vfs_fhtovp)(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); + + /*! + @function vfs_vptofh + @abstract Get a persistent handle corresponding to a vnode. + @param vp Vnode to convert to a persistent handle. + @param fhlen Size of buffer provided for handle; set to size of actual handle returned. + @param fhp Pointer to buffer in which to place handle data. 
+ @param ctx Context against which to authenticate the file-handle request. + @return 0 for success, else an error code. + */ int (*vfs_vptofh)(struct vnode *vp, int *fhlen, unsigned char *fhp, vfs_context_t context); + + /*! + @function vfs_init + @abstract Prepare a filesystem for having instances mounted. + @discussion This routine is called once, before any particular instance of a filesystem + is mounted; it allows the filesystem to initialize whatever global data structures + are shared across all mounts. If this returns successfully, a filesystem should be ready to have + instances mounted. + @param vfsconf Configuration information. Currently, the only useful data are the filesystem name, + typenum, and flags. The flags field will be either 0 or MNT_LOCAL. Many filesystems ignore this + parameter. + @return 0 for success, else an error code. + */ int (*vfs_init)(struct vfsconf *); + + /*! + @function vfs_sysctl + @abstract Broad interface for querying and controlling filesystem. + @discussion VFS defines VFS_CTL_QUERY as a generic status request which is answered + with the VQ_* macros in a "struct vfsquery." + A filesystem may also define implementation-specific commands. See "man 3 sysctl" + for the meaning of sysctl parameters. + @param context Context against which to authenticate command. + @return 0 for success, else an error code. + */ int (*vfs_sysctl)(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t context); + + /*! + @function vfs_setattr + @abstract Set filesystem attributes. + @discussion The other side of the vfs_getattr coin. Currently only called to set volume name. + @param mp Mount on which to set attributes. + @param vfa VFS attribute structure containing requested attributes to set and their values. Currently + will only be called with f_vol_name set. + @param context Context against which to authenticate attribute change. + @return 0 for success, else an error code. 
+ */ int (*vfs_setattr)(struct mount *mp, struct vfs_attr *, vfs_context_t context); void *vfs_reserved[7]; }; @@ -531,9 +690,7 @@ struct vfsops { __BEGIN_DECLS -/* - * prototypes for exported VFS operations - */ +#ifdef BSD_KERNEL_PRIVATE extern int VFS_MOUNT(mount_t, vnode_t, user_addr_t, vfs_context_t); extern int VFS_START(mount_t, int, vfs_context_t); extern int VFS_UNMOUNT(mount_t, int, vfs_context_t); @@ -545,37 +702,255 @@ extern int VFS_SYNC(mount_t, int, vfs_context_t); extern int VFS_VGET(mount_t, ino64_t, vnode_t *, vfs_context_t); extern int VFS_FHTOVP(mount_t, int, unsigned char *, vnode_t *, vfs_context_t); extern int VFS_VPTOFH(vnode_t, int *, unsigned char *, vfs_context_t); +#endif /* BSD_KERNEL_PRIVATE */ +/* + * prototypes for exported VFS operations + */ -/* The file system registrartion KPI */ +/*! + @function vfs_fsadd + @abstract Register a filesystem with VFS. + @discussion Typically called by a filesystem Kernel Extension when it is loaded. + @param vfe Filesystem information: table of vfs operations, list of vnode operation tables, + filesystem type number (can be omitted with VFS_TBLNOTYPENUM flag), name, flags. + @param handle Opaque handle which will be passed to vfs_fsremove. + @return 0 for success, else an error code. + */ int vfs_fsadd(struct vfs_fsentry *, vfstable_t *); + +/*! + @function vfs_fsremove + @abstract Unregister a filesystem with VFS. + @discussion Typically called by a filesystem Kernel Extension when it is unloaded. + @param handle Handle which was returned by vfs_fsadd. + @return 0 for success, else an error code. + */ int vfs_fsremove(vfstable_t); + +/*! + @function vfs_iterate + @abstract Iterate over all mountpoints with a callback. Used, for example, by sync(). + @param flags Unused. + @param callback Function which takes a mount and arbitrary passed-in "arg," and returns one of VFS_RETURNED_DONE or VFS_CLAIMED_DONE: end + iteration and return success. VFS_RETURNED or VFS_CLAIMED: continue iterating. 
Anything else: continue iterating. + @param arg Arbitrary data to pass to callback. + @return 0 for success, else an error code. + */ int vfs_iterate(int, int (*)(struct mount *, void *), void *); +/*! + @function vfs_init_io_attributes + @abstract Set I/O attributes on a mountpoint based on device properties. + @param devvp Block device vnode from which a filesystem is being mounted. + @param mp Mountpoint whose I/O parameters to initialize. + @return 0 for success, else an error code. + */ +int vfs_init_io_attributes(vnode_t, mount_t); + +/*! + @function vfs_flags + @abstract Retrieve mount flags. + @discussion Results will be in the bitwise "OR" of MNT_VISFLAGMASK and MNT_CMDFLAGS. + @param mp Mount whose flags to grab. + @return Flags. + */ uint64_t vfs_flags(mount_t); + +/*! + @function vfs_setflags + @abstract Set flags on a mount. + @discussion Sets mount flags to the bitwise "OR" of their current value and the specified bits. Often + used by a filesystem as part of the mount process. + @param mp Mount whose flags to set. + @param flags Flags to activate. Must be in the bitwise "OR" of MNT_VISFLAGMASK and MNT_CMDFLAGS. + @return void. + */ void vfs_setflags(mount_t, uint64_t); + +/*! + @function vfs_clearflags + @abstract Clear flags on a mount. + @discussion Sets mount flags to the bitwise "AND" of their current value and the complement of the specified bits. + @param mp Mount whose flags to set. + @param flags Flags to deactivate. Must be in the bitwise "OR" of MNT_VISFLAGMASK and MNT_CMDFLAGS. + @return void. + */ void vfs_clearflags(mount_t, uint64_t); +/*! + @function vfs_issynchronous + @abstract Determine if writes to a filesystem occur synchronously. + @param mp Mount to test. + @return Nonzero if writes occur synchronously, else 0. + */ int vfs_issynchronous(mount_t); + +/*! + @function vfs_iswriteupgrade + @abstract Determine if a filesystem is mounted read-only but a request has been made to upgrade + to read-write. + @param mp Mount to test. 
+ @return Nonzero if a request has been made to update from read-only to read-write, else 0. + */ int vfs_iswriteupgrade(mount_t); + +/*! + @function vfs_isupdate + @abstract Determine if a mount update is in progress. + @param mp Mount to test. + @return Nonzero if a mount update is in progress, 0 otherwise. + */ int vfs_isupdate(mount_t); + +/*! + @function vfs_isreload + @abstract Determine if a reload of filesystem data is in progress. This can only be the case + for a read-only filesystem; all data is brought in from secondary storage. + @param mp Mount to test. + @return Nonzero if a request has been made to reload data, else 0. + */ int vfs_isreload(mount_t); + +/*! + @function vfs_isforce + @abstract Determine if a forced unmount is in progress. + @discussion A forced unmount invalidates open files. + @param mp Mount to test. + @return Nonzero if a request has been made to forcibly unmount, else 0. + */ int vfs_isforce(mount_t); + +/*! + @function vfs_isunmount + @abstract Determine if an unmount is in progress. + @discussion This is an unsynchronized snapshot of the mount state. It should only be called + if the mount is known to be valid, e.g. there are known to be live files on that volume. + @param mp Mount to test. + @return Nonzero if an unmount is in progress, else zero. + */ +int vfs_isunmount(mount_t mp); + +/*! + @function vfs_isrdonly + @abstract Determine if a filesystem is mounted read-only. + @param mp Mount to test. + @return Nonzero if filesystem is mounted read-only, else 0. + */ int vfs_isrdonly(mount_t); + +/*! + @function vfs_isrdwr + @abstract Determine if a filesystem is mounted with writes enabled. + @param mp Mount to test. + @return Nonzero if filesystem is mounted read-write, else 0. + */ int vfs_isrdwr(mount_t); + +/*! + @function vfs_authopaque + @abstract Determine if a filesystem's authorization decisions occur remotely. + @param mp Mount to test. + @return Nonzero if filesystem authorization is controlled remotely, else 0. 
+ */ int vfs_authopaque(mount_t); + +/*! + @function vfs_authopaqueaccess + @abstract Check if a filesystem is marked as having reliable remote VNOP_ACCESS support. + @param mp Mount to test. + @return Nonzero if VNOP_ACCESS is supported remotely, else 0. + */ int vfs_authopaqueaccess(mount_t); + +/*! + @function vfs_setauthopaque + @abstract Mark a filesystem as having authorization decisions controlled remotely. + @param mp Mount to mark. + @return void. + */ void vfs_setauthopaque(mount_t); + +/*! + @function vfs_setauthopaqueaccess + @abstract Mark a filesystem as having remote VNOP_ACCESS support. + @param mp Mount to mark. + @return void. + */ void vfs_setauthopaqueaccess(mount_t); + +/*! + @function vfs_clearauthopaque + @abstract Mark a filesystem as not having remote authorization decisions. + @param mp Mount to mark. + @return void. + */ void vfs_clearauthopaque(mount_t); + +/*! + @function vfs_clearauthopaqueaccess + @abstract Mark a filesystem as not having remote VNOP_ACCESS support. + @param mp Mount to mark. + @return void. + */ void vfs_clearauthopaqueaccess(mount_t); -int vfs_extendedsecurity(mount_t); + +/*! + @function vfs_setextendedsecurity + @abstract Mark a filesystem as supporting security controls beyond POSIX permissions. + @discussion Specific controls include ACLs, file owner UUIDs, and group UUIDs. + @param mp Mount to test. + @return void. + */ void vfs_setextendedsecurity(mount_t); + +/*! + @function vfs_clearextendedsecurity + @abstract Mark a filesystem as NOT supporting security controls beyond POSIX permissions. + @discussion Specific controls include ACLs, file owner UUIDs, and group UUIDs. + @param mp Mount to test. + @return void. + */ void vfs_clearextendedsecurity(mount_t); + +/*! + @function vfs_setlocklocal + @abstract Mark a filesystem as using VFS-level advisory locking support. + @discussion Advisory locking operations will not call down to the filesystem if this flag is set. + @param mp Mount to mark. + @return void. 
+ */ void vfs_setlocklocal(mount_t); + +/*! + @function vfs_authcache_ttl + @abstract Determine the time-to-live of cached authorized credentials for files in this filesystem. + @discussion If a filesystem is set to allow caching credentials, the VFS layer can authorize + previously-authorized actions from the same vfs_context_t without calling down to the filesystem (though + it will not deny based on the cache). + @param mp Mount for which to check cache lifetime. + @return Cache lifetime in seconds. CACHED_RIGHT_INFINITE_TTL indicates that credentials never expire. + */ int vfs_authcache_ttl(mount_t); + +/*! + @function vfs_setauthcache_ttl + @abstract Enable credential caching and set time-to-live of cached authorized credentials for files in this filesystem. + @discussion If a filesystem is set to allow caching credentials, the VFS layer can authorize + previously-authorized actions from the same vfs_context_t without calling down to the filesystem (though + it will not deny based on the cache). + @param mp Mount for which to set cache lifetime. + @return void. + */ void vfs_setauthcache_ttl(mount_t, int); + +/*! + @function vfs_clearauthcache_ttl + @abstract Remove time-to-live controls for cached credentials on a filesystem. Filesystems with remote authorization + decisions (opaque) will still have KAUTH_VNODE_SEARCH rights cached for a default of CACHED_LOOKUP_RIGHT_TTL seconds. + @param mp Mount for which to clear cache lifetime. + @return void. + */ void vfs_clearauthcache_ttl(mount_t); -void vfs_markdependency(mount_t); /* * return value from vfs_cachedrights_ttl if @@ -585,39 +960,213 @@ void vfs_markdependency(mount_t); */ #define CACHED_RIGHT_INFINITE_TTL ~0 - +/*! + @function vfs_maxsymlen + @abstract Get the maximum length of a symbolic link on a filesystem. + @param mp Mount from which to get symlink length cap. + @return Max symlink length. + */ uint32_t vfs_maxsymlen(mount_t); + +/*! 
+ @function vfs_setmaxsymlen + @abstract Set the maximum length of a symbolic link on a filesystem. + @param mp Mount on which to set symlink length cap. + @param symlen Length to set. + @return void. + */ void vfs_setmaxsymlen(mount_t, uint32_t); + +/*! + @function vfs_fsprivate + @abstract Get filesystem-private mount data. + @discussion A filesystem generally has an internal mount structure which it attaches to the VFS-level mount structure + as part of the mounting process. + @param mp Mount for which to get private data. + @return Private data. + */ void * vfs_fsprivate(mount_t); + +/*! + @function vfs_setfsprivate + @abstract Set filesystem-private mount data. + @discussion A filesystem generally has an internal mount structure which it attaches to the VFS-level mount structure + as part of the mounting process. + @param mp Mount for which to set private data. + @return Void. + */ void vfs_setfsprivate(mount_t, void *mntdata); +/*! + @function vfs_statfs + @abstract Get information about filesystem status. + @discussion Each filesystem has a struct vfsstatfs associated with it which is updated as events occur; this function + returns a pointer to it. Note that the data in the structure will continue to change over time and also that it may + be quite stale if vfs_update_vfsstat has not been called recently. + @param mp Mount for which to get vfsstatfs pointer. + @return Pointer to vfsstatfs. + */ struct vfsstatfs * vfs_statfs(mount_t); #define VFS_USER_EVENT 0 #define VFS_KERNEL_EVENT 1 + +/*! + @function vfs_update_vfsstat + @abstract Update cached filesystem status information in the VFS mount structure. + @discussion Each filesystem has a struct vfsstatfs associated with it which is updated as events occur; this function + updates it so that the structure pointer returned by vfs_statfs() returns a pointer to fairly recent data. + @param mp Mount for which to update cached status information. 
+ @param ctx Context to authenticate against for call down to filesystem. + @param eventtype VFS_USER_EVENT: need for update is driven by user-level request; perform additional authentication. + VFS_KERNEL_EVENT: need for update is driven by in-kernel events. Skip extra authentication. + @return 0 for success, or an error code for authentication failure or problem with call to filesystem to + request information. + */ int vfs_update_vfsstat(mount_t, vfs_context_t, int eventtype); -int vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); -int vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); +/*! + @function vfs_typenum + @abstract Get (archaic) filesystem type number. + @discussion Filesystem type numbers are an old construct; most filesystems just get a number assigned based on + the order in which they are registered with the system. + @param mp Mount for which to get type number. + @return Type number. + */ int vfs_typenum(mount_t); + +/*! + @function vfs_name + @abstract Copy filesystem name into a buffer. + @discussion Get filesystem name; this refers to the filesystem type of which a mount is an instantiation, + rather than a name specific to the mountpoint. + @param mp Mount for which to get name. + @param buffer Destination for name; length should be at least MFSNAMELEN. + @return void. + */ void vfs_name(mount_t, char *); + +/*! + @function vfs_devblocksize + @abstract Get the block size of the device underlying a mount. + @param mp Mount for which to get block size. + @return Block size. + */ int vfs_devblocksize(mount_t); + +/*! + @function vfs_ioattr + @abstract Get I/O attributes associated with a mountpoint. + @param mp Mount for which to get attributes. If NULL, system defaults are filled into ioattrp. + @param ioattrp Destination for results. + @return void. + */ void vfs_ioattr(mount_t, struct vfsioattr *); + +/*! + @function vfs_setioattr + @abstract Set I/O attributes associated with a mountpoint. 
+ @param mp Mount for which to set attributes. + @param ioattrp Structure containing I/O parameters; all fields must be filled in. + @return void. + */ void vfs_setioattr(mount_t, struct vfsioattr *); + +/*! + @function vfs_64bitready + @abstract Check if the filesystem associated with a mountpoint is marked ready for interaction with 64-bit user processes. + @param mp Mount to test. + @return Nonzero if filesystem is ready for 64-bit; 0 otherwise. + */ int vfs_64bitready(mount_t); #define LK_NOWAIT 1 +/*! + @function vfs_busy + @abstract "Busy" a mountpoint. + @discussion vfs_busy() will "busy" a mountpoint, preventing unmounts from taking off, by taking its reader-writer lock + in a shared manner. If a mount is dead, + it will fail; if an unmount is in progress, depending on flags, it will either fail immediately or block + until the unmount completes (then failing if the unmount has succeeded, or potentially succeeding if unmounting failed). + A successful vfs_busy() must be followed by a vfs_unbusy() to release the lock on the mount. + @param mp Mount to busy. + @param flags LK_NOWAIT: fail with ENOENT if an unmount is in progress. + @return 0 for success, with a lock held; an error code otherwise, with no lock held. + */ int vfs_busy(mount_t, int); + +/*! + @function vfs_unbusy + @abstract "Unbusy" a mountpoint by releasing its read-write lock. + @discussion A successful vfs_busy() must be followed by a vfs_unbusy() to release the lock on the mount. + @param mp Mount to unbusy. + @return void. + */ void vfs_unbusy(mount_t); +/*! + @function vfs_getnewfsid + @abstract Generate a unique filesystem ID for a mount and store it in the mount structure. + @discussion Filesystem IDs are returned as part of "struct statfs." This function is typically + called as part of file-system specific mount code (i.e. through VFS_MOUNT). + @param mp Mount to set an ID for. + @return void. + */ void vfs_getnewfsid(struct mount *); + +/*! 
+ @function vfs_getvfs + @abstract Given a filesystem ID, look up a mount structure. + @param fsid Filesystem ID to look up. + @return Mountpoint if found, else NULL. Note unmounting mountpoints can be returned. + */ mount_t vfs_getvfs(fsid_t *); -mount_t vfs_getvfs_by_mntonname(char *); + +/*! + @function vfs_mountedon + @abstract Check whether a given block device has a filesystem mounted on it. + @discussion Note that this is NOT a check for a covered vnode (the directory upon which + a filesystem is mounted)--it is a test for whether a block device is being used as the source + of a filesystem. Note that a block device marked as being mounted on cannot be opened. + @param vp The vnode to test. + @return EBUSY if vnode is indeed the source of a filesystem; 0 if it is not. + */ int vfs_mountedon(struct vnode *); + +/*! + @function vfs_unmountbyfsid + @abstract Find a filesystem by ID and unmount it. + @param fsid ID of filesystem to unmount, as found through (for example) statfs. + @param flags MNT_FORCE: forcibly invalidate files open on the mount (though in-flight I/O operations + will be allowed to complete). + @param ctx Context against which to authenticate unmount operation. + @return 0 for success, nonzero for failure. + */ int vfs_unmountbyfsid(fsid_t *, int, vfs_context_t); +/*! + @function vfs_event_signal + @abstract Post a kqueue-style event on a filesystem (EVFILT_FS). + @param fsid Unused. + @param event Events to post. + @param data Unused. + @return void. + */ void vfs_event_signal(fsid_t *, u_int32_t, intptr_t); -void vfs_event_init(void); +/*! + @function vfs_event_init + @abstract This function should not be called by kexts. 
+ */ +void vfs_event_init(void); /* XXX We should not export this */ +#ifdef KERNEL_PRIVATE +int vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); +int vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); +int vfs_extendedsecurity(mount_t); +mount_t vfs_getvfs_by_mntonname(char *); +void vfs_markdependency(mount_t); +vnode_t vfs_vnodecovered(mount_t mp); /* Returns vnode with an iocount that must be released with vnode_put() */ +void * vfs_mntlabel(mount_t mp); /* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers. */ +#endif /* KERNEL_PRIVATE */ __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index eb5966172..6cca245d5 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -87,6 +87,15 @@ struct label; +#if defined(__i386__) || defined(__x86_64__) +typedef uint64_t pending_io_t; +#define INCR_PENDING_IO(a, b) OSAddAtomic64((int64_t)(a), (int64_t *)&(b)); +#else +typedef uint32_t pending_io_t; +#define INCR_PENDING_IO(a, b) OSAddAtomic((int32_t)(a), (int32_t *)&(b)); +#endif + + /* * Structure per mounted file system. Each mounted file system has an * array of operations and an instance record. 
The file systems are @@ -104,26 +113,32 @@ struct mount { struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ struct vnodelst mnt_workerqueue; /* list of vnodes this mount */ struct vnodelst mnt_newvnodes; /* list of vnodes this mount */ - int mnt_flag; /* flags */ - int mnt_kern_flag; /* kernel only flags */ - int mnt_lflag; /* mount life cycle flags */ - int mnt_maxsymlinklen; /* max size of short symlink */ + uint32_t mnt_flag; /* flags */ + uint32_t mnt_kern_flag; /* kernel only flags */ + uint32_t mnt_lflag; /* mount life cycle flags */ + uint32_t mnt_maxsymlinklen; /* max size of short symlink */ struct vfsstatfs mnt_vfsstat; /* cache of filesystem stats */ qaddr_t mnt_data; /* private data */ /* Cached values of the IO constraints for the device */ - u_int32_t mnt_maxreadcnt; /* Max. byte count for read */ - u_int32_t mnt_maxwritecnt; /* Max. byte count for write */ - u_int32_t mnt_segreadcnt; /* Max. segment count for read */ - u_int32_t mnt_segwritecnt; /* Max. segment count for write */ - u_int32_t mnt_maxsegreadsize; /* Max. segment read size */ - u_int32_t mnt_maxsegwritesize; /* Max. segment write size */ - u_int32_t mnt_alignmentmask; /* Mask of bits that aren't addressable via DMA */ - u_int32_t mnt_devblocksize; /* the underlying device block size */ - u_int32_t mnt_ioflags; /* flags for underlying device */ + uint32_t mnt_maxreadcnt; /* Max. byte count for read */ + uint32_t mnt_maxwritecnt; /* Max. byte count for write */ + uint32_t mnt_segreadcnt; /* Max. segment count for read */ + uint32_t mnt_segwritecnt; /* Max. segment count for write */ + uint32_t mnt_maxsegreadsize; /* Max. segment read size */ + uint32_t mnt_maxsegwritesize; /* Max. 
segment write size */ + uint32_t mnt_alignmentmask; /* Mask of bits that aren't addressable via DMA */ + uint32_t mnt_devblocksize; /* the underlying device block size */ + uint32_t mnt_ioqueue_depth; /* the maximum number of commands a device can accept */ + uint32_t mnt_ioscale; /* scale the various throttles/limits imposed on the amount of I/O in flight */ + uint32_t mnt_ioflags; /* flags for underlying device */ + pending_io_t mnt_pending_write_size; /* byte count of pending writes */ + pending_io_t mnt_pending_read_size; /* byte count of pending reads */ + lck_rw_t mnt_rwlock; /* mutex readwrite lock */ - lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ + lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ vnode_t mnt_devvp; /* the device mounted on for local file systems */ uint32_t mnt_devbsdunit; /* the BSD unit number of the device */ + void *mnt_throttle_info; /* used by the throttle code */ int32_t mnt_crossref; /* refernces to cover lookups crossing into mp */ int32_t mnt_iterref; /* refernces to cover iterations; drained makes it -ve */ @@ -149,13 +164,13 @@ struct mount { * on it when we mount it */ vnode_t mnt_realrootvp; - int mnt_realrootvp_vid; + uint32_t mnt_realrootvp_vid; /* * bumped each time a mount or unmount * occurs... its used to invalidate * 'mnt_realrootvp' from the cache */ - int mnt_generation; + uint32_t mnt_generation; /* * if 'MNTK_AUTH_CACHE_TIMEOUT' is * set, then 'mnt_authcache_ttl' is @@ -188,6 +203,11 @@ struct mount { */ #define MNT_IOFLAGS_FUA_SUPPORTED 0x00000001 +/* + * ioqueue depth for devices that don't report one + */ +#define MNT_DEFAULT_IOQUEUE_DEPTH 32 + /* XXX 3762912 hack to support HFS filesystem 'owner' */ #define vfs_setowner(_mp, _uid, _gid) do {(_mp)->mnt_fsowner = (_uid); (_mp)->mnt_fsgroup = (_gid); } while (0) @@ -258,7 +278,6 @@ typedef struct fhandle fhandle_t; * mount time to identify the requested filesystem.
*/ struct vfstable { -/* THE FOLLOWING SHOULD KEEP THE SAME FOR user compat with sysctl */ struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ @@ -266,23 +285,28 @@ struct vfstable { int vfc_flags; /* permanent flags */ int (*vfc_mountroot)(mount_t, vnode_t, vfs_context_t); /* if != NULL, routine to mount root */ struct vfstable *vfc_next; /* next in list */ -/* Till the above we SHOULD KEEP THE SAME FOR user compat with sysctl */ - int vfc_threadsafe; /* FS is thread & premeption safe */ - lck_mtx_t vfc_lock; /* for non-threaded file systems */ + int32_t vfc_reserved1; + int32_t vfc_reserved2; int vfc_vfsflags; /* for optional types */ void * vfc_descptr; /* desc table allocated address */ int vfc_descsize; /* size allocated for desc table */ - int vfc_64bitready; /* The file system is ready for 64bit */ }; /* vfc_vfsflags: */ -#define VFC_VFSLOCALARGS 0x02 -#define VFC_VFSGENERICARGS 0x04 -#define VFC_VFSNATIVEXATTR 0x10 -#define VFC_VFSDIRLINKS 0x20 -#define VFC_VFSPREFLIGHT 0x40 -#define VFC_VFSREADDIR_EXTENDED 0x80 +#define VFC_VFSLOCALARGS 0x002 +#define VFC_VFSGENERICARGS 0x004 +#define VFC_VFSNATIVEXATTR 0x010 +#define VFC_VFSDIRLINKS 0x020 +#define VFC_VFSPREFLIGHT 0x040 +#define VFC_VFSREADDIR_EXTENDED 0x080 +#define VFC_VFS64BITREADY 0x100 +#ifndef __LP64__ +#define VFC_VFSTHREADSAFE 0x200 +#endif /* __LP64__ */ #define VFC_VFSNOMACLABEL 0x1000 +#define VFC_VFSVNOP_PAGEINV2 0x2000 +#define VFC_VFSVNOP_PAGEOUTV2 0x4000 + extern int maxvfsconf; /* highest defined filesystem type */ extern struct vfstable *vfsconf; /* head of list of filesystem types */ @@ -308,35 +332,57 @@ struct vfsmount_args { /* - * LP64 version of statfs structure. + * LP64 *user* version of statfs structure. 
* NOTE - must be kept in sync with struct statfs in mount.h */ -struct user_statfs { +struct user64_statfs { short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ - user_long_t f_bsize __attribute((aligned(8))); /* fundamental file system block size */ - user_long_t f_iosize; /* optimal transfer block size */ - user_long_t f_blocks; /* total data blocks in file system */ - user_long_t f_bfree; /* free blocks in fs */ - user_long_t f_bavail; /* free blocks avail to non-superuser */ - user_long_t f_files; /* total file nodes in file system */ - user_long_t f_ffree; /* free file nodes in fs */ + user64_long_t f_bsize; /* fundamental file system block size */ + user64_long_t f_iosize; /* optimal transfer block size */ + user64_long_t f_blocks; /* total data blocks in file system */ + user64_long_t f_bfree; /* free blocks in fs */ + user64_long_t f_bavail; /* free blocks avail to non-superuser */ + user64_long_t f_files; /* total file nodes in file system */ + user64_long_t f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the filesystem */ short f_reserved1; /* spare for later */ short f_type; /* type of filesystem */ - user_long_t f_flags; /* copy of mount exported flags */ - user_long_t f_reserved2[2]; /* reserved for future use */ + user64_long_t f_flags; /* copy of mount exported flags */ + user64_long_t f_reserved2[2]; /* reserved for future use */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ -#if COMPAT_GETFSSTAT - char f_reserved3[0]; /* For alignment */ - user_long_t f_reserved4[0]; /* For future use */ -#else char f_reserved3; /* For alignment */ - user_long_t f_reserved4[4] __attribute((aligned(8))); /* For future use */ -#endif + user64_long_t f_reserved4[4]; /* For future use */ +}; + +/* + * ILP32 *user* version of statfs 
structure. + * NOTE - must be kept in sync with struct statfs in mount.h + */ +struct user32_statfs { + short f_otype; /* TEMPORARY SHADOW COPY OF f_type */ + short f_oflags; /* TEMPORARY SHADOW COPY OF f_flags */ + user32_long_t f_bsize; /* fundamental file system block size */ + user32_long_t f_iosize; /* optimal transfer block size */ + user32_long_t f_blocks; /* total data blocks in file system */ + user32_long_t f_bfree; /* free blocks in fs */ + user32_long_t f_bavail; /* free blocks avail to non-superuser */ + user32_long_t f_files; /* total file nodes in file system */ + user32_long_t f_ffree; /* free file nodes in fs */ + fsid_t f_fsid; /* file system id */ + uid_t f_owner; /* user that mounted the filesystem */ + short f_reserved1; /* spare for later */ + short f_type; /* type of filesystem */ + user32_long_t f_flags; /* copy of mount exported flags */ + user32_long_t f_reserved2[2]; /* reserved for future use */ + char f_fstypename[MFSNAMELEN]; /* fs type name */ + char f_mntonname[MNAMELEN]; /* directory on which mounted */ + char f_mntfromname[MNAMELEN];/* mounted filesystem */ + char f_reserved3; /* For alignment */ + user32_long_t f_reserved4[4]; /* For future use */ }; /* @@ -350,13 +396,14 @@ struct user_statfs { __BEGIN_DECLS -extern int mount_generation; +extern uint32_t mount_generation; extern TAILQ_HEAD(mntlist, mount) mountlist; void mount_list_lock(void); void mount_list_unlock(void); void mount_lock_init(mount_t); void mount_lock_destroy(mount_t); void mount_lock(mount_t); +void mount_lock_spin(mount_t); void mount_unlock(mount_t); void mount_lock_renames(mount_t); void mount_unlock_renames(mount_t); @@ -366,7 +413,6 @@ int mount_refdrain(mount_t); /* vfs_rootmountalloc should be kept as a private api */ errno_t vfs_rootmountalloc(const char *, const char *, mount_t *mpp); -errno_t vfs_init_io_attributes(vnode_t, mount_t); int vfs_mountroot(void); void vfs_unmountall(void); @@ -377,7 +423,7 @@ int dounmount(struct mount *, int, int, 
vfs_context_t); void mount_dropcrossref(mount_t, vnode_t, int); mount_t mount_lookupby_volfsid(int, int); mount_t mount_list_lookupby_fsid(fsid_t *, int, int); -void mount_list_add(mount_t); +int mount_list_add(mount_t); void mount_list_remove(mount_t); int mount_iterref(mount_t, int); int mount_isdrained(mount_t, int); @@ -388,7 +434,7 @@ void mount_iterreset(mount_t); /* throttled I/O api */ int throttle_get_io_policy(struct uthread **ut); extern void throttle_lowpri_io(boolean_t ok_to_sleep); -int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit); +int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp); __END_DECLS diff --git a/bsd/sys/msg.h b/bsd/sys/msg.h index 7286e0473..9d856a984 100644 --- a/bsd/sys/msg.h +++ b/bsd/sys/msg.h @@ -96,12 +96,9 @@ typedef __darwin_ssize_t ssize_t; /* [XSI] Used for the number of messages in the message queue */ typedef unsigned long msgqnum_t; -typedef unsigned long long user_msgqnum_t; /* [XSI] Used for the number of bytes allowed in a message queue */ typedef unsigned long msglen_t; -typedef unsigned long long user_msglen_t; - /* * Possible values for the fifth parameter to msgrcv(), in addition to the @@ -186,6 +183,15 @@ struct __msqid_ds_old { #pragma options align=natural #endif +typedef user_ulong_t user_msgqnum_t; +typedef user64_ulong_t user64_msgqnum_t; +typedef user32_ulong_t user32_msgqnum_t; + +typedef user_ulong_t user_msglen_t; +typedef user64_ulong_t user64_msglen_t; +typedef user32_ulong_t user32_msglen_t; + +/* kernel version */ struct user_msqid_ds { struct ipc_perm msg_perm; /* [XSI] msg queue permissions */ struct msg *msg_first; /* first message in the queue */ @@ -204,6 +210,47 @@ struct user_msqid_ds { __int32_t msg_pad4[4]; }; +/* + * user64 version - this structure only has to be correct if + * compiled LP64, because the 32 bit kernel doesn't need it + */ +struct user64_msqid_ds { + struct ipc_perm msg_perm; /* [XSI] msg queue permissions */ + __int32_t 
msg_first; /* RESERVED: kernel use only */ + __int32_t msg_last; /* RESERVED: kernel use only */ + user64_msglen_t msg_cbytes; /* # of bytes on the queue */ + user64_msgqnum_t msg_qnum; /* [XSI] number of msgs on the queue */ + user64_msglen_t msg_qbytes; /* [XSI] max bytes on the queue */ + pid_t msg_lspid; /* [XSI] pid of last msgsnd() */ + pid_t msg_lrpid; /* [XSI] pid of last msgrcv() */ + user64_time_t msg_stime; /* [XSI] time of last msgsnd() */ + __int32_t msg_pad1; /* RESERVED: DO NOT USE */ + user64_time_t msg_rtime; /* [XSI] time of last msgrcv() */ + __int32_t msg_pad2; /* RESERVED: DO NOT USE */ + user64_time_t msg_ctime; /* [XSI] time of last msgctl() */ + __int32_t msg_pad3; /* RESERVED: DO NOT USE */ + __int32_t msg_pad4[4]; +} __attribute__((__packed__)); + +struct user32_msqid_ds +{ + struct __ipc_perm_new msg_perm; /* [XSI] msg queue permissions */ + __int32_t msg_first; /* RESERVED: kernel use only */ + __int32_t msg_last; /* RESERVED: kernel use only */ + user32_msglen_t msg_cbytes; /* # of bytes on the queue */ + user32_msgqnum_t msg_qnum; /* [XSI] number of msgs on the queue */ + user32_msglen_t msg_qbytes; /* [XSI] max bytes on the queue */ + pid_t msg_lspid; /* [XSI] pid of last msgsnd() */ + pid_t msg_lrpid; /* [XSI] pid of last msgrcv() */ + user32_time_t msg_stime; /* [XSI] time of last msgsnd() */ + __int32_t msg_pad1; /* RESERVED: DO NOT USE */ + user32_time_t msg_rtime; /* [XSI] time of last msgrcv() */ + __int32_t msg_pad2; /* RESERVED: DO NOT USE */ + user32_time_t msg_ctime; /* [XSI] time of last msgctl() */ + __int32_t msg_pad3; /* RESERVED: DO NOT USE */ + __int32_t msg_pad4[4]; /* RESERVED: DO NOT USE */ +}; + #if __DARWIN_ALIGN_NATURAL #pragma options align=reset #endif diff --git a/bsd/sys/msgbuf.h b/bsd/sys/msgbuf.h index 5b2a5fd2d..e05b73e9e 100644 --- a/bsd/sys/msgbuf.h +++ b/bsd/sys/msgbuf.h @@ -80,7 +80,7 @@ extern struct msgbuf *msgbufp; extern void log_putc(char); extern void log_putc_locked(char); extern void 
log_setsize(long size); -extern int log_dmesg(user_addr_t, uint32_t, register_t *); +extern int log_dmesg(user_addr_t, uint32_t, int32_t *); __END_DECLS #endif #endif /* !_SYS_MSGBUF_H_ */ diff --git a/bsd/sys/mtio.h b/bsd/sys/mtio.h deleted file mode 100644 index 31ae0dad5..000000000 --- a/bsd/sys/mtio.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)mtio.h 8.1 (Berkeley) 6/2/93 - */ - -#ifndef _SYS_MTIO_H_ -#define _SYS_MTIO_H_ - -#include -#include - -#ifdef __APPLE_API_OBSOLETE - -/* - * Structures and definitions for mag tape io control commands - */ - -/* structure for MTIOCTOP - mag tape op command */ -/* LP64todo - not 64-bit safe */ -struct mtop { - short mt_op; /* operations defined below */ - daddr_t mt_count; /* how many of them */ -}; - -/* operations */ -#define MTWEOF 0 /* write an end-of-file record */ -#define MTFSF 1 /* forward space file */ -#define MTBSF 2 /* backward space file */ -#define MTFSR 3 /* forward space record */ -#define MTBSR 4 /* backward space record */ -#define MTREW 5 /* rewind */ -#define MTOFFL 6 /* rewind and put the drive offline */ -#define MTNOP 7 /* no operation, sets status only */ -#define MTRETEN 8 /* retension */ -#define MTERASE 9 /* erase entire tape */ -#define MTEOM 10 /* forward to end of media */ -#define MTNBSF 11 /* backward space to beginning of file */ -#define MTCACHE 12 /* enable controller cache */ -#define MTNOCACHE 13 /* disable controller cache */ -#define MTSETBSIZ 14 /* set block size; 0 for variable */ -#define MTSETDNSTY 15 /* set density code for current mode */ - -/* structure for MTIOCGET - mag tape get status command */ -/* LP64todo - not 64-bit safe */ -struct mtget { - short mt_type; /* type of magtape device */ -/* the following two registers are grossly device dependent */ - u_short mt_dsreg; /* ``drive status'' register. SCSI sense byte 0x02. */ - u_short mt_erreg; /* ``error'' register. SCSI sense byte 0x0C. 
*/ - u_short mt_ext_err0; /* SCSI sense bytes 0x13..0x14 */ - u_short mt_ext_err1; /* SCSI sense bytes 0x15..0x16 */ -/* end device-dependent registers */ - short mt_resid; /* residual count */ -/* the following two are not yet implemented */ - daddr_t mt_fileno; /* file number of current position */ - daddr_t mt_blkno; /* block number of current position */ -/* end not yet implemented */ - daddr_t mt_blksiz; /* current block size */ - daddr_t mt_density; /* current density code */ - daddr_t mt_mblksiz[4]; /* block size for different modes */ - daddr_t mt_mdensity[4]; /* density codes for different modes */ -}; - -/* - * Constants for mt_type byte. These are the same - * for controllers compatible with the types listed. - */ -#define MT_ISTS 0x01 /* TS-11 */ -#define MT_ISHT 0x02 /* TM03 Massbus: TE16, TU45, TU77 */ -#define MT_ISTM 0x03 /* TM11/TE10 Unibus */ -#define MT_ISMT 0x04 /* TM78/TU78 Massbus */ -#define MT_ISUT 0x05 /* SI TU-45 emulation on Unibus */ -#define MT_ISCPC 0x06 /* SUN */ -#define MT_ISAR 0x07 /* SUN */ -#define MT_ISTMSCP 0x08 /* DEC TMSCP protocol (TU81, TK50) */ -#define MT_ISCY 0x09 /* CCI Cipher */ -#define MT_ISCT 0x0a /* HP 1/4 tape */ -#define MT_ISFHP 0x0b /* HP 7980 1/2 tape */ -#define MT_ISEXABYTE 0x0c /* Exabyte */ -#define MT_ISEXA8200 0x0c /* Exabyte EXB-8200 */ -#define MT_ISEXA8500 0x0d /* Exabyte EXB-8500 */ -#define MT_ISVIPER1 0x0e /* Archive Viper-150 */ -#define MT_ISPYTHON 0x0f /* Archive Python (DAT) */ -#define MT_ISHPDAT 0x10 /* HP 35450A DAT drive */ -#define MT_ISWANGTEK 0x11 /* WANGTEK 5150ES */ -#define MT_ISCALIPER 0x12 /* Caliper CP150 */ -#define MT_ISWTEK5099 0x13 /* WANGTEK 5099ES */ -#define MT_ISVIPER2525 0x14 /* Archive Viper 2525 */ -#define MT_ISMFOUR 0x11 /* M4 Data 1/2 9track drive */ -#define MT_ISTK50 0x12 /* DEC SCSI TK50 */ -#define MT_ISMT02 0x13 /* Emulex MT02 SCSI tape controller */ -#define MT_ISGS 0x14 /* Generic SCSI Tape */ - -/* mag tape io control commands */ -#define MTIOCTOP _IOW('m', 1, 
struct mtop) /* do a mag tape op */ -#define MTIOCGET _IOR('m', 2, struct mtget) /* get tape status */ -#define MTIOCIEOT _IO('m', 3) /* ignore EOT error */ -#define MTIOCEEOT _IO('m', 4) /* enable EOT error */ - -#ifndef KERNEL -#define DEFTAPE "/dev/rst0" -#endif - -#ifdef KERNEL -/* - * minor device number - */ - -#define T_UNIT 003 /* unit selection */ -#define T_NOREWIND 004 /* no rewind on close */ -#define T_DENSEL 030 /* density select */ -#define T_800BPI 000 /* select 800 bpi */ -#define T_1600BPI 010 /* select 1600 bpi */ -#define T_6250BPI 020 /* select 6250 bpi */ -#define T_BADBPI 030 /* undefined selection */ -#endif - -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* !_SYS_MTIO_H_ */ diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h index 50706beec..5aa2f701a 100644 --- a/bsd/sys/namei.h +++ b/bsd/sys/namei.h @@ -155,7 +155,9 @@ struct nameidata { #define AUDITVNPATH2 0x00200000 /* audit the path/vnode info */ #define USEDVP 0x00400000 /* start the lookup at ndp.ni_dvp */ #define CN_VOLFSPATH 0x00800000 /* user path was a volfs style path */ +#ifndef __LP64__ #define FSNODELOCKHELD 0x01000000 +#endif /* __LP64__ */ #define UNIONCREATED 0x02000000 /* union fs creation of vnode */ #if NAMEDRSRCFORK #define CN_WANTSRSRCFORK 0x04000000 @@ -173,9 +175,6 @@ struct nameidata { if ((segflg) == UIO_USERSPACE) { \ (ndp)->ni_segflg = ((IS_64BIT_PROCESS(vfs_context_proc(ctx))) ? UIO_USERSPACE64 : UIO_USERSPACE32); \ } \ - else if ((segflg) == UIO_SYSSPACE) { \ - (ndp)->ni_segflg = UIO_SYSSPACE32; \ - } \ else { \ (ndp)->ni_segflg = segflg; \ } \ diff --git a/bsd/sys/netport.h b/bsd/sys/netport.h index a56f0ce10..74eba9efa 100644 --- a/bsd/sys/netport.h +++ b/bsd/sys/netport.h @@ -32,7 +32,7 @@ #ifndef _SYS_NETPORT_H_ #define _SYS_NETPORT_H_ -typedef unsigned long netaddr_t; +typedef uint32_t netaddr_t; /* * Network Port structure. 
diff --git a/bsd/sys/param.h b/bsd/sys/param.h index f655358eb..93f926150 100644 --- a/bsd/sys/param.h +++ b/bsd/sys/param.h @@ -145,7 +145,8 @@ #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PTTYBLOCK 0x200 /* for tty SIGTTOU and SIGTTIN blocking */ -#define PDROP 0x400 /* OR'd with pri to stop re-entry of interlock mutex */ +#define PDROP 0x400 /* OR'd with pri to stop re-acquisition of mutex upon wakeup */ +#define PSPIN 0x800 /* OR'd with pri to require mutex in spin mode upon wakeup */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ @@ -216,7 +217,7 @@ /* Macros for counting and rounding. */ #ifndef howmany -#define howmany(x, y) (((x)+((y)-1))/(y)) +#define howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) #endif #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) #define powerof2(x) ((((x)-1)&(x))==0) diff --git a/bsd/sys/pipe.h b/bsd/sys/pipe.h index a13a27598..71557f0b0 100644 --- a/bsd/sys/pipe.h +++ b/bsd/sys/pipe.h @@ -107,7 +107,6 @@ struct pipebuf { /* * Information to support direct transfers between processes for pipes. */ -/* LP64todo - not 64bit safe */ struct pipemapping { vm_offset_t kva; /* kernel virtual address */ vm_size_t cnt; /* number of chars in buffer */ @@ -130,7 +129,9 @@ struct pipemapping { #define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */ #define PIPE_DIRECTW 0x400 /* Pipe direct write active. */ #define PIPE_DIRECTOK 0x800 /* Direct mode ok. */ -#define PIPE_KNOTE 0x1000 /* Pipe has kernel events activated */ +#define PIPE_KNOTE 0x1000 /* Pipe has kernel events activated */ +#define PIPE_DRAIN 0x2000 /* Waiting for I/O to drop for a close. Treated like EOF; + only separate for easier debugging.
*/ #ifdef KERNEL diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index 6395fa2cf..92c86c0a1 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -81,15 +81,7 @@ #endif #include -#ifdef XNU_KERNEL_PRIVATE -#define PROC_DEF_ENABLED -#else -#ifndef KERNEL -#define PROC_DEF_ENABLED -#endif -#endif - -#ifdef PROC_DEF_ENABLED +#if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) struct session; struct pgrp; @@ -134,7 +126,7 @@ struct extern_proc { u_quad_t p_iticks; /* Statclock hits processing intr. */ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ - int p_siglist; /* DEPRECATED */ + int p_siglist; /* DEPRECATED. */ struct vnode *p_textvp; /* Vnode of executable. */ int p_holdcnt; /* If non-zero, don't swap. */ sigset_t p_sigmask; /* DEPRECATED. */ @@ -215,7 +207,7 @@ struct extern_proc { #define P_FSTRACE 0 /* Obsolete: retained for compilation */ #define P_SSTEP 0 /* Obsolete: retained for compilation */ -#endif /* PROC_DEF_ENABLED */ +#endif /* XNU_KERNEL_PRIVATE || !KERNEL */ #ifdef KERNEL __BEGIN_DECLS @@ -226,7 +218,6 @@ extern int proc_is_classic(proc_t p); proc_t current_proc_EXTERNAL(void); extern int msleep(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, struct timespec * ts ); -extern void unsleep(proc_t); extern void wakeup(void *chan); extern void wakeup_one(caddr_t chan); @@ -272,18 +263,27 @@ extern int proc_exiting(proc_t); int proc_suser(proc_t p); /* returns the cred assicaited with the process; temporary api */ kauth_cred_t proc_ucred(proc_t p); -#ifdef __APPLE_API_UNSTABLE -/* returns the first thread_t in the process, or NULL XXX for NFS, DO NOT USE */ -thread_t proc_thread(proc_t); -#endif -// mark a process as being allowed to call vfs_markdependency() -void bsd_set_dependency_capable(task_t task); -extern int proc_pendingsignals(proc_t, sigset_t); extern int proc_tbe(proc_t); +/*! + @function proc_selfpgrpid + @abstract Get the process group id for the current process, as with proc_pgrpid(). 
+ @return pgrpid of current process. + */ +pid_t proc_selfpgrpid(void); + +/*! + @function proc_pgrpid + @abstract Get the process group id for the passed-in process. + @param p Process whose pgrpid to grab. + @return pgrpid for "p". + */ +pid_t proc_pgrpid(proc_t); + #ifdef KERNEL_PRIVATE -/* LP64todo - figure out how to identify 64-bit processes if NULL procp */ +// mark a process as being allowed to call vfs_markdependency() +void bsd_set_dependency_capable(task_t task); extern int IS_64BIT_PROCESS(proc_t); extern int tsleep(void *chan, int pri, const char *wmesg, int timo); diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h index 98bf6d972..e22cd3ab4 100644 --- a/bsd/sys/proc_info.h +++ b/bsd/sys/proc_info.h @@ -45,7 +45,7 @@ __BEGIN_DECLS -#define PROC_ALL_PIDS 1 +#define PROC_ALL_PIDS 1 #define PROC_PGRP_ONLY 2 #define PROC_TTY_ONLY 3 #define PROC_UID_ONLY 4 @@ -63,15 +63,17 @@ struct proc_bsdinfo { gid_t pbi_rgid; uid_t pbi_svuid; gid_t pbi_svgid; - char pbi_comm[MAXCOMLEN + 1]; - char pbi_name[2*MAXCOMLEN + 1]; /* empty if no name is registered */ + uint32_t rfu_1; /* reserved */ + char pbi_comm[MAXCOMLEN]; + char pbi_name[2*MAXCOMLEN]; /* empty if no name is registered */ uint32_t pbi_nfiles; uint32_t pbi_pgid; uint32_t pbi_pjobc; uint32_t e_tdev; /* controlling tty dev */ uint32_t e_tpgid; /* tty process group id */ - struct timeval pbi_start; int32_t pbi_nice; + uint64_t pbi_start_tvsec; + uint64_t pbi_start_tvusec; }; @@ -86,47 +88,56 @@ struct proc_bsdinfo { #define PROC_FLAG_CTTY 0x40 #define PROC_FLAG_CONTROLT 0x80 #define PROC_FLAG_THCWD 0x100 +/* process control bits for resource starvation */ +#define PROC_FLAG_PC_THROTTLE 0x200 +#define PROC_FLAG_PC_SUSP 0x400 +#define PROC_FLAG_PC_KILL 0x600 +#define PROC_FLAG_PC_MASK 0x600 +/* process action bits for resource starvation */ +#define PROC_FLAG_PA_THROTTLE 0x800 +#define PROC_FLAG_PA_SUSP 0x1000 struct proc_taskinfo { - uint64_t pti_virtual_size; /* virtual memory size (bytes) */ - uint64_t 
pti_resident_size; /* resident memory size (bytes) */ + uint64_t pti_virtual_size; /* virtual memory size (bytes) */ + uint64_t pti_resident_size; /* resident memory size (bytes) */ uint64_t pti_total_user; /* total time */ uint64_t pti_total_system; uint64_t pti_threads_user; /* existing threads only */ uint64_t pti_threads_system; int32_t pti_policy; /* default policy for new threads */ int32_t pti_faults; /* number of page faults */ - int32_t pti_pageins; /* number of actual pageins */ - int32_t pti_cow_faults; /* number of copy-on-write faults */ + int32_t pti_pageins; /* number of actual pageins */ + int32_t pti_cow_faults; /* number of copy-on-write faults */ int32_t pti_messages_sent; /* number of messages sent */ - int32_t pti_messages_received; /* number of messages received */ - int32_t pti_syscalls_mach; /* number of mach system calls */ - int32_t pti_syscalls_unix; /* number of unix system calls */ - int32_t pti_csw; /* number of context switches */ + int32_t pti_messages_received; /* number of messages received */ + int32_t pti_syscalls_mach; /* number of mach system calls */ + int32_t pti_syscalls_unix; /* number of unix system calls */ + int32_t pti_csw; /* number of context switches */ int32_t pti_threadnum; /* number of threads in the task */ int32_t pti_numrunning; /* number of running threads */ int32_t pti_priority; /* task priority*/ }; struct proc_taskallinfo { - struct proc_bsdinfo pbsd; - struct proc_taskinfo ptinfo; + struct proc_bsdinfo pbsd; + struct proc_taskinfo ptinfo; }; +#define MAXTHREADNAMESIZE 64 struct proc_threadinfo { - uint64_t pth_user_time; /* user run time */ - uint64_t pth_system_time; /* system run time */ - int32_t pth_cpu_usage; /* scaled cpu usage percentage */ + uint64_t pth_user_time; /* user run time */ + uint64_t pth_system_time; /* system run time */ + int32_t pth_cpu_usage; /* scaled cpu usage percentage */ int32_t pth_policy; /* scheduling policy in effect */ - int32_t pth_run_state; /* run state (see below) */ 
- int32_t pth_flags; /* various flags (see below) */ - int32_t pth_sleep_time; /* number of seconds that thread */ + int32_t pth_run_state; /* run state (see below) */ + int32_t pth_flags; /* various flags (see below) */ + int32_t pth_sleep_time; /* number of seconds that thread */ int32_t pth_curpri; /* cur priority*/ int32_t pth_priority; /* priority*/ - int32_t pth_maxpriority; /* max priority*/ - char * pth_name[64]; /* thread name, if any */ + int32_t pth_maxpriority; /* max priority*/ + char pth_name[MAXTHREADNAMESIZE]; /* thread name, if any */ }; struct proc_regioninfo { @@ -140,11 +151,11 @@ struct proc_regioninfo { uint32_t pri_user_tag; uint32_t pri_pages_resident; uint32_t pri_pages_shared_now_private; - uint32_t pri_pages_swapped_out; - uint32_t pri_pages_dirtied; - uint32_t pri_ref_count; - uint32_t pri_shadow_depth; - uint32_t pri_share_mode; + uint32_t pri_pages_swapped_out; + uint32_t pri_pages_dirtied; + uint32_t pri_ref_count; + uint32_t pri_shadow_depth; + uint32_t pri_share_mode; uint32_t pri_private_pages_resident; uint32_t pri_shared_pages_resident; uint32_t pri_obj_id; @@ -184,12 +195,19 @@ struct proc_regioninfo { #define TH_FLAGS_IDLE 0x2 /* thread is an idle thread */ +struct proc_workqueueinfo { + uint32_t pwq_nthreads; /* total number of workqueue threads */ + uint32_t pwq_runthreads; /* total number of running workqueue threads */ + uint32_t pwq_blockedthreads; /* total number of blocked workqueue threads */ + uint32_t reserved[1]; /* reserved for future use */ +}; struct proc_fileinfo { uint32_t fi_openflags; uint32_t fi_status; off_t fi_offset; int32_t fi_type; + int32_t rfu_1; /* reserved */ }; /* stats flags in proc_fileinfo */ @@ -226,13 +244,13 @@ struct vinfo_stat { struct vnode_info { struct vinfo_stat vi_stat; int vi_type; - fsid_t vi_fsid; int vi_pad; + fsid_t vi_fsid; }; struct vnode_info_path { struct vnode_info vip_vi; - char vip_path[MAXPATHLEN]; /* tail end of it */ + char vip_path[MAXPATHLEN]; /* tail end of it */ }; 
struct vnode_fdinfo { @@ -241,23 +259,23 @@ struct vnode_fdinfo { }; struct vnode_fdinfowithpath { - struct proc_fileinfo pfi; - struct vnode_info_path pvip; + struct proc_fileinfo pfi; + struct vnode_info_path pvip; }; struct proc_regionwithpathinfo { - struct proc_regioninfo prp_prinfo; - struct vnode_info_path prp_vip; + struct proc_regioninfo prp_prinfo; + struct vnode_info_path prp_vip; }; struct proc_vnodepathinfo { - struct vnode_info_path pvi_cdir; - struct vnode_info_path pvi_rdir; + struct vnode_info_path pvi_cdir; + struct vnode_info_path pvi_rdir; }; struct proc_threadwithpathinfo { - struct proc_threadinfo pt; - struct vnode_info_path pvip; + struct proc_threadinfo pt; + struct vnode_info_path pvip; }; /* @@ -273,69 +291,71 @@ struct proc_threadwithpathinfo { #define INI_IPV6 0x2 struct in4in6_addr { - u_int32_t i46a_pad32[3]; - struct in_addr i46a_addr4; + u_int32_t i46a_pad32[3]; + struct in_addr i46a_addr4; }; struct in_sockinfo { - int insi_fport; /* foreign port */ - int insi_lport; /* local port */ + int insi_fport; /* foreign port */ + int insi_lport; /* local port */ uint64_t insi_gencnt; /* generation count of this instance */ uint32_t insi_flags; /* generic IP/datagram flags */ uint32_t insi_flow; uint8_t insi_vflag; /* ini_IPV4 or ini_IPV6 */ - uint8_t insi_ip_ttl; /* time to live proto */ + uint8_t insi_ip_ttl; /* time to live proto */ + uint32_t rfu_1; /* reserved */ /* protocol dependent part */ union { - struct in4in6_addr ina_46; - struct in6_addr ina_6; - } insi_faddr; /* foreign host table entry */ + struct in4in6_addr ina_46; + struct in6_addr ina_6; + } insi_faddr; /* foreign host table entry */ union { - struct in4in6_addr ina_46; - struct in6_addr ina_6; - } insi_laddr; /* local host table entry */ + struct in4in6_addr ina_46; + struct in6_addr ina_6; + } insi_laddr; /* local host table entry */ struct { - u_char in4_tos; /* type of service */ - } insi_v4; + u_char in4_tos; /* type of service */ + } insi_v4; struct { - uint8_t 
in6_hlim; - int in6_cksum; - u_short in6_ifindex; - short in6_hops; - } insi_v6; + uint8_t in6_hlim; + int in6_cksum; + u_short in6_ifindex; + short in6_hops; + } insi_v6; }; /* * TCP Sockets */ -#define TSI_T_REXMT 0 /* retransmit */ -#define TSI_T_PERSIST 1 /* retransmit persistence */ -#define TSI_T_KEEP 2 /* keep alive */ -#define TSI_T_2MSL 3 /* 2*msl quiet time timer */ +#define TSI_T_REXMT 0 /* retransmit */ +#define TSI_T_PERSIST 1 /* retransmit persistence */ +#define TSI_T_KEEP 2 /* keep alive */ +#define TSI_T_2MSL 3 /* 2*msl quiet time timer */ #define TSI_T_NTIMERS 4 -#define TSI_S_CLOSED 0 /* closed */ -#define TSI_S_LISTEN 1 /* listening for connection */ -#define TSI_S_SYN_SENT 2 /* active, have sent syn */ -#define TSI_S_SYN_RECEIVED 3 /* have send and received syn */ -#define TSI_S_ESTABLISHED 4 /* established */ -#define TSI_S__CLOSE_WAIT 5 /* rcvd fin, waiting for close */ -#define TSI_S_FIN_WAIT_1 6 /* have closed, sent fin */ -#define TSI_S_CLOSING 7 /* closed xchd FIN; await FIN ACK */ -#define TSI_S_LAST_ACK 8 /* had fin and close; await FIN ACK */ -#define TSI_S_FIN_WAIT_2 9 /* have closed, fin is acked */ -#define TSI_S_TIME_WAIT 10 /* in 2*msl quiet wait after close */ -#define TSI_S_RESERVED 11 /* pseudo state: reserved */ +#define TSI_S_CLOSED 0 /* closed */ +#define TSI_S_LISTEN 1 /* listening for connection */ +#define TSI_S_SYN_SENT 2 /* active, have sent syn */ +#define TSI_S_SYN_RECEIVED 3 /* have send and received syn */ +#define TSI_S_ESTABLISHED 4 /* established */ +#define TSI_S__CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +#define TSI_S_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TSI_S_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TSI_S_LAST_ACK 8 /* had fin and close; await FIN ACK */ +#define TSI_S_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TSI_S_TIME_WAIT 10 /* in 2*msl quiet wait after close */ +#define TSI_S_RESERVED 11 /* pseudo state: reserved */ struct tcp_sockinfo { - struct in_sockinfo 
tcpsi_ini; - int tcpsi_state; - int tcpsi_timer[TSI_T_NTIMERS]; - int tcpsi_mss; - uint32_t tcpsi_flags; - uint64_t tcpsi_tp; /* opaque handle of TCP protocol control block */ + struct in_sockinfo tcpsi_ini; + int tcpsi_state; + int tcpsi_timer[TSI_T_NTIMERS]; + int tcpsi_mss; + uint32_t tcpsi_flags; + uint32_t rfu_1; /* reserved */ + uint64_t tcpsi_tp; /* opaque handle of TCP protocol control block */ }; /* @@ -344,16 +364,16 @@ struct tcp_sockinfo { struct un_sockinfo { - uint64_t unsi_conn_so; /* opaque handle of connected socket */ - uint64_t unsi_conn_pcb; /* opaque handle of connected protocol control block */ + uint64_t unsi_conn_so; /* opaque handle of connected socket */ + uint64_t unsi_conn_pcb; /* opaque handle of connected protocol control block */ union { - struct sockaddr_un ua_sun; - char ua_dummy[SOCK_MAXADDRLEN]; - } unsi_addr; /* bound address */ + struct sockaddr_un ua_sun; + char ua_dummy[SOCK_MAXADDRLEN]; + } unsi_addr; /* bound address */ union { - struct sockaddr_un ua_sun; - char ua_dummy[SOCK_MAXADDRLEN]; - } unsi_caddr; /* address of socket connected to */ + struct sockaddr_un ua_sun; + char ua_dummy[SOCK_MAXADDRLEN]; + } unsi_caddr; /* address of socket connected to */ }; /* @@ -361,9 +381,9 @@ struct un_sockinfo { */ struct ndrv_info { - uint32_t ndrvsi_if_family; - uint32_t ndrvsi_if_unit; - char ndrvsi_if_name[IF_NAMESIZE]; + uint32_t ndrvsi_if_family; + uint32_t ndrvsi_if_unit; + char ndrvsi_if_name[IF_NAMESIZE]; }; /* @@ -371,9 +391,9 @@ struct ndrv_info { */ struct kern_event_info { - uint32_t kesi_vendor_code_filter; - uint32_t kesi_class_filter; - uint32_t kesi_subclass_filter; + uint32_t kesi_vendor_code_filter; + uint32_t kesi_class_filter; + uint32_t kesi_subclass_filter; }; /* @@ -381,59 +401,59 @@ struct kern_event_info { */ struct kern_ctl_info { - uint32_t kcsi_id; - uint32_t kcsi_reg_unit; - uint32_t kcsi_flags; /* support flags */ - uint32_t kcsi_recvbufsize; /* request more than the default buffer size */ - uint32_t 
kcsi_sendbufsize; /* request more than the default buffer size */ - uint32_t kcsi_unit; - char kcsi_name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ + uint32_t kcsi_id; + uint32_t kcsi_reg_unit; + uint32_t kcsi_flags; /* support flags */ + uint32_t kcsi_recvbufsize; /* request more than the default buffer size */ + uint32_t kcsi_sendbufsize; /* request more than the default buffer size */ + uint32_t kcsi_unit; + char kcsi_name[MAX_KCTL_NAME]; /* unique nke identifier, provided by DTS */ }; /* soi_state */ -#define SOI_S_NOFDREF 0x0001 /* no file table ref any more */ -#define SOI_S_ISCONNECTED 0x0002 /* socket connected to a peer */ -#define SOI_S_ISCONNECTING 0x0004 /* in process of connecting to peer */ -#define SOI_S_ISDISCONNECTING 0x0008 /* in process of disconnecting */ -#define SOI_S_CANTSENDMORE 0x0010 /* can't send more data to peer */ -#define SOI_S_CANTRCVMORE 0x0020 /* can't receive more data from peer */ -#define SOI_S_RCVATMARK 0x0040 /* at mark on input */ -#define SOI_S_PRIV 0x0080 /* privileged for broadcast, raw... */ -#define SOI_S_NBIO 0x0100 /* non-blocking ops */ -#define SOI_S_ASYNC 0x0200 /* async i/o notify */ -#define SOI_S_INCOMP 0x0800 /* Unaccepted, incomplete connection */ -#define SOI_S_COMP 0x1000 /* unaccepted, complete connection */ -#define SOI_S_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ -#define SOI_S_DRAINING 0x4000 /* close waiting for blocked system calls to drain */ +#define SOI_S_NOFDREF 0x0001 /* no file table ref any more */ +#define SOI_S_ISCONNECTED 0x0002 /* socket connected to a peer */ +#define SOI_S_ISCONNECTING 0x0004 /* in process of connecting to peer */ +#define SOI_S_ISDISCONNECTING 0x0008 /* in process of disconnecting */ +#define SOI_S_CANTSENDMORE 0x0010 /* can't send more data to peer */ +#define SOI_S_CANTRCVMORE 0x0020 /* can't receive more data from peer */ +#define SOI_S_RCVATMARK 0x0040 /* at mark on input */ +#define SOI_S_PRIV 0x0080 /* privileged for broadcast, raw... 
*/ +#define SOI_S_NBIO 0x0100 /* non-blocking ops */ +#define SOI_S_ASYNC 0x0200 /* async i/o notify */ +#define SOI_S_INCOMP 0x0800 /* Unaccepted, incomplete connection */ +#define SOI_S_COMP 0x1000 /* unaccepted, complete connection */ +#define SOI_S_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ +#define SOI_S_DRAINING 0x4000 /* close waiting for blocked system calls to drain */ struct sockbuf_info { - uint32_t sbi_cc; - uint32_t sbi_hiwat; /* SO_RCVBUF, SO_SNDBUF */ - uint32_t sbi_mbcnt; - uint32_t sbi_mbmax; - uint32_t sbi_lowat; - short sbi_flags; - short sbi_timeo; + uint32_t sbi_cc; + uint32_t sbi_hiwat; /* SO_RCVBUF, SO_SNDBUF */ + uint32_t sbi_mbcnt; + uint32_t sbi_mbmax; + uint32_t sbi_lowat; + short sbi_flags; + short sbi_timeo; }; enum { - SOCKINFO_GENERIC = 0, - SOCKINFO_IN = 1, + SOCKINFO_GENERIC = 0, + SOCKINFO_IN = 1, SOCKINFO_TCP = 2, - SOCKINFO_UN = 3, + SOCKINFO_UN = 3, SOCKINFO_NDRV = 4, SOCKINFO_KERN_EVENT = 5, SOCKINFO_KERN_CTL = 6 }; struct socket_info { - struct stat soi_stat; + struct vinfo_stat soi_stat; uint64_t soi_so; /* opaque handle of socket */ uint64_t soi_pcb; /* opaque handle of protocol control block */ - int soi_type; - int soi_protocol; - int soi_family; + int soi_type; + int soi_protocol; + int soi_family; short soi_options; short soi_linger; short soi_state; @@ -443,174 +463,193 @@ struct socket_info { short soi_timeo; u_short soi_error; uint32_t soi_oobmark; - struct sockbuf_info soi_rcv; - struct sockbuf_info soi_snd; - int soi_kind; + struct sockbuf_info soi_rcv; + struct sockbuf_info soi_snd; + int soi_kind; + uint32_t rfu_1; /* reserved */ union { - struct in_sockinfo pri_in; /* SOCKINFO_IN */ - struct tcp_sockinfo pri_tcp; /* SOCKINFO_TCP */ - struct un_sockinfo pri_un; /* SOCKINFO_UN */ - struct ndrv_info pri_ndrv; /* SOCKINFO_NDRV */ - struct kern_event_info pri_kern_event; /* SOCKINFO_KERN_EVENT */ - struct kern_ctl_info pri_kern_ctl; /* SOCKINFO_KERN_CTL */ - } soi_proto; + struct in_sockinfo pri_in; /* 
SOCKINFO_IN */ + struct tcp_sockinfo pri_tcp; /* SOCKINFO_TCP */ + struct un_sockinfo pri_un; /* SOCKINFO_UN */ + struct ndrv_info pri_ndrv; /* SOCKINFO_NDRV */ + struct kern_event_info pri_kern_event; /* SOCKINFO_KERN_EVENT */ + struct kern_ctl_info pri_kern_ctl; /* SOCKINFO_KERN_CTL */ + } soi_proto; }; struct socket_fdinfo { - struct proc_fileinfo pfi; - struct socket_info psi; + struct proc_fileinfo pfi; + struct socket_info psi; }; struct psem_info { - struct vinfo_stat psem_stat; + struct vinfo_stat psem_stat; char psem_name[MAXPATHLEN]; }; struct psem_fdinfo { - struct proc_fileinfo pfi; + struct proc_fileinfo pfi; struct psem_info pseminfo; }; struct pshm_info { - struct vinfo_stat pshm_stat; + struct vinfo_stat pshm_stat; uint64_t pshm_mappaddr; char pshm_name[MAXPATHLEN]; }; struct pshm_fdinfo { - struct proc_fileinfo pfi; - struct pshm_info pshminfo; + struct proc_fileinfo pfi; + struct pshm_info pshminfo; }; struct pipe_info { - struct vinfo_stat pipe_stat; - uint64_t pipe_handle; - uint64_t pipe_peerhandle; + struct vinfo_stat pipe_stat; + uint64_t pipe_handle; + uint64_t pipe_peerhandle; int pipe_status; + int rfu_1; /* reserved */ }; struct pipe_fdinfo { - struct proc_fileinfo pfi; + struct proc_fileinfo pfi; struct pipe_info pipeinfo; }; struct kqueue_info { - struct vinfo_stat kq_stat; - uint32_t kq_state; + struct vinfo_stat kq_stat; + uint32_t kq_state; + uint32_t rfu_1; /* reserved */ }; #define PROC_KQUEUE_SELECT 1 #define PROC_KQUEUE_SLEEP 2 struct kqueue_fdinfo { - struct proc_fileinfo pfi; + struct proc_fileinfo pfi; struct kqueue_info kqueueinfo; }; struct appletalk_info { - struct vinfo_stat atalk_stat; + struct vinfo_stat atalk_stat; }; struct appletalk_fdinfo { - struct proc_fileinfo pfi; - struct appletalk_info appletalkinfo; + struct proc_fileinfo pfi; + struct appletalk_info appletalkinfo; }; /* defns of process file desc type */ -#define PROX_FDTYPE_ATALK 0 -#define PROX_FDTYPE_VNODE 1 -#define PROX_FDTYPE_SOCKET 2 -#define 
PROX_FDTYPE_PSHM 3 -#define PROX_FDTYPE_PSEM 4 -#define PROX_FDTYPE_KQUEUE 5 -#define PROX_FDTYPE_PIPE 6 -#define PROX_FDTYPE_FSEVENTS 7 +#define PROX_FDTYPE_ATALK 0 +#define PROX_FDTYPE_VNODE 1 +#define PROX_FDTYPE_SOCKET 2 +#define PROX_FDTYPE_PSHM 3 +#define PROX_FDTYPE_PSEM 4 +#define PROX_FDTYPE_KQUEUE 5 +#define PROX_FDTYPE_PIPE 6 +#define PROX_FDTYPE_FSEVENTS 7 struct proc_fdinfo { - int32_t proc_fd; - uint32_t proc_fdtype; + int32_t proc_fd; + uint32_t proc_fdtype; }; -/* Falvors for proc_pidinfo() */ -#define PROC_PIDLISTFDS 1 -#define PROC_PIDLISTFD_SIZE (sizeof(struct proc_fdinfo)) +/* Flavors for proc_pidinfo() */ +#define PROC_PIDLISTFDS 1 +#define PROC_PIDLISTFD_SIZE (sizeof(struct proc_fdinfo)) -#define PROC_PIDTASKALLINFO 2 -#define PROC_PIDTASKALLINFO_SIZE (sizeof(struct proc_taskallinfo)) +#define PROC_PIDTASKALLINFO 2 +#define PROC_PIDTASKALLINFO_SIZE (sizeof(struct proc_taskallinfo)) -#define PROC_PIDTBSDINFO 3 -#define PROC_PIDTBSDINFO_SIZE (sizeof(struct proc_bsdinfo)) +#define PROC_PIDTBSDINFO 3 +#define PROC_PIDTBSDINFO_SIZE (sizeof(struct proc_bsdinfo)) -#define PROC_PIDTASKINFO 4 -#define PROC_PIDTASKINFO_SIZE (sizeof(struct proc_taskinfo)) +#define PROC_PIDTASKINFO 4 +#define PROC_PIDTASKINFO_SIZE (sizeof(struct proc_taskinfo)) -#define PROC_PIDTHREADINFO 5 -#define PROC_PIDTHREADINFO_SIZE (sizeof(struct proc_threadinfo)) +#define PROC_PIDTHREADINFO 5 +#define PROC_PIDTHREADINFO_SIZE (sizeof(struct proc_threadinfo)) -#define PROC_PIDLISTTHREADS 6 -#define PROC_PIDLISTTHREADS_SIZE (2* sizeof(uint32_t)) +#define PROC_PIDLISTTHREADS 6 +#define PROC_PIDLISTTHREADS_SIZE (2* sizeof(uint32_t)) -#define PROC_PIDREGIONINFO 7 -#define PROC_PIDREGIONINFO_SIZE (sizeof(struct proc_regioninfo)) +#define PROC_PIDREGIONINFO 7 +#define PROC_PIDREGIONINFO_SIZE (sizeof(struct proc_regioninfo)) -#define PROC_PIDREGIONPATHINFO 8 -#define PROC_PIDREGIONPATHINFO_SIZE (sizeof(struct proc_regionwithpathinfo)) +#define PROC_PIDREGIONPATHINFO 8 +#define 
PROC_PIDREGIONPATHINFO_SIZE (sizeof(struct proc_regionwithpathinfo)) -#define PROC_PIDVNODEPATHINFO 9 -#define PROC_PIDVNODEPATHINFO_SIZE (sizeof(struct proc_vnodepathinfo)) +#define PROC_PIDVNODEPATHINFO 9 +#define PROC_PIDVNODEPATHINFO_SIZE (sizeof(struct proc_vnodepathinfo)) -#define PROC_PIDTHREADPATHINFO 10 -#define PROC_PIDTHREADPATHINFO_SIZE (sizeof(struct proc_threadwithpathinfo)) +#define PROC_PIDTHREADPATHINFO 10 +#define PROC_PIDTHREADPATHINFO_SIZE (sizeof(struct proc_threadwithpathinfo)) -#define PROC_PIDPATHINFO 11 -#define PROC_PIDPATHINFO_SIZE (MAXPATHLEN) -#define PROC_PIDPATHINFO_MAXSIZE (4*MAXPATHLEN) +#define PROC_PIDPATHINFO 11 +#define PROC_PIDPATHINFO_SIZE (MAXPATHLEN) +#define PROC_PIDPATHINFO_MAXSIZE (4*MAXPATHLEN) -/* Flavors for proc_pidfdinfo */ +#define PROC_PIDWORKQUEUEINFO 12 +#define PROC_PIDWORKQUEUEINFO_SIZE (sizeof(struct proc_workqueueinfo)) -#define PROC_PIDFDVNODEINFO 1 -#define PROC_PIDFDVNODEINFO_SIZE (sizeof(struct vnode_fdinfo)) +/* Flavors for proc_pidfdinfo */ -#define PROC_PIDFDVNODEPATHINFO 2 -#define PROC_PIDFDVNODEPATHINFO_SIZE (sizeof(struct vnode_fdinfowithpath)) +#define PROC_PIDFDVNODEINFO 1 +#define PROC_PIDFDVNODEINFO_SIZE (sizeof(struct vnode_fdinfo)) -#define PROC_PIDFDSOCKETINFO 3 -#define PROC_PIDFDSOCKETINFO_SIZE (sizeof(struct socket_fdinfo)) +#define PROC_PIDFDVNODEPATHINFO 2 +#define PROC_PIDFDVNODEPATHINFO_SIZE (sizeof(struct vnode_fdinfowithpath)) -#define PROC_PIDFDPSEMINFO 4 -#define PROC_PIDFDPSEMINFO_SIZE (sizeof(struct psem_fdinfo)) +#define PROC_PIDFDSOCKETINFO 3 +#define PROC_PIDFDSOCKETINFO_SIZE (sizeof(struct socket_fdinfo)) -#define PROC_PIDFDPSHMINFO 5 -#define PROC_PIDFDPSHMINFO_SIZE (sizeof(struct pshm_fdinfo)) +#define PROC_PIDFDPSEMINFO 4 +#define PROC_PIDFDPSEMINFO_SIZE (sizeof(struct psem_fdinfo)) -#define PROC_PIDFDPIPEINFO 6 -#define PROC_PIDFDPIPEINFO_SIZE (sizeof(struct pipe_fdinfo)) +#define PROC_PIDFDPSHMINFO 5 +#define PROC_PIDFDPSHMINFO_SIZE (sizeof(struct pshm_fdinfo)) -#define 
PROC_PIDFDKQUEUEINFO 7 -#define PROC_PIDFDKQUEUEINFO_SIZE (sizeof(struct kqueue_fdinfo)) +#define PROC_PIDFDPIPEINFO 6 +#define PROC_PIDFDPIPEINFO_SIZE (sizeof(struct pipe_fdinfo)) -#define PROC_PIDFDATALKINFO 8 -#define PROC_PIDFDATALKINFO_SIZE (sizeof(struct appletalk_fdinfo)) +#define PROC_PIDFDKQUEUEINFO 7 +#define PROC_PIDFDKQUEUEINFO_SIZE (sizeof(struct kqueue_fdinfo)) +#define PROC_PIDFDATALKINFO 8 +#define PROC_PIDFDATALKINFO_SIZE (sizeof(struct appletalk_fdinfo)) +/* used for proc_setcontrol */ +#define PROC_SELFSET_PCONTROL 1 #ifdef XNU_KERNEL_PRIVATE +#ifndef pshmnode +struct pshmnode; +#endif + +#ifndef psemnode +struct psemnode ; +#endif + +#ifndef pipe +struct pipe; +#endif + extern int fill_socketinfo(socket_t so, struct socket_info *si); extern int fill_pshminfo(struct pshmnode * pshm, struct pshm_info * pinfo); extern int fill_pseminfo(struct psemnode * psem, struct psem_info * pinfo); extern int fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo); extern int fill_kqueueinfo(struct kqueue * kq, struct kqueue_info * kinfo); +extern int fill_procworkqueue(proc_t, struct proc_workqueueinfo *); #endif /* XNU_KERNEL_PRIVATE */ - __END_DECLS #endif /*_SYS_PROC_INFO_H */ diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index 280e812b1..d6c551b1d 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -79,6 +79,9 @@ #include __BEGIN_DECLS #include +#if PSYNCH +#include +#endif /* PSYNCH */ __END_DECLS #if DEBUG @@ -121,7 +124,7 @@ struct session { struct proc * s_leader; /* Session leader.(static) */ struct vnode * s_ttyvp; /* Vnode of controlling terminal.(SL) */ int s_ttyvid; /* Vnode id of the controlling terminal (SL) */ - struct tty * s_ttyp; /* Controlling terminal. (SL) */ + struct tty * s_ttyp; /* Controlling terminal. 
(SL + ttyvp != NULL) */ pid_t s_ttypgrpid; /* tty's pgrp id */ pid_t s_sid; /* Session ID (static) */ char s_login[MAXLOGNAME]; /* Setlogin() name.(SL) */ @@ -133,6 +136,18 @@ struct session { #define SESSION_NULL (struct session *)0 +/* + * accessor for s_ttyp which treats it as invalid if s_ttyvp is not valid; + * note that s_ttyp is not a reference in the session structure, so it can + * become invalid out from under the session if the device is closed, without + * this protection. We can't safely make it into a reference without reflexive + * close notification of tty devices through cdevsw[]. + * + * NB: is not in scope and there is not typedef type enforcement, + * or '0' below would be 'TTY_NULL'. + */ +#define SESSION_TP(sp) (((sp)->s_ttyvp != 0) ? (sp)->s_ttyp : 0) + /* * Session flags; used to tunnel information to lower layers and line * disciplines, etc. @@ -213,6 +228,7 @@ struct proc { struct plimit *p_limit; /* Process limits.(PL) */ struct sigacts *p_sigacts; /* Signal actions, state (PL) */ + int p_siglist; /* signals captured back from threads */ lck_spin_t p_slock; /* spin lock for itimer/profil protection */ #define p_rlimit p_limit->pl_rlimit @@ -300,14 +316,16 @@ struct proc { struct pgrp *p_pgrp; /* Pointer to process group. (LL) */ int p_iopol_disk; /* disk I/O policy (PL) */ uint32_t p_csflags; /* flags for codesign (PL) */ + uint32_t p_pcaction; /* action for process control on starvation */ + uint8_t p_uuid[16]; /* from LC_UUID load command */ /* End area that is copied on creation.
*/ /* XXXXXXXXXXXXX End of BCOPY'ed on fork (AIOLOCK)XXXXXXXXXXXXXXXX */ -#define p_endcopy aio_active_count - int aio_active_count; /* entries on aio_activeq */ - int aio_done_count; /* entries on aio_doneq */ - TAILQ_HEAD( , aio_workq_entry ) aio_activeq; /* active async IO requests */ - TAILQ_HEAD( , aio_workq_entry ) aio_doneq; /* completed async IO requests */ +#define p_endcopy p_aio_total_count + int p_aio_total_count; /* all allocated AIO requests for this proc */ + int p_aio_active_count; /* all unfinished AIO requests for this proc */ + TAILQ_HEAD( , aio_workq_entry ) p_aio_activeq; /* active async IO requests */ + TAILQ_HEAD( , aio_workq_entry ) p_aio_doneq; /* completed async IO requests */ struct klist p_klist; /* knote list (PL ?)*/ @@ -323,13 +341,16 @@ struct proc { user_addr_t p_threadstart; /* pthread start fn */ user_addr_t p_wqthread; /* pthread workqueue fn */ int p_pthsize; /* pthread size */ + user_addr_t p_targconc; /* target concurrency ptr */ void * p_wqptr; /* workq ptr */ int p_wqsize; /* allocated size */ - lck_mtx_t p_wqlock; /* lock to protect work queue */ + boolean_t p_wqiniting; /* semaphore to serialize wq_open */ + lck_spin_t p_wqlock; /* lock to protect work queue */ struct timeval p_start; /* starting time */ void * p_rcall; int p_ractive; int p_idversion; /* version of process identity */ + void * p_pthhash; /* pthread waitqueue hash */ #if DIAGNOSTIC unsigned int p_fdlock_pc[4]; unsigned int p_fdunlock_pc[4]; @@ -338,6 +359,7 @@ struct proc { unsigned int unlockpc[8]; #endif /* SIGNAL_DEBUG */ #endif /* DIAGNOSTIC */ + uint64_t p_dispatchqueue_offset; }; #define PGRPID_DEAD 0xdeaddead @@ -361,6 +383,7 @@ struct proc { #define P_LIST_INPGRP 0x00020000 /* process is in pgrp */ #define P_LIST_PGRPTRANS 0x00040000 /* pgrp is getting replaced */ #define P_LIST_PGRPTRWAIT 0x00080000 /* wait for pgrp replacement */ +#define P_LIST_EXITCOUNT 0x00100000 /* counted for process exit */ /* local flags */ @@ -369,7 +392,7 @@ struct proc {
#define P_LTERM 0x00000004 /* */ #define P_LEXIT 0x00000008 /* */ #define P_LPEXIT 0x00000010 -#define P_LTHSIGSTACK 0x00000020 +#define P_LTRANSCOMMIT 0x00000020 /* process is committed to trans */ #define P_LINTRANSIT 0x00000040 /* process in exec or in creation */ #define P_LTRANSWAIT 0x00000080 /* waiting for trans to complete */ #define P_LVFORK 0x00000100 /* */ @@ -387,6 +410,20 @@ struct proc { #define P_LINSIGNAL 0x00100000 #define P_LSIGNALWAIT 0x00200000 #define P_LRAGE_VNODES 0x00400000 +#define P_LREGISTER 0x00800000 /* thread start fns registered */ + +/* Process control state for resource starvation */ +#define P_PCTHROTTLE 1 +#define P_PCSUSP 2 +#define P_PCKILL 3 +#define P_PCMAX 3 + +/* Process control action state on resource starvation */ +#define PROC_ACTION_MASK 0xffff0000; +#define PROC_CONTROL_STATE(p) (p->p_pcaction & P_PCMAX) +#define PROC_ACTION_STATE(p) ((p->p_pcaction >> 16) & P_PCMAX) +#define PROC_SETACTION_STATE(p) (p->p_pcaction = (PROC_CONTROL_STATE(p) | (PROC_CONTROL_STATE(p) << 16))) +#define PROC_RESETACTION_STATE(p) (p->p_pcaction = PROC_CONTROL_STATE(p)) /* advisory flags in the proc */ #define P_LADVLOCK 0x01 @@ -406,6 +443,11 @@ struct proc { #define PROC_CLAIMED 2 #define PROC_CLAIMED_DONE 3 +/* process creation arguments */ +#define PROC_CREATE_FORK 0 /* independent child (running) */ +#define PROC_CREATE_SPAWN 1 /* independent child (suspended) */ +#define PROC_CREATE_VFORK 2 /* child borrows context */ + /* LP64 version of extern_proc. all pointers * grow when we're dealing with a 64-bit process. @@ -416,13 +458,66 @@ struct proc { #ifdef KERNEL #include /* user_timeval, user_itimerval */ -struct user_extern_proc { +#pragma pack(4) +struct user32_extern_proc { + union { + struct { + uint32_t __p_forw; /* Doubly-linked run/sleep queue. */ + uint32_t __p_back; + } p_st1; + struct user32_timeval __p_starttime; /* process start time */ + } p_un; + uint32_t p_vmspace; /* Address space.
*/ + uint32_t p_sigacts; /* Signal actions, state (PROC ONLY). */ + int p_flag; /* P_* flags. */ + char p_stat; /* S* process status. */ + pid_t p_pid; /* Process identifier. */ + pid_t p_oppid; /* Save parent pid during ptrace. XXX */ + int p_dupfd; /* Sideways return value from fdopen. XXX */ + /* Mach related */ + uint32_t user_stack; /* where user stack was allocated */ + uint32_t exit_thread; /* XXX Which thread is exiting? */ + int p_debugger; /* allow to debug */ + boolean_t sigwait; /* indication to suspend */ + /* scheduling */ + u_int p_estcpu; /* Time averaged value of p_cpticks. */ + int p_cpticks; /* Ticks of cpu time. */ + fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ + uint32_t p_wchan; /* Sleep address. */ + uint32_t p_wmesg; /* Reason for sleep. */ + u_int p_swtime; /* Time swapped in or out. */ + u_int p_slptime; /* Time since last blocked. */ + struct user32_itimerval p_realtimer; /* Alarm timer. */ + struct user32_timeval p_rtime; /* Real time. */ + u_quad_t p_uticks; /* Statclock hits in user mode. */ + u_quad_t p_sticks; /* Statclock hits in system mode. */ + u_quad_t p_iticks; /* Statclock hits processing intr. */ + int p_traceflag; /* Kernel trace points. */ + uint32_t p_tracep; /* Trace to vnode. */ + int p_siglist; /* DEPRECATED */ + uint32_t p_textvp; /* Vnode of executable. */ + int p_holdcnt; /* If non-zero, don't swap. */ + sigset_t p_sigmask; /* DEPRECATED. */ + sigset_t p_sigignore; /* Signals being ignored. */ + sigset_t p_sigcatch; /* Signals being caught by user. */ + u_char p_priority; /* Process priority. */ + u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ + char p_nice; /* Process "nice" value. */ + char p_comm[MAXCOMLEN+1]; + uint32_t p_pgrp; /* Pointer to process group. */ + uint32_t p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */ + u_short p_xstat; /* Exit status for wait; also stop signal. */ + u_short p_acflag; /* Accounting flags. */ + uint32_t p_ru; /* Exit information. 
XXX */ +}; +#pragma pack() +struct user64_extern_proc { union { struct { user_addr_t __p_forw; /* Doubly-linked run/sleep queue. */ user_addr_t __p_back; } p_st1; - struct user_timeval __p_starttime; /* process start time */ + struct user64_timeval __p_starttime; /* process start time */ } p_un; user_addr_t p_vmspace; /* Address space. */ user_addr_t p_sigacts; /* Signal actions, state (PROC ONLY). */ @@ -444,8 +539,8 @@ struct user_extern_proc { user_addr_t p_wmesg; /* Reason for sleep. */ u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. */ - struct user_itimerval p_realtimer; /* Alarm timer. */ - struct user_timeval p_rtime; /* Real time. */ + struct user64_itimerval p_realtimer; /* Alarm timer. */ + struct user64_timeval p_rtime; /* Real time. */ u_quad_t p_uticks; /* Statclock hits in user mode. */ u_quad_t p_sticks; /* Statclock hits in system mode. */ u_quad_t p_iticks; /* Statclock hits processing intr. */ @@ -476,12 +571,19 @@ struct user_extern_proc { extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Current number of procs per uid */ __private_extern__ int hard_maxproc; /* hard limit */ +extern unsigned int proc_shutdown_exitcount; #define PID_MAX 99999 #define NO_PID 100000 extern lck_mtx_t * proc_list_mlock; extern lck_mtx_t * proc_klist_mlock; +#define BSD_SIMUL_EXECS 33 /* 32 , allow for rounding */ +#define BSD_PAGABLE_MAP_SIZE (BSD_SIMUL_EXECS * (NCARGS + PAGE_SIZE)) +__private_extern__ int execargs_cache_size; +__private_extern__ int execargs_free_count; +__private_extern__ vm_offset_t * execargs_cache; + #define SESS_LEADER(p, sessp) ((sessp)->s_leader == (p)) /* Lock and unlock a login context. 
*/ @@ -508,6 +610,9 @@ extern LIST_HEAD(sesshashhead, session) *sesshashtbl; extern u_long sesshash; extern lck_grp_t * proc_lck_grp; +extern lck_grp_t * proc_mlock_grp; +extern lck_grp_t * proc_fdmlock_grp; +extern lck_grp_t * proc_slock_grp; extern lck_grp_attr_t * proc_lck_grp_attr; extern lck_attr_t * proc_lck_attr; @@ -551,8 +656,9 @@ extern int sleep(void *chan, int pri); extern int tsleep0(void *chan, int pri, const char *wmesg, int timo, int (*continuation)(int)); extern int tsleep1(void *chan, int pri, const char *wmesg, u_int64_t abstime, int (*continuation)(int)); extern int msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int timo, int (*continuation)(int)); -extern void vfork_return(struct proc *child, register_t *retval, int rval); +extern void vfork_return(struct proc *child, int32_t *retval, int rval); extern int exit1(struct proc *, int, int *); +extern int fork1(proc_t, thread_t *, int); extern void vfork_exit_internal(struct proc *p, int rv, int forced); extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked); extern int pgrp_iterate(struct pgrp * pgrp, int flags, int (*callout)(proc_t , void *), void *arg, int (*filterfn)(proc_t , void *), void *filterarg); @@ -587,11 +693,30 @@ int itimerfix(struct timeval *tv); int itimerdecr(struct proc * p, struct itimerval *itp, int usec); void proc_signalstart(struct proc *, int locked); void proc_signalend(struct proc *, int locked); -void proc_transstart(struct proc *, int locked); +int proc_transstart(struct proc *, int locked); +void proc_transcommit(struct proc *, int locked); void proc_transend(struct proc *, int locked); -void proc_transwait(struct proc *, int locked); +int proc_transwait(struct proc *, int locked); void proc_rele_locked(struct proc * p); void proc_knote(struct proc * p, long hint); +void proc_knote_drain(struct proc *p); void workqueue_init_lock(proc_t p); void workqueue_destroy_lock(proc_t p); +void 
proc_setregister(proc_t p); +void proc_resetregister(proc_t p); +/* returns the first thread_t in the process, or NULL XXX for NFS, DO NOT USE */ +thread_t proc_thread(proc_t); +extern int proc_pendingsignals(proc_t, sigset_t); +int proc_getpcontrol(int pid, int * pcontrolp); +int proc_dopcontrol(proc_t p, void *unused_arg); +int proc_resetpcontrol(int pid); +#if PSYNCH +void pth_proc_hashinit(proc_t); +void pth_proc_hashdelete(proc_t); +void pth_global_hashinit(void); +extern thread_call_t psynch_thcall; +void psynch_wq_cleanup(__unused void * param, __unused void * param1); +extern lck_mtx_t * pthread_list_mlock; +#endif /* PSYNCH */ +struct uthread * current_uthread(void); #endif /* !_SYS_PROC_INTERNAL_H_ */ diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index aaa56fe74..67d6a538d 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,8 +144,8 @@ struct protosw { #endif struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ #if __APPLE__ - int (*pr_lock) (struct socket *so, int locktype, int debug); /* lock function for protocol */ - int (*pr_unlock) (struct socket *so, int locktype, int debug); /* unlock for protocol */ + int (*pr_lock) (struct socket *so, int locktype, void *debug); /* lock function for protocol */ + int (*pr_unlock) (struct socket *so, int locktype, void *debug); /* unlock for protocol */ #ifdef _KERN_LOCKS_H_ lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); #else @@ -156,7 +156,7 @@ struct protosw { /* Implant hooks */ TAILQ_HEAD(, socket_filter) pr_filter_head; struct protosw *pr_next; /* Chain for domain */ - u_long reserved[1]; /* Padding for future use */ + u_int32_t reserved[1]; /* Padding for future use */ #endif }; @@ -406,7 +406,7 @@ char *prcorequests[] = { #ifdef KERNEL __BEGIN_DECLS -void domaininit(void); +void domaininit(void) __attribute__((section("__TEXT, initcode"))); void pfctlinput(int, struct sockaddr *); void pfctlinput2(int, struct sockaddr *, void *); diff --git a/bsd/sys/pthread_internal.h b/bsd/sys/pthread_internal.h index 2f37bf1a8..dc68c04c2 100644 --- a/bsd/sys/pthread_internal.h +++ b/bsd/sys/pthread_internal.h @@ -33,135 +33,35 @@ #include -/* - * Mutex attributes - */ -typedef struct -{ - long sig; /* Unique signature for this structure */ - int prioceiling; - u_int32_t protocol:2, /* protocol attribute */ - type:2, /* mutex type */ - pshared:2, - rfu:26; -} pthread_mutexattr_t; - -#undef pthread_mutex_t -/* - * Mutex variables - */ -typedef struct _pthread_mutex -{ - long sig; /* Unique signature for this structure */ - lck_mtx_t * mutex; /* the kernel internal mutex */ - lck_mtx_t * lock; - thread_t owner; /* Which thread has this mutex locked */ - proc_t owner_proc; /* Which thread has this mutex locked */ - u_int32_t protocol:2, /* protocol */ - type:2, /* mutex type */ 
- pshared:2, /* mutex type */ - refcount:10, - lock_count:16; - int16_t prioceiling; - int16_t priority; /* Priority to restore when mutex unlocked */ -} pthread_mutex_t; - -#define MTX_LOCK lck_mtx_lock -#define MTX_UNLOCK lck_mtx_unlock - -/* - * Condition variable attributes - */ -#undef pthread_condattr_t -typedef struct -{ - long sig; /* Unique signature for this structure */ - u_int32_t pshared:2, /* pshared */ - unsupported:30; -} pthread_condattr_t; - -/* - * Condition variables - */ -#undef pthread_cond_t -typedef struct _pthread_cond -{ - long sig; /* Unique signature for this structure */ - lck_mtx_t * lock; /* Used for internal mutex on structure */ - u_int32_t waiters:15, /* Number of threads waiting */ - sigpending:15, /* Number of outstanding signals */ - pshared:2; - int refcount; - pthread_mutex_t * mutex; - proc_t owner_proc; /* Which thread has this mutex locked */ - semaphore_t sem; -} pthread_cond_t; -#define COND_LOCK lck_mtx_lock -#define COND_UNLOCK lck_mtx_unlock - -#undef pthread_rwlockattr_t -typedef struct { - long sig; /* Unique signature for this structure */ - int pshared; - int rfu[2]; /* reserved for future use */ -} pthread_rwlockattr_t; - -#undef pthread_rwlock_t -typedef struct { - long sig; - lck_rw_t * rwlock; - int pshared; - thread_t owner; - int rfu[2]; -} pthread_rwlock_t; - -#define _PTHREAD_NO_SIG 0x00000000 -#define _PTHREAD_MUTEX_ATTR_SIG 0x4D545841 /* 'MTXA' */ -#define _PTHREAD_MUTEX_SIG 0x4D555458 /* 'MUTX' */ -#define _PTHREAD_MUTEX_SIG_init 0x32AAABA7 /* [almost] ~'MUTX' */ -#define _PTHREAD_COND_ATTR_SIG 0x434E4441 /* 'CNDA' */ -#define _PTHREAD_COND_SIG 0x434F4E44 /* 'COND' */ -#define _PTHREAD_COND_SIG_init 0x3CB0B1BB /* [almost] ~'COND' */ -#define _PTHREAD_ATTR_SIG 0x54484441 /* 'THDA' */ -#define _PTHREAD_ONCE_SIG 0x4F4E4345 /* 'ONCE' */ -#define _PTHREAD_ONCE_SIG_init 0x30B1BCBA /* [almost] ~'ONCE' */ -#define _PTHREAD_SIG 0x54485244 /* 'THRD' */ -#define _PTHREAD_RWLOCK_ATTR_SIG 0x52574C41 /* 'RWLA' */ 
-#define _PTHREAD_RWLOCK_SIG 0x52574C4B /* 'RWLK' */ -#define _PTHREAD_RWLOCK_SIG_init 0x2DA8B3B4 /* [almost] ~'RWLK' */ - -#define _PTHREAD_KERN_COND_SIG 0x12345678 /* */ -#define _PTHREAD_KERN_MUTEX_SIG 0x34567812 /* */ -#define _PTHREAD_KERN_RWLOCK_SIG 0x56781234 /* */ - - -#define PTHREAD_PROCESS_SHARED 1 -#define PTHREAD_PROCESS_PRIVATE 2 - -#define WORKQUEUE_MAXTHREADS 64 #define WORKITEM_SIZE 64 -#define WORKQUEUE_NUMPRIOS 5 +#define WORKQUEUE_NUMPRIOS 3 + +#define WORKQUEUE_OVERCOMMIT 0x10000 struct threadlist { TAILQ_ENTRY(threadlist) th_entry; thread_t th_thread; int th_flags; - uint32_t th_unparked; - uint32_t th_affinity_tag; + uint32_t th_suspended; + uint16_t th_affinity_tag; + uint8_t th_priority; + uint8_t th_policy; struct workqueue *th_workq; mach_vm_size_t th_stacksize; mach_vm_size_t th_allocsize; mach_vm_offset_t th_stackaddr; - mach_port_t th_thport; + mach_port_name_t th_thport; }; #define TH_LIST_INITED 0x01 #define TH_LIST_RUNNING 0x02 #define TH_LIST_BLOCKED 0x04 #define TH_LIST_SUSPENDED 0x08 +#define TH_LIST_BUSY 0x10 struct workitem { TAILQ_ENTRY(workitem) wi_entry; user_addr_t wi_item; + uint32_t wi_affinity; }; struct workitemlist { @@ -169,44 +69,61 @@ struct workitemlist { TAILQ_HEAD(, workitem) wl_freelist; }; - struct workqueue { struct workitem wq_array[WORKITEM_SIZE * WORKQUEUE_NUMPRIOS]; proc_t wq_proc; vm_map_t wq_map; task_t wq_task; - thread_call_t wq_timer_call; + thread_call_t wq_atimer_call; int wq_flags; + int wq_lflags; int wq_itemcount; - struct timeval wq_lastran_ts; - struct timeval wq_reduce_ts; - uint32_t wq_stalled_count; - uint32_t wq_max_threads_scheduled; + uint64_t wq_thread_yielded_timestamp; + uint32_t wq_thread_yielded_count; + uint32_t wq_timer_interval; uint32_t wq_affinity_max; uint32_t wq_threads_scheduled; uint32_t wq_nthreads; - uint32_t wq_nextaffinitytag; - struct workitemlist wq_list[WORKQUEUE_NUMPRIOS]; /* prio based item list */ + uint32_t wq_thidlecount; + uint32_t 
wq_reqconc[WORKQUEUE_NUMPRIOS]; /* requested concurrency for each priority level */ + struct workitemlist wq_list[WORKQUEUE_NUMPRIOS]; /* priority based item list */ + uint32_t wq_list_bitmap; TAILQ_HEAD(, threadlist) wq_thrunlist; - TAILQ_HEAD(wq_thidlelist, threadlist) * wq_thidlelist; - uint32_t * wq_thactivecount; - uint32_t * wq_thcount; + TAILQ_HEAD(, threadlist) wq_thidlelist; + uint32_t *wq_thactive_count[WORKQUEUE_NUMPRIOS]; + uint32_t *wq_thscheduled_count[WORKQUEUE_NUMPRIOS]; + uint64_t *wq_lastblocked_ts[WORKQUEUE_NUMPRIOS]; }; #define WQ_LIST_INITED 0x01 -#define WQ_BUSY 0x02 -#define WQ_TIMER_RUNNING 0x04 -#define WQ_TIMER_WATCH 0x08 -#define WQ_ADD_TO_POOL 0x10 +#define WQ_ATIMER_RUNNING 0x02 +#define WQ_EXITING 0x04 + +#define WQL_ATIMER_BUSY 0x01 +#define WQL_ATIMER_WAITING 0x02 + + +#define WQ_VECT_SET_BIT(vector, bit) \ + vector[(bit) / 32] |= (1 << ((bit) % 32)) + +#define WQ_VECT_CLEAR_BIT(vector, bit) \ + vector[(bit) / 32] &= ~(1 << ((bit) % 32)) + +#define WQ_VECT_TEST_BIT(vector, bit) \ + vector[(bit) / 32] & (1 << ((bit) % 32)) + -#define WQ_STALLED_WINDOW_USECS 20000 -#define WQ_REDUCE_POOL_WINDOW_USECS 3000000 -#define WQ_MAX_RUN_LATENCY_USECS 500 -#define WQ_TIMER_INTERVAL_MSECS 40 +#define WORKQUEUE_MAXTHREADS 512 +#define WQ_YIELDED_THRESHOLD 2000 +#define WQ_YIELDED_WINDOW_USECS 30000 +#define WQ_STALLED_WINDOW_USECS 200 +#define WQ_REDUCE_POOL_WINDOW_USECS 5000000 +#define WQ_MAX_TIMER_INTERVAL_USECS 50000 -/* workq_ops commands */ +/* workq_kernreturn commands */ #define WQOPS_QUEUE_ADD 1 #define WQOPS_QUEUE_REMOVE 2 #define WQOPS_THREAD_RETURN 4 +#define WQOPS_THREAD_SETCONC 8 #define PTH_DEFAULT_STACKSIZE 512*1024 #define PTH_DEFAULT_GUARDSIZE 4*1024 @@ -214,17 +131,10 @@ struct workqueue { void workqueue_exit(struct proc *); -pthread_mutex_t * pthread_id_to_mutex(int mutexid); -int pthread_id_mutex_add(pthread_mutex_t *); -void pthread_id_mutex_remove(int); -void pthread_mutex_release(pthread_mutex_t *); -pthread_cond_t * 
pthread_id_to_cond(int condid); -int pthread_id_cond_add(pthread_cond_t *); -void pthread_id_cond_remove(int); -void pthread_cond_release(pthread_cond_t *); - -void pthread_list_lock(void); -void pthread_list_unlock(void); +void pthread_init(void); +extern lck_grp_attr_t *pthread_lck_grp_attr; +extern lck_grp_t *pthread_lck_grp; +extern lck_attr_t *pthread_lck_attr; #endif /* _SYS_PTHREAD_INTERNAL_H_ */ diff --git a/bsd/sys/quota.h b/bsd/sys/quota.h index b1a2db3aa..3693ff592 100644 --- a/bsd/sys/quota.h +++ b/bsd/sys/quota.h @@ -151,9 +151,9 @@ struct dqfilehdr { u_int32_t dqh_maxentries; /* must be a power of 2 */ u_int32_t dqh_entrycnt; /* count of active entries */ u_int32_t dqh_flags; /* reserved for now (0) */ - time_t dqh_chktime; /* time of last quota check */ - time_t dqh_btime; /* time limit for excessive disk use */ - time_t dqh_itime; /* time limit for excessive files */ + u_int32_t dqh_chktime; /* time of last quota check */ + u_int32_t dqh_btime; /* time limit for excessive disk use */ + u_int32_t dqh_itime; /* time limit for excessive files */ char dqh_string[16]; /* tag string */ u_int32_t dqh_spare[4]; /* pad struct to power of 2 */ }; @@ -165,8 +165,8 @@ struct dqblk { u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ u_int32_t dqb_isoftlimit; /* preferred inode limit */ u_int32_t dqb_curinodes; /* current # allocated inodes */ - time_t dqb_btime; /* time limit for excessive disk use */ - time_t dqb_itime; /* time limit for excessive files */ + u_int32_t dqb_btime; /* time limit for excessive disk use */ + u_int32_t dqb_itime; /* time limit for excessive files */ u_int32_t dqb_id; /* identifier (0 for empty entries) */ u_int32_t dqb_spare[4]; /* pad struct to power of 2 */ }; @@ -185,8 +185,8 @@ struct user_dqblk { u_int32_t dqb_ihardlimit; /* maximum # allocated inodes + 1 */ u_int32_t dqb_isoftlimit; /* preferred inode limit */ u_int32_t dqb_curinodes; /* current # allocated inodes */ - user_time_t dqb_btime 
__attribute((aligned(8))); /* time limit for excessive disk use */ - user_time_t dqb_itime; /* time limit for excessive files */ + u_int32_t dqb_btime; /* time limit for excessive disk use */ + u_int32_t dqb_itime; /* time limit for excessive files */ u_int32_t dqb_id; /* identifier (0 for empty entries) */ u_int32_t dqb_spare[4]; /* pad struct to power of 2 */ }; @@ -215,7 +215,7 @@ struct user_dqblk { * golden ratio to the machine's word size. */ #define dqhash1(id, shift, mask) \ - ((((id) * 2654435761UL) >> (shift)) & (mask)) + ((((id) * 2654435761U) >> (shift)) & (mask)) #define dqhash2(id, mask) \ (dqhash1((id), 11, (mask)>>1) | 1) @@ -229,10 +229,10 @@ struct user_dqblk { * Compute the hash shift value. * It is the word size, in bits, minus the hash table size, in bits. */ -static __inline int dqhashshift(u_long); +static __inline int dqhashshift(u_int32_t); static __inline int -dqhashshift(u_long size) +dqhashshift(u_int32_t size) { int shift; @@ -262,8 +262,8 @@ struct quotafile { int qf_shift; /* primary hash shift */ int qf_maxentries; /* size of hash table (power of 2) */ int qf_entrycnt; /* count of active entries */ - time_t qf_btime; /* block quota time limit */ - time_t qf_itime; /* inode quota time limit */ + u_int32_t qf_btime; /* block quota time limit */ + u_int32_t qf_itime; /* inode quota time limit */ /* the following 2 fields are protected */ /* by the quota list lock */ @@ -340,6 +340,7 @@ struct dquot { #define CHOWN 0x02 /* (advisory) change initiated by chown */ +#ifdef XNU_KERNEL_PRIVATE /* * Functions that manage the in-core dquot and the * on-disk dqblk data structures. 
@@ -349,7 +350,7 @@ void dqfileinit(struct quotafile *); int dqfileopen(struct quotafile *, int); void dqfileclose(struct quotafile *, int); void dqflush(struct vnode *); -int dqget(u_long, struct quotafile *, int, struct dquot **); +int dqget(u_int32_t, struct quotafile *, int, struct dquot **); void dqhashinit(void); void dqinit(void); int dqisinitialized(void); @@ -366,6 +367,7 @@ void qf_put(struct quotafile *, int type); __private_extern__ void munge_dqblk(struct dqblk *dqblkp, struct user_dqblk *user_dqblkp, boolean_t to64); __END_DECLS +#endif /* XNU_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/reboot.h b/bsd/sys/reboot.h index 67b55c786..f79f2a9e2 100644 --- a/bsd/sys/reboot.h +++ b/bsd/sys/reboot.h @@ -80,7 +80,6 @@ #define RB_ASKNAME 0x01 /* ask for file name to reboot from */ #define RB_SINGLE 0x02 /* reboot to single user only */ #define RB_NOSYNC 0x04 /* dont sync before reboot */ -#define RB_KDB 0x04 /* load kernel debugger */ #define RB_HALT 0x08 /* don't reboot, just halt */ #define RB_INITNAME 0x10 /* name given for /etc/init */ #define RB_DFLTROOT 0x20 /* use compiled-in rootdev */ @@ -139,6 +138,8 @@ __BEGIN_DECLS void boot(int, int, char *); __END_DECLS +#define PROC_SHUTDOWN_LOG "/var/log/kernel-shutdown.log" + #endif /* BSD_KERNEL_PRIVATE */ #endif /* _SYS_REBOOT_H_ */ diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index 747b77b68..85829a914 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,6 +73,10 @@ * */ #define __need_struct_timeval +#ifdef KERNEL +#define __need_struct_user32_timeval +#define __need_struct_user64_timeval +#endif #include /* The id_t type shall be defined as described in */ @@ -171,37 +175,46 @@ struct rusage { #ifdef KERNEL -#include /* user_time_t */ -/* LP64 version of struct timeval. 
time_t is a long and must grow when - * we're dealing with a 64-bit process. - * WARNING - keep in sync with struct timeval - */ +struct user64_rusage { + struct user64_timeval ru_utime; /* user time used */ + struct user64_timeval ru_stime; /* system time used */ + user64_long_t ru_maxrss; /* max resident set size */ + user64_long_t ru_ixrss; /* integral shared memory size */ + user64_long_t ru_idrss; /* integral unshared data " */ + user64_long_t ru_isrss; /* integral unshared stack " */ + user64_long_t ru_minflt; /* page reclaims */ + user64_long_t ru_majflt; /* page faults */ + user64_long_t ru_nswap; /* swaps */ + user64_long_t ru_inblock; /* block input operations */ + user64_long_t ru_oublock; /* block output operations */ + user64_long_t ru_msgsnd; /* messages sent */ + user64_long_t ru_msgrcv; /* messages received */ + user64_long_t ru_nsignals; /* signals received */ + user64_long_t ru_nvcsw; /* voluntary context switches */ + user64_long_t ru_nivcsw; /* involuntary " */ +}; -struct user_rusage_timeval { - user_time_t tv_sec; /* seconds */ - __darwin_suseconds_t tv_usec __attribute((aligned(8))); /* and microseconds */ -}; -struct user_rusage { - struct user_rusage_timeval ru_utime; /* user time used */ - struct user_rusage_timeval ru_stime; /* system time used */ - user_long_t ru_maxrss; /* max resident set size */ - user_long_t ru_ixrss; /* integral shared memory size */ - user_long_t ru_idrss; /* integral unshared data " */ - user_long_t ru_isrss; /* integral unshared stack " */ - user_long_t ru_minflt; /* page reclaims */ - user_long_t ru_majflt; /* page faults */ - user_long_t ru_nswap; /* swaps */ - user_long_t ru_inblock; /* block input operations */ - user_long_t ru_oublock; /* block output operations */ - user_long_t ru_msgsnd; /* messages sent */ - user_long_t ru_msgrcv; /* messages received */ - user_long_t ru_nsignals; /* signals received */ - user_long_t ru_nvcsw; /* voluntary context switches */ - user_long_t ru_nivcsw; /* involuntary " */ 
+struct user32_rusage { + struct user32_timeval ru_utime; /* user time used */ + struct user32_timeval ru_stime; /* system time used */ + user32_long_t ru_maxrss; /* max resident set size */ + user32_long_t ru_ixrss; /* integral shared memory size */ + user32_long_t ru_idrss; /* integral unshared data " */ + user32_long_t ru_isrss; /* integral unshared stack " */ + user32_long_t ru_minflt; /* page reclaims */ + user32_long_t ru_majflt; /* page faults */ + user32_long_t ru_nswap; /* swaps */ + user32_long_t ru_inblock; /* block input operations */ + user32_long_t ru_oublock; /* block output operations */ + user32_long_t ru_msgsnd; /* messages sent */ + user32_long_t ru_msgrcv; /* messages received */ + user32_long_t ru_nsignals; /* signals received */ + user32_long_t ru_nvcsw; /* voluntary context switches */ + user32_long_t ru_nivcsw; /* involuntary " */ }; -#endif // KERNEL +#endif /* KERNEL */ /***** diff --git a/bsd/sys/resourcevar.h b/bsd/sys/resourcevar.h index 588847dc8..eaf7f148b 100644 --- a/bsd/sys/resourcevar.h +++ b/bsd/sys/resourcevar.h @@ -78,11 +78,11 @@ struct pstats { struct uprof { /* profile arguments */ struct uprof *pr_next; /* multiple prof buffers allowed */ caddr_t pr_base; /* buffer base */ - u_long pr_size; /* buffer size */ - u_long pr_off; /* pc offset */ - u_long pr_scale; /* pc scaling */ - u_long pr_addr; /* temp storage for addr until AST */ - u_long pr_ticks; /* temp storage for ticks until AST */ + u_int32_t pr_size; /* buffer size */ + u_int32_t pr_off; /* pc offset */ + u_int32_t pr_scale; /* pc scaling */ + u_int32_t pr_addr; /* temp storage for addr until AST */ + u_int32_t pr_ticks; /* temp storage for ticks until AST */ } p_prof; #define pstat_endzero p_start @@ -127,7 +127,7 @@ struct plimit { (proc_is64bit((p)) ? 
(p)->p_stats->user_p_prof.pr_ticks \ : (p)->p_stats->p_prof.pr_ticks)) -void addupc_intr(struct proc *p, u_long pc, u_int ticks); +void addupc_intr(struct proc *p, uint32_t pc, u_int ticks); void addupc_task(struct proc *p, user_addr_t pc, u_int ticks); void calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip); diff --git a/bsd/sys/sdt.h b/bsd/sys/sdt.h index 420956bdb..31acf140a 100644 --- a/bsd/sys/sdt.h +++ b/bsd/sys/sdt.h @@ -29,9 +29,15 @@ /* * This is a wrapper header that wraps the mach visible sdt.h header so that - * the header file ends up vidible where software expects it to be. We also + * the header file ends up visible where software expects it to be. We also * do the C/C++ symbol wrapping here, since Mach headers are technically C * interfaces. + * + * Note: The process of adding USDT probes to code is slightly different + * than documented in the "Solaris Dynamic Tracing Guide". + * The DTRACE_PROBE*() macros are not supported on Mac OS X -- instead see + * "BUILDING CODE CONTAINING USDT PROBES" in the dtrace(1) manpage + * */ #include __BEGIN_DECLS diff --git a/bsd/sys/select.h b/bsd/sys/select.h index 0bb8d0594..79237174b 100644 --- a/bsd/sys/select.h +++ b/bsd/sys/select.h @@ -160,6 +160,7 @@ struct selinfo; __BEGIN_DECLS +extern int selwait; void selrecord(proc_t selector, struct selinfo *, void *); void selwakeup(struct selinfo *); void selthreadclear(struct selinfo *); diff --git a/bsd/sys/sem_internal.h b/bsd/sys/sem_internal.h index e59aca2aa..42fd3bffb 100644 --- a/bsd/sys/sem_internal.h +++ b/bsd/sys/sem_internal.h @@ -70,6 +70,34 @@ struct user_semid_ds { __int32_t sem_pad3[4]; /* RESERVED: DO NOT USE! 
*/ }; +#pragma pack(4) +struct user64_semid_ds { + struct ipc_perm sem_perm; /* [XSI] operation permission struct */ + int32_t sem_base; /* 32 bit base ptr for semaphore set */ + unsigned short sem_nsems; /* [XSI] number of sems in set */ + user64_time_t sem_otime; /* [XSI] last operation time */ + int32_t sem_pad1; /* RESERVED: DO NOT USE! */ + user64_time_t sem_ctime; /* [XSI] last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + int32_t sem_pad2; /* RESERVED: DO NOT USE! */ + int32_t sem_pad3[4]; /* RESERVED: DO NOT USE! */ +}; + +struct user32_semid_ds { + struct ipc_perm sem_perm; /* [XSI] operation permission struct */ + int32_t sem_base; /* 32 bit base ptr for semaphore set */ + unsigned short sem_nsems; /* [XSI] number of sems in set */ + user32_time_t sem_otime; /* [XSI] last operation time */ + int32_t sem_pad1; /* RESERVED: DO NOT USE! */ + user32_time_t sem_ctime; /* [XSI] last change time */ + /* Times measured in secs since */ + /* 00:00:00 GMT, Jan. 1, 1970 */ + int32_t sem_pad2; /* RESERVED: DO NOT USE! */ + int32_t sem_pad3[4]; /* RESERVED: DO NOT USE! 
*/ +}; +#pragma pack() + union user_semun { user_addr_t buf; /* buffer for IPC_STAT & IPC_SET */ user_addr_t array; /* array for GETALL & SETALL */ diff --git a/bsd/sys/shm.h b/bsd/sys/shm.h index 40ac2ea53..e7a3a4b58 100644 --- a/bsd/sys/shm.h +++ b/bsd/sys/shm.h @@ -127,6 +127,8 @@ typedef unsigned short shmatt_t; #define SHM_R (IPC_R) #define SHM_W (IPC_W) +#pragma pack(4) + /* * Technically, we should force all code references to the new structure * definition, not in just the standards conformance case, and leave the @@ -178,6 +180,8 @@ struct __shmid_ds_old { }; #endif /* !__DARWIN_UNIX03 */ +#pragma pack() + #ifndef KERNEL __BEGIN_DECLS diff --git a/bsd/sys/shm_internal.h b/bsd/sys/shm_internal.h index 67b42d87a..86a785e97 100644 --- a/bsd/sys/shm_internal.h +++ b/bsd/sys/shm_internal.h @@ -76,9 +76,7 @@ #include -#if __DARWIN_ALIGN_NATURAL -#pragma options align=natural -#endif +#pragma pack(4) struct user_shmid_ds { struct ipc_perm shm_perm; /* operation permission structure */ @@ -86,15 +84,25 @@ struct user_shmid_ds { pid_t shm_lpid; /* PID of last shared memory op */ pid_t shm_cpid; /* PID of creator */ short shm_nattch; /* number of current attaches */ - time_t shm_atime; /* time of last shmat() */ - time_t shm_dtime; /* time of last shmdt() */ - time_t shm_ctime; /* time of last change by shmctl() */ + user_time_t shm_atime; /* time of last shmat() */ + user_time_t shm_dtime; /* time of last shmdt() */ + user_time_t shm_ctime; /* time of last change by shmctl() */ user_addr_t shm_internal; /* reserved for kernel use */ }; -#if __DARWIN_ALIGN_NATURAL -#pragma options align=reset -#endif +struct user32_shmid_ds { + struct ipc_perm shm_perm; /* operation permission structure */ + uint32_t shm_segsz; /* size of segment in bytes */ + pid_t shm_lpid; /* PID of last shared memory op */ + pid_t shm_cpid; /* PID of creator */ + short shm_nattch; /* number of current attaches */ + uint32_t shm_atime; /* time of last shmat() */ + uint32_t shm_dtime; /* time of 
last shmdt() */ + uint32_t shm_ctime; /* time of last change by shmctl() */ + user32_addr_t shm_internal; /* reserved for kernel use */ +}; + +#pragma pack() /* * System 5 style catch-all structure for shared memory constants that diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 6899927f7..faa6fcc1d 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -184,9 +184,9 @@ union sigval { void *sival_ptr; }; -#define SIGEV_NONE 0 /* No async notification */ -#define SIGEV_SIGNAL 1 /* aio - completion notification */ -#define SIGEV_THREAD 3 /* A notification function will be called to perform notification */ +#define SIGEV_NONE 0 /* No async notification */ +#define SIGEV_SIGNAL 1 /* aio - completion notification */ +#define SIGEV_THREAD 3 /* [NOTIMP] [RTS] call notification function */ struct sigevent { int sigev_notify; /* Notification type */ @@ -198,12 +198,42 @@ struct sigevent { #ifdef BSD_KERNEL_PRIVATE +union user64_sigval { + struct { + uint32_t pad; /* assumes Motorola byte order */ + int32_t sival_int; + } size_equivalent; + user64_addr_t sival_ptr; +}; + +union user32_sigval { + /* Members as suggested by Annex C of POSIX 1003.1b. 
*/ + int32_t sival_int; + user32_addr_t sival_ptr; +}; + union user_sigval { struct { - int pad; /* assumes Motorolla byte order */ - int sival_int; + uint32_t pad; /* assumes Motorola byte order */ + int32_t sival_int; } size_equivalent; - user_addr_t sival_ptr; + user_addr_t sival_ptr; +}; + +struct user64_sigevent { + int sigev_notify; /* Notification type */ + int sigev_signo; /* Signal number */ + union user64_sigval sigev_value; /* Signal value */ + user64_addr_t sigev_notify_function; /* Notify function */ + user64_addr_t sigev_notify_attributes; /* Notify attributes */ +}; + +struct user32_sigevent { + int sigev_notify; /* Notification type */ + int sigev_signo; /* Signal number */ + union user32_sigval sigev_value; /* Signal value */ + user32_addr_t sigev_notify_function; /* Notify function */ + user32_addr_t sigev_notify_attributes; /* Notify attributes */ }; struct user_sigevent { @@ -231,7 +261,7 @@ typedef struct __siginfo { #ifdef BSD_KERNEL_PRIVATE -typedef struct __user_siginfo { +typedef struct user_siginfo { int si_signo; /* signal number */ int si_errno; /* errno association */ int si_code; /* signal code */ @@ -244,6 +274,32 @@ typedef struct __user_siginfo { user_ulong_t pad[7]; /* Reserved for Future Use */ } user_siginfo_t; +typedef struct user64_siginfo { + int si_signo; /* signal number */ + int si_errno; /* errno association */ + int si_code; /* signal code */ + pid_t si_pid; /* sending process */ + uid_t si_uid; /* sender's ruid */ + int si_status; /* exit value */ + user64_addr_t si_addr; /* faulting instruction (see below) */ + union user64_sigval si_value; /* signal value */ + user64_long_t si_band; /* band event for SIGPOLL */ + user64_ulong_t __pad[7]; /* Reserved for Future Use */ +} user64_siginfo_t; + +typedef struct user32_siginfo { + int si_signo; /* signal number */ + int si_errno; /* errno association */ + int si_code; /* signal code */ + pid_t si_pid; /* sending process */ + uid_t si_uid; /* sender's ruid */ + int si_status; 
/* exit value */ + user32_addr_t si_addr; /* faulting instruction (see below) */ + union user32_sigval si_value; /* signal value */ + user32_long_t si_band; /* band event for SIGPOLL */ + user32_ulong_t __pad[7]; /* Reserved for Future Use */ +} user32_siginfo_t; + #endif /* BSD_KERNEL_PRIVATE */ /* @@ -282,8 +338,8 @@ typedef struct __user_siginfo { #define FPE_FLTRES 4 /* [XSI] floating point inexact result */ #define FPE_FLTINV 5 /* [XSI] invalid floating point operation */ #define FPE_FLTSUB 6 /* [XSI] subscript out of range -NOTIMP */ -#define FPE_INTDIV 7 /* [XSI] integer divide by zero -NOTIMP */ -#define FPE_INTOVF 8 /* [XSI] integer overflow -NOTIMP */ +#define FPE_INTDIV 7 /* [XSI] integer divide by zero */ +#define FPE_INTOVF 8 /* [XSI] integer overflow */ /* Codes for SIGSEGV */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -350,19 +406,55 @@ struct sigaction { #ifdef BSD_KERNEL_PRIVATE #include -union __user_sigaction_u { +union __user32_sigaction_u { + user32_addr_t __sa_handler; + user32_addr_t __sa_sigaction; +}; + +struct user32_sigaction { + union __user32_sigaction_u __sigaction_u; /* signal handler */ + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +struct __user32_sigaction { + union __user32_sigaction_u __sigaction_u; /* signal handler */ + user32_addr_t sa_tramp; + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +union __user64_sigaction_u { + user64_addr_t __sa_handler; + user64_addr_t __sa_sigaction; +}; + +struct user64_sigaction { + union __user64_sigaction_u __sigaction_u; /* signal handler */ + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options below */ +}; + +struct __user64_sigaction { + union __user64_sigaction_u __sigaction_u; /* signal handler */ + user64_addr_t sa_tramp; /* signal mask to apply */ + sigset_t sa_mask; /* signal mask to apply */ + int sa_flags; /* see signal options 
below */ +}; + +union __kern_sigaction_u { user_addr_t __sa_handler; user_addr_t __sa_sigaction; }; -struct user_sigaction { - union __user_sigaction_u __sigaction_u; /* signal handler */ +struct kern_sigaction { + union __kern_sigaction_u __sigaction_u; /* signal handler */ sigset_t sa_mask; /* signal mask to apply */ int sa_flags; /* see signal options below */ }; -struct __user_sigaction { - union __user_sigaction_u __sigaction_u; /* signal handler */ +struct __kern_sigaction { + union __kern_sigaction_u __sigaction_u; /* signal handler */ user_addr_t sa_tramp; /* signal mask to apply */ sigset_t sa_mask; /* signal mask to apply */ int sa_flags; /* see signal options below */ @@ -426,10 +518,22 @@ typedef void (*sig_t)(int); /* type of signal function */ */ #ifdef BSD_KERNEL_PRIVATE -struct user_sigaltstack { +struct user32_sigaltstack { + user32_addr_t ss_sp; /* signal stack base */ + user32_size_t ss_size; /* signal stack length */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ +}; + +struct user64_sigaltstack { + user64_addr_t ss_sp; /* signal stack base */ + user64_size_t ss_size; /* signal stack length */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ +}; + +struct kern_sigaltstack { user_addr_t ss_sp; /* signal stack base */ user_size_t ss_size; /* signal stack length */ - int ss_flags __attribute((aligned(8))); /* SA_DISABLE and/or SA_ONSTACK */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ }; #endif /* BSD_KERNEL_PRIVATE */ @@ -484,6 +588,19 @@ struct sigstack { sigmask(SIGFPE)|sigmask(SIGBUS)|\ sigmask(SIGSEGV)|sigmask(SIGSYS)|\ sigmask(SIGPIPE)) + +#define workq_threadmask (threadmask | sigcantmask) + +/* + * Signals carried across exec. 
+ */ +#define execmask (sigmask(SIGHUP)|sigmask(SIGINT)|\ + sigmask(SIGQUIT)|sigmask(SIGKILL)|\ + sigmask(SIGTERM)|sigmask(SIGSTOP)|\ + sigmask(SIGTSTP)|sigmask(SIGCONT)|\ + sigmask(SIGTTIN)|sigmask(SIGTTOU)|\ + sigmask(SIGUSR1)|sigmask(SIGUSR2)) + #endif /* BSD_KERNEL_PRIVATE */ #define BADSIG SIG_ERR diff --git a/bsd/sys/signalvar.h b/bsd/sys/signalvar.h index 1597c85ff..69ff9e15c 100644 --- a/bsd/sys/signalvar.h +++ b/bsd/sys/signalvar.h @@ -87,7 +87,7 @@ struct sigacts { sigset_t ps_siginfo; /* signals that want SA_SIGINFO args */ sigset_t ps_oldmask; /* saved mask from before sigpause */ int ps_flags; /* signal flags, below */ - struct user_sigaltstack ps_sigstk; /* sp, length & flags */ + struct kern_sigaltstack ps_sigstk; /* sp, length & flags */ int ps_sig; /* for core dump/debugger XXX */ int ps_code; /* for core dump/debugger XXX */ int ps_addr; /* for core dump/debugger XXX */ @@ -196,8 +196,7 @@ void execsigs(struct proc *p, thread_t thread); void gsignal(int pgid, int sig); int issignal(struct proc *p); int CURSIG(struct proc *p); -int clear_procsiglist(struct proc *p, int bit); -int clear_procsigmask(struct proc *p, int bit); +int clear_procsiglist(struct proc *p, int bit, int in_signalstart); int set_procsigmask(struct proc *p, int bit); void postsig(int sig); void siginit(struct proc *p) __attribute__((section("__TEXT, initcode"))); @@ -209,7 +208,7 @@ int hassigprop(int sig, int prop); * Machine-dependent functions: */ void sendsig(struct proc *, /*sig_t*/ user_addr_t action, int sig, - int returnmask, u_long code); + int returnmask, uint32_t code); void psignal(struct proc *p, int sig); void psignal_locked(struct proc *, int); diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 82af93763..026ec3bb2 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,7 +72,7 @@ #ifndef _SYS_SOCKET_H_ #define _SYS_SOCKET_H_ -#include +#include #include #include @@ -200,6 +200,8 @@ struct iovec { #define SO_RESTRICT_DENYIN 0x00000001 /* flag for SO_RESTRICTIONS - deny inbound */ #define SO_RESTRICT_DENYOUT 0x00000002 /* flag for SO_RESTRICTIONS - deny outbound */ #define SO_RESTRICT_DENYSET 0x80000000 /* flag for SO_RESTRICTIONS - deny has been set */ +#define SO_RANDOMPORT 0x1082 /* APPLE: request local port randomization */ +#define SO_NP_EXTENSIONS 0x1083 /* To turn off some POSIX behavior */ #endif #ifdef PRIVATE #define SO_EXECPATH 0x1085 /* Application Firewall Socket option */ @@ -223,6 +225,27 @@ struct accept_filter_arg { }; #endif +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#ifdef __APPLE__ + +/* + * Structure to control non-portable Sockets extension to POSIX + */ +struct so_np_extensions { + u_int32_t npx_flags; + u_int32_t npx_mask; +}; + +#define SONPX_SETOPTSHUT 0x000000001 /* flag for allowing setsockopt after shutdown */ + + +#ifdef KERNEL_PRIVATE +#define SONPX_MASK_VALID (SONPX_SETOPTSHUT) +#endif + +#endif +#endif + /* * Level number for (get/set)sockopt() to apply to socket itself. */ @@ -293,7 +316,8 @@ struct accept_filter_arg { #ifndef __APPLE__ #define AF_NETGRAPH 32 /* Netgraph sockets */ #endif -#define AF_MAX 37 +#define AF_IEEE80211 37 /* IEEE 802.11 protocol */ +#define AF_MAX 38 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* @@ -507,21 +531,52 @@ struct msghdr { }; #ifdef KERNEL -/* LP64 version of struct msghdr. all pointers - * grow when we're dealing with a 64-bit process. - * WARNING - keep in sync with struct msghdr +/* + * In-kernel representation of "struct msghdr" from + * userspace. Has enough precision for 32-bit or + * 64-bit clients, but does not need to be packed. 
*/ struct user_msghdr { user_addr_t msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ - user_addr_t msg_iov __attribute((aligned(8))); /* scatter/gather array */ + user_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; + +/* + * LP64 user version of struct msghdr. + * WARNING - keep in sync with struct msghdr + */ + +struct user64_msghdr { + user64_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user64_addr_t msg_iov; /* scatter/gather array */ int msg_iovlen; /* # elements in msg_iov */ - user_addr_t msg_control __attribute((aligned(8))); /* ancillary data, see below */ + user64_addr_t msg_control; /* ancillary data, see below */ socklen_t msg_controllen; /* ancillary data buffer len */ int msg_flags; /* flags on received message */ }; +/* + * ILP32 user version of struct msghdr. 
+ * WARNING - keep in sync with struct msghdr + */ + +struct user32_msghdr { + user32_addr_t msg_name; /* optional address */ + socklen_t msg_namelen; /* size of address */ + user32_addr_t msg_iov; /* scatter/gather array */ + int msg_iovlen; /* # elements in msg_iov */ + user32_addr_t msg_control; /* ancillary data, see below */ + socklen_t msg_controllen; /* ancillary data buffer len */ + int msg_flags; /* flags on received message */ +}; + #endif // KERNEL #define MSG_OOB 0x1 /* process out-of-band data */ @@ -598,15 +653,30 @@ struct cmsgcred { #define CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ __DARWIN_ALIGN32(sizeof(struct cmsghdr))) -/* given pointer to struct cmsghdr, return pointer to next cmsghdr */ -#define CMSG_NXTHDR(mhdr, cmsg) \ - (((unsigned char *)(cmsg) + __DARWIN_ALIGN32((uint32_t)(cmsg)->cmsg_len) + \ - __DARWIN_ALIGN32(sizeof(struct cmsghdr)) > \ - (unsigned char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \ - (struct cmsghdr *)0L /* NULL */ : \ - (struct cmsghdr *)((unsigned char *)(cmsg) + __DARWIN_ALIGN32((uint32_t)(cmsg)->cmsg_len))) +/* + * RFC 2292 requires to check msg_controllen, in case that the kernel returns + * an empty list for some reasons. + */ +#define CMSG_FIRSTHDR(mhdr) \ + ((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \ + (struct cmsghdr *)(mhdr)->msg_control : \ + (struct cmsghdr *)0L) + -#define CMSG_FIRSTHDR(mhdr) ((struct cmsghdr *)(mhdr)->msg_control) +/* + * Given pointer to struct cmsghdr, return pointer to next cmsghdr + * RFC 2292 says that CMSG_NXTHDR(mhdr, NULL) is equivalent to CMSG_FIRSTHDR(mhdr) + */ +#define CMSG_NXTHDR(mhdr, cmsg) \ + ((char *)(cmsg) == (char *)0L ? CMSG_FIRSTHDR(mhdr) : \ + ((((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) + \ + __DARWIN_ALIGN32(sizeof(struct cmsghdr))) > \ + ((unsigned char *)(mhdr)->msg_control + \ + (mhdr)->msg_controllen)) ? 
\ + (struct cmsghdr *)0L /* NULL */ : \ + (struct cmsghdr *)((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len)))) #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* RFC 2292 additions */ @@ -614,7 +684,7 @@ struct cmsgcred { #define CMSG_LEN(l) (__DARWIN_ALIGN32(sizeof(struct cmsghdr)) + (l)) #ifdef KERNEL -#define CMSG_ALIGN(n) __DARWIN_ALIGN(n) +#define CMSG_ALIGN(n) __DARWIN_ALIGN32(n) #endif #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -667,13 +737,29 @@ struct sf_hdtr { #ifdef KERNEL +/* In-kernel representation */ struct user_sf_hdtr { - user_addr_t headers __attribute((aligned(8))); /* pointer to an array of header struct iovec's */ + user_addr_t headers; /* pointer to an array of header struct iovec's */ int hdr_cnt; /* number of header iovec's */ - user_addr_t trailers __attribute((aligned(8))); /* pointer to an array of trailer struct iovec's */ + user_addr_t trailers; /* pointer to an array of trailer struct iovec's */ int trl_cnt; /* number of trailer iovec's */ }; +/* LP64 user version of struct sf_hdtr */ +struct user64_sf_hdtr { + user64_addr_t headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + user64_addr_t trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; + +/* ILP32 user version of struct sf_hdtr */ +struct user32_sf_hdtr { + user32_addr_t headers; /* pointer to an array of header struct iovec's */ + int hdr_cnt; /* number of header iovec's */ + user32_addr_t trailers; /* pointer to an array of trailer struct iovec's */ + int trl_cnt; /* number of trailer iovec's */ +}; #endif /* KERNEL */ diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index 9f55d37a6..2bd0c593e 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,7 +144,7 @@ struct socket { short so_timeo; /* connection timeout */ u_short so_error; /* error affecting connection */ pid_t so_pgid; /* pgid for signals */ - u_long so_oobmark; /* chars to oob mark */ + u_int32_t so_oobmark; /* chars to oob mark */ #ifndef __APPLE__ /* We don't support AIO ops */ TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */ @@ -153,12 +153,12 @@ struct socket { * Variables for socket buffering. */ struct sockbuf { - u_long sb_cc; /* actual chars in buffer */ - u_long sb_hiwat; /* max actual char count */ - u_long sb_mbcnt; /* chars of mbufs used */ - u_long sb_mbmax; /* max chars of mbufs to use */ - u_long sb_ctl; /* non-data chars in buffer */ - u_long sb_lowat; /* low water mark */ + u_int32_t sb_cc; /* actual chars in buffer */ + u_int32_t sb_hiwat; /* max actual char count */ + u_int32_t sb_mbcnt; /* chars of mbufs used */ + u_int32_t sb_mbmax; /* max chars of mbufs to use */ + u_int32_t sb_ctl; /* non-data chars in buffer */ + u_int32_t sb_lowat; /* low water mark */ struct mbuf *sb_mb; /* the mbuf chain */ struct mbuf *sb_mbtail; /* the last mbuf in the chain */ struct mbuf *sb_lastrecord; /* first mbuf of last record */ @@ -172,6 +172,7 @@ struct socket { void *reserved1[4]; /* for future use */ } so_rcv, so_snd; #define SB_MAX (8192*1024) /* default for max chars in sockbuf */ +#define LOW_SB_MAX (2*9*1024) /* lower limit on max socket buffer size, 2 max datagrams */ #define SB_LOCK 0x01 /* lock on data queue */ #define SB_WANT 0x02 /* someone is waiting to lock */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ @@ -209,13 +210,13 @@ struct socket { int cached_in_sock_layer; /* bundled with pcb/pcb.inp_ppcb? 
*/ struct socket *cache_next; struct socket *cache_prev; - u_long cache_timestamp; + u_int32_t cache_timestamp; caddr_t so_saved_pcb; /* Saved pcb when cacheing */ struct mbuf *so_temp; /* Holding area for outbound frags */ /* Plug-in support - make the socket interface overridable */ struct mbuf *so_tail; struct socket_filter_entry *so_filt; /* NKE hook */ - u_long so_flags; /* Flags */ + u_int32_t so_flags; /* Flags */ #define SOF_NOSIGPIPE 0x1 #define SOF_NOADDRAVAIL 0x2 /* EADDRNOTAVAIL if src addr is gone */ #define SOF_PCBCLEARING 0x4 /* pru_disconnect done; don't call pru_detach */ @@ -230,6 +231,8 @@ struct socket { #define SOF_NOTIFYCONFLICT 0x400 /* notify that a bind was done on a port already in use */ #endif #define SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns */ +#define SOF_BINDRANDOMPORT 0x1000 /* Request a randomized port number for the bind */ +#define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow setsockopt(2) after shut down */ int so_usecount; /* refcounting of socket use */; int so_retaincnt; u_int32_t so_filteruse; /* usecount for the socket filters */ @@ -239,9 +242,9 @@ struct socket { /* for debug pruposes */ #define SO_LCKDBG_MAX 4 /* number of debug locking Link Registers recorded */ - u_int32_t lock_lr[SO_LCKDBG_MAX]; /* locking calling history */ + void *lock_lr[SO_LCKDBG_MAX]; /* locking calling history */ int next_lock_lr; - u_int32_t unlock_lr[SO_LCKDBG_MAX]; /* unlocking caller history */ + void *unlock_lr[SO_LCKDBG_MAX]; /* unlocking caller history */ int next_unlock_lr; void *reserved; /* reserved for future use */ #endif /* __APPLE__ */ @@ -281,27 +284,7 @@ struct socket { #pragma pack(4) -/* - * Externalized form of struct socket used by the sysctl(3) interface. 
- */ -struct xsocket { - u_int32_t xso_len; /* length of this structure */ - _XSOCKET_PTR(struct socket *) xso_so; /* makes a convenient handle */ - short so_type; - short so_options; - short so_linger; - short so_state; - _XSOCKET_PTR(caddr_t) so_pcb; /* another convenient handle */ - int xso_protocol; - int xso_family; - short so_qlen; - short so_incqlen; - short so_qlimit; - short so_timeo; - u_short so_error; - pid_t so_pgid; - u_int32_t so_oobmark; - struct xsockbuf { +struct xsockbuf { u_int32_t sb_cc; u_int32_t sb_hiwat; u_int32_t sb_mbcnt; @@ -309,10 +292,59 @@ struct xsocket { int32_t sb_lowat; short sb_flags; short sb_timeo; - } so_rcv, so_snd; - uid_t so_uid; /* XXX */ }; +/* + * Externalized form of struct socket used by the sysctl(3) interface. + */ +struct xsocket { + u_int32_t xso_len; /* length of this structure */ + _XSOCKET_PTR(struct socket *) xso_so; /* makes a convenient handle */ + short so_type; + short so_options; + short so_linger; + short so_state; + _XSOCKET_PTR(caddr_t) so_pcb; /* another convenient handle */ + int xso_protocol; + int xso_family; + short so_qlen; + short so_incqlen; + short so_qlimit; + short so_timeo; + u_short so_error; + pid_t so_pgid; + u_int32_t so_oobmark; + struct xsockbuf so_rcv; + struct xsockbuf so_snd; + uid_t so_uid; /* XXX */ +}; + +#if !CONFIG_EMBEDDED + +struct xsocket64 { + u_int32_t xso_len; /* length of this structure */ + u_int64_t xso_so; /* makes a convenient handle */ + short so_type; + short so_options; + short so_linger; + short so_state; + u_int64_t so_pcb; /* another convenient handle */ + int xso_protocol; + int xso_family; + short so_qlen; + short so_incqlen; + short so_qlimit; + short so_timeo; + u_short so_error; + pid_t so_pgid; + u_int32_t so_oobmark; + struct xsockbuf so_rcv; + struct xsockbuf so_snd; + uid_t so_uid; /* XXX */ +}; + +#endif /* !CONFIG_EMBEDDED */ + #pragma pack() #ifdef KERNEL_PRIVATE @@ -330,7 +362,7 @@ struct xsocket { __BEGIN_DECLS int sb_notify(struct sockbuf *sb); 
-long sbspace(struct sockbuf *sb); +int sbspace(struct sockbuf *sb); int sosendallatonce(struct socket *so); int soreadable(struct socket *so); int sowriteable(struct socket *so); @@ -393,7 +425,7 @@ MALLOC_DECLARE(M_SONAME); #endif extern int maxsockets; -extern u_long sb_max; +extern u_int32_t sb_max; extern int socket_zone; extern so_gen_t so_gencnt; extern int socket_debug; @@ -451,7 +483,7 @@ extern void sbdroprecord(struct sockbuf *sb); extern void sbflush(struct sockbuf *sb); extern int sbinsertoob(struct sockbuf *sb, struct mbuf *m0); extern void sbrelease(struct sockbuf *sb); -extern int sbreserve(struct sockbuf *sb, u_long cc); +extern int sbreserve(struct sockbuf *sb, u_int32_t cc); extern void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); extern int sbwait(struct sockbuf *sb); extern int sb_lock(struct sockbuf *sb); @@ -493,6 +525,8 @@ extern int sooptcopyout(struct sockopt *sopt, void *data, size_t len); extern int socket_lock(struct socket *so, int refcount); extern int socket_unlock(struct socket *so, int refcount); extern void sofreelastref(struct socket *, int); +extern int sogetaddr_locked(struct socket *, struct sockaddr **, int); +extern const char *solockhistory_nr(struct socket *); /* * XXX; prepare mbuf for (__FreeBSD__ < 3) routines. 
@@ -505,7 +539,7 @@ extern int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m); extern int sopoll(struct socket *so, int events, struct ucred *cred, void *wql); extern int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); -extern int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); +extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc); extern void sorflush(struct socket *so); extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); @@ -514,6 +548,9 @@ extern int sosetopt(struct socket *so, struct sockopt *sopt); extern int soshutdown(struct socket *so, int how); extern int soshutdownlock(struct socket *so, int how); extern void sotoxsocket(struct socket *so, struct xsocket *xso); +#if !CONFIG_EMBEDDED +extern void sotoxsocket64(struct socket *so, struct xsocket64 *xso); +#endif extern void sowakeup(struct socket *so, struct sockbuf *sb); extern int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p); diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h index 405c8088b..4a7700e0b 100644 --- a/bsd/sys/sockio.h +++ b/bsd/sys/sockio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,11 +77,6 @@ #define SIOCSPGRP _IOW('s', 8, int) /* set process group */ #define SIOCGPGRP _IOR('s', 9, int) /* get process group */ -#if 0 -#define SIOCADDRT _IOW('r', 10, struct ortentry) /* add route */ -#define SIOCDELRT _IOW('r', 11, struct ortentry) /* delete route */ -#endif - /* * OSIOCGIF* ioctls are deprecated; they are kept for binary compatibility. 
*/ @@ -101,6 +96,8 @@ #define SIOCSIFBRDADDR _IOW('i', 19, struct ifreq) /* set broadcast addr */ #ifdef KERNEL_PRIVATE #define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) /* deprecated */ +#define OSIOCGIFCONF32 _IOWR('i', 20, struct ifconf32) /* deprecated */ +#define OSIOCGIFCONF64 _IOWR('i', 20, struct ifconf64) /* deprecated */ #define OSIOCGIFNETMASK _IOWR('i', 21, struct ifreq) /* deprecated */ #endif /* KERNEL_PRIVATE */ #define SIOCSIFNETMASK _IOW('i', 22, struct ifreq) /* set net addr mask */ @@ -118,10 +115,13 @@ #define SIOCGIFADDR _IOWR('i', 33, struct ifreq) /* get ifnet address */ #define SIOCGIFDSTADDR _IOWR('i', 34, struct ifreq) /* get p-p address */ #define SIOCGIFBRDADDR _IOWR('i', 35, struct ifreq) /* get broadcast addr */ +#if !defined(KERNEL) || defined(KERNEL_PRIVATE) #define SIOCGIFCONF _IOWR('i', 36, struct ifconf) /* get ifnet list */ +#endif /* !KERNEL || KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE +#define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) /* get ifnet list */ #define SIOCGIFCONF64 _IOWR('i', 36, struct ifconf64) /* get ifnet list */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #define SIOCGIFNETMASK _IOWR('i', 37, struct ifreq) /* get net addr mask */ #define SIOCAUTOADDR _IOWR('i', 38, struct ifreq) /* autoconf address */ #define SIOCAUTONETMASK _IOW('i', 39, struct ifreq) /* autoconf netmask */ @@ -137,8 +137,9 @@ #define SIOCSIFMEDIA _IOWR('i', 55, struct ifreq) /* set net media */ #define SIOCGIFMEDIA _IOWR('i', 56, struct ifmediareq) /* get net media */ #ifdef KERNEL_PRIVATE +#define SIOCGIFMEDIA32 _IOWR('i', 56, struct ifmediareq32) /* get net media */ #define SIOCGIFMEDIA64 _IOWR('i', 56, struct ifmediareq64) /* get net media (64-bit) */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #define SIOCSIFGENERIC _IOW('i', 57, struct ifreq) /* generic IF set op */ #define SIOCGIFGENERIC _IOWR('i', 58, struct ifreq) /* generic IF get op */ #define SIOCRSLVMULTI _IOWR('i', 59, struct rslvmulti_req) @@ -165,13 +166,14 
@@ #define SIOCGETVLAN SIOCGIFVLAN #ifdef KERNEL_PRIVATE #define SIOCSIFDEVMTU SIOCSIFALTMTU /* deprecated */ -#endif KERNEL_PRIVATE +#endif /* KERNEL_PRIVATE */ #ifdef PRIVATE #ifdef KERNEL #define SIOCIFGCLONERS _IOWR('i', 129, struct if_clonereq) /* get cloners */ +#define SIOCIFGCLONERS32 _IOWR('i', 129, struct if_clonereq32) /* get cloners */ #define SIOCIFGCLONERS64 _IOWR('i', 129, struct if_clonereq64) /* get cloners */ -#endif KERNEL +#endif /* KERNEL */ /* * temporary control calls to attach/detach IP to/from an ethernet interface @@ -193,4 +195,6 @@ #define SIOCSIFKPI _IOW('i', 134, struct ifreq) /* set interface kext param - root only */ #define SIOCGIFKPI _IOWR('i', 135, struct ifreq) /* get interface kext param */ +#define SIOCGIFWAKEFLAGS _IOWR('i', 136, struct ifreq) /* get interface wake property flags */ + #endif /* !_SYS_SOCKIO_H_ */ diff --git a/bsd/sys/spawn.h b/bsd/sys/spawn.h index c9f9e018f..f54fcc396 100644 --- a/bsd/sys/spawn.h +++ b/bsd/sys/spawn.h @@ -59,6 +59,17 @@ #define POSIX_SPAWN_SETEXEC 0x0040 #define POSIX_SPAWN_START_SUSPENDED 0x0080 +/* + * Possible values to be set for the process control actions on resource starvation. + * POSIX_SPAWN_PCONTROL_THROTTLE indicates that the process is to be throttled on starvation. + * POSIX_SPAWN_PCONTROL_SUSPEND indicates that the process is to be suspended on starvation. + * POSIX_SPAWN_PCONTROL_KILL indicates that the process is to be terminated on starvation. 
+ */ +#define POSIX_SPAWN_PCONTROL_NONE 0x0000 +#define POSIX_SPAWN_PCONTROL_THROTTLE 0x0001 +#define POSIX_SPAWN_PCONTROL_SUSPEND 0x0002 +#define POSIX_SPAWN_PCONTROL_KILL 0x0003 + #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #endif /* _SYS_SPAWN_H_ */ diff --git a/bsd/sys/spawn_internal.h b/bsd/sys/spawn_internal.h index fe3524b84..0e8943947 100644 --- a/bsd/sys/spawn_internal.h +++ b/bsd/sys/spawn_internal.h @@ -53,6 +53,7 @@ typedef enum { PSPA_SPECIAL = 0, PSPA_EXCEPTION = 1, + PSPA_AU_SESSION = 2, } pspa_t; /* @@ -99,6 +100,7 @@ typedef struct _posix_spawnattr { pid_t psa_pgroup; /* pgroup to spawn into */ cpu_type_t psa_binprefs[NBINPREFS]; /* cpu affinity prefs*/ _posix_spawn_port_actions_t psa_ports; /* special/exception ports */ + int psa_pcontrol; /* process control bits on resource starvation */ } *_posix_spawnattr_t; @@ -200,6 +202,15 @@ struct _posix_spawn_args_desc { #pragma options align=natural #endif +struct user32__posix_spawn_args_desc { + uint32_t attr_size; /* size of attributes block */ + uint32_t attrp; /* pointer to block */ + uint32_t file_actions_size; /* size of file actions block */ + uint32_t file_actions; /* pointer to block */ + uint32_t port_actions_size; /* size of port actions block */ + uint32_t port_actions; /* pointer to block */ +}; + struct user__posix_spawn_args_desc { user_size_t attr_size; /* size of attributes block */ user_addr_t attrp; /* pointer to block */ diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index ab921b8ae..bcc8b79b4 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -81,7 +81,8 @@ /* [XSI] The timespec structure may be defined as described in */ #define __need_struct_timespec #ifdef KERNEL -#define __need_struct_user_timespec +#define __need_struct_user64_timespec +#define __need_struct_user32_timespec #endif /* KERNEL */ #include @@ -276,12 +277,13 @@ struct stat64 __DARWIN_STRUCT_STAT64; #ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE /* LP64 version of struct stat. 
time_t (see timespec) is a long and must * grow when we're dealing with a 64-bit process. * WARNING - keep in sync with struct stat */ -struct user_stat { +struct user64_stat { dev_t st_dev; /* [XSI] ID of device containing file */ ino_t st_ino; /* [XSI] File serial number */ mode_t st_mode; /* [XSI] Mode of file (see below) */ @@ -290,16 +292,16 @@ struct user_stat { gid_t st_gid; /* [XSI] Group ID of the file */ dev_t st_rdev; /* [XSI] Device ID */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) - struct user_timespec st_atimespec; /* time of last access */ - struct user_timespec st_mtimespec; /* time of last data modification */ - struct user_timespec st_ctimespec; /* time of last status change */ + struct user64_timespec st_atimespec; /* time of last access */ + struct user64_timespec st_mtimespec; /* time of last data modification */ + struct user64_timespec st_ctimespec; /* time of last status change */ #else - user_time_t st_atime; /* [XSI] Time of last access */ - __int64_t st_atimensec; /* nsec of last access */ - user_time_t st_mtime; /* [XSI] Last data modification */ - __int64_t st_mtimensec; /* last data modification nsec */ - user_time_t st_ctime; /* [XSI] Time of last status change */ - __int64_t st_ctimensec; /* nsec of last status change */ + user64_time_t st_atime; /* [XSI] Time of last access */ + user64_long_t st_atimensec; /* nsec of last access */ + user64_time_t st_mtime; /* [XSI] Last data modification */ + user64_long_t st_mtimensec; /* last data modification nsec */ + user64_time_t st_ctime; /* [XSI] Time of last status change */ + user64_long_t st_ctimensec; /* nsec of last status change */ #endif off_t st_size; /* [XSI] File size, in bytes */ blkcnt_t st_blocks; /* [XSI] Blocks allocated for file */ @@ -310,10 +312,44 @@ struct user_stat { __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! */ }; -extern void munge_stat(struct stat *sbp, struct user_stat *usbp); +/* ILP32 version of struct stat. 
+ * WARNING - keep in sync with struct stat + */ + +struct user32_stat { + dev_t st_dev; /* [XSI] ID of device containing file */ + ino_t st_ino; /* [XSI] File serial number */ + mode_t st_mode; /* [XSI] Mode of file (see below) */ + nlink_t st_nlink; /* [XSI] Number of hard links */ + uid_t st_uid; /* [XSI] User ID of the file */ + gid_t st_gid; /* [XSI] Group ID of the file */ + dev_t st_rdev; /* [XSI] Device ID */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) + struct user32_timespec st_atimespec; /* time of last access */ + struct user32_timespec st_mtimespec; /* time of last data modification */ + struct user32_timespec st_ctimespec; /* time of last status change */ +#else + user32_time_t st_atime; /* [XSI] Time of last access */ + user32_long_t st_atimensec; /* nsec of last access */ + user32_time_t st_mtime; /* [XSI] Last data modification */ + user32_long_t st_mtimensec; /* last data modification nsec */ + user32_time_t st_ctime; /* [XSI] Time of last status change */ + user32_long_t st_ctimensec; /* nsec of last status change */ +#endif + off_t st_size; /* [XSI] File size, in bytes */ + blkcnt_t st_blocks; /* [XSI] Blocks allocated for file */ + blksize_t st_blksize; /* [XSI] Optimal blocksize for I/O */ + __uint32_t st_flags; /* user defined flags for file */ + __uint32_t st_gen; /* file generation number */ + __int32_t st_lspare; /* RESERVED: DO NOT USE! */ + __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! 
*/ +}; + +extern void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp); +extern void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp); -struct user_stat64 { +struct user64_stat64 { dev_t st_dev; /* [XSI] ID of device containing file */ mode_t st_mode; /* [XSI] Mode of file (see below) */ nlink_t st_nlink; /* [XSI] Number of hard links */ @@ -322,19 +358,19 @@ struct user_stat64 { gid_t st_gid; /* [XSI] Group ID of the file */ dev_t st_rdev; /* [XSI] Device ID */ #ifndef _POSIX_C_SOURCE - struct user_timespec st_atimespec; /* time of last access */ - struct user_timespec st_mtimespec; /* time of last data modification */ - struct user_timespec st_ctimespec; /* time of last status change */ - struct user_timespec st_birthtimespec; /* time of file creation(birth) */ + struct user64_timespec st_atimespec; /* time of last access */ + struct user64_timespec st_mtimespec; /* time of last data modification */ + struct user64_timespec st_ctimespec; /* time of last status change */ + struct user64_timespec st_birthtimespec; /* time of file creation(birth) */ #else - user_time_t st_atime; /* [XSI] Time of last access */ - __int64_t st_atimensec; /* nsec of last access */ - user_time_t st_mtime; /* [XSI] Last data modification time */ - __int64_t st_mtimensec; /* last data modification nsec */ - user_time_t st_ctime; /* [XSI] Time of last status change */ - __int64_t st_ctimensec; /* nsec of last status change */ - user_time_t st_birthtime; /* File creation time(birth) */ - __int64_t st_birthtimensec; /* nsec of File creation time */ + user64_time_t st_atime; /* [XSI] Time of last access */ + user64_long_t st_atimensec; /* nsec of last access */ + user64_time_t st_mtime; /* [XSI] Last data modification time */ + user64_long_t st_mtimensec; /* last data modification nsec */ + user64_time_t st_ctime; /* [XSI] Time of last status change */ + user64_long_t st_ctimensec; /* nsec of last status change */ + user64_time_t st_birthtime; /* File creation 
time(birth) */ + user64_long_t st_birthtimensec; /* nsec of File creation time */ #endif off_t st_size; /* [XSI] file size, in bytes */ blkcnt_t st_blocks; /* [XSI] blocks allocated for file */ @@ -345,9 +381,44 @@ struct user_stat64 { __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! */ }; -extern void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp); +struct user32_stat64 { + dev_t st_dev; /* [XSI] ID of device containing file */ + mode_t st_mode; /* [XSI] Mode of file (see below) */ + nlink_t st_nlink; /* [XSI] Number of hard links */ + ino64_t st_ino; /* [XSI] File serial number */ + uid_t st_uid; /* [XSI] User ID of the file */ + gid_t st_gid; /* [XSI] Group ID of the file */ + dev_t st_rdev; /* [XSI] Device ID */ +#ifndef _POSIX_C_SOURCE + struct user32_timespec st_atimespec; /* time of last access */ + struct user32_timespec st_mtimespec; /* time of last data modification */ + struct user32_timespec st_ctimespec; /* time of last status change */ + struct user32_timespec st_birthtimespec; /* time of file creation(birth) */ +#else + user32_time_t st_atime; /* [XSI] Time of last access */ + user32_long_t st_atimensec; /* nsec of last access */ + user32_time_t st_mtime; /* [XSI] Last data modification time */ + user32_long_t st_mtimensec; /* last data modification nsec */ + user32_time_t st_ctime; /* [XSI] Time of last status change */ + user32_long_t st_ctimensec; /* nsec of last status change */ + user32_time_t st_birthtime; /* File creation time(birth) */ + user32_long_t st_birthtimensec; /* nsec of File creation time */ +#endif + off_t st_size; /* [XSI] file size, in bytes */ + blkcnt_t st_blocks; /* [XSI] blocks allocated for file */ + blksize_t st_blksize; /* [XSI] optimal blocksize for I/O */ + __uint32_t st_flags; /* user defined flags for file */ + __uint32_t st_gen; /* file generation number */ + __uint32_t st_lspare; /* RESERVED: DO NOT USE! */ + __int64_t st_qspare[2]; /* RESERVED: DO NOT USE! 
*/ +} __attribute__((packed,aligned(4))); + +extern void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp); +extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp); -#endif // KERNEL +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* KERNEL */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -373,7 +444,6 @@ extern void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp); #define S_IFSOCK 0140000 /* [XSI] socket */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define S_IFWHT 0160000 /* whiteout */ -#define S_IFXATTR 0200000 /* extended attribute */ #endif /* File mode */ @@ -411,16 +481,15 @@ extern void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp); * of st_mode from a stat structure. The macro shall evaluate to a non-zero * value if the test is true; 0 if the test is false. */ -#define S_ISBLK(m) (((m) & 0170000) == 0060000) /* block special */ -#define S_ISCHR(m) (((m) & 0170000) == 0020000) /* char special */ -#define S_ISDIR(m) (((m) & 0170000) == 0040000) /* directory */ -#define S_ISFIFO(m) (((m) & 0170000) == 0010000) /* fifo or socket */ -#define S_ISREG(m) (((m) & 0170000) == 0100000) /* regular file */ -#define S_ISLNK(m) (((m) & 0170000) == 0120000) /* symbolic link */ -#define S_ISSOCK(m) (((m) & 0170000) == 0140000) /* socket */ +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) /* block special */ +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) /* char special */ +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) /* directory */ +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) /* fifo or socket */ +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) /* regular file */ +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) /* symbolic link */ +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) /* socket */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define S_ISWHT(m) (((m) & 0170000) == 0160000) /* whiteout */ -#define S_ISXATTR(m) (((m) & 0200000) == 0200000) /* 
extended attribute */ +#define S_ISWHT(m) (((m) & S_IFMT) == S_IFWHT) /* whiteout */ #endif /* @@ -483,7 +552,8 @@ extern void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp); * in Mac OS X. */ /* #define UF_NOUNLINK 0x00000010 */ /* file may not be removed or renamed */ -/* Bits 0x0020 through 0x4000 are currently undefined. */ +#define UF_COMPRESSED 0x00000020 /* file is hfs-compressed */ +/* Bits 0x0040 through 0x4000 are currently undefined. */ #define UF_HIDDEN 0x00008000 /* hint that this item should not be */ /* displayed in a GUI */ /* @@ -493,6 +563,7 @@ extern void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp); #define SF_ARCHIVED 0x00010000 /* file is archived */ #define SF_IMMUTABLE 0x00020000 /* file may not be changed */ #define SF_APPEND 0x00040000 /* writes to file may only append */ + /* * The following two bits are reserved for FreeBSD. They are not * implemented in Mac OS X. diff --git a/bsd/sys/syscall.h b/bsd/sys/syscall.h deleted file mode 100644 index 8dd2a6b20..000000000 --- a/bsd/sys/syscall.h +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - * - * - * System call switch table. - * - * DO NOT EDIT-- this file is automatically generated. - * created from syscalls.master - */ - -#ifndef _SYS_SYSCALL_H_ -#define _SYS_SYSCALL_H_ - -#include -#ifdef __APPLE_API_PRIVATE -#define SYS_syscall 0 -#define SYS_exit 1 -#define SYS_fork 2 -#define SYS_read 3 -#define SYS_write 4 -#define SYS_open 5 -#define SYS_close 6 -#define SYS_wait4 7 - /* 8 old creat */ -#define SYS_link 9 -#define SYS_unlink 10 - /* 11 old execv */ -#define SYS_chdir 12 -#define SYS_fchdir 13 -#define SYS_mknod 14 -#define SYS_chmod 15 -#define SYS_chown 16 -#define SYS_obreak 17 -#define SYS_ogetfsstat 18 -#define SYS_getfsstat 18 - /* 19 old lseek */ -#define SYS_getpid 20 - /* 21 old mount */ - /* 22 old umount */ -#define SYS_setuid 23 -#define SYS_getuid 24 -#define SYS_geteuid 25 -#define SYS_ptrace 26 -#define SYS_recvmsg 27 -#define SYS_sendmsg 28 -#define SYS_recvfrom 29 -#define SYS_accept 30 -#define SYS_getpeername 31 -#define SYS_getsockname 32 -#define SYS_access 33 -#define SYS_chflags 34 -#define SYS_fchflags 35 -#define SYS_sync 36 -#define SYS_kill 37 - /* 38 old stat */ -#define SYS_getppid 39 - /* 40 old lstat */ -#define SYS_dup 41 -#define SYS_pipe 42 -#define SYS_getegid 43 -#define SYS_profil 44 - /* 45 old ktrace */ -#define SYS_sigaction 46 -#define SYS_getgid 47 -#define SYS_sigprocmask 48 -#define SYS_getlogin 49 -#define SYS_setlogin 50 -#define SYS_acct 51 -#define SYS_sigpending 52 -#define 
SYS_sigaltstack 53 -#define SYS_ioctl 54 -#define SYS_reboot 55 -#define SYS_revoke 56 -#define SYS_symlink 57 -#define SYS_readlink 58 -#define SYS_execve 59 -#define SYS_umask 60 -#define SYS_chroot 61 - /* 62 old fstat */ - /* 63 used internally , reserved */ - /* 64 old getpagesize */ -#define SYS_msync 65 -#define SYS_vfork 66 - /* 67 old vread */ - /* 68 old vwrite */ -#define SYS_sbrk 69 -#define SYS_sstk 70 - /* 71 old mmap */ -#define SYS_ovadvise 72 -#define SYS_munmap 73 -#define SYS_mprotect 74 -#define SYS_madvise 75 - /* 76 old vhangup */ - /* 77 old vlimit */ -#define SYS_mincore 78 -#define SYS_getgroups 79 -#define SYS_setgroups 80 -#define SYS_getpgrp 81 -#define SYS_setpgid 82 -#define SYS_setitimer 83 - /* 84 old wait */ -#define SYS_swapon 85 -#define SYS_getitimer 86 - /* 87 old gethostname */ - /* 88 old sethostname */ -#define SYS_getdtablesize 89 -#define SYS_dup2 90 - /* 91 old getdopt */ -#define SYS_fcntl 92 -#define SYS_select 93 - /* 94 old setdopt */ -#define SYS_fsync 95 -#define SYS_setpriority 96 -#define SYS_socket 97 -#define SYS_connect 98 - /* 99 old accept */ -#define SYS_getpriority 100 - /* 101 old send */ - /* 102 old recv */ - /* 103 old sigreturn */ -#define SYS_bind 104 -#define SYS_setsockopt 105 -#define SYS_listen 106 - /* 107 old vtimes */ - /* 108 old sigvec */ - /* 109 old sigblock */ - /* 110 old sigsetmask */ -#define SYS_sigsuspend 111 - /* 112 old sigstack */ - /* 113 old recvmsg */ - /* 114 old sendmsg */ - /* 115 old vtrace */ -#define SYS_gettimeofday 116 -#define SYS_getrusage 117 -#define SYS_getsockopt 118 - /* 119 old resuba */ -#define SYS_readv 120 -#define SYS_writev 121 -#define SYS_settimeofday 122 -#define SYS_fchown 123 -#define SYS_fchmod 124 - /* 125 old recvfrom */ -#define SYS_setreuid 126 -#define SYS_setregid 127 -#define SYS_rename 128 - /* 129 old truncate */ - /* 130 old ftruncate */ -#define SYS_flock 131 -#define SYS_mkfifo 132 -#define SYS_sendto 133 -#define SYS_shutdown 134 -#define 
SYS_socketpair 135 -#define SYS_mkdir 136 -#define SYS_rmdir 137 -#define SYS_utimes 138 -#define SYS_futimes 139 -#define SYS_adjtime 140 - /* 141 old getpeername */ -#define SYS_gethostuuid 142 - /* 143 old sethostid */ - /* 144 old getrlimit */ - /* 145 old setrlimit */ - /* 146 old killpg */ -#define SYS_setsid 147 - /* 148 old setquota */ - /* 149 old qquota */ - /* 150 old getsockname */ -#define SYS_getpgid 151 -#define SYS_setprivexec 152 -#define SYS_pread 153 -#define SYS_pwrite 154 -#define SYS_nfssvc 155 - /* 156 old getdirentries */ -#define SYS_statfs 157 -#define SYS_fstatfs 158 -#define SYS_unmount 159 - /* 160 old async_daemon */ -#define SYS_getfh 161 - /* 162 old getdomainname */ - /* 163 old setdomainname */ - /* 164 */ -#define SYS_quotactl 165 - /* 166 old exportfs */ -#define SYS_mount 167 - /* 168 old ustat */ -#define SYS_csops 169 -#define SYS_table 170 - /* 171 old wait3 */ - /* 172 old rpause */ -#define SYS_waitid 173 - /* 174 old getdents */ - /* 175 old gc_control */ -#define SYS_add_profil 176 - /* 177 */ - /* 178 */ - /* 179 */ -#define SYS_kdebug_trace 180 -#define SYS_setgid 181 -#define SYS_setegid 182 -#define SYS_seteuid 183 -#define SYS_sigreturn 184 -#define SYS_chud 185 - /* 186 */ - /* 187 */ -#define SYS_stat 188 -#define SYS_fstat 189 -#define SYS_lstat 190 -#define SYS_pathconf 191 -#define SYS_fpathconf 192 - /* 193 */ -#define SYS_getrlimit 194 -#define SYS_setrlimit 195 -#define SYS_getdirentries 196 -#define SYS_mmap 197 - /* 198 __syscall */ -#define SYS_lseek 199 -#define SYS_truncate 200 -#define SYS_ftruncate 201 -#define SYS___sysctl 202 -#define SYS_mlock 203 -#define SYS_munlock 204 -#define SYS_undelete 205 -#define SYS_ATsocket 206 -#define SYS_ATgetmsg 207 -#define SYS_ATputmsg 208 -#define SYS_ATPsndreq 209 -#define SYS_ATPsndrsp 210 -#define SYS_ATPgetreq 211 -#define SYS_ATPgetrsp 212 - /* 213 Reserved for AppleTalk */ -#define SYS_kqueue_from_portset_np 214 -#define SYS_kqueue_portset_np 215 -#define 
SYS_mkcomplex 216 -#define SYS_statv 217 -#define SYS_lstatv 218 -#define SYS_fstatv 219 -#define SYS_getattrlist 220 -#define SYS_setattrlist 221 -#define SYS_getdirentriesattr 222 -#define SYS_exchangedata 223 - /* 224 was checkuseraccess */ -#define SYS_searchfs 225 -#define SYS_delete 226 -#define SYS_copyfile 227 - /* 228 */ - /* 229 */ -#define SYS_poll 230 -#define SYS_watchevent 231 -#define SYS_waitevent 232 -#define SYS_modwatch 233 -#define SYS_getxattr 234 -#define SYS_fgetxattr 235 -#define SYS_setxattr 236 -#define SYS_fsetxattr 237 -#define SYS_removexattr 238 -#define SYS_fremovexattr 239 -#define SYS_listxattr 240 -#define SYS_flistxattr 241 -#define SYS_fsctl 242 -#define SYS_initgroups 243 -#define SYS_posix_spawn 244 - /* 245 */ - /* 246 */ -#define SYS_nfsclnt 247 -#define SYS_fhopen 248 - /* 249 */ -#define SYS_minherit 250 -#define SYS_semsys 251 -#define SYS_msgsys 252 -#define SYS_shmsys 253 -#define SYS_semctl 254 -#define SYS_semget 255 -#define SYS_semop 256 - /* 257 */ -#define SYS_msgctl 258 -#define SYS_msgget 259 -#define SYS_msgsnd 260 -#define SYS_msgrcv 261 -#define SYS_shmat 262 -#define SYS_shmctl 263 -#define SYS_shmdt 264 -#define SYS_shmget 265 -#define SYS_shm_open 266 -#define SYS_shm_unlink 267 -#define SYS_sem_open 268 -#define SYS_sem_close 269 -#define SYS_sem_unlink 270 -#define SYS_sem_wait 271 -#define SYS_sem_trywait 272 -#define SYS_sem_post 273 -#define SYS_sem_getvalue 274 -#define SYS_sem_init 275 -#define SYS_sem_destroy 276 -#define SYS_open_extended 277 -#define SYS_umask_extended 278 -#define SYS_stat_extended 279 -#define SYS_lstat_extended 280 -#define SYS_fstat_extended 281 -#define SYS_chmod_extended 282 -#define SYS_fchmod_extended 283 -#define SYS_access_extended 284 -#define SYS_settid 285 -#define SYS_gettid 286 -#define SYS_setsgroups 287 -#define SYS_getsgroups 288 -#define SYS_setwgroups 289 -#define SYS_getwgroups 290 -#define SYS_mkfifo_extended 291 -#define SYS_mkdir_extended 292 -#define 
SYS_identitysvc 293 -#define SYS_shared_region_check_np 294 -#define SYS_shared_region_map_np 295 - /* 296 old load_shared_file */ - /* 297 old reset_shared_file */ - /* 298 old new_system_shared_regions */ - /* 299 old shared_region_map_file_np */ - /* 300 old shared_region_make_private_np */ -#define SYS___pthread_mutex_destroy 301 -#define SYS___pthread_mutex_init 302 -#define SYS___pthread_mutex_lock 303 -#define SYS___pthread_mutex_trylock 304 -#define SYS___pthread_mutex_unlock 305 -#define SYS___pthread_cond_init 306 -#define SYS___pthread_cond_destroy 307 -#define SYS___pthread_cond_broadcast 308 -#define SYS___pthread_cond_signal 309 -#define SYS_getsid 310 -#define SYS_settid_with_pid 311 -#define SYS___pthread_cond_timedwait 312 -#define SYS_aio_fsync 313 -#define SYS_aio_return 314 -#define SYS_aio_suspend 315 -#define SYS_aio_cancel 316 -#define SYS_aio_error 317 -#define SYS_aio_read 318 -#define SYS_aio_write 319 -#define SYS_lio_listio 320 -#define SYS___pthread_cond_wait 321 -#define SYS_iopolicysys 322 - /* 323 */ -#define SYS_mlockall 324 -#define SYS_munlockall 325 - /* 326 */ -#define SYS_issetugid 327 -#define SYS___pthread_kill 328 -#define SYS___pthread_sigmask 329 -#define SYS___sigwait 330 -#define SYS___disable_threadsignal 331 -#define SYS___pthread_markcancel 332 -#define SYS___pthread_canceled 333 -#define SYS___semwait_signal 334 - /* 335 old utrace */ -#define SYS_proc_info 336 -#define SYS_sendfile 337 -#define SYS_stat64 338 -#define SYS_fstat64 339 -#define SYS_lstat64 340 -#define SYS_stat64_extended 341 -#define SYS_lstat64_extended 342 -#define SYS_fstat64_extended 343 -#define SYS_getdirentries64 344 -#define SYS_statfs64 345 -#define SYS_fstatfs64 346 -#define SYS_getfsstat64 347 -#define SYS___pthread_chdir 348 -#define SYS___pthread_fchdir 349 -#define SYS_audit 350 -#define SYS_auditon 351 - /* 352 */ -#define SYS_getauid 353 -#define SYS_setauid 354 -#define SYS_getaudit 355 -#define SYS_setaudit 356 -#define 
SYS_getaudit_addr 357 -#define SYS_setaudit_addr 358 -#define SYS_auditctl 359 -#define SYS_bsdthread_create 360 -#define SYS_bsdthread_terminate 361 -#define SYS_kqueue 362 -#define SYS_kevent 363 -#define SYS_lchown 364 -#define SYS_stack_snapshot 365 -#define SYS_bsdthread_register 366 -#define SYS_workq_open 367 -#define SYS_workq_ops 368 - /* 369 */ - /* 370 */ - /* 371 */ - /* 372 */ - /* 373 */ - /* 374 */ - /* 375 */ - /* 376 */ - /* 377 */ - /* 378 */ - /* 379 */ -#define SYS___mac_execve 380 -#define SYS___mac_syscall 381 -#define SYS___mac_get_file 382 -#define SYS___mac_set_file 383 -#define SYS___mac_get_link 384 -#define SYS___mac_set_link 385 -#define SYS___mac_get_proc 386 -#define SYS___mac_set_proc 387 -#define SYS___mac_get_fd 388 -#define SYS___mac_set_fd 389 -#define SYS___mac_get_pid 390 -#define SYS___mac_get_lcid 391 -#define SYS___mac_get_lctx 392 -#define SYS___mac_set_lctx 393 -#define SYS_setlcid 394 -#define SYS_getlcid 395 -#define SYS_read_nocancel 396 -#define SYS_write_nocancel 397 -#define SYS_open_nocancel 398 -#define SYS_close_nocancel 399 -#define SYS_wait4_nocancel 400 -#define SYS_recvmsg_nocancel 401 -#define SYS_sendmsg_nocancel 402 -#define SYS_recvfrom_nocancel 403 -#define SYS_accept_nocancel 404 -#define SYS_msync_nocancel 405 -#define SYS_fcntl_nocancel 406 -#define SYS_select_nocancel 407 -#define SYS_fsync_nocancel 408 -#define SYS_connect_nocancel 409 -#define SYS_sigsuspend_nocancel 410 -#define SYS_readv_nocancel 411 -#define SYS_writev_nocancel 412 -#define SYS_sendto_nocancel 413 -#define SYS_pread_nocancel 414 -#define SYS_pwrite_nocancel 415 -#define SYS_waitid_nocancel 416 -#define SYS_poll_nocancel 417 -#define SYS_msgsnd_nocancel 418 -#define SYS_msgrcv_nocancel 419 -#define SYS_sem_wait_nocancel 420 -#define SYS_aio_suspend_nocancel 421 -#define SYS___sigwait_nocancel 422 -#define SYS___semwait_signal_nocancel 423 -#define SYS___mac_mount 424 -#define SYS___mac_get_mount 425 -#define SYS___mac_getfsstat 
426 -#define SYS_MAXSYSCALL 427 - -#endif /* __APPLE_API_PRIVATE */ -#endif /* !_SYS_SYSCALL_H_ */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index 6b72d6bd5..083432071 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -282,25 +282,37 @@ __END_DECLS SYSCTL_OID(parent, nbr, name, CTLTYPE_STRING|access, \ arg, len, sysctl_handle_string, "A", descr) +#define SYSCTL_COMPAT_INT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ + ptr, val, sysctl_handle_int, "I", descr) + +#define SYSCTL_COMPAT_UINT(parent, nbr, name, access, ptr, val, descr) \ + SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ + ptr, val, sysctl_handle_int, "IU", descr) + /* Oid for an int. If ptr is NULL, val is returned. */ #define SYSCTL_INT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_handle_int, "I", descr) + ptr, val, sysctl_handle_int, "I", descr); \ + typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(int)) ? 0 : -1]; /* Oid for an unsigned int. If ptr is NULL, val is returned. */ #define SYSCTL_UINT(parent, nbr, name, access, ptr, val, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_handle_int, "IU", descr) + ptr, val, sysctl_handle_int, "IU", descr); \ + typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(unsigned int)) ? 0 : -1]; /* Oid for a long. The pointer must be non NULL. */ #define SYSCTL_LONG(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, 0, sysctl_handle_long, "L", descr) + ptr, 0, sysctl_handle_long, "L", descr); \ + typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long)) ? 0 : -1]; /* Oid for a quad. The pointer must be non NULL. 
*/ #define SYSCTL_QUAD(parent, nbr, name, access, ptr, descr) \ SYSCTL_OID(parent, nbr, name, CTLTYPE_QUAD|access, \ - ptr, 0, sysctl_handle_quad, "Q", descr) + ptr, 0, sysctl_handle_quad, "Q", descr); \ + typedef char _sysctl_##parent##_##name##_size_check[(__builtin_constant_p(ptr) || sizeof(*(ptr)) == sizeof(long long)) ? 0 : -1]; /* Oid for an opaque object. Specified by a pointer and a length. */ #define SYSCTL_OPAQUE(parent, nbr, name, access, ptr, len, fmt, descr) \ @@ -429,7 +441,7 @@ SYSCTL_DECL(_user); #define KERN_SUGID_COREDUMP 52 /* int: whether to dump SUGID cores */ #define KERN_PROCDELAYTERM 53 /* int: set/reset current proc for delayed termination during shutdown */ #define KERN_SHREG_PRIVATIZABLE 54 /* int: can shared regions be privatized ? */ -#define KERN_PROC_LOW_PRI_IO 55 /* int: set/reset current proc for low priority I/O */ + /* 55 was KERN_PROC_LOW_PRI_IO... now deprecated */ #define KERN_LOW_PRI_WINDOW 56 /* int: set/reset throttle window - milliseconds */ #define KERN_LOW_PRI_DELAY 57 /* int: set/reset throttle delay - milliseconds */ #define KERN_POSIX 58 /* node: posix tunables */ @@ -445,7 +457,8 @@ SYSCTL_DECL(_user); #define KERN_RAGEVNODE 68 #define KERN_TTY 69 /* node: tty settings */ #define KERN_CHECKOPENEVT 70 /* spi: check the VOPENEVT flag on vnodes at open time */ -#define KERN_MAXID 71 /* number of valid kern ids */ +#define KERN_THREADNAME 71 /* set/get thread name */ +#define KERN_MAXID 72 /* number of valid kern ids */ /* * Don't add any more sysctls like this. Instead, use the SYSCTL_*() macros * and OID_AUTO. 
This will have the added benefit of not having to recompile @@ -573,7 +586,8 @@ SYSCTL_DECL(_user); { "lctx", CTLTYPE_NODE }, \ { "rage_vnode", CTLTYPE_INT }, \ { "tty", CTLTYPE_NODE }, \ - { "check_openevt", CTLTYPE_INT } \ + { "check_openevt", CTLTYPE_INT }, \ + { "thread_name", CTLTYPE_STRING } \ } /* @@ -601,10 +615,11 @@ SYSCTL_DECL(_user); #define KERN_LCTX_ALL 0 /* everything */ #define KERN_LCTX_LCID 1 /* by login context id */ + +#if defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) /* * KERN_PROC subtype ops return arrays of augmented proc structures: */ -#ifdef __APPLE_API_UNSTABLE struct _pcred { char pc_lock[72]; /* opaque content */ @@ -662,6 +677,8 @@ struct kinfo_lctx { int mc; /* Member Count */ }; +#endif /* defined(XNU_KERNEL_PRIVATE) || !defined(KERNEL) */ + #ifdef BSD_KERNEL_PRIVATE #include @@ -670,26 +687,64 @@ struct kinfo_lctx { * WARNING - keep in sync with _pcred */ -struct user_pcred { +struct user32_pcred { + char pc_lock[72]; /* opaque content */ + user32_addr_t pc_ucred; /* Current credentials. */ + uid_t p_ruid; /* Real user id. */ + uid_t p_svuid; /* Saved effective user id. */ + gid_t p_rgid; /* Real group id. */ + gid_t p_svgid; /* Saved effective group id. */ + int p_refcnt; /* Number of references. */ +}; +struct user64_pcred { char pc_lock[72]; /* opaque content */ - user_addr_t pc_ucred; /* Current credentials. */ + user64_addr_t pc_ucred; /* Current credentials. */ uid_t p_ruid; /* Real user id. */ uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ - int p_refcnt __attribute((aligned(8))); /* Number of references. */ + int p_refcnt; /* Number of references. */ }; /* LP64 version of kinfo_proc. all pointers * grow when we're dealing with a 64-bit process. 
* WARNING - keep in sync with kinfo_proc */ -struct user_kinfo_proc { - struct user_extern_proc kp_proc; /* proc structure */ - struct user_eproc { +struct user32_kinfo_proc { + struct user32_extern_proc kp_proc; /* proc structure */ + struct user32_eproc { + user32_addr_t e_paddr; /* address of proc */ + user32_addr_t e_sess; /* session pointer */ + struct user32_pcred e_pcred; /* process credentials */ + struct _ucred e_ucred; /* current credentials */ + struct user32_vmspace e_vm; /* address space */ + pid_t e_ppid; /* parent process id */ + pid_t e_pgid; /* process group id */ + short e_jobc; /* job control counter */ + dev_t e_tdev; /* controlling tty dev */ + pid_t e_tpgid; /* tty process group id */ + user32_addr_t e_tsess; /* tty session pointer */ + char e_wmesg[WMESGLEN+1]; /* wchan message */ + segsz_t e_xsize; /* text size */ + short e_xrssize; /* text rss */ + short e_xccount; /* text references */ + short e_xswrss; + int32_t e_flag; + char e_login[COMAPT_MAXLOGNAME]; /* short setlogin() name */ +#if CONFIG_LCTX + pid_t e_lcid; + int32_t e_spare[3]; +#else + int32_t e_spare[4]; +#endif + } kp_eproc; +}; +struct user64_kinfo_proc { + struct user64_extern_proc kp_proc; /* proc structure */ + struct user64_eproc { user_addr_t e_paddr; /* address of proc */ user_addr_t e_sess; /* session pointer */ - struct user_pcred e_pcred; /* process credentials */ + struct user64_pcred e_pcred; /* process credentials */ struct _ucred e_ucred; /* current credentials */ struct user_vmspace e_vm; /* address space */ pid_t e_ppid; /* parent process id */ @@ -697,7 +752,7 @@ struct user_kinfo_proc { short e_jobc; /* job control counter */ dev_t e_tdev; /* controlling tty dev */ pid_t e_tpgid; /* tty process group id */ - user_addr_t e_tsess __attribute((aligned(8))); /* tty session pointer */ + user64_addr_t e_tsess __attribute((aligned(8))); /* tty session pointer */ char e_wmesg[WMESGLEN+1]; /* wchan message */ segsz_t e_xsize; /* text size */ short e_xrssize; /* text 
rss */ @@ -716,8 +771,6 @@ struct user_kinfo_proc { #endif /* BSD_KERNEL_PRIVATE */ -#endif /* __APPLE_API_UNSTABLE */ - /* * KERN_IPC identifiers */ @@ -774,9 +827,14 @@ extern struct loadavg averunnable; #ifdef BSD_KERNEL_PRIVATE -struct user_loadavg { +struct user32_loadavg { + fixpt_t ldavg[3]; + user32_long_t fscale; +}; + +struct user64_loadavg { fixpt_t ldavg[3]; - user_long_t fscale __attribute((aligned(8))); + user64_long_t fscale; }; #endif /* BSD_KERNEL_PRIVATE */ @@ -988,13 +1046,13 @@ struct user_loadavg { #define CTL_DEBUG_MAXID 20 -#if (CTL_MAXID != 9) || (KERN_MAXID != 71) || (VM_MAXID != 6) || (HW_MAXID != 26) || (USER_MAXID != 21) || (CTL_DEBUG_MAXID != 20) +#if (CTL_MAXID != 9) || (KERN_MAXID != 72) || (VM_MAXID != 6) || (HW_MAXID != 26) || (USER_MAXID != 21) || (CTL_DEBUG_MAXID != 20) #error Use the SYSCTL_*() macros and OID_AUTO instead! #endif #ifdef KERNEL -#ifdef DEBUG +#if DEBUG /* * CTL_DEBUG variables. * @@ -1031,7 +1089,7 @@ void sysctl_mib_init(void) __attribute__((section("__TEXT, initcode"))); int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *newp, size_t newlen); int userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t old, - size_t *oldlenp, int inkernel, user_addr_t newp, size_t newlen, + size_t *oldlenp, user_addr_t newp, size_t newlen, size_t *retval); /* diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h index 59aec4ffc..44991a9d9 100644 --- a/bsd/sys/sysent.h +++ b/bsd/sys/sysent.h @@ -59,7 +59,7 @@ extern struct sysent sysent[]; #endif /* __INIT_SYSENT_C__ */ extern int nsysent; -#define NUM_SYSENT 427 /* Current number of defined syscalls */ +#define NUM_SYSENT 430 /* Current number of defined syscalls */ /* sy_funnel flags bits */ #define FUNNEL_MASK 0x07f diff --git a/bsd/sys/syslog.h b/bsd/sys/syslog.h index a681d394f..e85a4a817 100644 --- a/bsd/sys/syslog.h +++ b/bsd/sys/syslog.h @@ -310,10 +310,13 @@ struct reg_desc { #include __BEGIN_DECLS -void logpri(int); 
void log(int, const char *, ...); +#ifdef XNU_KERNEL_PRIVATE +void logpri(int); int vaddlog(const char *, va_list); void logtime(time_t); +#endif /* XNU_KERNEL_PRIVATE */ + __END_DECLS #endif /* !KERNEL */ diff --git a/bsd/sys/sysproto.h b/bsd/sys/sysproto.h deleted file mode 100644 index a918a8a16..000000000 --- a/bsd/sys/sysproto.h +++ /dev/null @@ -1,2112 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - * - * - * System call switch table. - * - * DO NOT EDIT-- this file is automatically generated. 
- * created from syscalls.master - */ - -#ifndef _SYS_SYSPROTO_H_ -#define _SYS_SYSPROTO_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -#define PAD_(t) (sizeof(uint64_t) <= sizeof(t) \ - ? 0 : sizeof(uint64_t) - sizeof(t)) -#if BYTE_ORDER == LITTLE_ENDIAN -#define PADL_(t) 0 -#define PADR_(t) PAD_(t) -#else -#define PADL_(t) PAD_(t) -#define PADR_(t) 0 -#endif - -__BEGIN_DECLS -#ifndef __MUNGE_ONCE -#define __MUNGE_ONCE -void munge_w(const void *, void *); -void munge_ww(const void *, void *); -void munge_www(const void *, void *); -void munge_wwww(const void *, void *); -void munge_wwwww(const void *, void *); -void munge_wwwwww(const void *, void *); -void munge_wwwwwww(const void *, void *); -void munge_wwwwwwww(const void *, void *); -void munge_wl(const void *, void *); -void munge_wlw(const void *, void *); -void munge_wwwl(const void *, void *); -void munge_wwwlww(const void *, void *); -void munge_wwlwww(const void *, void *); -void munge_wwwwl(const void *, void *); -void munge_wwwwwl(const void *, void *); -void munge_wsw(const void *, void *); -void munge_wws(const void *, void *); -void munge_wwwsw(const void *, void *); -#ifdef __ppc__ -void munge_d(const void *, void *); -void munge_dd(const void *, void *); -void munge_ddd(const void *, void *); -void munge_dddd(const void *, void *); -void munge_ddddd(const void *, void *); -void munge_dddddd(const void *, void *); -void munge_ddddddd(const void *, void *); -void munge_dddddddd(const void *, void *); -#else -#define munge_d NULL -#define munge_dd NULL -#define munge_ddd NULL -#define munge_dddd NULL -#define munge_ddddd NULL -#define munge_dddddd NULL -#define munge_ddddddd NULL -#define munge_dddddddd NULL -#endif // __ppc__ -#endif /* !__MUNGE_ONCE */ - -struct nosys_args { - register_t dummy; -}; -struct exit_args { - char rval_l_[PADL_(int)]; int rval; char rval_r_[PADR_(int)]; -}; -struct fork_args { - 
register_t dummy; -}; -struct read_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; -}; -struct write_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; -}; -struct open_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct close_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -struct wait4_args { - char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; - char status_l_[PADL_(user_addr_t)]; user_addr_t status; char status_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; - char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)]; -}; -struct link_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char link_l_[PADL_(user_addr_t)]; user_addr_t link; char link_r_[PADR_(user_addr_t)]; -}; -struct unlink_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct chdir_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct fchdir_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -struct mknod_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char dev_l_[PADL_(int)]; int dev; char dev_r_[PADR_(int)]; -}; -struct chmod_args { - char 
path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct chown_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; - char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)]; -}; -struct obreak_args { - char nsize_l_[PADL_(char *)]; char * nsize; char nsize_r_[PADR_(char *)]; -}; -#if COMPAT_GETFSSTAT -struct ogetfsstat_args { - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -#else -struct getfsstat_args { - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -#endif -struct getpid_args { - register_t dummy; -}; -struct setuid_args { - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; -}; -struct getuid_args { - register_t dummy; -}; -struct geteuid_args { - register_t dummy; -}; -struct ptrace_args { - char req_l_[PADL_(int)]; int req; char req_r_[PADR_(int)]; - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char data_l_[PADL_(int)]; int data; char data_r_[PADR_(int)]; -}; -#if SOCKETS -struct recvmsg_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct sendmsg_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; - char 
flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct recvfrom_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; - char fromlenaddr_l_[PADL_(user_addr_t)]; user_addr_t fromlenaddr; char fromlenaddr_r_[PADR_(user_addr_t)]; -}; -struct accept_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char anamelen_l_[PADL_(user_addr_t)]; user_addr_t anamelen; char anamelen_r_[PADR_(user_addr_t)]; -}; -struct getpeername_args { - char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; - char asa_l_[PADL_(user_addr_t)]; user_addr_t asa; char asa_r_[PADR_(user_addr_t)]; - char alen_l_[PADL_(user_addr_t)]; user_addr_t alen; char alen_r_[PADR_(user_addr_t)]; -}; -struct getsockname_args { - char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; - char asa_l_[PADL_(user_addr_t)]; user_addr_t asa; char asa_r_[PADR_(user_addr_t)]; - char alen_l_[PADL_(user_addr_t)]; user_addr_t alen; char alen_r_[PADR_(user_addr_t)]; -}; -#else -#endif /* SOCKETS */ -struct access_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct chflags_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct fchflags_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct sync_args { - register_t dummy; -}; -struct kill_args { - 
char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; - char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)]; - char posix_l_[PADL_(int)]; int posix; char posix_r_[PADR_(int)]; -}; -struct getppid_args { - register_t dummy; -}; -struct dup_args { - char fd_l_[PADL_(u_int)]; u_int fd; char fd_r_[PADR_(u_int)]; -}; -struct pipe_args { - register_t dummy; -}; -struct getegid_args { - register_t dummy; -}; -struct profil_args { - char bufbase_l_[PADL_(user_addr_t)]; user_addr_t bufbase; char bufbase_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; - char pcoffset_l_[PADL_(user_ulong_t)]; user_ulong_t pcoffset; char pcoffset_r_[PADR_(user_ulong_t)]; - char pcscale_l_[PADL_(u_int)]; u_int pcscale; char pcscale_r_[PADR_(u_int)]; -}; -struct sigaction_args { - char signum_l_[PADL_(int)]; int signum; char signum_r_[PADR_(int)]; - char nsa_l_[PADL_(user_addr_t)]; user_addr_t nsa; char nsa_r_[PADR_(user_addr_t)]; - char osa_l_[PADL_(user_addr_t)]; user_addr_t osa; char osa_r_[PADR_(user_addr_t)]; -}; -struct getgid_args { - register_t dummy; -}; -struct sigprocmask_args { - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; - char mask_l_[PADL_(user_addr_t)]; user_addr_t mask; char mask_r_[PADR_(user_addr_t)]; - char omask_l_[PADL_(user_addr_t)]; user_addr_t omask; char omask_r_[PADR_(user_addr_t)]; -}; -struct getlogin_args { - char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)]; - char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; -}; -struct setlogin_args { - char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)]; -}; -struct acct_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct sigpending_args { - char osv_l_[PADL_(user_addr_t)]; user_addr_t osv; char osv_r_[PADR_(user_addr_t)]; -}; -struct sigaltstack_args { - 
char nss_l_[PADL_(user_addr_t)]; user_addr_t nss; char nss_r_[PADR_(user_addr_t)]; - char oss_l_[PADL_(user_addr_t)]; user_addr_t oss; char oss_r_[PADR_(user_addr_t)]; -}; -struct ioctl_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char com_l_[PADL_(user_ulong_t)]; user_ulong_t com; char com_r_[PADR_(user_ulong_t)]; - char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; -}; -struct reboot_args { - char opt_l_[PADL_(int)]; int opt; char opt_r_[PADR_(int)]; - char command_l_[PADL_(user_addr_t)]; user_addr_t command; char command_r_[PADR_(user_addr_t)]; -}; -struct revoke_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct symlink_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char link_l_[PADL_(user_addr_t)]; user_addr_t link; char link_r_[PADR_(user_addr_t)]; -}; -struct readlink_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char count_l_[PADL_(int)]; int count; char count_r_[PADR_(int)]; -}; -struct execve_args { - char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)]; - char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)]; - char envp_l_[PADL_(user_addr_t)]; user_addr_t envp; char envp_r_[PADR_(user_addr_t)]; -}; -struct umask_args { - char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)]; -}; -struct chroot_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct msync_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct vfork_args { - 
register_t dummy; -}; -struct sbrk_args { - char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)]; -}; -struct sstk_args { - char incr_l_[PADL_(int)]; int incr; char incr_r_[PADR_(int)]; -}; -struct ovadvise_args { - register_t dummy; -}; -struct munmap_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; -}; -struct mprotect_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; -}; -struct madvise_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char behav_l_[PADL_(int)]; int behav; char behav_r_[PADR_(int)]; -}; -struct mincore_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char vec_l_[PADL_(user_addr_t)]; user_addr_t vec; char vec_r_[PADR_(user_addr_t)]; -}; -struct getgroups_args { - char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)]; - char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)]; -}; -struct setgroups_args { - char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)]; - char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)]; -}; -struct getpgrp_args { - register_t dummy; -}; -struct setpgid_args { - char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; - char pgid_l_[PADL_(int)]; int pgid; char pgid_r_[PADR_(int)]; -}; -struct setitimer_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char 
itv_l_[PADL_(user_addr_t)]; user_addr_t itv; char itv_r_[PADR_(user_addr_t)]; - char oitv_l_[PADL_(user_addr_t)]; user_addr_t oitv; char oitv_r_[PADR_(user_addr_t)]; -}; -struct swapon_args { - register_t dummy; -}; -struct getitimer_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char itv_l_[PADL_(user_addr_t)]; user_addr_t itv; char itv_r_[PADR_(user_addr_t)]; -}; -struct getdtablesize_args { - register_t dummy; -}; -struct dup2_args { - char from_l_[PADL_(u_int)]; u_int from; char from_r_[PADR_(u_int)]; - char to_l_[PADL_(u_int)]; u_int to; char to_r_[PADR_(u_int)]; -}; -struct fcntl_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char arg_l_[PADL_(user_long_t)]; user_long_t arg; char arg_r_[PADR_(user_long_t)]; -}; -struct select_args { - char nd_l_[PADL_(int)]; int nd; char nd_r_[PADR_(int)]; - char in_l_[PADL_(user_addr_t)]; user_addr_t in; char in_r_[PADR_(user_addr_t)]; - char ou_l_[PADL_(user_addr_t)]; user_addr_t ou; char ou_r_[PADR_(user_addr_t)]; - char ex_l_[PADL_(user_addr_t)]; user_addr_t ex; char ex_r_[PADR_(user_addr_t)]; - char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)]; -}; -struct fsync_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -struct setpriority_args { - char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)]; - char who_l_[PADL_(id_t)]; id_t who; char who_r_[PADR_(id_t)]; - char prio_l_[PADL_(int)]; int prio; char prio_r_[PADR_(int)]; -}; -#if SOCKETS -struct socket_args { - char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)]; - char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)]; - char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)]; -}; -struct connect_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char 
namelen_l_[PADL_(socklen_t)]; socklen_t namelen; char namelen_r_[PADR_(socklen_t)]; -}; -#else -#endif /* SOCKETS */ -struct getpriority_args { - char which_l_[PADL_(int)]; int which; char which_r_[PADR_(int)]; - char who_l_[PADL_(id_t)]; id_t who; char who_r_[PADR_(id_t)]; -}; -#if SOCKETS -struct bind_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char namelen_l_[PADL_(socklen_t)]; socklen_t namelen; char namelen_r_[PADR_(socklen_t)]; -}; -struct setsockopt_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)]; - char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; - char val_l_[PADL_(user_addr_t)]; user_addr_t val; char val_r_[PADR_(user_addr_t)]; - char valsize_l_[PADL_(socklen_t)]; socklen_t valsize; char valsize_r_[PADR_(socklen_t)]; -}; -struct listen_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char backlog_l_[PADL_(int)]; int backlog; char backlog_r_[PADR_(int)]; -}; -#else -#endif /* SOCKETS */ -struct sigsuspend_args { - char mask_l_[PADL_(sigset_t)]; sigset_t mask; char mask_r_[PADR_(sigset_t)]; -}; -#if SOCKETS -#else -#endif /* SOCKETS */ -struct gettimeofday_args { - char tp_l_[PADL_(user_addr_t)]; user_addr_t tp; char tp_r_[PADR_(user_addr_t)]; - char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)]; -}; -struct getrusage_args { - char who_l_[PADL_(int)]; int who; char who_r_[PADR_(int)]; - char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)]; -}; -#if SOCKETS -struct getsockopt_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)]; - char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; - char val_l_[PADL_(user_addr_t)]; user_addr_t val; char val_r_[PADR_(user_addr_t)]; - char 
avalsize_l_[PADL_(user_addr_t)]; user_addr_t avalsize; char avalsize_r_[PADR_(user_addr_t)]; -}; -#else -#endif /* SOCKETS */ -struct readv_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)]; - char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)]; -}; -struct writev_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)]; - char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)]; -}; -struct settimeofday_args { - char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)]; - char tzp_l_[PADL_(user_addr_t)]; user_addr_t tzp; char tzp_r_[PADR_(user_addr_t)]; -}; -struct fchown_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; - char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)]; -}; -struct fchmod_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct setreuid_args { - char ruid_l_[PADL_(uid_t)]; uid_t ruid; char ruid_r_[PADR_(uid_t)]; - char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)]; -}; -struct setregid_args { - char rgid_l_[PADL_(gid_t)]; gid_t rgid; char rgid_r_[PADR_(gid_t)]; - char egid_l_[PADL_(gid_t)]; gid_t egid; char egid_r_[PADR_(gid_t)]; -}; -struct rename_args { - char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; - char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)]; -}; -struct flock_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; -}; -struct mkfifo_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char mode_l_[PADL_(int)]; int mode; 
char mode_r_[PADR_(int)]; -}; -#if SOCKETS -struct sendto_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)]; - char tolen_l_[PADL_(socklen_t)]; socklen_t tolen; char tolen_r_[PADR_(socklen_t)]; -}; -struct shutdown_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; -}; -struct socketpair_args { - char domain_l_[PADL_(int)]; int domain; char domain_r_[PADR_(int)]; - char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)]; - char protocol_l_[PADL_(int)]; int protocol; char protocol_r_[PADR_(int)]; - char rsv_l_[PADL_(user_addr_t)]; user_addr_t rsv; char rsv_r_[PADR_(user_addr_t)]; -}; -#else -#endif /* SOCKETS */ -struct mkdir_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct rmdir_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct utimes_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char tptr_l_[PADL_(user_addr_t)]; user_addr_t tptr; char tptr_r_[PADR_(user_addr_t)]; -}; -struct futimes_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char tptr_l_[PADL_(user_addr_t)]; user_addr_t tptr; char tptr_r_[PADR_(user_addr_t)]; -}; -struct adjtime_args { - char delta_l_[PADL_(user_addr_t)]; user_addr_t delta; char delta_r_[PADR_(user_addr_t)]; - char olddelta_l_[PADL_(user_addr_t)]; user_addr_t olddelta; char olddelta_r_[PADR_(user_addr_t)]; -}; -struct gethostuuid_args { - char uuid_buf_l_[PADL_(user_addr_t)]; user_addr_t uuid_buf; 
char uuid_buf_r_[PADR_(user_addr_t)]; - char timeoutp_l_[PADL_(user_addr_t)]; user_addr_t timeoutp; char timeoutp_r_[PADR_(user_addr_t)]; -}; -struct setsid_args { - register_t dummy; -}; -struct getpgid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; -}; -struct setprivexec_args { - char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; -}; -struct pread_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; -}; -struct pwrite_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; -}; -#if NFSSERVER -struct nfssvc_args { - char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; - char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)]; -}; -#else -#endif -struct statfs_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct fstatfs_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct unmount_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -#if NFSSERVER -struct getfh_args { - char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)]; - char fhp_l_[PADL_(user_addr_t)]; user_addr_t fhp; char 
fhp_r_[PADR_(user_addr_t)]; -}; -#else -#endif -struct quotactl_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; - char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; -}; -struct mount_args { - char type_l_[PADL_(user_addr_t)]; user_addr_t type; char type_r_[PADR_(user_addr_t)]; - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; -}; -struct csops_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char ops_l_[PADL_(uint32_t)]; uint32_t ops; char ops_r_[PADR_(uint32_t)]; - char useraddr_l_[PADL_(user_addr_t)]; user_addr_t useraddr; char useraddr_r_[PADR_(user_addr_t)]; - char usersize_l_[PADL_(user_size_t)]; user_size_t usersize; char usersize_r_[PADR_(user_size_t)]; -}; -struct waitid_args { - char idtype_l_[PADL_(idtype_t)]; idtype_t idtype; char idtype_r_[PADR_(idtype_t)]; - char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)]; - char infop_l_[PADL_(user_addr_t)]; user_addr_t infop; char infop_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct add_profil_args { - char bufbase_l_[PADL_(user_addr_t)]; user_addr_t bufbase; char bufbase_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; - char pcoffset_l_[PADL_(user_ulong_t)]; user_ulong_t pcoffset; char pcoffset_r_[PADR_(user_ulong_t)]; - char pcscale_l_[PADL_(u_int)]; u_int pcscale; char pcscale_r_[PADR_(u_int)]; -}; -struct kdebug_trace_args { - char code_l_[PADL_(int)]; int code; char code_r_[PADR_(int)]; - char arg1_l_[PADL_(int)]; int arg1; char arg1_r_[PADR_(int)]; - char 
arg2_l_[PADL_(int)]; int arg2; char arg2_r_[PADR_(int)]; - char arg3_l_[PADL_(int)]; int arg3; char arg3_r_[PADR_(int)]; - char arg4_l_[PADL_(int)]; int arg4; char arg4_r_[PADR_(int)]; - char arg5_l_[PADL_(int)]; int arg5; char arg5_r_[PADR_(int)]; -}; -struct setgid_args { - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; -}; -struct setegid_args { - char egid_l_[PADL_(gid_t)]; gid_t egid; char egid_r_[PADR_(gid_t)]; -}; -struct seteuid_args { - char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)]; -}; -struct sigreturn_args { - char uctx_l_[PADL_(user_addr_t)]; user_addr_t uctx; char uctx_r_[PADR_(user_addr_t)]; - char infostyle_l_[PADL_(int)]; int infostyle; char infostyle_r_[PADR_(int)]; -}; -struct chud_args { - char code_l_[PADL_(int)]; int code; char code_r_[PADR_(int)]; - char arg1_l_[PADL_(int)]; int arg1; char arg1_r_[PADR_(int)]; - char arg2_l_[PADL_(int)]; int arg2; char arg2_r_[PADR_(int)]; - char arg3_l_[PADL_(int)]; int arg3; char arg3_r_[PADR_(int)]; - char arg4_l_[PADL_(int)]; int arg4; char arg4_r_[PADR_(int)]; - char arg5_l_[PADL_(int)]; int arg5; char arg5_r_[PADR_(int)]; -}; -struct stat_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct fstat_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct lstat_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct pathconf_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; -}; -struct fpathconf_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char name_l_[PADL_(int)]; int name; 
char name_r_[PADR_(int)]; -}; -struct getrlimit_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char rlp_l_[PADL_(user_addr_t)]; user_addr_t rlp; char rlp_r_[PADR_(user_addr_t)]; -}; -struct setrlimit_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char rlp_l_[PADL_(user_addr_t)]; user_addr_t rlp; char rlp_r_[PADR_(user_addr_t)]; -}; -struct getdirentries_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char count_l_[PADL_(u_int)]; u_int count; char count_r_[PADR_(u_int)]; - char basep_l_[PADL_(user_addr_t)]; user_addr_t basep; char basep_r_[PADR_(user_addr_t)]; -}; -struct mmap_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char pos_l_[PADL_(off_t)]; off_t pos; char pos_r_[PADR_(off_t)]; -}; -struct lseek_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; - char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; -}; -struct truncate_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; -}; -struct ftruncate_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; -}; -struct __sysctl_args { - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; - char 
old_l_[PADL_(user_addr_t)]; user_addr_t old; char old_r_[PADR_(user_addr_t)]; - char oldlenp_l_[PADL_(user_addr_t)]; user_addr_t oldlenp; char oldlenp_r_[PADR_(user_addr_t)]; - char new_l_[PADL_(user_addr_t)]; user_addr_t new; char new_r_[PADR_(user_addr_t)]; - char newlen_l_[PADL_(user_size_t)]; user_size_t newlen; char newlen_r_[PADR_(user_size_t)]; -}; -struct mlock_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; -}; -struct munlock_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; -}; -struct undelete_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -#if NETAT -struct ATsocket_args { - char proto_l_[PADL_(int)]; int proto; char proto_r_[PADR_(int)]; -}; -struct ATgetmsg_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char ctlptr_l_[PADL_(void *)]; void * ctlptr; char ctlptr_r_[PADR_(void *)]; - char datptr_l_[PADL_(void *)]; void * datptr; char datptr_r_[PADR_(void *)]; - char flags_l_[PADL_(int *)]; int * flags; char flags_r_[PADR_(int *)]; -}; -struct ATputmsg_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char ctlptr_l_[PADL_(void *)]; void * ctlptr; char ctlptr_r_[PADR_(void *)]; - char datptr_l_[PADL_(void *)]; void * datptr; char datptr_r_[PADR_(void *)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct ATPsndreq_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(unsigned char *)]; unsigned char * buf; char buf_r_[PADR_(unsigned char *)]; - char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)]; - char nowait_l_[PADL_(int)]; int nowait; char nowait_r_[PADR_(int)]; -}; -struct ATPsndrsp_args { - char fd_l_[PADL_(int)]; int fd; char 
fd_r_[PADR_(int)]; - char respbuff_l_[PADL_(unsigned char *)]; unsigned char * respbuff; char respbuff_r_[PADR_(unsigned char *)]; - char resplen_l_[PADL_(int)]; int resplen; char resplen_r_[PADR_(int)]; - char datalen_l_[PADL_(int)]; int datalen; char datalen_r_[PADR_(int)]; -}; -struct ATPgetreq_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(unsigned char *)]; unsigned char * buf; char buf_r_[PADR_(unsigned char *)]; - char buflen_l_[PADL_(int)]; int buflen; char buflen_r_[PADR_(int)]; -}; -struct ATPgetrsp_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char bdsp_l_[PADL_(unsigned char *)]; unsigned char * bdsp; char bdsp_r_[PADR_(unsigned char *)]; -}; -#else -#endif /* NETAT */ -struct kqueue_from_portset_np_args { - char portset_l_[PADL_(int)]; int portset; char portset_r_[PADR_(int)]; -}; -struct kqueue_portset_np_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -struct getattrlist_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; - char attributeBuffer_l_[PADL_(user_addr_t)]; user_addr_t attributeBuffer; char attributeBuffer_r_[PADR_(user_addr_t)]; - char bufferSize_l_[PADL_(user_size_t)]; user_size_t bufferSize; char bufferSize_r_[PADR_(user_size_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; -}; -struct setattrlist_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; - char attributeBuffer_l_[PADL_(user_addr_t)]; user_addr_t attributeBuffer; char attributeBuffer_r_[PADR_(user_addr_t)]; - char bufferSize_l_[PADL_(user_size_t)]; user_size_t bufferSize; char bufferSize_r_[PADR_(user_size_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char 
options_r_[PADR_(user_ulong_t)]; -}; -struct getdirentriesattr_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char alist_l_[PADL_(user_addr_t)]; user_addr_t alist; char alist_r_[PADR_(user_addr_t)]; - char buffer_l_[PADL_(user_addr_t)]; user_addr_t buffer; char buffer_r_[PADR_(user_addr_t)]; - char buffersize_l_[PADL_(user_size_t)]; user_size_t buffersize; char buffersize_r_[PADR_(user_size_t)]; - char count_l_[PADL_(user_addr_t)]; user_addr_t count; char count_r_[PADR_(user_addr_t)]; - char basep_l_[PADL_(user_addr_t)]; user_addr_t basep; char basep_r_[PADR_(user_addr_t)]; - char newstate_l_[PADL_(user_addr_t)]; user_addr_t newstate; char newstate_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; -}; -struct exchangedata_args { - char path1_l_[PADL_(user_addr_t)]; user_addr_t path1; char path1_r_[PADR_(user_addr_t)]; - char path2_l_[PADL_(user_addr_t)]; user_addr_t path2; char path2_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; -}; -struct searchfs_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char searchblock_l_[PADL_(user_addr_t)]; user_addr_t searchblock; char searchblock_r_[PADR_(user_addr_t)]; - char nummatches_l_[PADL_(user_addr_t)]; user_addr_t nummatches; char nummatches_r_[PADR_(user_addr_t)]; - char scriptcode_l_[PADL_(user_ulong_t)]; user_ulong_t scriptcode; char scriptcode_r_[PADR_(user_ulong_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; - char state_l_[PADL_(user_addr_t)]; user_addr_t state; char state_r_[PADR_(user_addr_t)]; -}; -struct delete_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct copyfile_args { - char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; - char 
to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct poll_args { - char fds_l_[PADL_(user_addr_t)]; user_addr_t fds; char fds_r_[PADR_(user_addr_t)]; - char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)]; - char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; -}; -struct watchevent_args { - char u_req_l_[PADL_(user_addr_t)]; user_addr_t u_req; char u_req_r_[PADR_(user_addr_t)]; - char u_eventmask_l_[PADL_(int)]; int u_eventmask; char u_eventmask_r_[PADR_(int)]; -}; -struct waitevent_args { - char u_req_l_[PADL_(user_addr_t)]; user_addr_t u_req; char u_req_r_[PADR_(user_addr_t)]; - char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)]; -}; -struct modwatch_args { - char u_req_l_[PADL_(user_addr_t)]; user_addr_t u_req; char u_req_r_[PADR_(user_addr_t)]; - char u_eventmask_l_[PADL_(int)]; int u_eventmask; char u_eventmask_r_[PADR_(int)]; -}; -struct getxattr_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct fgetxattr_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - 
char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct setxattr_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct fsetxattr_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char value_l_[PADL_(user_addr_t)]; user_addr_t value; char value_r_[PADR_(user_addr_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - char position_l_[PADL_(uint32_t)]; uint32_t position; char position_r_[PADR_(uint32_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct removexattr_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct fremovexattr_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char attrname_l_[PADL_(user_addr_t)]; user_addr_t attrname; char attrname_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct listxattr_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char 
namebuf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct flistxattr_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char namebuf_l_[PADL_(user_addr_t)]; user_addr_t namebuf; char namebuf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct fsctl_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char cmd_l_[PADL_(user_ulong_t)]; user_ulong_t cmd; char cmd_r_[PADR_(user_ulong_t)]; - char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(user_ulong_t)]; user_ulong_t options; char options_r_[PADR_(user_ulong_t)]; -}; -struct initgroups_args { - char gidsetsize_l_[PADL_(u_int)]; u_int gidsetsize; char gidsetsize_r_[PADR_(u_int)]; - char gidset_l_[PADL_(user_addr_t)]; user_addr_t gidset; char gidset_r_[PADR_(user_addr_t)]; - char gmuid_l_[PADL_(int)]; int gmuid; char gmuid_r_[PADR_(int)]; -}; -struct posix_spawn_args { - char pid_l_[PADL_(user_addr_t)]; user_addr_t pid; char pid_r_[PADR_(user_addr_t)]; - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char adesc_l_[PADL_(user_addr_t)]; user_addr_t adesc; char adesc_r_[PADR_(user_addr_t)]; - char argv_l_[PADL_(user_addr_t)]; user_addr_t argv; char argv_r_[PADR_(user_addr_t)]; - char envp_l_[PADL_(user_addr_t)]; user_addr_t envp; char envp_r_[PADR_(user_addr_t)]; -}; -#if NFSCLIENT -struct nfsclnt_args { - char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; - char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char argp_r_[PADR_(user_addr_t)]; -}; -#else -#endif -#if NFSSERVER -struct fhopen_args { - char u_fhp_l_[PADL_(user_addr_t)]; user_addr_t 
u_fhp; char u_fhp_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -#else -#endif -struct minherit_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char inherit_l_[PADL_(int)]; int inherit; char inherit_r_[PADR_(int)]; -}; -#if SYSV_SEM -struct semsys_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; - char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; - char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; - char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)]; -}; -#else -#endif -#if SYSV_MSG -struct msgsys_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; - char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; - char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; - char a5_l_[PADL_(int)]; int a5; char a5_r_[PADR_(int)]; -}; -#else -#endif -#if SYSV_SHM -struct shmsys_args { - char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; - char a2_l_[PADL_(int)]; int a2; char a2_r_[PADR_(int)]; - char a3_l_[PADL_(int)]; int a3; char a3_r_[PADR_(int)]; - char a4_l_[PADL_(int)]; int a4; char a4_r_[PADR_(int)]; -}; -#else -#endif -#if SYSV_SEM -struct semctl_args { - char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)]; - char semnum_l_[PADL_(int)]; int semnum; char semnum_r_[PADR_(int)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; -}; -struct semget_args { - char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; - char nsems_l_[PADL_(int)]; int nsems; char nsems_r_[PADR_(int)]; - char semflg_l_[PADL_(int)]; int semflg; char semflg_r_[PADR_(int)]; -}; -struct semop_args { - 
char semid_l_[PADL_(int)]; int semid; char semid_r_[PADR_(int)]; - char sops_l_[PADL_(user_addr_t)]; user_addr_t sops; char sops_r_[PADR_(user_addr_t)]; - char nsops_l_[PADL_(int)]; int nsops; char nsops_r_[PADR_(int)]; -}; -#else -#endif -#if SYSV_MSG -struct msgctl_args { - char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct msgget_args { - char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; - char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; -}; -struct msgsnd_args { - char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; - char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; - char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; - char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; -}; -struct msgrcv_args { - char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; - char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; - char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; - char msgtyp_l_[PADL_(user_long_t)]; user_long_t msgtyp; char msgtyp_r_[PADR_(user_long_t)]; - char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; -}; -#else -#endif -#if SYSV_SHM -struct shmat_args { - char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)]; - char shmaddr_l_[PADL_(user_addr_t)]; user_addr_t shmaddr; char shmaddr_r_[PADR_(user_addr_t)]; - char shmflg_l_[PADL_(int)]; int shmflg; char shmflg_r_[PADR_(int)]; -}; -struct shmctl_args { - char shmid_l_[PADL_(int)]; int shmid; char shmid_r_[PADR_(int)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct shmdt_args { - char 
shmaddr_l_[PADL_(user_addr_t)]; user_addr_t shmaddr; char shmaddr_r_[PADR_(user_addr_t)]; -}; -struct shmget_args { - char key_l_[PADL_(key_t)]; key_t key; char key_r_[PADR_(key_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - char shmflg_l_[PADL_(int)]; int shmflg; char shmflg_r_[PADR_(int)]; -}; -#else -#endif -struct shm_open_args { - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char oflag_l_[PADL_(int)]; int oflag; char oflag_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct shm_unlink_args { - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; -}; -struct sem_open_args { - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char oflag_l_[PADL_(int)]; int oflag; char oflag_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char value_l_[PADL_(int)]; int value; char value_r_[PADR_(int)]; -}; -struct sem_close_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct sem_unlink_args { - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; -}; -struct sem_wait_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct sem_trywait_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct sem_post_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct sem_getvalue_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; - char sval_l_[PADL_(user_addr_t)]; user_addr_t sval; char sval_r_[PADR_(user_addr_t)]; -}; -struct sem_init_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; - char phsared_l_[PADL_(int)]; int phsared; char phsared_r_[PADR_(int)]; 
- char value_l_[PADL_(u_int)]; u_int value; char value_r_[PADR_(u_int)]; -}; -struct sem_destroy_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct open_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct umask_extended_args { - char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct stat_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct lstat_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct fstat_extended_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char 
xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct chmod_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct fchmod_extended_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct access_extended_args { - char entries_l_[PADL_(user_addr_t)]; user_addr_t entries; char entries_r_[PADR_(user_addr_t)]; - char size_l_[PADL_(user_size_t)]; user_size_t size; char size_r_[PADR_(user_size_t)]; - char results_l_[PADL_(user_addr_t)]; user_addr_t results; char results_r_[PADR_(user_addr_t)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; -}; -struct settid_args { - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; -}; -struct gettid_args { - char uidp_l_[PADL_(user_addr_t)]; user_addr_t uidp; char uidp_r_[PADR_(user_addr_t)]; - char gidp_l_[PADL_(user_addr_t)]; user_addr_t gidp; char gidp_r_[PADR_(user_addr_t)]; -}; -struct setsgroups_args { - char setlen_l_[PADL_(int)]; int setlen; char setlen_r_[PADR_(int)]; - char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; -}; -struct getsgroups_args { - char setlen_l_[PADL_(user_addr_t)]; user_addr_t setlen; 
char setlen_r_[PADR_(user_addr_t)]; - char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; -}; -struct setwgroups_args { - char setlen_l_[PADL_(int)]; int setlen; char setlen_r_[PADR_(int)]; - char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; -}; -struct getwgroups_args { - char setlen_l_[PADL_(user_addr_t)]; user_addr_t setlen; char setlen_r_[PADR_(user_addr_t)]; - char guidset_l_[PADL_(user_addr_t)]; user_addr_t guidset; char guidset_r_[PADR_(user_addr_t)]; -}; -struct mkfifo_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct mkdir_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; - char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; -}; -struct identitysvc_args { - char opcode_l_[PADL_(int)]; int opcode; char opcode_r_[PADR_(int)]; - char message_l_[PADL_(user_addr_t)]; user_addr_t message; char message_r_[PADR_(user_addr_t)]; -}; -struct shared_region_check_np_args { - char start_address_l_[PADL_(user_addr_t)]; user_addr_t start_address; char start_address_r_[PADR_(user_addr_t)]; -}; -struct shared_region_map_np_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char count_l_[PADL_(uint32_t)]; uint32_t count; char count_r_[PADR_(uint32_t)]; - char mappings_l_[PADL_(user_addr_t)]; user_addr_t mappings; char 
mappings_r_[PADR_(user_addr_t)]; -}; -struct __pthread_mutex_destroy_args { - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; -}; -struct __pthread_mutex_init_args { - char mutex_l_[PADL_(user_addr_t)]; user_addr_t mutex; char mutex_r_[PADR_(user_addr_t)]; - char attr_l_[PADL_(user_addr_t)]; user_addr_t attr; char attr_r_[PADR_(user_addr_t)]; -}; -struct __pthread_mutex_lock_args { - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; -}; -struct __pthread_mutex_trylock_args { - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; -}; -struct __pthread_mutex_unlock_args { - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; -}; -struct __pthread_cond_init_args { - char cond_l_[PADL_(user_addr_t)]; user_addr_t cond; char cond_r_[PADR_(user_addr_t)]; - char attr_l_[PADL_(user_addr_t)]; user_addr_t attr; char attr_r_[PADR_(user_addr_t)]; -}; -struct __pthread_cond_destroy_args { - char condid_l_[PADL_(int)]; int condid; char condid_r_[PADR_(int)]; -}; -struct __pthread_cond_broadcast_args { - char condid_l_[PADL_(int)]; int condid; char condid_r_[PADR_(int)]; -}; -struct __pthread_cond_signal_args { - char condid_l_[PADL_(int)]; int condid; char condid_r_[PADR_(int)]; -}; -struct getsid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; -}; -struct settid_with_pid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char assume_l_[PADL_(int)]; int assume; char assume_r_[PADR_(int)]; -}; -struct __pthread_cond_timedwait_args { - char condid_l_[PADL_(int)]; int condid; char condid_r_[PADR_(int)]; - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; - char abstime_l_[PADL_(user_addr_t)]; user_addr_t abstime; char abstime_r_[PADR_(user_addr_t)]; -}; -struct aio_fsync_args { - char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)]; - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; 
-struct aio_return_args { - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; -struct aio_suspend_args { - char aiocblist_l_[PADL_(user_addr_t)]; user_addr_t aiocblist; char aiocblist_r_[PADR_(user_addr_t)]; - char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)]; - char timeoutp_l_[PADL_(user_addr_t)]; user_addr_t timeoutp; char timeoutp_r_[PADR_(user_addr_t)]; -}; -struct aio_cancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; -struct aio_error_args { - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; -struct aio_read_args { - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; -struct aio_write_args { - char aiocbp_l_[PADL_(user_addr_t)]; user_addr_t aiocbp; char aiocbp_r_[PADR_(user_addr_t)]; -}; -struct lio_listio_args { - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char aiocblist_l_[PADL_(user_addr_t)]; user_addr_t aiocblist; char aiocblist_r_[PADR_(user_addr_t)]; - char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)]; - char sigp_l_[PADL_(user_addr_t)]; user_addr_t sigp; char sigp_r_[PADR_(user_addr_t)]; -}; -struct __pthread_cond_wait_args { - char condid_l_[PADL_(int)]; int condid; char condid_r_[PADR_(int)]; - char mutexid_l_[PADL_(int)]; int mutexid; char mutexid_r_[PADR_(int)]; -}; -struct iopolicysys_args { - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; -}; -struct mlockall_args { - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; -}; -struct munlockall_args { - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; -}; -struct issetugid_args { - register_t dummy; -}; -struct __pthread_kill_args { - char thread_port_l_[PADL_(int)]; int thread_port; char 
thread_port_r_[PADR_(int)]; - char sig_l_[PADL_(int)]; int sig; char sig_r_[PADR_(int)]; -}; -struct __pthread_sigmask_args { - char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; - char set_l_[PADL_(user_addr_t)]; user_addr_t set; char set_r_[PADR_(user_addr_t)]; - char oset_l_[PADL_(user_addr_t)]; user_addr_t oset; char oset_r_[PADR_(user_addr_t)]; -}; -struct __sigwait_args { - char set_l_[PADL_(user_addr_t)]; user_addr_t set; char set_r_[PADR_(user_addr_t)]; - char sig_l_[PADL_(user_addr_t)]; user_addr_t sig; char sig_r_[PADR_(user_addr_t)]; -}; -struct __disable_threadsignal_args { - char value_l_[PADL_(int)]; int value; char value_r_[PADR_(int)]; -}; -struct __pthread_markcancel_args { - char thread_port_l_[PADL_(int)]; int thread_port; char thread_port_r_[PADR_(int)]; -}; -struct __pthread_canceled_args { - char action_l_[PADL_(int)]; int action; char action_r_[PADR_(int)]; -}; -struct __semwait_signal_args { - char cond_sem_l_[PADL_(int)]; int cond_sem; char cond_sem_r_[PADR_(int)]; - char mutex_sem_l_[PADL_(int)]; int mutex_sem; char mutex_sem_r_[PADR_(int)]; - char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; - char relative_l_[PADL_(int)]; int relative; char relative_r_[PADR_(int)]; - char tv_sec_l_[PADL_(time_t)]; time_t tv_sec; char tv_sec_r_[PADR_(time_t)]; - char tv_nsec_l_[PADL_(int32_t)]; int32_t tv_nsec; char tv_nsec_r_[PADR_(int32_t)]; -}; -struct proc_info_args { - char callnum_l_[PADL_(int32_t)]; int32_t callnum; char callnum_r_[PADR_(int32_t)]; - char pid_l_[PADL_(int32_t)]; int32_t pid; char pid_r_[PADR_(int32_t)]; - char flavor_l_[PADL_(uint32_t)]; uint32_t flavor; char flavor_r_[PADR_(uint32_t)]; - char arg_l_[PADL_(uint64_t)]; uint64_t arg; char arg_r_[PADR_(uint64_t)]; - char buffer_l_[PADL_(user_addr_t)]; user_addr_t buffer; char buffer_r_[PADR_(user_addr_t)]; - char buffersize_l_[PADL_(int32_t)]; int32_t buffersize; char buffersize_r_[PADR_(int32_t)]; -}; -#if SENDFILE -struct sendfile_args { - char 
fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; - char nbytes_l_[PADL_(user_addr_t)]; user_addr_t nbytes; char nbytes_r_[PADR_(user_addr_t)]; - char hdtr_l_[PADL_(user_addr_t)]; user_addr_t hdtr; char hdtr_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -#else /* !SENDFILE */ -#endif /* SENDFILE */ -struct stat64_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct fstat64_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct lstat64_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; -}; -struct stat64_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct lstat64_extended_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct fstat64_extended_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char 
ub_l_[PADL_(user_addr_t)]; user_addr_t ub; char ub_r_[PADR_(user_addr_t)]; - char xsecurity_l_[PADL_(user_addr_t)]; user_addr_t xsecurity; char xsecurity_r_[PADR_(user_addr_t)]; - char xsecurity_size_l_[PADL_(user_addr_t)]; user_addr_t xsecurity_size; char xsecurity_size_r_[PADR_(user_addr_t)]; -}; -struct getdirentries64_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(user_size_t)]; user_size_t bufsize; char bufsize_r_[PADR_(user_size_t)]; - char position_l_[PADL_(user_addr_t)]; user_addr_t position; char position_r_[PADR_(user_addr_t)]; -}; -struct statfs64_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct fstatfs64_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; -}; -struct getfsstat64_args { - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct __pthread_chdir_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -struct __pthread_fchdir_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -#if AUDIT -struct audit_args { - char record_l_[PADL_(user_addr_t)]; user_addr_t record; char record_r_[PADR_(user_addr_t)]; - char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; -}; -struct auditon_args { - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; - char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; -}; -struct getauid_args { - char 
auid_l_[PADL_(user_addr_t)]; user_addr_t auid; char auid_r_[PADR_(user_addr_t)]; -}; -struct setauid_args { - char auid_l_[PADL_(user_addr_t)]; user_addr_t auid; char auid_r_[PADR_(user_addr_t)]; -}; -struct getaudit_args { - char auditinfo_l_[PADL_(user_addr_t)]; user_addr_t auditinfo; char auditinfo_r_[PADR_(user_addr_t)]; -}; -struct setaudit_args { - char auditinfo_l_[PADL_(user_addr_t)]; user_addr_t auditinfo; char auditinfo_r_[PADR_(user_addr_t)]; -}; -struct getaudit_addr_args { - char auditinfo_addr_l_[PADL_(user_addr_t)]; user_addr_t auditinfo_addr; char auditinfo_addr_r_[PADR_(user_addr_t)]; - char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; -}; -struct setaudit_addr_args { - char auditinfo_addr_l_[PADL_(user_addr_t)]; user_addr_t auditinfo_addr; char auditinfo_addr_r_[PADR_(user_addr_t)]; - char length_l_[PADL_(int)]; int length; char length_r_[PADR_(int)]; -}; -struct auditctl_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; -}; -#else -#endif -struct bsdthread_create_args { - char func_l_[PADL_(user_addr_t)]; user_addr_t func; char func_r_[PADR_(user_addr_t)]; - char func_arg_l_[PADL_(user_addr_t)]; user_addr_t func_arg; char func_arg_r_[PADR_(user_addr_t)]; - char stack_l_[PADL_(user_addr_t)]; user_addr_t stack; char stack_r_[PADR_(user_addr_t)]; - char pthread_l_[PADL_(user_addr_t)]; user_addr_t pthread; char pthread_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(uint32_t)]; uint32_t flags; char flags_r_[PADR_(uint32_t)]; -}; -struct bsdthread_terminate_args { - char stackaddr_l_[PADL_(user_addr_t)]; user_addr_t stackaddr; char stackaddr_r_[PADR_(user_addr_t)]; - char freesize_l_[PADL_(user_size_t)]; user_size_t freesize; char freesize_r_[PADR_(user_size_t)]; - char port_l_[PADL_(uint32_t)]; uint32_t port; char port_r_[PADR_(uint32_t)]; - char sem_l_[PADL_(uint32_t)]; uint32_t sem; char sem_r_[PADR_(uint32_t)]; -}; -struct kqueue_args { - register_t dummy; -}; -struct kevent_args { - 
char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char changelist_l_[PADL_(user_addr_t)]; user_addr_t changelist; char changelist_r_[PADR_(user_addr_t)]; - char nchanges_l_[PADL_(int)]; int nchanges; char nchanges_r_[PADR_(int)]; - char eventlist_l_[PADL_(user_addr_t)]; user_addr_t eventlist; char eventlist_r_[PADR_(user_addr_t)]; - char nevents_l_[PADL_(int)]; int nevents; char nevents_r_[PADR_(int)]; - char timeout_l_[PADL_(user_addr_t)]; user_addr_t timeout; char timeout_r_[PADR_(user_addr_t)]; -}; -struct lchown_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char owner_l_[PADL_(uid_t)]; uid_t owner; char owner_r_[PADR_(uid_t)]; - char group_l_[PADL_(gid_t)]; gid_t group; char group_r_[PADR_(gid_t)]; -}; -struct stack_snapshot_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char tracebuf_l_[PADL_(user_addr_t)]; user_addr_t tracebuf; char tracebuf_r_[PADR_(user_addr_t)]; - char tracebuf_size_l_[PADL_(uint32_t)]; uint32_t tracebuf_size; char tracebuf_size_r_[PADR_(uint32_t)]; - char options_l_[PADL_(uint32_t)]; uint32_t options; char options_r_[PADR_(uint32_t)]; -}; -struct bsdthread_register_args { - char threadstart_l_[PADL_(user_addr_t)]; user_addr_t threadstart; char threadstart_r_[PADR_(user_addr_t)]; - char wqthread_l_[PADL_(user_addr_t)]; user_addr_t wqthread; char wqthread_r_[PADR_(user_addr_t)]; - char pthsize_l_[PADL_(int)]; int pthsize; char pthsize_r_[PADR_(int)]; -}; -struct workq_open_args { - register_t dummy; -}; -struct workq_ops_args { - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; - char item_l_[PADL_(user_addr_t)]; user_addr_t item; char item_r_[PADR_(user_addr_t)]; - char prio_l_[PADL_(int)]; int prio; char prio_r_[PADR_(int)]; -}; -struct __mac_execve_args { - char fname_l_[PADL_(user_addr_t)]; user_addr_t fname; char fname_r_[PADR_(user_addr_t)]; - char argp_l_[PADL_(user_addr_t)]; user_addr_t argp; char 
argp_r_[PADR_(user_addr_t)]; - char envp_l_[PADL_(user_addr_t)]; user_addr_t envp; char envp_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_syscall_args { - char policy_l_[PADL_(user_addr_t)]; user_addr_t policy; char policy_r_[PADR_(user_addr_t)]; - char call_l_[PADL_(int)]; int call; char call_r_[PADR_(int)]; - char arg_l_[PADL_(user_addr_t)]; user_addr_t arg; char arg_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_file_args { - char path_p_l_[PADL_(user_addr_t)]; user_addr_t path_p; char path_p_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_set_file_args { - char path_p_l_[PADL_(user_addr_t)]; user_addr_t path_p; char path_p_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_link_args { - char path_p_l_[PADL_(user_addr_t)]; user_addr_t path_p; char path_p_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_set_link_args { - char path_p_l_[PADL_(user_addr_t)]; user_addr_t path_p; char path_p_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_proc_args { - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_set_proc_args { - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_fd_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_set_fd_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct 
__mac_get_pid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_lcid_args { - char lcid_l_[PADL_(pid_t)]; pid_t lcid; char lcid_r_[PADR_(pid_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_lctx_args { - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_set_lctx_args { - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct setlcid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; - char lcid_l_[PADL_(pid_t)]; pid_t lcid; char lcid_r_[PADR_(pid_t)]; -}; -struct getlcid_args { - char pid_l_[PADL_(pid_t)]; pid_t pid; char pid_r_[PADR_(pid_t)]; -}; -struct read_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; -}; -struct write_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cbuf_l_[PADL_(user_addr_t)]; user_addr_t cbuf; char cbuf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; -}; -struct open_nocancel_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; -}; -struct close_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -struct wait4_nocancel_args { - char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)]; - char status_l_[PADL_(user_addr_t)]; user_addr_t status; char status_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; 
char options_r_[PADR_(int)]; - char rusage_l_[PADL_(user_addr_t)]; user_addr_t rusage; char rusage_r_[PADR_(user_addr_t)]; -}; -#if SOCKETS -struct recvmsg_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct sendmsg_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char msg_l_[PADL_(user_addr_t)]; user_addr_t msg; char msg_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct recvfrom_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char from_l_[PADL_(user_addr_t)]; user_addr_t from; char from_r_[PADR_(user_addr_t)]; - char fromlenaddr_l_[PADL_(user_addr_t)]; user_addr_t fromlenaddr; char fromlenaddr_r_[PADR_(user_addr_t)]; -}; -struct accept_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char anamelen_l_[PADL_(user_addr_t)]; user_addr_t anamelen; char anamelen_r_[PADR_(user_addr_t)]; -}; -#else -#endif /* SOCKETS */ -struct msync_nocancel_args { - char addr_l_[PADL_(user_addr_t)]; user_addr_t addr; char addr_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -struct fcntl_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; - char arg_l_[PADL_(user_long_t)]; user_long_t arg; char arg_r_[PADR_(user_long_t)]; -}; -struct select_nocancel_args 
{ - char nd_l_[PADL_(int)]; int nd; char nd_r_[PADR_(int)]; - char in_l_[PADL_(user_addr_t)]; user_addr_t in; char in_r_[PADR_(user_addr_t)]; - char ou_l_[PADL_(user_addr_t)]; user_addr_t ou; char ou_r_[PADR_(user_addr_t)]; - char ex_l_[PADL_(user_addr_t)]; user_addr_t ex; char ex_r_[PADR_(user_addr_t)]; - char tv_l_[PADL_(user_addr_t)]; user_addr_t tv; char tv_r_[PADR_(user_addr_t)]; -}; -struct fsync_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; -}; -#if SOCKETS -struct connect_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(user_addr_t)]; user_addr_t name; char name_r_[PADR_(user_addr_t)]; - char namelen_l_[PADL_(socklen_t)]; socklen_t namelen; char namelen_r_[PADR_(socklen_t)]; -}; -#else -#endif /* SOCKETS */ -struct sigsuspend_nocancel_args { - char mask_l_[PADL_(sigset_t)]; sigset_t mask; char mask_r_[PADR_(sigset_t)]; -}; -struct readv_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)]; - char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)]; -}; -struct writev_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char iovp_l_[PADL_(user_addr_t)]; user_addr_t iovp; char iovp_r_[PADR_(user_addr_t)]; - char iovcnt_l_[PADL_(u_int)]; u_int iovcnt; char iovcnt_r_[PADR_(u_int)]; -}; -#if SOCKETS -struct sendto_nocancel_args { - char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char len_l_[PADL_(user_size_t)]; user_size_t len; char len_r_[PADR_(user_size_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char to_l_[PADL_(user_addr_t)]; user_addr_t to; char to_r_[PADR_(user_addr_t)]; - char tolen_l_[PADL_(socklen_t)]; socklen_t tolen; char tolen_r_[PADR_(socklen_t)]; -}; -#else -#endif /* SOCKETS */ -struct pread_nocancel_args { - char 
fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; -}; -struct pwrite_nocancel_args { - char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char buf_r_[PADR_(user_addr_t)]; - char nbyte_l_[PADL_(user_size_t)]; user_size_t nbyte; char nbyte_r_[PADR_(user_size_t)]; - char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; -}; -struct waitid_nocancel_args { - char idtype_l_[PADL_(idtype_t)]; idtype_t idtype; char idtype_r_[PADR_(idtype_t)]; - char id_l_[PADL_(id_t)]; id_t id; char id_r_[PADR_(id_t)]; - char infop_l_[PADL_(user_addr_t)]; user_addr_t infop; char infop_r_[PADR_(user_addr_t)]; - char options_l_[PADL_(int)]; int options; char options_r_[PADR_(int)]; -}; -struct poll_nocancel_args { - char fds_l_[PADL_(user_addr_t)]; user_addr_t fds; char fds_r_[PADR_(user_addr_t)]; - char nfds_l_[PADL_(u_int)]; u_int nfds; char nfds_r_[PADR_(u_int)]; - char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; -}; -#if SYSV_MSG -struct msgsnd_nocancel_args { - char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; - char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; - char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; - char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; -}; -struct msgrcv_nocancel_args { - char msqid_l_[PADL_(int)]; int msqid; char msqid_r_[PADR_(int)]; - char msgp_l_[PADL_(user_addr_t)]; user_addr_t msgp; char msgp_r_[PADR_(user_addr_t)]; - char msgsz_l_[PADL_(user_size_t)]; user_size_t msgsz; char msgsz_r_[PADR_(user_size_t)]; - char msgtyp_l_[PADL_(user_long_t)]; user_long_t msgtyp; char msgtyp_r_[PADR_(user_long_t)]; - 
char msgflg_l_[PADL_(int)]; int msgflg; char msgflg_r_[PADR_(int)]; -}; -#else -#endif -struct sem_wait_nocancel_args { - char sem_l_[PADL_(user_addr_t)]; user_addr_t sem; char sem_r_[PADR_(user_addr_t)]; -}; -struct aio_suspend_nocancel_args { - char aiocblist_l_[PADL_(user_addr_t)]; user_addr_t aiocblist; char aiocblist_r_[PADR_(user_addr_t)]; - char nent_l_[PADL_(int)]; int nent; char nent_r_[PADR_(int)]; - char timeoutp_l_[PADL_(user_addr_t)]; user_addr_t timeoutp; char timeoutp_r_[PADR_(user_addr_t)]; -}; -struct __sigwait_nocancel_args { - char set_l_[PADL_(user_addr_t)]; user_addr_t set; char set_r_[PADR_(user_addr_t)]; - char sig_l_[PADL_(user_addr_t)]; user_addr_t sig; char sig_r_[PADR_(user_addr_t)]; -}; -struct __semwait_signal_nocancel_args { - char cond_sem_l_[PADL_(int)]; int cond_sem; char cond_sem_r_[PADR_(int)]; - char mutex_sem_l_[PADL_(int)]; int mutex_sem; char mutex_sem_r_[PADR_(int)]; - char timeout_l_[PADL_(int)]; int timeout; char timeout_r_[PADR_(int)]; - char relative_l_[PADL_(int)]; int relative; char relative_r_[PADR_(int)]; - char tv_sec_l_[PADL_(time_t)]; time_t tv_sec; char tv_sec_r_[PADR_(time_t)]; - char tv_nsec_l_[PADL_(int32_t)]; int32_t tv_nsec; char tv_nsec_r_[PADR_(int32_t)]; -}; -struct __mac_mount_args { - char type_l_[PADL_(user_addr_t)]; user_addr_t type; char type_r_[PADR_(user_addr_t)]; - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char data_l_[PADL_(user_addr_t)]; user_addr_t data; char data_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_get_mount_args { - char path_l_[PADL_(user_addr_t)]; user_addr_t path; char path_r_[PADR_(user_addr_t)]; - char mac_p_l_[PADL_(user_addr_t)]; user_addr_t mac_p; char mac_p_r_[PADR_(user_addr_t)]; -}; -struct __mac_getfsstat_args { - char buf_l_[PADL_(user_addr_t)]; user_addr_t buf; char 
buf_r_[PADR_(user_addr_t)]; - char bufsize_l_[PADL_(int)]; int bufsize; char bufsize_r_[PADR_(int)]; - char mac_l_[PADL_(user_addr_t)]; user_addr_t mac; char mac_r_[PADR_(user_addr_t)]; - char macsize_l_[PADL_(int)]; int macsize; char macsize_r_[PADR_(int)]; - char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; -}; -int nosys(struct proc *, struct nosys_args *, int *); -void exit(struct proc *, struct exit_args *, int *); -int fork(struct proc *, struct fork_args *, int *); -int read(struct proc *, struct read_args *, user_ssize_t *); -int write(struct proc *, struct write_args *, user_ssize_t *); -int open(struct proc *, struct open_args *, int *); -int close(struct proc *, struct close_args *, int *); -int wait4(struct proc *, struct wait4_args *, int *); -int link(struct proc *, struct link_args *, int *); -int unlink(struct proc *, struct unlink_args *, int *); -int chdir(struct proc *, struct chdir_args *, int *); -int fchdir(struct proc *, struct fchdir_args *, int *); -int mknod(struct proc *, struct mknod_args *, int *); -int chmod(struct proc *, struct chmod_args *, int *); -int chown(struct proc *, struct chown_args *, int *); -int obreak(struct proc *, struct obreak_args *, int *); -#if COMPAT_GETFSSTAT -int ogetfsstat(struct proc *, struct ogetfsstat_args *, int *); -#else -int getfsstat(struct proc *, struct getfsstat_args *, int *); -#endif -int getpid(struct proc *, struct getpid_args *, int *); -int setuid(struct proc *, struct setuid_args *, int *); -int getuid(struct proc *, struct getuid_args *, int *); -int geteuid(struct proc *, struct geteuid_args *, int *); -int ptrace(struct proc *, struct ptrace_args *, int *); -#if SOCKETS -int recvmsg(struct proc *, struct recvmsg_args *, int *); -int sendmsg(struct proc *, struct sendmsg_args *, int *); -int recvfrom(struct proc *, struct recvfrom_args *, int *); -int accept(struct proc *, struct accept_args *, int *); -int getpeername(struct proc *, struct getpeername_args *, int *); -int 
getsockname(struct proc *, struct getsockname_args *, int *); -#else -#endif /* SOCKETS */ -int access(struct proc *, struct access_args *, int *); -int chflags(struct proc *, struct chflags_args *, int *); -int fchflags(struct proc *, struct fchflags_args *, int *); -int sync(struct proc *, struct sync_args *, int *); -int kill(struct proc *, struct kill_args *, int *); -int getppid(struct proc *, struct getppid_args *, int *); -int dup(struct proc *, struct dup_args *, int *); -int pipe(struct proc *, struct pipe_args *, int *); -int getegid(struct proc *, struct getegid_args *, int *); -int profil(struct proc *, struct profil_args *, int *); -int sigaction(struct proc *, struct sigaction_args *, int *); -int getgid(struct proc *, struct getgid_args *, int *); -int sigprocmask(struct proc *, struct sigprocmask_args *, int *); -int getlogin(struct proc *, struct getlogin_args *, int *); -int setlogin(struct proc *, struct setlogin_args *, int *); -int acct(struct proc *, struct acct_args *, int *); -int sigpending(struct proc *, struct sigpending_args *, int *); -int sigaltstack(struct proc *, struct sigaltstack_args *, int *); -int ioctl(struct proc *, struct ioctl_args *, int *); -int reboot(struct proc *, struct reboot_args *, int *); -int revoke(struct proc *, struct revoke_args *, int *); -int symlink(struct proc *, struct symlink_args *, int *); -int readlink(struct proc *, struct readlink_args *, int *); -int execve(struct proc *, struct execve_args *, int *); -int umask(struct proc *, struct umask_args *, int *); -int chroot(struct proc *, struct chroot_args *, int *); -int msync(struct proc *, struct msync_args *, int *); -int vfork(struct proc *, struct vfork_args *, int *); -int sbrk(struct proc *, struct sbrk_args *, int *); -int sstk(struct proc *, struct sstk_args *, int *); -int ovadvise(struct proc *, struct ovadvise_args *, int *); -int munmap(struct proc *, struct munmap_args *, int *); -int mprotect(struct proc *, struct mprotect_args *, int *); 
-int madvise(struct proc *, struct madvise_args *, int *); -int mincore(struct proc *, struct mincore_args *, int *); -int getgroups(struct proc *, struct getgroups_args *, int *); -int setgroups(struct proc *, struct setgroups_args *, int *); -int getpgrp(struct proc *, struct getpgrp_args *, int *); -int setpgid(struct proc *, struct setpgid_args *, int *); -int setitimer(struct proc *, struct setitimer_args *, int *); -int swapon(struct proc *, struct swapon_args *, int *); -int getitimer(struct proc *, struct getitimer_args *, int *); -int getdtablesize(struct proc *, struct getdtablesize_args *, int *); -int dup2(struct proc *, struct dup2_args *, int *); -int fcntl(struct proc *, struct fcntl_args *, int *); -int select(struct proc *, struct select_args *, int *); -int fsync(struct proc *, struct fsync_args *, int *); -int setpriority(struct proc *, struct setpriority_args *, int *); -#if SOCKETS -int socket(struct proc *, struct socket_args *, int *); -int connect(struct proc *, struct connect_args *, int *); -#else -#endif /* SOCKETS */ -int getpriority(struct proc *, struct getpriority_args *, int *); -#if SOCKETS -int bind(struct proc *, struct bind_args *, int *); -int setsockopt(struct proc *, struct setsockopt_args *, int *); -int listen(struct proc *, struct listen_args *, int *); -#else -#endif /* SOCKETS */ -int sigsuspend(struct proc *, struct sigsuspend_args *, int *); -#if SOCKETS -#else -#endif /* SOCKETS */ -int gettimeofday(struct proc *, struct gettimeofday_args *, int *); -int getrusage(struct proc *, struct getrusage_args *, int *); -#if SOCKETS -int getsockopt(struct proc *, struct getsockopt_args *, int *); -#else -#endif /* SOCKETS */ -int readv(struct proc *, struct readv_args *, user_ssize_t *); -int writev(struct proc *, struct writev_args *, user_ssize_t *); -int settimeofday(struct proc *, struct settimeofday_args *, int *); -int fchown(struct proc *, struct fchown_args *, int *); -int fchmod(struct proc *, struct fchmod_args *, int 
*); -int setreuid(struct proc *, struct setreuid_args *, int *); -int setregid(struct proc *, struct setregid_args *, int *); -int rename(struct proc *, struct rename_args *, int *); -int flock(struct proc *, struct flock_args *, int *); -int mkfifo(struct proc *, struct mkfifo_args *, int *); -#if SOCKETS -int sendto(struct proc *, struct sendto_args *, int *); -int shutdown(struct proc *, struct shutdown_args *, int *); -int socketpair(struct proc *, struct socketpair_args *, int *); -#else -#endif /* SOCKETS */ -int mkdir(struct proc *, struct mkdir_args *, int *); -int rmdir(struct proc *, struct rmdir_args *, int *); -int utimes(struct proc *, struct utimes_args *, int *); -int futimes(struct proc *, struct futimes_args *, int *); -int adjtime(struct proc *, struct adjtime_args *, int *); -int gethostuuid(struct proc *, struct gethostuuid_args *, int *); -int setsid(struct proc *, struct setsid_args *, int *); -int getpgid(struct proc *, struct getpgid_args *, int *); -int setprivexec(struct proc *, struct setprivexec_args *, int *); -int pread(struct proc *, struct pread_args *, user_ssize_t *); -int pwrite(struct proc *, struct pwrite_args *, user_ssize_t *); -#if NFSSERVER -int nfssvc(struct proc *, struct nfssvc_args *, int *); -#else -#endif -int statfs(struct proc *, struct statfs_args *, int *); -int fstatfs(struct proc *, struct fstatfs_args *, int *); -int unmount(struct proc *, struct unmount_args *, int *); -#if NFSSERVER -int getfh(struct proc *, struct getfh_args *, int *); -#else -#endif -int quotactl(struct proc *, struct quotactl_args *, int *); -int mount(struct proc *, struct mount_args *, int *); -int csops(struct proc *, struct csops_args *, int *); -int waitid(struct proc *, struct waitid_args *, int *); -int add_profil(struct proc *, struct add_profil_args *, int *); -int kdebug_trace(struct proc *, struct kdebug_trace_args *, int *); -int setgid(struct proc *, struct setgid_args *, int *); -int setegid(struct proc *, struct setegid_args 
*, int *); -int seteuid(struct proc *, struct seteuid_args *, int *); -int sigreturn(struct proc *, struct sigreturn_args *, int *); -int chud(struct proc *, struct chud_args *, int *); -int stat(struct proc *, struct stat_args *, int *); -int fstat(struct proc *, struct fstat_args *, int *); -int lstat(struct proc *, struct lstat_args *, int *); -int pathconf(struct proc *, struct pathconf_args *, int *); -int fpathconf(struct proc *, struct fpathconf_args *, int *); -int getrlimit(struct proc *, struct getrlimit_args *, int *); -int setrlimit(struct proc *, struct setrlimit_args *, int *); -int getdirentries(struct proc *, struct getdirentries_args *, int *); -int mmap(struct proc *, struct mmap_args *, user_addr_t *); -int lseek(struct proc *, struct lseek_args *, off_t *); -int truncate(struct proc *, struct truncate_args *, int *); -int ftruncate(struct proc *, struct ftruncate_args *, int *); -int __sysctl(struct proc *, struct __sysctl_args *, int *); -int mlock(struct proc *, struct mlock_args *, int *); -int munlock(struct proc *, struct munlock_args *, int *); -int undelete(struct proc *, struct undelete_args *, int *); -#if NETAT -int ATsocket(struct proc *, struct ATsocket_args *, int *); -int ATgetmsg(struct proc *, struct ATgetmsg_args *, int *); -int ATputmsg(struct proc *, struct ATputmsg_args *, int *); -int ATPsndreq(struct proc *, struct ATPsndreq_args *, int *); -int ATPsndrsp(struct proc *, struct ATPsndrsp_args *, int *); -int ATPgetreq(struct proc *, struct ATPgetreq_args *, int *); -int ATPgetrsp(struct proc *, struct ATPgetrsp_args *, int *); -#else -#endif /* NETAT */ -int kqueue_from_portset_np(struct proc *, struct kqueue_from_portset_np_args *, int *); -int kqueue_portset_np(struct proc *, struct kqueue_portset_np_args *, int *); -int getattrlist(struct proc *, struct getattrlist_args *, int *); -int setattrlist(struct proc *, struct setattrlist_args *, int *); -int getdirentriesattr(struct proc *, struct getdirentriesattr_args *, int 
*); -int exchangedata(struct proc *, struct exchangedata_args *, int *); -int searchfs(struct proc *, struct searchfs_args *, int *); -int delete(struct proc *, struct delete_args *, int *); -int copyfile(struct proc *, struct copyfile_args *, int *); -int poll(struct proc *, struct poll_args *, int *); -int watchevent(struct proc *, struct watchevent_args *, int *); -int waitevent(struct proc *, struct waitevent_args *, int *); -int modwatch(struct proc *, struct modwatch_args *, int *); -int getxattr(struct proc *, struct getxattr_args *, user_ssize_t *); -int fgetxattr(struct proc *, struct fgetxattr_args *, user_ssize_t *); -int setxattr(struct proc *, struct setxattr_args *, int *); -int fsetxattr(struct proc *, struct fsetxattr_args *, int *); -int removexattr(struct proc *, struct removexattr_args *, int *); -int fremovexattr(struct proc *, struct fremovexattr_args *, int *); -int listxattr(struct proc *, struct listxattr_args *, user_ssize_t *); -int flistxattr(struct proc *, struct flistxattr_args *, user_ssize_t *); -int fsctl(struct proc *, struct fsctl_args *, int *); -int initgroups(struct proc *, struct initgroups_args *, int *); -int posix_spawn(struct proc *, struct posix_spawn_args *, int *); -#if NFSCLIENT -int nfsclnt(struct proc *, struct nfsclnt_args *, int *); -#else -#endif -#if NFSSERVER -int fhopen(struct proc *, struct fhopen_args *, int *); -#else -#endif -int minherit(struct proc *, struct minherit_args *, int *); -#if SYSV_SEM -int semsys(struct proc *, struct semsys_args *, int *); -#else -#endif -#if SYSV_MSG -int msgsys(struct proc *, struct msgsys_args *, int *); -#else -#endif -#if SYSV_SHM -int shmsys(struct proc *, struct shmsys_args *, int *); -#else -#endif -#if SYSV_SEM -int semctl(struct proc *, struct semctl_args *, int *); -int semget(struct proc *, struct semget_args *, int *); -int semop(struct proc *, struct semop_args *, int *); -#else -#endif -#if SYSV_MSG -int msgctl(struct proc *, struct msgctl_args *, int *); -int 
msgget(struct proc *, struct msgget_args *, int *); -int msgsnd(struct proc *, struct msgsnd_args *, int *); -int msgrcv(struct proc *, struct msgrcv_args *, user_ssize_t *); -#else -#endif -#if SYSV_SHM -int shmat(struct proc *, struct shmat_args *, user_addr_t *); -int shmctl(struct proc *, struct shmctl_args *, int *); -int shmdt(struct proc *, struct shmdt_args *, int *); -int shmget(struct proc *, struct shmget_args *, int *); -#else -#endif -int shm_open(struct proc *, struct shm_open_args *, int *); -int shm_unlink(struct proc *, struct shm_unlink_args *, int *); -int sem_open(struct proc *, struct sem_open_args *, user_addr_t *); -int sem_close(struct proc *, struct sem_close_args *, int *); -int sem_unlink(struct proc *, struct sem_unlink_args *, int *); -int sem_wait(struct proc *, struct sem_wait_args *, int *); -int sem_trywait(struct proc *, struct sem_trywait_args *, int *); -int sem_post(struct proc *, struct sem_post_args *, int *); -int sem_getvalue(struct proc *, struct sem_getvalue_args *, int *); -int sem_init(struct proc *, struct sem_init_args *, int *); -int sem_destroy(struct proc *, struct sem_destroy_args *, int *); -int open_extended(struct proc *, struct open_extended_args *, int *); -int umask_extended(struct proc *, struct umask_extended_args *, int *); -int stat_extended(struct proc *, struct stat_extended_args *, int *); -int lstat_extended(struct proc *, struct lstat_extended_args *, int *); -int fstat_extended(struct proc *, struct fstat_extended_args *, int *); -int chmod_extended(struct proc *, struct chmod_extended_args *, int *); -int fchmod_extended(struct proc *, struct fchmod_extended_args *, int *); -int access_extended(struct proc *, struct access_extended_args *, int *); -int settid(struct proc *, struct settid_args *, int *); -int gettid(struct proc *, struct gettid_args *, int *); -int setsgroups(struct proc *, struct setsgroups_args *, int *); -int getsgroups(struct proc *, struct getsgroups_args *, int *); -int 
setwgroups(struct proc *, struct setwgroups_args *, int *); -int getwgroups(struct proc *, struct getwgroups_args *, int *); -int mkfifo_extended(struct proc *, struct mkfifo_extended_args *, int *); -int mkdir_extended(struct proc *, struct mkdir_extended_args *, int *); -int identitysvc(struct proc *, struct identitysvc_args *, int *); -int shared_region_check_np(struct proc *, struct shared_region_check_np_args *, int *); -int shared_region_map_np(struct proc *, struct shared_region_map_np_args *, int *); -int __pthread_mutex_destroy(struct proc *, struct __pthread_mutex_destroy_args *, int *); -int __pthread_mutex_init(struct proc *, struct __pthread_mutex_init_args *, int *); -int __pthread_mutex_lock(struct proc *, struct __pthread_mutex_lock_args *, int *); -int __pthread_mutex_trylock(struct proc *, struct __pthread_mutex_trylock_args *, int *); -int __pthread_mutex_unlock(struct proc *, struct __pthread_mutex_unlock_args *, int *); -int __pthread_cond_init(struct proc *, struct __pthread_cond_init_args *, int *); -int __pthread_cond_destroy(struct proc *, struct __pthread_cond_destroy_args *, int *); -int __pthread_cond_broadcast(struct proc *, struct __pthread_cond_broadcast_args *, int *); -int __pthread_cond_signal(struct proc *, struct __pthread_cond_signal_args *, int *); -int getsid(struct proc *, struct getsid_args *, int *); -int settid_with_pid(struct proc *, struct settid_with_pid_args *, int *); -int __pthread_cond_timedwait(struct proc *, struct __pthread_cond_timedwait_args *, int *); -int aio_fsync(struct proc *, struct aio_fsync_args *, int *); -int aio_return(struct proc *, struct aio_return_args *, user_ssize_t *); -int aio_suspend(struct proc *, struct aio_suspend_args *, int *); -int aio_cancel(struct proc *, struct aio_cancel_args *, int *); -int aio_error(struct proc *, struct aio_error_args *, int *); -int aio_read(struct proc *, struct aio_read_args *, int *); -int aio_write(struct proc *, struct aio_write_args *, int *); -int 
lio_listio(struct proc *, struct lio_listio_args *, int *); -int __pthread_cond_wait(struct proc *, struct __pthread_cond_wait_args *, int *); -int iopolicysys(struct proc *, struct iopolicysys_args *, int *); -int mlockall(struct proc *, struct mlockall_args *, int *); -int munlockall(struct proc *, struct munlockall_args *, int *); -int issetugid(struct proc *, struct issetugid_args *, int *); -int __pthread_kill(struct proc *, struct __pthread_kill_args *, int *); -int __pthread_sigmask(struct proc *, struct __pthread_sigmask_args *, int *); -int __sigwait(struct proc *, struct __sigwait_args *, int *); -int __disable_threadsignal(struct proc *, struct __disable_threadsignal_args *, int *); -int __pthread_markcancel(struct proc *, struct __pthread_markcancel_args *, int *); -int __pthread_canceled(struct proc *, struct __pthread_canceled_args *, int *); -int __semwait_signal(struct proc *, struct __semwait_signal_args *, int *); -int proc_info(struct proc *, struct proc_info_args *, int *); -#if SENDFILE -int sendfile(struct proc *, struct sendfile_args *, int *); -#else /* !SENDFILE */ -#endif /* SENDFILE */ -int stat64(struct proc *, struct stat64_args *, int *); -int fstat64(struct proc *, struct fstat64_args *, int *); -int lstat64(struct proc *, struct lstat64_args *, int *); -int stat64_extended(struct proc *, struct stat64_extended_args *, int *); -int lstat64_extended(struct proc *, struct lstat64_extended_args *, int *); -int fstat64_extended(struct proc *, struct fstat64_extended_args *, int *); -int getdirentries64(struct proc *, struct getdirentries64_args *, user_ssize_t *); -int statfs64(struct proc *, struct statfs64_args *, int *); -int fstatfs64(struct proc *, struct fstatfs64_args *, int *); -int getfsstat64(struct proc *, struct getfsstat64_args *, int *); -int __pthread_chdir(struct proc *, struct __pthread_chdir_args *, int *); -int __pthread_fchdir(struct proc *, struct __pthread_fchdir_args *, int *); -#if AUDIT -int audit(struct proc *, 
struct audit_args *, int *); -int auditon(struct proc *, struct auditon_args *, int *); -int getauid(struct proc *, struct getauid_args *, int *); -int setauid(struct proc *, struct setauid_args *, int *); -int getaudit(struct proc *, struct getaudit_args *, int *); -int setaudit(struct proc *, struct setaudit_args *, int *); -int getaudit_addr(struct proc *, struct getaudit_addr_args *, int *); -int setaudit_addr(struct proc *, struct setaudit_addr_args *, int *); -int auditctl(struct proc *, struct auditctl_args *, int *); -#else -#endif -int bsdthread_create(struct proc *, struct bsdthread_create_args *, user_addr_t *); -int bsdthread_terminate(struct proc *, struct bsdthread_terminate_args *, int *); -int kqueue(struct proc *, struct kqueue_args *, int *); -int kevent(struct proc *, struct kevent_args *, int *); -int lchown(struct proc *, struct lchown_args *, int *); -int stack_snapshot(struct proc *, struct stack_snapshot_args *, int *); -int bsdthread_register(struct proc *, struct bsdthread_register_args *, int *); -int workq_open(struct proc *, struct workq_open_args *, int *); -int workq_ops(struct proc *, struct workq_ops_args *, int *); -int __mac_execve(struct proc *, struct __mac_execve_args *, int *); -int __mac_syscall(struct proc *, struct __mac_syscall_args *, int *); -int __mac_get_file(struct proc *, struct __mac_get_file_args *, int *); -int __mac_set_file(struct proc *, struct __mac_set_file_args *, int *); -int __mac_get_link(struct proc *, struct __mac_get_link_args *, int *); -int __mac_set_link(struct proc *, struct __mac_set_link_args *, int *); -int __mac_get_proc(struct proc *, struct __mac_get_proc_args *, int *); -int __mac_set_proc(struct proc *, struct __mac_set_proc_args *, int *); -int __mac_get_fd(struct proc *, struct __mac_get_fd_args *, int *); -int __mac_set_fd(struct proc *, struct __mac_set_fd_args *, int *); -int __mac_get_pid(struct proc *, struct __mac_get_pid_args *, int *); -int __mac_get_lcid(struct proc *, struct 
__mac_get_lcid_args *, int *); -int __mac_get_lctx(struct proc *, struct __mac_get_lctx_args *, int *); -int __mac_set_lctx(struct proc *, struct __mac_set_lctx_args *, int *); -int setlcid(struct proc *, struct setlcid_args *, int *); -int getlcid(struct proc *, struct getlcid_args *, int *); -int read_nocancel(struct proc *, struct read_nocancel_args *, user_ssize_t *); -int write_nocancel(struct proc *, struct write_nocancel_args *, user_ssize_t *); -int open_nocancel(struct proc *, struct open_nocancel_args *, int *); -int close_nocancel(struct proc *, struct close_nocancel_args *, int *); -int wait4_nocancel(struct proc *, struct wait4_nocancel_args *, int *); -#if SOCKETS -int recvmsg_nocancel(struct proc *, struct recvmsg_nocancel_args *, int *); -int sendmsg_nocancel(struct proc *, struct sendmsg_nocancel_args *, int *); -int recvfrom_nocancel(struct proc *, struct recvfrom_nocancel_args *, int *); -int accept_nocancel(struct proc *, struct accept_nocancel_args *, int *); -#else -#endif /* SOCKETS */ -int msync_nocancel(struct proc *, struct msync_nocancel_args *, int *); -int fcntl_nocancel(struct proc *, struct fcntl_nocancel_args *, int *); -int select_nocancel(struct proc *, struct select_nocancel_args *, int *); -int fsync_nocancel(struct proc *, struct fsync_nocancel_args *, int *); -#if SOCKETS -int connect_nocancel(struct proc *, struct connect_nocancel_args *, int *); -#else -#endif /* SOCKETS */ -int sigsuspend_nocancel(struct proc *, struct sigsuspend_nocancel_args *, int *); -int readv_nocancel(struct proc *, struct readv_nocancel_args *, user_ssize_t *); -int writev_nocancel(struct proc *, struct writev_nocancel_args *, user_ssize_t *); -#if SOCKETS -int sendto_nocancel(struct proc *, struct sendto_nocancel_args *, int *); -#else -#endif /* SOCKETS */ -int pread_nocancel(struct proc *, struct pread_nocancel_args *, user_ssize_t *); -int pwrite_nocancel(struct proc *, struct pwrite_nocancel_args *, user_ssize_t *); -int waitid_nocancel(struct 
proc *, struct waitid_nocancel_args *, int *); -int poll_nocancel(struct proc *, struct poll_nocancel_args *, int *); -#if SYSV_MSG -int msgsnd_nocancel(struct proc *, struct msgsnd_nocancel_args *, int *); -int msgrcv_nocancel(struct proc *, struct msgrcv_nocancel_args *, user_ssize_t *); -#else -#endif -int sem_wait_nocancel(struct proc *, struct sem_wait_nocancel_args *, int *); -int aio_suspend_nocancel(struct proc *, struct aio_suspend_nocancel_args *, int *); -int __sigwait_nocancel(struct proc *, struct __sigwait_nocancel_args *, int *); -int __semwait_signal_nocancel(struct proc *, struct __semwait_signal_nocancel_args *, int *); -int __mac_mount(struct proc *, struct __mac_mount_args *, int *); -int __mac_get_mount(struct proc *, struct __mac_get_mount_args *, int *); -int __mac_getfsstat(struct proc *, struct __mac_getfsstat_args *, int *); - -__END_DECLS -#undef PAD_ -#undef PADL_ -#undef PADR_ - -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ - -#endif /* !_SYS_SYSPROTO_H_ */ diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h index d482714ef..d5fdbe392 100644 --- a/bsd/sys/systm.h +++ b/bsd/sys/systm.h @@ -117,8 +117,8 @@ __BEGIN_DECLS __END_DECLS #ifdef BSD_KERNEL_PRIVATE -extern char version[]; /* system version */ -extern char copyright[]; /* system copyright */ +extern char version[]; /* system version */ +extern const char copyright[]; /* system copyright */ extern int boothowto; /* reboot flags, from console subsystem */ @@ -129,13 +129,16 @@ extern int nchrdev; /* number of entries in cdevsw */ #endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE -#define NO_FUNNEL 0 -#define KERNEL_FUNNEL 1 extern int securelevel; /* system security level */ extern dev_t rootdev; /* root device */ extern struct vnode *rootvp; /* vnode equivalent to above */ + +#ifdef XNU_KERNEL_PRIVATE +#define NO_FUNNEL 0 +#define KERNEL_FUNNEL 1 extern funnel_t * kernel_flock; +#endif /* XNU_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ @@ -144,89 +147,42 @@ extern 
funnel_t * kernel_flock; #define getenv_int(a,b) (*b = 0) #define KASSERT(exp,msg) - /* * General function declarations. */ __BEGIN_DECLS -int nullop(void); -int nulldev(void); -int enoioctl(void); -int enosys(void); -int errsys(void); -void nullsys(void); -int enxio(void); -int eopnotsupp(void); -int einval(void); #ifdef BSD_KERNEL_PRIVATE +int einval(void); +void nullsys(void); +int errsys(void); int seltrue(dev_t dev, int which, struct proc *p); -void ttyprintf(struct tty *, const char *, ...); +void ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); void realitexpire(struct proc *); int hzto(struct timeval *tv); -#endif /* __APPLE_API_UNSTABLE */ - -void *hashinit(int count, int type, u_long *hashmask); - void tablefull(const char *); - int kvprintf(char const *, void (*)(int, void*), void *, int, __darwin_va_list); - void uprintf(const char *, ...) __printflike(1,2); - - -void ovbcopy(const void *from, void *to, size_t len); int copywithin(void *saddr, void *daddr, size_t len); - -int fubyte(user_addr_t addr); -int fuibyte(user_addr_t addr); -int subyte(user_addr_t addr, int byte); -int suibyte(user_addr_t addr, int byte); -long fuword(user_addr_t addr); -long fuiword(user_addr_t addr); -int suword(user_addr_t addr, long word); -int suiword(user_addr_t addr, long word); int64_t fulong(user_addr_t addr); int sulong(user_addr_t addr, int64_t longword); uint64_t fuulong(user_addr_t addr); int suulong(user_addr_t addr, uint64_t ulongword); -#define fusize(_a) ((user_size_t)fulong(_a)) -#define susize(_a, _s) sulong((_a), (_s)) -#define fuptr(a) ((user_addr_t)fulong(_a) -#define suptr(_a, _p) sulong((_a), (_p)) -int useracc(user_addr_t addr, user_size_t len,int prot); - -typedef void (*timeout_fcn_t)(void *); -#ifdef KERNEL_PRIVATE -void timeout(void (*)(void *), void *arg, int ticks); -void untimeout(void (*)(void *), void *arg); -#endif /* KERNEL_PRIVATE */ -void bsd_timeout(void (*)(void *), void *arg, struct timespec * ts); -void bsd_untimeout(void 
(*)(void *), void *arg); - -void set_fsblocksize(struct vnode *); - -#ifdef BSD_KERNEL_PRIVATE int vslock(user_addr_t addr, user_size_t len); int vsunlock(user_addr_t addr, user_size_t len, int dirtied); int clone_system_shared_regions(int shared_regions_active, int chain_regions, int base_vnode); - extern kern_return_t bsd_exception(int, mach_exception_data_t codes, int); extern void bsdinit_task(void); extern void unix_syscall_return(int) __dead2; - void initclocks(void); - void startprofclock(struct proc *); void stopprofclock(struct proc *); void setstatclockrate(int hzrate); - struct time_value; void get_procrustime(struct time_value *tv); - void load_init_program(struct proc *p); void __pthread_testcancel(int presyscall); void syscall_exit_funnelcheck(void); @@ -234,7 +190,43 @@ void throttle_info_get_last_io_time(mount_t mp, struct timeval *tv); void update_last_io_time(mount_t mp); #endif /* BSD_KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE +void timeout(void (*)(void *), void *arg, int ticks); +void untimeout(void (*)(void *), void *arg); +int bsd_hostname(char *, int, int*); +#endif /* KERNEL_PRIVATE */ +int nullop(void); +int nulldev(void); +int enoioctl(void); +int enosys(void); +int enxio(void); +int eopnotsupp(void); +void *hashinit(int count, int type, u_long *hashmask); +void ovbcopy(const void *from, void *to, size_t len); +int fubyte(user_addr_t addr); +int fuibyte(user_addr_t addr); +int subyte(user_addr_t addr, int byte); +int suibyte(user_addr_t addr, int byte); +long fuword(user_addr_t addr); +long fuiword(user_addr_t addr); +int suword(user_addr_t addr, long word); +int suiword(user_addr_t addr, long word); +#define fusize(_a) ((user_size_t)fulong(_a)) +#define susize(_a, _s) sulong((_a), (_s)) +#define fuptr(a) ((user_addr_t)fulong(_a) +#define suptr(_a, _p) sulong((_a), (_p)) +int useracc(user_addr_t addr, user_size_t len,int prot); +typedef void (*timeout_fcn_t)(void *); +void bsd_timeout(void (*)(void *), void *arg, struct timespec * ts); 
+void bsd_untimeout(void (*)(void *), void *arg); +void set_fsblocksize(struct vnode *); +uint64_t tvtoabstime(struct timeval *); +void *throttle_info_create(void); +void throttle_info_mount_ref(mount_t mp, void * throttle_info); +void throttle_info_mount_rel(mount_t mp); +void throttle_info_release(void *throttle_info); +void throttle_info_update(void *throttle_info, int flags); __END_DECLS #endif /* !_SYS_SYSTM_H_ */ diff --git a/bsd/sys/termios.h b/bsd/sys/termios.h index e6bd8c950..c1f3b5dac 100644 --- a/bsd/sys/termios.h +++ b/bsd/sys/termios.h @@ -277,8 +277,8 @@ struct termios { }; #ifdef KERNEL -typedef unsigned long long user_tcflag_t; -typedef unsigned long long user_speed_t; +typedef __uint64_t user_tcflag_t; +typedef __uint64_t user_speed_t; /* * LP64 version of struct termios. tcflag_t and speed_t are long and must @@ -296,6 +296,17 @@ struct user_termios { user_speed_t c_ospeed; /* output speed */ }; +/* 32 bit version */ +struct termios32 { + __uint32_t c_iflag; /* input flags */ + __uint32_t c_oflag; /* output flags */ + __uint32_t c_cflag; /* control flags */ + __uint32_t c_lflag; /* local flags */ + cc_t c_cc[NCCS]; /* control chars */ + __uint32_t c_ispeed; /* input speed */ + __uint32_t c_ospeed; /* output speed */ +}; + #endif /* KERNEL */ /* diff --git a/bsd/sys/time.h b/bsd/sys/time.h index 6f3d264cb..732d1ae76 100644 --- a/bsd/sys/time.h +++ b/bsd/sys/time.h @@ -79,6 +79,13 @@ #define __need_struct_timeval #ifdef KERNEL #define __need_struct_user_timespec +#define __need_struct_user32_timespec +#define __need_struct_user64_timespec +#define __need_struct_user_timeval +#define __need_struct_user32_timeval +#define __need_struct_user64_timeval +#define __need_struct_user32_itimerval +#define __need_struct_user64_itimerval #endif /* KERNEL */ #include @@ -137,30 +144,6 @@ struct itimerval { #define FD_COPY(f, t) __DARWIN_FD_COPY(f, t) #endif /* FD_COPY */ -#ifdef KERNEL -#ifndef _USERTIMEVAL -#define _USERTIMEVAL - -#include /* user_time_t */ 
-/* - * LP64 version of struct timeval. time_t is a long and must grow when - * we're dealing with a 64-bit process. - * WARNING - keep in sync with struct timeval - */ - -struct user_timeval { - user_time_t tv_sec; /* seconds */ - suseconds_t tv_usec __attribute((aligned(8))); /* and microseconds */ -}; - -struct user_itimerval { - struct user_timeval it_interval; /* timer interval */ - struct user_timeval it_value; /* current value */ -}; - -#endif /* _USERTIMEVAL */ -#endif /* KERNEL */ - #define TIMEVAL_TO_TIMESPEC(tv, ts) { \ (ts)->tv_sec = (tv)->tv_sec; \ (ts)->tv_nsec = (tv)->tv_usec * 1000; \ diff --git a/bsd/sys/tree.h b/bsd/sys/tree.h new file mode 100644 index 000000000..f4bf40c73 --- /dev/null +++ b/bsd/sys/tree.h @@ -0,0 +1,718 @@ +/* + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $NetBSD: tree.h,v 1.13 2006/08/27 22:32:38 christos Exp $ */ +/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */ +/* + * Copyright 2002 Niels Provos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_TREE_H_ +#define _SYS_TREE_H_ + +/* + * This file defines data structures for different types of trees: + * splay trees and red-black trees. + * + * A splay tree is a self-organizing data structure. Every operation + * on the tree causes a splay to happen. The splay moves the requested + * node to the root of the tree and partly rebalances it. 
+ * + * This has the benefit that request locality causes faster lookups as + * the requested nodes move to the top of the tree. On the other hand, + * every lookup causes memory writes. + * + * The Balance Theorem bounds the total access time for m operations + * and n inserts on an initially empty tree as O((m + n)lg n). The + * amortized cost for a sequence of m accesses to a splay tree is O(lg n); + * + * A red-black tree is a binary search tree with the node color as an + * extra attribute. It fulfills a set of conditions: + * - every search path from the root to a leaf consists of the + * same number of black nodes, + * - each red node (except for the root) has a black parent, + * - each leaf node is black. + * + * Every operation on a red-black tree is bounded as O(lg n). + * The maximum height of a red-black tree is 2lg (n+1). + */ + +#define SPLAY_HEAD(name, type) \ +struct name { \ + struct type *sph_root; /* root of the tree */ \ +} + +#define SPLAY_INITIALIZER(root) \ + { NULL } + +#define SPLAY_INIT(root) do { \ + (root)->sph_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ENTRY(type) \ +struct { \ + struct type *spe_left; /* left element */ \ + struct type *spe_right; /* right element */ \ +} + +#define SPLAY_LEFT(elm, field) (elm)->field.spe_left +#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right +#define SPLAY_ROOT(head) (head)->sph_root +#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL) + +/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */ +#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + (head)->sph_root = tmp; \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKLEFT(head, 
tmp, field) do { \ + SPLAY_LEFT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_LINKRIGHT(head, tmp, field) do { \ + SPLAY_RIGHT(tmp, field) = (head)->sph_root; \ + tmp = (head)->sph_root; \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \ +} while (/*CONSTCOND*/ 0) + +#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \ + SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \ + SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \ + SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ + +#define SPLAY_PROTOTYPE(name, type, field, cmp) \ +void name##_SPLAY(struct name *, struct type *); \ +void name##_SPLAY_MINMAX(struct name *, int); \ +struct type *name##_SPLAY_INSERT(struct name *, struct type *); \ +struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \ + \ +/* Finds the node with the same key as elm */ \ +static __inline struct type * \ +name##_SPLAY_FIND(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) \ + return(NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) \ + return (head->sph_root); \ + return (NULL); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_NEXT(struct name *head, struct type *elm) \ +{ \ + name##_SPLAY(head, elm); \ + if (SPLAY_RIGHT(elm, field) != NULL) { \ + elm = SPLAY_RIGHT(elm, field); \ + while (SPLAY_LEFT(elm, field) != NULL) { \ + elm = SPLAY_LEFT(elm, field); \ + } \ + } else \ + elm = NULL; \ + return (elm); \ +} \ + \ +static __inline struct type * \ +name##_SPLAY_MIN_MAX(struct name *head, int val) \ +{ \ + name##_SPLAY_MINMAX(head, val); \ + return (SPLAY_ROOT(head)); \ +} + +/* Main splay operation. 
+ * Moves node close to the key of elm to top + */ +#define SPLAY_GENERATE(name, type, field, cmp) \ +struct type * \ +name##_SPLAY_INSERT(struct name *head, struct type *elm) \ +{ \ + if (SPLAY_EMPTY(head)) { \ + SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \ + } else { \ + int __comp; \ + name##_SPLAY(head, elm); \ + __comp = (cmp)(elm, (head)->sph_root); \ + if(__comp < 0) { \ + SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\ + SPLAY_RIGHT(elm, field) = (head)->sph_root; \ + SPLAY_LEFT((head)->sph_root, field) = NULL; \ + } else if (__comp > 0) { \ + SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\ + SPLAY_LEFT(elm, field) = (head)->sph_root; \ + SPLAY_RIGHT((head)->sph_root, field) = NULL; \ + } else \ + return ((head)->sph_root); \ + } \ + (head)->sph_root = (elm); \ + return (NULL); \ +} \ + \ +struct type * \ +name##_SPLAY_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *__tmp; \ + if (SPLAY_EMPTY(head)) \ + return (NULL); \ + name##_SPLAY(head, elm); \ + if ((cmp)(elm, (head)->sph_root) == 0) { \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \ + (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\ + } else { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\ + name##_SPLAY(head, elm); \ + SPLAY_RIGHT((head)->sph_root, field) = __tmp; \ + } \ + return (elm); \ + } \ + return (NULL); \ +} \ + \ +void \ +name##_SPLAY(struct name *head, struct type *elm) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ + int __comp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while ((__comp = (cmp)(elm, (head)->sph_root)) != 0) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } 
\ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if ((cmp)(elm, __tmp) > 0){ \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} \ + \ +/* Splay with either the minimum or the maximum element \ + * Used to find minimum or maximum element in tree. \ + */ \ +void name##_SPLAY_MINMAX(struct name *head, int __comp) \ +{ \ + struct type __node, *__left, *__right, *__tmp; \ +\ + SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\ + __left = __right = &__node; \ +\ + while (1) { \ + if (__comp < 0) { \ + __tmp = SPLAY_LEFT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp < 0){ \ + SPLAY_ROTATE_RIGHT(head, __tmp, field); \ + if (SPLAY_LEFT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKLEFT(head, __right, field); \ + } else if (__comp > 0) { \ + __tmp = SPLAY_RIGHT((head)->sph_root, field); \ + if (__tmp == NULL) \ + break; \ + if (__comp > 0) { \ + SPLAY_ROTATE_LEFT(head, __tmp, field); \ + if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\ + break; \ + } \ + SPLAY_LINKRIGHT(head, __left, field); \ + } \ + } \ + SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \ +} + +#define SPLAY_NEGINF -1 +#define SPLAY_INF 1 + +#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y) +#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y) +#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y) +#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y) +#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF)) +#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? 
NULL \ + : name##_SPLAY_MIN_MAX(x, SPLAY_INF)) + +#define SPLAY_FOREACH(x, name, head) \ + for ((x) = SPLAY_MIN(name, head); \ + (x) != NULL; \ + (x) = SPLAY_NEXT(name, head, x)) + +/* Macros that define a red-black tree */ +#define RB_HEAD(name, type) \ +struct name { \ + struct type *rbh_root; /* root of the tree */ \ +} + +#define RB_INITIALIZER(root) \ + { NULL } + +#define RB_INIT(root) do { \ + (root)->rbh_root = NULL; \ +} while (/*CONSTCOND*/ 0) + +#define RB_BLACK 0 +#define RB_RED 1 +#define RB_ENTRY(type) \ +struct { \ + struct type *rbe_left; /* left element */ \ + struct type *rbe_right; /* right element */ \ + struct type *rbe_parent; /* parent element */ \ + int rbe_color; /* node color */ \ +} + +#define RB_LEFT(elm, field) (elm)->field.rbe_left +#define RB_RIGHT(elm, field) (elm)->field.rbe_right +#define RB_PARENT(elm, field) (elm)->field.rbe_parent +#define RB_COLOR(elm, field) (elm)->field.rbe_color +#define RB_ROOT(head) (head)->rbh_root +#define RB_EMPTY(head) (RB_ROOT(head) == NULL) + +#define RB_SET(elm, parent, field) do { \ + RB_PARENT(elm, field) = parent; \ + RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \ + RB_COLOR(elm, field) = RB_RED; \ +} while (/*CONSTCOND*/ 0) + +#define RB_SET_BLACKRED(black, red, field) do { \ + RB_COLOR(black, field) = RB_BLACK; \ + RB_COLOR(red, field) = RB_RED; \ +} while (/*CONSTCOND*/ 0) + +#ifndef RB_AUGMENT +#define RB_AUGMENT(x) (void)(x) +#endif + +#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \ + (tmp) = RB_RIGHT(elm, field); \ + if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field)) != NULL) { \ + RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_LEFT(tmp, field) = (elm); \ + RB_PARENT(elm, 
field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (/*CONSTCOND*/ 0) + +#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \ + (tmp) = RB_LEFT(elm, field); \ + if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field)) != NULL) { \ + RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \ + } \ + RB_AUGMENT(elm); \ + if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field)) != NULL) { \ + if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \ + RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \ + else \ + RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \ + } else \ + (head)->rbh_root = (tmp); \ + RB_RIGHT(tmp, field) = (elm); \ + RB_PARENT(elm, field) = (tmp); \ + RB_AUGMENT(tmp); \ + if ((RB_PARENT(tmp, field))) \ + RB_AUGMENT(RB_PARENT(tmp, field)); \ +} while (/*CONSTCOND*/ 0) + +/* Generates prototypes and inline functions */ +#define RB_PROTOTYPE(name, type, field, cmp) \ +void name##_RB_INSERT_COLOR(struct name *, struct type *); \ +void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\ +struct type *name##_RB_REMOVE(struct name *, struct type *); \ +struct type *name##_RB_INSERT(struct name *, struct type *); \ +struct type *name##_RB_FIND(struct name *, struct type *); \ +struct type *name##_RB_NEXT(struct type *); \ +struct type *name##_RB_MINMAX(struct name *, int); + +/* Generates prototypes (with storage class) and inline functions */ +#define RB_PROTOTYPE_SC(_sc_, name, type, field, cmp) \ +_sc_ void name##_RB_INSERT_COLOR(struct name *, struct type *); \ +_sc_ void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *); \ +_sc_ struct type *name##_RB_REMOVE(struct name *, struct type *); \ +_sc_ struct type *name##_RB_INSERT(struct name *, struct type *); \ +_sc_ struct type *name##_RB_FIND(struct name *, struct type *); \ +_sc_ struct type *name##_RB_NEXT(struct type *); \ +_sc_ struct type *name##_RB_MINMAX(struct name *, int); + +/* Main rb operation. 
+ * Moves node close to the key of elm to top + */ +#define RB_GENERATE(name, type, field, cmp) \ +void \ +name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \ +{ \ + struct type *parent, *gparent, *tmp; \ + while ((parent = RB_PARENT(elm, field)) != NULL && \ + RB_COLOR(parent, field) == RB_RED) { \ + gparent = RB_PARENT(parent, field); \ + if (parent == RB_LEFT(gparent, field)) { \ + tmp = RB_RIGHT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_RIGHT(parent, field) == elm) { \ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_RIGHT(head, gparent, tmp, field); \ + } else { \ + tmp = RB_LEFT(gparent, field); \ + if (tmp && RB_COLOR(tmp, field) == RB_RED) { \ + RB_COLOR(tmp, field) = RB_BLACK; \ + RB_SET_BLACKRED(parent, gparent, field);\ + elm = gparent; \ + continue; \ + } \ + if (RB_LEFT(parent, field) == elm) { \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = parent; \ + parent = elm; \ + elm = tmp; \ + } \ + RB_SET_BLACKRED(parent, gparent, field); \ + RB_ROTATE_LEFT(head, gparent, tmp, field); \ + } \ + } \ + RB_COLOR(head->rbh_root, field) = RB_BLACK; \ +} \ + \ +void \ +name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \ +{ \ + struct type *tmp; \ + while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \ + elm != RB_ROOT(head)) { \ + if (RB_LEFT(parent, field) == elm) { \ + tmp = RB_RIGHT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == 
RB_BLACK)) {\ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\ + struct type *oleft; \ + if ((oleft = RB_LEFT(tmp, field)) \ + != NULL) \ + RB_COLOR(oleft, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_RIGHT(head, tmp, oleft, field);\ + tmp = RB_RIGHT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_RIGHT(tmp, field)) \ + RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_LEFT(head, parent, tmp, field);\ + elm = RB_ROOT(head); \ + break; \ + } \ + } else { \ + tmp = RB_LEFT(parent, field); \ + if (RB_COLOR(tmp, field) == RB_RED) { \ + RB_SET_BLACKRED(tmp, parent, field); \ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + if ((RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\ + (RB_RIGHT(tmp, field) == NULL || \ + RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\ + RB_COLOR(tmp, field) = RB_RED; \ + elm = parent; \ + parent = RB_PARENT(elm, field); \ + } else { \ + if (RB_LEFT(tmp, field) == NULL || \ + RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\ + struct type *oright; \ + if ((oright = RB_RIGHT(tmp, field)) \ + != NULL) \ + RB_COLOR(oright, field) = RB_BLACK;\ + RB_COLOR(tmp, field) = RB_RED; \ + RB_ROTATE_LEFT(head, tmp, oright, field);\ + tmp = RB_LEFT(parent, field); \ + } \ + RB_COLOR(tmp, field) = RB_COLOR(parent, field);\ + RB_COLOR(parent, field) = RB_BLACK; \ + if (RB_LEFT(tmp, field)) \ + RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\ + RB_ROTATE_RIGHT(head, parent, tmp, field);\ + elm = RB_ROOT(head); \ + break; \ + } \ + } \ + } \ + if (elm) \ + RB_COLOR(elm, field) = RB_BLACK; \ +} \ + \ +struct type * \ +name##_RB_REMOVE(struct name *head, struct type *elm) \ +{ \ + struct type *child, *parent, *old = elm; 
\ + int color; \ + if (RB_LEFT(elm, field) == NULL) \ + child = RB_RIGHT(elm, field); \ + else if (RB_RIGHT(elm, field) == NULL) \ + child = RB_LEFT(elm, field); \ + else { \ + struct type *left; \ + elm = RB_RIGHT(elm, field); \ + while ((left = RB_LEFT(elm, field)) != NULL) \ + elm = left; \ + child = RB_RIGHT(elm, field); \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ + if (RB_PARENT(elm, field) == old) \ + parent = elm; \ + (elm)->field = (old)->field; \ + if (RB_PARENT(old, field)) { \ + if (RB_LEFT(RB_PARENT(old, field), field) == old)\ + RB_LEFT(RB_PARENT(old, field), field) = elm;\ + else \ + RB_RIGHT(RB_PARENT(old, field), field) = elm;\ + RB_AUGMENT(RB_PARENT(old, field)); \ + } else \ + RB_ROOT(head) = elm; \ + RB_PARENT(RB_LEFT(old, field), field) = elm; \ + if (RB_RIGHT(old, field)) \ + RB_PARENT(RB_RIGHT(old, field), field) = elm; \ + if (parent) { \ + left = parent; \ + do { \ + RB_AUGMENT(left); \ + } while ((left = RB_PARENT(left, field)) != NULL); \ + } \ + goto color; \ + } \ + parent = RB_PARENT(elm, field); \ + color = RB_COLOR(elm, field); \ + if (child) \ + RB_PARENT(child, field) = parent; \ + if (parent) { \ + if (RB_LEFT(parent, field) == elm) \ + RB_LEFT(parent, field) = child; \ + else \ + RB_RIGHT(parent, field) = child; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = child; \ +color: \ + if (color == RB_BLACK) \ + name##_RB_REMOVE_COLOR(head, parent, child); \ + return (old); \ +} \ + \ +/* Inserts a node into the RB tree */ \ +struct type * \ +name##_RB_INSERT(struct name *head, struct type *elm) \ +{ \ + struct type *tmp; \ + struct type *parent = NULL; \ + int comp = 0; \ + tmp = RB_ROOT(head); \ + while (tmp) { \ + parent = tmp; \ + comp = (cmp)(elm, 
parent); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + RB_SET(elm, parent, field); \ + if (parent != NULL) { \ + if (comp < 0) \ + RB_LEFT(parent, field) = elm; \ + else \ + RB_RIGHT(parent, field) = elm; \ + RB_AUGMENT(parent); \ + } else \ + RB_ROOT(head) = elm; \ + name##_RB_INSERT_COLOR(head, elm); \ + return (NULL); \ +} \ + \ +/* Finds the node with the same key as elm */ \ +struct type * \ +name##_RB_FIND(struct name *head, struct type *elm) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + int comp; \ + while (tmp) { \ + comp = cmp(elm, tmp); \ + if (comp < 0) \ + tmp = RB_LEFT(tmp, field); \ + else if (comp > 0) \ + tmp = RB_RIGHT(tmp, field); \ + else \ + return (tmp); \ + } \ + return (NULL); \ +} \ + \ +/* ARGSUSED */ \ +struct type * \ +name##_RB_NEXT(struct type *elm) \ +{ \ + if (RB_RIGHT(elm, field)) { \ + elm = RB_RIGHT(elm, field); \ + while (RB_LEFT(elm, field)) \ + elm = RB_LEFT(elm, field); \ + } else { \ + if (RB_PARENT(elm, field) && \ + (elm == RB_LEFT(RB_PARENT(elm, field), field))) \ + elm = RB_PARENT(elm, field); \ + else { \ + while (RB_PARENT(elm, field) && \ + (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\ + elm = RB_PARENT(elm, field); \ + elm = RB_PARENT(elm, field); \ + } \ + } \ + return (elm); \ +} \ + \ +struct type * \ +name##_RB_MINMAX(struct name *head, int val) \ +{ \ + struct type *tmp = RB_ROOT(head); \ + struct type *parent = NULL; \ + while (tmp) { \ + parent = tmp; \ + if (val < 0) \ + tmp = RB_LEFT(tmp, field); \ + else \ + tmp = RB_RIGHT(tmp, field); \ + } \ + return (parent); \ +} + +#define RB_NEGINF -1 +#define RB_INF 1 + +#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y) +#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y) +#define RB_FIND(name, x, y) name##_RB_FIND(x, y) +#define RB_NEXT(name, x, y) name##_RB_NEXT(y) +#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF) +#define RB_MAX(name, x) name##_RB_MINMAX(x, 
RB_INF) + +#define RB_FOREACH(x, name, head) \ + for ((x) = RB_MIN(name, head); \ + (x) != NULL; \ + (x) = name##_RB_NEXT(x)) + +#endif /* _SYS_TREE_H_ */ diff --git a/bsd/sys/tty.h b/bsd/sys/tty.h index 97178a419..f0f546c48 100644 --- a/bsd/sys/tty.h +++ b/bsd/sys/tty.h @@ -76,6 +76,11 @@ #ifdef KERNEL + +__BEGIN_DECLS +#include +__END_DECLS + /* * NetBSD Clists are actually ring buffers. The c_cc, c_cf, c_cl fields have * exactly the same behaviour as in true clists. @@ -107,6 +112,8 @@ struct clist { * (low, high, timeout). */ struct tty { + lck_mtx_t t_lock; /* Per tty lock */ + struct clist t_rawq; /* Device raw input queue. */ long t_rawcc; /* Raw input queue statistics. */ struct clist t_canq; /* Device canonical queue. */ @@ -136,6 +143,7 @@ struct tty { int t_hiwat; /* High water mark. */ int t_lowat; /* Low water mark. */ int t_gen; /* Generation number. */ + void *t_iokit; /* IOKit management */ }; #define TTY_NULL (struct tty *)0 @@ -273,17 +281,26 @@ void cinit(void); void clrbits(u_char *cp, int off, int len); #ifdef KERNEL_PRIVATE +void tty_init(void); +/* + * The locked version of this function is used from routines which hold + * the tty_lock(), such as ttcompat() in tty_compat.c + */ +int ttioctl_locked(struct tty *tp, u_long com, caddr_t data, int flag, + struct proc *p); + int ttcompat(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p); -int ttsetcompat(struct tty *tp, u_long *com, caddr_t data, struct termios *term); #endif /* KERNEL_PRIVATE */ +void tty_lock(struct tty *tp); +void tty_unlock(struct tty *tp); + void termioschars(struct termios *t); int tputchar(int c, struct tty *tp); int ttioctl(struct tty *tp, u_long com, caddr_t data, int flag, struct proc *p); int ttread(struct tty *tp, struct uio *uio, int flag); -void ttrstrt(void *tp); int ttyselect(struct tty *tp, int rw, void * wql, struct proc *p); int ttselect(dev_t dev, int rw, void * wql, struct proc *p); void ttsetwater(struct tty *tp); @@ -293,11 +310,11 @@ void 
ttwakeup(struct tty *tp); int ttwrite(struct tty *tp, struct uio *uio, int flag); void ttwwakeup(struct tty *tp); void ttyblock(struct tty *tp); -void ttychars(struct tty *tp); int ttycheckoutq(struct tty *tp, int wait); -int ttyclose(struct tty *tp); +int ttyclose(struct tty *tp); /* LEGACY: avoid using */ void ttyflush(struct tty *tp, int rw); void ttyinfo(struct tty *tp); +void ttyinfo_locked(struct tty *tp); int ttyinput(int c, struct tty *tp); int ttylclose(struct tty *tp, int flag); int ttymodem(struct tty *tp, int flag); diff --git a/bsd/sys/ttycom.h b/bsd/sys/ttycom.h index eb6a60830..a9c137862 100644 --- a/bsd/sys/ttycom.h +++ b/bsd/sys/ttycom.h @@ -111,6 +111,10 @@ struct winsize { #define TIOCSETAW _IOW('t', 21, struct termios) /* drain output, set */ #define TIOCSETAF _IOW('t', 22, struct termios) /* drn out, fls in, set */ #ifdef KERNEL +#define TIOCGETA_32 _IOR('t', 19, struct termios32) /* get termios struct */ +#define TIOCSETA_32 _IOW('t', 20, struct termios32) /* set termios struct */ +#define TIOCSETAW_32 _IOW('t', 21, struct termios32) /* drain output, set */ +#define TIOCSETAF_32 _IOW('t', 22, struct termios32) /* drn out, fls in, set */ #define TIOCGETA_64 _IOR('t', 19, struct user_termios) #define TIOCSETA_64 _IOW('t', 20, struct user_termios) #define TIOCSETAW_64 _IOW('t', 21, struct user_termios) @@ -165,8 +169,10 @@ struct winsize { #define TIOCDCDTIMESTAMP _IOR('t', 88, struct timeval) /* enable/get timestamp * of last DCd rise */ #ifdef KERNEL -#define TIOCTIMESTAMP_64 _IOR('t', 89, struct user_timeval) -#define TIOCDCDTIMESTAMP_64 _IOR('t', 88, struct user_timeval) +#define TIOCTIMESTAMP_32 _IOR('t', 89, struct user32_timeval) +#define TIOCDCDTIMESTAMP_32 _IOR('t', 88, struct user32_timeval) +#define TIOCTIMESTAMP_64 _IOR('t', 89, struct user64_timeval) +#define TIOCDCDTIMESTAMP_64 _IOR('t', 88, struct user64_timeval) #endif #define TIOCSDRAINWAIT _IOW('t', 87, int) /* set ttywait timeout */ #define TIOCGDRAINWAIT _IOR('t', 86, int) /* 
get ttywait timeout */ diff --git a/bsd/sys/types.h b/bsd/sys/types.h index 45ff20e24..eec5230dd 100644 --- a/bsd/sys/types.h +++ b/bsd/sys/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/*- +/* * Copyright (c) 1982, 1986, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -368,4 +368,5 @@ typedef __darwin_fsblkcnt_t fsblkcnt_t; typedef __darwin_fsfilcnt_t fsfilcnt_t; #endif + #endif /* !_SYS_TYPES_H_ */ diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h index 739eae812..a26ba1caa 100644 --- a/bsd/sys/ubc.h +++ b/bsd/sys/ubc.h @@ -55,20 +55,22 @@ off_t ubc_getsize(struct vnode *); int ubc_setsize(struct vnode *, off_t); kauth_cred_t ubc_getcred(struct vnode *); -#ifdef __APPLE_API_OBSOLETE -/* This API continues to exist only until is resolved */ -int ubc_setcred(struct vnode *, struct proc *) __deprecated; -#endif struct thread; int ubc_setthreadcred(struct vnode *, struct proc *, struct thread *); int ubc_sync_range(vnode_t, off_t, off_t, int); errno_t ubc_msync(vnode_t, off_t, off_t, off_t *, int); int ubc_pages_resident(vnode_t); +int ubc_page_op(vnode_t, off_t, int, ppnum_t *, int *); +int ubc_range_op(vnode_t, off_t, off_t, int, int *); +#ifdef KERNEL_PRIVATE +/* This API continues to exist only until is resolved */ +int ubc_setcred(struct vnode *, struct proc *) __deprecated; /* code signing */ struct cs_blob; struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t); +#endif /* cluster IO routines */ int advisory_read(vnode_t, off_t, off_t, int); @@ -80,11 +82,11 @@ int cluster_read_ext(vnode_t, struct uio *, off_t, int, int (*)(buf_t, void *), int cluster_write(vnode_t, struct uio *, off_t, off_t, off_t, 
off_t, int); int cluster_write_ext(vnode_t, struct uio *, off_t, off_t, off_t, off_t, int, int (*)(buf_t, void *), void *); -int cluster_pageout(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int); -int cluster_pageout_ext(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int, int (*)(buf_t, void *), void *); +int cluster_pageout(vnode_t, upl_t, upl_offset_t, off_t, int, off_t, int); +int cluster_pageout_ext(vnode_t, upl_t, upl_offset_t, off_t, int, off_t, int, int (*)(buf_t, void *), void *); -int cluster_pagein(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int); -int cluster_pagein_ext(vnode_t, upl_t, vm_offset_t, off_t, int, off_t, int, int (*)(buf_t, void *), void *); +int cluster_pagein(vnode_t, upl_t, upl_offset_t, off_t, int, off_t, int); +int cluster_pagein_ext(vnode_t, upl_t, upl_offset_t, off_t, int, off_t, int, int (*)(buf_t, void *), void *); int cluster_push(vnode_t, int); int cluster_push_ext(vnode_t, int, int (*)(buf_t, void *), void *); @@ -92,15 +94,15 @@ int cluster_push_ext(vnode_t, int, int (*)(buf_t, void *), void *); int cluster_bp(buf_t); int cluster_bp_ext(buf_t, int (*)(buf_t, void *), void *); -void cluster_zero(upl_t, vm_offset_t, int, buf_t); +void cluster_zero(upl_t, upl_offset_t, int, buf_t); int cluster_copy_upl_data(uio_t, upl_t, int, int *); int cluster_copy_ubc_data(vnode_t, uio_t, int *, int); /* UPL routines */ -int ubc_create_upl(vnode_t, off_t, long, upl_t *, upl_page_info_t **, int); -int ubc_upl_map(upl_t, upl_offset_t *); +int ubc_create_upl(vnode_t, off_t, int, upl_t *, upl_page_info_t **, int); +int ubc_upl_map(upl_t, vm_offset_t *); int ubc_upl_unmap(upl_t); int ubc_upl_commit(upl_t); int ubc_upl_commit_range(upl_t, upl_offset_t, upl_size_t, int); @@ -110,6 +112,8 @@ int ubc_upl_abort_range(upl_t, upl_offset_t, upl_size_t, int); upl_page_info_t *ubc_upl_pageinfo(upl_t); upl_size_t ubc_upl_maxbufsize(void); +int is_file_clean(vnode_t, off_t); + __END_DECLS #endif /* _SYS_UBC_H_ */ diff --git a/bsd/sys/ubc_internal.h 
b/bsd/sys/ubc_internal.h index 9ac742bc1..7c73b04ee 100644 --- a/bsd/sys/ubc_internal.h +++ b/bsd/sys/ubc_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,7 +59,9 @@ extern struct zone *ubc_info_zone; -#define MAX_CLUSTERS 4 /* maximum number of vfs clusters per vnode */ +#define MAX_CLUSTERS 8 /* maximum number of vfs clusters per vnode */ +#define SPARSE_PUSH_LIMIT 4 /* limit on number of concurrent sparse pushes outside of the cl_lockw */ + /* once we reach this limit, we'll hold the lock */ struct cl_extent { daddr64_t b_addr; @@ -82,7 +84,8 @@ struct cl_readahead { struct cl_writebehind { lck_mtx_t cl_lockw; void * cl_scmap; /* pointer to sparse cluster map */ - int cl_scdirty; /* number of dirty pages in the sparse cluster map */ + int cl_sparse_pushes; /* number of pushes outside of the cl_lockw in progress */ + int cl_sparse_wait; /* synchronous push is in progress */ int cl_number; /* number of packed write behind clusters currently valid */ struct cl_wextent cl_clusters[MAX_CLUSTERS]; /* packed write behind clusters */ }; @@ -92,9 +95,9 @@ struct cs_blob { struct cs_blob *csb_next; cpu_type_t csb_cpu_type; unsigned int csb_flags; - off_t csb_base_offset; - off_t csb_start_offset; - off_t csb_end_offset; + off_t csb_base_offset; /* Offset of Mach-O binary in fat binary */ + off_t csb_start_offset; /* Blob coverage area start, from csb_base_offset */ + off_t csb_end_offset; /* Blob coverage area end, from csb_base_offset */ ipc_port_t csb_mem_handle; vm_size_t csb_mem_size; vm_offset_t csb_mem_offset; @@ -109,7 +112,7 @@ struct cs_blob { struct ubc_info { memory_object_t ui_pager; /* pager */ memory_object_control_t ui_control; /* VM control for the pager */ - long ui_flags; /* flags */ + uint32_t ui_flags; /* flags */ vnode_t ui_vnode; /* vnode for this ubc_info */ kauth_cred_t ui_ucred; /* holds 
credentials for NFS paging */ off_t ui_size; /* file size for the vnode */ @@ -135,7 +138,7 @@ struct ubc_info { */ __BEGIN_DECLS -__private_extern__ void ubc_init(void); +__private_extern__ void ubc_init(void) __attribute__((section("__TEXT, initcode")));; __private_extern__ int ubc_umount(mount_t mp); __private_extern__ void ubc_unmountall(void); __private_extern__ memory_object_t ubc_getpager(vnode_t); @@ -144,8 +147,8 @@ __private_extern__ void ubc_destroy_named(vnode_t); /* internal only */ __private_extern__ void cluster_release(struct ubc_info *); __private_extern__ uint32_t cluster_max_io_size(mount_t, int); - - +__private_extern__ uint32_t cluster_hard_throttle_limit(vnode_t, uint32_t *, uint32_t); + /* Flags for ubc_getobject() */ #define UBC_FLAGS_NONE 0x0000 @@ -161,12 +164,11 @@ void ubc_info_deallocate(struct ubc_info *); int ubc_isinuse(vnode_t, int); int ubc_isinuse_locked(vnode_t, int, int); -int ubc_page_op(vnode_t, off_t, int, ppnum_t *, int *); -int ubc_range_op(vnode_t, off_t, off_t, int, int *); - int ubc_getcdhash(vnode_t, off_t, unsigned char *); +#ifdef XNU_KERNEL_PRIVATE int UBCINFOEXISTS(vnode_t); +#endif /* XNU_KERNEL_PRIVATE */ /* code signing */ struct cs_blob; diff --git a/bsd/sys/ucontext.h b/bsd/sys/ucontext.h index 7ddf51a15..b31d50ed3 100644 --- a/bsd/sys/ucontext.h +++ b/bsd/sys/ucontext.h @@ -52,16 +52,24 @@ typedef __darwin_sigset_t sigset_t; #include /* user_addr_t, user_size_t */ /* kernel representation of struct ucontext64 for 64 bit processes */ -struct user_ucontext64 { +typedef struct user_ucontext64 { int uc_onstack; sigset_t uc_sigmask; /* signal mask */ - struct user_sigaltstack uc_stack; /* stack */ + struct user64_sigaltstack uc_stack; /* stack */ user_addr_t uc_link; /* ucontext pointer */ user_size_t uc_mcsize; /* mcontext size */ user_addr_t uc_mcontext64; /* machine context */ -}; +} user_ucontext64_t; + +typedef struct user_ucontext32 { + int uc_onstack; + sigset_t uc_sigmask; /* signal mask */ + struct 
user32_sigaltstack uc_stack; /* stack */ + user32_addr_t uc_link; /* ucontext pointer */ + user32_size_t uc_mcsize; /* mcontext size */ + user32_addr_t uc_mcontext; /* machine context */ +} user_ucontext32_t; -typedef struct user_ucontext64 user_ucontext64_t; #endif /* KERNEL */ #endif /* _SYS_UCONTEXT_H_ */ diff --git a/bsd/sys/ucred.h b/bsd/sys/ucred.h index 786f23aee..0d8b0f2a4 100644 --- a/bsd/sys/ucred.h +++ b/bsd/sys/ucred.h @@ -78,6 +78,7 @@ struct label; #ifdef __APPLE_API_UNSTABLE +#include /* * In-kernel credential structure. @@ -101,7 +102,12 @@ struct ucred { gid_t cr_rgid; /* real group id */ gid_t cr_svgid; /* saved group id */ uid_t cr_gmuid; /* UID for group membership purposes */ - struct auditinfo cr_au; /* user auditing data */ + /* + * XXX - cr_au will be replaced with cr_audit below. + * cr_au is here to keep kexts from breaking. It seems to + * be currently used by the ucred hashing as well. + */ + struct auditinfo cr_au; /* XXX This needs to go away. */ struct label *cr_label; /* MAC label */ int cr_flags; /* flags on credential */ @@ -110,6 +116,7 @@ struct ucred { * added after the label, you must change * kauth_cred_find(). */ + struct au_session cr_audit; /* user auditing data */ }; #ifndef _KAUTH_CRED_T #define _KAUTH_CRED_T diff --git a/bsd/sys/uio.h b/bsd/sys/uio.h index 059c5fa5a..1c48b49cd 100644 --- a/bsd/sys/uio.h +++ b/bsd/sys/uio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -119,15 +119,15 @@ enum uio_rw { UIO_READ, UIO_WRITE }; * NOTES - * UIO_USERSPACE is equivalent to UIO_USERSPACE32, but UIO_USERSPACE32 * is preferred. UIO_USERSPACE remains for backwards compatibility. - * UIO_SYSSPACE is equivalent to UIO_SYSSPACE32, but UIO_SYSSPACE32 - * is preferred. UIO_SYSSPACE remains for backwards compatibility. 
+ * UIO_SYSSPACE is equivalent to UIO_SYSSPACE32, but UIO_SYSSPACE + * is preferred. */ enum uio_seg { UIO_USERSPACE = 0, /* kernel address is virtual, to/from user virtual */ UIO_SYSSPACE = 2, /* kernel address is virtual, to/from system virtual */ UIO_USERSPACE32 = 5, /* kernel address is virtual, to/from user 32-bit virtual */ UIO_USERSPACE64 = 8, /* kernel address is virtual, to/from user 64-bit virtual */ - UIO_SYSSPACE32 = 11 /* kernel address is virtual, to/from system virtual */ + UIO_SYSSPACE32 = 11 /* deprecated */ }; #define UIO_SEG_IS_USER_SPACE( a_uio_seg ) \ @@ -258,9 +258,7 @@ user_size_t uio_curriovlen( uio_t a_uio ); #define UIO_SMALLIOV 8 /* 8 on stack, else malloc */ extern int uiomove(const char * cp, int n, struct uio *uio); -extern int uiomove64(const unsigned long long cp, int n, struct uio *uio); -extern int ureadc(int c, struct uio *uio); -extern int uwritec(struct uio *uio); +extern int uiomove64(const __uint64_t cp, int n, struct uio *uio); __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/uio_internal.h b/bsd/sys/uio_internal.h index db7c5790f..470291cf4 100644 --- a/bsd/sys/uio_internal.h +++ b/bsd/sys/uio_internal.h @@ -69,6 +69,7 @@ #ifdef KERNEL_PRIVATE #include #include +#include /* * user / kernel address space type flags. @@ -87,45 +88,50 @@ #define UIO_USERISPACE64 9 #define UIO_PHYS_USERSPACE64 10 // UIO_SYSSPACE32 11 defined in uio.h -#define UIO_PHYS_SYSSPACE32 12 -#define UIO_SYSSPACE64 13 -#define UIO_PHYS_SYSSPACE64 14 +// UIO_PHYS_SYSSPACE32 12 reserved, never used. Use UIO_PHYS_SYSSPACE +// UIO_SYSSPACE64 13 reserved, never used. Use UIO_SYSSPACE +// UIO_PHYS_SYSSPACE64 14 reserved, never used. 
Use UIO_PHYS_SYSSPACE __BEGIN_DECLS struct user_iovec; -// uio_iovsaddr was __private_extern__ temporary chnage for 3777436 -struct user_iovec * uio_iovsaddr( uio_t a_uio ); +#ifdef XNU_KERNEL_PRIVATE +__private_extern__ struct user_iovec * uio_iovsaddr( uio_t a_uio ); __private_extern__ void uio_calculateresid( uio_t a_uio ); __private_extern__ void uio_setcurriovlen( uio_t a_uio, user_size_t a_value ); -// uio_spacetype was __private_extern__ temporary chnage for 3777436 -int uio_spacetype( uio_t a_uio ); -__private_extern__ uio_t - uio_createwithbuffer( int a_iovcount, off_t a_offset, int a_spacetype, - int a_iodirection, void *a_buf_p, int a_buffer_size ); +__private_extern__ int uio_spacetype( uio_t a_uio ); +__private_extern__ uio_t uio_createwithbuffer( int a_iovcount, off_t a_offset, int a_spacetype, int a_iodirection, void *a_buf_p, size_t a_buffer_size ); +__private_extern__ int copyin_user_iovec_array(user_addr_t uaddr, int spacetype, int count, struct user_iovec *dst); +/* reverse of uio_update to "undo" uncommitted I/O. This only works in + * limited cases */ +__private_extern__ void uio_pushback( uio_t a_uio, user_size_t a_count ); +#endif /* XNU_KERNEL_PRIVATE */ /* use kern_iovec for system space requests */ struct kern_iovec { - u_int32_t iov_base; /* Base address. */ - u_int32_t iov_len; /* Length. */ + u_int64_t iov_base; /* Base address. */ + u_int64_t iov_len; /* Length. */ }; - + /* use user_iovec for user space requests */ struct user_iovec { user_addr_t iov_base; /* Base address. */ user_size_t iov_len; /* Length. */ }; -#if 1 // LP64todo - remove this after kext adopt new KPI -#define uio_iov uio_iovs.iovp -#define iovec_32 kern_iovec -#define iovec_64 user_iovec -#define iov32p kiovp -#define iov64p uiovp -#endif +/* use user32_iovec/user64_iovec for representing + * in-memory structures in 32-bit and 64-bit processes during copyin */ +struct user32_iovec { + uint32_t iov_base; /* Base address. */ + uint32_t iov_len; /* Length.
*/ +}; + +struct user64_iovec { + uint64_t iov_base; /* Base address. */ + uint64_t iov_len; /* Length. */ +}; union iovecs { - struct iovec *iovp; struct kern_iovec *kiovp; struct user_iovec *uiovp; }; @@ -136,10 +142,8 @@ struct uio { union iovecs uio_iovs; /* current iovec */ int uio_iovcnt; /* active iovecs */ off_t uio_offset; - int uio_resid; /* compatibility uio_resid (pre-LP64) */ enum uio_seg uio_segflg; enum uio_rw uio_rw; - proc_t uio_procp; /* obsolete - not used! */ user_ssize_t uio_resid_64; int uio_size; /* size for use with kfree */ int uio_max_iovs; /* max number of iovecs this uio_t can hold */ @@ -149,6 +153,7 @@ struct uio { /* values for uio_flags */ #define UIO_FLAGS_INITED 0x00000001 #define UIO_FLAGS_WE_ALLOCED 0x00000002 +#define UIO_FLAGS_IS_COMPRESSED_FILE 0x00000004 __END_DECLS @@ -158,20 +163,8 @@ __END_DECLS * create a stack buffer that can be passed to uio_createwithbuffer. */ #define UIO_SIZEOF( a_iovcount ) \ - ( sizeof(struct uio) + (sizeof(struct user_iovec) * (a_iovcount)) ) + ( sizeof(struct uio) + (MAX(sizeof(struct user_iovec), sizeof(struct kern_iovec)) * (a_iovcount)) ) -#define UIO_IS_64_BIT_SPACE( a_uio_t ) \ - ( (a_uio_t)->uio_segflg == UIO_USERSPACE64 || (a_uio_t)->uio_segflg == UIO_USERISPACE64 || \ - (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE64 || (a_uio_t)->uio_segflg == UIO_SYSSPACE64 || \ - (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE64 ) - -#define UIO_IS_32_BIT_SPACE( a_uio_t ) \ - ( (a_uio_t)->uio_segflg == UIO_USERSPACE || (a_uio_t)->uio_segflg == UIO_USERISPACE || \ - (a_uio_t)->uio_segflg == UIO_SYSSPACE || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE || \ - (a_uio_t)->uio_segflg == UIO_USERISPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE32 || \ - (a_uio_t)->uio_segflg == UIO_SYSSPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE32 || \ - (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE || (a_uio_t)->uio_segflg == UIO_USERSPACE32 ) - #define UIO_IS_USER_SPACE32( a_uio_t ) \ ( (a_uio_t)->uio_segflg == 
UIO_USERSPACE32 || (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE32 || \ (a_uio_t)->uio_segflg == UIO_USERISPACE32 ) @@ -182,270 +175,11 @@ __END_DECLS ( UIO_IS_USER_SPACE32((a_uio_t)) || UIO_IS_USER_SPACE64((a_uio_t)) || \ (a_uio_t)->uio_segflg == UIO_USERSPACE || (a_uio_t)->uio_segflg == UIO_USERISPACE || \ (a_uio_t)->uio_segflg == UIO_PHYS_USERSPACE ) +#define UIO_IS_SYS_SPACE( a_uio_t ) \ + ( (a_uio_t)->uio_segflg == UIO_SYSSPACE || (a_uio_t)->uio_segflg == UIO_PHYS_SYSSPACE || \ + (a_uio_t)->uio_segflg == UIO_SYSSPACE32 ) - -/* - * W A R N I N G!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - * anything in this section will be removed. please move to the uio KPI - */ - -#if 1 // UIO_KPI - WARNING OBSOLETE!!!! LP64todo - remove these!!!! -// DO NOT USE THESE -#define IS_UIO_USER_SPACE32( segflg ) \ - ( (segflg) == UIO_USERSPACE32 || (segflg) == UIO_PHYS_USERSPACE32 || \ - (segflg) == UIO_USERISPACE32 ) -#define IS_UIO_USER_SPACE64( segflg ) \ - ( (segflg) == UIO_USERSPACE64 || (segflg) == UIO_PHYS_USERSPACE64 || \ - (segflg) == UIO_USERISPACE64 ) -#define IS_UIO_USER_SPACE( segflg ) \ - ( IS_UIO_USER_SPACE32((segflg)) || IS_UIO_USER_SPACE64((segflg)) || \ - (segflg) == UIO_USERSPACE || (segflg) == UIO_USERISPACE || \ - (segflg) == UIO_PHYS_USERSPACE ) - -#define IS_UIO_SYS_SPACE32( segflg ) \ - ( (segflg) == UIO_SYSSPACE32 || (segflg) == UIO_PHYS_SYSSPACE32 || \ - (segflg) == UIO_SYSSPACE || (segflg) == UIO_PHYS_SYSSPACE ) -#define IS_UIO_SYS_SPACE64( segflg ) \ - ( (segflg) == UIO_SYSSPACE64 || (segflg) == UIO_PHYS_SYSSPACE64 ) -#define IS_UIO_SYS_SPACE( segflg ) \ - ( IS_UIO_SYS_SPACE32((segflg)) || IS_UIO_SYS_SPACE64((segflg)) ) - -#define IS_OBSOLETE_UIO_SEGFLG(segflg) \ - ( (segflg) == UIO_USERSPACE || (segflg) == UIO_USERISPACE || \ - (segflg) == UIO_SYSSPACE || (segflg) == UIO_PHYS_USERSPACE || \ - (segflg) == UIO_PHYS_SYSSPACE ) -#define IS_VALID_UIO_SEGFLG(segflg) \ - ( IS_UIO_USER_SPACE((segflg)) || IS_UIO_SYS_SPACE((segflg)) ) - -/* 
accessor routines for uio and embedded iovecs */ -// WARNING all these are OBSOLETE!!!! -static inline int64_t uio_uio_resid( struct uio *a_uiop ); -static inline void uio_uio_resid_add( struct uio *a_uiop, int64_t a_amount ); -static inline void uio_uio_resid_set( struct uio *a_uiop, int64_t a_value ); - -static inline void uio_iov_base_add( struct uio *a_uiop, int64_t a_amount ); -static inline void uio_iov_base_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ); -static inline void uio_iov_len_add( struct uio *a_uiop, int64_t a_amount ); -static inline void uio_iov_len_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ); -static inline u_int64_t uio_iov_len( struct uio *a_uiop ); -static inline u_int64_t uio_iov_len_at( struct uio *a_uiop, int a_index ); -static inline u_int64_t uio_iov_base( struct uio *a_uiop ); -static inline u_int64_t uio_iov_base_at( struct uio *a_uiop, int a_index ); -static inline void uio_next_iov( struct uio *a_uiop ); -static inline void uio_iov_len_set( struct uio *a_uiop, u_int64_t a_value ); -static inline void uio_iov_len_set_at( struct uio *a_uiop, u_int64_t a_value, int a_index ); - - -static inline int64_t uio_uio_resid( struct uio *a_uiop ) -{ -//#warning obsolete - use uio_resid call - return( (int64_t)a_uiop->uio_resid ); -} - -static inline void uio_uio_resid_add( struct uio *a_uiop, int64_t a_amount ) -{ -//#warning obsolete - use uio_update or uio_addiov or uio_setresid if in kernel and you must - a_uiop->uio_resid += ((int32_t) a_amount); -} - -static inline void uio_uio_resid_set( struct uio *a_uiop, int64_t a_value ) -{ -//#warning obsolete - use uio_update or uio_addiov or uio_setresid if in kernel and you must - a_uiop->uio_resid = a_value; -} - -static inline u_int64_t uio_iov_base( struct uio *a_uiop ) -{ -//#warning obsolete - use uio_curriovbase call - return(uio_iov_base_at(a_uiop, 0)); -} - -static inline u_int64_t uio_iov_base_at( struct uio *a_uiop, int a_index ) -{ -//#warning obsolete - use 
uio_curriovbase call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - /* user space iovec was most likely a struct iovec so we must cast to uintptr_t first */ - return((u_int64_t)((uintptr_t)a_uiop->uio_iovs.iov32p[a_index].iov_base)); - } - if (IS_UIO_SYS_SPACE32(a_uiop->uio_segflg)) { - return((u_int64_t)a_uiop->uio_iovs.iov32p[a_index].iov_base); - } - if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - return(a_uiop->uio_iovs.iov64p[a_index].iov_base); - } - return(0); -} - -static inline u_int64_t uio_iov_len( struct uio *a_uiop ) -{ -//#warning obsolete - use uio_curriovlen call - return(uio_iov_len_at(a_uiop, 0)); -} - -static inline u_int64_t uio_iov_len_at( struct uio *a_uiop, int a_index ) -{ -//#warning obsolete - use uio_curriovlen call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || - IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || - IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - return((u_int64_t)a_uiop->uio_iovs.iov32p[a_index].iov_len); - } - if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - return(a_uiop->uio_iovs.iov64p[a_index].iov_len); - } - return(0); -} - -static inline void uio_iov_len_set_at( struct uio *a_uiop, u_int64_t a_value, int a_index ) -{ -//#warning obsolete - use uio_addiov call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || - IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || - IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov32p[a_index].iov_len = a_value; - } - else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov64p[a_index].iov_len = a_value; - } - return; -} - -static inline void uio_iov_len_set( struct uio *a_uiop, u_int64_t a_value ) -{ -//#warning obsolete - use uio_addiov call - return(uio_iov_len_set_at(a_uiop, a_value, 0)); -} - -static inline void uio_iov_len_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ) -{ 
-//#warning obsolete - use uio_addiov call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || - IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || - IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov32p[a_index].iov_len += ((int32_t) a_amount); - } - else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov64p[a_index].iov_len += a_amount; - } - return; -} - -static inline void uio_iov_len_add( struct uio *a_uiop, int64_t a_amount ) -{ -//#warning obsolete - use uio_addiov call - return(uio_iov_len_add_at(a_uiop, a_amount, 0)); -} - -static inline void uio_iov_base_add_at( struct uio *a_uiop, int64_t a_amount, int a_index ) -{ -//#warning obsolete - use uio_addiov call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || - IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || - IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov32p[a_index].iov_base += ((int32_t) a_amount); - } - else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov64p[a_index].iov_base += a_amount; - } - return; -} - -static inline void uio_iov_base_add( struct uio *a_uiop, int64_t a_amount ) -{ -//#warning obsolete - use uio_addiov call - return(uio_iov_base_add_at(a_uiop, a_amount, 0)); -} - -static inline void uio_next_iov( struct uio *a_uiop ) -{ -//#warning obsolete - use uio_update call - if (IS_UIO_USER_SPACE32(a_uiop->uio_segflg) || - IS_UIO_SYS_SPACE32(a_uiop->uio_segflg) || - IS_OBSOLETE_UIO_SEGFLG(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov32p++; - } - else if (IS_UIO_USER_SPACE64(a_uiop->uio_segflg) || IS_UIO_SYS_SPACE64(a_uiop->uio_segflg)) { - a_uiop->uio_iovs.iov64p++; - } - return; -} - -/* - * WARNING - this routine relies on iovec_64 being larger than iovec_32 and will - * not work if you are going to initialize an array of iovec_64 as an array of - * iovec_32 then pass that array in a uio (since uio_iov is always expected to - * be an array of like 
sized iovecs - see how uio_next_iov gets to the next iovec) - */ -static inline void init_iovec( u_int64_t a_base, - u_int64_t a_len, - struct iovec_64 *a_iovp, - int is_64bit_process ) -{ -//#warning obsolete - use uio_create call - if (is_64bit_process) { - a_iovp->iov_base = a_base; - a_iovp->iov_len = a_len; - } - else { - struct iovec_32 *a_iov32p = (struct iovec_32 *) a_iovp; - a_iov32p->iov_base = a_base; - a_iov32p->iov_len = a_len; - } - return; -} - -#define INIT_UIO_BASE( uiop, iovcnt, offset, resid, rw, procp ) \ -{ \ - (uiop)->uio_iovcnt = (iovcnt); \ - (uiop)->uio_offset = (offset); \ - (uiop)->uio_resid = (resid); \ - (uiop)->uio_rw = (rw); \ - (uiop)->uio_procp = (procp); \ -} -#define INIT_UIO_USER32( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ -{ \ - (uiop)->uio_iovs.iov32p = (iovp); \ - (uiop)->uio_segflg = UIO_USERSPACE; \ - INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ -} -#define INIT_UIO_USER64( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ -{ \ - (uiop)->uio_iovs.iov64p = (iovp); \ - (uiop)->uio_segflg = UIO_USERSPACE64; \ - INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ -} -#define INIT_UIO_SYS32( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ -{ \ - (uiop)->uio_iovs.iov32p = (iovp); \ - (uiop)->uio_segflg = UIO_SYSSPACE; \ - INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ -} -#define INIT_UIO_USERSPACE( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ -{ \ - if (IS_64BIT_PROCESS((procp))) { \ - (uiop)->uio_iovs.iov64p = (iovp); \ - (uiop)->uio_segflg = UIO_USERSPACE64; \ - } \ - else { \ - (uiop)->uio_iovs.iov32p = (struct iovec_32 *)(iovp); \ - (uiop)->uio_segflg = UIO_USERSPACE; \ - } \ - INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ -} -#define INIT_UIO_SYSSPACE( uiop, iovp, iovcnt, offset, resid, rw, procp ) \ -{ \ - if (0) { /* we do not support 64-bit system space yet */ \ - (uiop)->uio_iovs.iov64p = (iovp); \ - (uiop)->uio_segflg = 
UIO_SYSSPACE64; \ - } \ - else { \ - (uiop)->uio_iovs.iov32p = (struct iovec_32 *)(iovp); \ - (uiop)->uio_segflg = UIO_SYSSPACE; \ - } \ - INIT_UIO_BASE((uiop), (iovcnt), (offset), (resid), (rw), (procp)); \ -} -#endif // UIO_KPI - WARNING OBSOLETE!!!! - +extern int ureadc(int c, struct uio *uio); -#endif /* KERNEL */ +#endif /* KERNEL_PRIVATE */ #endif /* !_SYS_UIO_INTERNAL_H_ */ diff --git a/bsd/sys/un.h b/bsd/sys/un.h index 92017608d..479058ff2 100644 --- a/bsd/sys/un.h +++ b/bsd/sys/un.h @@ -90,6 +90,7 @@ struct sockaddr_un { #ifdef KERNEL #ifdef PRIVATE +#include __BEGIN_DECLS struct mbuf; struct socket; @@ -103,6 +104,9 @@ void unp_dispose(struct mbuf *m); int unp_externalize(struct mbuf *rights); void unp_init(void) __attribute__((section("__TEXT, initcode"))); extern struct pr_usrreqs uipc_usrreqs; +int unp_lock(struct socket *, int, void *); +int unp_unlock(struct socket *, int, void *); +lck_mtx_t* unp_getlock(struct socket *, int); __END_DECLS #endif /* PRIVATE */ #else /* !KERNEL */ diff --git a/bsd/sys/unpcb.h b/bsd/sys/unpcb.h index bc2039358..2376c11f8 100644 --- a/bsd/sys/unpcb.h +++ b/bsd/sys/unpcb.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -114,8 +114,10 @@ struct name { \ #endif #ifdef PRIVATE +#ifndef KERNEL _UCPCB_LIST_HEAD(unp_head, unpcb); -#ifdef KERNEL +#else +LIST_HEAD(unp_head, unpcb); #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) struct unpcb { @@ -132,6 +134,8 @@ struct unpcb { unp_gen_t unp_gencnt; /* generation count of this instance */ int unp_flags; /* flags */ struct xucred unp_peercred; /* peer credentials, if applicable */ + lck_mtx_t *unp_mtx; /* per unpcb lock */ + int rw_thrcount; /* disconnect should wait for this count to become zero */ }; #endif /* KERNEL */ @@ -148,8 +152,9 @@ struct unpcb { * (there may not even be a peer). 
This is set in unp_listen() when * it fills in unp_peercred for later consumption by unp_connect(). */ -#define UNP_HAVEPC 0x001 -#define UNP_HAVEPCCACHED 0x002 +#define UNP_HAVEPC 0x0001 +#define UNP_HAVEPCCACHED 0x0002 +#define UNP_DONTDISCONNECT 0x0004 #ifdef KERNEL struct unpcb_compat { @@ -162,7 +167,11 @@ struct unpcb { _UNPCB_PTR(struct vnode *) unp_vnode; /* if associated with file */ ino_t unp_ino; /* fake inode number */ _UNPCB_PTR(struct unpcb_compat *) unp_conn; /* control block of connected socket */ +#if defined(KERNEL) + u_int32_t unp_refs; +#else struct unp_head unp_refs; /* referencing socket linked list */ +#endif _UNPCB_LIST_ENTRY(unpcb_compat) unp_reflink; /* link in unp_refs list */ _UNPCB_PTR(struct sockaddr_un *) unp_addr; /* bound address of socket */ int unp_cc; /* copy of rcv.sb_cc */ @@ -193,9 +202,46 @@ struct xunpcb { u_quad_t xu_alignment_hack; }; +#if !CONFIG_EMBEDDED + +struct xunpcb64_list_entry { + u_int64_t le_next; + u_int64_t le_prev; +}; + +struct xunpcb64 { + u_int32_t xu_len; /* length of this structure */ + u_int64_t xu_unpp; /* to help netstat, fstat */ + struct xunpcb64_list_entry xunp_link; /* glue on list of all PCBs */ + u_int64_t xunp_socket; /* pointer back to socket */ + u_int64_t xunp_vnode; /* if associated with file */ + u_int64_t xunp_ino; /* fake inode number */ + u_int64_t xunp_conn; /* control block of connected socket */ + u_int64_t xunp_refs; /* referencing socket linked list */ + struct xunpcb64_list_entry xunp_reflink; /* link in unp_refs list */ + int xunp_cc; /* copy of rcv.sb_cc */ + int xunp_mbcnt; /* copy of rcv.sb_mbcnt */ + unp_gen_t xunp_gencnt; /* generation count of this instance */ + int xunp_flags; /* flags */ + union { + struct sockaddr_un xuu_addr; + char xu_dummy1[256]; + } xu_au; /* our bound address */ +#define xunp_addr xu_au.xuu_addr + union { + struct sockaddr_un xuu_caddr; + char xu_dummy2[256]; + } xu_cau; /* their bound address */ +#define xunp_caddr xu_cau.xuu_caddr + struct 
xsocket64 xu_socket; +}; + +#endif /* !CONFIG_EMBEDDED */ + #pragma pack() #endif /* _SYS_SOCKETVAR_H_ */ + #endif /* PRIVATE */ struct xunpgen { diff --git a/bsd/sys/user.h b/bsd/sys/user.h index 4aeb5c885..ed9b46543 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -73,8 +73,8 @@ #include #include #endif +#ifdef XNU_KERNEL_PRIVATE #include -#ifdef KERNEL_PRIVATE #include #endif #include /* XXX */ @@ -84,6 +84,8 @@ #ifdef __APPLE_API_PRIVATE #include + +#if !defined(__LP64__) || defined(XNU_KERNEL_PRIVATE) /* * VFS context structure (part of uthread) */ @@ -92,10 +94,13 @@ struct vfs_context { kauth_cred_t vc_ucred; /* per thread credential */ }; +#endif /* !__LP64 || XNU_KERNEL_PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE /* XXX Deprecated: xnu source compatability */ #define uu_ucred uu_context.vc_ucred -#ifdef BSD_KERNEL_PRIVATE +#define MAXTHREADNAMESIZE 64 /* * Per-thread U area. */ @@ -124,19 +129,19 @@ struct uthread { } uu_select; /* saved state for select() */ /* to support kevent continuations */ union { - struct _kevent_scan { + struct _kqueue_scan { kevent_callback_t call; /* per-event callback */ - kevent_continue_t cont; /* whole call continuation */ + kqueue_continue_t cont; /* whole call continuation */ uint64_t deadline; /* computed deadline for operation */ void *data; /* caller's private data */ - } ss_kevent_scan; /* saved state for kevent_scan() */ + } ss_kqueue_scan; /* saved state for kevent_scan() */ struct _kevent { - struct _kevent_scan scan;/* space for the generic data */ + struct _kqueue_scan scan;/* space for the generic data */ struct fileproc *fp; /* fileproc we hold iocount on */ int fd; /* filedescriptor for kq */ - register_t *retval; /* place to store return val */ + int32_t *retval; /* place to store return val */ user_addr_t eventlist; /* user-level event list address */ - size_t eventsize; /* user-level event size (LP64) */ + size_t eventsize; /* kevent or kevent64_s */ int eventcount; /* user-level event count */ int eventout; 
/* number of events output */ } ss_kevent; /* saved state for kevent() */ @@ -165,7 +170,7 @@ struct uthread { TAILQ_ENTRY(uthread) uu_list; /* List of uthreads in proc */ - struct kaudit_record *uu_ar; /* audit record */ + struct kaudit_record *uu_ar; /* audit record */ struct task* uu_aio_task; /* target task for async io */ /* network support for dlil layer locking */ @@ -173,9 +178,9 @@ struct uthread { lck_mtx_t *uu_mtx; int uu_lowpri_window; - size_t uu_devbsdunit; // to identify which device throttled I/Os are sent to + void * uu_throttle_info; /* pointer to throttled I/Os info */ - struct user_sigaltstack uu_sigstk; + struct kern_sigaltstack uu_sigstk; int uu_defer_reclaims; vnode_t uu_vreclaims; int uu_notrigger; /* XXX - flag for autofs */ @@ -188,6 +193,7 @@ struct uthread { void * uu_vps[32]; #endif #if CONFIG_DTRACE + siginfo_t t_dtrace_siginfo; uint32_t t_dtrace_errno; /* Most recent errno */ uint8_t t_dtrace_stop; /* indicates a DTrace-desired stop */ uint8_t t_dtrace_sig; /* signal sent via DTrace's raise() */ @@ -202,7 +208,7 @@ struct uthread { uint8_t _t_dtrace_reg; /* modified register */ #endif } _tds; - unsigned long _t_dtrace_ft; /* bitwise or of these flags */ + u_int32_t _t_dtrace_ft; /* bitwise or of these flags */ } _tdu; #define t_dtrace_ft _tdu._t_dtrace_ft #define t_dtrace_on _tdu._tds._t_dtrace_on @@ -225,6 +231,11 @@ struct uthread { #endif #endif /* CONFIG_DTRACE */ void * uu_threadlist; + char * pth_name; + TAILQ_ENTRY(uthread) uu_mtxlist; /* psynch waiters list*/ + uint32_t uu_lockseq; /* seq on arrival */ + uint32_t uu_psynchretval; /* pmtx retval */ + void * uu_kwqqueue; /* queue blocked on */ }; typedef struct uthread * uthread_t; diff --git a/bsd/sys/utfconv.h b/bsd/sys/utfconv.h index bff9d0667..c872fc0b6 100644 --- a/bsd/sys/utfconv.h +++ b/bsd/sys/utfconv.h @@ -53,6 +53,25 @@ __BEGIN_DECLS + +/* + * unicode_combinable - Test for a combining unicode character. 
+ * + * This function is similar to __CFUniCharIsNonBaseCharacter except + * that it also includes Hangul Jamo characters. + */ + +int unicode_combinable(u_int16_t character); + +/* + * Test for a precomposed character. + * + * Similar to __CFUniCharIsDecomposableCharacter. + */ + +int unicode_decomposeable(u_int16_t character); + + /* * utf8_encodelen - Calculate the UTF-8 encoding length * diff --git a/bsd/sys/ux_exception.h b/bsd/sys/ux_exception.h index d7c3ac3bc..9883cf6fa 100644 --- a/bsd/sys/ux_exception.h +++ b/bsd/sys/ux_exception.h @@ -61,7 +61,11 @@ #include +#if defined(__x86_64__) +extern mach_port_t ux_exception_port; +#else extern mach_port_name_t ux_exception_port; +#endif /* __x86_64__ */ boolean_t machine_exception(int exception, mach_exception_code_t code, mach_exception_subcode_t subcode, diff --git a/bsd/sys/vm.h b/bsd/sys/vm.h index 77a6b7b57..a4e3df795 100644 --- a/bsd/sys/vm.h +++ b/bsd/sys/vm.h @@ -106,6 +106,18 @@ struct vmspace { * WARNING - keep in sync with vmspace */ +struct user32_vmspace { + int vm_refcnt; /* number of references */ + uint32_t vm_shm; /* SYS5 shared memory private data XXX */ + segsz_t vm_rssize; /* current resident set size in pages */ + segsz_t vm_swrss; /* resident set size before last swap */ + segsz_t vm_tsize; /* text size (pages) XXX */ + segsz_t vm_dsize; /* data size (pages) XXX */ + segsz_t vm_ssize; /* stack size (pages) */ + uint32_t vm_taddr; /* user virtual address of text XXX */ + uint32_t vm_daddr; /* user virtual address of data XXX */ + uint32_t vm_maxsaddr; /* user VA at max stack growth */ +}; struct user_vmspace { int vm_refcnt; /* number of references */ user_addr_t vm_shm __attribute((aligned(8))); /* SYS5 shared memory private data XXX */ diff --git a/bsd/sys/vnioctl.h b/bsd/sys/vnioctl.h index 5841be4b2..12e1b4741 100644 --- a/bsd/sys/vnioctl.h +++ b/bsd/sys/vnioctl.h @@ -94,12 +94,18 @@ struct vn_ioctl { #ifdef KERNEL_PRIVATE -struct user_vn_ioctl { +struct vn_ioctl_64 { u_int64_t vn_file; 
/* pathname of file to mount */ int vn_size; /* (returned) size of disk */ vncontrol_t vn_control; }; +struct vn_ioctl_32 { + u_int32_t vn_file; /* pathname of file to mount */ + int vn_size; /* (returned) size of disk */ + vncontrol_t vn_control; +}; + #endif /* KERNEL_PRIVATE */ /* @@ -116,9 +122,14 @@ struct user_vn_ioctl { #define VNIOCUCLEAR _IOWR('F', 5, u_int32_t ) /* reset --//-- */ #define VNIOCSHADOW _IOWR('F', 6, struct vn_ioctl) /* attach shadow */ #ifdef KERNEL_PRIVATE -#define VNIOCATTACH64 _IOWR('F', 0, struct user_vn_ioctl) /* attach file - LP64 */ -#define VNIOCDETACH64 _IOWR('F', 1, struct user_vn_ioctl) /* detach disk - LP64 */ -#define VNIOCSHADOW64 _IOWR('F', 6, struct user_vn_ioctl) /* attach shadow - LP64 */ +#define VNIOCATTACH64 _IOWR('F', 0, struct vn_ioctl_64) /* attach file - LP64 */ +#define VNIOCDETACH64 _IOWR('F', 1, struct vn_ioctl_64) /* detach disk - LP64 */ +#define VNIOCSHADOW64 _IOWR('F', 6, struct vn_ioctl_64) /* attach shadow - LP64 */ +#ifdef __LP64__ +#define VNIOCATTACH32 _IOWR('F', 0, struct vn_ioctl_32) /* attach file - U32 version for K64 */ +#define VNIOCDETACH32 _IOWR('F', 1, struct vn_ioctl_32) /* detach disk - U32 version for K64 */ +#define VNIOCSHADOW32 _IOWR('F', 6, struct vn_ioctl_32) /* attach shadow - U32 version for K64 */ +#endif #endif /* KERNEL_PRIVATE */ #define VN_LABELS 0x1 /* Use disk(/slice) labels */ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 5814b6eea..65620f277 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -118,6 +118,7 @@ enum vtagtype { */ #define VNOVAL (-1) + #ifdef KERNEL /* @@ -154,8 +155,8 @@ struct componentname { /* * Arguments to lookup. 
*/ - u_long cn_nameiop; /* lookup operation */ - u_long cn_flags; /* flags (see below) */ + uint32_t cn_nameiop; /* lookup operation */ + uint32_t cn_flags; /* flags (see below) */ #ifdef BSD_KERNEL_PRIVATE vfs_context_t cn_context; void * pad_obsolete2; @@ -172,11 +173,11 @@ struct componentname { * Shared between lookup and commit routines. */ char *cn_pnbuf; /* pathname buffer */ - long cn_pnlen; /* length of allocated buffer */ + int cn_pnlen; /* length of allocated buffer */ char *cn_nameptr; /* pointer to looked up name */ - long cn_namelen; /* length of looked up component */ - u_long cn_hash; /* hash value of looked up name */ - long cn_consume; /* chars to consume in lookup() */ + int cn_namelen; /* length of looked up component */ + uint32_t cn_hash; /* hash value of looked up name */ + uint32_t cn_consume; /* chars to consume in lookup() */ }; /* @@ -204,7 +205,6 @@ struct componentname { #define DOWHITEOUT 0x00040000 /* do whiteouts */ - /* The following structure specifies a vnode for creation */ struct vnode_fsparam { struct mount * vnfs_mp; /* mount point to which this vnode_t is part of */ @@ -307,7 +307,7 @@ struct vnode_fsparam { VNODE_ATTR_BIT(va_name) | \ VNODE_ATTR_BIT(va_type) | \ VNODE_ATTR_BIT(va_nchildren) | \ - VNODE_ATTR_BIT(va_dirlinkcount)) + VNODE_ATTR_BIT(va_dirlinkcount)) /* * Attributes that can be applied to a new file object. 
*/ @@ -325,6 +325,7 @@ struct vnode_fsparam { VNODE_ATTR_BIT(va_uuuid) | \ VNODE_ATTR_BIT(va_guuid)) + struct vnode_attr { /* bitfields */ uint64_t va_supported; @@ -341,8 +342,8 @@ struct vnode_attr { uint64_t va_nlink; /* number of references to this file */ uint64_t va_total_size; /* size in bytes of all forks */ uint64_t va_total_alloc; /* disk space used by all forks */ - uint64_t va_data_size; /* size in bytes of the main(data) fork */ - uint64_t va_data_alloc; /* disk space used by the main(data) fork */ + uint64_t va_data_size; /* size in bytes of the fork managed by current vnode */ + uint64_t va_data_alloc; /* disk space used by the fork managed by current vnode */ uint32_t va_iosize; /* optimal I/O blocksize */ /* file security information */ @@ -380,6 +381,7 @@ struct vnode_attr { uint64_t va_dirlinkcount; /* Real references to dir (i.e. excluding "." and ".." refs) */ /* add new fields here only */ + }; /* @@ -388,8 +390,6 @@ struct vnode_attr { #define VA_UTIMES_NULL 0x010000 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x020000 /* exclusive create request */ - - /* * Modes. Some values same as Ixxx entries from inode.h for now. */ @@ -400,7 +400,6 @@ struct vnode_attr { #define VWRITE 0x080 /*00200*/ #define VEXEC 0x040 /*00100*/ - /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). @@ -411,7 +410,6 @@ extern int vttoif_tab[]; #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) - /* * Flags to various vnode functions. */ @@ -439,13 +437,6 @@ extern int vttoif_tab[]; #define NULLVP ((struct vnode *)NULL) -/* - * Macro/function to check for client cache inconsistency w.r.t. leasing. 
- */ -#define LEASE_READ 0x1 /* Check lease for readers */ -#define LEASE_WRITE 0x2 /* Check lease for modifiers */ - - #ifndef BSD_KERNEL_PRIVATE struct vnodeop_desc; #endif @@ -466,8 +457,10 @@ struct vnodeopv_desc { struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; -/* - * A default routine which just returns an error. +/*! + @function vn_default_error + @abstract Default vnode operation to fill unsupported slots in vnode operation vectors. + @return ENOTSUP */ int vn_default_error(void); @@ -489,151 +482,816 @@ typedef int kauth_action_t; __BEGIN_DECLS -errno_t vnode_create(int, size_t, void *, vnode_t *); +/*! + @function vnode_create + @abstract Create and initialize a vnode. + @discussion Returns with an iocount held on the vnode which must eventually be dropped with vnode_put(). + @param flavor Should be VNCREATE_FLAVOR. + @param size Size of the struct vnode_fsparam in "data". + @param data Pointer to a struct vnode_fsparam containing initialization information. + @param vpp Pointer to a vnode pointer, to be filled in with newly created vnode. + @return 0 for success, error code otherwise. + */ +errno_t vnode_create(uint32_t, uint32_t, void *, vnode_t *); + +/*! + @function vnode_addfsref + @abstract Mark a vnode as being stored in a filesystem hash. + @discussion Should only be called once on a vnode, and never if that vnode was created with VNFS_ADDFSREF. + There should be a corresponding call to vnode_removefsref() when the vnode is reclaimed; VFS assumes that + an unused vnode will not be marked as referenced by a filesystem. + @param vp The vnode to mark. + @return Always 0. + */ int vnode_addfsref(vnode_t); + +/*! + @function vnode_removefsref + @abstract Mark a vnode as no longer being stored in a filesystem hash. + @discussion Should only be called once on a vnode (during a reclaim), and only after the vnode has either been created with VNFS_ADDFSREF or marked by vnode_addfsref(). + @param vp The vnode to unmark.
+ @return Always 0. + */ int vnode_removefsref(vnode_t); +/*! + @function vnode_hasdirtyblks + @abstract Check if a vnode has dirty data waiting to be written to disk. + @discussion Note that this routine is unsynchronized; it is only a snapshot and its result may cease to be true at the moment it is returned. + @param vp The vnode to test. + @return Nonzero if there are dirty blocks, 0 otherwise. + */ int vnode_hasdirtyblks(vnode_t); + +/*! + @function vnode_hascleanblks + @abstract Check if a vnode has clean buffers associated with it. + @discussion Note that this routine is unsynchronized; it is only a snapshot and its result may cease to be true at the moment it is returned. + @param vp The vnode to test. + @return Nonzero if there are clean blocks, 0 otherwise. + */ int vnode_hascleanblks(vnode_t); + #define VNODE_ASYNC_THROTTLE 15 -/* timeout is in 10 msecs and not hz tick based */ +/*! + @function vnode_waitforwrites + @abstract Wait for the number of pending writes on a vnode to drop below a target. + @param vp The vnode to monitor. + @param output_target Max pending write count with which to return. + @param slpflag Flags for msleep(). + @param slptimeout Frequency with which to force a check for completion; increments of 10 ms. + @param msg String to pass msleep(). + @return 0 for success, or an error value from msleep(). + */ int vnode_waitforwrites(vnode_t, int, int, int, const char *); + +/*! + @function vnode_startwrite + @abstract Increment the count of pending writes on a vnode. + @param vp The vnode whose count to increment. + @return void. + */ void vnode_startwrite(vnode_t); + +/*! + @function vnode_writedone + @abstract Decrement the count of pending writes on a vnode. + @discussion Also wakes up threads waiting for the write count to drop, as in vnode_waitforwrites. + @param vp The vnode whose count to decrement. + @return void. + */ void vnode_writedone(vnode_t); +/*! + @function vnode_vtype + @abstract Return a vnode's type.
+ @param vp The vnode whose type to grab. + @return The vnode's type. + */ enum vtype vnode_vtype(vnode_t); + +/*! + @function vnode_vid + @abstract Return a vnode's vid (generation number), which is constant from creation until reclaim. + @param vp The vnode whose vid to grab. + @return The vnode's vid. + */ uint32_t vnode_vid(vnode_t); + +/*! + @function vnode_mountedhere + @abstract Returns a pointer to a mount placed on top of a vnode, should it exist. + @param vp The vnode from whom to take the covering mount. + @return Pointer to mount covering a vnode, or NULL if none exists. + */ mount_t vnode_mountedhere(vnode_t vp); + +/*! + @function vnode_mount + @abstract Get the mount structure for the filesystem that a vnode belongs to. + @param vp The vnode whose mount to grab. + @return The mount, directly. + */ mount_t vnode_mount(vnode_t); -errno_t vfs_mountref(mount_t); -errno_t vfs_mountrele(mount_t); + +/*! + @function vnode_specrdev + @abstract Return the device id of the device associated with a special file. + @param vp The vnode whose device id to extract--vnode must be a special file. + @return The device id. + */ dev_t vnode_specrdev(vnode_t); + +/*! + @function vnode_fsnode + @abstract Gets the filesystem-specific data associated with a vnode. + @param vp The vnode whose data to grab. + @return The filesystem-specific data, directly. + */ void * vnode_fsnode(vnode_t); + +/*! + @function vnode_clearfsnode + @abstract Sets a vnode's filesystem-specific data to be NULL. + @discussion This routine should only be called when a vnode is no longer in use, i.e. during a VNOP_RECLAIM. + @param vp The vnode whose data to clear out. + @return void. + */ void vnode_clearfsnode(vnode_t); +/*! + @function vnode_isvroot + @abstract Determine if a vnode is the root of its filesystem. + @param vp The vnode to test. + @return Nonzero if the vnode is the root, 0 if it is not. + */ int vnode_isvroot(vnode_t); + +/*! 
+ @function vnode_issystem + @abstract Determine if a vnode is marked as a System vnode. + @param vp The vnode to test. + @return Nonzero if the vnode is a system vnode, 0 if it is not. + */ int vnode_issystem(vnode_t); -int vnode_isswap(vnode_t vp); + +/*! + @function vnode_ismount + @abstract Determine if there is currently a mount occurring which will cover this vnode. + @discussion Note that this is only a snapshot; a mount may begin or end at any time. + @param vp The vnode to test. + @return Nonzero if there is a mount in progress, 0 otherwise. + */ int vnode_ismount(vnode_t); + +/*! + @function vnode_isreg + @abstract Determine if a vnode is a regular file. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VREG, 0 otherwise. + */ int vnode_isreg(vnode_t); + +/*! + @function vnode_isdir + @abstract Determine if a vnode is a directory. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VDIR, 0 otherwise. + */ int vnode_isdir(vnode_t); + +/*! + @function vnode_islnk + @abstract Determine if a vnode is a symbolic link. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VLNK, 0 otherwise. + */ int vnode_islnk(vnode_t); + +/*! + @function vnode_isfifo + @abstract Determine if a vnode is a named pipe. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VFIFO, 0 otherwise. + */ int vnode_isfifo(vnode_t); + +/*! + @function vnode_isblk + @abstract Determine if a vnode is a block device special file. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VBLK, 0 otherwise. + */ int vnode_isblk(vnode_t); + +/*! + @function vnode_ischr + @abstract Determine if a vnode is a character device special file. + @param vp The vnode to test. + @return Nonzero if the vnode is of type VCHR, 0 otherwise. + */ int vnode_ischr(vnode_t); +/*! + @function vnode_isswap + @abstract Determine if a vnode is being used as a swap file. + @param vp The vnode to test. 
+ @return Nonzero if the vnode is being used as swap, 0 otherwise. + */ +int vnode_isswap(vnode_t vp); + #ifdef __APPLE_API_UNSTABLE +/*! + @function vnode_isnamedstream + @abstract Determine if a vnode is a named stream. + @param vp The vnode to test. + @return Nonzero if the vnode is a named stream, 0 otherwise. + */ int vnode_isnamedstream(vnode_t); -int vnode_isshadow(vnode_t); #endif -enum vtype vnode_iftovt(int); -int vnode_vttoif(enum vtype); -int vnode_makeimode(int, int); - +/*! + @function vnode_ismountedon + @abstract Determine if a vnode is a block device on which a filesystem has been mounted. + @discussion A block device marked as being mounted on cannot be opened. + @param vp The vnode to test. + @return Nonzero if the vnode is a block device on which a filesystem is mounted, 0 otherwise. + */ int vnode_ismountedon(vnode_t); + +/*! + @function vnode_setmountedon + @abstract Set flags indicating that a block device vnode has been mounted as a filesystem. + @discussion A block device marked as being mounted on cannot be opened. + @param vp The vnode to set flags on, a block device. + @return void. + */ void vnode_setmountedon(vnode_t); + +/*! + @function vnode_clearmountedon + @abstract Clear flags indicating that a block device vnode has been mounted as a filesystem. + @param vp The vnode to clear flags on, a block device. + @return void. + */ void vnode_clearmountedon(vnode_t); +/*! + @function vnode_isrecycled + @abstract Check if a vnode is dead or in the process of being killed (recycled). + @discussion This is only a snapshot: a vnode may start to be recycled, or go from dead to in use, at any time. + @param vp The vnode to test. + @return Nonzero if vnode is dead or being recycled, 0 otherwise. + */ int vnode_isrecycled(vnode_t); + +/*! + @function vnode_isnocache + @abstract Check if a vnode is set to not have its data cached in memory (i.e. we write-through to disk and always read from disk). + @param vp The vnode to test. 
+ @return Nonzero if vnode is set to not have data cached, 0 otherwise. + */ int vnode_isnocache(vnode_t); + +/*! + @function vnode_israge + @abstract Check if a vnode is marked for rapid aging + @param vp The vnode to test. + @return Nonzero if vnode is marked for rapid aging, 0 otherwise + */ +int vnode_israge(vnode_t); + +/*! + @function vnode_setnocache + @abstract Set a vnode to not have its data cached in memory (i.e. we write-through to disk and always read from disk). + @param vp The vnode whose flags to set. + @return void. + */ void vnode_setnocache(vnode_t); + +/*! + @function vnode_clearnocache + @abstract Clear the flag on a vnode indicating that data should not be cached in memory (i.e. we write-through to disk and always read from disk). + @param vp The vnode whose flags to clear. + @return void. + */ void vnode_clearnocache(vnode_t); + +/*! + @function vnode_isnoreadahead + @abstract Check if a vnode is set to not have data speculatively read in in hopes of future cache hits. + @param vp The vnode to test. + @return Nonzero if readahead is disabled, 0 otherwise. + */ int vnode_isnoreadahead(vnode_t); -int vnode_isstandard(vnode_t); + +/*! + @function vnode_setnoreadahead + @abstract Set a vnode to not have data speculatively read in in hopes of hitting in cache. + @param vp The vnode on which to prevent readahead. + @return void. + */ void vnode_setnoreadahead(vnode_t); -void vnode_clearnoreadahead(vnode_t); -int vnode_is_openevt(vnode_t); -void vnode_set_openevt(vnode_t); -void vnode_clear_openevt(vnode_t); +/*! + @function vnode_clearnoreadahead + @abstract Clear the flag indicating that a vnode should not have data speculatively read in. + @param vp The vnode whose flag to clear. + @return void. + */ +void vnode_clearnoreadahead(vnode_t); /* left only for compat reasons as User code depends on this from getattrlist, for ex */ + +/*! + @function vnode_settag + @abstract Set a vnode filesystem-specific "tag." 
+ @discussion Sets a tag indicating which filesystem a vnode belongs to, e.g. VT_HFS, VT_UDF, VT_ZFS. The kernel never inspects this data, though the filesystem tags are defined in vnode.h; it is for the benefit of user programs via getattrlist. + @param vp The vnode whose tag to set. + @return void. + */ void vnode_settag(vnode_t, int); + +/*! + @function vnode_tag + @abstract Get the vnode filesystem-specific "tag." + @discussion Gets the tag indicating which filesystem a vnode belongs to, e.g. VT_HFS, VT_UDF, VT_ZFS. The kernel never inspects this data, though the filesystem tags are defined in vnode.h; it is for the benefit of user programs via getattrlist. + @param vp The vnode whose tag to grab. + @return The tag. + */ int vnode_tag(vnode_t); + +/*! + @function vnode_getattr + @abstract Get vnode attributes. + @discussion Desired attributes are set with VATTR_SET_ACTIVE and VNODE_ATTR* macros. Supported attributes are determined after call with VATTR_IS_SUPPORTED. + @param vp The vnode whose attributes to grab. + @param vap Structure containing: 1) A list of requested attributes 2) Space to indicate which attributes are supported and being returned 3) Space to return attributes. + @param ctx Context for authentication. + @return 0 for success or an error code. + */ int vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); + +/*! + @function vnode_setattr + @abstract Set vnode attributes. + @discussion Attributes to set are marked with VATTR_SET_ACTIVE and VNODE_ATTR* macros. Attributes successfully set are determined after call with VATTR_IS_SUPPORTED. + @param vp The vnode whose attributes to set. + @param vap Structure containing: 1) A list of attributes to set 2) Space for values for those attributes 3) Space to indicate which attributes were set. + @param ctx Context for authentication. + @return 0 for success or an error code. + */ int vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); +/*! 
+ @function vfs_rootvnode + @abstract Returns the root vnode with an iocount. + @discussion Caller must vnode_put() the root node when done. + @return Pointer to root vnode if successful; error code if there is a problem taking an iocount. + */ vnode_t vfs_rootvnode(void); -uint32_t vnode_vfsvisflags(vnode_t); -uint32_t vnode_vfscmdflags(vnode_t); -struct vfsstatfs *vnode_vfsstatfs(vnode_t); -void *vnode_vfsfsprivate(vnode_t); -vnode_t current_workingdir(void); -vnode_t current_rootdir(void); +/*! + @function vnode_uncache_credentials + @abstract Clear out cached credentials on a vnode. + @discussion When we authorize an action on a vnode, we cache the credential that was authorized and the actions it was authorized for in case a similar request follows. This function destroys that caching. + @param vp The vnode whose cache to clear. + @return void. + */ void vnode_uncache_credentials(vnode_t vp); -void vnode_setmultipath(vnode_t vp); -#ifdef BSD_KERNEL_PRIVATE - -/* - * Indicate that a file has multiple hard links. VFS will always call - * VNOP_LOOKUP on this vnode. Volfs will always ask for it's parent - * object ID (instead of using the v_parent pointer). +/*! + @function vnode_setmultipath + @abstract Mark a vnode as being reachable by multiple paths, i.e. as a hard link. + @discussion "Multipath" vnodes can be reached through more than one entry in the filesystem, and so must be handled differently for caching and event notification purposes. A filesystem should mark a vnode with multiple hardlinks this way. + @param vp The vnode to mark. + @return void. */ +void vnode_setmultipath(vnode_t vp); -vnode_t vnode_parent(vnode_t); -void vnode_setparent(vnode_t, vnode_t); -const char * vnode_name(vnode_t); -void vnode_setname(vnode_t, char *); -int vnode_isnoflush(vnode_t); -void vnode_setnoflush(vnode_t); -void vnode_clearnoflush(vnode_t); -/* XXX temporary until we can arrive at a KPI for NFS, Seatbelt */ -thread_t vfs_context_thread(vfs_context_t); -#endif - +/*! 
+ @function vnode_vfsmaxsymlen + @abstract Determine the maximum length of a symbolic link for the filesystem on which a vnode resides. + @param vp The vnode for which to get filesystem symlink size cap. + @return Max symlink length. + */ uint32_t vnode_vfsmaxsymlen(vnode_t); + +/*! + @function vnode_vfsisrdonly + @abstract Determine if the filesystem to which a vnode belongs is mounted read-only. + @param vp The vnode for which to get filesystem writeability. + @return Nonzero if the filesystem is read-only, 0 otherwise. + */ int vnode_vfsisrdonly(vnode_t); + +/*! + @function vnode_vfstypenum + @abstract Get the "type number" of the filesystem to which a vnode belongs. + @discussion This is an archaic construct; most filesystems are assigned a type number based on the order in which they are registered with the system. + @param vp The vnode whose filesystem to examine. + @return The type number of the filesystem to which the vnode belongs. + */ int vnode_vfstypenum(vnode_t); + +/*! + @function vnode_vfsname + @abstract Get the name of the filesystem to which a vnode belongs. + @param vp The vnode whose filesystem to examine. + @param buf Destination for vfs name: should have size MFSNAMELEN or greater. + @return void; the name of the filesystem to which the vnode belongs is copied into buf. + */ void vnode_vfsname(vnode_t, char *); + +/*! + @function vnode_vfs64bitready + @abstract Determine if the filesystem to which a vnode belongs is marked as ready to interact with 64-bit user processes. + @param vp The vnode whose filesystem to examine. + @return Nonzero if filesystem is marked ready for 64-bit interactions; 0 otherwise. + */ int vnode_vfs64bitready(vnode_t); +/* These should move to private ... not documenting for now */ int vfs_context_get_special_port(vfs_context_t, int, ipc_port_t *); int vfs_context_set_special_port(vfs_context_t, int, ipc_port_t); + +/*! + @function vfs_context_proc + @abstract Get the BSD process structure associated with a vfs_context_t. 
+ @param ctx Context whose associated process to find. + @return Process if available, NULL otherwise. + */ proc_t vfs_context_proc(vfs_context_t); -vnode_t vfs_context_cwd(vfs_context_t); + +/*! + @function vfs_context_ucred + @abstract Get the credential associated with a vfs_context_t. + @discussion Succeeds if and only if the context has a thread, the thread has a task, and the task has a BSD proc. + @param ctx Context whose associated credential to find. + @return Credential if available, NULL otherwise. + */ kauth_cred_t vfs_context_ucred(vfs_context_t); -int vfs_context_issuser(vfs_context_t); + +/*! + @function vfs_context_pid + @abstract Get the process id of the BSD process associated with a vfs_context_t. + @param ctx Context whose associated process to find. + @return Process id. + */ int vfs_context_pid(vfs_context_t); + +/*! + @function vfs_context_issignal + @abstract Get a bitfield of pending signals for the BSD process associated with a vfs_context_t. + @discussion The bitfield is constructed using the sigmask() macro, in the sense of bits |= sigmask(SIGSEGV). + @param ctx Context whose associated process to find. + @return Bitfield of pending signals. + */ int vfs_context_issignal(vfs_context_t, sigset_t); + +/*! + @function vfs_context_suser + @abstract Determine if a vfs_context_t corresponds to the superuser. + @param ctx Context to examine. + @return Nonzero if context belongs to superuser, 0 otherwise. + */ int vfs_context_suser(vfs_context_t); + +/*! + @function vfs_context_is64bit + @abstract Determine if a vfs_context_t corresponds to a 64-bit user process. + @param ctx Context to examine. + @return Nonzero if context is of 64-bit process, 0 otherwise. + */ int vfs_context_is64bit(vfs_context_t); + +/*! + @function vfs_context_create + @abstract Create a new vfs_context_t with appropriate references held. + @discussion The context must be released with vfs_context_rele() when no longer in use. 
+ @param ctx Context to copy, or NULL to use information from running thread. + @return The new context, or NULL in the event of failure. + */ vfs_context_t vfs_context_create(vfs_context_t); + +/*! + @function vfs_context_rele + @abstract Release references on components of a context and deallocate it. + @discussion A context should not be referenced after vfs_context_rele has been called. + @param ctx Context to release. + @return Always 0. + */ int vfs_context_rele(vfs_context_t); -vfs_context_t vfs_context_current(void); /* get from current uthread */ -#ifdef __APPLE_API_UNSTABLE -vfs_context_t vfs_context_kernel(void); /* get from 1st kernel thread */ -#endif +/*! + @function vfs_context_current + @abstract Get the vfs_context for the current thread, or the kernel context if there is no context for current thread. + @discussion Kexts should not use this function--it is preferred to use vfs_context_create(NULL) and vfs_context_rele(), which ensure proper reference counting of underlying structures. + @return Context for current thread, or kernel context if thread context is unavailable. + */ +vfs_context_t vfs_context_current(void); +#ifdef KERNEL_PRIVATE +int vfs_context_bind(vfs_context_t); +#endif +/*! + @function vflush + @abstract Reclaim the vnodes associated with a mount. + @param mp The mount whose vnodes to kill. + @param skipvp A specific vnode to not reclaim or to let interrupt an un-forced flush + @param flags Control which vnodes are skipped and whether in-use vnodes are forcibly reclaimed; the flag values are listed in the discussion. + @discussion This function is used to clear out the vnodes associated with a mount as part of the unmount process. + Its parameters can determine which vnodes to skip in the process and whether in-use vnodes should be forcibly reclaimed. + Filesystems should call this function from their unmount code, because VFS code will always call it with SKIPROOT | SKIPSWAP | SKIPSYSTEM; filesystems + must take care of such vnodes themselves. 
+ SKIPSYSTEM skip vnodes marked VSYSTEM + FORCECLOSE force file closure + WRITECLOSE only close writeable files + SKIPSWAP skip vnodes marked VSWAP + SKIPROOT skip root vnodes marked VROOT + @return 0 for success, EBUSY if vnodes were busy and FORCECLOSE was not set. + */ int vflush(struct mount *mp, struct vnode *skipvp, int flags); + +/*! + @function vnode_get + @abstract Increase the iocount on a vnode. + @discussion If vnode_get() succeeds, the resulting io-reference must be dropped with vnode_put(). + This function succeeds unless the vnode in question is dead or in the process of dying AND the current iocount is zero. + This means that it can block an ongoing reclaim which is blocked behind some other iocount. + + On success, vnode_get() returns with an iocount held on the vnode; this type of reference is intended to be held only for short periods of time (e.g. + across a function call) and provides a strong guarantee about the life of the vnode; vnodes with positive iocounts cannot be + recycled, and an iocount is required for any operation on a vnode. However, vnode_get() does not provide any guarantees + about the identity of the vnode it is called on; unless there is a known existing iocount on the vnode at the time the call is made, + it could be recycled and put back in use before the vnode_get() succeeds, so the caller may be referencing a + completely different vnode than was intended. vnode_getwithref() and vnode_getwithvid() + provide guarantees about vnode identity. + + @return 0 for success, ENOENT if the vnode is dead and without existing io-reference. + */ int vnode_get(vnode_t); -int vnode_getwithvid(vnode_t, int); + +/*! + @function vnode_getwithvid + @abstract Increase the iocount on a vnode, checking that the vnode is alive and has not changed vid (i.e. been recycled) + @discussion If vnode_getwithvid() succeeds, the resulting io-reference must be dropped with vnode_put(). 
+ This function succeeds unless the vnode in question is dead, in the process of dying, or has been recycled (and given a different vnode id). + The intended usage is that a vnode is stored and its vid (vnode_vid(vp)) recorded while an iocount is held (example: a filesystem hash). The + iocount is then dropped, and time passes (perhaps locks are dropped and picked back up). Subsequently, vnode_getwithvid() is called to get an iocount, + but we are alerted if the vnode has been recycled. + + On success, vnode_getwithvid() returns with an iocount held on the vnode; this type of reference is intended to be held only for short periods of time (e.g. + across a function call) and provides a strong guarantee about the life of the vnode. vnodes with positive iocounts cannot be + recycled. An iocount is required for any operation on a vnode. + @return 0 for success, ENOENT if the vnode is dead, in the process of being reclaimed, or has been recycled and reused. + */ +int vnode_getwithvid(vnode_t, uint32_t); + +/*! + @function vnode_getwithref + @abstract Increase the iocount on a vnode on which a usecount (persistent reference) is held. + @discussion If vnode_getwithref() succeeds, the resulting io-reference must be dropped with vnode_put(). + vnode_getwithref() will succeed on dead vnodes; it should fail with ENOENT on vnodes which are in the process of being reclaimed. + Because it is only called with a usecount on the vnode, the caller is guaranteed that the vnode has not been + reused for a different file, though it may now be dead and have deadfs vnops (which return errors like EIO, ENXIO, ENOTDIR). + On success, vnode_getwithref() returns with an iocount held on the vnode; this type of reference is intended to be held only for short periods of time (e.g. + across a function call) and provides a strong guarantee about the life of the vnode. vnodes with positive iocounts cannot be + recycled. An iocount is required for any operation on a vnode. 
+ @return 0 for success, ENOENT if the vnode is in the process of being reclaimed. + */ +int vnode_getwithref(vnode_t); + +/*! + @function vnode_put + @abstract Decrement the iocount on a vnode. + @discussion vnode_put() is called to indicate that a vnode is no longer in active use. It removes the guarantee that a + vnode will not be recycled. This routine should be used to release io references no matter how they were obtained. + @param vp The vnode whose iocount to drop. + @return Always 0. + */ int vnode_put(vnode_t); + +/*! + @function vnode_ref + @abstract Increment the usecount on a vnode. + @discussion If vnode_ref() succeeds, the resulting usecount must be released with vnode_rele(). vnode_ref() is called to obtain + a persistent reference on a vnode. This type of reference does not provide the same strong guarantee that a vnode will persist + as does an iocount--it merely ensures that a vnode will not be reused to represent a different file. However, a usecount may be + held for extended periods of time, whereas an iocount is intended to be obtained and released quickly as part of performing a + vnode operation. A holder of a usecount must call vnode_getwithref()/vnode_put() in order to perform any operations on that vnode. + @param vp The vnode on which to obtain a persistent reference. + @return 0 for success; ENOENT if the vnode is dead or in the process of being recycled AND the calling thread is not the vnode owner. + */ int vnode_ref(vnode_t); + +/*! + @function vnode_rele + @abstract Decrement the usecount on a vnode. + @discussion vnode_rele() is called to release a persistent reference on a vnode. Releasing the last usecount + opens the door for a vnode to be reused as a new file; it also triggers a VNOP_INACTIVE call to the filesystem, + though that will not happen immediately if there are outstanding iocount references. + @param vp The vnode whose usecount to drop. + @return void. 
+ */ void vnode_rele(vnode_t); + +/*! + @function vnode_isinuse + @abstract Determine if the number of persistent (usecount) references on a vnode is greater than a given count. + @discussion vnode_isinuse() compares a vnode's usecount (corresponding to vnode_ref() calls) to its refcnt parameter + (the number of references the caller expects to be on the vnode). Note that "kusecount" references, corresponding + to parties interested only in event notifications, e.g. open(..., O_EVTONLY), are not counted towards the total; the comparison is + (usecount - kusecount > refcnt). It is + also important to note that the result is only a snapshot; usecounts can change from moment to moment, and the result of vnode_isinuse + may no longer be correct the very moment that the caller receives it. + @param vp The vnode whose use-status to check. + @param refcnt The threshold for saying that a vnode is in use. + @return Nonzero if the vnode is in use according to the comparison above, 0 otherwise. + */ int vnode_isinuse(vnode_t, int); -void vnode_lock(vnode_t); -void vnode_unlock(vnode_t); -int vnode_recycle(vnode_t); -void vnode_reclaim(vnode_t); + +/*! + @function vnode_recycle + @abstract Cause a vnode to be reclaimed and prepared for reuse. + @discussion Like all vnode KPIs, must be called with an iocount on the target vnode. + vnode_recycle() will mark that vnode for reclaim when all existing references are dropped. + @param vp The vnode to recycle. + @return 1 if the vnode was reclaimed (i.e. there were no existing references), 0 if it was only marked for future reclaim. 
+ */ +int vnode_recycle(vnode_t); + +#ifdef KERNEL_PRIVATE + +#define VNODE_EVENT_DELETE 0x00000001 /* file was removed */ +#define VNODE_EVENT_WRITE 0x00000002 /* file or directory contents changed */ +#define VNODE_EVENT_EXTEND 0x00000004 /* ubc size increased */ +#define VNODE_EVENT_ATTRIB 0x00000008 /* attributes changed (suitable for permission changes if type unknown)*/ +#define VNODE_EVENT_LINK 0x00000010 /* link count changed */ +#define VNODE_EVENT_RENAME 0x00000020 /* vnode was renamed */ +#define VNODE_EVENT_PERMS 0x00000040 /* permissions changed: will cause a NOTE_ATTRIB */ +#define VNODE_EVENT_FILE_CREATED 0x00000080 /* file created in directory: will cause NOTE_WRITE */ +#define VNODE_EVENT_DIR_CREATED 0x00000100 /* directory created inside this directory: will cause NOTE_WRITE */ +#define VNODE_EVENT_FILE_REMOVED 0x00000200 /* file removed from this directory: will cause NOTE_WRITE */ +#define VNODE_EVENT_DIR_REMOVED 0x00000400 /* subdirectory removed from this directory: will cause NOTE_WRITE */ + +#ifdef BSD_KERNEL_PRIVATE +#define VNODE_NOTIFY_ATTRS (VNODE_ATTR_BIT(va_fsid) | \ + VNODE_ATTR_BIT(va_fileid)| \ + VNODE_ATTR_BIT(va_mode) | \ + VNODE_ATTR_BIT(va_uid) | \ + VNODE_ATTR_BIT(va_gid) | \ + VNODE_ATTR_BIT(va_dirlinkcount) | \ + VNODE_ATTR_BIT(va_nlink)) + + + +#endif /* BSD_KERNEL_PRIVATE */ + +/*! + @function vnode_notify + @abstract Send a notification up to VFS. + @param vp Vnode for which to provide notification. + @param vap Attributes for that vnode, to be passed to fsevents. + @discussion Filesystem determines which attributes to pass up using + vfs_get_notify_attributes(&vap). The most specific events possible should be passed, + e.g. VNODE_EVENT_FILE_CREATED on a directory rather than just VNODE_EVENT_WRITE, but + a less specific event can be passed up if more specific information is not available. + Will not reenter the filesystem. + @return 0 for success, else an error code. 
+ */ +int vnode_notify(vnode_t, uint32_t, struct vnode_attr*); + +/*! + @function vnode_ismonitored + @abstract Check whether a file has watchers that would make it useful to query a server + for file changes. + @param vp Vnode to examine. + @discussion Will not reenter the filesystem. + @return Zero if not monitored, nonzero if monitored. + */ +int vnode_ismonitored(vnode_t); + +/*! + @function vfs_get_notify_attributes + @abstract Determine what attributes are required to send up a notification with vnode_notify(). + @param vap Structure to initialize and activate required attributes on. + @discussion Will not reenter the filesystem. + @return 0 for success, nonzero for error (currently always succeeds). + */ +int vfs_get_notify_attributes(struct vnode_attr *vap); + +/*! + @function vn_getpath_fsenter + @abstract Attempt to get a vnode's path, willing to enter the filesystem. + @discussion Paths to vnodes are not always straightforward: a file with multiple hard-links will have multiple pathnames, + and it is sometimes impossible to determine a vnode's full path. vn_getpath_fsenter() may enter the filesystem + to try to construct a path, so filesystems should be wary of calling it. + @param vp Vnode whose path to get + @param pathbuf Buffer in which to store path. + @param len Destination for length of resulting path string. Result will include NULL-terminator in count--that is, "len" + will be strlen(pathbuf) + 1. + @return 0 for success or an error. + */ +int vn_getpath_fsenter(struct vnode *vp, char *pathbuf, int *len); + +#endif /* KERNEL_PRIVATE */ #define VNODE_UPDATE_PARENT 0x01 #define VNODE_UPDATE_NAME 0x02 #define VNODE_UPDATE_CACHE 0x04 -void vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, int name_hashval, int flags); - +#define VNODE_UPDATE_PURGE 0x08 +/*! + @function vnode_update_identity + @abstract Update vnode data associated with the vfs cache. 
+ @discussion The vfs namecache is central to tracking vnode-identifying data and to locating files on the system. vnode_update_identity() + is used to update vnode data associated with the cache. It can set a vnode's parent and/or name (also potentially set by vnode_create()) + or flush cache data. + @param vp The vnode whose information to update. + @param dvp Parent to set on the vnode if VNODE_UPDATE_PARENT is used. + @param name Name to set in the cache for the vnode if VNODE_UPDATE_NAME is used. The buffer passed in can be subsequently freed, as the cache + does its own name storage. String should be NULL-terminated unless length and hash value are specified. + @param name_len Length of name, if known. Passing 0 causes the cache to determine the length itself. + @param name_hashval Hash value of name, if known. Passing 0 causes the cache to hash the name itself. + @param flags VNODE_UPDATE_PARENT: set parent. VNODE_UPDATE_NAME: set name. VNODE_UPDATE_CACHE: flush cache entries for hard links + associated with this file. VNODE_UPDATE_PURGE: flush cache entries for hard links and children of this file. + @return void. + */ +void vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags); + +/*! + @function vn_bwrite + @abstract System-provided implementation of "bwrite" vnop. + @discussion This routine is available for filesystems which do not want to implement their own "bwrite" vnop. It just calls + buf_bwrite() without modifying its arguments. + @param ap Standard parameters to a bwrite vnop. + @return Results of buf_bwrite directly. + */ int vn_bwrite(struct vnop_bwrite_args *ap); +/*! + @function vnode_authorize + @abstract Authorize a kauth-style action on a vnode. + @discussion Operations on dead vnodes are always allowed (though never do anything). + @param vp Vnode on which to authorize action. + @param dvp Parent of "vp," can be NULL. + @param action Action to authorize, e.g. KAUTH_VNODE_READ_DATA. 
See bsd/sys/kauth.h. + @param ctx Context for which to authorize actions. + @return EACCES if permission is denied. 0 if operation allowed. Various errors from lower layers. + */ int vnode_authorize(vnode_t /*vp*/, vnode_t /*dvp*/, kauth_action_t, vfs_context_t); + +/*! + @function vnode_authattr + @abstract Given a vnode_attr structure, determine what kauth-style actions must be authorized in order to set those attributes. + @discussion vnode_authorize requires kauth-style actions; if we want to set a vnode_attr structure on a vnode, we need to translate + the set of attributes to a set of kauth-style actions. This routine will return errors for certain obviously disallowed, or + incoherent, actions. + @param vp The vnode on which to authorize action. + @param vap Pointer to vnode_attr struct containing desired attributes to set and their values. + @param actionp Destination for set of actions to authorize + @param ctx Context for which to authorize actions. + @return 0 (and a result in "actionp") for success. Otherwise, an error code. + */ int vnode_authattr(vnode_t, struct vnode_attr *, kauth_action_t *, vfs_context_t); + +/*! + @function vnode_authattr_new + @abstract Initialize and validate file creation parameters with respect to the current context. + @discussion vnode_authattr_new() will fill in uninitialized values in the vnode_attr struct with defaults, and will validate the structure + with respect to the current context for file creation. + @param dvp The directory in which creation will occur. + @param vap Pointer to vnode_attr struct containing desired attributes to set and their values. + @param noauth If 1, treat the caller as the superuser, i.e. do not check permissions. + @param ctx Context for which to authorize actions. + @return KAUTH_RESULT_ALLOW for success, an error to indicate invalid or disallowed attributes. + */ int vnode_authattr_new(vnode_t /*dvp*/, struct vnode_attr *, int /*noauth*/, vfs_context_t); + +/*! 
+ @function vnode_close + @abstract Close a file as opened with vnode_open(). + @discussion vnode_close() drops the refcount (persistent reference) picked up in vnode_open() and calls down to the filesystem with VNOP_CLOSE. It should + be called with both an iocount and a refcount on the vnode and will drop both. + @param vp The vnode to close. + @param flags Flags to close: FWASWRITTEN indicates that the file was written to. + @param ctx Context against which to validate operation. + @return 0 for success or an error from the filesystem. + */ errno_t vnode_close(vnode_t, int, vfs_context_t); +/*! + @function vn_getpath + @abstract Construct the path to a vnode. + @discussion Paths to vnodes are not always straightforward: a file with multiple hard-links will have multiple pathnames, + and it is sometimes impossible to determine a vnode's full path. vn_getpath() will not enter the filesystem. + @param vp The vnode whose path to obtain. + @param pathbuf Destination for pathname; should be of size MAXPATHLEN + @param len Destination for length of resulting path string. Result will include NULL-terminator in count--that is, "len" + will be strlen(pathbuf) + 1. + @return 0 for success or an error code. + */ int vn_getpath(struct vnode *vp, char *pathbuf, int *len); -int vn_getcdhash(struct vnode *vp, off_t offset, unsigned char *cdhash); /* * Flags for the vnode_lookup and vnode_open @@ -641,15 +1299,60 @@ int vn_getcdhash(struct vnode *vp, off_t offset, unsigned char *cdhash); #define VNODE_LOOKUP_NOFOLLOW 0x01 #define VNODE_LOOKUP_NOCROSSMOUNT 0x02 #define VNODE_LOOKUP_DOWHITEOUT 0x04 - +/*! + @function vnode_lookup + @abstract Convert a path into a vnode. + @discussion This routine is a thin wrapper around xnu-internal lookup routines; if successful, + it returns with an iocount held on the resulting vnode which must be dropped with vnode_put(). + @param path Path to look up. + @param flags VNODE_LOOKUP_NOFOLLOW: do not follow symbolic links. 
VNODE_LOOKUP_NOCROSSMOUNT: do not cross mount points. + @return Results 0 for success or an error code. + */ errno_t vnode_lookup(const char *, int, vnode_t *, vfs_context_t); + +/*! + @function vnode_open + @abstract Open a file identified by a path--roughly speaking an in-kernel open(2). + @discussion If vnode_open() succeeds, it returns with both an iocount and a usecount on the returned vnode. These must + be released eventually; the iocount should be released with vnode_put() as soon as any initial operations + on the vnode are over, whereas the usecount should be released via vnode_close(). + @param path Path to look up. + @param fmode e.g. O_NONBLOCK, O_APPEND; see bsd/sys/fcntl.h. + @param cmode Permissions with which to create file if it does not exist. + @param flags Same as vnode_lookup(). + @param vpp Destination for vnode. + @param ctx Context with which to authorize open/creation. + @return 0 for success or an error code. + */ errno_t vnode_open(const char *, int, int, int, vnode_t *, vfs_context_t); /* * exported vnode operations */ +/*! + @function vnode_iterate + @abstract Perform an operation on (almost) all vnodes from a given mountpoint. + @param mp Mount whose vnodes to operate on. + @param flags + VNODE_RELOAD Mark inactive vnodes for recycle. + VNODE_WAIT + VNODE_WRITEABLE Only examine vnodes with writes in progress. + VNODE_WITHID No effect. + VNODE_NOLOCK_INTERNAL No effect. + VNODE_NODEAD No effect. + VNODE_NOSUSPEND No effect. + VNODE_ITERATE_ALL No effect. + VNODE_ITERATE_ACTIVE No effect. + VNODE_ITERATE_INACTIVE No effect. + + @param callout Function to call on each vnode. + @param arg Argument which will be passed to callout along with each vnode. + @return Zero for success, else an error code. Will return 0 immediately if there are no vnodes hooked into the mount. + @discussion Skips vnodes which are dead, in the process of reclaim, suspended, or of type VNON. 
+ */ int vnode_iterate(struct mount *, int, int (*)(struct vnode *, void *), void *); + /* * flags passed into vnode_iterate */ @@ -675,36 +1378,218 @@ int vnode_iterate(struct mount *, int, int (*)(struct vnode *, void *), void *); #define VNODE_CLAIMED 2 /* don't drop reference */ #define VNODE_CLAIMED_DONE 3 /* don't drop reference, terminate iteration */ - -#ifdef BSD_KERNEL_PRIVATE -/* Not in export list so can be private */ -struct stat; -int vn_stat(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); -int vn_stat_noauth(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); -int vaccess(mode_t file_mode, uid_t uid, gid_t gid, - mode_t acc_mode, kauth_cred_t cred); -#endif /* BSD_KERNEL_PRIVATE */ - +/*! + @function vn_revoke + @abstract Invalidate all references to a vnode. + @discussion Reclaims the vnode, giving it deadfs vnops (though not halting operations which are already in progress). + Also reclaims all aliased vnodes (important for devices). People holding usecounts on the vnode, e.g. processes + with the file open, will find that all subsequent operations but closing the file fail. + @param vp The vnode to revoke. + @param flags Unused. + @param ctx Context against which to validate operation. + @return 0 always. + */ int vn_revoke(vnode_t vp, int flags, vfs_context_t); - /* namecache function prototypes */ +/*! + @function cache_lookup + @abstract Check for a filename in a directory using the VFS name cache. + @discussion cache_lookup() will flush negative cache entries and return 0 if the operation of the cn_nameiop is CREATE or RENAME. + Often used from the filesystem during a lookup vnop. The filesystem will be called to if there is a negative cache entry for a file, + so it can make sense to initially check for negative entries (and possibly flush them). + @param dvp Directory in which lookup is occurring. + @param vpp Destination for vnode pointer.
+ @param cnp Various data about lookup, e.g. filename and intended operation. + @return ENOENT: the filesystem has previously added a negative entry with cache_enter() to indicate that there is no + file of the given name in "dvp." -1: successfully found a cached vnode (vpp is set). 0: No data in the cache, or operation is CREATE/RENAME. + */ int cache_lookup(vnode_t dvp, vnode_t *vpp, struct componentname *cnp); + +/*! + @function cache_enter + @abstract Add a (name,vnode) entry to the VFS namecache. + @discussion Generally used to add a cache entry after a successful filesystem-level lookup or to add a + negative entry after one which did not find its target. + @param dvp Directory in which file lives. + @param vp File to add to cache. A non-NULL vp is stored for rapid access; a NULL vp indicates + that there is no such file in the directory and speeds future failed lookups. + @param cnp Various data about lookup, e.g. filename and intended operation. + @return void. + */ void cache_enter(vnode_t dvp, vnode_t vp, struct componentname *cnp); + +/*! + @function cache_purge + @abstract Remove all data relating to a vnode from the namecache. + @discussion Will flush all hardlinks to the vnode as well as all children (should any exist). Logical + to use when cached data about a vnode becomes invalid, for instance in an unlink. + @param vp The vnode to purge. + @return void. + */ void cache_purge(vnode_t vp); + +/*! + @function cache_purge_negatives + @abstract Remove all negative cache entries which are children of a given vnode. + @discussion Appropriate to use when negative cache information for a directory could have + become invalid, e.g. after file creation. + @param vp The vnode whose negative children to purge. + @return void. + */ void cache_purge_negatives(vnode_t vp); + /* * Global string-cache routines. You can pass zero for nc_hash * if you don't know it (add_name() will then compute the hash). * There are no flags for now but maybe someday.
*/ -const char *vfs_addname(const char *name, size_t len, u_int nc_hash, u_int flags); +/*! + @function vfs_addname + @abstract Deprecated + @discussion vnode_update_identity() and vnode_create() make vfs_addname() unnecessary for kexts. + */ +const char *vfs_addname(const char *name, uint32_t len, uint32_t nc_hash, uint32_t flags); + +/*! + @function vfs_removename + @abstract Deprecated + @discussion vnode_update_identity() and vnode_create() make vfs_addname() unnecessary for kexts. + */ int vfs_removename(const char *name); -int check_mountedon(dev_t dev, enum vtype type, int *errorp); +/*! + @function vcount + @abstract Count total references to a given file, disregarding "kusecount" (event listener, as with O_EVTONLY) references. + @discussion For a regular file, just return (usecount-kusecount); for device files, return the sum over all + vnodes 'v' which reference that device of (usecount(v) - kusecount(v)). Note that this is merely a snapshot and could be + invalid by the time the caller checks the result. + @param vp The vnode whose references to count. + @return Count of references. + */ int vcount(vnode_t vp); +/*! + @function vn_path_package_check + @abstract Figure out if a path corresponds to a Mac OS X package. + @discussion Determines if the extension on a path is a known OS X extension type. + @param vp Unused. + @param path Path to check. + @param pathlen Size of path buffer. + @param component Set to index of start of last path component if the path is found to be a package. Set to -1 if + the path is not a known package type. + @return 0 unless some parameter was invalid, in which case EINVAL is returned. Determine package-ness by checking + what *component is set to. + */ +int vn_path_package_check(vnode_t vp, char *path, int pathlen, int *component); + +#ifdef KERNEL_PRIVATE +/*! + @function vn_searchfs_inappropriate_name + @abstract Figure out if the component is inappropriate for a SearchFS query. 
+ @param name component to check + @param len length of component. + @return 0 if no match, 1 if inappropriate. + */ +int vn_searchfs_inappropriate_name(const char *name, int len); +#endif + +/*! + @function vn_rdwr + @abstract Read from or write to a file. + @discussion vn_rdwr() abstracts the details of constructing a uio and picking a vnode operation to allow + simple in-kernel file I/O. + @param rw UIO_READ for a read, UIO_WRITE for a write. + @param vp The vnode on which to perform I/O. + @param base Start of buffer into which to read or from which to write data. + @param len Length of buffer. + @param offset Offset within the file at which to start I/O. + @param segflg What kind of address "base" is. See uio_seg definition in sys/uio.h. UIO_SYSSPACE for kernelspace, UIO_USERSPACE for userspace. + UIO_USERSPACE32 and UIO_USERSPACE64 are in general preferred, but vn_rdwr will make sure that has the correct address sizes. + @param ioflg Defined in vnode.h, e.g. IO_NOAUTH, IO_NOCACHE. + @param cred Credential to pass down to filesystem for authentication. + @param aresid Destination for amount of requested I/O which was not completed, as with uio_resid(). + @param p Process requesting I/O. + @return 0 for success; errors from filesystem, and EIO if did not perform all requested I/O and the "aresid" parameter is NULL. + */ +int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t); + +/*! + @function vnode_getname + @abstract Get the name of a vnode from the VFS namecache. + @discussion Not all vnodes have names, and vnode names can change (notably, hardlinks). Use this routine at your own risk. + The string is returned with a refcount incremented in the cache; callers must call vnode_putname() to release that reference. + @param vp The vnode whose name to grab. + @return The name, or NULL if unavailable. + */ +const char *vnode_getname(vnode_t vp); + +/*! 
+ @function vnode_putname + @abstract Release a reference on a name from the VFS cache. + @discussion Should be called on a string obtained with vnode_getname(). + @param name String to release. + @return void. + */ +void vnode_putname(const char *name); + +/*! + @function vnode_getparent + @abstract Get an iocount on the parent of a vnode. + @discussion A vnode's parent may change over time or be reclaimed, so vnode_getparent() may return different + results at different times (e.g. a multiple-hardlink file). The parent is returned with an iocount which must + subsequently be dropped with vnode_put(). + @param vp The vnode whose parent to grab. + @return Parent if available, else NULL. + */ +vnode_t vnode_getparent(vnode_t vp); + +#ifdef BSD_KERNEL_PRIVATE +/* Not in export list so can be private */ +struct stat; +int vn_stat(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); +int vn_stat_noauth(struct vnode *vp, void * sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx); +int vaccess(mode_t file_mode, uid_t uid, gid_t gid, + mode_t acc_mode, kauth_cred_t cred); +int check_mountedon(dev_t dev, enum vtype type, int *errorp); +int vn_getcdhash(struct vnode *vp, off_t offset, unsigned char *cdhash); +void vnode_reclaim(vnode_t); +vfs_context_t vfs_context_kernel(void); /* get from 1st kernel thread */ +int vfs_context_issuser(vfs_context_t); +vnode_t vfs_context_cwd(vfs_context_t); +vnode_t current_rootdir(void); +vnode_t current_workingdir(void); +void *vnode_vfsfsprivate(vnode_t); +struct vfsstatfs *vnode_vfsstatfs(vnode_t); +uint32_t vnode_vfsvisflags(vnode_t); +uint32_t vnode_vfscmdflags(vnode_t); +int vnode_is_openevt(vnode_t); +void vnode_set_openevt(vnode_t); +void vnode_clear_openevt(vnode_t); +int vnode_isstandard(vnode_t); +int vnode_makeimode(int, int); +enum vtype vnode_iftovt(int); +int vnode_vttoif(enum vtype); +int vnode_isshadow(vnode_t); +int vnode_istty(vnode_t vp); +/* + * Indicate that a file has multiple 
hard links. VFS will always call + * VNOP_LOOKUP on this vnode. Volfs will always ask for it's parent + * object ID (instead of using the v_parent pointer). + */ +vnode_t vnode_parent(vnode_t); +void vnode_setparent(vnode_t, vnode_t); +const char * vnode_name(vnode_t); +void vnode_setname(vnode_t, char *); +int vnode_isnoflush(vnode_t); +void vnode_setnoflush(vnode_t); +void vnode_clearnoflush(vnode_t); +/* XXX temporary until we can arrive at a KPI for NFS, Seatbelt */ +thread_t vfs_context_thread(vfs_context_t); + +#endif /* BSD_KERNEL_PRIVATE */ + __END_DECLS #endif /* KERNEL */ diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h index 87fa29333..66812b08d 100644 --- a/bsd/sys/vnode_if.h +++ b/bsd/sys/vnode_if.h @@ -146,11 +146,7 @@ extern struct vnodeop_desc vnop_removenamedstream_desc; #endif __BEGIN_DECLS -/* - *# - *#% lookup dvp L ? ? - *#% lookup vpp - L - - */ + struct vnop_lookup_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -158,16 +154,23 @@ struct vnop_lookup_args { struct componentname *a_cnp; vfs_context_t a_context; }; -extern errno_t VNOP_LOOKUP(vnode_t, vnode_t *, struct componentname *, vfs_context_t); - -/* - *# - *#% create dvp L L L - *#% create vpp - L - - *# +/*! + @function VNOP_LOOKUP + @abstract Call down to a filesystem to look for a directory entry by name. + @discussion VNOP_LOOKUP is the key pathway through which VFS asks a filesystem to find a file. The vnode + should be returned with an iocount to be dropped by the caller. A VNOP_LOOKUP() calldown can come without + a preceding VNOP_OPEN(). + @param dvp Directory in which to look up file. + @param vpp Destination for found vnode. + @param cnp Structure describing filename to find, reason for lookup, and various other data. + @param ctx Context against which to authenticate lookup request. + @return 0 for success or a filesystem-specific error. 
*/ - +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_LOOKUP(vnode_t, vnode_t *, struct componentname *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_create_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -176,15 +179,24 @@ struct vnop_create_args { struct vnode_attr *a_vap; vfs_context_t a_context; }; -extern errno_t VNOP_CREATE(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); -/* - *# - *#% whiteout dvp L L L - *#% whiteout cnp - - - - *#% whiteout flag - - - - *# +/*! + @function VNOP_CREATE + @abstract Call down to a filesystem to create a regular file (VREG). + @discussion If file creation succeeds, "vpp" should be returned with an iocount to be dropped by the caller. + A VNOP_CREATE() calldown can come without a preceding VNOP_OPEN(). + @param dvp Directory in which to create file. + @param vpp Destination for vnode for newly created file. + @param cnp Description of filename to create. + @param vap File creation properties, as seen in vnode_getattr(). Manipulated with VATTR_ISACTIVE, VATTR_RETURN, + VATTR_SET_SUPPORTED, and so forth. + @param ctx Context against which to authenticate file creation. + @return 0 for success or a filesystem-specific error. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_CREATE(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_whiteout_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -192,14 +204,26 @@ struct vnop_whiteout_args { int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_WHITEOUT(vnode_t, struct componentname *, int, vfs_context_t); -/* - *# - *#% mknod dvp L U U - *#% mknod vpp - X - - *# +/*! + @function VNOP_WHITEOUT + @abstract Call down to a filesystem to create a whiteout. + @discussion Whiteouts are used to support the union filesystem, whereby one filesystem is mounted "transparently" + on top of another. 
A whiteout in the upper layer of a union mount is a "deletion" of a file in the lower layer; + lookups will catch the whiteout and fail, setting ISWHITEOUT in the componentname structure, even if an underlying + file of the same name exists. The whiteout vnop is used for creation, deletion, and checking whether a directory + supports whiteouts (see flags). + also support the LOOKUP flag, which is used to test whether a directory supports whiteouts. + @param dvp Directory in which to create. + @param cnp Name information for whiteout. + @param flags CREATE: create a whiteout. LOOKUP: check whether a directory supports whiteouts, DELETE: remove a whiteout. + @param ctx Context against which to authenticate whiteout creation. + @return 0 for success or a filesystem-specific error. Returning 0 for LOOKUP indicates that a directory does support whiteouts. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_WHITEOUT(vnode_t, struct componentname *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_mknod_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -208,80 +232,141 @@ struct vnop_mknod_args { struct vnode_attr *a_vap; vfs_context_t a_context; }; -extern errno_t VNOP_MKNOD(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); -/* - *# - *#% open vp L L L - *# +/*! + @function VNOP_MKNOD + @abstract Call down to a filesystem to create a special file. + @discussion The mknod vnop is used to create character and block device files, named pipe (FIFO) files, and named sockets. + The newly created file should be returned with an iocount which will be dropped by the caller. A VNOP_MKNOD() call + can come down without a preceding VNOP_OPEN(). + @param dvp Directory in which to create the special file. + @param vpp Destination for newly created vnode. + @param cnp Name information for new file. + @param vap Attributes for new file, including type. + @param ctx Context against which to authenticate node creation. 
+ @return 0 for success or a filesystem-specific error. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_MKNOD(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_open_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_mode; vfs_context_t a_context; }; -extern errno_t VNOP_OPEN(vnode_t, int, vfs_context_t); -/* - *# - *#% close vp U U U - *# +/*! + @function VNOP_OPEN + @abstract Call down to a filesystem to open a file. + @discussion The open vnop gives a filesystem a chance to initialize a file for + operations like reading, writing, and ioctls. VFS promises to send down exactly one VNOP_CLOSE() + for each VNOP_OPEN(). + @param vp File to open. + @param mode FREAD and/or FWRITE. + @param ctx Context against which to authenticate open. + @return 0 for success or a filesystem-specific error. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_OPEN(vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_close_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_fflag; vfs_context_t a_context; }; -extern errno_t VNOP_CLOSE(vnode_t, int, vfs_context_t); -/* - *# - *#% access vp L L L - *# +/*! + @function VNOP_CLOSE + @abstract Call down to a filesystem to close a file. + @discussion The close vnop gives a filesystem a chance to release state set up + by a VNOP_OPEN(). VFS promises to send down exactly one VNOP_CLOSE() for each VNOP_OPEN(). + @param vp File to close. + @param fflag FREAD and/or FWRITE; in the case of a file opened with open(2), fflag corresponds + to how the file was opened. + @param ctx Context against which to authenticate close. + @return 0 for success or a filesystem-specific error. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_CLOSE(vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_access_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_action; vfs_context_t a_context; }; -extern errno_t VNOP_ACCESS(vnode_t, int, vfs_context_t); - -/* - *# - *#% getattr vp = = = - *# +/*! + @function VNOP_ACCESS + @abstract Call down to a filesystem to see if a kauth-style operation is permitted. + @discussion VNOP_ACCESS is currently only called on filesystems which mark themselves + as doing their authentication remotely (vfs_setauthopaque(), vfs_authopaque()). A VNOP_ACCESS() + calldown may come without any preceding VNOP_OPEN(). + @param vp File to authorize action for. + @param action kauth-style action to be checked for permissions, e.g. KAUTH_VNODE_DELETE. + @param ctx Context against which to authenticate action. + @return 0 for success or a filesystem-specific error. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_ACCESS(vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_getattr_args { struct vnodeop_desc *a_desc; vnode_t a_vp; struct vnode_attr *a_vap; vfs_context_t a_context; }; -extern errno_t VNOP_GETATTR(vnode_t, struct vnode_attr *, vfs_context_t); -/* - *# - *#% setattr vp L L L - *# +/*! + @function VNOP_GETATTR + @abstract Call down to a filesystem to get vnode attributes. + @discussion Supported attributes ("Yes, I am returning this information") are set with VATTR_SET_SUPPORTED. + Which attributes have been requested is checked with VATTR_IS_ACTIVE. Attributes + are returned with VATTR_RETURN. It is through VNOP_GETATTR that routines like stat() get their information. + A VNOP_GETATTR() calldown may come without any preceding VNOP_OPEN(). + @param vp The vnode whose attributes to get. + @param vap Container for which attributes are requested, which attributes are supported by the filesystem, and attribute values. 
+ @param ctx Context against which to authenticate request for attributes. + @return 0 for success or a filesystem-specific error. VNOP_GETATTR() can return success even if not + all requested attributes were returned; returning an error-value should indicate that something went wrong, rather than that + some attribute is not supported. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_GETATTR(vnode_t, struct vnode_attr *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_setattr_args { struct vnodeop_desc *a_desc; vnode_t a_vp; struct vnode_attr *a_vap; vfs_context_t a_context; }; -extern errno_t VNOP_SETATTR(vnode_t, struct vnode_attr *, vfs_context_t); - -/* - *# - *#% read vp L L L - *# +/*! + @function VNOP_SETATTR + @abstract Call down to a filesystem to set vnode attributes. + @discussion Supported attributes ("Yes, I am setting this attribute.") are set with VATTR_SET_SUPPORTED. + Requested attributes are checked with VATTR_IS_ACTIVE. Attribute values are accessed directly through + structure fields. VNOP_SETATTR() is the core of the KPI function vnode_setattr(), which is used by chmod(), + chown(), truncate(), and many others. A VNOP_SETATTR() call may come without any preceding VNOP_OPEN(). + @param vp The vnode whose attributes to set. + @param vap Container for which attributes are to be set and their desired values, as well as for the filesystem to + return information about which attributes were successfully set. + @param ctx Context against which to authenticate request for attribute change. + @return 0 for success or a filesystem-specific error. VNOP_SETATTR() can return success even if not + all requested attributes were set; returning an error-value should indicate that something went wrong, rather than that + some attribute is not supported. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_SETATTR(vnode_t, struct vnode_attr *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_read_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -289,14 +374,24 @@ struct vnop_read_args { int a_ioflag; vfs_context_t a_context; }; -extern errno_t VNOP_READ(vnode_t, struct uio *, int, vfs_context_t); - -/* - *# - *#% write vp L L L - *# +/*! + @function VNOP_READ + @abstract Call down to a filesystem to read file data. + @discussion VNOP_READ() is where the hard work of the read() system call happens. The filesystem may use + the buffer cache, the cluster layer, or an alternative method to get its data; uio routines will be used to see that data + is copied to the correct virtual address in the correct address space and will update its uio argument + to indicate how much data has been moved. Filesystems will not receive a read request on a file without having + first received a VNOP_OPEN(). + @param vp The vnode to read from. + @param uio Description of request, including file offset, amount of data requested, destination address for data, + and whether that destination is in kernel or user space. + @param ctx Context against which to authenticate read request. + @return 0 for success or a filesystem-specific error. VNOP_READ() can return success even if less data was + read than originally requested; returning an error value should indicate that something actually went wrong. */ +extern errno_t VNOP_READ(vnode_t, struct uio *, int, vfs_context_t); + struct vnop_write_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -304,14 +399,24 @@ struct vnop_write_args { int a_ioflag; vfs_context_t a_context; }; -extern errno_t VNOP_WRITE(vnode_t, struct uio *, int, vfs_context_t); - -/* - *# - *#% ioctl vp U U U - *# +/*! + @function VNOP_WRITE + @abstract Call down to the filesystem to write file data. + @discussion VNOP_WRITE() is to write() as VNOP_READ() is to read().
The filesystem may use + the buffer cache, the cluster layer, or an alternative method to write its data; uio routines will be used to see that data + is copied to the correct virtual address in the correct address space and will update its uio argument + to indicate how much data has been moved. Filesystems will not receive a write request on a file without having + first received a VNOP_OPEN(). + @param vp The vnode to write to. + @param uio Description of request, including file offset, amount of data to write, source address for data, + and whether that destination is in kernel or user space. + @param ctx Context against which to authenticate write request. + @return 0 for success or a filesystem-specific error. VNOP_WRITE() can return success even if less data was + written than originally requested; returning an error value should indicate that something actually went wrong. */ +extern errno_t VNOP_WRITE(vnode_t, struct uio *, int, vfs_context_t); + struct vnop_ioctl_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -320,14 +425,26 @@ struct vnop_ioctl_args { int a_fflag; vfs_context_t a_context; }; -extern errno_t VNOP_IOCTL(vnode_t, u_long, caddr_t, int, vfs_context_t); - -/* - *# - *#% select vp U U U - *# +/*! + @function VNOP_IOCTL + @abstract Call down to a filesystem or device driver to execute various control operations on or request data about a file. + @discussion Ioctl controls are typically associated with devices, but they can in fact be passed + down for any file; they are used to implement any of a wide range of controls and information requests. + fcntl() calls VNOP_IOCTL for several commands, and will attempt a VNOP_IOCTL if it is passed an unknown command, + though no copyin or copyout of arguments can occur in this case--the "arg" must be an integer value. + Filesystems can define their own fcntls using this mechanism. How ioctl commands are structured + is slightly complicated; see the manual page for ioctl(2). 
+ @param vp The vnode to execute the command on. + @param command Identifier for action to take. + @param data Pointer to data; this can be an integer constant (of 32 bits only) or an address to be read from or written to, + depending on "command." If it is an address, it is valid and resides in the kernel; callers of VNOP_IOCTL() are + responsible for copying to and from userland. + @param ctx Context against which to authenticate ioctl request. + @return 0 for success or a filesystem-specific error. */ +extern errno_t VNOP_IOCTL(vnode_t, u_long, caddr_t, int, vfs_context_t); + struct vnop_select_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -336,15 +453,28 @@ struct vnop_select_args { void *a_wql; vfs_context_t a_context; }; -extern errno_t VNOP_SELECT(vnode_t, int, int, void *, vfs_context_t); - -/* - *# - *#% exchange fvp L L L - *#% exchange tvp L L L - *# +/*! + @function VNOP_SELECT + @abstract Call down to a filesystem or device to check if a file is ready for I/O and request later notification if it is not currently ready. + @discussion In general, regular files are always "ready for I/O" and their select vnops simply return "1." + Devices, though, may or may not be ready; they keep track of who is selecting on them and send notifications + when they become ready. xnu provides structures and routines for tracking threads waiting for I/O and waking up + those threads: see selrecord(), selthreadclear(), seltrue(), selwait(), selwakeup(), and the selinfo structure (sys/select.h). + @param vp The vnode to check for I/O readiness. + @param which What kind of I/O is desired: FREAD, FWRITE. + @param fflags Flags from fileglob as seen in fcntl.h, e.g. O_NONBLOCK, O_APPEND. + @param wql Opaque object to pass to selrecord(). + @param ctx Context to authenticate for select request. + @return Nonzero indicates that a file is ready for I/O. 0 indicates that the file is not ready for I/O; + there is no way to return an error.
0 should be returned if the device (or file) is not ready for I/O + and the driver (or filesystem) is going to track the request and provide subsequent wakeups. + the device (or filesystem) will provide a wakeup. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_SELECT(vnode_t, int, int, void *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_exchange_args { struct vnodeop_desc *a_desc; vnode_t a_fvp; @@ -352,69 +482,100 @@ struct vnop_exchange_args { int a_options; vfs_context_t a_context; }; -extern errno_t VNOP_EXCHANGE(vnode_t, vnode_t, int, vfs_context_t); - -/* - *# - *#% revoke vp U U U - *# +/*! + @function VNOP_EXCHANGE + @abstract Call down to a filesystem to atomically exchange the data of two files. + @discussion VNOP_EXCHANGE() is currently only called by the exchangedata() system call. It will only + be applied to files on the same volume. + @param fvp First vnode. + @param tvp Second vnode. + @param options Unused. + @param ctx Context to authenticate for exchangedata request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_EXCHANGE(vnode_t, vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_revoke_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_REVOKE(vnode_t, int, vfs_context_t); - -/* - *# - *# mmap - vp U U U - *# +/*! + @function VNOP_REVOKE + @abstract Call down to a filesystem to invalidate all open file descriptors for a vnode. + @discussion This function is typically called as part of a TTY revoke, but can also be + used on regular files. Most filesystems simply use nop_revoke(), which calls vn_revoke(), + as their revoke vnop implementation. + @param vp The vnode to revoke. + @param flags Unused. + @param ctx Context to authenticate for revoke request. + @return 0 for success, else an error code. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_REVOKE(vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_mmap_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_fflags; vfs_context_t a_context; }; -extern errno_t VNOP_MMAP(vnode_t, int, vfs_context_t); -/* - *# - *# mnomap - vp U U U - *# +/*! + @function VNOP_MMAP + @abstract Notify a filesystem that a file is being mmap-ed. + @discussion VNOP_MMAP is an advisory calldown to say that the system is mmap-ing a file. + @param vp The vnode being mmapped. + @param flags Memory protection: PROT_READ, PROT_WRITE, PROT_EXEC. + @param ctx Context to authenticate for mmap request. + @return 0 for success; all errors except EPERM are ignored. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_MMAP(vnode_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_mnomap_args { struct vnodeop_desc *a_desc; vnode_t a_vp; vfs_context_t a_context; }; -extern errno_t VNOP_MNOMAP(vnode_t, vfs_context_t); - -/* - *# - *#% fsync vp L L L - *# +/*! + @function VNOP_MNOMAP + @abstract Inform a filesystem that a file is no longer mapped. + @discussion In general, no action is required of a filesystem for VNOP_MNOMAP. + @param vp The vnode which is no longer mapped. + @param ctx Context to authenticate for mnomap request. + @return Return value is ignored. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_MNOMAP(vnode_t, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_fsync_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_waitfor; vfs_context_t a_context; }; -extern errno_t VNOP_FSYNC(vnode_t, int, vfs_context_t); - -/* - *# - *#% remove dvp L U U - *#% remove vp L U U - *# +/*! + @function VNOP_FSYNC + @abstract Call down to a filesystem to synchronize a file with on-disk state. 
+ @discussion VNOP_FSYNC is called whenever we need to make sure that a file's data has been + pushed to backing store, for example when recycling; it is also the heart of the fsync() system call. + @param vp The vnode whose data to flush to backing store. + @param ctx Context to authenticate for fsync request. + @return 0 for success, else an error code. */ +extern errno_t VNOP_FSYNC(vnode_t, int, vfs_context_t); + struct vnop_remove_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -423,15 +584,22 @@ struct vnop_remove_args { int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_REMOVE(vnode_t, vnode_t, struct componentname *, int, vfs_context_t); - -/* - *# - *#% link vp U U U - *#% link tdvp L U U - *# +/*! + @function VNOP_REMOVE + @abstract Call down to a filesystem to delete a file. + @discussion VNOP_REMOVE is called to remove a file from a filesystem's namespace, for example by unlink(). + It can operate on regular files, named pipes, special files, and in some cases on directories. + @param dvp Directory in which to delete a file. + @param vp The file to delete. + @param cnp Filename information. + @param ctx Context to authenticate for remove request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_REMOVE(vnode_t, vnode_t, struct componentname *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_link_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -439,17 +607,21 @@ struct vnop_link_args { struct componentname *a_cnp; vfs_context_t a_context; }; -extern errno_t VNOP_LINK(vnode_t, vnode_t, struct componentname *, vfs_context_t); - -/* - *# - *#% rename fdvp U U U - *#% rename fvp U U U - *#% rename tdvp L U U - *#% rename tvp X U U - *# +/*! + @function VNOP_LINK + @abstract Call down to a filesystem to create a hardlink to a file. + @discussion See "man 2 link". + @param vp File to link to. + @param dvp Directory in which to create the link.
+ @param cnp Filename information for new link. + @param ctx Context to authenticate for link request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_LINK(vnode_t, vnode_t, struct componentname *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_rename_args { struct vnodeop_desc *a_desc; vnode_t a_fdvp; @@ -460,15 +632,24 @@ struct vnop_rename_args { struct componentname *a_tcnp; vfs_context_t a_context; }; -extern errno_t VNOP_RENAME(vnode_t, vnode_t, struct componentname *, vnode_t, vnode_t, struct componentname *, vfs_context_t); - -/* - *# - *#% mkdir dvp L U U - *#% mkdir vpp - L - - *# +/*! + @function VNOP_RENAME + @abstract Call down to a filesystem to rename a file. + @discussion VNOP_RENAME() will only be called with a source and target on the same volume. + @param fdvp Directory in which source file resides. + @param fvp File being renamed. + @param fcnp Name information for source file. + @param tdvp Directory file is being moved to. + @param tvp Existing file with same name as target, should one exist. + @param tcnp Name information for target path. + @param ctx Context to authenticate for rename request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_RENAME(vnode_t, vnode_t, struct componentname *, vnode_t, vnode_t, struct componentname *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_mkdir_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -476,16 +657,23 @@ struct vnop_mkdir_args { struct componentname *a_cnp; struct vnode_attr *a_vap; vfs_context_t a_context; - }; -extern errno_t VNOP_MKDIR(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); - +}; -/* - *# - *#% rmdir dvp L U U - *#% rmdir vp L U U - *# +/*! + @function VNOP_MKDIR + @abstract Call down to a filesystem to create a directory. 
+ @discussion The newly created directory should be returned with an iocount which will be dropped by the caller. + @param dvp Directory in which to create new directory. + @param vpp Destination for pointer to new directory's vnode. + @param cnp Name information for new directory. + @param vap Attributes for new directory. + @param ctx Context to authenticate for mkdir request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_MKDIR(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_rmdir_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -493,15 +681,20 @@ struct vnop_rmdir_args { struct componentname *a_cnp; vfs_context_t a_context; }; -extern errno_t VNOP_RMDIR(vnode_t, vnode_t, struct componentname *, vfs_context_t); - -/* - *# - *#% symlink dvp L U U - *#% symlink vpp - U - - *# +/*! + @function VNOP_RMDIR + @abstract Call down to a filesystem to delete a directory. + @param dvp Parent of directory to be removed. + @param vp Directory to remove. + @param cnp Name information for directory to be deleted. + @param ctx Context to authenticate for rmdir request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_RMDIR(vnode_t, vnode_t, struct componentname *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_symlink_args { struct vnodeop_desc *a_desc; vnode_t a_dvp; @@ -511,13 +704,27 @@ struct vnop_symlink_args { char *a_target; vfs_context_t a_context; }; + +/*! + @function VNOP_SYMLINK + @abstract Call down to a filesystem to create a symbolic link. + @discussion If VNOP_SYMLINK() is successful, the new file should be returned with an iocount which will + be dropped by the caller. VFS does not ensure that the target path will have a length shorter + than the max symlink length for the filesystem. + @param dvp Parent directory for new symlink file.
+ @param vpp + @param cnp Name information for new symlink. + @param vap Attributes for symlink. + @param target Path for symlink to store; for "ln -s /var/vardir linktovardir", "target" would be "/var/vardir" + @param ctx Context to authenticate for symlink request. + @return 0 for success, else an error code. + */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_SYMLINK(vnode_t, vnode_t *, struct componentname *, struct vnode_attr *, char *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ /* - *# - *#% readdir vp L L L - *# * * When VNOP_READDIR is called from the NFS Server, the nfs_data * argument is non-NULL. @@ -545,89 +752,146 @@ struct vnop_readdir_args { int *a_numdirent; vfs_context_t a_context; }; -extern errno_t VNOP_READDIR(vnode_t, struct uio *, int, int *, int *, vfs_context_t); - -/* - *# - *#% readdirattr vp L L L - *# +/*! + @function VNOP_READDIR + @abstract Call down to a filesystem to enumerate directory entries. + @discussion VNOP_READDIR() packs a buffer with "struct dirent" directory entry representations as described + by the "getdirentries" manual page. + @param vp Directory to enumerate. + @param uio Destination information for resulting direntries. + @param flags VNODE_READDIR_EXTENDED, VNODE_READDIR_REQSEEKOFF, VNODE_READDIR_SEEKOFF32: Apple-internal flags. + @param eofflag Should be set to 1 if the end of the directory has been reached. + @param numdirent Should be set to number of entries written into buffer. + @param ctx Context to authenticate for readdir request. + @return 0 for success, else an error code. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_READDIR(vnode_t, struct uio *, int, int *, int *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_readdirattr_args { struct vnodeop_desc *a_desc; vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; - u_long a_maxcount; - u_long a_options; - u_long *a_newstate; + uint32_t a_maxcount; + uint32_t a_options; + uint32_t *a_newstate; int *a_eofflag; - u_long *a_actualcount; + uint32_t *a_actualcount; vfs_context_t a_context; }; -extern errno_t VNOP_READDIRATTR(vnode_t, struct attrlist *, struct uio *, u_long, u_long, u_long *, int *, u_long *, vfs_context_t); - -/* - *# - *#% readlink vp L L L - *# +/*! + @function VNOP_READDIRATTR + @abstract Call down to get file attributes for many files in a directory at once. + @discussion VNOP_READDIRATTR() packs a buffer with file attributes, as if the results of many "getattrlist" calls. + @param vp Directory in which to enumerate entries' attributes. + @param alist Which attributes are wanted for each directory entry. + @param uio Destination information for resulting attributes. + @param maxcount Maximum count of files to get attributes for. + @param options FSOPT_NOFOLLOW: do not follow symbolic links. FSOPT_NOINMEMUPDATE: do not use data which have been + updated since an inode was loaded into memory. + @param newstate The "newstate" should be set to a value which changes if the contents of a directory change + through an addition or deletion but stays the same otherwise. + @param eofflag Should be set to 1 if the end of the directory has been reached. + @param actualcount Should be set to number of files whose attributes were written into buffer. + @param ctx Context to authenticate for readdirattr request. + @return 0 for success, else an error code. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_READDIRATTR(vnode_t, struct attrlist *, struct uio *, uint32_t, uint32_t, uint32_t *, int *, uint32_t *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_readlink_args { struct vnodeop_desc *a_desc; vnode_t a_vp; struct uio *a_uio; vfs_context_t a_context; }; -extern errno_t VNOP_READLINK(vnode_t, struct uio *, vfs_context_t); - -/* - *# - *#% inactive vp L U U - *# +/*! + @function VNOP_READLINK + @abstract Call down to a filesystem to get the pathname represented by a symbolic link. + @discussion VNOP_READLINK() gets the path stored in a symbolic link; it is called by namei() and the readlink() system call. + @param vp Symbolic link to read from. + @param uio Destination information for link path. + @param ctx Context to authenticate for readlink request. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_READLINK(vnode_t, struct uio *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_inactive_args { struct vnodeop_desc *a_desc; vnode_t a_vp; vfs_context_t a_context; }; -extern errno_t VNOP_INACTIVE(vnode_t, vfs_context_t); - -/* - *# - *#% reclaim vp U U U - *# +/*! + @function VNOP_INACTIVE + @abstract Notify a filesystem that the last usecount (persistent reference) on a vnode has been dropped. + @discussion VNOP_INACTIVE() gives a filesystem a chance to aggressively release resources associated with a vnode, perhaps + even to call vnode_recycle(), but no action is prescribed; it is acceptable for VNOP_INACTIVE to be a no-op and + to defer all reclamation until VNOP_RECLAIM(). + VNOP_INACTIVE() will not be called on a vnode if no persistent reference is ever taken; an + important example is a stat(), which takes an iocount, reads its data, and drops that iocount. + @param vp The vnode which is now inactive. + @param ctx Context to authenticate for inactive message.
+ @return 0 for success, else an error code, but return value is currently ignored. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_INACTIVE(vnode_t, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_reclaim_args { struct vnodeop_desc *a_desc; vnode_t a_vp; vfs_context_t a_context; }; -extern errno_t VNOP_RECLAIM(vnode_t, vfs_context_t); - -/* - *# - *#% pathconf vp L L L - *# +/*! + @function VNOP_RECLAIM + @abstract Release filesystem-internal resources for a vnode. + @discussion VNOP_RECLAIM() is called as part of the process of recycling a vnode. During + a reclaim routine, a filesystem should remove a vnode from its hash and deallocate any resources + allocated to that vnode. VFS guarantees that when VNOP_RECLAIM() is called, there are no more + iocount references on a vnode (though there may still be usecount references--these are invalidated + by the reclaim) and that no more will be granted. This means in practice that there will be no + filesystem calls on the vnode being reclaimed until the reclaim has finished and the vnode has + been reused. + @param vp The vnode to reclaim. + @param ctx Context to authenticate for reclaim. + @return 0 for success, or an error code. A nonzero return value results in a panic. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_RECLAIM(vnode_t, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_pathconf_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_name; - register_t *a_retval; + int32_t *a_retval; vfs_context_t a_context; }; -extern errno_t VNOP_PATHCONF(vnode_t, int, register_t *, vfs_context_t); /* register_t??????? */ - -/* - *# - *#% advlock vp U U U - *# +/*! + @function VNOP_PATHCONF + @abstract Query a filesystem for path properties. + @param vp The vnode whose filesystem to query. + @param name Which property to request: see unistd.h. For example: _PC_CASE_SENSITIVE (is + a filesystem case-sensitive?). Only one property can be requested at a time. 
+ @param retval Destination for value of property. + @param ctx Context to authenticate for pathconf request. + @return 0 for success, or an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_PATHCONF(vnode_t, int, int32_t *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_advlock_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -637,13 +901,29 @@ struct vnop_advlock_args { int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_ADVLOCK(vnode_t, caddr_t, int, struct flock *, int, vfs_context_t); -/* - *# - *#% allocate vp L L L - *# +/*! + @function VNOP_ADVLOCK + @abstract Acquire or release an advisory lock on a vnode. + @discussion Advisory locking is somewhat complicated. VNOP_ADVLOCK is overloaded for + both flock() and POSIX advisory locking usage, though not all filesystems support both (or any). VFS + provides an advisory locking mechanism for filesystems which can take advantage of it; vfs_setlocklocal() + marks a filesystem as using VFS advisory locking support. + @param vp The vnode to lock or unlock. + @param id Identifier for lock holder: ignored by most filesystems. + @param op Which locking operation: F_SETLK: set locking information about a region. + F_GETLK: get locking information about the specified region. F_UNLCK: Unlock a region. + @param fl Description of file region to lock. l_whence is as with "lseek." + Includes a type: F_RDLCK (shared lock), F_UNLCK (unlock), and F_WRLCK (exclusive lock). + @param flags F_FLOCK: use flock() semantics. F_POSIX: use POSIX semantics. F_WAIT: sleep if necessary. + F_PROV: Non-coalesced provisional lock (unused in xnu). + @param ctx Context to authenticate for advisory locking request. + @return 0 for success, or an error code.
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_ADVLOCK(vnode_t, caddr_t, int, struct flock *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_allocate_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -653,75 +933,119 @@ struct vnop_allocate_args { off_t a_offset; vfs_context_t a_context; }; -extern errno_t VNOP_ALLOCATE(vnode_t, off_t, u_int32_t, off_t *, off_t, vfs_context_t); -/* - *# - *#% pagein vp = = = - *# +/*! + @function VNOP_ALLOCATE + @abstract Pre-allocate space for a file. + @discussion VNOP_ALLOCATE() changes the amount of backing store set aside for + a file. It can be used to either shrink or grow a file. If the file shrinks, + its ubc size will be modified accordingly, but if it grows, then the ubc size is unchanged; + space is set aside without being actively used by the file. VNOP_ALLOCATE() is currently only + called as part of the F_PREALLOCATE fcntl, and is supported only by AFP and HFS. + @param vp The vnode for which to preallocate space. + @param length Desired preallocated file length. + @param flags + PREALLOCATE: preallocate allocation blocks. + ALLOCATECONTIG: allocate contiguous space. + ALLOCATEALL: allocate all requested space or no space at all. + FREEREMAINDER: deallocate allocated but unfilled blocks. + ALLOCATEFROMPEOF: allocate from the physical eof. + ALLOCATEFROMVOL: allocate from the volume offset. + @param bytesallocated Additional bytes set aside for file. Set to 0 if none are allocated + OR if the file is contracted. + @param offset Hint for where to find free blocks. + @param ctx Context to authenticate for allocation request. + @return 0 for success, or an error code.
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_ALLOCATE(vnode_t, off_t, u_int32_t, off_t *, off_t, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_pagein_args { struct vnodeop_desc *a_desc; vnode_t a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_PAGEIN(vnode_t, upl_t, vm_offset_t, off_t, size_t, int, vfs_context_t); /* vm_offset_t ? */ - -/* - *# - *#% pageout vp = = = - *# +/*! + @function VNOP_PAGEIN + @abstract Pull file data into memory. + @discussion VNOP_PAGEIN() is called when a process faults on data mapped from a file or + when madvise() demands pre-fetching. It is conceptually somewhat similar to VNOP_READ(). Filesystems + are typically expected to call cluster_pagein() to handle the labor of mapping and committing the UPL. + @param vp The vnode for which to page in data. + @param pl UPL describing pages needing to be paged in. + @param pl_offset Offset in UPL at which to start placing data. + @param f_offset Offset in file of data needing to be paged in. + @param size Amount of data to page in (in bytes). + @param flags UPL-style flags: UPL_IOSYNC, UPL_NOCOMMIT, UPL_NORDAHEAD, UPL_VNODE_PAGER, UPL_MSYNC. + Filesystems should generally leave it to the cluster layer to handle these flags. See the + memory_object_types.h header in the kernel framework if interested. + @param ctx Context to authenticate for pagein request. + @return 0 for success, or an error code.
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_PAGEIN(vnode_t, upl_t, upl_offset_t, off_t, size_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_pageout_args { struct vnodeop_desc *a_desc; vnode_t a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_PAGEOUT(vnode_t, upl_t, vm_offset_t, off_t, size_t, int, vfs_context_t); - -/* - *# - *#% searchfs vp L L L - *# +/*! + @function VNOP_PAGEOUT + @abstract Write data from a mapped file back to disk. + @discussion VNOP_PAGEOUT() is called when data from a mapped file needs to be flushed to disk, either + because of an msync() call or due to memory pressure. Filesystems are for the most part expected to + just call cluster_pageout(). + @param vp The vnode for which to page out data. + @param pl UPL describing pages needing to be paged out. + @param pl_offset Offset in UPL from which to start paging out data. + @param f_offset Offset in file of data needing to be paged out. + @param size Amount of data to page out (in bytes). + @param flags UPL-style flags: UPL_IOSYNC, UPL_NOCOMMIT, UPL_NORDAHEAD, UPL_VNODE_PAGER, UPL_MSYNC. + Filesystems should generally leave it to the cluster layer to handle these flags. See the + memory_object_types.h header in the kernel framework if interested. + @param ctx Context to authenticate for pageout request. + @return 0 for success, or an error code. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_PAGEOUT(vnode_t, upl_t, upl_offset_t, off_t, size_t, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_searchfs_args { struct vnodeop_desc *a_desc; vnode_t a_vp; void *a_searchparams1; void *a_searchparams2; struct attrlist *a_searchattrs; - u_long a_maxmatches; + uint32_t a_maxmatches; struct timeval *a_timelimit; struct attrlist *a_returnattrs; - u_long *a_nummatches; - u_long a_scriptcode; - u_long a_options; + uint32_t *a_nummatches; + uint32_t a_scriptcode; + uint32_t a_options; struct uio *a_uio; struct searchstate *a_searchstate; vfs_context_t a_context; }; -extern errno_t VNOP_SEARCHFS(vnode_t, void *, void *, struct attrlist *, u_long, struct timeval *, struct attrlist *, u_long *, u_long, u_long, struct uio *, struct searchstate *, vfs_context_t); +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_SEARCHFS(vnode_t, void *, void *, struct attrlist *, uint32_t, struct timeval *, struct attrlist *, uint32_t *, uint32_t, uint32_t, struct uio *, struct searchstate *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ -/* - *# - *#% copyfile fvp U U U - *#% copyfile tdvp L U U - *#% copyfile tvp X U U - *# - */ struct vnop_copyfile_args { struct vnodeop_desc *a_desc; vnode_t a_fvp; @@ -732,8 +1056,10 @@ struct vnop_copyfile_args { int a_flags; vfs_context_t a_context; }; -extern errno_t VNOP_COPYFILE(vnode_t, vnode_t, vnode_t, struct componentname *, int, int, vfs_context_t); +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_COPYFILE(vnode_t, vnode_t, vnode_t, struct componentname *, int, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_getxattr_args { struct vnodeop_desc *a_desc; @@ -745,6 +1071,18 @@ struct vnop_getxattr_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_getxattr_desc; + +/*! + @function VNOP_GETXATTR + @abstract Get extended file attributes. + @param vp The vnode to get extended attributes for. + @param name Which property to extract. 
+ @param uio Destination information for attribute value. + @param size Should be set to the amount of data written. + @param options XATTR_NOSECURITY: bypass security-checking. + @param ctx Context to authenticate for getxattr request. + @return 0 for success, or an error code. + */ extern errno_t VNOP_GETXATTR(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t); struct vnop_setxattr_args { @@ -756,6 +1094,18 @@ struct vnop_setxattr_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_setxattr_desc; + +/*! + @function VNOP_SETXATTR + @abstract Set extended file attributes. + @param vp The vnode to set extended attributes for. + @param name Which property to extract. + @param uio Source information for attribute value. + @param options XATTR_NOSECURITY: bypass security-checking. XATTR_CREATE: set value, fail if exists. + XATTR_REPLACE: set value, fail if does not exist. + @param ctx Context to authenticate for setxattr request. + @return 0 for success, or an error code. + */ extern errno_t VNOP_SETXATTR(vnode_t, const char *, uio_t, int, vfs_context_t); struct vnop_removexattr_args { @@ -766,7 +1116,19 @@ struct vnop_removexattr_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_removexattr_desc; + +/*! + @function VNOP_REMOVEXATTR + @abstract Remove extended file attributes. + @param vp The vnode from which to remove extended attributes. + @param name Which attribute to delete. + @param options XATTR_NOSECURITY: bypass security-checking. + @param ctx Context to authenticate for attribute delete request. + @return 0 for success, or an error code. 
+ */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_REMOVEXATTR(vnode_t, const char *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_listxattr_args { struct vnodeop_desc *a_desc; @@ -777,42 +1139,63 @@ struct vnop_listxattr_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_listxattr_desc; -extern errno_t VNOP_LISTXATTR(vnode_t, uio_t, size_t *, int, vfs_context_t); - -/* - *# - *#% blktooff vp = = = - *# +/*! + @function VNOP_LISTXATTR + @abstract List extended attribute keys. + @discussion Should write a sequence of unseparated, null-terminated extended-attribute + names into the space described by the provided uio. These keys can then be passed to + getxattr() (and VNOP_GETXATTR()). + @param vp The vnode for which to get extended attribute keys. + @param uio Description of target memory for attribute keys. + @param size Should be set to amount of data written to buffer. + @param options XATTR_NOSECURITY: bypass security checking. + @param ctx Context to authenticate for attribute name request. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_LISTXATTR(vnode_t, uio_t, size_t *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_blktooff_args { struct vnodeop_desc *a_desc; vnode_t a_vp; daddr64_t a_lblkno; off_t *a_offset; }; -extern errno_t VNOP_BLKTOOFF(vnode_t, daddr64_t, off_t *); - -/* - *# - *#% offtoblk vp = = = - *# +/*! + @function VNOP_BLKTOOFF + @abstract Call down to a filesystem to convert a logical block number to a file offset. + @discussion VNOP_BLKTOOFF() converts a logical block to a file offset in bytes. That offset + can be passed to VNOP_BLOCKMAP(), then, to get a physical block number--buf_strategy() does this. + @param vp The vnode for which to convert a logical block to an offset. + @param lblkno Logical block number to turn into offset. + @param offset Destination for file offset. + @return 0 for success, else an error code. 
*/ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_BLKTOOFF(vnode_t, daddr64_t, off_t *); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_offtoblk_args { struct vnodeop_desc *a_desc; vnode_t a_vp; off_t a_offset; daddr64_t *a_lblkno; }; -extern errno_t VNOP_OFFTOBLK(vnode_t, off_t, daddr64_t *); - -/* - *# - *#% blockmap vp L L L - *# +/*! + @function VNOP_OFFTOBLK + @abstract Call down to a filesystem to convert a file offset to a logical block number. + @param vp The vnode for which to convert an offset to a logical block number. + @param offset File offset to convert. + @param lblkno Destination for corresponding logical block number. + @return 0 for success, else an error code. */ +#ifdef XNU_KERNEL_PRIVATE +extern errno_t VNOP_OFFTOBLK(vnode_t, off_t, daddr64_t *); +#endif /* XNU_KERNEL_PRIVATE */ + struct vnop_blockmap_args { struct vnodeop_desc *a_desc; vnode_t a_vp; @@ -824,21 +1207,59 @@ struct vnop_blockmap_args { int a_flags; vfs_context_t a_context; }; + +/*! + @function VNOP_BLOCKMAP + @abstract Call down to a filesystem to get information about the on-disk layout of a file region. + @discussion VNOP_BLOCKMAP() returns the information required to pass a request for a contiguous region + down to a device's strategy routine. + @param vp The vnode for which to get on-disk information. + @param foffset Offset (in bytes) at which region starts. + @param size Size of region. + @param bpn Destination for physical block number at which region begins on disk. + @param run Destination for number of bytes which can be found contiguously on-disk before + first discontinuity. + @param poff Currently unused. + @param flags VNODE_READ: request is for a read. VNODE_WRITE: request is for a write. + @param ctx Context to authenticate for blockmap request; currently often set to NULL. + @return 0 for success, else an error code. 
+ */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_BLOCKMAP(vnode_t, off_t, size_t, daddr64_t *, size_t *, void *, int, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_strategy_args { struct vnodeop_desc *a_desc; struct buf *a_bp; }; + +/*! + @function VNOP_STRATEGY + @abstract Initiate I/O on a file (both read and write). + @discussion A filesystem strategy routine takes a buffer, performs whatever manipulations are necessary for passing + the I/O request down to the device layer, and calls the appropriate device's strategy routine. Most filesystems should + just call buf_strategy() with "bp" as the argument. + @param bp Complete specification of requested I/O: region of data involved, whether request is for read or write, and so on. + @return 0 for success, else an error code. + */ extern errno_t VNOP_STRATEGY(struct buf *bp); struct vnop_bwrite_args { struct vnodeop_desc *a_desc; buf_t a_bp; }; -extern errno_t VNOP_BWRITE(buf_t); +/*! + @function VNOP_BWRITE + @abstract Write a buffer to backing store. + @discussion VNOP_BWRITE() is called by buf_bawrite() (asynchronous write) and potentially by buf_bdwrite() (delayed write) + but not by buf_bwrite(). A filesystem may choose to perform some kind of manipulation of the buffer in this routine; it + generally will end up calling VFS's default implementation, vn_bwrite() (which calls buf_bwrite() without further ado). + @param bp The buffer to write. + @return 0 for success, else an error code.
+ */ +extern errno_t VNOP_BWRITE(buf_t); struct vnop_kqfilt_add_args { struct vnodeop_desc *a_desc; @@ -847,7 +1268,10 @@ struct vnop_kqfilt_add_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_kqfilt_add_desc; + +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_KQFILT_ADD(vnode_t , struct knote *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_kqfilt_remove_args { struct vnodeop_desc *a_desc; @@ -856,7 +1280,52 @@ struct vnop_kqfilt_remove_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_kqfilt_remove_desc; + +#ifdef XNU_KERNEL_PRIVATE errno_t VNOP_KQFILT_REMOVE(vnode_t , uintptr_t , vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ + + +#ifdef KERNEL_PRIVATE +#define VNODE_MONITOR_BEGIN 0x01 +#define VNODE_MONITOR_END 0x02 +#define VNODE_MONITOR_UPDATE 0x04 +struct vnop_monitor_args { + struct vnodeop_desc *a_desc; + vnode_t a_vp; + uint32_t a_events; + uint32_t a_flags; + void *a_handle; + vfs_context_t a_context; +}; +extern struct vnodeop_desc vnop_monitor_desc; +#endif /* KERNEL_PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE +/*! + @function VNOP_MONITOR + @abstract Indicate to a filesystem that the number of watchers of a file has changed. + @param vp The vnode whose watch state has changed. + @param events Unused. Filesystems can ignore this parameter. + @param flags Type of change to the watch state. VNODE_MONITOR_BEGIN is passed when the kernel + begins tracking a new watcher of a file. VNODE_MONITOR_END is passed when a watcher stops watching a file. + VNODE_MONITOR_UPDATE is currently unused. A filesystem is guaranteed that each VNODE_MONITOR_BEGIN + will be matched by a VNODE_MONITOR_END with the same "handle" argument. + @param handle Unique identifier for a given watcher. 
A VNODE_MONITOR_BEGIN for a given handle will be matched with a + VNODE_MONITOR_END for the same handle; a filesystem need not consider this parameter unless + it for some reason wants to be able to match specific VNOP_MONITOR calls rather than just keeping + a count. + @param ctx The context which is starting to monitor a file or ending a watch on a file. A matching + pair of VNODE_MONITOR_BEGIN and VNODE_MONITOR_END need not have the same context. + @discussion VNOP_MONITOR() is intended to let networked filesystems know when they should bother + listening for changes to files which occur remotely, so that they can post notifications using + vnode_notify(). Local filesystems should not implement a monitor vnop. + It is called when there is a new watcher for a file or when a watcher for a file goes away. + Each BEGIN will be matched with an END with the same handle. Note that vnode_ismonitored() can + be used to see if there are currently watchers for a file. + */ +errno_t VNOP_MONITOR(vnode_t , uint32_t, uint32_t, void*, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct label; struct vnop_setlabel_args { @@ -866,7 +1335,18 @@ struct vnop_setlabel_args { vfs_context_t a_context; }; extern struct vnodeop_desc vnop_setlabel_desc; + +/*! + @function VNOP_SETLABEL + @abstract Associate a MACF label with a file. + @param vp The vnode to label. + @param label The desired label. + @param ctx Context to authenticate for label change. + @return 0 for success, else an error code. + */ +#ifdef XNU_KERNEL_PRIVATE errno_t VNOP_SETLABEL(vnode_t, struct label *, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ #ifdef __APPLE_API_UNSTABLE @@ -883,7 +1363,26 @@ struct vnop_getnamedstream_args { int a_flags; vfs_context_t a_context; }; + +/*! + @function VNOP_GETNAMEDSTREAM + @abstract Get a named stream associated with a file. + @discussion If this call succeeds, svpp should be returned with an iocount which the caller + will drop.
VFS provides a facility for simulating named streams when interacting with filesystems + which do not support them. + @param vp The vnode for which to get a named stream. + @param svpp Destination for pointer to named stream's vnode. + @param name The name of the named stream, e.g. "com.apple.ResourceFork". + @param operation Operation to perform. In HFS and AFP, this parameter is only considered as follows: + if the resource fork has not been opened and the operation is not NS_OPEN, fail with ENOATTR. Currently + only passed as NS_OPEN by VFS. + @param flags Currently unused. + @param ctx Context to authenticate for getting named stream. + @return 0 for success, else an error code. + */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_GETNAMEDSTREAM(vnode_t, vnode_t *, const char *, enum nsoperation, int flags, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_makenamedstream_args { struct vnodeop_desc *a_desc; @@ -893,7 +1392,23 @@ struct vnop_makenamedstream_args { int a_flags; vfs_context_t a_context; }; + +/*! + @function VNOP_MAKENAMEDSTREAM + @abstract Create a named stream associated with a file. + @discussion If this call succeeds, svpp should be returned with an iocount which the caller will drop. + VFS provides a facility for simulating named streams when interacting with filesystems + which do not support them. + @param vp The vnode for which to get a named stream. + @param svpp Destination for pointer to named stream's vnode. + @param name The name of the named stream, e.g. "com.apple.ResourceFork". + @param flags Currently unused. + @param ctx Context to authenticate creating named stream. + @return 0 for success, else an error code. 
+ */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_MAKENAMEDSTREAM(vnode_t, vnode_t *, const char *, int flags, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ struct vnop_removenamedstream_args { struct vnodeop_desc *a_desc; @@ -903,7 +1418,22 @@ struct vnop_removenamedstream_args { int a_flags; vfs_context_t a_context; }; + +/*! + @function VNOP_REMOVENAMEDSTREAM + @abstract Delete a named stream associated with a file. + @discussion VFS provides a facility for simulating named streams when interacting with filesystems + which do not support them. + @param vp The vnode to which the named stream belongs. + @param svp The named stream's vnode. + @param name The name of the named stream, e.g. "com.apple.ResourceFork". + @param flags Currently unused. + @param ctx Context to authenticate deleting named stream. + @return 0 for success, else an error code. + */ +#ifdef XNU_KERNEL_PRIVATE extern errno_t VNOP_REMOVENAMEDSTREAM(vnode_t, vnode_t, const char *, int flags, vfs_context_t); +#endif /* XNU_KERNEL_PRIVATE */ #endif #endif diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index 8948d8310..cf8f7b455 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -70,8 +70,6 @@ #ifndef _SYS_VNODE_INTERNAL_H_ #define _SYS_VNODE_INTERNAL_H_ -#define INTERIM_FSNODE_LOCK 1 - #include #include #include @@ -117,18 +115,18 @@ struct vnode { LIST_HEAD(, namecache) v_nclinks; /* name cache entries that name this vnode */ LIST_HEAD(, namecache) v_ncchildren; /* name cache entries that regard us as there parent */ vnode_t v_defer_reclaimlist; /* in case we have to defer the reclaim to avoid recursion */ - u_long v_listflag; /* flags protected by the vnode_list_lock (see below) */ - u_long v_flag; /* vnode flags (see below) */ - u_short v_lflag; /* vnode local and named ref flags */ - u_char v_iterblkflags; /* buf iterator flags */ - u_char v_references; /* number of times io_count has been granted */ + uint32_t v_listflag; /* flags protected by the vnode_list_lock (see below) */ + uint32_t v_flag; /* vnode flags (see below) */ + uint16_t v_lflag; /* vnode local and named ref flags */ + uint8_t v_iterblkflags; /* buf iterator flags */ + uint8_t v_references; /* number of times io_count has been granted */ int32_t v_kusecount; /* count of in-kernel refs */ int32_t v_usecount; /* reference count of users */ int32_t v_iocount; /* iocounters */ void * v_owner; /* act that owns the vnode */ - u_short v_type; /* vnode type */ - u_short v_tag; /* type of underlying data */ - int v_id; /* identity of vnode contents */ + uint16_t v_type; /* vnode type */ + uint16_t v_tag; /* type of underlying data */ + uint32_t v_id; /* identity of vnode contents */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ @@ -138,6 +136,7 @@ struct vnode { } v_un; struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + struct klist v_knotes; /* knotes attached to this vnode */ /* * the following 4 fields are protected * by the name_cache_lock held in @@ -150,14 +149,17 @@ struct vnode { /* * 
back to the vnode lock for protection */ - long v_numoutput; /* num of writes in progress */ - long v_writecount; /* reference count of writers */ + int32_t v_numoutput; /* num of writes in progress */ + int32_t v_writecount; /* reference count of writers */ const char *v_name; /* name component of the vnode */ vnode_t v_parent; /* pointer to parent vnode */ -#ifdef INTERIM_FSNODE_LOCK struct lockf *v_lockf; /* advisory lock list head */ +#ifndef __LP64__ struct unsafe_fsnode *v_unsafefs; /* pointer to struct used to lock */ -#endif /* vnodes on unsafe filesystems */ +#else + int32_t v_reserved1; + int32_t v_reserved2; +#endif /* __LP64__ */ int (**v_op)(void *); /* vnode operations vector */ mount_t v_mount; /* ptr to vfs we are in */ void * v_data; /* private data for fs */ @@ -193,10 +195,10 @@ struct vnode { */ #define VL_SUSPENDED 0x0001 /* vnode is suspended */ #define VL_DRAIN 0x0002 /* vnode is being drained */ -#define VL_TERMINATE 0x0004 /* vnode is marked for termination */ -#define VL_TERMWANT 0x0008 /* vnode is marked for termination */ -#define VL_DEAD 0x0010 /* vnode is dead and completed recycle */ -#define VL_MARKTERM 0x0020 /* vnode is dead and completed recycle */ +#define VL_TERMINATE 0x0004 /* vnode is in the process of being recycled */ +#define VL_TERMWANT 0x0008 /* there's a waiter for recycle finish (vnode_getiocount)*/ +#define VL_DEAD 0x0010 /* vnode is dead, cleaned of filesystem-specific info */ +#define VL_MARKTERM 0x0020 /* vnode should be recycled when no longer referenced */ #define VL_MOUNTDEAD 0x0040 /* v_moutnedhere is dead */ #define VL_NEEDINACTIVE 0x0080 /* delay VNOP_INACTIVE until iocount goes to 0 */ @@ -204,6 +206,7 @@ struct vnode { #define VL_LABELWAIT 0x0200 /* vnode is marked for labeling */ #define VL_LABELED 0x0400 /* vnode is labeled */ #define VL_LWARNED 0x0800 +#define VL_HASSTREAMS 0x1000 /* vnode has had at least one associated named stream vnode (may not have one any longer) */ #define VNAMED_UBC 0x2000 /* ubc 
named reference */ #define VNAMED_MOUNT 0x4000 /* mount point named reference */ @@ -221,14 +224,14 @@ struct vnode { #define VDEVFLUSH 0x000040 /* device vnode after vflush */ #define VMOUNT 0x000080 /* mount operation in progress */ #define VBWAIT 0x000100 /* waiting for output to complete */ -#define VALIASED 0x000200 /* vnode has an alias */ + /* Free slot here after removing VALIASED for radar #5971707 */ #define VNOCACHE_DATA 0x000400 /* don't keep data cached once it's been consumed */ #define VSTANDARD 0x000800 /* vnode obtained from common pool */ #define VAGE 0x001000 /* Insert vnode at head of free list */ #define VRAOFF 0x002000 /* read ahead disabled */ #define VNCACHEABLE 0x004000 /* vnode is allowed to be put back in name cache */ #if NAMEDSTREAMS -#define VISSHADOW 0x008000 /* vnode is a shadow file */ +#define VISSHADOW 0x008000 /* vnode is a shadow file */ #endif #define VSWAP 0x010000 /* vnode is being used as swapfile */ #define VTHROTTLED 0x020000 /* writes or pageouts have been throttled */ @@ -337,6 +340,7 @@ extern struct vnodeop_desc *vnodeop_descs[]; struct ostat; #define BUILDPATH_NO_FS_ENTER 0x1 /* Use cache values, do not enter file system */ +#define BUILDPATH_CHECKACCESS 0x2 /* Check if parents have search rights */ int build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx); int bdevvp(dev_t dev, struct vnode **vpp); @@ -345,25 +349,32 @@ void vprint(const char *label, struct vnode *vp); __private_extern__ int is_package_name(const char *name, int len); -__private_extern__ int set_package_extensions_table(void *data, int nentries, int maxwidth); -int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, - int len, off_t offset, enum uio_seg segflg, int ioflg, - kauth_cred_t cred, int *aresid, struct proc *p); +__private_extern__ int set_package_extensions_table(user_addr_t data, int nentries, int maxwidth); int vn_rdwr_64(enum uio_rw rw, struct vnode *vp, uint64_t base, int64_t len, off_t 
offset, enum uio_seg segflg, - int ioflg, kauth_cred_t cred, int *aresid, + int ioflg, kauth_cred_t cred, int64_t *aresid, struct proc *p); #if CONFIG_MACF int vn_setlabel (struct vnode *vp, struct label *intlabel, vfs_context_t context); #endif void fifo_printinfo(struct vnode *vp); -int vn_lock(struct vnode *vp, int flags, struct proc *p); int vn_open(struct nameidata *ndp, int fmode, int cmode); int vn_open_modflags(struct nameidata *ndp, int *fmode, int cmode); int vn_open_auth(struct nameidata *ndp, int *fmode, struct vnode_attr *); int vn_close(vnode_t, int flags, vfs_context_t ctx); +void lock_vnode_and_post(vnode_t, int); + +#define post_event_if_success(_vp, _error, _event) \ + do { \ + if (0 == (_error)) { \ + lock_vnode_and_post((_vp), (_event)); \ + } \ + } while (0) + + + #define VN_CREATE_NOAUTH (1<<0) #define VN_CREATE_NOINHERIT (1<<1) #define VN_CREATE_UNION (1<<2) @@ -380,6 +391,8 @@ int default_getxattr(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t) int default_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t); int default_removexattr(vnode_t, const char *, int, vfs_context_t); +int check_appledouble_header(vnode_t, vfs_context_t); + #if NAMEDSTREAMS errno_t vnode_getnamedstream(vnode_t, vnode_t *, const char *, enum nsoperation, int, vfs_context_t); errno_t vnode_makenamedstream(vnode_t, vnode_t *, const char *, int, vfs_context_t); @@ -388,21 +401,16 @@ errno_t vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); errno_t vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); #endif -int vn_path_package_check(vnode_t vp, char *path, int pathlen, int *component); void nchinit(void) __attribute__((section("__TEXT, initcode"))); -int resize_namecache(u_int newsize); +int resize_namecache(uint32_t newsize); void name_cache_lock_shared(void); void name_cache_lock(void); void name_cache_unlock(void); void cache_enter_with_gen(vnode_t dvp, vnode_t vp, struct componentname *cnp, int gen); +const 
char *cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp); -const char *vnode_getname(vnode_t vp); -void vnode_putname(const char *name); - -vnode_t vnode_getparent(vnode_t vp); - -int vn_pathconf(vnode_t, int, register_t *, vfs_context_t); +int vn_pathconf(vnode_t, int, int32_t *, vfs_context_t); #define vnode_lock_convert(v) lck_mtx_convert_spin(&(v)->v_lock) @@ -414,14 +422,15 @@ void vnode_list_unlock(void); int vnode_ref_ext(vnode_t, int); void vnode_rele_ext(vnode_t, int, int); void vnode_rele_internal(vnode_t, int, int, int); -int vnode_getwithref(vnode_t); #ifdef BSD_KERNEL_PRIVATE int vnode_getalways(vnode_t); +int vget_internal(vnode_t, int, int); #endif /* BSD_KERNEL_PRIVATE */ int vnode_get_locked(vnode_t); int vnode_put_locked(vnode_t); int vnode_issock(vnode_t); +int vnode_isaliased(vnode_t); void unlock_fsnode(vnode_t, int *); int lock_fsnode(vnode_t, int *); @@ -433,6 +442,7 @@ errno_t vnode_suspend(vnode_t); errno_t vnode_size(vnode_t, off_t *, vfs_context_t); errno_t vnode_setsize(vnode_t, off_t, int ioflag, vfs_context_t); int vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); +int vnode_isspec(vnode_t vp); void vn_setunionwait(vnode_t); void vn_checkunionwait(vnode_t); @@ -441,19 +451,25 @@ void vn_clearunionwait(vnode_t, int); void SPECHASH_LOCK(void); void SPECHASH_UNLOCK(void); -int check_cdevmounted(dev_t, enum vtype, int *); - void vnode_authorize_init(void) __attribute__((section("__TEXT, initcode"))); -void vfsinit(void); +void vfsinit(void) __attribute__((section("__TEXT, initcode"))); +void vnode_lock(vnode_t); +void vnode_unlock(vnode_t); /* * XXX exported symbols; should be static */ void vfs_op_init(void) __attribute__((section("__TEXT, initcode"))); void vfs_opv_init(void) __attribute__((section("__TEXT, initcode"))); -int vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, +int vfs_sysctl(int *name, uint32_t namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t 
newp, size_t newlen, struct proc *p); int sysctl_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); +#ifdef BSD_KERNEL_PRIVATE +void vnode_knoteupdate(struct knote *kn); +void vnode_setneedinactive(vnode_t); +int vnode_hasnamedstreams(vnode_t); /* Does this vnode have associated named streams? */ +#endif + #endif /* !_SYS_VNODE_INTERNAL_H_ */ diff --git a/bsd/sys/xattr.h b/bsd/sys/xattr.h index 01f9c71e0..c9ecf4275 100644 --- a/bsd/sys/xattr.h +++ b/bsd/sys/xattr.h @@ -44,6 +44,9 @@ /* Set this to bypass the default extended attribute file (dot-underscore file) */ #define XATTR_NODEFAULT 0x0010 +/* option for f/getxattr() and f/listxattr() to expose the HFS Compression extended attributes */ +#define XATTR_SHOWCOMPRESSION 0x0020 + #define XATTR_MAXNAMELEN 127 #define XATTR_FINDERINFO_NAME "com.apple.FinderInfo" @@ -55,6 +58,8 @@ __BEGIN_DECLS int xattr_protected(const char *); int xattr_validatename(const char *); + +#define XATTR_MAXSIZE (64 * 1024 * 1024) __END_DECLS #endif /* KERNEL */ diff --git a/bsd/ufs/ffs/Makefile b/bsd/ufs/ffs/Makefile deleted file mode 100644 index 02f68adee..000000000 --- a/bsd/ufs/ffs/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_PPC = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_PPC = \ - -EXPINC_SUBDIRS_I386 = \ - -DATAFILES = \ - ffs_extern.h fs.h - -INSTALL_MI_LIST = ${DATAFILES} - -INSTALL_MI_DIR = ufs/ffs - -EXPORT_MI_LIST = ${DATAFILES} - -EXPORT_MI_DIR = ufs/ffs - - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/bsd/ufs/ffs/ffs_alloc.c b/bsd/ufs/ffs/ffs_alloc.c deleted file mode 100644 index 0c127dcfd..000000000 --- a/bsd/ufs/ffs/ffs_alloc.c +++ /dev/null @@ -1,1748 
+0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 - */ -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include - -#if REV_ENDIAN_FS -#include -#include -#endif /* REV_ENDIAN_FS */ - -extern u_long nextgennumber; - -static ufs_daddr_t ffs_alloccg(struct inode *, int, ufs_daddr_t, int); -static ufs_daddr_t ffs_alloccgblk(struct fs *, struct cg *, ufs_daddr_t); -static ufs_daddr_t ffs_clusteralloc(struct inode *, int, ufs_daddr_t, int); -static ino_t ffs_dirpref(struct inode *); -static ufs_daddr_t ffs_fragextend(struct inode *, int, long, int, int); -static void ffs_fserr(struct fs *, u_int, char *); -static u_long ffs_hashalloc - (struct inode *, int, long, int, u_int32_t (*)()); -static ino_t ffs_nodealloccg(struct inode *, int, ufs_daddr_t, int); -static ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); -static void ffs_clusteracct - (struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt); - -/* - * Allocate a block in the file system. - * - * The size of the requested block is given, which must be some - * multiple of fs_fsize and <= fs_bsize. - * A preference may be optionally specified. If a preference is given - * the following hierarchy is used to allocate a block: - * 1) allocate the requested block. - * 2) allocate a rotationally optimal block in the same cylinder. - * 3) allocate a block in the same cylinder group. - * 4) quadradically rehash into other cylinder groups, until an - * available block is located. - * If no block preference is given the following heirarchy is used - * to allocate a block: - * 1) allocate a block in the cylinder group that contains the - * inode for the file. - * 2) quadradically rehash into other cylinder groups, until an - * available block is located. 
- */ -ffs_alloc(ip, lbn, bpref, size, cred, bnp) - register struct inode *ip; - ufs_daddr_t lbn, bpref; - int size; - kauth_cred_t cred; - ufs_daddr_t *bnp; -{ - register struct fs *fs; - ufs_daddr_t bno; - int cg, error; - int devBlockSize=0; - *bnp = 0; - fs = ip->i_fs; -#if DIAGNOSTIC - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", - ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); - panic("ffs_alloc: bad size"); - } - if (!IS_VALID_CRED(cred)) - panic("ffs_alloc: missing credential\n"); -#endif /* DIAGNOSTIC */ - if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) - goto nospace; - if (suser(cred, NULL) && freespace(fs, fs->fs_minfree) <= 0) - goto nospace; - devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); -#if QUOTA - if (error = chkdq(ip, (int64_t)size, cred, 0)) - return (error); -#endif /* QUOTA */ - if (bpref >= fs->fs_size) - bpref = 0; - if (bpref == 0) - cg = ino_to_cg(fs, ip->i_number); - else - cg = dtog(fs, bpref); - bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, - (u_int32_t (*)())ffs_alloccg); - if (bno > 0) { - ip->i_blocks += btodb(size, devBlockSize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bnp = bno; - return (0); - } -#if QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, (int64_t)-size, cred, FORCE); -#endif /* QUOTA */ -nospace: - ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); - uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Reallocate a fragment to a bigger size - * - * The number and size of the old block is given, and a preference - * and new size is also specified. The allocator attempts to extend - * the original block. Failing that, the regular block allocator is - * invoked to get an appropriate block. 
- */ -ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) - register struct inode *ip; - ufs_daddr_t lbprev; - ufs_daddr_t bpref; - int osize, nsize; - kauth_cred_t cred; - struct buf **bpp; -{ - register struct fs *fs; - struct buf *bp; - int cg, request, error; - ufs_daddr_t bprev, bno; - int devBlockSize=0; - - *bpp = 0; - fs = ip->i_fs; -#if DIAGNOSTIC - if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || - (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { - printf( - "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n", - ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); - panic("ffs_realloccg: bad size"); - } - if (!IS_VALID_CRED(cred)) - panic("ffs_realloccg: missing credential\n"); -#endif /* DIAGNOSTIC */ - if (suser(cred, NULL) != 0 && freespace(fs, fs->fs_minfree) <= 0) - goto nospace; - if ((bprev = ip->i_db[lbprev]) == 0) { - printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n", - ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); - panic("ffs_realloccg: bad bprev"); - } - /* - * Allocate the extra space in the buffer. - */ - if (error = (int)buf_bread(ITOV(ip), (daddr64_t)((unsigned)lbprev), osize, NOCRED, &bp)) { - buf_brelse(bp); - return (error); - } - devBlockSize = vfs_devblocksize(vnode_mount(ITOV(ip))); - -#if QUOTA - if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0)) - { - buf_brelse(bp); - return (error); - } -#endif /* QUOTA */ - /* - * Check for extension in the existing location. - */ - cg = dtog(fs, bprev); - if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) { - if ((ufs_daddr_t)buf_blkno(bp) != fsbtodb(fs, bno)) - panic("bad blockno"); - ip->i_blocks += btodb(nsize - osize, devBlockSize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); - buf_setflags(bp, B_DONE); - bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); - *bpp = bp; - return (0); - } - /* - * Allocate a new disk location. 
- */ - if (bpref >= fs->fs_size) - bpref = 0; - switch ((int)fs->fs_optim) { - case FS_OPTSPACE: - /* - * Allocate an exact sized fragment. Although this makes - * best use of space, we will waste time relocating it if - * the file continues to grow. If the fragmentation is - * less than half of the minimum free reserve, we choose - * to begin optimizing for time. - */ - request = nsize; - if (fs->fs_minfree < 5 || - fs->fs_cstotal.cs_nffree > - fs->fs_dsize * fs->fs_minfree / (2 * 100)) - break; - log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", - fs->fs_fsmnt); - fs->fs_optim = FS_OPTTIME; - break; - case FS_OPTTIME: - /* - * At this point we have discovered a file that is trying to - * grow a small fragment to a larger fragment. To save time, - * we allocate a full sized block, then free the unused portion. - * If the file continues to grow, the `ffs_fragextend' call - * above will be able to grow it in place without further - * copying. If aberrant programs cause disk fragmentation to - * grow within 2% of the free reserve, we choose to begin - * optimizing for space. 
- */ - request = fs->fs_bsize; - if (fs->fs_cstotal.cs_nffree < - fs->fs_dsize * (fs->fs_minfree - 2) / 100) - break; - log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", - fs->fs_fsmnt); - fs->fs_optim = FS_OPTSPACE; - break; - default: - printf("dev = 0x%x, optim = %d, fs = %s\n", - ip->i_dev, fs->fs_optim, fs->fs_fsmnt); - panic("ffs_realloccg: bad optim"); - /* NOTREACHED */ - } - bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, - (u_int32_t (*)())ffs_alloccg); - if (bno > 0) { - buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, bno))); - ffs_blkfree(ip, bprev, (long)osize); - if (nsize < request) - ffs_blkfree(ip, bno + numfrags(fs, nsize), - (long)(request - nsize)); - ip->i_blocks += btodb(nsize - osize, devBlockSize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - allocbuf(bp, nsize); - buf_setflags(bp, B_DONE); - bzero((char *)buf_dataptr(bp) + osize, (u_int)buf_size(bp) - osize); - *bpp = bp; - return (0); - } -#if QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE); -#endif /* QUOTA */ - buf_brelse(bp); -nospace: - /* - * no space available - */ - ffs_fserr(fs, kauth_cred_getuid(cred), "file system full"); - uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Reallocate a sequence of blocks into a contiguous sequence of blocks. - * - * The vnode and an array of buffer pointers for a range of sequential - * logical blocks to be made contiguous is given. The allocator attempts - * to find a range of sequential blocks starting as close as possible to - * an fs_rotdelay offset from the end of the allocation for the logical - * block immediately preceeding the current range. If successful, the - * physical block numbers in the buffer pointers and in the inode are - * changed to reflect the new allocation. If unsuccessful, the allocation - * is left unchanged. The success in doing the reallocation is returned. 
- * Note that the error return is not reflected back to the user. Rather - * the previous block allocation will be used. - */ -/* Note: This routine is unused in UBC cluster I/O */ - -int doasyncfree = 1; -int doreallocblks = 1; - - -/* - * Allocate an inode in the file system. - * - * If allocating a directory, use ffs_dirpref to select the inode. - * If allocating in a directory, the following hierarchy is followed: - * 1) allocate the preferred inode. - * 2) allocate an inode in the same cylinder group. - * 3) quadradically rehash into other cylinder groups, until an - * available inode is located. - * If no inode preference is given the following heirarchy is used - * to allocate an inode: - * 1) allocate an inode in cylinder group 0. - * 2) quadradically rehash into other cylinder groups, until an - * available inode is located. - */ -int -ffs_valloc( - struct vnode *pvp, - mode_t mode, - kauth_cred_t cred, - struct vnode **vpp) - -{ - register struct inode *pip; - register struct fs *fs; - register struct inode *ip; - struct timeval tv; - ino_t ino, ipref; - int cg, error; - - *vpp = NULL; - pip = VTOI(pvp); - fs = pip->i_fs; - if (fs->fs_cstotal.cs_nifree == 0) - goto noinodes; - - if ((mode & IFMT) == IFDIR) - ipref = ffs_dirpref(pip); - else - ipref = pip->i_number; - if (ipref >= fs->fs_ncg * fs->fs_ipg) - ipref = 0; - cg = ino_to_cg(fs, ipref); - /* - * Track the number of dirs created one after another - * in a cg without intervening files. 
- */ - if ((mode & IFMT) == IFDIR) { - if (fs->fs_contigdirs[cg] < 255) - fs->fs_contigdirs[cg]++; - } else { - if (fs->fs_contigdirs[cg] > 0) - fs->fs_contigdirs[cg]--; - } - ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg); - if (ino == 0) - goto noinodes; - - error = ffs_vget_internal(pvp->v_mount, ino, vpp, NULL, NULL, mode, 0); - if (error) { - ffs_vfree(pvp, ino, mode); - return (error); - } - ip = VTOI(*vpp); - - if (ip->i_mode) { - printf("mode = 0%o, inum = %d, fs = %s\n", - ip->i_mode, ip->i_number, fs->fs_fsmnt); - panic("ffs_valloc: dup alloc"); - } - if (ip->i_blocks) { /* XXX */ - printf("free inode %s/%d had %d blocks\n", - fs->fs_fsmnt, ino, ip->i_blocks); - ip->i_blocks = 0; - } - ip->i_flags = 0; - /* - * Set up a new generation number for this inode. - */ - microtime(&tv); - if (++nextgennumber < (u_long)tv.tv_sec) - nextgennumber = tv.tv_sec; - ip->i_gen = nextgennumber; - return (0); -noinodes: - ffs_fserr(fs, kauth_cred_getuid(cred), "out of inodes"); - uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); - return (ENOSPC); -} - -/* - * Find a cylinder group to place a directory. - * - * The policy implemented by this algorithm is to allocate a - * directory inode in the same cylinder group as its parent - * directory, but also to reserve space for its files inodes - * and data. Restrict the number of directories which may be - * allocated one after another in the same cylinder group - * without intervening allocation of files. 
- */ -static ino_t -ffs_dirpref(pip) - struct inode *pip; -{ - register struct fs *fs; - int cg, prefcg, dirsize, cgsize; - int avgifree, avgbfree, avgndir, curdirsize; - int minifree, minbfree, maxndir; - int mincg, minndir; - int maxcontigdirs; - - fs = pip->i_fs; - avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; - - /* - * Force allocation in another cg if creating a first level dir. - */ - if (ITOV(pip)->v_flag & VROOT) { -#ifdef __APPLE__ - prefcg = random() % fs->fs_ncg; -#else - prefcg = arc4random() % fs->fs_ncg; -#endif - mincg = prefcg; - minndir = fs->fs_ipg; - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < minndir && - fs->fs_cs(fs, cg).cs_nifree >= avgifree && - fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - mincg = cg; - minndir = fs->fs_cs(fs, cg).cs_ndir; - } - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < minndir && - fs->fs_cs(fs, cg).cs_nifree >= avgifree && - fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - mincg = cg; - minndir = fs->fs_cs(fs, cg).cs_ndir; - } - return ((ino_t)(fs->fs_ipg * mincg)); - } - - /* - * Count various limits which used for - * optimal allocation of a directory inode. - */ - maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); - minifree = avgifree - fs->fs_ipg / 4; - if (minifree < 0) - minifree = 0; - minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4; - if (minbfree < 0) - minbfree = 0; - cgsize = fs->fs_fsize * fs->fs_fpg; - dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; - curdirsize = avgndir ? 
(cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; - if (dirsize < curdirsize) - dirsize = curdirsize; - maxcontigdirs = min(cgsize / dirsize, 255); - if (fs->fs_avgfpdir > 0) - maxcontigdirs = min(maxcontigdirs, - fs->fs_ipg / fs->fs_avgfpdir); - if (maxcontigdirs == 0) - maxcontigdirs = 1; - - /* - * Limit number of dirs in one cg and reserve space for - * regular files, but only if we have no deficit in - * inodes or space. - */ - prefcg = ino_to_cg(fs, pip->i_number); - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < maxndir && - fs->fs_cs(fs, cg).cs_nifree >= minifree && - fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { - if (fs->fs_contigdirs[cg] < maxcontigdirs) - return ((ino_t)(fs->fs_ipg * cg)); - } - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_ndir < maxndir && - fs->fs_cs(fs, cg).cs_nifree >= minifree && - fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { - if (fs->fs_contigdirs[cg] < maxcontigdirs) - return ((ino_t)(fs->fs_ipg * cg)); - } - /* - * This is a backstop when we have deficit in space. - */ - for (cg = prefcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) - return ((ino_t)(fs->fs_ipg * cg)); - for (cg = 0; cg < prefcg; cg++) - if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) - break; - return ((ino_t)(fs->fs_ipg * cg)); -} - -/* - * Select the desired position for the next block in a file. The file is - * logically divided into sections. The first section is composed of the - * direct blocks. Each additional section contains fs_maxbpg blocks. - * - * If no blocks have been allocated in the first section, the policy is to - * request a block in the same cylinder group as the inode that describes - * the file. If no blocks have been allocated in any other section, the - * policy is to place the section in a cylinder group with a greater than - * average number of free blocks. An appropriate cylinder group is found - * by using a rotor that sweeps the cylinder groups. 
When a new group of - * blocks is needed, the sweep begins in the cylinder group following the - * cylinder group from which the previous allocation was made. The sweep - * continues until a cylinder group with greater than the average number - * of free blocks is found. If the allocation is for the first block in an - * indirect block, the information on the previous allocation is unavailable; - * here a best guess is made based upon the logical block number being - * allocated. - * - * If a section is already partially allocated, the policy is to - * contiguously allocate fs_maxcontig blocks. The end of one of these - * contiguous blocks and the beginning of the next is physically separated - * so that the disk head will be in transit between them for at least - * fs_rotdelay milliseconds. This is to allow time for the processor to - * schedule another I/O transfer. - */ -ufs_daddr_t -ffs_blkpref(ip, lbn, indx, bap) - struct inode *ip; - ufs_daddr_t lbn; - int indx; - ufs_daddr_t *bap; -{ - register struct fs *fs; - register int cg; - int avgbfree, startcg; - ufs_daddr_t nextblk; -#if REV_ENDIAN_FS - daddr_t prev=0; - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; -#if REV_ENDIAN_FS - if (indx && bap) { - if (rev_endian) { - if (bap != &ip->i_db[0]) - prev = OSSwapInt32(bap[indx - 1]); - else - prev = bap[indx - 1]; - } else prev = bap[indx - 1]; - } - if (indx % fs->fs_maxbpg == 0 || prev == 0) -#else /* REV_ENDIAN_FS */ - if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) -#endif /* REV_ENDIAN_FS */ - { - if (lbn < NDADDR) { - cg = ino_to_cg(fs, ip->i_number); - return (fs->fs_fpg * cg + fs->fs_frag); - } - /* - * Find a cylinder with greater than average number of - * unused data blocks. 
- */ -#if REV_ENDIAN_FS - if (indx == 0 || prev == 0) -#else /* REV_ENDIAN_FS */ - if (indx == 0 || bap[indx - 1] == 0) -#endif /* REV_ENDIAN_FS */ - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; - else -#if REV_ENDIAN_FS - startcg = dtog(fs, prev) + 1; -#else /* REV_ENDIAN_FS */ - startcg = dtog(fs, bap[indx - 1]) + 1; -#endif /* REV_ENDIAN_FS */ - startcg %= fs->fs_ncg; - avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; - for (cg = startcg; cg < fs->fs_ncg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - for (cg = 0; cg <= startcg; cg++) - if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { - fs->fs_cgrotor = cg; - return (fs->fs_fpg * cg + fs->fs_frag); - } - return (NULL); - } - /* - * One or more previous blocks have been laid out. If less - * than fs_maxcontig previous blocks are contiguous, the - * next block is requested contiguously, otherwise it is - * requested rotationally delayed by fs_rotdelay milliseconds. - */ -#if REV_ENDIAN_FS - if (rev_endian) { - nextblk = prev + fs->fs_frag; - if (indx < fs->fs_maxcontig) { - return (nextblk); - } - if (bap != &ip->i_db[0]) - prev = OSSwapInt32(bap[indx - fs->fs_maxcontig]); - else - prev = bap[indx - fs->fs_maxcontig]; - if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk) - return (nextblk); - } else { -#endif /* REV_ENDIAN_FS */ - nextblk = bap[indx - 1] + fs->fs_frag; - if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] + - blkstofrags(fs, fs->fs_maxcontig) != nextblk) - return (nextblk); -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (fs->fs_rotdelay != 0) - /* - * Here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next block. - */ - nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000), fs->fs_frag); - return (nextblk); -} - -/* - * Implement the cylinder overflow algorithm. 
- * - * The policy implemented by this algorithm is: - * 1) allocate the block in its requested cylinder group. - * 2) quadradically rehash on the cylinder group number. - * 3) brute force search for a free block. - */ -/*VARARGS5*/ -static u_long -ffs_hashalloc(ip, cg, pref, size, allocator) - struct inode *ip; - int cg; - long pref; - int size; /* size for data blocks, mode for inodes */ - u_int32_t (*allocator)(); -{ - register struct fs *fs; - long result; - int i, icg = cg; - - fs = ip->i_fs; - /* - * 1: preferred cylinder group - */ - result = (*allocator)(ip, cg, pref, size); - if (result) - return (result); - /* - * 2: quadratic rehash - */ - for (i = 1; i < fs->fs_ncg; i *= 2) { - cg += i; - if (cg >= fs->fs_ncg) - cg -= fs->fs_ncg; - result = (*allocator)(ip, cg, 0, size); - if (result) - return (result); - } - /* - * 3: brute force search - * Note that we start at i == 2, since 0 was checked initially, - * and 1 is always checked in the quadratic rehash. - */ - cg = (icg + 2) % fs->fs_ncg; - for (i = 2; i < fs->fs_ncg; i++) { - result = (*allocator)(ip, cg, 0, size); - if (result) - return (result); - cg++; - if (cg == fs->fs_ncg) - cg = 0; - } - return (NULL); -} - -/* - * Determine whether a fragment can be extended. - * - * Check to see if the necessary fragments are available, and - * if they are, allocate them. 
- */ -static ufs_daddr_t -ffs_fragextend(ip, cg, bprev, osize, nsize) - struct inode *ip; - int cg; - long bprev; - int osize, nsize; -{ - register struct fs *fs; - register struct cg *cgp; - struct buf *bp; - struct timeval tv; - long bno; - int frags, bbase; - int i, error; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) - return (NULL); - frags = numfrags(fs, nsize); /* number of fragments needed */ - bbase = fragnum(fs, bprev); /* offset in a frag (it is mod fragsize */ - if (bbase > fragnum(fs, (bprev + frags - 1))) { - /* cannot extend across a block boundary */ - return (NULL); - } - /* read corresponding cylinder group info */ - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (NULL); - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) { - byte_swap_cgin(cgp, fs); - } -#endif /* REV_ENDIAN_FS */ - - if (!cg_chkmagic(cgp)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - microtime(&tv); - cgp->cg_time = tv.tv_sec; - bno = dtogd(fs, bprev); - for (i = numfrags(fs, osize); i < frags; i++) - if (isclr(cg_blksfree(cgp), bno + i)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - /* - * the current fragment can be extended - * deduct the count on fragment being extended into - * increase the count on the remaining fragment (if any) - * allocate the extended piece - */ - for (i = frags; i < fs->fs_frag - bbase; i++) - if (isclr(cg_blksfree(cgp), bno + i)) - break; - cgp->cg_frsum[i - numfrags(fs, osize)]--; - if (i != frags) - cgp->cg_frsum[i - frags]++; - for (i = 
numfrags(fs, osize); i < frags; i++) { - clrbit(cg_blksfree(cgp), bno + i); - cgp->cg_cs.cs_nffree--; - fs->fs_cstotal.cs_nffree--; - fs->fs_cs(fs, cg).cs_nffree--; - } - fs->fs_fmod = 1; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (bprev); -} - -/* - * Determine whether a block can be allocated. - * - * Check to see if a block of the appropriate size is available, - * and if it is, allocate it. - */ -static ufs_daddr_t -ffs_alloccg(ip, cg, bpref, size) - struct inode *ip; - int cg; - ufs_daddr_t bpref; - int size; -{ - register struct fs *fs; - register struct cg *cgp; - struct buf *bp; - struct timeval tv; - register int i; - int error, bno, frags, allocsiz; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) - return (NULL); - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (NULL); - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp) || - (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - microtime(&tv); - cgp->cg_time = tv.tv_sec; - if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (bno); - } - /* - * check to see if any fragments are already available - * allocsiz is the size which will be allocated, hacking - * it down to a smaller size if necessary - */ - frags = numfrags(fs, size); - for (allocsiz = frags; allocsiz 
< fs->fs_frag; allocsiz++) - if (cgp->cg_frsum[allocsiz] != 0) - break; - if (allocsiz == fs->fs_frag) { - /* - * no fragments were available, so a block will be - * allocated, and hacked up - */ - if (cgp->cg_cs.cs_nbfree == 0) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - bno = ffs_alloccgblk(fs, cgp, bpref); - bpref = dtogd(fs, bno); - for (i = frags; i < fs->fs_frag; i++) - setbit(cg_blksfree(cgp), bpref + i); - i = fs->fs_frag - frags; - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - fs->fs_fmod = 1; - cgp->cg_frsum[i]++; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (bno); - } - bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); - if (bno < 0) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - for (i = 0; i < frags; i++) - clrbit(cg_blksfree(cgp), bno + i); - cgp->cg_cs.cs_nffree -= frags; - fs->fs_cstotal.cs_nffree -= frags; - fs->fs_cs(fs, cg).cs_nffree -= frags; - fs->fs_fmod = 1; - cgp->cg_frsum[allocsiz]--; - if (frags != allocsiz) - cgp->cg_frsum[allocsiz - frags]++; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (cg * fs->fs_fpg + bno); -} - -/* - * Allocate a block in a cylinder group. - * - * This algorithm implements the following policy: - * 1) allocate the requested block. - * 2) allocate a rotationally optimal block in the same cylinder. - * 3) allocate the next available block on the block rotor for the - * specified cylinder group. - * Note that this routine only allocates fs_bsize blocks; these - * blocks may be fragmented by the routine that allocates them. 
- */ -static ufs_daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; - ufs_daddr_t bpref; -{ - ufs_daddr_t bno, blkno; - int cylno, pos, delta; - short *cylbp; - register int i; - - if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { - bpref = cgp->cg_rotor; - goto norot; - } - bpref = blknum(fs, bpref); - bpref = dtogd(fs, bpref); - /* - * if the requested block is available, use it - */ - if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { - bno = bpref; - goto gotit; - } - if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { - /* - * Block layout information is not available. - * Leaving bpref unchanged means we take the - * next available free block following the one - * we just allocated. Hopefully this will at - * least hit a track cache on drives of unknown - * geometry (e.g. SCSI). - */ - goto norot; - } - /* - * check for a block available on the same cylinder - */ - cylno = cbtocylno(fs, bpref); - if (cg_blktot(cgp)[cylno] == 0) - goto norot; - /* - * check the summary information to see if a block is - * available in the requested cylinder starting at the - * requested rotational position and proceeding around. - */ - cylbp = cg_blks(fs, cgp, cylno); - pos = cbtorpos(fs, bpref); - for (i = pos; i < fs->fs_nrpos; i++) - if (cylbp[i] > 0) - break; - if (i == fs->fs_nrpos) - for (i = 0; i < pos; i++) - if (cylbp[i] > 0) - break; - if (cylbp[i] > 0) { - /* - * found a rotational position, now find the actual - * block. A panic if none is actually there. 
- */ - pos = cylno % fs->fs_cpc; - bno = (cylno - pos) * fs->fs_spc / NSPB(fs); - if (fs_postbl(fs, pos)[i] == -1) { - printf("pos = %d, i = %d, fs = %s\n", - pos, i, fs->fs_fsmnt); - panic("ffs_alloccgblk: cyl groups corrupted"); - } - for (i = fs_postbl(fs, pos)[i];; ) { - if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { - bno = blkstofrags(fs, (bno + i)); - goto gotit; - } - delta = fs_rotbl(fs)[i]; - if (delta <= 0 || - delta + i > fragstoblks(fs, fs->fs_fpg)) - break; - i += delta; - } - printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); - panic("ffs_alloccgblk: can't find blk in cyl"); - } -norot: - /* - * no blocks in the requested cylinder, so take next - * available one in this cylinder group. - */ - bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); - if (bno < 0) - return (NULL); - cgp->cg_rotor = bno; -gotit: - blkno = fragstoblks(fs, bno); - ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); - ffs_clusteracct(fs, cgp, blkno, -1); - cgp->cg_cs.cs_nbfree--; - fs->fs_cstotal.cs_nbfree--; - fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; - cylno = cbtocylno(fs, bno); - cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; - cg_blktot(cgp)[cylno]--; - fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); -} - -/* - * Determine whether a cluster can be allocated. - * - * We do not currently check for optimal rotational layout if there - * are multiple choices in the same cylinder group. Instead we just - * take the first one that we find following bpref. 
- */ -static ufs_daddr_t -ffs_clusteralloc(ip, cg, bpref, len) - struct inode *ip; - int cg; - ufs_daddr_t bpref; - int len; -{ - register struct fs *fs; - register struct cg *cgp; - struct buf *bp; - int i, got, run, bno, bit, map; - u_char *mapp; - int32_t *lp; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if (fs->fs_maxcluster[cg] < len) - return (NULL); - if (buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), (int)fs->fs_cgsize, - NOCRED, &bp)) - goto fail; - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - goto fail; - } - /* - * Check to see if a cluster of the needed size (or bigger) is - * available in this cylinder group. - */ - lp = &cg_clustersum(cgp)[len]; - for (i = len; i <= fs->fs_contigsumsize; i++) - if (*lp++ > 0) - break; - if (i > fs->fs_contigsumsize) { - /* - * This is the first time looking for a cluster in this - * cylinder group. Update the cluster summary information - * to reflect the true maximum sized cluster so that - * future cluster allocation requests can avoid reading - * the cylinder group map only to find no clusters. - */ - lp = &cg_clustersum(cgp)[len - 1]; - for (i = len - 1; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cg] = i; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - goto fail; - } - /* - * Search the cluster map to find a big enough cluster. - * We take the first one that we find, even if it is larger - * than we need as we prefer to get one close to the previous - * block allocation. 
We do not search before the current - * preference point as we do not want to allocate a block - * that is allocated before the previous one (as we will - * then have to wait for another pass of the elevator - * algorithm before it will be read). We prefer to fail and - * be recalled to try an allocation in the next cylinder group. - */ - if (dtog(fs, bpref) != cg) - bpref = 0; - else - bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); - mapp = &cg_clustersfree(cgp)[bpref / NBBY]; - map = *mapp++; - bit = 1 << (bpref % NBBY); - for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { - if ((map & bit) == 0) { - run = 0; - } else { - run++; - if (run == len) - break; - } - if ((got & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - if (got == cgp->cg_nclusterblks) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - goto fail; - } - /* - * Allocate the cluster that we have found. - */ - for (i = 1; i <= len; i++) - if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) - panic("ffs_clusteralloc: map mismatch"); - bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); - if (dtog(fs, bno) != cg) - panic("ffs_clusteralloc: allocated out of group"); - len = blkstofrags(fs, len); - for (i = 0; i < len; i += fs->fs_frag) - if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i) - panic("ffs_clusteralloc: lost block"); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (bno); - -fail: - buf_brelse(bp); - return (0); -} - -/* - * Determine whether an inode can be allocated. - * - * Check to see if an inode is available, and if it is, - * allocate it using the following policy: - * 1) allocate the requested inode. - * 2) allocate the next available inode after the requested - * inode in the specified cylinder group. 
- */ -static ino_t -ffs_nodealloccg(ip, cg, ipref, mode) - struct inode *ip; - int cg; - ufs_daddr_t ipref; - int mode; -{ - register struct fs *fs; - register struct cg *cgp; - struct buf *bp; - struct timeval tv; - int error, start, len, loc, map, i; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if (fs->fs_cs(fs, cg).cs_nifree == 0) - return (NULL); - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (NULL); - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (NULL); - } - - microtime(&tv); - cgp->cg_time = tv.tv_sec; - if (ipref) { - ipref %= fs->fs_ipg; - if (isclr(cg_inosused(cgp), ipref)) - goto gotit; - } - start = cgp->cg_irotor / NBBY; - len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); - loc = skpc(0xff, len, &cg_inosused(cgp)[start]); - if (loc == 0) { - len = start + 1; - start = 0; - loc = skpc(0xff, len, &cg_inosused(cgp)[0]); - if (loc == 0) { - printf("cg = %d, irotor = %d, fs = %s\n", - cg, cgp->cg_irotor, fs->fs_fsmnt); - panic("ffs_nodealloccg: map corrupted"); - /* NOTREACHED */ - } - } - i = start + len - loc; - map = cg_inosused(cgp)[i]; - ipref = i * NBBY; - for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { - if ((map & i) == 0) { - cgp->cg_irotor = ipref; - goto gotit; - } - } - printf("fs = %s\n", fs->fs_fsmnt); - panic("ffs_nodealloccg: block not in map"); - /* NOTREACHED */ -gotit: - setbit(cg_inosused(cgp), ipref); - cgp->cg_cs.cs_nifree--; - fs->fs_cstotal.cs_nifree--; - fs->fs_cs(fs, cg).cs_nifree--; - fs->fs_fmod = 1; - if ((mode & IFMT) == 
IFDIR) { - cgp->cg_cs.cs_ndir++; - fs->fs_cstotal.cs_ndir++; - fs->fs_cs(fs, cg).cs_ndir++; - } -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (cg * fs->fs_ipg + ipref); -} - -/* - * Free a block or fragment. - * - * The specified block or fragment is placed back in the - * free map. If a fragment is deallocated, a possible - * block reassembly is checked. - */ -void -ffs_blkfree(ip, bno, size) - register struct inode *ip; - ufs_daddr_t bno; - long size; -{ - register struct fs *fs; - register struct cg *cgp; - struct buf *bp; - struct timeval tv; - ufs_daddr_t blkno; - int i, error, cg, blk, frags, bbase; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", - ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); - panic("blkfree: bad size"); - } - cg = dtog(fs, bno); - if ((u_int)bno >= fs->fs_size) { - printf("bad block %d, ino %d\n", bno, ip->i_number); - ffs_fserr(fs, ip->i_uid, "bad block"); - return; - } - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return; - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return; - } - microtime(&tv); - cgp->cg_time = tv.tv_sec; - bno = dtogd(fs, bno); - if (size == fs->fs_bsize) { - blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { - printf("dev = 0x%x, block = %d, fs = %s\n", - ip->i_dev, bno, fs->fs_fsmnt); - panic("blkfree: freeing free block"); - } - 
ffs_setblock(fs, cg_blksfree(cgp), blkno); - ffs_clusteracct(fs, cgp, blkno, 1); - cgp->cg_cs.cs_nbfree++; - fs->fs_cstotal.cs_nbfree++; - fs->fs_cs(fs, cg).cs_nbfree++; - i = cbtocylno(fs, bno); - cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; - cg_blktot(cgp)[i]++; - } else { - bbase = bno - fragnum(fs, bno); - /* - * decrement the counts associated with the old frags - */ - blk = blkmap(fs, cg_blksfree(cgp), bbase); - ffs_fragacct(fs, blk, cgp->cg_frsum, -1); - /* - * deallocate the fragment - */ - frags = numfrags(fs, size); - for (i = 0; i < frags; i++) { - if (isset(cg_blksfree(cgp), bno + i)) { - printf("dev = 0x%x, block = %d, fs = %s\n", - ip->i_dev, bno + i, fs->fs_fsmnt); - panic("blkfree: freeing free frag"); - } - setbit(cg_blksfree(cgp), bno + i); - } - cgp->cg_cs.cs_nffree += i; - fs->fs_cstotal.cs_nffree += i; - fs->fs_cs(fs, cg).cs_nffree += i; - /* - * add back in counts associated with the new frags - */ - blk = blkmap(fs, cg_blksfree(cgp), bbase); - ffs_fragacct(fs, blk, cgp->cg_frsum, 1); - /* - * if a complete block has been reassembled, account for it - */ - blkno = fragstoblks(fs, bbase); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { - cgp->cg_cs.cs_nffree -= fs->fs_frag; - fs->fs_cstotal.cs_nffree -= fs->fs_frag; - fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; - ffs_clusteracct(fs, cgp, blkno, 1); - cgp->cg_cs.cs_nbfree++; - fs->fs_cstotal.cs_nbfree++; - fs->fs_cs(fs, cg).cs_nbfree++; - i = cbtocylno(fs, bbase); - cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; - cg_blktot(cgp)[i]++; - } - } - fs->fs_fmod = 1; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); -} - -#if DIAGNOSTIC -/* - * Verify allocation of a block or fragment. Returns true if block or - * fragment is allocated, false if it is free. 
- */ -ffs_checkblk(ip, bno, size) - struct inode *ip; - ufs_daddr_t bno; - long size; -{ - struct fs *fs; - struct cg *cgp; - struct buf *bp; - int i, error, frags, free; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("bsize = %d, size = %d, fs = %s\n", - fs->fs_bsize, size, fs->fs_fsmnt); - panic("checkblk: bad size"); - } - if ((u_int)bno >= fs->fs_size) - panic("checkblk: bad block %d", bno); - error = (int)buf_bread(ip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, dtog(fs, bno)))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return; - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return; - } - bno = dtogd(fs, bno); - if (size == fs->fs_bsize) { - free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); - } else { - frags = numfrags(fs, size); - for (free = 0, i = 0; i < frags; i++) - if (isset(cg_blksfree(cgp), bno + i)) - free++; - if (free != 0 && free != frags) - panic("checkblk: partially free fragment"); - } -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (!free); -} -#endif /* DIAGNOSTIC */ - -/* - * Free an inode. - * - * The specified inode is placed back in the free map. 
- */ -int -ffs_vfree(struct vnode *vp, ino_t ino, int mode) -{ - register struct fs *fs; - register struct cg *cgp; - register struct inode *pip; - struct buf *bp; - struct timeval tv; - int error, cg; -#if REV_ENDIAN_FS - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - pip = VTOI(vp); - fs = pip->i_fs; - if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) - panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n", - pip->i_dev, ino, fs->fs_fsmnt); - cg = ino_to_cg(fs, ino); - error = (int)buf_bread(pip->i_devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, cg))), - (int)fs->fs_cgsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (0); - } - cgp = (struct cg *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - return (0); - } - microtime(&tv); - cgp->cg_time = tv.tv_sec; - ino %= fs->fs_ipg; - if (isclr(cg_inosused(cgp), ino)) { - printf("dev = 0x%x, ino = %d, fs = %s\n", - pip->i_dev, ino, fs->fs_fsmnt); - if (fs->fs_ronly == 0) - panic("ifree: freeing free inode"); - } - clrbit(cg_inosused(cgp), ino); - if (ino < cgp->cg_irotor) - cgp->cg_irotor = ino; - cgp->cg_cs.cs_nifree++; - fs->fs_cstotal.cs_nifree++; - fs->fs_cs(fs, cg).cs_nifree++; - if ((mode & IFMT) == IFDIR) { - cgp->cg_cs.cs_ndir--; - fs->fs_cstotal.cs_ndir--; - fs->fs_cs(fs, cg).cs_ndir--; - } - fs->fs_fmod = 1; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_bdwrite(bp); - return (0); -} - -/* - * Find a block of the specified size in the specified cylinder group. - * - * It is a panic if a request is made to find a block if none are - * available. 
- */ -static ufs_daddr_t -ffs_mapsearch(fs, cgp, bpref, allocsiz) - register struct fs *fs; - register struct cg *cgp; - ufs_daddr_t bpref; - int allocsiz; -{ - ufs_daddr_t bno; - int start, len, loc, i; - int blk, field, subfield, pos; - - /* - * find the fragment by searching through the free block - * map for an appropriate bit pattern - */ - if (bpref) - start = dtogd(fs, bpref) / NBBY; - else - start = cgp->cg_frotor / NBBY; - len = howmany(fs->fs_fpg, NBBY) - start; - loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], - (u_char *)fragtbl[fs->fs_frag], - (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); - if (loc == 0) { - len = start + 1; - start = 0; - loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], - (u_char *)fragtbl[fs->fs_frag], - (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); - if (loc == 0) { - printf("start = %d, len = %d, fs = %s\n", - start, len, fs->fs_fsmnt); - panic("ffs_alloccg: map corrupted"); - /* NOTREACHED */ - } - } - bno = (start + len - loc) * NBBY; - cgp->cg_frotor = bno; - /* - * found the byte in the map - * sift through the bits to find the selected frag - */ - for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { - blk = blkmap(fs, cg_blksfree(cgp), bno); - blk <<= 1; - field = around[allocsiz]; - subfield = inside[allocsiz]; - for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { - if ((blk & field) == subfield) - return (bno + pos); - field <<= 1; - subfield <<= 1; - } - } - printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt); - panic("ffs_alloccg: block not in map"); - return (-1); -} - -/* - * Update the cluster map because of an allocation or free. - * - * Cnt == 1 means free; cnt == -1 means allocating. 
- */ -static void -ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt) -{ - int32_t *sump; - int32_t *lp; - u_char *freemapp, *mapp; - int i, start, end, forw, back, map, bit; - - if (fs->fs_contigsumsize <= 0) - return; - freemapp = cg_clustersfree(cgp); - sump = cg_clustersum(cgp); - /* - * Allocate or clear the actual block. - */ - if (cnt > 0) - setbit(freemapp, blkno); - else - clrbit(freemapp, blkno); - /* - * Find the size of the cluster going forward. - */ - start = blkno + 1; - end = start + fs->fs_contigsumsize; - if (end >= cgp->cg_nclusterblks) - end = cgp->cg_nclusterblks; - mapp = &freemapp[start / NBBY]; - map = *mapp++; - bit = 1 << (start % NBBY); - for (i = start; i < end; i++) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != (NBBY - 1)) { - bit <<= 1; - } else { - map = *mapp++; - bit = 1; - } - } - forw = i - start; - /* - * Find the size of the cluster going backward. - */ - start = blkno - 1; - end = start - fs->fs_contigsumsize; - if (end < 0) - end = -1; - mapp = &freemapp[start / NBBY]; - map = *mapp--; - bit = 1 << (start % NBBY); - for (i = start; i > end; i--) { - if ((map & bit) == 0) - break; - if ((i & (NBBY - 1)) != 0) { - bit >>= 1; - } else { - map = *mapp--; - bit = 1 << (NBBY - 1); - } - } - back = start - i; - /* - * Account for old cluster and the possibly new forward and - * back clusters. - */ - i = back + forw + 1; - if (i > fs->fs_contigsumsize) - i = fs->fs_contigsumsize; - sump[i] += cnt; - if (back > 0) - sump[back] -= cnt; - if (forw > 0) - sump[forw] -= cnt; - /* - * Update cluster summary information. - */ - lp = &sump[fs->fs_contigsumsize]; - for (i = fs->fs_contigsumsize; i > 0; i--) - if (*lp-- > 0) - break; - fs->fs_maxcluster[cgp->cg_cgx] = i; -} - -/* - * Fserr prints the name of a file system with an error diagnostic. 
- * - * The form of the error message is: - * fs: error message - */ -static void -ffs_fserr(fs, uid, cp) - struct fs *fs; - u_int uid; - char *cp; -{ - - log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); -} diff --git a/bsd/ufs/ffs/ffs_balloc.c b/bsd/ufs/ffs/ffs_balloc.c deleted file mode 100644 index cf998b4e5..000000000 --- a/bsd/ufs/ffs/ffs_balloc.c +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - -#include - -#include -#include -#include - -#include -#include - -#if REV_ENDIAN_FS -#include -#include -#endif /* REV_ENDIAN_FS */ - -/* - * Balloc defines the structure of file system storage - * by allocating the physical blocks on a device given - * the inode and the logical block number in a file. - */ -ffs_balloc( - register struct inode *ip, - register ufs_daddr_t lbn, - int size, - kauth_cred_t cred, - struct buf **bpp, - int flags, - int * blk_alloc) -{ - register struct fs *fs; - register ufs_daddr_t nb; - struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); - struct indir indirs[NIADDR + 2]; - ufs_daddr_t newb, *bap, pref; - int deallocated, osize, nsize, num, i, error; - ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - int devBlockSize=0; - int alloc_buffer = 1; - struct mount *mp=vp->v_mount; -#if REV_ENDIAN_FS - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - *bpp = NULL; - if (lbn < 0) - return (EFBIG); - fs = ip->i_fs; - if (flags & B_NOBUFF) - alloc_buffer = 0; - - if (blk_alloc) - *blk_alloc = 0; - - /* - * If the next write will extend the file into a new block, - * and the file is currently composed of a fragment - * this fragment has to be extended to be a full block. - */ - nb = lblkno(fs, ip->i_size); - if (nb < NDADDR && nb < lbn) { - /* the filesize prior to this write can fit in direct - * blocks (ie. fragmentaion is possibly done) - * we are now extending the file write beyond - * the block which has end of file prior to this write - */ - osize = blksize(fs, ip, nb); - /* osize gives disk allocated size in the last block. 
It is - * either in fragments or a file system block size */ - if (osize < fs->fs_bsize && osize > 0) { - /* few fragments are already allocated,since the - * current extends beyond this block - * allocate the complete block as fragments are only - * in last block - */ - error = ffs_realloccg(ip, nb, - ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), - osize, (int)fs->fs_bsize, cred, &bp); - if (error) - return (error); - /* adjust the inode size we just grew */ - /* it is in nb+1 as nb starts from 0 */ - ip->i_size = (nb + 1) * fs->fs_bsize; - ubc_setsize(vp, (off_t)ip->i_size); - - ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - - if ((flags & B_SYNC) || (!alloc_buffer)) { - if (!alloc_buffer) - buf_setflags(bp, B_NOCACHE); - buf_bwrite(bp); - } else - buf_bdwrite(bp); - /* note that bp is already released here */ - } - } - /* - * The first NDADDR blocks are direct blocks - */ - if (lbn < NDADDR) { - nb = ip->i_db[lbn]; - if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { - if (alloc_buffer) { - error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (error); - } - *bpp = bp; - } - return (0); - } - if (nb != 0) { - /* - * Consider need to reallocate a fragment. 
- */ - osize = fragroundup(fs, blkoff(fs, ip->i_size)); - nsize = fragroundup(fs, size); - if (nsize <= osize) { - if (alloc_buffer) { - error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - return (error); - } - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; - return (0); - } - else { - ip->i_flag |= IN_CHANGE | IN_UPDATE; - return (0); - } - } else { - error = ffs_realloccg(ip, lbn, - ffs_blkpref(ip, lbn, (int)lbn, - &ip->i_db[0]), osize, nsize, cred, &bp); - if (error) - return (error); - ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - - /* adjust the inode size we just grew */ - ip->i_size = (lbn * fs->fs_bsize) + size; - ubc_setsize(vp, (off_t)ip->i_size); - - if (!alloc_buffer) { - buf_setflags(bp, B_NOCACHE); - if (flags & B_SYNC) - buf_bwrite(bp); - else - buf_bdwrite(bp); - } else - *bpp = bp; - return (0); - - } - } else { - if (ip->i_size < (lbn + 1) * fs->fs_bsize) - nsize = fragroundup(fs, size); - else - nsize = fs->fs_bsize; - error = ffs_alloc(ip, lbn, - ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), - nsize, cred, &newb); - if (error) - return (error); - if (alloc_buffer) { - bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE); - buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb))); - - if (flags & B_CLRBUF) - buf_clear(bp); - } - ip->i_db[lbn] = newb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - if (blk_alloc) { - *blk_alloc = nsize; - } - if (alloc_buffer) - *bpp = bp; - return (0); - } - } - /* - * Determine the number of levels of indirection. - */ - pref = 0; - if (error = ufs_getlbns(vp, lbn, indirs, &num)) - return(error); -#if DIAGNOSTIC - if (num < 1) - panic ("ffs_balloc: ufs_bmaparray returned indirect block"); -#endif - /* - * Fetch the first indirect block allocating if necessary. 
- */ - --num; - nb = ip->i_ib[indirs[0].in_off]; - allocib = NULL; - allocblk = allociblk; - if (nb == 0) { - pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); - if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - cred, &newb)) - return (error); - nb = newb; - *allocblk++ = nb; - bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); - buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - buf_clear(bp); - /* - * Write synchronously conditional on mount flags. - */ - if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else if ((error = buf_bwrite(bp)) != 0) { - goto fail; - } - allocib = &ip->i_ib[indirs[0].in_off]; - *allocib = nb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * Fetch through the indirect blocks, allocating as necessary. - */ - for (i = 1;;) { - error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - goto fail; - } - bap = (ufs_daddr_t *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - nb = OSSwapInt32(bap[indirs[i].in_off]); - else { -#endif /* REV_ENDIAN_FS */ - nb = bap[indirs[i].in_off]; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (i == num) - break; - i += 1; - if (nb != 0) { - buf_brelse(bp); - continue; - } - if (pref == 0) - pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); - if (error = - ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - buf_brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); - buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - buf_clear(nbp); - /* - * Write synchronously conditional on mount flags. 
- */ - if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(nbp); - } else if (error = buf_bwrite(nbp)) { - buf_brelse(bp); - goto fail; - } -#if REV_ENDIAN_FS - if (rev_endian) - bap[indirs[i - 1].in_off] = OSSwapInt32(nb); - else { -#endif /* REV_ENDIAN_FS */ - bap[indirs[i - 1].in_off] = nb; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & B_SYNC) { - buf_bwrite(bp); - } else { - buf_bdwrite(bp); - } - } - /* - * Get the data block, allocating if necessary. - */ - if (nb == 0) { - pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); - if (error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - buf_brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; -#if REV_ENDIAN_FS - if (rev_endian) - bap[indirs[i].in_off] = OSSwapInt32(nb); - else { -#endif /* REV_ENDIAN_FS */ - bap[indirs[i].in_off] = nb; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - /* - * If required, write synchronously, otherwise use - * delayed write. 
- */ - if ((flags & B_SYNC)) { - buf_bwrite(bp); - } else { - buf_bdwrite(bp); - } - if(alloc_buffer ) { - nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); - buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - - if (flags & B_CLRBUF) - buf_clear(nbp); - } - if (blk_alloc) { - *blk_alloc = fs->fs_bsize; - } - if(alloc_buffer) - *bpp = nbp; - - return (0); - } - buf_brelse(bp); - if (alloc_buffer) { - if (flags & B_CLRBUF) { - error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp); - if (error) { - buf_brelse(nbp); - goto fail; - } - } else { - nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); - buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - } - *bpp = nbp; - } - return (0); -fail: - /* - * If we have failed part way through block allocation, we - * have to deallocate any indirect blocks that we have allocated. - */ - for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { - ffs_blkfree(ip, *blkp, fs->fs_bsize); - deallocated += fs->fs_bsize; - } - if (allocib != NULL) - *allocib = 0; - if (deallocated) { - devBlockSize = vfs_devblocksize(mp); -#if QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); -#endif /* QUOTA */ - ip->i_blocks -= btodb(deallocated, devBlockSize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - return (error); -} - -/* - * ffs_blkalloc allocates a disk block for ffs_pageout(), as a consequence - * it does no buf_breads (that could lead to deadblock as the page may be already - * marked busy as it is being paged out. Also important to note that we are not - * growing the file in pageouts. So ip->i_size cannot increase by this call - * due to the way UBC works. 
- * This code is derived from ffs_balloc and many cases of that are dealt - * in ffs_balloc are not applicable here - * Do not call with B_CLRBUF flags as this should only be called only - * from pageouts - */ -ffs_blkalloc( - struct inode *ip, - ufs_daddr_t lbn, - int size, - kauth_cred_t cred, - int flags) -{ - register struct fs *fs; - register ufs_daddr_t nb; - struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); - struct indir indirs[NIADDR + 2]; - ufs_daddr_t newb, *bap, pref; - int deallocated, osize, nsize, num, i, error; - ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - int devBlockSize=0; - struct mount *mp=vp->v_mount; -#if REV_ENDIAN_FS - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - fs = ip->i_fs; - - if(size > fs->fs_bsize) - panic("ffs_blkalloc: too large for allocation"); - - /* - * If the next write will extend the file into a new block, - * and the file is currently composed of a fragment - * this fragment has to be extended to be a full block. - */ - nb = lblkno(fs, ip->i_size); - if (nb < NDADDR && nb < lbn) { - panic("ffs_blkalloc():cannot extend file: i_size %d, lbn %d", ip->i_size, lbn); - } - /* - * The first NDADDR blocks are direct blocks - */ - if (lbn < NDADDR) { - nb = ip->i_db[lbn]; - if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { - /* TBD: trivial case; the block is already allocated */ - return (0); - } - if (nb != 0) { - /* - * Consider need to reallocate a fragment. 
- */ - osize = fragroundup(fs, blkoff(fs, ip->i_size)); - nsize = fragroundup(fs, size); - if (nsize > osize) { - panic("ffs_allocblk: trying to extend a fragment"); - } - return(0); - } else { - if (ip->i_size < (lbn + 1) * fs->fs_bsize) - nsize = fragroundup(fs, size); - else - nsize = fs->fs_bsize; - error = ffs_alloc(ip, lbn, - ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), - nsize, cred, &newb); - if (error) - return (error); - ip->i_db[lbn] = newb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - return (0); - } - } - /* - * Determine the number of levels of indirection. - */ - pref = 0; - if (error = ufs_getlbns(vp, lbn, indirs, &num)) - return(error); - - if(num == 0) { - panic("ffs_blkalloc: file with direct blocks only"); - } - - /* - * Fetch the first indirect block allocating if necessary. - */ - --num; - nb = ip->i_ib[indirs[0].in_off]; - allocib = NULL; - allocblk = allociblk; - if (nb == 0) { - pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); - if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, - cred, &newb)) - return (error); - nb = newb; - *allocblk++ = nb; - bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); - buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - buf_clear(bp); - /* - * Write synchronously conditional on mount flags. - */ - if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else if (error = buf_bwrite(bp)) { - goto fail; - } - allocib = &ip->i_ib[indirs[0].in_off]; - *allocib = nb; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - /* - * Fetch through the indirect blocks, allocating as necessary. 
- */ - for (i = 1;;) { - error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); - if (error) { - buf_brelse(bp); - goto fail; - } - bap = (ufs_daddr_t *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) - nb = OSSwapInt32(bap[indirs[i].in_off]); - else { -#endif /* REV_ENDIAN_FS */ - nb = bap[indirs[i].in_off]; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (i == num) - break; - i += 1; - if (nb != 0) { - buf_brelse(bp); - continue; - } - if (pref == 0) - pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); - if (error = - ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - buf_brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; - nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); - buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); - buf_clear(nbp); - /* - * Write synchronously conditional on mount flags. - */ - if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(nbp); - } else if (error = buf_bwrite(nbp)) { - buf_brelse(bp); - goto fail; - } -#if REV_ENDIAN_FS - if (rev_endian) - bap[indirs[i - 1].in_off] = OSSwapInt32(nb); - else { -#endif /* REV_ENDIAN_FS */ - bap[indirs[i - 1].in_off] = nb; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & B_SYNC) { - buf_bwrite(bp); - } else { - buf_bdwrite(bp); - } - } - /* - * Get the data block, allocating if necessary. 
- */ - if (nb == 0) { - pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); - if (error = ffs_alloc(ip, - lbn, pref, (int)fs->fs_bsize, cred, &newb)) { - buf_brelse(bp); - goto fail; - } - nb = newb; - *allocblk++ = nb; -#if REV_ENDIAN_FS - if (rev_endian) - bap[indirs[i].in_off] = OSSwapInt32(nb); - else { -#endif /* REV_ENDIAN_FS */ - bap[indirs[i].in_off] = nb; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - /* - * If required, write synchronously, otherwise use - * delayed write. - */ - if (flags & B_SYNC) { - buf_bwrite(bp); - } else { - buf_bdwrite(bp); - } - return (0); - } - buf_brelse(bp); - return (0); -fail: - /* - * If we have failed part way through block allocation, we - * have to deallocate any indirect blocks that we have allocated. - */ - for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { - ffs_blkfree(ip, *blkp, fs->fs_bsize); - deallocated += fs->fs_bsize; - } - if (allocib != NULL) - *allocib = 0; - if (deallocated) { - devBlockSize = vfs_devblocksize(mp); -#if QUOTA - /* - * Restore user's disk quota because allocation failed. - */ - (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); -#endif /* QUOTA */ - ip->i_blocks -= btodb(deallocated, devBlockSize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - return (error); -} diff --git a/bsd/ufs/ffs/ffs_extern.h b/bsd/ufs/ffs/ffs_extern.h deleted file mode 100644 index 840263068..000000000 --- a/bsd/ufs/ffs/ffs_extern.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ - -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - */ -#ifndef __UFS_FFS_FFS_EXTERN_H__ -#define __UFS_FFS_FFS_EXTERN_H__ - -#include - -#ifdef __APPLE_API_UNSTABLE -/* - * Sysctl values for the fast filesystem. 
- */ -#define FFS_CLUSTERREAD 1 /* cluster reading enabled */ -#define FFS_CLUSTERWRITE 2 /* cluster writing enabled */ -#define FFS_REALLOCBLKS 3 /* block reallocation enabled */ -#define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */ -#define FFS_MAXID 5 /* number of valid ffs ids */ - -#define FFS_NAMES { \ - { 0, 0 }, \ - { "doclusterread", CTLTYPE_INT }, \ - { "doclusterwrite", CTLTYPE_INT }, \ - { "doreallocblks", CTLTYPE_INT }, \ - { "doasyncfree", CTLTYPE_INT }, \ -} -#endif /* __APPLE_API_UNSTABLE */ - -struct buf; -struct fs; -struct inode; -struct mount; -struct nameidata; -struct proc; -struct vfsstatfs; -struct timeval; -#ifndef _KAUTH_CRED_T -#define _KAUTH_CRED_T -struct ucred; -typedef struct ucred *kauth_cred_t; -#endif /* !_KAUTH_CRED_T */ -struct uio; -struct vnode; -struct mbuf; -struct vfsconf; - -#ifdef __APPLE_API_PRIVATE -__BEGIN_DECLS -int ffs_fsync_internal(vnode_t, int); - -int ffs_blkatoff(vnode_t, off_t, char **, buf_t *); - -int ffs_alloc(struct inode *, - ufs_daddr_t, ufs_daddr_t, int, kauth_cred_t, ufs_daddr_t *); -int ffs_balloc(struct inode *, - ufs_daddr_t, int, kauth_cred_t, struct buf **, int, int *); -void ffs_blkfree(struct inode *, ufs_daddr_t, long); -ufs_daddr_t ffs_blkpref(struct inode *, ufs_daddr_t, int, ufs_daddr_t *); -void ffs_clrblock(struct fs *, u_char *, ufs_daddr_t); -int ffs_fhtovp(struct mount *, int, unsigned char *, struct vnode **, vfs_context_t); -void ffs_fragacct(struct fs *, int, int32_t [], int); -int ffs_fsync(struct vnop_fsync_args *); -int ffs_init(struct vfsconf *); -int ffs_isblock(struct fs *, u_char *, ufs_daddr_t); -int ffs_mount(struct mount *, vnode_t , user_addr_t, vfs_context_t); -int ffs_mountfs(struct vnode *, struct mount *, vfs_context_t); -int ffs_mountroot(mount_t, vnode_t, vfs_context_t); -int ffs_read(struct vnop_read_args *); -int ffs_realloccg(struct inode *, - ufs_daddr_t, ufs_daddr_t, int, int, kauth_cred_t, struct buf **); -int ffs_reclaim(struct vnop_reclaim_args *); 
-void ffs_setblock(struct fs *, u_char *, ufs_daddr_t); -int ffs_vfs_getattr(struct mount *, struct vfs_attr *, vfs_context_t); -int ffs_vfs_setattr(struct mount *, struct vfs_attr *, vfs_context_t); -int ffs_sync(struct mount *, int, vfs_context_t); -int ffs_sysctl(int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t); -int ffs_unmount(struct mount *, int, vfs_context_t); -int ffs_update(struct vnode *, struct timeval *, struct timeval *, int); -int ffs_valloc(vnode_t dvp, mode_t mode, kauth_cred_t cred, vnode_t *vpp); -int ffs_vfree(struct vnode *vp, ino_t ino, int mode); -int ffs_vget(struct mount *, ino64_t, struct vnode **, vfs_context_t); -int ffs_vptofh(struct vnode *, int *, unsigned char *, vfs_context_t); -int ffs_write(struct vnop_write_args *); -int ffs_pagein(struct vnop_pagein_args *); -int ffs_pageout(struct vnop_pageout_args *); -int ffs_blktooff(struct vnop_blktooff_args *); -int ffs_offtoblk(struct vnop_offtoblk_args *); - -__END_DECLS - -extern int (**ffs_vnodeop_p)(void *); -extern int (**ffs_specop_p)(void *); -#if FIFO -extern int (**ffs_fifoop_p)(void *); -#define FFS_FIFOOPS ffs_fifoop_p -#else -#define FFS_FIFOOPS NULL -#endif - -#endif /* __APPLE_API_PRIVATE */ -#endif /* __UFS_FFS_FFS_EXTERN_H__ */ diff --git a/bsd/ufs/ffs/ffs_inode.c b/bsd/ufs/ffs/ffs_inode.c deleted file mode 100644 index 19342d55b..000000000 --- a/bsd/ufs/ffs/ffs_inode.c +++ /dev/null @@ -1,597 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - */ - -#include -#include - -#include -#include -#include -#include /* for accessing p_stats */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include - -#if REV_ENDIAN_FS -#include -#include -#endif /* REV_ENDIAN_FS */ -#include - -static int ffs_indirtrunc(struct inode *, ufs_daddr_t, ufs_daddr_t, - ufs_daddr_t, int, long *); - -/* - * Update the access, modified, and inode change times as specified by the - * IACCESS, IUPDATE, and ICHANGE flags respectively. The IMODIFIED flag is - * used to specify that the inode needs to be updated but that the times have - * already been set. The access and modified times are taken from the second - * and third parameters; the inode change time is always taken from the current - * time. If waitfor is set, then wait for the disk write of the inode to - * complete. 
- */ -int -ffs_update(struct vnode *vp, struct timeval *access, struct timeval *modify, int waitfor) -{ - register struct fs *fs; - struct buf *bp; - struct inode *ip; - struct timeval tv; - errno_t error; -#if REV_ENDIAN_FS - struct mount *mp=(vp)->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); - if (vp->v_mount->mnt_flag & MNT_RDONLY) { - ip->i_flag &= - ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); - return (0); - } - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) - return (0); - if (ip->i_flag & IN_ACCESS) - ip->i_atime = access->tv_sec; - if (ip->i_flag & IN_UPDATE) { - ip->i_mtime = modify->tv_sec; - ip->i_modrev++; - } - if (ip->i_flag & IN_CHANGE) { - microtime(&tv); - ip->i_ctime = tv.tv_sec; - } - ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); - fs = ip->i_fs; - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. 
- */ - if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_din.di_ouid = ip->i_uid; /* XXX */ - ip->i_din.di_ogid = ip->i_gid; /* XXX */ - } /* XXX */ - if (error = buf_bread(ip->i_devvp, - (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))), - (int)fs->fs_bsize, NOCRED, &bp)) { - buf_brelse(bp); - return ((int)error); - } -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_inode_out(ip, ((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number))); - else { -#endif /* REV_ENDIAN_FS */ - *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - - if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) - return ((int)buf_bwrite(bp)); - else { - buf_bdwrite(bp); - return (0); - } -} - - -#define SINGLE 0 /* index of single indirect block */ -#define DOUBLE 1 /* index of double indirect block */ -#define TRIPLE 2 /* index of triple indirect block */ - -int -ffs_truncate_internal(vnode_t ovp, off_t length, int flags, ucred_t cred) -{ - struct inode *oip; - struct fs *fs; - ufs_daddr_t lastblock; - ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; - ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; - buf_t bp; - int offset, size, level, i; - long count, nblocks, vflags, blocksreleased = 0; - struct timeval tv; - int aflags, error, allerror; - off_t osize; - int devBlockSize=0; -#if QUOTA - int64_t change; /* in bytes */ -#endif /* QUOTA */ - - if (length < 0) - return (EINVAL); - - oip = VTOI(ovp); - fs = oip->i_fs; - - if (length > fs->fs_maxfilesize) - return (EFBIG); - - microtime(&tv); - if (ovp->v_type == VLNK && - oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { -#if DIAGNOSTIC - if (length != 0) - panic("ffs_truncate: partial truncate of symlink"); -#endif - bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); - oip->i_size = 0; - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(ovp, &tv, &tv, 1)); - } - - if (oip->i_size == 
length) { - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(ovp, &tv, &tv, 0)); - } -#if QUOTA - if (error = getinoquota(oip)) - return (error); -#endif - osize = oip->i_size; - - /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. - */ - if (osize < length) { - offset = blkoff(fs, length - 1); - lbn = lblkno(fs, length - 1); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - if (error = ffs_balloc(oip, lbn, offset + 1, cred, &bp, aflags, 0)) - return (error); - oip->i_size = length; - - if (UBCINFOEXISTS(ovp)) { - buf_markinvalid(bp); - buf_bwrite(bp); - ubc_setsize(ovp, (off_t)length); - } else { - if (aflags & B_SYNC) - buf_bwrite(bp); - else - buf_bawrite(bp); - } - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ffs_update(ovp, &tv, &tv, 1)); - } - /* - * Shorten the size of the file. If the file is not being - * truncated to a block boundry, the contents of the - * partial block following the end of the file must be - * zero'ed in case it ever become accessable again because - * of subsequent file growth. - */ - if (UBCINFOEXISTS(ovp)) - ubc_setsize(ovp, (off_t)length); - - vflags = ((length > 0) ? 
BUF_WRITE_DATA : 0) | BUF_SKIP_META; - - if (vflags & BUF_WRITE_DATA) - ffs_fsync_internal(ovp, MNT_WAIT); - allerror = buf_invalidateblks(ovp, vflags, 0, 0); - - offset = blkoff(fs, length); - if (offset == 0) { - oip->i_size = length; - } else { - lbn = lblkno(fs, length); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - if (error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags, 0)) - return (error); - oip->i_size = length; - size = blksize(fs, oip, lbn); - bzero((char *)buf_dataptr(bp) + offset, (u_int)(size - offset)); - allocbuf(bp, size); - if (UBCINFOEXISTS(ovp)) { - buf_markinvalid(bp); - buf_bwrite(bp); - } else { - if (aflags & B_SYNC) - buf_bwrite(bp); - else - buf_bawrite(bp); - } - } - /* - * Calculate index into inode's block list of - * last direct and indirect blocks (if any) - * which we want to keep. Lastblock is -1 when - * the file is truncated to 0. - */ - lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; - lastiblock[SINGLE] = lastblock - NDADDR; - lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); - lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); - - devBlockSize = vfs_devblocksize(vnode_mount(ovp)); - nblocks = btodb(fs->fs_bsize, devBlockSize); - - /* - * Update file and block pointers on disk before we start freeing - * blocks. If we crash before free'ing blocks below, the blocks - * will be returned to the free list. lastiblock values are also - * normalized to -1 for calls to ffs_indirtrunc below. 
- */ - bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); - for (level = TRIPLE; level >= SINGLE; level--) - if (lastiblock[level] < 0) { - oip->i_ib[level] = 0; - lastiblock[level] = -1; - } - for (i = NDADDR - 1; i > lastblock; i--) - oip->i_db[i] = 0; - oip->i_flag |= IN_CHANGE | IN_UPDATE; - if (error = ffs_update(ovp, &tv, &tv, MNT_WAIT)) - allerror = error; - /* - * Having written the new inode to disk, save its new configuration - * and put back the old block pointers long enough to process them. - * Note that we save the new block configuration so we can check it - * when we are done. - */ - bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); - bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); - oip->i_size = osize; - - vflags = ((length > 0) ? BUF_WRITE_DATA : 0) | BUF_SKIP_META; - - if (vflags & BUF_WRITE_DATA) - ffs_fsync_internal(ovp, MNT_WAIT); - allerror = buf_invalidateblks(ovp, vflags, 0, 0); - - /* - * Indirect blocks first. - */ - indir_lbn[SINGLE] = -NDADDR; - indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; - indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; - for (level = TRIPLE; level >= SINGLE; level--) { - bn = oip->i_ib[level]; - if (bn != 0) { - error = ffs_indirtrunc(oip, indir_lbn[level], - fsbtodb(fs, bn), lastiblock[level], level, &count); - if (error) - allerror = error; - blocksreleased += count; - if (lastiblock[level] < 0) { - oip->i_ib[level] = 0; - ffs_blkfree(oip, bn, fs->fs_bsize); - blocksreleased += nblocks; - } - } - if (lastiblock[level] >= 0) - goto done; - } - - /* - * All whole direct blocks or frags. 
- */ - for (i = NDADDR - 1; i > lastblock; i--) { - register long bsize; - - bn = oip->i_db[i]; - if (bn == 0) - continue; - oip->i_db[i] = 0; - bsize = blksize(fs, oip, i); - ffs_blkfree(oip, bn, bsize); - blocksreleased += btodb(bsize, devBlockSize); - } - if (lastblock < 0) - goto done; - - /* - * Finally, look for a change in size of the - * last direct block; release any frags. - */ - bn = oip->i_db[lastblock]; - if (bn != 0) { - long oldspace, newspace; - - /* - * Calculate amount of space we're giving - * back as old block size minus new block size. - */ - oldspace = blksize(fs, oip, lastblock); - oip->i_size = length; - newspace = blksize(fs, oip, lastblock); - if (newspace == 0) - panic("itrunc: newspace"); - if (oldspace - newspace > 0) { - /* - * Block number of space to be free'd is - * the old block # plus the number of frags - * required for the storage we're keeping. - */ - bn += numfrags(fs, newspace); - ffs_blkfree(oip, bn, oldspace - newspace); - blocksreleased += btodb(oldspace - newspace, devBlockSize); - } - } -done: -#if DIAGNOSTIC - for (level = SINGLE; level <= TRIPLE; level++) - if (newblks[NDADDR + level] != oip->i_ib[level]) - panic("itrunc1"); - for (i = 0; i < NDADDR; i++) - if (newblks[i] != oip->i_db[i]) - panic("itrunc2"); - if (length == 0 && - (vnode_hasdirtyblks(ovp) || vnode_hascleanblks(ovp))) - panic("itrunc3"); -#endif /* DIAGNOSTIC */ - /* - * Put back the real size. - */ - oip->i_size = length; - oip->i_blocks -= blocksreleased; - if (oip->i_blocks < 0) /* sanity */ - oip->i_blocks = 0; - oip->i_flag |= IN_CHANGE; -#if QUOTA - change = dbtob((int64_t)blocksreleased,devBlockSize); - (void) chkdq(oip, -change, NOCRED, 0); -#endif - return (allerror); -} - -/* - * Release blocks associated with the inode ip and stored in the indirect - * block bn. Blocks are free'd in LIFO order up to (but not including) - * lastbn. 
If level is greater than SINGLE, the block is an indirect block - * and recursive calls to indirtrunc must be used to cleanse other indirect - * blocks. - * - * NB: triple indirect blocks are untested. - */ -static int -ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) - register struct inode *ip; - ufs_daddr_t lbn, lastbn; - ufs_daddr_t dbn; - int level; - long *countp; -{ - register int i; - struct buf *bp; - struct buf *tbp; - register struct fs *fs = ip->i_fs; - register ufs_daddr_t *bap; - struct vnode *vp=ITOV(ip); - ufs_daddr_t *copy, nb, nlbn, last; - long blkcount, factor; - int nblocks, blocksreleased = 0; - errno_t error = 0, allerror = 0; - int devBlockSize=0; - struct mount *mp=vp->v_mount; -#if REV_ENDIAN_FS - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - /* - * Calculate index in current block of last - * block to be kept. -1 indicates the entire - * block so we need not calculate the index. - */ - factor = 1; - for (i = SINGLE; i < level; i++) - factor *= NINDIR(fs); - last = lastbn; - if (lastbn > 0) - last /= factor; - - devBlockSize = vfs_devblocksize(mp); - nblocks = btodb(fs->fs_bsize, devBlockSize); - - /* Doing a MALLOC here is asking for trouble. We can still - * deadlock on pagerfile lock, in case we are running - * low on memory and block in MALLOC - */ - - tbp = buf_geteblk(fs->fs_bsize); - copy = (ufs_daddr_t *)buf_dataptr(tbp); - - /* - * Get buffer of block pointers, zero those entries corresponding - * to blocks to be free'd, and update on disk copy first. Since - * double(triple) indirect before single(double) indirect, calls - * to bmap on these blocks will fail. However, we already have - * the on disk address, so we have to set the blkno field - * explicitly instead of letting buf_bread do everything for us. 
- */ - - vp = ITOV(ip); - bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, 0, 0, BLK_META); - - if (buf_valid(bp)) { - /* Braces must be here in case trace evaluates to nothing. */ - trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); - } else { - trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); - OSIncrementAtomic(¤t_proc()->p_stats->p_ru.ru_inblock); /* pay for read */ - buf_setflags(bp, B_READ); - if (buf_count(bp) > buf_size(bp)) - panic("ffs_indirtrunc: bad buffer size"); - buf_setblkno(bp, (daddr64_t)((unsigned)dbn)); - VNOP_STRATEGY(bp); - error = buf_biowait(bp); - } - if (error) { - buf_brelse(bp); - *countp = 0; - buf_brelse(tbp); - return ((int)error); - } - - bap = (ufs_daddr_t *)buf_dataptr(bp); - bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); - bzero((caddr_t)&bap[last + 1], - (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t)); - if (last == -1) - buf_markinvalid(bp); - if (last != -1 && (vp)->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = buf_bwrite(bp); - if (error) - allerror = error; - } - bap = copy; - - /* - * Recursively free totally unused blocks. - */ - for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; - i--, nlbn += factor) { -#if REV_ENDIAN_FS - if (rev_endian) - nb = OSSwapInt32(bap[i]); - else { -#endif /* REV_ENDIAN_FS */ - nb = bap[i]; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (nb == 0) - continue; - if (level > SINGLE) { - if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), - (ufs_daddr_t)-1, level - 1, &blkcount)) - allerror = error; - blocksreleased += blkcount; - } - ffs_blkfree(ip, nb, fs->fs_bsize); - blocksreleased += nblocks; - } - - /* - * Recursively free last partial block. 
- */ - if (level > SINGLE && lastbn >= 0) { - last = lastbn % factor; -#if REV_ENDIAN_FS - if (rev_endian) - nb = OSSwapInt32(bap[i]); - else { -#endif /* REV_ENDIAN_FS */ - nb = bap[i]; -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (nb != 0) { - if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), - last, level - 1, &blkcount)) - allerror = error; - blocksreleased += blkcount; - } - } - buf_brelse(tbp); - *countp = blocksreleased; - return ((int)allerror); -} - diff --git a/bsd/ufs/ffs/ffs_subr.c b/bsd/ufs/ffs/ffs_subr.c deleted file mode 100644 index d226a5680..000000000 --- a/bsd/ufs/ffs/ffs_subr.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 - */ - -#include -#include -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - -#ifndef KERNEL -#include -#include -#else - -#include -#include -#include -#include -#include -#include -#include -#include -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - -/* - * Return buffer with the contents of block "offset" from the beginning of - * directory "ip". If "res" is non-zero, fill it in with a pointer to the - * remaining space in the directory. - */ -__private_extern__ -int -ffs_blkatoff(vnode_t vp, off_t offset, char **res, buf_t *bpp) -{ - struct inode *ip; - register struct fs *fs; - struct buf *bp; - ufs_daddr_t lbn; - int bsize, error; -#if REV_ENDIAN_FS - struct mount *mp = vnode_mount(vp); - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); - fs = ip->i_fs; - lbn = lblkno(fs, offset); - bsize = blksize(fs, ip, lbn); - - *bpp = NULL; - if (error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), bsize, NOCRED, &bp)) { - buf_brelse(bp); - return (error); - } -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_in((char *)buf_dataptr(bp), buf_count(bp)); -#endif /* REV_ENDIAN_FS */ - - if (res) - *res = (char *)buf_dataptr(bp) + blkoff(fs, offset); - *bpp = bp; - return (0); -} -#endif - -/* - * Update the frsum fields to reflect addition or deletion - * of some frags. 
- */ -void -ffs_fragacct(fs, fragmap, fraglist, cnt) - struct fs *fs; - int fragmap; - int32_t fraglist[]; - int cnt; -{ - int inblk; - register int field, subfield; - register int siz, pos; - - inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; - fragmap <<= 1; - for (siz = 1; siz < fs->fs_frag; siz++) { - if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) - continue; - field = around[siz]; - subfield = inside[siz]; - for (pos = siz; pos <= fs->fs_frag; pos++) { - if ((fragmap & field) == subfield) { - fraglist[siz] += cnt; - pos += siz; - field <<= siz; - subfield <<= siz; - } - field <<= 1; - subfield <<= 1; - } - } -} - -/* - * block operations - * - * check if a block is available - */ -int -ffs_isblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - ufs_daddr_t h; -{ - unsigned char mask; - - switch ((int)fs->fs_frag) { - case 8: - return (cp[h] == 0xff); - case 4: - mask = 0x0f << ((h & 0x1) << 2); - return ((cp[h >> 1] & mask) == mask); - case 2: - mask = 0x03 << ((h & 0x3) << 1); - return ((cp[h >> 2] & mask) == mask); - case 1: - mask = 0x01 << (h & 0x7); - return ((cp[h >> 3] & mask) == mask); - default: - panic("ffs_isblock"); - } - /* NOTREACHED */ - return 0; -} - -/* - * take a block out of the map - */ -void -ffs_clrblock(fs, cp, h) - struct fs *fs; - u_char *cp; - ufs_daddr_t h; -{ - - switch ((int)fs->fs_frag) { - case 8: - cp[h] = 0; - return; - case 4: - cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] &= ~(0x01 << (h & 0x7)); - return; - default: - panic("ffs_clrblock"); - } -} - -/* - * put a block into the map - */ -void -ffs_setblock(fs, cp, h) - struct fs *fs; - unsigned char *cp; - ufs_daddr_t h; -{ - - switch ((int)fs->fs_frag) { - - case 8: - cp[h] = 0xff; - return; - case 4: - cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); - return; - case 2: - cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); - return; - case 1: - cp[h >> 3] |= (0x01 << (h & 0x7)); - 
return; - default: - panic("ffs_setblock"); - } -} diff --git a/bsd/ufs/ffs/ffs_tables.c b/bsd/ufs/ffs/ffs_tables.c deleted file mode 100644 index 6dd00a64a..000000000 --- a/bsd/ufs/ffs/ffs_tables.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93 - */ - -#include - -/* - * Bit patterns for identifying fragments in the block map - * used as ((map & around) == inside) - */ -int around[9] = { - 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff -}; -int inside[9] = { - 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe -}; - -/* - * Given a block map bit pattern, the frag tables tell whether a - * particular size fragment is available. - * - * used as: - * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map] { - * at least one fragment of the indicated size is available - * } - * - * These tables are used by the scanc instruction on the VAX to - * quickly find an appropriate fragment. - */ -u_char fragtbl124[256] = { - 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e, - 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e, - 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae, - 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, - 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, - 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, - 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, - 0x3e, 0x3e, 0x3e, 
0x3e, 0x2a, 0x3e, 0x6e, 0xaa, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, - 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, - 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, - 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, - 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce, - 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a, -}; - -u_char fragtbl8[256] = { - 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, - 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, - 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, - 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, - 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, - 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, - 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21, - 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, - 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, - 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12, - 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, - 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c, - 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c, - 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80, -}; - -/* - * The actual fragtbl 
array. - */ -u_char *fragtbl[MAXFRAG + 1] = { - 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8, -}; diff --git a/bsd/ufs/ffs/ffs_vfsops.c b/bsd/ufs/ffs/ffs_vfsops.c deleted file mode 100644 index 2214f3753..000000000 --- a/bsd/ufs/ffs/ffs_vfsops.c +++ /dev/null @@ -1,1648 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1989, 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include -#if REV_ENDIAN_FS -#include -#include -#endif /* REV_ENDIAN_FS */ - -int ffs_sbupdate(struct ufsmount *, int); - -struct vfsops ufs_vfsops = { - ffs_mount, - ufs_start, - ffs_unmount, - ufs_root, - ufs_quotactl, - ffs_vfs_getattr, - ffs_sync, - ffs_vget, - ffs_fhtovp, - ffs_vptofh, - ffs_init, - ffs_sysctl, - ffs_vfs_setattr, - {0} -}; - -extern u_long nextgennumber; - -union _qcvt { - int64_t qcvt; - int32_t val[2]; -}; -#define SETHIGH(q, h) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_HIGHWORD] = (h); \ - (q) = tmp.qcvt; \ -} -#define SETLOW(q, l) { \ - union _qcvt tmp; \ - tmp.qcvt = (q); \ - tmp.val[_QUAD_LOWWORD] = (l); \ - (q) = tmp.qcvt; \ -} - -/* - * Called by main() when ufs is going to be mounted as root. - */ -int -ffs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context) -{ - struct proc *p = current_proc(); /* XXX */ - int error; - - /* Set asynchronous flag by default */ - vfs_setflags(mp, MNT_ASYNC); - - if (error = ffs_mountfs(rvp, mp, context)) - return (error); - - (void)ffs_statfs(mp, vfs_statfs(mp), NULL); - - return (0); -} - -/* - * VFS Operations. - * - * mount system call - */ -int -ffs_mount(struct mount *mp, vnode_t devvp, __unused user_addr_t data, vfs_context_t context) -{ - struct proc *p = vfs_context_proc(context); - struct ufsmount *ump; - register struct fs *fs; - u_int size; - int error = 0, flags; - mode_t accessmode; - int ronly; - int reload = 0; - - /* - * If updating, check whether changing from read-write to - * read-only; if there is no device name, that's all we do. 
- */ - if (mp->mnt_flag & MNT_UPDATE) { - ump = VFSTOUFS(mp); - fs = ump->um_fs; - if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { - /* - * Flush any dirty data. - */ - VFS_SYNC(mp, MNT_WAIT, context); - /* - * Check for and optionally get rid of files open - * for writing. - */ - flags = WRITECLOSE; - if (mp->mnt_flag & MNT_FORCE) - flags |= FORCECLOSE; - if (error = ffs_flushfiles(mp, flags, p)) - return (error); - fs->fs_clean = 1; - fs->fs_ronly = 1; - if (error = ffs_sbupdate(ump, MNT_WAIT)) { - fs->fs_clean = 0; - fs->fs_ronly = 0; - return (error); - } - } - /* save fs_ronly to later use */ - ronly = fs->fs_ronly; - if ((mp->mnt_flag & MNT_RELOAD) || ronly) - reload = 1; - if ((reload) && - (error = ffs_reload(mp, vfs_context_ucred(context), p))) - return (error); - /* replace the ronly after load */ - fs->fs_ronly = ronly; - /* - * Do not update the file system if the user was in singleuser - * and then tries to mount -uw without fscking - */ - if (!fs->fs_clean && ronly) { - printf("WARNING: trying to mount a dirty file system\n"); - if (issingleuser() && (mp->mnt_flag & MNT_ROOTFS)) { - printf("WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n",fs->fs_fsmnt); - /* - * Reset the readonly bit as reload might have - * modified this bit - */ - fs->fs_ronly = 1; - return(EPERM); - } - } - - if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { - fs->fs_ronly = 0; - fs->fs_clean = 0; - (void) ffs_sbupdate(ump, MNT_WAIT); - } - if (devvp == 0) { - return(0); - } - } - if ((mp->mnt_flag & MNT_UPDATE) == 0) { - ufs_ihashinit(); - error = ffs_mountfs(devvp, mp, context); - } else { - if (devvp != ump->um_devvp) - error = EINVAL; /* needs translation */ - } - if (error) { - return (error); - } - ump = VFSTOUFS(mp); - fs = ump->um_fs; - bzero(fs->fs_fsmnt , sizeof(fs->fs_fsmnt)); - strncpy(fs->fs_fsmnt, (caddr_t)mp->mnt_vfsstat.f_mntonname, sizeof(fs->fs_fsmnt) - 1); - (void)ffs_statfs(mp, &mp->mnt_vfsstat, p); - return (0); -} - - -struct ffs_reload_cargs { - struct vnode *devvp; - kauth_cred_t cred; - struct fs *fs; - struct proc *p; - int error; -#if REV_ENDIAN_FS - int rev_endian; -#endif /* REV_ENDIAN_FS */ -}; - - -static int -ffs_reload_callback(struct vnode *vp, void *cargs) -{ - struct inode *ip; - struct buf *bp; - struct fs *fs; - struct ffs_reload_cargs *args; - - args = (struct ffs_reload_cargs *)cargs; - - /* - * flush all the buffers associated with this node - */ - if (buf_invalidateblks(vp, 0, 0, 0)) - panic("ffs_reload: dirty2"); - - /* - * Step 6: re-read inode data - */ - ip = VTOI(vp); - fs = args->fs; - - if (args->error = (int)buf_bread(args->devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ip->i_number))), - (int)fs->fs_bsize, NOCRED, &bp)) { - buf_brelse(bp); - - return (VNODE_RETURNED_DONE); - } - -#if REV_ENDIAN_FS - if (args->rev_endian) { - byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + - ino_to_fsbo(fs, ip->i_number)), ip); - } else { -#endif /* REV_ENDIAN_FS */ - ip->i_din = *((struct dinode *)buf_dataptr(bp) + - ino_to_fsbo(fs, ip->i_number)); -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - - buf_brelse(bp); - - return 
(VNODE_RETURNED); -} - - -/* - * Reload all incore data for a filesystem (used after running fsck on - * the root filesystem and finding things to fix). The filesystem must - * be mounted read-only. - * - * Things to do to update the mount: - * 1) invalidate all cached meta-data. - * 2) re-read superblock from disk. - * 3) re-read summary information from disk. - * 4) invalidate all inactive vnodes. - * 5) invalidate all cached file data. - * 6) re-read inode data for all active vnodes. - */ -ffs_reload(struct mount *mountp, kauth_cred_t cred, struct proc *p) -{ - register struct vnode *devvp; - void *space; - struct buf *bp; - struct fs *fs, *newfs; - int i, blks, size, error; - u_int64_t maxfilesize; /* XXX */ - int32_t *lp; - struct ffs_reload_cargs args; -#if REV_ENDIAN_FS - int rev_endian = (mountp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - if ((mountp->mnt_flag & MNT_RDONLY) == 0) - return (EINVAL); - /* - * Step 1: invalidate all cached meta-data. - */ - devvp = VFSTOUFS(mountp)->um_devvp; - if (buf_invalidateblks(devvp, 0, 0, 0)) - panic("ffs_reload: dirty1"); - /* - * Step 2: re-read superblock from disk. - */ - size = vfs_devblocksize(mountp); - - if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), SBSIZE, NOCRED,&bp)) { - buf_brelse(bp); - return (error); - } - newfs = (struct fs *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian) { - error = byte_swap_sbin(newfs); - if (error) { - buf_brelse(bp); - return (error); - } - } -#endif /* REV_ENDIAN_FS */ - if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || - newfs->fs_bsize < sizeof(struct fs)) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_sbout(newfs); -#endif /* REV_ENDIAN_FS */ - - buf_brelse(bp); - return (EIO); /* XXX needs translation */ - } - fs = VFSTOUFS(mountp)->um_fs; - /* - * Copy pointer fields back into superblock before copying in XXX - * new superblock. These should really be in the ufsmount. 
XXX - * Note that important parameters (eg fs_ncg) are unchanged. - */ - newfs->fs_csp = fs->fs_csp; - newfs->fs_maxcluster = fs->fs_maxcluster; - newfs->fs_contigdirs = fs->fs_contigdirs; - bcopy(newfs, fs, (u_int)fs->fs_sbsize); - if (fs->fs_sbsize < SBSIZE) - buf_markinvalid(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_sbout(newfs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - ffs_oldfscompat(fs); - maxfilesize = 0x100000000ULL; /* 4GB */ - if (fs->fs_maxfilesize > maxfilesize) /* XXX */ - fs->fs_maxfilesize = maxfilesize; /* XXX */ - /* - * Step 3: re-read summary information from disk. - */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), size, - NOCRED, &bp)) { - buf_brelse(bp); - return (error); - } -#if REV_ENDIAN_FS - if (rev_endian) { - /* csum swaps */ - byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int)); - } -#endif /* REV_ENDIAN_FS */ - bcopy((char *)buf_dataptr(bp), space, (u_int)size); -#if REV_ENDIAN_FS - if (rev_endian) { - /* csum swaps */ - byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int)); - } -#endif /* REV_ENDIAN_FS */ - space = (char *) space + size; - buf_brelse(bp); - } - /* - * We no longer know anything about clusters per cylinder group. - */ - if (fs->fs_contigsumsize > 0) { - lp = fs->fs_maxcluster; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - } -#if REV_ENDIAN_FS - args.rev_endian = rev_endian; -#endif /* REV_ENDIAN_FS */ - args.devvp = devvp; - args.cred = cred; - args.fs = fs; - args.p = p; - args.error = 0; - /* - * ffs_reload_callback will be called for each vnode - * hung off of this mount point that can't be recycled... 
- * vnode_iterate will recycle those that it can (the VNODE_RELOAD option) - * the vnode will be in an 'unbusy' state (VNODE_WAIT) and - * properly referenced and unreferenced around the callback - */ - vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, ffs_reload_callback, (void *)&args); - - return (args.error); -} - -/* - * Common code for mount and mountroot - */ -int -ffs_mountfs(devvp, mp, context) - struct vnode *devvp; - struct mount *mp; - vfs_context_t context; -{ - struct ufsmount *ump; - struct buf *bp; - struct fs *fs; - dev_t dev; - struct buf *cgbp; - struct cg *cgp; - int32_t clustersumoff; - void *space; - int error, i, blks, ronly; - u_int32_t size; - int32_t *lp; - kauth_cred_t cred; - u_int64_t maxfilesize; /* XXX */ - u_int dbsize = DEV_BSIZE; -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - dev = devvp->v_rdev; - cred = vfs_context_ucred(context); - - ronly = vfs_isrdonly(mp); - bp = NULL; - ump = NULL; - - /* Advisory locking should be handled at the VFS layer */ - vfs_setlocklocal(mp); - - /* Obtain the actual device block size */ - if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&size, 0, context)) { - error = ENXIO; - goto out; - } - - if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)(SBOFF/size)), - SBSIZE, cred, &bp)) - goto out; - fs = (struct fs *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || - fs->fs_bsize < sizeof(struct fs)) { - int magic = fs->fs_magic; - - byte_swap_ints(&magic, 1); - if (magic != FS_MAGIC) { - error = EINVAL; - goto out; - } - if (error = byte_swap_sbin(fs)) - goto out; - - if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || - fs->fs_bsize < sizeof(struct fs)) { - byte_swap_sbout(fs); - error = EINVAL; /* XXX needs translation */ - goto out; - } - rev_endian=1; - } -#endif /* REV_ENDIAN_FS */ - if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || - fs->fs_bsize < sizeof(struct fs)) { -#if REV_ENDIAN_FS - if (rev_endian) - 
byte_swap_sbout(fs); -#endif /* REV_ENDIAN_FS */ - error = EINVAL; /* XXX needs translation */ - goto out; - } - - if (fs->fs_sbsize < 0 || fs->fs_sbsize > SBSIZE) { - error = EINVAL; - goto out; - } - - /* - * Buffer cache does not handle multiple pages in a buf when - * invalidating incore buffer in pageout. There are no locks - * in the pageout path. So there is a danger of loosing data when - * block allocation happens at the same time a pageout of buddy - * page occurs. incore() returns buf with both - * pages, this leads vnode-pageout to incorrectly flush of entire. - * buf. Till the low level ffs code is modified to deal with these - * do not mount any FS more than 4K size. - */ - /* - * Can't mount filesystems with a fragment size less than DIRBLKSIZ - */ - /* - * Don't mount dirty filesystems, except for the root filesystem - */ - if ((fs->fs_bsize > PAGE_SIZE) || (fs->fs_fsize < DIRBLKSIZ) || - ((!(mp->mnt_flag & MNT_ROOTFS)) && (!fs->fs_clean))) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_sbout(fs); -#endif /* REV_ENDIAN_FS */ - error = ENOTSUP; - goto out; - } - - /* Let's figure out the devblock size the file system is with */ - /* the device block size = fragment size / number of sectors per frag */ - - dbsize = fs->fs_fsize / NSPF(fs); - if(dbsize <= 0 ) { - kprintf("device blocksize computaion failed\n"); - } else { - if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&dbsize, - FWRITE, context) != 0) { - kprintf("failed to set device blocksize\n"); - } - /* force the specfs to reread blocksize from size() */ - set_fsblocksize(devvp); - } - - /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ - if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_sbout(fs); -#endif /* REV_ENDIAN_FS */ - error = EROFS; /* needs translation */ - goto out; - } - - /* If we are not mounting read only, then check for overlap - * condition in cylinder group's free block map. 
- * If overlap exists, then force this into a read only mount - * to avoid further corruption. PR#2216969 - */ - if (ronly == 0){ - if (error = (int)buf_bread (devvp, (daddr64_t)((unsigned)fsbtodb(fs, cgtod(fs, 0))), - (int)fs->fs_cgsize, NOCRED, &cgbp)) { - buf_brelse(cgbp); - goto out; - } - cgp = (struct cg *)buf_dataptr(cgbp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgin(cgp,fs); -#endif /* REV_ENDIAN_FS */ - if (!cg_chkmagic(cgp)){ -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(cgbp); - goto out; - } - if (cgp->cg_clustersumoff != 0) { - /* Check for overlap */ - clustersumoff = cgp->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - clustersumoff = roundup(clustersumoff, sizeof(long)); - if (cgp->cg_clustersumoff < clustersumoff) { - /* Overlap exists */ - mp->mnt_flag |= MNT_RDONLY; - ronly = 1; - } - } -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_cgout(cgp,fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(cgbp); - } - - ump = _MALLOC(sizeof *ump, M_UFSMNT, M_WAITOK); - bzero((caddr_t)ump, sizeof *ump); - ump->um_fs = _MALLOC((u_long)fs->fs_sbsize, M_UFSMNT, - M_WAITOK); - bcopy((char *)buf_dataptr(bp), ump->um_fs, (u_int)fs->fs_sbsize); - if (fs->fs_sbsize < SBSIZE) - buf_markinvalid(bp); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_sbout(fs); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - bp = NULL; - fs = ump->um_fs; - fs->fs_ronly = ronly; - if (fs->fs_cssize < 1 || fs->fs_fsize < 1 || fs->fs_ncg < 1) { - error = EINVAL; - goto out; - } - if (fs->fs_frag < 1 || fs->fs_frag > MAXFRAG) { - error = EINVAL; - goto out; - } - - size = fs->fs_cssize; - blks = howmany(size, fs->fs_fsize); - if (fs->fs_contigsumsize > 0) { - if (fs->fs_ncg > INT_MAX / sizeof(int32_t) || size > INT_MAX - fs->fs_ncg * sizeof(int32_t)) { - error = EINVAL; - goto out; - } - size += fs->fs_ncg * sizeof(int32_t); - } - if (fs->fs_ncg > INT_MAX / sizeof(u_int8_t) || size > INT_MAX - fs->fs_ncg * 
sizeof(u_int8_t)) { - error = EINVAL; - goto out; - } - size += fs->fs_ncg * sizeof(u_int8_t); - space = _MALLOC((u_long)size, M_UFSMNT, M_WAITOK); - fs->fs_csp = space; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - if (error = (int)buf_bread(devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), - size, cred, &bp)) { - _FREE(fs->fs_csp, M_UFSMNT); - goto out; - } - bcopy((char *)buf_dataptr(bp), space, (u_int)size); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_ints((int *) space, size / sizeof(int)); -#endif /* REV_ENDIAN_FS */ - space = (char *)space + size; - buf_brelse(bp); - bp = NULL; - } - if (fs->fs_contigsumsize > 0) { - fs->fs_maxcluster = lp = space; - for (i = 0; i < fs->fs_ncg; i++) - *lp++ = fs->fs_contigsumsize; - space = lp; - } - size = fs->fs_ncg * sizeof(u_int8_t); - fs->fs_contigdirs = (u_int8_t *)space; - space = (u_int8_t *)space + size; - bzero(fs->fs_contigdirs, size); - /* XXX Compatibility for old filesystems */ - if (fs->fs_avgfilesize <= 0) - fs->fs_avgfilesize = AVFILESIZ; - if (fs->fs_avgfpdir <= 0) - fs->fs_avgfpdir = AFPDIR; - /* XXX End of compatibility */ - mp->mnt_data = (qaddr_t)ump; - mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; - mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); - /* XXX warning hardcoded max symlen and not "mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;" */ - mp->mnt_maxsymlinklen = 60; -#if REV_ENDIAN_FS - if (rev_endian) - mp->mnt_flag |= MNT_REVEND; -#endif /* REV_ENDIAN_FS */ - ump->um_mountp = mp; - ump->um_dev = dev; - ump->um_devvp = devvp; - ump->um_nindir = fs->fs_nindir; - ump->um_bptrtodb = fs->fs_fsbtodb; - ump->um_seqinc = fs->fs_frag; - for (i = 0; i < MAXQUOTAS; i++) - dqfileinit(&ump->um_qfiles[i]); - ffs_oldfscompat(fs); - ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ - maxfilesize = 0x100000000ULL; /* 4GB */ -#if 0 - maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ 
-#endif /* 0 */ - if (fs->fs_maxfilesize > maxfilesize) /* XXX */ - fs->fs_maxfilesize = maxfilesize; /* XXX */ - if (ronly == 0) { - fs->fs_clean = 0; - (void) ffs_sbupdate(ump, MNT_WAIT); - } - return (0); -out: - if (bp) - buf_brelse(bp); - if (ump) { - _FREE(ump->um_fs, M_UFSMNT); - _FREE(ump, M_UFSMNT); - } - return (error); -} - -/* - * Sanity checks for old file systems. - * - * XXX - goes away some day. - */ -ffs_oldfscompat(fs) - struct fs *fs; -{ - int i; - - fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ - fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ - if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ - fs->fs_nrpos = 8; /* XXX */ - if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ - u_int64_t sizepb = fs->fs_bsize; /* XXX */ - /* XXX */ - fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ - for (i = 0; i < NIADDR; i++) { /* XXX */ - sizepb *= NINDIR(fs); /* XXX */ - fs->fs_maxfilesize += sizepb; /* XXX */ - } /* XXX */ - fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ - fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ - } /* XXX */ - return (0); -} - -/* - * unmount system call - */ -int -ffs_unmount(mp, mntflags, context) - struct mount *mp; - int mntflags; - vfs_context_t context; -{ - struct proc *p = vfs_context_proc(context); - register struct ufsmount *ump; - register struct fs *fs; - int error, flags; - int force; - - flags = 0; - force = 0; - if (mntflags & MNT_FORCE) { - flags |= FORCECLOSE; - force = 1; - } - if ( (error = ffs_flushfiles(mp, flags, p)) && !force ) - return (error); - ump = VFSTOUFS(mp); - fs = ump->um_fs; - - if (fs->fs_ronly == 0) { - fs->fs_clean = 1; - if (error = ffs_sbupdate(ump, MNT_WAIT)) { - fs->fs_clean = 0; -#ifdef notyet - /* we can atleast cleanup ; as the media could be WP */ - /* & during mount, we do not check for write failures */ - /* FIXME LATER : the Correct fix would be to have */ - /* mount detect the WP media and downgrade to readonly mount */ - /* For now, here it is */ - return 
(error); -#endif /* notyet */ - } - } - _FREE(fs->fs_csp, M_UFSMNT); - _FREE(fs, M_UFSMNT); - _FREE(ump, M_UFSMNT); - - return (0); -} - -/* - * Flush out all the files in a filesystem. - */ -ffs_flushfiles(mp, flags, p) - register struct mount *mp; - int flags; - struct proc *p; -{ - register struct ufsmount *ump; - int i, error; - - ump = VFSTOUFS(mp); - -#if QUOTA - /* - * NOTE: The open quota files have an indirect reference - * on the root directory vnode. We must account for this - * extra reference when doing the intial vflush. - */ - if (mp->mnt_flag & MNT_QUOTA) { - struct vnode *rootvp = NULLVP; - int quotafilecnt = 0; - - /* Find out how many quota files we have open. */ - for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_qfiles[i].qf_vp != NULLVP) - ++quotafilecnt; - } - - /* - * Check if the root vnode is in our inode hash - * (so we can skip over it). - */ - rootvp = ufs_ihashget(ump->um_dev, ROOTINO); - - error = vflush(mp, rootvp, SKIPSYSTEM|flags); - - if (rootvp) { - /* - * See if there are additional references on the - * root vp besides the ones obtained from the open - * quota files and the hfs_chashget call above. - */ - if ((error == 0) && - (rootvp->v_usecount > (1 + quotafilecnt))) { - error = EBUSY; /* root dir is still open */ - } - vnode_put(rootvp); - } - if (error && (flags & FORCECLOSE) == 0) - return (error); - - for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_qfiles[i].qf_vp == NULLVP) - continue; - quotaoff(mp, i); - } - /* - * Here we fall through to vflush again to ensure - * that we have gotten rid of all the system vnodes. - */ - } -#endif - error = vflush(mp, NULLVP, SKIPSWAP|flags); - error = vflush(mp, NULLVP, flags); - return (error); -} - -/* - * Get file system statistics. 
- */ -int -ffs_statfs(mp, sbp, context) - struct mount *mp; - register struct vfsstatfs *sbp; - vfs_context_t context; -{ - register struct ufsmount *ump; - register struct fs *fs; - - ump = VFSTOUFS(mp); - fs = ump->um_fs; - if (fs->fs_magic != FS_MAGIC) - panic("ffs_statfs"); - sbp->f_bsize = fs->fs_fsize; - sbp->f_iosize = fs->fs_bsize; - sbp->f_blocks = (uint64_t)((unsigned long)fs->fs_dsize); - sbp->f_bfree = (uint64_t) ((unsigned long)(fs->fs_cstotal.cs_nbfree * fs->fs_frag + - fs->fs_cstotal.cs_nffree)); - sbp->f_bavail = (uint64_t) ((unsigned long)freespace(fs, fs->fs_minfree)); - sbp->f_files = (uint64_t) ((unsigned long)(fs->fs_ncg * fs->fs_ipg - ROOTINO)); - sbp->f_ffree = (uint64_t) ((unsigned long)fs->fs_cstotal.cs_nifree); - return (0); -} - -int -ffs_vfs_getattr(mp, fsap, context) - struct mount *mp; - struct vfs_attr *fsap; - vfs_context_t context; -{ - struct ufsmount *ump; - struct fs *fs; - kauth_cred_t cred; - struct vnode *devvp; - struct buf *bp; - struct ufslabel *ulp; - char *offset; - int bs, error, length; - - ump = VFSTOUFS(mp); - fs = ump->um_fs; - cred = vfs_context_ucred(context); - - VFSATTR_RETURN(fsap, f_bsize, fs->fs_fsize); - VFSATTR_RETURN(fsap, f_iosize, fs->fs_bsize); - VFSATTR_RETURN(fsap, f_blocks, (uint64_t)((unsigned long)fs->fs_dsize)); - VFSATTR_RETURN(fsap, f_bfree, (uint64_t)((unsigned long) - (fs->fs_cstotal.cs_nbfree * fs->fs_frag + - fs->fs_cstotal.cs_nffree))); - VFSATTR_RETURN(fsap, f_bavail, (uint64_t)((unsigned long)freespace(fs, - fs->fs_minfree))); - VFSATTR_RETURN(fsap, f_files, (uint64_t)((unsigned long) - (fs->fs_ncg * fs->fs_ipg - ROOTINO))); - VFSATTR_RETURN(fsap, f_ffree, (uint64_t)((unsigned long) - fs->fs_cstotal.cs_nifree)); - - if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) { - fsap->f_fsid.val[0] = mp->mnt_vfsstat.f_fsid.val[0]; - fsap->f_fsid.val[1] = mp->mnt_vfsstat.f_fsid.val[1]; - VFSATTR_SET_SUPPORTED(fsap, f_fsid); - } - - if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { - devvp = ump->um_devvp; - bs = 
vfs_devblocksize(mp); - - if (error = (int)buf_meta_bread(devvp, - (daddr64_t)(UFS_LABEL_OFFSET / bs), - MAX(bs, UFS_LABEL_SIZE), cred, &bp)) { - if (bp) - buf_brelse(bp); - return (error); - } - - /* - * Since the disklabel is read directly by older user space - * code, make sure this buffer won't remain in the cache when - * we release it. - */ - buf_setflags(bp, B_NOCACHE); - - offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs); - ulp = (struct ufslabel *)offset; - - if (ufs_label_check(ulp)) { - length = ulp->ul_namelen; -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) - length = OSSwapInt16(length); -#endif - if (length > 0 && length <= UFS_MAX_LABEL_NAME) { - bcopy(ulp->ul_name, fsap->f_vol_name, length); - fsap->f_vol_name[UFS_MAX_LABEL_NAME - 1] = '\0'; - fsap->f_vol_name[length] = '\0'; - } - } - - buf_brelse(bp); - VFSATTR_SET_SUPPORTED(fsap, f_vol_name); - } - - if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_SYMBOLICLINKS | - VOL_CAP_FMT_HARDLINKS | - VOL_CAP_FMT_SPARSE_FILES | - VOL_CAP_FMT_CASE_SENSITIVE | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_HIDDEN_FILES ; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] - = VOL_CAP_INT_NFSEXPORT | - VOL_CAP_INT_VOL_RENAME | - VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED1] - = 0; - fsap->f_capabilities.capabilities[VOL_CAPABILITIES_RESERVED2] - = 0; - - /* Capabilities we know about: */ - fsap->f_capabilities.valid[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_PERSISTENTOBJECTIDS | - VOL_CAP_FMT_SYMBOLICLINKS | - VOL_CAP_FMT_HARDLINKS | - VOL_CAP_FMT_JOURNAL | - VOL_CAP_FMT_JOURNAL_ACTIVE | - VOL_CAP_FMT_NO_ROOT_TIMES | - VOL_CAP_FMT_SPARSE_FILES | - VOL_CAP_FMT_ZERO_RUNS | - VOL_CAP_FMT_CASE_SENSITIVE | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_2TB_FILESIZE | - VOL_CAP_FMT_OPENDENYMODES | - 
VOL_CAP_FMT_HIDDEN_FILES ; - fsap->f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_SEARCHFS | - VOL_CAP_INT_ATTRLIST | - VOL_CAP_INT_NFSEXPORT | - VOL_CAP_INT_READDIRATTR | - VOL_CAP_INT_EXCHANGEDATA | - VOL_CAP_INT_COPYFILE | - VOL_CAP_INT_ALLOCATE | - VOL_CAP_INT_VOL_RENAME | - VOL_CAP_INT_ADVLOCK | - VOL_CAP_INT_FLOCK | - VOL_CAP_INT_MANLOCK; - fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED1] = 0; - fsap->f_capabilities.valid[VOL_CAPABILITIES_RESERVED2] = 0; - - VFSATTR_SET_SUPPORTED(fsap, f_capabilities); - } - - if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { - fsap->f_attributes.validattr.commonattr = 0; - fsap->f_attributes.validattr.volattr = - ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; - fsap->f_attributes.validattr.dirattr = 0; - fsap->f_attributes.validattr.fileattr = 0; - fsap->f_attributes.validattr.forkattr = 0; - - fsap->f_attributes.nativeattr.commonattr = 0; - fsap->f_attributes.nativeattr.volattr = - ATTR_VOL_NAME | ATTR_VOL_CAPABILITIES | ATTR_VOL_ATTRIBUTES; - fsap->f_attributes.nativeattr.dirattr = 0; - fsap->f_attributes.nativeattr.fileattr = 0; - fsap->f_attributes.nativeattr.forkattr = 0; - - VFSATTR_SET_SUPPORTED(fsap, f_attributes); - } - - return (0); -} - - -int -ffs_vfs_setattr(mp, fsap, context) - struct mount *mp; - struct vfs_attr *fsap; - vfs_context_t context; -{ - struct ufsmount *ump; - struct vnode *devvp; - struct buf *bp; - struct ufslabel *ulp; - kauth_cred_t cred; - char *offset; - int bs, error; - - - ump = VFSTOUFS(mp); - cred = vfs_context_ucred(context); - - if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { - devvp = ump->um_devvp; - bs = vfs_devblocksize(mp); - if (error = buf_meta_bread(devvp, - (daddr64_t)(UFS_LABEL_OFFSET / bs), - MAX(bs, UFS_LABEL_SIZE), cred, &bp)) { - if (bp) - buf_brelse(bp); - return (error); - } - - /* - * Since the disklabel is read directly by older user space - * code, make sure this buffer won't remain in the cache when - * we release it. 
- */ - buf_setflags(bp, B_NOCACHE); - - /* Validate the label structure; init if not valid */ - offset = buf_dataptr(bp) + (UFS_LABEL_OFFSET % bs); - ulp = (struct ufslabel *)offset; - if (!ufs_label_check(ulp)) - ufs_label_init(ulp); - - /* Copy new name over existing name */ - ulp->ul_namelen = strlen(fsap->f_vol_name); - bcopy(fsap->f_vol_name, ulp->ul_name, ulp->ul_namelen); - ulp->ul_name[UFS_MAX_LABEL_NAME - 1] = '\0'; - ulp->ul_name[ulp->ul_namelen] = '\0'; - -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) - ulp->ul_namelen = OSSwapInt16(ulp->ul_namelen); -#endif - - /* Update the checksum */ - ulp->ul_checksum = 0; - ulp->ul_checksum = ul_cksum(ulp, sizeof(*ulp)); - - /* Write the label back to disk */ - buf_bwrite(bp); - bp = NULL; - - VFSATTR_SET_SUPPORTED(fsap, f_vol_name); - } - - return (0); - } -struct ffs_sync_cargs { - vfs_context_t context; - int waitfor; - int error; -}; - - -static int -ffs_sync_callback(struct vnode *vp, void *cargs) -{ - struct inode *ip; - struct ffs_sync_cargs *args; - int error; - - args = (struct ffs_sync_cargs *)cargs; - - ip = VTOI(vp); - - if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) || vnode_hasdirtyblks(vp)) { - error = VNOP_FSYNC(vp, args->waitfor, args->context); - - if (error) - args->error = error; - - } - return (VNODE_RETURNED); -} - -/* - * Go through the disk queues to initiate sandbagged IO; - * go through the inodes to write those that have been modified; - * initiate the writing of the super block if it has been modified. - * - * Note: we are always called with the filesystem marked `MPBUSY'. 
- */ -int -ffs_sync(mp, waitfor, context) - struct mount *mp; - int waitfor; - vfs_context_t context; -{ - struct vnode *nvp, *vp; - struct ufsmount *ump = VFSTOUFS(mp); - struct fs *fs; - struct timeval tv; - int error, allerror = 0; - struct ffs_sync_cargs args; - - fs = ump->um_fs; - if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - /* - * Write back each (modified) inode. - */ - args.context = context; - args.waitfor = waitfor; - args.error = 0; - /* - * ffs_sync_callback will be called for each vnode - * hung off of this mount point... the vnode will be - * properly referenced and unreferenced around the callback - */ - vnode_iterate(mp, 0, ffs_sync_callback, (void *)&args); - - if (args.error) - allerror = args.error; - - /* - * Force stale file system control information to be flushed. - */ - if (error = VNOP_FSYNC(ump->um_devvp, waitfor, context)) - allerror = error; -#if QUOTA - qsync(mp); -#endif - /* - * Write back modified superblock. - */ - if (fs->fs_fmod != 0) { - fs->fs_fmod = 0; - microtime(&tv); - fs->fs_time = tv.tv_sec; - if (error = ffs_sbupdate(ump, waitfor)) - allerror = error; - } - return (allerror); -} - -/* - * Look up a FFS dinode number to find its incore vnode, otherwise read it - * in from disk. If it is in core, wait for the lock bit to clear, then - * return the inode locked. Detection and handling of mount points must be - * done by the calling routine. 
- */ -int -ffs_vget(mp, ino, vpp, context) - mount_t mp; - ino64_t ino; - vnode_t *vpp; - vfs_context_t context; -{ - return(ffs_vget_internal(mp, (ino_t)ino, vpp, NULL, NULL, 0, 0)); -} - - -int -ffs_vget_internal(mp, ino, vpp, dvp, cnp, mode, fhwanted) - mount_t mp; - ino_t ino; - vnode_t *vpp; - vnode_t dvp; - struct componentname *cnp; - int mode; - int fhwanted; -{ - struct proc *p = current_proc(); /* XXX */ - struct fs *fs; - struct inode *ip; - struct ufsmount *ump; - struct buf *bp; - struct vnode *vp; - struct vnode_fsparam vfsp; - struct timeval tv; - enum vtype vtype; - dev_t dev; - int i, type, error = 0; - - *vpp = NULL; - ump = VFSTOUFS(mp); - dev = ump->um_dev; -#if 0 - /* Check for unmount in progress */ - if (mp->mnt_kern_flag & MNTK_UNMOUNT) { - return (EPERM); - } -#endif - /* - * Allocate a new inode... do it before we check the - * cache, because the MALLOC_ZONE may block - */ - type = M_FFSNODE; - MALLOC_ZONE(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); - - /* - * check in the inode hash - */ - if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { - /* - * found it... get rid of the allocation - * that we didn't need and return - * the 'found' vnode - */ - FREE_ZONE(ip, sizeof(struct inode), type); - vp = *vpp; - return (0); - } - bzero((caddr_t)ip, sizeof(struct inode)); - /* - * lock the inode - */ -// lockinit(&ip->i_lock, PINOD, "inode", 0, 0); -// lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct slock *)0, p); - - ip->i_fs = fs = ump->um_fs; - ip->i_dev = dev; - ip->i_number = ino; -#if QUOTA - for (i = 0; i < MAXQUOTAS; i++) - ip->i_dquot[i] = NODQUOT; -#endif - SET(ip->i_flag, IN_ALLOC); - /* - * Put it onto its hash chain locked so that other requests for - * this inode will block if they arrive while we are sleeping waiting - * for old data structures to be purged or for the contents of the - * disk portion of this inode to be read. - */ - ufs_ihashins(ip); - - /* Read in the disk contents for the inode, copy into the inode. 
*/ - if (error = (int)buf_bread(ump->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, ino_to_fsba(fs, ino))), - (int)fs->fs_bsize, NOCRED, &bp)) { - buf_brelse(bp); - goto errout; - } -#if REV_ENDIAN_FS - if (mp->mnt_flag & MNT_REVEND) { - byte_swap_inode_in(((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)),ip); - } else { - ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)); - } -#else - ip->i_din = *((struct dinode *)buf_dataptr(bp) + ino_to_fsbo(fs, ino)); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - - if (mode == 0) - vtype = IFTOVT(ip->i_mode); - else - vtype = IFTOVT(mode); - - if (vtype == VNON) { - if (fhwanted) { - /* NFS is in play */ - error = ESTALE; - goto errout; - } else { - error = ENOENT; - goto errout; - } - } - - vfsp.vnfs_mp = mp; - vfsp.vnfs_vtype = vtype; - vfsp.vnfs_str = "ufs"; - vfsp.vnfs_dvp = dvp; - vfsp.vnfs_fsnode = ip; - vfsp.vnfs_cnp = cnp; - - if (mode == 0) - vfsp.vnfs_filesize = ip->i_din.di_size; - else - vfsp.vnfs_filesize = 0; - - if (vtype == VFIFO ) - vfsp.vnfs_vops = FFS_FIFOOPS; - else if (vtype == VBLK || vtype == VCHR) - vfsp.vnfs_vops = ffs_specop_p; - else - vfsp.vnfs_vops = ffs_vnodeop_p; - - if (vtype == VBLK || vtype == VCHR) - vfsp.vnfs_rdev = ip->i_rdev; - else - vfsp.vnfs_rdev = 0; - - if (dvp && cnp && (cnp->cn_flags & MAKEENTRY)) - vfsp.vnfs_flags = 0; - else - vfsp.vnfs_flags = VNFS_NOCACHE; - - /* - * Tag root directory - */ - vfsp.vnfs_markroot = (ip->i_number == ROOTINO); - vfsp.vnfs_marksystem = 0; - - if ((error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &vp))) - goto errout; - - /* - * Finish inode initialization now that aliasing has been resolved. 
- */ - ip->i_devvp = ump->um_devvp; - ip->i_vnode = vp; - - vnode_ref(ip->i_devvp); - vnode_addfsref(vp); - vnode_settag(vp, VT_UFS); - - /* - * Initialize modrev times - */ - microtime(&tv); - SETHIGH(ip->i_modrev, tv.tv_sec); - SETLOW(ip->i_modrev, tv.tv_usec * 4294); - - /* - * Set up a generation number for this inode if it does not - * already have one. This should only happen on old filesystems. - */ - if (ip->i_gen == 0) { - if (++nextgennumber < (u_long)tv.tv_sec) - nextgennumber = tv.tv_sec; - ip->i_gen = nextgennumber; - if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) - ip->i_flag |= IN_MODIFIED; - } - /* - * Ensure that uid and gid are correct. This is a temporary - * fix until fsck has been changed to do the update. - */ - if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ - ip->i_uid = ip->i_din.di_ouid; /* XXX */ - ip->i_gid = ip->i_din.di_ogid; /* XXX */ - } /* XXX */ - *vpp = vp; - - CLR(ip->i_flag, IN_ALLOC); - - if (ISSET(ip->i_flag, IN_WALLOC)) - wakeup(ip); - - return (0); - -errout: - ufs_ihashrem(ip); - - if (ISSET(ip->i_flag, IN_WALLOC)) - wakeup(ip); - FREE_ZONE(ip, sizeof(struct inode), type); - - return (error); -} - -/* - * File handle to vnode - * - * Have to be really careful about stale file handles: - * - check that the inode number is valid - * - call vget to get the locked inode - * - check for an unallocated inode (i_mode == 0) - */ -int -ffs_fhtovp(mp, fhlen, fhp, vpp, context) - register struct mount *mp; - int fhlen; - unsigned char *fhp; - struct vnode **vpp; - vfs_context_t context; -{ - register struct ufid *ufhp; - register struct inode *ip; - struct vnode *nvp; - struct fs *fs; - int error; - ino_t ino; - - if (fhlen < (int)sizeof(struct ufid)) - return (EINVAL); - ufhp = (struct ufid *)fhp; - fs = VFSTOUFS(mp)->um_fs; - ino = ntohl(ufhp->ufid_ino); - if (ino < ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) - return (ESTALE); - error = ffs_vget_internal(mp, ino, &nvp, NULL, NULL, 0, 1); - if (error) { - *vpp = NULLVP; - return 
(error); - } - ip = VTOI(nvp); - if (ip->i_mode == 0 || ip->i_gen != ntohl(ufhp->ufid_gen)) { - vnode_put(nvp); - *vpp = NULLVP; - return (ESTALE); - } - *vpp = nvp; - return (0); -} - -/* - * Vnode pointer to File handle - */ -/* ARGSUSED */ -int -ffs_vptofh(vp, fhlenp, fhp, context) - struct vnode *vp; - int *fhlenp; - unsigned char *fhp; - vfs_context_t context; -{ - register struct inode *ip; - register struct ufid *ufhp; - - if (*fhlenp < (int)sizeof(struct ufid)) - return (EOVERFLOW); - ip = VTOI(vp); - ufhp = (struct ufid *)fhp; - ufhp->ufid_ino = htonl(ip->i_number); - ufhp->ufid_gen = htonl(ip->i_gen); - *fhlenp = sizeof(struct ufid); - return (0); -} - -/* - * Initialize the filesystem; just use ufs_init. - */ -int -ffs_init(vfsp) - struct vfsconf *vfsp; -{ - - return (ufs_init(vfsp)); -} - -/* - * fast filesystem related variables. - */ -ffs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, vfs_context_t context) -{ - extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree; - - /* all sysctl names at this level are terminal */ - if (namelen != 1) - return (ENOTDIR); /* overloaded */ - - switch (name[0]) { - case FFS_CLUSTERREAD: - return (sysctl_int(oldp, oldlenp, newp, newlen, - &doclusterread)); - case FFS_CLUSTERWRITE: - return (sysctl_int(oldp, oldlenp, newp, newlen, - &doclusterwrite)); - case FFS_REALLOCBLKS: - return (sysctl_int(oldp, oldlenp, newp, newlen, - &doreallocblks)); - case FFS_ASYNCFREE: - return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree)); - default: - return (ENOTSUP); - } - /* NOTREACHED */ -} - -/* - * Write a superblock and associated information back to disk. 
- */ -int -ffs_sbupdate(mp, waitfor) - struct ufsmount *mp; - int waitfor; -{ - register struct fs *dfs, *fs = mp->um_fs; - register struct buf *bp; - int blks; - void *space; - int i, size, error, allerror = 0; - int devBlockSize=0; -#if REV_ENDIAN_FS - int rev_endian=(mp->um_mountp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - /* - * First write back the summary information. - */ - blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp; - for (i = 0; i < blks; i += fs->fs_frag) { - size = fs->fs_bsize; - if (i + fs->fs_frag > blks) - size = (blks - i) * fs->fs_fsize; - bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)fsbtodb(fs, fs->fs_csaddr + i)), - size, 0, 0, BLK_META); - bcopy(space, (char *)buf_dataptr(bp), (u_int)size); -#if REV_ENDIAN_FS - if (rev_endian) { - byte_swap_ints((int *)buf_dataptr(bp), size / sizeof(int)); - } -#endif /* REV_ENDIAN_FS */ - space = (char *)space + size; - if (waitfor != MNT_WAIT) - buf_bawrite(bp); - else if (error = (int)buf_bwrite(bp)) - allerror = error; - } - /* - * Now write back the superblock itself. If any errors occurred - * up to this point, then fail so that the superblock avoids - * being written out as clean. - */ - if (allerror) - return (allerror); - devBlockSize = vfs_devblocksize(mp->um_mountp); - - bp = buf_getblk(mp->um_devvp, (daddr64_t)((unsigned)(SBOFF/devBlockSize)), (int)fs->fs_sbsize, 0, 0, BLK_META); - bcopy((caddr_t)fs, (char *)buf_dataptr(bp), (u_int)fs->fs_sbsize); - /* Restore compatibility to old file systems. 
XXX */ - dfs = (struct fs *)buf_dataptr(bp); /* XXX */ - if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ - dfs->fs_nrpos = -1; /* XXX */ -#if REV_ENDIAN_FS - /* - * Swapping bytes here ; so that in case - * of inode format < FS_44INODEFMT appropriate - * fields get moved - */ - if (rev_endian) { - byte_swap_sbout((struct fs *)buf_dataptr(bp)); - } -#endif /* REV_ENDIAN_FS */ - if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ - int32_t *lp, tmp; /* XXX */ - /* XXX */ - lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ - tmp = lp[4]; /* XXX */ - for (i = 4; i > 0; i--) /* XXX */ - lp[i] = lp[i-1]; /* XXX */ - lp[0] = tmp; /* XXX */ - } /* XXX */ -#if REV_ENDIAN_FS - /* Note that dfs is already swapped so swap the filesize - * before writing - */ - if (rev_endian) { - dfs->fs_maxfilesize = OSSwapInt64(mp->um_savedmaxfilesize); /* XXX */ - } else { -#endif /* REV_ENDIAN_FS */ - dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - if (waitfor != MNT_WAIT) - buf_bawrite(bp); - else if (error = (int)buf_bwrite(bp)) - allerror = error; - - return (allerror); -} diff --git a/bsd/ufs/ffs/ffs_vnops.c b/bsd/ufs/ffs/ffs_vnops.c deleted file mode 100644 index 9120f2831..000000000 --- a/bsd/ufs/ffs/ffs_vnops.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - -#define VOPFUNC int (*)(void *) - -/* Global vfs data structures for ufs. 
*/ -int (**ffs_vnodeop_p)(void *); -struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)ufs_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)ufs_create }, /* create */ - { &vnop_whiteout_desc, (VOPFUNC)ufs_whiteout }, /* whiteout */ - { &vnop_mknod_desc, (VOPFUNC)ufs_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)ufs_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)ufs_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vnop_read_desc, (VOPFUNC)ffs_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)ffs_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)ufs_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)ufs_select }, /* select */ - { &vnop_revoke_desc, (VOPFUNC)ufs_revoke }, /* revoke */ - { &vnop_mmap_desc, (VOPFUNC)ufs_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)ufs_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)ufs_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)ufs_rename }, /* rename */ - { &vnop_mkdir_desc, (VOPFUNC)ufs_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)ufs_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)ufs_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)ufs_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)ufs_readlink }, /* readlink */ - { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)ufs_strategy }, /* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)ufs_pathconf }, /* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vnop_copyfile_desc, 
(VOPFUNC)err_copyfile }, /* Copy File */ - { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vnop_blockmap_desc, (VOPFUNC)ufs_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)ufs_kqfilt_add }, /* kqfilt_add */ - { (struct vnodeop_desc*)NULL, (int(*)())NULL } -}; -struct vnodeopv_desc ffs_vnodeop_opv_desc = - { &ffs_vnodeop_p, ffs_vnodeop_entries }; - -int (**ffs_specop_p)(void *); -struct vnodeopv_entry_desc ffs_specop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)spec_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)spec_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)spec_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)spec_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)ufsspec_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vnop_read_desc, (VOPFUNC)ufsspec_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)ufsspec_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)spec_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)spec_select }, /* select */ - { &vnop_revoke_desc, (VOPFUNC)spec_revoke }, /* revoke */ - { &vnop_mmap_desc, (VOPFUNC)spec_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)spec_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)spec_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)spec_rename }, /* rename */ - { &vnop_mkdir_desc, (VOPFUNC)spec_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, (VOPFUNC)spec_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)spec_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)spec_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)spec_readlink }, /* readlink */ - { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, 
/* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)spec_strategy }, /* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)spec_pathconf }, /* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ - { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vnop_blockmap_desc, (VOPFUNC)spec_blockmap }, /* blockmap */ - { (struct vnodeop_desc*)NULL, (int(*)())NULL } -}; -struct vnodeopv_desc ffs_specop_opv_desc = - { &ffs_specop_p, ffs_specop_entries }; - -#if FIFO -int (**ffs_fifoop_p)(void *); -struct vnodeopv_entry_desc ffs_fifoop_entries[] = { - { &vnop_default_desc, (VOPFUNC)vn_default_error }, - { &vnop_lookup_desc, (VOPFUNC)fifo_lookup }, /* lookup */ - { &vnop_create_desc, (VOPFUNC)fifo_create }, /* create */ - { &vnop_mknod_desc, (VOPFUNC)fifo_mknod }, /* mknod */ - { &vnop_open_desc, (VOPFUNC)fifo_open }, /* open */ - { &vnop_close_desc, (VOPFUNC)ufsfifo_close }, /* close */ - { &vnop_getattr_desc, (VOPFUNC)ufs_getattr }, /* getattr */ - { &vnop_setattr_desc, (VOPFUNC)ufs_setattr }, /* setattr */ - { &vnop_read_desc, (VOPFUNC)ufsfifo_read }, /* read */ - { &vnop_write_desc, (VOPFUNC)ufsfifo_write }, /* write */ - { &vnop_ioctl_desc, (VOPFUNC)fifo_ioctl }, /* ioctl */ - { &vnop_select_desc, (VOPFUNC)fifo_select }, /* select */ - { &vnop_revoke_desc, (VOPFUNC)fifo_revoke }, /* revoke */ - { &vnop_mmap_desc, (VOPFUNC)fifo_mmap }, /* mmap */ - { &vnop_fsync_desc, (VOPFUNC)ffs_fsync }, /* fsync */ - { &vnop_remove_desc, (VOPFUNC)fifo_remove }, /* remove */ - { &vnop_link_desc, (VOPFUNC)fifo_link }, /* link */ - { &vnop_rename_desc, (VOPFUNC)fifo_rename }, /* rename */ - { &vnop_mkdir_desc, (VOPFUNC)fifo_mkdir }, /* mkdir */ - { &vnop_rmdir_desc, 
(VOPFUNC)fifo_rmdir }, /* rmdir */ - { &vnop_symlink_desc, (VOPFUNC)fifo_symlink }, /* symlink */ - { &vnop_readdir_desc, (VOPFUNC)fifo_readdir }, /* readdir */ - { &vnop_readlink_desc, (VOPFUNC)fifo_readlink }, /* readlink */ - { &vnop_inactive_desc, (VOPFUNC)ufs_inactive }, /* inactive */ - { &vnop_reclaim_desc, (VOPFUNC)ffs_reclaim }, /* reclaim */ - { &vnop_strategy_desc, (VOPFUNC)fifo_strategy }, /* strategy */ - { &vnop_pathconf_desc, (VOPFUNC)fifo_pathconf }, /* pathconf */ - { &vnop_advlock_desc, (VOPFUNC)err_advlock }, /* advlock */ - { &vnop_bwrite_desc, (VOPFUNC)vn_bwrite }, - { &vnop_pagein_desc, (VOPFUNC)ffs_pagein }, /* Pagein */ - { &vnop_pageout_desc, (VOPFUNC)ffs_pageout }, /* Pageout */ - { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* Copy File */ - { &vnop_blktooff_desc, (VOPFUNC)ffs_blktooff }, /* blktooff */ - { &vnop_offtoblk_desc, (VOPFUNC)ffs_offtoblk }, /* offtoblk */ - { &vnop_blockmap_desc, (VOPFUNC)ufs_blockmap }, /* blockmap */ - { &vnop_kqfilt_add_desc, (VOPFUNC)ufsfifo_kqfilt_add }, /* kqfilt_add */ - { (struct vnodeop_desc*)NULL, (int(*)())NULL } -}; -struct vnodeopv_desc ffs_fifoop_opv_desc = - { &ffs_fifoop_p, ffs_fifoop_entries }; -#endif /* FIFO */ - -/* - * Enabling cluster read/write operations. - */ -int doclusterread = 0; -int doclusterwrite = 0; - -#include - -/* - * Synch an open file. - */ -/* ARGSUSED */ -int -ffs_fsync(ap) - struct vnop_fsync_args /* { - struct vnode *a_vp; - int a_waitfor; - vfs_context_t a_context; - } */ *ap; -{ - return(ffs_fsync_internal(ap->a_vp, ap->a_waitfor)); -} - - -int -ffs_fsync_internal(vnode_t vp, int waitfor) -{ - struct timeval tv; - int wait = (waitfor == MNT_WAIT); - - /* - * Write out any clusters. - */ - cluster_push(vp, 0); - - /* - * Flush all dirty buffers associated with a vnode. - */ - buf_flushdirtyblks(vp, wait, 0, "ffs_fsync"); - microtime(&tv); - - return (ffs_update(vp, &tv, &tv, wait)); -} - -/* - * Reclaim an inode so that it can be used for other purposes. 
- */ -int -ffs_reclaim(ap) - struct vnop_reclaim_args /* { - struct vnode *a_vp; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - int error; - - if ( (error = ufs_reclaim(vp, vfs_context_proc(ap->a_context))) ) - return (error); - - FREE_ZONE(vnode_fsnode(vp), sizeof (struct inode), M_FFSNODE); - - vnode_clearfsnode(vp); - - return (0); -} - -/* Blktooff converts a logical block number to a file offset */ -int -ffs_blktooff(ap) - struct vnop_blktooff_args /* { - struct vnode *a_vp; - daddr64_t a_lblkno; - off_t *a_offset; - } */ *ap; -{ - register struct inode *ip; - register FS *fs; - - if (ap->a_vp == NULL) - return (EINVAL); - - fs = VTOI(ap->a_vp)->I_FS; - - *ap->a_offset = (off_t)lblktosize(fs, ap->a_lblkno); - - return (0); -} - -/* Blktooff converts a logical block number to a file offset */ -int -ffs_offtoblk(ap) - struct vnop_offtoblk_args /* { - struct vnode *a_vp; - off_t a_offset; - daddr64_t *a_lblkno; - } */ *ap; -{ - register FS *fs; - - if (ap->a_vp == NULL) - return (EINVAL); - - fs = VTOI(ap->a_vp)->I_FS; - - *ap->a_lblkno = (daddr64_t)lblkno(fs, ap->a_offset); - - return (0); -} diff --git a/bsd/ufs/ffs/fs.h b/bsd/ufs/ffs/fs.h deleted file mode 100644 index 3b9afc7b9..000000000 --- a/bsd/ufs/ffs/fs.h +++ /dev/null @@ -1,593 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fs.h 8.13 (Berkeley) 3/21/95 - */ -#ifndef _FFS_FS_H_ -#define _FFS_FS_H_ - -#include - -#ifdef __APPLE_API_UNSTABLE -/* - * Each disk drive contains some number of file systems. - * A file system consists of a number of cylinder groups. - * Each cylinder group has inodes and data. - * - * A file system is described by its super-block, which in turn - * describes the cylinder groups. The super-block is critical - * data and is replicated in each cylinder group to protect against - * catastrophic loss. This is done at `newfs' time and the critical - * super-block data does not change, so the copies need not be - * referenced further unless disaster strikes. - * - * For file system fs, the offsets of the various blocks of interest - * are given in the super block as: - * [fs->fs_sblkno] Super-block - * [fs->fs_cblkno] Cylinder group block - * [fs->fs_iblkno] Inode blocks - * [fs->fs_dblkno] Data blocks - * The beginning of cylinder group cg in fs, is given by - * the ``cgbase(fs, cg)'' macro. 
- * - * The first boot and super blocks are given in absolute disk addresses. - * The byte-offset forms are preferred, as they don't imply a sector size. - */ -#define BBSIZE 8192 -#define SBSIZE 8192 -#define BBOFF ((off_t)(0)) -#define SBOFF ((off_t)(BBOFF + BBSIZE)) -#define BBLOCK ((ufs_daddr_t)(0)) -#define SBLOCK ((ufs_daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) - -/* - * Addresses stored in inodes are capable of addressing fragments - * of `blocks'. File system blocks of at most size MAXBSIZE can - * be optionally broken into 2, 4, or 8 pieces, each of which is - * addressible; these pieces may be DEV_BSIZE, or some multiple of - * a DEV_BSIZE unit. - * - * Large files consist of exclusively large data blocks. To avoid - * undue wasted disk space, the last data block of a small file may be - * allocated as only as many fragments of a large block as are - * necessary. The file system format retains only a single pointer - * to such a fragment, which is a piece of a single large block that - * has been divided. The size of such a fragment is determinable from - * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. - * - * The file system records space availability at the fragment level; - * to determine block availability, aligned fragments are examined. - */ - -/* - * MINBSIZE is the smallest allowable block size. - * In order to insure that it is possible to create files of size - * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. - * MINBSIZE must be big enough to hold a cylinder group block, - * thus changes to (struct cg) must keep its size within MINBSIZE. - * Note that super blocks are always of size SBSIZE, - * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. - */ -#define MINBSIZE 4096 - -/* - * The path name on which the file system is mounted is maintained - * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in - * the super block for this name. 
- */ -#define MAXMNTLEN 512 - -/* - * The limit on the amount of summary information per file system - * is defined by MAXCSBUFS. It is currently parameterized for a - * size of 128 bytes (2 million cylinder groups on machines with - * 32-bit pointers, and 1 million on 64-bit machines). One pointer - * is taken away to point to an array of cluster sizes that is - * computed as cylinder groups are inspected. - * There is a 128-byte region in the superblock reserved for in-core - * pointers to summary information. Originally this included an array - * of pointers to blocks of struct csum; now there are just three - * pointers and the remaining space is padded with fs_ocsp[]. - * - * NOCSPTRS determines the size of this padding. One pointer (fs_csp) - * is taken away to point to a contiguous array of struct csum for - * all cylinder groups; a second (fs_maxcluster) points to an array - * of cluster sizes that is computed as cylinder groups are inspected, - * and the third points to an array that tracks the creation of new - * directories. - */ -#define NOCSPTRS ((128 / sizeof(void *)) - 3) - -/* - * A summary of contiguous blocks of various sizes is maintained - * in each cylinder group. Normally this is set by the initial - * value of fs_maxcontig. To conserve space, a maximum summary size - * is set by FS_MAXCONTIG. - */ -#define FS_MAXCONTIG 16 - -/* - * MINFREE gives the minimum acceptable percentage of file system - * blocks which may be free. If the freelist drops below this level - * only the superuser may continue to allocate blocks. This may - * be set to 0 if no reserve of free blocks is deemed necessary, - * however throughput drops by fifty percent if the file system - * is run at between 95% and 100% full; thus the minimum default - * value of fs_minfree is 5%. However, to get good clustering - * performance, 10% is a better choice. hence we use 10% as our - * default value. 
With 10% free space, fragmentation is not a - * problem, so we choose to optimize for time. - */ -#define MINFREE 5 -#define DEFAULTOPT FS_OPTTIME - -/* Grigoriy Orlov has done some extensive work to fine - * tune the layout preferences for directories within a filesystem. - * His algorithm can be tuned by adjusting the following parameters - * which tell the system the average file size and the average number - * of files per directory. These defaults are well selected for typical - * filesystems, but may need to be tuned for odd cases like filesystems - * being used for squid caches or news spools. - */ -#define AVFILESIZ 16384 -#define AFPDIR 64 - -/* - * Per cylinder group information; summarized in blocks allocated - * from first cylinder group data blocks. These blocks have to be - * read in from fs_csaddr (size fs_cssize) in addition to the - * super block. - */ -struct csum { - int32_t cs_ndir; /* number of directories */ - int32_t cs_nbfree; /* number of free blocks */ - int32_t cs_nifree; /* number of free inodes */ - int32_t cs_nffree; /* number of free frags */ -}; - -/* - * Super block for an FFS file system. 
- */ -struct fs { - int32_t fs_firstfield; /* historic file system linked list, */ - int32_t fs_unused_1; /* used for incore super blocks */ - ufs_daddr_t fs_sblkno; /* addr of super-block in filesys */ - ufs_daddr_t fs_cblkno; /* offset of cyl-block in filesys */ - ufs_daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ - ufs_daddr_t fs_dblkno; /* offset of first data after cg */ - int32_t fs_cgoffset; /* cylinder group offset in cylinder */ - int32_t fs_cgmask; /* used to calc mod fs_ntrak */ - time_t fs_time; /* last time written */ - int32_t fs_size; /* number of blocks in fs */ - int32_t fs_dsize; /* number of data blocks in fs */ - int32_t fs_ncg; /* number of cylinder groups */ - int32_t fs_bsize; /* size of basic blocks in fs */ - int32_t fs_fsize; /* size of frag blocks in fs */ - int32_t fs_frag; /* number of frags in a block in fs */ -/* these are configuration parameters */ - int32_t fs_minfree; /* minimum percentage of free blocks */ - int32_t fs_rotdelay; /* num of ms for optimal next block */ - int32_t fs_rps; /* disk revolutions per second */ -/* these fields can be computed from the others */ - int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ - int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ - int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ - int32_t fs_fshift; /* ``numfrags'' calc number of frags */ -/* these are configuration parameters */ - int32_t fs_maxcontig; /* max number of contiguous blks */ - int32_t fs_maxbpg; /* max number of blks per cyl group */ -/* these fields can be computed from the others */ - int32_t fs_fragshift; /* block to frag shift */ - int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ - int32_t fs_sbsize; /* actual size of super block */ - int32_t fs_csmask; /* csum block offset (now unused) */ - int32_t fs_csshift; /* csum block number (now unused) */ - int32_t fs_nindir; /* value of NINDIR */ - int32_t fs_inopb; /* value of INOPB */ - int32_t fs_nspf; /* value of NSPF */ -/* yet 
another configuration parameter */ - int32_t fs_optim; /* optimization preference, see below */ -/* these fields are derived from the hardware */ - int32_t fs_npsect; /* # sectors/track including spares */ - int32_t fs_interleave; /* hardware sector interleave */ - int32_t fs_trackskew; /* sector 0 skew, per track */ - int32_t fs_headswitch; /* head switch time, usec */ - int32_t fs_trkseek; /* track-to-track seek, usec */ -/* sizes determined by number of cylinder groups and their sizes */ - ufs_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ - int32_t fs_cssize; /* size of cyl grp summary area */ - int32_t fs_cgsize; /* cylinder group size */ -/* these fields are derived from the hardware */ - int32_t fs_ntrak; /* tracks per cylinder */ - int32_t fs_nsect; /* sectors per track */ - int32_t fs_spc; /* sectors per cylinder */ -/* this comes from the disk driver partitioning */ - int32_t fs_ncyl; /* cylinders in file system */ -/* these fields can be computed from the others */ - int32_t fs_cpg; /* cylinders per group */ - int32_t fs_ipg; /* inodes per group */ - int32_t fs_fpg; /* blocks per group * fs_frag */ -/* this data must be re-computed after crashes */ - struct csum fs_cstotal; /* cylinder summary information */ -/* these fields are cleared at mount time */ - int8_t fs_fmod; /* super block modified flag */ - int8_t fs_clean; /* file system is clean flag */ - int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ - u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ -/* these fields retain the current block allocation info */ - int32_t fs_cgrotor; /* last cg searched */ - void *fs_ocsp[NOCSPTRS]; /* list of fs_cs info buffers */ - u_int8_t *fs_contigdirs; /* # of contiguously allocated dirs */ - struct csum *fs_csp; /* list of fs_cs info buffers */ - int32_t *fs_maxcluster; /* max cluster in each cyl group */ - int32_t fs_cpc; /* cyl per cycle in postbl */ - int16_t fs_opostbl[16][8]; /* old rotation block list 
head */ - int32_t fs_avgfilesize; /* expected average file size */ - int32_t fs_avgfpdir; /* expected # of files per directory */ - int32_t fs_sparecon[48]; /* reserved for future constants */ - int32_t fs_contigsumsize; /* size of cluster summary array */ - int32_t fs_maxsymlinklen; /* max length of an internal symlink */ - int32_t fs_inodefmt; /* format of on-disk inodes */ - u_int64_t fs_maxfilesize; /* maximum representable file size */ - int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ - int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ - int32_t fs_state; /* validate fs_clean field */ - int32_t fs_postblformat; /* format of positional layout tables */ - int32_t fs_nrpos; /* number of rotational positions */ - int32_t fs_postbloff; /* (u_int16) rotation block list head */ - int32_t fs_rotbloff; /* (u_int8) blocks for each rotation */ - int32_t fs_magic; /* magic number */ - u_int8_t fs_space[1]; /* list of blocks for each rotation */ -/* actually longer */ -}; - -/* - * Filesystem identification - */ -#define FS_MAGIC 0x011954 /* the fast filesystem magic number */ -#define FS_OKAY 0x7c269d38 /* superblock checksum */ -#define FS_42INODEFMT -1 /* 4.2BSD inode format */ -#define FS_44INODEFMT 2 /* 4.4BSD inode format */ -/* - * Preference for optimization. - */ -#define FS_OPTTIME 0 /* minimize allocation time */ -#define FS_OPTSPACE 1 /* minimize disk fragmentation */ - -/* - * Rotational layout table format types - */ -#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ -#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ -/* - * Macros for access to superblock array structures - */ -#define fs_postbl(fs, cylno) \ - (((fs)->fs_postblformat == FS_42POSTBLFMT) \ - ? ((fs)->fs_opostbl[cylno]) \ - : ((int16_t *)((u_int8_t *)(fs) + \ - (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) -#define fs_rotbl(fs) \ - (((fs)->fs_postblformat == FS_42POSTBLFMT) \ - ? 
((fs)->fs_space) \ - : ((u_int8_t *)((u_int8_t *)(fs) + (fs)->fs_rotbloff))) - -/* - * The size of a cylinder group is calculated by CGSIZE. The maximum size - * is limited by the fact that cylinder groups are at most one block. - * Its size is derived from the size of the maps maintained in the - * cylinder group and the (struct cg) size. - */ -#define CGSIZE(fs) \ - /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ - /* blktot size */ (fs)->fs_cpg * sizeof(int32_t) + \ - /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(int16_t) + \ - /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ - /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\ - /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ - /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ - /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY))) - -/* - * Convert cylinder group to base address of its global summary info. - * - * N.B. This macro assumes that sizeof(struct csum) is a power of two. - */ -#define fs_cs(fs, indx) fs_csp[indx] - -/* - * Cylinder group block for a file system. 
- */ -#define CG_MAGIC 0x090255 -struct cg { - int32_t cg_firstfield; /* historic cyl groups linked list */ - int32_t cg_magic; /* magic number */ - time_t cg_time; /* time last written */ - int32_t cg_cgx; /* we are the cgx'th cylinder group */ - int16_t cg_ncyl; /* number of cyl's this cg */ - int16_t cg_niblk; /* number of inode blocks this cg */ - int32_t cg_ndblk; /* number of data blocks this cg */ - struct csum cg_cs; /* cylinder summary information */ - int32_t cg_rotor; /* position of last used block */ - int32_t cg_frotor; /* position of last used frag */ - int32_t cg_irotor; /* position of last used inode */ - int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ - int32_t cg_btotoff; /* (int32) block totals per cylinder */ - int32_t cg_boff; /* (u_int16) free block positions */ - int32_t cg_iusedoff; /* (u_int8) used inode map */ - int32_t cg_freeoff; /* (u_int8) free block map */ - int32_t cg_nextfreeoff; /* (u_int8) next available space */ - int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ - int32_t cg_clusteroff; /* (u_int8) free cluster map */ - int32_t cg_nclusterblks; /* number of clusters this cg */ - int32_t cg_sparecon[13]; /* reserved for future use */ - u_int8_t cg_space[1]; /* space for cylinder group maps */ -/* actually longer */ -}; - -/* - * Macros for access to cylinder group array structures - */ -#define cg_blktot(cgp) \ - (((cgp)->cg_magic != CG_MAGIC) \ - ? (((struct ocg *)(cgp))->cg_btot) \ - : ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_btotoff))) -#define cg_blks(fs, cgp, cylno) \ - (((cgp)->cg_magic != CG_MAGIC) \ - ? (((struct ocg *)(cgp))->cg_b[cylno]) \ - : ((int16_t *)((u_int8_t *)(cgp) + \ - (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) -#define cg_inosused(cgp) \ - (((cgp)->cg_magic != CG_MAGIC) \ - ? (((struct ocg *)(cgp))->cg_iused) \ - : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff))) -#define cg_blksfree(cgp) \ - (((cgp)->cg_magic != CG_MAGIC) \ - ? 
(((struct ocg *)(cgp))->cg_free) \ - : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff))) -#define cg_chkmagic(cgp) \ - ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) -#define cg_clustersfree(cgp) \ - ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) -#define cg_clustersum(cgp) \ - ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_clustersumoff)) - -/* - * The following structure is defined - * for compatibility with old file systems. - */ -struct ocg { - int32_t cg_firstfield; /* historic linked list of cyl groups */ - int32_t cg_unused_1; /* used for incore cyl groups */ - time_t cg_time; /* time last written */ - int32_t cg_cgx; /* we are the cgx'th cylinder group */ - int16_t cg_ncyl; /* number of cyl's this cg */ - int16_t cg_niblk; /* number of inode blocks this cg */ - int32_t cg_ndblk; /* number of data blocks this cg */ - struct csum cg_cs; /* cylinder summary information */ - int32_t cg_rotor; /* position of last used block */ - int32_t cg_frotor; /* position of last used frag */ - int32_t cg_irotor; /* position of last used inode */ - int32_t cg_frsum[8]; /* counts of available frags */ - int32_t cg_btot[32]; /* block totals per cylinder */ - int16_t cg_b[32][8]; /* positions of free blocks */ - u_int8_t cg_iused[256]; /* used inode map */ - int32_t cg_magic; /* magic number */ - u_int8_t cg_free[1]; /* free block map */ -/* actually longer */ -}; - -/* - * Turn file system block numbers into disk block addresses. - * This maps file system blocks to device size blocks. - */ -#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) -#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) - -/* - * Cylinder group macros to locate things in cylinder groups. - * They calc file system addresses of cylinder group data structures. 
- */ -#define cgbase(fs, c) ((ufs_daddr_t)((fs)->fs_fpg * (c))) -#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ -#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ -#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ -#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ -#define cgstart(fs, c) \ - (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) - -/* - * Macros for handling inode numbers: - * inode number to file system block offset. - * inode number to cylinder group number. - * inode number to file system block address. - */ -#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) -#define ino_to_fsba(fs, x) \ - ((ufs_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ - (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) -#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) - -/* - * Give cylinder group number for a file system block. - * Give cylinder group block number for a file system block. - */ -#define dtog(fs, d) ((d) / (fs)->fs_fpg) -#define dtogd(fs, d) ((d) % (fs)->fs_fpg) - -/* - * Extract the bits for a block from a map. - * Compute the cylinder and rotational position of a cyl block addr. - */ -#define blkmap(fs, map, loc) \ - (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) -#define cbtocylno(fs, bno) \ - ((bno) * NSPF(fs) / (fs)->fs_spc) -#define cbtorpos(fs, bno) \ - (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ - (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ - (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) - -/* - * The following macros optimize certain frequently calculated - * quantities by using shifts and masks in place of divisions - * modulos and multiplications. 
- */ -#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ - ((loc) & (fs)->fs_qbmask) -#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ - ((loc) & (fs)->fs_qfmask) -#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ - ((blk) << (fs)->fs_bshift) -#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ - ((loc) >> (fs)->fs_bshift) -#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ - ((loc) >> (fs)->fs_fshift) -#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ - (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) -#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ - (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) -#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ - ((frags) >> (fs)->fs_fragshift) -#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ - ((blks) << (fs)->fs_fragshift) -#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ - ((fsb) & ((fs)->fs_frag - 1)) -#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ - ((fsb) &~ ((fs)->fs_frag - 1)) - -/* - * Determine the number of available frags given a - * percentage to hold in reserve. - */ -#define freespace(fs, percentreserved) \ - (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ - (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) - -/* - * Determining the size of a file block in the file system. - */ -#define blksize(fs, ip, lbn) \ - (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \ - ? (fs)->fs_bsize \ - : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) -#define dblksize(fs, dip, lbn) \ - (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \ - ? (fs)->fs_bsize \ - : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) - -/* - * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte - * sector size. 
- */ -#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) -#define NSPF(fs) ((fs)->fs_nspf) - -/* - * Number of inodes in a secondary storage block/fragment. - */ -#define INOPB(fs) ((fs)->fs_inopb) -#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) - -/* - * Number of indirects in a file system block. - */ -#define NINDIR(fs) ((fs)->fs_nindir) - -/* - * This macro controls whether the file system format is byte swapped or not. - * At NeXT, all little endian machines read and write big endian file systems. - */ -#define BIG_ENDIAN_FS (__LITTLE_ENDIAN__) - -#ifdef __APPLE_API_PRIVATE -extern int inside[], around[]; -extern u_char *fragtbl[]; -#endif /* __APPLE_API_PRIVATE */ - - -/* - * UFS Label: - * The purpose of this label is to name a UFS/FFS filesystem. The label - * is located at offset 7K (BBSIZE=8K - UFS_LABEL_SIZE=1K = 7K) of the - * partition. The first 7K is still available for boot blocks. - */ - -#define UFS_LABEL_MAGIC { 'L', 'A', 'B', 'L' } -#define UFS_LABEL_SIZE 1024 -#define UFS_LABEL_OFFSET (BBSIZE - UFS_LABEL_SIZE) /* top 1K */ -#define UFS_LABEL_VERSION 1 -#define UFS_MAX_LABEL_NAME 512 - -struct ufslabel { - u_int32_t ul_magic; - u_int16_t ul_checksum; /* checksum over entire label*/ - u_int32_t ul_version; /* label version */ - u_int32_t ul_time; /* creation time */ - u_int16_t ul_namelen; /* filesystem name length */ - u_char ul_name[UFS_MAX_LABEL_NAME]; /* filesystem name */ - u_int64_t ul_uuid; /* filesystem uuid */ - u_char ul_reserved[24];/* reserved for future use */ - u_char ul_unused[460]; /* pad out to 1K */ -}; - -#endif /* __APPLE_API_UNSTABLE */ -#endif /* ! 
_FFS_FS_H_ */ diff --git a/bsd/ufs/ufs/Makefile b/bsd/ufs/ufs/Makefile deleted file mode 100644 index 08c53815e..000000000 --- a/bsd/ufs/ufs/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_PPC = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_PPC = \ - -EXPINC_SUBDIRS_I386 = \ - -DATAFILES = \ - dinode.h dir.h inode.h quota.h ufs_extern.h ufsmount.h - -INSTALL_MI_LIST = ${DATAFILES} - -INSTALL_MI_DIR = ufs/ufs - -EXPORT_MI_LIST = ${DATAFILES} - -EXPORT_MI_DIR = ufs/ufs - - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/bsd/ufs/ufs/dinode.h b/bsd/ufs/ufs/dinode.h deleted file mode 100644 index 1e8abccec..000000000 --- a/bsd/ufs/ufs/dinode.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)dinode.h 8.9 (Berkeley) 3/29/95 - */ -#ifndef _UFS_DINODE_H_ -#define _UFS_DINODE_H_ - -#include - - -#ifdef __APPLE_API_UNSTABLE -/* - * The root inode is the root of the file system. Inode 0 can't be used for - * normal purposes and historically bad blocks were linked to inode 1, thus - * the root inode is 2. (Inode 1 is no longer used for this purpose, however - * numerous dump tapes make this assumption, so we are stuck with it). - */ -#define ROOTINO ((ino_t)2) - -/* - * The Whiteout inode# is a dummy non-zero inode number which will - * never be allocated to a real file. It is used as a place holder - * in the directory entry which has been tagged as a DT_W entry. - * See the comments about ROOTINO above. - */ -#define WINO ((ino_t)1) - -/* - * A dinode contains all the meta-data associated with a UFS file. - * This structure defines the on-disk format of a dinode. Since - * this structure describes an on-disk structure, all its fields - * are defined by types with precise widths. - */ - -#define NDADDR 12 /* Direct addresses in inode. */ -#define NIADDR 3 /* Indirect addresses in inode. 
*/ - -typedef int32_t ufs_daddr_t; - -struct dinode { - u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ - int16_t di_nlink; /* 2: File link count. */ - union { - u_int16_t oldids[2]; /* 4: Ffs: old user and group ids. */ - int32_t inumber; /* 4: Lfs: inode number. */ - } di_u; - u_int64_t di_size; /* 8: File byte count. */ - int32_t di_atime; /* 16: Last access time. */ - int32_t di_atimensec; /* 20: Last access time. */ - int32_t di_mtime; /* 24: Last modified time. */ - int32_t di_mtimensec; /* 28: Last modified time. */ - int32_t di_ctime; /* 32: Last inode change time. */ - int32_t di_ctimensec; /* 36: Last inode change time. */ - ufs_daddr_t di_db[NDADDR]; /* 40: Direct disk blocks. */ - ufs_daddr_t di_ib[NIADDR]; /* 88: Indirect disk blocks. */ - u_int32_t di_flags; /* 100: Status flags (chflags). */ - u_int32_t di_blocks; /* 104: Blocks actually held. */ - int32_t di_gen; /* 108: Generation number. */ - u_int32_t di_uid; /* 112: File owner. */ - u_int32_t di_gid; /* 116: File group. */ - int32_t di_spare[2]; /* 120: Reserved; currently unused */ -}; - -/* - * The di_db fields may be overlaid with other information for - * file types that do not have associated disk storage. Block - * and character devices overlay the first data block with their - * dev_t value. Short symbolic links place their path in the - * di_db area. - */ -#define di_inumber di_u.inumber -#define di_ogid di_u.oldids[1] -#define di_ouid di_u.oldids[0] -#define di_rdev di_db[0] -#define di_shortlink di_db -#define MAXSYMLINKLEN ((NDADDR + NIADDR) * sizeof(ufs_daddr_t)) - -/* File permissions. */ -#define IEXEC 0000100 /* Executable. */ -#define IWRITE 0000200 /* Writeable. */ -#define IREAD 0000400 /* Readable. */ -#define ISVTX 0001000 /* Sticky bit. */ -#define ISGID 0002000 /* Set-gid. */ -#define ISUID 0004000 /* Set-uid. */ - -/* File types. */ -#define IFMT 0170000 /* Mask of file type. */ -#define IFIFO 0010000 /* Named pipe (fifo). 
*/ -#define IFCHR 0020000 /* Character device. */ -#define IFDIR 0040000 /* Directory file. */ -#define IFBLK 0060000 /* Block device. */ -#define IFREG 0100000 /* Regular file. */ -#define IFLNK 0120000 /* Symbolic link. */ -#define IFSOCK 0140000 /* UNIX domain socket. */ -#define IFWHT 0160000 /* Whiteout. */ - -#endif /* __APPLE_API_UNSTABLE */ -#endif /* ! _UFS_DINODE_H_ */ diff --git a/bsd/ufs/ufs/dir.h b/bsd/ufs/ufs/dir.h deleted file mode 100644 index fb78b431d..000000000 --- a/bsd/ufs/ufs/dir.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. 
All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)dir.h 8.5 (Berkeley) 4/27/95 - */ - -#ifndef _DIR_H_ -#define _DIR_H_ - -#include - -#ifdef __APPLE_API_UNSTABLE -/* - * Theoretically, directories can be more than 2Gb in length, however, in - * practice this seems unlikely. So, we define the type doff_t as a 32-bit - * quantity to keep down the cost of doing lookup on a 32-bit machine. - */ -#define doff_t int32_t -#define MAXDIRSIZE (0x7fffffff) - -/* - * A directory consists of some number of blocks of DIRBLKSIZ - * bytes, where DIRBLKSIZ is chosen such that it can be transferred - * to disk in a single atomic operation (e.g. 512 bytes on most machines). - * - * Each DIRBLKSIZ byte block contains some number of directory entry - * structures, which are of variable length. Each directory entry has - * a struct direct at the front of it, containing its inode number, - * the length of the entry, and the length of the name contained in - * the entry. These are followed by the name padded to a 4 byte boundary - * with null bytes. All names are guaranteed null terminated. - * The maximum length of a name in a directory is UFSMAXNAMLEN. - * - * The macro DIRSIZ(fmt, dp) gives the amount of space required to represent - * a directory entry. Free space in a directory is represented by - * entries which have dp->d_reclen > DIRSIZ(fmt, dp). All DIRBLKSIZ bytes - * in a directory block are claimed by the directory entries. 
This - * usually results in the last entry in a directory having a large - * dp->d_reclen. When entries are deleted from a directory, the - * space is returned to the previous entry in the same directory - * block by increasing its dp->d_reclen. If the first entry of - * a directory block is free, then its dp->d_ino is set to 0. - * Entries other than the first in a directory do not normally have - * dp->d_ino set to 0. - */ -#ifdef __APPLE__ -#define DIRBLKSIZ 1024 -#else -#define DIRBLKSIZ DEV_BSIZE -#endif -#define UFSMAXNAMLEN 255 - -struct direct { - u_int32_t d_ino; /* inode number of entry */ - u_int16_t d_reclen; /* length of this record */ - u_int8_t d_type; /* file type, see below */ - u_int8_t d_namlen; /* length of string in d_name */ - char d_name[UFSMAXNAMLEN + 1];/* name with length <= UFSMAXNAMLEN */ -}; - -/* - * File types - */ -#define DT_UNKNOWN 0 -#define DT_FIFO 1 -#define DT_CHR 2 -#define DT_DIR 4 -#define DT_BLK 6 -#define DT_REG 8 -#define DT_LNK 10 -#define DT_SOCK 12 -#define DT_WHT 14 - -/* - * Convert between stat structure types and directory types. - */ -#define IFTODT(mode) (((mode) & 0170000) >> 12) -#define DTTOIF(dirtype) ((dirtype) << 12) - -/* - * The DIRSIZ macro gives the minimum record length which will hold - * the directory entry. This requires the amount of space in struct direct - * without the d_name field, plus enough space for the name with a terminating - * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. - */ -#if (BYTE_ORDER == LITTLE_ENDIAN) -#define DIRSIZ(oldfmt, dp) \ - ((oldfmt) ? \ - ((sizeof(struct direct) - (UFSMAXNAMLEN+1)) + (((dp)->d_type+1 + 3) &~ 3)) : \ - ((sizeof(struct direct) - (UFSMAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))) -#else -#define DIRSIZ(oldfmt, dp) \ - ((sizeof(struct direct) - (UFSMAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) -#endif -#define OLDDIRFMT 1 -#define NEWDIRFMT 0 - -/* - * Template for manipulating directories. 
Should use struct direct's, - * but the name field is UFSMAXNAMLEN - 1, and this just won't do. - */ -struct dirtemplate { - u_int32_t dot_ino; - int16_t dot_reclen; - u_int8_t dot_type; - u_int8_t dot_namlen; - char dot_name[4]; /* must be multiple of 4 */ - u_int32_t dotdot_ino; - int16_t dotdot_reclen; - u_int8_t dotdot_type; - u_int8_t dotdot_namlen; - char dotdot_name[4]; /* ditto */ -}; - -/* - * This is the old format of directories, sanz type element. - */ -struct odirtemplate { - u_int32_t dot_ino; - int16_t dot_reclen; - u_int16_t dot_namlen; - char dot_name[4]; /* must be multiple of 4 */ - u_int32_t dotdot_ino; - int16_t dotdot_reclen; - u_int16_t dotdot_namlen; - char dotdot_name[4]; /* ditto */ -}; -#endif /* __APPLE_API_UNSTABLE */ -#endif /* !_DIR_H_ */ diff --git a/bsd/ufs/ufs/inode.h b/bsd/ufs/ufs/inode.h deleted file mode 100644 index 3a6f212ed..000000000 --- a/bsd/ufs/ufs/inode.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)inode.h 8.9 (Berkeley) 5/14/95 - */ -#ifndef _UFS_INDOE_H_ -#define _UFS_INDOE_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -#include -#include -#include -#include -#include -#include - -/* - * The inode is used to describe each active (or recently active) file in the - * UFS filesystem. It is composed of two types of information. The first part - * is the information that is needed only while the file is active (such as - * the identity of the file and linkage to speed its lookup). The second part - * is * the permanent meta-data associated with the file which is read in - * from the permanent dinode from long term storage when the file becomes - * active, and is put back when the file is no longer being used. - */ -struct inode { - LIST_ENTRY(inode) i_hash;/* Hash chain. */ - struct vnode *i_vnode;/* Vnode associated with this inode. */ - struct vnode *i_devvp;/* Vnode for block I/O. */ - u_int32_t i_flag; /* flags, see below */ - dev_t i_dev; /* Device associated with the inode. */ - ino_t i_number; /* The identity of the inode. */ - - union { /* Associated filesystem. 
*/ - struct fs *fs; /* FFS */ - } inode_u; -#define i_fs inode_u.fs - - struct klist i_knotes; /* knotes attached to this vnode */ - struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ - u_quad_t i_modrev; /* Revision level for NFS lease. */ - void *i_lockf; /* DEPRECATED */ - - /* - * Side effects; used during directory lookup. - */ - int32_t i_count; /* Size of free slot in directory. */ - doff_t i_endoff; /* End of useful stuff in directory. */ - doff_t i_diroff; /* Offset in dir, where we found last entry. */ - doff_t i_offset; /* Offset of free space in directory. */ - ino_t i_ino; /* Inode number of found directory. */ - u_int32_t i_reclen; /* Size of found directory entry. */ - daddr_t i_lastr; /* last read... read-ahead */ - /* - * The on-disk dinode itself. - */ - struct dinode i_din; /* 128 bytes of the on-disk dinode. */ -}; - -#define i_atime i_din.di_atime -#define i_atimensec i_din.di_atimensec -#define i_blocks i_din.di_blocks -#define i_ctime i_din.di_ctime -#define i_ctimensec i_din.di_ctimensec -#define i_db i_din.di_db -#define i_flags i_din.di_flags -#define i_gen i_din.di_gen -#define i_gid i_din.di_gid -#define i_ib i_din.di_ib -#define i_mode i_din.di_mode -#define i_mtime i_din.di_mtime -#define i_mtimensec i_din.di_mtimensec -#define i_nlink i_din.di_nlink -#define i_rdev i_din.di_rdev -#define i_shortlink i_din.di_shortlink -#define i_size i_din.di_size -#define i_uid i_din.di_uid -#define i_spare i_din.di_spare -#define i_oldids i_din.di_u.oldids -#define i_inumber i_din.di_u.inumber - -/* These flags are kept in i_flag. */ -#define IN_ACCESS 0x0001 /* Access time update request. */ -#define IN_CHANGE 0x0002 /* Inode change time update request. */ -#define IN_UPDATE 0x0004 /* Modification time update request. */ -#define IN_MODIFIED 0x0008 /* Inode has been modified. */ -#define IN_RENAME 0x0010 /* Inode is being renamed. */ -#define IN_SHLOCK 0x0020 /* File has shared lock. */ -#define IN_EXLOCK 0x0040 /* File has exclusive lock. 
*/ -#define IN_TRANSIT 0x0080 /* inode is getting recycled */ -#define IN_WTRANSIT 0x0100 /* waiting for inode getting recycled */ -#define IN_ALLOC 0x0200 /* being allocated */ -#define IN_WALLOC 0x0400 /* waiting for allocation to be done */ - -#ifdef KERNEL -/* - * Structure used to pass around logical block paths generated by - * ufs_getlbns and used by truncate and bmap code. - */ -struct indir { - ufs_daddr_t in_lbn; /* Logical block number. */ - int in_off; /* Offset in buffer. */ - int in_exists; /* Flag if the block exists. */ -}; - -/* Convert between inode pointers and vnode pointers. */ -#define VTOI(vp) ((struct inode *)(vp)->v_data) -#define ITOV(ip) ((ip)->i_vnode) - -#define ITIMES(ip, t1, t2) { \ - if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ - (ip)->i_flag |= IN_MODIFIED; \ - if ((ip)->i_flag & IN_ACCESS) \ - (ip)->i_atime = (t1)->tv_sec; \ - if ((ip)->i_flag & IN_UPDATE) { \ - (ip)->i_mtime = (t2)->tv_sec; \ - (ip)->i_modrev++; \ - } \ - if ((ip)->i_flag & IN_CHANGE) \ - (ip)->i_ctime = (t2)->tv_sec; \ - (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ - } \ -} - -#define VN_KNOTE(vp, hint) KNOTE(&VTOI(vp)->i_knotes, (hint)) - -/* This overlays the FileID portion of NFS file handles. */ -struct ufid { - ino_t ufid_ino; /* File number (ino). */ - int32_t ufid_gen; /* Generation number. */ -}; -#endif /* KERNEL */ - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _UFS_INDOE_H_ */ diff --git a/bsd/ufs/ufs/quota.h b/bsd/ufs/ufs/quota.h deleted file mode 100644 index e8d532b76..000000000 --- a/bsd/ufs/ufs/quota.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1982, 1986, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Robert Elz at The University of Melbourne. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)quota.h 8.3 (Berkeley) 8/19/94 - */ - -#ifndef _UFS_QUOTA_H -#define _UFS_QUOTA_H - -#include - -#ifdef __APPLE_API_PRIVATE -#include - -#ifdef KERNEL -#include - - -#include - -struct inode; -struct mount; -struct proc; - -#ifndef _KAUTH_CRED_T -#define _KAUTH_CRED_T -struct ucred; -typedef struct ucred *kauth_cred_t; -#endif /* !_KAUTH_CRED_T */ - -__BEGIN_DECLS -int chkdq(struct inode *, int64_t, kauth_cred_t, int); -int chkdqchg(struct inode *, int64_t, kauth_cred_t, int); -int chkiq(struct inode *, long, kauth_cred_t, int); -int chkiqchg(struct inode *, long, kauth_cred_t, int); -int getinoquota(struct inode *); -int getquota(struct mount *, u_long, int, caddr_t); -int qsync(struct mount *mp); -int quotaoff(struct mount *, int); -int quotaon(vfs_context_t, struct mount *, int, caddr_t); -int setquota(struct mount *, u_long, int, caddr_t); -int setuse(struct mount *, u_long, int, caddr_t); -int ufs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t); -__END_DECLS - -#if DIAGNOSTIC 
-__BEGIN_DECLS -void chkdquot(struct inode *); -__END_DECLS -#endif -#endif /* KERNEL */ - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _UFS_QUOTA_H */ diff --git a/bsd/ufs/ufs/ufs_attrlist.c b/bsd/ufs/ufs/ufs_attrlist.c deleted file mode 100644 index 99a0940ee..000000000 --- a/bsd/ufs/ufs/ufs_attrlist.c +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * ufs_attrlist.c - UFS attribute list processing - * - * Copyright (c) 2002, Apple Computer, Inc. All Rights Reserved. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include "ufsmount.h" - -static char ufs_label_magic[4] = UFS_LABEL_MAGIC; - -/* Copied from diskdev_cmds/disklib/ufslabel.c */ -typedef union { - char c[2]; - u_short s; -} short_union_t; - -/* Copied from diskdev_cmds/disklib/ufslabel.c */ -typedef union { - u_short s[2]; - long l; -} long_union_t; - -/* Copied from diskdev_cmds/disklib/ufslabel.c */ -static __inline__ void -reduce(int *sum) -{ - long_union_t l_util; - - l_util.l = *sum; - *sum = l_util.s[0] + l_util.s[1]; - if (*sum > 65535) - *sum -= 65535; - return; -} - -/* Copied from diskdev_cmds/disklib/ufslabel.c */ -__private_extern__ unsigned short -ul_cksum(void *data, int len) -{ - u_short *w; - int sum; - - sum = 0; - w = (u_short *)data; - while ((len -= 32) >= 0) { - sum += w[0]; sum += w[1]; - sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; - sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; - sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; - sum += w[14]; sum += w[15]; - w += 16; - } - len += 32; - while ((len -= 8) >= 0) { - sum += w[0]; sum += w[1]; - sum += w[2]; sum += w[3]; - w += 4; - } - len += 8; - if (len) { - reduce(&sum); - while ((len -= 2) >= 0) { - sum += *w++; - } - } - if (len == -1) { /* odd-length data */ - short_union_t s_util; - - s_util.s = 0; - s_util.c[0] = *((char *)w); - s_util.c[1] = 0; - sum += s_util.s; - } - reduce(&sum); - return (~sum & 0xffff); -} - -/* Adapted from diskdev_cmds/disklib/ufslabel.c */ -__private_extern__ boolean_t -ufs_label_check(struct ufslabel *ul_p) -{ - u_int16_t calc; - u_int16_t checksum; - - if (bcmp(&ul_p->ul_magic, ufs_label_magic, - sizeof(ul_p->ul_magic))) { -#ifdef DEBUG - printf("ufslabel_check: label has bad magic number\n"); -#endif - return (FALSE); - } - if (ntohl(ul_p->ul_version) != UFS_LABEL_VERSION) { -#ifdef DEBUG - printf("ufslabel_check: label has incorect version %d " - "(should be %d)\n", 
ntohl(ul_p->ul_version), - UFS_LABEL_VERSION); -#endif - return (FALSE); - } - if (ntohs(ul_p->ul_namelen) > UFS_MAX_LABEL_NAME) { -#ifdef DEBUG - printf("ufslabel_check: name length %d is too big (> %d)\n", - ntohs(ul_p->ul_namelen), UFS_MAX_LABEL_NAME); -#endif - return (FALSE); - } - - checksum = ul_p->ul_checksum; /* Remember previous checksum. */ - ul_p->ul_checksum = 0; - calc = ul_cksum(ul_p, sizeof(*ul_p)); - if (calc != checksum) { -#ifdef DEBUG - printf("ufslabel_check: label checksum %x (should be %x)\n", - checksum, calc); -#endif - return (FALSE); - } - return (TRUE); -} - -__private_extern__ void -ufs_label_init(struct ufslabel *ul_p) -{ - struct timeval tv; - - microtime(&tv); - - bzero(ul_p, sizeof(*ul_p)); - ul_p->ul_version = htonl(UFS_LABEL_VERSION); - bcopy(ufs_label_magic, &ul_p->ul_magic, sizeof(ul_p->ul_magic)); - ul_p->ul_time = htonl(tv.tv_sec); -} - diff --git a/bsd/ufs/ufs/ufs_bmap.c b/bsd/ufs/ufs/ufs_bmap.c deleted file mode 100644 index ea9811544..000000000 --- a/bsd/ufs/ufs/ufs_bmap.c +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1989, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - */ -/* - * HISTORY - * 11-July-97 Umesh Vaishampayan (umeshv@apple.com) - * Cleanup. Fixed compilation error when tracing is turned on. - */ -#include -#include -#include -#include /* for p_stats */ -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#if REV_ENDIAN_FS -#include -#include -#endif /* REV_ENDIAN_FS */ -#include - - -/* - * Indirect blocks are now on the vnode for the file. They are given negative - * logical block numbers. Indirect blocks are addressed by the negative - * address of the first data block to which they point. Double indirect blocks - * are addressed by one less than the address of the first indirect block to - * which they point. Triple indirect blocks are addressed by one less than - * the address of the first double indirect block to which they point. - * - * ufs_bmaparray does the bmap conversion, and if requested returns the - * array of logical blocks which must be traversed to get to a block. - * Each entry contains the offset into that block that gets you to the - * next block and the disk address of the block (if it is assigned). 
- */ - -int -ufs_bmaparray(vp, bn, bnp, ap, nump, runp) - vnode_t vp; - ufs_daddr_t bn; - ufs_daddr_t *bnp; - struct indir *ap; - int *nump; - int *runp; -{ - register struct inode *ip; - struct buf *bp; - struct ufsmount *ump; - struct mount *mp; - struct vnode *devvp; - struct indir a[NIADDR], *xap; - ufs_daddr_t daddr; - long metalbn; - int error, maxrun, num; -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); - mp = vp->v_mount; - ump = VFSTOUFS(mp); - -#if REV_ENDIAN_FS - rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - -#if DIAGNOSTIC - if (ap != NULL && nump == NULL || ap == NULL && nump != NULL) - panic("ufs_bmaparray: invalid arguments"); -#endif - - if (runp) { - /* - * XXX - * If MAXPHYSIO is the largest transfer the disks can handle, - * we probably want maxrun to be 1 block less so that we - * don't create a block larger than the device can handle. - */ - *runp = 0; - maxrun = MAXPHYSIO / mp->mnt_vfsstat.f_iosize - 1; - } - - xap = ap == NULL ? a : ap; - if (!nump) - nump = # - if (error = ufs_getlbns(vp, bn, xap, nump)) - return (error); - - num = *nump; - if (num == 0) { - *bnp = blkptrtodb(ump, ip->i_db[bn]); - if (*bnp == 0) - *bnp = -1; - else if (runp) - for (++bn; bn < NDADDR && *runp < maxrun && - is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); - ++bn, ++*runp); - return (0); - } - - - /* Get disk address out of indirect block array */ - daddr = ip->i_ib[xap->in_off]; - - devvp = VFSTOUFS(vp->v_mount)->um_devvp; - for (bp = NULL, ++xap; --num; ++xap) { - ufs_daddr_t *dataptr; - int bop; - - if ((metalbn = xap->in_lbn) == bn) - /* - * found the indirect block we were - * looking for... 
exit the loop - */ - break; - - if (daddr == 0) - bop = BLK_ONLYVALID | BLK_META; - else - bop = BLK_META; - - if (bp) - buf_brelse(bp); - bp = buf_getblk(vp, (daddr64_t)((unsigned)metalbn), mp->mnt_vfsstat.f_iosize, 0, 0, bop); - - if (bp == 0) { - /* - * Exit the loop if there is no disk address assigned yet and - * the indirect block isn't in the cache - */ - break; - } - /* - * If we get here, we've either got the block in the cache - * or we have a disk address for it, go fetch it. - */ - xap->in_exists = 1; - - if (buf_valid(bp)) { - trace(TR_BREADHIT, pack(vp, mp->mnt_vfsstat.f_iosize), metalbn); - } - else { - trace(TR_BREADMISS, pack(vp, mp->mnt_vfsstat.f_iosize), metalbn); - buf_setblkno(bp, blkptrtodb(ump, (daddr64_t)((unsigned)daddr))); - buf_setflags(bp, B_READ); - VNOP_STRATEGY(bp); - OSIncrementAtomic(¤t_proc()->p_stats->p_ru.ru_inblock); - if (error = (int)buf_biowait(bp)) { - buf_brelse(bp); - return (error); - } - } - dataptr = (ufs_daddr_t *)buf_dataptr(bp); - daddr = dataptr[xap->in_off]; -#if REV_ENDIAN_FS - if (rev_endian) - daddr = OSSwapInt32(daddr); -#endif /* REV_ENDIAN_FS */ - if (num == 1 && daddr && runp) { -#if REV_ENDIAN_FS - if (rev_endian) { - for (bn = xap->in_off + 1; - bn < MNINDIR(ump) && *runp < maxrun && - is_sequential(ump, - OSSwapInt32(dataptr[bn - 1]), - OSSwapInt32(dataptr[bn])); - ++bn, ++*runp); - } else { -#endif /* REV_ENDIAN_FS */ - for (bn = xap->in_off + 1; - bn < MNINDIR(ump) && *runp < maxrun && - is_sequential(ump, - dataptr[bn - 1], - dataptr[bn]); - ++bn, ++*runp); -#if REV_ENDIAN_FS - } -#endif /* REV_ENDIAN_FS */ - } - } - if (bp) - buf_brelse(bp); - - daddr = blkptrtodb(ump, daddr); - *bnp = daddr == 0 ? -1 : daddr; - return (0); -} - -/* - * Create an array of logical block number/offset pairs which represent the - * path of indirect blocks required to access a data block. 
The first "pair" - * contains the logical block number of the appropriate single, double or - * triple indirect block and the offset into the inode indirect block array. - * Note, the logical block number of the inode single/double/triple indirect - * block appears twice in the array, once with the offset into the i_ib and - * once with the offset into the page itself. - */ -int -ufs_getlbns(vp, bn, ap, nump) - struct vnode *vp; - ufs_daddr_t bn; - struct indir *ap; - int *nump; -{ - long metalbn, realbn; - struct ufsmount *ump; - int blockcnt, i, numlevels, off; - - ump = VFSTOUFS(vp->v_mount); - if (nump) - *nump = 0; - numlevels = 0; - realbn = bn; - if ((long)bn < 0) - bn = -(long)bn; - - /* The first NDADDR blocks are direct blocks. */ - if (bn < NDADDR) - return (0); - - /* - * Determine the number of levels of indirection. After this loop - * is done, blockcnt indicates the number of data blocks possible - * at the given level of indirection, and NIADDR - i is the number - * of levels of indirection needed to locate the requested block. - */ - for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { - if (i == 0) - return (EFBIG); - blockcnt *= MNINDIR(ump); - if (bn < blockcnt) - break; - } - - /* Calculate the address of the first meta-block. */ - if (realbn >= 0) - metalbn = -(realbn - bn + NIADDR - i); - else - metalbn = -(-realbn - bn + NIADDR - i); - - /* - * At each iteration, off is the offset into the bap array which is - * an array of disk addresses at the current level of indirection. - * The logical block number and the offset in that block are stored - * into the argument array. - */ - ap->in_lbn = metalbn; - ap->in_off = off = NIADDR - i; - ap->in_exists = 0; - ap++; - for (++numlevels; i <= NIADDR; i++) { - /* If searching for a meta-data block, quit when found. 
*/ - if (metalbn == realbn) - break; - - blockcnt /= MNINDIR(ump); - off = (bn / blockcnt) % MNINDIR(ump); - - ++numlevels; - ap->in_lbn = metalbn; - ap->in_off = off; - ap->in_exists = 0; - ++ap; - - metalbn -= -1 + off * blockcnt; - } - if (nump) - *nump = numlevels; - return (0); -} -/* - * blockmap converts a file offsetto its physical block - * number on the disk... it optionally returns the physically - * contiguous size. - */ -int -ufs_blockmap(ap) - struct vnop_blockmap_args /* { - struct vnode *a_vp; - off_t a_foffset; - size_t a_size; - daddr64_t *a_bpn; - size_t *a_run; - void *a_poff; - int a_flags; - } */ *ap; -{ - vnode_t vp = ap->a_vp; - daddr64_t * bnp = ap->a_bpn; - size_t * runp = ap->a_run; - int size = ap->a_size; - struct fs * fs; - struct inode *ip; - ufs_daddr_t lbn; - ufs_daddr_t daddr = 0; - int devBlockSize = 0; - int retsize = 0; - int error = 0; - int nblks; - int lblk_offset; - - ip = VTOI(vp); - fs = ip->i_fs; - - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - - if (ap->a_foffset % devBlockSize) - panic("ufs_blockmap; allocation requested inside a device block"); - - if (size % devBlockSize) - panic("ufs_blockmap: size is not multiple of device block size\n"); - - /* - * round down to the beginning of a filesystem block - */ - lbn = (ufs_daddr_t)lblkno(fs, ap->a_foffset); - - lblk_offset = (int)(ap->a_foffset - lblktosize(fs, lbn)); - - if ((error = ufs_bmaparray(vp, lbn, &daddr, NULL, NULL, &nblks))) - return (error); - - if (ap->a_poff) - *(int *)ap->a_poff = 0; - - if (lbn < 0) { - /* - * we're dealing with the indirect blocks - * which are always fs_bsize in size - */ - retsize = (nblks + 1) * fs->fs_bsize; - } else if (daddr == -1 || nblks == 0) { - /* - * we're dealing with a 'hole'... 
UFS doesn't - * have a clean way to determine it's size - * or - * there's are no physically contiguous blocks - * so - * just return the size of the lbn we started with - */ - retsize = blksize(fs, ip, lbn); - } else { - /* - * we have 1 or more blocks that are physically contiguous - * to our starting block number... the orignal block + (nblks - 1) - * blocks must be full sized since only the last block can be - * composed of fragments... - */ - retsize = nblks * fs->fs_bsize; - - /* - * now compute the size of the last block and add it in - */ - retsize += blksize(fs, ip, (lbn + nblks)); - } - if (lblk_offset) { - if (daddr != -1) - daddr += (lblk_offset / devBlockSize); - - if (retsize > lblk_offset) - retsize -= lblk_offset; - else { - retsize = 0; - daddr = -1; - } - } - if (runp) { - if (retsize < size) - *runp = retsize; - else - *runp = size; - } - if (bnp) - *bnp = (daddr64_t)daddr; - - return (0); -} diff --git a/bsd/ufs/ufs/ufs_byte_order.c b/bsd/ufs/ufs/ufs_byte_order.c deleted file mode 100644 index 6562d1376..000000000 --- a/bsd/ufs/ufs/ufs_byte_order.c +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright 1998 Apple Computer, Inc. - * - * UFS byte swapping routines to make a big endian file system useful on a - * little endian machine. - * - * HISTORY - * - * 16 Feb 1998 A. Ramesh at Apple - * MacOSX version created. - */ - -#include -#include -#include -#include -#include - -#define byte_swap_longlong(thing) ((thing) = OSSwapInt64(thing)) -#define byte_swap_int(thing) ((thing) = OSSwapInt32(thing)) -#define byte_swap_short(thing) ((thing) = OSSwapInt16(thing)) - -void -byte_swap_longlongs(unsigned long long *array, int count) -{ - register unsigned long long i; - - for (i = 0; i < count; i++) - byte_swap_longlong(array[i]); -} - -void -byte_swap_ints(int *array, int count) -{ - register int i; - - for (i = 0; i < count; i++) - byte_swap_int(array[i]); -} - -void -byte_swap_shorts(short *array, int count) -{ - register int i; - - for (i = 0; i < count; i++) - byte_swap_short(array[i]); -} - -int -byte_swap_sbin(struct fs *sb) -{ - u_int16_t *usptr; - unsigned long size; - - byte_swap_ints(((int32_t *)&sb->fs_firstfield), 52); - byte_swap_int(sb->fs_cgrotor); - byte_swap_int(sb->fs_cpc); - byte_swap_shorts((int16_t *)sb->fs_opostbl, - sizeof(sb->fs_opostbl) / sizeof(int16_t)); - byte_swap_int(sb->fs_avgfilesize); - byte_swap_int(sb->fs_avgfpdir); - byte_swap_ints((int32_t *)sb->fs_sparecon, - sizeof(sb->fs_sparecon) / sizeof(int32_t)); - byte_swap_ints((int32_t *)&sb->fs_contigsumsize, 3); - byte_swap_longlongs((u_int64_t 
*)&sb->fs_maxfilesize,3); - byte_swap_ints((int32_t *)&sb->fs_state, 6); - - /* Got these magic numbers from mkfs.c in newfs */ - if (sb->fs_cpc < 0 || sb->fs_nrpos < 0) - return EINVAL; /* Those are not legal values */ - if (sb->fs_nrpos != 8 || sb->fs_cpc > 16) { - usptr = (u_int16_t *)((u_int8_t *)(sb) + (sb)->fs_postbloff); - size = sb->fs_cpc * sb->fs_nrpos; - - if (sb->fs_nrpos > (INT_MAX / sb->fs_cpc)) - return EINVAL; /* overflow again */ - if (size > INT_MAX / sizeof(short)) - return EINVAL; /* size overflows */ - if (sb->fs_postbloff < 0) - return EINVAL; /* Invalid block list */ - if ((sb->fs_postbloff + size * sizeof(short)) >= SBSIZE) - return EINVAL; /* Otherwise a buffer overflow */ - if ((sb->fs_postbloff + size * sizeof(short)) < sb->fs_postbloff) - return EINVAL; /* Otherwise, a buffer underflow */ - - byte_swap_shorts(usptr,size); /* fs_postbloff */ - } - - return 0; -} - -void -byte_swap_sbout(struct fs *sb) -{ - u_int16_t *usptr; - unsigned long size; - /* Got these magic numbers from mkfs.c in newfs */ - if (sb->fs_nrpos != 8 || sb->fs_cpc > 16) { - usptr = (u_int16_t *)((u_int8_t *)(sb) + (sb)->fs_postbloff); - size = sb->fs_cpc * sb->fs_nrpos; - byte_swap_shorts(usptr,size); /* fs_postbloff */ - } - - byte_swap_ints(((int32_t *)&sb->fs_firstfield), 52); - byte_swap_int(sb->fs_cgrotor); - byte_swap_int(sb->fs_cpc); - byte_swap_shorts((int16_t *)sb->fs_opostbl, - sizeof(sb->fs_opostbl) / sizeof(int16_t)); - byte_swap_int(sb->fs_avgfilesize); - byte_swap_int(sb->fs_avgfpdir); - byte_swap_ints((int32_t *)sb->fs_sparecon, - sizeof(sb->fs_sparecon) / sizeof(int32_t)); - byte_swap_ints((int32_t *)&sb->fs_contigsumsize, 3); - byte_swap_longlongs((u_int64_t *)&sb->fs_maxfilesize,3); - byte_swap_ints((int32_t *)&sb->fs_state, 6); -} - -void -byte_swap_csum(struct csum *cs) -{ - byte_swap_ints((int *) cs, sizeof(struct csum) / sizeof(int32_t)); -} - -/* This is for the new 4.4 cylinder group block */ -void -byte_swap_cgin(struct cg *cg, struct fs * fs) 
-{ - int32_t * ulptr; - int16_t * usptr; - int size; - - byte_swap_int(cg->cg_firstfield); - byte_swap_int(cg->cg_magic); - byte_swap_int(cg->cg_time); - byte_swap_int(cg->cg_cgx); - byte_swap_short(cg->cg_ncyl); - byte_swap_short(cg->cg_niblk); - byte_swap_int(cg->cg_ndblk); - byte_swap_csum(&cg->cg_cs); - byte_swap_int(cg->cg_rotor); - byte_swap_int(cg->cg_frotor); - byte_swap_int(cg->cg_irotor); - byte_swap_ints(cg->cg_frsum, MAXFRAG); - byte_swap_int(cg->cg_iusedoff); - byte_swap_int(cg->cg_freeoff); - byte_swap_int(cg->cg_nextfreeoff); - byte_swap_int(cg->cg_clusteroff); - byte_swap_int(cg->cg_nclusterblks); - byte_swap_ints((int *)&cg->cg_sparecon, 13); - - byte_swap_int(cg->cg_btotoff); - ulptr = ((int32_t *)((u_int8_t *)(cg) + (cg)->cg_btotoff)); - size = fs->fs_cpg; - byte_swap_ints(ulptr, size); /*cg_btotoff*/ - - byte_swap_int(cg->cg_boff); - usptr = ((int16_t *)((u_int8_t *)(cg) + (cg)->cg_boff)); - size = fs->fs_cpg * fs->fs_nrpos; - byte_swap_shorts(usptr,size); /*cg_boff*/ - - byte_swap_int(cg->cg_clustersumoff); - - if ((unsigned int)fs->fs_contigsumsize > 0) { - - ulptr = ((int32_t *)((u_int8_t *)(cg) + (cg)->cg_clustersumoff)); - size = (fs->fs_contigsumsize + 1); - byte_swap_ints(ulptr, size); /*cg_clustersumoff*/ - } - -} - -/* This is for the new 4.4 cylinder group block */ -void -byte_swap_cgout(struct cg *cg, struct fs * fs) -{ - int32_t * ulptr; - int16_t * usptr; - int size; - - byte_swap_int(cg->cg_firstfield); - byte_swap_int(cg->cg_magic); - byte_swap_int(cg->cg_time); - byte_swap_int(cg->cg_cgx); - byte_swap_short(cg->cg_ncyl); - byte_swap_short(cg->cg_niblk); - byte_swap_int(cg->cg_ndblk); - byte_swap_csum(&cg->cg_cs); - byte_swap_int(cg->cg_rotor); - byte_swap_int(cg->cg_frotor); - byte_swap_int(cg->cg_irotor); - byte_swap_ints(cg->cg_frsum, MAXFRAG); - byte_swap_int(cg->cg_freeoff); - byte_swap_int(cg->cg_nextfreeoff); - byte_swap_int(cg->cg_nclusterblks); - byte_swap_ints((int *)&cg->cg_sparecon, 13); - - 
byte_swap_int(cg->cg_iusedoff); - byte_swap_int(cg->cg_clusteroff); - ulptr = ((int32_t *)((u_int8_t *)(cg) + (cg)->cg_btotoff)); - size = fs->fs_cpg; - byte_swap_ints(ulptr, size); /*cg_btotoff*/ - byte_swap_int(cg->cg_btotoff); - - usptr = ((int16_t *)((u_int8_t *)(cg) + (cg)->cg_boff)); - size = fs->fs_cpg * fs->fs_nrpos; - byte_swap_shorts(usptr,size); /*cg_boff*/ - byte_swap_int(cg->cg_boff); - - if ((unsigned int)fs->fs_contigsumsize > 0) { - ulptr = ((int32_t *)((u_int8_t *)(cg) + (cg)->cg_clustersumoff)); - size = (fs->fs_contigsumsize + 1); - byte_swap_ints(ulptr, size); /*cg_clustersumoff*/ - - } - byte_swap_int(cg->cg_clustersumoff); - -} - -/* This value MUST correspond to the value set in the ffs_mounts */ - -#define RESYMLNKLEN 60 - -void -byte_swap_inode_in(struct dinode *di, struct inode *ip) -{ - int i; - - ip->i_mode = OSSwapInt16(di->di_mode); - ip->i_nlink = OSSwapInt16(di->di_nlink); - ip->i_oldids[0] = OSSwapInt16(di->di_u.oldids[0]); - ip->i_oldids[1] = OSSwapInt16(di->di_u.oldids[1]); - ip->i_size = OSSwapInt64(di->di_size); - ip->i_atime = OSSwapInt32(di->di_atime); - ip->i_atimensec = OSSwapInt32(di->di_atimensec); - ip->i_mtime = OSSwapInt32(di->di_mtime); - ip->i_mtimensec = OSSwapInt32(di->di_mtimensec); - ip->i_ctime = OSSwapInt32(di->di_ctime); - ip->i_ctimensec = OSSwapInt32(di->di_ctimensec); - if (((ip->i_mode & IFMT) == IFLNK ) && (ip->i_size <= RESYMLNKLEN)) { - bcopy(&di->di_shortlink, &ip->i_shortlink, RESYMLNKLEN); - } else { - for (i=0; i < NDADDR; i++) /* direct blocks */ - ip->i_db[i] = OSSwapInt32(di->di_db[i]); - for (i=0; i < NIADDR; i++) /* indirect blocks */ - ip->i_ib[i] = OSSwapInt32(di->di_ib[i]); - } - ip->i_flags = OSSwapInt32(di->di_flags); - ip->i_blocks = OSSwapInt32(di->di_blocks); - ip->i_gen = OSSwapInt32(di->di_gen); - ip->i_uid = OSSwapInt32(di->di_uid); - ip->i_gid = OSSwapInt32(di->di_gid); - ip->i_spare[0] = OSSwapInt32(di->di_spare[0]); - ip->i_spare[1] = OSSwapInt32(di->di_spare[1]); -} - -void 
-byte_swap_inode_out(struct inode *ip, struct dinode *di) -{ - int i; - int mode, inosize; - - mode = (ip->i_mode & IFMT); - inosize = ip->i_size; - - di->di_mode = OSSwapInt16(ip->i_mode); - di->di_nlink = OSSwapInt16(ip->i_nlink); - di->di_u.oldids[0] = OSSwapInt16(ip->i_oldids[0]); - di->di_u.oldids[1] = OSSwapInt16(ip->i_oldids[1]); - di->di_size = OSSwapInt64(ip->i_size); - di->di_atime = OSSwapInt32(ip->i_atime); - di->di_atimensec = OSSwapInt32(ip->i_atimensec); - di->di_mtime = OSSwapInt32(ip->i_mtime); - di->di_mtimensec = OSSwapInt32(ip->i_mtimensec); - di->di_ctime = OSSwapInt32(ip->i_ctime); - di->di_ctimensec = OSSwapInt32(ip->i_ctimensec); - if ((mode == IFLNK) && (inosize <= RESYMLNKLEN)) { - bcopy( &ip->i_shortlink, &di->di_shortlink, RESYMLNKLEN); - } else { - for (i=0; i < NDADDR; i++) /* direct blocks */ - di->di_db[i] = OSSwapInt32(ip->i_db[i]); - for (i=0; i < NIADDR; i++) /* indirect blocks */ - di->di_ib[i] = OSSwapInt32(ip->i_ib[i]); - } - di->di_flags = OSSwapInt32(ip->i_flags); - di->di_blocks = OSSwapInt32(ip->i_blocks); - di->di_gen = OSSwapInt32(ip->i_gen); - di->di_uid = OSSwapInt32(ip->i_uid); - di->di_gid = OSSwapInt32(ip->i_gid); - di->di_spare[0] = OSSwapInt32(ip->i_spare[0]); - di->di_spare[1] = OSSwapInt32(ip->i_spare[1]); -} - -void -byte_swap_direct(struct direct *dirp) -{ - byte_swap_int(dirp->d_ino); - byte_swap_short(dirp->d_reclen); -} - -void -byte_swap_dir_block_in(char *addr, int count) -{ - struct direct *ep = (struct direct *) addr; - int entryoffsetinblk = 0; - - while (entryoffsetinblk < count) { - ep = (struct direct *) (entryoffsetinblk + addr); - byte_swap_int(ep->d_ino); - byte_swap_short(ep->d_reclen); - entryoffsetinblk += ep->d_reclen; - if (ep->d_reclen < 12) /* handle garbage in dirs */ - break; - } -} - -void -byte_swap_dir_out(char *addr, int count) -{ - struct direct *ep = (struct direct *) addr; - int entryoffsetinblk = 0; - int reclen; - - while (entryoffsetinblk < count) { - ep = (struct direct *) 
(entryoffsetinblk + addr); - reclen = ep->d_reclen; - entryoffsetinblk += reclen; - byte_swap_int(ep->d_ino); - byte_swap_short(ep->d_reclen); - if (reclen < 12) - break; - } -} - -void -byte_swap_dir_block_out(struct buf *bp) -{ - struct direct *ep = (struct direct *) buf_dataptr(bp); - int reclen, entryoffsetinblk = 0; - - while (entryoffsetinblk < buf_count(bp)) { - ep = (struct direct *) (entryoffsetinblk + buf_dataptr(bp)); - reclen = ep->d_reclen; - entryoffsetinblk += reclen; - byte_swap_int(ep->d_ino); - byte_swap_short(ep->d_reclen); - if (reclen < 12) - break; - } -} - -void -byte_swap_dirtemplate_in(struct dirtemplate *dirt) -{ - byte_swap_int(dirt->dot_ino); - byte_swap_short(dirt->dot_reclen); - byte_swap_int(dirt->dotdot_ino); - byte_swap_short(dirt->dotdot_reclen); -} - -void -byte_swap_minidir_in(struct direct *dirp) -{ - byte_swap_int(dirp->d_ino); - byte_swap_short(dirp->d_reclen); -} - -#if 0 -/* This is for the compatability (old) cylinder group block */ -void -byte_swap_ocylgroup(struct cg *cg) -{ - byte_swap_int(cg->cg_time); - byte_swap_int(cg->cg_cgx); - byte_swap_short(cg->cg_ncyl); - byte_swap_short(cg->cg_niblk); - byte_swap_int(cg->cg_ndblk); - byte_swap_csum(&cg->cg_cs); - byte_swap_int(cg->cg_rotor); - byte_swap_int(cg->cg_frotor); - byte_swap_int(cg->cg_irotor); - byte_swap_ints(&cg->cg_frsum, 8); - byte_swap_ints(&cg->cg_btot, 32); - byte_swap_shorts((short *)&cg->cg_b, 32 * 8); - byte_swap_int(cg->cg_magic); -} -#endif /* 0 */ diff --git a/bsd/ufs/ufs/ufs_byte_order.h b/bsd/ufs/ufs/ufs_byte_order.h deleted file mode 100644 index 009a51de5..000000000 --- a/bsd/ufs/ufs/ufs_byte_order.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). 
You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright 1998 Apple Computer, Inc. - * - * UFS byte swapping routines to make a big endian file system useful on a - * little endian machine. 
- * - */ - -#ifdef KERNEL_PRIVATE - -#ifndef _UFS_BYTE_ORDER_H_ -#define _UFS_BYTE_ORDER_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -#include -#include -#include -#include -#include -#include - -void byte_swap_longlongs(unsigned long long *, int); -void byte_swap_ints(int *, int); -void byte_swap_shorts(short *, int); - -/* void byte_swap_superblock(struct fs *); */ -int byte_swap_sbin(struct fs *); -void byte_swap_sbout(struct fs *); -void byte_swap_csum(struct csum *); -void byte_swap_ocylgroup(struct cg *); -void byte_swap_cgin(struct cg *, struct fs *); -void byte_swap_cgout(struct cg *, struct fs *); - -void byte_swap_inode_in(struct dinode *, struct inode *); -void byte_swap_inode_out(struct inode *, struct dinode *); - -void byte_swap_dir_block_in(char *, int); -void byte_swap_dir_block_out(buf_t); -void byte_swap_direct(struct direct *); -void byte_swap_dirtemplate_in(struct dirtemplate *); -void byte_swap_minidir_in(struct direct *); - -#endif /* __APPLE_API_PRIVATE */ -#endif /* _UFS_BYTE_ORDER_H_ */ -#endif /* KERNEL_PRIVATE */ diff --git a/bsd/ufs/ufs/ufs_extern.h b/bsd/ufs/ufs/ufs_extern.h deleted file mode 100644 index 3d1cfd1bb..000000000 --- a/bsd/ufs/ufs/ufs_extern.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/*- - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 - */ -#ifndef _UFS_EXTERN_H_ -#define _UFS_EXTERN_H_ - -#include - -#ifdef __APPLE_API_PRIVATE -struct buf; -struct direct; -struct disklabel; -struct flock; -struct inode; -struct mbuf; -struct mount; -struct nameidata; -struct proc; -#ifndef _KAUTH_CRED_T -#define _KAUTH_CRED_T -struct ucred; -typedef struct ucred *kauth_cred_t; -#endif /* !_KAUTH_CRED_T */ -struct ufs_args; -struct uio; -struct vnode_attr; -struct vfsconf; -struct vnode; - -__BEGIN_DECLS -int ufs_remove_internal(vnode_t, vnode_t, struct componentname *, int); -int ufs_access_internal(vnode_t, mode_t, kauth_cred_t); - -int ffs_read_internal(vnode_t, struct uio *, int); -int ffs_write_internal(vnode_t, struct uio *, int, kauth_cred_t); -int ffs_truncate_internal(vnode_t, off_t, int, kauth_cred_t); - -void diskerr - (struct buf *, char *, char *, int, int, struct disklabel *); -void disksort(struct buf *, struct buf *); -u_int dkcksum(struct disklabel *); -char *readdisklabel(dev_t, int (*)(), struct disklabel *); -int setdisklabel(struct disklabel *, struct disklabel *, u_long); -int writedisklabel(dev_t, int (*)(), struct disklabel *); - -int ufs_access(struct vnop_access_args 
*); -int ufs_checkpath(struct inode *, struct inode *, kauth_cred_t); -int ufs_close(struct vnop_close_args *); -int ufs_create(struct vnop_create_args *); -void ufs_dirbad(struct inode *, doff_t, const char *); -int ufs_dirbadentry(struct vnode *, struct direct *, int); -int ufs_dirempty(struct inode *, ino_t, kauth_cred_t); -int ufs_direnter(struct inode *, struct vnode *,struct componentname *); -int ufs_dirremove(struct vnode *, struct componentname*); -int ufs_dirrewrite - (struct inode *, struct inode *, struct componentname *); -int ufs_getattr(struct vnop_getattr_args *); -int ufs_getlbns(struct vnode *, ufs_daddr_t, struct indir *, int *); -struct vnode * - ufs_ihashget(dev_t, ino_t); -void ufs_ihashinit(void); -void ufs_ihashins(struct inode *); -struct vnode * - ufs_ihashlookup(dev_t, ino_t); -void ufs_ihashrem(struct inode *); -int ufs_inactive(struct vnop_inactive_args *); -int ufs_init(struct vfsconf *); -int ufs_ioctl(struct vnop_ioctl_args *); -int ufs_link(struct vnop_link_args *); -int ufs_lookup(struct vnop_lookup_args *); -int ufs_makeinode(struct vnode_attr *, struct vnode *, struct vnode **, struct componentname *); -int ufs_mkdir(struct vnop_mkdir_args *); -int ufs_mknod(struct vnop_mknod_args *); -int ufs_mmap(struct vnop_mmap_args *); -int ufs_open(struct vnop_open_args *); -int ufs_pathconf(struct vnop_pathconf_args *); -int ufs_readdir(struct vnop_readdir_args *); -int ufs_readlink(struct vnop_readlink_args *); -int ufs_reclaim(struct vnode *, struct proc *); -int ufs_remove(struct vnop_remove_args *); -int ufs_rename(struct vnop_rename_args *); -#define ufs_revoke nop_revoke -int ufs_rmdir(struct vnop_rmdir_args *); -int ufs_root(struct mount *, struct vnode **, vfs_context_t); -int ufs_select(struct vnop_select_args *); -int ufs_kqfilt_add(struct vnop_kqfilt_add_args *); -int ufs_setattr(struct vnop_setattr_args *); -int ufs_start(struct mount *, int, vfs_context_t); -int ufs_strategy(struct vnop_strategy_args *); -int 
ufs_symlink(struct vnop_symlink_args *); -int ufs_whiteout(struct vnop_whiteout_args *); -int ufsspec_close(struct vnop_close_args *); -int ufsspec_read(struct vnop_read_args *); -int ufsspec_write(struct vnop_write_args *); - -#if FIFO -int ufsfifo_read(struct vnop_read_args *); -int ufsfifo_write(struct vnop_write_args *); -int ufsfifo_close(struct vnop_close_args *); -int ufsfifo_kqfilt_add(struct vnop_kqfilt_add_args *); -#endif -int ufs_blktooff(struct vnop_blktooff_args *); -int ufs_blockmap(struct vnop_blockmap_args *); - -__END_DECLS - -#endif /* __APPLE_API_PRIVATE */ -#endif /* ! _UFS_EXTERN_H_ */ diff --git a/bsd/ufs/ufs/ufs_ihash.c b/bsd/ufs/ufs/ufs_ihash.c deleted file mode 100644 index 18909bae7..000000000 --- a/bsd/ufs/ufs/ufs_ihash.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1991, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_ihash.c 8.7 (Berkeley) 5/17/95 - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Structures associated with inode cacheing. - */ -LIST_HEAD(ihashhead, inode) *ihashtbl; -u_long ihash; /* size of hash table - 1 */ -#define INOHASH(device, inum) (&ihashtbl[((device) + (inum)) & ihash]) - -/* - * Initialize inode hash table. - */ -void -ufs_ihashinit() -{ - static int done; - - if (done) - return; - - done = 1; - ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash); -} - -/* - * Use the device/inum pair to find the incore inode, and return a pointer - * to it. If it is in core, return it, even if it is locked. - */ -struct vnode * -ufs_ihashlookup(dev, inum) - dev_t dev; - ino_t inum; -{ - struct inode *ip; - - for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) - if (inum == ip->i_number && dev == ip->i_dev) - break; - if (ip) - return (ITOV(ip)); - return (NULLVP); -} - -/* - * Use the device/inum pair to find the incore inode, and return a pointer - * to it. If it is in core, but locked, wait for it. 
- */ -struct vnode * -ufs_ihashget(dev, inum) - dev_t dev; - ino_t inum; -{ - struct proc *p = current_proc(); /* XXX */ - struct inode *ip; - struct vnode *vp; - uint32_t vid; - -loop: - for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) { - if (inum == ip->i_number && dev == ip->i_dev) { - - if (ISSET(ip->i_flag, IN_ALLOC)) { - /* - * inode is being created. Wait for it - * to finish creation - */ - SET(ip->i_flag, IN_WALLOC); - (void)tsleep((caddr_t)ip, PINOD, "ufs_ihashget", 0); - goto loop; - } - - if (ISSET(ip->i_flag, IN_TRANSIT)) { - /* - * inode is getting reclaimed wait till - * the operation is complete and return - * error - */ - SET(ip->i_flag, IN_WTRANSIT); - (void)tsleep((caddr_t)ip, PINOD, "ufs_ihashget1", 0); - goto loop; - } - vp = ITOV(ip); - /* - * the vid needs to be grabbed before we drop - * lock protecting the hash - */ - vid = vnode_vid(vp); - - /* - * we currently depend on running under the FS funnel - * when we do proper locking and advertise ourselves - * as thread safe, we'll need a lock to protect the - * hash lookup... this is where we would drop it - */ - if (vnode_getwithvid(vp, vid)) { - /* - * If vnode is being reclaimed, or has - * already changed identity, no need to wait - */ - return (NULL); - } - return (vp); - } - } - return (NULL); -} - -/* - * Insert the inode into the hash table, - * inode is assumed to be locked by the caller - */ -void -ufs_ihashins(ip) - struct inode *ip; -{ - struct ihashhead *ipp; - - ipp = INOHASH(ip->i_dev, ip->i_number); - LIST_INSERT_HEAD(ipp, ip, i_hash); -} - -/* - * Remove the inode from the hash table. 
- */ -void -ufs_ihashrem(ip) - struct inode *ip; -{ - LIST_REMOVE(ip, i_hash); -#if DIAGNOSTIC - ip->i_hash.le_next = NULL; - ip->i_hash.le_prev = NULL; -#endif -} diff --git a/bsd/ufs/ufs/ufs_inode.c b/bsd/ufs/ufs/ufs_inode.c deleted file mode 100644 index dcd42d9d3..000000000 --- a/bsd/ufs/ufs/ufs_inode.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1991, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. 
- * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -u_long nextgennumber; /* Next generation number to assign. */ -extern int prtactive; - -/* - * Last reference to an inode. If necessary, write or delete it. - */ -int -ufs_inactive(ap) - struct vnop_inactive_args /* { - struct vnode *a_vp; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct proc *p = vfs_context_proc(ap->a_context); - struct timeval tv; - int mode, error = 0; - extern int prtactive; - - if (prtactive && vp->v_usecount != 0) - vprint("ffs_inactive: pushing active", vp); - - /* - * Ignore inodes related to stale file handles. 
- */ - if (ip->i_mode == 0) - goto out; - if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { -#if QUOTA - if (!getinoquota(ip)) - (void)chkiq(ip, -1, NOCRED, 0); -#endif - /* - * marking inode in transit so that one can get this - * inode from inodecache - */ - SET(ip->i_flag, IN_TRANSIT); - error = ffs_truncate_internal(vp, (off_t)0, 0, NOCRED); - ip->i_rdev = 0; - mode = ip->i_mode; - ip->i_mode = 0; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - ffs_vfree(vp, ip->i_number, mode); - } - if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { - microtime(&tv); - ffs_update(vp, &tv, &tv, 0); - } -out: - /* - * If we are done with the inode, reclaim it - * so that it can be reused immediately. - */ - if (ip->i_mode == 0) - vnode_recycle(vp); - return (error); -} - -/* - * Reclaim an inode so that it can be used for other purposes. - */ -int -ufs_reclaim(vp, p) - struct vnode *vp; - struct proc *p; -{ - register struct inode *ip; - int i; - extern int prtactive; - - if (prtactive && vp->v_usecount != 0) - vprint("ufs_reclaim: pushing active", vp); - - vnode_removefsref(vp); - /* - * Remove the inode from its hash chain. - */ - ip = VTOI(vp); - ufs_ihashrem(ip); - - if (ip->i_devvp) { - struct vnode *tvp = ip->i_devvp; - ip->i_devvp = NULL; - vnode_rele(tvp); - } -#if QUOTA - for (i = 0; i < MAXQUOTAS; i++) { - if (ip->i_dquot[i] != NODQUOT) { - dqrele(ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } - } -#endif - CLR(ip->i_flag, (IN_ALLOC|IN_TRANSIT)); - if (ISSET(ip->i_flag, IN_WALLOC)|| ISSET(ip->i_flag, IN_WTRANSIT)) - wakeup(ip); - - return (0); -} diff --git a/bsd/ufs/ufs/ufs_lookup.c b/bsd/ufs/ufs/ufs_lookup.c deleted file mode 100644 index 77b3857aa..000000000 --- a/bsd/ufs/ufs/ufs_lookup.c +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - -struct nchstats ufs_nchstats; -#if DIAGNOSTIC -int dirchk = 1; -#else -int dirchk = 0; -#endif - -#define FSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) - -/* - * Convert a component of a pathname into a pointer to a locked inode. - * This is a very central and rather complicated routine. - * If the file system is not maintained in a strict tree hierarchy, - * this can result in a deadlock situation (see comments in code below). - * - * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending - * on whether the name is to be looked up, created, renamed, or deleted. - * When CREATE, RENAME, or DELETE is specified, information usable in - * creating, renaming, or deleting a directory entry may be calculated. - * If flag has LOCKPARENT or'ed into it and the target of the pathname - * exists, lookup returns both the target and its parent directory locked. - * When creating or renaming and LOCKPARENT is specified, the target may - * not be ".". 
When deleting and LOCKPARENT is specified, the target may - * be "."., - * - * Overall outline of ufs_lookup: - * - * check accessibility of directory - * look for name in cache, if found, then if at end of path - * and deleting or creating, drop it, else return name - * search for name in directory, to found or notfound - * notfound: - * if creating, return locked directory, leaving info on available slots - * else return error - * found: - * if at end of path and deleting, return information to allow delete - * if at end of path and rewriting (RENAME and LOCKPARENT), lock target - * inode and return info to allow rewrite - * if not at end, add name to cache; if at end and neither creating - * nor deleting, add name to cache - */ -int -ufs_lookup(ap) - struct vnop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - vfs_context_t a_context - } */ *ap; -{ - register struct vnode *vdp; /* vnode for directory being searched */ - register struct inode *dp; /* inode for directory being searched */ - struct buf *bp; /* a buffer of directory entries */ - register struct direct *ep; /* the current directory entry */ - int entryoffsetinblock; /* offset of ep in bp's buffer */ - enum {NONE, COMPACT, FOUND} slotstatus; - doff_t slotoffset; /* offset of area with free space */ - int slotsize; /* size of area at slotoffset */ - int slotfreespace; /* amount of space free in slot */ - int slotneeded; /* size of the entry we're seeking */ - int numdirpasses; /* strategy for directory search */ - doff_t endsearch; /* offset to end directory search */ - doff_t prevoff; /* prev entry dp->i_offset */ - struct vnode *pdp; /* saved dp during symlink work */ - struct vnode *tdp; /* returned by VFS_VGET */ - doff_t enduseful; /* pointer past last used dir slot */ - u_long bmask; /* block offset mask */ - int wantparent; /* 1 => wantparent or lockparent flag */ - int namlen, error; - struct vnode **vpp = ap->a_vpp; - struct componentname *cnp = 
ap->a_cnp; - int flags = cnp->cn_flags; - int nameiop = cnp->cn_nameiop; - vfs_context_t context = ap->a_context; - kauth_cred_t cred; -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - - - if (cnp->cn_namelen > UFSMAXNAMLEN) - return (ENAMETOOLONG); - - cred = vfs_context_ucred(context); - bp = NULL; - slotoffset = -1; - *vpp = NULL; - vdp = ap->a_dvp; - dp = VTOI(vdp); - - wantparent = flags & (LOCKPARENT|WANTPARENT); - -#if REV_ENDIAN_FS - rev_endian=(vdp->v_mount->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - /* - * Check accessiblity of directory. - */ - if ((dp->i_mode & IFMT) != IFDIR) - return (ENOTDIR); - - /* - * We now have a segment name to search for, and a directory to search. - * - * Before tediously performing a linear scan of the directory, - * check the name cache to see if the directory/name pair - * we are looking for is known already. - */ - if (error = cache_lookup(vdp, vpp, cnp)) { - if (error == ENOENT) - return (error); - return (0); - } - /* - * Suppress search for slots unless creating - * file and at end of pathname, in which case - * we watch for a place to put the new file in - * case it doesn't already exist. - */ - slotstatus = FOUND; - slotfreespace = slotsize = slotneeded = 0; - if ((nameiop == CREATE || nameiop == RENAME) && - (flags & ISLASTCN)) { - slotstatus = NONE; - slotneeded = (sizeof(struct direct) - UFSMAXNAMLEN + - cnp->cn_namelen + 3) &~ 3; - } - /* - * If there is cached information on a previous search of - * this directory, pick up where we last left off. - * We cache only lookups as these are the most common - * and have the greatest payoff. Caching CREATE has little - * benefit as it usually must search the entire directory - * to determine that the entry does not exist. Caching the - * location of the last DELETE or RENAME has not reduced - * profiling time and hence has been removed in the interest - * of simplicity. 
- */ - bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_vfsstat.f_iosize - 1; - if (nameiop != LOOKUP || dp->i_diroff == 0 || - dp->i_diroff > dp->i_size) { - entryoffsetinblock = 0; - dp->i_offset = 0; - numdirpasses = 1; - } else { - dp->i_offset = dp->i_diroff; - if ((entryoffsetinblock = dp->i_offset & bmask) && - (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp))) - goto out; - numdirpasses = 2; - ufs_nchstats.ncs_2passes++; - } - prevoff = dp->i_offset; - endsearch = roundup(dp->i_size, DIRBLKSIZ); - enduseful = 0; - -searchloop: - while (dp->i_offset < endsearch) { - /* - * If necessary, get the next directory block. - */ - if ((dp->i_offset & bmask) == 0) { - if (bp != NULL) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - } - if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) - goto out; - entryoffsetinblock = 0; - } - /* - * If still looking for a slot, and at a DIRBLKSIZE - * boundary, have to start looking for free space again. - */ - if (slotstatus == NONE && - (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { - slotoffset = -1; - slotfreespace = 0; - } - /* - * Get pointer to next entry. - * Full validation checks are slow, so we only check - * enough to insure forward progress through the - * directory. Complete checks can be run by patching - * "dirchk" to be true. - */ - ep = (struct direct *)((char *)buf_dataptr(bp) + entryoffsetinblock); - if (ep->d_reclen == 0 || - dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock)) { - int i; - - ufs_dirbad(dp, dp->i_offset, "mangled entry"); - i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); - dp->i_offset += i; - entryoffsetinblock += i; - continue; - } - - /* - * If an appropriate sized slot has not yet been found, - * check to see if one is available. Also accumulate space - * in the current block so that we can determine if - * compaction is viable. 
- */ - if (slotstatus != FOUND) { - int size = ep->d_reclen; - - if (ep->d_ino != 0) - size -= DIRSIZ(FSFMT(vdp), ep); - if (size > 0) { - if (size >= slotneeded) { - slotstatus = FOUND; - slotoffset = dp->i_offset; - slotsize = ep->d_reclen; - } else if (slotstatus == NONE) { - slotfreespace += size; - if (slotoffset == -1) - slotoffset = dp->i_offset; - if (slotfreespace >= slotneeded) { - slotstatus = COMPACT; - slotsize = dp->i_offset + - ep->d_reclen - slotoffset; - } - } - } - } - - /* - * Check for a name match. - */ - if (ep->d_ino) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (vdp->v_mount->mnt_maxsymlinklen > 0) - namlen = ep->d_namlen; - else - namlen = ep->d_type; -# else - namlen = ep->d_namlen; -# endif - if (namlen == cnp->cn_namelen && - !bcmp(cnp->cn_nameptr, ep->d_name, - (unsigned)namlen)) { - /* - * Save directory entry's inode number and - * reclen in ndp->ni_ufs area, and release - * directory buffer. - */ - if (vdp->v_mount->mnt_maxsymlinklen > 0 && - ep->d_type == DT_WHT) { - slotstatus = FOUND; - slotoffset = dp->i_offset; - slotsize = ep->d_reclen; - dp->i_reclen = slotsize; - enduseful = dp->i_size; - ap->a_cnp->cn_flags |= ISWHITEOUT; - numdirpasses--; - goto notfound; - } - dp->i_ino = ep->d_ino; - dp->i_reclen = ep->d_reclen; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - goto found; - } - } - prevoff = dp->i_offset; - dp->i_offset += ep->d_reclen; - entryoffsetinblock += ep->d_reclen; - if (ep->d_ino) - enduseful = dp->i_offset; - } -notfound: - /* - * If we started in the middle of the directory and failed - * to find our target, we must check the beginning as well. 
- */ - if (numdirpasses == 2) { - numdirpasses--; - dp->i_offset = 0; - endsearch = dp->i_diroff; - goto searchloop; - } - if (bp != NULL) { -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - buf_brelse(bp); - } - /* - * If creating, and at end of pathname and current - * directory has not been removed, then can consider - * allowing file to be created. - */ - if ((nameiop == CREATE || nameiop == RENAME || - (nameiop == DELETE && - (ap->a_cnp->cn_flags & DOWHITEOUT) && - (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_nlink != 0) { - /* - * Return an indication of where the new directory - * entry should be put. If we didn't find a slot, - * then set dp->i_count to 0 indicating - * that the new slot belongs at the end of the - * directory. If we found a slot, then the new entry - * can be put in the range from dp->i_offset to - * dp->i_offset + dp->i_count. - */ - if (slotstatus == NONE) { - dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); - dp->i_count = 0; - enduseful = dp->i_offset; - } else if (nameiop == DELETE) { - dp->i_offset = slotoffset; - if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) - dp->i_count = 0; - else - dp->i_count = dp->i_offset - prevoff; - } else { - dp->i_offset = slotoffset; - dp->i_count = slotsize; - if (enduseful < slotoffset + slotsize) - enduseful = slotoffset + slotsize; - } - dp->i_endoff = roundup(enduseful, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - /* - * We return with the directory locked, so that - * the parameters we set up above will still be - * valid if we actually decide to do a direnter(). - * We return ni_vp == NULL to indicate that the entry - * does not currently exist; we leave a pointer to - * the (locked) directory inode in ndp->ni_dvp. - * - * NB - if the directory is unlocked, then this - * information cannot be used. - */ - error = EJUSTRETURN; - goto out; - } - /* - * Insert name into cache (as non-existent) if appropriate. 
- */ - if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) - cache_enter(vdp, *vpp, cnp); - error = ENOENT; - goto out; - -found: - if (numdirpasses == 2) - ufs_nchstats.ncs_pass2++; - /* - * Check that directory length properly reflects presence - * of this entry. - */ - if (entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep) > dp->i_size) { - ufs_dirbad(dp, dp->i_offset, "i_size too small"); - dp->i_size = entryoffsetinblock + DIRSIZ(FSFMT(vdp), ep); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - } - - /* - * Found component in pathname. - * If the final component of path name, save information - * in the cache as to where the entry was found. - */ - if ((flags & ISLASTCN) && nameiop == LOOKUP) - dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); - - /* - * If deleting, and at end of pathname, return - * parameters which can be used to remove file. - * If the wantparent flag isn't set, we return only - * the directory (in ndp->ni_dvp), otherwise we go - * on and lock the inode, being careful with ".". - */ - if (nameiop == DELETE && (flags & ISLASTCN)) { - /* - * Return pointer to current entry in dp->i_offset, - * and distance past previous entry (if there - * is a previous entry in this block) in dp->i_count. - * Save directory inode pointer in ndp->ni_dvp for dirremove(). - */ - if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) - dp->i_count = 0; - else - dp->i_count = dp->i_offset - prevoff; - if (dp->i_number == dp->i_ino) { - vnode_get(vdp); - *vpp = vdp; - error = 0; - goto out; - } - if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) - goto out; - *vpp = tdp; - goto out; - } - - /* - * If rewriting (RENAME), return the inode and the - * information required to rewrite the present directory - * Must get inode of directory entry to verify it's a - * regular file, or empty directory. - */ - if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { - /* - * Careful about locking second inode. - * This can only occur if the target is ".". 
- */ - if (dp->i_number == dp->i_ino) { - error =EISDIR; - goto out; - } - if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) - goto out; - *vpp = tdp; - - goto out; - } - - /* - * Step through the translation in the name. We do not `vnode_put' the - * directory because we may need it again if a symbolic link - * is relative to the current directory. Instead we save it - * unlocked as "pdp". We must get the target inode before unlocking - * the directory to insure that the inode will not be removed - * before we get it. We prevent deadlock by always fetching - * inodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the VFS_VGET for the - * inode associated with ".." returns. We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - * Note also that this simple deadlock detection scheme will not - * work if the file system has any hard links other than ".." - * that point backwards in the directory structure. - */ - pdp = vdp; - if (flags & ISDOTDOT) { - if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) { - goto out; - } - *vpp = tdp; - } else if (dp->i_number == dp->i_ino) { - vnode_get(vdp); /* we want ourself, ie "." 
*/ - *vpp = vdp; - } else { - if (error = ffs_vget_internal(vdp->v_mount, dp->i_ino, &tdp, vdp, cnp, 0, 0)) - goto out; - *vpp = tdp; - } - - error = 0; -out: - return (error); -} - -void -ufs_dirbad(ip, offset, how) - struct inode *ip; - doff_t offset; - const char *how; -{ - struct mount *mp; - - mp = ITOV(ip)->v_mount; - (void)printf("%s: bad dir ino %d at offset %d: %s\n", - mp->mnt_vfsstat.f_mntonname, ip->i_number, offset, how); -#if 0 - if ((mp->mnt_vfsstat.f_flags & MNT_RDONLY) == 0) - panic("bad dir"); -#endif -} - -/* - * Do consistency checking on a directory entry: - * record length must be multiple of 4 - * entry must fit in rest of its DIRBLKSIZ block - * record must be large enough to contain entry - * name is not longer than UFSMAXNAMLEN - * name must be as long as advertised, and null terminated - */ -int -ufs_dirbadentry(dp, ep, entryoffsetinblock) - struct vnode *dp; - register struct direct *ep; - int entryoffsetinblock; -{ - register int i; - int namlen; - ino_t maxino = 0; - struct fs *fs; - struct ufsmount *ump = VFSTOUFS(dp->v_mount); - -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (dp->v_mount->mnt_maxsymlinklen > 0) - namlen = ep->d_namlen; - else - namlen = ep->d_type; -# else - namlen = ep->d_namlen; -# endif - if ((ep->d_reclen & 0x3) != 0 || - ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || - ep->d_reclen < DIRSIZ(FSFMT(dp), ep) || namlen > UFSMAXNAMLEN) { - /*return (1); */ - printf("First bad\n"); - goto bad; - } - if (ep->d_ino == 0) - return (0); - for (i = 0; i < namlen; i++) - if (ep->d_name[i] == '\0') { - /*return (1); */ - printf("Second bad\n"); - goto bad; - } - if (ep->d_name[i]) - goto bad; - - fs = ump->um_fs; - maxino = fs->fs_ncg * fs->fs_ipg; - if (ep->d_ino > maxino) { - printf("Third bad\n"); - goto bad; - } - - return (0); -bad: - return (1); -} - -/* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. 
The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. - */ -int -ufs_direnter(ip, dvp, cnp) - struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; -{ - register struct inode *dp; - struct direct newdir; - - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (dvp->v_mount->mnt_maxsymlinklen > 0) - newdir.d_type = IFTODT(ip->i_mode); - else { - newdir.d_type = 0; -# if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } -# endif - } - return (ufs_direnter2(dvp, &newdir, cnp->cn_context)); -} - -/* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout - */ -int -ufs_direnter2(struct vnode *dvp, struct direct *dirp, vfs_context_t ctx) -{ - int newentrysize; - struct inode *dp; - struct buf *bp; - uio_t auio; - u_int dsize; - struct direct *ep, *nep; - int error, loc, spacefree; - char *dirbuf; - char uio_buf[ UIO_SIZEOF(1) ]; -#if REV_ENDIAN_FS - struct mount *mp=dvp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - dp = VTOI(dvp); - newentrysize = DIRSIZ(FSFMT(dvp), dirp); - - if (dp->i_count == 0) { - /* - * If dp->i_count is 0, then namei could find no - * space in the directory. Here, dp->i_offset will - * be on a directory block boundary and we will write the - * new entry into a fresh block. 
- */ - if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - dirp->d_reclen = DIRBLKSIZ; - auio = uio_createwithbuffer(1, dp->i_offset, UIO_SYSSPACE, UIO_WRITE, - &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, CAST_USER_ADDR_T(dirp), newentrysize); - - error = ffs_write_internal(dvp, auio, IO_SYNC, vfs_context_ucred(ctx)); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_vfsstat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_size = roundup(dp->i_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; - } - return (error); - } - - /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. - */ - - /* - * Increase size of directory if entry eats into new space. - * This should never push the size past a new multiple of - * DIRBLKSIZE. - * - * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. - */ - if (dp->i_offset + dp->i_count > dp->i_size) - dp->i_size = dp->i_offset + dp->i_count; - /* - * Get the block containing the space for the new directory entry. - */ - if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) - return (error); - /* - * Find space for the new entry. In the simple case, the entry at - * offset base will have the space. If it does not, then namei - * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. 
- */ - ep = (struct direct *)dirbuf; - dsize = DIRSIZ(FSFMT(dvp), ep); - spacefree = ep->d_reclen - dsize; - for (loc = ep->d_reclen; loc < dp->i_count; ) { - nep = (struct direct *)(dirbuf + loc); - if (ep->d_ino) { - /* trim the existing slot */ - ep->d_reclen = dsize; - ep = (struct direct *)((char *)ep + dsize); - } else { - /* overwrite; nothing there; header is ours */ - spacefree += dsize; - } - dsize = DIRSIZ(FSFMT(dvp), nep); - spacefree += nep->d_reclen - dsize; - loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); - } - /* - * Update the pointer fields in the previous entry (if any), - * copy in the new entry, and write out the block. - */ - if (ep->d_ino == 0 || - (ep->d_ino == WINO && - bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { - if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); - dirp->d_reclen = spacefree + dsize; - } else { - if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); - dirp->d_reclen = spacefree; - ep->d_reclen = dsize; - ep = (struct direct *)((char *)ep + dsize); - } - bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - if (mp->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = VNOP_BWRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) - error = ffs_truncate_internal(dvp, (off_t)dp->i_endoff, IO_SYNC, vfs_context_ucred(ctx)); - - return (error); -} - -/* - * Remove a directory entry after a call to namei, using - * the parameters which it left in nameidata. The entry - * dp->i_offset contains the offset into the directory of the - * entry to be eliminated. The dp->i_count field contains the - * size of the previous record in the directory. If this - * is 0, the first entry is being deleted, so we need only - * zero the inode number to mark the entry as free. 
If the - * entry is not the first in the directory, we must reclaim - * the space of the now empty record by adding the record size - * to the size of the previous entry. - */ -int -ufs_dirremove(dvp, cnp) - struct vnode *dvp; - struct componentname *cnp; -{ - register struct inode *dp; - struct direct *ep; - struct buf *bp; - int error; -#if REV_ENDIAN_FS - struct mount *mp=dvp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - - dp = VTOI(dvp); - - if (cnp->cn_flags & DOWHITEOUT) { - /* - * Whiteout entry: set d_ino to WINO. - */ - if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) - return (error); - ep->d_ino = WINO; - ep->d_type = DT_WHT; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - if (mp->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = VNOP_BWRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); - } - - if (dp->i_count == 0) { - /* - * First entry in block: set d_ino to zero. - */ - if (error = ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) - return (error); - ep->d_ino = 0; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - if (mp->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = VNOP_BWRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); - } - /* - * Collapse new free space into previous entry. - */ - if (error = ffs_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count), - (char **)&ep, &bp)) - return (error); - ep->d_reclen += dp->i_reclen; -#if REV_ENDIAN_FS - if (rev_endian) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - if (mp->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = VNOP_BWRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - - return (error); -} - -/* - * Rewrite an existing directory entry to point at the inode - * supplied. 
The parameters describing the directory entry are - * set up by a call to namei. - */ -int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; -{ - struct buf *bp; - struct direct *ep; - struct vnode *vdp = ITOV(dp); - int error; - - if (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, &bp)) - return (error); - ep->d_ino = ip->i_number; - if (vdp->v_mount->mnt_maxsymlinklen > 0) - ep->d_type = IFTODT(ip->i_mode); -#if REV_ENDIAN_FS - if (vdp->v_mount->mnt_flag & MNT_REVEND) - byte_swap_dir_block_out(bp); -#endif /* REV_ENDIAN_FS */ - if (vdp->v_mount->mnt_flag & MNT_ASYNC) { - error = 0; - buf_bdwrite(bp); - } else { - error = VNOP_BWRITE(bp); - } - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); -} - -/* - * Check if a directory is empty or not. - * Inode supplied must be locked. - * - * Using a struct dirtemplate here is not precisely - * what we want, but better than using a struct direct. - * - * NB: does not handle corrupted directories. - */ -int -ufs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) -{ - register off_t off; - struct dirtemplate dbuf; - register struct direct *dp = (struct direct *)&dbuf; - int error, count, namlen; -#if REV_ENDIAN_FS - struct vnode *vp=ITOV(ip); - struct mount *mp=vp->v_mount; - int rev_endian=(mp->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - -#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) - - for (off = 0; off < ip->i_size; off += dp->d_reclen) { - error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, - UIO_SYSSPACE32, IO_NODELOCKED, cred, &count, (struct proc *)0); - /* - * Since we read MINDIRSIZ, residual must - * be 0 unless we're at end of file. 
- */ - if (error || count != 0) - return (0); -#if 0 /*REV_ENDIAN_FS */ - if (rev_endian) - byte_swap_minidir_in(dp); -#endif /* REV_ENDIAN_FS */ - /* avoid infinite loops */ - if (dp->d_reclen == 0) - return (0); - /* skip empty entries */ - if (dp->d_ino == 0 || dp->d_ino == WINO) - continue; - /* accept only "." and ".." */ -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) - namlen = dp->d_namlen; - else - namlen = dp->d_type; -# else - namlen = dp->d_namlen; -# endif - if (namlen > 2) - return (0); - if (dp->d_name[0] != '.') - return (0); - /* - * At this point namlen must be 1 or 2. - * 1 implies ".", 2 implies ".." if second - * char is also "." - */ - if (namlen == 1) - continue; - if (dp->d_name[1] == '.' && dp->d_ino == parentino) - continue; - return (0); - } - return (1); -} - -/* - * Check if source directory is in the path of the target directory. - * Target is supplied locked, source is unlocked. - */ -int -ufs_checkpath(source, target, cred) - struct inode *source, *target; - kauth_cred_t cred; -{ - struct vnode *vp; - int error, rootino, namlen; - int need_put = 0; - struct dirtemplate dirbuf; - - vp = ITOV(target); - if (target->i_number == source->i_number) { - error = EEXIST; - goto out; - } - rootino = ROOTINO; - error = 0; - if (target->i_number == rootino) - goto out; - - for (;;) { - if (vp->v_type != VDIR) { - error = ENOTDIR; - break; - } - error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE32, - IO_NODELOCKED, cred, (int *)0, (struct proc *)0); - if (error != 0) - break; -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (vp->v_mount->mnt_maxsymlinklen > 0) - namlen = dirbuf.dotdot_namlen; - else - namlen = dirbuf.dotdot_type; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' 
|| - dirbuf.dotdot_name[1] != '.') { - error = ENOTDIR; - break; - } - if (dirbuf.dotdot_ino == source->i_number) { - error = EINVAL; - break; - } - if (dirbuf.dotdot_ino == rootino) - break; - - if (need_put) - vnode_put(vp); - - if (error = VFS_VGET(vp->v_mount, (ino64_t)dirbuf.dotdot_ino, &vp, NULL)) { /* XXX need context */ - vp = NULL; - break; - } - need_put = 1; - } - -out: - if (error == ENOTDIR) - printf("checkpath: .. not a directory\n"); - if (need_put && vp) - vnode_put(vp); - - return (error); -} diff --git a/bsd/ufs/ufs/ufs_quota.c b/bsd/ufs/ufs/ufs_quota.c deleted file mode 100644 index df236996f..000000000 --- a/bsd/ufs/ufs/ufs_quota.c +++ /dev/null @@ -1,826 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1982, 1986, 1990, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Robert Elz at The University of Melbourne. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * Quota name to error message mapping. - */ -static char *quotatypes[] = INITQFNAMES; - -/* - * Set up the quotas for an inode. - * - * This routine completely defines the semantics of quotas. - * If other criterion want to be used to establish quotas, the - * MAXQUOTAS value in quotas.h should be increased, and the - * additional dquots set up here. - */ -int -getinoquota(ip) - register struct inode *ip; -{ - struct ufsmount *ump; - struct vnode *vp = ITOV(ip); - int error; - - ump = VFSTOUFS(vp->v_mount); - /* - * Set up the user quota based on file uid. - * EINVAL means that quotas are not enabled. - */ - if (ip->i_dquot[USRQUOTA] == NODQUOT && - (error = - dqget(ip->i_uid, &ump->um_qfiles[USRQUOTA], USRQUOTA, &ip->i_dquot[USRQUOTA])) && - error != EINVAL) - return (error); - /* - * Set up the group quota based on file gid. - * EINVAL means that quotas are not enabled. - */ - if (ip->i_dquot[GRPQUOTA] == NODQUOT && - (error = - dqget(ip->i_gid, &ump->um_qfiles[GRPQUOTA], GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && - error != EINVAL) - return (error); - return (0); -} - -/* - * Update disk usage, and take corrective action. 
- */ -int -chkdq(struct inode *ip, int64_t change, kauth_cred_t cred, int flags) -{ - register struct dquot *dq; - register int i; - int64_t ncurbytes; - int error; - struct proc *p; - -#if DIAGNOSTIC - if ((flags & CHOWN) == 0) - chkdquot(ip); -#endif - if (change == 0) - return (0); - if (change < 0) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - dqlock(dq); - - ncurbytes = dq->dq_curbytes + change; - if (ncurbytes >= 0) - dq->dq_curbytes = ncurbytes; - else - dq->dq_curbytes = 0; - dq->dq_flags &= ~DQ_BLKS; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - } - return (0); - } -#warning "hack for no cred passed to chkdq()" - /* - * This use of proc_ucred() is safe because kernproc credential never - * changes. - */ - p = current_proc(); - if (!IS_VALID_CRED(cred)) - cred = proc_ucred(kernproc); - if ((flags & FORCE) == 0 && (suser(cred, NULL) || (proc_forcequota(p)))) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - if ( (error = chkdqchg(ip, change, cred, i)) ) - return (error); - } - } - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - dqlock(dq); - - dq->dq_curbytes += change; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - } - return (0); -} - -/* - * Check for a valid change to a users allocation. - * Issue an error message if appropriate. - */ -int -chkdqchg(struct inode *ip, int64_t change, kauth_cred_t cred, int type) -{ - register struct dquot *dq = ip->i_dquot[type]; - u_int64_t ncurbytes; - - dqlock(dq); - - ncurbytes = dq->dq_curbytes + change; - /* - * If user would exceed their hard limit, disallow space allocation. 
- */ - if (ncurbytes >= dq->dq_bhardlimit && dq->dq_bhardlimit) { - if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == kauth_cred_getuid(cred)) { -#if 1 - printf("\n%s: write failed, %s disk limit reached\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type]); -#endif - dq->dq_flags |= DQ_BLKS; - } - dqunlock(dq); - - return (EDQUOT); - } - /* - * If user is over their soft limit for too long, disallow space - * allocation. Reset time limit as they cross their soft limit. - */ - if (ncurbytes >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { - struct timeval tv; - - microtime(&tv); - if (dq->dq_curbytes < dq->dq_bsoftlimit) { - dq->dq_btime = tv.tv_sec + - VFSTOUFS(ITOV(ip)->v_mount)->um_qfiles[type].qf_btime; -#if 1 - if (ip->i_uid == kauth_cred_getuid(cred)) - printf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type], "disk quota exceeded"); -#endif - dqunlock(dq); - - return (0); - } - if (tv.tv_sec > dq->dq_btime) { - if ((dq->dq_flags & DQ_BLKS) == 0 && - ip->i_uid == kauth_cred_getuid(cred)) { -#if 1 - printf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type], - "disk quota exceeded for too long"); -#endif - dq->dq_flags |= DQ_BLKS; - } - dqunlock(dq); - - return (EDQUOT); - } - } - dqunlock(dq); - - return (0); -} - -/* - * Check the inode limit, applying corrective action. 
- */ -int -chkiq(struct inode *ip, long change, kauth_cred_t cred, int flags) -{ - register struct dquot *dq; - register int i; - int ncurinodes, error; - struct proc *p; - -#if DIAGNOSTIC - if ((flags & CHOWN) == 0) - chkdquot(ip); -#endif - if (change == 0) - return (0); - if (change < 0) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - dqlock(dq); - - ncurinodes = dq->dq_curinodes + change; - if (ncurinodes >= 0) - dq->dq_curinodes = ncurinodes; - else - dq->dq_curinodes = 0; - dq->dq_flags &= ~DQ_INODS; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - } - return (0); - } -#warning "hack for no cred passed to chkiq()" - /* - * This use of proc_ucred() is safe because kernproc credential never - * changes. - */ - p = current_proc(); - if (!IS_VALID_CRED(cred)) - cred = proc_ucred(kernproc); - if ((flags & FORCE) == 0 && (suser(cred, NULL) || (proc_forcequota(p)))) { - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - if ( (error = chkiqchg(ip, change, cred, i)) ) - return (error); - } - } - for (i = 0; i < MAXQUOTAS; i++) { - if ((dq = ip->i_dquot[i]) == NODQUOT) - continue; - dqlock(dq); - - dq->dq_curinodes += change; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - } - return (0); -} - -/* - * Check for a valid change to a users allocation. - * Issue an error message if appropriate. - */ -int -chkiqchg(struct inode *ip, long change, kauth_cred_t cred, int type) -{ - register struct dquot *dq = ip->i_dquot[type]; - long ncurinodes; - - dqlock(dq); - - ncurinodes = dq->dq_curinodes + change; - /* - * If user would exceed their hard limit, disallow inode allocation. 
- */ - if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { - if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == kauth_cred_getuid(cred)) { -#if 1 - printf("\n%s: write failed, %s inode limit reached\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type]); -#endif - dq->dq_flags |= DQ_INODS; - } - dqunlock(dq); - - return (EDQUOT); - } - /* - * If user is over their soft limit for too long, disallow inode - * allocation. Reset time limit as they cross their soft limit. - */ - if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { - struct timeval tv; - - microtime(&tv); - if (dq->dq_curinodes < dq->dq_isoftlimit) { - dq->dq_itime = tv.tv_sec + - VFSTOUFS(ITOV(ip)->v_mount)->um_qfiles[type].qf_itime; -#if 1 - if (ip->i_uid == kauth_cred_getuid(cred)) - printf("\n%s: warning, %s %s\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type], "inode quota exceeded"); -#endif - dqunlock(dq); - - return (0); - } - if (tv.tv_sec > dq->dq_itime) { - if ((dq->dq_flags & DQ_INODS) == 0 && - ip->i_uid == kauth_cred_getuid(cred)) { -#if 1 - printf("\n%s: write failed, %s %s\n", - ITOV(ip)->v_mount->mnt_vfsstat.f_mntonname, - quotatypes[type], - "inode quota exceeded for too long"); -#endif - dq->dq_flags |= DQ_INODS; - } - dqunlock(dq); - - return (EDQUOT); - } - } - dqunlock(dq); - - return (0); -} - -#if DIAGNOSTIC -/* - * On filesystems with quotas enabled, it is an error for a file to change - * size and not to have a dquot structure associated with it. - */ -void -chkdquot(ip) - register struct inode *ip; -{ - struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); - register int i; - - for (i = 0; i < MAXQUOTAS; i++) { - if (ump->um_qfiles[i].qf_vp == NULLVP) - continue; - if (ip->i_dquot[i] == NODQUOT) { - vprint("chkdquot: missing dquot", ITOV(ip)); - panic("missing dquot"); - } - } -} -#endif - -/* - * Code to process quotactl commands. 
- */ - - -struct ufs_quotaon_cargs { - int error; -}; - - -static int -ufs_quotaon_callback(struct vnode *vp, void *cargs) -{ - struct ufs_quotaon_cargs *args; - - args = (struct ufs_quotaon_cargs *)cargs; - - if ( (args->error = getinoquota(VTOI(vp))) ) - return (VNODE_RETURNED_DONE); - - return (VNODE_RETURNED); -} - - -/* - * Q_QUOTAON - set up a quota file for a particular file system. - */ -int -quotaon(context, mp, type, fnamep) - vfs_context_t context; - struct mount *mp; - register int type; - caddr_t fnamep; -{ - struct ufsmount *ump = VFSTOUFS(mp); - struct quotafile *qfp; - struct vnode *vp; - int error = 0; - struct ufs_quotaon_cargs args; - - /* Finish setting up quota structures. */ - dqhashinit(); - - qfp = &ump->um_qfiles[type]; - - if ( (qf_get(qfp, QTF_OPENING)) ) - return (0); - - error = vnode_open(fnamep, FREAD|FWRITE, 0, 0, &vp, NULL); - if (error) { - goto out; - } - if (!vnode_isreg(vp)) { - (void) vnode_close(vp, FREAD|FWRITE, NULL); - error = EACCES; - goto out; - } - vfs_setflags(mp, (uint64_t)((unsigned int)MNT_QUOTA)); - vnode_setnoflush(vp); - /* - * Save the credential of the process that turned on quotas. - */ - qfp->qf_vp = vp; - qfp->qf_cred = vfs_context_ucred(context); - kauth_cred_ref(qfp->qf_cred); - - /* - * Finish initializing the quota file - */ - if ( (error = dqfileopen(&ump->um_qfiles[type], type)) ) { - (void) vnode_close(vp, FREAD|FWRITE, NULL); - - kauth_cred_unref(&qfp->qf_cred); - qfp->qf_vp = NULLVP; - goto out; - } - qf_put(qfp, QTF_OPENING); - - /* - * Search vnodes associated with this mount point, - * adding references to quota file being opened. - * NB: only need to add dquot's for inodes being modified. 
- * - * ufs_quota_callback will be called for each vnode open for - * 'write' (VNODE_WRITEABLE) hung off of this mount point - * the vnode will be in an 'unbusy' state (VNODE_WAIT) and - * properly referenced and unreferenced around the callback - */ - args.error = 0; - - vnode_iterate(mp, VNODE_WRITEABLE | VNODE_WAIT, ufs_quotaon_callback, (void *)&args); - - error = args.error; - - if (error) - quotaoff(mp, type); - return (error); -out: - qf_put(qfp, QTF_OPENING); - - return (error); -} - - - -struct ufs_quotaoff_cargs { - int type; -}; - -static int -ufs_quotaoff_callback(struct vnode *vp, void *cargs) -{ - struct ufs_quotaoff_cargs *args; - struct inode *ip; - struct dquot *dq; - - args = (struct ufs_quotaoff_cargs *)cargs; - - ip = VTOI(vp); - - dq = ip->i_dquot[args->type]; - ip->i_dquot[args->type] = NODQUOT; - - dqrele(dq); - - return (VNODE_RETURNED); -} - -/* - * Q_QUOTAOFF - turn off disk quotas for a filesystem. - */ -int -quotaoff(struct mount *mp, register int type) -{ - struct vnode *qvp; - struct ufsmount *ump = VFSTOUFS(mp); - struct quotafile *qfp; - int error = 0; - struct ufs_quotaoff_cargs args; - - /* - * If quotas haven't been initialized, there's no work to be done. - */ - if (!dqisinitialized()) - return (0); - - qfp = &ump->um_qfiles[type]; - - if ( (qf_get(qfp, QTF_CLOSING)) ) - return (0); - qvp = qfp->qf_vp; - - /* - * Sync out any orpaned dirty dquot entries. - */ - dqsync_orphans(qfp); - - /* - * Search vnodes associated with this mount point, - * deleting any references to quota file being closed. 
- * - * ufs_quotaoff_callback will be called for each vnode - * hung off of this mount point - * the vnode will be in an 'unbusy' state (VNODE_WAIT) and - * properly referenced and unreferenced around the callback - */ - args.type = type; - - vnode_iterate(mp, VNODE_WAIT, ufs_quotaoff_callback, (void *)&args); - - dqflush(qvp); - /* Finish tearing down the quota file */ - dqfileclose(qfp, type); - - vnode_clearnoflush(qvp); - error = vnode_close(qvp, FREAD|FWRITE, NULL); - - qfp->qf_vp = NULLVP; - if (IS_VALID_CRED(qfp->qf_cred)) { - kauth_cred_unref(&qfp->qf_cred); - } - for (type = 0; type < MAXQUOTAS; type++) - if (ump->um_qfiles[type].qf_vp != NULLVP) - break; - if (type == MAXQUOTAS) - mp->mnt_flag &= ~MNT_QUOTA; - - qf_put(qfp, QTF_CLOSING); - - return (error); -} - -/* - * Q_GETQUOTA - return current values in a dqblk structure. - */ -int -getquota(mp, id, type, datap) - struct mount *mp; - u_long id; - int type; - caddr_t datap; -{ - struct dquot *dq; - int error; - - if ( (error = dqget(id, &VFSTOUFS(mp)->um_qfiles[type], type, &dq)) ) - return (error); - dqlock(dq); - - bcopy(&dq->dq_dqb, datap, sizeof(dq->dq_dqb)); - - dqunlock(dq); - dqrele(dq); - - return (error); -} - -/* - * Q_SETQUOTA - assign an entire dqblk structure. - */ -int -setquota(mp, id, type, datap) - struct mount *mp; - u_long id; - int type; - caddr_t datap; -{ - struct dquot *dq; - struct ufsmount *ump = VFSTOUFS(mp); - struct dqblk * newlimp = (struct dqblk *) datap; - struct timeval tv; - int error; - - error = dqget(id, &ump->um_qfiles[type], type, &dq); - if (error) - return (error); - dqlock(dq); - - /* - * Copy all but the current values. - * Reset time limit if previously had no soft limit or were - * under it, but now have a soft limit and are over it. 
- */ - newlimp->dqb_curbytes = dq->dq_curbytes; - newlimp->dqb_curinodes = dq->dq_curinodes; - if (dq->dq_id != 0) { - newlimp->dqb_btime = dq->dq_btime; - newlimp->dqb_itime = dq->dq_itime; - } - if (newlimp->dqb_bsoftlimit && - dq->dq_curbytes >= newlimp->dqb_bsoftlimit && - (dq->dq_bsoftlimit == 0 || dq->dq_curbytes < dq->dq_bsoftlimit)) { - microtime(&tv); - newlimp->dqb_btime = tv.tv_sec + ump->um_qfiles[type].qf_btime; - } - if (newlimp->dqb_isoftlimit && - dq->dq_curinodes >= newlimp->dqb_isoftlimit && - (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) { - microtime(&tv); - newlimp->dqb_itime = tv.tv_sec + ump->um_qfiles[type].qf_itime; - } - bcopy(newlimp, &dq->dq_dqb, sizeof(dq->dq_dqb)); - if (dq->dq_curbytes < dq->dq_bsoftlimit) - dq->dq_flags &= ~DQ_BLKS; - if (dq->dq_curinodes < dq->dq_isoftlimit) - dq->dq_flags &= ~DQ_INODS; - if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && - dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) - dq->dq_flags |= DQ_FAKE; - else - dq->dq_flags &= ~DQ_FAKE; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - dqrele(dq); - - return (0); -} - -/* - * Q_SETUSE - set current inode and byte usage. - */ -int -setuse(mp, id, type, datap) - struct mount *mp; - u_long id; - int type; - caddr_t datap; -{ - struct dquot *dq; - struct ufsmount *ump = VFSTOUFS(mp); - struct timeval tv; - int error; - struct dqblk *quotablkp = (struct dqblk *) datap; - - error = dqget(id, &ump->um_qfiles[type], type, &dq); - if (error) - return (error); - dqlock(dq); - - /* - * Reset time limit if have a soft limit and were - * previously under it, but are now over it. 
- */ - if (dq->dq_bsoftlimit && dq->dq_curbytes < dq->dq_bsoftlimit && - quotablkp->dqb_curbytes >= dq->dq_bsoftlimit) { - microtime(&tv); - dq->dq_btime = tv.tv_sec + ump->um_qfiles[type].qf_btime; - } - if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && - quotablkp->dqb_curinodes >= dq->dq_isoftlimit) { - microtime(&tv); - dq->dq_itime = tv.tv_sec + ump->um_qfiles[type].qf_itime; - } - dq->dq_curbytes = quotablkp->dqb_curbytes; - dq->dq_curinodes = quotablkp->dqb_curinodes; - if (dq->dq_curbytes < dq->dq_bsoftlimit) - dq->dq_flags &= ~DQ_BLKS; - if (dq->dq_curinodes < dq->dq_isoftlimit) - dq->dq_flags &= ~DQ_INODS; - dq->dq_flags |= DQ_MOD; - - dqunlock(dq); - dqrele(dq); - - return (0); -} - - - -static int -ufs_qsync_callback(struct vnode *vp, __unused void *cargs) -{ - struct inode *ip; - struct dquot *dq; - int i; - - ip = VTOI(vp); - - for (i = 0; i < MAXQUOTAS; i++) { - dq = ip->i_dquot[i]; - if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) - dqsync(dq); - } - return (VNODE_RETURNED); -} - - -/* - * Q_SYNC - sync quota files to disk. - */ -int -qsync(mp) - struct mount *mp; -{ - struct ufsmount *ump = VFSTOUFS(mp); - int i; - - if (!dqisinitialized()) - return (0); - - /* - * Check if the mount point has any quotas. - * If not, simply return. - */ - for (i = 0; i < MAXQUOTAS; i++) - if (ump->um_qfiles[i].qf_vp != NULLVP) - break; - if (i == MAXQUOTAS) - return (0); - /* - * Search vnodes associated with this mount point, - * synchronizing any modified dquot structures. 
- * - * ufs_qsync_callback will be called for each vnode - * hung off of this mount point - * the vnode will be - * properly referenced and unreferenced around the callback - */ - vnode_iterate(mp, 0, ufs_qsync_callback, (void *)NULL); - - return (0); -} - -/* - * Q_QUOTASTAT - get quota on/off status - */ -int -quotastat(mp, type, datap) - struct mount *mp; - register int type; - caddr_t datap; -{ - struct ufsmount *ump = VFSTOUFS(mp); - int error = 0; - int qstat; - - if ((mp->mnt_flag & MNT_QUOTA) && (ump->um_qfiles[type].qf_vp != NULLVP)) - qstat = 1; /* quotas are on for this type */ - else - qstat = 0; /* quotas are off for this type */ - *((int *)datap) = qstat; - return (error); -} - diff --git a/bsd/ufs/ufs/ufs_readwrite.c b/bsd/ufs/ufs/ufs_readwrite.c deleted file mode 100644 index 50fdce582..000000000 --- a/bsd/ufs/ufs/ufs_readwrite.c +++ /dev/null @@ -1,658 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/*- - * Copyright (c) 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 - */ - -#include -#include - - -#define BLKSIZE(a, b, c) blksize(a, b, c) -#define FS struct fs -#define I_FS i_fs - - - -/* - * Vnode op for reading. - */ -/* ARGSUSED */ -ffs_read(ap) - struct vnop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - vfs_context_t a_context; - } */ *ap; -{ - return(ffs_read_internal(ap->a_vp, ap->a_uio, ap->a_ioflag)); -} - - -int -ffs_read_internal(vnode_t vp, struct uio *uio, int ioflag) -{ - struct inode *ip; - FS *fs; - buf_t bp = (struct buf *)0; - ufs_daddr_t lbn, nextlbn; - off_t bytesinfile; - long size, xfersize, blkoffset; - int error; - u_short mode; -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); - mode = ip->i_mode; - -#if REV_ENDIAN_FS - rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - -#if DIAGNOSTIC - if (uio->uio_rw != UIO_READ) - panic("ffs_read: invalid uio_rw = %x", uio->uio_rw); - - if (vp->v_type == VLNK) { - if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) - panic("ffs_read: short symlink = %d", ip->i_size); - } else if (vp->v_type != VREG && vp->v_type != VDIR) - panic("ffs_read: invalid v_type = %x", vp->v_type); -#endif - fs = ip->I_FS; - if (uio->uio_offset < 0) - return (EINVAL); - if (uio->uio_offset > fs->fs_maxfilesize) - return (EFBIG); - - if (UBCINFOEXISTS(vp)) { - error = cluster_read(vp, uio, (off_t)ip->i_size, ioflag); - } 
else { - for (error = 0, bp = NULL; uio_resid(uio) > 0; - bp = NULL) { - char *buf_data; - - if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) - break; - lbn = lblkno(fs, uio->uio_offset); - nextlbn = lbn + 1; - size = BLKSIZE(fs, ip, lbn); - blkoffset = blkoff(fs, uio->uio_offset); - xfersize = fs->fs_bsize - blkoffset; - // LP64todo - fix this - if (uio_resid(uio) < xfersize) - xfersize = uio_resid(uio); - if (bytesinfile < xfersize) - xfersize = bytesinfile; - - if (lblktosize(fs, nextlbn) >= ip->i_size) - error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), size, NOCRED, &bp); - else if (lbn - 1 == ip->i_lastr && !(vp->v_flag & VRAOFF)) { - int nextsize = BLKSIZE(fs, ip, nextlbn); - error = (int)buf_breadn(vp, (daddr64_t)((unsigned)lbn), - size, &nextlbn, &nextsize, 1, NOCRED, &bp); - } else - error = (int)buf_bread(vp, lbn, size, NOCRED, &bp); - if (error) - break; - ip->i_lastr = lbn; - - /* - * We should only get non-zero buffer resid when an I/O error - * has occurred, which should cause us to break above. - * However, if the short read did not cause an error, - * then we want to ensure that we do not uiomove bad - * or uninitialized data. 
- */ - size -= buf_resid(bp); - if (size < xfersize) { - if (size == 0) - break; - xfersize = size; - } - buf_data = (char *)buf_dataptr(bp); -#if REV_ENDIAN_FS - if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_block_in(buf_data + blkoffset, xfersize); - } -#endif /* REV_ENDIAN_FS */ - if (error = - uiomove(buf_data + blkoffset, (int)xfersize, uio)) { -#if REV_ENDIAN_FS - if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_block_in(buf_data + blkoffset, xfersize); - } -#endif /* REV_ENDIAN_FS */ - break; - } - -#if REV_ENDIAN_FS - if (rev_endian && S_ISDIR(mode)) { - byte_swap_dir_out(buf_data + blkoffset, xfersize); - } -#endif /* REV_ENDIAN_FS */ - if (S_ISREG(mode) && (xfersize + blkoffset == fs->fs_bsize || - uio->uio_offset == ip->i_size)) - buf_markaged(bp); - buf_brelse(bp); - } - } - if (bp != NULL) - buf_brelse(bp); - if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) - ip->i_flag |= IN_ACCESS; - return (error); -} - -/* - * Vnode op for writing. - */ -ffs_write(ap) - struct vnop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - vfs_context_t a_context; - } */ *ap; -{ - return(ffs_write_internal(ap->a_vp, ap->a_uio, ap->a_ioflag, vfs_context_ucred(ap->a_context))); -} - - -ffs_write_internal(vnode_t vp, struct uio *uio, int ioflag, kauth_cred_t cred) -{ - buf_t bp; - proc_t p; - struct inode *ip; - FS *fs; - ufs_daddr_t lbn; - off_t osize; - int blkoffset, flags, resid, rsd, size, xfersize; - int save_error=0, save_size=0; - int blkalloc = 0; - int error = 0; - int file_extended = 0; - int doingdirectory = 0; - user_ssize_t clippedsize = 0; /* Truncate writes near fs->fs_maxfilesize */ - user_ssize_t residcount, oldcount; - int partialwrite=0; - -#if REV_ENDIAN_FS - int rev_endian=0; -#endif /* REV_ENDIAN_FS */ - - ip = VTOI(vp); -#if REV_ENDIAN_FS - rev_endian=(vp->v_mount->mnt_flag & MNT_REVEND); -#endif /* REV_ENDIAN_FS */ - -#if DIAGNOSTIC - if (uio->uio_rw != UIO_WRITE) - panic("ffs_write: uio_rw = %x\n", uio->uio_rw); 
-#endif - - switch (vp->v_type) { - case VREG: - if (ioflag & IO_APPEND) - uio->uio_offset = ip->i_size; - if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) - return (EPERM); - /* FALLTHROUGH */ - case VLNK: - break; - case VDIR: - doingdirectory = 1; - if ((ioflag & IO_SYNC) == 0) - panic("ffs_write: nonsync dir write"); - break; - default: - panic("ffs_write: invalid v_type=%x", vp->v_type); - } - - fs = ip->I_FS; - if (uio->uio_offset < 0) - return (EFBIG); - if ( uio_resid(uio) > fs->fs_maxfilesize - uio->uio_offset ) { - residcount = uio_resid(uio); - clippedsize = residcount - (fs->fs_maxfilesize - uio->uio_offset); - if (clippedsize >= residcount) { - return (EFBIG); - } else { - uio_setresid(uio, residcount - clippedsize); - partialwrite = 1; - } - } - if (uio_resid(uio) == 0) - return (0); - - // LP64todo - fix this - resid = uio_resid(uio); - osize = ip->i_size; - flags = 0; - if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) - flags = B_SYNC; - - if (UBCINFOEXISTS(vp)) { - off_t filesize; - off_t endofwrite; - off_t local_offset; - off_t head_offset; - int local_flags; - int first_block; - int fboff; - int fblk; - int loopcount; - - // LP64todo - fix this - endofwrite = uio->uio_offset + uio_resid(uio); - - if (endofwrite > ip->i_size) { - filesize = endofwrite; - file_extended = 1; - } else - filesize = ip->i_size; - - head_offset = ip->i_size; - - /* Go ahead and allocate the block that are going to be written */ - // LP64todo - fix this - rsd = uio_resid(uio); - local_offset = uio->uio_offset; - local_flags = 0; - if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) - local_flags = B_SYNC; - local_flags |= B_NOBUFF; - - first_block = 1; - fboff = 0; - fblk = 0; - loopcount = 0; - - for (error = 0; rsd > 0;) { - blkalloc = 0; - lbn = lblkno(fs, local_offset); - blkoffset = blkoff(fs, local_offset); - xfersize = fs->fs_bsize - blkoffset; - if (first_block) - fboff = blkoffset; - if (rsd < xfersize) - xfersize = 
rsd; - if (fs->fs_bsize > xfersize) - local_flags |= B_CLRBUF; - else - local_flags &= ~B_CLRBUF; - - /* Allocate block without reading into a buf */ - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, cred, - &bp, local_flags, &blkalloc); - if (error) - break; - if (first_block) { - fblk = blkalloc; - first_block = 0; - } - loopcount++; - - rsd -= xfersize; - local_offset += (off_t)xfersize; - if (local_offset > ip->i_size) - ip->i_size = local_offset; - } - - if(error) { - save_error = error; - save_size = rsd; - uio_setresid(uio, (uio_resid(uio) - rsd)); - if (file_extended) - filesize -= rsd; - } - - flags = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY); - - if((error == 0) && fblk && fboff) { - if( fblk > fs->fs_bsize) - panic("ffs_balloc : allocated more than bsize(head)"); - /* We need to zero out the head */ - head_offset = uio->uio_offset - (off_t)fboff ; - flags |= IO_HEADZEROFILL; - } - - if((error == 0) && blkalloc && ((blkalloc - xfersize) > 0)) { - /* We need to zero out the tail */ - if( blkalloc > fs->fs_bsize) - panic("ffs_balloc : allocated more than bsize(tail)"); - local_offset += (blkalloc - xfersize); - if (loopcount == 1) { - /* blkalloc is same as fblk; so no need to check again*/ - local_offset -= fboff; - } - flags |= IO_TAILZEROFILL; - /* Freshly allocated block; bzero even if - * find a page - */ - /* flags &= ~IO_NOZEROVALID; */ - } - /* - * if the write starts beyond the current EOF then - * we we'll zero fill from the current EOF to where the write begins - */ - - error = cluster_write(vp, uio, osize, filesize, head_offset, local_offset, flags); - - if (uio->uio_offset > osize) { - if (error && ((ioflag & IO_UNIT)==0)) - (void)ffs_truncate_internal(vp, uio->uio_offset, ioflag & IO_SYNC, cred); - ip->i_size = uio->uio_offset; - ubc_setsize(vp, (off_t)ip->i_size); - } - if(save_error) { - uio_setresid(uio, (uio_resid(uio) + save_size)); - if(!error) - error = save_error; - } - ip->i_flag |= IN_CHANGE 
| IN_UPDATE; - } else { - flags = 0; - if ((ioflag & IO_SYNC) && !((vp)->v_mount->mnt_flag & MNT_ASYNC)) - flags = B_SYNC; - - for (error = 0; uio_resid(uio) > 0;) { - char *buf_data; - - lbn = lblkno(fs, uio->uio_offset); - blkoffset = blkoff(fs, uio->uio_offset); - xfersize = fs->fs_bsize - blkoffset; - if (uio_resid(uio) < xfersize) - // LP64todo - fix this - xfersize = uio_resid(uio); - - if (fs->fs_bsize > xfersize) - flags |= B_CLRBUF; - else - flags &= ~B_CLRBUF; - - error = ffs_balloc(ip, lbn, blkoffset + xfersize, cred, &bp, flags, 0); - if (error) - break; - if (uio->uio_offset + xfersize > ip->i_size) { - ip->i_size = uio->uio_offset + xfersize; - ubc_setsize(vp, (u_long)ip->i_size); - } - - size = BLKSIZE(fs, ip, lbn) - buf_resid(bp); - if (size < xfersize) - xfersize = size; - - buf_data = (char *)buf_dataptr(bp); - - error = uiomove(buf_data + blkoffset, (int)xfersize, uio); -#if REV_ENDIAN_FS - if (rev_endian && S_ISDIR(ip->i_mode)) { - byte_swap_dir_out(buf_data + blkoffset, xfersize); - } -#endif /* REV_ENDIAN_FS */ - if (doingdirectory == 0 && (ioflag & IO_SYNC)) - (void)buf_bwrite(bp); - else if (xfersize + blkoffset == fs->fs_bsize) { - buf_markaged(bp); - buf_bdwrite(bp); - } - else - buf_bdwrite(bp); - if (error || xfersize == 0) - break; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } - } - /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. - */ - if (resid > uio_resid(uio) && cred && suser(cred, NULL)) - ip->i_mode &= ~(ISUID | ISGID); - if (resid > uio_resid(uio)) - VN_KNOTE(vp, NOTE_WRITE | (file_extended ? 
NOTE_EXTEND : 0)); - if (error) { - if (ioflag & IO_UNIT) { - (void)ffs_truncate_internal(vp, osize, ioflag & IO_SYNC, cred); - // LP64todo - fix this - uio->uio_offset -= resid - uio_resid(uio); - uio_setresid(uio, resid); - } - } else if (resid > uio_resid(uio) && (ioflag & IO_SYNC)) { - struct timeval tv; - - microtime(&tv); - error = ffs_update(vp, &tv, &tv, 1); - } - if (partialwrite) { - oldcount = uio_resid(uio); - uio_setresid(uio, oldcount + clippedsize); - } - return (error); -} - -/* - * Vnode op for pagein. - * Similar to ffs_read() - */ -/* ARGSUSED */ -ffs_pagein(ap) - struct vnop_pagein_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t a_pl_offset, - off_t a_f_offset, - size_t a_size, - int a_flags - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - upl_t pl = ap->a_pl; - size_t size= ap->a_size; - off_t f_offset = ap->a_f_offset; - vm_offset_t pl_offset = ap->a_pl_offset; - int flags = ap->a_flags; - register struct inode *ip; - int error; - - ip = VTOI(vp); - -#if DIAGNOSTIC - if (vp->v_type == VLNK) { - if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) - panic("%s: short symlink", "ffs_pagein"); - } else if (vp->v_type != VREG && vp->v_type != VDIR) - panic("%s: type %d", "ffs_pagein", vp->v_type); -#endif - - error = cluster_pagein(vp, pl, pl_offset, f_offset, size, (off_t)ip->i_size, flags); - - /* ip->i_flag |= IN_ACCESS; */ - return (error); -} - -/* - * Vnode op for pageout. 
- * Similar to ffs_write() - * make sure the buf is not in hash queue when you return - */ -ffs_pageout(ap) - struct vnop_pageout_args /* { - struct vnode *a_vp, - upl_t a_pl, - vm_offset_t a_pl_offset, - off_t a_f_offset, - size_t a_size, - int a_flags - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - upl_t pl = ap->a_pl; - size_t size= ap->a_size; - off_t f_offset = ap->a_f_offset; - vm_offset_t pl_offset = ap->a_pl_offset; - int flags = ap->a_flags; - register struct inode *ip; - register FS *fs; - int error ; - size_t xfer_size = 0; - int local_flags=0; - off_t local_offset; - int resid, blkoffset; - size_t xsize, lsize; - daddr_t lbn; - int save_error =0, save_size=0; - vm_offset_t lupl_offset; - int nocommit = flags & UPL_NOCOMMIT; - int devBlockSize = 0; - struct buf *bp; - - ip = VTOI(vp); - - if (vp->v_mount->mnt_flag & MNT_RDONLY) { - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_FREE_ON_EMPTY); - return (EROFS); - } - fs = ip->I_FS; - - if (f_offset < 0 || f_offset >= ip->i_size) { - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_FREE_ON_EMPTY); - return (EINVAL); - } - - /* - * once we enable multi-page pageouts we will - * need to make sure we abort any pages in the upl - * that we don't issue an I/O for - */ - if (f_offset + size > ip->i_size) - xfer_size = ip->i_size - f_offset; - else - xfer_size = size; - - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - - if (xfer_size & (PAGE_SIZE - 1)) { - /* if not a multiple of page size - * then round up to be a multiple - * the physical disk block size - */ - xfer_size = (xfer_size + (devBlockSize - 1)) & ~(devBlockSize - 1); - } - - /* - * once the block allocation is moved to ufs_blockmap - * we can remove all the size and offset checks above - * cluster_pageout does all of this now - * we need to continue to do it here so as not to - * allocate blocks that aren't going to be used because - * of a bogus parameter being 
passed in - */ - local_flags = 0; - resid = xfer_size; - local_offset = f_offset; - for (error = 0; resid > 0;) { - lbn = lblkno(fs, local_offset); - blkoffset = blkoff(fs, local_offset); - xsize = fs->fs_bsize - blkoffset; - if (resid < xsize) - xsize = resid; - /* Allocate block without reading into a buf */ - error = ffs_blkalloc(ip, - lbn, blkoffset + xsize, vfs_context_ucred(ap->a_context), - local_flags); - if (error) - break; - resid -= xsize; - local_offset += (off_t)xsize; - } - - if (error) { - save_size = resid; - save_error = error; - xfer_size -= save_size; - } - - - error = cluster_pageout(vp, pl, pl_offset, f_offset, round_page_32(xfer_size), ip->i_size, flags); - - if(save_error) { - lupl_offset = size - save_size; - resid = round_page_32(save_size); - if (!nocommit) - ubc_upl_abort_range(pl, lupl_offset, resid, - UPL_ABORT_FREE_ON_EMPTY); - if(!error) - error= save_error; - } - return (error); -} diff --git a/bsd/ufs/ufs/ufs_vfsops.c b/bsd/ufs/ufs/ufs_vfsops.c deleted file mode 100644 index 26e59bafd..000000000 --- a/bsd/ufs/ufs/ufs_vfsops.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1991, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95 - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -/* - * Make a filesystem operational. - * Nothing to do at the moment. - */ -/* ARGSUSED */ -int -ufs_start(mp, flags, context) - struct mount *mp; - int flags; - vfs_context_t context; -{ - - return (0); -} - -/* - * Return the root of a filesystem. 
- */ -int -ufs_root(mp, vpp, context) - struct mount *mp; - struct vnode **vpp; - vfs_context_t context; -{ - struct vnode *nvp; - int error; - - if (error = VFS_VGET(mp, (ino64_t)ROOTINO, &nvp, context)) - return (error); - *vpp = nvp; - return (0); -} - -/* - * Do operations associated with quotas - */ -int -ufs_quotactl(mp, cmds, uid, datap, context) - struct mount *mp; - int cmds; - uid_t uid; - caddr_t datap; - vfs_context_t context; -{ - struct proc *p = vfs_context_proc(context); - int cmd, type, error; - -#if !QUOTA - return (ENOTSUP); -#else - if (uid == -1) - uid = vfs_context_ucred(context)->cr_ruid; - cmd = cmds >> SUBCMDSHIFT; - - switch (cmd) { - case Q_SYNC: - case Q_QUOTASTAT: - break; - case Q_GETQUOTA: - if (uid == vfs_context_ucred(context)->cr_ruid) - break; - /* fall through */ - default: - if (error = vfs_context_suser(context)) - return (error); - } - - type = cmds & SUBCMDMASK; - if ((u_int)type >= MAXQUOTAS) - return (EINVAL); - if (vfs_busy(mp, LK_NOWAIT)) - return (0); - - switch (cmd) { - - case Q_QUOTAON: - error = quotaon(context, mp, type, datap); - break; - - case Q_QUOTAOFF: - error = quotaoff(mp, type); - break; - - case Q_SETQUOTA: - error = setquota(mp, uid, type, datap); - break; - - case Q_SETUSE: - error = setuse(mp, uid, type, datap); - break; - - case Q_GETQUOTA: - error = getquota(mp, uid, type, datap); - break; - - case Q_SYNC: - error = qsync(mp); - break; - - case Q_QUOTASTAT: - error = quotastat(mp, type, datap); - break; - - default: - error = EINVAL; - break; - } - vfs_unbusy(mp); - - return (error); -#endif -} - -/* - * Initial UFS filesystems, done only once. - */ -int -ufs_init(vfsp) - struct vfsconf *vfsp; -{ - return (0); -} - diff --git a/bsd/ufs/ufs/ufs_vnops.c b/bsd/ufs/ufs/ufs_vnops.c deleted file mode 100644 index c3f07dd6d..000000000 --- a/bsd/ufs/ufs/ufs_vnops.c +++ /dev/null @@ -1,2041 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993, 1995 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include - -#if REV_ENDIAN_FS -#include -#endif /* REV_ENDIAN_FS */ - - -static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct proc *); -static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t, - struct proc *); -static int filt_ufsread(struct knote *kn, long hint); -static int filt_ufswrite(struct knote *kn, long hint); -static int filt_ufsvnode(struct knote *kn, long hint); -static void filt_ufsdetach(struct knote *kn); - -#if FIFO -extern void fifo_printinfo(struct vnode *vp); -#endif /* FIFO */ -extern int ufs_direnter2(struct vnode *dvp, struct direct *dirp, - vfs_context_t ctx); - -static int ufs_readdirext(vnode_t vp, uio_t uio, int *eofflag, int *numdirent, - vfs_context_t context); - -/* - * Create a regular file - */ -int -ufs_create(ap) - struct vnop_create_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_vattr *a_vap; - vfs_context_t a_context; - } */ *ap; -{ - int error; - - if ( (error = ufs_makeinode(ap->a_vap, ap->a_dvp, ap->a_vpp, ap->a_cnp)) ) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - return (0); -} - -/* - * Mknod vnode call - */ -int -ufs_mknod(ap) - struct vnop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode_attr *vap = ap->a_vap; - struct vnode **vpp = ap->a_vpp; - struct vnode *dvp = ap->a_dvp; - struct vnode *tvp; - struct inode *ip; - struct componentname *cnp = ap->a_cnp; - int error; - - /* use relookup to force correct directory hints */ - cnp->cn_flags &= ~MODMASK; - cnp->cn_flags |= (WANTPARENT | 
NOCACHE); - cnp->cn_nameiop = CREATE; - - (void) relookup(dvp, &tvp, cnp); - - /* get rid of reference relookup returned */ - if (tvp) - vnode_put(tvp); - - if ( (error = - ufs_makeinode(ap->a_vap, ap->a_dvp, vpp, ap->a_cnp)) ) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - ip = VTOI(*vpp); - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; - if (vap->va_rdev != VNOVAL) { - /* - * Want to be able to use this to make badblock - * inodes, so don't truncate the dev number. - */ - ip->i_rdev = vap->va_rdev; - } - return (0); -} - -/* - * Open called. - * - * Nothing to do. - */ -int -ufs_open(ap) - struct vnop_open_args /* { - struct vnode *a_vp; - int a_mode; - vfs_context_t a_context; - } */ *ap; -{ - - /* - * Files marked append-only must be opened for appending. - */ - if ((VTOI(ap->a_vp)->i_flags & APPEND) && - (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) - return (EPERM); - return (0); -} - -/* - * Close called. - * - * Update the times on the inode. - */ -int -ufs_close(ap) - struct vnop_close_args /* { - struct vnode *a_vp; - int a_fflag; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - struct timeval tv; - - if (vnode_isinuse(vp, 1)) { - microtime(&tv); - ITIMES(ip, &tv, &tv); - } - - cluster_push(vp, IO_CLOSE); - - return (0); -} - -int -ufs_getattr(ap) - struct vnop_getattr_args /* { - struct vnode *a_vp; - struct vnode_attr *a_vap; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - register struct vnode_attr *vap = ap->a_vap; - int devBlockSize=0; - struct timeval tv; - - microtime(&tv); - - ITIMES(ip, &tv, &tv); - /* - * Copy from inode table - */ - VATTR_RETURN(vap, va_fsid, ip->i_dev); - VATTR_RETURN(vap, va_fileid, ip->i_number); - VATTR_RETURN(vap, va_mode, ip->i_mode & ~IFMT); - VATTR_RETURN(vap, va_nlink, ip->i_nlink); - VATTR_RETURN(vap, va_uid, ip->i_uid); - VATTR_RETURN(vap, va_gid, 
ip->i_gid); - VATTR_RETURN(vap, va_rdev, (dev_t)ip->i_rdev); - VATTR_RETURN(vap, va_data_size, ip->i_din.di_size); - vap->va_access_time.tv_sec = ip->i_atime; - vap->va_access_time.tv_nsec = ip->i_atimensec; - VATTR_SET_SUPPORTED(vap, va_access_time); - vap->va_modify_time.tv_sec = ip->i_mtime; - vap->va_modify_time.tv_nsec = ip->i_mtimensec; - VATTR_SET_SUPPORTED(vap, va_modify_time); - vap->va_change_time.tv_sec = ip->i_ctime; - vap->va_change_time.tv_nsec = ip->i_ctimensec; - VATTR_SET_SUPPORTED(vap, va_change_time); - VATTR_RETURN(vap, va_flags, ip->i_flags); - VATTR_RETURN(vap, va_gen, ip->i_gen); - if (vp->v_type == VBLK) - VATTR_RETURN(vap, va_iosize, BLKDEV_IOSIZE); - else if (vp->v_type == VCHR) - VATTR_RETURN(vap, va_iosize, MAXPHYSIO); - else - VATTR_RETURN(vap, va_iosize, vp->v_mount->mnt_vfsstat.f_iosize); - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - VATTR_RETURN(vap, va_data_alloc, dbtob((u_quad_t)ip->i_blocks, devBlockSize)); - VATTR_RETURN(vap, va_type, vp->v_type); - VATTR_RETURN(vap, va_filerev, ip->i_modrev); - return (0); -} - -/* - * Set attribute vnode op. called from several syscalls - */ -int -ufs_setattr(ap) - struct vnop_setattr_args /* { - struct vnode *a_vp; - struct vnode_attr *a_vap; - struct proc *a_p; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode_attr *vap = ap->a_vap; - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - kauth_cred_t cred = vfs_context_ucred(ap->a_context); - struct proc *p = vfs_context_proc(ap->a_context); - struct timeval atimeval, mtimeval; - int error; - uid_t nuid; - gid_t ngid; - - /* - * Go through the fields and update iff set. - */ - if (VATTR_IS_ACTIVE(vap, va_flags)) { - ip->i_flags = vap->va_flags; - ip->i_flag |= IN_CHANGE; - } - VATTR_SET_SUPPORTED(vap, va_flags); - - nuid = VATTR_IS_ACTIVE(vap, va_uid) ? vap->va_uid : (uid_t)VNOVAL; - ngid = VATTR_IS_ACTIVE(vap, va_gid) ? 
vap->va_gid : (gid_t)VNOVAL; - if (nuid != (uid_t)VNOVAL || ngid != (gid_t)VNOVAL) { - if ( (error = ufs_chown(vp, nuid, ngid, cred, p)) ) - return (error); - } - VATTR_SET_SUPPORTED(vap, va_uid); - VATTR_SET_SUPPORTED(vap, va_gid); - - if (VATTR_IS_ACTIVE(vap, va_data_size)) { - if ( (error = ffs_truncate_internal(vp, vap->va_data_size, vap->va_vaflags & 0xffff, cred)) ) - return (error); - } - VATTR_SET_SUPPORTED(vap, va_data_size); - - ip = VTOI(vp); - if (VATTR_IS_ACTIVE(vap, va_access_time) || VATTR_IS_ACTIVE(vap, va_modify_time)) { - if (VATTR_IS_ACTIVE(vap, va_access_time)) - ip->i_flag |= IN_ACCESS; - if (VATTR_IS_ACTIVE(vap, va_modify_time)) - ip->i_flag |= IN_CHANGE | IN_UPDATE; - atimeval.tv_sec = vap->va_access_time.tv_sec; - atimeval.tv_usec = vap->va_access_time.tv_nsec / 1000; - mtimeval.tv_sec = vap->va_modify_time.tv_sec; - mtimeval.tv_usec = vap->va_modify_time.tv_nsec / 1000; - if ( (error = ffs_update(vp, &atimeval, &mtimeval, 1)) ) - return (error); - } - VATTR_SET_SUPPORTED(vap, va_access_time); - VATTR_SET_SUPPORTED(vap, va_modify_time); - - if (VATTR_IS_ACTIVE(vap, va_mode)) { - if ((error = ufs_chmod(vp, (int)vap->va_mode, cred, p))) - return (error); - } - VATTR_SET_SUPPORTED(vap, va_mode); - - VN_KNOTE(vp, NOTE_ATTRIB); - - return (0); -} - -/* - * Change the mode on a file. - * Inode must be locked before calling. - */ -static int -ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct proc *p) -{ - register struct inode *ip = VTOI(vp); - - ip->i_mode &= ~ALLPERMS; - ip->i_mode |= (mode & ALLPERMS); - ip->i_flag |= IN_CHANGE; - return (0); -} - -/* - * Perform chown operation on inode ip; - * inode must be locked prior to call. 
- */ -static int -ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, - struct proc *p) -{ - register struct inode *ip = VTOI(vp); - uid_t ouid; - gid_t ogid; - int error = 0; - int is_member; -#if QUOTA - register int i; - int64_t change; /* in bytes */ - int devBlockSize=0; -#endif /* QUOTA */ - - if (uid == (uid_t)VNOVAL) - uid = ip->i_uid; - if (gid == (gid_t)VNOVAL) - gid = ip->i_gid; - ogid = ip->i_gid; - ouid = ip->i_uid; -#if QUOTA - if ( (error = getinoquota(ip)) ) - return (error); - if (ouid == uid) { - dqrele(ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - - change = dbtob((int64_t)ip->i_blocks, devBlockSize); - (void) chkdq(ip, -change, cred, CHOWN); - (void) chkiq(ip, -1, cred, CHOWN); - for (i = 0; i < MAXQUOTAS; i++) { - dqrele(ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } -#endif - ip->i_gid = gid; - ip->i_uid = uid; -#if QUOTA - if ((error = getinoquota(ip)) == 0) { - if (ouid == uid) { - dqrele(ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { - if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) - goto good; - else - (void) chkdq(ip, -change, cred, CHOWN|FORCE); - } - for (i = 0; i < MAXQUOTAS; i++) { - dqrele(ip->i_dquot[i]); - ip->i_dquot[i] = NODQUOT; - } - } - ip->i_gid = ogid; - ip->i_uid = ouid; - if (getinoquota(ip) == 0) { - if (ouid == uid) { - dqrele(ip->i_dquot[USRQUOTA]); - ip->i_dquot[USRQUOTA] = NODQUOT; - } - if (ogid == gid) { - dqrele(ip->i_dquot[GRPQUOTA]); - ip->i_dquot[GRPQUOTA] = NODQUOT; - } - (void) chkdq(ip, change, cred, FORCE|CHOWN); - (void) chkiq(ip, 1, cred, FORCE|CHOWN); - (void) getinoquota(ip); - } - return (error); -good: - if (getinoquota(ip)) - panic("chown: lost quota"); -#endif 
/* QUOTA */ - if (ouid != uid || ogid != gid) - ip->i_flag |= IN_CHANGE; - return (0); -} - -int -ufs_ioctl(ap) - struct vnop_ioctl_args /* { - struct vnode *a_vp; - int a_command; - caddr_t a_data; - int a_fflag; - vfs_context_t a_context; - } */ *ap; -{ - - switch (ap->a_command) { - - case 1: - { register struct inode *ip; - register struct vnode *vp; - register struct fs *fs; - register struct radvisory *ra; - int devBlockSize = 0; - int error; - - vp = ap->a_vp; - - ra = (struct radvisory *)(ap->a_data); - ip = VTOI(vp); - fs = ip->i_fs; - - if ((u_int64_t)ra->ra_offset >= ip->i_size) { - return (EFBIG); - } - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - - error = advisory_read(vp, ip->i_size, ra->ra_offset, ra->ra_count); - - return (error); - } - default: - return (ENOTTY); - } -} - -int -ufs_select(__unused struct vnop_select_args *ap) -{ - /* - * We should really check to see if I/O is possible. - */ - return (1); -} - -/* - * Mmap a file - * - * NB Currently unsupported. - */ -int -ufs_mmap(__unused struct vnop_mmap_args *ap) -{ - return (EINVAL); -} - -int -ufs_remove(ap) - struct vnop_remove_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - int *a_flags; - vfs_context_t a_context; - } */ *ap; -{ - return(ufs_remove_internal(ap->a_dvp, ap->a_vp, ap->a_cnp, ap->a_flags)); -} - - -int -ufs_remove_internal(vnode_t dvp, vnode_t vp, struct componentname *cnp, int flags) -{ - struct inode *ip; - struct vnode *tvp; - int error; - - if (flags & VNODE_REMOVE_NODELETEBUSY) { - /* Caller requested Carbon delete semantics */ - if (vnode_isinuse(vp, 0)) { - error = EBUSY; - goto out; - } - } - cnp->cn_flags &= ~MODMASK; - cnp->cn_flags |= (WANTPARENT | NOCACHE); - cnp->cn_nameiop = DELETE; - - (void) relookup(dvp, &tvp, cnp); - - if (tvp == NULL) - return (ENOENT); - if (tvp != vp) { - /* - * The file has already gone away. Somewhat annoying, but that's - * life in a threaded world. 
We need to release the reference we - * got, and then return ENOENT. - */ - vnode_put(tvp); - return ENOENT; - } - /* - * get rid of reference relookup returned - */ - vnode_put(tvp); - - - ip = VTOI(vp); - - if ((error = ufs_dirremove(dvp, cnp)) == 0) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - VN_KNOTE(vp, NOTE_DELETE); - VN_KNOTE(dvp, NOTE_WRITE); - } -out: - return (error); -} - -/* - * link vnode call - */ -int -ufs_link(ap) - struct vnop_link_args /* { - struct vnode *a_vp; - struct vnode *a_tdvp; - struct componentname *a_cnp; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct vnode *tdvp = ap->a_tdvp; - struct componentname *cnp = ap->a_cnp; - vfs_context_t ctx = cnp->cn_context; - struct proc *p = vfs_context_proc(ctx); - struct inode *ip; - struct timeval tv; - int error; - - ip = VTOI(vp); - - if ((nlink_t)ip->i_nlink >= LINK_MAX) { - error = EMLINK; - goto out1; - } - ip->i_nlink++; - ip->i_flag |= IN_CHANGE; - microtime(&tv); - error = ffs_update(vp, &tv, &tv, 1); - if (!error) - error = ufs_direnter(ip, tdvp, cnp); - if (error) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - } - VN_KNOTE(vp, NOTE_LINK); - VN_KNOTE(tdvp, NOTE_WRITE); -out1: - return (error); -} - -/* - * whiteout vnode call - */ - -int -ufs_whiteout(ap) - struct vnop_whiteout_args /* { - struct vnode *a_dvp; - struct componentname *a_cnp; - int a_flags; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *dvp = ap->a_dvp; - struct componentname *cnp = ap->a_cnp; - struct direct newdir; - int error = 0; - - switch (ap->a_flags) { - case LOOKUP: - /* 4.4 format directories support whiteout operations */ - if (dvp->v_mount->mnt_maxsymlinklen > 0) - return (0); - return (ENOTSUP); - - case CREATE: - /* create a new directory whiteout */ -#if DIAGNOSTIC - if (dvp->v_mount->mnt_maxsymlinklen <= 0) - panic("ufs_whiteout: old format filesystem"); -#endif - - newdir.d_ino = WINO; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, 
(unsigned)cnp->cn_namelen + 1); - newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_context); - break; - - case DELETE: - /* remove an existing directory whiteout */ -#if DIAGNOSTIC - if (dvp->v_mount->mnt_maxsymlinklen <= 0) - panic("ufs_whiteout: old format filesystem"); -#endif - - cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); - break; - } - return (error); -} - - -/* - * Rename system call. - * rename("foo", "bar"); - * is essentially - * unlink("bar"); - * link("foo", "bar"); - * unlink("foo"); - * but ``atomically''. Can't do full commit without saving state in the - * inode on disk which isn't feasible at this time. Best we can do is - * always guarantee the target exists. - * - * Basic algorithm is: - * - * 1) Bump link count on source while we're linking it to the - * target. This also ensure the inode won't be deleted out - * from underneath us while we work (it may be truncated by - * a concurrent `trunc' or `open' for creation). - * 2) Link source to destination. If destination already exists, - * delete it first. - * 3) Unlink source reference to inode if still around. If a - * directory was moved and the parent of the destination - * is different from the source, patch the ".." entry in the - * directory. 
- */ -int -ufs_rename(ap) - struct vnop_rename_args /* { - struct vnode *a_fdvp; - struct vnode *a_fvp; - struct componentname *a_fcnp; - struct vnode *a_tdvp; - struct vnode *a_tvp; - struct componentname *a_tcnp; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *tvp = ap->a_tvp; - register struct vnode *tdvp = ap->a_tdvp; - struct vnode *fvp = ap->a_fvp; - struct vnode *fdvp = ap->a_fdvp; - struct componentname *tcnp = ap->a_tcnp; - struct componentname *fcnp = ap->a_fcnp; - vfs_context_t ctx = fcnp->cn_context; - struct proc *p = vfs_context_proc(ctx); - struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; - struct timeval tv; - ino_t doingdirectory = 0, oldparent = 0, newparent = 0; - int error = 0, ioflag; - u_char namlen; - struct vnode *rl_vp = NULL; - - - /* - * Check if just deleting a link name or if we've lost a race. - * If another process completes the same rename after we've looked - * up the source and have blocked looking up the target, then the - * source and target inodes may be identical now although the - * names were never linked. - */ - if (fvp == tvp) { - if (fvp->v_type == VDIR) { - /* - * Linked directories are impossible, so we must - * have lost the race. Pretend that the rename - * completed before the lookup. - */ -#ifdef UFS_RENAME_DEBUG - printf("ufs_rename: fvp == tvp for directories\n"); -#endif - error = ENOENT; - goto abortit; - } - - /* - * don't need to check in here for permissions, must already have been granted - * ufs_remove_internal now does the relookup - */ - error = ufs_remove_internal(fdvp, fvp, fcnp, 0); - - return (error); - } - /* - * because the vnode_authorization code may have looked up in this directory - * between the original lookup and the actual call to VNOP_RENAME, we need - * to reset the directory hints... 
since we haven't dropped the FSNODELOCK - * on tdvp since this whole thing started, we expect relookup to return - * tvp (which may be NULL) - */ - tcnp->cn_flags &= ~MODMASK; - tcnp->cn_flags |= (WANTPARENT | NOCACHE); - - if ( (error = relookup(tdvp, &rl_vp, tcnp)) ) - panic("ufs_rename: relookup on target returned error"); - if (rl_vp != tvp) { - /* - * Don't panic. The only way this state will be reached is if - * another rename has taken effect. In that case, it's safe - * to restart this rename and let things sort themselves out. - */ - if (rl_vp) - vnode_put(rl_vp); - error = ERESTART; - goto abortit; - } - if (rl_vp) { - vnode_put(rl_vp); - rl_vp = NULL; - } - dp = VTOI(fdvp); - ip = VTOI(fvp); - - if ((ip->i_mode & IFMT) == IFDIR) { - if (ip->i_flag & IN_RENAME) { - error = EINVAL; - goto abortit; - } - ip->i_flag |= IN_RENAME; - oldparent = dp->i_number; - doingdirectory++; - } - VN_KNOTE(fdvp, NOTE_WRITE); /* XXX right place? */ - - /* - * When the target exists, both the directory - * and target vnodes are returned locked. - */ - dp = VTOI(tdvp); - xp = NULL; - if (tvp) - xp = VTOI(tvp); - - /* - * 1) Bump link count while we're moving stuff - * around. If we crash somewhere before - * completing our work, the link count - * may be wrong, but correctable. - */ - ip->i_nlink++; - ip->i_flag |= IN_CHANGE; - microtime(&tv); - if ( (error = ffs_update(fvp, &tv, &tv, 1)) ) { - goto bad; - } - - /* - * If ".." must be changed (ie the directory gets a new - * parent) then the source directory must not be in the - * directory heirarchy above the target, as this would - * orphan everything below the source directory. Also - * the user must have write permission in the source so - * as to be able to change "..". We must repeat the call - * to namei, as the parent directory is unlocked by the - * call to checkpath(). 
- */ - - if (oldparent != dp->i_number) - newparent = dp->i_number; - - if (doingdirectory && newparent) { - if (error) /* write access check above */ - goto bad; - - if ( (error = ufs_checkpath(ip, dp, vfs_context_ucred(tcnp->cn_context))) ) - goto bad; - - if ( (error = relookup(tdvp, &tvp, tcnp)) ) - goto bad; - rl_vp = tvp; - - dp = VTOI(tdvp); - if (tvp) - xp = VTOI(tvp); - else - xp = NULL; - } - /* - * 2) If target doesn't exist, link the target - * to the source and unlink the source. - * Otherwise, rewrite the target directory - * entry to reference the source inode and - * expunge the original entry's existence. - */ - if (xp == NULL) { - if (dp->i_dev != ip->i_dev) - panic("rename: EXDEV"); - /* - * Account for ".." in new directory. - * When source and destination have the same - * parent we don't fool with the link count. - */ - if (doingdirectory && newparent) { - if ((nlink_t)dp->i_nlink >= LINK_MAX) { - error = EMLINK; - goto bad; - } - dp->i_nlink++; - dp->i_flag |= IN_CHANGE; - if ( (error = ffs_update(tdvp, &tv, &tv, 1)) ) - goto bad; - } - if ( (error = ufs_direnter(ip, tdvp, tcnp)) ) { - if (doingdirectory && newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - (void)ffs_update(tdvp, &tv, &tv, 1); - } - goto bad; - } - VN_KNOTE(tdvp, NOTE_WRITE); - } else { - if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) - panic("rename: EXDEV"); - /* - * Short circuit rename(foo, foo). - */ - if (xp->i_number == ip->i_number) - panic("rename: same file"); - /* - * Target must be empty if a directory and have no links - * to it. Also, ensure source and target are compatible - * (both directories, or both not directories). 
- */ - if ((xp->i_mode&IFMT) == IFDIR) { - if (!ufs_dirempty(xp, dp->i_number, vfs_context_ucred(tcnp->cn_context)) || - xp->i_nlink > 2) { - error = ENOTEMPTY; - goto bad; - } - if (!doingdirectory) { - error = ENOTDIR; - goto bad; - } - cache_purge(tdvp); - } else if (doingdirectory) { - error = EISDIR; - goto bad; - } - if ( (error = ufs_dirrewrite(dp, ip, tcnp)) ) - goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - } - VN_KNOTE(tdvp, NOTE_WRITE); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_nlink--; - if (doingdirectory) { - if (--xp->i_nlink != 0) - panic("rename: linked directory"); - ioflag = ((tvp)->v_mount->mnt_flag & MNT_ASYNC) ? - 0 : IO_SYNC; - error = ffs_truncate_internal(tvp, (off_t)0, ioflag, vfs_context_ucred(tcnp->cn_context)); - } - xp->i_flag |= IN_CHANGE; - VN_KNOTE(tvp, NOTE_DELETE); - xp = NULL; - } - if (rl_vp) - vnode_put(rl_vp); - rl_vp = NULL; - - /* - * 3) Unlink the source. - */ - fcnp->cn_flags &= ~MODMASK; - fcnp->cn_flags |= (WANTPARENT | NOCACHE); - - (void) relookup(fdvp, &fvp, fcnp); - - if (fvp != NULL) { - xp = VTOI(fvp); - dp = VTOI(fdvp); - rl_vp = fvp; - } else { - /* - * From name has disappeared. - */ - if (doingdirectory) - panic("rename: lost dir entry"); - - return (0); - } - /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. If the source is - * a file then the entry may have been unlinked or renamed. 
In - * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IN_RENAME flag ensures that it cannot be moved by another - * rename. - */ - if (xp != ip) { - if (doingdirectory) - panic("rename: lost dir entry"); - } else { - /* - * If the source is a directory with a - * new parent, the link count of the old - * parent directory must be decremented - * and ".." set to point to the new parent. - */ - if (doingdirectory && newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE32, IO_NODELOCKED, - vfs_context_ucred(tcnp->cn_context), (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' || - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE32, - IO_NODELOCKED|IO_SYNC, - vfs_context_ucred(tcnp->cn_context), (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } - } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_nlink--; - xp->i_flag |= IN_CHANGE; - } - xp->i_flag &= ~IN_RENAME; - } - VN_KNOTE(fvp, NOTE_RENAME); - - if (rl_vp) - vnode_put(rl_vp); - - return (error); - -bad: - if (rl_vp) - vnode_put(rl_vp); - - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; - - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; - -abortit: - return (error); -} - -/* - * A virgin directory (no blushing please). 
- */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, ".", - 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, ".", - 0, DIRBLKSIZ - 12, 2, ".." -}; - -/* - * Mkdir system call - */ -int -ufs_mkdir(ap) - struct vnop_mkdir_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *dvp = ap->a_dvp; - register struct vnode_attr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip, *dp; - struct vnode *tvp; - struct dirtemplate dirtemplate, *dtp; - struct timeval tv; - int error, dmode; - - /* use relookup to force correct directory hints */ - cnp->cn_flags &= ~MODMASK; - cnp->cn_flags |= (WANTPARENT | NOCACHE); - cnp->cn_nameiop = CREATE; - - (void) relookup(dvp, &tvp, cnp); - - /* get rid of reference relookup returned */ - if (tvp) - vnode_put(tvp); - - dp = VTOI(dvp); - if ((nlink_t)dp->i_nlink >= LINK_MAX) { - error = EMLINK; - goto out; - } - dmode = vap->va_mode & 0777; - dmode |= IFDIR; - - /* - * Must simulate part of ufs_makeinode here to acquire the inode, - * but not have it entered in the parent directory. The entry is - * made later after writing "." and ".." entries. 
- */ - if ( (error = ffs_valloc(dvp, (mode_t)dmode, vfs_context_ucred(cnp->cn_context), &tvp)) ) - goto out; - ip = VTOI(tvp); - ip->i_uid = ap->a_vap->va_uid; - ip->i_gid = ap->a_vap->va_gid; - VATTR_SET_SUPPORTED(ap->a_vap, va_mode); - VATTR_SET_SUPPORTED(ap->a_vap, va_uid); - VATTR_SET_SUPPORTED(ap->a_vap, va_gid); -#if QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, vfs_context_ucred(cnp->cn_context), 0))) { - ffs_vfree(tvp, ip->i_number, dmode); - vnode_put(tvp); - return (error); - } -#endif - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; - ip->i_mode = dmode; - ip->i_nlink = 2; - if (cnp->cn_flags & ISWHITEOUT) - ip->i_flags |= UF_OPAQUE; - microtime(&tv); - error = ffs_update(tvp, &tv, &tv, 1); - - /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. - */ - dp->i_nlink++; - dp->i_flag |= IN_CHANGE; - if ( (error = ffs_update(dvp, &tv, &tv, 1)) ) - goto bad; - - /* Initialize directory with "." and ".." from static template. */ - if (dvp->v_mount->mnt_maxsymlinklen > 0) - dtp = &mastertemplate; - else - dtp = (struct dirtemplate *)&omastertemplate; - dirtemplate = *dtp; - dirtemplate.dot_ino = ip->i_number; - dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE32, - IO_NODELOCKED|IO_SYNC, vfs_context_ucred(cnp->cn_context), (int *)0, (struct proc *)0); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - goto bad; - } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_vfsstat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_size = DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; - } - - /* Directory set up, now install it's entry in the parent directory. 
*/ - if ( (error = ufs_direnter(ip, dvp, cnp)) ) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - } -bad: - /* - * No need to do an explicit vnop_truncate here, vnode_put will do it - * for us because we set the link count to 0. - */ - if (error) { - ip->i_nlink = 0; - ip->i_flag |= IN_CHANGE; - /* - * since we're not returning tvp due to the error, - * we're responsible for releasing it here - */ - vnode_put(tvp); - } else { - VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - *ap->a_vpp = tvp; - }; -out: - return (error); -} - -/* - * Rmdir system call. - */ -int -ufs_rmdir(ap) - struct vnop_rmdir_args /* { - struct vnode *a_dvp; - struct vnode *a_vp; - struct componentname *a_cnp; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct vnode *dvp = ap->a_dvp; - struct vnode *tvp; - struct componentname *cnp = ap->a_cnp; - struct inode *ip, *dp; - int error, ioflag; - - - ip = VTOI(vp); - dp = VTOI(dvp); - /* - * No rmdir "." please. - */ - if (dp == ip) - return (EINVAL); - - - cnp->cn_flags &= ~MODMASK; - cnp->cn_flags |= (WANTPARENT | NOCACHE); - - (void) relookup(dvp, &tvp, cnp); - - if (tvp == NULL) - return (ENOENT); - if (tvp != vp) - panic("ufs_rmdir: relookup returned a different vp"); - /* - * get rid of reference relookup returned - */ - vnode_put(tvp); - - - /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) - */ - error = 0; - if (ip->i_nlink != 2 || - !ufs_dirempty(ip, dp->i_number, vfs_context_ucred(cnp->cn_context))) { - error = ENOTEMPTY; - goto out; - } - /* - * Delete reference to directory before purging - * inode. If we crash in between, the directory - * will be reattached to lost+found, - */ - if ( (error = ufs_dirremove(dvp, cnp)) ) - goto out; - VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - cache_purge(dvp); - /* - * Truncate inode. 
The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." reference - * has already been adjusted above. We've - * removed the "." reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. - */ - ip->i_nlink -= 2; - ioflag = ((vp)->v_mount->mnt_flag & MNT_ASYNC) ? 0 : IO_SYNC; - error = ffs_truncate_internal(vp, (off_t)0, ioflag, vfs_context_ucred(cnp->cn_context)); - cache_purge(ITOV(ip)); -out: - VN_KNOTE(vp, NOTE_DELETE); - return (error); -} - -/* - * symlink -- make a symbolic link - */ -int -ufs_symlink(ap) - struct vnop_symlink_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vnode_attr *a_vap; - char *a_target; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp, **vpp = ap->a_vpp; - register struct inode *ip; - int len, error; - - if ( (error = ufs_makeinode(ap->a_vap, ap->a_dvp, vpp, ap->a_cnp)) ) - return (error); - VN_KNOTE(ap->a_dvp, NOTE_WRITE); - vp = *vpp; - len = strlen(ap->a_target); - if (len < vp->v_mount->mnt_maxsymlinklen) { - ip = VTOI(vp); - bcopy(ap->a_target, (char *)ip->i_shortlink, len); - ip->i_size = len; - ip->i_flag |= IN_CHANGE | IN_UPDATE; - } else - error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, - UIO_SYSSPACE32, IO_NODELOCKED, vfs_context_ucred(ap->a_cnp->cn_context), (int *)0, - (struct proc *)0); - return (error); -} - -/* - * Vnode op for reading directories. - * - * The routine below assumes that the on-disk format of a directory - * is the same as that defined by . If the on-disk - * format changes, then it will be necessary to do a conversion - * from the on-disk format that read returns to the format defined - * by . 
- */ -int -ufs_readdir(ap) - struct vnop_readdir_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_flags; - int *a_eofflag; - int *a_numdirent; - vfs_context_t a_context; - } */ *ap; -{ - struct uio *uio = ap->a_uio; - int error; - size_t count, lost; - - if (ap->a_flags & VNODE_READDIR_EXTENDED) { - return ufs_readdirext(ap->a_vp, uio, ap->a_eofflag, - ap->a_numdirent, ap->a_context); - } - - // LP64todo - fix this - count = uio_resid(uio); - /* Make sure we don't return partial entries. */ - count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); - if (count <= 0) - return (EINVAL); - // LP64todo - fix this - lost = uio_resid(uio) - count; - uio_setresid(uio, count); - uio_iov_len_set(uio, count); -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { - error = ffs_read_internal(ap->a_vp, uio, 0); - } else { - struct dirent *dp, *edp; - struct uio auio; - struct iovec_32 aiov; - caddr_t dirbuf; - int readcnt; - u_char tmp; - - auio = *uio; - auio.uio_iovs.iov32p = &aiov; - auio.uio_iovcnt = 1; -#if 1 /* LP64todo - can't use new segment flags until the drivers are ready */ - auio.uio_segflg = UIO_SYSSPACE; -#else - auio.uio_segflg = UIO_SYSSPACE32; -#endif - aiov.iov_len = count; - MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); - aiov.iov_base = (uintptr_t)dirbuf; - error = ffs_read_internal(ap->a_vp, &auio, 0); - if (error == 0) { - // LP64todo - fix this - readcnt = count - uio_resid(&auio); - edp = (struct dirent *)&dirbuf[readcnt]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { - tmp = dp->d_namlen; - dp->d_namlen = dp->d_type; - dp->d_type = tmp; - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; - } - } - if (dp >= edp) - error = uiomove(dirbuf, readcnt, uio); - } - FREE(dirbuf, M_TEMP); - } -# else - error = ffs_read_internal(ap->a_vp, uio, 0); -# endif - - uio_setresid(uio, (uio_resid(uio) + lost)); - if (ap->a_eofflag) - *ap->a_eofflag = 
(off_t)VTOI(ap->a_vp)->i_size <= uio->uio_offset; - return (error); -} - - -/* - * ufs_readdirext reads directory entries into the buffer pointed - * to by uio, in a filesystem independent format. Up to uio_resid - * bytes of data can be transferred. The data in the buffer is a - * series of packed direntry structures where each one contains the - * following entries: - * - * d_reclen: length of record - * d_ino: file number of entry - * d_seekoff: seek offset (used by NFS server, aka cookie) - * d_type: file type - * d_namlen: length of string in d_name - * d_name: null terminated file name - * - * The current position (uio_offset) refers to the next block of - * entries. The offset will only be set to a value previously - * returned by ufs_readdirext or zero. This offset does not have - * to match the number of bytes returned (in uio_resid). - */ -#define EXT_DIRENT_LEN(namlen) \ - ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 3) & ~3) - -static int -ufs_readdirext(vnode_t vp, uio_t uio, int *eofflag, int *numdirent, - __unused vfs_context_t context) -{ - int error; - size_t count, lost; - off_t off = uio->uio_offset; - struct dirent *dp, *edp; - struct uio auio; - struct iovec_32 aiov; - caddr_t dirbuf; - struct direntry *xdp; - int nentries = 0; - - // LP64todo - fix this - count = uio_resid(uio); - /* Make sure we don't return partial entries. 
*/ - count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); - if (count <= 0) - return (EINVAL); - // LP64todo - fix this - lost = uio_resid(uio) - count; - uio_setresid(uio, count); - uio_iov_len_set(uio, count); - - auio = *uio; - auio.uio_iovs.iov32p = &aiov; - auio.uio_iovcnt = 1; - /* LP64todo - can't use new segment flags until the drivers are ready */ - auio.uio_segflg = UIO_SYSSPACE; - aiov.iov_len = count; - MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); - aiov.iov_base = (uintptr_t)dirbuf; - - MALLOC(xdp, struct direntry *, sizeof(struct direntry), M_TEMP, M_WAITOK); - - error = ffs_read_internal(vp, &auio, 0); - if (error) - goto out; - - // LP64todo - fix this - edp = (struct dirent *)&dirbuf[count - uio_resid(&auio)]; - for (dp = (struct dirent *)dirbuf; dp < edp; ) { - -#if (BYTE_ORDER == LITTLE_ENDIAN) - u_char tmp; - - /* - * We only need to swap the d_namlen and - * d_type fields for older versions of UFS, - * which we check by looking at the mnt_maxsymlinklen - * field. 
- */ - if (vp->v_mount->mnt_maxsymlinklen <= 0) { - tmp = dp->d_namlen; - dp->d_namlen = dp->d_type; - dp->d_type = tmp; - } -#endif - - xdp->d_reclen = EXT_DIRENT_LEN(dp->d_namlen); - if (xdp->d_reclen > uio_resid(uio)) { - break; /* user buffer is full */ - } - xdp->d_ino = dp->d_ino; - xdp->d_namlen = dp->d_namlen; - xdp->d_type = dp->d_type; - - bcopy(dp->d_name, xdp->d_name, dp->d_namlen + 1); - off += dp->d_reclen; - xdp->d_seekoff = off; - error = uiomove((caddr_t)xdp, xdp->d_reclen, uio); - if (error) { - off -= dp->d_reclen; - break; /* unexpected this error is */ - } - nentries++; - - if (dp->d_reclen > 0) { - dp = (struct dirent *) - ((char *)dp + dp->d_reclen); - } else { - error = EIO; - break; - } - } -out: - FREE(dirbuf, M_TEMP); - FREE(xdp, M_TEMP); - - /* Use the on-disk dirent offset */ - uio_setoffset(uio, off); - *numdirent = nentries; - uio_setresid(uio, (uio_resid(uio) + lost)); - if (eofflag) - *eofflag = (off_t)VTOI(vp)->i_size <= uio->uio_offset; - return (error); -} - - -/* - * Return target name of a symbolic link - */ -int -ufs_readlink(ap) - struct vnop_readlink_args /* { - struct vnode *a_vp; - struct uio *a_uio; - vfs_context_t a_context; - } */ *ap; -{ - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - int isize; - - isize = ip->i_size; - if (isize < vp->v_mount->mnt_maxsymlinklen) { - uiomove((char *)ip->i_shortlink, isize, ap->a_uio); - return (0); - } - return (ffs_read_internal(vp, ap->a_uio, 0)); -} - -/* - * prepare and issue the I/O - */ -errno_t -ufs_strategy(ap) - struct vnop_strategy_args /* { - struct buf *a_bp; - } */ *ap; -{ - buf_t bp = ap->a_bp; - vnode_t vp = buf_vnode(bp); - struct inode *ip = VTOI(vp); - - return (buf_strategy(ip->i_devvp, ap)); -} - -/* - * Read wrapper for special devices. - */ -int -ufsspec_read(ap) - struct vnop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - vfs_context_t a_context; - } */ *ap; -{ - - /* - * Set access flag. 
- */ - VTOI(ap->a_vp)->i_flag |= IN_ACCESS; - return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_read), ap)); -} - -/* - * Write wrapper for special devices. - */ -int -ufsspec_write( - struct vnop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - kauth_cred_t a_cred; - } */ *ap) -{ - - /* - * Set update and change flags. - */ - VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_write), ap)); -} - -/* - * Close wrapper for special devices. - * - * Update the times on the inode then do device close. - */ -int -ufsspec_close(ap) - struct vnop_close_args /* { - struct vnode *a_vp; - int a_fflag; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct timeval tv; - - if (ap->a_vp->v_usecount > 1) { - microtime(&tv); - ITIMES(ip, &tv, &tv); - } - return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap)); -} - -#if FIFO -/* - * Read wrapper for fifo's - */ -int -ufsfifo_read(ap) - struct vnop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - vfs_context_t a_context; - } */ *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - - /* - * Set access flag. - */ - VTOI(ap->a_vp)->i_flag |= IN_ACCESS; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_read), ap)); -} - -/* - * Write wrapper for fifo's. - */ -int -ufsfifo_write( - struct vnop_write_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - kauth_cred_t a_cred; - } */ *ap) -{ - extern int (**fifo_vnodeop_p)(void *); - - /* - * Set update and change flags. - */ - VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; - return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_write), ap)); -} - -/* - * Close wrapper for fifo's. - * - * Update the times on the inode then do device close. 
- */ -int -ufsfifo_close(ap) - struct vnop_close_args /* { - struct vnode *a_vp; - int a_fflag; - vfs_context_t a_context; - } */ *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - struct vnode *vp = ap->a_vp; - struct inode *ip = VTOI(vp); - struct timeval tv; - - if (ap->a_vp->v_usecount > 1) { - microtime(&tv); - ITIMES(ip, &tv, &tv); - } - return (VOCALL (fifo_vnodeop_p, VOFFSET(vnop_close), ap)); -} - -/* - * kqfilt_add wrapper for fifos. - * - * Fall through to ufs kqfilt_add routines if needed - */ -int -ufsfifo_kqfilt_add(ap) - struct vnop_kqfilt_add_args *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_add), ap); - if (error) - error = ufs_kqfilt_add(ap); - return (error); -} - -#if 0 -/* - * kqfilt_remove wrapper for fifos. - * - * Fall through to ufs kqfilt_remove routines if needed - */ -int -ufsfifo_kqfilt_remove(ap) - struct vnop_kqfilt_remove_args *ap; -{ - extern int (**fifo_vnodeop_p)(void *); - int error; - - error = VOCALL(fifo_vnodeop_p, VOFFSET(vnop_kqfilt_remove), ap); - if (error) - error = ufs_kqfilt_remove(ap); - return (error); -} -#endif - -#endif /* FIFO */ - - -static struct filterops ufsread_filtops = - { 1, NULL, filt_ufsdetach, filt_ufsread }; -static struct filterops ufswrite_filtops = - { 1, NULL, filt_ufsdetach, filt_ufswrite }; -static struct filterops ufsvnode_filtops = - { 1, NULL, filt_ufsdetach, filt_ufsvnode }; - -/* - # - #% kqfilt_add vp L L L - # - vnop_kqfilt_add - IN struct vnode *vp; - IN struct knote *kn; - IN vfs_context_t context; - */ -int -ufs_kqfilt_add(ap) - struct vnop_kqfilt_add_args /* { - struct vnode *a_vp; - struct knote *a_kn; - vfs_context_t a_context; - } */ *ap; -{ - struct vnode *vp = ap->a_vp; - struct knote *kn = ap->a_kn; - - switch (kn->kn_filter) { - case EVFILT_READ: - kn->kn_fop = &ufsread_filtops; - break; - case EVFILT_WRITE: - kn->kn_fop = &ufswrite_filtops; - break; - case EVFILT_VNODE: - kn->kn_fop = &ufsvnode_filtops; - 
break; - default: - return (1); - } - - kn->kn_hook = (caddr_t)vp; - kn->kn_hookid = vnode_vid(vp); - - KNOTE_ATTACH(&VTOI(vp)->i_knotes, kn); - - return (0); -} - -static void -filt_ufsdetach(struct knote *kn) -{ - struct vnode *vp; - int result; - struct proc *p = current_proc(); - - vp = (struct vnode *)kn->kn_hook; - - if (vnode_getwithvid(vp, kn->kn_hookid)) - return; - - result = KNOTE_DETACH(&VTOI(vp)->i_knotes, kn); - vnode_put(vp); -} - -static int -filt_ufsread(struct knote *kn, long hint) -{ - struct vnode *vp = (struct vnode *)kn->kn_hook; - struct inode *ip; - int dropvp = 0; - int result; - - if (hint == 0) { - if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - dropvp = 1; - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. - */ - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - /* poll(2) semantics dictate always returning true */ - if (kn->kn_flags & EV_POLL) { - kn->kn_data = 1; - result = 1; - } else { - off_t amount; - - ip = VTOI(vp); - amount = ip->i_size - kn->kn_fp->f_fglob->fg_offset; - if (amount > (off_t)INTPTR_MAX) - kn->kn_data = INTPTR_MAX; - else if (amount < (off_t)INTPTR_MIN) - kn->kn_data = INTPTR_MIN; - else - kn->kn_data = (intptr_t)amount; - result = (kn->kn_data != 0); - } - - if (dropvp) - vnode_put(vp); - - return (result); -} - -static int -filt_ufswrite(struct knote *kn, long hint) -{ - - int dropvp = 0; - int result; - - if (hint == 0) { - if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(kn->kn_hook); - } - if (hint == NOTE_REVOKE) { - /* - * filesystem is gone, so set the EOF flag and schedule - * the knote for deletion. 
- */ - kn->kn_data = 0; - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - kn->kn_data = 0; - return (1); -} - -static int -filt_ufsvnode(struct knote *kn, long hint) -{ - - if (hint == 0) { - if ((vnode_getwithvid(kn->kn_hook, kn->kn_hookid) != 0)) { - hint = NOTE_REVOKE; - } else - vnode_put(kn->kn_hook); - } - if (kn->kn_sfflags & hint) - kn->kn_fflags |= hint; - if ((hint == NOTE_REVOKE)) { - kn->kn_flags |= (EV_EOF | EV_ONESHOT); - return (1); - } - - return (kn->kn_fflags != 0); -} - -/* - * Return POSIX pathconf information applicable to ufs filesystems. - */ -int -ufs_pathconf(ap) - struct vnop_pathconf_args /* { - struct vnode *a_vp; - int a_name; - int *a_retval; - vfs_context_t a_context; - } */ *ap; -{ - - switch (ap->a_name) { - case _PC_LINK_MAX: - *ap->a_retval = LINK_MAX; - return (0); - case _PC_NAME_MAX: - *ap->a_retval = NAME_MAX; - return (0); - case _PC_PATH_MAX: - *ap->a_retval = PATH_MAX; - return (0); - case _PC_PIPE_BUF: - *ap->a_retval = PIPE_BUF; - return (0); - case _PC_CHOWN_RESTRICTED: - *ap->a_retval = 200112; /* _POSIX_CHOWN_RESTRICTED */ - return (0); - case _PC_NO_TRUNC: - *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ - return (0); - case _PC_FILESIZEBITS: - *ap->a_retval = 34; - return (0); - - default: - return (EINVAL); - } - /* NOTREACHED */ -} - -/* - * Allocate a new inode. 
- */ -int -ufs_makeinode(vap, dvp, vpp, cnp) - struct vnode_attr *vap; - struct vnode *dvp; - struct vnode **vpp; - struct componentname *cnp; -{ - register struct inode *ip, *pdir; - struct timeval tv; - struct vnode *tvp; - int error; - int is_member; - int mode; - - mode = MAKEIMODE(vap->va_type, vap->va_mode); - pdir = VTOI(dvp); - *vpp = NULL; - if ((mode & IFMT) == 0) - mode |= IFREG; - - if ( (error = ffs_valloc(dvp, (mode_t)mode, vfs_context_ucred(cnp->cn_context), &tvp)) ) - return (error); - - ip = VTOI(tvp); - ip->i_gid = vap->va_gid; - ip->i_uid = vap->va_uid; - VATTR_SET_SUPPORTED(vap, va_mode); - VATTR_SET_SUPPORTED(vap, va_uid); - VATTR_SET_SUPPORTED(vap, va_gid); -#if QUOTA - if ((error = getinoquota(ip)) || - (error = chkiq(ip, 1, vfs_context_ucred(cnp->cn_context), 0))) { - ffs_vfree(tvp, ip->i_number, mode); - vnode_put(tvp); - return (error); - } -#endif - ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; - ip->i_mode = mode; - ip->i_nlink = 1; - - if (cnp->cn_flags & ISWHITEOUT) - ip->i_flags |= UF_OPAQUE; - - /* - * Make sure inode goes to disk before directory entry. - */ - microtime(&tv); - if ( (error = ffs_update(tvp, &tv, &tv, 1)) ) - goto bad; - if ( (error = ufs_direnter(ip, dvp, cnp)) ) - goto bad; - - *vpp = tvp; - return (0); - -bad: - /* - * Write error occurred trying to update the inode - * or the directory so must deallocate the inode. - */ - ip->i_nlink = 0; - ip->i_flag |= IN_CHANGE; - vnode_put(tvp); - - return (error); -} - diff --git a/bsd/ufs/ufs/ufsmount.h b/bsd/ufs/ufs/ufsmount.h deleted file mode 100644 index 6a25572e5..000000000 --- a/bsd/ufs/ufs/ufsmount.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). 
You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ufsmount.h 8.6 (Berkeley) 3/30/95 - */ -#ifndef _UFS_UFSMOUNT_H_ -#define _UFS_UFSMOUNT_H_ - -#include - -#include - -#ifdef __APPLE_API_UNSTABLE -/* - * Arguments to mount UFS-based filesystems - */ -struct ufs_args { - char *fspec; /* block special device to mount */ -}; -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef __APPLE_API_OBSOLETE -#if MFS -/* - * Arguments to mount MFS - */ -struct mfs_args { - char *fspec; /* name to export for statfs */ - caddr_t base; /* base of file system in memory */ - u_long size; /* size of file system */ -}; -#endif /* MFS */ -#endif /* __APPLE_API_OBSOLETE */ - -#ifdef KERNEL -#ifdef __APPLE_API_PRIVATE -struct fs; -struct mount; -struct vnode; - -/* This structure describes the UFS specific mount structure data. 
*/ -struct ufsmount { - struct mount *um_mountp; /* filesystem vfs structure */ - dev_t um_dev; /* device mounted */ - struct vnode *um_devvp; /* block device mounted vnode */ - - union { /* pointer to superblock */ - struct fs *fs; /* FFS */ - } ufsmount_u; -#define um_fs ufsmount_u.fs - - struct quotafile um_qfiles[MAXQUOTAS]; /* quota files */ - u_long um_nindir; /* indirect ptrs per block */ - u_long um_bptrtodb; /* indir ptr to disk block */ - u_long um_seqinc; /* inc between seq blocks */ - int64_t um_savedmaxfilesize; /* XXX - limit maxfilesize */ -}; - - -/* Convert mount ptr to ufsmount ptr. */ -#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data)) - -/* - * Macros to access file system parameters in the ufsmount structure. - * Used by ufs_bmap. - */ -#define MNINDIR(ump) ((ump)->um_nindir) -#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) -#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) -#endif /* __APPLE_API_PRIVATE */ -#endif /* KERNEL */ - -#endif /* ! 
_UFS_UFSMOUNT_H_ */ diff --git a/bsd/uuid/uuid.h b/bsd/uuid/uuid.h index 3d172d2f6..be1f90fe2 100644 --- a/bsd/uuid/uuid.h +++ b/bsd/uuid/uuid.h @@ -42,6 +42,11 @@ typedef __darwin_uuid_t uuid_t; #endif /* _UUID_T */ +#ifndef _UUID_STRING_T +#define _UUID_STRING_T +typedef __darwin_uuid_string_t uuid_string_t; +#endif /* _UUID_STRING_T */ + #define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \ static const uuid_t name __attribute__ ((unused)) = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15} @@ -61,11 +66,11 @@ void uuid_generate_time(uuid_t out); int uuid_is_null(const uuid_t uu); -int uuid_parse(const char *in, uuid_t uu); +int uuid_parse(const uuid_string_t in, uuid_t uu); -void uuid_unparse(const uuid_t uu, char *out); -void uuid_unparse_lower(const uuid_t uu, char *out); -void uuid_unparse_upper(const uuid_t uu, char *out); +void uuid_unparse(const uuid_t uu, uuid_string_t out); +void uuid_unparse_lower(const uuid_t uu, uuid_string_t out); +void uuid_unparse_upper(const uuid_t uu, uuid_string_t out); #ifdef __cplusplus } diff --git a/bsd/uxkern/ux_exception.c b/bsd/uxkern/ux_exception.c index e63b6f3ba..dfbe2e5d2 100644 --- a/bsd/uxkern/ux_exception.c +++ b/bsd/uxkern/ux_exception.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,7 +90,12 @@ static void ux_exception(int exception, mach_exception_code_t code, mach_exception_subcode_t subcode, int *ux_signal, mach_exception_code_t *ux_code); +#if defined(__x86_64__) +mach_port_t ux_exception_port; +#else mach_port_name_t ux_exception_port; +#endif /* __x86_64__ */ + static task_t ux_handler_self; static @@ -155,7 +160,7 @@ ux_handler(void) mach_port_name_t reply_port; kern_return_t result; - exc_msg.Head.msgh_local_port = (mach_port_t)exc_set_name; + exc_msg.Head.msgh_local_port = CAST_MACH_NAME_TO_PORT(exc_set_name); exc_msg.Head.msgh_size = sizeof (exc_msg); #if 0 result = mach_msg_receive(&exc_msg.Head); @@ -166,14 +171,15 @@ ux_handler(void) 0); #endif if (result == MACH_MSG_SUCCESS) { - reply_port = (mach_port_name_t)exc_msg.Head.msgh_remote_port; + reply_port = CAST_MACH_PORT_TO_NAME(exc_msg.Head.msgh_remote_port); - if (mach_exc_server(&exc_msg.Head, &rep_msg.Head)) - (void) mach_msg_send(&rep_msg.Head, MACH_SEND_MSG, + if (mach_exc_server(&exc_msg.Head, &rep_msg.Head)) { + result = mach_msg_send(&rep_msg.Head, MACH_SEND_MSG, sizeof (rep_msg),MACH_MSG_TIMEOUT_NONE,MACH_PORT_NULL); + if (reply_port != 0 && result != MACH_MSG_SUCCESS) + mach_port_deallocate(get_task_ipcspace(ux_handler_self), reply_port); + } - if (reply_port != MACH_PORT_NULL) - (void) mach_port_deallocate(get_task_ipcspace(ux_handler_self), reply_port); } else if (result == MACH_RCV_TOO_LARGE) /* ignore oversized messages */; @@ -185,8 +191,11 @@ ux_handler(void) void ux_handler_init(void) { + thread_t thread = THREAD_NULL; + ux_exception_port = MACH_PORT_NULL; - (void) kernel_thread(kernel_task, ux_handler); + (void) kernel_thread_start((thread_continue_t)ux_handler, NULL, &thread); + thread_deallocate(thread); proc_list_lock(); if (ux_exception_port == MACH_PORT_NULL) { (void)msleep(&ux_exception_port, proc_list_mlock, 0, "ux_handler_wait", 0); @@ -236,8 +245,8 @@ catch_mach_exception_raise( int ux_signal = 0; 
mach_exception_code_t ucode = 0; struct uthread *ut; - mach_port_name_t thread_name = (mach_port_name_t)thread; /* XXX */ - mach_port_name_t task_name = (mach_port_name_t)task; /* XXX */ + mach_port_name_t thread_name = CAST_MACH_PORT_TO_NAME(thread); + mach_port_name_t task_name = CAST_MACH_PORT_TO_NAME(task); /* * Convert local thread name to global port. @@ -345,10 +354,9 @@ catch_mach_exception_raise( result = KERN_INVALID_ARGUMENT; /* - * Delete our send rights to the task and thread ports. + * Delete our send rights to the task port. */ (void)mach_port_deallocate(get_task_ipcspace(ux_handler_self), task_name); - (void)mach_port_deallocate(get_task_ipcspace(ux_handler_self), thread_name); return (result); } diff --git a/bsd/vfs/Makefile b/bsd/vfs/Makefile index 1ed043ac2..3d578ffd7 100644 --- a/bsd/vfs/Makefile +++ b/bsd/vfs/Makefile @@ -20,7 +20,7 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ DATAFILES = \ - vfs_support.h vfs_journal.h + vfs_support.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 44c482c8f..50338b255 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -126,20 +127,35 @@ #define COMPAT_ONLY +#ifndef __LP64__ #define THREAD_SAFE_FS(VP) \ ((VP)->v_unsafefs ? 0 : 1) +#endif /* __LP64__ */ #define NATIVE_XATTR(VP) \ ((VP)->v_mount ? 
(VP)->v_mount->mnt_kern_flag & MNTK_EXTENDED_ATTRS : 0) static void xattrfile_remove(vnode_t dvp, const char *basename, - vfs_context_t ctx, int thread_safe, int force); + vfs_context_t ctx, int force); static void xattrfile_setattr(vnode_t dvp, const char * basename, - struct vnode_attr * vap, vfs_context_t ctx, - int thread_safe); + struct vnode_attr * vap, vfs_context_t ctx); - -static void +/* + * vnode_setneedinactive + * + * Description: Indicate that when the last iocount on this vnode goes away, + * and the usecount is also zero, we should inform the filesystem + * via VNOP_INACTIVE. + * + * Parameters: vnode_t vnode to mark + * + * Returns: Nothing + * + * Notes: Notably used when we're deleting a file--we need not have a + * usecount, so VNOP_INACTIVE may not get called by anyone. We + * want it called when we drop our iocount. + */ +void vnode_setneedinactive(vnode_t vp) { cache_purge(vp); @@ -150,6 +166,7 @@ vnode_setneedinactive(vnode_t vp) } +#ifndef __LP64__ int lock_fsnode(vnode_t vp, int *funnel_state) { @@ -189,6 +206,7 @@ unlock_fsnode(vnode_t vp, int *funnel_state) if (funnel_state) (void) thread_funnel_set(kernel_flock, *funnel_state); } +#endif /* __LP64__ */ @@ -197,24 +215,26 @@ unlock_fsnode(vnode_t vp, int *funnel_state) /* ====================================================================== */ /* - * prototypes for exported VFS operations + * implementations of exported VFS operations */ int VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_mount == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; - - +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ if (vfs_context_is64bit(ctx)) { if (vfs_64bitready(mp)) { @@ -228,9 +248,12 
@@ VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx); } +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -238,21 +261,30 @@ int VFS_START(mount_t mp, int flags, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_start == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_start)(mp, flags, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -260,21 +292,30 @@ int VFS_UNMOUNT(mount_t mp, int flags, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_unmount == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_unmount)(mp, flags, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -299,8 +340,10 @@ int VFS_ROOT(mount_t mp, struct vnode ** vpp, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_root == 0)) return(ENOTSUP); @@ -308,15 +351,22 @@ VFS_ROOT(mount_t mp, struct vnode ** vpp, vfs_context_t ctx) if (ctx == NULL) { ctx = vfs_context_current(); 
} - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_root)(mp, vpp, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -324,21 +374,29 @@ int VFS_QUOTACTL(mount_t mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_quotactl == 0)) return(ENOTSUP); - thread_safe = mp->mnt_vtable->vfc_threadsafe; - +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -346,8 +404,10 @@ int VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_getattr == 0)) return(ENOTSUP); @@ -356,15 +416,21 @@ VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_getattr)(mp, vfa, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -372,8 +438,10 @@ int VFS_SETATTR(mount_t mp, struct 
vfs_attr *vfa, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_setattr == 0)) return(ENOTSUP); @@ -382,15 +450,21 @@ VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; - +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_setattr)(mp, vfa, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -398,8 +472,10 @@ int VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_sync == 0)) return(ENOTSUP); @@ -407,15 +483,22 @@ VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx) if (ctx == NULL) { ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_sync)(mp, flags, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -423,8 +506,10 @@ int VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget == 0)) return(ENOTSUP); @@ -432,15 +517,22 @@ VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx) if (ctx == NULL) { ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef 
__LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -448,8 +540,10 @@ int VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_fhtovp == 0)) return(ENOTSUP); @@ -457,15 +551,22 @@ VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_contex if (ctx == NULL) { ctx = vfs_context_current(); } - thread_safe = mp->mnt_vtable->vfc_threadsafe; +#ifndef __LP64__ + thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -473,8 +574,10 @@ int VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ctx) { int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if ((vp->v_mount == dead_mountp) || (vp->v_mount->mnt_op->vfs_vptofh == 0)) return(ENOTSUP); @@ -482,15 +585,22 @@ VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ct if (ctx == NULL) { ctx = vfs_context_current(); } - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, ctx); + +#ifndef __LP64__ if (!thread_safe) { (void) 
thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return(error); } @@ -509,6 +619,12 @@ vfs_typenum(mount_t mp) return(mp->mnt_vtable->vfc_typenum); } +/* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers. */ +void* +vfs_mntlabel(mount_t mp) +{ + return (void*)mp->mnt_mntlabel; +} /* returns command modifier flags of mount_t ie. MNT_CMDFLAGS */ uint64_t @@ -584,7 +700,7 @@ vfs_isreload(mount_t mp) return ((mp->mnt_flag & MNT_UPDATE) && (mp->mnt_flag & MNT_RELOAD)); } -/* Is mount_t marked for reload (ie MNT_FORCE) */ +/* Is mount_t marked for forced unmount (ie MNT_FORCE or MNTK_FRCUNMOUNT) */ int vfs_isforce(mount_t mp) { @@ -594,10 +710,20 @@ vfs_isforce(mount_t mp) return(0); } +int +vfs_isunmount(mount_t mp) +{ + if ((mp->mnt_lflag & MNT_LUNMOUNT)) { + return 1; + } else { + return 0; + } +} + int vfs_64bitready(mount_t mp) { - if ((mp->mnt_vtable->vfc_64bitready)) + if ((mp->mnt_vtable->vfc_vfsflags & VFC_VFS64BITREADY)) return(1); else return(0); @@ -803,6 +929,19 @@ vfs_devblocksize(mount_t mp) { return(mp->mnt_devblocksize); } +/* + * Returns vnode with an iocount that must be released with vnode_put() + */ +vnode_t +vfs_vnodecovered(mount_t mp) +{ + vnode_t vp = mp->mnt_vnodecovered; + if ((vp == NULL) || (vnode_getwithref(vp) != 0)) { + return NULL; + } else { + return vp; + } +} /* * return the io attributes associated with mount_t @@ -882,10 +1021,16 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) return(EINVAL); desccount = vfe->vfe_vopcnt; - if ((desccount <=0) || ((desccount > 5)) || (vfe->vfe_vfsops == (struct vfsops *)NULL) + if ((desccount <=0) || ((desccount > 8)) || (vfe->vfe_vfsops == (struct vfsops *)NULL) || (vfe->vfe_opvdescs == (struct vnodeopv_desc **)NULL)) return(EINVAL); +#ifdef __LP64__ + /* Non-threadsafe filesystems are not supported for K64 */ + if ((vfe->vfe_flags & (VFS_TBLTHREADSAFE | VFS_TBLFSNODELOCK)) == 0) { + return (EINVAL); + } +#endif /* 
__LP64__ */ MALLOC(newvfstbl, void *, sizeof(struct vfstable), M_TEMP, M_WAITOK); @@ -901,14 +1046,19 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_flags = 0; newvfstbl->vfc_mountroot = NULL; newvfstbl->vfc_next = NULL; - newvfstbl->vfc_threadsafe = 0; newvfstbl->vfc_vfsflags = 0; if (vfe->vfe_flags & VFS_TBL64BITREADY) - newvfstbl->vfc_64bitready= 1; + newvfstbl->vfc_vfsflags |= VFC_VFS64BITREADY; + if (vfe->vfe_flags & VFS_TBLVNOP_PAGEINV2) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEINV2; + if (vfe->vfe_flags & VFS_TBLVNOP_PAGEOUTV2) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEOUTV2; +#ifndef __LP64__ if (vfe->vfe_flags & VFS_TBLTHREADSAFE) - newvfstbl->vfc_threadsafe= 1; + newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE; if (vfe->vfe_flags & VFS_TBLFSNODELOCK) - newvfstbl->vfc_threadsafe= 1; + newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE; +#endif /* __LP64__ */ if ((vfe->vfe_flags & VFS_TBLLOCALVOL) == VFS_TBLLOCALVOL) newvfstbl->vfc_flags |= MNT_LOCAL; if ((vfe->vfe_flags & VFS_TBLLOCALVOL) && (vfe->vfe_flags & VFS_TBLGENERICMNTARGS) == 0) @@ -958,7 +1108,7 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) /* * Sanity check: is this operation listed * in the list of operations? We check this - * by seeing if its offest is zero. Since + * by seeing if its offset is zero. Since * the default routine should always be listed * first, it should be the only one with a zero * offset. 
Any other operation with a zero @@ -1013,10 +1163,20 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) if (newvfstbl->vfc_typenum <= maxvfsconf ) maxvfsconf = newvfstbl->vfc_typenum + 1; - numused_vfsslots++; - if (newvfstbl->vfc_vfsops->vfs_init) - (*newvfstbl->vfc_vfsops->vfs_init)((struct vfsconf *)handle); + if (newvfstbl->vfc_vfsops->vfs_init) { + struct vfsconf vfsc; + bzero(&vfsc, sizeof(struct vfsconf)); + vfsc.vfc_reserved1 = 0; + bcopy((*handle)->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); + vfsc.vfc_typenum = (*handle)->vfc_typenum; + vfsc.vfc_refcount = (*handle)->vfc_refcount; + vfsc.vfc_flags = (*handle)->vfc_flags; + vfsc.vfc_reserved2 = 0; + vfsc.vfc_reserved3 = 0; + + (*newvfstbl->vfc_vfsops->vfs_init)(&vfsc); + } FREE(newvfstbl, M_TEMP); @@ -1041,7 +1201,6 @@ vfs_fsremove(vfstable_t handle) mount_list_unlock(); return EBUSY; } - mount_list_unlock(); /* * save the old descriptor; the free cannot occur unconditionally, @@ -1052,6 +1211,8 @@ vfs_fsremove(vfstable_t handle) } err = vfstable_del(vfstbl); + mount_list_unlock(); + /* free the descriptor if the delete was successful */ if (err == 0 && old_desc) { FREE(old_desc, M_TEMP); @@ -1060,25 +1221,6 @@ vfs_fsremove(vfstable_t handle) return(err); } -/* - * This returns a reference to mount_t - * which should be dropped using vfs_mountrele(). - * Not doing so will leak a mountpoint - * and associated data structures. - */ -errno_t -vfs_mountref(__unused mount_t mp ) /* gives a reference */ -{ - return(0); -} - -/* This drops the reference on mount_t that was acquired */ -errno_t -vfs_mountrele(__unused mount_t mp ) /* drops reference */ -{ - return(0); -} - int vfs_context_pid(vfs_context_t ctx) { @@ -1092,6 +1234,8 @@ vfs_context_suser(vfs_context_t ctx) } /* + * Return bit field of signals posted to all threads in the context's process. + * * XXX Signals should be tied to threads, not processes, for most uses of this * XXX call. 
*/ @@ -1259,7 +1403,19 @@ vfs_context_cwd(vfs_context_t ctx) return(cwd); } - +/* + * vfs_context_create + * + * Description: Allocate and initialize a new context. + * + * Parameters: vfs_context_t: Context to copy, or NULL for new + * + * Returns: Pointer to new context + * + * Notes: Copy cred and thread from argument, if available; else + * initialize with current thread and new cred. Returns + * with a reference held on the credential. + */ vfs_context_t vfs_context_create(vfs_context_t ctx) { @@ -1343,7 +1499,7 @@ vfs_context_rele(vfs_context_t ctx) } -ucred_t +kauth_cred_t vfs_context_ucred(vfs_context_t ctx) { return (ctx->vc_ucred); @@ -1358,6 +1514,26 @@ vfs_context_issuser(vfs_context_t ctx) return(kauth_cred_issuser(vfs_context_ucred(ctx))); } +/* + * Given a context, for all fields of vfs_context_t which + * are not held with a reference, set those fields to the + * values for the current execution context. Currently, this + * just means the vc_thread. + * + * Returns: 0 for success, nonzero for failure + * + * The intended use is: + * 1. vfs_context_create() gets the caller a context + * 2. vfs_context_bind() sets the unrefcounted data + * 3. vfs_context_rele() releases the context + * + */ +int +vfs_context_bind(vfs_context_t ctx) +{ + ctx->vc_thread = current_thread(); + return 0; +} /* XXXXXXXXXXXXXX VNODE KAPIS XXXXXXXXXXXXXXXXXXXXXXXXX */ @@ -1389,7 +1565,7 @@ vnode_makeimode(int indx, int mode) * vnode manipulation functions. 
*/ -/* returns system root vnode reference; It should be dropped using vrele() */ +/* returns system root vnode iocount; It should be released using vnode_put() */ vnode_t vfs_rootvnode(void) { @@ -1409,14 +1585,12 @@ vnode_vid(vnode_t vp) return ((uint32_t)(vp->v_id)); } -/* returns a mount reference; drop it with vfs_mountrelease() */ mount_t vnode_mount(vnode_t vp) { return (vp->v_mount); } -/* returns a mount reference iff vnode_t is a dir and is a mount point */ mount_t vnode_mountedhere(vnode_t vp) { @@ -1478,6 +1652,13 @@ vnode_isswap(vnode_t vp) return ((vp->v_flag & VSWAP)? 1 : 0); } +/* is vnode_t a tty */ +int +vnode_istty(vnode_t vp) +{ + return ((vp->v_flag & VISTTY) ? 1 : 0); +} + /* if vnode_t mount operation in progress */ int vnode_ismount(vnode_t vp) @@ -1497,6 +1678,14 @@ vnode_isrecycled(vnode_t vp) return(ret); } +/* vnode was created by background task requesting rapid aging + and has not since been referenced by a normal task */ +int +vnode_israge(vnode_t vp) +{ + return ((vp->v_flag & VRAGE)? 1 : 0); +} + /* is vnode_t marked to not keep data cached once it's been consumed */ int vnode_isnocache(vnode_t vp) @@ -1568,6 +1757,12 @@ vnode_isblk(vnode_t vp) return ((vp->v_type == VBLK)? 1 : 0); } +int +vnode_isspec(vnode_t vp) +{ + return (((vp->v_type == VCHR) || (vp->v_type == VBLK)) ? 1 : 0); +} + /* is vnode_t a char device? */ int vnode_ischr(vnode_t vp) @@ -1582,6 +1777,18 @@ vnode_issock(vnode_t vp) return ((vp->v_type == VSOCK)? 1 : 0); } +/* is vnode_t a device with multiple active vnodes referring to it? */ +int +vnode_isaliased(vnode_t vp) +{ + enum vtype vt = vp->v_type; + if (!((vt == VCHR) || (vt == VBLK))) { + return 0; + } else { + return (vp->v_specflags & SI_ALIASED); + } +} + /* is vnode_t a named stream? 
*/ int vnode_isnamedstream( @@ -1599,22 +1806,38 @@ vnode_isnamedstream( #endif } -int +int vnode_isshadow( #if NAMEDSTREAMS - vnode_t vp + vnode_t vp #else - __unused vnode_t vp + __unused vnode_t vp #endif - ) + ) { #if NAMEDSTREAMS - return ((vp->v_flag & VISSHADOW) ? 1 : 0); + return ((vp->v_flag & VISSHADOW) ? 1 : 0); #else - return (0); + return (0); #endif } +/* does vnode have associated named stream vnodes ? */ +int +vnode_hasnamedstreams( +#if NAMEDSTREAMS + vnode_t vp +#else + __unused vnode_t vp +#endif + ) +{ +#if NAMEDSTREAMS + return ((vp->v_lflag & VL_HASSTREAMS) ? 1 : 0); +#else + return (0); +#endif +} /* TBD: set vnode_t to not cache data after it is consumed once; used for quota */ void vnode_setnocache(vnode_t vp) @@ -1765,7 +1988,10 @@ int vnode_vfs64bitready(vnode_t vp) { - if ((vp->v_mount->mnt_vtable->vfc_64bitready)) + /* + * Checking for dead_mountp is a bit of a hack for SnowLeopard: + */ + if ((vp->v_mount != dead_mountp) && (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFS64BITREADY)) return(1); else return(0); @@ -2038,7 +2264,7 @@ vnode_set_filesec(vnode_t vp, kauth_filesec_t fsec, kauth_acl_t acl, vfs_context kauth_filesec_acl_setendian(KAUTH_ENDIAN_DISK, fsec, acl); - uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), sizeof(struct kauth_filesec) - sizeof(struct kauth_acl)); + uio_addiov(fsec_uio, CAST_USER_ADDR_T(fsec), KAUTH_FILESEC_SIZE(0) - KAUTH_ACL_SIZE(KAUTH_FILESEC_NOACL)); uio_addiov(fsec_uio, CAST_USER_ADDR_T(acl), saved_acl_copysize); error = vn_setxattr(vp, KAUTH_FILESEC_XATTR, @@ -2350,8 +2576,8 @@ vnode_setattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) #if CONFIG_FSE // only send a stat_changed event if this is more than - // just an access time update - if (error == 0 && (vap->va_active != VNODE_ATTR_BIT(va_access_time))) { + // just an access or backup time update + if (error == 0 && (vap->va_active != VNODE_ATTR_BIT(va_access_time)) && (vap->va_active != VNODE_ATTR_BIT(va_backup_time))) { if (is_perm_change) 
{ if (need_fsevent(FSE_CHOWN, vp)) { add_fsevent(FSE_CHOWN, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); @@ -2505,6 +2731,86 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) return(error); } +/* + * Upcall for a filesystem to tell VFS about an EVFILT_VNODE-type + * event on a vnode. + */ +int +vnode_notify(vnode_t vp, uint32_t events, struct vnode_attr *vap) +{ + /* These are the same as the corresponding knotes, at least for now. Cheating a little. */ + uint32_t knote_mask = (VNODE_EVENT_WRITE | VNODE_EVENT_DELETE | VNODE_EVENT_RENAME + | VNODE_EVENT_LINK | VNODE_EVENT_EXTEND | VNODE_EVENT_ATTRIB); + uint32_t dir_contents_mask = (VNODE_EVENT_DIR_CREATED | VNODE_EVENT_FILE_CREATED + | VNODE_EVENT_DIR_REMOVED | VNODE_EVENT_FILE_REMOVED); + uint32_t knote_events = (events & knote_mask); + + /* Permissions are not explicitly part of the kqueue model */ + if (events & VNODE_EVENT_PERMS) { + knote_events |= NOTE_ATTRIB; + } + + /* Directory contents information just becomes NOTE_WRITE */ + if ((vnode_isdir(vp)) && (events & dir_contents_mask)) { + knote_events |= NOTE_WRITE; + } + + if (knote_events) { + lock_vnode_and_post(vp, knote_events); +#if CONFIG_FSE + if (vap != NULL) { + create_fsevent_from_kevent(vp, events, vap); + } +#else + (void)vap; +#endif + } + + return 0; +} + +/* + * For a filesystem that isn't tracking its own vnode watchers: + * check whether a vnode is being monitored. + */ +int +vnode_ismonitored(vnode_t vp) { + return (vp->v_knotes.slh_first != NULL); +} + +/* + * Conceived as a function available only in BSD kernel so that if kevent_register + * changes what a knote of type EVFILT_VNODE is watching, it can push + * that updated information down to a networked filesystem that may + * need to update server-side monitoring. + * + * Blunted to do nothing--because we want to get both kqueue and fsevents support + * from the VNOP_MONITOR design, we always want all the events a filesystem can provide us. 
+ */ +void +vnode_knoteupdate(__unused struct knote *kn) +{ +#if 0 + vnode_t vp = (vnode_t)kn->kn_hook; + if (vnode_getwithvid(vp, kn->kn_hookid) == 0) { + VNOP_MONITOR(vp, kn->kn_sfflags, VNODE_MONITOR_UPDATE, (void*)kn, NULL); + vnode_put(vp); + } +#endif +} + +/* + * Initialize a struct vnode_attr and activate the attributes required + * by the vnode_notify() call. + */ +int +vfs_get_notify_attributes(struct vnode_attr *vap) +{ + VATTR_INIT(vap); + vap->va_active = VNODE_NOTIFY_ATTRS; + return 0; +} + /* * Definition of vnode operations. */ @@ -2552,25 +2858,31 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t int _err; struct vnop_lookup_args a; vnode_t vp; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_lookup_desc; a.a_dvp = dvp; a.a_vpp = vpp; a.a_cnp = cnp; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a); vp = *vpp; +#ifndef __LP64__ if (!thread_safe) { if ( (cnp->cn_flags & ISLASTCN) ) { if ( (cnp->cn_flags & LOCKPARENT) ) { @@ -2591,6 +2903,8 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t } unlock_fsnode(dvp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -2616,8 +2930,10 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode { int _err; struct vnop_create_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_create_desc; a.a_dvp = dvp; @@ -2625,23 +2941,32 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode a.a_cnp = cnp; a.a_vap = vap; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = 
lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a); if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). */ - xattrfile_remove(dvp, cnp->cn_nameptr, ctx, thread_safe, 0); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(dvp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -2666,25 +2991,36 @@ VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t { int _err; struct vnop_whiteout_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_whiteout_desc; a.a_dvp = dvp; a.a_cnp = cnp; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(dvp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -2710,8 +3046,10 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_ int _err; struct vnop_mknod_args a; - int thread_safe; - int funnel_state = 0; +#ifndef __LP64__ + int thread_safe; + int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_mknod_desc; a.a_dvp = dvp; @@ -2719,17 +3057,26 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_ a.a_cnp = cnp; a.a_vap = vap; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_mknod_desc.vdesc_offset])(&a); + 
+#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(dvp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -2751,8 +3098,10 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) { int _err; struct vnop_open_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -2761,8 +3110,9 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) a.a_vp = vp; a.a_mode = mode; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -2772,13 +3122,19 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -2800,8 +3156,10 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) { int _err; struct vnop_close_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -2810,8 +3168,9 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) a.a_vp = vp; a.a_fflag = fflag; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -2821,13 +3180,19 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && 
vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -2849,8 +3214,10 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx) { int _err; struct vnop_access_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -2859,17 +3226,24 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx) a.a_vp = vp; a.a_action = action; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_access_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -2891,24 +3265,33 @@ VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_getattr_args a; +#ifndef __LP64__ int thread_safe; - int funnel_state = 0; /* protected by thread_safe */ + int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_getattr_desc; a.a_vp = vp; a.a_vap = vap; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_getattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -2930,20 +3313,25 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_setattr_args a; +#ifndef __LP64__ int thread_safe; - int funnel_state = 0; /* protected by thread_safe */ + int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_setattr_desc; a.a_vp = vp; a.a_vap = vap; a.a_context = ctx; - 
thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_setattr_desc.vdesc_offset])(&a); /* @@ -2973,16 +3361,20 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) dvp = vnode_getparent(vp); vname = vnode_getname(vp); - xattrfile_setattr(dvp, vname, &va, ctx, thread_safe); + xattrfile_setattr(dvp, vname, &va, ctx); if (dvp != NULLVP) vnode_put(dvp); if (vname != NULL) vnode_putname(vname); } } + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + /* * If we have changed any of the things about the file that are likely * to result in changes to authorization results, blow the vnode auth @@ -2995,9 +3387,23 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) VATTR_IS_SUPPORTED(vap, va_flags) || VATTR_IS_SUPPORTED(vap, va_acl) || VATTR_IS_SUPPORTED(vap, va_uuuid) || - VATTR_IS_SUPPORTED(vap, va_guuid))) + VATTR_IS_SUPPORTED(vap, va_guuid))) { vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS); +#if NAMEDSTREAMS + if (vfs_authopaque(vp->v_mount) && vnode_hasnamedstreams(vp)) { + vnode_t svp; + if (vnode_getnamedstream(vp, &svp, XATTR_RESOURCEFORK_NAME, NS_OPEN, 0, ctx) == 0) { + vnode_uncache_authorized_action(svp, KAUTH_INVALIDATE_CACHED_RIGHTS); + vnode_put(svp); + } + } +#endif /* NAMEDSTREAMS */ + } + + + post_event_if_success(vp, _err, NOTE_ATTRIB); + return (_err); } @@ -3021,8 +3427,10 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { int _err; struct vnop_read_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3033,8 +3441,9 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) a.a_uio = uio; a.a_ioflag = ioflag; a.a_context = ctx; - thread_safe = 
THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -3044,14 +3453,19 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a); +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3075,8 +3489,10 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { struct vnop_write_args a; int _err; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3087,8 +3503,9 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) a.a_uio = uio; a.a_ioflag = ioflag; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -3098,14 +3515,21 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_write_desc.vdesc_offset])(&a); +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(vp, _err, NOTE_WRITE); + return (_err); } @@ -3130,15 +3554,31 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct { int _err; struct vnop_ioctl_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = 
vfs_context_current(); } - if (vfs_context_is64bit(ctx)) { - if (!vnode_vfs64bitready(vp)) { + /* + * This check should probably have been put in the TTY code instead... + * + * We have to be careful about what we assume during startup and shutdown. + * We have to be able to use the root filesystem's device vnode even when + * devfs isn't mounted (yet/anymore), so we can't go looking at its mount + * structure. If there is no data pointer, it doesn't matter whether + * the device is 64-bit ready. Any command (like DKIOCSYNCHRONIZECACHE) + * which passes NULL for its data pointer can therefore be used during + * mount or unmount of the root filesystem. + * + * Depending on what root filesystems need to do during mount/unmount, we + * may need to loosen this check again in the future. + */ + if (vfs_context_is64bit(ctx) && !(vnode_ischr(vp) || vnode_isblk(vp))) { + if (data != NULL && !vnode_vfs64bitready(vp)) { return(ENOTTY); } } @@ -3149,8 +3589,9 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct a.a_data = data; a.a_fflag = fflag; a.a_context= ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -3160,13 +3601,19 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_ioctl_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3191,8 +3638,10 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) { int _err; struct vnop_select_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == 
NULL) { ctx = vfs_context_current(); @@ -3203,8 +3652,9 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) a.a_fflags = fflags; a.a_context = ctx; a.a_wql = wql; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { @@ -3214,13 +3664,19 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) } } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_select_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { unlock_fsnode(vp, NULL); } (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3245,17 +3701,20 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx) { int _err; struct vnop_exchange_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; vnode_t lock_first = NULL, lock_second = NULL; +#endif /* __LP64__ */ a.a_desc = &vnop_exchange_desc; a.a_fvp = fvp; a.a_tvp = tvp; a.a_options = options; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(fvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(fvp); if (!thread_safe) { /* * Lock in vnode address order to avoid deadlocks @@ -3275,11 +3734,21 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx) return (_err); } } +#endif /* __LP64__ */ + _err = (*fvp->v_op[vnop_exchange_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(lock_second, NULL); unlock_fsnode(lock_first, &funnel_state); } +#endif /* __LP64__ */ + + /* Don't post NOTE_WRITE because file descriptors follow the data ... 
*/ + post_event_if_success(fvp, _err, NOTE_ATTRIB); + post_event_if_success(tvp, _err, NOTE_ATTRIB); + return (_err); } @@ -3302,22 +3771,31 @@ VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t ctx) { struct vnop_revoke_args a; int _err; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_revoke_desc; a.a_vp = vp; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_revoke_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3340,24 +3818,33 @@ VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t ctx) { int _err; struct vnop_mmap_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_mmap_desc; a.a_vp = vp; a.a_fflags = fflags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_mmap_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3379,23 +3866,32 @@ VNOP_MNOMAP(vnode_t vp, vfs_context_t ctx) { int _err; struct vnop_mnomap_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_mnomap_desc; a.a_vp = vp; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_mnomap_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, 
&funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3418,24 +3914,33 @@ VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t ctx) { struct vnop_fsync_args a; int _err; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_fsync_desc; a.a_vp = vp; a.a_waitfor = waitfor; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_fsync_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -3461,8 +3966,10 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_ { int _err; struct vnop_remove_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_remove_desc; a.a_dvp = dvp; @@ -3470,13 +3977,16 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_ a.a_cnp = cnp; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); if (_err == 0) { @@ -3486,12 +3996,19 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_ /* * Remove any associated extended attribute file (._ AppleDouble file). 
*/ - xattrfile_remove(dvp, cnp->cn_nameptr, ctx, thread_safe, 1); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); } } + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); + post_event_if_success(dvp, _err, NOTE_WRITE); + return (_err); } @@ -3516,8 +4033,10 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct { int _err; struct vnop_link_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ /* * For file systems with non-native extended attributes, @@ -3542,17 +4061,27 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct a.a_tdvp = tdvp; a.a_cnp = cnp; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*tdvp->v_op[vnop_link_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(vp, _err, NOTE_LINK); + post_event_if_success(tdvp, _err, NOTE_WRITE); + return (_err); } @@ -3582,16 +4111,23 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, struct vnode *tdvp, struct vnode *tvp, struct componentname *tcnp, vfs_context_t ctx) { - int _err; + int _err = 0; + int events; struct vnop_rename_args a; - int funnel_state = 0; char smallname1[48]; char smallname2[48]; char *xfromname = NULL; char *xtoname = NULL; +#ifndef __LP64__ + int funnel_state = 0; vnode_t lock_first = NULL, lock_second = NULL; vnode_t fdvp_unsafe = NULLVP; vnode_t tdvp_unsafe = NULLVP; +#endif /* __LP64__ */ + vnode_t src_attr_vp = NULLVP; + vnode_t dst_attr_vp = NULLVP; + struct nameidata fromnd; + struct nameidata tond; a.a_desc = &vnop_rename_desc; a.a_fdvp = fdvp; @@ -3602,6 +4138,7 @@ 
VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, a.a_tcnp = tcnp; a.a_context = ctx; +#ifndef __LP64__ if (!THREAD_SAFE_FS(fdvp)) fdvp_unsafe = fdvp; if (!THREAD_SAFE_FS(tdvp)) @@ -3615,52 +4152,59 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, * in the root of a filesystem... in that case, tdvp is the * in the filesystem that this root is mounted on */ - if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) { - lock_first = fdvp_unsafe; + if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) { + lock_first = fdvp_unsafe; lock_second = NULL; } else if (fdvp_unsafe < tdvp_unsafe) { - lock_first = fdvp_unsafe; + lock_first = fdvp_unsafe; lock_second = tdvp_unsafe; } else { - lock_first = tdvp_unsafe; + lock_first = tdvp_unsafe; lock_second = fdvp_unsafe; } if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) - return (_err); + return (_err); if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { - unlock_fsnode(lock_first, &funnel_state); + unlock_fsnode(lock_first, &funnel_state); return (_err); } /* * Lock both children in vnode address order to avoid deadlocks */ - if (tvp == NULL || tvp == fvp) { - lock_first = fvp; + if (tvp == NULL || tvp == fvp) { + lock_first = fvp; lock_second = NULL; } else if (fvp < tvp) { - lock_first = fvp; + lock_first = fvp; lock_second = tvp; } else { - lock_first = tvp; + lock_first = tvp; lock_second = fvp; } if ( (_err = lock_fsnode(lock_first, NULL)) ) - goto out1; + goto out1; if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { unlock_fsnode(lock_first, NULL); goto out1; } } +#endif /* __LP64__ */ + /* - * Save source and destination names (._ AppleDouble files). - * Skip if source already has a "._" prefix. 
+ * We need to preflight any potential AppleDouble file for the source file + * before doing the rename operation, since we could potentially be doing + * this operation on a network filesystem, and would end up duplicating + * the work. Also, save the source and destination names. Skip it if the + * source has a "._" prefix. */ + if (!NATIVE_XATTR(fdvp) && !(fcnp->cn_nameptr[0] == '.' && fcnp->cn_nameptr[1] == '_')) { size_t len; + int error; /* Get source attribute file name. */ len = fcnp->cn_namelen + 3; @@ -3683,15 +4227,58 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, strlcpy(xtoname, "._", min(sizeof smallname2, len)); strncat(xtoname, tcnp->cn_nameptr, tcnp->cn_namelen); xtoname[len-1] = '\0'; + + /* + * Look up source attribute file, keep reference on it if exists. + * Note that we do the namei with the nameiop of RENAME, which is different than + * in the rename syscall. It's OK if the source file does not exist, since this + * is only for AppleDouble files. + */ + if (xfromname != NULL) { + NDINIT(&fromnd, RENAME, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, + CAST_USER_ADDR_T(xfromname), ctx); + fromnd.ni_dvp = fdvp; + error = namei(&fromnd); + + /* + * If there was an error looking up source attribute file, + * we'll behave as if it didn't exist. + */ + + if (error == 0) { + if (fromnd.ni_vp) { + /* src_attr_vp indicates need to call vnode_put / nameidone later */ + src_attr_vp = fromnd.ni_vp; + + if (fromnd.ni_vp->v_type != VREG) { + src_attr_vp = NULLVP; + vnode_put(fromnd.ni_vp); + } + } + /* + * Either we got an invalid vnode type (not a regular file) or the namei lookup + * suppressed ENOENT as a valid error since we're renaming. Either way, we don't + * have a vnode here, so we drop our namei buffer for the source attribute file + */ + if (src_attr_vp == NULLVP) { + nameidone(&fromnd); + } + } + } } + + /* do the rename of the main file. 
*/ _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); +#ifndef __LP64__ if (fdvp_unsafe != NULLVP) { if (lock_second != NULL) unlock_fsnode(lock_second, NULL); unlock_fsnode(lock_first, NULL); } +#endif /* __LP64__ */ + if (_err == 0) { if (tvp && tvp != fvp) vnode_setneedinactive(tvp); @@ -3701,175 +4288,203 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, * Rename any associated extended attribute file (._ AppleDouble file). */ if (_err == 0 && !NATIVE_XATTR(fdvp) && xfromname != NULL) { - struct nameidata fromnd, tond; - int killdest = 0; - int error; - + int error = 0; + /* - * Get source attribute file vnode. - * Note that fdvp already has an iocount reference and - * using DELETE will take an additional reference. + * Get destination attribute file vnode. + * Note that tdvp already has an iocount reference. Make sure to check that we + * get a valid vnode from namei. */ - NDINIT(&fromnd, DELETE, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, - CAST_USER_ADDR_T(xfromname), ctx); - fromnd.ni_dvp = fdvp; - error = namei(&fromnd); - - if (error) { - /* When source doesn't exist there still may be a destination. 
*/ - if (error == ENOENT) { - killdest = 1; - } else { - goto out; - } - } else if (fromnd.ni_vp->v_type != VREG) { - vnode_put(fromnd.ni_vp); - nameidone(&fromnd); - killdest = 1; + NDINIT(&tond, RENAME, + NOCACHE | NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, + CAST_USER_ADDR_T(xtoname), ctx); + tond.ni_dvp = tdvp; + error = namei(&tond); + + if (error) + goto out; + + if (tond.ni_vp) { + dst_attr_vp = tond.ni_vp; } - if (killdest) { - struct vnop_remove_args args; + + if (src_attr_vp) { + /* attempt to rename src -> dst */ + + a.a_desc = &vnop_rename_desc; + a.a_fdvp = fdvp; + a.a_fvp = src_attr_vp; + a.a_fcnp = &fromnd.ni_cnd; + a.a_tdvp = tdvp; + a.a_tvp = dst_attr_vp; + a.a_tcnp = &tond.ni_cnd; + a.a_context = ctx; + +#ifndef __LP64__ + if (fdvp_unsafe != NULLVP) { + /* + * Lock in vnode address order to avoid deadlocks + */ + if (dst_attr_vp == NULL || dst_attr_vp == src_attr_vp) { + lock_first = src_attr_vp; + lock_second = NULL; + } else if (src_attr_vp < dst_attr_vp) { + lock_first = src_attr_vp; + lock_second = dst_attr_vp; + } else { + lock_first = dst_attr_vp; + lock_second = src_attr_vp; + } + if ( (error = lock_fsnode(lock_first, NULL)) == 0) { + if (lock_second != NULL && (error = lock_fsnode(lock_second, NULL)) ) + unlock_fsnode(lock_first, NULL); + } + } +#endif /* __LP64__ */ + if (error == 0) { + const char *oname; + vnode_t oparent; + + /* Save these off so we can later verify them (fix up below) */ + oname = src_attr_vp->v_name; + oparent = src_attr_vp->v_parent; + + error = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); + +#ifndef __LP64__ + if (fdvp_unsafe != NULLVP) { + if (lock_second != NULL) + unlock_fsnode(lock_second, NULL); + unlock_fsnode(lock_first, NULL); + } +#endif /* __LP64__ */ + + if (error == 0) { + vnode_setneedinactive(src_attr_vp); + + if (dst_attr_vp && dst_attr_vp != src_attr_vp) + vnode_setneedinactive(dst_attr_vp); + /* + * Fix up name & parent pointers on ._ file + */ + if (oname == src_attr_vp->v_name && + 
oparent == src_attr_vp->v_parent) { + int update_flags; + + update_flags = VNODE_UPDATE_NAME; + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + + vnode_update_identity(src_attr_vp, tdvp, + tond.ni_cnd.cn_nameptr, + tond.ni_cnd.cn_namelen, + tond.ni_cnd.cn_hash, + update_flags); + } + } + } + /* kevent notifications for moving resource files + * _err is zero if we're here, so no need to notify directories, code + * below will do that. only need to post the rename on the source and + * possibly a delete on the dest + */ + post_event_if_success(src_attr_vp, error, NOTE_RENAME); + if (dst_attr_vp) { + post_event_if_success(dst_attr_vp, error, NOTE_DELETE); + } + + } else if (dst_attr_vp) { /* - * Get destination attribute file vnode. + * Just delete destination attribute file vnode if it exists, since + * we didn't have a source attribute file. * Note that tdvp already has an iocount reference. */ - NDINIT(&tond, DELETE, NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, - CAST_USER_ADDR_T(xtoname), ctx); - tond.ni_dvp = tdvp; - error = namei(&tond); - if (error) { - goto out; - } - if (tond.ni_vp->v_type != VREG) { - vnode_put(tond.ni_vp); - nameidone(&tond); - goto out; - } + + struct vnop_remove_args args; + args.a_desc = &vnop_remove_desc; args.a_dvp = tdvp; - args.a_vp = tond.ni_vp; + args.a_vp = dst_attr_vp; args.a_cnp = &tond.ni_cnd; args.a_context = ctx; +#ifndef __LP64__ if (fdvp_unsafe != NULLVP) - error = lock_fsnode(tond.ni_vp, NULL); + error = lock_fsnode(dst_attr_vp, NULL); +#endif /* __LP64__ */ if (error == 0) { - error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); + error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); +#ifndef __LP64__ if (fdvp_unsafe != NULLVP) - unlock_fsnode(tond.ni_vp, NULL); + unlock_fsnode(dst_attr_vp, NULL); +#endif /* __LP64__ */ if (error == 0) - vnode_setneedinactive(tond.ni_vp); + vnode_setneedinactive(dst_attr_vp); } - vnode_put(tond.ni_vp); - nameidone(&tond); - goto out; - } - - /* - * Get 
destination attribute file vnode. - */ - NDINIT(&tond, RENAME, - NOCACHE | NOFOLLOW | USEDVP | CN_NBMOUNTLOOK, UIO_SYSSPACE, - CAST_USER_ADDR_T(xtoname), ctx); - tond.ni_dvp = tdvp; - error = namei(&tond); - - if (error) { - vnode_put(fromnd.ni_vp); - nameidone(&fromnd); - goto out; - } - a.a_desc = &vnop_rename_desc; - a.a_fdvp = fdvp; - a.a_fvp = fromnd.ni_vp; - a.a_fcnp = &fromnd.ni_cnd; - a.a_tdvp = tdvp; - a.a_tvp = tond.ni_vp; - a.a_tcnp = &tond.ni_cnd; - a.a_context = ctx; - - if (fdvp_unsafe != NULLVP) { - /* - * Lock in vnode address order to avoid deadlocks - */ - if (tond.ni_vp == NULL || tond.ni_vp == fromnd.ni_vp) { - lock_first = fromnd.ni_vp; - lock_second = NULL; - } else if (fromnd.ni_vp < tond.ni_vp) { - lock_first = fromnd.ni_vp; - lock_second = tond.ni_vp; - } else { - lock_first = tond.ni_vp; - lock_second = fromnd.ni_vp; - } - if ( (error = lock_fsnode(lock_first, NULL)) == 0) { - if (lock_second != NULL && (error = lock_fsnode(lock_second, NULL)) ) - unlock_fsnode(lock_first, NULL); - } - } - if (error == 0) { - const char *oname; - vnode_t oparent; - - /* Save these off so we can later verify them (fix up below) */ - oname = fromnd.ni_vp->v_name; - oparent = fromnd.ni_vp->v_parent; - - error = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); - - if (fdvp_unsafe != NULLVP) { - if (lock_second != NULL) - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, NULL); - } - if (error == 0) { - vnode_setneedinactive(fromnd.ni_vp); - - if (tond.ni_vp && tond.ni_vp != fromnd.ni_vp) - vnode_setneedinactive(tond.ni_vp); - /* - * Fix up name & parent pointers on ._ file - */ - if (oname == fromnd.ni_vp->v_name && - oparent == fromnd.ni_vp->v_parent) { - int update_flags; - - update_flags = VNODE_UPDATE_NAME; - if (fdvp != tdvp) - update_flags |= VNODE_UPDATE_PARENT; - - vnode_update_identity(fromnd.ni_vp, tdvp, - tond.ni_cnd.cn_nameptr, - tond.ni_cnd.cn_namelen, - tond.ni_cnd.cn_hash, - update_flags); - } - } - } - vnode_put(fromnd.ni_vp); - if 
(tond.ni_vp) { - vnode_put(tond.ni_vp); + /* kevent notification for deleting the destination's attribute file + * if it existed. Only need to post the delete on the destination, since + * the code below will handle the directories. + */ + post_event_if_success(dst_attr_vp, error, NOTE_DELETE); } - nameidone(&tond); - nameidone(&fromnd); } out: + if (src_attr_vp) { + vnode_put(src_attr_vp); + nameidone(&fromnd); + } + if (dst_attr_vp) { + vnode_put(dst_attr_vp); + nameidone(&tond); + } + if (xfromname && xfromname != &smallname1[0]) { FREE(xfromname, M_TEMP); } if (xtoname && xtoname != &smallname2[0]) { FREE(xtoname, M_TEMP); } + +#ifndef __LP64__ out1: if (fdvp_unsafe != NULLVP) { if (tdvp_unsafe != NULLVP) unlock_fsnode(tdvp_unsafe, NULL); unlock_fsnode(fdvp_unsafe, &funnel_state); } +#endif /* __LP64__ */ + + /* Wrote at least one directory. If transplanted a dir, also changed link counts */ + if (0 == _err) { + events = NOTE_WRITE; + if (vnode_isdir(fvp)) { + /* Link count on dir changed only if we are moving a dir and... 
+ * --Moved to new dir, not overwriting there + * --Kept in same dir and DID overwrite + */ + if (((fdvp != tdvp) && (!tvp)) || ((fdvp == tdvp) && (tvp))) { + events |= NOTE_LINK; + } + } + + lock_vnode_and_post(fdvp, events); + if (fdvp != tdvp) { + lock_vnode_and_post(tdvp, events); + } + + /* If you're replacing the target, post a deletion for it */ + if (tvp) + { + lock_vnode_and_post(tvp, NOTE_DELETE); + } + + lock_vnode_and_post(fvp, NOTE_RENAME); + } + return (_err); } @@ -3895,8 +4510,10 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, { int _err; struct vnop_mkdir_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_mkdir_desc; a.a_dvp = dvp; @@ -3904,23 +4521,32 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, a.a_cnp = cnp; a.a_vap = vap; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_mkdir_desc.vdesc_offset])(&a); if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
*/ - xattrfile_remove(dvp, cnp->cn_nameptr, ctx, thread_safe, 0); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(dvp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + return (_err); } @@ -3946,21 +4572,26 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c { int _err; struct vnop_rmdir_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_rmdir_desc; a.a_dvp = dvp; a.a_vp = vp; a.a_cnp = cnp; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_rmdir_desc.vdesc_offset])(&a); if (_err == 0) { @@ -3970,12 +4601,20 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c /* * Remove any associated extended attribute file (._ AppleDouble file). */ - xattrfile_remove(dvp, cnp->cn_nameptr, ctx, thread_safe, 1); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); } } + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + + /* If you delete a dir, it loses its "." 
reference --> NOTE_LINK */ + post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); + post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); + return (_err); } @@ -3984,7 +4623,8 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c */ #define AD_STALE_SECS (180) static void -xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int thread_safe, int force) { +xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int force) +{ vnode_t xvp; struct nameidata nd; char smallname[64]; @@ -4040,6 +4680,9 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int thre if (force) { struct vnop_remove_args a; int error; +#ifndef __LP64__ + int thread_safe = THREAD_SAFE_FS(dvp); +#endif /* __LP64__ */ a.a_desc = &vnop_remove_desc; a.a_dvp = nd.ni_dvp; @@ -4047,18 +4690,27 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int thre a.a_cnp = &nd.ni_cnd; a.a_context = ctx; +#ifndef __LP64__ if (!thread_safe) { if ( (lock_fsnode(xvp, NULL)) ) goto out1; } +#endif /* __LP64__ */ + error = (*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); +#ifndef __LP64__ if (!thread_safe) unlock_fsnode(xvp, NULL); +#endif /* __LP64__ */ if (error == 0) vnode_setneedinactive(xvp); + + post_event_if_success(xvp, error, NOTE_DELETE); + post_event_if_success(dvp, error, NOTE_WRITE); } + out1: vnode_put(dvp); vnode_put(xvp); @@ -4073,7 +4725,8 @@ xattrfile_remove(vnode_t dvp, const char * basename, vfs_context_t ctx, int thre */ static void xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, - vfs_context_t ctx, int thread_safe) { + vfs_context_t ctx) +{ vnode_t xvp; struct nameidata nd; char smallname[64]; @@ -4102,6 +4755,9 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, nameidone(&nd); if (xvp->v_type == VREG) { +#ifndef __LP64__ + int thread_safe = THREAD_SAFE_FS(dvp); +#endif /* __LP64__ */ struct vnop_setattr_args a; 
a.a_desc = &vnop_setattr_desc; @@ -4109,17 +4765,28 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, a.a_vap = vap; a.a_context = ctx; +#ifndef __LP64__ if (!thread_safe) { if ( (lock_fsnode(xvp, NULL)) ) goto out1; } +#endif /* __LP64__ */ + (void) (*xvp->v_op[vnop_setattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(xvp, NULL); } +#endif /* __LP64__ */ } + + +#ifndef __LP64__ out1: +#endif /* __LP64__ */ vnode_put(xvp); + out2: if (filename && filename != &smallname[0]) { FREE(filename, M_TEMP); @@ -4150,8 +4817,10 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, { int _err; struct vnop_symlink_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_symlink_desc; a.a_dvp = dvp; @@ -4160,24 +4829,34 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, a.a_vap = vap; a.a_target = target; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(dvp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(dvp); if (!thread_safe) { if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*dvp->v_op[vnop_symlink_desc.vdesc_offset])(&a); if (_err == 0 && !NATIVE_XATTR(dvp)) { /* - * Remove stale Apple Double file (if any). + * Remove stale Apple Double file (if any). 
Posts its own knotes */ - xattrfile_remove(dvp, cnp->cn_nameptr, ctx, thread_safe, 0); + xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } - return (_err); + + +#ifndef __LP64__ + if (!thread_safe) { + unlock_fsnode(dvp, &funnel_state); + } +#endif /* __LP64__ */ + + post_event_if_success(dvp, _err, NOTE_WRITE); + + return (_err); } #if 0 @@ -4203,8 +4882,10 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, { int _err; struct vnop_readdir_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_readdir_desc; a.a_vp = vp; @@ -4213,6 +4894,7 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, a.a_eofflag = eofflag; a.a_numdirent = numdirent; a.a_context = ctx; +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { @@ -4220,10 +4902,15 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_readdir_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ return (_err); } @@ -4238,23 +4925,25 @@ struct vnop_readdirattr_args { vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; - u_long a_maxcount; - u_long a_options; - u_long *a_newstate; + uint32_t a_maxcount; + uint32_t a_options; + uint32_t *a_newstate; int *a_eofflag; - u_long *a_actualcount; + uint32_t *a_actualcount; vfs_context_t a_context; }; #endif /* 0*/ errno_t -VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_long maxcount, - u_long options, u_long *newstate, int *eofflag, u_long *actualcount, vfs_context_t ctx) +VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint32_t maxcount, + uint32_t options, uint32_t *newstate, int *eofflag, uint32_t *actualcount, vfs_context_t ctx) { int _err; struct vnop_readdirattr_args a; 
+#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_readdirattr_desc; a.a_vp = vp; @@ -4266,17 +4955,24 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, u_lo a.a_eofflag = eofflag; a.a_actualcount = actualcount; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_readdirattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4318,24 +5014,33 @@ VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t ctx) { int _err; struct vnop_readlink_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_readlink_desc; a.a_vp = vp; a.a_uio = uio; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_readlink_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4356,34 +5061,42 @@ VNOP_INACTIVE(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_inactive_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_inactive_desc; a.a_vp = vp; a.a_context = ctx; + +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_inactive_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ #if NAMEDSTREAMS - /* For file systems that do not 
support namedstreams natively, mark - * the shadow stream file vnode to be recycled as soon as the last - * reference goes away. To avoid re-entering reclaim code, do not - * call recycle on terminating named stream vnodes. + /* For file systems that do not support namedstream natively, mark + * the shadow stream file vnode to be recycled as soon as the last + * reference goes away. To avoid re-entering reclaim code, do not + * call recycle on terminating namedstream vnodes. */ if (vnode_isnamedstream(vp) && - (vp->v_parent != NULLVP) && - (vnode_isshadow(vp)) && - ((vp->v_lflag & VL_TERMINATE) == 0)) { + (vp->v_parent != NULLVP) && + vnode_isshadow(vp) && + ((vp->v_lflag & VL_TERMINATE) == 0)) { vnode_recycle(vp); } #endif @@ -4409,21 +5122,30 @@ VNOP_RECLAIM(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_reclaim_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_reclaim_desc; a.a_vp = vp; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_reclaim_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4445,34 +5167,43 @@ struct vnop_pathconf_args { struct vnodeop_desc *a_desc; vnode_t a_vp; int a_name; - register_t *a_retval; + int32_t *a_retval; vfs_context_t a_context; }; #endif /* 0*/ errno_t -VNOP_PATHCONF(struct vnode *vp, int name, register_t *retval, vfs_context_t ctx) +VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx) { int _err; struct vnop_pathconf_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_pathconf_desc; a.a_vp = vp; a.a_name = name; a.a_retval = retval; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); 
+#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_pathconf_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4508,9 +5239,10 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, { int _err; struct vnop_advlock_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; - struct uthread * uth; +#endif /* __LP64__ */ a.a_desc = &vnop_advlock_desc; a.a_vp = vp; @@ -4519,12 +5251,14 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, a.a_fl = fl; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); - uth = get_bsdthread_info(current_thread()); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + /* Disallow advisory locking on non-seekable vnodes */ if (vnode_isfifo(vp)) { _err = err_advlock(&a); @@ -4537,9 +5271,13 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a); } } + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4567,8 +5305,10 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc { int _err; struct vnop_allocate_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_allocate_desc; a.a_vp = vp; @@ -4577,17 +5317,29 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc a.a_bytesallocated = bytesallocated; a.a_offset = offset; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( 
(_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_allocate_desc.vdesc_offset])(&a); +#if CONFIG_FSE + if (_err == 0) { + add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } +#endif + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4601,7 +5353,7 @@ struct vnop_pagein_args { struct vnodeop_desc *a_desc; vnode_t a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; @@ -4609,12 +5361,14 @@ struct vnop_pagein_args { }; #endif /* 0*/ errno_t -VNOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t ctx) +VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t ctx) { int _err; struct vnop_pagein_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_pagein_desc; a.a_vp = vp; @@ -4624,15 +5378,22 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, s a.a_size = size; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_pagein_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4646,7 +5407,7 @@ struct vnop_pageout_args { struct vnodeop_desc *a_desc; vnode_t a_vp; upl_t a_pl; - vm_offset_t a_pl_offset; + upl_offset_t a_pl_offset; off_t a_f_offset; size_t a_size; int a_flags; @@ -4655,12 +5416,14 @@ struct vnop_pageout_args { #endif /* 0*/ errno_t -VNOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, size_t size, int flags, 
vfs_context_t ctx) +VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, size_t size, int flags, vfs_context_t ctx) { int _err; struct vnop_pageout_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_pageout_desc; a.a_vp = vp; @@ -4670,15 +5433,24 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, vm_offset_t pl_offset, off_t f_offset, a.a_size = size; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_pageout_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(vp, _err, NOTE_WRITE); + return (_err); } @@ -4695,12 +5467,12 @@ struct vnop_searchfs_args { void *a_searchparams1; void *a_searchparams2; struct attrlist *a_searchattrs; - u_long a_maxmatches; + uint32_t a_maxmatches; struct timeval *a_timelimit; struct attrlist *a_returnattrs; - u_long *a_nummatches; - u_long a_scriptcode; - u_long a_options; + uint32_t *a_nummatches; + uint32_t a_scriptcode; + uint32_t a_options; struct uio *a_uio; struct searchstate *a_searchstate; vfs_context_t a_context; @@ -4708,12 +5480,14 @@ struct vnop_searchfs_args { #endif /* 0*/ errno_t -VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, u_long maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, u_long *nummatches, u_long scriptcode, u_long options, struct uio *uio, struct searchstate *searchstate, vfs_context_t ctx) +VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct attrlist *searchattrs, uint32_t maxmatches, struct timeval *timelimit, struct attrlist *returnattrs, uint32_t *nummatches, uint32_t scriptcode, uint32_t options, struct uio *uio, 
struct searchstate *searchstate, vfs_context_t ctx) { int _err; struct vnop_searchfs_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_searchfs_desc; a.a_vp = vp; @@ -4729,17 +5503,24 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct a.a_uio = uio; a.a_searchstate = searchstate; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_searchfs_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4785,8 +5566,10 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options { struct vnop_getxattr_args a; int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_getxattr_desc; a.a_vp = vp; @@ -4796,16 +5579,23 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options a.a_options = options; a.a_context = ctx; +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (error = lock_fsnode(vp, &funnel_state)) ) { return (error); } } +#endif /* __LP64__ */ + error = (*vp->v_op[vnop_getxattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return (error); } @@ -4814,8 +5604,10 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_ { struct vnop_setxattr_args a; int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_setxattr_desc; a.a_vp = vp; @@ -4824,18 +5616,28 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_ a.a_options = options; a.a_context = ctx; +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); 
if (!thread_safe) { if ( (error = lock_fsnode(vp, &funnel_state)) ) { return (error); } } +#endif /* __LP64__ */ + error = (*vp->v_op[vnop_setxattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + if (error == 0) vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS); + + post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } @@ -4844,8 +5646,10 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx) { struct vnop_removexattr_args a; int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_removexattr_desc; a.a_vp = vp; @@ -4853,16 +5657,25 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx) a.a_options = options; a.a_context = ctx; +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (error = lock_fsnode(vp, &funnel_state)) ) { return (error); } } +#endif /* __LP64__ */ + error = (*vp->v_op[vnop_removexattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + + post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } @@ -4871,8 +5684,10 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c { struct vnop_listxattr_args a; int error; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_listxattr_desc; a.a_vp = vp; @@ -4881,16 +5696,23 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c a.a_options = options; a.a_context = ctx; +#ifndef __LP64__ thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (error = lock_fsnode(vp, &funnel_state)) ) { return (error); } } +#endif /* __LP64__ */ + error = (*vp->v_op[vnop_listxattr_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + 
return (error); } @@ -4913,22 +5735,31 @@ VNOP_BLKTOOFF(struct vnode *vp, daddr64_t lblkno, off_t *offset) { int _err; struct vnop_blktooff_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_blktooff_desc; a.a_vp = vp; a.a_lblkno = lblkno; a.a_offset = offset; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_blktooff_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4950,22 +5781,31 @@ VNOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr64_t *lblkno) { int _err; struct vnop_offtoblk_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = &vnop_offtoblk_desc; a.a_vp = vp; a.a_offset = offset; a.a_lblkno = lblkno; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_offtoblk_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -4992,8 +5832,10 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size { int _err; struct vnop_blockmap_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -5007,15 +5849,22 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size a.a_poff = poff; a.a_flags = flags; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { funnel_state = thread_funnel_set(kernel_flock, TRUE); } +#endif /* 
__LP64__ */ + _err = (*vp->v_op[vnop_blockmap_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { (void) thread_funnel_set(kernel_flock, funnel_state); } +#endif /* __LP64__ */ + return (_err); } @@ -5067,24 +5916,33 @@ VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t ctx) { int _err; struct vnop_kqfilt_add_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = VDESC(vnop_kqfilt_add); a.a_vp = vp; a.a_kn = kn; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_kqfilt_add_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return(_err); } @@ -5101,24 +5959,70 @@ VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t ctx) { int _err; struct vnop_kqfilt_remove_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = VDESC(vnop_kqfilt_remove); a.a_vp = vp; a.a_ident = ident; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_kqfilt_remove_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + + return(_err); +} + +errno_t +VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_context_t ctx) +{ + int _err; + struct vnop_monitor_args a; +#ifndef __LP64__ + int thread_safe; + int funnel_state = 0; +#endif /* __LP64__ */ + + a.a_desc = VDESC(vnop_monitor); + a.a_vp = vp; + a.a_events = events; + a.a_flags = flags; + a.a_handle = handle; + a.a_context = ctx; + +#ifndef __LP64__ + thread_safe = 
THREAD_SAFE_FS(vp); + if (!thread_safe) { + if ( (_err = lock_fsnode(vp, &funnel_state)) ) { + return (_err); + } + } +#endif /* __LP64__ */ + + _err = (*vp->v_op[vnop_monitor_desc.vdesc_offset])(&a); + +#ifndef __LP64__ + if (!thread_safe) { + unlock_fsnode(vp, &funnel_state); + } +#endif /* __LP64__ */ + return(_err); } @@ -5135,24 +6039,33 @@ VNOP_SETLABEL(struct vnode *vp, struct label *label, vfs_context_t ctx) { int _err; struct vnop_setlabel_args a; +#ifndef __LP64__ int thread_safe; int funnel_state = 0; +#endif /* __LP64__ */ a.a_desc = VDESC(vnop_setlabel); a.a_vp = vp; a.a_vl = label; a.a_context = ctx; - thread_safe = THREAD_SAFE_FS(vp); +#ifndef __LP64__ + thread_safe = THREAD_SAFE_FS(vp); if (!thread_safe) { if ( (_err = lock_fsnode(vp, &funnel_state)) ) { return (_err); } } +#endif /* __LP64__ */ + _err = (*vp->v_op[vnop_setlabel_desc.vdesc_offset])(&a); + +#ifndef __LP64__ if (!thread_safe) { unlock_fsnode(vp, &funnel_state); } +#endif /* __LP64__ */ + return(_err); } @@ -5166,8 +6079,11 @@ VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperatio { struct vnop_getnamedstream_args a; +#ifndef __LP64__ if (!THREAD_SAFE_FS(vp)) return (ENOTSUP); +#endif /* __LP64__ */ + a.a_desc = &vnop_getnamedstream_desc; a.a_vp = vp; a.a_svpp = svpp; @@ -5187,8 +6103,11 @@ VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int flags, vfs { struct vnop_makenamedstream_args a; +#ifndef __LP64__ if (!THREAD_SAFE_FS(vp)) return (ENOTSUP); +#endif /* __LP64__ */ + a.a_desc = &vnop_makenamedstream_desc; a.a_vp = vp; a.a_svpp = svpp; @@ -5208,8 +6127,11 @@ VNOP_REMOVENAMEDSTREAM(vnode_t vp, vnode_t svp, const char *name, int flags, vfs { struct vnop_removenamedstream_args a; +#ifndef __LP64__ if (!THREAD_SAFE_FS(vp)) return (ENOTSUP); +#endif /* __LP64__ */ + a.a_desc = &vnop_removenamedstream_desc; a.a_vp = vp; a.a_svp = svp; diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index dcc9e1b71..496b094b1 100644 --- 
a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2007 Apple Inc. All rights reserved. + * Copyright (c) 1995-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,6 +66,8 @@ struct _attrlist_buf { char *varcursor; ssize_t allocated; ssize_t needed; + attribute_set_t actual; + attribute_set_t valid; }; @@ -182,10 +184,11 @@ attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count #define ATTR_PACK_TIME(b, v, is64) \ do { \ if (is64) { \ - struct user_timespec us = {v.tv_sec, v.tv_nsec}; \ + struct user64_timespec us = {v.tv_sec, v.tv_nsec}; \ ATTR_PACK(&b, us); \ } else { \ - ATTR_PACK8(b, v); \ + struct user32_timespec us = {v.tv_sec, v.tv_nsec}; \ + ATTR_PACK(&b, us); \ } \ } while(0) @@ -220,8 +223,17 @@ static struct getvolattrlist_attrtab getvolattrlist_common_tab[] = { {ATTR_CMN_ACCESSMASK, 0, sizeof(uint32_t)}, {ATTR_CMN_FLAGS, 0, sizeof(uint32_t)}, {ATTR_CMN_USERACCESS, 0, sizeof(uint32_t)}, + {ATTR_CMN_EXTENDED_SECURITY, 0, sizeof(struct attrreference)}, + {ATTR_CMN_UUID, 0, sizeof(guid_t)}, + {ATTR_CMN_GRPUUID, 0, sizeof(guid_t)}, + {ATTR_CMN_FILEID, 0, sizeof(uint64_t)}, + {ATTR_CMN_PARENTID, 0, sizeof(uint64_t)}, + {ATTR_CMN_RETURNED_ATTRS, 0, sizeof(attribute_set_t)}, {0, 0, 0} }; +#define ATTR_CMN_VOL_INVALID \ + (ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | ATTR_CMN_GRPUUID | \ + ATTR_CMN_FILEID | ATTR_CMN_PARENTID) static struct getvolattrlist_attrtab getvolattrlist_vol_tab[] = { {ATTR_VOL_FSTYPE, 0, sizeof(uint32_t)}, @@ -242,6 +254,7 @@ static struct getvolattrlist_attrtab getvolattrlist_vol_tab[] = { {ATTR_VOL_MOUNTEDDEVICE, 0, sizeof(struct attrreference)}, {ATTR_VOL_ENCODINGSUSED, 0, sizeof(uint64_t)}, {ATTR_VOL_CAPABILITIES, VFSATTR_BIT(f_capabilities), sizeof(vol_capabilities_attr_t)}, + {ATTR_VOL_UUID, VFSATTR_BIT(f_uuid), sizeof(uuid_t)}, {ATTR_VOL_ATTRIBUTES, VFSATTR_BIT(f_attributes), sizeof(vol_attributes_attr_t)}, {ATTR_VOL_INFO, 
0, 0}, {0, 0, 0} @@ -261,9 +274,9 @@ getvolattrlist_parsetab(struct getvolattrlist_attrtab *tab, attrgroup_t attrs, s vsp->f_active |= tab->bits; if (tab->size == ATTR_TIME_SIZE) { if (is_64bit) { - *sizep += sizeof(struct user_timespec); + *sizep += sizeof(struct user64_timespec); } else { - *sizep += sizeof(struct timespec); + *sizep += sizeof(struct user32_timespec); } } else { *sizep += tab->size; @@ -290,9 +303,17 @@ getvolattrlist_setupvfsattr(struct attrlist *alp, struct vfs_attr *vsp, ssize_t * Parse the above tables. */ *sizep = sizeof(uint32_t); /* length count */ - if (alp->commonattr && - (error = getvolattrlist_parsetab(getvolattrlist_common_tab, alp->commonattr, vsp, sizep, is_64bit)) != 0) - return(error); + if (alp->commonattr) { + if ((alp->commonattr & ATTR_CMN_VOL_INVALID) && + (alp->commonattr & ATTR_CMN_RETURNED_ATTRS) == 0) { + return (EINVAL); + } + if ((error = getvolattrlist_parsetab(getvolattrlist_common_tab, + alp->commonattr, vsp, sizep, + is_64bit)) != 0) { + return(error); + } + } if (alp->volattr && (error = getvolattrlist_parsetab(getvolattrlist_vol_tab, alp->volattr, vsp, sizep, is_64bit)) != 0) return(error); @@ -300,6 +321,38 @@ getvolattrlist_setupvfsattr(struct attrlist *alp, struct vfs_attr *vsp, ssize_t return(0); } +/* + * Given the attributes listed in asp and those supported + * in the vsp, fixup the asp attributes to reflect any + * missing attributes from the file system + */ +static void +getvolattrlist_fixupattrs(attribute_set_t *asp, struct vfs_attr *vsp) +{ + struct getvolattrlist_attrtab *tab; + + if (asp->commonattr) { + tab = getvolattrlist_common_tab; + do { + if ((tab->attr & asp->commonattr) && + (tab->bits != 0) && + ((tab->bits & vsp->f_supported) == 0)) { + asp->commonattr &= ~tab->attr; + } + } while ((++tab)->attr != 0); + } + if (asp->volattr) { + tab = getvolattrlist_vol_tab; + do { + if ((tab->attr & asp->volattr) && + (tab->bits != 0) && + ((tab->bits & vsp->f_supported) == 0)) { + asp->volattr &= 
~tab->attr; + } + } while ((++tab)->attr != 0); + } +} + /* * Table-driven setup for all valid common/dir/file/fork attributes against files. */ @@ -310,6 +363,11 @@ struct getattrlist_attrtab { ssize_t size; kauth_action_t action; }; + +/* + * A zero after the ATTR_ bit indicates that we don't expect the underlying FS to report back with this + * information, and we will synthesize it at the VFS level. + */ static struct getattrlist_attrtab getattrlist_common_tab[] = { {ATTR_CMN_NAME, VATTR_BIT(va_name), sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_DEVID, 0, sizeof(dev_t), KAUTH_VNODE_READ_ATTRIBUTES}, @@ -336,13 +394,14 @@ static struct getattrlist_attrtab getattrlist_common_tab[] = { {ATTR_CMN_GRPUUID, VATTR_BIT(va_guuid), sizeof(guid_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_FILEID, VATTR_BIT(va_fileid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_CMN_PARENTID, VATTR_BIT(va_parentid), sizeof(uint64_t), KAUTH_VNODE_READ_ATTRIBUTES}, + {ATTR_CMN_FULLPATH, 0, sizeof(struct attrreference), KAUTH_VNODE_READ_ATTRIBUTES }, + {ATTR_CMN_RETURNED_ATTRS, 0, sizeof(attribute_set_t), 0}, {0, 0, 0, 0} }; + static struct getattrlist_attrtab getattrlist_dir_tab[] = { {ATTR_DIR_LINKCOUNT, VATTR_BIT(va_dirlinkcount), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_DIR_ENTRYCOUNT, VATTR_BIT(va_nchildren), sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, - /* ATTR_DIR_ENTRYCOUNT falls back to va_nlink-2 if va_nchildren isn't supported, so request va_nlink just in case */ - {ATTR_DIR_ENTRYCOUNT, VATTR_BIT(va_nlink), 0, KAUTH_VNODE_READ_ATTRIBUTES}, {ATTR_DIR_MOUNTSTATUS, 0, sizeof(uint32_t), KAUTH_VNODE_READ_ATTRIBUTES}, {0, 0, 0, 0} }; @@ -370,18 +429,18 @@ static struct getattrlist_attrtab getattrlist_file_tab[] = { ATTR_VOL_ALLOCATIONCLUMP | ATTR_VOL_IOBLOCKSIZE | \ ATTR_VOL_MOUNTPOINT | ATTR_VOL_MOUNTFLAGS | \ ATTR_VOL_MOUNTEDDEVICE | ATTR_VOL_CAPABILITIES | \ - ATTR_VOL_ATTRIBUTES) + ATTR_VOL_ATTRIBUTES | ATTR_VOL_ENCODINGSUSED) 
#define VFS_DFLT_ATTR_CMN (ATTR_CMN_NAME | ATTR_CMN_DEVID | \ ATTR_CMN_FSID | ATTR_CMN_OBJTYPE | \ ATTR_CMN_OBJTAG | ATTR_CMN_OBJID | \ ATTR_CMN_PAROBJID | ATTR_CMN_SCRIPT | \ ATTR_CMN_MODTIME | ATTR_CMN_CHGTIME | \ - ATTR_CMN_ACCTIME | ATTR_CMN_FNDRINFO | \ + ATTR_CMN_FNDRINFO | \ ATTR_CMN_OWNERID | ATTR_CMN_GRPID | \ ATTR_CMN_ACCESSMASK | ATTR_CMN_FLAGS | \ ATTR_CMN_USERACCESS | ATTR_CMN_FILEID | \ - ATTR_CMN_PARENTID) + ATTR_CMN_PARENTID | ATTR_CMN_RETURNED_ATTRS) #define VFS_DFLT_ATTR_DIR (ATTR_DIR_LINKCOUNT | ATTR_DIR_MOUNTSTATUS) @@ -405,14 +464,16 @@ getattrlist_parsetab(struct getattrlist_attrtab *tab, attrgroup_t attrs, struct vap->va_active |= tab->bits; if (tab->size == ATTR_TIME_SIZE) { if (is_64bit) { - *sizep += sizeof(struct user_timespec); + *sizep += sizeof(struct user64_timespec); } else { - *sizep += sizeof(struct timespec); + *sizep += sizeof(struct user32_timespec); } } else { *sizep += tab->size; } *actionp |= tab->action; + if (attrs == recognised) + break; /* all done, get out */ } } while ((++tab)->attr != 0); @@ -449,6 +510,48 @@ getattrlist_setupvattr(struct attrlist *alp, struct vnode_attr *vap, ssize_t *si return(0); } +/* + * Given the attributes listed in asp and those supported + * in the vap, fixup the asp attributes to reflect any + * missing attributes from the file system + */ +static void +getattrlist_fixupattrs(attribute_set_t *asp, struct vnode_attr *vap) +{ + struct getattrlist_attrtab *tab; + + if (asp->commonattr) { + tab = getattrlist_common_tab; + do { + if ((tab->attr & asp->commonattr) && + (tab->bits & vap->va_active) && + (tab->bits & vap->va_supported) == 0) { + asp->commonattr &= ~tab->attr; + } + } while ((++tab)->attr != 0); + } + if (asp->dirattr) { + tab = getattrlist_dir_tab; + do { + if ((tab->attr & asp->dirattr) && + (tab->bits & vap->va_active) && + (vap->va_supported & tab->bits) == 0) { + asp->dirattr &= ~tab->attr; + } + } while ((++tab)->attr != 0); + } + if (asp->fileattr) { + tab = 
getattrlist_file_tab; + do { + if ((tab->attr & asp->fileattr) && + (tab->bits & vap->va_active) && + (vap->va_supported & tab->bits) == 0) { + asp->fileattr &= ~tab->attr; + } + } while ((++tab)->attr != 0); + } +} + static int setattrlist_setfinderinfo(vnode_t vp, char *fndrinfo, struct vfs_context *ctx) { @@ -512,7 +615,8 @@ getattrlist_findnamecomp(const char *mn, const char **np, ssize_t *nl) static int -getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, vfs_context_t ctx, int is_64bit) +getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, + vfs_context_t ctx, int is_64bit) { struct vfs_attr vs; struct vnode_attr va; @@ -521,7 +625,10 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, ssize_t fixedsize, varsize; const char *cnp = NULL; /* protected by ATTR_CMN_NAME */ ssize_t cnl = 0; /* protected by ATTR_CMN_NAME */ + int release_str = 0; mount_t mnt; + int return_valid; + int pack_invalid; ab.base = NULL; VATTR_INIT(&va); @@ -529,7 +636,21 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, vs.f_vol_name = NULL; mnt = vp->v_mount; - + /* Check for special packing semantics */ + return_valid = (alp->commonattr & ATTR_CMN_RETURNED_ATTRS); + pack_invalid = (uap->options & FSOPT_PACK_INVAL_ATTRS); + if (pack_invalid) { + /* FSOPT_PACK_INVAL_ATTRS requires ATTR_CMN_RETURNED_ATTRS */ + if (!return_valid) { + error = EINVAL; + goto out; + } + /* Keep invalid attrs from being uninitialized */ + bzero(&vs, sizeof (vs)); + /* Generate a valid mask for post processing */ + bcopy(&alp->commonattr, &ab.valid, sizeof (attribute_set_t)); + } + /* * For now, the vnode must be the root of its filesystem. 
* To relax this, we need to be able to find the root vnode of a filesystem @@ -540,7 +661,7 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: volume attributes requested but not the root of a filesystem"); goto out; } - + /* * Set up the vfs_attr structure and call the filesystem. */ @@ -636,11 +757,21 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, /* check to see if our fixups were enough */ if (!VFSATTR_ALL_SUPPORTED(&vs)) { - error = EINVAL; - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not get all requested volume attributes"); - VFS_DEBUG(ctx, vp, "ATTRLIST - wanted %016llx got %016llx missing %016llx", - vs.f_active, vs.f_supported, vs.f_active & ~vs.f_supported); - goto out; + if (return_valid) { + if (pack_invalid) { + /* Fix up valid mask for post processing */ + getvolattrlist_fixupattrs(&ab.valid, &vs); + + /* Force packing of everything asked for */ + vs.f_supported = vs.f_active; + } else { + /* Adjust the requested attributes */ + getvolattrlist_fixupattrs((attribute_set_t *)&alp->commonattr, &vs); + } + } else { + error = EINVAL; + goto out; + } } } } @@ -660,8 +791,15 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, goto out; } - if (VATTR_IS_ACTIVE(&va, va_encoding) && !VATTR_IS_SUPPORTED(&va, va_encoding)) - VATTR_RETURN(&va, va_encoding, 0x7e /* kTextEncodingMacUnicode */); + if (VATTR_IS_ACTIVE(&va, va_encoding) && + !VATTR_IS_SUPPORTED(&va, va_encoding)) { + if (!return_valid || pack_invalid) + /* use kTextEncodingMacUnicode */ + VATTR_RETURN(&va, va_encoding, 0x7e); + else + /* don't use a default */ + alp->commonattr &= ~ATTR_CMN_SCRIPT; + } } /* @@ -682,6 +820,9 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, /* just use "/" as name */ cnp = &vp->v_mount->mnt_vfsstat.f_mntonname[0]; } + else { + release_str = 1; + } cnl = strlen(cnp); } else { @@ -721,67 +862,112 @@ 
getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, * Pack results into the destination buffer. */ ab.fixedcursor = ab.base + sizeof(uint32_t); + if (return_valid) { + ab.fixedcursor += sizeof (attribute_set_t); + bzero(&ab.actual, sizeof (ab.actual)); + } ab.varcursor = ab.base + fixedsize; ab.needed = fixedsize + varsize; /* common attributes **************************************************/ - if (alp->commonattr & ATTR_CMN_NAME) + if (alp->commonattr & ATTR_CMN_NAME) { attrlist_pack_string(&ab, cnp, cnl); - if (alp->commonattr & ATTR_CMN_DEVID) + ab.actual.commonattr |= ATTR_CMN_NAME; + } + if (alp->commonattr & ATTR_CMN_DEVID) { ATTR_PACK4(ab, mnt->mnt_vfsstat.f_fsid.val[0]); - if (alp->commonattr & ATTR_CMN_FSID) + ab.actual.commonattr |= ATTR_CMN_DEVID; + } + if (alp->commonattr & ATTR_CMN_FSID) { ATTR_PACK8(ab, mnt->mnt_vfsstat.f_fsid); - if (alp->commonattr & ATTR_CMN_OBJTYPE) - ATTR_PACK4(ab, 0); - if (alp->commonattr & ATTR_CMN_OBJTAG) + ab.actual.commonattr |= ATTR_CMN_FSID; + } + if (alp->commonattr & ATTR_CMN_OBJTYPE) { + if (!return_valid || pack_invalid) + ATTR_PACK4(ab, 0); + } + if (alp->commonattr & ATTR_CMN_OBJTAG) { ATTR_PACK4(ab, vp->v_tag); + ab.actual.commonattr |= ATTR_CMN_OBJTAG; + } if (alp->commonattr & ATTR_CMN_OBJID) { - fsobj_id_t f = {0, 0}; - ATTR_PACK8(ab, f); + if (!return_valid || pack_invalid) { + fsobj_id_t f = {0, 0}; + ATTR_PACK8(ab, f); + } } if (alp->commonattr & ATTR_CMN_OBJPERMANENTID) { - fsobj_id_t f = {0, 0}; - ATTR_PACK8(ab, f); + if (!return_valid || pack_invalid) { + fsobj_id_t f = {0, 0}; + ATTR_PACK8(ab, f); + } } if (alp->commonattr & ATTR_CMN_PAROBJID) { - fsobj_id_t f = {0, 0}; - ATTR_PACK8(ab, f); + if (!return_valid || pack_invalid) { + fsobj_id_t f = {0, 0}; + ATTR_PACK8(ab, f); + } } /* note that this returns the encoding for the volume name, not the node name */ - if (alp->commonattr & ATTR_CMN_SCRIPT) + if (alp->commonattr & ATTR_CMN_SCRIPT) { ATTR_PACK4(ab, va.va_encoding); - 
if (alp->commonattr & ATTR_CMN_CRTIME) + ab.actual.commonattr |= ATTR_CMN_SCRIPT; + } + if (alp->commonattr & ATTR_CMN_CRTIME) { ATTR_PACK_TIME(ab, vs.f_create_time, is_64bit); - if (alp->commonattr & ATTR_CMN_MODTIME) - ATTR_PACK_TIME(ab, vs.f_modify_time, is_64bit); - if (alp->commonattr & ATTR_CMN_CHGTIME) + ab.actual.commonattr |= ATTR_CMN_CRTIME; + } + if (alp->commonattr & ATTR_CMN_MODTIME) { ATTR_PACK_TIME(ab, vs.f_modify_time, is_64bit); - if (alp->commonattr & ATTR_CMN_ACCTIME) + ab.actual.commonattr |= ATTR_CMN_MODTIME; + } + if (alp->commonattr & ATTR_CMN_CHGTIME) { + if (!return_valid || pack_invalid) + ATTR_PACK_TIME(ab, vs.f_modify_time, is_64bit); + } + if (alp->commonattr & ATTR_CMN_ACCTIME) { ATTR_PACK_TIME(ab, vs.f_access_time, is_64bit); - if (alp->commonattr & ATTR_CMN_BKUPTIME) + ab.actual.commonattr |= ATTR_CMN_ACCTIME; + } + if (alp->commonattr & ATTR_CMN_BKUPTIME) { ATTR_PACK_TIME(ab, vs.f_backup_time, is_64bit); + ab.actual.commonattr |= ATTR_CMN_BKUPTIME; + } if (alp->commonattr & ATTR_CMN_FNDRINFO) { char f[32]; /* * This attribute isn't really Finder Info, at least for HFS. 
*/ if (vp->v_tag == VT_HFS) { - if ((error = VNOP_IOCTL(vp, HFS_GET_BOOT_INFO, (caddr_t)&f, 0, ctx)) != 0) + error = VNOP_IOCTL(vp, HFS_GET_BOOT_INFO, (caddr_t)&f, 0, ctx); + if (error == 0) { + attrlist_pack_fixed(&ab, f, sizeof(f)); + ab.actual.commonattr |= ATTR_CMN_FNDRINFO; + } else if (!return_valid) { goto out; - } else { + } + } else if (!return_valid || pack_invalid) { /* XXX we could at least pass out the volume UUID here */ bzero(&f, sizeof(f)); + attrlist_pack_fixed(&ab, f, sizeof(f)); } - attrlist_pack_fixed(&ab, f, sizeof(f)); } - if (alp->commonattr & ATTR_CMN_OWNERID) + if (alp->commonattr & ATTR_CMN_OWNERID) { ATTR_PACK4(ab, va.va_uid); - if (alp->commonattr & ATTR_CMN_GRPID) + ab.actual.commonattr |= ATTR_CMN_OWNERID; + } + if (alp->commonattr & ATTR_CMN_GRPID) { ATTR_PACK4(ab, va.va_gid); - if (alp->commonattr & ATTR_CMN_ACCESSMASK) + ab.actual.commonattr |= ATTR_CMN_GRPID; + } + if (alp->commonattr & ATTR_CMN_ACCESSMASK) { ATTR_PACK_CAST(&ab, uint32_t, va.va_mode); - if (alp->commonattr & ATTR_CMN_FLAGS) + ab.actual.commonattr |= ATTR_CMN_ACCESSMASK; + } + if (alp->commonattr & ATTR_CMN_FLAGS) { ATTR_PACK4(ab, va.va_flags); + ab.actual.commonattr |= ATTR_CMN_FLAGS; + } if (alp->commonattr & ATTR_CMN_USERACCESS) { /* XXX this is expensive and also duplicate work */ uint32_t perms = 0; if (vnode_isdir(vp)) { @@ -818,44 +1004,97 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, #endif /* MAC */ KAUTH_DEBUG("ATTRLIST - returning user access %x", perms); ATTR_PACK4(ab, perms); + ab.actual.commonattr |= ATTR_CMN_USERACCESS; + } + /* + * The following common volume attributes are only + * packed when the pack_invalid mode is enabled. 
+ */ + if (pack_invalid) { + uint64_t fid = 0; + + if (alp->commonattr & ATTR_CMN_EXTENDED_SECURITY) + attrlist_pack_variable(&ab, NULL, 0); + if (alp->commonattr & ATTR_CMN_UUID) + ATTR_PACK(&ab, kauth_null_guid); + if (alp->commonattr & ATTR_CMN_GRPUUID) + ATTR_PACK(&ab, kauth_null_guid); + if (alp->commonattr & ATTR_CMN_FILEID) + ATTR_PACK8(ab, fid); + if (alp->commonattr & ATTR_CMN_PARENTID) + ATTR_PACK8(ab, fid); } /* volume attributes **************************************************/ - if (alp->volattr & ATTR_VOL_FSTYPE) + if (alp->volattr & ATTR_VOL_FSTYPE) { ATTR_PACK_CAST(&ab, uint32_t, vfs_typenum(mnt)); - if (alp->volattr & ATTR_VOL_SIGNATURE) + ab.actual.volattr |= ATTR_VOL_FSTYPE; + } + if (alp->volattr & ATTR_VOL_SIGNATURE) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_signature); - if (alp->volattr & ATTR_VOL_SIZE) + ab.actual.volattr |= ATTR_VOL_SIGNATURE; + } + if (alp->volattr & ATTR_VOL_SIZE) { ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_blocks); - if (alp->volattr & ATTR_VOL_SPACEFREE) + ab.actual.volattr |= ATTR_VOL_SIZE; + } + if (alp->volattr & ATTR_VOL_SPACEFREE) { ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_bfree); - if (alp->volattr & ATTR_VOL_SPACEAVAIL) + ab.actual.volattr |= ATTR_VOL_SPACEFREE; + } + if (alp->volattr & ATTR_VOL_SPACEAVAIL) { ATTR_PACK_CAST(&ab, off_t, vs.f_bsize * vs.f_bavail); - if (alp->volattr & ATTR_VOL_MINALLOCATION) + ab.actual.volattr |= ATTR_VOL_SPACEAVAIL; + } + if (alp->volattr & ATTR_VOL_MINALLOCATION) { ATTR_PACK_CAST(&ab, off_t, vs.f_bsize); - if (alp->volattr & ATTR_VOL_ALLOCATIONCLUMP) + ab.actual.volattr |= ATTR_VOL_MINALLOCATION; + } + if (alp->volattr & ATTR_VOL_ALLOCATIONCLUMP) { ATTR_PACK_CAST(&ab, off_t, vs.f_bsize); /* not strictly true */ - if (alp->volattr & ATTR_VOL_IOBLOCKSIZE) + ab.actual.volattr |= ATTR_VOL_ALLOCATIONCLUMP; + } + if (alp->volattr & ATTR_VOL_IOBLOCKSIZE) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_iosize); - if (alp->volattr & ATTR_VOL_OBJCOUNT) + ab.actual.volattr |= 
ATTR_VOL_IOBLOCKSIZE; + } + if (alp->volattr & ATTR_VOL_OBJCOUNT) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_objcount); - if (alp->volattr & ATTR_VOL_FILECOUNT) + ab.actual.volattr |= ATTR_VOL_OBJCOUNT; + } + if (alp->volattr & ATTR_VOL_FILECOUNT) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_filecount); - if (alp->volattr & ATTR_VOL_DIRCOUNT) + ab.actual.volattr |= ATTR_VOL_FILECOUNT; + } + if (alp->volattr & ATTR_VOL_DIRCOUNT) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_dircount); - if (alp->volattr & ATTR_VOL_MAXOBJCOUNT) + ab.actual.volattr |= ATTR_VOL_DIRCOUNT; + } + if (alp->volattr & ATTR_VOL_MAXOBJCOUNT) { ATTR_PACK_CAST(&ab, uint32_t, vs.f_maxobjcount); - if (alp->volattr & ATTR_VOL_MOUNTPOINT) + ab.actual.volattr |= ATTR_VOL_MAXOBJCOUNT; + } + if (alp->volattr & ATTR_VOL_MOUNTPOINT) { attrlist_pack_string(&ab, mnt->mnt_vfsstat.f_mntonname, 0); - if (alp->volattr & ATTR_VOL_NAME) + ab.actual.volattr |= ATTR_VOL_MOUNTPOINT; + } + if (alp->volattr & ATTR_VOL_NAME) { attrlist_pack_string(&ab, vs.f_vol_name, 0); - if (alp->volattr & ATTR_VOL_MOUNTFLAGS) + ab.actual.volattr |= ATTR_VOL_NAME; + } + if (alp->volattr & ATTR_VOL_MOUNTFLAGS) { ATTR_PACK_CAST(&ab, uint32_t, mnt->mnt_flag); - if (alp->volattr & ATTR_VOL_MOUNTEDDEVICE) + ab.actual.volattr |= ATTR_VOL_MOUNTFLAGS; + } + if (alp->volattr & ATTR_VOL_MOUNTEDDEVICE) { attrlist_pack_string(&ab, mnt->mnt_vfsstat.f_mntfromname, 0); - if (alp->volattr & ATTR_VOL_ENCODINGSUSED) - ATTR_PACK_CAST(&ab, uint64_t, ~0LL); /* return all encodings */ + ab.actual.volattr |= ATTR_VOL_MOUNTEDDEVICE; + } + if (alp->volattr & ATTR_VOL_ENCODINGSUSED) { + if (!return_valid || pack_invalid) + ATTR_PACK_CAST(&ab, uint64_t, ~0LL); /* return all encodings */ + } if (alp->volattr & ATTR_VOL_CAPABILITIES) { /* fix up volume capabilities */ if (vfs_extendedsecurity(mnt)) { @@ -865,6 +1104,10 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, } vs.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] |= 
VOL_CAP_INT_EXTENDED_SECURITY; ATTR_PACK(&ab, vs.f_capabilities); + ab.actual.volattr |= ATTR_VOL_CAPABILITIES; + } + if (alp->volattr & ATTR_VOL_UUID) { + ATTR_PACK(&ab, vs.f_uuid); } if (alp->volattr & ATTR_VOL_ATTRIBUTES) { /* fix up volume attribute information */ @@ -881,14 +1124,15 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, vs.f_attributes.nativeattr.commonattr &= ~(ATTR_CMN_EXTENDED_SECURITY | ATTR_CMN_UUID | ATTR_CMN_GRPUUID); } ATTR_PACK(&ab, vs.f_attributes); + ab.actual.volattr |= ATTR_VOL_ATTRIBUTES; } /* diagnostic */ - if ((ab.fixedcursor - ab.base) != fixedsize) - panic("packed field size mismatch; allocated %ld but packed %d for common %08x vol %08x", - fixedsize, ab.fixedcursor - ab.base, alp->commonattr, alp->volattr); - if (ab.varcursor != (ab.base + ab.needed)) - panic("packed variable field size mismatch; used %d but expected %ld", ab.varcursor - ab.base, ab.needed); + if (!return_valid && (ab.fixedcursor - ab.base) != fixedsize) + panic("packed field size mismatch; allocated %ld but packed %ld for common %08x vol %08x", + fixedsize, (long) (ab.fixedcursor - ab.base), alp->commonattr, alp->volattr); + if (!return_valid && ab.varcursor != (ab.base + ab.needed)) + panic("packed variable field size mismatch; used %ld but expected %ld", (long) (ab.varcursor - ab.base), ab.needed); /* * In the compatible case, we report the smaller of the required and returned sizes. @@ -898,11 +1142,24 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, */ *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); + /* Return attribute set output if requested. 
*/ + if (return_valid) { + ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS; + if (pack_invalid) { + /* Only report the attributes that are valid */ + ab.actual.commonattr &= ab.valid.commonattr; + ab.actual.volattr &= ab.valid.volattr; + } + bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); + } error = copyout(ab.base, uap->attributeBuffer, ab.allocated); out: if (vs.f_vol_name != NULL) kfree(vs.f_vol_name, MAXPATHLEN); + if (release_str) { + vnode_putname(cnp); + } if (ab.base != NULL) FREE(ab.base, M_TEMP); VFS_DEBUG(ctx, vp, "ATTRLIST - returning %d", error); @@ -912,46 +1169,34 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, /* * Obtain attribute information about a filesystem object. */ -int -getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) + +static int +getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_context_t ctx) { struct attrlist al; struct vnode_attr va; - struct vfs_context *ctx; - struct nameidata nd; struct _attrlist_buf ab; - vnode_t vp; - u_long nameiflags; kauth_action_t action; ssize_t fixedsize, varsize; const char *cnp; const char *vname = NULL; + char *fullpathptr; + ssize_t fullpathlen; ssize_t cnl; int proc_is64; int error; + int return_valid; + int pack_invalid; + int vtype = 0; - ctx = vfs_context_current(); - vp = NULL; - error = 0; proc_is64 = proc_is64bit(p); VATTR_INIT(&va); va.va_name = NULL; ab.base = NULL; cnp = "unknown"; cnl = 0; - - /* - * Look up the file. - */ - nameiflags = NOTRIGGER | AUDITVNPATH1; - if (!(uap->options & FSOPT_NOFOLLOW)) - nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx); - - if ((error = namei(&nd)) != 0) - goto out; - vp = nd.ni_vp; - nameidone(&nd); + fullpathptr = NULL; + fullpathlen = 0; /* * Fetch the attribute request. 
@@ -966,7 +1211,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) VFS_DEBUG(ctx, vp, "%p ATTRLIST - %s request common %08x vol %08x file %08x dir %08x fork %08x %sfollow on '%s'", vp, p->p_comm, al.commonattr, al.volattr, al.fileattr, al.dirattr, al.forkattr, (uap->options & FSOPT_NOFOLLOW) ? "no":"", vp->v_name); - + #if CONFIG_MACF error = mac_vnode_check_getattrlist(ctx, vp, &al); if (error) @@ -988,10 +1233,31 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) goto out; } + /* Check for special packing semantics */ + return_valid = (al.commonattr & ATTR_CMN_RETURNED_ATTRS); + pack_invalid = (uap->options & FSOPT_PACK_INVAL_ATTRS); + if (pack_invalid) { + /* FSOPT_PACK_INVAL_ATTRS requires ATTR_CMN_RETURNED_ATTRS */ + if (!return_valid || al.forkattr) { + error = EINVAL; + goto out; + } + /* Keep invalid attrs from being uninitialized */ + bzero(&va, sizeof (va)); + /* Generate a valid mask for post processing */ + bcopy(&al.commonattr, &ab.valid, sizeof (attribute_set_t)); + } + + /* Pick up the vnode type. If the FS is bad and changes vnode types on us, we + * will have a valid snapshot that we can work from here. + */ + vtype = vp->v_type; + + /* * Set up the vnode_attr structure and authorise. */ - if ((error = getattrlist_setupvattr(&al, &va, &fixedsize, &action, proc_is64, vnode_isdir(vp))) != 0) { + if ((error = getattrlist_setupvattr(&al, &va, &fixedsize, &action, proc_is64, (vtype == VDIR))) != 0) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: setup for request failed"); goto out; } @@ -1000,6 +1266,19 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) goto out; } + /* + * If we're asking for the full path, allocate a buffer for that. 
+ */ + if (al.commonattr & (ATTR_CMN_FULLPATH)) { + fullpathptr = (char*) kalloc(MAXPATHLEN); + if (fullpathptr == NULL) { + error = ENOMEM; + VFS_DEBUG(ctx,vp, "ATTRLIST - ERROR: cannot allocate fullpath buffer"); + goto out; + } + } + + if (va.va_active != 0) { /* * If we're going to ask for va_name, allocate a buffer to point it at @@ -1030,12 +1309,28 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) */ if ((al.commonattr & (ATTR_CMN_OBJID | ATTR_CMN_OBJPERMANENTID | ATTR_CMN_FILEID)) && !VATTR_IS_SUPPORTED(&va, va_linkid)) VATTR_CLEAR_ACTIVE(&va, va_linkid); /* forget we wanted this */ + /* - * Many (most?) filesystems don't know their parent object id. We can get it the - * hard way. + * Many filesystems don't know their parent object id. + * If necessary, attempt to derive it from the vnode. */ - if ((al.commonattr & (ATTR_CMN_PAROBJID | ATTR_CMN_PARENTID)) && !VATTR_IS_SUPPORTED(&va, va_parentid)) - VATTR_CLEAR_ACTIVE(&va, va_parentid); + if ((al.commonattr & (ATTR_CMN_PAROBJID | ATTR_CMN_PARENTID)) && + !VATTR_IS_SUPPORTED(&va, va_parentid)) { + vnode_t dvp; + + if ((dvp = vnode_getparent(vp)) != NULLVP) { + struct vnode_attr lva; + + VATTR_INIT(&lva); + VATTR_WANTED(&lva, va_fileid); + if (vnode_getattr(dvp, &lva, ctx) == 0 && + VATTR_IS_SUPPORTED(&va, va_fileid)) { + va.va_parentid = lva.va_fileid; + VATTR_SET_SUPPORTED(&va, va_parentid); + } + vnode_put(dvp); + } + } /* * And we can report datasize/alloc from total. 
*/ @@ -1047,7 +1342,8 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) /* * If we don't have an encoding, go with UTF-8 */ - if ((al.commonattr & ATTR_CMN_SCRIPT) && !VATTR_IS_SUPPORTED(&va, va_encoding)) + if ((al.commonattr & ATTR_CMN_SCRIPT) && + !VATTR_IS_SUPPORTED(&va, va_encoding) && !return_valid) VATTR_RETURN(&va, va_encoding, 0x7e /* kTextEncodingMacUnicode */); /* @@ -1064,11 +1360,21 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) /* check again */ if (!VATTR_ALL_SUPPORTED(&va)) { - error = EINVAL; - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: could not get all requested file attributes"); - VFS_DEBUG(ctx, vp, "ATTRLIST - have %016llx wanted %016llx missing %016llx", - va.va_supported, va.va_active, va.va_active & ~va.va_supported); - goto out; + if (return_valid) { + if (pack_invalid) { + /* Fix up valid mask for post processing */ + getattrlist_fixupattrs(&ab.valid, &va); + + /* Force packing of everything asked for */ + va.va_supported = va.va_active; + } else { + /* Adjust the requested attributes */ + getattrlist_fixupattrs((attribute_set_t *)&al.commonattr, &va); + } + } else { + error = EINVAL; + goto out; + } } } } @@ -1076,7 +1382,9 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) /* * Compute variable-space requirements. */ - varsize = 0; /* length count */ + varsize = 0; /* length count */ + + /* We may need to fix up the name attribute if requested */ if (al.commonattr & ATTR_CMN_NAME) { if (VATTR_IS_SUPPORTED(&va, va_name)) { va.va_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ @@ -1113,6 +1421,26 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) varsize += roundup(cnl + 1, 4); } + /* + * Compute the full path to this vnode, if necessary. This attribute is almost certainly + * not supported by any filesystem, so build the path to this vnode at this time. 
+ */ + if (al.commonattr & ATTR_CMN_FULLPATH) { + int len = MAXPATHLEN; + int err; + /* call build_path making sure NOT to use the cache-only behavior */ + err = build_path(vp, fullpathptr, len, &len, 0, vfs_context_current()); + if (err) { + error = err; + goto out; + } + fullpathlen = 0; + if (fullpathptr){ + fullpathlen = strlen(fullpathptr); + } + varsize += roundup(fullpathlen+1, 4); + } + /* * We have a kauth_acl_t but we will be returning a kauth_filesec_t. * @@ -1178,20 +1506,34 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) * Pack results into the destination buffer. */ ab.fixedcursor = ab.base + sizeof(uint32_t); + if (return_valid) { + ab.fixedcursor += sizeof (attribute_set_t); + bzero(&ab.actual, sizeof (ab.actual)); + } ab.varcursor = ab.base + fixedsize; ab.needed = ab.allocated; /* common attributes **************************************************/ - if (al.commonattr & ATTR_CMN_NAME) + if (al.commonattr & ATTR_CMN_NAME) { attrlist_pack_string(&ab, cnp, cnl); - if (al.commonattr & ATTR_CMN_DEVID) + ab.actual.commonattr |= ATTR_CMN_NAME; + } + if (al.commonattr & ATTR_CMN_DEVID) { ATTR_PACK4(ab, vp->v_mount->mnt_vfsstat.f_fsid.val[0]); - if (al.commonattr & ATTR_CMN_FSID) + ab.actual.commonattr |= ATTR_CMN_DEVID; + } + if (al.commonattr & ATTR_CMN_FSID) { ATTR_PACK8(ab, vp->v_mount->mnt_vfsstat.f_fsid); - if (al.commonattr & ATTR_CMN_OBJTYPE) - ATTR_PACK4(ab, vp->v_type); - if (al.commonattr & ATTR_CMN_OBJTAG) + ab.actual.commonattr |= ATTR_CMN_FSID; + } + if (al.commonattr & ATTR_CMN_OBJTYPE) { + ATTR_PACK4(ab, vtype); + ab.actual.commonattr |= ATTR_CMN_OBJTYPE; + } + if (al.commonattr & ATTR_CMN_OBJTAG) { ATTR_PACK4(ab, vp->v_tag); + ab.actual.commonattr |= ATTR_CMN_OBJTAG; + } if (al.commonattr & ATTR_CMN_OBJID) { fsobj_id_t f; /* @@ -1207,6 +1549,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) } f.fid_generation = 0; ATTR_PACK8(ab, f); + ab.actual.commonattr |= 
ATTR_CMN_OBJID; } if (al.commonattr & ATTR_CMN_OBJPERMANENTID) { fsobj_id_t f; @@ -1223,92 +1566,97 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) } f.fid_generation = 0; ATTR_PACK8(ab, f); + ab.actual.commonattr |= ATTR_CMN_OBJPERMANENTID; } if (al.commonattr & ATTR_CMN_PAROBJID) { fsobj_id_t f; - /* - * If the filesystem doesn't know the parent ID, we can - * try to get it via v->v_parent. Don't need to worry - * about links here, as we dont allow hardlinks to - * directories. - */ - if (VATTR_IS_SUPPORTED(&va, va_parentid)) { - f.fid_objno = va.va_parentid; - } else { - struct vnode_attr lva; - vnode_t pvp; - - pvp = vnode_getparent(vp); - - if (pvp == NULLVP) { - error = EINVAL; - goto out; - } - VATTR_INIT(&lva); - VATTR_WANTED(&lva, va_fileid); - error = vnode_getattr(pvp, &lva, ctx); - vnode_put(pvp); - if (error != 0) - goto out; - f.fid_objno = lva.va_fileid; - } + f.fid_objno = va.va_parentid; /* could be lossy here! */ f.fid_generation = 0; ATTR_PACK8(ab, f); + ab.actual.commonattr |= ATTR_CMN_PAROBJID; } - if (al.commonattr & ATTR_CMN_SCRIPT) - ATTR_PACK4(ab, va.va_encoding); - if (al.commonattr & ATTR_CMN_CRTIME) + if (al.commonattr & ATTR_CMN_SCRIPT) { + if (VATTR_IS_SUPPORTED(&va, va_encoding)) { + ATTR_PACK4(ab, va.va_encoding); + ab.actual.commonattr |= ATTR_CMN_SCRIPT; + } else if (!return_valid || pack_invalid) { + ATTR_PACK4(ab, 0x7e); + } + } + if (al.commonattr & ATTR_CMN_CRTIME) { ATTR_PACK_TIME(ab, va.va_create_time, proc_is64); - if (al.commonattr & ATTR_CMN_MODTIME) + ab.actual.commonattr |= ATTR_CMN_CRTIME; + } + if (al.commonattr & ATTR_CMN_MODTIME) { ATTR_PACK_TIME(ab, va.va_modify_time, proc_is64); - if (al.commonattr & ATTR_CMN_CHGTIME) + ab.actual.commonattr |= ATTR_CMN_MODTIME; + } + if (al.commonattr & ATTR_CMN_CHGTIME) { ATTR_PACK_TIME(ab, va.va_change_time, proc_is64); - if (al.commonattr & ATTR_CMN_ACCTIME) + ab.actual.commonattr |= ATTR_CMN_CHGTIME; + } + if (al.commonattr & 
ATTR_CMN_ACCTIME) { ATTR_PACK_TIME(ab, va.va_access_time, proc_is64); - if (al.commonattr & ATTR_CMN_BKUPTIME) + ab.actual.commonattr |= ATTR_CMN_ACCTIME; + } + if (al.commonattr & ATTR_CMN_BKUPTIME) { ATTR_PACK_TIME(ab, va.va_backup_time, proc_is64); + ab.actual.commonattr |= ATTR_CMN_BKUPTIME; + } if (al.commonattr & ATTR_CMN_FNDRINFO) { uio_t auio; - size_t fisize; + size_t fisize = 32; char uio_buf[UIO_SIZEOF(1)]; - fisize = imin(32, ab.allocated - (ab.fixedcursor - ab.base)); - if (fisize > 0) { - if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, uio_buf, sizeof(uio_buf))) == NULL) { - error = ENOMEM; - goto out; - } else { - uio_addiov(auio, CAST_USER_ADDR_T(ab.fixedcursor), fisize); - error = vn_getxattr(vp, XATTR_FINDERINFO_NAME, auio, &fisize, XATTR_NOSECURITY, ctx); - uio_free(auio); - } - if (error != 0) { - if ((error == ENOATTR) || (error == ENOENT) || (error == ENOTSUP) || (error == EPERM)) { - VFS_DEBUG(ctx, vp, "ATTRLIST - No system.finderinfo attribute, returning zeroes"); - bzero(ab.fixedcursor, 32); - error = 0; - } else { - VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: reading system.finderinfo attribute"); - goto out; - } - } - } else { - VFS_DEBUG(ctx, vp, "ATTRLIST - no room in caller buffer for FINDERINFO"); + if ((auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + uio_buf, sizeof(uio_buf))) == NULL) { + error = ENOMEM; + goto out; + } + uio_addiov(auio, CAST_USER_ADDR_T(ab.fixedcursor), fisize); + error = vn_getxattr(vp, XATTR_FINDERINFO_NAME, auio, + &fisize, XATTR_NOSECURITY, ctx); + uio_free(auio); + /* + * Default to zeros if its not available, + * unless ATTR_CMN_RETURNED_ATTRS was requested. 
+ */ + if (error && + (!return_valid || pack_invalid) && + ((error == ENOATTR) || (error == ENOENT) || + (error == ENOTSUP) || (error == EPERM))) { + VFS_DEBUG(ctx, vp, "ATTRLIST - No system.finderinfo attribute, returning zeroes"); + bzero(ab.fixedcursor, 32); + error = 0; + } + if (error == 0) { + ab.fixedcursor += 32; + ab.actual.commonattr |= ATTR_CMN_FNDRINFO; + } else if (!return_valid) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: reading system.finderinfo attribute"); + goto out; } - ab.fixedcursor += 32; } - if (al.commonattr & ATTR_CMN_OWNERID) + if (al.commonattr & ATTR_CMN_OWNERID) { ATTR_PACK4(ab, va.va_uid); - if (al.commonattr & ATTR_CMN_GRPID) + ab.actual.commonattr |= ATTR_CMN_OWNERID; + } + if (al.commonattr & ATTR_CMN_GRPID) { ATTR_PACK4(ab, va.va_gid); - if (al.commonattr & ATTR_CMN_ACCESSMASK) + ab.actual.commonattr |= ATTR_CMN_GRPID; + } + if (al.commonattr & ATTR_CMN_ACCESSMASK) { ATTR_PACK4(ab, va.va_mode); - if (al.commonattr & ATTR_CMN_FLAGS) + ab.actual.commonattr |= ATTR_CMN_ACCESSMASK; + } + if (al.commonattr & ATTR_CMN_FLAGS) { ATTR_PACK4(ab, va.va_flags); + ab.actual.commonattr |= ATTR_CMN_FLAGS; + } if (al.commonattr & ATTR_CMN_USERACCESS) { /* this is expensive */ uint32_t perms = 0; - if (vnode_isdir(vp)) { + if (vtype == VDIR) { if (vnode_authorize(vp, NULL, KAUTH_VNODE_ACCESS | KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_DELETE_CHILD, ctx) == 0) perms |= W_OK; @@ -1343,6 +1691,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) #endif /* MAC */ VFS_DEBUG(ctx, vp, "ATTRLIST - granting perms %d", perms); ATTR_PACK4(ab, perms); + ab.actual.commonattr |= ATTR_CMN_USERACCESS; } if (al.commonattr & ATTR_CMN_EXTENDED_SECURITY) { if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { @@ -1353,86 +1702,96 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) fsec.fsec_magic = KAUTH_FILESEC_MAGIC; fsec.fsec_owner = kauth_null_guid; fsec.fsec_group = 
kauth_null_guid; - attrlist_pack_variable2(&ab, &fsec, ((char *)&fsec.fsec_acl - (char *)&fsec), va.va_acl, KAUTH_ACL_COPYSIZE(va.va_acl)); - } else { + attrlist_pack_variable2(&ab, &fsec, __offsetof(struct kauth_filesec, fsec_acl), va.va_acl, KAUTH_ACL_COPYSIZE(va.va_acl)); + ab.actual.commonattr |= ATTR_CMN_EXTENDED_SECURITY; + } else if (!return_valid || pack_invalid) { attrlist_pack_variable(&ab, NULL, 0); } } if (al.commonattr & ATTR_CMN_UUID) { - if (!VATTR_IS_SUPPORTED(&va, va_uuuid)) { - ATTR_PACK(&ab, kauth_null_guid); - } else { - ATTR_PACK(&ab, va.va_uuuid); - } - } - if (al.commonattr & ATTR_CMN_GRPUUID) { - if (!VATTR_IS_SUPPORTED(&va, va_guuid)) { - ATTR_PACK(&ab, kauth_null_guid); - } else { - ATTR_PACK(&ab, va.va_guuid); - } - } + if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { + ATTR_PACK(&ab, va.va_uuuid); + ab.actual.commonattr |= ATTR_CMN_UUID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK(&ab, kauth_null_guid); + } + } + if (al.commonattr & ATTR_CMN_GRPUUID) { + if (VATTR_IS_SUPPORTED(&va, va_guuid)) { + ATTR_PACK(&ab, va.va_guuid); + ab.actual.commonattr |= ATTR_CMN_GRPUUID; + } else if (!return_valid || pack_invalid) { + ATTR_PACK(&ab, kauth_null_guid); + } + } if (al.commonattr & ATTR_CMN_FILEID) { ATTR_PACK8(ab, va.va_fileid); + ab.actual.commonattr |= ATTR_CMN_FILEID; } if (al.commonattr & ATTR_CMN_PARENTID) { - uint64_t fileid; - /* - * If the filesystem doesn't know the parent ID, we can - * try to get it via v->v_parent. 
- */ - if (VATTR_IS_SUPPORTED(&va, va_parentid)) { - fileid = va.va_parentid; - } else { - struct vnode_attr lva; - vnode_t pvp; - - pvp = vnode_getparent(vp); - - if (pvp == NULLVP) { - error = EINVAL; - goto out; - } - VATTR_INIT(&lva); - VATTR_WANTED(&lva, va_fileid); - error = vnode_getattr(pvp, &lva, ctx); - vnode_put(pvp); - - if (error != 0) - goto out; - fileid = lva.va_fileid; - } - ATTR_PACK8(ab, fileid); + ATTR_PACK8(ab, va.va_parentid); + ab.actual.commonattr |= ATTR_CMN_PARENTID; + } + + if (al.commonattr & ATTR_CMN_FULLPATH) { + attrlist_pack_string (&ab, fullpathptr, fullpathlen); + ab.actual.commonattr |= ATTR_CMN_FULLPATH; } - /* directory attributes **************************************************/ - if (vnode_isdir(vp)) { - if (al.dirattr & ATTR_DIR_LINKCOUNT) /* full count of entries */ + /* directory attributes *********************************************/ + if (al.dirattr && (vtype == VDIR)) { + if (al.dirattr & ATTR_DIR_LINKCOUNT) { /* full count of entries */ ATTR_PACK4(ab, (uint32_t)va.va_dirlinkcount); - if (al.dirattr & ATTR_DIR_ENTRYCOUNT) + ab.actual.dirattr |= ATTR_DIR_LINKCOUNT; + } + if (al.dirattr & ATTR_DIR_ENTRYCOUNT) { ATTR_PACK4(ab, (uint32_t)va.va_nchildren); - if (al.dirattr & ATTR_DIR_MOUNTSTATUS) - ATTR_PACK_CAST(&ab, uint32_t, (vp->v_flag & VROOT) ? DIR_MNTSTATUS_MNTPOINT : 0); + ab.actual.dirattr |= ATTR_DIR_ENTRYCOUNT; + } + if (al.dirattr & ATTR_DIR_MOUNTSTATUS) { + ATTR_PACK_CAST(&ab, uint32_t, (vp->v_flag & VROOT) ? 
+ DIR_MNTSTATUS_MNTPOINT : 0); + ab.actual.dirattr |= ATTR_DIR_MOUNTSTATUS; + } } /* file attributes **************************************************/ - if (!vnode_isdir(vp)) { - if (al.fileattr & ATTR_FILE_LINKCOUNT) + if (al.fileattr && (vtype != VDIR)) { + if (al.fileattr & ATTR_FILE_LINKCOUNT) { ATTR_PACK4(ab, (uint32_t)va.va_nlink); - if (al.fileattr & ATTR_FILE_TOTALSIZE) + ab.actual.fileattr |= ATTR_FILE_LINKCOUNT; + } + if (al.fileattr & ATTR_FILE_TOTALSIZE) { ATTR_PACK8(ab, va.va_total_size); - if (al.fileattr & ATTR_FILE_ALLOCSIZE) + ab.actual.fileattr |= ATTR_FILE_TOTALSIZE; + } + if (al.fileattr & ATTR_FILE_ALLOCSIZE) { ATTR_PACK8(ab, va.va_total_alloc); - if (al.fileattr & ATTR_FILE_IOBLOCKSIZE) + ab.actual.fileattr |= ATTR_FILE_ALLOCSIZE; + } + if (al.fileattr & ATTR_FILE_IOBLOCKSIZE) { ATTR_PACK4(ab, va.va_iosize); - if (al.fileattr & ATTR_FILE_CLUMPSIZE) - ATTR_PACK4(ab, 0); /* XXX value is deprecated */ + ab.actual.fileattr |= ATTR_FILE_IOBLOCKSIZE; + } + if (al.fileattr & ATTR_FILE_CLUMPSIZE) { + if (!return_valid || pack_invalid) { + ATTR_PACK4(ab, 0); /* this value is deprecated */ + ab.actual.fileattr |= ATTR_FILE_CLUMPSIZE; + } + } if (al.fileattr & ATTR_FILE_DEVTYPE) { + uint32_t dev; + if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { - ATTR_PACK(&ab, vp->v_specinfo->si_rdev); + if (vp->v_specinfo != NULL) + dev = vp->v_specinfo->si_rdev; + else + dev = va.va_rdev; } else { - ATTR_PACK_CAST(&ab, uint32_t, 0); + dev = 0; } + ATTR_PACK4(ab, dev); + ab.actual.fileattr |= ATTR_FILE_DEVTYPE; } if (al.fileattr & ATTR_FILE_DATALENGTH) { if (VATTR_IS_SUPPORTED(&va, va_data_size)) { @@ -1440,6 +1799,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) } else { ATTR_PACK8(ab, va.va_total_size); } + ab.actual.fileattr |= ATTR_FILE_DATALENGTH; } if (al.fileattr & ATTR_FILE_DATAALLOCSIZE) { if (VATTR_IS_SUPPORTED(&va, va_data_alloc)) { @@ -1447,6 +1807,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused 
register_t *retval) } else { ATTR_PACK8(ab, va.va_total_alloc); } + ab.actual.fileattr |= ATTR_FILE_DATAALLOCSIZE; } /* fetch resource fork size/allocation via xattr interface */ if (al.fileattr & (ATTR_FILE_RSRCLENGTH | ATTR_FILE_RSRCALLOCSIZE)) { @@ -1464,6 +1825,7 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) if (al.fileattr & ATTR_FILE_RSRCLENGTH) { rlength = rsize; ATTR_PACK8(ab, rlength); + ab.actual.fileattr |= ATTR_FILE_RSRCLENGTH; } if (al.fileattr & ATTR_FILE_RSRCALLOCSIZE) { uint32_t blksize = vp->v_mount->mnt_vfsstat.f_bsize; @@ -1471,16 +1833,17 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) blksize = 512; rlength = roundup(rsize, blksize); ATTR_PACK8(ab, rlength); + ab.actual.fileattr |= ATTR_FILE_RSRCALLOCSIZE; } } } /* diagnostic */ - if ((ab.fixedcursor - ab.base) != fixedsize) - panic("packed field size mismatch; allocated %ld but packed %d for common %08x vol %08x", - fixedsize, ab.fixedcursor - ab.base, al.commonattr, al.volattr); - if (ab.varcursor != (ab.base + ab.needed)) - panic("packed variable field size mismatch; used %d but expected %ld", ab.varcursor - ab.base, ab.needed); + if (!return_valid && (ab.fixedcursor - ab.base) != fixedsize) + panic("packed field size mismatch; allocated %ld but packed %ld for common %08x vol %08x", + fixedsize, (long) (ab.fixedcursor - ab.base), al.commonattr, al.volattr); + if (!return_valid && ab.varcursor != (ab.base + ab.needed)) + panic("packed variable field size mismatch; used %ld but expected %ld", (long) (ab.varcursor - ab.base), ab.needed); /* * In the compatible case, we report the smaller of the required and returned sizes. @@ -1489,6 +1852,18 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) * they gave us, so they can always check for truncation themselves. */ *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? 
ab.needed : imin(ab.allocated, ab.needed); + + /* Return attribute set output if requested. */ + if (return_valid) { + ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS; + if (pack_invalid) { + /* Only report the attributes that are valid */ + ab.actual.commonattr &= ab.valid.commonattr; + ab.actual.dirattr &= ab.valid.dirattr; + ab.actual.fileattr &= ab.valid.fileattr; + } + bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual)); + } /* Only actually copyout as much out as the user buffer can hold */ error = copyout(ab.base, uap->attributeBuffer, imin(uap->bufferSize, ab.allocated)); @@ -1496,10 +1871,10 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) out: if (va.va_name) kfree(va.va_name, MAXPATHLEN); + if (fullpathptr) + kfree(fullpathptr, MAXPATHLEN); if (vname) - vnode_putname(vname); - if (vp) - vnode_put(vp); + vnode_putname(vname); if (ab.base != NULL) FREE(ab.base, M_TEMP); if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) @@ -1509,6 +1884,72 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused register_t *retval) return(error); } +int +fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) +{ + struct vfs_context *ctx; + vnode_t vp = NULL; + int error; + struct getattrlist_args ap; + + ctx = vfs_context_current(); + error = 0; + + if ((error = file_vnode(uap->fd, &vp)) != 0) + return (error); + + if ((error = vnode_getwithref(vp)) != 0) { + file_drop(uap->fd); + return(error); + } + + ap.path = 0; + ap.alist = uap->alist; + ap.attributeBuffer = uap->attributeBuffer; + ap.bufferSize = uap->bufferSize; + ap.options = uap->options; + + error = getattrlist_internal(vp, &ap, p, ctx); + + file_drop(uap->fd); + if (vp) + vnode_put(vp); + + return error; +} + +int +getattrlist(proc_t p, struct getattrlist_args *uap, __unused int32_t *retval) +{ + struct vfs_context *ctx; + struct nameidata nd; + vnode_t vp = NULL; + u_long nameiflags; + int error; + + ctx = 
vfs_context_current(); + error = 0; + + /* + * Look up the file. + */ + nameiflags = NOTRIGGER | AUDITVNPATH1; + if (!(uap->options & FSOPT_NOFOLLOW)) + nameiflags |= FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx); + + if ((error = namei(&nd)) != 0) + goto out; + vp = nd.ni_vp; + nameidone(&nd); + + error = getattrlist_internal(vp, uap, p, ctx); +out: + if (vp) + vnode_put(vp); + return error; +} + static int attrlist_unpack_fixed(char **cursor, char *end, void *buf, ssize_t size) { @@ -1526,12 +1967,15 @@ attrlist_unpack_fixed(char **cursor, char *end, void *buf, ssize_t size) #define ATTR_UNPACK_TIME(v, is64) \ do { \ if (is64) { \ - struct user_timespec us; \ + struct user64_timespec us; \ ATTR_UNPACK(us); \ v.tv_sec = us.tv_sec; \ v.tv_nsec = us.tv_nsec; \ } else { \ - ATTR_UNPACK(v); \ + struct user32_timespec us; \ + ATTR_UNPACK(us); \ + v.tv_sec = us.tv_sec; \ + v.tv_nsec = us.tv_nsec; \ } \ } while(0) @@ -1539,26 +1983,18 @@ attrlist_unpack_fixed(char **cursor, char *end, void *buf, ssize_t size) /* * Write attributes. */ -int -setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) +static int +setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_context_t ctx) { struct attrlist al; - struct vfs_context context, *ctx; struct vnode_attr va; struct attrreference ar; - struct nameidata nd; - vnode_t vp; - u_long nameiflags; kauth_action_t action; char *user_buf, *cursor, *bufend, *fndrinfo, *cp, *volname; int proc_is64, error; uint32_t nace; kauth_filesec_t rfsec; - context.vc_thread = current_thread(); - context.vc_ucred = kauth_cred_get(); - ctx = &context; - vp = NULL; user_buf = NULL; fndrinfo = NULL; volname = NULL; @@ -1566,19 +2002,6 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) proc_is64 = proc_is64bit(p); VATTR_INIT(&va); - - /* - * Look up the file. 
- */ - nameiflags = 0; - if ((uap->options & FSOPT_NOFOLLOW) == 0) - nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, uap->path, &context); - if ((error = namei(&nd)) != 0) - goto out; - vp = nd.ni_vp; - nameidone(&nd); - /* * Fetch the attribute set and validate. */ @@ -1640,7 +2063,7 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) VFS_DEBUG(ctx, vp, "ATTRLIST - copied in %d bytes of user attributes to %p", uap->bufferSize, user_buf); #if CONFIG_MACF - error = mac_vnode_check_setattrlist(&context, vp, &al); + error = mac_vnode_check_setattrlist(ctx, vp, &al); if (error) goto out; #endif /* MAC */ @@ -1711,7 +2134,7 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) ATTR_UNPACK(ar); cp += ar.attr_dataoffset; rfsec = (kauth_filesec_t)cp; - if (((char *)(rfsec + 1) > bufend) || /* no space for acl */ + if (((((char *)rfsec) + KAUTH_FILESEC_SIZE(0)) > bufend) || /* no space for acl */ (rfsec->fsec_magic != KAUTH_FILESEC_MAGIC) || /* bad magic */ (KAUTH_FILESEC_COPYSIZE(rfsec) != ar.attr_length) || /* size does not match */ ((cp + KAUTH_FILESEC_COPYSIZE(rfsec)) > bufend)) { /* ACEs overrun buffer */ @@ -1778,7 +2201,7 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) * Validate and authorize. 
*/ action = 0; - if ((va.va_active != 0LL) && ((error = vnode_authattr(vp, &va, &action, &context)) != 0)) { + if ((va.va_active != 0LL) && ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: attribute changes refused: %d", error); goto out; } @@ -1793,11 +2216,11 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) goto out; } } else { - action |= KAUTH_VNODE_WRITE_ATTRIBUTES; + action |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; } } - if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, &context)) != 0)) { + if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: authorization failed"); goto out; } @@ -1823,7 +2246,7 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) /* * Write the attributes if we have any. */ - if ((va.va_active != 0LL) && ((error = vnode_setattr(vp, &va, &context)) != 0)) { + if ((va.va_active != 0LL) && ((error = vnode_setattr(vp, &va, ctx)) != 0)) { VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: filesystem returned %d", error); goto out; } @@ -1834,7 +2257,7 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) if (fndrinfo != NULL) { if (al.volattr & ATTR_VOL_INFO) { if (vp->v_tag == VT_HFS) { - error = VNOP_IOCTL(vp, HFS_SET_BOOT_INFO, (caddr_t)fndrinfo, 0, &context); + error = VNOP_IOCTL(vp, HFS_SET_BOOT_INFO, (caddr_t)fndrinfo, 0, ctx); if (error != 0) goto out; } else { @@ -1878,10 +2301,71 @@ setattrlist(proc_t p, struct setattrlist_args *uap, __unused register_t *retval) /* all done and successful */ out: - if (vp != NULL) - vnode_put(vp); if (user_buf != NULL) FREE(user_buf, M_TEMP); VFS_DEBUG(ctx, vp, "ATTRLIST - set returning %d", error); return(error); } + +int +setattrlist(proc_t p, struct setattrlist_args *uap, __unused int32_t *retval) +{ + struct vfs_context *ctx; + struct nameidata nd; + vnode_t vp = NULL; + u_long nameiflags; + int 
error = 0; + + ctx = vfs_context_current(); + + /* + * Look up the file. + */ + nameiflags = 0; + if ((uap->options & FSOPT_NOFOLLOW) == 0) + nameiflags |= FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); + if ((error = namei(&nd)) != 0) + goto out; + vp = nd.ni_vp; + nameidone(&nd); + + error = setattrlist_internal(vp, uap, p, ctx); +out: + if (vp != NULL) + vnode_put(vp); + return error; +} + +int +fsetattrlist(proc_t p, struct fsetattrlist_args *uap, __unused int32_t *retval) +{ + struct vfs_context *ctx; + vnode_t vp = NULL; + int error; + struct setattrlist_args ap; + + ctx = vfs_context_current(); + + if ((error = file_vnode(uap->fd, &vp)) != 0) + return (error); + + if ((error = vnode_getwithref(vp)) != 0) { + file_drop(uap->fd); + return(error); + } + + ap.path = 0; + ap.alist = uap->alist; + ap.attributeBuffer = uap->attributeBuffer; + ap.bufferSize = uap->bufferSize; + ap.options = uap->options; + + error = setattrlist_internal(vp, &ap, p, ctx); + file_drop(uap->fd); + if (vp != NULL) + vnode_put(vp); + + return error; +} + diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 026109d21..3f4c4e593 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,10 +100,12 @@ #include /* thread_block() */ #include +#include #include #include +#include #include #include @@ -113,7 +115,7 @@ static __inline__ void bufqinc(int q); static __inline__ void bufqdec(int q); #endif -static int bcleanbuf(buf_t bp); +static int bcleanbuf(buf_t bp, boolean_t discard); static int brecover_data(buf_t bp); static boolean_t incore(vnode_t vp, daddr64_t blkno); /* timeout is in msecs */ @@ -123,6 +125,7 @@ static void buf_reassign(buf_t bp, vnode_t newvp); static errno_t buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo); static int buf_iterprepare(vnode_t vp, struct buflists *, int flags); static void buf_itercomplete(vnode_t vp, struct buflists *, int flags); +static boolean_t buffer_cache_gc(void); __private_extern__ int bdwrite_internal(buf_t, int); @@ -165,6 +168,8 @@ static lck_grp_attr_t *buf_mtx_grp_attr; static lck_mtx_t *iobuffer_mtxp; static lck_mtx_t *buf_mtxp; +static int buf_busycount; + static __inline__ int buf_timestamp(void) { @@ -304,7 +309,7 @@ buf_markdelayed(buf_t bp) { if (!ISSET(bp->b_flags, B_DELWRI)) { SET(bp->b_flags, B_DELWRI); - OSAddAtomic(1, &nbdwrite); + OSAddAtomicLong(1, &nbdwrite); buf_reassign(bp, bp->b_vp); } SET(bp->b_flags, B_DONE); @@ -627,13 +632,13 @@ buf_setfsprivate(buf_t bp, void *fsprivate) { bp->b_fsprivate = fsprivate; } -ucred_t +kauth_cred_t buf_rcred(buf_t bp) { return (bp->b_rcred); } -ucred_t +kauth_cred_t buf_wcred(buf_t bp) { return (bp->b_wcred); @@ -662,7 +667,7 @@ errno_t buf_map(buf_t bp, caddr_t *io_addr) { buf_t real_bp; - vm_offset_t vaddr; + vm_offset_t vaddr; kern_return_t kret; if ( !(bp->b_flags & B_CLUSTER)) { @@ -901,8 +906,11 @@ buf_strategy(vnode_t devvp, void *ap) return (error); } - if (bp->b_blkno == -1) + if ((bp->b_blkno == -1) || (contig_bytes == 0)) { + /* Set block number to force biodone later */ + bp->b_blkno = -1; buf_clear(bp); + } else if ((long)contig_bytes < bp->b_bcount) return 
(buf_strategy_fragmented(devvp, bp, f_offset, contig_bytes)); } @@ -1042,6 +1050,10 @@ buf_invalidateblks(vnode_t vp, int flags, int slpflag, int slptimeo) int must_rescan = 1; struct buflists local_iterblkhd; + + if (LIST_EMPTY(&vp->v_cleanblkhd) && LIST_EMPTY(&vp->v_dirtyblkhd)) + return (0); + lck_mtx_lock(buf_mtxp); for (;;) { @@ -1454,6 +1466,8 @@ bufinit(void) TAILQ_INIT(dp); bufhashtbl = hashinit(nbuf_hashelements, M_CACHE, &bufhash); + buf_busycount = 0; + /* Initialize the buffer headers */ for (i = 0; i < max_nbuf_headers; i++) { nbuf_headers++; @@ -1513,6 +1527,11 @@ bufinit(void) /* start the bcleanbuf() thread */ bcleanbuf_thread_init(); + /* Register a callout for relieving vm pressure */ + if (vm_set_buffer_cleanup_callout(buffer_cache_gc) != KERN_SUCCESS) { + panic("Couldn't register buffer cache callout for vm pressure!\n"); + } + #if BALANCE_QUEUES { static void bufq_balance_thread_init(void) __attribute__((section("__TEXT, initcode"))); @@ -1584,7 +1603,7 @@ getbufzone(size_t size) static struct buf * -bio_doread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, int async, int queuetype) +bio_doread(vnode_t vp, daddr64_t blkno, int size, kauth_cred_t cred, int async, int queuetype) { buf_t bp; @@ -1613,7 +1632,7 @@ bio_doread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, int async, int q /* Pay for the read. 
*/ if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_inblock); /* XXX */ + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_inblock); /* XXX */ if (async) { /* @@ -1640,7 +1659,7 @@ bio_doread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, int async, int q */ static errno_t do_breadn_for_type(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, - int nrablks, ucred_t cred, buf_t *bpp, int queuetype) + int nrablks, kauth_cred_t cred, buf_t *bpp, int queuetype) { buf_t bp; int i; @@ -1669,7 +1688,7 @@ do_breadn_for_type(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int * This algorithm described in Bach (p.54). */ errno_t -buf_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) +buf_bread(vnode_t vp, daddr64_t blkno, int size, kauth_cred_t cred, buf_t *bpp) { buf_t bp; @@ -1685,7 +1704,7 @@ buf_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) * This algorithm described in Bach (p.54). */ errno_t -buf_meta_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) +buf_meta_bread(vnode_t vp, daddr64_t blkno, int size, kauth_cred_t cred, buf_t *bpp) { buf_t bp; @@ -1700,7 +1719,7 @@ buf_meta_bread(vnode_t vp, daddr64_t blkno, int size, ucred_t cred, buf_t *bpp) * Read-ahead multiple disk blocks. The first is sync, the rest async. 
*/ errno_t -buf_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, ucred_t cred, buf_t *bpp) +buf_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, kauth_cred_t cred, buf_t *bpp) { return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_READ)); } @@ -1710,7 +1729,7 @@ buf_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasize * [buf_breadn() for meta-data] */ errno_t -buf_meta_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, ucred_t cred, buf_t *bpp) +buf_meta_breadn(vnode_t vp, daddr64_t blkno, int size, daddr64_t *rablks, int *rasizes, int nrablks, kauth_cred_t cred, buf_t *bpp) { return (do_breadn_for_type(vp, blkno, size, rablks, rasizes, nrablks, cred, bpp, BLK_META)); } @@ -1736,7 +1755,7 @@ buf_bwrite(buf_t bp) CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI)); if (wasdelayed) - OSAddAtomic(-1, &nbdwrite); + OSAddAtomicLong(-1, &nbdwrite); if (!sync) { /* @@ -1749,7 +1768,7 @@ buf_bwrite(buf_t bp) buf_reassign(bp, vp); else if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ } trace(TR_BUFWRITE, pack(vp, bp->b_bcount), bp->b_lblkno); @@ -1774,7 +1793,7 @@ buf_bwrite(buf_t bp) buf_reassign(bp, vp); else if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ /* Release the buffer. */ // XXXdbg - only if the unused bit is set @@ -1809,7 +1828,7 @@ vn_bwrite(struct vnop_bwrite_args *ap) * * Described in Leffler, et al. (pp. 208-213). 
* - * Note: With the abilitty to allocate additional buffer + * Note: With the ability to allocate additional buffer * headers, we can get in to the situation where "too" many * buf_bdwrite()s can create situation where the kernel can create * buffers faster than the disks can service. Doing a buf_bawrite() in @@ -1830,17 +1849,11 @@ bdwrite_internal(buf_t bp, int return_error) if (!ISSET(bp->b_flags, B_DELWRI)) { SET(bp->b_flags, B_DELWRI); if (p && p->p_stats) - OSIncrementAtomic(&p->p_stats->p_ru.ru_oublock); /* XXX */ - OSAddAtomic(1, &nbdwrite); + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSAddAtomicLong(1, &nbdwrite); buf_reassign(bp, vp); } - /* If this is a tape block, write it the block now. */ - if (ISSET(bp->b_flags, B_TAPE)) { - VNOP_BWRITE(bp); - return (0); - } - /* * if we're not LOCKED, but the total number of delayed writes * has climbed above 75% of the total buffers in the system @@ -1939,12 +1952,7 @@ buf_brelse(buf_t bp) panic("buf_brelse: bad buffer = %p\n", bp); #ifdef JOE_DEBUG - bp->b_stackbrelse[0] = (int)__builtin_return_address(0); - bp->b_stackbrelse[1] = (int)__builtin_return_address(1); - bp->b_stackbrelse[2] = (int)__builtin_return_address(2); - bp->b_stackbrelse[3] = (int)__builtin_return_address(3); - bp->b_stackbrelse[4] = (int)__builtin_return_address(4); - bp->b_stackbrelse[5] = (int)__builtin_return_address(5); + (void) OSBacktrace(&bp->b_stackbrelse[0], 6); bp->b_lastbrelse = current_thread(); bp->b_tag = 0; @@ -1955,7 +1963,7 @@ buf_brelse(buf_t bp) } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_START, - bp->b_lblkno * PAGE_SIZE, (int)bp, (int)bp->b_datap, + bp->b_lblkno * PAGE_SIZE, bp, bp->b_datap, bp->b_flags, 0); trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); @@ -2002,8 +2010,8 @@ buf_brelse(buf_t bp) if (kret != KERN_SUCCESS) panic("brelse: Failed to create UPL"); -#ifdef UPL_DEBUG - upl_ubc_alias_set(upl, bp, 5); +#if UPL_DEBUG + upl_ubc_alias_set(upl, (uintptr_t) bp, 
(uintptr_t) 5); #endif /* UPL_DEBUG */ } } else { @@ -2050,14 +2058,16 @@ buf_brelse(buf_t bp) if (ISSET(bp->b_flags, (B_NOCACHE|B_ERROR))) SET(bp->b_flags, B_INVAL); - if ((bp->b_bufsize <= 0) || ISSET(bp->b_flags, B_INVAL)) { + if ((bp->b_bufsize <= 0) || + ISSET(bp->b_flags, B_INVAL) || + (ISSET(bp->b_lflags, BL_WANTDEALLOC) && !ISSET(bp->b_flags, B_DELWRI))) { /* * If it's invalid or empty, dissociate it from its vnode, * release its storage if B_META, and * clean it up a bit and put it on the EMPTY queue */ if (ISSET(bp->b_flags, B_DELWRI)) - OSAddAtomic(-1, &nbdwrite); + OSAddAtomicLong(-1, &nbdwrite); if (ISSET(bp->b_flags, B_META)) { if (bp->b_bufsize) { @@ -2141,6 +2151,7 @@ buf_brelse(buf_t bp) * Unlock the buffer. */ CLR(bp->b_lflags, (BL_BUSY | BL_WANTED)); + buf_busycount--; lck_mtx_unlock(buf_mtxp); @@ -2157,7 +2168,7 @@ buf_brelse(buf_t bp) wakeup(bp); } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_END, - (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0); + bp, bp->b_datap, bp->b_flags, 0, 0); } /* @@ -2226,14 +2237,14 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int struct bufhashhdr *dp; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_START, - (int)(blkno * PAGE_SIZE), size, operation, 0, 0); + (uintptr_t)(blkno * PAGE_SIZE), size, operation, 0, 0); ret_only_valid = operation & BLK_ONLYVALID; operation &= ~BLK_ONLYVALID; dp = BUFHASH(vp, blkno); start: lck_mtx_lock_spin(buf_mtxp); -start_locked: + if ((bp = incore_locked(vp, blkno, dp))) { /* * Found in the Buffer Cache @@ -2249,7 +2260,6 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int SET(bp->b_lflags, BL_WANTED); bufstats.bufs_busyincore++; - lck_mtx_convert_spin(buf_mtxp); /* * don't retake the mutex after being awakened... 
* the time out is in msecs @@ -2257,6 +2267,9 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int ts.tv_sec = (slptimeo/1000); ts.tv_nsec = (slptimeo % 1000) * 10 * NSEC_PER_USEC * 1000; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 396)) | DBG_FUNC_NONE, + (uintptr_t)blkno, size, operation, 0, 0); + err = msleep(bp, buf_mtxp, slpflag | PDROP | (PRIBIO + 1), "buf_getblk", &ts); /* @@ -2283,6 +2296,7 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int */ SET(bp->b_lflags, BL_BUSY); SET(bp->b_flags, B_CACHE); + buf_busycount++; bremfree_locked(bp); bufstats.bufs_incore++; @@ -2329,7 +2343,7 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int } else CLR(bp->b_flags, (B_DONE | B_CACHE | B_WASDIRTY | B_DELWRI)); - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); + kret = ubc_upl_map(upl, (vm_offset_t*)&(bp->b_datap)); if (kret != KERN_SUCCESS) panic("getblk: ubc_upl_map() failed with (%d)", kret); @@ -2356,13 +2370,11 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int lck_mtx_unlock(buf_mtxp); return (NULL); } - lck_mtx_convert_spin(buf_mtxp); - if ((vnode_isreg(vp) == 0) || (UBCINFOEXISTS(vp) == 0) /*|| (vnode_issystem(vp) == 1)*/) operation = BLK_META; if ((bp = getnewbuf(slpflag, slptimeo, &queue)) == NULL) - goto start_locked; + goto start; /* * getnewbuf may block for a number of different reasons... 
@@ -2418,7 +2430,7 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int * in bufstats are protected with either * buf_mtxp or iobuffer_mtxp */ - OSAddAtomic(1, &bufstats.bufs_miss); + OSAddAtomicLong(1, &bufstats.bufs_miss); break; case BLK_WRITE: @@ -2448,8 +2460,8 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int if (kret != KERN_SUCCESS) panic("Failed to create UPL"); -#ifdef UPL_DEBUG - upl_ubc_alias_set(upl, bp, 4); +#if UPL_DEBUG + upl_ubc_alias_set(upl, (uintptr_t) bp, (uintptr_t) 4); #endif /* UPL_DEBUG */ bp->b_upl = upl; @@ -2462,7 +2474,7 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int SET(bp->b_flags, B_CACHE | B_DONE); - OSAddAtomic(1, &bufstats.bufs_vmhits); + OSAddAtomicLong(1, &bufstats.bufs_vmhits); bp->b_validoff = 0; bp->b_dirtyoff = 0; @@ -2493,9 +2505,9 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int if ((long)contig_bytes < bp->b_bcount) bp->b_blkno = bp->b_lblkno; } else { - OSAddAtomic(1, &bufstats.bufs_miss); + OSAddAtomicLong(1, &bufstats.bufs_miss); } - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); + kret = ubc_upl_map(upl, (vm_offset_t *)&(bp->b_datap)); if (kret != KERN_SUCCESS) panic("getblk: ubc_upl_map() failed with (%d)", kret); @@ -2508,15 +2520,10 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int } } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_END, - (int)bp, (int)bp->b_datap, bp->b_flags, 3, 0); + bp, bp->b_datap, bp->b_flags, 3, 0); #ifdef JOE_DEBUG - bp->b_stackgetblk[0] = (int)__builtin_return_address(0); - bp->b_stackgetblk[1] = (int)__builtin_return_address(1); - bp->b_stackgetblk[2] = (int)__builtin_return_address(2); - bp->b_stackgetblk[3] = (int)__builtin_return_address(3); - bp->b_stackgetblk[4] = (int)__builtin_return_address(4); - bp->b_stackgetblk[5] = (int)__builtin_return_address(5); + (void) OSBacktrace(&bp->b_stackgetblk[0], 
6); #endif return (bp); } @@ -2527,13 +2534,15 @@ buf_getblk(vnode_t vp, daddr64_t blkno, int size, int slpflag, int slptimeo, int buf_t buf_geteblk(int size) { - buf_t bp; + buf_t bp = NULL; int queue = BQ_EMPTY; - lck_mtx_lock(buf_mtxp); + do { + lck_mtx_lock_spin(buf_mtxp); + + bp = getnewbuf(0, 0, &queue); + } while (bp == NULL); - while ((bp = getnewbuf(0, 0, &queue)) == 0) - ; SET(bp->b_flags, (B_META|B_INVAL)); #if DIAGNOSTIC @@ -2595,7 +2604,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = zalloc(z); } else { bp->b_datap = (uintptr_t)NULL; - kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); CLR(bp->b_flags, B_ZALLOC); } bcopy((void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); @@ -2608,7 +2617,7 @@ allocbuf(buf_t bp, int size) if ((vm_size_t)bp->b_bufsize < desired_size) { /* reallocate to a bigger size */ bp->b_datap = (uintptr_t)NULL; - kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); bcopy((const void *)elem, (caddr_t)bp->b_datap, bp->b_bufsize); kmem_free(kernel_map, elem, bp->b_bufsize); } else { @@ -2624,7 +2633,7 @@ allocbuf(buf_t bp, int size) *(void **)(&bp->b_datap) = zalloc(z); SET(bp->b_flags, B_ZALLOC); } else - kmem_alloc_wired(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); + kmem_alloc_kobject(kernel_map, (vm_offset_t *)&bp->b_datap, desired_size); } if (bp->b_datap == 0) @@ -2654,7 +2663,8 @@ allocbuf(buf_t bp, int size) * on which it was found. 
* * buf_mtxp is held upon entry - * returns with buf_mtxp locked + * returns with buf_mtxp locked if new buf available + * returns with buf_mtxp UNlocked if new buf NOT available */ static buf_t @@ -2732,7 +2742,7 @@ getnewbuf(int slpflag, int slptimeo, int * queue) SET(bp->b_flags, B_HDRALLOC); *queue = BQ_EMPTY; } - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); if (bp) { binshash(bp, &invalhash); @@ -2751,7 +2761,8 @@ getnewbuf(int slpflag, int slptimeo, int * queue) ts.tv_sec = (slptimeo/1000); /* the hz value is 100; which leads to 10ms */ ts.tv_nsec = (slptimeo % 1000) * NSEC_PER_USEC * 1000 * 10; - msleep(&needbuffer, buf_mtxp, slpflag|(PRIBIO+1), "getnewbuf", &ts); + + msleep(&needbuffer, buf_mtxp, slpflag | PDROP | (PRIBIO+1), "getnewbuf", &ts); return (NULL); } @@ -2813,10 +2824,10 @@ getnewbuf(int slpflag, int slptimeo, int * queue) } found: if (ISSET(bp->b_flags, B_LOCKED) || ISSET(bp->b_lflags, BL_BUSY)) - panic("getnewbuf: bp @ %p is LOCKED or BUSY! (flags 0x%lx)\n", bp, bp->b_flags); + panic("getnewbuf: bp @ %p is LOCKED or BUSY! (flags 0x%x)\n", bp, bp->b_flags); /* Clean it */ - if (bcleanbuf(bp)) { + if (bcleanbuf(bp, FALSE)) { /* * moved to the laundry thread, buffer not ready */ @@ -2837,7 +2848,7 @@ getnewbuf(int slpflag, int slptimeo, int * queue) * returns with buf_mtxp locked */ static int -bcleanbuf(buf_t bp) +bcleanbuf(buf_t bp, boolean_t discard) { /* Remove from the queue */ bremfree_locked(bp); @@ -2851,6 +2862,10 @@ bcleanbuf(buf_t bp) * it on the LAUNDRY queue, and return 1 */ if (ISSET(bp->b_flags, B_DELWRI)) { + if (discard) { + SET(bp->b_lflags, BL_WANTDEALLOC); + } + bp->b_whichq = BQ_LAUNDRY; bp->b_timestamp = buf_timestamp(); binstailfree(bp, &bufqueues[BQ_LAUNDRY], BQ_LAUNDRY); @@ -2864,7 +2879,7 @@ bcleanbuf(buf_t bp) */ (void)thread_block(THREAD_CONTINUE_NULL); - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); return (1); } @@ -2876,7 +2891,8 @@ bcleanbuf(buf_t bp) * Buffer is no longer on any free list... 
we own it */ SET(bp->b_lflags, BL_BUSY); - + buf_busycount++; + bremhash(bp); /* @@ -2906,31 +2922,6 @@ bcleanbuf(buf_t bp) trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); - /* clear out various other fields */ - bp->b_bufsize = 0; - bp->b_datap = (uintptr_t)NULL; - bp->b_upl = (void *)NULL; - /* - * preserve the state of whether this buffer - * was allocated on the fly or not... - * the only other flag that should be set at - * this point is BL_BUSY... - */ -#ifdef JOE_DEBUG - bp->b_owner = current_thread(); - bp->b_tag = 3; -#endif - bp->b_lflags = BL_BUSY; - bp->b_flags = (bp->b_flags & B_HDRALLOC); - bp->b_dev = NODEV; - bp->b_blkno = bp->b_lblkno = 0; - bp->b_iodone = NULL; - bp->b_error = 0; - bp->b_resid = 0; - bp->b_bcount = 0; - bp->b_dirtyoff = bp->b_dirtyend = 0; - bp->b_validoff = bp->b_validend = 0; - /* nuke any credentials we were holding */ if (IS_VALID_CRED(bp->b_rcred)) { kauth_cred_unref(&bp->b_rcred); @@ -2938,8 +2929,44 @@ bcleanbuf(buf_t bp) if (IS_VALID_CRED(bp->b_wcred)) { kauth_cred_unref(&bp->b_wcred); } - lck_mtx_lock(buf_mtxp); + /* If discarding, just move to the empty queue */ + if (discard) { + lck_mtx_lock_spin(buf_mtxp); + CLR(bp->b_flags, (B_META | B_ZALLOC | B_DELWRI | B_LOCKED | B_AGE | B_ASYNC | B_NOCACHE | B_FUA)); + bp->b_whichq = BQ_EMPTY; + binshash(bp, &invalhash); + binsheadfree(bp, &bufqueues[BQ_EMPTY], BQ_EMPTY); + CLR(bp->b_lflags, BL_BUSY); + buf_busycount--; + } else { + /* Not discarding: clean up and prepare for reuse */ + bp->b_bufsize = 0; + bp->b_datap = (uintptr_t)NULL; + bp->b_upl = (void *)NULL; + /* + * preserve the state of whether this buffer + * was allocated on the fly or not... + * the only other flag that should be set at + * this point is BL_BUSY... 
+ */ +#ifdef JOE_DEBUG + bp->b_owner = current_thread(); + bp->b_tag = 3; +#endif + bp->b_lflags = BL_BUSY; + bp->b_flags = (bp->b_flags & B_HDRALLOC); + bp->b_dev = NODEV; + bp->b_blkno = bp->b_lblkno = 0; + bp->b_iodone = NULL; + bp->b_error = 0; + bp->b_resid = 0; + bp->b_bcount = 0; + bp->b_dirtyoff = bp->b_dirtyend = 0; + bp->b_validoff = bp->b_validend = 0; + + lck_mtx_lock_spin(buf_mtxp); + } return (0); } @@ -2954,8 +2981,9 @@ buf_invalblkno(vnode_t vp, daddr64_t lblkno, int flags) dp = BUFHASH(vp, lblkno); - lck_mtx_lock(buf_mtxp); relook: + lck_mtx_lock_spin(buf_mtxp); + if ((bp = incore_locked(vp, lblkno, dp)) == (struct buf *)0) { lck_mtx_unlock(buf_mtxp); return (0); @@ -2967,10 +2995,9 @@ buf_invalblkno(vnode_t vp, daddr64_t lblkno, int flags) } SET(bp->b_lflags, BL_WANTED); - error = msleep((caddr_t)bp, buf_mtxp, (PRIBIO + 1), "buf_invalblkno", NULL); + error = msleep((caddr_t)bp, buf_mtxp, PDROP | (PRIBIO + 1), "buf_invalblkno", NULL); if (error) { - lck_mtx_unlock(buf_mtxp); return (error); } goto relook; @@ -2978,6 +3005,7 @@ buf_invalblkno(vnode_t vp, daddr64_t lblkno, int flags) bremfree_locked(bp); SET(bp->b_lflags, BL_BUSY); SET(bp->b_flags, B_INVAL); + buf_busycount++; #ifdef JOE_DEBUG bp->b_owner = current_thread(); bp->b_tag = 4; @@ -3011,6 +3039,7 @@ buf_drop(buf_t bp) * Unlock the buffer. 
*/ CLR(bp->b_lflags, (BL_BUSY | BL_WANTED)); + buf_busycount--; lck_mtx_unlock(buf_mtxp); @@ -3027,7 +3056,7 @@ errno_t buf_acquire(buf_t bp, int flags, int slpflag, int slptimeo) { errno_t error; - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); error = buf_acquire_locked(bp, flags, slpflag, slptimeo); @@ -3052,7 +3081,7 @@ buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo) } if (ISSET(bp->b_lflags, BL_BUSY)) { /* - * since the mutex_lock may block, the buffer + * since the lck_mtx_lock may block, the buffer * may become BUSY, so we need to * recheck for a NOWAIT request */ @@ -3072,6 +3101,8 @@ buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo) if (flags & BAC_REMOVE) bremfree_locked(bp); SET(bp->b_lflags, BL_BUSY); + buf_busycount++; + #ifdef JOE_DEBUG bp->b_owner = current_thread(); bp->b_tag = 5; @@ -3087,15 +3118,17 @@ buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo) errno_t buf_biowait(buf_t bp) { - lck_mtx_lock(buf_mtxp); + while (!ISSET(bp->b_flags, B_DONE)) { - DTRACE_IO1(wait__start, buf_t, bp); - while (!ISSET(bp->b_flags, B_DONE)) - (void) msleep(bp, buf_mtxp, (PRIBIO+1), "buf_biowait", NULL); - DTRACE_IO1(wait__done, buf_t, bp); + lck_mtx_lock_spin(buf_mtxp); - lck_mtx_unlock(buf_mtxp); - + if (!ISSET(bp->b_flags, B_DONE)) { + DTRACE_IO1(wait__start, buf_t, bp); + (void) msleep(bp, buf_mtxp, PDROP | (PRIBIO+1), "buf_biowait", NULL); + DTRACE_IO1(wait__done, buf_t, bp); + } else + lck_mtx_unlock(buf_mtxp); + } /* check for interruption of I/O (e.g. via NFS), then errors. 
*/ if (ISSET(bp->b_flags, B_EINTR)) { CLR(bp->b_flags, B_EINTR); @@ -3112,14 +3145,17 @@ buf_biowait(buf_t bp) void buf_biowait_callback(buf_t bp) { - lck_mtx_lock(buf_mtxp); + while (!ISSET(bp->b_lflags, BL_CALLDONE)) { - DTRACE_IO1(wait__start, buf_t, bp); - while (!ISSET(bp->b_lflags, BL_CALLDONE)) - (void) msleep(bp, buf_mtxp, (PRIBIO+1), "buf_biowait", NULL); - DTRACE_IO1(wait__done, buf_t, bp); + lck_mtx_lock_spin(buf_mtxp); - lck_mtx_unlock(buf_mtxp); + if (!ISSET(bp->b_lflags, BL_CALLDONE)) { + DTRACE_IO1(wait__start, buf_t, bp); + (void) msleep(bp, buf_mtxp, PDROP | (PRIBIO+1), "buf_biowait", NULL); + DTRACE_IO1(wait__done, buf_t, bp); + } else + lck_mtx_unlock(buf_mtxp); + } } /* @@ -3144,8 +3180,10 @@ extern int hard_throttle_on_root; void buf_biodone(buf_t bp) { + mount_t mp; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START, - (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0); + bp, bp->b_datap, bp->b_flags, 0, 0); if (ISSET(bp->b_flags, B_DONE)) panic("biodone already"); @@ -3154,8 +3192,17 @@ buf_biodone(buf_t bp) fslog_io_error(bp); } - if (bp->b_vp && bp->b_vp->v_mount && (bp->b_flags & B_READ) == 0) { - update_last_io_time(bp->b_vp->v_mount); + if (bp->b_vp && bp->b_vp->v_mount) { + mp = bp->b_vp->v_mount; + } else { + mp = NULL; + } + + if (mp && (bp->b_flags & B_READ) == 0) { + update_last_io_time(mp); + INCR_PENDING_IO(-(pending_io_t)buf_count(bp), mp->mnt_pending_write_size); + } else if (mp) { + INCR_PENDING_IO(-(pending_io_t)buf_count(bp), mp->mnt_pending_read_size); } if (kdebug_enable) { @@ -3172,11 +3219,11 @@ buf_biodone(buf_t bp) code |= DKIO_PAGING; KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - (unsigned int)bp, (unsigned int)bp->b_vp, + bp, (uintptr_t)bp->b_vp, bp->b_resid, bp->b_error, 0); } if ((bp->b_vp != NULLVP) && - ((bp->b_flags & (B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && + ((bp->b_flags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && (bp->b_vp->v_mount->mnt_kern_flag & 
MNTK_ROOTDEV)) { microuptime(&priority_IO_timestamp_for_root); hard_throttle_on_root = 0; @@ -3278,7 +3325,7 @@ buf_biodone(buf_t bp) } biodone_done: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END, - (int)bp, (int)bp->b_datap, bp->b_flags, 0, 0); + (uintptr_t)bp, (uintptr_t)bp->b_datap, bp->b_flags, 0, 0); } /* @@ -3290,7 +3337,7 @@ count_lock_queue(void) buf_t bp; int n = 0; - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = bp->b_freelist.tqe_next) @@ -3306,16 +3353,7 @@ count_lock_queue(void) int count_busy_buffers(void) { - buf_t bp; - int nbusy = 0; - - lck_mtx_lock(buf_mtxp); - for (bp = &buf_headers[boot_nbuf_headers]; --bp >= buf_headers; ) - if (!ISSET(bp->b_flags, B_INVAL) && ISSET(bp->b_lflags, BL_BUSY)) - nbusy++; - lck_mtx_unlock(buf_mtxp); - - return (nbusy); + return buf_busycount + bufstats.bufs_iobufinuse; } #if DIAGNOSTIC @@ -3364,14 +3402,16 @@ alloc_io_buf(vnode_t vp, int priv) { buf_t bp; - lck_mtx_lock(iobuffer_mtxp); + lck_mtx_lock_spin(iobuffer_mtxp); while (((niobuf_headers - NRESERVEDIOBUFS < bufstats.bufs_iobufinuse) && !priv) || (bp = iobufqueue.tqh_first) == NULL) { bufstats.bufs_iobufsleeps++; need_iobuffer = 1; - (void) msleep(&need_iobuffer, iobuffer_mtxp, (PRIBIO+1), (const char *)"alloc_io_buf", NULL); + (void) msleep(&need_iobuffer, iobuffer_mtxp, PDROP | (PRIBIO+1), (const char *)"alloc_io_buf", NULL); + + lck_mtx_lock_spin(iobuffer_mtxp); } TAILQ_REMOVE(&iobufqueue, bp, b_freelist); @@ -3443,6 +3483,9 @@ free_io_buf(buf_t bp) need_iobuffer = 0; need_wakeup = 1; } + if (bufstats.bufs_iobufinuse <= 0) + panic("free_io_buf: bp(%p) - bufstats.bufs_iobufinuse < 0", bp); + bufstats.bufs_iobufinuse--; lck_mtx_unlock(iobuffer_mtxp); @@ -3455,7 +3498,7 @@ free_io_buf(buf_t bp) void buf_list_lock(void) { - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); } void @@ -3474,8 +3517,11 @@ buf_list_unlock(void) static void bcleanbuf_thread_init(void) { + thread_t thread = 
THREAD_NULL; + /* create worker thread */ - kernel_thread(kernel_task, bcleanbuf_thread); + kernel_thread_start((thread_continue_t)bcleanbuf_thread, NULL, &thread); + thread_deallocate(thread); } static void @@ -3486,11 +3532,13 @@ bcleanbuf_thread(void) int loopcnt = 0; for (;;) { - lck_mtx_lock(buf_mtxp); + lck_mtx_lock_spin(buf_mtxp); - while ( (bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY])) == NULL) - (void)msleep((void *)&bufqueues[BQ_LAUNDRY], buf_mtxp, PRIBIO, "blaundry", NULL); + while ( (bp = TAILQ_FIRST(&bufqueues[BQ_LAUNDRY])) == NULL) { + (void)msleep((void *)&bufqueues[BQ_LAUNDRY], buf_mtxp, PDROP | PRIBIO, "blaundry", NULL); + lck_mtx_lock_spin(buf_mtxp); + } /* * Remove from the queue */ @@ -3500,6 +3548,7 @@ bcleanbuf_thread(void) * Buffer is no longer on any free list */ SET(bp->b_lflags, BL_BUSY); + buf_busycount++; #ifdef JOE_DEBUG bp->b_owner = current_thread(); @@ -3523,6 +3572,7 @@ bcleanbuf_thread(void) /* we never leave a busy page on the laundary queue */ CLR(bp->b_lflags, BL_BUSY); + buf_busycount--; #ifdef JOE_DEBUG bp->b_owner = current_thread(); bp->b_tag = 11; @@ -3584,7 +3634,7 @@ brecover_data(buf_t bp) } bp->b_upl = upl; - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_datap)); + kret = ubc_upl_map(upl, (vm_offset_t *)&(bp->b_datap)); if (kret != KERN_SUCCESS) panic("getblk: ubc_upl_map() failed with (%d)", kret); @@ -3598,6 +3648,38 @@ brecover_data(buf_t bp) return(0); } +static boolean_t +buffer_cache_gc(void) +{ + buf_t bp; + boolean_t did_large_zfree = FALSE; + int now = buf_timestamp(); + + lck_mtx_lock_spin(buf_mtxp); + + /* We only care about metadata (incore storage comes from zalloc()) */ + bp = TAILQ_FIRST(&bufqueues[BQ_META]); + + /* Only collect buffers unused in the last N seconds. Note: ordered by timestamp. 
*/ + while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD)) { + int result, size; + boolean_t is_zalloc; + + size = buf_size(bp); + is_zalloc = ISSET(bp->b_flags, B_ZALLOC); + + result = bcleanbuf(bp, TRUE); + if ((result == 0) && is_zalloc && (size >= PAGE_SIZE)) { + /* We've definitely freed at least a page to a zone */ + did_large_zfree = TRUE; + } + bp = TAILQ_FIRST(&bufqueues[BQ_META]); + } + + lck_mtx_unlock(buf_mtxp); + + return did_large_zfree; +} /* @@ -3656,6 +3738,8 @@ bflushq(int whichq, mount_t mp) bp->b_tag = 7; #endif SET(bp->b_lflags, BL_BUSY); + buf_busycount++; + flush_table[buf_count] = bp; buf_count++; total_writes++; @@ -3816,6 +3900,7 @@ bufqdec(int q) static void bufq_balance_thread_init(void) { + thread_t thread = THREAD_NULL; if (bufqscanwait++ == 0) { @@ -3872,7 +3957,8 @@ bufq_balance_thread_init(void) } /* create worker thread */ - kernel_thread(kernel_task, bufqscan_thread); + kernel_thread_start((thread_continue_t)bufqscan_thread, NULL, &thread); + thread_deallocate(thread); } /* The workloop for the buffer balancing thread */ @@ -3961,7 +4047,7 @@ balancebufq(int q) /* check if it's stale */ if ((t - bp->b_timestamp) > bufqlim[q].bl_stale) { - if (bcleanbuf(bp)) { + if (bcleanbuf(bp, FALSE)) { /* buf_bawrite() issued, bp not ready */ moretodo = 1; } else { diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index 95a07d69a..ba73d95a4 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -143,14 +143,24 @@ struct nchstats nchstats; /* cache effectiveness statistics */ lck_grp_t * namecache_lck_grp; lck_grp_attr_t * namecache_lck_grp_attr; lck_attr_t * namecache_lck_attr; -lck_rw_t * namecache_rw_lock; + +lck_grp_t * strcache_lck_grp; +lck_grp_attr_t * strcache_lck_grp_attr; +lck_attr_t * strcache_lck_attr; + +lck_rw_t * namecache_rw_lock; +lck_rw_t * strtable_rw_lock; + +#define NUM_STRCACHE_LOCKS 1024 + +lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS]; + static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp); -static int remove_name_locked(const char *); -static const char *add_name_locked(const char *, size_t, u_int, u_int); +static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int); static void init_string_table(void) __attribute__((section("__TEXT, initcode"))); static void cache_delete(struct namecache *, int); -static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp); +static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname); #ifdef DUMP_STRING_TABLE /* @@ -168,23 +178,26 @@ static unsigned int crc32tab[256]; -// -// This function builds the path to a filename in "buff". The -// length of the buffer *INCLUDING* the trailing zero byte is -// returned in outlen. NOTE: the length includes the trailing -// zero byte and thus the length is one greater than what strlen -// would return. This is important and lots of code elsewhere -// in the kernel assumes this behavior. -// -// This function can call vnop in file system if the parent vnode -// does not exist or when called for hardlinks via volfs path. -// If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present -// in the name cache and does not enter the file system. -// +/* + * This function builds the path to a filename in "buff". 
The + * length of the buffer *INCLUDING* the trailing zero byte is + * returned in outlen. NOTE: the length includes the trailing + * zero byte and thus the length is one greater than what strlen + * would return. This is important and lots of code elsewhere + * in the kernel assumes this behavior. + * + * This function can call vnop in file system if the parent vnode + * does not exist or when called for hardlinks via volfs path. + * If BUILDPATH_NO_FS_ENTER is set in flags, it only uses values present + * in the name cache and does not enter the file system. + * + * passed in vp must have a valid io_count reference + */ int build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx) { - vnode_t vp; + vnode_t vp, tvp; + vnode_t vp_with_iocount; vnode_t proc_root_dir_vp; char *end; const char *str; @@ -192,24 +205,48 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs int ret = 0; int fixhardlink; - if (first_vp == NULLVP) { + if (first_vp == NULLVP) return (EINVAL); - } - /* Grab the process fd so we can evaluate fd_rdir. */ - if (vfs_context_proc(ctx)->p_fd) { - proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir; - } else { - proc_root_dir_vp = NULL; - } + + /* + * Grab the process fd so we can evaluate fd_rdir. + */ + if (vfs_context_proc(ctx)->p_fd) + proc_root_dir_vp = vfs_context_proc(ctx)->p_fd->fd_rdir; + else + proc_root_dir_vp = NULL; + + vp_with_iocount = NULLVP; again: vp = first_vp; + end = &buff[buflen-1]; *end = '\0'; - /* Check if this is the root of a file system. */ + /* + * holding the NAME_CACHE_LOCK in shared mode is + * sufficient to stabilize both the vp->v_parent chain + * and the 'vp->v_mount->mnt_vnodecovered' chain + * + * if we need to drop this lock, we must first grab the v_id + * from the vnode we're currently working with... 
if that + * vnode doesn't already have an io_count reference (the vp + * passed in comes with one), we must grab a reference + * after we drop the NAME_CACHE_LOCK via vnode_getwithvid... + * deadlocks may result if you call vnode_get while holding + * the NAME_CACHE_LOCK... we lazily release the reference + * we pick up the next time we encounter a need to drop + * the NAME_CACHE_LOCK or before we return from this routine + */ + NAME_CACHE_LOCK_SHARED(); + + /* + * Check if this is the root of a file system. + */ while (vp && vp->v_flag & VROOT) { if (vp->v_mount == NULL) { - return (EINVAL); + ret = EINVAL; + goto out_unlock; } if ((vp->v_mount->mnt_flag & MNT_ROOTFS) || (vp == proc_root_dir_vp)) { /* @@ -217,14 +254,16 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs * just "/". */ *--end = '/'; - goto out; + + goto out_unlock; } else { vp = vp->v_mount->mnt_vnodecovered; } } - NAME_CACHE_LOCK_SHARED(); while ((vp != NULLVP) && (vp->v_parent != vp)) { + int vid; + /* * For hardlinks the v_name may be stale, so if its OK * to enter a file system, ask the file system for the @@ -233,15 +272,16 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs fixhardlink = (vp->v_flag & VISHARDLINK) && (vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID) && !(flags & BUILDPATH_NO_FS_ENTER); + if (!fixhardlink) { str = vp->v_name; + if (str == NULL || *str == '\0') { - if (vp->v_parent != NULL) { + if (vp->v_parent != NULL) ret = EINVAL; - } else { + else ret = ENOENT; - } - break; + goto out_unlock; } len = strlen(str); /* @@ -249,15 +289,18 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs */ if ((end - buff) < (len + 1)) { ret = ENOSPC; - break; + goto out_unlock; } - /* Copy the name backwards. */ + /* + * Copy the name backwards. + */ str += len; - for (; len > 0; len--) { + for (; len > 0; len--) *--end = *--str; - } - /* Add a path separator. */ + /* + * Add a path separator. 
+ */ *--end = '/'; } @@ -268,45 +311,59 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs (flags & BUILDPATH_NO_FS_ENTER)) { vp = vp->v_parent; - // if the vnode we have in hand isn't a directory and it - // has a v_parent, then we started with the resource fork - // so skip up to avoid getting a duplicate copy of the - // file name in the path. - if (vp && !vnode_isdir(vp) && vp->v_parent) { - vp = vp->v_parent; - } - } else /* No parent, go get it if supported. */ { + /* + * if the vnode we have in hand isn't a directory and it + * has a v_parent, then we started with the resource fork + * so skip up to avoid getting a duplicate copy of the + * file name in the path. + */ + if (vp && !vnode_isdir(vp) && vp->v_parent) + vp = vp->v_parent; + } else { + /* + * No parent, go get it if supported. + */ struct vnode_attr va; vnode_t dvp; - int vid; - /* Make sure file system supports obtaining a path from id. */ + /* + * Make sure file system supports obtaining a path from id. + */ if (!(vp->v_mount->mnt_kern_flag & MNTK_PATH_FROM_ID)) { ret = ENOENT; - break; + goto out_unlock; } - vid = vp->v_id; + vid = vp->v_id; + NAME_CACHE_UNLOCK(); - if (vnode_getwithvid(vp, vid) != 0) { - /* vnode was recycled, so start over. */ - goto again; - } - + if (vp != first_vp && vp != vp_with_iocount) { + if (vp_with_iocount) { + vnode_put(vp_with_iocount); + vp_with_iocount = NULLVP; + } + if (vnode_getwithvid(vp, vid)) + goto again; + vp_with_iocount = vp; + } VATTR_INIT(&va); VATTR_WANTED(&va, va_parentid); + if (fixhardlink) { VATTR_WANTED(&va, va_name); MALLOC_ZONE(va.va_name, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); } else { va.va_name = NULL; } - /* Ask the file system for its parent id and for its name (optional). */ + /* + * Ask the file system for its parent id and for its name (optional). 
+ */ ret = vnode_getattr(vp, &va, ctx); if (fixhardlink) { if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) { str = va.va_name; + vnode_update_identity(vp, NULL, str, strlen(str), 0, VNODE_UPDATE_NAME); } else if (vp->v_name) { str = vp->v_name; ret = 0; @@ -316,64 +373,54 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs } len = strlen(str); - /* Check that there's enough space. */ + /* + * Check that there's enough space. + */ if ((end - buff) < (len + 1)) { ret = ENOSPC; } else { /* Copy the name backwards. */ str += len; - + for (; len > 0; len--) { *--end = *--str; } - /* Add a path separator. */ + /* + * Add a path separator. + */ *--end = '/'; } - bad_news: +bad_news: FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI); } if (ret || !VATTR_IS_SUPPORTED(&va, va_parentid)) { - vnode_put(vp); ret = ENOENT; goto out; } - /* Ask the file system for the parent vnode. */ - ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx); - if (ret) { - vnode_put(vp); + /* + * Ask the file system for the parent vnode. + */ + if ((ret = VFS_VGET(vp->v_mount, (ino64_t)va.va_parentid, &dvp, ctx))) goto out; - } - if (!fixhardlink && (vp->v_parent != dvp)) { + + if (!fixhardlink && (vp->v_parent != dvp)) vnode_update_identity(vp, dvp, NULL, 0, 0, VNODE_UPDATE_PARENT); - } - vnode_put(vp); + + if (vp_with_iocount) + vnode_put(vp_with_iocount); vp = dvp; - /* - * We are no longer under the name cache lock here. - * So to avoid a race for vnode termination, take a - * reference on the vnode and drop that reference - * after reacquiring the name cache lock. We use the - * vnode_rele_ext call with the dont_reenter flag - * set to avoid re-entering the file system which - * could possibly re-enter the name cache. 
- */ - if (vnode_ref(dvp) != 0) { - dvp = NULLVP; - } - vnode_put(vp); - NAME_CACHE_LOCK_SHARED(); + vp_with_iocount = vp; - if (dvp) { - vnode_rele_ext(dvp, 0, 1); - } + NAME_CACHE_LOCK_SHARED(); - // if the vnode we have in hand isn't a directory and it - // has a v_parent, then we started with the resource fork - // so skip up to avoid getting a duplicate copy of the - // file name in the path. - if (vp && !vnode_isdir(vp) && vp->v_parent) { - vp = vp->v_parent; - } + /* + * if the vnode we have in hand isn't a directory and it + * has a v_parent, then we started with the resource fork + * so skip up to avoid getting a duplicate copy of the + * file name in the path. + */ + if (vp && !vnode_isdir(vp) && vp->v_parent) + vp = vp->v_parent; } /* * When a mount point is crossed switch the vp. @@ -381,22 +428,54 @@ build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs * a vnode that's not the root of a mounted * file system. */ - while (vp) { - if (vp == proc_root_dir_vp) { - NAME_CACHE_UNLOCK(); - goto out; /* encountered the root */ + tvp = vp; + + while (tvp) { + if (tvp == proc_root_dir_vp) + goto out_unlock; /* encountered the root */ + + if (!(tvp->v_flag & VROOT) || !tvp->v_mount) + break; /* not the root of a mounted FS */ + tvp = tvp->v_mount->mnt_vnodecovered; + } + if (tvp == NULLVP) + goto out_unlock; + vp = tvp; + + if (vp && (flags & BUILDPATH_CHECKACCESS)) { + vid = vp->v_id; + + NAME_CACHE_UNLOCK(); + + if (vp != first_vp && vp != vp_with_iocount) { + if (vp_with_iocount) { + vnode_put(vp_with_iocount); + vp_with_iocount = NULLVP; + } + if (vnode_getwithvid(vp, vid)) + goto again; + vp_with_iocount = vp; } - if (!(vp->v_flag & VROOT) || !vp->v_mount) - break; /* not the root of a mounted FS */ - vp = vp->v_mount->mnt_vnodecovered; + if ((ret = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx))) + goto out; /* no peeking */ + + NAME_CACHE_LOCK_SHARED(); } } +out_unlock: NAME_CACHE_UNLOCK(); out: - /* Slide the name down to 
the beginning of the buffer. */ + if (vp_with_iocount) + vnode_put(vp_with_iocount); + /* + * Slide the name down to the beginning of the buffer. + */ memmove(buff, end, &buff[buflen] - end); - - *outlen = &buff[buflen] - end; /* length includes the trailing zero byte */ + + /* + * length includes the trailing zero byte + */ + *outlen = &buff[buflen] - end; return (ret); } @@ -437,11 +516,11 @@ const char * vnode_getname(vnode_t vp) { const char *name = NULL; - - NAME_CACHE_LOCK(); + + NAME_CACHE_LOCK_SHARED(); if (vp->v_name) - name = add_name_locked(vp->v_name, strlen(vp->v_name), 0, 0); + name = vfs_addname(vp->v_name, strlen(vp->v_name), 0, 0); NAME_CACHE_UNLOCK(); return (name); @@ -450,11 +529,7 @@ vnode_getname(vnode_t vp) void vnode_putname(const char *name) { - NAME_CACHE_LOCK(); - - remove_name_locked(name); - - NAME_CACHE_UNLOCK(); + vfs_removename(name); } @@ -472,7 +547,7 @@ vnode_putname(const char *name) * if VNODE_UPDATE_CACHE, flush the name cache entries associated with vp */ void -vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, int name_hashval, int flags) +vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, uint32_t name_hashval, int flags) { struct namecache *ncp; vnode_t old_parentvp = NULLVP; @@ -480,6 +555,9 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, i int isstream = (vp->v_flag & VISNAMEDSTREAM); int kusecountbumped = 0; #endif + kauth_cred_t tcred = NULL; + const char *vname = NULL; + const char *tname = NULL; if (flags & VNODE_UPDATE_PARENT) { if (dvp && vnode_ref(dvp) != 0) { @@ -497,35 +575,64 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, i } else { dvp = NULLVP; } - NAME_CACHE_LOCK(); + if ( (flags & VNODE_UPDATE_NAME) ) { + if (name != vp->v_name) { + if (name && *name) { + if (name_len == 0) + name_len = strlen(name); + tname = vfs_addname(name, name_len, name_hashval, 0); + } + } else + flags &= 
~VNODE_UPDATE_NAME; + } + if ( (flags & (VNODE_UPDATE_PURGE | VNODE_UPDATE_PARENT | VNODE_UPDATE_CACHE | VNODE_UPDATE_NAME)) ) { - if ( (flags & VNODE_UPDATE_NAME) && (name != vp->v_name) ) { - if (vp->v_name != NULL) { - remove_name_locked(vp->v_name); - vp->v_name = NULL; + NAME_CACHE_LOCK(); + + if ( (flags & VNODE_UPDATE_PURGE) ) { + + if (vp->v_parent) + vp->v_parent->v_nc_generation++; + + while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) + cache_delete(ncp, 1); + + while ( (ncp = LIST_FIRST(&vp->v_ncchildren)) ) + cache_delete(ncp, 1); + + /* + * Use a temp variable to avoid kauth_cred_unref() while NAME_CACHE_LOCK is held + */ + tcred = vp->v_cred; + vp->v_cred = NOCRED; + vp->v_authorized_actions = 0; } - if (name && *name) { - if (name_len == 0) - name_len = strlen(name); - vp->v_name = add_name_locked(name, name_len, name_hashval, 0); + if ( (flags & VNODE_UPDATE_NAME) ) { + vname = vp->v_name; + vp->v_name = tname; } - } - if (flags & VNODE_UPDATE_PARENT) { - if (dvp != vp && dvp != vp->v_parent) { - old_parentvp = vp->v_parent; - vp->v_parent = dvp; - dvp = NULLVP; + if (flags & VNODE_UPDATE_PARENT) { + if (dvp != vp && dvp != vp->v_parent) { + old_parentvp = vp->v_parent; + vp->v_parent = dvp; + dvp = NULLVP; - if (old_parentvp) - flags |= VNODE_UPDATE_CACHE; + if (old_parentvp) + flags |= VNODE_UPDATE_CACHE; + } } - } - if (flags & VNODE_UPDATE_CACHE) { - while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) - cache_delete(ncp, 1); - } - NAME_CACHE_UNLOCK(); + if (flags & VNODE_UPDATE_CACHE) { + while ( (ncp = LIST_FIRST(&vp->v_nclinks)) ) + cache_delete(ncp, 1); + } + NAME_CACHE_UNLOCK(); + if (vname != NULL) + vfs_removename(vname); + + if (IS_VALID_CRED(tcred)) + kauth_cred_unref(&tcred); + } if (dvp != NULLVP) { #if NAMEDSTREAMS /* Back-out the ref we took if we lost a race for vp->v_parent. 
*/ @@ -563,7 +670,7 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, i while ( (vp = old_parentvp) != NULLVP ) { - vnode_lock(vp); + vnode_lock_spin(vp); vnode_rele_internal(vp, 0, 0, 1); /* @@ -577,7 +684,7 @@ vnode_update_identity(vnode_t vp, vnode_t dvp, const char *name, int name_len, i * we'll sit in this loop until we run into * a parent in this chain that is not in this state * - * make our check and the node_rele atomic + * make our check and the vnode_rele atomic * with respect to the current vnode we're working on * by holding the vnode lock * if vnode_rele deferred the vnode_reclaim and has put @@ -840,7 +947,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, kauth_cred_t ucred; boolean_t ttl_enabled = FALSE; struct timeval tv; - mount_t mp; + mount_t mp; unsigned int hash; int error = 0; @@ -865,7 +972,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, cp = cnp->cn_nameptr; while (*cp && (*cp != '/')) { - hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; } /* * the crc generator can legitimately generate @@ -942,7 +1049,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, if (!(cnp->cn_flags & DONOTAUTH)) { error = mac_vnode_check_lookup(ctx, dp, cnp); if (error) { - name_cache_unlock(); + NAME_CACHE_UNLOCK(); goto errorout; } } @@ -997,13 +1104,20 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, */ if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') vp = dp; - else if ((cnp->cn_flags & ISDOTDOT) && dp->v_parent) + else if ( (cnp->cn_flags & ISDOTDOT) ) vp = dp->v_parent; else { if ( (vp = cache_lookup_locked(dp, cnp)) == NULLVP) break; - } + if ( (vp->v_flag & VISHARDLINK) ) { + /* + * The file system wants a VNOP_LOOKUP on this vnode + */ + vp = NULL; + break; + } + } if ( (cnp->cn_flags & ISLASTCN) ) break; 
@@ -1133,14 +1247,12 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp) struct namecache *ncp; struct nchashhead *ncpp; long namelen = cnp->cn_namelen; - char *nameptr = cnp->cn_nameptr; unsigned int hashval = (cnp->cn_hash & NCHASHMASK); - vnode_t vp; ncpp = NCHHASH(dvp, cnp->cn_hash); LIST_FOREACH(ncp, ncpp, nc_hash) { if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { - if (memcmp(ncp->nc_name, nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) + if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) break; } } @@ -1153,15 +1265,7 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp) } NCHSTAT(ncs_goodhits); - vp = ncp->nc_vp; - if (vp && (vp->v_flag & VISHARDLINK)) { - /* - * The file system wants a VNOP_LOOKUP on this vnode - */ - vp = NULL; - } - - return (vp); + return (ncp->nc_vp); } @@ -1176,11 +1280,11 @@ hash_string(const char *cp, int len) if (len) { while (len--) { - hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; } } else { while (*cp != '\0') { - hash ^= crc32tab[((hash >> 24) ^ (unsigned char)*cp++)]; + hash = crc32tab[((hash >> 24) ^ (unsigned char)*cp++)] ^ hash << 8; } } /* @@ -1215,19 +1319,22 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) struct namecache *ncp; struct nchashhead *ncpp; long namelen = cnp->cn_namelen; - char *nameptr = cnp->cn_nameptr; - unsigned int hashval = (cnp->cn_hash & NCHASHMASK); + unsigned int hashval; boolean_t have_exclusive = FALSE; uint32_t vid; vnode_t vp; + if (cnp->cn_hash == 0) + cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); + hashval = (cnp->cn_hash & NCHASHMASK); + NAME_CACHE_LOCK_SHARED(); - ncpp = NCHHASH(dvp, cnp->cn_hash); relook: + ncpp = NCHHASH(dvp, cnp->cn_hash); LIST_FOREACH(ncp, ncpp, nc_hash) { if ((ncp->nc_dvp == dvp) && (ncp->nc_hashval == hashval)) { - if (memcmp(ncp->nc_name, nameptr, namelen) == 0 && 
ncp->nc_name[namelen] == 0) + if (memcmp(ncp->nc_name, cnp->cn_nameptr, namelen) == 0 && ncp->nc_name[namelen] == 0) break; } } @@ -1298,6 +1405,30 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) return (ENOENT); } +const char * +cache_enter_create(vnode_t dvp, vnode_t vp, struct componentname *cnp) +{ + const char *strname; + + if (cnp->cn_hash == 0) + cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); + + /* + * grab 2 references on the string entered + * one for the cache_enter_locked to consume + * and the second to be consumed by v_name (vnode_create call point) + */ + strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, TRUE, 0); + + NAME_CACHE_LOCK(); + + cache_enter_locked(dvp, vp, cnp, strname); + + NAME_CACHE_UNLOCK(); + + return (strname); +} + /* * Add an entry to the cache... @@ -1317,7 +1448,7 @@ cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname * NAME_CACHE_LOCK(); if (dvp->v_nc_generation == gen) - cache_enter_locked(dvp, vp, cnp); + (void)cache_enter_locked(dvp, vp, cnp, NULL); NAME_CACHE_UNLOCK(); } @@ -1329,19 +1460,27 @@ cache_enter_with_gen(struct vnode *dvp, struct vnode *vp, struct componentname * void cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { + const char *strname; + if (cnp->cn_hash == 0) cnp->cn_hash = hash_string(cnp->cn_nameptr, cnp->cn_namelen); + /* + * grab 1 reference on the string entered + * for the cache_enter_locked to consume + */ + strname = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); + NAME_CACHE_LOCK(); - cache_enter_locked(dvp, vp, cnp); + cache_enter_locked(dvp, vp, cnp, strname); NAME_CACHE_UNLOCK(); } static void -cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) +cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, const char *strname) { struct namecache *ncp, *negp; struct nchashhead *ncpp; @@ 
-1354,6 +1493,8 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn * someone beat us to the punch.. * this vnode is already in the cache */ + if (strname != NULL) + vfs_removename(strname); return; } /* @@ -1367,7 +1508,7 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn /* * Allocate one more entry */ - ncp = (struct namecache *)_MALLOC_ZONE((u_long)sizeof *ncp, M_CACHE, M_WAITOK); + ncp = (struct namecache *)_MALLOC_ZONE(sizeof(*ncp), M_CACHE, M_WAITOK); numcache++; } else { /* @@ -1394,8 +1535,11 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn ncp->nc_dvp = dvp; ncp->nc_hashval = cnp->cn_hash; ncp->nc_whiteout = FALSE; - ncp->nc_name = add_name_locked(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); + if (strname == NULL) + ncp->nc_name = add_name_internal(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, FALSE, 0); + else + ncp->nc_name = strname; /* * make us the newest entry in the cache * i.e. 
we'll be the last to be stolen @@ -1442,8 +1586,6 @@ cache_enter_locked(struct vnode *dvp, struct vnode *vp, struct componentname *cn * the oldest */ negp = TAILQ_FIRST(&neghead); - TAILQ_REMOVE(&neghead, negp, nc_un.nc_negentry); - cache_delete(negp, 1); } } @@ -1491,6 +1633,8 @@ static void init_crc32(void) void nchinit(void) { + int i; + desiredNegNodes = (desiredvnodes / 10); desiredNodes = desiredvnodes + desiredNegNodes; @@ -1505,18 +1649,31 @@ nchinit(void) init_string_table(); - /* Allocate mount list lock group attribute and group */ + /* Allocate name cache lock group attribute and group */ namecache_lck_grp_attr= lck_grp_attr_alloc_init(); namecache_lck_grp = lck_grp_alloc_init("Name Cache", namecache_lck_grp_attr); - /* Allocate mount list lock attribute */ + /* Allocate name cache lock attribute */ namecache_lck_attr = lck_attr_alloc_init(); - /* Allocate mount list lock */ + /* Allocate name cache lock */ namecache_rw_lock = lck_rw_alloc_init(namecache_lck_grp, namecache_lck_attr); + /* Allocate string cache lock group attribute and group */ + strcache_lck_grp_attr= lck_grp_attr_alloc_init(); + + strcache_lck_grp = lck_grp_alloc_init("String Cache", strcache_lck_grp_attr); + + /* Allocate string cache lock attribute */ + strcache_lck_attr = lck_attr_alloc_init(); + + /* Allocate string cache lock */ + strtable_rw_lock = lck_rw_alloc_init(strcache_lck_grp, strcache_lck_attr); + + for (i = 0; i < NUM_STRCACHE_LOCKS; i++) + lck_mtx_init(&strcache_mtx_locks[i], strcache_lck_grp, strcache_lck_attr); } void @@ -1553,7 +1710,7 @@ resize_namecache(u_int newsize) dNodes = newsize + dNegNodes; // we don't support shrinking yet - if (dNodes < desiredNodes) { + if (dNodes <= desiredNodes) { return 0; } new_table = hashinit(2 * dNodes, M_CACHE, &nchashmask); @@ -1627,7 +1784,7 @@ cache_delete(struct namecache *ncp, int age_entry) TAILQ_REMOVE(&nchead, ncp, nc_entry); TAILQ_INSERT_HEAD(&nchead, ncp, nc_entry); } - remove_name_locked(ncp->nc_name); + 
vfs_removename(ncp->nc_name); ncp->nc_name = NULL; } @@ -1679,11 +1836,11 @@ cache_purge(vnode_t vp) void cache_purge_negatives(vnode_t vp) { - struct namecache *ncp; + struct namecache *ncp, *next_ncp; NAME_CACHE_LOCK(); - LIST_FOREACH(ncp, &vp->v_ncchildren, nc_child) + LIST_FOREACH_SAFE(ncp, &vp->v_ncchildren, nc_child, next_ncp) if (ncp->nc_vp == NULL) cache_delete(ncp , 1); @@ -1723,11 +1880,8 @@ cache_purgevfs(struct mount *mp) // static LIST_HEAD(stringhead, string_t) *string_ref_table; static u_long string_table_mask; -static uint32_t max_chain_len=0; -static struct stringhead *long_chain_head=NULL; static uint32_t filled_buckets=0; -static uint32_t num_dups=0; -static uint32_t nstrings=0; + typedef struct string_t { LIST_ENTRY(string_t) hash_chain; @@ -1736,54 +1890,62 @@ typedef struct string_t { } string_t; - -static int +static void resize_string_ref_table(void) { - struct stringhead *new_table; - struct stringhead *old_table; - struct stringhead *old_head, *head; - string_t *entry, *next; - uint32_t i, hashval; - u_long new_mask, old_mask; - - new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask); - if (new_table == NULL) { - return ENOMEM; - } + struct stringhead *new_table; + struct stringhead *old_table; + struct stringhead *old_head, *head; + string_t *entry, *next; + uint32_t i, hashval; + u_long new_mask, old_mask; - // do the switch! - old_table = string_ref_table; - string_ref_table = new_table; - old_mask = string_table_mask; - string_table_mask = new_mask; + /* + * need to hold the table lock exclusively + * in order to grow the table... 
need to recheck + * the need to resize again after we've taken + * the lock exclusively in case some other thread + * beat us to the punch + */ + lck_rw_lock_exclusive(strtable_rw_lock); - printf("resize: max chain len %d, new table size %lu\n", - max_chain_len, new_mask + 1); - max_chain_len = 0; - long_chain_head = NULL; - filled_buckets = 0; + if (4 * filled_buckets < ((string_table_mask + 1) * 3)) { + lck_rw_done(strtable_rw_lock); + return; + } + new_table = hashinit((string_table_mask + 1) * 2, M_CACHE, &new_mask); - // walk the old table and insert all the entries into - // the new table - // - for(i=0; i <= old_mask; i++) { - old_head = &old_table[i]; - for (entry=old_head->lh_first; entry != NULL; entry=next) { - hashval = hash_string((const char *)entry->str, 0); - head = &string_ref_table[hashval & string_table_mask]; - if (head->lh_first == NULL) { - filled_buckets++; - } + if (new_table == NULL) { + printf("failed to resize the hash table.\n"); + lck_rw_done(strtable_rw_lock); + return; + } + + // do the switch! 
+ old_table = string_ref_table; + string_ref_table = new_table; + old_mask = string_table_mask; + string_table_mask = new_mask; + filled_buckets = 0; - next = entry->hash_chain.le_next; - LIST_INSERT_HEAD(head, entry, hash_chain); + // walk the old table and insert all the entries into + // the new table + // + for (i = 0; i <= old_mask; i++) { + old_head = &old_table[i]; + for (entry = old_head->lh_first; entry != NULL; entry = next) { + hashval = hash_string((const char *)entry->str, 0); + head = &string_ref_table[hashval & string_table_mask]; + if (head->lh_first == NULL) { + filled_buckets++; + } + next = entry->hash_chain.le_next; + LIST_INSERT_HEAD(head, entry, hash_chain); + } } - } - - FREE(old_table, M_CACHE); + lck_rw_done(strtable_rw_lock); - return 0; + FREE(old_table, M_CACHE); } @@ -1795,131 +1957,144 @@ init_string_table(void) const char * -vfs_addname(const char *name, size_t len, u_int hashval, u_int flags) +vfs_addname(const char *name, uint32_t len, u_int hashval, u_int flags) { - const char * ptr; - - NAME_CACHE_LOCK(); - ptr = add_name_locked(name, len, hashval, flags); - NAME_CACHE_UNLOCK(); - - return(ptr); + return (add_name_internal(name, len, hashval, FALSE, flags)); } + static const char * -add_name_locked(const char *name, size_t len, u_int hashval, __unused u_int flags) +add_name_internal(const char *name, uint32_t len, u_int hashval, boolean_t need_extra_ref, __unused u_int flags) { - struct stringhead *head; - string_t *entry; - uint32_t chain_len = 0; - char *ptr; + struct stringhead *head; + string_t *entry; + uint32_t chain_len = 0; + uint32_t hash_index; + uint32_t lock_index; + char *ptr; - // - // If the table gets more than 3/4 full, resize it - // - if (4*filled_buckets >= ((string_table_mask + 1) * 3)) { - if (resize_string_ref_table() != 0) { - printf("failed to resize the hash table.\n"); - } - } - if (hashval == 0) { - hashval = hash_string(name, 0); - } - - // - // if the length already accounts for the null-byte, then - 
// subtract one so later on we don't index past the end - // of the string. - // - if (len > 0 && name[len-1] == '\0') { - len--; - } - - head = &string_ref_table[hashval & string_table_mask]; - for (entry=head->lh_first; entry != NULL; chain_len++, entry=entry->hash_chain.le_next) { - if (memcmp(entry->str, name, len) == 0 && entry->str[len] == '\0') { - entry->refcount++; - num_dups++; - break; + if (hashval == 0) { + hashval = hash_string(name, 0); } - } + /* + * if the length already accounts for the null-byte, then + * subtract one so later on we don't index past the end + * of the string. + */ + if (len > 0 && name[len-1] == '\0') { + len--; + } + /* + * take this lock 'shared' to keep the hash stable + * if someone else decides to grow the pool they + * will take this lock exclusively + */ + lck_rw_lock_shared(strtable_rw_lock); - if (entry == NULL) { - // it wasn't already there so add it. - MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK); + /* + * If the table gets more than 3/4 full, resize it + */ + if (4 * filled_buckets >= ((string_table_mask + 1) * 3)) { + lck_rw_done(strtable_rw_lock); - // have to get "head" again because we could have blocked - // in malloc and thus head could have changed. 
- // - head = &string_ref_table[hashval & string_table_mask]; - if (head->lh_first == NULL) { - filled_buckets++; + resize_string_ref_table(); + + lck_rw_lock_shared(strtable_rw_lock); } + hash_index = hashval & string_table_mask; + lock_index = hash_index % NUM_STRCACHE_LOCKS; + + head = &string_ref_table[hash_index]; - ptr = (char *)((char *)entry + sizeof(string_t)); - strncpy(ptr, name, len); - ptr[len] = '\0'; - entry->str = ptr; - entry->refcount = 1; - LIST_INSERT_HEAD(head, entry, hash_chain); + lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]); - if (chain_len > max_chain_len) { - max_chain_len = chain_len; - long_chain_head = head; + for (entry = head->lh_first; entry != NULL; chain_len++, entry = entry->hash_chain.le_next) { + if (memcmp(entry->str, name, len) == 0 && entry->str[len] == 0) { + entry->refcount++; + break; + } } + if (entry == NULL) { + lck_mtx_convert_spin(&strcache_mtx_locks[lock_index]); + /* + * it wasn't already there so add it. + */ + MALLOC(entry, string_t *, sizeof(string_t) + len + 1, M_TEMP, M_WAITOK); - nstrings++; - } + if (head->lh_first == NULL) { + OSAddAtomic(1, &filled_buckets); + } + ptr = (char *)((char *)entry + sizeof(string_t)); + strncpy(ptr, name, len); + ptr[len] = '\0'; + entry->str = ptr; + entry->refcount = 1; + LIST_INSERT_HEAD(head, entry, hash_chain); + } + if (need_extra_ref == TRUE) + entry->refcount++; - return (const char *)entry->str; + lck_mtx_unlock(&strcache_mtx_locks[lock_index]); + lck_rw_done(strtable_rw_lock); + + return (const char *)entry->str; } + int vfs_removename(const char *nameref) { - int i; + struct stringhead *head; + string_t *entry; + uint32_t hashval; + uint32_t hash_index; + uint32_t lock_index; + int retval = ENOENT; - NAME_CACHE_LOCK(); - i = remove_name_locked(nameref); - NAME_CACHE_UNLOCK(); + hashval = hash_string(nameref, 0); - return(i); - -} + /* + * take this lock 'shared' to keep the hash stable + * if someone else decides to grow the pool they + * will take this lock 
exclusively + */ + lck_rw_lock_shared(strtable_rw_lock); + /* + * must compute the head behind the table lock + * since the size and location of the table + * can change on the fly + */ + hash_index = hashval & string_table_mask; + lock_index = hash_index % NUM_STRCACHE_LOCKS; + head = &string_ref_table[hash_index]; -static int -remove_name_locked(const char *nameref) -{ - struct stringhead *head; - string_t *entry; - uint32_t hashval; - const char *ptr; - - hashval = hash_string(nameref, 0); - head = &string_ref_table[hashval & string_table_mask]; - for (entry=head->lh_first; entry != NULL; entry=entry->hash_chain.le_next) { - if (entry->str == nameref) { - entry->refcount--; - if (entry->refcount == 0) { - LIST_REMOVE(entry, hash_chain); - if (head->lh_first == NULL) { - filled_buckets--; - } - ptr = entry->str; - entry->str = NULL; - nstrings--; + lck_mtx_lock_spin(&strcache_mtx_locks[lock_index]); - FREE(entry, M_TEMP); - } else { - num_dups--; - } + for (entry = head->lh_first; entry != NULL; entry = entry->hash_chain.le_next) { + if (entry->str == nameref) { + entry->refcount--; - return 0; + if (entry->refcount == 0) { + LIST_REMOVE(entry, hash_chain); + + if (head->lh_first == NULL) { + OSAddAtomic(-1, &filled_buckets); + } + } else { + entry = NULL; + } + retval = 0; + break; + } } - } + lck_mtx_unlock(&strcache_mtx_locks[lock_index]); + lck_rw_done(strtable_rw_lock); - return ENOENT; + if (entry != NULL) + FREE(entry, M_TEMP); + + return retval; } @@ -1931,7 +2106,7 @@ dump_string_table(void) string_t *entry; u_long i; - NAME_CACHE_LOCK_SHARED(); + lck_rw_lock_shared(strtable_rw_lock); for (i = 0; i <= string_table_mask; i++) { head = &string_ref_table[i]; @@ -1939,6 +2114,6 @@ dump_string_table(void) printf("%6d - %s\n", entry->refcount, entry->str); } } - NAME_CACHE_UNLOCK(); + lck_rw_done(strtable_rw_lock); } #endif /* DUMP_STRING_TABLE */ diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 9b1a7af25..5aec1498a 100644 --- 
a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -88,9 +88,16 @@ #include #include +#include + +#if 0 +#undef KERNEL_DEBUG +#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT +#endif + #define CL_READ 0x01 -#define CL_WRITE 0x02 +#define CL_WRITE 0x02 #define CL_ASYNC 0x04 #define CL_COMMIT 0x08 #define CL_PAGEOUT 0x10 @@ -103,7 +110,16 @@ #define CL_KEEPCACHED 0x800 #define CL_DIRECT_IO 0x1000 #define CL_PASSIVE 0x2000 +#define CL_IOSTREAMING 0x4000 + +#define MAX_VECTOR_UPL_ELEMENTS 8 +#define MAX_VECTOR_UPL_SIZE (2 * MAX_UPL_SIZE) * PAGE_SIZE +extern upl_t vector_upl_create(vm_offset_t); +extern boolean_t vector_upl_is_valid(upl_t); +extern boolean_t vector_upl_set_subupl(upl_t,upl_t, u_int32_t); +extern void vector_upl_set_pagelist(upl_t); +extern void vector_upl_set_iostate(upl_t, upl_t, vm_offset_t, u_int32_t); struct clios { u_int io_completed; /* amount of io that has currently completed */ @@ -138,11 +154,11 @@ static int cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_off int flags, buf_t real_bp, struct clios *iostate, int (*)(buf_t, void *), void *callback_arg); static int cluster_iodone(buf_t bp, void *callback_arg); static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags); -static int cluster_hard_throttle_on(vnode_t vp); +static int cluster_hard_throttle_on(vnode_t vp, uint32_t); static void cluster_syncup(vnode_t vp, off_t newEOF, int (*)(buf_t, void *), void *callback_arg); -static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int flags); +static void cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int take_reference); static int cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int mark_dirty, int take_reference); static int cluster_read_copy(vnode_t 
vp, struct uio *uio, u_int32_t io_req_size, off_t filesize, int flags, @@ -169,27 +185,26 @@ static int cluster_push_now(vnode_t vp, struct cl_extent *, off_t EOF, int flags static int cluster_try_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int (*)(buf_t, void *), void *callback_arg); static void sparse_cluster_switch(struct cl_writebehind *, vnode_t vp, off_t EOF, int (*)(buf_t, void *), void *callback_arg); -static void sparse_cluster_push(struct cl_writebehind *, vnode_t vp, off_t EOF, int push_flag, int (*)(buf_t, void *), void *callback_arg); -static void sparse_cluster_add(struct cl_writebehind *, vnode_t vp, struct cl_extent *, off_t EOF, int (*)(buf_t, void *), void *callback_arg); +static void sparse_cluster_push(void **cmapp, vnode_t vp, off_t EOF, int push_flag, int (*)(buf_t, void *), void *callback_arg); +static void sparse_cluster_add(void **cmapp, vnode_t vp, struct cl_extent *, off_t EOF, int (*)(buf_t, void *), void *callback_arg); static kern_return_t vfs_drt_mark_pages(void **cmapp, off_t offset, u_int length, u_int *setcountp); static kern_return_t vfs_drt_get_cluster(void **cmapp, off_t *offsetp, u_int *lengthp); static kern_return_t vfs_drt_control(void **cmapp, int op_type); -int is_file_clean(vnode_t, off_t); /* * limit the internal I/O size so that we * can represent it in a 32 bit int */ -#define MAX_IO_REQUEST_SIZE (1024 * 1024 * 256) -#define MAX_IO_CONTIG_SIZE (MAX_UPL_SIZE * PAGE_SIZE) -#define MAX_VECTS 16 +#define MAX_IO_REQUEST_SIZE (1024 * 1024 * 512) +#define MAX_IO_CONTIG_SIZE (MAX_UPL_SIZE * PAGE_SIZE) +#define MAX_VECTS 16 #define MIN_DIRECT_WRITE_SIZE (4 * PAGE_SIZE) - -#define MAX_CLUSTER_SIZE(vp) (cluster_max_io_size(vp->v_mount, CL_WRITE)) -#define MAX_PREFETCH(vp) (cluster_max_io_size(vp->v_mount, CL_READ) * 3); +#define IO_SCALE(vp, base) (vp->v_mount->mnt_ioscale * base) +#define MAX_CLUSTER_SIZE(vp) (cluster_max_io_size(vp->v_mount, CL_WRITE)) +#define MAX_PREFETCH(vp, io_size) (io_size * 
IO_SCALE(vp, 3)) int speculative_reads_disabled = 0; @@ -200,7 +215,7 @@ int speculative_reads_disabled = 0; * before we issue a synchronous write */ #define HARD_THROTTLE_MAXCNT 0 -#define HARD_THROTTLE_MAXSIZE (64 * 1024) +#define HARD_THROTTLE_MAXSIZE (32 * 1024) int hard_throttle_on_root = 0; struct timeval priority_IO_timestamp_for_root; @@ -233,25 +248,25 @@ cluster_init(void) { uint32_t cluster_max_io_size(mount_t mp, int type) { - uint32_t max_io_size; - uint32_t segcnt; - uint32_t maxcnt; - - switch(type) { - - case CL_READ: - segcnt = mp->mnt_segreadcnt; - maxcnt = mp->mnt_maxreadcnt; - break; - case CL_WRITE: - segcnt = mp->mnt_segwritecnt; - maxcnt = mp->mnt_maxwritecnt; - break; - default: - segcnt = min(mp->mnt_segreadcnt, mp->mnt_segwritecnt); - maxcnt = min(mp->mnt_maxreadcnt, mp->mnt_maxwritecnt); - break; - } + uint32_t max_io_size; + uint32_t segcnt; + uint32_t maxcnt; + + switch(type) { + + case CL_READ: + segcnt = mp->mnt_segreadcnt; + maxcnt = mp->mnt_maxreadcnt; + break; + case CL_WRITE: + segcnt = mp->mnt_segwritecnt; + maxcnt = mp->mnt_maxwritecnt; + break; + default: + segcnt = min(mp->mnt_segreadcnt, mp->mnt_segwritecnt); + maxcnt = min(mp->mnt_maxreadcnt, mp->mnt_maxwritecnt); + break; + } if (segcnt > MAX_UPL_SIZE) { /* * don't allow a size beyond the max UPL size we can create @@ -400,30 +415,28 @@ cluster_syncup(vnode_t vp, off_t newEOF, int (*callback)(buf_t, void *), void *c static int -cluster_hard_throttle_on(vnode_t vp) +cluster_hard_throttle_on(vnode_t vp, uint32_t hard_throttle) { - static struct timeval hard_throttle_maxelapsed = { 0, 200000 }; + struct uthread *ut; + + if (hard_throttle) { + static struct timeval hard_throttle_maxelapsed = { 0, 200000 }; - if (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV) { - struct timeval elapsed; + if (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV) { + struct timeval elapsed; - if (hard_throttle_on_root) - return(1); + if (hard_throttle_on_root) + return(1); - microuptime(&elapsed); - 
timevalsub(&elapsed, &priority_IO_timestamp_for_root); + microuptime(&elapsed); + timevalsub(&elapsed, &priority_IO_timestamp_for_root); - if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <)) - return(1); + if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <)) + return(1); + } } - struct uthread *ut; if (throttle_get_io_policy(&ut) == IOPOL_THROTTLE) { - size_t devbsdunit; - if (vp->v_mount != NULL) - devbsdunit = vp->v_mount->mnt_devbsdunit; - else - devbsdunit = LOWPRI_MAX_NUM_DEV - 1; - if (throttle_io_will_be_throttled(-1, devbsdunit)) { + if (throttle_io_will_be_throttled(-1, vp->v_mount)) { return(1); } } @@ -495,7 +508,7 @@ cluster_iodone(buf_t bp, void *callback_arg) cbp_head = (buf_t)(bp->b_trans_head); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_START, - (int)cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); + cbp_head, bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); for (cbp = cbp_head; cbp; cbp = cbp->b_trans_next) { /* @@ -505,7 +518,7 @@ cluster_iodone(buf_t bp, void *callback_arg) if ( !(cbp->b_flags & B_DONE)) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, - (int)cbp_head, (int)cbp, cbp->b_bcount, cbp->b_flags, 0); + cbp_head, cbp, cbp->b_bcount, cbp->b_flags, 0); return 0; } @@ -514,7 +527,7 @@ cluster_iodone(buf_t bp, void *callback_arg) } if (transaction_complete == FALSE) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, - (int)cbp_head, 0, 0, 0, 0); + cbp_head, 0, 0, 0, 0); return 0; } @@ -630,18 +643,29 @@ cluster_iodone(buf_t bp, void *callback_arg) buf_biodone(real_bp); } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 20)) | DBG_FUNC_END, - (int)upl, upl_offset - pg_offset, commit_size, (error << 24) | upl_flags, 0); + upl, upl_offset - pg_offset, commit_size, (error << 24) | upl_flags, 0); return (error); } +uint32_t +cluster_hard_throttle_limit(vnode_t vp, uint32_t *limit, uint32_t hard_throttle) +{ + if (cluster_hard_throttle_on(vp, hard_throttle)) { + *limit = HARD_THROTTLE_MAXSIZE; + return 1; + } + return 0; +} + + 
void -cluster_zero(upl_t upl, vm_offset_t upl_offset, int size, buf_t bp) +cluster_zero(upl_t upl, upl_offset_t upl_offset, int size, buf_t bp) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 23)) | DBG_FUNC_START, - upl_offset, size, (int)bp, 0, 0); + upl_offset, size, bp, 0, 0); if (bp == NULL || bp->b_datap == 0) { upl_page_info_t *pl; @@ -852,30 +876,30 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no max_iosize = PAGE_SIZE; if (flags & CL_THROTTLE) { - if ( !(flags & CL_PAGEOUT) && cluster_hard_throttle_on(vp)) { + if ( !(flags & CL_PAGEOUT) && cluster_hard_throttle_on(vp, 1)) { if (max_iosize > HARD_THROTTLE_MAXSIZE) max_iosize = HARD_THROTTLE_MAXSIZE; async_throttle = HARD_THROTTLE_MAXCNT; } else { if ( (flags & CL_DEV_MEMORY) ) - async_throttle = VNODE_ASYNC_THROTTLE; + async_throttle = IO_SCALE(vp, VNODE_ASYNC_THROTTLE); else { u_int max_cluster; u_int max_cluster_size; u_int max_prefetch; - - max_cluster_size = MAX_CLUSTER_SIZE(vp); - max_prefetch = MAX_PREFETCH(vp); + max_cluster_size = MAX_CLUSTER_SIZE(vp); + max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ)); + if (max_iosize > max_cluster_size) - max_cluster = max_cluster_size; + max_cluster = max_cluster_size; else max_cluster = max_iosize; if (size < max_cluster) max_cluster = size; - async_throttle = min(VNODE_ASYNC_THROTTLE, (max_prefetch / max_cluster) - 1); + async_throttle = min(IO_SCALE(vp, VNODE_ASYNC_THROTTLE), (max_prefetch / max_cluster) - 1); } } } @@ -883,6 +907,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no io_flags |= B_AGE; if (flags & (CL_PAGEIN | CL_PAGEOUT)) io_flags |= B_PAGEIO; + if (flags & (CL_IOSTREAMING)) + io_flags |= B_IOSTREAMING; if (flags & CL_COMMIT) io_flags |= B_COMMIT_UPL; if (flags & CL_PRESERVE) @@ -908,6 +934,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no daddr64_t blkno; daddr64_t lblkno; u_int io_size_wanted; + size_t io_size_tmp; if 
(size > max_iosize) io_size = max_iosize; @@ -915,12 +942,15 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no io_size = size; io_size_wanted = io_size; + io_size_tmp = (size_t)io_size; - if ((error = VNOP_BLOCKMAP(vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL, bmap_flags, NULL))) + if ((error = VNOP_BLOCKMAP(vp, f_offset, io_size, &blkno, &io_size_tmp, NULL, bmap_flags, NULL))) break; - if (io_size > io_size_wanted) + if (io_size_tmp > io_size_wanted) io_size = io_size_wanted; + else + io_size = (u_int)io_size_tmp; if (real_bp && (real_bp->b_blkno == real_bp->b_lblkno)) real_bp->b_blkno = blkno; @@ -943,6 +973,8 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no off_t e_offset; int pageout_flags; + if(upl_get_internal_vectorupl(upl)) + panic("Vector UPLs should not take this code-path\n"); /* * we're writing into a 'hole' */ @@ -1451,7 +1483,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE, - (int)upl, upl_offset - pg_offset, abort_size, (error << 24) | upl_flags, 0); + upl, upl_offset - pg_offset, abort_size, (error << 24) | upl_flags, 0); } if (retval == 0) retval = error; @@ -1475,6 +1507,24 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no return (retval); } +#define reset_vector_run_state() \ + issueVectorUPL = vector_upl_offset = vector_upl_index = vector_upl_iosize = vector_upl_size = 0; + +static int +vector_cluster_io(vnode_t vp, upl_t vector_upl, vm_offset_t vector_upl_offset, off_t v_upl_uio_offset, int vector_upl_iosize, + int io_flag, buf_t real_bp, struct clios *iostate, int (*callback)(buf_t, void *), void *callback_arg) +{ + vector_upl_set_pagelist(vector_upl); + + if(io_flag & CL_READ) { + if(vector_upl_offset == 0 && ((vector_upl_iosize & PAGE_MASK)==0)) + 
io_flag &= ~CL_PRESERVE; /*don't zero fill*/ + else + io_flag |= CL_PRESERVE; /*zero fill*/ + } + return (cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, real_bp, iostate, callback, callback_arg)); + +} static int cluster_read_prefetch(vnode_t vp, off_t f_offset, u_int size, off_t filesize, int (*callback)(buf_t, void *), void *callback_arg, int bflag) @@ -1510,7 +1560,7 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct daddr64_t r_addr; off_t f_offset; int size_of_prefetch; - u_int max_prefetch; + u_int max_prefetch; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 48)) | DBG_FUNC_START, @@ -1530,7 +1580,7 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct return; } - max_prefetch = MAX_PREFETCH(vp); + max_prefetch = MAX_PREFETCH(vp, cluster_max_io_size(vp->v_mount, CL_READ)); if (extent->e_addr < rap->cl_maxra) { if ((rap->cl_maxra - extent->e_addr) > ((max_prefetch / PAGE_SIZE) / 4)) { @@ -1576,7 +1626,7 @@ cluster_read_ahead(vnode_t vp, struct cl_extent *extent, off_t filesize, struct int -cluster_pageout(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, +cluster_pageout(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offset, int size, off_t filesize, int flags) { return cluster_pageout_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL); @@ -1585,7 +1635,7 @@ cluster_pageout(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int -cluster_pageout_ext(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, +cluster_pageout_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offset, int size, off_t filesize, int flags, int (*callback)(buf_t, void *), void *callback_arg) { int io_size; @@ -1612,8 +1662,6 @@ cluster_pageout_ext(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offse local_flags |= CL_COMMIT; if ((flags & UPL_KEEPCACHED)) local_flags |= CL_KEEPCACHED; - if (flags & IO_PASSIVE) - 
local_flags |= CL_PASSIVE; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 52)) | DBG_FUNC_NONE, @@ -1664,7 +1712,7 @@ cluster_pageout_ext(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offse int -cluster_pagein(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, +cluster_pagein(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offset, int size, off_t filesize, int flags) { return cluster_pagein_ext(vp, upl, upl_offset, f_offset, size, filesize, flags, NULL, NULL); @@ -1672,7 +1720,7 @@ cluster_pagein(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int -cluster_pagein_ext(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, +cluster_pagein_ext(vnode_t vp, upl_t upl, upl_offset_t upl_offset, off_t f_offset, int size, off_t filesize, int flags, int (*callback)(buf_t, void *), void *callback_arg) { u_int io_size; @@ -1688,8 +1736,8 @@ cluster_pagein_ext(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset local_flags |= CL_ASYNC; if ((flags & UPL_NOCOMMIT) == 0) local_flags |= CL_COMMIT; - if (flags & IO_PASSIVE) - local_flags |= CL_PASSIVE; + if (flags & UPL_IOSTREAMING) + local_flags |= CL_IOSTREAMING; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 56)) | DBG_FUNC_NONE, @@ -1741,7 +1789,7 @@ cluster_bp_ext(buf_t bp, int (*callback)(buf_t, void *), void *callback_arg) int flags; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 19)) | DBG_FUNC_START, - (int)bp, (int)bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); + bp, (int)bp->b_lblkno, bp->b_bcount, bp->b_flags, 0); if (bp->b_flags & B_READ) flags = CL_ASYNC | CL_READ; @@ -1779,9 +1827,9 @@ cluster_write_ext(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t flags = xflags; if (flags & IO_PASSIVE) - bflag = CL_PASSIVE; + bflag = CL_PASSIVE; else - bflag = 0; + bflag = 0; if (vp->v_flag & VNOCACHE_DATA) flags |= IO_NOCACHE; @@ -1884,6 +1932,18 @@ cluster_write_ext(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, off_t retval = cluster_io_type(uio, &write_type, &write_length, 
MIN_DIRECT_WRITE_SIZE); break; } + /* + * in case we end up calling cluster_write_copy (from cluster_write_direct) + * multiple times to service a multi-vector request that is not aligned properly + * we need to update the oldEOF so that we + * don't zero-fill the head of a page if we've successfully written + * data to that area... 'cluster_write_copy' will zero-fill the head of a + * page that is beyond the oldEOF if the write is unaligned... we only + * want that to happen for the very first page of the cluster_write, + * NOT the first page of each vector making up a multi-vector write. + */ + if (uio->uio_offset > oldEOF) + oldEOF = uio->uio_offset; } return (retval); } @@ -1896,13 +1956,13 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in upl_t upl; upl_page_info_t *pl; vm_offset_t upl_offset; + vm_offset_t vector_upl_offset = 0; u_int32_t io_req_size; u_int32_t offset_in_file; u_int32_t offset_in_iovbase; - u_int32_t io_size; - int io_flag; - int bflag; - vm_size_t upl_size; + u_int32_t io_size; + int io_flag = 0; + upl_size_t upl_size, vector_upl_size = 0; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; int upl_flags; @@ -1915,15 +1975,14 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in user_addr_t iov_base; u_int32_t mem_alignment_mask; u_int32_t devblocksize; - u_int32_t max_upl_size; + u_int32_t max_upl_size; + u_int32_t vector_upl_iosize = 0; + int issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1); + off_t v_upl_uio_offset = 0; + int vector_upl_index=0; + upl_t vector_upl = NULL; - max_upl_size = cluster_max_io_size(vp->v_mount, CL_WRITE); - - if (flags & IO_PASSIVE) - bflag = CL_PASSIVE; - else - bflag = 0; /* * When we enter this routine, we know @@ -1932,6 +1991,13 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 75)) | DBG_FUNC_START, (int)uio->uio_offset, *write_length, (int)newEOF, 0, 0); + 
max_upl_size = cluster_max_io_size(vp->v_mount, CL_WRITE); + + io_flag = CL_ASYNC | CL_PRESERVE | CL_COMMIT | CL_THROTTLE | CL_DIRECT_IO; + + if (flags & IO_PASSIVE) + io_flag |= CL_PASSIVE; + iostate.io_completed = 0; iostate.io_issued = 0; iostate.io_error = 0; @@ -1989,6 +2055,23 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in if (io_size > max_upl_size) io_size = max_upl_size; + if(useVectorUPL && (iov_base & PAGE_MASK)) { + /* + * We have an iov_base that's not page-aligned. + * Issue all I/O's that have been collected within + * this Vectored UPL. + */ + if(vector_upl_index) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + + /* + * After this point, if we are using the Vector UPL path and the base is + * not page-aligned then the UPL with that base will be the first in the vector UPL. + */ + } + upl_offset = (vm_offset_t)((u_int32_t)iov_base & PAGE_MASK); upl_needed_size = (upl_offset + io_size + (PAGE_SIZE -1)) & ~PAGE_MASK; @@ -2073,6 +2156,18 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in */ goto wait_for_dwrites; } + + if(useVectorUPL) { + vm_offset_t end_off = ((iov_base + io_size) & PAGE_MASK); + if(end_off) + issueVectorUPL = 1; + /* + * After this point, if we are using a vector UPL, then + * either all the UPL elements end on a page boundary OR + * this UPL is the last element because it does not end + * on a page boundary. 
+ */ + } /* * Now look for pages already in the cache @@ -2088,21 +2183,23 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in * if there are already too many outstanding writes * wait until some complete before issuing the next */ - lck_mtx_lock(cl_mtxp); + if (iostate.io_issued > iostate.io_completed) { - while ((iostate.io_issued - iostate.io_completed) > (2 * max_upl_size)) { + lck_mtx_lock(cl_mtxp); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 2 * max_upl_size, 0, 0); + while ((iostate.io_issued - iostate.io_completed) > (max_upl_size * IO_SCALE(vp, 2))) { - iostate.io_wanted = 1; - msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, max_upl_size * IO_SCALE(vp, 2), 0, 0); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 2 * max_upl_size, 0, 0); - } - lck_mtx_unlock(cl_mtxp); + iostate.io_wanted = 1; + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, max_upl_size * IO_SCALE(vp, 2), 0, 0); + } + lck_mtx_unlock(cl_mtxp); + } if (iostate.io_error) { /* * one of the earlier writes we issued ran into a hard error @@ -2115,20 +2212,51 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in goto wait_for_dwrites; } - io_flag = CL_ASYNC | CL_PRESERVE | CL_COMMIT | CL_THROTTLE | CL_DIRECT_IO | bflag; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_START, (int)upl_offset, (int)uio->uio_offset, io_size, io_flag, 0); - retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, + if(!useVectorUPL) + retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + else { + 
if(!vector_upl_index) { + vector_upl = vector_upl_create(upl_offset); + v_upl_uio_offset = uio->uio_offset; + vector_upl_offset = upl_offset; + } + + vector_upl_set_subupl(vector_upl,upl,upl_size); + vector_upl_set_iostate(vector_upl, upl, vector_upl_size, upl_size); + vector_upl_index++; + vector_upl_iosize += io_size; + vector_upl_size += upl_size; + + if(issueVectorUPL || vector_upl_index == MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= MAX_VECTOR_UPL_SIZE) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + } + /* * update the uio structure to * reflect the I/O that we just issued */ uio_update(uio, (user_size_t)io_size); + /* + * in case we end up calling through to cluster_write_copy to finish + * the tail of this request, we need to update the oldEOF so that we + * don't zero-fill the head of a page if we've successfully written + * data to that area... 'cluster_write_copy' will zero-fill the head of a + * page that is beyond the oldEOF if the write is unaligned... we only + * want that to happen for the very first page of the cluster_write, + * NOT the first page of each vector making up a multi-vector write. 
+ */ + if (uio->uio_offset > oldEOF) + oldEOF = uio->uio_offset; + io_req_size -= io_size; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 77)) | DBG_FUNC_END, @@ -2150,7 +2278,13 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in } wait_for_dwrites: - if (iostate.io_issued) { + + if(retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + + if (iostate.io_issued > iostate.io_completed) { /* * make sure all async writes issued as part of this stream * have completed before we return @@ -2159,13 +2293,13 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in while (iostate.io_issued != iostate.io_completed) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + iostate.io_issued, iostate.io_completed, 0, 0, 0); iostate.io_wanted = 1; msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_direct", NULL); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, 0, 0, 0); } lck_mtx_unlock(cl_mtxp); } @@ -2180,6 +2314,9 @@ cluster_write_direct(vnode_t vp, struct uio *uio, off_t oldEOF, off_t newEOF, in * note that flags will never have IO_HEADZEROFILL or IO_TAILZEROFILL set * so we can just pass 0 in for the headOff and tailOff */ + if (uio->uio_offset > oldEOF) + oldEOF = uio->uio_offset; + retval = cluster_write_copy(vp, uio, io_req_size, oldEOF, newEOF, (off_t)0, (off_t)0, flags, callback, callback_arg); *write_type = IO_UNKNOWN; @@ -2202,7 +2339,7 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, u_int32_t tail_size = 0; u_int32_t io_size; u_int32_t 
xsize; - vm_size_t upl_size; + upl_size_t upl_size; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; int upl_flags; @@ -2319,19 +2456,19 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, * if there are already too many outstanding writes * wait until some have completed before issuing the next */ - if (iostate.io_issued) { + if (iostate.io_issued > iostate.io_completed) { lck_mtx_lock(cl_mtxp); - while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_IO_CONTIG_SIZE)) { + while ((iostate.io_issued - iostate.io_completed) > (MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2))) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0); iostate.io_wanted = 1; msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0); } lck_mtx_unlock(cl_mtxp); } @@ -2378,20 +2515,22 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, * make sure all async writes that are part of this stream * have completed before we proceed */ - lck_mtx_lock(cl_mtxp); - - while (iostate.io_issued != iostate.io_completed) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + if (iostate.io_issued > iostate.io_completed) { + + lck_mtx_lock(cl_mtxp); - iostate.io_wanted = 1; - msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL); + while (iostate.io_issued != iostate.io_completed) { + 
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, 0, 0, 0); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 0, 0, 0); - } - lck_mtx_unlock(cl_mtxp); + iostate.io_wanted = 1; + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_write_contig", NULL); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, 0, 0, 0); + } + lck_mtx_unlock(cl_mtxp); + } if (iostate.io_error) error = iostate.io_error; @@ -2409,6 +2548,42 @@ cluster_write_contig(vnode_t vp, struct uio *uio, off_t newEOF, int *write_type, } +/* + * need to avoid a race between an msync of a range of pages dirtied via mmap + * vs a filesystem such as HFS deciding to write a 'hole' to disk via cluster_write's + * zerofill mechanism before it has seen the VNOP_PAGEOUTs for the pages being msync'd + * + * we should never force-zero-fill pages that are already valid in the cache... 
+ * the entire page contains valid data (either from disk, zero-filled or dirtied + * via an mmap) so we can only do damage by trying to zero-fill + * + * if flags carries IO_NOZEROVALID or IO_NOZERODIRTY, bytes_to_zero is first clipped + * to what remains of the page containing zero_off, and that page (indexed relative + * to upl_f_offset) is left untouched when upl_valid_page() reports it valid... + * in every other case cluster_zero() is called on upl at io_offset for bytes_to_zero bytes + * + * returns the (possibly clipped) bytes_to_zero whether or not the range was + * actually zeroed + * + */ +static int +cluster_zero_range(upl_t upl, upl_page_info_t *pl, int flags, int io_offset, off_t zero_off, off_t upl_f_offset, int bytes_to_zero) +{ + int zero_pg_index; + boolean_t need_cluster_zero = TRUE; + + if ((flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) { + + bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64)); + zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64); + + if (upl_valid_page(pl, zero_pg_index)) { + /* + * never force zero valid pages - dirty or clean + * we'll leave these in the UPL for cluster_write_copy to deal with + */ + need_cluster_zero = FALSE; + } + } + if (need_cluster_zero == TRUE) + cluster_zero(upl, io_offset, bytes_to_zero, NULL); + + return (bytes_to_zero); +} + + static int cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t oldEOF, off_t newEOF, off_t headOff, off_t tailOff, int flags, int (*callback)(buf_t, void *), void *callback_arg) @@ -2436,13 +2611,8 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old struct cl_extent cl; struct cl_writebehind *wbp; int bflag; - u_int max_cluster_pgcount; - u_int max_io_size; - - if (flags & IO_PASSIVE) - bflag = CL_PASSIVE; - else - bflag = 0; + u_int max_cluster_pgcount; + u_int max_io_size; if (uio) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_START, @@ -2455,6 +2625,11 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old io_resid = 0; } + if (flags & IO_PASSIVE) + bflag = CL_PASSIVE; + else + bflag = 0; + zero_cnt = 0; zero_cnt1 = 0; zero_off = 0; @@ -2480,6 +2655,16 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old zero_cnt = newEOF - headOff; zero_off = headOff; } + } else { + if (uio && uio->uio_offset > oldEOF) { + zero_off = uio->uio_offset & ~PAGE_MASK_64; + + if 
(zero_off >= oldEOF) { + zero_cnt = uio->uio_offset - zero_off; + + flags |= IO_HEADZEROFILL; + } + } } if (flags & IO_TAILZEROFILL) { if (uio) { @@ -2488,6 +2673,16 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old if (zero_off1 < tailOff) zero_cnt1 = tailOff - zero_off1; } + } else { + if (uio && newEOF > oldEOF) { + zero_off1 = uio->uio_offset + io_req_size; + + if (zero_off1 == newEOF && (zero_off1 & PAGE_MASK_64)) { + zero_cnt1 = PAGE_SIZE_64 - (zero_off1 & PAGE_MASK_64); + + flags |= IO_TAILZEROFILL; + } + } } if (zero_cnt == 0 && uio == (struct uio *) 0) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 40)) | DBG_FUNC_END, @@ -2527,7 +2722,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old xfer_resid = total_size; retval = cluster_copy_ubc_data_internal(vp, uio, &xfer_resid, 1, 1); - + if (retval) break; @@ -2585,14 +2780,14 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old upl_size, &upl, &pl, - UPL_SET_LITE | UPL_WILL_MODIFY); + UPL_SET_LITE | (( uio!=NULL && (uio->uio_flags & UIO_FLAGS_IS_COMPRESSED_FILE)) ? 
0 : UPL_WILL_MODIFY)); if (kret != KERN_SUCCESS) panic("cluster_write_copy: failed to get pagelist"); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_END, - (int)upl, (int)upl_f_offset, start_offset, 0, 0); + upl, (int)upl_f_offset, start_offset, 0, 0); - if (start_offset && !upl_valid_page(pl, 0)) { + if (start_offset && upl_f_offset < oldEOF && !upl_valid_page(pl, 0)) { int read_size; /* @@ -2602,8 +2797,8 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ read_size = PAGE_SIZE; - if ((upl_f_offset + read_size) > newEOF) - read_size = newEOF - upl_f_offset; + if ((upl_f_offset + read_size) > oldEOF) + read_size = oldEOF - upl_f_offset; retval = cluster_io(vp, upl, 0, upl_f_offset, read_size, CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg); @@ -2620,7 +2815,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, - (int)upl, 0, 0, retval, 0); + upl, 0, 0, retval, 0); break; } } @@ -2638,8 +2833,8 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old read_size = PAGE_SIZE; - if ((upl_f_offset + upl_offset + read_size) > newEOF) - read_size = newEOF - (upl_f_offset + upl_offset); + if ((off_t)(upl_f_offset + upl_offset + read_size) > oldEOF) + read_size = oldEOF - (upl_f_offset + upl_offset); retval = cluster_io(vp, upl, upl_offset, upl_f_offset + upl_offset, read_size, CL_READ | bflag, (buf_t)NULL, (struct clios *)NULL, callback, callback_arg); @@ -2656,7 +2851,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, - (int)upl, 0, 0, retval, 0); + upl, 0, 0, retval, 0); break; } } @@ -2671,22 +2866,8 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, 
off_t old else bytes_to_zero = xfer_resid; - if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); - } else { - int zero_pg_index; - - bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off & PAGE_MASK_64)); - zero_pg_index = (int)((zero_off - upl_f_offset) / PAGE_SIZE_64); - - if ( !upl_valid_page(pl, zero_pg_index)) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); + bytes_to_zero = cluster_zero_range(upl, pl, flags, io_offset, zero_off, upl_f_offset, bytes_to_zero); - } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY && - !upl_dirty_page(pl, zero_pg_index)) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); - } - } xfer_resid -= bytes_to_zero; zero_cnt -= bytes_to_zero; zero_off += bytes_to_zero; @@ -2701,11 +2882,10 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old retval = cluster_copy_upl_data(uio, upl, io_offset, (int *)&io_requested); if (retval) { - ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 45)) | DBG_FUNC_NONE, - (int)upl, 0, 0, retval, 0); + upl, 0, 0, retval, 0); } else { io_resid -= bytes_to_move; xfer_resid -= bytes_to_move; @@ -2719,27 +2899,13 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old else bytes_to_zero = xfer_resid; - if ( !(flags & (IO_NOZEROVALID | IO_NOZERODIRTY))) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); - } else { - int zero_pg_index; - - bytes_to_zero = min(bytes_to_zero, PAGE_SIZE - (int)(zero_off1 & PAGE_MASK_64)); - zero_pg_index = (int)((zero_off1 - upl_f_offset) / PAGE_SIZE_64); - - if ( !upl_valid_page(pl, zero_pg_index)) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); - } else if ((flags & (IO_NOZERODIRTY | IO_NOZEROVALID)) == IO_NOZERODIRTY && - !upl_dirty_page(pl, zero_pg_index)) { - cluster_zero(upl, io_offset, bytes_to_zero, NULL); - } - } + bytes_to_zero = 
cluster_zero_range(upl, pl, flags, io_offset, zero_off1, upl_f_offset, bytes_to_zero); + xfer_resid -= bytes_to_zero; zero_cnt1 -= bytes_to_zero; zero_off1 += bytes_to_zero; io_offset += bytes_to_zero; } - if (retval == 0) { int cl_index; int ret_cluster_try_push; @@ -2782,14 +2948,14 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64); - if (flags & IO_SYNC) + if (flags & IO_SYNC) { /* * if the IO_SYNC flag is set than we need to * bypass any clusters and immediately issue * the I/O */ goto issue_io; - + } /* * take the lock to protect our accesses * of the writebehind and sparse cluster state @@ -2803,7 +2969,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * we've fallen into the sparse * cluster method of delaying dirty pages */ - sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg); + sparse_cluster_add(&(wbp->cl_scmap), vp, &cl, newEOF, callback, callback_arg); lck_mtx_unlock(&wbp->cl_lockw); @@ -2818,7 +2984,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old */ wbp->cl_number = 0; - sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg); + sparse_cluster_push(&(wbp->cl_scmap), vp, newEOF, PUSH_ALL, callback, callback_arg); /* * no clusters of either type present at this point * so just go directly to start_new_cluster since @@ -2980,7 +3146,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old * sparse mechanism.... 
*/ sparse_cluster_switch(wbp, vp, newEOF, callback, callback_arg); - sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg); + sparse_cluster_add(&(wbp->cl_scmap), vp, &cl, newEOF, callback, callback_arg); lck_mtx_unlock(&wbp->cl_lockw); @@ -3106,13 +3272,13 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (* static void -cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int flags) +cluster_read_upl_release(upl_t upl, int start_pg, int last_pg, int take_reference) { int range; int abort_flags = UPL_ABORT_FREE_ON_EMPTY; if ((range = last_pg - start_pg)) { - if ( !(flags & IO_NOCACHE)) + if (take_reference) abort_flags |= UPL_ABORT_REFERENCE; ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, range * PAGE_SIZE, abort_flags); @@ -3126,7 +3292,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file upl_page_info_t *pl; upl_t upl; vm_offset_t upl_offset; - u_int32_t upl_size; + u_int32_t upl_size; off_t upl_f_offset; int start_offset; int start_pg; @@ -3143,8 +3309,8 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file u_int32_t xsize; u_int32_t io_size; u_int32_t max_rd_size; - u_int32_t max_io_size; - u_int32_t max_prefetch; + u_int32_t max_io_size; + u_int32_t max_prefetch; u_int rd_ahead_enabled = 1; u_int prefetch_enabled = 1; struct cl_readahead * rap; @@ -3155,6 +3321,10 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file struct uthread *ut; int policy = IOPOL_DEFAULT; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START, + (int)uio->uio_offset, io_req_size, (int)filesize, flags, 0); + policy = current_proc()->p_iopol_disk; ut = get_bsdthread_info(current_thread()); @@ -3162,41 +3332,43 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file if (ut->uu_iopol_disk != IOPOL_DEFAULT) policy = ut->uu_iopol_disk; - if (policy == IOPOL_THROTTLE) + if (policy == IOPOL_THROTTLE || (flags & 
IO_NOCACHE)) take_reference = 0; if (flags & IO_PASSIVE) bflag = CL_PASSIVE; else - bflag = 0; + bflag = 0; - max_prefetch = MAX_PREFETCH(vp); - max_rd_size = max_prefetch; - max_io_size = cluster_max_io_size(vp->v_mount, CL_READ); + max_io_size = cluster_max_io_size(vp->v_mount, CL_READ); + max_prefetch = MAX_PREFETCH(vp, max_io_size); + max_rd_size = max_prefetch; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 32)) | DBG_FUNC_START, - (int)uio->uio_offset, io_req_size, (int)filesize, flags, 0); - last_request_offset = uio->uio_offset + io_req_size; + if (last_request_offset > filesize) + last_request_offset = filesize; + if ((flags & (IO_RAOFF|IO_NOCACHE)) || ((last_request_offset & ~PAGE_MASK_64) == (uio->uio_offset & ~PAGE_MASK_64))) { rd_ahead_enabled = 0; rap = NULL; } else { - if (cluster_hard_throttle_on(vp)) { + if (cluster_hard_throttle_on(vp, 1)) { rd_ahead_enabled = 0; prefetch_enabled = 0; max_rd_size = HARD_THROTTLE_MAXSIZE; + } else if (policy == IOPOL_THROTTLE) { + rd_ahead_enabled = 0; + prefetch_enabled = 0; } if ((rap = cluster_get_rap(vp)) == NULL) rd_ahead_enabled = 0; + else { + extent.b_addr = uio->uio_offset / PAGE_SIZE_64; + extent.e_addr = (last_request_offset - 1) / PAGE_SIZE_64; + } } - if (last_request_offset > filesize) - last_request_offset = filesize; - extent.b_addr = uio->uio_offset / PAGE_SIZE_64; - extent.e_addr = (last_request_offset - 1) / PAGE_SIZE_64; - if (rap != NULL && rap->cl_ralen && (rap->cl_lastr == extent.b_addr || (rap->cl_lastr + 1) == extent.b_addr)) { /* * determine if we already have a read-ahead in the pipe courtesy of the @@ -3215,17 +3387,8 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file last_ioread_offset = (off_t)0; while (io_req_size && uio->uio_offset < filesize && retval == 0) { - /* - * compute the size of the upl needed to encompass - * the requested read... limit each call to cluster_io - * to the maximum UPL size... 
cluster_io will clip if - * this exceeds the maximum io_size for the device, - * make sure to account for - * a starting offset that's not page aligned - */ - start_offset = (int)(uio->uio_offset & PAGE_MASK_64); - upl_f_offset = uio->uio_offset - (off_t)start_offset; - max_size = filesize - uio->uio_offset; + + max_size = filesize - uio->uio_offset; if ((off_t)(io_req_size) < max_size) io_size = io_req_size; @@ -3277,7 +3440,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file io_requested = io_resid; - retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_resid, 0, take_reference); + retval = cluster_copy_ubc_data_internal(vp, uio, (int *)&io_resid, 0, last_ioread_offset == 0 ? take_reference : 0); xsize = io_requested - io_resid; @@ -3292,7 +3455,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file */ break; - if ((io_size == 0 || last_ioread_offset == last_request_offset) && rd_ahead_enabled) { + if (rd_ahead_enabled && (io_size == 0 || last_ioread_offset == last_request_offset)) { /* * we're already finished the I/O for this read request * let's see if we should do a read-ahead @@ -3310,10 +3473,23 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file } break; } - start_offset = (int)(uio->uio_offset & PAGE_MASK_64); - upl_f_offset = uio->uio_offset - (off_t)start_offset; - max_size = filesize - uio->uio_offset; + /* + * recompute max_size since cluster_copy_ubc_data_internal + * may have advanced uio->uio_offset + */ + max_size = filesize - uio->uio_offset; } + /* + * compute the size of the upl needed to encompass + * the requested read... limit each call to cluster_io + * to the maximum UPL size... 
cluster_io will clip if + * this exceeds the maximum io_size for the device, + * make sure to account for + * a starting offset that's not page aligned + */ + start_offset = (int)(uio->uio_offset & PAGE_MASK_64); + upl_f_offset = uio->uio_offset - (off_t)start_offset; + if (io_size > max_rd_size) io_size = max_rd_size; @@ -3329,7 +3505,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file pages_in_upl = upl_size / PAGE_SIZE; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_START, - (int)upl, (int)upl_f_offset, upl_size, start_offset, 0); + upl, (int)upl_f_offset, upl_size, start_offset, 0); kret = ubc_create_upl(vp, upl_f_offset, @@ -3341,7 +3517,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file panic("cluster_read_copy: failed to get pagelist"); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 33)) | DBG_FUNC_END, - (int)upl, (int)upl_f_offset, upl_size, start_offset, 0); + upl, (int)upl_f_offset, upl_size, start_offset, 0); /* * scan from the beginning of the upl looking for the first @@ -3379,7 +3555,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file upl_offset = start_pg * PAGE_SIZE; io_size = (last_pg - start_pg) * PAGE_SIZE; - if ((upl_f_offset + upl_offset + io_size) > filesize) + if ((off_t)(upl_f_offset + upl_offset + io_size) > filesize) io_size = filesize - (upl_f_offset + upl_offset); /* @@ -3466,20 +3642,22 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file rap->cl_lastr = extent.e_addr; } } - lck_mtx_lock(cl_mtxp); + if (iostate.io_issued > iostate.io_completed) { - while (iostate.io_issued != iostate.io_completed) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + lck_mtx_lock(cl_mtxp); - iostate.io_wanted = 1; - msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_copy", NULL); + while (iostate.io_issued != iostate.io_completed) { + 
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, 0, 0, 0); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 0, 0, 0); - } - lck_mtx_unlock(cl_mtxp); + iostate.io_wanted = 1; + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_copy", NULL); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, 0, 0, 0); + } + lck_mtx_unlock(cl_mtxp); + } if (iostate.io_error) error = iostate.io_error; else { @@ -3501,16 +3679,22 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file */ io_size = (last_pg - start_pg) * PAGE_SIZE; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START, (int)upl, start_pg * PAGE_SIZE, io_size, error, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START, upl, start_pg * PAGE_SIZE, io_size, error, 0); if (error || (flags & IO_NOCACHE)) ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, io_size, UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY); - else - ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size, - UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY | UPL_COMMIT_INACTIVATE); + else { + int commit_flags = UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY; + + if (take_reference) + commit_flags |= UPL_COMMIT_INACTIVATE; + else + commit_flags |= UPL_COMMIT_SPECULATE; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, (int)upl, start_pg * PAGE_SIZE, io_size, error, 0); + ubc_upl_commit_range(upl, start_pg * PAGE_SIZE, io_size, commit_flags); + } + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, upl, start_pg * PAGE_SIZE, io_size, error, 0); } if ((last_pg - start_pg) < pages_in_upl) { /* @@ -3523,28 +3707,28 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file else { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_START, - (int)upl, -1, pages_in_upl - (last_pg - start_pg), 0, 0); + upl, -1, 
pages_in_upl - (last_pg - start_pg), 0, 0); /* * handle any valid pages at the beginning of * the upl... release these appropriately */ - cluster_read_upl_release(upl, 0, start_pg, flags); + cluster_read_upl_release(upl, 0, start_pg, take_reference); /* * handle any valid pages immediately after the * pages we issued I/O for... ... release these appropriately */ - cluster_read_upl_release(upl, last_pg, uio_last, flags); + cluster_read_upl_release(upl, last_pg, uio_last, take_reference); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, (int)upl, -1, -1, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 35)) | DBG_FUNC_END, upl, -1, -1, 0, 0); } } if (retval == 0) retval = error; if (io_req_size) { - if (cluster_hard_throttle_on(vp)) { + if (cluster_hard_throttle_on(vp, 1)) { rd_ahead_enabled = 0; prefetch_enabled = 0; @@ -3554,10 +3738,11 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file /* * coming out of throttled state */ - if (rap != NULL) - rd_ahead_enabled = 1; - prefetch_enabled = 1; - + if (policy != IOPOL_THROTTLE) { + if (rap != NULL) + rd_ahead_enabled = 1; + prefetch_enabled = 1; + } max_rd_size = max_prefetch; last_ioread_offset = 0; } @@ -3585,12 +3770,11 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, upl_t upl; upl_page_info_t *pl; off_t max_io_size; - vm_offset_t upl_offset; - vm_size_t upl_size; + vm_offset_t upl_offset, vector_upl_offset = 0; + upl_size_t upl_size, vector_upl_size = 0; vm_size_t upl_needed_size; unsigned int pages_in_pl; int upl_flags; - int bflag; kern_return_t kret; unsigned int i; int force_data_sync; @@ -3609,24 +3793,27 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, u_int32_t xsize; u_int32_t devblocksize; u_int32_t mem_alignment_mask; - u_int32_t max_upl_size; - u_int32_t max_rd_size; - u_int32_t max_rd_ahead; - + u_int32_t max_upl_size; + u_int32_t max_rd_size; + u_int32_t max_rd_ahead; - max_upl_size = 
cluster_max_io_size(vp->v_mount, CL_READ); + u_int32_t vector_upl_iosize = 0; + int issueVectorUPL = 0,useVectorUPL = (uio->uio_iovcnt > 1); + off_t v_upl_uio_offset = 0; + int vector_upl_index=0; + upl_t vector_upl = NULL; + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START, + (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0); - max_rd_size = max_upl_size; - max_rd_ahead = max_rd_size * 2; + max_upl_size = cluster_max_io_size(vp->v_mount, CL_READ); + max_rd_size = max_upl_size; + max_rd_ahead = max_rd_size * IO_SCALE(vp, 2); + io_flag = CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO | CL_DIRECT_IO; if (flags & IO_PASSIVE) - bflag = CL_PASSIVE; - else - bflag = 0; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 70)) | DBG_FUNC_START, - (int)uio->uio_offset, (int)filesize, *read_type, *read_length, 0); + io_flag |= CL_PASSIVE; iostate.io_completed = 0; iostate.io_issued = 0; @@ -3687,12 +3874,12 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, while (io_req_size && retval == 0) { u_int32_t io_start; - if (cluster_hard_throttle_on(vp)) { + if (cluster_hard_throttle_on(vp, 1)) { max_rd_size = HARD_THROTTLE_MAXSIZE; max_rd_ahead = HARD_THROTTLE_MAXSIZE - 1; } else { max_rd_size = max_upl_size; - max_rd_ahead = max_rd_size * 2; + max_rd_ahead = max_rd_size * IO_SCALE(vp, 2); } io_start = io_size = io_req_size; @@ -3713,6 +3900,27 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, io_req_size -= xsize; + if(useVectorUPL && (xsize || (iov_base & PAGE_MASK))) { + /* + * We found something in the cache or we have an iov_base that's not + * page-aligned. + * + * Issue all I/O's that have been collected within this Vectored UPL. 
+ */ + if(vector_upl_index) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + + if(xsize) + useVectorUPL = 0; + + /* + * After this point, if we are using the Vector UPL path and the base is + * not page-aligned then the UPL with that base will be the first in the vector UPL. + */ + } + /* * check to see if we are finished with this request... */ @@ -3850,26 +4058,40 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, (int)upl_offset, upl_size, io_size, kret, 0); + if(useVectorUPL) { + vm_offset_t end_off = ((iov_base + io_size) & PAGE_MASK); + if(end_off) + issueVectorUPL = 1; + /* + * After this point, if we are using a vector UPL, then + * either all the UPL elements end on a page boundary OR + * this UPL is the last element because it does not end + * on a page boundary. 
+ */ + } + /* * request asynchronously so that we can overlap * the preparation of the next I/O * if there are already too many outstanding reads * wait until some have completed before issuing the next read */ - lck_mtx_lock(cl_mtxp); + if (iostate.io_issued > iostate.io_completed) { - while ((iostate.io_issued - iostate.io_completed) > max_rd_ahead) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0); + lck_mtx_lock(cl_mtxp); - iostate.io_wanted = 1; - msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL); + while ((iostate.io_issued - iostate.io_completed) > max_rd_ahead) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0); - } - lck_mtx_unlock(cl_mtxp); - + iostate.io_wanted = 1; + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, max_rd_ahead, 0, 0); + } + lck_mtx_unlock(cl_mtxp); + } if (iostate.io_error) { /* * one of the earlier reads we issued ran into a hard error @@ -3883,15 +4105,36 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, goto wait_for_dreads; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_START, - (int)upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0); + upl, (int)upl_offset, (int)uio->uio_offset, io_size, 0); - if (no_zero_fill) - io_flag = CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO | CL_DIRECT_IO | bflag; - else - io_flag = CL_COMMIT | CL_READ | CL_ASYNC | CL_NOZERO | CL_DIRECT_IO | CL_PRESERVE | bflag; - retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + if(!useVectorUPL) { + if 
(no_zero_fill) + io_flag &= ~CL_PRESERVE; + else + io_flag |= CL_PRESERVE; + + retval = cluster_io(vp, upl, upl_offset, uio->uio_offset, io_size, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + + } else { + if(!vector_upl_index) { + vector_upl = vector_upl_create(upl_offset); + v_upl_uio_offset = uio->uio_offset; + vector_upl_offset = upl_offset; + } + + vector_upl_set_subupl(vector_upl,upl, upl_size); + vector_upl_set_iostate(vector_upl, upl, vector_upl_size, upl_size); + vector_upl_index++; + vector_upl_size += upl_size; + vector_upl_iosize += io_size; + + if(issueVectorUPL || vector_upl_index == MAX_VECTOR_UPL_ELEMENTS || vector_upl_size >= MAX_VECTOR_UPL_SIZE) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + } /* * update the uio structure */ @@ -3900,7 +4143,7 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, io_req_size -= io_size; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 73)) | DBG_FUNC_END, - (int)upl, (int)uio->uio_offset, io_req_size, retval, 0); + upl, (int)uio->uio_offset, io_req_size, retval, 0); } /* end while */ @@ -3918,26 +4161,31 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, } wait_for_dreads: - if (iostate.io_issued) { - /* - * make sure all async reads that are part of this stream - * have completed before we return - */ + + if(retval == 0 && iostate.io_error == 0 && useVectorUPL && vector_upl_index) { + retval = vector_cluster_io(vp, vector_upl, vector_upl_offset, v_upl_uio_offset, vector_upl_iosize, io_flag, (buf_t)NULL, &iostate, callback, callback_arg); + reset_vector_run_state(); + } + /* + * make sure all async reads that are part of this stream + * have completed before we return + */ + if (iostate.io_issued > iostate.io_completed) { + lck_mtx_lock(cl_mtxp); while (iostate.io_issued != iostate.io_completed) { - 
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, 0, 0, 0); iostate.io_wanted = 1; msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_direct", NULL); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, 0, 0, 0); } lck_mtx_unlock(cl_mtxp); } - if (iostate.io_error) retval = iostate.io_error; @@ -3967,7 +4215,7 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, addr64_t dst_paddr = 0; user_addr_t iov_base; off_t max_size; - vm_size_t upl_size; + upl_size_t upl_size; vm_size_t upl_needed_size; mach_msg_type_number_t pages_in_pl; int upl_flags; @@ -3985,9 +4233,9 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, int bflag; if (flags & IO_PASSIVE) - bflag = CL_PASSIVE; + bflag = CL_PASSIVE; else - bflag = 0; + bflag = 0; /* * When we enter this routine, we know @@ -4101,18 +4349,18 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, * if there are already too many outstanding reads * wait until some have completed before issuing the next */ - if (iostate.io_issued) { + if (iostate.io_issued > iostate.io_completed) { lck_mtx_lock(cl_mtxp); - while ((iostate.io_issued - iostate.io_completed) > (2 * MAX_IO_CONTIG_SIZE)) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0); + while ((iostate.io_issued - iostate.io_completed) > (MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2))) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0); iostate.io_wanted = 1; 
msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 2 * MAX_IO_CONTIG_SIZE, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, MAX_IO_CONTIG_SIZE * IO_SCALE(vp, 2), 0, 0); } lck_mtx_unlock(cl_mtxp); } @@ -4156,20 +4404,22 @@ cluster_read_contig(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, * make sure all async reads that are part of this stream * have completed before we proceed */ - lck_mtx_lock(cl_mtxp); + if (iostate.io_issued > iostate.io_completed) { - while (iostate.io_issued != iostate.io_completed) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, - iostate.io_issued, iostate.io_completed, 0, 0, 0); + lck_mtx_lock(cl_mtxp); - iostate.io_wanted = 1; - msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL); + while (iostate.io_issued != iostate.io_completed) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_START, + iostate.io_issued, iostate.io_completed, 0, 0, 0); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, - iostate.io_issued, iostate.io_completed, 0, 0, 0); - } - lck_mtx_unlock(cl_mtxp); + iostate.io_wanted = 1; + msleep((caddr_t)&iostate.io_wanted, cl_mtxp, PRIBIO + 1, "cluster_read_contig", NULL); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 95)) | DBG_FUNC_END, + iostate.io_issued, iostate.io_completed, 0, 0, 0); + } + lck_mtx_unlock(cl_mtxp); + } if (iostate.io_error) error = iostate.io_error; @@ -4193,7 +4443,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m user_size_t iov_len; user_addr_t iov_base = 0; upl_t upl; - vm_size_t upl_size; + upl_size_t upl_size; int upl_flags; int retval = 0; @@ -4204,7 +4454,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m iov_len = uio_curriovlen(uio); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 
94)) | DBG_FUNC_START, (int)uio, (int)iov_len, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_START, uio, (int)iov_len, 0, 0, 0); if (iov_len) { iov_base = uio_curriovbase(uio); @@ -4246,7 +4496,7 @@ cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, u_int32_t m *io_length = 0; *io_type = IO_UNKNOWN; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_END, (int)iov_base, *io_type, *io_length, retval, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 94)) | DBG_FUNC_END, iov_base, *io_type, *io_length, retval, 0); return (retval); } @@ -4268,7 +4518,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c upl_page_info_t *pl; upl_t upl; vm_offset_t upl_offset; - int upl_size; + int upl_size; off_t upl_f_offset; int start_offset; int start_pg; @@ -4280,9 +4530,9 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c int retval = 0; int issued_io; int skip_range; - uint32_t max_io_size; - - + uint32_t max_io_size; + + if ( !UBCINFOEXISTS(vp)) return(EINVAL); @@ -4290,9 +4540,9 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c return(EINVAL); max_io_size = cluster_max_io_size(vp->v_mount, CL_READ); - + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | DBG_FUNC_START, - (int)f_offset, resid, (int)filesize, 0, 0); + (int)f_offset, resid, (int)filesize, 0, 0); while (resid && f_offset < filesize && retval == 0) { /* @@ -4346,7 +4596,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c pages_in_upl = upl_size / PAGE_SIZE; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_START, - (int)upl, (int)upl_f_offset, upl_size, start_offset, 0); + upl, (int)upl_f_offset, upl_size, start_offset, 0); kret = ubc_create_upl(vp, upl_f_offset, @@ -4371,7 +4621,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 61)) | DBG_FUNC_END, - (int)upl, (int)upl_f_offset, upl_size, 
start_offset, 0); + upl, (int)upl_f_offset, upl_size, start_offset, 0); for (last_pg = 0; last_pg < pages_in_upl; ) { @@ -4407,7 +4657,7 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c upl_offset = start_pg * PAGE_SIZE; io_size = (last_pg - start_pg) * PAGE_SIZE; - if ((upl_f_offset + upl_offset + io_size) > filesize) + if ((off_t)(upl_f_offset + upl_offset + io_size) > filesize) io_size = filesize - (upl_f_offset + upl_offset); /* @@ -4448,10 +4698,11 @@ int cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *callback_arg) { int retval; + int my_sparse_wait = 0; struct cl_writebehind *wbp; if ( !UBCINFOEXISTS(vp)) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, flags, 0, -1, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, vp, flags, 0, -1, 0); return (0); } /* return if deferred write is set */ @@ -4459,32 +4710,97 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca return (0); } if ((wbp = cluster_get_wbp(vp, CLW_RETURNLOCKED)) == NULL) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, flags, 0, -2, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, vp, flags, 0, -2, 0); return (0); } if (wbp->cl_number == 0 && wbp->cl_scmap == NULL) { lck_mtx_unlock(&wbp->cl_lockw); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, (int)vp, flags, 0, -3, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_NONE, vp, flags, 0, -3, 0); return(0); } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_START, - (int)wbp->cl_scmap, wbp->cl_number, flags, 0, 0); + wbp->cl_scmap, wbp->cl_number, flags, 0, 0); + + /* + * if we have an fsync in progress, we don't want to allow any additional + * sync/fsync/close(s) to occur until it finishes. 
+ * note that its possible for writes to continue to occur to this file + * while we're waiting and also once the fsync starts to clean if we're + * in the sparse map case + */ + while (wbp->cl_sparse_wait) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START, vp, 0, 0, 0, 0); + + msleep((caddr_t)&wbp->cl_sparse_wait, &wbp->cl_lockw, PRIBIO + 1, "cluster_push_ext", NULL); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END, vp, 0, 0, 0, 0); + } + if (flags & IO_SYNC) { + my_sparse_wait = 1; + wbp->cl_sparse_wait = 1; + /* + * this is an fsync (or equivalent)... we must wait for any existing async + * cleaning operations to complete before we evaulate the current state + * and finish cleaning... this insures that all writes issued before this + * fsync actually get cleaned to the disk before this fsync returns + */ + while (wbp->cl_sparse_pushes) { + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 98)) | DBG_FUNC_START, vp, 0, 0, 0, 0); + + msleep((caddr_t)&wbp->cl_sparse_pushes, &wbp->cl_lockw, PRIBIO + 1, "cluster_push_ext", NULL); + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 98)) | DBG_FUNC_END, vp, 0, 0, 0, 0); + } + } if (wbp->cl_scmap) { - sparse_cluster_push(wbp, vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg); + void *scmap; + + if (wbp->cl_sparse_pushes < SPARSE_PUSH_LIMIT) { + + scmap = wbp->cl_scmap; + wbp->cl_scmap = NULL; + + wbp->cl_sparse_pushes++; + + lck_mtx_unlock(&wbp->cl_lockw); + + sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg); + + lck_mtx_lock(&wbp->cl_lockw); + wbp->cl_sparse_pushes--; + + if (wbp->cl_sparse_wait && wbp->cl_sparse_pushes == 0) + wakeup((caddr_t)&wbp->cl_sparse_pushes); + } else { + sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg); + } retval = 1; - } else + } else { retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL | IO_PASSIVE, callback, callback_arg); - + } lck_mtx_unlock(&wbp->cl_lockw); if 
(flags & IO_SYNC) (void)vnode_waitforwrites(vp, 0, 0, 0, "cluster_push"); + if (my_sparse_wait) { + /* + * I'm the owner of the serialization token + * clear it and wakeup anyone that is waiting + * for me to finish + */ + lck_mtx_lock(&wbp->cl_lockw); + + wbp->cl_sparse_wait = 0; + wakeup((caddr_t)&wbp->cl_sparse_wait); + + lck_mtx_unlock(&wbp->cl_lockw); + } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 53)) | DBG_FUNC_END, - (int)wbp->cl_scmap, wbp->cl_number, retval, 0, 0); + wbp->cl_scmap, wbp->cl_number, retval, 0, 0); return (retval); } @@ -4498,12 +4814,12 @@ cluster_release(struct ubc_info *ubc) if ((wbp = ubc->cl_wbehind)) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, (int)ubc, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, ubc, wbp->cl_scmap, 0, 0, 0); if (wbp->cl_scmap) vfs_drt_control(&(wbp->cl_scmap), 0); } else { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, (int)ubc, 0, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_START, ubc, 0, 0, 0, 0); } rap = ubc->cl_rahead; @@ -4519,7 +4835,7 @@ cluster_release(struct ubc_info *ubc) ubc->cl_rahead = NULL; ubc->cl_wbehind = NULL; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_END, (int)ubc, (int)rap, (int)wbp, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 81)) | DBG_FUNC_END, ubc, rap, wbp, 0, 0); } @@ -4532,10 +4848,10 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla int cl_len; int cl_pushed = 0; struct cl_wextent l_clusters[MAX_CLUSTERS]; - u_int max_cluster_pgcount; - - - max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE; + u_int max_cluster_pgcount; + + + max_cluster_pgcount = MAX_CLUSTER_SIZE(vp) / PAGE_SIZE; /* * the write behind context exists and has * already been locked... 
@@ -4563,6 +4879,7 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla } if (min_index == -1) break; + l_clusters[cl_index].b_addr = wbp->cl_clusters[min_index].b_addr; l_clusters[cl_index].e_addr = wbp->cl_clusters[min_index].e_addr; l_clusters[cl_index].io_flags = wbp->cl_clusters[min_index].io_flags; @@ -4781,7 +5098,7 @@ cluster_push_now(vnode_t vp, struct cl_extent *cl, off_t EOF, int flags, int (*c if (kret != KERN_SUCCESS) panic("cluster_push: failed to get pagelist"); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_END, (int)upl, upl_f_offset, 0, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 41)) | DBG_FUNC_END, upl, upl_f_offset, 0, 0, 0); /* * since we only asked for the dirty pages back @@ -4870,10 +5187,7 @@ sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int (*c { int cl_index; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_START, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); - - if (wbp->cl_scmap == NULL) - wbp->cl_scdirty = 0; + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_START, vp, wbp->cl_scmap, 0, 0, 0); for (cl_index = 0; cl_index < wbp->cl_number; cl_index++) { int flags; @@ -4885,47 +5199,47 @@ sparse_cluster_switch(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int (*c if (flags & UPL_POP_DIRTY) { cl.e_addr = cl.b_addr + 1; - sparse_cluster_add(wbp, vp, &cl, EOF, callback, callback_arg); + sparse_cluster_add(&(wbp->cl_scmap), vp, &cl, EOF, callback, callback_arg); } } } } wbp->cl_number = 0; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 78)) | DBG_FUNC_END, vp, wbp->cl_scmap, 0, 0, 0); } /* - * sparse_cluster_push is called with the write behind lock held + * sparse_cluster_push must be called with the write-behind lock held if the scmap is + * still associated with the write-behind context... 
however, if the scmap has been disassociated + * from the write-behind context (the cluster_push case), the wb lock is not held */ static void -sparse_cluster_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_flag, int (*callback)(buf_t, void *), void *callback_arg) +sparse_cluster_push(void **scmap, vnode_t vp, off_t EOF, int push_flag, int (*callback)(buf_t, void *), void *callback_arg) { struct cl_extent cl; off_t offset; u_int length; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, push_flag, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_START, vp, (*scmap), 0, push_flag, 0); if (push_flag & PUSH_ALL) - vfs_drt_control(&(wbp->cl_scmap), 1); + vfs_drt_control(scmap, 1); for (;;) { - if (vfs_drt_get_cluster(&(wbp->cl_scmap), &offset, &length) != KERN_SUCCESS) + if (vfs_drt_get_cluster(scmap, &offset, &length) != KERN_SUCCESS) break; cl.b_addr = (daddr64_t)(offset / PAGE_SIZE_64); cl.e_addr = (daddr64_t)((offset + length) / PAGE_SIZE_64); - wbp->cl_scdirty -= (int)(cl.e_addr - cl.b_addr); - cluster_push_now(vp, &cl, EOF, push_flag & IO_PASSIVE, callback, callback_arg); if ( !(push_flag & PUSH_ALL) ) break; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 79)) | DBG_FUNC_END, vp, (*scmap), 0, 0, 0); } @@ -4933,33 +5247,29 @@ sparse_cluster_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_ * sparse_cluster_add is called with the write behind lock held */ static void -sparse_cluster_add(struct cl_writebehind *wbp, vnode_t vp, struct cl_extent *cl, off_t EOF, int (*callback)(buf_t, void *), void *callback_arg) +sparse_cluster_add(void **scmap, vnode_t vp, struct cl_extent *cl, off_t EOF, int (*callback)(buf_t, void *), void *callback_arg) { u_int new_dirty; u_int length; off_t offset; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_START, (int)wbp->cl_scmap, 
wbp->cl_scdirty, (int)cl->b_addr, (int)cl->e_addr, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_START, (*scmap), 0, cl->b_addr, (int)cl->e_addr, 0); offset = (off_t)(cl->b_addr * PAGE_SIZE_64); length = ((u_int)(cl->e_addr - cl->b_addr)) * PAGE_SIZE; - while (vfs_drt_mark_pages(&(wbp->cl_scmap), offset, length, &new_dirty) != KERN_SUCCESS) { + while (vfs_drt_mark_pages(scmap, offset, length, &new_dirty) != KERN_SUCCESS) { /* * no room left in the map * only a partial update was done * push out some pages and try again */ - wbp->cl_scdirty += new_dirty; - - sparse_cluster_push(wbp, vp, EOF, 0, callback, callback_arg); + sparse_cluster_push(scmap, vp, EOF, 0, callback, callback_arg); offset += (new_dirty * PAGE_SIZE_64); length -= (new_dirty * PAGE_SIZE); } - wbp->cl_scdirty += new_dirty; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_END, (int)vp, (int)wbp->cl_scmap, wbp->cl_scdirty, 0, 0); + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 80)) | DBG_FUNC_END, vp, (*scmap), 0, 0, 0); } @@ -5093,17 +5403,10 @@ cluster_copy_upl_data(struct uio *uio, upl_t upl, int upl_offset, int *io_resid) uio->uio_segflg = UIO_PHYS_USERSPACE64; break; - case UIO_SYSSPACE32: - uio->uio_segflg = UIO_PHYS_SYSSPACE32; - break; - case UIO_SYSSPACE: uio->uio_segflg = UIO_PHYS_SYSSPACE; break; - case UIO_SYSSPACE64: - uio->uio_segflg = UIO_PHYS_SYSSPACE64; - break; } pl = ubc_upl_pageinfo(upl); @@ -5179,14 +5482,6 @@ cluster_copy_ubc_data_internal(vnode_t vp, struct uio *uio, int *io_resid, int m uio->uio_segflg = UIO_PHYS_USERSPACE64; break; - case UIO_SYSSPACE32: - uio->uio_segflg = UIO_PHYS_SYSSPACE32; - break; - - case UIO_SYSSPACE64: - uio->uio_segflg = UIO_PHYS_SYSSPACE64; - break; - case UIO_USERSPACE: case UIO_USERISPACE: uio->uio_segflg = UIO_PHYS_USERSPACE; diff --git a/bsd/vfs/vfs_conf.c b/bsd/vfs/vfs_conf.c index 2727afe16..467eb00b2 100644 --- a/bsd/vfs/vfs_conf.c +++ b/bsd/vfs/vfs_conf.c @@ -72,6 +72,13 @@ #include #include +#ifndef __LP64__ +#define 
VFS_THREAD_SAFE_FLAG VFC_VFSTHREADSAFE /* This is only defined for 32-bit */ +#else +#define VFS_THREAD_SAFE_FLAG 0 +#endif /* __LP64__ */ + + /* * These define the root filesystem, device, and root filesystem type. */ @@ -82,22 +89,15 @@ int (*mountroot)(void) = NULL; /* * Set up the initial array of known filesystem types. */ -extern struct vfsops ufs_vfsops; -#if FFS -extern int ffs_mountroot(mount_t, vnode_t, vfs_context_t); -#endif extern struct vfsops mfs_vfsops; extern int mfs_mountroot(mount_t, vnode_t, vfs_context_t); /* dead */ extern struct vfsops hfs_vfsops; extern int hfs_mountroot(mount_t, vnode_t, vfs_context_t); -extern struct vfsops cd9660_vfsops; -extern int cd9660_mountroot(mount_t, vnode_t, vfs_context_t); extern struct vfsops nfs_vfsops; extern int nfs_mountroot(void); extern struct vfsops afs_vfsops; extern struct vfsops null_vfsops; extern struct vfsops union_vfsops; -extern struct vfsops fdesc_vfsops; extern struct vfsops devfs_vfsops; /* @@ -112,74 +112,68 @@ typedef int (*mountroot_t)(mount_t, vnode_t, vfs_context_t); static struct vfstable vfstbllist[] = { /* HFS/HFS+ Filesystem */ #if HFS - { &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 1, {{0}}, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED, NULL, 0, 1}, -#endif - - /* Fast Filesystem */ -#if FFS - { &ufs_vfsops, "ufs", 1, 0, MNT_LOCAL, ffs_mountroot, NULL, 0, {{0}}, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED, NULL, 0, 0}, -#endif - - /* ISO9660 (aka CDROM) Filesystem */ -#if CD9660 - { &cd9660_vfsops, "cd9660", 14, 0, MNT_LOCAL, cd9660_mountroot, NULL, 0, {{0}}, VFC_VFSLOCALARGS, NULL, 0, 0}, + { &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2, NULL, 0}, #endif /* Memory-based Filesystem */ + +#ifndef __LP64__ #if MFS - { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 
0}, + { &mfs_vfsops, "mfs", 3, 0, MNT_LOCAL, mfs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS, NULL, 0}, #endif +#endif /* __LP64__ */ /* Sun-compatible Network Filesystem */ #if NFSCLIENT - { &nfs_vfsops, "nfs", 2, 0, 0, (mountroot_t)nfs_mountroot, NULL, 1, {{0}}, VFC_VFSGENERICARGS|VFC_VFSPREFLIGHT, NULL, 0, 1}, + { &nfs_vfsops, "nfs", 2, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFSPREFLIGHT | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSREADDIR_EXTENDED, NULL, 0}, #endif /* Andrew Filesystem */ +#ifndef __LP64__ #if AFS - { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, + { &afs_vfsops, "andrewfs", 13, 0, 0, afs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS , NULL, 0}, #endif +#endif /* __LP64__ */ /* Loopback (Minimal) Filesystem Layer */ +#ifndef __LP64__ #if NULLFS - { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, + { &null_vfsops, "loopback", 9, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS , NULL, 0}, #endif +#endif /* __LP64__ */ /* Union (translucent) Filesystem */ #if UNION - { &union_vfsops, "unionfs", 15, 0, 0, NULL, NULL, 1, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, -#endif - - /* File Descriptor Filesystem */ -#if FDESC - { &fdesc_vfsops, "fdesc", 7, 0, 0, NULL, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, + { &union_vfsops, "unionfs", 15, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0}, #endif /* Device Filesystem */ #if DEVFS #if CONFIG_MACF - { &devfs_vfsops, "devfs", 19, 0, MNT_MULTILABEL, NULL, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, + { &devfs_vfsops, "devfs", 19, 0, (MNT_DONTBROWSE | MNT_MULTILABEL), NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0}, #else - { &devfs_vfsops, "devfs", 19, 0, 0, NULL, NULL, 0, {{0}}, VFC_VFSGENERICARGS , NULL, 0, 0}, + { &devfs_vfsops, "devfs", 19, 0, MNT_DONTBROWSE, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | 
VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0}, #endif /* MAC */ #endif +#ifndef __LP64__ +#endif /* __LP64__ */ - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0}, - {NULL, "", 0, 0, 0, NULL, NULL, 0, {{0}}, 0, NULL, 0, 0} + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0} }; /* @@ -198,11 +192,6 @@ struct vfstable *vfsconf = vfstbllist; * vectors. It is NULL terminated. 
* */ -#if FFS -extern struct vnodeopv_desc ffs_vnodeop_opv_desc; -extern struct vnodeopv_desc ffs_specop_opv_desc; -extern struct vnodeopv_desc ffs_fifoop_opv_desc; -#endif extern struct vnodeopv_desc mfs_vnodeop_opv_desc; extern struct vnodeopv_desc dead_vnodeop_opv_desc; #if FIFO && SOCKETS @@ -215,27 +204,20 @@ extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc; extern struct vnodeopv_desc nfsv4_vnodeop_opv_desc; extern struct vnodeopv_desc spec_nfsv4nodeop_opv_desc; extern struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc; -extern struct vnodeopv_desc fdesc_vnodeop_opv_desc; extern struct vnodeopv_desc null_vnodeop_opv_desc; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; +extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; extern struct vnodeopv_desc hfs_specop_opv_desc; extern struct vnodeopv_desc hfs_fifoop_opv_desc; -extern struct vnodeopv_desc cd9660_vnodeop_opv_desc; -extern struct vnodeopv_desc cd9660_cdxaop_opv_desc; -extern struct vnodeopv_desc cd9660_specop_opv_desc; -extern struct vnodeopv_desc cd9660_fifoop_opv_desc; extern struct vnodeopv_desc union_vnodeop_opv_desc; extern struct vnodeopv_desc devfs_vnodeop_opv_desc; extern struct vnodeopv_desc devfs_spec_vnodeop_opv_desc; +#if FDESC +extern struct vnodeopv_desc devfs_devfd_vnodeop_opv_desc; +extern struct vnodeopv_desc devfs_fdesc_vnodeop_opv_desc; +#endif /* FDESC */ struct vnodeopv_desc *vfs_opv_descs[] = { -#if FFS - &ffs_vnodeop_opv_desc, - &ffs_specop_opv_desc, -#if FIFO - &ffs_fifoop_opv_desc, -#endif -#endif &dead_vnodeop_opv_desc, #if FIFO && SOCKETS &fifo_vnodeop_opv_desc, @@ -254,33 +236,27 @@ struct vnodeopv_desc *vfs_opv_descs[] = { &fifo_nfsv4nodeop_opv_desc, #endif #endif -#if FDESC - &fdesc_vnodeop_opv_desc, -#endif #if NULLFS &null_vnodeop_opv_desc, #endif #if HFS &hfs_vnodeop_opv_desc, + &hfs_std_vnodeop_opv_desc, &hfs_specop_opv_desc, #if FIFO &hfs_fifoop_opv_desc, #endif #endif -#if CD9660 - &cd9660_vnodeop_opv_desc, - &cd9660_cdxaop_opv_desc, - &cd9660_specop_opv_desc, 
-#if FIFO - &cd9660_fifoop_opv_desc, -#endif -#endif #if UNION &union_vnodeop_opv_desc, #endif #if DEVFS &devfs_vnodeop_opv_desc, &devfs_spec_vnodeop_opv_desc, -#endif +#if FDESC + &devfs_devfd_vnodeop_opv_desc, + &devfs_fdesc_vnodeop_opv_desc, +#endif /* FDESC */ +#endif /* DEVFS */ NULL }; diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 07eaa1480..e09f990dc 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,9 +26,9 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include -#include #include #include +#include // for kqueue related stuff #include #if CONFIG_FSE @@ -58,7 +58,7 @@ #include #include -#include +#include #include @@ -99,7 +99,7 @@ struct fsevent_handle; typedef struct fs_event_watcher { int8_t *event_list; // the events we're interested in int32_t num_events; - dev_t *devices_to_watch; // only report events from these devices + dev_t *devices_not_to_watch; // report events from devices not in this list uint32_t num_devices; int32_t flags; kfs_event **event_queue; @@ -110,6 +110,7 @@ typedef struct fs_event_watcher { int32_t blockers; int32_t my_id; uint32_t num_dropped; + uint64_t max_event_id; struct fsevent_handle *fseh; } fs_event_watcher; @@ -154,7 +155,13 @@ static lck_mtx_t watch_table_lock; static lck_mtx_t event_buf_lock; static lck_mtx_t event_writer_lock; -static void init_pathbuff(void); + +/* Explicitly declare qsort so compiler doesn't complain */ +__private_extern__ void qsort( + void * array, + size_t nmembers, + size_t member_size, + int (*)(const void *, const void *)); static void @@ -199,8 +206,6 @@ fsevents_internal_init(void) // ever grow beyond what we initially filled it with zone_change(event_zone, Z_EXHAUST, TRUE); zone_change(event_zone, Z_COLLECT, FALSE); - - init_pathbuff(); } static void @@ 
-235,24 +240,23 @@ watcher_cares_about_dev(fs_event_watcher *watcher, dev_t dev) { unsigned int i; - // if there is not list of devices to watch, then always - // say we're interested so we'll report all events from - // all devices - if (watcher->devices_to_watch == NULL) { + // if devices_not_to_watch is NULL then we care about all + // events from all devices + if (watcher->devices_not_to_watch == NULL) { return 1; } for(i=0; i < watcher->num_devices; i++) { - if (dev == watcher->devices_to_watch[i]) { - // found a match! that means we want events - // from this device. - return 1; + if (dev == watcher->devices_not_to_watch[i]) { + // found a match! that means we do not + // want events from this device. + return 0; } } - // if we're here it's not in the devices_to_watch[] - // list so that means we do not care about it - return 0; + // if we're here it's not in the devices_not_to_watch[] + // list so that means we do care about it + return 1; } @@ -463,6 +467,7 @@ static char last_str[MAXPATHLEN]; static int last_nlen=0; static int last_vid=-1; static uint64_t last_coalesced_time=0; +static void *last_event_ptr=NULL; int last_coalesced = 0; static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 }; @@ -483,6 +488,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) int pathbuff_len; + va_start(ap, ctx); // ignore bogus event types.. @@ -493,6 +499,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) // if no one cares about this type of event, bail out if (fs_event_type_watchers[type] == 0) { va_end(ap); + return 0; } @@ -559,6 +566,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) last_coalesced++; unlock_fs_event_list(); va_end(ap); + return 0; } else { last_ptr = ptr; @@ -663,7 +671,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) printf("add_fsevent: event queue is full! 
dropping events (num dropped events: %d; num events outstanding: %d).\n", num_dropped, num_events_outstanding); printf("add_fsevent: kfse_list head %p ; num_pending_rename %d\n", listhead, num_pending_rename); printf("add_fsevent: zalloc sez: %p\n", junkptr); - printf("add_fsevent: event_zone info: %d %p\n", ((int *)event_zone)[0], (void *)((int *)event_zone)[1]); + printf("add_fsevent: event_zone info: %d 0x%x\n", ((int *)event_zone)[0], ((int *)event_zone)[1]); for(ii=0; ii < MAX_WATCHERS; ii++) { if (watcher_table[ii] == NULL) { continue; @@ -792,7 +800,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) // if (str && kfse->len != 0) { kfse->abstime = now; - OSAddAtomic(1, (SInt32 *)&kfse->refcount); + OSAddAtomic(1, &kfse->refcount); skip_init = 1; if (reuse_type == KFSE_COMBINED) { @@ -813,6 +821,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount); } + last_event_ptr = kfse; unlock_fs_event_list(); goto normal_delivery; @@ -828,7 +837,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) need_event_unlock = 1; lck_rw_lock_exclusive(&event_handling_lock); - OSAddAtomic(1, (SInt32 *)&kfse->refcount); + OSAddAtomic(1, &kfse->refcount); if (kfse->refcount < 1) { panic("add_fsevent: line %d: kfse recount %d but should be at least 1\n", __LINE__, kfse->refcount); @@ -855,7 +864,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) kfse->flags = 0 | KFSE_RECYCLED_EVENT; if (kfse_dest) { - OSAddAtomic(1, (SInt32 *)&kfse_dest->refcount); + OSAddAtomic(1, &kfse_dest->refcount); kfse_dest->flags = 0 | KFSE_RECYCLED_EVENT; if (did_alloc == 0) { @@ -902,6 +911,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) OSBitOrAtomic16(KFSE_BEING_CREATED, &kfse->flags); process_normally: + last_event_ptr = kfse; kfse->type = type; kfse->abstime = now; kfse->pid = p->p_pid; @@ -975,7 +985,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) 
} cur->dev = dev = (dev_t)va.va_fsid; - cur->ino = (ino_t)va.va_fileid; + cur->ino = (ino64_t)va.va_fileid; cur->mode = (int32_t)vnode_vttoif(vnode_vtype(vp)) | va.va_mode; cur->uid = va.va_uid; cur->gid = va.va_gid; @@ -986,17 +996,43 @@ add_fsevent(int type, vfs_context_t ctx, ...) pathbuff_len = MAXPATHLEN; pathbuff[0] = '\0'; - if (vn_getpath(vp, pathbuff, &pathbuff_len) != 0 || pathbuff[0] == '\0') { - printf("add_fsevent: no name hard-link! dropping the event. (event %d vp == %p (%s)). \n", - type, vp, vp->v_name ? vp->v_name : "-UNKNOWN-FILE"); - error = ENOENT; - release_pathbuff(pathbuff); - pathbuff = NULL; - if (need_event_unlock == 0) { - // then we only grabbed it shared - lck_rw_unlock_shared(&event_handling_lock); + if ((ret = vn_getpath(vp, pathbuff, &pathbuff_len)) != 0 || pathbuff[0] == '\0') { + struct vnode *orig_vp = vp; + + if (ret != ENOSPC) { + printf("add_fsevent: unable to get path for vp %p (%s; ret %d; type %d)\n", + vp, vp->v_name ? vp->v_name : "-UNKNOWN-FILE", ret, type); + } + + cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS; + + do { + if (vp->v_parent != NULL) { + vp = vp->v_parent; + } else if (vp->v_mount) { + strlcpy(pathbuff, vp->v_mount->mnt_vfsstat.f_mntonname, MAXPATHLEN); + break; + } else { + vp = NULL; + } + + if (vp == NULL) { + break; + } + + pathbuff_len = MAXPATHLEN; + ret = vn_getpath(vp, pathbuff, &pathbuff_len); + } while (ret == ENOSPC); + + if (ret != 0 || vp == NULL) { + printf("add_fsevent: unabled to get a path for vp %p. dropping the event.\n", orig_vp); + error = ENOENT; + if (need_event_unlock == 0) { + // then we only grabbed it shared + lck_rw_unlock_shared(&event_handling_lock); + } + goto clean_up; } - goto clean_up; } } @@ -1019,7 +1055,7 @@ add_fsevent(int type, vfs_context_t ctx, ...) 
fse = va_arg(ap, fse_info *); cur->dev = dev = (dev_t)fse->dev; - cur->ino = (ino_t)fse->ino; + cur->ino = (ino64_t)fse->ino; cur->mode = (int32_t)fse->mode; cur->uid = (uid_t)fse->uid; cur->gid = (uid_t)fse->gid; @@ -1027,6 +1063,10 @@ add_fsevent(int type, vfs_context_t ctx, ...) if ((fse->mode & FSE_MODE_HLINK) && fse->nlink == 0) { cur->mode |= FSE_MODE_LAST_HLINK; } + if (cur->mode & FSE_TRUNCATED_PATH) { + cur->flags |= KFSE_CONTAINS_DROPPED_EVENTS; + cur->mode &= ~FSE_TRUNCATED_PATH; + } break; } @@ -1127,12 +1167,18 @@ release_event_ref(kfs_event *kfse) kfs_event copy, dest_copy; - old_refcount = OSAddAtomic(-1, (SInt32 *)&kfse->refcount); + old_refcount = OSAddAtomic(-1, &kfse->refcount); if (old_refcount > 1) { return; } lock_fs_event_list(); + if (last_event_ptr == kfse) { + last_event_ptr = NULL; + last_event_type = -1; + last_coalesced_time = 0; + } + if (kfse->refcount < 0) { panic("release_event_ref: bogus kfse refcount %d\n", kfse->refcount); } @@ -1159,7 +1205,7 @@ release_event_ref(kfs_event *kfse) // holding the fs_event_buf lock // copy = *kfse; - if (kfse->dest && OSAddAtomic(-1, (SInt32 *)&kfse->dest->refcount) == 1) { + if (kfse->dest && OSAddAtomic(-1, &kfse->dest->refcount) == 1) { dest_copy = *kfse->dest; } else { dest_copy.str = NULL; @@ -1168,8 +1214,8 @@ release_event_ref(kfs_event *kfse) } kfse->pid = kfse->type; // save this off for debugging... - kfse->uid = (uid_t)kfse->str; // save this off for debugging... - kfse->gid = (gid_t)current_thread(); + kfse->uid = (uid_t)(long)kfse->str; // save this off for debugging... + kfse->gid = (gid_t)(long)current_thread(); kfse->str = (char *)0xdeadbeef; // XXXdbg - catch any cheaters... 
@@ -1250,7 +1296,7 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even watcher->event_list = event_list; watcher->num_events = num_events; - watcher->devices_to_watch = NULL; + watcher->devices_not_to_watch = NULL; watcher->num_devices = 0; watcher->flags = 0; watcher->event_queue = (kfs_event **)&watcher[1]; @@ -1259,6 +1305,7 @@ add_watcher(int8_t *event_list, int32_t num_events, int32_t eventq_size, fs_even watcher->wr = 0; watcher->blockers = 0; watcher->num_readers = 0; + watcher->max_event_id = 0; watcher->fseh = NULL; watcher->num_dropped = 0; // XXXdbg - debugging @@ -1326,7 +1373,7 @@ remove_watcher(fs_event_watcher *target) // printf("fsevents: removing watcher %p (rd %d wr %d num_readers %d flags 0x%x)\n", watcher, watcher->rd, watcher->wr, watcher->num_readers, watcher->flags); watcher->flags |= WATCHER_CLOSING; - OSAddAtomic(1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(1, &watcher->num_readers); unlock_watch_table(); @@ -1362,9 +1409,9 @@ remove_watcher(fs_event_watcher *target) FREE(watcher->event_list, M_TEMP); watcher->event_list = NULL; } - if (watcher->devices_to_watch) { - FREE(watcher->devices_to_watch, M_TEMP); - watcher->devices_to_watch = NULL; + if (watcher->devices_not_to_watch) { + FREE(watcher->devices_not_to_watch, M_TEMP); + watcher->devices_not_to_watch = NULL; } FREE(watcher, M_TEMP); @@ -1428,13 +1475,17 @@ schedule_event_wakeup(void) static int watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse) { + if (kfse->abstime > watcher->max_event_id) { + watcher->max_event_id = kfse->abstime; + } + if (((watcher->wr + 1) % watcher->eventq_size) == watcher->rd) { watcher->flags |= WATCHER_DROPPED_EVENTS; fsevents_wakeup(watcher); return ENOSPC; } - OSAddAtomic(1, (SInt32 *)&kfse->refcount); + OSAddAtomic(1, &kfse->refcount); watcher->event_queue[watcher->wr] = kfse; OSSynchronizeIO(); watcher->wr = (watcher->wr + 1) % watcher->eventq_size; @@ -1458,18 +1509,6 @@ 
watcher_add_event(fs_event_watcher *watcher, kfs_event *kfse) return 0; } - -// check if the next chunk of data will fit in the user's -// buffer. if not, just goto get_out which will return -// the number of bytes worth of events that we did read. -// this leaves the event that didn't fit in the queue. -// - // LP64todo - fix this -#define CHECK_UPTR(size) if (size > (unsigned)uio_resid(uio)) { \ - uio_setresid(uio, last_full_event_resid); \ - goto get_out; \ - } - static int fill_buff(uint16_t type, int32_t size, const void *data, char *buff, int32_t *_buff_idx, int32_t buff_sz, @@ -1700,11 +1739,11 @@ copy_out_kfse(fs_event_watcher *watcher, kfs_event *kfse, struct uio *uio) static int fmod_watch(fs_event_watcher *watcher, struct uio *uio) { - int error=0, last_full_event_resid; + int error=0; + user_ssize_t last_full_event_resid; kfs_event *kfse; uint16_t tmp16; - // LP64todo - fix this last_full_event_resid = uio_resid(uio); // need at least 2048 bytes of space (maxpathlen + 1 event buf) @@ -1716,26 +1755,26 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) return 0; } - if (OSAddAtomic(1, (SInt32 *)&watcher->num_readers) != 0) { + if (OSAddAtomic(1, &watcher->num_readers) != 0) { // don't allow multiple threads to read from the fd at the same time - OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(-1, &watcher->num_readers); return EAGAIN; } if (watcher->rd == watcher->wr) { if (watcher->flags & WATCHER_CLOSING) { - OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(-1, &watcher->num_readers); return 0; } - OSAddAtomic(1, (SInt32 *)&watcher->blockers); + OSAddAtomic(1, &watcher->blockers); // there's nothing to do, go to sleep error = tsleep((caddr_t)watcher, PUSER|PCATCH, "fsevents_empty", 0); - OSAddAtomic(-1, (SInt32 *)&watcher->blockers); + OSAddAtomic(-1, &watcher->blockers); if (error != 0 || (watcher->flags & WATCHER_CLOSING)) { - OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(-1, 
&watcher->num_readers); return error; } } @@ -1752,12 +1791,11 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) tmp16 = FSE_ARG_DONE; // makes it a consistent msg error = uiomove((caddr_t)&tmp16, sizeof(int16_t), uio); - // LP64todo - fix this last_full_event_resid = uio_resid(uio); } if (error) { - OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(-1, &watcher->num_readers); return error; } @@ -1798,7 +1836,6 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) } } - // LP64todo - fix this last_full_event_resid = uio_resid(uio); } @@ -1815,7 +1852,7 @@ fmod_watch(fs_event_watcher *watcher, struct uio *uio) } get_out: - OSAddAtomic(-1, (SInt32 *)&watcher->num_readers); + OSAddAtomic(-1, &watcher->num_readers); return error; } @@ -1845,6 +1882,7 @@ typedef struct fsevent_handle { UInt32 flags; SInt32 active; fs_event_watcher *watcher; + struct klist knotes; struct selinfo si; } fsevent_handle; @@ -1870,10 +1908,12 @@ fseventsf_write(__unused struct fileproc *fp, __unused struct uio *uio, return EIO; } +#pragma pack(push, 4) typedef struct ext_fsevent_dev_filter_args { uint32_t num_devices; user_addr_t devices; } ext_fsevent_dev_filter_args; +#pragma pack(pop) typedef struct old_fsevent_dev_filter_args { uint32_t num_devices; @@ -1883,6 +1923,14 @@ typedef struct old_fsevent_dev_filter_args { #define OLD_FSEVENTS_DEVICE_FILTER _IOW('s', 100, old_fsevent_dev_filter_args) #define NEW_FSEVENTS_DEVICE_FILTER _IOW('s', 100, ext_fsevent_dev_filter_args) +#if __LP64__ +/* need this in spite of the padding due to alignment of devices */ +typedef struct fsevent_dev_filter_args32 { + uint32_t num_devices; + uint32_t devices; + int32_t pad1; +} fsevent_dev_filter_args32; +#endif static int fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx) @@ -1902,7 +1950,11 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx devfilt_args->num_devices = udev_filt_args->num_devices; devfilt_args->devices 
= CAST_USER_ADDR_T(udev_filt_args->devices); } else { +#if __LP64__ + fsevent_dev_filter_args32 *udev_filt_args = (fsevent_dev_filter_args32 *)data; +#else fsevent_dev_filter_args *udev_filt_args = (fsevent_dev_filter_args *)data; +#endif devfilt_args = &_devfilt_args; memset(devfilt_args, 0, sizeof(ext_fsevent_dev_filter_args)); @@ -1932,10 +1984,16 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx break; } + case FSEVENTS_GET_CURRENT_ID: { + *(uint64_t *)data = fseh->watcher->max_event_id; + ret = 0; + break; + } + case OLD_FSEVENTS_DEVICE_FILTER: case NEW_FSEVENTS_DEVICE_FILTER: { int new_num_devices; - dev_t *devices_to_watch, *tmp=NULL; + dev_t *devices_not_to_watch, *tmp=NULL; if (devfilt_args->num_devices > 256) { ret = EINVAL; @@ -1944,10 +2002,10 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx new_num_devices = devfilt_args->num_devices; if (new_num_devices == 0) { - tmp = fseh->watcher->devices_to_watch; + tmp = fseh->watcher->devices_not_to_watch; lock_watch_table(); - fseh->watcher->devices_to_watch = NULL; + fseh->watcher->devices_not_to_watch = NULL; fseh->watcher->num_devices = new_num_devices; unlock_watch_table(); @@ -1957,26 +2015,26 @@ fseventsf_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx break; } - MALLOC(devices_to_watch, dev_t *, + MALLOC(devices_not_to_watch, dev_t *, new_num_devices * sizeof(dev_t), M_TEMP, M_WAITOK); - if (devices_to_watch == NULL) { + if (devices_not_to_watch == NULL) { ret = ENOMEM; break; } ret = copyin(devfilt_args->devices, - (void *)devices_to_watch, + (void *)devices_not_to_watch, new_num_devices * sizeof(dev_t)); if (ret) { - FREE(devices_to_watch, M_TEMP); + FREE(devices_not_to_watch, M_TEMP); break; } lock_watch_table(); fseh->watcher->num_devices = new_num_devices; - tmp = fseh->watcher->devices_to_watch; - fseh->watcher->devices_to_watch = devices_to_watch; + tmp = fseh->watcher->devices_not_to_watch; + 
fseh->watcher->devices_not_to_watch = devices_not_to_watch; unlock_watch_table(); if (tmp) { @@ -2049,10 +2107,95 @@ fseventsf_close(struct fileglob *fg, __unused vfs_context_t ctx) return 0; } +static void +filt_fsevent_detach(struct knote *kn) +{ + fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook; + + lock_watch_table(); + + KNOTE_DETACH(&fseh->knotes, kn); + + unlock_watch_table(); +} + +/* + * Determine whether this knote should be active + * + * This is kind of subtle. + * --First, notice if the vnode has been revoked: in so, override hint + * --EVFILT_READ knotes are checked no matter what the hint is + * --Other knotes activate based on hint. + * --If hint is revoke, set special flags and activate + */ +static int +filt_fsevent(struct knote *kn, long hint) +{ + fsevent_handle *fseh = (struct fsevent_handle *)kn->kn_hook; + int activate = 0; + int32_t rd, wr, amt; + + if (NOTE_REVOKE == hint) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + activate = 1; + } + + rd = fseh->watcher->rd; + wr = fseh->watcher->wr; + if (rd <= wr) { + amt = wr - rd; + } else { + amt = fseh->watcher->eventq_size - (rd - wr); + } + + switch(kn->kn_filter) { + case EVFILT_READ: + kn->kn_data = amt; + + if (kn->kn_data != 0) { + activate = 1; + } + break; + case EVFILT_VNODE: + /* Check events this note matches against the hint */ + if (kn->kn_sfflags & hint) { + kn->kn_fflags |= hint; /* Set which event occurred */ + } + if (kn->kn_fflags != 0) { + activate = 1; + } + break; + default: { + // nothing to do... 
+ break; + } + } + + return (activate); +} + + +struct filterops fsevent_filtops = { + .f_isfd = 1, + .f_attach = NULL, + .f_detach = filt_fsevent_detach, + .f_event = filt_fsevent +}; + static int fseventsf_kqfilter(__unused struct fileproc *fp, __unused struct knote *kn, __unused vfs_context_t ctx) { - // XXXdbg + fsevent_handle *fseh = (struct fsevent_handle *)fp->f_fglob->fg_data; + + kn->kn_hook = (void*)fseh; + kn->kn_hookid = 1; + kn->kn_fop = &fsevent_filtops; + + lock_watch_table(); + + KNOTE_ATTACH(&fseh->knotes, kn); + + unlock_watch_table(); return 0; } @@ -2173,10 +2316,25 @@ parse_buffer_and_add_events(const char *buffer, int bufsize, vfs_context_t ctx, event_start = ptr; // record where the next event starts dest_path_len = ptr - dest_path; - err = add_fsevent(type, ctx, - FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, - FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo, - FSE_ARG_DONE); + // + // If the destination inode number is non-zero, generate a rename + // with both source and destination FSE_ARG_FINFO. Otherwise generate + // a rename with only one FSE_ARG_FINFO. If you need to inject an + // exchange with an inode of zero, just make that inode (and its path) + // come in as the first one, not the second. 
+ // + if (dest_finfo->ino) { + err = add_fsevent(type, ctx, + FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, + FSE_ARG_STRING, dest_path_len, dest_path, FSE_ARG_FINFO, dest_finfo, + FSE_ARG_DONE); + } else { + err = add_fsevent(type, ctx, + FSE_ARG_STRING, path_len, path, FSE_ARG_FINFO, finfo, + FSE_ARG_STRING, dest_path_len, dest_path, + FSE_ARG_DONE); + } + if (err) { break; } @@ -2284,10 +2442,10 @@ typedef struct ext_fsevent_clone_args { } ext_fsevent_clone_args; typedef struct old_fsevent_clone_args { - int32_t event_list; + uint32_t event_list; int32_t num_events; int32_t event_queue_depth; - int32_t fd; + uint32_t fd; } old_fsevent_clone_args; #define OLD_FSEVENTS_CLONE _IOW('s', 1, old_fsevent_clone_args) @@ -2342,6 +2500,8 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s return ENOMEM; } memset(fseh, 0, sizeof(fsevent_handle)); + + klist_init(&fseh->knotes); MALLOC(event_list, int8_t *, fse_clone_args->num_events * sizeof(int8_t), @@ -2407,8 +2567,9 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s static void fsevents_wakeup(fs_event_watcher *watcher) { - wakeup((caddr_t)watcher); selwakeup(&watcher->fseh->si); + KNOTE(&watcher->fseh->knotes, NOTE_WRITE|NOTE_NONE); + wakeup((caddr_t)watcher); } @@ -2464,89 +2625,22 @@ fsevents_init(void) } - -// -// XXXdbg - temporary path buffer handling -// -#define NUM_PATH_BUFFS 16 -static char path_buff[NUM_PATH_BUFFS][MAXPATHLEN]; -static char path_buff_inuse[NUM_PATH_BUFFS]; - -static lck_grp_attr_t * pathbuff_group_attr; -static lck_attr_t * pathbuff_lock_attr; -static lck_grp_t * pathbuff_mutex_group; -static lck_mtx_t pathbuff_lock; - -static void -init_pathbuff(void) -{ - pathbuff_lock_attr = lck_attr_alloc_init(); - pathbuff_group_attr = lck_grp_attr_alloc_init(); - pathbuff_mutex_group = lck_grp_alloc_init("pathbuff-mutex", pathbuff_group_attr); - - lck_mtx_init(&pathbuff_lock, pathbuff_mutex_group, pathbuff_lock_attr); -} - -static 
void -lock_pathbuff(void) -{ - lck_mtx_lock(&pathbuff_lock); -} - -static void -unlock_pathbuff(void) -{ - lck_mtx_unlock(&pathbuff_lock); -} - - char * get_pathbuff(void) { - int i; - - lock_pathbuff(); - for(i=0; i < NUM_PATH_BUFFS; i++) { - if (path_buff_inuse[i] == 0) { - break; - } - } - - if (i >= NUM_PATH_BUFFS) { - char *path; - - unlock_pathbuff(); - MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - return path; - } + char *path; - path_buff_inuse[i] = 1; - unlock_pathbuff(); - return &path_buff[i][0]; + MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); + return path; } void release_pathbuff(char *path) { - int i; if (path == NULL) { return; } - - lock_pathbuff(); - for(i=0; i < NUM_PATH_BUFFS; i++) { - if (path == &path_buff[i][0]) { - path_buff[i][0] = '\0'; - path_buff_inuse[i] = 0; - unlock_pathbuff(); - return; - } - } - - unlock_pathbuff(); - - // if we get here then it wasn't one of our temp buffers FREE_ZONE(path, MAXPATHLEN, M_NAMEI); } @@ -2591,6 +2685,57 @@ get_fse_info(struct vnode *vp, fse_info *fse, __unused vfs_context_t ctx) return 0; } +void +create_fsevent_from_kevent(vnode_t vp, uint32_t kevents, struct vnode_attr *vap) +{ + int fsevent_type=FSE_CONTENT_MODIFIED, len; // the default is the most pessimistic + char pathbuf[MAXPATHLEN]; + fse_info fse; + + + if (kevents & VNODE_EVENT_DELETE) { + fsevent_type = FSE_DELETE; + } else if (kevents & (VNODE_EVENT_EXTEND|VNODE_EVENT_WRITE)) { + fsevent_type = FSE_CONTENT_MODIFIED; + } else if (kevents & VNODE_EVENT_LINK) { + fsevent_type = FSE_CREATE_FILE; + } else if (kevents & VNODE_EVENT_RENAME) { + fsevent_type = FSE_CREATE_FILE; // XXXdbg - should use FSE_RENAME but we don't have the destination info; + } else if (kevents & (VNODE_EVENT_FILE_CREATED|VNODE_EVENT_FILE_REMOVED|VNODE_EVENT_DIR_CREATED|VNODE_EVENT_DIR_REMOVED)) { + fsevent_type = FSE_STAT_CHANGED; // XXXdbg - because vp is a dir and the thing created/removed lived inside it + } else { // a catch all for 
VNODE_EVENT_PERMS, VNODE_EVENT_ATTRIB and anything else + fsevent_type = FSE_STAT_CHANGED; + } + + // printf("convert_kevent: kevents 0x%x fsevent type 0x%x (for %s)\n", kevents, fsevent_type, vp->v_name ? vp->v_name : "(no-name)"); + + fse.dev = vap->va_fsid; + fse.ino = vap->va_fileid; + fse.mode = vnode_vttoif(vnode_vtype(vp)) | (uint32_t)vap->va_mode; + if (vp->v_flag & VISHARDLINK) { + fse.mode |= FSE_MODE_HLINK; + if (vp->v_type == VDIR) { + fse.nlink = vap->va_dirlinkcount; + } else { + fse.nlink = vap->va_nlink; + } + } + + if (vp->v_type == VDIR) { + fse.mode |= FSE_REMOTE_DIR_EVENT; + } + + + fse.uid = vap->va_uid; + fse.gid = vap->va_gid; + + len = sizeof(pathbuf); + if (vn_getpath(vp, pathbuf, &len) == 0) { + add_fsevent(fsevent_type, vfs_context_current(), FSE_ARG_STRING, len, pathbuf, FSE_ARG_FINFO, &fse, FSE_ARG_DONE); + } + return; +} + #else /* CONFIG_FSE */ /* * The get_pathbuff and release_pathbuff routines are used in places not diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c index fe9c904c5..253bbcd77 100644 --- a/bsd/vfs/vfs_init.c +++ b/bsd/vfs/vfs_init.c @@ -286,6 +286,8 @@ lck_grp_attr_t * mnt_list_lck_grp_attr; lck_attr_t * mnt_list_lck_attr; lck_mtx_t * mnt_list_mtx_lock; +lck_mtx_t *pkg_extensions_lck; + struct mount * dead_mountp; /* * Initialize the vnode structures and initialize each file system type. 
@@ -311,6 +313,9 @@ vfsinit(void) /* Allocate spec hash list lock */ spechash_mtx_lock = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr); + /* Allocate the package extensions table lock */ + pkg_extensions_lck = lck_mtx_alloc_init(vnode_list_lck_grp, vnode_list_lck_attr); + /* allocate vnode lock group attribute and group */ vnode_lck_grp_attr= lck_grp_attr_alloc_init(); @@ -380,16 +385,24 @@ vfsinit(void) */ numused_vfsslots = maxtypenum = 0; for (vfsp = vfsconf, i = 0; i < maxvfsslots; i++, vfsp++) { + struct vfsconf vfsc; if (vfsp->vfc_vfsops == (struct vfsops *)0) break; if (i) vfsconf[i-1].vfc_next = vfsp; if (maxtypenum <= vfsp->vfc_typenum) maxtypenum = vfsp->vfc_typenum + 1; - /* a vfsconf is a prefix subset of a vfstable... */ - (*vfsp->vfc_vfsops->vfs_init)((struct vfsconf *)vfsp); - - lck_mtx_init(&vfsp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr); + bzero(&vfsc, sizeof(struct vfsconf)); + vfsc.vfc_reserved1 = 0; + bcopy(vfsp->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); + vfsc.vfc_typenum = vfsp->vfc_typenum; + vfsc.vfc_refcount = vfsp->vfc_refcount; + vfsc.vfc_flags = vfsp->vfc_flags; + vfsc.vfc_reserved2 = 0; + vfsc.vfc_reserved3 = 0; + + (*vfsp->vfc_vfsops->vfs_init)(&vfsc); + numused_vfsslots++; } /* next vfc_typenum to be used */ @@ -410,9 +423,9 @@ vfsinit(void) /* * create a mount point for dead vnodes */ - MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), + MALLOC_ZONE(mp, struct mount *, sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); + bzero((char *)mp, sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32; @@ -420,6 +433,8 @@ vfsinit(void) mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; mp->mnt_devblocksize = DEV_BSIZE; mp->mnt_alignmentmask = PAGE_MASK; + mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH; + mp->mnt_ioscale = 1; mp->mnt_ioflags = 0; 
mp->mnt_realrootvp = NULLVP; mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; @@ -502,21 +517,30 @@ struct vfstable * vfstable_add(struct vfstable *nvfsp) { int slot; - struct vfstable *slotp; + struct vfstable *slotp, *allocated = NULL; /* * Find the next empty slot; we recognize an empty slot by a * NULL-valued ->vfc_vfsops, so if we delete a VFS, we must * ensure we set the entry back to NULL. */ +findslot: + mount_list_lock(); for (slot = 0; slot < maxvfsslots; slot++) { if (vfsconf[slot].vfc_vfsops == NULL) break; } if (slot == maxvfsslots) { - /* out of static slots; allocate one instead */ - MALLOC(slotp, struct vfstable *, sizeof(struct vfstable), - M_TEMP, M_WAITOK); + if (allocated == NULL) { + mount_list_unlock(); + /* out of static slots; allocate one instead */ + MALLOC(allocated, struct vfstable *, sizeof(struct vfstable), + M_TEMP, M_WAITOK); + goto findslot; + } else { + slotp = allocated; + allocated = NULL; + } } else { slotp = &vfsconf[slot]; } @@ -529,7 +553,6 @@ vfstable_add(struct vfstable *nvfsp) * with the value of 'maxvfslots' in the allocation case. 
*/ bcopy(nvfsp, slotp, sizeof(struct vfstable)); - lck_mtx_init(&slotp->vfc_lock, fsconf_lck_grp, fsconf_lck_attr); if (slot != 0) { slotp->vfc_next = vfsconf[slot - 1].vfc_next; vfsconf[slot - 1].vfc_next = slotp; @@ -538,6 +561,12 @@ vfstable_add(struct vfstable *nvfsp) } numused_vfsslots++; + mount_list_unlock(); + + if (allocated != NULL) { + FREE(allocated, M_TEMP); + } + return(slotp); } @@ -560,6 +589,10 @@ vfstable_del(struct vfstable * vtbl) struct vfstable **vcpp; struct vfstable *vcdelp; +#if DEBUG + lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED); +#endif /* DEBUG */ + /* * Traverse the list looking for vtbl; if found, *vcpp * will contain the address of the pointer to the entry to @@ -577,8 +610,6 @@ vfstable_del(struct vfstable * vtbl) vcdelp = *vcpp; *vcpp = (*vcpp)->vfc_next; - lck_mtx_destroy(&vcdelp->vfc_lock, fsconf_lck_grp); - /* * Is this an entry from our static table? We find out by * seeing if the pointer to the object to be deleted places @@ -595,9 +626,15 @@ vfstable_del(struct vfstable * vtbl) * vfsconf onto our list, but it may not be persistent * because of the previous (copying) implementation. */ - FREE(vcdelp, M_TEMP); + mount_list_unlock(); + FREE(vcdelp, M_TEMP); + mount_list_lock(); } +#if DEBUG + lck_mtx_assert(mnt_list_mtx_lock, LCK_MTX_ASSERT_OWNED); +#endif /* DEBUG */ + return(0); } diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 6e53e0169..30230cf09 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2007 Apple Inc. All rights reserved. + * Copyright (c) 1995-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,6 +61,7 @@ extern task_t kernel_task; #define DBG_JOURNAL_FLUSH 1 +#include /* DTRACE_IO1 */ #else #include @@ -78,6 +79,8 @@ extern task_t kernel_task; #include "vfs_journal.h" +#if JOURNALING + /* XXX next prototytype should be from libsa/stdlib.h> but conflicts libkern */ __private_extern__ void qsort( void * array, @@ -110,10 +113,10 @@ static __inline__ void unlock_oldstart(journal *jnl); // typedef struct bucket { - off_t block_num; - size_t jnl_offset; - size_t block_size; - int32_t cksum; + off_t block_num; + uint32_t jnl_offset; + uint32_t block_size; + int32_t cksum; } bucket; #define STARTING_BUCKETS 256 @@ -140,21 +143,15 @@ static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, of __FILE__, __LINE__, jnl->jhdr->magic, JOURNAL_HEADER_MAGIC);\ }\ if ( jnl->jhdr->start <= 0 \ - || jnl->jhdr->start > jnl->jhdr->size\ - || jnl->jhdr->start > 1024*1024*1024) {\ + || jnl->jhdr->start > jnl->jhdr->size) {\ panic("%s:%d: jhdr start looks bad (0x%llx max size 0x%llx)\n", \ __FILE__, __LINE__, jnl->jhdr->start, jnl->jhdr->size);\ }\ if ( jnl->jhdr->end <= 0 \ - || jnl->jhdr->end > jnl->jhdr->size\ - || jnl->jhdr->end > 1024*1024*1024) {\ + || jnl->jhdr->end > jnl->jhdr->size) {\ panic("%s:%d: jhdr end looks bad (0x%llx max size 0x%llx)\n", \ __FILE__, __LINE__, jnl->jhdr->end, jnl->jhdr->size);\ }\ - if (jnl->jhdr->size > 1024*1024*1024) {\ - panic("%s:%d: jhdr size looks bad (0x%llx)\n",\ - __FILE__, __LINE__, jnl->jhdr->size);\ - } \ } while(0) #define CHECK_TRANSACTION(tr) \ @@ -171,10 +168,10 @@ static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, of if (tr->total_bytes < 0) {\ panic("%s:%d: tr total_bytes looks bad: %d\n", __FILE__, __LINE__, tr->total_bytes);\ }\ - if (tr->journal_start < 0 || tr->journal_start > 1024*1024*1024) {\ + if (tr->journal_start < 0) {\ panic("%s:%d: tr journal start looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_start);\ 
}\ - if (tr->journal_end < 0 || tr->journal_end > 1024*1024*1024) {\ + if (tr->journal_end < 0) {\ panic("%s:%d: tr journal end looks bad: 0x%llx\n", __FILE__, __LINE__, tr->journal_end);\ }\ if (tr->blhdr && (tr->blhdr->max_blocks <= 0 || tr->blhdr->max_blocks > (tr->jnl->jhdr->size/tr->jnl->jhdr->jhdr_size))) {\ @@ -291,7 +288,7 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction } if (curlen <= 0) { - panic("jnl: do_jnl_io: curlen == %d, offset 0x%llx len %lu\n", curlen, *offset, len); + panic("jnl: do_jnl_io: curlen == %d, offset 0x%llx len %zd\n", curlen, *offset, len); } if (*offset == 0 && (direction & JNL_HEADER) == 0) { @@ -315,10 +312,12 @@ do_journal_io(journal *jnl, off_t *offset, void *data, size_t len, int direction buf_markfua(bp); } + DTRACE_IO1(journal__start, buf_t, bp); err = VNOP_STRATEGY(bp); if (!err) { err = (int)buf_biowait(bp); } + DTRACE_IO1(journal__done, buf_t, bp); free_io_buf(bp); if (err) { @@ -363,7 +362,7 @@ read_journal_header(journal *jnl, void *data, size_t len) } static int -write_journal_header(journal *jnl) +write_journal_header(journal *jnl, int updating_start) { static int num_err_prints = 0; int ret=0; @@ -376,7 +375,7 @@ write_journal_header(journal *jnl) // Flush the track cache if we're not doing force-unit-access // writes. // - if ((jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { + if (!updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { ret = VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); } if (ret != 0) { @@ -416,7 +415,7 @@ write_journal_header(journal *jnl) // on an IDE bus analyzer with Larry Barras so while it // may seem obscure, it's not. 
// - if ((jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { + if (updating_start && (jnl->flags & JOURNAL_DO_FUA_WRITES) == 0) { VNOP_IOCTL(jnl->jdev, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, &context); } @@ -498,7 +497,7 @@ buffer_flushed_callback(struct buf *bp, void *arg) // // OSAddAtomic() returns the value of tr->num_flushed before the add // - amt_flushed += OSAddAtomic(bufsize, (SInt32 *)&tr->num_flushed); + amt_flushed += OSAddAtomic(bufsize, &tr->num_flushed); // if this transaction isn't done yet, just return as @@ -664,8 +663,8 @@ swap_block_list_header(journal *jnl, block_list_header *blhdr) for(i=0; i < blhdr->num_blocks; i++) { blhdr->binfo[i].bnum = SWAP64(blhdr->binfo[i].bnum); - blhdr->binfo[i].bsize = SWAP32(blhdr->binfo[i].bsize); - blhdr->binfo[i].b.cksum = SWAP32(blhdr->binfo[i].b.cksum); + blhdr->binfo[i].u.bi.bsize = SWAP32(blhdr->binfo[i].u.bi.bsize); + blhdr->binfo[i].u.bi.b.cksum = SWAP32(blhdr->binfo[i].u.bi.b.cksum); } } @@ -837,11 +836,11 @@ insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t num, si } // sanity check the values we're about to add - if (offset >= jnl->jhdr->size) { + if ((off_t)offset >= jnl->jhdr->size) { offset = jnl->jhdr->jhdr_size + (offset - jnl->jhdr->size); } if (size <= 0) { - panic("jnl: insert_block: bad size in insert_block (%lu)\n", size); + panic("jnl: insert_block: bad size in insert_block (%zd)\n", size); } (*buf_ptr)[blk_index].block_num = num; @@ -870,7 +869,7 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num overlap = prev_block_end - block_start; if (overlap > 0) { if (overlap % jhdr_size != 0) { - panic("jnl: do_overlap: overlap with previous entry not a multiple of %lu\n", jhdr_size); + panic("jnl: do_overlap: overlap with previous entry not a multiple of %zd\n", jhdr_size); } // if the previous entry completely overlaps this one, we need to break it into two pieces. 
@@ -893,9 +892,9 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num } // then, bail out fast if there's no overlap with the entries that follow - if (!overwrite && block_end <= (*buf_ptr)[blk_index].block_num*jhdr_size) { + if (!overwrite && block_end <= (off_t)((*buf_ptr)[blk_index].block_num*jhdr_size)) { return 0; // no overlap, no overwrite - } else if (overwrite && (blk_index + 1 >= *num_full_ptr || block_end <= (*buf_ptr)[blk_index+1].block_num*jhdr_size)) { + } else if (overwrite && (blk_index + 1 >= *num_full_ptr || block_end <= (off_t)((*buf_ptr)[blk_index+1].block_num*jhdr_size))) { (*buf_ptr)[blk_index].cksum = cksum; // update this return 1; // simple overwrite @@ -907,15 +906,15 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num // entries must be adjusted to keep the array consistent. index = blk_index; num_to_remove = 0; - while(index < *num_full_ptr && block_end > (*buf_ptr)[index].block_num*jhdr_size) { - if (block_end >= ((*buf_ptr)[index].block_num*jhdr_size + (*buf_ptr)[index].block_size)) { + while(index < *num_full_ptr && block_end > (off_t)((*buf_ptr)[index].block_num*jhdr_size)) { + if (block_end >= (off_t)(((*buf_ptr)[index].block_num*jhdr_size + (*buf_ptr)[index].block_size))) { (*buf_ptr)[index].block_num = -2; // mark this for deletion num_to_remove++; } else { overlap = block_end - (*buf_ptr)[index].block_num*jhdr_size; if (overlap > 0) { if (overlap % jhdr_size != 0) { - panic("jnl: do_overlap: overlap of %lld is not multiple of %lu\n", overlap, jhdr_size); + panic("jnl: do_overlap: overlap of %lld is not multiple of %zd\n", overlap, jhdr_size); } // if we partially overlap this entry, adjust its block number, jnl offset, and size @@ -923,14 +922,14 @@ do_overlap(journal *jnl, struct bucket **buf_ptr, int blk_index, off_t block_num (*buf_ptr)[index].cksum = 0; new_offset = (*buf_ptr)[index].jnl_offset + overlap; // check for wrap-around - if (new_offset >= 
jnl->jhdr->size) { + if ((off_t)new_offset >= jnl->jhdr->size) { new_offset = jhdr_size + (new_offset - jnl->jhdr->size); } (*buf_ptr)[index].jnl_offset = new_offset; (*buf_ptr)[index].block_size -= overlap; // sanity check for negative value if ((*buf_ptr)[index].block_size <= 0) { - panic("jnl: do_overlap: after overlap, new block size is invalid (%lu)\n", (*buf_ptr)[index].block_size); + panic("jnl: do_overlap: after overlap, new block size is invalid (%u)\n", (*buf_ptr)[index].block_size); // return -1; // if above panic is removed, return -1 for error } } @@ -1105,25 +1104,25 @@ replay_journal(journal *jnl) } if ( (last_sequence_num != 0) - && (blhdr->binfo[0].b.sequence_num != 0) - && (blhdr->binfo[0].b.sequence_num != last_sequence_num) - && (blhdr->binfo[0].b.sequence_num != last_sequence_num+1)) { + && (blhdr->binfo[0].u.bi.b.sequence_num != 0) + && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num) + && (blhdr->binfo[0].u.bi.b.sequence_num != last_sequence_num+1)) { txn_start_offset = jnl->jhdr->end = blhdr_offset; if (check_past_jnl_end) { check_past_jnl_end = 0; printf("jnl: %s: 2: extra replay stopped @ %lld / 0x%llx (seq %d < %d)\n", - jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].b.sequence_num, last_sequence_num); + jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num); continue; } printf("jnl: %s: txn sequence numbers out of order in txn @ %lld / %llx! 
(%d < %d)\n", - jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].b.sequence_num, last_sequence_num); + jnl->jdev_name, blhdr_offset, blhdr_offset, blhdr->binfo[0].u.bi.b.sequence_num, last_sequence_num); bad_blocks = 1; goto bad_txn_handling; } - last_sequence_num = blhdr->binfo[0].b.sequence_num; + last_sequence_num = blhdr->binfo[0].u.bi.b.sequence_num; if (blhdr_offset >= jnl->jhdr->end && jnl->jhdr->start <= jnl->jhdr->end) { if (last_sequence_num == 0) { @@ -1138,7 +1137,7 @@ replay_journal(journal *jnl) printf("jnl: %s: examining extra transactions starting @ %lld / 0x%llx\n", jnl->jdev_name, blhdr_offset, blhdr_offset); } - if ( blhdr->max_blocks <= 0 || blhdr->max_blocks > 2048 + if ( blhdr->max_blocks <= 0 || blhdr->max_blocks > (jnl->jhdr->size/jnl->jhdr->jhdr_size) || blhdr->num_blocks <= 0 || blhdr->num_blocks > blhdr->max_blocks) { printf("jnl: %s: replay_journal: bad looking journal entry: max: %d num: %d\n", jnl->jdev_name, blhdr->max_blocks, blhdr->num_blocks); @@ -1154,8 +1153,8 @@ replay_journal(journal *jnl) goto bad_txn_handling; } - if (blhdr->binfo[i].bsize > max_bsize) { - max_bsize = blhdr->binfo[i].bsize; + if ((size_t)blhdr->binfo[i].u.bi.bsize > max_bsize) { + max_bsize = blhdr->binfo[i].u.bi.bsize; } } @@ -1179,7 +1178,7 @@ replay_journal(journal *jnl) int size, ret_val; off_t number; - size = blhdr->binfo[i].bsize; + size = blhdr->binfo[i].u.bi.bsize; number = blhdr->binfo[i].bnum; // don't add "killed" blocks @@ -1205,9 +1204,9 @@ replay_journal(journal *jnl) // there is no need to swap the checksum from disk because // it got swapped when the blhdr was read in. 
- if (blhdr->binfo[i].b.cksum != 0 && disk_cksum != blhdr->binfo[i].b.cksum) { + if (blhdr->binfo[i].u.bi.b.cksum != 0 && disk_cksum != blhdr->binfo[i].u.bi.b.cksum) { printf("jnl: %s: txn starting at %lld (%lld) @ index %3d bnum %lld (%d) with disk cksum != blhdr cksum (0x%.8x 0x%.8x)\n", - jnl->jdev_name, txn_start_offset, blhdr_offset, i, number, size, disk_cksum, blhdr->binfo[i].b.cksum); + jnl->jdev_name, txn_start_offset, blhdr_offset, i, number, size, disk_cksum, blhdr->binfo[i].u.bi.b.cksum); printf("jnl: 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n", *(int *)&block_ptr[0*sizeof(int)], *(int *)&block_ptr[1*sizeof(int)], *(int *)&block_ptr[2*sizeof(int)], *(int *)&block_ptr[3*sizeof(int)], *(int *)&block_ptr[4*sizeof(int)], *(int *)&block_ptr[5*sizeof(int)], *(int *)&block_ptr[6*sizeof(int)], *(int *)&block_ptr[7*sizeof(int)]); @@ -1220,7 +1219,7 @@ replay_journal(journal *jnl) // add this bucket to co_buf, coalescing where possible // printf("jnl: replay_journal: adding block 0x%llx\n", number); - ret_val = add_block(jnl, &co_buf, number, size, (size_t) offset, blhdr->binfo[i].b.cksum, &num_buckets, &num_full); + ret_val = add_block(jnl, &co_buf, number, size, (size_t) offset, blhdr->binfo[i].u.bi.b.cksum, &num_buckets, &num_full); if (ret_val == -1) { printf("jnl: %s: replay_journal: trouble adding block to co_buf\n", jnl->jdev_name); @@ -1330,7 +1329,7 @@ replay_journal(journal *jnl) // done replaying; update jnl header - if (write_journal_header(jnl) != 0) { + if (write_journal_header(jnl, 1) != 0) { goto bad_replay; } @@ -1363,8 +1362,7 @@ replay_journal(journal *jnl) #define DEFAULT_TRANSACTION_BUFFER_SIZE (128*1024) -//#define DEFAULT_TRANSACTION_BUFFER_SIZE (256*1024) // better performance but uses more mem -#define MAX_TRANSACTION_BUFFER_SIZE (512*1024) +#define MAX_TRANSACTION_BUFFER_SIZE (2048*1024) // XXXdbg - so I can change it in the debugger int def_tbuffer_size = 0; @@ -1389,8 +1387,8 @@ size_up_tbuffer(journal *jnl, int 
tbuffer_size, int phys_blksz) def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 2; } else if (mem_size < (1024*1024*1024)) { def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 3; - } else if (mem_size >= (1024*1024*1024)) { - def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * 4; + } else { + def_tbuffer_size = DEFAULT_TRANSACTION_BUFFER_SIZE * (mem_size / (256*1024*1024)); } } @@ -1435,8 +1433,9 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con { off_t readblockcnt; off_t writeblockcnt; - off_t readmaxcnt; - off_t writemaxcnt; + off_t readmaxcnt=0, tmp_readmaxcnt; + off_t writemaxcnt=0, tmp_writemaxcnt; + off_t readsegcnt, writesegcnt; int32_t features; if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, context) == 0) { @@ -1447,39 +1446,64 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con } } - if (VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, (caddr_t)&readmaxcnt, 0, context)) { - readmaxcnt = 0; - } + // + // First check the max read size via several different mechanisms... + // + VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD, (caddr_t)&readmaxcnt, 0, context); + if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t)&readblockcnt, 0, context) == 0) { + tmp_readmaxcnt = readblockcnt * phys_blksz; + if (readmaxcnt == 0 || (readblockcnt > 0 && tmp_readmaxcnt < readmaxcnt)) { + readmaxcnt = tmp_readmaxcnt; + } + } + + if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t)&readsegcnt, 0, context)) { + readsegcnt = 0; + } + + if (readsegcnt > 0 && (readsegcnt * PAGE_SIZE) < readmaxcnt) { + readmaxcnt = readsegcnt * PAGE_SIZE; + } + if (readmaxcnt == 0) { - if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t)&readblockcnt, 0, context)) { readmaxcnt = 128 * 1024; - } else { - readmaxcnt = readblockcnt * phys_blksz; - } + } else if (readmaxcnt > UINT32_MAX) { + readmaxcnt = UINT32_MAX; + } + + + // + // Now check the max writes size via several different mechanisms... 
+ // + VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t)&writemaxcnt, 0, context); + + if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t)&writeblockcnt, 0, context) == 0) { + tmp_writemaxcnt = writeblockcnt * phys_blksz; + if (writemaxcnt == 0 || (writeblockcnt > 0 && tmp_writemaxcnt < writemaxcnt)) { + writemaxcnt = tmp_writemaxcnt; + } } + if (VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t)&writesegcnt, 0, context)) { + writesegcnt = 0; + } - if (VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t)&writemaxcnt, 0, context)) { - writemaxcnt = 0; + if (writesegcnt > 0 && (writesegcnt * PAGE_SIZE) < writemaxcnt) { + writemaxcnt = writesegcnt * PAGE_SIZE; } if (writemaxcnt == 0) { - if (VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t)&writeblockcnt, 0, context)) { writemaxcnt = 128 * 1024; - } else { - writemaxcnt = writeblockcnt * phys_blksz; - } + } else if (writemaxcnt > UINT32_MAX) { + writemaxcnt = UINT32_MAX; } jnl->max_read_size = readmaxcnt; jnl->max_write_size = writemaxcnt; - - // just in case it's still zero... - if (jnl->max_read_size == 0) { - jnl->max_read_size = 128 * 1024; - jnl->max_write_size = 128 * 1024; - } + // printf("jnl: %s: max read/write: %lld k / %lld k\n", + // jnl->jdev_name ? 
jnl->jdev_name : "unknown", + // jnl->max_read_size/1024, jnl->max_write_size/1024); } @@ -1512,7 +1536,7 @@ journal_create(struct vnode *jvp, void *arg) { journal *jnl; - size_t phys_blksz; + uint32_t phys_blksz, new_txn_base; struct vfs_context context; const char *jdev_name; @@ -1526,14 +1550,19 @@ journal_create(struct vnode *jvp, return NULL; } + if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) { + printf("jnl: create: journal size %lld looks bogus.\n", journal_size); + return NULL; + } + if (phys_blksz > min_fs_blksz) { - printf("jnl: %s: create: error: phys blksize %lu bigger than min fs blksize %lu\n", + printf("jnl: %s: create: error: phys blksize %u bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_blksz); return NULL; } if ((journal_size % phys_blksz) != 0) { - printf("jnl: %s: create: journal size 0x%llx is not an even multiple of block size 0x%lx\n", + printf("jnl: %s: create: journal size 0x%llx is not an even multiple of block size 0x%ux\n", jdev_name, journal_size, phys_blksz); return NULL; } @@ -1554,13 +1583,60 @@ journal_create(struct vnode *jvp, get_io_info(jvp, phys_blksz, jnl, &context); if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { - printf("jnl: %s: create: could not allocate space for header buffer (%lu bytes)\n", jdev_name, phys_blksz); + printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz); goto bad_kmem_alloc; } + jnl->header_buf_size = phys_blksz; + + jnl->jhdr = (journal_header *)jnl->header_buf; + memset(jnl->jhdr, 0, sizeof(journal_header)); + + // we have to set this up here so that do_journal_io() will work + jnl->jhdr->jhdr_size = phys_blksz; + + // + // We try and read the journal header to see if there is already one + // out there. 
If there is, it's possible that it has transactions + // in it that we might replay if we happen to pick a sequence number + // that is a little less than the old one, there is a crash and the + // last txn written ends right at the start of a txn from the previous + // incarnation of this file system. If all that happens we would + // replay the transactions from the old file system and that would + // destroy your disk. Although it is extremely unlikely for all those + // conditions to happen, the probability is non-zero and the result is + // severe - you lose your file system. Therefore if we find a valid + // journal header and the sequence number is non-zero we write junk + // over the entire journal so that there is no way we will encounter + // any old transactions. This is slow but should be a rare event + // since most tools erase the journal. + // + if ( read_journal_header(jnl, jnl->jhdr, phys_blksz) == phys_blksz + && jnl->jhdr->magic == JOURNAL_HEADER_MAGIC + && jnl->jhdr->sequence_num != 0) { + + new_txn_base = (jnl->jhdr->sequence_num + (journal_size / phys_blksz) + (random() % 16384)) & 0x00ffffff; + printf("jnl: create: avoiding old sequence number 0x%x (0x%x)\n", jnl->jhdr->sequence_num, new_txn_base); + +#if 0 + int i; + off_t pos=0; + + for(i=1; i < journal_size / phys_blksz; i++) { + pos = i*phys_blksz; + + // we don't really care what data we write just so long + // as it's not a valid transaction header. since we have + // the header_buf sitting around we'll use that. 
+ write_journal_data(jnl, &pos, jnl->header_buf, phys_blksz); + } + printf("jnl: create: done clearing journal (i=%d)\n", i); +#endif + } else { + new_txn_base = random() & 0x00ffffff; + } memset(jnl->header_buf, 0, phys_blksz); - jnl->jhdr = (journal_header *)jnl->header_buf; jnl->jhdr->magic = JOURNAL_HEADER_MAGIC; jnl->jhdr->endian = ENDIAN_MAGIC; jnl->jhdr->start = phys_blksz; // start at block #1, block #0 is for the jhdr itself @@ -1569,17 +1645,17 @@ journal_create(struct vnode *jvp, jnl->jhdr->jhdr_size = phys_blksz; size_up_tbuffer(jnl, tbuffer_size, phys_blksz); - jnl->active_start = jnl->jhdr->start; + jnl->active_start = jnl->jhdr->start; // XXXdbg - for testing you can force the journal to wrap around // jnl->jhdr->start = jnl->jhdr->size - (phys_blksz*3); // jnl->jhdr->end = jnl->jhdr->size - (phys_blksz*3); - jnl->jhdr->sequence_num = random() & 0x00ffffff; + jnl->jhdr->sequence_num = new_txn_base; - lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); - if (write_journal_header(jnl) != 0) { + if (write_journal_header(jnl, 1) != 0) { printf("jnl: %s: journal_create: failed to write journal header.\n", jdev_name); goto bad_write; } @@ -1611,8 +1687,8 @@ journal_open(struct vnode *jvp, void *arg) { journal *jnl; - int orig_blksz=0; - size_t phys_blksz; + uint32_t orig_blksz=0; + uint32_t phys_blksz; int orig_checksum, checksum; struct vfs_context context; const char *jdev_name = get_jdev_name(jvp); @@ -1626,13 +1702,18 @@ journal_open(struct vnode *jvp, } if (phys_blksz > min_fs_blksz) { - printf("jnl: %s: open: error: phys blksize %lu bigger than min fs blksize %lu\n", + printf("jnl: %s: open: error: phys blksize %u bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_blksz); return NULL; } + if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) { + printf("jnl: open: journal size %lld looks bogus.\n", journal_size); + return NULL; + } + if ((journal_size % 
phys_blksz) != 0) { - printf("jnl: %s: open: journal size 0x%llx is not an even multiple of block size 0x%lx\n", + printf("jnl: %s: open: journal size 0x%llx is not an even multiple of block size 0x%x\n", jdev_name, journal_size, phys_blksz); return NULL; } @@ -1652,9 +1733,10 @@ journal_open(struct vnode *jvp, get_io_info(jvp, phys_blksz, jnl, &context); if (kmem_alloc(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { - printf("jnl: %s: create: could not allocate space for header buffer (%lu bytes)\n", jdev_name, phys_blksz); + printf("jnl: %s: create: could not allocate space for header buffer (%u bytes)\n", jdev_name, phys_blksz); goto bad_kmem_alloc; } + jnl->header_buf_size = phys_blksz; jnl->jhdr = (journal_header *)jnl->header_buf; memset(jnl->jhdr, 0, sizeof(journal_header)); @@ -1663,7 +1745,7 @@ journal_open(struct vnode *jvp, jnl->jhdr->jhdr_size = phys_blksz; if (read_journal_header(jnl, jnl->jhdr, phys_blksz) != phys_blksz) { - printf("jnl: %s: open: could not read %lu bytes for the journal header.\n", + printf("jnl: %s: open: could not read %u bytes for the journal header.\n", jdev_name, phys_blksz); goto bad_journal; } @@ -1713,16 +1795,15 @@ journal_open(struct vnode *jvp, */ if (jnl->jhdr->start == jnl->jhdr->end) { - int err; - printf("jnl: %s: open: changing journal header size from %d to %lu\n", + printf("jnl: %s: open: changing journal header size from %d to %u\n", jdev_name, jnl->jhdr->jhdr_size, phys_blksz); jnl->jhdr->jhdr_size = phys_blksz; - if (write_journal_header(jnl)) { + if (write_journal_header(jnl, 1)) { printf("jnl: %s: open: failed to update journal header size\n", jdev_name); goto bad_journal; } } else { - printf("jnl: %s: open: phys_blksz %lu does not match journal header size %d, and journal is not empty!\n", + printf("jnl: %s: open: phys_blksz %u does not match journal header size %d, and journal is not empty!\n", jdev_name, phys_blksz, jnl->jhdr->jhdr_size); goto bad_journal; } @@ -1744,7 +1825,7 @@ 
journal_open(struct vnode *jvp, goto bad_journal; } - if (jnl->jhdr->size > 1024*1024*1024) { + if (jnl->jhdr->size < (256*1024) || jnl->jhdr->size > 1024*1024*1024) { printf("jnl: %s: open: jhdr size looks bad (0x%llx)\n", jdev_name, jnl->jhdr->size); goto bad_journal; } @@ -1781,7 +1862,7 @@ journal_open(struct vnode *jvp, if (orig_blksz != 0) { VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&orig_blksz, FWRITE, &context); phys_blksz = orig_blksz; - if (orig_blksz < jnl->jhdr->jhdr_size) { + if (orig_blksz < (uint32_t)jnl->jhdr->jhdr_size) { printf("jnl: %s: open: jhdr_size is %d but orig phys blk size is %d. switching.\n", jdev_name, jnl->jhdr->jhdr_size, orig_blksz); @@ -1795,6 +1876,12 @@ journal_open(struct vnode *jvp, // set this now, after we've replayed the journal size_up_tbuffer(jnl, tbuffer_size, phys_blksz); + if ((off_t)(jnl->jhdr->blhdr_size/sizeof(block_info)-1) > (jnl->jhdr->size/jnl->jhdr->jhdr_size)) { + printf("jnl: %s: open: jhdr size and blhdr size are not compatible (0x%llx, %d, %d)\n", jdev_name, jnl->jhdr->size, + jnl->jhdr->blhdr_size, jnl->jhdr->jhdr_size); + goto bad_journal; + } + lck_mtx_init(&jnl->jlock, jnl_mutex_group, jnl_lock_attr); return jnl; @@ -1822,7 +1909,8 @@ journal_is_clean(struct vnode *jvp, size_t min_fs_block_size) { journal jnl; - int phys_blksz, ret; + uint32_t phys_blksz; + int ret; int orig_checksum, checksum; struct vfs_context context; const char *jdev_name = get_jdev_name(jvp); @@ -1836,12 +1924,17 @@ journal_is_clean(struct vnode *jvp, return EINVAL; } - if (phys_blksz > (int)min_fs_block_size) { - printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %lu\n", + if (phys_blksz > (uint32_t)min_fs_block_size) { + printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_block_size); return EINVAL; } + if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) { + printf("jnl: is_clean: journal size %lld looks bogus.\n", 
journal_size); + return EINVAL; + } + if ((journal_size % phys_blksz) != 0) { printf("jnl: %s: is_clean: journal size 0x%llx is not an even multiple of block size 0x%x\n", jdev_name, journal_size, phys_blksz); @@ -1854,6 +1947,7 @@ journal_is_clean(struct vnode *jvp, printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz); return ENOMEM; } + jnl.header_buf_size = phys_blksz; get_io_info(jvp, phys_blksz, &jnl, &context); @@ -1907,7 +2001,7 @@ journal_is_clean(struct vnode *jvp, if (jnl.jhdr->start == jnl.jhdr->end) { ret = 0; } else { - ret = EINVAL; + ret = EBUSY; // so the caller can differentiate an invalid journal from a "busy" one } get_out: @@ -1977,7 +2071,7 @@ journal_close(journal *jnl) jnl->jhdr->start = jnl->active_start; // if this fails there's not much we can do at this point... - write_journal_header(jnl); + write_journal_header(jnl, 1); } else { // if we're here the journal isn't valid any more. // so make sure we don't leave any locked blocks lying around @@ -2001,7 +2095,7 @@ journal_close(journal *jnl) free_old_stuff(jnl); - kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->jhdr->jhdr_size); + kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size); jnl->jhdr = (void *)0xbeefbabe; if (jnl->jdev_name) { @@ -2116,7 +2210,7 @@ check_free_space(journal *jnl, int desired_size) jnl->old_start[i] = 0; if (free_space(jnl) > desired_size) { unlock_oldstart(jnl); - write_journal_header(jnl); + write_journal_header(jnl, 1); lock_oldstart(jnl); break; } @@ -2137,7 +2231,7 @@ check_free_space(journal *jnl, int desired_size) // start of the loop. // jnl->jhdr->start = jnl->active_start; - write_journal_header(jnl); + write_journal_header(jnl, 1); continue; } @@ -2298,9 +2392,50 @@ journal_modify_block_start(journal *jnl, struct buf *bp) // can't allow blocks that aren't an even multiple of the // underlying block size. 
if ((buf_size(bp) % jnl->jhdr->jhdr_size) != 0) { + uint32_t phys_blksz, bad=0; + + if (VNOP_IOCTL(jnl->jdev, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, vfs_context_kernel())) { + bad = 1; + } else if (phys_blksz != (uint32_t)jnl->jhdr->jhdr_size) { + if (phys_blksz < 512) { + panic("jnl: mod block start: phys blksz %d is too small (%d, %d)\n", + phys_blksz, buf_size(bp), jnl->jhdr->jhdr_size); + } + + if ((buf_size(bp) % phys_blksz) != 0) { + bad = 1; + } else if (phys_blksz < (uint32_t)jnl->jhdr->jhdr_size) { + jnl->jhdr->jhdr_size = phys_blksz; + } else { + // the phys_blksz is now larger... need to realloc the jhdr + char *new_header_buf; + + printf("jnl: %s: phys blksz got bigger (was: %d/%d now %d)\n", + jnl->jdev_name, jnl->header_buf_size, jnl->jhdr->jhdr_size, phys_blksz); + if (kmem_alloc(kernel_map, (vm_offset_t *)&new_header_buf, phys_blksz)) { + printf("jnl: modify_block_start: %s: create: phys blksz change (was %d, now %d) but could not allocate space for new header\n", + jnl->jdev_name, jnl->jhdr->jhdr_size, phys_blksz); + bad = 1; + } else { + memcpy(new_header_buf, jnl->header_buf, jnl->header_buf_size); + memset(&new_header_buf[jnl->header_buf_size], 0x18, (phys_blksz - jnl->header_buf_size)); + kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size); + jnl->header_buf = new_header_buf; + jnl->header_buf_size = phys_blksz; + + jnl->jhdr = (journal_header *)jnl->header_buf; + jnl->jhdr->jhdr_size = phys_blksz; + } + } + } else { + bad = 1; + } + + if (bad) { panic("jnl: mod block start: bufsize %d not a multiple of block size %d\n", buf_size(bp), jnl->jhdr->jhdr_size); return -1; + } } // make sure that this transaction isn't bigger than the whole journal @@ -2368,11 +2503,7 @@ journal_modify_block_abort(journal *jnl, struct buf *bp) // first check if it's already part of this transaction for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) { for(i=1; i < blhdr->num_blocks; i++) { - if (bp == 
blhdr->binfo[i].b.bp) { - if (buf_size(bp) != blhdr->binfo[i].bsize) { - panic("jnl: bp @ %p changed size on me! (%d vs. %lu, jnl %p)\n", - bp, buf_size(bp), blhdr->binfo[i].bsize, jnl); - } + if (bp == blhdr->binfo[i].u.bp) { break; } } @@ -2435,14 +2566,14 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf * tbuffer_offset = jnl->jhdr->blhdr_size; for(i=1; i < blhdr->num_blocks; i++) { - if (bp == blhdr->binfo[i].b.bp) { - if (buf_size(bp) != blhdr->binfo[i].bsize) { - panic("jnl: bp @ %p changed size on me! (%d vs. %lu, jnl %p)\n", - bp, buf_size(bp), blhdr->binfo[i].bsize, jnl); - } + if (bp == blhdr->binfo[i].u.bp) { break; } - tbuffer_offset += blhdr->binfo[i].bsize; + if (blhdr->binfo[i].bnum != (off_t)-1) { + tbuffer_offset += buf_size(blhdr->binfo[i].u.bp); + } else { + tbuffer_offset += blhdr->binfo[i].u.bi.bsize; + } } if (i < blhdr->num_blocks) { @@ -2521,15 +2652,14 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(struct buf * vnode_ref(vp); bsize = buf_size(bp); - blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); - blhdr->binfo[i].bsize = bsize; - blhdr->binfo[i].b.bp = bp; + blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); + blhdr->binfo[i].u.bp = bp; if (func) { void *old_func=NULL, *old_arg=NULL; buf_setfilter(bp, func, arg, &old_func, &old_arg); - if (old_func != NULL) { - panic("jnl: modify_block_end: old func %p / arg %p", old_func, old_arg); + if (old_func != NULL && old_func != func) { + panic("jnl: modify_block_end: old func %p / arg %p (func %p)", old_func, old_arg, func); } } @@ -2579,7 +2709,7 @@ journal_kill_block(journal *jnl, struct buf *bp) for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) { for(i=1; i < blhdr->num_blocks; i++) { - if (bp == blhdr->binfo[i].b.bp) { + if (bp == blhdr->binfo[i].u.bp) { vnode_t vp; buf_clearflags(bp, B_LOCKED); @@ -2598,8 +2728,9 @@ journal_kill_block(journal *jnl, struct buf *bp) //} else { tr->num_killed += buf_size(bp); 
//} - blhdr->binfo[i].b.bp = NULL; blhdr->binfo[i].bnum = (off_t)-1; + blhdr->binfo[i].u.bp = NULL; + blhdr->binfo[i].u.bi.bsize = buf_size(bp); buf_markinvalid(bp); buf_brelse(bp); @@ -2624,17 +2755,17 @@ journal_binfo_cmp(const void *a, const void *b) const block_info *bi_b = (const struct block_info *)b; daddr64_t res; - if (bi_a->b.bp == NULL) { + if (bi_a->bnum == (off_t)-1) { return 1; } - if (bi_b->b.bp == NULL) { + if (bi_b->bnum == (off_t)-1) { return -1; } // don't have to worry about negative block // numbers so this is ok to do. // - res = (buf_blkno(bi_a->b.bp) - buf_blkno(bi_b->b.bp)); + res = (buf_blkno(bi_a->u.bp) - buf_blkno(bi_b->u.bp)); return (int)res; } @@ -2752,7 +2883,9 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void // slide everyone else down and put our latest guy in the last // entry in the old_start array // - memcpy(&jnl->old_start[0], &jnl->old_start[1], sizeof(jnl->old_start)-sizeof(jnl->old_start[0])); + + /* Because old_start is locked above, we can cast away the volatile qualifier before passing it to memcpy. */ + memcpy(__CAST_AWAY_QUALIFIER(&jnl->old_start[0], volatile, void *), __CAST_AWAY_QUALIFIER(&jnl->old_start[1], volatile, void *), sizeof(jnl->old_start)-sizeof(jnl->old_start[0])); jnl->old_start[sizeof(jnl->old_start)/sizeof(jnl->old_start[0]) - 1] = tr->journal_start | 0x8000000000000000LL; unlock_oldstart(jnl); @@ -2768,15 +2901,20 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void daddr64_t lblkno; struct vnode *vp; - bp = blhdr->binfo[i].b.bp; + bp = blhdr->binfo[i].u.bp; // if this block has a callback function set, call // it now and then copy the data from the bp into // the journal. 
- if (bp) { + if (blhdr->binfo[i].bnum != (off_t)-1) { void (*func)(struct buf *, void *); void *arg; + if (bp == NULL) { + panic("jnl: inconsistent binfo (NULL bp w/bnum %lld; jnl @ %p, tr %p)\n", + blhdr->binfo[i].bnum, jnl, tr); + } + buf_setfilter(bp, NULL, NULL, (void **)&func, &arg); if (func) { @@ -2803,17 +2941,13 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void } } - } else { // bp == NULL, only true if a block was "killed" - if (blhdr->binfo[i].bnum != (off_t)-1) { - panic("jnl: inconsistent binfo (NULL bp w/bnum %lld; jnl @ %p, tr %p)\n", - blhdr->binfo[i].bnum, jnl, tr); - } + } else { // bnum == -1, only true if a block was "killed" - tbuffer_offset += blhdr->binfo[i].bsize; + tbuffer_offset += blhdr->binfo[i].u.bi.bsize; continue; } - tbuffer_offset += blhdr->binfo[i].bsize; + tbuffer_offset += buf_size(bp); vp = buf_vnode(bp); blkno = buf_blkno(bp); @@ -2859,33 +2993,41 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void for(blhdr=tr->blhdr; blhdr; blhdr=(block_list_header *)((long)blhdr->binfo[0].bnum)) { amt = blhdr->bytes_used; - blhdr->binfo[0].b.sequence_num = tr->sequence_num; + blhdr->binfo[0].u.bi.b.sequence_num = tr->sequence_num; blhdr->checksum = 0; blhdr->checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); if (kmem_alloc(kernel_map, (vm_offset_t *)&bparray, blhdr->num_blocks * sizeof(struct buf *))) { - panic("can't allocate %lu bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *)); + panic("can't allocate %zd bytes for bparray\n", blhdr->num_blocks * sizeof(struct buf *)); } // calculate individual block checksums tbuffer_offset = jnl->jhdr->blhdr_size; for(i=1; i < blhdr->num_blocks; i++) { - bparray[i] = blhdr->binfo[i].b.bp; - if (bparray[i]) { - blhdr->binfo[i].b.cksum = calc_checksum(&((char *)blhdr)[tbuffer_offset], blhdr->binfo[i].bsize); + int32_t bsize; + + if (blhdr->binfo[i].bnum != (off_t)-1) { + bparray[i] = blhdr->binfo[i].u.bp; + bsize = 
buf_size(bparray[i]); + blhdr->binfo[i].u.bi.bsize = bsize; + blhdr->binfo[i].u.bi.b.cksum = calc_checksum(&((char *)blhdr)[tbuffer_offset], bsize); } else { - blhdr->binfo[i].b.cksum = 0; + bparray[i] = NULL; + bsize = blhdr->binfo[i].u.bi.bsize; + blhdr->binfo[i].u.bi.b.cksum = 0; } - tbuffer_offset += blhdr->binfo[i].bsize; + tbuffer_offset += bsize; } ret = write_journal_data(jnl, &end, blhdr, amt); // always put the bp pointers back for(i=1; i < blhdr->num_blocks; i++) { - blhdr->binfo[i].b.bp = bparray[i]; + if (blhdr->binfo[i].bnum != (off_t)-1) { + blhdr->binfo[i].u.bp = bparray[i]; + } } kmem_free(kernel_map, (vm_offset_t)bparray, blhdr->num_blocks * sizeof(struct buf *)); @@ -2905,7 +3047,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void tr->journal_start, tr->journal_end); } - if (write_journal_header(jnl) != 0) { + if (write_journal_header(jnl, 0) != 0) { goto bad_journal; } @@ -2939,11 +3081,11 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void qsort(&blhdr->binfo[1], blhdr->num_blocks-1, sizeof(block_info), journal_binfo_cmp); for(i=1; i < blhdr->num_blocks; i++) { - if (blhdr->binfo[i].b.bp == NULL) { + if (blhdr->binfo[i].bnum == (off_t)-1) { continue; } - bp = blhdr->binfo[i].b.bp; + bp = blhdr->binfo[i].u.bp; // have to pass BAC_REMOVE here because we're going to bawrite() // the buffer when we're done @@ -3023,23 +3165,23 @@ abort_transaction(journal *jnl, transaction *tr) for(blhdr=tr->blhdr; blhdr; blhdr=next) { for(i=1; i < blhdr->num_blocks; i++) { - if (blhdr->binfo[i].b.bp == NULL) { + if (blhdr->binfo[i].bnum == (off_t)-1) { continue; } - if ( (buf_vnode(blhdr->binfo[i].b.bp) == NULL) || - !(buf_flags(blhdr->binfo[i].b.bp) & B_LOCKED) ) { + if ( (buf_vnode(blhdr->binfo[i].u.bp) == NULL) || + !(buf_flags(blhdr->binfo[i].u.bp) & B_LOCKED) ) { continue; } - errno = buf_meta_bread(buf_vnode(blhdr->binfo[i].b.bp), - buf_lblkno(blhdr->binfo[i].b.bp), - 
buf_size(blhdr->binfo[i].b.bp), + errno = buf_meta_bread(buf_vnode(blhdr->binfo[i].u.bp), + buf_lblkno(blhdr->binfo[i].u.bp), + buf_size(blhdr->binfo[i].u.bp), NOCRED, &bp); if (errno == 0) { - if (bp != blhdr->binfo[i].b.bp) { + if (bp != blhdr->binfo[i].u.bp) { panic("jnl: abort_tr: got back a different bp! (bp %p should be %p, jnl %p\n", - bp, blhdr->binfo[i].b.bp, jnl); + bp, blhdr->binfo[i].u.bp, jnl); } // releasing a bp marked invalid @@ -3052,7 +3194,7 @@ abort_transaction(journal *jnl, transaction *tr) vnode_rele_ext(save_vp, 0, 1); } else { printf("jnl: %s: abort_tr: could not find block %Ld vp %p!\n", - jnl->jdev_name, blhdr->binfo[i].bnum, blhdr->binfo[i].b.bp); + jnl->jdev_name, blhdr->binfo[i].bnum, blhdr->binfo[i].u.bp); if (bp) { buf_brelse(bp); } @@ -3337,3 +3479,112 @@ int journal_relocate(journal *jnl, off_t offset, off_t journal_size, int32_t tbu abort_transaction(jnl, tr); return ret; } + + +#else // !JOURNALING - so provide stub functions + +int journal_uses_fua(__unused journal *jnl) +{ + return 0; +} + +journal * +journal_create(__unused struct vnode *jvp, + __unused off_t offset, + __unused off_t journal_size, + __unused struct vnode *fsvp, + __unused size_t min_fs_blksz, + __unused int32_t flags, + __unused int32_t tbuffer_size, + __unused void (*flush)(void *arg), + __unused void *arg) +{ + return NULL; +} + +journal * +journal_open(__unused struct vnode *jvp, + __unused off_t offset, + __unused off_t journal_size, + __unused struct vnode *fsvp, + __unused size_t min_fs_blksz, + __unused int32_t flags, + __unused int32_t tbuffer_size, + __unused void (*flush)(void *arg), + __unused void *arg) +{ + return NULL; +} + + +int +journal_modify_block_start(__unused journal *jnl, __unused struct buf *bp) +{ + return EINVAL; +} + +int +journal_modify_block_end(__unused journal *jnl, + __unused struct buf *bp, + __unused void (*func)(struct buf *bp, void *arg), + __unused void *arg) +{ + return EINVAL; +} + +int +journal_kill_block(__unused journal 
*jnl, __unused struct buf *bp) +{ + return EINVAL; +} + +int journal_relocate(__unused journal *jnl, + __unused off_t offset, + __unused off_t journal_size, + __unused int32_t tbuffer_size, + __unused errno_t (*callback)(void *), + __unused void *callback_arg) +{ + return EINVAL; +} + +void +journal_close(__unused journal *jnl) +{ +} + +int +journal_start_transaction(__unused journal *jnl) +{ + return EINVAL; +} + +int +journal_end_transaction(__unused journal *jnl) +{ + return EINVAL; +} + +int +journal_flush(__unused journal *jnl) +{ + return EINVAL; +} + +int +journal_is_clean(__unused struct vnode *jvp, + __unused off_t offset, + __unused off_t journal_size, + __unused struct vnode *fsvp, + __unused size_t min_fs_block_size) +{ + return 0; +} + + +void * +journal_owner(__unused journal *jnl) +{ + return NULL; +} +#endif // !JOURNALING diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index e6461cc1a..c3e058b40 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -43,15 +43,21 @@ #include #include +typedef struct _blk_info { + int32_t bsize; + union { + int32_t cksum; + uint32_t sequence_num; + } b; +} _blk_info; + typedef struct block_info { off_t bnum; // block # on the file system device - size_t bsize; // in bytes union { - int32_t cksum; - uint32_t sequence_num; // only used in block_list_header->binfo[0] + _blk_info bi; struct buf *bp; - } b; -} block_info; + } u; +} __attribute__((__packed__)) block_info; typedef struct block_list_header { u_int16_t max_blocks; // max number of blocks in this chunk @@ -133,6 +139,7 @@ typedef struct journal { int32_t tbuffer_size; // default transaction buffer size char *header_buf; // in-memory copy of the journal header + int32_t header_buf_size; journal_header *jhdr; // points to the first byte of header_buf off_t max_read_size; diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 0c5299ae6..553bb41f9 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -89,7 +89,7 @@ #include 
#include -#include +#include #if CONFIG_MACF #include @@ -160,8 +160,10 @@ namei(struct nameidata *ndp) struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; proc_t p = vfs_context_proc(ctx); +#if CONFIG_AUDIT /* XXX ut should be from context */ uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread()); +#endif char *tmppn; char uio_buf[ UIO_SIZEOF(1) ]; @@ -186,7 +188,9 @@ namei(struct nameidata *ndp) cnp->cn_pnlen = PATHBUFLEN; } #if LP64_DEBUG - if (IS_VALID_UIO_SEGFLG(ndp->ni_segflg) == 0) { + if ((UIO_SEG_IS_USER_SPACE(ndp->ni_segflg) == 0) + && (ndp->ni_segflg != UIO_SYSSPACE) + && (ndp->ni_segflg != UIO_SYSSPACE32)) { panic("%s :%d - invalid ni_segflg\n", __FILE__, __LINE__); } #endif /* LP64_DEBUG */ @@ -233,9 +237,15 @@ namei(struct nameidata *ndp) /* Attempt to resolve a legacy volfs style pathname. */ MALLOC_ZONE(realpath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); if (realpath) { + /* + * We only error out on the ENAMETOOLONG cases where we know that + * vfs_getrealpath translation succeeded but the path could not fit into + * MAXPATHLEN characters. 
In other failure cases, we may be dealing with a path + * that legitimately looks like /.vol/1234/567 and is not meant to be translated + */ if ((realpath_err= vfs_getrealpath(&cnp->cn_pnbuf[6], realpath, MAXPATHLEN, ctx))) { FREE_ZONE(realpath, MAXPATHLEN, M_NAMEI); - if (realpath_err == ENOSPC){ + if (realpath_err == ENOSPC || realpath_err == ENAMETOOLONG){ error = ENAMETOOLONG; goto error_out; } @@ -250,13 +260,15 @@ namei(struct nameidata *ndp) } } } - #endif /* CONFIG_VOLFS */ +#endif /* CONFIG_VOLFS */ +#if CONFIG_AUDIT /* If we are auditing the kernel pathname, save the user pathname */ if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH1); if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(upath, ut->uu_cdir, cnp->cn_pnbuf, ARG_UPATH2); +#endif /* CONFIG_AUDIT */ /* * Do not allow empty pathnames @@ -299,7 +311,7 @@ namei(struct nameidata *ndp) for (;;) { int need_newpathbuf; - int linklen; + u_int linklen; ndp->ni_startdir = dp; @@ -312,10 +324,13 @@ namei(struct nameidata *ndp) if ((cnp->cn_flags & ISSYMLINK) == 0) { return (0); } +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ + if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; @@ -348,8 +363,13 @@ namei(struct nameidata *ndp) FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); break; } - // LP64todo - fix this - linklen = MAXPATHLEN - uio_resid(auio); + + /* + * Safe to set unsigned with a [larger] signed type here + * because 0 <= uio_resid <= MAXPATHLEN and MAXPATHLEN + * is only 1024. 
+ */ + linklen = MAXPATHLEN - (u_int)uio_resid(auio); if (linklen + ndp->ni_pathlen > MAXPATHLEN) { if (need_newpathbuf) FREE_ZONE(cp, MAXPATHLEN, M_NAMEI); @@ -652,10 +672,12 @@ lookup(struct nameidata *ndp) if ((error == ENOENT) && (dp->v_flag & VROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(dp, NULL); } +#endif /* __LP64__ */ tdp = dp; dp = tdp->v_mount->mnt_vnodecovered; @@ -721,12 +743,12 @@ lookup(struct nameidata *ndp) ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } else { + int isdot_or_dotdot; + isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); + if (dp->v_name == NULL || dp->v_parent == NULLVP) { - int isdot_or_dotdot; int update_flags = 0; - isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); - if (isdot_or_dotdot == 0) { if (dp->v_name == NULL) update_flags |= VNODE_UPDATE_NAME; @@ -751,7 +773,7 @@ lookup(struct nameidata *ndp) * rechecked behind the name cache lock, but if it * already fails to match, no need to go any further */ - if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation)) + if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation) && (!isdot_or_dotdot)) cache_enter_with_gen(ndp->ni_dvp, dp, cnp, nc_generation); } } @@ -823,8 +845,7 @@ lookup(struct nameidata *ndp) #if CONFIG_MACF if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) { - error = vnode_label(vnode_mount(dp), NULL, dp, NULL, - VNODE_LABEL_NEEDREF, ctx); + error = vnode_label(vnode_mount(dp), NULL, dp, NULL, 0, ctx); if (error) goto bad2; } @@ -834,7 +855,7 @@ lookup(struct nameidata *ndp) mp = mounted_on_dp->v_mountedhere; if (mp) { - mount_lock(mp); + mount_lock_spin(mp); mp->mnt_realrootvp_vid = dp->v_id; mp->mnt_realrootvp = dp; mp->mnt_generation = current_mount_generation; @@ -937,19 +958,17 @@ 
lookup(struct nameidata *ndp) case DELETE: if (cnp->cn_flags & CN_ALLOWRSRCFORK) { nsop = NS_DELETE; - } - else { + } else { error = EPERM; - goto bad; + goto bad2; } break; case CREATE: if (cnp->cn_flags & CN_ALLOWRSRCFORK) { nsop = NS_CREATE; - } - else { + } else { error = EPERM; - goto bad; + goto bad2; } break; case LOOKUP: @@ -978,10 +997,12 @@ lookup(struct nameidata *ndp) /* The "parent" of the stream is the file. */ if (wantparent) { if (ndp->ni_dvp) { +#ifndef __LP64__ if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) { ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ vnode_put(ndp->ni_dvp); } ndp->ni_dvp = dp; @@ -1042,10 +1063,12 @@ lookup(struct nameidata *ndp) return (0); bad2: +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ if (ndp->ni_dvp) vnode_put(ndp->ni_dvp); if (dp) @@ -1057,10 +1080,12 @@ lookup(struct nameidata *ndp) return (error); bad: +#ifndef __LP64__ if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ if (dp) vnode_put(dp); ndp->ni_vp = NULLVP; @@ -1186,10 +1211,12 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) void nameidone(struct nameidata *ndp) { +#ifndef __LP64__ if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) { ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } +#endif /* __LP64__ */ if (ndp->ni_cnd.cn_flags & HASBUF) { char *tmp = ndp->ni_cnd.cn_pnbuf; @@ -1267,7 +1294,7 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp) if (dbg_namelen <= 12) code |= DBG_FUNC_END; - KERNEL_DEBUG_CONSTANT(code, (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); + KERNEL_DEBUG_CONSTANT(code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); code &= ~DBG_FUNC_START; @@ -1303,7 +1330,7 @@ vfs_getrealpath(const char * path, char * realpath, size_t 
bufsize, vfs_context_ struct mount *mp = NULL; char *str; char ch; - unsigned long id; + uint32_t id; ino64_t ino; int error; int length; diff --git a/bsd/vfs/vfs_quota.c b/bsd/vfs/vfs_quota.c index 91ffe8bce..4edc2115b 100644 --- a/bsd/vfs/vfs_quota.c +++ b/bsd/vfs/vfs_quota.c @@ -103,7 +103,7 @@ static u_int32_t quotamagic[MAXQUOTAS] = INITQMAGICS; * Code pertaining to management of the in-core dquot data structures. */ #define DQHASH(dqvp, id) \ - (&dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash]) + (&dqhashtbl[((((intptr_t)(dqvp)) >> 8) + id) & dqhash]) LIST_HEAD(dqhash, dquot) *dqhashtbl; u_long dqhash; @@ -120,7 +120,7 @@ TAILQ_HEAD(dqfreelist, dquot) dqfreelist; TAILQ_HEAD(dqdirtylist, dquot) dqdirtylist; -static int dqlookup(struct quotafile *, u_long, struct dqblk *, u_int32_t *); +static int dqlookup(struct quotafile *, u_int32_t, struct dqblk *, u_int32_t *); static int dqsync_locked(struct dquot *dq); static void qf_lock(struct quotafile *); @@ -494,7 +494,7 @@ dqfileclose(struct quotafile *qfp, __unused int type) * reading the information from the file if necessary. */ int -dqget(u_long id, struct quotafile *qfp, int type, struct dquot **dqp) +dqget(u_int32_t id, struct quotafile *qfp, int type, struct dquot **dqp) { struct dquot *dq; struct dquot *ndq = NULL; @@ -784,13 +784,13 @@ dqget(u_long id, struct quotafile *qfp, int type, struct dquot **dqp) * one is inserted. The actual hash table index is returned. 
*/ static int -dqlookup(struct quotafile *qfp, u_long id, struct dqblk *dqb, uint32_t *index) +dqlookup(struct quotafile *qfp, u_int32_t id, struct dqblk *dqb, uint32_t *index) { struct vnode *dqvp; struct vfs_context context; uio_t auio; int i, skip, last; - u_long mask; + u_int32_t mask; int error = 0; char uio_buf[ UIO_SIZEOF(1) ]; @@ -814,11 +814,11 @@ dqlookup(struct quotafile *qfp, u_long id, struct dqblk *dqb, uint32_t *index) uio_addiov(auio, CAST_USER_ADDR_T(dqb), sizeof (struct dqblk)); error = VNOP_READ(dqvp, auio, 0, &context); if (error) { - printf("dqlookup: error %d looking up id %lu at index %d\n", error, id, i); + printf("dqlookup: error %d looking up id %u at index %d\n", error, id, i); break; } else if (uio_resid(auio)) { error = EIO; - printf("dqlookup: error looking up id %lu at index %d\n", id, i); + printf("dqlookup: error looking up id %u at index %d\n", id, i); break; } /* diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 643d79c07..c44732bb8 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -160,6 +160,8 @@ __private_extern__ kern_return_t reset_vmobjectcache(unsigned int val1, unsigned int val2); __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int); +extern int system_inshutdown; + static void vnode_list_add(vnode_t); static void vnode_list_remove(vnode_t); static void vnode_list_remove_locked(vnode_t); @@ -170,8 +172,7 @@ static void vclean(vnode_t vp, int flag); static void vnode_reclaim_internal(vnode_t, int, int, int); static void vnode_dropiocount (vnode_t); -static errno_t vnode_getiocount(vnode_t vp, int vid, int vflags); -static int vget_internal(vnode_t, int, int); +static errno_t vnode_getiocount(vnode_t vp, unsigned int vid, int vflags); static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev); static int vnode_reload(vnode_t); @@ -185,9 +186,14 @@ static int vnode_umount_preflight(mount_t, vnode_t, int); static int vnode_iterate_prepare(mount_t); static int vnode_iterate_reloadq(mount_t); 
static void vnode_iterate_clear(mount_t); +static mount_t vfs_getvfs_locked(fsid_t *); errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); +#ifdef JOE_DEBUG +static void record_vp(vnode_t vp, int count); +#endif + TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ TAILQ_HEAD(deadlst, vnode) vnode_dead_list; /* vnode dead list */ @@ -260,7 +266,7 @@ static int nummounts = 0; * place for now... it should be deprecated out of the * exports and removed eventually. */ -unsigned long vnodetarget; /* target for vnreclaim() */ +u_int32_t vnodetarget; /* target for vnreclaim() */ #define VNODE_FREE_TARGET 20 /* Default value for vnodetarget */ /* @@ -305,6 +311,10 @@ reset_vmobjectcache(unsigned int val1, unsigned int val2) vm_size_t oval = val1 - VNODE_FREE_MIN; vm_size_t nval; + if (val1 == val2) { + return KERN_SUCCESS; + } + if(val2 < VNODE_FREE_MIN) nval = 0; else @@ -324,9 +334,9 @@ vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, if (vp->v_numoutput > output_target) { - slpflag &= ~PDROP; + slpflag |= PDROP; - vnode_lock(vp); + vnode_lock_spin(vp); while ((vp->v_numoutput > output_target) && error == 0) { if (output_target) @@ -337,6 +347,8 @@ vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, ts.tv_sec = (slptimeout/100); ts.tv_nsec = (slptimeout % 1000) * 10 * NSEC_PER_USEC * 1000 ; error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts); + + vnode_lock_spin(vp); } vnode_unlock(vp); } @@ -646,6 +658,12 @@ mount_lock(mount_t mp) lck_mtx_lock(&mp->mnt_mlock); } +void +mount_lock_spin(mount_t mp) +{ + lck_mtx_lock_spin(&mp->mnt_mlock); +} + void mount_unlock(mount_t mp) { @@ -657,7 +675,7 @@ void mount_ref(mount_t mp, int locked) { if ( !locked) - mount_lock(mp); + mount_lock_spin(mp); mp->mnt_count++; @@ -670,7 +688,7 @@ void mount_drop(mount_t mp, int locked) { if ( !locked) - mount_lock(mp); + mount_lock_spin(mp); mp->mnt_count--; @@ 
-853,8 +871,8 @@ vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) { mount_t mp; - mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); + mp = _MALLOC_ZONE(sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, sizeof(struct mount)); /* Initialize the default IO constraints */ mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS; @@ -863,6 +881,8 @@ vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname) mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; mp->mnt_devblocksize = DEV_BSIZE; mp->mnt_alignmentmask = PAGE_MASK; + mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH; + mp->mnt_ioscale = 1; mp->mnt_ioflags = 0; mp->mnt_realrootvp = NULLVP; mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; @@ -1046,9 +1066,12 @@ vfs_mountroot(void) dounmount(mp, MNT_FORCE, 0, ctx); goto fail; } - - /* VFS_ROOT provides reference so flags = 0 */ error = vnode_label(mp, NULL, vp, NULL, 0, ctx); + /* + * get rid of reference provided by VFS_ROOT + */ + vnode_put(vp); + if (error) { printf("%s() vnode_label() returned %d\n", __func__, error); @@ -1072,7 +1095,6 @@ vfs_mountroot(void) /* * Lookup a mount point by filesystem identifier. */ -extern mount_t vfs_getvfs_locked(fsid_t *); struct mount * vfs_getvfs(fsid_t *fsid) @@ -1080,7 +1102,7 @@ vfs_getvfs(fsid_t *fsid) return (mount_list_lookupby_fsid(fsid, 0, 0)); } -struct mount * +static struct mount * vfs_getvfs_locked(fsid_t *fsid) { return(mount_list_lookupby_fsid(fsid, 1, 0)); @@ -1161,7 +1183,7 @@ insmntque(vnode_t vp, mount_t mp) panic("insmntque: vp not in mount vnode list"); vp->v_lflag &= ~VNAMED_MOUNT; - mount_lock(lmp); + mount_lock_spin(lmp); mount_drop(lmp, 1); @@ -1186,7 +1208,7 @@ insmntque(vnode_t vp, mount_t mp) * Insert into list of vnodes for the new mount point, if available. 
*/ if ((vp->v_mount = mp) != NULL) { - mount_lock(mp); + mount_lock_spin(mp); if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0)) panic("vp already in mount list"); if (mp->mnt_lflag & MNT_LITER) @@ -1288,6 +1310,7 @@ checkalias(struct vnode *nvp, dev_t nvp_rdev) { struct vnode *vp; struct vnode **vpp; + struct specinfo *sin = NULL; int vid = 0; vpp = &speclisth[SPECHASH(nvp_rdev)]; @@ -1303,6 +1326,7 @@ checkalias(struct vnode *nvp, dev_t nvp_rdev) SPECHASH_UNLOCK(); if (vp) { +found_alias: if (vnode_getwithvid(vp,vid)) { goto loop; } @@ -1320,36 +1344,59 @@ checkalias(struct vnode *nvp, dev_t nvp_rdev) vnode_unlock(vp); goto loop; } + } if (vp == NULL || vp->v_tag != VT_NON) { -retnullvp: - MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), - M_SPECINFO, M_WAITOK); + if (sin == NULL) { + MALLOC_ZONE(sin, struct specinfo *, sizeof(struct specinfo), + M_SPECINFO, M_WAITOK); + } + + nvp->v_specinfo = sin; bzero(nvp->v_specinfo, sizeof(struct specinfo)); nvp->v_rdev = nvp_rdev; nvp->v_specflags = 0; nvp->v_speclastr = -1; SPECHASH_LOCK(); + + /* We dropped the lock, someone could have added */ + if (vp == NULLVP) { + for (vp = *vpp; vp; vp = vp->v_specnext) { + if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) { + vid = vp->v_id; + SPECHASH_UNLOCK(); + goto found_alias; + } + } + } + nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; *vpp = nvp; - SPECHASH_UNLOCK(); if (vp != NULLVP) { - nvp->v_flag |= VALIASED; - vp->v_flag |= VALIASED; + nvp->v_specflags |= SI_ALIASED; + vp->v_specflags |= SI_ALIASED; + SPECHASH_UNLOCK(); vnode_put_locked(vp); vnode_unlock(vp); + } else { + SPECHASH_UNLOCK(); } + return (NULLVP); } + + if (sin) { + FREE_ZONE(sin, sizeof(struct specinfo), M_SPECINFO); + } + if ((vp->v_flag & (VBDEVVP | VDEVFLUSH)) != 0) return(vp); - else { - panic("checkalias with VT_NON vp that shouldn't: %x", (unsigned int)vp); - goto retnullvp; - } + + panic("checkalias with VT_NON vp that shouldn't: %p", vp); + 
return (vp); } @@ -1364,7 +1411,7 @@ checkalias(struct vnode *nvp, dev_t nvp_rdev) * and an error returned to indicate that the vnode is no longer * usable (possibly having been changed to a new file system type). */ -static int +int vget_internal(vnode_t vp, int vid, int vflags) { int error = 0; @@ -1473,11 +1520,15 @@ vnode_ref_ext(vnode_t vp, int fmode) static void vnode_list_add(vnode_t vp) { +#if DIAGNOSTIC + lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); +#endif /* * if it is already on a list or non zero references return */ - if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0)) + if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || (vp->v_lflag & VL_TERMINATE)) return; + vnode_list_lock(); if ((vp->v_flag & VRAGE) && !(vp->v_lflag & VL_DEAD)) { @@ -1553,6 +1604,9 @@ vnode_list_remove_locked(vnode_t vp) static void vnode_list_remove(vnode_t vp) { +#if DIAGNOSTIC + lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); +#endif /* * we want to avoid taking the list lock * in the case where we're not on the free @@ -1606,20 +1660,24 @@ vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked) { if ( !locked) vnode_lock_spin(vp); - +#if DIAGNOSTIC + else + lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); +#endif if (--vp->v_usecount < 0) - panic("vnode_rele_ext: vp %p usecount -ve : %d", vp, vp->v_usecount); + panic("vnode_rele_ext: vp %p usecount -ve : %d. v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_usecount, vp->v_tag, vp->v_type, vp->v_flag); if (fmode & FWRITE) { if (--vp->v_writecount < 0) - panic("vnode_rele_ext: vp %p writecount -ve : %ld", vp, vp->v_writecount); + panic("vnode_rele_ext: vp %p writecount -ve : %d. v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_writecount, vp->v_tag, vp->v_type, vp->v_flag); } if (fmode & O_EVTONLY) { if (--vp->v_kusecount < 0) - panic("vnode_rele_ext: vp %p kusecount -ve : %d", vp, vp->v_kusecount); + panic("vnode_rele_ext: vp %p kusecount -ve : %d. 
v_tag = %d, v_type = %d, v_flag = %x.", vp, vp->v_kusecount, vp->v_tag, vp->v_type, vp->v_flag); } if (vp->v_kusecount > vp->v_usecount) - panic("vnode_rele_ext: vp %p kusecount(%d) out of balance with usecount(%d)\n",vp, vp->v_kusecount, vp->v_usecount); + panic("vnode_rele_ext: vp %p kusecount(%d) out of balance with usecount(%d). v_tag = %d, v_type = %d, v_flag = %x.",vp, vp->v_kusecount, vp->v_usecount, vp->v_tag, vp->v_type, vp->v_flag); + if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) { /* * vnode is still busy... if we're the last @@ -1723,7 +1781,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) int busy = 0; int reclaimed = 0; int retval; - int vid; + unsigned int vid; mount_lock(mp); vnode_iterate_setup(mp); @@ -1751,17 +1809,20 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) return(retval); } - /* iterate over all the vnodes */ - while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { - vp = TAILQ_FIRST(&mp->mnt_workerqueue); - TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); - TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); - if ( (vp->v_mount != mp) || (vp == skipvp)) { - continue; - } - vid = vp->v_id; - mount_unlock(mp); - vnode_lock(vp); + /* iterate over all the vnodes */ + while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) { + + vp = TAILQ_FIRST(&mp->mnt_workerqueue); + TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes); + TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); + + if ( (vp->v_mount != mp) || (vp == skipvp)) { + continue; + } + vid = vp->v_id; + mount_unlock(mp); + + vnode_lock_spin(vp); if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) { vnode_unlock(vp); @@ -1788,7 +1849,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) continue; } /* - * If requested, skip over vnodes marked VSWAP. + * If requested, skip over vnodes marked VROOT. 
*/ if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) { vnode_unlock(vp); @@ -1811,6 +1872,8 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) */ if (((vp->v_usecount == 0) || ((vp->v_usecount - vp->v_kusecount) == 0))) { + + vnode_lock_convert(vp); vp->v_iocount++; /* so that drain waits for * other iocounts */ #ifdef JOE_DEBUG record_vp(vp, 1); @@ -1830,6 +1893,8 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) * anonymous device. For all other files, just kill them. */ if (flags & FORCECLOSE) { + vnode_lock_convert(vp); + if (vp->v_type != VBLK && vp->v_type != VCHR) { vp->v_iocount++; /* so that drain waits * for other iocounts */ #ifdef JOE_DEBUG @@ -1880,7 +1945,7 @@ vflush(struct mount *mp, struct vnode *skipvp, int flags) return (0); } -long num_recycledvnodes = 0; /* long for OSAddAtomic */ +long num_recycledvnodes = 0; /* * Disassociate the underlying file system from a vnode. * The vnode lock is held on entry. @@ -1893,7 +1958,6 @@ vclean(vnode_t vp, int flags) int need_inactive; int already_terminating; int clflags = 0; - #if NAMEDSTREAMS int is_namedstream; #endif @@ -1934,11 +1998,7 @@ vclean(vnode_t vp, int flags) vnode_unlock(vp); - OSAddAtomic(1, &num_recycledvnodes); - /* - * purge from the name cache as early as possible... - */ - cache_purge(vp); + OSAddAtomicLong(1, &num_recycledvnodes); if (flags & DOCLOSE) clflags |= IO_NDELAY; @@ -1971,11 +2031,25 @@ vclean(vnode_t vp, int flags) VNOP_INACTIVE(vp, ctx); #if NAMEDSTREAMS - /* Delete the shadow stream file before we reclaim its vnode */ - if ((is_namedstream != 0) && - (vp->v_parent != NULLVP) && - (vnode_isshadow(vp))) { - vnode_relenamedstream(vp->v_parent, vp, ctx); + if ((is_namedstream != 0) && (vp->v_parent != NULLVP)) { + vnode_t pvp = vp->v_parent; + + /* Delete the shadow stream file before we reclaim its vnode */ + if (vnode_isshadow(vp)) { + vnode_relenamedstream(pvp, vp, ctx); + } + + /* + * No more streams associated with the parent. 
We + * have a ref on it, so its identity is stable. + * If the parent is on an opaque volume, then we need to know + * whether it has associated named streams. + */ + if (vfs_authopaque(pvp->v_mount)) { + vnode_lock_spin(pvp); + pvp->v_lflag &= ~VL_HASSTREAMS; + vnode_unlock(pvp); + } } #endif @@ -1993,7 +2067,7 @@ vclean(vnode_t vp, int flags) panic("vclean: cannot reclaim"); // make sure the name & parent ptrs get cleaned out! - vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME); + vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME | VNODE_UPDATE_PURGE); vnode_lock(vp); @@ -2035,23 +2109,20 @@ vn_revoke(vnode_t vp, __unused int flags, __unused vfs_context_t a_context) panic("vnop_revoke"); #endif - if (vp->v_flag & VALIASED) { + if (vnode_isaliased(vp)) { /* * If a vgone (or vclean) is already in progress, - * wait until it is done and return. + * return an immediate error */ - vnode_lock(vp); - if (vp->v_lflag & VL_TERMINATE) { - vnode_unlock(vp); + if (vp->v_lflag & VL_TERMINATE) return(ENOENT); - } - vnode_unlock(vp); + /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. 
*/ SPECHASH_LOCK(); - while (vp->v_flag & VALIASED) { + while ((vp->v_specflags & SI_ALIASED)) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) @@ -2082,13 +2153,14 @@ vn_revoke(vnode_t vp, __unused int flags, __unused vfs_context_t a_context) int vnode_recycle(struct vnode *vp) { - vnode_lock(vp); + vnode_lock_spin(vp); if (vp->v_iocount || vp->v_usecount) { vp->v_lflag |= VL_MARKTERM; vnode_unlock(vp); return(0); } + vnode_lock_convert(vp); vnode_reclaim_internal(vp, 1, 0, 0); vnode_unlock(vp); @@ -2147,7 +2219,7 @@ vgone(vnode_t vp, int flags) if (vq == NULL) panic("missing bdev"); } - if (vp->v_flag & VALIASED) { + if (vp->v_specflags & SI_ALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || @@ -2160,8 +2232,8 @@ vgone(vnode_t vp, int flags) if (vx == NULL) panic("missing alias"); if (vq == NULL) - vx->v_flag &= ~VALIASED; - vp->v_flag &= ~VALIASED; + vx->v_specflags &= ~SI_ALIASED; + vp->v_specflags &= ~SI_ALIASED; } SPECHASH_UNLOCK(); { @@ -2216,7 +2288,7 @@ vcount(vnode_t vp) int vid; loop: - if ((vp->v_flag & VALIASED) == 0) + if (!vnode_isaliased(vp)) return (vp->v_usecount - vp->v_kusecount); count = 0; @@ -2276,10 +2348,8 @@ int prtactive = 0; /* 1 => print out reclaim of active vnodes */ /* * Print out a description of a vnode. 
*/ -#if !CONFIG_NO_PRINTF_STRINGS static const char *typename[] = { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; -#endif void vprint(const char *label, struct vnode *vp) @@ -2288,7 +2358,7 @@ vprint(const char *label, struct vnode *vp) if (label != NULL) printf("%s: ", label); - printf("type %s, usecount %d, writecount %ld", + printf("type %s, usecount %d, writecount %d", typename[vp->v_type], vp->v_usecount, vp->v_writecount); sbuf[0] = '\0'; if (vp->v_flag & VROOT) @@ -2301,7 +2371,7 @@ vprint(const char *label, struct vnode *vp) strlcat(sbuf, "|VNOFLUSH", sizeof(sbuf)); if (vp->v_flag & VBWAIT) strlcat(sbuf, "|VBWAIT", sizeof(sbuf)); - if (vp->v_flag & VALIASED) + if (vnode_isaliased(vp)) strlcat(sbuf, "|VALIASED", sizeof(sbuf)); if (sbuf[0] != '\0') printf(" flags (%s)", &sbuf[1]); @@ -2314,6 +2384,11 @@ vn_getpath(struct vnode *vp, char *pathbuf, int *len) return build_path(vp, pathbuf, *len, len, BUILDPATH_NO_FS_ENTER, vfs_context_current()); } +int +vn_getpath_fsenter(struct vnode *vp, char *pathbuf, int *len) +{ + return build_path(vp, pathbuf, *len, len, 0, vfs_context_current()); +} int vn_getcdhash(struct vnode *vp, off_t offset, unsigned char *cdhash) @@ -2345,32 +2420,47 @@ extension_cmp(const void *a, const void *b) // them (i.e. a short 8 character name can't have an 8 // character extension). 
// +extern lck_mtx_t *pkg_extensions_lck; + __private_extern__ int -set_package_extensions_table(void *data, int nentries, int maxwidth) +set_package_extensions_table(user_addr_t data, int nentries, int maxwidth) { - char *new_exts; + char *new_exts, *old_exts; int error; if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) { return EINVAL; } - MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK); + + // allocate one byte extra so we can guarantee null termination + MALLOC(new_exts, char *, (nentries * maxwidth) + 1, M_TEMP, M_WAITOK); + if (new_exts == NULL) { + return ENOMEM; + } - error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth); + error = copyin(data, new_exts, nentries * maxwidth); if (error) { FREE(new_exts, M_TEMP); return error; } - if (extension_table) { - FREE(extension_table, M_TEMP); - } + new_exts[(nentries * maxwidth)] = '\0'; // guarantee null termination of the block + + qsort(new_exts, nentries, maxwidth, extension_cmp); + + lck_mtx_lock(pkg_extensions_lck); + + old_exts = extension_table; extension_table = new_exts; nexts = nentries; max_ext_width = maxwidth; - qsort(extension_table, nexts, maxwidth, extension_cmp); + lck_mtx_unlock(pkg_extensions_lck); + + if (old_exts) { + FREE(old_exts, M_TEMP); + } return 0; } @@ -2401,16 +2491,21 @@ is_package_name(const char *name, int len) // advance over the "." name_ext++; + lck_mtx_lock(pkg_extensions_lck); + // now iterate over all the extensions to see if any match ptr = &extension_table[0]; for(i=0; i < nexts; i++, ptr+=max_ext_width) { extlen = strlen(ptr); if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') { // aha, a match! 
+ lck_mtx_unlock(pkg_extensions_lck); return 1; } } + lck_mtx_unlock(pkg_extensions_lck); + // if we get here, no extension matched return 0; } @@ -2456,6 +2551,25 @@ vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *compone return 0; } +/* + * Determine if a name is inappropriate for a searchfs query. + * This list consists of /System currently. + */ + +int vn_searchfs_inappropriate_name(const char *name, int len) { + const char *bad_names[] = { "System" }; + int bad_len[] = { 6 }; + int i; + + for(i=0; i < (int) (sizeof(bad_names) / sizeof(bad_names[0])); i++) { + if (len == bad_len[i] && strncmp(name, bad_names[i], strlen(bad_names[i]) + 1) == 0) { + return 1; + } + } + + // if we get here, no name matched + return 0; +} /* * Top level filesystem related information gathering. @@ -2470,7 +2584,7 @@ vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, int *username; u_int usernamelen; int error; - struct vfsconf *vfsc; + struct vfsconf vfsc; /* All non VFS_GENERIC and in VFS_GENERIC, * VFS_MAXTYPENUM, VFS_CONF, VFS_SET_PACKAGE_EXTS @@ -2502,16 +2616,27 @@ vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, if (namelen < 2) return (EISDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { - for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (vfsp->vfc_typenum == name[0]) + + mount_list_lock(); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == name[0]) { + vfsp->vfc_refcount++; break; + } + mount_list_unlock(); + if (vfsp == NULL) return (ENOTSUP); /* XXX current context proxy for proc p? 
*/ - return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, + error = ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, vfs_context_current())); + + mount_list_lock(); + vfsp->vfc_refcount--; + mount_list_unlock(); + return error; } switch (name[1]) { case VFS_MAXTYPENUM: @@ -2519,31 +2644,31 @@ vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, case VFS_CONF: if (namelen < 3) return (ENOTDIR); /* overloaded */ + + mount_list_lock(); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; - if (vfsp == NULL) + + if (vfsp == NULL) { + mount_list_unlock(); return (ENOTSUP); - vfsc = (struct vfsconf *)vfsp; - if (proc_is64bit(p)) { - struct user_vfsconf usr_vfsc; - usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops); - bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name)); - usr_vfsc.vfc_typenum = vfsc->vfc_typenum; - usr_vfsc.vfc_refcount = vfsc->vfc_refcount; - usr_vfsc.vfc_flags = vfsc->vfc_flags; - usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot); - usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next); - return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc, - sizeof(usr_vfsc))); - } - else { - return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc, - sizeof(struct vfsconf))); } + + vfsc.vfc_reserved1 = 0; + bcopy(vfsp->vfc_name, vfsc.vfc_name, sizeof(vfsc.vfc_name)); + vfsc.vfc_typenum = vfsp->vfc_typenum; + vfsc.vfc_refcount = vfsp->vfc_refcount; + vfsc.vfc_flags = vfsp->vfc_flags; + vfsc.vfc_reserved2 = 0; + vfsc.vfc_reserved3 = 0; + + mount_list_unlock(); + return (sysctl_rdstruct(oldp, oldlenp, newp, &vfsc, + sizeof(struct vfsconf))); case VFS_SET_PACKAGE_EXTS: - return set_package_extensions_table((void *)name[1], name[2], name[3]); + return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]); } /* * We need to get back into the general MIB, so we need to re-prepend @@ -2555,7 +2680,7 @@ vfs_sysctl(int 
*name, u_int namelen, user_addr_t oldp, size_t *oldlenp, bcopy(name, username + 1, namelen * sizeof(*name)); username[0] = CTL_VFS; error = userland_sysctl(p, username, usernamelen, oldp, - oldlenp, 1, newp, newlen, oldlenp); + oldlenp, newp, newlen, oldlenp); FREE(username, M_TEMP); return (error); } @@ -2591,7 +2716,7 @@ vfs_mountedon(struct vnode *vp) error = EBUSY; goto out; } - if (vp->v_flag & VALIASED) { + if (vp->v_specflags & SI_ALIASED) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) @@ -2633,7 +2758,7 @@ vfs_unmountall(void) continue; } else if (error == EBUSY) { /* If EBUSY is returned, the unmount was already in progress */ - printf("unmount of %x failed (", (unsigned int)mp); + printf("unmount of %p failed (", mp); printf("BUSY)\n"); } mount_list_lock(); @@ -2652,16 +2777,16 @@ vnode_pager_vrele(vnode_t vp) { struct ubc_info *uip; - vnode_lock(vp); + vnode_lock_spin(vp); vp->v_lflag &= ~VNAMED_UBC; uip = vp->v_ubcinfo; vp->v_ubcinfo = UBC_INFO_NULL; - ubc_info_deallocate(uip); - vnode_unlock(vp); + + ubc_info_deallocate(uip); } @@ -2671,16 +2796,17 @@ errno_t vfs_init_io_attributes(vnode_t devvp, mount_t mp) { int error; - off_t readblockcnt; - off_t writeblockcnt; - off_t readmaxcnt; - off_t writemaxcnt; - off_t readsegcnt; - off_t writesegcnt; - off_t readsegsize; - off_t writesegsize; - off_t alignment; - u_long blksize; + off_t readblockcnt = 0; + off_t writeblockcnt = 0; + off_t readmaxcnt = 0; + off_t writemaxcnt = 0; + off_t readsegcnt = 0; + off_t writesegcnt = 0; + off_t readsegsize = 0; + off_t writesegsize = 0; + off_t alignment = 0; + off_t ioqueue_depth = 0; + u_int32_t blksize; u_int64_t temp; u_int32_t features; vfs_context_t ctx = vfs_context_current(); @@ -2719,6 +2845,16 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) mp->mnt_devblocksize = blksize; + /* + * set the maximum possible I/O size + * this may get clipped to a smaller value + * based on which 
constraints are being advertised + * and if those advertised constraints result in a smaller + * limit for a given I/O + */ + mp->mnt_maxreadcnt = MAX_UPL_SIZE * PAGE_SIZE; + mp->mnt_maxwritecnt = MAX_UPL_SIZE * PAGE_SIZE; + if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, ctx) == 0) { if (isvirtual) mp->mnt_kern_flag |= MNTK_VIRTUALDEV; @@ -2764,36 +2900,52 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) (caddr_t)&alignment, 0, ctx))) return (error); + if ((error = VNOP_IOCTL(devvp, DKIOCGETCOMMANDPOOLSIZE, + (caddr_t)&ioqueue_depth, 0, ctx))) + return (error); + if (readmaxcnt) - temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; - else { - if (readblockcnt) { - temp = readblockcnt * blksize; - temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; - } else - temp = MAXPHYS; + mp->mnt_maxreadcnt = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt; + + if (readblockcnt) { + temp = readblockcnt * blksize; + temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; + + if (temp < mp->mnt_maxreadcnt) + mp->mnt_maxreadcnt = (u_int32_t)temp; } - mp->mnt_maxreadcnt = (u_int32_t)temp; if (writemaxcnt) - temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; - else { - if (writeblockcnt) { - temp = writeblockcnt * blksize; - temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; - } else - temp = MAXPHYS; + mp->mnt_maxwritecnt = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt; + + if (writeblockcnt) { + temp = writeblockcnt * blksize; + temp = (temp > UINT32_MAX) ? UINT32_MAX : temp; + + if (temp < mp->mnt_maxwritecnt) + mp->mnt_maxwritecnt = (u_int32_t)temp; } - mp->mnt_maxwritecnt = (u_int32_t)temp; if (readsegcnt) { temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt; - mp->mnt_segreadcnt = (u_int16_t)temp; + } else { + temp = mp->mnt_maxreadcnt / PAGE_SIZE; + + if (temp > UINT16_MAX) + temp = UINT16_MAX; } + mp->mnt_segreadcnt = (u_int16_t)temp; + if (writesegcnt) { temp = (writesegcnt > UINT16_MAX) ? 
UINT16_MAX : writesegcnt; - mp->mnt_segwritecnt = (u_int16_t)temp; + } else { + temp = mp->mnt_maxwritecnt / PAGE_SIZE; + + if (temp > UINT16_MAX) + temp = UINT16_MAX; } + mp->mnt_segwritecnt = (u_int16_t)temp; + if (readsegsize) temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize; else @@ -2812,6 +2964,18 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) temp = 0; mp->mnt_alignmentmask = temp; + + if (ioqueue_depth > MNT_DEFAULT_IOQUEUE_DEPTH) + temp = ioqueue_depth; + else + temp = MNT_DEFAULT_IOQUEUE_DEPTH; + + mp->mnt_ioqueue_depth = temp; + mp->mnt_ioscale = (mp->mnt_ioqueue_depth + (MNT_DEFAULT_IOQUEUE_DEPTH - 1)) / MNT_DEFAULT_IOQUEUE_DEPTH; + + if (mp->mnt_ioscale > 1) + printf("ioqueue_depth = %d, ioscale = %d\n", (int)mp->mnt_ioqueue_depth, (int)mp->mnt_ioscale); + if (features & DK_FEATURE_FORCE_UNIT_ACCESS) mp->mnt_ioflags |= MNT_IOFLAGS_FUA_SUPPORTED; @@ -2825,6 +2989,7 @@ lck_mtx_t *fs_klist_lock; void vfs_event_init(void) { + klist_init(&fs_klist); fs_klist_lck_grp = lck_grp_alloc_init("fs_klist", NULL); fs_klist_lock = lck_mtx_alloc_init(fs_klist_lck_grp, NULL); @@ -2934,6 +3099,10 @@ sysctl_vfs_vfslist(__unused struct sysctl_oid *oidp, __unused void *arg1, return (ENOMEM); MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK); + if (fsidlst == NULL) { + return (ENOMEM); + } + error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t), &actual); /* @@ -2959,8 +3128,7 @@ static int sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { - struct vfsidctl vc; - struct user_vfsidctl user_vc; + union union_vfsidctl vc; struct mount *mp; struct vfsstatfs *sp; int *name, flags, namelen; @@ -2973,26 +3141,14 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, namelen = arg2; is_64_bit = proc_is64bit(p); - if (is_64_bit) { - error = SYSCTL_IN(req, &user_vc, sizeof(user_vc)); - if (error) - goto out; - if (user_vc.vc_vers != VFS_CTL_VERS1) { - error = EINVAL; - 
goto out; - } - mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 1); - } - else { - error = SYSCTL_IN(req, &vc, sizeof(vc)); - if (error) - goto out; - if (vc.vc_vers != VFS_CTL_VERS1) { - error = EINVAL; - goto out; - } - mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 1); + error = SYSCTL_IN(req, &vc, is_64_bit? sizeof(vc.vc64):sizeof(vc.vc32)); + if (error) + goto out; + if (vc.vc32.vc_vers != VFS_CTL_VERS1) { /* works for 32 and 64 */ + error = EINVAL; + goto out; } + mp = mount_list_lookupby_fsid(&vc.vc32.vc_fsid, 0, 1); /* works for 32 and 64 */ if (mp == NULL) { error = ENOENT; goto out; @@ -3031,12 +3187,12 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, case VFS_CTL_UMOUNT: req->newidx = 0; if (is_64_bit) { - req->newptr = user_vc.vc_ptr; - req->newlen = (size_t)user_vc.vc_len; + req->newptr = vc.vc64.vc_ptr; + req->newlen = (size_t)vc.vc64.vc_len; } else { - req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); - req->newlen = vc.vc_len; + req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr); + req->newlen = vc.vc32.vc_len; } error = SYSCTL_IN(req, &flags, sizeof(flags)); if (error) @@ -3051,32 +3207,32 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, case VFS_CTL_STATFS: req->newidx = 0; if (is_64_bit) { - req->newptr = user_vc.vc_ptr; - req->newlen = (size_t)user_vc.vc_len; + req->newptr = vc.vc64.vc_ptr; + req->newlen = (size_t)vc.vc64.vc_len; } else { - req->newptr = CAST_USER_ADDR_T(vc.vc_ptr); - req->newlen = vc.vc_len; + req->newptr = CAST_USER_ADDR_T(vc.vc32.vc_ptr); + req->newlen = vc.vc32.vc_len; } error = SYSCTL_IN(req, &flags, sizeof(flags)); if (error) break; sp = &mp->mnt_vfsstat; - if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) && + if (((flags & MNT_NOWAIT) == 0 || (flags & (MNT_WAIT | MNT_DWAIT))) && (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT))) goto out; if (is_64_bit) { - struct user_statfs sfs; + struct user64_statfs sfs; bzero(&sfs, sizeof(sfs)); sfs.f_flags = mp->mnt_flag 
& MNT_VISFLAGMASK; sfs.f_type = mp->mnt_vtable->vfc_typenum; - sfs.f_bsize = (user_long_t)sp->f_bsize; - sfs.f_iosize = (user_long_t)sp->f_iosize; - sfs.f_blocks = (user_long_t)sp->f_blocks; - sfs.f_bfree = (user_long_t)sp->f_bfree; - sfs.f_bavail = (user_long_t)sp->f_bavail; - sfs.f_files = (user_long_t)sp->f_files; - sfs.f_ffree = (user_long_t)sp->f_ffree; + sfs.f_bsize = (user64_long_t)sp->f_bsize; + sfs.f_iosize = (user64_long_t)sp->f_iosize; + sfs.f_blocks = (user64_long_t)sp->f_blocks; + sfs.f_bfree = (user64_long_t)sp->f_bfree; + sfs.f_bavail = (user64_long_t)sp->f_bavail; + sfs.f_files = (user64_long_t)sp->f_files; + sfs.f_ffree = (user64_long_t)sp->f_ffree; sfs.f_fsid = sp->f_fsid; sfs.f_owner = sp->f_owner; @@ -3087,8 +3243,8 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, error = SYSCTL_OUT(req, &sfs, sizeof(sfs)); } else { - struct statfs sfs; - bzero(&sfs, sizeof(struct statfs)); + struct user32_statfs sfs; + bzero(&sfs, sizeof(sfs)); sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; sfs.f_type = mp->mnt_vtable->vfc_typenum; @@ -3097,7 +3253,7 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, * have to fudge the numbers here in that case. We inflate the blocksize in order * to reflect the filesystem size as best we can. */ - if (sp->f_blocks > LONG_MAX) { + if (sp->f_blocks > INT_MAX) { int shift; /* @@ -3110,27 +3266,27 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, * being smaller than f_bsize. */ for (shift = 0; shift < 32; shift++) { - if ((sp->f_blocks >> shift) <= LONG_MAX) + if ((sp->f_blocks >> shift) <= INT_MAX) break; - if ((sp->f_bsize << (shift + 1)) > LONG_MAX) + if ((((long long)sp->f_bsize) << (shift + 1)) > INT_MAX) break; } -#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? 
LONG_MAX : ((x) >> (s))) - sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift); - sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift); - sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift); +#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s))) + sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sp->f_blocks, shift); + sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sp->f_bfree, shift); + sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sp->f_bavail, shift); #undef __SHIFT_OR_CLIP - sfs.f_bsize = (long)(sp->f_bsize << shift); + sfs.f_bsize = (user32_long_t)(sp->f_bsize << shift); sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize); } else { - sfs.f_bsize = (long)sp->f_bsize; - sfs.f_iosize = (long)sp->f_iosize; - sfs.f_blocks = (long)sp->f_blocks; - sfs.f_bfree = (long)sp->f_bfree; - sfs.f_bavail = (long)sp->f_bavail; + sfs.f_bsize = (user32_long_t)sp->f_bsize; + sfs.f_iosize = (user32_long_t)sp->f_iosize; + sfs.f_blocks = (user32_long_t)sp->f_blocks; + sfs.f_bfree = (user32_long_t)sp->f_bfree; + sfs.f_bavail = (user32_long_t)sp->f_bavail; } - sfs.f_files = (long)sp->f_files; - sfs.f_ffree = (long)sp->f_ffree; + sfs.f_files = (user32_long_t)sp->f_files; + sfs.f_ffree = (user32_long_t)sp->f_ffree; sfs.f_fsid = sp->f_fsid; sfs.f_owner = sp->f_owner; @@ -3154,9 +3310,11 @@ sysctl_vfs_ctlbyfsid(__unused struct sysctl_oid *oidp, void *arg1, int arg2, static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); - -struct filterops fs_filtops = - { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; +struct filterops fs_filtops = { + .f_attach = filt_fsattach, + .f_detach = filt_fsdetach, + .f_event = filt_fsevent, +}; static int filt_fsattach(struct knote *kn) @@ -3230,9 +3388,9 @@ sysctl_vfs_noremotehang(__unused struct sysctl_oid *oidp, } if (pid < 0) - OSBitAndAtomic(~((uint32_t)P_NOREMOTEHANG), (UInt32 *)&p->p_flag); + 
OSBitAndAtomic(~((uint32_t)P_NOREMOTEHANG), &p->p_flag); else - OSBitOrAtomic(P_NOREMOTEHANG, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag); proc_rele(p); return (0); @@ -3250,7 +3408,7 @@ SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW|CTLFLAG_ANYBODY, NULL, 0, sysctl_vfs_noremotehang, "I", "noremotehang"); -long num_reusedvnodes = 0; /* long for OSAddAtomic */ +long num_reusedvnodes = 0; static int new_vnode(vnode_t *vpp) @@ -3258,10 +3416,12 @@ new_vnode(vnode_t *vpp) vnode_t vp; int retries = 0; /* retry incase of tablefull */ int force_alloc = 0, walk_count = 0; - int vpid; + unsigned int vpid; struct timespec ts; struct timeval current_tv; +#ifndef __LP64__ struct unsafe_fsnode *l_unsafefs = 0; +#endif /* __LP64__ */ proc_t curproc = current_proc(); retry: @@ -3285,17 +3445,20 @@ new_vnode(vnode_t *vpp) if (numvnodes < desiredvnodes || force_alloc) { numvnodes++; vnode_list_unlock(); + MALLOC_ZONE(vp, struct vnode *, sizeof(*vp), M_VNODE, M_WAITOK); bzero((char *)vp, sizeof(*vp)); VLISTNONE(vp); /* avoid double queue removal */ lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr); + klist_init(&vp->v_knotes); nanouptime(&ts); vp->v_id = ts.tv_nsec; vp->v_flag = VSTANDARD; #if CONFIG_MACF - mac_vnode_label_init(vp); + if (mac_vnode_label_init_needed(vp)) + mac_vnode_label_init(vp); #endif /* MAC */ vp->v_iocount = 1; @@ -3309,8 +3472,8 @@ new_vnode(vnode_t *vpp) (current_tv.tv_sec - rage_tv.tv_sec) >= RAGE_TIME_LIMIT)) { TAILQ_FOREACH(vp, &vnode_rage_list, v_freelist) { - if ( !(vp->v_listflag & VLIST_RAGE) || !(vp->v_flag & VRAGE)) - panic("new_vnode: vp on RAGE list not marked both VLIST_RAGE and VRAGE"); + if ( !(vp->v_listflag & VLIST_RAGE)) + panic("new_vnode: vp (%p) on RAGE list not marked VLIST_RAGE", vp); // if we're a dependency-capable process, skip vnodes that can // cause recycling deadlocks. (i.e. 
this process is diskimages @@ -3406,6 +3569,7 @@ new_vnode(vnode_t *vpp) vnode_list_remove_locked(vp); vnode_list_unlock(); + vnode_lock_spin(vp); /* @@ -3448,29 +3612,32 @@ new_vnode(vnode_t *vpp) vnode_unlock(vp); goto retry; } - OSAddAtomic(1, &num_reusedvnodes); + OSAddAtomicLong(1, &num_reusedvnodes); /* Checks for anyone racing us for recycle */ if (vp->v_type != VBAD) { if (vp->v_lflag & VL_DEAD) - panic("new_vnode: the vnode is VL_DEAD but not VBAD"); + panic("new_vnode(%p): the vnode is VL_DEAD but not VBAD", vp); vnode_lock_convert(vp); (void)vnode_reclaim_internal(vp, 1, 1, 0); if ((VONLIST(vp))) - panic("new_vnode: vp on list "); + panic("new_vnode(%p): vp on list", vp); if (vp->v_usecount || vp->v_iocount || vp->v_kusecount || (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH))) - panic("new_vnode: free vnode still referenced\n"); + panic("new_vnode(%p): free vnode still referenced", vp); if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0)) - panic("new_vnode: vnode seems to be on mount list "); + panic("new_vnode(%p): vnode seems to be on mount list", vp); if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren)) - panic("new_vnode: vnode still hooked into the name cache"); + panic("new_vnode(%p): vnode still hooked into the name cache", vp); } + +#ifndef __LP64__ if (vp->v_unsafefs) { l_unsafefs = vp->v_unsafefs; vp->v_unsafefs = (struct unsafe_fsnode *)NULL; } +#endif /* __LP64__ */ #if CONFIG_MACF /* @@ -3482,7 +3649,11 @@ new_vnode(vnode_t *vpp) if (vp->v_lflag & VL_LABELED) { vnode_lock_convert(vp); mac_vnode_label_recycle(vp); + } else if (mac_vnode_label_init_needed(vp)) { + vnode_lock_convert(vp); + mac_vnode_label_init(vp); } + #endif /* MAC */ vp->v_iocount = 1; @@ -3497,10 +3668,13 @@ new_vnode(vnode_t *vpp) vnode_unlock(vp); +#ifndef __LP64__ if (l_unsafefs) { lck_mtx_destroy(&l_unsafefs->fsnodelock, vnode_lck_grp); FREE_ZONE((void *)l_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS); } +#endif /* 
__LP64__ */ + done: *vpp = vp; @@ -3542,7 +3716,9 @@ vnode_get(struct vnode *vp) int vnode_get_locked(struct vnode *vp) { - +#if DIAGNOSTIC + lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); +#endif if ((vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD))) { return(ENOENT); } @@ -3554,7 +3730,7 @@ vnode_get_locked(struct vnode *vp) } int -vnode_getwithvid(vnode_t vp, int vid) +vnode_getwithvid(vnode_t vp, uint32_t vid) { return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID))); } @@ -3589,6 +3765,9 @@ vnode_put_locked(vnode_t vp) { vfs_context_t ctx = vfs_context_current(); /* hoist outside loop */ +#if DIAGNOSTIC + lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); +#endif retry: if (vp->v_iocount < 1) panic("vnode_put(%p): iocount < 1", vp); @@ -3662,17 +3841,15 @@ vnode_isinuse_locked(vnode_t vp, int refcnt, int locked) errno_t vnode_resume(vnode_t vp) { + if ((vp->v_lflag & VL_SUSPENDED) && vp->v_owner == current_thread()) { - vnode_lock_spin(vp); - - if (vp->v_owner == current_thread()) { + vnode_lock_spin(vp); vp->v_lflag &= ~VL_SUSPENDED; vp->v_owner = NULL; vnode_unlock(vp); - wakeup(&vp->v_iocount); - } else - vnode_unlock(vp); + wakeup(&vp->v_iocount); + } return(0); } @@ -3736,7 +3913,7 @@ vnode_drain(vnode_t vp) #define UNAGE_THRESHHOLD 25 static errno_t -vnode_getiocount(vnode_t vp, int vid, int vflags) +vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) { int nodead = vflags & VNODE_NODEAD; int nosusp = vflags & VNODE_NOSUSPEND; @@ -3769,6 +3946,7 @@ vnode_getiocount(vnode_t vp, int vid, int vflags) (vp->v_owner == current_thread())) { break; } + if (always != 0) break; vnode_lock_convert(vp); @@ -3804,10 +3982,8 @@ vnode_dropiocount (vnode_t vp) #ifdef JOE_DEBUG record_vp(vp, -1); #endif - if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) { - vnode_lock_convert(vp); + if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1)) wakeup(&vp->v_iocount); - } } @@ -3833,11 +4009,8 @@ 
vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags) vn_clearunionwait(vp, 1); - if (vnode_drain(vp)) { - panic("vnode drain failed"); - vnode_unlock(vp); - return; - } + vnode_drain(vp); + isfifo = (vp->v_type == VFIFO); if (vp->v_type != VBAD) @@ -3873,7 +4046,6 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags) vp->v_fifoinfo = NULL; FREE(fip, M_TEMP); } - vp->v_type = VBAD; if (vp->v_data) @@ -3893,6 +4065,11 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags) vp->v_lflag &= ~VL_DRAIN; vp->v_owner = NULL; + KNOTE(&vp->v_knotes, NOTE_REVOKE); + + /* Make sure that when we reuse the vnode, no knotes left over */ + klist_init(&vp->v_knotes); + if (vp->v_lflag & VL_TERMWANT) { vp->v_lflag &= ~VL_TERMWANT; wakeup(&vp->v_lflag); @@ -3909,10 +4086,13 @@ vnode_reclaim_internal(struct vnode * vp, int locked, int reuse, int flags) } /* USAGE: + * The following api creates a vnode and associates all the parameter specified in vnode_fsparam + * structure and returns a vnode handle with a reference. device aliasing is handled here so checkalias + * is obsoleted by this. * vnode_create(int flavor, size_t size, void * param, vnode_t *vp) */ int -vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) +vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) { int error; int insert = 1; @@ -4002,10 +4182,10 @@ vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) bzero(fip, sizeof(struct fifoinfo )); vp->v_fifoinfo = fip; } - /* The file systems usually pass the address of the location where - * where there store the vnode pointer. When we add the vnode in mount - * point and name cache they are discoverable. So the file system node - * will have the connection to vnode setup by then + /* The file systems must pass the address of the location where + * they store the vnode pointer. When we add the vnode into the mount + * list and name cache they become discoverable. 
So the file system node + * must have the connection to vnode setup by then */ *vpp = vp; @@ -4019,21 +4199,20 @@ vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) if (insert) { if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb)) panic("insmntque: vp on the free list\n"); - /* * enter in mount vnode list */ insmntque(vp, param->vnfs_mp); } -#ifdef INTERIM_FSNODE_LOCK - if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) { +#ifndef __LP64__ + if ((param->vnfs_mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE) == 0) { MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *, sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK); vp->v_unsafefs->fsnode_count = 0; vp->v_unsafefs->fsnodeowner = (void *)NULL; lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr); } -#endif /* INTERIM_FSNODE_LOCK */ +#endif /* __LP64__ */ } if (dvp && vnode_ref(dvp) == 0) { vp->v_parent = dvp; @@ -4043,10 +4222,13 @@ vnode_create(int flavor, size_t size, void *data, vnode_t *vpp) /* * enter into name cache * we've got the info to enter it into the name cache now + * cache_enter_create will pick up an extra reference on + * the name entered into the string cache */ - cache_enter(dvp, vp, cnp); - } - vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); + vp->v_name = cache_enter_create(dvp, vp, cnp); + } else + vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0); + if ((cnp->cn_flags & UNIONCREATED) == UNIONCREATED) vp->v_flag |= VISUNION; } @@ -4251,13 +4433,22 @@ vfs_update_vfsstat(mount_t mp, vfs_context_t ctx, __unused int eventtype) return(0); } -void +int mount_list_add(mount_t mp) { + int res; + mount_list_lock(); - TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); - nummounts++; + if (system_inshutdown != 0) { + res = -1; + } else { + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + nummounts++; + res = 0; + } mount_list_unlock(); + + return res; } void @@ -4271,7 +4462,6 @@ mount_list_remove(mount_t mp) 
mount_list_unlock(); } -#if CONFIG_VOLFS mount_t mount_lookupby_volfsid(int volfs_id, int withref) { @@ -4305,8 +4495,6 @@ mount_lookupby_volfsid(int volfs_id, int withref) out: return(cur_mount); } -#endif - mount_t mount_list_lookupby_fsid(fsid_t *fsid, int locked, int withref) @@ -4337,7 +4525,7 @@ vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t ctx) { struct nameidata nd; int error; - u_long ndflags = 0; + u_int32_t ndflags = 0; if (ctx == NULL) { /* XXX technically an error */ ctx = vfs_context_current(); @@ -4369,7 +4557,7 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_ { struct nameidata nd; int error; - u_long ndflags = 0; + u_int32_t ndflags = 0; int lflags = flags; if (ctx == NULL) { /* XXX technically an error */ @@ -4567,8 +4755,7 @@ vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_att vp = *vpp; #if CONFIG_MACF if (!(flags & VN_CREATE_NOLABEL)) { - error = vnode_label(vnode_mount(vp), dvp, vp, cnp, - VNODE_LABEL_CREATE|VNODE_LABEL_NEEDREF, ctx); + error = vnode_label(vnode_mount(vp), dvp, vp, cnp, VNODE_LABEL_CREATE, ctx); if (error) goto error; } @@ -5317,13 +5504,16 @@ vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, i } } - /* check for file immutability */ + /* + * check for file immutability. first, check if the requested rights are + * allowable for a UF_APPEND file. 
+ */ append = 0; if (vp->v_type == VDIR) { - if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights) + if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) == rights) append = 1; } else { - if ((rights & KAUTH_VNODE_APPEND_DATA) == rights) + if ((rights & (KAUTH_VNODE_APPEND_DATA | KAUTH_VNODE_WRITE_EXTATTRIBUTES)) == rights) append = 1; } if ((error = vnode_immutable(vap, append, ignore)) != 0) { @@ -5436,7 +5626,9 @@ vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action, vfs_context_t ctx; vnode_t cvp = NULLVP; vnode_t vp, dvp; - int result; + int result = KAUTH_RESULT_DENY; + int parent_iocount = 0; + int parent_action; /* In case we need to use namedstream's data fork for cached rights*/ ctx = (vfs_context_t)arg0; vp = (vnode_t)arg1; @@ -5456,19 +5648,65 @@ vnode_authorize_callback(kauth_cred_t cred, void *idata, kauth_action_t action, */ if (dvp && vp) goto defer; - if (dvp) + if (dvp) { cvp = dvp; - else - cvp = vp; + } else { + /* + * For named streams on local-authorization volumes, rights are cached on the parent; + * authorization is determined by looking at the parent's properties anyway, so storing + * on the parent means that we don't recompute for the named stream and that if + * we need to flush rights (e.g. on VNOP_SETATTR()) we don't need to track down the + * stream to flush its cache separately. If we miss in the cache, then we authorize + * as if there were no cached rights (passing the named stream vnode and desired rights to + * vnode_authorize_callback_int()). + * + * On an opaquely authorized volume, we don't know the relationship between the + * data fork's properties and the rights granted on a stream. Thus, named stream vnodes + * on such a volume are authorized directly (rather than using the parent) and have their + * own caches. When a named stream vnode is created, we mark the parent as having a named + * stream. 
On a VNOP_SETATTR() for the parent that may invalidate cached authorization, we + * find the stream and flush its cache. + */ + if (vnode_isnamedstream(vp) && (!vfs_authopaque(vp->v_mount))) { + cvp = vp->v_parent; + if ((cvp != NULLVP) && (vnode_getwithref(cvp) == 0)) { + parent_iocount = 1; + } else { + cvp = NULL; + goto defer; /* If we can't use the parent, take the slow path */ + } - if (vnode_cache_is_authorized(cvp, ctx, action) == TRUE) - return KAUTH_RESULT_ALLOW; + /* Have to translate some actions */ + parent_action = action; + if (parent_action & KAUTH_VNODE_READ_DATA) { + parent_action &= ~KAUTH_VNODE_READ_DATA; + parent_action |= KAUTH_VNODE_READ_EXTATTRIBUTES; + } + if (parent_action & KAUTH_VNODE_WRITE_DATA) { + parent_action &= ~KAUTH_VNODE_WRITE_DATA; + parent_action |= KAUTH_VNODE_WRITE_EXTATTRIBUTES; + } + + } else { + cvp = vp; + } + } + + if (vnode_cache_is_authorized(cvp, ctx, parent_iocount ? parent_action : action) == TRUE) { + result = KAUTH_RESULT_ALLOW; + goto out; + } defer: result = vnode_authorize_callback_int(cred, idata, action, arg0, arg1, arg2, arg3); if (result == KAUTH_RESULT_ALLOW && cvp != NULLVP) vnode_cache_authorized_action(cvp, ctx, action); +out: + if (parent_iocount) { + vnode_put(cvp); + } + return result; } @@ -5616,7 +5854,7 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes * *_EXTATTRIBUTES. */ - if (S_ISXATTR(va.va_mode) || vnode_isnamedstream(vp)) { + if (vnode_isnamedstream(vp)) { if (rights & KAUTH_VNODE_READ_DATA) { rights &= ~KAUTH_VNODE_READ_DATA; rights |= KAUTH_VNODE_READ_EXTATTRIBUTES; @@ -6300,12 +6538,30 @@ vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_ */ if (VATTR_IS_ACTIVE(vap, va_uuuid)) { /* if the owner UUID is not actually changing ... 
*/ - if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) - goto no_uuuid_change; + if (VATTR_IS_SUPPORTED(&ova, va_uuuid)) { + if (kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid)) + goto no_uuuid_change; + + /* + * If the current owner UUID is a null GUID, check + * it against the UUID corresponding to the owner UID. + */ + if (kauth_guid_equal(&ova.va_uuuid, &kauth_null_guid) && + VATTR_IS_SUPPORTED(&ova, va_uid)) { + guid_t uid_guid; + + if (kauth_cred_uid2guid(ova.va_uid, &uid_guid) == 0 && + kauth_guid_equal(&vap->va_uuuid, &uid_guid)) + goto no_uuuid_change; + } + } /* * The owner UUID cannot be set by a non-superuser to anything other than - * their own. + * their own or a null GUID (to "unset" the owner UUID). + * Note that file systems must be prepared to handle the + * null UUID case in a manner appropriate for that file + * system. */ if (!has_priv_suser) { if ((error = kauth_cred_getguid(cred, &changer)) != 0) { @@ -6313,8 +6569,9 @@ vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_ /* XXX ENOENT here - no UUID - should perhaps become EPERM */ goto out; } - if (!kauth_guid_equal(&vap->va_uuuid, &changer)) { - KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us"); + if (!kauth_guid_equal(&vap->va_uuuid, &changer) && + !kauth_guid_equal(&vap->va_uuuid, &kauth_null_guid)) { + KAUTH_DEBUG(" ERROR - cannot set supplied owner UUID - not us / null"); error = EPERM; goto out; } @@ -6328,20 +6585,41 @@ vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_ */ if (VATTR_IS_ACTIVE(vap, va_guuid)) { /* if the group UUID is not actually changing ... 
*/ - if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) - goto no_guuid_change; + if (VATTR_IS_SUPPORTED(&ova, va_guuid)) { + if (kauth_guid_equal(&vap->va_guuid, &ova.va_guuid)) + goto no_guuid_change; + + /* + * If the current group UUID is a null UUID, check + * it against the UUID corresponding to the group GID. + */ + if (kauth_guid_equal(&ova.va_guuid, &kauth_null_guid) && + VATTR_IS_SUPPORTED(&ova, va_gid)) { + guid_t gid_guid; + + if (kauth_cred_gid2guid(ova.va_gid, &gid_guid) == 0 && + kauth_guid_equal(&vap->va_guuid, &gid_guid)) + goto no_guuid_change; + } + } /* * The group UUID cannot be set by a non-superuser to anything other than - * one of which they are a member. + * one of which they are a member or a null GUID (to "unset" + * the group UUID). + * Note that file systems must be prepared to handle the + * null UUID case in a manner appropriate for that file + * system. */ if (!has_priv_suser) { - if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { + if (kauth_guid_equal(&vap->va_guuid, &kauth_null_guid)) + ismember = 1; + else if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) { KAUTH_DEBUG(" ERROR - got %d trying to check group membership", error); goto out; } if (!ismember) { - KAUTH_DEBUG(" ERROR - cannot create item with supplied group UUID - not a member"); + KAUTH_DEBUG(" ERROR - cannot set supplied group UUID - not a member / null"); error = EPERM; goto out; } @@ -6470,7 +6748,7 @@ vn_setunionwait(vnode_t vp) void vn_checkunionwait(vnode_t vp) { - vnode_lock(vp); + vnode_lock_spin(vp); while ((vp->v_flag & VISUNION) == VISUNION) msleep((caddr_t)&vp->v_flag, &vp->v_lock, 0, 0, 0); vnode_unlock(vp); @@ -6480,7 +6758,7 @@ void vn_clearunionwait(vnode_t vp, int locked) { if (!locked) - vnode_lock(vp); + vnode_lock_spin(vp); if((vp->v_flag & VISUNION) == VISUNION) { vp->v_flag &= ~VISUNION; wakeup((caddr_t)&vp->v_flag); @@ -6622,7 +6900,9 @@ errno_t 
rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * } while (!eofflag); /* - * If we've made it here all the files in the dir are AppleDouble + * If we've made it here all the files in the dir are ._ files. + * As we iterate through to delete them, we will verify that + * they are true AppleDouble files. * We can delete the files even though the node is suspended * because we are the owner of the file. */ @@ -6662,11 +6942,62 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * !((dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.')) ) { - NDINIT(&nd_temp, DELETE, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx); + + /* + * This is a ._ file, so verify it is actually an AppleDouble + * file by checking the header before we remove it. + */ + vnode_t xvp = NULL; + int did_namei = 0; + + NDINIT(&nd_temp, DELETE, USEDVP | LOCKPARENT, + UIO_SYSSPACE, CAST_USER_ADDR_T(dp->d_name), ctx); nd_temp.ni_dvp = vp; - error = unlink1(ctx, &nd_temp, 0); - if(error && error != ENOENT) + error = namei(&nd_temp); + + if (error) { + if (error == ENOENT) { + error = 0; + } else { + error = ENOTEMPTY; + } + goto out1; + } + did_namei = 1; + + xvp = nd_temp.ni_vp; + + error = check_appledouble_header(xvp, ctx); + if (error) { + error = ENOTEMPTY; + goto out1; + } + + /* Remove the file. 
*/ + error = VNOP_REMOVE(vp, xvp, &nd_temp.ni_cnd, 0, ctx); + if (error) { + if (error == ENOENT) { + error = 0; + } + goto out1; + } + +out1: + /* drop extra reference on vp from LOCKPARENT namei */ + vnode_put (vp); + + if (did_namei) { + nameidone(&nd_temp); + did_namei = 0; + } + if (xvp) { + vnode_put(xvp); + xvp = NULL; + } + if (error) { goto outsc; + } + } cpos += dp->d_reclen; dp = (struct dirent*)cpos; @@ -6708,9 +7039,19 @@ errno_t rmdir_remove_orphaned_appleDouble(vnode_t vp , vfs_context_t ctx, int * } -#ifdef JOE_DEBUG +void +lock_vnode_and_post(vnode_t vp, int kevent_num) +{ + /* Only take the lock if there's something there! */ + if (vp->v_knotes.slh_first != NULL) { + vnode_lock(vp); + KNOTE(&vp->v_knotes, kevent_num); + vnode_unlock(vp); + } +} -record_vp(vnode_t vp, int count) { +#ifdef JOE_DEBUG +static void record_vp(vnode_t vp, int count) { struct uthread *ut; int i; diff --git a/bsd/vfs/vfs_support.c b/bsd/vfs/vfs_support.c index 3d20ffa7d..ed146fa8f 100644 --- a/bsd/vfs/vfs_support.c +++ b/bsd/vfs/vfs_support.c @@ -540,16 +540,17 @@ err_readdir(__unused struct vnop_readdir_args *ap) struct vnop_readdirattr_args /* { - struct vnode *a_vp; + struct vnodeop_desc *a_desc; + vnode_t a_vp; struct attrlist *a_alist; struct uio *a_uio; u_long a_maxcount; - u_long a_options; - int *a_newstate; + u_long a_options; + u_long *a_newstate; int *a_eofflag; - u_long *a_actualcount; + u_long *a_actualcount; vfs_context_t a_context; -} */; +} */ ; int nop_readdirattr(struct vnop_readdirattr_args *ap) @@ -643,7 +644,7 @@ err_strategy(__unused struct vnop_strategy_args *ap) struct vnop_pathconf_args /* { struct vnode *a_vp; int a_name; - register_t *a_retval; + int32_t *a_retval; vfs_context_t a_context; } */; diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index ee31d2c82..23653799f 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -94,6 +94,8 @@ #include #include #include +#include +#include #include #include #include @@ -101,7 
+103,7 @@ #include #include -#include +#include #include #include @@ -111,6 +113,7 @@ #include #include +#include #if CONFIG_MACF #include @@ -147,14 +150,16 @@ static int sync_callback(mount_t, void *); static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp, int *sizep, boolean_t is_64_bit, boolean_t partial_copy); -static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp); +static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, + user_addr_t bufp); +static int fsync_common(proc_t p, struct fsync_args *uap, int flags); int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); __private_extern__ int sync_internal(void); __private_extern__ -int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, register_t *); +int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *); __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int); @@ -172,17 +177,17 @@ struct lstatv_args { struct mkcomplex_args { const char *path; /* pathname of the file to be created */ mode_t mode; /* access mode for the newly created file */ - u_long type; /* format of the complex file */ + u_int32_t type; /* format of the complex file */ }; struct statv_args { const char *path; /* pathname of the target file */ struct vstat *vsb; /* vstat structure for returned info */ }; -int fstatv(proc_t p, struct fstatv_args *uap, register_t *retval); -int lstatv(proc_t p, struct lstatv_args *uap, register_t *retval); -int mkcomplex(proc_t p, struct mkcomplex_args *uap, register_t *retval); -int statv(proc_t p, struct statv_args *uap, register_t *retval); +int fstatv(proc_t p, struct fstatv_args *uap, int32_t *retval); +int lstatv(proc_t p, struct lstatv_args *uap, int32_t *retval); +int mkcomplex(proc_t p, struct mkcomplex_args *uap, int32_t *retval); +int statv(proc_t p, struct statv_args *uap, int32_t *retval); #endif /* __APPLE_API_OBSOLETE */ @@ -191,7 +196,7 @@ int 
statv(proc_t p, struct statv_args *uap, register_t *retval); * used to invalidate the cached value of the rootvp in the * mount structure utilized by cache_lookup_path */ -int mount_generation = 0; +uint32_t mount_generation = 0; /* counts number of mount and unmount operations */ unsigned int vfs_nummntops=0; @@ -209,7 +214,7 @@ extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); */ /* ARGSUSED */ int -mount(proc_t p, struct mount_args *uap, __unused register_t *retval) +mount(proc_t p, struct mount_args *uap, __unused int32_t *retval) { struct __mac_mount_args muap; @@ -221,10 +226,29 @@ mount(proc_t p, struct mount_args *uap, __unused register_t *retval) return (__mac_mount(p, &muap, retval)); } +/* + * __mac_mount: + * Mount a file system taking into account MAC label behavior. + * See mount(2) man page for more information + * + * Parameters: p Process requesting the mount + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->type Filesystem type + * uap->path Path to mount + * uap->data Mount arguments + * uap->mac_p MAC info + * uap->flags Mount flags + * + * + * Returns: 0 Success + * !0 Not success + */ int -__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused register_t *retval) +__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval) { - struct vnode *vp; + struct vnode *vp, *pvp; struct vnode *devvp = NULLVP; struct vnode *device_vnode = NULLVP; #if CONFIG_MACF @@ -243,9 +267,12 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi user_addr_t fsmountargs = uap->data; int ronly = 0; int mntalloc = 0; + boolean_t vfsp_ref = FALSE; mode_t accessmode; boolean_t is_64bit; boolean_t is_rwlock_locked = FALSE; + boolean_t did_rele = FALSE; + boolean_t have_usecount = FALSE; AUDIT_ARG(fflags, uap->flags); @@ -254,12 +281,13 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi /* 
* Get vnode to be covered */ - NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1, + NDINIT(&nd, LOOKUP, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; + pvp = nd.ni_dvp; if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_ROOTFS)) @@ -277,7 +305,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi mp = vp->v_mount; /* unmount in progress return error */ - mount_lock(mp); + mount_lock_spin(mp); if (mp->mnt_lflag & MNT_LUNMOUNT) { mount_unlock(mp); error = EBUSY; @@ -362,8 +390,11 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi AUDIT_ARG(text, fstypename); mount_list_lock(); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) - if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) + if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) { + vfsp->vfc_refcount++; + vfsp_ref = TRUE; break; + } mount_list_unlock(); if (vfsp == NULL) { error = ENODEV; @@ -386,9 +417,9 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi /* * Allocate and initialize the filesystem. 
*/ - MALLOC_ZONE(mp, struct mount *, (u_long)sizeof(struct mount), + MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); + bzero((char *)mp, (u_int32_t)sizeof(struct mount)); mntalloc = 1; /* Initialize the default IO constraints */ @@ -398,6 +429,8 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt; mp->mnt_devblocksize = DEV_BSIZE; mp->mnt_alignmentmask = PAGE_MASK; + mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH; + mp->mnt_ioscale = 1; mp->mnt_ioflags = 0; mp->mnt_realrootvp = NULLVP; mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL; @@ -410,9 +443,6 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi is_rwlock_locked = TRUE; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vtable = vfsp; - mount_list_lock(); - vfsp->vfc_refcount++; - mount_list_unlock(); //mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN); @@ -457,7 +487,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi goto out1; fsmountargs += sizeof(devpath); } else { - char *tmp; + user32_addr_t tmp; if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) ) goto out1; /* munge into LP64 addr */ @@ -534,15 +564,38 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi device_vnode = devvp; } else { if ((mp->mnt_flag & MNT_RDONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { + dev_t dev; + int maj; /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. 
*/ device_vnode = mp->mnt_devvp; - if (device_vnode && suser(vfs_context_ucred(ctx), NULL)) { - if ((error = vnode_authorize(device_vnode, NULL, - KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) + + if (device_vnode) { + vnode_getalways(device_vnode); + + if (suser(vfs_context_ucred(ctx), NULL)) { + if ((error = vnode_authorize(device_vnode, NULL, + KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) { + vnode_put(device_vnode); + goto out2; + } + } + + /* Tell the device that we're upgrading */ + dev = (dev_t)device_vnode->v_rdev; + maj = major(dev); + + if ((u_int)maj >= (u_int)nblkdev) + panic("Volume mounted on a device with invalid major number.\n"); + + error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p); + + vnode_put(device_vnode); + if (error != 0) { goto out2; + } } } device_vnode = NULLVP; @@ -629,9 +682,12 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi printf("%s() VFS_ROOT returned %d\n", __func__, error); goto out3; } - - /* VFS_ROOT provides reference so needref = 0 */ error = vnode_label(mp, NULL, rvp, NULL, 0, ctx); + /* + * drop reference provided by VFS_ROOT + */ + vnode_put(rvp); + if (error) goto out3; } @@ -654,7 +710,12 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi mount_generation++; name_cache_unlock(); - vnode_ref(vp); + error = vnode_ref(vp); + if (error != 0) { + goto out4; + } + + have_usecount = TRUE; error = checkdirs(vp, ctx); if (error != 0) { @@ -667,7 +728,11 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi */ (void)VFS_START(mp, 0, ctx); - mount_list_add(mp); + error = mount_list_add(mp); + if (error != 0) { + goto out4; + } + lck_rw_done(&mp->mnt_rwlock); is_rwlock_locked = FALSE; @@ -704,7 +769,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT; } /* increment the operations count */ - OSAddAtomic(1, (SInt32 
*)&vfs_nummntops); + OSAddAtomic(1, &vfs_nummntops); enablequotas(mp, ctx); if (device_vnode) { @@ -730,8 +795,8 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi mount_list_unlock(); if (device_vnode ) { - VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx); vnode_rele(device_vnode); + VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx); } lck_rw_done(&mp->mnt_rwlock); is_rwlock_locked = FALSE; @@ -751,20 +816,29 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi vnode_put(devvp); vnode_put(vp); + /* Note that we've changed something in the parent directory */ + post_event_if_success(pvp, error, NOTE_WRITE); + vnode_put(pvp); + return(error); + out4: (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx); if (device_vnode != NULLVP) { + vnode_rele(device_vnode); VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE, ctx); - + did_rele = TRUE; } vnode_lock_spin(vp); vp->v_mountedhere = (mount_t) 0; vnode_unlock(vp); - vnode_rele(vp); + + if (have_usecount) { + vnode_rele(vp); + } out3: - if (devpath && ((uap->flags & MNT_UPDATE) == 0)) + if (devpath && ((uap->flags & MNT_UPDATE) == 0) && (!did_rele)) vnode_rele(devvp); out2: if (devpath && devvp) @@ -778,12 +852,16 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi #if CONFIG_MACF mac_mount_label_destroy(mp); #endif + FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); + } + + if (vfsp_ref) { mount_list_lock(); vfsp->vfc_refcount--; mount_list_unlock(); - FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); } vnode_put(vp); + vnode_put(pvp); nameidone(&nd); return(error); @@ -800,17 +878,16 @@ enablequotas(struct mount *mp, vfs_context_t ctx) const char *qfextension[] = INITQFNAMES; /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */ - if ((strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) - && (strncmp( mp->mnt_vfsstat.f_fstypename, "ufs", sizeof("ufs")) != 0)) - 
return; - + if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) { + return; + } /* * Enable filesystem disk quotas if necessary. * We ignore errors as this should not interfere with final mount */ for (type=0; type < MAXQUOTAS; type++) { snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]); - NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE32, CAST_USER_ADDR_T(qfpath), ctx); + NDINIT(&qnd, LOOKUP, FOLLOW, UIO_SYSSPACE, CAST_USER_ADDR_T(qfpath), ctx); if (namei(&qnd) != 0) continue; /* option file to trigger quotas is not present */ vnode_put(qnd.ni_vp); @@ -930,7 +1007,7 @@ checkdirs(vnode_t olddp, vfs_context_t ctx) */ /* ARGSUSED */ int -unmount(__unused proc_t p, struct unmount_args *uap, __unused register_t *retval) +unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval) { vnode_t vp; struct mount *mp; @@ -1120,16 +1197,16 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) /* increment the operations count */ if (!error) - OSAddAtomic(1, (SInt32 *)&vfs_nummntops); + OSAddAtomic(1, &vfs_nummntops); if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) { /* hold an io reference and drop the usecount before close */ devvp = mp->mnt_devvp; - vnode_clearmountedon(devvp); vnode_getalways(devvp); vnode_rele(devvp); VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? 
FREAD : FREAD|FWRITE, ctx); + vnode_clearmountedon(devvp); vnode_put(devvp); } lck_rw_done(&mp->mnt_rwlock); @@ -1178,7 +1255,10 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) wakeup((caddr_t)mp); if (!error) { if ((coveredvp != NULLVP)) { + vnode_t pvp; + vnode_getwithref(coveredvp); + pvp = vnode_getparent(coveredvp); vnode_rele(coveredvp); vnode_lock_spin(coveredvp); if(mp->mnt_crossref == 0) { @@ -1193,6 +1273,11 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) vnode_unlock(coveredvp); } vnode_put(coveredvp); + + if (pvp) { + lock_vnode_and_post(pvp, NOTE_WRITE); + vnode_put(pvp); + } } else if (mp->mnt_flag & MNT_ROOTFS) { mount_lock_destroy(mp); #if CONFIG_MACF @@ -1241,14 +1326,14 @@ struct ctldebug debug0 = { "syncprt", &syncprt }; int print_vmpage_stat=0; static int -sync_callback(mount_t mp, __unused void * arg) +sync_callback(mount_t mp, void * arg) { int asyncflag; if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; - VFS_SYNC(mp, MNT_NOWAIT, vfs_context_current()); + VFS_SYNC(mp, arg ? 
MNT_WAIT : MNT_NOWAIT, vfs_context_current()); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; } @@ -1256,20 +1341,29 @@ sync_callback(mount_t mp, __unused void * arg) } -extern unsigned int vp_pagein, vp_pgodirty, vp_pgoclean; -extern unsigned int dp_pgins, dp_pgouts; +#include + +clock_sec_t sync_wait_time = 0; /* ARGSUSED */ int -sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *retval) +sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval) { + clock_nsec_t nsecs; vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); + + { + static fsid_t fsid = { { 0, 0 } }; + + clock_get_calendar_microtime(&sync_wait_time, &nsecs); + vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL); + wakeup((caddr_t)&sync_wait_time); + } + { if(print_vmpage_stat) { vm_countdirtypages(); - printf("VP: %d: %d: %d: %d: %d\n", vp_pgodirty, vp_pgoclean, vp_pagein, - dp_pgins, dp_pgouts); } } #if DIAGNOSTIC @@ -1283,10 +1377,10 @@ sync(__unused proc_t p, __unused struct sync_args *uap, __unused register_t *ret * Change filesystem quotas. 
*/ #if QUOTA -static int quotactl_funneled(proc_t p, struct quotactl_args *uap, register_t *retval); +static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval); int -quotactl(proc_t p, struct quotactl_args *uap, register_t *retval) +quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval) { boolean_t funnel_state; int error; @@ -1298,7 +1392,7 @@ quotactl(proc_t p, struct quotactl_args *uap, register_t *retval) } static int -quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retval) +quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval) { struct mount *mp; int error, quota_cmd, quota_status; @@ -1308,7 +1402,7 @@ quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retv vfs_context_t ctx = vfs_context_current(); struct dqblk my_dqblk; - AUDIT_ARG(uid, uap->uid, 0, 0, 0); + AUDIT_ARG(uid, uap->uid); AUDIT_ARG(cmd, uap->cmd); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, ctx); @@ -1393,7 +1487,7 @@ quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused register_t *retv } #else int -quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused register_t *retval) +quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval) { return (EOPNOTSUPP); } @@ -1409,7 +1503,7 @@ quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused registe */ /* ARGSUSED */ int -statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval) +statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval) { struct mount *mp; struct vfsstatfs *sp; @@ -1442,7 +1536,7 @@ statfs(__unused proc_t p, struct statfs_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused register_t *retval) +fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval) { vnode_t vp; struct mount *mp; @@ -1509,7 
+1603,7 @@ statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp) * Get file system statistics in 64-bit mode */ int -statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t *retval) +statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval) { struct mount *mp; struct vfsstatfs *sp; @@ -1542,7 +1636,7 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused register_t * Get file system statistics in 64-bit mode */ int -fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused register_t *retval) +fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval) { struct vnode *vp; struct mount *mp; @@ -1596,9 +1690,9 @@ getfsstat_callback(mount_t mp, void * arg) sp = &mp->mnt_vfsstat; /* * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT. */ - if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) && + if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) && (error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT))) { KAUTH_DEBUG("vfs_update_vfsstat returned %d", error); @@ -1645,19 +1739,40 @@ getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval) return (__mac_getfsstat(p, &muap, retval)); } +/* + * __mac_getfsstat: Get MAC-related file system statistics + * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * retval Count of file system statistics (N stats) + * + * Indirect: uap->bufsize Buffer size + * uap->macsize MAC info size + * uap->buf Buffer where information will be returned + * uap->mac MAC info + * uap->flags File system flags + * + * + * Returns: 0 Success + * !0 Not success + * + */ int __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval) { user_addr_t sfsp; user_addr_t *mp; - int count, maxcount; + size_t count, 
maxcount, bufsize, macsize; struct getfsstat_struct fst; + bufsize = (size_t) uap->bufsize; + macsize = (size_t) uap->macsize; + if (IS_64BIT_PROCESS(p)) { - maxcount = uap->bufsize / sizeof(struct user_statfs); + maxcount = bufsize / sizeof(struct user64_statfs); } else { - maxcount = uap->bufsize / sizeof(struct statfs); + maxcount = bufsize / sizeof(struct user32_statfs); } sfsp = uap->buf; count = 0; @@ -1668,20 +1783,31 @@ __mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval if (uap->mac != USER_ADDR_NULL) { u_int32_t *mp0; int error; - int i; + unsigned int i; - count = (int)(uap->macsize / (IS_64BIT_PROCESS(p) ? 8 : 4)); + count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4)); if (count != maxcount) return (EINVAL); /* Copy in the array */ - MALLOC(mp0, u_int32_t *, uap->macsize, M_MACTEMP, M_WAITOK); - error = copyin(uap->mac, mp0, uap->macsize); - if (error) + MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK); + if (mp0 == NULL) { + return (ENOMEM); + } + + error = copyin(uap->mac, mp0, macsize); + if (error) { + FREE(mp0, M_MACTEMP); return (error); + } /* Normalize to an array of user_addr_t */ MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK); + if (mp == NULL) { + FREE(mp0, M_MACTEMP); + return (ENOMEM); + } + for (i = 0; i < count; i++) { if (IS_64BIT_PROCESS(p)) mp[i] = ((user_addr_t *)mp0)[i]; @@ -1728,10 +1854,15 @@ getfsstat64_callback(mount_t mp, void * arg) if (fstp->sfsp && fstp->count < fstp->maxcount) { sp = &mp->mnt_vfsstat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT is specified, do not refresh the fsstat + * cache. MNT_WAIT overrides MNT_NOWAIT. + * + * We treat MNT_DWAIT as MNT_WAIT for all instances of + * getfsstat, since the constants are out of the same + * namespace. 
*/ - if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & MNT_WAIT)) && + if (((fstp->flags & MNT_NOWAIT) == 0 || + (fstp->flags & (MNT_WAIT | MNT_DWAIT))) && (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) { KAUTH_DEBUG("vfs_update_vfsstat returned %d", error); return(VFS_RETURNED); @@ -1784,13 +1915,6 @@ getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval) return (0); } -#if COMPAT_GETFSSTAT -ogetfsstat(proc_t p, struct getfsstat_args *uap, register_t *retval) -{ - return (ENOTSUP); -} -#endif - /* * Change current working directory to a given file descriptor. */ @@ -1806,6 +1930,7 @@ common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread) int error; vfs_context_t ctx = vfs_context_current(); + AUDIT_ARG(fd, uap->fd); if (per_thread && uap->fd == -1) { /* * Switching back from per-thread to per process CWD; verify we @@ -1873,7 +1998,7 @@ common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread) uthread_t uth = get_bsdthread_info(th); tvp = uth->uu_cdir; uth->uu_cdir = vp; - OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_THCWD, &p->p_flag); } else { vnode_rele(vp); return (ENOENT); @@ -1898,19 +2023,19 @@ common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread) } int -fchdir(proc_t p, struct fchdir_args *uap, __unused register_t *retval) +fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval) { return common_fchdir(p, uap, 0); } int -__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused register_t *retval) +__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval) { return common_fchdir(p, (void *)uap, 1); } /* - * Change current working directory (``.''). + * Change current working directory ("."). 
* * Returns: 0 Success * change_dir:ENOTDIR @@ -1947,7 +2072,7 @@ common_chdir(proc_t p, struct chdir_args *uap, int per_thread) uthread_t uth = get_bsdthread_info(th); tvp = uth->uu_cdir; uth->uu_cdir = nd.ni_vp; - OSBitOrAtomic(P_THCWD, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_THCWD, &p->p_flag); } else { vnode_rele(nd.ni_vp); return (ENOENT); @@ -1965,14 +2090,49 @@ common_chdir(proc_t p, struct chdir_args *uap, int per_thread) return (0); } + +/* + * chdir + * + * Change current working directory (".") for the entire process + * + * Parameters: p Process requesting the call + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect parameters: uap->path Directory path + * + * Returns: 0 Success + * common_chdir: ENOTDIR + * common_chdir: ENOENT No such file or directory + * common_chdir: ??? + * + */ int -chdir(proc_t p, struct chdir_args *uap, __unused register_t *retval) +chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval) { return common_chdir(p, (void *)uap, 0); } +/* + * __pthread_chdir + * + * Change current working directory (".") for a single thread + * + * Parameters: p Process requesting the call + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect parameters: uap->path Directory path + * + * Returns: 0 Success + * common_chdir: ENOTDIR + * common_chdir: ENOENT No such file or directory + * common_chdir: ??? 
+ * + */ int -__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t *retval) +__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval) { return common_chdir(p, (void *)uap, 1); } @@ -1983,7 +2143,7 @@ __pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused register_t */ /* ARGSUSED */ int -chroot(proc_t p, struct chroot_args *uap, __unused register_t *retval) +chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval) { struct filedesc *fdp = p->p_fd; int error; @@ -2082,10 +2242,11 @@ change_dir(struct nameidata *ndp, vfs_context_t ctx) * dupfdopen:??? * VNOP_ADVLOCK:??? * vnode_setsize:??? + * + * XXX Need to implement uid, gid */ -#warning XXX implement uid, gid int -open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, register_t *retval) +open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval) { proc_t p = vfs_context_proc(ctx); uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx)); @@ -2259,8 +2420,7 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v } /* - * An open system call using an extended argument list compared to the regular - * system call 'open'. + * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)). * * Parameters: p Process requesting the open * uap User argument descriptor (see below) @@ -2283,7 +2443,7 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v * in the code they originated. 
*/ int -open_extended(proc_t p, struct open_extended_args *uap, register_t *retval) +open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval) { struct filedesc *fdp = p->p_fd; int ciferror; @@ -2292,6 +2452,8 @@ open_extended(proc_t p, struct open_extended_args *uap, register_t *retval) struct nameidata nd; int cmode; + AUDIT_ARG(owner, uap->uid, uap->gid); + xsecdst = NULL; if ((uap->xsecurity != USER_ADDR_NULL) && ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) @@ -2317,15 +2479,14 @@ open_extended(proc_t p, struct open_extended_args *uap, register_t *retval) } int -open(proc_t p, struct open_args *uap, register_t *retval) +open(proc_t p, struct open_args *uap, int32_t *retval) { __pthread_testcancel(1); return(open_nocancel(p, (struct open_nocancel_args *)uap, retval)); } - int -open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval) +open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval) { struct filedesc *fdp = p->p_fd; struct vnode_attr va; @@ -2349,7 +2510,7 @@ open_nocancel(proc_t p, struct open_nocancel_args *uap, register_t *retval) static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap); int -mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval) +mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval) { struct vnode_attr va; vfs_context_t ctx = vfs_context_current(); @@ -2367,7 +2528,7 @@ mknod(proc_t p, struct mknod_args *uap, __unused register_t *retval) return(mkfifo1(ctx, uap->path, &va)); AUDIT_ARG(mode, uap->mode); - AUDIT_ARG(dev, uap->dev); + AUDIT_ARG(value32, uap->dev); if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag))) return (error); @@ -2515,8 +2676,7 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) /* - * A mkfifo system call using an extended argument list compared to the regular - * system call 'mkfifo'. 
+ * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)). * * Parameters: p Process requesting the open * uap User argument descriptor (see below) @@ -2537,12 +2697,14 @@ mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap) * in the code they originated. */ int -mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t *retval) +mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval) { int ciferror; kauth_filesec_t xsecdst; struct vnode_attr va; + AUDIT_ARG(owner, uap->uid, uap->gid); + xsecdst = KAUTH_FILESEC_NONE; if (uap->xsecurity != USER_ADDR_NULL) { if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) @@ -2567,7 +2729,7 @@ mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused register_t /* ARGSUSED */ int -mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval) +mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval) { struct vnode_attr va; @@ -2577,6 +2739,83 @@ mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval) return(mkfifo1(vfs_context_current(), uap->path, &va)); } + +static char * +my_strrchr(char *p, int ch) +{ + char *save; + + for (save = NULL;; ++p) { + if (*p == ch) + save = p; + if (!*p) + return(save); + } + /* NOTREACHED */ +} + +extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path); + +int +safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path) +{ + int ret, len = _len; + + *truncated_path = 0; + ret = vn_getpath(dvp, path, &len); + if (ret == 0 && len < (MAXPATHLEN - 1)) { + if (leafname) { + path[len-1] = '/'; + len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1; + if (len > MAXPATHLEN) { + char *ptr; + + // the string got truncated! 
+ *truncated_path = 1; + ptr = my_strrchr(path, '/'); + if (ptr) { + *ptr = '\0'; // chop off the string at the last directory component + } + len = strlen(path) + 1; + } + } + } else if (ret == 0) { + *truncated_path = 1; + } else if (ret != 0) { + struct vnode *mydvp=dvp; + + if (ret != ENOSPC) { + printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n", + dvp, dvp->v_name ? dvp->v_name : "no-name", ret); + } + *truncated_path = 1; + + do { + if (mydvp->v_parent != NULL) { + mydvp = mydvp->v_parent; + } else if (mydvp->v_mount) { + strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len); + break; + } else { + // no parent and no mount point? only thing is to punt and say "/" changed + strlcpy(path, "/", _len); + len = 2; + mydvp = NULL; + } + + if (mydvp == NULL) { + break; + } + + len = _len; + ret = vn_getpath(mydvp, path, &len); + } while (ret == ENOSPC); + } + + return len; +} + + /* * Make a hard file link. * @@ -2590,15 +2829,18 @@ mkfifo(proc_t p, struct mkfifo_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -link(__unused proc_t p, struct link_args *uap, __unused register_t *retval) +link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) { vnode_t vp, dvp, lvp; struct nameidata nd; vfs_context_t ctx = vfs_context_current(); int error; +#if CONFIG_FSE fse_info finfo; +#endif int need_event, has_listeners; char *target_path = NULL; + int truncated=0; vp = dvp = lvp = NULLVP; @@ -2693,13 +2935,7 @@ link(__unused proc_t p, struct link_args *uap, __unused register_t *retval) goto out2; } - len = MAXPATHLEN; - vn_getpath(dvp, target_path, &len); - if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - target_path[len-1] = '/'; - strlcpy(&target_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len); - len += nd.ni_cnd.cn_namelen; - } + len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated); if (has_listeners) { /* build the path to file we are linking to */ @@ -2726,12 +2962,21 @@ 
link(__unused proc_t p, struct link_args *uap, __unused register_t *retval) if (need_event) { /* construct fsevent */ if (get_fse_info(vp, &finfo, ctx) == 0) { + if (truncated) { + finfo.mode |= FSE_TRUNCATED_PATH; + } + // build the path to the destination of the link add_fsevent(FSE_CREATE_FILE, ctx, FSE_ARG_STRING, len, target_path, FSE_ARG_FINFO, &finfo, FSE_ARG_DONE); } + if (vp->v_parent) { + add_fsevent(FSE_STAT_CHANGED, ctx, + FSE_ARG_VNODE, vp->v_parent, + FSE_ARG_DONE); + } } #endif } @@ -2760,7 +3005,7 @@ link(__unused proc_t p, struct link_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval) +symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) { struct vnode_attr va; char *path; @@ -2884,11 +3129,10 @@ symlink(proc_t p, struct symlink_args *uap, __unused register_t *retval) /* * Delete a whiteout from the filesystem. + * XXX authorization not implmented for whiteouts */ -/* ARGSUSED */ -#warning XXX authorization not implmented for whiteouts int -undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retval) +undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval) { int error; struct nameidata nd; @@ -2921,6 +3165,7 @@ undelete(__unused proc_t p, struct undelete_args *uap, __unused register_t *retv return (error); } + /* * Delete a name from the filesystem. 
*/ @@ -2932,12 +3177,14 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) int error; struct componentname *cnp; char *path = NULL; - int len; + int len=0; +#if CONFIG_FSE fse_info finfo; +#endif int flags = 0; int need_event = 0; int has_listeners = 0; - + int truncated_path=0; #if NAMEDRSRCFORK /* unlink or delete is allowed on rsrc forks and named streams */ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; @@ -2949,6 +3196,7 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) error = namei(ndp); if (error) return (error); + dvp = ndp->ni_dvp; vp = ndp->ni_vp; @@ -3002,8 +3250,8 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) error = ENOMEM; goto out; } - len = MAXPATHLEN; - vn_getpath(vp, path, &len); + + len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path); } #if NAMEDRSRCFORK @@ -3041,6 +3289,9 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) if (vp->v_flag & VISHARDLINK) { get_fse_info(vp, &finfo, ctx); } + if (truncated_path) { + finfo.mode |= FSE_TRUNCATED_PATH; + } add_fsevent(FSE_DELETE, ctx, FSE_ARG_STRING, len, path, FSE_ARG_FINFO, &finfo, @@ -3057,14 +3308,15 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) */ out: #if NAMEDRSRCFORK - /* recycle deleted rsrc fork to force reclaim on shadow file if necessary */ - if ((vnode_isnamedstream(ndp->ni_vp)) && - (ndp->ni_vp->v_parent != NULLVP) && - (vnode_isshadow(ndp->ni_vp))) { - vnode_recycle(ndp->ni_vp); - } + /* recycle the deleted rsrc fork vnode to force a reclaim, which + * will cause its shadow file to go away if necessary. + */ + if ((vnode_isnamedstream(ndp->ni_vp)) && + (ndp->ni_vp->v_parent != NULLVP) && + vnode_isshadow(ndp->ni_vp)) { + vnode_recycle(ndp->ni_vp); + } #endif - nameidone(ndp); vnode_put(dvp); vnode_put(vp); @@ -3075,7 +3327,7 @@ unlink1(vfs_context_t ctx, struct nameidata *ndp, int nodelbusy) * Delete a name from the filesystem using POSIX semantics. 
*/ int -unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval) +unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval) { struct nameidata nd; vfs_context_t ctx = vfs_context_current(); @@ -3088,7 +3340,7 @@ unlink(__unused proc_t p, struct unlink_args *uap, __unused register_t *retval) * Delete a name from the filesystem using Carbon semantics. */ int -delete(__unused proc_t p, struct delete_args *uap, __unused register_t *retval) +delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval) { struct nameidata nd; vfs_context_t ctx = vfs_context_current(); @@ -3172,6 +3424,12 @@ lseek(proc_t p, struct lseek_args *uap, off_t *retval) } } } + + /* + * An lseek can affect whether data is "available to read." Use + * hint of NOTE_NONE so no EVFILT_VNODE events fire + */ + post_event_if_success(vp, error, NOTE_NONE); (void)vnode_put(vp); file_drop(uap->fd); return (error); @@ -3238,12 +3496,15 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) /* - * access_extended + * access_extended: Check access permissions in bulk. * - * Description: uap->entries Pointer to argument descriptor - * uap->size Size of the area pointed to by - * the descriptor - * uap->results Pointer to the results array + * Description: uap->entries Pointer to an array of accessx + * descriptor structs, plus one or + * more NULL terminated strings (see + * "Notes" section below). + * uap->size Size of the area pointed to by + * uap->entries. + * uap->results Pointer to the results array. 
* * Returns: 0 Success * ENOMEM Insufficient memory @@ -3261,7 +3522,7 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) * uap->results Array contents modified * * Notes: The uap->entries are structured as an arbitrary length array - * of accessx descriptors, followed by one or more NULL terniated + * of accessx descriptors, followed by one or more NULL terminated * strings * * struct accessx_descriptor[0] @@ -3270,7 +3531,7 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) * char name_data[0]; * * We determine the entry count by walking the buffer containing - * the uap->entries argument descriptor. For each descrptor we + * the uap->entries argument descriptor. For each descriptor we * see, the valid values for the offset ad_name_offset will be * in the byte range: * @@ -3279,12 +3540,12 @@ access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx) * [ uap->entries + uap->size - 2 ] * * since we must have at least one string, and the string must - * be at least one character plus the NUL terminator in length. + * be at least one character plus the NULL terminator in length. * * XXX: Need to support the check-as uid argument */ int -access_extended(__unused proc_t p, struct access_extended_args *uap, __unused register_t *retval) +access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval) { struct accessx_descriptor *input = NULL; errno_t *result = NULL; @@ -3328,6 +3589,8 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused re if (error) goto out; + AUDIT_ARG(opaque, input, uap->size); + /* * Force NUL termination of the copyin buffer to avoid nami() running * off the end. 
If the caller passes us bogus data, they may get a @@ -3482,6 +3745,8 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused re } } + AUDIT_ARG(data, result, sizeof(errno_t), desc_actual); + /* copy out results */ error = copyout(result, uap->results, desc_actual * sizeof(errno_t)); @@ -3512,13 +3777,12 @@ access_extended(__unused proc_t p, struct access_extended_args *uap, __unused re * access1: */ int -access(__unused proc_t p, struct access_args *uap, __unused register_t *retval) +access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval) { int error; struct nameidata nd; int niopts; struct vfs_context context; - #if NAMEDRSRCFORK int is_namedstream = 0; #endif @@ -3548,20 +3812,20 @@ access(__unused proc_t p, struct access_args *uap, __unused register_t *retval) goto out; #if NAMEDRSRCFORK - /* Grab reference on the shadow stream file vnode to - * force an inactive on release which will mark it for - * recycle + /* Grab reference on the shadow stream file vnode to + * force an inactive on release which will mark it + * for recycle. 
*/ if (vnode_isnamedstream(nd.ni_vp) && - (nd.ni_vp->v_parent != NULLVP) && - (vnode_isshadow(nd.ni_vp))) { + (nd.ni_vp->v_parent != NULLVP) && + vnode_isshadow(nd.ni_vp)) { is_namedstream = 1; vnode_ref(nd.ni_vp); } #endif error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context); - + #if NAMEDRSRCFORK if (is_namedstream) { vnode_rele(nd.ni_vp); @@ -3589,10 +3853,16 @@ access(__unused proc_t p, struct access_args *uap, __unused register_t *retval) static int stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) { - struct stat sb; - struct stat64 sb64; - struct user_stat user_sb; - struct user_stat64 user_sb64; + union { + struct stat sb; + struct stat64 sb64; + } source; + union { + struct user64_stat user64_sb; + struct user32_stat user32_sb; + struct user64_stat64 user64_sb64; + struct user32_stat64 user32_sb64; + } dest; caddr_t sbp; int error, my_size; kauth_filesec_t fsec; @@ -3608,21 +3878,19 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec if (error) return (error); fsec = KAUTH_FILESEC_NONE; - if (isstat64 != 0) - statptr = (void *)&sb64; - else - statptr = (void *)&sb; + + statptr = (void *)&source; #if NAMEDRSRCFORK - /* Grab reference on the shadow stream file vnode to - * force an inactive on release which will mark it for - * recycle. + /* Grab reference on the shadow stream file vnode to + * force an inactive on release which will mark it + * for recycle. 
*/ if (vnode_isnamedstream(ndp->ni_vp) && - (ndp->ni_vp->v_parent != NULLVP) && - (vnode_isshadow(ndp->ni_vp))) { + (ndp->ni_vp->v_parent != NULLVP) && + vnode_isshadow(ndp->ni_vp)) { is_namedstream = 1; - vnode_ref (ndp->ni_vp); + vnode_ref(ndp->ni_vp); } #endif @@ -3630,10 +3898,9 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec #if NAMEDRSRCFORK if (is_namedstream) { - vnode_rele (ndp->ni_vp); + vnode_rele(ndp->ni_vp); } #endif - vnode_put(ndp->ni_vp); nameidone(ndp); @@ -3641,41 +3908,43 @@ stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsec return (error); /* Zap spare fields */ if (isstat64 != 0) { - sb64.st_lspare = 0; - sb64.st_qspare[0] = 0LL; - sb64.st_qspare[1] = 0LL; + source.sb64.st_lspare = 0; + source.sb64.st_qspare[0] = 0LL; + source.sb64.st_qspare[1] = 0LL; if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { - munge_stat64(&sb64, &user_sb64); - my_size = sizeof(user_sb64); - sbp = (caddr_t)&user_sb64; + munge_user64_stat64(&source.sb64, &dest.user64_sb64); + my_size = sizeof(dest.user64_sb64); + sbp = (caddr_t)&dest.user64_sb64; } else { - my_size = sizeof(sb64); - sbp = (caddr_t)&sb64; + munge_user32_stat64(&source.sb64, &dest.user32_sb64); + my_size = sizeof(dest.user32_sb64); + sbp = (caddr_t)&dest.user32_sb64; } /* * Check if we raced (post lookup) against the last unlink of a file. 
*/ - if ((sb64.st_nlink == 0) && S_ISREG(sb64.st_mode)) { - sb64.st_nlink = 1; + if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) { + source.sb64.st_nlink = 1; } } else { - sb.st_lspare = 0; - sb.st_qspare[0] = 0LL; - sb.st_qspare[1] = 0LL; + source.sb.st_lspare = 0; + source.sb.st_qspare[0] = 0LL; + source.sb.st_qspare[1] = 0LL; if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { - munge_stat(&sb, &user_sb); - my_size = sizeof(user_sb); - sbp = (caddr_t)&user_sb; + munge_user64_stat(&source.sb, &dest.user64_sb); + my_size = sizeof(dest.user64_sb); + sbp = (caddr_t)&dest.user64_sb; } else { - my_size = sizeof(sb); - sbp = (caddr_t)&sb; + munge_user32_stat(&source.sb, &dest.user32_sb); + my_size = sizeof(dest.user32_sb); + sbp = (caddr_t)&dest.user32_sb; } /* * Check if we raced (post lookup) against the last unlink of a file. */ - if ((sb.st_nlink == 0) && S_ISREG(sb.st_mode)) { - sb.st_nlink = 1; + if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) { + source.sb.st_nlink = 1; } } if ((error = copyout(sbp, ub, my_size)) != 0) @@ -3728,8 +3997,24 @@ stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecu return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); } +/* + * stat_extended: Get file status; with extended security (ACL). 
+ * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path of file to get status from + * uap->ub User buffer (holds file status info) + * uap->xsecurity ACL to get (extended security) + * uap->xsecurity_size Size of ACL + * + * Returns: 0 Success + * !0 errno value + * + */ int -stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused register_t *retval) +stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval) { return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); } @@ -3739,19 +4024,35 @@ stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused regist * stat1:??? [see stat1() in this file] */ int -stat(__unused proc_t p, struct stat_args *uap, __unused register_t *retval) +stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval) { return(stat1(uap->path, uap->ub, 0, 0, 0)); } int -stat64(__unused proc_t p, struct stat64_args *uap, __unused register_t *retval) +stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval) { return(stat1(uap->path, uap->ub, 0, 0, 1)); } +/* + * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL). 
+ * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path of file to get status from + * uap->ub User buffer (holds file status info) + * uap->xsecurity ACL to get (extended security) + * uap->xsecurity_size Size of ACL + * + * Returns: 0 Success + * !0 errno value + * + */ int -stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused register_t *retval) +stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval) { return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); } @@ -3770,25 +4071,59 @@ lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsec return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); } +/* + * lstat_extended: Get file status; does not follow links; with extended security (ACL). + * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path of file to get status from + * uap->ub User buffer (holds file status info) + * uap->xsecurity ACL to get (extended security) + * uap->xsecurity_size Size of ACL + * + * Returns: 0 Success + * !0 errno value + * + */ int -lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused register_t *retval) +lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval) { return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); } int -lstat(__unused proc_t p, struct lstat_args *uap, __unused register_t *retval) +lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval) { return(lstat1(uap->path, uap->ub, 0, 0, 0)); } + int -lstat64(__unused proc_t p, struct lstat64_args *uap, __unused register_t *retval) +lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval) { return(lstat1(uap->path, uap->ub, 0, 0, 1)); } +/* + * lstat64_extended: Get file status; can handle 
large inode numbers; does not + * follow links; with extended security (ACL). + * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path of file to get status from + * uap->ub User buffer (holds file status info) + * uap->xsecurity ACL to get (extended security) + * uap->xsecurity_size Size of ACL + * + * Returns: 0 Success + * !0 errno value + * + */ int -lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused register_t *retval) +lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval) { return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); } @@ -3810,7 +4145,7 @@ lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused */ /* ARGSUSED */ int -pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval) +pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval) { int error; struct nameidata nd; @@ -3834,7 +4169,7 @@ pathconf(__unused proc_t p, struct pathconf_args *uap, register_t *retval) */ /* ARGSUSED */ int -readlink(proc_t p, struct readlink_args *uap, register_t *retval) +readlink(proc_t p, struct readlink_args *uap, int32_t *retval) { vnode_t vp; uio_t auio; @@ -3869,7 +4204,8 @@ readlink(proc_t p, struct readlink_args *uap, register_t *retval) error = VNOP_READLINK(vp, auio, ctx); } vnode_put(vp); - // LP64todo - fix this + + /* Safe: uio_resid() is bounded above by "count", and "count" is an int */ *retval = uap->count - (int)uio_resid(auio); return (error); } @@ -3918,7 +4254,7 @@ chflags1(vnode_t vp, int flags, vfs_context_t ctx) */ /* ARGSUSED */ int -chflags(__unused proc_t p, struct chflags_args *uap, __unused register_t *retval) +chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval) { vnode_t vp; vfs_context_t ctx = vfs_context_current(); @@ -3944,7 +4280,7 @@ chflags(__unused proc_t p, struct chflags_args *uap, 
__unused register_t *retval */ /* ARGSUSED */ int -fchflags(__unused proc_t p, struct fchflags_args *uap, __unused register_t *retval) +fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval) { vnode_t vp; int error; @@ -3985,8 +4321,8 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) kauth_action_t action; int error; - AUDIT_ARG(mode, (mode_t)vap->va_mode); -#warning XXX audit new args + AUDIT_ARG(mode, vap->va_mode); + /* XXX audit new args */ #if NAMEDSTREAMS /* chmod calls are not allowed for resource forks. */ @@ -4016,7 +4352,7 @@ chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap) /* - * Change mode of a file given path name. + * Change mode of a file given a path name. * * Returns: 0 Success * namei:??? [anything namei can return] @@ -4039,8 +4375,8 @@ chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) } /* - * A chmod system call using an extended argument list compared to the regular - * system call 'mkfifo'. + * chmod_extended: Change the mode of a file given a path name; with extended + * argument list (including extended security (ACL)). * * Parameters: p Process requesting the open * uap User argument descriptor (see below) @@ -4061,12 +4397,14 @@ chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) * in the code they originated. */ int -chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused register_t *retval) +chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval) { int error; struct vnode_attr va; kauth_filesec_t xsecdst; + AUDIT_ARG(owner, uap->uid, uap->gid); + VATTR_INIT(&va); if (uap->mode != -1) VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); @@ -4103,7 +4441,7 @@ chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused regi * chmod1:??? 
[anything chmod1 can return] */ int -chmod(__unused proc_t p, struct chmod_args *uap, __unused register_t *retval) +chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval) { struct vnode_attr va; @@ -4139,13 +4477,33 @@ fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap) return (error); } +/* + * fchmod_extended: Change mode of a file given a file descriptor; with + * extended argument list (including extended security (ACL)). + * + * Parameters: p Process requesting to change file mode + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->mode File mode to set (same as 'chmod') + * uap->uid UID to set + * uap->gid GID to set + * uap->xsecurity ACL to set (or delete) + * uap->fd File descriptor of file to change mode + * + * Returns: 0 Success + * !0 errno value + * + */ int -fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t *retval) +fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval) { int error; struct vnode_attr va; kauth_filesec_t xsecdst; + AUDIT_ARG(owner, uap->uid, uap->gid); + VATTR_INIT(&va); if (uap->mode != -1) VATTR_SET(&va, va_mode, uap->mode & ALLPERMS); @@ -4182,7 +4540,7 @@ fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused register_t } int -fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval) +fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval) { struct vnode_attr va; @@ -4198,7 +4556,7 @@ fchmod(proc_t p, struct fchmod_args *uap, __unused register_t *retval) */ /* ARGSUSED */ static int -chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, int follow) +chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow) { vnode_t vp; struct vnode_attr va; @@ -4249,13 +4607,13 @@ chown1(vfs_context_t ctx, struct chown_args *uap, __unused register_t *retval, i } int -chown(__unused proc_t p, struct chown_args *uap, register_t 
*retval) +chown(__unused proc_t p, struct chown_args *uap, int32_t *retval) { return chown1(vfs_context_current(), uap, retval, 1); } int -lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval) +lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval) { /* Argument list identical, but machine generated; cast for chown1() */ return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0); @@ -4266,7 +4624,7 @@ lchown(__unused proc_t p, struct lchown_args *uap, register_t *retval) */ /* ARGSUSED */ int -fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval) +fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval) { struct vnode_attr va; vfs_context_t ctx = vfs_context_current(); @@ -4325,7 +4683,6 @@ fchown(__unused proc_t p, struct fchown_args *uap, __unused register_t *retval) static int getutimes(user_addr_t usrtvp, struct timespec *tsp) { - struct user_timeval tv[2]; int error; if (usrtvp == USER_ADDR_NULL) { @@ -4336,19 +4693,20 @@ getutimes(user_addr_t usrtvp, struct timespec *tsp) tsp[1] = tsp[0]; } else { if (IS_64BIT_PROCESS(current_proc())) { + struct user64_timeval tv[2]; error = copyin(usrtvp, (void *)tv, sizeof(tv)); + if (error) + return (error); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); } else { - struct timeval old_tv[2]; - error = copyin(usrtvp, (void *)old_tv, sizeof(old_tv)); - tv[0].tv_sec = old_tv[0].tv_sec; - tv[0].tv_usec = old_tv[0].tv_usec; - tv[1].tv_sec = old_tv[1].tv_sec; - tv[1].tv_usec = old_tv[1].tv_usec; + struct user32_timeval tv[2]; + error = copyin(usrtvp, (void *)tv, sizeof(tv)); + if (error) + return (error); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); } - if (error) - return (error); - TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); - TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); } return 0; } @@ -4405,7 +4763,7 @@ setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, */ /* 
ARGSUSED */ int -utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval) +utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval) { struct timespec ts[2]; user_addr_t usrtvp; @@ -4444,7 +4802,7 @@ utimes(__unused proc_t p, struct utimes_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval) +futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval) { struct timespec ts[2]; vnode_t vp; @@ -4473,7 +4831,7 @@ futimes(__unused proc_t p, struct futimes_args *uap, __unused register_t *retval */ /* ARGSUSED */ int -truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retval) +truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval) { vnode_t vp; struct vnode_attr va; @@ -4516,7 +4874,7 @@ truncate(__unused proc_t p, struct truncate_args *uap, __unused register_t *retv */ /* ARGSUSED */ int -ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval) +ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval) { vfs_context_t ctx = vfs_context_current(); struct vnode_attr va; @@ -4575,24 +4933,78 @@ ftruncate(proc_t p, struct ftruncate_args *uap, register_t *retval) /* - * Sync an open file. + * Sync an open file with synchronized I/O _file_ integrity completion */ /* ARGSUSED */ int -fsync(proc_t p, struct fsync_args *uap, register_t *retval) +fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval) { __pthread_testcancel(1); - return(fsync_nocancel(p, (struct fsync_nocancel_args *)uap, retval)); + return(fsync_common(p, uap, MNT_WAIT)); +} + + +/* + * Sync an open file with synchronized I/O _file_ integrity completion + * + * Notes: This is a legacy support function that does not test for + * thread cancellation points. 
+ */ +/* ARGSUSED */ +int +fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval) +{ + return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT)); } + +/* + * Sync an open file with synchronized I/O _data_ integrity completion + */ +/* ARGSUSED */ int -fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *retval) +fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval) +{ + __pthread_testcancel(1); + return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT)); +} + + +/* + * fsync_common + * + * Common fsync code to support both synchronized I/O file integrity completion + * (normal fsync) and synchronized I/O data integrity completion (fdatasync). + * + * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which + * will only guarantee that the file data contents are retrievable. If + * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also + * includes additional metadata unnecessary for retrieving the file data + * contents, such as atime, mtime, ctime, etc., also be committed to stable + * storage. + * + * Parameters: p The process + * uap->fd The descriptor to synchronize + * flags The data integrity flags + * + * Returns: int Success + * fp_getfvp:EBADF Bad file descriptor + * fp_getfvp:ENOTSUP fd does not refer to a vnode + * VNOP_FSYNC:??? unspecified + * + * Notes: We use struct fsync_args because it is a short name, and all + * caller argument structures are otherwise identical. 
+ */ +static int +fsync_common(proc_t p, struct fsync_args *uap, int flags) { vnode_t vp; struct fileproc *fp; vfs_context_t ctx = vfs_context_current(); int error; + AUDIT_ARG(fd, uap->fd); + if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) return (error); if ( (error = vnode_getwithref(vp)) ) { @@ -4600,14 +5012,16 @@ fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *r return(error); } - error = VNOP_FSYNC(vp, MNT_WAIT, ctx); + AUDIT_ARG(vnpath, vp, ARG_VNODE1); + + error = VNOP_FSYNC(vp, flags, ctx); #if NAMEDRSRCFORK /* Sync resource fork shadow file if necessary. */ if ((error == 0) && (vp->v_flag & VISNAMEDSTREAM) && (vp->v_parent != NULLVP) && - (vnode_isshadow(vp)) && + vnode_isshadow(vp) && (fp->f_flags & FP_WRITTEN)) { (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); } @@ -4627,7 +5041,7 @@ fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused register_t *r */ /* ARGSUSED */ int -copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retval) +copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval) { vnode_t tvp, fvp, tdvp, sdvp; struct nameidata fromnd, tond; @@ -4710,7 +5124,7 @@ copyfile(__unused proc_t p, struct copyfile_args *uap, __unused register_t *retv */ /* ARGSUSED */ int -rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) +rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval) { vnode_t tvp, tdvp; vnode_t fvp, fdvp; @@ -4722,11 +5136,14 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) int need_event; const char *oname; char *from_name = NULL, *to_name = NULL; - int from_len, to_len; + int from_len=0, to_len=0; int holding_mntlock; mount_t locked_mp = NULL; vnode_t oparent; +#if CONFIG_FSE fse_info from_finfo, to_finfo; +#endif + int from_truncated=0, to_truncated; holding_mntlock = 0; do_retry = 0; @@ -4961,8 +5378,9 @@ rename(__unused proc_t p, struct rename_args *uap, 
__unused register_t *retval) * source. NOTE: Then the target is unlocked going into vnop_rename, * so not to cause locking problems. There is a single reference on tvp. * - * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE - * that correct behaviour then is just to remove the source (link) + * NOTE - that fvp == tvp also occurs if they are hard linked and + * that correct behaviour then is just to return success without doing + * anything. */ if (fvp == tvp && fdvp == tdvp) { if (fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && @@ -5069,18 +5487,8 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) error = ENOMEM; goto out1; } - from_len = MAXPATHLEN; - vn_getpath(fdvp, from_name, &from_len); - if ((from_len + 1 + fromnd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - if (from_len > 2) { - from_name[from_len-1] = '/'; - } else { - from_len--; - } - strlcpy(&from_name[from_len], fromnd.ni_cnd.cn_nameptr, MAXPATHLEN-from_len); - from_len += fromnd.ni_cnd.cn_namelen + 1; - from_name[from_len] = '\0'; - } + + from_len = safe_getpath(fdvp, fromnd.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated); GET_PATH(to_name); if (to_name == NULL) { @@ -5088,19 +5496,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) goto out1; } - to_len = MAXPATHLEN; - vn_getpath(tdvp, to_name, &to_len); - // if the path is not just "/", then append a "/" - if ((to_len + 1 + tond.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { - if (to_len > 2) { - to_name[to_len-1] = '/'; - } else { - to_len--; - } - strlcpy(&to_name[to_len], tond.ni_cnd.cn_nameptr, MAXPATHLEN-to_len); - to_len += tond.ni_cnd.cn_namelen + 1; - to_name[to_len] = '\0'; - } + to_len = safe_getpath(tdvp, tond.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated); } error = VNOP_RENAME(fdvp, fvp, &fromnd.ni_cnd, @@ -5120,10 +5516,10 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) /* * We may encounter a race in the 
VNOP where the destination didn't * exist when we did the namei, but it does by the time we go and - * try to create the entry. In this case, we should re-drive this rename - * call from the top again. Currently, only HFS bubbles out ERECYCLE, - * but other filesystem susceptible to this race could return it, too. - */ + * try to create the entry. In this case, we should re-drive this rename + * call from the top again. Currently, only HFS bubbles out ERECYCLE, + * but other filesystems susceptible to this race could return it, too. + */ if (error == ERECYCLE) { do_retry = 1; } @@ -5140,6 +5536,10 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) #if CONFIG_FSE if (from_name != NULL && to_name != NULL) { + if (from_truncated || to_truncated) { + // set it here since only the from_finfo gets reported up to user space + from_finfo.mode |= FSE_TRUNCATED_PATH; + } if (tvp) { add_fsevent(FSE_RENAME, ctx, FSE_ARG_STRING, from_len, from_name, @@ -5249,7 +5649,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) vnode_put(fvp); vnode_put(fdvp); } - + /* * If things changed after we did the namei, then we will re-drive * this rename call from the top. @@ -5258,7 +5658,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval) do_retry = 0; goto retry; } - + return (error); } @@ -5340,14 +5740,30 @@ mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap) return (error); } - +/* + * mkdir_extended: Create a directory; with extended security (ACL). 
+ * + * Parameters: p Process requesting to create the directory + * uap User argument descriptor (see below) + * retval (ignored) + * + * Indirect: uap->path Path of directory to create + * uap->mode Access permissions to set + * uap->xsecurity ACL to set + * + * Returns: 0 Success + * !0 Not success + * + */ int -mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *retval) +mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval) { int ciferror; kauth_filesec_t xsecdst; struct vnode_attr va; + AUDIT_ARG(owner, uap->uid, uap->gid); + xsecdst = NULL; if ((uap->xsecurity != USER_ADDR_NULL) && ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) @@ -5365,7 +5781,7 @@ mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused register_t *r } int -mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval) +mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval) { struct vnode_attr va; @@ -5380,14 +5796,15 @@ mkdir(proc_t p, struct mkdir_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval) +rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval) { vnode_t vp, dvp; int error; struct nameidata nd; vfs_context_t ctx = vfs_context_current(); - int restart_flag, oldvp_id = -1; + int restart_flag; + uint32_t oldvp_id = UINT32_MAX; /* * This loop exists to restart rmdir in the unlikely case that two @@ -5411,7 +5828,7 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval) * If being restarted check if the new vp * still has the same v_id. 
*/ - if (oldvp_id != -1 && oldvp_id != vp->v_id) { + if (oldvp_id != UINT32_MAX && oldvp_id != vp->v_id) { error = ENOENT; goto out; } @@ -5441,12 +5858,13 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval) } if (!error) { char *path = NULL; - int len; - fse_info finfo; + int len=0; int has_listeners = 0; int need_event = 0; - + int truncated = 0; #if CONFIG_FSE + fse_info finfo; + need_event = need_fsevent(FSE_DELETE, dvp); if (need_event) { get_fse_info(vp, &finfo, ctx); @@ -5459,8 +5877,13 @@ rmdir(__unused proc_t p, struct rmdir_args *uap, __unused register_t *retval) error = ENOMEM; goto out; } - len = MAXPATHLEN; - vn_getpath(vp, path, &len); + + len = safe_getpath(vp, NULL, path, MAXPATHLEN, &truncated); +#if CONFIG_FSE + if (truncated) { + finfo.mode |= FSE_TRUNCATED_PATH; + } +#endif } error = VNOP_RMDIR(dvp, vp, &nd.ni_cnd, ctx); @@ -5574,8 +5997,11 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, */ bufsize = 3 * MIN(uio_resid(uio), 87371) / 8; MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK); + if (bufptr == NULL) { + return ENOMEM; + } - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufptr, bufsize); auio->uio_offset = uio->uio_offset; @@ -5706,7 +6132,7 @@ getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *byt if (offset) { *offset = loff; } - // LP64todo - fix this + *bytesread = bufsize - uio_resid(auio); out: file_drop(fd); @@ -5715,10 +6141,9 @@ getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *byt int -getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_t *retval) +getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval) { off_t offset; - long loff; ssize_t bytesread; int error; @@ -5726,8 +6151,13 @@ getdirentries(__unused struct proc *p, struct getdirentries_args *uap, register_ error = 
getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0); if (error == 0) { - loff = (long)offset; - error = copyout((caddr_t)&loff, uap->basep, sizeof(long)); + if (proc_is64bit(p)) { + user64_long_t base = (user64_long_t)offset; + error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t)); + } else { + user32_long_t base = (user32_long_t)offset; + error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t)); + } *retval = bytesread; } return (error); @@ -5753,12 +6183,11 @@ getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ /* * Set the mode mask for creation of filesystem nodes. + * XXX implement xsecurity */ -#warning XXX implement xsecurity - #define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */ static int -umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval) +umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval) { struct filedesc *fdp; @@ -5771,9 +6200,22 @@ umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, register_t *retval) return (0); } - -int -umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval) +/* + * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL). 
+ * + * Parameters: p Process requesting to set the umask + * uap User argument descriptor (see below) + * retval umask of the process (parameter p) + * + * Indirect: uap->newmask umask to set + * uap->xsecurity ACL to set + * + * Returns: 0 Success + * !0 Not success + * + */ +int +umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval) { int ciferror; kauth_filesec_t xsecdst; @@ -5794,7 +6236,7 @@ umask_extended(proc_t p, struct umask_extended_args *uap, register_t *retval) } int -umask(proc_t p, struct umask_args *uap, register_t *retval) +umask(proc_t p, struct umask_args *uap, int32_t *retval) { return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval)); } @@ -5805,7 +6247,7 @@ umask(proc_t p, struct umask_args *uap, register_t *retval) */ /* ARGSUSED */ int -revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval) +revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval) { vnode_t vp; struct vnode_attr va; @@ -5822,6 +6264,16 @@ revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval) nameidone(&nd); + if (!(vnode_ischr(vp) || vnode_isblk(vp))) { + error = ENOTSUP; + goto out; + } + + if (vnode_isblk(vp) && vnode_ismountedon(vp)) { + error = EBUSY; + goto out; + } + #if CONFIG_MACF error = mac_vnode_check_revoke(ctx, vp); if (error) @@ -5835,7 +6287,7 @@ revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval) if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid && (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) goto out; - if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) + if (vp->v_usecount > 0 || (vnode_isaliased(vp))) VNOP_REVOKE(vp, REVOKEALL, ctx); out: vnode_put(vp); @@ -5860,7 +6312,7 @@ revoke(proc_t p, struct revoke_args *uap, __unused register_t *retval) */ /* ARGSUSED */ int -mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused register_t *retval) +mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused int32_t *retval) { 
return (ENOTSUP); } @@ -5872,7 +6324,7 @@ mkcomplex(__unused proc_t p, __unused struct mkcomplex_args *uap, __unused regis int statv(__unused proc_t p, __unused struct statv_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { return (ENOTSUP); /* We'll just return an error for now */ @@ -5885,7 +6337,7 @@ statv(__unused proc_t p, int lstatv(__unused proc_t p, __unused struct lstatv_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { return (ENOTSUP); /* We'll just return an error for now */ } /* end of lstatv system call */ @@ -5897,7 +6349,7 @@ lstatv(__unused proc_t p, int fstatv(__unused proc_t p, __unused struct fstatv_args *uap, - __unused register_t *retval) + __unused int32_t *retval) { return (ENOTSUP); /* We'll just return an error for now */ } /* end of fstatv system call */ @@ -5918,7 +6370,7 @@ fstatv(__unused proc_t p, /* ARGSUSED */ int -getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *retval) +getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval) { vnode_t vp; struct fileproc *fp; @@ -5997,14 +6449,13 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *ret action |= KAUTH_VNODE_SEARCH; if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) { - u_long ulcount = count; + /* Believe it or not, uap->options only has 32-bits of valid + * info, so truncate before extending again */ error = VNOP_READDIRATTR(vp, &attributelist, auio, count, - uap->options, (unsigned long *)&newstate, &eofflag, - &ulcount, ctx); - if (!error) - count = ulcount; + (u_long)(uint32_t)uap->options, &newstate, &eofflag, + &count, ctx); } (void)vnode_put(vp); @@ -6033,7 +6484,7 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, register_t *ret /* ARGSUSED */ int -exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused register_t *retval) +exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t 
*retval) { struct nameidata fnd, snd; @@ -6041,12 +6492,15 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused registe vnode_t fvp; vnode_t svp; int error; - u_long nameiflags; + u_int32_t nameiflags; char *fpath = NULL; char *spath = NULL; - int flen, slen; + int flen=0, slen=0; + int from_truncated=0, to_truncated=0; +#if CONFIG_FSE fse_info f_finfo, s_finfo; - +#endif + nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; @@ -6108,19 +6562,17 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused registe error = ENOMEM; goto out; } - flen = MAXPATHLEN; - slen = MAXPATHLEN; - if (vn_getpath(fvp, fpath, &flen) != 0 || fpath[0] == '\0') { - printf("exchange: vn_getpath(fvp=%p) failed <<%s>>\n", - fvp, fpath); - } - if (vn_getpath(svp, spath, &slen) != 0 || spath[0] == '\0') { - printf("exchange: vn_getpath(svp=%p) failed <<%s>>\n", - svp, spath); - } + + flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated); + slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated); + #if CONFIG_FSE get_fse_info(fvp, &f_finfo, ctx); get_fse_info(svp, &s_finfo, ctx); + if (from_truncated || to_truncated) { + // set it here since only the f_finfo gets reported up to user space + f_finfo.mode |= FSE_TRUNCATED_PATH; + } #endif } /* Ok, make the call */ @@ -6178,38 +6630,46 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused registe /* ARGSUSED */ int -searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval) +searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) { vnode_t vp; int error=0; int fserror = 0; struct nameidata nd; - struct user_fssearchblock searchblock; + struct user64_fssearchblock searchblock; struct searchstate *state; struct attrlist *returnattrs; + struct timeval timelimit; void *searchparams1,*searchparams2; uio_t auio = NULL; int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; - u_long nummatches; + uint32_t nummatches; int mallocsize; - u_long nameiflags; + uint32_t nameiflags; vfs_context_t ctx = vfs_context_current(); char uio_buf[ UIO_SIZEOF(1) ]; /* Start by copying in fsearchblock paramater list */ if (IS_64BIT_PROCESS(p)) { - error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); + error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); + timelimit.tv_sec = searchblock.timelimit.tv_sec; + timelimit.tv_usec = searchblock.timelimit.tv_usec; } else { - struct fssearchblock tmp_searchblock; + struct user32_fssearchblock tmp_searchblock; + error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock)); // munge into 64-bit version searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs); searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; searchblock.maxmatches = tmp_searchblock.maxmatches; - searchblock.timelimit.tv_sec = tmp_searchblock.timelimit.tv_sec; - searchblock.timelimit.tv_usec = tmp_searchblock.timelimit.tv_usec; + /* + * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary + * from a 32 bit long, and tv_usec is already a signed 32 bit int. 
+ */ + timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec; + timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec; searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1); searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1; searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2); @@ -6254,9 +6714,55 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval) if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) goto freeandexit; - - /* set up the uio structure which will contain the users return buffer */ + + /* + * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter, + * which is passed in with an attrreference_t, we need to inspect the buffer manually here. + * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 + * and searchparams2. To obviate the need for all searchfs-supporting filesystems to + * validate the user-supplied data offset of the attrreference_t, we'll do it here. 
+ */ + + if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) { + attrreference_t* string_ref; + u_int32_t* start_length; + user64_size_t param_length; + + /* validate searchparams1 */ + param_length = searchblock.sizeofsearchparams1; + /* skip the word that specifies length of the buffer */ + start_length= (u_int32_t*) searchparams1; + start_length= start_length+1; + string_ref= (attrreference_t*) start_length; + + /* ensure no negative offsets or too big offsets */ + if (string_ref->attr_dataoffset < 0 ) { + error = EINVAL; + goto freeandexit; + } + if (string_ref->attr_length > MAXPATHLEN) { + error = EINVAL; + goto freeandexit; + } + + /* Check for pointer overflow in the string ref */ + if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) { + error = EINVAL; + goto freeandexit; + } + + if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) { + error = EINVAL; + goto freeandexit; + } + if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) { + error = EINVAL; + goto freeandexit; + } + } + + /* set up the uio structure which will contain the users return buffer */ auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); @@ -6295,12 +6801,12 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval) searchparams1, searchparams2, &searchblock.searchattrs, - searchblock.maxmatches, - &searchblock.timelimit, + (u_long)searchblock.maxmatches, + &timelimit, returnattrs, &nummatches, - uap->scriptcode, - uap->options, + (u_long)uap->scriptcode, + (u_long)uap->options, auio, state, ctx); @@ -6334,19 +6840,16 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused register_t *retval) * Make a filesystem-specific control call: */ /* ARGSUSED */ -int -fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval) 
+static int +fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx) { - int error; + int error=0; boolean_t is64bit; - struct nameidata nd; - u_long nameiflags; - u_long cmd = uap->cmd; u_int size; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; caddr_t data, memp; - vfs_context_t ctx = vfs_context_current(); + vnode_t vp = *arg_vp; size = IOCPARM_LEN(cmd); if (size > IOCPARM_MAX) return (EINVAL); @@ -6363,14 +6866,14 @@ fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval) if (cmd & IOC_IN) { if (size) { - error = copyin(uap->data, data, size); + error = copyin(udata, data, size); if (error) goto FSCtl_Exit; } else { if (is64bit) { - *(user_addr_t *)data = uap->data; + *(user_addr_t *)data = udata; } else { - *(uint32_t *)data = (uint32_t)uap->data; + *(uint32_t *)data = (uint32_t)udata; } }; } else if ((cmd & IOC_OUT) && size) { @@ -6380,47 +6883,181 @@ fsctl (proc_t p, struct fsctl_args *uap, __unused register_t *retval) */ bzero(data, size); } else if (cmd & IOC_VOID) { - if (is64bit) { - *(user_addr_t *)data = uap->data; - } - else { - *(uint32_t *)data = (uint32_t)uap->data; - } + if (is64bit) { + *(user_addr_t *)data = udata; + } + else { + *(uint32_t *)data = (uint32_t)udata; + } } - /* Get the vnode for the file we are getting info on: */ - nameiflags = 0; - if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; - NDINIT(&nd, LOOKUP, nameiflags, UIO_USERSPACE, uap->path, ctx); - if ((error = namei(&nd))) goto FSCtl_Exit; + /* Check to see if it's a generic command */ + if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) { + mount_t mp = vp->v_mount; + int arg = *(uint32_t*)data; + + /* record vid of vp so we can drop it below. */ + uint32_t vvid = vp->v_id; -#if CONFIG_MACF - error = mac_mount_check_fsctl(ctx, vnode_mount(nd.ni_vp), cmd); - if (error) { - vnode_put(nd.ni_vp); - nameidone(&nd); + /* + * Then grab mount_iterref so that we can release the vnode. 
+ * Without this, a thread may call vnode_iterate_prepare then + * get into a deadlock because we've never released the root vp + */ + error = mount_iterref (mp, 0); + if (error) { + goto FSCtl_Exit; + } + vnode_put(vp); + + /* issue the sync for this volume */ + (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL); + + /* + * Then release the mount_iterref once we're done syncing; it's not + * needed for the VNOP_IOCTL below + */ + mount_iterdrop(mp); + + if (arg & FSCTL_SYNC_FULLSYNC) { + /* re-obtain vnode iocount on the root vp, if possible */ + error = vnode_getwithvid (vp, vvid); + if (error == 0) { + error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx); + vnode_put (vp); + } + } + /* mark the argument VP as having been released */ + *arg_vp = NULL; + + } else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) { + user_addr_t ext_strings; + uint32_t num_entries; + uint32_t max_width; + + if ( (is64bit && size != sizeof(user64_package_ext_info)) + || (is64bit == 0 && size != sizeof(user32_package_ext_info))) { + + // either you're 64-bit and passed a 64-bit struct or + // you're 32-bit and passed a 32-bit struct. otherwise + // it's not ok. 
+ error = EINVAL; goto FSCtl_Exit; - } -#endif + } + + if (is64bit) { + ext_strings = ((user64_package_ext_info *)data)->strings; + num_entries = ((user64_package_ext_info *)data)->num_entries; + max_width = ((user64_package_ext_info *)data)->max_width; + } else { + ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings); + num_entries = ((user32_package_ext_info *)data)->num_entries; + max_width = ((user32_package_ext_info *)data)->max_width; + } + + error = set_package_extensions_table(ext_strings, num_entries, max_width); - /* Invoke the filesystem-specific code */ - error = VNOP_IOCTL(nd.ni_vp, IOCBASECMD(cmd), data, uap->options, ctx); + } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) { + error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0); + if (error == 0) { + *(uint32_t *)data = (uint32_t)sync_wait_time; + error = 0; + } else { + error *= -1; + } + + } else { + /* Invoke the filesystem-specific code */ + error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx); + } - vnode_put(nd.ni_vp); - nameidone(&nd); /* * Copy any data to user, size was * already set and checked above. 
*/ if (error == 0 && (cmd & IOC_OUT) && size) - error = copyout(data, uap->data, size); + error = copyout(data, udata, size); FSCtl_Exit: if (memp) kfree(memp, size); return error; } + +/* ARGSUSED */ +int +fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval) +{ + int error; + struct nameidata nd; + u_long nameiflags; + vnode_t vp = NULL; + vfs_context_t ctx = vfs_context_current(); + + AUDIT_ARG(cmd, uap->cmd); + AUDIT_ARG(value32, uap->options); + /* Get the vnode for the file we are getting info on: */ + nameiflags = 0; + if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; + NDINIT(&nd, LOOKUP, nameiflags | AUDITVNPATH1, UIO_USERSPACE, + uap->path, ctx); + if ((error = namei(&nd))) goto done; + vp = nd.ni_vp; + nameidone(&nd); + +#if CONFIG_MACF + error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd); + if (error) { + goto done; + } +#endif + + error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx); + +done: + if (vp) + vnode_put(vp); + return error; +} +/* ARGSUSED */ +int +ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval) +{ + int error; + vnode_t vp = NULL; + vfs_context_t ctx = vfs_context_current(); + int fd = -1; + + AUDIT_ARG(fd, uap->fd); + AUDIT_ARG(cmd, uap->cmd); + AUDIT_ARG(value32, uap->options); + + /* Get the vnode for the file we are getting info on: */ + if ((error = file_vnode(uap->fd, &vp))) + goto done; + fd = uap->fd; + if ((error = vnode_getwithref(vp))) { + goto done; + } + +#if CONFIG_MACF + error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd); + if (error) { + goto done; + } +#endif + + error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx); + +done: + if (fd != -1) + file_drop(fd); + + if (vp) + vnode_put(vp); + return error; +} /* end of fsctl system call */ /* @@ -6457,7 +7094,7 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; size_t attrsize = 0; size_t namelen; - u_long nameiflags; + u_int32_t nameiflags; int error; char uio_buf[ UIO_SIZEOF(1) ]; @@ -6479,12 +7116,39 @@ getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) error = EPERM; goto out; } - if (uap->value && uap->size > 0) { + /* + * the specific check for 0xffffffff is a hack to preserve + * binaray compatibilty in K64 with applications that discovered + * that passing in a buf pointer and a size of -1 resulted in + * just the size of the indicated extended attribute being returned. + * this isn't part of the documented behavior, but because of the + * original implemtation's check for "uap->size > 0", this behavior + * was allowed. In K32 that check turned into a signed comparison + * even though uap->size is unsigned... in K64, we blow by that + * check because uap->size is unsigned and doesn't get sign smeared + * in the munger for a 32 bit user app. we also need to add a + * check to limit the maximum size of the buffer being passed in... + * unfortunately, the underlying fileystems seem to just malloc + * the requested size even if the actual extended attribute is tiny. + * because that malloc is for kernel wired memory, we have to put a + * sane limit on it. 
+ * + * U32 running on K64 will yield 0x00000000ffffffff for uap->size + * U64 running on K64 will yield -1 (64 bits wide) + * U32/U64 running on K32 will yield -1 (32 bits wide) + */ + if (uap->size == 0xffffffff || uap->size == (size_t)-1) + goto no_uio; + + if (uap->size > (size_t)XATTR_MAXSIZE) + uap->size = XATTR_MAXSIZE; + + if (uap->value) { auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->value, uap->size); } - +no_uio: error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx); out: vnode_put(vp); @@ -6562,7 +7226,7 @@ setxattr(proc_t p, struct setxattr_args *uap, int *retval) uio_t auio = NULL; int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; size_t namelen; - u_long nameiflags; + u_int32_t nameiflags; int error; char uio_buf[ UIO_SIZEOF(1) ]; @@ -6656,8 +7320,8 @@ fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval) /* * Remove an extended attribute. + * XXX Code duplication here. */ -#warning "code duplication" int removexattr(proc_t p, struct removexattr_args *uap, int *retval) { @@ -6667,7 +7331,7 @@ removexattr(proc_t p, struct removexattr_args *uap, int *retval) int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; vfs_context_t ctx = vfs_context_current(); size_t namelen; - u_long nameiflags; + u_int32_t nameiflags; int error; if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) @@ -6702,8 +7366,8 @@ removexattr(proc_t p, struct removexattr_args *uap, int *retval) /* * Remove an extended attribute. + * XXX Code duplication here. */ -#warning "code duplication" int fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval) { @@ -6746,8 +7410,8 @@ fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval) /* * Retrieve the list of extended attribute names. + * XXX Code duplication here. 
*/ -#warning "code duplication" int listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) { @@ -6757,7 +7421,7 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) uio_t auio = NULL; int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; size_t attrsize = 0; - u_long nameiflags; + u_int32_t nameiflags; int error; char uio_buf[ UIO_SIZEOF(1) ]; @@ -6772,7 +7436,6 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) vp = nd.ni_vp; nameidone(&nd); if (uap->namebuf != 0 && uap->bufsize > 0) { - // LP64todo - fix this! auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->namebuf, uap->bufsize); @@ -6791,8 +7454,8 @@ listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) /* * Retrieve the list of extended attribute names. + * XXX Code duplication here. */ -#warning "code duplication" int flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) { @@ -6814,7 +7477,6 @@ flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) return(error); } if (uap->namebuf != 0 && uap->bufsize > 0) { - // LP64todo - fix this! auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->namebuf, uap->bufsize); @@ -6832,6 +7494,70 @@ flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) return (error); } +/* + * Obtain the full pathname of a file system object by id. + * + * This is a private SPI used by the File Manager. 
+ */ +__private_extern__ +int +fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) +{ + vnode_t vp; + struct mount *mp = NULL; + vfs_context_t ctx = vfs_context_current(); + fsid_t fsid; + char *realpath; + int bpflags; + int length; + int error; + + if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { + return (error); + } + AUDIT_ARG(value32, fsid.val[0]); + AUDIT_ARG(value64, uap->objid); + /* Restrict output buffer size for now. */ + if (uap->bufsize > PAGE_SIZE) { + return (EINVAL); + } + MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK); + if (realpath == NULL) { + return (ENOMEM); + } + /* Find the target mountpoint. */ + if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) { + error = ENOTSUP; /* unexpected failure */ + goto out; + } + /* Find the target vnode. */ + if (uap->objid == 2) { + error = VFS_ROOT(mp, &vp, ctx); + } else { + error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx); + } + vfs_unbusy(mp); + if (error) { + goto out; + } + /* Obtain the absolute path to this vnode. */ + bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0; + error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx); + vnode_put(vp); + if (error) { + goto out; + } + AUDIT_ARG(text, realpath); + error = copyout((caddr_t)realpath, uap->buf, length); + + *retval = (user_ssize_t)length; /* may be superseded by error */ +out: + if (realpath) { + FREE(realpath, M_TEMP); + } + return (error); +} + /* * Common routine to handle various flavors of statfs data heading out * to user space. 
@@ -6848,19 +7574,19 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, int my_size, copy_size; if (is_64_bit) { - struct user_statfs sfs; + struct user64_statfs sfs; my_size = copy_size = sizeof(sfs); bzero(&sfs, my_size); sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK; sfs.f_type = mp->mnt_vtable->vfc_typenum; sfs.f_reserved1 = (short)sfsp->f_fssubtype; - sfs.f_bsize = (user_long_t)sfsp->f_bsize; - sfs.f_iosize = (user_long_t)sfsp->f_iosize; - sfs.f_blocks = (user_long_t)sfsp->f_blocks; - sfs.f_bfree = (user_long_t)sfsp->f_bfree; - sfs.f_bavail = (user_long_t)sfsp->f_bavail; - sfs.f_files = (user_long_t)sfsp->f_files; - sfs.f_ffree = (user_long_t)sfsp->f_ffree; + sfs.f_bsize = (user64_long_t)sfsp->f_bsize; + sfs.f_iosize = (user64_long_t)sfsp->f_iosize; + sfs.f_blocks = (user64_long_t)sfsp->f_blocks; + sfs.f_bfree = (user64_long_t)sfsp->f_bfree; + sfs.f_bavail = (user64_long_t)sfsp->f_bavail; + sfs.f_files = (user64_long_t)sfsp->f_files; + sfs.f_ffree = (user64_long_t)sfsp->f_ffree; sfs.f_fsid = sfsp->f_fsid; sfs.f_owner = sfsp->f_owner; strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); @@ -6873,7 +7599,8 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, error = copyout((caddr_t)&sfs, bufp, copy_size); } else { - struct statfs sfs; + struct user32_statfs sfs; + my_size = copy_size = sizeof(sfs); bzero(&sfs, my_size); @@ -6886,7 +7613,7 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, * have to fudge the numbers here in that case. We inflate the blocksize in order * to reflect the filesystem size as best we can. */ - if ((sfsp->f_blocks > LONG_MAX) + if ((sfsp->f_blocks > INT_MAX) /* Hack for 4061702 . I think the real fix is for Carbon to * look for some volume capability and not depend on hidden * semantics agreed between a FS and carbon. @@ -6911,28 +7638,28 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, * being smaller than f_bsize. 
*/ for (shift = 0; shift < 32; shift++) { - if ((sfsp->f_blocks >> shift) <= LONG_MAX) + if ((sfsp->f_blocks >> shift) <= INT_MAX) break; - if ((sfsp->f_bsize << (shift + 1)) > LONG_MAX) + if ((sfsp->f_bsize << (shift + 1)) > INT_MAX) break; } -#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s))) - sfs.f_blocks = (long)__SHIFT_OR_CLIP(sfsp->f_blocks, shift); - sfs.f_bfree = (long)__SHIFT_OR_CLIP(sfsp->f_bfree, shift); - sfs.f_bavail = (long)__SHIFT_OR_CLIP(sfsp->f_bavail, shift); +#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s))) + sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift); + sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift); + sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift); #undef __SHIFT_OR_CLIP - sfs.f_bsize = (long)(sfsp->f_bsize << shift); + sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift); sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize); } else { /* filesystem is small enough to be reported honestly */ - sfs.f_bsize = (long)sfsp->f_bsize; - sfs.f_iosize = (long)sfsp->f_iosize; - sfs.f_blocks = (long)sfsp->f_blocks; - sfs.f_bfree = (long)sfsp->f_bfree; - sfs.f_bavail = (long)sfsp->f_bavail; + sfs.f_bsize = (user32_long_t)sfsp->f_bsize; + sfs.f_iosize = (user32_long_t)sfsp->f_iosize; + sfs.f_blocks = (user32_long_t)sfsp->f_blocks; + sfs.f_bfree = (user32_long_t)sfsp->f_bfree; + sfs.f_bavail = (user32_long_t)sfsp->f_bavail; } - sfs.f_files = (long)sfsp->f_files; - sfs.f_ffree = (long)sfsp->f_ffree; + sfs.f_files = (user32_long_t)sfsp->f_files; + sfs.f_ffree = (user32_long_t)sfsp->f_ffree; sfs.f_fsid = sfsp->f_fsid; sfs.f_owner = sfsp->f_owner; strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN); @@ -6954,9 +7681,45 @@ munge_statfs(struct mount *mp, struct vfsstatfs *sfsp, /* * copy stat structure into user_stat structure. 
*/ -void munge_stat(struct stat *sbp, struct user_stat *usbp) +void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp) { - bzero(usbp, sizeof(struct user_stat)); + bzero(usbp, sizeof(*usbp)); + + usbp->st_dev = sbp->st_dev; + usbp->st_ino = sbp->st_ino; + usbp->st_mode = sbp->st_mode; + usbp->st_nlink = sbp->st_nlink; + usbp->st_uid = sbp->st_uid; + usbp->st_gid = sbp->st_gid; + usbp->st_rdev = sbp->st_rdev; +#ifndef _POSIX_C_SOURCE + usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec; + usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec; + usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec; + usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec; + usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec; + usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec; +#else + usbp->st_atime = sbp->st_atime; + usbp->st_atimensec = sbp->st_atimensec; + usbp->st_mtime = sbp->st_mtime; + usbp->st_mtimensec = sbp->st_mtimensec; + usbp->st_ctime = sbp->st_ctime; + usbp->st_ctimensec = sbp->st_ctimensec; +#endif + usbp->st_size = sbp->st_size; + usbp->st_blocks = sbp->st_blocks; + usbp->st_blksize = sbp->st_blksize; + usbp->st_flags = sbp->st_flags; + usbp->st_gen = sbp->st_gen; + usbp->st_lspare = sbp->st_lspare; + usbp->st_qspare[0] = sbp->st_qspare[0]; + usbp->st_qspare[1] = sbp->st_qspare[1]; +} + +void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp) +{ + bzero(usbp, sizeof(*usbp)); usbp->st_dev = sbp->st_dev; usbp->st_ino = sbp->st_ino; @@ -6993,9 +7756,49 @@ void munge_stat(struct stat *sbp, struct user_stat *usbp) /* * copy stat64 structure into user_stat64 structure. 
*/ -void munge_stat64(struct stat64 *sbp, struct user_stat64 *usbp) +void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp) +{ + bzero(usbp, sizeof(*usbp)); + + usbp->st_dev = sbp->st_dev; + usbp->st_ino = sbp->st_ino; + usbp->st_mode = sbp->st_mode; + usbp->st_nlink = sbp->st_nlink; + usbp->st_uid = sbp->st_uid; + usbp->st_gid = sbp->st_gid; + usbp->st_rdev = sbp->st_rdev; +#ifndef _POSIX_C_SOURCE + usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec; + usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec; + usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec; + usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec; + usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec; + usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec; + usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec; + usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec; +#else + usbp->st_atime = sbp->st_atime; + usbp->st_atimensec = sbp->st_atimensec; + usbp->st_mtime = sbp->st_mtime; + usbp->st_mtimensec = sbp->st_mtimensec; + usbp->st_ctime = sbp->st_ctime; + usbp->st_ctimensec = sbp->st_ctimensec; + usbp->st_birthtime = sbp->st_birthtime; + usbp->st_birthtimensec = sbp->st_birthtimensec; +#endif + usbp->st_size = sbp->st_size; + usbp->st_blocks = sbp->st_blocks; + usbp->st_blksize = sbp->st_blksize; + usbp->st_flags = sbp->st_flags; + usbp->st_gen = sbp->st_gen; + usbp->st_lspare = sbp->st_lspare; + usbp->st_qspare[0] = sbp->st_qspare[0]; + usbp->st_qspare[1] = sbp->st_qspare[1]; +} + +void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp) { - bzero(usbp, sizeof(struct user_stat)); + bzero(usbp, sizeof(*usbp)); usbp->st_dev = sbp->st_dev; usbp->st_ino = sbp->st_ino; diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c index 948076da9..adf92df30 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -80,7 +80,7 @@ * Similar to __CFUniCharIsNonBaseCharacter except that * unicode_combinable also includes Hangul Jamo 
characters. */ -static inline int +inline int unicode_combinable(u_int16_t character) { const u_int8_t *bitmap = __CFUniCharCombiningBitmap; @@ -105,7 +105,7 @@ unicode_combinable(u_int16_t character) * * Similar to __CFUniCharIsDecomposableCharacter. */ -static inline int +inline int unicode_decomposeable(u_int16_t character) { const u_int8_t *bitmap = __CFUniCharDecomposableBitmap; u_int8_t value; @@ -591,6 +591,12 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, if ((ucsp + 2) >= bufend) goto toolong; + /* Make a previous combining sequence canonical. */ + if (combcharcnt > 1) { + priortysort(ucsp - combcharcnt, combcharcnt); + } + combcharcnt = 0; + ucs_ch = '%'; *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : (u_int16_t)ucs_ch; ucs_ch = hexdigits[byte >> 4]; diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 0eb1036ad..c7b110fd6 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -105,6 +105,7 @@ int ubc_setcred(struct vnode *, struct proc *); #include #include +#include #if CONFIG_MACF #include @@ -122,6 +123,8 @@ static int vn_select( struct fileproc *fp, int which, void * wql, vfs_context_t ctx); static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx); +static void filt_vndetach(struct knote *kn); +static int filt_vnode(struct knote *kn, long hint); #if 0 static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, vfs_context_t ctx); @@ -130,6 +133,13 @@ static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, struct fileops vnops = { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL }; +struct filterops vnode_filtops = { + .f_isfd = 1, + .f_attach = NULL, + .f_detach = filt_vndetach, + .f_event = filt_vnode +}; + /* * Common code for vnode open operations. * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine. 
@@ -206,26 +216,28 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) vfs_context_t ctx = ndp->ni_cnd.cn_context; int error; int fmode; + uint32_t origcnflags; kauth_action_t action; again: vp = NULL; dvp = NULL; fmode = *fmodep; + origcnflags = ndp->ni_cnd.cn_flags; if (fmode & O_CREAT) { if ( (fmode & O_DIRECTORY) ) { error = EINVAL; goto out; } ndp->ni_cnd.cn_nameiop = CREATE; - /* Inherit USEDVP flag only */ - ndp->ni_cnd.cn_flags &= USEDVP; + /* Inherit USEDVP, vnode_open() supported flags only */ + ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; #if NAMEDRSRCFORK /* open calls are allowed for resource forks. */ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; #endif - if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) + if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) ndp->ni_cnd.cn_flags |= FOLLOW; if ( (error = namei(ndp)) ) goto out; @@ -279,6 +291,8 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) #if CONFIG_FSE if (need_fsevent(FSE_CREATE_FILE, vp)) { + vnode_put(dvp); + dvp = NULL; add_fsevent(FSE_CREATE_FILE, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); @@ -294,13 +308,17 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) badcreate: nameidone(ndp); ndp->ni_dvp = NULL; - vnode_put(dvp); + + if (dvp) { + vnode_put(dvp); + } if (error) { /* - * Check for a creation race. + * Check for a creation or unlink race. 
*/ - if ((error == EEXIST) && !(fmode & O_EXCL)) { + if (((error == EEXIST) && !(fmode & O_EXCL)) || + ((error == ENOENT) && (fmode & O_CREAT))){ goto again; } goto bad; @@ -319,14 +337,15 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; - /* Inherit USEDVP flag only */ - ndp->ni_cnd.cn_flags &= USEDVP; + /* Inherit USEDVP, vnode_open() supported flags only */ + ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1; #if NAMEDRSRCFORK /* open calls are allowed for resource forks. */ ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; #endif - if (fmode & O_NOFOLLOW || fmode & O_SYMLINK) { + /* preserve NOFOLLOW from vnode_open() */ + if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { ndp->ni_cnd.cn_flags &= ~FOLLOW; } @@ -425,9 +444,11 @@ vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) ndp->ni_vp = NULL; if (vp) { #if NAMEDRSRCFORK - if ((vnode_isnamedstream(vp)) && (vp->v_parent != NULLVP) && - (vnode_isshadow (vp))) { - vnode_recycle(vp); + /* Aggressively recycle shadow files if we error'd out during open() */ + if ((vnode_isnamedstream(vp)) && + (vp->v_parent != NULLVP) && + (vnode_isshadow(vp))) { + vnode_recycle(vp); } #endif vnode_put(vp); @@ -495,7 +516,7 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx) /* Sync data from resource fork shadow file if needed. 
*/ if ((vp->v_flag & VISNAMEDSTREAM) && (vp->v_parent != NULLVP) && - (vnode_isshadow(vp))) { + vnode_isshadow(vp)) { if (flags & FWASWRITTEN) { (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); } @@ -503,12 +524,12 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx) #endif /* work around for foxhound */ - if (vp->v_type == VBLK) + if (vnode_isspec(vp)) (void)vnode_rele_ext(vp, flags, 0); error = VNOP_CLOSE(vp, flags, ctx); - if (vp->v_type != VBLK) + if (!vnode_isspec(vp)) (void)vnode_rele_ext(vp, flags, 0); return (error); @@ -519,15 +540,17 @@ vn_read_swapfile( struct vnode *vp, uio_t uio) { - static char *swap_read_zero_page = NULL; int error; off_t swap_count, this_count; off_t file_end, read_end; off_t prev_resid; + char *my_swap_page; /* - * Reading from a swap file will get you all zeroes. + * Reading from a swap file will get you zeroes. */ + + my_swap_page = NULL; error = 0; swap_count = uio_resid(uio); @@ -542,49 +565,20 @@ vn_read_swapfile( } while (swap_count > 0) { - if (swap_read_zero_page == NULL) { - char *my_zero_page; - int funnel_state; - - /* - * Take kernel funnel so that only one thread - * sets up "swap_read_zero_page". - */ - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (swap_read_zero_page == NULL) { - MALLOC(my_zero_page, char *, PAGE_SIZE, - M_TEMP, M_WAITOK); - memset(my_zero_page, '?', PAGE_SIZE); - /* - * Adding a newline character here - * and there prevents "less(1)", for - * example, from getting too confused - * about a file with one really really - * long line. - */ - my_zero_page[PAGE_SIZE-1] = '\n'; - if (swap_read_zero_page == NULL) { - swap_read_zero_page = my_zero_page; - } else { - FREE(my_zero_page, M_TEMP); - } - } else { - /* - * Someone else raced us here and won; - * just use their page. 
- */ - } - thread_funnel_set(kernel_flock, funnel_state); + if (my_swap_page == NULL) { + MALLOC(my_swap_page, char *, PAGE_SIZE, + M_TEMP, M_WAITOK); + memset(my_swap_page, '\0', PAGE_SIZE); + /* add an end-of-line to keep line counters happy */ + my_swap_page[PAGE_SIZE-1] = '\n'; } - this_count = swap_count; if (this_count > PAGE_SIZE) { this_count = PAGE_SIZE; } prev_resid = uio_resid(uio); - error = uiomove((caddr_t) swap_read_zero_page, + error = uiomove((caddr_t) my_swap_page, this_count, uio); if (error) { @@ -592,6 +586,10 @@ vn_read_swapfile( } swap_count -= (prev_resid - uio_resid(uio)); } + if (my_swap_page != NULL) { + FREE(my_swap_page, M_TEMP); + my_swap_page = NULL; + } return error; } @@ -611,7 +609,10 @@ vn_rdwr( int *aresid, proc_t p) { - return vn_rdwr_64(rw, + int64_t resid; + int result; + + result = vn_rdwr_64(rw, vp, (uint64_t)(uintptr_t)base, (int64_t)len, @@ -619,8 +620,15 @@ vn_rdwr( segflg, ioflg, cred, - aresid, + &resid, p); + + /* "resid" should be bounded above by "len," which is an int */ + if (aresid != NULL) { + *aresid = resid; + } + + return result; } @@ -634,7 +642,7 @@ vn_rdwr_64( enum uio_seg segflg, int ioflg, kauth_cred_t cred, - int *aresid, + int64_t *aresid, proc_t p) { uio_t auio; @@ -672,7 +680,7 @@ vn_rdwr_64( if (error == 0) { if (rw == UIO_READ) { - if (vp->v_flag & VSWAP) { + if (vnode_isswap(vp)) { error = vn_read_swapfile(vp, auio); } else { error = VNOP_READ(vp, auio, ioflg, &context); @@ -683,7 +691,6 @@ vn_rdwr_64( } if (aresid) - // LP64todo - fix this *aresid = uio_resid(auio); else if (uio_resid(auio) && error == 0) @@ -726,7 +733,7 @@ vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) uio->uio_offset = fp->f_fglob->fg_offset; count = uio_resid(uio); - if (vp->v_flag & VSWAP) { + if (vnode_isswap(vp)) { /* special case for swap files */ error = vn_read_swapfile(vp, uio); } else { @@ -775,9 +782,17 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) ioflag 
|= IO_NDELAY; if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) ioflag |= IO_NOCACHE; - if ((fp->f_fglob->fg_flag & O_FSYNC) || - (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) + /* + * Treat synchronous mounts and O_FSYNC on the fd as equivalent. + * + * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay + * XXX the non-essential metadata without some additional VFS work; + * XXX the intent at this point is to plumb the interface for it. + */ + if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { ioflag |= IO_SYNC; + } if ((flags & FOF_OFFSET) == 0) { uio->uio_offset = fp->f_fglob->fg_offset; @@ -785,8 +800,8 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } if (((flags & FOF_OFFSET) == 0) && vfs_context_proc(ctx) && (vp->v_type == VREG) && - (((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || - ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { + (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || + ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { /* * If the requested residual would cause us to go past the * administrative limit, then we need to adjust the residual @@ -794,10 +809,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) * we can't do that (e.g. the residual is already 1 byte), * then we fail the write with EFBIG. 
*/ - residcount = uio_uio_resid(uio); - if ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { - clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; - } else if ((rlim_t)uio_uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) { + residcount = uio_resid(uio); + if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { + clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; + } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) { clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset); } if (clippedsize >= residcount) { @@ -818,10 +833,10 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) return (EFBIG); } if (p && (vp->v_type == VREG) && - ((rlim_t)(uio->uio_offset + uio_uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { + ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { //Debugger("vn_bwrite:overstepping the bounds"); - residcount = uio_uio_resid(uio); - clippedsize = (uio->uio_offset + uio_uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; + residcount = uio_resid(uio); + clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; partialwrite = 1; uio_setresid(uio, residcount-clippedsize); } @@ -885,7 +900,7 @@ vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat6 sb64 = (struct stat64 *)sbptr; else sb = (struct stat *)sbptr; - + memset(&va, 0, sizeof(va)); VATTR_INIT(&va); VATTR_WANTED(&va, va_fsid); VATTR_WANTED(&va, va_fileid); @@ -1185,16 +1200,6 @@ vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) } -/* - * Check that the vnode is still valid, and if so - * acquire requested lock. 
- */ -int -vn_lock(__unused vnode_t vp, __unused int flags, __unused proc_t p) -{ - return (0); -} - /* * File table vnode close routine. */ @@ -1227,7 +1232,7 @@ vn_closefile(struct fileglob *fg, vfs_context_t ctx) * VNOP_PATHCONF:??? */ int -vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx) +vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx) { int error = 0; @@ -1279,48 +1284,217 @@ vn_pathconf(vnode_t vp, int name, register_t *retval, vfs_context_t ctx) static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) { - struct vnode *vp = (struct vnode *)fp->f_fglob->fg_data; int error; - int funnel_state; + struct vnode *vp; - if ( (error = vnode_getwithref(vp)) == 0 ) { + vp = (struct vnode *)fp->f_fglob->fg_data; + + /* + * Don't attach a knote to a dead vnode. + */ + if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) { + switch (kn->kn_filter) { + case EVFILT_READ: + case EVFILT_WRITE: + if (vnode_isfifo(vp)) { + /* We'll only watch FIFOs that use our fifofs */ + if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) { + error = ENOTSUP; + } + + } else if (!vnode_isreg(vp)) { + if (vnode_isspec(vp) && + (error = spec_kqfilter(vp, kn)) == 0) { + /* claimed by a special device */ + vnode_put(vp); + return 0; + } + + error = EINVAL; + } + break; + case EVFILT_VNODE: + break; + default: + error = EINVAL; + } + + if (error) { + vnode_put(vp); + return error; + } #if CONFIG_MACF error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp); if (error) { - (void)vnode_put(vp); - return (error); + vnode_put(vp); + return error; } #endif - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = VNOP_KQFILT_ADD(vp, kn, ctx); - thread_funnel_set(kernel_flock, funnel_state); + kn->kn_hook = (void*)vp; + kn->kn_hookid = vnode_vid(vp); + kn->kn_fop = &vnode_filtops; - (void)vnode_put(vp); + vnode_lock(vp); + KNOTE_ATTACH(&vp->v_knotes, kn); + vnode_unlock(vp); + + /* Ask the filesystem to 
provide remove notifications, but ignore failure */ + VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx); + + vnode_put(vp); } + return (error); } -#if 0 -/* No one calls this yet. */ -static int -vn_kqfilt_remove(vp, ident, ctx) - struct vnode *vp; - uintptr_t ident; - vfs_context_t ctx; +static void +filt_vndetach(struct knote *kn) { - int error; - int funnel_state; + vfs_context_t ctx = vfs_context_current(); + struct vnode *vp; + vp = (struct vnode *)kn->kn_hook; + if (vnode_getwithvid(vp, kn->kn_hookid)) + return; + + vnode_lock(vp); + KNOTE_DETACH(&vp->v_knotes, kn); + vnode_unlock(vp); - if ( (error = vnode_getwithref(vp)) == 0 ) { + /* + * Tell a (generally networked) filesystem that we're no longer watching + * If the FS wants to track contexts, it should still be using the one from + * the VNODE_MONITOR_BEGIN. + */ + VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx); + vnode_put(vp); +} - funnel_state = thread_funnel_set(kernel_flock, TRUE); - error = VNOP_KQFILT_REMOVE(vp, ident, ctx); - thread_funnel_set(kernel_flock, funnel_state); - (void)vnode_put(vp); +/* + * Used for EVFILT_READ + * + * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case + * differently than the regular case for VREG files. If not in poll(), + * then we need to know current fileproc offset for VREG. 
+ */ +static intptr_t +vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) +{ + if (vnode_isfifo(vp)) { + int cnt; + int err = fifo_charcount(vp, &cnt); + if (err == 0) { + return (intptr_t)cnt; + } else { + return (intptr_t)0; + } + } else if (vnode_isreg(vp)) { + if (ispoll) { + return (intptr_t)1; + } + + off_t amount; + amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; + if (amount > (off_t)INTPTR_MAX) { + return INTPTR_MAX; + } else if (amount < (off_t)INTPTR_MIN) { + return INTPTR_MIN; + } else { + return (intptr_t)amount; + } + } else { + panic("Should never have an EVFILT_READ except for reg or fifo."); + return 0; } - return (error); } -#endif + +/* + * Used for EVFILT_WRITE. + * + * For regular vnodes, we can always write (1). For named pipes, + * see how much space there is in the buffer. Nothing else is covered. + */ +static intptr_t +vnode_writable_space_count(vnode_t vp) +{ + if (vnode_isfifo(vp)) { + long spc; + int err = fifo_freespace(vp, &spc); + if (err == 0) { + return (intptr_t)spc; + } else { + return (intptr_t)0; + } + } else if (vnode_isreg(vp)) { + return (intptr_t)1; + } else { + panic("Should never have an EVFILT_READ except for reg or fifo."); + return 0; + } +} + +/* + * Determine whether this knote should be active + * + * This is kind of subtle. + * --First, notice if the vnode has been revoked: in so, override hint + * --EVFILT_READ knotes are checked no matter what the hint is + * --Other knotes activate based on hint. 
+ * --If hint is revoke, set special flags and activate + */ +static int +filt_vnode(struct knote *kn, long hint) +{ + struct vnode *vp = (struct vnode *)kn->kn_hook; + int activate = 0; + + if (0 == hint) { + if ((vnode_getwithvid(vp, kn->kn_hookid) != 0)) { + hint = NOTE_REVOKE; + } else { + vnode_put(vp); + } + } + + /* NOTE_REVOKE is special, as it is only sent during vnode reclaim */ + if (NOTE_REVOKE == hint) { + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + activate = 1; + + if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { + kn->kn_fflags |= NOTE_REVOKE; + } + } else { + switch(kn->kn_filter) { + case EVFILT_READ: + kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL)); + + if (kn->kn_data != 0) { + activate = 1; + } + break; + case EVFILT_WRITE: + kn->kn_data = vnode_writable_space_count(vp); + + if (kn->kn_data != 0) { + activate = 1; + } + break; + case EVFILT_VNODE: + /* Check events this note matches against the hint */ + if (kn->kn_sfflags & hint) { + kn->kn_fflags |= hint; /* Set which event occurred */ + } + if (kn->kn_fflags != 0) { + activate = 1; + } + break; + default: + panic("Invalid knote filter on a vnode!\n"); + } + } + + return (activate); +} diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index 43f8991d8..a1288ffc6 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -60,10 +60,10 @@ #if NAMEDSTREAMS /* - * Cast to 'unsigned int' loses precision - hope that's OK... + * We use %p to prevent loss of precision for pointers on varying architectures. 
*/ #define MAKE_SHADOW_NAME(VP, NAME) \ - snprintf((NAME), sizeof((NAME)), ".vfs_rsrc_stream_%x%08x%x", (unsigned int)(VP), (VP)->v_id, (unsigned int)(VP)->v_data); + snprintf((NAME), sizeof((NAME)), ".vfs_rsrc_stream_%p%08x%p", (void*)(VP), (VP)->v_id, (VP)->v_data); static vnode_t shadow_dvp; /* tmp directory to hold stream shadow files */ static int shadow_vid; @@ -397,20 +397,27 @@ vnode_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperati uint32_t streamflags = VISNAMEDSTREAM; vnode_t svp = *svpp; - if ((vp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) { + if ((vp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) { streamflags |= VISSHADOW; - } - + } + /* Tag the vnode. */ vnode_lock_spin(svp); svp->v_flag |= streamflags; vnode_unlock(svp); - /* Make the file its parent. - * Note: This parent link helps us distinguish vnodes for - * shadow stream files from vnodes for resource fork on file - * systems that support named streams natively (both have - * VISNAMEDSTREAM set) by allowing access to mount structure - * for checking MNTK_NAMED_STREAMS bit at many places in the code + + /* Tag the parent so we know to flush credentials for streams on setattr */ + vnode_lock_spin(vp); + vp->v_lflag |= VL_HASSTREAMS; + vnode_unlock(vp); + + /* Make the file it's parent. + * Note: This parent link helps us distinguish vnodes for + * shadow stream files from vnodes for resource fork on file + * systems that support namedstream natively (both have + * VISNAMEDSTREAM set) by allowing access to mount structure + * for checking MNTK_NAMED_STREAMS bit at many places in the + * code. */ vnode_update_identity(svp, vp, NULL, 0, 0, VNODE_UPDATE_PARENT); } @@ -439,18 +446,24 @@ vnode_makenamedstream(vnode_t vp, vnode_t *svpp, const char *name, int flags, vf if ((vp->v_mount->mnt_kern_flag & MNTK_NAMED_STREAMS) == 0) { streamflags |= VISSHADOW; } - + /* Tag the vnode. 
*/ vnode_lock_spin(svp); svp->v_flag |= streamflags; vnode_unlock(svp); - /* Make the file its parent. - * Note: This parent link helps us distinguish vnodes for - * shadow stream files from vnodes for resource fork on file - * systems that support named streams natively (both have - * VISNAMEDSTREAM set) by allowing access to mount structure - * for checking MNTK_NAMED_STREAMS bit at many places in the code + /* Tag the parent so we know to flush credentials for streams on setattr */ + vnode_lock_spin(vp); + vp->v_lflag |= VL_HASSTREAMS; + vnode_unlock(vp); + + /* Make the file it's parent. + * Note: This parent link helps us distinguish vnodes for + * shadow stream files from vnodes for resource fork on file + * systems that support namedstream natively (both have + * VISNAMEDSTREAM set) by allowing access to mount structure + * for checking MNTK_NAMED_STREAMS bit at many places in the + * code. */ vnode_update_identity(svp, vp, NULL, 0, 0, VNODE_UPDATE_PARENT); } @@ -492,7 +505,7 @@ vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) { vnode_t dvp; struct componentname cn; - char tmpname[48]; + char tmpname[80]; errno_t err; cache_purge(svp); @@ -514,7 +527,7 @@ vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) if (err != 0) { return err; } - + (void) VNOP_REMOVE(dvp, svp, &cn, 0, context); vnode_put(dvp); @@ -552,7 +565,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) if (kmem_alloc(kernel_map, (vm_offset_t *)&bufptr, bufsize)) { return (ENOMEM); } - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); offset = 0; /* @@ -566,7 +579,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) while (offset < datasize) { iosize = MIN(datasize - offset, iosize); - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufptr, iosize); error = VNOP_READ(svp, auio, 
0, context); if (error) { @@ -579,7 +592,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) break; } } - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufptr, iosize); error = vn_setxattr(vp, XATTR_RESOURCEFORK_NAME, auio, XATTR_NOSECURITY, context); if (error) { @@ -607,7 +620,7 @@ getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize, vnode_t svp = NULLVP; struct componentname cn; struct vnode_attr va; - char tmpname[48]; + char tmpname[80]; size_t datasize = 0; int error = 0; @@ -743,7 +756,14 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera /* * The creator of the shadow file provides its file data, - * all other threads should wait until its ready. + * all other threads should wait until its ready. In order to + * prevent a deadlock during error codepaths, we need to check if the + * vnode is being created, or if it has failed out. Regardless of success or + * failure, we set the VISSHADOW bit on the vnode, so we check that + * if the vnode's flags don't have VISNAMEDSTREAM set. If it doesn't, + * then we can infer the creator isn't done yet. If it's there, but + * VISNAMEDSTREAM is not set, then we can infer it errored out and we should + * try again. */ if (!creator) { vnode_lock(svp); @@ -752,9 +772,19 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera vnode_unlock(svp); goto out; } else { - /* its not ready, wait for it (sleep using v_parent as channel) */ - msleep((caddr_t)&svp->v_parent, &svp->v_lock, PINOD | PDROP, - "getnamedstream", NULL); + /* It's not ready, wait for it (sleep using v_parent as channel) */ + if ((svp->v_flag & VISSHADOW)) { + /* + * No VISNAMEDSTREAM, but we did see VISSHADOW, indicating that the other + * thread is done with this vnode. 
Just unlock the vnode and try again + */ + vnode_unlock(svp); + } + else { + /* Otherwise, sleep if the shadow file is not created yet */ + msleep((caddr_t)&svp->v_parent, &svp->v_lock, PINOD | PDROP, + "getnamedstream", NULL); + } vnode_put(svp); svp = NULLVP; goto retry; @@ -774,7 +804,7 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera goto out; } - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); offset = 0; error = VNOP_OPEN(svp, 0, context); @@ -786,7 +816,7 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera iosize = MIN(datasize - offset, iosize); - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufptr, iosize); error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, auio, &tmpsize, XATTR_NOSECURITY, context); @@ -794,7 +824,7 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera break; } - uio_reset(auio, offset, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufptr, iosize); error = VNOP_WRITE(svp, auio, 0, context); if (error) { @@ -809,17 +839,23 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera if (creator) { if (error == 0) { vnode_lock(svp); - svp->v_flag |= VISNAMEDSTREAM; + /* VISSHADOW would be set later on anyway, so we set it now */ + svp->v_flag |= (VISNAMEDSTREAM | VISSHADOW); wakeup((caddr_t)&svp->v_parent); vnode_unlock(svp); } else { - /* On post create errors, get rid of the shadow file. This - * way, if there is another process waiting for initialization - * of the shadow file by the current process, it will wake up - * and retry by creating and initializing the shadow file again. + /* On post create errors, get rid of the shadow file. 
This + * way if there is another process waiting for initialization + * of the shadowfile by the current process will wake up and + * retry by creating and initializing the shadow file again. + * Also add the VISSHADOW bit here to indicate we're done operating + * on this vnode. */ - (void) vnode_relenamedstream(vp, svp, context); + (void)vnode_relenamedstream(vp, svp, context); + vnode_lock (svp); + svp->v_flag |= VISSHADOW; wakeup((caddr_t)&svp->v_parent); + vnode_unlock(svp); } } @@ -862,11 +898,14 @@ default_makenamedstream(vnode_t vp, vnode_t *svpp, const char *name, vfs_context vnode_t svp = *svpp; vnode_lock(svp); - svp->v_flag |= VISNAMEDSTREAM; + /* If we're the creator, mark it as a named stream */ + svp->v_flag |= (VISNAMEDSTREAM | VISSHADOW); /* Wakeup any waiters on the v_parent channel */ wakeup((caddr_t)&svp->v_parent); vnode_unlock(svp); + } + return (error); } @@ -892,7 +931,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) vnode_t sdvp = NULLVP; struct componentname cn; struct vnode_attr va; - char tmpname[48]; + char tmpname[80]; uint32_t tmp_fsid; int error; @@ -913,8 +952,8 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) } /* Create the shadow stream directory. */ - snprintf(tmpname, sizeof(tmpname), ".vfs_rsrc_streams_%x%x", - (unsigned int)rootvnode, shadow_sequence); + snprintf(tmpname, sizeof(tmpname), ".vfs_rsrc_streams_%p%x", + (void*)rootvnode, shadow_sequence); bzero(&cn, sizeof(cn)); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN; @@ -1136,15 +1175,13 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) */ -#pragma options align=mac68k - #define FINDERINFOSIZE 32 typedef struct apple_double_entry { u_int32_t type; /* entry type: see list, 0 invalid */ u_int32_t offset; /* entry data offset from the beginning of the file. */ u_int32_t length; /* entry data length in bytes. 
*/ -} apple_double_entry_t; +} __attribute__((aligned(2), packed)) apple_double_entry_t; typedef struct apple_double_header { @@ -1155,7 +1192,7 @@ typedef struct apple_double_header { apple_double_entry_t entries[2]; /* 'finfo' & 'rsrc' always exist */ u_int8_t finfo[FINDERINFOSIZE]; /* Must start with Finder Info (32 bytes) */ u_int8_t pad[2]; /* get better alignment inside attr_header */ -} apple_double_header_t; +} __attribute__((aligned(2), packed)) apple_double_header_t; #define ADHDRSIZE (4+4+16+2) @@ -1166,7 +1203,7 @@ typedef struct attr_entry { u_int16_t flags; u_int8_t namelen; u_int8_t name[1]; /* NULL-terminated UTF-8 name (up to 128 bytes max) */ -} attr_entry_t; +} __attribute__((aligned(2), packed)) attr_entry_t; /* Header + entries must fit into 64K. Data may extend beyond 64K. */ @@ -1180,7 +1217,7 @@ typedef struct attr_header { u_int32_t reserved[3]; u_int16_t flags; u_int16_t num_attrs; -} attr_header_t; +} __attribute__((aligned(2), packed)) attr_header_t; /* Empty Resource Fork Header */ @@ -1202,14 +1239,12 @@ typedef struct rsrcfork_header { u_int16_t mh_Types; u_int16_t mh_Names; u_int16_t typeCount; -} rsrcfork_header_t; +} __attribute__((aligned(2), packed)) rsrcfork_header_t; #define RF_FIRST_RESOURCE 256 #define RF_NULL_MAP_LENGTH 30 #define RF_EMPTY_TAG "This resource fork intentionally left blank " -#pragma options align=reset - /* Runtime information about the attribute file. 
*/ typedef struct attr_info { vfs_context_t context; @@ -1864,6 +1899,9 @@ default_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_conte (void) vnode_setattr(vp, &va, context); } } + + post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } @@ -2083,6 +2121,9 @@ default_removexattr(vnode_t vp, const char *name, __unused int options, vfs_cont (void) vnode_setattr(vp, &va, context); } } + + post_event_if_success(vp, error, NOTE_ATTRIB); + return (error); } @@ -2112,6 +2153,8 @@ default_listxattr(vnode_t vp, uio_t uio, size_t *size, __unused int options, vfs return (error); } if ((error = get_xattrinfo(xvp, 0, &ainfo, context))) { + if (error == ENOATTR) + error = 0; close_xattrfile(xvp, FREAD, context); return (error); } @@ -2184,6 +2227,70 @@ default_listxattr(vnode_t vp, uio_t uio, size_t *size, __unused int options, vfs return (error); } +/* + * Check the header of a ._ file to verify that it is in fact an Apple Double + * file. Returns 0 if the header is valid, non-zero if invalid. 
+ */ +int check_appledouble_header(vnode_t vp, vfs_context_t ctx) +{ + int error = 0; + attr_info_t ainfo; + struct vnode_attr va; + uio_t auio = NULL; + void *buffer = NULL; + int iosize; + + ainfo.filevp = vp; + ainfo.context = ctx; + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + if ((error = vnode_getattr(vp, &va, ctx))) { + goto out; + } + ainfo.filesize = va.va_data_size; + + iosize = MIN(ATTR_MAX_HDR_SIZE, ainfo.filesize); + if (iosize == 0) { + error = ENOATTR; + goto out; + } + ainfo.iosize = iosize; + + MALLOC(buffer, void *, iosize, M_TEMP, M_WAITOK); + if (buffer == NULL) { + error = ENOMEM; + goto out; + } + + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, (uintptr_t)buffer, iosize); + + /* Read the header */ + error = VNOP_READ(vp, auio, 0, ctx); + if (error) { + goto out; + } + ainfo.rawsize = iosize - uio_resid(auio); + ainfo.rawdata = (u_int8_t *)buffer; + + error = check_and_swap_apple_double_header(&ainfo); + if (error) { + goto out; + } + + /* If we made it here, then the header is ok */ + +out: + if (auio) { + uio_free(auio); + } + if (buffer) { + FREE(buffer, M_TEMP); + } + + return error; +} + static int open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context) { @@ -2289,8 +2396,10 @@ open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context) error = vn_create(dvp, &nd.ni_vp, &nd.ni_cnd, &va, VN_CREATE_NOAUTH | VN_CREATE_NOINHERIT | VN_CREATE_NOLABEL, context); - if (error == 0) - xvp = nd.ni_vp; + if (error) + error = ENOATTR; + else + xvp = nd.ni_vp; } nameidone(&nd); if (dvp != vp) { @@ -2299,10 +2408,10 @@ open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context) if (error) goto out; } else { - if ((error = namei(&nd))) { - nd.ni_dvp = NULLVP; + if ((error = namei(&nd))) { + nd.ni_dvp = NULLVP; error = ENOATTR; - goto out; + goto out; } xvp = nd.ni_vp; nameidone(&nd); @@ -2368,6 +2477,8 @@ open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, 
vfs_context_t context) locktype = (fileflags & O_EXLOCK) ? F_WRLCK : F_RDLCK; error = lock_xattrfile(xvp, locktype, context); + if (error) + error = ENOATTR; } out: if (dvp && (dvp != vp)) { @@ -2508,7 +2619,7 @@ get_xattrinfo(vnode_t xvp, int setting, attr_info_t *ainfop, vfs_context_t conte goto bail; } - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)buffer, iosize); /* Read the file header. */ @@ -2575,7 +2686,7 @@ get_xattrinfo(vnode_t xvp, int setting, attr_info_t *ainfop, vfs_context_t conte /* Read the system data which starts at byte 16 */ - rf_uio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ); + rf_uio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); uio_addiov(rf_uio, (uintptr_t)systemData, sizeof(systemData)); uio_setoffset(rf_uio, filehdr->entries[i].offset + 16); rf_err = VNOP_READ(xvp, rf_uio, 0, context); @@ -2663,7 +2774,7 @@ get_xattrinfo(vnode_t xvp, int setting, attr_info_t *ainfop, vfs_context_t conte attrhdr->num_attrs = 0; /* Push out new header */ - uio_reset(auio, 0, UIO_SYSSPACE32, UIO_WRITE); + uio_reset(auio, 0, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)filehdr, writesize); swap_adhdr(filehdr); /* to big endian */ @@ -2723,7 +2834,7 @@ create_xattrfile(vnode_t xvp, u_int32_t fileid, vfs_context_t context) bzero(buffer, ATTR_BUF_SIZE); xah = (attr_header_t *)buffer; - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)buffer, ATTR_BUF_SIZE); rsrcforksize = sizeof(rsrcfork_header_t); rsrcforkhdr = (rsrcfork_header_t *) ((char *)buffer + ATTR_BUF_SIZE - rsrcforksize); @@ -2787,7 +2898,7 @@ write_xattrinfo(attr_info_t *ainfop) uio_t auio; int error; - auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_WRITE); + auio = uio_create(1, 0, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)ainfop->filehdr, ainfop->iosize); swap_adhdr(ainfop->filehdr); @@ -2960,7 +3071,7 @@ 
shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t size_t chunk, orig_chunk; char *buff; off_t pos; - ucred_t ucred = vfs_context_ucred(context); + kauth_cred_t ucred = vfs_context_ucred(context); proc_t p = vfs_context_proc(context); if (delta == 0 || len == 0) { @@ -2992,7 +3103,7 @@ shift_data_down(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t break; } - if ((pos - chunk) < start) { + if ((pos - (off_t)chunk) < start) { chunk = pos - start; if (chunk == 0) { // we're all done @@ -3014,7 +3125,7 @@ shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t c char *buff; off_t pos; off_t end; - ucred_t ucred = vfs_context_ucred(context); + kauth_cred_t ucred = vfs_context_ucred(context); proc_t p = vfs_context_proc(context); if (delta == 0 || len == 0) { @@ -3047,7 +3158,7 @@ shift_data_up(vnode_t xvp, off_t start, size_t len, off_t delta, vfs_context_t c break; } - if ((pos + chunk) > end) { + if ((pos + (off_t)chunk) > end) { chunk = end - pos; if (chunk == 0) { // we're all done diff --git a/bsd/vfs/vnode_if.c b/bsd/vfs/vnode_if.c index df30827ae..1a77414e2 100644 --- a/bsd/vfs/vnode_if.c +++ b/bsd/vfs/vnode_if.c @@ -362,6 +362,23 @@ struct vnodeop_desc vnop_kqfilt_remove_desc = { NULL }; +int vnop_monitor_vp_offsets[] = { + VOPARG_OFFSETOF(struct vnop_monitor_args,a_vp), + VDESC_NO_OFFSET +}; +struct vnodeop_desc vnop_monitor_desc = { + 0, + "vnop_monitor", + 0, + vnop_monitor_vp_offsets, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VDESC_NO_OFFSET, + VOPARG_OFFSETOF(struct vnop_monitor_args, a_context), + NULL +}; + int vnop_setlabel_vp_offsets[] = { VOPARG_OFFSETOF(struct vnop_setlabel_args,a_vp), VDESC_NO_OFFSET @@ -1028,6 +1045,7 @@ struct vnodeop_desc *vfs_op_descs[] = { &vnop_blktooff_desc, &vnop_offtoblk_desc, &vnop_blockmap_desc, + &vnop_monitor_desc, #if NAMEDSTREAMS &vnop_getnamedstream_desc, &vnop_makenamedstream_desc, diff --git a/bsd/vm/dp_backing_file.c 
b/bsd/vm/dp_backing_file.c index 6b8467198..0f72ed2f1 100644 --- a/bsd/vm/dp_backing_file.c +++ b/bsd/vm/dp_backing_file.c @@ -47,7 +47,7 @@ #include #include -#include +#include #include #include @@ -154,6 +154,57 @@ macx_backing_store_suspend( return(error); } +extern boolean_t backing_store_stop_compaction; + +/* + * Routine: macx_backing_store_compaction + * Function: + * Turn compaction of swap space on or off. This is + * used during shutdown/restart so that the kernel + * doesn't waste time compacting swap files that are + * about to be deleted anyway. Compaction is always + * on by default when the system comes up and is turned + * off when a shutdown/restart is requested. It is + * re-enabled if the shutdown/restart is aborted for any reason. + */ + +int +macx_backing_store_compaction(int flags) +{ + int error; + + if ((error = suser(kauth_cred_get(), 0))) + return error; + + if (flags & SWAP_COMPACT_DISABLE) { + backing_store_stop_compaction = TRUE; + + } else if (flags & SWAP_COMPACT_ENABLE) { + backing_store_stop_compaction = FALSE; + } + + return 0; +} + +/* + * Routine: macx_triggers + * Function: + * Syscall interface to set the call backs for low and + * high water marks. + */ +int +macx_triggers( + struct macx_triggers_args *args) +{ + int error; + + error = suser(kauth_cred_get(), 0); + if (error) + return error; + + return mach_macx_triggers(args); +} + /* * Routine: macx_swapon * Function: @@ -177,7 +228,7 @@ macx_swapon( struct proc *p = current_proc(); AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); - AUDIT_ARG(value, args->priority); + AUDIT_ARG(value32, args->priority); funnel_state = thread_funnel_set(kernel_flock, TRUE); ndp = &nd; @@ -195,7 +246,7 @@ macx_swapon( */ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, ((IS_64BIT_PROCESS(p)) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), - CAST_USER_ADDR_T(args->filename), ctx); + (user_addr_t) args->filename, ctx); if ((error = namei(ndp))) goto swapon_bailout; @@ -261,6 +312,11 @@ macx_swapon( goto swapon_bailout; } + /* Mark this vnode as being used for swapfile */ + vnode_lock_spin(vp); + SET(vp->v_flag, VSWAP); + vnode_unlock(vp); + /* * NOTE: we are able to supply PAGE_SIZE here instead of * an actual record size or block number because: @@ -277,14 +333,17 @@ macx_swapon( error = EINVAL; else error = ENOMEM; + + /* This vnode is not to be used for swapfile */ + vnode_lock_spin(vp); + CLR(vp->v_flag, VSWAP); + vnode_unlock(vp); + goto swapon_bailout; } bs_port_table[i].bs = (void *)backing_store; error = 0; - /* Mark this vnode as being used for swapfile */ - SET(vp->v_flag, VSWAP); - ubc_setthreadcred(vp, p, current_thread()); /* @@ -337,7 +396,7 @@ macx_swapoff( */ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32), - CAST_USER_ADDR_T(args->filename), ctx); + (user_addr_t) args->filename, ctx); if ((error = namei(ndp))) goto swapoff_bailout; @@ -373,7 +432,9 @@ macx_swapoff( error = 0; bs_port_table[i].vp = 0; /* This vnode is no longer used for swapfile */ + vnode_lock_spin(vp); CLR(vp->v_flag, VSWAP); + vnode_unlock(vp); /* get rid of macx_swapon() "long term" reference */ vnode_rele(vp); diff --git a/bsd/vm/vm_pager.h b/bsd/vm/vm_pager.h index 2a146275d..1e189858b 100644 --- a/bsd/vm/vm_pager.h +++ b/bsd/vm/vm_pager.h @@ -56,6 +56,7 @@ typedef struct pager_struct *vm_pager_t; #define PAGER_ABSENT 1 /* pager does not have page */ #define PAGER_ERROR 2 /* pager unable to read or write page */ +#if 0 #ifdef KERNEL typedef int pager_return_t; @@ -65,5 +66,6 @@ extern pager_return_t vm_pager_get(void); extern pager_return_t vm_pager_put(void); extern boolean_t vm_pager_has_page(void); #endif /* KERNEL */ +#endif #endif /* _VM_PAGER_ */ diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 
559f83290..8bb213b61 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -75,12 +75,13 @@ #include #include -#include +#include #include #include #include #include +#include #include @@ -100,12 +101,6 @@ SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "" SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, ""); #endif /* !SECURE_KERNEL */ -#if CONFIG_NO_PRINTF_STRINGS -void -log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b) -{ -} -#else static const char *prot_values[] = { "none", "read-only", @@ -123,8 +118,44 @@ log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot) printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]); } -#endif +int shared_region_unnest_logging = 1; + +SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW, + &shared_region_unnest_logging, 0, ""); + +int vm_shared_region_unnest_log_interval = 10; +int shared_region_unnest_log_count_threshold = 5; + +/* These log rate throttling state variables aren't thread safe, but + * are sufficient unto the task. + */ +static int64_t last_unnest_log_time = 0; +static int shared_region_unnest_log_count = 0; + +void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) { + struct timeval tv; + const char *pcommstr; + + if (shared_region_unnest_logging == 0) + return; + + if (shared_region_unnest_logging == 1) { + microtime(&tv); + if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) { + if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold) + return; + } + else { + last_unnest_log_time = tv.tv_sec; + shared_region_unnest_log_count = 0; + } + } + + pcommstr = current_proc()->p_comm; + + printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. 
While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e); +} int useracc( @@ -255,7 +286,7 @@ suword( long fuword(user_addr_t addr) { - long word; + long word = 0; if (copyin(addr, (void *) &word, sizeof(int))) return(-1); @@ -274,7 +305,7 @@ suiword( long fuiword(user_addr_t addr) { - long word; + long word = 0; if (copyin(addr, (void *) &word, sizeof(int))) return(-1); @@ -317,7 +348,7 @@ suulong(user_addr_t addr, uint64_t uword) if (IS_64BIT_PROCESS(current_proc())) { return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); } else { - return(suiword(addr, (u_long)uword)); + return(suiword(addr, (uint32_t)uword)); } } @@ -341,7 +372,23 @@ swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *re return(ENOTSUP); } - +/* + * pid_for_task + * + * Find the BSD process ID for the Mach task associated with the given Mach port + * name + * + * Parameters: args User argument descriptor (see below) + * + * Indirect parameters: args->t Mach port name + * args->pid Process ID (returned value; see below) + * + * Returns: KERN_SUCCESS Success + * KERN_FAILURE Not success + * + * Implicit returns: args->pid Process ID + * + */ kern_return_t pid_for_task( struct pid_for_task_args *args) @@ -472,6 +519,8 @@ task_for_pid_posix_check(proc_t target) * Only permitted to privileged processes, or processes * with the same user ID. * + * Note: if pid == 0, an error is returned no matter who is calling. + * * XXX This should be a BSD system call, not a Mach trap!!!
*/ kern_return_t @@ -481,7 +530,6 @@ task_for_pid( mach_port_name_t target_tport = args->target_tport; int pid = args->pid; user_addr_t task_addr = args->t; - struct uthread *uthread; proc_t p = PROC_NULL; task_t t1 = TASK_NULL; mach_port_name_t tret = MACH_PORT_NULL; @@ -493,13 +541,12 @@ task_for_pid( AUDIT_ARG(pid, pid); AUDIT_ARG(mach_port1, target_tport); -#if defined(SECURE_KERNEL) - if (0 == pid) { + /* Always check if pid == 0 */ + if (pid == 0) { (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); return(KERN_FAILURE); } -#endif t1 = port_name_to_task(target_tport); if (t1 == TASK_NULL) { @@ -509,15 +556,11 @@ task_for_pid( } - /* - * Delayed binding of thread credential to process credential, if we - * are not running with an explicitly set thread credential. - */ - uthread = get_bsdthread_info(current_thread()); - kauth_cred_uthread_update(uthread, current_proc()); - p = proc_find(pid); - AUDIT_ARG(process, p); +#if CONFIG_AUDIT + if (p != PROC_NULL) + AUDIT_ARG(process, p); +#endif if (!(task_for_pid_posix_check(p))) { error = KERN_FAILURE; @@ -593,7 +636,6 @@ task_name_for_pid( mach_port_name_t target_tport = args->target_tport; int pid = args->pid; user_addr_t task_addr = args->t; - struct uthread *uthread; proc_t p = PROC_NULL; task_t t1; mach_port_name_t tret; @@ -612,17 +654,9 @@ task_name_for_pid( return(KERN_FAILURE); } - - /* - * Delayed binding of thread credential to process credential, if we - * are not running with an explicitly set thread credential. 
- */ - uthread = get_bsdthread_info(current_thread()); - kauth_cred_uthread_update(uthread, current_proc()); - p = proc_find(pid); - AUDIT_ARG(process, p); if (p != PROC_NULL) { + AUDIT_ARG(process, p); target_cred = kauth_cred_proc_ref(p); refheld = 1; @@ -1048,7 +1082,7 @@ shared_region_map_np( if (p->p_flag & P_NOSHLIB) { /* signal that this process is now using split libraries */ - OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); } done: @@ -1087,3 +1121,90 @@ extern unsigned int vm_page_free_target; SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD, &vm_page_free_target, 0, "Pageout daemon free target"); +extern unsigned int vm_memory_pressure; +SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD, + &vm_memory_pressure, 0, "Memory pressure indicator"); + +static int +vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + unsigned int page_free_wanted; + + page_free_wanted = mach_vm_ctl_page_free_wanted(); + return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted)); +} +SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, vm_ctl_page_free_wanted, "I", ""); + +extern unsigned int vm_page_purgeable_count; +SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD, + &vm_page_purgeable_count, 0, "Purgeable page count"); + +extern unsigned int vm_page_purgeable_wired_count; +SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD, + &vm_page_purgeable_wired_count, 0, "Wired purgeable page count"); + +SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD, + &vm_page_stats_reusable.reusable_count, 0, "Reusable page count"); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD, + &vm_page_stats_reusable.reusable_pages_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD, + &vm_page_stats_reusable.reusable_pages_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, 
CTLFLAG_RD, + &vm_page_stats_reusable.reusable_pages_shared, ""); +SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD, + &vm_page_stats_reusable.all_reusable_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD, + &vm_page_stats_reusable.partial_reusable_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD, + &vm_page_stats_reusable.reuse_pages_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD, + &vm_page_stats_reusable.reuse_pages_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD, + &vm_page_stats_reusable.all_reuse_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD, + &vm_page_stats_reusable.partial_reuse_calls, ""); +SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD, + &vm_page_stats_reusable.can_reuse_success, ""); +SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD, + &vm_page_stats_reusable.can_reuse_failure, ""); + + +int +vm_pressure_monitor( + __unused struct proc *p, + struct vm_pressure_monitor_args *uap, + int *retval) +{ + kern_return_t kr; + uint32_t pages_reclaimed; + uint32_t pages_wanted; + + kr = mach_vm_pressure_monitor( + (boolean_t) uap->wait_for_pressure, + uap->nsecs_monitored, + (uap->pages_reclaimed) ? 
&pages_reclaimed : NULL, + &pages_wanted); + + switch (kr) { + case KERN_SUCCESS: + break; + case KERN_ABORTED: + return EINTR; + default: + return EINVAL; + } + + if (uap->pages_reclaimed) { + if (copyout((void *)&pages_reclaimed, + uap->pages_reclaimed, + sizeof (pages_reclaimed)) != 0) { + return EFAULT; + } + } + + *retval = (int) pages_wanted; + return 0; +} diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index 483bda117..a15b6dcc4 100644 --- a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -55,6 +55,10 @@ #include #include +#include +#include +#include +#include #include #include @@ -76,11 +80,20 @@ #include -unsigned int vp_pagein=0; -unsigned int vp_pgodirty=0; -unsigned int vp_pgoclean=0; -unsigned int dp_pgouts=0; /* Default pager pageouts */ -unsigned int dp_pgins=0; /* Default pager pageins */ + +uint32_t +vnode_pager_isinuse(struct vnode *vp) +{ + if (vp->v_usecount > vp->v_kusecount) + return (1); + return (0); +} + +uint32_t +vnode_pager_return_hard_throttle_limit(struct vnode *vp, uint32_t *limit, uint32_t hard_throttle) +{ + return(cluster_hard_throttle_limit(vp, limit, hard_throttle)); +} vm_object_offset_t vnode_pager_get_filesize(struct vnode *vp) @@ -127,9 +140,9 @@ vnode_pager_get_cs_blobs( pager_return_t vnode_pageout(struct vnode *vp, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_object_offset_t f_offset, - vm_size_t size, + upl_size_t size, int flags, int *errorp) { @@ -140,7 +153,7 @@ vnode_pageout(struct vnode *vp, int isize; int pg_index; int base_index; - int offset; + upl_offset_t offset; upl_page_info_t *pl; vfs_context_t ctx = vfs_context_current(); /* pager context */ @@ -166,8 +179,6 @@ vnode_pageout(struct vnode *vp, * just go ahead and call vnop_pageout since * it has already sorted out the dirty ranges */ - dp_pgouts++; - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, size, 1, 0, 0, 0); @@ -180,6 +191,45 @@ vnode_pageout(struct vnode *vp, goto out; } + if (upl == NULL) { 
+ int request_flags; + + if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEOUTV2) { + /* + * filesystem has requested the new form of VNOP_PAGEOUT for file + * backed objects... we will not grab the UPL before calling VNOP_PAGEOUT... + * it is the filesystem's responsibility to grab the range we're denoting + * via 'f_offset' and 'size' into a UPL... this allows the filesystem to first + * take any locks it needs, before effectively locking the pages into a UPL... + */ + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, + size, (int)f_offset, 0, 0, 0); + + if ( (error_ret = VNOP_PAGEOUT(vp, NULL, upl_offset, (off_t)f_offset, + size, flags, ctx)) ) { + result = PAGER_ERROR; + } + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END, + size, 0, 0, 0, 0); + + goto out; + } + if (flags & UPL_MSYNC) + request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; + else + request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY; + + ubc_create_upl(vp, f_offset, size, &upl, &pl, request_flags); + + if (upl == (upl_t)NULL) { + result = PAGER_ERROR; + error_ret = EINVAL; + goto out; + } + upl_offset = 0; + } else + pl = ubc_upl_pageinfo(upl); + /* * we come here for pageouts to 'real' files and * for msyncs... the upl may not contain any @@ -187,8 +237,6 @@ vnode_pageout(struct vnode *vp, * through it and find the 'runs' of dirty pages * to call VNOP_PAGEOUT on... */ - pl = ubc_upl_pageinfo(upl); - if (ubc_getsize(vp) == 0) { /* * if the file has been effectively deleted, then @@ -276,8 +324,6 @@ vnode_pageout(struct vnode *vp, * Note we must not sleep here if the buffer is busy - that is * a lock inversion which causes deadlock.
*/ - vp_pgoclean++; - #if NFSCLIENT if (vp->v_tag == VT_NFS) /* check with nfs if page is OK to drop */ @@ -305,8 +351,6 @@ vnode_pageout(struct vnode *vp, continue; } - vp_pgodirty++; - num_of_pages = 1; xsize = isize - PAGE_SIZE; @@ -321,7 +365,7 @@ vnode_pageout(struct vnode *vp, KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START, xsize, (int)f_offset, 0, 0, 0); - if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset, (off_t)f_offset, + if ( (error = VNOP_PAGEOUT(vp, upl, offset, (off_t)f_offset, xsize, flags, ctx)) ) { if (error_ret == 0) error_ret = error; @@ -347,9 +391,9 @@ pager_return_t vnode_pagein( struct vnode *vp, upl_t upl, - vm_offset_t upl_offset, + upl_offset_t upl_offset, vm_object_offset_t f_offset, - vm_size_t size, + upl_size_t size, int flags, int *errorp) { @@ -377,20 +421,36 @@ vnode_pagein( goto out; } if (upl == (upl_t)NULL) { - if (size > (MAX_UPL_SIZE * PAGE_SIZE)) { - - panic("vnode_pagein: size = %x\n", size); + flags &= ~UPL_NOCOMMIT; + if (size > (MAX_UPL_SIZE * PAGE_SIZE)) { result = PAGER_ERROR; error = PAGER_ERROR; goto out; } - ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_NOBLOCK | UPL_RET_ONLY_ABSENT | UPL_SET_LITE); + if (vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_PAGEINV2) { + /* + * filesystem has requested the new form of VNOP_PAGEIN for file + * backed objects... we will not grab the UPL before calling VNOP_PAGEIN... + * it is the filesystem's responsibility to grab the range we're denoting + * via 'f_offset' and 'size' into a UPL... this allows the filesystem to first + * take any locks it needs, before effectively locking the pages into a UPL... + * so we pass a NULL into the filesystem instead of a UPL pointer... the 'upl_offset' + * is used to identify the "must have" page in the extent... the filesystem is free + * to clip the extent to better fit the underlying FS blocksize if it desires as + * long as it continues to include the "must have" page...
'f_offset' + 'upl_offset' + * identifies that page + */ + if ( (error = VNOP_PAGEIN(vp, NULL, upl_offset, (off_t)f_offset, + size, flags, vfs_context_current())) ) { + result = PAGER_ERROR; + error = PAGER_ERROR; + } + goto out; + } + ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); if (upl == (upl_t)NULL) { - - panic("vnode_pagein: ubc_create_upl failed\n"); - result = PAGER_ABSENT; error = PAGER_ABSENT; goto out; @@ -403,15 +463,10 @@ vnode_pagein( * are responsible for commiting/aborting it * regardless of what the caller has passed in */ - flags &= ~UPL_NOCOMMIT; must_commit = 1; - - vp_pagein++; } else { pl = ubc_upl_pageinfo(upl); first_pg = upl_offset / PAGE_SIZE; - - dp_pgins++; } pages_in_upl = size / PAGE_SIZE; DTRACE_VM2(pgpgin, int, pages_in_upl, (uint64_t *), NULL); @@ -495,9 +550,20 @@ vnode_pagein( xsize = (last_pg - start_pg) * PAGE_SIZE; xoff = start_pg * PAGE_SIZE; - if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff, + if ( (error = VNOP_PAGEIN(vp, upl, (upl_offset_t) xoff, (off_t)f_offset + xoff, xsize, flags, vfs_context_current())) ) { + /* + * Usually this UPL will be aborted/committed by the lower cluster layer. + * In the case of decmpfs, however, we may return an error (EAGAIN) to avoid + * a deadlock with another thread already inflating the file. In that case, + * we must take care of our UPL at this layer itself. + */ + if (must_commit) { + if(error == EAGAIN) { + ubc_upl_abort_range(upl, (upl_offset_t) xoff, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART); + } + } result = PAGER_ERROR; error = PAGER_ERROR; diff --git a/bsd/vm/vnode_pager.h b/bsd/vm/vnode_pager.h index 89962ac72..17adbf1e7 100644 --- a/bsd/vm/vnode_pager.h +++ b/bsd/vm/vnode_pager.h @@ -43,10 +43,9 @@ #include #include #include +#include #include -vm_pager_t vnode_pager_setup(struct vnode *, memory_object_t); - /* * Vstructs are the internal (to us) description of a unit of backing store. 
* The are the link between memory objects and the backing store they represent. @@ -128,30 +127,6 @@ typedef struct vstruct { #define VNODE_PAGER_NULL ((vnode_pager_t) 0) - -pager_return_t vnode_pagein(struct vnode *, upl_t, - upl_offset_t, vm_object_offset_t, - upl_size_t, int, int *); -pager_return_t vnode_pageout(struct vnode *, upl_t, - upl_offset_t, vm_object_offset_t, - upl_size_t, int, int *); - -extern vm_object_offset_t vnode_pager_get_filesize( - struct vnode *vp); - -extern kern_return_t vnode_pager_get_pathname( - struct vnode *vp, - char *pathname, - vm_size_t *length_p); - -extern kern_return_t vnode_pager_get_filename( - struct vnode *vp, - const char **filename); - -extern kern_return_t vnode_pager_get_cs_blobs( - struct vnode *vp, - void **blobs); - #endif /* KERNEL */ #endif /* _VNODE_PAGER_ */ diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index 745ad66f2..f0322f1ac 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -8,6 +8,8 @@ __FREE __FREE_ZONE __MALLOC __MALLOC_ZONE +_advisory_read +_advisory_read_ext _bcd2bin_data _bdevsw_add _bdevsw_isfree @@ -41,10 +43,10 @@ _buf_drvdata _buf_error _buf_flags _buf_flushdirtyblks -_buf_fua _buf_free -_buf_fsprivate _buf_fromcache +_buf_fsprivate +_buf_fua _buf_getblk _buf_geteblk _buf_invalblkno @@ -96,20 +98,18 @@ _cdevsw_add_with_bdev _cdevsw_isfree _cdevsw_remove _cluster_bp +_cluster_bp_ext _cluster_copy_ubc_data _cluster_copy_upl_data -_advisory_read _cluster_pagein -_cluster_pageout -_cluster_push -_cluster_read -_cluster_write -_cluster_bp_ext -_advisory_read_ext _cluster_pagein_ext +_cluster_pageout _cluster_pageout_ext +_cluster_push _cluster_push_ext +_cluster_read _cluster_read_ext +_cluster_write _cluster_write_ext _cluster_zero _copystr @@ -169,25 +169,25 @@ _err_symlink _err_whiteout _err_write _ether_add_proto -_ether_del_proto _ether_check_multi +_ether_del_proto _ether_demux _ether_frameout _ether_ioctl +_fifo_advlock +_fifo_close +_fifo_inactive 
+_fifo_ioctl _fifo_lookup _fifo_open +_fifo_pathconf _fifo_read -_fifo_write -_fifo_ioctl _fifo_select -_fifo_inactive -_fifo_close -_fifo_pathconf -_fifo_advlock +_fifo_write _file_drop _file_flags _file_socket -_file_vnode +_file_vnode_withvid _fubyte _fuibyte _fuiword @@ -239,6 +239,7 @@ _ifnet_get_multicast_list _ifnet_hdrlen _ifnet_index _ifnet_input +_ifnet_interface_family_find _ifnet_ioctl _ifnet_lastchange _ifnet_list_free @@ -254,8 +255,8 @@ _ifnet_output _ifnet_output_raw _ifnet_reference _ifnet_release -_ifnet_resolve_multicast:_dlil_resolve_multi _ifnet_remove_multicast +_ifnet_resolve_multicast:_dlil_resolve_multi _ifnet_set_addrlen _ifnet_set_baudrate _ifnet_set_eflags @@ -266,6 +267,10 @@ _ifnet_set_lladdr _ifnet_set_metric _ifnet_set_mtu _ifnet_set_offload +_ifnet_set_tso_mtu +_ifnet_get_tso_mtu +_ifnet_get_wake_flags +_ifnet_set_wake_flags _ifnet_set_promiscuous _ifnet_set_stat _ifnet_softc @@ -288,25 +293,24 @@ _ipf_inject_input _ipf_inject_output _ipf_remove _is_file_clean -_is_suser -_is_suser1 _isdisk _kauth_acl_alloc _kauth_acl_free _kauth_authorize_action +_kauth_authorize_process _kauth_cred_create _kauth_cred_find _kauth_cred_get +_kauth_cred_get_with_ref _kauth_cred_getgid _kauth_cred_getguid _kauth_cred_getntsid _kauth_cred_getuid -_kauth_cred_get_with_ref +_kauth_cred_gid2guid _kauth_cred_gid2ntsid _kauth_cred_guid2gid _kauth_cred_guid2ntsid _kauth_cred_guid2uid -_kauth_cred_gid2guid _kauth_cred_ismember_gid _kauth_cred_ismember_guid _kauth_cred_ntsid2gid @@ -314,16 +318,15 @@ _kauth_cred_ntsid2guid _kauth_cred_ntsid2uid _kauth_cred_proc_ref _kauth_cred_ref -_kauth_cred_rele _kauth_cred_uid2guid _kauth_cred_uid2ntsid _kauth_cred_unref _kauth_deregister_scope +_kauth_filesec_alloc +_kauth_filesec_free _kauth_getgid _kauth_getruid _kauth_getuid -_kauth_filesec_alloc -_kauth_filesec_free _kauth_listen_scope _kauth_null_guid _kauth_register_scope @@ -331,16 +334,12 @@ _kauth_unlisten_scope _kdebug_enable _kernel_debug _kernel_debug1 
-_kernel_flock _kernproc _kev_msg_post _kev_vendor_code_find _knote _knote_attach _knote_detach -_ldisc_deregister -_ldisc_register -_lightning_bolt _mbuf_adj _mbuf_adjustlen _mbuf_align_32 @@ -350,7 +349,10 @@ _mbuf_allocpacket_list _mbuf_attachcluster _mbuf_clear_csum_performed _mbuf_clear_csum_requested +_mbuf_get_mlen +_mbuf_get_mhlen _mbuf_clear_vlan_tag +_mbuf_concatenate _mbuf_copy_pkthdr _mbuf_copyback _mbuf_copydata @@ -366,6 +368,7 @@ _mbuf_freem_list _mbuf_get _mbuf_get_csum_performed _mbuf_get_csum_requested +_mbuf_get_tso_requested _mbuf_get_vlan_tag _mbuf_getcluster _mbuf_gethdr @@ -447,7 +450,6 @@ _nop_write _nulldev _nullop _physio -_postevent _proc_exiting _proc_find _proc_forcequota @@ -457,6 +459,7 @@ _proc_isinferior _proc_issignal _proc_name _proc_noremotehang +_proc_pgrpid _proc_pid _proc_ppid _proc_rele @@ -464,21 +467,16 @@ _proc_self _proc_selfname _proc_selfpid _proc_selfppid -_proc_tbe +_proc_selfpgrpid _proc_signal _proc_suser -_proc_ucred +_proc_tbe _proto_inject _proto_input _proto_register_plumber _proto_unregister_plumber _random _read_random -_rl_add -_rl_init -_rl_remove -_rl_scan -_rootvnode _selrecord _selthreadclear _seltrue @@ -519,35 +517,20 @@ _sockopt_direction _sockopt_level _sockopt_name _sockopt_valsize +_spec_close +_spec_ebadf +_spec_fsync +_spec_ioctl _spec_lookup _spec_open +_spec_pathconf _spec_read -_spec_write -_spec_ioctl _spec_select -_spec_fsync _spec_strategy -_spec_close -_spec_pathconf -_spl0 -_splbio -_splclock -_splhigh -_splimp -_spllo -_spln -_sploff -_splon -_splpower -_splsched -_splsoftclock -_spltty -_splvm -_splx +_spec_write _subyte _suibyte _suiword -_suser _suword _sysctl__children _sysctl__debug_children @@ -588,32 +571,22 @@ _tvtoabstime _ubc_blktooff _ubc_create_upl _ubc_getcred -_ubc_getobject _ubc_getsize -_ubc_info_deallocate -_ubc_info_init -_ubc_info_zone -_ubc_isinuse _ubc_msync _ubc_offtoblk _ubc_page_op _ubc_pages_resident _ubc_range_op -_ubc_setcred _ubc_setsize _ubc_setthreadcred 
-_ubc_sync_range _ubc_upl_abort _ubc_upl_abort_range _ubc_upl_commit _ubc_upl_commit_range _ubc_upl_map _ubc_upl_maxbufsize -_ubc_upl_unmap _ubc_upl_pageinfo -_upl_page_present -_upl_dirty_page -_upl_valid_page +_ubc_upl_unmap _uio_addiov _uio_create _uio_curriovbase @@ -633,6 +606,11 @@ _uio_setrw _uio_update _uiomove _uiomove64 +_unicode_combinable +_unicode_decomposeable +_upl_dirty_page +_upl_page_present +_upl_valid_page _useracc _utf8_decodestr _utf8_encodelen @@ -669,27 +647,22 @@ _vfs_fsprivate _vfs_fsremove _vfs_getnewfsid _vfs_getvfs -_vfs_ioattr _vfs_init_io_attributes +_vfs_ioattr _vfs_isforce _vfs_isrdonly _vfs_isrdwr _vfs_isreload _vfs_issynchronous +_vfs_isunmount _vfs_isupdate _vfs_iswriteupgrade _vfs_iterate _vfs_maxsymlen _vfs_mountedon -_vfs_mountroot _vfs_name -_vfs_nummntops -_vfs_op_descs -_vfs_op_init -_vfs_opv_descs -_vfs_opv_init -_vfs_opv_numops _vfs_removename +_vfs_rootvnode _vfs_setauthcache_ttl _vfs_setauthopaque _vfs_setauthopaqueaccess @@ -710,9 +683,9 @@ _vn_path_package_check _vn_rdwr _vn_revoke _vnode_addfsref -_vnode_authorize _vnode_authattr _vnode_authattr_new +_vnode_authorize _vnode_clearfsnode _vnode_clearmountedon _vnode_clearnocache @@ -722,6 +695,8 @@ _vnode_create _vnode_fsnode _vnode_get _vnode_getattr +_vnode_getname +_vnode_getparent _vnode_getwithref _vnode_getwithvid _vnode_hascleanblks @@ -729,22 +704,26 @@ _vnode_hasdirtyblks _vnode_isblk _vnode_ischr _vnode_isdir -_vnode_islnk _vnode_isfifo _vnode_isinuse +_vnode_islnk _vnode_ismount _vnode_ismountedon _vnode_isnocache _vnode_isnoreadahead +_vnode_israge +_vnode_isrecycled _vnode_isreg +_vnode_isswap _vnode_issystem _vnode_isvroot _vnode_iterate +_vnode_lookup _vnode_mount _vnode_mountedhere -_vnode_lookup _vnode_open _vnode_put +_vnode_putname _vnode_recycle _vnode_ref _vnode_rele @@ -758,6 +737,7 @@ _vnode_settag _vnode_specrdev _vnode_startwrite _vnode_uncache_credentials +_vnode_update_identity _vnode_vfs64bitready _vnode_vfsisrdonly _vnode_vfsmaxsymlen @@ 
-767,7 +747,6 @@ _vnode_vid _vnode_vtype _vnode_waitforwrites _vnode_writedone -_vnodetarget _vnop_access_desc _vnop_advlock_desc _vnop_allocate_desc @@ -814,8 +793,6 @@ _vnop_strategy_desc _vnop_symlink_desc _vnop_whiteout_desc _vnop_write_desc -_vslock -_vsunlock _vttoif_tab _wakeup _wakeup_one diff --git a/config/BSDKernel.i386.exports b/config/BSDKernel.i386.exports index 4fb38fe48..1cdfeddfe 100644 --- a/config/BSDKernel.i386.exports +++ b/config/BSDKernel.i386.exports @@ -1,4 +1,8 @@ +_file_vnode _in6_cksum:_inet6_cksum +_is_suser +_is_suser1 +_kauth_cred_rele _mbuf_data _mbuf_inet6_cksum _mbuf_len @@ -12,3 +16,23 @@ _mbuf_setlen _mbuf_setnextpkt _mbuf_type _nd6_lookup_ipv6 +_proc_ucred +_rootvnode +_spl0 +_splbio +_splclock +_splhigh +_splimp +_spllo +_spln +_sploff +_splon +_splpower +_splsched +_splsoftclock +_spltty +_splvm +_splx +_suser +_ubc_setcred +_ubc_sync_range diff --git a/config/BSDKernel.ppc.exports b/config/BSDKernel.ppc.exports index 4fb38fe48..83559e0b0 100644 --- a/config/BSDKernel.ppc.exports +++ b/config/BSDKernel.ppc.exports @@ -1,4 +1,7 @@ +_file_vnode _in6_cksum:_inet6_cksum +_is_suser +_is_suser1 _mbuf_data _mbuf_inet6_cksum _mbuf_len @@ -12,3 +15,23 @@ _mbuf_setlen _mbuf_setnextpkt _mbuf_type _nd6_lookup_ipv6 +_proc_ucred +_rootvnode +_spl0 +_splbio +_splclock +_splhigh +_splimp +_spllo +_spln +_sploff +_splon +_splpower +_splsched +_splsoftclock +_spltty +_splvm +_splx +_suser +_ubc_setcred +_ubc_sync_range diff --git a/config/BSDKernel.x86_64.exports b/config/BSDKernel.x86_64.exports new file mode 100644 index 000000000..4fb38fe48 --- /dev/null +++ b/config/BSDKernel.x86_64.exports @@ -0,0 +1,14 @@ +_in6_cksum:_inet6_cksum +_mbuf_data +_mbuf_inet6_cksum +_mbuf_len +_mbuf_next +_mbuf_nextpkt +_mbuf_pkthdr_header +_mbuf_pkthdr_len +_mbuf_pkthdr_rcvif +_mbuf_pkthdr_setheader +_mbuf_setlen +_mbuf_setnextpkt +_mbuf_type +_nd6_lookup_ipv6 diff --git a/config/IOKit.exports b/config/IOKit.exports index f14615395..be71c491d 100644 --- 
a/config/IOKit.exports +++ b/config/IOKit.exports @@ -35,6 +35,7 @@ _IOLockTryLock:_lck_mtx_try_lock _IOLockUnlock:_lck_mtx_unlock _IOLockWakeup _IOLog +_IOLogv _IOMalloc _IOMallocAligned _IOMallocContiguous @@ -57,7 +58,6 @@ _IONetworkMatching _IONetworkNamePrefixMatching _IOOFPathMatching _IOPageableMapForAddress -_IOPanic _IOPause _IOPrintPlane _IORWLockAlloc @@ -73,6 +73,7 @@ _IORecursiveLockGetMachLock _IORecursiveLockHaveLock _IORecursiveLockLock _IORecursiveLockSleep +_IORecursiveLockSleepDeadline _IORecursiveLockTryLock _IORecursiveLockUnlock _IORecursiveLockWakeup @@ -107,30 +108,18 @@ _PE_cpu_signal _PE_cpu_start _PE_enter_debugger _PE_halt_restart -_PE_parse_boot_arg _PE_parse_boot_argn _PE_poll_input _StartIOKit -__Z10tellClientP8OSObjectPv -__Z16IODTFindSlotNameP15IORegistryEntrym -__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E -__Z17IODTGetCellCountsP15IORegistryEntryPmS1_ __Z17IODTMapInterruptsP15IORegistryEntry __Z17IODeviceTreeAllocPv __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_ __Z19printDictionaryKeysP12OSDictionaryPc -__Z19tellAppWithResponseP8OSObjectPv __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor __Z20IODTMatchNubWithKeysP15IORegistryEntryPKc __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory -__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ -__Z22tellClientWithResponseP8OSObjectPv -__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc -__Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_ -__Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_ __Z27IODTInterruptControllerNameP15IORegistryEntry -__Z27serializedCancelPowerChangeP8OSObjectPvS1_S1_S1_ __ZN10IOMachPort10gMetaClassE __ZN10IOMachPort10superClassE __ZN10IOMachPort11dictForTypeEj @@ -168,8 +157,6 @@ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource __ZN10IOWorkLoop15runEventSourcesEv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv 
-__ZN10IOWorkLoop19workLoopWithOptionsEm -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev @@ -184,7 +171,6 @@ __ZN10IOWorkLoop9MetaClassC2Ev __ZN10IOWorkLoop9closeGateEv __ZN10IOWorkLoop9metaClassE __ZN10IOWorkLoop9runActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ -__ZN10IOWorkLoop9sleepGateEPvm __ZN10IOWorkLoopC1EPK11OSMetaClass __ZN10IOWorkLoopC1Ev __ZN10IOWorkLoopC2EPK11OSMetaClass @@ -195,16 +181,11 @@ __ZN11IOCatalogue10addDriversEP7OSArrayb __ZN11IOCatalogue10gMetaClassE __ZN11IOCatalogue10initializeEv __ZN11IOCatalogue10superClassE -__ZN11IOCatalogue11findDriversEP12OSDictionaryPl -__ZN11IOCatalogue11findDriversEP9IOServicePl __ZN11IOCatalogue13removeDriversEP12OSDictionaryb __ZN11IOCatalogue13startMatchingEP12OSDictionary __ZN11IOCatalogue15moduleHasLoadedEP8OSString __ZN11IOCatalogue15moduleHasLoadedEPKc __ZN11IOCatalogue16terminateDriversEP12OSDictionary -__ZN11IOCatalogue18removeKernelLinkerEv -__ZN11IOCatalogue23recordStartupExtensionsEv -__ZN11IOCatalogue24addExtensionsFromArchiveEP6OSData __ZN11IOCatalogue25terminateDriversForModuleEP8OSStringb __ZN11IOCatalogue25terminateDriversForModuleEPKcb __ZN11IOCatalogue4freeEv @@ -221,15 +202,10 @@ __ZN11IOCatalogueD0Ev __ZN11IOCatalogueD2Ev __ZN11IODataQueue10gMetaClassE __ZN11IODataQueue10superClassE -__ZN11IODataQueue11withEntriesEmm -__ZN11IODataQueue12withCapacityEm -__ZN11IODataQueue15initWithEntriesEmm -__ZN11IODataQueue16initWithCapacityEm __ZN11IODataQueue19getMemoryDescriptorEv __ZN11IODataQueue19setNotificationPortEP8ipc_port __ZN11IODataQueue29sendDataAvailableNotificationEv __ZN11IODataQueue4freeEv -__ZN11IODataQueue7enqueueEPvm __ZN11IODataQueue9MetaClassC1Ev __ZN11IODataQueue9MetaClassC2Ev __ZN11IODataQueue9metaClassE @@ -241,12 +217,28 @@ __ZN11IODataQueueD0Ev __ZN11IODataQueueD2Ev __ZN11IOMemoryMap10gMetaClassE __ZN11IOMemoryMap10superClassE +__ZN11IOMemoryMap13getMapOptionsEv 
+__ZN11IOMemoryMap14getAddressTaskEv +__ZN11IOMemoryMap17getVirtualAddressEv __ZN11IOMemoryMap18getPhysicalAddressEv +__ZN11IOMemoryMap19getMemoryDescriptorEv +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev +__ZN11IOMemoryMap5unmapEv __ZN11IOMemoryMap9MetaClassC1Ev __ZN11IOMemoryMap9MetaClassC2Ev +__ZN11IOMemoryMap9getLengthEv __ZN11IOMemoryMap9metaClassE __ZN11IOMemoryMapC1EPK11OSMetaClass +__ZN11IOMemoryMapC1Ev __ZN11IOMemoryMapC2EPK11OSMetaClass +__ZN11IOMemoryMapC2Ev __ZN11IOMemoryMapD0Ev __ZN11IOMemoryMapD2Ev __ZN11IOResources10gMetaClassE @@ -266,20 +258,11 @@ __ZN11IOResourcesD2Ev __ZN12IODMACommand10gMetaClassE __ZN12IODMACommand10superClassE __ZN12IODMACommand10writeBytesEyPKvy -__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm -__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm -__ZN12IODMACommand11synchronizeEm -__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm -__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm __ZN12IODMACommand12cloneCommandEPv -__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm -__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm -__ZN12IODMACommand15genIOVMSegmentsEPyPvPm -__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand12getAlignmentEv +__ZN12IODMACommand17getNumAddressBitsEv __ZN12IODMACommand19setMemoryDescriptorEPK18IOMemoryDescriptorb __ZN12IODMACommand21clearMemoryDescriptorEb -__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ -__ZN12IODMACommand22_RESERVEDIODMACommand2Ev __ZN12IODMACommand22_RESERVEDIODMACommand3Ev __ZN12IODMACommand22_RESERVEDIODMACommand4Ev 
__ZN12IODMACommand22_RESERVEDIODMACommand5Ev @@ -293,11 +276,10 @@ __ZN12IODMACommand23_RESERVEDIODMACommand12Ev __ZN12IODMACommand23_RESERVEDIODMACommand13Ev __ZN12IODMACommand23_RESERVEDIODMACommand14Ev __ZN12IODMACommand23_RESERVEDIODMACommand15Ev -__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb +__ZN12IODMACommand26getPreparedOffsetAndLengthEPyS0_ __ZN12IODMACommand4freeEv __ZN12IODMACommand7prepareEyybb __ZN12IODMACommand8completeEbb -__ZN12IODMACommand8transferEmyPvy __ZN12IODMACommand9MetaClassC1Ev __ZN12IODMACommand9MetaClassC2Ev __ZN12IODMACommand9metaClassE @@ -345,19 +327,12 @@ __ZN12IOUserClient10getServiceEv __ZN12IOUserClient10initializeEv __ZN12IOUserClient10superClassE __ZN12IOUserClient11clientCloseEv -__ZN12IOUserClient12initWithTaskEP4taskPvm -__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary __ZN12IOUserClient13connectClientEPS_ __ZN12IOUserClient14externalMethodEjP25IOExternalMethodArgumentsP24IOExternalMethodDispatchP8OSObjectPv -__ZN12IOUserClient15mapClientMemoryEmP4taskmj -__ZN12IOUserClient15sendAsyncResultEPjiPPvm -__ZN12IOUserClient17sendAsyncResult64EPyiS0_m __ZN12IOUserClient17setAsyncReferenceEPjP8ipc_portPvS3_ __ZN12IOUserClient18clientHasPrivilegeEPvPKc -__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor __ZN12IOUserClient20exportObjectToClientEP4taskP8OSObjectPS3_ __ZN12IOUserClient21destroyUserReferencesEP8OSObject -__ZN12IOUserClient22_RESERVEDIOUserClient1Ev __ZN12IOUserClient22_RESERVEDIOUserClient2Ev __ZN12IOUserClient22_RESERVEDIOUserClient3Ev __ZN12IOUserClient22_RESERVEDIOUserClient4Ev @@ -372,15 +347,9 @@ __ZN12IOUserClient23_RESERVEDIOUserClient12Ev __ZN12IOUserClient23_RESERVEDIOUserClient13Ev __ZN12IOUserClient23_RESERVEDIOUserClient14Ev __ZN12IOUserClient23_RESERVEDIOUserClient15Ev -__ZN12IOUserClient23getExternalTrapForIndexEm -__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore 
-__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem -__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm -__ZN12IOUserClient25getExternalMethodForIndexEm -__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN12IOUserClient23releaseNotificationPortEP8ipc_port +__ZN12IOUserClient23releaseAsyncReference64EPy __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor -__ZN12IOUserClient30getExternalAsyncMethodForIndexEm -__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem __ZN12IOUserClient4freeEv __ZN12IOUserClient4initEP12OSDictionary __ZN12IOUserClient4initEv @@ -397,11 +366,9 @@ __ZN13IOCommandGate10superClassE __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E __ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop __ZN13IOCommandGate12checkForWorkEv -__ZN13IOCommandGate12commandSleepEPvm __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ __ZN13IOCommandGate13commandWakeupEPvb __ZN13IOCommandGate14attemptCommandEPvS0_S0_S0_ -__ZN13IOCommandGate23_RESERVEDIOCommandGate0Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev @@ -426,7 +393,6 @@ __ZN13IOCommandGateD2Ev __ZN13IOCommandPool10gMetaClassE __ZN13IOCommandPool10getCommandEb __ZN13IOCommandPool10superClassE -__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm __ZN13IOCommandPool12withWorkLoopEP10IOWorkLoop __ZN13IOCommandPool13returnCommandEP9IOCommand __ZN13IOCommandPool15gatedGetCommandEPP9IOCommandb @@ -441,7 +407,6 @@ __ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev __ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev __ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev __ZN13IOCommandPool4freeEv -__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm __ZN13IOCommandPool9MetaClassC1Ev __ZN13IOCommandPool9MetaClassC2Ev __ZN13IOCommandPool9metaClassE @@ -475,7 +440,6 @@ __ZN13IOEventSource9MetaClassC2Ev __ZN13IOEventSource9closeGateEv 
__ZN13IOEventSource9metaClassE __ZN13IOEventSource9setActionEPFvP8OSObjectzE -__ZN13IOEventSource9sleepGateEPvm __ZN13IOEventSourceC1EPK11OSMetaClass __ZN13IOEventSourceC2EPK11OSMetaClass __ZN13IOEventSourceD0Ev @@ -483,7 +447,6 @@ __ZN13IOEventSourceD2Ev __ZN13_IOServiceJob10gMetaClassE __ZN13_IOServiceJob10pingConfigEPS_ __ZN13_IOServiceJob10superClassE -__ZN13_IOServiceJob8startJobEP9IOServiceim __ZN13_IOServiceJob9MetaClassC1Ev __ZN13_IOServiceJob9MetaClassC2Ev __ZN13_IOServiceJob9metaClassE @@ -493,31 +456,8 @@ __ZN13_IOServiceJobC2EPK11OSMetaClass __ZN13_IOServiceJobC2Ev __ZN13_IOServiceJobD0Ev __ZN13_IOServiceJobD2Ev -__ZN14IOCommandQueue10gMetaClassE -__ZN14IOCommandQueue10superClassE -__ZN14IOCommandQueue12checkForWorkEv -__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei -__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_ -__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E -__ZN14IOCommandQueue4freeEv -__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei -__ZN14IOCommandQueue9MetaClassC1Ev -__ZN14IOCommandQueue9MetaClassC2Ev -__ZN14IOCommandQueue9metaClassE -__ZN14IOCommandQueueC1EPK11OSMetaClass -__ZN14IOCommandQueueC1Ev -__ZN14IOCommandQueueC2EPK11OSMetaClass -__ZN14IOCommandQueueC2Ev -__ZN14IOCommandQueueD0Ev -__ZN14IOCommandQueueD2Ev -__ZN14IODeviceMemory12withSubRangeEPS_mm -__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm -__ZN14IODeviceMemory9withRangeEmm __ZN14IOMemoryCursor10gMetaClassE __ZN14IOMemoryCursor10superClassE -__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm -__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm -__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm __ZN14IOMemoryCursor9MetaClassC1Ev __ZN14IOMemoryCursor9MetaClassC2Ev __ZN14IOMemoryCursor9metaClassE @@ -529,11 +469,8 @@ __ZN14IOMemoryCursorD0Ev __ZN14IOMemoryCursorD2Ev __ZN14IOPMrootDomain10gMetaClassE __ZN14IOPMrootDomain10superClassE 
-__ZN14IOPMrootDomain10youAreRootEv __ZN14IOPMrootDomain11sleepSystemEv -__ZN14IOPMrootDomain12broadcast_itEmm __ZN14IOPMrootDomain12tellChangeUpEm -__ZN14IOPMrootDomain12unIdleDeviceEP9IOServicem __ZN14IOPMrootDomain12wakeFromDozeEv __ZN14IOPMrootDomain13askChangeDownEm __ZN14IOPMrootDomain13copyPMSettingEP8OSSymbol @@ -544,30 +481,17 @@ __ZN14IOPMrootDomain14publishFeatureEPKcjPj __ZN14IOPMrootDomain14shutdownSystemEv __ZN14IOPMrootDomain14tellChangeDownEm __ZN14IOPMrootDomain15powerChangeDoneEm -__ZN14IOPMrootDomain15reportUserInputEv -__ZN14IOPMrootDomain16adjustPowerStateEv -__ZN14IOPMrootDomain16command_receivedEPvS0_S0_S0_ __ZN14IOPMrootDomain16tellNoChangeDownEm __ZN14IOPMrootDomain17getSleepSupportedEv __ZN14IOPMrootDomain17setAggressivenessEmm -__ZN14IOPMrootDomain17setSleepSupportedEm __ZN14IOPMrootDomain18changePowerStateToEm -__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain22changePowerStateToPrivEm __ZN14IOPMrootDomain22removePublishedFeatureEj __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm -__ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv -__ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService -__ZN14IOPMrootDomain24receivePowerNotificationEm __ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolP8OSObject __ZN14IOPMrootDomain24systemPowerEventOccurredEPK8OSSymbolj -__ZN14IOPMrootDomain25announcePowerSourceChangeEv -__ZN14IOPMrootDomain26handleSleepTimerExpirationEv -__ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv -__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain27registerPMSettingControllerEPPK8OSSymbolPFiP8OSObjectS2_S5_mES5_mPS5_ __ZN14IOPMrootDomain27registerPMSettingControllerEPPK8OSSymboljPFiP8OSObjectS2_S5_mES5_mPS5_ -__ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev __ZN14IOPMrootDomain9MetaClassC2Ev @@ -601,11 +525,7 @@ 
__ZN15IOConditionLockD0Ev __ZN15IOConditionLockD2Ev __ZN15IODMAController10gMetaClassE __ZN15IODMAController10superClassE -__ZN15IODMAController13getControllerEP9IOServicem -__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim __ZN15IODMAController18completeDMACommandEP16IODMAEventSourceP12IODMACommand -__ZN15IODMAController20createControllerNameEm -__ZN15IODMAController21registerDMAControllerEm __ZN15IODMAController5startEP9IOService __ZN15IODMAController9MetaClassC1Ev __ZN15IODMAController9MetaClassC2Ev @@ -650,8 +570,8 @@ __ZN15IOPMPowerSource19setBatteryInstalledEb __ZN15IOPMPowerSource20setExternalConnectedEb __ZN15IOPMPowerSource21externalChargeCapableEv __ZN15IOPMPowerSource22setLegacyIOBatteryInfoEP12OSDictionary -__ZN15IOPMPowerSource24setExternalChargeCapableEb __ZN15IOPMPowerSource24capacityPercentRemainingEv +__ZN15IOPMPowerSource24setExternalChargeCapableEb __ZN15IOPMPowerSource4freeEv __ZN15IOPMPowerSource4initEv __ZN15IOPMPowerSource5modelEv @@ -698,6 +618,7 @@ __ZN15IORegistryEntry16setPropertyTableEP12OSDictionary __ZN15IORegistryEntry17matchPathLocationEPKcPK15IORegistryPlane __ZN15IORegistryEntry17runPropertyActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ __ZN15IORegistryEntry18getGenerationCountEv +__ZN15IORegistryEntry18getRegistryEntryIDEv __ZN15IORegistryEntry21getChildFromComponentEPPKcPK15IORegistryPlane __ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev __ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev @@ -754,29 +675,11 @@ __ZN15IORegistryPlaneC2EPK11OSMetaClass __ZN15IORegistryPlaneC2Ev __ZN15IORegistryPlaneD0Ev __ZN15IORegistryPlaneD2Ev -__ZN15_IOConfigThread10gMetaClassE -__ZN15_IOConfigThread10superClassE -__ZN15_IOConfigThread12configThreadEv -__ZN15_IOConfigThread4freeEv -__ZN15_IOConfigThread4mainEPS_ -__ZN15_IOConfigThread9MetaClassC1Ev -__ZN15_IOConfigThread9MetaClassC2Ev -__ZN15_IOConfigThread9metaClassE -__ZN15_IOConfigThreadC1EPK11OSMetaClass -__ZN15_IOConfigThreadC1Ev 
-__ZN15_IOConfigThreadC2EPK11OSMetaClass -__ZN15_IOConfigThreadC2Ev -__ZN15_IOConfigThreadD0Ev -__ZN15_IOConfigThreadD2Ev __ZN16IODMAEventSource10gMetaClassE __ZN16IODMAEventSource10superClassE __ZN16IODMAEventSource12checkForWorkEv -__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m -__ZN16IODMAEventSource14stopDMACommandEbP13mach_timespec -__ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm -__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim +__ZN16IODMAEventSource14stopDMACommandEby __ZN16IODMAEventSource18completeDMACommandEP12IODMACommand -__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m __ZN16IODMAEventSource9MetaClassC1Ev __ZN16IODMAEventSource9MetaClassC2Ev __ZN16IODMAEventSource9metaClassE @@ -789,7 +692,6 @@ __ZN16IODMAEventSourceD2Ev __ZN16IOKitDiagnostics10gMetaClassE __ZN16IOKitDiagnostics10superClassE __ZN16IOKitDiagnostics11diagnosticsEv -__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc __ZN16IOKitDiagnostics9MetaClassC1Ev __ZN16IOKitDiagnostics9MetaClassC2Ev __ZN16IOKitDiagnostics9metaClassE @@ -818,23 +720,15 @@ __ZN16IOPMinformeeListC2EPK11OSMetaClass __ZN16IOPMinformeeListC2Ev __ZN16IOPMinformeeListD0Ev __ZN16IOPMinformeeListD2Ev -__ZN16IORangeAllocator10deallocateEmm __ZN16IORangeAllocator10gMetaClassE __ZN16IORangeAllocator10superClassE -__ZN16IORangeAllocator12allocElementEm __ZN16IORangeAllocator12getFreeCountEv -__ZN16IORangeAllocator13allocateRangeEmm -__ZN16IORangeAllocator14deallocElementEm __ZN16IORangeAllocator16getFragmentCountEv __ZN16IORangeAllocator19getFragmentCapacityEv -__ZN16IORangeAllocator28setFragmentCapacityIncrementEm __ZN16IORangeAllocator4freeEv -__ZN16IORangeAllocator4initEmmmm -__ZN16IORangeAllocator8allocateEmPmm __ZN16IORangeAllocator9MetaClassC1Ev __ZN16IORangeAllocator9MetaClassC2Ev __ZN16IORangeAllocator9metaClassE -__ZN16IORangeAllocator9withRangeEmmmm __ZN16IORangeAllocatorC1EPK11OSMetaClass 
__ZN16IORangeAllocatorC1Ev __ZN16IORangeAllocatorC2EPK11OSMetaClass @@ -843,9 +737,6 @@ __ZN16IORangeAllocatorD0Ev __ZN16IORangeAllocatorD2Ev __ZN17IOBigMemoryCursor10gMetaClassE __ZN17IOBigMemoryCursor10superClassE -__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN17IOBigMemoryCursor17withSpecificationEmmm -__ZN17IOBigMemoryCursor21initWithSpecificationEmmm __ZN17IOBigMemoryCursor9MetaClassC1Ev __ZN17IOBigMemoryCursor9MetaClassC2Ev __ZN17IOBigMemoryCursor9metaClassE @@ -902,9 +793,6 @@ __ZN17IOPowerConnectionD0Ev __ZN17IOPowerConnectionD2Ev __ZN17IOSharedDataQueue10gMetaClassE __ZN17IOSharedDataQueue10superClassE -__ZN17IOSharedDataQueue16initWithCapacityEm -__ZN17IOSharedDataQueue11withEntriesEmm -__ZN17IOSharedDataQueue12withCapacityEm __ZN17IOSharedDataQueue19getMemoryDescriptorEv __ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue0Ev __ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue1Ev @@ -916,7 +804,6 @@ __ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue6Ev __ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue7Ev __ZN17IOSharedDataQueue4freeEv __ZN17IOSharedDataQueue4peekEv -__ZN17IOSharedDataQueue7dequeueEPvPm __ZN17IOSharedDataQueue9MetaClassC1Ev __ZN17IOSharedDataQueue9MetaClassC2Ev __ZN17IOSharedDataQueue9metaClassE @@ -929,48 +816,17 @@ __ZN17IOSharedDataQueueD2Ev __ZN18IOMemoryDescriptor10addMappingEP11IOMemoryMap __ZN18IOMemoryDescriptor10gMetaClassE __ZN18IOMemoryDescriptor10initializeEv -__ZN18IOMemoryDescriptor10setMappingEP4taskjm __ZN18IOMemoryDescriptor10superClassE -__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN18IOMemoryDescriptor10writeBytesEmPKvm -__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm -__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection -__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task -__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper -__ZN18IOMemoryDescriptor12setPurgeableEmPm -__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection 
__ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap -__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper -__ZN18IOMemoryDescriptor16getSourceSegmentEmPm -__ZN18IOMemoryDescriptor16performOperationEmmm -__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task +__ZN18IOMemoryDescriptor16getPreparationIDEv __ZN18IOMemoryDescriptor18getPhysicalAddressEv -__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection -__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev __ZN18IOMemoryDescriptor30withPersistentMemoryDescriptorEPS_ -__ZN18IOMemoryDescriptor3mapEP4taskjmmm -__ZN18IOMemoryDescriptor3mapEm __ZN18IOMemoryDescriptor4freeEv -__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm __ZN18IOMemoryDescriptor6getTagEv -__ZN18IOMemoryDescriptor6setTagEm -__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm __ZN18IOMemoryDescriptor8redirectEP4taskb __ZN18IOMemoryDescriptor9MetaClassC1Ev __ZN18IOMemoryDescriptor9MetaClassC2Ev __ZN18IOMemoryDescriptor9metaClassE -__ZN18IOMemoryDescriptor9readBytesEmPvm __ZN18IOMemoryDescriptorC1EPK11OSMetaClass __ZN18IOMemoryDescriptorC2EPK11OSMetaClass __ZN18IOMemoryDescriptorD0Ev @@ -980,8 +836,6 @@ __ZN18IORegistryIterator10enterEntryEv __ZN18IORegistryIterator10gMetaClassE __ZN18IORegistryIterator10iterateAllEv __ZN18IORegistryIterator10superClassE 
-__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem -__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem __ZN18IORegistryIterator13getNextObjectEv __ZN18IORegistryIterator15getCurrentEntryEv __ZN18IORegistryIterator17getNextObjectFlatEv @@ -1000,23 +854,11 @@ __ZN18IORegistryIteratorC2Ev __ZN18IORegistryIteratorD0Ev __ZN18IORegistryIteratorD2Ev __ZN18IOTimerEventSource10gMetaClassE -__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide -__ZN18IOTimerEventSource10setTimeoutE13mach_timespec -__ZN18IOTimerEventSource10setTimeoutEmm __ZN18IOTimerEventSource10superClassE -__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide -__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec -__ZN18IOTimerEventSource10wakeAtTimeEmm __ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop __ZN18IOTimerEventSource12checkForWorkEv -__ZN18IOTimerEventSource12setTimeoutMSEm -__ZN18IOTimerEventSource12setTimeoutUSEm -__ZN18IOTimerEventSource12wakeAtTimeMSEm -__ZN18IOTimerEventSource12wakeAtTimeUSEm __ZN18IOTimerEventSource13cancelTimeoutEv __ZN18IOTimerEventSource14setTimeoutFuncEv -__ZN18IOTimerEventSource15setTimeoutTicksEm -__ZN18IOTimerEventSource15wakeAtTimeTicksEm __ZN18IOTimerEventSource16timerEventSourceEP8OSObjectPFvS1_PS_E __ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev __ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev @@ -1088,26 +930,8 @@ __ZN19IOPMPowerSourceListC2EPK11OSMetaClass __ZN19IOPMPowerSourceListC2Ev __ZN19IOPMPowerSourceListD0Ev __ZN19IOPMPowerSourceListD2Ev -__ZN19IOPMPowerStateQueue10gMetaClassE -__ZN19IOPMPowerStateQueue10superClassE -__ZN19IOPMPowerStateQueue12checkForWorkEv -__ZN19IOPMPowerStateQueue14unIdleOccurredEP9IOServicem -__ZN19IOPMPowerStateQueue17PMPowerStateQueueEP8OSObject -__ZN19IOPMPowerStateQueue4initEP8OSObjectPFvS1_zE -__ZN19IOPMPowerStateQueue9MetaClassC1Ev -__ZN19IOPMPowerStateQueue9MetaClassC2Ev -__ZN19IOPMPowerStateQueue9metaClassE -__ZN19IOPMPowerStateQueueC1EPK11OSMetaClass 
-__ZN19IOPMPowerStateQueueC1Ev -__ZN19IOPMPowerStateQueueC2EPK11OSMetaClass -__ZN19IOPMPowerStateQueueC2Ev -__ZN19IOPMPowerStateQueueD0Ev -__ZN19IOPMPowerStateQueueD2Ev __ZN20IOLittleMemoryCursor10gMetaClassE __ZN20IOLittleMemoryCursor10superClassE -__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN20IOLittleMemoryCursor17withSpecificationEmmm -__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm __ZN20IOLittleMemoryCursor9MetaClassC1Ev __ZN20IOLittleMemoryCursor9MetaClassC2Ev __ZN20IOLittleMemoryCursor9metaClassE @@ -1120,8 +944,6 @@ __ZN20IOLittleMemoryCursorD2Ev __ZN20RootDomainUserClient10gMetaClassE __ZN20RootDomainUserClient10superClassE __ZN20RootDomainUserClient11clientCloseEv -__ZN20RootDomainUserClient15setPreventativeEmm -__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem __ZN20RootDomainUserClient5startEP9IOService __ZN20RootDomainUserClient9MetaClassC1Ev __ZN20RootDomainUserClient9MetaClassC2Ev @@ -1133,19 +955,13 @@ __ZN20RootDomainUserClientC2Ev __ZN20RootDomainUserClientD0Ev __ZN20RootDomainUserClientD2Ev __ZN21IOInterruptController10gMetaClassE -__ZN21IOInterruptController10initVectorElP17IOInterruptVector __ZN21IOInterruptController10superClassE -__ZN21IOInterruptController11causeVectorElP17IOInterruptVector -__ZN21IOInterruptController12enableVectorElP17IOInterruptVector -__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector __ZN21IOInterruptController14causeInterruptEP9IOServicei __ZN21IOInterruptController15enableInterruptEP9IOServicei __ZN21IOInterruptController15handleInterruptEPvP9IOServicei __ZN21IOInterruptController16disableInterruptEP9IOServicei __ZN21IOInterruptController16getInterruptTypeEP9IOServiceiPi -__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector __ZN21IOInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_ -__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector 
__ZN21IOInterruptController19unregisterInterruptEP9IOServicei __ZN21IOInterruptController26getInterruptHandlerAddressEv __ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev @@ -1163,9 +979,6 @@ __ZN21IOInterruptControllerD0Ev __ZN21IOInterruptControllerD2Ev __ZN21IONaturalMemoryCursor10gMetaClassE __ZN21IONaturalMemoryCursor10superClassE -__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN21IONaturalMemoryCursor17withSpecificationEmmm -__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm __ZN21IONaturalMemoryCursor9MetaClassC1Ev __ZN21IONaturalMemoryCursor9MetaClassC2Ev __ZN21IONaturalMemoryCursor9metaClassE @@ -1177,29 +990,12 @@ __ZN21IONaturalMemoryCursorD0Ev __ZN21IONaturalMemoryCursorD2Ev __ZN21IOSubMemoryDescriptor10gMetaClassE __ZN21IOSubMemoryDescriptor10superClassE -__ZN21IOSubMemoryDescriptor10writeBytesEmPKvm -__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm -__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection -__ZN21IOSubMemoryDescriptor12setPurgeableEmPm -__ZN21IOSubMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN21IOSubMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN21IOSubMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm -__ZN21IOSubMemoryDescriptor16performOperationEmmm -__ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm -__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm -__ZN21IOSubMemoryDescriptor20getPhysicalSegment64EmPm -__ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN21IOSubMemoryDescriptor16getPreparationIDEv __ZN21IOSubMemoryDescriptor4freeEv -__ZN21IOSubMemoryDescriptor5doMapEP7_vm_mapPjmmm -__ZN21IOSubMemoryDescriptor7prepareE11IODirection -__ZN21IOSubMemoryDescriptor8completeE11IODirection __ZN21IOSubMemoryDescriptor8redirectEP4taskb 
__ZN21IOSubMemoryDescriptor9MetaClassC1Ev __ZN21IOSubMemoryDescriptor9MetaClassC2Ev __ZN21IOSubMemoryDescriptor9metaClassE -__ZN21IOSubMemoryDescriptor9readBytesEmPvm __ZN21IOSubMemoryDescriptorC1EPK11OSMetaClass __ZN21IOSubMemoryDescriptorC1Ev __ZN21IOSubMemoryDescriptorC2EPK11OSMetaClass @@ -1252,24 +1048,10 @@ __ZN22_IOOpenServiceIteratorD0Ev __ZN22_IOOpenServiceIteratorD2Ev __ZN23IOMultiMemoryDescriptor10gMetaClassE __ZN23IOMultiMemoryDescriptor10superClassE -__ZN23IOMultiMemoryDescriptor10writeBytesEmPKvm -__ZN23IOMultiMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN23IOMultiMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN23IOMultiMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor16getSourceSegmentEmPm -__ZN23IOMultiMemoryDescriptor17getVirtualSegmentEmPm -__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPm -__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN23IOMultiMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN23IOMultiMemoryDescriptor4freeEv -__ZN23IOMultiMemoryDescriptor7prepareE11IODirection -__ZN23IOMultiMemoryDescriptor8completeE11IODirection __ZN23IOMultiMemoryDescriptor9MetaClassC1Ev __ZN23IOMultiMemoryDescriptor9MetaClassC2Ev __ZN23IOMultiMemoryDescriptor9metaClassE -__ZN23IOMultiMemoryDescriptor9readBytesEmPvm __ZN23IOMultiMemoryDescriptorC1EPK11OSMetaClass __ZN23IOMultiMemoryDescriptorC1Ev __ZN23IOMultiMemoryDescriptorC2EPK11OSMetaClass @@ -1278,41 +1060,11 @@ __ZN23IOMultiMemoryDescriptorD0Ev __ZN23IOMultiMemoryDescriptorD2Ev __ZN24IOBufferMemoryDescriptor10gMetaClassE __ZN24IOBufferMemoryDescriptor10superClassE -__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj -__ZN24IOBufferMemoryDescriptor11withOptionsEmjj 
-__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection -__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb -__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb -__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj __ZN24IOBufferMemoryDescriptor14getBytesNoCopyEv -__ZN24IOBufferMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN24IOBufferMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN24IOBufferMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj -__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task -__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj -__ZN24IOBufferMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN24IOBufferMemoryDescriptor23initWithPhysicalAddressEmm11IODirection -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev __ZN24IOBufferMemoryDescriptor4freeEv __ZN24IOBufferMemoryDescriptor9MetaClassC1Ev __ZN24IOBufferMemoryDescriptor9MetaClassC2Ev 
__ZN24IOBufferMemoryDescriptor9metaClassE -__ZN24IOBufferMemoryDescriptor9setLengthEj -__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb __ZN24IOBufferMemoryDescriptorC1EPK11OSMetaClass __ZN24IOBufferMemoryDescriptorC1Ev __ZN24IOBufferMemoryDescriptorC2EPK11OSMetaClass @@ -1321,25 +1073,8 @@ __ZN24IOBufferMemoryDescriptorD0Ev __ZN24IOBufferMemoryDescriptorD2Ev __ZN25IOGeneralMemoryDescriptor10gMetaClassE __ZN25IOGeneralMemoryDescriptor10superClassE -__ZN25IOGeneralMemoryDescriptor11setPositionEm -__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection -__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj -__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper -__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv -__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm -__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm -__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm -__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm -__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN25IOGeneralMemoryDescriptor16getPreparationIDEv __ZN25IOGeneralMemoryDescriptor4freeEv -__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm -__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm -__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection -__ZN25IOGeneralMemoryDescriptor8completeE11IODirection __ZN25IOGeneralMemoryDescriptor9MetaClassC1Ev __ZN25IOGeneralMemoryDescriptor9MetaClassC2Ev __ZN25IOGeneralMemoryDescriptor9metaClassE @@ -1416,22 +1151,7 @@ __ZN28IOFilterInterruptEventSourceD0Ev __ZN28IOFilterInterruptEventSourceD2Ev __ZN29IOInterleavedMemoryDescriptor10gMetaClassE 
__ZN29IOInterleavedMemoryDescriptor10superClassE -__ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection -__ZN29IOInterleavedMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN29IOInterleavedMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN29IOInterleavedMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN29IOInterleavedMemoryDescriptor16getSourceSegmentEmPm -__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection -__ZN29IOInterleavedMemoryDescriptor17getVirtualSegmentEmPm -__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPm -__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptormm -__ZN29IOInterleavedMemoryDescriptor20getPhysicalSegment64EmPm -__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsE11IODirection -__ZN29IOInterleavedMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN29IOInterleavedMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN29IOInterleavedMemoryDescriptor4freeEv -__ZN29IOInterleavedMemoryDescriptor7prepareE11IODirection -__ZN29IOInterleavedMemoryDescriptor8completeE11IODirection __ZN29IOInterleavedMemoryDescriptor9MetaClassC1Ev __ZN29IOInterleavedMemoryDescriptor9MetaClassC2Ev __ZN29IOInterleavedMemoryDescriptor9metaClassE @@ -1441,16 +1161,9 @@ __ZN29IOInterleavedMemoryDescriptorC2EPK11OSMetaClass __ZN29IOInterleavedMemoryDescriptorC2Ev __ZN29IOInterleavedMemoryDescriptorD0Ev __ZN29IOInterleavedMemoryDescriptorD2Ev -__ZN8IOMapper10allocTableEm __ZN8IOMapper10gMetaClassE -__ZN8IOMapper10iovmInsertEjmP13upl_page_infom -__ZN8IOMapper10iovmInsertEjmPjm __ZN8IOMapper10superClassE -__ZN8IOMapper11NewARTTableEmPPvPj -__ZN8IOMapper12FreeARTTableEP6OSDatam __ZN8IOMapper17setMapperRequiredEb -__ZN8IOMapper18_RESERVEDIOMapper1Ev -__ZN8IOMapper18_RESERVEDIOMapper2Ev __ZN8IOMapper18_RESERVEDIOMapper3Ev __ZN8IOMapper18_RESERVEDIOMapper4Ev __ZN8IOMapper18_RESERVEDIOMapper5Ev @@ -1464,6 +1177,7 @@ 
__ZN8IOMapper19_RESERVEDIOMapper12Ev __ZN8IOMapper19_RESERVEDIOMapper13Ev __ZN8IOMapper19_RESERVEDIOMapper14Ev __ZN8IOMapper19_RESERVEDIOMapper15Ev +__ZN8IOMapper19copyMapperForDeviceEP9IOService __ZN8IOMapper19waitForSystemMapperEv __ZN8IOMapper4freeEv __ZN8IOMapper5startEP9IOService @@ -1475,17 +1189,6 @@ __ZN8IOMapperC1EPK11OSMetaClass __ZN8IOMapperC2EPK11OSMetaClass __ZN8IOMapperD0Ev __ZN8IOMapperD2Ev -__ZN8IOPMprot10gMetaClassE -__ZN8IOPMprot10superClassE -__ZN8IOPMprot9MetaClassC1Ev -__ZN8IOPMprot9MetaClassC2Ev -__ZN8IOPMprot9metaClassE -__ZN8IOPMprotC1EPK11OSMetaClass -__ZN8IOPMprotC1Ev -__ZN8IOPMprotC2EPK11OSMetaClass -__ZN8IOPMprotC2Ev -__ZN8IOPMprotD0Ev -__ZN8IOPMprotD2Ev __ZN8IOSyncer10gMetaClassE __ZN8IOSyncer10superClassE __ZN8IOSyncer13privateSignalEv @@ -1514,60 +1217,35 @@ __ZN9IOCommandC1EPK11OSMetaClass __ZN9IOCommandC2EPK11OSMetaClass __ZN9IOCommandD0Ev __ZN9IOCommandD2Ev -__ZN9IOService10actionStopEPS_S0_ -__ZN9IOService10adjustBusyEl __ZN9IOService10gMetaClassE -__ZN9IOService10handleOpenEPS_mPv __ZN9IOService10initializeEv __ZN9IOService10joinPMtreeEPS_ __ZN9IOService10makeUsableEv __ZN9IOService10superClassE -__ZN9IOService10systemWakeEv -__ZN9IOService10youAreRootEv -__ZN9IOService11_adjustBusyEl __ZN9IOService11addLocationEP12OSDictionary __ZN9IOService11getPlatformEv -__ZN9IOService11handleCloseEPS_m __ZN9IOService11setPlatformEP16IOPlatformExpert -__ZN9IOService11tellClientsEi -__ZN9IOService12clampPowerOnEm -__ZN9IOService12didTerminateEPS_mPb __ZN9IOService12getBusyStateEv __ZN9IOService12getResourcesEv __ZN9IOService12nameMatchingEPK8OSStringP12OSDictionary __ZN9IOService12nameMatchingEPKcP12OSDictionary __ZN9IOService12passiveMatchEP12OSDictionaryb -__ZN9IOService12requestProbeEm -__ZN9IOService12scheduleStopEPS_ __ZN9IOService12tellChangeUpEm -__ZN9IOService12waitForStateEmmP13mach_timespec __ZN9IOService13addPowerChildEPS_ __ZN9IOService13askChangeDownEm __ZN9IOService13checkResourceEP8OSObject -__ZN9IOService13getPMworkloopEv 
+__ZN9IOService13getPowerStateEv __ZN9IOService13invokeNotiferEP18_IOServiceNotifier __ZN9IOService13matchLocationEPS_ -__ZN9IOService13messageClientEmP8OSObjectPvj -__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient -__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient __ZN9IOService13setPowerStateEmPS_ -__ZN9IOService13startMatchingEm -__ZN9IOService13waitMatchIdleEm -__ZN9IOService13willTerminateEPS_m -__ZN9IOService14actionFinalizeEPS_m __ZN9IOService14activityTickleEmm __ZN9IOService14applyToClientsEPFvPS_PvES1_ __ZN9IOService14causeInterruptEi __ZN9IOService14checkResourcesEv -__ZN9IOService14doServiceMatchEm __ZN9IOService14getServiceRootEv -__ZN9IOService14messageClientsEmPvj -__ZN9IOService14newTemperatureElPS_ -__ZN9IOService14setPowerParentEP17IOPowerConnectionbm __ZN9IOService14startCandidateEPS_ __ZN9IOService14tellChangeDownEm __ZN9IOService14waitForServiceEP12OSDictionaryP13mach_timespec -__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l __ZN9IOService15comparePropertyEP12OSDictionaryPK8OSString __ZN9IOService15comparePropertyEP12OSDictionaryPKc __ZN9IOService15enableInterruptEi @@ -1575,40 +1253,24 @@ __ZN9IOService15errnoFromReturnEi __ZN9IOService15getDeviceMemoryEv __ZN9IOService15getPMRootDomainEv __ZN9IOService15lookupInterruptEibPP21IOInterruptController -__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService15powerChangeDoneEm __ZN9IOService15probeCandidatesEP12OSOrderedSet -__ZN9IOService16propertyMatchingEPK8OSSymbolPK8OSObjectP12OSDictionary __ZN9IOService15publishResourceEPK8OSSymbolP8OSObject __ZN9IOService15publishResourceEPKcP8OSObject -__ZN9IOService15registerServiceEm __ZN9IOService15serviceMatchingEPK8OSStringP12OSDictionary __ZN9IOService15serviceMatchingEPKcP12OSDictionary __ZN9IOService15setDeviceMemoryEP7OSArray -__ZN9IOService15setNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l __ZN9IOService15setPMRootDomainEP14IOPMrootDomain 
-__ZN9IOService15tellChangeDown1Em -__ZN9IOService15tellChangeDown2Em -__ZN9IOService15terminateClientEPS_m -__ZN9IOService15terminatePhase1Em -__ZN9IOService15terminateThreadEPv -__ZN9IOService15terminateWorkerEm -__ZN9IOService16ack_timer_tickedEv __ZN9IOService16allowPowerChangeEm __ZN9IOService16applyToProvidersEPFvPS_PvES1_ -__ZN9IOService16command_receivedEPvS0_S0_S0_ -__ZN9IOService16didYouWakeSystemEv __ZN9IOService16disableInterruptEi __ZN9IOService16getCPUSnoopDelayEv __ZN9IOService16getInterruptTypeEiPi -__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ +__ZN9IOService16propertyMatchingEPK8OSSymbolPK8OSObjectP12OSDictionary __ZN9IOService16removePowerChildEP17IOPowerConnection -__ZN9IOService16requestTerminateEPS_m __ZN9IOService16resolveInterruptEPS_i __ZN9IOService16resourceMatchingEPK8OSStringP12OSDictionary __ZN9IOService16resourceMatchingEPKcP12OSDictionary -__ZN9IOService16scheduleFinalizeEv -__ZN9IOService16setCPUSnoopDelayEm __ZN9IOService16stringFromReturnEi __ZN9IOService16tellNoChangeDownEm __ZN9IOService17addNeededResourceEPKc @@ -1618,30 +1280,18 @@ __ZN9IOService17catalogNewDriversEP12OSOrderedSet __ZN9IOService17comparePropertiesEP12OSDictionaryP12OSCollection __ZN9IOService17currentCapabilityEv __ZN9IOService17getAggressivenessEmPm -__ZN9IOService13getPowerStateEv __ZN9IOService17registerInterruptEiP8OSObjectPFvS1_PvPS_iES2_ __ZN9IOService17setAggressivenessEmm -__ZN9IOService18actionDidTerminateEPS_m __ZN9IOService18changePowerStateToEm -__ZN9IOService18doServiceTerminateEm __ZN9IOService18getResourceServiceEv __ZN9IOService18lockForArbitrationEb __ZN9IOService18matchPropertyTableEP12OSDictionary -__ZN9IOService18matchPropertyTableEP12OSDictionaryPl -__ZN9IOService18requireMaxBusStallEm __ZN9IOService18setIdleTimerPeriodEm -__ZN9IOService18settleTimerExpiredEv -__ZN9IOService18systemWillShutdownEm -__ZN9IOService19_RESERVEDIOService5Ev __ZN9IOService19_RESERVEDIOService6Ev __ZN9IOService19_RESERVEDIOService7Ev 
__ZN9IOService19_RESERVEDIOService8Ev __ZN9IOService19_RESERVEDIOService9Ev -__ZN9IOService19actionWillTerminateEPS_mP7OSArray -__ZN9IOService19deliverNotificationEPK8OSSymbolmm -__ZN9IOService19getExistingServicesEP12OSDictionarymm __ZN9IOService19getMatchingServicesEP12OSDictionary -__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService19powerOverrideOnPrivEv __ZN9IOService19registerPowerDriverEPS_P14IOPMPowerStatem __ZN9IOService19start_PM_idle_timerEv @@ -1689,54 +1339,39 @@ __ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_ __ZN9IOService20getDeviceMemoryCountEv __ZN9IOService20powerOverrideOffPrivEv __ZN9IOService20unlockForArbitrationEv -__ZN9IOService21doInstallNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService21getClientWithCategoryEPK8OSSymbol __ZN9IOService21powerStateDidChangeToEmmPS_ __ZN9IOService21temporaryPowerClampOnEv __ZN9IOService21unregisterAllInterestEv -__ZN9IOService22PM_Clamp_Timer_ExpiredEv __ZN9IOService22acknowledgePowerChangeEPS_ __ZN9IOService22changePowerStateToPrivEm -__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection +__ZN9IOService22copyClientWithCategoryEPK8OSSymbol __ZN9IOService22powerStateWillChangeToEmmPS_ -__ZN9IOService23acknowledgeNotificationEPvm +__ZN9IOService22waitForMatchingServiceEP12OSDictionaryy __ZN9IOService23currentPowerConsumptionEv -__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection +__ZN9IOService23registryEntryIDMatchingEyP12OSDictionary __ZN9IOService23requestPowerDomainStateEmP17IOPowerConnectionm -__ZN9IOService23scheduleTerminatePhase2Em -__ZN9IOService23syncNotificationHandlerEPvS0_PS_ -__ZN9IOService23tellClientsWithResponseEi -__ZN9IOService24PM_idle_timer_expirationEv __ZN9IOService24acknowledgeSetPowerStateEv __ZN9IOService24getDeviceMemoryWithIndexEj -__ZN9IOService24mapDeviceMemoryWithIndexEjm __ZN9IOService24powerStateForDomainStateEm __ZN9IOService24registerInterestedDriverEPS_ 
+__ZN9IOService24requireMaxInterruptDelayEj __ZN9IOService26deRegisterInterestedDriverEPS_ -__ZN9IOService26temperatureCriticalForZoneEPS_ __ZN9IOService27maxCapabilityForDomainStateEm -__ZN9IOService27serializedAllowPowerChange2Em -__ZN9IOService28serializedCancelPowerChange2Em __ZN9IOService31initialPowerStateForDomainStateEm __ZN9IOService4freeEv -__ZN9IOService4openEPS_mPv __ZN9IOService4stopEPS_ -__ZN9IOService5closeEPS_m -__ZN9IOService5probeEPS_Pl __ZN9IOService5startEPS_ -__ZN9IOService6PMfreeEv __ZN9IOService6PMinitEv __ZN9IOService6PMstopEv __ZN9IOService6attachEPS_ __ZN9IOService6detachEPS_ -__ZN9IOService7messageEmPS_Pv -__ZN9IOService8finalizeEm __ZN9IOService9MetaClassC1Ev __ZN9IOService9MetaClassC2Ev __ZN9IOService9metaClassE -__ZN9IOService9resourcesEv -__ZN9IOService9terminateEm +__ZN9IOService9resourcesEv : __ZN9IOService18getResourceServiceEv __ZN9IOService9waitQuietEP13mach_timespec +__ZN9IOService9waitQuietEy __ZN9IOServiceC1EPK11OSMetaClass __ZN9IOServiceC1Ev __ZN9IOServiceC2EPK11OSMetaClass @@ -1758,7 +1393,6 @@ __ZNK10IOWorkLoop9MetaClass5allocEv __ZNK10IOWorkLoop9getThreadEv __ZNK11IOCatalogue12getMetaClassEv __ZNK11IOCatalogue12unloadModuleEP8OSString -__ZNK11IOCatalogue13serializeDataEmP11OSSerialize __ZNK11IOCatalogue14isModuleLoadedEP12OSDictionary __ZNK11IOCatalogue14isModuleLoadedEP8OSString __ZNK11IOCatalogue14isModuleLoadedEPKc @@ -1794,8 +1428,6 @@ __ZNK13IOEventSource9getActionEv __ZNK13IOEventSource9isEnabledEv __ZNK13_IOServiceJob12getMetaClassEv __ZNK13_IOServiceJob9MetaClass5allocEv -__ZNK14IOCommandQueue12getMetaClassEv -__ZNK14IOCommandQueue9MetaClass5allocEv __ZNK14IOMemoryCursor12getMetaClassEv __ZNK14IOMemoryCursor9MetaClass5allocEv __ZNK14IOPMrootDomain12getMetaClassEv @@ -1811,19 +1443,13 @@ __ZNK15IOPMPowerSource9MetaClass5allocEv __ZNK15IORegistryEntry11compareNameEP8OSStringPS1_ __ZNK15IORegistryEntry11getLocationEPK15IORegistryPlane __ZNK15IORegistryEntry11getPropertyEPK8OSString 
-__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem __ZNK15IORegistryEntry11getPropertyEPK8OSSymbol -__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem __ZNK15IORegistryEntry11getPropertyEPKc -__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem __ZNK15IORegistryEntry12compareNamesEP8OSObjectPP8OSString __ZNK15IORegistryEntry12copyLocationEPK15IORegistryPlane __ZNK15IORegistryEntry12copyPropertyEPK8OSString -__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem __ZNK15IORegistryEntry12copyPropertyEPK8OSSymbol -__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem __ZNK15IORegistryEntry12copyPropertyEPKc -__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem __ZNK15IORegistryEntry12getMetaClassEv __ZNK15IORegistryEntry13getChildEntryEPK15IORegistryPlane __ZNK15IORegistryEntry14applyToParentsEPFvPS_PvES1_PK15IORegistryPlane @@ -1853,8 +1479,6 @@ __ZNK15IORegistryEntry9breakLinkEPS_jPK15IORegistryPlane __ZNK15IORegistryPlane12getMetaClassEv __ZNK15IORegistryPlane9MetaClass5allocEv __ZNK15IORegistryPlane9serializeEP11OSSerialize -__ZNK15_IOConfigThread12getMetaClassEv -__ZNK15_IOConfigThread9MetaClass5allocEv __ZNK16IODMAEventSource12getMetaClassEv __ZNK16IODMAEventSource9MetaClass5allocEv __ZNK16IOKitDiagnostics12getMetaClassEv @@ -1873,7 +1497,6 @@ __ZNK17IOSharedDataQueue12getMetaClassEv __ZNK17IOSharedDataQueue9MetaClass5allocEv __ZNK18IOMemoryDescriptor12getDirectionEv __ZNK18IOMemoryDescriptor12getMetaClassEv -__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj __ZNK18IOMemoryDescriptor9MetaClass5allocEv __ZNK18IOMemoryDescriptor9getLengthEv __ZNK18IORegistryIterator12getMetaClassEv @@ -1886,8 +1509,6 @@ __ZNK18_IOServiceNotifier12getMetaClassEv __ZNK18_IOServiceNotifier9MetaClass5allocEv __ZNK19IOPMPowerSourceList12getMetaClassEv __ZNK19IOPMPowerSourceList9MetaClass5allocEv -__ZNK19IOPMPowerStateQueue12getMetaClassEv -__ZNK19IOPMPowerStateQueue9MetaClass5allocEv 
__ZNK20IOLittleMemoryCursor12getMetaClassEv __ZNK20IOLittleMemoryCursor9MetaClass5allocEv __ZNK20RootDomainUserClient12getMetaClassEv @@ -1897,9 +1518,7 @@ __ZNK21IOInterruptController9MetaClass5allocEv __ZNK21IONaturalMemoryCursor12getMetaClassEv __ZNK21IONaturalMemoryCursor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor12getMetaClassEv -__ZNK21IOSubMemoryDescriptor19dmaCommandOperationEmPvj __ZNK21IOSubMemoryDescriptor9MetaClass5allocEv -__ZNK21IOSubMemoryDescriptor9serializeEP11OSSerialize __ZNK22IOInterruptEventSource11getIntIndexEv __ZNK22IOInterruptEventSource11getProviderEv __ZNK22IOInterruptEventSource12getMetaClassEv @@ -1913,7 +1532,6 @@ __ZNK24IOBufferMemoryDescriptor11getCapacityEv __ZNK24IOBufferMemoryDescriptor12getMetaClassEv __ZNK24IOBufferMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor12getMetaClassEv -__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj __ZNK25IOGeneralMemoryDescriptor9MetaClass5allocEv __ZNK25IOGeneralMemoryDescriptor9serializeEP11OSSerialize __ZNK26_IOServiceInterestNotifier12getMetaClassEv @@ -1928,8 +1546,6 @@ __ZNK29IOInterleavedMemoryDescriptor9MetaClass5allocEv __ZNK8IOMapper12getMetaClassEv __ZNK8IOMapper13getBypassMaskEPy __ZNK8IOMapper9MetaClass5allocEv -__ZNK8IOPMprot12getMetaClassEv -__ZNK8IOPMprot9MetaClass5allocEv __ZNK8IOSyncer12getMetaClassEv __ZNK8IOSyncer9MetaClass5allocEv __ZNK9IOCommand12getMetaClassEv @@ -1963,7 +1579,6 @@ __ZTV13IOCommandGate __ZTV13IOCommandPool __ZTV13IOEventSource __ZTV13_IOServiceJob -__ZTV14IOCommandQueue __ZTV14IOMemoryCursor __ZTV14IOPMrootDomain __ZTV15IOConditionLock @@ -1971,7 +1586,6 @@ __ZTV15IODMAController __ZTV15IOPMPowerSource __ZTV15IORegistryEntry __ZTV15IORegistryPlane -__ZTV15_IOConfigThread __ZTV16IODMAEventSource __ZTV16IOKitDiagnostics __ZTV16IOPMinformeeList @@ -1986,7 +1600,6 @@ __ZTV18IOTimerEventSource __ZTV18IOUserNotification __ZTV18_IOServiceNotifier __ZTV19IOPMPowerSourceList -__ZTV19IOPMPowerStateQueue __ZTV20IOLittleMemoryCursor 
__ZTV20RootDomainUserClient __ZTV21IOInterruptController @@ -2002,7 +1615,6 @@ __ZTV27IOSharedInterruptController __ZTV28IOFilterInterruptEventSource __ZTV29IOInterleavedMemoryDescriptor __ZTV8IOMapper -__ZTV8IOPMprot __ZTV8IOSyncer __ZTV9IOCommand __ZTV9IOService @@ -2021,7 +1633,6 @@ __ZTVN13IOCommandGate9MetaClassE __ZTVN13IOCommandPool9MetaClassE __ZTVN13IOEventSource9MetaClassE __ZTVN13_IOServiceJob9MetaClassE -__ZTVN14IOCommandQueue9MetaClassE __ZTVN14IOMemoryCursor9MetaClassE __ZTVN14IOPMrootDomain9MetaClassE __ZTVN15IOConditionLock9MetaClassE @@ -2029,7 +1640,6 @@ __ZTVN15IODMAController9MetaClassE __ZTVN15IOPMPowerSource9MetaClassE __ZTVN15IORegistryEntry9MetaClassE __ZTVN15IORegistryPlane9MetaClassE -__ZTVN15_IOConfigThread9MetaClassE __ZTVN16IODMAEventSource9MetaClassE __ZTVN16IOKitDiagnostics9MetaClassE __ZTVN16IOPMinformeeList9MetaClassE @@ -2043,7 +1653,6 @@ __ZTVN18IOTimerEventSource9MetaClassE __ZTVN18IOUserNotification9MetaClassE __ZTVN18_IOServiceNotifier9MetaClassE __ZTVN19IOPMPowerSourceList9MetaClassE -__ZTVN19IOPMPowerStateQueue9MetaClassE __ZTVN20IOLittleMemoryCursor9MetaClassE __ZTVN20RootDomainUserClient9MetaClassE __ZTVN21IOInterruptController9MetaClassE @@ -2059,7 +1668,6 @@ __ZTVN27IOSharedInterruptController9MetaClassE __ZTVN28IOFilterInterruptEventSource9MetaClassE __ZTVN29IOInterleavedMemoryDescriptor9MetaClassE __ZTVN8IOMapper9MetaClassE -__ZTVN8IOPMprot9MetaClassE __ZTVN8IOSyncer9MetaClassE __ZTVN9IOCommand9MetaClassE __ZTVN9IOService9MetaClassE @@ -2068,7 +1676,6 @@ __giDebugLogInternal __giDebugReserved1 __giDebugReserved2 _acknowledgeSleepWakeNotification -_add_from_mkext_function _db_dumpiojunk _db_piokjunk _debug_container_malloc_size @@ -2107,9 +1714,7 @@ _gIOFirstPublishNotification _gIOGeneralInterest _gIOInterruptControllersKey _gIOInterruptSpecifiersKey -_gIOKLDLock _gIOKernelConfigTables -_gIOKernelKmods _gIOKitDebug _gIOKitDebugKey _gIOLocationKey @@ -2128,7 +1733,6 @@ _gIOPlatformQuiesceActionKey 
_gIOPlatformSleepActionKey _gIOPlatformWakeActionKey _gIOPowerPlane -_gIOPrelinkedModules _gIOPriorityPowerStateInterest _gIOProbeScoreKey _gIOPropertyMatchKey diff --git a/config/IOKit.i386.exports b/config/IOKit.i386.exports index a483c4173..068770db6 100644 --- a/config/IOKit.i386.exports +++ b/config/IOKit.i386.exports @@ -1,6 +1,346 @@ -__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy -__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy -__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task -__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy +_IOLockUnlock_darwin10:_lck_mtx_unlock_darwin10 +_IOPanic +_PE_parse_boot_arg +__Z16IODTFindSlotNameP15IORegistryEntrym +__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E +__Z17IODTGetCellCountsP15IORegistryEntryPmS1_ +__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ +__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc +__ZN10IOWorkLoop19workLoopWithOptionsEm +__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem +__ZN10IOWorkLoop9sleepGateEPvm +__ZN11IOCatalogue11findDriversEP12OSDictionaryPl +__ZN11IOCatalogue11findDriversEP9IOServicePl +__ZN11IODataQueue11withEntriesEmm +__ZN11IODataQueue12withCapacityEm +__ZN11IODataQueue15initWithEntriesEmm +__ZN11IODataQueue16initWithCapacityEm +__ZN11IODataQueue7enqueueEPvm +__ZN11IOMemoryMap10getAddressEv +__ZN11IOMemoryMap18getPhysicalSegmentEmPm +__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory +__ZN11IOMemoryMap7getSizeEv +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy +__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm +__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm +__ZN12IODMACommand11synchronizeEm +__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm +__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm +__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm +__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm 
+__ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvmEPyS2_Pm +__ZN12IODMACommand15genIOVMSegmentsEPyPvPm +__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb +__ZN12IODMACommand8transferEmyPvy +__ZN12IOUserClient12initWithTaskEP4taskPvm +__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary +__ZN12IOUserClient15mapClientMemoryEmP4taskmj +__ZN12IOUserClient15sendAsyncResultEPjiPPvm __ZN12IOUserClient17mapClientMemory64EmP4taskmy +__ZN12IOUserClient17sendAsyncResult64EPyiS0_m +__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor __ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy +__ZN12IOUserClient23getExternalTrapForIndexEm +__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore +__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem +__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm +__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy +__ZN12IOUserClient25getExternalMethodForIndexEm +__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN12IOUserClient30getExternalAsyncMethodForIndexEm +__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem +__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem +__ZN13IOCommandGate12commandSleepEPvm +__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm +__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm +__ZN13IOEventSource9sleepGateEPv12UnsignedWidem +__ZN13IOEventSource9sleepGateEPvm +__ZN13_IOServiceJob8startJobEP9IOServiceim +__ZN14IOCommandQueue10gMetaClassE +__ZN14IOCommandQueue10superClassE +__ZN14IOCommandQueue12checkForWorkEv +__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei +__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_ 
+__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E +__ZN14IOCommandQueue4freeEv +__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei +__ZN14IOCommandQueue9MetaClassC1Ev +__ZN14IOCommandQueue9MetaClassC2Ev +__ZN14IOCommandQueue9metaClassE +__ZN14IOCommandQueueC1EPK11OSMetaClass +__ZN14IOCommandQueueC1Ev +__ZN14IOCommandQueueC2EPK11OSMetaClass +__ZN14IOCommandQueueC2Ev +__ZN14IOCommandQueueD0Ev +__ZN14IOCommandQueueD2Ev +__ZN14IODeviceMemory12withSubRangeEPS_mm +__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm +__ZN14IODeviceMemory9withRangeEmm +__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm +__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm +__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm +__ZN14IOPMrootDomain17setSleepSupportedEm +__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j +__ZN14IOPMrootDomain24receivePowerNotificationEm +__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j +__ZN15IODMAController13getControllerEP9IOServicem +__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim +__ZN15IODMAController20createControllerNameEm +__ZN15IODMAController21registerDMAControllerEm +__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m +__ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm +__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim +__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m +__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc +__ZN16IORangeAllocator10deallocateEmm +__ZN16IORangeAllocator12allocElementEm +__ZN16IORangeAllocator13allocateRangeEmm +__ZN16IORangeAllocator14deallocElementEm +__ZN16IORangeAllocator28setFragmentCapacityIncrementEm +__ZN16IORangeAllocator4initEmmmm +__ZN16IORangeAllocator8allocateEmPmm +__ZN16IORangeAllocator9withRangeEmmmm 
+__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN17IOBigMemoryCursor17withSpecificationEmmm +__ZN17IOBigMemoryCursor21initWithSpecificationEmmm +__ZN17IOSharedDataQueue11withEntriesEmm +__ZN17IOSharedDataQueue12withCapacityEm +__ZN17IOSharedDataQueue16initWithCapacityEm +__ZN17IOSharedDataQueue7dequeueEPvPm +__ZN18IOMemoryDescriptor10setMappingEP4taskjm +__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN18IOMemoryDescriptor10writeBytesEmPKvm +__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm +__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection +__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task +__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor12setPurgeableEmPm +__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection +__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection +__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task +__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor16getSourceSegmentEmPm +__ZN18IOMemoryDescriptor16performOperationEmmm +__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task +__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm +__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task +__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm +__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy +__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection +__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm +__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev 
+__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev +__ZN18IOMemoryDescriptor3mapEP4taskjmmm +__ZN18IOMemoryDescriptor3mapEm +__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm +__ZN18IOMemoryDescriptor6setTagEm +__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm +__ZN18IOMemoryDescriptor9readBytesEmPvm +__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem +__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem +__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide +__ZN18IOTimerEventSource10setTimeoutE13mach_timespec +__ZN18IOTimerEventSource10setTimeoutEmm +__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide +__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec +__ZN18IOTimerEventSource10wakeAtTimeEmm +__ZN18IOTimerEventSource12setTimeoutMSEm +__ZN18IOTimerEventSource12setTimeoutUSEm +__ZN18IOTimerEventSource12wakeAtTimeMSEm +__ZN18IOTimerEventSource12wakeAtTimeUSEm +__ZN18IOTimerEventSource15setTimeoutTicksEm +__ZN18IOTimerEventSource15wakeAtTimeTicksEm +__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN20IOLittleMemoryCursor17withSpecificationEmmm +__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm +__ZN20RootDomainUserClient15setPreventativeEmm +__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN21IOInterruptController10initVectorElP17IOInterruptVector +__ZN21IOInterruptController11causeVectorElP17IOInterruptVector +__ZN21IOInterruptController12enableVectorElP17IOInterruptVector +__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector +__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector 
+__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector +__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN21IONaturalMemoryCursor17withSpecificationEmmm +__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm +__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm +__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection +__ZN21IOSubMemoryDescriptor12setPurgeableEmPm +__ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptormmm +__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN21IOSubMemoryDescriptor7prepareE11IODirection +__ZN21IOSubMemoryDescriptor8completeE11IODirection +__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb +__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb +__ZN23IOMultiMemoryDescriptor7prepareE11IODirection +__ZN23IOMultiMemoryDescriptor8completeE11IODirection +__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj +__ZN24IOBufferMemoryDescriptor11withOptionsEmjj +__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection +__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb +__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb +__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj +__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj +__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task +__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev 
+__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev +__ZN24IOBufferMemoryDescriptor9setLengthEj +__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb +__ZN25IOGeneralMemoryDescriptor11setPositionEm +__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection +__ZN25IOGeneralMemoryDescriptor12setPurgeableEmPm +__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj +__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection +__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task +__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper +__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv +__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm +__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm +__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm +__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm +__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection 
+__ZN25IOGeneralMemoryDescriptor8completeE11IODirection +__ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection +__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection +__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptormm +__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsE11IODirection +__ZN29IOInterleavedMemoryDescriptor7prepareE11IODirection +__ZN29IOInterleavedMemoryDescriptor8completeE11IODirection +__ZN8IOMapper10allocTableEm +__ZN8IOMapper10iovmInsertEjmP13upl_page_infom +__ZN8IOMapper10iovmInsertEjmPjm +__ZN8IOMapper11NewARTTableEmPPvPj +__ZN8IOMapper12FreeARTTableEP6OSDatam +__ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjm +__ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandm +__ZN8IOPMprot10gMetaClassE +__ZN8IOPMprot10superClassE +__ZN8IOPMprot9MetaClassC1Ev +__ZN8IOPMprot9MetaClassC2Ev +__ZN8IOPMprot9metaClassE +__ZN8IOPMprotC1EPK11OSMetaClass +__ZN8IOPMprotC1Ev +__ZN8IOPMprotC2EPK11OSMetaClass +__ZN8IOPMprotC2Ev +__ZN8IOPMprotD0Ev +__ZN8IOPMprotD2Ev +__ZN9IOService10adjustBusyEl +__ZN9IOService10handleOpenEPS_mPv +__ZN9IOService10systemWakeEv +__ZN9IOService10youAreRootEv +__ZN9IOService11_adjustBusyEl +__ZN9IOService11handleCloseEPS_m +__ZN9IOService11tellClientsEi +__ZN9IOService12clampPowerOnEm +__ZN9IOService12didTerminateEPS_mPb +__ZN9IOService12requestProbeEm +__ZN9IOService12waitForStateEmmP13mach_timespec +__ZN9IOService13getPMworkloopEv +__ZN9IOService13messageClientEmP8OSObjectPvj +__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient +__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient +__ZN9IOService13startMatchingEm +__ZN9IOService13waitMatchIdleEm +__ZN9IOService13willTerminateEPS_m +__ZN9IOService14actionFinalizeEPS_m +__ZN9IOService14doServiceMatchEm +__ZN9IOService14messageClientsEmPvj +__ZN9IOService14newTemperatureElPS_ +__ZN9IOService14setPowerParentEP17IOPowerConnectionbm 
+__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j +__ZN9IOService15registerServiceEm +__ZN9IOService15tellChangeDown1Em +__ZN9IOService15tellChangeDown2Em +__ZN9IOService15terminateClientEPS_m +__ZN9IOService15terminatePhase1Em +__ZN9IOService15terminateWorkerEm +__ZN9IOService16ack_timer_tickedEv +__ZN9IOService16command_receivedEPvS0_S0_S0_ +__ZN9IOService16didYouWakeSystemEv +__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ +__ZN9IOService16requestTerminateEPS_m +__ZN9IOService16setCPUSnoopDelayEm +__ZN9IOService18actionDidTerminateEPS_m +__ZN9IOService18doServiceTerminateEm +__ZN9IOService18matchPropertyTableEP12OSDictionaryPl +__ZN9IOService18requireMaxBusStallEm +__ZN9IOService18settleTimerExpiredEv +__ZN9IOService18systemWillShutdownEm +__ZN9IOService19actionWillTerminateEPS_mP7OSArray +__ZN9IOService19deliverNotificationEPK8OSSymbolmm +__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator +__ZN9IOService22PM_Clamp_Timer_ExpiredEv +__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection +__ZN9IOService23acknowledgeNotificationEPvm +__ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_l +__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection +__ZN9IOService23scheduleTerminatePhase2Em +__ZN9IOService23tellClientsWithResponseEi +__ZN9IOService24PM_idle_timer_expirationEv +__ZN9IOService24mapDeviceMemoryWithIndexEjm +__ZN9IOService26temperatureCriticalForZoneEPS_ +__ZN9IOService27serializedAllowPowerChange2Em +__ZN9IOService28serializedCancelPowerChange2Em +__ZN9IOService4openEPS_mPv +__ZN9IOService5closeEPS_m +__ZN9IOService5probeEPS_Pl +__ZN9IOService6PMfreeEv +__ZN9IOService7messageEmPS_Pv +__ZN9IOService8finalizeEm +__ZN9IOService9terminateEm +__ZNK11IOCatalogue13serializeDataEmP11OSSerialize +__ZNK14IOCommandQueue12getMetaClassEv 
+__ZNK14IOCommandQueue9MetaClass5allocEv +__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem +__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem +__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem +__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj +__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj +__ZNK8IOPMprot12getMetaClassEv +__ZNK8IOPMprot9MetaClass5allocEv +__ZTV14IOCommandQueue +__ZTV8IOPMprot +__ZTVN14IOCommandQueue9MetaClassE +__ZTVN8IOPMprot9MetaClassE diff --git a/config/IOKit.ppc.exports b/config/IOKit.ppc.exports index b08a79c40..26b5a9209 100644 --- a/config/IOKit.ppc.exports +++ b/config/IOKit.ppc.exports @@ -1,11 +1,175 @@ -__ZN10AppleMacIO9metaClassE -__ZN16AppleMacIODevice9metaClassE +_IOPanic +_PE_parse_boot_arg __Z11IODBDMAStopPV23IODBDMAChannelRegisters __Z12IODBDMAFlushPV23IODBDMAChannelRegisters __Z12IODBDMAPausePV23IODBDMAChannelRegisters __Z12IODBDMAResetPV23IODBDMAChannelRegisters __Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor __Z15IODBDMAContinuePV23IODBDMAChannelRegisters +__Z16IODTFindSlotNameP15IORegistryEntrym +__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E +__Z17IODTGetCellCountsP15IORegistryEntryPmS1_ +__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ +__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc +__ZN10AppleMacIO9metaClassE +__ZN10IOWorkLoop19workLoopWithOptionsEm +__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem +__ZN10IOWorkLoop9sleepGateEPvm +__ZN11IOCatalogue11findDriversEP12OSDictionaryPl +__ZN11IOCatalogue11findDriversEP9IOServicePl +__ZN11IODataQueue11withEntriesEmm +__ZN11IODataQueue12withCapacityEm +__ZN11IODataQueue15initWithEntriesEmm +__ZN11IODataQueue16initWithCapacityEm +__ZN11IODataQueue7enqueueEPvm 
+__ZN11IOMemoryMap10getAddressEv +__ZN11IOMemoryMap18getPhysicalSegmentEmPm +__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory +__ZN11IOMemoryMap7getSizeEv +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy +__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm +__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm +__ZN12IODMACommand11synchronizeEm +__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm +__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm +__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm +__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm +__ZN12IODMACommand15genIOVMSegmentsEPyPvPm +__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ +__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb +__ZN12IODMACommand8transferEmyPvy +__ZN12IOUserClient12initWithTaskEP4taskPvm +__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary +__ZN12IOUserClient15mapClientMemoryEmP4taskmj +__ZN12IOUserClient15sendAsyncResultEPjiPPvm +__ZN12IOUserClient17mapClientMemory64EmP4taskmy +__ZN12IOUserClient17sendAsyncResult64EPyiS0_m +__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor +__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy +__ZN12IOUserClient23getExternalTrapForIndexEm +__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore +__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem +__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm +__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy +__ZN12IOUserClient25getExternalMethodForIndexEm +__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN12IOUserClient30getExternalAsyncMethodForIndexEm +__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem 
+__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem +__ZN13IOCommandGate12commandSleepEPvm +__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm +__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm +__ZN13IOEventSource9sleepGateEPv12UnsignedWidem +__ZN13IOEventSource9sleepGateEPvm +__ZN13_IOServiceJob8startJobEP9IOServiceim +__ZN14IOCommandQueue10gMetaClassE +__ZN14IOCommandQueue10superClassE +__ZN14IOCommandQueue12checkForWorkEv +__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei +__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_ +__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E +__ZN14IOCommandQueue4freeEv +__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei +__ZN14IOCommandQueue9MetaClassC1Ev +__ZN14IOCommandQueue9MetaClassC2Ev +__ZN14IOCommandQueue9metaClassE +__ZN14IOCommandQueueC1EPK11OSMetaClass +__ZN14IOCommandQueueC1Ev +__ZN14IOCommandQueueC2EPK11OSMetaClass +__ZN14IOCommandQueueC2Ev +__ZN14IOCommandQueueD0Ev +__ZN14IOCommandQueueD2Ev +__ZN14IODeviceMemory12withSubRangeEPS_mm +__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm +__ZN14IODeviceMemory9withRangeEmm +__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm +__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm +__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm +__ZN14IOPMrootDomain17setSleepSupportedEm +__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j +__ZN14IOPMrootDomain24receivePowerNotificationEm +__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j +__ZN15IODMAController13getControllerEP9IOServicem +__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim +__ZN15IODMAController20createControllerNameEm +__ZN15IODMAController21registerDMAControllerEm +__ZN16AppleMacIODevice9metaClassE +__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m 
+__ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm +__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim +__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m +__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc +__ZN16IORangeAllocator10deallocateEmm +__ZN16IORangeAllocator12allocElementEm +__ZN16IORangeAllocator13allocateRangeEmm +__ZN16IORangeAllocator14deallocElementEm +__ZN16IORangeAllocator28setFragmentCapacityIncrementEm +__ZN16IORangeAllocator4initEmmmm +__ZN16IORangeAllocator8allocateEmPmm +__ZN16IORangeAllocator9withRangeEmmmm +__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN17IOBigMemoryCursor17withSpecificationEmmm +__ZN17IOBigMemoryCursor21initWithSpecificationEmmm +__ZN17IOSharedDataQueue11withEntriesEmm +__ZN17IOSharedDataQueue12withCapacityEm +__ZN17IOSharedDataQueue16initWithCapacityEm +__ZN17IOSharedDataQueue7dequeueEPvPm +__ZN18IOMemoryDescriptor10setMappingEP4taskjm +__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN18IOMemoryDescriptor10writeBytesEmPKvm +__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm +__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection +__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task +__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor12setPurgeableEmPm +__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection +__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection +__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task +__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper +__ZN18IOMemoryDescriptor16getSourceSegmentEmPm +__ZN18IOMemoryDescriptor16performOperationEmmm +__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task +__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm +__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task 
+__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm +__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy +__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection +__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm +__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev +__ZN18IOMemoryDescriptor3mapEP4taskjmmm +__ZN18IOMemoryDescriptor3mapEm +__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm +__ZN18IOMemoryDescriptor6setTagEm +__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm +__ZN18IOMemoryDescriptor9readBytesEmPvm +__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem +__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem +__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide +__ZN18IOTimerEventSource10setTimeoutE13mach_timespec +__ZN18IOTimerEventSource10setTimeoutEmm +__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide +__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec +__ZN18IOTimerEventSource10wakeAtTimeEmm +__ZN18IOTimerEventSource12setTimeoutMSEm +__ZN18IOTimerEventSource12setTimeoutUSEm +__ZN18IOTimerEventSource12wakeAtTimeMSEm +__ZN18IOTimerEventSource12wakeAtTimeUSEm +__ZN18IOTimerEventSource15setTimeoutTicksEm +__ZN18IOTimerEventSource15wakeAtTimeTicksEm __ZN19IODBDMAMemoryCursor10gMetaClassE __ZN19IODBDMAMemoryCursor10superClassE 
__ZN19IODBDMAMemoryCursor17withSpecificationEmmm @@ -19,6 +183,148 @@ __ZN19IODBDMAMemoryCursorC2EPK11OSMetaClass __ZN19IODBDMAMemoryCursorC2Ev __ZN19IODBDMAMemoryCursorD0Ev __ZN19IODBDMAMemoryCursorD2Ev +__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN20IOLittleMemoryCursor17withSpecificationEmmm +__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm +__ZN20RootDomainUserClient15setPreventativeEmm +__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem +__ZN21IOInterruptController10initVectorElP17IOInterruptVector +__ZN21IOInterruptController11causeVectorElP17IOInterruptVector +__ZN21IOInterruptController12enableVectorElP17IOInterruptVector +__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector +__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector +__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector +__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm +__ZN21IONaturalMemoryCursor17withSpecificationEmmm +__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm +__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm +__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection +__ZN21IOSubMemoryDescriptor12setPurgeableEmPm +__ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptormmm +__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN21IOSubMemoryDescriptor7prepareE11IODirection +__ZN21IOSubMemoryDescriptor8completeE11IODirection +__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb +__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb +__ZN23IOMultiMemoryDescriptor7prepareE11IODirection +__ZN23IOMultiMemoryDescriptor8completeE11IODirection +__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj +__ZN24IOBufferMemoryDescriptor11withOptionsEmjj 
+__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection +__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb +__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb +__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj +__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj +__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task +__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev +__ZN24IOBufferMemoryDescriptor9setLengthEj +__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb +__ZN25IOGeneralMemoryDescriptor11setPositionEm +__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection +__ZN25IOGeneralMemoryDescriptor12setPurgeableEmPm +__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj +__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection 
+__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task +__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper +__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv +__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm +__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm +__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm +__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm +__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection +__ZN25IOGeneralMemoryDescriptor8completeE11IODirection +__ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection +__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection +__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm +__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptormm +__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsE11IODirection +__ZN29IOInterleavedMemoryDescriptor7prepareE11IODirection +__ZN29IOInterleavedMemoryDescriptor8completeE11IODirection +__ZN8IOMapper10allocTableEm +__ZN8IOMapper10iovmInsertEjmP13upl_page_infom +__ZN8IOMapper10iovmInsertEjmPjm +__ZN8IOMapper11NewARTTableEmPPvPj +__ZN8IOMapper12FreeARTTableEP6OSDatam +__ZN8IOPMprot10gMetaClassE +__ZN8IOPMprot10superClassE +__ZN8IOPMprot9MetaClassC1Ev +__ZN8IOPMprot9MetaClassC2Ev +__ZN8IOPMprot9metaClassE +__ZN8IOPMprotC1EPK11OSMetaClass +__ZN8IOPMprotC1Ev +__ZN8IOPMprotC2EPK11OSMetaClass +__ZN8IOPMprotC2Ev +__ZN8IOPMprotD0Ev +__ZN8IOPMprotD2Ev +__ZN9IOService10adjustBusyEl +__ZN9IOService10handleOpenEPS_mPv +__ZN9IOService10systemWakeEv +__ZN9IOService10youAreRootEv +__ZN9IOService11_adjustBusyEl +__ZN9IOService11handleCloseEPS_m 
+__ZN9IOService11tellClientsEi +__ZN9IOService12clampPowerOnEm +__ZN9IOService12didTerminateEPS_mPb +__ZN9IOService12requestProbeEm +__ZN9IOService12waitForStateEmmP13mach_timespec +__ZN9IOService13getPMworkloopEv +__ZN9IOService13messageClientEmP8OSObjectPvj +__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient +__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient +__ZN9IOService13startMatchingEm +__ZN9IOService13waitMatchIdleEm +__ZN9IOService13willTerminateEPS_m +__ZN9IOService14actionFinalizeEPS_m +__ZN9IOService14doServiceMatchEm +__ZN9IOService14messageClientsEmPvj +__ZN9IOService14newTemperatureElPS_ +__ZN9IOService14setPowerParentEP17IOPowerConnectionbm +__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l +__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j +__ZN9IOService15registerServiceEm +__ZN9IOService15tellChangeDown1Em +__ZN9IOService15tellChangeDown2Em +__ZN9IOService15terminateClientEPS_m +__ZN9IOService15terminatePhase1Em +__ZN9IOService15terminateWorkerEm +__ZN9IOService16ack_timer_tickedEv +__ZN9IOService16command_receivedEPvS0_S0_S0_ +__ZN9IOService16didYouWakeSystemEv +__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ +__ZN9IOService16requestTerminateEPS_m +__ZN9IOService16setCPUSnoopDelayEm +__ZN9IOService18actionDidTerminateEPS_m +__ZN9IOService18doServiceTerminateEm +__ZN9IOService18matchPropertyTableEP12OSDictionaryPl +__ZN9IOService18requireMaxBusStallEm +__ZN9IOService18settleTimerExpiredEv +__ZN9IOService18systemWillShutdownEm +__ZN9IOService19actionWillTerminateEPS_mP7OSArray +__ZN9IOService19deliverNotificationEPK8OSSymbolmm +__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService20_RESERVEDIOService48Ev __ZN9IOService20_RESERVEDIOService49Ev __ZN9IOService20_RESERVEDIOService50Ev @@ -35,13 +341,43 @@ __ZN9IOService20_RESERVEDIOService60Ev __ZN9IOService20_RESERVEDIOService61Ev __ZN9IOService20_RESERVEDIOService62Ev 
__ZN9IOService20_RESERVEDIOService63Ev +__ZN9IOService22PM_Clamp_Timer_ExpiredEv +__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection +__ZN9IOService23acknowledgeNotificationEPvm +__ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_l +__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection +__ZN9IOService23scheduleTerminatePhase2Em +__ZN9IOService23tellClientsWithResponseEi +__ZN9IOService24PM_idle_timer_expirationEv +__ZN9IOService24mapDeviceMemoryWithIndexEjm +__ZN9IOService26temperatureCriticalForZoneEPS_ +__ZN9IOService27serializedAllowPowerChange2Em +__ZN9IOService28serializedCancelPowerChange2Em +__ZN9IOService4openEPS_mPv +__ZN9IOService5closeEPS_m +__ZN9IOService5probeEPS_Pl +__ZN9IOService6PMfreeEv +__ZN9IOService7messageEmPS_Pv +__ZN9IOService8finalizeEm +__ZN9IOService9terminateEm +__ZNK11IOCatalogue13serializeDataEmP11OSSerialize +__ZNK14IOCommandQueue12getMetaClassEv +__ZNK14IOCommandQueue9MetaClass5allocEv +__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem +__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem +__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem +__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem +__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj __ZNK19IODBDMAMemoryCursor12getMetaClassEv __ZNK19IODBDMAMemoryCursor9MetaClass5allocEv +__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj +__ZNK8IOPMprot12getMetaClassEv +__ZNK8IOPMprot9MetaClass5allocEv +__ZTV14IOCommandQueue __ZTV19IODBDMAMemoryCursor +__ZTV8IOPMprot +__ZTVN14IOCommandQueue9MetaClassE __ZTVN19IODBDMAMemoryCursor9MetaClassE -__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy -__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy -__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task 
-__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy -__ZN12IOUserClient17mapClientMemory64EmP4taskmy -__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy +__ZTVN8IOPMprot9MetaClassE diff --git a/config/IOKit.x86_64.exports b/config/IOKit.x86_64.exports new file mode 100644 index 000000000..d3067b6e0 --- /dev/null +++ b/config/IOKit.x86_64.exports @@ -0,0 +1,271 @@ +__Z16IODTFindSlotNameP15IORegistryEntryj +__Z16IODTSetResolvingP15IORegistryEntryPFijPjS1_EPFvS0_PhS4_S4_E +__Z17IODTGetCellCountsP15IORegistryEntryPjS1_ +__Z22IODTResolveAddressCellP15IORegistryEntryPjPyS2_ +__Z23IODTFindMatchingEntriesP15IORegistryEntryjPKc +__ZN10IOWorkLoop19workLoopWithOptionsEj +__ZN10IOWorkLoop20_RESERVEDIOWorkLoop0Ev +__ZN10IOWorkLoop20_RESERVEDIOWorkLoop1Ev +__ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev +__ZN10IOWorkLoop9sleepGateEPvj +__ZN10IOWorkLoop9sleepGateEPvyj +__ZN11IOCatalogue11findDriversEP12OSDictionaryPi +__ZN11IOCatalogue11findDriversEP9IOServicePi +__ZN11IODataQueue11withEntriesEjj +__ZN11IODataQueue12withCapacityEj +__ZN11IODataQueue15initWithEntriesEjj +__ZN11IODataQueue16initWithCapacityEj +__ZN11IODataQueue7enqueueEPvj +__ZN11IOMemoryMap18getPhysicalSegmentEyPyj +__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptorjy +__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvj +__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvj +__ZN12IODMACommand11synchronizeEj +__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvj +__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvj +__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvj +__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvj +__ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvjEPyS2_Pj +__ZN12IODMACommand15genIOVMSegmentsEPyPvPj +__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_ +__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperS2_ 
+__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvjEhyNS_14MappingOptionsEyjP8IOMapperyybb +__ZN12IODMACommand8transferEjyPvy +__ZN12IOUserClient12initWithTaskEP4taskPvj +__ZN12IOUserClient12initWithTaskEP4taskPvjP12OSDictionary +__ZN12IOUserClient15sendAsyncResultEPjiPPvj +__ZN12IOUserClient17mapClientMemory64EjP4taskjy +__ZN12IOUserClient17sendAsyncResult64EPyiS0_j +__ZN12IOUserClient19clientMemoryForTypeEjPjPP18IOMemoryDescriptor +__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy +__ZN12IOUserClient22_RESERVEDIOUserClient0Ev +__ZN12IOUserClient22_RESERVEDIOUserClient1Ev +__ZN12IOUserClient23getExternalTrapForIndexEj +__ZN12IOUserClient24getNotificationSemaphoreEjPP9semaphore +__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicej +__ZN12IOUserClient24registerNotificationPortEP8ipc_portjj +__ZN12IOUserClient24registerNotificationPortEP8ipc_portjy +__ZN12IOUserClient25getExternalMethodForIndexEj +__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicej +__ZN12IOUserClient30getExternalAsyncMethodForIndexEj +__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicej +__ZN13IOCommandGate12commandSleepEPvj +__ZN13IOCommandGate12commandSleepEPvyj +__ZN13IOCommandGate23_RESERVEDIOCommandGate0Ev +__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopj +__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopj +__ZN13IOEventSource9sleepGateEPvj +__ZN13IOEventSource9sleepGateEPvyj +__ZN13_IOServiceJob8startJobEP9IOServiceij +__ZN14IODeviceMemory12withSubRangeEPS_yy +__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEj +__ZN14IODeviceMemory9withRangeEyy +__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvjEyyy +__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptoryPvjjPy +__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvjEyyy +__ZN14IOPMrootDomain17setSleepSupportedEj +__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_jP9IOServiceS0_m +__ZN14IOPMrootDomain24receivePowerNotificationEj 
+__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_jP9IOServiceS0_m +__ZN15IODMAController13getControllerEP9IOServicej +__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandiy +__ZN15IODMAController20createControllerNameEj +__ZN15IODMAController21registerDMAControllerEj +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry0Ev +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry1Ev +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry2Ev +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry3Ev +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry4Ev +__ZN15IORegistryEntry25_RESERVEDIORegistryEntry5Ev +__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyES8_j +__ZN16IODMAEventSource15startDMACommandEP12IODMACommandjyy +__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiy +__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyES8_j +__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionaryjPKc +__ZN16IORangeAllocator10deallocateEyy +__ZN16IORangeAllocator12allocElementEj +__ZN16IORangeAllocator13allocateRangeEyy +__ZN16IORangeAllocator14deallocElementEj +__ZN16IORangeAllocator28setFragmentCapacityIncrementEj +__ZN16IORangeAllocator4initEyyjj +__ZN16IORangeAllocator8allocateEyPyy +__ZN16IORangeAllocator9withRangeEyyjj +__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj +__ZN17IOBigMemoryCursor17withSpecificationEyyy +__ZN17IOBigMemoryCursor21initWithSpecificationEyyy +__ZN17IOSharedDataQueue11withEntriesEjj +__ZN17IOSharedDataQueue12withCapacityEj +__ZN17IOSharedDataQueue16initWithCapacityEj +__ZN17IOSharedDataQueue7dequeueEPvPj +__ZN18IOMemoryDescriptor10setMappingEP4taskyj +__ZN18IOMemoryDescriptor10writeBytesEyPKvy +__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskyjyy +__ZN18IOMemoryDescriptor11withAddressEPvyj +__ZN18IOMemoryDescriptor11withOptionsEPvjjP4taskjP8IOMapper +__ZN18IOMemoryDescriptor12setPurgeableEjPj +__ZN18IOMemoryDescriptor15initWithOptionsEPvjjP4taskjP8IOMapper 
+__ZN18IOMemoryDescriptor16performOperationEjyy +__ZN18IOMemoryDescriptor16withAddressRangeEyyjP4task +__ZN18IOMemoryDescriptor17withAddressRangesEP14IOVirtualRangejjP4task +__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskyjyy +__ZN18IOMemoryDescriptor19withPhysicalAddressEyyj +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor1Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor2Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor3Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor4Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor5Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev +__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev +__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev +__ZN18IOMemoryDescriptor3mapEj +__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPyjyy +__ZN18IOMemoryDescriptor6setTagEj +__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapyy +__ZN18IOMemoryDescriptor9readBytesEyPvy +__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanej +__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanej +__ZN18IOTimerEventSource10setTimeoutEjj +__ZN18IOTimerEventSource10setTimeoutEy +__ZN18IOTimerEventSource10wakeAtTimeEjj +__ZN18IOTimerEventSource10wakeAtTimeEy +__ZN18IOTimerEventSource12setTimeoutMSEj +__ZN18IOTimerEventSource12setTimeoutUSEj +__ZN18IOTimerEventSource12wakeAtTimeMSEj +__ZN18IOTimerEventSource12wakeAtTimeUSEj +__ZN18IOTimerEventSource15setTimeoutTicksEj +__ZN18IOTimerEventSource15wakeAtTimeTicksEj +__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj 
+__ZN20IOLittleMemoryCursor17withSpecificationEyyy +__ZN20IOLittleMemoryCursor21initWithSpecificationEyyy +__ZN20RootDomainUserClient15setPreventativeEjj +__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicej +__ZN21IOInterruptController10initVectorEiP17IOInterruptVector +__ZN21IOInterruptController11causeVectorEiP17IOInterruptVector +__ZN21IOInterruptController12enableVectorEiP17IOInterruptVector +__ZN21IOInterruptController13getVectorTypeEiP17IOInterruptVector +__ZN21IOInterruptController17disableVectorHardEiP17IOInterruptVector +__ZN21IOInterruptController17vectorCanBeSharedEiP17IOInterruptVector +__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvj +__ZN21IONaturalMemoryCursor17withSpecificationEyyy +__ZN21IONaturalMemoryCursor21initWithSpecificationEyyy +__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskyjyy +__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptoryyj +__ZN21IOSubMemoryDescriptor12setPurgeableEjPj +__ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptoryyj +__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEyPyj +__ZN21IOSubMemoryDescriptor7prepareEj +__ZN21IOSubMemoryDescriptor8completeEj +__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorjjb +__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorjjb +__ZN23IOMultiMemoryDescriptor7prepareEj +__ZN23IOMultiMemoryDescriptor8completeEj +__ZN24IOBufferMemoryDescriptor11appendBytesEPKvm +__ZN24IOBufferMemoryDescriptor11withOptionsEjmm +__ZN24IOBufferMemoryDescriptor12setDirectionEj +__ZN24IOBufferMemoryDescriptor12withCapacityEmjb +__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEmm +__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskjmm +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskjyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskjyy +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor0Ev 
+__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor1Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev +__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev +__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev +__ZN24IOBufferMemoryDescriptor9setLengthEm +__ZN24IOBufferMemoryDescriptor9withBytesEPKvmjb +__ZN25IOGeneralMemoryDescriptor11wireVirtualEj +__ZN25IOGeneralMemoryDescriptor12setPurgeableEjPj +__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvjjP4taskjP8IOMapper +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEyPyj +__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPyjyy +__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapyy +__ZN25IOGeneralMemoryDescriptor7prepareEj +__ZN25IOGeneralMemoryDescriptor8completeEj +__ZN29IOInterleavedMemoryDescriptor12withCapacityEyj +__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEyj +__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptoryy +__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsEj +__ZN29IOInterleavedMemoryDescriptor7prepareEj +__ZN29IOInterleavedMemoryDescriptor8completeEj +__ZN8IOMapper10allocTableEy +__ZN8IOMapper10iovmInsertEjjP13upl_page_infoj 
+__ZN8IOMapper10iovmInsertEjjPjj +__ZN8IOMapper11NewARTTableEyPPvPj +__ZN8IOMapper12FreeARTTableEP6OSDatay +__ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjj +__ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandj +__ZN9IOService10adjustBusyEi +__ZN9IOService10handleOpenEPS_jPv +__ZN9IOService11_adjustBusyEi +__ZN9IOService11handleCloseEPS_j +__ZN9IOService12didTerminateEPS_jPb +__ZN9IOService12requestProbeEj +__ZN9IOService13messageClientEjP8OSObjectPvm +__ZN9IOService13newUserClientEP4taskPvjP12OSDictionaryPP12IOUserClient +__ZN9IOService13newUserClientEP4taskPvjPP12IOUserClient +__ZN9IOService13startMatchingEj +__ZN9IOService13waitMatchIdleEj +__ZN9IOService13willTerminateEPS_j +__ZN9IOService14actionFinalizeEPS_j +__ZN9IOService14doServiceMatchEj +__ZN9IOService14messageClientsEjPvm +__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_i +__ZN9IOService15nextIdleTimeoutEyyj +__ZN9IOService15registerServiceEj +__ZN9IOService15terminateClientEPS_j +__ZN9IOService15terminatePhase1Ej +__ZN9IOService15terminateWorkerEj +__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_jPS_S3_mES3_S3_ +__ZN9IOService16requestTerminateEPS_j +__ZN9IOService16setCPUSnoopDelayEj +__ZN9IOService18actionDidTerminateEPS_j +__ZN9IOService18doServiceTerminateEj +__ZN9IOService18matchPropertyTableEP12OSDictionaryPi +__ZN9IOService18requireMaxBusStallEj +__ZN9IOService18systemWillShutdownEj +__ZN9IOService19_RESERVEDIOService0Ev +__ZN9IOService19_RESERVEDIOService1Ev +__ZN9IOService19_RESERVEDIOService2Ev +__ZN9IOService19_RESERVEDIOService3Ev +__ZN9IOService19_RESERVEDIOService4Ev +__ZN9IOService19_RESERVEDIOService5Ev +__ZN9IOService19actionWillTerminateEPS_jP7OSArray +__ZN9IOService19deliverNotificationEPK8OSSymboljj +__ZN9IOService23acknowledgeNotificationEPvj +__ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_i +__ZN9IOService23scheduleTerminatePhase2Ej +__ZN9IOService24mapDeviceMemoryWithIndexEjj 
+__ZN9IOService4openEPS_jPv +__ZN9IOService5closeEPS_j +__ZN9IOService5probeEPS_Pi +__ZN9IOService7messageEjPS_Pv +__ZN9IOService8finalizeEj +__ZN9IOService9terminateEj +__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanej +__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanej +__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanej +__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanej +__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanej +__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanej +__ZNK18IOMemoryDescriptor19dmaCommandOperationEjPvj +__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEjPvj diff --git a/config/IPFirewall.kext/Info.plist b/config/IPFirewall.kext/Info.plist deleted file mode 100644 index 7f415cdbd..000000000 --- a/config/IPFirewall.kext/Info.plist +++ /dev/null @@ -1,26 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleGetInfoString - IP network firewall - CFBundleIdentifier - com.apple.nke.IPFirewall - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - IP Firewall - CFBundlePackageType - KEXT - CFBundleShortVersionString - 2.0 - CFBundleSignature - ipfw - CFBundleVersion - 2.0 - OSKernelResource - - - diff --git a/config/Libkern.exports b/config/Libkern.exports index 8438edd9c..2e7ff44dd 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -1,3 +1,4 @@ +___bzero:_bzero _Assert _MD5Final _MD5Init @@ -16,6 +17,7 @@ _OSBitXorAtomic _OSBitXorAtomic16 _OSBitXorAtomic8 _OSCompareAndSwap +_OSCompareAndSwapPtr _OSDecrementAtomic _OSDecrementAtomic16 _OSDecrementAtomic8 @@ -23,16 +25,17 @@ _OSFree _OSIncrementAtomic _OSIncrementAtomic16 _OSIncrementAtomic8 +_OSKextCancelRequest +_OSKextLoadKextWithIdentifier +_OSKextReleaseKextWithLoadTag +_OSKextRequestResource +_OSKextRetainKextWithLoadTag _OSMalloc _OSMalloc_Tagalloc _OSMalloc_Tagfree _OSMalloc_noblock _OSMalloc_nowait _OSReportWithBacktrace -_OSRuntimeFinalizeCPP -_OSRuntimeInitializeCPP 
-_OSRuntimeUnloadCPP -_OSRuntimeUnloadCPPForSegment _OSTestAndClear _OSTestAndSet _OSUnserializechar @@ -42,6 +45,7 @@ _OSlibkernInit _SHA1Final _SHA1Init _SHA1Update +_STRDUP __Z13OSUnserializePKcPP8OSString __Z16OSUnserializeXMLPKcPP8OSString __ZN10OSIterator10gMetaClassE @@ -183,14 +187,12 @@ __ZN12OSOrderedSet10setOptionsEjjPv __ZN12OSOrderedSet10superClassE __ZN12OSOrderedSet11orderObjectEPK15OSMetaClassBase __ZN12OSOrderedSet12removeObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ __ZN12OSOrderedSet13setLastObjectEPK15OSMetaClassBase __ZN12OSOrderedSet14copyCollectionEP12OSDictionary __ZN12OSOrderedSet14ensureCapacityEj __ZN12OSOrderedSet14getOrderingRefEv __ZN12OSOrderedSet14setFirstObjectEPK15OSMetaClassBase __ZN12OSOrderedSet15flushCollectionEv -__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ __ZN12OSOrderedSet20setCapacityIncrementEj __ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev __ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev @@ -317,7 +319,6 @@ __ZN6OSData12withCapacityEj __ZN6OSData13initWithBytesEPKvj __ZN6OSData14ensureCapacityEj __ZN6OSData15withBytesNoCopyEPvj -__ZN6OSData16_RESERVEDOSData0Ev __ZN6OSData16_RESERVEDOSData1Ev __ZN6OSData16_RESERVEDOSData2Ev __ZN6OSData16_RESERVEDOSData3Ev @@ -326,6 +327,7 @@ __ZN6OSData16_RESERVEDOSData5Ev __ZN6OSData16_RESERVEDOSData6Ev __ZN6OSData16_RESERVEDOSData7Ev __ZN6OSData16initWithCapacityEj +__ZN6OSData18setDeallocFunctionEPFvPvjE __ZN6OSData19initWithBytesNoCopyEPvj __ZN6OSData20setCapacityIncrementEj __ZN6OSData4freeEv @@ -676,6 +678,7 @@ __ZNK9OSBoolean9MetaClass5allocEv __ZNK9OSBoolean9isEqualToEPK15OSMetaClassBase __ZNK9OSBoolean9isEqualToEPKS_ __ZNK9OSBoolean9serializeEP11OSSerialize +__ZSt20__throw_length_errorPKc __ZTV10OSIterator __ZTV11OSMetaClass __ZTV11OSSerialize @@ -710,9 +713,13 @@ __ZTVN8OSObject9MetaClassE __ZTVN8OSString9MetaClassE __ZTVN8OSSymbol9MetaClassE __ZTVN9OSBoolean9MetaClassE +__ZdaPv __ZdlPv +__Znam __Znwm 
___cxa_pure_virtual +___stack_chk_fail +___stack_chk_guard _adler32 _atoi _bcmp @@ -771,8 +778,8 @@ _lck_mtx_destroy _lck_mtx_free _lck_mtx_init _lck_mtx_lock -_lck_mtx_unlock _lck_mtx_try_lock +_lck_mtx_unlock _lck_rw_alloc_init _lck_rw_destroy _lck_rw_free @@ -791,12 +798,14 @@ _lck_spin_destroy _lck_spin_free _lck_spin_init _lck_spin_lock +_lck_spin_try_lock _lck_spin_unlock _memcmp _memcpy _memmove _memset _ml_at_interrupt_context +_ml_get_interrupts_enabled _ml_set_interrupts_enabled _osrelease _ostype @@ -813,15 +822,14 @@ _sscanf _strcasecmp _strchr _strcmp -_STRDUP +_strlcat +_strlcpy _strlen _strncasecmp _strncat _strncmp _strncpy _strnlen -_strlcat -_strlcpy _strprefix _strtol _strtoq diff --git a/config/Libkern.i386.exports b/config/Libkern.i386.exports index d7f49b799..31d172284 100644 --- a/config/Libkern.i386.exports +++ b/config/Libkern.i386.exports @@ -1,5 +1,12 @@ -_OSCompareAndSwap64 +_lck_mtx_unlock_darwin10 _OSAddAtomic64 -_strcpy -_strcat +_OSCompareAndSwap64 +_OSRuntimeFinalizeCPP +_OSRuntimeInitializeCPP +_OSRuntimeUnloadCPP +_OSRuntimeUnloadCPPForSegment +__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ +__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ _sprintf +_strcat +_strcpy diff --git a/config/Libkern.ppc.exports b/config/Libkern.ppc.exports index 4531e8434..ebf87f219 100644 --- a/config/Libkern.ppc.exports +++ b/config/Libkern.ppc.exports @@ -1,5 +1,11 @@ _OSDequeueAtomic _OSEnqueueAtomic +_OSRuntimeFinalizeCPP +_OSRuntimeInitializeCPP +_OSRuntimeUnloadCPP +_OSRuntimeUnloadCPPForSegment +__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ +__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ __ZN8OSObject19_RESERVEDOSObject16Ev __ZN8OSObject19_RESERVEDOSObject17Ev __ZN8OSObject19_RESERVEDOSObject18Ev @@ -18,6 +24,6 @@ __ZN8OSObject19_RESERVEDOSObject30Ev __ZN8OSObject19_RESERVEDOSObject31Ev _bcopy_nc _bzero_nc -_strcpy -_strcat _sprintf +_strcat +_strcpy diff --git 
a/config/Libkern.x86_64.exports b/config/Libkern.x86_64.exports new file mode 100644 index 000000000..639d10368 --- /dev/null +++ b/config/Libkern.x86_64.exports @@ -0,0 +1,8 @@ +_OSAddAtomic64 +_OSCompareAndSwap64 +__ZN12OSOrderedSet12withCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_ +__ZN12OSOrderedSet16initWithCapacityEjPFiPK15OSMetaClassBaseS2_PvES3_ +_gOSKextUnresolved +_sprintf +_strcat +_strcpy diff --git a/config/MACFramework.exports b/config/MACFramework.exports index 1d551bebf..cba6d7dae 100644 --- a/config/MACFramework.exports +++ b/config/MACFramework.exports @@ -3,6 +3,8 @@ _mac_policy_unregister _mac_vnop_getxattr _mac_vnop_setxattr _mac_vnop_removexattr +_mac_label_get +_mac_label_set _mac_audit_text diff --git a/config/MACFramework.x86_64.exports b/config/MACFramework.x86_64.exports new file mode 100644 index 000000000..6006136b4 --- /dev/null +++ b/config/MACFramework.x86_64.exports @@ -0,0 +1,9 @@ +_kau_will_audit +_mac_kalloc +_mac_kalloc_noblock +_mac_kfree +_mac_mbuf_alloc +_mac_mbuf_free +_mac_unwire +_mac_wire +_sysctl__security_mac_children diff --git a/config/Mach.exports b/config/Mach.exports index 48e542931..eb47c7d62 100644 --- a/config/Mach.exports +++ b/config/Mach.exports @@ -23,7 +23,8 @@ _lck_rw_sleep_deadline _lck_spin_sleep _lck_spin_sleep_deadline _mach_absolute_time -_mach_msg_send_from_kernel +_mach_msg_send_from_kernel_proper +_mach_vm_pressure_monitor _nanoseconds_to_absolutetime _preemption_enabled _semaphore_create @@ -32,8 +33,9 @@ _semaphore_destroy _semaphore_reference _semaphore_signal _semaphore_signal_all -_semaphore_timedwait _semaphore_wait +_semaphore_wait_deadline +_semaphore_wait_noblock _task_deallocate _task_reference _thread_block diff --git a/config/Mach.i386.exports b/config/Mach.i386.exports index 09d50d0f1..8b6d67bba 100644 --- a/config/Mach.i386.exports +++ b/config/Mach.i386.exports @@ -1,2 +1,4 @@ _host_vmxon _host_vmxoff +_mach_msg_send_from_kernel +_semaphore_timedwait diff --git a/config/Mach.ppc.exports 
b/config/Mach.ppc.exports index e69de29bb..cc31a814e 100644 --- a/config/Mach.ppc.exports +++ b/config/Mach.ppc.exports @@ -0,0 +1 @@ +_semaphore_timedwait diff --git a/config/Mach.x86_64.exports b/config/Mach.x86_64.exports new file mode 100644 index 000000000..09d50d0f1 --- /dev/null +++ b/config/Mach.x86_64.exports @@ -0,0 +1,2 @@ +_host_vmxon +_host_vmxoff diff --git a/config/Makefile b/config/Makefile index db72f8acd..c28573b0a 100644 --- a/config/Makefile +++ b/config/Makefile @@ -17,6 +17,8 @@ INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = + INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = @@ -25,6 +27,8 @@ EXPINC_SUBDIRS_PPC = EXPINC_SUBDIRS_I386 = +EXPINC_SUBDIRS_X86_64 = + EXPINC_SUBDIRS_ARM = COMP_SUBDIRS = @@ -33,7 +37,6 @@ INST_SUBDIRS = INSTALL_DATA_LIST= \ - IPFirewall.kext/Info.plist \ System.kext/Info.plist \ System.kext/PlugIns/Libkern.kext/Info.plist \ System.kext/PlugIns/Mach.kext/Info.plist \ @@ -45,6 +48,7 @@ INSTALL_DATA_LIST= \ System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \ System.kext/PlugIns/IOSystemManagement.kext/Info.plist \ System.kext/PlugIns/Unsupported.kext/Info.plist \ + System.kext/PlugIns/Private.kext/Info.plist \ \ System.kext/PlugIns/System6.0.kext/Info.plist \ System.kext/PlugIns/Libkern6.0.kext/Info.plist \ @@ -57,10 +61,8 @@ INSTALL_DATA_DIR= \ INSTMAN_SUBDIRS = -# - -KEXT_CREATE_SYMBOL_SET = /usr/local/bin/kextsymboltool -NEWVERS = $(SRCROOT)/config/newvers.pl +MD_SUPPORTED_KPI_FILENAME="SupportedKPIs-${ARCH_CONFIG_LC}.txt" +MI_SUPPORTED_KPI_FILENAME="SupportedKPIs-all-archs.txt" ifneq ($(MACHINE_CONFIG), DEFAULT) OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG) @@ -68,13 +70,22 @@ else OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG) endif +ifeq ($(ARCH_CONFIG),I386) +SUPPORT_SYSTEM60_KEXT = 1 +else ifeq ($(ARCH_CONFIG),ARM) +SUPPORT_SYSTEM60_KEXT = 1 +else +SUPPORT_SYSTEM60_KEXT = 0 +endif + SYMBOL_COMPONENT_LIST = \ System6.0 \ BSDKernel \ IOKit \ Libkern \ Mach \ - 
Unsupported + Unsupported \ + Private ifdef MAC SYMBOL_COMPONENT_LIST += MACFramework @@ -89,23 +100,48 @@ SYMBOL_SET_FAT = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJROOT)/$(set).symb ## .SUFFIXES: .symbolset .symbollist $(OBJPATH)/allsymbols: $(OBJPATH)/mach_kernel - $(_v)nm -gj $< > $@ + $(_v)$(NM) -gj $< > $@ $(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols - $(_v)$(KEXT_CREATE_SYMBOL_SET) \ + $(_v)if [ "$*" != System6.0 -o $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \ + $(KEXT_CREATE_SYMBOL_SET) \ $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_ALL_)) \ -import $(OBJPATH)/allsymbols \ -export $*.exports \ -export $*.$(ARCH_CONFIG_LC).exports \ - -output $@; - -ifneq ($(MACHINE_CONFIG), DEFAULT) -$(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset : $(foreach arch, $(INSTALL_ARCHS), $(OBJROOT)/$(KERNEL_CONFIG)_$(arch)_$(MACHINE_CONFIG)/%.symbolset) - $(LIPO) $(foreach arch, $(INSTALL_ARCHS), $(OBJROOT)/$(KERNEL_CONFIG)_$(arch)_$(MACHINE_CONFIG)/$*.symbolset) -create -output $@; -else -$(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset : $(foreach arch, $(INSTALL_ARCHS), $(OBJROOT)/$(KERNEL_CONFIG)_$(arch)/%.symbolset) - $(_v)$(LIPO) $(foreach arch, $(INSTALL_ARCHS), $(OBJROOT)/$(KERNEL_CONFIG)_$(arch)/$*.symbolset) -create -output $@; -endif + -output $@ $(_vstdout); \ + fi + $(_v)if [ ! 
-e $@ ]; then \ + printf "" > $@; \ + fi + +# When building the top-level install target for all architectures, aggregate the per-arch +# symbolsets for the primary machine configuration for each kernel/arch config +$(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset : + $(_v)per_arch_symbolsets=""; \ + kernel_config=$(INSTALL_TYPE); \ + machine_config=$(MACHINE_CONFIG); \ + for arch_config in $(INSTALL_ARCHS); \ + do \ + if [ $${arch_config} = ARM ] ; then \ + if [ $${machine_config} = DEFAULT ] ; then \ + machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ + fi; \ + fi; \ + if [ $${machine_config} = DEFAULT ] ; then \ + objpath=${OBJROOT}/$${kernel_config}_$${arch_config}; \ + else \ + objpath=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ + fi; \ + if [ -s "$${objpath}/$*.symbolset" ]; then \ + per_arch_symbolsets="$${per_arch_symbolsets} $${objpath}/$*.symbolset"; \ + fi; \ + done; \ + if [ -n "$${per_arch_symbolsets}" ]; then \ + $(LIPO) $${per_arch_symbolsets} -create -output $@; \ + else \ + printf "" > $@; \ + fi build_symbol_sets: $(SYMBOL_SET_BUILD) $(_v)$(KEXT_CREATE_SYMBOL_SET) \ @@ -122,33 +158,68 @@ build_symbol_sets: $(SYMBOL_SET_BUILD) $(MACFRAMEWORKEXPORTS) \ -export $(SRCROOT)/$(COMPONENT)/Unsupported.exports \ -export $(SRCROOT)/$(COMPONENT)/Unsupported.$(ARCH_CONFIG_LC).exports \ - -output /dev/null; + -output /dev/null $(_vstdout); + $(_v)$(KEXT_CREATE_SYMBOL_SET) \ + $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ + -import $(OBJPATH)/allsymbols \ + -export $(SRCROOT)/$(COMPONENT)/Libkern.exports \ + -export $(SRCROOT)/$(COMPONENT)/Libkern.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/Mach.exports \ + -export $(SRCROOT)/$(COMPONENT)/Mach.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/IOKit.exports \ + -export $(SRCROOT)/$(COMPONENT)/IOKit.$(ARCH_CONFIG_LC).exports \ + -export $(SRCROOT)/$(COMPONENT)/BSDKernel.exports \ + -export $(SRCROOT)/$(COMPONENT)/BSDKernel.$(ARCH_CONFIG_LC).exports \ + 
$(MACFRAMEWORKEXPORTS) \ + -export $(SRCROOT)/$(COMPONENT)/Private.exports \ + -export $(SRCROOT)/$(COMPONENT)/Private.$(ARCH_CONFIG_LC).exports \ + -output /dev/null $(_vstdout); + $(_v) $(SRCROOT)/$(COMPONENT)/list_supported.sh $(SRCROOT)/$(COMPONENT) $(ARCH_CONFIG_LC) $(OBJPATH)/${MD_SUPPORTED_KPI_FILENAME}; + $(_v)if [ -n "`echo $${ARCH_CONFIGS%%\ *} | grep -i $(ARCH_CONFIG)`" ]; \ + then \ + $(SRCROOT)/$(COMPONENT)/list_supported.sh $(SRCROOT)/$(COMPONENT) "ALL" $(OBJROOT)/${MI_SUPPORTED_KPI_FILENAME}; \ + fi + install_symbol_sets: $(SYMBOL_SET_FAT) $(SRCROOT)/config/MasterVersion - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0; - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel; -ifdef MAC - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/MACFramework.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/MACFramework; -endif - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/IOKit; - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Libkern; - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Mach; - $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported; + $(_v)if [ -s "$(OBJROOT)/System6.0.symbolset" ]; then \ + install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0; \ + install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach6.0.kext/Mach6.0; \ + install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset
$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel6.0.kext/BSDKernel6.0; \ + install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern6.0.kext/Libkern6.0; \ + install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit6.0.kext/IOKit6.0; \ + fi + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel; + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/IOKit; + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Libkern; + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Mach; + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported; + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/Private.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Private.kext/Private; $(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/AppleNMI.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/BSDKernel.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOKit.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/IOSystemManagement.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Libkern.kext/Info.plist \ $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Mach.kext/Info.plist \ -
$(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist \ + $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/Private.kext/Info.plist; + $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR); + $(_v)install $(INSTALL_FLAGS) $(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR); + $(_v)if [ -n "`echo $${ARCH_CONFIGS%%\ *} | grep -i $(ARCH_CONFIG)`" ]; then \ + install $(INSTALL_FLAGS) $(OBJROOT)/$(MI_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR); \ + fi +ifdef MAC + $(_v)install $(INSTALL_FLAGS) $(OBJROOT)/MACFramework.symbolset $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/MACFramework; + $(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext/PlugIns/MACFramework.kext/Info.plist +endif + $(_v)$(CP) -rf $(DSTROOT)/$(INSTALL_DATA_DIR)/System.kext $(SYMROOT) do_build_all: build_symbol_sets -do_build_install: install_symbol_sets +do_build_install: install_symbol_sets include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/config/MasterVersion b/config/MasterVersion index 6ce3d2f9d..ec4aa8f99 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -9.8.0 +10.0.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file.
diff --git a/config/Private.exports b/config/Private.exports new file mode 100644 index 000000000..c1536e81a --- /dev/null +++ b/config/Private.exports @@ -0,0 +1,176 @@ +__ZN16IOPlatformExpert* +__ZNK16IOPlatformExpert* +__ZTV16IOPlatformExpert +__ZN18IODTPlatformExpert* +__ZNK18IODTPlatformExpert* +__ZTV18IODTPlatformExpert +__ZN5IOCPU* +__ZNK5IOCPU* +__ZTV5IOCPU +__ZN24IOCPUInterruptController* +__ZNK24IOCPUInterruptController* +__ZTV24IOCPUInterruptController +_b_to_q +_bdevsw +_boot +_bsd_hostname +_bsd_set_dependency_capable +_buf_setfilter +_cdevsw +_clalloc +_clfree +_cons_cinput +_fd_rdwr +_get_aiotask +_hz +_in6addr_local +_inaddr_local +_inet_domain_mutex +_ip_mutex +_ip_output +_ip_protox +_ipc_port_release_send +_kdp_set_interface +_kdp_unregister_send_receive +_kmem_alloc_kobject +_linesw +_log +_logwakeup +_m_cat +_m_free:_mbuf_free +_m_freem:_mbuf_freem +_m_get +_m_gethdr +_m_mtod +_m_prepend_2 +_m_pullup +_m_split +_m_trailingspace:_mbuf_trailingspace +_mac_proc_set_enforce +_mcl_to_paddr +_mountroot_post_hook +_net_add_domain +_net_add_proto +_net_del_domain +_net_del_proto +_netboot_root +_perf_monitor_register +_perf_monitor_unregister +_pffinddomain +_pffindproto +_pmc_accessible_from_core +_pmc_config_set_interrupt_threshold +_pmc_config_set_value +_pmc_create_config +_pmc_find_by_name +_pmc_free_config +_pmc_free_pmc_list +_pmc_get_accessible_core_list +_pmc_get_name +_pmc_get_pmc_list +_pmc_register +_pmc_reserve +_pmc_reserve_task +_pmc_reserve_thread +_pmc_reservation_free +_pmc_reservation_read +_pmc_reservation_start +_pmc_reservation_stop +_pmc_reservation_write +_pmc_unregister +_post_sys_powersource +_port_name_to_task +_port_name_to_thread +_pru_abort_notsupp +_pru_accept_notsupp +_pru_bind_notsupp +_pru_connect2_notsupp +_pru_connect_notsupp +_pru_disconnect_notsupp +_pru_listen_notsupp +_pru_peeraddr_notsupp +_pru_rcvd_notsupp +_pru_rcvoob_notsupp +_pru_send_notsupp +_pru_sense_null +_pru_shutdown_notsupp +_pru_sockaddr_notsupp 
+_pru_sopoll_notsupp +_q_to_b +_register_decmpfs_decompressor +_rootdev +_rootvp +_sbappendaddr +_sbappendrecord +_sbflush +_sbspace +_soabort +_socantrcvmore +_socantsendmore +_sock_getlistener +_sock_release +_sock_retain +_sodisconnect +_sofree +_sofreelastref +_soisconnected +_soisconnecting +_soisdisconnected +_soisdisconnecting +_sonewconn +_sopoll +_sooptcopyin +_sooptcopyout +_soreceive +_soreserve +_sorwakeup +_sosend +_termioschars +_thread_tid +_thread_dispatchqaddr +_throttle_info_create +_throttle_info_mount_ref +_throttle_info_mount_rel +_throttle_info_release +_throttle_info_update +_timeout +_tk_nin +_tk_rawcc +_tsleep +_ttioctl +_ttsetwater +_ttspeedtab +_ttwakeup +_ttwwakeup +_ttyclose +_ttyflush +_ttyfree +_ttyinput +_ttymalloc +_ttymodem +_ttysleep +_ttyselect +_tty_lock +_tty_unlock +_unmountroot_pre_hook +_unputc +_unregister_decmpfs_decompressor +_untimeout +_vnode_ismonitored +_vnode_notify +_vnop_monitor_desc +_vfs_context_bind +_vfs_context_get_special_port +_vfs_context_set_special_port +_vfs_get_notify_attributes +_vfs_mntlabel +_vfs_vnodecovered +_vm_map_copy_copy +_vm_map_copy_discard +_vm_map_copyin +_vm_map_copyin_common +_vm_map_copyout +_vn_getpath_fsenter +_vn_searchfs_inappropriate_name +_sock_settclassopt +_sock_gettclassopt diff --git a/config/Private.i386.exports b/config/Private.i386.exports new file mode 100644 index 000000000..63d85a3b0 --- /dev/null +++ b/config/Private.i386.exports @@ -0,0 +1,14 @@ +_acpi_install_wake_handler +_acpi_sleep_kernel +_add_fsevent +_apic_table +_cpu_to_lapic +_cpuid_features +_cpuid_info +_gOSKextUnresolved +_lapic_end_of_interrupt +_mp_broadcast +_mp_cpus_call +_need_fsevent +_smp_initialized +_lapic_unmask_perfcnt_interrupt diff --git a/config/Private.ppc.exports b/config/Private.ppc.exports new file mode 100644 index 000000000..0f0b58c19 --- /dev/null +++ b/config/Private.ppc.exports @@ -0,0 +1,2 @@ +_add_fsevent +_need_fsevent diff --git a/config/Private.x86_64.exports 
b/config/Private.x86_64.exports new file mode 100644 index 000000000..9748fcbe7 --- /dev/null +++ b/config/Private.x86_64.exports @@ -0,0 +1,15 @@ +_acpi_install_wake_handler +_acpi_sleep_kernel +_add_fsevent +_apic_table +_cpu_to_lapic +_cpuid_features +_cpuid_info +_lapic_end_of_interrupt +_lapic_unmask_perfcnt_interrupt +_mp_broadcast +_mp_cpus_call +_need_fsevent +_semaphore_timedwait +_smp_initialized +_kext_get_vm_map diff --git a/config/README.DEBUG-kernel.txt b/config/README.DEBUG-kernel.txt new file mode 100644 index 000000000..b2fc71942 --- /dev/null +++ b/config/README.DEBUG-kernel.txt @@ -0,0 +1,39 @@ +This directory contains a universal DEBUG kernel, built for 32-bit and +64-bit Intel. It includes a dSYM bundle for remote kernel debugging +and live kernel debugging. + +INSTALLATION + +!!!WARNING!!! These steps will overwrite the default kernel and +System.kext. Backup all files before attempting these steps. + +To install the DEBUG kernel, do: +bash-3.2$ sudo -s +bash-3.2# cd / +bash-3.2# ditto /AppleInternal/Developer/Extras/Kernel\ Debugging/System.kext /System/Library/Extensions/System.kext +bash-3.2# cp -r /AppleInternal/Developer/Extras/Kernel\ Debugging/mach_kernel* / +bash-3.2# chown -R root:wheel /System/Library/Extensions/System.kext /mach_kernel* +bash-3.2# chmod -R g-w /System/Library/Extensions/System.kext /mach_kernel* +bash-3.2# touch /System/Library/Extensions +bash-3.2# shutdown -r now + +REMOTE KERNEL DEBUGGING + +See the documentation that accompanies the Kernel Debug Kit + +LIVE KERNEL DEBUGGING + +With the DEBUG kernel installed, set "kmem=1" in your "boot-args" +NVRAM variable, reboot, and do: + +bash-3.2$ sudo gdb -a <arch> --quiet /mach_kernel +(gdb) target darwin-kernel +(gdb) source /AppleInternal/Developer/Extras/Kernel\ Debugging/kgmacros +Loading Kernel GDB Macros package. Type "help kgm" for more info. +(gdb) attach +Connected.
+ +<arch> should reflect the currently booted kernel architecture, either +"i386" or "x86_64" + + diff --git a/config/System.kext/Info.plist b/config/System.kext/Info.plist index ac4e5bae0..1fa642814 100644 --- a/config/System.kext/Info.plist +++ b/config/System.kext/Info.plist @@ -1,11 +1,11 @@ - + CFBundleDevelopmentRegion English CFBundleGetInfoString - System Resource Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### + System Resource Pseudoextension, Apple Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.kernel CFBundleInfoDictionaryVersion @@ -26,5 +26,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/AppleNMI.kext/Info.plist b/config/System.kext/PlugIns/AppleNMI.kext/Info.plist index 742ae9d1b..1963c2d13 100644 --- a/config/System.kext/PlugIns/AppleNMI.kext/Info.plist +++ b/config/System.kext/PlugIns/AppleNMI.kext/Info.plist @@ -1,11 +1,11 @@ - + CFBundleDevelopmentRegion English CFBundleGetInfoString - AppleNMI Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### + AppleNMI Pseudoextension, Apple Inc, ###KERNEL_VERSION_LONG### CFBundleIdentifier com.apple.driver.AppleNMI CFBundleInfoDictionaryVersion @@ -24,5 +24,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist b/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist index ffd2dae41..e7b4da8e3 100644 --- a/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist +++ b/config/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -26,5 +26,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/BSDKernel.kext/Info.plist b/config/System.kext/PlugIns/BSDKernel.kext/Info.plist index fcd967afa..77c930fe3 100644 --- a/config/System.kext/PlugIns/BSDKernel.kext/Info.plist +++ b/config/System.kext/PlugIns/BSDKernel.kext/Info.plist @@ -1,5 +1,5 @@ - +
CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist b/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist index 55d34bbf3..95393ca8d 100644 --- a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist @@ -1,9 +1,11 @@ - + CFBundleDevelopmentRegion English + CFBundleExecutable + BSDKernel6.0 CFBundleGetInfoString BSD Kernel Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier @@ -24,9 +26,9 @@ 1.1 OSBundleRequired Root - OSBundleSharedExecutableIdentifier - com.apple.kernel.6.0 OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/IOKit.kext/Info.plist b/config/System.kext/PlugIns/IOKit.kext/Info.plist index 5ca172b1c..2d794045f 100644 --- a/config/System.kext/PlugIns/IOKit.kext/Info.plist +++ b/config/System.kext/PlugIns/IOKit.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist b/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist index 8dcf9743c..34a0cd3d7 100644 --- a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist @@ -1,9 +1,11 @@ - + CFBundleDevelopmentRegion English + CFBundleExecutable + IOKit6.0 CFBundleGetInfoString I/O Kit Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier @@ -24,9 +26,9 @@ 1.0.0b1 OSBundleRequired Root - OSBundleSharedExecutableIdentifier - com.apple.kernel.6.0 OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist b/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist index 998d83151..d6a40137a 100644 --- a/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist +++ b/config/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist @@ -1,5 +1,5 @@ - + 
CFBundleDevelopmentRegion @@ -26,5 +26,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist b/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist index 5f9024432..9e5b80327 100644 --- a/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist +++ b/config/System.kext/PlugIns/IOSystemManagement.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -26,5 +26,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Libkern.kext/Info.plist b/config/System.kext/PlugIns/Libkern.kext/Info.plist index a04f4c87f..a3c99a627 100644 --- a/config/System.kext/PlugIns/Libkern.kext/Info.plist +++ b/config/System.kext/PlugIns/Libkern.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist b/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist index c9dfaa185..8014a6c03 100644 --- a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist @@ -1,9 +1,11 @@ - + CFBundleDevelopmentRegion English + CFBundleExecutable + Libkern6.0 CFBundleGetInfoString Libkern Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier @@ -24,9 +26,9 @@ 1.0.0b1 OSBundleRequired Root - OSBundleSharedExecutableIdentifier - com.apple.kernel.6.0 OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/MACFramework.kext/Info.plist b/config/System.kext/PlugIns/MACFramework.kext/Info.plist index 93ab38088..c0651c2c6 100644 --- a/config/System.kext/PlugIns/MACFramework.kext/Info.plist +++ b/config/System.kext/PlugIns/MACFramework.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Mach.kext/Info.plist 
b/config/System.kext/PlugIns/Mach.kext/Info.plist index 9f4a6e288..cb6ac76d7 100644 --- a/config/System.kext/PlugIns/Mach.kext/Info.plist +++ b/config/System.kext/PlugIns/Mach.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist b/config/System.kext/PlugIns/Mach6.0.kext/Info.plist index 69244d218..c46200211 100644 --- a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/Mach6.0.kext/Info.plist @@ -1,9 +1,11 @@ - + CFBundleDevelopmentRegion English + CFBundleExecutable + Mach6.0 CFBundleGetInfoString Mach Kernel Pseudoextension, Apple Computer Inc, 7.9.9 CFBundleIdentifier @@ -24,9 +26,9 @@ 1.0.0b1 OSBundleRequired Root - OSBundleSharedExecutableIdentifier - com.apple.kernel.6.0 OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Private.kext/Info.plist b/config/System.kext/PlugIns/Private.kext/Info.plist new file mode 100644 index 000000000..05b09ae09 --- /dev/null +++ b/config/System.kext/PlugIns/Private.kext/Info.plist @@ -0,0 +1,34 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + Private + CFBundleGetInfoString + Private Pseudoextension, Apple Computer Inc, ###KERNEL_VERSION_LONG### + CFBundleIdentifier + com.apple.kpi.private + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + Private Pseudoextension + CFBundlePackageType + KEXT + CFBundleShortVersionString + ###KERNEL_VERSION_SHORT### + CFBundleSignature + ???? 
+ CFBundleVersion + ###KERNEL_VERSION_LONG### + OSBundleCompatibleVersion + 8.0.0b1 + OSBundleRequired + Root + OSKernelResource + + OSBundleAllowUserLoad + + + diff --git a/config/System.kext/PlugIns/System6.0.kext/Info.plist b/config/System.kext/PlugIns/System6.0.kext/Info.plist index 36f98b1e9..24f566b70 100644 --- a/config/System.kext/PlugIns/System6.0.kext/Info.plist +++ b/config/System.kext/PlugIns/System6.0.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -7,7 +7,7 @@ CFBundleExecutable kernel.6.0 CFBundleGetInfoString - System Resource Pseudoextension, Apple Computer Inc, 7.9.9 + System Resource Pseudoextension, Apple Inc, 7.9.9 CFBundleIdentifier com.apple.kernel.6.0 CFBundleInfoDictionaryVersion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System.kext/PlugIns/Unsupported.kext/Info.plist b/config/System.kext/PlugIns/Unsupported.kext/Info.plist index 5f8979335..c89ef6754 100644 --- a/config/System.kext/PlugIns/Unsupported.kext/Info.plist +++ b/config/System.kext/PlugIns/Unsupported.kext/Info.plist @@ -1,5 +1,5 @@ - + CFBundleDevelopmentRegion @@ -28,5 +28,7 @@ Root OSKernelResource + OSBundleAllowUserLoad + diff --git a/config/System6.0.exports b/config/System6.0.exports index d4882c538..ab5e6038b 100644 --- a/config/System6.0.exports +++ b/config/System6.0.exports @@ -169,13 +169,11 @@ _PE_init_platform _PE_init_printf _PE_initialize_console _PE_kputc -_PE_parse_boot_arg _PE_poll_input _PE_putc _PE_register_timebase_callback _PE_state _StartIOKit -__Z10tellClientP8OSObjectPv __Z13OSUnserializePKcPP8OSString __Z16IOCPUSleepKernelv __Z16IODTFindSlotNameP15IORegistryEntrym @@ -187,17 +185,12 @@ __Z17IODeviceTreeAllocPv __Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv __Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_ __Z19printDictionaryKeysP12OSDictionaryPc -__Z19tellAppWithResponseP8OSObjectPv __Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor 
__Z20IODTMatchNubWithKeysP15IORegistryEntryPKc __Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory __Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ -__Z22tellClientWithResponseP8OSObjectPv __Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc -__Z24broadcast_aggressivenessP8OSObjectPvS1_S1_S1_ -__Z26serializedAllowPowerChangeP8OSObjectPvS1_S1_S1_ __Z27IODTInterruptControllerNameP15IORegistryEntry -__Z27serializedCancelPowerChangeP8OSObjectPvS1_S1_S1_ __ZN10IOMachPort10gMetaClassE __ZN10IOMachPort10superClassE __ZN10IOMachPort11dictForTypeEj @@ -235,7 +228,6 @@ __ZN10IOWorkLoop14addEventSourceEP13IOEventSource __ZN10IOWorkLoop15runEventSourcesEv __ZN10IOWorkLoop17removeEventSourceEP13IOEventSource __ZN10IOWorkLoop19signalWorkAvailableEv -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop2Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev __ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev @@ -250,6 +242,7 @@ __ZN10IOWorkLoop9MetaClassC2Ev __ZN10IOWorkLoop9closeGateEv __ZN10IOWorkLoop9metaClassE __ZN10IOWorkLoop9runActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ +__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem __ZN10IOWorkLoop9sleepGateEPvm __ZN10IOWorkLoopC1EPK11OSMetaClass __ZN10IOWorkLoopC1Ev @@ -281,9 +274,6 @@ __ZN11IOCatalogue13startMatchingEP12OSDictionary __ZN11IOCatalogue15moduleHasLoadedEP8OSString __ZN11IOCatalogue15moduleHasLoadedEPKc __ZN11IOCatalogue16terminateDriversEP12OSDictionary -__ZN11IOCatalogue18removeKernelLinkerEv -__ZN11IOCatalogue23recordStartupExtensionsEv -__ZN11IOCatalogue24addExtensionsFromArchiveEP6OSData __ZN11IOCatalogue25terminateDriversForModuleEP8OSStringb __ZN11IOCatalogue25terminateDriversForModuleEPKcb __ZN11IOCatalogue4freeEv @@ -319,13 +309,32 @@ __ZN11IODataQueueC2Ev __ZN11IODataQueueD0Ev __ZN11IODataQueueD2Ev __ZN11IOMemoryMap10gMetaClassE +__ZN11IOMemoryMap10getAddressEv __ZN11IOMemoryMap10superClassE +__ZN11IOMemoryMap13getMapOptionsEv +__ZN11IOMemoryMap14getAddressTaskEv 
+__ZN11IOMemoryMap17getVirtualAddressEv __ZN11IOMemoryMap18getPhysicalAddressEv +__ZN11IOMemoryMap18getPhysicalSegmentEmPm +__ZN11IOMemoryMap19getMemoryDescriptorEv +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev +__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev +__ZN11IOMemoryMap5unmapEv +__ZN11IOMemoryMap7getSizeEv __ZN11IOMemoryMap9MetaClassC1Ev __ZN11IOMemoryMap9MetaClassC2Ev +__ZN11IOMemoryMap9getLengthEv __ZN11IOMemoryMap9metaClassE __ZN11IOMemoryMapC1EPK11OSMetaClass +__ZN11IOMemoryMapC1Ev __ZN11IOMemoryMapC2EPK11OSMetaClass +__ZN11IOMemoryMapC2Ev __ZN11IOMemoryMapD0Ev __ZN11IOMemoryMapD2Ev __ZN11IOResources10gMetaClassE @@ -452,7 +461,6 @@ __ZN12IOUserClient18clientHasPrivilegeEPvPKc __ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor __ZN12IOUserClient20exportObjectToClientEP4taskP8OSObjectPS3_ __ZN12IOUserClient21destroyUserReferencesEP8OSObject -__ZN12IOUserClient22_RESERVEDIOUserClient1Ev __ZN12IOUserClient22_RESERVEDIOUserClient2Ev __ZN12IOUserClient22_RESERVEDIOUserClient3Ev __ZN12IOUserClient22_RESERVEDIOUserClient4Ev @@ -471,6 +479,7 @@ __ZN12IOUserClient23getExternalTrapForIndexEm __ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore __ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem __ZN12IOUserClient24registerNotificationPortEP8ipc_portmm +__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy __ZN12IOUserClient25getExternalMethodForIndexEm __ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem __ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor @@ -612,11 +621,11 @@ __ZN13IOCommandGate10superClassE __ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E __ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop __ZN13IOCommandGate12checkForWorkEv 
+__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem __ZN13IOCommandGate12commandSleepEPvm __ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ __ZN13IOCommandGate13commandWakeupEPvb __ZN13IOCommandGate14attemptCommandEPvS0_S0_S0_ -__ZN13IOCommandGate23_RESERVEDIOCommandGate0Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev __ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev @@ -690,6 +699,7 @@ __ZN13IOEventSource9MetaClassC2Ev __ZN13IOEventSource9closeGateEv __ZN13IOEventSource9metaClassE __ZN13IOEventSource9setActionEPFvP8OSObjectzE +__ZN13IOEventSource9sleepGateEPv12UnsignedWidem __ZN13IOEventSource9sleepGateEPvm __ZN13IOEventSourceC1EPK11OSMetaClass __ZN13IOEventSourceC2EPK11OSMetaClass @@ -744,11 +754,8 @@ __ZN14IOMemoryCursorD0Ev __ZN14IOMemoryCursorD2Ev __ZN14IOPMrootDomain10gMetaClassE __ZN14IOPMrootDomain10superClassE -__ZN14IOPMrootDomain10youAreRootEv __ZN14IOPMrootDomain11sleepSystemEv -__ZN14IOPMrootDomain12broadcast_itEmm __ZN14IOPMrootDomain12tellChangeUpEm -__ZN14IOPMrootDomain12unIdleDeviceEP9IOServicem __ZN14IOPMrootDomain12wakeFromDozeEv __ZN14IOPMrootDomain13askChangeDownEm __ZN14IOPMrootDomain13restartSystemEv @@ -757,9 +764,6 @@ __ZN14IOPMrootDomain14publishFeatureEPKc __ZN14IOPMrootDomain14shutdownSystemEv __ZN14IOPMrootDomain14tellChangeDownEm __ZN14IOPMrootDomain15powerChangeDoneEm -__ZN14IOPMrootDomain15reportUserInputEv -__ZN14IOPMrootDomain16adjustPowerStateEv -__ZN14IOPMrootDomain16command_receivedEPvS0_S0_S0_ __ZN14IOPMrootDomain16tellNoChangeDownEm __ZN14IOPMrootDomain17getSleepSupportedEv __ZN14IOPMrootDomain17setAggressivenessEmm @@ -768,14 +772,7 @@ __ZN14IOPMrootDomain18changePowerStateToEm __ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j __ZN14IOPMrootDomain22changePowerStateToPrivEm __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm -__ZN14IOPMrootDomain23setQuickSpinDownTimeoutEv 
-__ZN14IOPMrootDomain24displayWranglerPublishedEPvS0_P9IOService __ZN14IOPMrootDomain24receivePowerNotificationEm -__ZN14IOPMrootDomain25announcePowerSourceChangeEv -__ZN14IOPMrootDomain26handleSleepTimerExpirationEv -__ZN14IOPMrootDomain26restoreUserSpinDownTimeoutEv -__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j -__ZN14IOPMrootDomain39stopIgnoringClamshellEventsDuringWakeupEv __ZN14IOPMrootDomain5startEP9IOService __ZN14IOPMrootDomain9MetaClassC1Ev __ZN14IOPMrootDomain9MetaClassC2Ev @@ -932,20 +929,6 @@ __ZN15OSMetaClassBaseD0Ev __ZN15OSMetaClassBaseD2Ev __ZN15OSMetaClassMetaC1Ev __ZN15OSMetaClassMetaC2Ev -__ZN15_IOConfigThread10gMetaClassE -__ZN15_IOConfigThread10superClassE -__ZN15_IOConfigThread12configThreadEv -__ZN15_IOConfigThread4freeEv -__ZN15_IOConfigThread4mainEPS_ -__ZN15_IOConfigThread9MetaClassC1Ev -__ZN15_IOConfigThread9MetaClassC2Ev -__ZN15_IOConfigThread9metaClassE -__ZN15_IOConfigThreadC1EPK11OSMetaClass -__ZN15_IOConfigThreadC1Ev -__ZN15_IOConfigThreadC2EPK11OSMetaClass -__ZN15_IOConfigThreadC2Ev -__ZN15_IOConfigThreadD0Ev -__ZN15_IOConfigThreadD2Ev __ZN16IOKitDiagnostics10gMetaClassE __ZN16IOKitDiagnostics10superClassE __ZN16IOKitDiagnostics11diagnosticsEv @@ -1185,15 +1168,21 @@ __ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper __ZN18IOMemoryDescriptor12setPurgeableEmPm __ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection __ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap +__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb +__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection +__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper __ZN18IOMemoryDescriptor16getSourceSegmentEmPm __ZN18IOMemoryDescriptor16performOperationEmmm +__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm __ZN18IOMemoryDescriptor18getPhysicalAddressEv +__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm 
+__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm __ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor6Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor7Ev +__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb +__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev __ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev __ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev @@ -1332,21 +1321,6 @@ __ZN19IOPMPowerSourceListC2EPK11OSMetaClass __ZN19IOPMPowerSourceListC2Ev __ZN19IOPMPowerSourceListD0Ev __ZN19IOPMPowerSourceListD2Ev -__ZN19IOPMPowerStateQueue10gMetaClassE -__ZN19IOPMPowerStateQueue10superClassE -__ZN19IOPMPowerStateQueue12checkForWorkEv -__ZN19IOPMPowerStateQueue14unIdleOccurredEP9IOServicem -__ZN19IOPMPowerStateQueue17PMPowerStateQueueEP8OSObject -__ZN19IOPMPowerStateQueue4initEP8OSObjectPFvS1_zE -__ZN19IOPMPowerStateQueue9MetaClassC1Ev -__ZN19IOPMPowerStateQueue9MetaClassC2Ev -__ZN19IOPMPowerStateQueue9metaClassE -__ZN19IOPMPowerStateQueueC1EPK11OSMetaClass -__ZN19IOPMPowerStateQueueC1Ev -__ZN19IOPMPowerStateQueueC2EPK11OSMetaClass -__ZN19IOPMPowerStateQueueC2Ev -__ZN19IOPMPowerStateQueueD0Ev -__ZN19IOPMPowerStateQueueD2Ev __ZN20IOLittleMemoryCursor10gMetaClassE __ZN20IOLittleMemoryCursor10superClassE __ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm @@ -1438,29 +1412,17 @@ __ZN21IONaturalMemoryCursorD0Ev __ZN21IONaturalMemoryCursorD2Ev __ZN21IOSubMemoryDescriptor10gMetaClassE __ZN21IOSubMemoryDescriptor10superClassE -__ZN21IOSubMemoryDescriptor10writeBytesEmPKvm __ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm __ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection 
__ZN21IOSubMemoryDescriptor12setPurgeableEmPm -__ZN21IOSubMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN21IOSubMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN21IOSubMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN21IOSubMemoryDescriptor16getSourceSegmentEmPm -__ZN21IOSubMemoryDescriptor16performOperationEmmm -__ZN21IOSubMemoryDescriptor17getVirtualSegmentEmPm -__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPm -__ZN21IOSubMemoryDescriptor20getPhysicalSegment64EmPm -__ZN21IOSubMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN21IOSubMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm __ZN21IOSubMemoryDescriptor4freeEv -__ZN21IOSubMemoryDescriptor5doMapEP7_vm_mapPjmmm __ZN21IOSubMemoryDescriptor7prepareE11IODirection __ZN21IOSubMemoryDescriptor8completeE11IODirection __ZN21IOSubMemoryDescriptor8redirectEP4taskb __ZN21IOSubMemoryDescriptor9MetaClassC1Ev __ZN21IOSubMemoryDescriptor9MetaClassC2Ev __ZN21IOSubMemoryDescriptor9metaClassE -__ZN21IOSubMemoryDescriptor9readBytesEmPvm __ZN21IOSubMemoryDescriptorC1EPK11OSMetaClass __ZN21IOSubMemoryDescriptorC1Ev __ZN21IOSubMemoryDescriptorC2EPK11OSMetaClass @@ -1531,24 +1493,15 @@ __ZN22_IOOpenServiceIteratorD0Ev __ZN22_IOOpenServiceIteratorD2Ev __ZN23IOMultiMemoryDescriptor10gMetaClassE __ZN23IOMultiMemoryDescriptor10superClassE -__ZN23IOMultiMemoryDescriptor10writeBytesEmPKvm -__ZN23IOMultiMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN23IOMultiMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN23IOMultiMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor16getSourceSegmentEmPm -__ZN23IOMultiMemoryDescriptor17getVirtualSegmentEmPm -__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPm 
+__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm __ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN23IOMultiMemoryDescriptor23initWithPhysicalAddressEmm11IODirection __ZN23IOMultiMemoryDescriptor4freeEv __ZN23IOMultiMemoryDescriptor7prepareE11IODirection __ZN23IOMultiMemoryDescriptor8completeE11IODirection __ZN23IOMultiMemoryDescriptor9MetaClassC1Ev __ZN23IOMultiMemoryDescriptor9MetaClassC2Ev __ZN23IOMultiMemoryDescriptor9metaClassE -__ZN23IOMultiMemoryDescriptor9readBytesEmPvm __ZN23IOMultiMemoryDescriptorC1EPK11OSMetaClass __ZN23IOMultiMemoryDescriptorC1Ev __ZN23IOMultiMemoryDescriptorC2EPK11OSMetaClass @@ -1564,15 +1517,10 @@ __ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb __ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb __ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj __ZN24IOBufferMemoryDescriptor14getBytesNoCopyEv -__ZN24IOBufferMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN24IOBufferMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN24IOBufferMemoryDescriptor15initWithAddressEjm11IODirectionP4task __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj __ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task -__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj __ZN24IOBufferMemoryDescriptor17getVirtualSegmentEmPm -__ZN24IOBufferMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN24IOBufferMemoryDescriptor23initWithPhysicalAddressEmm11IODirection +__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev __ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev @@ -1639,6 +1587,7 @@ __ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv 
__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm __ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm __ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm +__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm __ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm __ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb __ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection @@ -1721,6 +1670,7 @@ __ZN28IOFilterInterruptEventSourceC2EPK11OSMetaClass __ZN28IOFilterInterruptEventSourceC2Ev __ZN28IOFilterInterruptEventSourceD0Ev __ZN28IOFilterInterruptEventSourceD2Ev +__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm __ZN5IOCPU10gMetaClassE __ZN5IOCPU10superClassE __ZN5IOCPU11getCPUGroupEv @@ -1799,7 +1749,6 @@ __ZN6OSData12withCapacityEj __ZN6OSData13initWithBytesEPKvj __ZN6OSData14ensureCapacityEj __ZN6OSData15withBytesNoCopyEPvj -__ZN6OSData16_RESERVEDOSData0Ev __ZN6OSData16_RESERVEDOSData1Ev __ZN6OSData16_RESERVEDOSData2Ev __ZN6OSData16_RESERVEDOSData3Ev @@ -1808,6 +1757,7 @@ __ZN6OSData16_RESERVEDOSData5Ev __ZN6OSData16_RESERVEDOSData6Ev __ZN6OSData16_RESERVEDOSData7Ev __ZN6OSData16initWithCapacityEj +__ZN6OSData18setDeallocFunctionEPFvPvjE __ZN6OSData19initWithBytesNoCopyEPvj __ZN6OSData20setCapacityIncrementEj __ZN6OSData4freeEv @@ -1867,8 +1817,6 @@ __ZN8IOMapper10superClassE __ZN8IOMapper11NewARTTableEmPPvPj __ZN8IOMapper12FreeARTTableEP6OSDatam __ZN8IOMapper17setMapperRequiredEb -__ZN8IOMapper18_RESERVEDIOMapper1Ev -__ZN8IOMapper18_RESERVEDIOMapper2Ev __ZN8IOMapper18_RESERVEDIOMapper3Ev __ZN8IOMapper18_RESERVEDIOMapper4Ev __ZN8IOMapper18_RESERVEDIOMapper5Ev @@ -2092,7 +2040,6 @@ __ZN9IODTNVRAMC2EPK11OSMetaClass __ZN9IODTNVRAMC2Ev __ZN9IODTNVRAMD0Ev __ZN9IODTNVRAMD2Ev -__ZN9IOService10actionStopEPS_S0_ __ZN9IOService10adjustBusyEl __ZN9IOService10gMetaClassE __ZN9IOService10handleOpenEPS_mPv @@ -2102,7 +2049,6 @@ __ZN9IOService10makeUsableEv __ZN9IOService10superClassE 
__ZN9IOService10systemWakeEv __ZN9IOService10youAreRootEv -__ZN9IOService11_adjustBusyEl __ZN9IOService11addLocationEP12OSDictionary __ZN9IOService11getPlatformEv __ZN9IOService11handleCloseEPS_m @@ -2116,7 +2062,6 @@ __ZN9IOService12nameMatchingEPK8OSStringP12OSDictionary __ZN9IOService12nameMatchingEPKcP12OSDictionary __ZN9IOService12passiveMatchEP12OSDictionaryb __ZN9IOService12requestProbeEm -__ZN9IOService12scheduleStopEPS_ __ZN9IOService12tellChangeUpEm __ZN9IOService12waitForStateEmmP13mach_timespec __ZN9IOService13addPowerChildEPS_ @@ -2132,7 +2077,6 @@ __ZN9IOService13setPowerStateEmPS_ __ZN9IOService13startMatchingEm __ZN9IOService13waitMatchIdleEm __ZN9IOService13willTerminateEPS_m -__ZN9IOService14actionFinalizeEPS_m __ZN9IOService14activityTickleEmm __ZN9IOService14applyToClientsEPFvPS_PvES1_ __ZN9IOService14causeInterruptEi @@ -2156,21 +2100,16 @@ __ZN9IOService15lookupInterruptEibPP21IOInterruptController __ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j __ZN9IOService15powerChangeDoneEm __ZN9IOService15probeCandidatesEP12OSOrderedSet -__ZN9IOService16propertyMatchingEPK8OSSymbolPK8OSObjectP12OSDictionary __ZN9IOService15publishResourceEPK8OSSymbolP8OSObject __ZN9IOService15publishResourceEPKcP8OSObject __ZN9IOService15registerServiceEm __ZN9IOService15serviceMatchingEPK8OSStringP12OSDictionary __ZN9IOService15serviceMatchingEPKcP12OSDictionary __ZN9IOService15setDeviceMemoryEP7OSArray -__ZN9IOService15setNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l __ZN9IOService15setPMRootDomainEP14IOPMrootDomain __ZN9IOService15tellChangeDown1Em __ZN9IOService15tellChangeDown2Em __ZN9IOService15terminateClientEPS_m -__ZN9IOService15terminatePhase1Em -__ZN9IOService15terminateThreadEPv -__ZN9IOService15terminateWorkerEm __ZN9IOService16ack_timer_tickedEv __ZN9IOService16allowPowerChangeEm __ZN9IOService16applyToProvidersEPFvPS_PvES1_ @@ -2178,13 +2117,13 @@ __ZN9IOService16command_receivedEPvS0_S0_S0_ __ZN9IOService16didYouWakeSystemEv 
__ZN9IOService16disableInterruptEi __ZN9IOService16getInterruptTypeEiPi +__ZN9IOService16propertyMatchingEPK8OSSymbolPK8OSObjectP12OSDictionary __ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ __ZN9IOService16removePowerChildEP17IOPowerConnection __ZN9IOService16requestTerminateEPS_m __ZN9IOService16resolveInterruptEPS_i __ZN9IOService16resourceMatchingEPK8OSStringP12OSDictionary __ZN9IOService16resourceMatchingEPKcP12OSDictionary -__ZN9IOService16scheduleFinalizeEv __ZN9IOService16stringFromReturnEi __ZN9IOService16tellNoChangeDownEm __ZN9IOService17addNeededResourceEPKc @@ -2196,7 +2135,6 @@ __ZN9IOService17currentCapabilityEv __ZN9IOService17getAggressivenessEmPm __ZN9IOService17registerInterruptEiP8OSObjectPFvS1_PvPS_iES2_ __ZN9IOService17setAggressivenessEmm -__ZN9IOService18actionDidTerminateEPS_m __ZN9IOService18changePowerStateToEm __ZN9IOService18doServiceTerminateEm __ZN9IOService18getResourceServiceEv @@ -2206,16 +2144,13 @@ __ZN9IOService18matchPropertyTableEP12OSDictionaryPl __ZN9IOService18setIdleTimerPeriodEm __ZN9IOService18settleTimerExpiredEv __ZN9IOService18systemWillShutdownEm -__ZN9IOService19_RESERVEDIOService5Ev +__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService19_RESERVEDIOService6Ev __ZN9IOService19_RESERVEDIOService7Ev __ZN9IOService19_RESERVEDIOService8Ev __ZN9IOService19_RESERVEDIOService9Ev -__ZN9IOService19actionWillTerminateEPS_mP7OSArray __ZN9IOService19deliverNotificationEPK8OSSymbolmm -__ZN9IOService19getExistingServicesEP12OSDictionarymm __ZN9IOService19getMatchingServicesEP12OSDictionary -__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService19powerOverrideOnPrivEv __ZN9IOService19registerPowerDriverEPS_P14IOPMPowerStatem __ZN9IOService19start_PM_idle_timerEv @@ -2263,7 +2198,6 @@ __ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_ __ZN9IOService20getDeviceMemoryCountEv 
__ZN9IOService20powerOverrideOffPrivEv __ZN9IOService20unlockForArbitrationEv -__ZN9IOService21doInstallNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator __ZN9IOService21getClientWithCategoryEPK8OSSymbol __ZN9IOService21powerStateDidChangeToEmmPS_ __ZN9IOService21temporaryPowerClampOnEv @@ -2271,14 +2205,13 @@ __ZN9IOService21unregisterAllInterestEv __ZN9IOService22PM_Clamp_Timer_ExpiredEv __ZN9IOService22acknowledgePowerChangeEPS_ __ZN9IOService22changePowerStateToPrivEm +__ZN9IOService22copyClientWithCategoryEPK8OSSymbol __ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection __ZN9IOService22powerStateWillChangeToEmmPS_ __ZN9IOService23acknowledgeNotificationEPvm __ZN9IOService23currentPowerConsumptionEv __ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection __ZN9IOService23requestPowerDomainStateEmP17IOPowerConnectionm -__ZN9IOService23scheduleTerminatePhase2Em -__ZN9IOService23syncNotificationHandlerEPvS0_PS_ __ZN9IOService23tellClientsWithResponseEi __ZN9IOService24PM_idle_timer_expirationEv __ZN9IOService24acknowledgeSetPowerStateEv @@ -2308,9 +2241,10 @@ __ZN9IOService8finalizeEm __ZN9IOService9MetaClassC1Ev __ZN9IOService9MetaClassC2Ev __ZN9IOService9metaClassE -__ZN9IOService9resourcesEv +__ZN9IOService9resourcesEv : __ZN9IOService18getResourceServiceEv __ZN9IOService9terminateEm __ZN9IOService9waitQuietEP13mach_timespec +__ZN9IOService9waitQuietEy __ZN9IOServiceC1EPK11OSMetaClass __ZN9IOServiceC1Ev __ZN9IOServiceC2EPK11OSMetaClass @@ -2515,8 +2449,6 @@ __ZNK15OSMetaClassBase8metaCastEPK8OSSymbol __ZNK15OSMetaClassBase8metaCastEPKc __ZNK15OSMetaClassBase9isEqualToEPKS_ __ZNK15OSMetaClassMeta5allocEv -__ZNK15_IOConfigThread12getMetaClassEv -__ZNK15_IOConfigThread9MetaClass5allocEv __ZNK16IOKitDiagnostics12getMetaClassEv __ZNK16IOKitDiagnostics9MetaClass5allocEv __ZNK16IOKitDiagnostics9serializeEP11OSSerialize @@ -2553,8 +2485,6 @@ __ZNK18_IOServiceNotifier12getMetaClassEv __ZNK18_IOServiceNotifier9MetaClass5allocEv 
__ZNK19IOPMPowerSourceList12getMetaClassEv __ZNK19IOPMPowerSourceList9MetaClass5allocEv -__ZNK19IOPMPowerStateQueue12getMetaClassEv -__ZNK19IOPMPowerStateQueue9MetaClass5allocEv __ZNK20IOLittleMemoryCursor12getMetaClassEv __ZNK20IOLittleMemoryCursor9MetaClass5allocEv __ZNK20OSCollectionIterator12getMetaClassEv @@ -2566,9 +2496,7 @@ __ZNK21IOInterruptController9MetaClass5allocEv __ZNK21IONaturalMemoryCursor12getMetaClassEv __ZNK21IONaturalMemoryCursor9MetaClass5allocEv __ZNK21IOSubMemoryDescriptor12getMetaClassEv -__ZNK21IOSubMemoryDescriptor19dmaCommandOperationEmPvj __ZNK21IOSubMemoryDescriptor9MetaClass5allocEv -__ZNK21IOSubMemoryDescriptor9serializeEP11OSSerialize __ZNK22IOInterruptEventSource11getIntIndexEv __ZNK22IOInterruptEventSource11getProviderEv __ZNK22IOInterruptEventSource12getMetaClassEv @@ -2753,7 +2681,6 @@ __ZTV15IORegistryPlane __ZTV15IOWatchDogTimer __ZTV15OSMetaClassBase __ZTV15OSMetaClassMeta -__ZTV15_IOConfigThread __ZTV16IOKitDiagnostics __ZTV16IOPMinformeeList __ZTV16IOPlatformDevice @@ -2769,7 +2696,6 @@ __ZTV18IOTimerEventSource __ZTV18IOUserNotification __ZTV18_IOServiceNotifier __ZTV19IOPMPowerSourceList -__ZTV19IOPMPowerStateQueue __ZTV20IOLittleMemoryCursor __ZTV20OSCollectionIterator __ZTV20RootDomainUserClient @@ -2830,7 +2756,6 @@ __ZTVN15IOPanicPlatform9MetaClassE __ZTVN15IORegistryEntry9MetaClassE __ZTVN15IORegistryPlane9MetaClassE __ZTVN15IOWatchDogTimer9MetaClassE -__ZTVN15_IOConfigThread9MetaClassE __ZTVN16IOKitDiagnostics9MetaClassE __ZTVN16IOPMinformeeList9MetaClassE __ZTVN16IOPlatformDevice9MetaClassE @@ -2845,7 +2770,6 @@ __ZTVN18IOTimerEventSource9MetaClassE __ZTVN18IOUserNotification9MetaClassE __ZTVN18_IOServiceNotifier9MetaClassE __ZTVN19IOPMPowerSourceList9MetaClassE -__ZTVN19IOPMPowerStateQueue9MetaClassE __ZTVN20IOLittleMemoryCursor9MetaClassE __ZTVN20OSCollectionIterator9MetaClassE __ZTVN20RootDomainUserClient9MetaClassE @@ -2890,7 +2814,6 @@ __printf __start _absolutetime_to_nanoseconds 
_acknowledgeSleepWakeNotification -_add_from_mkext_function _appleClut8 _argstrcpy _assert_wait @@ -2964,9 +2887,6 @@ _flush_dcache64 _gGearPict _gIOAppPowerStateInterest _gIOBusyInterest -_gIOCatalogCacheMisses -_gIOCatalogLock -_gIOCatalogModuleRequests _gIOCatalogue _gIOClassKey _gIOCommandPoolSizeKey @@ -2993,9 +2913,7 @@ _gIOFirstPublishNotification _gIOGeneralInterest _gIOInterruptControllersKey _gIOInterruptSpecifiersKey -_gIOKLDLock _gIOKernelConfigTables -_gIOKernelKmods _gIOKitDebug _gIOKitDebugKey _gIOLocationKey @@ -3010,7 +2928,6 @@ _gIONameMatchedKey _gIOParentMatchKey _gIOPathMatchKey _gIOPowerPlane -_gIOPrelinkedModules _gIOPriorityPowerStateInterest _gIOProbeScoreKey _gIOPropertyMatchKey @@ -3050,67 +2967,6 @@ _iokit_version_major:_version_major _iokit_version_minor:_version_minor _iokit_version_variant:_version_variant _ipc_port_release_send -_is_io_async_method_scalarI_scalarO -_is_io_async_method_scalarI_structureI -_is_io_async_method_scalarI_structureO -_is_io_async_method_structureI_structureO -_is_io_catalog_get_data -_is_io_catalog_get_gen_count -_is_io_catalog_module_loaded -_is_io_catalog_reset -_is_io_catalog_send_data -_is_io_catalog_terminate -_is_io_connect_add_client -_is_io_connect_get_notification_semaphore -_is_io_connect_get_service -_is_io_connect_map_memory -_is_io_connect_method_scalarI_scalarO -_is_io_connect_method_scalarI_structureI -_is_io_connect_method_scalarI_structureO -_is_io_connect_method_structureI_structureO -_is_io_connect_set_notification_port -_is_io_connect_set_properties -_is_io_connect_unmap_memory -_is_io_iterator_is_valid -_is_io_iterator_next -_is_io_iterator_reset -_is_io_make_matching -_is_io_object_conforms_to -_is_io_object_get_class -_is_io_object_get_retain_count -_is_io_registry_create_iterator -_is_io_registry_entry_create_iterator -_is_io_registry_entry_from_path -_is_io_registry_entry_get_child_iterator -_is_io_registry_entry_get_location_in_plane -_is_io_registry_entry_get_name 
-_is_io_registry_entry_get_name_in_plane -_is_io_registry_entry_get_parent_iterator -_is_io_registry_entry_get_path -_is_io_registry_entry_get_properties -_is_io_registry_entry_get_property -_is_io_registry_entry_get_property_bytes -_is_io_registry_entry_get_property_recursively -_is_io_registry_entry_in_plane -_is_io_registry_entry_set_properties -_is_io_registry_get_root_entry -_is_io_registry_iterator_enter_entry -_is_io_registry_iterator_exit_entry -_is_io_service_acknowledge_notification -_is_io_service_add_interest_notification -_is_io_service_add_notification -_is_io_service_add_notification_old -_is_io_service_add_notification_ool -_is_io_service_close -_is_io_service_get_busy_state -_is_io_service_get_matching_services -_is_io_service_get_matching_services_ool -_is_io_service_get_state -_is_io_service_match_property_table -_is_io_service_match_property_table_ool -_is_io_service_open -_is_io_service_request_probe -_is_io_service_wait_quiet _is_suser _is_suser1 _isargsep @@ -3125,7 +2981,6 @@ _kern_os_free _kern_os_malloc _kern_os_malloc_size _kern_os_realloc -_kernelLinkerPresent _kernel_debug _kernel_debug1 _kernel_map @@ -3135,14 +2990,6 @@ _kernel_thread _kfree _kmem_alloc _kmem_free -_kmod -_kmod_create_fake -_kmod_load_from_cache -_kmod_load_function -_kmod_load_request -_kmod_lock -_kmod_lookupbyname -_kmod_unload_cache _kprintf _libkern_builder:_osbuilder _libkern_osrelease:_osrelease @@ -3193,9 +3040,9 @@ _ml_thread_policy _mutex_alloc:_mutex_alloc_EXT _mutex_free:_mutex_free_EXT _mutex_init:_mutex_init_EXT -_mutex_lock:_mutex_lock_EXT -_mutex_try:_mutex_try_EXT -_mutex_unlock:_mutex_unlock_EXT +_mutex_lock:_lck_mtx_lock +_mutex_try:_lck_mtx_try_lock +_mutex_unlock:_lck_mtx_unlock _nanoseconds_to_absolutetime _nanotime _nanouptime @@ -3232,10 +3079,8 @@ _processor_info _processor_start _random _read_random -_record_startup_extensions_function _registerPrioritySleepWakeInterest _registerSleepWakeInterest -_remove_startup_extension_function 
_rootDomainRestart _rootDomainShutdown _semaphore_create diff --git a/config/System6.0.i386.exports b/config/System6.0.i386.exports index e876d829f..5cb3b501c 100644 --- a/config/System6.0.i386.exports +++ b/config/System6.0.i386.exports @@ -1,5 +1,11 @@ _PE_install_interrupt_handler _PE_interrupt_handler +_PE_parse_boot_arg +__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy +__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy +__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy _acpi_install_wake_handler _acpi_sleep_kernel _cpu_number @@ -8,9 +14,7 @@ _cpuid_features _cpuid_info _hfs_addconverter _hfs_remconverter -_kdreboot _lapic_end_of_interrupt -_lapic_smm_restore _ml_get_max_cpus _mp_broadcast _mp_cpus_call @@ -20,8 +24,6 @@ _mtrr_range_remove _rtc_clock_stepped _rtc_clock_stepping _smp_initialized -__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy -__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy +_sprintf _strcat _strcpy -_sprintf diff --git a/config/System6.0.ppc.exports b/config/System6.0.ppc.exports index 000f473b0..6b9d3ed8c 100644 --- a/config/System6.0.ppc.exports +++ b/config/System6.0.ppc.exports @@ -1,19 +1,19 @@ _CallTVector +_OSDequeueAtomic +_OSEnqueueAtomic _PE_Determine_Clock_Speeds _PE_find_scc _PE_init_taproot +_PE_parse_boot_arg _PE_read_write_time_of_day _PE_write_IIC _PPCcalls _ResetHandler -_OSEnqueueAtomic -_OSDequeueAtomic __Z11IODBDMAStopPV23IODBDMAChannelRegisters __Z12IODBDMAFlushPV23IODBDMAChannelRegisters __Z12IODBDMAPausePV23IODBDMAChannelRegisters __Z12IODBDMAResetPV23IODBDMAChannelRegisters __Z12IODBDMAStartPV23IODBDMAChannelRegistersPV17IODBDMADescriptor -__Z14RootRegisteredP8OSObjectPvP9IOService __Z15IODBDMAContinuePV23IODBDMAChannelRegisters __Z32IOFreePhysicallyContiguousMemoryPjj __Z36IOAllocatePhysicallyContiguousMemoryjjPjPm @@ -52,6 +52,9 @@ 
__ZN10AppleNVRAMC2EPK11OSMetaClass __ZN10AppleNVRAMC2Ev __ZN10AppleNVRAMD0Ev __ZN10AppleNVRAMD2Ev +__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm +__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy __ZN16AppleMacIODevice10gMetaClassE __ZN16AppleMacIODevice10superClassE __ZN16AppleMacIODevice12getResourcesEv @@ -237,17 +240,17 @@ _mapping_prealloc _mapping_relpre _ml_enable_cache_level _ml_enable_nap +_ml_mem_backoff _ml_ppc_sleep _ml_set_processor_speed _ml_set_processor_voltage _ml_throttle -_ml_mem_backoff _pe_do_clock_test _pe_run_clock_test _pmsRunLocal -_scc _rc4_crypt _rc4_init +_scc +_sprintf _strcat _strcpy -_sprintf diff --git a/config/System6.0.x86_64.exports b/config/System6.0.x86_64.exports new file mode 100644 index 000000000..60c3e3ad1 --- /dev/null +++ b/config/System6.0.x86_64.exports @@ -0,0 +1,3 @@ +_sprintf +_strcat +_strcpy diff --git a/config/Unsupported.exports b/config/Unsupported.exports index 8aab26874..81a51d5ea 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -7,7 +7,6 @@ _KUNCUserNotificationDisplayFromBundle _KUNCUserNotificationDisplayNotice _NDR_record _PE_kputc -_cons_ops __Z22OSFlushObjectTrackListv __ZN15IOWatchDogTimer10gMetaClassE __ZN15IOWatchDogTimer10superClassE @@ -26,6 +25,7 @@ __ZN15IOWatchDogTimerC2EPK11OSMetaClass __ZN15IOWatchDogTimerD0Ev __ZN15IOWatchDogTimerD2Ev __ZN16IOPlatformDevice10gMetaClassE +__ZN16IOPlatformDevice13matchLocationEP9IOService __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev __ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev @@ -39,18 +39,14 @@ __ZN9IODTNVRAM15initOFVariablesEv __ZN9IODTNVRAM15syncOFVariablesEv __ZN9IODTNVRAM16escapeDataToDataEP6OSData __ZN9IODTNVRAM16updateOWBootArgsEPK8OSSymbolP8OSObject -__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_ __ZN9IODTNVRAM18generateOWChecksumEPh __ZN9IODTNVRAM18validateOWChecksumEPh 
-__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject -__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject -__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm -__ZN9IODTNVRAM19unescapeBytesToDataEPKhm __ZN9IODTNVRAM22readNVRAMPropertyType0EP15IORegistryEntryPPK8OSSymbolPP6OSData __ZN9IODTNVRAM22readNVRAMPropertyType1EP15IORegistryEntryPPK8OSSymbolPP6OSData __ZN9IODTNVRAM23writeNVRAMPropertyType0EP15IORegistryEntryPK8OSSymbolP6OSData __ZN9IODTNVRAM23writeNVRAMPropertyType1EP15IORegistryEntryPK8OSSymbolP6OSData __ZN9IODTNVRAM26calculatePartitionChecksumEPh +__ZN9IODTNVRAM9metaClassE __ZN9IODTNVRAMC2EPK11OSMetaClass __ZN9IODTNVRAMD2Ev __ZNK15IOWatchDogTimer12getMetaClassEv @@ -69,15 +65,9 @@ _aes_decrypt_key128 _aes_encrypt_cbc _aes_encrypt_key128 _appleClut8 -_b_to_q -_bdevsw _boot -_bsd_set_dependency_capable -_cdevsw -_clalloc -_clfree -_clock_get_system_value _cons_cinput +_cons_ops _conslog_putc _convert_port_entry_to_map _convert_port_entry_to_object @@ -86,13 +76,11 @@ _delay _delay_for_interval _des_ecb_encrypt _des_set_key -_domains _gIODTSharedInterrupts _gOSObjectTrackList _gOSObjectTrackThread _gPEClockFrequencyInfo _gPESerialBaud -_get_aiotask _get_bsdtask_info _get_task_map _getsectdatafromheader @@ -102,21 +90,7 @@ _hfs_relconverter _host_get_special_port _host_priv_self _hz -_ifunit -_in_broadcast -_in_ifaddrhead -_in_pcb_get_owner -_in_pcb_grab_port -_in_pcb_letgo_port -_in_pcb_new_share_client -_in_pcb_rem_share_client -_inaddr_local -_inet_domain_mutex -_ip_mutex -_ip_output -_ip_protox _ipc_kernel_map -_ipc_port_release_send _ipflow_fastforward _kalloc _kauth_cred_issuser @@ -128,49 +102,24 @@ _kdp_set_interface _kdp_unregister_send_receive _kernel_map _kernel_pmap -_kernel_thread _kev_post_msg _kfree _kmem_alloc _kmem_free -_kmod -_kmod_create_fake -_kmod_create_fake_with_address -_kmod_destroy_fake -_kmod_lock -_kmod_lookupbyname _kmputc -_lbolt _lck_mtx_assert _lck_rw_done -_linesw -_lo_ifp +_ldisc_deregister 
+_ldisc_register _log -_logwakeup -_m_adj:_mbuf_adj -_m_cat -_m_copydata -_m_copym -_m_free:_mbuf_free -_m_freem:_mbuf_freem -_m_get -_m_gethdr -_m_getpacket -_m_getpackets -_m_mclget -_m_prepend_2 -_m_pullup -_m_split -_m_trailingspace:_mbuf_trailingspace _mach_gss_accept_sec_context _mach_gss_init_sec_context _mach_make_memory_entry_64 _mach_memory_entry_page_op _mach_memory_entry_range_op -_mach_msg_rpc_from_kernel -_mach_msg_send_from_kernel_with_options +_mach_msg_rpc_from_kernel_proper +_mach_vm_region _max_mem -_mcl_to_paddr _mem_size _memory_object_page_op _mig_dealloc_reply_port @@ -187,137 +136,39 @@ _ml_processor_register _ml_thread_policy _mountroot_post_hook _msleep1 -_net_add_domain -_net_add_proto -_net_del_domain -_net_del_proto -_netboot_root _ovbcopy -_pffinddomain -_pffindproto _pmap_find_phys _populate_model_name _prf _processor_exit _processor_info _processor_start -_pru_abort_notsupp -_pru_accept_notsupp -_pru_bind_notsupp -_pru_connect2_notsupp -_pru_connect_notsupp -_pru_disconnect_notsupp -_pru_listen_notsupp -_pru_peeraddr_notsupp -_pru_rcvd_notsupp -_pru_rcvoob_notsupp -_pru_send_notsupp -_pru_sense_null -_pru_shutdown_notsupp -_pru_sockaddr_notsupp -_pru_sopoll_notsupp _putc -_q_to_b _rc4_crypt _rc4_init -_rootdev -_rootvp -_rt_mtx -_rt_setgate -_rtalloc1_locked -_rtfree -_rtrequest_locked -_rtunref -_sbappendaddr -_sbappendrecord -_sbflush -_sbspace _securelevel _sha1_hardware_hook _sleep -_soabort -_sobind -_socantrcvmore -_socantsendmore -_sock_getlistener -_sock_release -_sock_retain -_soclose -_soconnect -_socreate -_sodisconnect -_sofree -_sofreelastref -_soisconnected -_soisconnecting -_soisdisconnected -_soisdisconnecting -_sonewconn -_sooptcopyin -_sooptcopyout -_sopoll -_soreceive -_soreserve -_sorwakeup -_sosend -_sosetopt _stack_privilege _task_get_special_port _task_resume _task_suspend -_tcbinfo -_termioschars -_thread_call_func -_thread_call_func_cancel -_thread_call_func_delayed -_thread_call_is_delayed 
-_thread_cancel_timer -_thread_funnel_set _thread_notrigger -_thread_set_timer -_thread_set_timer_deadline -_timeout -_tk_nin -_tk_rawcc _tsleep -_ttioctl -_ttsetwater -_ttspeedtab -_ttwakeup -_ttwwakeup -_ttyclose -_ttyflush -_ttyinput -_ttymodem -_ttyselect -_udbinfo -_uio_iovsaddr -_uio_spacetype -_unputc -_untimeout _vfs_context_current -_vfs_context_get_special_port -_vfs_context_set_special_port _vfs_setlocklocal _vfs_update_vfsstat _vm_allocate _vm_deallocate _vm_map -_vm_map_copyin -_vm_map_copyin_common -_vm_map_copyout _vm_map_deallocate _vm_map_unwire _vm_map_wire _vm_protect _vm_region _vm_region_object_create -_vnode_getname -_vnode_getparent _vnode_isnamedstream -_vnode_putname _vnode_tag -_vnode_update_identity _vnop_getnamedstream_desc _vnop_kqfilt_add_desc _vnop_kqfilt_remove_desc diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports index 7720a4d41..bf2cedbf7 100644 --- a/config/Unsupported.i386.exports +++ b/config/Unsupported.i386.exports @@ -1,12 +1,47 @@ +__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_ +__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject +__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject +__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm +__ZN9IODTNVRAM19unescapeBytesToDataEPKhm _cpu_number +_domains _dsmos_page_transform_hook +_gPEEFIRuntimeServices _gPEEFISystemTable +_ifunit _in6addr_local +_in_broadcast +_inaddr_local +_inet_domain_mutex _io_map_spec +_ip_mutex +_ip_output +_ip_protox _kdp_register_callout _kdp_set_ip_and_mac_addresses +_kernel_flock +_kernel_thread _lapic_start +_lo_ifp +_m_adj +_m_cat +_m_copydata +_m_copym +_m_free:_mbuf_free +_m_freem:_mbuf_freem +_m_get +_m_gethdr +_m_getpacket +_m_getpackets +_m_mclget _m_mtod +_m_prepend_2 +_m_pullup +_m_split +_m_trailingspace:_mbuf_trailingspace +_mach_msg_rpc_from_kernel +_mach_msg_send_from_kernel_with_options +_mcl_to_paddr _ml_get_apicid _ml_get_maxbusdelay _ml_get_maxsnoop @@ -14,15 +49,77 @@ 
_ml_cpu_int_event_time _mp_rendezvous _mp_rendezvous_no_intrs _nd6_storelladdr +_net_add_domain +_net_add_proto +_net_del_domain +_net_del_proto +_pffinddomain +_pffindproto _pmCPUControl _pmKextRegister _pm_init_lock +_pru_abort_notsupp +_pru_accept_notsupp +_pru_bind_notsupp +_pru_connect2_notsupp +_pru_connect_notsupp +_pru_disconnect_notsupp +_pru_listen_notsupp +_pru_peeraddr_notsupp +_pru_rcvd_notsupp +_pru_rcvoob_notsupp +_pru_send_notsupp +_pru_sense_null +_pru_shutdown_notsupp +_pru_sockaddr_notsupp +_pru_sopoll_notsupp _real_ncpus _rtc_clock_napped +_sbappendaddr +_sbappendrecord +_sbflush +_sbspace _serial_getc _serial_init _serial_putc +_soabort +_sobind +_socantrcvmore +_socantsendmore +_sock_getlistener +_sock_release +_sock_retain +_soclose +_soconnect +_socreate +_sodisconnect +_sofree +_sofreelastref +_soisconnected +_soisconnecting +_soisdisconnected +_soisdisconnecting +_sonewconn +_sooptcopyin +_sooptcopyout +_sopoll +_soreceive +_soreserve +_sorwakeup +_sosend +_sosetopt +_tcbinfo _tmrCvt _tsc_get_info +_thread_call_func +_thread_call_func_cancel +_thread_call_func_delayed +_thread_call_is_delayed +_thread_cancel_timer +_thread_funnel_set +_thread_set_timer +_thread_set_timer_deadline +_udbinfo _hibernate_vm_lock _hibernate_vm_unlock +_clock_get_system_value diff --git a/config/Unsupported.ppc.exports b/config/Unsupported.ppc.exports index 24fffc53e..fbc85ede8 100644 --- a/config/Unsupported.ppc.exports +++ b/config/Unsupported.ppc.exports @@ -1,22 +1,46 @@ -_ASPgetmsg -_ASPputmsg _CallTVector _PPCcalls _PE_write_IIC __ZN19IODBDMAMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -_asp_open -_at_ioctl +__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_ +__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject +__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject +__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm +__ZN9IODTNVRAM19unescapeBytesToDataEPKhm +_domains _get_preemption_level 
-_gbuf_alloc_wait -_gref_alloc -_gref_close -_gref_wput _ignore_zero_fault +_ifunit _in6addr_local +_in_broadcast +_inaddr_local +_inet_domain_mutex +_ip_mutex +_ip_output +_ip_protox _killprint +_kernel_flock +_kernel_thread +_lo_ifp _mapping_prealloc _mapping_relpre +_m_adj +_m_cat +_m_copydata +_m_copym +_m_free:_mbuf_free +_m_freem:_mbuf_freem +_m_get +_m_gethdr +_m_getpacket +_m_getpackets +_m_mclget _m_mtod +_m_prepend_2 +_m_pullup +_m_split +_m_trailingspace:_mbuf_trailingspace +_mcl_to_paddr _ml_enable_cache_level _ml_enable_nap _ml_ppc_sleep @@ -24,9 +48,71 @@ _ml_set_processor_speed _ml_set_processor_voltage _ml_throttle _nd6_storelladdr +_net_add_domain +_net_add_proto +_net_del_domain +_net_del_proto +_pffinddomain +_pffindproto _pmsStart _pmsPark _pmsRun _pmsRunLocal _pmsBuild +_pru_abort_notsupp +_pru_accept_notsupp +_pru_bind_notsupp +_pru_connect2_notsupp +_pru_connect_notsupp +_pru_disconnect_notsupp +_pru_listen_notsupp +_pru_peeraddr_notsupp +_pru_rcvd_notsupp +_pru_rcvoob_notsupp +_pru_send_notsupp +_pru_sense_null +_pru_shutdown_notsupp +_pru_sockaddr_notsupp +_pru_sopoll_notsupp _ml_mem_backoff +_sbappendaddr +_sbappendrecord +_sbflush +_sbspace +_soabort +_sobind +_socantrcvmore +_socantsendmore +_sock_getlistener +_sock_release +_sock_retain +_soclose +_soconnect +_socreate +_sodisconnect +_sofree +_sofreelastref +_soisconnected +_soisconnecting +_soisdisconnected +_soisdisconnecting +_sonewconn +_sooptcopyin +_sooptcopyout +_sopoll +_soreceive +_soreserve +_sorwakeup +_sosend +_sosetopt +_tcbinfo +_thread_call_func +_thread_call_func_cancel +_thread_call_func_delayed +_thread_call_is_delayed +_thread_cancel_timer +_thread_funnel_set +_thread_set_timer +_thread_set_timer_deadline +_udbinfo +_clock_get_system_value diff --git a/config/Unsupported.x86_64.exports b/config/Unsupported.x86_64.exports new file mode 100644 index 000000000..f4dc69724 --- /dev/null +++ b/config/Unsupported.x86_64.exports @@ -0,0 +1,33 @@ 
+__ZN9IODTNVRAM17getOWVariableInfoEjPPK8OSSymbolPjS4_ +__ZN9IODTNVRAM19convertObjectToPropEPhPjPK8OSSymbolP8OSObject +__ZN9IODTNVRAM19convertPropToObjectEPhjS0_jPPK8OSSymbolPP8OSObject +__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPj +__ZN9IODTNVRAM19unescapeBytesToDataEPKhj +_cpu_number +_dsmos_page_transform_hook +_gPEEFIRuntimeServices +_gPEEFISystemTable +_io_map_spec +_kdp_register_callout +_kdp_set_ip_and_mac_addresses +_lapic_start +_ml_get_apicid +_ml_get_maxbusdelay +_ml_get_maxsnoop +_ml_cpu_int_event_time +_mp_rendezvous +_mp_rendezvous_no_intrs +_pmCPUControl +_pmKextRegister +_pm_init_lock +_real_ncpus +_rtc_clock_napped +_serial_getc +_serial_init +_serial_putc +_sock_release +_sock_retain +_tmrCvt +_tsc_get_info +_hibernate_vm_lock +_hibernate_vm_unlock diff --git a/config/compress-man-pages.pl b/config/compress-man-pages.pl index 7711919e0..1dbd5a173 100755 --- a/config/compress-man-pages.pl +++ b/config/compress-man-pages.pl @@ -74,8 +74,8 @@ sub usage { while(($count = scalar(@compress)) > 0) { $_ = $count > $N ? $N : $count; my @args = splice(@compress, 0, $_); - print "gzip -f @args\n"; - system('gzip', '-f', @args) == 0 or die "gzip failed\n";; + print "gzip -f -n @args\n"; + system('gzip', '-f', '-n', @args) == 0 or die "gzip failed\n";; } foreach my $list (@links) { my $main = shift(@$list); diff --git a/config/list_supported.sh b/config/list_supported.sh new file mode 100755 index 000000000..340632c88 --- /dev/null +++ b/config/list_supported.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# +# Copyright (c) 2008 Apple Inc. All rights reserved. +# +# @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +# +# This file contains Original Code and/or Modifications of Original Code +# as defined in and that are subject to the Apple Public Source License +# Version 2.0 (the 'License'). You may not use this file except in +# compliance with the License. 
Please obtain a copy of the License at +# http://www.opensource.apple.com/apsl/ and read it before using this +# file. +# +# The Original Code and all software distributed under the License are +# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +# Please see the License for the specific language governing rights and +# limitations under the License. +# +# @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +# +# list_supported.sh + +CONFIG_DIR=$1 +ARCH=$2 +TARGET_FILE=$3 + +SUPPORTED_KPI_FILES=( BSDKernel Mach IOKit Libkern ) +DEPENDENCY_NAMES=( com.apple.kpi.bsd com.apple.kpi.mach com.apple.kpi.iokit com.apple.kpi.libkern ) + +rm -f $TARGET_FILE + +if [ ${ARCH} == "ALL" ] +then + echo "The following symbols are considered sustainable KPI on all architectures." >> $TARGET_FILE + echo "Note that symbols may be exported by some (or all) architectures individually." >> $TARGET_FILE +else + echo "The following symbols are considered sustainable KPI on architecture ${ARCH}." 
>> $TARGET_FILE +fi +echo >> $TARGET_FILE + +for (( i = 0 ; i < ${#SUPPORTED_KPI_FILES[@]} ; i++ )) +do + echo "Exported by ${DEPENDENCY_NAMES[i]}:" >> $TARGET_FILE + echo >> $TARGET_FILE + if [ $ARCH == "ALL" ] + then + cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> $TARGET_FILE + else + cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.${ARCH}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> $TARGET_FILE + fi + echo >> $TARGET_FILE +done diff --git a/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp b/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp index 7231090c3..846d0aaa1 100644 --- a/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp +++ b/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,6 +45,8 @@ extern "C" { #include +#include + #include #include @@ -171,6 +173,7 @@ bool AppleMacIO::selfTest( void ) UInt32 i; UInt32 status; IODBDMADescriptor *dmaDesc; + IOBufferMemoryDescriptor *buffer; volatile IODBDMAChannelRegisters *ioBaseDMA; bool ok = false; enum { kTestChannel = 0x8000 }; @@ -180,7 +183,9 @@ bool AppleMacIO::selfTest( void ) + kTestChannel ); do { - dmaDescriptors = (IODBDMADescriptor *)IOMallocContiguous(page_size, 1, & dmaDescriptorsPhys); + buffer = IOBufferMemoryDescriptor::withCapacity(page_size, kIODirectionOutIn, true); + dmaDescriptors = (IODBDMADescriptor*)buffer->getBytesNoCopy(); + if (!dmaDescriptors) continue; @@ -206,6 +211,8 @@ bool AppleMacIO::selfTest( void ) dmaDesc++; + dmaDescriptorsPhys = (UInt32) (buffer->getPhysicalSegment(0, NULL, 0)); + IOMakeDBDMADescriptorDep( dmaDesc, kdbdmaStoreQuad, kdbdmaKeySystem, @@ -249,9 +256,8 @@ bool AppleMacIO::selfTest( void ) } while (false); - if (dmaDescriptors) - IOFreeContiguous(dmaDescriptors, page_size); - + if (buffer) + 
buffer->release(); return ok; } diff --git a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp index 07da66d56..bf941dcc5 100644 --- a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp +++ b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,7 +42,7 @@ extern "C" { #include } -bool RootRegistered( OSObject * us, void *, IOService * yourDevice ); +bool RootRegistered( OSObject * us, void *, IOService * yourDevice, __unused IONotifier * yourNotifier ); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -70,7 +70,7 @@ bool AppleNMI::start(IOService *provider) mask_NMI = TRUE; // Flag to mask/unmask NMI @ sleep/wake // Get notified when Root Domain registers - addNotification( gIOPublishNotification, serviceMatching("IOPMrootDomain"), (IOServiceNotificationHandler)RootRegistered, this, 0 ); + addMatchingNotification( gIOPublishNotification, serviceMatching("IOPMrootDomain"), (IOServiceMatchingNotificationHandler) RootRegistered, this, 0 ); // Register the interrupt. 
IOInterruptAction handler = OSMemberFunctionCast(IOInterruptAction, @@ -85,7 +85,7 @@ bool AppleNMI::start(IOService *provider) // The Root Power Domain has registered, so now we register as an interested driver // so we know when the system is going to sleep or wake // ********************************************************************************** -bool RootRegistered( OSObject * us, void *, IOService * yourDevice ) +bool RootRegistered( OSObject * us, void *, IOService * yourDevice, __unused IONotifier * yourNotifier) { if ( yourDevice != NULL ) { ((AppleNMI *)us)->rootDomain = yourDevice; diff --git a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp b/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp index 805d034bd..3391d7863 100644 --- a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp +++ b/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,9 +83,9 @@ IOReturn AppleNVRAM::read(IOByteCount offset, UInt8 *buffer, IOByteCount length) { UInt32 cnt; - - if ((buffer == 0) || (length <= 0) || (offset < 0) || - (offset + length > kNVRAMImageSize)) + + // length and offset can't be less than zero (unsigned), so we don't check + if ((buffer == 0) || (length == 0) || (offset + length > kNVRAMImageSize)) return kIOReturnBadArgument; switch (_nvramType) { @@ -122,8 +122,8 @@ IOReturn AppleNVRAM::write(IOByteCount offset, UInt8 *buffer, { UInt32 cnt; - if ((buffer == 0) || (length <= 0) || (offset < 0) || - (offset + length > kNVRAMImageSize)) + // length and offset can't be less than zero (unsigned), so we don't check + if ((buffer == 0) || (length == 0) || (offset + length > kNVRAMImageSize)) return kIOReturnBadArgument; switch (_nvramType) { diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h index f42f66489..babd068fd 100644 --- a/iokit/IOKit/IOBufferMemoryDescriptor.h +++ b/iokit/IOKit/IOBufferMemoryDescriptor.h @@ -72,15 +72,18 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor vm_size_t _capacity; vm_offset_t _alignment; IOOptionBits _options; - IOPhysicalAddress * _physAddrs; - unsigned _physSegCount; +private: + uintptr_t _internalReserved; + unsigned _internalFlags; private: +#ifndef __LP64__ virtual bool initWithOptions( IOOptionBits options, vm_size_t capacity, vm_offset_t alignment, - task_t inTask); + task_t inTask) APPLE_KEXT_DEPRECATED; /* use withOptions() instead */ +#endif /* !__LP64__ */ virtual bool initWithPhysicalMask( task_t inTask, @@ -89,8 +92,13 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor mach_vm_address_t alignment, mach_vm_address_t physicalMask); +#ifdef __LP64__ + OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 0); + OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 1); +#else /* !__LP64__ */ 
OSMetaClassDeclareReservedUsed(IOBufferMemoryDescriptor, 0); OSMetaClassDeclareReservedUsed(IOBufferMemoryDescriptor, 1); +#endif /* !__LP64__ */ OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 2); OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 3); OSMetaClassDeclareReservedUnused(IOBufferMemoryDescriptor, 4); @@ -109,38 +117,6 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor protected: virtual void free(); - virtual bool initWithAddress( void * address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithAddress( vm_address_t address, /* not supported */ - IOByteCount withLength, - IODirection withDirection, - task_t withTask ); - - virtual bool initWithPhysicalAddress( - IOPhysicalAddress address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithPhysicalRanges( - IOPhysicalRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - bool asReference = false ); - - virtual bool initWithRanges( IOVirtualRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - task_t withTask, - bool asReference = false ); - - IOGeneralMemoryDescriptor::withAddress; /* not supported */ - IOGeneralMemoryDescriptor::withPhysicalAddress; /* not supported */ - IOGeneralMemoryDescriptor::withPhysicalRanges; /* not supported */ - IOGeneralMemoryDescriptor::withRanges; /* not supported */ - IOGeneralMemoryDescriptor::withSubRange; /* not supported */ - public: /* @@ -150,9 +126,11 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor * hold capacity bytes. The descriptor's length is initially set to the * capacity. 
*/ +#ifndef __LP64__ virtual bool initWithOptions( IOOptionBits options, vm_size_t capacity, - vm_offset_t alignment); + vm_offset_t alignment) APPLE_KEXT_DEPRECATED; /* use withOptions() instead */ +#endif /* !__LP64__ */ static IOBufferMemoryDescriptor * withOptions( IOOptionBits options, vm_size_t capacity, @@ -207,16 +185,12 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor vm_size_t capacity, IODirection withDirection, bool withContiguousMemory = false); - /* - * initWithBytes: - * - * Initialize a new IOBufferMemoryDescriptor preloaded with bytes (copied). - * The descriptor's length and capacity are set to the input buffer's size. - */ +#ifndef __LP64__ virtual bool initWithBytes(const void * bytes, vm_size_t withLength, IODirection withDirection, - bool withContiguousMemory = false); + bool withContiguousMemory = false) APPLE_KEXT_DEPRECATED; /* use withBytes() instead */ +#endif /* !__LP64__ */ /* * withBytes: @@ -281,8 +255,10 @@ class IOBufferMemoryDescriptor : public IOGeneralMemoryDescriptor */ virtual bool appendBytes(const void *bytes, vm_size_t withLength); - /* DEPRECATED */ virtual void * getVirtualSegment(IOByteCount offset, - /* DEPRECATED */ IOByteCount * length); +#ifndef __LP64__ + virtual void * getVirtualSegment(IOByteCount offset, + IOByteCount * length) APPLE_KEXT_DEPRECATED; /* use getBytesNoCopy() instead */ +#endif /* !__LP64__ */ }; #endif /* !_IOBUFFERMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOCatalogue.h b/iokit/IOKit/IOCatalogue.h index f666bf426..d63943e6a 100644 --- a/iokit/IOKit/IOCatalogue.h +++ b/iokit/IOKit/IOCatalogue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2000 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998 Apple Inc. All rights reserved. 
* * HISTORY * @@ -62,7 +62,12 @@ class IOCatalogue : public OSObject IOLock * lock; SInt32 generation; +/* This stuff is no longer used at all but was exported in prior + * releases, so we keep it around for PPC/i386 only. + */ +#if __ppc__ || __i386__ IOLock * kld_lock; +#endif /* __ppc__ || __i386__ */ public: /*! @@ -209,7 +214,19 @@ class IOCatalogue : public OSObject virtual bool serialize(OSSerialize * s) const; bool serializeData(IOOptionBits kind, OSSerialize * s) const; + + /*! + @function removePersonalities + @abstract Remove exact personalities from the database. + @param personalitiesArray An array of personalities to remove. + @result Returns true if all personalities are removed successfully. Failure is due to a memory allocation failure. + */ + bool removePersonalities(OSArray * personalitiesArray); +/* This stuff is no longer used at all we keep it around for PPC/i386 + * binary compatibility only. Symbols are no longer exported. + */ +#if __ppc__ || __i386__ /*! @function recordStartupExtensions @abstract Records extensions made available by the primary booter. @@ -244,8 +261,7 @@ class IOCatalogue : public OSObject removed or wasn't present, KERN_FAILURE otherwise. */ virtual kern_return_t removeKernelLinker(void); - - static void disableExternalLinker(void); +#endif /* __ppc__ || __i386__ */ private: @@ -257,21 +273,8 @@ class IOCatalogue : public OSObject IOReturn unloadModule( OSString * moduleName ) const; }; -__BEGIN_DECLS -/*! - @function IOKitRelocStart - @abstract Deprecated API. -*/ -kmod_start_func_t IOKitRelocStart; -/*! - @function IOKitRelocStop - @abstract Deprecated API. -*/ -kmod_stop_func_t IOKitRelocStop; -__END_DECLS - -extern const OSSymbol * gIOClassKey; -extern const OSSymbol * gIOProbeScoreKey; -extern IOCatalogue * gIOCatalogue; +extern const OSSymbol * gIOClassKey; +extern const OSSymbol * gIOProbeScoreKey; +extern IOCatalogue * gIOCatalogue; #endif /* ! 
_IOKIT_IOCATALOGUE_H */ diff --git a/iokit/IOKit/IOCommandGate.h b/iokit/IOKit/IOCommandGate.h index 21a0dceb3..1b17b791d 100644 --- a/iokit/IOKit/IOCommandGate.h +++ b/iokit/IOKit/IOCommandGate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2009 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,7 +44,7 @@ that executes an action on the driver's work-loop. 'On the work-loop' is actually a lie but the work-loop single threaded semantic is maintained for this event source. Using the work-loop gate rather than execution by the workloop. The command gate tests for a potential self dead lock by checking if the -runCommand request is made from the work-loop's thread, it doens't check for a +runCommand request is made from the work-loop's thread, it doesn't check for a mutual dead lock though where a pair of work loop's dead lock each other.

The IOCommandGate is a lighter weight version of the IOCommandQueue and @@ -54,7 +54,7 @@ check if the hardware is active, if so it will add the request to a pending queue internal to the device or the device's family. Otherwise if the hardware is inactive then this request can be acted upon immediately.

- CAUTION: The runAction and runCommand functions can not be called from an interrupt context. But attemptCommand can, though it may return an error + CAUTION: The runAction, runCommand, and attemptCommand functions cannot be called from an interrupt context. */ class IOCommandGate : public IOEventSource @@ -189,10 +189,10 @@ client's thread attemptCommand will fail if the work-loop's gate is closed. /*! @function commandSleep @abstract Put a thread that is currently holding the command gate to sleep. - @discussion Put a thread to sleep waiting for an event but release the gate first. If the event occurs then the commandGate is closed before the returns. + @discussion Put a thread to sleep waiting for an event but release the gate first. If the event occurs then the commandGate is closed before the function returns. @param event Pointer to an address. - @param interruptible THREAD_UNINT, THREAD_INTERRUPTIBLE or THREAD_ABORTSAFE, defaults to THREAD_ABORTSAFE. - @result THREAD_AWAKENED - normal wakeup, THREAD_TIMED_OUT - timeout expired, THREAD_INTERRUPTED - interrupted by clear_wait, THREAD_RESTART - restart operation entirely, kIOReturnNotPermitted if the calling thread does not hold the command gate. */ + @param interruptible THREAD_UNINT, THREAD_INTERRUPTIBLE or THREAD_ABORTSAFE. THREAD_UNINT specifies that the sleep cannot be interrupted by a signal. THREAD_INTERRUPTIBLE specifies that the sleep may be interrupted by a "kill -9" signal. THREAD_ABORTSAFE (the default value) specifies that the sleep may be interrupted by any user signal. + @result THREAD_AWAKENED - normal wakeup, THREAD_TIMED_OUT - timeout expired, THREAD_INTERRUPTED - interrupted, THREAD_RESTART - restart operation entirely, kIOReturnNotPermitted if the calling thread does not hold the command gate. */ virtual IOReturn commandSleep(void *event, UInt32 interruptible = THREAD_ABORTSAFE); @@ -212,8 +212,23 @@ client's thread attemptCommand will fail if the work-loop's gate is closed. 
@discussion Enable the command gate. The attemptAction/attemptCommand calls will now be enabled and can succeeed. Stalled runCommand/runAction calls will be woken up. */ virtual void enable(); +/*! @function commandSleep + @abstract Put a thread that is currently holding the command gate to sleep. + @discussion Put a thread to sleep waiting for an event but release the gate first. If the event occurs or timeout occurs then the commandGate is closed before the function returns. + @param event Pointer to an address. + @param deadline Clock deadline to timeout the sleep. + @param interruptible THREAD_UNINT, THREAD_INTERRUPTIBLE or THREAD_ABORTSAFE. THREAD_UNINT specifies that the sleep cannot be interrupted by a signal. THREAD_INTERRUPTIBLE specifies that the sleep may be interrupted by a "kill -9" signal. THREAD_ABORTSAFE specifies that the sleep may be interrupted by any user signal. + @result THREAD_AWAKENED - normal wakeup, THREAD_TIMED_OUT - timeout expired, THREAD_INTERRUPTED - interrupted, THREAD_RESTART - restart operation entirely, kIOReturnNotPermitted if the calling thread does not hold the command gate. 
*/ + virtual IOReturn commandSleep(void *event, + AbsoluteTime deadline, + UInt32 interruptible); + private: +#if __LP64__ OSMetaClassDeclareReservedUnused(IOCommandGate, 0); +#else + OSMetaClassDeclareReservedUsed(IOCommandGate, 0); +#endif OSMetaClassDeclareReservedUnused(IOCommandGate, 1); OSMetaClassDeclareReservedUnused(IOCommandGate, 2); OSMetaClassDeclareReservedUnused(IOCommandGate, 3); diff --git a/iokit/IOKit/IOCommandQueue.h b/iokit/IOKit/IOCommandQueue.h index 5337cdfa5..1dfc5270e 100644 --- a/iokit/IOKit/IOCommandQueue.h +++ b/iokit/IOKit/IOCommandQueue.h @@ -41,9 +41,6 @@ HISTORY #include - -#define DEPRECATED __attribute__((deprecated)) - class IOCommandQueue; typedef void (*IOCommandQueueAction) @@ -70,16 +67,16 @@ class IOCommandQueue : public IOEventSource static IOCommandQueue *commandQueue(OSObject *inOwner, IOCommandQueueAction inAction = 0, int inSize = kIOCQDefaultSize) - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual bool init(OSObject *inOwner, IOCommandQueueAction inAction = 0, int inSize = kIOCQDefaultSize) - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual kern_return_t enqueueCommand(bool gotoSleep = true, void *field0 = 0, void *field1 = 0, void *field2 = 0, void *field3 = 0) - DEPRECATED; + APPLE_KEXT_DEPRECATED; // WARNING: This function can only be safely called from the appropriate // work loop context. You should check IOWorkLoop::onThread is true. @@ -89,7 +86,7 @@ class IOCommandQueue : public IOEventSource // If the input fields are zero then the queue's owner/action will be used. 
virtual int performAndFlush(OSObject *target = 0, IOCommandQueueAction inAction = 0) - DEPRECATED; + APPLE_KEXT_DEPRECATED; }; #endif /* !_IOKIT_IOCOMMANDQUEUE_H */ diff --git a/iokit/IOKit/IODMACommand.h b/iokit/IOKit/IODMACommand.h index f92b86368..a2a2852f3 100644 --- a/iokit/IOKit/IODMACommand.h +++ b/iokit/IOKit/IODMACommand.h @@ -362,6 +362,12 @@ friend class IODMAEventSource; UInt32 *numSegments) { return genIOVMSegments(offset, segments, numSegments); }; + IOReturn + genIOVMSegments(SegmentFunction segmentFunction, + UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP); + virtual void free(); private: @@ -372,7 +378,8 @@ friend class IODMAEventSource; void *segments, UInt32 segmentIndex); - IOReturn genIOVMSegments(InternalSegmentFunction outSegFunc, + IOReturn genIOVMSegments(uint32_t op, + InternalSegmentFunction outSegFunc, void *reference, UInt64 *offsetP, void *segmentsP, @@ -381,7 +388,7 @@ friend class IODMAEventSource; static IOReturn clientOutputSegment( void *reference, IODMACommand *target, Segment64 segment, void *vSegList, UInt32 outSegIndex); - + static IOReturn segmentOp( void *reference, IODMACommand *target, @@ -428,10 +435,22 @@ friend class IODMAEventSource; void *segments, UInt32 segmentIndex); +/*! @function getPreparedOffsetAndLength + @abstract Returns the offset and length into the target IOMemoryDescriptor of a prepared IODDMACommand. + @discussion If successfully prepared, returns the offset and length into the IOMemoryDescriptor. Will fail for an unprepared IODMACommand. + @param offset returns the starting offset in the memory descriptor the DMA command was prepared with. Pass NULL for don't care. + @param length returns the length in the memory descriptor the DMA command was prepared with. Pass NULL for don't care. + @result An IOReturn code. kIOReturnNotReady if the IODMACommand is not prepared. 
*/ + + virtual IOReturn getPreparedOffsetAndLength(UInt64 * offset, UInt64 * length); + + UInt8 getNumAddressBits(void); + UInt32 getAlignment(void); + private: OSMetaClassDeclareReservedUsed(IODMACommand, 0); OSMetaClassDeclareReservedUsed(IODMACommand, 1); - OSMetaClassDeclareReservedUnused(IODMACommand, 2); + OSMetaClassDeclareReservedUsed(IODMACommand, 2); OSMetaClassDeclareReservedUnused(IODMACommand, 3); OSMetaClassDeclareReservedUnused(IODMACommand, 4); OSMetaClassDeclareReservedUnused(IODMACommand, 5); diff --git a/iokit/IOKit/IODMAController.h b/iokit/IOKit/IODMAController.h index 1d30052e7..a8c1aed8a 100644 --- a/iokit/IOKit/IODMAController.h +++ b/iokit/IOKit/IODMAController.h @@ -50,10 +50,11 @@ class IODMAController : public IOService virtual IOReturn initDMAChannel(IOService *provider, IODMAEventSource *dmaES, UInt32 *dmaIndex, UInt32 reqIndex) = 0; virtual IOReturn startDMACommand(UInt32 dmaIndex, IODMACommand *dmaCommand, IODirection direction, IOByteCount byteCount = 0, IOByteCount byteOffset = 0) = 0; - virtual IOReturn stopDMACommand(UInt32 dmaIndex, bool flush = false, mach_timespec_t * timeout = 0) = 0; + virtual IOReturn stopDMACommand(UInt32 dmaIndex, bool flush = false, uint64_t timeout = UINT64_MAX) = 0; virtual void completeDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand); virtual void notifyDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount); virtual IOReturn queryDMACommand(UInt32 dmaIndex, IODMACommand **dmaCommand, IOByteCount *transferCount, bool waitForIdle = false) = 0; + virtual IOByteCount getFIFODepth(UInt32 dmaIndex) = 0; public: static const OSSymbol *createControllerName(UInt32 phandle); diff --git a/iokit/IOKit/IODMAEventSource.h b/iokit/IOKit/IODMAEventSource.h index 4659c3247..18a72de50 100644 --- a/iokit/IOKit/IODMAEventSource.h +++ b/iokit/IOKit/IODMAEventSource.h @@ -58,9 +58,10 @@ class IODMAEventSource : public IOEventSource UInt32 dmaIndex = 0); 
virtual IOReturn startDMACommand(IODMACommand *dmaCommand, IODirection direction, IOByteCount byteCount = 0, IOByteCount byteOffset = 0); - virtual IOReturn stopDMACommand(bool flush = false, mach_timespec_t *timeout = 0); + virtual IOReturn stopDMACommand(bool flush = false, uint64_t timeout = UINT64_MAX); virtual IOReturn queryDMACommand(IODMACommand **dmaCommand, IOByteCount *transferCount, bool waitForIdle = false); + virtual IOByteCount getFIFODepth(); private: IOService *dmaProvider; diff --git a/iokit/IOKit/IODeviceTreeSupport.h b/iokit/IOKit/IODeviceTreeSupport.h index eacf339bc..15b5aa4b4 100644 --- a/iokit/IOKit/IODeviceTreeSupport.h +++ b/iokit/IOKit/IODeviceTreeSupport.h @@ -88,8 +88,6 @@ OSArray * IODTResolveAddressing( IORegistryEntry * regEntry, const char * addressPropertyName, IODeviceMemory * parent ); -#pragma options align=mac68k - struct IONVRAMDescriptor { unsigned int format:4; unsigned int marker:1; @@ -98,9 +96,7 @@ struct IONVRAMDescriptor { unsigned int bridgeDevices:6 * 5; unsigned int functionNum:3; unsigned int deviceNum:5; -}; - -#pragma options align=reset +} __attribute__((aligned(2), packed)); IOReturn IODTMakeNVDescriptor( IORegistryEntry * regEntry, IONVRAMDescriptor * hdr ); diff --git a/iokit/IOKit/IOEventSource.h b/iokit/IOKit/IOEventSource.h index ae6060b95..4afc5aa99 100644 --- a/iokit/IOKit/IOEventSource.h +++ b/iokit/IOKit/IOEventSource.h @@ -182,18 +182,13 @@ IOWorkLoop that at least reacts to signalWorkAvailable() and onThread functions. 
protected: // Methods to access the IOWorkLoop exported fields - /* inline */ void signalWorkAvailable(); - /* { workLoop->signalWorkAvailable(); }; */ - /* inline */ void openGate(); - /* { workLoop->openGate(); }; */ - /* inline */ void closeGate(); - /* { workLoop->closeGate(); }; */ - /* inline */ bool tryCloseGate(); - /* { return workLoop->tryCloseGate(); }; */ - /* inline */ int sleepGate(void *event, UInt32 type); - /* { return workLoop->sleepGate(event, type); }; */ - /* inline */ void wakeupGate(void *event, bool oneThread); - /* { workLoop->wakeupGate(event, oneThread); }; */ + void signalWorkAvailable(); + void openGate(); + void closeGate(); + bool tryCloseGate(); + int sleepGate(void *event, UInt32 type); + int sleepGate(void *event, AbsoluteTime deadline, UInt32 type); + void wakeupGate(void *event, bool oneThread); public: /*! @function setAction diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index ab897a850..2c9a5b062 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -73,11 +73,11 @@ struct IOHibernateImageHeader uint32_t runtimePages; uint32_t runtimePageCount; - - uint8_t reserved2[16]; + uint64_t runtimeVirtualPages __attribute__ ((packed)); + uint8_t reserved2[8]; - uint64_t encryptStart; - uint64_t machineSignature; + uint64_t encryptStart __attribute__ ((packed)); + uint64_t machineSignature __attribute__ ((packed)); uint32_t previewSize; uint32_t previewPageListSize; @@ -90,13 +90,19 @@ struct IOHibernateImageHeader uint32_t memoryMapSize; uint32_t systemTableOffset; - uint32_t reserved[77]; // make sizeof == 512 + uint32_t debugFlags; + + uint32_t reserved[76]; // make sizeof == 512 uint32_t fileExtentMapSize; IOPolledFileExtent fileExtentMap[2]; }; typedef struct IOHibernateImageHeader IOHibernateImageHeader; +enum +{ + kIOHibernateDebugRestoreLogs = 0x00000001 +}; struct hibernate_bitmap_t { @@ -240,6 +246,11 @@ hibernate_teardown(hibernate_page_list_t * page_list, 
kern_return_t hibernate_processor_setup(IOHibernateImageHeader * header); +void +hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time); +void +hibernate_free_gobble_pages(void); + void hibernate_vm_lock(void); void @@ -303,6 +314,7 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, extern uint32_t gIOHibernateState; extern uint32_t gIOHibernateMode; +extern uint32_t gIOHibernateDebugFlags; extern uint32_t gIOHibernateFreeTime; // max time to spend freeing pages (ms) extern uint8_t gIOHibernateRestoreStack[]; extern uint8_t gIOHibernateRestoreStackEnd[]; diff --git a/iokit/IOKit/IOInterleavedMemoryDescriptor.h b/iokit/IOKit/IOInterleavedMemoryDescriptor.h index 960dded40..e5c2a943f 100644 --- a/iokit/IOKit/IOInterleavedMemoryDescriptor.h +++ b/iokit/IOKit/IOInterleavedMemoryDescriptor.h @@ -41,7 +41,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor protected: - UInt32 _descriptorCapacity; + IOByteCount _descriptorCapacity; UInt32 _descriptorCount; IOMemoryDescriptor ** _descriptors; IOByteCount * _descriptorOffsets; @@ -50,45 +50,6 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor virtual void free(); - /* - * These methods are not supported under this subclass. 
- */ - - virtual bool initWithAddress( void * address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithAddress( vm_address_t address, /* not supported */ - IOByteCount withLength, - IODirection withDirection, - task_t withTask ); - - virtual bool initWithPhysicalAddress( - IOPhysicalAddress address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithPhysicalRanges( - IOPhysicalRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - bool asReference = false ); - - virtual bool initWithRanges( IOVirtualRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - task_t withTask, - bool asReference = false ); - - virtual void * getVirtualSegment( IOByteCount offset, /* not supported */ - IOByteCount * length ); - - IOMemoryDescriptor::withAddress; /* not supported */ - IOMemoryDescriptor::withPhysicalAddress; /* not supported */ - IOMemoryDescriptor::withPhysicalRanges; /* not supported */ - IOMemoryDescriptor::withRanges; /* not supported */ - IOMemoryDescriptor::withSubRange; /* not supported */ - public: /*! @function withCapacity @@ -98,7 +59,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @param direction An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. @result The created IOInterleavedMemoryDescriptor on success, to be released by the caller, or zero on failure. */ - static IOInterleavedMemoryDescriptor * withCapacity( UInt32 capacity, + static IOInterleavedMemoryDescriptor * withCapacity( IOByteCount capacity, IODirection direction); /*! 
@function initWithCapacity @@ -108,7 +69,7 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @param direction An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. @result The created IOInterleavedMemoryDescriptor on success, to be released by the caller, or zero on failure. */ - virtual bool initWithCapacity( UInt32 capacity, + virtual bool initWithCapacity( IOByteCount capacity, IODirection direction ); /*! @function clearMemoryDescriptors @@ -137,11 +98,9 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @param length If non-zero, getPhysicalSegment will store here the length of the physically contiguous segement at the given offset. @result A physical address, or zero if the offset is beyond the length of the memory. */ - virtual IOPhysicalAddress getPhysicalSegment( IOByteCount offset, - IOByteCount * length ); - - virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, + IOOptionBits options = 0 ); /*! @function prepare @abstract Prepare the memory for an I/O transfer. @@ -158,9 +117,6 @@ class IOInterleavedMemoryDescriptor : public IOMemoryDescriptor @result An IOReturn code. 
*/ virtual IOReturn complete(IODirection forDirection = kIODirectionNone); - - virtual IOPhysicalAddress getSourceSegment(IOByteCount offset, - IOByteCount * length); }; #endif /* !_IOINTERLEAVEDMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOInterruptController.h b/iokit/IOKit/IOInterruptController.h index d99523594..71f55e549 100644 --- a/iokit/IOKit/IOInterruptController.h +++ b/iokit/IOKit/IOInterruptController.h @@ -50,7 +50,7 @@ struct IOInterruptVector { volatile char interruptRegistered; IOLock * interruptLock; IOService * nub; - long source; + int source; void * target; IOInterruptHandler handler; void * refCon; @@ -59,6 +59,11 @@ struct IOInterruptVector { typedef struct IOInterruptVector IOInterruptVector; +#if __LP64__ +typedef int32_t IOInterruptVectorNumber; +#else +typedef long IOInterruptVectorNumber; +#endif class IOInterruptController : public IOService { @@ -91,12 +96,12 @@ class IOInterruptController : public IOService // Methods to be overridden for simplifed interrupt controller subclasses. 
- virtual bool vectorCanBeShared(long vectorNumber, IOInterruptVector *vector); - virtual void initVector(long vectorNumber, IOInterruptVector *vector); - virtual int getVectorType(long vectorNumber, IOInterruptVector *vector); - virtual void disableVectorHard(long vectorNumber, IOInterruptVector *vector); - virtual void enableVector(long vectorNumber, IOInterruptVector *vector); - virtual void causeVector(long vectorNumber, IOInterruptVector *vector); + virtual bool vectorCanBeShared(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); + virtual void initVector(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); + virtual int getVectorType(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); + virtual void disableVectorHard(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); + virtual void enableVector(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); + virtual void causeVector(IOInterruptVectorNumber vectorNumber, IOInterruptVector *vector); OSMetaClassDeclareReservedUnused(IOInterruptController, 0); OSMetaClassDeclareReservedUnused(IOInterruptController, 1); @@ -116,7 +121,7 @@ class IOSharedInterruptController : public IOInterruptController int numVectors; int vectorsRegistered; int vectorsEnabled; - volatile long controllerDisabled; + volatile int controllerDisabled; bool sourceIsLevel; struct ExpansionData { }; diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index 954f1bd5f..499faa3c8 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -54,48 +54,54 @@ class IOKitDiagnostics : public OSObject virtual bool serialize(OSSerialize *s) const; private: static void updateOffset( OSDictionary * dict, - UInt32 value, const char * name ); + UInt32 value, const char * name ); }; -#endif __cplusplus +#endif /* __cplusplus */ enum { -// loggage - kIOLogAttach = 0x00000001ULL, - kIOLogProbe = 0x00000002ULL, - kIOLogStart = 0x00000004ULL, - kIOLogRegister = 
0x00000008ULL, - kIOLogMatch = 0x00000010ULL, - kIOLogConfig = 0x00000020ULL, - kIOLogYield = 0x00000040ULL, - kIOLogPower = 0x00000080ULL, - kIOLogMapping = 0x00000100ULL, - kIOLogCatalogue = 0x00000200ULL, - kIOLogTracePower = 0x00000400ULL, - kIOLogDebugPower = 0x00000800ULL, - kIOLogServiceTree = 0x00001000ULL, - kIOLogDTree = 0x00002000ULL, - kIOLogMemory = 0x00004000ULL, - kIOLogKextMemory = 0x00008000ULL, - kOSLogRegistryMods = 0x00010000ULL, // Log attempts to modify registry collections - -// debug aids - change behaviour - kIONoFreeObjects = 0x00100000ULL, - kIOLogSynchronous = 0x00200000ULL, // IOLog completes synchrounsly - kOSTraceObjectAlloc = 0x00400000ULL, - - _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits + // loggage + kIOLogAttach = 0x00000001ULL, + kIOLogProbe = 0x00000002ULL, + kIOLogStart = 0x00000004ULL, + kIOLogRegister = 0x00000008ULL, + kIOLogMatch = 0x00000010ULL, + kIOLogConfig = 0x00000020ULL, + kIOLogYield = 0x00000040ULL, + kIOLogPower = 0x00000080ULL, + kIOLogMapping = 0x00000100ULL, + kIOLogCatalogue = 0x00000200ULL, + kIOLogTracePower = 0x00000400ULL, + kIOLogDebugPower = 0x00000800ULL, + kIOLogServiceTree = 0x00001000ULL, + kIOLogDTree = 0x00002000ULL, + kIOLogMemory = 0x00004000ULL, + kIOLogKextMemory = 0x00008000ULL, + kOSLogRegistryMods = 0x00010000ULL, // Log attempts to modify registry collections + kIOLogPMRootDomain = 0x00020000ULL, + kOSRegistryModsMode = 0x00040000ULL, // Change default registry modification handling - panic vs. 
log + kIOTraceIOService = 0x00080000ULL, + kIOLogHibernate = 0x00100000ULL, + + // debug aids - change behaviour + kIONoFreeObjects = 0x00100000ULL, + kIOLogSynchronous = 0x00200000ULL, // IOLog completes synchronously + kOSTraceObjectAlloc = 0x00400000ULL, + + _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits }; -extern SInt64 gIOKitDebug; +extern SInt64 gIOKitDebug; #ifdef __cplusplus extern "C" { #endif struct IORegistryPlane; -extern void IOPrintPlane( const struct IORegistryPlane * plane ); -extern void OSPrintMemory( void ); +extern void IOPrintPlane( const struct IORegistryPlane * plane ); +#ifndef _OSCPPDEBUG_H +extern void OSPrintMemory( void ); +#endif #define IOPrintMemory OSPrintMemory #ifdef __cplusplus diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h index ed5843340..62395d54d 100644 --- a/iokit/IOKit/IOKitKeys.h +++ b/iokit/IOKit/IOKitKeys.h @@ -53,6 +53,9 @@ #define kIOFireWirePlane "IOFireWire" #define kIOUSBPlane "IOUSB" +// registry ID number +#define kIORegistryEntryIDKey "IORegistryEntryID" + // IOService class name #define kIOServiceClass "IOService" @@ -82,9 +85,14 @@ // IOService default user client class, for loadable user clients #define kIOUserClientClassKey "IOUserClientClass" +// key to find IOMappers +#define kIOMapperIDKey "IOMapperID" + #define kIOUserClientCrossEndianKey "IOUserClientCrossEndian" #define kIOUserClientCrossEndianCompatibleKey "IOUserClientCrossEndianCompatible" #define kIOUserClientSharedInstanceKey "IOUserClientSharedInstance" +// diagnostic string describing the creating task +#define kIOUserClientCreatorKey "IOUserClientCreator" // IOService notification types #define kIOPublishNotification "IOServicePublish" diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index 489b2f58d..73e93db0f 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -32,26 +32,25 @@ #include // properties found in the registry root -#define 
kIOConsoleUsersKey "IOConsoleUsers" /* value is OSArray */ -#define kIOMaximumMappedIOByteCountKey "IOMaximumMappedIOByteCount" /* value is OSNumber */ -#define kIOStartupMkextCRC "IOStartupMkextCRC" /* value is 32-bit OSNumber */ +#define kIOConsoleUsersKey "IOConsoleUsers" /* value is OSArray */ +#define kIOMaximumMappedIOByteCountKey "IOMaximumMappedIOByteCount" /* value is OSNumber */ // properties found in the console user dict -#define kIOConsoleSessionIDKey "kCGSSessionIDKey" /* value is OSNumber */ +#define kIOConsoleSessionIDKey "kCGSSessionIDKey" /* value is OSNumber */ -#define kIOConsoleSessionUserNameKey "kCGSSessionUserNameKey" /* value is OSString */ -#define kIOConsoleSessionUIDKey "kCGSSessionUserIDKey" /* value is OSNumber */ -#define kIOConsoleSessionConsoleSetKey "kCGSSessionConsoleSetKey" /* value is OSNumber */ -#define kIOConsoleSessionOnConsoleKey "kCGSSessionOnConsoleKey" /* value is OSBoolean */ -#define kIOConsoleSessionSecureInputPIDKey "kCGSSessionSecureInputPID" /* value is OSNumber */ +#define kIOConsoleSessionUserNameKey "kCGSSessionUserNameKey" /* value is OSString */ +#define kIOConsoleSessionUIDKey "kCGSSessionUserIDKey" /* value is OSNumber */ +#define kIOConsoleSessionConsoleSetKey "kCGSSessionConsoleSetKey" /* value is OSNumber */ +#define kIOConsoleSessionOnConsoleKey "kCGSSessionOnConsoleKey" /* value is OSBoolean */ +#define kIOConsoleSessionSecureInputPIDKey "kCGSSessionSecureInputPID" /* value is OSNumber */ // IOResources property -#define kIOConsoleUsersSeedKey "IOConsoleUsersSeed" /* value is OSNumber */ +#define kIOConsoleUsersSeedKey "IOConsoleUsersSeed" /* value is OSNumber */ // private keys for clientHasPrivilege -#define kIOClientPrivilegeConsoleUser "console" -#define kIOClientPrivilegeSecureConsoleProcess "secureprocess" +#define kIOClientPrivilegeConsoleUser "console" +#define kIOClientPrivilegeSecureConsoleProcess "secureprocess" // clientHasPrivilege security token for kIOClientPrivilegeSecureConsoleProcess 
typedef struct _IOUCProcessToken { @@ -59,22 +58,22 @@ typedef struct _IOUCProcessToken { UInt32 pid; } IOUCProcessToken; -#define kIOKernelHasSafeSleep 1 +#define kIOKernelHasSafeSleep 1 enum { kIOPrepareToPhys32 = 0x04 }; #define kIODirectionPrepareToPhys32 ((IODirection) kIOPrepareToPhys32) -#define kIOPlatformSleepActionKey "IOPlatformSleepAction" /* value is OSNumber (priority) */ -#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ -#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ -#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ +#define kIOPlatformSleepActionKey "IOPlatformSleepAction" /* value is OSNumber (priority) */ +#define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ +#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ +#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */ -#define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet" -#if defined(__i386__) -#define kIOPlatformFunctionHandlerMaxBusDelay "IOPlatformFunctionHandlerMaxBusDelay" -#define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay" -#endif /* defined(__i386__) */ +#define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet" +#if defined(__i386__) || defined(__x86_64__) +#define kIOPlatformFunctionHandlerMaxBusDelay "IOPlatformFunctionHandlerMaxBusDelay" +#define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay" +#endif /* defined(__i386__) || defined(__x86_64__) */ #endif /* ! 
_IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOKitServer.h b/iokit/IOKit/IOKitServer.h index 5bfc7862a..a68c99243 100644 --- a/iokit/IOKit/IOKitServer.h +++ b/iokit/IOKit/IOKitServer.h @@ -81,6 +81,7 @@ enum { kIOCatalogRemoveDriversNoMatch, kIOCatalogStartMatching, kIOCatalogRemoveKernelLinker, + kIOCatalogKextdActive, kIOCatalogKextdFinishedLaunching }; @@ -118,9 +119,5 @@ enum { kIOCatalogServiceTerminate }; -enum { - kIOCatalogMatchIdle = KMOD_IOKIT_END_RANGE_PACKET - 0x10 -}; - #endif /* ! _IOKIT_IOKITSERVER_H */ diff --git a/iokit/IOKit/IOLib.h b/iokit/IOKit/IOLib.h index dc85274ae..6183a3358 100644 --- a/iokit/IOKit/IOLib.h +++ b/iokit/IOKit/IOLib.h @@ -39,6 +39,7 @@ #error IOLib.h is for kernel use only #endif +#include #include #include @@ -107,7 +108,7 @@ void * IOMallocAligned(vm_size_t size, vm_offset_t alignment); void IOFreeAligned(void * address, vm_size_t size); /*! @function IOMallocContiguous - @abstract Allocates wired memory in the kernel map, with an alignment restriction and physically contiguous. + @abstract Deprecated - use IOBufferMemoryDescriptor. Allocates wired memory in the kernel map, with an alignment restriction and physically contiguous. @discussion This is a utility to allocate memory in the kernel, with an alignment restriction which is specified as a byte count, and will allocate only physically contiguous memory. The request may fail if memory is fragmented, and may cause large amounts of paging activity. This function may block and so should not be called from interrupt level or while a simple lock is held. @param size Size of the memory requested. @param alignment Byte count of the alignment for the memory. For example, pass 256 to get memory allocated at an address with bits 0-7 zero. @@ -115,15 +116,15 @@ void IOFreeAligned(void * address, vm_size_t size); @result Virtual address of the allocated memory, or zero on failure. 
*/ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, - IOPhysicalAddress * physicalAddress); + IOPhysicalAddress * physicalAddress) __attribute__((deprecated)); /*! @function IOFreeContiguous - @abstract Frees memory allocated with IOMallocContiguous. + @abstract Deprecated - use IOBufferMemoryDescriptor. Frees memory allocated with IOMallocContiguous. @discussion This function frees memory allocated with IOMallocContiguous, it may block and so should not be called from interrupt level or while a simple lock is held. @param address Virtual address of the allocated memory. @param size Size of the memory allocated. */ -void IOFreeContiguous(void * address, vm_size_t size); +void IOFreeContiguous(void * address, vm_size_t size) __attribute__((deprecated)); /*! @function IOMallocPageable @@ -253,19 +254,19 @@ IOReturn IOFlushProcessorCache( task_t task, IOVirtualAddress address, #define IOThreadSelf() (current_thread()) /*! @function IOCreateThread - @abstract Create a kernel thread. + @abstract Deprecated function - use kernel_thread_start(). Create a kernel thread. @discussion This function creates a kernel thread, and passes the caller supplied argument to the new thread. Warning: the value returned by this function is not 100% reliable. There is a race condition where it is possible that the new thread has already terminated before this call returns. Under that circumstance the IOThread returned will be invalid. In general there is little that can be done with this value except compare it against 0. The thread itself can call IOThreadSelf() 100% reliably and that is the prefered mechanism to manipulate the IOThreads state. @param function A C-function pointer where the thread will begin execution. @param argument Caller specified data to be passed to the new thread. @result An IOThread identifier for the new thread, equivalent to an osfmk thread_t. 
*/ -IOThread IOCreateThread(IOThreadFunc function, void *argument); +IOThread IOCreateThread(IOThreadFunc function, void *argument) __attribute__((deprecated)); /*! @function IOExitThread - @abstract Terminate exceution of current thread. + @abstract Deprecated function - use thread_terminate(). Terminate execution of current thread. @discussion This function destroys the currently running thread, and does not return. */ -void IOExitThread(void) __dead2; +void IOExitThread(void) __attribute__((deprecated)); /*! @function IOSleep @abstract Sleep the calling thread for a number of milliseconds. @@ -291,12 +292,20 @@ void IOPause(unsigned nanoseconds); /*! @function IOLog @abstract Log a message to console in text mode, and /var/log/system.log. @discussion This function allows a driver to log diagnostic information to the screen during verbose boots, and to a log file found at /var/log/system.log. IOLog should not be called from interrupt context. - @param format A printf() style format string (see printf() documentation). + @param format A printf() style format string (see printf(3) documentation). @param other arguments described by the format string. */ void IOLog(const char *format, ...) __attribute__((format(printf, 1, 2))); +/*! @function IOLogv + @abstract Log a message to console in text mode, and /var/log/system.log. + @discussion This function allows a driver to log diagnostic information to the screen during verbose boots, and to a log file found at /var/log/system.log. IOLogv should not be called from interrupt context. + @param format A printf() style format string (see printf(3) documentation). + @param ap stdarg(3) style variable arguments. */ + +void IOLogv(const char *format, va_list ap); + #ifndef _FN_KPRINTF #define _FN_KPRINTF void kprintf(const char *format, ...); @@ -326,6 +335,11 @@ IOReturn IOFindValueForName(const char *string, @param reason A C-string to describe why the debugger is being entered. 
*/ void Debugger(const char * reason); +#if __LP64__ +#define IOPanic(reason) panic("%s", reason) +#else +void IOPanic(const char *reason) __attribute__((deprecated)); +#endif struct OSDictionary * IOBSDNameMatching( const char * name ); struct OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen ); @@ -361,23 +375,25 @@ static inline IOFixed IOFixedDivide(IOFixed a, IOFixed b) (((value) / (multiple)) * (multiple)); -#ifdef __APPLE_API_OBSOLETE +#if defined(__APPLE_API_OBSOLETE) /* The following API is deprecated */ -#undef eieio -#define eieio() \ - OSSynchronizeIO() - -void IOPanic(const char *reason); - /* The API exported by kern/clock.h should be used for high resolution timing. */ -void IOGetTime( mach_timespec_t * clock_time); +void IOGetTime( mach_timespec_t * clock_time) __attribute__((deprecated)); + +#if !defined(__LP64__) + +#undef eieio +#define eieio() \ + OSSynchronizeIO() extern mach_timespec_t IOZeroTvalspec; +#endif /* !defined(__LP64__) */ + #endif /* __APPLE_API_OBSOLETE */ __END_DECLS diff --git a/iokit/IOKit/IOLocks.h b/iokit/IOKit/IOLocks.h index 8b3e7c12c..0e762ef55 100644 --- a/iokit/IOKit/IOLocks.h +++ b/iokit/IOKit/IOLocks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2007 Apple Inc. All rights reserved. + * Copyright (c) 1998-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,22 +50,29 @@ extern "C" { #include #include +/*! @var IOLockGroup + Global lock group used by all IOKit locks. To simplify kext debugging and lock-heat analysis, consider using lck_* locks with a per-driver lock group, as defined in kern/locks.h. +*/ extern lck_grp_t *IOLockGroup; +#if defined(XNU_KERNEL_PRIVATE) +#define IOLOCKS_INLINE 1 +#endif + /* * Mutex lock operations */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef IOLOCKS_INLINE typedef lck_mtx_t IOLock; #else typedef struct _IOLock IOLock; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* IOLOCKS_INLINE */ /*! 
@function IOLockAlloc @abstract Allocates and initializes a mutex. - @discussion Allocates a mutex in general purpose memory, and initilizes it. Mutexes are general purpose blocking mutual exclusion locks, supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. + @discussion Allocates a mutex in general purpose memory, and initializes it. Mutexes are general purpose blocking mutual exclusion locks, supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. IOLocks use the global IOKit lock group, IOLockGroup. To simplify kext debugging and lock-heat analysis, consider using lck_* locks with a per-driver lock group, as defined in kern/locks.h. @result Pointer to the allocated lock, or zero on failure. */ IOLock * IOLockAlloc( void ); @@ -89,19 +96,11 @@ lck_mtx_t * IOLockGetMachLock( IOLock * lock); @discussion Lock the mutex. If the lock is held by any thread, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the mutex recursively from one thread will result in deadlock. @param lock Pointer to the allocated lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IOLockLock( IOLock * lock) -{ - lck_mtx_lock(lock); -} -#else -void IOLockLock( IOLock * lock); -#endif /* !IOLOCKS_CPP */ +#ifdef IOLOCKS_INLINE +#define IOLockLock(l) lck_mtx_lock(l) #else void IOLockLock( IOLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ /*! @function IOLockTryLock @abstract Attempt to lock a mutex. @@ -109,42 +108,30 @@ void IOLockLock( IOLock * lock); @param lock Pointer to the allocated lock. @result True if the mutex was unlocked and is now locked by the caller, otherwise false. 
*/ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -boolean_t IOLockTryLock( IOLock * lock) -{ - return(lck_mtx_try_lock(lock)); -} +#ifdef IOLOCKS_INLINE +#define IOLockTryLock(l) lck_mtx_try_lock(l) #else boolean_t IOLockTryLock( IOLock * lock); -#endif /* !IOLOCKS_CPP */ -#else -boolean_t IOLockTryLock( IOLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ /*! @function IOLockUnlock @abstract Unlock a mutex. @discussion Unlock the mutex and wake any blocked waiters. Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the allocated lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IOLockUnlock( IOLock * lock) -{ - lck_mtx_unlock(lock); -} -#else -void IOLockUnlock( IOLock * lock); -#endif /* !IOLOCKS_CPP */ +#ifdef IOLOCKS_INLINE +#define IOLockUnlock(l) lck_mtx_unlock(l) #else +#if defined(__i386__) +void IOLockUnlock( IOLock * lock) __DARWIN10_ALIAS(IOLockUnlock); +#else /* !__i386__ */ void IOLockUnlock( IOLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* __i386__ */ +#endif /* !IOLOCKS_INLINE */ /*! @function IOLockSleep @abstract Sleep with mutex unlock and relock -@discussion Prepare to sleep,unlock the mutex, and re-acquire it on wakeup.Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. +@discussion Prepare to sleep, unlock the mutex, and re-acquire it on wakeup. Results are undefined if the caller has not locked the mutex. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the locked lock. @param event The event to sleep on. @param interType How can the sleep be interrupted. @@ -182,7 +169,7 @@ typedef struct _IORecursiveLock IORecursiveLock; /*!
@function IORecursiveLockAlloc @abstract Allocates and initializes an recursive lock. - @discussion Allocates a recursive lock in general purpose memory, and initializes it. Recursive locks function identically to mutexes but allow one thread to lock more than once, with balanced unlocks. + @discussion Allocates a recursive lock in general purpose memory, and initializes it. Recursive locks function identically to mutexes but allow one thread to lock more than once, with balanced unlocks. IORecursiveLocks use the global IOKit lock group, IOLockGroup. To simplify kext debugging and lock-heat analysis, consider using lck_* locks with a per-driver lock group, as defined in kern/locks.h. @result Pointer to the allocated lock, or zero on failure. */ IORecursiveLock * IORecursiveLockAlloc( void ); @@ -233,6 +220,8 @@ boolean_t IORecursiveLockHaveLock( const IORecursiveLock * lock); extern int IORecursiveLockSleep( IORecursiveLock *_lock, void *event, UInt32 interType); +extern int IORecursiveLockSleepDeadline( IORecursiveLock * _lock, void *event, + AbsoluteTime deadline, UInt32 interType); extern void IORecursiveLockWakeup( IORecursiveLock *_lock, void *event, bool oneThread); @@ -240,15 +229,15 @@ extern void IORecursiveLockWakeup( IORecursiveLock *_lock, * Complex (read/write) lock operations */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef IOLOCKS_INLINE typedef lck_rw_t IORWLock; #else typedef struct _IORWLock IORWLock; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* IOLOCKS_INLINE */ /*! @function IORWLockAlloc @abstract Allocates and initializes a read/write lock. -@discussion Allocates and initializes a read/write lock in general purpose memory, and initilizes it. Read/write locks provide for multiple readers, one exclusive writer, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. + @discussion Allocates and initializes a read/write lock in general purpose memory. 
Read/write locks provide for multiple readers, one exclusive writer, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. IORWLocks use the global IOKit lock group, IOLockGroup. To simplify kext debugging and lock-heat analysis, consider using lck_* locks with a per-driver lock group, as defined in kern/locks.h. @result Pointer to the allocated lock, or zero on failure. */ IORWLock * IORWLockAlloc( void ); @@ -272,57 +261,34 @@ lck_rw_t * IORWLockGetMachLock( IORWLock * lock); @discussion Lock the lock for read, allowing multiple readers when there are no writers. If the lock is held for write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. @param lock Pointer to the allocated lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IORWLockRead( IORWLock * lock) -{ - lck_rw_lock_shared( lock); -} -#else -void IORWLockRead( IORWLock * lock); -#endif /* !IOLOCKS_CPP */ +#ifdef IOLOCKS_INLINE +#define IORWLockRead(l) lck_rw_lock_shared(l) #else -void IORWLockRead( IORWLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +void IORWLockRead(IORWLock * lock); +#endif /* !IOLOCKS_INLINE */ /*! @function IORWLockWrite @abstract Lock a read/write lock for write. @discussion Lock the lock for write, allowing one writer exlusive access. If the lock is held for read or write, block waiting for its unlock. This function may block and so should not be called from interrupt level or while a spin lock is held. Locking the lock recursively from one thread, for read or write, can result in deadlock. @param lock Pointer to the allocated lock. 
*/ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IORWLockWrite( IORWLock * lock) -{ - lck_rw_lock_exclusive( lock); -} -#else -void IORWLockWrite( IORWLock * lock); -#endif /* !IOLOCKS_CPP */ +#ifdef IOLOCKS_INLINE +#define IORWLockWrite(l) lck_rw_lock_exclusive(l) #else void IORWLockWrite( IORWLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ /*! @function IORWLockUnlock @abstract Unlock a read/write lock. @discussion Undo one call to IORWLockRead or IORWLockWrite. Results are undefined if the caller has not locked the lock. This function may block and so should not be called from interrupt level or while a spin lock is held. @param lock Pointer to the allocated lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IORWLockUnlock( IORWLock * lock) -{ - lck_rw_done( lock); -} +#ifdef IOLOCKS_INLINE +#define IORWLockUnlock(l) lck_rw_done(l) #else void IORWLockUnlock( IORWLock * lock); -#endif /* !IOLOCKS_CPP */ -#else -void IORWLockUnlock( IORWLock * lock); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ + #ifdef __APPLE_API_OBSOLETE @@ -339,15 +305,15 @@ static __inline__ void IORWUnlock( IORWLock * lock) { IORWLockUnlock(lock); } * Simple locks. Cannot block while holding a simple lock. */ -#ifdef KERNEL_PRIVATE +#ifdef IOLOCKS_INLINE typedef lck_spin_t IOSimpleLock; #else typedef struct _IOSimpleLock IOSimpleLock; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* IOLOCKS_INLINE */ /*! @function IOSimpleLockAlloc @abstract Allocates and initializes a spin lock. - @discussion Allocates an initializes a spin lock in general purpose memory, and initilizes it. Spin locks provide non-blocking mutual exclusion for synchronization between thread context and interrupt context, or for multiprocessor synchronization, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. 
+ @discussion Allocates and initializes a spin lock in general purpose memory. Spin locks provide non-blocking mutual exclusion for synchronization between thread context and interrupt context, or for multiprocessor synchronization, and are supplied by libkern/locks.h. This function may block and so should not be called from interrupt level or while a spin lock is held. IOSimpleLocks use the global IOKit lock group, IOLockGroup. To simplify kext debugging and lock-heat analysis, consider using lck_* locks with a per-driver lock group, as defined in kern/locks.h. @result Pointer to the allocated lock, or zero on failure. */ IOSimpleLock * IOSimpleLockAlloc( void ); @@ -378,19 +344,12 @@ void IOSimpleLockInit( IOSimpleLock * lock ); @discussion Lock the spin lock. If the lock is held, spin waiting for its unlock. Spin locks disable preemption, cannot be held across any blocking operation, and should be held for very short periods. When used to synchronize between interrupt context and thread context they should be locked with interrupts disabled - IOSimpleLockLockDisableInterrupt() will do both. Locking the lock recursively from one thread will result in deadlock. @param lock Pointer to the lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IOSimpleLockLock( IOSimpleLock * lock ) -{ - lck_spin_lock( lock ); -} +#ifdef IOLOCKS_INLINE +#define IOSimpleLockLock(l) lck_spin_lock(l) #else void IOSimpleLockLock( IOSimpleLock * lock ); -#endif /* !IOLOCKS_CPP */ -#else -void IOSimpleLockLock( IOSimpleLock * lock ); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ + /*! @function IOSimpleLockTryLock @abstract Attempt to lock a spin lock. @@ -398,40 +357,28 @@ void IOSimpleLockLock( IOSimpleLock * lock ); @param lock Pointer to the lock. @result True if the lock was unlocked and is now locked by the caller, otherwise false. 
*/ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ) -{ - return( lck_spin_try_lock( lock ) ); -} +#ifdef IOLOCKS_INLINE +#define IOSimpleLockTryLock(l) lck_spin_try_lock(l) #else boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ); -#endif /* !IOLOCKS_CPP */ -#else -boolean_t IOSimpleLockTryLock( IOSimpleLock * lock ); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ /*! @function IOSimpleLockUnlock @abstract Unlock a spin lock. @discussion Unlock the lock, and restore preemption. Results are undefined if the caller has not locked the lock. @param lock Pointer to the lock. */ -#ifdef XNU_KERNEL_PRIVATE -#ifndef IOLOCKS_CPP -static __inline__ -void IOSimpleLockUnlock( IOSimpleLock * lock ) -{ - lck_spin_unlock( lock ); -} -#else -void IOSimpleLockUnlock( IOSimpleLock * lock ); -#endif /* !IOLOCKS_CPP */ +#ifdef IOLOCKS_INLINE +#define IOSimpleLockUnlock(l) lck_spin_unlock(l) #else void IOSimpleLockUnlock( IOSimpleLock * lock ); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !IOLOCKS_INLINE */ +#if __LP64__ +typedef boolean_t IOInterruptState; +#else typedef long int IOInterruptState; +#endif /*! @function IOSimpleLockLockDisableInterrupt @abstract Lock a spin lock. 
diff --git a/iokit/IOKit/IOMapper.h b/iokit/IOKit/IOMapper.h index 9fa09459a..d877974ef 100644 --- a/iokit/IOKit/IOMapper.h +++ b/iokit/IOKit/IOMapper.h @@ -57,6 +57,9 @@ __END_DECLS #include class OSData; +class IODMACommand; + +extern const OSSymbol * gIOMapperIDKey; class IOMapper : public IOService { @@ -109,20 +112,28 @@ class IOMapper : public IOService ppnum_t *pageList, IOItemCount pageCount); virtual void iovmInsert(ppnum_t addr, IOItemCount offset, upl_page_info_t *pageList, IOItemCount pageCount); + static void checkForSystemMapper() - { if ((vm_address_t) gSystem & kWaitMask) waitForSystemMapper(); }; + { if ((uintptr_t) gSystem & kWaitMask) waitForSystemMapper(); }; + static IOMapper * copyMapperForDevice(IOService * device); + + // Function will panic if the given address is not found in a valid // iovm mapping. virtual addr64_t mapAddr(IOPhysicalAddress addr) = 0; // Get the address mask to or into an address to bypass this mapper - virtual bool getBypassMask(addr64_t *maskP) const + virtual bool getBypassMask(addr64_t *maskP) const; + + virtual ppnum_t iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount); + virtual void iovmFreeDMACommand(IODMACommand * command, ppnum_t addr, IOItemCount pageCount); + OSMetaClassDeclareReservedUsed(IOMapper, 0); + OSMetaClassDeclareReservedUsed(IOMapper, 1); + OSMetaClassDeclareReservedUsed(IOMapper, 2); private: - OSMetaClassDeclareReservedUnused(IOMapper, 1); - OSMetaClassDeclareReservedUnused(IOMapper, 2); OSMetaClassDeclareReservedUnused(IOMapper, 3); OSMetaClassDeclareReservedUnused(IOMapper, 4); OSMetaClassDeclareReservedUnused(IOMapper, 5); diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index ea04c67e2..1186b89a1 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -34,15 +34,7 @@ #include #include -__BEGIN_DECLS #include -__END_DECLS - -struct IOPhysicalRange -{ - IOPhysicalAddress address; - IOByteCount length; -}; class 
IOMemoryMap; class IOMapper; @@ -50,7 +42,11 @@ class IOMapper; /* * Direction of transfer, with respect to the described memory. */ +#ifdef __LP64__ +enum +#else /* !__LP64__ */ enum IODirection +#endif /* !__LP64__ */ { kIODirectionNone = 0x0, // same as VM_PROT_NONE kIODirectionIn = 0x1, // User land 'read', same as VM_PROT_READ @@ -58,26 +54,36 @@ enum IODirection kIODirectionOutIn = kIODirectionOut | kIODirectionIn, kIODirectionInOut = kIODirectionIn | kIODirectionOut }; +#ifdef __LP64__ +typedef IOOptionBits IODirection; +#endif /* __LP64__ */ /* * IOOptionBits used in the withOptions variant */ enum { kIOMemoryDirectionMask = 0x00000007, +#ifdef XNU_KERNEL_PRIVATE kIOMemoryAutoPrepare = 0x00000008, // Shared with Buffer MD - +#endif + kIOMemoryTypeVirtual = 0x00000010, kIOMemoryTypePhysical = 0x00000020, kIOMemoryTypeUPL = 0x00000030, kIOMemoryTypePersistentMD = 0x00000040, // Persistent Memory Descriptor kIOMemoryTypeUIO = 0x00000050, +#ifdef __LP64__ + kIOMemoryTypeVirtual64 = kIOMemoryTypeVirtual, + kIOMemoryTypePhysical64 = kIOMemoryTypePhysical, +#else /* !__LP64__ */ kIOMemoryTypeVirtual64 = 0x00000060, kIOMemoryTypePhysical64 = 0x00000070, +#endif /* !__LP64__ */ kIOMemoryTypeMask = 0x000000f0, kIOMemoryAsReference = 0x00000100, kIOMemoryBufferPageable = 0x00000400, - kIOMemoryDontMap = 0x00000800, + kIOMemoryMapperNone = 0x00000800, #ifdef XNU_KERNEL_PRIVATE kIOMemoryRedirected = 0x00004000, kIOMemoryPreparedReadOnly = 0x00008000, @@ -89,7 +95,6 @@ enum { kIOMemoryThreadSafe = 0x00100000, // Shared with Buffer MD }; -#define kIOMapperNone ((IOMapper *) -1) #define kIOMapperSystem ((IOMapper *) 0) enum @@ -107,6 +112,11 @@ enum #define IOMEMORYDESCRIPTOR_SUPPORTS_DMACOMMAND 1 +enum +{ + kIOPreparationIDUnprepared = 0, + kIOPreparationIDUnsupported = 1, +}; /*! @class IOMemoryDescriptor : public OSObject @abstract An abstract base class defining common methods for describing physical or virtual memory. 
@@ -114,8 +124,7 @@ enum class IOMemoryDescriptor : public OSObject { - friend class _IOMemoryMap; - friend class IOSubMemoryDescriptor; + friend class IOMemoryMap; OSDeclareDefaultStructors(IOMemoryDescriptor); @@ -139,15 +148,23 @@ class IOMemoryDescriptor : public OSObject IOOptionBits _flags; void * _memEntry; - IODirection _direction; /* DEPRECATED: use _flags instead. direction of transfer */ +#ifdef __LP64__ + uint64_t __iomd_reserved1; + uint64_t __iomd_reserved2; + uint64_t __iomd_reserved3; + uint64_t __iomd_reserved4; +#else /* !__LP64__ */ + IODirection _direction; /* use _flags instead */ +#endif /* !__LP64__ */ IOByteCount _length; /* length of all ranges */ IOOptionBits _tag; public: typedef IOOptionBits DMACommandOps; +#ifndef __LP64__ virtual IOPhysicalAddress getSourceSegment( IOByteCount offset, - IOByteCount * length ); - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 0); + IOByteCount * length ) APPLE_KEXT_DEPRECATED; +#endif /* !__LP64__ */ /*! @function initWithOptions @abstract Master initialiser for all variants of memory descriptors. For a more complete description see IOMemoryDescriptor::withOptions. @@ -159,12 +176,11 @@ typedef IOOptionBits DMACommandOps; task_t task, IOOptionBits options, IOMapper * mapper = kIOMapperSystem); - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 1); +#ifndef __LP64__ virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 2); - + IOByteCount * length ) APPLE_KEXT_DEPRECATED; /* use getPhysicalSegment() and kIOMemoryMapperNone instead */ +#endif /* !__LP64__ */ /*! @function setPurgeable @abstract Control the purgeable status of a memory descriptors memory. @@ -182,7 +198,6 @@ typedef IOOptionBits DMACommandOps; virtual IOReturn setPurgeable( IOOptionBits newState, IOOptionBits * oldState ); - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 3); /*! 
@function performOperation @abstract Perform an operation on the memory descriptor's memory. @@ -196,15 +211,48 @@ typedef IOOptionBits DMACommandOps; virtual IOReturn performOperation( IOOptionBits options, IOByteCount offset, IOByteCount length ); - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 4); // Used for dedicated communications for IODMACommand virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; - OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 5); +/*! @function getPhysicalSegment + @abstract Break a memory descriptor into its physically contiguous segments. + @discussion This method returns the physical address of the byte at the given offset into the memory, and optionally the length of the physically contiguous segment from that offset. + @param offset A byte offset into the memory whose physical address to return. + @param length If non-zero, getPhysicalSegment will store here the length of the physically contiguous segment at the given offset. + @result A physical address, or zero if the offset is beyond the length of the memory.
*/ + +#ifdef __LP64__ + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, + IOOptionBits options = 0 ) = 0; +#else /* !__LP64__ */ + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, + IOOptionBits options ); +#endif /* !__LP64__ */ + + virtual uint64_t getPreparationID( void ); + private: + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 0); +#ifdef __LP64__ + OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 1); + OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 2); + OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 3); + OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 4); + OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 5); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 6); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 7); +#else /* !__LP64__ */ + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 1); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 2); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 3); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 4); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 5); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 6); + OSMetaClassDeclareReservedUsed(IOMemoryDescriptor, 7); +#endif /* !__LP64__ */ OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 8); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 9); OSMetaClassDeclareReservedUnused(IOMemoryDescriptor, 10); @@ -222,7 +270,7 @@ typedef IOOptionBits DMACommandOps; public: /*! @function withAddress @abstract Create an IOMemoryDescriptor to describe one virtual range of the kernel task. - @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the kernel map. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the kernel map. 
This memory descriptor needs to be prepared before it can be used to extract data from the memory described. @param address The virtual address of the first byte in the memory. @param withLength The length of memory. @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. @@ -232,19 +280,12 @@ typedef IOOptionBits DMACommandOps; IOByteCount withLength, IODirection withDirection); -/*! @function withAddress - @abstract Create an IOMemoryDescriptor to describe one virtual range of the specified map. - @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the specified map. - @param address The virtual address of the first byte in the memory. - @param withLength The length of memory. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param withTask The task the virtual ranges are mapped into. - @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ - - static IOMemoryDescriptor * withAddress(vm_address_t address, +#ifndef __LP64__ + static IOMemoryDescriptor * withAddress(IOVirtualAddress address, IOByteCount withLength, IODirection withDirection, - task_t withTask); + task_t withTask) APPLE_KEXT_DEPRECATED; /* use withAddressRange() and prepare() instead */ +#endif /* !__LP64__ */ /*! @function withPhysicalAddress @abstract Create an IOMemoryDescriptor to describe one physical range. @@ -259,25 +300,17 @@ typedef IOOptionBits DMACommandOps; IOByteCount withLength, IODirection withDirection ); -/*! @function withRanges - @abstract Create an IOMemoryDescriptor to describe one or more virtual ranges. 
- @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of an array of virtual memory ranges each mapped into a specified source task. - @param ranges An array of IOVirtualRange structures which specify the virtual ranges in the specified map which make up the memory to be described. - @param withCount The member count of the ranges array. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param withTask The task each of the virtual ranges are mapped into. - @param asReference If false, the IOMemoryDescriptor object will make a copy of the ranges array, otherwise, the array will be used in situ, avoiding an extra allocation. - @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ - +#ifndef __LP64__ static IOMemoryDescriptor * withRanges(IOVirtualRange * ranges, UInt32 withCount, IODirection withDirection, task_t withTask, - bool asReference = false); + bool asReference = false) APPLE_KEXT_DEPRECATED; /* use withAddressRanges() instead */ +#endif /* !__LP64__ */ /*! @function withAddressRange @abstract Create an IOMemoryDescriptor to describe one virtual range of the specified map. - @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the specified map. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the specified map. This memory descriptor needs to be prepared before it can be used to extract data from the memory described. @param address The virtual address of the first byte in the memory. @param withLength The length of memory. 
@param options @@ -293,7 +326,7 @@ typedef IOOptionBits DMACommandOps; /*! @function withAddressRanges @abstract Create an IOMemoryDescriptor to describe one or more virtual ranges. - @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of an array of virtual memory ranges each mapped into a specified source task. Note that unlike IOMemoryDescriptor::withAddress(), kernel_task memory must be explicitly prepared when passed to this api. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of an array of virtual memory ranges each mapped into a specified source task. This memory descriptor needs to be prepared before it can be used to extract data from the memory described. @param ranges An array of IOAddressRange structures which specify the virtual ranges in the specified map which make up the memory to be described. IOAddressRange is the 64bit version of IOVirtualRange. @param rangeCount The member count of the ranges array. @param options @@ -306,14 +339,14 @@ typedef IOOptionBits DMACommandOps; IOAddressRange * ranges, UInt32 rangeCount, IOOptionBits options, - task_t withTask); + task_t task); /*! @function withOptions @abstract Master initialiser for all variants of memory descriptors. - @discussion This method creates and initializes an IOMemoryDescriptor for memory it has three main variants: Virtual, Physical & mach UPL. These variants are selected with the options parameter, see below. This memory descriptor needs to be prepared before it can be used to extract data from the memory described. However we temporarily have setup a mechanism that automatically prepares kernel_task memory descriptors at creation time. + @discussion This method creates and initializes an IOMemoryDescriptor for memory it has three main variants: Virtual, Physical & mach UPL. These variants are selected with the options parameter, see below. 
This memory descriptor needs to be prepared before it can be used to extract data from the memory described. - @param buffers A pointer to an array of IOVirtualRanges or IOPhysicalRanges if the options:type is Virtual or Physical. For type UPL it is a upl_t returned by the mach/memory_object_types.h apis, primarily used internally by the UBC. + @param buffers A pointer to an array of IOAddressRange when options:type is kIOMemoryTypeVirtual64 or kIOMemoryTypePhysical64 or a 64bit kernel. For type UPL it is a upl_t returned by the mach/memory_object_types.h apis, primarily used internally by the UBC. IOVirtualRanges or IOPhysicalRanges are 32 bit only types for use when options:type is kIOMemoryTypeVirtual or kIOMemoryTypePhysical on 32bit kernels. @param count options:type = Virtual or Physical count contains a count of the number of entires in the buffers array. For options:type = UPL this field contains a total length. @@ -323,10 +356,9 @@ typedef IOOptionBits DMACommandOps; @param options kIOMemoryDirectionMask (options:direction) This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - kIOMemoryTypeMask (options:type) kIOMemoryTypeVirtual, kIOMemoryTypePhysical, kIOMemoryTypeUPL Indicates that what type of memory basic memory descriptor to use. This sub-field also controls the interpretation of the buffers, count, offset & task parameters. + kIOMemoryTypeMask (options:type) kIOMemoryTypeVirtual64, kIOMemoryTypeVirtual, kIOMemoryTypePhysical64, kIOMemoryTypePhysical, kIOMemoryTypeUPL Indicates that what type of memory basic memory descriptor to use. This sub-field also controls the interpretation of the buffers, count, offset & task parameters. kIOMemoryAsReference For options:type = Virtual or Physical this indicate that the memory descriptor need not copy the ranges array into local memory. 
This is an optimisation to try to minimise unnecessary allocations. kIOMemoryBufferPageable Only used by the IOBufferMemoryDescriptor as an indication that the kernel virtual memory is in fact pageable and we need to use the kernel pageable submap rather than the default map. - kIOMemoryNoAutoPrepare Indicates that the temporary AutoPrepare of kernel_task memory should not be performed. @param mapper Which IOMapper should be used to map the in-memory physical addresses into I/O space addresses. Defaults to 0 which indicates that the system mapper is to be used, if present. @@ -339,34 +371,20 @@ typedef IOOptionBits DMACommandOps; IOOptionBits options, IOMapper * mapper = kIOMapperSystem); -/*! @function withPhysicalRanges - @abstract Create an IOMemoryDescriptor to describe one or more physical ranges. - @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of an array of physical memory ranges. - @param ranges An array of IOPhysicalRange structures which specify the physical ranges which make up the memory to be described. - @param withCount The member count of the ranges array. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param asReference If false, the IOMemoryDescriptor object will make a copy of the ranges array, otherwise, the array will be used in situ, avoiding an extra allocation. - @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ - +#ifndef __LP64__ static IOMemoryDescriptor * withPhysicalRanges( IOPhysicalRange * ranges, UInt32 withCount, IODirection withDirection, - bool asReference = false); - -/*! @function withSubRange - @abstract Create an IOMemoryDescriptor to describe a subrange of an existing descriptor. 
- @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a subrange of the specified memory descriptor. The parent memory descriptor is retained by the new descriptor. - @param of The parent IOMemoryDescriptor of which a subrange is to be used for the new descriptor, which will be retained by the subrange IOMemoryDescriptor. - @param offset A byte offset into the parent memory descriptor's memory. - @param length The length of the subrange. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. This is used over the direction of the parent descriptor. - @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ + bool asReference = false) APPLE_KEXT_DEPRECATED; /* use withOptions() and kIOMemoryTypePhysical instead */ +#endif /* !__LP64__ */ +#ifndef __LP64__ static IOMemoryDescriptor * withSubRange(IOMemoryDescriptor *of, IOByteCount offset, IOByteCount length, - IODirection withDirection); + IODirection withDirection) APPLE_KEXT_DEPRECATED; /* use IOSubMemoryDescriptor::withSubRange() and kIOMemoryThreadSafe instead */ +#endif /* !__LP64__ */ /*! @function withPersistentMemoryDescriptor @abstract Copy constructor that generates a new memory descriptor if the backing memory for the same task's virtual address and length has changed. @@ -376,74 +394,30 @@ typedef IOOptionBits DMACommandOps; static IOMemoryDescriptor * withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD); -/*! @function initWithAddress - @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one virtual range of the kernel task. - @discussion This method initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the kernel map. 
An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. - @param address The virtual address of the first byte in the memory. - @param withLength The length of memory. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @result true on success, false on failure. */ - +#ifndef __LP64__ + // obsolete initializers + // - initWithOptions is the designated initializer virtual bool initWithAddress(void * address, IOByteCount withLength, - IODirection withDirection) = 0; - -/*! @function initWithAddress - @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one virtual range of the specified map. - @discussion This method initializes an IOMemoryDescriptor for memory consisting of a single virtual memory range mapped into the specified map. An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. - @param address The virtual address of the first byte in the memory. - @param withLength The length of memory. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param withTask The task the virtual ranges are mapped into. - @result true on success, false on failure. */ - - virtual bool initWithAddress(vm_address_t address, + IODirection withDirection) APPLE_KEXT_DEPRECATED; /* use initWithOptions() instead */ + virtual bool initWithAddress(IOVirtualAddress address, IOByteCount withLength, IODirection withDirection, - task_t withTask) = 0; - -/*! 
@function initWithPhysicalAddress - @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one physical range. - @discussion This method initializes an IOMemoryDescriptor for memory consisting of a single physical memory range. An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. - @param address The physical address of the first byte in the memory. - @param withLength The length of memory. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @result true on success, false on failure. */ - + task_t withTask) APPLE_KEXT_DEPRECATED; /* use initWithOptions() instead */ virtual bool initWithPhysicalAddress( IOPhysicalAddress address, IOByteCount withLength, - IODirection withDirection ) = 0; - -/*! @function initWithRanges - @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one or more virtual ranges. - @discussion This method initializes an IOMemoryDescriptor for memory consisting of an array of virtual memory ranges each mapped into a specified source task. An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. - @param ranges An array of IOVirtualRange structures which specify the virtual ranges in the specified map which make up the memory to be described. - @param withCount The member count of the ranges array. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param withTask The task each of the virtual ranges are mapped into. 
- @param asReference If false, the IOMemoryDescriptor object will make a copy of the ranges array, otherwise, the array will be used in situ, avoiding an extra allocation. - @result true on success, false on failure. */ - + IODirection withDirection ) APPLE_KEXT_DEPRECATED; /* use initWithOptions() instead */ virtual bool initWithRanges(IOVirtualRange * ranges, UInt32 withCount, IODirection withDirection, task_t withTask, - bool asReference = false) = 0; - -/*! @function initWithPhysicalRanges - @abstract Initialize or reinitialize an IOMemoryDescriptor to describe one or more physical ranges. - @discussion This method initializes an IOMemoryDescriptor for memory consisting of an array of physical memory ranges. An IOMemoryDescriptor can be re-used by calling initWithAddress or initWithRanges again on an existing instance -- note this behavior is not commonly supported in other IOKit classes, although it is supported here. - @param ranges An array of IOPhysicalRange structures which specify the physical ranges which make up the memory to be described. - @param withCount The member count of the ranges array. - @param withDirection An I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. - @param asReference If false, the IOMemoryDescriptor object will make a copy of the ranges array, otherwise, the array will be used in situ, avoiding an extra allocation. - @result true on success, false on failure. */ - + bool asReference = false) APPLE_KEXT_DEPRECATED; /* use initWithOptions() instead */ virtual bool initWithPhysicalRanges(IOPhysicalRange * ranges, UInt32 withCount, IODirection withDirection, - bool asReference = false) = 0; + bool asReference = false) APPLE_KEXT_DEPRECATED; /* use initWithOptions() instead */ +#endif /* !__LP64__ */ /*! @function getDirection @abstract Accessor to get the direction the memory descriptor was created with.
@@ -479,7 +453,7 @@ typedef IOOptionBits DMACommandOps; @param offset A byte offset into the memory descriptor's memory. @param bytes The caller supplied buffer to copy the data to. @param withLength The length of the data to copy. - @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. */ + @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. Development/debug kernel builds will assert if the offset is beyond the length of the descriptor. */ virtual IOByteCount readBytes(IOByteCount offset, void * bytes, IOByteCount withLength); @@ -490,33 +464,27 @@ typedef IOOptionBits DMACommandOps; @param offset A byte offset into the memory descriptor's memory. @param bytes The caller supplied buffer to copy the data from. @param withLength The length of the data to copy. - @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. */ + @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. Development/debug kernel builds will assert if the offset is beyond the length of the descriptor. */ virtual IOByteCount writeBytes(IOByteCount offset, const void * bytes, IOByteCount withLength); -/*! @function getPhysicalSegment - @abstract Break a memory descriptor into its physically contiguous segments. - @discussion This method returns the physical address of the byte at the given offset into the memory, and optionally the length of the physically contiguous segment from that offset. - @param offset A byte offset into the memory whose physical address to return. - @param length If non-zero, getPhysicalSegment will store here the length of the physically contiguous segement at the given offset. - @result A physical address, or zero if the offset is beyond the length of the memory. 
*/ - +#ifndef __LP64__ virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - IOByteCount * length) = 0; + IOByteCount * length); +#endif /* !__LP64__ */ /*! @function getPhysicalAddress @abstract Return the physical address of the first byte in the memory. @discussion This method returns the physical address of the first byte in the memory. It is most useful on memory known to be physically contiguous. @result A physical address. */ - /* inline */ IOPhysicalAddress getPhysicalAddress(); - /* { return( getPhysicalSegment( 0, 0 )); } */ + IOPhysicalAddress getPhysicalAddress(); - /* DEPRECATED */ /* USE INSTEAD: map(), readBytes(), writeBytes() */ - /* DEPRECATED */ virtual void * getVirtualSegment(IOByteCount offset, - /* DEPRECATED */ IOByteCount * length) = 0; - /* DEPRECATED */ /* USE INSTEAD: map(), readBytes(), writeBytes() */ +#ifndef __LP64__ + virtual void * getVirtualSegment(IOByteCount offset, + IOByteCount * length) APPLE_KEXT_DEPRECATED; /* use map() and getVirtualAddress() instead */ +#endif /* !__LP64__ */ /*! @function prepare @abstract Prepare the memory for an I/O transfer. @@ -561,29 +529,14 @@ typedef IOOptionBits DMACommandOps; mach_vm_size_t offset = 0, mach_vm_size_t length = 0 ); -/*! @function map - @abstract Maps a IOMemoryDescriptor into a task - deprecated, only safe for 32 bit tasks. Use createMappingInTask instead. - @discussion This is the general purpose method to map all or part of the memory described by a memory descriptor into a task at any available address, or at a fixed address if possible. Caching & read-only options may be set for the mapping. The mapping is represented as a returned reference to a IOMemoryMap object, which may be shared if the mapping is compatible with an existing mapping of the IOMemoryDescriptor. The IOMemoryMap object returned should be released only when the caller has finished accessing the mapping, as freeing the object destroys the mapping. 
- @param intoTask Sets the target task for the mapping. Pass kernel_task for the kernel address space. - @param atAddress If a placed mapping is requested, atAddress specifies its address, and the kIOMapAnywhere should not be set. Otherwise, atAddress is ignored. - @param options Mapping options are defined in IOTypes.h,
- kIOMapAnywhere should be passed if the mapping can be created anywhere. If not set, the atAddress parameter sets the location of the mapping, if it is available in the target map.
- kIOMapDefaultCache to inhibit the cache in I/O areas, kIOMapCopybackCache in general purpose RAM.
- kIOMapInhibitCache, kIOMapWriteThruCache, kIOMapCopybackCache to set the appropriate caching.
- kIOMapReadOnly to allow only read only accesses to the memory - writes will cause and access fault.
- kIOMapReference will only succeed if the mapping already exists, and the IOMemoryMap object is just an extra reference, ie. no new mapping will be created.
- kIOMapUnique allows a special kind of mapping to be created that may be used with the IOMemoryMap::redirect() API. These mappings will not be shared as is the default - there will always be a unique mapping created for the caller, not an existing mapping with an extra reference.
- @param offset Is a beginning offset into the IOMemoryDescriptor's memory where the mapping starts. Zero is the default to map all the memory. - @param length Is the length of the mapping requested for a subset of the IOMemoryDescriptor. Zero is the default to map all the memory. - @result A reference to an IOMemoryMap object representing the mapping, which can supply the virtual address of the mapping and other information. The mapping may be shared with multiple callers - multiple maps are avoided if a compatible one exists. The IOMemoryMap object returned should be released only when the caller has finished accessing the mapping, as freeing the object destroys the mapping. The IOMemoryMap instance also retains the IOMemoryDescriptor it maps while it exists. */ - +#ifndef __LP64__ virtual IOMemoryMap * map( task_t intoTask, IOVirtualAddress atAddress, IOOptionBits options, IOByteCount offset = 0, - IOByteCount length = 0 ); - + IOByteCount length = 0 ) APPLE_KEXT_DEPRECATED; /* use createMappingInTask() instead */ +#endif /* !__LP64__ */ /*! @function map @abstract Maps a IOMemoryDescriptor into the kernel map. @@ -609,7 +562,9 @@ typedef IOOptionBits DMACommandOps; // Following methods are private implementation - // make virtual +#ifdef __LP64__ + virtual +#endif /* __LP64__ */ IOReturn redirect( task_t safeTask, bool redirect ); IOReturn handleFault( @@ -620,7 +575,6 @@ typedef IOOptionBits DMACommandOps; mach_vm_size_t length, IOOptionBits options ); -protected: virtual IOMemoryMap * makeMapping( IOMemoryDescriptor * owner, task_t intoTask, @@ -629,6 +583,7 @@ typedef IOOptionBits DMACommandOps; IOByteCount offset, IOByteCount length ); +protected: virtual void addMapping( IOMemoryMap * mapping ); @@ -651,12 +606,31 @@ typedef IOOptionBits DMACommandOps; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /*! 
@class IOMemoryMap : public OSObject - @abstract An abstract base class defining common methods for describing a memory mapping. + @abstract A class defining common methods for describing a memory mapping. @discussion The IOMemoryMap object represents a mapped range of memory, described by a IOMemoryDescriptor. The mapping may be in the kernel or a non-kernel task and has processor cache mode attributes. IOMemoryMap instances are created by IOMemoryDescriptor when it creates mappings in its map method, and returned to the caller. */ class IOMemoryMap : public OSObject { - OSDeclareAbstractStructors(IOMemoryMap) + OSDeclareDefaultStructors(IOMemoryMap) +#ifdef XNU_KERNEL_PRIVATE +public: + IOMemoryDescriptor * fMemory; + IOMemoryMap * fSuperMap; + mach_vm_size_t fOffset; + mach_vm_address_t fAddress; + mach_vm_size_t fLength; + task_t fAddressTask; + vm_map_t fAddressMap; + IOOptionBits fOptions; + upl_t fRedirUPL; + ipc_port_t fRedirEntry; + IOMemoryDescriptor * fOwner; + uint8_t fUserClientUnmap; +#endif /* XNU_KERNEL_PRIVATE */ + +protected: + virtual void taggedRelease(const void *tag = 0) const; + virtual void free(); public: /*! @function getVirtualAddress @@ -664,7 +638,7 @@ class IOMemoryMap : public OSObject @discussion This method returns the virtual address of the first byte in the mapping. @result A virtual address. */ - virtual IOVirtualAddress getVirtualAddress() = 0; + virtual IOVirtualAddress getVirtualAddress(); /*! @function getPhysicalSegment @abstract Break a mapping into its physically contiguous segments. @@ -673,53 +647,58 @@ class IOMemoryMap : public OSObject @param length If non-zero, getPhysicalSegment will store here the length of the physically contiguous segement at the given offset. @result A physical address, or zero if the offset is beyond the length of the mapping. 
*/ +#ifdef __LP64__ virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - IOByteCount * length) = 0; + IOByteCount * length, + IOOptionBits options = 0); +#else /* !__LP64__ */ + virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, + IOByteCount * length); +#endif /* !__LP64__ */ /*! @function getPhysicalAddress @abstract Return the physical address of the first byte in the mapping. @discussion This method returns the physical address of the first byte in the mapping. It is most useful on mappings known to be physically contiguous. @result A physical address. */ - /* inline */ IOPhysicalAddress getPhysicalAddress(); - /* { return( getPhysicalSegment( 0, 0 )); } */ + IOPhysicalAddress getPhysicalAddress(); /*! @function getLength @abstract Accessor to the length of the mapping. @discussion This method returns the length of the mapping. @result A byte count. */ - virtual IOByteCount getLength() = 0; + virtual IOByteCount getLength(); /*! @function getAddressTask @abstract Accessor to the task of the mapping. @discussion This method returns the mach task the mapping exists in. @result A mach task_t. */ - virtual task_t getAddressTask() = 0; + virtual task_t getAddressTask(); /*! @function getMemoryDescriptor @abstract Accessor to the IOMemoryDescriptor the mapping was created from. @discussion This method returns the IOMemoryDescriptor the mapping was created from. @result An IOMemoryDescriptor reference, which is valid while the IOMemoryMap object is retained. It should not be released by the caller. */ - virtual IOMemoryDescriptor * getMemoryDescriptor() = 0; + virtual IOMemoryDescriptor * getMemoryDescriptor(); /*! @function getMapOptions @abstract Accessor to the options the mapping was created with. @discussion This method returns the options to IOMemoryDescriptor::map the mapping was created with. @result Options for the mapping, including cache settings. 
*/ - virtual IOOptionBits getMapOptions() = 0; + virtual IOOptionBits getMapOptions(); /*! @function unmap @abstract Force the IOMemoryMap to unmap, without destroying the object. @discussion IOMemoryMap instances will unmap themselves upon free, ie. when the last client with a reference calls release. This method forces the IOMemoryMap to destroy the mapping it represents, regardless of the number of clients. It is not generally used. @result An IOReturn code. */ - virtual IOReturn unmap() = 0; + virtual IOReturn unmap(); - virtual void taskDied() = 0; + virtual void taskDied(); /*! @function redirect @abstract Replace the memory mapped in a process with new backing memory. @@ -729,24 +708,69 @@ class IOMemoryMap : public OSObject @param offset As with IOMemoryDescriptor::map(), a beginning offset into the IOMemoryDescriptor's memory where the mapping starts. Zero is the default. @result An IOReturn code. */ +#ifndef __LP64__ +// For 32 bit XNU, there is a 32 bit (IOByteCount) and a 64 bit (mach_vm_size_t) interface; +// for 64 bit, these fall together on the 64 bit one. 
virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, IOOptionBits options, - IOByteCount offset = 0) = 0; - + IOByteCount offset = 0); +#endif virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, IOOptionBits options, - mach_vm_size_t offset = 0) = 0; + mach_vm_size_t offset = 0); + +#ifdef __LP64__ + inline mach_vm_address_t getAddress() __attribute__((always_inline)); + inline mach_vm_size_t getSize() __attribute__((always_inline)); +#else /* !__LP64__ */ + virtual mach_vm_address_t getAddress(); + virtual mach_vm_size_t getSize(); +#endif /* !__LP64__ */ + +#ifdef XNU_KERNEL_PRIVATE + // for IOMemoryDescriptor use + IOMemoryMap * copyCompatible( IOMemoryMap * newMapping ); + + bool init( + task_t intoTask, + mach_vm_address_t toAddress, + IOOptionBits options, + mach_vm_size_t offset, + mach_vm_size_t length ); + + bool setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size_t _offset); + + IOReturn redirect( + task_t intoTask, bool redirect ); - virtual mach_vm_address_t getAddress() = 0; - virtual mach_vm_size_t getSize() = 0; + IOReturn userClientUnmap(); +#endif /* XNU_KERNEL_PRIVATE */ + + OSMetaClassDeclareReservedUnused(IOMemoryMap, 0); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 1); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 2); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 3); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 4); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 5); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 6); + OSMetaClassDeclareReservedUnused(IOMemoryMap, 7); }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#ifdef XNU_KERNEL_PRIVATE +// Also these flags should not overlap with the options to +// IOMemoryDescriptor::initWithRanges(... 
IOOptionsBits options); +enum { + _kIOMemorySourceSegment = 0x00002000 +}; +#endif /* XNU_KERNEL_PRIVATE */ + +#if !defined(__LP64) || defined(_IOMEMORYDESCRIPTOR_INTERNAL_) // The following classes are private implementation of IOMemoryDescriptor - they // should not be referenced directly, just through the public API's in the // IOMemoryDescriptor class. For example, an IOGeneralMemoryDescriptor instance -// might be created by IOMemoryDescriptor::withAddress(), but there should be +// might be created by IOMemoryDescriptor::withAddressRange(), but there should be // no need to reference as anything but a generic IOMemoryDescriptor *. class IOGeneralMemoryDescriptor : public IOMemoryDescriptor @@ -763,7 +787,9 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor protected: Ranges _ranges; unsigned _rangesCount; /* number of address ranges in list */ +#ifndef __LP64__ bool _rangesIsAllocated; /* is list allocated by us? */ +#endif /* !__LP64__ */ task_t _task; /* task where all ranges are mapped to */ @@ -774,21 +800,28 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor unsigned _wireCount; /* number of outstanding wires */ - /* DEPRECATED */ vm_address_t _cachedVirtualAddress; /* a cached virtual-to-physical */ +#ifndef __LP64__ + uintptr_t _cachedVirtualAddress; - /* DEPRECATED */ IOPhysicalAddress _cachedPhysicalAddress; + IOPhysicalAddress _cachedPhysicalAddress; +#endif /* !__LP64__ */ bool _initialized; /* has superclass been initialized? 
*/ +public: virtual void free(); virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; + virtual uint64_t getPreparationID( void ); + private: - /* DEPRECATED */ virtual void setPosition(IOByteCount position); - /* DEPRECATED */ virtual void mapIntoKernel(unsigned rangeIndex); - /* DEPRECATED */ virtual void unmapFromKernel(); +#ifndef __LP64__ + virtual void setPosition(IOByteCount position); + virtual void mapIntoKernel(unsigned rangeIndex); + virtual void unmapFromKernel(); +#endif /* !__LP64__ */ // Internal APIs may be made virtual at some time in the future. IOReturn wireVirtual(IODirection forDirection); @@ -816,43 +849,56 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor IOOptionBits options, IOMapper * mapper = kIOMapperSystem); +#ifndef __LP64__ // Secondary initialisers virtual bool initWithAddress(void * address, IOByteCount withLength, - IODirection withDirection); + IODirection withDirection) APPLE_KEXT_DEPRECATED; - virtual bool initWithAddress(vm_address_t address, + virtual bool initWithAddress(IOVirtualAddress address, IOByteCount withLength, IODirection withDirection, - task_t withTask); + task_t withTask) APPLE_KEXT_DEPRECATED; virtual bool initWithPhysicalAddress( IOPhysicalAddress address, IOByteCount withLength, - IODirection withDirection ); + IODirection withDirection ) APPLE_KEXT_DEPRECATED; virtual bool initWithRanges( IOVirtualRange * ranges, UInt32 withCount, IODirection withDirection, task_t withTask, - bool asReference = false); + bool asReference = false) APPLE_KEXT_DEPRECATED; virtual bool initWithPhysicalRanges(IOPhysicalRange * ranges, UInt32 withCount, IODirection withDirection, - bool asReference = false); + bool asReference = false) APPLE_KEXT_DEPRECATED; virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); + IOByteCount * length ) APPLE_KEXT_DEPRECATED; virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, IOByteCount * length); virtual 
IOPhysicalAddress getSourceSegment(IOByteCount offset, - IOByteCount * length); + IOByteCount * length) APPLE_KEXT_DEPRECATED; - /* DEPRECATED */ virtual void * getVirtualSegment(IOByteCount offset, - /* DEPRECATED */ IOByteCount * length); + virtual void * getVirtualSegment(IOByteCount offset, + IOByteCount * length) APPLE_KEXT_DEPRECATED; +#endif /* !__LP64__ */ + + virtual IOReturn setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ); + + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, +#ifdef __LP64__ + IOOptionBits options = 0 ); +#else /* !__LP64__ */ + IOOptionBits options ); +#endif /* !__LP64__ */ virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); @@ -878,119 +924,23 @@ class IOGeneralMemoryDescriptor : public IOMemoryDescriptor }; +#endif /* !defined(__LP64) || defined(_IOMEMORYDESCRIPTOR_INTERNAL_) */ + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -class IOSubMemoryDescriptor : public IOMemoryDescriptor +#ifdef __LP64__ +mach_vm_address_t IOMemoryMap::getAddress() { - friend class IOMemoryDescriptor; - - OSDeclareDefaultStructors(IOSubMemoryDescriptor); - -protected: - IOMemoryDescriptor * _parent; - IOByteCount _start; - - virtual void free(); - - virtual bool initWithAddress(void * address, - IOByteCount withLength, - IODirection withDirection); - - virtual bool initWithAddress(vm_address_t address, - IOByteCount withLength, - IODirection withDirection, - task_t withTask); - - virtual bool initWithPhysicalAddress( - IOPhysicalAddress address, - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithRanges( IOVirtualRange * ranges, - UInt32 withCount, - IODirection withDirection, - task_t withTask, - bool asReference = false); - - virtual bool initWithPhysicalRanges(IOPhysicalRange * ranges, - UInt32 withCount, - IODirection withDirection, - bool asReference = false); - - IOMemoryDescriptor::withAddress; - 
IOMemoryDescriptor::withPhysicalAddress; - IOMemoryDescriptor::withPhysicalRanges; - IOMemoryDescriptor::withRanges; - IOMemoryDescriptor::withSubRange; + return (getVirtualAddress()); +} - // used by IODMACommand - virtual IOReturn dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const; - -public: - /* - * Initialize or reinitialize an IOSubMemoryDescriptor to describe - * a subrange of an existing descriptor. - * - * An IOSubMemoryDescriptor can be re-used by calling initSubRange - * again on an existing instance -- note that this behavior is not - * commonly supported in other IOKit classes, although it is here. - */ - virtual bool initSubRange( IOMemoryDescriptor * parent, - IOByteCount offset, IOByteCount length, - IODirection withDirection ); - - /* - * IOMemoryDescriptor required methods - */ - - virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); - - virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - IOByteCount * length); - - virtual IOPhysicalAddress getSourceSegment(IOByteCount offset, - IOByteCount * length); - - virtual IOByteCount readBytes(IOByteCount offset, - void * bytes, IOByteCount withLength); - - virtual IOByteCount writeBytes(IOByteCount offset, - const void * bytes, IOByteCount withLength); - - virtual void * getVirtualSegment(IOByteCount offset, - IOByteCount * length); - - virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); - - virtual IOReturn complete(IODirection forDirection = kIODirectionNone); - - // make virtual - IOReturn redirect( task_t safeTask, bool redirect ); - - virtual bool serialize(OSSerialize *s) const; - - virtual IOReturn setPurgeable( IOOptionBits newState, - IOOptionBits * oldState ); - virtual IOReturn performOperation( IOOptionBits options, - IOByteCount offset, IOByteCount length ); - -protected: - virtual IOMemoryMap * makeMapping( - IOMemoryDescriptor * owner, - task_t intoTask, - IOVirtualAddress atAddress, - IOOptionBits 
options, - IOByteCount offset, - IOByteCount length ); - - virtual IOReturn doMap( - vm_map_t addressMap, - IOVirtualAddress * atAddress, - IOOptionBits options, - IOByteCount sourceOffset = 0, - IOByteCount length = 0 ); -}; +mach_vm_size_t IOMemoryMap::getSize() +{ + return (getLength()); +} +#else /* !__LP64__ */ +#include +#endif /* !__LP64__ */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/IOKit/IOMultiMemoryDescriptor.h b/iokit/IOKit/IOMultiMemoryDescriptor.h index c4efe6086..42b19a468 100644 --- a/iokit/IOKit/IOMultiMemoryDescriptor.h +++ b/iokit/IOKit/IOMultiMemoryDescriptor.h @@ -47,45 +47,6 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor virtual void free(); - /* - * These methods are not supported under this subclass. - */ - - virtual bool initWithAddress( void * address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithAddress( vm_address_t address, /* not supported */ - IOByteCount withLength, - IODirection withDirection, - task_t withTask ); - - virtual bool initWithPhysicalAddress( - IOPhysicalAddress address, /* not supported */ - IOByteCount withLength, - IODirection withDirection ); - - virtual bool initWithPhysicalRanges( - IOPhysicalRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - bool asReference = false ); - - virtual bool initWithRanges( IOVirtualRange * ranges, /* not supported */ - UInt32 withCount, - IODirection withDirection, - task_t withTask, - bool asReference = false ); - - virtual void * getVirtualSegment( IOByteCount offset, /* not supported */ - IOByteCount * length ); - - IOMemoryDescriptor::withAddress; /* not supported */ - IOMemoryDescriptor::withPhysicalAddress; /* not supported */ - IOMemoryDescriptor::withPhysicalRanges; /* not supported */ - IOMemoryDescriptor::withRanges; /* not supported */ - IOMemoryDescriptor::withSubRange; /* not supported */ - public: /*! 
@function withDescriptors @@ -118,16 +79,16 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor IODirection withDirection, bool asReference = false ); -/*! @function getPhysicalAddress - @abstract Return the physical address of the first byte in the memory. - @discussion This method returns the physical address of the first byte in the memory. It is most useful on memory known to be physically contiguous. - @result A physical address. */ - - virtual IOPhysicalAddress getPhysicalSegment( IOByteCount offset, - IOByteCount * length ); +/*! @function getPhysicalSegment + @abstract Break a memory descriptor into its physically contiguous segments. + @discussion This method returns the physical address of the byte at the given offset into the memory, and optionally the length of the physically contiguous segment from that offset. + @param offset A byte offset into the memory whose physical address to return. + @param length If non-zero, getPhysicalSegment will store here the length of the physically contiguous segment at the given offset. + @result A physical address, or zero if the offset is beyond the length of the memory. */ - virtual addr64_t getPhysicalSegment64( IOByteCount offset, - IOByteCount * length ); + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, + IOOptionBits options = 0 ); /*! @function prepare @abstract Prepare the memory for an I/O transfer. @@ -144,33 +105,6 @@ class IOMultiMemoryDescriptor : public IOMemoryDescriptor @result An IOReturn code. */ virtual IOReturn complete(IODirection forDirection = kIODirectionNone); - -/*! @function readBytes - @abstract Copy data from the memory descriptor's buffer to the specified buffer. - @discussion This method copies data from the memory descriptor's memory at the given offset, to the caller's buffer. - @param offset A byte offset into the memory descriptor's memory. - @param bytes The caller supplied buffer to copy the data to.
- @param withLength The length of the data to copy. - @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. */ - - virtual IOByteCount readBytes( IOByteCount offset, - void * bytes, - IOByteCount withLength ); - -/*! @function writeBytes - @abstract Copy data to the memory descriptor's buffer from the specified buffer. - @discussion This method copies data to the memory descriptor's memory at the given offset, from the caller's buffer. - @param offset A byte offset into the memory descriptor's memory. - @param bytes The caller supplied buffer to copy the data from. - @param withLength The length of the data to copy. - @result The number of bytes copied, zero will be returned if the specified offset is beyond the length of the descriptor. */ - - virtual IOByteCount writeBytes( IOByteCount offset, - const void * bytes, - IOByteCount withLength ); - - virtual IOPhysicalAddress getSourceSegment(IOByteCount offset, - IOByteCount * length); }; #endif /* !_IOMULTIMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOPMEventSource.h b/iokit/IOKit/IOPMEventSource.h deleted file mode 100644 index 370b9e14e..000000000 --- a/iokit/IOKit/IOPMEventSource.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2001-2002 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - - #ifndef _IOPMEVENTSOURCE_H_ - #define _IOPMEVENTSOURCE_H_ - - #include - #include - - // Queue of requested states - typedef struct { - unsigned long state; - void *next; - } ActivityTickleStateList; - - class IOPMEventSource : public IOEventSource - { - OSDeclareDefaultStructors(IOPMEventSource); - -protected: - virtual bool checkForWork(void); - - ActivityTickleStateList *states; - -public: - typedef void (*Action)(OSObject *owner, unsigned long state); - - // static initialiser - static IOPMEventSource *PMEventSource(OSObject *owner, Action action); - - virtual bool init(OSObject *owner, Action action); - - // Enqueues an activityTickle request to be executed on the workloop - virtual IOReturn activityTickleOccurred(unsigned long); - }; - - #endif /* _IOPMEVENTSOURCE_H_ */ - diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index 1826c256e..f75a3e3ab 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -45,6 +45,8 @@ extern "C" { #endif +#include + extern boolean_t PEGetMachineName( char * name, int maxLength ); extern boolean_t PEGetModelName( char * name, int maxLength ); extern int PEGetPlatformEpoch( void ); @@ -61,7 +63,7 @@ extern int (*PE_halt_restart)(unsigned int type); extern int PEHaltRestart(unsigned int type); // Save the Panic Info. 
Returns the number of bytes saved. -extern unsigned long PESavePanicInfo(unsigned char *buffer, unsigned long length); +extern UInt32 PESavePanicInfo(UInt8 *buffer, UInt32 length); extern long PEGetGMTTimeOfDay( void ); extern void PESetGMTTimeOfDay( long secs ); diff --git a/iokit/IOKit/IORangeAllocator.h b/iokit/IOKit/IORangeAllocator.h index b6f7ee17b..d81bbefc2 100644 --- a/iokit/IOKit/IORangeAllocator.h +++ b/iokit/IOKit/IORangeAllocator.h @@ -40,7 +40,7 @@ #include #include -typedef UInt32 IORangeScalar; +typedef IOByteCount IORangeScalar; /*! @class IORangeAllocator @abstract A utility class to manage allocations from a range. @@ -94,7 +94,8 @@ class IORangeAllocator : public OSObject { @result Returns the new IORangeAllocator instance, to be released by the caller, or zero on failure. */ static IORangeAllocator * withRange( IORangeScalar endOfRange, - IORangeScalar defaultAlignment = 0, UInt32 capacity = 0, + IORangeScalar defaultAlignment = 0, + UInt32 capacity = 0, IOOptionBits options = 0 ); virtual void free(); diff --git a/iokit/IOKit/IORegistryEntry.h b/iokit/IOKit/IORegistryEntry.h index de924f9c1..dd34e1764 100644 --- a/iokit/IOKit/IORegistryEntry.h +++ b/iokit/IOKit/IORegistryEntry.h @@ -42,6 +42,7 @@ extern const OSSymbol * gIONameKey; extern const OSSymbol * gIOLocationKey; +extern const OSSymbol * gIORegistryEntryIDKey; class IORegistryEntry; class IORegistryPlane; @@ -70,7 +71,10 @@ class IORegistryEntry : public OSObject /*! @struct ExpansionData @discussion This structure will be used to expand the capablilties of this class in the future. */ - struct ExpansionData { }; + struct ExpansionData + { + uint64_t fRegistryEntryID; + }; /*! @var reserved Reserved for future use. (Internal use only) */ @@ -97,7 +101,6 @@ class IORegistryEntry : public OSObject IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; - OSMetaClassDeclareReservedUsed(IORegistryEntry, 0); /*! 
@function copyProperty @abstract Synchronized method to obtain a property from a registry entry or one of its parents (or children) in the hierarchy. Available in Mac OS X 10.1 or later. @@ -112,7 +115,6 @@ class IORegistryEntry : public OSObject IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; - OSMetaClassDeclareReservedUsed(IORegistryEntry, 1); /*! @function copyProperty @abstract Synchronized method to obtain a property from a registry entry or one of its parents (or children) in the hierarchy. Available in Mac OS X 10.1 or later. @@ -127,7 +129,6 @@ class IORegistryEntry : public OSObject IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents) const; - OSMetaClassDeclareReservedUsed(IORegistryEntry, 2); /*! @function copyParentEntry @abstract Returns an registry entry's first parent entry in a plane. Available in Mac OS X 10.1 or later. @@ -136,7 +137,6 @@ class IORegistryEntry : public OSObject @result Returns the first parent of the registry entry, or zero if the entry is not attached into the registry in that plane. A reference on the entry is returned to caller, which should be released. */ virtual IORegistryEntry * copyParentEntry( const IORegistryPlane * plane ) const; - OSMetaClassDeclareReservedUsed(IORegistryEntry, 3); /*! @function copyChildEntry @abstract Returns an registry entry's first child entry in a plane. Available in Mac OS X 10.1 or later. @@ -145,7 +145,6 @@ class IORegistryEntry : public OSObject @result Returns the first child of the registry entry, or zero if the entry is not attached into the registry in that plane. A reference on the entry is returned to caller, which should be released. */ virtual IORegistryEntry * copyChildEntry( const IORegistryPlane * plane ) const; - OSMetaClassDeclareReservedUsed(IORegistryEntry, 4); /* method available in Mac OS X 10.4 or later */ /*! @@ -179,10 +178,23 @@ member function's parameter list. 
virtual IOReturn runPropertyAction(Action action, OSObject *target, void *arg0 = 0, void *arg1 = 0, void *arg2 = 0, void *arg3 = 0); - OSMetaClassDeclareReservedUsed(IORegistryEntry, 5); private: - +#if __LP64__ + OSMetaClassDeclareReservedUnused(IORegistryEntry, 0); + OSMetaClassDeclareReservedUnused(IORegistryEntry, 1); + OSMetaClassDeclareReservedUnused(IORegistryEntry, 2); + OSMetaClassDeclareReservedUnused(IORegistryEntry, 3); + OSMetaClassDeclareReservedUnused(IORegistryEntry, 4); + OSMetaClassDeclareReservedUnused(IORegistryEntry, 5); +#else + OSMetaClassDeclareReservedUsed(IORegistryEntry, 0); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 1); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 2); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 3); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 4); + OSMetaClassDeclareReservedUsed(IORegistryEntry, 5); +#endif OSMetaClassDeclareReservedUnused(IORegistryEntry, 6); OSMetaClassDeclareReservedUnused(IORegistryEntry, 7); OSMetaClassDeclareReservedUnused(IORegistryEntry, 8); @@ -757,16 +769,34 @@ member function's parameter list. static const char * dealiasPath( const char ** opath, const IORegistryPlane * plane ); +/*! @function makePlane + @abstract Constructs an IORegistryPlane object. + @discussion Most planes in IOKit are created by the OS, although other planes may be created. + @param name A C-string name for the new plane, to be copied. + @result A new instance of an IORegistryPlane, or zero on failure. */ + + static const IORegistryPlane * makePlane( const char * name ); + +/*! @abstract Returns an ID for the registry entry that is global to all tasks. + @discussion The entry ID returned by getRegistryEntryID can be used to identify a registry entry across all tasks. A registry entry may be looked up by its entry ID by creating a matching dictionary with IORegistryEntryIDMatching() in user space, or IOService::registryEntryIDMatching() in the kernel, to be used with the IOKit matching functions. 
The ID is valid only until the machine reboots. + @result An ID for the registry entry, assigned when the entry is first attached in the registry. */ + + uint64_t getRegistryEntryID( void ); + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* * * * * * * * * * * * internals * * * * * * * * * * * */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -public: - static IORegistryEntry * initialize( void ); - static const IORegistryPlane * makePlane( const char * name ); - // don't even think about using this virtual bool init( IORegistryEntry * from, const IORegistryPlane * inPlane ); + +#ifdef XNU_KERNEL_PRIVATE +public: +#else +private: +#endif + static IORegistryEntry * initialize( void ); + private: inline bool arrayMember( OSArray * set, const IORegistryEntry * member, @@ -779,16 +809,24 @@ member function's parameter list. unsigned int relation, const IORegistryPlane * plane ) const; - virtual OSArray * getParentSetReference( const IORegistryPlane * plane ) + APPLE_KEXT_COMPATIBILITY_VIRTUAL + OSArray * getParentSetReference( const IORegistryPlane * plane ) const; - virtual OSArray * getChildSetReference( const IORegistryPlane * plane ) + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + OSArray * getChildSetReference( const IORegistryPlane * plane ) const; - virtual IORegistryEntry * getChildFromComponent( const char ** path, + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IORegistryEntry * getChildFromComponent( const char ** path, const IORegistryPlane * plane ); - virtual const OSSymbol * hasAlias( const IORegistryPlane * plane, + APPLE_KEXT_COMPATIBILITY_VIRTUAL + const OSSymbol * hasAlias( const IORegistryPlane * plane, char * opath = 0, int * length = 0 ) const; - virtual const char * matchPathLocation( const char * cmp, + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + const char * matchPathLocation( const char * cmp, const IORegistryPlane * plane ); }; diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index 45baa9348..1285e37ba 100644 --- 
a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2009 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -57,6 +57,10 @@ extern "C" { #include } +#ifndef UINT64_MAX +#define UINT64_MAX 18446744073709551615ULL +#endif + enum { kIODefaultProbeScore = 0 }; @@ -148,6 +152,10 @@ typedef void (*IOInterruptAction)( OSObject * target, void * refCon, typedef bool (*IOServiceNotificationHandler)( void * target, void * refCon, IOService * newService ); +typedef bool (*IOServiceMatchingNotificationHandler)( void * target, void * refCon, + IOService * newService, + IONotifier * notifier ); + /*! @typedef IOServiceInterestHandler @param target Reference supplied when the notification was registered. @param refCon Reference constant supplied when the notification was registered. @@ -272,7 +280,7 @@ The class name that the service will attempt to allocate when a user client conn kIOKitDebugKey, extern const OSSymbol * gIOKitDebugKey, "IOKitDebug"
Set some debug flags for logging the driver loading process. Flags are defined in IOKit/IOKitDebug.h, but 65535 works well.*/ - + class IOService : public IORegistryEntry { OSDeclareDefaultStructors(IOService) @@ -292,7 +300,8 @@ class IOService : public IORegistryEntry SInt32 __providerGeneration; IOService * __owner; IOOptionBits __state[2]; - IOOptionBits __reserved[4]; + uint64_t __timeBusy; + uint64_t __accumBusy; IOServicePM * pwrMgt; protected: @@ -301,7 +310,7 @@ class IOService : public IORegistryEntry public: // DEPRECATED - IOPMprot * pm_vars; + void * pm_vars; public: /* methods available in Mac OS X 10.1 or later */ @@ -357,14 +366,31 @@ class IOService : public IORegistryEntry virtual void systemWillShutdown( IOOptionBits specifier ); +/*! @function copyClientWithCategory + @availability Mac OS X v10.6 and later + @param category An OSSymbol corresponding to an IOMatchCategory matching property. + @result Returns a reference to the IOService child with the given category. The result should be released by the caller. 
+*/ + + virtual IOService * copyClientWithCategory( const OSSymbol * category ); + private: +#if __LP64__ + OSMetaClassDeclareReservedUnused(IOService, 0); + OSMetaClassDeclareReservedUnused(IOService, 1); + OSMetaClassDeclareReservedUnused(IOService, 2); + OSMetaClassDeclareReservedUnused(IOService, 3); + OSMetaClassDeclareReservedUnused(IOService, 4); + OSMetaClassDeclareReservedUnused(IOService, 5); +#else OSMetaClassDeclareReservedUsed(IOService, 0); OSMetaClassDeclareReservedUsed(IOService, 1); OSMetaClassDeclareReservedUsed(IOService, 2); OSMetaClassDeclareReservedUsed(IOService, 3); OSMetaClassDeclareReservedUsed(IOService, 4); + OSMetaClassDeclareReservedUsed(IOService, 5); +#endif - OSMetaClassDeclareReservedUnused(IOService, 5); OSMetaClassDeclareReservedUnused(IOService, 6); OSMetaClassDeclareReservedUnused(IOService, 7); OSMetaClassDeclareReservedUnused(IOService, 8); @@ -592,13 +618,17 @@ class IOService : public IORegistryEntry virtual void adjustBusy( SInt32 delta ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn waitQuiet(mach_timespec_t * timeout) + APPLE_KEXT_DEPRECATED; + /*! @function waitQuiet @abstract Waits for an IOService object's busyState to be zero. @discussion Blocks the caller until an IOService object is non busy. - @param timeout Specifies a maximum time to wait. + @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. @result Returns an error code if Mach synchronization primitives fail, kIOReturnTimeout, or kIOReturnSuccess. */ - virtual IOReturn waitQuiet( mach_timespec_t * timeout = 0 ); + IOReturn waitQuiet(uint64_t timeout = UINT64_MAX); /* Matching */ @@ -644,7 +674,7 @@ class IOService : public IORegistryEntry /* Notifications */ /*! @function addNotification - @abstract Adds a persistant notification handler to be notified of IOService events. + @abstract Deprecated; use addMatchingNotification(). Adds a persistent notification handler to be notified of IOService events.
@discussion IOService will deliver notifications of changes in state of an IOService object to registered clients. The type of notification is specified by a symbol, for example gIOMatchedNotification or gIOTerminatedNotification, and notifications will only include IOService objects that match the supplied matching dictionary. Notifications are ordered by a priority set with addNotification. When the notification is installed, its handler will be called with each of any currently existing IOService objects that are in the correct state (eg. registered) and match the supplied matching dictionary, avoiding races between finding preexisting and new IOService events. The notification request is identified by an instance of an IONotifier object, through which it can be enabled, disabled, or removed. addNotification consumes a retain count on the matching dictionary when the notification is removed. @param type An OSSymbol identifying the type of notification and IOService state:
gIOPublishNotification Delivered when an IOService object is registered. @@ -663,10 +693,33 @@ class IOService : public IORegistryEntry const OSSymbol * type, OSDictionary * matching, IOServiceNotificationHandler handler, void * target, void * ref = 0, + SInt32 priority = 0 ) + APPLE_KEXT_DEPRECATED; + +/*! @function addMatchingNotification + @abstract Adds a persistent notification handler to be notified of IOService events. + @discussion IOService will deliver notifications of changes in state of an IOService object to registered clients. The type of notification is specified by a symbol, for example gIOMatchedNotification or gIOTerminatedNotification, and notifications will only include IOService objects that match the supplied matching dictionary. Notifications are ordered by a priority set with addMatchingNotification. When the notification is installed, its handler will be called with each of any currently existing IOService objects that are in the correct state (eg. registered) and match the supplied matching dictionary, avoiding races between finding preexisting and new IOService events. The notification request is identified by an instance of an IONotifier object, through which it can be enabled, disabled, or removed. addMatchingNotification does not consume a reference on the matching dictionary when the notification is removed, unlike addNotification. + @param type An OSSymbol identifying the type of notification and IOService state: +
gIOPublishNotification Delivered when an IOService object is registered. +
gIOFirstPublishNotification Delivered when an IOService object is registered, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOMatchedNotification Delivered when an IOService object has been matched with all client drivers, and they have been probed and started. +
gIOFirstMatchNotification Delivered when an IOService object has been matched with all client drivers, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOTerminatedNotification Delivered after an IOService object has been terminated, during its finalize stage. + @param matching A matching dictionary to restrict notifications to only matching IOService objects. The dictionary is retained while the notification is installed. (Differs from addNotification). + @param handler A C function callback to deliver notifications. + @param target An instance reference for the callback's use. + @param ref A reference constant for the callback's use. + @param priority A constant ordering all notifications of each type. + @result An instance of an IONotifier object that can be used to control or destroy the notification request. */ + + static IONotifier * addMatchingNotification( + const OSSymbol * type, OSDictionary * matching, + IOServiceMatchingNotificationHandler handler, + void * target, void * ref = 0, SInt32 priority = 0 ); /*! @function waitForService - @abstract Waits for a matching to service to be published. + @abstract Deprecated; use waitForMatchingService(). Waits for a matching service to be published. @discussion Provides a method of waiting for an IOService object matching the supplied matching dictionary to be registered and fully matched. @param matching The matching dictionary describing the desired IOService object. waitForService consumes one reference of the matching dictionary. @param timeout The maximum time to wait. @@ -675,6 +728,16 @@ class IOService : public IORegistryEntry static IOService * waitForService( OSDictionary * matching, mach_timespec_t * timeout = 0); +/*! @function waitForMatchingService + @abstract Waits for a matching service to be published. + @discussion Provides a method of waiting for an IOService object matching the supplied matching dictionary to be registered and fully matched. + @param matching The matching dictionary describing the desired IOService object.
(Does not consume a reference of the matching dictionary - differs from waitForService() which does consume a reference on the matching dictionary.) + @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. + @result A published IOService object matching the supplied dictionary. waitForMatchingService returns a reference to the IOService which should be released by the caller. (Differs from waitForService() which does not retain the returned object.) */ + + static IOService * waitForMatchingService( OSDictionary * matching, + uint64_t timeout = UINT64_MAX); + /*! @function getMatchingServices @abstract Finds the set of current published IOService objects matching a matching dictionary. @discussion Provides a method of finding the current set of published IOService objects matching the supplied matching dictionary. @@ -683,24 +746,7 @@ class IOService : public IORegistryEntry static OSIterator * getMatchingServices( OSDictionary * matching ); -/*! @function installNotification - @abstract Adds a persistant notification handler to be notified of IOService events. - @discussion A lower level interface to @link addNotification addNotification@/link that installs a handler and returns the current set of IOService objects that are in the specified state and match the matching dictionary. - @param type See addNotification. - @param matching See addNotification. - @param handler See addNotification. - @param self See addNotification. - @param ref See addNotification. - @param priority See addNotification. - @param existing Returns an iterator over the set of IOService objects that are currently in the specified state and match the matching dictionary. - @result See addNotification. 
*/ - - static IONotifier * installNotification( - const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, - void * target, void * ref, - SInt32 priority, OSIterator ** existing ); - +public: /* Helpers to make matching dictionaries for simple cases, * they add keys to an existing dictionary, or create one. */ @@ -776,6 +822,17 @@ class IOService : public IORegistryEntry static OSDictionary * propertyMatching( const OSSymbol * key, const OSObject * value, OSDictionary * table = 0 ); +/*! @function registryEntryIDMatching + @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify an IORegistryEntryID match. + @discussion registryEntryIDMatching creates a matching dictionary that specifies the IOService object with the assigned registry entry ID (returned by IORegistryEntry::getRegistryEntryID()). An existing dictionary may be passed in, in which case the matching properties will be added to that dictionary rather than creating a new one. + @param entryID The service's ID. Matching is successful on the IOService object that returns that ID from the IORegistryEntry::getRegistryEntryID() method. + @param table If zero, registryEntryIDMatching creates a matching dictionary and returns a reference to it, otherwise the matching properties are added to the specified dictionary. + @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ + + static OSDictionary * registryEntryIDMatching( uint64_t entryID, + OSDictionary * table = 0 ); + + /*! @function addLocation @abstract Adds a location matching property to an existing dictionary. @discussion This function creates matching properties that specify the location of a IOService object, as an embedded matching dictionary. This matching will be successful on an IOService object that attached to an IOService object which matches this location matching dictionary.
@@ -1057,9 +1114,10 @@ class IOService : public IORegistryEntry /*! @function messageClient @abstract Sends a generic message to an attached client. @discussion A provider may deliver messages via the @link message message@/link method to its clients informing them of state changes, such as kIOMessageServiceIsTerminated or kIOMessageServiceIsSuspended. Certain messages are defined by the I/O Kit in IOMessage.h while others may be family dependent. This method may be called in the provider to send a message to the specified client, which may be useful for overrides. - @param type A type defined in IOMessage.h or defined by the provider family. + @param messageType A type defined in IOMessage.h or defined by the provider family. @param client A client of the IOService to send the message. - @param argument An argument defined by the provider family, not used by IOService. + @param messageArgument An argument defined by the provider family, not used by IOService. + @param argSize Specifies the size of messageArgument, in bytes. If argSize is non-zero, messageArgument is treated as a pointer to argSize bytes of data. If argSize is 0 (the default), messageArgument is treated as an ordinal and passed by value. @result The return code from the client message call. */ virtual IOReturn messageClient( UInt32 messageType, OSObject * client, @@ -1070,6 +1128,7 @@ class IOService : public IORegistryEntry @discussion A provider may deliver messages via the @link message message@/link method to its clients informing them of state changes, such as kIOMessageServiceIsTerminated or kIOMessageServiceIsSuspended. Certain messages are defined by the I/O Kit in IOMessage.h while others may be family dependent. This method may be called in the provider to send a message to all the attached clients, via the @link messageClient messageClient@/link method. @param type A type defined in IOMessage.h or defined by the provider family. 
@param argument An argument defined by the provider family, not used by IOService. + @param argSize Specifies the size of argument, in bytes. If argSize is non-zero, argument is treated as a pointer to argSize bytes of data. If argSize is 0 (the default), argument is treated as an ordinal and passed by value. @result Any non-kIOReturnSuccess return codes returned by the clients, or kIOReturnSuccess if all return kIOReturnSuccess. */ virtual IOReturn messageClients( UInt32 type, @@ -1130,72 +1189,127 @@ class IOService : public IORegistryEntry virtual int errnoFromReturn( IOReturn rtn ); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - /* * * * * * * * * * * * Internals * * * * * * * * * * * */ + /* * * * * * * * * * end of IOService API * * * * * * * */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -public: + /* for IOInterruptController implementors */ + int _numInterruptSources; IOInterruptSource *_interruptSources; - static void initialize( void ); - + /* overrides */ virtual bool serializeProperties( OSSerialize * s ) const; +#ifdef KERNEL_PRIVATE + /* Apple only SPI to control CPU low power modes */ + void setCPUSnoopDelay(UInt32 ns); + UInt32 getCPUSnoopDelay(); +#endif + void requireMaxBusStall(UInt32 ns); + void requireMaxInterruptDelay(uint32_t ns); + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + /* * * * * * * * * * * * Internals * * * * * * * * * * * */ + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifdef XNU_KERNEL_PRIVATE +public: + // called from other xnu components + static void initialize( void ); static void setPlatform( IOPlatformExpert * platform); static void setPMRootDomain( class IOPMrootDomain * rootDomain ); - static IOReturn catalogNewDrivers( OSOrderedSet * newTables ); + uint64_t getAccumulatedBusyTime( void ); + +private: static IOReturn waitMatchIdle( UInt32 ms ); + static IONotifier * installNotification( + const OSSymbol * type, OSDictionary * matching, + 
IOServiceMatchingNotificationHandler handler, + void * target, void * ref, + SInt32 priority, OSIterator ** existing ); +#if !defined(__LP64__) + static IONotifier * installNotification( + const OSSymbol * type, OSDictionary * matching, + IOServiceNotificationHandler handler, + void * target, void * ref, + SInt32 priority, OSIterator ** existing); +#endif /* !defined(__LP64__) */ +#endif - static IOService * resources( void ); - virtual bool checkResources( void ); - virtual bool checkResource( OSObject * matching ); +private: + APPLE_KEXT_COMPATIBILITY_VIRTUAL + bool checkResources( void ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + bool checkResource( OSObject * matching ); - virtual void probeCandidates( OSOrderedSet * matches ); - virtual bool startCandidate( IOService * candidate ); - virtual IOService * getClientWithCategory( const OSSymbol * category ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void probeCandidates( OSOrderedSet * matches ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + bool startCandidate( IOService * candidate ); - virtual bool passiveMatch( OSDictionary * matching, bool changesOK = false); +public: + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOService * getClientWithCategory( const OSSymbol * category ) + APPLE_KEXT_DEPRECATED; + // copyClientWithCategory is the public replacement - virtual void startMatching( IOOptionBits options = 0 ); - virtual void doServiceMatch( IOOptionBits options ); - virtual void doServiceTerminate( IOOptionBits options ); +#ifdef XNU_KERNEL_PRIVATE + /* Callable within xnu source only - but require vtable entries to be visible */ +public: +#else +private: +#endif + APPLE_KEXT_COMPATIBILITY_VIRTUAL + bool passiveMatch( OSDictionary * matching, bool changesOK = false); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void startMatching( IOOptionBits options = 0 ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void doServiceMatch( IOOptionBits options ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void doServiceTerminate( IOOptionBits options ); - static OSObject * 
getExistingServices( OSDictionary * matching, +private: + static OSObject * copyExistingServices( OSDictionary * matching, IOOptionBits inState, IOOptionBits options = 0 ); static IONotifier * setNotification( const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority = 0 ); static IONotifier * doInstallNotification( const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority, OSIterator ** existing ); static bool syncNotificationHandler( void * target, void * ref, - IOService * newService ); + IOService * newService, IONotifier * notifier ); - virtual void deliverNotification( const OSSymbol * type, + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void deliverNotification( const OSSymbol * type, IOOptionBits orNewState, IOOptionBits andNewState ); bool invokeNotifer( class _IOServiceNotifier * notify ); - virtual void unregisterAllInterest( void ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void unregisterAllInterest( void ); - virtual IOReturn waitForState( UInt32 mask, UInt32 value, + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn waitForState( UInt32 mask, UInt32 value, mach_timespec_t * timeout = 0 ); + IOReturn waitForState( UInt32 mask, UInt32 value, uint64_t timeout ); + UInt32 _adjustBusy( SInt32 delta ); bool terminatePhase1( IOOptionBits options = 0 ); void scheduleTerminatePhase2( IOOptionBits options = 0 ); void scheduleStop( IOService * provider ); void scheduleFinalize( void ); - static void terminateThread( void * arg ); + static void terminateThread( void * arg, wait_result_t unused ); static void terminateWorker( IOOptionBits options ); static void actionWillTerminate( IOService * victim, IOOptionBits options, OSArray * doPhase2List ); @@ -1203,514 +1317,448 @@ class IOService : public IORegistryEntry static void actionFinalize( 
IOService * victim, IOOptionBits options ); static void actionStop( IOService * client, IOService * provider ); - virtual IOReturn resolveInterrupt(IOService *nub, int source); - virtual IOReturn lookupInterrupt(int source, bool resolve, IOInterruptController **interruptController); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn resolveInterrupt(IOService *nub, int source); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn lookupInterrupt(int source, bool resolve, IOInterruptController **interruptController); - // SPI to control CPU low power modes - void setCPUSnoopDelay(UInt32 ns); - UInt32 getCPUSnoopDelay(); - void requireMaxBusStall(UInt32 ns); - - void PMfree( void ); +#ifdef XNU_KERNEL_PRIVATE + /* end xnu internals */ +#endif /* power management */ - +public: + /*! @function PMinit @abstract Initializes power management for a driver. @discussion PMinit allocates and initializes the power management instance variables, and it should be called before accessing those variables or calling the power management methods. This method should be called inside the driver's start routine and must be paired with a call to @link PMstop PMstop@/link. - - Most calls to PMinit are followed by calls to @link joinPMtree joinPMtree@/link and @link registerPowerDriver registerPowerDriver@/link. -*/ - virtual void PMinit (void ); + Most calls to PMinit are followed by calls to @link joinPMtree joinPMtree@/link and @link registerPowerDriver registerPowerDriver@/link. */ + + virtual void PMinit( void ); /*! @function PMstop @abstract Frees and removes the driver from power management. - @discussion The power managment variables don't exist after this call and the power managment methods in the caller shouldn't be called. - - Calling PMstop cleans up for the three power management initialization calls: @link PMinit PMinit@/link, @link joinPMtree joinPMtree@/link, and @link registerPowerDriver registerPowerDriver@/link. 
-*/ - virtual void PMstop ( void ); + @discussion The power managment variables don't exist after this call and the power managment methods in the caller shouldn't be called. + Calling PMstop cleans up for the three power management initialization calls: @link PMinit PMinit@/link, @link joinPMtree joinPMtree@/link, and @link registerPowerDriver registerPowerDriver@/link. */ + + virtual void PMstop( void ); /*! @function joinPMtree - @abstract Joins the driver into the power plane of the I/O Registry . - @discussion A driver uses this method to call its nub when initializing (usually in its start routine after calling @link PMinit PMinit@/link), to be attached into the power management hierarchy (i.e., the power plane). A driver usually calls this method on the driver for the device that provides it power (this is frequently the nub). - - Before this call returns, the caller will probably be called at @link setPowerParent setPowerParent@/link and @link setAggressiveness setAggressiveness@/link and possibly at @link addPowerChild addPowerChild@/link as it is added to the hierarchy. + @abstract Joins the driver into the power plane of the I/O Registry. + @discussion A driver uses this method to call its nub when initializing (usually in its start routine after calling @link PMinit PMinit@/link), to be attached into the power management hierarchy (i.e., the power plane). A driver usually calls this method on the driver for the device that provides it power (this is frequently the nub). + Before this call returns, the caller will probably be called at @link setPowerParent setPowerParent@/link and @link setAggressiveness setAggressiveness@/link and possibly at @link addPowerChild addPowerChild@/link as it is added to the hierarchy. This method may be overridden by a nub subclass. + @param driver The driver to be added to the power plane, usually this. */ - This method may be overridden by a nub subclass. 
- @param driver The driver to be added to the power plane, usually this. -*/ - virtual void joinPMtree ( IOService * driver ); + virtual void joinPMtree( IOService * driver ); /*! @function registerPowerDriver @abstract Registers a set of power states that the driver supports. - @discussion A driver defines its array of supported power states with power management in its power management initialization (its start routine). If successful, power management will call the driver to instruct it to change its power state through @link setPowerState setPowerState@/link. - Most drivers do not need to override registerPowerDriver. A nub may override registerPowerDriver if it needs to arrange its children in the power plane differently than the default placement, but this is uncommon. - @param controllingDriver A pointer to the calling driver, usually this. @param powerStates A driver-defined array of power states that the driver and device support. Power states are defined in pwr_mgt/IOPMpowerState.h. - @param numberOfStates The number of power states in the array. - @result IOPMNoErr. All errors are logged via kprintf. -*/ - virtual IOReturn registerPowerDriver ( - IOService* controllingDriver, - IOPMPowerState* powerStates, - unsigned long numberOfStates ); + @param numberOfStates The number of power states in the array. + @result IOPMNoErr. All errors are logged via kprintf. */ -/*! - @function registerInterestedDriver + virtual IOReturn registerPowerDriver( + IOService * controllingDriver, + IOPMPowerState * powerStates, + unsigned long numberOfStates ); + +/*! @function registerInterestedDriver @abstract Allows an IOService object to register interest in the changing power state of a power-managed IOService object. @discussion Call registerInterestedDriver on the IOService object you are interested in receiving power state messages from, and pass a pointer to the interested driver (this) as an argument. 
- The interested driver should override @link powerStateWillChangeTo powerStateWillChangeTo@/link and @link powerStateDidChangeTo powerStateDidChangeTo@/link to receive these power change messages. - Interested drivers must acknowledge power changes in powerStateWillChangeTo or powerStateDidChangeTo, either via return value or later calls to @link acknowledgePowerChange acknowledgePowerChange@/link. - - Most drivers do not need to override registerInterestedDriver. - @param theDriver The driver of interest adds this pointer to the list of interested drivers. It informs drivers on this list before and after the power change. - @result Flags describing the capability of the device in its current power state. If the current power state is not yet defined, zero is returned (this is the case when the driver is not yet in the power domain hierarchy or hasn't fully registered with power management yet). -*/ - virtual IOPMPowerFlags registerInterestedDriver ( IOService* theDriver ); + @param theDriver The driver of interest adds this pointer to the list of interested drivers. It informs drivers on this list before and after the power change. + @result Flags describing the capability of the device in its current power state. If the current power state is not yet defined, zero is returned (this is the case when the driver is not yet in the power domain hierarchy or hasn't fully registered with power management yet). */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOPMPowerFlags registerInterestedDriver( IOService * theDriver ); /*! @function deRegisterInterestedDriver @abstract De-registers power state interest from a previous call to registerInterestedDriver. @discussion Most drivers do not need to override deRegisterInterestedDriver. @param theDriver The interested driver previously passed into @link registerInterestedDriver registerInterestedDriver@/link. - @result A return code that can be ignored by the caller. 
-*/ - virtual IOReturn deRegisterInterestedDriver ( IOService * theDriver ); + @result A return code that can be ignored by the caller. */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn deRegisterInterestedDriver( IOService * theDriver ); /*! @function acknowledgePowerChange @abstract Acknowledges an in-progress power state change. - @discussion When power management informs an interested object (via @link powerStateWillChangeTo powerStateWillChangeTo@/link or @link powerStateDidChangeTo powerStateDidChangeTo@/link), the object can return an immediate acknowledgement via a return code, or it may return an indication that it will acknowledge later by calling acknowledgePowerChange. - - Interested objects are those that have registered as interested drivers, as well as power plane children of the power changing driver. - - A driver that calls @link registerInterestedDriver registerInterestedDriver@/link must call acknowledgePowerChange, or use an immediate acknowledgement return from powerStateWillChangeTo or powerStateDidChangeTo. - - Most drivers do not need to override acknowledgePowerChange. - + @discussion When power management informs an interested object (via @link powerStateWillChangeTo powerStateWillChangeTo@/link or @link powerStateDidChangeTo powerStateDidChangeTo@/link), the object can return an immediate acknowledgement via a return code, or it may return an indication that it will acknowledge later by calling acknowledgePowerChange. + Interested objects are those that have registered as interested drivers, as well as power plane children of the power changing driver. A driver that calls @link registerInterestedDriver registerInterestedDriver@/link must call acknowledgePowerChange, or use an immediate acknowledgement return from powerStateWillChangeTo or powerStateDidChangeTo. @param whichDriver A pointer to the calling driver. The called object tracks all interested parties to ensure that all have acknowledged the power state change. 
- @result IOPMNoErr. -*/ - virtual IOReturn acknowledgePowerChange ( IOService * whichDriver ); - + @result IOPMNoErr. */ + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn acknowledgePowerChange( IOService * whichDriver ); /*! @function acknowledgeSetPowerState @abstract Acknowledges the belated completion of a driver's setPowerState power state change. @discussion After power management instructs a driver to change its state via @link setPowerState setPowerState@/link, that driver must acknowledge the change when its device has completed its transition. The acknowledgement may be immediate, via a return code from setPowerState, or delayed, via this call to acknowledgeSetPowerState. - - Any driver that does not return kIOPMAckImplied from its setPowerState implementation must later call acknowledgeSetPowerState. - - Most drivers do not need to override acknowledgeSetPowerState. - @result IOPMNoErr. -*/ - virtual IOReturn acknowledgeSetPowerState ( void ); + Any driver that does not return kIOPMAckImplied from its setPowerState implementation must later call acknowledgeSetPowerState. + @result IOPMNoErr. */ -/*! @function powerDomainWillChangeTo - @abstract Notifies a driver that its power domain is about to change state. - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - IOReturn powerDomainWillChangeTo ( - IOPMPowerFlags newPowerStateFlags, - IOPowerConnection * whichParent ); - -/*! @function powerDomainDidChangeTo - @abstract Notifies a driver that its power domain is about to change state. - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - IOReturn powerDomainDidChangeTo ( - IOPMPowerFlags newPowerStateFlags, - IOPowerConnection * whichParent ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn acknowledgeSetPowerState( void ); /*! 
@function requestPowerDomainState @abstract Tells a driver to adjust its power state. - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual IOReturn requestPowerDomainState ( - IOPMPowerFlags desiredState, - IOPowerConnection * whichChild, - unsigned long specificationFlags ); + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ + + virtual IOReturn requestPowerDomainState( + IOPMPowerFlags desiredState, + IOPowerConnection * whichChild, + unsigned long specificationFlags ); /*! @function makeUsable @abstract Requests that a device become usable. @discussion This method is called when some client of a device (or the device's own driver) is asking for the device to become usable. Power management responds by telling the object upon which this method is called to change to its highest power state. - - makeUsable is implemented using @link changePowerStateToPriv changePowerStateToPriv@/link. - - Subsequent requests for lower power, such as from changePowerStateToPriv, will pre-empt this request. - @result A return code that can be ignored by the caller. -*/ - virtual IOReturn makeUsable ( void ); + makeUsable is implemented using @link changePowerStateToPriv changePowerStateToPriv@/link. Subsequent requests for lower power, such as from changePowerStateToPriv, will pre-empt this request. + @result A return code that can be ignored by the caller. */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn makeUsable( void ); /*! @function temporaryPowerClampOn @abstract A driver calls this method to hold itself in the highest power state until it has children. - @discussion Use temporaryPowerClampOn to hold your driver in its highest power state while waiting for child devices to attach. After children have attached, the clamp is released and the device's power state is controlled by the children's requirements. 
- @result A return code that can be ignored by the caller. -*/ - virtual IOReturn temporaryPowerClampOn ( void ); - + @discussion Use temporaryPowerClampOn to hold your driver in its highest power state while waiting for child devices to attach. After children have attached, the clamp is released and the device's power state is controlled by the children's requirements. + @result A return code that can be ignored by the caller. */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn temporaryPowerClampOn( void ); + /*! @function changePowerStateTo @abstract Sets a driver's power state. - @discussion This function is one of several that are used to set a driver's power state. In most circumstances, however, you should call @link changePowerStateToPriv changePowerStateToPriv@/link instead. - + @discussion This function is one of several that are used to set a driver's power state. In most circumstances, however, you should call @link changePowerStateToPriv changePowerStateToPriv@/link instead. Calls to changePowerStateTo, changePowerStateToPriv, and a driver's power children all affect the power state of a driver. For legacy design reasons, they have overlapping functionality. Although you should call changePowerStateToPriv to change your device's power state, you might need to call changePowerStateTo in the following circumstances: -
  • If a driver will be using changePowerStateToPriv to change its power state, it should call changePowerStateTo(0) in its start routine to eliminate the influence changePowerStateTo has on power state calculations. -
  • Call changePowerStateTo in conjunction with @link setIdleTimerPeriod setIdleTimerPeriod@/link and @link activityTickle activityTickle@/link to idle a driver into a low power state. For a driver with 3 power states, for example, changePowerStateTo(1) sets a minimum level of power state 1, such that the idle timer period may not set your device's power any lower than state 1.
- @param ordinal The number of the desired power state in the power state array. @result A return code that can be ignored by the caller. */ - virtual IOReturn changePowerStateTo ( unsigned long ordinal ); + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn changePowerStateTo( unsigned long ordinal ); /*! @function currentCapability @abstract Finds out the capability of a device's current power state. - @result A copy of the capabilityFlags field for the current power state in the power state array. - */ - virtual IOPMPowerFlags currentCapability ( void ); + @result A copy of the capabilityFlags field for the current power state in the power state array. */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOPMPowerFlags currentCapability( void ); /*! @function currentPowerConsumption @abstract Finds out the current power consumption of a device. @discussion Most Mac OS X power managed drivers do not report their power consumption via the staticPower field. Thus this call will not accurately reflect power consumption for most drivers. - @result A copy of the staticPower field for the current power state in the power state array. -*/ - virtual unsigned long currentPowerConsumption ( void ); + @result A copy of the staticPower field for the current power state in the power state array. */ + + APPLE_KEXT_COMPATIBILITY_VIRTUAL + unsigned long currentPowerConsumption( void ); /*! @function activityTickle @abstract Informs power management when a power-managed device is in use, so that power management can track when it is idle and adjust its power state accordingly. @discussion The activityTickle method is provided for objects in the system (or for the driver itself) to tell a driver that its device is being used. - The IOService superclass can manage idleness determination with a simple idle timer mechanism and this activityTickle call. To start this up, the driver calls its superclass's setIdleTimerPeriod. This starts a timer for the time interval specified in the call. 
When the timer expires, the superclass checks to see if there has been any activity since the last timer expiration. (It checks to see if activityTickle has been called). If there has been activity, it restarts the timer, and this process continues. When the timer expires, and there has been no device activity, the superclass lowers the device power state to the next lower state. This can continue until the device is in state zero. - - After the device has been powered down by at least one power state, a subsequent call to activityTickle causes the device to be switched to a higher state required for the activity. - + After the device has been powered down by at least one power state, a subsequent call to activityTickle causes the device to be switched to a higher state required for the activity. If the driver is managing the idleness determination totally on its own, the value of the type parameter should be kIOPMSubclassPolicy, and the driver should override the activityTickle method. The superclass IOService implementation of activityTickle does nothing with the kIOPMSubclassPolicy argument. + @param type When type is kIOPMSubclassPolicy, activityTickle is not handled in IOService and should be intercepted by the subclass. When type is kIOPMSuperclassPolicy1, an activity flag is set and the device state is checked. If the device has been powered down, it is powered up again. + @param stateNumber When type is kIOPMSuperclassPolicy1, stateNumber contains the desired power state ordinal for the activity. If the device is in a lower state, the superclass will switch it to this state. This is for devices that can handle some accesses in lower power states; the device is powered up only as far as it needs to be for the activity. + @result When type is kIOPMSuperclassPolicy1, the superclass returns true if the device is currently in the state specified by stateNumber. 
If the device is in a lower state and must be powered up, the superclass returns false; in this case the superclass will initiate a power change to power the device up. */ - @param type When type is kIOPMSubclassPolicy, activityTickle is not handled in IOService and should be intercepted by the subclass. When type is kIOPMSuperclassPolicy1, an activity flag is set and the device state is checked. If the device has been powered down, it is powered up again. - @param stateNumber When type is kIOPMSuperclassPolicy1, stateNumber contains the desired power state ordinal for the activity. If the device is in a lower state, the superclass will switch it to this state. This is for devices that can handle some accesses in lower power states; the device is powered up only as far as it needs to be for the activity. - @result When type is kIOPMSuperclassPolicy1, the superclass returns true if the device is currently in the state specified by stateNumber. If the device is in a lower state and must be powered up, the superclass returns false; in this case the superclass will initiate a power change to power the device up. -*/ - virtual bool activityTickle ( - unsigned long type, - unsigned long stateNumber=0 ); + virtual bool activityTickle( + unsigned long type, + unsigned long stateNumber = 0 ); /*! @function setAggressiveness @abstract Broadcasts an aggressiveness factor from the parent of a driver to the driver. - @discussion Implement setAggressiveness to receive a notification when an "aggressiveness Aggressiveness factors are a loose set of power management variables that contain values for system sleep timeout, display sleep timeout, whether the system is on battery or AC, and other power management features. There are several aggressiveness factors that can be broadcast and a driver may take action on whichever factors apply to it. 
- A driver that has joined the power plane via @link joinPMtree joinPMtree@/link will receive setAgressiveness calls when aggressiveness factors change. - A driver may override this call if it needs to do something with the new factor (such as change its idle timeout). If overridden, the driver must call its superclass's setAgressiveness method in its own setAgressiveness implementation. - Most drivers do not need to implement setAgressiveness. - @param type The aggressiveness factor type, such as kPMMinutesToDim, kPMMinutesToSpinDown, kPMMinutesToSleep, and kPMPowerSource. (Aggressiveness factors are defined in pwr_mgt/IOPM.h.) - - @param newLevel The aggressiveness factor's new value. - @result IOPMNoErr. -*/ - virtual IOReturn setAggressiveness ( - unsigned long type, - unsigned long newLevel ); + @param newLevel The aggressiveness factor's new value. + @result IOPMNoErr. */ + + virtual IOReturn setAggressiveness( + unsigned long type, + unsigned long newLevel ); /*! @function getAggressiveness - @abstract Returns the current aggressiveness value for the given type. + @abstract Returns the current aggressiveness value for the given type. @param type The aggressiveness factor to query. @param currentLevel Upon successful return, contains the value of aggressiveness factor type. - @result kIOReturnSuccess upon success; an I/O Kit error code otherwise. - */ - virtual IOReturn getAggressiveness ( - unsigned long type, - unsigned long *currentLevel ); + @result kIOReturnSuccess upon success; an I/O Kit error code otherwise. */ + + virtual IOReturn getAggressiveness( + unsigned long type, + unsigned long * currentLevel ); +#ifndef __LP64__ /*! @function systemWake - @abstract Tells every driver in the power plane that the system is waking up. - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. 
-*/ - virtual IOReturn systemWake ( void ); + @abstract Tells every driver in the power plane that the system is waking up. + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ + + virtual IOReturn systemWake( void ) + APPLE_KEXT_DEPRECATED; /*! @function temperatureCriticalForZone - @abstract Alerts a driver to a critical temperature in some thermal zone. - @discussion This call is unused by power management. It is not intended to be called or overridden. -*/ - virtual IOReturn temperatureCriticalForZone ( IOService * whichZone ); + @abstract Alerts a driver to a critical temperature in some thermal zone. + @discussion This call is unused by power management. It is not intended to be called or overridden. */ + + virtual IOReturn temperatureCriticalForZone( IOService * whichZone ) + APPLE_KEXT_DEPRECATED; /*! @function youAreRoot - @abstract Informs the root power domain IOService object that is is the root power domain. - @discussion The Platform Expert instantiates the root power domain IOService object and calls it with this method to inform it that it is the root power domain. - - This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual IOReturn youAreRoot ( void ); + @abstract Informs power management which IOService object is the power plane root. + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ + + virtual IOReturn youAreRoot( void ) + APPLE_KEXT_DEPRECATED; /*! @function setPowerParent - @abstract For internal use only; deprecated; not intended to be called or overridden. -*/ - virtual IOReturn setPowerParent ( - IOPowerConnection * theParent, - bool stateKnown, - IOPMPowerFlags currentState ); + @abstract This call is handled internally by power management. It is not intended to be overridden or called by drivers. 
*/ + + virtual IOReturn setPowerParent( + IOPowerConnection * parent, + bool stateKnown, + IOPMPowerFlags currentState ) + APPLE_KEXT_DEPRECATED; +#endif /* !__LP64__ */ /*! @function addPowerChild - @abstract Informs a driver that it has a new child. - @discussion The Platform Expert uses this method to call a driver and introduce it to a new child. - - This call is handled internally by power management. It is not intended to be overridden or called by drivers. - @param theChild A pointer to the child IOService object. -*/ - virtual IOReturn addPowerChild ( IOService * theChild ); + @abstract Informs a driver that it has a new child. + @discussion The Platform Expert uses this method to call a driver and introduce it to a new child. This call is handled internally by power management. It is not intended to be overridden or called by drivers. + @param theChild A pointer to the child IOService object. */ + + virtual IOReturn addPowerChild( IOService * theChild ); /*! @function removePowerChild - @abstract Informs a power managed driver that one of its power plane childen is disappearing. - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. + @abstract Informs a power managed driver that one of its power plane children is disappearing. + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ -*/ - virtual IOReturn removePowerChild ( IOPowerConnection * theChild ); + virtual IOReturn removePowerChild( IOPowerConnection * theChild ); -/* @function command_received - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual void command_received ( void *, void * , void * , void *); +#ifndef __LP64__ +/*! @function command_received + @discussion This call is handled internally by power management.
It is not intended to be overridden or called by drivers. */ -/* @function start_PM_idle_timer - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual void start_PM_idle_timer ( void ); + virtual void command_received( void *, void * , void * , void * ); +#endif -/* @function PM_idle_timer_expiration - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual void PM_idle_timer_expiration ( void ); +/*! @function start_PM_idle_timer + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ -/* @function PM_Clamp_Timer_Expired - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - virtual void PM_Clamp_Timer_Expired (void); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void start_PM_idle_timer( void ); + +#ifndef __LP64__ +/*! @function PM_idle_timer_expiration + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ + + virtual void PM_idle_timer_expiration( void ) + APPLE_KEXT_DEPRECATED; + +/*! @function PM_Clamp_Timer_Expired + @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. */ + + virtual void PM_Clamp_Timer_Expired( void ) + APPLE_KEXT_DEPRECATED; +#endif /*! @function setIdleTimerPeriod @abstract Sets or changes the idle timer period. - @discussion A driver using the idleness determination provided by IOService calls its superclass with this method to set or change the idle timer period. See @link activityTickle activityTickle@/link for a description of this type of idleness determination. - @param period The desired idle timer period in seconds. - @result kIOReturnSuccess if successful. 
May return kIOReturnError if there was difficulty creating the timer event or the command queue. -*/ - virtual IOReturn setIdleTimerPeriod ( unsigned long ); + @discussion A driver using the idleness determination provided by IOService calls its superclass with this method to set or change the idle timer period. See @link activityTickle activityTickle@/link for a description of this type of idleness determination. + @param period The desired idle timer period in seconds. + @result kIOReturnSuccess upon success; an I/O Kit error code otherwise. */ + + virtual IOReturn setIdleTimerPeriod( unsigned long ); +#ifndef __LP64__ /*! @function getPMworkloop @abstract Returns a pointer to the system-wide power management work loop. - @discussion Most drivers should create their own work loops to synchronize their code; drivers should not run arbitrary code on the power management work loop. -*/ - virtual IOWorkLoop *getPMworkloop ( void ); + @availability Deprecated in Mac OS X version 10.6. + @discussion Most drivers should create their own work loops to synchronize their code; drivers should not run arbitrary code on the power management work loop. */ + + virtual IOWorkLoop * getPMworkloop( void ) + APPLE_KEXT_DEPRECATED; +#endif /*! @function getPowerState @abstract Determines a device's power state. @discussion A device's "current power state" is updated at the end of each power state transition (e.g. transition from state 1 to state 0, or state 0 to state 2). This transition includes the time spent powering on or off any power plane children. Thus, if a child calls getPowerState on its power parent during system wake from sleep, the call will return the index to the device's off state rather than its on state. - @result The current power state's index into the device's power state array. -*/ - UInt32 getPowerState(); - - -/* @function ack_timer_ticked - @discussion This call is handled internally by power management. 
It is not intended to be overridden or called by drivers. -*/ - void ack_timer_ticked ( void ); - -/* @function settleTimerExpired - @abstract For internal use only. -*/ - void settleTimerExpired ( void ); - -/* @function serializedAllowPowerChange2 - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - IOReturn serializedAllowPowerChange2 ( unsigned long ); - -/* @function serializedCancelPowerChange2 - @discussion This call is handled internally by power management. It is not intended to be overridden or called by drivers. -*/ - IOReturn serializedCancelPowerChange2 ( unsigned long ); + @result The current power state's index into the device's power state array. */ + UInt32 getPowerState( void ); /*! @function setPowerState @abstract Requests a power managed driver to change the power state of its device. - @discussion A power managed driver must override setPowerState to take part in system power management. After a driver is registered with power management, the system uses setPowerState to power the device off and on for system sleep and wake. - - Calls to @link PMinit PMinit@/link and @link registerPowerDriver registerPowerDriver@/link enable power management to change a device's power state using setPowerState. - - setPowerState is called in a clean and separate thread context. - + Calls to @link PMinit PMinit@/link and @link registerPowerDriver registerPowerDriver@/link enable power management to change a device's power state using setPowerState. setPowerState is called in a clean and separate thread context. @param powerStateOrdinal The number in the power state array of the state the driver is being instructed to switch to. - @param whatDevice A pointer to the power management object which registered to manage power for this device. In most cases, whatDevice will be equal to your driver's own this pointer. 
- @result The driver must return IOPMAckImplied if it has complied with the request when it returns. Otherwise if it has started the process of changing power state but not finished it, the driver should return a number of microseconds which is an upper limit of the time it will need to finish. Then, when it has completed the power switch, it should call @link acknowledgeSetPowerState acknowledgeSetPowerState@/link. -*/ -virtual IOReturn setPowerState ( - unsigned long powerStateOrdinal, - IOService* whatDevice ); + @result The driver must return IOPMAckImplied if it has complied with the request when it returns. Otherwise if it has started the process of changing power state but not finished it, the driver should return a number of microseconds which is an upper limit of the time it will need to finish. Then, when it has completed the power switch, it should call @link acknowledgeSetPowerState acknowledgeSetPowerState@/link. */ + + virtual IOReturn setPowerState( + unsigned long powerStateOrdinal, + IOService * whatDevice ); +#ifndef __LP64__ /*! @function clampPowerOn - @abstract Deprecated. Do not use. - */ -virtual void clampPowerOn (unsigned long duration); + @abstract Deprecated. Do not use. */ -/*! @function maxCapabilityForDomainState - @abstract Determines a driver's highest power state possible for a given power domain state. - @discussion This happens when the power domain is changing state and power management needs to determine which state the device is capable of in the new domain state. + virtual void clampPowerOn( unsigned long duration ); +#endif +/*! @function maxCapabilityForDomainState + @abstract Determines a driver's highest power state possible for a given power domain state. + @discussion This happens when the power domain is changing state and power management needs to determine which state the device is capable of in the new domain state. 
Most drivers do not need to implement this method, and can rely upon the default IOService implementation. The IOService implementation scans the power state array looking for the highest state whose inputPowerRequirement field exactly matches the value of the domainState parameter. If more intelligent determination is required, the driver itself should implement the method and override the superclass's implementation. - - @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. + @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. + @result A state number. */ - @result A state number. - */ -virtual unsigned long maxCapabilityForDomainState ( - IOPMPowerFlags domainState ); + virtual unsigned long maxCapabilityForDomainState( IOPMPowerFlags domainState ); /*! @function initialPowerStateForDomainState @abstract Determines which power state a device is in, given the current power domain state. @discussion Power management calls this method once, when the driver is initializing power management. + Most drivers do not need to implement this method, and can rely upon the default IOService implementation. The IOService implementation scans the power state array looking for the highest state whose inputPowerRequirement field exactly matches the value of the domainState parameter. If more intelligent determination is required, the power managed driver should implement the method and override the superclass's implementation. + @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. + @result A state number. */ - Most drivers do not need to implement this method, and can rely upon the default IOService implementation. 
The IOService implementation scans the power state array looking for the highest state whose inputPowerRequirement field exactly matches the value of the domainState parameter. If more intelligent determination is required, the power managed driver should implement the method and override the superclass's implementation. - - @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. - @result A state number. -*/ -virtual unsigned long initialPowerStateForDomainState ( - IOPMPowerFlags domainState); + virtual unsigned long initialPowerStateForDomainState( IOPMPowerFlags domainState ); /*! @function powerStateForDomainState - @abstract Determines what power state the device would be in for a given power domain state. - @discussion Power management calls a driver with this method to find out what power state the device would be in for a given power domain state. This happens when the power domain is changing state and power management needs to determine the effect of the change. - + @abstract Determines what power state the device would be in for a given power domain state. + @discussion Power management calls a driver with this method to find out what power state the device would be in for a given power domain state. This happens when the power domain is changing state and power management needs to determine the effect of the change. Most drivers do not need to implement this method, and can rely upon the default IOService implementation. The IOService implementation scans the power state array looking for the highest state whose inputPowerRequirement field exactly matches the value of the domainState parameter. If more intelligent determination is required, the power managed driver should implement the method and override the superclass's implementation. 
+ @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. + @result A state number. */ - @param domainState Flags that describe the character of "domain power"; they represent the outputPowerCharacter field of a state in the power domain's power state array. - - @result A state number. -*/ -virtual unsigned long powerStateForDomainState ( IOPMPowerFlags domainState ); + virtual unsigned long powerStateForDomainState( IOPMPowerFlags domainState ); /*! @function powerStateWillChangeTo @abstract Informs interested parties that a device is about to change its power state. @discussion Power management informs interested parties that a device is about to change to a different power state. Interested parties are those that have registered for this notification via @link registerInterestedDriver registerInterestedDriver@/link. If you have called registerInterestedDriver on a power managed driver, you must implement powerStateWillChangeTo and @link powerStateDidChangeTo powerStateDidChangeTo@/link to receive the notifications. + powerStateWillChangeTo is called in a clean and separate thread context. powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. + @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). + @param stateNumber The number of the state in the state array that the device is switching to. + @param whatDevice A pointer to the driver that is changing. It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. + @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. 
If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. */ - powerStateWillChangeTo is called in a clean and separate thread context. - - powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. - - @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). - @param stateNumber The number of the state in the state array that the device is switching to. - @param whatDevice A pointer to the driver that is changing. It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. - @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. -*/ -virtual IOReturn powerStateWillChangeTo ( - IOPMPowerFlags capabilities, - unsigned long stateNumber, - IOService* whatDevice); + virtual IOReturn powerStateWillChangeTo( + IOPMPowerFlags capabilities, + unsigned long stateNumber, + IOService * whatDevice ); /*! @function powerStateDidChangeTo @abstract Informs interested parties that a device has changed to a different power state. @discussion Power management informs interested parties that a device has changed to a different power state. Interested parties are those that have registered for this notification via @link registerInterestedDriver registerInterestedDriver@/link. 
If you have called registerInterestedDriver on a power managed driver, you must implemnt @link powerStateWillChangeTo powerStateWillChangeTo@/link and powerStateDidChangeTo to receive the notifications. - - powerStateDidChangeTo is called in a clean and separate thread context. - - powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. - - @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). - @param stateNumber The number of the state in the state array that the device is switching to. - @param whatDevice A pointer to the driver that is changing. It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. - @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. -*/ -virtual IOReturn powerStateDidChangeTo ( - IOPMPowerFlags capabilities, - unsigned long stateNumber, - IOService* whatDevice); - + powerStateDidChangeTo is called in a clean and separate thread context. powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. + @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). + @param stateNumber The number of the state in the state array that the device is switching to. + @param whatDevice A pointer to the driver that is changing. 
It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. + @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. */ + + virtual IOReturn powerStateDidChangeTo( + IOPMPowerFlags capabilities, + unsigned long stateNumber, + IOService * whatDevice ); + +#ifndef __LP64__ /*! @function didYouWakeSystem - @abstract Asks a driver if its device is the one that just woke the system from sleep. + @abstract Asks a driver if its device is the one that just woke the system from sleep. + @availability Deprecated in Mac OS X version 10.6. @discussion Power management calls a power managed driver with this method to ask if its device is the one that just woke the system from sleep. If a device is capable of waking the system from sleep, its driver should implement didYouWakeSystem and return true if its device was responsible for waking the system. - @result true if the driver's device did wake the system and false if it didn't. -*/ -virtual bool didYouWakeSystem ( void ); + @result true if the driver's device woke the system and false otherwise. */ + + virtual bool didYouWakeSystem( void ) + APPLE_KEXT_DEPRECATED; /*! @function newTemperature - @abstract (Deprecated. Do not use.) Tells a power managed driver that the temperature in the thermal zone has changed. - @discussion A thermal-zone driver calls a power managed driver with this method to tell it that the temperature in the zone has changed. This method is not intended to be overridden or called by drivers. This method is deprecated. 
-*/ -virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); + @abstract Tells a power managed driver that the temperature in the thermal zone has changed. + @discussion This call is unused by power management. It is not intended to be called or overridden. */ - virtual bool askChangeDown ( unsigned long ); - virtual bool tellChangeDown ( unsigned long ); - bool tellChangeDown1 ( unsigned long ); - bool tellChangeDown2 ( unsigned long ); - virtual void tellNoChangeDown ( unsigned long ); - virtual void tellChangeUp ( unsigned long ); - virtual IOReturn allowPowerChange ( unsigned long refcon ); - virtual IOReturn cancelPowerChange ( unsigned long refcon ); + virtual IOReturn newTemperature( long currentTemp, IOService * whichZone ) + APPLE_KEXT_DEPRECATED; +#endif + virtual bool askChangeDown( unsigned long ); + virtual bool tellChangeDown( unsigned long ); + virtual void tellNoChangeDown ( unsigned long ); + virtual void tellChangeUp( unsigned long ); + virtual IOReturn allowPowerChange( unsigned long refcon ); + virtual IOReturn cancelPowerChange( unsigned long refcon ); - protected: +protected: /*! @function changePowerStateToPriv @abstract Tells a driver's superclass to change the power state of its device. - @discussion A driver uses this method to tell its superclass to change the power state of the device. This is the recommended way to change the power state of a device. - - Three things affect driver power state: @link changePowerStateTo changePowerStateTo@/link, changePowerStateToPriv, and the desires of the driver's power plane children. Power management puts the device into the maximum state governed by those three entities. - + @discussion A driver uses this method to tell its superclass to change the power state of the device. This is the recommended way to change the power state of a device. 
+ Three things affect driver power state: @link changePowerStateTo changePowerStateTo@/link, changePowerStateToPriv, and the desires of the driver's power plane children. Power management puts the device into the maximum state governed by those three entities. Drivers may eliminate the influence of the changePowerStateTo method on power state one of two ways. See @link powerOverrideOnPriv powerOverrideOnPriv@/link to ignore the method's influence, or call changePowerStateTo(0) in the driver's start routine to remove the changePowerStateTo method's power request. - - @param ordinal The number of the desired power state in the power state array. - @result A return code that can be ignored by the caller. -*/ - IOReturn changePowerStateToPriv ( unsigned long ordinal ); + @param ordinal The number of the desired power state in the power state array. + @result A return code that can be ignored by the caller. */ + + IOReturn changePowerStateToPriv( unsigned long ordinal ); /*! @function powerOverrideOnPriv @abstract Allows a driver to ignore its children's power management requests and only use changePowerStateToPriv to define its own power state. + @discussion Power management normally keeps a device at the highest state required by its requests via @link changePowerStateTo changePowerStateTo@/link, @link changePowerStateToPriv changePowerStateToPriv@/link, and its children. However, a driver may ensure a lower power state than otherwise required by itself and its children using powerOverrideOnPriv. When the override is on, power management keeps the device's power state in the state specified by changePowerStateToPriv. Turning on the override will initiate a power change if the driver's changePowerStateToPriv desired power state is different from the maximum of the changePowerStateTo desired power state and the children's desires. + @result A return code that can be ignored by the caller. 
*/ - @discussion Power management normally keeps a device at the highest state required by its requests via @link changePowerStateTo changePowerStateTo@/link, @link changePowerStateToPriv changePowerStateToPriv@/link, and its children. However, a driver may ensure a lower power state than otherwise required by itself and its children using powerOverrideOnPriv. - - When the override is on, power management keeps the device's power state in the state specified by changePowerStateToPriv. - - Turning on the override will initiate a power change if the driver's changePowerStateToPriv desired power state is different from the maximum of the changePowerStateTo desired power state and the children's desires. - - @result A return code that can be ignored by the caller. - -*/ - IOReturn powerOverrideOnPriv ( void ); + IOReturn powerOverrideOnPriv( void ); /*! @function powerOverrideOffPriv @abstract Allows a driver to disable a power override. + @discussion When a driver has enabled an override via @link powerOverrideOnPriv powerOverrideOnPriv@/link, it can disable it again by calling this method in its superclass. Disabling the override reverts to the default algorithm for determining a device's power state. The superclass will now keep the device at the highest state required by changePowerStateTo, changePowerStateToPriv, and its children. Turning off the override will initiate a power change if the driver's desired power state is different from the maximum of the power managed driver's desire and the children's desires. + @result A return code that can be ignored by the caller. */ - @discussion When a driver has enabled an override via @link powerOverrideOnPriv powerOverrideOnPriv@/link, it can disable it again by calling this method in its superclass. Disabling the override reverts to the default algorithm for determining a device's power state. 
The superclass will now keep the device at the highest state required by changePowerStateTo, changePowerStateToPriv, and its children. - - Turning off the override will initiate a power change if the driver's desired power state is different from the maximum of the power managed driver's desire and the children's desires. - - @result A return code that can be ignored by the caller. -*/ - IOReturn powerOverrideOffPriv ( void ); + IOReturn powerOverrideOffPriv( void ); - /*! @function powerChangeDone - @abstract Tells a driver when a power change is complete. - - @discussion Power management uses this method to call into a driver when a power change is completely done, when all interested parties have acknowledged the @link powerStateDidChangeTo powerStateDidChangeTo@/link call. The default implementation of this method is null; the method is meant to be overridden by subclassed power managed drivers. A driver should use this method to find out if a power change it initiated is complete. - @param stateNumber The number of the state in the state array that the device has switched from. -*/ - virtual void powerChangeDone ( unsigned long stateNumber); - - bool tellClientsWithResponse ( int messageType ); - void tellClients ( int messageType ); +/*! @function powerChangeDone + @abstract Tells a driver when a power state change is complete. + @discussion Power management uses this method to inform a driver when a power change is completely done, when all interested parties have acknowledged the @link powerStateDidChangeTo powerStateDidChangeTo@/link call. The default implementation of this method is null; the method is meant to be overridden by subclassed power managed drivers. A driver should use this method to find out if a power change it initiated is complete. + @param stateNumber The number of the state in the state array that the device has switched from. 
*/ + + virtual void powerChangeDone( unsigned long stateNumber ); +#ifdef XNU_KERNEL_PRIVATE + /* Power management internals */ +public: + void settleTimerExpired( void ); + IOReturn synchronizePowerTree( void ); + bool assertPMThreadCall( void ); + void deassertPMThreadCall( void ); + +#ifdef __LP64__ + static IOWorkLoop * getPMworkloop( void ); +#endif + +protected: + bool tellClientsWithResponse( int messageType ); + bool tellClientsWithResponse( int messageType, bool (*)(OSObject *, void *) ); + void tellClients( int messageType ); + void tellClients( int messageType, bool (*)(OSObject *, void *) ); + IOReturn changePowerStateWithOverrideTo( unsigned long ordinal ); private: - IOReturn enqueuePowerChange ( unsigned long, unsigned long, unsigned long, IOPowerConnection *, unsigned long ); +#ifndef __LP64__ + void ack_timer_ticked ( void ); + IOReturn serializedAllowPowerChange2 ( unsigned long ); + IOReturn serializedCancelPowerChange2 ( unsigned long ); + IOReturn powerDomainWillChangeTo( IOPMPowerFlags, IOPowerConnection * ); + IOReturn powerDomainDidChangeTo( IOPMPowerFlags, IOPowerConnection * ); +#endif + void PMfree( void ); + bool tellChangeDown1 ( unsigned long ); + bool tellChangeDown2 ( unsigned long ); + IOReturn startPowerChange ( unsigned long, unsigned long, unsigned long, IOPowerConnection *, unsigned long ); void setParentInfo ( IOPMPowerFlags, IOPowerConnection *, bool ); - IOReturn notifyAll ( bool is_prechange ); + IOReturn notifyAll ( int nextMachineState, bool is_prechange ); bool notifyChild ( IOPowerConnection * nextObject, bool is_prechange ); // power change initiated by driver + void OurChangeStart( void ); void OurChangeTellClientsPowerDown ( void ); void OurChangeTellPriorityClientsPowerDown ( void ); void OurChangeNotifyInterestedDriversWillChange ( void ); @@ -1718,27 +1766,27 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); void OurChangeWaitForPowerSettle ( void ); void 
OurChangeNotifyInterestedDriversDidChange ( void ); void OurChangeFinish ( void ); - + void OurSyncStart ( void ); + // downward power change initiated by a power parent + IOReturn ParentChangeStart( void ); void ParentDownTellPriorityClientsPowerDown ( void ); void ParentDownNotifyInterestedDriversWillChange ( void ); void ParentDownNotifyDidChangeAndAcknowledgeChange ( void ); void ParentDownSetPowerState ( void ); void ParentDownWaitForPowerSettle ( void ); - void ParentDownAcknowledgeChange ( void ); + void ParentAcknowledgePowerChange ( void ); // upward power change initiated by a power parent void ParentUpSetPowerState ( void ); void ParentUpWaitForSettleTime ( void ); void ParentUpNotifyInterestedDriversDidChange ( void ); - void ParentUpAcknowledgePowerChange ( void ); void all_done ( void ); void start_ack_timer ( void ); void stop_ack_timer ( void ); unsigned long compute_settle_time ( void ); IOReturn startSettleTimer ( unsigned long delay ); - IOReturn changeState ( void ); IOReturn ask_parent ( unsigned long requestedState ); bool checkForDone ( void ); bool responseValid ( unsigned long x, int pid ); @@ -1748,7 +1796,7 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); static void ack_timer_expired( thread_call_param_t, thread_call_param_t ); static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * ); static IOReturn actionDriverCalloutDone(OSObject *, void *, void *, void *, void * ); - static IOPMRequest * acquirePMRequest( IOService * target, UInt32 type ); + static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 ); static void releasePMRequest( IOPMRequest * request ); static void pmDriverCallout( IOService * from ); static void pmTellClientWithResponse( OSObject * object, void * context ); @@ -1757,18 +1805,18 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); void addPowerChild1( IOPMRequest * request ); void 
addPowerChild2( IOPMRequest * request ); void addPowerChild3( IOPMRequest * request ); - void adjustPowerState( void ); + void adjustPowerState( uint32_t clamp = 0 ); void start_ack_timer( UInt32 value, UInt32 scale ); void handlePMstop( IOPMRequest * request ); void handleRegisterPowerDriver( IOPMRequest * request ); bool handleAcknowledgePowerChange( IOPMRequest * request ); void handlePowerDomainWillChangeTo( IOPMRequest * request ); void handlePowerDomainDidChangeTo( IOPMRequest * request ); - void handleMakeUsable( IOPMRequest * request ); - void handleChangePowerStateTo( IOPMRequest * request ); - void handleChangePowerStateToPriv( IOPMRequest * request ); + void handleRequestPowerState( IOPMRequest * request ); void handlePowerOverrideChanged( IOPMRequest * request ); + void handleActivityTickle( IOPMRequest * request ); void handleInterestChanged( IOPMRequest * request ); + void handleSynchronizePowerTree( IOPMRequest * request ); void submitPMRequest( IOPMRequest * request ); void submitPMRequest( IOPMRequest ** request, IOItemCount count ); void executePMRequest( IOPMRequest * request ); @@ -1776,7 +1824,7 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); bool retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); bool servicePMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ); bool servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ); - bool servicePMFreeQueue( IOPMRequest * request, IOPMRequestQueue * queue ); + bool servicePMFreeQueue( IOPMRequest * request, IOPMCompletionQueue * queue ); bool notifyInterestedDrivers( void ); void notifyInterestedDriversDone( void ); bool notifyControllingDriver( void ); @@ -1784,11 +1832,15 @@ virtual IOReturn newTemperature ( long currentTemp, IOService * whichZone ); void driverSetPowerState( void ); void driverInformPowerChange( void ); bool isPMBlocked( IOPMRequest * request, int count ); - void start_our_change( const changeNoteItem * 
changeNote ); - IOReturn start_parent_change( const changeNoteItem * changeNote ); void notifyChildren( void ); void notifyChildrenDone( void ); void cleanClientResponses ( bool logErrors ); + void idleTimerExpired( IOTimerEventSource * ); + void updatePowerClient( const OSSymbol * client, uint32_t powerState ); + void removePowerClient( const OSSymbol * client ); + uint32_t getPowerStateForClient( const OSSymbol * client ); + IOReturn requestPowerState( const OSSymbol * client, uint32_t state ); +#endif /* XNU_KERNEL_PRIVATE */ }; #endif /* ! _IOKIT_IOSERVICE_H */ diff --git a/iokit/IOKit/IOServicePM.h b/iokit/IOKit/IOServicePM.h index 940a75160..96edc11c0 100644 --- a/iokit/IOKit/IOServicePM.h +++ b/iokit/IOKit/IOServicePM.h @@ -29,36 +29,33 @@ #ifndef _IOKIT_IOSERVICEPM_H #define _IOKIT_IOSERVICEPM_H -#include -#include #include -extern "C" { -#include -} - class IOService; class IOServicePM; class IOPowerConnection; -class IOPMinformee; -class IOPMinformeeList; class IOWorkLoop; class IOCommandGate; class IOTimerEventSource; class IOPlatformExpert; + +#ifdef XNU_KERNEL_PRIVATE +class IOPMinformee; +class IOPMinformeeList; class IOPMWorkQueue; class IOPMRequest; class IOPMRequestQueue; -struct changeNoteItem; +class IOPMCompletionQueue; -/* DEPRECATED */ -/*! @class IOPMprot - @abstract Protected power management instance variables for IOService objects. - @availability Mac OS X version 10.0. Deprecated in version 10.5. - @discussion IOPMprot is deprecated. Do not use it in any new code. - - Call IOService::getPowerState to query the current power state rather than access myCurrentState. -*/ +/* Binary compatibility with drivers that access pm_vars */ +#ifdef __LP64__ +#define PM_VARS_SUPPORT 0 +#else +#define PM_VARS_SUPPORT 1 +#endif + +#if PM_VARS_SUPPORT +/* Deprecated in version 10.5 */ class IOPMprot : public OSObject { friend class IOService; @@ -66,50 +63,16 @@ class IOPMprot : public OSObject OSDeclareDefaultStructors(IOPMprot) public: - /*! 
@var ourName - From getName(), used in logging. - */ const char * ourName; - - /*! @var thePlatform - From getPlatform, used in logging and registering. - */ IOPlatformExpert * thePlatform; - - /*! @var theNumberOfPowerStates - The number of states in the array. - */ unsigned long theNumberOfPowerStates; - - /*! @var thePowerStates - The array. - */ IOPMPowerState thePowerStates[IOPMMaxPowerStates]; - - /*! @var theControllingDriver - Points to the controlling driver. - */ IOService * theControllingDriver; - - /*! @var aggressiveness - Current value of power management aggressiveness. - */ unsigned long aggressiveness; - - /*! @var current_aggressiveness_values - Array of aggressiveness values. - */ - unsigned long current_aggressiveness_values [kMaxType+1]; - - /*! @var current_aggressiveness_validity - True for values that are currently valid. - */ - bool current_aggressiveness_valid [kMaxType+1]; - - /*! @var myCurrentState - The ordinal of our current power state. - */ + unsigned long current_aggressiveness_values[kMaxType+1]; + bool current_aggressiveness_valid[kMaxType+1]; unsigned long myCurrentState; }; - +#endif /* PM_VARS_SUPPORT */ +#endif /* XNU_KERNEL_PRIVATE */ #endif /* !_IOKIT_IOSERVICEPM_H */ diff --git a/iokit/IOKit/IOSubMemoryDescriptor.h b/iokit/IOKit/IOSubMemoryDescriptor.h new file mode 100644 index 000000000..0093ea32f --- /dev/null +++ b/iokit/IOKit/IOSubMemoryDescriptor.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _IOSUBMEMORYDESCRIPTOR_H +#define _IOSUBMEMORYDESCRIPTOR_H + +#include + +/*! @class IOSubMemoryDescriptor : public IOMemoryDescriptor + @abstract The IOSubMemoryDescriptor object describes a memory area made up of a portion of another IOMemoryDescriptor. + @discussion The IOSubMemoryDescriptor object represents a subrange of memory, specified as a portion of another IOMemoryDescriptor. */ + +class IOSubMemoryDescriptor : public IOMemoryDescriptor +{ + OSDeclareDefaultStructors(IOSubMemoryDescriptor); + +protected: + IOMemoryDescriptor * _parent; + IOByteCount _start; + + virtual void free(); + +public: +/*! @function withSubRange + @abstract Create an IOMemoryDescriptor to describe a subrange of an existing descriptor. + @discussion This method creates and initializes an IOMemoryDescriptor for memory consisting of a subrange of the specified memory descriptor. The parent memory descriptor is retained by the new descriptor. 
+ @param of The parent IOMemoryDescriptor of which a subrange is to be used for the new descriptor, which will be retained by the subrange IOMemoryDescriptor. + @param offset A byte offset into the parent memory descriptor's memory. + @param length The length of the subrange. + @param options + kIOMemoryDirectionMask (options:direction) This nibble indicates the I/O direction to be associated with the descriptor, which may affect the operation of the prepare and complete methods on some architectures. + @result The created IOMemoryDescriptor on success, to be released by the caller, or zero on failure. */ + + static IOSubMemoryDescriptor * withSubRange(IOMemoryDescriptor *of, + IOByteCount offset, + IOByteCount length, + IOOptionBits options); + + /* + * Initialize or reinitialize an IOSubMemoryDescriptor to describe + * a subrange of an existing descriptor. + * + * An IOSubMemoryDescriptor can be re-used by calling initSubRange + * again on an existing instance -- note that this behavior is not + * commonly supported in other IOKit classes, although it is here. 
+ */ + virtual bool initSubRange( IOMemoryDescriptor * parent, + IOByteCount offset, IOByteCount length, + IODirection withDirection ); + + /* + * IOMemoryDescriptor required methods + */ + + virtual addr64_t getPhysicalSegment( IOByteCount offset, + IOByteCount * length, + IOOptionBits options = 0 ); + + virtual IOReturn prepare(IODirection forDirection = kIODirectionNone); + + virtual IOReturn complete(IODirection forDirection = kIODirectionNone); + +#ifdef __LP64__ + virtual +#endif /* __LP64__ */ + IOReturn redirect( task_t safeTask, bool redirect ); + + virtual IOReturn setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ); + + // support map() on kIOMemoryTypeVirtual without prepare() + virtual IOMemoryMap * makeMapping( + IOMemoryDescriptor * owner, + task_t intoTask, + IOVirtualAddress atAddress, + IOOptionBits options, + IOByteCount offset, + IOByteCount length ); + + virtual uint64_t getPreparationID( void ); + +}; + +#endif /* !_IOSUBMEMORYDESCRIPTOR_H */ diff --git a/iokit/IOKit/IOSyncer.h b/iokit/IOKit/IOSyncer.h index f2d09e267..f6dfce383 100644 --- a/iokit/IOKit/IOSyncer.h +++ b/iokit/IOKit/IOSyncer.h @@ -32,8 +32,6 @@ #include #include -#define DEPRECATED __attribute__((deprecated)) - class IOSyncer : public OSObject { OSDeclareDefaultStructors(IOSyncer) @@ -49,17 +47,17 @@ class IOSyncer : public OSObject public: static IOSyncer * create(bool twoRetains = true) - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual bool init(bool twoRetains) - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual void reinit() - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual IOReturn wait(bool autoRelease = true) - DEPRECATED; + APPLE_KEXT_DEPRECATED; virtual void signal(IOReturn res = kIOReturnSuccess, bool autoRelease = true) - DEPRECATED; + APPLE_KEXT_DEPRECATED; }; #endif /* !_IOSYNCER */ diff --git a/iokit/IOKit/IOTimeStamp.h b/iokit/IOKit/IOTimeStamp.h index b900cdfdd..b1b09057f 100644 --- a/iokit/IOKit/IOTimeStamp.h +++ b/iokit/IOKit/IOTimeStamp.h @@ -32,24 +32,24 
@@ static inline void IOTimeStampStartConstant(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_START, a, b, c, d, 0); } static inline void -IOTimeStampEndConstant(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +IOTimeStampEndConstant(uintptr_t csc, + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_END, a, b, c, d, 0); } static inline void -IOTimeStampConstant(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +IOTimeStampConstant(uintptr_t csc, + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG_CONSTANT(csc | DBG_FUNC_NONE, a, b, c, d, 0); } @@ -57,25 +57,25 @@ IOTimeStampConstant(unsigned int csc, #if KDEBUG static inline void -IOTimeStampStart(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +IOTimeStampStart(uintptr_t csc, + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG(csc | DBG_FUNC_START, a, b, c, d, 0); } static inline void -IOTimeStampEnd(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +IOTimeStampEnd(uintptr_t csc, + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG(csc | DBG_FUNC_END, a, b, c, d, 0); } static inline void -IOTimeStamp(unsigned int csc, - unsigned int a = 0, unsigned int b = 0, - unsigned int c = 0, unsigned int d = 0) +IOTimeStamp(uintptr_t csc, + uintptr_t a = 0, uintptr_t b = 0, + uintptr_t c = 0, uintptr_t d = 0) { KERNEL_DEBUG(csc | DBG_FUNC_NONE, a, b, c, d, 0); } @@ -114,6 +114,7 @@ IOTimeStamp(unsigned int csc, #define IODBG_MCURS(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMCURS, code)) #define 
IODBG_MDESC(code) (KDBG_CODE(DBG_IOKIT, DBG_IOMDESC, code)) #define IODBG_POWER(code) (KDBG_CODE(DBG_IOKIT, DBG_IOPOWER, code)) +#define IODBG_IOSERVICE(code) (KDBG_CODE(DBG_IOKIT, DBG_IOSERVICE, code)) /* IOKit specific codes - within each subclass */ @@ -165,4 +166,24 @@ IOTimeStamp(unsigned int csc, /* DBG_IOKIT/DBG_IOPOWER codes */ // See IOKit/pwr_mgt/IOPMlog.h for the power management codes +/* DBG_IOKIT/DBG_IOSERVICE codes */ +#define IOSERVICE_BUSY 1 /* 0x05080004 */ +#define IOSERVICE_NONBUSY 2 /* 0x05080008 */ +#define IOSERVICE_MODULESTALL 3 /* 0x0508000C */ +#define IOSERVICE_MODULEUNSTALL 4 /* 0x05080010 */ + +#define IOSERVICE_TERMINATE_PHASE1 5 /* 0x05080014 */ +#define IOSERVICE_TERMINATE_REQUEST_OK 6 /* 0x05080018 */ +#define IOSERVICE_TERMINATE_REQUEST_FAIL 7 /* 0x0508001C */ +#define IOSERVICE_TERMINATE_SCHEDULE_STOP 8 /* 0x05080020 */ +#define IOSERVICE_TERMINATE_SCHEDULE_FINALIZE 9 /* 0x05080024 */ +#define IOSERVICE_TERMINATE_WILL 10 /* 0x05080028 */ +#define IOSERVICE_TERMINATE_DID 11 /* 0x0508002C */ +#define IOSERVICE_TERMINATE_DID_DEFER 12 /* 0x05080030 */ +#define IOSERVICE_TERMINATE_FINALIZE 13 /* 0x05080034 */ +#define IOSERVICE_TERMINATE_STOP 14 /* 0x05080038 */ +#define IOSERVICE_TERMINATE_STOP_NOP 15 /* 0x0508003C */ +#define IOSERVICE_TERMINATE_STOP_DEFER 16 /* 0x05080040 */ +#define IOSERVICE_TERMINATE_DONE 17 /* 0x05080044 */ + #endif /* ! IOKIT_IOTIMESTAMP_H */ diff --git a/iokit/IOKit/IOTimerEventSource.h b/iokit/IOKit/IOTimerEventSource.h index cedfaa40d..7cc0d38c3 100644 --- a/iokit/IOKit/IOTimerEventSource.h +++ b/iokit/IOKit/IOTimerEventSource.h @@ -159,11 +159,10 @@ class IOTimerEventSource : public IOEventSource virtual IOReturn setTimeout(UInt32 interval, UInt32 scale_factor = kNanosecondScale); -/*! @function setTimeout - @abstract Setup a callback at after the delay in decrementer ticks. See wakeAtTime(AbsoluteTime). - @param interval Delay from now to wake up. 
- @result kIOReturnSuccess if everything is fine, kIOReturnNoResources if action hasn't been declared. */ - virtual IOReturn setTimeout(mach_timespec_t interval); +#if !defined(__LP64__) + virtual IOReturn setTimeout(mach_timespec_t interval) + APPLE_KEXT_DEPRECATED; +#endif /*! @function setTimeout @abstract Setup a callback at after the delay in decrementer ticks. See wakeAtTime(AbsoluteTime). @@ -197,11 +196,10 @@ class IOTimerEventSource : public IOEventSource virtual IOReturn wakeAtTime(UInt32 abstime, UInt32 scale_factor = kNanosecondScale); -/*! @function wakeAtTime - @abstract Setup a callback at this absolute time. See wakeAtTime(AbsoluteTime). - @param abstime mach_timespec_t of the desired callout time. - @result kIOReturnSuccess if everything is fine, kIOReturnNoResources if action hasn't been declared. */ - virtual IOReturn wakeAtTime(mach_timespec_t abstime); +#if !defined(__LP64__) + virtual IOReturn wakeAtTime(mach_timespec_t abstime) + APPLE_KEXT_DEPRECATED; +#endif /*! @function wakeAtTime @abstract Setup a callback at this absolute time. 
diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index 0dd2dc1ce..9f5d5a3f7 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -75,53 +75,66 @@ typedef UInt32 IOVersion; typedef UInt32 IOItemCount; typedef UInt32 IOCacheMode; -typedef UInt32 IOByteCount; +typedef UInt32 IOByteCount32; +typedef UInt64 IOByteCount64; - /* LP64todo - these will need to expand to mach_vm_address_t */ -typedef vm_address_t IOVirtualAddress; -typedef IOVirtualAddress IOLogicalAddress; +typedef UInt32 IOPhysicalAddress32; +typedef UInt64 IOPhysicalAddress64; +typedef UInt32 IOPhysicalLength32; +typedef UInt64 IOPhysicalLength64; -#if 0 +#ifdef __LP64__ +typedef mach_vm_address_t IOVirtualAddress; +#else +typedef vm_address_t IOVirtualAddress; +#endif + +#if defined(__LP64__) && defined(KERNEL) +typedef IOByteCount64 IOByteCount; +#else +typedef IOByteCount32 IOByteCount; +#endif -typedef UInt64 IOPhysicalAddress; -typedef UInt64 IOPhysicalLength; +typedef IOVirtualAddress IOLogicalAddress; + +#if defined(__LP64__) && defined(KERNEL) + +typedef IOPhysicalAddress64 IOPhysicalAddress; +typedef IOPhysicalLength64 IOPhysicalLength; #define IOPhysical32( hi, lo ) ((UInt64) lo + ((UInt64)(hi) << 32)) #define IOPhysSize 64 #else -typedef UInt32 IOPhysicalAddress; -typedef UInt32 IOPhysicalLength; +typedef IOPhysicalAddress32 IOPhysicalAddress; +typedef IOPhysicalLength32 IOPhysicalLength; #define IOPhysical32( hi, lo ) (lo) #define IOPhysSize 32 #endif -#if __cplusplus -struct IOVirtualRange +typedef struct { - IOVirtualAddress address; + IOPhysicalAddress address; IOByteCount length; -}; -struct IOAddressRange -{ - mach_vm_address_t address; - mach_vm_size_t length; -}; -#else +} IOPhysicalRange; + typedef struct { IOVirtualAddress address; IOByteCount length; } IOVirtualRange; +#ifdef __LP64__ +typedef IOVirtualRange IOAddressRange; +#else /* !__LP64__ */ typedef struct { mach_vm_address_t address; mach_vm_size_t length; } IOAddressRange; -#endif +#endif /* !__LP64__ */ 
/* * Map between #defined or enum'd constants and text description. @@ -200,8 +213,10 @@ enum { kIOMapStatic = 0x01000000, kIOMapReference = 0x02000000, - kIOMapUnique = 0x04000000, - kIOMap64Bit = 0x08000000 + kIOMapUnique = 0x04000000 +#ifdef XNU_KERNEL_PRIVATE + , kIOMap64Bit = 0x08000000 +#endif }; /*! @enum Scale Factors @@ -223,18 +238,8 @@ enum { /* compatibility types */ #ifndef KERNEL -/* - * Machine-independent caching specification. - */ -typedef enum { - IO_CacheOff, // cache inhibit - IO_WriteThrough, - IO_CopyBack -} IOCache; -//typedef char OSString[64]; typedef unsigned int IODeviceNumber; -typedef unsigned int IOObjectNumber; #endif diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index 1a2782e50..5c2ee2d7d 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -173,19 +173,31 @@ class IOUserClient : public IOService */ ExpansionData * reserved; +#ifdef XNU_KERNEL_PRIVATE public: +#else +private: +#endif OSSet * mappings; UInt8 sharedInstance; UInt8 __reservedA[3]; void * __reserved[7]; - virtual IOReturn externalMethod( uint32_t selector, IOExternalMethodArguments * arguments, +public: + virtual IOReturn externalMethod( uint32_t selector, IOExternalMethodArguments * arguments, IOExternalMethodDispatch * dispatch = 0, OSObject * target = 0, void * reference = 0 ); - OSMetaClassDeclareReservedUsed(IOUserClient, 0); -private: + virtual IOReturn registerNotificationPort( + mach_port_t port, UInt32 type, io_user_reference_t refCon); +private: +#if __LP64__ + OSMetaClassDeclareReservedUnused(IOUserClient, 0); OSMetaClassDeclareReservedUnused(IOUserClient, 1); +#else + OSMetaClassDeclareReservedUsed(IOUserClient, 0); + OSMetaClassDeclareReservedUsed(IOUserClient, 1); +#endif OSMetaClassDeclareReservedUnused(IOUserClient, 2); OSMetaClassDeclareReservedUnused(IOUserClient, 3); OSMetaClassDeclareReservedUnused(IOUserClient, 4); @@ -201,6 +213,17 @@ class IOUserClient : public IOService 
OSMetaClassDeclareReservedUnused(IOUserClient, 14); OSMetaClassDeclareReservedUnused(IOUserClient, 15); +#ifdef XNU_KERNEL_PRIVATE + /* Available within xnu source only */ +public: + static void initialize( void ); + static void destroyUserReferences( OSObject * obj ); + IOMemoryMap * mapClientMemory64( IOOptionBits type, + task_t task, + IOOptionBits mapFlags = kIOMapAnywhere, + mach_vm_address_t atAddress = 0 ); +#endif + protected: static IOReturn sendAsyncResult(OSAsyncReference reference, IOReturn result, void *args[], UInt32 numArgs); @@ -215,13 +238,26 @@ class IOUserClient : public IOService mach_vm_address_t callback, io_user_reference_t refcon); public: - static void initialize( void ); - - static void destroyUserReferences( OSObject * obj ); - static IOReturn clientHasPrivilege( void * securityToken, const char * privilegeName ); + /*! + @function releaseAsyncReference64 + @abstract Release the mach_port_t reference held within the OSAsyncReference64 structure. + @discussion The OSAsyncReference64 structure passed to async methods holds a reference to the wakeup mach port, which should be released to balance each async method call. Behavior is undefined if these calls are not correctly balanced. + @param reference The reference passed to the subclass IOAsyncMethod, or externalMethod() in the IOExternalMethodArguments.asyncReference field. + @result A return code. + */ + static IOReturn releaseAsyncReference64(OSAsyncReference64 reference); + /*! + @function releaseNotificationPort + @abstract Release the mach_port_t passed to registerNotificationPort(). + @discussion The mach_port_t passed to the registerNotificationPort() methods should be released to balance each call to registerNotificationPort(). Behavior is undefined if these calls are not correctly balanced. + @param port The mach_port_t argument previously passed to the subclass implementation of registerNotificationPort(). + @result A return code. 
+ */ + static IOReturn releaseNotificationPort(mach_port_t port); + virtual bool init(); virtual bool init( OSDictionary * dictionary ); // Currently ignores the all args, just passes up to IOService::init() @@ -252,15 +288,16 @@ class IOUserClient : public IOService IOOptionBits * options, IOMemoryDescriptor ** memory ); - virtual IOMemoryMap * mapClientMemory( IOOptionBits type, +#if !__LP64__ +private: + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOMemoryMap * mapClientMemory( IOOptionBits type, task_t task, IOOptionBits mapFlags = kIOMapAnywhere, - IOVirtualAddress atAddress = 0 ); + IOVirtualAddress atAddress = 0 ); +#endif - IOMemoryMap * mapClientMemory64( IOOptionBits type, - task_t task, - IOOptionBits mapFlags = kIOMapAnywhere, - mach_vm_address_t atAddress = 0 ); +public: /*! @function removeMappingForDescriptor @@ -282,9 +319,11 @@ class IOUserClient : public IOService // Old methods for accessing method vector backward compatiblility only virtual IOExternalMethod * - getExternalMethodForIndex( UInt32 index ); + getExternalMethodForIndex( UInt32 index ) + APPLE_KEXT_DEPRECATED; virtual IOExternalAsyncMethod * - getExternalAsyncMethodForIndex( UInt32 index ); + getExternalAsyncMethodForIndex( UInt32 index ) + APPLE_KEXT_DEPRECATED; // Methods for accessing method vector. virtual IOExternalMethod * @@ -294,7 +333,9 @@ class IOUserClient : public IOService // Methods for accessing trap vector - old and new style virtual IOExternalTrap * - getExternalTrapForIndex( UInt32 index ); + getExternalTrapForIndex( UInt32 index ) + APPLE_KEXT_DEPRECATED; + virtual IOExternalTrap * getTargetAndTrapForIndex( IOService **targetP, UInt32 index ); }; diff --git a/iokit/IOKit/IOWorkLoop.h b/iokit/IOKit/IOWorkLoop.h index c278c9537..808329ada 100644 --- a/iokit/IOKit/IOWorkLoop.h +++ b/iokit/IOKit/IOWorkLoop.h @@ -272,7 +272,6 @@ member function's parameter list. @param arg3 Parameter for action parameter, defaults to 0. @result Returns the value of the Action callout. 
*/ - OSMetaClassDeclareReservedUsed(IOWorkLoop, 0); virtual IOReturn runAction(Action action, OSObject *target, void *arg0 = 0, void *arg1 = 0, void *arg2 = 0, void *arg3 = 0); @@ -296,12 +295,22 @@ member function's parameter list. @result Return false if the work loop is shutting down, true otherwise. */ - OSMetaClassDeclareReservedUsed(IOWorkLoop, 1); virtual bool runEventSources(); protected: + // Internal APIs used by event sources to control the thread + virtual int sleepGate(void *event, AbsoluteTime deadline, UInt32 interuptibleType); +protected: +#if __LP64__ + OSMetaClassDeclareReservedUnused(IOWorkLoop, 0); + OSMetaClassDeclareReservedUnused(IOWorkLoop, 1); OSMetaClassDeclareReservedUnused(IOWorkLoop, 2); +#else + OSMetaClassDeclareReservedUsed(IOWorkLoop, 0); + OSMetaClassDeclareReservedUsed(IOWorkLoop, 1); + OSMetaClassDeclareReservedUsed(IOWorkLoop, 2); +#endif OSMetaClassDeclareReservedUnused(IOWorkLoop, 3); OSMetaClassDeclareReservedUnused(IOWorkLoop, 4); OSMetaClassDeclareReservedUnused(IOWorkLoop, 5); diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile index 90246e45d..23d52274b 100644 --- a/iokit/IOKit/Makefile +++ b/iokit/IOKit/Makefile @@ -24,12 +24,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + i386 + INSTINC_SUBDIRS_ARM = \ arm EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} NOT_EXPORT_HEADERS = diff --git a/iokit/IOKit/OSMessageNotification.h b/iokit/IOKit/OSMessageNotification.h index 1c62d7b0c..7d7b5e537 100644 --- a/iokit/IOKit/OSMessageNotification.h +++ b/iokit/IOKit/OSMessageNotification.h @@ -82,7 +82,7 @@ enum { // -------------- enum { kOSAsyncRef64Count = 8, - kOSAsyncRef64Size = kOSAsyncRef64Count * sizeof(io_user_reference_t) + kOSAsyncRef64Size = kOSAsyncRef64Count * ((int) sizeof(io_user_reference_t)) }; 
typedef io_user_reference_t OSAsyncReference64[kOSAsyncRef64Count]; @@ -98,10 +98,12 @@ struct OSNotificationHeader64 { #endif }; +#pragma pack(4) struct IOServiceInterestContent64 { natural_t messageType; io_user_reference_t messageArgument[1]; }; +#pragma pack() // -------------- #if !KERNEL_USER32 @@ -136,9 +138,9 @@ struct IOServiceInterestContent { struct IOAsyncCompletionContent { IOReturn result; #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) - void * args[]; + void * args[] __attribute__ ((packed)); #else - void * args[0]; + void * args[0] __attribute__ ((packed)); #endif }; diff --git a/iokit/IOKit/i386/IOSharedLockImp.h b/iokit/IOKit/i386/IOSharedLockImp.h index ee2bd16d3..cb15fb1d8 100644 --- a/iokit/IOKit/i386/IOSharedLockImp.h +++ b/iokit/IOKit/i386/IOSharedLockImp.h @@ -55,31 +55,12 @@ #include -/* - * void - * ev_lock(p) - * int *p; - * - * Lock the lock pointed to by p. Spin (possibly forever) until the next - * lock is available. - */ - TEXT - #ifndef KERNEL -LEAF(_ev_lock, 0) -LEAF(_IOSpinLock, 0) - movl 4(%esp), %ecx -0: - xorl %eax, %eax - rep - nop /* pause for hyperthreaded CPU's */ - lock - cmpxchgl %ecx, (%ecx) - jne 0b - ret -END(_ev_lock) +#error this file for kernel only; comm page has user versions #endif + TEXT + /* * void * ev_unlock(p) @@ -89,13 +70,15 @@ END(_ev_lock) */ LEAF(_ev_unlock, 0) LEAF(_IOSpinUnlock, 0) +#if __x86_64__ + movl $0, (%rdi) +#else movl 4(%esp), %ecx movl $0, (%ecx) - ret +#endif END(_ev_unlock) - /* * int * ev_try_lock(p) @@ -106,6 +89,14 @@ END(_ev_unlock) LEAF(_ev_try_lock, 0) LEAF(_IOTrySpinLock, 0) +#if __x86_64__ + xorl %eax, %eax + orl $-1, %edx + lock + cmpxchgl %edx, (%rdi) + setz %dl + movzbl %dl, %eax +#else movl 4(%esp), %ecx xorl %eax, %eax lock @@ -115,6 +106,7 @@ LEAF(_IOTrySpinLock, 0) ret 1: xorl %eax, %eax /* no */ +#endif END(_ev_try_lock) diff --git a/iokit/IOKit/i386/Makefile b/iokit/IOKit/i386/Makefile index 3f51d0a48..f8f0826c4 100644 --- a/iokit/IOKit/i386/Makefile +++ 
b/iokit/IOKit/i386/Makefile @@ -14,12 +14,12 @@ MD_DIR = i386 EXCLUDE_HEADERS = INSTINC_SUBDIRS = -INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/machine/IOSharedLockImp.h b/iokit/IOKit/machine/IOSharedLockImp.h index b348bc820..ec0c90f2c 100644 --- a/iokit/IOKit/machine/IOSharedLockImp.h +++ b/iokit/IOKit/machine/IOSharedLockImp.h @@ -28,7 +28,7 @@ #if defined (__ppc__) #include "IOKit/ppc/IOSharedLockImp.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) #include "IOKit/i386/IOSharedLockImp.h" #else #error architecture not supported diff --git a/iokit/IOKit/machine/Makefile b/iokit/IOKit/machine/Makefile index 4ef1c73e7..4a77745b4 100644 --- a/iokit/IOKit/machine/Makefile +++ b/iokit/IOKit/machine/Makefile @@ -16,10 +16,12 @@ EXCLUDE_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/nvram/Makefile b/iokit/IOKit/nvram/Makefile index 3a5bd5cda..3235dd242 100644 --- a/iokit/IOKit/nvram/Makefile +++ b/iokit/IOKit/nvram/Makefile @@ -16,11 +16,13 @@ NOT_EXPORT_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} 
EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/platform/Makefile b/iokit/IOKit/platform/Makefile index 405a7c3eb..644b0b114 100644 --- a/iokit/IOKit/platform/Makefile +++ b/iokit/IOKit/platform/Makefile @@ -17,11 +17,13 @@ NOT_KF_MI_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/power/Makefile b/iokit/IOKit/power/Makefile index 6a9beac4b..dcebcdb9b 100644 --- a/iokit/IOKit/power/Makefile +++ b/iokit/IOKit/power/Makefile @@ -16,11 +16,13 @@ NOT_EXPORT_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/ppc/Makefile b/iokit/IOKit/ppc/Makefile index 321a0a5fd..21ff86cad 100644 --- a/iokit/IOKit/ppc/Makefile +++ b/iokit/IOKit/ppc/Makefile @@ -15,11 +15,9 @@ NOT_EXPORT_HEADERS = IOSharedLockImp.h INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = -INSTINC_SUBDIRS_I386 = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index aa6a0a615..7d78225af 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -210,10 +210,26 @@ enum { */ #define kAppleClamshellCausesSleepKey 
"AppleClamshellCausesSleep" +/* kIOPMSleepWakeUUIDKey + * Key refers to a CFStringRef that will uniquely identify + * a sleep/wake cycle for logging & tracking. + * The key becomes valid at the beginning of a sleep cycle - before we + * initiate any sleep/wake notifications. + * The key becomes invalid at the completion of a system wakeup. The + * property will not be present in the IOPMrootDomain's registry entry + * when it is invalid. + * + * See IOPMrootDomain notification kIOPMMessageSleepWakeUUIDChange + */ + #define kIOPMSleepWakeUUIDKey "SleepWakeUUID" + /******************************************************************************* * * Root Domain general interest messages * + * Available by registering for interest type 'gIOGeneralInterest' + * on IOPMrootDomain. + * ******************************************************************************/ /* kIOPMMessageClamshellStateChange @@ -265,6 +281,24 @@ enum { #define kIOPMMessageSystemPowerEventOccurred \ iokit_family_msg(sub_iokit_powermanagement, 0x130) +/* kIOPMMessageSleepWakeUUIDChange + * Either a new SleepWakeUUID has been specified at the beginning of a sleep, + * or we're removing the existing property upon completion of a wakeup. + */ +#define kIOPMMessageSleepWakeUUIDChange \ + iokit_family_msg(sub_iokit_powermanagement, 0x140) + +/* kIOPMMessageSleepWakeUUIDSet + * Argument accompanying the kIOPMMessageSleepWakeUUIDChange notification when + * a new UUID has been specified. + */ +#define kIOPMMessageSleepWakeUUIDSet ((void *)1) + +/* kIOPMMessageSleepWakeUUIDCleared + * Argument accompanying the kIOPMMessageSleepWakeUUIDChange notification when + * the current UUID has been removed. 
+ */ +#define kIOPMMessageSleepWakeUUIDCleared ((void *)0) /******************************************************************************* * @@ -297,25 +331,31 @@ enum { ******************************************************************************/ enum { kIOPMNoErr = 0, - // Returned by powerStateWillChange and powerStateDidChange: - // Immediate acknowledgement of power state change + + // Returned by driver's setPowerState(), powerStateWillChangeTo(), + // powerStateDidChangeTo(), or acknowledgeSetPowerState() to + // implicitly acknowledge power change upon function return. kIOPMAckImplied = 0, - // Acknowledgement of power state change will come later + + // Deprecated kIOPMWillAckLater = 1, - - // Returned by requestDomainState: - // Unrecognized specification parameter + + // Returned by requestPowerDomainState() to indicate + // unrecognized specification parameter. kIOPMBadSpecification = 4, - // No power state matches search specification + + // Returned by requestPowerDomainState() to indicate + // no power state matches search specification. kIOPMNoSuchState = 5, - - // Device cannot change its power for some reason + + // Deprecated kIOPMCannotRaisePower = 6, - - // Returned by changeStateTo: - // Requested state doesn't exist + + // Deprecated kIOPMParameterError = 7, - // Device not yet fully hooked into power management + + // Returned when power management state is accessed + // before driver has called PMinit(). kIOPMNotYetInitialized = 8, // And the old constants; deprecated @@ -363,7 +403,7 @@ enum { #define kIOPMPSBatteryChargeStatusKey "ChargeStatus" #define kIOPMPSBatteryTemperatureKey "Temperature" -// kIOPMBatteryChargeStatusKey may have one of the following values, or may have +// kIOPMPSBatteryChargeStatusKey may have one of the following values, or may have // no value. If kIOPMBatteryChargeStatusKey has a NULL value (or no value) associated with it // then charge is proceeding normally. 
If one of these battery charge status reasons is listed, // then the charge may have been interrupted. @@ -500,6 +540,9 @@ enum { #define kIOPMSettingDebugWakeRelativeKey "WakeRelativeToSleep" #define kIOPMSettingDebugPowerRelativeKey "PowerRelativeToShutdown" +// Maintenance wake calendar. +#define kIOPMSettingMaintenanceWakeCalendarKey "MaintenanceWakeCalendarDate" + struct IOPMCalendarStruct { UInt32 year; UInt8 month; @@ -605,8 +648,6 @@ struct IOPowerStateChangeNotification { }; typedef struct IOPowerStateChangeNotification IOPowerStateChangeNotification; typedef IOPowerStateChangeNotification sleepWakeNote; - -extern void IOPMRegisterDevice(const char *, IOService *); #endif /* KERNEL && __cplusplus */ #endif /* ! _IOKIT_IOPM_H */ diff --git a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h index 10da68d8e..ff25bf381 100644 --- a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h +++ b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h @@ -27,13 +27,14 @@ */ -#define kPMSetAggressiveness 0 -#define kPMGetAggressiveness 1 -#define kPMSleepSystem 2 -#define kPMAllowPowerChange 3 -#define kPMCancelPowerChange 4 -#define kPMShutdownSystem 5 -#define kPMRestartSystem 6 -#define kPMSleepSystemOptions 7 +#define kPMSetAggressiveness 0 +#define kPMGetAggressiveness 1 +#define kPMSleepSystem 2 +#define kPMAllowPowerChange 3 +#define kPMCancelPowerChange 4 +#define kPMShutdownSystem 5 +#define kPMRestartSystem 6 +#define kPMSleepSystemOptions 7 +#define kPMSetMaintenanceWakeCalendar 8 -#define kNumPMMethods 8 +#define kNumPMMethods 9 diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index e32611c99..bf50d78e7 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -30,17 +30,22 @@ #include +/*****************************************************************************/ + // Private power commands issued to root domain // bits 0-7 in IOPM.h enum { - kIOPMSetValue = (1<<16), + kIOPMSetValue = (1<<16), // don't sleep 
on clamshell closure on a portable with AC connected - kIOPMSetDesktopMode = (1<<17), + kIOPMSetDesktopMode = (1<<17), // set state of AC adaptor connected - kIOPMSetACAdaptorConnected = (1<<18) + kIOPMSetACAdaptorConnected = (1<<18) }; +/*****************************************************************************/ +/*****************************************************************************/ + /* * PM notification types */ @@ -78,5 +83,187 @@ enum { kIOPMStateConsoleShutdownCertain = 4 }; +/*****************************************************************************/ +/*****************************************************************************/ + +/* PM Statistics - event indices + * These are arguments to IOPMrootDomain::pmStatsRecordEvent(). + */ +enum { + kIOPMStatsHibernateImageWrite = 1, + kIOPMStatsHibernateImageRead, + kIOPMStatsDriversNotify, + kIOPMStatsApplicationNotify, + kIOPMStatsLateDriverAcknowledge, + kIOPMStatsLateAppAcknowledge, + + // To designate if you're specifying the start or stop end of + // each of the above events, do a bitwise OR of the appropriate + // Start/Stop flag and pass the result to IOPMrootDomain to record + // the event. + kIOPMStatsEventStartFlag = (1 << 24), + kIOPMStatsEventStopFlag = (1 << 25) +}; + +// Keys for IOPMrootDomain registry properties +#define kIOPMSleepStatisticsKey "SleepStatistics" +#define kIOPMSleepStatisticsAppsKey "AppStatistics" + +// Application response statistics +#define kIOPMStatsNameKey "Name" +#define kIOPMStatsPIDKey "Pid" +#define kIOPMStatsTimeMSKey "TimeMS" +#define kIOPMStatsApplicationResponseTypeKey "ResponseType" +#define kIOPMStatsMessageTypeKey "MessageType" + +// PM Statistics: potential values for the key kIOPMStatsApplicationResponseTypeKey +// entry in the application results array. 
+#define kIOPMStatsResponseTimedOut "ResponseTimedOut" +#define kIOPMStatsResponseCancel "ResponseCancel" +#define kIOPMStatsResponseSlow "ResponseSlow" + +typedef struct { + struct bounds{ + uint64_t start; + uint64_t stop; + }; + + struct bounds hibWrite; + struct bounds hibRead; +// bounds driverNotifySleep; +// bounds driverNotifyWake; +// bounds appNotifySleep; +// bounds appNotifyWake; +// OSDictionary *tardyApps; +// OSDictionary *tardyDrivers; +} PMStatsStruct; + +/*****************************************************************************/ + +/* PM RootDomain tracePoints + * + * In the sleep/wake process, we expect the sleep trace points to proceed + * in increasing order. Once sleep begins with code kIOPMTracePointSleepStarted = 0x11, + * we expect sleep to continue in a monotonically increasing order of tracepoints + * to kIOPMTracePointSystemLoginwindowPhase = 0x30. After trace point SystemLoginWindowPhase, + * the system will return to kIOPMTracePointSystemUp = 0x00. + * + * If the trace point decreases (instead of increasing) before reaching kIOPMTracePointSystemUp, + * that indicates that the sleep process was cancelled. The cancel reason shall be indicated + * in the cancel tracepoint. (TBD) + */ + +enum { +/* When kTracePointSystemUp is the latest tracePoint, + the system is awake. It is not asleep, sleeping, or waking. + + * Phase begins: At boot, at completion of wake from sleep, + immediately following kIOPMTracePointSystemLoginwindowPhase. + * Phase ends: When a sleep attempt is initiated. + */ + kIOPMTracePointSystemUp = 0, + +/* When kIOPMTracePointSleepStarted we have just initiated sleep. + + Note: The state prior to kIOPMTracePointSleepStarted may be only one of: + * kIOPMTracePointSystemUp + * kIOPMTracePointSystemLoginwindowPhase or + + * Phase begins: At initiation of system sleep (idle or forced). + * Phase ends: As we start to notify applications of system sleep. 
+ */ + kIOPMTracePointSleepStarted = 0x11, + +/* When kIOPMTracePointSystemSleepAppsPhase is the latest tracePoint, + a system sleep has been irrevocably initiated and PM waits + for responses from notified applications. + + * Phase begins: Begin to asynchronously fire kIOMessageSystemWillSleep notifications, + * and in the case of an idle sleep kIOMessageCanSystemSleep as well. + * Phase ends: When we have received all user & interested kernel acknowledgements. + */ + kIOPMTracePointSystemSleepAppsPhase = 0x12, + + +/* When kIOPMTracePointSystemHibernatePhase is the latest tracePoint, + PM is writing the hibernate image to disk. + */ + kIOPMTracePointSystemHibernatePhase = 0x13, + +/* When kIOPMTracePointSystemSleepDriversPhase is the latest tracePoint, + PM is iterating the driver tree powering off devices individually. + + * Phase begins: When IOPMrootDomain has received all of its power acknowledgements and begins + * executing IOService::powerDomainWillChangeTo() + * Phase ends: When IOPMrootDomain::powerChangeDone begins executing CPU shutoff code. + */ + kIOPMTracePointSystemSleepDriversPhase = 0x14, + +/* When kIOPMTracePointSystemSleepPlatformPhase is the latest tracePoint, + all apps and drivers have been notified of sleep. Platform is powering + off CPU; or system is asleep; or low level wakeup is underway. + + Note: If a system is asleep and then loses power, and it does not have a hibernate + image to restore from (e.g. hibernatemode = 0), then OS X may interpret this power + loss as a system crash in the kIOPMTracePointSystemSleepPlatformPhase, since the + power loss resembles a hang or crash, and the power being removed by the user. + + * Phase begins: IOPMrootDomain has already shut off drivers, and is now powering off CPU. + * Phase ends: Immediately after CPU's are powered back on during wakeup. 
+ */ + kIOPMTracePointSystemSleepPlatformPhase = 0x15, + +/* When kIOPMTracePointSystemWakeDriversPhase is the latest tracePoint, + System CPU is powered, PM is notifying drivers of system wake. + + * Phase begins: CPUs have successfully powered up and OS is executing. + * Phase ends: All drivers have handled power events & acknowledged completion. + IOPMrootDomain is about to deliver kIOMessageSystemHasPoweredOn. + */ + kIOPMTracePointSystemWakeDriversPhase = 0x21, + +/* When kIOPMTracePointSystemWakeAppsPhase is the latest tracePoint, + System CPU is powered, PM has powered on each driver. + + * Phase begins: IOPMrootDomain::tellChangeUp before sending asynchronous + kIOMessageSystemHasPoweredOn notifications + * Phase ends: IOPMrootDomain::tellChangeUp after sending asynchronous notifications + */ + kIOPMTracePointSystemWakeAppsPhase = 0x22, + +/* kIOPMTracePointSystemLoginwindowPhase + This phase represents a several minute window after the system has powered on. + Higher levels of system diagnostics are in a heightened state of alert in this phase, + in case any user errors occurred that we could not detect in software. + + This several minute window + + * Phase begins: After IOPMrootDomain sends kIOMessageSystemHasPoweredOn message. + * Phase ends: When loginwindow calls IOPMSleepWakeSetUUID(NULL) the system shall + be considered awake and usable. The next phase shall be kIOPMTracePointSystemUp. + */ + kIOPMTracePointSystemLoginwindowPhase = 0x30 +}; + +/*****************************************************************************/ + +/* + * kIOPMLoginWindowSecurityDebugKey - identifies PM debug data specific to LoginWindow + * for use with IOPMrootDomain. + */ +#define kIOPMLoginWindowSecurityDebugKey "LoginWindowSecurity" + +// For PM internal use only - key to locate sleep failure results within SCDynamicStore.
+#define kIOPMDynamicStoreSleepFailureKey "SleepFailure" + +/*****************************************************************************/ + +// For IOPMLibPrivate.h +#define kIOPMSleepWakeFailureKey "PMFailurePhase" +#define kIOPMSleepWakeFailureCodeKey "PMStatusCode" +#define kIOPMSleepWakeFailureLoginKey "LWFailurePhase" +#define kIOPMSleepWakeFailureUUIDKey "UUID" +#define kIOPMSleepWakeFailureDateKey "Date" + #endif /* ! _IOKIT_IOPMPRIVATE_H */ diff --git a/iokit/IOKit/pwr_mgt/IOPMlog.h b/iokit/IOKit/pwr_mgt/IOPMlog.h index 695c267b1..b9d50eda5 100644 --- a/iokit/IOKit/pwr_mgt/IOPMlog.h +++ b/iokit/IOKit/pwr_mgt/IOPMlog.h @@ -26,108 +26,58 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ enum PMLogEnum { - kPMLogSetParent = 1, // 1 0x05100004 - kPMLogAddChild, // 2 0x05100008 - kPMLogRemoveChild, // 3 0x0510000c - kPMLogControllingDriver, // 4 0x05100010 - kPMLogControllingDriverErr1, // 5 0x05100014 - bad power state array version - kPMLogControllingDriverErr2, // 6 0x05100018 - power states already registered - kPMLogControllingDriverErr3, // 7 0x0510001c - kPMLogControllingDriverErr4, // 8 0x05100020 - power driver is invalid - kPMLogInterestedDriver, // 9 0x05100024 - kPMLogAcknowledgeErr1, // 10 0x05100028 - unknown entity called acknowledgePowerChange - kPMLogChildAcknowledge, // 11 0x0510002c - kPMLogDriverAcknowledge, // 12 0x05100030 - interested driver acknowledges - kPMLogAcknowledgeErr2, // 13 0x05100034 - object has already acked - kPMLogAcknowledgeErr3, // 14 0x05100038 - not expecting any acks - kPMLogAcknowledgeErr4, // 15 0x0510003c - not expecting acknowledgeSetPowerState - kPMLogDriverAcknowledgeSet, // 16 0x05100040 - controlling driver acknowledges - kPMLogWillChange, // 17 0x05100044 - kPMLogDidChange, // 18 0x05100048 - kPMLogRequestDomain, // 19 0x0510004c - kPMLogMakeUsable, // 20 0x05100050 - kPMLogChangeStateTo, // 21 0x05100054 - kPMLogChangeStateToPriv, // 22 0x05100058 - kPMLogSetAggressiveness, // 23 0x0510005c - 
kPMLogCriticalTemp, // 24 0x05100060 - kPMLogOverrideOn, // 25 0x05100064 - kPMLogOverrideOff, // 26 0x05100068 - kPMLogEnqueueErr, // 27 0x0510006c - change queue overflow - kPMLogCollapseQueue, // 28 0x05100070 - kPMLogChangeDone, // 29 0x05100074 - kPMLogCtrlDriverTardy, // 30 0x05100078 - controlling driver didn't acknowledge - kPMLogIntDriverTardy, // 31 0x0510007c - interested driver didn't acknowledge - kPMLogStartAckTimer, // 32 0x05100080 - kPMLogStartParentChange, // 33 0x05100084 - kPMLogAmendParentChange, // 34 0x05100088 - kPMLogStartDeviceChange, // 35 0x0510008c - kPMLogRequestDenied, // 36 0x05100090 - parent denied domain state change request - kPMLogControllingDriverErr5, // 37 0x05100094 - too few power states - kPMLogProgramHardware, // 38 0x05100098 - kPMLogInformDriverPreChange, // 39 0x0510009c - kPMLogInformDriverPostChange, // 40 0x051000a0 - kPMLogRemoveDriver, // 41 0x051000a4 - kPMLogSetIdleTimerPeriod, // 42 0x051000a8 - kPMLogSystemWake, // 43 0x051000ac - kPMLogAcknowledgeErr5, // 44 0x051000b0 - kPMLogClientAcknowledge, // 45 0x051000b4 - kPMLogClientTardy, // 46 0x051000b8 - application didn't acknowledge - kPMLogClientCancel, // 47 0x051000bc - kPMLogClientNotify, // 48 0x051000c0 - client sent a notification - kPMLogAppNotify, // 49 0x051000c4 - application sent a notification - kPMLogSetClockGating, // 50 0x051000c8 - platform device specific clock control - kPMLogSetPowerGating, // 51 0x051000cc - platform device specific power control - kPMLogSetPinGroup, // 52 0x051000d0 - platform device specific gpio control - kPMLogIdleCancel, // 53 0x051000d4 - device unidle during change + kPMLogSetParent = 1, // 1 0x05100004 + kPMLogAddChild, // 2 0x05100008 + kPMLogRemoveChild, // 3 0x0510000c + kPMLogControllingDriver, // 4 0x05100010 + kPMLogControllingDriverErr1, // 5 0x05100014 - bad power state array version + kPMLogControllingDriverErr2, // 6 0x05100018 - power states already registered + kPMLogControllingDriverErr3, // 7 
0x0510001c + kPMLogControllingDriverErr4, // 8 0x05100020 - power driver is invalid + kPMLogInterestedDriver, // 9 0x05100024 + kPMLogAcknowledgeErr1, // 10 0x05100028 - unknown entity called acknowledgePowerChange + kPMLogChildAcknowledge, // 11 0x0510002c + kPMLogDriverAcknowledge, // 12 0x05100030 - interested driver acknowledges + kPMLogAcknowledgeErr2, // 13 0x05100034 - object has already acked + kPMLogAcknowledgeErr3, // 14 0x05100038 - not expecting any acks + kPMLogAcknowledgeErr4, // 15 0x0510003c - not expecting acknowledgeSetPowerState + kPMLogDriverAcknowledgeSet, // 16 0x05100040 - controlling driver acknowledges + kPMLogWillChange, // 17 0x05100044 + kPMLogDidChange, // 18 0x05100048 + kPMLogRequestDomain, // 19 0x0510004c + kPMLogMakeUsable, // 20 0x05100050 + kPMLogChangeStateTo, // 21 0x05100054 + kPMLogChangeStateToPriv, // 22 0x05100058 + kPMLogSetAggressiveness, // 23 0x0510005c + kPMLogCriticalTemp, // 24 0x05100060 + kPMLogOverrideOn, // 25 0x05100064 + kPMLogOverrideOff, // 26 0x05100068 + kPMLogEnqueueErr, // 27 0x0510006c - NOT USED + kPMLogCollapseQueue, // 28 0x05100070 - NOT USED + kPMLogChangeDone, // 29 0x05100074 + kPMLogCtrlDriverTardy, // 30 0x05100078 - controlling driver didn't acknowledge + kPMLogIntDriverTardy, // 31 0x0510007c - interested driver didn't acknowledge + kPMLogStartAckTimer, // 32 0x05100080 + kPMLogStartParentChange, // 33 0x05100084 + kPMLogAmendParentChange, // 34 0x05100088 + kPMLogStartDeviceChange, // 35 0x0510008c + kPMLogRequestDenied, // 36 0x05100090 - parent denied domain state change request + kPMLogControllingDriverErr5, // 37 0x05100094 - too few power states + kPMLogProgramHardware, // 38 0x05100098 + kPMLogInformDriverPreChange, // 39 0x0510009c + kPMLogInformDriverPostChange, // 40 0x051000a0 + kPMLogRemoveDriver, // 41 0x051000a4 - NOT USED + kPMLogSetIdleTimerPeriod, // 42 0x051000a8 + kPMLogSystemWake, // 43 0x051000ac + kPMLogAcknowledgeErr5, // 44 0x051000b0 + kPMLogClientAcknowledge, // 45 
0x051000b4 + kPMLogClientTardy, // 46 0x051000b8 - application didn't acknowledge + kPMLogClientCancel, // 47 0x051000bc - NOT USED + kPMLogClientNotify, // 48 0x051000c0 - client sent a notification + kPMLogAppNotify, // 49 0x051000c4 - application sent a notification + kPMLogSetClockGating, // 50 0x051000c8 - NOT USED + kPMLogSetPowerGating, // 51 0x051000cc - NOT USED + kPMLogSetPinGroup, // 52 0x051000d0 - NOT USED + kPMLogIdleCancel, // 53 0x051000d4 - device unidle during change kIOPMlogLastEvent }; - -// Deprecated Power Management Logging Constants -#define PMlogSetParent kPMLogSetParent -#define PMlogAddChild kPMLogAddChild -#define PMlogRemoveChild kPMLogRemoveChild -#define PMlogControllingDriver kPMLogControllingDriver -#define PMlogControllingDriverErr1 kPMLogControllingDriverErr1 -#define PMlogControllingDriverErr2 kPMLogControllingDriverErr2 -#define PMlogControllingDriverErr3 kPMLogControllingDriverErr3 -#define PMlogControllingDriverErr4 kPMLogControllingDriverErr4 -#define PMlogInterestedDriver kPMLogInterestedDriver -#define PMlogAcknowledgeErr1 kPMLogAcknowledgeErr1 -#define PMlogChildAcknowledge kPMLogChildAcknowledge -#define PMlogDriverAcknowledge kPMLogDriverAcknowledge -#define PMlogAcknowledgeErr2 kPMLogAcknowledgeErr2 -#define PMlogAcknowledgeErr3 kPMLogAcknowledgeErr3 -#define PMlogAcknowledgeErr4 kPMLogAcknowledgeErr4 -#define PMlogDriverAcknowledgeSet kPMLogDriverAcknowledgeSet -#define PMlogWillChange kPMLogWillChange -#define PMlogDidChange kPMLogDidChange -#define PMlogRequestDomain kPMLogRequestDomain -#define PMlogMakeUsable kPMLogMakeUsable -#define PMlogChangeStateTo kPMLogChangeStateTo -#define PMlogChangeStateToPriv kPMLogChangeStateToPriv -#define PMlogSetAggressiveness kPMLogSetAggressiveness -#define PMlogCriticalTemp kPMLogCriticalTemp -#define PMlogOverrideOn kPMLogOverrideOn -#define PMlogOverrideOff kPMLogOverrideOff -#define PMlogEnqueueErr kPMLogEnqueueErr -#define PMlogCollapseQueue kPMLogCollapseQueue -#define 
PMlogChangeDone kPMLogChangeDone -#define PMlogCtrlDriverTardy kPMLogCtrlDriverTardy -#define PMlogIntDriverTardy kPMLogIntDriverTardy -#define PMlogStartAckTimer kPMLogStartAckTimer -#define PMlogStartParentChange kPMLogStartParentChange -#define PMlogAmendParentChange kPMLogAmendParentChange -#define PMlogStartDeviceChange kPMLogStartDeviceChange -#define PMlogRequestDenied kPMLogRequestDenied -#define PMlogControllingDriverErr5 kPMLogControllingDriverErr5 -#define PMlogProgramHardware kPMLogProgramHardware -#define PMlogInformDriverPreChange kPMLogInformDriverPreChange -#define PMlogInformDriverPostChange kPMLogInformDriverPostChange -#define PMlogRemoveDriver kPMLogRemoveDriver -#define PMsetIdleTimerPeriod kPMLogSetIdleTimerPeriod -#define PMlogSystemWake kPMLogSystemWake -#define PMlogAcknowledgeErr5 kPMLogAcknowledgeErr5 -#define PMlogClientAcknowledge kPMLogClientAcknowledge -#define PMlogClientTardy kPMLogClientTardy -#define PMlogClientCancel kPMLogClientCancel - diff --git a/iokit/IOKit/pwr_mgt/Makefile b/iokit/IOKit/pwr_mgt/Makefile index b1b7a39b0..14165762a 100644 --- a/iokit/IOKit/pwr_mgt/Makefile +++ b/iokit/IOKit/pwr_mgt/Makefile @@ -20,11 +20,13 @@ NOT_EXPORT_HEADERS = \ INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) @@ -36,7 +38,7 @@ INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) EXPORT_MI_DIR = IOKit/$(MI_DIR) -INSTALL_KF_MI_LCL_LIST = $(EXPORT_MI_LIST) IOPMPrivate.h IOPMPagingPlexus.h +INSTALL_KF_MI_LCL_LIST = $(EXPORT_MI_LIST) IOPMPrivate.h include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h 
index c528b8c3e..0c2629376 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -30,25 +30,37 @@ #include #include +#include "IOKit/pwr_mgt/IOPMPrivate.h" + +#ifdef XNU_KERNEL_PRIVATE +#if defined(__i386__) || defined(__x86_64__) +#define ROOT_DOMAIN_RUN_STATES 1 +#endif +struct AggressivesRecord; +#endif class IOPMPowerStateQueue; class RootDomainUserClient; +class PMTraceWorker; +/* + * Flags for get/setSleepSupported() + */ enum { kRootDomainSleepNotSupported = 0x00000000, kRootDomainSleepSupported = 0x00000001, kFrameBufferDeepSleepSupported = 0x00000002, - kPCICantSleep = 0x00000004 + kPCICantSleep = 0x00000004 }; - - /* *IOPMrootDomain registry property keys */ #define kRootDomainSupportedFeatures "Supported Features" #define kRootDomainSleepReasonKey "Last Sleep Reason" #define kRootDomainSleepOptionsKey "Last Sleep Options" +#define kIOPMRootDomainWakeReasonKey "Wake Reason" +#define kIOPMRootDomainWakeTypeKey "Wake Type" #define kIOPMRootDomainPowerStatusKey "Power Status" /* @@ -61,6 +73,7 @@ enum { #define kIOPMIdleSleepKey "Idle Sleep" #define kIOPMLowPowerSleepKey "Low Power Sleep" #define kIOPMThermalEmergencySleepKey "Thermal Emergency Sleep" +#define kIOPMMaintenanceSleepKey "Maintenance Sleep" /* * String constants for communication with PM CPU @@ -68,56 +81,49 @@ enum { #define kIOPMRootDomainLidCloseCString "LidClose" #define kIOPMRootDomainBatPowerCString "BatPower" -// Supported Feature bitfields for IOPMrootDomain::publishFeature() +/* + * Supported Feature bitfields for IOPMrootDomain::publishFeature() + */ enum { - kIOPMSupportedOnAC = 1<<0, - kIOPMSupportedOnBatt = 1<<1, - kIOPMSupportedOnUPS = 1<<2 + kIOPMSupportedOnAC = (1<<0), + kIOPMSupportedOnBatt = (1<<1), + kIOPMSupportedOnUPS = (1<<2) }; -typedef IOReturn (*IOPMSettingControllerCallback) \ - (OSObject *target, const OSSymbol *type, \ +typedef IOReturn (*IOPMSettingControllerCallback) + (OSObject *target, const OSSymbol *type, OSObject *val, 
uintptr_t refcon); -extern "C" -{ - IONotifier * registerSleepWakeInterest( - IOServiceInterestHandler, void *, void * = 0); +__BEGIN_DECLS +IONotifier * registerSleepWakeInterest( + IOServiceInterestHandler, void *, void * = 0); - IONotifier * registerPrioritySleepWakeInterest( - IOServiceInterestHandler handler, - void * self, void * ref = 0); - - IOReturn acknowledgeSleepWakeNotification(void * ); +IONotifier * registerPrioritySleepWakeInterest( + IOServiceInterestHandler handler, + void * self, void * ref = 0); - IOReturn vetoSleepWakeNotification(void * PMrefcon); +IOReturn acknowledgeSleepWakeNotification(void * ); - IOReturn rootDomainRestart ( void ); - - IOReturn rootDomainShutdown ( void ); -} +IOReturn vetoSleepWakeNotification(void * PMrefcon); +__END_DECLS #define IOPM_ROOTDOMAIN_REV 2 class IOPMrootDomain: public IOService { -OSDeclareDefaultStructors(IOPMrootDomain) - -public: - - class IOService * wrangler; // we tickle the wrangler on button presses, etc + OSDeclareFinalStructors(IOPMrootDomain) +public: static IOPMrootDomain * construct( void ); - virtual bool start( IOService * provider ); - virtual IOReturn setAggressiveness ( unsigned long, unsigned long ); - virtual IOReturn youAreRoot ( void ); - virtual IOReturn sleepSystem ( void ); - IOReturn sleepSystemOptions ( OSDictionary *options ); + virtual bool start( IOService * provider ); + virtual IOReturn setAggressiveness( unsigned long, unsigned long ); + virtual IOReturn getAggressiveness( unsigned long, unsigned long * ); - virtual IOReturn setProperties ( OSObject * ); - IOReturn shutdownSystem ( void ); - IOReturn restartSystem ( void ); + virtual IOReturn sleepSystem( void ); + IOReturn sleepSystemOptions( OSDictionary *options ); + + virtual IOReturn setProperties( OSObject * ); /*! @function systemPowerEventOccurred @abstract Other drivers may inform IOPMrootDomain of system PM events @@ -129,50 +135,50 @@ OSDeclareDefaultStructors(IOPMrootDomain) to interested parties. 
Pass false if you're calling systemPowerEventOccurred several times in succession; and pass true only on the last invocatino. @result kIOReturnSuccess on success */ - IOReturn systemPowerEventOccurred(const OSSymbol *event, - uint32_t intValue); - IOReturn systemPowerEventOccurred(const OSSymbol *event, - OSObject *value); - - virtual IOReturn receivePowerNotification (UInt32 msg); - virtual void setSleepSupported( IOOptionBits flags ); - virtual IOOptionBits getSleepSupported(); - virtual IOReturn requestPowerDomainState ( IOPMPowerFlags, IOPowerConnection *, unsigned long ); - virtual void handleSleepTimerExpiration ( void ); - void stopIgnoringClamshellEventsDuringWakeup ( void ); - void wakeFromDoze( void ); - void broadcast_it (unsigned long, unsigned long ); + + IOReturn systemPowerEventOccurred( + const OSSymbol *event, + uint32_t intValue ); + + IOReturn systemPowerEventOccurred( + const OSSymbol *event, + OSObject *value ); + + virtual IOReturn receivePowerNotification( UInt32 msg ); + + virtual void setSleepSupported( IOOptionBits flags ); + + virtual IOOptionBits getSleepSupported( void ); + + void wakeFromDoze( void ); // KEXT driver announces support of power management feature - void publishFeature( const char *feature ); + + void publishFeature( const char *feature ); // KEXT driver announces support of power management feature // And specifies power sources with kIOPMSupportedOn{AC/Batt/UPS} bitfield. // Returns a unique uint32_t identifier for later removing support for this // feature. // NULL is acceptable for uniqueFeatureID for kexts without plans to unload. - void publishFeature( const char *feature, - uint32_t supportedWhere, - uint32_t *uniqueFeatureID); + + void publishFeature( const char *feature, + uint32_t supportedWhere, + uint32_t *uniqueFeatureID); // KEXT driver announces removal of a previously published power management // feature. 
Pass 'uniqueFeatureID' returned from publishFeature() - IOReturn removePublishedFeature( uint32_t removeFeatureID ); - - void unIdleDevice( IOService *, unsigned long ); - void announcePowerSourceChange( void ); - // Override of these methods for logging purposes. - virtual IOReturn changePowerStateTo ( unsigned long ordinal ); - virtual IOReturn changePowerStateToPriv ( unsigned long ordinal ); + IOReturn removePublishedFeature( uint32_t removeFeatureID ); /*! @function copyPMSetting - @abstract Copy the current value for a PM setting. Returns OSNumber or + @abstract Copy the current value for a PM setting. Returns an OSNumber or OSData depending on the setting. - @param whichSetting Name of the desired setting. - @result OSObject *value if valid, NULL otherwise. */ - OSObject *copyPMSetting(OSSymbol *whichSetting); - + @param whichSetting Name of the desired setting. + @result OSObject value if valid, NULL otherwise. */ + + OSObject * copyPMSetting( OSSymbol *whichSetting ); + /*! @function registerPMSettingController @abstract Register for callbacks on changes to certain PM settings. @param settings NULL terminated array of C strings, each string for a PM @@ -184,7 +190,8 @@ OSDeclareDefaultStructors(IOPMrootDomain) handle will have a retain count of 1 on return. To deregister, pass to unregisterPMSettingController() @result kIOReturnSuccess on success. */ - IOReturn registerPMSettingController( + + IOReturn registerPMSettingController( const OSSymbol *settings[], IOPMSettingControllerCallback callout, OSObject *target, @@ -204,7 +211,8 @@ OSDeclareDefaultStructors(IOPMrootDomain) handle will have a retain count of 1 on return. To deregister, pass to unregisterPMSettingController() @result kIOReturnSuccess on success. 
*/ - IOReturn registerPMSettingController( + + IOReturn registerPMSettingController( const OSSymbol *settings[], uint32_t supportedPowerSources, IOPMSettingControllerCallback callout, @@ -212,50 +220,182 @@ OSDeclareDefaultStructors(IOPMrootDomain) uintptr_t refcon, OSObject **handle); // out param -/*! @function acknowledgeSystemWillShutdown - @abstract Handle callbacks from IOService::systemWillShutdown(). - @param The IOService sender of the callback. */ + virtual IONotifier * registerInterest( + const OSSymbol * typeOfInterest, + IOServiceInterestHandler handler, + void * target, void * ref = 0 ); + + void pmStatsRecordEvent( + int eventIndex, + AbsoluteTime timestamp); + + void pmStatsRecordApplicationResponse( + const OSSymbol *response, + const char *name, + int messageType, + uint32_t delay_ms, + int app_pid); + + virtual IOReturn callPlatformFunction( + const OSSymbol *functionName, + bool waitForFunction, + void *param1, void *param2, + void *param3, void *param4 ); + +private: + virtual IOReturn changePowerStateTo( unsigned long ordinal ); + virtual IOReturn changePowerStateToPriv( unsigned long ordinal ); + virtual IOReturn requestPowerDomainState( IOPMPowerFlags, IOPowerConnection *, unsigned long ); + virtual void powerChangeDone( unsigned long ); + virtual bool tellChangeDown( unsigned long ); + virtual bool askChangeDown( unsigned long ); + virtual void tellChangeUp( unsigned long ); + virtual void tellNoChangeDown( unsigned long ); +#ifdef XNU_KERNEL_PRIVATE + /* Root Domain internals */ +public: + +#if ROOT_DOMAIN_RUN_STATES + void tagPowerPlaneService( + IOService * service, + uint32_t * rdFlags ); + + void handleActivityTickleForService( + IOService * service ); + + void handlePowerChangeStartForService( + IOService * service, + uint32_t * rootDomainFlags, + uint32_t newPowerState, + uint32_t changeFlags ); + + void handlePowerChangeDoneForService( + IOService * service, + uint32_t * rootDomainFlags, + uint32_t newPowerState, + uint32_t 
changeFlags ); + + void overridePowerStateForService( + IOService * service, + uint32_t * rdFlags, + unsigned long * powerState, + uint32_t changeFlags ); + + IOReturn setMaintenanceWakeCalendar( + const IOPMCalendarStruct * calendar ); +#endif /* ROOT_DOMAIN_RUN_STATES */ + + // Handle callbacks from IOService::systemWillShutdown() void acknowledgeSystemWillShutdown( IOService * from ); -/*! @function handlePlatformHaltRestart - @abstract Handle platform halt and restart notifications. - @param kPEHaltCPU or kPERestartCPU. */ + // Handle platform halt and restart notifications void handlePlatformHaltRestart( UInt32 pe_type ); + IOReturn shutdownSystem( void ); + IOReturn restartSystem( void ); + void handleSleepTimerExpiration( void ); + void handleForcedSleepTimerExpiration( void ); + void stopIgnoringClamshellEventsDuringWakeup( void ); + + IOReturn joinAggressiveness( IOService * service ); + void handleAggressivesRequests( void ); + + void tracePoint( uint8_t point ); + private: + friend class PMSettingObject; // Points to our parent - class IORootParent * patriarch; + IOService * wrangler; + class IORootParent * patriarch; - // Pref: idle time before idle sleep - long sleepSlider; - long idleSeconds; - uint64_t autoWakeStart; - uint64_t autoWakeEnd; + IOLock *featuresDictLock; // guards supportedFeatures + IOPMPowerStateQueue *pmPowerStateQueue; + + OSArray *allowedPMSettings; + PMTraceWorker *pmTracer; - // Pref: longest of other idle times (disk and display) - long longestNonSleepSlider; + // Settings controller info + IORecursiveLock *settingsCtrlLock; + OSDictionary *settingsCallbacks; + OSDictionary *fPMSettingsDict; + + IONotifier *_batteryPublishNotifier; + IONotifier *_displayWranglerNotifier; + + // Statistics + const OSSymbol *_statsNameKey; + const OSSymbol *_statsPIDKey; + const OSSymbol *_statsTimeMSKey; + const OSSymbol *_statsResponseTypeKey; + const OSSymbol *_statsMessageTypeKey; + + OSString *queuedSleepWakeUUIDString; + + OSArray 
*pmStatsAppResponses; + + PMStatsStruct pmStats; + + // Pref: idle time before idle sleep + unsigned long sleepSlider; + unsigned long idleSeconds; + uint64_t autoWakeStart; + uint64_t autoWakeEnd; // Difference between sleepSlider and longestNonSleepSlider - long extraSleepDelay; + unsigned long extraSleepDelay; // Used to wait between say display idle and system idle - thread_call_t extraSleepTimer; + thread_call_t extraSleepTimer; // Used to ignore clamshell close events while we're waking from sleep - thread_call_t clamshellWakeupIgnore; + thread_call_t clamshellWakeupIgnore; + + thread_call_t diskSyncCalloutEntry; + + uint32_t runStateIndex; + uint32_t runStateFlags; + uint32_t nextRunStateIndex; + uint32_t wranglerTickled; + + unsigned int systemBooting :1; + unsigned int systemShutdown :1; + unsigned int clamshellExists :1; + unsigned int clamshellIsClosed :1; + unsigned int ignoringClamshell :1; + unsigned int ignoringClamshellOnWake :1; + unsigned int desktopMode :1; + unsigned int acAdaptorConnected :1; + + unsigned int allowSleep :1; + unsigned int sleepIsSupported :1; + unsigned int canSleep :1; + unsigned int sleepASAP :1; + unsigned int idleSleepTimerPending :1; + unsigned int userDisabledAllSleep :1; + unsigned int ignoreChangeDown :1; + unsigned int wranglerAsleep :1; + + // Info for communicating system state changes to PMCPU + int32_t idxPMCPUClamshell; + int32_t idxPMCPULimitedPower; + + IOOptionBits platformSleepSupport; + + queue_head_t aggressivesQueue; + thread_call_t aggressivesThreadCall; + OSData * aggressivesData; + + AbsoluteTime wranglerSleepTime; + // PCI top-level PM trace + IOService * pciHostBridgeDevice; + // IOPMrootDomain internal sleep call - IOReturn privateSleepSystem ( const char *sleepReason ); + IOReturn privateSleepSystem( const char *sleepReason ); + void announcePowerSourceChange( void ); - - virtual void powerChangeDone ( unsigned long ); - virtual void command_received ( void *, void * , void * , void *); - virtual 
bool tellChangeDown ( unsigned long stateNum); - virtual bool askChangeDown ( unsigned long stateNum); - virtual void tellChangeUp ( unsigned long ); - virtual void tellNoChangeDown ( unsigned long ); - void reportUserInput ( void ); + void reportUserInput( void ); static IOReturn sysPowerDownHandler( void * target, void * refCon, UInt32 messageType, IOService * service, void * messageArgument, vm_size_t argSize ); @@ -270,79 +410,65 @@ OSDeclareDefaultStructors(IOPMrootDomain) static bool batteryPublished( void * target, void * refCon, IOService * resourceService ); - void adjustPowerState ( void ); - void setQuickSpinDownTimeout ( void ); - void restoreUserSpinDownTimeout ( void ); + void adjustPowerState( void ); + void setQuickSpinDownTimeout( void ); + void restoreUserSpinDownTimeout( void ); - bool shouldSleepOnClamshellClosed (void ); - void sendClientClamshellNotification ( void ); + bool shouldSleepOnClamshellClosed(void ); + void sendClientClamshellNotification( void ); // Inform PMCPU of changes to state like lid, AC vs. 
battery void informCPUStateChange( uint32_t type, uint32_t value ); - - IOLock *featuresDictLock; // guards supportedFeatures - IOPMPowerStateQueue *pmPowerStateQueue; - unsigned int user_spindown; // User's selected disk spindown value - - unsigned int systemBooting:1; - unsigned int systemShutdown:1; - unsigned int ignoringClamshell:1; - unsigned int allowSleep:1; - unsigned int sleepIsSupported:1; - unsigned int canSleep:1; - unsigned int idleSleepPending:1; - unsigned int sleepASAP:1; - unsigned int desktopMode:1; - unsigned int userDisabledAllSleep:1; - - unsigned int acAdaptorConnect:1; - unsigned int ignoringClamshellDuringWakeup:1; - unsigned int clamshellIsClosed:1; - unsigned int clamshellExists:1; - - OSArray *allowedPMSettings; - - // Settings controller info - IORecursiveLock *settingsCtrlLock; - OSDictionary *settingsCallbacks; - OSDictionary *fPMSettingsDict; + + void dispatchPowerEvent( uint32_t event, void * arg0, void * arg1 ); + void handlePowerNotification( UInt32 msg ); + IOReturn setPMSetting(const OSSymbol *, OSObject *); - thread_call_t diskSyncCalloutEntry; - IONotifier *_batteryPublishNotifier; - IONotifier *_displayWranglerNotifier; + void startIdleSleepTimer( uint32_t inSeconds ); + void cancelIdleSleepTimer( void ); - // Info for communicating system state changes to PMCPU - int32_t idxPMCPUClamshell; - int32_t idxPMCPULimitedPower; + void updateRunState( uint32_t inRunState ); + + IOReturn setAggressiveness( + unsigned long type, + unsigned long value, + IOOptionBits options ); + + void synchronizeAggressives( + queue_head_t * services, + const AggressivesRecord * array, + int count ); + + void broadcastAggressives( + const AggressivesRecord * array, + int count ); + + void aggressivenessChanged( void ); - struct ExpansionData { - }; - ExpansionData *_reserved; - IOOptionBits platformSleepSupport; - friend class PMSettingObject; + void publishSleepWakeUUID( bool shouldPublish ); + +#endif /* XNU_KERNEL_PRIVATE */ }; +#ifdef 
XNU_KERNEL_PRIVATE class IORootParent: public IOService { -OSDeclareDefaultStructors(IORootParent) + OSDeclareFinalStructors(IORootParent) private: unsigned long mostRecentChange; - -public: - - virtual IOReturn changePowerStateToPriv ( unsigned long ordinal ); - bool start ( IOService * nub ); - void shutDownSystem ( void ); - void restartSystem ( void ); - void sleepSystem ( void ); - void dozeSystem ( void ); - void sleepToDoze ( void ); - void wakeSystem ( void ); +public: + bool start( IOService * nub ); + void shutDownSystem( void ); + void restartSystem( void ); + void sleepSystem( void ); + void dozeSystem( void ); + void sleepToDoze( void ); + void wakeSystem( void ); }; +#endif /* XNU_KERNEL_PRIVATE */ - -#endif /* _IOKIT_ROOTDOMAIN_H */ +#endif /* _IOKIT_ROOTDOMAIN_H */ diff --git a/iokit/IOKit/rtc/Makefile b/iokit/IOKit/rtc/Makefile index 0095f4b1a..ace4cfb12 100644 --- a/iokit/IOKit/rtc/Makefile +++ b/iokit/IOKit/rtc/Makefile @@ -16,11 +16,13 @@ NOT_EXPORT_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/system_management/Makefile b/iokit/IOKit/system_management/Makefile index a9ad1604c..1f168421f 100644 --- a/iokit/IOKit/system_management/Makefile +++ b/iokit/IOKit/system_management/Makefile @@ -16,11 +16,13 @@ NOT_EXPORT_HEADERS = INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd 
$(SOURCE); echo *.h)) diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 8358a9537..bca55b535 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -25,6 +25,9 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + +#define _IOMEMORYDESCRIPTOR_INTERNAL_ + #include #include @@ -40,11 +43,15 @@ __BEGIN_DECLS void ipc_port_release_send(ipc_port_t port); #include -vm_map_t IOPageableMapForAddress( vm_address_t address ); __END_DECLS /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +enum +{ + kInternalFlagRealloc = 0x00000001, +}; + volatile ppnum_t gIOHighestAllocatedPage; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -53,50 +60,9 @@ volatile ppnum_t gIOHighestAllocatedPage; OSDefineMetaClassAndStructors(IOBufferMemoryDescriptor, IOGeneralMemoryDescriptor); -bool IOBufferMemoryDescriptor::initWithAddress( - void * /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - -bool IOBufferMemoryDescriptor::initWithAddress( - vm_address_t /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ , - task_t /* withTask */ ) -{ - return false; -} - -bool IOBufferMemoryDescriptor::initWithPhysicalAddress( - IOPhysicalAddress /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - -bool IOBufferMemoryDescriptor::initWithPhysicalRanges( - IOPhysicalRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - bool /* asReference */ ) -{ - return false; -} - -bool IOBufferMemoryDescriptor::initWithRanges( - IOVirtualRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - task_t /* withTask */ , - bool /* asReference */ ) -{ - return false; -} +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#ifndef __LP64__ 
bool IOBufferMemoryDescriptor::initWithOptions( IOOptionBits options, vm_size_t capacity, @@ -106,6 +72,7 @@ bool IOBufferMemoryDescriptor::initWithOptions( mach_vm_address_t physicalMask = 0; return (initWithPhysicalMask(inTask, options, capacity, alignment, physicalMask)); } +#endif /* !__LP64__ */ bool IOBufferMemoryDescriptor::initWithPhysicalMask( task_t inTask, @@ -118,34 +85,47 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( task_t mapTask = NULL; vm_map_t vmmap = NULL; addr64_t lastIOAddr; - IOAddressRange range; - IOOptionBits iomdOptions = kIOMemoryTypeVirtual64; + mach_vm_address_t highestMask = 0; + bool usePhys; + IOOptionBits iomdOptions = kIOMemoryTypeVirtual64 | kIOMemoryAsReference; if (!capacity) return false; - _options = options; - _capacity = capacity; - _physAddrs = 0; - _physSegCount = 0; - _buffer = 0; - range.address = 0; - range.length = 0; - _ranges.v64 = &range; + _options = options; + _capacity = capacity; + _internalFlags = 0; + _internalReserved = 0; + _buffer = 0; + + _ranges.v64 = IONew(IOAddressRange, 1); + if (!_ranges.v64) + return (false); + _ranges.v64->address = 0; + _ranges.v64->length = 0; // Grab IOMD bits from the Buffer MD options iomdOptions |= (options & kIOBufferDescriptorMemoryFlags); + if (physicalMask && (alignment <= 1)) + { + alignment = ((physicalMask ^ (-1ULL)) & (physicalMask - 1)); + highestMask = (physicalMask | alignment); + alignment++; + } + if ((options & (kIOMemorySharingTypeMask | kIOMapCacheMask)) && (alignment < page_size)) alignment = page_size; - if (physicalMask && (alignment <= 1)) - alignment = ((physicalMask ^ PAGE_MASK) & PAGE_MASK) + 1; + if (alignment >= page_size) + capacity = round_page(capacity); + + if (alignment > page_size) + options |= kIOMemoryPhysicallyContiguous; _alignment = alignment; - if (((inTask != kernel_task) && !(options & kIOMemoryPageable)) || - (physicalMask && (options & kIOMapCacheMask))) + if ((inTask != kernel_task) && !(options & kIOMemoryPageable)) return false; 
if ((options & kIOMemoryPhysicallyContiguous) && !physicalMask) @@ -201,13 +181,21 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( else lastIOAddr = ptoa_64(gIOHighestAllocatedPage); - if (physicalMask && (lastIOAddr != (lastIOAddr & physicalMask))) + usePhys = (highestMask && (lastIOAddr != (lastIOAddr & highestMask)) + && (alignment <= page_size)); + + if (!usePhys && (options & kIOMemoryPhysicallyContiguous)) + { + _buffer = (void *) IOKernelAllocateContiguous(capacity, highestMask, alignment); + usePhys = (NULL == _buffer); + } + if (usePhys) { mach_vm_address_t address; iomdOptions &= ~kIOMemoryTypeVirtual64; iomdOptions |= kIOMemoryTypePhysical64; - address = IOMallocPhysical(capacity, physicalMask); + address = IOMallocPhysical(capacity, highestMask); _buffer = (void *) address; if (!_buffer) return false; @@ -225,11 +213,17 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( /* Allocate a wired-down buffer inside kernel space. */ if (options & kIOMemoryPhysicallyContiguous) - _buffer = (void *) IOKernelAllocateContiguous(capacity, alignment); + { + // attempted allocate already + } else if (alignment > 1) + { _buffer = IOMallocAligned(capacity, alignment); + } else + { _buffer = IOMalloc(capacity); + } if (!_buffer) return false; } @@ -238,14 +232,14 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( if( (kIOMemoryTypePhysical64 != (kIOMemoryTypeMask & iomdOptions)) && (options & (kIOMemoryPageable | kIOMapCacheMask))) { ipc_port_t sharedMem; - vm_size_t size = round_page_32(capacity); + vm_size_t size = round_page(capacity); kr = mach_make_memory_entry(vmmap, &size, (vm_offset_t)_buffer, memEntryCacheMode, &sharedMem, NULL ); - if( (KERN_SUCCESS == kr) && (size != round_page_32(capacity))) { + if( (KERN_SUCCESS == kr) && (size != round_page(capacity))) { ipc_port_release_send( sharedMem ); kr = kIOReturnVMError; } @@ -276,14 +270,14 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( } } - range.address = (mach_vm_address_t) _buffer; - 
range.length = capacity; + _ranges.v64->address = (mach_vm_address_t) _buffer;; + _ranges.v64->length = _capacity; - if (!super::initWithOptions(&range, 1, 0, + if (!super::initWithOptions(_ranges.v64, 1, 0, inTask, iomdOptions, /* System mapper */ 0)) return false; - if (physicalMask && !IOMapper::gSystem) + if (highestMask && !IOMapper::gSystem) { IOMDDMACharacteristics mdSummary; @@ -308,12 +302,12 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( else lastIOAddr = ptoa_64(gIOLastPage); - if (lastIOAddr != (lastIOAddr & physicalMask)) + if (lastIOAddr != (lastIOAddr & highestMask)) { if (kIOMemoryTypePhysical64 != (_flags & kIOMemoryTypeMask)) { // flag a retry - _physSegCount = 1; + _internalFlags |= kInternalFlagRealloc; } return false; } @@ -326,20 +320,23 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( if( !reserved) return( false ); } - reserved->map = map(mapTask, 0, kIOMapAnywhere, 0, 0); + reserved->map = createMappingInTask(mapTask, 0, + kIOMapAnywhere | (options & kIOMapCacheMask), 0, 0); if (!reserved->map) { _buffer = 0; return( false ); } release(); // map took a retain on this + reserved->map->retain(); + removeMapping(reserved->map); mach_vm_address_t buffer = reserved->map->getAddress(); _buffer = (void *) buffer; if (kIOMemoryTypeVirtual64 == (kIOMemoryTypeMask & iomdOptions)) _ranges.v64->address = buffer; } - setLength(capacity); + setLength(_capacity); return true; } @@ -352,14 +349,14 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithOptions( { IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; - if (me && !me->initWithOptions(options, capacity, alignment, inTask)) { - bool retry = me->_physSegCount; + if (me && !me->initWithPhysicalMask(inTask, options, capacity, alignment, 0)) { + bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; if (retry) { me = new IOBufferMemoryDescriptor; - if (me && !me->initWithOptions(options, capacity, alignment, inTask)) + if (me && 
!me->initWithPhysicalMask(inTask, options, capacity, alignment, 0)) { me->release(); me = 0; @@ -379,7 +376,7 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithPhysicalMask( if (me && !me->initWithPhysicalMask(inTask, options, capacity, 1, physicalMask)) { - bool retry = me->_physSegCount; + bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; if (retry) @@ -395,20 +392,38 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithPhysicalMask( return me; } +#ifndef __LP64__ bool IOBufferMemoryDescriptor::initWithOptions( IOOptionBits options, vm_size_t capacity, vm_offset_t alignment) { - return( initWithOptions(options, capacity, alignment, kernel_task) ); + return (initWithPhysicalMask(kernel_task, options, capacity, alignment, (mach_vm_address_t)0)); } +#endif /* !__LP64__ */ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::withOptions( IOOptionBits options, vm_size_t capacity, vm_offset_t alignment) { - return(IOBufferMemoryDescriptor::inTaskWithOptions(kernel_task, options, capacity, alignment)); + IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; + + if (me && !me->initWithPhysicalMask(kernel_task, options, capacity, alignment, 0)) { + bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); + me->release(); + me = 0; + if (retry) + { + me = new IOBufferMemoryDescriptor; + if (me && !me->initWithPhysicalMask(kernel_task, options, capacity, alignment, 0)) + { + me->release(); + me = 0; + } + } + } + return me; } @@ -429,6 +444,7 @@ IOBufferMemoryDescriptor::withCapacity(vm_size_t inCapacity, inCapacity, inContiguous ? inCapacity : 1 )); } +#ifndef __LP64__ /* * initWithBytes: * @@ -440,10 +456,9 @@ bool IOBufferMemoryDescriptor::initWithBytes(const void * inBytes, IODirection inDirection, bool inContiguous) { - if (!initWithOptions( - inDirection | kIOMemoryUnshared - | (inContiguous ? 
kIOMemoryPhysicallyContiguous : 0), - inLength, inLength )) + if (!initWithPhysicalMask(kernel_task, inDirection | kIOMemoryUnshared + | (inContiguous ? kIOMemoryPhysicallyContiguous : 0), + inLength, inLength, (mach_vm_address_t)0)) return false; // start out with no data @@ -454,6 +469,7 @@ bool IOBufferMemoryDescriptor::initWithBytes(const void * inBytes, return true; } +#endif /* !__LP64__ */ /* * withBytes: @@ -469,15 +485,21 @@ IOBufferMemoryDescriptor::withBytes(const void * inBytes, { IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; - if (me && !me->initWithBytes(inBytes, inLength, inDirection, inContiguous)) + if (me && !me->initWithPhysicalMask( + kernel_task, inDirection | kIOMemoryUnshared + | (inContiguous ? kIOMemoryPhysicallyContiguous : 0), + inLength, inLength, 0 )) { - bool retry = me->_physSegCount; + bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; if (retry) { me = new IOBufferMemoryDescriptor; - if (me && !me->initWithBytes(inBytes, inLength, inDirection, inContiguous)) + if (me && !me->initWithPhysicalMask( + kernel_task, inDirection | kIOMemoryUnshared + | (inContiguous ? kIOMemoryPhysicallyContiguous : 0), + inLength, inLength, 0 )) { me->release(); me = 0; @@ -485,6 +507,18 @@ IOBufferMemoryDescriptor::withBytes(const void * inBytes, } } + + if (me) + { + // start out with no data + me->setLength(0); + + if (!me->appendBytes(inBytes, inLength)) + { + me->release(); + me = 0; + } + } return me; } @@ -501,10 +535,14 @@ void IOBufferMemoryDescriptor::free() IOOptionBits options = _options; vm_size_t size = _capacity; void * buffer = _buffer; - mach_vm_address_t source = (_ranges.v) ? _ranges.v64->address : 0; IOMemoryMap * map = 0; + IOAddressRange * range = _ranges.v64; + mach_vm_address_t source = range ? 
range->address : 0; vm_offset_t alignment = _alignment; + if (alignment >= page_size) + size = round_page(size); + if (reserved) { map = reserved->map; @@ -519,7 +557,7 @@ void IOBufferMemoryDescriptor::free() if (options & kIOMemoryPageable) { #if IOALLOCDEBUG - debug_iomallocpageable_size -= round_page_32(size); + debug_iomallocpageable_size -= round_page(size); #endif } else if (buffer) @@ -533,6 +571,8 @@ void IOBufferMemoryDescriptor::free() else IOFree(buffer, size); } + if (range && (kIOMemoryAsReference & flags)) + IODelete(range, IOAddressRange, 1); } /* @@ -572,7 +612,10 @@ void IOBufferMemoryDescriptor::setLength(vm_size_t length) */ void IOBufferMemoryDescriptor::setDirection(IODirection direction) { - _direction = direction; + _flags = (_flags & ~kIOMemoryDirectionMask) | direction; +#ifndef __LP64__ + _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ } /* @@ -636,8 +679,9 @@ IOBufferMemoryDescriptor::getBytesNoCopy(vm_size_t start, vm_size_t withLength) return 0; } -/* DEPRECATED */ void * IOBufferMemoryDescriptor::getVirtualSegment(IOByteCount offset, -/* DEPRECATED */ IOByteCount * lengthOfSegment) +#ifndef __LP64__ +void * IOBufferMemoryDescriptor::getVirtualSegment(IOByteCount offset, + IOByteCount * lengthOfSegment) { void * bytes = getBytesNoCopy(offset, 0); @@ -646,9 +690,15 @@ IOBufferMemoryDescriptor::getBytesNoCopy(vm_size_t start, vm_size_t withLength) return bytes; } +#endif /* !__LP64__ */ +#ifdef __LP64__ +OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 0); +OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 1); +#else /* !__LP64__ */ OSMetaClassDefineReservedUsed(IOBufferMemoryDescriptor, 0); OSMetaClassDefineReservedUsed(IOBufferMemoryDescriptor, 1); +#endif /* !__LP64__ */ OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 2); OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 3); OSMetaClassDefineReservedUnused(IOBufferMemoryDescriptor, 4); diff --git 
a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index 9ce7974ed..d0e8b868d 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -175,14 +175,14 @@ iocpu_run_platform_actions(queue_head_t * queue, uint32_t first_priority, uint32 extern "C" kern_return_t IOCPURunPlatformQuiesceActions(void) { - return (iocpu_run_platform_actions(iocpu_get_platform_quiesce_queue(), 0, 0UL-1, + return (iocpu_run_platform_actions(iocpu_get_platform_quiesce_queue(), 0, 0U-1, NULL, NULL, NULL)); } extern "C" kern_return_t IOCPURunPlatformActiveActions(void) { - return (iocpu_run_platform_actions(iocpu_get_platform_active_queue(), 0, 0UL-1, + return (iocpu_run_platform_actions(iocpu_get_platform_active_queue(), 0, 0U-1, NULL, NULL, NULL)); } @@ -324,7 +324,7 @@ void IOCPUSleepKernel(void) iter->release(); } - iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0UL-1, + iocpu_run_platform_actions(&gIOSleepActionQueue, 0, 0U-1, NULL, NULL, NULL); numCPUs = gIOCPUs->getCount(); @@ -350,7 +350,7 @@ void IOCPUSleepKernel(void) if (bootCPU) bootCPU->haltCPU(); - iocpu_run_platform_actions(&gIOWakeActionQueue, 0, 0UL-1, + iocpu_run_platform_actions(&gIOWakeActionQueue, 0, 0U-1, NULL, NULL, NULL); iocpu_platform_action_entry_t * entry; @@ -362,9 +362,9 @@ void IOCPUSleepKernel(void) } if (!queue_empty(&gIOSleepActionQueue)) - IOPanic("gIOSleepActionQueue"); + panic("gIOSleepActionQueue"); if (!queue_empty(&gIOWakeActionQueue)) - IOPanic("gIOWakeActionQueue"); + panic("gIOWakeActionQueue"); // Wake the other CPUs. 
for (cnt = 0; cnt < numCPUs; cnt++) @@ -432,7 +432,7 @@ bool IOCPU::start(IOService *provider) provider->setProperty("timebase-frequency", timebaseFrequency); timebaseFrequency->release(); - super::setProperty("IOCPUID", (UInt32)this, 32); + super::setProperty("IOCPUID", (uintptr_t)this, sizeof(uintptr_t)*8); setCPUNumber(0); setCPUState(kIOCPUStateUnregistered); diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp index 7ca1e8c46..d7738cb39 100644 --- a/iokit/Kernel/IOCatalogue.cpp +++ b/iokit/Kernel/IOCatalogue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2006 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998 Apple Inc. All rights reserved. * * HISTORY * @@ -38,60 +38,30 @@ * Version 2.0. */ -#include -#include -#include -#include -#include extern "C" { #include -#include -#include +#include #include #include }; -#include - -#include - - -extern "C" { -int IODTGetLoaderInfo( char *key, void **infoAddr, int *infoSize ); -extern void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize ); -/* operates on 32 bit segments */ -extern void OSRuntimeUnloadCPPForSegment(struct segment_command * segment); -}; - - -/***** - * At startup these function pointers are set to use the libsa in-kernel - * linker for recording and loading kmods. Once the root filesystem - * is available, the kmod_load_function pointer gets switched to point - * at the kmod_load_extension() function built into the kernel, and the - * others are set to zero. Those two functions must *always* be checked - * before being invoked. 
- */ -extern "C" { -kern_return_t (*kmod_load_function)(char *extension_name) = - &kmod_load_extension; -bool (*record_startup_extensions_function)(void) = 0; -bool (*add_from_mkext_function)(OSData * mkext) = 0; -void (*remove_startup_extension_function)(const char * name) = 0; -}; - +#include +#include +#include +#include -/***** - * A few parts of IOCatalogue require knowledge of - * whether the in-kernel linker is present. This - * variable is set by libsa's bootstrap code. - */ -int kernelLinkerPresent = 0; +#include +#include +#include -#define kModuleKey "CFBundleIdentifier" +#include +#include -#define super OSObject -OSDefineMetaClassAndStructors(IOCatalogue, OSObject) +#if PRAGMA_MARK +#pragma mark Internal Declarations +#endif +/********************************************************************* +*********************************************************************/ #define CATALOGTEST 0 @@ -99,744 +69,60 @@ IOCatalogue * gIOCatalogue; const OSSymbol * gIOClassKey; const OSSymbol * gIOProbeScoreKey; const OSSymbol * gIOModuleIdentifierKey; -OSSet * gIOCatalogModuleRequests; -OSSet * gIOCatalogCacheMisses; -OSSet * gIOCatalogROMMkexts; -IOLock * gIOCatalogLock; -IOLock * gIOKLDLock; - -/********************************************************************* -*********************************************************************/ - -OSArray * gIOPrelinkedModules = 0; - -extern "C" kern_return_t -kmod_create_internal( - kmod_info_t *info, - kmod_t *id); - -extern "C" kern_return_t -kmod_destroy_internal(kmod_t id); - -extern "C" kern_return_t -kmod_start_or_stop( - kmod_t id, - int start, - kmod_args_t *data, - mach_msg_type_number_t *dataCount); +IOLock * gIOCatalogLock; -extern "C" kern_return_t kmod_retain(kmod_t id); -extern "C" kern_return_t kmod_release(kmod_t id); - -#if CONFIG_MACF_KEXT -/* MAC Framework support */ - -/* - * define IOC_DEBUG to display run-time debugging information - * #define IOC_DEBUG 1 - */ - -#ifdef IOC_DEBUG -#define DPRINTF(x) 
printf x -#else -#define IOC_DEBUG -#define DPRINTF(x) +#if PRAGMA_MARK +#pragma mark Utility functions #endif - -static bool -primitive_type(OSObject *obj) -{ - const OSMetaClass *typeID; - - typeID = OSTypeIDInst(obj); - if (typeID == OSTypeID(OSString) || typeID == OSTypeID(OSNumber) || - typeID == OSTypeID(OSBoolean) || typeID == OSTypeID(OSData)) - return(true); - else - return(false); -} - -static int -primitive_type_length(OSObject *obj) -{ - const OSMetaClass *typeID; - int len; - - typeID = OSTypeIDInst(obj); - if (typeID == OSTypeID(OSString)) { - OSString * stringObj = OSDynamicCast(OSString, obj); - len = stringObj->getLength() + 1; - } - else if (typeID == OSTypeID(OSNumber)) { - len = sizeof("4294967295"); /* UINT32_MAX */ - } - else if (typeID == OSTypeID(OSBoolean)) { - OSBoolean * boolObj = OSDynamicCast(OSBoolean, obj); - len = boolObj->isTrue() ? sizeof("true") : sizeof("false"); - } - else if (typeID == OSTypeID(OSData)) { - OSData * dataObj = OSDynamicCast(OSData, obj); - len = dataObj->getLength(); - } - else { - len = 0; - } - return(len); -} - -static void -primitive_type_collect(struct mac_module_data_element *element, OSObject *value) -{ - const OSMetaClass *typeID; - - typeID = OSTypeIDInst(value); - if (typeID == OSTypeID(OSString)) { - OSString *stringObj = OSDynamicCast(OSString, value); - element->value_type = MAC_DATA_TYPE_PRIMITIVE; - element->value_size = stringObj->getLength() + 1; - DPRINTF(("osdict: string %s size %d\n", - stringObj->getCStringNoCopy(), element->value_size)); - memcpy(element->value, stringObj->getCStringNoCopy(), - element->value_size); - } else if (typeID == OSTypeID(OSNumber)) { - OSNumber *numberObj = OSDynamicCast(OSNumber, value); - element->value_type = MAC_DATA_TYPE_PRIMITIVE; - element->value_size = sprintf(element->value, "%u", - numberObj->unsigned32BitValue()) + 1; - } else if (typeID == OSTypeID(OSBoolean)) { - OSBoolean *boolObj = OSDynamicCast(OSBoolean, value); - element->value_type = 
MAC_DATA_TYPE_PRIMITIVE; - if (boolObj->isTrue()) { - strcpy(element->value, "true"); - element->value_size = 5; - } else { - strcpy(element->value, "false"); - element->value_size = 6; - } - } else if (typeID == OSTypeID(OSData)) { - OSData *dataObj = OSDynamicCast(OSData, value); - element->value_type = MAC_DATA_TYPE_PRIMITIVE; - element->value_size = dataObj->getLength(); - DPRINTF(("osdict: data size %d\n", dataObj->getLength())); - memcpy(element->value, dataObj->getBytesNoCopy(), - element->value_size); - } -} - /********************************************************************* -* This function takes an OSDictionary and returns a struct mac_module_data -* list. *********************************************************************/ -struct mac_module_data * -osdict_encode(OSDictionary *dict) +static void +UniqueProperties(OSDictionary * dict) { - const OSMetaClass * typeID; // don't release - OSString * key = NULL; // don't release - OSCollectionIterator * keyIterator = 0; // must release - struct mac_module_data * module_data = 0; - struct mac_module_data_element * element; - unsigned int strtabsize = 0; - unsigned int listtabsize = 0; - unsigned int dicttabsize = 0; - unsigned int nkeys = 0; - unsigned int datalen; - char *strtab = NULL; - char *listtab = NULL; - char *dicttab = NULL; - vm_offset_t data_addr; - - keyIterator = OSCollectionIterator::withCollection(dict); - if (!keyIterator) - goto finish; + OSString * data; - /* Iterate over OSModuleData to figure out total size */ - while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) { - - // Get the key's value and determine its type - OSObject * value = dict->getObject(key); - if (!value) - continue; + data = OSDynamicCast(OSString, dict->getObject(gIOClassKey)); + if (data) { + const OSSymbol *classSymbol = OSSymbol::withString(data); - typeID = OSTypeIDInst(value); - if (primitive_type(value)) { - strtabsize += primitive_type_length(value); - } - else if (typeID == 
OSTypeID(OSArray)) { - unsigned int k, cnt, nents; - OSArray *arrayObj = OSDynamicCast(OSArray, value); - - nents = 0; - cnt = arrayObj->getCount(); - for (k = 0; k < cnt; k++) { - value = arrayObj->getObject(k); - typeID = OSTypeIDInst(value); - if (primitive_type(value)) { - listtabsize += primitive_type_length(value); - nents++; - } - else if (typeID == OSTypeID(OSDictionary)) { - unsigned int dents; - OSDictionary *dictObj; - OSString *dictkey; - OSCollectionIterator *dictIterator; - - dents = 0; - dictObj = OSDynamicCast(OSDictionary, value); - dictIterator = OSCollectionIterator::withCollection(dictObj); - if (!dictIterator) - goto finish; - while ((dictkey = OSDynamicCast(OSString, - dictIterator->getNextObject()))) { - OSObject *dictvalue; - - dictvalue = dictObj->getObject(dictkey); - if (!dictvalue) - continue; - if (primitive_type(dictvalue)) { - strtabsize += primitive_type_length(dictvalue); - } - else { - continue; /* Only handle primitive types here. */ - } - /* - * Allow for the "arraynnn/" prefix in the key length. - */ - strtabsize += dictkey->getLength() + 1; - dents++; - } - dictIterator->release(); - if (dents-- > 0) { - dicttabsize += sizeof(struct mac_module_data_list) + - dents * sizeof(struct mac_module_data_element); - nents++; - } - } - else { - continue; /* Skip everything else. */ - } - } - if (nents == 0) - continue; - listtabsize += sizeof(struct mac_module_data_list) + - (nents - 1) * sizeof(struct mac_module_data_element); - } - else { - continue; /* skip anything else */ - } - strtabsize += key->getLength() + 1; - nkeys++; + dict->setObject( gIOClassKey, (OSSymbol *) classSymbol); + classSymbol->release(); } - if (nkeys == 0) - goto finish; - /* - * Allocate and fill in the module data structures. 
- */ - datalen = sizeof(struct mac_module_data) + - sizeof(mac_module_data_element) * (nkeys - 1) + - strtabsize + listtabsize + dicttabsize; - DPRINTF(("osdict: datalen %d strtabsize %d listtabsize %d dicttabsize %d\n", - datalen, strtabsize, listtabsize, dicttabsize)); - if (kmem_alloc(kernel_map, &data_addr, datalen) != KERN_SUCCESS) - goto finish; - module_data = (mac_module_data *)data_addr; - module_data->base_addr = data_addr; - module_data->size = datalen; - module_data->count = nkeys; - strtab = (char *)&module_data->data[nkeys]; - listtab = strtab + strtabsize; - dicttab = listtab + listtabsize; - DPRINTF(("osdict: data_addr %p strtab %p listtab %p dicttab %p end %p\n", - data_addr, strtab, listtab, dicttab, data_addr + datalen)); - - keyIterator->reset(); - nkeys = 0; - element = &module_data->data[0]; - DPRINTF(("osdict: element %p\n", element)); - while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) { - - // Get the key's value and determine its type - OSObject * value = dict->getObject(key); - if (!value) - continue; + data = OSDynamicCast(OSString, dict->getObject(gIOMatchCategoryKey)); + if (data) { + const OSSymbol *classSymbol = OSSymbol::withString(data); - /* Store key */ - DPRINTF(("osdict: element @%p\n", element)); - element->key = strtab; - element->key_size = key->getLength() + 1; - DPRINTF(("osdict: key %s size %d @%p\n", key->getCStringNoCopy(), element->key_size, strtab)); - memcpy(element->key, key->getCStringNoCopy(), element->key_size); - - typeID = OSTypeIDInst(value); - if (primitive_type(value)) { - /* Store value */ - element->value = element->key + element->key_size; - DPRINTF(("osdict: primitive element value %p\n", element->value)); - primitive_type_collect(element, value); - strtab += element->key_size + element->value_size; - DPRINTF(("osdict: new strtab %p\n", strtab)); - } - else if (typeID == OSTypeID(OSArray)) { - unsigned int k, cnt, nents; - char *astrtab; - struct mac_module_data_list *arrayhd; - 
struct mac_module_data_element *ele; - OSArray *arrayObj = OSDynamicCast(OSArray, value); - - element->value = listtab; - DPRINTF(("osdict: array element value %p\n", element->value)); - element->value_type = MAC_DATA_TYPE_ARRAY; - arrayhd = (struct mac_module_data_list *)element->value; - arrayhd->type = 0; - DPRINTF(("osdict: arrayhd %p\n", arrayhd)); - nents = 0; - astrtab = strtab + element->key_size; - ele = &(arrayhd->list[0]); - cnt = arrayObj->getCount(); - for (k = 0; k < cnt; k++) { - value = arrayObj->getObject(k); - DPRINTF(("osdict: array ele %d @%p\n", nents, ele)); - ele->key = NULL; - ele->key_size = 0; - typeID = OSTypeIDInst(value); - if (primitive_type(value)) { - if (arrayhd->type != 0 && - arrayhd->type != MAC_DATA_TYPE_PRIMITIVE) - continue; - arrayhd->type = MAC_DATA_TYPE_PRIMITIVE; - ele->value = astrtab; - primitive_type_collect(ele, value); - astrtab += ele->value_size; - DPRINTF(("osdict: array new astrtab %p\n", astrtab)); - } - else if (typeID == OSTypeID(OSDictionary)) { - unsigned int dents; - char *dstrtab; - OSDictionary *dictObj; - OSString *dictkey; - OSCollectionIterator *dictIterator; - struct mac_module_data_list *dicthd; - struct mac_module_data_element *dele; - - if (arrayhd->type != 0 && - arrayhd->type != MAC_DATA_TYPE_DICT) - continue; - dictObj = OSDynamicCast(OSDictionary, value); - dictIterator = OSCollectionIterator::withCollection(dictObj); - if (!dictIterator) - goto finish; - DPRINTF(("osdict: dict\n")); - ele->value = dicttab; - ele->value_type = MAC_DATA_TYPE_DICT; - dicthd = (struct mac_module_data_list *)ele->value; - DPRINTF(("osdict: dicthd %p\n", dicthd)); - dstrtab = astrtab; - dents = 0; - while ((dictkey = OSDynamicCast(OSString, - dictIterator->getNextObject()))) { - OSObject *dictvalue; - - dictvalue = dictObj->getObject(dictkey); - if (!dictvalue) - continue; - dele = &(dicthd->list[dents]); - DPRINTF(("osdict: dict ele %d @%p\n", dents, dele)); - if (primitive_type(dictvalue)) { - dele->key = dstrtab; 
- dele->key_size = dictkey->getLength() + 1; - DPRINTF(("osdict: dictkey %s size %d @%p\n", - dictkey->getCStringNoCopy(), dictkey->getLength(), dstrtab)); - memcpy(dele->key, dictkey->getCStringNoCopy(), - dele->key_size); - dele->value = dele->key + dele->key_size; - primitive_type_collect(dele, dictvalue); - dstrtab += dele->key_size + dele->value_size; - DPRINTF(("osdict: dict new dstrtab %p\n", dstrtab)); - } - else { - continue; /* Only handle primitive types here. */ - } - dents++; - } - dictIterator->release(); - if (dents == 0) - continue; - arrayhd->type = MAC_DATA_TYPE_DICT; - ele->value_size = sizeof(struct mac_module_data_list) + - (dents - 1) * sizeof(struct mac_module_data_element); - DPRINTF(("osdict: dict ele size %d ents %d\n", ele->value_size, dents)); - dicttab += ele->value_size; - DPRINTF(("osdict: new dicttab %p\n", dicttab)); - dicthd->count = dents; - astrtab = dstrtab; - } - else { - continue; /* Skip everything else. */ - } - nents++; - ele++; - } - if (nents == 0) - continue; - element->value_size = sizeof(struct mac_module_data_list) + - (nents - 1) * sizeof(struct mac_module_data_element); - listtab += element->value_size; - DPRINTF(("osdict: new listtab %p\n", listtab)); - arrayhd->count = nents; - strtab = astrtab; - DPRINTF(("osdict: new strtab %p\n", strtab)); - } - else { - continue; /* skip anything else */ - } - element++; + dict->setObject(gIOMatchCategoryKey, (OSSymbol *) classSymbol); + classSymbol->release(); } - DPRINTF(("module_data list @%p, key %p value %p\n", - module_data, module_data->data[0].key, module_data->data[0].value)); -finish: - if (keyIterator) - keyIterator->release(); - return(module_data); + return; } /********************************************************************* -* This function takes a plist and looks for an OSModuleData dictionary. -* If it is found, an encoded copy is returned. +* Add a new personality to the set if it has a unique IOResourceMatchKey value. 
+* XXX -- svail: This should be optimized. +* esb - There doesn't seem like any reason to do this - it causes problems +* esb - when there are more than one loadable driver matching on the same provider class *********************************************************************/ -kmod_args_t -get_module_data(OSDictionary * kextPlist, mach_msg_type_number_t * datalen) -{ - - OSDictionary * kextModuleData = 0; // don't release - struct mac_module_data * module_data = 0; - vm_map_copy_t copy = 0; - - kextModuleData = OSDynamicCast(OSDictionary, - kextPlist->getObject("OSModuleData")); - if (!kextModuleData) - goto finish; - - module_data = osdict_encode(kextModuleData); - if (!module_data) - goto finish; - *datalen = module_data->size; - /* - * Make a CoW copy of data and free the original. The copy is - * consumed by a call to vm_map_copyout() in kmod_start_or_stop(). - */ - vm_map_copyin(kernel_map, (vm_offset_t)module_data, *datalen, FALSE, &copy); - kmem_free(kernel_map, (vm_offset_t)module_data, *datalen); - DPRINTF(("get_module_data: copy @ %p\n", copy)); -finish: - return (kmod_args_t)copy; -} -#endif /* MAC */ - -static -kern_return_t start_prelink_module(UInt32 moduleIndex) +static void +AddNewImports(OSOrderedSet * set, OSDictionary * dict) { - kern_return_t kr = KERN_SUCCESS; - UInt32 * togo; - SInt32 count, where, end; - UInt32 * prelink; - SInt32 next, lastDep; - OSData * data; - OSString * str; - OSDictionary * dict; - - OSArray * - prelinkedModules = gIOPrelinkedModules; - - togo = IONew(UInt32, prelinkedModules->getCount()); - togo[0] = moduleIndex; - count = 1; - - for (next = 0; next < count; next++) - { - dict = (OSDictionary *) prelinkedModules->getObject(togo[next]); - - data = OSDynamicCast(OSData, dict->getObject("OSBundlePrelink")); - if (!data) - { - // already started or no code - if (togo[next] == moduleIndex) - { - kr = KERN_FAILURE; - break; - } - continue; - } - prelink = (UInt32 *) data->getBytesNoCopy(); - lastDep = OSReadBigInt32(prelink, 
12); - for (SInt32 idx = OSReadBigInt32(prelink, 8); idx < lastDep; idx += sizeof(UInt32)) - { - UInt32 depIdx = OSReadBigInt32(prelink, idx) - 1; - - for (where = next + 1; - (where < count) && (togo[where] > depIdx); - where++) {} - - if (where != count) - { - if (togo[where] == depIdx) - continue; - for (end = count; end != where; end--) - togo[end] = togo[end - 1]; - } - count++; - togo[where] = depIdx; - } - } - - if (KERN_SUCCESS != kr) - return kr; - - for (next = (count - 1); next >= 0; next--) - { - dict = (OSDictionary *) prelinkedModules->getObject(togo[next]); - - data = OSDynamicCast(OSData, dict->getObject("OSBundlePrelink")); - if (!data) - continue; - prelink = (UInt32 *) data->getBytesNoCopy(); - - kmod_t id; - kmod_info_t * kmod_info = (kmod_info_t *) OSReadBigInt32(prelink, 0); - - kr = kmod_create_internal(kmod_info, &id); - if (KERN_SUCCESS != kr) - break; - - lastDep = OSReadBigInt32(prelink, 12); - for (SInt32 idx = OSReadBigInt32(prelink, 8); idx < lastDep; idx += sizeof(UInt32)) - { - OSDictionary * depDict; - kmod_info_t * depInfo; - - depDict = (OSDictionary *) prelinkedModules->getObject(OSReadBigInt32(prelink, idx) - 1); - str = OSDynamicCast(OSString, depDict->getObject(kModuleKey)); - depInfo = kmod_lookupbyname_locked(str->getCStringNoCopy()); - if (depInfo) - { - kr = kmod_retain(KMOD_PACK_IDS(id, depInfo->id)); - kfree(depInfo, sizeof(kmod_info_t)); - } else - IOLog("%s: NO DEP %s\n", kmod_info->name, str->getCStringNoCopy()); - } - dict->removeObject("OSBundlePrelink"); - - if (kmod_info->start) - kr = kmod_start_or_stop(kmod_info->id, 1, 0, 0); - } - - IODelete(togo, UInt32, prelinkedModules->getCount()); - - return kr; + set->setObject(dict); } +#if PRAGMA_MARK +#pragma mark IOCatalogue class implementation +#endif /********************************************************************* -* This is a function that IOCatalogue calls in order to load a kmod. 
*********************************************************************/ -static -kern_return_t kmod_load_from_cache_sym(const OSSymbol * kmod_name) -{ - OSArray * prelinkedModules = gIOPrelinkedModules; - kern_return_t result = KERN_FAILURE; - OSDictionary * dict; - OSObject * ident; - UInt32 idx; - - if (!gIOPrelinkedModules) - return KERN_FAILURE; - - for (idx = 0; - (dict = (OSDictionary *) prelinkedModules->getObject(idx)); - idx++) - { - if ((ident = dict->getObject(kModuleKey)) - && kmod_name->isEqualTo(ident)) - break; - } - if (dict) - { - if (kernelLinkerPresent && dict->getObject("OSBundleDefer")) - { - kmod_load_extension((char *) kmod_name->getCStringNoCopy()); - result = kIOReturnOffline; - } - else - result = start_prelink_module(idx); - } - - return result; -} - -extern "C" Boolean kmod_load_request(const char * moduleName, Boolean make_request) -{ - bool ret, cacheMiss = false; - kern_return_t kr; - const OSSymbol * sym = 0; - kmod_info_t * kmod_info; - - if (!moduleName) - return false; - - /* To make sure this operation completes even if a bad extension needs - * to be removed, take the kld lock for this whole block, spanning the - * kmod_load_function() and remove_startup_extension_function() calls. - */ - IOLockLock(gIOKLDLock); - do - { - // Is the module already loaded? - ret = (0 != (kmod_info = kmod_lookupbyname_locked((char *)moduleName))); - if (ret) { - kfree(kmod_info, sizeof(kmod_info_t)); - break; - } - sym = OSSymbol::withCString(moduleName); - if (!sym) { - ret = false; - break; - } - - kr = kmod_load_from_cache_sym(sym); - ret = (kIOReturnSuccess == kr); - cacheMiss = !ret; - if (ret || !make_request || (kr == kIOReturnOffline)) - break; - - // If the module hasn't been loaded, then load it. 
- if (!kmod_load_function) { - IOLog("IOCatalogue: %s cannot be loaded " - "(kmod load function not set).\n", - moduleName); - ret = true; - break; - } - - kr = kmod_load_function((char *)moduleName); - - if (ret != kIOReturnSuccess) { - IOLog("IOCatalogue: %s cannot be loaded.\n", moduleName); - - /* If the extension couldn't be loaded this time, - * make it unavailable so that no more requests are - * made in vain. This also enables other matching - * extensions to have a chance. - */ - if (kernelLinkerPresent && remove_startup_extension_function) { - (*remove_startup_extension_function)(moduleName); - } - ret = false; - - } else if (kernelLinkerPresent) { - // If kern linker is here, the driver is actually loaded, - // so return true. - ret = true; - - } else { - // kern linker isn't here, a request has been queued - // but the module isn't necessarily loaded yet, so stall. - ret = false; - } - } - while (false); - - IOLockUnlock(gIOKLDLock); - - if (sym) - { - IOLockLock(gIOCatalogLock); - gIOCatalogModuleRequests->setObject(sym); - if (cacheMiss) - gIOCatalogCacheMisses->setObject(sym); - IOLockUnlock(gIOCatalogLock); - } - - return ret; -} - -extern "C" kern_return_t kmod_unload_cache(void) -{ - OSArray * prelinkedModules = gIOPrelinkedModules; - kern_return_t result = KERN_FAILURE; - OSDictionary * dict; - UInt32 idx; - UInt32 * prelink; - OSData * data; - - if (!gIOPrelinkedModules) - return KERN_SUCCESS; - - IOLockLock(gIOKLDLock); - for (idx = 0; - (dict = (OSDictionary *) prelinkedModules->getObject(idx)); - idx++) - { - data = OSDynamicCast(OSData, dict->getObject("OSBundlePrelink")); - if (!data) - continue; - prelink = (UInt32 *) data->getBytesNoCopy(); - - kmod_info_t * kmod_info = (kmod_info_t *) OSReadBigInt32(prelink, 0); - vm_offset_t - virt = ml_static_ptovirt(kmod_info->address); - if( virt) { - ml_static_mfree(virt, kmod_info->size); - } - } - - gIOPrelinkedModules->release(); - gIOPrelinkedModules = 0; - - IOLockUnlock(gIOKLDLock); - - return 
result; -} - -extern "C" kern_return_t kmod_load_from_cache(const char * kmod_name) -{ - kern_return_t kr; - const OSSymbol * sym = OSSymbol::withCStringNoCopy(kmod_name); - - if (sym) - { - kr = kmod_load_from_cache_sym(sym); - sym->release(); - } - else - kr = kIOReturnNoMemory; - - return kr; -} +#define super OSObject +OSDefineMetaClassAndStructors(IOCatalogue, OSObject) /********************************************************************* *********************************************************************/ - -static void UniqueProperties( OSDictionary * dict ) -{ - OSString * data; - - data = OSDynamicCast( OSString, dict->getObject( gIOClassKey )); - if( data) { - const OSSymbol *classSymbol = OSSymbol::withString(data); - - dict->setObject( gIOClassKey, (OSSymbol *) classSymbol); - classSymbol->release(); - } - - data = OSDynamicCast( OSString, dict->getObject( gIOMatchCategoryKey )); - if( data) { - const OSSymbol *classSymbol = OSSymbol::withString(data); - - dict->setObject( gIOMatchCategoryKey, (OSSymbol *) classSymbol); - classSymbol->release(); - } -} - -void IOCatalogue::initialize( void ) +void IOCatalogue::initialize(void) { OSArray * array; OSString * errorString; @@ -846,20 +132,17 @@ void IOCatalogue::initialize( void ) array = OSDynamicCast(OSArray, OSUnserialize(gIOKernelConfigTables, &errorString)); if (!array && errorString) { - IOLog("KernelConfigTables syntax error: %s\n", - errorString->getCStringNoCopy()); - errorString->release(); + IOLog("KernelConfigTables syntax error: %s\n", + errorString->getCStringNoCopy()); + errorString->release(); } gIOClassKey = OSSymbol::withCStringNoCopy( kIOClassKey ); gIOProbeScoreKey = OSSymbol::withCStringNoCopy( kIOProbeScoreKey ); - gIOModuleIdentifierKey = OSSymbol::withCStringNoCopy( kModuleKey ); - gIOCatalogModuleRequests = OSSet::withCapacity(16); - gIOCatalogCacheMisses = OSSet::withCapacity(16); - gIOCatalogROMMkexts = OSSet::withCapacity(4); + gIOModuleIdentifierKey = 
OSSymbol::withCStringNoCopy( kCFBundleIdentifierKey ); assert( array && gIOClassKey && gIOProbeScoreKey - && gIOModuleIdentifierKey && gIOCatalogModuleRequests); + && gIOModuleIdentifierKey); gIOCatalogue = new IOCatalogue; assert(gIOCatalogue); @@ -868,7 +151,9 @@ void IOCatalogue::initialize( void ) array->release(); } -// Initialize the IOCatalog object. +/********************************************************************* +* Initialize the IOCatalog object. +*********************************************************************/ bool IOCatalogue::init(OSArray * initArray) { OSDictionary * dict; @@ -883,10 +168,11 @@ bool IOCatalogue::init(OSArray * initArray) kernelTables = OSCollectionIterator::withCollection( array ); gIOCatalogLock = IOLockAlloc(); - gIOKLDLock = IOLockAlloc(); lock = gIOCatalogLock; - kld_lock = gIOKLDLock; +#if __ppc__ || __i386__ + kld_lock = NULL; +#endif /* __ppc__ || __i386__ */ kernelTables->reset(); while( (dict = (OSDictionary *) kernelTables->getNextObject())) { @@ -905,8 +191,10 @@ bool IOCatalogue::init(OSArray * initArray) return true; } -// Release all resources used by IOCatalogue and deallocate. -// This will probably never be called. +/********************************************************************* +* Release all resources used by IOCatalogue and deallocate. +* This will probably never be called. 
+*********************************************************************/ void IOCatalogue::free( void ) { if ( array ) @@ -918,13 +206,15 @@ void IOCatalogue::free( void ) super::free(); } +/********************************************************************* +*********************************************************************/ #if CATALOGTEST static int hackLimit; - enum { kDriversPerIter = 4 }; -void IOCatalogue::ping( thread_call_param_t arg, thread_call_param_t) +void +IOCatalogue::ping(thread_call_param_t arg, thread_call_param_t) { IOCatalogue * self = (IOCatalogue *) arg; OSOrderedSet * set; @@ -941,7 +231,7 @@ void IOCatalogue::ping( thread_call_param_t arg, thread_call_param_t) if( table) { set->setLastObject( table ); - OSSymbol * sym = (OSSymbol *) table->getObject( gIOClassKey ); + OSSymbol * sym = (OSSymbol *) table->getObject(gIOClassKey); kprintf("enabling %s\n", sym->getCStringNoCopy()); } else { @@ -959,14 +249,18 @@ void IOCatalogue::ping( thread_call_param_t arg, thread_call_param_t) if( kDriversPerIter == newLimit) { AbsoluteTime deadline; - clock_interval_to_deadline( 500, kMillisecondScale ); - thread_call_func_delayed( ping, this, deadline ); + clock_interval_to_deadline(500, kMillisecondScale); + thread_call_func_delayed(ping, this, deadline); } } #endif -OSOrderedSet * IOCatalogue::findDrivers( IOService * service, - SInt32 * generationCount ) +/********************************************************************* +*********************************************************************/ +OSOrderedSet * +IOCatalogue::findDrivers( + IOService * service, + SInt32 * generationCount) { OSDictionary * nextTable; OSOrderedSet * set; @@ -977,7 +271,7 @@ OSOrderedSet * IOCatalogue::findDrivers( IOService * service, if( !set ) return( 0 ); - IOLockLock( lock ); + IOLockLock(lock); kernelTables->reset(); #if CATALOGTEST @@ -996,14 +290,18 @@ OSOrderedSet * IOCatalogue::findDrivers( IOService * service, *generationCount = getGenerationCount(); - 
IOLockUnlock( lock ); + IOLockUnlock(lock); return( set ); } -// Is personality already in the catalog? -OSOrderedSet * IOCatalogue::findDrivers( OSDictionary * matching, - SInt32 * generationCount) +/********************************************************************* +* Is personality already in the catalog? +*********************************************************************/ +OSOrderedSet * +IOCatalogue::findDrivers( + OSDictionary * matching, + SInt32 * generationCount) { OSDictionary * dict; OSOrderedSet * set; @@ -1013,7 +311,7 @@ OSOrderedSet * IOCatalogue::findDrivers( OSDictionary * matching, set = OSOrderedSet::withCapacity( 1, IOServiceOrdering, (void *)gIOProbeScoreKey ); - IOLockLock( lock ); + IOLockLock(lock); kernelTables->reset(); while ( (dict = (OSDictionary *) kernelTables->getNextObject()) ) { @@ -1024,108 +322,111 @@ OSOrderedSet * IOCatalogue::findDrivers( OSDictionary * matching, set->setObject(dict); } *generationCount = getGenerationCount(); - IOLockUnlock( lock ); + IOLockUnlock(lock); return set; } -// Add a new personality to the set if it has a unique IOResourceMatchKey value. -// XXX -- svail: This should be optimized. -// esb - There doesn't seem like any reason to do this - it causes problems -// esb - when there are more than one loadable driver matching on the same provider class -static void AddNewImports( OSOrderedSet * set, OSDictionary * dict ) -{ - set->setObject(dict); -} - -// Add driver config tables to catalog and start matching process. -bool IOCatalogue::addDrivers(OSArray * drivers, - bool doNubMatching ) +/********************************************************************* +* Add driver config tables to catalog and start matching process. +* +* Important that existing personalities are kept (not replaced) +* if duplicates found. Personalities can come from OSKext objects +* or from userland kext library. We want to minimize distinct +* copies between OSKext & IOCatalogue. 
+* +* xxx - userlib used to refuse to send personalities with IOKitDebug +* xxx - during safe boot. That would be better implemented here. +*********************************************************************/ +bool IOCatalogue::addDrivers( + OSArray * drivers, + bool doNubMatching) { - OSCollectionIterator * iter; - OSDictionary * dict; - OSOrderedSet * set; - OSArray * persons; - OSString * moduleName; - bool ret; + bool result = false; + OSCollectionIterator * iter = NULL; // must release + OSOrderedSet * set = NULL; // must release + OSDictionary * dict = NULL; // do not release + OSArray * persons = NULL; // do not release - ret = true; persons = OSDynamicCast(OSArray, drivers); - if ( !persons ) - return false; - - iter = OSCollectionIterator::withCollection( persons ); - if (!iter ) - return false; + if (!persons) { + goto finish; + } set = OSOrderedSet::withCapacity( 10, IOServiceOrdering, - (void *)gIOProbeScoreKey ); - if ( !set ) { - iter->release(); - return false; + (void *)gIOProbeScoreKey ); + if (!set) { + goto finish; } - IOLockLock( lock ); - while ( (dict = (OSDictionary *) iter->getNextObject()) ) - { - if ((moduleName = OSDynamicCast(OSString, dict->getObject("OSBundleModuleDemand")))) - { - IOLockUnlock( lock ); - ret = kmod_load_request(moduleName->getCStringNoCopy(), false); - IOLockLock( lock ); - ret = true; - } - else - { - SInt count; - - UniqueProperties( dict ); - - // Add driver personality to catalogue. - count = array->getCount(); - while ( count-- ) { - OSDictionary * driver; - - // Be sure not to double up on personalities. - driver = (OSDictionary *)array->getObject(count); - - /* Unlike in other functions, this comparison must be exact! - * The catalogue must be able to contain personalities that - * are proper supersets of others. - * Do not compare just the properties present in one driver - * pesonality or the other. 
- */ - if (dict->isEqualTo(driver)) - break; - } - if (count >= 0) - // its a dup - continue; - - ret = array->setObject( dict ); - if (!ret) - break; + iter = OSCollectionIterator::withCollection(persons); + if (!iter) { + goto finish; + } + + result = true; + + IOLockLock(lock); + while ( (dict = (OSDictionary *) iter->getNextObject()) ) { - AddNewImports( set, dict ); - } + // xxx Deleted OSBundleModuleDemand check; will handle in other ways for SL + + SInt count; + + UniqueProperties(dict); + + // Add driver personality to catalogue. + count = array->getCount(); + while (count--) { + OSDictionary * driver; + + // Be sure not to double up on personalities. + driver = (OSDictionary *)array->getObject(count); + + /* Unlike in other functions, this comparison must be exact! + * The catalogue must be able to contain personalities that + * are proper supersets of others. + * Do not compare just the properties present in one driver + * pesonality or the other. + */ + if (dict->isEqualTo(driver)) { + break; + } + } + if (count >= 0) { + // its a dup + continue; + } + + result = array->setObject(dict); + if (!result) { + break; + } + + AddNewImports(set, dict); } // Start device matching. if (doNubMatching && (set->getCount() > 0)) { - IOService::catalogNewDrivers( set ); + IOService::catalogNewDrivers(set); generation++; } - IOLockUnlock( lock ); + IOLockUnlock(lock); - set->release(); - iter->release(); - - return ret; +finish: + if (set) set->release(); + if (iter) iter->release(); + + return result; } -// Remove drivers from the catalog which match the -// properties in the matching dictionary. -bool IOCatalogue::removeDrivers( OSDictionary * matching, - bool doNubMatching) +/********************************************************************* +* Remove drivers from the catalog which match the +* properties in the matching dictionary. 
+*********************************************************************/ +bool +IOCatalogue::removeDrivers( + OSDictionary * matching, + bool doNubMatching) { OSCollectionIterator * tables; OSDictionary * dict; @@ -1156,7 +457,7 @@ bool IOCatalogue::removeDrivers( OSDictionary * matching, UniqueProperties( matching ); - IOLockLock( lock ); + IOLockLock(lock); kernelTables->reset(); arrayCopy->merge(array); array->flushCollection(); @@ -1178,7 +479,7 @@ bool IOCatalogue::removeDrivers( OSDictionary * matching, IOService::catalogNewDrivers(set); generation++; } - IOLockUnlock( lock ); + IOLockUnlock(lock); set->release(); tables->release(); @@ -1187,29 +488,48 @@ bool IOCatalogue::removeDrivers( OSDictionary * matching, } // Return the generation count. -SInt32 IOCatalogue::getGenerationCount( void ) const +SInt32 IOCatalogue::getGenerationCount(void) const { return( generation ); } -bool IOCatalogue::isModuleLoaded( OSString * moduleName ) const +bool IOCatalogue::isModuleLoaded(OSString * moduleName) const { return isModuleLoaded(moduleName->getCStringNoCopy()); } -bool IOCatalogue::isModuleLoaded( const char * moduleName ) const +bool IOCatalogue::isModuleLoaded(const char * moduleName) const { - return (kmod_load_request(moduleName, true)); + OSReturn ret; + ret = OSKext::loadKextWithIdentifier(moduleName); + if (kOSKextReturnDeferred == ret) { + // a request has been queued but the module isn't necessarily + // loaded yet, so stall. + return false; + } + // module is present or never will be + return true; } // Check to see if module has been loaded already. -bool IOCatalogue::isModuleLoaded( OSDictionary * driver ) const +bool IOCatalogue::isModuleLoaded(OSDictionary * driver) const { OSString * moduleName = NULL; + OSString * publisherName = NULL; if ( !driver ) return false; + /* The personalities of codeless kexts often contain the bundle ID of the + * kext they reference, and not the bundle ID of the codeless kext itself. 
+ * The prelinked kernel needs to know the bundle ID of the codeless kext + * so it can include these personalities, so OSKext stores that bundle ID + * in the IOPersonalityPublisher key, and we record it as requested here. + */ + publisherName = OSDynamicCast(OSString, + driver->getObject(kIOPersonalityPublisherKey)); + OSKext::recordIdentifierRequest(publisherName); + moduleName = OSDynamicCast(OSString, driver->getObject(gIOModuleIdentifierKey)); if ( moduleName ) return isModuleLoaded(moduleName); @@ -1220,57 +540,39 @@ bool IOCatalogue::isModuleLoaded( OSDictionary * driver ) const return true; } -// This function is called after a module has been loaded. -void IOCatalogue::moduleHasLoaded( OSString * moduleName ) +/* This function is called after a module has been loaded. + * Is invoked from user client call, ultimately from IOKitLib's + * IOCatalogueModuleLoaded(). Sent from kextd. + */ +void IOCatalogue::moduleHasLoaded(OSString * moduleName) { - OSDictionary * dict; + OSDictionary * dict; dict = OSDictionary::withCapacity(2); dict->setObject(gIOModuleIdentifierKey, moduleName); startMatching(dict); dict->release(); + + (void) OSKext::setDeferredLoadSucceeded(); + (void) OSKext::considerRebuildOfPrelinkedKernel(); } -void IOCatalogue::moduleHasLoaded( const char * moduleName ) +void IOCatalogue::moduleHasLoaded(const char * moduleName) { - OSString * name; + OSString * name; name = OSString::withCString(moduleName); moduleHasLoaded(name); name->release(); } -IOReturn IOCatalogue::unloadModule( OSString * moduleName ) const +// xxx - return is really OSReturn/kern_return_t +IOReturn IOCatalogue::unloadModule(OSString * moduleName) const { - kmod_info_t * k_info = 0; - kern_return_t ret; - const char * name; - - ret = kIOReturnBadArgument; - if ( moduleName ) { - name = moduleName->getCStringNoCopy(); - k_info = kmod_lookupbyname_locked((char *)name); - if ( k_info && (k_info->reference_count < 1) ) { - record_kext_unload(k_info->id); - if ( k_info->stop && - 
!((ret = k_info->stop(k_info, 0)) == kIOReturnSuccess) ) { - - kfree(k_info, sizeof(kmod_info_t)); - return ret; - } - - ret = kmod_destroy(host_priv_self(), k_info->id); - } - } - - if (k_info) { - kfree(k_info, sizeof(kmod_info_t)); - } - - return ret; + return OSKext::removeKextWithIdentifier(moduleName->getCStringNoCopy()); } -static IOReturn _terminateDrivers( OSDictionary * matching ) +static IOReturn _terminateDrivers(OSDictionary * matching) { OSDictionary * dict; OSIterator * iter; @@ -1354,55 +656,140 @@ static IOReturn _removeDrivers( OSArray * array, OSDictionary * matching ) return ret; } -IOReturn IOCatalogue::terminateDrivers( OSDictionary * matching ) +bool IOCatalogue::removePersonalities(OSArray * personalitiesToRemove) +{ + bool result = true; + OSArray * arrayCopy = NULL; // do not release + OSCollectionIterator * iterator = NULL; // must release + OSDictionary * personality = NULL; // do not release + OSDictionary * checkPersonality = NULL; // do not release + unsigned int count, i; + + // remove configs from catalog. + + arrayCopy = OSArray::withArray(array); + if (!arrayCopy) { + result = false; + goto finish; + } + + iterator = OSCollectionIterator::withCollection(arrayCopy); + arrayCopy->release(); + if (!iterator) { + result = false; + goto finish; + } + + array->flushCollection(); + + count = personalitiesToRemove->getCount(); + + /* Go through the old catalog's list of personalities and add back any that + * are *not* found in 'personalitiesToRemove'. + */ + while ((personality = (OSDictionary *)iterator->getNextObject())) { + bool found = false; + + for (i = 0; i < count; i++) { + checkPersonality = OSDynamicCast(OSDictionary, + personalitiesToRemove->getObject(i)); + + /* Do isEqualTo() with the single-arg version to make an exact + * comparison (unlike _removeDrivers() above). 
+ */ + if (personality->isEqualTo(checkPersonality)) { + found = true; + break; + } + } + + if (!found) { + array->setObject(personality); + } + } + +finish: + + OSSafeRelease(iterator); + return result; +} + +IOReturn IOCatalogue::terminateDrivers(OSDictionary * matching) { IOReturn ret; ret = _terminateDrivers(matching); - IOLockLock( lock ); + IOLockLock(lock); if (kIOReturnSuccess == ret) ret = _removeDrivers(array, matching); kernelTables->reset(); - IOLockUnlock( lock ); + IOLockUnlock(lock); return ret; } IOReturn IOCatalogue::terminateDriversForModule( - OSString * moduleName, - bool unload ) + OSString * moduleName, + bool unload) { IOReturn ret; OSDictionary * dict; + bool isLoaded = false; + + /* Check first if the kext currently has any linkage dependents; + * in such a case the unload would fail so let's not terminate any + * IOServices (since doing so typically results in a panic when there + * are loaded dependencies). Note that we aren't locking the kext here + * so it might lose or gain dependents by the time we call unloadModule(); + * I think that's ok, our unload can fail if a kext comes in on top of + * this one even after we've torn down IOService objects. Conversely, + * if we fail the unload here and then lose a library, the autounload + * thread will get us in short order. + */ + if (OSKext::isKextWithIdentifierLoaded(moduleName->getCStringNoCopy())) { + + isLoaded = true; + if (!OSKext::canUnloadKextWithIdentifier(moduleName, + /* checkClasses */ false)) { + ret = kOSKextReturnInUse; + goto finish; + } + } dict = OSDictionary::withCapacity(1); - if ( !dict ) - return kIOReturnNoMemory; + if (!dict) { + ret = kIOReturnNoMemory; + goto finish; + } dict->setObject(gIOModuleIdentifierKey, moduleName); ret = _terminateDrivers(dict); - IOLockLock( lock ); - if (kIOReturnSuccess == ret) - ret = _removeDrivers(array, dict); + + /* No goto between IOLock calls! 
+ */ + IOLockLock(lock); + if (kIOReturnSuccess == ret) { + ret = _removeDrivers(array, dict); + } kernelTables->reset(); // Unload the module itself. - if ( unload && ret == kIOReturnSuccess ) { - // Do kmod stop first. + if (unload && isLoaded && ret == kIOReturnSuccess) { ret = unloadModule(moduleName); } - IOLockUnlock( lock ); + IOLockUnlock(lock); dict->release(); +finish: return ret; } IOReturn IOCatalogue::terminateDriversForModule( - const char * moduleName, - bool unload ) + const char * moduleName, + bool unload) { OSString * name; IOReturn ret; @@ -1430,7 +817,7 @@ bool IOCatalogue::startMatching( OSDictionary * matching ) if ( !set ) return false; - IOLockLock( lock ); + IOLockLock(lock); kernelTables->reset(); while ( (dict = (OSDictionary *)kernelTables->getNextObject()) ) { @@ -1447,7 +834,7 @@ bool IOCatalogue::startMatching( OSDictionary * matching ) generation++; } - IOLockUnlock( lock ); + IOLockUnlock(lock); set->release(); @@ -1479,34 +866,15 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const break; case kIOCatalogGetModuleDemandList: - IOLockLock( lock ); - if (!gIOCatalogModuleRequests->serialize(s)) - kr = kIOReturnNoMemory; - IOLockUnlock( lock ); + kr = KERN_NOT_SUPPORTED; break; case kIOCatalogGetCacheMissList: - IOLockLock( lock ); - if (!gIOCatalogCacheMisses->serialize(s)) - kr = kIOReturnNoMemory; - IOLockUnlock( lock ); + kr = KERN_NOT_SUPPORTED; break; case kIOCatalogGetROMMkextList: - IOLockLock( lock ); - - if (!gIOCatalogROMMkexts || !gIOCatalogROMMkexts->getCount()) - kr = kIOReturnNoResources; - else if (!gIOCatalogROMMkexts->serialize(s)) - kr = kIOReturnNoMemory; - - if (gIOCatalogROMMkexts) - { - gIOCatalogROMMkexts->release(); - gIOCatalogROMMkexts = 0; - } - - IOLockUnlock( lock ); + kr = KERN_NOT_SUPPORTED; break; default: @@ -1518,181 +886,26 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const } -bool IOCatalogue::recordStartupExtensions(void) { - bool result = false; - 
- IOLockLock(kld_lock); - if (kernelLinkerPresent && record_startup_extensions_function) { - result = (*record_startup_extensions_function)(); - } else { - IOLog("Can't record startup extensions; " - "kernel linker is not present.\n"); - result = false; - } - IOLockUnlock(kld_lock); - - return result; -} - - -/********************************************************************* -* This function operates on sections retrieved from the currently running -* 32 bit mach kernel. -*********************************************************************/ -bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) -{ - OSData * copyData; - bool result = false; - bool prelinked; - - /* The mkext we've been handed (or the data it references) can go away, - * so we need to make a local copy to keep around as long as it might - * be needed. - */ - copyData = OSData::withData(mkext); - if (copyData) - { - struct section * infosect; - - infosect = getsectbyname("__PRELINK", "__info"); - prelinked = (infosect && infosect->addr && infosect->size); - - IOLockLock(kld_lock); - - if (gIOCatalogROMMkexts) - gIOCatalogROMMkexts->setObject(copyData); - - if (prelinked) { - result = true; - } else if (kernelLinkerPresent && add_from_mkext_function) { - result = (*add_from_mkext_function)(copyData); - } else { - IOLog("Can't add startup extensions from archive; " - "kernel linker is not present.\n"); - result = false; - } - - IOLockUnlock(kld_lock); - - copyData->release(); - } - - return result; -} - -/********************************************************************* -* This function clears out all references to the in-kernel linker, -* frees the list of startup extensions in extensionDict, and -* deallocates the kernel's __KLD segment to reclaim that memory. -* -* The segments it operates on are strictly 32 bit segments. 
-*********************************************************************/ -kern_return_t IOCatalogue::removeKernelLinker(void) { - kern_return_t result = KERN_SUCCESS; - struct segment_command * segmentLE, *segmentKLD; - boolean_t keepsyms = FALSE; -#if __ppc__ || __arm__ - char * dt_segment_name; - void * segment_paddress; - int segment_size; -#endif - - /* This must be the very first thing done by this function. - */ - IOLockLock(kld_lock); - - - /* If the kernel linker isn't here, that's automatically - * a success. - */ - if (!kernelLinkerPresent) { - result = KERN_SUCCESS; - goto finish; - } - - PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms)); - - IOLog("Jettisoning kernel linker.\n"); - - kernelLinkerPresent = 0; - - /* Set the kmod_load_extension function as the means for loading - * a kernel extension. - */ - kmod_load_function = &kmod_load_extension; - - record_startup_extensions_function = 0; - add_from_mkext_function = 0; - remove_startup_extension_function = 0; - - - /* Invoke destructors for the __KLD and __LINKEDIT segments. - * Do this for all segments before actually freeing their - * memory so that any cross-dependencies (not that there - * should be any) are handled. - */ - segmentKLD = getsegbyname("__KLD"); - if (!segmentKLD) { - IOLog("error removing kernel linker: can't find __KLD segment\n"); - result = KERN_FAILURE; - goto finish; - } - OSRuntimeUnloadCPPForSegment(segmentKLD); - -#if __ppc__ || __arm__ - /* Free the memory that was set up by bootx. - */ - dt_segment_name = "Kernel-__KLD"; - if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) { - IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress, - (int)segment_size); - } -#elif __i386__ - /* On x86, use the mapping data from the segment load command to - * unload KLD directly, unless the keepsyms boot-arg was enabled. - * This may invalidate any assumptions about "avail_start" - * defining the lower bound for valid physical addresses. 
- */ - if (!keepsyms && segmentKLD->vmaddr && segmentKLD->vmsize) - ml_static_mfree(segmentKLD->vmaddr, segmentKLD->vmsize); -#else -#error arch +#if PRAGMA_MARK +#pragma mark Obsolete Kext Loading Stuff #endif - - struct section * sect; - sect = getsectbyname("__PRELINK", "__symtab"); - if (sect && sect->addr) { - ml_static_mfree(sect->addr, sect->size); - } - -finish: - - /* This must be the very last thing done before returning. - */ - IOLockUnlock(kld_lock); - - return result; -} - /********************************************************************* -* This function stops the catalogue from making kextd requests during -* shutdown. -*********************************************************************/ -void IOCatalogue::disableExternalLinker(void) { - IOLockLock(gIOKLDLock); - /* If kmod_load_extension (the kextd requester function) is in use, - * disable new module requests. - */ - if (kmod_load_function == &kmod_load_extension) { - kmod_load_function = NULL; - } +********************************************************************** +*** BINARY COMPATIBILITY SECTION *** +********************************************************************** +********************************************************************** +* These functions are no longer used but are necessary for C++ binary +* compatibility on ppc/i386. 
+**********************************************************************/ +#if __ppc__ || __i386__ + +bool IOCatalogue::recordStartupExtensions(void) +{ return false; } - IOLockUnlock(gIOKLDLock); -} +bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) +{ return false; /* bool result: KERN_NOT_SUPPORTED is nonzero and would read as success */ } -extern "C" -void jettison_kernel_linker(void) -{ - if (gIOCatalogue != NULL) - gIOCatalogue->removeKernelLinker(); -} +kern_return_t IOCatalogue::removeKernelLinker(void) +{ return KERN_NOT_SUPPORTED; } + +#endif /* __ppc__ || __i386__ */ diff --git a/iokit/Kernel/IOCommandGate.cpp b/iokit/Kernel/IOCommandGate.cpp index f276ebc22..55d6eee7f 100644 --- a/iokit/Kernel/IOCommandGate.cpp +++ b/iokit/Kernel/IOCommandGate.cpp @@ -35,7 +35,11 @@ #define super IOEventSource OSDefineMetaClassAndStructors(IOCommandGate, IOEventSource) +#if __LP64__ OSMetaClassDefineReservedUnused(IOCommandGate, 0); +#else +OSMetaClassDefineReservedUsed(IOCommandGate, 0); +#endif OSMetaClassDefineReservedUnused(IOCommandGate, 1); OSMetaClassDefineReservedUnused(IOCommandGate, 2); OSMetaClassDefineReservedUnused(IOCommandGate, 3); @@ -126,7 +130,7 @@ IOReturn IOCommandGate::runAction(Action inAction, return kIOReturnBadArgument; IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) inAction, (unsigned int) owner); + (uintptr_t) inAction, (uintptr_t) owner); // closeGate is recursive needn't worry if we already hold the lock. 
closeGate(); @@ -179,7 +183,7 @@ IOReturn IOCommandGate::attemptAction(Action inAction, res = kIOReturnNotPermitted; else { IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) inAction, (unsigned int) owner); + (uintptr_t) inAction, (uintptr_t) owner); res = (*inAction)(owner, arg0, arg1, arg2, arg3); } @@ -197,6 +201,14 @@ IOReturn IOCommandGate::commandSleep(void *event, UInt32 interruptible) return sleepGate(event, interruptible); } +IOReturn IOCommandGate::commandSleep(void *event, AbsoluteTime deadline, UInt32 interruptible) +{ + if (!workLoop->inGate()) + return kIOReturnNotPermitted; + + return sleepGate(event, deadline, interruptible); +} + void IOCommandGate::commandWakeup(void *event, bool oneThread) { wakeupGate(event, oneThread); diff --git a/iokit/Kernel/IOCommandQueue.cpp b/iokit/Kernel/IOCommandQueue.cpp index 88249acf6..e2cd65b4e 100644 --- a/iokit/Kernel/IOCommandQueue.cpp +++ b/iokit/Kernel/IOCommandQueue.cpp @@ -32,12 +32,16 @@ HISTORY 1998-7-13 Godfrey van der Linden(gvdl) Created. 
]*/ + +#if !defined(__LP64__) + #include #include #include #include + #define NUM_FIELDS_IN_COMMAND 4 typedef struct commandEntryTag { void *f[NUM_FIELDS_IN_COMMAND]; @@ -150,7 +154,7 @@ bool IOCommandQueue::checkForWork() consumerIndex = 0; IOTimeStampConstant(IODBG_CMDQ(IOCMDQ_ACTION), - (unsigned int) action, (unsigned int) owner); + (uintptr_t) action, (uintptr_t) owner); (*(IOCommandQueueAction) action)(owner, field0, field1, field2, field3); @@ -272,3 +276,5 @@ int IOCommandQueue::performAndFlush(OSObject *target, return numEntries; } + +#endif /* !defined(__LP64__) */ diff --git a/iokit/Kernel/IOCopyMapper.cpp b/iokit/Kernel/IOCopyMapper.cpp index 4755e46cd..da48bdc86 100644 --- a/iokit/Kernel/IOCopyMapper.cpp +++ b/iokit/Kernel/IOCopyMapper.cpp @@ -102,7 +102,7 @@ typedef struct ActiveDARTEntry { static SYSCTL_UINT(_kern, OID_AUTO, copyregionmax, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - NULL, 0, ""); + (unsigned int *)NULL, 0, ""); static SYSCTL_UINT(_kern, OID_AUTO, lowpagemax, CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, @@ -214,7 +214,7 @@ ppnum_t IOCopyMapper::iovmAlloc(IOItemCount pages) // Can't alloc anything bigger than 1/2 table if (pages >= fMapperRegionSize/2) { - panic("iovmAlloc 0x%lx", pages); + panic("iovmAlloc 0x%lx", (long) pages); return 0; } @@ -288,7 +288,7 @@ void IOCopyMapper::iovmFree(ppnum_t addr, IOItemCount pages) FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; if (addr < fBufferPage) - IOPanic("addr < fBufferPage"); + panic("addr < fBufferPage"); addr -= fBufferPage; // Can't free anything of less than minumum @@ -376,7 +376,7 @@ addr64_t IOCopyMapper::mapAddr(IOPhysicalAddress addr) return (ptoa_64(mappedPage.fPPNum) | offset); } - panic("%s::mapAddr(0x%08lx) not mapped for I/O\n", getName(), addr); + panic("%s::mapAddr(0x%08lx) not mapped for I/O\n", getName(), (long) addr); return 0; } } diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp index 75d751afe..603432900 100644 --- 
a/iokit/Kernel/IODMACommand.cpp +++ b/iokit/Kernel/IODMACommand.cpp @@ -46,9 +46,6 @@ #define IS_BYPASSED(type) (MAPTYPE(type) == kBypassed) #define IS_NONCOHERENT(type) (MAPTYPE(type) == kNonCoherent) - -static bool gIOEnableCopyMapper = true; - enum { kWalkSyncIn = 0x01, // bounce -> md @@ -94,7 +91,7 @@ OSDefineMetaClassAndStructors(IODMACommand, IOCommand); OSMetaClassDefineReservedUsed(IODMACommand, 0); OSMetaClassDefineReservedUsed(IODMACommand, 1); -OSMetaClassDefineReservedUnused(IODMACommand, 2); +OSMetaClassDefineReservedUsed(IODMACommand, 2); OSMetaClassDefineReservedUnused(IODMACommand, 3); OSMetaClassDefineReservedUnused(IODMACommand, 4); OSMetaClassDefineReservedUnused(IODMACommand, 5); @@ -204,6 +201,9 @@ IODMACommand::initWithSpecification(SegmentFunction outSegFunc, return false; }; + if (fMapper) + fMapper->retain(); + reserved = IONew(IODMACommandInternal, 1); if (!reserved) return false; @@ -220,6 +220,9 @@ IODMACommand::free() if (reserved) IODelete(reserved, IODMACommandInternal, 1); + if (fMapper) + fMapper->release(); + super::free(); } @@ -267,6 +270,7 @@ IODMACommand::setMemoryDescriptor(const IOMemoryDescriptor *mem, bool autoPrepar mem->retain(); fMemory = mem; + mem->dmaCommandOperation(kIOMDSetDMAActive, this, 0); if (autoPrepare) return prepare(); }; @@ -283,6 +287,7 @@ IODMACommand::clearMemoryDescriptor(bool autoComplete) if (fMemory) { while (fActive) complete(); + fMemory->dmaCommandOperation(kIOMDSetDMAInactive, this, 0); fMemory->release(); fMemory = 0; } @@ -305,7 +310,7 @@ IODMACommand::segmentOp( void *segments, UInt32 segmentIndex) { - IOOptionBits op = (IOOptionBits) reference; + IOOptionBits op = (uintptr_t) reference; addr64_t maxPhys, address; addr64_t remapAddr = 0; uint64_t length; @@ -313,7 +318,7 @@ IODMACommand::segmentOp( IODMACommandInternal * state = target->reserved; - if (target->fNumAddressBits && (target->fNumAddressBits < 64)) + if (target->fNumAddressBits && (target->fNumAddressBits < 64) && 
!state->fLocalMapper) maxPhys = (1ULL << target->fNumAddressBits); else maxPhys = 0; @@ -327,8 +332,8 @@ IODMACommand::segmentOp( if (!state->fMisaligned) { - state->fMisaligned |= (0 != (target->fAlignMask & address)); - if (state->fMisaligned) DEBG("misaligned %qx:%qx, %lx\n", address, length, target->fAlignMask); + state->fMisaligned |= (0 != (state->fSourceAlignMask & address)); + if (state->fMisaligned) DEBG("misaligned %qx:%qx, %lx\n", address, length, state->fSourceAlignMask); } if (state->fMisaligned && (kWalkPreflight & op)) @@ -404,14 +409,15 @@ IODMACommand::walkAll(UInt8 op) UInt32 numSegments; UInt64 offset; - if (gIOEnableCopyMapper && (kWalkPreflight & op)) + if (kWalkPreflight & op) { - state->fCopyContig = false; + state->fMapContig = false; state->fMisaligned = false; state->fDoubleBuffer = false; state->fPrepared = false; state->fCopyNext = 0; - state->fCopyPageAlloc = 0; + state->fCopyMapperPageAlloc = 0; + state->fLocalMapperPageAlloc = 0; state->fCopyPageCount = 0; state->fNextRemapIndex = 0; state->fCopyMD = 0; @@ -420,7 +426,7 @@ IODMACommand::walkAll(UInt8 op) { offset = 0; numSegments = 0-1; - ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); } op &= ~kWalkPreflight; @@ -441,17 +447,17 @@ IODMACommand::walkAll(UInt8 op) mapBase = mapper->iovmAlloc(state->fCopyPageCount); if (mapBase) { - state->fCopyPageAlloc = mapBase; - if (state->fCopyPageAlloc && state->fDoubleBuffer) + state->fCopyMapperPageAlloc = mapBase; + if (state->fCopyMapperPageAlloc && state->fDoubleBuffer) { DEBG("contig copy map\n"); - state->fCopyContig = true; + state->fMapContig = true; } - state->fCopyNext = ptoa_64(state->fCopyPageAlloc); + state->fCopyNext = ptoa_64(state->fCopyMapperPageAlloc); offset = 0; numSegments = 0-1; - ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *) op, 
&offset, state, &numSegments); state->fPrepared = true; op &= ~(kWalkSyncIn | kWalkSyncOut); } @@ -459,7 +465,7 @@ IODMACommand::walkAll(UInt8 op) { DEBG("alloc IOBMD\n"); state->fCopyMD = IOBufferMemoryDescriptor::withOptions( - fMDSummary.fDirection, state->fPreparedLength, page_size); + fMDSummary.fDirection, state->fPreparedLength, state->fSourceAlignMask); if (state->fCopyMD) { @@ -473,20 +479,27 @@ IODMACommand::walkAll(UInt8 op) } } } + + if (state->fLocalMapper) + { + state->fLocalMapperPageCount = atop_64(round_page(state->fPreparedLength)); + state->fLocalMapperPageAlloc = fMapper->iovmAllocDMACommand(this, state->fLocalMapperPageCount); + state->fMapContig = true; + } } - if (gIOEnableCopyMapper && state->fPrepared && ((kWalkSyncIn | kWalkSyncOut) & op)) + if (state->fPrepared && ((kWalkSyncIn | kWalkSyncOut) & op)) { if (state->fCopyPageCount) { DEBG("sync fCopyPageCount %d\n", state->fCopyPageCount); - if (state->fCopyPageAlloc) + if (state->fCopyMapperPageAlloc) { - state->fCopyNext = ptoa_64(state->fCopyPageAlloc); + state->fCopyNext = ptoa_64(state->fCopyMapperPageAlloc); offset = 0; numSegments = 0-1; - ret = genIOVMSegments(segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); } else if (state->fCopyMD) { @@ -517,10 +530,16 @@ IODMACommand::walkAll(UInt8 op) if (kWalkComplete & op) { - if (state->fCopyPageAlloc) + if (state->fLocalMapperPageAlloc) + { + fMapper->iovmFreeDMACommand(this, state->fLocalMapperPageAlloc, state->fLocalMapperPageCount); + state->fLocalMapperPageAlloc = 0; + state->fLocalMapperPageCount = 0; + } + if (state->fCopyMapperPageAlloc) { - gIOCopyMapper->iovmFree(state->fCopyPageAlloc, state->fCopyPageCount); - state->fCopyPageAlloc = 0; + gIOCopyMapper->iovmFree(state->fCopyMapperPageAlloc, state->fCopyPageCount); + state->fCopyMapperPageAlloc = 0; state->fCopyPageCount = 0; } if (state->fCopyMD) @@ -534,6 +553,18 @@ IODMACommand::walkAll(UInt8 
op) return (ret); } +UInt8 +IODMACommand::getNumAddressBits(void) +{ + return (fNumAddressBits); +} + +UInt32 +IODMACommand::getAlignment(void) +{ + return (fAlignMask + 1); +} + IOReturn IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc, UInt8 numAddressBits, @@ -599,7 +630,12 @@ IODMACommand::prepareWithSpecification(SegmentFunction outSegFunc, if (!alignment) alignment = 1; fAlignMask = alignment - 1; - fMapper = mapper; + if (mapper != fMapper) + { + if (mapper) mapper->retain(); /* new mapper may be NULL */ + if (fMapper) fMapper->release(); /* previous mapper may be NULL */ + fMapper = mapper; + } fInternalState->fIterateOnly = (0 != (kIterateOnly & mappingOptions)); @@ -623,7 +659,7 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr if (IS_NONCOHERENT(mappingOptions) && flushCache) { IOMemoryDescriptor *poMD = const_cast(fMemory); - poMD->performOperation(kIOMemoryIncoherentIOStore, 0, fMDSummary.fLength); + poMD->performOperation(kIOMemoryIncoherentIOStore, offset, length); } if (fActive++) { @@ -636,20 +672,30 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr state->fPreparedOffset = offset; state->fPreparedLength = length; - state->fCopyContig = false; + state->fMapContig = false; state->fMisaligned = false; state->fDoubleBuffer = false; state->fPrepared = false; state->fCopyNext = 0; - state->fCopyPageAlloc = 0; + state->fCopyMapperPageAlloc = 0; state->fCopyPageCount = 0; state->fNextRemapIndex = 0; state->fCopyMD = 0; + state->fLocalMapperPageAlloc = 0; + state->fLocalMapperPageCount = 0; + state->fLocalMapper = (fMapper && (fMapper != IOMapper::gSystem)); + + state->fSourceAlignMask = fAlignMask; + if (state->fLocalMapper) + state->fSourceAlignMask &= page_mask; + state->fCursor = state->fIterateOnly || (!state->fCheckAddressing - && (!fAlignMask - || ((fMDSummary.fPageAlign & (1 << 31)) && (0 == (fMDSummary.fPageAlign & fAlignMask))))); + && !state->fLocalMapper + && (!state->fSourceAlignMask + || ((fMDSummary.fPageAlign & (1 << 31)) && (0 == 
(fMDSummary.fPageAlign & state->fSourceAlignMask))))); + if (!state->fCursor) { IOOptionBits op = kWalkPrepare | kWalkPreflight; @@ -687,13 +733,28 @@ IODMACommand::complete(bool invalidateCache, bool synchronize) { IOMemoryDescriptor *poMD = const_cast(fMemory); - poMD->performOperation(kIOMemoryIncoherentIOFlush, 0, fMDSummary.fLength); + poMD->performOperation(kIOMemoryIncoherentIOFlush, state->fPreparedOffset, state->fPreparedLength); } } return ret; } +IOReturn +IODMACommand::getPreparedOffsetAndLength(UInt64 * offset, UInt64 * length) +{ + IODMACommandInternal * state = fInternalState; + if (fActive < 1) + return (kIOReturnNotReady); + + if (offset) + *offset = state->fPreparedOffset; + if (length) + *length = state->fPreparedLength; + + return (kIOReturnSuccess); +} + IOReturn IODMACommand::synchronize(IOOptionBits options) { @@ -752,7 +813,7 @@ IODMACommand::transferSegment(void *reference, void *segments, UInt32 segmentIndex) { - IODMACommandTransferContext * context = (IODMACommandTransferContext *) segments; + IODMACommandTransferContext * context = (IODMACommandTransferContext *) reference; UInt64 length = min(segment.fLength, context->remaining); addr64_t ioAddr = segment.fIOVMAddr; addr64_t cpuAddr = ioAddr; @@ -793,6 +854,7 @@ IODMACommand::transfer(IOOptionBits transferOp, UInt64 offset, void * buffer, UI { IODMACommandInternal * state = fInternalState; IODMACommandTransferContext context; + Segment64 segments[1]; UInt32 numSegments = 0-1; if (fActive < 1) @@ -806,7 +868,7 @@ IODMACommand::transfer(IOOptionBits transferOp, UInt64 offset, void * buffer, UI context.bufferOffset = 0; context.remaining = length; context.op = transferOp; - (void) genIOVMSegments(transferSegment, (void *) kWalkClient, &offset, &context, &numSegments); + (void) genIOVMSegments(kWalkClient, transferSegment, &context, &offset, &segments[0], &numSegments); return (length - context.remaining); } @@ -828,17 +890,18 @@ IODMACommand::genIOVMSegments(UInt64 *offsetP, void 
*segmentsP, UInt32 *numSegmentsP) { - return (genIOVMSegments(clientOutputSegment, (void *) kWalkClient, offsetP, segmentsP, numSegmentsP)); + return (genIOVMSegments(kWalkClient, clientOutputSegment, (void *) fOutSeg, + offsetP, segmentsP, numSegmentsP)); } IOReturn -IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc, +IODMACommand::genIOVMSegments(uint32_t op, + InternalSegmentFunction outSegFunc, void *reference, UInt64 *offsetP, void *segmentsP, UInt32 *numSegmentsP) { - IOOptionBits op = (IOOptionBits) reference; IODMACommandInternal * internalState = fInternalState; IOOptionBits mdOp = kIOMDWalkSegments; IOReturn ret = kIOReturnSuccess; @@ -888,9 +951,12 @@ IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc, state->fOffset = offset; state->fLength = memLength - offset; - if (internalState->fCopyContig && (kWalkClient & op)) + if (internalState->fMapContig && (kWalkClient & op)) { - state->fIOVMAddr = ptoa_64(internalState->fCopyPageAlloc) + ppnum_t pageNum = internalState->fLocalMapperPageAlloc; + if (!pageNum) + pageNum = internalState->fCopyMapperPageAlloc; + state->fIOVMAddr = ptoa_64(pageNum) + offset - internalState->fPreparedOffset; rtn = kIOReturnSuccess; } @@ -956,19 +1022,19 @@ IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc, { DEBG("sparse switch %qx, %qx ", curSeg.fIOVMAddr, curSeg.fLength); if (trunc_page_64(curSeg.fIOVMAddr) == gIOCopyMapper->mapAddr( - ptoa_64(internalState->fCopyPageAlloc + internalState->fNextRemapIndex))) + ptoa_64(internalState->fCopyMapperPageAlloc + internalState->fNextRemapIndex))) { - curSeg.fIOVMAddr = ptoa_64(internalState->fCopyPageAlloc + internalState->fNextRemapIndex) + curSeg.fIOVMAddr = ptoa_64(internalState->fCopyMapperPageAlloc + internalState->fNextRemapIndex) + (curSeg.fIOVMAddr & PAGE_MASK); internalState->fNextRemapIndex += atop_64(round_page(curSeg.fLength)); } else for (UInt checkRemapIndex = 0; checkRemapIndex < internalState->fCopyPageCount; 
checkRemapIndex++) { if (trunc_page_64(curSeg.fIOVMAddr) == gIOCopyMapper->mapAddr( - ptoa_64(internalState->fCopyPageAlloc + checkRemapIndex))) + ptoa_64(internalState->fCopyMapperPageAlloc + checkRemapIndex))) { - curSeg.fIOVMAddr = ptoa_64(internalState->fCopyPageAlloc + checkRemapIndex) + curSeg.fIOVMAddr = ptoa_64(internalState->fCopyMapperPageAlloc + checkRemapIndex) + (curSeg.fIOVMAddr & PAGE_MASK); internalState->fNextRemapIndex = checkRemapIndex + atop_64(round_page(curSeg.fLength)); break; @@ -991,7 +1057,7 @@ IODMACommand::genIOVMSegments(InternalSegmentFunction outSegFunc, } if (internalState->fCursor - && (0 != (fAlignMask & curSeg.fIOVMAddr))) + && (0 != (internalState->fSourceAlignMask & curSeg.fIOVMAddr))) { curSeg.fIOVMAddr = 0; ret = kIOReturnNotAligned; @@ -1036,16 +1102,18 @@ IODMACommand::clientOutputSegment( void *reference, IODMACommand *target, Segment64 segment, void *vSegList, UInt32 outSegIndex) { + SegmentFunction segmentFunction = (SegmentFunction) reference; IOReturn ret = kIOReturnSuccess; if ((target->fNumAddressBits < 64) - && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits)) + && ((segment.fIOVMAddr + segment.fLength - 1) >> target->fNumAddressBits) + && (target->reserved->fLocalMapperPageAlloc || !target->reserved->fLocalMapper)) { DEBG("kIOReturnMessageTooLarge(fNumAddressBits) %qx, %qx\n", segment.fIOVMAddr, segment.fLength); ret = kIOReturnMessageTooLarge; } - if (!(*target->fOutSeg)(target, segment, vSegList, outSegIndex)) + if (!(*segmentFunction)(target, segment, vSegList, outSegIndex)) { DEBG("kIOReturnMessageTooLarge(fOutSeg) %qx, %qx\n", segment.fIOVMAddr, segment.fLength); ret = kIOReturnMessageTooLarge; @@ -1054,6 +1122,16 @@ IODMACommand::clientOutputSegment( return (ret); } +IOReturn +IODMACommand::genIOVMSegments(SegmentFunction segmentFunction, + UInt64 *offsetP, + void *segmentsP, + UInt32 *numSegmentsP) +{ + return (genIOVMSegments(kWalkClient, clientOutputSegment, (void *) segmentFunction, + 
offsetP, segmentsP, numSegmentsP)); +} + bool IODMACommand::OutputHost32(IODMACommand *, Segment64 segment, void *vSegList, UInt32 outSegIndex) diff --git a/iokit/Kernel/IODMAController.cpp b/iokit/Kernel/IODMAController.cpp index 4f81342d4..33a54dc76 100644 --- a/iokit/Kernel/IODMAController.cpp +++ b/iokit/Kernel/IODMAController.cpp @@ -37,7 +37,7 @@ const OSSymbol *IODMAController::createControllerName(UInt32 phandle) #define CREATE_BUF_LEN 48 char buf[CREATE_BUF_LEN]; - snprintf(buf, CREATE_BUF_LEN, "IODMAController%08lX", phandle); + snprintf(buf, CREATE_BUF_LEN, "IODMAController%08X", (uint32_t)phandle); return OSSymbol::withCString(buf); } diff --git a/iokit/Kernel/IODMAEventSource.cpp b/iokit/Kernel/IODMAEventSource.cpp index eeaf70c8a..a032b9204 100644 --- a/iokit/Kernel/IODMAEventSource.cpp +++ b/iokit/Kernel/IODMAEventSource.cpp @@ -103,7 +103,7 @@ IOReturn IODMAEventSource::startDMACommand(IODMACommand *dmaCommand, IODirection return kIOReturnSuccess; } -IOReturn IODMAEventSource::stopDMACommand(bool flush, mach_timespec_t *timeout) +IOReturn IODMAEventSource::stopDMACommand(bool flush, uint64_t timeout) { if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return kIOReturnError; @@ -119,6 +119,14 @@ IOReturn IODMAEventSource::queryDMACommand(IODMACommand **dmaCommand, IOByteCoun } +IOByteCount IODMAEventSource::getFIFODepth() +{ + if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return 0; /* no controller/channel: report zero depth, not an IOReturn status code */ + + return dmaController->getFIFODepth(dmaIndex); +} + + // protected bool IODMAEventSource::checkForWork(void) diff --git a/iokit/Kernel/IODataQueue.cpp b/iokit/Kernel/IODataQueue.cpp index 3a7f4c832..1001ebeff 100644 --- a/iokit/Kernel/IODataQueue.cpp +++ b/iokit/Kernel/IODataQueue.cpp @@ -77,7 +77,7 @@ Boolean IODataQueue::initWithCapacity(UInt32 size) return false; } - dataQueue = (IODataQueueMemory *)IOMallocAligned(round_page_32(size + DATA_QUEUE_MEMORY_HEADER_SIZE), PAGE_SIZE); + dataQueue = (IODataQueueMemory 
*)IOMallocAligned(round_page(size + DATA_QUEUE_MEMORY_HEADER_SIZE), PAGE_SIZE); if (dataQueue == 0) { return false; } @@ -97,7 +97,7 @@ Boolean IODataQueue::initWithEntries(UInt32 numEntries, UInt32 entrySize) void IODataQueue::free() { if (dataQueue) { - IOFreeAligned(dataQueue, round_page_32(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE)); + IOFreeAligned(dataQueue, round_page(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE)); } super::free(); @@ -209,7 +209,7 @@ void IODataQueue::sendDataAvailableNotification() msgh = (mach_msg_header_t *)notifyMsg; if (msgh && msgh->msgh_remote_port) { - kr = mach_msg_send_from_kernel(msgh, msgh->msgh_size); + kr = mach_msg_send_from_kernel_proper(msgh, msgh->msgh_size); switch(kr) { case MACH_SEND_TIMED_OUT: // Notification already sent case MACH_MSG_SUCCESS: diff --git a/iokit/Kernel/IODeviceMemory.cpp b/iokit/Kernel/IODeviceMemory.cpp index d379e1755..f7ed8020c 100644 --- a/iokit/Kernel/IODeviceMemory.cpp +++ b/iokit/Kernel/IODeviceMemory.cpp @@ -27,6 +27,7 @@ */ #include +#include /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -34,8 +35,8 @@ IODeviceMemory * IODeviceMemory::withRange( IOPhysicalAddress start, IOPhysicalLength length ) { - return( (IODeviceMemory *) IOMemoryDescriptor::withPhysicalAddress( - start, length, kIODirectionNone )); + return( (IODeviceMemory *) IOMemoryDescriptor::withAddressRange( + start, length, kIODirectionNone | kIOMemoryMapperNone, NULL )); } @@ -44,7 +45,7 @@ IODeviceMemory * IODeviceMemory::withSubRange( IOPhysicalAddress offset, IOPhysicalLength length ) { - return( (IODeviceMemory *) IOMemoryDescriptor::withSubRange( + return( (IODeviceMemory *) IOSubMemoryDescriptor::withSubRange( of, offset, length, kIODirectionNone )); } diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index 381022c56..afb221cf4 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -91,7 +91,6 
@@ IODeviceTreeAlloc( void * dtTop ) DTEntry mapEntry; OSArray * stack; OSData * prop; - OSObject * obj; OSDictionary * allInts; vm_offset_t * dtMap; unsigned int propSize; @@ -144,7 +143,7 @@ IODeviceTreeAlloc( void * dtTop ) freeDT = (kSuccess == DTLookupEntry( 0, "/chosen/memory-map", &mapEntry )) && (kSuccess == DTGetProperty( mapEntry, "DeviceTree", (void **) &dtMap, &propSize )) - && ((2 * sizeof(vm_offset_t)) == propSize); + && ((2 * sizeof(uint32_t)) == propSize); parent = MakeReferenceTable( (DTEntry)dtTop, freeDT ); @@ -193,7 +192,7 @@ IODeviceTreeAlloc( void * dtTop ) // free original device tree DTInit(0); IODTFreeLoaderInfo( "DeviceTree", - (void *)dtMap[0], round_page_32(dtMap[1]) ); + (void *)dtMap[0], (int) round_page(dtMap[1]) ); } // adjust tree @@ -210,6 +209,9 @@ IODeviceTreeAlloc( void * dtTop ) if( !intMap && child->getProperty( gIODTInterruptParentKey)) intMap = true; +#if __ppc__ + OSObject * obj; + // Look for a "driver,AAPL,MacOSX,PowerPC" property. if( (obj = child->getProperty( "driver,AAPL,MacOSX,PowerPC"))) { gIOCatalogue->addExtensionsFromArchive((OSData *)obj); @@ -226,6 +228,7 @@ IODeviceTreeAlloc( void * dtTop ) child->removeProperty( "driver,AAPL,MacOS,PowerPC"); } } +#endif /* __ppc__ */ } regIter->release(); } @@ -308,11 +311,7 @@ static void FreePhysicalMemory( vm_offset_t * range ) { vm_offset_t virt; -#if defined (__i386__) - virt = ml_boot_ptovirt( range[0] ); -#else virt = ml_static_ptovirt( range[0] ); -#endif if( virt) { ml_static_mfree( virt, range[1] ); } @@ -383,7 +382,7 @@ MakeReferenceTable( DTEntry dtEntry, bool copy ) } else if(noLocation && (!strncmp(name, "reg", sizeof("reg")))) { // default location - override later - snprintf(location, sizeof(location), "%lX", *((UInt32 *) prop)); + snprintf(location, sizeof(location), "%X", *((uint32_t *) prop)); regEntry->setLocation( location ); } } @@ -468,7 +467,7 @@ const OSSymbol * IODTInterruptControllerName( IORegistryEntry * regEntry ) assert( ok ); if( ok) { - 
snprintf(buf, sizeof(buf), "IOInterruptController%08lX", phandle); + snprintf(buf, sizeof(buf), "IOInterruptController%08X", (uint32_t)phandle); sym = OSSymbol::withCString( buf ); } else sym = 0; @@ -845,7 +844,7 @@ bool IODTMatchNubWithKeys( IORegistryEntry * regEntry, result = regEntry->compareNames( obj ); obj->release(); } -#ifdef DEBUG +#if DEBUG else IOLog("Couldn't unserialize %s\n", keys ); #endif @@ -1101,7 +1100,7 @@ OSArray * IODTResolveAddressing( IORegistryEntry * regEntry, range = 0; if( parent) range = IODeviceMemory::withSubRange( parent, - phys - parent->getPhysicalAddress(), len ); + phys - parent->getPhysicalSegment(0, 0, kIOMemoryMapperNone), len ); if( 0 == range) range = IODeviceMemory::withRange( phys, len ); if( range) diff --git a/iokit/Kernel/IOEventSource.cpp b/iokit/Kernel/IOEventSource.cpp index 0e0dbc8f8..a20232d91 100644 --- a/iokit/Kernel/IOEventSource.cpp +++ b/iokit/Kernel/IOEventSource.cpp @@ -56,6 +56,8 @@ void IOEventSource::closeGate() { workLoop->closeGate(); } bool IOEventSource::tryCloseGate() { return workLoop->tryCloseGate(); } int IOEventSource::sleepGate(void *event, UInt32 type) { return workLoop->sleepGate(event, type); } +int IOEventSource::sleepGate(void *event, AbsoluteTime deadline, UInt32 type) + { return workLoop->sleepGate(event, deadline, type); } void IOEventSource::wakeupGate(void *event, bool oneThread) { workLoop->wakeupGate(event, oneThread); } diff --git a/iokit/Kernel/IOFilterInterruptEventSource.cpp b/iokit/Kernel/IOFilterInterruptEventSource.cpp index 9e268f6e7..47a3b8a14 100644 --- a/iokit/Kernel/IOFilterInterruptEventSource.cpp +++ b/iokit/Kernel/IOFilterInterruptEventSource.cpp @@ -42,19 +42,19 @@ HISTORY #define IOTimeTypeStampS(t) \ do { \ IOTimeStampStart(IODBG_INTES(t), \ - (unsigned int) this, (unsigned int) owner); \ + (uintptr_t) this, (uintptr_t) owner); \ } while(0) #define IOTimeTypeStampE(t) \ do { \ IOTimeStampEnd(IODBG_INTES(t), \ - (unsigned int) this, (unsigned int) owner); \ + 
(uintptr_t) this, (uintptr_t) owner); \ } while(0) #define IOTimeStampLatency() \ do { \ IOTimeStampEnd(IODBG_INTES(IOINTES_LAT), \ - (unsigned int) this, (unsigned int) owner); \ + (uintptr_t) this, (uintptr_t) owner); \ } while(0) #else /* !KDEBUG */ @@ -161,7 +161,7 @@ IOTimeTypeStampS(IOINTES_INTCTXT); IOTimeTypeStampS(IOINTES_INTFLTR); IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER), - (unsigned int) filterAction, (unsigned int) owner); + (uintptr_t) filterAction, (uintptr_t) owner); filterRes = (*filterAction)(owner, this); IOTimeTypeStampE(IOINTES_INTFLTR); @@ -180,7 +180,7 @@ IOTimeTypeStampS(IOINTES_INTCTXT); IOTimeTypeStampS(IOINTES_INTFLTR); IOTimeStampConstant(IODBG_INTES(IOINTES_FILTER), - (unsigned int) filterAction, (unsigned int) owner); + (uintptr_t) filterAction, (uintptr_t) owner); filterRes = (*filterAction)(owner, this); IOTimeTypeStampE(IOINTES_INTFLTR); diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 9105cdbb9..9abb3f6ca 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -198,7 +198,7 @@ uint32_t gIOHibernateFreeTime = 0*1000; // max time to spend freeing pages (m static IODTNVRAM * gIOOptionsEntry; static IORegistryEntry * gIOChosenEntry; -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) static const OSSymbol * gIOCreateEFIDevicePathSymbol; #endif @@ -243,7 +243,7 @@ IOMemoryDescriptorWriteFromPhysical(IOMemoryDescriptor * md, addr64_t dstAddr64; IOByteCount dstLen; - dstAddr64 = md->getPhysicalSegment64(offset, &dstLen); + dstAddr64 = md->getPhysicalSegment(offset, &dstLen, kIOMemoryMapperNone); if (!dstAddr64) break; @@ -281,7 +281,7 @@ IOMemoryDescriptorReadToPhysical(IOMemoryDescriptor * md, addr64_t srcAddr64; IOByteCount dstLen; - srcAddr64 = md->getPhysicalSegment64(offset, &dstLen); + srcAddr64 = md->getPhysicalSegment(offset, &dstLen, kIOMemoryMapperNone); if (!srcAddr64) break; @@ -591,6 +591,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, err = kIOReturnNoSpace; break; } + gIOHibernateFileRef = vars->fileRef; HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx\n", filename, ctx.size, vars->block0, maxiobytes); if (ctx.size < 1*1024*1024) // check against image size estimate! 
@@ -665,7 +666,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, && child->isParent(next, gIOServicePlane, true)); HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", - major, minor, vars->blockSize, vars->pollers->getCount()); + major, minor, (long)vars->blockSize, vars->pollers->getCount()); if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded) continue; @@ -686,7 +687,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, { char str2[24]; -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) if (!gIOCreateEFIDevicePathSymbol) gIOCreateEFIDevicePathSymbol = OSSymbol::withCString("CreateEFIDevicePath"); @@ -725,7 +726,10 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, { HIBLOG("error 0x%x opening hibernation file\n", err); if (vars->fileRef) + { kern_close_file_for_direct_io(vars->fileRef); + gIOHibernateFileRef = vars->fileRef = NULL; + } } if (part) @@ -743,8 +747,6 @@ IOPolledFileClose( IOPolledFileIOVars * vars ) vars->pollers->release(); } - gIOHibernateFileRef = vars->fileRef; - bzero(vars, sizeof(IOPolledFileIOVars)); return (kIOReturnSuccess); @@ -977,7 +979,7 @@ if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->buffer + vars->bufferHalf, &cryptvars->ctx.decrypt); } -#endif CRYPTO +#endif /* CRYPTO */ } } while (size); @@ -1005,6 +1007,10 @@ IOHibernateSystemSleep(void) gIOHibernateState = kIOHibernateStateInactive; + gIOHibernateDebugFlags = 0; + if (kIOLogHibernate & gIOKitDebug) + gIOHibernateDebugFlags |= kIOHibernateDebugRestoreLogs; + /* The invocation of IOPMSleepSystemWithOptions() may override * existing hibernation settings. 
*/ @@ -1106,6 +1112,7 @@ IOHibernateSystemSleep(void) } bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; boolean_t encryptedswap; err = hibernate_setup(gIOHibernateCurrentHeader, @@ -1155,7 +1162,7 @@ IOHibernateSystemSleep(void) } data->release(); -#ifdef __ppc__ +#if defined(__ppc__) size_t len; char valueString[16]; @@ -1179,7 +1186,7 @@ IOHibernateSystemSleep(void) { vars->haveFastBoot = true; - len = sprintf(valueString, "0x%lx", *((UInt32 *)data->getBytesNoCopy())); + len = snprintf(valueString, sizeof(valueString), "0x%lx", *((UInt32 *)data->getBytesNoCopy())); data = OSData::withBytes(valueString, len + 1); sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey); if (sym && data) @@ -1194,7 +1201,7 @@ IOHibernateSystemSleep(void) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); } #endif /* __ppc__ */ -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) struct AppleRTCHibernateVars { uint8_t signature[4]; @@ -1258,7 +1265,7 @@ IOHibernateSystemSleep(void) if (data) gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); } -#else /* !__i386__ */ +#else /* !i386 && !x86_64 */ if (kIOHibernateModeEncrypt & gIOHibernateMode) { data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); @@ -1312,7 +1319,7 @@ IOHibernateSystemSleep(void) &newVolume, sizeof(newVolume)); } } -#endif /* !__i386__ */ +#endif /* !i386 && !x86_64 */ } // -- @@ -1467,7 +1474,7 @@ IOHibernateSystemHasSlept(void) IOService::getPMRootDomain()->getProperty(kIOHibernatePreviewActiveKey)))) { UInt32 flags = *((UInt32 *)data->getBytesNoCopy()); - HIBPRINT("kIOHibernatePreviewActiveKey %08lx\n", flags); + HIBPRINT("kIOHibernatePreviewActiveKey %08lx\n", (long)flags); IOService::getPMRootDomain()->removeProperty(kIOHibernatePreviewActiveKey); @@ -1515,7 +1522,7 @@ IOHibernateSystemWake(void) 
IOUnmapPages(kernel_map, vars->videoMapping, vars->videoMapSize); if (vars->videoAllocSize) // dealloc range - kmem_free(kernel_map, trunc_page_32(vars->videoMapping), vars->videoAllocSize); + kmem_free(kernel_map, trunc_page(vars->videoMapping), vars->videoAllocSize); } if (vars->previewBuffer) @@ -1587,7 +1594,7 @@ IOHibernateSystemWake(void) } #endif -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) IOService::getPMRootDomain()->removeProperty(kIOHibernateRTCVariablesKey); /* @@ -1783,6 +1790,8 @@ hibernate_write_image(void) AbsoluteTime_to_scalar(&decoTime) = 0; clock_get_uptime(&allTime); + IOService::getPMRootDomain()->pmStatsRecordEvent( + kIOPMStatsHibernateImageWrite | kIOPMStatsEventStartFlag, allTime); do { @@ -1813,17 +1822,27 @@ hibernate_write_image(void) break; } + uintptr_t hibernateBase; + uintptr_t hibernateEnd; + +#if defined(__i386__) || defined(__x86_64__) + hibernateBase = sectINITPTB; +#else + hibernateBase = sectHIBB; +#endif + + hibernateEnd = (sectHIBB + sectSizeHIB); // copy out restore1 code - page = atop_32(sectHIBB); - count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page; + page = atop_32(hibernateBase); + count = atop_32(round_page(hibernateEnd)) - page; header->restore1CodePage = page; header->restore1PageCount = count; - header->restore1CodeOffset = ((uint32_t) &hibernate_machine_entrypoint) - sectHIBB; - header->restore1StackOffset = ((uint32_t) &gIOHibernateRestoreStackEnd[0]) - 64 - sectHIBB; + header->restore1CodeOffset = ((uintptr_t) &hibernate_machine_entrypoint) - hibernateBase; + header->restore1StackOffset = ((uintptr_t) &gIOHibernateRestoreStackEnd[0]) - 64 - hibernateBase; // sum __HIB sect, with zeros for the stack - src = (uint8_t *) trunc_page(sectHIBB); + src = (uint8_t *) trunc_page(hibernateBase); for (page = 0; page < count; page++) { if ((src < &gIOHibernateRestoreStack[0]) || (src >= &gIOHibernateRestoreStackEnd[0])) @@ -1836,8 +1855,8 @@ hibernate_write_image(void) // write the __HIB sect, 
with zeros for the stack - src = (uint8_t *) trunc_page(sectHIBB); - count = ((uint32_t) &gIOHibernateRestoreStack[0]) - trunc_page(sectHIBB); + src = (uint8_t *) trunc_page(hibernateBase); + count = ((uintptr_t) &gIOHibernateRestoreStack[0]) - trunc_page(hibernateBase); if (count) { err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); @@ -1851,7 +1870,7 @@ hibernate_write_image(void) if (kIOReturnSuccess != err) break; src = &gIOHibernateRestoreStackEnd[0]; - count = round_page(sectHIBB + sectSizeHIB) - ((uint32_t) src); + count = round_page(hibernateEnd) - ((uintptr_t) src); if (count) { err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); @@ -1870,7 +1889,7 @@ hibernate_write_image(void) count = 0; do { - phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen); + phys64 = vars->previewBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone); pageAndCount[0] = atop_64(phys64); pageAndCount[1] = atop_32(segLen); err = IOPolledFileWrite(vars->fileVars, @@ -1885,7 +1904,7 @@ hibernate_write_image(void) if (kIOReturnSuccess != err) break; - src = (uint8_t *) vars->previewBuffer->getSourceSegment(0, NULL); + src = (uint8_t *) vars->previewBuffer->getPhysicalSegment(0, NULL, _kIOMemorySourceSegment); count = vars->previewBuffer->getLength(); header->previewPageListSize = ppnum; @@ -1902,7 +1921,7 @@ hibernate_write_image(void) // mark areas for no save for (count = 0; - (phys64 = vars->ioBuffer->getPhysicalSegment64(count, &segLen)); + (phys64 = vars->ioBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone)); count += segLen) { hibernate_set_page_state(vars->page_list, vars->page_list_wired, @@ -1912,7 +1931,7 @@ hibernate_write_image(void) } for (count = 0; - (phys64 = vars->srcBuffer->getPhysicalSegment64(count, &segLen)); + (phys64 = vars->srcBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone)); count += segLen) { hibernate_set_page_state(vars->page_list, vars->page_list_wired, @@ -1934,15 +1953,15 @@ 
hibernate_write_image(void) hibernate_page_list_set_volatile(vars->page_list, vars->page_list_wired, &pageCount); - page = atop_32(sectHIBB); - count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page; + page = atop_32(hibernateBase); + count = atop_32(round_page(hibernateEnd)) - page; hibernate_set_page_state(vars->page_list, vars->page_list_wired, page, count, kIOHibernatePageStateFree); pageCount -= count; if (vars->previewBuffer) for (count = 0; - (phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen)); + (phys64 = vars->previewBuffer->getPhysicalSegment(count, &segLen, kIOMemoryMapperNone)); count += segLen) { hibernate_set_page_state(vars->page_list, vars->page_list_wired, @@ -1980,7 +1999,7 @@ hibernate_write_image(void) err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(ppnum), page_size); if (err) { - HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%d] %x\n", __LINE__, ppnum, err); + HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%ld] %x\n", __LINE__, (long)ppnum, err); break; } @@ -2126,6 +2145,10 @@ hibernate_write_image(void) while (false); clock_get_uptime(&endTime); + + IOService::getPMRootDomain()->pmStatsRecordEvent( + kIOPMStatsHibernateImageWrite | kIOPMStatsEventStopFlag, endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); absolutetime_to_nanoseconds(endTime, &nsec); HIBLOG("all time: %qd ms, ", @@ -2362,7 +2385,7 @@ hibernate_machine_init(void) err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); if (err) { - HIBLOG("IOMemoryDescriptorReadToPhysical [%d] %x\n", ppnum, err); + HIBLOG("IOMemoryDescriptorReadToPhysical [%ld] %x\n", (long)ppnum, err); break; } @@ -2412,6 +2435,12 @@ hibernate_machine_init(void) (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount); clock_get_uptime(&endTime); + + IOService::getPMRootDomain()->pmStatsRecordEvent( + kIOPMStatsHibernateImageRead | kIOPMStatsEventStartFlag, allTime); + IOService::getPMRootDomain()->pmStatsRecordEvent( + 
kIOPMStatsHibernateImageRead | kIOPMStatsEventStopFlag, endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); absolutetime_to_nanoseconds(endTime, &nsec); diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h index 1e0adbc5c..03e422bae 100644 --- a/iokit/Kernel/IOHibernateInternal.h +++ b/iokit/Kernel/IOHibernateInternal.h @@ -94,9 +94,12 @@ uint32_t hibernate_sum(uint8_t *buf, int32_t len); extern vm_offset_t sectHIBB; -extern int sectSizeHIB; +extern unsigned long sectSizeHIB; extern vm_offset_t sectDATAB; -extern int sectSizeDATA; +extern unsigned long sectSizeDATA; +#if defined(__i386__) || defined(__x86_64__) +extern vm_offset_t sectINITPTB; +#endif extern vm_offset_t gIOHibernateWakeMap; // ppnum extern vm_size_t gIOHibernateWakeMapSize; diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index 405d64646..ea2180933 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -47,6 +47,8 @@ it calls or references needs to be careful to only touch memory also in the "__H uint32_t gIOHibernateState; +uint32_t gIOHibernateDebugFlags; + static IOHibernateImageHeader _hibernateHeader; IOHibernateImageHeader * gIOHibernateCurrentHeader = &_hibernateHeader; @@ -59,10 +61,156 @@ hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars = &_cryptWakeVars; vm_offset_t gIOHibernateWakeMap; // ppnum vm_size_t gIOHibernateWakeMapSize; -#if __i386__ -extern void acpi_wake_prot_entry(void); + +#if CONFIG_SLEEP +#if defined(__i386__) || defined(__x86_64__) +extern void acpi_wake_prot_entry(void); +#endif +#endif + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if defined(__i386__) || defined(__x86_64__) + +#define DBGLOG 1 + +#include + +/* standard port addresses */ +enum { + COM1_PORT_ADDR = 0x3f8, + COM2_PORT_ADDR = 0x2f8 +}; + +/* UART register offsets */ +enum { + UART_RBR = 0, /* receive buffer Register (R) */ + UART_THR = 0, /* 
transmit holding register (W) */ + UART_DLL = 0, /* DLAB = 1, divisor latch (LSB) */ + UART_IER = 1, /* interrupt enable register */ + UART_DLM = 1, /* DLAB = 1, divisor latch (MSB) */ + UART_IIR = 2, /* interrupt ident register (R) */ + UART_FCR = 2, /* fifo control register (W) */ + UART_LCR = 3, /* line control register */ + UART_MCR = 4, /* modem control register */ + UART_LSR = 5, /* line status register */ + UART_MSR = 6, /* modem status register */ + UART_SCR = 7 /* scratch register */ +}; + +enum { + UART_LCR_8BITS = 0x03, + UART_LCR_DLAB = 0x80 +}; + +enum { + UART_MCR_DTR = 0x01, + UART_MCR_RTS = 0x02, + UART_MCR_OUT1 = 0x04, + UART_MCR_OUT2 = 0x08, + UART_MCR_LOOP = 0x10 +}; + +enum { + UART_LSR_DR = 0x01, + UART_LSR_OE = 0x02, + UART_LSR_PE = 0x04, + UART_LSR_FE = 0x08, + UART_LSR_THRE = 0x20 +}; + +static void uart_putc(char c) +{ + while (!(inb(COM1_PORT_ADDR + UART_LSR) & UART_LSR_THRE)) + {} + outb(COM1_PORT_ADDR + UART_THR, c); +} + +static int debug_probe( void ) +{ + /* Verify that the Scratch Register is accessible */ + outb(COM1_PORT_ADDR + UART_SCR, 0x5a); + if (inb(COM1_PORT_ADDR + UART_SCR) != 0x5a) return false; + outb(COM1_PORT_ADDR + UART_SCR, 0xa5); + if (inb(COM1_PORT_ADDR + UART_SCR) != 0xa5) return false; + uart_putc('\n'); + return true; +} + +static void uart_puthex(uint64_t num) +{ + int bit; + char c; + bool leading = true; + + for (bit = 60; bit >= 0; bit -= 4) + { + c = 0xf & (num >> bit); + if (c) + leading = false; + else if (leading) + continue; + if (c <= 9) + c += '0'; + else + c+= 'a' - 10; + uart_putc(c); + } +} + +static void debug_code(uint32_t code, uint64_t value) +{ + int bit; + char c; + + if (!(kIOHibernateDebugRestoreLogs & gIOHibernateDebugFlags)) + return; + + for (bit = 24; bit >= 0; bit -= 8) + { + c = 0xFF & (code >> bit); + if (c) + uart_putc(c); + } + uart_putc('='); + uart_puthex(value); + uart_putc('\n'); + uart_putc('\r'); +} + +#endif /* defined(__i386__) || defined(__x86_64__) */ + +#if 
!defined(DBGLOG) +#define debug_probe() (false) +#define debug_code(c, v) {} #endif +enum +{ + kIOHibernateRestoreCodeImageStart = 'imgS', + kIOHibernateRestoreCodeImageEnd = 'imgE', + kIOHibernateRestoreCodePageIndexStart = 'pgiS', + kIOHibernateRestoreCodePageIndexEnd = 'pgiE', + kIOHibernateRestoreCodeMapStart = 'mapS', + kIOHibernateRestoreCodeMapEnd = 'mapE', + kIOHibernateRestoreCodeWakeMapSize = 'wkms', + kIOHibernateRestoreCodeConflictPage = 'cfpg', + kIOHibernateRestoreCodeConflictSource = 'cfsr', + kIOHibernateRestoreCodeNoMemory = 'nomm' +}; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + +static void fatal(void) +{ +#if defined(__i386__) || defined(__x86_64__) + outb(0xcf9, 6); +#else + while (true) {} +#endif +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define BASE 65521L /* largest prime smaller than 65536 */ @@ -102,11 +250,6 @@ hibernate_sum(uint8_t *buf, int32_t len) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -static __inline__ unsigned int cntlzw(unsigned int num) -{ - return clz(num); -} - static hibernate_bitmap_t * hibernate_page_bitmap(hibernate_page_list_t * list, uint32_t page) { @@ -237,6 +380,13 @@ hibernate_page_list_grab(hibernate_page_list_t * list, uint32_t * pNextFree) } } + if (!bitmap) + { + debug_code(kIOHibernateRestoreCodeNoMemory, nextFree); + fatal(); + nextFree = 0; + } + return (nextFree); } @@ -249,7 +399,7 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, dst = ptoa_64(ppnum); if (ppnum < 0x00100000) - buffer = (uint32_t *) (uint32_t) dst; + buffer = (uint32_t *) (uintptr_t) dst; if (compressedSize != PAGE_SIZE) { @@ -259,14 +409,15 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, sum = hibernate_sum((uint8_t *) src, PAGE_SIZE); - if (((uint64_t) (uint32_t) src) == dst) + if (((uint64_t) (uintptr_t) src) == dst) src = 0; - hibernate_restore_phys_page((uint64_t) 
(uint32_t) src, dst, PAGE_SIZE, procFlags); + hibernate_restore_phys_page((uint64_t) (uintptr_t) src, dst, PAGE_SIZE, procFlags); return (sum); } +// used only for small struct copies static void bcopy_internal(const void *src, void *dst, uint32_t len) { @@ -287,7 +438,6 @@ long hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4) { - typedef void (*ResetProc)(void); uint32_t idx; uint32_t * src; uint32_t * buffer; @@ -307,10 +457,14 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, uint32_t lastImagePage; uint32_t lastMapPage; uint32_t lastPageIndexPage; - ResetProc proc; C_ASSERT(sizeof(IOHibernateImageHeader) == 512); + if ((kIOHibernateDebugRestoreLogs & gIOHibernateDebugFlags) && !debug_probe()) + gIOHibernateDebugFlags &= ~kIOHibernateDebugRestoreLogs; + + debug_code(kIOHibernateRestoreCodeImageStart, (uintptr_t) header); + bcopy_internal(header, gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); @@ -340,51 +494,60 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, sizeof(hibernate_cryptvars_t)); src = (uint32_t *) - (((uint32_t) &header->fileExtentMap[0]) + (((uintptr_t) &header->fileExtentMap[0]) + header->fileExtentMapSize + ptoa_32(header->restore1PageCount)); if (header->previewSize) { pageIndexSource = src; - map = (hibernate_page_list_t *)(((uint32_t) pageIndexSource) + header->previewSize); - src = (uint32_t *) (((uint32_t) pageIndexSource) + header->previewPageListSize); + map = (hibernate_page_list_t *)(((uintptr_t) pageIndexSource) + header->previewSize); + src = (uint32_t *) (((uintptr_t) pageIndexSource) + header->previewPageListSize); } else { pageIndexSource = 0; map = (hibernate_page_list_t *) src; - src = (uint32_t *) (((uint32_t) map) + header->bitmapSize); + src = (uint32_t *) (((uintptr_t) map) + header->bitmapSize); } - lastPageIndexPage = atop_32(src); + lastPageIndexPage = atop_32((uintptr_t) src); + + lastImagePage = atop_32(((uintptr_t) header) + 
header->image1Size); - lastImagePage = atop_32(((uint32_t) header) + header->image1Size); + lastMapPage = atop_32(((uintptr_t) map) + header->bitmapSize); - lastMapPage = atop_32(((uint32_t) map) + header->bitmapSize); + debug_code(kIOHibernateRestoreCodeImageEnd, ptoa_64(lastImagePage)); + debug_code(kIOHibernateRestoreCodePageIndexStart, (uintptr_t) pageIndexSource); + debug_code(kIOHibernateRestoreCodePageIndexEnd, ptoa_64(lastPageIndexPage)); + debug_code(kIOHibernateRestoreCodeMapStart, (uintptr_t) map); + debug_code(kIOHibernateRestoreCodeMapEnd, ptoa_64(lastMapPage)); // knock all the image pages to be used out of free map - for (ppnum = atop_32(header); ppnum <= lastImagePage; ppnum++) + for (ppnum = atop_32((uintptr_t) header); ppnum <= lastImagePage; ppnum++) { hibernate_page_bitset(map, FALSE, ppnum); } nextFree = 0; hibernate_page_list_grab(map, &nextFree); - buffer = (uint32_t *) ptoa_32(hibernate_page_list_grab(map, &nextFree)); + buffer = (uint32_t *) (uintptr_t) ptoa_32(hibernate_page_list_grab(map, &nextFree)); if (header->memoryMapSize && (count = header->memoryMapOffset)) { p4 = (void *)(((uintptr_t) header) - count); gIOHibernateWakeMap = hibernate_page_list_grab(map, &nextFree); gIOHibernateWakeMapSize = header->memoryMapSize; - bcopy_internal(p4, (void *) ptoa_32(gIOHibernateWakeMap), gIOHibernateWakeMapSize); + debug_code(kIOHibernateRestoreCodeWakeMapSize, gIOHibernateWakeMapSize); + if (gIOHibernateWakeMapSize > PAGE_SIZE) + fatal(); + bcopy_internal(p4, (void *) (uintptr_t) ptoa_32(gIOHibernateWakeMap), gIOHibernateWakeMapSize); } else gIOHibernateWakeMapSize = 0; sum = gIOHibernateCurrentHeader->actualRestore1Sum; - gIOHibernateCurrentHeader->diag[0] = (uint32_t) header; + gIOHibernateCurrentHeader->diag[0] = (uint32_t)(uintptr_t) header; gIOHibernateCurrentHeader->diag[1] = sum; uncompressedPages = 0; @@ -405,7 +568,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, if (!count) { pageIndexSource = 0; - src = (uint32_t *) 
(((uint32_t) map) + gIOHibernateCurrentHeader->bitmapSize); + src = (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize); ppnum = src[0]; count = src[1]; src += 2; @@ -431,13 +594,11 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, compressedSize = kIOHibernateTagLength & tag; } -// SINT(ppnum); - - conflicts = (((ppnum >= atop_32(map)) && (ppnum <= lastMapPage)) - || ((ppnum >= atop_32(src)) && (ppnum <= lastImagePage))); + conflicts = (((ppnum >= atop_32((uintptr_t) map)) && (ppnum <= lastMapPage)) + || ((ppnum >= atop_32((uintptr_t) src)) && (ppnum <= lastImagePage))); if (pageIndexSource) - conflicts |= ((ppnum >= atop_32(pageIndexSource)) && (ppnum <= lastPageIndexPage)); + conflicts |= ((ppnum >= atop_32((uintptr_t) pageIndexSource)) && (ppnum <= lastPageIndexPage)); if (!conflicts) { @@ -451,6 +612,9 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, uint32_t bufferPage; uint32_t * dst; +// debug_code(kIOHibernateRestoreCodeConflictPage, ppnum); +// debug_code(kIOHibernateRestoreCodeConflictSource, (uintptr_t) src); + conflictCount++; // alloc new buffer page @@ -465,7 +629,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, copyPageList[1] = pageListPage; else copyPageListHead = pageListPage; - copyPageList = (uint32_t *) ptoa_32(pageListPage); + copyPageList = (uint32_t *) (uintptr_t) ptoa_32(pageListPage); copyPageList[1] = 0; copyPageIndex = 2; } @@ -475,7 +639,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, copyPageList[copyPageIndex++] = compressedSize; copyPageList[0] = copyPageIndex; - dst = (uint32_t *) ptoa_32(bufferPage); + dst = (uint32_t *) (uintptr_t) ptoa_32(bufferPage); for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++) dst[idx] = src[idx]; } @@ -485,20 +649,20 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, // -- copy back conflicts - copyPageList = (uint32_t *) ptoa_32(copyPageListHead); + copyPageList = (uint32_t *)(uintptr_t) 
ptoa_32(copyPageListHead); while (copyPageList) { for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3) { ppnum = copyPageList[copyPageIndex + 0]; - src = (uint32_t *) ptoa_32(copyPageList[copyPageIndex + 1]); + src = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[copyPageIndex + 1]); compressedSize = copyPageList[copyPageIndex + 2]; sum += store_one_page(gIOHibernateCurrentHeader->processorFlags, src, compressedSize, buffer, ppnum); uncompressedPages++; } - copyPageList = (uint32_t *) ptoa_32(copyPageList[1]); + copyPageList = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[1]); } // -- image has been destroyed... @@ -510,16 +674,24 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, gIOHibernateState = kIOHibernateStateWakingFromHibernate; -#if __ppc__ +#if CONFIG_SLEEP +#if defined(__ppc__) + typedef void (*ResetProc)(void); + ResetProc proc; proc = (ResetProc) 0x100; __asm__ volatile("ori 0, 0, 0" : : ); proc(); -#elif __i386__ +#elif defined(__i386__) || defined(__x86_64__) + typedef void (*ResetProc)(void); + ResetProc proc; proc = (ResetProc) acpi_wake_prot_entry; // flush caches __asm__("wbinvd"); proc(); +#else +// implement me +#endif #endif - + return -1; } diff --git a/iokit/Kernel/IOInterleavedMemoryDescriptor.cpp b/iokit/Kernel/IOInterleavedMemoryDescriptor.cpp index 0d7713a81..50dbcf2fd 100644 --- a/iokit/Kernel/IOInterleavedMemoryDescriptor.cpp +++ b/iokit/Kernel/IOInterleavedMemoryDescriptor.cpp @@ -32,65 +32,8 @@ #define super IOMemoryDescriptor OSDefineMetaClassAndStructors(IOInterleavedMemoryDescriptor, IOMemoryDescriptor) -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOInterleavedMemoryDescriptor::initWithAddress( - void * /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOInterleavedMemoryDescriptor::initWithAddress( - 
vm_address_t /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ , - task_t /* withTask */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOInterleavedMemoryDescriptor::initWithPhysicalAddress( - IOPhysicalAddress /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOInterleavedMemoryDescriptor::initWithPhysicalRanges( - IOPhysicalRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - bool /* asReference */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOInterleavedMemoryDescriptor::initWithRanges( - IOVirtualRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - task_t /* withTask */ , - bool /* asReference */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOInterleavedMemoryDescriptor * IOInterleavedMemoryDescriptor::withCapacity( - UInt32 capacity, + IOByteCount capacity, IODirection direction ) { // @@ -112,10 +55,8 @@ IOInterleavedMemoryDescriptor * IOInterleavedMemoryDescriptor::withCapacity( return me; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bool IOInterleavedMemoryDescriptor::initWithCapacity( - UInt32 capacity, + IOByteCount capacity, IODirection direction ) { // @@ -131,7 +72,10 @@ bool IOInterleavedMemoryDescriptor::initWithCapacity( // Initialize our minimal state. 
- _direction = direction; + _flags = direction; +#ifndef __LP64__ + _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ _length = 0; _mappings = 0; _tag = 0; @@ -148,8 +92,6 @@ bool IOInterleavedMemoryDescriptor::initWithCapacity( return true; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void IOInterleavedMemoryDescriptor::clearMemoryDescriptors( IODirection direction ) { UInt32 index; @@ -157,7 +99,7 @@ void IOInterleavedMemoryDescriptor::clearMemoryDescriptors( IODirection directio for ( index = 0; index < _descriptorCount; index++ ) { if ( _descriptorPrepared ) - _descriptors[index]->complete(_direction); + _descriptors[index]->complete(getDirection()); _descriptors[index]->release(); _descriptors[index] = 0; @@ -167,7 +109,12 @@ void IOInterleavedMemoryDescriptor::clearMemoryDescriptors( IODirection directio } if ( direction != kIODirectionNone ) - _direction = direction; + { + _flags = (_flags & ~kIOMemoryDirectionMask) | direction; +#ifndef __LP64__ + _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ + } _descriptorCount = 0; _length = 0; @@ -176,8 +123,6 @@ void IOInterleavedMemoryDescriptor::clearMemoryDescriptors( IODirection directio }; -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bool IOInterleavedMemoryDescriptor::setMemoryDescriptor( IOMemoryDescriptor * descriptor, IOByteCount offset, @@ -189,7 +134,7 @@ bool IOInterleavedMemoryDescriptor::setMemoryDescriptor( if ( (offset + length) > descriptor->getLength() ) return false; -// if ( descriptor->getDirection() != _direction ) +// if ( descriptor->getDirection() != getDirection() ) // return false; descriptor->retain(); @@ -204,8 +149,6 @@ bool IOInterleavedMemoryDescriptor::setMemoryDescriptor( return true; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void IOInterleavedMemoryDescriptor::free() { // @@ -230,8 +173,6 
@@ void IOInterleavedMemoryDescriptor::free() super::free(); } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOReturn IOInterleavedMemoryDescriptor::prepare(IODirection forDirection) { // @@ -248,7 +189,7 @@ IOReturn IOInterleavedMemoryDescriptor::prepare(IODirection forDirection) if ( forDirection == kIODirectionNone ) { - forDirection = _direction; + forDirection = getDirection(); } for ( index = 0; index < _descriptorCount; index++ ) @@ -271,8 +212,6 @@ IOReturn IOInterleavedMemoryDescriptor::prepare(IODirection forDirection) return status; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOReturn IOInterleavedMemoryDescriptor::complete(IODirection forDirection) { // @@ -288,7 +227,7 @@ IOReturn IOInterleavedMemoryDescriptor::complete(IODirection forDirection) if ( forDirection == kIODirectionNone ) { - forDirection = _direction; + forDirection = getDirection(); } for ( unsigned index = 0; index < _descriptorCount; index++ ) @@ -303,73 +242,10 @@ IOReturn IOInterleavedMemoryDescriptor::complete(IODirection forDirection) return statusFinal; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -addr64_t IOInterleavedMemoryDescriptor::getPhysicalSegment64( - IOByteCount offset, IOByteCount * length ) -{ - // - // This method returns the physical address of the byte at the given offset - // into the memory, and optionally the length of the physically contiguous - // segment from that offset. 
- // - - addr64_t pa; - - assert(offset <= _length); - - for ( unsigned index = 0; index < _descriptorCount; index++ ) - { - if ( offset < _descriptorLengths[index] ) - { - pa = _descriptors[index]->getPhysicalSegment64(_descriptorOffsets[index] + offset, length); - if ((_descriptorLengths[index] - offset) < *length) *length = _descriptorLengths[index] - offset; - return pa; - } - offset -= _descriptorLengths[index]; - } - - if ( length ) *length = 0; - - return 0; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOPhysicalAddress IOInterleavedMemoryDescriptor::getPhysicalSegment( - IOByteCount offset, IOByteCount * length ) -{ - // - // This method returns the physical address of the byte at the given offset - // into the memory, and optionally the length of the physically contiguous - // segment from that offset. - // - - IOPhysicalAddress pa; - - assert(offset <= _length); - - for ( unsigned index = 0; index < _descriptorCount; index++ ) - { - if ( offset < _descriptorLengths[index] ) - { - pa = _descriptors[index]->getPhysicalSegment(_descriptorOffsets[index] + offset, length); - if ((_descriptorLengths[index] - offset) < *length) *length = _descriptorLengths[index] - offset; - return pa; - } - offset -= _descriptorLengths[index]; - } - - if ( length ) *length = 0; - - return 0; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOPhysicalAddress IOInterleavedMemoryDescriptor::getSourceSegment( +addr64_t IOInterleavedMemoryDescriptor::getPhysicalSegment( IOByteCount offset, - IOByteCount * length ) + IOByteCount * length, + IOOptionBits options ) { // // This method returns the physical address of the byte at the given offset @@ -377,7 +253,7 @@ IOPhysicalAddress IOInterleavedMemoryDescriptor::getSourceSegment( // segment from that offset. 
// - IOPhysicalAddress pa; + addr64_t pa; assert(offset <= _length); @@ -385,7 +261,7 @@ IOPhysicalAddress IOInterleavedMemoryDescriptor::getSourceSegment( { if ( offset < _descriptorLengths[index] ) { - pa = _descriptors[index]->getSourceSegment(_descriptorOffsets[index] + offset, length); + pa = _descriptors[index]->getPhysicalSegment(_descriptorOffsets[index] + offset, length, options); if ((_descriptorLengths[index] - offset) < *length) *length = _descriptorLengths[index] - offset; return pa; } @@ -396,11 +272,3 @@ IOPhysicalAddress IOInterleavedMemoryDescriptor::getSourceSegment( return 0; } - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -void * IOInterleavedMemoryDescriptor::getVirtualSegment( IOByteCount /* offset */ , - IOByteCount * /* length */ ) -{ - return 0; -} diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index b1ce6b51d..beedc1a8b 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -44,7 +44,6 @@ #include #include - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define super IOService @@ -66,19 +65,21 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, void *refCon) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; - long wasDisabledSoft; + int wasDisabledSoft; IOReturn error; OSData *vectorData; - IOService *originalNub; - int originalSource; IOOptionBits options; bool canBeShared, shouldBeShared, wasAlreadyRegisterd; - + + IOService *originalNub = NULL; // Protected by wasAlreadyRegisterd + int originalSource = 0; // Protected by wasAlreadyRegisterd + + interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; // Get the lock for this vector. @@ -192,6 +193,7 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, vector->interruptRegistered = 1; // Enable the original consumer's interrupt if needed. + // originalNub is protected by wasAlreadyRegisterd here (see line 184). 
if (!wasDisabledSoft) originalNub->enableInterrupt(originalSource); } @@ -223,13 +225,13 @@ IOReturn IOInterruptController::registerInterrupt(IOService *nub, int source, IOReturn IOInterruptController::unregisterInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; // Get the lock for this vector. @@ -266,7 +268,7 @@ IOReturn IOInterruptController::getInterruptType(IOService *nub, int source, int *interruptType) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; @@ -274,7 +276,7 @@ IOReturn IOInterruptController::getInterruptType(IOService *nub, int source, interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; *interruptType = getVectorType(vectorNumber, vector); @@ -285,13 +287,13 @@ IOReturn IOInterruptController::getInterruptType(IOService *nub, int source, IOReturn IOInterruptController::enableInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; if (vector->interruptDisabledSoft) { @@ -302,7 +304,8 @@ 
IOReturn IOInterruptController::enableInterrupt(IOService *nub, int source) #endif if (!getPlatform()->atInterruptLevel()) { - while (vector->interruptActive); + while (vector->interruptActive) + {} #if __ppc__ isync(); #endif @@ -320,13 +323,13 @@ IOReturn IOInterruptController::enableInterrupt(IOService *nub, int source) IOReturn IOInterruptController::disableInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; vector->interruptDisabledSoft = 1; @@ -336,7 +339,8 @@ IOReturn IOInterruptController::disableInterrupt(IOService *nub, int source) #endif if (!getPlatform()->atInterruptLevel()) { - while (vector->interruptActive); + while (vector->interruptActive) + {} #if __ppc__ isync(); #endif @@ -348,13 +352,13 @@ IOReturn IOInterruptController::disableInterrupt(IOService *nub, int source) IOReturn IOInterruptController::causeInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; causeVector(vectorNumber, vector); @@ -376,34 +380,34 @@ IOReturn IOInterruptController::handleInterrupt(void *refCon, IOService *nub, // Methods to be overridden for simplifed interrupt controller subclasses. 
-bool IOInterruptController::vectorCanBeShared(long /*vectorNumber*/, +bool IOInterruptController::vectorCanBeShared(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { return false; } -void IOInterruptController::initVector(long /*vectorNumber*/, +void IOInterruptController::initVector(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { } -int IOInterruptController::getVectorType(long /*vectorNumber*/, +int IOInterruptController::getVectorType(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { return kIOInterruptTypeEdge; } -void IOInterruptController::disableVectorHard(long /*vectorNumber*/, +void IOInterruptController::disableVectorHard(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { } -void IOInterruptController::enableVector(long /*vectorNumber*/, +void IOInterruptController::enableVector(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { } -void IOInterruptController::causeVector(long /*vectorNumber*/, +void IOInterruptController::causeVector(IOInterruptVectorNumber /*vectorNumber*/, IOInterruptVector */*vector*/) { } @@ -494,7 +498,7 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, void *refCon) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector = 0; OSData *vectorData; IOInterruptState interruptState; @@ -558,52 +562,51 @@ IOReturn IOSharedInterruptController::registerInterrupt(IOService *nub, IOReturn IOSharedInterruptController::unregisterInterrupt(IOService *nub, int source) { - IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; - OSData *vectorData; IOInterruptState interruptState; - - interruptSources = nub->_interruptSources; - vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); - vector = 
&vectors[vectorNumber]; - - // Get the lock for this vector. - IOTakeLock(vector->interruptLock); - - // Return success if it is not already registered - if (!vector->interruptRegistered) { + + for (vectorNumber = 0; vectorNumber < kIOSharedInterruptControllerDefaultVectors; vectorNumber++) { + vector = &vectors[vectorNumber]; + + // Get the lock for this vector. + IOTakeLock(vector->interruptLock); + + // Return success if it is not already registered + if (!vector->interruptRegistered + || (vector->nub != nub) || (vector->source != source)) { + IOUnlock(vector->interruptLock); + continue; + } + + // Soft disable the source and the controller too. + disableInterrupt(nub, source); + + // Clear all the storage for the vector except for interruptLock. + vector->interruptActive = 0; + vector->interruptDisabledSoft = 0; + vector->interruptDisabledHard = 0; + vector->interruptRegistered = 0; + vector->nub = 0; + vector->source = 0; + vector->handler = 0; + vector->target = 0; + vector->refCon = 0; + + interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); + vectorsRegistered--; + IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); + + // Move along to the next one. IOUnlock(vector->interruptLock); - return kIOReturnSuccess; } - - // Soft disable the source and the controller too. - disableInterrupt(nub, source); - - // Clear all the storage for the vector except for interruptLock. - vector->interruptActive = 0; - vector->interruptDisabledSoft = 0; - vector->interruptDisabledHard = 0; - vector->interruptRegistered = 0; - vector->nub = 0; - vector->source = 0; - vector->handler = 0; - vector->target = 0; - vector->refCon = 0; - - interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); - vectorsRegistered--; - IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); - - IOUnlock(vector->interruptLock); - + // Re-enable the controller if all vectors are enabled. 
if (vectorsEnabled == vectorsRegistered) { controllerDisabled = 0; provider->enableInterrupt(0); } - + return kIOReturnSuccess; } @@ -618,14 +621,14 @@ IOReturn IOSharedInterruptController::enableInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; IOInterruptState interruptState; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); @@ -650,14 +653,14 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub, int source) { IOInterruptSource *interruptSources; - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; OSData *vectorData; IOInterruptState interruptState; interruptSources = nub->_interruptSources; vectorData = interruptSources[source].vectorData; - vectorNumber = *(long *)vectorData->getBytesNoCopy(); + vectorNumber = *(IOInterruptVectorNumber *)vectorData->getBytesNoCopy(); vector = &vectors[vectorNumber]; interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); @@ -672,7 +675,8 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub, IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); if (!getPlatform()->atInterruptLevel()) { - while (vector->interruptActive); + while (vector->interruptActive) + {} #if __ppc__ isync(); #endif @@ -691,7 +695,7 @@ IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/, IOService * nub, int /*source*/) { - long vectorNumber; + IOInterruptVectorNumber vectorNumber; IOInterruptVector *vector; for (vectorNumber = 0; vectorNumber < numVectors; vectorNumber++) { diff --git a/iokit/Kernel/IOInterruptEventSource.cpp 
b/iokit/Kernel/IOInterruptEventSource.cpp index 4bfd27786..9694b1130 100644 --- a/iokit/Kernel/IOInterruptEventSource.cpp +++ b/iokit/Kernel/IOInterruptEventSource.cpp @@ -44,19 +44,19 @@ HISTORY #define IOTimeTypeStampS(t) \ do { \ IOTimeStampStart(IODBG_INTES(t), \ - (unsigned int) this, (unsigned int) owner); \ + (uintptr_t) this, (uintptr_t) owner); \ } while(0) #define IOTimeTypeStampE(t) \ do { \ IOTimeStampEnd(IODBG_INTES(t), \ - (unsigned int) this, (unsigned int) owner); \ + (uintptr_t) this, (uintptr_t) owner); \ } while(0) #define IOTimeStampLatency() \ do { \ IOTimeStampEnd(IODBG_INTES(IOINTES_LAT), \ - (unsigned int) this, (unsigned int) owner); \ + (uintptr_t) this, (uintptr_t) owner); \ } while(0) #else /* !KDEBUG */ @@ -188,7 +188,7 @@ bool IOInterruptEventSource::checkForWork() IOTimeStampLatency(); IOTimeTypeStampS(IOINTES_CLIENT); IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION), - (unsigned int) intAction, (unsigned int) owner); + (uintptr_t) intAction, (uintptr_t) owner); (*intAction)(owner, this, numInts); IOTimeTypeStampE(IOINTES_CLIENT); @@ -200,7 +200,7 @@ bool IOInterruptEventSource::checkForWork() IOTimeStampLatency(); IOTimeTypeStampS(IOINTES_CLIENT); IOTimeStampConstant(IODBG_INTES(IOINTES_ACTION), - (unsigned int) intAction, (unsigned int) owner); + (uintptr_t) intAction, (uintptr_t) owner); (*intAction)(owner, this, -numInts); IOTimeTypeStampE(IOINTES_CLIENT); diff --git a/iokit/Kernel/IOKitDebug.cpp b/iokit/Kernel/IOKitDebug.cpp index 3a2415196..f061b516c 100644 --- a/iokit/Kernel/IOKitDebug.cpp +++ b/iokit/Kernel/IOKitDebug.cpp @@ -62,6 +62,11 @@ int debug_container_malloc_size; extern "C" { +#if 0 +#define DEBG(fmt, args...) { kprintf(fmt, ## args); } +#else +#define DEBG(fmt, args...) 
{ IOLog(fmt, ## args); } +#endif void IOPrintPlane( const IORegistryPlane * plane ) { @@ -75,23 +80,23 @@ void IOPrintPlane( const IORegistryPlane * plane ) assert( iter ); all = iter->iterateAll(); if( all) { - IOLog("Count %d\n", all->getCount() ); + DEBG("Count %d\n", all->getCount() ); all->release(); } else - IOLog("Empty\n"); + DEBG("Empty\n"); iter->reset(); while( (next = iter->getNextObjectRecursive())) { snprintf(format + 1, sizeof(format) - 1, "%ds", 2 * next->getDepth( plane )); - IOLog( format, ""); - IOLog( "\033[33m%s", next->getName( plane )); + DEBG( format, ""); + DEBG( "\033[33m%s", next->getName( plane )); if( (next->getLocation( plane ))) - IOLog("@%s", next->getLocation( plane )); - IOLog("\033[0m getMetaClass()->getClassName()); + DEBG("@%s", next->getLocation( plane )); + DEBG("\033[0m getMetaClass()->getClassName()); if( (service = OSDynamicCast(IOService, next))) - IOLog(", busy %ld", service->getBusyState()); - IOLog( ">\n"); - IOSleep(250); + DEBG(", busy %ld", (long) service->getBusyState()); + DEBG( ">\n"); +// IOSleep(250); } iter->release(); } diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index a21ff0031..ea74e3bd9 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -43,16 +43,22 @@ typedef kern_return_t (*IOIteratePageableMapsCallback)(vm_map_t map, void * ref) void IOLibInit(void); kern_return_t IOIteratePageableMaps(vm_size_t size, IOIteratePageableMapsCallback callback, void * ref); -vm_map_t IOPageableMapForAddress(vm_address_t address); +vm_map_t IOPageableMapForAddress(uintptr_t address); kern_return_t IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits options, bool pageable, mach_vm_size_t offset, mach_vm_address_t * address, mach_vm_size_t length); - -SInt32 OSKernelStackRemaining( void ); +kern_return_t +IOMemoryDescriptorMapCopy(vm_map_t map, + vm_map_t src_map, + mach_vm_offset_t src_address, + IOOptionBits options, + 
mach_vm_size_t offset, + mach_vm_address_t * address, mach_vm_size_t length); mach_vm_address_t -IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment); +IOKernelAllocateContiguous(mach_vm_size_t size, + mach_vm_address_t maxPhys, mach_vm_size_t alignment); void IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size); @@ -80,6 +86,8 @@ enum { kIOMDWalkSegments = 0x00000001, kIOMDFirstSegment = 0x00000002 | kIOMDWalkSegments, kIOMDGetCharacteristics = 0x00000004, + kIOMDSetDMAActive = 0x00000005, + kIOMDSetDMAInactive = 0x00000006, kIOMDLastDMACommandOperation }; struct IOMDDMACharacteristics { @@ -106,20 +114,26 @@ struct IODMACommandInternal UInt64 fPreparedOffset; UInt64 fPreparedLength; + UInt32 fSourceAlignMask; + UInt8 fCursor; UInt8 fCheckAddressing; UInt8 fIterateOnly; UInt8 fMisaligned; - UInt8 fCopyContig; + UInt8 fMapContig; UInt8 fPrepared; UInt8 fDoubleBuffer; UInt8 fNewMD; - - ppnum_t fCopyPageAlloc; + UInt8 fLocalMapper; + + ppnum_t fCopyMapperPageAlloc; ppnum_t fCopyPageCount; ppnum_t fNextRemapIndex; addr64_t fCopyNext; + ppnum_t fLocalMapperPageAlloc; + ppnum_t fLocalMapperPageCount; + class IOBufferMemoryDescriptor * fCopyMD; // IODMAEventSource use diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index 5fca33623..e40d20afc 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -62,13 +63,16 @@ mach_timespec_t IOZeroTvalspec = { 0, 0 }; extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); -extern kern_return_t kmem_suballoc( - vm_map_t parent, - vm_offset_t *addr, - vm_size_t size, - boolean_t pageable, - boolean_t anywhere, - vm_map_t *new_map); +int +__doprnt( + const char *fmt, + va_list argp, + void (*putc)(int, void *), + void *arg, + int radix); + +extern void conslog_putc(char); + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -97,9 +101,8 @@ enum { kIOMaxPageableMaps = 16 }; 
enum { kIOPageableMapSize = 96 * 1024 * 1024 }; enum { kIOPageableMaxMapSize = 96 * 1024 * 1024 }; -/* LP64todo - these need to expand */ typedef struct { - vm_map_t map; + vm_map_t map; vm_offset_t address; vm_offset_t end; } IOMapData; @@ -198,10 +201,10 @@ void IOFree(void * address, vm_size_t size) void * IOMallocAligned(vm_size_t size, vm_size_t alignment) { kern_return_t kr; - vm_address_t address; - vm_address_t allocationAddress; + vm_offset_t address; + vm_offset_t allocationAddress; vm_size_t adjustedSize; - vm_offset_t alignMask; + uintptr_t alignMask; if (size == 0) return 0; @@ -237,8 +240,8 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) + (sizeof(vm_size_t) + sizeof(vm_address_t))) & (~alignMask); - *((vm_size_t *)(address - sizeof(vm_size_t) - - sizeof(vm_address_t))) = adjustedSize; + *((vm_size_t *)(address - sizeof(vm_size_t) - sizeof(vm_address_t))) + = adjustedSize; *((vm_address_t *)(address - sizeof(vm_address_t))) = allocationAddress; } else @@ -259,7 +262,7 @@ void * IOMallocAligned(vm_size_t size, vm_size_t alignment) void IOFreeAligned(void * address, vm_size_t size) { vm_address_t allocationAddress; - vm_size_t adjustedSize; + vm_size_t adjustedSize; if( !address) return; @@ -269,10 +272,10 @@ void IOFreeAligned(void * address, vm_size_t size) adjustedSize = size + sizeof(vm_size_t) + sizeof(vm_address_t); if (adjustedSize >= page_size) { - kmem_free( kernel_map, (vm_address_t) address, size); + kmem_free( kernel_map, (vm_offset_t) address, size); } else { - adjustedSize = *((vm_size_t *)( (vm_address_t) address + adjustedSize = *((vm_size_t *)( (vm_address_t) address - sizeof(vm_address_t) - sizeof(vm_size_t))); allocationAddress = *((vm_address_t *)( (vm_address_t) address - sizeof(vm_address_t) )); @@ -304,7 +307,7 @@ IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size) adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); if (adjustedSize >= page_size) { - kmem_free( 
kernel_map, (vm_address_t) address, size); + kmem_free( kernel_map, (vm_offset_t) address, size); } else { @@ -321,7 +324,8 @@ IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size) } mach_vm_address_t -IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment) +IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_address_t maxPhys, + mach_vm_size_t alignment) { kern_return_t kr; mach_vm_address_t address; @@ -341,10 +345,10 @@ IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment) { vm_offset_t virt; adjustedSize = size; - if (adjustedSize > page_size) + if ((adjustedSize > page_size) || (alignment > page_size) || maxPhys) { kr = kmem_alloc_contig(kernel_map, &virt, size, - alignMask, 0, 0); + alignMask, atop(maxPhys), atop(alignMask), 0); } else { @@ -368,7 +372,7 @@ IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment) & (~alignMask); if (atop_32(address) != atop_32(address + size - 1)) - address = round_page_32(address); + address = round_page(address); *((mach_vm_size_t *)(address - sizeof(mach_vm_size_t) - sizeof(mach_vm_address_t))) = adjustedSize; @@ -380,8 +384,8 @@ IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_size_t alignment) #if IOALLOCDEBUG if (address) { - debug_iomalloc_size += size; - } + debug_iomalloc_size += size; + } #endif return (address); @@ -410,16 +414,17 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, /* Do we want a physical address? 
*/ if (!physicalAddress) { - address = IOKernelAllocateContiguous(size, alignment); + address = IOKernelAllocateContiguous(size, 0 /*maxPhys*/, alignment); } else do { IOBufferMemoryDescriptor * bmd; mach_vm_address_t physicalMask; - vm_offset_t alignMask; + vm_offset_t alignMask; alignMask = alignment - 1; - physicalMask = 0xFFFFFFFF ^ (alignMask & PAGE_MASK); + physicalMask = (0xFFFFFFFF ^ alignMask); + bmd = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( kernel_task, kIOMemoryPhysicallyContiguous, size, physicalMask); if (!bmd) @@ -555,7 +560,7 @@ kern_return_t IOIteratePageableMaps(vm_size_t size, struct IOMallocPageableRef { - vm_address_t address; + vm_offset_t address; vm_size_t size; }; @@ -586,13 +591,13 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment) #if IOALLOCDEBUG if( ref.address) - debug_iomallocpageable_size += round_page_32(size); + debug_iomallocpageable_size += round_page(size); #endif return( (void *) ref.address ); } -vm_map_t IOPageableMapForAddress( vm_address_t address ) +vm_map_t IOPageableMapForAddress( uintptr_t address ) { vm_map_t map = 0; UInt32 index; @@ -605,7 +610,7 @@ vm_map_t IOPageableMapForAddress( vm_address_t address ) } } if( !map) - IOPanic("IOPageableMapForAddress: null"); + panic("IOPageableMapForAddress: null"); return( map ); } @@ -619,10 +624,10 @@ void IOFreePageable(void * address, vm_size_t size) kmem_free( map, (vm_offset_t) address, size); #if IOALLOCDEBUG - debug_iomallocpageable_size -= round_page_32(size); + debug_iomallocpageable_size -= round_page(size); #endif } - + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ IOReturn IOSetProcessorCacheMode( task_t task, IOVirtualAddress address, @@ -633,9 +638,13 @@ IOReturn IOSetProcessorCacheMode( task_t task, IOVirtualAddress address, if( task != kernel_task) return( kIOReturnUnsupported ); - - length = round_page_32(address + length) - trunc_page_32( address ); - address = trunc_page_32( address ); + if ((address | 
length) & PAGE_MASK) + { +// OSReportWithBacktrace("IOSetProcessorCacheMode(0x%x, 0x%x, 0x%x) fails\n", address, length, cacheMode); + return( kIOReturnUnsupported ); + } + length = round_page(address + length) - trunc_page( address ); + address = trunc_page( address ); // make map mode cacheMode = (cacheMode << kIOMapCacheShift) & kIOMapCacheMask; @@ -671,13 +680,9 @@ IOReturn IOFlushProcessorCache( task_t task, IOVirtualAddress address, /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -SInt32 OSKernelStackRemaining( void ) +vm_offset_t OSKernelStackRemaining( void ) { - SInt32 stack; - - stack = (((SInt32) &stack) & (KERNEL_STACK_SIZE - 1)); - - return( stack ); + return (ml_stack_remaining()); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -708,21 +713,31 @@ void IOPause(unsigned nanoseconds) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +static void _iolog_putc(int ch, void *arg __unused) +{ + conslog_putc(ch); +} + void IOLog(const char *format, ...) { va_list ap; - extern void conslog_putc(char); - extern void logwakeup(void); va_start(ap, format); - _doprnt(format, &ap, conslog_putc, 16); + __doprnt(format, ap, _iolog_putc, NULL, 16); va_end(ap); } +void IOLogv(const char *format, va_list ap) +{ + __doprnt(format, ap, _iolog_putc, NULL, 16); +} + +#if !__LP64__ void IOPanic(const char *reason) { panic("%s", reason); } +#endif /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOLocks.cpp b/iokit/Kernel/IOLocks.cpp index 988e01529..a61f6a326 100644 --- a/iokit/Kernel/IOLocks.cpp +++ b/iokit/Kernel/IOLocks.cpp @@ -25,15 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. 
- * - * HISTORY - * - */ - - -#define IOLOCKS_CPP 1 #include @@ -195,7 +186,6 @@ int IORecursiveLockSleep(IORecursiveLock *_lock, void *event, UInt32 interType) int res; assert(lock->thread == IOThreadSelf()); - assert(lock->count == 1 || interType == THREAD_UNINT); lock->count = 0; lock->thread = 0; @@ -210,6 +200,29 @@ int IORecursiveLockSleep(IORecursiveLock *_lock, void *event, UInt32 interType) return res; } +int IORecursiveLockSleepDeadline( IORecursiveLock * _lock, void *event, + AbsoluteTime deadline, UInt32 interType) +{ + _IORecursiveLock * lock = (_IORecursiveLock *)_lock; + UInt32 count = lock->count; + int res; + + assert(lock->thread == IOThreadSelf()); + + lock->count = 0; + lock->thread = 0; + res = lck_mtx_sleep_deadline(lock->mutex, LCK_SLEEP_DEFAULT, (event_t) event, + (wait_interrupt_t) interType, __OSAbsoluteTime(deadline)); + + // Must re-establish the recursive lock no matter why we woke up + // otherwise we would potentially leave the return path corrupted. + assert(lock->thread == 0); + assert(lock->count == 0); + lock->thread = IOThreadSelf(); + lock->count = count; + return res; +} + void IORecursiveLockWakeup(IORecursiveLock *, void *event, bool oneThread) { thread_wakeup_prim((event_t) event, oneThread, THREAD_AWAKENED); diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index 43f7bbe54..171e22ff3 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -27,6 +27,7 @@ */ #include #include +#include #include #include "IOCopyMapper.h" @@ -39,8 +40,8 @@ __END_DECLS OSDefineMetaClassAndAbstractStructors(IOMapper, IOService); OSMetaClassDefineReservedUsed(IOMapper, 0); -OSMetaClassDefineReservedUnused(IOMapper, 1); -OSMetaClassDefineReservedUnused(IOMapper, 2); +OSMetaClassDefineReservedUsed(IOMapper, 1); +OSMetaClassDefineReservedUsed(IOMapper, 2); OSMetaClassDefineReservedUnused(IOMapper, 3); OSMetaClassDefineReservedUnused(IOMapper, 4); OSMetaClassDefineReservedUnused(IOMapper, 5); @@ -73,6 +74,7 @@ static 
IOMapperLock sMapperLock; bool IOMapper::start(IOService *provider) { + OSObject * obj; if (!super::start(provider)) return false; @@ -86,6 +88,14 @@ bool IOMapper::start(IOService *provider) sMapperLock.unlock(); } + if (provider) + { + obj = provider->getProperty("iommu-id"); + if (!obj) + obj = provider->getProperty("AAPL,phandle"); + if (obj) + setProperty(gIOMapperIDKey, obj); + } return true; } @@ -123,11 +133,48 @@ void IOMapper::setMapperRequired(bool hasMapper) void IOMapper::waitForSystemMapper() { sMapperLock.lock(); - while ((vm_address_t) IOMapper::gSystem & kWaitMask) + while ((uintptr_t) IOMapper::gSystem & kWaitMask) sMapperLock.sleep(&IOMapper::gSystem); sMapperLock.unlock(); } +IOMapper * IOMapper::copyMapperForDevice(IOService * device) +{ + OSObject * obj; + IOMapper * mapper; + OSDictionary * matching; + + obj = device->copyProperty("iommu-parent"); + if (!obj) + return (NULL); + + if ((mapper = OSDynamicCast(IOMapper, obj))) + return (mapper); + + matching = IOService::propertyMatching(gIOMapperIDKey, obj); + if (matching) + { + mapper = OSDynamicCast(IOMapper, IOService::waitForMatchingService(matching)); + matching->release(); + } + if (mapper) + device->setProperty("iommu-parent", mapper); + else + obj->release(); + + return (mapper); +} + +ppnum_t IOMapper::iovmAllocDMACommand(IODMACommand * command, IOItemCount pageCount) +{ + return (0); +} + +void IOMapper::iovmFreeDMACommand(IODMACommand * command, + ppnum_t addr, IOItemCount pageCount) +{ +} + void IOMapper::iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t *pageList, IOItemCount pageCount) { @@ -151,8 +198,8 @@ NewARTTable(IOByteCount size, void ** virtAddrP, ppnum_t *physAddrP) kern_return_t kr; vm_address_t address; - size = round_page_32(size); - kr = kmem_alloc_contig(kernel_map, &address, size, PAGE_MASK, 0, 0); + size = round_page(size); + kr = kmem_alloc_contig(kernel_map, &address, size, PAGE_MASK, 0 /*max_pnum*/, 0 /*pnum_mask*/, false); if (kr) return 0; @@ -173,7 +220,7 
@@ void IOMapper::FreeARTTable(OSData *artHandle, IOByteCount size) { vm_address_t address = (vm_address_t) artHandle; - size = round_page_32(size); + size = round_page(size); kmem_free(kernel_map, address, size); // Just panic if address is 0 } diff --git a/iokit/Kernel/IOMemoryCursor.cpp b/iokit/Kernel/IOMemoryCursor.cpp index 503fb3a64..36a15009d 100644 --- a/iokit/Kernel/IOMemoryCursor.cpp +++ b/iokit/Kernel/IOMemoryCursor.cpp @@ -101,7 +101,7 @@ if (inMaxSegmentSize > sMaxDBDMASegment) inMaxSegmentSize = sMaxDBDMASegment; UInt32 IOMemoryCursor::genPhysicalSegments(IOMemoryDescriptor *inDescriptor, - IOPhysicalLength fromPosition, + IOByteCount fromPosition, void * inSegments, UInt32 inMaxSegments, UInt32 inMaxTransferSize, @@ -133,7 +133,7 @@ IOMemoryCursor::genPhysicalSegments(IOMemoryDescriptor *inDescriptor, if (!seg.location) { seg.location = inDescriptor->getPhysicalSegment( - fromPosition, &seg.length); + fromPosition, (IOByteCount*)&seg.length); assert(seg.location); assert(seg.length); fromPosition += seg.length; diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 58e7190e6..9aef6a1a0 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -31,7 +31,8 @@ * HISTORY * */ -// 45678901234567890123456789012345678901234567890123456789012345678901234567890 + + #include #include @@ -41,6 +42,10 @@ #include #include +#ifndef __LP64__ +#include +#endif /* !__LP64__ */ + #include #include @@ -75,7 +80,7 @@ kern_return_t copypv(addr64_t source, addr64_t sink, unsigned int size, int whic memory_object_t device_pager_setup( memory_object_t pager, - int device_handle, + uintptr_t device_handle, vm_size_t size, int flags); void @@ -135,68 +140,7 @@ static IORecursiveLock * gIOMemoryLock; #define DEBG(fmt, args...) 
{} #endif -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -class _IOMemoryMap : public IOMemoryMap -{ - OSDeclareDefaultStructors(_IOMemoryMap) -public: - IOMemoryDescriptor * fMemory; - IOMemoryMap * fSuperMap; - mach_vm_size_t fOffset; - mach_vm_address_t fAddress; - mach_vm_size_t fLength; - task_t fAddressTask; - vm_map_t fAddressMap; - IOOptionBits fOptions; - upl_t fRedirUPL; - ipc_port_t fRedirEntry; - IOMemoryDescriptor * fOwner; - -protected: - virtual void taggedRelease(const void *tag = 0) const; - virtual void free(); - -public: - - // IOMemoryMap methods - virtual IOVirtualAddress getVirtualAddress(); - virtual IOByteCount getLength(); - virtual task_t getAddressTask(); - virtual mach_vm_address_t getAddress(); - virtual mach_vm_size_t getSize(); - virtual IOMemoryDescriptor * getMemoryDescriptor(); - virtual IOOptionBits getMapOptions(); - - virtual IOReturn unmap(); - virtual void taskDied(); - - virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, - IOOptionBits options, - IOByteCount offset = 0); - - virtual IOReturn redirect(IOMemoryDescriptor * newBackingMemory, - IOOptionBits options, - mach_vm_size_t offset = 0); - - virtual IOPhysicalAddress getPhysicalSegment(IOByteCount offset, - IOByteCount * length); - - // for IOMemoryDescriptor use - _IOMemoryMap * copyCompatible( _IOMemoryMap * newMapping ); - - bool init( - task_t intoTask, - mach_vm_address_t toAddress, - IOOptionBits options, - mach_vm_size_t offset, - mach_vm_size_t length ); - - bool setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size_t _offset); - - IOReturn redirect( - task_t intoTask, bool redirect ); -}; +#define IOMD_DEBUG_DMAACTIVE 1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -216,16 +160,21 @@ struct typePersMDData struct ioPLBlock { upl_t fIOPL; - vm_address_t fIOMDOffset; // The offset of this iopl in descriptor - vm_offset_t fPageInfo; // Pointer to page list or index into it - 
ppnum_t fMappedBase; // Page number of first page in this iopl - unsigned int fPageOffset; // Offset within first page of iopl - unsigned int fFlags; // Flags + vm_address_t fPageInfo; // Pointer to page list or index into it + uint32_t fIOMDOffset; // The offset of this iopl in descriptor + ppnum_t fMappedBase; // Page number of first page in this iopl + unsigned int fPageOffset; // Offset within first page of iopl + unsigned int fFlags; // Flags }; struct ioGMDData { IOMapper *fMapper; + uint64_t fPreparationID; unsigned int fPageCnt; +#if __LP64__ + // align arrays to 8 bytes so following macros work + unsigned int fPad; +#endif upl_page_info_t fPageList[]; ioPLBlock fBlocks[]; }; @@ -241,13 +190,13 @@ struct ioGMDData { /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#define next_page(a) ( trunc_page_32(a) + PAGE_SIZE ) +#define next_page(a) ( trunc_page(a) + PAGE_SIZE ) extern "C" { kern_return_t device_data_action( - int device_handle, + uintptr_t device_handle, ipc_port_t device_pager, vm_prot_t protection, vm_object_offset_t offset, @@ -280,7 +229,7 @@ kern_return_t device_data_action( } kern_return_t device_close( - int device_handle) + uintptr_t device_handle) { struct ExpansionData { void * devicePager; @@ -310,11 +259,13 @@ getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables user_size_t us; uio_getiov((uio_t) r.uio, ind, &addr, &us); len = us; } +#ifndef __LP64__ else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) { IOAddressRange cur = r.v64[ind]; addr = cur.address; len = cur.length; } +#endif /* !__LP64__ */ else { IOVirtualRange cur = r.v[ind]; addr = cur.address; @@ -324,37 +275,22 @@ getAddrLenForInd(user_addr_t &addr, IOPhysicalLength &len, // Output variables /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -/* - * withAddress: - * - * Create a new IOMemoryDescriptor. The buffer is a virtual address - * relative to the specified task. 
If no task is supplied, the kernel - * task is implied. - */ IOMemoryDescriptor * IOMemoryDescriptor::withAddress(void * address, IOByteCount length, IODirection direction) { return IOMemoryDescriptor:: - withAddress((vm_address_t) address, length, direction, kernel_task); + withAddressRange((IOVirtualAddress) address, length, direction | kIOMemoryAutoPrepare, kernel_task); } +#ifndef __LP64__ IOMemoryDescriptor * -IOMemoryDescriptor::withAddress(vm_address_t address, +IOMemoryDescriptor::withAddress(IOVirtualAddress address, IOByteCount length, IODirection direction, task_t task) { -#if TEST_V64 - if (task) - { - IOOptionBits options = (IOOptionBits) direction; - if (task == kernel_task) - options |= kIOMemoryAutoPrepare; - return (IOMemoryDescriptor::withAddressRange(address, length, options, task)); - } -#endif IOGeneralMemoryDescriptor * that = new IOGeneralMemoryDescriptor; if (that) { @@ -365,6 +301,7 @@ IOMemoryDescriptor::withAddress(vm_address_t address, } return 0; } +#endif /* !__LP64__ */ IOMemoryDescriptor * IOMemoryDescriptor::withPhysicalAddress( @@ -372,9 +309,9 @@ IOMemoryDescriptor::withPhysicalAddress( IOByteCount length, IODirection direction ) { -#if TEST_P64 - return (IOMemoryDescriptor::withAddressRange(address, length, (IOOptionBits) direction, NULL)); -#endif +#ifdef __LP64__ + return (IOMemoryDescriptor::withAddressRange(address, length, direction, TASK_NULL)); +#else /* !__LP64__ */ IOGeneralMemoryDescriptor *self = new IOGeneralMemoryDescriptor; if (self && !self->initWithPhysicalAddress(address, length, direction)) { @@ -383,8 +320,10 @@ IOMemoryDescriptor::withPhysicalAddress( } return self; +#endif /* !__LP64__ */ } +#ifndef __LP64__ IOMemoryDescriptor * IOMemoryDescriptor::withRanges( IOVirtualRange * ranges, UInt32 withCount, @@ -402,6 +341,7 @@ IOMemoryDescriptor::withRanges( IOVirtualRange * ranges, } return 0; } +#endif /* !__LP64__ */ IOMemoryDescriptor * IOMemoryDescriptor::withAddressRange(mach_vm_address_t address, @@ -438,7 
+378,7 @@ IOMemoryDescriptor::withAddressRanges(IOAddressRange * ranges, /* - * withRanges: + * withOptions: * * Create a new IOMemoryDescriptor. The buffer is made up of several * virtual address ranges, from a given task. @@ -465,7 +405,6 @@ IOMemoryDescriptor::withOptions(void * buffers, return self; } -// Can't leave abstract but this should never be used directly, bool IOMemoryDescriptor::initWithOptions(void * buffers, UInt32 count, UInt32 offset, @@ -473,11 +412,10 @@ bool IOMemoryDescriptor::initWithOptions(void * buffers, IOOptionBits options, IOMapper * mapper) { - // @@@ gvdl: Should I panic? - panic("IOMD::initWithOptions called\n"); - return 0; + return( false ); } +#ifndef __LP64__ IOMemoryDescriptor * IOMemoryDescriptor::withPhysicalRanges( IOPhysicalRange * ranges, UInt32 withCount, @@ -501,14 +439,9 @@ IOMemoryDescriptor::withSubRange(IOMemoryDescriptor * of, IOByteCount length, IODirection direction) { - IOSubMemoryDescriptor *self = new IOSubMemoryDescriptor; - - if (self && !self->initSubRange(of, offset, length, direction)) { - self->release(); - self = 0; - } - return self; + return (IOSubMemoryDescriptor::withSubRange(of, offset, length, direction | kIOMemoryThreadSafe)); } +#endif /* !__LP64__ */ IOMemoryDescriptor * IOMemoryDescriptor::withPersistentMemoryDescriptor(IOMemoryDescriptor *originalMD) @@ -586,8 +519,8 @@ void *IOGeneralMemoryDescriptor::createNamedEntry() return sharedMem; } else { #if IOASSERT - IOLog("IOGMD::mach_make_memory_entry_64 (%08llx) size (%08lx:%08x)\n", - (UInt64)range0Addr, (UInt32)actualSize, size); + IOLog("IOGMD::mach_make_memory_entry_64 (%08llx) size (%08llx:%08llx)\n", + (UInt64)range0Addr, (UInt64)actualSize, (UInt64)size); #endif ipc_port_release_send( sharedMem ); } @@ -596,31 +529,20 @@ void *IOGeneralMemoryDescriptor::createNamedEntry() return MACH_PORT_NULL; } -/* - * initWithAddress: - * - * Initialize an IOMemoryDescriptor. The buffer is a virtual address - * relative to the specified task. 
If no task is supplied, the kernel - * task is implied. - * - * An IOMemoryDescriptor can be re-used by calling initWithAddress or - * initWithRanges again on an existing instance -- note this behavior - * is not commonly supported in other I/O Kit classes, although it is - * supported here. - */ +#ifndef __LP64__ bool IOGeneralMemoryDescriptor::initWithAddress(void * address, IOByteCount withLength, IODirection withDirection) { - _singleRange.v.address = (vm_address_t) address; + _singleRange.v.address = (vm_offset_t) address; _singleRange.v.length = withLength; return initWithRanges(&_singleRange.v, 1, withDirection, kernel_task, true); } bool -IOGeneralMemoryDescriptor::initWithAddress(vm_address_t address, +IOGeneralMemoryDescriptor::initWithAddress(IOVirtualAddress address, IOByteCount withLength, IODirection withDirection, task_t withTask) @@ -685,6 +607,7 @@ IOGeneralMemoryDescriptor::initWithRanges( return initWithOptions(ranges, count, 0, task, mdOpts, /* mapper */ 0); } +#endif /* !__LP64__ */ /* * initWithOptions: @@ -739,11 +662,14 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, switch (type) { case kIOMemoryTypeUIO: case kIOMemoryTypeVirtual: +#ifndef __LP64__ case kIOMemoryTypeVirtual64: +#endif /* !__LP64__ */ assert(task); if (!task) return false; +#ifndef __LP64__ if (vm_map_is_64bit(get_task_map(task)) && (kIOMemoryTypeVirtual == type) && ((IOVirtualRange *) buffers)->address) @@ -751,12 +677,13 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, OSReportWithBacktrace("IOMemoryDescriptor: attempt to create 32b virtual in 64b task, use ::withAddressRange()"); return false; } +#endif /* !__LP64__ */ break; case kIOMemoryTypePhysical: // Neither Physical nor UPL should have a task +#ifndef __LP64__ case kIOMemoryTypePhysical64: - mapper = kIOMapperNone; - +#endif /* !__LP64__ */ case kIOMemoryTypeUPL: assert(!task); break; @@ -784,12 +711,14 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, while (_wireCount) 
complete(); } - if (_ranges.v && _rangesIsAllocated) + if (_ranges.v && !(kIOMemoryAsReference & _flags)) { if (kIOMemoryTypeUIO == type) uio_free((uio_t) _ranges.v); +#ifndef __LP64__ else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) IODelete(_ranges.v64, IOAddressRange, _rangesCount); +#endif /* !__LP64__ */ else IODelete(_ranges.v, IOVirtualRange, _rangesCount); } @@ -806,7 +735,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, } // Grab the appropriate mapper - if (mapper == kIOMapperNone) + if (kIOMemoryMapperNone & options) mapper = 0; // No Mapper else if (mapper == kIOMapperSystem) { IOMapper::checkForSystemMapper(); @@ -824,8 +753,9 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, _flags = options; _task = task; - // DEPRECATED variable initialisation +#ifndef __LP64__ _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ __iomd_reservedA = 0; __iomd_reservedB = 0; @@ -866,9 +796,12 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, _pages += atop_32(offset + count + PAGE_MASK) - atop_32(offset); ioPLBlock iopl; - upl_page_info_t *pageList = UPL_GET_INTERNAL_PAGE_LIST((upl_t) buffers); - iopl.fIOPL = (upl_t) buffers; + upl_page_info_t *pageList = UPL_GET_INTERNAL_PAGE_LIST(iopl.fIOPL); + + if (upl_get_size(iopl.fIOPL) < (count + offset)) + panic("short external upl"); + // Set the flag kIOPLOnDevice convieniently equal to 1 iopl.fFlags = pageList->device | kIOPLExternUPL; iopl.fIOMDOffset = 0; @@ -899,7 +832,9 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, // Initialize the memory descriptor if (options & kIOMemoryAsReference) { +#ifndef __LP64__ _rangesIsAllocated = false; +#endif /* !__LP64__ */ // Hack assignment to get the buffer arg into _ranges. 
// I'd prefer to do _ranges = (Ranges) buffers, but that doesn't @@ -908,25 +843,50 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, _ranges.v = (IOVirtualRange *) buffers; } else { +#ifndef __LP64__ _rangesIsAllocated = true; - switch (_flags & kIOMemoryTypeMask) +#endif /* !__LP64__ */ + switch (type) { case kIOMemoryTypeUIO: _ranges.v = (IOVirtualRange *) uio_duplicate((uio_t) buffers); break; +#ifndef __LP64__ case kIOMemoryTypeVirtual64: case kIOMemoryTypePhysical64: + if (count == 1 + && (((IOAddressRange *) buffers)->address + ((IOAddressRange *) buffers)->length) <= 0x100000000ULL) { + if (kIOMemoryTypeVirtual64 == type) + type = kIOMemoryTypeVirtual; + else + type = kIOMemoryTypePhysical; + _flags = (_flags & ~kIOMemoryTypeMask) | type | kIOMemoryAsReference; + _rangesIsAllocated = false; + _ranges.v = &_singleRange.v; + _singleRange.v.address = ((IOAddressRange *) buffers)->address; + _singleRange.v.length = ((IOAddressRange *) buffers)->length; + break; + } _ranges.v64 = IONew(IOAddressRange, count); if (!_ranges.v64) return false; bcopy(buffers, _ranges.v, count * sizeof(IOAddressRange)); break; +#endif /* !__LP64__ */ case kIOMemoryTypeVirtual: case kIOMemoryTypePhysical: - _ranges.v = IONew(IOVirtualRange, count); - if (!_ranges.v) - return false; + if (count == 1) { + _flags |= kIOMemoryAsReference; +#ifndef __LP64__ + _rangesIsAllocated = false; +#endif /* !__LP64__ */ + _ranges.v = &_singleRange.v; + } else { + _ranges.v = IONew(IOVirtualRange, count); + if (!_ranges.v) + return false; + } bcopy(buffers, _ranges.v, count * sizeof(IOVirtualRange)); break; } @@ -938,7 +898,7 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, UInt32 pages = 0; for (unsigned ind = 0; ind < count; ind++) { user_addr_t addr; - UInt32 len; + IOPhysicalLength len; // addr & len are returned by this function getAddrLenForInd(addr, len, type, vec, ind); @@ -1015,12 +975,14 @@ void IOGeneralMemoryDescriptor::free() if (_memoryEntries) 
_memoryEntries->release(); - if (_ranges.v && _rangesIsAllocated) + if (_ranges.v && !(kIOMemoryAsReference & _flags)) { if (kIOMemoryTypeUIO == type) uio_free((uio_t) _ranges.v); +#ifndef __LP64__ else if ((kIOMemoryTypeVirtual64 == type) || (kIOMemoryTypePhysical64 == type)) IODelete(_ranges.v64, IOAddressRange, _rangesCount); +#endif /* !__LP64__ */ else IODelete(_ranges.v, IOVirtualRange, _rangesCount); @@ -1041,15 +1003,17 @@ void IOGeneralMemoryDescriptor::free() super::free(); } -/* DEPRECATED */ void IOGeneralMemoryDescriptor::unmapFromKernel() -/* DEPRECATED */ { - panic("IOGMD::unmapFromKernel deprecated"); -/* DEPRECATED */ } -/* DEPRECATED */ -/* DEPRECATED */ void IOGeneralMemoryDescriptor::mapIntoKernel(unsigned rangeIndex) -/* DEPRECATED */ { - panic("IOGMD::mapIntoKernel deprecated"); -/* DEPRECATED */ } +#ifndef __LP64__ +void IOGeneralMemoryDescriptor::unmapFromKernel() +{ + panic("IOGMD::unmapFromKernel deprecated"); +} + +void IOGeneralMemoryDescriptor::mapIntoKernel(unsigned rangeIndex) +{ + panic("IOGMD::mapIntoKernel deprecated"); +} +#endif /* !__LP64__ */ /* * getDirection: @@ -1058,7 +1022,11 @@ void IOGeneralMemoryDescriptor::free() */ IODirection IOMemoryDescriptor::getDirection() const { - return _direction; +#ifndef __LP64__ + if (_direction) + return _direction; +#endif /* !__LP64__ */ + return (IODirection) (_flags & kIOMemoryDirectionMask); } /* @@ -1081,6 +1049,7 @@ IOOptionBits IOMemoryDescriptor::getTag( void ) return( _tag); } +#ifndef __LP64__ // @@@ gvdl: who is using this API? Seems like a wierd thing to implement. 
IOPhysicalAddress IOMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length ) @@ -1094,11 +1063,12 @@ IOMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length return( (IOPhysicalAddress) physAddr ); // truncated but only page offset is used } +#endif /* !__LP64__ */ IOByteCount IOMemoryDescriptor::readBytes (IOByteCount offset, void *bytes, IOByteCount length) { - addr64_t dstAddr = (addr64_t) (UInt32) bytes; + addr64_t dstAddr = CAST_DOWN(addr64_t, bytes); IOByteCount remaining; // Assert that this entire I/O is withing the available range @@ -1108,12 +1078,15 @@ IOByteCount IOMemoryDescriptor::readBytes return 0; } + if (kIOMemoryThreadSafe & _flags) + LOCK; + remaining = length = min(length, _length - offset); while (remaining) { // (process another target segment?) addr64_t srcAddr64; IOByteCount srcLen; - srcAddr64 = getPhysicalSegment64(offset, &srcLen); + srcAddr64 = getPhysicalSegment(offset, &srcLen, kIOMemoryMapperNone); if (!srcAddr64) break; @@ -1129,6 +1102,9 @@ IOByteCount IOMemoryDescriptor::readBytes remaining -= srcLen; } + if (kIOMemoryThreadSafe & _flags) + UNLOCK; + assert(!remaining); return length - remaining; @@ -1137,7 +1113,7 @@ IOByteCount IOMemoryDescriptor::readBytes IOByteCount IOMemoryDescriptor::writeBytes (IOByteCount offset, const void *bytes, IOByteCount length) { - addr64_t srcAddr = (addr64_t) (UInt32) bytes; + addr64_t srcAddr = CAST_DOWN(addr64_t, bytes); IOByteCount remaining; // Assert that this entire I/O is withing the available range @@ -1150,12 +1126,15 @@ IOByteCount IOMemoryDescriptor::writeBytes return 0; } + if (kIOMemoryThreadSafe & _flags) + LOCK; + remaining = length = min(length, _length - offset); while (remaining) { // (process another target segment?) 
addr64_t dstAddr64; IOByteCount dstLen; - dstAddr64 = getPhysicalSegment64(offset, &dstLen); + dstAddr64 = getPhysicalSegment(offset, &dstLen, kIOMemoryMapperNone); if (!dstAddr64) break; @@ -1171,6 +1150,9 @@ IOByteCount IOMemoryDescriptor::writeBytes remaining -= dstLen; } + if (kIOMemoryThreadSafe & _flags) + UNLOCK; + assert(!remaining); return length - remaining; @@ -1179,10 +1161,37 @@ IOByteCount IOMemoryDescriptor::writeBytes // osfmk/device/iokit_rpc.c extern "C" unsigned int IODefaultCacheBits(addr64_t pa); -/* DEPRECATED */ void IOGeneralMemoryDescriptor::setPosition(IOByteCount position) -/* DEPRECATED */ { - panic("IOGMD::setPosition deprecated"); -/* DEPRECATED */ } +#ifndef __LP64__ +void IOGeneralMemoryDescriptor::setPosition(IOByteCount position) +{ + panic("IOGMD::setPosition deprecated"); +} +#endif /* !__LP64__ */ + +static volatile SInt64 gIOMDPreparationID __attribute__((aligned(8))) = (1ULL << 32); + +uint64_t +IOGeneralMemoryDescriptor::getPreparationID( void ) +{ + ioGMDData *dataP; + if (!_wireCount || !(dataP = getDataP(_memoryEntries))) + return (kIOPreparationIDUnprepared); + if (kIOPreparationIDUnprepared == dataP->fPreparationID) + { +#if defined(__ppc__ ) + dataP->fPreparationID = gIOMDPreparationID++; +#else + dataP->fPreparationID = OSIncrementAtomic64(&gIOMDPreparationID); +#endif + } + return (dataP->fPreparationID); +} + +uint64_t +IOMemoryDescriptor::getPreparationID( void ) +{ + return (kIOPreparationIDUnsupported); +} IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const { @@ -1195,7 +1204,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * data->fLength = _length; data->fSGCount = _rangesCount; data->fPages = _pages; - data->fDirection = _direction; + data->fDirection = getDirection(); if (!_wireCount) data->fIsPrepared = false; else { @@ -1216,8 +1225,20 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * 
} return kIOReturnSuccess; - } - else if (!(kIOMDWalkSegments & op)) + +#if IOMD_DEBUG_DMAACTIVE + } else if (kIOMDSetDMAActive == op) { + IOGeneralMemoryDescriptor * md = const_cast(this); + md->__iomd_reservedA++; + } else if (kIOMDSetDMAInactive == op) { + IOGeneralMemoryDescriptor * md = const_cast(this); + if (md->__iomd_reservedA) + md->__iomd_reservedA--; + else + panic("kIOMDSetDMAInactive"); +#endif /* IOMD_DEBUG_DMAACTIVE */ + + } else if (!(kIOMDWalkSegments & op)) return kIOReturnBadArgument; // Get the next segment @@ -1256,7 +1277,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * const IOPhysicalRange *physP = (IOPhysicalRange *) &_ranges.p[0]; // Find the range after the one that contains the offset - UInt len; + mach_vm_size_t len; for (len = 0; off2Ind <= offset; ind++) { len = physP[ind].length; off2Ind += len; @@ -1278,6 +1299,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * ind--; off2Ind -= len; } +#ifndef __LP64__ else if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64) { // Physical address based memory descriptor @@ -1306,6 +1328,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * ind--; off2Ind -= len; } +#endif /* !__LP64__ */ else do { if (!_wireCount) panic("IOGMD: not wired for the IODMACommand"); @@ -1367,6 +1390,10 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * // Compute the starting address of this segment IOPhysicalAddress pageAddr = pageList[pageInd].phys_addr; + if (!pageAddr) { + panic("!pageList phys_addr"); + } + address = ptoa_64(pageAddr) + offset; // length is currently set to the length of the remainider of the iopl. 
@@ -1399,28 +1426,70 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * } addr64_t -IOGeneralMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *lengthOfSegment) +IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment, IOOptionBits options) { - IOReturn ret; - IOByteCount length = 0; - addr64_t address = 0; + IOReturn ret; + addr64_t address = 0; + IOByteCount length = 0; + IOMapper * mapper = gIOSystemMapper; + IOOptionBits type = _flags & kIOMemoryTypeMask; + + if (lengthOfSegment) + *lengthOfSegment = 0; + + if (offset >= _length) + return 0; - if (gIOSystemMapper && (kIOMemoryTypePhysical == (_flags & kIOMemoryTypeMask))) - return (super::getPhysicalSegment64(offset, lengthOfSegment)); + // IOMemoryDescriptor::doMap() cannot use getPhysicalSegment() to obtain the page offset, since it must + // support the unwired memory case in IOGeneralMemoryDescriptor, and hibernate_write_image() cannot use + // map()->getVirtualAddress() to obtain the kernel pointer, since it must prevent the memory allocation + // due to IOMemoryMap, so _kIOMemorySourceSegment is a necessary evil until all of this gets cleaned up - if (offset < _length) // (within bounds?) 
+ if ((options & _kIOMemorySourceSegment) && (kIOMemoryTypeUPL != type)) + { + unsigned rangesIndex = 0; + Ranges vec = _ranges; + user_addr_t addr; + + // Find starting address within the vector of ranges + for (;;) { + getAddrLenForInd(addr, length, type, vec, rangesIndex); + if (offset < length) + break; + offset -= length; // (make offset relative) + rangesIndex++; + } + + // Now that we have the starting range, + // lets find the last contiguous range + addr += offset; + length -= offset; + + for ( ++rangesIndex; rangesIndex < _rangesCount; rangesIndex++ ) { + user_addr_t newAddr; + IOPhysicalLength newLen; + + getAddrLenForInd(newAddr, newLen, type, vec, rangesIndex); + if (addr + length != newAddr) + break; + length += newLen; + } + if (addr) + address = (IOPhysicalAddress) addr; // Truncate address to 32bit + } + else { IOMDDMAWalkSegmentState _state; IOMDDMAWalkSegmentArgs * state = (IOMDDMAWalkSegmentArgs *) &_state; state->fOffset = offset; state->fLength = _length - offset; - state->fMapped = false; + state->fMapped = (0 == (options & kIOMemoryMapperNone)); ret = dmaCommandOperation(kIOMDFirstSegment, _state, sizeof(_state)); if ((kIOReturnSuccess != ret) && (kIOReturnOverrun != ret)) - DEBG("getPhysicalSegment64 dmaCommandOperation(%lx), %p, offset %qx, addr %qx, len %qx\n", + DEBG("getPhysicalSegment dmaCommandOperation(%lx), %p, offset %qx, addr %qx, len %qx\n", ret, this, state->fOffset, state->fIOVMAddr, state->fLength); if (kIOReturnSuccess == ret) @@ -1428,60 +1497,88 @@ IOGeneralMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount address = state->fIOVMAddr; length = state->fLength; } - if (!address) - length = 0; + + // dmaCommandOperation() does not distinguish between "mapped" and "unmapped" physical memory, even + // with fMapped set correctly, so we must handle the transformation here until this gets cleaned up + + if (mapper && ((kIOMemoryTypePhysical == type) || (kIOMemoryTypePhysical64 == type))) + { + if ((options & 
kIOMemoryMapperNone) && !(_flags & kIOMemoryMapperNone)) + { + addr64_t origAddr = address; + IOByteCount origLen = length; + + address = mapper->mapAddr(origAddr); + length = page_size - (address & (page_size - 1)); + while ((length < origLen) + && ((address + length) == mapper->mapAddr(origAddr + length))) + length += page_size; + if (length > origLen) + length = origLen; + } +#ifdef __LP64__ + else if (!(options & kIOMemoryMapperNone) && (_flags & kIOMemoryMapperNone)) + { + panic("getPhysicalSegment not mapped for I/O"); + } +#endif /* __LP64__ */ + } } + if (!address) + length = 0; + if (lengthOfSegment) *lengthOfSegment = length; return (address); } -IOPhysicalAddress -IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +#ifndef __LP64__ +addr64_t +IOMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment, IOOptionBits options) { - IOReturn ret; - IOByteCount length = 0; - addr64_t address = 0; - -// assert(offset <= _length); + addr64_t address = 0; - if (offset < _length) // (within bounds?) 
+ if (options & _kIOMemorySourceSegment) { - IOMDDMAWalkSegmentState _state; - IOMDDMAWalkSegmentArgs * state = (IOMDDMAWalkSegmentArgs *) &_state; + address = getSourceSegment(offset, lengthOfSegment); + } + else if (options & kIOMemoryMapperNone) + { + address = getPhysicalSegment64(offset, lengthOfSegment); + } + else + { + address = getPhysicalSegment(offset, lengthOfSegment); + } - state->fOffset = offset; - state->fLength = _length - offset; - state->fMapped = true; + return (address); +} - ret = dmaCommandOperation( - kIOMDFirstSegment, _state, sizeof(_state)); +addr64_t +IOGeneralMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *lengthOfSegment) +{ + return (getPhysicalSegment(offset, lengthOfSegment, kIOMemoryMapperNone)); +} - if ((kIOReturnSuccess != ret) && (kIOReturnOverrun != ret)) - DEBG("getPhysicalSegment dmaCommandOperation(%lx), %p, offset %qx, addr %qx, len %qx\n", - ret, this, state->fOffset, - state->fIOVMAddr, state->fLength); - if (kIOReturnSuccess == ret) - { - address = state->fIOVMAddr; - length = state->fLength; - } +IOPhysicalAddress +IOGeneralMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +{ + addr64_t address = 0; + IOByteCount length = 0; - if (!address) - length = 0; - } + address = getPhysicalSegment(offset, lengthOfSegment, 0); + + if (lengthOfSegment) + length = *lengthOfSegment; if ((address + length) > 0x100000000ULL) { panic("getPhysicalSegment() out of 32b range 0x%qx, len 0x%lx, class %s", - address, length, (getMetaClass())->getClassName()); + address, (long) length, (getMetaClass())->getClassName()); } - if (lengthOfSegment) - *lengthOfSegment = length; - return ((IOPhysicalAddress) address); } @@ -1522,70 +1619,28 @@ IOMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount *length } IOPhysicalAddress -IOGeneralMemoryDescriptor::getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +IOMemoryDescriptor::getPhysicalSegment(IOByteCount 
offset, IOByteCount *lengthOfSegment) { - IOPhysicalAddress address = 0; - IOPhysicalLength length = 0; - IOOptionBits type = _flags & kIOMemoryTypeMask; - - assert(offset <= _length); - - if ( type == kIOMemoryTypeUPL) - return super::getSourceSegment( offset, lengthOfSegment ); - else if ( offset < _length ) // (within bounds?) - { - unsigned rangesIndex = 0; - Ranges vec = _ranges; - user_addr_t addr; - - // Find starting address within the vector of ranges - for (;;) { - getAddrLenForInd(addr, length, type, vec, rangesIndex); - if (offset < length) - break; - offset -= length; // (make offset relative) - rangesIndex++; - } - - // Now that we have the starting range, - // lets find the last contiguous range - addr += offset; - length -= offset; - - for ( ++rangesIndex; rangesIndex < _rangesCount; rangesIndex++ ) { - user_addr_t newAddr; - IOPhysicalLength newLen; - - getAddrLenForInd(newAddr, newLen, type, vec, rangesIndex); - if (addr + length != newAddr) - break; - length += newLen; - } - if (addr) - address = (IOPhysicalAddress) addr; // Truncate address to 32bit - else - length = 0; - } - - if ( lengthOfSegment ) *lengthOfSegment = length; - - return address; + return ((IOPhysicalAddress) getPhysicalSegment(offset, lengthOfSegment, 0)); } -/* DEPRECATED */ /* USE INSTEAD: map(), readBytes(), writeBytes() */ -/* DEPRECATED */ void * IOGeneralMemoryDescriptor::getVirtualSegment(IOByteCount offset, -/* DEPRECATED */ IOByteCount * lengthOfSegment) -/* DEPRECATED */ { - if (_task == kernel_task) - return (void *) getSourceSegment(offset, lengthOfSegment); - else - panic("IOGMD::getVirtualSegment deprecated"); - - return 0; -/* DEPRECATED */ } -/* DEPRECATED */ /* USE INSTEAD: map(), readBytes(), writeBytes() */ +IOPhysicalAddress +IOGeneralMemoryDescriptor::getSourceSegment(IOByteCount offset, IOByteCount *lengthOfSegment) +{ + return ((IOPhysicalAddress) getPhysicalSegment(offset, lengthOfSegment, _kIOMemorySourceSegment)); +} +void * 
IOGeneralMemoryDescriptor::getVirtualSegment(IOByteCount offset, + IOByteCount * lengthOfSegment) +{ + if (_task == kernel_task) + return (void *) getSourceSegment(offset, lengthOfSegment); + else + panic("IOGMD::getVirtualSegment deprecated"); + return 0; +} +#endif /* !__LP64__ */ IOReturn IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const @@ -1597,7 +1652,7 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data IOMDDMACharacteristics *data = (IOMDDMACharacteristics *) vData; data->fLength = getLength(); data->fSGCount = 0; - data->fDirection = _direction; + data->fDirection = getDirection(); if (IOMapper::gSystem) data->fIsMapped = true; data->fIsPrepared = true; // Assume prepared - fails safe @@ -1614,7 +1669,7 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data if (data->fMapped && IOMapper::gSystem) data->fIOVMAddr = ncmd->getPhysicalSegment(offset, &length); else - data->fIOVMAddr = ncmd->getPhysicalSegment64(offset, &length); + data->fIOVMAddr = ncmd->getPhysicalSegment(offset, &length, kIOMemoryMapperNone); data->fLength = length; } else @@ -1623,6 +1678,113 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data return kIOReturnSuccess; } +static IOReturn +purgeableControlBits(IOOptionBits newState, vm_purgable_t * control, int * state) +{ + IOReturn err = kIOReturnSuccess; + + *control = VM_PURGABLE_SET_STATE; + switch (newState) + { + case kIOMemoryPurgeableKeepCurrent: + *control = VM_PURGABLE_GET_STATE; + break; + + case kIOMemoryPurgeableNonVolatile: + *state = VM_PURGABLE_NONVOLATILE; + break; + case kIOMemoryPurgeableVolatile: + *state = VM_PURGABLE_VOLATILE; + break; + case kIOMemoryPurgeableEmpty: + *state = VM_PURGABLE_EMPTY; + break; + default: + err = kIOReturnBadArgument; + break; + } + return (err); +} + +static IOReturn +purgeableStateBits(int * state) +{ + IOReturn err = kIOReturnSuccess; + + switch (*state) + 
{ + case VM_PURGABLE_NONVOLATILE: + *state = kIOMemoryPurgeableNonVolatile; + break; + case VM_PURGABLE_VOLATILE: + *state = kIOMemoryPurgeableVolatile; + break; + case VM_PURGABLE_EMPTY: + *state = kIOMemoryPurgeableEmpty; + break; + default: + *state = kIOMemoryPurgeableNonVolatile; + err = kIOReturnNotReady; + break; + } + return (err); +} + +IOReturn +IOGeneralMemoryDescriptor::setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ) +{ + IOReturn err = kIOReturnSuccess; + vm_purgable_t control; + int state; + + if (_memEntry) + { + err = super::setPurgeable(newState, oldState); + } + else + { + if (kIOMemoryThreadSafe & _flags) + LOCK; + do + { + // Find the appropriate vm_map for the given task + vm_map_t curMap; + if (_task == kernel_task && (kIOMemoryBufferPageable & _flags)) + { + err = kIOReturnNotReady; + break; + } + else + curMap = get_task_map(_task); + + // can only do one range + Ranges vec = _ranges; + IOOptionBits type = _flags & kIOMemoryTypeMask; + user_addr_t addr; + IOByteCount len; + getAddrLenForInd(addr, len, type, vec, 0); + + err = purgeableControlBits(newState, &control, &state); + if (kIOReturnSuccess != err) + break; + err = mach_vm_purgable_control(curMap, addr, control, &state); + if (oldState) + { + if (kIOReturnSuccess == err) + { + err = purgeableStateBits(&state); + *oldState = state; + } + } + } + while (false); + if (kIOMemoryThreadSafe & _flags) + UNLOCK; + } + return (err); +} + IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, IOOptionBits * oldState ) { @@ -1630,6 +1792,9 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, vm_purgable_t control; int state; + if (kIOMemoryThreadSafe & _flags) + LOCK; + do { if (!_memEntry) @@ -1637,59 +1802,24 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, err = kIOReturnNotReady; break; } - - control = VM_PURGABLE_SET_STATE; - switch (newState) - { - case kIOMemoryPurgeableKeepCurrent: - control = VM_PURGABLE_GET_STATE; - break; 
- - case kIOMemoryPurgeableNonVolatile: - state = VM_PURGABLE_NONVOLATILE; - break; - case kIOMemoryPurgeableVolatile: - state = VM_PURGABLE_VOLATILE; - break; - case kIOMemoryPurgeableEmpty: - state = VM_PURGABLE_EMPTY; - break; - default: - err = kIOReturnBadArgument; - break; - } - - if (kIOReturnSuccess != err) - break; - + err = purgeableControlBits(newState, &control, &state); + if (kIOReturnSuccess != err) + break; err = mach_memory_entry_purgable_control((ipc_port_t) _memEntry, control, &state); - - if (oldState) - { - if (kIOReturnSuccess == err) - { - switch (state) - { - case VM_PURGABLE_NONVOLATILE: - state = kIOMemoryPurgeableNonVolatile; - break; - case VM_PURGABLE_VOLATILE: - state = kIOMemoryPurgeableVolatile; - break; - case VM_PURGABLE_EMPTY: - state = kIOMemoryPurgeableEmpty; - break; - default: - state = kIOMemoryPurgeableNonVolatile; - err = kIOReturnNotReady; - break; - } - *oldState = state; - } - } + if (oldState) + { + if (kIOReturnSuccess == err) + { + err = purgeableStateBits(&state); + *oldState = state; + } + } } while (false); + if (kIOMemoryThreadSafe & _flags) + UNLOCK; + return (err); } @@ -1715,6 +1845,9 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, if (!func) return (kIOReturnUnsupported); + if (kIOMemoryThreadSafe & _flags) + LOCK; + remaining = length = min(length, getLength() - offset); while (remaining) // (process another target segment?) @@ -1722,7 +1855,7 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, addr64_t dstAddr64; IOByteCount dstLen; - dstAddr64 = getPhysicalSegment64(offset, &dstLen); + dstAddr64 = getPhysicalSegment(offset, &dstLen, kIOMemoryMapperNone); if (!dstAddr64) break; @@ -1736,6 +1869,9 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, remaining -= dstLen; } + if (kIOMemoryThreadSafe & _flags) + UNLOCK; + return (remaining ? 
kIOReturnUnderrun : kIOReturnSuccess); } @@ -1750,7 +1886,7 @@ extern vm_offset_t first_avail; static kern_return_t io_get_kernel_static_upl( vm_map_t /* map */, - vm_address_t offset, + uintptr_t offset, vm_size_t *upl_size, upl_t *upl, upl_page_info_array_t page_list, @@ -1779,7 +1915,7 @@ io_get_kernel_static_upl( page_list[page].precious = 0; page_list[page].device = 0; if (phys > highestPage) - highestPage = page; + highestPage = phys; } *highest_page = highestPage; @@ -1813,7 +1949,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) dataP = 0; // May no longer be valid so lets not get tempted. if (forDirection == kIODirectionNone) - forDirection = _direction; + forDirection = getDirection(); int uplFlags; // This Mem Desc's default flags for upl creation switch (kIODirectionOutIn & forDirection) @@ -1856,7 +1992,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) // Get the startPage address and length of vec[range] getAddrLenForInd(startPage, numBytes, type, vec, range); - iopl.fPageOffset = (short) startPage & PAGE_MASK; + iopl.fPageOffset = startPage & PAGE_MASK; numBytes += iopl.fPageOffset; startPage = trunc_page_64(startPage); @@ -1883,7 +2019,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) int ioplFlags = uplFlags; upl_page_list_ptr_t baseInfo = &pageInfo[pageIndex]; - vm_size_t ioplSize = round_page_32(numBytes); + vm_size_t ioplSize = round_page(numBytes); unsigned int numPageInfo = atop_32(ioplSize); if (theMap == kernel_map && kernelStart < io_kernel_static_end) { @@ -1908,7 +2044,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) assert(theMap); error = vm_map_create_upl(theMap, startPage, - &ioplSize, + (upl_size_t*)&ioplSize, &iopl.fIOPL, baseInfo, &numPageInfo, @@ -2074,6 +2210,10 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) ioPLBlock *ioplList = getIOPLList(dataP); UInt count = 
getNumIOPL(_memoryEntries, dataP); +#if IOMD_DEBUG_DMAACTIVE + if (__iomd_reservedA) panic("complete() while dma active"); +#endif /* IOMD_DEBUG_DMAACTIVE */ + if (dataP->fMapper && _pages && ioplList[0].fMappedBase) dataP->fMapper->iovmFree(ioplList[0].fMappedBase, _pages); @@ -2086,6 +2226,8 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) } } (void) _memoryEntries->initWithBytes(dataP, sizeof(ioGMDData)); // == setLength() + + dataP->fPreparationID = kIOPreparationIDUnprepared; } } @@ -2103,13 +2245,15 @@ IOReturn IOGeneralMemoryDescriptor::doMap( IOByteCount __length ) { +#ifndef __LP64__ if (!(kIOMap64Bit & options)) panic("IOGeneralMemoryDescriptor::doMap !64bit"); +#endif /* !__LP64__ */ - _IOMemoryMap * mapping = (_IOMemoryMap *) *__address; + IOMemoryMap * mapping = (IOMemoryMap *) *__address; mach_vm_size_t offset = mapping->fOffset + __offset; mach_vm_size_t length = mapping->fLength; - kern_return_t kr; + kern_return_t kr = kIOReturnVMError; ipc_port_t sharedMem = (ipc_port_t) _memEntry; IOOptionBits type = _flags & kIOMemoryTypeMask; @@ -2151,26 +2295,43 @@ IOReturn IOGeneralMemoryDescriptor::doMap( prot, &sharedMem, NULL ); - if( (KERN_SUCCESS == kr) && (actualSize != round_page_32(size))) { + if( (KERN_SUCCESS == kr) && (actualSize != round_page(size))) + { + // map will cross vm objects #if IOASSERT - IOLog("mach_make_memory_entry_64 (%08llx) size (%08lx:%08x)\n", - range0Addr, (UInt32) actualSize, size); + IOLog("mach_make_memory_entry_64 (%08llx) size (%08llx:%08llx)\n", + range0Addr, (UInt64)actualSize, (UInt64)size); #endif kr = kIOReturnVMError; ipc_port_release_send( sharedMem ); - } - - if( KERN_SUCCESS != kr) sharedMem = MACH_PORT_NULL; - } else do { // _task == 0, must be physical + mach_vm_address_t address; + mach_vm_size_t pageOffset = (range0Addr & PAGE_MASK); + + address = trunc_page_64(mapping->fAddress); + if ((options & kIOMapAnywhere) || ((mapping->fAddress - address) == pageOffset)) + { + kr = 
IOMemoryDescriptorMapCopy(mapping->fAddressMap, + get_task_map(_task), range0Addr, + options, + offset, &address, round_page_64(length + pageOffset)); + if (kr == KERN_SUCCESS) + mapping->fAddress = address + pageOffset; + else + mapping->fAddress = NULL; + } + } + } + else do + { // _task == 0, must be physical memory_object_t pager; unsigned int flags = 0; addr64_t pa; IOPhysicalLength segLen; - pa = getPhysicalSegment64( offset, &segLen ); + pa = getPhysicalSegment( offset, &segLen, kIOMemoryMapperNone ); if( !reserved) { reserved = IONew( ExpansionData, 1 ); @@ -2221,7 +2382,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap( flags |= reserved->pagerContig ? DEVICE_PAGER_CONTIGUOUS : 0; - pager = device_pager_setup( (memory_object_t) 0, (int) reserved, + pager = device_pager_setup( (memory_object_t) 0, (uintptr_t) reserved, size, flags); assert( pager ); @@ -2251,7 +2412,7 @@ IOReturn IOGeneralMemoryDescriptor::doMap( IOReturn result; if (0 == sharedMem) - result = kIOReturnVMError; + result = kr; else result = super::doMap( __addressMap, __address, options, __offset, __length ); @@ -2269,21 +2430,27 @@ IOReturn IOGeneralMemoryDescriptor::doUnmap( /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -OSDefineMetaClassAndAbstractStructors( IOMemoryMap, OSObject ) - -/* inline function implementation */ -IOPhysicalAddress IOMemoryMap::getPhysicalAddress() - { return( getPhysicalSegment( 0, 0 )); } +#undef super +#define super OSObject +OSDefineMetaClassAndStructors( IOMemoryMap, OSObject ) -#undef super -#define super IOMemoryMap +OSMetaClassDefineReservedUnused(IOMemoryMap, 0); +OSMetaClassDefineReservedUnused(IOMemoryMap, 1); +OSMetaClassDefineReservedUnused(IOMemoryMap, 2); +OSMetaClassDefineReservedUnused(IOMemoryMap, 3); +OSMetaClassDefineReservedUnused(IOMemoryMap, 4); +OSMetaClassDefineReservedUnused(IOMemoryMap, 5); +OSMetaClassDefineReservedUnused(IOMemoryMap, 6); +OSMetaClassDefineReservedUnused(IOMemoryMap, 7); 
-OSDefineMetaClassAndStructors(_IOMemoryMap, IOMemoryMap) +/* ex-inline function implementation */ +IOPhysicalAddress IOMemoryMap::getPhysicalAddress() + { return( getPhysicalSegment( 0, 0 )); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -bool _IOMemoryMap::init( +bool IOMemoryMap::init( task_t intoTask, mach_vm_address_t toAddress, IOOptionBits _options, @@ -2310,7 +2477,7 @@ bool _IOMemoryMap::init( return (true); } -bool _IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size_t _offset) +bool IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_size_t _offset) { if (!_memory) return(false); @@ -2337,6 +2504,8 @@ bool _IOMemoryMap::setMemoryDescriptor(IOMemoryDescriptor * _memory, mach_vm_siz struct IOMemoryDescriptorMapAllocRef { ipc_port_t sharedMem; + vm_map_t src_map; + mach_vm_offset_t src_address; mach_vm_address_t mapped; mach_vm_size_t size; mach_vm_size_t sourceOffset; @@ -2406,8 +2575,29 @@ static kern_return_t IOMemoryDescriptorMapAlloc(vm_map_t map, void * _ref) ref->mapped = 0; continue; } - } + else if (ref->src_map) + { + vm_prot_t cur_prot, max_prot; + err = mach_vm_remap(map, &ref->mapped, ref->size, PAGE_MASK, + (ref->options & kIOMapAnywhere) ? 
TRUE : FALSE, + ref->src_map, ref->src_address, + FALSE /* copy */, + &cur_prot, + &max_prot, + VM_INHERIT_NONE); + if (KERN_SUCCESS == err) + { + if ((!(VM_PROT_READ & cur_prot)) + || (!(kIOMapReadOnly & ref->options) && !(VM_PROT_WRITE & cur_prot))) + { + mach_vm_deallocate(map, ref->mapped, ref->size); + err = KERN_PROTECTION_FAILURE; + } + } + if (KERN_SUCCESS != err) + ref->mapped = 0; + } else { err = mach_vm_allocate( map, &ref->mapped, ref->size, @@ -2435,11 +2625,12 @@ IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits optio IOReturn err; IOMemoryDescriptorMapAllocRef ref; - ref.sharedMem = entry; + ref.sharedMem = entry; + ref.src_map = NULL; + ref.sharedMem = entry; ref.sourceOffset = trunc_page_64(offset); - ref.options = options; - - ref.size = length; + ref.options = options; + ref.size = length; if (options & kIOMapAnywhere) // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE @@ -2456,6 +2647,38 @@ IOMemoryDescriptorMapMemEntry(vm_map_t map, ipc_port_t entry, IOOptionBits optio return (err); } +kern_return_t +IOMemoryDescriptorMapCopy(vm_map_t map, + vm_map_t src_map, + mach_vm_offset_t src_address, + IOOptionBits options, + mach_vm_size_t offset, + mach_vm_address_t * address, mach_vm_size_t length) +{ + IOReturn err; + IOMemoryDescriptorMapAllocRef ref; + + ref.sharedMem = NULL; + ref.src_map = src_map; + ref.src_address = src_address; + ref.sourceOffset = trunc_page_64(offset); + ref.options = options; + ref.size = length; + + if (options & kIOMapAnywhere) + // vm_map looks for addresses above here, even when VM_FLAGS_ANYWHERE + ref.mapped = 0; + else + ref.mapped = *address; + + if (map == kernel_map) + err = IOIteratePageableMaps(ref.size, &IOMemoryDescriptorMapAlloc, &ref); + else + err = IOMemoryDescriptorMapAlloc(map, &ref); + + *address = ref.mapped; + return (err); +} IOReturn IOMemoryDescriptor::doMap( vm_map_t __addressMap, @@ -2464,9 +2687,11 @@ IOReturn IOMemoryDescriptor::doMap( IOByteCount 
__offset, IOByteCount __length ) { +#ifndef __LP64__ if (!(kIOMap64Bit & options)) panic("IOMemoryDescriptor::doMap !64bit"); +#endif /* !__LP64__ */ - _IOMemoryMap * mapping = (_IOMemoryMap *) *__address; + IOMemoryMap * mapping = (IOMemoryMap *) *__address; mach_vm_size_t offset = mapping->fOffset + __offset; mach_vm_size_t length = mapping->fLength; @@ -2474,11 +2699,12 @@ IOReturn IOMemoryDescriptor::doMap( memory_object_t pager; mach_vm_size_t pageOffset; IOPhysicalAddress sourceAddr; + unsigned int lock_count; do { - sourceAddr = getSourceSegment( offset, NULL ); - pageOffset = sourceAddr - trunc_page_32( sourceAddr ); + sourceAddr = getPhysicalSegment( offset, NULL, _kIOMemorySourceSegment ); + pageOffset = sourceAddr - trunc_page( sourceAddr ); if( reserved) pager = (memory_object_t) reserved->devicePager; @@ -2497,7 +2723,7 @@ IOReturn IOMemoryDescriptor::doMap( continue; } - size = mapping->fLength + pageOffset; + size = round_page(mapping->fLength + pageOffset); flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; @@ -2506,7 +2732,18 @@ IOReturn IOMemoryDescriptor::doMap( &flags)) redirUPL2 = NULL; + for (lock_count = 0; + IORecursiveLockHaveLock(gIOMemoryLock); + lock_count++) { + UNLOCK; + } err = upl_transpose(redirUPL2, mapping->fRedirUPL); + for (; + lock_count; + lock_count--) { + LOCK; + } + if (kIOReturnSuccess != err) { IOLog("upl_transpose(%x)\n", err); @@ -2555,7 +2792,7 @@ IOReturn IOMemoryDescriptor::doMap( doUnmap( mapping->fAddressMap, (IOVirtualAddress) mapping, 0 ); } -#ifdef DEBUG +#if DEBUG if (kIOLogMapping & gIOKitDebug) IOLog("mapping(%x) desc %p @ %lx, map %p, address %qx, offset %qx, length %qx\n", err, this, sourceAddr, mapping, address, offset, length); @@ -2594,7 +2831,7 @@ IOReturn IOMemoryDescriptor::handleFault( { if( kIOMemoryRedirected & _flags) { -#ifdef DEBUG +#if DEBUG IOLog("sleep mem redirect %p, %qx\n", this, sourceOffset); #endif do { @@ -2605,7 +2842,7 @@ IOReturn 
IOMemoryDescriptor::handleFault( return( kIOReturnSuccess ); } - physAddr = getPhysicalSegment64( sourceOffset, &segLen ); + physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone ); assert( physAddr ); pageOffset = physAddr - trunc_page_64( physAddr ); pagerOffset = sourceOffset; @@ -2620,16 +2857,16 @@ IOReturn IOMemoryDescriptor::handleFault( // in the middle of the loop only map whole pages if( segLen >= bytes) segLen = bytes; - else if( segLen != trunc_page_32( segLen)) + else if( segLen != trunc_page( segLen)) err = kIOReturnVMError; if( physAddr != trunc_page_64( physAddr)) err = kIOReturnBadArgument; if (kIOReturnSuccess != err) break; -#ifdef DEBUG +#if DEBUG if( kIOLogMapping & gIOKitDebug) - IOLog("_IOMemoryMap::map(%p) 0x%qx->0x%qx:0x%qx\n", + IOLog("IOMemoryMap::map(%p) 0x%qx->0x%qx:0x%qx\n", addressMap, address + pageOffset, physAddr + pageOffset, segLen - pageOffset); #endif @@ -2640,9 +2877,9 @@ IOReturn IOMemoryDescriptor::handleFault( IOPhysicalLength allLen; addr64_t allPhys; - allPhys = getPhysicalSegment64( 0, &allLen ); + allPhys = getPhysicalSegment( 0, &allLen, kIOMemoryMapperNone ); assert( allPhys ); - err = device_pager_populate_object( pager, 0, atop_64(allPhys), round_page_32(allLen) ); + err = device_pager_populate_object( pager, 0, atop_64(allPhys), round_page(allLen) ); } else { @@ -2685,7 +2922,7 @@ IOReturn IOMemoryDescriptor::handleFault( pageOffset = 0; } - while (bytes && (physAddr = getPhysicalSegment64( sourceOffset, &segLen ))); + while (bytes && (physAddr = getPhysicalSegment( sourceOffset, &segLen, kIOMemoryMapperNone ))); if (bytes) err = kIOReturnBadArgument; @@ -2709,15 +2946,15 @@ IOReturn IOMemoryDescriptor::doUnmap( } else { - addressMap = ((_IOMemoryMap *) __address)->fAddressMap; - address = ((_IOMemoryMap *) __address)->fAddress; - length = ((_IOMemoryMap *) __address)->fLength; + addressMap = ((IOMemoryMap *) __address)->fAddressMap; + address = ((IOMemoryMap *) __address)->fAddress; + length = 
((IOMemoryMap *) __address)->fLength; } if( _memEntry && (addressMap == kernel_map) && (kIOMemoryBufferPageable & _flags)) addressMap = IOPageableMapForAddress( address ); -#ifdef DEBUG +#if DEBUG if( kIOLogMapping & gIOKitDebug) IOLog("IOMemoryDescriptor::doUnmap map %p, 0x%qx:0x%qx\n", addressMap, address, length ); @@ -2731,7 +2968,7 @@ IOReturn IOMemoryDescriptor::doUnmap( IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) { IOReturn err = kIOReturnSuccess; - _IOMemoryMap * mapping = 0; + IOMemoryMap * mapping = 0; OSIterator * iter; LOCK; @@ -2743,7 +2980,7 @@ IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) do { if( (iter = OSCollectionIterator::withCollection( _mappings))) { - while( (mapping = (_IOMemoryMap *) iter->getNextObject())) + while( (mapping = (IOMemoryMap *) iter->getNextObject())) mapping->redirect( safeTask, doRedirect ); iter->release(); @@ -2757,27 +2994,24 @@ IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) UNLOCK; +#ifndef __LP64__ // temporary binary compatibility IOSubMemoryDescriptor * subMem; if( (subMem = OSDynamicCast( IOSubMemoryDescriptor, this))) err = subMem->redirect( safeTask, doRedirect ); else err = kIOReturnSuccess; +#endif /* !__LP64__ */ return( err ); } -IOReturn IOSubMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) -{ - return( _parent->redirect( safeTask, doRedirect )); -} - -IOReturn _IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) +IOReturn IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) { IOReturn err = kIOReturnSuccess; if( fSuperMap) { -// err = ((_IOMemoryMap *)superMap)->redirect( safeTask, doRedirect ); +// err = ((IOMemoryMap *)superMap)->redirect( safeTask, doRedirect ); } else { LOCK; @@ -2793,18 +3027,8 @@ IOReturn _IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) && (0 == (fOptions & kIOMapStatic))) { IOUnmapPages( fAddressMap, fAddress, fLength ); - if(!doRedirect && safeTask - && 
(((fMemory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) - || ((fMemory->_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64))) - { - IOVirtualAddress iova = (IOVirtualAddress) this; - err = mach_vm_deallocate( fAddressMap, fAddress, fLength ); - err = fMemory->doMap( fAddressMap, &iova, - (fOptions & ~kIOMapAnywhere) | kIOMap64Bit/*| kIOMapReserve*/, - 0, 0 ); - } else - err = kIOReturnSuccess; -#ifdef DEBUG + err = kIOReturnSuccess; +#if DEBUG IOLog("IOMemoryMap::redirect(%d, %p) 0x%qx:0x%qx from %p\n", doRedirect, this, fAddress, fLength, fAddressMap); #endif } @@ -2828,7 +3052,7 @@ IOReturn _IOMemoryMap::redirect( task_t safeTask, bool doRedirect ) return( err ); } -IOReturn _IOMemoryMap::unmap( void ) +IOReturn IOMemoryMap::unmap( void ) { IOReturn err; @@ -2855,9 +3079,11 @@ IOReturn _IOMemoryMap::unmap( void ) return( err ); } -void _IOMemoryMap::taskDied( void ) +void IOMemoryMap::taskDied( void ) { LOCK; + if (fUserClientUnmap) + unmap(); if( fAddressMap) { vm_map_deallocate(fAddressMap); fAddressMap = 0; @@ -2867,18 +3093,24 @@ void _IOMemoryMap::taskDied( void ) UNLOCK; } +IOReturn IOMemoryMap::userClientUnmap( void ) +{ + fUserClientUnmap = true; + return (kIOReturnSuccess); +} + // Overload the release mechanism. All mappings must be a member // of a memory descriptors _mappings set. This means that we // always have 2 references on a mapping. When either of these mappings // are released we need to free ourselves. 
-void _IOMemoryMap::taggedRelease(const void *tag) const +void IOMemoryMap::taggedRelease(const void *tag) const { LOCK; super::taggedRelease(tag, 2); UNLOCK; } -void _IOMemoryMap::free() +void IOMemoryMap::free() { unmap(); @@ -2908,35 +3140,41 @@ void _IOMemoryMap::free() super::free(); } -IOByteCount _IOMemoryMap::getLength() +IOByteCount IOMemoryMap::getLength() { return( fLength ); } -IOVirtualAddress _IOMemoryMap::getVirtualAddress() +IOVirtualAddress IOMemoryMap::getVirtualAddress() { +#ifndef __LP64__ if (fSuperMap) fSuperMap->getVirtualAddress(); - else if (fAddressMap && vm_map_is_64bit(fAddressMap)) + else if (fAddressMap + && vm_map_is_64bit(fAddressMap) + && (sizeof(IOVirtualAddress) < 8)) { OSReportWithBacktrace("IOMemoryMap::getVirtualAddress(0x%qx) called on 64b map; use ::getAddress()", fAddress); } +#endif /* !__LP64__ */ return (fAddress); } -mach_vm_address_t _IOMemoryMap::getAddress() +#ifndef __LP64__ +mach_vm_address_t IOMemoryMap::getAddress() { return( fAddress); } -mach_vm_size_t _IOMemoryMap::getSize() +mach_vm_size_t IOMemoryMap::getSize() { return( fLength ); } +#endif /* !__LP64__ */ -task_t _IOMemoryMap::getAddressTask() +task_t IOMemoryMap::getAddressTask() { if( fSuperMap) return( fSuperMap->getAddressTask()); @@ -2944,18 +3182,18 @@ task_t _IOMemoryMap::getAddressTask() return( fAddressTask); } -IOOptionBits _IOMemoryMap::getMapOptions() +IOOptionBits IOMemoryMap::getMapOptions() { return( fOptions); } -IOMemoryDescriptor * _IOMemoryMap::getMemoryDescriptor() +IOMemoryDescriptor * IOMemoryMap::getMemoryDescriptor() { return( fMemory ); } -_IOMemoryMap * _IOMemoryMap::copyCompatible( - _IOMemoryMap * newMapping ) +IOMemoryMap * IOMemoryMap::copyCompatible( + IOMemoryMap * newMapping ) { task_t task = newMapping->getAddressTask(); mach_vm_address_t toAddress = newMapping->fAddress; @@ -2999,12 +3237,20 @@ _IOMemoryMap * _IOMemoryMap::copyCompatible( } IOPhysicalAddress -_IOMemoryMap::getPhysicalSegment( IOByteCount _offset, 
IOPhysicalLength * _length) +#ifdef __LP64__ +IOMemoryMap::getPhysicalSegment( IOByteCount _offset, IOPhysicalLength * _length, IOOptionBits _options) +#else /* !__LP64__ */ +IOMemoryMap::getPhysicalSegment( IOByteCount _offset, IOPhysicalLength * _length) +#endif /* !__LP64__ */ { IOPhysicalAddress address; LOCK; +#ifdef __LP64__ + address = fMemory->getPhysicalSegment( fOffset + _offset, _length, _options ); +#else /* !__LP64__ */ address = fMemory->getPhysicalSegment( fOffset + _offset, _length ); +#endif /* !__LP64__ */ UNLOCK; return( address ); @@ -3066,6 +3312,7 @@ IOMemoryMap * IOMemoryDescriptor::map( 0, getLength() )); } +#ifndef __LP64__ IOMemoryMap * IOMemoryDescriptor::map( task_t intoTask, IOVirtualAddress atAddress, @@ -3082,6 +3329,7 @@ IOMemoryMap * IOMemoryDescriptor::map( return (createMappingInTask(intoTask, atAddress, options, offset, length)); } +#endif /* !__LP64__ */ IOMemoryMap * IOMemoryDescriptor::createMappingInTask( task_t intoTask, @@ -3090,13 +3338,13 @@ IOMemoryMap * IOMemoryDescriptor::createMappingInTask( mach_vm_size_t offset, mach_vm_size_t length) { - IOMemoryMap * result; - _IOMemoryMap * mapping; + IOMemoryMap * result; + IOMemoryMap * mapping; if (0 == length) length = getLength(); - mapping = new _IOMemoryMap; + mapping = new IOMemoryMap; if( mapping && !mapping->init( intoTask, atAddress, @@ -3110,7 +3358,7 @@ IOMemoryMap * IOMemoryDescriptor::createMappingInTask( else result = 0; -#ifdef DEBUG +#if DEBUG if (!result) IOLog("createMappingInTask failed desc %p, addr %qx, options %lx, offset %qx, length %qx\n", this, atAddress, options, offset, length); @@ -3119,14 +3367,16 @@ IOMemoryMap * IOMemoryDescriptor::createMappingInTask( return (result); } -IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, +#ifndef __LP64__ // there is only a 64 bit version for LP64 +IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, IOOptionBits options, IOByteCount offset) { return 
(redirect(newBackingMemory, options, (mach_vm_size_t)offset)); } +#endif -IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, +IOReturn IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, IOOptionBits options, mach_vm_size_t offset) { @@ -3146,7 +3396,7 @@ IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, if (!fRedirUPL) { - vm_size_t size = fLength; + vm_size_t size = round_page(fLength); int flags = UPL_COPYOUT_FROM | UPL_SET_INTERNAL | UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_BLOCK_ACCESS; if (KERN_SUCCESS != memory_object_iopl_request((ipc_port_t) fMemory->_memEntry, 0, &size, &fRedirUPL, @@ -3157,7 +3407,8 @@ IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, if (physMem) { IOUnmapPages( fAddressMap, fAddress, fLength ); - physMem->redirect(0, true); + if (false) + physMem->redirect(0, true); } } @@ -3177,7 +3428,7 @@ IOReturn _IOMemoryMap::redirect(IOMemoryDescriptor * newBackingMemory, upl_deallocate(fRedirUPL); fRedirUPL = 0; } - if (physMem) + if (false && physMem) physMem->redirect(0, false); } } @@ -3199,13 +3450,15 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( IOByteCount __offset, IOByteCount __length ) { +#ifndef __LP64__ if (!(kIOMap64Bit & options)) panic("IOMemoryDescriptor::makeMapping !64bit"); +#endif /* !__LP64__ */ IOMemoryDescriptor * mapDesc = 0; - _IOMemoryMap * result = 0; + IOMemoryMap * result = 0; OSIterator * iter; - _IOMemoryMap * mapping = (_IOMemoryMap *) __address; + IOMemoryMap * mapping = (IOMemoryMap *) __address; mach_vm_size_t offset = mapping->fOffset + __offset; mach_vm_size_t length = mapping->fLength; @@ -3233,12 +3486,12 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( if (((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) || ((_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical64)) { - phys = getPhysicalSegment(offset, &physLen); + phys = getPhysicalSegment(offset, &physLen, kIOMemoryMapperNone); if (!phys || (physLen < length)) continue; - 
mapDesc = IOMemoryDescriptor::withPhysicalAddress( - phys, length, _direction); + mapDesc = IOMemoryDescriptor::withAddressRange( + phys, length, getDirection() | kIOMemoryMapperNone, NULL); if (!mapDesc) continue; offset = 0; @@ -3250,8 +3503,8 @@ IOMemoryMap * IOMemoryDescriptor::makeMapping( // look for a compatible existing mapping if( (iter = OSCollectionIterator::withCollection(_mappings))) { - _IOMemoryMap * lookMapping; - while ((lookMapping = (_IOMemoryMap *) iter->getNextObject())) + IOMemoryMap * lookMapping; + while ((lookMapping = (IOMemoryMap *) iter->getNextObject())) { if ((result = lookMapping->copyCompatible(mapping))) { @@ -3314,305 +3567,11 @@ void IOMemoryDescriptor::removeMapping( _mappings->removeObject( mapping); } -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#undef super -#define super IOMemoryDescriptor - -OSDefineMetaClassAndStructors(IOSubMemoryDescriptor, IOMemoryDescriptor) - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent, - IOByteCount offset, IOByteCount length, - IODirection direction ) -{ - if( !parent) - return( false); - - if( (offset + length) > parent->getLength()) - return( false); - - /* - * We can check the _parent instance variable before having ever set it - * to an initial value because I/O Kit guarantees that all our instance - * variables are zeroed on an object's allocation. - */ - - if( !_parent) { - if( !super::init()) - return( false ); - } else { - /* - * An existing memory descriptor is being retargeted to - * point to somewhere else. Clean up our present state. 
- */ - - _parent->release(); - _parent = 0; - } - - parent->retain(); - _parent = parent; - _start = offset; - _length = length; - _direction = direction; - _tag = parent->getTag(); - - return( true ); -} - -void IOSubMemoryDescriptor::free( void ) -{ - if( _parent) - _parent->release(); - - super::free(); -} - - -IOReturn -IOSubMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt dataSize) const -{ - IOReturn rtn; - - if (kIOMDGetCharacteristics == op) { - - rtn = _parent->dmaCommandOperation(op, vData, dataSize); - if (kIOReturnSuccess == rtn) { - IOMDDMACharacteristics *data = (IOMDDMACharacteristics *) vData; - data->fLength = _length; - data->fSGCount = 0; // XXX gvdl: need to compute and pages - data->fPages = 0; - data->fPageAlign = 0; - } - - return rtn; - } - else if (kIOMDWalkSegments & op) { - if (dataSize < sizeof(IOMDDMAWalkSegmentArgs)) - return kIOReturnUnderrun; - - IOMDDMAWalkSegmentArgs *data = - reinterpret_cast(vData); - UInt offset = data->fOffset; - UInt remain = _length - offset; - if ((int) remain <= 0) - return (!remain)? 
kIOReturnOverrun : kIOReturnInternalError; - - data->fOffset = offset + _start; - rtn = _parent->dmaCommandOperation(op, vData, dataSize); - if (data->fLength > remain) - data->fLength = remain; - data->fOffset = offset; - - return rtn; - } - else - return kIOReturnBadArgument; -} - -addr64_t -IOSubMemoryDescriptor::getPhysicalSegment64(IOByteCount offset, IOByteCount * length) -{ - addr64_t address; - IOByteCount actualLength; - - assert(offset <= _length); - - if( length) - *length = 0; - - if( offset >= _length) - return( 0 ); - - address = _parent->getPhysicalSegment64( offset + _start, &actualLength ); - - if( address && length) - *length = min( _length - offset, actualLength ); - - return( address ); -} - -IOPhysicalAddress -IOSubMemoryDescriptor::getPhysicalSegment( IOByteCount offset, IOByteCount * length ) -{ - IOPhysicalAddress address; - IOByteCount actualLength; - - assert(offset <= _length); - - if( length) - *length = 0; - - if( offset >= _length) - return( 0 ); - - address = _parent->getPhysicalSegment( offset + _start, &actualLength ); - - if( address && length) - *length = min( _length - offset, actualLength ); - - return( address ); -} - -IOPhysicalAddress -IOSubMemoryDescriptor::getSourceSegment( IOByteCount offset, IOByteCount * length ) -{ - IOPhysicalAddress address; - IOByteCount actualLength; - - assert(offset <= _length); - - if( length) - *length = 0; - - if( offset >= _length) - return( 0 ); - - address = _parent->getSourceSegment( offset + _start, &actualLength ); - - if( address && length) - *length = min( _length - offset, actualLength ); - - return( address ); -} - -void * IOSubMemoryDescriptor::getVirtualSegment(IOByteCount offset, - IOByteCount * lengthOfSegment) -{ - return( 0 ); -} - -IOReturn IOSubMemoryDescriptor::doMap( - vm_map_t addressMap, - IOVirtualAddress * atAddress, - IOOptionBits options, - IOByteCount sourceOffset, - IOByteCount length ) -{ - panic("IOSubMemoryDescriptor::doMap"); - return 
(IOMemoryDescriptor::doMap(addressMap, atAddress, options, sourceOffset, length)); -} - -IOByteCount IOSubMemoryDescriptor::readBytes(IOByteCount offset, - void * bytes, IOByteCount length) -{ - IOByteCount byteCount; - - assert(offset <= _length); - - if( offset >= _length) - return( 0 ); - - LOCK; - byteCount = _parent->readBytes( _start + offset, bytes, - min(length, _length - offset) ); - UNLOCK; - - return( byteCount ); -} - -IOByteCount IOSubMemoryDescriptor::writeBytes(IOByteCount offset, - const void* bytes, IOByteCount length) -{ - IOByteCount byteCount; - - assert(offset <= _length); - - if( offset >= _length) - return( 0 ); - - LOCK; - byteCount = _parent->writeBytes( _start + offset, bytes, - min(length, _length - offset) ); - UNLOCK; - - return( byteCount ); -} - -IOReturn IOSubMemoryDescriptor::setPurgeable( IOOptionBits newState, - IOOptionBits * oldState ) -{ - IOReturn err; - - LOCK; - err = _parent->setPurgeable( newState, oldState ); - UNLOCK; - - return( err ); -} - -IOReturn IOSubMemoryDescriptor::performOperation( IOOptionBits options, - IOByteCount offset, IOByteCount length ) -{ - IOReturn err; - - assert(offset <= _length); - - if( offset >= _length) - return( kIOReturnOverrun ); - - LOCK; - err = _parent->performOperation( options, _start + offset, - min(length, _length - offset) ); - UNLOCK; - - return( err ); -} - -IOReturn IOSubMemoryDescriptor::prepare( - IODirection forDirection) -{ - IOReturn err; - - LOCK; - err = _parent->prepare( forDirection); - UNLOCK; - - return( err ); -} - -IOReturn IOSubMemoryDescriptor::complete( - IODirection forDirection) -{ - IOReturn err; - - LOCK; - err = _parent->complete( forDirection); - UNLOCK; - - return( err ); -} - -IOMemoryMap * IOSubMemoryDescriptor::makeMapping( - IOMemoryDescriptor * owner, - task_t intoTask, - IOVirtualAddress address, - IOOptionBits options, - IOByteCount offset, - IOByteCount length ) -{ - IOMemoryMap * mapping = 0; - - if (!(kIOMap64Bit & options)) - { - 
panic("IOSubMemoryDescriptor::makeMapping !64bit"); - } - - mapping = (IOMemoryMap *) _parent->makeMapping( - owner, - intoTask, - address, - options, _start + offset, length ); - - return( mapping ); -} - -/* ick */ - +#ifndef __LP64__ +// obsolete initializers +// - initWithOptions is the designated initializer bool -IOSubMemoryDescriptor::initWithAddress(void * address, +IOMemoryDescriptor::initWithAddress(void * address, IOByteCount length, IODirection direction) { @@ -3620,7 +3579,7 @@ IOSubMemoryDescriptor::initWithAddress(void * address, } bool -IOSubMemoryDescriptor::initWithAddress(vm_address_t address, +IOMemoryDescriptor::initWithAddress(IOVirtualAddress address, IOByteCount length, IODirection direction, task_t task) @@ -3629,7 +3588,7 @@ IOSubMemoryDescriptor::initWithAddress(vm_address_t address, } bool -IOSubMemoryDescriptor::initWithPhysicalAddress( +IOMemoryDescriptor::initWithPhysicalAddress( IOPhysicalAddress address, IOByteCount length, IODirection direction ) @@ -3638,7 +3597,7 @@ IOSubMemoryDescriptor::initWithPhysicalAddress( } bool -IOSubMemoryDescriptor::initWithRanges( +IOMemoryDescriptor::initWithRanges( IOVirtualRange * ranges, UInt32 withCount, IODirection direction, @@ -3649,7 +3608,7 @@ IOSubMemoryDescriptor::initWithRanges( } bool -IOSubMemoryDescriptor::initWithPhysicalRanges( IOPhysicalRange * ranges, +IOMemoryDescriptor::initWithPhysicalRanges( IOPhysicalRange * ranges, UInt32 withCount, IODirection direction, bool asReference) @@ -3657,6 +3616,13 @@ IOSubMemoryDescriptor::initWithPhysicalRanges( IOPhysicalRange * ranges, return( false ); } +void * IOMemoryDescriptor::getVirtualSegment(IOByteCount offset, + IOByteCount * lengthOfSegment) +{ + return( 0 ); +} +#endif /* !__LP64__ */ + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const @@ -3755,63 +3721,26 @@ bool IOGeneralMemoryDescriptor::serialize(OSSerialize * s) const return result; } 
-bool IOSubMemoryDescriptor::serialize(OSSerialize * s) const -{ - if (!s) { - return (false); - } - if (s->previouslySerialized(this)) return true; - - // Pretend we are a dictionary. - // We must duplicate the functionality of OSDictionary here - // because otherwise object references will not work; - // they are based on the value of the object passed to - // previouslySerialized and addXMLStartTag. - - if (!s->addXMLStartTag(this, "dict")) return false; - - char const *keys[3] = {"offset", "length", "parent"}; - - OSObject *values[3]; - values[0] = OSNumber::withNumber(_start, sizeof(_start) * 8); - if (values[0] == 0) - return false; - values[1] = OSNumber::withNumber(_length, sizeof(_length) * 8); - if (values[1] == 0) { - values[0]->release(); - return false; - } - values[2] = _parent; - - bool result = true; - for (int i=0; i<3; i++) { - if (!s->addString("") || - !s->addString(keys[i]) || - !s->addXMLEndTag("key") || - !values[i]->serialize(s)) { - result = false; - break; - } - } - values[0]->release(); - values[1]->release(); - if (!result) { - return false; - } - - return s->addXMLEndTag("dict"); -} - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 0); +#ifdef __LP64__ +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 1); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 2); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 3); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 4); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 5); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 6); +OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 7); +#else /* !__LP64__ */ OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 1); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 2); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 3); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 4); OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 5); 
-OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 6); -OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 7); +OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 6); +OSMetaClassDefineReservedUsed(IOMemoryDescriptor, 7); +#endif /* !__LP64__ */ OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 8); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 9); OSMetaClassDefineReservedUnused(IOMemoryDescriptor, 10); diff --git a/iokit/Kernel/IOMultiMemoryDescriptor.cpp b/iokit/Kernel/IOMultiMemoryDescriptor.cpp index fb1a38f22..262680dc8 100644 --- a/iokit/Kernel/IOMultiMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMultiMemoryDescriptor.cpp @@ -32,63 +32,6 @@ #define super IOMemoryDescriptor OSDefineMetaClassAndStructors(IOMultiMemoryDescriptor, IOMemoryDescriptor) -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOMultiMemoryDescriptor::initWithAddress( - void * /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOMultiMemoryDescriptor::initWithAddress( - vm_address_t /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ , - task_t /* withTask */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOMultiMemoryDescriptor::initWithPhysicalAddress( - IOPhysicalAddress /* address */ , - IOByteCount /* withLength */ , - IODirection /* withDirection */ ) -{ - return false; -} - - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOMultiMemoryDescriptor::initWithPhysicalRanges( - IOPhysicalRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - bool /* asReference */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -bool IOMultiMemoryDescriptor::initWithRanges( - 
IOVirtualRange * /* ranges */ , - UInt32 /* withCount */ , - IODirection /* withDirection */ , - task_t /* withTask */ , - bool /* asReference */ ) -{ - return false; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOMultiMemoryDescriptor * IOMultiMemoryDescriptor::withDescriptors( IOMemoryDescriptor ** descriptors, UInt32 withCount, @@ -118,8 +61,6 @@ IOMultiMemoryDescriptor * IOMultiMemoryDescriptor::withDescriptors( return me; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bool IOMultiMemoryDescriptor::initWithDescriptors( IOMemoryDescriptor ** descriptors, UInt32 withCount, @@ -155,7 +96,10 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( _descriptors = 0; _descriptorsCount = withCount; _descriptorsIsAllocated = asReference ? false : true; - _direction = withDirection; + _flags = withDirection; +#ifndef __LP64__ + _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ _length = 0; _mappings = 0; _tag = 0; @@ -185,8 +129,6 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( return true; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - void IOMultiMemoryDescriptor::free() { // @@ -205,8 +147,6 @@ void IOMultiMemoryDescriptor::free() super::free(); } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOReturn IOMultiMemoryDescriptor::prepare(IODirection forDirection) { // @@ -223,7 +163,7 @@ IOReturn IOMultiMemoryDescriptor::prepare(IODirection forDirection) if ( forDirection == kIODirectionNone ) { - forDirection = _direction; + forDirection = getDirection(); } for ( index = 0; index < _descriptorsCount; index++ ) @@ -244,8 +184,6 @@ IOReturn IOMultiMemoryDescriptor::prepare(IODirection forDirection) return status; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection) { // @@ 
-261,7 +199,7 @@ IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection) if ( forDirection == kIODirectionNone ) { - forDirection = _direction; + forDirection = getDirection(); } for ( unsigned index = 0; index < _descriptorsCount; index++ ) @@ -274,65 +212,10 @@ IOReturn IOMultiMemoryDescriptor::complete(IODirection forDirection) return statusFinal; } -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -addr64_t IOMultiMemoryDescriptor::getPhysicalSegment64( - IOByteCount offset, IOByteCount * length ) -{ - // - // This method returns the physical address of the byte at the given offset - // into the memory, and optionally the length of the physically contiguous - // segment from that offset. - // - - assert(offset <= _length); - - for ( unsigned index = 0; index < _descriptorsCount; index++ ) - { - if ( offset < _descriptors[index]->getLength() ) - { - return _descriptors[index]->getPhysicalSegment64(offset, length); - } - offset -= _descriptors[index]->getLength(); - } - - if ( length ) *length = 0; - - return 0; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOPhysicalAddress IOMultiMemoryDescriptor::getPhysicalSegment( - IOByteCount offset, IOByteCount * length ) -{ - // - // This method returns the physical address of the byte at the given offset - // into the memory, and optionally the length of the physically contiguous - // segment from that offset. 
- // - - assert(offset <= _length); - - for ( unsigned index = 0; index < _descriptorsCount; index++ ) - { - if ( offset < _descriptors[index]->getLength() ) - { - return _descriptors[index]->getPhysicalSegment(offset, length); - } - offset -= _descriptors[index]->getLength(); - } - - if ( length ) *length = 0; - - return 0; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOPhysicalAddress IOMultiMemoryDescriptor::getSourceSegment( +addr64_t IOMultiMemoryDescriptor::getPhysicalSegment( IOByteCount offset, - IOByteCount * length ) + IOByteCount * length, + IOOptionBits options ) { // // This method returns the physical address of the byte at the given offset @@ -346,7 +229,7 @@ IOPhysicalAddress IOMultiMemoryDescriptor::getSourceSegment( { if ( offset < _descriptors[index]->getLength() ) { - return _descriptors[index]->getSourceSegment(offset, length); + return _descriptors[index]->getPhysicalSegment(offset, length, options); } offset -= _descriptors[index]->getLength(); } @@ -355,83 +238,3 @@ IOPhysicalAddress IOMultiMemoryDescriptor::getSourceSegment( return 0; } - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -void * IOMultiMemoryDescriptor::getVirtualSegment( IOByteCount /* offset */ , - IOByteCount * /* length */ ) -{ - return 0; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOByteCount IOMultiMemoryDescriptor::readBytes( IOByteCount offset, - void * bytes, - IOByteCount withLength ) -{ - // - // Copies data from the memory descriptor's buffer at the given offset, to - // the specified buffer. Returns the number of bytes copied. 
- // - - IOByteCount bytesCopied = 0; - unsigned index; - - for ( index = 0; index < _descriptorsCount; index++ ) - { - if ( offset < _descriptors[index]->getLength() ) break; - offset -= _descriptors[index]->getLength(); - } - - for ( ; index < _descriptorsCount && withLength; index++) - { - IOByteCount copy = min(_descriptors[index]->getLength(), withLength); - IOByteCount copied = _descriptors[index]->readBytes(offset,bytes,copy); - - bytesCopied += copied; - if ( copied != copy ) break; - - bytes = ((UInt8 *) bytes) + copied; - withLength -= copied; - offset = 0; - } - - return bytesCopied; -} - -// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -IOByteCount IOMultiMemoryDescriptor::writeBytes( IOByteCount offset, - const void * bytes, - IOByteCount withLength ) -{ - // - // Copies data to the memory descriptor's buffer at the given offset, from - // the specified buffer. Returns the number of bytes copied. - // - - IOByteCount bytesCopied = 0; - unsigned index; - - for ( index = 0; index < _descriptorsCount; index++ ) - { - if ( offset < _descriptors[index]->getLength() ) break; - offset -= _descriptors[index]->getLength(); - } - - for ( ; index < _descriptorsCount && withLength; index++) - { - IOByteCount copy = min(_descriptors[index]->getLength(), withLength); - IOByteCount copied = _descriptors[index]->writeBytes(offset,bytes,copy); - - bytesCopied += copied; - if ( copied != copy ) break; - - bytes = ((UInt8 *) bytes) + copied; - withLength -= copied; - offset = 0; - } - - return bytesCopied; -} diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp index 499aa6cc8..4c51e4457 100644 --- a/iokit/Kernel/IONVRAM.cpp +++ b/iokit/Kernel/IONVRAM.cpp @@ -551,7 +551,7 @@ IOReturn IODTNVRAM::writeNVRAMPartition(const OSSymbol *partitionID, return kIOReturnSuccess; } -UInt32 IODTNVRAM::savePanicInfo(UInt8 *buffer, IOByteCount length) +IOByteCount IODTNVRAM::savePanicInfo(UInt8 *buffer, IOByteCount length) { if ((_piImage 
== 0) || (length <= 0)) return 0; @@ -1117,9 +1117,9 @@ bool IODTNVRAM::convertObjectToProp(UInt8 *buffer, UInt32 *length, if (tmpValue == 0xFFFFFFFF) { strlcpy((char *)buffer, "-1", *length - propNameLength); } else if (tmpValue < 1000) { - snprintf((char *)buffer, *length - propNameLength, "%ld", tmpValue); + snprintf((char *)buffer, *length - propNameLength, "%d", (uint32_t)tmpValue); } else { - snprintf((char *)buffer, *length - propNameLength, "0x%lx", tmpValue); + snprintf((char *)buffer, *length - propNameLength, "0x%x", (uint32_t)tmpValue); } break; @@ -1244,7 +1244,6 @@ enum { kMaxNVDataLength = 8 }; -#pragma options align=mac68k struct NVRAMProperty { IONVRAMDescriptor header; @@ -1253,7 +1252,6 @@ struct NVRAMProperty UInt8 dataLength; UInt8 data[ kMaxNVDataLength ]; }; -#pragma options align=reset bool IODTNVRAM::searchNVRAMProperty(IONVRAMDescriptor *hdr, UInt32 *where) { diff --git a/iokit/Kernel/IOPMPowerStateQueue.cpp b/iokit/Kernel/IOPMPowerStateQueue.cpp index dee1aff49..7081a7fae 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.cpp +++ b/iokit/Kernel/IOPMPowerStateQueue.cpp @@ -27,53 +27,16 @@ */ #include "IOPMPowerStateQueue.h" -#include "IOKit/IOLocks.h" -#undef super -#define super IOEventSource -OSDefineMetaClassAndStructors(IOPMPowerStateQueue, IOEventSource); - -#ifndef __ppc__ /* ppc does this right and doesn't need these routines */ -static -void * OSDequeueAtomic(void * volatile * inList, SInt32 inOffset) -{ - /* The _pointer_ is volatile, not the listhead itself */ - void * volatile oldListHead; - void * volatile newListHead; - - do { - oldListHead = *inList; - if (oldListHead == NULL) { - break; - } - - newListHead = *(void * volatile *) (((char *) oldListHead) + inOffset); - } while (! 
OSCompareAndSwap((UInt32)oldListHead, - (UInt32)newListHead, (volatile UInt32 *)inList)); - return oldListHead; -} - -static -void OSEnqueueAtomic(void * volatile * inList, void * inNewLink, SInt32 inOffset) -{ - /* The _pointer_ is volatile, not the listhead itself */ - void * volatile oldListHead; - void * volatile newListHead = inNewLink; - void * volatile * newLinkNextPtr = (void * volatile *) (((char *) inNewLink) + inOffset); - - do { - oldListHead = *inList; - *newLinkNextPtr = oldListHead; - } while (! OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, - (volatile UInt32 *)inList)); -} -#endif /* ! __ppc__ */ +#define super IOEventSource +OSDefineMetaClassAndStructors( IOPMPowerStateQueue, IOEventSource ) -IOPMPowerStateQueue *IOPMPowerStateQueue::PMPowerStateQueue(OSObject *inOwner) +IOPMPowerStateQueue * IOPMPowerStateQueue::PMPowerStateQueue( + OSObject * inOwner, Action inAction ) { - IOPMPowerStateQueue *me = new IOPMPowerStateQueue; + IOPMPowerStateQueue * me = new IOPMPowerStateQueue; - if(me && !me->init(inOwner, 0) ) + if (me && !me->init(inOwner, inAction)) { me->release(); return NULL; @@ -82,109 +45,60 @@ IOPMPowerStateQueue *IOPMPowerStateQueue::PMPowerStateQueue(OSObject *inOwner) return me; } -bool IOPMPowerStateQueue::init(OSObject *owner, Action action) +bool IOPMPowerStateQueue::init( OSObject * inOwner, Action inAction ) { - if(!(super::init(owner, (IOEventSource::Action) action))) return false; + if (!inAction || !(super::init(inOwner, inAction))) + return false; + + queue_init( &queueHead ); + + queueLock = IOLockAlloc(); + if (!queueLock) + return false; - // Queue of powerstate changes - changes = NULL; -#ifndef __ppc__ - if (!(tmpLock = IOLockAlloc())) panic("IOPMPowerStateQueue::init can't alloc lock"); -#endif return true; } - -bool IOPMPowerStateQueue::unIdleOccurred(IOService *inTarget, unsigned long inState) +bool IOPMPowerStateQueue::submitPowerEvent( + uint32_t eventType, + void * arg0, + void * arg1 ) { - 
PowerChangeEntry *new_one = NULL; - - new_one = (PowerChangeEntry *)IOMalloc(sizeof(PowerChangeEntry)); - if(!new_one) return false; - - new_one->actionType = IOPMPowerStateQueue::kUnIdle; - new_one->state = inState; - new_one->target = inTarget; - - // Change to queue -#ifndef __ppc__ - IOLockLock(tmpLock); -#endif - OSEnqueueAtomic((void **)&changes, (void *)new_one, 0); -#ifndef __ppc__ - IOLockUnlock(tmpLock); -#endif + PowerEventEntry * entry; + + entry = IONew(PowerEventEntry, 1); + if (!entry) + return false; + + entry->eventType = eventType; + entry->args[0] = arg0; + entry->args[1] = arg1; + + IOLockLock(queueLock); + queue_enter(&queueHead, entry, PowerEventEntry *, chain); + IOLockUnlock(queueLock); signalWorkAvailable(); return true; } -bool IOPMPowerStateQueue::featureChangeOccurred( - uint32_t inState, - IOService *inTarget) +bool IOPMPowerStateQueue::checkForWork( void ) { - PowerChangeEntry *new_one = NULL; - - new_one = (PowerChangeEntry *)IOMalloc(sizeof(PowerChangeEntry)); - if(!new_one) return false; - - new_one->actionType = IOPMPowerStateQueue::kPMFeatureChange; - new_one->state = inState; - new_one->target = inTarget; - - // Change to queue -#ifdef __i386__ - IOLockLock(tmpLock); -#endif - OSEnqueueAtomic((void **)&changes, (void *)new_one, 0); -#ifdef __i386__ - IOLockUnlock(tmpLock); -#endif - signalWorkAvailable(); + IOPMPowerStateQueueAction queueAction = (IOPMPowerStateQueueAction) action; + PowerEventEntry * entry; - return true; -} + IOLockLock(queueLock); + while (!queue_empty(&queueHead)) + { + queue_remove_first(&queueHead, entry, PowerEventEntry *, chain); + IOLockUnlock(queueLock); + (*queueAction)(owner, entry->eventType, entry->args[0], entry->args[1]); + IODelete(entry, PowerEventEntry, 1); -// checkForWork() is called in a gated context -bool IOPMPowerStateQueue::checkForWork() -{ - PowerChangeEntry *theNode; - uint32_t theState; - IOService *theTarget; - uint16_t theAction; - - // Dequeue and process the state change request 
-#ifndef __ppc__ - IOLockLock(tmpLock); -#endif - if((theNode = (PowerChangeEntry *)OSDequeueAtomic((void **)&changes, 0))) - { -#ifndef __ppc__ - IOLockUnlock(tmpLock); -#endif - theState = theNode->state; - theTarget = theNode->target; - theAction = theNode->actionType; - IOFree((void *)theNode, sizeof(PowerChangeEntry)); - - switch (theAction) - { - case kUnIdle: - theTarget->command_received((void *)theState, 0, 0, 0); - break; - - case kPMFeatureChange: - theTarget->messageClients(theState, theTarget); - break; - } - } -#ifndef __ppc__ - else { - IOLockUnlock(tmpLock); - } -#endif - // Return true if there's more work to be done - if(changes) return true; - else return false; + IOLockLock(queueLock); + } + IOLockUnlock(queueLock); + + return false; } diff --git a/iokit/Kernel/IOPMPowerStateQueue.h b/iokit/Kernel/IOPMPowerStateQueue.h index a2862c500..713cb1afe 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.h +++ b/iokit/Kernel/IOPMPowerStateQueue.h @@ -29,54 +29,34 @@ #ifndef _IOPMPOWERSTATEQUEUE_H_ #define _IOPMPOWERSTATEQUEUE_H_ -#include #include -#include -extern "C" { - #include -} +#include +#include + +typedef void (*IOPMPowerStateQueueAction)(OSObject *, uint32_t event, void *, void *); class IOPMPowerStateQueue : public IOEventSource - { - OSDeclareDefaultStructors(IOPMPowerStateQueue); +{ + OSDeclareDefaultStructors(IOPMPowerStateQueue) private: - enum { - kUnIdle = 0, - kPMFeatureChange = 1 - }; - - // Queue of requested states - struct PowerChangeEntry - { - void *next; - uint16_t actionType; - uint32_t state; - IOService *target; + struct PowerEventEntry { + queue_chain_t chain; + uint32_t eventType; + void * args[2]; }; - void *changes; -#ifndef __ppc__ - IOLock *tmpLock; -#endif + queue_head_t queueHead; + IOLock * queueLock; protected: - virtual bool checkForWork(void); + virtual bool checkForWork( void ); + virtual bool init( OSObject * owner, Action action ); public: - //typedef void (*Action)(IOService *target, unsigned long state); + static 
IOPMPowerStateQueue * PMPowerStateQueue( OSObject * owner, Action action ); - virtual bool init(OSObject *owner, Action action = 0); + bool submitPowerEvent( uint32_t eventType, void * arg0 = 0, void * arg1 = 0 ); +}; - // static initialiser - static IOPMPowerStateQueue *PMPowerStateQueue(OSObject *owner); - - // Enqueues an activityTickle request to be executed on the workloop - virtual bool unIdleOccurred(IOService *, unsigned long); - - // Enqueues a feature changed notify request to be executed on the workloop - virtual bool featureChangeOccurred(uint32_t, IOService *); - }; - - #endif /* _IOPMPOWERSTATEQUEUE_H_ */ - +#endif /* _IOPMPOWERSTATEQUEUE_H_ */ diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 1ff71887b..c98da3b9f 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,14 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include +#include #include #include -#include #include #include #include +#include #include #include #include @@ -40,6 +42,7 @@ #include "IOKit/pwr_mgt/IOPowerConnection.h" #include "IOPMPowerStateQueue.h" #include +#include // IOServicePMPrivate #if HIBERNATION #include #endif @@ -47,33 +50,67 @@ #include #include #include "IOServicePrivate.h" // _IOServiceInterestNotifier +#include "IOServicePMPrivate.h" +__BEGIN_DECLS +#include +__END_DECLS -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) __BEGIN_DECLS #include "IOPMrootDomainInternal.h" __END_DECLS #endif +#define kIOPMrootDomainClass "IOPMrootDomain" + +#define LOG_PREFIX "PMRD: " + +#define LOG(x...) do { \ + kprintf(LOG_PREFIX x); IOLog(x); } while (false) -//#define DEBUG 1 -#if DEBUG -#define DEBUG_LOG(x...) do { kprintf(x); } while (0) +#define KLOG(x...) 
do { \ + kprintf(LOG_PREFIX x); } while (false) + +#define DLOG(x...) do { \ + if (kIOLogPMRootDomain & gIOKitDebug) \ + kprintf(LOG_PREFIX x); } while (false) + +#define CHECK_THREAD_CONTEXT +#ifdef CHECK_THREAD_CONTEXT +static IOWorkLoop * gIOPMWorkLoop = 0; +#define ASSERT_GATED(x) \ +do { \ + if (gIOPMWorkLoop && gIOPMWorkLoop->inGate() != true) { \ + panic("RootDomain: not inside PM gate"); \ + } \ +} while(false) #else -#define DEBUG_LOG(x...) -#endif -#define HaltRestartLog(x...) do { kprintf(x); } while (0) +#define ASSERT_GATED(x) +#endif /* CHECK_THREAD_CONTEXT */ + +// Event types for IOPMPowerStateQueue::submitPowerEvent() +enum { + kPowerEventFeatureChanged = 1, + kPowerEventReceivedPowerNotification, + kPowerEventSystemBootCompleted, + kPowerEventSystemShutdown, + kPowerEventUserDisabledSleep, + kPowerEventConfigdRegisteredInterest, + kPowerEventAggressivenessChanged +}; extern "C" { -IOReturn OSMetaClassSystemSleepOrWake( UInt32 ); +IOReturn OSKextSystemSleepOrWake( UInt32 ); } extern const IORegistryPlane * gIOPowerPlane; -IOReturn broadcast_aggressiveness ( OSObject *, void *, void *, void *, void * ); -static void sleepTimerExpired(thread_call_param_t); -static void wakeupClamshellTimerExpired ( thread_call_param_t us); +static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t ); +static void wakeupClamshellTimerExpired( thread_call_param_t us, thread_call_param_t ); static void notifySystemShutdown( IOService * root, unsigned long event ); +static bool clientMessageFilter( OSObject * object, void * context ); +static void handleAggressivesFunction( thread_call_param_t param1, thread_call_param_t param2 ); // "IOPMSetSleepSupported" callPlatformFunction name static const OSSymbol *sleepSupportedPEFunction = NULL; @@ -84,18 +121,6 @@ static const OSSymbol *sleepSupportedPEFunction = NULL; | kIOPMSupportedOnBatt \ | kIOPMSupportedOnUPS) -#define number_of_power_states 5 -#define OFF_STATE 0 -#define RESTART_STATE 1 -#define 
SLEEP_STATE 2 -#define DOZE_STATE 3 -#define ON_STATE 4 - -#define ON_POWER kIOPMPowerOn -#define RESTART_POWER kIOPMRestart -#define SLEEP_POWER kIOPMAuxPowerOn -#define DOZE_POWER kIOPMDoze - enum { // not idle around autowake time, secs @@ -103,24 +128,147 @@ enum kAutoWakePostWindow = 15 }; - #define kLocalEvalClamshellCommand (1 << 15) -static IOPMPowerState ourPowerStates[number_of_power_states] = { - // state 0, off - {1,0, 0, 0,0,0,0,0,0,0,0,0}, - // state 1, restart - {1,kIOPMRestartCapability, kIOPMRestart, RESTART_POWER,0,0,0,0,0,0,0,0}, - // state 2, sleep - {1,kIOPMSleepCapability, kIOPMSleep, SLEEP_POWER,0,0,0,0,0,0,0,0}, - // state 3, doze - {1,kIOPMDoze, kIOPMDoze, DOZE_POWER,0,0,0,0,0,0,0,0}, - // state 4, on - {1,kIOPMPowerOn, kIOPMPowerOn, ON_POWER,0,0,0,0,0,0,0,0}, +enum { + OFF_STATE = 0, + RESTART_STATE = 1, + SLEEP_STATE = 2, + DOZE_STATE = 3, + ON_STATE = 4, + NUM_POWER_STATES +}; + +#define ON_POWER kIOPMPowerOn +#define RESTART_POWER kIOPMRestart +#define SLEEP_POWER kIOPMAuxPowerOn +#define DOZE_POWER kIOPMDoze + +static IOPMPowerState ourPowerStates[NUM_POWER_STATES] = +{ + {1, 0, 0, 0, 0,0,0,0,0,0,0,0}, + {1, kIOPMRestartCapability, kIOPMRestart, RESTART_POWER, 0,0,0,0,0,0,0,0}, + {1, kIOPMSleepCapability, kIOPMSleep, SLEEP_POWER, 0,0,0,0,0,0,0,0}, + {1, kIOPMDoze, kIOPMDoze, DOZE_POWER, 0,0,0,0,0,0,0,0}, + {1, kIOPMPowerOn, kIOPMPowerOn, ON_POWER, 0,0,0,0,0,0,0,0} +}; + +// Clients eligible to receive system power messages. +enum { + kMessageClientNone = 0, + kMessageClientAll, + kMessageClientConfigd +}; + +// Run states (R-state) defined within the ON power state. +enum { + kRStateNormal = 0, + kRStateDark, + kRStateMaintenance, + kRStateCount +}; + +// IOService in power plane can be tagged with following flags. +enum { + kServiceFlagGraphics = 0x01, + kServiceFlagNoPowerUp = 0x02, + kServiceFlagTopLevelPCI = 0x04 +}; + +// Flags describing R-state features and capabilities. 
+enum { + kRStateFlagNone = 0x00000000, + kRStateFlagSuppressGraphics = 0x00000001, + kRStateFlagSuppressMessages = 0x00000002, + kRStateFlagSuppressPCICheck = 0x00000004, + kRStateFlagDisableIdleSleep = 0x00000008 +}; + +#if ROOT_DOMAIN_RUN_STATES + +// Table of flags for each R-state. +static uint32_t gRStateFlags[ kRStateCount ] = +{ + kRStateFlagNone, + + /* Dark wake */ + kRStateFlagSuppressGraphics, + + /* Maintenance wake */ + kRStateFlagSuppressGraphics | + kRStateFlagSuppressMessages | + kRStateFlagSuppressPCICheck | + kRStateFlagDisableIdleSleep +}; + +static IONotifier * gConfigdNotifier = 0; + +#define kIOPMRootDomainRunStateKey "Run State" +#define kIOPMRootDomainWakeTypeMaintenance "Maintenance" + +#endif /* ROOT_DOMAIN_RUN_STATES */ + +// Special interest that entitles the interested client from receiving +// all system messages. Used by pmconfigd to support maintenance wake. +// +#define kIOPMPrivilegedPowerInterest "IOPMPrivilegedPowerInterest" + +static IONotifier * gSysPowerDownNotifier = 0; + +/* + * Aggressiveness + */ +#define AGGRESSIVES_LOCK() IOLockLock(featuresDictLock) +#define AGGRESSIVES_UNLOCK() IOLockUnlock(featuresDictLock) + +#define kAggressivesMinValue 1 + +static uint32_t gAggressivesState = 0; + +enum { + kAggressivesStateBusy = 0x01, + kAggressivesStateQuickSpindown = 0x02 +}; + +struct AggressivesRecord { + uint32_t flags; + uint32_t type; + uint32_t value; +}; + +struct AggressivesRequest { + queue_chain_t chain; + uint32_t options; + uint32_t dataType; + union { + IOService * service; + AggressivesRecord record; + } data; +}; + +enum { + kAggressivesRequestTypeService = 1, + kAggressivesRequestTypeRecord +}; + +enum { + kAggressivesOptionSynchronous = 0x00000001, + kAggressivesOptionQuickSpindownEnable = 0x00000100, + kAggressivesOptionQuickSpindownDisable = 0x00000200, + kAggressivesOptionQuickSpindownMask = 0x00000300 +}; + +enum { + kAggressivesRecordFlagModified = 0x00000001, + kAggressivesRecordFlagMinValue = 0x00000002 
+ }; static IOPMrootDomain * gRootDomain; static UInt32 gSleepOrShutdownPending = 0; +static UInt32 gWillShutdown = 0; +static uint32_t gMessageClientType = kMessageClientNone; +static UInt32 gSleepWakeUUIDIsSet = false; struct timeval gIOLastSleepTime; struct timeval gIOLastWakeTime; @@ -133,9 +281,13 @@ enum { kInformableCount = 2 }; +const OSSymbol *gIOPMStatsApplicationResponseTimedOut; +const OSSymbol *gIOPMStatsApplicationResponseCancel; +const OSSymbol *gIOPMStatsApplicationResponseSlow; + class PMSettingObject : public OSObject { - OSDeclareDefaultStructors(PMSettingObject) + OSDeclareFinalStructors(PMSettingObject) private: IOPMrootDomain *parent; IOPMSettingControllerCallback func; @@ -158,7 +310,47 @@ class PMSettingObject : public OSObject void free(void); }; + +/* + * PMTraceWorker + * Internal helper object for logging trace points to RTC + * IOPMrootDomain and only IOPMrootDomain should instantiate + * exactly one of these. + */ + +typedef void (*IOPMTracePointHandler)( + void * target, uint32_t code, uint32_t data ); + +class PMTraceWorker : public OSObject +{ + OSDeclareDefaultStructors(PMTraceWorker) +public: + typedef enum { kPowerChangeStart, kPowerChangeCompleted } change_t; + + static PMTraceWorker *tracer( IOPMrootDomain * ); + void tracePCIPowerChange(change_t, IOService *, uint32_t, uint32_t); + void tracePoint(uint8_t phase); + void traceLoginWindowPhase(uint8_t phase); + int recordTopLevelPCIDevice(IOService *); + void RTC_TRACE(void); + virtual bool serialize(OSSerialize *s) const; + + IOPMTracePointHandler tracePointHandler; + void * tracePointTarget; +private: + IOPMrootDomain *owner; + IOLock *pciMappingLock; + OSArray *pciDeviceBitMappings; + + uint8_t tracePhase; + uint8_t loginWindowPhase; + uint8_t addedToRegistry; + uint8_t unused0; + uint32_t pciBusyBitMask; +}; + /* + * PMHaltWorker * Internal helper object for Shutdown/Restart notifications. 
*/ #define kPMHaltMaxWorkers 8 @@ -166,7 +358,7 @@ class PMSettingObject : public OSObject class PMHaltWorker : public OSObject { - OSDeclareDefaultStructors( PMHaltWorker ) + OSDeclareFinalStructors( PMHaltWorker ) public: IOService * service; // service being worked on @@ -177,17 +369,17 @@ class PMHaltWorker : public OSObject bool timeout; // service took too long static PMHaltWorker * worker( void ); - static void main( void * arg ); + static void main( void * arg, wait_result_t waitResult ); static void work( PMHaltWorker * me ); static void checkTimeout( PMHaltWorker * me, AbsoluteTime * now ); virtual void free( void ); }; -OSDefineMetaClassAndStructors( PMHaltWorker, OSObject ) +OSDefineMetaClassAndFinalStructors( PMHaltWorker, OSObject ) #define super IOService -OSDefineMetaClassAndStructors(IOPMrootDomain,IOService) +OSDefineMetaClassAndFinalStructors(IOPMrootDomain, IOService) extern "C" { @@ -221,14 +413,17 @@ extern "C" return gRootDomain->shutdownSystem(); } - void IOSystemShutdownNotification ( void ) + void IOSystemShutdownNotification ( void ) { - IOCatalogue::disableExternalLinker(); - for ( int i = 0; i < 100; i++ ) - { - if ( OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) ) break; - IOSleep( 100 ); - } + if (OSCompareAndSwap(0, 1, &gWillShutdown)) + { + OSKext::willShutdown(); + for (int i = 0; i < 100; i++) + { + if (OSCompareAndSwap(0, 1, &gSleepOrShutdownPending)) break; + IOSleep( 100 ); + } + } } int sync_internal(void); @@ -275,11 +470,11 @@ ADB will turn on again so that they can wake the system out of Doze (keyboard/mo to be tickled)). 
*/ -// ********************************************************************************** +//****************************************************************************** IOPMrootDomain * IOPMrootDomain::construct( void ) { - IOPMrootDomain *root; + IOPMrootDomain *root; root = new IOPMrootDomain; if( root) @@ -288,24 +483,24 @@ IOPMrootDomain * IOPMrootDomain::construct( void ) return( root ); } -// ********************************************************************************** +//****************************************************************************** -static void disk_sync_callout(thread_call_param_t p0, thread_call_param_t p1) +static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) { IOService *rootDomain = (IOService *) p0; unsigned long pmRef = (unsigned long) p1; - DEBUG_LOG("disk_sync_callout: start\n"); + DLOG("disk_sync_callout start\n"); #if HIBERNATION IOHibernateSystemSleep(); #endif sync_internal(); rootDomain->allowPowerChange(pmRef); - DEBUG_LOG("disk_sync_callout: finish\n"); + DLOG("disk_sync_callout finish\n"); } -// ********************************************************************************** +//****************************************************************************** static UInt32 computeDeltaTimeMS( const AbsoluteTime * startTime ) { @@ -322,31 +517,126 @@ static UInt32 computeDeltaTimeMS( const AbsoluteTime * startTime ) return (UInt32)(nano / 1000000ULL); } -// ********************************************************************************** -// start -// -// We don't do much here. The real initialization occurs when the platform -// expert informs us we are the root. 
-// ********************************************************************************** +//****************************************************************************** -#define kRootDomainSettingsCount 16 +static int +sysctl_sleepwaketime SYSCTL_HANDLER_ARGS +{ + struct timeval *swt = (struct timeval *)arg1; + struct proc *p = req->p; + + if (p == kernproc) { + return sysctl_io_opaque(req, swt, sizeof(*swt), NULL); + } else if(proc_is64bit(p)) { + struct user64_timeval t; + t.tv_sec = swt->tv_sec; + t.tv_usec = swt->tv_usec; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } else { + struct user32_timeval t; + t.tv_sec = swt->tv_sec; + t.tv_usec = swt->tv_usec; + return sysctl_io_opaque(req, &t, sizeof(t), NULL); + } +} + +static SYSCTL_PROC(_kern, OID_AUTO, sleeptime, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, + &gIOLastSleepTime, 0, sysctl_sleepwaketime, "S,timeval", ""); + +static SYSCTL_PROC(_kern, OID_AUTO, waketime, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, + &gIOLastWakeTime, 0, sysctl_sleepwaketime, "S,timeval", ""); + + +static int +sysctl_willshutdown +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new_value, changed; + int error = sysctl_io_number(req, gWillShutdown, sizeof(int), &new_value, &changed); + if (changed) { + if (!gWillShutdown && (new_value == 1)) { + IOSystemShutdownNotification(); + } else + error = EINVAL; + } + return(error); +} + +static SYSCTL_PROC(_kern, OID_AUTO, willshutdown, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + 0, 0, sysctl_willshutdown, "I", ""); + +#if !CONFIG_EMBEDDED + +static int +sysctl_progressmeterenable +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + int new_value, changed; + + error = sysctl_io_number(req, vc_progress_meter_enable, sizeof(int), &new_value, &changed); + + if (changed) + vc_enable_progressmeter(new_value); 
+ + return (error); +} + +static int +sysctl_progressmeter +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int error; + int new_value, changed; + + error = sysctl_io_number(req, vc_progress_meter_value, sizeof(int), &new_value, &changed); + + if (changed) + vc_set_progressmeter(new_value); + + return (error); +} + +static SYSCTL_PROC(_kern, OID_AUTO, progressmeterenable, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + 0, 0, sysctl_progressmeterenable, "I", ""); -static SYSCTL_STRUCT(_kern, OID_AUTO, sleeptime, - CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - &gIOLastSleepTime, timeval, ""); +static SYSCTL_PROC(_kern, OID_AUTO, progressmeter, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + 0, 0, sysctl_progressmeter, "I", ""); -static SYSCTL_STRUCT(_kern, OID_AUTO, waketime, - CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - &gIOLastWakeTime, timeval, ""); +#endif static const OSSymbol * gIOPMSettingAutoWakeSecondsKey; +static const OSSymbol * gIOPMSettingMaintenanceWakeCalendarKey; + +//****************************************************************************** +// start +// +//****************************************************************************** + +#define kRootDomainSettingsCount 16 -bool IOPMrootDomain::start ( IOService * nub ) +bool IOPMrootDomain::start( IOService * nub ) { OSIterator *psIterator; OSDictionary *tmpDict; + super::start(nub); + + gRootDomain = this; gIOPMSettingAutoWakeSecondsKey = OSSymbol::withCString(kIOPMSettingAutoWakeSecondsKey); + gIOPMSettingMaintenanceWakeCalendarKey = + OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey); + + gIOPMStatsApplicationResponseTimedOut = OSSymbol::withCString(kIOPMStatsResponseTimedOut); + gIOPMStatsApplicationResponseCancel = OSSymbol::withCString(kIOPMStatsResponseCancel); + gIOPMStatsApplicationResponseSlow = OSSymbol::withCString(kIOPMStatsResponseSlow); + + sleepSupportedPEFunction = 
OSSymbol::withCString("IOPMSetSleepSupported"); const OSSymbol *settingsArr[kRootDomainSettingsCount] = { @@ -367,36 +657,57 @@ bool IOPMrootDomain::start ( IOService * nub ) OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey), OSSymbol::withCString(kIOPMStateConsoleShutdown) }; - - - pmPowerStateQueue = 0; - _reserved = (ExpansionData *)IOMalloc(sizeof(ExpansionData)); - if(!_reserved) return false; + queue_init(&aggressivesQueue); + aggressivesThreadCall = thread_call_allocate(handleAggressivesFunction, this); + aggressivesData = OSData::withCapacity( + sizeof(AggressivesRecord) * (kPMLastAggressivenessType + 4)); - super::start(nub); + featuresDictLock = IOLockAlloc(); + settingsCtrlLock = IORecursiveLockAlloc(); + setPMRootDomain(this); + + extraSleepTimer = thread_call_allocate( + idleSleepTimerExpired, + (thread_call_param_t) this); - gRootDomain = this; + clamshellWakeupIgnore = thread_call_allocate( + wakeupClamshellTimerExpired, + (thread_call_param_t) this); - PMinit(); + diskSyncCalloutEntry = thread_call_allocate( + &disk_sync_callout, + (thread_call_param_t) this); - sleepSupportedPEFunction = OSSymbol::withCString("IOPMSetSleepSupported"); canSleep = true; - setProperty(kIOSleepSupportedKey,true); + setProperty(kIOSleepSupportedKey, true); + + bzero(&pmStats, sizeof(pmStats)); + + pmTracer = PMTraceWorker::tracer(this); + updateRunState(kRStateNormal); userDisabledAllSleep = false; allowSleep = true; sleepIsSupported = true; systemBooting = true; sleepSlider = 0; - idleSleepPending = false; + idleSleepTimerPending = false; wrangler = NULL; sleepASAP = false; clamshellIsClosed = false; clamshellExists = false; ignoringClamshell = true; - ignoringClamshellDuringWakeup = false; - acAdaptorConnect = true; + ignoringClamshellOnWake = false; + acAdaptorConnected = true; + + queuedSleepWakeUUIDString = NULL; + pmStatsAppResponses = OSArray::withCapacity(5); + _statsNameKey = OSSymbol::withCString(kIOPMStatsNameKey); + _statsPIDKey = 
OSSymbol::withCString(kIOPMStatsPIDKey); + _statsTimeMSKey = OSSymbol::withCString(kIOPMStatsTimeMSKey); + _statsResponseTypeKey = OSSymbol::withCString(kIOPMStatsApplicationResponseTypeKey); + _statsMessageTypeKey = OSSymbol::withCString(kIOPMStatsMessageTypeKey); idxPMCPUClamshell = kCPUUnknownIndex; idxPMCPULimitedPower = kCPUUnknownIndex; @@ -415,51 +726,55 @@ bool IOPMrootDomain::start ( IOService * nub ) 0); fPMSettingsDict = OSDictionary::withCapacity(5); - - pmPowerStateQueue = IOPMPowerStateQueue::PMPowerStateQueue(this); + + PMinit(); // creates gIOPMWorkLoop + + // Create IOPMPowerStateQueue used to queue external power + // events, and to handle those events on the PM work loop. + pmPowerStateQueue = IOPMPowerStateQueue::PMPowerStateQueue( + this, OSMemberFunctionCast(IOEventSource::Action, this, + &IOPMrootDomain::dispatchPowerEvent)); getPMworkloop()->addEventSource(pmPowerStateQueue); - - featuresDictLock = IOLockAlloc(); - settingsCtrlLock = IORecursiveLockAlloc(); - - extraSleepTimer = thread_call_allocate( - (thread_call_func_t)sleepTimerExpired, - (thread_call_param_t) this); - clamshellWakeupIgnore = thread_call_allocate( - (thread_call_func_t)wakeupClamshellTimerExpired, - (thread_call_param_t) this); - diskSyncCalloutEntry = thread_call_allocate( - &disk_sync_callout, - (thread_call_param_t) this); +#ifdef CHECK_THREAD_CONTEXT + gIOPMWorkLoop = getPMworkloop(); +#endif - // create our parent + // create our power parent patriarch = new IORootParent; patriarch->init(); patriarch->attach(this); patriarch->start(this); patriarch->addPowerChild(this); - - registerPowerDriver(this,ourPowerStates,number_of_power_states); - setPMRootDomain(this); + registerPowerDriver(this, ourPowerStates, NUM_POWER_STATES); + // set a clamp until we sleep changePowerStateToPriv(ON_STATE); // install power change handler - registerPrioritySleepWakeInterest( &sysPowerDownHandler, this, 0); + gSysPowerDownNotifier = registerPrioritySleepWakeInterest( 
&sysPowerDownHandler, this, 0); #if !NO_KERNEL_HID // Register for a notification when IODisplayWrangler is published - _displayWranglerNotifier = addNotification( - gIOPublishNotification, serviceMatching("IODisplayWrangler"), - &displayWranglerPublished, this, 0); + if ((tmpDict = serviceMatching("IODisplayWrangler"))) + { + _displayWranglerNotifier = addMatchingNotification( + gIOPublishNotification, tmpDict, + (IOServiceMatchingNotificationHandler) &displayWranglerPublished, + this, 0); + tmpDict->release(); + } #endif // Battery location published - ApplePMU support only - _batteryPublishNotifier = addNotification( - gIOPublishNotification, serviceMatching("IOPMPowerSource"), - &batteryPublished, this, this); - + if ((tmpDict = serviceMatching("IOPMPowerSource"))) + { + _batteryPublishNotifier = addMatchingNotification( + gIOPublishNotification, tmpDict, + (IOServiceMatchingNotificationHandler) &batteryPublished, + this, this); + tmpDict->release(); + } const OSSymbol *ucClassName = OSSymbol::withCStringNoCopy("RootDomainUserClient"); setProperty(gIOUserClientClassKey, (OSObject *) ucClassName); @@ -479,9 +794,13 @@ bool IOPMrootDomain::start ( IOService * nub ) psIterator->release(); } - sysctl_register_oid(&sysctl__kern_sleeptime); sysctl_register_oid(&sysctl__kern_waketime); + sysctl_register_oid(&sysctl__kern_willshutdown); +#if !CONFIG_EMBEDDED + sysctl_register_oid(&sysctl__kern_progressmeterenable); + sysctl_register_oid(&sysctl__kern_progressmeter); +#endif /* !CONFIG_EMBEDDED */ #if HIBERNATION IOHibernateSystemInit(this); @@ -492,22 +811,24 @@ bool IOPMrootDomain::start ( IOService * nub ) return true; } -// ********************************************************************************** + +//****************************************************************************** // setProperties // // Receive a setProperty call // The "System Boot" property means the system is completely booted. 
-// ********************************************************************************** -IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) -{ - IOReturn return_value = kIOReturnSuccess; - OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); - OSBoolean *b; - OSNumber *n; - OSString *str; - OSSymbol *type; - OSObject *obj; - unsigned int i; +//****************************************************************************** + +IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) +{ + IOReturn return_value = kIOReturnSuccess; + OSDictionary *dict = OSDynamicCast(OSDictionary, props_obj); + OSBoolean *b; + OSNumber *n; + OSString *str; + OSSymbol *type; + OSObject *obj; + unsigned int i; const OSSymbol *boot_complete_string = OSSymbol::withCString("System Boot Complete"); @@ -531,7 +852,11 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) #endif const OSSymbol *sleepdisabled_string = OSSymbol::withCString("SleepDisabled"); - + const OSSymbol *ondeck_sleepwake_uuid_string = + OSSymbol::withCString(kIOPMSleepWakeUUIDKey); + const OSSymbol *loginwindow_tracepoint_string = + OSSymbol::withCString(kIOPMLoginWindowSecurityDebugKey); + if(!dict) { return_value = kIOReturnBadArgument; @@ -541,24 +866,14 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) if ((n = OSDynamicCast(OSNumber, dict->getObject(idle_seconds_string)))) { setProperty(idle_seconds_string, n); - idleSeconds = n->unsigned32BitValue(); + idleSeconds = n->unsigned32BitValue(); } - if( systemBooting - && boot_complete_string - && dict->getObject(boot_complete_string)) + if (boot_complete_string && dict->getObject(boot_complete_string)) { - systemBooting = false; - adjustPowerState(); - - // If lid is closed, re-send lid closed notification - // now that booting is complete. 
- if( clamshellIsClosed ) - { - this->receivePowerNotification(kLocalEvalClamshellCommand); - } + pmPowerStateQueue->submitPowerEvent( kPowerEventSystemBootCompleted ); } - + if( battery_warning_disabled_string && dict->getObject(battery_warning_disabled_string)) { @@ -569,28 +884,7 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) if( sys_shutdown_string && (b = OSDynamicCast(OSBoolean, dict->getObject(sys_shutdown_string)))) { - - if(kOSBooleanTrue == b) - { - /* We set systemShutdown = true during shutdown - to prevent sleep at unexpected times while loginwindow is trying - to shutdown apps and while the OS is trying to transition to - complete power of. - - Set to true during shutdown, as soon as loginwindow shows - the "shutdown countdown dialog", through individual app - termination, and through black screen kernel shutdown. - */ - kprintf("systemShutdown true\n"); - systemShutdown = true; - } else { - /* - A shutdown was initiated, but then the shutdown - was cancelled, clearing systemShutdown to false here. 
- */ - kprintf("systemShutdown false\n"); - systemShutdown = false; - } + pmPowerStateQueue->submitPowerEvent(kPowerEventSystemShutdown, (void *) b); } if( stall_halt_string @@ -626,8 +920,37 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) && (b = OSDynamicCast(OSBoolean, dict->getObject(sleepdisabled_string))) ) { setProperty(sleepdisabled_string, b); - - userDisabledAllSleep = (kOSBooleanTrue == b); + pmPowerStateQueue->submitPowerEvent(kPowerEventUserDisabledSleep, (void *) b); + } + + if (ondeck_sleepwake_uuid_string + && (obj = dict->getObject(ondeck_sleepwake_uuid_string))) + { + // Clear the currently published UUID + if (kOSBooleanFalse == obj) + { + publishSleepWakeUUID(NULL); + } + + // Cache UUID for an upcoming sleep/wake + if ((str = OSDynamicCast(OSString, obj))) + { + if (queuedSleepWakeUUIDString) { + queuedSleepWakeUUIDString->release(); + queuedSleepWakeUUIDString = NULL; + } + queuedSleepWakeUUIDString = str; + queuedSleepWakeUUIDString->retain(); + DLOG("SleepWake UUID queued: %s\n", + queuedSleepWakeUUIDString->getCStringNoCopy()); + } + } + + if (loginwindow_tracepoint_string + && (n = OSDynamicCast(OSNumber, dict->getObject(loginwindow_tracepoint_string))) + && pmTracer) + { + pmTracer->traceLoginWindowPhase( n->unsigned8BitValue() ); } // Relay our allowed PM settings onto our registered PM clients @@ -664,210 +987,741 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) } exit: - if(sleepdisabled_string) sleepdisabled_string->release(); if(boot_complete_string) boot_complete_string->release(); + if(sys_shutdown_string) sys_shutdown_string->release(); if(stall_halt_string) stall_halt_string->release(); + if (battery_warning_disabled_string) battery_warning_disabled_string->release(); if(idle_seconds_string) idle_seconds_string->release(); + if(sleepdisabled_string) sleepdisabled_string->release(); + if(ondeck_sleepwake_uuid_string) ondeck_sleepwake_uuid_string->release(); +#if HIBERNATION + 
if(hibernatemode_string) hibernatemode_string->release(); + if(hibernatefile_string) hibernatefile_string->release(); + if(hibernatefreeratio_string) hibernatefreeratio_string->release(); + if(hibernatefreetime_string) hibernatefreetime_string->release(); +#endif return return_value; } -//********************************************************************************* -// youAreRoot +//****************************************************************************** +// aggressivenessChanged // -// Power Managment is informing us that we are the root power domain. -// We know we are not the root however, since we have just instantiated a parent -// for ourselves and made it the root. We override this method so it will have -// no effect -//********************************************************************************* -IOReturn IOPMrootDomain::youAreRoot ( void ) -{ - return IOPMNoErr; -} +// We are behind the command gate to examine changes to aggressives. +//****************************************************************************** -// ********************************************************************************** -// command_received -// -// No longer used -// ********************************************************************************** -void IOPMrootDomain::command_received ( void * w, void * x, void * y, void * z ) +void IOPMrootDomain::aggressivenessChanged( void ) { - super::command_received(w,x,y,z); -} + unsigned long minutesToSleep = 0; + unsigned long minutesToDisplayDim = 0; + ASSERT_GATED(); -// ********************************************************************************** -// broadcast_aggressiveness -// -// ********************************************************************************** -IOReturn broadcast_aggressiveness ( OSObject * root, void * x, void * y, void *, void * ) -{ - ((IOPMrootDomain *)root)->broadcast_it((unsigned long)x,(unsigned long)y); - return IOPMNoErr; -} + // Fetch latest display and system sleep slider 
values. + getAggressiveness(kPMMinutesToSleep, &minutesToSleep); + getAggressiveness(kPMMinutesToDim, &minutesToDisplayDim); + DLOG("aggressiveness changed system %u, display %u\n", + (uint32_t) minutesToSleep, (uint32_t) minutesToDisplayDim); + DLOG("idle time -> %ld secs (ena %d)\n", + idleSeconds, (minutesToSleep != 0)); -// ********************************************************************************** -// broadcast_it -// -// We are behind the command gate to broadcast an aggressiveness factor. We let the -// superclass do it, but we need to snoop on factors that affect idle sleep. -// ********************************************************************************** -void IOPMrootDomain::broadcast_it (unsigned long type, unsigned long value) -{ - super::setAggressiveness(type,value); + if (0x7fffffff == minutesToSleep) + minutesToSleep = idleSeconds; - // Save user's spin down timer to restore after we replace it for idle sleep - if( type == kPMMinutesToSpinDown ) user_spindown = value; + // How long to wait before sleeping the system once the displays turns + // off is indicated by 'extraSleepDelay'. - // Use longestNonSleepSlider to calculate dimming adjust idle sleep timer - if (getAggressiveness(kPMMinutesToDim, (unsigned long *)&longestNonSleepSlider) - != kIOReturnSuccess) - longestNonSleepSlider = 0; + if ( minutesToSleep > minutesToDisplayDim ) { + extraSleepDelay = minutesToSleep - minutesToDisplayDim; + } + else { + extraSleepDelay = 0; + } - if ( type == kPMMinutesToSleep ) { - DEBUG_LOG("PM idle time -> %ld secs (ena %d)\n", idleSeconds, (value != 0)); - if (0x7fffffff == value) - value = idleSeconds; + // system sleep timer was disabled, but not anymore. 
+ if ( (sleepSlider == 0) && (minutesToSleep != 0) ) { + if (!wrangler) + { + sleepASAP = false; + changePowerStateToPriv(ON_STATE); + if (idleSeconds) + { + startIdleSleepTimer( idleSeconds ); + } + } + else + { + // Start idle sleep timer if wrangler went to sleep + // while system sleep was disabled. - if ( (sleepSlider == 0) && (value != 0) ) { - if (!wrangler) + sleepASAP = false; + if (wranglerAsleep) { - sleepASAP = false; - changePowerStateToPriv(ON_STATE); - if (idleSeconds) + AbsoluteTime now; + uint64_t nanos; + uint32_t minutesSinceDisplaySleep = 0; + uint32_t sleepDelay; + + clock_get_uptime(&now); + if (CMP_ABSOLUTETIME(&now, &wranglerSleepTime) > 0) { - AbsoluteTime deadline; - // stay awake for at least idleSeconds - clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; + SUB_ABSOLUTETIME(&now, &wranglerSleepTime); + absolutetime_to_nanoseconds(now, &nanos); + minutesSinceDisplaySleep = nanos / (60000000000ULL); } - } - else - { - // If sleepASAP is already set, then calling adjustPowerState() here - // will put the system to sleep immediately which is bad. Note that - // this aggressiveness change can occur without waking up the display - // by (dis)connecting the AC adapter. To get around this, the power - // clamp is restore to ON state then dropped after waiting for the - // sleep timer to expire. - - if (sleepASAP) + + if (extraSleepDelay > minutesSinceDisplaySleep) + { + sleepDelay = extraSleepDelay - minutesSinceDisplaySleep; + } + else { - AbsoluteTime deadline; - // stay awake for at least sleepSlider minutes - clock_interval_to_deadline(value * 60, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; - sleepASAP = false; + // 1 min idle sleep. 
+ sleepDelay = 1; } + + startIdleSleepTimer(sleepDelay * 60); + DLOG("display slept %u min, set idle timer to %u min\n", + minutesSinceDisplaySleep, sleepDelay); } } - sleepSlider = value; - if ( sleepSlider == 0 ) { - // idle sleep is now disabled - adjustPowerState(); - // make sure we're powered - patriarch->wakeSystem(); - } } - if ( sleepSlider > longestNonSleepSlider ) { - extraSleepDelay = sleepSlider - longestNonSleepSlider ; - } - else { - extraSleepDelay = 0; + + sleepSlider = minutesToSleep; + if ( sleepSlider == 0 ) { + cancelIdleSleepTimer(); + // idle sleep is now disabled + adjustPowerState(); + // make sure we're powered + patriarch->wakeSystem(); } } -// ********************************************************************************** -// sleepTimerExpired +//****************************************************************************** +// setAggressiveness // -// ********************************************************************************** -static void sleepTimerExpired ( thread_call_param_t us) -{ - ((IOPMrootDomain *)us)->handleSleepTimerExpiration(); - } - - -static void wakeupClamshellTimerExpired ( thread_call_param_t us) +// Override IOService::setAggressiveness() +//****************************************************************************** + +IOReturn IOPMrootDomain::setAggressiveness( + unsigned long type, + unsigned long value ) { - ((IOPMrootDomain *)us)->stopIgnoringClamshellEventsDuringWakeup(); + return setAggressiveness( type, value, 0 ); } - -// ********************************************************************************** -// handleSleepTimerExpiration -// -// The time between the sleep idle timeout and the next longest one has elapsed. -// It's time to sleep. Start that by removing the clamp that's holding us awake. 
-// ********************************************************************************** -void IOPMrootDomain::handleSleepTimerExpiration ( void ) +/* + * Private setAggressiveness() with an internal options argument. + */ +IOReturn IOPMrootDomain::setAggressiveness( + unsigned long type, + unsigned long value, + IOOptionBits options ) { - DEBUG_LOG("SleepTimerExpired\n"); + AggressivesRequest * entry; + AggressivesRequest * request; + bool found = false; - AbsoluteTime time; + DLOG("setAggressiveness 0x%x = %u, options 0x%x\n", + (uint32_t) type, (uint32_t) value, (uint32_t) options); - clock_get_uptime(&time); - if ((AbsoluteTime_to_scalar(&time) > autoWakeStart) && (AbsoluteTime_to_scalar(&time) < autoWakeEnd)) + request = IONew(AggressivesRequest, 1); + if (!request) + return kIOReturnNoMemory; + + memset(request, 0, sizeof(*request)); + request->options = options; + request->dataType = kAggressivesRequestTypeRecord; + request->data.record.type = (uint32_t) type; + request->data.record.value = (uint32_t) value; + + AGGRESSIVES_LOCK(); + + // Update disk quick spindown flag used by getAggressiveness(). + // Never merge requests with quick spindown flags set. + + if (options & kAggressivesOptionQuickSpindownEnable) + gAggressivesState |= kAggressivesStateQuickSpindown; + else if (options & kAggressivesOptionQuickSpindownDisable) + gAggressivesState &= ~kAggressivesStateQuickSpindown; + else { - thread_call_enter_delayed(extraSleepTimer, *((AbsoluteTime *) &autoWakeEnd)); - return; + // Coalesce requests with identical aggressives types. + // Deal with callers that calls us too "aggressively". 
+ + queue_iterate(&aggressivesQueue, entry, AggressivesRequest *, chain) + { + if ((entry->dataType == kAggressivesRequestTypeRecord) && + (entry->data.record.type == type) && + ((entry->options & kAggressivesOptionQuickSpindownMask) == 0)) + { + entry->data.record.value = value; + found = true; + break; + } + } } - // accelerate disk spin down if spin down timer is non-zero (zero = never spin down) - if(0 != user_spindown) - setQuickSpinDownTimeout(); + if (!found) + { + queue_enter(&aggressivesQueue, request, AggressivesRequest *, chain); + } - sleepASAP = true; - adjustPowerState(); + AGGRESSIVES_UNLOCK(); + + if (found) + IODelete(request, AggressivesRequest, 1); + + if (options & kAggressivesOptionSynchronous) + handleAggressivesRequests(); // not truly synchronous + else + thread_call_enter(aggressivesThreadCall); + + return kIOReturnSuccess; } -void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup(void) +//****************************************************************************** +// getAggressiveness +// +// Override IOService::setAggressiveness() +// Fetch the aggressiveness factor with the given type. +//****************************************************************************** + +IOReturn IOPMrootDomain::getAggressiveness ( + unsigned long type, + unsigned long * outLevel ) { - // Allow clamshell-induced sleep now - ignoringClamshellDuringWakeup = false; + uint32_t value = 0; + int source = 0; - // Re-send clamshell event, in case it causes a sleep - if(clamshellIsClosed) - this->receivePowerNotification( kLocalEvalClamshellCommand ); + if (!outLevel) + return kIOReturnBadArgument; + + AGGRESSIVES_LOCK(); + + // Disk quick spindown in effect, report value = 1 + + if ((gAggressivesState & kAggressivesStateQuickSpindown) && + (type == kPMMinutesToSpinDown)) + { + value = kAggressivesMinValue; + source = 1; + } + + // Consult the pending request queue. 
+ + if (!source) + { + AggressivesRequest * entry; + + queue_iterate(&aggressivesQueue, entry, AggressivesRequest *, chain) + { + if ((entry->dataType == kAggressivesRequestTypeRecord) && + (entry->data.record.type == type) && + ((entry->options & kAggressivesOptionQuickSpindownMask) == 0)) + { + value = entry->data.record.value; + source = 2; + break; + } + } + } + + // Consult the backend records. + + if (!source && aggressivesData) + { + AggressivesRecord * record; + int i, count; + + count = aggressivesData->getLength() / sizeof(AggressivesRecord); + record = (AggressivesRecord *) aggressivesData->getBytesNoCopy(); + + for (i = 0; i < count; i++, record++) + { + if (record->type == type) + { + value = record->value; + source = 3; + break; + } + } + } + + AGGRESSIVES_UNLOCK(); + + if (source) + { + DLOG("getAggressiveness 0x%x = %u, source %d\n", + (uint32_t) type, value, source); + *outLevel = (unsigned long) value; + return kIOReturnSuccess; + } + else + { + DLOG("getAggressiveness type 0x%x not found\n", (uint32_t) type); + *outLevel = 0; // default return = 0, driver may not check for error + return kIOReturnInvalid; + } } -//********************************************************************************* -// setAggressiveness + +//****************************************************************************** +// joinAggressiveness // -// Some aggressiveness factor has changed. We broadcast it to the hierarchy while on -// the Power Mangement workloop thread. This enables objects in the -// hierarchy to successfully alter their idle timers, which are all on the -// same thread. -//********************************************************************************* +// Request from IOService to join future aggressiveness broadcasts. 
+//****************************************************************************** -IOReturn IOPMrootDomain::setAggressiveness ( unsigned long type, unsigned long newLevel ) +IOReturn IOPMrootDomain::joinAggressiveness( + IOService * service ) { - IOWorkLoop * pmWorkLoop = getPMworkloop(); - if (pmWorkLoop) - pmWorkLoop->runAction(broadcast_aggressiveness,this,(void *)type,(void *)newLevel); + AggressivesRequest * request; + + if (!service || (service == this)) + return kIOReturnBadArgument; + + DLOG("joinAggressiveness %s (%p)\n", service->getName(), service); + + request = IONew(AggressivesRequest, 1); + if (!request) + return kIOReturnNoMemory; + + service->retain(); // released by synchronizeAggressives() + + memset(request, 0, sizeof(*request)); + request->dataType = kAggressivesRequestTypeService; + request->data.service = service; + + AGGRESSIVES_LOCK(); + queue_enter(&aggressivesQueue, request, AggressivesRequest *, chain); + AGGRESSIVES_UNLOCK(); + + thread_call_enter(aggressivesThreadCall); return kIOReturnSuccess; } -// ********************************************************************************** +//****************************************************************************** +// handleAggressivesRequests +// +// Backend thread processes all incoming aggressiveness requests in the queue. 
+//****************************************************************************** + +static void +handleAggressivesFunction( + thread_call_param_t param1, + thread_call_param_t param2 ) +{ + if (param1) + { + ((IOPMrootDomain *) param1)->handleAggressivesRequests(); + } +} + +void IOPMrootDomain::handleAggressivesRequests( void ) +{ + AggressivesRecord * start; + AggressivesRecord * record; + AggressivesRequest * request; + queue_head_t joinedQueue; + int i, count; + bool broadcast; + bool found; + bool pingSelf = false; + + AGGRESSIVES_LOCK(); + + if ((gAggressivesState & kAggressivesStateBusy) || !aggressivesData || + queue_empty(&aggressivesQueue)) + goto unlock_done; + + gAggressivesState |= kAggressivesStateBusy; + count = aggressivesData->getLength() / sizeof(AggressivesRecord); + start = (AggressivesRecord *) aggressivesData->getBytesNoCopy(); + + do + { + broadcast = false; + queue_init(&joinedQueue); + + do + { + // Remove request from the incoming queue in FIFO order. + queue_remove_first(&aggressivesQueue, request, AggressivesRequest *, chain); + switch (request->dataType) + { + case kAggressivesRequestTypeRecord: + // Update existing record if found. 
+ found = false; + for (i = 0, record = start; i < count; i++, record++) + { + if (record->type == request->data.record.type) + { + found = true; + + if (request->options & kAggressivesOptionQuickSpindownEnable) + { + if ((record->flags & kAggressivesRecordFlagMinValue) == 0) + { + broadcast = true; + record->flags |= (kAggressivesRecordFlagMinValue | + kAggressivesRecordFlagModified); + DLOG("quick spindown accelerated, was %u min\n", + record->value); + } + } + else if (request->options & kAggressivesOptionQuickSpindownDisable) + { + if (record->flags & kAggressivesRecordFlagMinValue) + { + broadcast = true; + record->flags |= kAggressivesRecordFlagModified; + record->flags &= ~kAggressivesRecordFlagMinValue; + DLOG("disk spindown restored to %u min\n", + record->value); + } + } + else if (record->value != request->data.record.value) + { + record->value = request->data.record.value; + if ((record->flags & kAggressivesRecordFlagMinValue) == 0) + { + broadcast = true; + record->flags |= kAggressivesRecordFlagModified; + } + } + break; + } + } + + // No matching record, append a new record. + if (!found && + ((request->options & kAggressivesOptionQuickSpindownDisable) == 0)) + { + AggressivesRecord newRecord; + + newRecord.flags = kAggressivesRecordFlagModified; + newRecord.type = request->data.record.type; + newRecord.value = request->data.record.value; + if (request->options & kAggressivesOptionQuickSpindownEnable) + { + newRecord.flags |= kAggressivesRecordFlagMinValue; + DLOG("disk spindown accelerated\n"); + } + + aggressivesData->appendBytes(&newRecord, sizeof(newRecord)); + + // OSData may have switched to another (larger) buffer. + count = aggressivesData->getLength() / sizeof(AggressivesRecord); + start = (AggressivesRecord *) aggressivesData->getBytesNoCopy(); + broadcast = true; + } + + // Finished processing the request, release it. 
+ IODelete(request, AggressivesRequest, 1); + break; + + case kAggressivesRequestTypeService: + // synchronizeAggressives() will free request. + queue_enter(&joinedQueue, request, AggressivesRequest *, chain); + break; + + default: + panic("bad aggressives request type %x\n", request->dataType); + break; + } + } while (!queue_empty(&aggressivesQueue)); + + // Release the lock to perform work, with busy flag set. + if (!queue_empty(&joinedQueue) || broadcast) + { + AGGRESSIVES_UNLOCK(); + if (!queue_empty(&joinedQueue)) + synchronizeAggressives(&joinedQueue, start, count); + if (broadcast) + broadcastAggressives(start, count); + AGGRESSIVES_LOCK(); + } + + // Remove the modified flag from all records. + for (i = 0, record = start; i < count; i++, record++) + { + if ((record->flags & kAggressivesRecordFlagModified) && + ((record->type == kPMMinutesToDim) || + (record->type == kPMMinutesToSleep))) + pingSelf = true; + + record->flags &= ~kAggressivesRecordFlagModified; + } + + // Check the incoming queue again since new entries may have been + // added while lock was released above. + + } while (!queue_empty(&aggressivesQueue)); + + gAggressivesState &= ~kAggressivesStateBusy; + +unlock_done: + AGGRESSIVES_UNLOCK(); + + // Root domain is interested in system and display sleep slider changes. + // Submit a power event to handle those changes on the PM work loop. + + if (pingSelf && pmPowerStateQueue) { + pmPowerStateQueue->submitPowerEvent( kPowerEventAggressivenessChanged ); + } +} + + +//****************************************************************************** +// synchronizeAggressives +// +// Push all known aggressiveness records to one or more IOService. 
+//****************************************************************************** + +void IOPMrootDomain::synchronizeAggressives( + queue_head_t * joinedQueue, + const AggressivesRecord * array, + int count ) +{ + IOService * service; + AggressivesRequest * request; + const AggressivesRecord * record; + uint32_t value; + int i; + + while (!queue_empty(joinedQueue)) + { + queue_remove_first(joinedQueue, request, AggressivesRequest *, chain); + if (request->dataType == kAggressivesRequestTypeService) + service = request->data.service; + else + service = 0; + + IODelete(request, AggressivesRequest, 1); + request = 0; + + if (service) + { + if (service->assertPMThreadCall()) + { + for (i = 0, record = array; i < count; i++, record++) + { + value = record->value; + if (record->flags & kAggressivesRecordFlagMinValue) + value = kAggressivesMinValue; + + DLOG("synchronizeAggressives 0x%x = %u to %s\n", + record->type, value, service->getName()); + service->setAggressiveness(record->type, value); + } + service->deassertPMThreadCall(); + } + service->release(); // retained by joinAggressiveness() + } + } +} + + +//****************************************************************************** +// broadcastAggressives +// +// Traverse PM tree and call setAggressiveness() for records that have changed. 
+//****************************************************************************** + +void IOPMrootDomain::broadcastAggressives( + const AggressivesRecord * array, + int count ) +{ + IORegistryIterator * iter; + IORegistryEntry * entry; + IOPowerConnection * connect; + IOService * service; + const AggressivesRecord * record; + uint32_t value; + int i; + + iter = IORegistryIterator::iterateOver( + this, gIOPowerPlane, kIORegistryIterateRecursively); + if (iter) + { + do + { + iter->reset(); + while ((entry = iter->getNextObject())) + { + connect = OSDynamicCast(IOPowerConnection, entry); + if (!connect || !connect->getReadyFlag()) + continue; + + if ((service = (IOService *) connect->copyChildEntry(gIOPowerPlane))) + { + if (service->assertPMThreadCall()) + { + for (i = 0, record = array; i < count; i++, record++) + { + if (record->flags & kAggressivesRecordFlagModified) + { + value = record->value; + if (record->flags & kAggressivesRecordFlagMinValue) + value = kAggressivesMinValue; + DLOG("broadcastAggressives %x = %u to %s\n", + record->type, value, service->getName()); + service->setAggressiveness(record->type, value); + } + } + service->deassertPMThreadCall(); + } + service->release(); + } + } + } + while (!entry && !iter->isValid()); + iter->release(); + } +} + + +//****************************************************************************** +// startIdleSleepTimer +// +//****************************************************************************** + +void IOPMrootDomain::startIdleSleepTimer( uint32_t inSeconds ) +{ + AbsoluteTime deadline; + + ASSERT_GATED(); + if (inSeconds) + { + clock_interval_to_deadline(inSeconds, kSecondScale, &deadline); + thread_call_enter_delayed(extraSleepTimer, deadline); + idleSleepTimerPending = true; + DLOG("idle timer set for %u seconds\n", inSeconds); + } +} + + +//****************************************************************************** +// cancelIdleSleepTimer +// 
+//****************************************************************************** + +void IOPMrootDomain::cancelIdleSleepTimer( void ) +{ + ASSERT_GATED(); + if (idleSleepTimerPending) + { + DLOG("idle timer cancelled\n"); + thread_call_cancel(extraSleepTimer); + idleSleepTimerPending = false; + } +} + + +//****************************************************************************** +// idleSleepTimerExpired +// +//****************************************************************************** + +static void idleSleepTimerExpired( + thread_call_param_t us, thread_call_param_t ) +{ + ((IOPMrootDomain *)us)->handleSleepTimerExpiration(); +} + +static void wakeupClamshellTimerExpired( + thread_call_param_t us, thread_call_param_t ) +{ + ((IOPMrootDomain *)us)->stopIgnoringClamshellEventsDuringWakeup(); +} + + +//****************************************************************************** +// handleSleepTimerExpiration +// +// The time between the sleep idle timeout and the next longest one has elapsed. +// It's time to sleep. Start that by removing the clamp that's holding us awake. 
+//****************************************************************************** + +void IOPMrootDomain::handleSleepTimerExpiration( void ) +{ + if (!getPMworkloop()->inGate()) + { + getPMworkloop()->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, + &IOPMrootDomain::handleSleepTimerExpiration), + this); + return; + } + + AbsoluteTime time; + + DLOG("sleep timer expired\n"); + ASSERT_GATED(); + + idleSleepTimerPending = false; + + clock_get_uptime(&time); + if ((AbsoluteTime_to_scalar(&time) > autoWakeStart) && + (AbsoluteTime_to_scalar(&time) < autoWakeEnd)) + { + thread_call_enter_delayed(extraSleepTimer, *((AbsoluteTime *) &autoWakeEnd)); + return; + } + + // accelerate disk spin down if spin down timer is non-zero + setQuickSpinDownTimeout(); + + sleepASAP = true; + adjustPowerState(); +} + + +//****************************************************************************** +// stopIgnoringClamshellEventsDuringWakeup +// +//****************************************************************************** + +void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup( void ) +{ + if (!getPMworkloop()->inGate()) + { + getPMworkloop()->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, + &IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup), + this); + return; + } + + ASSERT_GATED(); + + // Allow clamshell-induced sleep now + ignoringClamshellOnWake = false; + + // Re-send clamshell event, in case it causes a sleep + if (clamshellIsClosed) + handlePowerNotification( kLocalEvalClamshellCommand ); +} + + +//****************************************************************************** // sleepSystem // -// ********************************************************************************** +//****************************************************************************** + /* public */ -IOReturn IOPMrootDomain::sleepSystem ( void ) +IOReturn IOPMrootDomain::sleepSystem( void ) { - return sleepSystemOptions (NULL); + return 
sleepSystemOptions(NULL); } /* private */ -IOReturn IOPMrootDomain::sleepSystemOptions ( OSDictionary *options ) +IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options ) { /* sleepSystem is a public function, and may be called by any kernel driver. * And that's bad - drivers should sleep the system by calling @@ -891,127 +1745,124 @@ IOReturn IOPMrootDomain::sleepSystemOptions ( OSDictionary *options ) } /* private */ -IOReturn IOPMrootDomain::privateSleepSystem ( const char *sleepReason ) +IOReturn IOPMrootDomain::privateSleepSystem( const char *sleepReason ) { - // Record sleep cause in IORegistry - if (sleepReason) { - setProperty(kRootDomainSleepReasonKey, sleepReason); - } - - if(systemShutdown) { - kprintf("Preventing system sleep on grounds of systemShutdown.\n"); - } - - if( userDisabledAllSleep ) + if ( userDisabledAllSleep ) { + LOG("Sleep prevented by user disable\n"); + /* Prevent sleep of all kinds if directed to by user space */ return kIOReturnNotPermitted; } - if ( !systemBooting - && !systemShutdown - && allowSleep) + if ( systemBooting || systemShutdown || !allowSleep ) { - if ( !sleepIsSupported ) { - setSleepSupported( kPCICantSleep ); - kprintf("Sleep prevented by kIOPMPreventSystemSleep flag\n"); - } - patriarch->sleepSystem(); - return kIOReturnSuccess; - } else { - // Unable to sleep because system is in the process of booting or shutting down, - // or sleep has otherwise been disallowed. + LOG("Sleep prevented by SB %d, SS %d, AS %d\n", + systemBooting, systemShutdown, allowSleep); + + // Unable to sleep because system is in the process of booting or + // shutting down, or sleep has otherwise been disallowed. 
return kIOReturnError; } + + // Record sleep cause in IORegistry + if (sleepReason) { + setProperty(kRootDomainSleepReasonKey, sleepReason); + } + + tracePoint(kIOPMTracePointSleepStarted); + + patriarch->sleepSystem(); + return kIOReturnSuccess; } -// ********************************************************************************** +//****************************************************************************** // shutdownSystem // -// ********************************************************************************** -IOReturn IOPMrootDomain::shutdownSystem ( void ) +//****************************************************************************** + +IOReturn IOPMrootDomain::shutdownSystem( void ) { //patriarch->shutDownSystem(); return kIOReturnUnsupported; } -// ********************************************************************************** +//****************************************************************************** // restartSystem // -// ********************************************************************************** -IOReturn IOPMrootDomain::restartSystem ( void ) +//****************************************************************************** + +IOReturn IOPMrootDomain::restartSystem( void ) { //patriarch->restartSystem(); return kIOReturnUnsupported; } -// ********************************************************************************** +//****************************************************************************** // powerChangeDone // // This overrides powerChangeDone in IOService. // -// Finder sleep and idle sleep move us from the ON state to the SLEEP_STATE. +// Menu sleep and idle sleep move us from the ON state to the SLEEP_STATE. // In this case: -// If we just finished going to the SLEEP_STATE, and the platform is capable of true sleep, -// sleep the kernel. Otherwise switch up to the DOZE_STATE which will keep almost -// everything as off as it can get. 
-// -// ********************************************************************************** -void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) -{ - OSNumber * propertyPtr; - unsigned short theProperty; - AbsoluteTime deadline; +// If we finished going to the SLEEP_STATE, and the platform is capable of +// true sleep, then sleep the kernel. Otherwise switch up to the DOZE_STATE +// which will keep almost everything as off as it can get. +//****************************************************************************** - DEBUG_LOG("PowerChangeDone: %ld -> %ld\n", previousState, getPowerState()); +void IOPMrootDomain::powerChangeDone( unsigned long previousState ) +{ + ASSERT_GATED(); + DLOG("PowerChangeDone: %u->%u\n", + (uint32_t) previousState, (uint32_t) getPowerState()); switch ( getPowerState() ) { case SLEEP_STATE: if ( previousState != ON_STATE ) break; - if ( canSleep && sleepIsSupported ) + if ( canSleep ) { // re-enable this timer for next sleep - idleSleepPending = false; + cancelIdleSleepTimer(); + wranglerTickled = true; - uint32_t secs, microsecs; + clock_sec_t secs; + clock_usec_t microsecs; clock_get_calendar_microtime(&secs, &microsecs); logtime(secs); gIOLastSleepTime.tv_sec = secs; gIOLastSleepTime.tv_usec = microsecs; + gIOLastWakeTime.tv_sec = 0; + gIOLastWakeTime.tv_usec = 0; #if HIBERNATION - IOLog("System %sSleep\n", gIOHibernateState ? "Safe" : ""); + LOG("System %sSleep\n", gIOHibernateState ? "Safe" : ""); + + tracePoint(kIOPMTracePointSystemHibernatePhase); IOHibernateSystemHasSlept(); #else - IOLog("System Sleep\n"); + LOG("System Sleep\n"); #endif + tracePoint(kIOPMTracePointSystemSleepPlatformPhase); + getPlatform()->sleepKernel(); // The CPU(s) are off at this point. When they're awakened by CPU interrupt, // code will resume execution here. // Now we're waking...
+ tracePoint(kIOPMTracePointSystemWakeDriversPhase); + #if HIBERNATION IOHibernateSystemWake(); #endif - // stay awake for at least 30 seconds - clock_interval_to_deadline(30, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; - - // Ignore closed clamshell during wakeup and for a few seconds - // after wakeup is complete - ignoringClamshellDuringWakeup = true; - // sleep transition complete gSleepOrShutdownPending = 0; @@ -1020,135 +1871,145 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) // get us some power patriarch->wakeSystem(); - - // early stage wake notification - tellClients(kIOMessageSystemWillPowerOn); - // tell the tree we're waking + // Set indicator if UUID was set - allow it to be cleared. + if (getProperty(kIOPMSleepWakeUUIDKey)) + gSleepWakeUUIDIsSet = true; + +#if !ROOT_DOMAIN_RUN_STATES + tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter); +#endif + #if HIBERNATION - IOLog("System %sWake\n", gIOHibernateState ? "SafeSleep " : ""); + LOG("System %sWake\n", gIOHibernateState ? "SafeSleep " : ""); #endif + + // log system wake + getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0); + +#ifndef __LP64__ + // tell the tree we're waking systemWake(); - - // Allow drivers to request extra processing time before clamshell - // sleep if kIOREMSleepEnabledKey is present. 
- // Ignore clamshell events for at least 5 seconds - if(getProperty(kIOREMSleepEnabledKey)) { - // clamshellWakeupIgnore callout clears ignoreClamshellDuringWakeup bit - clock_interval_to_deadline(5, kSecondScale, &deadline); - if(clamshellWakeupIgnore) { - thread_call_enter_delayed(clamshellWakeupIgnore, deadline); - } - } else ignoringClamshellDuringWakeup = false; - - // Find out what woke us - propertyPtr = OSDynamicCast(OSNumber,getProperty("WakeEvent")); - if ( propertyPtr ) { - theProperty = propertyPtr->unsigned16BitValue(); - IOLog("Wake event %04x\n",theProperty); - if ( (theProperty & 0x0008) || //lid - (theProperty & 0x0800) || // front panel button - (theProperty & 0x0020) || // external keyboard - (theProperty & 0x0001) ) { // internal keyboard - // We've identified the wakeup event as UI driven - reportUserInput(); - } - } else { - // Since we can't identify the wakeup event, treat it as UI activity +#endif + + +#if defined(__i386__) || defined(__x86_64__) +#if ROOT_DOMAIN_RUN_STATES + OSString * wakeType = OSDynamicCast( + OSString, getProperty(kIOPMRootDomainWakeTypeKey)); + if (wakeType && wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) + { + updateRunState(kRStateMaintenance); + wranglerTickled = false; + } + else +#endif /* ROOT_DOMAIN_RUN_STATES */ + { + updateRunState(kRStateNormal); reportUserInput(); } - - // Wake for thirty seconds +#else /* !__i386__ && !__x86_64__ */ + // stay awake for at least 30 seconds + startIdleSleepTimer(30); + reportUserInput(); +#endif + changePowerStateToPriv(ON_STATE); } else { + updateRunState(kRStateNormal); + // allow us to step up a power state patriarch->sleepToDoze(); // ignore children's request for higher power during doze. 
- powerOverrideOnPriv(); - changePowerStateToPriv(DOZE_STATE); + changePowerStateWithOverrideTo(DOZE_STATE); } break; case DOZE_STATE: if ( previousState != DOZE_STATE ) { - IOLog("System Doze\n"); + LOG("System Doze\n"); } // re-enable this timer for next sleep - idleSleepPending = false; + cancelIdleSleepTimer(); gSleepOrShutdownPending = 0; // Invalidate prior activity tickles to allow wake from doze. if (wrangler) wrangler->changePowerStateTo(0); break; + +#if ROOT_DOMAIN_RUN_STATES + case ON_STATE: + // SLEEP -> ON (Maintenance) + // Go back to sleep, unless cancelled by a HID event. + + if ((previousState == SLEEP_STATE) && + (runStateIndex == kRStateMaintenance) && + !wranglerTickled) + { + setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey); + changePowerStateWithOverrideTo(SLEEP_STATE); + } + + // ON -> ON triggered by R-state changes. + + if ((previousState == ON_STATE) && + (runStateIndex != nextRunStateIndex) && + (nextRunStateIndex < kRStateCount)) + { + LOG("R-state changed %u->%u\n", + runStateIndex, nextRunStateIndex); + updateRunState(nextRunStateIndex); + + DLOG("kIOMessageSystemHasPoweredOn (%u)\n", + gMessageClientType); + tellClients(kIOMessageSystemHasPoweredOn, clientMessageFilter); + } - case RESTART_STATE: - IOLog("System Restart\n"); - PEHaltRestart(kPERestartCPU); - break; - - case OFF_STATE: - IOLog("System Halt\n"); - PEHaltRestart(kPEHaltCPU); break; +#endif /* ROOT_DOMAIN_RUN_STATES */ } } -// ********************************************************************************** +//****************************************************************************** // wakeFromDoze // -// The Display Wrangler calls here when it switches to its highest state. If the -// system is currently dozing, allow it to wake by making sure the parent is -// providing power. -// ********************************************************************************** +// The Display Wrangler calls here when it switches to its highest state. 
+// If the system is currently dozing, allow it to wake by making sure the +// parent is providing power. +//****************************************************************************** + void IOPMrootDomain::wakeFromDoze( void ) { - if ( getPowerState() == DOZE_STATE ) + if ( getPowerState() == DOZE_STATE ) { - // Reset sleep support till next sleep attempt. - // A machine's support of sleep vs. doze can change over the course of - // a running system, so we recalculate it before every sleep. - setSleepSupported(0); - changePowerStateToPriv(ON_STATE); - powerOverrideOffPriv(); - - // early wake notification - tellClients(kIOMessageSystemWillPowerOn); - - // allow us to wake if children so desire patriarch->wakeSystem(); } } -// ***************************************************************************** +//****************************************************************************** // publishFeature // // Adds a new feature to the supported features dictionary -// -// -// ***************************************************************************** +//****************************************************************************** + void IOPMrootDomain::publishFeature( const char * feature ) { - publishFeature(feature, kIOPMSupportedOnAC - | kIOPMSupportedOnBatt - | kIOPMSupportedOnUPS, - NULL); - return; + publishFeature(feature, kRD_AllPowerSources, NULL); } -// ***************************************************************************** +//****************************************************************************** // publishFeature (with supported power source specified) // // Adds a new feature to the supported features dictionary -// -// -// ***************************************************************************** -void IOPMrootDomain::publishFeature( +//****************************************************************************** + +void IOPMrootDomain::publishFeature( const char *feature, uint32_t supportedWhere, uint32_t *uniqueFeatureID) @@ 
-1163,8 +2024,6 @@ void IOPMrootDomain::publishFeature( supportedWhere &= kRD_AllPowerSources; // mask off any craziness! -// kprintf("IOPMrootDomain::publishFeature [\"%s\":%0x01x]\n", feature, supportedWhere); - if(!supportedWhere) { // Feature isn't supported anywhere! return; @@ -1196,8 +2055,11 @@ void IOPMrootDomain::publishFeature( // have no need to remove themselves later. *uniqueFeatureID = next_feature_id; } - - feature_value = supportedWhere + (next_feature_id << 16); + + feature_value = (uint32_t)next_feature_id; + feature_value <<= 16; + feature_value += supportedWhere; + new_feature_data = OSNumber::withNumber( (unsigned long long)feature_value, 32); @@ -1209,12 +2071,20 @@ void IOPMrootDomain::publishFeature( // We need to create an OSArray to hold the now 2 elements. existing_feature_arr = OSArray::withObjects( (const OSObject **)&existing_feature, 1, 2); - existing_feature_arr->setObject(new_feature_data); - features->setObject(feature, existing_feature_arr); } else if(( existing_feature_arr = OSDynamicCast(OSArray, osObj) )) { - // Add object to existing array - existing_feature_arr->setObject(new_feature_data); + // Add object to existing array + existing_feature_arr = OSArray::withArray( + existing_feature_arr, + existing_feature_arr->getCount() + 1); + } + + if (existing_feature_arr) + { + existing_feature_arr->setObject(new_feature_data); + features->setObject(feature, existing_feature_arr); + existing_feature_arr->release(); + existing_feature_arr = 0; } } else { // The easy case: no previously existing features listed. 
We simply @@ -1233,18 +2103,17 @@ void IOPMrootDomain::publishFeature( // Notify EnergySaver and all those in user space so they might // re-populate their feature specific UI if(pmPowerStateQueue) { - pmPowerStateQueue->featureChangeOccurred( - kIOPMMessageFeatureChange, this); + pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged ); } } -// ***************************************************************************** + +//****************************************************************************** // removePublishedFeature // // Removes previously published feature -// -// -// ***************************************************************************** +//****************************************************************************** + IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) { IOReturn ret = kIOReturnError; @@ -1258,6 +2127,7 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) OSNumber *numberMember = NULL; OSObject *osObj = NULL; OSNumber *osNum = NULL; + OSArray *arrayMemberCopy; if(featuresDictLock) IOLockLock(featuresDictLock); @@ -1326,8 +2196,14 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) // the whole thing. features->removeObject(dictKey); } else { - // Otherwise just remove the element in question. - arrayMember->removeObject(i); + // Otherwise remove the element from a copy of the array. 
+ arrayMemberCopy = OSArray::withArray(arrayMember); + if (arrayMemberCopy) + { + arrayMemberCopy->removeObject(i); + features->setObject(dictKey, arrayMemberCopy); + arrayMemberCopy->release(); + } } madeAChange = true; @@ -1337,7 +2213,6 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) } } - dictIterator->release(); if( madeAChange ) @@ -1349,8 +2224,7 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) // Notify EnergySaver and all those in user space so they might // re-populate their feature specific UI if(pmPowerStateQueue) { - pmPowerStateQueue->featureChangeOccurred( - kIOPMMessageFeatureChange, this); + pmPowerStateQueue->submitPowerEvent( kPowerEventFeatureChanged ); } } else { ret = kIOReturnNotFound; @@ -1363,26 +2237,15 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) } -// ********************************************************************************** -// unIdleDevice -// -// Enqueues unidle event to be performed later in a serialized context. 
-// -// ********************************************************************************** -void IOPMrootDomain::unIdleDevice( IOService *theDevice, unsigned long theState ) -{ - if(pmPowerStateQueue) - pmPowerStateQueue->unIdleOccurred(theDevice, theState); -} - -// ********************************************************************************** +//****************************************************************************** // announcePowerSourceChange // -// Notifies "interested parties" that the batteries have changed state -// -// ********************************************************************************** +// Notifies "interested parties" that the battery state has changed +//****************************************************************************** + void IOPMrootDomain::announcePowerSourceChange( void ) { +#ifdef __ppc__ IORegistryEntry *_batteryRegEntry = (IORegistryEntry *) getProperty("BatteryEntry"); // (if possible) re-publish power source state under IOPMrootDomain; @@ -1396,19 +2259,19 @@ void IOPMrootDomain::announcePowerSourceChange( void ) if(batt_info) setProperty(kIOBatteryInfoKey, batt_info); } - +#endif } -// ***************************************************************************** +//****************************************************************************** // setPMSetting (private) // // Internal helper to relay PM settings changes from user space to individual // drivers. Should be called only by IOPMrootDomain::setProperties. 
-// -// ***************************************************************************** -IOReturn IOPMrootDomain::setPMSetting( - const OSSymbol *type, +//****************************************************************************** + +IOReturn IOPMrootDomain::setPMSetting( + const OSSymbol *type, OSObject *obj) { OSArray *arr = NULL; @@ -1435,13 +2298,14 @@ IOReturn IOPMrootDomain::setPMSetting( return kIOReturnSuccess; } -// ***************************************************************************** + +//****************************************************************************** // copyPMSetting (public) // // Allows kexts to safely read setting values, without being subscribed to // notifications. -// -// ***************************************************************************** +//****************************************************************************** + OSObject * IOPMrootDomain::copyPMSetting( OSSymbol *whichSetting) { @@ -1459,11 +2323,13 @@ OSObject * IOPMrootDomain::copyPMSetting( return obj; } -// ***************************************************************************** + +//****************************************************************************** // registerPMSettingController (public) // // direct wrapper to registerPMSettingController with uint32_t power source arg -// ***************************************************************************** +//****************************************************************************** + IOReturn IOPMrootDomain::registerPMSettingController( const OSSymbol * settings[], IOPMSettingControllerCallback func, @@ -1477,7 +2343,8 @@ IOReturn IOPMrootDomain::registerPMSettingController( func, target, refcon, handle); } -// ***************************************************************************** + +//****************************************************************************** // registerPMSettingController (public) // // Kexts may register for notifications when a particular setting is 
changed. @@ -1495,7 +2362,8 @@ IOReturn IOPMrootDomain::registerPMSettingController( // IOPMrootDomain::deRegisterPMSettingCallback when unloading your kext // Returns: // kIOReturnSuccess on success -// ***************************************************************************** +//****************************************************************************** + IOReturn IOPMrootDomain::registerPMSettingController( const OSSymbol * settings[], uint32_t supportedPowerSources, @@ -1565,25 +2433,28 @@ IOReturn IOPMrootDomain::registerPMSettingController( // is closed. //****************************************************************************** -bool IOPMrootDomain::shouldSleepOnClamshellClosed ( void ) +bool IOPMrootDomain::shouldSleepOnClamshellClosed( void ) { + DLOG("clamshell state %d, EX %d, IG %d, IW %d, DT %d, AC %d\n", + clamshellIsClosed, clamshellExists, ignoringClamshell, + ignoringClamshellOnWake, desktopMode, acAdaptorConnected); + return ( !ignoringClamshell - && !ignoringClamshellDuringWakeup - && !(desktopMode && acAdaptorConnect) ); + && !ignoringClamshellOnWake + && !(desktopMode && acAdaptorConnected) ); } -void IOPMrootDomain::sendClientClamshellNotification ( void ) +void IOPMrootDomain::sendClientClamshellNotification( void ) { /* Only broadcast clamshell alert if clamshell exists. */ - if(!clamshellExists) + if (!clamshellExists) return; - + setProperty(kAppleClamshellStateKey, - clamshellIsClosed ? kOSBooleanTrue : kOSBooleanFalse); + clamshellIsClosed ? kOSBooleanTrue : kOSBooleanFalse); setProperty(kAppleClamshellCausesSleepKey, - shouldSleepOnClamshellClosed() ? kOSBooleanTrue : kOSBooleanFalse); - + shouldSleepOnClamshellClosed() ? kOSBooleanTrue : kOSBooleanFalse); /* Argument to message is a bitfiel of * ( kClamshellStateBit | kClamshellSleepBit ) @@ -1593,6 +2464,7 @@ void IOPMrootDomain::sendClientClamshellNotification ( void ) | ( shouldSleepOnClamshellClosed() ? 
kClamshellSleepBit : 0)) ); } + //****************************************************************************** // informCPUStateChange // @@ -1607,7 +2479,7 @@ void IOPMrootDomain::informCPUStateChange( uint32_t type, uint32_t value ) { -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) pmioctlVariableInfo_t varInfoStruct; int pmCPUret = 0; @@ -1655,18 +2527,107 @@ void IOPMrootDomain::informCPUStateChange( return; -#endif __i386__ +#endif /* __i386__ || __x86_64__ */ } + +//****************************************************************************** +// dispatchPowerEvent +// +// IOPMPowerStateQueue callback function. Running on PM work loop thread. +//****************************************************************************** + +void IOPMrootDomain::dispatchPowerEvent( + uint32_t event, void * arg0, void * arg1 ) +{ + DLOG("power event %x args %p %p\n", event, arg0, arg1); + ASSERT_GATED(); + + switch (event) + { + case kPowerEventFeatureChanged: + messageClients(kIOPMMessageFeatureChange, this); + break; + + case kPowerEventReceivedPowerNotification: + handlePowerNotification( (UInt32)(uintptr_t) arg0 ); + break; + + case kPowerEventSystemBootCompleted: + if (systemBooting) + { + systemBooting = false; + adjustPowerState(); + + // If lid is closed, re-send lid closed notification + // now that booting is complete. + if( clamshellIsClosed ) + { + handlePowerNotification(kLocalEvalClamshellCommand); + } + } + break; + + case kPowerEventSystemShutdown: + if (kOSBooleanTrue == (OSBoolean *) arg0) + { + /* We set systemShutdown = true during shutdown + to prevent sleep at unexpected times while loginwindow is trying + to shutdown apps and while the OS is trying to transition to + complete power of. + + Set to true during shutdown, as soon as loginwindow shows + the "shutdown countdown dialog", through individual app + termination, and through black screen kernel shutdown. 
+ */ + LOG("systemShutdown true\n"); + systemShutdown = true; + } else { + /* + A shutdown was initiated, but then the shutdown + was cancelled, clearing systemShutdown to false here. + */ + LOG("systemShutdown false\n"); + systemShutdown = false; + } + break; + + case kPowerEventUserDisabledSleep: + userDisabledAllSleep = (kOSBooleanTrue == (OSBoolean *) arg0); + break; + +#if ROOT_DOMAIN_RUN_STATES + case kPowerEventConfigdRegisteredInterest: + if (gConfigdNotifier) + { + gConfigdNotifier->release(); + gConfigdNotifier = 0; + } + if (arg0) + { + gConfigdNotifier = (IONotifier *) arg0; + } + break; +#endif + + case kPowerEventAggressivenessChanged: + aggressivenessChanged(); + break; + } +} + + //****************************************************************************** // systemPowerEventOccurred // // The power controller is notifying us of a hardware-related power management // event that we must handle. // -// systemPowerEventOccurred covers the same functionality that receivePowerNotification -// does; it simply provides a richer API for conveying more information. +// systemPowerEventOccurred covers the same functionality that +// receivePowerNotification does; it simply provides a richer API for conveying +// more information. //****************************************************************************** + IOReturn IOPMrootDomain::systemPowerEventOccurred( const OSSymbol *event, uint32_t intValue) @@ -1738,14 +2699,23 @@ IOReturn IOPMrootDomain::systemPowerEventOccurred( // receivePowerNotification // // The power controller is notifying us of a hardware-related power management -// event that we must handle. This may be a result of an 'environment' interrupt from -// the power mgt micro. +// event that we must handle. This may be a result of an 'environment' interrupt +// from the power mgt micro. 
//****************************************************************************** -IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) +IOReturn IOPMrootDomain::receivePowerNotification( UInt32 msg ) +{ + // Queue the message as a kPowerEventReceivedPowerNotification event; + // dispatchPowerEvent() hands it to handlePowerNotification(). + // The 32-bit value is widened through uintptr_t before becoming a + // pointer, matching the (UInt32)(uintptr_t) unpacking on the other side. + pmPowerStateQueue->submitPowerEvent( + kPowerEventReceivedPowerNotification, (void *)(uintptr_t) msg ); + return kIOReturnSuccess; +} + +void IOPMrootDomain::handlePowerNotification( UInt32 msg ) { bool eval_clamshell = false; + ASSERT_GATED(); + /* * Local (IOPMrootDomain only) eval clamshell command */ @@ -1759,11 +2729,11 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) */ if (msg & kIOPMOverTemp) { - IOLog("PowerManagement emergency overtemp signal. Going to sleep!"); - + LOG("PowerManagement emergency overtemp signal. Going to sleep!"); privateSleepSystem (kIOPMThermalEmergencySleepKey); } +#ifdef __ppc__ /* * PMU Processor Speed Change */ @@ -1774,6 +2744,7 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) getPlatform()->sleepKernel(); pmu->callPlatformFunction("recoverFromSleep", false, 0, 0, 0, 0); } +#endif /* * Sleep Now! @@ -1791,7 +2762,6 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) privateSleepSystem (kIOPMLowPowerSleepKey); } - /* * Clamshell OPEN */ @@ -1856,11 +2826,15 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) */ if (msg & kIOPMSetACAdaptorConnected) { - acAdaptorConnect = (0 != (msg & kIOPMSetValue)); + acAdaptorConnected = (0 != (msg & kIOPMSetValue)); msg &= ~(kIOPMSetACAdaptorConnected | kIOPMSetValue); - // Tell PMCPU - informCPUStateChange(kInformAC, !acAdaptorConnect); + // Tell CPU PM + informCPUStateChange(kInformAC, !acAdaptorConnected); + + // Tell BSD if AC is connected + // 0 == external power source; 1 == on battery + post_sys_powersource(acAdaptorConnected ?
0:1); sendClientClamshellNotification(); @@ -1869,7 +2843,6 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) { eval_clamshell = true; } - } /* @@ -1924,8 +2897,10 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) // are we dozing? if ( getPowerState() == DOZE_STATE ) { - // yes, tell the tree we're waking +#ifndef __LP64__ + // yes, tell the tree we're waking systemWake(); +#endif // wake the Display Wrangler reportUserInput(); } @@ -1957,8 +2932,10 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) allowSleep = false; // are we dozing? if ( getPowerState() == DOZE_STATE ) { - // yes, tell the tree we're waking +#ifndef __LP64__ + // yes, tell the tree we're waking systemWake(); +#endif adjustPowerState(); // wake the Display Wrangler reportUserInput(); @@ -1968,69 +2945,73 @@ IOReturn IOPMrootDomain::receivePowerNotification (UInt32 msg) patriarch->wakeSystem(); } } - - return 0; } -//********************************************************************************* -// sleepSupported +//****************************************************************************** +// getSleepSupported // -//********************************************************************************* +//****************************************************************************** -void IOPMrootDomain::setSleepSupported( IOOptionBits flags ) +IOOptionBits IOPMrootDomain::getSleepSupported( void ) { - if ( flags & kPCICantSleep ) - { - canSleep = false; - } else { - canSleep = true; - platformSleepSupport = flags; - } + return( platformSleepSupport ); +} - setProperty(kIOSleepSupportedKey, canSleep); +//****************************************************************************** +// setSleepSupported +// +//****************************************************************************** + +void IOPMrootDomain::setSleepSupported( IOOptionBits flags ) +{ + DLOG("setSleepSupported(%x)\n", (uint32_t) flags); + OSBitOrAtomic(flags, 
&platformSleepSupport); } -//********************************************************************************* + +//****************************************************************************** // requestPowerDomainState // // The root domain intercepts this call to the superclass. // Called on the PM work loop thread. // // If the clamp bit is not set in the desire, then the child doesn't need the power -// state it's requesting; it just wants it. The root ignores desires but not needs. +// state it's requesting; it just wants it. The root ignores desires but not needs. // If the clamp bit is not set, the root takes it that the child can tolerate no -// power and interprets the request accordingly. If all children can thus tolerate +// power and interprets the request accordingly. If all children can thus tolerate // no power, we are on our way to idle sleep. -//********************************************************************************* +//****************************************************************************** IOReturn IOPMrootDomain::requestPowerDomainState ( - IOPMPowerFlags desiredState, + IOPMPowerFlags desiredFlags, IOPowerConnection * whichChild, unsigned long specification ) { OSIterator *iter; OSObject *next; IOPowerConnection *connection; - unsigned long powerRequestFlag = 0; + IOPMPowerFlags powerRequestFlag = 0; IOPMPowerFlags editedDesire; -#if DEBUG - IOService *powerChild; - powerChild = (IOService *) whichChild->getChildEntry(gIOPowerPlane); -#endif + ASSERT_GATED(); + + if (kIOLogPMRootDomain & gIOKitDebug) + { + IOService * powerChild = + (IOService *) whichChild->getChildEntry(gIOPowerPlane); + DLOG("child %p, flags %lx, spec %lx - %s\n", + powerChild, desiredFlags, specification, + powerChild ? powerChild->getName() : "?"); + } - DEBUG_LOG("RequestPowerDomainState: flags %lx, child %p [%s], spec %lx\n", - desiredState, powerChild, powerChild ? 
powerChild->getName() : "?", - specification); - // Force the child's input power requirements to 0 unless the prevent // idle-sleep flag is set. No input power flags map to our state 0. // Our power clamp (deviceDesire) keeps the minimum power state at 2. - if (desiredState & kIOPMPreventIdleSleep) - editedDesire = desiredState; + if (desiredFlags & kIOPMPreventIdleSleep) + editedDesire = kIOPMPreventIdleSleep | kIOPMPowerOn; else editedDesire = 0; @@ -2051,29 +3032,29 @@ IOReturn IOPMrootDomain::requestPowerDomainState ( // Is this connection attached to the child that called // requestPowerDomainState()? - if ( connection == whichChild ) + if (connection == whichChild) { - // Yes, OR in the child's input power requirements. + // OR in the child's input power requirements. powerRequestFlag |= editedDesire; - if ( desiredState & kIOPMPreventSystemSleep ) + if ( desiredFlags & kIOPMPreventSystemSleep ) sleepIsSupported = false; } else { -#if DEBUG - powerChild = (IOService *) connection->getChildEntry(gIOPowerPlane); -#endif - DEBUG_LOG(" child %p, PState %ld, noIdle %d, noSleep %d, valid %d %s\n", - powerChild, - connection->getDesiredDomainState(), - connection->getPreventIdleSleepFlag(), - connection->getPreventSystemSleepFlag(), - connection->getReadyFlag(), - powerChild ? powerChild->getName() : "?"); - - // No, OR in the child's desired power domain state. - // Which is our power state desired by this child. + if (kIOLogPMRootDomain & gIOKitDebug) + { + IOService * powerChild = + (IOService *) connection->getChildEntry(gIOPowerPlane); + DLOG("child %p, state %ld, noIdle %d, noSleep %d - %s\n", + powerChild, + connection->getDesiredDomainState(), + connection->getPreventIdleSleepFlag(), + connection->getPreventSystemSleepFlag(), + powerChild ? powerChild->getName() : "?"); + } + + // OR in the child's desired power state (0 or ON_STATE). 
powerRequestFlag |= connection->getDesiredDomainState(); if ( connection->getPreventSystemSleepFlag() ) @@ -2083,62 +3064,47 @@ IOReturn IOPMrootDomain::requestPowerDomainState ( } iter->release(); } - + + DLOG("childPowerFlags 0x%lx, extraSleepDelay %ld\n", + powerRequestFlag, extraSleepDelay); + if ( !powerRequestFlag && !systemBooting ) { - if (!wrangler) - { - sleepASAP = false; - changePowerStateToPriv(ON_STATE); - if (idleSeconds) - { - AbsoluteTime deadline; - // stay awake for at least idleSeconds - clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; - } - } - else if (extraSleepDelay == 0) - { - sleepASAP = true; - } + if (!wrangler) + { + sleepASAP = false; + changePowerStateToPriv(ON_STATE); + if (idleSeconds) + { + // stay awake for at least idleSeconds + startIdleSleepTimer(idleSeconds); + } + } + else if (!extraSleepDelay && !idleSleepTimerPending) + { + sleepASAP = true; + } } - - DEBUG_LOG(" sleepDelay %lx, mergedFlags %lx, sleepASAP %x, booting %x\n", - extraSleepDelay, powerRequestFlag, sleepASAP, systemBooting); - // Drop our power clamp to SLEEP_STATE when all devices become idle. - // Needed when the system sleep and display sleep timeouts are the same. - // Otherwise, the extra sleep timer will also drop our power clamp. + // Drop our power clamp to SLEEP_STATE when all children became idle, + // and the system sleep and display sleep values are equal. adjustPowerState(); - editedDesire |= (desiredState & kIOPMPreventSystemSleep); - - // If our power clamp has already dropped to SLEEP_STATE, and no child - // is keeping us at max power, then this will trigger idle sleep. - - return super::requestPowerDomainState(editedDesire, whichChild, specification); -} - + // If our power clamp has already dropped to SLEEP_STATE, and no child + // is keeping us at ON_STATE, then this will trigger idle sleep. 
-//********************************************************************************* -// getSleepSupported -// -//********************************************************************************* + editedDesire |= (desiredFlags & kIOPMPreventSystemSleep); -IOOptionBits IOPMrootDomain::getSleepSupported( void ) -{ - return( platformSleepSupport ); + return super::requestPowerDomainState( + editedDesire, whichChild, specification); } -//********************************************************************************* +//****************************************************************************** // handlePlatformHaltRestart // -//********************************************************************************* +//****************************************************************************** struct HaltRestartApplierContext { IOPMrootDomain * RootDomain; @@ -2178,10 +3144,10 @@ platformHaltRestartApplier( OSObject * object, void * context ) if (notifier) { - HaltRestartLog("%s handler %p took %lu ms\n", + KLOG("%s handler %p took %u ms\n", (ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : "Restart", - notifier->handler, deltaTime ); + notifier->handler, (uint32_t) deltaTime ); } } @@ -2201,10 +3167,11 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) switch (pe_type) { case kPEHaltCPU: + case kPEUPSDelayHaltCPU: ctx.PowerState = OFF_STATE; ctx.MessageType = kIOMessageSystemWillPowerOff; break; - + case kPERestartCPU: ctx.PowerState = RESTART_STATE; ctx.MessageType = kIOMessageSystemWillRestart; @@ -2217,131 +3184,303 @@ void IOPMrootDomain::handlePlatformHaltRestart( UInt32 pe_type ) // Notify legacy clients applyToInterested(gIOPriorityPowerStateInterest, platformHaltRestartApplier, &ctx); + // For UPS shutdown leave File Server Mode intact, otherwise turn it off. 
+ if (kPEUPSDelayHaltCPU != pe_type) + { + const OSSymbol * setting = OSSymbol::withCString(kIOPMSettingRestartOnPowerLossKey); + OSNumber * num = OSNumber::withNumber((unsigned long long) 0, 32); + if (setting && num) + setPMSetting(setting, num); + // Release each object on its own so that a single failed + // allocation cannot leak the one that did succeed. + if (setting) + setting->release(); + if (num) + num->release(); + } + // Notify in power tree order notifySystemShutdown(this, ctx.MessageType); deltaTime = computeDeltaTimeMS(&startTime); - HaltRestartLog("%s all drivers took %lu ms\n", + KLOG("%s all drivers took %u ms\n", (ctx.MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : "Restart", - deltaTime ); + (uint32_t) deltaTime ); +} + + +//****************************************************************************** +// registerInterest +// +// Override that substitutes gIOAppPowerStateInterest when the caller asks for +// kIOPMPrivilegedPowerInterest, then registers through the superclass. +// With ROOT_DOMAIN_RUN_STATES enabled, the resulting notifier is retained and +// queued as a kPowerEventConfigdRegisteredInterest event; the extra reference +// is dropped here if the event cannot be submitted. +//****************************************************************************** + +IONotifier * IOPMrootDomain::registerInterest( + const OSSymbol * typeOfInterest, + IOServiceInterestHandler handler, + void * target, void * ref ) +{ + IONotifier * notifier; + bool isConfigd; + + isConfigd = typeOfInterest && + typeOfInterest->isEqualTo(kIOPMPrivilegedPowerInterest); + + if (isConfigd) + typeOfInterest = gIOAppPowerStateInterest; + + notifier = super::registerInterest(typeOfInterest, handler, target, ref); + +#if ROOT_DOMAIN_RUN_STATES + if (isConfigd && notifier && pmPowerStateQueue) + { + notifier->retain(); + if (pmPowerStateQueue->submitPowerEvent( + kPowerEventConfigdRegisteredInterest, notifier) == false) + notifier->release(); + } +#endif + + return notifier; +} + +static bool clientMessageFilter( OSObject * object, void * arg ) +{ +#if ROOT_DOMAIN_RUN_STATES +#if LOG_INTEREST_CLIENTS + IOPMInterestContext * context = (IOPMInterestContext *) arg; +#endif + bool allow = false; + + switch (gMessageClientType) + { + case kMessageClientNone: + allow = false; + break; + + case kMessageClientAll: + allow = true; + break; + + case kMessageClientConfigd: + allow = ((object == (OSObject *) gConfigdNotifier) || + (object == (OSObject *)
gSysPowerDownNotifier)); + break; + } + +#if LOG_INTEREST_CLIENTS + if (allow) + DLOG("system message %x to %p\n", + context->msgType, object); +#endif + + return allow; +#else + return true; +#endif } -//********************************************************************************* +//****************************************************************************** // tellChangeDown // // We override the superclass implementation so we can send a different message // type to the client or application being notified. -//********************************************************************************* +//****************************************************************************** -bool IOPMrootDomain::tellChangeDown ( unsigned long stateNum ) +bool IOPMrootDomain::tellChangeDown( unsigned long stateNum ) { + bool done; + + DLOG("tellChangeDown %u->%u, R-state %u\n", + (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex); + switch ( stateNum ) { case DOZE_STATE: case SLEEP_STATE: - - // Direct callout into OSMetaClass so it can disable kmod unloads - // during sleep/wake to prevent deadlocks. - OSMetaClassSystemSleepOrWake( kIOMessageSystemWillSleep ); - return super::tellClientsWithResponse(kIOMessageSystemWillSleep); + if (!ignoreChangeDown) + { + // Direct callout into OSKext so it can disable kext unloads + // during sleep/wake to prevent deadlocks. + OSKextSystemSleepOrWake( kIOMessageSystemWillSleep ); + + if ( (SLEEP_STATE == stateNum) && sleepSupportedPEFunction ) + { + // Reset PCI prevent sleep flag before calling platform driver. + OSBitAndAtomic(~kPCICantSleep, &platformSleepSupport); + + // Skip PCI check for maintenance sleep. + if ((runStateFlags & kRStateFlagSuppressPCICheck) == 0) + { + // Determine if the machine supports sleep, or must doze. 
+ getPlatform()->callPlatformFunction( + sleepSupportedPEFunction, false, + NULL, NULL, NULL, NULL); + } + + // If the machine only supports doze, the callPlatformFunction call + // boils down to IOPMrootDomain::setSleepSupported(kPCICantSleep), + // otherwise nothing. + } + + // Update canSleep and kIOSleepSupportedKey property so drivers + // can tell if platform is going to sleep versus doze. + +#if CONFIG_SLEEP + canSleep = true; +#else + canSleep = false; +#endif + if (!sleepIsSupported) + canSleep = false; + if (platformSleepSupport & kPCICantSleep) + canSleep = false; + setProperty(kIOSleepSupportedKey, canSleep); + DLOG("canSleep %d\n", canSleep); + + // Publish the new sleep-wake UUID + publishSleepWakeUUID(true); + + // Two change downs are sent by IOServicePM. Ignore the 2nd. + ignoreChangeDown = true; + + tracePoint( kIOPMTracePointSystemSleepAppsPhase); + } + + DLOG("kIOMessageSystemWillSleep (%d)\n", gMessageClientType); + done = super::tellClientsWithResponse( + kIOMessageSystemWillSleep, clientMessageFilter); + break; + + default: + done = super::tellChangeDown(stateNum); + break; } - return super::tellChangeDown(stateNum); + return done; } -//********************************************************************************* +//****************************************************************************** // askChangeDown // // We override the superclass implementation so we can send a different message // type to the client or application being notified. // -// This must be idle sleep since we don't ask apps during any other power change. -//********************************************************************************* +// This must be idle sleep since we don't ask during any other power change. 
+//****************************************************************************** -bool IOPMrootDomain::askChangeDown ( unsigned long ) +bool IOPMrootDomain::askChangeDown( unsigned long stateNum ) { - return super::tellClientsWithResponse(kIOMessageCanSystemSleep); + DLOG("askChangeDown %u->%u, R-state %u\n", + (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex); + DLOG("kIOMessageCanSystemSleep (%d)\n", gMessageClientType); + + return super::tellClientsWithResponse( + kIOMessageCanSystemSleep, + clientMessageFilter); } -//********************************************************************************* +//****************************************************************************** // tellNoChangeDown // -// Notify registered applications and kernel clients that we are not -// dropping power. +// Notify registered applications and kernel clients that we are not dropping +// power. // // We override the superclass implementation so we can send a different message // type to the client or application being notified. // // This must be a vetoed idle sleep, since no other power change can be vetoed. -//********************************************************************************* +//****************************************************************************** -void IOPMrootDomain::tellNoChangeDown ( unsigned long ) +void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum ) { + DLOG("tellNoChangeDown %u->%u, R-state %u\n", + (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex); + + // Sleep canceled, clear the sleep trace point. 
+ tracePoint(kIOPMTracePointSystemUp); + if (idleSeconds && !wrangler) { - AbsoluteTime deadline; - sleepASAP = false; - // stay awake for at least idleSeconds - clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; + // stay awake for at least idleSeconds + sleepASAP = false; + startIdleSleepTimer(idleSeconds); } - return tellClients(kIOMessageSystemWillNotSleep); + DLOG("kIOMessageSystemWillNotSleep (%d)\n", gMessageClientType); + return tellClients(kIOMessageSystemWillNotSleep, clientMessageFilter); } -//********************************************************************************* +//****************************************************************************** // tellChangeUp // // Notify registered applications and kernel clients that we are raising power. // // We override the superclass implementation so we can send a different message // type to the client or application being notified. -//********************************************************************************* +//****************************************************************************** -void IOPMrootDomain::tellChangeUp ( unsigned long stateNum) +void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) { - if ( stateNum == ON_STATE ) + OSData *publishPMStats = NULL; + + DLOG("tellChangeUp %u->%u, R-state %u\n", + (uint32_t) getPowerState(), (uint32_t) stateNum, runStateIndex); + + ignoreChangeDown = false; + + if ( stateNum == ON_STATE ) { - // Direct callout into OSMetaClass so it can disable kmod unloads + // Direct callout into OSKext so it can disable kext unloads // during sleep/wake to prevent deadlocks. 
- OSMetaClassSystemSleepOrWake( kIOMessageSystemHasPoweredOn ); + OSKextSystemSleepOrWake( kIOMessageSystemHasPoweredOn ); - if (getPowerState() == ON_STATE) - { - // this is a quick wake from aborted sleep - if (idleSeconds && !wrangler) - { - AbsoluteTime deadline; - sleepASAP = false; - // stay awake for at least idleSeconds - clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline); - thread_call_enter_delayed(extraSleepTimer, deadline); - // this gets turned off when we sleep again - idleSleepPending = true; - } - tellClients(kIOMessageSystemWillPowerOn); - } + if (getPowerState() == ON_STATE) + { + // this is a quick wake from aborted sleep + if (idleSeconds && !wrangler) + { + // stay awake for at least idleSeconds + sleepASAP = false; + startIdleSleepTimer(idleSeconds); + } + DLOG("kIOMessageSystemWillPowerOn (%d)\n", gMessageClientType); + tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter); + } #if HIBERNATION - else - { - IOHibernateSystemPostWake(); - } + else + { + IOHibernateSystemPostWake(); + } #endif - return tellClients(kIOMessageSystemHasPoweredOn); + + tracePoint(kIOPMTracePointSystemWakeAppsPhase); + + // Publish a snapshot of pmStats, then reset it. + // OSData::withBytes() can return NULL; skip publishing in that case + // instead of dereferencing a NULL pointer in release(). + publishPMStats = OSData::withBytes(&pmStats, sizeof(pmStats)); + if (publishPMStats) + { + setProperty(kIOPMSleepStatisticsKey, publishPMStats); + publishPMStats->release(); + } + bzero(&pmStats, sizeof(pmStats)); + + if (pmStatsAppResponses) + { + setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); + pmStatsAppResponses->release(); + pmStatsAppResponses = OSArray::withCapacity(5); + } + + DLOG("kIOMessageSystemHasPoweredOn (%d)\n", gMessageClientType); + tellClients(kIOMessageSystemHasPoweredOn, clientMessageFilter); + + tracePoint(kIOPMTracePointSystemUp); } } -//********************************************************************************* + +//****************************************************************************** // reportUserInput // -//*********************************************************************************
+//****************************************************************************** -void IOPMrootDomain::reportUserInput ( void ) +void IOPMrootDomain::reportUserInput( void ) { #if !NO_KERNEL_HID OSIterator * iter; @@ -2361,74 +3500,364 @@ void IOPMrootDomain::reportUserInput ( void ) #endif } -//********************************************************************************* + +//****************************************************************************** // setQuickSpinDownTimeout // -//********************************************************************************* +//****************************************************************************** -void IOPMrootDomain::setQuickSpinDownTimeout ( void ) +void IOPMrootDomain::setQuickSpinDownTimeout( void ) { - super::setAggressiveness((unsigned long)kPMMinutesToSpinDown,(unsigned long)1); + ASSERT_GATED(); + setAggressiveness( + kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownEnable ); } -//********************************************************************************* + +//****************************************************************************** // restoreUserSpinDownTimeout // -//********************************************************************************* +//****************************************************************************** + +void IOPMrootDomain::restoreUserSpinDownTimeout( void ) +{ + ASSERT_GATED(); + setAggressiveness( + kPMMinutesToSpinDown, 0, kAggressivesOptionQuickSpindownDisable ); +} + + +//****************************************************************************** +// changePowerStateTo & changePowerStateToPriv +// +// Override of these methods for logging purposes. 
+//****************************************************************************** + +IOReturn IOPMrootDomain::changePowerStateTo( unsigned long ordinal ) +{ + return kIOReturnUnsupported; // ignored +} + +IOReturn IOPMrootDomain::changePowerStateToPriv( unsigned long ordinal ) +{ + DLOG("changePowerStateToPriv(%lu)\n", ordinal); + + if ( (getPowerState() == DOZE_STATE) && (ordinal != ON_STATE) ) + { + return kIOReturnSuccess; + } + + if ( (userDisabledAllSleep || systemBooting || systemShutdown) && + (ordinal == SLEEP_STATE) ) + { + DLOG("SLEEP rejected, forced to ON state (UD %d, SB %d, SS %d)\n", + userDisabledAllSleep, systemBooting, systemShutdown); + + super::changePowerStateToPriv(ON_STATE); + } + + return super::changePowerStateToPriv(ordinal); +} + + +//****************************************************************************** +// updateRunState +// +//****************************************************************************** + +void IOPMrootDomain::updateRunState( uint32_t inRunState ) +{ +#if ROOT_DOMAIN_RUN_STATES + if (inRunState < kRStateCount) + { + runStateIndex = nextRunStateIndex = inRunState; + runStateFlags = gRStateFlags[inRunState]; + + setProperty( + kIOPMRootDomainRunStateKey, + (unsigned long long) inRunState, 32); + } +#endif +} + + +#if ROOT_DOMAIN_RUN_STATES +//****************************************************************************** +// tagPowerPlaneService +// +// Running on PM work loop thread. +//****************************************************************************** + +void IOPMrootDomain::tagPowerPlaneService( + IOService * service, + uint32_t * rdFlags ) +{ + *rdFlags = 0; + + if (service->getProperty("IOPMStrictTreeOrder") || + service->metaCast("IODisplayWrangler") || + OSDynamicCast(OSNumber, + service->getProperty("IOPMUnattendedWakePowerState"))) + { + *rdFlags |= kServiceFlagGraphics; + DLOG("tagged device %s %x\n", service->getName(), *rdFlags); + } + + // Locate the first PCI host bridge. 
+ if (!pciHostBridgeDevice && service->metaCast("IOPCIBridge")) + { + IOService * provider = service->getProvider(); + if (OSDynamicCast(IOPlatformDevice, provider) && + provider->inPlane(gIODTPlane)) + { + pciHostBridgeDevice = provider; + DLOG("PMTrace found PCI host bridge %s->%s\n", + provider->getName(), service->getName()); + } + } + + // Tag top-level PCI devices. The order of PMinit() call does not + // change across boots and is used as the PCI bit number. + if (pciHostBridgeDevice && service->metaCast("IOPCIDevice")) + { + // Would prefer to check built-in property, but tagPowerPlaneService() + // is called before pciDevice->registerService(). + IORegistryEntry * parent = service->getParentEntry(gIODTPlane); + if ((parent == pciHostBridgeDevice) && service->getProperty("acpi-device")) + { + int bit = pmTracer->recordTopLevelPCIDevice( service ); + if (bit >= 0) + { + // Save the assigned bit for fast lookup. + bit &= 0xff; + *rdFlags |= (kServiceFlagTopLevelPCI | (bit << 8)); + } + } + } +} + + +//****************************************************************************** +// handleActivityTickleForService +// +// Called by IOService::activityTickle() for a tickle that is requesting the +// service to raise power state. Called from driver thread. +//****************************************************************************** + +void IOPMrootDomain::handleActivityTickleForService( IOService * service ) +{ + // Tickle directed to IODisplayWrangler while graphics is disabled. + // Bring graphics online. + + if ((service == wrangler) && + (runStateIndex > kRStateNormal) && + (false == wranglerTickled)) + { + DLOG("display wrangler tickled\n"); + wranglerTickled = true; + synchronizePowerTree(); + } +} + + +//****************************************************************************** +// handlePowerChangeStartForService +// +// Running on PM work loop thread. 
+//****************************************************************************** + +void IOPMrootDomain::handlePowerChangeStartForService( + IOService * service, + uint32_t * rdFlags, + uint32_t newPowerState, + uint32_t changeFlags ) +{ + if (service == this) + { + uint32_t currentPowerState = (uint32_t) getPowerState(); + uint32_t nextRunStateFlags; + + assert(nextRunStateIndex < kRStateCount); + nextRunStateFlags = gRStateFlags[nextRunStateIndex]; + + gMessageClientType = kMessageClientNone; + + // Transition towards or away from ON power state. + + if ((currentPowerState != newPowerState) && + ((ON_STATE == newPowerState) || (ON_STATE == currentPowerState))) + { + if ((runStateFlags & kRStateFlagSuppressMessages) == 0) + gMessageClientType = kMessageClientAll; + else + gMessageClientType = kMessageClientConfigd; + } + + // Transition caused by deassertion of system notification suppression. + + if ((ON_STATE == newPowerState) && + (ON_STATE == currentPowerState) && + ((runStateFlags ^ nextRunStateFlags) & kRStateFlagSuppressMessages)) + { + gMessageClientType = kMessageClientAll; + } + + if (ON_STATE == newPowerState) + { + DLOG("kIOMessageSystemWillPowerOn (%d)\n", + gMessageClientType); + tellClients(kIOMessageSystemWillPowerOn, clientMessageFilter); + } + } + + if (*rdFlags & kServiceFlagTopLevelPCI) + { + pmTracer->tracePCIPowerChange( + PMTraceWorker::kPowerChangeStart, + service, changeFlags, + (*rdFlags >> 8) & 0xff); + } +} + + +//****************************************************************************** +// handlePowerChangeDoneForService +// +// Running on PM work loop thread. 
+//****************************************************************************** + +void IOPMrootDomain::handlePowerChangeDoneForService( + IOService * service, + uint32_t * rdFlags, + uint32_t newPowerState, + uint32_t changeFlags ) +{ + if (*rdFlags & kServiceFlagTopLevelPCI) + { + pmTracer->tracePCIPowerChange( + PMTraceWorker::kPowerChangeCompleted, + service, changeFlags, + (*rdFlags >> 8) & 0xff); + } +} + + +//****************************************************************************** +// overridePowerStateForService +// +// Runs on PM work loop thread. +//****************************************************************************** + +void IOPMrootDomain::overridePowerStateForService( + IOService * service, + uint32_t * rdFlags, + unsigned long * powerState, + uint32_t changeFlags ) +{ + uint32_t inPowerState = (uint32_t) *powerState; + + if ((service == this) && (inPowerState == ON_STATE) && + (changeFlags & kIOPMSynchronize)) + { + DLOG("sync root domain %u->%u\n", + (uint32_t) getPowerState(), inPowerState); + + // Root Domain is in a reduced R-state, and a HID tickle has + // requested a PM tree sync. Begin R-state transition. + + if (runStateIndex != kRStateNormal) + { + nextRunStateIndex = kRStateNormal; + setProperty( + kIOPMRootDomainRunStateKey, + (unsigned long long) kRStateNormal, 32); + } + } + + if (*rdFlags & kServiceFlagGraphics) + { + DLOG("graphics device %s %u->%u (flags 0x%x)\n", + service->getName(), (uint32_t) service->getPowerState(), + inPowerState, changeFlags); + + if (inPowerState == 0) + { + // Graphics device is powering down, apply limit preventing + // device from powering back up later unless we consent. 
+ + if ((*rdFlags & kServiceFlagNoPowerUp) == 0) + { + *rdFlags |= kServiceFlagNoPowerUp; + DLOG("asserted power limit for %s\n", + service->getName()); + } + } + else + { + uint32_t nextRunStateFlags; + + assert(nextRunStateIndex < kRStateCount); + nextRunStateFlags = gRStateFlags[nextRunStateIndex]; + + // Graphics device is powering up. Release power limit at the + // did-change machine state. + + if (changeFlags & kIOPMSynchronize) + { + if ((runStateFlags & kRStateFlagSuppressGraphics) && + ((nextRunStateFlags & kRStateFlagSuppressGraphics) == 0) && + (changeFlags & kIOPMDomainDidChange)) + { + // Woke up without graphics power, but + // HID event has tickled display wrangler. + *rdFlags &= ~kServiceFlagNoPowerUp; + DLOG("removed power limit for %s\n", + service->getName()); + } + } + else if ((runStateFlags & kRStateFlagSuppressGraphics) == 0) + { + *rdFlags &= ~kServiceFlagNoPowerUp; + } -void IOPMrootDomain::restoreUserSpinDownTimeout ( void ) -{ - super::setAggressiveness((unsigned long)kPMMinutesToSpinDown,(unsigned long)user_spindown); + if (*rdFlags & kServiceFlagNoPowerUp) + { + DLOG("limited %s to power state 0\n", + service->getName()); + *powerState = 0; + } + } + } } -//********************************************************************************* -// changePowerStateTo & changePowerStateToPriv -// -// Override of these methods for logging purposes. 
-//********************************************************************************* -IOReturn IOPMrootDomain::changePowerStateTo ( unsigned long ordinal ) -{ - return super::changePowerStateTo(ordinal); -} +//****************************************************************************** +// setMaintenanceWakeCalendar +// +//****************************************************************************** -IOReturn IOPMrootDomain::changePowerStateToPriv ( unsigned long ordinal ) +IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( + const IOPMCalendarStruct * calendar ) { - IOReturn ret; - - DEBUG_LOG("ChangePowerStateToPriv: power state %ld\n", ordinal); - - if ( (getPowerState() == DOZE_STATE) && (ordinal != ON_STATE) ) - { - return kIOReturnSuccess; - } - - if( (userDisabledAllSleep || systemBooting || systemShutdown) - && (ordinal == SLEEP_STATE) ) - { - DEBUG_LOG(" sleep denied: disableAllSleep %d, booting %d, shutdown %d\n", - userDisabledAllSleep, systemBooting, systemShutdown); - super::changePowerStateToPriv(ON_STATE); - } + OSData * data; + IOReturn ret; - if( (SLEEP_STATE == ordinal) && sleepSupportedPEFunction ) - { - - // Determine if the machine supports sleep, or must doze. - ret = getPlatform()->callPlatformFunction( - sleepSupportedPEFunction, false, - NULL, NULL, NULL, NULL); + if (!calendar) + return kIOReturnBadArgument; - // If the machine only supports doze, the callPlatformFunction call - // boils down to IOPMrootDomain::setSleepSupported(kPCICantSleep), - // otherwise nothing. 
- } + data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar)); + if (!data) + return kIOReturnNoMemory; + + ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data); - return super::changePowerStateToPriv(ordinal); + data->release(); + return ret; } +#endif /* ROOT_DOMAIN_RUN_STATES */ -//********************************************************************************* +//****************************************************************************** // sysPowerDownHandler // // Receives a notification when the RootDomain changes state. @@ -2436,7 +3865,7 @@ IOReturn IOPMrootDomain::changePowerStateToPriv ( unsigned long ordinal ) // Allows us to take action on system sleep, power down, and restart after // applications have received their power change notifications and replied, // but before drivers have powered down. We perform a vfs sync on power down. -//********************************************************************************* +//****************************************************************************** IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, UInt32 messageType, IOService * service, @@ -2446,13 +3875,13 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, IOPowerStateChangeNotification *params = (IOPowerStateChangeNotification *) messageArgument; IOPMrootDomain *rootDomain = OSDynamicCast(IOPMrootDomain, service); + DLOG("sysPowerDownHandler message %x\n", (uint32_t) messageType); + if(!rootDomain) return kIOReturnUnsupported; switch (messageType) { case kIOMessageSystemWillSleep: - DEBUG_LOG("SystemWillSleep\n"); - // Interested applications have been notified of an impending power // change and have acked (when applicable). 
// This is our chance to save whatever state we can before powering @@ -2494,8 +3923,50 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, } return ret; } - -//********************************************************************************* + +//****************************************************************************** +// publishSleepWakeUUID +// +// +//****************************************************************************** +void IOPMrootDomain::publishSleepWakeUUID( bool shouldPublish ) +{ + if (shouldPublish) + { + if (queuedSleepWakeUUIDString) + { + if (OSCompareAndSwap(/*old*/ true, /*new*/ false, &gSleepWakeUUIDIsSet)) + { + // Upon wake, it takes some time for userland to invalidate the + // UUID. If another sleep is initiated during that period, force + // a CLEAR message to balance the upcoming SET message. + + messageClients( kIOPMMessageSleepWakeUUIDChange, + kIOPMMessageSleepWakeUUIDCleared ); + + DLOG("SleepWake UUID forced clear\n"); + } + + setProperty(kIOPMSleepWakeUUIDKey, queuedSleepWakeUUIDString); + DLOG("SleepWake UUID published: %s\n", queuedSleepWakeUUIDString->getCStringNoCopy()); + queuedSleepWakeUUIDString->release(); + queuedSleepWakeUUIDString = NULL; + messageClients(kIOPMMessageSleepWakeUUIDChange, + kIOPMMessageSleepWakeUUIDSet); + } + } else { + if (OSCompareAndSwap(/*old*/ true, /*new*/ false, &gSleepWakeUUIDIsSet)) + { + DLOG("SleepWake UUID cleared\n"); + removeProperty(kIOPMSleepWakeUUIDKey); + messageClients(kIOPMMessageSleepWakeUUIDChange, + kIOPMMessageSleepWakeUUIDCleared); + } + } +} + + +//****************************************************************************** // displayWranglerNotification // // Receives a notification when the IODisplayWrangler changes state. 
@@ -2509,7 +3980,7 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, // On wake from display sleep: // - Cancel the idle sleep timer // - restore the user's chosen spindown timer from the "quick" spin down value -//********************************************************************************* +//****************************************************************************** IOReturn IOPMrootDomain::displayWranglerNotification( void * target, void * refCon, @@ -2517,30 +3988,41 @@ IOReturn IOPMrootDomain::displayWranglerNotification( void * messageArgument, vm_size_t argSize ) { #if !NO_KERNEL_HID - IOPMrootDomain * rootDomain = OSDynamicCast(IOPMrootDomain, (IOService *)target); - AbsoluteTime deadline; - static int displayPowerState = 4; + int displayPowerState; + IOPowerStateChangeNotification * params = + (IOPowerStateChangeNotification *) messageArgument; + + if ((messageType != kIOMessageDeviceWillPowerOff) && + (messageType != kIOMessageDeviceHasPoweredOn)) + return kIOReturnUnsupported; - if (!rootDomain) + ASSERT_GATED(); + if (!gRootDomain) return kIOReturnUnsupported; + displayPowerState = params->stateNumber; + DLOG("DisplayWrangler message 0x%x, new power state %d\n", + (uint32_t) messageType, displayPowerState); + switch (messageType) { case kIOMessageDeviceWillPowerOff: - DEBUG_LOG("DisplayWranglerWillPowerOff: new p-state %d\n", - displayPowerState - 1); // The display wrangler has dropped power because of idle display sleep - // or force system sleep. We will receive 4 messages before the display - // wrangler reaches its lowest state. Act only when going to state 2. + // or force system sleep. 
// - // 4->3 Display Dim - // 3->2 Display Sleep - // 2->1 Not visible to user - // 1->0 Not visible to user + // 4 Display ON + // 3 Display Dim + // 2 Display Sleep + // 1 Not visible to user + // 0 Not visible to user + + if (gRootDomain->wranglerAsleep || (displayPowerState > 2)) + break; + + // Record the time the display wrangler went to sleep. - displayPowerState--; - if ( 2 != displayPowerState ) - return kIOReturnUnsupported; + gRootDomain->wranglerAsleep = true; + clock_get_uptime(&gRootDomain->wranglerSleepTime); // We start a timer here if the System Sleep timer is greater than the // Display Sleep timer. We kick off this timer when the display sleeps. @@ -2549,56 +4031,38 @@ IOReturn IOPMrootDomain::displayWranglerNotification( // to the user's activity patterns, Display Sleep _always_ occurs at the // specified interval since last user activity. - if ( rootDomain->extraSleepDelay ) + if ( gRootDomain->extraSleepDelay ) + { + gRootDomain->startIdleSleepTimer(gRootDomain->extraSleepDelay * 60); + } + else if ( gRootDomain->sleepSlider ) { - clock_interval_to_deadline(rootDomain->extraSleepDelay*60, kSecondScale, &deadline); - thread_call_enter_delayed(rootDomain->extraSleepTimer, deadline); - rootDomain->idleSleepPending = true; - DEBUG_LOG(" sleep timer set to expire in %ld min\n", - rootDomain->extraSleepDelay); - } else { // Accelerate disk spindown if system sleep and display sleep // sliders are set to the same value (e.g. both set to 5 min), // and display is about to go dark. Check that spin down timer // is non-zero (zero = never spin down) and system sleep is // not set to never sleep. 
- if ( (0 != rootDomain->user_spindown) && (0 != rootDomain->sleepSlider) ) - { - DEBUG_LOG(" accelerate quick disk spindown, was %d min\n", - rootDomain->user_spindown); - rootDomain->setQuickSpinDownTimeout(); - } + gRootDomain->setQuickSpinDownTimeout(); } break; case kIOMessageDeviceHasPoweredOn: - DEBUG_LOG("DisplayWranglerHasPoweredOn: previous p-state %d\n", - displayPowerState); // The display wrangler has powered on either because of user activity // or wake from sleep/doze. - displayPowerState = 4; - rootDomain->adjustPowerState(); + if ( 4 != displayPowerState ) + break; - // cancel any pending idle sleep timers - if (rootDomain->idleSleepPending) - { - DEBUG_LOG(" extra-sleep timer stopped\n"); - thread_call_cancel(rootDomain->extraSleepTimer); - rootDomain->idleSleepPending = false; - } + gRootDomain->wranglerAsleep = false; + gRootDomain->adjustPowerState(); + gRootDomain->cancelIdleSleepTimer(); // Change the spindown value back to the user's selection from our // accelerated setting. - if (0 != rootDomain->user_spindown) - { - DEBUG_LOG(" restoring disk spindown to %d min\n", - rootDomain->user_spindown); - rootDomain->restoreUserSpinDownTimeout(); - } + gRootDomain->restoreUserSpinDownTimeout(); break; @@ -2607,15 +4071,15 @@ IOReturn IOPMrootDomain::displayWranglerNotification( } #endif return kIOReturnUnsupported; - } +} + -//********************************************************************************* +//****************************************************************************** // displayWranglerPublished // // Receives a notification when the IODisplayWrangler is published. // When it's published we install a power state change handler. 
-// -//********************************************************************************* +//****************************************************************************** bool IOPMrootDomain::displayWranglerPublished( void * target, @@ -2623,16 +4087,13 @@ bool IOPMrootDomain::displayWranglerPublished( IOService * newService) { #if !NO_KERNEL_HID - IOPMrootDomain *rootDomain = - OSDynamicCast(IOPMrootDomain, (IOService *)target); - - if(!rootDomain) + if(!gRootDomain) return false; - rootDomain->wrangler = newService; - + gRootDomain->wrangler = newService; + // we found the display wrangler, now install a handler - if( !rootDomain->wrangler->registerInterest( gIOGeneralInterest, + if( !gRootDomain->wrangler->registerInterest( gIOGeneralInterest, &displayWranglerNotification, target, 0) ) { return false; @@ -2641,11 +4102,11 @@ bool IOPMrootDomain::displayWranglerPublished( return true; } -//********************************************************************************* + +//****************************************************************************** // batteryPublished // // Notification on battery class IOPowerSource appearance -// //****************************************************************************** bool IOPMrootDomain::batteryPublished( @@ -2663,7 +4124,8 @@ bool IOPMrootDomain::batteryPublished( return (true); } -//********************************************************************************* + +//****************************************************************************** // adjustPowerState // // Some condition that affects our wake/sleep/doze decision has changed. @@ -2675,57 +4137,384 @@ bool IOPMrootDomain::batteryPublished( // In those circumstances, we prevent sleep and doze by holding power on with // changePowerStateToPriv(ON). 
// -// If the above conditions do not exist, and also the sleep timer has expired, we -// allow sleep or doze to occur with either changePowerStateToPriv(SLEEP) or +// If the above conditions do not exist, and also the sleep timer has expired, +// we allow sleep or doze to occur with either changePowerStateToPriv(SLEEP) or // changePowerStateToPriv(DOZE) depending on whether or not we already know the // platform cannot sleep. // // In this case, sleep or doze will either occur immediately or at the next time // that no children are holding the system out of idle sleep via the // kIOPMPreventIdleSleep flag in their power state arrays. -//********************************************************************************* +//****************************************************************************** void IOPMrootDomain::adjustPowerState( void ) { + DLOG("adjustPowerState " + "PS %u, ASAP %d, SL %ld, AS %d, SB %d, SS %d, UD %d\n", + (uint32_t) getPowerState(), sleepASAP, sleepSlider, + allowSleep, systemBooting, systemShutdown, userDisabledAllSleep); + + ASSERT_GATED(); + if ( (sleepSlider == 0) || !allowSleep || systemBooting || systemShutdown - || userDisabledAllSleep ) + || userDisabledAllSleep + || (runStateFlags & kRStateFlagDisableIdleSleep) ) { - DEBUG_LOG("AdjustPowerState %ld -> ON: slider %ld, allowSleep %d, " - "booting %d, shutdown %d, userDisabled %d\n", - getPowerState(), sleepSlider, allowSleep, systemBooting, - systemShutdown, userDisabledAllSleep); - changePowerStateToPriv(ON_STATE); } else { if ( sleepASAP ) { - DEBUG_LOG("AdjustPowerState SLEEP\n"); - /* Convenient place to run any code at idle sleep time - * IOPMrootDomain initiates an idle sleep here + * IOPMrootDomain initiates an idle sleep here * * Set last sleep cause accordingly. 
*/ setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey); - + + tracePoint(kIOPMTracePointSleepStarted); + sleepASAP = false; - if ( !sleepIsSupported ) - { - setSleepSupported( kPCICantSleep ); - kprintf("Sleep prevented by kIOPMPreventSystemSleep flag\n"); - } changePowerStateToPriv(SLEEP_STATE); } } } -//********************************************************************************* +void IOPMrootDomain::pmStatsRecordEvent( + int eventIndex, + AbsoluteTime timestamp) +{ + bool starting = eventIndex & kIOPMStatsEventStartFlag ? true:false; + bool stopping = eventIndex & kIOPMStatsEventStopFlag ? true:false; + uint64_t delta; + uint64_t nsec; + + eventIndex &= ~(kIOPMStatsEventStartFlag | kIOPMStatsEventStopFlag); + + absolutetime_to_nanoseconds(timestamp, &nsec); + + switch (eventIndex) { + case kIOPMStatsHibernateImageWrite: + if (starting) + pmStats.hibWrite.start = nsec; + else if (stopping) + pmStats.hibWrite.stop = nsec; + + if (stopping) { + delta = pmStats.hibWrite.stop - pmStats.hibWrite.start; + IOLog("PMStats: Hibernate write took %qd ms\n", delta/1000000ULL); + } + break; + case kIOPMStatsHibernateImageRead: + if (starting) + pmStats.hibRead.start = nsec; + else if (stopping) + pmStats.hibRead.stop = nsec; + + if (stopping) { + delta = pmStats.hibRead.stop - pmStats.hibRead.start; + IOLog("PMStats: Hibernate read took %qd ms\n", delta/1000000ULL); + } + break; + } +} + +/* + * Appends a record of the application response to + * IOPMrootDomain::pmStatsAppResponses + */ +void IOPMrootDomain::pmStatsRecordApplicationResponse( + const OSSymbol *response, + const char *name, + int messageType, + uint32_t delay_ms, + int app_pid) +{ + OSDictionary *responseDescription = NULL; + OSNumber *delayNum = NULL; + OSNumber *pidNum = NULL; + OSNumber *msgNum = NULL; + const OSSymbol *appname; + const OSSymbol *entryName; + OSObject *entryType; + int i; + + if (!pmStatsAppResponses || pmStatsAppResponses->getCount() > 50) + return; + + i = 0; + while 
((responseDescription = (OSDictionary *) pmStatsAppResponses->getObject(i++))) + { + entryType = responseDescription->getObject(_statsResponseTypeKey); + entryName = (OSSymbol *) responseDescription->getObject(_statsNameKey); + if (entryName && (entryType == response) && entryName->isEqualTo(name)) + { + OSNumber * entryValue; + entryValue = (OSNumber *) responseDescription->getObject(_statsTimeMSKey); + if (entryValue && (entryValue->unsigned32BitValue() < delay_ms)) + entryValue->setValue(delay_ms); + return; + } + } + + responseDescription = OSDictionary::withCapacity(5); + if (responseDescription) + { + if (response) { + responseDescription->setObject(_statsResponseTypeKey, response); + } + + if (messageType != 0) { + msgNum = OSNumber::withNumber(messageType, 32); + if (msgNum) { + responseDescription->setObject(_statsMessageTypeKey, msgNum); + msgNum->release(); + } + } + + if (name && (strlen(name) > 0)) + { + appname = OSSymbol::withCString(name); + if (appname) { + responseDescription->setObject(_statsNameKey, appname); + appname->release(); + } + } + + if (app_pid != -1) { + pidNum = OSNumber::withNumber(app_pid, 32); + if (pidNum) { + responseDescription->setObject(_statsPIDKey, pidNum); + pidNum->release(); + } + } + + delayNum = OSNumber::withNumber(delay_ms, 32); + if (delayNum) { + responseDescription->setObject(_statsTimeMSKey, delayNum); + delayNum->release(); + } + + if (pmStatsAppResponses) { + pmStatsAppResponses->setObject(responseDescription); + } + + responseDescription->release(); + } + return; +} + + +//****************************************************************************** +// TracePoint support +// +//****************************************************************************** + +#define kIOPMRegisterNVRAMTracePointHandlerKey \ + "IOPMRegisterNVRAMTracePointHandler" + +IOReturn IOPMrootDomain::callPlatformFunction( + const OSSymbol * functionName, + bool waitForFunction, + void * param1, void * param2, + void * param3, void * 
param4 ) +{ + if (pmTracer && functionName && + functionName->isEqualTo(kIOPMRegisterNVRAMTracePointHandlerKey) && + !pmTracer->tracePointHandler && !pmTracer->tracePointTarget) + { + uint32_t tracePointPhases, tracePointPCI; + uint64_t statusCode; + + pmTracer->tracePointHandler = (IOPMTracePointHandler) param1; + pmTracer->tracePointTarget = (void *) param2; + tracePointPCI = (uint32_t)(uintptr_t) param3; + tracePointPhases = (uint32_t)(uintptr_t) param4; + statusCode = (((uint64_t)tracePointPCI) << 32) | tracePointPhases; + if ((tracePointPhases >> 24) != kIOPMTracePointSystemUp) + { + LOG("Sleep failure code 0x%08x 0x%08x\n", + tracePointPCI, tracePointPhases); + } + setProperty(kIOPMSleepWakeFailureCodeKey, statusCode, 64); + pmTracer->tracePointHandler( pmTracer->tracePointTarget, 0, 0 ); + + return kIOReturnSuccess; + } + + return super::callPlatformFunction( + functionName, waitForFunction, param1, param2, param3, param4); +} + +void IOPMrootDomain::tracePoint( uint8_t point ) +{ + pmTracer->tracePoint(point); +} + +//****************************************************************************** +// PMTraceWorker Class +// +//****************************************************************************** + +#undef super +#define super OSObject +OSDefineMetaClassAndStructors(PMTraceWorker, OSObject) + +#define kPMBestGuessPCIDevicesCount 25 +#define kPMMaxRTCBitfieldSize 32 + +PMTraceWorker *PMTraceWorker::tracer(IOPMrootDomain *owner) +{ + PMTraceWorker *me; + + me = OSTypeAlloc( PMTraceWorker ); + if (!me || !me->init()) + { + return NULL; + } + + DLOG("PMTraceWorker %p\n", me); + + // Note that we cannot instantiate the PCI device -> bit mappings here, since + // the IODeviceTree has not yet been created by IOPlatformExpert. We create + // this dictionary lazily. 
+ me->owner = owner; + me->pciDeviceBitMappings = NULL; + me->pciMappingLock = IOLockAlloc(); + me->tracePhase = kIOPMTracePointSystemUp; + me->loginWindowPhase = 0; + me->pciBusyBitMask = 0; + return me; +} + +void PMTraceWorker::RTC_TRACE(void) +{ + if (tracePointHandler && tracePointTarget) + { + uint32_t wordA; + + wordA = tracePhase; // destined for bits 24-31 + wordA <<= 8; + wordA |= loginWindowPhase; // destined for bits 16-23 + wordA <<= 16; + + tracePointHandler( tracePointTarget, pciBusyBitMask, wordA ); + DLOG("RTC_TRACE wrote 0x%08x 0x%08x\n", pciBusyBitMask, wordA); + } +} + +int PMTraceWorker::recordTopLevelPCIDevice(IOService * pciDevice) +{ + const OSSymbol * deviceName; + int index = -1; + + IOLockLock(pciMappingLock); + + if (!pciDeviceBitMappings) + { + pciDeviceBitMappings = OSArray::withCapacity(kPMBestGuessPCIDevicesCount); + if (!pciDeviceBitMappings) + goto exit; + } + + // Check for bitmask overflow. + if (pciDeviceBitMappings->getCount() >= kPMMaxRTCBitfieldSize) + goto exit; + + if ((deviceName = pciDevice->copyName()) && + (pciDeviceBitMappings->getNextIndexOfObject(deviceName, 0) == (unsigned int)-1) && + pciDeviceBitMappings->setObject(deviceName)) + { + index = pciDeviceBitMappings->getCount() - 1; + DLOG("PMTrace PCI array: set object %s => %d\n", + deviceName->getCStringNoCopy(), index); + } + if (deviceName) + deviceName->release(); + if (!addedToRegistry && (index >= 0)) + addedToRegistry = owner->setProperty("PCITopLevel", this); + +exit: + IOLockUnlock(pciMappingLock); + return index; +} + +bool PMTraceWorker::serialize(OSSerialize *s) const +{ + bool ok = false; + if (pciDeviceBitMappings) + { + IOLockLock(pciMappingLock); + ok = pciDeviceBitMappings->serialize(s); + IOLockUnlock(pciMappingLock); + } + return ok; +} + +void PMTraceWorker::tracePoint(uint8_t phase) +{ + tracePhase = phase; + + DLOG("IOPMrootDomain: trace point 0x%02x\n", tracePhase); + RTC_TRACE(); +} + +void PMTraceWorker::traceLoginWindowPhase(uint8_t phase) 
+{ + loginWindowPhase = phase; + + DLOG("IOPMrootDomain: loginwindow tracepoint 0x%02x\n", loginWindowPhase); + RTC_TRACE(); +} + +void PMTraceWorker::tracePCIPowerChange( + change_t type, IOService *service, uint32_t changeFlags, uint32_t bitNum) +{ + uint32_t bitMask; + uint32_t expectedFlag; + + // Ignore PCI changes outside of system sleep/wake. + if ((kIOPMTracePointSystemSleepDriversPhase != tracePhase) && + (kIOPMTracePointSystemWakeDriversPhase != tracePhase)) + return; + + // Only record the WillChange transition when going to sleep, + // and the DidChange on the way up. + changeFlags &= (kIOPMDomainWillChange | kIOPMDomainDidChange); + expectedFlag = (kIOPMTracePointSystemSleepDriversPhase == tracePhase) ? + kIOPMDomainWillChange : kIOPMDomainDidChange; + if (changeFlags != expectedFlag) + return; + + // Mark this device off in our bitfield + if (bitNum < kPMMaxRTCBitfieldSize) + { + bitMask = (1 << bitNum); + + if (kPowerChangeStart == type) + { + pciBusyBitMask |= bitMask; + DLOG("PMTrace: Device %s started - bit %2d mask 0x%08x => 0x%08x\n", + service->getName(), bitNum, bitMask, pciBusyBitMask); + } + else + { + pciBusyBitMask &= ~bitMask; + DLOG("PMTrace: Device %s finished - bit %2d mask 0x%08x => 0x%08x\n", + service->getName(), bitNum, bitMask, pciBusyBitMask); + } + + RTC_TRACE(); + } +} + + +//****************************************************************************** // PMHaltWorker Class // -//********************************************************************************* +//****************************************************************************** static unsigned int gPMHaltBusyCount; static unsigned int gPMHaltIdleCount; @@ -2749,14 +4538,14 @@ PMHaltWorker * PMHaltWorker::worker( void ) if (!me->lock) break; - DEBUG_LOG("PMHaltWorker %p\n", me); + DLOG("PMHaltWorker %p\n", me); me->retain(); // thread holds extra retain - thread = IOCreateThread( &PMHaltWorker::main, me ); - if (!thread) + if (KERN_SUCCESS != 
kernel_thread_start(&PMHaltWorker::main, (void *) me, &thread)) { me->release(); break; } + thread_deallocate(thread); return me; } while (false); @@ -2767,7 +4556,7 @@ PMHaltWorker * PMHaltWorker::worker( void ) void PMHaltWorker::free( void ) { - DEBUG_LOG("PMHaltWorker free %p\n", this); + DLOG("PMHaltWorker free %p\n", this); if (lock) { IOLockFree(lock); @@ -2776,7 +4565,7 @@ void PMHaltWorker::free( void ) return OSObject::free(); } -void PMHaltWorker::main( void * arg ) +void PMHaltWorker::main( void * arg, wait_result_t waitResult ) { PMHaltWorker * me = (PMHaltWorker *) arg; @@ -2812,7 +4601,7 @@ void PMHaltWorker::main( void * arg ) } // No more work to do, terminate thread - DEBUG_LOG("All done for worker: %p (visits = %u)\n", me, me->visits); + DLOG("All done for worker: %p (visits = %u)\n", me, me->visits); thread_wakeup( &gPMHaltDepth ); me->release(); } @@ -2872,13 +4661,13 @@ void PMHaltWorker::work( PMHaltWorker * me ) deltaTime = computeDeltaTimeMS(&startTime); if ((deltaTime > kPMHaltTimeoutMS) || timeout || - (gIOKitDebug & kIOLogDebugPower)) + (gIOKitDebug & (kIOLogDebugPower | kIOLogPMRootDomain))) { - HaltRestartLog("%s driver %s (%p) took %lu ms\n", + KLOG("%s driver %s (%p) took %u ms\n", (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? "PowerOff" : "Restart", service->getName(), service, - deltaTime ); + (uint32_t) deltaTime ); } service->release(); @@ -2907,7 +4696,7 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) if (nano > 3000000000ULL) { me->timeout = true; - HaltRestartLog("%s still waiting on %s\n", + LOG("%s still waiting on %s\n", (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? 
"PowerOff" : "Restart", me->service->getName()); @@ -2916,11 +4705,12 @@ void PMHaltWorker::checkTimeout( PMHaltWorker * me, AbsoluteTime * now ) IOLockUnlock(me->lock); } -//********************************************************************************* + +//****************************************************************************** // acknowledgeSystemWillShutdown // // Acknowledgement from drivers that they have prepared for shutdown/restart. -//********************************************************************************* +//****************************************************************************** void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from ) { @@ -2930,7 +4720,7 @@ void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from ) if (!from) return; - //DEBUG_LOG("%s acknowledged\n", from->getName()); + //DLOG("%s acknowledged\n", from->getName()); prop = from->copyProperty( gPMHaltClientAcknowledgeKey ); if (prop) { @@ -2943,16 +4733,17 @@ void IOPMrootDomain::acknowledgeSystemWillShutdown( IOService * from ) } else { - DEBUG_LOG("%s acknowledged without worker property\n", + DLOG("%s acknowledged without worker property\n", from->getName()); } } -//********************************************************************************* + +//****************************************************************************** // notifySystemShutdown // // Notify all objects in PM tree that system will shutdown or restart -//********************************************************************************* +//****************************************************************************** static void notifySystemShutdown( IOService * root, unsigned long event ) @@ -2973,7 +4764,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) void * baseFunc; bool ok; - DEBUG_LOG("%s event = %lx\n", __FUNCTION__, event); + DLOG("%s event = %lx\n", __FUNCTION__, event); baseFunc = OSMemberFunctionCast(void *, root, 
&IOService::systemWillShutdown); @@ -3065,7 +4856,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) ok = inner->setObject(node); } if (!ok) - DEBUG_LOG("Skipped PM node %s\n", node->getName()); + DLOG("Skipped PM node %s\n", node->getName()); } iter->release(); } @@ -3076,7 +4867,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) count = 0; if (inner != PLACEHOLDER) count = inner->getCount(); - DEBUG_LOG("Nodes at depth %u = %u\n", i, count); + DLOG("Nodes at depth %u = %u\n", i, count); } // strip placeholders (not all depths are populated) @@ -3107,7 +4898,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) if (numWorkers > kPMHaltMaxWorkers) numWorkers = kPMHaltMaxWorkers; - DEBUG_LOG("PM nodes = %u, maxDepth = %u, workers = %u\n", + DLOG("PM nodes = %u, maxDepth = %u, workers = %u\n", totalNodes, gPMHaltArray->getCount(), numWorkers); for (unsigned int i = 0; i < numWorkers; i++) @@ -3148,27 +4939,17 @@ notifySystemShutdown( IOService * root, unsigned long event ) } done: - DEBUG_LOG("%s done\n", __FUNCTION__); + DLOG("%s done\n", __FUNCTION__); return; } -#if DEBUG_TEST -// debug - exercise notifySystemShutdown() -bool IOPMrootDomain::serializeProperties( OSSerialize * s ) const -{ - IOPMrootDomain * root = (IOPMrootDomain *) this; - notifySystemShutdown( root, kIOMessageSystemWillPowerOff ); - return( super::serializeProperties(s) ); -} -#endif /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - #undef super #define super OSObject -OSDefineMetaClassAndStructors(PMSettingObject, OSObject) +OSDefineMetaClassAndFinalStructors(PMSettingObject, OSObject) void PMSettingObject::setPMSetting(const OSSymbol *type, OSObject *obj) { @@ -3276,10 +5057,10 @@ void PMSettingObject::taggedRelease(const void *tag, const int when) const /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#undef super +#undef super #define super IOService 
-OSDefineMetaClassAndStructors(IORootParent, IOService) +OSDefineMetaClassAndFinalStructors(IORootParent, IOService) // This array exactly parallels the state array for the root domain. // Power state changes initiated by a device can be vetoed by a client of the device, and @@ -3287,49 +5068,42 @@ OSDefineMetaClassAndStructors(IORootParent, IOService) // so when the root domain wants a power state change that cannot be vetoed (e.g. demand sleep), it asks // its parent to make the change. That is the reason for this complexity. -static IOPMPowerState patriarchPowerStates[number_of_power_states] = { - {1,0,0,0,0,0,0,0,0,0,0,0}, // off - {1,0,RESTART_POWER,0,0,0,0,0,0,0,0,0}, // reset - {1,0,SLEEP_POWER,0,0,0,0,0,0,0,0,0}, // sleep - {1,0,DOZE_POWER,0,0,0,0,0,0,0,0,0}, // doze - {1,0,ON_POWER,0,0,0,0,0,0,0,0,0} // running +static IOPMPowerState patriarchPowerStates[NUM_POWER_STATES] = +{ + {1,0,0,0,0,0,0,0,0,0,0,0}, // off (not used) + {1,0,RESTART_POWER,0,0,0,0,0,0,0,0,0}, // reset (not used) + {1,0,SLEEP_POWER,0,0,0,0,0,0,0,0,0}, // sleep + {1,0,DOZE_POWER,0,0,0,0,0,0,0,0,0}, // doze + {1,0,ON_POWER,0,0,0,0,0,0,0,0,0}, // running }; -bool IORootParent::start ( IOService * nub ) +bool IORootParent::start( IOService * nub ) { mostRecentChange = ON_STATE; super::start(nub); + attachToParent( getRegistryRoot(), gIOPowerPlane ); PMinit(); - youAreRoot(); - registerPowerDriver(this,patriarchPowerStates,number_of_power_states); + registerPowerDriver(this, patriarchPowerStates, NUM_POWER_STATES); wakeSystem(); powerOverrideOnPriv(); return true; } - -void IORootParent::shutDownSystem ( void ) +void IORootParent::shutDownSystem( void ) { - mostRecentChange = OFF_STATE; - changePowerStateToPriv(OFF_STATE); } - -void IORootParent::restartSystem ( void ) +void IORootParent::restartSystem( void ) { - mostRecentChange = RESTART_STATE; - changePowerStateToPriv(RESTART_STATE); } - -void IORootParent::sleepSystem ( void ) +void IORootParent::sleepSystem( void ) { mostRecentChange = 
SLEEP_STATE; changePowerStateToPriv(SLEEP_STATE); } - -void IORootParent::dozeSystem ( void ) +void IORootParent::dozeSystem( void ) { mostRecentChange = DOZE_STATE; changePowerStateToPriv(DOZE_STATE); @@ -3340,37 +5114,15 @@ void IORootParent::dozeSystem ( void ) // In idle sleep, do nothing because the parent is still on and the root can freely change state. -void IORootParent::sleepToDoze ( void ) +void IORootParent::sleepToDoze( void ) { if ( mostRecentChange == SLEEP_STATE ) { changePowerStateToPriv(DOZE_STATE); } } - -void IORootParent::wakeSystem ( void ) +void IORootParent::wakeSystem( void ) { mostRecentChange = ON_STATE; changePowerStateToPriv(ON_STATE); } - -IOReturn IORootParent::changePowerStateToPriv ( unsigned long ordinal ) -{ - IOReturn ret; - - if( (SLEEP_STATE == ordinal) && sleepSupportedPEFunction ) - { - - // Determine if the machine supports sleep, or must doze. - ret = getPlatform()->callPlatformFunction( - sleepSupportedPEFunction, false, - NULL, NULL, NULL, NULL); - - // If the machine only supports doze, the callPlatformFunction call - // boils down toIOPMrootDomain::setSleepSupported(kPCICantSleep), - // otherwise nothing. - } - - return super::changePowerStateToPriv(ordinal); -} - diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index a03ef1d90..8109e190e 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,9 +54,6 @@ extern "C" { #include } -/* Delay period for UPS halt */ -#define kUPSDelayHaltCPU_msec (1000*60*5) - void printDictionaryKeys (OSDictionary * inDictionary, char * inMsg); static void getCStringForObject(OSObject *inObj, char *outStr, size_t outStrLen); @@ -254,16 +251,11 @@ int IOPlatformExpert::haltRestart(unsigned int type) { if (type == kPEPanicSync) return 0; - if (type == kPEHangCPU) while (1); + if (type == kPEHangCPU) while (true) {} if (type == kPEUPSDelayHaltCPU) { - // Stall shutdown for 5 minutes, and if no outside force has - // removed our power at that point, proceed with a reboot. - IOSleep( kUPSDelayHaltCPU_msec ); - - // Ideally we never reach this point. - - type = kPERestartCPU; + // RestartOnPowerLoss feature was turned on, proceed with shutdown. + type = kPEHaltCPU; } // On ARM kPEPanicRestartCPU is supported in the drivers @@ -384,13 +376,14 @@ PMLog(const char *who, unsigned long event, if (debugFlags & kIOLogPower) { - uint32_t nows, nowus; + clock_sec_t nows; + clock_usec_t nowus; clock_get_system_microtime(&nows, &nowus); nowus += (nows % 1000) * 1000000; - kprintf("pm%u %x %.30s %d %x %x\n", - nowus, (unsigned) current_thread(), who, // Identity - (int) event, param1, param2); // Args + kprintf("pm%u %p %.30s %d %lx %lx\n", + nowus, current_thread(), who, // Identity + (int) event, (long) param1, (long) param2); // Args if (debugFlags & kIOLogTracePower) { static const UInt32 sStartStopBitField[] = @@ -412,7 +405,7 @@ PMLog(const char *who, unsigned long event, } // Record the timestamp, wish I had a this pointer - IOTimeStampConstant(code, (UInt32) who, event, param1, param2); + IOTimeStampConstant(code, (uintptr_t) who, event, param1, param2); } } } @@ -433,7 +426,6 @@ void IOPlatformExpert::PMInstantiatePowerDomains ( void ) root->init(); root->attach(this); root->start(this); - root->youAreRoot(); } @@ -750,7 +742,7 @@ static void IOShutdownNotificationsTimedOut( 
thread_call_param_t p0, thread_call_param_t p1) { - int type = (int)p0; + int type = (int)(long)p0; /* 30 seconds has elapsed - resume shutdown */ if(gIOPlatform) gIOPlatform->haltRestart(type); @@ -792,7 +784,6 @@ int PEHaltRestart(unsigned int type) IOPMrootDomain *pmRootDomain = IOService::getPMRootDomain(); AbsoluteTime deadline; thread_call_t shutdown_hang; - unsigned int tell_type; if(type == kPEHaltCPU || type == kPERestartCPU || type == kPEUPSDelayHaltCPU) { @@ -809,15 +800,8 @@ int PEHaltRestart(unsigned int type) (thread_call_param_t) type); clock_interval_to_deadline( 30, kSecondScale, &deadline ); thread_call_enter1_delayed( shutdown_hang, 0, deadline ); - - if( kPEUPSDelayHaltCPU == type ) { - tell_type = kPEHaltCPU; - } else { - tell_type = type; - } - - pmRootDomain->handlePlatformHaltRestart(tell_type); + pmRootDomain->handlePlatformHaltRestart(type); /* This notification should have few clients who all do their work synchronously. @@ -842,16 +826,14 @@ long PEGetGMTTimeOfDay(void) { long result = 0; - if( gIOPlatform) - result = gIOPlatform->getGMTTimeOfDay(); + if( gIOPlatform) result = gIOPlatform->getGMTTimeOfDay(); return (result); } void PESetGMTTimeOfDay(long secs) { - if( gIOPlatform) - gIOPlatform->setGMTTimeOfDay(secs); + if( gIOPlatform) gIOPlatform->setGMTTimeOfDay(secs); } } /* extern "C" */ @@ -861,7 +843,7 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller) OSData * data; IORegistryEntry * entry; OSString * string = 0; - char uuid[ 36 + 1 ]; + uuid_string_t uuid; entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane ); if ( entry ) @@ -925,7 +907,7 @@ IOReturn IOPlatformExpert::callPlatformFunction(const OSSymbol *functionName, if (waitForFunction) { _resources = waitForService(resourceMatching(functionName)); } else { - _resources = resources(); + _resources = getResourceService(); } if (_resources == 0) return kIOReturnUnsupported; @@ -1406,3 +1388,4 @@ bool IOPanicPlatform::start(IOService * 
provider) { return false; } + diff --git a/iokit/Kernel/IORangeAllocator.cpp b/iokit/Kernel/IORangeAllocator.cpp index d8a56aec6..12804d24a 100644 --- a/iokit/Kernel/IORangeAllocator.cpp +++ b/iokit/Kernel/IORangeAllocator.cpp @@ -91,7 +91,7 @@ bool IORangeAllocator::init( IORangeScalar endOfRange, return( true ); } -IORangeAllocator * IORangeAllocator:: withRange( +IORangeAllocator * IORangeAllocator::withRange( IORangeScalar endOfRange, IORangeScalar defaultAlignment, UInt32 capacity, diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index f039c28ad..e41a94bdc 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -65,6 +65,7 @@ static OSDictionary * gIORegistryPlanes; const OSSymbol * gIONameKey; const OSSymbol * gIOLocationKey; +const OSSymbol * gIORegistryEntryIDKey; enum { kParentSetIndex = 0, @@ -75,6 +76,10 @@ enum { kIOMaxPlaneName = 32 }; +enum { kIORegistryIDReserved = (1ULL << 32) + 255 }; + +static uint64_t gIORegistryLastID = kIORegistryIDReserved; + class IORegistryPlane : public OSObject { friend class IORegistryEntry; @@ -149,8 +154,12 @@ IORegistryEntry * IORegistryEntry::initialize( void ) && gIORegistryPlanes ); ok = gRegistryRoot->init(); + if (ok) + gRegistryRoot->reserved->fRegistryEntryID = ++gIORegistryLastID; + gIONameKey = OSSymbol::withCStringNoCopy( "IOName" ); gIOLocationKey = OSSymbol::withCStringNoCopy( "IOLocation" ); + gIORegistryEntryIDKey = OSSymbol::withCStringNoCopy( kIORegistryEntryIDKey ); assert( ok && gIONameKey && gIOLocationKey ); @@ -261,6 +270,13 @@ bool IORegistryEntry::init( OSDictionary * dict ) if( !super::init()) return( false); + if (!reserved) + { + reserved = IONew(ExpansionData, 1); + if (!reserved) + return (false); + bzero(reserved, sizeof(ExpansionData)); + } if( dict) { dict->retain(); if( fPropertyTable) @@ -307,6 +323,9 @@ bool IORegistryEntry::init( IORegistryEntry * old, WLOCK; + reserved = old->reserved; + old->reserved = NULL; + 
fPropertyTable = old->getPropertyTable(); fPropertyTable->retain(); #ifdef IOREGSPLITTABLES @@ -340,19 +359,11 @@ bool IORegistryEntry::init( IORegistryEntry * old, void IORegistryEntry::free( void ) { - #if DEBUG_FREE -#define msg ": attached at free()" - int len = strlen(msg) + 40; - char buf[len]; - if( registryTable() && gIOServicePlane) { if( getParentSetReference( gIOServicePlane ) || getChildSetReference( gIOServicePlane )) { - - strlcpy( buf, getName(), 32); - strlcat( buf, msg, len ); - IOPanic( buf ); + panic("%s: attached at free()", getName()); } } #endif @@ -365,6 +376,9 @@ void IORegistryEntry::free( void ) registryTable()->release(); #endif /* IOREGSPLITTABLES */ + if (reserved) + IODelete(reserved, ExpansionData, 1); + super::free(); } @@ -1593,6 +1607,9 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, WLOCK; + if (!reserved->fRegistryEntryID) + reserved->fRegistryEntryID = ++gIORegistryLastID; + ret = makeLink( parent, kParentSetIndex, plane ); if( (links = parent->getChildSetReference( plane ))) @@ -1633,6 +1650,14 @@ bool IORegistryEntry::attachToParent( IORegistryEntry * parent, return( ret ); } +uint64_t IORegistryEntry::getRegistryEntryID( void ) +{ + if (reserved) + return (reserved->fRegistryEntryID); + else + return (0); +} + bool IORegistryEntry::attachToChild( IORegistryEntry * child, const IORegistryPlane * plane ) { @@ -2009,13 +2034,21 @@ OSOrderedSet * IORegistryIterator::iterateAll( void ) return( done); } +#if __LP64__ +OSMetaClassDefineReservedUnused(IORegistryEntry, 0); +OSMetaClassDefineReservedUnused(IORegistryEntry, 1); +OSMetaClassDefineReservedUnused(IORegistryEntry, 2); +OSMetaClassDefineReservedUnused(IORegistryEntry, 3); +OSMetaClassDefineReservedUnused(IORegistryEntry, 4); +OSMetaClassDefineReservedUnused(IORegistryEntry, 5); +#else OSMetaClassDefineReservedUsed(IORegistryEntry, 0); OSMetaClassDefineReservedUsed(IORegistryEntry, 1); OSMetaClassDefineReservedUsed(IORegistryEntry, 2); 
OSMetaClassDefineReservedUsed(IORegistryEntry, 3); OSMetaClassDefineReservedUsed(IORegistryEntry, 4); OSMetaClassDefineReservedUsed(IORegistryEntry, 5); - +#endif OSMetaClassDefineReservedUnused(IORegistryEntry, 6); OSMetaClassDefineReservedUnused(IORegistryEntry, 7); OSMetaClassDefineReservedUnused(IORegistryEntry, 8); diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index f58ea137c..62dda56fe 100644 --- a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2007 Apple Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,7 +29,9 @@ #include #include +#include #include +#include #include #include #include @@ -43,12 +45,13 @@ #include #include #include +#include #include #include #include -//#define LOG kprintf -#define LOG IOLog +#define LOG kprintf +//#define LOG IOLog #include "IOServicePrivate.h" @@ -97,6 +100,7 @@ const OSSymbol * gIOMatchCategoryKey; const OSSymbol * gIODefaultMatchCategoryKey; const OSSymbol * gIOMatchedServiceCountKey; +const OSSymbol * gIOMapperIDKey; const OSSymbol * gIOUserClientClassKey; const OSSymbol * gIOKitDebugKey; @@ -166,6 +170,8 @@ const OSSymbol * gIOPlatformFunctionHandlerSet; IORecursiveLockUnlock( gNotificationLock ) #define SLEEPNOTIFY(event) \ IORecursiveLockSleep( gNotificationLock, (void *)(event), THREAD_UNINT ) +#define SLEEPNOTIFYTO(event, deadline) \ + IORecursiveLockSleepDeadline( gNotificationLock, (void *)(event), deadline, THREAD_UNINT ) #define WAKEUPNOTIFY(event) \ IORecursiveLockWakeup( gNotificationLock, (void *)(event), /* wake one */ false ) @@ -206,9 +212,18 @@ static IOLock * gArbitrationLockQueueLock; bool IOService::isInactive( void ) const { return( 0 != (kIOServiceInactiveState & getState())); } + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#define IOServiceTrace(csc, a, b, c, d) { \ + if(kIOTraceIOService & 
gIOKitDebug) { \ + KERNEL_DEBUG_CONSTANT(IODBG_IOSERVICE(csc), a, b, c, d, 0); \ + } \ +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) // Only used by the intel implementation of // IOService::requireMaxBusStall(UInt32 ns) @@ -235,7 +250,7 @@ requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType); static IOReturn setLatencyHandler(UInt32 delayType, IOService * target, bool enable); -#endif /* defined(__i386__) */ +#endif /* defined(__i386__) || defined(__x86_64__) */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -271,6 +286,8 @@ void IOService::initialize( void ) gIOInterruptSpecifiersKey = OSSymbol::withCStringNoCopy("IOInterruptSpecifiers"); + gIOMapperIDKey = OSSymbol::withCStringNoCopy(kIOMapperIDKey); + gIOKitDebugKey = OSSymbol::withCStringNoCopy( kIOKitDebugKey ); gIOCommandPoolSizeKey = OSSymbol::withCStringNoCopy( kIOCommandPoolSizeKey ); @@ -306,7 +323,7 @@ void IOService::initialize( void ) gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey); gIOPlatformFunctionHandlerSet = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet); -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) sCPULatencyFunctionName[kCpuDelayBusStall] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxBusDelay); sCPULatencyFunctionName[kCpuDelayInterrupt] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxInterruptDelay); #endif @@ -392,6 +409,7 @@ void IOService::stop( IOService * provider ) void IOService::free( void ) { requireMaxBusStall(0); + requireMaxInterruptDelay(0); if( getPropertyTable()) unregisterAllInterest(); PMfree(); @@ -624,7 +642,7 @@ IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables ) while( (table = (OSDictionary *) newTables->getFirstObject())) { LOCKWRITENOTIFY(); - set = (OSSet *) getExistingServices( table, + set = (OSSet *) 
copyExistingServices( table, kIOServiceRegisteredState, kIOServiceExistingSet); UNLOCKNOTIFY(); @@ -643,7 +661,7 @@ IOReturn IOService::catalogNewDrivers( OSOrderedSet * newTables ) #if IOMATCHDEBUG if( getDebugFlags( table ) & kIOLogMatch) - LOG("Matching service count = %ld\n", count); + LOG("Matching service count = %ld\n", (long)count); #endif newTables->removeObject(table); } @@ -848,7 +866,7 @@ IOReturn IOService::callPlatformFunction( const OSSymbol * functionName, if (gIOPlatformFunctionHandlerSet == functionName) { -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) const OSSymbol * functionHandlerName = (const OSSymbol *) param1; IOService * target = (IOService *) param2; bool enable = (param3 != 0); @@ -857,7 +875,7 @@ IOReturn IOService::callPlatformFunction( const OSSymbol * functionName, result = setLatencyHandler(kCpuDelayBusStall, target, enable); else if (sCPULatencyFunctionName[kCpuDelayInterrupt] == param1) result = setLatencyHandler(kCpuDelayInterrupt, target, enable); -#endif /* defined(__i386__) */ +#endif /* defined(__i386__) || defined(__x86_64__) */ } if ((kIOReturnUnsupported == result) && (provider = getProvider())) { @@ -1615,7 +1633,7 @@ void _IOServiceInterestNotifier::enable( bool was ) #define headQ(o) setObject(0, o) #define TLOG(fmt, args...) 
{ if(kIOLogYield & gIOKitDebug) IOLog(fmt, ## args); } -inline void _workLoopAction( IOWorkLoop::Action action, +static void _workLoopAction( IOWorkLoop::Action action, IOService * service, void * p0 = 0, void * p1 = 0, void * p2 = 0, void * p3 = 0 ) @@ -1657,7 +1675,15 @@ bool IOService::terminatePhase1( IOOptionBits options ) bool didInactive; bool startPhase2 = false; - TLOG("%s::terminatePhase1(%08lx)\n", getName(), options); + TLOG("%s::terminatePhase1(%08llx)\n", getName(), (long long)options); + + uint64_t regID = getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_PHASE1, + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) this, + (uintptr_t) options); // -- compat if( options & kIOServiceRecursing) { @@ -1696,11 +1722,22 @@ bool IOService::terminatePhase1( IOOptionBits options ) iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::requestTerminate(%s, %08lx)\n", - client->getName(), victim->getName(), options); + TLOG("%s::requestTerminate(%s, %08llx)\n", + client->getName(), victim->getName(), (long long)options); ok = client->requestTerminate( victim, options ); TLOG("%s::requestTerminate(%s, ok = %d)\n", client->getName(), victim->getName(), ok); + + uint64_t regID1 = client->getRegistryEntryID(); + uint64_t regID2 = victim->getRegistryEntryID(); + IOServiceTrace( + (ok ? 
IOSERVICE_TERMINATE_REQUEST_OK + : IOSERVICE_TERMINATE_REQUEST_FAIL), + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + if( ok) makeInactive->setObject( client ); } @@ -1763,8 +1800,8 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) waitResult = IOLockSleepDeadline( gJobsLock, &gIOTerminateWork, deadline, THREAD_UNINT ); if( waitResult == THREAD_TIMED_OUT) { - TLOG("%s::terminate(kIOServiceSynchronous) timeout", getName()); - } + IOLog("%s::terminate(kIOServiceSynchronous) timeout\n", getName()); + } } } while(gIOTerminateWork || (wait && (waitResult != THREAD_TIMED_OUT))); @@ -1777,7 +1814,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) gIOTerminatePhase2List->setObject( this ); if( 0 == gIOTerminateWork++) { if( !gIOTerminateThread) - gIOTerminateThread = IOCreateThread( &terminateThread, (void *) options ); + kernel_thread_start(&terminateThread, (void *) options, &gIOTerminateThread); else IOLockWakeup(gJobsLock, (event_t) &gIOTerminateWork, /* one-thread */ false ); } @@ -1788,13 +1825,14 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) release(); } -void IOService::terminateThread( void * arg ) +void IOService::terminateThread( void * arg, wait_result_t waitResult ) { IOLockLock( gJobsLock ); while (gIOTerminateWork) - terminateWorker( (IOOptionBits) arg ); + terminateWorker( (uintptr_t) arg ); + thread_deallocate(gIOTerminateThread); gIOTerminateThread = 0; IOLockWakeup( gJobsLock, (event_t) &gIOTerminateThread, /* one-thread */ false); @@ -1805,13 +1843,22 @@ void IOService::scheduleStop( IOService * provider ) { TLOG("%s::scheduleStop(%s)\n", getName(), provider->getName()); + uint64_t regID1 = getRegistryEntryID(); + uint64_t regID2 = provider->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_SCHEDULE_STOP, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + IOLockLock( 
gJobsLock ); gIOStopList->tailQ( this ); gIOStopProviderList->tailQ( provider ); if( 0 == gIOTerminateWork++) { if( !gIOTerminateThread) - gIOTerminateThread = IOCreateThread( &terminateThread, (void *) 0 ); + kernel_thread_start(&terminateThread, (void *) 0, &gIOTerminateThread); else IOLockWakeup(gJobsLock, (event_t) &gIOTerminateWork, /* one-thread */ false ); } @@ -1823,12 +1870,19 @@ void IOService::scheduleFinalize( void ) { TLOG("%s::scheduleFinalize\n", getName()); + uint64_t regID1 = getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_SCHEDULE_FINALIZE, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + 0, 0); + IOLockLock( gJobsLock ); gIOFinalizeList->tailQ( this ); if( 0 == gIOTerminateWork++) { if( !gIOTerminateThread) - gIOTerminateThread = IOCreateThread( &terminateThread, (void *) 0 ); + kernel_thread_start(&terminateThread, (void *) 0, &gIOTerminateThread); else IOLockWakeup(gJobsLock, (event_t) &gIOTerminateWork, /* one-thread */ false ); } @@ -1872,8 +1926,18 @@ void IOService::actionWillTerminate( IOService * victim, IOOptionBits options, iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::willTerminate(%s, %08lx)\n", - client->getName(), victim->getName(), options); + TLOG("%s::willTerminate(%s, %08llx)\n", + client->getName(), victim->getName(), (long long)options); + + uint64_t regID1 = client->getRegistryEntryID(); + uint64_t regID2 = victim->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_WILL, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + ok = client->willTerminate( victim, options ); doPhase2List->tailQ( client ); } @@ -1892,9 +1956,20 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options ) iter = victim->getClientIterator(); if( iter) { while( (client = (IOService *) iter->getNextObject())) { - TLOG("%s::didTerminate(%s, %08lx)\n", - client->getName(), 
victim->getName(), options); + TLOG("%s::didTerminate(%s, %08llx)\n", + client->getName(), victim->getName(), (long long)options); client->didTerminate( victim, options, &defer ); + + uint64_t regID1 = client->getRegistryEntryID(); + uint64_t regID2 = victim->getRegistryEntryID(); + IOServiceTrace( + (defer ? IOSERVICE_TERMINATE_DID_DEFER + : IOSERVICE_TERMINATE_DID), + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + TLOG("%s::didTerminate(%s, defer %d)\n", client->getName(), victim->getName(), defer); } @@ -1904,13 +1979,31 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options ) void IOService::actionFinalize( IOService * victim, IOOptionBits options ) { - TLOG("%s::finalize(%08lx)\n", victim->getName(), options); + TLOG("%s::finalize(%08llx)\n", victim->getName(), (long long)options); + + uint64_t regID1 = victim->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_FINALIZE, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + 0, 0); + victim->finalize( options ); } void IOService::actionStop( IOService * provider, IOService * client ) { TLOG("%s::stop(%s)\n", client->getName(), provider->getName()); + + uint64_t regID1 = provider->getRegistryEntryID(); + uint64_t regID2 = client->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_STOP, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + client->stop( provider ); if( provider->isOpen( client )) provider->close( client ); @@ -2020,10 +2113,30 @@ void IOService::terminateWorker( IOOptionBits options ) if( !provider->isChild( client, gIOServicePlane )) { // may be multiply queued - nop it TLOG("%s::nop stop(%s)\n", client->getName(), provider->getName()); + + uint64_t regID1 = provider->getRegistryEntryID(); + uint64_t regID2 = client->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_STOP_NOP, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + 
(uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + } else { // not ready for stop if it has clients, skip it if( (client->__state[1] & kIOServiceTermPhase3State) && client->getClient()) { TLOG("%s::defer stop(%s)\n", client->getName(), provider->getName()); + + uint64_t regID1 = provider->getRegistryEntryID(); + uint64_t regID2 = client->getRegistryEntryID(); + IOServiceTrace( + IOSERVICE_TERMINATE_STOP_DEFER, + (uintptr_t) regID1, + (uintptr_t) (regID1 >> 32), + (uintptr_t) regID2, + (uintptr_t) (regID2 >> 32)); + idx++; continue; } @@ -2052,6 +2165,9 @@ void IOService::terminateWorker( IOOptionBits options ) if( !moreToDo) { TLOG("iokit terminate done, %d stops remain\n", gIOStopList->getCount()); + IOServiceTrace( + IOSERVICE_TERMINATE_DONE, + (uintptr_t) gIOStopList->getCount(), 0, 0, 0); } } while( moreToDo ); @@ -2329,7 +2445,7 @@ SInt32 IOServiceOrdering( const OSMetaClassBase * inObj1, const OSMetaClassBase return ( val1 - val2 ); } -IOService * IOService::getClientWithCategory( const OSSymbol * category ) +IOService * IOService::copyClientWithCategory( const OSSymbol * category ) { IOService * service = 0; OSIterator * iter; @@ -2343,13 +2459,25 @@ IOService * IOService::getClientWithCategory( const OSSymbol * category ) nextCat = (const OSSymbol *) OSDynamicCast( OSSymbol, service->getProperty( gIOMatchCategoryKey )); if( category == nextCat) + { + service->retain(); break; + } } iter->release(); } return( service ); } +IOService * IOService::getClientWithCategory( const OSSymbol * category ) +{ + IOService * + service = copyClientWithCategory(category); + if (service) + service->release(); + return (service); +} + bool IOService::invokeNotifer( _IOServiceNotifier * notify ) { _IOServiceNotifierInvocation invocation; @@ -2369,7 +2497,7 @@ bool IOService::invokeNotifer( _IOServiceNotifier * notify ) if( willNotify) { - ret = (*notify->handler)( notify->target, notify->ref, this ); + ret = (*notify->handler)(notify->target, notify->ref, this, notify); 
LOCKWRITENOTIFY(); queue_remove( &notify->handlerInvocations, &invocation, @@ -2414,6 +2542,8 @@ void IOService::probeCandidates( OSOrderedSet * matches ) #if IOMATCHDEBUG SInt64 debugFlags; #endif + IOService * client = NULL; + assert( matches ); while( !needReloc && (nextMatch = matches->getFirstObject())) { @@ -2447,8 +2577,8 @@ void IOService::probeCandidates( OSOrderedSet * matches ) match->getObject( gIOMatchCategoryKey )); if( 0 == category) category = gIODefaultMatchCategoryKey; - - if( getClientWithCategory( category )) { + + if( (client = copyClientWithCategory(category)) ) { #if IOMATCHDEBUG if( debugFlags & kIOLogMatch) LOG("%s: match category %s exists\n", getName(), @@ -2456,6 +2586,10 @@ void IOService::probeCandidates( OSOrderedSet * matches ) #endif nextMatch->release(); nextMatch = 0; + + client->release(); + client = NULL; + continue; } @@ -2674,16 +2808,35 @@ void IOService::probeCandidates( OSOrderedSet * matches ) } - // adjust the busy count by -1 if matching is stalled for a module, - // or +1 if a previously stalled matching is complete. + // adjust the busy count by +1 if matching is stalled for a module, + // or -1 if a previously stalled matching is complete. lockForArbitration(); SInt32 adjBusy = 0; + uint64_t regID = getRegistryEntryID(); + if( needReloc) { adjBusy = (__state[1] & kIOServiceModuleStallState) ?
0 : 1; - if( adjBusy) + if( adjBusy) { + + IOServiceTrace( + IOSERVICE_MODULESTALL, + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) this, + 0); + __state[1] |= kIOServiceModuleStallState; + } } else if( __state[1] & kIOServiceModuleStallState) { + + IOServiceTrace( + IOSERVICE_MODULEUNSTALL, + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) this, + 0); + __state[1] &= ~kIOServiceModuleStallState; adjBusy = -1; } @@ -2734,7 +2887,7 @@ bool IOService::startCandidate( IOService * service ) SUB_ABSOLUTETIME(&endTime, &startTime); absolutetime_to_nanoseconds(endTime, &nano); if (nano > 500000000ULL) - IOLog("%s::start took %ld ms\n", service->getName(), (UInt32)(nano / 1000000ULL)); + IOLog("%s::start took %ld ms\n", service->getName(), (long)(UInt32)(nano / 1000000ULL)); } } if( !ok) @@ -2743,11 +2896,6 @@ bool IOService::startCandidate( IOService * service ) return( ok ); } -IOService * IOService::resources( void ) -{ - return( gIOResources ); -} - void IOService::publishResource( const char * key, OSObject * value ) { const OSSymbol * sym; @@ -2824,12 +2972,12 @@ bool IOService::checkResource( OSObject * matching ) } if( gIOKitDebug & kIOLogConfig) - LOG("config(%x): stalling %s\n", (int) IOThreadSelf(), getName()); + LOG("config(%p): stalling %s\n", IOThreadSelf(), getName()); waitForService( table ); if( gIOKitDebug & kIOLogConfig) - LOG("config(%x): waking\n", (int) IOThreadSelf() ); + LOG("config(%p): waking\n", IOThreadSelf() ); return( true ); } @@ -2870,7 +3018,8 @@ void _IOConfigThread::configThread( void ) continue; if( !inst->init()) continue; - if( !(IOCreateThread((IOThreadFunc) &_IOConfigThread::main, inst ))) + thread_t unused; + if (KERN_SUCCESS != kernel_thread_start(&_IOConfigThread::main, inst, &unused)) continue; return; @@ -2885,6 +3034,7 @@ void _IOConfigThread::configThread( void ) void _IOConfigThread::free( void ) { + thread_deallocate(current_thread()); OSObject::free(); } @@ -2982,11 +3132,13 @@ UInt32 
IOService::_adjustBusy( SInt32 delta ) if( next != this) next->lockForArbitration(); count = next->__state[1] & kIOServiceBusyStateMask; - assert( count < kIOServiceBusyMax); wasQuiet = (0 == count); - assert( (!wasQuiet) || (delta > 0)); - next->__state[1] += delta; - nowQuiet = (0 == (next->__state[1] & kIOServiceBusyStateMask)); + if (((delta < 0) && wasQuiet) || ((delta > 0) && (kIOServiceBusyMax == count))) + OSReportWithBacktrace("%s: bad busy count (%d,%d)\n", next->getName(), count, delta); + else + count += delta; + next->__state[1] = (next->__state[1] & ~kIOServiceBusyStateMask) | count; + nowQuiet = (0 == count); needWake = (0 != (kIOServiceBusyWaiterState & next->__state[1])); if( needWake) { @@ -2999,11 +3151,30 @@ UInt32 IOService::_adjustBusy( SInt32 delta ) next->unlockForArbitration(); if( (wasQuiet || nowQuiet) ) { + uint64_t regID = next->getRegistryEntryID(); + + IOServiceTrace( + ((wasQuiet/*nowBusy*/) ? IOSERVICE_BUSY : IOSERVICE_NONBUSY), + (uintptr_t) regID, + (uintptr_t) (regID >> 32), + (uintptr_t) next, + 0); + + if (wasQuiet) + { + next->__timeBusy = mach_absolute_time(); + } + else + { + next->__accumBusy += mach_absolute_time() - next->__timeBusy; + next->__timeBusy = 0; + } + MessageClientsContext context; context.service = next; context.type = kIOMessageServiceBusyStateChange; - context.argument = (void *) wasQuiet; // busy now + context.argument = (void *) wasQuiet; /*nowBusy*/ context.argSize = 0; applyToInterestNotifiers( next, gIOBusyInterest, @@ -3011,7 +3182,7 @@ UInt32 IOService::_adjustBusy( SInt32 delta ) #if !NO_KEXTD if( nowQuiet && (next == gIOServiceRoot)) - OSMetaClass::considerUnloads(); + OSKext::considerUnloads(); #endif } @@ -3029,13 +3200,40 @@ void IOService::adjustBusy( SInt32 delta ) unlockForArbitration(); } +uint64_t IOService::getAccumulatedBusyTime( void ) +{ + uint64_t accumBusy = __accumBusy; + uint64_t timeBusy = __timeBusy; + uint64_t nano; + + do + { + accumBusy = __accumBusy; + timeBusy = __timeBusy; + 
if (timeBusy) + accumBusy += mach_absolute_time() - timeBusy; + } + while (timeBusy != __timeBusy); + + absolutetime_to_nanoseconds(*(AbsoluteTime *)&accumBusy, &nano); + + return (nano); +} + UInt32 IOService::getBusyState( void ) { return( __state[1] & kIOServiceBusyStateMask ); } IOReturn IOService::waitForState( UInt32 mask, UInt32 value, - mach_timespec_t * timeout ) + mach_timespec_t * timeout ) +{ + panic("waitForState"); + return (kIOReturnUnsupported); +} + +IOReturn IOService::waitForState( UInt32 mask, UInt32 value, + uint64_t timeout ) { bool wait; int waitResult = THREAD_AWAKENED; @@ -3049,23 +3247,17 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, if( wait) { __state[1] |= kIOServiceBusyWaiterState; unlockForArbitration(); - if( timeout ) { + if( timeout != UINT64_MAX ) { if( computeDeadline ) { AbsoluteTime nsinterval; - clock_interval_to_absolutetime_interval( - timeout->tv_sec, kSecondScale, &abstime ); - clock_interval_to_absolutetime_interval( - timeout->tv_nsec, kNanosecondScale, &nsinterval ); - ADD_ABSOLUTETIME( &abstime, &nsinterval ); - clock_absolutetime_interval_to_deadline( - abstime, &abstime ); + nanoseconds_to_absolutetime(timeout, &nsinterval ); + clock_absolutetime_interval_to_deadline(nsinterval, &abstime); computeDeadline = false; } - - assert_wait_deadline((event_t)this, THREAD_UNINT, __OSAbsoluteTime(abstime)); + assert_wait_deadline((event_t)this, THREAD_UNINT, __OSAbsoluteTime(abstime)); } - else - assert_wait((event_t)this, THREAD_UNINT ); + else + assert_wait((event_t)this, THREAD_UNINT ); } else unlockForArbitration(); IOLockUnlock( gIOServiceBusyLock ); @@ -3080,11 +3272,27 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, return( kIOReturnSuccess ); } -IOReturn IOService::waitQuiet( mach_timespec_t * timeout ) +IOReturn IOService::waitQuiet( uint64_t timeout ) { return( waitForState( kIOServiceBusyStateMask, 0, timeout )); } +IOReturn IOService::waitQuiet( mach_timespec_t * timeout ) +{ + 
uint64_t timeoutNS; + + if (timeout) + { + timeoutNS = timeout->tv_sec; + timeoutNS *= kSecondScale; + timeoutNS += timeout->tv_nsec; + } + else + timeoutNS = UINT64_MAX; + + return( waitForState( kIOServiceBusyStateMask, 0, timeoutNS )); +} + bool IOService::serializeProperties( OSSerialize * s ) const { #if 0 @@ -3095,8 +3303,9 @@ bool IOService::serializeProperties( OSSerialize * s ) const } -void _IOConfigThread::main( _IOConfigThread * self ) +void _IOConfigThread::main(void * arg, wait_result_t result) { + _IOConfigThread * self = (_IOConfigThread *) arg; _IOServiceJob * job; IOService * nub; bool alive = true; @@ -3132,8 +3341,8 @@ void _IOConfigThread::main( _IOConfigThread * self ) nub = job->nub; if( gIOKitDebug & kIOLogConfig) - LOG("config(%x): starting on %s, %d\n", - (int) IOThreadSelf(), job->nub->getName(), job->type); + LOG("config(%p): starting on %s, %d\n", + IOThreadSelf(), job->nub->getName(), job->type); switch( job->type) { @@ -3142,8 +3351,8 @@ void _IOConfigThread::main( _IOConfigThread * self ) break; default: - LOG("config(%x): strange type (%d)\n", - (int) IOThreadSelf(), job->type ); + LOG("config(%p): strange type (%d)\n", + IOThreadSelf(), job->type ); break; } @@ -3167,7 +3376,7 @@ void _IOConfigThread::main( _IOConfigThread * self ) } while( alive ); if( gIOKitDebug & kIOLogConfig) - LOG("config(%x): terminating\n", (int) IOThreadSelf() ); + LOG("config(%p): terminating\n", IOThreadSelf() ); self->release(); } @@ -3243,7 +3452,7 @@ void _IOServiceJob::pingConfig( _IOServiceJob * job ) } // internal - call with gNotificationLock -OSObject * IOService::getExistingServices( OSDictionary * matching, +OSObject * IOService::copyExistingServices( OSDictionary * matching, IOOptionBits inState, IOOptionBits options ) { OSObject * current = 0; @@ -3265,7 +3474,10 @@ OSObject * IOService::getExistingServices( OSDictionary * matching, && service->passiveMatch( matching )) { if( options & kIONotifyOnce) + { + service->retain(); current = 
service; + } else current = OSSet::withObjects( (const OSObject **) &service, 1, 1 ); @@ -3284,6 +3496,7 @@ OSObject * IOService::getExistingServices( OSDictionary * matching, && service->passiveMatch( matching )) { if( options & kIONotifyOnce) { + service->retain(); current = service; break; } @@ -3316,7 +3529,7 @@ OSIterator * IOService::getMatchingServices( OSDictionary * matching ) // is a lock even needed? LOCKWRITENOTIFY(); - iter = (OSIterator *) getExistingServices( matching, + iter = (OSIterator *) copyExistingServices( matching, kIOServiceMatchedState ); UNLOCKNOTIFY(); @@ -3324,11 +3537,23 @@ OSIterator * IOService::getMatchingServices( OSDictionary * matching ) return( iter ); } +struct _IOServiceMatchingNotificationHandlerRef +{ + IOServiceNotificationHandler handler; + void * ref; +}; + +static bool _IOServiceMatchingNotificationHandler( void * target, void * refCon, + IOService * newService, + IONotifier * notifier ) +{ + return ((*((_IOServiceNotifier *) notifier)->compatHandler)(target, refCon, newService)); +} // internal - call with gNotificationLock IONotifier * IOService::setNotification( const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, void * target, void * ref, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority ) { _IOServiceNotifier * notify = 0; @@ -3344,10 +3569,17 @@ IONotifier * IOService::setNotification( } if( notify) { - notify->matching = matching; - notify->handler = handler; + notify->handler = handler; notify->target = target; - notify->ref = ref; + notify->matching = matching; + matching->retain(); + if (handler == &_IOServiceMatchingNotificationHandler) + { + notify->compatHandler = ((_IOServiceMatchingNotificationHandlerRef *)ref)->handler; + notify->ref = ((_IOServiceMatchingNotificationHandlerRef *)ref)->ref; + } + else + notify->ref = ref; notify->priority = priority; notify->state = kIOServiceNotifyEnable; queue_init( &notify->handlerInvocations );
@@ -3373,7 +3605,7 @@ IONotifier * IOService::setNotification( // internal - call with gNotificationLock IONotifier * IOService::doInstallNotification( const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority, OSIterator ** existing ) { @@ -3402,7 +3634,7 @@ IONotifier * IOService::doInstallNotification( if( inState) // get the current set - exist = (OSIterator *) getExistingServices( matching, inState ); + exist = (OSIterator *) copyExistingServices( matching, inState ); else exist = 0; @@ -3411,14 +3643,35 @@ IONotifier * IOService::doInstallNotification( return( notify ); } +#if !defined(__LP64__) +IONotifier * IOService::installNotification(const OSSymbol * type, OSDictionary * matching, + IOServiceNotificationHandler handler, + void * target, void * refCon, + SInt32 priority, OSIterator ** existing ) +{ + IONotifier * result; + _IOServiceMatchingNotificationHandlerRef ref; + ref.handler = handler; + ref.ref = refCon; + + result = (_IOServiceNotifier *) installNotification( type, matching, + &_IOServiceMatchingNotificationHandler, + target, &ref, priority, existing ); + if (result) + matching->release(); + + return (result); +} +#endif /* !defined(__LP64__) */ + IONotifier * IOService::installNotification( const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority, OSIterator ** existing ) { - IONotifier * notify; + IONotifier * notify; LOCKWRITENOTIFY(); @@ -3433,6 +3686,27 @@ IONotifier * IOService::installNotification( IONotifier * IOService::addNotification( const OSSymbol * type, OSDictionary * matching, IOServiceNotificationHandler handler, + void * target, void * refCon, + SInt32 priority ) +{ + IONotifier * result; + _IOServiceMatchingNotificationHandlerRef ref; + + ref.handler = handler; + ref.ref = refCon; + + 
result = addMatchingNotification(type, matching, &_IOServiceMatchingNotificationHandler, + target, &ref, priority); + + if (result) + matching->release(); + + return (result); +} + +IONotifier * IOService::addMatchingNotification( + const OSSymbol * type, OSDictionary * matching, + IOServiceMatchingNotificationHandler handler, void * target, void * ref, SInt32 priority ) { @@ -3461,76 +3735,93 @@ IONotifier * IOService::addNotification( return( notify ); } -struct SyncNotifyVars { - semaphore_port_t waitHere; - IOService * result; -}; - bool IOService::syncNotificationHandler( void * /* target */, void * ref, - IOService * newService ) + IOService * newService, + IONotifier * notifier ) { - // result may get written more than once before the - // notification is removed! - ((SyncNotifyVars *) ref)->result = newService; - semaphore_signal( ((SyncNotifyVars *) ref)->waitHere ); + LOCKWRITENOTIFY(); + if (!*((IOService **) ref)) + { + newService->retain(); + (*(IOService **) ref) = newService; + WAKEUPNOTIFY(ref); + } + UNLOCKNOTIFY(); return( false ); } -IOService * IOService::waitForService( OSDictionary * matching, - mach_timespec_t * timeout ) +IOService * IOService::waitForMatchingService( OSDictionary * matching, + uint64_t timeout) { IONotifier * notify = 0; // priority doesn't help us much since we need a thread wakeup SInt32 priority = 0; - SyncNotifyVars state; - kern_return_t err = kIOReturnBadArgument; + IOService * result; - if( !matching) + if (!matching) return( 0 ); - state.waitHere = 0; - state.result = 0; + result = NULL; LOCKWRITENOTIFY(); - - do { - - state.result = (IOService *) getExistingServices( matching, + do + { + result = (IOService *) copyExistingServices( matching, kIOServiceMatchedState, kIONotifyOnce ); - if( state.result) - continue; - - err = semaphore_create( kernel_task, &state.waitHere, - SYNC_POLICY_FIFO, 0 ); - if( KERN_SUCCESS != err) - continue; - + if (result) + break; notify = IOService::setNotification( 
gIOMatchedNotification, matching, &IOService::syncNotificationHandler, (void *) 0, - (void *) &state, priority ); - - } while( false ); - - UNLOCKNOTIFY(); - - if( notify) { - if( timeout) - err = semaphore_timedwait( state.waitHere, *timeout ); + &result, priority ); + if (!notify) + break; + if (UINT64_MAX != timeout) + { + AbsoluteTime deadline; + nanoseconds_to_absolutetime(timeout, &deadline); + clock_absolutetime_interval_to_deadline(deadline, &deadline); + SLEEPNOTIFYTO(&result, deadline); + } else - err = semaphore_wait( state.waitHere ); + { + SLEEPNOTIFY(&result); + } } + while( false ); + + UNLOCKNOTIFY(); - if( notify) + if (notify) notify->remove(); // dequeues + + return( result ); +} + +IOService * IOService::waitForService( OSDictionary * matching, + mach_timespec_t * timeout ) +{ + IOService * result; + uint64_t timeoutNS; + + if (timeout) + { + timeoutNS = timeout->tv_sec; + timeoutNS *= kSecondScale; + timeoutNS += timeout->tv_nsec; + } else - matching->release(); - if( state.waitHere) - semaphore_destroy( kernel_task, state.waitHere ); + timeoutNS = UINT64_MAX; + + result = waitForMatchingService(matching, timeoutNS); - return( state.result ); + matching->release(); + if (result) + result->release(); + + return (result); } void IOService::deliverNotification( const OSSymbol * type, @@ -3684,6 +3975,27 @@ OSDictionary * IOService::propertyMatching( const OSSymbol * key, const OSObject return( table ); } +OSDictionary * IOService::registryEntryIDMatching( uint64_t entryID, + OSDictionary * table ) +{ + OSNumber * num; + + num = OSNumber::withNumber( entryID, 64 ); + if( !num) + return( 0 ); + + if( !table) + table = OSDictionary::withCapacity( 2 ); + if( table) + table->setObject( gIORegistryEntryIDKey, num ); + + if (num) + num->release(); + + return( table ); +} + + /* * _IOServiceNotifier */ @@ -3786,8 +4098,8 @@ IOService * IOResources::resources( void ) IOWorkLoop * IOResources::getWorkLoop() const { - // If we are the resource root then 
bringe over to the - // platform to get its workloop + // If we are the resource root + // then use the platform's workloop if (this == (IOResources *) gIOResources) return getPlatform()->getWorkLoop(); else @@ -4056,6 +4368,12 @@ bool IOService::passiveMatch( OSDictionary * table, bool changesOK ) break; } + num = OSDynamicCast( OSNumber, table->getObject( gIORegistryEntryIDKey )); + if( num) { + done++; + match = (getRegistryEntryID() == num->unsigned64BitValue()); + } + num = OSDynamicCast( OSNumber, table->getObject( gIOMatchedServiceCountKey )); if( num) { @@ -4344,7 +4662,6 @@ int IOService::errnoFromReturn( IOReturn rtn ) case kIOReturnExclusiveAccess: case kIOReturnLockedRead: case kIOReturnLockedWrite: - case kIOReturnNotAttached: case kIOReturnNotOpen: case kIOReturnNotReadable: return(EACCES); @@ -4357,6 +4674,7 @@ int IOService::errnoFromReturn( IOReturn rtn ) return(EBUSY); case kIOReturnBadMedia: case kIOReturnNoMedia: + case kIOReturnNotAttached: case kIOReturnUnformattedMedia: return(ENXIO); // (media error) case kIOReturnDMAError: @@ -4457,22 +4775,22 @@ void IOService::setDeviceMemory( OSArray * array ) void IOService:: setCPUSnoopDelay(UInt32 __unused ns) { -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) ml_set_maxsnoop(ns); -#endif /* defined(__i386__) */ +#endif /* defined(__i386__) || defined(__x86_64__) */ } UInt32 IOService:: getCPUSnoopDelay() { -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) return ml_get_maxsnoop(); #else return 0; -#endif /* defined(__i386__) */ +#endif /* defined(__i386__) || defined(__x86_64__) */ } -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) static void requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType) { @@ -4625,16 +4943,24 @@ setLatencyHandler(UInt32 delayType, IOService * target, bool enable) return (result); } -#endif /* defined(__i386__) */ +#endif /* defined(__i386__) || defined(__x86_64__) */ void IOService:: 
requireMaxBusStall(UInt32 __unused ns) { -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) requireMaxCpuDelay(this, ns, kCpuDelayBusStall); #endif } +void IOService:: +requireMaxInterruptDelay(uint32_t __unused ns) +{ +#if defined(__i386__) || defined(__x86_64__) + requireMaxCpuDelay(this, ns, kCpuDelayInterrupt); +#endif +} + /* * Device interrupts */ @@ -4791,13 +5117,21 @@ IOReturn IOService::causeInterrupt(int source) return interruptController->causeInterrupt(this, source); } +#if __LP64__ +OSMetaClassDefineReservedUnused(IOService, 0); +OSMetaClassDefineReservedUnused(IOService, 1); +OSMetaClassDefineReservedUnused(IOService, 2); +OSMetaClassDefineReservedUnused(IOService, 3); +OSMetaClassDefineReservedUnused(IOService, 4); +OSMetaClassDefineReservedUnused(IOService, 5); +#else OSMetaClassDefineReservedUsed(IOService, 0); OSMetaClassDefineReservedUsed(IOService, 1); OSMetaClassDefineReservedUsed(IOService, 2); OSMetaClassDefineReservedUsed(IOService, 3); OSMetaClassDefineReservedUsed(IOService, 4); - -OSMetaClassDefineReservedUnused(IOService, 5); +OSMetaClassDefineReservedUsed(IOService, 5); +#endif OSMetaClassDefineReservedUnused(IOService, 6); OSMetaClassDefineReservedUnused(IOService, 7); OSMetaClassDefineReservedUnused(IOService, 8); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 5b5cc0b8a..2f4610608 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -26,6 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +//#define IOASSERT 1 #include #include #include @@ -50,11 +51,8 @@ #include "IOKitKernelInternal.h" static void settle_timer_expired(thread_call_param_t, thread_call_param_t); -static void PM_idle_timer_expired(OSObject *, IOTimerEventSource *); -void tellAppWithResponse(OSObject * object, void * context) { /*empty*/ } -void tellClientWithResponse(OSObject * object, void * context) { /*empty*/ } -void tellClient(OSObject * object, void * context); -IOReturn 
serializedAllowPowerChange(OSObject *, void *, void *, void *, void *); +static void tellKernelClientApplier(OSObject * object, void * arg); +static void tellAppClientApplier(OSObject * object, void * arg); static uint64_t computeTimeDeltaNS( const AbsoluteTime * start ) { @@ -67,23 +65,43 @@ static uint64_t computeTimeDeltaNS( const AbsoluteTime * start ) return nsec; } +#if PM_VARS_SUPPORT OSDefineMetaClassAndStructors(IOPMprot, OSObject) +#endif // log setPowerStates longer than (ns): #define LOG_SETPOWER_TIMES (50ULL * 1000ULL * 1000ULL) // log app responses longer than (ns): #define LOG_APP_RESPONSE_TIMES (100ULL * 1000ULL * 1000ULL) +// use message tracer to log messages longer than (ns): +#define LOG_APP_RESPONSE_MSG_TRACER (3 * 1000ULL * 1000ULL * 1000ULL) //********************************************************************************* // Globals //********************************************************************************* -static bool gIOPMInitialized = false; -static IOItemCount gIOPMBusyCount = 0; -static IOWorkLoop * gIOPMWorkLoop = 0; -static IOPMRequestQueue * gIOPMRequestQueue = 0; -static IOPMRequestQueue * gIOPMReplyQueue = 0; -static IOPMRequestQueue * gIOPMFreeQueue = 0; +static bool gIOPMInitialized = false; +static uint32_t gIOPMBusyCount = 0; +static IOWorkLoop * gIOPMWorkLoop = 0; +static IOPMRequestQueue * gIOPMRequestQueue = 0; +static IOPMRequestQueue * gIOPMReplyQueue = 0; +static IOPMCompletionQueue * gIOPMFreeQueue = 0; +static IOPMRequest * gIOPMRequest = 0; +static IOPlatformExpert * gPlatform = 0; +static IOService * gIOPMRootNode = 0; + +static const OSSymbol * gIOPMPowerClientDevice = 0; +static const OSSymbol * gIOPMPowerClientDriver = 0; +static const OSSymbol * gIOPMPowerClientChildProxy = 0; +static const OSSymbol * gIOPMPowerClientChildren = 0; + +static uint32_t getPMRequestType( void ) +{ + uint32_t type = kIOPMRequestTypeInvalid; + if (gIOPMRequest) + type = gIOPMRequest->getType(); + return type; +} 
//********************************************************************************* // Macros @@ -102,25 +120,34 @@ do { \ assert(gIOPMWorkLoop->inGate()); \ } while(false) -#define PM_LOCK() IOLockLock(fPMLock) -#define PM_UNLOCK() IOLockUnlock(fPMLock) +#define PM_LOCK() IOLockLock(fPMLock) +#define PM_UNLOCK() IOLockUnlock(fPMLock) +#define PM_LOCK_SLEEP(event) IOLockSleep(fPMLock, event, THREAD_UNINT) +#define PM_LOCK_WAKEUP(event) IOLockWakeup(fPMLock, event, false) -#define ns_per_us 1000 -#define k30seconds (30*1000000) -#define kMinAckTimeoutTicks (10*1000000) -#define kIOPMTardyAckSPSKey "IOPMTardyAckSetPowerState" -#define kIOPMTardyAckPSCKey "IOPMTardyAckPowerStateChange" -#define kPwrMgtKey "IOPowerManagement" +#define ns_per_us 1000 +#define k30seconds (30*1000000) +#define kMinAckTimeoutTicks (10*1000000) +#define kIOPMTardyAckSPSKey "IOPMTardyAckSetPowerState" +#define kIOPMTardyAckPSCKey "IOPMTardyAckPowerStateChange" +#define kPwrMgtKey "IOPowerManagement" #define OUR_PMLog(t, a, b) \ - do { fPlatform->PMLog( fName, t, a, b); } while(0) + do { gPlatform->PMLog( fName, t, a, b); } while(0) -#define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL)) +#define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL)) #if CONFIG_EMBEDDED -#define SUPPORT_IDLE_CANCEL 1 +#define SUPPORT_IDLE_CANCEL 1 #endif +#define kNotifyWillChange (true) +#define kNotifyDidChange (false) + +#define kIOPMPowerStateMax 0xFFFFFFFF + +#define IS_PM_ROOT() (this == gIOPMRootNode) + //********************************************************************************* // PM machine states //********************************************************************************* @@ -139,15 +166,17 @@ enum { kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange = 11, kIOPM_ParentDownSetPowerState = 12, kIOPM_ParentDownWaitForPowerSettle = 13, - kIOPM_ParentDownAcknowledgeChange = 14, + kIOPM_ParentAcknowledgePowerChange = 14, kIOPM_ParentUpSetPowerState = 15, /* 16 not used */ kIOPM_ParentUpWaitForSettleTime 
= 17, kIOPM_ParentUpNotifyInterestedDriversDidChange = 18, - kIOPM_ParentUpAcknowledgePowerChange = 19, + /* 19 not used */ kIOPM_Finished = 20, kIOPM_DriverThreadCallDone = 21, - kIOPM_NotifyChildrenDone = 22 + kIOPM_NotifyChildrenDone = 22, + kIOPM_SyncNotifyDidChange = 23, + kIOPM_SyncFinish = 24 }; @@ -318,7 +347,7 @@ Note all parent requested changes need to acknowledge the power has changed to t */ //********************************************************************************* -// [public virtual] PMinit +// [public] PMinit // // Initialize power management. //********************************************************************************* @@ -329,6 +358,7 @@ void IOService::PMinit ( void ) { if ( !gIOPMInitialized ) { + gPlatform = getPlatform(); gIOPMWorkLoop = IOWorkLoop::workLoop(); if (gIOPMWorkLoop) { @@ -340,8 +370,8 @@ void IOService::PMinit ( void ) this, OSMemberFunctionCast(IOPMRequestQueue::Action, this, &IOService::servicePMReplyQueue)); - gIOPMFreeQueue = IOPMRequestQueue::create( - this, OSMemberFunctionCast(IOPMRequestQueue::Action, + gIOPMFreeQueue = IOPMCompletionQueue::create( + this, OSMemberFunctionCast(IOPMCompletionQueue::Action, this, &IOService::servicePMFreeQueue)); if (gIOPMWorkLoop->addEventSource(gIOPMRequestQueue) != @@ -364,6 +394,11 @@ void IOService::PMinit ( void ) gIOPMFreeQueue->release(); gIOPMFreeQueue = 0; } + + gIOPMPowerClientDevice = OSSymbol::withCStringNoCopy( "DevicePowerState" ); + gIOPMPowerClientDriver = OSSymbol::withCStringNoCopy( "DriverPowerState" ); + gIOPMPowerClientChildProxy = OSSymbol::withCStringNoCopy( "ChildProxyPowerState" ); + gIOPMPowerClientChildren = OSSymbol::withCStringNoCopy( "ChildrenPowerState" ); } if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue) @@ -376,23 +411,18 @@ void IOService::PMinit ( void ) pwrMgt->init(); setProperty(kPwrMgtKey, pwrMgt); - fOwner = this; - fWeAreRoot = false; fPMLock = IOLockAlloc(); fInterestedDrivers = new IOPMinformeeList; 
fInterestedDrivers->initialize(); fDesiredPowerState = 0; - fDriverDesire = 0; fDeviceDesire = 0; fInitialChange = true; - fNeedToBecomeUsable = false; fPreviousRequest = 0; fDeviceOverrides = false; fMachineState = kIOPM_Finished; fIdleTimerEventSource = NULL; fIdleTimerMinPowerState = 0; fActivityLock = IOLockAlloc(); - fClampOn = false; fStrictTreeOrder = false; fActivityTicklePowerState = -1; fControllingDriver = NULL; @@ -402,17 +432,18 @@ void IOService::PMinit ( void ) fParentsCurrentPowerFlags = 0; fMaxCapability = 0; fName = getName(); - fPlatform = getPlatform(); fParentsKnowState = false; fSerialNumber = 0; fResponseArray = NULL; + fNotifyClientArray = NULL; fDoNotPowerDown = true; fCurrentPowerConsumption = kIOPMUnknown; + fOverrideMaxPowerState = kIOPMPowerStateMax; - for (unsigned int i = 0; i <= kMaxType; i++) + if (!gIOPMRootNode && (getParentEntry(gIOPowerPlane) == getRegistryRoot())) { - fAggressivenessValue[i] = 0; - fAggressivenessValid[i] = false; + gIOPMRootNode = this; + fParentsKnowState = true; } fAckTimer = thread_call_allocate( @@ -429,12 +460,12 @@ void IOService::PMinit ( void ) { prot->init(); prot->ourName = fName; - prot->thePlatform = fPlatform; + prot->thePlatform = gPlatform; fPMVars = prot; pm_vars = prot; } #else - pm_vars = (IOPMprot *) true; + pm_vars = (void *) true; #endif initialized = true; @@ -442,7 +473,7 @@ void IOService::PMinit ( void ) } //********************************************************************************* -// [public] PMfree +// [private] PMfree // // Free up the data created in PMinit, if it exists. 
//********************************************************************************* @@ -460,7 +491,8 @@ void IOService::PMfree ( void ) assert(fNotifyChildArray == NULL); if ( fIdleTimerEventSource != NULL ) { - getPMworkloop()->removeEventSource(fIdleTimerEventSource); + fIdleTimerEventSource->disable(); + gIOPMWorkLoop->removeEventSource(fIdleTimerEventSource); fIdleTimerEventSource->release(); fIdleTimerEventSource = NULL; } @@ -491,7 +523,7 @@ void IOService::PMfree ( void ) fInterestedDrivers = NULL; } if ( fPMWorkQueue ) { - getPMworkloop()->removeEventSource(fPMWorkQueue); + gIOPMWorkLoop->removeEventSource(fPMWorkQueue); fPMWorkQueue->release(); fPMWorkQueue = 0; } @@ -504,11 +536,19 @@ void IOService::PMfree ( void ) fResponseArray->release(); fResponseArray = NULL; } + if ( fNotifyClientArray ) { + fNotifyClientArray->release(); + fNotifyClientArray = NULL; + } if (fPowerStates && fNumberOfPowerStates) { IODelete(fPowerStates, IOPMPowerState, fNumberOfPowerStates); fNumberOfPowerStates = 0; fPowerStates = NULL; } + if (fPowerClients) { + fPowerClients->release(); + fPowerClients = 0; + } #if PM_VARS_SUPPORT if (fPMVars) @@ -524,7 +564,7 @@ void IOService::PMfree ( void ) } //********************************************************************************* -// [public virtual] joinPMtree +// [public] joinPMtree // // A policy-maker calls its nub here when initializing, to be attached into // the power management hierarchy. The default function is to call the @@ -538,15 +578,16 @@ void IOService::PMfree ( void ) void IOService::joinPMtree ( IOService * driver ) { - IOPlatformExpert * platform; + IOPlatformExpert * platform; platform = getPlatform(); assert(platform != 0); platform->PMRegisterDevice(this, driver); } +#ifndef __LP64__ //********************************************************************************* -// [public virtual] youAreRoot +// [deprecated] youAreRoot // // Power Managment is informing us that we are the root power domain. 
// The only difference between us and any other power domain is that @@ -555,14 +596,12 @@ void IOService::joinPMtree ( IOService * driver ) IOReturn IOService::youAreRoot ( void ) { - fWeAreRoot = true; - fParentsKnowState = true; - attachToParent( getRegistryRoot(), gIOPowerPlane ); return IOPMNoErr; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] PMstop +// [public] PMstop // // Immediately stop driver callouts. Schedule an async stop request to detach // from power plane. @@ -581,23 +620,37 @@ void IOService::PMstop ( void ) // changes until the PMstop async request is processed. PM_LOCK(); - fWillPMStop = true; - if (fDriverCallBusy) - PM_DEBUG("%s::PMstop() driver call busy\n", getName()); + fLockedFlags.PMStop = true; + if (fLockedFlags.DriverCallBusy) + { + PM_DEBUG("%s: PMstop() driver call busy\n", getName()); + } + while (fThreadAssertionCount != 0) + { + if (current_thread() == fThreadAssertionThread) + { + PM_ERROR("%s: PMstop() called from PM thread call\n", getName()); + break; + } + // Wait for thread assertions to drop to zero. + PM_DEBUG("%s: PMstop() wait for %u thread assertion(s)\n", + getName(), fThreadAssertionCount); + PM_LOCK_SLEEP(&fThreadAssertionCount); + } PM_UNLOCK(); request = acquirePMRequest( this, kIOPMRequestTypePMStop ); if (request) { - PM_TRACE("[%s] %p PMstop\n", getName(), this); + PM_TRACE("%s: %p PMstop\n", getName(), this); submitPMRequest( request ); } } //********************************************************************************* -// handlePMstop +// [private] handlePMstop // -// Disconnect the node from its parents and children in the Power Plane. +// Disconnect the node from all parents and children in the power plane. 
//********************************************************************************* void IOService::handlePMstop ( IOPMRequest * request ) @@ -609,7 +662,7 @@ void IOService::handlePMstop ( IOPMRequest * request ) IOService * theParent; PM_ASSERT_IN_GATE(); - PM_TRACE("[%s] %p %s start\n", getName(), this, __FUNCTION__); + PM_TRACE("%s: %p %s start\n", getName(), this, __FUNCTION__); // remove the property removeProperty(kPwrMgtKey); @@ -682,15 +735,16 @@ void IOService::handlePMstop ( IOPMRequest * request ) PM_UNLOCK(); } - // Tell PM_idle_timer_expiration() to ignore idle timer. + // Tell idleTimerExpired() to ignore idle timer. fIdleTimerPeriod = 0; + if (fIdleTimerEventSource) + fIdleTimerEventSource->disable(); - fWillPMStop = false; - PM_TRACE("[%s] %p %s done\n", getName(), this, __FUNCTION__); + PM_TRACE("%s: %p %s done\n", getName(), this, __FUNCTION__); } //********************************************************************************* -// [public virtual] addPowerChild +// [public] addPowerChild // // Power Management is informing us who our children are. //********************************************************************************* @@ -708,7 +762,7 @@ IOReturn IOService::addPowerChild ( IOService * child ) if (!initialized || !child->initialized) return IOPMNotYetInitialized; - OUR_PMLog( kPMLogAddChild, 0, 0 ); + OUR_PMLog( kPMLogAddChild, (uintptr_t) child, 0 ); do { // Is this child already one of our children? 
@@ -732,7 +786,7 @@ IOReturn IOService::addPowerChild ( IOService * child ) } if (!ok) { - PM_DEBUG("[%s] %s (%p) is already a child\n", + PM_DEBUG("%s: %s (%p) is already a child\n", getName(), child->getName(), child); break; } @@ -765,8 +819,8 @@ IOReturn IOService::addPowerChild ( IOService * child ) if (!requests[0] || !requests[1] || !requests[2]) break; - requests[0]->setParentRequest( requests[1] ); - requests[1]->setParentRequest( requests[2] ); + requests[0]->attachNextRequest( requests[1] ); + requests[1]->attachNextRequest( requests[2] ); connection->init(); connection->start(this); @@ -791,15 +845,16 @@ IOReturn IOService::addPowerChild ( IOService * child ) if (requests[1]) releasePMRequest(requests[1]); if (requests[2]) releasePMRequest(requests[2]); - // silent failure, to prevent platform drivers from adding the child + // Silent failure, to prevent platform drivers from adding the child // to the root domain. - return IOPMNoErr; + + return kIOReturnSuccess; } //********************************************************************************* // [private] addPowerChild1 // -// Called on the power parent. +// Step 1/3 of adding a power child. Called on the power parent. //********************************************************************************* void IOService::addPowerChild1 ( IOPMRequest * request ) @@ -809,24 +864,24 @@ void IOService::addPowerChild1 ( IOPMRequest * request ) // Make us temporary usable before adding the child. 
PM_ASSERT_IN_GATE(); - OUR_PMLog( kPMLogMakeUsable, kPMLogMakeUsable, fDeviceDesire ); + OUR_PMLog( kPMLogMakeUsable, kPMLogMakeUsable, 0 ); if (fControllingDriver && inPlane(gIOPowerPlane) && fParentsKnowState) { tempDesire = fNumberOfPowerStates - 1; } - if (tempDesire && (fWeAreRoot || (fMaxCapability >= tempDesire))) + if (tempDesire && (IS_PM_ROOT() || (fMaxCapability >= tempDesire))) { - computeDesiredState( tempDesire ); - changeState(); + adjustPowerState(tempDesire); } } //********************************************************************************* // [private] addPowerChild2 // -// Called on the joining child. Blocked behind addPowerChild1. +// Step 2/3 of adding a power child. Called on the joining child. +// Execution blocked behind addPowerChild1. //********************************************************************************* void IOService::addPowerChild2 ( IOPMRequest * request ) @@ -843,12 +898,12 @@ void IOService::addPowerChild2 ( IOPMRequest * request ) if (!parent || !inPlane(gIOPowerPlane)) { - PM_DEBUG("[%s] addPowerChild2 not in power plane\n", getName()); + PM_DEBUG("%s: addPowerChild2 not in power plane\n", getName()); return; } - // Parent will be waiting for us to complete this stage, safe to - // directly access parent's vars. + // Parent will be waiting for us to complete this stage. + // It is safe to directly access parent's vars. 
knowsState = (parent->fPowerStates) && (parent->fParentsKnowState); powerState = parent->fCurrentPowerState; @@ -871,23 +926,27 @@ void IOService::addPowerChild2 ( IOPMRequest * request ) fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags); // initially change into the state we are already in tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags); - computeDesiredState(tempDesire); fPreviousRequest = 0xffffffff; - changeState(); + adjustPowerState(tempDesire); } + +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->tagPowerPlaneService(this, &fRootDomainState); +#endif } //********************************************************************************* // [private] addPowerChild3 // -// Called on the parent. Blocked behind addPowerChild2. +// Step 3/3 of adding a power child. Called on the parent. +// Execution blocked behind addPowerChild2. //********************************************************************************* void IOService::addPowerChild3 ( IOPMRequest * request ) { IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; IOService * child; - unsigned int i; + IOPMrootDomain * rootDomain = getPMRootDomain(); PM_ASSERT_IN_GATE(); child = (IOService *) connection->getChildEntry(gIOPowerPlane); @@ -896,28 +955,24 @@ void IOService::addPowerChild3 ( IOPMRequest * request ) { if (child->getProperty("IOPMStrictTreeOrder")) { - PM_DEBUG("[%s] strict ordering enforced\n", getName()); + PM_DEBUG("%s: strict PM order enforced\n", getName()); fStrictTreeOrder = true; } - for (i = 0; i <= kMaxType; i++) - { - if ( fAggressivenessValid[i] ) - { - child->setAggressiveness(i, fAggressivenessValue[i]); - } - } + if (rootDomain) + rootDomain->joinAggressiveness( child ); } else { - PM_DEBUG("[%s] addPowerChild3 not in power plane\n", getName()); + PM_DEBUG("%s: addPowerChild3 not in power plane\n", getName()); } connection->release(); } +#ifndef __LP64__ 
//********************************************************************************* -// [public virtual deprecated] setPowerParent +// [deprecated] setPowerParent // // Power Management is informing us who our parent is. // If we have a controlling driver, find out, given our newly-informed @@ -930,9 +985,10 @@ IOReturn IOService::setPowerParent ( { return kIOReturnUnsupported; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] removePowerChild +// [public] removePowerChild // // Called on a parent whose child is being removed by PMstop(). //********************************************************************************* @@ -945,7 +1001,7 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) OUR_PMLog( kPMLogRemoveChild, 0, 0 ); theNub->retain(); - + // detach nub from child theChild = theNub->copyChildEntry(gIOPowerPlane); if ( theChild ) @@ -976,16 +1032,17 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) theNub->release(); - // Schedule a request to re-scan child desires and clamp bits. - if (!fWillAdjustPowerState) + // A child has gone away, re-scan children desires and clamp bits. + // The fPendingAdjustPowerRequest helps to reduce redundant parent work. + + if (!fAdjustPowerScheduled) { IOPMRequest * request; - request = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState ); if (request) { submitPMRequest( request ); - fWillAdjustPowerState = true; + fAdjustPowerScheduled = true; } } @@ -993,7 +1050,7 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) } //********************************************************************************* -// [public virtual] registerPowerDriver +// [public] registerPowerDriver // // A driver has called us volunteering to control power to our device. 
//********************************************************************************* @@ -1016,7 +1073,7 @@ IOReturn IOService::registerPowerDriver ( return kIOReturnBadArgument; } - if (!powerDriver) + if (!powerDriver || !powerDriver->initialized) { OUR_PMLog(kPMLogControllingDriverErr4, 0, 0); return kIOReturnBadArgument; @@ -1067,6 +1124,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) unsigned long numberOfStates = (unsigned long) request->fArg2; unsigned long i; IOService * root; + OSIterator * iter; PM_ASSERT_IN_GATE(); assert(powerStates); @@ -1115,10 +1173,25 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) } } - if ( fNeedToBecomeUsable ) { - fNeedToBecomeUsable = false; - fDeviceDesire = fNumberOfPowerStates - 1; - } + // Examine all existing power clients and perform limit check. + + if (fPowerClients) + { + iter = OSCollectionIterator::withCollection(fPowerClients); + if (iter) + { + const OSSymbol * client; + while ((client = (const OSSymbol *) iter->getNextObject())) + { + uint32_t powerState = getPowerStateForClient(client); + if (powerState >= numberOfStates) + { + updatePowerClient(client, numberOfStates - 1); + } + } + iter->release(); + } + } if ( inPlane(gIOPowerPlane) && fParentsKnowState ) { @@ -1126,8 +1199,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags); // initially change into the state we are already in tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags); - computeDesiredState(tempDesire); - changeState(); + adjustPowerState(tempDesire); } } else @@ -1140,7 +1212,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) } //********************************************************************************* -// [public virtual] registerInterestedDriver +// [public] registerInterestedDriver // // Add the caller to our list of 
interested drivers and return our current // power state. If we don't have a power-controlling driver yet, we will @@ -1179,7 +1251,7 @@ IOPMPowerFlags IOService::registerInterestedDriver ( IOService * driver ) } //********************************************************************************* -// [public virtual] deRegisterInterestedDriver +// [public] deRegisterInterestedDriver //********************************************************************************* IOReturn IOService::deRegisterInterestedDriver ( IOService * driver ) @@ -1206,7 +1278,7 @@ IOReturn IOService::deRegisterInterestedDriver ( IOService * driver ) { item->active = false; } - if (fDriverCallBusy) + if (fLockedFlags.DriverCallBusy) PM_DEBUG("%s::deRegisterInterestedDriver() driver call busy\n", getName()); } PM_UNLOCK(); @@ -1275,7 +1347,7 @@ void IOService::handleInterestChanged( IOPMRequest * request ) } //********************************************************************************* -// [public virtual] acknowledgePowerChange +// [public] acknowledgePowerChange // // After we notified one of the interested drivers or a power-domain child // of an impending change in power, it has called to say it is now @@ -1299,10 +1371,7 @@ IOReturn IOService::acknowledgePowerChange ( IOService * whichObject ) request = acquirePMRequest( this, kIOPMRequestTypeAckPowerChange ); if (!request) - { - PM_ERROR("%s::%s no memory\n", getName(), __FUNCTION__); return kIOReturnNoMemory; - } whichObject->retain(); request->fArg0 = whichObject; @@ -1362,7 +1431,7 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) informee->whatObject->getName(), (fDriverCallReason == kDriverCallInformPreChange) ? 
"Will" : "Did", informee->whatObject, - fName, fCurrentPowerState, fHeadNoteState, NS_TO_MS(nsec)); + fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); } #endif // mark it acked @@ -1389,11 +1458,11 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) } if ( childPower == kIOPMUnknown ) { - fPowerStates[fHeadNoteState].staticPower = kIOPMUnknown; + fHeadNotePowerArrayEntry->staticPower = kIOPMUnknown; } else { - if ( fPowerStates[fHeadNoteState].staticPower != kIOPMUnknown ) + if (fHeadNotePowerArrayEntry->staticPower != kIOPMUnknown) { - fPowerStates[fHeadNoteState].staticPower += childPower; + fHeadNotePowerArrayEntry->staticPower += childPower; } } } @@ -1417,7 +1486,7 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) } //********************************************************************************* -// [public virtual] acknowledgeSetPowerState +// [public] acknowledgeSetPowerState // // After we instructed our controlling driver to change power states, // it has called to say it has finished doing so. @@ -1433,10 +1502,7 @@ IOReturn IOService::acknowledgeSetPowerState ( void ) request = acquirePMRequest( this, kIOPMRequestTypeAckSetPowerState ); if (!request) - { - PM_ERROR("%s::%s no memory\n", getName(), __FUNCTION__); return kIOReturnNoMemory; - } submitPMRequest( request ); return kIOReturnSuccess; @@ -1444,31 +1510,78 @@ IOReturn IOService::acknowledgeSetPowerState ( void ) //********************************************************************************* // [private] adjustPowerState -// -// Child has signaled a change - child changed it's desire, new child added, -// existing child removed. Adjust our power state accordingly. 
//********************************************************************************* -void IOService::adjustPowerState( void ) +void IOService::adjustPowerState ( uint32_t clamp ) { PM_ASSERT_IN_GATE(); - if (inPlane(gIOPowerPlane)) + computeDesiredState(clamp); + if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane)) { - rebuildChildClampBits(); - computeDesiredState(); - if ( fControllingDriver && fParentsKnowState ) - changeState(); + startPowerChange( + /* flags */ kIOPMWeInitiated, + /* power state */ fDesiredPowerState, + /* domain flags */ 0, + /* connection */ 0, + /* parent flags */ 0); } - else +} + +//********************************************************************************* +// [public] synchronizePowerTree +//********************************************************************************* + +IOReturn IOService::synchronizePowerTree ( void ) +{ + IOPMRequest * request_c; + IOPMRequest * request_s; + + if (this != getPMRootDomain()) + return kIOReturnBadArgument; + if (!initialized) + return kIOPMNotYetInitialized; + + request_c = acquirePMRequest( this, kIOPMRequestTypeIdleCancel ); + request_s = acquirePMRequest( this, kIOPMRequestTypeSynchronizePowerTree ); + + if (!request_c || !request_s) + goto error_no_memory; + + request_c->attachNextRequest( request_s ); + + submitPMRequest(request_c); + submitPMRequest(request_s); + + return kIOReturnSuccess; + +error_no_memory: + if (request_c) releasePMRequest(request_c); + if (request_s) releasePMRequest(request_s); + return kIOReturnNoMemory; +} + +//********************************************************************************* +// [private] handleSynchronizePowerTree +//********************************************************************************* + +void IOService::handleSynchronizePowerTree ( IOPMRequest * /*request*/ ) +{ + PM_ASSERT_IN_GATE(); + if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane) && + (fCurrentPowerState == fNumberOfPowerStates - 1)) { - 
PM_DEBUG("[%s] %s: not in power tree\n", getName(), __FUNCTION__); - return; + startPowerChange( + /* flags */ kIOPMWeInitiated | kIOPMSynchronize, + /* power state */ fCurrentPowerState, + /* domain flags */ 0, + /* connection */ 0, + /* parent flags */ 0); } - fWillAdjustPowerState = false; } +#ifndef __LP64__ //********************************************************************************* -// [public deprecated] powerDomainWillChangeTo +// [deprecated] powerDomainWillChangeTo // // Called by the power-hierarchy parent notifying of a new power state // in the power domain. @@ -1484,6 +1597,7 @@ IOReturn IOService::powerDomainWillChangeTo ( assert(false); return kIOReturnUnsupported; } +#endif /* !__LP64__ */ //********************************************************************************* // [private] handlePowerDomainWillChangeTo @@ -1491,30 +1605,31 @@ IOReturn IOService::powerDomainWillChangeTo ( void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request ) { - IOPMPowerFlags newPowerFlags = (IOPMPowerFlags) request->fArg0; - IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; - bool powerWillDrop = (bool) request->fArg2; + IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; + IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; + unsigned long parentChangeFlags = (unsigned long) request->fArg2; OSIterator * iter; OSObject * next; IOPowerConnection * connection; unsigned long newPowerState; + unsigned long myChangeFlags; IOPMPowerFlags combinedPowerFlags; bool savedParentsKnowState; IOReturn result = IOPMAckImplied; PM_ASSERT_IN_GATE(); - OUR_PMLog(kPMLogWillChange, newPowerFlags, 0); + OUR_PMLog(kPMLogWillChange, parentPowerFlags, 0); if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) { - PM_DEBUG("[%s] %s: not in power tree\n", getName(), __FUNCTION__); + PM_DEBUG("%s::%s not in power tree\n", getName(), __FUNCTION__); goto exit_no_ack; } savedParentsKnowState = 
fParentsKnowState; - // Combine parents' power flags to determine our maximum state - // within the new power domain + // Combine parents' output power flags. + combinedPowerFlags = 0; iter = getParentIterator(gIOPowerPlane); @@ -1525,7 +1640,7 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request ) if ( (connection = OSDynamicCast(IOPowerConnection, next)) ) { if ( connection == whichParent ) - combinedPowerFlags |= newPowerFlags; + combinedPowerFlags |= parentPowerFlags; else combinedPowerFlags |= connection->parentCurrentPowerFlags(); } @@ -1533,30 +1648,37 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request ) iter->release(); } - if ( fControllingDriver ) + // If our initial change has yet to occur, then defer the power change + // until after the power domain has completed its power transition. + + if ( fControllingDriver && !fInitialChange ) { newPowerState = fControllingDriver->maxCapabilityForDomainState( combinedPowerFlags); - result = enqueuePowerChange( - /* flags */ IOPMParentInitiated | IOPMDomainWillChange, + // Absorb parent's kIOPMSynchronize flag. + myChangeFlags = kIOPMParentInitiated | kIOPMDomainWillChange | + (parentChangeFlags & kIOPMSynchronize); + + result = startPowerChange( + /* flags */ myChangeFlags, /* power state */ newPowerState, - /* domain state */ combinedPowerFlags, + /* domain flags */ combinedPowerFlags, /* connection */ whichParent, - /* parent state */ newPowerFlags); + /* parent flags */ parentPowerFlags); } // If parent is dropping power, immediately update the parent's // capability flags. Any future merging of parent(s) combined // power flags should account for this power drop. - if (powerWillDrop) + if (parentChangeFlags & kIOPMDomainPowerDrop) { - setParentInfo(newPowerFlags, whichParent, true); + setParentInfo(parentPowerFlags, whichParent, true); } // Parent is expecting an ACK from us. 
If we did not embark on a state - // transition, when enqueuePowerChang() returns IOPMAckImplied. We are + // transition, i.e. startPowerChange() returned IOPMAckImplied. We are // still required to issue an ACK to our parent. if (IOPMAckImplied == result) @@ -1571,23 +1693,14 @@ void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request ) } } - // If the parent registers it's power driver late, then this is the - // first opportunity to tell our parent about our desire. - - if (!savedParentsKnowState && fParentsKnowState) - { - PM_TRACE("[%s] powerDomainWillChangeTo: parentsKnowState = true\n", - getName()); - ask_parent( fDesiredPowerState ); - } - exit_no_ack: // Drop the retain from notifyChild(). if (whichParent) whichParent->release(); } +#ifndef __LP64__ //********************************************************************************* -// [public deprecated] powerDomainDidChangeTo +// [deprecated] powerDomainDidChangeTo // // Called by the power-hierarchy parent after the power state of the power domain // has settled at a new level. 
@@ -1603,6 +1716,7 @@ IOReturn IOService::powerDomainDidChangeTo ( assert(false); return kIOReturnUnsupported; } +#endif /* !__LP64__ */ //********************************************************************************* // [private] handlePowerDomainDidChangeTo @@ -1610,40 +1724,54 @@ IOReturn IOService::powerDomainDidChangeTo ( void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request ) { - IOPMPowerFlags newPowerFlags = (IOPMPowerFlags) request->fArg0; - IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; + IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; + IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; + unsigned long parentChangeFlags = (unsigned long) request->fArg2; unsigned long newPowerState; + unsigned long myChangeFlags; + unsigned long initialDesire; bool savedParentsKnowState; IOReturn result = IOPMAckImplied; PM_ASSERT_IN_GATE(); - OUR_PMLog(kPMLogDidChange, newPowerFlags, 0); + OUR_PMLog(kPMLogDidChange, parentPowerFlags, 0); if (!inPlane(gIOPowerPlane) || !whichParent || !whichParent->getAwaitingAck()) { - PM_DEBUG("[%s] %s: not in power tree\n", getName(), __FUNCTION__); + PM_DEBUG("%s::%s not in power tree\n", getName(), __FUNCTION__); goto exit_no_ack; } savedParentsKnowState = fParentsKnowState; - setParentInfo(newPowerFlags, whichParent, true); + setParentInfo(parentPowerFlags, whichParent, true); if ( fControllingDriver ) { newPowerState = fControllingDriver->maxCapabilityForDomainState( fParentsCurrentPowerFlags); - result = enqueuePowerChange( - /* flags */ IOPMParentInitiated | IOPMDomainDidChange, + if (fInitialChange) + { + initialDesire = fControllingDriver->initialPowerStateForDomainState( + fParentsCurrentPowerFlags); + computeDesiredState(initialDesire); + } + + // Absorb parent's kIOPMSynchronize flag. 
+ myChangeFlags = kIOPMParentInitiated | kIOPMDomainDidChange | + (parentChangeFlags & kIOPMSynchronize); + + result = startPowerChange( + /* flags */ myChangeFlags, /* power state */ newPowerState, - /* domain state */ fParentsCurrentPowerFlags, + /* domain flags */ fParentsCurrentPowerFlags, /* connection */ whichParent, - /* parent state */ 0); + /* parent flags */ 0); } // Parent is expecting an ACK from us. If we did not embark on a state - // transition, when enqueuePowerChang() returns IOPMAckImplied. We are + // transition, i.e. startPowerChange() returned IOPMAckImplied. We are // still required to issue an ACK to our parent. if (IOPMAckImplied == result) @@ -1658,12 +1786,12 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request ) } } - // If the parent registers it's power driver late, then this is the + // If the parent registers its power driver late, then this is the // first opportunity to tell our parent about our desire. if (!savedParentsKnowState && fParentsKnowState) { - PM_TRACE("[%s] powerDomainDidChangeTo: parentsKnowState = true\n", + PM_TRACE("%s::powerDomainDidChangeTo parentsKnowState = true\n", getName()); ask_parent( fDesiredPowerState ); } @@ -1740,6 +1868,9 @@ void IOService::rebuildChildClampBits ( void ) fPowerStates[i].capabilityFlags &= ~(kIOPMChildClamp | kIOPMChildClamp2); } + if (!inPlane(gIOPowerPlane)) + return; + // Loop through the children. When we encounter the calling child, save the // computed state as this child's desire. And set the ChildClamp bits in any // of our states that some child has clamp on. @@ -1773,7 +1904,7 @@ void IOService::rebuildChildClampBits ( void ) } //********************************************************************************* -// [public virtual] requestPowerDomainState +// [public] requestPowerDomainState // // The child of a power domain calls it parent here to request power of a certain // character. 
@@ -1795,7 +1926,7 @@ IOReturn IOService::requestPowerDomainState ( if (gIOPMWorkLoop->onThread() == false) { - PM_DEBUG("[%s] called requestPowerDomainState\n", getName()); + PM_DEBUG("%s::requestPowerDomainState\n", getName()); return kIOReturnSuccess; } @@ -1887,16 +2018,6 @@ IOReturn IOService::requestPowerDomainState ( computedState = i; - // Clamp removed on the initial power request from a new child. - - if (fClampOn && !whichChild->childHasRequestedPower()) - { - PM_TRACE("[%s] %p power clamp removed (child = %p)\n", - getName(), this, whichChild); - fClampOn = false; - fDeviceDesire = 0; - } - // Record the child's desires on the connection. #if SUPPORT_IDLE_CANCEL bool attemptCancel = ((kIOPMPreventIdleSleep & desiredState) && !whichChild->getPreventIdleSleepFlag()); @@ -1909,16 +2030,17 @@ IOReturn IOService::requestPowerDomainState ( if (whichChild->getReadyFlag() == false) return IOPMNoErr; - // Issue a ping for us to re-evaluate all children desires and - // possibly change power state. + // Schedule a request to re-evaluate all children desires and + // adjust power state. Submit a request if one wasn't pending, + // or if the current request is part of a call tree. 
- if (!fWillAdjustPowerState && !fDeviceOverrides) - { - childRequest = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState ); + if (!fDeviceOverrides && (!fAdjustPowerScheduled || gIOPMRequest->getRootRequest())) + { + childRequest = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState, gIOPMRequest ); if (childRequest) { submitPMRequest( childRequest ); - fWillAdjustPowerState = true; + fAdjustPowerScheduled = true; } } #if SUPPORT_IDLE_CANCEL @@ -1936,7 +2058,7 @@ IOReturn IOService::requestPowerDomainState ( } //********************************************************************************* -// [public virtual] temporaryPowerClampOn +// [public] temporaryPowerClampOn // // A power domain wants to clamp its power on till it has children which // will thendetermine the power domain state. @@ -1946,21 +2068,11 @@ IOReturn IOService::requestPowerDomainState ( IOReturn IOService::temporaryPowerClampOn ( void ) { - IOPMRequest * request; - - if (!initialized) - return IOPMNotYetInitialized; - - request = acquirePMRequest( this, kIOPMRequestTypeTemporaryPowerClamp ); - if (!request) - return kIOReturnNoMemory; - - submitPMRequest( request ); - return IOPMNoErr; + return requestPowerState( gIOPMPowerClientChildProxy, kIOPMPowerStateMax ); } //********************************************************************************* -// [public virtual] makeUsable +// [public] makeUsable // // Some client of our device is asking that we become usable. 
Although // this has not come from a subclassed device object, treat it exactly @@ -1973,89 +2085,90 @@ IOReturn IOService::temporaryPowerClampOn ( void ) IOReturn IOService::makeUsable ( void ) { - IOPMRequest * request; - - if (!initialized) - return IOPMNotYetInitialized; - OUR_PMLog(kPMLogMakeUsable, 0, 0); + return requestPowerState( gIOPMPowerClientDevice, kIOPMPowerStateMax ); +} - request = acquirePMRequest( this, kIOPMRequestTypeMakeUsable ); - if (!request) - return kIOReturnNoMemory; +//********************************************************************************* +// [public] currentCapability +//********************************************************************************* - submitPMRequest( request ); - return IOPMNoErr; +IOPMPowerFlags IOService::currentCapability ( void ) +{ + if (!initialized) + return IOPMNotPowerManaged; + + return fCurrentCapabilityFlags; } //********************************************************************************* -// [private] handleMakeUsable +// [public] changePowerStateTo // -// Handle a request to become usable. +// Called by our power-controlling driver to change power state. The new desired +// power state is computed and compared against the current power state. If those +// power states differ, then a power state change is initiated. 
//********************************************************************************* -void IOService::handleMakeUsable ( IOPMRequest * request ) +IOReturn IOService::changePowerStateTo ( unsigned long ordinal ) { - PM_ASSERT_IN_GATE(); - if ( fControllingDriver ) - { - fDeviceDesire = fNumberOfPowerStates - 1; - computeDesiredState(); - if ( inPlane(gIOPowerPlane) && fParentsKnowState ) - { - changeState(); - } - } - else - { - fNeedToBecomeUsable = true; - } + OUR_PMLog(kPMLogChangeStateTo, ordinal, 0); + return requestPowerState( gIOPMPowerClientDriver, ordinal ); } //********************************************************************************* -// [public virtual] currentCapability +// [protected] changePowerStateToPriv +// +// Called by our driver subclass to change power state. The new desired power +// state is computed and compared against the current power state. If those +// power states differ, then a power state change is initiated. //********************************************************************************* -IOPMPowerFlags IOService::currentCapability ( void ) +IOReturn IOService::changePowerStateToPriv ( unsigned long ordinal ) { - if (!initialized) - return IOPMNotPowerManaged; - - return fCurrentCapabilityFlags; + OUR_PMLog(kPMLogChangeStateToPriv, ordinal, 0); + return requestPowerState( gIOPMPowerClientDevice, ordinal ); } //********************************************************************************* -// [public virtual] changePowerStateTo +// [protected] changePowerStateWithOverrideTo // -// For some reason, our power-controlling driver has decided it needs to change -// power state. We enqueue the power change so that appropriate parties -// will be notified, and then we will instruct the driver to make the change. +// Called by our driver subclass to change power state. The new desired power +// state is computed and compared against the current power state. If those +// power states differ, then a power state change is initiated. 
+// Override enforced - Children and Driver desires are ignored. //********************************************************************************* -IOReturn IOService::changePowerStateTo ( unsigned long ordinal ) +IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal ) { IOPMRequest * request; if (!initialized) - return IOPMNotYetInitialized; + return kIOPMNotYetInitialized; - OUR_PMLog(kPMLogChangeStateTo, ordinal, 0); + OUR_PMLog(kPMLogChangeStateToPriv, ordinal, 0); - request = acquirePMRequest( this, kIOPMRequestTypeChangePowerStateTo ); + request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerStateOverride ); if (!request) return kIOReturnNoMemory; - request->fArg0 = (void *) ordinal; - request->fArg1 = (void *) false; + gIOPMPowerClientDevice->retain(); + request->fArg0 = (void *) ordinal; + request->fArg1 = (void *) gIOPMPowerClientDevice; + request->fArg2 = 0; +#if NOT_READY + if (action) + request->installCompletionAction( action, target, param ); +#endif - // Avoid needless downwards power transitions by clamping power in - // computeDesiredState() until the delayed request is processed. + // Prevent needless downwards power transitions by clamping power + // until the scheduled request is executed. 
- if (gIOPMWorkLoop->inGate()) + if (gIOPMWorkLoop->inGate() && (ordinal < fNumberOfPowerStates)) { fTempClampPowerState = max(fTempClampPowerState, ordinal); fTempClampCount++; - request->fArg1 = (void *) true; + fOverrideMaxPowerState = ordinal; + request->fArg2 = (void *) true; } submitPMRequest( request ); @@ -2063,64 +2176,41 @@ IOReturn IOService::changePowerStateTo ( unsigned long ordinal ) } //********************************************************************************* -// [private] handleChangePowerStateTo -//********************************************************************************* - -void IOService::handleChangePowerStateTo ( IOPMRequest * request ) -{ - unsigned long ordinal = (unsigned long) request->fArg0; - - PM_ASSERT_IN_GATE(); - if (request->fArg1) - { - assert(fTempClampCount != 0); - if (fTempClampCount) - fTempClampCount--; - if (!fTempClampCount) - fTempClampPowerState = 0; - } - - if ( fControllingDriver && (ordinal < fNumberOfPowerStates)) - { - fDriverDesire = ordinal; - computeDesiredState(); - if ( inPlane(gIOPowerPlane) && fParentsKnowState ) - { - changeState(); - } - } -} - -//********************************************************************************* -// [public virtual] changePowerStateToPriv -// -// For some reason, a subclassed device object has decided it needs to change -// power state. We enqueue the power change so that appropriate parties -// will be notified, and then we will instruct the driver to make the change. 
+// [private] requestPowerState //********************************************************************************* -IOReturn IOService::changePowerStateToPriv ( unsigned long ordinal ) +IOReturn IOService::requestPowerState ( + const OSSymbol * client, + uint32_t state ) { IOPMRequest * request; + if (!client) + return kIOReturnBadArgument; if (!initialized) - return IOPMNotYetInitialized; + return kIOPMNotYetInitialized; - request = acquirePMRequest( this, kIOPMRequestTypeChangePowerStateToPriv ); + request = acquirePMRequest( this, kIOPMRequestTypeRequestPowerState ); if (!request) return kIOReturnNoMemory; - request->fArg0 = (void *) ordinal; - request->fArg1 = (void *) false; + client->retain(); + request->fArg0 = (void *) state; + request->fArg1 = (void *) client; + request->fArg2 = 0; +#if NOT_READY + if (action) + request->installCompletionAction( action, target, param ); +#endif - // Avoid needless downwards power transitions by clamping power in - // computeDesiredState() until the delayed request is processed. + // Prevent needless downwards power transitions by clamping power + // until the scheduled request is executed. 
- if (gIOPMWorkLoop->inGate()) + if (gIOPMWorkLoop->inGate() && (state < fNumberOfPowerStates)) { - fTempClampPowerState = max(fTempClampPowerState, ordinal); + fTempClampPowerState = max(fTempClampPowerState, state); fTempClampCount++; - request->fArg1 = (void *) true; + request->fArg2 = (void *) true; } submitPMRequest( request ); @@ -2128,106 +2218,196 @@ IOReturn IOService::changePowerStateToPriv ( unsigned long ordinal ) } //********************************************************************************* -// [private] handleChangePowerStateToPriv +// [private] handleRequestPowerState //********************************************************************************* -void IOService::handleChangePowerStateToPriv ( IOPMRequest * request ) +void IOService::handleRequestPowerState ( IOPMRequest * request ) { - unsigned long ordinal = (unsigned long) request->fArg0; + const OSSymbol * client = (const OSSymbol *) request->fArg1; + uint32_t state = (uint32_t)(uintptr_t) request->fArg0; PM_ASSERT_IN_GATE(); - OUR_PMLog(kPMLogChangeStateToPriv, ordinal, 0); - if (request->fArg1) + if (request->fArg2) { assert(fTempClampCount != 0); - if (fTempClampCount) - fTempClampCount--; - if (!fTempClampCount) - fTempClampPowerState = 0; + if (fTempClampCount) fTempClampCount--; + if (!fTempClampCount) fTempClampPowerState = 0; } - if ( fControllingDriver && (ordinal < fNumberOfPowerStates)) - { - fDeviceDesire = ordinal; - computeDesiredState(); - if ( inPlane(gIOPowerPlane) && fParentsKnowState ) - { - changeState(); - } - } + if (fNumberOfPowerStates && (state >= fNumberOfPowerStates)) + state = fNumberOfPowerStates - 1; + + // Override from changePowerStateWithOverrideTo() persists until + // the next "device" power request, such as changePowerStateToPriv(). 
+ + if ((getPMRequestType() != kIOPMRequestTypeRequestPowerStateOverride) && + (client == gIOPMPowerClientDevice)) + fOverrideMaxPowerState = kIOPMPowerStateMax; + + if ((state == 0) && + (client != gIOPMPowerClientDevice) && + (client != gIOPMPowerClientDriver) && + (client != gIOPMPowerClientChildProxy)) + removePowerClient(client); + else + updatePowerClient(client, state); + + adjustPowerState(); + client->release(); +} + +//********************************************************************************* +// [private] Helper functions to update/remove power clients. +//********************************************************************************* + +void IOService::updatePowerClient( const OSSymbol * client, uint32_t powerState ) +{ + if (!fPowerClients) + fPowerClients = OSDictionary::withCapacity(4); + if (fPowerClients && client) + { + OSNumber * num = (OSNumber *) fPowerClients->getObject(client); + if (num) + num->setValue(powerState); + else + { + num = OSNumber::withNumber(powerState, 32); + if (num) + { + fPowerClients->setObject(client, num); + num->release(); + } + } + } +} + +void IOService::removePowerClient( const OSSymbol * client ) +{ + if (fPowerClients && client) + fPowerClients->removeObject(client); +} + +uint32_t IOService::getPowerStateForClient( const OSSymbol * client ) +{ + uint32_t powerState = 0; + + if (fPowerClients && client) + { + OSNumber * num = (OSNumber *) fPowerClients->getObject(client); + if (num) powerState = num->unsigned32BitValue(); + } + return powerState; } //********************************************************************************* // [private] computeDesiredState //********************************************************************************* -void IOService::computeDesiredState ( unsigned long tempDesire ) +void IOService::computeDesiredState ( unsigned long localClamp ) { OSIterator * iter; OSObject * next; IOPowerConnection * connection; - unsigned long newDesiredState = 0; - unsigned long 
childDesire = 0; - unsigned long deviceDesire; - - if (tempDesire) - deviceDesire = tempDesire; - else - deviceDesire = fDeviceDesire; + uint32_t desiredState = 0; + uint32_t newPowerState = 0; + bool hasChildren = false; - // If clamp is on, always override deviceDesire to max. + // Desired power state is always 0 without a controlling driver. - if (fClampOn && fNumberOfPowerStates) - deviceDesire = fNumberOfPowerStates - 1; + if (!fNumberOfPowerStates) + { + fDesiredPowerState = 0; + //PM_DEBUG("%s::%s no controlling driver\n", getName(), __FUNCTION__); + return; + } - // Compute the maximum of our children's desires, - // our controlling driver's desire, and the subclass device's desire. + // Examine the children's desired power state. - if ( !fDeviceOverrides ) + iter = getChildIterator(gIOPowerPlane); + if (iter) { - iter = getChildIterator(gIOPowerPlane); - if ( iter ) + while ((next = iter->getNextObject())) { - while ( (next = iter->getNextObject()) ) + if ((connection = OSDynamicCast(IOPowerConnection, next))) { - if ( (connection = OSDynamicCast(IOPowerConnection, next)) ) + if (connection->getReadyFlag() == false) { - if (connection->getReadyFlag() == false) - { - PM_CONNECT("[%s] %s: connection not ready\n", - getName(), __FUNCTION__); - continue; - } - - if (connection->getDesiredDomainState() > childDesire) - childDesire = connection->getDesiredDomainState(); + PM_CONNECT("[%s] %s: connection not ready\n", + getName(), __FUNCTION__); + continue; } + if (connection->childHasRequestedPower()) + hasChildren = true; + if (connection->getDesiredDomainState() > desiredState) + desiredState = connection->getDesiredDomainState(); } - iter->release(); } + iter->release(); + } + if (hasChildren) + updatePowerClient(gIOPMPowerClientChildren, desiredState); + else + removePowerClient(gIOPMPowerClientChildren); - fChildrenDesire = childDesire; - newDesiredState = max(childDesire, fDriverDesire); + // Iterate through all power clients to determine the min power 
state. + + iter = OSCollectionIterator::withCollection(fPowerClients); + if (iter) + { + const OSSymbol * client; + while ((client = (const OSSymbol *) iter->getNextObject())) + { + // Ignore child and driver when override is in effect. + if ((fDeviceOverrides || + (getPMRequestType() == kIOPMRequestTypeRequestPowerStateOverride)) && + ((client == gIOPMPowerClientChildren) || + (client == gIOPMPowerClientDriver))) + continue; + + // Ignore child proxy when children are present. + if (hasChildren && (client == gIOPMPowerClientChildProxy)) + continue; + + desiredState = getPowerStateForClient(client); + assert(desiredState < fNumberOfPowerStates); + PM_TRACE(" %u %s\n", + desiredState, client->getCStringNoCopy()); + + newPowerState = max(newPowerState, desiredState); + + if (client == gIOPMPowerClientDevice) + fDeviceDesire = desiredState; + } + iter->release(); } - newDesiredState = max(deviceDesire, newDesiredState); - if (fTempClampCount && (fTempClampPowerState < fNumberOfPowerStates)) - newDesiredState = max(fTempClampPowerState, newDesiredState); + // Factor in the temporary power desires. + + newPowerState = max(newPowerState, localClamp); + newPowerState = max(newPowerState, fTempClampPowerState); + + // Limit check against max power override. + + newPowerState = min(newPowerState, fOverrideMaxPowerState); - fDesiredPowerState = newDesiredState; - // Limit check against number of power states. - if (fNumberOfPowerStates == 0) - fDesiredPowerState = 0; - else if (fDesiredPowerState >= fNumberOfPowerStates) - fDesiredPowerState = fNumberOfPowerStates - 1; + if (newPowerState >= fNumberOfPowerStates) + newPowerState = fNumberOfPowerStates - 1; + + fDesiredPowerState = newPowerState; + + PM_TRACE(" temp %u, clamp %u, current %u, new %u\n", + (uint32_t) localClamp, (uint32_t) fTempClampPowerState, + (uint32_t) fCurrentPowerState, newPowerState); - // Restart idle timer if stopped and deviceDesire has increased. 
+ // Restart idle timer if stopped and device desire has increased. - if (fDeviceDesire && fActivityTimerStopped) + if (fDeviceDesire && fIdleTimerStopped) { - fActivityTimerStopped = false; + fIdleTimerStopped = false; + fActivityTickleCount = 0; + clock_get_uptime(&fIdleTimerStartTime); start_PM_idle_timer(); } @@ -2237,48 +2417,17 @@ void IOService::computeDesiredState ( unsigned long tempDesire ) // of servicing more activity tickles rather than dropping one when // the device is in a low power state. - if (fPMRequest && (fPMRequest->getType() != kIOPMRequestTypeActivityTickle) && + if ((getPMRequestType() != kIOPMRequestTypeActivityTickle) && (fActivityTicklePowerState != -1)) { IOLockLock(fActivityLock); fActivityTicklePowerState = -1; IOLockUnlock(fActivityLock); } - - PM_TRACE(" NewState %ld, Child %ld, Driver %ld, Device %ld, Clamp %d (%ld)\n", - fDesiredPowerState, childDesire, fDriverDesire, deviceDesire, - fClampOn, fTempClampCount ? fTempClampPowerState : 0); -} - -//********************************************************************************* -// [private] changeState -// -// A subclass object, our controlling driver, or a power domain child -// has asked for a different power state. Here we compute what new -// state we should enter and enqueue the change (or start it). 
-//********************************************************************************* - -IOReturn IOService::changeState ( void ) -{ - IOReturn result; - - PM_ASSERT_IN_GATE(); - assert(inPlane(gIOPowerPlane)); - assert(fParentsKnowState); - assert(fControllingDriver); - - result = enqueuePowerChange( - /* flags */ IOPMWeInitiated, - /* power state */ fDesiredPowerState, - /* domain state */ 0, - /* connection */ 0, - /* parent state */ 0); - - return result; } //********************************************************************************* -// [public virtual] currentPowerConsumption +// [public] currentPowerConsumption // //********************************************************************************* @@ -2291,7 +2440,7 @@ unsigned long IOService::currentPowerConsumption ( void ) } //********************************************************************************* -// [public virtual] getPMworkloop +// [deprecated] getPMworkloop //********************************************************************************* IOWorkLoop * IOService::getPMworkloop ( void ) @@ -2300,7 +2449,7 @@ IOWorkLoop * IOService::getPMworkloop ( void ) } //********************************************************************************* -// [public virtual] activityTickle +// [public] activityTickle // // The tickle with parameter kIOPMSuperclassPolicy1 causes the activity // flag to be set, and the device state checked. If the device has been @@ -2321,8 +2470,14 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) // Record device activity for the idle timer handler. fDeviceActive = true; + fActivityTickleCount++; clock_get_uptime(&fDeviceActiveTimestamp); +#if ROOT_DOMAIN_RUN_STATES + if (fCurrentPowerState == 0) + getPMRootDomain()->handleActivityTickleForService(this); +#endif + // Record the last tickle power state. // This helps to filter out redundant tickles as // this function may be called from the data path. 
@@ -2351,7 +2506,42 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) } //********************************************************************************* -// [public virtual] setIdleTimerPeriod +// [private] handleActivityTickle +//********************************************************************************* + +void IOService::handleActivityTickle ( IOPMRequest * request ) +{ + uint32_t ticklePowerState = (uint32_t)(uintptr_t) request->fArg0; + bool adjustPower = false; + + PM_ASSERT_IN_GATE(); + if (request->fArg1) + { + // Power rise from activity tickle. + if ((ticklePowerState > fDeviceDesire) && + (ticklePowerState < fNumberOfPowerStates)) + { + fIdleTimerMinPowerState = ticklePowerState; + adjustPower = true; + } + } + else if (fDeviceDesire > fIdleTimerMinPowerState) + { + // Power drop due to idle timer expiration. + // Do not allow idle timer to reduce power below tickle power. + ticklePowerState = fDeviceDesire - 1; + adjustPower = true; + } + + if (adjustPower) + { + updatePowerClient(gIOPMPowerClientDevice, ticklePowerState); + adjustPowerState(); + } +} + +//********************************************************************************* +// [public] setIdleTimerPeriod // // A subclass policy-maker is going to use our standard idleness // detection service. 
Make a command queue and an idle timer and @@ -2361,41 +2551,24 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) IOReturn IOService::setIdleTimerPeriod ( unsigned long period ) { - IOWorkLoop * wl = getPMworkloop(); - - if (!initialized || !wl) + if (!initialized) return IOPMNotYetInitialized; - OUR_PMLog(PMsetIdleTimerPeriod, period, 0); + OUR_PMLog(kPMLogSetIdleTimerPeriod, period, 0); - fIdleTimerPeriod = period; + IOPMRequest * request = + acquirePMRequest( this, kIOPMRequestTypeSetIdleTimerPeriod ); + if (!request) + return kIOReturnNoMemory; - if ( period > 0 ) - { - // make the timer event - if ( fIdleTimerEventSource == NULL ) - { - IOTimerEventSource * timerSrc; + request->fArg0 = (void *) period; + submitPMRequest( request ); - timerSrc = IOTimerEventSource::timerEventSource( - this, PM_idle_timer_expired); - - if (timerSrc && (wl->addEventSource(timerSrc) != kIOReturnSuccess)) - { - timerSrc->release(); - timerSrc = 0; - } - - fIdleTimerEventSource = timerSrc; - } - - start_PM_idle_timer(); - } return IOPMNoErr; } //****************************************************************************** -// [public virtual] nextIdleTimeout +// [public] nextIdleTimeout // // Returns how many "seconds from now" the device should idle into its // next lowest power state. @@ -2420,7 +2593,7 @@ SInt32 IOService::nextIdleTimeout( delta_secs = (SInt32)(delta_ns / NSEC_PER_SEC); // Be paranoid about delta somehow exceeding timer period. - if (delta_secs < (int) fIdleTimerPeriod ) + if (delta_secs < (int) fIdleTimerPeriod) delay_secs = (int) fIdleTimerPeriod - delta_secs; else delay_secs = (int) fIdleTimerPeriod; @@ -2429,7 +2602,7 @@ SInt32 IOService::nextIdleTimeout( } //****************************************************************************** -// [public virtual] start_PM_idle_timer +// [public] start_PM_idle_timer // // The parameter is a pointer to us. Use it to call our timeout method. 
//****************************************************************************** @@ -2441,7 +2614,7 @@ void IOService::start_PM_idle_timer ( void ) AbsoluteTime uptime; SInt32 idle_in = 0; - if (!initialized || !fIdleTimerEventSource) + if (!initialized || !fIdleTimerPeriod || !fIdleTimerEventSource) return; IOLockLock(fActivityLock); @@ -2468,30 +2641,19 @@ void IOService::start_PM_idle_timer ( void ) } //********************************************************************************* -// [private] PM_idle_timer_expired -// -// The parameter is a pointer to us. Use it to call our timeout method. -//********************************************************************************* - -void PM_idle_timer_expired ( OSObject * ourSelves, IOTimerEventSource * ) -{ - ((IOService *)ourSelves)->PM_idle_timer_expiration(); -} - -//********************************************************************************* -// [public virtual] PM_idle_timer_expiration +// [private] idleTimerExpired // // The idle timer has expired. If there has been activity since the last // expiration, just restart the timer and return. If there has not been // activity, switch to the next lower power state and restart the timer. 
//********************************************************************************* -void IOService::PM_idle_timer_expiration ( void ) +void IOService::idleTimerExpired( IOTimerEventSource * ) { IOPMRequest * request; bool restartTimer = true; - if ( !initialized || !fIdleTimerPeriod ) + if ( !initialized || !fIdleTimerPeriod || fLockedFlags.PMStop ) return; IOLockLock(fActivityLock); @@ -2535,17 +2697,26 @@ void IOService::PM_idle_timer_expiration ( void ) start_PM_idle_timer(); } +#ifndef __LP64__ //********************************************************************************* -// [public virtual] command_received -// +// [deprecated] PM_idle_timer_expiration +//********************************************************************************* + +void IOService::PM_idle_timer_expiration ( void ) +{ +} + +//********************************************************************************* +// [deprecated] command_received //********************************************************************************* void IOService::command_received ( void *statePtr , void *, void * , void * ) { } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] setAggressiveness +// [public] setAggressiveness // // Pass on the input parameters to all power domain children. All those which are // power domains will pass it on to their children, etc. 
@@ -2553,68 +2724,23 @@ void IOService::command_received ( void *statePtr , void *, void * , void * ) IOReturn IOService::setAggressiveness ( unsigned long type, unsigned long newLevel ) { - OSIterator * iter; - OSObject * next; - IOPowerConnection * connection; - IOService * child; - - if (!initialized) - return IOPMNotYetInitialized; - - if (getPMRootDomain() == this) - OUR_PMLog(kPMLogSetAggressiveness, type, newLevel); - - if ( type <= kMaxType ) - { - fAggressivenessValue[type] = newLevel; - fAggressivenessValid[type] = true; - } - - iter = getChildIterator(gIOPowerPlane); - if ( iter ) - { - while ( (next = iter->getNextObject()) ) - { - if ( (connection = OSDynamicCast(IOPowerConnection, next)) ) - { - if (connection->getReadyFlag() == false) - { - PM_CONNECT("[%s] %s: connection not ready\n", - getName(), __FUNCTION__); - continue; - } - - child = ((IOService *)(connection->copyChildEntry(gIOPowerPlane))); - if ( child ) - { - child->setAggressiveness(type, newLevel); - child->release(); - } - } - } - iter->release(); - } - - return IOPMNoErr; + return kIOReturnSuccess; } //********************************************************************************* -// [public virtual] getAggressiveness +// [public] getAggressiveness // // Called by the user client. 
//********************************************************************************* IOReturn IOService::getAggressiveness ( unsigned long type, unsigned long * currentLevel ) { - if ( !initialized || (type > kMaxType) ) - return kIOReturnBadArgument; + IOPMrootDomain * rootDomain = getPMRootDomain(); - if ( !fAggressivenessValid[type] ) - return kIOReturnInvalid; - - *currentLevel = fAggressivenessValue[type]; - - return kIOReturnSuccess; + if (!rootDomain) + return kIOReturnNotReady; + + return rootDomain->getAggressiveness( type, currentLevel ); } //********************************************************************************* @@ -2630,8 +2756,9 @@ UInt32 IOService::getPowerState ( void ) return fCurrentPowerState; } +#ifndef __LP64__ //********************************************************************************* -// [public virtual] systemWake +// [deprecated] systemWake // // Pass this to all power domain children. All those which are // power domains will pass it on to their children, etc. 
@@ -2644,8 +2771,6 @@ IOReturn IOService::systemWake ( void ) IOPowerConnection * connection; IOService * theChild; - OUR_PMLog(kPMLogSystemWake, 0, 0); - iter = getChildIterator(gIOPowerPlane); if ( iter ) { @@ -2683,7 +2808,7 @@ IOReturn IOService::systemWake ( void ) } //********************************************************************************* -// [public virtual] temperatureCriticalForZone +// [deprecated] temperatureCriticalForZone //********************************************************************************* IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone ) @@ -2693,7 +2818,7 @@ IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone ) OUR_PMLog(kPMLogCriticalTemp, 0, 0); - if ( inPlane(gIOPowerPlane) && !fWeAreRoot ) + if ( inPlane(gIOPowerPlane) && !IS_PM_ROOT() ) { theNub = (IOService *)copyParentEntry(gIOPowerPlane); if ( theNub ) @@ -2709,9 +2834,10 @@ IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone ) } return IOPMNoErr; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public] powerOverrideOnPriv +// [protected] powerOverrideOnPriv //********************************************************************************* IOReturn IOService::powerOverrideOnPriv ( void ) @@ -2736,7 +2862,7 @@ IOReturn IOService::powerOverrideOnPriv ( void ) } //********************************************************************************* -// [public] powerOverrideOffPriv +// [protected] powerOverrideOffPriv //********************************************************************************* IOReturn IOService::powerOverrideOffPriv ( void ) @@ -2778,59 +2904,56 @@ void IOService::handlePowerOverrideChanged ( IOPMRequest * request ) fDeviceOverrides = false; } - if (fControllingDriver && inPlane(gIOPowerPlane) && fParentsKnowState) - { - computeDesiredState(); - changeState(); - } + adjustPowerState(); } 
//********************************************************************************* -// [private] enqueuePowerChange +// [private] startPowerChange //********************************************************************************* -IOReturn IOService::enqueuePowerChange ( - unsigned long flags, - unsigned long whatStateOrdinal, - unsigned long domainState, - IOPowerConnection * whichParent, - unsigned long singleParentState ) +IOReturn IOService::startPowerChange ( + unsigned long changeFlags, + unsigned long powerState, + unsigned long domainFlags, + IOPowerConnection * parentConnection, + unsigned long parentFlags ) { - changeNoteItem changeNote; - IOPMPowerState * powerStatePtr; - PM_ASSERT_IN_GATE(); assert( fMachineState == kIOPM_Finished ); - assert( whatStateOrdinal < fNumberOfPowerStates ); + assert( powerState < fNumberOfPowerStates ); - if (whatStateOrdinal >= fNumberOfPowerStates) + if (powerState >= fNumberOfPowerStates) return IOPMAckImplied; - powerStatePtr = &fPowerStates[whatStateOrdinal]; +#if ROOT_DOMAIN_RUN_STATES + // Root domain can override chosen power state to a lower state. + getPMRootDomain()->overridePowerStateForService( + this, &fRootDomainState, + &powerState, changeFlags); +#endif - // Initialize the change note - changeNote.flags = flags; - changeNote.newStateNumber = whatStateOrdinal; - changeNote.outputPowerCharacter = powerStatePtr->outputPowerCharacter; - changeNote.inputPowerRequirement = powerStatePtr->inputPowerRequirement; - changeNote.capabilityFlags = powerStatePtr->capabilityFlags; - changeNote.parent = NULL; + // Initialize the change note. 
- if (flags & IOPMParentInitiated ) - { - changeNote.domainState = domainState; - changeNote.parent = whichParent; - changeNote.singleParentState = singleParentState; - } + fHeadNoteFlags = changeFlags; + fHeadNotePowerState = powerState; + fHeadNotePowerArrayEntry = &fPowerStates[ powerState ]; + fHeadNoteParentConnection = NULL; - if (flags & IOPMWeInitiated ) + if (changeFlags & kIOPMWeInitiated) { - start_our_change(&changeNote); + if (changeFlags & kIOPMSynchronize) + OurSyncStart(); + else + OurChangeStart(); return 0; } else { - return start_parent_change(&changeNote); + assert(changeFlags & kIOPMParentInitiated); + fHeadNoteDomainFlags = domainFlags; + fHeadNoteParentFlags = parentFlags; + fHeadNoteParentConnection = parentConnection; + return ParentChangeStart(); } } @@ -2846,10 +2969,11 @@ bool IOService::notifyInterestedDrivers ( void ) IOItemCount count; PM_ASSERT_IN_GATE(); - assert( fDriverCallBusy == false ); assert( fDriverCallParamCount == 0 ); assert( fHeadNotePendingAcks == 0 ); + fHeadNotePendingAcks = 0; + count = list->numberOfItems(); if (!count) goto done; // no interested drivers @@ -2894,7 +3018,8 @@ bool IOService::notifyInterestedDrivers ( void ) // Machine state will be blocked pending callout thread completion. 
PM_LOCK(); - fDriverCallBusy = true; + assert( fLockedFlags.DriverCallBusy == false ); + fLockedFlags.DriverCallBusy = true; PM_UNLOCK(); thread_call_enter( fDriverCallEntry ); return true; @@ -2919,7 +3044,7 @@ void IOService::notifyInterestedDriversDone ( void ) param = (DriverCallParam *) fDriverCallParamPtr; count = fDriverCallParamCount; - assert( fDriverCallBusy == false ); + assert( fLockedFlags.DriverCallBusy == false ); assert( fMachineState == kIOPM_DriverThreadCallDone ); if (param && count) @@ -2931,9 +3056,15 @@ void IOService::notifyInterestedDriversDone ( void ) if ((result == IOPMAckImplied) || (result < 0)) { - // child return IOPMAckImplied - informee->timer = 0; - fHeadNotePendingAcks--; + // Interested driver return IOPMAckImplied. + // If informee timer is zero, it must have de-registered + // interest during the thread callout. That also drops + // the pending ack count. + + if (fHeadNotePendingAcks && informee->timer) + fHeadNotePendingAcks--; + + informee->timer = 0; } else if (informee->timer) { @@ -2989,7 +3120,7 @@ void IOService::notifyChildren ( void ) children = OSArray::withCapacity(8); // Sum child power consumption in notifyChild() - fPowerStates[fHeadNoteState].staticPower = 0; + fHeadNotePowerArrayEntry->staticPower = 0; iter = getChildIterator(gIOPowerPlane); if ( iter ) @@ -3043,7 +3174,7 @@ void IOService::notifyChildrenDone ( void ) assert(fMachineState == kIOPM_NotifyChildrenDone); // Interested drivers have all acked (if any), ack timer stopped. - // Notify one child, wait for it's ack, then repeat for next child. + // Notify one child, wait for it to ack, then repeat for next child. // This is a workaround for some drivers with multiple instances at // the same branch in the power tree, but the driver is slow to power // up unless the tree ordering is observed. 
Problem observed only on @@ -3071,12 +3202,12 @@ void IOService::notifyChildrenDone ( void ) // [private] notifyAll //********************************************************************************* -IOReturn IOService::notifyAll ( bool is_prechange ) +IOReturn IOService::notifyAll ( int nextMachineState, bool is_prechange ) { // Save the next machine_state to be restored by notifyInterestedDriversDone() PM_ASSERT_IN_GATE(); - fNextMachineState = fMachineState; + fNextMachineState = nextMachineState; fMachineState = kIOPM_DriverThreadCallDone; fDriverCallReason = is_prechange ? kDriverCallInformPreChange : kDriverCallInformPostChange; @@ -3101,7 +3232,7 @@ IOReturn IOService::actionDriverCalloutDone ( IOServicePM * pwrMgt = (IOServicePM *) arg0; PM_LOCK(); - fDriverCallBusy = false; + fLockedFlags.DriverCallBusy = false; PM_UNLOCK(); if (gIOPMReplyQueue) @@ -3125,7 +3256,8 @@ void IOService::pmDriverCallout ( IOService * from ) break; default: - IOPanic("IOService::pmDriverCallout bad machine state"); + panic("IOService::pmDriverCallout bad machine state %x", + from->fDriverCallReason); } gIOPMWorkLoop->runAction(actionDriverCalloutDone, @@ -3147,21 +3279,21 @@ void IOService::driverSetPowerState ( void ) IOReturn result; AbsoluteTime end; - assert( fDriverCallBusy ); + assert( fLockedFlags.DriverCallBusy == true ); param = (DriverCallParam *) fDriverCallParamPtr; assert( param ); assert( fDriverCallParamCount == 1 ); driver = fControllingDriver; - powerState = fHeadNoteState; + powerState = fHeadNotePowerState; - if (!fWillPMStop) + if (fLockedFlags.PMStop == false) { - OUR_PMLog( kPMLogProgramHardware, (UInt32) this, powerState); + OUR_PMLog( kPMLogProgramHardware, (uintptr_t) this, powerState); clock_get_uptime(&fDriverCallStartTime); result = driver->setPowerState( powerState, this ); clock_get_uptime(&end); - OUR_PMLog((UInt32) -kPMLogProgramHardware, (UInt32) this, (UInt32) result); + OUR_PMLog((UInt32) -kPMLogProgramHardware, (uintptr_t) this, (UInt32) 
result); #if LOG_SETPOWER_TIMES if ((result == IOPMAckImplied) || (result < 0)) @@ -3199,36 +3331,36 @@ void IOService::driverInformPowerChange ( void ) DriverCallParam * param; AbsoluteTime end; - assert( fDriverCallBusy ); + assert( fLockedFlags.DriverCallBusy == true ); param = (DriverCallParam *) fDriverCallParamPtr; count = fDriverCallParamCount; assert( count && param ); - powerFlags = fHeadNoteCapabilityFlags; - powerState = fHeadNoteState; + powerFlags = fHeadNotePowerArrayEntry->capabilityFlags; + powerState = fHeadNotePowerState; for (IOItemCount i = 0; i < count; i++) { informee = (IOPMinformee *) param->Target; driver = informee->whatObject; - if (!fWillPMStop && informee->active) + if ((fLockedFlags.PMStop == false) && informee->active) { if (fDriverCallReason == kDriverCallInformPreChange) { - OUR_PMLog(kPMLogInformDriverPreChange, (UInt32) this, powerState); + OUR_PMLog(kPMLogInformDriverPreChange, (uintptr_t) this, powerState); clock_get_uptime(&informee->startTime); result = driver->powerStateWillChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); - OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (UInt32) this, result); + OUR_PMLog((UInt32)-kPMLogInformDriverPreChange, (uintptr_t) this, result); } else { - OUR_PMLog(kPMLogInformDriverPostChange, (UInt32) this, powerState); + OUR_PMLog(kPMLogInformDriverPostChange, (uintptr_t) this, powerState); clock_get_uptime(&informee->startTime); result = driver->powerStateDidChangeTo(powerFlags, powerState, this); clock_get_uptime(&end); - OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (UInt32) this, result); + OUR_PMLog((UInt32)-kPMLogInformDriverPostChange, (uintptr_t) this, result); } #if LOG_SETPOWER_TIMES @@ -3263,10 +3395,11 @@ void IOService::driverInformPowerChange ( void ) bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange ) { - IOReturn k = IOPMAckImplied; + IOReturn ret = IOPMAckImplied; unsigned long childPower; IOService * theChild; IOPMRequest * 
childRequest; + uint32_t requestArg2; int requestType; PM_ASSERT_IN_GATE(); @@ -3281,40 +3414,42 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange ) // kIOPMAckImplied, we'll be awaiting their acknowledgement later. fHeadNotePendingAcks++; theNub->setAwaitingAck(true); - - requestType = is_prechange ? - kIOPMRequestTypePowerDomainWillChange : - kIOPMRequestTypePowerDomainDidChange; + + requestArg2 = fHeadNoteFlags; + if (fHeadNotePowerState < fCurrentPowerState) + requestArg2 |= kIOPMDomainPowerDrop; + + requestType = is_prechange ? + kIOPMRequestTypePowerDomainWillChange : + kIOPMRequestTypePowerDomainDidChange; childRequest = acquirePMRequest( theChild, requestType ); if (childRequest) { theNub->retain(); - childRequest->fArg0 = (void *) fHeadNoteOutputFlags; + childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerCharacter; childRequest->fArg1 = (void *) theNub; - childRequest->fArg2 = (void *) (fHeadNoteState < fCurrentPowerState); + childRequest->fArg2 = (void *) requestArg2; theChild->submitPMRequest( childRequest ); - k = IOPMWillAckLater; + ret = IOPMWillAckLater; } else { - k = IOPMAckImplied; + ret = IOPMAckImplied; fHeadNotePendingAcks--; theNub->setAwaitingAck(false); childPower = theChild->currentPowerConsumption(); if ( childPower == kIOPMUnknown ) { - fPowerStates[fHeadNoteState].staticPower = kIOPMUnknown; + fHeadNotePowerArrayEntry->staticPower = kIOPMUnknown; } else { - if ( fPowerStates[fHeadNoteState].staticPower != kIOPMUnknown ) - { - fPowerStates[fHeadNoteState].staticPower += childPower; - } + if (fHeadNotePowerArrayEntry->staticPower != kIOPMUnknown ) + fHeadNotePowerArrayEntry->staticPower += childPower; } } theChild->release(); - return (k == IOPMAckImplied); + return (IOPMAckImplied == ret); } //********************************************************************************* @@ -3329,7 +3464,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange ) void 
IOService::OurChangeTellClientsPowerDown ( void ) { fMachineState = kIOPM_OurChangeTellPriorityClientsPowerDown; - tellChangeDown1(fHeadNoteState); + tellChangeDown1(fHeadNotePowerState); } //********************************************************************************* @@ -3344,7 +3479,7 @@ void IOService::OurChangeTellClientsPowerDown ( void ) void IOService::OurChangeTellPriorityClientsPowerDown ( void ) { fMachineState = kIOPM_OurChangeNotifyInterestedDriversWillChange; - tellChangeDown2(fHeadNoteState); + tellChangeDown2(fHeadNotePowerState); } //********************************************************************************* @@ -3358,8 +3493,13 @@ void IOService::OurChangeTellPriorityClientsPowerDown ( void ) void IOService::OurChangeNotifyInterestedDriversWillChange ( void ) { - fMachineState = kIOPM_OurChangeSetPowerState; - notifyAll( true ); + IOPMrootDomain *rootDomain; + if ((rootDomain = getPMRootDomain()) == this) + { + rootDomain->tracePoint(kIOPMTracePointSystemSleepDriversPhase); + } + + notifyAll( kIOPM_OurChangeSetPowerState, kNotifyWillChange ); } //********************************************************************************* @@ -3413,8 +3553,7 @@ void IOService::OurChangeWaitForPowerSettle ( void ) void IOService::OurChangeNotifyInterestedDriversDidChange ( void ) { - fMachineState = kIOPM_OurChangeFinish; - notifyAll(false); + notifyAll( kIOPM_OurChangeFinish, kNotifyDidChange ); } //********************************************************************************* @@ -3442,7 +3581,7 @@ void IOService::OurChangeFinish ( void ) void IOService::ParentDownTellPriorityClientsPowerDown ( void ) { fMachineState = kIOPM_ParentDownNotifyInterestedDriversWillChange; - tellChangeDown2(fHeadNoteState); + tellChangeDown2(fHeadNotePowerState); } //********************************************************************************* @@ -3457,8 +3596,13 @@ void IOService::ParentDownTellPriorityClientsPowerDown ( void ) void 
IOService::ParentDownNotifyInterestedDriversWillChange ( void ) { - fMachineState = kIOPM_ParentDownSetPowerState; - notifyAll( true ); + IOPMrootDomain *rootDomain; + if ((rootDomain = getPMRootDomain()) == this) + { + rootDomain->tracePoint(kIOPMTracePointSystemSleepDriversPhase); + } + + notifyAll( kIOPM_ParentDownSetPowerState, kNotifyWillChange ); } //********************************************************************************* @@ -3514,25 +3658,23 @@ void IOService::ParentDownWaitForPowerSettle ( void ) void IOService::ParentDownNotifyDidChangeAndAcknowledgeChange ( void ) { - fMachineState = kIOPM_ParentDownAcknowledgeChange; - notifyAll(false); + notifyAll( kIOPM_ParentAcknowledgePowerChange, kNotifyDidChange ); } //********************************************************************************* -// [private] ParentDownAcknowledgeChange +// [private] ParentAcknowledgePowerChange // // We had to wait for it, but all parties have acknowledged our post-change -// notification of a power lowering initiated by the parent. +// notification of a power change (either Up or Down) initiated by the parent. // Here we acknowledge the parent. -// We are done with this change note, and we can start on the next one. 
//********************************************************************************* -void IOService::ParentDownAcknowledgeChange ( void ) +void IOService::ParentAcknowledgePowerChange ( void ) { IORegistryEntry * nub; IOService * parent; - nub = fHeadNoteParent; + nub = fHeadNoteParentConnection; nub->retain(); all_done(); parent = (IOService *)nub->copyParentEntry(gIOPowerPlane); @@ -3598,33 +3740,7 @@ void IOService::ParentUpWaitForSettleTime ( void ) void IOService::ParentUpNotifyInterestedDriversDidChange ( void ) { - fMachineState = kIOPM_ParentUpAcknowledgePowerChange; - notifyAll(false); -} - -//********************************************************************************* -// [private] ParentUpAcknowledgePowerChange -// -// All parties have acknowledged our post-change notification of a power -// raising initiated by the parent. Here we acknowledge the parent. -// We are done with this change note, and we can start on the next one. -//********************************************************************************* - -void IOService::ParentUpAcknowledgePowerChange ( void ) -{ - IORegistryEntry * nub; - IOService * parent; - - nub = fHeadNoteParent; - nub->retain(); - all_done(); - parent = (IOService *)nub->copyParentEntry(gIOPowerPlane); - if ( parent ) - { - parent->acknowledgePowerChange((IOService *)nub); - parent->release(); - } - nub->release(); + notifyAll( kIOPM_ParentAcknowledgePowerChange, kNotifyDidChange ); } //********************************************************************************* @@ -3639,36 +3755,60 @@ void IOService::all_done ( void ) { unsigned long previous_state; +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->handlePowerChangeDoneForService( + /* service */ this, + /* RD flags */ &fRootDomainState, + /* new pwr state */ fHeadNotePowerState, + /* change flags */ fHeadNoteFlags ); +#endif + + if ((fHeadNoteFlags & kIOPMSynchronize) && + ((fMachineState == kIOPM_Finished) || (fMachineState == kIOPM_SyncFinish))) + { + // Sync 
operation and no power change occurred. + // Do not inform driver and clients about this request completion, + // except for the originator (root domain). + + if (getPMRequestType() == kIOPMRequestTypeSynchronizePowerTree) + { + powerChangeDone(fCurrentPowerState); + } + + fMachineState = kIOPM_Finished; + return; + } + fMachineState = kIOPM_Finished; // our power change - if ( fHeadNoteFlags & IOPMWeInitiated ) + if ( fHeadNoteFlags & kIOPMWeInitiated ) { // could our driver switch to the new state? - if ( !( fHeadNoteFlags & IOPMNotDone) ) + if ( !( fHeadNoteFlags & kIOPMNotDone) ) { // we changed, tell our parent - if ( !fWeAreRoot ) + if ( !IS_PM_ROOT() ) { - ask_parent(fHeadNoteState); + ask_parent(fHeadNotePowerState); } // yes, did power raise? - if ( fCurrentPowerState < fHeadNoteState ) + if ( fCurrentPowerState < fHeadNotePowerState ) { // yes, inform clients and apps - tellChangeUp (fHeadNoteState); + tellChangeUp (fHeadNotePowerState); } previous_state = fCurrentPowerState; // either way - fCurrentPowerState = fHeadNoteState; + fCurrentPowerState = fHeadNotePowerState; #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0); - + // inform subclass policy-maker - if (!fWillPMStop && fParentsKnowState) + if ((fLockedFlags.PMStop == false) && fParentsKnowState) powerChangeDone(previous_state); else PM_DEBUG("%s::powerChangeDone() skipped\n", getName()); @@ -3676,29 +3816,29 @@ void IOService::all_done ( void ) } // parent's power change - if ( fHeadNoteFlags & IOPMParentInitiated) + if ( fHeadNoteFlags & kIOPMParentInitiated) { - if (((fHeadNoteFlags & IOPMDomainWillChange) && (fCurrentPowerState >= fHeadNoteState)) || - ((fHeadNoteFlags & IOPMDomainDidChange) && (fCurrentPowerState < fHeadNoteState))) + if (((fHeadNoteFlags & kIOPMDomainWillChange) && (fCurrentPowerState >= fHeadNotePowerState)) || + ((fHeadNoteFlags & kIOPMDomainDidChange) && (fCurrentPowerState < fHeadNotePowerState))) 
{ // did power raise? - if ( fCurrentPowerState < fHeadNoteState ) + if ( fCurrentPowerState < fHeadNotePowerState ) { // yes, inform clients and apps - tellChangeUp (fHeadNoteState); + tellChangeUp (fHeadNotePowerState); } // either way previous_state = fCurrentPowerState; - fCurrentPowerState = fHeadNoteState; + fCurrentPowerState = fHeadNotePowerState; #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif - fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainState); + fMaxCapability = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags); OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0); // inform subclass policy-maker - if (!fWillPMStop && fParentsKnowState) + if ((fLockedFlags.PMStop == false) && fParentsKnowState) powerChangeDone(previous_state); else PM_DEBUG("%s::powerChangeDone() skipped\n", getName()); @@ -3754,9 +3894,9 @@ unsigned long IOService::compute_settle_time ( void ) i = fCurrentPowerState; // we're lowering power - if ( fHeadNoteState < fCurrentPowerState ) + if ( fHeadNotePowerState < fCurrentPowerState ) { - while ( i > fHeadNoteState ) + while ( i > fHeadNotePowerState ) { totalTime += fPowerStates[i].settleDownTime; i--; @@ -3764,9 +3904,9 @@ unsigned long IOService::compute_settle_time ( void ) } // we're raising power - if ( fHeadNoteState > fCurrentPowerState ) + if ( fHeadNotePowerState > fCurrentPowerState ) { - while ( i < fHeadNoteState ) + while ( i < fHeadNotePowerState ) { totalTime += fPowerStates[i+1].settleUpTime; i++; @@ -3810,10 +3950,12 @@ IOReturn IOService::startSettleTimer ( unsigned long delay ) // machine state, false otherwise. 
//********************************************************************************* +#ifndef __LP64__ void IOService::ack_timer_ticked ( void ) { assert(false); } +#endif /* !__LP64__ */ bool IOService::ackTimerTick( void ) { @@ -3837,12 +3979,12 @@ bool IOService::ackTimerTick( void ) OUR_PMLog(kPMLogCtrlDriverTardy, 0, 0); setProperty(kIOPMTardyAckSPSKey, kOSBooleanTrue); PM_ERROR("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms\n", - fName, this, fCurrentPowerState, fHeadNoteState, NS_TO_MS(nsec)); + fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); if (gIOKitDebug & kIOLogDebugPower) { panic("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms", - fName, this, fCurrentPowerState, fHeadNoteState, NS_TO_MS(nsec)); + fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); } else { @@ -3859,9 +4001,8 @@ bool IOService::ackTimerTick( void ) case kIOPM_OurChangeSetPowerState: case kIOPM_OurChangeFinish: case kIOPM_ParentDownSetPowerState: - case kIOPM_ParentDownAcknowledgeChange: + case kIOPM_ParentAcknowledgePowerChange: case kIOPM_ParentUpSetPowerState: - case kIOPM_ParentUpAcknowledgePowerChange: case kIOPM_NotifyChildrenDone: // are we waiting for interested parties to acknowledge? if ( fHeadNotePendingAcks != 0 ) @@ -3883,7 +4024,7 @@ bool IOService::ackTimerTick( void ) PM_ERROR("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) timed out after %d ms\n", nextObject->whatObject->getName(), (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did", - nextObject->whatObject, fName, fCurrentPowerState, fHeadNoteState, + nextObject->whatObject, fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); // Pretend driver has acked. 
@@ -3918,7 +4059,7 @@ bool IOService::ackTimerTick( void ) break; default: - PM_TRACE("[%s] unexpected ack timer tick (state = %ld)\n", + PM_TRACE("%s: unexpected ack timer tick (state = %d)\n", getName(), fMachineState); break; } @@ -3959,7 +4100,7 @@ void IOService::stop_ack_timer ( void ) } //********************************************************************************* -// [static] settleTimerExpired +// [static] actionAckTimerExpired // // Inside PM work loop's gate. //********************************************************************************* @@ -4001,23 +4142,6 @@ IOService::ack_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg me->release(); } -//********************************************************************************* -// settleTimerExpired -// -// Inside PM work loop's gate. -//********************************************************************************* - -static IOReturn -settleTimerExpired ( - OSObject * target, - void * arg0, void * arg1, - void * arg2, void * arg3 ) -{ - IOService * me = (IOService *) target; - me->settleTimerExpired(); - return kIOReturnSuccess; -} - //********************************************************************************* // settle_timer_expired // @@ -4031,78 +4155,91 @@ settle_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg1 ) if (gIOPMWorkLoop && gIOPMReplyQueue) { - gIOPMWorkLoop->runAction(settleTimerExpired, me); + gIOPMWorkLoop->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, me, &IOService::settleTimerExpired), + me); gIOPMReplyQueue->signalWorkAvailable(); } me->release(); } //********************************************************************************* -// [private] start_parent_change +// [private] ParentChangeStart // // Here we begin the processing of a power change initiated by our parent. 
//********************************************************************************* -IOReturn IOService::start_parent_change ( const changeNoteItem * changeNote ) +IOReturn IOService::ParentChangeStart ( void ) { - fHeadNoteFlags = changeNote->flags; - fHeadNoteState = changeNote->newStateNumber; - fHeadNoteOutputFlags = changeNote->outputPowerCharacter; - fHeadNoteDomainState = changeNote->domainState; - fHeadNoteParent = changeNote->parent; - fHeadNoteCapabilityFlags = changeNote->capabilityFlags; - PM_ASSERT_IN_GATE(); - OUR_PMLog( kPMLogStartParentChange, fHeadNoteState, fCurrentPowerState ); + OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState ); // Power domain is lowering power - if ( fHeadNoteState < fCurrentPowerState ) + if ( fHeadNotePowerState < fCurrentPowerState ) { - setParentInfo( - changeNote->singleParentState, - fHeadNoteParent, true ); + setParentInfo( fHeadNoteParentFlags, fHeadNoteParentConnection, true ); + +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->handlePowerChangeStartForService( + /* service */ this, + /* RD flags */ &fRootDomainState, + /* new pwr state */ fHeadNotePowerState, + /* change flags */ fHeadNoteFlags ); +#endif // tell apps and kernel clients fInitialChange = false; fMachineState = kIOPM_ParentDownTellPriorityClientsPowerDown; - tellChangeDown1(fHeadNoteState); + tellChangeDown1(fHeadNotePowerState); return IOPMWillAckLater; } // Power domain is raising power - if ( fHeadNoteState > fCurrentPowerState ) + if ( fHeadNotePowerState > fCurrentPowerState ) { - IOPMPowerState * powerStatePtr; - if ( fDesiredPowerState > fCurrentPowerState ) { - if ( fDesiredPowerState < fHeadNoteState ) + if ( fDesiredPowerState < fHeadNotePowerState ) { // We power up, but not all the way - fHeadNoteState = fDesiredPowerState; - powerStatePtr = &fPowerStates[fHeadNoteState]; - fHeadNoteOutputFlags = powerStatePtr->outputPowerCharacter; - fHeadNoteCapabilityFlags = powerStatePtr->capabilityFlags; - 
OUR_PMLog(kPMLogAmendParentChange, fHeadNoteState, 0); + fHeadNotePowerState = fDesiredPowerState; + fHeadNotePowerArrayEntry = &fPowerStates[fDesiredPowerState]; + OUR_PMLog(kPMLogAmendParentChange, fHeadNotePowerState, 0); } } else { // We don't need to change - fHeadNoteState = fCurrentPowerState; - powerStatePtr = &fPowerStates[fHeadNoteState]; - fHeadNoteOutputFlags = powerStatePtr->outputPowerCharacter; - fHeadNoteCapabilityFlags = powerStatePtr->capabilityFlags; - OUR_PMLog(kPMLogAmendParentChange, fHeadNoteState, 0); + fHeadNotePowerState = fCurrentPowerState; + fHeadNotePowerArrayEntry = &fPowerStates[fCurrentPowerState]; + OUR_PMLog(kPMLogAmendParentChange, fHeadNotePowerState, 0); } } - if ((fHeadNoteState > fCurrentPowerState) && - (fHeadNoteFlags & IOPMDomainDidChange)) + if ( fHeadNoteFlags & kIOPMDomainDidChange ) { - // Parent did change up - start our change up - fInitialChange = false; - fMachineState = kIOPM_ParentUpSetPowerState; - notifyAll( true ); - return IOPMWillAckLater; + if ( fHeadNotePowerState > fCurrentPowerState ) + { +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->handlePowerChangeStartForService( + /* service */ this, + /* RD flags */ &fRootDomainState, + /* new pwr state */ fHeadNotePowerState, + /* change flags */ fHeadNoteFlags ); +#endif + + // Parent did change up - start our change up + fInitialChange = false; + notifyAll( kIOPM_ParentUpSetPowerState, kNotifyWillChange ); + return IOPMWillAckLater; + } + else if (fHeadNoteFlags & kIOPMSynchronize) + { + // We do not need to change power state, but notify + // children to propagate tree synchronization. 
+ fMachineState = kIOPM_SyncNotifyDidChange; + fDriverCallReason = kDriverCallInformPreChange; + notifyChildren(); + return IOPMWillAckLater; + } } all_done(); @@ -4110,67 +4247,90 @@ IOReturn IOService::start_parent_change ( const changeNoteItem * changeNote ) } //********************************************************************************* -// [private] start_our_change +// [private] OurChangeStart // // Here we begin the processing of a power change initiated by us. //********************************************************************************* -void IOService::start_our_change ( const changeNoteItem * changeNote ) +void IOService::OurChangeStart ( void ) { - fHeadNoteFlags = changeNote->flags; - fHeadNoteState = changeNote->newStateNumber; - fHeadNoteOutputFlags = changeNote->outputPowerCharacter; - fHeadNoteCapabilityFlags = changeNote->capabilityFlags; - PM_ASSERT_IN_GATE(); + OUR_PMLog( kPMLogStartDeviceChange, fHeadNotePowerState, fCurrentPowerState ); - OUR_PMLog( kPMLogStartDeviceChange, fHeadNoteState, fCurrentPowerState ); + // fMaxCapability is our maximum possible power state based on the current + // power state of our parents. If we are trying to raise power beyond the + // maximum, send an async request for more power to all parents. - // can our driver switch to the new state? 
- if (( fHeadNoteCapabilityFlags & IOPMNotAttainable ) || - ((fMaxCapability < fHeadNoteState) && (!fWeAreRoot))) + if (!IS_PM_ROOT() && (fMaxCapability < fHeadNotePowerState)) { - // mark the change note un-actioned - fHeadNoteFlags |= IOPMNotDone; - - // no, ask the parent to do it then - if ( !fWeAreRoot ) - { - ask_parent(fHeadNoteState); - } - all_done(); + fHeadNoteFlags |= kIOPMNotDone; + ask_parent(fHeadNotePowerState); + OurChangeFinish(); return; } - if ( !fInitialChange ) - { - if ( fHeadNoteState == fCurrentPowerState ) - { - // we initiated a null change; forget it - all_done(); - return; - } + // Redundant power changes skips to the end of the state machine. + + if (!fInitialChange && (fHeadNotePowerState == fCurrentPowerState)) + { + OurChangeFinish(); + return; } fInitialChange = false; - // dropping power? - if ( fHeadNoteState < fCurrentPowerState ) +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->handlePowerChangeStartForService( + /* service */ this, + /* RD flags */ &fRootDomainState, + /* new pwr state */ fHeadNotePowerState, + /* change flags */ fHeadNoteFlags ); +#endif + + // Two separate paths, depending if power is being raised or lowered. + // Lowering power is subject to client approval. + + if ( fHeadNotePowerState < fCurrentPowerState ) { - // yes, in case we have to wait for acks + // Next state when dropping power. fMachineState = kIOPM_OurChangeTellClientsPowerDown; fDoNotPowerDown = false; - // ask apps and kernel clients if we can drop power + // Ask apps and kernel clients permission to lower power. fOutOfBandParameter = kNotifyApps; - askChangeDown(fHeadNoteState); - } else { - // in case they don't all ack - fMachineState = kIOPM_OurChangeSetPowerState; - // notify interested drivers and children - notifyAll(true); + askChangeDown(fHeadNotePowerState); + } + else + { + // Notify interested drivers and children. 
+ notifyAll( kIOPM_OurChangeSetPowerState, kNotifyWillChange ); } } +//********************************************************************************* +// [private] OurSyncStart +//********************************************************************************* + +void IOService::OurSyncStart ( void ) +{ + PM_ASSERT_IN_GATE(); + + if (fInitialChange) + return; + +#if ROOT_DOMAIN_RUN_STATES + getPMRootDomain()->handlePowerChangeStartForService( + /* service */ this, + /* RD flags */ &fRootDomainState, + /* new pwr state */ fHeadNotePowerState, + /* change flags */ fHeadNoteFlags ); +#endif + + fMachineState = kIOPM_SyncNotifyDidChange; + fDriverCallReason = kDriverCallInformPreChange; + + notifyChildren(); +} + //********************************************************************************* // [private] ask_parent // @@ -4210,7 +4370,7 @@ IOReturn IOService::ask_parent ( unsigned long requestedState ) return IOPMNoErr; } - if ( fWeAreRoot ) + if ( IS_PM_ROOT() ) { return IOPMNoErr; } @@ -4250,13 +4410,10 @@ bool IOService::notifyControllingDriver ( void ) unsigned long powerState; PM_ASSERT_IN_GATE(); - assert( fDriverCallBusy == false ); assert( fDriverCallParamCount == 0 ); assert( fControllingDriver ); - powerState = fHeadNoteState; - if (fPowerStates[powerState].capabilityFlags & IOPMNotAttainable ) - return false; // state not attainable + powerState = fHeadNotePowerState; param = (DriverCallParam *) fDriverCallParamPtr; if (!param) @@ -4278,7 +4435,8 @@ bool IOService::notifyControllingDriver ( void ) // from the callout thread. 
PM_LOCK(); - fDriverCallBusy = true; + assert( fLockedFlags.DriverCallBusy == false ); + fLockedFlags.DriverCallBusy = true; PM_UNLOCK(); thread_call_enter( fDriverCallEntry ); return true; @@ -4296,7 +4454,7 @@ void IOService::notifyControllingDriverDone( void ) PM_ASSERT_IN_GATE(); param = (DriverCallParam *) fDriverCallParamPtr; - assert( fDriverCallBusy == false ); + assert( fLockedFlags.DriverCallBusy == false ); assert( fMachineState == kIOPM_DriverThreadCallDone ); if (param) @@ -4343,7 +4501,7 @@ void IOService::notifyControllingDriverDone( void ) } //********************************************************************************* -// [public virtual] askChangeDown +// [public] askChangeDown // // Ask registered applications and kernel clients if we can change to a lower // power state. @@ -4360,7 +4518,7 @@ bool IOService::askChangeDown ( unsigned long stateNum ) } //********************************************************************************* -// [public] tellChangeDown1 +// [private] tellChangeDown1 // // Notify registered applications and kernel clients that we are definitely // dropping power. @@ -4375,7 +4533,7 @@ bool IOService::tellChangeDown1 ( unsigned long stateNum ) } //********************************************************************************* -// [public] tellChangeDown2 +// [private] tellChangeDown2 // // Notify priority clients that we are definitely dropping power. // @@ -4389,7 +4547,7 @@ bool IOService::tellChangeDown2 ( unsigned long stateNum ) } //********************************************************************************* -// [public virtual] tellChangeDown +// [public] tellChangeDown // // Notify registered applications and kernel clients that we are definitely // dropping power. 
@@ -4410,61 +4568,82 @@ bool IOService::tellChangeDown ( unsigned long stateNum ) // //********************************************************************************* -static void logAppTimeouts ( OSObject * object, void * context) +static void logAppTimeouts ( OSObject * object, void * arg ) { - struct context *theContext = (struct context *)context; - OSObject *flag; + IOPMInterestContext * context = (IOPMInterestContext *) arg; + OSObject * flag; + unsigned int clientIndex; - if( !OSDynamicCast( IOService, object) ) { - flag = theContext->responseFlags->getObject(theContext->counter); - if (kOSBooleanTrue != flag) + if (OSDynamicCast(_IOServiceInterestNotifier, object)) + { + // Discover the 'counter' value or index assigned to this client + // when it was notified, by searching for the array index of the + // client in an array holding the cached interested clients. + + clientIndex = context->notifyClients->getNextIndexOfObject(object, 0); + + if ((clientIndex != (unsigned int) -1) && + (flag = context->responseFlags->getObject(clientIndex)) && + (flag != kOSBooleanTrue)) { OSString * clientID = 0; - theContext->us->messageClient(theContext->msgType, object, &clientID); - PM_ERROR(theContext->errorLog, clientID ? clientID->getCStringNoCopy() : ""); + context->us->messageClient(context->msgType, object, &clientID); + PM_ERROR(context->errorLog, clientID ? clientID->getCStringNoCopy() : ""); + + // TODO: record message type if possible + IOService::getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponseTimedOut, + clientID ? 
clientID->getCStringNoCopy() : "", + 0, (30*1000), -1); + if (clientID) clientID->release(); } - theContext->counter += 1; } } void IOService::cleanClientResponses ( bool logErrors ) { - struct context theContext; + IOPMInterestContext context; - if (logErrors && fResponseArray) { - theContext.responseFlags = fResponseArray; - theContext.serialNumber = fSerialNumber; - theContext.counter = 0; - theContext.msgType = kIOMessageCopyClientID; - theContext.us = this; - theContext.maxTimeRequested = 0; - theContext.stateNumber = fHeadNoteState; - theContext.stateFlags = fHeadNoteCapabilityFlags; - theContext.errorLog = "PM notification timeout (%s)\n"; + if (logErrors && fResponseArray && fNotifyClientArray) { + context.responseFlags = fResponseArray; + context.notifyClients = fNotifyClientArray; + context.serialNumber = fSerialNumber; + context.counter = 0; + context.msgType = kIOMessageCopyClientID; + context.us = this; + context.maxTimeRequested = 0; + context.stateNumber = fHeadNotePowerState; + context.stateFlags = fHeadNotePowerArrayEntry->capabilityFlags; + context.errorLog = "PM notification timeout (%s)\n"; switch ( fOutOfBandParameter ) { case kNotifyApps: - applyToInterested(gIOAppPowerStateInterest, logAppTimeouts, (void *) &theContext); + applyToInterested(gIOAppPowerStateInterest, logAppTimeouts, (void *) &context); case kNotifyPriority: default: break; } } - if (fResponseArray) + if (fResponseArray) { // get rid of this stuff fResponseArray->release(); fResponseArray = NULL; } + if (fNotifyClientArray) + { + fNotifyClientArray->release(); + fNotifyClientArray = NULL; + } return; } //********************************************************************************* -// [public] tellClientsWithResponse +// [protected] tellClientsWithResponse // // Notify registered applications and kernel clients that we are definitely // dropping power. 
@@ -4472,50 +4651,77 @@ void IOService::cleanClientResponses ( bool logErrors ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::tellClientsWithResponse ( int messageType ) +bool IOService::tellClientsWithResponse ( + int messageType ) +{ + return tellClientsWithResponse( messageType, 0 ); +} + +bool IOService::tellClientsWithResponse ( + int messageType, + IOPMMessageFilter filter ) { - struct context theContext; + IOPMInterestContext context; PM_ASSERT_IN_GATE(); + assert( fResponseArray == NULL ); + assert( fNotifyClientArray == NULL ); fResponseArray = OSArray::withCapacity( 1 ); + if (!fResponseArray) + goto exit; + + fResponseArray->setCapacityIncrement(8); fSerialNumber += 1; - - theContext.responseFlags = fResponseArray; - theContext.serialNumber = fSerialNumber; - theContext.counter = 0; - theContext.msgType = messageType; - theContext.us = this; - theContext.maxTimeRequested = 0; - theContext.stateNumber = fHeadNoteState; - theContext.stateFlags = fHeadNoteCapabilityFlags; + + context.responseFlags = fResponseArray; + context.notifyClients = 0; + context.serialNumber = fSerialNumber; + context.counter = 0; + context.msgType = messageType; + context.us = this; + context.maxTimeRequested = 0; + context.stateNumber = fHeadNotePowerState; + context.stateFlags = fHeadNotePowerArrayEntry->capabilityFlags; + context.filterFunc = filter; switch ( fOutOfBandParameter ) { case kNotifyApps: - applyToInterested(gIOAppPowerStateInterest, - pmTellAppWithResponse, (void *)&theContext); - applyToInterested(gIOGeneralInterest, - pmTellClientWithResponse, (void *)&theContext); + applyToInterested( gIOAppPowerStateInterest, + pmTellAppWithResponse, (void *) &context ); + fNotifyClientArray = context.notifyClients; + + applyToInterested( gIOGeneralInterest, + pmTellClientWithResponse, (void *) &context ); break; + case kNotifyPriority: - 
applyToInterested(gIOPriorityPowerStateInterest, - pmTellClientWithResponse, (void *)&theContext); + applyToInterested( gIOPriorityPowerStateInterest, + pmTellClientWithResponse, (void *) &context ); break; } - + // do we have to wait for somebody? if ( !checkForDone() ) { - OUR_PMLog(kPMLogStartAckTimer,theContext.maxTimeRequested, 0); - start_ack_timer( theContext.maxTimeRequested / 1000, kMillisecondScale ); + OUR_PMLog(kPMLogStartAckTimer, context.maxTimeRequested, 0); + start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale ); return false; } +exit: // everybody responded - fResponseArray->release(); - fResponseArray = NULL; - // cleanClientResponses(false); - + if (fResponseArray) + { + fResponseArray->release(); + fResponseArray = NULL; + } + if (fNotifyClientArray) + { + fNotifyClientArray->release(); + fNotifyClientArray = NULL; + } + return true; } @@ -4526,24 +4732,49 @@ bool IOService::tellClientsWithResponse ( int messageType ) // cookie we can identify the response with. //********************************************************************************* -void IOService::pmTellAppWithResponse ( OSObject * object, void * context ) +void IOService::pmTellAppWithResponse ( OSObject * object, void * arg ) { - struct context * theContext = (struct context *) context; - IOServicePM * pwrMgt = theContext->us->pwrMgt; - AbsoluteTime now; + IOPMInterestContext * context = (IOPMInterestContext *) arg; + IOServicePM * pwrMgt = context->us->pwrMgt; + AbsoluteTime now; + UInt32 refcon; - if( OSDynamicCast( IOService, object) ) + if (!OSDynamicCast(_IOServiceInterestNotifier, object)) { - // Automatically 'ack' in kernel clients - theContext->responseFlags->setObject(theContext->counter, kOSBooleanTrue); + // object must be an _IOServiceInterestNotifier. 
+ return; + } - const char *who = ((IOService *) object)->getName(); - fPlatform->PMLog(who, - kPMLogClientAcknowledge, theContext->msgType, * (UInt32 *) object); - } else { - UInt32 refcon = ((theContext->serialNumber & 0xFFFF)<<16) - + (theContext->counter & 0xFFFF); - OUR_PMLog(kPMLogAppNotify, theContext->msgType, refcon); + // Lazily create app clients array. + if (0 == context->notifyClients) + { + context->notifyClients = OSArray::withCapacity( 32 ); + } + + if (context->filterFunc && !context->filterFunc(object, arg)) + { + // ack - needed to match the counter index at logAppTimeouts(). + context->responseFlags->setObject(context->counter, kOSBooleanTrue); + if (context->notifyClients) + context->notifyClients->setObject(context->counter, kOSBooleanTrue); + } + else + { + refcon = ((context->serialNumber & 0xFFFF)<<16) + + (context->counter & 0xFFFF); + OUR_PMLog(kPMLogAppNotify, context->msgType, refcon); + + if (gIOKitDebug & kIOLogDebugPower) + { + // Log client pid/name and associated index. + OSString * clientID = 0; + context->us->messageClient(kIOMessageCopyClientID, object, &clientID); + PM_DEBUG("[Notify %u] message 0x%x to %s\n", + (uint32_t) context->counter, + context->msgType, + clientID ? 
clientID->getCStringNoCopy() : ""); + if (clientID) clientID->release(); + } #if LOG_APP_RESPONSE_TIMES OSNumber * num; @@ -4551,21 +4782,24 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * context ) num = OSNumber::withNumber(AbsoluteTime_to_scalar(&now), sizeof(uint64_t) * 8); if (num) { - theContext->responseFlags->setObject(theContext->counter, num); + context->responseFlags->setObject(context->counter, num); num->release(); } else #endif - theContext->responseFlags->setObject(theContext->counter, kOSBooleanFalse); + context->responseFlags->setObject(context->counter, kOSBooleanFalse); + + if (context->notifyClients) + context->notifyClients->setObject(context->counter, object); - theContext->us->messageClient(theContext->msgType, object, (void *)refcon); - if ( theContext->maxTimeRequested < k30seconds ) + context->us->messageClient(context->msgType, object, (void *)refcon); + if ( context->maxTimeRequested < k30seconds ) { - theContext->maxTimeRequested = k30seconds; + context->maxTimeRequested = k30seconds; } - - theContext->counter += 1; } + + context->counter++; } //********************************************************************************* @@ -4579,51 +4813,54 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * context ) // If it tells us via the return code in the struct that it does need time, we will chill. 
//********************************************************************************* -void IOService::pmTellClientWithResponse ( OSObject * object, void * context ) +void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) { - struct context *theContext = (struct context *)context; - IOPowerStateChangeNotification notify; - UInt32 refcon; - IOReturn retCode; - OSObject *theFlag; + IOPMInterestContext * context = (IOPMInterestContext *) arg; + IOPowerStateChangeNotification notify; + UInt32 refcon; + IOReturn retCode; + OSObject * theFlag; - refcon = ((theContext->serialNumber & 0xFFFF)<<16) + (theContext->counter & 0xFFFF); - theContext->responseFlags->setObject(theContext->counter, kOSBooleanFalse); + if (context->filterFunc && !context->filterFunc(object, arg)) + return; - IOServicePM * pwrMgt = theContext->us->pwrMgt; + refcon = ((context->serialNumber & 0xFFFF)<<16) + (context->counter & 0xFFFF); + context->responseFlags->setObject(context->counter, kOSBooleanFalse); + + IOServicePM * pwrMgt = context->us->pwrMgt; if (gIOKitDebug & kIOLogPower) { - OUR_PMLog(kPMLogClientNotify, refcon, (UInt32) theContext->msgType); + OUR_PMLog(kPMLogClientNotify, refcon, (UInt32) context->msgType); if (OSDynamicCast(IOService, object)) { const char *who = ((IOService *) object)->getName(); - fPlatform->PMLog(who, - kPMLogClientNotify, * (UInt32 *) object, (UInt32) object); + gPlatform->PMLog(who, + kPMLogClientNotify, * (UInt32 *) object, (UInt64) object); } else if (OSDynamicCast(_IOServiceInterestNotifier, object)) { _IOServiceInterestNotifier *n = (_IOServiceInterestNotifier *) object; - OUR_PMLog(kPMLogClientNotify, (UInt32) n->handler, 0); + OUR_PMLog(kPMLogClientNotify, (UInt64) n->handler, 0); } } notify.powerRef = (void *)refcon; notify.returnValue = 0; - notify.stateNumber = theContext->stateNumber; - notify.stateFlags = theContext->stateFlags; - retCode = theContext->us->messageClient(theContext->msgType,object,(void *)&notify); + notify.stateNumber =
context->stateNumber; + notify.stateFlags = context->stateFlags; + retCode = context->us->messageClient(context->msgType,object,(void *)&notify); if ( retCode == kIOReturnSuccess ) { if ( notify.returnValue == 0 ) { // client doesn't want time to respond - theContext->responseFlags->replaceObject(theContext->counter, kOSBooleanTrue); - OUR_PMLog(kPMLogClientAcknowledge, refcon, (UInt32) object); + context->responseFlags->replaceObject(context->counter, kOSBooleanTrue); + OUR_PMLog(kPMLogClientAcknowledge, refcon, (UInt64) object); } else { // it does want time, and it hasn't responded yet - theFlag = theContext->responseFlags->getObject(theContext->counter); + theFlag = context->responseFlags->getObject(context->counter); if ( kOSBooleanTrue != theFlag ) { // so note its time requirement - if ( theContext->maxTimeRequested < notify.returnValue ) + if ( context->maxTimeRequested < notify.returnValue ) { - theContext->maxTimeRequested = notify.returnValue; + context->maxTimeRequested = notify.returnValue; } } } @@ -4631,13 +4868,13 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * context ) OUR_PMLog(kPMLogClientAcknowledge, refcon, 0); // not a client of ours // so we won't be waiting for response - theContext->responseFlags->replaceObject(theContext->counter, kOSBooleanTrue); + context->responseFlags->replaceObject(context->counter, kOSBooleanTrue); } - theContext->counter += 1; + context->counter++; } //********************************************************************************* -// [public virtual] tellNoChangeDown +// [public] tellNoChangeDown // // Notify registered applications and kernel clients that we are not // dropping power. @@ -4652,7 +4889,7 @@ void IOService::tellNoChangeDown ( unsigned long ) } //********************************************************************************* -// [public virtual] tellChangeUp +// [public] tellChangeUp // // Notify registered applications and kernel clients that we are raising power.
// @@ -4666,42 +4903,72 @@ void IOService::tellChangeUp ( unsigned long ) } //********************************************************************************* -// [public] tellClients +// [protected] tellClients // // Notify registered applications and kernel clients of something. //********************************************************************************* void IOService::tellClients ( int messageType ) { - struct context theContext; + tellClients( messageType, 0 ); +} + +void IOService::tellClients ( int messageType, IOPMMessageFilter filter ) +{ + IOPMInterestContext context; - theContext.msgType = messageType; - theContext.us = this; - theContext.stateNumber = fHeadNoteState; - theContext.stateFlags = fHeadNoteCapabilityFlags; + context.msgType = messageType; + context.us = this; + context.stateNumber = fHeadNotePowerState; + context.stateFlags = fHeadNotePowerArrayEntry->capabilityFlags; + context.filterFunc = filter; - applyToInterested(gIOPriorityPowerStateInterest,tellClient,(void *)&theContext); - applyToInterested(gIOAppPowerStateInterest,tellClient, (void *)&theContext); - applyToInterested(gIOGeneralInterest,tellClient, (void *)&theContext); + applyToInterested( gIOPriorityPowerStateInterest, + tellKernelClientApplier, (void *) &context ); + + applyToInterested( gIOAppPowerStateInterest, + tellAppClientApplier, (void *) &context ); + + applyToInterested( gIOGeneralInterest, + tellKernelClientApplier, (void *) &context ); } //********************************************************************************* -// [global] tellClient +// [private] tellKernelClientApplier // -// Notify a registered application or kernel client of something. +// Message a kernel client. 
//********************************************************************************* -void tellClient ( OSObject * object, void * context ) +static void tellKernelClientApplier ( OSObject * object, void * arg ) { - struct context * theContext = (struct context *) context; + IOPMInterestContext * context = (IOPMInterestContext *) arg; IOPowerStateChangeNotification notify; - notify.powerRef = (void *) 0; + if (context->filterFunc && !context->filterFunc(object, arg)) + return; + + notify.powerRef = (void *) 0; notify.returnValue = 0; - notify.stateNumber = theContext->stateNumber; - notify.stateFlags = theContext->stateFlags; + notify.stateNumber = context->stateNumber; + notify.stateFlags = context->stateFlags; + + context->us->messageClient(context->msgType, object, &notify); +} + +//********************************************************************************* +// [private] tellAppClientApplier +// +// Message a registered application. +//********************************************************************************* + +static void tellAppClientApplier ( OSObject * object, void * arg ) +{ + IOPMInterestContext * context = (IOPMInterestContext *) arg; + + if (context->filterFunc && !context->filterFunc(object, arg)) + return; - theContext->us->messageClient(theContext->msgType, object, &notify); + context->us->messageClient(context->msgType, object, 0); } //********************************************************************************* @@ -4783,6 +5050,16 @@ bool IOService::responseValid ( unsigned long x, int pid ) OSString * name = IOCopyLogNameForPID(pid); PM_DEBUG("PM response took %d ms (%s)\n", NS_TO_MS(nsec), name ? name->getCStringNoCopy() : ""); + + if (nsec > LOG_APP_RESPONSE_MSG_TRACER) + { + // TODO: populate the messageType argument + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponseSlow, + name ? 
name->getCStringNoCopy() : "", 0, + NS_TO_MS(nsec), pid); + } + if (name) name->release(); } @@ -4792,6 +5069,11 @@ bool IOService::responseValid ( unsigned long x, int pid ) if ( kOSBooleanFalse == theFlag ) { + if ((gIOKitDebug & kIOLogDebugPower) && + (fOutOfBandParameter == kNotifyApps)) + { + PM_DEBUG("[Notify %u] acked\n", (uint32_t) ordinalComponent); + } fResponseArray->replaceObject(ordinalComponent, kOSBooleanTrue); } @@ -4799,7 +5081,7 @@ bool IOService::responseValid ( unsigned long x, int pid ) } //********************************************************************************* -// [public virtual] allowPowerChange +// [public] allowPowerChange // // Our power state is about to lower, and we have notified applications // and kernel clients, and one of them has acknowledged. If this is the last to do @@ -4821,32 +5103,26 @@ IOReturn IOService::allowPowerChange ( unsigned long refcon ) request = acquirePMRequest( this, kIOPMRequestTypeAllowPowerChange ); if (!request) - { - PM_ERROR("%s::%s no memory\n", getName(), __FUNCTION__); return kIOReturnNoMemory; - } request->fArg0 = (void *) refcon; request->fArg1 = (void *) proc_selfpid(); + request->fArg2 = (void *) 0; submitPMRequest( request ); return kIOReturnSuccess; } -IOReturn serializedAllowPowerChange ( OSObject *owner, void * refcon, void *, void *, void *) -{ - // [deprecated] public - return kIOReturnUnsupported; -} - +#ifndef __LP64__ IOReturn IOService::serializedAllowPowerChange2 ( unsigned long refcon ) { // [deprecated] public return kIOReturnUnsupported; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] cancelPowerChange +// [public] cancelPowerChange // // Our power state is about to lower, and we have notified applications // and kernel clients, and one of them has vetoed the change. 
If this is the last @@ -4858,7 +5134,8 @@ IOReturn IOService::serializedAllowPowerChange2 ( unsigned long refcon ) IOReturn IOService::cancelPowerChange ( unsigned long refcon ) { - IOPMRequest * request; + IOPMRequest * request; + OSString * name; if ( !initialized ) { @@ -4866,51 +5143,32 @@ IOReturn IOService::cancelPowerChange ( unsigned long refcon ) return kIOReturnSuccess; } - OSString * name = IOCopyLogNameForPID(proc_selfpid()); + name = IOCopyLogNameForPID(proc_selfpid()); PM_ERROR("PM notification cancel (%s)\n", name ? name->getCStringNoCopy() : ""); - if (name) - name->release(); request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange ); if (!request) - { - PM_ERROR("%s::%s no memory\n", getName(), __FUNCTION__); - return kIOReturnNoMemory; - } - - request->fArg0 = (void *) refcon; - request->fArg1 = (void *) proc_selfpid(); - submitPMRequest( request ); + { + if (name) + name->release(); + return kIOReturnNoMemory; + } - return kIOReturnSuccess; -} + request->fArg0 = (void *) refcon; + request->fArg1 = (void *) proc_selfpid(); + request->fArg2 = (void *) name; + submitPMRequest( request ); -IOReturn serializedCancelPowerChange ( OSObject *owner, void * refcon, void *, void *, void *) -{ - // [deprecated] public - return kIOReturnUnsupported; + return kIOReturnSuccess; } +#ifndef __LP64__ IOReturn IOService::serializedCancelPowerChange2 ( unsigned long refcon ) { // [deprecated] public return kIOReturnUnsupported; } -#if 0 -//********************************************************************************* -// c_PM_clamp_Timer_Expired (C Func) -// -// Called when our clamp timer expires...we will call the object method. 
-//********************************************************************************* - -static void c_PM_Clamp_Timer_Expired ( OSObject * client, IOTimerEventSource * ) -{ - if (client) - ((IOService *)client)->PM_Clamp_Timer_Expired (); -} -#endif - //********************************************************************************* // PM_Clamp_Timer_Expired // @@ -4919,15 +5177,6 @@ static void c_PM_Clamp_Timer_Expired ( OSObject * client, IOTimerEventSource * ) void IOService::PM_Clamp_Timer_Expired ( void ) { -#if 0 - if ( ! initialized ) - { - // we're unloading - return; - } - - changePowerStateToPriv (0); -#endif } //********************************************************************************* @@ -4936,31 +5185,13 @@ void IOService::PM_Clamp_Timer_Expired ( void ) // Set to highest available power state for a minimum of duration milliseconds //********************************************************************************* -#define kFiveMinutesInNanoSeconds (300 * NSEC_PER_SEC) - void IOService::clampPowerOn ( unsigned long duration ) { -#if 0 - changePowerStateToPriv (fNumberOfPowerStates-1); - - if ( pwrMgt->clampTimerEventSrc == NULL ) { - pwrMgt->clampTimerEventSrc = IOTimerEventSource::timerEventSource(this, - c_PM_Clamp_Timer_Expired); - - IOWorkLoop * workLoop = getPMworkloop (); - - if ( !pwrMgt->clampTimerEventSrc || !workLoop || - ( workLoop->addEventSource( pwrMgt->clampTimerEventSrc) != kIOReturnSuccess) ) { - - } - } - - pwrMgt->clampTimerEventSrc->setTimeout(300*USEC_PER_SEC, USEC_PER_SEC); -#endif } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] setPowerState +// [public] setPowerState // // Does nothing here. This should be implemented in a subclass driver. 
//********************************************************************************* @@ -4972,7 +5203,7 @@ IOReturn IOService::setPowerState ( } //********************************************************************************* -// [public virtual] maxCapabilityForDomainState +// [public] maxCapabilityForDomainState // // Finds the highest power state in the array whose input power // requirement is equal to the input parameter. Where a more intelligent @@ -4990,7 +5221,7 @@ unsigned long IOService::maxCapabilityForDomainState ( IOPMPowerFlags domainStat for ( i = fNumberOfPowerStates - 1; i >= 0; i-- ) { if ( (domainState & fPowerStates[i].inputPowerRequirement) == - fPowerStates[i].inputPowerRequirement ) + fPowerStates[i].inputPowerRequirement ) { return i; } @@ -4999,7 +5230,7 @@ unsigned long IOService::maxCapabilityForDomainState ( IOPMPowerFlags domainStat } //********************************************************************************* -// [public virtual] initialPowerStateForDomainState +// [public] initialPowerStateForDomainState // // Finds the highest power state in the array whose input power // requirement is equal to the input parameter. Where a more intelligent @@ -5026,7 +5257,7 @@ unsigned long IOService::initialPowerStateForDomainState ( IOPMPowerFlags domain } //********************************************************************************* -// [public virtual] powerStateForDomainState +// [public] powerStateForDomainState // // Finds the highest power state in the array whose input power // requirement is equal to the input parameter. Where a more intelligent @@ -5052,19 +5283,21 @@ unsigned long IOService::powerStateForDomainState ( IOPMPowerFlags domainState ) return 0; } +#ifndef __LP64__ //********************************************************************************* -// [public virtual] didYouWakeSystem +// [deprecated] didYouWakeSystem // // Does nothing here. This should be implemented in a subclass driver. 
//********************************************************************************* -bool IOService::didYouWakeSystem ( void ) +bool IOService::didYouWakeSystem ( void ) { return false; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] powerStateWillChangeTo +// [public] powerStateWillChangeTo // // Does nothing here. This should be implemented in a subclass driver. //********************************************************************************* @@ -5075,7 +5308,7 @@ IOReturn IOService::powerStateWillChangeTo ( IOPMPowerFlags, unsigned long, IOSe } //********************************************************************************* -// [public virtual] powerStateDidChangeTo +// [public] powerStateDidChangeTo // // Does nothing here. This should be implemented in a subclass driver. //********************************************************************************* @@ -5086,7 +5319,7 @@ IOReturn IOService::powerStateDidChangeTo ( IOPMPowerFlags, unsigned long, IOSer } //********************************************************************************* -// [public virtual] powerChangeDone +// [protected] powerChangeDone // // Called from PM work loop thread. // Does nothing here. This should be implemented in a subclass policy-maker. @@ -5096,8 +5329,9 @@ void IOService::powerChangeDone ( unsigned long ) { } +#ifndef __LP64__ //********************************************************************************* -// [public virtual] newTemperature +// [deprecated] newTemperature // // Does nothing here. This should be implemented in a subclass driver. 
//********************************************************************************* @@ -5106,9 +5340,10 @@ IOReturn IOService::newTemperature ( long currentTemp, IOService * whichZone ) { return IOPMNoErr; } +#endif /* !__LP64__ */ //********************************************************************************* -// [public virtual] systemWillShutdown +// [public] systemWillShutdown // // System shutdown and restart notification. //********************************************************************************* @@ -5125,7 +5360,8 @@ void IOService::systemWillShutdown( IOOptionBits specifier ) //********************************************************************************* IOPMRequest * -IOService::acquirePMRequest( IOService * target, IOOptionBits requestType ) +IOService::acquirePMRequest( IOService * target, IOOptionBits requestType, + IOPMRequest * active ) { IOPMRequest * request; @@ -5135,6 +5371,16 @@ IOService::acquirePMRequest( IOService * target, IOOptionBits requestType ) if (request) { request->init( target, requestType ); + if (active) + { + IOPMRequest * root = active->getRootRequest(); + if (root) request->attachRootRequest(root); + } + } + else + { + PM_ERROR("%s: No memory for PM request type 0x%x\n", + target->getName(), (uint32_t) requestType); } return request; } @@ -5163,11 +5409,11 @@ void IOService::submitPMRequest( IOPMRequest * request ) assert( gIOPMRequestQueue ); PM_TRACE("[+ %02lx] %p [%p %s] %p %p %p\n", - request->getType(), request, + (long)request->getType(), request, request->getTarget(), request->getTarget()->getName(), request->fArg0, request->fArg1, request->fArg2); - if (request->isReply()) + if (request->isReplyType()) gIOPMReplyQueue->queuePMRequest( request ); else gIOPMRequestQueue->queuePMRequest( request ); @@ -5183,7 +5429,7 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count ) { IOPMRequest * req = requests[i]; PM_TRACE("[+ %02lx] %p [%p %s] %p %p %p\n", - req->getType(), req, + 
(long)req->getType(), req, req->getTarget(), req->getTarget()->getName(), req->fArg0, req->fArg1, req->fArg2); } @@ -5203,8 +5449,8 @@ bool IOService::servicePMRequestQueue( if (!initialized) { - PM_DEBUG("[%s] %s: PM not initialized\n", getName(), __FUNCTION__); - goto done; + PM_DEBUG("%s: PM not initialized\n", getName()); + goto done; } // Create an IOPMWorkQueue on demand, when the initial PM request is @@ -5227,16 +5473,15 @@ bool IOService::servicePMRequestQueue( if (fPMWorkQueue && (gIOPMWorkLoop->addEventSource(fPMWorkQueue) != kIOReturnSuccess)) { - PM_ERROR("[%s] %s: addEventSource failed\n", - getName(), __FUNCTION__); + PM_ERROR("%s: add PM work queue failed\n", getName()); fPMWorkQueue->release(); fPMWorkQueue = 0; } if (!fPMWorkQueue) { - PM_ERROR("[%s] %s: not ready (type %02lx)\n", - getName(), __FUNCTION__, request->getType()); + PM_ERROR("%s: no PM work queue (type %02lx)\n", + getName(), (long)request->getType()); goto done; } } @@ -5245,21 +5490,27 @@ bool IOService::servicePMRequestQueue( return false; // do not signal more done: - gIOPMFreeQueue->queuePMRequest( request ); + fAdjustPowerScheduled = false; + gIOPMFreeQueue->queuePMRequest(request); return false; // do not signal more } //********************************************************************************* // [private] servicePMFreeQueue // -// Called by IOPMFreeQueue to recycle a completed request. +// Called by the request completion to recycle a completed request. 
//********************************************************************************* bool IOService::servicePMFreeQueue( - IOPMRequest * request, - IOPMRequestQueue * queue ) + IOPMRequest * request, + IOPMCompletionQueue * queue ) { - bool more = request->hasParentRequest(); + bool more = request->getNextRequest(); + IOPMRequest * root = request->getRootRequest(); + + if (root && (root != request)) + more = true; + releasePMRequest( request ); return more; } @@ -5274,11 +5525,11 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) { assert(request && queue); - PM_TRACE("[- %02lx] %p [%p %s] State %ld, Busy %ld\n", + PM_TRACE("[- %02x] %p [%p %s] State %d, Busy %d\n", request->getType(), request, this, getName(), fMachineState, gIOPMBusyCount); - // Catch requests created by PM_idle_timer_expiration(). + // Catch requests created by idleTimerExpired(). if ((request->getType() == kIOPMRequestTypeActivityTickle) && (request->fArg1 == (void *) false)) @@ -5288,9 +5539,13 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) // a flag so we know to restart idle timer when deviceDesire goes up. 
if (fDeviceDesire > 0) + { + fActivityTickleCount = 0; + clock_get_uptime(&fIdleTimerStartTime); start_PM_idle_timer(); + } else - fActivityTimerStopped = true; + fIdleTimerStopped = true; } gIOPMFreeQueue->queuePMRequest( request ); @@ -5316,7 +5571,7 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count ) // 5 = kDriverCallInformPreChange // 6 = kDriverCallInformPostChange // 7 = kDriverCallSetPowerState - if (fDriverCallBusy) reason = 5 + fDriverCallReason; + if (fLockedFlags.DriverCallBusy) reason = 5 + fDriverCallReason; break; } @@ -5351,7 +5606,7 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count ) { if (count) { - PM_TRACE("[B %02lx] %p [%p %s] State %ld, Reason %d\n", + PM_TRACE("[B %02x] %p [%p %s] State %d, Reason %d\n", request->getType(), request, this, getName(), fMachineState, reason); } @@ -5377,10 +5632,10 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) while (isPMBlocked(request, loop++) == false) { - PM_TRACE("[W %02lx] %p [%p %s] State %ld\n", + PM_TRACE("[W %02x] %p [%p %s] State %d\n", request->getType(), request, this, getName(), fMachineState); - fPMRequest = request; + gIOPMRequest = request; // Every PM machine states must be handled in one of the cases below. @@ -5402,9 +5657,9 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); PM_ERROR("%s: idle cancel\n", fName); // yes, rescind the warning - tellNoChangeDown(fHeadNoteState); + tellNoChangeDown(fHeadNotePowerState); // mark the change note un-actioned - fHeadNoteFlags |= IOPMNotDone; + fHeadNoteFlags |= kIOPMNotDone; // and we're done all_done(); } @@ -5412,7 +5667,6 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) case kIOPM_OurChangeTellPriorityClientsPowerDown: // our change, should it be acted on still? 
-#if SUPPORT_IDLE_CANCEL if (fDoNotPowerDown) { OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState); @@ -5420,12 +5674,11 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) // no, tell clients we're back in the old state tellChangeUp(fCurrentPowerState); // mark the change note un-actioned - fHeadNoteFlags |= IOPMNotDone; + fHeadNoteFlags |= kIOPMNotDone; // and we're done all_done(); } else -#endif { // yes, we can continue OurChangeTellPriorityClientsPowerDown(); @@ -5472,8 +5725,8 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) ParentDownWaitForPowerSettle(); break; - case kIOPM_ParentDownAcknowledgeChange: - ParentDownAcknowledgeChange(); + case kIOPM_ParentAcknowledgePowerChange: + ParentAcknowledgePowerChange(); break; case kIOPM_ParentUpSetPowerState: @@ -5488,10 +5741,6 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) ParentUpNotifyInterestedDriversDidChange(); break; - case kIOPM_ParentUpAcknowledgePowerChange: - ParentUpAcknowledgePowerChange(); - break; - case kIOPM_DriverThreadCallDone: if (fDriverCallReason == kDriverCallSetPowerState) notifyControllingDriverDone(); @@ -5503,11 +5752,25 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ) notifyChildrenDone(); break; + case kIOPM_SyncNotifyDidChange: + fMachineState = kIOPM_SyncFinish; + fDriverCallReason = kDriverCallInformPostChange; + notifyChildren(); + break; + + case kIOPM_SyncFinish: + if (fHeadNoteFlags & kIOPMParentInitiated) + ParentAcknowledgePowerChange(); + else + OurChangeFinish(); + break; + default: - IOPanic("servicePMWorkQueue: unknown machine state"); + panic("servicePMWorkQueue: unknown machine state %x", + fMachineState); } - fPMRequest = 0; + gIOPMRequest = 0; if (fMachineState == kIOPM_Finished) { @@ -5552,18 +5815,11 @@ void IOService::executePMRequest( IOPMRequest * request ) break; case kIOPMRequestTypeAdjustPowerState: + 
fAdjustPowerScheduled = false; + rebuildChildClampBits(); adjustPowerState(); break; - case kIOPMRequestTypeMakeUsable: - handleMakeUsable( request ); - break; - - case kIOPMRequestTypeTemporaryPowerClamp: - fClampOn = true; - handleMakeUsable( request ); - break; - case kIOPMRequestTypePowerDomainWillChange: handlePowerDomainWillChangeTo( request ); break; @@ -5572,12 +5828,9 @@ void IOService::executePMRequest( IOPMRequest * request ) handlePowerDomainDidChangeTo( request ); break; - case kIOPMRequestTypeChangePowerStateTo: - handleChangePowerStateTo( request ); - break; - - case kIOPMRequestTypeChangePowerStateToPriv: - handleChangePowerStateToPriv( request ); + case kIOPMRequestTypeRequestPowerState: + case kIOPMRequestTypeRequestPowerStateOverride: + handleRequestPowerState( request ); break; case kIOPMRequestTypePowerOverrideOnPriv: @@ -5586,42 +5839,47 @@ void IOService::executePMRequest( IOPMRequest * request ) break; case kIOPMRequestTypeActivityTickle: - if (request) - { - bool setDeviceDesire = false; + handleActivityTickle( request ); + break; - if (request->fArg1) - { - // Power rise from activity tickle. - unsigned long ticklePowerState = (unsigned long) request->fArg0; - if ((fDeviceDesire < ticklePowerState) && - (ticklePowerState < fNumberOfPowerStates)) + case kIOPMRequestTypeSynchronizePowerTree: + handleSynchronizePowerTree( request ); + break; + + case kIOPMRequestTypeSetIdleTimerPeriod: + { + IOWorkLoop * wl = gIOPMWorkLoop; + fIdleTimerPeriod = (uintptr_t) request->fArg0; + + if (wl && (false == fLockedFlags.PMStop) && (fIdleTimerPeriod > 0)) + { + if ( NULL == fIdleTimerEventSource ) { - setDeviceDesire = true; - fIdleTimerMinPowerState = ticklePowerState; - } - } - else if (fDeviceDesire > fIdleTimerMinPowerState) - { - // Power drop from idle timer expiration. 
- request->fArg0 = (void *) (fDeviceDesire - 1); - setDeviceDesire = true; - } + IOTimerEventSource * timerSrc; + + timerSrc = IOTimerEventSource::timerEventSource( + this, + OSMemberFunctionCast(IOTimerEventSource::Action, + this, &IOService::idleTimerExpired)); + + if (timerSrc && (wl->addEventSource(timerSrc) != kIOReturnSuccess)) + { + timerSrc->release(); + timerSrc = 0; + } - if (setDeviceDesire) - { - // handleChangePowerStateToPriv() does not check the - // request type, as long as the args are appropriate - // for kIOPMRequestTypeChangePowerStateToPriv. + fIdleTimerEventSource = timerSrc; + } - request->fArg1 = (void *) false; - handleChangePowerStateToPriv( request ); - } - } - break; + fActivityTickleCount = 0; + clock_get_uptime(&fIdleTimerStartTime); + start_PM_idle_timer(); + } + } + break; default: - IOPanic("executePMRequest: unknown request type"); + panic("executePMRequest: unknown request type %x", request->getType()); } } @@ -5634,9 +5892,9 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q bool more = false; assert( request && queue ); - assert( request->isReply() ); + assert( request->isReplyType() ); - PM_TRACE("[A %02lx] %p [%p %s] State %ld\n", + PM_TRACE("[A %02x] %p [%p %s] State %d\n", request->getType(), request, this, getName(), fMachineState); switch ( request->getType() ) @@ -5644,10 +5902,18 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q case kIOPMRequestTypeAllowPowerChange: case kIOPMRequestTypeCancelPowerChange: // Check if we are expecting this response. - if (responseValid((unsigned long) request->fArg0, (int) request->fArg1)) + if (responseValid((unsigned long) request->fArg0, (int)(long) request->fArg1)) { if (kIOPMRequestTypeCancelPowerChange == request->getType()) + { + OSString * name = (OSString *) request->fArg2; + getPMRootDomain()->pmStatsRecordApplicationResponse( + gIOPMStatsApplicationResponseCancel, + name ? 
name->getCStringNoCopy() : "", 0, + 0, (int)(uintptr_t) request->fArg1); + fDoNotPowerDown = true; + } if (checkForDone()) { @@ -5657,9 +5923,20 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q fResponseArray->release(); fResponseArray = NULL; } + if ( fNotifyClientArray ) + { + fNotifyClientArray->release(); + fNotifyClientArray = NULL; + } more = true; } } + // OSString containing app name in Arg2 must be released. + if (request->getType() == kIOPMRequestTypeCancelPowerChange) + { + OSObject * obj = (OSObject *) request->fArg2; + if (obj) obj->release(); + } break; case kIOPMRequestTypeAckPowerChange: @@ -5672,7 +5949,7 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q // driver acked while setPowerState() call is in-flight. // take this ack, return value from setPowerState() is irrelevant. OUR_PMLog(kPMLogDriverAcknowledgeSet, - (UInt32) this, fDriverTimer); + (uintptr_t) this, fDriverTimer); fDriverTimer = 0; } else if (fDriverTimer > 0) @@ -5684,16 +5961,16 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q uint64_t nsec = computeTimeDeltaNS(&fDriverCallStartTime); if (nsec > LOG_SETPOWER_TIMES) PM_DEBUG("%s::setPowerState(%p, %lu -> %lu) async took %d ms\n", - fName, this, fCurrentPowerState, fHeadNoteState, NS_TO_MS(nsec)); + fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); #endif - OUR_PMLog(kPMLogDriverAcknowledgeSet, (UInt32) this, fDriverTimer); + OUR_PMLog(kPMLogDriverAcknowledgeSet, (uintptr_t) this, fDriverTimer); fDriverTimer = 0; more = true; } else { // unexpected ack - OUR_PMLog(kPMLogAcknowledgeErr4, (UInt32) this, 0); + OUR_PMLog(kPMLogAcknowledgeErr4, (uintptr_t) this, 0); } break; @@ -5702,7 +5979,6 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q more = true; break; -#if SUPPORT_IDLE_CANCEL case kIOPMRequestTypeIdleCancel: if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) || 
(fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)) @@ -5714,16 +5990,61 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q more = true; } break; -#endif default: - IOPanic("servicePMReplyQueue: unknown reply type"); + panic("servicePMReplyQueue: unknown reply type %x", + request->getType()); } releasePMRequest( request ); return more; } +//********************************************************************************* +// [private] assertPMThreadCall / deassertPMThreadCall +//********************************************************************************* + +bool IOService::assertPMThreadCall( void ) +{ + if (!initialized) + return false; + + // PMfree() should only be called from IOService::free(). + // That makes it safe to touch IOServicePM state here. + // Caller holds a retain and has checked target is on PM plane. + + PM_LOCK(); + if (fLockedFlags.PMStop) + { + // PMstop() already issued - fail the assertion. + PM_UNLOCK(); + return false; + } + + // Increment assertion count to block PMstop(), and return true. + fThreadAssertionCount++; + fThreadAssertionThread = current_thread(); // only 1 caller + PM_UNLOCK(); + + return true; +} + +void IOService::deassertPMThreadCall( void ) +{ + PM_LOCK(); + assert(fThreadAssertionCount > 0); + if (fThreadAssertionCount) + fThreadAssertionCount--; + if (current_thread() == fThreadAssertionThread) + fThreadAssertionThread = 0; + if ((fThreadAssertionCount == 0) && fLockedFlags.PMStop) + { + // PMstop() is blocked waiting for assertion count to drop to zero. 
+ PM_LOCK_WAKEUP(&fThreadAssertionCount); + } + PM_UNLOCK(); +} + //********************************************************************************* // IOPMRequest Class // @@ -5748,11 +6069,9 @@ bool IOPMRequest::init( IOService * target, IOOptionBits type ) if (!IOCommand::init()) return false; - fType = type; - fTarget = target; - fParent = 0; - fChildCount = 0; - fArg0 = fArg1 = fArg2 = 0; + fType = type; + fTarget = target; + fCompletionStatus = kIOReturnSuccess; if (fTarget) fTarget->retain(); @@ -5762,21 +6081,96 @@ bool IOPMRequest::init( IOService * target, IOOptionBits type ) void IOPMRequest::reset( void ) { - assert( fChildCount == 0 ); + assert( fWorkWaitCount == 0 ); + assert( fFreeWaitCount == 0 ); + + detachNextRequest(); + detachRootRequest(); fType = kIOPMRequestTypeInvalid; - if (fParent) + if (fCompletionAction) { - fParent->fChildCount--; - fParent = 0; - } + fCompletionAction(fCompletionTarget, fCompletionParam, fCompletionStatus); + } if (fTarget) { fTarget->release(); fTarget = 0; - } + } +} + +void IOPMRequest::attachNextRequest( IOPMRequest * next ) +{ + if (!fRequestNext) + { + // Postpone the execution of the next request after + // this request. 
+ fRequestNext = next; + fRequestNext->fWorkWaitCount++; +#if LOG_REQUEST_ATTACH + kprintf("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n", + this, (uint32_t) fType, fRequestNext, + (uint32_t) fRequestNext->fType, + (uint32_t) fRequestNext->fWorkWaitCount, + fTarget->getName()); +#endif + } +} + +void IOPMRequest::detachNextRequest( void ) +{ + if (fRequestNext) + { + assert(fRequestNext->fWorkWaitCount); + if (fRequestNext->fWorkWaitCount) + fRequestNext->fWorkWaitCount--; +#if LOG_REQUEST_ATTACH + kprintf("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n", + this, (uint32_t) fType, fRequestNext, + (uint32_t) fRequestNext->fType, + (uint32_t) fRequestNext->fWorkWaitCount, + fTarget->getName()); +#endif + fRequestNext = 0; + } +} + +void IOPMRequest::attachRootRequest( IOPMRequest * root ) +{ + if (!fRequestRoot) + { + // Delay the completion of the root request after + // this request. + fRequestRoot = root; + fRequestRoot->fFreeWaitCount++; +#if LOG_REQUEST_ATTACH + kprintf("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n", + this, (uint32_t) fType, fRequestRoot, + (uint32_t) fRequestRoot->fType, + (uint32_t) fRequestRoot->fFreeWaitCount, + fTarget->getName()); +#endif + } +} + +void IOPMRequest::detachRootRequest( void ) +{ + if (fRequestRoot) + { + assert(fRequestRoot->fFreeWaitCount); + if (fRequestRoot->fFreeWaitCount) + fRequestRoot->fFreeWaitCount--; +#if LOG_REQUEST_ATTACH + kprintf("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n", + this, (uint32_t) fType, fRequestRoot, + (uint32_t) fRequestRoot->fType, + (uint32_t) fRequestRoot->fFreeWaitCount, + fTarget->getName()); +#endif + fRequestRoot = 0; + } } //********************************************************************************* @@ -5928,7 +6322,7 @@ bool IOPMWorkQueue::checkForWork( void ) { request = (IOPMRequest *) queue_first(&fWorkQueue); assert(request->getTarget() == target); - if (request->hasChildRequest()) break; + if (request->isWorkBlocked()) break; done = (*fWorkAction)( target, request, 
this ); if (!done) break; @@ -5941,6 +6335,67 @@ bool IOPMWorkQueue::checkForWork( void ) return false; } +//********************************************************************************* +// IOPMCompletionQueue Class +//********************************************************************************* + +OSDefineMetaClassAndStructors( IOPMCompletionQueue, IOEventSource ); + +IOPMCompletionQueue * IOPMCompletionQueue::create( IOService * inOwner, Action inAction ) +{ + IOPMCompletionQueue * me = OSTypeAlloc(IOPMCompletionQueue); + if (me && !me->init(inOwner, inAction)) + { + me->release(); + me = 0; + } + return me; +} + +bool IOPMCompletionQueue::init( IOService * inOwner, Action inAction ) +{ + if (!inAction || !IOEventSource::init(inOwner, (IOEventSourceAction)inAction)) + return false; + + queue_init(&fQueue); + return true; +} + +void IOPMCompletionQueue::queuePMRequest( IOPMRequest * request ) +{ + assert(request); + request->detachNextRequest(); // unblocks next request + queue_enter(&fQueue, request, IOPMRequest *, fCommandChain); + if (workLoop) signalWorkAvailable(); +} + +bool IOPMCompletionQueue::checkForWork( void ) +{ + Action dqAction = (Action) action; + IOPMRequest * request; + IOService * target; + bool more = false; + queue_head_t tmpQueue; + + queue_init(&tmpQueue); + + while (!queue_empty(&fQueue)) + { + queue_remove_first( &fQueue, request, IOPMRequest *, fCommandChain ); + if (request->isFreeBlocked()) + { + queue_enter(&tmpQueue, request, IOPMRequest *, fCommandChain); + continue; + } + target = request->getTarget(); + assert(target); + more |= (*dqAction)( target, request, this ); + } + + queue_new_head(&tmpQueue, &fQueue, IOPMRequest *, fCommandChain); + return more; +} + OSDefineMetaClassAndStructors(IOServicePM, OSObject) //********************************************************************************* @@ -5950,7 +6405,7 @@ OSDefineMetaClassAndStructors(IOServicePM, OSObject) 
//********************************************************************************* static void -setPMProperty( OSDictionary * dict, const char * key, unsigned long value ) +setPMProperty( OSDictionary * dict, const char * key, uint64_t value ) { OSNumber * num = OSNumber::withNumber(value, sizeof(value) * 8); if (num) @@ -5960,31 +6415,81 @@ setPMProperty( OSDictionary * dict, const char * key, unsigned long value ) } } -bool IOServicePM::serialize( OSSerialize * s ) const +IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) { OSDictionary * dict; bool ok = false; + int dictSize = 4; + + if (IdleTimerPeriod) + dictSize += 4; + + if (PowerClients) + dict = OSDictionary::withDictionary( + PowerClients, PowerClients->getCount() + dictSize); + else + dict = OSDictionary::withCapacity(dictSize); - dict = OSDictionary::withCapacity(8); if (dict) { - setPMProperty( dict, "CurrentPowerState", CurrentPowerState ); + setPMProperty(dict, "CurrentPowerState", CurrentPowerState); if (DesiredPowerState != CurrentPowerState) - setPMProperty( dict, "DesiredPowerState", DesiredPowerState ); + setPMProperty(dict, "DesiredPowerState", DesiredPowerState); if (kIOPM_Finished != MachineState) - setPMProperty( dict, "MachineState", MachineState ); - if (ChildrenDesire) - setPMProperty( dict, "ChildrenPowerState", ChildrenDesire ); - if (DeviceDesire) - setPMProperty( dict, "DeviceChangePowerState", DeviceDesire ); - if (DriverDesire) - setPMProperty( dict, "DriverChangePowerState", DriverDesire ); + setPMProperty(dict, "MachineState", MachineState); if (DeviceOverrides) - dict->setObject( "PowerOverrideOn", kOSBooleanTrue ); + dict->setObject("PowerOverrideOn", kOSBooleanTrue); + + if (IdleTimerPeriod) + { + AbsoluteTime now; + AbsoluteTime delta; + uint64_t nsecs; + + clock_get_uptime(&now); + + // The idle timer period in milliseconds. 
+ setPMProperty(dict, "IdleTimerPeriod", IdleTimerPeriod * 1000ULL); + + // The number of activity tickles recorded since device idle + setPMProperty(dict, "ActivityTickles", ActivityTickleCount); + + if (AbsoluteTime_to_scalar(&DeviceActiveTimestamp)) + { + // The number of milliseconds since the last activity tickle. + delta = now; + SUB_ABSOLUTETIME(&delta, &DeviceActiveTimestamp); + absolutetime_to_nanoseconds(delta, &nsecs); + setPMProperty(dict, "TimeSinceActivityTickle", NS_TO_MS(nsecs)); + } + + if (AbsoluteTime_to_scalar(&IdleTimerStartTime)) + { + // The number of milliseconds since the last device idle. + delta = now; + SUB_ABSOLUTETIME(&delta, &IdleTimerStartTime); + absolutetime_to_nanoseconds(delta, &nsecs); + setPMProperty(dict, "TimeSinceDeviceIdle", NS_TO_MS(nsecs)); + } + } ok = dict->serialize(s); dict->release(); } - return ok; + return (ok ? kIOReturnSuccess : kIOReturnNoMemory); +} + +bool IOServicePM::serialize( OSSerialize * s ) const +{ + IOReturn ret = kIOReturnNotReady; + + if (gIOPMWorkLoop) + { + ret = gIOPMWorkLoop->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, &IOServicePM::gatedSerialize), + (OSObject *) this, (void *) s); + } + + return (kIOReturnSuccess == ret); } diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 21e9361ff..227715d5f 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -29,9 +29,6 @@ #ifndef _IOKIT_IOSERVICEPMPRIVATE_H #define _IOKIT_IOSERVICEPMPRIVATE_H -/* Binary compatibility with drivers that access pm_vars */ -#define PM_VARS_SUPPORT 1 - /*! @class IOServicePM @abstract Power management class. */ @@ -42,269 +39,162 @@ class IOServicePM : public OSObject OSDeclareDefaultStructors( IOServicePM ) private: - /*! @var Owner - Points to object that called PMinit(). Used only for debugging. - */ - IOService * Owner; - - /*! @var InterestedDrivers - List of interested drivers. - */ + // List of interested drivers. 
IOPMinformeeList * InterestedDrivers; - /*! @var DriverTimer - How long to wait for controlling driver to acknowledge. - */ + // How long to wait for controlling driver to acknowledge. IOReturn DriverTimer; - /*! @var AckTimer */ - thread_call_t AckTimer; + // Current power management machine state. + uint32_t MachineState; - /*! @var SettleTimer */ + thread_call_t AckTimer; thread_call_t SettleTimer; - /*! @var MachineState - Current power management machine state. - */ - unsigned long MachineState; - - /*! @var SettleTimeUS - Settle time after changing power state. - */ + // Settle time after changing power state. unsigned long SettleTimeUS; - /*! @var HeadNoteFlags - The flags field for the current change note. - */ + // The flags describing current change note. unsigned long HeadNoteFlags; - /*! @var HeadNoteState - The newStateNumber field for the current change note. - */ - unsigned long HeadNoteState; - - /*! @var HeadNoteOutputFlags - The outputPowerCharacter field for the current change note. - */ - unsigned long HeadNoteOutputFlags; - - /*! @var HeadNoteDomainState - Power domain flags from parent. (only on parent change). - */ - unsigned long HeadNoteDomainState; - - /*! @var HeadNoteParent - Pointer to initiating parent. (only on parent change). - */ - IOPowerConnection * HeadNoteParent; + // The new power state number being changed to. + unsigned long HeadNotePowerState; + + // Points to the entry in the power state array. + IOPMPowerState * HeadNotePowerArrayEntry; + + // Power flags supplied by all parents (domain). + unsigned long HeadNoteDomainFlags; + + // Connection attached to the changing parent. + IOPowerConnection * HeadNoteParentConnection; - /*! @var HeadNoteCapabilityFlags - The capabilityFlags field for the current change note. - */ - unsigned long HeadNoteCapabilityFlags; - - /*! @var HeadNotePendingAcks - Number of acks we are waiting for. - */ + // Power flags supplied by the changing parent. 
+ unsigned long HeadNoteParentFlags; + + // Number of acks still outstanding. unsigned long HeadNotePendingAcks; - /*! @var PMLock - PM state lock. - */ + // PM state lock. IOLock * PMLock; - /*! @var WeAreRoot - True if our owner is the root of the power tree. - */ - bool WeAreRoot; - - /*! @var InitialChange - Initialized to true, then set to false after the initial power change. - */ + // Initialized to true, then set to false after the initial power change. bool InitialChange; - /*! @var NeedToBecomeUsable - Someone has called makeUsable before we had a controlling driver. - */ - bool NeedToBecomeUsable; - - /*! @var DeviceOverrides - Ignore children and driver desires if true. - */ + // Ignore children and driver desires if true. bool DeviceOverrides; - /*! @var ClampOn - Domain is clamped on until the first power child is added. - */ - bool ClampOn; - - /*! @var DeviceActive - True if device was active since last idle timer expiration. - */ + // True if device was active since last idle timer expiration. bool DeviceActive; - /*! @var DoNotPowerDown - Keeps track of any negative responses from notified apps and clients. - */ + // Keeps track of any negative responses from notified apps and clients. bool DoNotPowerDown; - /*! @var ParentsKnowState - True if all our parents know the state of their power domain. - */ + // True if all our parents know the state of their power domain. bool ParentsKnowState; - /*! @var DeviceActiveTimestamp - Time of last device activity. - */ + bool StrictTreeOrder; + bool IdleTimerStopped; + bool AdjustPowerScheduled; + + // Time of last device activity. AbsoluteTime DeviceActiveTimestamp; - /*! @var ActivityLock - Used to protect activity flag. - */ + // Used to protect activity flag. IOLock * ActivityLock; - /*! @var IdleTimerEventSource - An idle timer event source. - */ + // Idle timer event source. IOTimerEventSource * IdleTimerEventSource; - /*! @var IdleTimerPeriod - Idle timer's period in seconds. 
- */ + // Idle timer's period in seconds. unsigned long IdleTimerPeriod; unsigned long IdleTimerMinPowerState; + AbsoluteTime IdleTimerStartTime; - /*! @var DriverDesire - Power state desired by our controlling driver. - */ - unsigned long DriverDesire; - - /*! @var DeviceDesire - Power state desired by a subclassed device object. - */ + // Power state desired by a subclassed device object. unsigned long DeviceDesire; - /*! @var ChildrenDesire - Power state desired by all children. - */ - unsigned long ChildrenDesire; - - /*! @var DesiredPowerState - This is the power state we desire currently. - */ + // This is the power state we desire currently. unsigned long DesiredPowerState; - /*! @var PreviousRequest - This is what our parent thinks our need is. - */ + // This is what our parent thinks our need is. unsigned long PreviousRequest; - /*! @var Name - Cache result from getName(), used in logging. - */ + // Cache result from getName(), used in logging. const char * Name; - /*! @var Platform - Cache result from getPlatform(), used in logging and registering. - */ - IOPlatformExpert * Platform; - - /*! @var NumberOfPowerStates - Number of power states in the power array. - */ + // Number of power states in the power array. unsigned long NumberOfPowerStates; - /*! @var PowerStates - Power state array. - */ + // Power state array. IOPMPowerState * PowerStates; - /*! @var ControllingDriver - The controlling driver. - */ + // The controlling driver. IOService * ControllingDriver; - /*! @var AggressivenessValues - Array of aggressiveness values. - */ - unsigned long AggressivenessValue[ kMaxType + 1 ]; - - /*! @var AggressivenessValid - True for aggressiveness values that are currently valid. - */ - bool AggressivenessValid[ kMaxType + 1 ]; - - /*! @var CurrentPowerState - The ordinal of our current power state. - */ + // Our current power state. unsigned long CurrentPowerState; - /*! @var ParentsCurrentPowerFlags - Logical OR of power flags for each power domain parent. 
- */ + // Logical OR of power flags for each power domain parent. IOPMPowerFlags ParentsCurrentPowerFlags; - /*! @var MaxCapability - Ordinal of highest power state we can achieve in current power domain. - */ + // The highest power state we can achieve in current power domain. unsigned long MaxCapability; - /*! @var OutputPowerCharacterFlags - Logical OR of all output power character flags in the array. - */ + // Logical OR of all output power character flags in the array. IOPMPowerFlags OutputPowerCharacterFlags; - /*! @var SerialNumber - Used to uniquely identify power management notification to apps and clients. - */ - UInt16 SerialNumber; - - /*! @var ResponseArray - OSArray which manages responses from notified apps and clients. - */ + // OSArray which manages responses from notified apps and clients. OSArray * ResponseArray; + OSArray * NotifyClientArray; + + // Used to uniquely identify power management notification to apps and clients. + UInt16 SerialNumber; - /*! @var OutOfBandParameter - Used to communicate desired function to tellClientsWithResponse(). - This is used because it avoids changing the signatures of the affected virtual methods. - */ + // Used to communicate desired function to tellClientsWithResponse(). + // This is used because it avoids changing the signatures of the affected virtual methods. 
int OutOfBandParameter; AbsoluteTime DriverCallStartTime; IOPMPowerFlags CurrentCapabilityFlags; + long ActivityTicklePowerState; unsigned long CurrentPowerConsumption; unsigned long TempClampPowerState; - unsigned long TempClampCount; IOPMWorkQueue * PMWorkQueue; - IOPMRequest * PMRequest; OSSet * InsertInterestSet; OSSet * RemoveInterestSet; OSArray * NotifyChildArray; - unsigned long WaitReason; - unsigned long NextMachineState; + OSDictionary * PowerClients; thread_call_t DriverCallEntry; void * DriverCallParamPtr; IOItemCount DriverCallParamCount; IOItemCount DriverCallParamSlots; - IOOptionBits DriverCallReason; - long ActivityTicklePowerState; - bool StrictTreeOrder; - bool DriverCallBusy; - bool ActivityTimerStopped; - bool WillAdjustPowerState; - bool WillPMStop; + uint32_t DriverCallReason; + uint32_t TempClampCount; + uint32_t OverrideMaxPowerState; + uint32_t ActivityTickleCount; + uint32_t WaitReason; + uint32_t NextMachineState; + uint32_t RootDomainState; + uint32_t ThreadAssertionCount; + + // Protected by PMLock + struct { + uint32_t DriverCallBusy : 1; + uint32_t PMStop : 1; + } LockedFlags; + + thread_t ThreadAssertionThread; #if PM_VARS_SUPPORT IOPMprot * PMVars; #endif - /*! @function serialize - Serialize IOServicePM state for debug output. - */ + // Serialize IOServicePM state for debug output. 
+ IOReturn gatedSerialize( OSSerialize * s ); virtual bool serialize( OSSerialize * s ) const; }; -#define fWeAreRoot pwrMgt->WeAreRoot #define fInterestedDrivers pwrMgt->InterestedDrivers #define fDriverTimer pwrMgt->DriverTimer #define fAckTimer pwrMgt->AckTimer @@ -312,31 +202,27 @@ class IOServicePM : public OSObject #define fMachineState pwrMgt->MachineState #define fSettleTimeUS pwrMgt->SettleTimeUS #define fHeadNoteFlags pwrMgt->HeadNoteFlags -#define fHeadNoteState pwrMgt->HeadNoteState -#define fHeadNoteOutputFlags pwrMgt->HeadNoteOutputFlags -#define fHeadNoteDomainState pwrMgt->HeadNoteDomainState -#define fHeadNoteParent pwrMgt->HeadNoteParent -#define fHeadNoteCapabilityFlags pwrMgt->HeadNoteCapabilityFlags +#define fHeadNotePowerState pwrMgt->HeadNotePowerState +#define fHeadNotePowerArrayEntry pwrMgt->HeadNotePowerArrayEntry +#define fHeadNoteDomainFlags pwrMgt->HeadNoteDomainFlags +#define fHeadNoteParentConnection pwrMgt->HeadNoteParentConnection +#define fHeadNoteParentFlags pwrMgt->HeadNoteParentFlags #define fHeadNotePendingAcks pwrMgt->HeadNotePendingAcks #define fPMLock pwrMgt->PMLock #define fInitialChange pwrMgt->InitialChange -#define fNeedToBecomeUsable pwrMgt->NeedToBecomeUsable #define fDeviceOverrides pwrMgt->DeviceOverrides -#define fClampOn pwrMgt->ClampOn -#define fOwner pwrMgt->Owner #define fActivityLock pwrMgt->ActivityLock #define fIdleTimerEventSource pwrMgt->IdleTimerEventSource #define fIdleTimerPeriod pwrMgt->IdleTimerPeriod #define fIdleTimerMinPowerState pwrMgt->IdleTimerMinPowerState #define fDeviceActive pwrMgt->DeviceActive +#define fIdleTimerStartTime pwrMgt->IdleTimerStartTime #define fDeviceActiveTimestamp pwrMgt->DeviceActiveTimestamp -#define fDriverDesire pwrMgt->DriverDesire +#define fActivityTickleCount pwrMgt->ActivityTickleCount #define fDeviceDesire pwrMgt->DeviceDesire -#define fChildrenDesire pwrMgt->ChildrenDesire #define fDesiredPowerState pwrMgt->DesiredPowerState #define fPreviousRequest 
pwrMgt->PreviousRequest #define fName pwrMgt->Name -#define fPlatform pwrMgt->Platform #define fNumberOfPowerStates pwrMgt->NumberOfPowerStates #define fPowerStates pwrMgt->PowerStates #define fControllingDriver pwrMgt->ControllingDriver @@ -349,6 +235,7 @@ class IOServicePM : public OSObject #define fOutputPowerCharacterFlags pwrMgt->OutputPowerCharacterFlags #define fSerialNumber pwrMgt->SerialNumber #define fResponseArray pwrMgt->ResponseArray +#define fNotifyClientArray pwrMgt->NotifyClientArray #define fDoNotPowerDown pwrMgt->DoNotPowerDown #define fOutOfBandParameter pwrMgt->OutOfBandParameter #define fDriverCallStartTime pwrMgt->DriverCallStartTime @@ -356,8 +243,8 @@ class IOServicePM : public OSObject #define fCurrentPowerConsumption pwrMgt->CurrentPowerConsumption #define fTempClampPowerState pwrMgt->TempClampPowerState #define fTempClampCount pwrMgt->TempClampCount +#define fOverrideMaxPowerState pwrMgt->OverrideMaxPowerState #define fPMWorkQueue pwrMgt->PMWorkQueue -#define fPMRequest pwrMgt->PMRequest #define fWaitReason pwrMgt->WaitReason #define fNextMachineState pwrMgt->NextMachineState #define fDriverCallReason pwrMgt->DriverCallReason @@ -365,44 +252,36 @@ class IOServicePM : public OSObject #define fDriverCallParamPtr pwrMgt->DriverCallParamPtr #define fDriverCallParamCount pwrMgt->DriverCallParamCount #define fDriverCallParamSlots pwrMgt->DriverCallParamSlots -#define fDriverCallBusy pwrMgt->DriverCallBusy -#define fWillPMStop pwrMgt->WillPMStop #define fActivityTickled pwrMgt->ActivityTickled #define fInsertInterestSet pwrMgt->InsertInterestSet #define fRemoveInterestSet pwrMgt->RemoveInterestSet #define fStrictTreeOrder pwrMgt->StrictTreeOrder #define fNotifyChildArray pwrMgt->NotifyChildArray -#define fWillAdjustPowerState pwrMgt->WillAdjustPowerState -#define fActivityTimerStopped pwrMgt->ActivityTimerStopped +#define fIdleTimerStopped pwrMgt->IdleTimerStopped +#define fAdjustPowerScheduled pwrMgt->AdjustPowerScheduled #define 
fActivityTicklePowerState pwrMgt->ActivityTicklePowerState #define fPMVars pwrMgt->PMVars +#define fPowerClients pwrMgt->PowerClients +#define fRootDomainState pwrMgt->RootDomainState +#define fThreadAssertionCount pwrMgt->ThreadAssertionCount +#define fThreadAssertionThread pwrMgt->ThreadAssertionThread +#define fLockedFlags pwrMgt->LockedFlags -/*! -@defined ACK_TIMER_PERIOD -@discussion When an IOService is waiting for acknowledgement to a power change -notification from an interested driver or the controlling driver its ack timer -is ticking every tenth of a second. +/* +When an IOService is waiting for acknowledgement to a power change +notification from an interested driver or the controlling driver, +the ack timer is ticking every tenth of a second. (100000000 nanoseconds are one tenth of a second). */ -#define ACK_TIMER_PERIOD 100000000 - -#define IOPMParentInitiated 1 // this power change initiated by our parent -#define IOPMWeInitiated 2 // this power change initiated by this device -#define IOPMNotDone 4 // we couldn't make this change -#define IOPMNotInUse 8 // this list element not currently in use -#define IOPMDomainWillChange 16 // change started by PowerDomainWillChangeTo -#define IOPMDomainDidChange 32 // change started by PowerDomainDidChangeTo - -struct changeNoteItem { - unsigned long flags; - unsigned long newStateNumber; - IOPMPowerFlags outputPowerCharacter; - IOPMPowerFlags inputPowerRequirement; - IOPMPowerFlags domainState; - IOPowerConnection * parent; - IOPMPowerFlags singleParentState; - IOPMPowerFlags capabilityFlags; -}; +#define ACK_TIMER_PERIOD 100000000 + +#define kIOPMParentInitiated 0x01 // this power change initiated by our parent +#define kIOPMWeInitiated 0x02 // this power change initiated by this device +#define kIOPMNotDone 0x04 // we couldn't make this change +#define kIOPMDomainWillChange 0x08 // change started by PowerDomainWillChangeTo +#define kIOPMDomainDidChange 0x10 // change started by PowerDomainDidChangeTo 
+#define kIOPMDomainPowerDrop 0x20 // Domain is lowering power +#define kIOPMSynchronize 0x40 // change triggered by power tree re-sync enum { kDriverCallInformPreChange, @@ -421,109 +300,151 @@ enum { kNotifyPriority }; +typedef bool (*IOPMMessageFilter)(OSObject * object, void * context); + // used for applyToInterested -struct context { - OSArray * responseFlags; - UInt16 serialNumber; - UInt16 counter; - UInt32 maxTimeRequested; - int msgType; - IOService * us; - unsigned long stateNumber; - IOPMPowerFlags stateFlags; - const char * errorLog; +struct IOPMInterestContext { + OSArray * responseFlags; + OSArray * notifyClients; + UInt16 serialNumber; + UInt16 counter; + UInt32 maxTimeRequested; + int msgType; + IOService * us; + unsigned long stateNumber; + IOPMPowerFlags stateFlags; + const char * errorLog; + IOPMMessageFilter filterFunc; }; +//********************************************************************************* +// PM Statistics & Diagnostics +//********************************************************************************* + +extern const OSSymbol *gIOPMStatsApplicationResponseTimedOut; +extern const OSSymbol *gIOPMStatsApplicationResponseCancel; +extern const OSSymbol *gIOPMStatsApplicationResponseSlow; + //********************************************************************************* // PM command types //********************************************************************************* enum { /* Command Types */ - kIOPMRequestTypeInvalid = 0x00, - kIOPMRequestTypePMStop = 0x01, - kIOPMRequestTypeAddPowerChild1 = 0x02, - kIOPMRequestTypeAddPowerChild2 = 0x03, - kIOPMRequestTypeAddPowerChild3 = 0x04, - kIOPMRequestTypeRegisterPowerDriver = 0x05, - kIOPMRequestTypeAdjustPowerState = 0x06, - kIOPMRequestTypeMakeUsable = 0x07, - kIOPMRequestTypeTemporaryPowerClamp = 0x08, - kIOPMRequestTypePowerDomainWillChange = 0x09, - kIOPMRequestTypePowerDomainDidChange = 0x0A, - kIOPMRequestTypeChangePowerStateTo = 0x0B, - 
kIOPMRequestTypeChangePowerStateToPriv = 0x0C, - kIOPMRequestTypePowerOverrideOnPriv = 0x0D, - kIOPMRequestTypePowerOverrideOffPriv = 0x0E, - kIOPMRequestTypeActivityTickle = 0x0F, + kIOPMRequestTypeInvalid = 0x00, + kIOPMRequestTypePMStop = 0x01, + kIOPMRequestTypeAddPowerChild1 = 0x02, + kIOPMRequestTypeAddPowerChild2 = 0x03, + kIOPMRequestTypeAddPowerChild3 = 0x04, + kIOPMRequestTypeRegisterPowerDriver = 0x05, + kIOPMRequestTypeAdjustPowerState = 0x06, + kIOPMRequestTypePowerDomainWillChange = 0x07, + kIOPMRequestTypePowerDomainDidChange = 0x08, + kIOPMRequestTypePowerOverrideOnPriv = 0x09, + kIOPMRequestTypePowerOverrideOffPriv = 0x0A, + kIOPMRequestTypeActivityTickle = 0x0B, + kIOPMRequestTypeRequestPowerState = 0x0C, + kIOPMRequestTypeSynchronizePowerTree = 0x0D, + kIOPMRequestTypeRequestPowerStateOverride = 0x0E, + kIOPMRequestTypeSetIdleTimerPeriod = 0x0F, + /* Reply Types */ - kIOPMRequestTypeReplyStart = 0x80, - kIOPMRequestTypeAckPowerChange = 0x81, - kIOPMRequestTypeAckSetPowerState = 0x82, - kIOPMRequestTypeAllowPowerChange = 0x83, - kIOPMRequestTypeCancelPowerChange = 0x84, - kIOPMRequestTypeInterestChanged = 0x85, - kIOPMRequestTypeIdleCancel = 0x86 + kIOPMRequestTypeReplyStart = 0x80, + kIOPMRequestTypeAckPowerChange = 0x81, + kIOPMRequestTypeAckSetPowerState = 0x82, + kIOPMRequestTypeAllowPowerChange = 0x83, + kIOPMRequestTypeCancelPowerChange = 0x84, + kIOPMRequestTypeInterestChanged = 0x85, + kIOPMRequestTypeIdleCancel = 0x86 }; //********************************************************************************* -// PM Helper Classes +// IOServicePM internal helper classes //********************************************************************************* +typedef void (*IOPMCompletionAction)(void * target, void * param, IOReturn status); + class IOPMRequest : public IOCommand { OSDeclareDefaultStructors( IOPMRequest ) protected: - IOOptionBits fType; // request type - IOService * fTarget; // request target - IOPMRequest * fParent; // parent 
request - IOItemCount fChildCount; // wait if non-zero + IOService * fTarget; // request target + IOPMRequest * fRequestNext; // the next request in the chain + IOPMRequest * fRequestRoot; // the root request in the issue tree + IOItemCount fWorkWaitCount; // execution blocked if non-zero + IOItemCount fFreeWaitCount; // completion blocked if non-zero + uint32_t fType; // request type + + IOPMCompletionAction fCompletionAction; + void * fCompletionTarget; + void * fCompletionParam; + IOReturn fCompletionStatus; public: - void * fArg0; - void * fArg1; - void * fArg2; + void * fArg0; + void * fArg1; + void * fArg2; - inline bool hasChildRequest( void ) const + inline bool isWorkBlocked( void ) const { - return (fChildCount != 0); + return (fWorkWaitCount != 0); } - inline bool hasParentRequest( void ) const + inline bool isFreeBlocked( void ) const { - return (fParent != 0); + return (fFreeWaitCount != 0); } - inline void setParentRequest( IOPMRequest * parent ) + inline IOPMRequest * getNextRequest( void ) const { - if (!fParent) - { - fParent = parent; - fParent->fChildCount++; - } + return fRequestNext; } - inline IOOptionBits getType( void ) const + inline IOPMRequest * getRootRequest( void ) const + { + if (fRequestRoot) return fRequestRoot; + if (fCompletionAction) return (IOPMRequest *) this; + return 0; + } + + inline uint32_t getType( void ) const { return fType; } - inline bool isReply( void ) const + inline bool isReplyType( void ) const { return (fType > kIOPMRequestTypeReplyStart); } - inline IOService * getTarget( void ) const + inline IOService * getTarget( void ) const { return fTarget; } - static IOPMRequest *create( void ); + inline bool isCompletionInstalled( void ) + { + return (fCompletionAction != 0); + } - void reset( void ); + inline void installCompletionAction( + IOPMCompletionAction action, + void * target, + void * param ) + { + fCompletionAction = action; + fCompletionTarget = target; + fCompletionParam = param; + } - bool init( IOService 
* owner, IOOptionBits type ); + static IOPMRequest * create( void ); + bool init( IOService * owner, IOOptionBits type ); + void reset( void ); + void attachNextRequest( IOPMRequest * next ); + void detachNextRequest( void ); + void attachRootRequest( IOPMRequest * root ); + void detachRootRequest( void ); }; class IOPMRequestQueue : public IOEventSource @@ -564,8 +485,26 @@ class IOPMWorkQueue : public IOEventSource virtual bool init( IOService * inOwner, Action work, Action retire ); public: - static IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire ); - void queuePMRequest( IOPMRequest * request ); + static IOPMWorkQueue * create( IOService * inOwner, Action work, Action retire ); + void queuePMRequest( IOPMRequest * request ); +}; + +class IOPMCompletionQueue : public IOEventSource +{ + OSDeclareDefaultStructors( IOPMCompletionQueue ) + +public: + typedef bool (*Action)( IOService *, IOPMRequest *, IOPMCompletionQueue * ); + +protected: + queue_head_t fQueue; + + virtual bool checkForWork( void ); + virtual bool init( IOService * inOwner, Action inAction ); + +public: + static IOPMCompletionQueue * create( IOService * inOwner, Action inAction ); + void queuePMRequest( IOPMRequest * request ); }; #endif /* !_IOKIT_IOSERVICEPMPRIVATE_H */ diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h index 577ddd7e1..1d455fbee 100644 --- a/iokit/Kernel/IOServicePrivate.h +++ b/iokit/Kernel/IOServicePrivate.h @@ -84,7 +84,8 @@ class _IOServiceNotifier : public IONotifier OSOrderedSet * whence; OSDictionary * matching; - IOServiceNotificationHandler handler; + IOServiceMatchingNotificationHandler handler; + IOServiceNotificationHandler compatHandler; void * target; void * ref; SInt32 priority; @@ -130,7 +131,7 @@ class _IOConfigThread : public OSObject virtual void free(); static void configThread( void ); - static void main( _IOConfigThread * self ); + static void main( void * arg, wait_result_t result ); }; enum { diff --git 
a/iokit/Kernel/IOSharedDataQueue.cpp b/iokit/Kernel/IOSharedDataQueue.cpp index 85bd0e3db..7fb3af426 100644 --- a/iokit/Kernel/IOSharedDataQueue.cpp +++ b/iokit/Kernel/IOSharedDataQueue.cpp @@ -75,7 +75,7 @@ Boolean IOSharedDataQueue::initWithCapacity(UInt32 size) return false; } - dataQueue = (IODataQueueMemory *)IOMallocAligned(round_page_32(size + DATA_QUEUE_MEMORY_HEADER_SIZE + DATA_QUEUE_MEMORY_APPENDIX_SIZE), PAGE_SIZE); + dataQueue = (IODataQueueMemory *)IOMallocAligned(round_page(size + DATA_QUEUE_MEMORY_HEADER_SIZE + DATA_QUEUE_MEMORY_APPENDIX_SIZE), PAGE_SIZE); if (dataQueue == 0) { return false; } @@ -95,7 +95,7 @@ Boolean IOSharedDataQueue::initWithCapacity(UInt32 size) void IOSharedDataQueue::free() { if (dataQueue) { - IOFreeAligned(dataQueue, round_page_32(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE + DATA_QUEUE_MEMORY_APPENDIX_SIZE)); + IOFreeAligned(dataQueue, round_page(dataQueue->queueSize + DATA_QUEUE_MEMORY_HEADER_SIZE + DATA_QUEUE_MEMORY_APPENDIX_SIZE)); dataQueue = NULL; } diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp index 42003bf24..b621622fa 100644 --- a/iokit/Kernel/IOStartIOKit.cpp +++ b/iokit/Kernel/IOStartIOKit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,13 +26,14 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998,1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998,1999 Apple Inc. All rights reserved. 
* * HISTORY * */ #include +#include #include #include #include @@ -53,6 +54,8 @@ extern "C" { extern void OSlibkernInit (void); +void iokit_post_constructor_init(void) __attribute__((section("__TEXT, initcode"))); + #include #include @@ -74,7 +77,8 @@ void IOKitInitializeTime( void ) void IOKitResetTime( void ) { - uint32_t secs, microsecs; + clock_sec_t secs; + clock_usec_t microsecs; clock_initialize_calendar(); @@ -83,85 +87,10 @@ void IOKitResetTime( void ) gIOLastWakeTime.tv_usec = microsecs; } - -// From -extern int debug_mode; - -void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) +void iokit_post_constructor_init(void) { - IOPlatformExpertDevice * rootNub; - int debugFlags; IORegistryEntry * root; OSObject * obj; - extern const char * gIOKernelKmods; - OSString * errorString = NULL; // must release - OSDictionary * fakeKmods; // must release - OSCollectionIterator * kmodIter; // must release - OSString * kmodName; // don't release - - if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) )) - gIOKitDebug = debugFlags; - - // Check for the log synchronous bit set in io - if (gIOKitDebug & kIOLogSynchronous) - debug_mode = true; - - // - // Have to start IOKit environment before we attempt to start - // the C++ runtime environment. At some stage we have to clean up - // the initialisation path so that OS C++ can initialise independantly - // of iokit basic service initialisation, or better we have IOLib stuff - // initialise as basic OS services. - // - IOLibInit(); - OSlibkernInit(); - - /***** - * Declare the fake kmod_info structs for built-in components - * that must be tracked as independent units for dependencies. 
- */ - fakeKmods = OSDynamicCast(OSDictionary, - OSUnserialize(gIOKernelKmods, &errorString)); - - if (!fakeKmods) { - if (errorString) { - panic("Kernel kmod list syntax error: %s\n", - errorString->getCStringNoCopy()); - errorString->release(); - } else { - panic("Error loading kernel kmod list.\n"); - } - } - - kmodIter = OSCollectionIterator::withCollection(fakeKmods); - if (!kmodIter) { - panic("Can't declare in-kernel kmods.\n"); - } - while ((kmodName = OSDynamicCast(OSString, kmodIter->getNextObject()))) { - - OSString * kmodVersion = OSDynamicCast(OSString, - fakeKmods->getObject(kmodName)); - if (!kmodVersion) { - panic("Can't declare in-kernel kmod; \"%s\" has " - "an invalid version.\n", - kmodName->getCStringNoCopy()); - } - - // empty version strings get replaced with current kernel version - const char *vers = (strlen(kmodVersion->getCStringNoCopy()) - ? kmodVersion->getCStringNoCopy() - : osrelease); - - if (KERN_SUCCESS != kmod_create_fake(kmodName->getCStringNoCopy(), vers)) { - panic("Failure declaring in-kernel kmod \"%s\".\n", - kmodName->getCStringNoCopy()); - } - } - - kmodIter->release(); - fakeKmods->release(); - - root = IORegistryEntry::initialize(); assert( root ); @@ -185,15 +114,49 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) obj->release(); } +} + +// From +extern int debug_mode; + +/***** + * Pointer into bootstrap KLD segment for functions never used past startup. + */ +void (*record_startup_extensions_function)(void) = 0; + +void StartIOKit( void * p1, void * p2, void * p3, void * p4 ) +{ + IOPlatformExpertDevice * rootNub; + int debugFlags; + + if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) )) + gIOKitDebug = debugFlags; + + // Check for the log synchronous bit set in io + if (gIOKitDebug & kIOLogSynchronous) + debug_mode = true; + + // + // Have to start IOKit environment before we attempt to start + // the C++ runtime environment. 
At some stage we have to clean up + // the initialisation path so that OS C++ can initialise independantly + // of iokit basic service initialisation, or better we have IOLib stuff + // initialise as basic OS services. + // + IOLibInit(); + OSlibkernInit(); + rootNub = new IOPlatformExpertDevice; if( rootNub && rootNub->initWithArgs( p1, p2, p3, p4)) { rootNub->attach( 0 ); - /* Enter into the catalogue the drivers - * provided by BootX. + /* If the bootstrap segment set up a function to record startup + * extensions, call it now. */ - gIOCatalogue->recordStartupExtensions(); + if (record_startup_extensions_function) { + record_startup_extensions_function(); + } rootNub->registerService(); diff --git a/iokit/Kernel/IOStringFuncs.c b/iokit/Kernel/IOStringFuncs.c index 7b43926d3..548e980a1 100644 --- a/iokit/Kernel/IOStringFuncs.c +++ b/iokit/Kernel/IOStringFuncs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,7 +25,9 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* Copyright (c) 1995 NeXT Computer, Inc. All rights reserved. + +/* + * Copyright (c) 1995 NeXT Computer, Inc. All rights reserved. * * strol.c - The functions strtol() & strtoul() are exported as public API * via the header file ~driverkit/generalFuncs.h @@ -38,7 +40,7 @@ * Commented out references to errno. */ -/*- +/* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -79,6 +81,15 @@ #include #include + +long strtol(const char *nptr, char **endptr, int base); +unsigned long strtoul(const char *nptr, char **endptr, int base); +quad_t strtoq(const char *nptr, char **endptr, int base); +u_quad_t strtouq(const char *nptr, char **endptr, int base); +char *strchr(const char *str, int ch); +char *strncat(char *s1, const char *s2, unsigned long n); + + typedef int BOOL; static inline BOOL @@ -191,7 +202,16 @@ strtol(const char *nptr, char **endptr, int base) } else if (neg) acc = -acc; if (endptr != 0) - *endptr = (char *)(any ? s - 1 : nptr); + { + if(any) + { + *endptr = __CAST_AWAY_QUALIFIER(s - 1, const, char *); + } + else + { + *endptr = __CAST_AWAY_QUALIFIER(nptr, const, char *); + } + } return (acc); } @@ -253,7 +273,17 @@ strtoul(const char *nptr, char **endptr, int base) } else if (neg) acc = -acc; if (endptr != 0) - *endptr = (char *)(any ? s - 1 : nptr); + { + if(any) + { + *endptr = __CAST_AWAY_QUALIFIER(s - 1, const, char *); + } + else + { + *endptr = __CAST_AWAY_QUALIFIER(nptr, const, char *); + } + } + return (acc); } @@ -329,7 +359,7 @@ strtoq(const char *nptr, char **endptr, int base) break; if (c >= base) break; - if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim) + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) any = -1; else { any = 1; @@ -343,7 +373,17 @@ strtoq(const char *nptr, char **endptr, int base) } else if (neg) acc = -acc; if (endptr != 0) - *endptr = (char *)(any ? 
s - 1 : nptr); + { + if(any) + { + *endptr = __CAST_AWAY_QUALIFIER(s - 1, const, char *); + } + else + { + *endptr = __CAST_AWAY_QUALIFIER(nptr, const, char *); + } + } + return (acc); } @@ -400,7 +440,7 @@ strtouq(const char *nptr, break; if (c >= base) break; - if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim) + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) any = -1; else { any = 1; @@ -414,7 +454,17 @@ strtouq(const char *nptr, } else if (neg) acc = -acc; if (endptr != 0) - *endptr = (char *)(any ? s - 1 : nptr); + { + if(any) + { + *endptr = __CAST_AWAY_QUALIFIER(s - 1, const, char *); + } + else + { + *endptr = __CAST_AWAY_QUALIFIER(nptr, const, char *); + } + } + return (acc); } @@ -427,7 +477,7 @@ char *strchr(const char *str, int ch) { do { if (*str == ch) - return((char *)str); + return(__CAST_AWAY_QUALIFIER(str, const, char *)); } while (*str++); return ((char *) 0); } diff --git a/iokit/Kernel/IOSubMemoryDescriptor.cpp b/iokit/Kernel/IOSubMemoryDescriptor.cpp new file mode 100644 index 000000000..3e06210fb --- /dev/null +++ b/iokit/Kernel/IOSubMemoryDescriptor.cpp @@ -0,0 +1,193 @@ +/* + * Copyright (c) 1998-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include + +#include "IOKitKernelInternal.h" + +#define super IOMemoryDescriptor + +OSDefineMetaClassAndStructors(IOSubMemoryDescriptor, IOMemoryDescriptor) + +IOReturn IOSubMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) +{ +#ifdef __LP64__ + super::redirect( safeTask, doRedirect ); +#endif /* __LP64__ */ + return( _parent->redirect( safeTask, doRedirect )); +} + +IOSubMemoryDescriptor * +IOSubMemoryDescriptor::withSubRange(IOMemoryDescriptor * of, + IOByteCount offset, + IOByteCount length, + IOOptionBits options) +{ + IOSubMemoryDescriptor *self = new IOSubMemoryDescriptor; + + if (self && !self->initSubRange(of, offset, length, (IODirection) options)) { + self->release(); + self = 0; + } + return self; +} + +bool IOSubMemoryDescriptor::initSubRange( IOMemoryDescriptor * parent, + IOByteCount offset, IOByteCount length, + IODirection direction ) +{ + if( !parent) + return( false); + + if( (offset + length) > parent->getLength()) + return( false); + + /* + * We can check the _parent instance variable before having ever set it + * to an initial value because I/O Kit guarantees that all our instance + * variables are zeroed on an object's allocation. + */ + + if( !_parent) { + if( !super::init()) + return( false ); + } else { + /* + * An existing memory descriptor is being retargeted to + * point to somewhere else. Clean up our present state. 
+ */ + + _parent->release(); + _parent = 0; + } + + parent->retain(); + _parent = parent; + _start = offset; + _length = length; + _flags = direction; +#ifndef __LP64__ + _direction = (IODirection) (_flags & kIOMemoryDirectionMask); +#endif /* !__LP64__ */ + _tag = parent->getTag(); + + return( true ); +} + +void IOSubMemoryDescriptor::free( void ) +{ + if( _parent) + _parent->release(); + + super::free(); +} + +addr64_t +IOSubMemoryDescriptor::getPhysicalSegment(IOByteCount offset, IOByteCount * length, IOOptionBits options) +{ + addr64_t address; + IOByteCount actualLength; + + assert(offset <= _length); + + if( length) + *length = 0; + + if( offset >= _length) + return( 0 ); + + address = _parent->getPhysicalSegment( offset + _start, &actualLength, options ); + + if( address && length) + *length = min( _length - offset, actualLength ); + + return( address ); +} + +IOReturn IOSubMemoryDescriptor::setPurgeable( IOOptionBits newState, + IOOptionBits * oldState ) +{ + IOReturn err; + + err = _parent->setPurgeable( newState, oldState ); + + return( err ); +} + +IOReturn IOSubMemoryDescriptor::prepare( + IODirection forDirection) +{ + IOReturn err; + + err = _parent->prepare( forDirection); + + return( err ); +} + +IOReturn IOSubMemoryDescriptor::complete( + IODirection forDirection) +{ + IOReturn err; + + err = _parent->complete( forDirection); + + return( err ); +} + +IOMemoryMap * IOSubMemoryDescriptor::makeMapping( + IOMemoryDescriptor * owner, + task_t intoTask, + IOVirtualAddress address, + IOOptionBits options, + IOByteCount offset, + IOByteCount length ) +{ + IOMemoryMap * mapping = 0; + +#ifndef __LP64__ + if (!(kIOMap64Bit & options)) + { + panic("IOSubMemoryDescriptor::makeMapping !64bit"); + } +#endif /* !__LP64__ */ + + mapping = (IOMemoryMap *) _parent->makeMapping( + owner, + intoTask, + address, + options, _start + offset, length ); + + return( mapping ); +} + +uint64_t +IOSubMemoryDescriptor::getPreparationID( void ) +{ + return 
(_parent->getPreparationID()); +} + diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp index d32a17887..ed45f6ab9 100644 --- a/iokit/Kernel/IOTimerEventSource.cpp +++ b/iokit/Kernel/IOTimerEventSource.cpp @@ -89,7 +89,7 @@ void IOTimerEventSource::timeout(void *self) if (doit && me->enabled && AbsoluteTime_to_scalar(&me->abstime)) { IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), - (unsigned int) doit, (unsigned int) me->owner); + (uintptr_t) doit, (uintptr_t) me->owner); (*doit)(me->owner, me); } wl->openGate(); @@ -97,9 +97,12 @@ void IOTimerEventSource::timeout(void *self) } } -void IOTimerEventSource::timeoutAndRelease(void * self, void * count) +void IOTimerEventSource::timeoutAndRelease(void * self, void * c) { IOTimerEventSource *me = (IOTimerEventSource *) self; + /* The second parameter (a pointer) gets abused to carry an SInt32, so on LP64, "count" + must be cast to "long" before, in order to tell GCC we're not truncating a pointer. */ + SInt32 count = (SInt32) (long) c; if (me->enabled && me->action) { @@ -110,10 +113,10 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * count) Action doit; wl->closeGate(); doit = (Action) me->action; - if (doit && (me->reserved->calloutGeneration == (SInt32) count)) + if (doit && (me->reserved->calloutGeneration == count)) { IOTimeStampConstant(IODBG_TIMES(IOTIMES_ACTION), - (unsigned int) doit, (unsigned int) me->owner); + (uintptr_t) doit, (uintptr_t) me->owner); (*doit)(me->owner, me); } wl->openGate(); @@ -227,6 +230,7 @@ IOReturn IOTimerEventSource::setTimeout(UInt32 interval, UInt32 scale_factor) return wakeAtTime(end); } +#if !defined(__LP64__) IOReturn IOTimerEventSource::setTimeout(mach_timespec_t interval) { AbsoluteTime end, nsecs; @@ -239,6 +243,7 @@ IOReturn IOTimerEventSource::setTimeout(mach_timespec_t interval) return wakeAtTime(end); } +#endif IOReturn IOTimerEventSource::setTimeout(AbsoluteTime interval) { @@ -273,6 +278,7 @@ IOReturn 
IOTimerEventSource::wakeAtTime(UInt32 inAbstime, UInt32 scale_factor) return wakeAtTime(end); } +#if !defined(__LP64__) IOReturn IOTimerEventSource::wakeAtTime(mach_timespec_t inAbstime) { AbsoluteTime end, nsecs; @@ -285,6 +291,7 @@ IOReturn IOTimerEventSource::wakeAtTime(mach_timespec_t inAbstime) return wakeAtTime(end); } +#endif void IOTimerEventSource::setWorkLoop(IOWorkLoop *inWorkLoop) { diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index 940197e34..7f2c78d13 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,6 +27,7 @@ */ +#include #include #include #include @@ -34,7 +35,9 @@ #include #include #include +#include #include +#include #include #include @@ -45,7 +48,7 @@ #define SCALAR64(x) ((io_user_scalar_t)((unsigned int)x)) #define SCALAR32(x) ((uint32_t )x) #define ARG32(x) ((void *)SCALAR32(x)) -#define REF64(x) ((io_user_reference_t)((natural_t)(x))) +#define REF64(x) ((io_user_reference_t)((UInt64)(x))) #define REF32(x) ((int)(x)) enum @@ -85,6 +88,7 @@ extern ipc_port_t master_device_port; extern void iokit_retain_port( ipc_port_t port ); extern void iokit_release_port( ipc_port_t port ); +extern void iokit_release_port_send( ipc_port_t port ); extern kern_return_t iokit_switch_object_port( ipc_port_t port, io_object_t obj, ipc_kobject_type_t type ); @@ -422,7 +426,7 @@ class IOServiceUserNotification : public IOUserNotification virtual void free(); static bool _handler( void * target, - void * ref, IOService * newService ); + void * ref, IOService * newService, IONotifier * notifier ); virtual bool handler( void * ref, IOService * newService ); virtual OSObject * getNextObject(); @@ -436,7 +440,7 @@ class IOServiceMessageUserNotification : public IOUserNotification mach_msg_header_t msgHdr; 
mach_msg_body_t msgBody; mach_msg_port_descriptor_t ports[1]; - OSNotificationHeader64 notifyHeader; + OSNotificationHeader64 notifyHeader __attribute__ ((packed)); }; PingMsg * pingMsg; @@ -587,7 +591,7 @@ void IOServiceUserNotification::free( void ) } bool IOServiceUserNotification::_handler( void * target, - void * ref, IOService * newService ) + void * ref, IOService * newService, IONotifier * notifier ) { return( ((IOServiceUserNotification *) target)->handler( ref, newService )); } @@ -621,13 +625,13 @@ bool IOServiceUserNotification::handler( void * ref, else pingMsg->msgHdr.msgh_local_port = NULL; - kr = mach_msg_send_from_kernel( &pingMsg->msgHdr, + kr = mach_msg_send_from_kernel_proper( &pingMsg->msgHdr, pingMsg->msgHdr.msgh_size); if( port) iokit_release_port( port ); if( KERN_SUCCESS != kr) - IOLog("%s: mach_msg_send_from_kernel {%x}\n", __FILE__, kr ); + IOLog("%s: mach_msg_send_from_kernel_proper {%x}\n", __FILE__, kr ); } return( true ); @@ -754,7 +758,7 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref, else { data->messageArgument[0] |= (data->messageArgument[0] << 32); - argSize = sizeof(messageArgument); + argSize = sizeof(uint32_t); } } else @@ -772,7 +776,7 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref, pingMsg->ports[0].name = providerPort; thisPort = iokit_port_for_object( this, IKOT_IOKIT_OBJECT ); pingMsg->msgHdr.msgh_local_port = thisPort; - kr = mach_msg_send_from_kernel( &pingMsg->msgHdr, + kr = mach_msg_send_from_kernel_proper( &pingMsg->msgHdr, pingMsg->msgHdr.msgh_size); if( thisPort) iokit_release_port( thisPort ); @@ -780,7 +784,7 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref, iokit_release_port( providerPort ); if( KERN_SUCCESS != kr) - IOLog("%s: mach_msg_send_from_kernel {%x}\n", __FILE__, kr ); + IOLog("%s: mach_msg_send_from_kernel_proper {%x}\n", __FILE__, kr ); return( kIOReturnSuccess ); } @@ -807,10 +811,10 @@ void IOUserClient::setAsyncReference(OSAsyncReference 
asyncRef, mach_port_t wakePort, void *callback, void *refcon) { - asyncRef[kIOAsyncReservedIndex] = ((natural_t) wakePort) + asyncRef[kIOAsyncReservedIndex] = ((uintptr_t) wakePort) | (kIOUCAsync0Flags & asyncRef[kIOAsyncReservedIndex]); - asyncRef[kIOAsyncCalloutFuncIndex] = (natural_t) callback; - asyncRef[kIOAsyncCalloutRefconIndex] = (natural_t) refcon; + asyncRef[kIOAsyncCalloutFuncIndex] = (uintptr_t) callback; + asyncRef[kIOAsyncCalloutRefconIndex] = (uintptr_t) refcon; } void IOUserClient::setAsyncReference64(OSAsyncReference64 asyncRef, @@ -823,7 +827,7 @@ void IOUserClient::setAsyncReference64(OSAsyncReference64 asyncRef, asyncRef[kIOAsyncCalloutRefconIndex] = refcon; } -inline OSDictionary * CopyConsoleUser(UInt32 uid) +static OSDictionary * CopyConsoleUser(UInt32 uid) { OSArray * array; OSDictionary * user = 0; @@ -857,7 +861,8 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken, OSDictionary * user; bool secureConsole; - if ((secureConsole = !strcmp(privilegeName, kIOClientPrivilegeSecureConsoleProcess))) + if ((secureConsole = !strncmp(privilegeName, kIOClientPrivilegeSecureConsoleProcess, + sizeof(kIOClientPrivilegeSecureConsoleProcess)))) task = (task_t)((IOUCProcessToken *)securityToken)->token; else task = (task_t)securityToken; @@ -867,16 +872,19 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken, if (KERN_SUCCESS != kr) {} - else if (!strcmp(privilegeName, kIOClientPrivilegeAdministrator)) { + else if (!strncmp(privilegeName, kIOClientPrivilegeAdministrator, + sizeof(kIOClientPrivilegeAdministrator))) { if (0 != token.val[0]) kr = kIOReturnNotPrivileged; - } else if (!strcmp(privilegeName, kIOClientPrivilegeLocalUser)) { + } else if (!strncmp(privilegeName, kIOClientPrivilegeLocalUser, + sizeof(kIOClientPrivilegeLocalUser))) { user = CopyConsoleUser(token.val[0]); if ( user ) user->release(); else kr = kIOReturnNotPrivileged; - } else if (secureConsole || !strcmp(privilegeName, kIOClientPrivilegeConsoleUser)) { + } 
else if (secureConsole || !strncmp(privilegeName, kIOClientPrivilegeConsoleUser, + sizeof(kIOClientPrivilegeConsoleUser))) { user = CopyConsoleUser(token.val[0]); if ( user ) { if (user->getObject(gIOConsoleSessionOnConsoleKey) != kOSBooleanTrue) @@ -966,6 +974,14 @@ IOReturn IOUserClient::registerNotificationPort( return( kIOReturnUnsupported); } +IOReturn IOUserClient::registerNotificationPort( + mach_port_t port, + UInt32 type, + io_user_reference_t refCon) +{ + return (registerNotificationPort(port, type, (UInt32) refCon)); +} + IOReturn IOUserClient::getNotificationSemaphore( UInt32 notification_type, semaphore_t * semaphore ) { @@ -984,29 +1000,16 @@ IOReturn IOUserClient::clientMemoryForType( UInt32 type, return( kIOReturnUnsupported); } +#if !__LP64__ IOMemoryMap * IOUserClient::mapClientMemory( IOOptionBits type, task_t task, IOOptionBits mapFlags, IOVirtualAddress atAddress ) { - IOReturn err; - IOOptionBits options = 0; - IOMemoryDescriptor * memory; - IOMemoryMap * map = 0; - - err = clientMemoryForType( (UInt32) type, &options, &memory ); - - if( memory && (kIOReturnSuccess == err)) { - - options = (options & ~kIOMapUserOptionsMask) - | (mapFlags & kIOMapUserOptionsMask); - map = memory->map( task, atAddress, options ); - memory->release(); - } - - return( map ); + return (NULL); } +#endif IOMemoryMap * IOUserClient::mapClientMemory64( IOOptionBits type, @@ -1094,6 +1097,25 @@ getTargetAndTrapForIndex(IOService ** targetP, UInt32 index) return trap; } +IOReturn IOUserClient::releaseAsyncReference64(OSAsyncReference64 reference) +{ + mach_port_t port; + port = (mach_port_t) (reference[0] & ~kIOUCAsync0Flags); + + if (MACH_PORT_NULL != port) + iokit_release_port_send(port); + + return (kIOReturnSuccess); +} + +IOReturn IOUserClient::releaseNotificationPort(mach_port_t port) +{ + if (MACH_PORT_NULL != port) + iokit_release_port_send(port); + + return (kIOReturnSuccess); +} + IOReturn IOUserClient::sendAsyncResult(OSAsyncReference reference, IOReturn 
result, void *args[], UInt32 numArgs) { @@ -1131,8 +1153,7 @@ IOReturn IOUserClient::sendAsyncResult64(OSAsyncReference64 reference, { OSNotificationHeader64 notifyHdr; IOAsyncCompletionContent asyncContent; - uint32_t pad; - io_user_reference_t args[kMaxAsyncArgs]; + io_user_reference_t args[kMaxAsyncArgs] __attribute__ ((packed)); } msg64; } m; }; @@ -1159,7 +1180,6 @@ IOReturn IOUserClient::sendAsyncResult64(OSAsyncReference64 reference, sizeof(replyMsg.msgHdr) + sizeof(replyMsg.m.msg64) - (kMaxAsyncArgs - numArgs) * sizeof(io_user_reference_t); replyMsg.m.msg64.notifyHdr.size = sizeof(IOAsyncCompletionContent) - + sizeof(uint32_t) + numArgs * sizeof(io_user_reference_t); replyMsg.m.msg64.notifyHdr.type = kIOAsyncCompletionNotificationType; bcopy(reference, replyMsg.m.msg64.notifyHdr.reference, sizeof(OSAsyncReference64)); @@ -1189,10 +1209,10 @@ IOReturn IOUserClient::sendAsyncResult64(OSAsyncReference64 reference, replyMsg.m.msg32.args[idx] = REF32(args[idx]); } - kr = mach_msg_send_from_kernel( &replyMsg.msgHdr, + kr = mach_msg_send_from_kernel_proper( &replyMsg.msgHdr, replyMsg.msgHdr.msgh_size); if( KERN_SUCCESS != kr) - IOLog("%s: mach_msg_send_from_kernel {%x}\n", __FILE__, kr ); + IOLog("%s: mach_msg_send_from_kernel_proper {%x}\n", __FILE__, kr ); return kr; } @@ -1212,16 +1232,16 @@ kern_return_t is_io_object_get_class( io_name_t className ) { const OSMetaClass* my_obj = NULL; - - if( !object) - return( kIOReturnBadArgument ); + + if( !object) + return( kIOReturnBadArgument ); my_obj = object->getMetaClass(); if (!my_obj) { return (kIOReturnNotFound); } - strcpy( className, my_obj->getClassName()); + strlcpy( className, my_obj->getClassName(), sizeof(io_name_t)); return( kIOReturnSuccess ); } @@ -1405,9 +1425,9 @@ kern_return_t is_io_service_match_property_table_ool( kern_return_t *result, boolean_t *matches ) { - kern_return_t kr; - vm_offset_t data; - vm_map_offset_t map_data; + kern_return_t kr; + vm_offset_t data; + vm_map_offset_t map_data; kr = 
vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) matching ); data = CAST_DOWN(vm_offset_t, map_data); @@ -1528,10 +1548,9 @@ static kern_return_t internal_io_service_add_notification( if( !userNotify) continue; - notify = IOService::addNotification( sym, dict, + notify = IOService::addMatchingNotification( sym, dict, &userNotify->_handler, userNotify ); if( notify) { - dict = 0; *notification = userNotify; userNotify->setNotification( notify ); err = kIOReturnSuccess; @@ -1649,6 +1668,7 @@ kern_return_t is_io_service_add_notification_old( io_name_t notification_type, io_string_t matching, mach_port_t port, + // for binary compatibility reasons, this must be natural_t for ILP32 natural_t ref, io_object_t * notification ) { @@ -1933,6 +1953,18 @@ kern_return_t is_io_registry_entry_get_location_in_plane( return( kIOReturnNotFound ); } +/* Routine io_registry_entry_get_registry_entry_id */ +kern_return_t is_io_registry_entry_get_registry_entry_id( + io_object_t registry_entry, + uint64_t *entry_id ) +{ + CHECK( IORegistryEntry, registry_entry, entry ); + + *entry_id = entry->getRegistryEntryID(); + + return (kIOReturnSuccess); +} + // Create a vm_map_copy_t or kalloc'ed data for memory // to be copied out. ipc will free after the copyout. 
@@ -2205,11 +2237,15 @@ kern_return_t is_io_service_get_busy_state( /* Routine io_service_get_state */ kern_return_t is_io_service_get_state( io_object_t _service, - uint64_t *state ) + uint64_t *state, + uint32_t *busy_state, + uint64_t *accumulated_busy_time ) { CHECK( IOService, _service, service ); - *state = service->getState(); + *state = service->getState(); + *busy_state = service->getBusyState(); + *accumulated_busy_time = service->getAccumulatedBusyTime(); return( kIOReturnSuccess ); } @@ -2219,9 +2255,15 @@ kern_return_t is_io_service_wait_quiet( io_object_t _service, mach_timespec_t wait_time ) { + uint64_t timeoutNS; + CHECK( IOService, _service, service ); - return( service->waitQuiet( &wait_time )); + timeoutNS = wait_time.tv_sec; + timeoutNS *= kSecondScale; + timeoutNS += wait_time.tv_nsec; + + return( service->waitQuiet(timeoutNS) ); } /* Routine io_service_request_probe */ @@ -2343,6 +2385,12 @@ kern_return_t is_io_service_open_extended( break; } client->sharedInstance = (0 != client->getProperty(kIOUserClientSharedInstanceKey)); + OSString * creatorName = IOCopyLogNameForPID(proc_selfpid()); + if (creatorName) + { + client->setProperty(kIOUserClientCreatorKey, creatorName); + creatorName->release(); + } } } while (false); @@ -2396,7 +2444,7 @@ kern_return_t is_io_connect_set_notification_port( CHECK( IOUserClient, connection, client ); return( client->registerNotificationPort( port, notification_type, - reference )); + (io_user_reference_t) reference )); } /* Routine io_connect_set_notification_port */ @@ -2549,7 +2597,7 @@ kern_return_t is_io_connect_unmap_memory_from_task name = IOMachPort::makeSendRightForTask( from_task, map, IKOT_IOKIT_OBJECT ); if (name) { - map->unmap(); + map->userClientUnmap(); err = iokit_mod_send_right( from_task, name, -2 ); err = kIOReturnSuccess; } @@ -3142,7 +3190,7 @@ kern_return_t shim_io_connect_method_scalarI_structureO( const io_user_scalar_t * input, mach_msg_type_number_t inputCount, io_struct_inband_t 
output, - mach_msg_type_number_t * outputCount ) + IOByteCount * outputCount ) { IOMethod func; IOReturn err; @@ -3487,7 +3535,7 @@ kern_return_t shim_io_connect_method_structureI_structureO( io_struct_inband_t input, mach_msg_type_number_t inputCount, io_struct_inband_t output, - mach_msg_type_number_t * outputCount ) + IOByteCount * outputCount ) { IOMethod func; IOReturn err = kIOReturnBadArgument; @@ -3636,7 +3684,7 @@ kern_return_t is_io_make_matching( err = kIOReturnNoMemory; continue; } else - strcpy( matching, s->text()); + strlcpy(matching, s->text(), sizeof(io_string_t)); } while( false); @@ -3665,15 +3713,19 @@ kern_return_t is_io_catalog_send_data( if( master_port != master_device_port) return kIOReturnNotPrivileged; - // FIXME: This is a hack. Should have own function for removeKernelLinker() - if( (flag != kIOCatalogRemoveKernelLinker && flag != kIOCatalogKextdFinishedLaunching) && ( !inData || !inDataCount) ) + if( (flag != kIOCatalogRemoveKernelLinker && + flag != kIOCatalogKextdActive && + flag != kIOCatalogKextdFinishedLaunching) && + ( !inData || !inDataCount) ) + { return kIOReturnBadArgument; + } if (inData) { vm_map_offset_t map_data; kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t)inData); - data = CAST_DOWN(vm_offset_t, map_data); + data = CAST_DOWN(vm_offset_t, map_data); if( kr != KERN_SUCCESS) return kr; @@ -3740,18 +3792,26 @@ kern_return_t is_io_catalog_send_data( } break; - case kIOCatalogRemoveKernelLinker: { - if (gIOCatalogue->removeKernelLinker() != KERN_SUCCESS) { - kr = kIOReturnError; - } else { - kr = kIOReturnSuccess; - } - } + case kIOCatalogRemoveKernelLinker: + kr = KERN_NOT_SUPPORTED; + break; + + case kIOCatalogKextdActive: +#if !NO_KEXTD + OSKext::setKextdActive(); + + /* Dump all nonloaded startup extensions; kextd will now send them + * down on request. 
+ */ + OSKext::flushNonloadedKexts( /* flushPrelinkedKexts */ false); +#endif + kr = kIOReturnSuccess; break; case kIOCatalogKextdFinishedLaunching: { #if !NO_KEXTD static bool clearedBusy = false; + if (!clearedBusy) { IOService * serviceRoot = IOService::getServiceRoot(); if (serviceRoot) { @@ -3892,7 +3952,9 @@ kern_return_t is_io_catalog_get_gen_count( return kIOReturnSuccess; } -/* Routine io_catalog_module_loaded */ +/* Routine io_catalog_module_loaded. + * Is invoked from IOKitLib's IOCatalogueModuleLoaded(). Doesn't seem to be used. + */ kern_return_t is_io_catalog_module_loaded( mach_port_t master_port, io_name_t name) @@ -3962,6 +4024,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume { IOReturn err; IOService * object; + IOByteCount structureOutputSize; if (dispatch) { @@ -4002,6 +4065,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume return (err); } + // pre-Leopard API's don't do ool structs if (args->structureInputDescriptor || args->structureOutputDescriptor) { @@ -4009,6 +4073,8 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume return (err); } + structureOutputSize = args->structureOutputSize; + if (args->asyncWakePort) { IOExternalAsyncMethod * method; @@ -4064,7 +4130,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume case kIOUCScalarIStructI: err = shim_io_connect_method_scalarI_structureI( method, object, args->scalarInput, args->scalarInputCount, - (char *)args->structureInput, args->structureInputSize ); + (char *) args->structureInput, args->structureInputSize ); break; case kIOUCScalarIScalarO: @@ -4076,14 +4142,14 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume case kIOUCScalarIStructO: err = shim_io_connect_method_scalarI_structureO( method, object, args->scalarInput, args->scalarInputCount, - (char *) args->structureOutput, &args->structureOutputSize ); + (char *) 
args->structureOutput, &structureOutputSize ); break; case kIOUCStructIStructO: err = shim_io_connect_method_structureI_structureO( method, object, - (char *)args->structureInput, args->structureInputSize, - (char *) args->structureOutput, &args->structureOutputSize ); + (char *) args->structureInput, args->structureInputSize, + (char *) args->structureOutput, &structureOutputSize ); break; default: @@ -4091,14 +4157,22 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume break; } } + + args->structureOutputSize = structureOutputSize; + return (err); } }; /* extern "C" */ -OSMetaClassDefineReservedUsed(IOUserClient, 0); +#if __LP64__ +OSMetaClassDefineReservedUnused(IOUserClient, 0); OSMetaClassDefineReservedUnused(IOUserClient, 1); +#else +OSMetaClassDefineReservedUsed(IOUserClient, 0); +OSMetaClassDefineReservedUsed(IOUserClient, 1); +#endif OSMetaClassDefineReservedUnused(IOUserClient, 2); OSMetaClassDefineReservedUnused(IOUserClient, 3); OSMetaClassDefineReservedUnused(IOUserClient, 4); diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index 54eaa0e3c..688a7c013 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -32,6 +32,8 @@ HISTORY 1998-7-13 Godfrey van der Linden(gvdl) Created. 
*/ + +#include #include #include #include @@ -44,10 +46,15 @@ HISTORY OSDefineMetaClassAndStructors(IOWorkLoop, OSObject); // Block of unused functions intended for future use +#if __LP64__ +OSMetaClassDefineReservedUnused(IOWorkLoop, 0); +OSMetaClassDefineReservedUnused(IOWorkLoop, 1); +OSMetaClassDefineReservedUnused(IOWorkLoop, 2); +#else OSMetaClassDefineReservedUsed(IOWorkLoop, 0); OSMetaClassDefineReservedUsed(IOWorkLoop, 1); - -OSMetaClassDefineReservedUnused(IOWorkLoop, 2); +OSMetaClassDefineReservedUsed(IOWorkLoop, 2); +#endif OSMetaClassDefineReservedUnused(IOWorkLoop, 3); OSMetaClassDefineReservedUnused(IOWorkLoop, 4); OSMetaClassDefineReservedUnused(IOWorkLoop, 5); @@ -73,6 +80,7 @@ static inline bool ISSETP(void *addr, unsigned int flag) #define fFlags loopRestart + bool IOWorkLoop::init() { // The super init and gateLock allocation MUST be done first @@ -110,12 +118,11 @@ bool IOWorkLoop::init() } if ( workThread == NULL ) { - IOThreadFunc cptr = OSMemberFunctionCast( - IOThreadFunc, + thread_continue_t cptr = OSMemberFunctionCast( + thread_continue_t, this, &IOWorkLoop::threadMain); - workThread = IOCreateThread(cptr, this); - if (!workThread) + if (KERN_SUCCESS != kernel_thread_start(cptr, this, &workThread)) return false; } @@ -355,9 +362,12 @@ do { \ } while(workToDo); exitThread: + thread_t thread = workThread; workThread = 0; // Say we don't have a loop and free ourselves free(); - IOExitThread(); + + thread_deallocate(thread); + (void) thread_terminate(thread); } IOThread IOWorkLoop::getThread() const @@ -406,6 +416,11 @@ int IOWorkLoop::sleepGate(void *event, UInt32 interuptibleType) return IORecursiveLockSleep(gateLock, event, interuptibleType); } +int IOWorkLoop::sleepGate(void *event, AbsoluteTime deadline, UInt32 interuptibleType) +{ + return IORecursiveLockSleepDeadline(gateLock, event, deadline, interuptibleType); +} + void IOWorkLoop::wakeupGate(void *event, bool oneThread) { IORecursiveLockWakeup(gateLock, event, oneThread); @@ -427,7 
+442,7 @@ IOReturn IOWorkLoop::runAction(Action inAction, OSObject *target, IOReturn IOWorkLoop::_maintRequest(void *inC, void *inD, void *, void *) { - maintCommandEnum command = (maintCommandEnum) (vm_address_t) inC; + maintCommandEnum command = (maintCommandEnum) (uintptr_t) inC; IOEventSource *inEvent = (IOEventSource *) inD; IOReturn res = kIOReturnSuccess; diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index 170ab0eae..8dbb20b0e 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -86,9 +86,9 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions( void * p1, void * p2, void * p3, void * p4, void * p5, void * p6 ) { - void *inOptions = (void *)p1; + void *inOptions = (void *)p1; uint32_t *returnCode = (uint32_t *)p2; - IOByteCount inOptionsSize = (IOByteCount)p3; +// IOByteCount inOptionsSize = (uintptr_t)p3; IOByteCount *returnCodeSize = (IOByteCount *)p4; int local_priv = 0; @@ -164,9 +164,35 @@ IOReturn RootDomainUserClient::secureSetAggressiveness( *return_code = kIOReturnNotPrivileged; return kIOReturnSuccess; } - } +IOReturn RootDomainUserClient::secureSetMaintenanceWakeCalendar( + void * p1, void * p2, void * p3, + void * p4, void * p5, void * p6 ) +{ +#if ROOT_DOMAIN_RUN_STATES + IOPMCalendarStruct * inCalendar = (IOPMCalendarStruct *) p1; + uint32_t * returnCode = (uint32_t *) p2; + IOByteCount * returnCodeSize = (IOByteCount *) p4; + int admin_priv = 0; + IOReturn ret = kIOReturnNotPrivileged; + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); + admin_priv = (kIOReturnSuccess == ret); + + *returnCodeSize = sizeof(uint32_t); + + if (admin_priv && fOwner) { + *returnCode = fOwner->setMaintenanceWakeCalendar(inCalendar); + return kIOReturnSuccess; + } else { + *returnCode = kIOReturnNotPrivileged; + return kIOReturnSuccess; + } +#else + return kIOReturnUnsupported; +#endif +} IOReturn RootDomainUserClient::clientClose( void ) { @@ 
-184,30 +210,34 @@ IOExternalMethod * RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 index ) { static const IOExternalMethod sMethods[] = { - { // kPMSetAggressiveness, 0 + { // kPMSetAggressiveness, 0 (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetAggressiveness, kIOUCScalarIScalarO, 2, 1 }, - { // kPMGetAggressiveness, 1 + { // kPMGetAggressiveness, 1 0, (IOMethod)&IOPMrootDomain::getAggressiveness, kIOUCScalarIScalarO, 1, 1 }, - { // kPMSleepSystem, 2 + { // kPMSleepSystem, 2 (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystem, kIOUCScalarIScalarO, 0, 1 }, - { // kPMAllowPowerChange, 3 + { // kPMAllowPowerChange, 3 0, (IOMethod)&IOPMrootDomain::allowPowerChange, kIOUCScalarIScalarO, 1, 0 }, - { // kPMCancelPowerChange, 4 + { // kPMCancelPowerChange, 4 0, (IOMethod)&IOPMrootDomain::cancelPowerChange, kIOUCScalarIScalarO, 1, 0 }, - { // kPMShutdownSystem, 5 + { // kPMShutdownSystem, 5 0, (IOMethod)&IOPMrootDomain::shutdownSystem, kIOUCScalarIScalarO, 0, 0 }, - { // kPMRestartSystem, 6 + { // kPMRestartSystem, 6 0, (IOMethod)&IOPMrootDomain::restartSystem, kIOUCScalarIScalarO, 0, 0 }, - { // kPMSleepSystemOptions, 7 + { // kPMSleepSystemOptions, 7 (IOService *)1, (IOMethod)&RootDomainUserClient::secureSleepSystemOptions, kIOUCStructIStructO, kIOUCVariableStructureSize, sizeof(uint32_t) + }, + { // kPMSetMaintenanceWakeCalendar, 8 + (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetMaintenanceWakeCalendar, + kIOUCStructIStructO, sizeof(IOPMCalendarStruct), sizeof(uint32_t) } }; diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index 6f7356a85..c66daabcc 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -59,6 +59,10 @@ class RootDomainUserClient : public IOUserClient unsigned long newLevel, int *return_code ); + IOReturn secureSetMaintenanceWakeCalendar( + void * p1, void * p2, void * p3, + void * p4, void * p5, void * p6 
); + public: virtual IOReturn clientClose( void ); diff --git a/iokit/Kernel/WKdm.h b/iokit/Kernel/WKdm.h index be3ca2d1f..fc73454ae 100644 --- a/iokit/Kernel/WKdm.h +++ b/iokit/Kernel/WKdm.h @@ -47,7 +47,7 @@ extern "C" { //#include //#include -typedef unsigned long WK_word; +typedef unsigned int WK_word; /* at the moment we have dependencies on the page size. That should * be changed to work for any power-of-two size that's at least 16 diff --git a/iokit/Kernel/WKdmCompress.c b/iokit/Kernel/WKdmCompress.c index aa9d1b541..c58477371 100644 --- a/iokit/Kernel/WKdmCompress.c +++ b/iokit/Kernel/WKdmCompress.c @@ -185,8 +185,8 @@ WKdm_compress (WK_word* src_buf, printf("tempLowBitsArray holds %u bytes\n", (char *) next_low_bits - (char *) tempLowBitsArray); - printf("next_full_patt is %u\n", - (unsigned long) next_full_patt); + printf("next_full_patt is %p\n", + next_full_patt); printf(" i.e., there are %u full patterns\n", next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16))); @@ -280,7 +280,7 @@ WKdm_compress (WK_word* src_buf, next_full_patt); #ifdef WK_DEBUG printf("Packing of queue positions stopped at %u\n", boundary_tmp); -#endif WK_DEBUG +#endif // WK_DEBUG /* Record (into the header) where we stopped packing queue positions, * which is where we will start packing low bits. 
diff --git a/iokit/Kernel/WKdmDecompress.c b/iokit/Kernel/WKdmDecompress.c index 062f2d32f..8921ae0e9 100644 --- a/iokit/Kernel/WKdmDecompress.c +++ b/iokit/Kernel/WKdmDecompress.c @@ -143,9 +143,9 @@ WKdm_decompress (WK_word* src_buf, #ifdef WK_DEBUG printf("\nIn DECOMPRESSOR\n"); - printf("tempTagsArray is at %u\n", (unsigned long int) tempTagsArray); - printf("tempQPosArray is at %u\n", (unsigned long int) tempQPosArray); - printf("tempLowBitsArray is at %u\n", (unsigned long int) tempLowBitsArray); + printf("tempTagsArray is at %p\n", tempTagsArray); + printf("tempQPosArray is at %p\n", tempQPosArray); + printf("tempLowBitsArray is at %p\n", tempLowBitsArray); printf(" first four words of source buffer are:\n"); printf(" %u\n %u\n %u\n %u\n", @@ -274,10 +274,10 @@ WKdm_decompress (WK_word* src_buf, #ifdef WK_DEBUG printf("AFTER DECOMPRESSING\n"); - printf("next_output is %u\n", (unsigned long int) next_output); - printf("next_tag is %u\n", (unsigned long int) next_tag); - printf("next_full_word is %u\n", (unsigned long int) next_full_word); - printf("next_q_pos is %u\n", (unsigned long int) next_q_pos); + printf("next_output is %p\n", next_output); + printf("next_tag is %p\n", next_tag); + printf("next_full_word is %p\n", next_full_word); + printf("next_q_pos is %p\n", next_q_pos); #endif } } diff --git a/iokit/Kernel/x86_64/IOAsmSupport.s b/iokit/Kernel/x86_64/IOAsmSupport.s new file mode 100644 index 000000000..ff7585311 --- /dev/null +++ b/iokit/Kernel/x86_64/IOAsmSupport.s @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include + + +/* + * Seemingly unused references from cpp statically initialized objects. + */ + +.globl .constructors_used +.globl .destructors_used +.data + .align 2 + .long 0x11223344 +.constructors_used: + .long 0xdeadbeef + .long 0x11223344 +.destructors_used: + .long 0xdeadbeef + .long 0x11223344 diff --git a/bsd/dev/i386/lock_stubs.c b/iokit/Kernel/x86_64/IOSharedLock.s similarity index 88% rename from bsd/dev/i386/lock_stubs.c rename to iokit/Kernel/x86_64/IOSharedLock.s index 1362cd986..69183e016 100644 --- a/bsd/dev/i386/lock_stubs.c +++ b/iokit/Kernel/x86_64/IOSharedLock.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,5 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#if 0 -#define _KERNEL -#define DEFINE_SIMPLE_LOCK_PRIMS -#include -#endif +#include + diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp index 37fe95833..b8c590d06 100644 --- a/iokit/KernelConfigTables.cpp +++ b/iokit/KernelConfigTables.cpp @@ -32,32 +32,6 @@ * Version 2.0. */ - -/* This list is used in IOStartIOKit.cpp to declare fake kmod_info - * structs for kext dependencies that are built into the kernel. - * Empty version strings get replaced with osrelease at runtime. - */ -const char * gIOKernelKmods = -"{" - "'com.apple.kernel' = '';" - "'com.apple.kpi.bsd' = '';" - "'com.apple.kpi.dsep' = '';" - "'com.apple.kpi.iokit' = '';" - "'com.apple.kpi.libkern' = '';" - "'com.apple.kpi.mach' = '';" - "'com.apple.kpi.unsupported' = '';" - "'com.apple.iokit.IONVRAMFamily' = '';" - "'com.apple.driver.AppleNMI' = '';" - "'com.apple.iokit.IOSystemManagementFamily' = '';" - "'com.apple.iokit.ApplePlatformFamily' = '';" - "'com.apple.kernel.6.0' = '7.9.9';" - "'com.apple.kernel.bsd' = '7.9.9';" - "'com.apple.kernel.iokit' = '7.9.9';" - "'com.apple.kernel.libkern' = '7.9.9';" - "'com.apple.kernel.mach' = '7.9.9';" -"}"; - - const char * gIOKernelConfigTables = "(" " {" @@ -87,3 +61,11 @@ const char * gIOKernelConfigTables = ")"; +/* This stuff is no longer used at all but was exported in prior + * releases, so we'll keep them around for PPC/i386 only. + * See libkern's OSKext.cpp for other symbols, which have been moved + * there for sanity. 
+ */ +#if __ppc__ || __i386__ +const char * gIOKernelKmods = ""; +#endif /* __ppc__ || __i386__ */ diff --git a/iokit/Makefile b/iokit/Makefile index a5a3441af..fee3c6fe1 100644 --- a/iokit/Makefile +++ b/iokit/Makefile @@ -10,12 +10,14 @@ include $(MakeInc_def) INSTINC_SUBDIRS = IOKit INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS = IOKit EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} diff --git a/iokit/bsddev/DINetBootHook.h b/iokit/bsddev/DINetBootHook.h index d7ff1f0a1..4742cc88f 100644 --- a/iokit/bsddev/DINetBootHook.h +++ b/iokit/bsddev/DINetBootHook.h @@ -77,4 +77,4 @@ int di_root_image(const char *path, char devname[], dev_t *dev_p); #endif /* __APPLE_API_PRIVATE */ -#endif __DINETBOOKHOOK_H__ +#endif /* __DINETBOOKHOOK_H__ */ diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp index 3ec11b1f1..98c68da18 100644 --- a/iokit/bsddev/IOKitBSDInit.cpp +++ b/iokit/bsddev/IOKitBSDInit.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,7 +39,11 @@ extern "C" { #include // how long to wait for matching root device, secs -#define ROOTDEVICETIMEOUT 60 +#if DEBUG +#define ROOTDEVICETIMEOUT 120 +#else +#define ROOTDEVICETIMEOUT 60 +#endif extern dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys); extern dev_t mdevlookup(int devid); @@ -49,7 +53,7 @@ kern_return_t IOKitBSDInit( void ) { IOService::publishResource("IOBSD"); - + return( kIOReturnSuccess ); } @@ -569,7 +573,7 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize, if(data) { /* We found one */ ramdParms = (UInt32 *)data->getBytesNoCopy(); /* Point to the ram disk base and size */ - (void)mdevadd(-1, ramdParms[0] >> 12, ramdParms[1] >> 12, 0); /* Initialize it and pass back the device number */ + (void)mdevadd(-1, ml_static_ptovirt(ramdParms[0]) >> 12, ramdParms[1] >> 12, 0); /* Initialize it and pass back the device number */ } regEntry->release(); /* Toss the entry */ } @@ -704,11 +708,14 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize, if ( service && findHFSChild ) { bool waiting = true; + uint64_t timeoutNS; + // wait for children services to finish registering while ( waiting ) { - t.tv_sec = ROOTDEVICETIMEOUT; - t.tv_nsec = 0; - if ( service->waitQuiet( &t ) == kIOReturnSuccess ) { + timeoutNS = ROOTDEVICETIMEOUT; + timeoutNS *= kSecondScale; + + if ( (service->waitQuiet(timeoutNS) ) == kIOReturnSuccess) { waiting = false; } else { IOLog( "Waiting for child registration\n" ); @@ -857,6 +864,121 @@ kern_return_t IOBSDGetPlatformUUID( uuid_t uuid, mach_timespec_t timeout ) return KERN_SUCCESS; } +kern_return_t IOBSDGetPlatformSerialNumber( char *serial_number_str, u_int32_t len ) +{ + OSDictionary * platform_dict; + IOService *platform; + OSString * string; + + if (len < 1) { + return 0; + } + serial_number_str[0] = '\0'; + + platform_dict = IOService::serviceMatching( "IOPlatformExpertDevice" ); + if (platform_dict == 
NULL) { + return KERN_NOT_SUPPORTED; + } + + platform = IOService::waitForService( platform_dict ); + if (platform) { + string = ( OSString * ) platform->getProperty( kIOPlatformSerialNumberKey ); + if ( string == 0 ) { + return KERN_NOT_SUPPORTED; + } else { + strlcpy( serial_number_str, string->getCStringNoCopy( ), len ); + } + } + + return KERN_SUCCESS; +} + +dev_t IOBSDGetMediaWithUUID( const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout) +{ + dev_t dev = 0; + OSDictionary *dictionary; + OSString *uuid_string; + + if (bsd_name_len < 1) { + return 0; + } + bsd_name[0] = '\0'; + + dictionary = IOService::serviceMatching( "IOMedia" ); + if( dictionary ) { + uuid_string = OSString::withCString( uuid_cstring ); + if( uuid_string ) { + IOService *service; + mach_timespec_t tv = { timeout, 0 }; // wait up to "timeout" seconds for the device + + dictionary->setObject( "UUID", uuid_string ); + dictionary->retain(); + service = IOService::waitForService( dictionary, &tv ); + if( service ) { + OSNumber *dev_major = (OSNumber *) service->getProperty( kIOBSDMajorKey ); + OSNumber *dev_minor = (OSNumber *) service->getProperty( kIOBSDMinorKey ); + OSString *iostr = (OSString *) service->getProperty( kIOBSDNameKey ); + + if( iostr) + strlcpy( bsd_name, iostr->getCStringNoCopy(), bsd_name_len ); + + if ( dev_major && dev_minor ) + dev = makedev( dev_major->unsigned32BitValue(), dev_minor->unsigned32BitValue() ); + } + uuid_string->release(); + } + dictionary->release(); + } + + return dev; +} + + +void IOBSDIterateMediaWithContent(const char *content_uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg) +{ + OSDictionary *dictionary; + OSString *content_uuid_string; + + dictionary = IOService::serviceMatching( "IOMedia" ); + if( dictionary ) { + content_uuid_string = OSString::withCString( content_uuid_cstring ); + if( content_uuid_string ) { + IOService *service; + OSIterator *iter; + + dictionary->setObject( 
"Content", content_uuid_string ); + dictionary->retain(); + + iter = IOService::getMatchingServices(dictionary); + while (iter && (service = (IOService *)iter->getNextObject())) { + if( service ) { + OSString *iostr = (OSString *) service->getProperty( kIOBSDNameKey ); + OSString *uuidstr = (OSString *) service->getProperty( "UUID" ); + const char *uuid; + + if( iostr) { + if (uuidstr) { + uuid = uuidstr->getCStringNoCopy(); + } else { + uuid = "00000000-0000-0000-0000-000000000000"; + } + + // call the callback + if (func && func(iostr->getCStringNoCopy(), uuid, arg) == 0) { + break; + } + } + } + } + if (iter) + iter->release(); + + content_uuid_string->release(); + } + dictionary->release(); + } +} + int IOBSDIsMediaEjectable( const char *cdev_name ) { diff --git a/iokit/conf/MASTER b/iokit/conf/MASTER index ae3f0e88b..cb7e708c8 100644 --- a/iokit/conf/MASTER +++ b/iokit/conf/MASTER @@ -63,7 +63,10 @@ options IOKITCPP # C++ implementation # options KDEBUG # kernel tracing # options NETWORKING # kernel networking # options CRYPTO # want crypto code # -options CONFIG_DTRACE # enable dtrace # +options CONFIG_DTRACE # enable dtrace # + +options CONFIG_SLEEP # # + #makeoptions LIBDRIVER = "libDriver_kern.o" # #makeoptions LIBOBJC = "libkobjc.o" # diff --git a/iokit/conf/MASTER.i386 b/iokit/conf/MASTER.i386 index 080b9dd74..3574359ef 100644 --- a/iokit/conf/MASTER.i386 +++ b/iokit/conf/MASTER.i386 @@ -3,10 +3,11 @@ # Standard Apple Mac OS Configurations: # -------- ----- ------ --------------- # -# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace ] +# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep ] # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug ] # +# # EMBEDDED = [ intel mach iokitcpp hibernation no_kextd bsmall crypto ] # DEVELOPMENT = [ EMBEDDED config_dtrace ] # @@ -16,3 +17,5 @@ machine "i386" # cpu "i386" # options NO_KEXTD # + +options NO_NESTED_PMAP # diff --git a/iokit/conf/MASTER.x86_64 
b/iokit/conf/MASTER.x86_64 new file mode 100644 index 000000000..857357c71 --- /dev/null +++ b/iokit/conf/MASTER.x86_64 @@ -0,0 +1,21 @@ +###################################################################### +# +# Standard Apple Mac OS Configurations: +# -------- ----- ------ --------------- +# +# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ RELEASE debug ] +# +# +# EMBEDDED = [ intel mach iokitcpp hibernation no_kextd bsmall crypto ] +# DEVELOPMENT = [ EMBEDDED ] +# +###################################################################### + +machine "x86_64" # +cpu "x86_64" # + +options NO_KEXTD # + +options NO_NESTED_PMAP # diff --git a/iokit/conf/Makefile b/iokit/conf/Makefile index d52aee208..750aadb65 100644 --- a/iokit/conf/Makefile +++ b/iokit/conf/Makefile @@ -35,7 +35,6 @@ $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/iokit/conf/Makefile.i386 b/iokit/conf/Makefile.i386 index 0b9af95e3..f81e7dcac 100644 --- a/iokit/conf/Makefile.i386 +++ b/iokit/conf/Makefile.i386 @@ -7,38 +7,8 @@ CFLAGS+= $(WERROR) CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) # Objects that don't compile cleanly: -OBJS_NO_WERROR= \ - ioconf.o \ - UNDRequest.o \ - IOLib.cpo \ - IOStringFuncs.o \ - IOCatalogue.cpo \ - IOCPU.cpo \ - IOCommandGate.cpo \ - IOCommandPool.cpo \ - IOCommandQueue.cpo \ - IOConditionLock.cpo \ - IOFilterInterruptEventSource.cpo \ - IOHibernateIO.cpo \ - IOInterruptController.cpo \ - IOInterruptEventSource.cpo \ - IOKitBSDInit.cpo \ - IOMapper.cpo \ - IOMemoryCursor.cpo \ - IOMemoryDescriptor.cpo \ - IOPlatformExpert.cpo \ - IOPMPowerStateQueue.cpo \ - IOPMrootDomain.cpo \ - IORangeAllocator.cpo \ - 
IORegistryEntry.cpo \ - IOService.cpo \ - IOServicePM.cpo \ - IOSyncer.cpo \ - IOTimerEventSource.cpo \ - IOUserClient.cpo \ - IOWorkLoop.cpo \ - RootDomainUserClient.cpo - +#OBJS_NO_WERROR= \ + OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) $(OBJS_WERROR): WERROR=-Werror diff --git a/iokit/conf/Makefile.ppc b/iokit/conf/Makefile.ppc index 7786ccbd6..77e9826b2 100644 --- a/iokit/conf/Makefile.ppc +++ b/iokit/conf/Makefile.ppc @@ -2,6 +2,18 @@ #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### +# Enable -Werror for ppc builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR= \ + AppleMacIO.cpo + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + ###################################################################### #END Machine dependent Makefile fragment for ppc ###################################################################### diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index c1c09f5b5..80dfe4328 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -24,12 +24,11 @@ include $(MakeInc_cmd) include $(MakeInc_def) # -# XXX: CFLAGS ARM_TODO: added __MBUF_TRANSITION_ for IONetworkingFamily +# XXX: CFLAGS # CFLAGS+= -imacros meta_features.h -DKERNEL -DDRIVER_PRIVATE \ - -Wall -Wno-four-char-constants -fno-common \ + -Wall -fno-common \ -DIOMATCHDEBUG=1 -DIOALLOCDEBUG=1 \ - -D__MBUF_TRANSITION_ \ -imacros meta_features.h $(CFLAGS_INLINE_CONFIG) #-DIOKITDEBUG=-1 @@ -87,12 +86,14 @@ ${OBJS}: ${OBJSDEPS} LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) - @echo LD $(COMPONENT) $(_v)$(SEG_HACK) __HIB IOHibernateRestoreKernel.o -o _IOHibernateRestoreKernel.o $(_v)mv _IOHibernateRestoreKernel.o IOHibernateRestoreKernel.o $(_v)$(SEG_HACK) __HIB WKdmDecompress.o -o _WKdmDecompress.o $(_v)mv _WKdmDecompress.o 
WKdmDecompress.o - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_depend: do_all $(_v)${MD} -u Makedep -f -d `ls *.d` diff --git a/iokit/conf/Makefile.x86_64 b/iokit/conf/Makefile.x86_64 new file mode 100644 index 000000000..64ddbfaa5 --- /dev/null +++ b/iokit/conf/Makefile.x86_64 @@ -0,0 +1,19 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +# Enable -Werror for x86_64 builds +CFLAGS+= $(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +#OBJS_NO_WERROR= \ + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### + diff --git a/iokit/conf/files b/iokit/conf/files index 3b87d080a..600ba8be0 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -58,6 +58,7 @@ iokit/Kernel/IOMemoryCursor.cpp optional iokitcpp iokit/Kernel/IOMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IOMultiMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IORangeAllocator.cpp optional iokitcpp +iokit/Kernel/IOSubMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IOPlatformExpert.cpp optional iokitcpp diff --git a/iokit/conf/files.x86_64 b/iokit/conf/files.x86_64 new file mode 100644 index 000000000..c81cf1178 --- /dev/null +++ b/iokit/conf/files.x86_64 @@ -0,0 +1,8 @@ + +# Shared lock + +iokit/Kernel/x86_64/IOSharedLock.s standard +iokit/Kernel/x86_64/IOAsmSupport.s standard + +# Power Domains +iokit/Kernel/IOPMrootDomain.cpp optional iokitcpp diff --git 
a/iokit/mach-o/mach_header.h b/iokit/mach-o/mach_header.h deleted file mode 100644 index 8dab718c0..000000000 --- a/iokit/mach-o/mach_header.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.h - * - * Definitions for accessing mach-o headers. This header wraps the - * routines defined in osfmk/mach-o/mach_header.c; this is made clear - * by the existance of the getsectcmdsymtabfromheader() prototype. - * - * NOTE: The functions prototyped by this header only operate againt - * 32 bit mach headers. 
Many of these functions imply the - * currently running kernel, and cannot be used against mach - * headers other than that of the currently running kernel. - * - * HISTORY - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.h. - * Ifdef'ed out most of this since I couldn't find any references. - */ - -#ifndef _KERN_MACH_HEADER_ -#define _KERN_MACH_HEADER_ - -#include -#include - -#if KERNEL -struct mach_header **getmachheaders(void); -vm_offset_t getlastaddr(void); - -struct segment_command *firstseg(void); -struct segment_command *firstsegfromheader(struct mach_header *header); -struct segment_command *nextseg(struct segment_command *sgp); -struct segment_command *nextsegfromheader( - struct mach_header *header, - struct segment_command *seg); -struct segment_command *getsegbyname(const char *seg_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - const char *seg_name); -void *getsegdatafromheader(struct mach_header *, const char *, int *); -struct section *getsectbyname(const char *seg_name, const char *sect_name); -struct section *getsectbynamefromheader( - struct mach_header *header, - char *seg_name, - char *sect_name); -void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); -struct section *firstsect(struct segment_command *sgp); -struct section *nextsect(struct segment_command *sgp, struct section *sp); -struct fvmlib_command *fvmlib(void); -struct fvmlib_command *fvmlibfromheader(struct mach_header *header); -struct segment_command *getfakefvmseg(void); -#ifdef MACH_KDB -struct symtab_command *getsectcmdsymtabfromheader(struct mach_header *); -boolean_t getsymtab(struct mach_header *, vm_offset_t *, int *, - vm_offset_t *, vm_size_t *); -#endif - -#endif /* KERNEL */ - -#endif /* _KERN_MACH_HEADER_ */ diff --git a/kgmacros b/kgmacros index 7623092e2..216fcd4cd 100644 --- a/kgmacros +++ b/kgmacros @@ -8,6 +8,11 @@ set print 
asm-demangle on set cp-abi gnu-v2 +# This option tells gdb to relax its stack tracing heuristics +# Useful for debugging across stack switches +# (to the interrupt stack, for instance). Requires gdb-675 or greater. +set backtrace sanity-checks off + echo Loading Kernel GDB Macros package. Type "help kgm" for more info.\n define kgm @@ -35,6 +40,7 @@ document kgm | showallipc Display a summary listing of all the ipc spaces | showallrights Display a summary listing of all the ipc rights | showallkmods Display a summary listing of all the kernel modules +| showallbusyports Display a listing of all ports with unread messages | | showallclasses Display info about all OSObject subclasses in the system | showobject Show info about an OSObject - its vtable ptr and retain count, & more info for simple container classes. @@ -43,6 +49,14 @@ document kgm | showregistryentry Show info about a registry entry; its properties and descendants in the current plane | setregistryplane Set the plane to be used for the iokit registry macros (pass zero for list) | +| setfindregistrystr Set the encoded string for matching with +| findregistryentry or findregistryprop (created from +| strcmp_arg_pack64) +| findregistryentry Find a registry entry that matches the encoded string +| findregistryentries Find all the registry entries that match the encoded string +| findregistryprop Search the registry entry for a property that matches +| the encoded string +| | showtask Display info about the specified task | showtaskthreads Display info about the threads in the task | showtaskstacks Display the stack for each thread in the task @@ -50,6 +64,7 @@ document kgm | showtaskvme Display info about the task's vm_map entries | showtaskipc Display info about the specified task's ipc space | showtaskrights Display info about the task's ipc space entries +| showtaskbusyports Display all of the task's ports with unread messages | | showact Display info about a thread specified by activation | showactstack 
Display the stack for a thread specified by activation @@ -100,12 +115,31 @@ document kgm | switchtocorethread Corefile version of "switchtoact" | resetcorectx Corefile version of "resetctx" | -| readphys Reads the specified untranslated address -| readphys64 Reads the specified untranslated 64-bit address +| readphys8 Reads the specified untranslated address (8-bit read) +| readphys16 Reads the specified untranslated address (16-bit read) +| readphys32 Reads the specified untranslated address (32-bit read) +| readphys64 Reads the specified untranslated address (64-bit read) +| writephys8 Writes to the specified untranslated address (8-bit write) +| writephys16 Writes to the specified untranslated address (16-bit write) +| writephys32 Writes to the specified untranslated address (32-bit write) +| writephys64 Writes to the specified untranslated address (64-bit write) +| +| readioport8 Read 8-bits from the specified I/O Port +| readioport16 Read 16-bits from the specified I/O Port +| readioport32 Read 32-bits from the specified I/O Port +| writeioport8 Write 8-bits into the specified I/O Port +| writeioport16 Write 16-bits into the specified I/O Port +| writeioport32 Write 32-bits into the specified I/O Port +| +| readmsr64 Read 64-bits from the specified MSR +| writemsr64 Write 64-bits into the specified MSR | | rtentry_showdbg Print the debug information of a route entry | rtentry_trash Walk the list of trash route entries | +| inifa_showdbg Print the debug information of an IPv4 interface address +| in6ifa_showdbg Print the debug information of an IPv6 interface address +| | mbuf_walkpkt Walk the mbuf packet chain (m_nextpkt) | mbuf_walk Walk the mbuf chain (m_next) | mbuf_buf2slab Find the slab structure of the corresponding buffer @@ -122,12 +156,17 @@ document kgm | mcache_stat Print all mcaches in the system | mcache_showcache Display the number of objects in the cache | +| showbootargs Display boot arguments passed to the target kernel | showbootermemorymap 
Dump phys memory map from EFI | -| systemlog Display the kernel's printf ring buffer +| systemlog Display the kernel's printf ring buffer +| +| hexdump Show the contents of memory as a hex/ASCII dump | | showvnodepath Print the path for a vnode | showvnodelocks Display list of advisory locks held/blocked on a vnode +| showvnodedev Display information about a device vnode +| showtty Display information about a struct tty | showallvols Display a summary of mounted volumes | showvnode Display info about one vnode | showvolvnodes Display info about all vnodes of a given volume @@ -142,9 +181,20 @@ document kgm | showworkqvnodes Print the vnode worker list | shownewvnodes Print the new vnode list | -| ifconfig display ifconfig-like output -| showifaddrs show the list of addresses for the given ifp -| showifmultiaddrs show the list of multicast addresses for the given ifp +| ifconfig display ifconfig-like output +| showifaddrs show the list of addresses for the given ifp +| showifmultiaddrs show the list of multicast addresses for the given ifp +| +| showsocket Display information about a socket +| showprocsockets Given a proc_t pointer, display information about its sockets +| showallprocsockets Display information about the sockets of all the processes +| +| show_tcp_pcbinfo Display the list of the TCP protocol control blocks +| show_tcp_timewaitslots Display the list of the TCP protocol control blocks in TIMEWAIT +| show_udp_pcbinfo Display the list of UDP protocol control blocks +| +| show_rt_inet Display the IPv4 routing table +| show_rt_inet6 Display the IPv6 routing table | | showallpmworkqueues Display info about all IOPMWorkQueue objects | showregistrypmstate Display power management state for all IOPower registry entries @@ -163,6 +213,36 @@ document kgm | findoldest Find oldest zone leak debugging record | countpcs Print how often a pc occurs in the zone leak log | +| pmap_walk Perform a page-table walk +| pmap_vtop Translate a virtual address to physical 
address +| +| showuserlibraries Show binary images known by dyld in the target task +| +| showthreadfortid Displays the address of the thread structure for a given thread_id value. +| +| strcmp_nomalloc A version of strcmp that avoids the use of malloc +| through the use of encoded strings created via +| strcmp_arg_pack64. +| strcmp_arg_pack64 Pack a string into a 64-bit quantity for use by +| strcmp_nomalloc +| +| pci_cfg_read8 Read 8-bits from a PCI config space register +| pci_cfg_read16 Read 16-bits from a PCI config space register +| pci_cfg_read32 Read 32-bits from a PCI config space register +| pci_cfg_write8 Write 8-bits into a PCI config space register +| pci_cfg_write16 Write 16-bits into a PCI config space register +| pci_cfg_write32 Write 32-bits into a PCI config space register +| pci_cfg_dump Dump entire config space for a PCI device +| pci_cfg_scan Perform a scan for PCI devices +| pci_cfg_dump_all Dump config spaces for all detected PCI devices +| +| lapic_read32 Read APIC entry +| lapic_write32 Write APIC entry +| lapic_dump Dump APIC entries +| +| ioapic_read32 Read IOAPIC entry +| ioapic_write32 Write IOAPIC entry +| ioapic_dump Dump IOAPIC entries | | Type "help " for more specific help on a particular macro. | Type "show user " to see what the macro is really doing. @@ -171,7 +251,7 @@ end # This macro should appear before any symbol references, to facilitate # a gdb "source" without a loaded symbol file. define showversion - printf "%s\n", *(char **)0x501C + kdp-kernelversion end document showversion @@ -185,83 +265,98 @@ Syntax: showversion | correctly. end -set $kgm_mtype = ((struct mach_header)_mh_execute_header).cputype +set $kgm_mtype_ppc = 0x00000012 +set $kgm_mtype_arm = 0x0000000C -# This option tells gdb to relax its stack tracing heuristics -# Useful for debugging across stack switches -# (to the interrupt stack, for instance). Requires gdb-675 or greater. 
-# Don't do this for arm as a workaround to 5486905 -if ($kgm_mtype != 12) - set backtrace sanity-checks off -end +set $kgm_mtype_i386 = 0x00000007 +set $kgm_mtype_x86_64 = 0x01000007 +set $kgm_mtype_x86_any = $kgm_mtype_i386 +set $kgm_mtype_x86_mask = 0xFEFFFFFF + +set $kgm_mtype = ((unsigned int *)&_mh_execute_header)[1] +set $kgm_lp64 = $kgm_mtype & 0x01000000 -set $kgm_dummy = &proc0 -set $kgm_dummy = &kmod +set $kgm_lcpu_self = 0xFFFE set $kgm_reg_depth = 0 -set $kgm_reg_plane = (void **) gIOServicePlane +set $kgm_reg_depth_max = 0xFFFF +set $kgm_reg_plane = (IORegistryPlane *) gIOServicePlane set $kgm_namekey = (OSSymbol *) 0 set $kgm_childkey = (OSSymbol *) 0 set $kgm_show_object_addrs = 0 set $kgm_show_object_retain = 0 set $kgm_show_props = 0 +set $kgm_show_data_alwaysbytes = 0 set $kgm_show_kmod_syms = 0 +# Print a pointer +define showptr + if $kgm_lp64 + printf "0x%016llx", $arg0 + else + printf "0x%08x", $arg0 + end +end + +# for headers, leave 8 chars for LP64 pointers +define showptrhdrpad + if $kgm_lp64 + printf " " + end +end + define showkmodheader - printf "kmod address size " - printf "id refs version name\n" + printf "kmod " + showptrhdrpad + printf " address " + showptrhdrpad + printf " size " + showptrhdrpad + printf " id refs version name\n" end define showkmodint set $kgm_kmodp = (struct kmod_info *)$arg0 - printf "0x%08x ", $arg0 - printf "0x%08x ", $kgm_kmodp->address - printf "0x%08x ", $kgm_kmodp->size + showptr $kgm_kmodp + printf " " + showptr $kgm_kmodp->address + printf " " + showptr $kgm_kmodp->size + printf " " printf "%3d ", $kgm_kmodp->id printf "%5d ", $kgm_kmodp->reference_count - printf "%10s ", &$kgm_kmodp->version - printf "%s\n", &$kgm_kmodp->name + printf "%10s ", $kgm_kmodp->version + printf "%s\n", $kgm_kmodp->name end -set $kgm_kmodmin = 0xffffffff -set $kgm_fkmodmin = 0x00000000 -set $kgm_kmodmax = 0x00000000 -set $kgm_fkmodmax = 0xffffffff +# cached info of the last kext found, to speed up subsequent lookups set 
$kgm_pkmod = 0 set $kgm_pkmodst = 0 set $kgm_pkmoden = 0 + define showkmodaddrint - printf "0x%x" , $arg0 - if ((unsigned int)$arg0 >= (unsigned int)$kgm_pkmodst) && ((unsigned int)$arg0 <= (unsigned int)$kgm_pkmoden) - set $kgm_off = ((unsigned int)$arg0 - (unsigned int)$kgm_pkmodst) + showptr $arg0 + if ((unsigned long)$arg0 >= (unsigned long)$kgm_pkmodst) && ((unsigned long)$arg0 < (unsigned long)$kgm_pkmoden) + set $kgm_off = ((unsigned long)$arg0 - (unsigned long)$kgm_pkmodst) printf " <%s + 0x%x>", $kgm_pkmod->name, $kgm_off else - if ((unsigned int)$arg0 <= (unsigned int)$kgm_fkmodmax) && ((unsigned int)$arg0 >= (unsigned int)$kgm_fkmodmin) - set $kgm_kmodp = (struct kmod_info *)kmod - while $kgm_kmodp - set $kgm_kmod = *$kgm_kmodp - if $kgm_kmod.address && ($kgm_kmod.address < $kgm_kmodmin) - set $kgm_kmodmin = $kgm_kmod.address - end - if ($kgm_kmod.address + $kgm_kmod.size) > $kgm_kmodmax - set $kgm_kmodmax = $kgm_kmod.address + $kgm_kmod.size - end - set $kgm_off = ((unsigned int)$arg0 - (unsigned int)$kgm_kmod.address) - if ($kgm_kmod.address <= $arg0) && ($kgm_off <= $kgm_kmod.size) - printf " <%s + 0x%x>", $kgm_kmodp->name, $kgm_off - set $kgm_pkmod = $kgm_kmodp - set $kgm_pkmodst = $kgm_kmod.address - set $kgm_pkmoden = $kgm_pkmodst + $kgm_kmod.size - set $kgm_kmodp = 0 - else - set $kgm_kmodp = $kgm_kmod.next - end - end - if !$kgm_pkmod - set $kgm_fkmodmin = $kgm_kmodmin - set $kgm_fkmodmax = $kgm_kmodmax + set $kgm_kmodp = (struct kmod_info *)kmod + if ($kgm_mtype == $kgm_mtype_x86_64) && ($arg0 >= (unsigned long)&_mh_execute_header) + # kexts are loaded below the kernel for x86_64 + set $kgm_kmodp = 0 + end + while $kgm_kmodp + set $kgm_off = ((unsigned long)$arg0 - (unsigned long)$kgm_kmodp->address) + if ($kgm_kmodp->address <= $arg0) && ($kgm_off < $kgm_kmodp->size) + printf " <%s + 0x%x>", $kgm_kmodp->name, $kgm_off + set $kgm_pkmod = $kgm_kmodp + set $kgm_pkmodst = $kgm_kmodp->address + set $kgm_pkmoden = $kgm_pkmodst + $kgm_kmodp->size + 
set $kgm_kmodp = 0 + else + set $kgm_kmodp = $kgm_kmodp->next end end end @@ -299,16 +394,57 @@ Syntax: (gdb) showallkmods end define showactheader - printf " thread " - printf "processor pri state wait_queue wait_event\n" + printf " " + showptrhdrpad + printf " thread " + showptrhdrpad + printf " thread_id " + showptrhdrpad + printf " processor " + showptrhdrpad + printf " pri io_policy state wait_queue" + showptrhdrpad + printf " wait_event\n" end define showactint - printf " 0x%08x ", $arg0 + printf " " + showptrhdrpad set $kgm_thread = *(struct thread *)$arg0 - printf "0x%08x ", $kgm_thread.last_processor - printf "%3d ", $kgm_thread.sched_pri + showptr $arg0 + if ($kgm_thread.static_param) + printf "[WQ]" + else + printf " " + end + printf " %7ld ", $kgm_thread.thread_id + showptr $kgm_thread.last_processor + printf " %3d ", $kgm_thread.sched_pri + if ($kgm_thread.uthread != 0) + set $kgm_printed = 0 + set $kgm_uthread = (struct uthread *)$kgm_thread.uthread + if ($kgm_uthread->uu_flag & 0x400) + printf "RAGE " + else + printf " " + end + if ($kgm_uthread->uu_iopol_disk == 1) + printf "NORM " + set $kgm_printed = 1 + end + if ($kgm_uthread->uu_iopol_disk == 2) + printf "PASS " + set $kgm_printed = 1 + end + if ($kgm_uthread->uu_iopol_disk == 3) + printf "THROT " + set $kgm_printed = 1 + end + if ($kgm_printed == 0) + printf " " + end + end set $kgm_state = $kgm_thread.state if $kgm_state & 0x80 printf "I" @@ -332,38 +468,51 @@ define showactint printf "S" end if $kgm_state & 0x01 - printf "W\t" - printf "0x%08x ", $kgm_thread.wait_queue - if (((unsigned)$kgm_thread.wait_event > (unsigned)sectPRELINKB) \ + printf "W" + printf "\t " + showptr $kgm_thread.wait_queue + printf " " + if (((unsigned long)$kgm_thread.wait_event > (unsigned long)&last_kernel_symbol) \ && ($arg1 != 2) && ($kgm_show_kmod_syms == 0)) showkmodaddr $kgm_thread.wait_event else - output /a (unsigned) $kgm_thread.wait_event + output /a $kgm_thread.wait_event end if ($kgm_thread.uthread != 0) 
set $kgm_uthread = (struct uthread *)$kgm_thread.uthread if ($kgm_uthread->uu_wmesg != 0) - printf " \"%s\"", $kgm_uthread->uu_wmesg + printf "\t \"%s\"", $kgm_uthread->uu_wmesg end end end if $arg1 != 0 if ($kgm_thread.kernel_stack != 0) if ($kgm_thread.reserved_stack != 0) - printf "\n\t\treserved_stack=0x%08x", $kgm_thread.reserved_stack + printf "\n " + showptrhdrpad + printf " reserved_stack=" + showptr $kgm_thread.reserved_stack end - printf "\n\t\tkernel_stack=0x%08x", $kgm_thread.kernel_stack - if ($kgm_mtype == 18) + printf "\n " + showptrhdrpad + printf " kernel_stack=" + showptr $kgm_thread.kernel_stack + if ($kgm_mtype == $kgm_mtype_ppc) set $mysp = $kgm_thread.machine.pcb->save_r1 end - if ($kgm_mtype == 7) - set $kgm_statep = (struct x86_kernel_state32 *) \ - ($kgm_thread->kernel_stack + 0x4000 \ - - sizeof(struct x86_kernel_state32)) - set $mysp = $kgm_statep->k_ebp + if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) + set $kgm_statep = (struct x86_kernel_state *) \ + ($kgm_thread->kernel_stack + kernel_stack_size \ + - sizeof(struct x86_kernel_state)) + if ($kgm_mtype == $kgm_mtype_i386) + set $mysp = $kgm_statep->k_ebp + else + set $mysp = $kgm_statep->k_rbp + end end - if ($kgm_mtype == 12) - if ($arg0 == $r9) + if ($kgm_mtype == $kgm_mtype_arm) + if (((unsigned long)$r7 < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ + && ((unsigned long)$r7 > (unsigned long) ($kgm_thread->kernel_stack))) set $mysp = $r7 else set $kgm_statep = (struct arm_saved_state *)$kgm_thread.machine.kstackptr @@ -371,8 +520,11 @@ define showactint end end set $prevsp = $mysp - 16 - printf "\n\t\tstacktop=0x%08x", $mysp - if ($kgm_mtype == 18) + printf "\n " + showptrhdrpad + printf " stacktop=" + showptr $mysp + if ($kgm_mtype == $kgm_mtype_ppc) set $stkmask = 0xf else set $stkmask = 0x3 @@ -380,33 +532,46 @@ define showactint set $kgm_return = 0 while ($mysp != 0) && (($mysp & $stkmask) == 0) \ && ($mysp != $prevsp) \ - && ((((unsigned) 
$mysp ^ (unsigned) $prevsp) < 0x2000) \ - || (((unsigned)$mysp < ((unsigned) ($kgm_thread->kernel_stack+0x4000))) \ - && ((unsigned)$mysp > (unsigned) ($kgm_thread->kernel_stack)))) - printf "\n\t\t0x%08x ", $mysp - if ($kgm_mtype == 18) + && ((((unsigned long) $mysp ^ (unsigned long) $prevsp) < 0x2000) \ + || (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ + && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) + printf "\n " + showptrhdrpad + printf " " + showptr $mysp + printf " " + if ($kgm_mtype == $kgm_mtype_ppc) set $kgm_return = *($mysp + 8) end - if ($kgm_mtype == 7) + if ($kgm_mtype == $kgm_mtype_i386) set $kgm_return = *($mysp + 4) end - if ($kgm_mtype == 12) + if ($kgm_mtype == $kgm_mtype_x86_64) + set $kgm_return = *(unsigned long *)($mysp + 8) + end + if ($kgm_mtype == $kgm_mtype_arm) set $kgm_return = *($mysp + 4) end - if (((unsigned) $kgm_return > (unsigned) sectPRELINKB) \ + if (((unsigned long) $kgm_return < (unsigned long) &_mh_execute_header || \ + (unsigned long) $kgm_return >= (unsigned long) &last_kernel_symbol ) \ && ($kgm_show_kmod_syms == 0)) showkmodaddr $kgm_return else - output /a (unsigned) $kgm_return + output /a $kgm_return end set $prevsp = $mysp - set $mysp = * $mysp + set $mysp = *(unsigned long *)$mysp end set $kgm_return = 0 - printf "\n\t\tstackbottom=0x%08x", $prevsp + printf "\n " + showptrhdrpad + printf " stackbottom=" + showptr $prevsp else - printf "\n\t\t\tcontinuation=" - output /a (unsigned) $kgm_thread.continuation + printf "\n " + showptrhdrpad + printf " continuation=" + output /a $kgm_thread.continuation end printf "\n" else @@ -459,7 +624,7 @@ end define showcurrentthreads set $kgm_prp = (struct processor *)processor_list while $kgm_prp != 0 - printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_num + printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_id if 
($kgm_prp)->active_thread != 0 set $kgm_actp = ($kgm_prp)->active_thread showtaskheader @@ -510,7 +675,7 @@ end define showcurrentstacks set $kgm_prp = processor_list while $kgm_prp != 0 - printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_num + printf "Processor 0x%08x State %d (cpu_id %x)\n", $kgm_prp, ($kgm_prp)->state, ($kgm_prp)->cpu_id if ($kgm_prp)->active_thread != 0 set $kgm_actp = ($kgm_prp)->active_thread showtaskheader @@ -529,14 +694,14 @@ Syntax: (gdb) showcurrentstacks end define showwaiterheader - printf "waiters activation " - printf "thread pri state wait_queue wait_event\n" + printf "waiters thread " + printf "processor pri state wait_queue wait_event\n" end define showwaitqwaiters - set $kgm_w_waitqp = (struct wait_queue *)$arg0 + set $kgm_w_waitqp = (WaitQueue*)$arg0 set $kgm_w_linksp = &($kgm_w_waitqp->wq_queue) - set $kgm_w_wqe = (struct wait_queue_element *)$kgm_w_linksp->next + set $kgm_w_wqe = (WaitQueueElement *)$kgm_w_linksp->next set $kgm_w_found = 0 while ( (queue_entry_t)$kgm_w_wqe != (queue_entry_t)$kgm_w_linksp) if ($kgm_w_wqe->wqe_type != &_wait_queue_link) @@ -547,32 +712,32 @@ define showwaitqwaiters set $kgm_w_shuttle = (struct thread *)$kgm_w_wqe showactint $kgm_w_shuttle 0 end - set $kgm_w_wqe = (struct wait_queue_element *)$kgm_w_wqe->wqe_links.next + set $kgm_w_wqe = (WaitQueueElement *)$kgm_w_wqe->wqe_links.next end end define showwaitqwaitercount - set $kgm_wc_waitqp = (struct wait_queue *)$arg0 + set $kgm_wc_waitqp = (WaitQueue*)$arg0 set $kgm_wc_linksp = &($kgm_wc_waitqp->wq_queue) - set $kgm_wc_wqe = (struct wait_queue_element *)$kgm_wc_linksp->next + set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_linksp->next set $kgm_wc_count = 0 while ( (queue_entry_t)$kgm_wc_wqe != (queue_entry_t)$kgm_wc_linksp) if ($kgm_wc_wqe->wqe_type != &_wait_queue_link) set $kgm_wc_count = $kgm_wc_count + 1 end - set $kgm_wc_wqe = (struct wait_queue_element *)$kgm_wc_wqe->wqe_links.next + set 
$kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_wqe->wqe_links.next end printf "0x%08x ", $kgm_wc_count end define showwaitqmembercount - set $kgm_mc_waitqsetp = (struct wait_queue_set *)$arg0 + set $kgm_mc_waitqsetp = (WaitQueueSet*)$arg0 set $kgm_mc_setlinksp = &($kgm_mc_waitqsetp->wqs_setlinks) - set $kgm_mc_wql = (struct wait_queue_link *)$kgm_mc_setlinksp->next + set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_setlinksp->next set $kgm_mc_count = 0 while ( (queue_entry_t)$kgm_mc_wql != (queue_entry_t)$kgm_mc_setlinksp) set $kgm_mc_count = $kgm_mc_count + 1 - set $kgm_mc_wql = (struct wait_queue_link *)$kgm_mc_wql->wql_setlinks.next + set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_wql->wql_setlinks.next end printf "0x%08x ", $kgm_mc_count end @@ -584,7 +749,7 @@ define showwaitqmemberheader end define showwaitqmemberint - set $kgm_m_waitqp = (struct wait_queue *)$arg0 + set $kgm_m_waitqp = (WaitQueue*)$arg0 printf " 0x%08x ", $kgm_m_waitqp printf "0x%08x ", $kgm_m_waitqp->wq_interlock.lock_data if ($kgm_m_waitqp->wq_fifo) @@ -609,9 +774,9 @@ define showwaitqmemberofheader end define showwaitqmemberof - set $kgm_mo_waitqp = (struct wait_queue *)$arg0 + set $kgm_mo_waitqp = (WaitQueue*)$arg0 set $kgm_mo_linksp = &($kgm_mo_waitqp->wq_queue) - set $kgm_mo_wqe = (struct wait_queue_element *)$kgm_mo_linksp->next + set $kgm_mo_wqe = (WaitQueueElement *)$kgm_mo_linksp->next set $kgm_mo_found = 0 while ( (queue_entry_t)$kgm_mo_wqe != (queue_entry_t)$kgm_mo_linksp) if ($kgm_mo_wqe->wqe_type == &_wait_queue_link) @@ -619,18 +784,18 @@ define showwaitqmemberof set $kgm_mo_found = 1 showwaitqmemberofheader end - set $kgm_mo_wqlp = (struct wait_queue_link *)$kgm_mo_wqe - set $kgm_mo_wqsetp = (struct wait_queue *)($kgm_mo_wqlp->wql_setqueue) + set $kgm_mo_wqlp = (WaitQueueLink *)$kgm_mo_wqe + set $kgm_mo_wqsetp = (WaitQueue*)($kgm_mo_wqlp->wql_setqueue) showwaitqmemberint $kgm_mo_wqsetp end - set $kgm_mo_wqe = (struct wait_queue_element *)$kgm_mo_wqe->wqe_links.next + set $kgm_mo_wqe = 
(WaitQueueElement *)$kgm_mo_wqe->wqe_links.next end end define showwaitqmembers - set $kgm_ms_waitqsetp = (struct wait_queue_set *)$arg0 + set $kgm_ms_waitqsetp = (WaitQueueSet*)$arg0 set $kgm_ms_setlinksp = &($kgm_ms_waitqsetp->wqs_setlinks) - set $kgm_ms_wql = (struct wait_queue_link *)$kgm_ms_setlinksp->next + set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_setlinksp->next set $kgm_ms_found = 0 while ( (queue_entry_t)$kgm_ms_wql != (queue_entry_t)$kgm_ms_setlinksp) set $kgm_ms_waitqp = $kgm_ms_wql->wql_element.wqe_queue @@ -639,7 +804,7 @@ define showwaitqmembers set $kgm_ms_found = 1 end showwaitqmemberint $kgm_ms_waitqp - set $kgm_ms_wql = (struct wait_queue_link *)$kgm_ms_wql->wql_setlinks.next + set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_wql->wql_setlinks.next end end @@ -649,10 +814,10 @@ define showwaitqheader end define showwaitqint - set $kgm_waitqp = (struct wait_queue *)$arg0 + set $kgm_waitqp = (WaitQueue*)$arg0 printf "0x%08x ", $kgm_waitqp if ($kgm_waitqp->wq_type == 0xf1d1) - printf "0x%08x ", ((struct wait_queue_set *)$kgm_waitqp)->wqs_refcount + printf "0x%08x ", ((WaitQueueSet*)$kgm_waitqp)->wqs_refcount else printf "0x00000000 " end @@ -673,7 +838,7 @@ define showwaitqint end define showwaitq - set $kgm_waitq1p = (wait_queue_t)$arg0 + set $kgm_waitq1p = (WaitQueue*)$arg0 showwaitqheader showwaitqint $kgm_waitq1p if ($kgm_waitq1p->wq_type == 0xf1d1) @@ -685,64 +850,79 @@ define showwaitq end define showmapheader - printf "vm_map pmap vm_size " - printf "#ents rpage hint first_free\n" + printf "vm_map " + showptrhdrpad + printf " pmap " + showptrhdrpad + printf " vm_size " + showptrhdrpad + printf " #ents rpage hint " + showptrhdrpad + printf " first_free\n" end define showvmeheader - printf " entry start " - printf " prot #page object offset\n" + printf " entry " + showptrhdrpad + printf " start prot #page object " + showptrhdrpad + printf " offset\n" end define showvmint set $kgm_mapp = (vm_map_t)$arg0 set $kgm_map = *$kgm_mapp - printf "0x%08x ", 
$arg0 - printf "0x%08x ", $kgm_map.pmap - printf "0x%08x ", $kgm_map.size - printf "%3d ", $kgm_map.hdr.nentries + showptr $arg0 + printf " " + showptr $kgm_map.pmap + printf " " + showptr $kgm_map.size + printf " %3d ", $kgm_map.hdr.nentries if $kgm_map.pmap printf "%5d ", $kgm_map.pmap->stats.resident_count else printf " " end - printf "0x%08x ", $kgm_map.hint - printf "0x%08x\n", $kgm_map.first_free + showptr $kgm_map.hint + printf " " + showptr $kgm_map.first_free + printf "\n" if $arg1 != 0 - showvmeheader - set $kgm_head_vmep = &($kgm_mapp->hdr.links) - set $kgm_vmep = $kgm_map.hdr.links.next - while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) - set $kgm_vme = *$kgm_vmep - printf " 0x%08x ", $kgm_vmep - printf "0x%016llx ", $kgm_vme.links.start - printf "%1x", $kgm_vme.protection - printf "%1x", $kgm_vme.max_protection - if $kgm_vme.inheritance == 0x0 - printf "S" - end - if $kgm_vme.inheritance == 0x1 - printf "C" - end - if $kgm_vme.inheritance == 0x2 - printf "-" - end - if $kgm_vme.inheritance == 0x3 - printf "D" - end - if $kgm_vme.is_sub_map - printf "s " - else - if $kgm_vme.needs_copy - printf "n " - else - printf " " - end - end - printf "%5d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 - printf "0x%08x ", $kgm_vme.object.vm_object - printf "0x%016llx\n", $kgm_vme.offset - set $kgm_vmep = $kgm_vme.links.next + showvmeheader + set $kgm_head_vmep = &($kgm_mapp->hdr.links) + set $kgm_vmep = $kgm_map.hdr.links.next + while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) + set $kgm_vme = *$kgm_vmep + printf " " + showptr $kgm_vmep + printf " 0x%016llx ", $kgm_vme.links.start + printf "%1x", $kgm_vme.protection + printf "%1x", $kgm_vme.max_protection + if $kgm_vme.inheritance == 0x0 + printf "S" + end + if $kgm_vme.inheritance == 0x1 + printf "C" + end + if $kgm_vme.inheritance == 0x2 + printf "-" + end + if $kgm_vme.inheritance == 0x3 + printf "D" + end + if $kgm_vme.is_sub_map + printf "s " + else + if $kgm_vme.needs_copy + printf "n " 
+ else + printf " " + end + end + printf "%6d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 + showptr $kgm_vme.object.vm_object + printf " 0x%016llx\n", $kgm_vme.offset + set $kgm_vmep = $kgm_vme.links.next end end printf "\n" @@ -803,50 +983,57 @@ end define showipcheader - printf "ipc_space is_table table_next " - printf "flags tsize splaytree splaybase\n" + printf "ipc_space " + showptrhdrpad + printf " is_table " + showptrhdrpad + printf " table_next" + showptrhdrpad + printf " flags tsize splaytree splaybase\n" end define showipceheader - printf " name object " - printf "rite urefs destname destination\n" + printf " name object " + showptrhdrpad + printf " rite urefs destname destination\n" end define showipceint set $kgm_ie = *(ipc_entry_t)$arg0 printf " 0x%08x ", $arg1 - printf "0x%08x ", $kgm_ie.ie_object + showptr $kgm_ie.ie_object + printf " " if $kgm_ie.ie_bits & 0x00100000 - printf "Dead " + printf "Dead " printf "%5d\n", $kgm_ie.ie_bits & 0xffff else if $kgm_ie.ie_bits & 0x00080000 - printf "SET " + printf "SET " printf "%5d\n", $kgm_ie.ie_bits & 0xffff else if $kgm_ie.ie_bits & 0x00010000 - if $kgm_ie.ie_bits & 0x00020000 - printf " SR" - else - printf " S" - end + if $kgm_ie.ie_bits & 0x00020000 + printf " SR" + else + printf " S" + end else - if $kgm_ie.ie_bits & 0x00020000 - printf " R" - end + if $kgm_ie.ie_bits & 0x00020000 + printf " R" + end end if $kgm_ie.ie_bits & 0x00040000 - printf " O" + printf " O" end if $kgm_ie.index.request - printf "n" + printf "n" else printf " " end if $kgm_ie.ie_bits & 0x00800000 - printf "c" + printf "c" else - printf " " + printf " " end printf "%5d ", $kgm_ie.ie_bits & 0xffff showportdest $kgm_ie.ie_object @@ -857,53 +1044,57 @@ end define showipcint set $kgm_isp = (ipc_space_t)$arg0 set $kgm_is = *$kgm_isp - printf "0x%08x ", $arg0 - printf "0x%08x ", $kgm_is.is_table - printf "0x%08x ", $kgm_is.is_table_next + showptr $arg0 + printf " " + showptr $kgm_is.is_table + printf " " + showptr 
$kgm_is.is_table_next + printf " " if $kgm_is.is_growing != 0 - printf "G" + printf "G" else - printf " " + printf " " end if $kgm_is.is_fast != 0 - printf "F" + printf "F" else - printf " " + printf " " end if $kgm_is.is_active != 0 - printf "A " + printf "A " else - printf " " + printf " " end printf "%5d ", $kgm_is.is_table_size printf "0x%08x ", $kgm_is.is_tree_total - printf "0x%08x\n", &$kgm_isp->is_tree + showptr &$kgm_isp->is_tree + printf "\n" if $arg1 != 0 - showipceheader - set $kgm_iindex = 0 - set $kgm_iep = $kgm_is.is_table - set $kgm_destspacep = (ipc_space_t)0 + showipceheader + set $kgm_iindex = 0 + set $kgm_iep = $kgm_is.is_table + set $kgm_destspacep = (ipc_space_t)0 while ( $kgm_iindex < $kgm_is.is_table_size ) - set $kgm_ie = *$kgm_iep - if $kgm_ie.ie_bits & 0x001f0000 - set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24)) - showipceint $kgm_iep $kgm_name - end - set $kgm_iindex = $kgm_iindex + 1 - set $kgm_iep = &($kgm_is.is_table[$kgm_iindex]) - end - if $kgm_is.is_tree_total - printf "Still need to write tree traversal\n" - end + set $kgm_ie = *$kgm_iep + if $kgm_ie.ie_bits & 0x001f0000 + set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24)) + showipceint $kgm_iep $kgm_name + end + set $kgm_iindex = $kgm_iindex + 1 + set $kgm_iep = &($kgm_is.is_table[$kgm_iindex]) + end + if $kgm_is.is_tree_total + printf "Still need to write tree traversal\n" + end end printf "\n" end define showipc - set $kgm_isp = (ipc_space_t)$arg0 - showipcheader - showipcint $kgm_isp 0 + set $kgm_isp = (ipc_space_t)$arg0 + showipcheader + showipcint $kgm_isp 0 end document showipc Syntax: (gdb) showipc @@ -912,7 +1103,7 @@ end define showrights set $kgm_isp = (ipc_space_t)$arg0 - showipcheader + showipcheader showipcint $kgm_isp 1 end document showrights @@ -937,7 +1128,7 @@ end define showtaskrights set $kgm_taskp = (task_t)$arg0 showtaskheader - showipcheader + showipcheader showtaskint $kgm_taskp showipcint $kgm_taskp->itk_space 1 end @@ -952,8 
+1143,8 @@ define showallipc while $kgm_cur_taskp != $kgm_head_taskp showtaskheader showipcheader - showtaskint $kgm_cur_taskp - showipcint $kgm_cur_taskp->itk_space 0 + showtaskint $kgm_cur_taskp + showipcint $kgm_cur_taskp->itk_space 0 set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) end end @@ -969,8 +1160,8 @@ define showallrights while $kgm_cur_taskp != $kgm_head_taskp showtaskheader showipcheader - showtaskint $kgm_cur_taskp - showipcint $kgm_cur_taskp->itk_space 1 + showtaskint $kgm_cur_taskp + showipcint $kgm_cur_taskp->itk_space 1 set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) end end @@ -1006,18 +1197,26 @@ end define showtaskheader - printf "task vm_map ipc_space #acts " + printf "task " + showptrhdrpad + printf " vm_map " + showptrhdrpad + printf " ipc_space " + showptrhdrpad + printf " #acts " showprocheader end define showtaskint - set $kgm_task = *(struct task *)$arg0 - printf "0x%08x ", $arg0 - printf "0x%08x ", $kgm_task.map - printf "0x%08x ", $kgm_task.itk_space - printf "%3d ", $kgm_task.thread_count - showprocint $kgm_task.bsd_info + set $kgm_taskp = (struct task *)$arg0 + showptr $arg0 + printf " " + showptr $kgm_taskp->map + printf " " + showptr $kgm_taskp->itk_space + printf " %5d ", $kgm_taskp->thread_count + showprocint $kgm_taskp->bsd_info end define showtask @@ -1078,21 +1277,57 @@ end document showalltasks Syntax: (gdb) showalltasks | Routine to print a summary listing of all the tasks +| wq_state -> reports "number of workq threads", "number of scheduled workq threads", "number of pending work items" +| if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung +| io_policy -> RAGE - rapid aging of vnodes requested +| NORM - normal I/O explicitly requested (this is the default) +| PASS - passive I/O requested (i.e. I/Os do not affect throttling decisions) +| THROT - throttled I/O requested (i.e. 
thread/task may be throttled after each I/O completes) end - define showprocheader - printf " pid proc command\n" + printf " pid process io_policy wq_state" + showptrhdrpad + printf " command\n" end define showprocint set $kgm_procp = (struct proc *)$arg0 if $kgm_procp != 0 + set $kgm_printed = 0 printf "%5d ", $kgm_procp->p_pid - printf "0x%08x ", $kgm_procp - printf "%s\n", $kgm_procp->p_comm + showptr $kgm_procp + if ($kgm_procp->p_lflag & 0x400000) + printf " RAGE " + else + printf " " + end + if ($kgm_procp->p_iopol_disk == 1) + printf "NORM " + set $kgm_printed = 1 + end + if ($kgm_procp->p_iopol_disk == 2) + printf "PASS " + set $kgm_printed = 1 + end + if ($kgm_procp->p_iopol_disk == 3) + printf "THROT " + set $kgm_printed = 1 + end + if ($kgm_printed == 0) + printf " " + end + set $kgm_wqp = (struct workqueue *)$kgm_procp->p_wqptr + if $kgm_wqp != 0 + printf " %2d %2d %2d ", $kgm_wqp->wq_nthreads, $kgm_wqp->wq_thidlecount, $kgm_wqp->wq_itemcount + else + printf " " + end + printf " %s\n", $kgm_procp->p_comm else - printf " *0* 0x00000000 --\n" + printf " *0* " + showptr 0 + printf " --\n" end end @@ -1118,7 +1353,7 @@ end define showproc showtaskheader set $kgm_procp = (struct proc *)$arg0 - showtaskint $kgm_procp->task $arg1 $arg2 + showtaskint $kgm_procp->task end @@ -1163,7 +1398,7 @@ define showkmsgint else printf "rM" end - if (($kgm_kmsgh.msgh_bits & 0xff00) == (19 < 8)) + if (($kgm_kmsgh.msgh_bits & 0xff00) == (19 << 8)) printf "lC" else printf "lM" @@ -1182,7 +1417,8 @@ end define showkobject set $kgm_portp = (struct ipc_port *)$arg0 - printf "0x%08x kobject(", $kgm_portp->ip_kobject + showptr $kgm_portp->ip_kobject + printf " kobject(" set $kgm_kotype = ($kgm_portp->ip_object.io_bits & 0x00000fff) if ($kgm_kotype == 1) printf "THREAD" @@ -1300,7 +1536,9 @@ define showportdestproc if $kgm_destprocp != 0 printf "%s(%d)\n", $kgm_destprocp->p_comm, $kgm_destprocp->p_pid else - printf "task 0x%08x\n", $kgm_desttaskp + printf "task " + showptr 
$kgm_desttaskp + printf "\n" end end @@ -1311,10 +1549,12 @@ define showportdest showkobject $kgm_portp else if ($kgm_portp->ip_object.io_bits & 0x80000000) - printf "0x%08x ", $kgm_portp->ip_object.io_receiver_name + showptr $kgm_portp->ip_messages.data.port.receiver_name + printf " " showportdestproc $kgm_portp else - printf "0x%08x inactive-port\n", $kgm_portp + showptr $kgm_portp + printf " inactive-port\n" end end end @@ -1322,17 +1562,13 @@ end define showportmember printf " 0x%08x ", $arg0 set $kgm_portp = (struct ipc_port *)$arg0 - printf "0x%08x ", $kgm_portp->ip_object.io_receiver_name + printf "0x%08x ", $kgm_portp->ip_messages.data.port.receiver_name if ($kgm_portp->ip_object.io_bits & 0x80000000) printf "A" else printf " " end - if ($kgm_portp->ip_object.io_bits & 0x7fff0000) - printf "Set " - else - printf "Port" - end + printf "Port" printf "%5d ", $kgm_portp->ip_object.io_references printf "0x%08x ", &($kgm_portp->ip_messages) printf "0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount @@ -1342,7 +1578,7 @@ define showportint printf "0x%08x ", $arg0 set $kgm_portp = (struct ipc_port *)$arg0 printf "0x%08x ", &($kgm_portp->ip_messages) - printf "0x%08x ", $kgm_portp->ip_object.io_receiver_name + printf "0x%08x ", $kgm_portp->ip_messages.data.port.receiver_name if ($kgm_portp->ip_object.io_bits & 0x80000000) printf "A" else @@ -1369,7 +1605,7 @@ define showpsetint printf "0x%08x ", $arg0 set $kgm_psetp = (struct ipc_pset *)$arg0 printf "0x%08x ", &($kgm_psetp->ips_messages) - printf "0x%08x ", $kgm_psetp->ips_object.io_receiver_name + printf "0x%08x ", $kgm_psetp->ips_messages.data.pset.local_name if ($kgm_psetp->ips_object.io_bits & 0x80000000) printf "A" else @@ -1377,9 +1613,9 @@ define showpsetint end printf "Set " printf "%5d ", $kgm_psetp->ips_object.io_references - printf "0x%08x ", $kgm_psetp->ips_object.io_receiver_name + printf "0x%08x ", $kgm_psetp->ips_messages.data.pset.local_name set $kgm_setlinksp = 
&($kgm_psetp->ips_messages.data.set_queue.wqs_setlinks) - set $kgm_wql = (struct wait_queue_link *)$kgm_setlinksp->next + set $kgm_wql = (WaitQueueLink *)$kgm_setlinksp->next set $kgm_found = 0 while ( (queue_entry_t)$kgm_wql != (queue_entry_t)$kgm_setlinksp) set $kgm_portp = (struct ipc_port *)((int)($kgm_wql->wql_element->wqe_queue) - ((int)$kgm_portoff)) @@ -1390,7 +1626,7 @@ define showpsetint set $kgm_found = 1 end showportmember $kgm_portp 0 - set $kgm_wql = (struct wait_queue_link *)$kgm_wql->wql_setlinks.next + set $kgm_wql = (WaitQueueLink *)$kgm_wql->wql_setlinks.next end if !$kgm_found printf "--n/e--\n" @@ -1418,52 +1654,54 @@ end define showmqueue set $kgm_mqueue = *(struct ipc_mqueue *)$arg0 - set $kgm_psetoff = &(((struct ipc_pset *)0)->ips_messages) - set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) - if ($kgm_mqueue.data.set_queue.wqs_wait_queue.wq_type == 0xf1d1) - set $kgm_pset = (((int)$arg0) - ((int)$kgm_psetoff)) + if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d1) + set $kgm_psetoff = &(((struct ipc_pset *)0)->ips_messages) + set $kgm_pset = (((long)$arg0) - ((long)$kgm_psetoff)) showpsetheader showpsetint $kgm_pset 1 end - if ($kgm_mqueue.data.set_queue.wqs_wait_queue.wq_type == 0xf1d0) + if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d0) + set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) + set $kgm_port = (((long)$arg0) - ((long)$kgm_portoff)) showportheader - set $kgm_port = (((int)$arg0) - ((int)$kgm_portoff)) showportint $kgm_port 1 end end define zprint_one -set $kgm_zone = (struct zone *)$arg0 + set $kgm_zone = (struct zone *)$arg0 -printf "0x%08x ", $kgm_zone -printf "%8d ",$kgm_zone->count -printf "%8x ",$kgm_zone->cur_size -printf "%8x ",$kgm_zone->max_size -printf "%6d ",$kgm_zone->elem_size -printf "%8x ",$kgm_zone->alloc_size -printf "%s ",$kgm_zone->zone_name + showptr $kgm_zone + printf " %6d ",$kgm_zone->count + printf "%8x ",$kgm_zone->cur_size + printf "%8x 
",$kgm_zone->max_size + printf "%6d ",$kgm_zone->elem_size + printf "%8x ",$kgm_zone->alloc_size + printf "%s ",$kgm_zone->zone_name -if ($kgm_zone->exhaustible) - printf "H" -end -if ($kgm_zone->collectable) - printf "C" -end -if ($kgm_zone->expandable) - printf "X" -end -printf "\n" + if ($kgm_zone->exhaustible) + printf "H" + end + if ($kgm_zone->collectable) + printf "C" + end + if ($kgm_zone->expandable) + printf "X" + end + printf "\n" end define zprint -printf "ZONE COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME\n" -set $kgm_zone_ptr = (struct zone *)first_zone -while ($kgm_zone_ptr != 0) - zprint_one $kgm_zone_ptr - set $kgm_zone_ptr = $kgm_zone_ptr->next_zone -end -printf "\n" + printf "ZONE " + showptrhdrpad + printf " COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ NAME\n" + set $kgm_zone_ptr = (struct zone *)first_zone + while ($kgm_zone_ptr != 0) + zprint_one $kgm_zone_ptr + set $kgm_zone_ptr = $kgm_zone_ptr->next_zone + end + printf "\n" end document zprint Syntax: (gdb) zprint @@ -1471,29 +1709,31 @@ Syntax: (gdb) zprint end define showmtxgrp -set $kgm_mtxgrp = (struct _lck_grp_ *)$arg0 - -if ($kgm_mtxgrp->lck_grp_mtxcnt) -printf "0x%08x ", $kgm_mtxgrp -printf "%8d ",$kgm_mtxgrp->lck_grp_mtxcnt -printf "%12u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt -printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt -printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt -printf "%s ",&$kgm_mtxgrp->lck_grp_name -printf "\n" -end + set $kgm_mtxgrp = (struct _lck_grp_ *)$arg0 + + if ($kgm_mtxgrp->lck_grp_mtxcnt) + showptr $kgm_mtxgrp + printf " %8d ",$kgm_mtxgrp->lck_grp_mtxcnt + printf "%12u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt + printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt + printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt + printf "%s ",&$kgm_mtxgrp->lck_grp_name + printf "\n" + end end define showallmtx -printf "LCK 
GROUP CNT UTIL MISS WAIT NAME\n" -set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue -set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next -while ($kgm_mtxgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) - showmtxgrp $kgm_mtxgrp_ptr - set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next -end -printf "\n" + printf "LCK GROUP " + showptrhdrpad + printf " CNT UTIL MISS WAIT NAME\n" + set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue + set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next + while ($kgm_mtxgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) + showmtxgrp $kgm_mtxgrp_ptr + set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next + end + printf "\n" end document showallmtx Syntax: (gdb) showallmtx @@ -1501,29 +1741,31 @@ Syntax: (gdb) showallmtx end define showrwlckgrp -set $kgm_rwlckgrp = (struct _lck_grp_ *)$arg0 - -if ($kgm_rwlckgrp->lck_grp_rwcnt) -printf "0x%08x ", $kgm_rwlckgrp -printf "%8d ",$kgm_rwlckgrp->lck_grp_rwcnt -printf "%12u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt -printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt -printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt -printf "%s ",&$kgm_rwlckgrp->lck_grp_name -printf "\n" -end + set $kgm_rwlckgrp = (struct _lck_grp_ *)$arg0 + + if ($kgm_rwlckgrp->lck_grp_rwcnt) + showptr $kgm_rwlckgrp + printf " %8d ",$kgm_rwlckgrp->lck_grp_rwcnt + printf "%12u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt + printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt + printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt + printf "%s ",&$kgm_rwlckgrp->lck_grp_name + printf "\n" + end end define showallrwlck -printf "LCK GROUP CNT UTIL MISS WAIT NAME\n" -set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue -set $kgm_rwlckgrp_ptr = (struct _lck_grp_ 
*)$kgm_rwlckgrp_ptr->lck_grp_link.next -while ($kgm_rwlckgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) - showrwlckgrp $kgm_rwlckgrp_ptr - set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)$kgm_rwlckgrp_ptr->lck_grp_link.next -end -printf "\n" + printf "LCK GROUP " + showptrhdrpad + printf " CNT UTIL MISS WAIT NAME\n" + set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue + set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)$kgm_rwlckgrp_ptr->lck_grp_link.next + while ($kgm_rwlckgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) + showrwlckgrp $kgm_rwlckgrp_ptr + set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)$kgm_rwlckgrp_ptr->lck_grp_link.next + end + printf "\n" end document showallrwlck Syntax: (gdb) showallrwlck @@ -1564,7 +1806,7 @@ define switchtoact output/a (unsigned) $newact.continuation echo \n else - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end @@ -1575,16 +1817,16 @@ define switchtoact set $pc=$newact->machine->pcb.save_srr0 update end - if ($kgm_mtype == 7) + if ($kgm_mtype == $kgm_mtype_i386) set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state if ($kdp_act_counter == 0) set $kdpstate = *($kdpstatep) end set $kdp_act_counter = $kdp_act_counter + 1 - set $kgm_statep = (struct x86_kernel_state32 *) \ - ($newact->kernel_stack + 0x4000 \ - - sizeof(struct x86_kernel_state32)) + set $kgm_statep = (struct x86_kernel_state *) \ + ($newact->kernel_stack + kernel_stack_size \ + - sizeof(struct x86_kernel_state)) set $kdpstatep->ebx = $kgm_statep->k_ebx set $kdpstatep->ebp = $kgm_statep->k_ebp set $kdpstatep->edi = $kgm_statep->k_edi @@ -1595,7 +1837,29 @@ define switchtoact set $pc = $kgm_statep->k_eip update end - if ($kgm_mtype == 12) + if ($kgm_mtype == $kgm_mtype_x86_64) + set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state + if ($kdp_act_counter == 0) + set $kdpstate = *($kdpstatep) + end + set $kdp_act_counter = $kdp_act_counter + 1 + + set $kgm_statep = 
(struct x86_kernel_state *) \ + ($newact->kernel_stack + kernel_stack_size \ + - sizeof(struct x86_kernel_state)) + set $kdpstatep->rbx = $kgm_statep->k_rbx + set $kdpstatep->rbp = $kgm_statep->k_rbp + set $kdpstatep->r12 = $kgm_statep->k_r12 + set $kdpstatep->r13 = $kgm_statep->k_r13 + set $kdpstatep->r14 = $kgm_statep->k_r14 + set $kdpstatep->r15 = $kgm_statep->k_r15 + set $kdpstatep->isf.rsp = $kgm_statep->k_rsp + flushregs + flushstack + set $pc = $kgm_statep->k_rip + update + end + if ($kgm_mtype == $kgm_mtype_arm) set $r0_save = $r0 set $r1_save = $r1 set $r2_save = $r2 @@ -1648,7 +1912,7 @@ end define switchtoctx select 0 - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end @@ -1659,7 +1923,9 @@ define switchtoctx set $pc=((struct savearea *) $arg0)->save_srr0 update else - if ($kgm_mtype == 12) + if ($kgm_mtype == $kgm_mtype_arm) + set arm disassembler std + select-frame 0 set $r0_save = $r0 set $r1_save = $r1 set $r2_save = $r2 @@ -1712,7 +1978,7 @@ end define resetctx select 0 if ($kdp_act_counter != 0) - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) set (struct savearea *)kdp.saved_state=$kdpstate flushregs flushstack @@ -1720,7 +1986,7 @@ define resetctx update set $kdp_act_counter = 0 end - if ($kgm_mtype == 7) + if ($kgm_mtype == $kgm_mtype_i386) set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state set *($kdpstatep)=$kdpstate flushregs @@ -1729,7 +1995,16 @@ define resetctx update set $kdp_act_counter = 0 end - if ($kgm_mtype == 12) + if ($kgm_mtype == $kgm_mtype_x86_64) + set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state + set *($kdpstatep)=$kdpstate + flushregs + flushstack + set $pc=$kdpstatep->isf.rip + update + set $kdp_act_counter = 0 + end + if ($kgm_mtype == $kgm_mtype_arm) set $r0 = $r0_save flushregs set $r1 = $r1_save @@ -1813,7 +2088,7 @@ define paniclog if *(char *)$kgm_panic_bufptr == 10 printf "\n" else - printf 
"%c", *$kgm_panic_bufptr + printf "%c", *(char *)$kgm_panic_bufptr end set $kgm_panic_bufptr= (char *)$kgm_panic_bufptr + 1 end @@ -1826,20 +2101,19 @@ document paniclog end define dumpcallqueue - set $kgm_callhead = (queue_t)&$arg0 - set $kgm_call = (struct call_entry *)$kgm_callhead.next + set $kgm_callhead = $arg0 + set $kgm_callentry = $kgm_callhead->next set $kgm_i = 0 - while $kgm_call != $kgm_callhead + while $kgm_callentry != $kgm_callhead + set $kgm_call = (struct call_entry *)$kgm_callentry printf "0x%08x ", $kgm_call printf "0x%08x 0x%08x ", $kgm_call->param0, $kgm_call->param1 - output $kgm_call->state - printf "\t" output $kgm_call->deadline printf "\t" output $kgm_call->func printf "\n" set $kgm_i = $kgm_i + 1 - set $kgm_call = (struct call_entry *)$kgm_call->q_link.next + set $kgm_callentry = $kgm_callentry->next end printf "%d entries\n", $kgm_i end @@ -1900,6 +2174,52 @@ define _kgm_update_loop set $kgm_update_loop_ctr = $kgm_update_loop_ctr + 1 end end +# Internal routine used by "_loadfrom" to read from 64-bit addresses +# on 32-bit kernels +define _loadk32m64 + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_readmem64_req_t) + set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_READMEM64 + set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint64_t)$arg0 + set $kgm_pkt->nbytes = sizeof(uint64_t) + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data + if ($kgm_pkt->error == 0) + set $kgm_k32read64 = *(uint64_t *)$kgm_pkt->data + else + set $kgm_k32read64 = 0 + end +end + +# Internal routine used by "showx86backtrace" to abstract possible loads from +# user space +define _loadfrom + if (kdp_pmap == 0) + set $kgm_loadval = *(uintptr_t 
*)$arg0 + else + if ($kgm_x86_abi == 0xe) + set $kgm_loadval = *(uint32_t *)$arg0 + else + if ($kgm_x86_abi == 0xf) + if ($kgm_mtype == $kgm_mtype_i386) + _loadk32m64 $arg0 + set $kgm_loadval = $kgm_k32read64 + else + set $kgm_loadval = *(uint64_t *)$arg0 + end + end + end +end +end + #This is necessary since gdb often doesn't do backtraces on x86 correctly #in the absence of symbols.The code below in showuserstack and @@ -1908,36 +2228,70 @@ end #errors on x86. These errors appear on ppc as well, but they don't #always stop macro evaluation. -set $kgm_cur_ebp = 0 -set $kgm_cur_eip = 0 - +set $kgm_cur_frame = 0 +set $kgm_cur_pc = 0 +set $kgm_x86_abi = 0 define showx86backtrace - if ($kgm_cur_ebp == 0) - set $kgm_cur_ebp = $ebp - end - if ($kgm_cur_eip == 0) - set $kgm_cur_eip = $eip - end - printf "0: EBP: 0x%08x EIP: 0x%08x\n", $kgm_cur_ebp, $kgm_cur_eip - x/i $kgm_cur_eip - set $kgm_prev_ebp = *((uint32_t *) $kgm_cur_ebp) - set $kgm_prev_eip = *((uint32_t *) ($kgm_cur_ebp + 4)) - set $kgm_cur_ebp = 0 - set $kgm_cur_eip = 0 + if ($kgm_mtype == $kgm_mtype_i386) + set $kgm_frame_reg = $ebp + set $kgm_pc = $eip + set $kgm_ret_off = 4 + end + if ($kgm_mtype == $kgm_mtype_x86_64) + set $kgm_frame_reg = $rbp + set $kgm_pc = $rip + set $kgm_ret_off = 8 + end + + if ($kgm_x86_abi == 0xe) + set $kgm_ret_off = 4 + end + if ($kgm_x86_abi == 0xf) + set $kgm_ret_off = 8 + end + + if ($kgm_cur_frame == 0) + set $kgm_cur_frame = $kgm_frame_reg + end + if ($kgm_cur_pc == 0) + set $kgm_cur_pc = $kgm_pc + end + printf "0: Frame: 0x%016llx PC: 0x%016llx\n", $kgm_cur_frame, $kgm_cur_pc + if (!(($kgm_x86_abi == 0xf) && ($kgm_mtype == $kgm_mtype_i386))) + x/i $kgm_cur_pc + end + set $kgm_tmp_frame = $kgm_cur_frame + set $kgm_cur_frame = 0 + set $kgm_cur_pc = 0 + _loadfrom ($kgm_tmp_frame) + set $kgm_prev_frame = $kgm_loadval + _loadfrom ($kgm_tmp_frame+$kgm_ret_off) + set $kgm_prev_pc = $kgm_loadval set $kgm_frameno = 1 - while $kgm_prev_ebp != 0 - printf "%d: saved EBP: 0x%08x saved 
EIP: 0x%08x\n", $kgm_frameno, $kgm_prev_ebp, $kgm_prev_eip - x/i $kgm_prev_eip - set $kgm_prev_eip = *((uint32_t *) ($kgm_prev_ebp + 4)) - set $kgm_prev_ebp = *((uint32_t *) $kgm_prev_ebp) + while $kgm_prev_frame != 0 + printf "%d: Saved frame: 0x%016llx Saved PC: 0x%016llx\n", $kgm_frameno, $kgm_prev_frame, $kgm_prev_pc + if (!(($kgm_x86_abi == 0xf) && ($kgm_mtype == $kgm_mtype_i386))) + x/i $kgm_prev_pc + end + _loadfrom ($kgm_prev_frame+$kgm_ret_off) + set $kgm_prev_pc = $kgm_loadval + _loadfrom ($kgm_prev_frame) + set $kgm_prev_frame = $kgm_loadval set $kgm_frameno = $kgm_frameno + 1 end set kdp_pmap = 0 + set $kgm_x86_abi = 0 +end + +define showx86backtrace2 + set $kgm_cur_frame = $arg0 + set $kgm_cur_pc = $arg1 + showx86backtrace end define showuserstack select 0 - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end @@ -1967,18 +2321,30 @@ define showuserstack _kgm_update_loop end else - if ($kgm_mtype == 7) + if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) set $newact = (struct thread *) $arg0 -#This needs to identify 64-bit processes as well - set $newiss = (x86_saved_state32_t) ($newact->machine.pcb->iss.uss.ss_32) - set $checkpc = $newiss.eip + set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss) + set $kgm_x86_abi = $newiss.flavor + if ($newiss.flavor == 0xf) + set $checkpc = $newiss.uss.ss_64.isf.rip + set $checkframe = $newiss.uss.ss_64.rbp + + else + set $checkpc = $newiss.uss.ss_32.eip + set $checkframe = $newiss.uss.ss_32.ebp + end + if ($checkpc == 0) echo This activation does not appear to have echo \20 a valid user context.\n else - set $kgm_cur_ebp = $newiss.ebp - set $kgm_cur_eip = $checkpc - printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread (0x%08x); you can also examine memory locations in this address space (pmap 0x%08x) before issuing the backtrace. 
This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n", $arg0, $newact->task->map->pmap + set $kgm_cur_frame = $checkframe + set $kgm_cur_pc = $checkpc + printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread (" + showptr $arg0 + printf "); you can also examine memory locations in this address space (pmap " + showptr $newact->task->map->pmap + printf ") before issuing the backtrace. This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n" set kdp_pmap = $newact->task->map->pmap _kgm_flush_loop _kgm_update_loop @@ -2059,15 +2425,15 @@ define switchtocorethread output/a (unsigned) $newact.continuation echo \n else - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) loadcontext $newact->machine->pcb flushstack set $pc = $newact->machine->pcb.save_srr0 else - if ($kgm_mtype == 7) - set $kgm_cstatep = (struct x86_kernel_state32 *) \ - ($newact->kernel_stack + 0x4000 \ - - sizeof(struct x86_kernel_state32)) + if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) + set $kgm_cstatep = (struct x86_kernel_state *) \ + ($newact->kernel_stack + kernel_stack_size \ + - sizeof(struct x86_kernel_state)) loadcontext $kgm_cstatep flushstack else @@ -2091,7 +2457,7 @@ end define loadcontext select 0 - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) set $kgm_contextp = (struct savearea *) $arg0 set $pc = $kgm_contextp.save_srr0 set $r1 = $kgm_contextp.save_r1 @@ -2131,27 +2497,39 @@ define loadcontext set $cr = 
$kgm_contextp.save_cr set $ctr = $kgm_contextp.save_ctr else - if ($kgm_mtype == 7) - set $kgm_contextp = (struct x86_kernel_state32 *) $arg0 + if ($kgm_mtype == $kgm_mtype_i386) + set $kgm_contextp = (struct x86_kernel_state *) $arg0 set $ebx = $kgm_contextp->k_ebx set $ebp = $kgm_contextp->k_ebp set $edi = $kgm_contextp->k_edi set $esi = $kgm_contextp->k_esi set $eip = $kgm_contextp->k_eip set $pc = $kgm_contextp->k_eip + else + if ($kgm_mtype == $kgm_mtype_x86_64) + set $kgm_contextp = (struct x86_kernel_state *) $arg0 + set $rbx = $kgm_contextp->k_rbx + set $rbp = $kgm_contextp->k_rbp + set $r12 = $kgm_contextp->k_r12 + set $r13 = $kgm_contextp->k_r13 + set $r14 = $kgm_contextp->k_r14 + set $r15 = $kgm_contextp->k_r15 + set $rip = $kgm_contextp->k_rip + set $pc = $kgm_contextp->k_rip else echo loadcontext not supported on this architecture\n end end + end end define resetcorectx select 0 - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) set $kgm_corecontext = (struct savearea *) kdp.saved_state loadcontext $kgm_corecontext else - if ($kgm_mtype == 7) + if ($kgm_mtype == $kgm_mtype_i386) set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state set $ebx = $kdpstatep->ebx set $ebp = $kdpstatep->ebp @@ -2226,17 +2604,18 @@ define showgdbthread printf "\n\t\treserved_stack=0x%08x", $kgm_thread.reserved_stack end printf "\n\t\tkernel_stack=0x%08x", $kgm_thread.kernel_stack - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) set $mysp = $kgm_thread.machine.pcb->save_r1 end - if ($kgm_mtype == 7) - set $kgm_statep = (struct x86_kernel_state32 *) \ - ($kgm_thread->kernel_stack + 0x4000 \ - - sizeof(struct x86_kernel_state32)) + if ($kgm_mtype == $kgm_mtype_i386) + set $kgm_statep = (struct x86_kernel_state *) \ + ($kgm_thread->kernel_stack + kernel_stack_size \ + - sizeof(struct x86_kernel_state)) set $mysp = $kgm_statep->k_ebp end - if ($kgm_mtype == 12) - if ($arg0 == $r9) + if ($kgm_mtype == $kgm_mtype_arm) + if (((unsigned long)$r7 < 
((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ + && ((unsigned long)$r7 > (unsigned long) ($kgm_thread->kernel_stack))) set $mysp = $r7 else set $kgm_statep = (struct arm_saved_state *)$kgm_thread.machine.kstackptr @@ -2328,7 +2707,7 @@ end define switchtouserthread select 0 - if ($kgm_mtype == 18) + if ($kgm_mtype == $kgm_mtype_ppc) if ($kdp_act_counter == 0) set $kdpstate = (struct savearea *) kdp.saved_state end @@ -2404,6 +2783,20 @@ define showboolean end end +define showdatabytes + set $kgm_data = (OSData *)$arg0 + + printf "<" + set $kgm_datap = (const unsigned char *) $kgm_data->data + set $kgm_idx = 0 + while ( $kgm_idx < $kgm_data->length ) + printf "%02X", *$kgm_datap + set $kgm_datap = $kgm_datap + 1 + set $kgm_idx = $kgm_idx + 1 + end + printf ">\n" +end + define showdata set $kgm_data = (OSData *)$arg0 @@ -2546,55 +2939,68 @@ define showobjectint set $kgm_obj = (OSObject *) $arg1 set $kgm_vt = *((void **) $arg1) - if ($kgm_mtype == 12) + if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm) set $kgm_vt = $kgm_vt - 2 * sizeof(void *) end if ($kgm_show_object_addrs) - printf "`object %p, vt ", $arg1 - output /a (unsigned) $kgm_vt - if ($kgm_show_object_retain) - printf ", retain count %d, container retain %d", (0xffff & $kgm_obj->retainCount), $kgm_obj->retainCount >> 16 - end - printf "` " + printf "`object " + showptr $arg1 + printf ", vt " + output /a (unsigned long) $kgm_vt + if ($kgm_show_object_retain) + printf ", retain count %d, container retain %d", (0xffff & $kgm_obj->retainCount), $kgm_obj->retainCount >> 16 + end + printf "` " end - if ($kgm_vt == _ZTV8OSString) - showstring $arg1 - else - if ($kgm_vt == _ZTV8OSSymbol) + # No multiple-inheritance + set $kgm_shown = 0 + if ($kgm_vt == &_ZTV8OSString) + showstring $arg1 + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV8OSSymbol) showstring $arg1 - else - if ($kgm_vt == _ZTV8OSNumber) + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV8OSNumber) shownumber $arg1 - else - if ($kgm_vt 
== _ZTV6OSData) - showdata $arg1 - else - if ($kgm_vt == _ZTV9OSBoolean) - showboolean $arg1 - else - if ($kgm_vt == _ZTV12OSDictionary) - showdictionaryint _$arg0 $arg1 - else - if ($kgm_vt == _ZTV7OSArray) - showarrayint _$arg0 $arg1 - else - if ($kgm_vt == _ZTV5OSSet) - showsetint _$arg0 $arg1 - else - if ($kgm_show_object_addrs == 0) - printf "`object %p, vt ", $arg1 - output /a (unsigned) $kgm_vt - printf "`" - end - end - end - end - end - end - end - end + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV6OSData) + if $kgm_show_data_alwaysbytes == 1 + showdatabytes $arg1 + else + showdata $arg1 + end + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV9OSBoolean) + showboolean $arg1 + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV12OSDictionary) + showdictionaryint _$arg0 $arg1 + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV7OSArray) + showarrayint _$arg0 $arg1 + set $kgm_shown = 1 + end + if ($kgm_vt == &_ZTV5OSSet) + showsetint _$arg0 $arg1 + set $kgm_shown = 1 + end + + if ($kgm_shown != 1) + if ($kgm_show_object_addrs == 0) + printf "`object " + showptr $arg1 + printf ", vt " + output /a (unsigned long) $kgm_vt + printf "`" + end end end @@ -2627,7 +3033,7 @@ define dictget end -define showregistryentryrecurse +define _registryentryrecurseinit set $kgm_re = (IOService *)$arg1 set $kgm$arg0_stack = (unsigned long long) $arg2 @@ -2651,9 +3057,11 @@ define showregistryentryrecurse else set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) end +end - indent $kgm_reg_depth $kgm$arg0_stack - printf "+-o " +define findregistryentryrecurse + set $kgm_registry_entry = 0 + _registryentryrecurseinit $arg0 $arg1 $arg2 $arg3 dictget $kgm_re->fRegistryTable $kgm_namekey if ($kgm_result == 0) @@ -2664,71 +3072,289 @@ define showregistryentryrecurse end if ($kgm_result != 0) - printf "%s", ((OSString *)$kgm_result)->string - else - if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) - printf "%s", ((IOService*)$kgm_re)->pwrMgt->Name - 
else -# printf ", guessclass " -# guessclass $kgm_re - printf "??" - end - end + set $str = ((OSString *) $kgm_result)->string + strcmp_nomalloc $str $kgm_reg_find_str0 $kgm_reg_find_str1 $kgm_reg_find_str2 $kgm_reg_find_str3 $kgm_reg_find_str4 $kgm_reg_find_str5 $kgm_reg_find_str6 $kgm_reg_find_str7 $kgm_reg_find_str8 + if $kgm_findregistry_verbose + echo . + end + if $kgm_strcmp_result == 0 + if $kgm_findregistry_verbose + printf "\n%s:\n | ", ((OSString *) $kgm_result)->string + showobject $kgm_re + printf " | " + print $kgm_re + end + + # if we want to show everything, then don't populate $kgm_registry_entry + if !$kgm_findregistry_continue + set $kgm_registry_entry = $kgm_re + end + end + end - printf " array[$kgm$arg0_child_idx++] + set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) + if $kgm_reg_depth >= $kgm_reg_depth_max + 1 + loop_break + end + findregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib + if $kgm_registry_entry + loop_break + end + end + set $kgm_reg_depth = $kgm_reg_depth - 1 end - output /a $kgm_vt +end - if ($kgm_vt != _ZTV15IORegistryEntry) - printf ", " - set $kgm_state = $kgm_re->__state[0] - # kIOServiceRegisteredState - if (0 == ($kgm_state & 2)) - printf "!" - end - printf "registered, " - # kIOServiceMatchedState - if (0 == ($kgm_state & 4)) - printf "!" 
- end - printf "matched, " - # kIOServiceInactiveState - if ($kgm_state & 1) - printf "in" +define findregdictvalue + set $kgm_registry_value = 0 + set $kgm_reg_idx = 0 + while ($kgm_reg_idx < $arg0->count) + set $kgm_obj = $arg0->dictionary + $kgm_reg_idx + set $str = ((OSString *)$kgm_obj->key)->string + strcmp_nomalloc $str $kgm_reg_find_str0 $kgm_reg_find_str1 $kgm_reg_find_str2 $kgm_reg_find_str3 $kgm_reg_find_str4 $kgm_reg_find_str5 $kgm_reg_find_str6 $kgm_reg_find_str7 $kgm_reg_find_str8 + + if $kgm_strcmp_result == 0 + set $kgm_registry_value = $kgm_obj->value + if $kgm_findregistry_verbose + showobject $kgm_registry_value + print $kgm_registry_value + end + loop_break end - printf "active, busy %d, retain count %d", (0xff & $kgm_re->__state[1]), (0xffff & $kgm_re->retainCount) + set $kgm_reg_idx = $kgm_reg_idx + 1 end - printf ">\n" +end - if ($kgm_show_props) - set $kgm_props = $kgm_re->fPropertyTable - showregdictionary $kgm_props $kgm$arg0_stack +define setfindregistrystr + set $kgm_reg_find_str0 = 0 + set $kgm_reg_find_str1 = 0 + set $kgm_reg_find_str2 = 0 + set $kgm_reg_find_str3 = 0 + set $kgm_reg_find_str4 = 0 + set $kgm_reg_find_str5 = 0 + set $kgm_reg_find_str6 = 0 + set $kgm_reg_find_str7 = 0 + set $kgm_reg_find_str8 = 0 + + if $argc > 0 + set $kgm_reg_find_str0 = $arg0 + end + if $argc > 1 + set $kgm_reg_find_str1 = $arg1 + end + if $argc > 2 + set $kgm_reg_find_str2 = $arg2 + end + if $argc > 3 + set $kgm_reg_find_str3 = $arg3 + end + if $argc > 4 + set $kgm_reg_find_str4 = $arg4 + end + if $argc > 5 + set $kgm_reg_find_str5 = $arg5 + end + if $argc > 6 + set $kgm_reg_find_str6 = $arg6 + end + if $argc > 7 + set $kgm_reg_find_str7 = $arg7 + end + if $argc > 8 + set $kgm_reg_find_str8 = $arg8 + end +end + +document setfindregistrystr +Syntax: (gdb) setfindregistrystr [a] [b] [c] [d] [e] [f] [g] [h] [i] +| Store an encoded string into up to 9 arguments for use by +| findregistryprop or findregistryentry. 
The arguments are created +| through calls to strcmp_arg_pack64 +end + +define _findregistryprop + set $reg = (IOService *) $arg0 + set $kgm_props = $reg->fPropertyTable + set $kgm_findregistry_verbose = 0 + + findregdictvalue $kgm_props +end + +define findregistryprop + set $reg = (IOService *) $arg0 + set $kgm_props = $reg->fPropertyTable + + set $kgm_findregistry_verbose = 1 + findregdictvalue $kgm_props +end + +document findregistryprop +Syntax: (gdb) findregistryprop +| Given a registry entry, print out the contents for the property that matches +| the encoded string specified via setfindregistrystr. +| +| For example, the following will print out the "intel-pic" property stored in +| the AppleACPIPlatformExpert registry entry $pe_entry: +| strcmp_arg_pack64 'i' 'n' 't' 'e' 'l' '-' 'p' 'i' +| set $intel_pi = $kgm_strcmp_arg +| strcmp_arg_pack64 'c' 0 0 0 0 0 0 0 +| set $c = $kgm_strcmp_arg +| setfindregistrystr $intel_pi $c +| findregistryprop $pe_entry +end + +define findregistryentryint + set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey + set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] + if $kgm_findregistry_verbose + printf "Searching" + end + findregistryentryrecurse _ $arg0 0 0 +end + +define _findregistryentry + set $kgm_findregistry_verbose = 0 + set $kgm_findregistry_continue = 0 + set $kgm_reg_depth = 0 + + findregistryentryint gRegistryRoot +end + +define findregistryentry + set $kgm_findregistry_verbose = 1 + set $kgm_findregistry_continue = 0 + set $kgm_reg_depth = 0 + + findregistryentryint gRegistryRoot +end + +define findregistryentries + set $kgm_findregistry_verbose = 1 + set $kgm_findregistry_continue = 1 + set $kgm_reg_depth = 0 + + findregistryentryint gRegistryRoot +end + +document findregistryentry +Syntax: (gdb) findregistryentry +| Search for a registry entry that matches the encoded string specified through +| setfindregistrystr. You can alter the search depth through use of +| $kgm_reg_depth_max. 
+| +| For example, the following will pull out the AppleACPIPlatformExpert registry +| entry: +| strcmp_arg_pack64 'A' 'p' 'p' 'l' 'e' 'A' 'C' 'P' +| set $AppleACP = $kgm_strcmp_arg +| strcmp_arg_pack64 'I' 'P' 'l' 'a' 't' 'f' 'o' 'r' +| set $IPlatfor = $kgm_strcmp_arg +| strcmp_arg_pack64 'm' 'E' 'x' 'p' 'e' 'r' 't' 0 +| set $mExpert = $kgm_strcmp_arg +| setfindregistrystr $AppleACP $IPlatfor $mExpert +| findregistryentry +end + +document findregistryentries +Syntax: (gdb) findregistryentries +| Search for all registry entries that match the encoded string specified through +| setfindregistrystr. You can alter the search depth through use of +| $kgm_reg_depth_max. See findregistryentry for an example of how to encode a string. +end + + +define showregistryentryrecurse + _registryentryrecurseinit $arg0 $arg1 $arg2 $arg3 + + indent $kgm_reg_depth $kgm$arg0_stack + printf "+-o " + + dictget $kgm_re->fRegistryTable $kgm_namekey + if ($kgm_result == 0) + dictget $kgm_re->fRegistryTable gIONameKey + end + if ($kgm_result == 0) + dictget $kgm_re->fPropertyTable gIOClassKey + end + + if ($kgm_result != 0) + printf "%s", ((OSString *)$kgm_result)->string + else + if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) + printf "%s", ((IOService*)$kgm_re)->pwrMgt->Name + else +# printf ", guessclass " +# guessclass $kgm_re + printf "??" + end + end + + + printf " IORegistryEntry::reserved->fRegistryEntryID + printf "vtable " + set $kgm_vt = (unsigned long) *(void**) $kgm_re + if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm) + set $kgm_vt = $kgm_vt - 2 * sizeof(void *) + end + output /a $kgm_vt + + if ($kgm_vt != &_ZTV15IORegistryEntry) + printf ", " + set $kgm_state = $kgm_re->__state[0] + # kIOServiceRegisteredState + if (0 == ($kgm_state & 2)) + printf "!" + end + printf "registered, " + # kIOServiceMatchedState + if (0 == ($kgm_state & 4)) + printf "!" 
+ end + printf "matched, " + # kIOServiceInactiveState + if ($kgm_state & 1) + printf "in" + end + printf "active, busy %d, retain count %d", (0xff & $kgm_re->__state[1]), (0xffff & $kgm_re->retainCount) + end + printf ">\n" + + if ($kgm_show_props) + set $kgm_props = $kgm_re->fPropertyTable + showregdictionary $kgm_props $kgm$arg0_stack end # recurse if ($kgm$arg0_child_count != 0) - set $kgm_reg_depth = $kgm_reg_depth + 1 - set $kgm$arg0_child_idx = 0 + set $kgm_reg_depth = $kgm_reg_depth + 1 + set $kgm$arg0_child_idx = 0 - while ($kgm$arg0_child_idx < $kgm$arg0_child_count) - set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] - set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) - showregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib - end + while ($kgm$arg0_child_idx < $kgm$arg0_child_count) + set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] + set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) + if $kgm_reg_depth >= $kgm_reg_depth_max + 1 + loop_break + end + showregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib + end - set $kgm_reg_depth = $kgm_reg_depth - 1 + set $kgm_reg_depth = $kgm_reg_depth - 1 end end define showregistryentryint - set $kgm_namekey = (OSSymbol *) $kgm_reg_plane[2] - set $kgm_childkey = (OSSymbol *) $kgm_reg_plane[4] + set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey + set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] showregistryentryrecurse _ $arg0 0 0 end @@ -2740,7 +3366,8 @@ define showregistry end document showregistry Syntax: (gdb) showregistry -| Show info about all registry entries in the current plane. +| Show info about all registry entries in the current plane. You can specify the maximum +| display depth with $kgm_reg_depth_max. 
end define showregistryprops @@ -2766,11 +3393,11 @@ Syntax: (gdb) showregistryentry end define setregistryplane - if ($arg0) - set $kgm_reg_plane = (void **) $arg0 + if ($arg0 != 0) + set $kgm_reg_plane = (IORegistryPlane *) $arg0 else - showobjectint _ gIORegistryPlanes - printf "\n" + showobjectint _ gIORegistryPlanes + printf "\n" end end document setregistryplane @@ -2831,12 +3458,12 @@ define showosobjecttracking showobject $obj set $kgm_idx = 0 while $kgm_idx < (sizeof($kgm_next->bt) / sizeof($kgm_next->bt[0])) - if ((unsigned) $kgm_next->bt[$kgm_idx] > (unsigned) sectPRELINKB) + if ((unsigned long) $kgm_next->bt[$kgm_idx] > (unsigned long) &last_kernel_symbol) showkmodaddr $kgm_next->bt[$kgm_idx] printf "\n" else - if ((unsigned) $kgm_next->bt[$kgm_idx] > 0) - output /a (unsigned) $kgm_next->bt[$kgm_idx] + if ((unsigned long) $kgm_next->bt[$kgm_idx] > 0) + output /a $kgm_next->bt[$kgm_idx] printf "\n" end end @@ -2854,32 +3481,152 @@ Syntax: (gdb) showosobjecttracking | Set gOSObjectTrackThread to 1 or a thread_t to capture new OSObjects allocated by a thread or all threads. 
end -define readphys - set kdp_trans_off = 1 - x/x $arg0 - set kdp_trans_off = 0 +define readphysint + set $kgm_readphysint_result = 0xBAD10AD + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_readphysmem64_req_t) + set $kgm_pkt = (kdp_readphysmem64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_READPHYSMEM64 + set $kgm_pkt->hdr.len = sizeof(kdp_readphysmem64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint64_t)$arg0 + set $kgm_pkt->nbytes = $arg1 >> 3 + set $kgm_pkt->lcpu = $arg2 + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_readphysmem64_reply_t *)&manual_pkt.data + if ($kgm_pkt->error == 0) + if $arg1 == 8 + set $kgm_readphysint_result = *((uint8_t *)$kgm_pkt->data) + end + if $arg1 == 16 + set $kgm_readphysint_result = *((uint16_t *)$kgm_pkt->data) + end + if $arg1 == 32 + set $kgm_readphysint_result = *((uint32_t *)$kgm_pkt->data) + end + if $arg1 == 64 + set $kgm_readphysint_result = *((uint64_t *)$kgm_pkt->data) + end + end +end + +define readphys8 + readphysint $arg0 8 $kgm_lcpu_self + output /a $arg0 + printf ":\t0x%02hhx\n", $kgm_readphysint_result + set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result +end + +define readphys16 + readphysint $arg0 16 $kgm_lcpu_self + output /a $arg0 + printf ":\t0x%04hx\n", $kgm_readphysint_result + set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result +end + +define readphys32 + readphysint $arg0 32 $kgm_lcpu_self + output /a $arg0 + printf ":\t0x%08x\n", $kgm_readphysint_result + set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result end define readphys64 - if ($kgm_mtype == 18) - set kdp_src_high32 = ((uint32_t) ($arg0)) >> 32 - x/x (uint32_t) (($arg0) & 0x00000000ffffffffUL) - set kdp_src_high32 = 0 - else - echo readphys64 not available on this architecture.\n - end + 
readphysint $arg0 64 $kgm_lcpu_self + output /a $arg0 + printf ":\t0x%016llx\n", $kgm_readphysint_result + set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result +end + +define readphys + readphys32 $arg0 +end + +document readphys8 +| See readphys64 +end + +document readphys16 +| See readphys64 end -document readphys -| The argument is interpreted as a physical address, and the word addressed is -| displayed. While this fails if no physical page exists at the given address, -| it must be used with caution. +document readphys32 +| See readphys64 end document readphys64 -| The argument is interpreted as a 64-bit physical address, and the word -| addressed is displayed. While this fails if no physical page exists at the -| given address, it must be used with caution. +| The argument is interpreted as a physical address, and the 64-bit word +| addressed is displayed. Saves 64-bit result in $kgm_readphys_result. +end + +define writephysint + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_writephysmem64_req_t) + set $kgm_pkt = (kdp_writephysmem64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_WRITEPHYSMEM64 + set $kgm_pkt->hdr.len = sizeof(kdp_writephysmem64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint64_t)$arg0 + set $kgm_pkt->nbytes = $arg1 >> 3 + set $kgm_pkt->lcpu = $arg3 + if $arg1 == 8 + set *(uint8_t *)$kgm_pkt->data = (uint8_t)$arg2 + end + if $arg1 == 16 + set *(uint16_t *)$kgm_pkt->data = (uint16_t)$arg2 + end + if $arg1 == 32 + set *(uint32_t *)$kgm_pkt->data = (uint32_t)$arg2 + end + if $arg1 == 64 + set *(uint64_t *)$kgm_pkt->data = (uint64_t)$arg2 + end + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_writephysmem64_reply_t *)&manual_pkt.data + set $kgm_writephysint_result = $kgm_pkt->error +end + +define writephys8 + 
writephysint $arg0 8 $arg1 $kgm_lcpu_self +end + +define writephys16 + writephysint $arg0 16 $arg1 $kgm_lcpu_self +end + +define writephys32 + writephysint $arg0 32 $arg1 $kgm_lcpu_self +end + +define writephys64 + writephysint $arg0 64 $arg1 $kgm_lcpu_self +end + +document writephys8 +| See writephys64 +end + +document writephys16 +| See writephys64 +end + +document writephys32 +| See writephys64 +end + +document writephys64 +| The argument is interpreted as a physical address, and the second argument is +| written to that address as a 64-bit word. end define addkextsyms @@ -2910,8 +3657,20 @@ Syntax: (gdb) showprocfiles end define _showprocheader - printf "fd fileglob fg flags fg type fg data info\n" - printf "----- ---------- ---------- -------- ---------- -------------------\n" + printf "fd fileglob " + showptrhdrpad + printf " fg flags fg type fg data " + showptrhdrpad + printf " info\n" + printf "----- ----------" + if $kgm_lp64 + printf "--------" + end + printf " ---------- -------- ----------" + if $kgm_lp64 + printf "--------" + end + printf " -------------------\n" end define _showprocfiles @@ -2927,7 +3686,9 @@ define _showprocfiles # display fd #, fileglob address, fileglob flags set $kgm_spf_flags = $kgm_spf_ofiles[$kgm_spf_count].f_flags set $kgm_spf_fg = $kgm_spf_ofiles[$kgm_spf_count].f_fglob - printf "%-5d 0x%08x 0x%08x ", $kgm_spf_count, $kgm_spf_fg, $kgm_spf_flags + printf "%-5d ", $kgm_spf_count + showptr $kgm_spf_fg + printf " 0x%08x ", $kgm_spf_flags # decode fileglob type set $kgm_spf_fgt = $kgm_spf_fg->fg_type if ($kgm_spf_fgt == 1) @@ -2958,7 +3719,9 @@ define _showprocfiles # display fileglob data address and decode interesting fact(s) # about data, if we know any set $kgm_spf_fgd = $kgm_spf_fg->fg_data - printf " 0x%08x ", $kgm_spf_fgd + printf " " + showptr $kgm_spf_fgd + printf " " if ($kgm_spf_fgt == 1) set $kgm_spf_name = ((struct vnode *)$kgm_spf_fgd)->v_name if ($kgm_spf_name == 0) @@ -3029,17 +3792,23 @@ define _showproclocks end 
set $kgm_spl_count = $kgm_spf_count + 1 end - printf "%d total locks for 0x%08x\n", $kgm_spl_seen, $arg0 + printf "%d total locks for ", $kgm_spl_seen + showptr $arg0 + printf "\n" end define showprocinfo set $kgm_spi_proc = (proc_t)$arg0 - printf "Process 0x%08x\n", $kgm_spi_proc + printf "Process " + showptr $kgm_spi_proc + printf "\n" printf " name %s\n", $kgm_spi_proc->p_comm - printf " pid:%.8d", $kgm_spi_proc->p_pid - printf " task:0x%.8x", $kgm_spi_proc->task - printf " p_stat:%.1d", $kgm_spi_proc->p_stat - printf " parent pid:%.8d", $kgm_spi_proc->p_ppid + printf " pid:%.8d", $kgm_spi_proc->p_pid + printf " task:" + showptr $kgm_spi_proc->task + printf " p_stat:%.1d", $kgm_spi_proc->p_stat + printf " parent pid:%.8d", $kgm_spi_proc->p_ppid + printf "\n" # decode part of credential set $kgm_spi_cred = $kgm_spi_proc->p_ucred if ($kgm_spi_cred != 0) @@ -3125,7 +3894,7 @@ define showprocinfo printf " 0x00800000 - signal exceptions\n" end if ($kgm_spi_flag & 0x01000000) - printf " 0x01000000 - being branch traced\n" + printf " 0x01000000 - has thread cwd\n" end if ($kgm_spi_flag & 0x02000000) printf " 0x02000000 - has vfork() children\n" @@ -3239,14 +4008,16 @@ end define print_vnode set $vp = (struct vnode *)$arg0 printf " " - printf " vp 0x%.8x", $vp + printf " vp " + showptr $vp printf " use %d", $vp->v_usecount printf " io %d", $vp->v_iocount printf " kuse %d", $vp->v_kusecount printf " type %d", $vp->v_type printf " flg 0x%.8x", $vp->v_flag printf " lflg 0x%.8x", $vp->v_lflag - printf " par 0x%.8x", $vp->v_parent + printf " par " + showptr $vp->v_parent set $_name = (char *)$vp->v_name if ($_name != 0) printf " %s", $_name @@ -3427,7 +4198,8 @@ end define print_mount set $mp = (struct mount *)$arg0 printf " " - printf " mp 0x%.8x", $mp + printf " mp " + showptr $mp printf " flag %x", $mp->mnt_flag printf " kern_flag %x", $mp->mnt_kern_flag printf " lflag %x", $mp->mnt_lflag @@ -3451,12 +4223,11 @@ Syntax: showallmounts end define pcprint - set $pc = $arg0 - 
if ((unsigned int)$pc <= (unsigned int) $kgm_fkmodmax) && \ - ((unsigned int)$pc >= (unsigned int)$kgm_fkmodmin) - showkmodaddr $pc + if (((unsigned long) $arg0 < (unsigned long) &_mh_execute_header || \ + (unsigned long) $arg0 >= (unsigned long) &last_kernel_symbol )) + showkmodaddr $arg0 else - output/a $pc + output /a $arg0 end end @@ -3465,7 +4236,7 @@ define mbuf_walkpkt set $cnt = 1 set $tot = 0 while $mp - printf "%4d: 0x%08x [len %4d, type %2d, ", $cnt, $mp, \ + printf "%4d: %p [len %4d, type %2d, ", $cnt, $mp, \ $mp->m_hdr.mh_len, $mp->m_hdr.mh_type if mclaudit != 0 mbuf_buf2mca $mp @@ -3488,7 +4259,7 @@ define mbuf_walk set $cnt = 1 set $tot = 0 while $mp - printf "%4d: 0x%08x [len %4d, type %2d, ", $cnt, $mp, \ + printf "%4d: %p [len %4d, type %2d, ", $cnt, $mp, \ $mp->m_hdr.mh_len, $mp->m_hdr.mh_type if mclaudit != 0 mbuf_buf2mca $mp @@ -3511,7 +4282,7 @@ define mbuf_buf2slab set $gix = ((char *)$addr - (char *)mbutl) >> 20 set $ix = ((char *)$addr - (char *)mbutl) >> 11 set $slab = &slabstbl[$gix].slg_slab[$ix] - printf "0x%08x", $slab + printf "%p", $slab end document mbuf_buf2slab @@ -3524,7 +4295,7 @@ define mbuf_buf2mca set $clbase = ((union mcluster *)(mbutl + $ix)) set $mclidx = (((char *)$addr - (char *)$clbase) >> 8) set $mca = mclaudit[$ix].cl_audit[$mclidx] - printf "mca: 0x%08x", $mca + printf "mca: %p", $mca end document mbuf_buf2mca @@ -3567,9 +4338,9 @@ define mbuf_showmca $mca->mca_thread set $cnt = 0 while $cnt < $mca->mca_depth - set $pc = $mca->mca_stack[$cnt] + set $kgm_pc = $mca->mca_stack[$cnt] printf "%4d: ", $cnt + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" set $cnt = $cnt + 1 end @@ -3579,9 +4350,9 @@ define mbuf_showmca end set $cnt = 0 while $cnt < $mca->mca_pdepth - set $pc = $mca->mca_pstack[$cnt] + set $kgm_pc = $mca->mca_pstack[$cnt] printf "%4d: ", $cnt + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" set $cnt = $cnt + 1 end @@ -3599,9 +4370,16 @@ set $MCF_NOCPUCACHE = 0x10 define mcache_stat set $head = (mcache_t 
*)mcache_head set $mc = $head - printf "cache cache cache buf buf backing (# of retries) bufs\n" - printf "name state addr size align zone wait nowait failed incache\n" - printf "------------------------- -------- ---------- ------ ----- ---------- -------------------------- --------\n" + + if $kgm_lp64 + printf "cache cache cache buf buf backing (# of retries) bufs\n" + printf "name state addr size align zone wait nowait failed incache\n" + printf "------------------------- -------- ------------------ ------ ----- ------------------ -------------------------- --------\n" + else + printf "cache cache cache buf buf backing (# of retries) bufs\n" + printf "name state addr size align zone wait nowait failed incache\n" + printf "------------------------- -------- ---------- ------ ----- ---------- -------------------------- --------\n" + end while $mc != 0 set $bktsize = $mc->mc_cpu.cc_bktsize printf "%-25s ", $mc->mc_name @@ -3618,12 +4396,16 @@ define mcache_stat end end end - printf " 0x%08x %6d %5d ",$mc, \ + printf " %p %6d %5d ",$mc, \ $mc->mc_bufsize, $mc->mc_align if $mc->mc_slab_zone != 0 - printf "0x%08x", $mc->mc_slab_zone + printf "%p", $mc->mc_slab_zone else - printf " custom" + if $kgm_lp64 + printf " custom" + else + printf " custom" + end end set $tot = 0 set $tot += $mc->mc_full.bl_total * $bktsize @@ -3669,7 +4451,7 @@ define mcache_walkobj set $cnt = 1 set $tot = 0 while $p - printf "%4d: 0x%08x\n", $cnt, $p, + printf "%4d: %p\n", $cnt, $p, set $p = $p->obj_next set $cnt = $cnt + 1 end @@ -3749,11 +4531,16 @@ define mbuf_slabs set $slg = (mcl_slabg_t *)$arg0 set $x = 0 - printf "slot addr next base C R N size flags\n" - printf "---- ---------- ---------- ---------- -- -- -- ------ -----\n" + if $kgm_lp64 + printf "slot addr next base C R N size flags\n" + printf "---- ------------------ ------------------ ------------------ -- -- -- ------ -----\n" + else + printf "slot addr next base C R N size flags\n" + printf "---- ---------- ---------- 
---------- -- -- -- ------ -----\n" + end while $x < $NSLABSPMB set $sl = &$slg->slg_slab[$x] - printf "%3d: 0x%08x 0x%08x 0x%08x %2d %2d %2d %6d 0x%04x ", \ + printf "%3d: %p %p %p %2d %2d %2d %6d 0x%04x ", \ $x + 1, $sl, $sl->sl_next, $sl->sl_base, $sl->sl_class, \ $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \ $sl->sl_flags @@ -3963,7 +4750,11 @@ document mbuf_mca_ctype end define mbuf_showactive - mbuf_walkallslabs 1 0 + if $argc == 0 + mbuf_walkallslabs 1 0 + else + mbuf_walkallslabs 1 0 $arg0 + end end document mbuf_showactive @@ -4006,6 +4797,11 @@ end define mbuf_walkallslabs set $show_a = $arg0 set $show_f = $arg1 + if $argc == 3 + set $show_tr = $arg2 + else + set $show_tr = 0 + end set $x = 0 set $total = 0 set $total_a = 0 @@ -4023,10 +4819,16 @@ define mbuf_walkallslabs end printf "objects; this may take a while ...)\n\n" - printf " slab mca obj allocation\n" - printf "slot idx address address address type state\n" - printf "---- ---- ---------- ---------- ---------- ----- -----------\n" - + if $kgm_lp64 + printf " slab mca obj allocation\n" + printf "slot idx address address address type state\n" + printf "---- ---- ------------------ ------------------ ------------------ ----- -----------\n" + else + printf " slab mca obj allocation\n" + printf "slot idx address address address type state\n" + printf "---- ---- ---------- ---------- ---------- ----- -----------\n" + end + while $x < slabgrp set $slg = slabstbl[$x] set $y = 0 @@ -4052,12 +4854,16 @@ define mbuf_walkallslabs if $printmca != 0 if $first == 1 - printf "%4d %4d 0x%08x ", $x, $y, $sl + printf "%4d %4d %p ", $x, $y, $sl else - printf " " + if $kgm_lp64 + printf " " + else + printf " " + end end - printf "0x%08x 0x%08x ", $mca, $mca->mca_addr + printf "%p %p ", $mca, $mca->mca_addr mbuf_mca_ctype $mca 0 if $mca->mca_uflags & ($MB_INUSE|$MB_COMP_INUSE) printf "active " @@ -4069,7 +4875,21 @@ define mbuf_walkallslabs end printf "\n" set $total = $total + 1 + + if $show_tr != 0 + printf "recent 
transaction for this buffer (thread %p):\n", \ + $mca->mca_thread + set $cnt = 0 + while $cnt < $mca->mca_depth + set $kgm_pc = $mca->mca_stack[$cnt] + printf "%4d: ", $cnt + 1 + pcprint $kgm_pc + printf "\n" + set $cnt = $cnt + 1 + end + end end + set $mca = $mca->mca_next end set $y += 1 @@ -4092,17 +4912,254 @@ document mbuf_walkallslabs | parameter. This is a backend routine for mbuf_show{active,inactive,all}. end +set $RTF_UP = 0x1 +set $RTF_GATEWAY = 0x2 +set $RTF_HOST = 0x4 +set $RTF_REJECT = 0x8 +set $RTF_DYNAMIC = 0x10 +set $RTF_MODIFIED = 0x20 +set $RTF_DONE = 0x40 +set $RTF_DELCLONE = 0x80 +set $RTF_CLONING = 0x100 +set $RTF_XRESOLVE = 0x200 +set $RTF_LLINFO = 0x400 +set $RTF_STATIC = 0x800 +set $RTF_BLACKHOLE = 0x1000 +set $RTF_PROTO2 = 0x4000 +set $RTF_PROTO1 = 0x8000 +set $RTF_PRCLONING = 0x10000 +set $RTF_WASCLONED = 0x20000 +set $RTF_PROTO3 = 0x40000 +set $RTF_PINNED = 0x100000 +set $RTF_LOCAL = 0x200000 +set $RTF_BROADCAST = 0x400000 +set $RTF_MULTICAST = 0x800000 +set $RTF_IFSCOPE = 0x1000000 +set $RTF_CONDEMNED = 0x2000000 + +set $AF_INET = 2 +set $AF_INET6 = 30 +set $AF_LINK = 18 + +define rtentry_prdetails + set $rt = (struct rtentry *)$arg0 + set $is_v6 = 0 + + set $dst = (struct sockaddr *)$rt->rt_nodes->rn_u.rn_leaf.rn_Key + if $dst->sa_family == $AF_INET + showsockaddr_in $dst + printf " " + else + if $dst->sa_family == $AF_INET6 + showsockaddr_in6 $dst + printf " " + set $is_v6 = 1 + else + if $dst->sa_family == $AF_LINK + showsockaddr_dl $dst + printf " " + else + showsockaddr_unspec $dst + end + end + end + + set $dst = (struct sockaddr *)$rt->rt_gateway + if $dst->sa_family == $AF_INET + showsockaddr_in $dst + printf " " + else + if $dst->sa_family == $AF_INET6 + set $is_v6 = 1 + showsockaddr_in6 $dst + printf " " + else + if $dst->sa_family == $AF_LINK + showsockaddr_dl $dst + if $is_v6 + printf " " + else + printf " " + end + else + showsockaddr_unspec $dst + end + end + end + + if $rt->rt_flags & $RTF_WASCLONED + if $kgm_lp64 + 
printf "%18p ", $rt->rt_parent + else + printf "%10p ", $rt->rt_parent + end + else + if $kgm_lp64 + printf " " + else + printf " " + end + end + + printf "%6u %8u ", $rt->rt_refcnt, $rt->rt_rmx.rmx_pksent + + if $rt->rt_flags & $RTF_UP + printf "U" + end + if $rt->rt_flags & $RTF_GATEWAY + printf "G" + end + if $rt->rt_flags & $RTF_HOST + printf "H" + end + if $rt->rt_flags & $RTF_REJECT + printf "R" + end + if $rt->rt_flags & $RTF_DYNAMIC + printf "D" + end + if $rt->rt_flags & $RTF_MODIFIED + printf "M" + end + if $rt->rt_flags & $RTF_CLONING + printf "C" + end + if $rt->rt_flags & $RTF_PRCLONING + printf "c" + end + if $rt->rt_flags & $RTF_LLINFO + printf "L" + end + if $rt->rt_flags & $RTF_STATIC + printf "S" + end + if $rt->rt_flags & $RTF_PROTO1 + printf "1" + end + if $rt->rt_flags & $RTF_PROTO2 + printf "2" + end + if $rt->rt_flags & $RTF_PROTO3 + printf "3" + end + if $rt->rt_flags & $RTF_WASCLONED + printf "W" + end + if $rt->rt_flags & $RTF_BROADCAST + printf "b" + end + if $rt->rt_flags & $RTF_MULTICAST + printf "m" + end + if $rt->rt_flags & $RTF_XRESOLVE + printf "X" + end + if $rt->rt_flags & $RTF_BLACKHOLE + printf "B" + end + if $rt->rt_flags & $RTF_IFSCOPE + printf "I" + end + + printf "/%s%d", $rt->rt_ifp->if_name, $rt->rt_ifp->if_unit +end + +set $RNF_ROOT = 2 + +define _rttable_dump + set $rnh = $arg0 + set $rn = (struct radix_node *)$rnh->rnh_treetop + set $rnh_cnt = $rnh->rnh_cnt + + while $rn->rn_bit >= 0 + set $rn = $rn->rn_u.rn_node.rn_L + end + + while 1 + set $base = (struct radix_node *)$rn + while ($rn->rn_parent->rn_u.rn_node.rn_R == $rn) && ($rn->rn_flags & $RNF_ROOT) == 0 + set $rn = $rn->rn_parent + end + set $rn = $rn->rn_parent->rn_u.rn_node.rn_R + while $rn->rn_bit >= 0 + set $rn = $rn->rn_u.rn_node.rn_L + end + set $next = $rn + while $base != 0 + set $rn = $base + set $base = $rn->rn_u.rn_leaf.rn_Dupedkey + if ($rn->rn_flags & $RNF_ROOT) == 0 + + set $rt = (struct rtentry *)$rn + + if $kgm_lp64 + printf "%18p ", $rt + else + 
printf "%10p ", $rt + end + rtentry_prdetails $rt + printf "\n" + + end + end + set $rn = $next + if ($rn->rn_flags & $RNF_ROOT) != 0 + loop_break + end + end +end + + +define show_rt_inet + if $kgm_lp64 + printf " rtentry dst gw parent Refs Use flags/if\n" + printf " ----------------- --------------- ----------------- ------------------ ------ -------- -----------\n" + else + printf " rtentry dst gw parent Refs Use flags/if\n" + printf " --------- --------------- ----------------- ---------- ------ -------- -----------\n" + end + _rttable_dump rt_tables[2] +end + +document show_rt_inet +Syntax: (gdb) show_rt_inet +| Show the entries of the IPv4 routing table. +end + +define show_rt_inet6 + if $kgm_lp64 + printf " rtentry dst gw parent Refs Use flags/if\n" + printf " ----------------- --------------------------------------- --------------------------------------- ------------------ ------ -------- -----------\n" + else + printf " rtentry dst gw parent Refs Use flags/if\n" + printf " --------- --------------------------------------- --------------------------------------- ---------- ------ -------- -----------\n" + end + _rttable_dump rt_tables[30] +end + +document show_rt_inet6 +Syntax: (gdb) show_rt_inet6 +| Show the entries of the IPv6 routing table. 
+end + define rtentry_trash set $rtd = (struct rtentry_dbg *)rttrash_head.tqh_first set $cnt = 0 while $rtd != 0 if $cnt == 0 - printf " rtentry_dbg ref flags\n" - printf " ------------ --- ----------\n" + if $kgm_lp64 + printf " rtentry ref hold rele dst gw parent flags/if\n" + printf " ----------------- --- ------ ------ --------------- ----- ------------------ -----------\n" + else + printf " rtentry ref hold rele dst gw parent flags/if\n" + printf " --------- --- ------ ------ --------------- ----- ---------- -----------\n" + end end - printf "%4d: %p %3d 0x%08x\n", $cnt + 1, $rtd, \ + printf "%4d: %p %3d %6d %6d ", $cnt + 1, $rtd, \ $rtd->rtd_refhold_cnt - $rtd->rtd_refrele_cnt, \ - $rtd->rtd_entry.rt_flags + $rtd->rtd_refhold_cnt, $rtd->rtd_refrele_cnt + rtentry_prdetails $rtd + printf "\n" set $rtd = $rtd->rtd_trash_link.tqe_next set $cnt = $cnt + 1 end @@ -4115,57 +5172,55 @@ Syntax: (gdb) rtentry_trash | "rte_debug" boot-args parameter. end -set $RTD_TRSTACK_SIZE = 8 -set $RTD_REFHIST_SIZE = 4 +set $CTRACE_STACK_SIZE = ctrace_stack_size +set $CTRACE_HIST_SIZE = ctrace_hist_size define rtentry_showdbg set $rtd = (struct rtentry_dbg *)$arg0 set $cnt = 0 - printf "Total holds: %d\n", $rtd->rtd_refhold_cnt - printf "Next hold slot: %d\n", $rtd->rtd_refhold_next - printf "Total releases: %d\n", $rtd->rtd_refrele_cnt - printf "Next release slot: %d\n", $rtd->rtd_refrele_next + printf "Total holds:\t%d\n", $rtd->rtd_refhold_cnt + printf "Total releases:\t%d\n", $rtd->rtd_refrele_cnt set $ix = 0 - while $ix < $RTD_TRSTACK_SIZE - set $pc = $rtd->rtd_alloc_stk_pc[$ix] - if $pc != 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_alloc.pc[$ix] + if $kgm_pc != 0 if $ix == 0 printf "\nAlloc (thread %p):\n", \ - $rtd->rtd_alloc_thread + $rtd->rtd_alloc.th end printf "%4d: ", $ix + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" end set $ix = $ix + 1 end set $ix = 0 - while $ix < $RTD_TRSTACK_SIZE - set $pc = $rtd->rtd_free_stk_pc[$ix] - if $pc != 0 + while $ix 
< $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_free.pc[$ix] + if $kgm_pc != 0 if $ix == 0 printf "\nFree: (thread %p)\n", \ - $rtd->rtd_free_thread + $rtd->rtd_free.th end printf "%4d: ", $ix + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" end set $ix = $ix + 1 end - while $cnt < $RTD_REFHIST_SIZE + while $cnt < $CTRACE_HIST_SIZE set $ix = 0 - while $ix < $RTD_TRSTACK_SIZE - set $pc = $rtd->rtd_refhold[$cnt].pc[$ix] - if $pc != 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_refhold[$cnt].pc[$ix] + if $kgm_pc != 0 if $ix == 0 printf "\nHold [%d] (thread %p):\n", \ $cnt, $rtd->rtd_refhold[$cnt].th end printf "%4d: ", $ix + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" end set $ix = $ix + 1 @@ -4173,45 +5228,245 @@ define rtentry_showdbg set $cnt = $cnt + 1 end set $cnt = 0 - while $cnt < $RTD_REFHIST_SIZE + while $cnt < $CTRACE_HIST_SIZE set $ix = 0 - while $ix < $RTD_TRSTACK_SIZE - set $pc = $rtd->rtd_refrele[$cnt].pc[$ix] - if $pc != 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_refrele[$cnt].pc[$ix] + if $kgm_pc != 0 if $ix == 0 printf "\nRelease [%d] (thread %p):\n",\ $cnt, $rtd->rtd_refrele[$cnt].th end printf "%4d: ", $ix + 1 - pcprint $pc + pcprint $kgm_pc printf "\n" end set $ix = $ix + 1 end set $cnt = $cnt + 1 end -end - -document rtentry_showdbg -Syntax: (gdb) rtentry_showdbg -| Given a route entry structure address, print the debug information -| related to it. This requires route entry debugging to be turned -| on, by setting the appropriate flags to the "rte_debug" boot-args -| parameter. 
-end - -# -# print all OSMalloc stats - -define ostag_print -set $kgm_tagp = (OSMallocTag)$arg0 -printf "0x%08x: ", $kgm_tagp -printf "%8d ",$kgm_tagp->OSMT_refcnt -printf "%8x ",$kgm_tagp->OSMT_state -printf "%8x ",$kgm_tagp->OSMT_attr -printf "%s ",$kgm_tagp->OSMT_name -printf "\n" -end + + printf "\nTotal locks:\t%d\n", $rtd->rtd_lock_cnt + printf "Total unlocks:\t%d\n", $rtd->rtd_unlock_cnt + + set $cnt = 0 + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_lock[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nLock [%d] (thread %p):\n",\ + $cnt, $rtd->rtd_lock[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end + set $cnt = 0 + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $rtd->rtd_unlock[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nUnlock [%d] (thread %p):\n",\ + $cnt, $rtd->rtd_unlock[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end +end + +document rtentry_showdbg +Syntax: (gdb) rtentry_showdbg +| Given a route entry structure address, print the debug information +| related to it. This requires route entry debugging to be turned +| on, by setting the appropriate flags to the "rte_debug" boot-args +| parameter. 
+end + +define inifa_showdbg + set $inifa = (struct in_ifaddr_dbg *)$arg0 + set $cnt = 0 + + printf "Total holds:\t%d\n", $inifa->inifa_refhold_cnt + printf "Total releases:\t%d\n", $inifa->inifa_refrele_cnt + + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $inifa->inifa_alloc.pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nAlloc (thread %p):\n", \ + $inifa->inifa_alloc.th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $inifa->inifa_free.pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nFree: (thread %p)\n", \ + $inifa->inifa_free.th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $inifa->inifa_refhold[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nHold [%d] (thread %p):\n", \ + $cnt, $inifa->inifa_refhold[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end + set $cnt = 0 + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $inifa->inifa_refrele[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nRelease [%d] (thread %p):\n",\ + $cnt, $inifa->inifa_refrele[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end +end + +document inifa_showdbg +Syntax: (gdb) inifa_showdbg +| Given an IPv4 interface structure address, print the debug information +| related to it. This requires interface address debugging to be turned +| on, by setting the appropriate flags to the "ifa_debug" boot-args +| parameter. 
+end + +define in6ifa_showdbg + set $in6ifa = (struct in6_ifaddr_dbg *)$arg0 + set $cnt = 0 + + printf "Total holds:\t%d\n", $in6ifa->in6ifa_refhold_cnt + printf "Total releases:\t%d\n", $in6ifa->in6ifa_refrele_cnt + + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $in6ifa->in6ifa_alloc.pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nAlloc (thread %p):\n", \ + $in6ifa->in6ifa_alloc.th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $in6ifa->in6ifa_free.pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nFree: (thread %p)\n", \ + $in6ifa->in6ifa_free.th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $in6ifa->in6ifa_refhold[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nHold [%d] (thread %p):\n", \ + $cnt, $in6ifa->in6ifa_refhold[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end + set $cnt = 0 + while $cnt < $CTRACE_HIST_SIZE + set $ix = 0 + while $ix < $CTRACE_STACK_SIZE + set $kgm_pc = $in6ifa->in6ifa_refrele[$cnt].pc[$ix] + if $kgm_pc != 0 + if $ix == 0 + printf "\nRelease [%d] (thread %p):\n",\ + $cnt, $in6ifa->in6ifa_refrele[$cnt].th + end + printf "%4d: ", $ix + 1 + pcprint $kgm_pc + printf "\n" + end + set $ix = $ix + 1 + end + set $cnt = $cnt + 1 + end +end + +document in6ifa_showdbg +Syntax: (gdb) in6ifa_showdbg +| Given an IPv6 interface structure address, print the debug information +| related to it. This requires interface address debugging to be turned +| on, by setting the appropriate flags to the "ifa_debug" boot-args +| parameter. 
+end + +# +# print all OSMalloc stats + +define ostag_print +set $kgm_tagp = (OSMallocTag)$arg0 +printf "0x%08x: ", $kgm_tagp +printf "%8d ",$kgm_tagp->OSMT_refcnt +printf "%8x ",$kgm_tagp->OSMT_state +printf "%8x ",$kgm_tagp->OSMT_attr +printf "%s ",$kgm_tagp->OSMT_name +printf "\n" +end define showosmalloc @@ -4270,191 +5525,111 @@ document systemlog | Display the kernel's printf ring buffer end -define printvnodepathint_recur - if $arg0 != 0 - if ($arg0->v_flag & 0x000001) && ($arg0->v_mount != 0) - if $arg0->v_mount->mnt_vnodecovered != 0 - printvnodepathint_recur $arg0->v_mount->mnt_vnodecovered $arg0->v_mount->mnt_vnodecovered->v_name + +define hexdump + set $kgm_addr = (unsigned char *)$arg0 + set $kgm_len = $arg1 + while $kgm_len > 0 + showptr $kgm_addr + printf ": " + set $kgm_i = 0 + while $kgm_i < 16 + printf "%02x ", *($kgm_addr+$kgm_i) + set $kgm_i += 1 + end + printf " |" + set $kgm_i = 0 + while $kgm_i < 16 + set $kgm_temp = *($kgm_addr+$kgm_i) + if $kgm_temp < 32 || $kgm_temp >= 127 + printf "." + else + printf "%c", $kgm_temp end - else - printvnodepathint_recur $arg0->v_parent $arg0->v_parent->v_name - printf "/%s", $arg1 + set $kgm_i += 1 end + printf "|\n" + set $kgm_addr += 16 + set $kgm_len -= 16 end end - -# -# Show the locks held on a vnode by range, type, and holder. -# -define showvnodelocks - if ($argc == 1) - _showvnodelockheader - _showvnodelocks $arg0 - else - printf "| Usage:\n|\n" - help showvnodelocks - end -end -document showvnodelocks -| Given a vnodet pointer, display the list of advisory record locks for the -| referenced pvnode. +document hexdump +| Show the contents of memory as a hex/ASCII dump | The following is the syntax: -| (gdb) showvnodelocks -end - -define _showvnodelockheader - printf "* type W held by lock type start end\n" - printf "- ----- - ------------- --------- ------------------ ------------------\n" +| (gdb) hexdump
end -# -# Macro to display a single lock; used to display both held locks and -# blocked locks -# -define _showvnodelock - set $kgm_svl_lock = ((struct lockf *)$arg0) - - # decode flags - set $kgm_svl_flags = $kgm_svl_lock->lf_flags - set $kgm_svl_type = $kgm_svl_lock->lf_type - if ($kgm_svl_flags & 0x20) - printf "flock" - end - if ($kgm_svl_flags & 0x40) - printf "posix" - end - if ($kgm_svl_flags & 0x80) - printf "prov " - end - if ($kgm_svl_flags & 0x10) - printf " W " - else - printf " . " - end - # POSIX file vs. advisory range locks - if ($kgm_svl_flags & 0x40) - set $kgm_svl_proc = (proc_t)$kgm_svl_lock->lf_id - printf "PID %8d ", $kgm_svl_proc->p_pid - else - printf "ID 0x%08x ", $kgm_svl_lock->lf_id +define printcolonhex + if ($argc == 2) + set $addr = $arg0 + set $count = $arg1 + set $li = 0 + while ($li < $count) + if ($li == 0) + printf "%02x", (u_char)$addr[$li] + end + if ($li != 0) + printf ":%02x", (u_char)$addr[$li] + end + set $li = $li + 1 + end end +end - # lock type - if ($kgm_svl_type == 1) - printf "shared " +define showsockaddr_dl + set $sdl = (struct sockaddr_dl *)$arg0 + if ($sdl == 0) + printf "(null) " else - if ($kgm_svl_type == 3) - printf "exclusive " - else - if ($kgm_svl_type == 2) - printf "unlock " + if $sdl->sdl_nlen == 0 && $sdl->sdl_alen == 0 && $sdl->sdl_slen == 0 + printf "link#%3d ", $sdl->sdl_index else - printf "unknown " - end - end + set $addr = $sdl->sdl_data + $sdl->sdl_nlen + set $count = $sdl->sdl_alen + printcolonhex $addr $count + end end +end - # start and stop - printf "0x%016x..", $kgm_svl_lock->lf_start - printf "0x%016x ", $kgm_svl_lock->lf_end - printf "\n" +define showsockaddr_unspec + set $sockaddr = (struct sockaddr *)$arg0 + set $addr = $sockaddr->sa_data + set $count = $sockaddr->sa_len - 2 + printcolonhex $addr $count end -# Body of showvnodelocks, not including header -define _showvnodelocks - set $kgm_svl_vnode = ((vnode_t)$arg0) - set $kgm_svl_lockiter = $kgm_svl_vnode->v_lockf - while 
($kgm_svl_lockiter != 0) - # locks that are held - printf "H " - _showvnodelock $kgm_svl_lockiter - - # and any locks blocked by them - set $kgm_svl_blocker = $kgm_svl_lockiter->lf_blkhd.tqh_first - while ($kgm_svl_blocker != 0) - printf "> " - _showvnodelock $kgm_svl_blocker - set $kgm_svl_blocker = $kgm_svl_blocker->lf_block.tqe_next - end - - # and on to the next one... - set $kgm_svl_lockiter = $kgm_svl_lockiter->lf_next - end -end - -define showvnodepath - set $vp = (struct vnode *)$arg0 - if $vp != 0 - if ($vp->v_flag & 0x000001) && ($vp->v_mount != 0) && ($vp->v_mount->mnt_flag & 0x00004000) - printf "/" - else - printvnodepathint_recur $vp $vp->v_name - end - end - printf "\n" -end - -document showvnodepath -Syntax: (gdb) showvnodepath -| Prints the path for a vnode -end - -define printcolonhex - if ($argc == 2) - set $addr = $arg0 - set $count = $arg1 - set $li = 0 - while ($li < $count) - if ($li == 0) - printf "%02x", (u_char)$addr[$li] - end - if ($li != 0) - printf ":%02x", (u_char)$addr[$li] - end - set $li = $li + 1 - end - end -end - -define showsockaddr_dl - set $sdl = (struct sockaddr_dl *)$arg0 - printf "LINK " - if ($sdl == 0) - printf "(null)" - else - set $addr = $sdl->sdl_data + $sdl->sdl_nlen - set $count = $sdl->sdl_alen - printcolonhex $addr $count - end -end - -define showsockaddr_unspec - set $sockaddr = (struct sockaddr *)$arg0 - set $addr = $sockaddr->sa_data - set $count = $sockaddr->sa_len - 2 - printf "UNSP " - printcolonhex $addr $count -end - -define showsockaddr_at - set $sockaddr = (struct sockaddr *)$arg0 - set $addr = $sockaddr->sa_data - set $count = $sockaddr->sa_len - 2 - printf "ATLK " - printcolonhex $addr $count -end +define showsockaddr_at + set $sockaddr = (struct sockaddr *)$arg0 + set $addr = $sockaddr->sa_data + set $count = $sockaddr->sa_len - 2 + printcolonhex $addr $count +end define showsockaddr_in set $sin = (struct sockaddr_in *)$arg0 set $sa_bytes = (unsigned char *)&($sin->sin_addr) - printf "IPV4 
%d.%d.%d.%d", $sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3] + printf "%3u.%03u.%03u.%03u", $sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3] end define showsockaddr_in6 set $sin6 = (struct sockaddr_in6 *)$arg0 set $sa_bytes = $sin6->sin6_addr.__u6_addr.__u6_addr8 - printf "IPV6 %02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", $sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3], $sa_bytes[4], $sa_bytes[5], $sa_bytes[6], $sa_bytes[7], $sa_bytes[8], $sa_bytes[9], $sa_bytes[10], $sa_bytes[11], $sa_bytes[12], $sa_bytes[13], $sa_bytes[14], $sa_bytes[15] + printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \ + $sa_bytes[0], $sa_bytes[1], $sa_bytes[2], $sa_bytes[3], $sa_bytes[4], $sa_bytes[5], $sa_bytes[6], $sa_bytes[7], $sa_bytes[8], $sa_bytes[9], $sa_bytes[10], $sa_bytes[11], $sa_bytes[12], $sa_bytes[13], $sa_bytes[14], $sa_bytes[15] +end + +define showsockaddr_un + set $sun = (struct sockaddr_un *)$arg0 + if $sun == 0 + printf "(null)" + else + if $sun->sun_path[0] == 0 + printf "\"\"" + else + printf "%s", $sun->sun_path + end + end end define showifmultiaddrs @@ -4504,27 +5679,37 @@ define showsockaddr printf "(null)" else if ($mysock->sa_family == 0) + printf "UNSPC" showsockaddr_unspec $mysock set $showsockaddr_handled = 1 end + if ($mysock->sa_family == 1) + printf "UNIX " + showsockaddr_un $mysock + set $showsockaddr_handled = 1 + end if ($mysock->sa_family == 2) + printf "INET " showsockaddr_in $mysock set $showsockaddr_handled = 1 end if ($mysock->sa_family == 30) + printf "INET6 " showsockaddr_in6 $mysock set $showsockaddr_handled = 1 end if ($mysock->sa_family == 18) + printf "LINK " showsockaddr_dl $mysock set $showsockaddr_handled = 1 end if ($mysock->sa_family == 16) + printf "ATLK " showsockaddr_at $mysock set $showsockaddr_handled = 1 end if ($showsockaddr_handled == 0) - printf "%d ", $mysock->sa_family + printf "FAM %d ", $mysock->sa_family set $addr = $mysock->sa_data set $count = 
$mysock->sa_len printcolonhex $addr $count @@ -4683,13 +5868,16 @@ define ifconfig end set $ifp = (struct ifnet *)(ifnet->tqh_first) while ($ifp != 0) - printf "%s%d: flags=%x", $ifp->if_name, $ifp->if_unit, (u_short)$ifp->if_flags + printf "%s%d: flags=%hx", $ifp->if_name, $ifp->if_unit, (u_short)$ifp->if_flags showifflags $ifp->if_flags + printf " index %d", $ifp->if_index printf " mtu %d\n", $ifp->if_data.ifi_mtu - printf "\t(struct ifnet *)0x%x\n", $ifp + printf "\t(struct ifnet *)" + showptr $ifp + printf "\n" if ($ifconfig_all == 1) showifaddrs $ifp - end + end set $ifp = $ifp->if_link->tqe_next end end @@ -4698,1052 +5886,4061 @@ Syntax: (gdb) ifconfig | display ifconfig-like output, and print the (struct ifnet *) pointers for further inspection end -define showbpfdtab - set $myi = 0 - while ($myi < bpf_dtab_size) - if (bpf_dtab[$myi] != 0) - printf "Address 0x%x, bd_next 0x%x\n", bpf_dtab[$myi], bpf_dtab[$myi]->bd_next - print *bpf_dtab[$myi] +define _show_unix_domain_socket + set $so = (struct socket *)$arg0 + set $pcb = (struct unpcb *)$so->so_pcb + if $pcb == 0 + printf "unpcb: (null) " + else + printf "unpcb: %p ", $pcb + printf "unp_vnode: %p ", $pcb->unp_vnode + printf "unp_conn: %p ", $pcb->unp_conn + printf "unp_addr: " + showsockaddr_un $pcb->unp_addr end - set $myi = $myi + 1 - end end -define showallvols - printf "volume mnt_data mnt_devvp typename mountpoint\n" - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - printf "0x%08x ", $kgm_vol - printf "0x%08x ", $kgm_vol->mnt_data - printf "0x%08x ", $kgm_vol->mnt_devvp - if ($kgm_vol->mnt_vtable->vfc_name[0] == 'h') && \ - ($kgm_vol->mnt_vtable->vfc_name[1] == 'f') && \ - ($kgm_vol->mnt_vtable->vfc_name[2] == 's') && \ - ($kgm_vol->mnt_vtable->vfc_name[3] == '\0') - set $kgm_hfsmount = \ - (struct hfsmount *) $kgm_vol->mnt_data - if $kgm_hfsmount->hfs_freezing_proc != 0 - printf "FROZEN hfs " - else - printf "hfs " - end - else - printf "%-10s ", $kgm_vol->mnt_vtable->vfc_name - end - 
printf "%s\n", $kgm_vol->mnt_vfsstat.f_mntonname - - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next +define _show_in_port + set $str = (unsigned char *)$arg0 + set $port = *(unsigned short *)$arg0 + + if (((($port & 0xff00) >> 8) == $str[0])) && ((($port & 0x00ff) == $str[1])) + #printf "big endian " + printf ":%d ", $port + else + #printf "little endian " + printf ":%d ", (($port & 0xff00) >> 8) | (($port & 0x00ff) << 8) end end -document showallvols -Syntax: (gdb) showallvols -| Display a summary of mounted volumes +define _show_in_addr_4in6 + set $ia = (unsigned char *)$arg0 + if $ia + printf "%3u.%03u.%03u.%03u", $ia[0], $ia[1], $ia[2], $ia[3] + end end -define showvnodeheader - printf "vnode usecount iocount v_data vtype parent name\n" +define _show_in6_addr + set $ia = (unsigned char *)$arg0 + if $ia + printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \ + $ia[0], $ia[1], $ia[2], $ia[3], $ia[4], $ia[5], $ia[6], $ia[7], \ + $ia[8], $ia[9], $ia[10], $ia[11], $ia[12], $ia[13], $ia[14], $ia[15] + end end -define showvnodeint - set $kgm_vnode = (vnode_t) $arg0 - printf "0x%08x ", $kgm_vnode - printf "%8d ", $kgm_vnode->v_usecount - printf "%7d ", $kgm_vnode->v_iocount -# print information about clean/dirty blocks? 
- printf "0x%08x ", $kgm_vnode->v_data - - # print the vtype, using the enum tag - set $kgm_vtype = $kgm_vnode->v_type - if $kgm_vtype == VNON - printf "VNON " - end - if $kgm_vtype == VREG - printf "VREG " - end - if $kgm_vtype == VDIR - printf "VDIR " - end - if $kgm_vtype == VBLK - printf "VBLK " - end - if $kgm_vtype == VCHR - printf "VCHR " +define _showtcpstate + set $tp = (struct tcpcb *)$arg0 + if $tp + if $tp->t_state == 0 + printf "CLOSED " + end + if $tp->t_state == 1 + printf "LISTEN " + end + if $tp->t_state == 2 + printf "SYN_SENT " + end + if $tp->t_state == 3 + printf "SYN_RCVD " + end + if $tp->t_state == 4 + printf "ESTABLISHED " + end + if $tp->t_state == 5 + printf "CLOSE_WAIT " + end + if $tp->t_state == 6 + printf "FIN_WAIT_1 " + end + if $tp->t_state == 7 + printf "CLOSING " + end + if $tp->t_state == 8 + printf "LAST_ACK " + end + if $tp->t_state == 9 + printf "FIN_WAIT_2 " + end + if $tp->t_state == 10 + printf "TIME_WAIT " + end end - if $kgm_vtype == VLNK - printf "VLNK " +end + +define _showsockprotocol + set $so = (struct socket *)$arg0 + set $inpcb = (struct inpcb *)$so->so_pcb + + if $so->so_proto->pr_protocol == 6 + printf "TCP " + _showtcpstate $inpcb->inp_ppcb end - if $kgm_vtype == VSOCK - printf "VSOCK " + if $so->so_proto->pr_protocol == 17 + printf "UDP " end - if $kgm_vtype == VFIFO - printf "VFIFO " + if $so->so_proto->pr_protocol == 1 + printf "ICMP " end - if $kgm_vtype == VBAD - printf "VBAD " + if $so->so_proto->pr_protocol == 254 + printf "DIVERT " end - if ($kgm_vtype < VNON) || ($kgm_vtype > VBAD) - printf "%5d ", $kgm_vtype + if $so->so_proto->pr_protocol == 255 + printf "RAW " end +end - printf "0x%08x ", $kgm_vnode->v_parent - if $kgm_vnode->v_name != 0 - printf "%s\n", $kgm_vnode->v_name +define _show_ipv4_socket + set $so = (struct socket *)$arg0 + set $inpcb = (struct inpcb *)$so->so_pcb + if $inpcb == 0 + printf "inpcb: (null) " else - printf "\n" + printf "inpcb: %p ", $inpcb + + _showsockprotocol $so + + 
_show_in_addr_4in6 &$inpcb->inp_dependladdr.inp46_local + _show_in_port &$inpcb->inp_lport + printf "-> " + _show_in_addr_4in6 &$inpcb->inp_dependfaddr.inp46_foreign + _show_in_port &$inpcb->inp_fport end end -define showvnode - showvnodeheader - showvnodeint $arg0 -end +define _show_ipv6_socket + set $so = (struct socket *)$arg0 + set $pcb = (struct inpcb *)$so->so_pcb + if $pcb == 0 + printf "inpcb: (null) " + else + printf "inpcb: %p ", $pcb -document showvnode -Syntax: (gdb) showvnode -| Display info about one vnode -end + _showsockprotocol $so -define showvolvnodes - showvnodeheader - set $kgm_vol = (mount_t) $arg0 - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - showvnodeint $kgm_vnode - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next + _show_in6_addr &$pcb->inp_dependladdr.inp6_local + _show_in_port &$pcb->inp_lport + printf "-> " + _show_in6_addr &$pcb->inp_dependfaddr.inp6_foreign + _show_in_port &$pcb->inp_fport end end -document showvolvnodes -Syntax: (gdb) showvolvnodes -| Display info about all vnodes of a given mount_t -end - -define showvolbusyvnodes - showvnodeheader - set $kgm_vol = (mount_t) $arg0 - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - if $kgm_vnode->v_iocount != 0 - showvnodeint $kgm_vnode - end - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next - end -end -document showvolbusyvnodes -Syntax: (gdb) showvolbusyvnodes -| Display info about busy (iocount!=0) vnodes of a given mount_t -end +define showsocket + set $so = (struct socket *)$arg0 + if $so == 0 + printf "so: (null) " + else + printf "so: %p ", $so + if $so && $so->so_proto && $so->so_proto->pr_domain + set $domain = (struct domain *) $so->so_proto->pr_domain -define showallbusyvnodes - showvnodeheader - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - if $kgm_vnode->v_iocount != 0 - showvnodeint 
$kgm_vnode + printf "%s ", $domain->dom_name + if $domain->dom_family == 1 + _show_unix_domain_socket $so + end + if $domain->dom_family == 2 + _show_ipv4_socket $so + end + if $domain->dom_family == 30 + _show_ipv6_socket $so end - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next end - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next end + printf "\n" end - -document showallbusyvnodes -Syntax: (gdb) showallbusyvnodes -| Display info about all busy (iocount!=0) vnodes +document showsocket +Syntax: (gdb) showsocket +| Routine to print out a socket end -define showallvnodes - showvnodeheader - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - showvnodeint $kgm_vnode - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next +define showprocsockets + set $pp = (struct proc *)$arg0 + set $fdp = (struct filedesc *)$pp->p_fd + + set $count = 0 + set $fpp = (struct fileproc **)($fdp->fd_ofiles) + set $fpo = (char)($fdp->fd_ofileflags[0]) + while $count < $fdp->fd_nfiles + if *$fpp + set $fg =(struct fileglob *)((**$fpp)->f_fglob) + if $fg && (($fg)->fg_type == 2) + if $fdp->fd_ofileflags[$count] & 4 + printf "U: " + else + printf " " + end + printf "fd = %d ", $count + if $fg->fg_data + showsocket $fg->fg_data + else + printf "\n" + end + end end - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next + set $fpp = $fpp + 1 + set $count = $count + 1 end end - -document showallvnodes -Syntax: (gdb) showallvnodes -| Display info about all vnodes +document showprocsockets +Syntax: (gdb) showprocsockets +| Routine to print out all the open fds +| which are sockets in a process end -define _showvnodelockheader - printf "* type W held by lock type start end\n" - printf "- ----- - ------------- --------- ------------------ ------------------\n" +define showallprocsockets + set $basep = (struct proc *)allproc->lh_first + set $pp = $basep + while $pp + printf 
"============================================ \n" + showproc $pp + showprocsockets $pp + set $pp = $pp->p_list.le_next + end end +document showallprocsockets +Syntax: (gdb) showallprocsockets +| Routine to print out all the open fds +| which are sockets +end + +define _print_ntohs + set $port = (unsigned short)$arg0 + set $port = (unsigned short)((($arg0 & 0xff00) >> 8) & 0xff) + set $port |= (unsigned short)(($arg0 & 0xff) << 8) + printf "%5d", $port +end + +set $INPCB_STATE_INUSE=0x1 +set $INPCB_STATE_CACHED=0x2 +set $INPCB_STATE_DEAD=0x3 + +set $INP_RECVOPTS=0x01 +set $INP_RECVRETOPTS=0x02 +set $INP_RECVDSTADDR=0x04 +set $INP_HDRINCL=0x08 +set $INP_HIGHPORT=0x10 +set $INP_LOWPORT=0x20 +set $INP_ANONPORT=0x40 +set $INP_RECVIF=0x80 +set $INP_MTUDISC=0x100 +set $INP_STRIPHDR=0x200 +set $INP_FAITH=0x400 +set $INP_INADDR_ANY=0x800 +set $INP_RECVTTL=0x1000 +set $INP_UDP_NOCKSUM=0x2000 +set $IN6P_IPV6_V6ONLY=0x008000 +set $IN6P_PKTINFO=0x010000 +set $IN6P_HOPLIMIT=0x020000 +set $IN6P_HOPOPTS=0x040000 +set $IN6P_DSTOPTS=0x080000 +set $IN6P_RTHDR=0x100000 +set $IN6P_RTHDRDSTOPTS=0x200000 +set $IN6P_AUTOFLOWLABEL=0x800000 +set $IN6P_BINDV6ONLY=0x10000000 + +set $INP_IPV4=0x1 +set $INP_IPV6=0x2 + +set $IPPROTO_TCP=6 +set $IPPROTO_UDP=17 + +define _dump_inpcb + set $pcb = (struct inpcb *)$arg0 + if $kgm_lp64 + printf "%18p", $pcb + else + printf "%10p ", $pcb + end + if $arg1 == $IPPROTO_TCP + printf "tcp" + else + if $arg1 == $IPPROTO_UDP + printf "udp" + else + printf "%2d.", $arg1 + end + end + if ($pcb->inp_vflag & $INP_IPV4) + printf "4 " + end + if ($pcb->inp_vflag & $INP_IPV6) + printf "6 " + end -define _showvnodelock - set $kgm_svl_lock = ((struct lockf *)$arg0) - - # decode flags - set $kgm_svl_flags = $kgm_svl_lock->lf_flags - set $kgm_svl_type = $kgm_svl_lock->lf_type - if ($kgm_svl_flags & 0x20) - printf "flock" - end - if ($kgm_svl_flags & 0x40) - printf "posix" - end - if ($kgm_svl_flags & 0x80) - printf "prov " - end - if ($kgm_svl_flags & 0x10) - printf " W 
" - else - printf " . " - end - - # POSIX file vs. advisory range locks - if ($kgm_svl_flags & 0x40) - set $kgm_svl_proc = (proc_t)$kgm_svl_lock->lf_id - printf "PID %8d ", $kgm_svl_proc->p_pid - else - printf "ID 0x%08x ", $kgm_svl_lock->lf_id - end - - # lock type - if ($kgm_svl_type == 1) - printf "shared " - else - if ($kgm_svl_type == 3) - printf "exclusive " + if ($pcb->inp_vflag & $INP_IPV4) + printf " " + _show_in_addr &$pcb->inp_dependladdr.inp46_local.ia46_addr4 else - if ($kgm_svl_type == 2) - printf "unlock " - else - printf "unknown " - end + _show_in6_addr &$pcb->inp_dependladdr.inp6_local end - end + printf " " + _print_ntohs $pcb->inp_lport + printf " " + if ($pcb->inp_vflag & $INP_IPV4) + printf " " + _show_in_addr &($pcb->inp_dependfaddr.inp46_foreign.ia46_addr4) + else + _show_in6_addr &($pcb->inp_dependfaddr.inp6_foreign) + end + printf " " + _print_ntohs $pcb->inp_fport + printf " " - # start and stop - printf "0x%016x..", $kgm_svl_lock->lf_start - printf "0x%016x ", $kgm_svl_lock->lf_end - printf "\n" -end -# Body of showvnodelocks, not including header -define _showvnodelocks - set $kgm_svl_vnode = ((vnode_t)$arg0) - set $kgm_svl_lockiter = $kgm_svl_vnode->v_lockf - while ($kgm_svl_lockiter != 0) - # locks that are held - printf "H " - _showvnodelock $kgm_svl_lockiter + if $arg1 == $IPPROTO_TCP + _showtcpstate $pcb->inp_ppcb + end - # and any locks blocked by them - set $kgm_svl_blocker = $kgm_svl_lockiter->lf_blkhd.tqh_first - while ($kgm_svl_blocker != 0) - printf "> " - _showvnodelock $kgm_svl_blocker - set $kgm_svl_blocker = $kgm_svl_blocker->lf_block.tqe_next +# printf "phd " +# set $phd = $pcb->inp_phd +# while $phd != 0 +# printf " " +# _print_ntohs $phd->phd_port +# set $phd = $phd->phd_hash.le_next +# end +# printf ", " + if ($pcb->inp_flags & $INP_RECVOPTS) + printf "recvopts " + end + if ($pcb->inp_flags & $INP_RECVRETOPTS) + printf "recvretopts " + end + if ($pcb->inp_flags & $INP_RECVDSTADDR) + printf "recvdstaddr " + end + if 
($pcb->inp_flags & $INP_HDRINCL) + printf "hdrincl " + end + if ($pcb->inp_flags & $INP_HIGHPORT) + printf "highport " + end + if ($pcb->inp_flags & $INP_LOWPORT) + printf "lowport " + end + if ($pcb->inp_flags & $INP_ANONPORT) + printf "anonport " + end + if ($pcb->inp_flags & $INP_RECVIF) + printf "recvif " + end + if ($pcb->inp_flags & $INP_MTUDISC) + printf "mtudisc " + end + if ($pcb->inp_flags & $INP_STRIPHDR) + printf "striphdr " + end + if ($pcb->inp_flags & $INP_FAITH) + printf "faith " + end + if ($pcb->inp_flags & $INP_INADDR_ANY) + printf "inaddr_any " + end + if ($pcb->inp_flags & $INP_RECVTTL) + printf "recvttl " + end + if ($pcb->inp_flags & $INP_UDP_NOCKSUM) + printf "nocksum " end + if ($pcb->inp_flags & $IN6P_IPV6_V6ONLY) + printf "v6only " + end + if ($pcb->inp_flags & $IN6P_PKTINFO) + printf "pktinfo " + end + if ($pcb->inp_flags & $IN6P_HOPLIMIT) + printf "hoplimit " + end + if ($pcb->inp_flags & $IN6P_HOPOPTS) + printf "hopopts " + end + if ($pcb->inp_flags & $IN6P_DSTOPTS) + printf "dstopts " + end + if ($pcb->inp_flags & $IN6P_RTHDR) + printf "rthdr " + end + if ($pcb->inp_flags & $IN6P_RTHDRDSTOPTS) + printf "rthdrdstopts " + end + if ($pcb->inp_flags & $IN6P_AUTOFLOWLABEL) + printf "autoflowlabel " + end + if ($pcb->inp_flags & $IN6P_BINDV6ONLY) + printf "bindv6only " + end + set $so = (struct socket *)$pcb->inp_socket + if $so != 0 + printf "[so=%p s=%ld r=%ld usecnt=%ld] ", $so, $so->so_snd.sb_cc, \ + $so->so_rcv.sb_cc, $so->so_usecount + end + if ($pcb->inp_state == 0 || $pcb->inp_state == $INPCB_STATE_INUSE) + printf "inuse, " + else + if ($pcb->inp_state == $INPCB_STATE_CACHED) + printf "cached, " + else + if ($pcb->inp_state == $INPCB_STATE_DEAD) + printf "dead, " + else + printf "unknown (%d), ", $pcb->inp_state + end + end + end +end - # and on to the next one... 
- set $kgm_svl_lockiter = $kgm_svl_lockiter->lf_next
- end
+define _dump_inpcbport
+    set $ppcb = (struct inpcbport *)$arg0
+    printf "%p: lport ", $ppcb
+    _print_ntohs $ppcb->phd_port
 end
+set $UDBHASHSIZE=16
-define showvnodelocks
- if ($argc == 1)
- _showvnodelockheader
- _showvnodelocks $arg0
- else
- printf "| Usage:\n|\n"
- help showvnodelocks
- end
+define _dump_pcbinfo
+    # Dump every inpcb hanging off an inpcbinfo, then its local-port hash.
+    #   $arg0: address of a struct inpcbinfo (callers pass &tcbinfo / &udbinfo)
+    #   $arg1: IP protocol number, handed through to _dump_inpcb
+    # Leaves the number of pcbs seen in $pcbseen and the summed socket
+    # buffer byte counts in $snd_cc / $rcv_cc.
+    set $snd_cc = 0
+    set $rcv_cc = 0
+    set $pcbseen = 0
+    set $pcbi = (struct inpcbinfo *)$arg0
+    printf "lastport %d lastlow %d lasthi %d\n", \
+        $pcbi->lastport, $pcbi->lastlow, $pcbi->lasthi
+    printf "active pcb count is %d\n", $pcbi->ipi_count
+    set $hashsize = $pcbi->hashmask + 1
+    printf "hash size is %d\n", $hashsize
+    printf "hash base %p has the following inpcb(s):\n", $pcbi->hashbase
+    if $kgm_lp64
+        printf "pcb prot source address port destination address port\n"
+        printf "------------------ ---- --------------------------------------- ----- --------------------------------------- -----\n"
+    else
+        printf "pcb prot source address port destination address port\n"
+        printf "---------- ---- --------------------------------------- ----- --------------------------------------- -----\n"
+    end
+    set $i = 0
+    set $hashbase = $pcbi->hashbase
+    while $i < $hashsize
+        # Fetch the bucket head inside the loop so that nothing beyond the
+        # last bucket is ever dereferenced.
+        set $head = *(uintptr_t *)$hashbase
+        if $head != 0
+            set $pcb0 = (struct inpcb *)$head
+            while $pcb0 != 0
+                set $pcbseen += 1
+                _dump_inpcb $pcb0 $arg1
+                # Use our own iterator; $pcb is only assigned as a side
+                # effect inside _dump_inpcb.
+                set $so = (struct socket *)$pcb0->inp_socket
+                if $so != 0
+                    set $snd_cc += $so->so_snd.sb_cc
+                    set $rcv_cc += $so->so_rcv.sb_cc
+                end
+                set $pcb0 = $pcb0->inp_hash.le_next
+                printf "\n"
+            end
+        end
+        set $i += 1
+        set $hashbase += 1
+    end
+    printf "total seen %ld snd_cc %ld rcv_cc %ld\n", $pcbseen, $snd_cc, $rcv_cc
+    printf "port hash base is %p\n", $pcbi->porthashbase
+    # NOTE(review): the port hash is walked with the pcb hash size; this
+    # assumes both tables have the same number of buckets - confirm.
+    set $i = 0
+    set $hashbase = $pcbi->porthashbase
+    while $i < $hashsize
+        set $head = *(uintptr_t *)$hashbase
+        if $head != 0
+            set $pcb0 = (struct inpcbport *)$head
+            while $pcb0 != 0
+                printf "\t"
+                _dump_inpcbport $pcb0
+                printf "\n"
+                set $pcb0 = $pcb0->phd_hash.le_next
+            end
+        end
+        set $i += 1
+        set $hashbase += 1
+    end
 end
-document showvnodelocks
-Syntax: (gdb) showvnodelocks
-| Given a vnodet pointer, display the list of advisory record locks for the
-| referenced pvnodes
-end
+set $N_TIME_WAIT_SLOTS=128
-define showbootargs
- printf "%s\n", (char*)((boot_args*)PE_state.bootArgs).CommandLine
+define show_tcp_timewaitslots
+    # With no argument: print the pcb count of every slot.
+    # With -1: additionally dump the pcbs of every slot.
+    # With a slot number: print the count and the pcbs of that slot only.
+    set $slot = -1
+    set $all = 0
+    if $argc == 1
+        if (int)$arg0 == -1
+            set $all = 1
+        else
+            set $slot = (int)$arg0
+        end
+    end
+    printf "time wait slot size %d cur_tw_slot %ld\n", $N_TIME_WAIT_SLOTS, cur_tw_slot
+    set $i = 0
+    while $i < $N_TIME_WAIT_SLOTS
+        set $perslot = 0
+        set $head = (uintptr_t *)time_wait_slots[$i]
+        if $i == $slot || $slot == -1
+            if $head != 0
+                set $pcb0 = (struct inpcb *)$head
+                while $pcb0 != 0
+                    set $perslot += 1
+                    set $pcb0 = $pcb0->inp_list.le_next
+                end
+            end
+            printf " slot %ld count %ld\n", $i, $perslot
+        end
+        if $all || $i == $slot
+            if $head != 0
+                set $pcb0 = (struct inpcb *)$head
+                while $pcb0 != 0
+                    printf "\t"
+                    _dump_inpcb $pcb0 $IPPROTO_TCP
+                    printf "\n"
+                    set $pcb0 = $pcb0->inp_list.le_next
+                end
+            end
+        end
+        set $i += 1
+    end
 end
-
-document showbootargs
-Syntax: showbootargs
-| Display boot arguments passed to the target kernel
+document show_tcp_timewaitslots
+Syntax: (gdb) show_tcp_timewaitslots
+| Print the list of TCP protocol control block in the TIMEWAIT state
+| Pass -1 to see the list of PCB for each slot
+| Pass a slot number to see information for that slot with the list of PCB
 end
-define showbootermemorymap
- set $kgm_boot_args = kernelBootArgs
- set $kgm_msize = kernelBootArgs->MemoryMapDescriptorSize
- set $kgm_mcount = kernelBootArgs->MemoryMapSize / $kgm_msize
- set $kgm_i = 0
-
- printf "Type Physical Start Number of Pages\n"
- while $kgm_i < $kgm_mcount
- set $kgm_mptr = (EfiMemoryRange *)((unsigned
long)kernelBootArgs->MemoryMap + $kgm_i * $kgm_msize) -# p/x *$kgm_mptr - if $kgm_mptr->Type == 0 - printf "reserved " - end - if $kgm_mptr->Type == 1 - printf "LoaderCode" - end - if $kgm_mptr->Type == 2 - printf "LoaderData" - end - if $kgm_mptr->Type == 3 - printf "BS_code " - end - if $kgm_mptr->Type == 4 - printf "BS_data " - end - if $kgm_mptr->Type == 5 - printf "RT_code " - end - if $kgm_mptr->Type == 6 - printf "RT_data " - end - if $kgm_mptr->Type == 7 - printf "available " - end - if $kgm_mptr->Type == 8 - printf "Unusable " - end - if $kgm_mptr->Type == 9 - printf "ACPI_recl " - end - if $kgm_mptr->Type == 10 - printf "ACPI_NVS " - end - if $kgm_mptr->Type == 11 - printf "MemMapIO " - end - if $kgm_mptr->Type == 12 - printf "MemPortIO " - end - if $kgm_mptr->Type == 13 - printf "PAL_code " - end - if $kgm_mptr->Type > 13 - printf "UNKNOWN " - end - - printf " %016llx %016llx\n", $kgm_mptr->PhysicalStart, $kgm_mptr->NumberOfPages - set $kgm_i = $kgm_i + 1 - end +define show_tcp_pcbinfo + _dump_pcbinfo &tcbinfo $IPPROTO_TCP end - -document showbootermemorymap -Syntax: (gdb) showbootermemorymap -| Prints out the phys memory map from kernelBootArgs +document show_tcp_pcbinfo +Syntax: (gdb) show_tcp_pcbinfo +| Print the list of TCP protocol control block information end -define showstacksaftertask - set $kgm_head_taskp = &default_pset.tasks - set $kgm_taskp = (struct task *)$arg0 - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - if ($decode_wait_events > 0) - showactint $kgm_actp 1 - else - showactint $kgm_actp 2 - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) - end +define show_udp_pcbinfo + _dump_pcbinfo &udbinfo $IPPROTO_UDP end -document showstacksaftertask 
-Syntax: (gdb) showstacksaftertask -| Routine to print out all stacks (as in showallstacks) starting after a given task -| Useful if that gdb refuses to print a certain task's stack. +document show_udp_pcbinfo +Syntax: (gdb) show_udp_pcbinfo +| Print the list of UDP protocol control block information end -define showpmworkqueueint - set $kgm_pm_wq = (IOPMWorkQueue *)$arg0 - set $kgm_pm_node = (IOService *)$kgm_pm_wq->owner - printf "0x%08x 0x%08x ", $kgm_pm_wq, $kgm_pm_node - printf "%02d ", $kgm_pm_node->pwrMgt->CurrentPowerState - printf "%02d ", $kgm_pm_node->pwrMgt->MachineState - printf "%02d ", $kgm_pm_node->pwrMgt->WaitReason - printf "%s\n", $kgm_pm_node->pwrMgt->Name - set $kgm_pm_queue = &($kgm_pm_wq->fWorkQueue) - set $kgm_pm_req = (IOPMRequest *) $kgm_pm_queue->next - while ( (queue_entry_t) $kgm_pm_req != (queue_entry_t) $kgm_pm_queue) - printf " Request 0x%08x [%02x] Args ", $kgm_pm_req, $kgm_pm_req->fType - printf "0x%08x ", $kgm_pm_req->fArg0 - printf "0x%08x ", $kgm_pm_req->fArg1 - printf "0x%08x\n", $kgm_pm_req->fArg2 - set $kgm_pm_req = (IOPMRequest *)$kgm_pm_req->fCommandChain.next +define showbpfdtab + set $myi = 0 + while ($myi < bpf_dtab_size) + if (bpf_dtab[$myi] != 0) + printf "Address 0x%x, bd_next 0x%x\n", bpf_dtab[$myi], bpf_dtab[$myi]->bd_next + print *bpf_dtab[$myi] + end + set $myi = $myi + 1 end end -define showallpmworkqueues - set $kgm_pm_next = gIOPMWorkLoop->eventChain - printf "WorkQueue Owner PS MS WT Name\n" - printf "--------------------------------------\n" - while ( $kgm_pm_next ) - set $kgm_vt = *((void **) $kgm_pm_next) - if ($kgm_vt == _ZTV13IOPMWorkQueue) - showpmworkqueueint $kgm_pm_next - end - set $kgm_pm_next = $kgm_pm_next->eventChainNext - end +define printvnodepathint_recur + if $arg0 != 0 + if ($arg0->v_flag & 0x000001) && ($arg0->v_mount != 0) + if $arg0->v_mount->mnt_vnodecovered != 0 + printvnodepathint_recur $arg0->v_mount->mnt_vnodecovered $arg0->v_mount->mnt_vnodecovered->v_name + end + else + 
printvnodepathint_recur $arg0->v_parent $arg0->v_parent->v_name + printf "/%s", $arg1 + end + end +end + +define showvnodepath + set $vp = (struct vnode *)$arg0 + if $vp != 0 + if ($vp->v_flag & 0x000001) && ($vp->v_mount != 0) && ($vp->v_mount->mnt_flag & 0x00004000) + printf "/" + else + printvnodepathint_recur $vp $vp->v_name + end + end + printf "\n" +end + +document showvnodepath +Syntax: (gdb) showvnodepath +| Prints the path for a vnode +end + +define showallvols + printf "volume " + showptrhdrpad + printf " mnt_data " + showptrhdrpad + printf " mnt_devvp " + showptrhdrpad + printf " typename mountpoint\n" + set $kgm_vol = (mount_t) mountlist.tqh_first + while $kgm_vol + showptr $kgm_vol + printf " " + showptr $kgm_vol->mnt_data + printf " " + showptr $kgm_vol->mnt_devvp + printf " " + if ($kgm_vol->mnt_vtable->vfc_name[0] == 'h') && \ + ($kgm_vol->mnt_vtable->vfc_name[1] == 'f') && \ + ($kgm_vol->mnt_vtable->vfc_name[2] == 's') && \ + ($kgm_vol->mnt_vtable->vfc_name[3] == '\0') + set $kgm_hfsmount = \ + (struct hfsmount *) $kgm_vol->mnt_data + if $kgm_hfsmount->hfs_freezing_proc != 0 + printf "FROZEN hfs " + else + printf "hfs " + end + else + printf "%-10s ", $kgm_vol->mnt_vtable->vfc_name + end + printf "%s\n", $kgm_vol->mnt_vfsstat.f_mntonname + + set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next + end +end + +document showallvols +Syntax: (gdb) showallvols +| Display a summary of mounted volumes +end + +define showvnodeheader + printf "vnode " + showptrhdrpad + printf " usecount iocount v_data " + showptrhdrpad + printf " vtype parent " + showptrhdrpad + printf " name\n" +end + +define showvnodeint + set $kgm_vnode = (vnode_t) $arg0 + showptr $kgm_vnode + printf " %8d ", $kgm_vnode->v_usecount + printf "%7d ", $kgm_vnode->v_iocount +# print information about clean/dirty blocks? 
+ showptr $kgm_vnode->v_data + printf " " + # print the vtype, using the enum tag + set $kgm_vtype = $kgm_vnode->v_type + if $kgm_vtype == VNON + printf "VNON " + end + if $kgm_vtype == VREG + printf "VREG " + end + if $kgm_vtype == VDIR + printf "VDIR " + end + if $kgm_vtype == VBLK + printf "VBLK " + end + if $kgm_vtype == VCHR + printf "VCHR " + end + if $kgm_vtype == VLNK + printf "VLNK " + end + if $kgm_vtype == VSOCK + printf "VSOCK " + end + if $kgm_vtype == VFIFO + printf "VFIFO " + end + if $kgm_vtype == VBAD + printf "VBAD " + end + if ($kgm_vtype < VNON) || ($kgm_vtype > VBAD) + printf "%5d ", $kgm_vtype + end + + showptr $kgm_vnode->v_parent + printf " " + if $kgm_vnode->v_name != 0 + printf "%s\n", $kgm_vnode->v_name + else + printf "\n" + end +end + +define showvnode + showvnodeheader + showvnodeint $arg0 +end + +document showvnode +Syntax: (gdb) showvnode +| Display info about one vnode +end + +define showvolvnodes + showvnodeheader + set $kgm_vol = (mount_t) $arg0 + set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first + while $kgm_vnode + showvnodeint $kgm_vnode + set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next + end +end + +document showvolvnodes +Syntax: (gdb) showvolvnodes +| Display info about all vnodes of a given mount_t +end + +define showvolbusyvnodes + showvnodeheader + set $kgm_vol = (mount_t) $arg0 + set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first + while $kgm_vnode + if $kgm_vnode->v_iocount != 0 + showvnodeint $kgm_vnode + end + set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next + end +end + +document showvolbusyvnodes +Syntax: (gdb) showvolbusyvnodes +| Display info about busy (iocount!=0) vnodes of a given mount_t +end + +define showallbusyvnodes + showvnodeheader + set $kgm_vol = (mount_t) mountlist.tqh_first + while $kgm_vol + set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first + while $kgm_vnode + if $kgm_vnode->v_iocount != 0 + showvnodeint $kgm_vnode + end + set $kgm_vnode = 
(vnode_t) $kgm_vnode->v_mntvnodes.tqe_next + end + set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next + end +end + +document showallbusyvnodes +Syntax: (gdb) showallbusyvnodes +| Display info about all busy (iocount!=0) vnodes +end + +define showallvnodes + showvnodeheader + set $kgm_vol = (mount_t) mountlist.tqh_first + while $kgm_vol + set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first + while $kgm_vnode + showvnodeint $kgm_vnode + set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next + end + set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next + end +end + +document showallvnodes +Syntax: (gdb) showallvnodes +| Display info about all vnodes +end + +define _showvnodelockheader + printf "* type W held by lock type start end\n" + printf "- ----- - ------------- --------- ------------------ ------------------\n" +end + +define _showvnodelock + set $kgm_svl_lock = ((struct lockf *)$arg0) + + # decode flags + set $kgm_svl_flags = $kgm_svl_lock->lf_flags + set $kgm_svl_type = $kgm_svl_lock->lf_type + if ($kgm_svl_flags & 0x20) + printf "flock" + end + if ($kgm_svl_flags & 0x40) + printf "posix" + end + if ($kgm_svl_flags & 0x80) + printf "prov " + end + if ($kgm_svl_flags & 0x10) + printf " W " + else + printf " . " + end + + # POSIX file vs. 
advisory range locks + if ($kgm_svl_flags & 0x40) + set $kgm_svl_proc = (proc_t)$kgm_svl_lock->lf_id + printf "PID %8d ", $kgm_svl_proc->p_pid + else + printf "ID 0x%08x ", $kgm_svl_lock->lf_id + end + + # lock type + if ($kgm_svl_type == 1) + printf "shared " + else + if ($kgm_svl_type == 3) + printf "exclusive " + else + if ($kgm_svl_type == 2) + printf "unlock " + else + printf "unknown " + end + end + end + + # start and stop + printf "0x%016x..", $kgm_svl_lock->lf_start + printf "0x%016x ", $kgm_svl_lock->lf_end + printf "\n" +end +# Body of showvnodelocks, not including header +define _showvnodelocks + set $kgm_svl_vnode = ((vnode_t)$arg0) + set $kgm_svl_lockiter = $kgm_svl_vnode->v_lockf + while ($kgm_svl_lockiter != 0) + # locks that are held + printf "H " + _showvnodelock $kgm_svl_lockiter + + # and any locks blocked by them + set $kgm_svl_blocker = $kgm_svl_lockiter->lf_blkhd.tqh_first + while ($kgm_svl_blocker != 0) + printf "> " + _showvnodelock $kgm_svl_blocker + set $kgm_svl_blocker = $kgm_svl_blocker->lf_block.tqe_next + end + + # and on to the next one... 
+ set $kgm_svl_lockiter = $kgm_svl_lockiter->lf_next
+ end
+end
+
+
+define showvnodelocks
+    if ($argc == 1)
+        _showvnodelockheader
+        _showvnodelocks $arg0
+    else
+        printf "| Usage:\n|\n"
+        help showvnodelocks
+    end
+end
+
+document showvnodelocks
+Syntax: (gdb) showvnodelocks <vnode_t>
+| Given a vnode_t pointer, display the list of advisory record locks for the
+| referenced vnode
+end
+
+define showbootargs
+    printf "%s\n", (char*)((boot_args*)PE_state.bootArgs).CommandLine
+end
+
+document showbootargs
+Syntax: showbootargs
+| Display boot arguments passed to the target kernel
+end
+
+define showbootermemorymap
+    # Print the memory map described by kernelBootArgs, one row per
+    # EfiMemoryRange descriptor. $kgm_voffset is added to the MemoryMap
+    # value before it is dereferenced: 0 on i386, 0xFFFFFF8000000000 on x86_64.
+    set $kgm_bmm_supported = 1
+    if ($kgm_mtype == $kgm_mtype_i386)
+        set $kgm_voffset = 0
+    else
+        if ($kgm_mtype == $kgm_mtype_x86_64)
+            set $kgm_voffset = 0xFFFFFF8000000000ULL
+        else
+            # No offset is known for this machine type: say so and skip the
+            # walk rather than dereference the map with a stale or unset
+            # $kgm_voffset.
+            set $kgm_bmm_supported = 0
+            printf "showbootermemorymap not supported on this architecture\n"
+        end
+    end
+
+    if $kgm_bmm_supported
+        set $kgm_boot_args = kernelBootArgs
+        set $kgm_msize = kernelBootArgs->MemoryMapDescriptorSize
+        set $kgm_mcount = kernelBootArgs->MemoryMapSize / $kgm_msize
+        set $kgm_i = 0
+
+        printf "Type Physical Start Number of Pages Virtual Start Attributes\n"
+        while $kgm_i < $kgm_mcount
+            set $kgm_mptr = (EfiMemoryRange *)((unsigned long)kernelBootArgs->MemoryMap + $kgm_voffset + $kgm_i * $kgm_msize)
+# p/x *$kgm_mptr
+            if $kgm_mptr->Type == 0
+                printf "reserved "
+            end
+            if $kgm_mptr->Type == 1
+                printf "LoaderCode"
+            end
+            if $kgm_mptr->Type == 2
+                printf "LoaderData"
+            end
+            if $kgm_mptr->Type == 3
+                printf "BS_code "
+            end
+            if $kgm_mptr->Type == 4
+                printf "BS_data "
+            end
+            if $kgm_mptr->Type == 5
+                printf "RT_code "
+            end
+            if $kgm_mptr->Type == 6
+                printf "RT_data "
+            end
+            if $kgm_mptr->Type == 7
+                printf "available "
+            end
+            if $kgm_mptr->Type == 8
+                printf "Unusable "
+            end
+            if $kgm_mptr->Type == 9
+                printf "ACPI_recl "
+            end
+            if $kgm_mptr->Type == 10
+                printf "ACPI_NVS "
+            end
+            if $kgm_mptr->Type == 11
+                printf "MemMapIO "
+            end
+            if $kgm_mptr->Type == 12
+                printf "MemPortIO "
+            end
+            if $kgm_mptr->Type == 13
+                printf "PAL_code "
+            end
+            if $kgm_mptr->Type > 13
+                printf "UNKNOWN "
+            end
+
+            printf " %016llx %016llx", $kgm_mptr->PhysicalStart, $kgm_mptr->NumberOfPages
+            # The virtual-start column is left blank when the descriptor has none.
+            if $kgm_mptr->VirtualStart != 0
+                printf " %016llx", $kgm_mptr->VirtualStart
+            else
+                printf " "
+            end
+            printf " %016llx\n", $kgm_mptr->Attribute
+            set $kgm_i = $kgm_i + 1
+        end
+    end
+end
+
+document showbootermemorymap
+Syntax: (gdb) showbootermemorymap
+| Prints out the phys memory map from kernelBootArgs
+end
+
+
+define showstacksaftertask
+    set $kgm_head_taskp = &default_pset.tasks
+    set $kgm_taskp = (struct task *)$arg0
+    while $kgm_taskp != $kgm_head_taskp
+        showtaskheader
+        showtaskint $kgm_taskp
+        set $kgm_head_actp = &($kgm_taskp->threads)
+        set $kgm_actp = (struct thread *)($kgm_taskp->threads.next)
+        while $kgm_actp != $kgm_head_actp
+            showactheader
+            if ($decode_wait_events > 0)
+                showactint $kgm_actp 1
+            else
+                showactint $kgm_actp 2
+            end
+            set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next)
+        end
+        printf "\n"
+        set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next)
+    end
+end
+document showstacksaftertask
+Syntax: (gdb) showstacksaftertask <task>
+| Routine to print out all stacks (as in showallstacks) starting after a given task
+| Useful if that gdb refuses to print a certain task's stack.
+end + +define showpmworkqueueint + set $kgm_pm_wq = (IOPMWorkQueue *)$arg0 + set $kgm_pm_node = (IOService *)$kgm_pm_wq->owner + showptr $kgm_pm_wq + printf " " + showptr $kgm_pm_node + printf " " + printf "%02d ", $kgm_pm_node->pwrMgt->CurrentPowerState + printf "%02d ", $kgm_pm_node->pwrMgt->MachineState + printf "%02d ", $kgm_pm_node->pwrMgt->WaitReason + printf "%s\n", $kgm_pm_node->pwrMgt->Name + set $kgm_pm_queue = &($kgm_pm_wq->fWorkQueue) + set $kgm_pm_req = (IOPMRequest *)$kgm_pm_queue->next + if ((queue_entry_t) $kgm_pm_req != (queue_entry_t) $kgm_pm_queue) + printf "\n" + printf "request " + showptrhdrpad + printf " type next " + showptrhdrpad + printf " root " + showptrhdrpad + printf " work_wait free_wait\n" + while ((queue_entry_t) $kgm_pm_req != (queue_entry_t) $kgm_pm_queue) + showptr $kgm_pm_req + printf " 0x%02x ", $kgm_pm_req->fType + showptr $kgm_pm_req->fRequestNext + printf " " + showptr $kgm_pm_req->fRequestRoot + printf " 0x%08x 0x%08x\n", $kgm_pm_req->fWorkWaitCount, $kgm_pm_req->fFreeWaitCount + showptrhdrpad + printf " args " + showptr $kgm_pm_req->fArg0 + printf " " + showptr $kgm_pm_req->fArg1 + printf " " + showptr $kgm_pm_req->fArg2 + printf "\n" + set $kgm_pm_req = (IOPMRequest *)$kgm_pm_req->fCommandChain.next + end + printf "\n" + end +end + +define showallpmworkqueues + set $kgm_pm_next = gIOPMWorkLoop->eventChain + printf "queue " + showptrhdrpad + printf " owner " + showptrhdrpad + printf " ps ms wr name\n" + while ( $kgm_pm_next ) + set $kgm_vt = *((void **) $kgm_pm_next) + if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm) + set $kgm_vt = $kgm_vt - 2 * sizeof(void *) + end + if ($kgm_vt == &_ZTV13IOPMWorkQueue) + showpmworkqueueint $kgm_pm_next + end + set $kgm_pm_next = $kgm_pm_next->eventChainNext + end +end + +document showallpmworkqueues +Syntax: (gdb) showallpmworkqueues +| Display info about all IOPMWorkQueue objects +end + +define showioservicepm + set $kgm_iopmpriv = (IOServicePM *)$arg0 + printf "{ " + printf 
"MachineState = %d (", $kgm_iopmpriv->MachineState + if ( $kgm_iopmpriv->MachineState == 1 ) + printf "kIOPM_OurChangeTellClientsPowerDown" + else + if ( $kgm_iopmpriv->MachineState == 2 ) + printf "kIOPM_OurChangeTellPriorityClientsPowerDown" + else + if ( $kgm_iopmpriv->MachineState == 3 ) + printf "kIOPM_OurChangeNotifyInterestedDriversWillChange" + else + if ( $kgm_iopmpriv->MachineState == 4 ) + printf "kIOPM_OurChangeSetPowerState" + else + if ( $kgm_iopmpriv->MachineState == 5 ) + printf "kIOPM_OurChangeWaitForPowerSettle" + else + if ( $kgm_iopmpriv->MachineState == 6 ) + printf "kIOPM_OurChangeNotifyInterestedDriversDidChange" + else + if ( $kgm_iopmpriv->MachineState == 7 ) + printf "kIOPM_OurChangeFinish" + else + if ( $kgm_iopmpriv->MachineState == 8 ) + printf "kIOPM_ParentDownTellPriorityClientsPowerDown" + else + if ( $kgm_iopmpriv->MachineState == 9 ) + printf "kIOPM_ParentDownNotifyInterestedDriversWillChange" + else + if ( $kgm_iopmpriv->MachineState == 10 ) + printf "Unused_MachineState_10" + else + if ( $kgm_iopmpriv->MachineState == 11 ) + printf "kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange" + else + if ( $kgm_iopmpriv->MachineState == 12 ) + printf "kIOPM_ParentDownSetPowerState" + else + if ( $kgm_iopmpriv->MachineState == 13 ) + printf "kIOPM_ParentDownWaitForPowerSettle" + else + if ( $kgm_iopmpriv->MachineState == 14 ) + printf "kIOPM_ParentDownAcknowledgeChange" + else + if ( $kgm_iopmpriv->MachineState == 15) + printf "kIOPM_ParentUpSetPowerState" + else + if ( $kgm_iopmpriv->MachineState == 16) + printf "Unused_MachineState_16" + else + if ( $kgm_iopmpriv->MachineState == 17) + printf "kIOPM_ParentUpWaitForSettleTime" + else + if ( $kgm_iopmpriv->MachineState == 18) + printf "kIOPM_ParentUpNotifyInterestedDriversDidChange" + else + if ( $kgm_iopmpriv->MachineState == 19) + printf "kIOPM_ParentUpAcknowledgePowerChange" + else + if ( $kgm_iopmpriv->MachineState == 20) + printf "kIOPM_Finished" + else + if ( 
$kgm_iopmpriv->MachineState == 21) + printf "kIOPM_DriverThreadCallDone" + else + if ( $kgm_iopmpriv->MachineState == 22) + printf "kIOPM_NotifyChildrenDone" + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + end + printf "), " + + if ( $kgm_iopmpriv->MachineState != 20 ) + printf "DriverTimer = %d, ",(unsigned int)$kgm_iopmpriv->DriverTimer + printf "SettleTime = %d, ",(unsigned int)$kgm_iopmpriv->SettleTimeUS + printf "HeadNoteFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteFlags + printf "HeadNotePendingAcks = %x, ",(unsigned int)$kgm_iopmpriv->HeadNotePendingAcks + end + + if ( $kgm_iopmpriv->DeviceOverrides != 0 ) + printf"DeviceOverrides, " + end + + printf "DeviceDesire = %d, ",(unsigned int)$kgm_iopmpriv->DeviceDesire + printf "DesiredPowerState = %d, ",(unsigned int)$kgm_iopmpriv->DesiredPowerState + printf "PreviousRequest = %d }\n",(unsigned int)$kgm_iopmpriv->PreviousRequest +end + +document showioservicepm +Syntax: (gdb) showioservicepm +| Routine to dump the IOServicePM object +end + +define showregistryentryrecursepmstate + set $kgm_re = (IOService *)$arg1 + set $kgm$arg0_stack = (unsigned long long) $arg2 + + if ($arg3) + set $kgm$arg0_stack = $kgm$arg0_stack | (1ULL << $kgm_reg_depth) + else + set $kgm$arg0_stack = $kgm$arg0_stack & ~(1ULL << $kgm_reg_depth) + end + + dictget $kgm_re->fRegistryTable $kgm_childkey + set $kgm$arg0_child_array = (OSArray *) $kgm_result + + if ($kgm$arg0_child_array) + set $kgm$arg0_child_count = $kgm$arg0_child_array->count + else + set $kgm$arg0_child_count = 0 + end + + if ($kgm$arg0_child_count) + set $kgm$arg0_stack = $kgm$arg0_stack | (2ULL << $kgm_reg_depth) + else + set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) + end + + indent $kgm_reg_depth $kgm$arg0_stack + printf "+-o " + + dictget $kgm_re->fRegistryTable $kgm_namekey + if ($kgm_result == 0) + dictget $kgm_re->fRegistryTable gIONameKey + end + if 
($kgm_result == 0) + dictget $kgm_re->fPropertyTable gIOClassKey + end + + if ($kgm_result != 0) + printf "%s <%p>", ((OSString *)$kgm_result)->string, $kgm_re + else + if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) + printf "%s <", ((IOService*)$kgm_re)->pwrMgt->Name + showptr $kgm_re + printf ">" + else + printf "?? <" + showptr $kgm_re + printf ">" + end + end + + if (((IOService*)$kgm_re)->pwrMgt ) + printf " Current Power State: %ld ", ((IOService*)$kgm_re)->pwrMgt->CurrentPowerState + #printf " Mach State %ld", ((IOService*)$kgm_re)->pwrMgt->MachineState + showioservicepm ((IOService*)$kgm_re)->pwrMgt + end + printf "\n" + + + # recurse + if ($kgm$arg0_child_count != 0) + + set $kgm_reg_depth = $kgm_reg_depth + 1 + set $kgm$arg0_child_idx = 0 + + while ($kgm$arg0_child_idx < $kgm$arg0_child_count) + set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] + set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) + if $kgm_reg_depth >= $kgm_reg_depth_max + 1 + loop_break + end + showregistryentryrecursepmstate _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib + end + + set $kgm_reg_depth = $kgm_reg_depth - 1 + end +end + +define showregistryentryintpmstate + set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey + set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] + showregistryentryrecursepmstate _ $arg0 0 0 +end + +define showregistrypmstate +# setregistryplane gIOPowerPlane + set $kgm_reg_depth = 0 + set $kgm_show_props = 1 + showregistryentryintpmstate gRegistryRoot +end + +document showregistrypmstate +Syntax: (gdb) showregistrypmstate +| Routine to dump the PM state of each IOPower registry entry +end + +define showstacksafterthread + set $kgm_head_taskp = &default_pset.tasks + set $kgm_actp = (struct thread *)$arg0 + set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) + set $kgm_taskp = (struct task *)$kgm_actp->task + while $kgm_taskp != $kgm_head_taskp + showtaskheader + showtaskint 
$kgm_taskp + set $kgm_head_actp = &($kgm_taskp->threads) + while $kgm_actp != $kgm_head_actp + showactheader + if ($decode_wait_events > 0) + showactint $kgm_actp 1 + else + showactint $kgm_actp 2 + end + set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) + end + printf "\n" + set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) + end +end + +document showstacksafterthread +Syntax: (gdb) showstacksafterthread +| Routine to print out all stacks (as in showallstacks) starting after a given thread +| Useful if that gdb refuses to print a certain task's stack. +end + +define kdp-reenter + set kdp_reentry_deadline = ((unsigned) $arg0)*1000 + continue +end + +document kdp-reenter +Syntax: (gdb) kdp-reenter +| Schedules reentry into the debugger after seconds, and resumes +| the target system. +end + +define _if_present + if (!$arg0) + printf " not" + end + printf " present" +end + +define showMCAstate + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "Not available for current architecture.\n" + else + printf "MCA" + _if_present mca_MCA_present + printf ", control MSR" + _if_present mca_control_MSR_present + printf ", threshold status" + _if_present mca_threshold_status_present + printf "\n%d error banks, ", mca_error_bank_count + printf "family code 0x%x, ", mca_family + printf "machine-check dump state: %d\n", mca_dump_state + set $kgm_cpu = 0 + while cpu_data_ptr[$kgm_cpu] != 0 + set $kgm_mcp = cpu_data_ptr[$kgm_cpu]->cpu_mca_state + if $kgm_mcp + printf "CPU %d:", $kgm_cpu + printf " mca_mcg_ctl: 0x%016llx", $kgm_mcp->mca_mcg_ctl + printf " mca_mcg_status: 0x%016llx\n", $kgm_mcp->mca_mcg_status.u64 + printf "bank " + printf "mca_mci_ctl " + printf "mca_mci_status " + printf "mca_mci_addr " + printf "mca_mci_misc\n" + set $kgm_bank = 0 + while $kgm_bank < mca_error_bank_count + set $kgm_bp = &$kgm_mcp->mca_error_bank[$kgm_bank] + printf " %2d:", $kgm_bank + printf " 0x%016llx", $kgm_bp->mca_mci_ctl + printf " 0x%016llx", 
$kgm_bp->mca_mci_status.u64
+ printf " 0x%016llx", $kgm_bp->mca_mci_addr
+ printf " 0x%016llx\n", $kgm_bp->mca_mci_misc
+ set $kgm_bank = $kgm_bank + 1
+ end
+ end
+ set $kgm_cpu = $kgm_cpu + 1
+ end
+ end
+end
+
+document showMCAstate
+Syntax: showMCAstate
+| Print machine-check register state after MC exception.
+end
+
+define _pt_step
+    #
+    # Step to lower-level page table and print attributes
+    #   $kgm_pt_paddr: physical address of the current page table
+    #   $kgm_pt_index: current page table entry index (0..511)
+    # returns
+    #   $kgm_pt_paddr: next level page table physical address
+    #                  or null if invalid
+    #   $kgm_pt_valid: 1 if $kgm_pt_paddr is valid, 0 if the walk
+    #                  should be aborted
+    #   $kgm_pt_large: 1 if $kgm_pt_paddr is a page frame address
+    #                  of a large page and not another page table entry
+    # For $kgm_pt_verbose = 0: print nothing
+    #                       1: print basic information
+    #                       2: print basic information and hex table dump
+    #
+    set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index
+    readphysint $kgm_entryp 64 $kgm_lcpu_self
+    set $entry = $kgm_readphysint_result
+    if $kgm_pt_verbose == 2
+        # Hex dump of all 512 entries of the current table
+        set $kgm_pte_loop = 0
+        while $kgm_pte_loop < 512
+            set $kgm_pt_paddr_tmp = $kgm_pt_paddr + $kgm_pte_loop*8
+            readphys64 $kgm_pt_paddr_tmp
+            set $kgm_pte_loop = $kgm_pte_loop + 1
+        end
+    end
+    # Frame masks: drop bits 63:52 plus the low 12 bits (low 21 for a large page)
+    set $kgm_paddr_mask = ~((0xfffULL<<52) | 0xfffULL)
+    set $kgm_paddr_largemask = ~((0xfffULL<<52) | 0x1fffffULL)
+    if $kgm_pt_verbose == 0
+        if $entry & (0x1 << 0)
+            set $kgm_pt_valid = 1
+            if $entry & (0x1 << 7)
+                set $kgm_pt_large = 1
+                set $kgm_pt_paddr = $entry & $kgm_paddr_largemask
+            else
+                set $kgm_pt_large = 0
+                set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+            end
+        else
+            set $kgm_pt_valid = 0
+            set $kgm_pt_large = 0
+            set $kgm_pt_paddr = 0
+        end
+    else
+        printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry
+        if $entry & (0x1 << 0)
+            printf "valid"
+            set $kgm_pt_paddr = $entry & $kgm_paddr_mask
+            set $kgm_pt_valid = 1
+        else
+            printf "invalid"
+            set $kgm_pt_paddr = 0
+            set $kgm_pt_valid = 0
+            # stop decoding other bits
+            set $entry = 0
+        end
+        if $entry & (0x1 << 1)
+            printf " writeable"
+        else
+            printf " read-only"
+        end
+        if $entry & (0x1 << 2)
+            printf " user"
+        else
+            printf " supervisor"
+        end
+        if $entry & (0x1 << 3)
+            printf " PWT"
+        end
+        if $entry & (0x1 << 4)
+            printf " PCD"
+        end
+        if $entry & (0x1 << 5)
+            printf " accessed"
+        end
+        if $entry & (0x1 << 6)
+            printf " dirty"
+        end
+        if $entry & (0x1 << 7)
+            printf " large"
+            set $kgm_pt_large = 1
+            # Same frame mask as the quiet path, so the low flag bits of a
+            # large-page entry never leak into the address.
+            set $kgm_pt_paddr = $entry & $kgm_paddr_largemask
+        else
+            set $kgm_pt_large = 0
+        end
+        if $entry & (0x1 << 8)
+            printf " global"
+        end
+        # The software-available field is three bits wide (11:9)
+        if $entry & (0x7 << 9)
+            printf " avail:0x%x", ($entry >> 9) & 0x7
+        end
+        # The constant must be 64 bits wide: an int-typed 0x1 cannot reach bit 63
+        if $entry & (0x1ULL << 63)
+            printf " noexec"
+        end
+        printf "\n"
+    end
+end
+
+define _pmap_walk
+    # Walk the page tables rooted at ((pmap_t)$arg0)->pm_cr3 for the virtual
+    # address $arg1. Every level sets $kgm_pt_index and calls _pt_step.
+    # On success the 32-bit word at the translated address is printed and
+    # $kgm_paddr holds the physical address; otherwise $kgm_paddr is 0.
+    set $kgm_pmap = (pmap_t) $arg0
+    set $kgm_vaddr = $arg1
+    set $kgm_pt_paddr = $kgm_pmap->pm_cr3
+    set $kgm_pt_valid = $kgm_pt_paddr != 0
+    set $kgm_pt_large = 0
+    set $kgm_pframe_offset = 0
+    if $kgm_pt_valid && cpu_64bit
+        # Look up bits 47:39 of the linear address in PML4T
+        set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL
+        set $kgm_pframe_offset = $kgm_vaddr & 0x7fffffffffULL
+        if $kgm_pt_verbose
+            printf "pml4 (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_valid
+        # Look up bits 38:30 of the linear address in PDPT
+        set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL
+        set $kgm_pframe_offset = $kgm_vaddr & 0x3fffffffULL
+        if $kgm_pt_verbose
+            printf "pdpt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_valid && !$kgm_pt_large
+        # Look up bits 29:21 of the linear address in PDT
+        set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL
+        set $kgm_pframe_offset = $kgm_vaddr & 0x1fffffULL
+        if $kgm_pt_verbose
+            printf "pdt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_valid && !$kgm_pt_large
+        # Look up bits 20:12 of the linear address in PT
+        set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL
+        set $kgm_pframe_offset = $kgm_vaddr & 0xfffULL
+        if $kgm_pt_verbose
+            printf "pt (index %d):\n", $kgm_pt_index
+        end
+        _pt_step
+    end
+    if $kgm_pt_valid
+        set $kgm_paddr = $kgm_pt_paddr + $kgm_pframe_offset
+        readphysint $kgm_paddr 32 $kgm_lcpu_self
+        set $kgm_value = $kgm_readphysint_result
+        printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value
+    else
+        set $kgm_paddr = 0
+        printf "(no translation)\n"
+    end
+end
+
+define pmap_walk
+    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_walk <pmap> <vaddr>\n"
+        else
+            # Anything other than an explicit 2 is forced to verbosity 1
+            if !$kgm_pt_verbose
+                set $kgm_pt_verbose = 1
+            else
+                if $kgm_pt_verbose != 2
+                    set $kgm_pt_verbose = 1
+                end
+            end
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_walk
+Syntax: (gdb) pmap_walk <pmap> <vaddr>
+| Perform a page-table walk in <pmap> for <vaddr>.
+| Set $kgm_pt_verbose=2 for full hex dump of page tables.
+end
+
+define pmap_vtop
+    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
+        printf "Not available for current architecture.\n"
+    else
+        if $argc != 2
+            printf "pmap_vtop <pmap> <vaddr>\n"
+        else
+            set $kgm_pt_verbose = 0
+            _pmap_walk $arg0 $arg1
+        end
+    end
+end
+
+document pmap_vtop
+Syntax: (gdb) pmap_vtop <pmap> <vaddr>
+| For page-tables in <pmap> translate <vaddr> to physical address.
+end
+
+define zstack
+ set $index = $arg0
+
+ if (log_records == 0)
+ set $count = 0
+ printf "Zone logging not enabled.
Add 'zlog=' to boot-args.\n" + else + if ($argc == 2) + set $count = $arg1 + else + set $count = 1 + end + end + + while ($count) + printf "\n--------------- " + + if (zrecords[$index].z_opcode == 1) + printf "ALLOC " + else + printf "FREE " + end + + printf " 0x%x : index %d : ztime %d -------------\n", zrecords[$index].z_element, $index, zrecords[$index].z_time + + set $frame = 0 + + while ($frame < 15) + set $frame_pc = zrecords[$index].z_pc[$frame] + + if ($frame_pc == 0) + loop_break + end + + x/i $frame_pc + set $frame = $frame + 1 + end + + set $index = $index + 1 + set $count = $count - 1 + end +end + +document zstack +Syntax: (gdb) zstack [] +| Zone leak debugging: print the stack trace of log element at . +| If a is supplied, it prints log elements starting at . +| +| The suggested usage is to look at indexes below zcurrent and look for common stack traces. +| The stack trace that occurs the most is probably the cause of the leak. Find the pc of the +| function calling into zalloc and use the countpcs kgmacro to find out how often that pc occurs in the log. +| The pc occuring in a high percentage of records is most likely the source of the leak. +| +| The findoldest kgmacro is also useful for leak debugging since it identifies the oldest record +| in the log, which may indicate the leaker. +end + +define findoldest + set $index = 0 + set $count = log_records + set $cur_min = 2000000000 + set $cur_index = 0 + + if (log_records == 0) + printf "Zone logging not enabled. Add 'zlog=' to boot-args.\n" + else + + while ($count) + if (zrecords[$index].z_element && zrecords[$index].z_time < $cur_min) + set $cur_index = $index + set $cur_min = zrecords[$index].z_time + end + + set $count = $count - 1 + set $index = $index + 1 + end + + printf "oldest record is at log index %d:\n", $cur_index + zstack $cur_index + end +end + +document findoldest +Syntax: (gdb) findoldest +| Zone leak debugging: find and print the oldest record in the log. 
Note that this command +| can take several minutes to run since it uses linear search. +| +| Once it prints a stack trace, find the pc of the caller above all the zalloc, kalloc and +| IOKit layers. Then use the countpcs kgmacro to see how often this caller has allocated +| memory. A caller with a high percentage of records in the log is probably the leaker. +end + +define countpcs + set $target_pc = $arg0 + set $index = 0 + set $count = log_records + set $found = 0 + + if (log_records == 0) + printf "Zone logging not enabled. Add 'zlog=' to boot-args.\n" + else + + while ($count) + set $frame = 0 + + if (zrecords[$index].z_element != 0) + while ($frame < 15) + if (zrecords[$index].z_pc[$frame] == $target_pc) + set $found = $found + 1 + set $frame = 15 + end + + set $frame = $frame + 1 + end + end + + set $index = $index + 1 + set $count = $count - 1 + end + + printf "occurred %d times in log (%d%c of records)\n", $found, ($found * 100) / zrecorded, '%' + end +end + +document countpcs +Syntax: (gdb) countpcs +| Zone leak debugging: search the log and print a count of all log entries that contain the given +| in the stack trace. This is useful for verifying a suspected as being the source of +| the leak. If a high percentage of the log entries contain the given , then it's most +| likely the source of the leak. Note that this command can take several minutes to run. +end + +define findelem + set $fe_index = zcurrent + set $fe_count = log_records + set $fe_elem = $arg0 + set $fe_prev_op = -1 + + if (log_records == 0) + printf "Zone logging not enabled. Add 'zlog=' to boot-args.\n" + end + + while ($fe_count) + if (zrecords[$fe_index].z_element == $fe_elem) + zstack $fe_index + + if (zrecords[$fe_index].z_opcode == $fe_prev_op) + printf "*************** DOUBLE OP! 
*********************\n" + end + + # remember this opcode so that two consecutive identical ops on the + # same element are reported as a double op + set $fe_prev_op = zrecords[$fe_index].z_opcode + end + + set $fe_count = $fe_count - 1 + set $fe_index = $fe_index + 1 + + # the log is circular: wrap back to the first record + if ($fe_index >= log_records) + set $fe_index = 0 + end + end +end + +document findelem +Syntax: (gdb) findelem +| Zone corruption debugging: search the log and print out the stack traces for all log entries that +| refer to the given zone element. When the kernel panics due to a corrupted zone element, get the +| element address and use this macro. This will show you the stack traces of all logged zalloc and +| zfree operations which tells you who touched the element in the recent past. This also makes +| double-frees readily apparent. +end + + +# This implements a shadowing scheme in kgmacros. If the +# current user data can be accessed by simply changing kdp_pmap, +# that is used. Otherwise, we copy data into a temporary buffer +# in the kernel's address space and use that instead. Don't rely on +# kdp_pmap between invocations of map/unmap. Since the shadow +# codepath uses a manual KDP packet, request no more than 128 bytes.
+# Uses $kgm_lp64 for kernel address space size +define _map_user_data_from_task + set $kgm_map_user_taskp = (task_t)$arg0 + set $kgm_map_user_map = $kgm_map_user_taskp->map + set $kgm_map_user_pmap = $kgm_map_user_map->pmap + set $kgm_map_user_task_64 = ( $kgm_map_user_taskp->taskFeatures[0] & 0x80000000) + set $kgm_map_user_window = 0 + set $kgm_map_switch_map = 0 + + if $kgm_lp64 + set $kgm_map_switch_map = 1 + else + if !$kgm_map_user_task_64 + set $kgm_map_switch_map = 1 + end + end + + if ($kgm_map_switch_map) + # switch the map safely + set $kgm_map_user_window = $arg1 + set kdp_pmap = $kgm_map_user_pmap + else + # requires shadowing/copying + + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_readmem64_req_t) + set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_READMEM64 + set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint64_t)$arg1 + set $kgm_pkt->nbytes = (uint32_t)$arg2 + + set kdp_pmap = $kgm_map_user_pmap + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + # Go back to kernel map so that we can access buffer directly + set kdp_pmap = 0 + + set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data + if ($kgm_pkt->error == 0) + set $kgm_map_user_window = $kgm_pkt->data + else + set $kgm_map_user_window = 0 + end + + end +end + +define _unmap_user_data_from_task + set kdp_pmap = 0 +end + +# uses $kgm_taskp. 
Maps 32 bytes at a time and prints it +define _print_path_for_image + set $kgm_print_path_address = (unsigned long long)$arg0 + set $kgm_path_str_notdone = 1 + + while $kgm_path_str_notdone + _map_user_data_from_task $kgm_taskp $kgm_print_path_address 32 + + set $kgm_print_path_ptr = (char *)$kgm_map_user_window + set $kgm_path_i = 0 + while ($kgm_path_i < 32 && $kgm_print_path_ptr[$kgm_path_i] != '\0') + set $kgm_path_i = $kgm_path_i + 1 + end + printf "%.32s", $kgm_print_path_ptr + + _unmap_user_data_from_task $kgm_taskp + + # if we terminated on NUL, break out + if $kgm_path_i < 32 + set $kgm_path_str_notdone = 0 + else + set $kgm_print_path_address = $kgm_print_path_address + 32 + end + end +end + +# uses $kgm_taskp and $kgm_task_64 +define _print_image_info + set $kgm_mh_image_address = (unsigned long long)$arg0 + set $kgm_mh_path_address = (unsigned long long)$arg1 + + # 32 bytes enough for mach_header/mach_header_64 + _map_user_data_from_task $kgm_taskp $kgm_mh_image_address 32 + + set $kgm_mh_ptr = (unsigned int*)$kgm_map_user_window + set $kgm_mh_magic = $kgm_mh_ptr[0] + set $kgm_mh_cputype = $kgm_mh_ptr[1] + set $kgm_mh_cpusubtype = $kgm_mh_ptr[2] + set $kgm_mh_filetype = $kgm_mh_ptr[3] + set $kgm_mh_ncmds = $kgm_mh_ptr[4] + set $kgm_mh_sizeofcmds = $kgm_mh_ptr[5] + set $kgm_mh_flags = $kgm_mh_ptr[6] + + _unmap_user_data_from_task $kgm_taskp + + if $kgm_mh_magic == 0xfeedfacf + set $kgm_mh_64 = 1 + set $kgm_lc_address = $kgm_mh_image_address + 32 + else + set $kgm_mh_64 = 0 + set $kgm_lc_address = $kgm_mh_image_address + 28 + end + + set $kgm_lc_idx = 0 + set $kgm_uuid_data = 0 + while $kgm_lc_idx < $kgm_mh_ncmds + + # 24 bytes is size of uuid_command + _map_user_data_from_task $kgm_taskp $kgm_lc_address 24 + + set $kgm_lc_ptr = (unsigned int *)$kgm_map_user_window + set $kgm_lc_cmd = $kgm_lc_ptr[0] + set $kgm_lc_cmd_size = $kgm_lc_ptr[1] + set $kgm_lc_data = (unsigned char *)$kgm_lc_ptr + 8 + + if $kgm_lc_cmd == 0x1b + set $kgm_uuid_data = $kgm_lc_data + 
if $kgm_mh_64 + printf "0x%016llx ", $kgm_mh_image_address + else + printf "0x%08x ", $kgm_mh_image_address + end + + set $kgm_printed_type = 0 + if $kgm_mh_filetype == 0x2 + printf "MH_EXECUTE " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x6 + printf "MH_DYLIB " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x7 + printf "MH_DYLINKER " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x8 + printf "MH_BUNDLE " + set $kgm_printed_type = 1 + end + if !$kgm_printed_type + printf "UNKNOWN " + end + printf "%02.2X%02.2X%02.2X%02.2X-", $kgm_uuid_data[0], $kgm_uuid_data[1], $kgm_uuid_data[2], $kgm_uuid_data[3] + printf "%02.2X%02.2X-", $kgm_uuid_data[4], $kgm_uuid_data[5] + printf "%02.2X%02.2X-", $kgm_uuid_data[6], $kgm_uuid_data[7] + printf "%02.2X%02.2X-", $kgm_uuid_data[8], $kgm_uuid_data[9] + printf "%02.2X%02.2X%02.2X%02.2X%02.2X%02.2X", $kgm_uuid_data[10], $kgm_uuid_data[11], $kgm_uuid_data[12], $kgm_uuid_data[13], $kgm_uuid_data[14], $kgm_uuid_data[15] + + _unmap_user_data_from_task $kgm_taskp + + printf " " + _print_path_for_image $kgm_mh_path_address + printf "\n" + + loop_break + else + _unmap_user_data_from_task $kgm_taskp + end + + set $kgm_lc_address = $kgm_lc_address + $kgm_lc_cmd_size + set $kgm_lc_idx = $kgm_lc_idx + 1 + end + + if (!$kgm_uuid_data) + # didn't find LC_UUID, for a dylib, just print out basic info + if $kgm_mh_64 + printf "0x%016llx ", $kgm_mh_image_address + else + printf "0x%08x ", $kgm_mh_image_address + end + set $kgm_printed_type = 0 + if $kgm_mh_filetype == 0x2 + printf "MH_EXECUTE " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x6 + printf "MH_DYLIB " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x7 + printf "MH_DYLINKER " + set $kgm_printed_type = 1 + end + if $kgm_mh_filetype == 0x8 + printf "MH_BUNDLE " + set $kgm_printed_type = 1 + end + if !$kgm_printed_type + printf "UNKNOWN " + end + printf " ", + + printf " " + _print_path_for_image $kgm_mh_path_address + 
printf "\n" + + end + +end + +define _print_images_for_dyld_image_info + set $kgm_taskp = $arg0 + set $kgm_task_64 = $arg1 + set $kgm_dyld_all_image_infos_address = (unsigned long long)$arg2 + + _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 16 + + set $kgm_dyld_all_image_infos = (unsigned int *)$kgm_map_user_window + if ($kgm_dyld_all_image_infos[0] != 6) + printf "Invalid version number %d\n", $kgm_dyld_all_image_infos[0] + end + set $kgm_image_info_count = $kgm_dyld_all_image_infos[1] + + if $kgm_task_64 + set $kgm_image_info_size = 24 + set $kgm_image_info_array_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[1] + else + set $kgm_image_info_size = 12 + set $kgm_image_info_array_address = ((unsigned int *)$kgm_dyld_all_image_infos)[2] + end + + _unmap_user_data_from_task $kgm_taskp + + set $kgm_image_info_i = 0 + while $kgm_image_info_i < $kgm_image_info_count + + set $kgm_image_info_address = $kgm_image_info_array_address + $kgm_image_info_size*$kgm_image_info_i + + _map_user_data_from_task $kgm_taskp $kgm_image_info_address $kgm_image_info_size + if $kgm_task_64 + set $kgm_image_info_addr = ((unsigned long long *)$kgm_map_user_window)[0] + set $kgm_image_info_path = ((unsigned long long *)$kgm_map_user_window)[1] + else + set $kgm_image_info_addr = ((unsigned int *)$kgm_map_user_window)[0] + set $kgm_image_info_path = ((unsigned int *)$kgm_map_user_window)[1] + end + _unmap_user_data_from_task $kgm_taskp + + # printf "[%d] = image address %llx path address %llx\n", $kgm_image_info_i, $kgm_image_info_addr, $kgm_image_info_path + _print_image_info $kgm_image_info_addr $kgm_image_info_path + + set $kgm_image_info_i = $kgm_image_info_i + 1 + end +end + +define showuserlibraries + set $kgm_taskp = (task_t)$arg0 + set $kgm_dyld_image_info = $kgm_taskp->all_image_info_addr + + set $kgm_map = $kgm_taskp->map + set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000) + + if ($kgm_dyld_image_info != 0) + printf "address " + if 
$kgm_task_64 + printf " " + end + printf " type " + printf " uuid " + printf "path\n" + + _print_images_for_dyld_image_info $kgm_taskp $kgm_task_64 $kgm_dyld_image_info + else + printf "No dyld shared library information available for task\n" + end +end +document showuserlibraries +Syntax: (gdb) showuserlibraries +| For a given user task, inspect the dyld shared library state and print +| information about all Mach-O images. +end + +define showkerneldebugheader + printf "kd_buf " + showptrhdrpad + printf "CPU Thread " + showptrhdrpad + printf "Timestamp S/E Class Sub Code Code Specific Info\n" +end + +define _printevflags + if $arg0 & 1 + printf "EV_RE " + end + if $arg0 & 2 + printf "EV_WR " + end + if $arg0 & 4 + printf "EV_EX " + end + if $arg0 & 8 + printf "EV_RM " + end + + if $arg0 & 0x00100 + printf "EV_RBYTES " + end + if $arg0 & 0x00200 + printf "EV_WBYTES " + end + if $arg0 & 0x00400 + printf "EV_RCLOSED " + end + if $arg0 & 0x00800 + printf "EV_RCONN " + end + if $arg0 & 0x01000 + printf "EV_WCLOSED " + end + if $arg0 & 0x02000 + printf "EV_WCONN " + end + if $arg0 & 0x04000 + printf "EV_OOB " + end + if $arg0 & 0x08000 + printf "EV_FIN " + end + if $arg0 & 0x10000 + printf "EV_RESET " + end + if $arg0 & 0x20000 + printf "EV_TIMEOUT " + end +end + +define showkerneldebugbufferentry + set $kgm_kdebug_entry = (kd_buf *) $arg0 + + set $kgm_debugid = $kgm_kdebug_entry->debugid + set $kgm_kdebug_arg1 = $kgm_kdebug_entry->arg1 + set $kgm_kdebug_arg2 = $kgm_kdebug_entry->arg2 + set $kgm_kdebug_arg3 = $kgm_kdebug_entry->arg3 + set $kgm_kdebug_arg4 = $kgm_kdebug_entry->arg4 + + if $kgm_lp64 + set $kgm_kdebug_cpu = $kgm_kdebug_entry->cpuid + set $kgm_ts_hi = ($kgm_kdebug_entry->timestamp >> 32) & 0xFFFFFFFF + set $kgm_ts_lo = $kgm_kdebug_entry->timestamp & 0xFFFFFFFF + else + set $kgm_kdebug_cpu = ($kgm_kdebug_entry->timestamp >> 56) + set $kgm_ts_hi = ($kgm_kdebug_entry->timestamp >> 32) & 0x00FFFFFF + set $kgm_ts_lo = $kgm_kdebug_entry->timestamp & 0xFFFFFFFF + 
end + + # split the debugid into class (bits 31:24), subclass (23:16), + # code (15:2) and start/end qualifier (1:0) + set $kgm_kdebug_class = ($kgm_debugid >> 24) & 0x000FF + set $kgm_kdebug_subclass = ($kgm_debugid >> 16) & 0x000FF + set $kgm_kdebug_code = ($kgm_debugid >> 2) & 0x03FFF + set $kgm_kdebug_qual = ($kgm_debugid ) & 0x00003 + + if $kgm_kdebug_qual == 0 + set $kgm_kdebug_qual = '-' + else + if $kgm_kdebug_qual == 1 + set $kgm_kdebug_qual = 'S' + else + if $kgm_kdebug_qual == 2 + set $kgm_kdebug_qual = 'E' + else + if $kgm_kdebug_qual == 3 + set $kgm_kdebug_qual = '?' + end + end + end + end + + # preamble and qual + + showptr $kgm_kdebug_entry + printf " %d ", $kgm_kdebug_cpu + showptr $kgm_kdebug_entry->arg5 + printf " 0x%08X%08X %c ", $kgm_ts_hi, $kgm_ts_lo, $kgm_kdebug_qual + + # class + + if $kgm_kdebug_class == 1 + printf "MACH" + else + if $kgm_kdebug_class == 2 + printf "NET " + else + if $kgm_kdebug_class == 3 + printf "FS " + else + if $kgm_kdebug_class == 4 + printf "BSD " + else + if $kgm_kdebug_class == 5 + printf "IOK " + else + if $kgm_kdebug_class == 6 + printf "DRVR" + else + if $kgm_kdebug_class == 7 + printf "TRAC" + else + if $kgm_kdebug_class == 8 + printf "DLIL" + else + # security class is 9; testing 8 again here could never match + if $kgm_kdebug_class == 9 + printf "SEC " + else + if $kgm_kdebug_class == 20 + printf "MISC" + else + if $kgm_kdebug_class == 31 + printf "DYLD" + else + if $kgm_kdebug_class == 32 + printf "QT " + else + if $kgm_kdebug_class == 33 + printf "APPS" + else + if $kgm_kdebug_class == 255 + printf "MIG " + else + printf "0x%02X", $kgm_kdebug_class + end + end + end + end + end + end + end + end + end + end + end + end + end + end + + # subclass and code + + printf " 0x%02X %5d ", $kgm_kdebug_subclass, $kgm_kdebug_code + + # space for debugid-specific processing + + # EVPROC from bsd/kern/sys_generic.c + + # MISCDBG_CODE(DBG_EVENT,DBG_WAIT) + if $kgm_debugid == 0x14100048 + printf "waitevent " + if $kgm_kdebug_arg1 == 1 + printf "before sleep" + else + if $kgm_kdebug_arg1 == 2 + printf "after sleep" + else + printf "????????????"
+ end + end + printf " chan=0x%08X ", $kgm_kdebug_arg2 + else + # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_START) + if $kgm_debugid == 0x14100049 + printf "waitevent " + else + # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_END) + if $kgm_debugid == 0x1410004a + printf "waitevent error=%d ", $kgm_kdebug_arg1 + printf "eqp=0x%08X ", $kgm_kdebug_arg4 + _printevflags $kgm_kdebug_arg3 + printf "er_handle=%d ", $kgm_kdebug_arg2 + else + # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_START) + if $kgm_debugid == 0x14100059 + printf "evprocdeque proc=0x%08X ", $kgm_kdebug_arg1 + if $kgm_kdebug_arg2 == 0 + printf "remove first " + else + printf "remove 0x%08X ", $kgm_kdebug_arg2 + end + else + # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_END) + if $kgm_debugid == 0x1410005a + printf "evprocdeque " + if $kgm_kdebug_arg1 == 0 + printf "result=NULL " + else + printf "result=0x%08X ", $kgm_kdebug_arg1 + end + else + # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_START) + if $kgm_debugid == 0x14100041 + printf "postevent " + _printevflags $kgm_kdebug_arg1 + else + # MISCDBG_CODE(DBG_EVENT,DBG_POST) + if $kgm_debugid == 0x14100040 + printf "postevent " + printf "evq=0x%08X ", $kgm_kdebug_arg1 + printf "er_eventbits=" + _printevflags $kgm_kdebug_arg2 + printf "mask=" + _printevflags $kgm_kdebug_arg3 + else + # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_END) + if $kgm_debugid == 0x14100042 + printf "postevent " + else + # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_START) + if $kgm_debugid == 0x14100055 + # print eqp in hex, matching its 0x prefix and the other eqp/evq fields + printf "evprocenque eqp=0x%08X ", $kgm_kdebug_arg1 + if $kgm_kdebug_arg2 & 1 + printf "EV_QUEUED " + end + _printevflags $kgm_kdebug_arg3 + else + + # MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP) + if $kgm_debugid == 0x14100050 + # print eqp in hex, matching its 0x prefix and the other eqp/evq fields + printf "evprocenque before wakeup eqp=0x%08X ", $kgm_kdebug_arg4 + else + # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_END) + if $kgm_debugid == 0x14100056 + printf "evprocenque " + else + # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_START) + if $kgm_debugid == 0x1410004d +
printf "modwatch " + else + # MISCDBG_CODE(DBG_EVENT,DBG_MOD) + if $kgm_debugid == 0x1410004c + printf "modwatch er_handle=%d ", $kgm_kdebug_arg1 + _printevflags $kgm_kdebug_arg2 + printf "evq=0x%08X ", $kgm_kdebug_arg3 + else + # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_END) + if $kgm_debugid == 0x1410004e + printf "modwatch er_handle=%d ", $kgm_kdebug_arg1 + printf "ee_eventmask=" + _printevflags $kgm_kdebug_arg2 + printf "sp=0x%08X ", $kgm_kdebug_arg3 + printf "flag=" + _printevflags $kgm_kdebug_arg4 + else + printf "arg1=0x%08X ", $kgm_kdebug_arg1 + printf "arg2=0x%08X ", $kgm_kdebug_arg2 + printf "arg3=0x%08X ", $kgm_kdebug_arg3 + printf "arg4=0x%08X ", $kgm_kdebug_arg4 + end + end + end + end + end + end + end + end + end + end + end + end + end + end + + # finish up + + printf "\n" +end + +define showkerneldebugbuffercpu + set $kgm_cpu_number = (int) $arg0 + set $kgm_entry_count = (int) $arg1 + set $kgm_debugentriesfound = 0 + + #if kdebug_flags & KDBG_BFINIT + if (kdebug_flags & 0x80000000) + showkerneldebugheader + + if $kgm_entry_count == 0 + printf " is 0, dumping 50 entries\n" + set $kgm_entry_count = 50 + end + + if $kgm_cpu_number >= kd_cpus + printf "cpu number too big\n" + else + set $kgm_kdbp = &kdbip[$kgm_cpu_number] + set $kgm_kdsp = $kgm_kdbp->kd_list_head + while (($kgm_kdsp != 0) && ($kgm_entry_count > 0)) + if $kgm_kdsp->kds_readlast != $kgm_kdsp->kds_bufptr + set $kgm_kds_bufptr = $kgm_kdsp->kds_bufptr + while (($kgm_kds_bufptr > $kgm_kdsp->kds_readlast) && ($kgm_entry_count > 0)) + set $kgm_kds_bufptr = $kgm_kds_bufptr - 1 + set $kgm_entry_count = $kgm_entry_count - 1 + showkerneldebugbufferentry $kgm_kds_bufptr + end + end + set $kgm_kdsp = $kgm_kdsp->kds_next + end + end + else + printf "Trace buffer not enabled\n" + end +end + +document showkerneldebugbuffercpu +Syntax: showkerneldebugbuffercpu +| Prints the last N entries in the kernel debug buffer for CPU x. 
+end + +define showkerneldebugbuffer + + #if kdebug_flags & KDBG_BFINIT + if (kdebug_flags & 0x80000000) + + set $kgm_entrycount = (int) $arg0 + + if $kgm_entrycount == 0 + printf " is 0, dumping 50 entries per cpu\n" + set $kgm_entrycount = 50 + end + + set $kgm_cpu = (int) 0 + + while $kgm_cpu < kd_cpus + showkerneldebugbuffercpu $kgm_cpu $kgm_entrycount + set $kgm_cpu = $kgm_cpu + 1 + end + else + printf "Trace buffer not enabled\n" + end +end + +document showkerneldebugbuffer +Syntax: showkerneldebugbuffer +| Prints the last N entries in the kernel debug buffer per cpu. i.e. showkerneldebugbuffer 50 will +| display the last 50 entries in each CPU's debug buffer. +end + +define showallvmstats + printf " pid command #ents wired vsize rsize max rsize\n" + printf " (pages) (pages) (pages) (pages)\n" + set $kgm_head_taskp = &tasks + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_taskp != $kgm_head_taskp + set $kgm_procp = (struct proc *)($kgm_taskp->bsd_info) + set $kgm_mapp = (struct _vm_map *)($kgm_taskp->map) + printf "%8d %17s %8d %15d %15d %15d %15d\n", $kgm_procp->p_pid, $kgm_procp->p_comm, $kgm_mapp->hdr.nentries, $kgm_mapp->pmap->stats.wired_count, $kgm_mapp->size >> 12, $kgm_mapp->pmap->stats.resident_count, $kgm_mapp->pmap->stats.resident_max + set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) + end +end + +document showallvmstats +Syntax: showallvmstats +| prints a summary of vm statistics in a table format +end + +define show_user_registers + if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) + set $kgm_thread = (thread_t)$arg0 + if ((*(thread_t)$kgm_thread)->machine.xxx_pcb.iss.flavor == 15) + p/x ($kgm_thread)->machine.xxx_pcb.iss->uss.ss_64 + else + p/x ($kgm_thread)->machine.xxx_pcb.iss->uss.ss_32 + end + end + if ($kgm_mtype == $kgm_mtype_ppc) + set $kgm_thread = (thread_t)$arg0 + p/x *($kgm_thread)->machine.pcb + end +end + +document show_user_registers +Syntax: show_user_registers +| Display user registers 
associated with a kernel thread +| properly displays the 32 bit or 64 bit registers for intel architecture +end + +define _cmp + set $cmp0 = $arg0 + set $cmp1 = $arg1 + + # check for end of string. cmp0 can be longer than cmp1. it + # can't be shorter. + if $cmp1 == '\0' + set $kgm_strcmp_result = 0 + set $kgm_strcmp_done = 1 + end + + if !$kgm_strcmp_done && $cmp0 == '\0' + set $kgm_strcmp_result = -1 + set $kgm_strcmp_done = 1 + end + + # do they match? + if !$kgm_strcmp_done + set $kgm_strcmp_result = (uint8_t) $cmp0 - (uint8_t) $cmp1 + if $kgm_strcmp_result != 0 + set $kgm_strcmp_done = 1 + end + end +end + +define _cmp_arg64 + set $cmp = $arg1 + set $masked = $cmp & 0xFF + _cmp $arg0[0] $masked + + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[1] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[2] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[3] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[4] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[5] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[6] $masked + end + if !$kgm_strcmp_done + set $cmp = $cmp >> 8 + set $masked = $cmp & 0xFF + _cmp $arg0[7] $masked + end +end + +define strcmp_arg_pack64 + set $kgm_strcmp_arg = ((((((((((((((uint64_t) $arg7 << 8) | $arg6) << 8) | $arg5) << 8) | $arg4) << 8) | $arg3) << 8) | $arg2) << 8) | $arg1) << 8) | $arg0 +end + +document strcmp_arg_pack64 +Syntax: strcmp_arg_pack64 +| Packs a string given as 8 character arguments into a 64-bit int stored in +| $kgm_strcmp_arg. Use 0 or '\0' for unused arguments. The encoded string +| is suitable for use by strcmp_nomalloc and setfindregistrystr. 
+| e.g., strcmp_arg_pack64 'H' 'e' 'l' 'l' 'o' 0 0 0 +| packs "Hello" into $kgm_strcmp_arg. +| +end + +define strcmp_nomalloc + set $str = $arg0 + set $count = $argc - 1 + + set $kgm_strcmp_result = 0 + set $kgm_strcmp_done = 0 + + if $count > 0 + _cmp_arg64 $str $arg1 + end + if !$kgm_strcmp_done && $count > 1 + set $str = $str + 8 + _cmp_arg64 $str $arg2 + end + if !$kgm_strcmp_done && $count > 2 + set $str = $str + 8 + _cmp_arg64 $str $arg3 + end + if !$kgm_strcmp_done && $count > 3 + set $str = $str + 8 + _cmp_arg64 $str $arg4 + end + if !$kgm_strcmp_done && $count > 4 + set $str = $str + 8 + _cmp_arg64 $str $arg5 + end + if !$kgm_strcmp_done && $count > 5 + set $str = $str + 8 + _cmp_arg64 $str $arg6 + end + if !$kgm_strcmp_done && $count > 6 + set $str = $str + 8 + _cmp_arg64 $str $arg7 + end + if !$kgm_strcmp_done && $count > 7 + set $str = $str + 8 + _cmp_arg64 $str $arg8 + end + if !$kgm_strcmp_done && $count > 8 + set $str = $str + 8 + _cmp_arg64 $str $arg9 + end +end + +document strcmp_nomalloc +Syntax: strcmp_nomalloc [b] [c] [d] [e] [f] [g] [h] [i] +| Given a pre-allocated , perform a string compare with the +| encoded string stored in arguments a - i. The result is stored in +| $kgm_strcmp_result. 
+| +| For example, the following will result in $kgm_strcmp_result == 0: +| strcmp_arg_pack64 'D' 'a' 'r' 'w' 'i' 'n' ' ' 'K' +| strcmp_nomalloc version $kgm_strcmp_arg +end + +# _pci_cfg_addr_value $addr $size +define _pci_cfg_addr_value + readphysint $arg0 $arg1 $kgm_lcpu_self + set $kgm_pci_cfg_value = $kgm_readphysint_result +end + + +set $kgm_pci_cfg_init = 0 +define _pci_cfg_init + # get this from the registry if it exists there + if $kgm_pci_cfg_init == 0 + strcmp_arg_pack64 'A' 'p' 'p' 'l' 'e' 'A' 'C' 'P' + set $AppleACP = $kgm_strcmp_arg + strcmp_arg_pack64 'I' 'P' 'l' 'a' 't' 'f' 'o' 'r' + set $IPlatfor = $kgm_strcmp_arg + strcmp_arg_pack64 'm' 'E' 'x' 'p' 'e' 'r' 't' 0 + set $mExpert = $kgm_strcmp_arg + setfindregistrystr $AppleACP $IPlatfor $mExpert + + set $olddepth = $kgm_reg_depth_max + set $kgm_reg_depth_max = 2 + _findregistryentry + set $kgm_reg_depth_max = $olddepth + + if $kgm_registry_entry + strcmp_arg_pack64 'a' 'c' 'p' 'i' '-' 'm' 'm' 'c' + set $acpi_mmc = $kgm_strcmp_arg + strcmp_arg_pack64 'f' 'g' '-' 's' 'e' 'g' '0' 0 + set $fg_seg0 = $kgm_strcmp_arg + setfindregistrystr $acpi_mmc $fg_seg0 + + _findregistryprop $kgm_registry_entry + if $kgm_registry_value + set $kgm_pci_cfg_base = ((OSNumber *) $kgm_registry_value)->value + set $kgm_pci_cfg_init = 1 + end + end + end + + # if the above fails, search for 0:0:0 in likely places. 
+ if $kgm_pci_cfg_init == 0 + set $kgm_pci_cfg_base = 0xF0000000 + while $kgm_pci_cfg_init == 0 && $kgm_pci_cfg_base > 0xA0000000 + _pci_cfg_addr_value $kgm_pci_cfg_base 8 + if $kgm_pci_cfg_value > 0x0 && $kgm_pci_cfg_value < 0xFF + set $kgm_pci_cfg_init = 1 + else + set $kgm_pci_cfg_base = $kgm_pci_cfg_base - 0x10000000 + end + end + end +end + +# _pci_cfg_addr $bus $dev $fcn $off +define _pci_cfg_addr + set $bus = $arg0 + set $dev = $arg1 + set $fcn = $arg2 + set $off = $arg3 + + _pci_cfg_init + # compose the config address: bus at bit 20, device at bit 15, + # function at bit 12, OR-ed with the byte offset + set $kgm_pci_cfg_addr = $kgm_pci_cfg_base | ($bus << 20) | ($dev << 15) | ($fcn << 12) | $off +end + +# _pci_cfg_value $bus $dev $fcn $off $size +define _pci_cfg_value + _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 + _pci_cfg_addr_value $kgm_pci_cfg_addr $arg4 +end + +define pci_cfg_read8 + _pci_cfg_value $arg0 $arg1 $arg2 $arg3 8 + printf "%08X: %02X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value +end + +define pci_cfg_read16 + _pci_cfg_value $arg0 $arg1 $arg2 $arg3 16 + printf "%08X: %04X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value +end + +define pci_cfg_read32 + _pci_cfg_value $arg0 $arg1 $arg2 $arg3 32 + printf "%08X: %08X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value +end + +document pci_cfg_read8 +Syntax: (gdb) pci_cfg_read8 <bus> <dev> <fcn> <off> +| read 8 bits for the given <off> of the pci device located at +| <bus>:<dev>:<fcn>. +end + +document pci_cfg_read16 +Syntax: (gdb) pci_cfg_read16 <bus> <dev> <fcn> <off> +| read 16 bits for the given <off> of the pci device located at +| <bus>:<dev>:<fcn>. +end + +document pci_cfg_read32 +Syntax: (gdb) pci_cfg_read32 <bus> <dev> <fcn> <off> +| read 32 bits for the given <off> of the pci device located at +| <bus>:<dev>:<fcn>.
+end + +define pci_cfg_write8 + _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 + writephysint $kgm_pci_cfg_addr 8 $arg4 $kgm_lcpu_self +end + +define pci_cfg_write16 + _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 + writephysint $kgm_pci_cfg_addr 16 $arg4 $kgm_lcpu_self +end + +define pci_cfg_write32 + _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 + writephysint $kgm_pci_cfg_addr 32 $arg4 $kgm_lcpu_self +end + +document pci_cfg_write8 +Syntax: (gdb) pci_cfg_write8 +| write an 8-bit into the given of the pci device located at +| ::. +end + +document pci_cfg_write16 +Syntax: (gdb) pci_cfg_write16 +| write a 16-bit into the given of the pci device located at +| ::. +end + +document pci_cfg_write32 +Syntax: (gdb) pci_cfg_write32 +| write a 32-bit into the given of the pci device located at +| ::. +end + + +define pci_cfg_dump + set $bus = $arg0 + set $dev = $arg1 + set $fcn = $arg2 + set $off = 0 + + # check for a valid pci device + _pci_cfg_value $bus $dev $fcn $off 8 + if $kgm_pci_cfg_value > 0x0 && $kgm_pci_cfg_value < 0xff + printf " address: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F\n" + printf "---------------------------------------------------------" + + while $off < 256 + _pci_cfg_value $bus $dev $fcn $off 32 + if ($off & 0xF) == 0 + printf "\n%08X: ", $kgm_pci_cfg_addr + end + printf "%02X %02X %02X %02X ", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF + set $off = $off + 4 + end + printf "\n" + + # check for pcie extended capability config space + _pci_cfg_value $bus $dev $fcn $off 8 + if $kgm_pci_cfg_value < 0xff + while $off < 4096 + _pci_cfg_value $bus $dev $fcn $off 32 + if ($off & 0xF) == 0 + printf "\n%08X: ", $kgm_pci_cfg_addr + end + printf "%02X %02X %02X %02X ", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF + set $off = $off + 4 + end + printf "\n" + end + end +end + +document pci_cfg_dump +Syntax: 
(gdb) pci_cfg_dump +| dump config space for the pci device located at :: +| if you specify an invalid/inaccessible pci device, nothing will be +| printed out. +end + +set $kgm_pci_cfg_bus_start = 0 +set $kgm_pci_cfg_bus_max = 8 +set $kgm_pci_cfg_device_max = 32 +set $kgm_pci_cfg_function_max = 8 +define _pci_cfg_scan + set $dump = $arg0 + + set $bus = $kgm_pci_cfg_bus_start + while $bus < $kgm_pci_cfg_bus_max + # check for bus:0:0 to see if we should + # probe this bus further + _pci_cfg_value $bus 0x0 0x0 0x0 32 + if $kgm_pci_cfg_value > 0 && $kgm_pci_cfg_value < 0xFFFFFFFF + + set $dev = 0 + while $dev < $kgm_pci_cfg_device_max + + set $fcn = 0 + while $fcn < $kgm_pci_cfg_function_max + _pci_cfg_value $bus $dev $fcn 0x0 32 + if $kgm_pci_cfg_value > 0 && $kgm_pci_cfg_value < 0xFFFFFFFF + if $dump == 0 + printf "%03X:%03X:%03X: %02X%02X %02X%02X", $bus, $dev, $fcn, ($kgm_pci_cfg_value >> 8) & 0xFF, $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF + _pci_cfg_value $bus $dev $fcn 0x8 32 + printf " %02X | %02X%02X%02X\n", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF + else + printf " device: %03X:%03X:%03X\n", $bus, $dev, $fcn + pci_cfg_dump $bus $dev $fcn + printf "\n" + end + end + set $fcn = $fcn + 1 + end + set $dev = $dev + 1 + end + end + set $bus = $bus + 1 + end +end + +define pci_cfg_dump_all + _pci_cfg_scan 1 +end + +document pci_cfg_dump_all +Syntax: (gdb) pci_cfg_dump_all +| dump config spaces for scanned pci devices. the number of busses to scan +| is stored in $kgm_pci_cfg_bus_max. the default for that is 8. you can also +| specify the starting bus with $kgm_pci_cfg_bus_start. +end + +define pci_cfg_scan + printf "bus:dev:fcn: vendor device rev | class\n" + printf "---------------------------------------\n" + _pci_cfg_scan 0 +end + +document pci_cfg_scan +Syntax: (gdb) pci_cfg_scan +| scan for pci devices. 
the number of busses to scan is stored in +| $kgm_pci_cfg_bus_max. the default for that is 8. you can also specify the +| starting bus with $kgm_pci_cfg_bus_start. +end + +define readioportint + set $kgm_readioportint_result = 0xBAD10AD + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_readioport_req_t) + set $kgm_pkt = (kdp_readioport_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_READIOPORT + set $kgm_pkt->hdr.len = sizeof(kdp_readioport_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint16_t)$arg0 + set $kgm_pkt->nbytes = $arg1 >> 3 + set $kgm_pkt->lcpu = (uint16_t)$arg2 + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_readioport_reply_t *)&manual_pkt.data + if ($kgm_pkt->error == 0) + if $arg1 == 8 + set $kgm_readioportint_result = *((uint8_t *) $kgm_pkt->data) + end + if $arg1 == 16 + set $kgm_readioportint_result = *((uint16_t *) $kgm_pkt->data) + end + if $arg1 == 32 + set $kgm_readioportint_result = *((uint32_t *) $kgm_pkt->data) + end + end +end + +define readioport8 + set $lcpu = $kgm_lcpu_self + if $argc > 1 + set $lcpu = $arg1 + end + readioportint $arg0 8 $lcpu + output /a $arg0 + printf ":\t0x%02hhx\n", $kgm_readioportint_result +end + +define readioport16 + set $lcpu = $kgm_lcpu_self + if $argc > 1 + set $lcpu = $arg1 + end + readioportint $arg0 16 $lcpu + output /a $arg0 + printf ":\t0x%04hx\n", $kgm_readioportint_result +end + +define readioport32 + set $lcpu = $kgm_lcpu_self + if $argc > 1 + set $lcpu = $arg1 + end + readioportint $arg0 32 $lcpu + output /a $arg0 + printf ":\t0x%08x\n", $kgm_readioportint_result +end + +document readioport8 +| See readioport32. +end + +document readioport16 +| See readioport32. 
+end + +document readioport32 +Syntax: (gdb) readioport32 [lcpu (kernel's numbering convention)] +| Read value stored in the specified IO port. The CPU can be optionally +| specified as well. +end + +define writeioportint + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_writeioport_req_t) + set $kgm_pkt = (kdp_writeioport_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_WRITEIOPORT + set $kgm_pkt->hdr.len = sizeof(kdp_writeioport_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint16_t)$arg0 + set $kgm_pkt->nbytes = $arg1 >> 3 + set $kgm_pkt->lcpu = (uint16_t)$arg3 + if $arg1 == 8 + set *(uint8_t *)$kgm_pkt->data = (uint8_t)$arg2 + end + if $arg1 == 16 + set *(uint16_t *)$kgm_pkt->data = (uint16_t)$arg2 + end + if $arg1 == 32 + set *(uint32_t *)$kgm_pkt->data = (uint32_t)$arg2 + end + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_writeioport_reply_t *)&manual_pkt.data + set $kgm_writeioportint_result = $kgm_pkt->error +end + +define writeioport8 + set $lcpu = $kgm_lcpu_self + if $argc > 2 + set $lcpu = $arg2 + end + writeioportint $arg0 8 $arg1 $lcpu +end + +define writeioport16 + set $lcpu = $kgm_lcpu_self + if $argc > 2 + set $lcpu = $arg2 + end + writeioportint $arg0 16 $arg1 $lcpu +end + +define writeioport32 + set $lcpu = $kgm_lcpu_self + if $argc > 2 + set $lcpu = $arg2 + end + writeioportint $arg0 32 $arg1 $lcpu +end + +document writeioport8 +| See writeioport32. +end + +document writeioport16 +| See writeioport32. +end + +document writeioport32 +Syntax: (gdb) writeioport32 [lcpu (kernel's numbering convention)] +| Write the value to the specified IO port. The size of the value is +| determined by the name of the command. The CPU used can be optionally +| specified. 
+end + +define readmsr64int + set $kgm_readmsr64int_result = 0xBAD10AD + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_readmsr64_req_t) + set $kgm_pkt = (kdp_readmsr64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_READMSR64 + set $kgm_pkt->hdr.len = sizeof(kdp_readmsr64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint32_t)$arg0 + set $kgm_pkt->lcpu = (uint16_t)$arg1 + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_readmsr64_reply_t *)&manual_pkt.data + if ($kgm_pkt->error == 0) + set $kgm_readmsr64int_result = *((uint64_t *) $kgm_pkt->data) + end +end + +define readmsr64 + set $lcpu = $kgm_lcpu_self + if $argc > 1 + set $lcpu = $arg1 + end + readmsr64int $arg0 $lcpu + output /a $arg0 + printf ":\t0x%016llx\n", $kgm_readmsr64int_result +end + +define writemsr64int + # set up the manual KDP packet + set manual_pkt.input = 0 + set manual_pkt.len = sizeof(kdp_writemsr64_req_t) + set $kgm_pkt = (kdp_writemsr64_req_t *)&manual_pkt.data + set $kgm_pkt->hdr.request = KDP_WRITEMSR64 + set $kgm_pkt->hdr.len = sizeof(kdp_writemsr64_req_t) + set $kgm_pkt->hdr.is_reply = 0 + set $kgm_pkt->hdr.seq = 0 + set $kgm_pkt->hdr.key = 0 + set $kgm_pkt->address = (uint32_t)$arg0 + set $kgm_pkt->lcpu = (uint16_t)$arg2 + set *(uint64_t *)$kgm_pkt->data = (uint64_t)$arg1 + set manual_pkt.input = 1 + # dummy to make sure manual packet is executed + set $kgm_dummy = &_mh_execute_header + set $kgm_pkt = (kdp_writemsr64_reply_t *)&manual_pkt.data + set $kgm_writemsr64int_result = $kgm_pkt->error +end + +define writemsr64 + set $lcpu = $kgm_lcpu_self + if $argc > 2 + set $lcpu = $arg2 + end + writemsr64int $arg0 $arg1 $lcpu +end + +document writemsr64 +Syntax: (gdb) writemsr64 [lcpu (kernel's numbering convention)] +| Write to the specified MSR. 
The CPU can be optionally specified. end -document showallpmworkqueues -Syntax: (gdb) showallpmworkqueues -| Display info about all IOPMWorkQueue objects +document readmsr64 +Syntax: (gdb) readmsr64 [lcpu (kernel's numbering convention)] +| Read the specified MSR. The CPU can be optionally specified. end -define showioservicepm - set $kgm_iopmpriv = (IOServicePM *)$arg0 - printf "{ this object = %08x", $kgm_iopmpriv->Owner - if ( $kgm_iopmpriv->WeAreRoot ) - printf " (root)" - end - printf ", " - - printf "MachineState = %d (", $kgm_iopmpriv->MachineState - if ( $kgm_iopmpriv->MachineState == 1 ) - printf "kIOPM_OurChangeTellClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 2 ) - printf "kIOPM_OurChangeTellPriorityClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 3 ) - printf "kIOPM_OurChangeNotifyInterestedDriversWillChange" - else - if ( $kgm_iopmpriv->MachineState == 4 ) - printf "kIOPM_OurChangeSetPowerState" - else - if ( $kgm_iopmpriv->MachineState == 5 ) - printf "kIOPM_OurChangeWaitForPowerSettle" - else - if ( $kgm_iopmpriv->MachineState == 6 ) - printf "kIOPM_OurChangeNotifyInterestedDriversDidChange" - else - if ( $kgm_iopmpriv->MachineState == 7 ) - printf "kIOPM_OurChangeFinish" - else - if ( $kgm_iopmpriv->MachineState == 8 ) - printf "kIOPM_ParentDownTellPriorityClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 9 ) - printf "kIOPM_ParentDownNotifyInterestedDriversWillChange" - else - if ( $kgm_iopmpriv->MachineState == 10 ) - printf "Unused_MachineState_10" - else - if ( $kgm_iopmpriv->MachineState == 11 ) - printf "kIOPM_ParentDownNotifyDidChangeAndAcknowledgeChange" - else - if ( $kgm_iopmpriv->MachineState == 12 ) - printf "kIOPM_ParentDownSetPowerState" - else - if ( $kgm_iopmpriv->MachineState == 13 ) - printf "kIOPM_ParentDownWaitForPowerSettle" - else - if ( $kgm_iopmpriv->MachineState == 14 ) - printf "kIOPM_ParentDownAcknowledgeChange" - else - if ( $kgm_iopmpriv->MachineState == 15) - printf 
"kIOPM_ParentUpSetPowerState" - else - if ( $kgm_iopmpriv->MachineState == 16) - printf "Unused_MachineState_16" - else - if ( $kgm_iopmpriv->MachineState == 17) - printf "kIOPM_ParentUpWaitForSettleTime" - else - if ( $kgm_iopmpriv->MachineState == 18) - printf "kIOPM_ParentUpNotifyInterestedDriversDidChange" - else - if ( $kgm_iopmpriv->MachineState == 19) - printf "kIOPM_ParentUpAcknowledgePowerChange" - else - if ( $kgm_iopmpriv->MachineState == 20) - printf "kIOPM_Finished" - else - if ( $kgm_iopmpriv->MachineState == 21) - printf "kIOPM_DriverThreadCallDone" - else - if ( $kgm_iopmpriv->MachineState == 22) - printf "kIOPM_NotifyChildrenDone" - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - printf "), " - - if ( $kgm_iopmpriv->MachineState != 20 ) - printf "DriverTimer = %d, ",(unsigned int)$kgm_iopmpriv->DriverTimer - printf "SettleTime = %d, ",(unsigned int)$kgm_iopmpriv->SettleTimeUS - printf "HeadNoteFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteFlags - printf "HeadNoteState = %d, ",(unsigned int)$kgm_iopmpriv->HeadNoteState - printf "HeadNoteOutputFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteOutputFlags - printf "HeadNoteDomainState = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteDomainState - printf "HeadNoteCapabilityFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteCapabilityFlags - printf "HeadNotePendingAcks = %x, ",(unsigned int)$kgm_iopmpriv->HeadNotePendingAcks - end +# default if we can't find a registry entry +set $kgm_ioapic_addr = 0xFEC00000 +set $kgm_ioapic_init = 0 - if ( $kgm_iopmpriv->DeviceOverrides != 0 ) - printf"DeviceOverrides, " - end - - printf "DriverDesire = %d, ",(unsigned int)$kgm_iopmpriv->DriverDesire - printf "DeviceDesire = %d, ",(unsigned int)$kgm_iopmpriv->DeviceDesire - printf "DesiredPowerState = %d, ",(unsigned int)$kgm_iopmpriv->DesiredPowerState - printf "PreviousRequest = %d }",(unsigned 
int)$kgm_iopmpriv->PreviousRequest +set $_ioapic_index_off = 0x00 +set $_ioapic_data_off = 0x10 +set $_ioapic_eoi_off = 0x40 + +set $_ioapic_index_id = 0x00 +set $_ioapic_index_ver = 0x01 +set $_ioapic_index_redir_base = 0x10 + +set $_apic_vector_mask = 0xFF +set $_apic_masked = 0x10000 +set $_apic_trigger_level = 0x08000 +set $_apic_polarity_high = 0x02000 +set $_apic_pending = 0x01000 + +define _ioapic_init + if $kgm_ioapic_init == 0 + strcmp_arg_pack64 'i' 'o' '-' 'a' 'p' 'i' 'c' 0 + setfindregistrystr $kgm_strcmp_arg + + set $olddepth = $kgm_reg_depth_max + set $kgm_reg_depth_max = 3 + _findregistryentry + set $kgm_reg_depth_max = $olddepth + + if $kgm_registry_entry + strcmp_arg_pack64 'P' 'h' 'y' 's' 'i' 'c' 'a' 'l' + set $Physical = $kgm_strcmp_arg + strcmp_arg_pack64 ' ' 'A' 'd' 'd' 'r' 'e' 's' 's' + set $_Address = $kgm_strcmp_arg + setfindregistrystr $Physical $_Address + + _findregistryprop $kgm_registry_entry + if $kgm_registry_value + set $kgm_ioapic_addr = ((OSNumber *) $kgm_registry_value)->value + end + end + set $kgm_ioapic_index_addr = $kgm_ioapic_addr + $_ioapic_index_off + set $kgm_ioapic_data_addr = $kgm_ioapic_addr + $_ioapic_data_off + set $kgm_ioapic_init = 1 + end end -document showioservicepm -Syntax: (gdb) showioservicepm -| Routine to dump the IOServicePM object +define _ioapic_addr_value + _ioapic_init + writephysint $kgm_ioapic_index_addr 8 $arg0 $kgm_lcpu_self + if $argc > 1 + writephysint $kgm_ioapic_data_addr 32 $arg1 $kgm_lcpu_self + else + readphysint $kgm_ioapic_data_addr 32 $kgm_lcpu_self + set $kgm_ioapic_value = $kgm_readphysint_result + end end -define showregistryentryrecursepmstate - set $kgm_re = (IOService *)$arg1 - set $kgm$arg0_stack = (unsigned long long) $arg2 +define _apic_print + set $value = $arg0 - if ($arg3) - set $kgm$arg0_stack = $kgm$arg0_stack | (1ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(1ULL << $kgm_reg_depth) - end + printf "[VEC=%3d ", $value & $_apic_vector_mask + if 
$value & $_apic_masked + printf "MASK=yes " + else + printf "MASK=no " + end - dictget $kgm_re->fRegistryTable $kgm_childkey - set $kgm$arg0_child_array = (OSArray *) $kgm_result + if $value & $_apic_trigger_level + printf "TRIG=level " + else + printf "TRIG=edge " + end - if ($kgm$arg0_child_array) - set $kgm$arg0_child_count = $kgm$arg0_child_array->count - else - set $kgm$arg0_child_count = 0 - end + if $value & $_apic_polarity_high + printf "POL=high" + else + printf "POL=low " + end - if ($kgm$arg0_child_count) - set $kgm$arg0_stack = $kgm$arg0_stack | (2ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) - end + if $value & $_apic_pending + printf " PEND=yes]\n" + else + printf " PEND=no ]\n" + end +end - indent $kgm_reg_depth $kgm$arg0_stack - printf "+-o " +define ioapic_read32 + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "ioapic_read32 not supported on this architecture.\n" + else + _ioapic_addr_value $arg0 + printf "IOAPIC[0x%02X]: 0x%08X\n", $arg0, $kgm_ioapic_value + end +end - dictget $kgm_re->fRegistryTable $kgm_namekey - if ($kgm_result == 0) - dictget $kgm_re->fRegistryTable gIONameKey - end - if ($kgm_result == 0) - dictget $kgm_re->fPropertyTable gIOClassKey - end +document ioapic_read32 +Syntax: (gdb) ioapic_read +| Read the IOAPIC register at the offset specified. +end - if ($kgm_result != 0) - printf "%s <%p>", ((OSString *)$kgm_result)->string, $kgm_re - else - if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) - printf "%s <%p>", ((IOService*)$kgm_re)->pwrMgt->Name, $kgm_re - else - printf "?? 
<%p>", $kgm_re - end - end +define ioapic_write32 + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "ioapic_write32 not supported on this architecture.\n" + else + _ioapic_addr_value $arg0 $arg1 + end +end - if (((IOService*)$kgm_re)->pwrMgt ) - printf " Current Power State: %ld ", ((IOService*)$kgm_re)->pwrMgt->CurrentPowerState - #printf " Mach State %ld", ((IOService*)$kgm_re)->pwrMgt->MachineState - showioservicepm ((IOService*)$kgm_re)->pwrMgt - end - printf "\n" - +document ioapic_write32 +Syntax: (gdb) ioapic_write32 +| Write the IOAPIC register at the offset specified. +end - # recurse - if ($kgm$arg0_child_count != 0) +define ioapic_dump + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "ioapic_dump not supported on this architecture.\n" + else + # id + _ioapic_addr_value $_ioapic_index_id + printf "IOAPIC[0x%02X] ID: 0x%08X\n", $_ioapic_index_id, $kgm_ioapic_value - set $kgm_reg_depth = $kgm_reg_depth + 1 - set $kgm$arg0_child_idx = 0 + # version + _ioapic_addr_value $_ioapic_index_ver + set $maxredir = (($kgm_ioapic_value & 0xFF0000) >> 16) + 1 - while ($kgm$arg0_child_idx < $kgm$arg0_child_count) - set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] - set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) - showregistryentryrecursepmstate _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib - end + printf "IOAPIC[0x%02X] VERSION: 0x%08X [", $_ioapic_index_ver, $kgm_ioapic_value + printf "MAXREDIR=%02d PRQ=%d VERSION=0x%02X]\n", $maxredir, ($kgm_ioapic_value >> 15) & 0x1, $kgm_ioapic_value & 0xFF + + # all the redir entries + set $i = 0 + while $i < $maxredir + set $addr0 = $_ioapic_index_redir_base + ($i << 1) + set $addr1 = $addr0 + 1 + _ioapic_addr_value $addr1 + printf "IOAPIC[0x%02X] IOREDIR%02d: 0x%08X", $addr0, $i, $kgm_ioapic_value + + _ioapic_addr_value $addr0 + printf "%08X ", $kgm_ioapic_value + _apic_print $kgm_ioapic_value + set $i = $i + 1 + end + end +end - set $kgm_reg_depth = 
$kgm_reg_depth - 1 - end +document ioapic_dump +Syntax: (gdb) ioapic_dump +| Dump all the IOAPIC entries. +end + + +set $_lapic_base_addr = 0xFEE00000 +set $_lapic_id = 0x20 +set $_lapic_version = 0x30 +set $_lapic_tpr = 0x80 +set $_lapic_apr = 0x90 +set $_lapic_ppr = 0xA0 +set $_lapic_eoi = 0xB0 +set $_lapic_ldr = 0xD0 +set $_lapic_dfr = 0xE0 +set $_lapic_sivr = 0xF0 + +set $_lapic_isr_size = 0x10 +set $_lapic_isr_num = 8 +set $_lapic_isr0 = 0x100 +set $_lapic_tmr0 = 0x180 +set $_lapic_irr0 = 0x200 + +set $_lapic_esr = 0x280 +set $_lapic_esr_register = 0x80 +set $_lapic_esr_recv_vect = 0x40 +set $_lapic_esr_send_vect = 0x20 + +set $_lapic_icr0 = 0x300 +set $_lapic_icr1 = 0x310 + +set $_lapic_lvt_timer = 0x320 +set $_lapic_lvt_thermal = 0x330 +set $_lapic_lvt_pmcr = 0x340 +set $_lapic_lvt_lint0 = 0x350 +set $_lapic_lvt_lint1 = 0x360 +set $_lapic_lvt_error = 0x370 + +set $_lapic_icr = 0x380 +set $_lapic_ccr = 0x390 +set $_lapic_dcr = 0x3E0 + +set $_apic_cfg_msr = 0x1B +set $_apic_cfg_msr_x2EN = 0x00000C00 +set $_x2apic_enabled = -1 + +# _lapic_addr $offset returns the actual address to use +define _lapic_addr + if $_x2apic_enabled < 0 + readmsr64int $_apic_cfg_msr $kgm_lcpu_self + if ($kgm_readmsr64int_result & $_apic_cfg_msr_x2EN) == $_apic_cfg_msr_x2EN + set $_x2apic_enabled = 1 + else + set $_x2apic_enabled = 0 + end + end + + if $_x2apic_enabled + # x2APIC addresses are MSRs that use xAPIC offsets that + # are 4-bit shifted + set $kgm_lapic_addr = $arg0 >> 4 + else + set $kgm_lapic_addr = $_lapic_base_addr + $arg0 + end end -define showregistryentryintpmstate - set $kgm_namekey = (OSSymbol *) $kgm_reg_plane[2] - set $kgm_childkey = (OSSymbol *) $kgm_reg_plane[4] - showregistryentryrecursepmstate _ $arg0 0 0 +# _lapic_addr_value $offset $lcpu +define _lapic_addr_value + _lapic_addr $arg0 + if $_x2apic_enabled + readmsr64int $kgm_lapic_addr $arg1 + set $kgm_lapic_value = $kgm_readmsr64int_result + else + readphysint $kgm_lapic_addr 32 $arg1 + set $kgm_lapic_value 
= $kgm_readphysint_result + end end -define showregistrypmstate -# setregistryplane gIOPowerPlane - set $kgm_reg_depth = 0 - set $kgm_show_props = 1 - showregistryentryintpmstate gRegistryRoot +# lapic_read32 $offset [$lcpu] +define lapic_read32 + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "lapic_read32 not supported on this architecture.\n" + else + set $lcpu = $kgm_lcpu_self + if $argc > 1 + set $lcpu = $arg1 + end + _lapic_addr_value $arg0 $lcpu + printf "LAPIC[0x%03X]: 0x%08X\n", $arg0, $kgm_lapic_value + end end -document showregistrypmstate -Syntax: (gdb) showregistrypmstate -| Routine to dump the PM state of each IOPower registry entry +document lapic_read32 +Syntax: (gdb) apic_read32_cpu [lcpu (kernel's numbering convention)] +| Read the LAPIC register at the offset specified. The CPU can be optionally +| specified. end -define showstacksafterthread - set $kgm_head_taskp = &default_pset.tasks - set $kgm_actp = (struct thread *)$arg0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - set $kgm_taskp = (struct task *)$kgm_actp->task - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - while $kgm_actp != $kgm_head_actp - showactheader - if ($decode_wait_events > 0) - showactint $kgm_actp 1 - else - showactint $kgm_actp 2 - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->pset_tasks.next) - end +# lapic_write32 $offset $value [$lcpu] +define lapic_write32 + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "lapic_write32_cpu not supported on this architecture.\n" + else + set $lcpu = $kgm_lcpu_self + if $argc > 2 + set $lcpu = $arg2 + end + + _lapic_addr $arg0 + if $_x2apic_enabled + writemsr64int $kgm_lapic_addr $arg1 $lcpu + else + writephysint $kgm_lapic_addr 32 $arg1 $lcpu + end + end +end + +document lapic_write32 +Syntax: (gdb) 
lapic_write32 [lcpu (kernel's numbering convention)] +| Write the LAPIC register at the offset specified. The CPU can be optionally +| specified. end -document showstacksafterthread -Syntax: (gdb) showstacksafterthread -| Routine to print out all stacks (as in showallstacks) starting after a given thread -| Useful if that gdb refuses to print a certain task's stack. -end +# lapic_dump [lcpu] +define lapic_dump + if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) + printf "lapic_dump not supported on this architecture.\n" + else + set $lcpu = $kgm_lcpu_self + if $argc > 0 + set $lcpu = $arg0 + end + + _lapic_addr_value $_lapic_id $lcpu + + # the above also figures out if we're using an xAPIC or an x2APIC + printf "LAPIC operating mode: " + if $_x2apic_enabled + printf " x2APIC\n" + else + printf " xAPIC\n" + end + + printf "LAPIC[0x%03X] ID: 0x%08X\n", $_lapic_id, $kgm_lapic_value + + _lapic_addr_value $_lapic_version $lcpu + set $lvt_num = ($kgm_lapic_value >> 16) + 1 + printf "LAPIC[0x%03X] VERSION: 0x%08X [VERSION=%d MaxLVT=%d]\n", $_lapic_version, $kgm_lapic_value, $kgm_lapic_value & 0xFF, $lvt_num + + _lapic_addr_value $_lapic_tpr $lcpu + printf "LAPIC[0x%03X] TASK PRIORITY: 0x%08X\n", $_lapic_tpr, $kgm_lapic_value + + _lapic_addr_value $_lapic_ppr $lcpu + printf "LAPIC[0x%03X] PROCESSOR PRIORITY: 0x%08X\n", $_lapic_ppr, $kgm_lapic_value + + _lapic_addr_value $_lapic_ldr $lcpu + printf "LAPIC[0x%03X] LOGICAL DEST: 0x%08X\n", $_lapic_ldr, $kgm_lapic_value + + _lapic_addr_value $_lapic_dfr $lcpu + printf "LAPIC[0x%03X] DEST FORMAT: 0x%08X\n", $_lapic_dfr, $kgm_lapic_value + + _lapic_addr_value $_lapic_sivr $lcpu + printf "LAPIC[0x%03X] SPURIOUS VECTOR: 0x%08X [VEC=%3d ENABLED=%d]\n", $_lapic_sivr, $kgm_lapic_value, $kgm_lapic_value & $_apic_vector_mask, ($kgm_lapic_value & 0x100) >> 8, + + set $i = 0 + while $i < $_lapic_isr_num + set $addr = $_lapic_isr0 + $i * $_lapic_isr_size + _lapic_addr_value $addr $lcpu + printf "LAPIC[0x%03X] ISR[%03d:%03d]: 
0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value + set $i = $i + 1 + end + + set $i = 0 + while $i < $_lapic_isr_num + set $addr = $_lapic_tmr0 + $i * $_lapic_isr_size + _lapic_addr_value $addr $lcpu + printf "LAPIC[0x%03X] TMR[%03d:%03d]: 0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value + set $i = $i + 1 + end -define kdp-reenter - set kdp_reentry_deadline = ((unsigned) $arg0)*1000 - continue -end + set $i = 0 + while $i < $_lapic_isr_num + set $addr = $_lapic_irr0 + $i * $_lapic_isr_size + _lapic_addr_value $addr $lcpu + printf "LAPIC[0x%03X] IRR[%03d:%03d]: 0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value + set $i = $i + 1 + end -document kdp-reenter -Syntax: (gdb) kdp-reenter -| Schedules reentry into the debugger after seconds, and resumes -| the target system. -end + _lapic_addr_value $_lapic_esr $lcpu + printf "LAPIC[0x%03X] ERROR STATUS: 0x%08X ", $_lapic_esr, $kgm_lapic_value + if $kgm_lapic_value + printf "[" + end + if $kgm_lapic_value & $_lapic_esr_register + printf "Register " + end + if $kgm_lapic_value & $_lapic_esr_recv_vect + printf "Received Vector " + end + if $kgm_lapic_value & $_lapic_esr_send_vect + printf "Send Vector" + end + if $kgm_lapic_value + printf "]" + end + printf "\n" + + _lapic_addr_value $_lapic_icr1 $lcpu + printf "LAPIC[0x%03X] Interrupt Command: 0x%08X [DEST=%d]\n", $_lapic_icr0, $kgm_lapic_value, $kgm_lapic_value >> 24 + _lapic_addr_value $_lapic_icr0 $lcpu + printf " 0x%08X ", $kgm_lapic_value + _apic_print $kgm_lapic_value + + if $lvt_num > 0 + _lapic_addr_value $_lapic_lvt_timer $lcpu + printf "LAPIC[0x%03X] LVT Timer: 0x%08X ", $_lapic_lvt_timer, $kgm_lapic_value + _apic_print $kgm_lapic_value + end -define _if_present - if (!$arg0) - printf " not" - end - printf " present" -end + if $lvt_num > 1 + _lapic_addr_value $_lapic_lvt_lint0 $lcpu + printf "LAPIC[0x%03X] LVT LINT0: 0x%08X ", $_lapic_lvt_lint0, $kgm_lapic_value + _apic_print $kgm_lapic_value + end -define showMCAstate - if ($kgm_mtype 
!= 7) - printf "Not available for current architecture.\n" - else - printf "MCA" - _if_present mca_MCA_present - printf ", control MSR" - _if_present mca_control_MSR_present - printf ", threshold status" - _if_present mca_threshold_status_present - printf "\n%d error banks, ", mca_error_bank_count - printf "family code 0x%x, ", mca_family - printf "machine-check dump state: %d\n", mca_dump_state - set $kgm_cpu = 0 - while cpu_data_ptr[$kgm_cpu] != 0 - set $kgm_mcp = cpu_data_ptr[$kgm_cpu]->cpu_mca_state - if $kgm_mcp - printf "CPU %d:", $kgm_cpu - printf " mca_mcg_ctl: 0x%016llx", $kgm_mcp->mca_mcg_ctl - printf " mca_mcg_status: 0x%016llx\n", $kgm_mcp->mca_mcg_status.u64 - printf "bank " - printf "mca_mci_ctl " - printf "mca_mci_status " - printf "mca_mci_addr " - printf "mca_mci_misc\n" - set $kgm_bank = 0 - while $kgm_bank < mca_error_bank_count - set $kgm_bp = &$kgm_mcp->mca_error_bank[$kgm_bank] - printf " %2d:", $kgm_bank - printf " 0x%016llx", $kgm_bp->mca_mci_ctl - printf " 0x%016llx", $kgm_bp->mca_mci_status.u64 - printf " 0x%016llx", $kgm_bp->mca_mci_addr - printf " 0x%016llx\n", $kgm_bp->mca_mci_misc - set $kgm_bank = $kgm_bank + 1 - end - end - set $kgm_cpu = $kgm_cpu + 1 - end - end + if $lvt_num > 2 + _lapic_addr_value $_lapic_lvt_lint1 $lcpu + printf "LAPIC[0x%03X] LVT LINT1: 0x%08X ", $_lapic_lvt_lint1, $kgm_lapic_value + _apic_print $kgm_lapic_value + end + + if $lvt_num > 3 + _lapic_addr_value $_lapic_lvt_error $lcpu + printf "LAPIC[0x%03X] LVT Error: 0x%08X ", $_lapic_lvt_error, $kgm_lapic_value + _apic_print $kgm_lapic_value + end + + if $lvt_num > 4 + _lapic_addr_value $_lapic_lvt_pmcr $lcpu + printf "LAPIC[0x%03X] LVT PerfMon: 0x%08X ", $_lapic_lvt_pmcr, $kgm_lapic_value + _apic_print $kgm_lapic_value + end + + if $lvt_num > 5 + _lapic_addr_value $_lapic_lvt_thermal $lcpu + printf "LAPIC[0x%03X] LVT Thermal: 0x%08X ", $_lapic_lvt_thermal, $kgm_lapic_value + _apic_print $kgm_lapic_value + end + + _lapic_addr_value $_lapic_dcr $lcpu + printf 
"LAPIC[0x%03X] Timer Divide: 0x%08X [Divide by ", $_lapic_dcr, $kgm_lapic_value + set $kgm_lapic_value = ($kgm_lapic_value & 0x8) >> 1 | $kgm_lapic_value & 0x3 + if $kgm_lapic_value == 0x7 + printf "1]\n" + else + printf "%d]\n", 2 << $kgm_lapic_value + end + + _lapic_addr_value $_lapic_icr $lcpu + printf "LAPIC[0x%03X] Timer Init Count: 0x%08X\n", $_lapic_icr, $kgm_lapic_value + + _lapic_addr_value $_lapic_ccr $lcpu + printf "LAPIC[0x%03X] Timer Cur Count: 0x%08X\n", $_lapic_ccr, $kgm_lapic_value + end end -document showMCAstate -Syntax: showMCAstate -| Print machine-check register state after MC exception. +document lapic_dump +Syntax: (gdb) lapic_dump [lcpu (kernel's numbering convention)] +| Dump all the LAPIC entries. The CPU can be optionally specified. +end + +define showknoteheader + printf " knote filter ident kn_ptr status\n" end -define _pt_step - # - # Step to lower-level page table and print attributes - # $kgm_pt_paddr: current page table entry physical address - # $kgm_pt_index: current page table entry index (0..511) - # returns - # $kgm_pt_paddr: next level page table entry physical address - # or null if invalid - # For $kgm_pt_verbose = 0: print nothing - # 1: print basic information - # 2: print basic information and hex table dump - # The trickery with kdp_src_high32 is required for accesses above 4GB. 
- # - set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index - set kdp_src_high32 = $kgm_pt_paddr >> 32 - set kdp_trans_off = 1 - set $entry = *(pt_entry_t *)($kgm_entryp & 0x0ffffffffULL) - if $kgm_pt_verbose == 2 - x/512g ($kgm_pt_paddr & 0x0ffffffffULL) +define showknoteint + set $kgm_knotep = ((struct knote *) $arg0) + printf " " + showptr $kgm_knotep + printf " " + set $kgm_filt = -$kgm_knotep->kn_kevent.filter + if ($kgm_filt == 1) + printf "EVFILT_READ " end - set kdp_trans_off = 0 - set kdp_src_high32 = 0 - set $kgm_paddr_mask = ~((0xffffULL<<48) | 0xfffULL) - if $kgm_pt_verbose == 0 - if $entry & (0x1 << 0) - set $kgm_pt_paddr = $entry & $kgm_paddr_mask - else - set $kgm_pt_paddr = 0 - end - else - printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry - if $entry & (0x1 << 0) - printf "valid" - set $kgm_pt_paddr = $entry & $kgm_paddr_mask - else - printf "invalid" - set $kgm_pt_paddr = 0 - end - if $entry & (0x1 << 1) - printf " writeable" - else - printf " read-only" - end - if $entry & (0x1 << 2) - printf " user" - else - printf " supervisor" - end - if $entry & (0x1 << 3) - printf " PWT" - end - if $entry & (0x1 << 4) - printf " PCD" - end - if $entry & (0x1 << 5) - printf " accessed" - end - if $entry & (0x1 << 6) - printf " dirty" - end - if $entry & (0x1 << 7) - printf " PAT" - end - if $entry & (0x1 << 8) - printf " global" - end - if $entry & (0x3 << 9) - printf " avail:0x%x", ($entry >> 9) & 0x3 - end - if $entry & (0x1 << 63) - printf " noexec" - end - printf "\n" + if ($kgm_filt == 2) + printf "EVFILT_WRITE " end -end - -define _pmap_walk - set $kgm_pmap = (pmap_t) $arg0 - set $kgm_vaddr = $arg1 - set $kgm_pt_paddr = $kgm_pmap->pm_cr3 - if $kgm_pt_paddr && cpu_64bit - set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL - if $kgm_pt_verbose - printf "pml4 (index %d):\n", $kgm_pt_index - end - _pt_step + if ($kgm_filt == 3) + printf "EVFILT_AIO " end - if $kgm_pt_paddr - set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL - if $kgm_pt_verbose - printf 
"pdpt (index %d):\n", $kgm_pt_index - end - _pt_step + if ($kgm_filt == 4) + printf "EVFILT_VNODE " end - if $kgm_pt_paddr - set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL - if $kgm_pt_verbose - printf "pdt (index %d):\n", $kgm_pt_index - end - _pt_step + if ($kgm_filt == 5) + printf "EVFILT_PROC " end - if $kgm_pt_paddr - set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL - if $kgm_pt_verbose - printf "pt (index %d):\n", $kgm_pt_index - end - _pt_step + if ($kgm_filt == 6) + printf "EVFILT_SIGNAL " end - if $kgm_pt_paddr - set $kgm_paddr = $kgm_pt_paddr + ($kgm_vaddr & 0xfffULL) - set kdp_trans_off = 1 - set kdp_src_high32 = $kgm_paddr >> 32 - set $kgm_value = *($kgm_paddr & 0x0ffffffffULL) - set kdp_trans_off = 0 - set kdp_src_high32 = 0 - printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value - else - set $kgm_paddr = 0 - printf "(no translation)\n" + if ($kgm_filt == 7) + printf "EVFILT_TIMER " end -end - -define pmap_walk - if $kgm_mtype != 7 - printf "Not available for current architecture.\n" - else - if $argc != 2 - printf "pmap_walk \n" - else - if !$kgm_pt_verbose - set $kgm_pt_verbose = 1 - else - if $kgm_pt_verbose != 2 - set $kgm_pt_verbose = 1 - end - end - _pmap_walk $arg0 $arg1 - end + if ($kgm_filt == 8) + printf "EVFILT_MACHPORT" end -end - -document pmap_walk -Syntax: (gdb) pmap_walk -| Perform a page-table walk in for . -| Set $kgm_pt_verbose=2 for full hex dump of page tables. 
-end - -define pmap_vtop - if $kgm_mtype != 7 - printf "Not available for current architecture.\n" + if ($kgm_filt == 9) + printf "EVFILT_FS " + end + if ($kgm_filt == 10) + printf "EVFILT_USER " + end + if ($kgm_filt == 11) + printf "EVFILT_SESSION " + end + printf "%7d ", $kgm_knotep->kn_kevent.ident + showptr $kgm_knotep->kn_ptr.p_fp + printf " " + if ($kgm_knotep->kn_status == 0) + printf "-" else - if $argc != 2 - printf "pmap_vtop \n" - else - set $kgm_pt_verbose = 0 - _pmap_walk $arg0 $arg1 - end + if ($kgm_knotep->kn_status & 0x01) + printf "A" + end + if ($kgm_knotep->kn_status & 0x02) + printf "Q" + end + if ($kgm_knotep->kn_status & 0x04) + printf "Dis" + end + if ($kgm_knotep->kn_status & 0x08) + printf "Dr" + end + if ($kgm_knotep->kn_status & 0x10) + printf "Uw" + end + if ($kgm_knotep->kn_status & 0x20) + printf "Att" + end + if ($kgm_knotep->kn_status & 0x40) + printf "Stq" + end end + printf "\n" end -document pmap_vtop -Syntax: (gdb) pmap_vtop -| For page-tables in translate to physical address. -end - -define zstack - set $index = $arg0 - - if (log_records == 0) - set $count = 0 - printf "Zone logging not enabled. 
Add 'zlog=' to boot-args.\n" - else - if ($argc == 2) - set $count = $arg1 - else - set $count = 1 - end +define showprocknotes + showknoteheader + set $kgm_fdp = ((proc_t)$arg0)->p_fd + set $kgm_knlist = $kgm_fdp->fd_knlist + set $i = 0 + while (($i < $kgm_fdp->fd_knlistsize) && ($kgm_knlist != 0)) + set $kgm_kn = ((struct knote *)$kgm_knlist[$i].slh_first) + while ($kgm_kn != 0) + showknoteint $kgm_kn + set $kgm_kn = ((struct knote *)$kgm_kn->kn_link.sle_next) end + set $i = $i + 1 + end + set $kgm_knhash = $kgm_fdp->fd_knhash + set $i = 0 + while (($i < $kgm_fdp->fd_knhashmask + 1) && ($kgm_knhash != 0)) + set $kgm_kn = ((struct knote *)$kgm_knhash[$i].slh_first) + while ($kgm_kn != 0) + showknoteint $kgm_kn + set $kgm_kn = ((struct knote *)$kgm_kn->kn_link.sle_next) + end + set $i = $i + 1 + end +end - while ($count) - printf "\n--------------- " - - if (zrecords[$index].z_opcode == 1) - printf "ALLOC " - else - printf "FREE " - end - - printf " 0x%x : index %d : ztime %d -------------\n", zrecords[$index].z_element, $index, zrecords[$index].z_time - - set $frame = 0 +define showallknotes + set $kgm_head_taskp = &tasks + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_taskp != $kgm_head_taskp + showtaskheader + showtaskint $kgm_taskp + showprocknotes $kgm_taskp->bsd_info + set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) + end +end +document showprocknotes +Syntax: showprocknotes +| Displays filter and status information for every kevent registered for +| the process. 
+end - while ($frame < 15) - set $frame_pc = zrecords[$index].z_pc[$frame] +# +# Device node related debug macros +# - if ($frame_pc == 0) - loop_break +define _showtty + set $kgm_tty = (struct tty *) $arg0 + printf "tty struct at " + showptr $kgm_tty + printf "\n" + printf "-last input to raw queue:\n" + p $kgm_tty->t_rawq->c_cs + printf "-last input to canonical queue:\n" + p $kgm_tty->t_canq->c_cs + printf "-last output data:\n" + p $kgm_tty->t_outq->c_cs + printf "state:\n" + if ($kgm_tty->t_state & 0x00000001) + printf " TS_SO_OLOWAT (Wake up when output <= low water)\n" + end + if ($kgm_tty->t_state & 0x00000002) + printf " TS_ASYNC (async I/O mode)\n" + else + printf " - (synchronous I/O mode)\n" + end + if ($kgm_tty->t_state & 0x00000004) + printf " TS_BUSY (Draining output)\n" + end + if ($kgm_tty->t_state & 0x00000008) + printf " TS_CARR_ON (Carrier is present)\n" + else + printf " - (Carrier is NOT present)\n" + end + if ($kgm_tty->t_state & 0x00000010) + printf " TS_FLUSH (Outq has been flushed during DMA)\n" + end + if ($kgm_tty->t_state & 0x00000020) + printf " TS_ISOPEN (Open has completed)\n" + else + printf " - (Open has NOT completed)\n" + end + if ($kgm_tty->t_state & 0x00000040) + printf " TS_TBLOCK (Further input blocked)\n" + end + if ($kgm_tty->t_state & 0x00000080) + printf " TS_TIMEOUT (Wait for output char processing)\n" + end + if ($kgm_tty->t_state & 0x00000100) + printf " TS_TTSTOP (Output paused)\n" + end + if ($kgm_tty->t_state & 0x00000200) + printf " TS_WOPEN (Open in progress)\n" + end + if ($kgm_tty->t_state & 0x00000400) + printf " TS_XCLUDE (Tty requires exclusivity)\n" + end + if ($kgm_tty->t_state & 0x00000800) + printf " TS_BKSL (State for lowercase \\ work)\n" + end + if ($kgm_tty->t_state & 0x00001000) + printf " TS_CNTTB (Counting tab width, ignore FLUSHO)\n" + end + if ($kgm_tty->t_state & 0x00002000) + printf " TS_ERASE (Within a \\.../ for PRTRUB)\n" + end + if ($kgm_tty->t_state & 0x00004000) + printf " TS_LNCH (Next 
character is literal)\n" + end + if ($kgm_tty->t_state & 0x00008000) + printf " TS_TYPEN (Retyping suspended input (PENDIN))\n" + end + if ($kgm_tty->t_state & 0x00010000) + printf " TS_CAN_BYPASS_L_RINT (Device in "raw" mode)\n" + end + if ($kgm_tty->t_state & 0x00020000) + printf " TS_CONNECTED (Connection open)\n" + else + printf " - (Connection NOT open)\n" + end + if ($kgm_tty->t_state & 0x00040000) + printf " TS_SNOOP (Device is being snooped on)\n" + end + if ($kgm_tty->t_state & 0x80000) + printf " TS_SO_OCOMPLETE (Wake up when output completes)\n" + end + if ($kgm_tty->t_state & 0x00100000) + printf " TS_ZOMBIE (Connection lost)\n" + end + if ($kgm_tty->t_state & 0x00200000) + printf " TS_CAR_OFLOW (For MDMBUF - handle in driver)\n" + end + if ($kgm_tty->t_state & 0x00400000) + printf " TS_CTS_OFLOW (For CCTS_OFLOW - handle in driver)\n" + end + if ($kgm_tty->t_state & 0x00800000) + printf " TS_DSR_OFLOW (For CDSR_OFLOW - handle in driver)\n" + end + # xxx todo: do we care about decoding flags? + printf "flags: 0x%08x\n", $kgm_tty->t_flags + printf "foreground process group: " + showptr $kgm_tty->t_pgrp + printf "\n" + printf "enclosing session: " + showptr $kgm_tty->t_session + printf "\n" + printf "Termios:\n" + # XXX todo: decode these flags, someday + printf " Input flags: 0x%08x\n", $kgm_tty->t_termios.c_iflag + printf " Output flags: 0x%08x\n", $kgm_tty->t_termios.c_oflag + printf " Control flags: 0x%08x\n", $kgm_tty->t_termios.c_cflag + printf " Local flags: 0x%08x\n", $kgm_tty->t_termios.c_lflag + printf " Input speed: %d\n", $kgm_tty->t_termios.c_ispeed + printf " Output speed: %d\n", $kgm_tty->t_termios.c_ospeed + # XXX todo: useful to decode t_winsize? t_iokit? c_cc? anything else? 
+ printf "high watermark: %d bytes\n", $kgm_tty->t_hiwat + printf "low watermark: %d bytes\n", $kgm_tty->t_lowat +end + +define _showwhohas + # _showwhohas + printf "fd " + printf "fileglob " +showptrhdrpad + printf "vnode " +showptrhdrpad + printf "process " +showptrhdrpad + printf "name\n" + + set $kgm_swh_devnode_dev = (((int) $arg0) << 24) | (int) $arg1 + # iterate all tasks to iterate all processes to iterate all + # open files in each process to see who has a given major/minor + # device open + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_taskp != $kgm_head_taskp + set $kgm_procp = (proc_t) $kgm_taskp->bsd_info + set $kgm_spf_filedesc = $kgm_procp->p_fd + set $kgm_spf_last = $kgm_spf_filedesc->fd_lastfile + set $kgm_spf_ofiles = $kgm_spf_filedesc->fd_ofiles + set $kgm_spf_count = 0 + while (($kgm_spf_ofiles != 0) && ($kgm_spf_count <= $kgm_spf_last)) + # only files currently open + if ($kgm_spf_ofiles[$kgm_spf_count] != 0) + set $kgm_spf_fg = $kgm_spf_ofiles[$kgm_spf_count].f_fglob + if ($kgm_spf_fg->fg_type == 1) + # display fd #, fileglob & vnode address, proc name + set $kgm_swh_m_vnode = (vnode_t) $kgm_spf_fg->fg_data + set $kgm_swh_m_vtype = (enum vtype) $kgm_swh_m_vnode->v_type + if (($kgm_swh_m_vtype == VBLK) || ($kgm_swh_m_vtype == VCHR)) && ((((devnode_t *)$kgm_swh_m_vnode->v_data)->dn_typeinfo.dev) == $kgm_swh_devnode_dev) + printf "%-5d ", $kgm_spf_count + showptr $kgm_spf_fg + printf " " + showptr $kgm_swh_m_vnode + printf " " + showptr $kgm_procp + printf " %s\n", $kgm_procp->p_comm + end end - - x/i $frame_pc - set $frame = $frame + 1 + end + set $kgm_spf_count = $kgm_spf_count + 1 end - set $index = $index + 1 - set $count = $count - 1 + set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) end end -document zstack -Syntax: (gdb) zstack [] -| Zone leak debugging: print the stack trace of log element at . -| If a is supplied, it prints log elements starting at . 
-| -| The suggested usage is to look at indexes below zcurrent and look for common stack traces. -| The stack trace that occurs the most is probably the cause of the leak. Find the pc of the -| function calling into zalloc and use the countpcs kgmacro to find out how often that pc occurs in the log. -| The pc occuring in a high percentage of records is most likely the source of the leak. -| -| The findoldest kgmacro is also useful for leak debugging since it identifies the oldest record -| in the log, which may indicate the leaker. +define _showvnodedev_cpty + set $kgm_ptmx_major = (int) $arg0 + set $kgm_ptmx_minor = (int) $arg1 + set $kgm_ptmx_ioctl = _state.pis_ioctl_list[$kgm_ptmx_minor] + set $kgm_ptmx_ioctl = _state.pis_ioctl_list[$kgm_ptmx_minor] + printf " ptmx_ioctl struct at " + showptr $kgm_ptmx_ioctl + printf "\n" + printf " flags:\n" + if ($kgm_ptmx_ioctl->pt_flags & 0x0008) + printf " PF_PKT (packet mode)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0010) + printf " PF_STOPPED (user told stopped)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0020) + printf " PF_REMOTE (remote and flow controlled input)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0040) + printf " PF_NOSTOP" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0080) + printf " PF_UCNTL (user control mode)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0100) + printf " PF_UNLOCKED (slave unlock - master open resets)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0200) + printf " PF_OPEN_M (master is open)\n" + # XXX we should search for who has the master open, but + # XXX each master gets the same minor, even though it + # XXX gets a different vnode. we chold probably change + # XXX this, but to do it we would need some way of + # XXX expressing the information in the vnode structure + # XXX somewhere. 
If we *did* change it, it would buy us + # XXX the ability to determine who has the corresponding + # XXX master end of the pty open + else + printf " PF_OPEN_M (master is closed)\n" + end + if ($kgm_ptmx_ioctl->pt_flags & 0x0400) + printf " PF_OPEN_S (slave is open)\n" + printf "---vvvvv--- fds open on this device ---vvvvv---\n" + _showwhohas ($kgm_ptmx_major) ($kgm_ptmx_minor) + printf "---^^^^^--- fds open on this device ---^^^^^---\n" + else + printf " - (slave is closed)\n" + end + printf "TTY Specific Information\n" + _showtty $kgm_ptmx_ioctl->pt_tty end -define findoldest - set $index = 0 - set $count = log_records - set $cur_min = 2000000000 - set $cur_index = 0 - - if (log_records == 0) - printf "Zone logging not enabled. Add 'zlog=' to boot-args.\n" - else +define showvnodedev + if ($argc == 1) + set $kgm_vnode = (vnode_t) $arg0 + set $kgm_vtype = (enum vtype) $kgm_vnode->v_type + if (($kgm_vtype == VBLK) || ($kgm_vtype == VCHR)) + set $kgm_devnode = (devnode_t *) $kgm_vnode->v_data + set $kgm_devnode_dev = $kgm_devnode->dn_typeinfo.dev + set $kgm_devnode_major = ($kgm_devnode_dev >> 24) & 0xff + set $kgm_devnode_minor = $kgm_devnode_dev & 0x00ffffff + + # boilerplate device information for a vnode + printf "Device Info:\n" + printf " vnode: " + showptr $kgm_vnode + printf "\n" + printf " type: " + if ($kgm_vtype == VBLK) + printf "VBLK " + end + if ($kgm_vtype == VCHR) + printf "VCHR" + end + printf "\n" + printf " name: %s\n", $kgm_vnode->v_name + printf " major, minor: %d, %d\n", $kgm_devnode_major, $kgm_devnode_minor + printf " mode 0%o\n", $kgm_devnode->dn_mode + printf " owner (u,g): %d %d", $kgm_devnode->dn_uid, $kgm_devnode->dn_gid + printf "\n" - while ($count) - if (zrecords[$index].z_element && zrecords[$index].z_time < $cur_min) - set $cur_index = $index - set $cur_min = zrecords[$index].z_time + # decode device specific data + printf "Device Specific Information: " + if ($kgm_vtype == VBLK) + printf " Sorry, I do not know how to decode block 
devices yet!\n" + printf " Maybe you can write me!" + end + if ($kgm_vtype == VCHR) + # Device information; this is scanty + # range check + if ($kgm_devnode_major > 42) || ($kgm_devnode_major < 0) + printf "Invalid major #\n" + else + # static assignments in conf + if ($kgm_devnode_major == 0) + printf "Console mux device\n" + else + if ($kgm_devnode_major == 2) + printf "Current tty alias\n" + else + if ($kgm_devnode_major == 3) + printf "NULL device\n" + else + if ($kgm_devnode_major == 4) + printf "Old pty slave\n" + else + if ($kgm_devnode_major == 5) + printf "Old pty master\n" + else + if ($kgm_devnode_major == 6) + printf "Kernel log\n" + else + if ($kgm_devnode_major == 12) + printf "Memory devices\n" + else + # Statically linked dynamic assignments + if cdevsw[$kgm_devnode_major].d_open == ptmx_open + printf "Cloning pty master\n" + _showvnodedev_cpty ($kgm_devnode_major) ($kgm_devnode_minor) + else + if cdevsw[$kgm_devnode_major].d_open == ptsd_open + printf "Cloning pty slave\n" + _showvnodedev_cpty ($kgm_devnode_major) ($kgm_devnode_minor) + else + printf "RESERVED SLOT\n" + end + end + end + end + end + end + end + end + end end - - set $count = $count - 1 - set $index = $index + 1 end - - printf "oldest record is at log index %d:\n", $cur_index - zstack $cur_index + else + showptr $kgm_vnode + printf " is not a device\n" end + else + printf "| Usage:\n|\n" + help showvnodedev + end end - -document findoldest -Syntax: (gdb) findoldest -| Zone leak debugging: find and print the oldest record in the log. Note that this command -| can take several minutes to run since it uses linear search. -| -| Once it prints a stack trace, find the pc of the caller above all the zalloc, kalloc and -| IOKit layers. Then use the countpcs kgmacro to see how often this caller has allocated -| memory. A caller with a high percentage of records in the log is probably the leaker. 
+document showvnodedev +Syntax: (gdb) showvnodedev +| showvnodedev Display information about a device vnode end -define countpcs - set $target_pc = $arg0 - set $index = 0 - set $count = log_records - set $found = 0 +define showtty + if ($argc == 1) + _showtty $arg0 + else + printf "| Usage:\n|\n" + help showtty + end +end +document showtty +Syntax: (gdb) showtty +| showtty Display information about a struct tty +end - if (log_records == 0) - printf "Zone logging not enabled. Add 'zlog=' to boot-args.\n" - else +define showeventsourceobject + set $kgm_vt = *((void **) $arg1) + if $kgm_lp64 + set $kgm_vt = $kgm_vt - 16 + end + pcprint $kgm_vt +end +document showeventsourceobject +Syntax: (gdb) showeventsourceobject +| Routine to display information about an IOEventSource subclass. +end + +define showworkloopeventsources + set $kgm_eventsource = (struct IOEventSource*)$arg0 + while $kgm_eventsource != 0 + printf " " + printf "EventSource:\t" + showptr $kgm_eventsource + printf " Description: " + showeventsourceobject _ $kgm_eventsource + printf "\n" + if $kgm_eventsource->action != 0 + printf " " + printf "Action: \t" + pcprint $kgm_eventsource->action + printf "\n" + end + if $kgm_eventsource->owner != 0 + printf " " + printf "Owner: \t" + showptr $kgm_eventsource->owner + printf " Description: " + showeventsourceobject _ $kgm_eventsource->owner + printf "\n" + end + set $kgm_eventsource = $kgm_eventsource->eventChainNext + printf "\n" + end +end +document showworkloopeventsources +Syntax: (gdb) showworkloopeventsources +| Routine to walk an IOEventSource chain associated with an IOWorkLoop and print information +| about each event source in the chain. +end + +define showworkloopheader + printf "thread " + showptrhdrpad + printf " workloop " + showptrhdrpad + printf " pri state\tLockGroupName\n" +end +document showworkloopheader +Syntax: (gdb) showworkloopheader +| Routine to print out header info about an IOKit workloop. 
+end + +define showworkloop + set $kgm_workloopthread = (struct thread*)$arg0 + set $kgm_workloop = (struct IOWorkLoop*)$arg1 + showptr $kgm_workloopthread + printf " " + showptr $kgm_workloop + printf " %3d ", $kgm_workloopthread.sched_pri + set $kgm_state = $kgm_workloopthread.state + if $kgm_state & 0x80 + printf "I" + end + if $kgm_state & 0x40 + printf "P" + end + if $kgm_state & 0x20 + printf "A" + end + if $kgm_state & 0x10 + printf "H" + end + if $kgm_state & 0x08 + printf "U" + end + if $kgm_state & 0x04 + printf "R" + end + if $kgm_state & 0x02 + printf "S" + end + if $kgm_state & 0x01 + printf "W" + end + printf "\t\t" + set $kgm_gateLock = ( struct _IORecursiveLock *)$kgm_workloop->gateLock + set $kgm_lockGroup = (struct _lck_grp_*)($kgm_gateLock->group) + printf "%s", $kgm_lockGroup->lck_grp_name + printf "\n" + showworkloopeventsources $kgm_workloop->eventChain +end +document showworkloop +Syntax: (gdb) showworkloop +| Routine to print out info about an IOKit workloop. +end - while ($count) - set $frame = 0 - - if (zrecords[$index].z_element != 0) - while ($frame < 15) - if (zrecords[$index].z_pc[$frame] == $target_pc) - set $found = $found + 1 - set $frame = 15 +define showallworkloopthreads + set $kgm_head_taskp = &tasks + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + set $kgm_head_actp = &($kgm_taskp->threads) + set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) + while $kgm_actp != $kgm_head_actp + if ($kgm_actp->continuation == _ZN10IOWorkLoop10threadMainEv) + showworkloopheader + showworkloop $kgm_actp $kgm_actp->parameter + else + if ($kgm_actp->kernel_stack != 0) + if ($kgm_mtype == $kgm_mtype_x86_64) + #Warning: Grokking stack looking for hopeful workloops until we squirrel some info in thread_t. 
+ set $kgm_workloop = *((struct IOWorkLoop **)($kgm_actp->kernel_stack + kernel_stack_size - 0xB8)) + else + if ($kgm_mtype == $kgm_mtype_i386) + set $kgm_workloop = *((struct IOWorkLoop **)($kgm_actp->kernel_stack + kernel_stack_size - 0x3C)) + end + end + if ($kgm_workloop != 0) + set $kgm_vt = *((void **) $kgm_workloop) + if $kgm_lp64 + set $kgm_vt = $kgm_vt - 16 + end + if ($kgm_vt == &_ZTV10IOWorkLoop) + showworkloopheader + showworkloop $kgm_actp $kgm_workloop end - - set $frame = $frame + 1 end end - - set $index = $index + 1 - set $count = $count - 1 end - - printf "occurred %d times in log (%d%c of records)\n", $found, ($found * 100) / zrecorded, '%' + set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) end + printf "\n" end - -document countpcs -Syntax: (gdb) countpcs -| Zone leak debugging: search the log and print a count of all log entries that contain the given -| in the stack trace. This is useful for verifying a suspected as being the source of -| the leak. If a high percentage of the log entries contain the given , then it's most -| likely the source of the leak. Note that this command can take several minutes to run. +document showallworkloopthreads +Syntax: (gdb) showallworkloopthreads +| Routine to print out info about all IOKit workloop threads in the system. This macro will find +| all IOWorkLoop threads blocked in continuations and on i386 and x86_64 systems will make a +| best-effort guess to find any workloops that are actually not blocked in a continuation. For a +| complete list, it is best to compare the output of this macro against the output of 'showallstacks'. end - -define findelem - set $fe_index = zcurrent - set $fe_count = log_records - set $fe_elem = $arg0 - set $fe_prev_op = -1 - - if (log_records == 0) - printf "Zone logging not enabled. 
Add 'zlog=' to boot-args.\n" +define showthreadfortid + set $kgm_id_found = 0 + + set $kgm_head_taskp = &tasks + set $kgm_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_taskp != $kgm_head_taskp + set $kgm_head_actp = &($kgm_taskp->threads) + set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) + while $kgm_actp != $kgm_head_actp + set $kgm_thread = *(struct thread *)$kgm_actp + set $kgm_thread_id = $kgm_thread.thread_id + if ($kgm_thread_id == $arg0) + showptr $kgm_actp + printf "\n" + set $kgm_id_found = 1 + loop_break + end + set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) + end + if ($kgm_id_found == 1) + loop_break + end + set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) end + if ($kgm_id_found == 0) + printf "Not a valid thread_id\n" + end +end - while ($fe_count) - if (zrecords[$fe_index].z_element == $fe_elem) - zstack $fe_index +document showthreadfortid +Syntax: showthreadfortid +|The thread structure contains a unique thread_id value for each thread. +|This command is used to retrieve the address of the thread structure(thread_t) +|corresponding to a given thread_id. +end - if (zrecords[$fe_index].z_opcode == $fe_prev_op) - printf "*************** DOUBLE OP! *********************\n +define showtaskbusyports + set $kgm_isp = ((task_t)$arg0)->itk_space + set $kgm_iindex = 0 + while ( $kgm_iindex < $kgm_isp->is_table_size ) + set $kgm_iep = &($kgm_isp->is_table[$kgm_iindex]) + if $kgm_iep->ie_bits & 0x00020000 + set $kgm_port = ((ipc_port_t)$kgm_iep->ie_object) + if $kgm_port->ip_messages.data.port.msgcount > 0 + showport $kgm_port end - - set $fe_prev_op = zrecords[$fe_index].z_opcode end + set $kgm_iindex = $kgm_iindex + 1 + end +end - set $fe_count = $fe_count - 1 - set $fe_index = $fe_index + 1 +document showtaskbusyports +Syntax: showtaskbusyports +|Routine to print information about receive rights belonging to this task that +|have enqueued messages. This is often a sign of a blocked or hung process. 
+end - if ($fe_index >= log_records) - set $fe_index = 0 - end - end +define showallbusyports + set $kgm_head_taskp = &tasks + set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) + while $kgm_cur_taskp != $kgm_head_taskp + showtaskbusyports $kgm_cur_taskp + set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) + end end -document findelem -Syntax: (gdb) findelem -| Zone corruption debugging: search the log and print out the stack traces for all log entries that -| refer to the given zone element. When the kernel panics due to a corrupted zone element, get the -| element address and use this macro. This will show you the stack traces of all logged zalloc and -| zfree operations which tells you who touched the element in the recent past. This also makes -| double-frees readily apparent. +document showallbusyports +Syntax: showallbusyports +|Routine to print information about all receive rights on the system that +|have enqueued messages. end diff --git a/libkern/Makefile b/libkern/Makefile index ccff380d6..583dcb221 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -14,6 +14,8 @@ INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} + INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS = \ @@ -24,11 +26,19 @@ EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} + EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} SETUP_SUBDIRS = conf -COMP_SUBDIRS = conf kmod +COMP_SUBDIRS_PPC = conf kmod + +COMP_SUBDIRS_I386 = conf kmod + +COMP_SUBDIRS_X86_64 = conf kmod + +COMP_SUBDIRS_ARM = conf kmod INST_SUBDIRS = kmod diff --git a/libkern/OSKextLib.cpp b/libkern/OSKextLib.cpp new file mode 100644 index 000000000..4876839af --- /dev/null +++ b/libkern/OSKextLib.cpp @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +extern "C" { +#include +#include + +#include +#include +#include +}; + +#include +#include +#include +#include + +extern "C" { + +#if PRAGMA_MARK +#pragma mark C-based kext interface (loading/loaded kexts only) +#endif +/********************************************************************* +*********************************************************************/ +kern_return_t OSKextLoadKextWithIdentifier(const char * bundle_id) +{ + return OSKext::loadKextWithIdentifier(bundle_id); +} + +/********************************************************************* +*********************************************************************/ +uint32_t +OSKextGetLoadTagForIdentifier(const char * kextIdentifier) +{ + uint32_t result = kOSKextInvalidLoadTag; + OSKext * theKext = NULL; // must release + + if (!kextIdentifier) { + goto finish; + } + + theKext = OSKext::lookupKextWithIdentifier(kextIdentifier); + if (theKext && theKext->isLoaded()) { + result = theKext->getLoadTag(); + } +finish: + if (theKext) theKext->release(); + return result; +} + +/********************************************************************* +*********************************************************************/ +OSReturn OSKextRetainKextWithLoadTag(uint32_t loadTag) +{ + OSReturn result = kOSKextReturnNotFound; + OSKext * theKext = NULL; // do not release; as this function is a retain + + if (loadTag == kOSKextInvalidLoadTag) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + theKext = OSKext::lookupKextWithLoadTag(loadTag); + if (theKext) { + result = kOSReturnSuccess; + + OSKextLog(theKext, + kOSKextLogDebugLevel | + kOSKextLogKextBookkeepingFlag, + "Kext %s (load tag %d) has been retained.", + theKext->getIdentifierCString(), + loadTag); + + /* Call this after so a log message about autounload comes second. 
+ */ + theKext->setAutounloadEnabled(true); + } else { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogKextBookkeepingFlag, + "Can't retain kext with load tag %d - no such kext is loaded.", + loadTag); + } +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +OSReturn OSKextReleaseKextWithLoadTag(uint32_t loadTag) +{ + OSReturn result = kOSKextReturnNotFound; + OSKext * theKext = NULL; // must release twice! + + if (loadTag == kOSKextInvalidLoadTag) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + theKext = OSKext::lookupKextWithLoadTag(loadTag); + if (theKext) { + result = kOSReturnSuccess; + OSKext::considerUnloads(); // schedule autounload pass + theKext->release(); // do the release the caller wants + theKext->release(); // now do the release on the lookup + OSKextLog(theKext, + kOSKextLogDebugLevel | + kOSKextLogKextBookkeepingFlag, + "Kext %s (load tag %d) has been released.", + theKext->getIdentifierCString(), + loadTag); + } else { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogKextBookkeepingFlag, + "Can't release kext with load tag %d - no such kext is loaded.", + loadTag); + } + + // xxx - should I check that the refcount of the OSKext is above the lower bound? + // xxx - do we want a OSKextGetRetainCountOfKextWithLoadTag()? +finish: + return result; +} + +/********************************************************************* +* Not to be called by the kext being unloaded! 
+*********************************************************************/ +OSReturn OSKextUnloadKextWithLoadTag(uint32_t loadTag) +{ + return OSKext::removeKextWithLoadTag(loadTag, + /* terminateServicesAndRemovePersonalitiesFlag */ false); +} + + +#if PRAGMA_MARK +#pragma mark Kext Requests +#endif +/********************************************************************* +* Kext Requests +*********************************************************************/ +OSReturn OSKextRequestResource( + const char * kextIdentifier, + const char * resourceName, + OSKextRequestResourceCallback callback, + void * context, + OSKextRequestTag * requestTagOut) +{ + return OSKext::requestResource(kextIdentifier, resourceName, + callback, context, requestTagOut); +} + +/********************************************************************* +*********************************************************************/ +OSReturn OSKextCancelRequest( + OSKextRequestTag requestTag, + void ** contextOut) +{ + return OSKext::cancelRequest(requestTag, contextOut); +} + +#if PRAGMA_MARK +#pragma mark MIG Functions & Wrappers +#endif +/********************************************************************* +* This function is for use only by OSKextLib.cpp and OSKext.cpp. +* +* xxx - can we cache the kextd port or do we have to get it each time +* xxx - in case it relaunches? 
+*********************************************************************/ +extern void ipc_port_release_send(ipc_port_t); + +kern_return_t OSKextPingKextd(void) +{ + kern_return_t result = KERN_FAILURE; + mach_port_t kextd_port = IPC_PORT_NULL; + + result = host_get_kextd_port(host_priv_self(), &kextd_port); + if (result != KERN_SUCCESS || !IPC_PORT_VALID(kextd_port)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Can't get kextd port."); + goto finish; + } + + result = kextd_ping(kextd_port); + if (result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "kextd ping failed (0x%x).", (int)result); + goto finish; + } + +finish: + if (IPC_PORT_VALID(kextd_port)) { + ipc_port_release_send(kextd_port); + } + + return result; +} + +/********************************************************************* +* IMPORTANT: Once we have done the vm_map_copyout(), we *must* return +* KERN_SUCCESS or the kernel map gets messed up (reason as yet +* unknown). We use op_result to return the real result of our work. 
+*********************************************************************/ +kern_return_t kext_request( + host_priv_t hostPriv, + /* in only */ uint32_t clientLogSpec, + /* in only */ vm_offset_t requestIn, + /* in only */ mach_msg_type_number_t requestLengthIn, + /* out only */ vm_offset_t * responseOut, + /* out only */ mach_msg_type_number_t * responseLengthOut, + /* out only */ vm_offset_t * logDataOut, + /* out only */ mach_msg_type_number_t * logDataLengthOut, + /* out only */ kern_return_t * op_result) +{ + kern_return_t result = KERN_FAILURE; + vm_map_address_t map_addr = 0; // do not free/deallocate + char * request = NULL; // must vm_deallocate + + mkext2_header * mkextHeader = NULL; // do not release + bool isMkext = false; + + char * response = NULL; // must kmem_free + uint32_t responseLength = 0; + char * logData = NULL; // must kmem_free + uint32_t logDataLength = 0; + + /* MIG doesn't pass "out" parameters as empty, so clear them immediately + * just in case, or MIG will try to copy out bogus data. + */ + *op_result = KERN_FAILURE; + *responseOut = NULL; + *responseLengthOut = 0; + *logDataOut = NULL; + *logDataLengthOut = 0; + + /* Check for input. Don't discard what isn't there, though. + */ + if (!requestLengthIn || !requestIn) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Invalid request from user space (no data)."); + *op_result = KERN_INVALID_ARGUMENT; + goto finish; + } + + /* Once we have done the vm_map_copyout(), we *must* return KERN_SUCCESS + * or the kernel map gets messed up (reason as yet unknown). We will use + * op_result to return the real result of our work. 
+ */ + result = vm_map_copyout(kernel_map, &map_addr, (vm_map_copy_t)requestIn); + if (result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "vm_map_copyout() failed for request from user space."); + vm_map_copy_discard((vm_map_copy_t)requestIn); + goto finish; + } + request = CAST_DOWN(char *, map_addr); + + /* Check if request is an mkext; this is always a load request + * and requires root access. If it isn't an mkext, see if it's + * an XML request, and check the request to see if that requires + * root access. + */ + if (requestLengthIn > sizeof(mkext2_header)) { + mkextHeader = (mkext2_header *)request; + if (MKEXT_GET_MAGIC(mkextHeader) == MKEXT_MAGIC && + MKEXT_GET_SIGNATURE(mkextHeader) == MKEXT_SIGN) { + + isMkext = true; + } + } + + if (isMkext) { +#ifdef SECURE_KERNEL + // xxx - something tells me if we have a secure kernel we don't even + // xxx - want to log a message here. :-) + *op_result = KERN_NOT_SUPPORTED; + goto finish; +#else + // xxx - can we find out if calling task is kextd? + // xxx - can we find the name of the calling task? + if (hostPriv == HOST_PRIV_NULL) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogIPCFlag, + "Attempt by non-root process to load a kext."); + *op_result = kOSKextReturnNotPrivileged; + goto finish; + } + + *op_result = OSKext::loadFromMkext((OSKextLogSpec)clientLogSpec, + request, requestLengthIn, + &logData, &logDataLength); + +#endif /* defined(SECURE_KERNEL) */ + + } else { + + /* If the request isn't an mkext, then is should be XML. Parse it + * if possible and hand the request over to OSKext. 
+ */ + *op_result = OSKext::handleRequest(hostPriv, + (OSKextLogSpec)clientLogSpec, + request, requestLengthIn, + &response, &responseLength, + &logData, &logDataLength); + } + + if (response && responseLength > 0) { + kern_return_t copyin_result; + + copyin_result = vm_map_copyin(kernel_map, + CAST_USER_ADDR_T(response), responseLength, + /* src_destroy */ false, (vm_map_copy_t *)responseOut); + if (copyin_result == KERN_SUCCESS) { + *responseLengthOut = responseLength; + } else { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to copy response to request from user space."); + *op_result = copyin_result; // xxx - should we map to our own code? + *responseOut = NULL; + *responseLengthOut = 0; + goto finish; + } + } + + if (logData && logDataLength > 0) { + kern_return_t copyin_result; + + copyin_result = vm_map_copyin(kernel_map, + CAST_USER_ADDR_T(logData), logDataLength, + /* src_destroy */ false, (vm_map_copy_t *)logDataOut); + if (copyin_result == KERN_SUCCESS) { + *logDataLengthOut = logDataLength; + } else { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to copy log data for request from user space."); + *op_result = copyin_result; // xxx - should we map to our own code? 
+ *logDataOut = NULL; + *logDataLengthOut = 0; + goto finish; + } + } + +finish: + if (request) { + (void)vm_deallocate(kernel_map, (vm_offset_t)request, requestLengthIn); + } + if (response) { + kmem_free(kernel_map, (vm_offset_t)response, responseLength); + } + if (logData) { + kmem_free(kernel_map, (vm_offset_t)logData, logDataLength); + } + + return result; +} + +/********************************************************************* +* Gets the vm_map for the current kext +*********************************************************************/ +extern vm_offset_t sectPRELINKB; +extern int sectSizePRELINK; +extern int kth_started; +extern vm_map_t g_kext_map; + +vm_map_t +kext_get_vm_map(kmod_info_t *info) +{ + vm_map_t kext_map = NULL; + + /* Set the vm map */ + if ((info->address >= sectPRELINKB) && + (info->address < (sectPRELINKB + sectSizePRELINK))) + { + kext_map = kernel_map; + } else { + kext_map = g_kext_map; + } + + return kext_map; +} + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Weak linking support +/********************************************************************/ +#endif +void +kext_weak_symbol_referenced(void) +{ + panic("A kext referenced an unresolved weak symbol\n"); +} + +const void *gOSKextUnresolved = (const void *)&kext_weak_symbol_referenced; + +#if PRAGMA_MARK +#pragma mark Kernel-Internal C Functions +#endif +/********************************************************************* +* Called from startup.c. 
+*********************************************************************/ +void OSKextRemoveKextBootstrap(void) +{ + OSKext::removeKextBootstrap(); + return; +} + +/********************************************************************* +*********************************************************************/ +void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...)) +{ + OSKext::printKextPanicLists(printf_func); + return; +} + +#if PRAGMA_MARK +#pragma mark Kmod Compatibility Functions +#endif +/********************************************************************* +********************************************************************** +* KMOD COMPATIBILITY FUNCTIONS * +* (formerly in kmod.c, or C++ bridges from) * +********************************************************************** +********************************************************************** +* These two functions are used in various places in the kernel, but +* are not exported. We might rename them at some point to start with +* kext_ or OSKext. +* +* kmod_panic_dump() must not be called outside of a panic context. +* kmod_dump_log() must not be called in a panic context. +*********************************************************************/ +void +kmod_panic_dump(vm_offset_t * addr, unsigned int cnt) +{ + extern int kdb_printf(const char *format, ...) __printflike(1,2); + + OSKext::printKextsInBacktrace(addr, cnt, &kdb_printf, + /* takeLock? */ false); + return; +} + +/********************************************************************/ +void kmod_dump_log(vm_offset_t *addr, unsigned int cnt); + +void +kmod_dump_log( + vm_offset_t * addr, + unsigned int cnt) +{ + OSKext::printKextsInBacktrace(addr, cnt, &printf, /* lock? */ true); +} + +/********************************************************************* +* Compatibility implementation for kmod_get_info() host_priv routine. +* Only supported on old 32-bit architectures. 
+*********************************************************************/ +#if __ppc__ || __i386__ +kern_return_t +kext_get_kmod_info( + kmod_info_array_t * kmod_list, + mach_msg_type_number_t * kmodCount) +{ + return OSKext::getKmodInfo(kmod_list, kmodCount); +} +#endif /* __ppc__ || __i386__ */ + +}; diff --git a/libsa/vers_rsrc.c b/libkern/OSKextVersion.c similarity index 60% rename from libsa/vers_rsrc.c rename to libkern/OSKextVersion.c index f3db0d5d2..bc1cc253c 100644 --- a/libsa/vers_rsrc.c +++ b/libkern/OSKextVersion.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,19 +25,17 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef KERNEL -#include -#include "vers_rsrc.h" -#else +#ifdef KERNEL #include -#include -#endif /* not KERNEL */ - -#ifndef KERNEL -#define PRIV_EXT +#include +#include #else -#define PRIV_EXT __private_extern__ -#endif /* not KERNEL */ +#include +#include +#include +#endif /* KERNEL */ + +#include #define VERS_MAJOR_DIGITS (4) #define VERS_MINOR_DIGITS (2) @@ -45,21 +43,27 @@ #define VERS_STAGE_DIGITS (1) #define VERS_STAGE_LEVEL_DIGITS (3) +#define VERS_MAJOR_MAX (9999) +#define VERS_STAGE_LEVEL_MAX (255) + #define VERS_MAJOR_MULT (100000000) #define VERS_MINOR_MULT (1000000) #define VERS_REVISION_MULT (10000) #define VERS_STAGE_MULT (1000) + typedef enum { - VERS_invalid = 0, - VERS_development = 1, - VERS_alpha = 3, - VERS_beta = 5, - VERS_candidate = 7, - VERS_release = 9, -} VERS_stage; + kOSKextVersionStageInvalid = 0, + kOSKextVersionStageDevelopment = 1, + kOSKextVersionStageAlpha = 3, + kOSKextVersionStageBeta = 5, + kOSKextVersionStageCandidate = 7, + kOSKextVersionStageRelease = 9, +} OSKextVersionStage; +/********************************************************************* +*********************************************************************/ static int __vers_isdigit(char 
c) { return (c == '0' || c == '1' || c == '2' || c == '3' || @@ -67,6 +71,8 @@ static int __vers_isdigit(char c) { c == '7' || c == '8' || c == '9'); } +/********************************************************************* +*********************************************************************/ static int __vers_isspace(char c) { return (c == ' ' || c == '\t' || @@ -74,6 +80,8 @@ static int __vers_isspace(char c) { c == '\n'); } +/********************************************************************* +*********************************************************************/ static int __vers_digit_for_char(char c) { switch (c) { case '0': return 0; break; @@ -92,92 +100,101 @@ static int __vers_digit_for_char(char c) { return -1; } +/********************************************************************* +*********************************************************************/ static int __VERS_isreleasestate(char c) { return (c == 'd' || c == 'a' || c == 'b' || c == 'f'); } -static VERS_stage __VERS_stage_for_string(const char ** string_p) { +/********************************************************************* +*********************************************************************/ +static OSKextVersionStage __OSKextVersionStageForString(const char ** string_p) { const char * string; if (!string_p || !*string_p) { - return VERS_invalid; + return kOSKextVersionStageInvalid; } string = *string_p; if (__vers_isspace(string[0]) || string[0] == '\0') { - return VERS_release; + return kOSKextVersionStageRelease; } else { switch (string[0]) { case 'd': if (__vers_isdigit(string[1])) { *string_p = &string[1]; - return VERS_development; + return kOSKextVersionStageDevelopment; } break; case 'a': if (__vers_isdigit(string[1])) { *string_p = &string[1]; - return VERS_alpha; + return kOSKextVersionStageAlpha; } break; case 'b': if (__vers_isdigit(string[1])) { *string_p = &string[1]; - return VERS_beta; + return kOSKextVersionStageBeta; } break; case 'f': if 
(__vers_isdigit(string[1])) { *string_p = &string[1]; - return VERS_candidate; + return kOSKextVersionStageCandidate; } else if (string[1] == 'c' && __vers_isdigit(string[2])) { *string_p = &string[2]; - return VERS_candidate; + return kOSKextVersionStageCandidate; } else { - return VERS_invalid; + return kOSKextVersionStageInvalid; } break; default: - return VERS_invalid; + return kOSKextVersionStageInvalid; break; } } - return VERS_invalid; + return kOSKextVersionStageInvalid; } -static const char * __VERS_string_for_stage(VERS_stage stage) { +/********************************************************************* +*********************************************************************/ +static const char * __OSKextVersionStringForStage(OSKextVersionStage stage) +{ switch (stage) { - case VERS_invalid: return "?"; break; - case VERS_development: return "d"; break; - case VERS_alpha: return "a"; break; - case VERS_beta: return "b"; break; - case VERS_candidate: return "f"; break; - case VERS_release: return ""; break; + case kOSKextVersionStageInvalid: return NULL; break; + case kOSKextVersionStageDevelopment: return "d"; break; + case kOSKextVersionStageAlpha: return "a"; break; + case kOSKextVersionStageBeta: return "b"; break; + case kOSKextVersionStageCandidate: return "f"; break; + case kOSKextVersionStageRelease: return ""; break; } - return "?"; + return NULL; } -PRIV_EXT -VERS_version VERS_parse_string(const char * vers_string) { - VERS_version result = -1; - int vers_digit = -1; - int num_digits_scanned = 0; - VERS_version vers_major = 0; - VERS_version vers_minor = 0; - VERS_version vers_revision = 0; - VERS_version vers_stage = 0; - VERS_version vers_stage_level = 0; - const char * current_char_p; - - if (!vers_string || *vers_string == '\0') { +/********************************************************************* +*********************************************************************/ +OSKextVersion OSKextParseVersionString(const char * versionString) +{ 
+ OSKextVersion result = -1; + int vers_digit = -1; + int num_digits_scanned = 0; + OSKextVersion vers_major = 0; + OSKextVersion vers_minor = 0; + OSKextVersion vers_revision = 0; + OSKextVersion vers_stage = 0; + OSKextVersion vers_stage_level = 0; + const char * current_char_p; + + if (!versionString || *versionString == '\0') { return -1; } - current_char_p = (const char *)&vers_string[0]; + current_char_p = (const char *)&versionString[0]; /***** * Check for an initial digit of the major release number. @@ -195,7 +212,7 @@ VERS_version VERS_parse_string(const char * vers_string) { */ while (num_digits_scanned < VERS_MAJOR_DIGITS) { if (__vers_isspace(*current_char_p) || *current_char_p == '\0') { - vers_stage = VERS_release; + vers_stage = kOSKextVersionStageRelease; goto finish; } else if (__vers_isdigit(*current_char_p)) { vers_digit = __vers_digit_for_char(*current_char_p); @@ -234,7 +251,7 @@ VERS_version VERS_parse_string(const char * vers_string) { */ while (num_digits_scanned < VERS_MINOR_DIGITS) { if (__vers_isspace(*current_char_p) || *current_char_p == '\0') { - vers_stage = VERS_release; + vers_stage = kOSKextVersionStageRelease; goto finish; } else if (__vers_isdigit(*current_char_p)) { vers_digit = __vers_digit_for_char(*current_char_p); @@ -273,7 +290,7 @@ VERS_version VERS_parse_string(const char * vers_string) { */ while (num_digits_scanned < VERS_REVISION_DIGITS) { if (__vers_isspace(*current_char_p) || *current_char_p == '\0') { - vers_stage = VERS_release; + vers_stage = kOSKextVersionStageRelease; goto finish; } else if (__vers_isdigit(*current_char_p)) { vers_digit = __vers_digit_for_char(*current_char_p); @@ -306,11 +323,11 @@ VERS_version VERS_parse_string(const char * vers_string) { * Check for the release state. 
*/ if (__vers_isspace(*current_char_p) || *current_char_p == '\0') { - vers_stage = VERS_release; + vers_stage = kOSKextVersionStageRelease; goto finish; } else { - vers_stage = __VERS_stage_for_string(¤t_char_p); - if (vers_stage == VERS_invalid) { + vers_stage = __OSKextVersionStageForString(¤t_char_p); + if (vers_stage == kOSKextVersionStageInvalid) { return -1; } } @@ -351,13 +368,13 @@ VERS_version VERS_parse_string(const char * vers_string) { return -1; } - if (vers_stage_level > 255) { + if (vers_stage_level > VERS_STAGE_LEVEL_MAX) { return -1; } finish: - if (vers_stage == VERS_candidate && vers_stage_level == 0) { + if (vers_stage == kOSKextVersionStageCandidate && vers_stage_level == 0) { return -1; } @@ -370,76 +387,112 @@ VERS_version VERS_parse_string(const char * vers_string) { return result; } -#define VERS_STRING_MAX_LEN (16) - -PRIV_EXT -int VERS_string(char * buffer, UInt32 length, VERS_version vers) { - int cpos = 0; - VERS_version vers_major = 0; - VERS_version vers_minor = 0; - VERS_version vers_revision = 0; - VERS_version vers_stage = 0; - VERS_version vers_stage_level = 0; - const char * stage_string = NULL; // don't free +/********************************************************************* +*********************************************************************/ +Boolean OSKextVersionGetString( + OSKextVersion aVersion, + char * buffer, + uint32_t bufferLength) +{ + int cpos = 0; + OSKextVersion vers_major = 0; + OSKextVersion vers_minor = 0; + OSKextVersion vers_revision = 0; + OSKextVersion vers_stage = 0; + OSKextVersion vers_stage_level = 0; + const char * stage_string = NULL; // don't free /* No buffer or length less than longest possible vers string, * return 0. 
*/ - if (!buffer || length < VERS_STRING_MAX_LEN) { - return 0; + if (!buffer || bufferLength < kOSKextVersionMaxLength) { + return FALSE; } - bzero(buffer, length * sizeof(char)); + bzero(buffer, bufferLength * sizeof(char)); - if (vers < 0) { - strlcpy(buffer, "(invalid)", length); - return 1; + if (aVersion < 0) { + strlcpy(buffer, "(invalid)", bufferLength); + return TRUE; + } + if (aVersion == 0) { + strlcpy(buffer, "(missing)", bufferLength); + return TRUE; } - vers_major = vers / VERS_MAJOR_MULT; + vers_major = aVersion / VERS_MAJOR_MULT; + if (vers_major > VERS_MAJOR_MAX) { + strlcpy(buffer, "(invalid)", bufferLength); + return TRUE; + } - vers_minor = vers - (vers_major * VERS_MAJOR_MULT); + vers_minor = aVersion - (vers_major * VERS_MAJOR_MULT); vers_minor /= VERS_MINOR_MULT; - vers_revision = vers - + vers_revision = aVersion - ( (vers_major * VERS_MAJOR_MULT) + (vers_minor * VERS_MINOR_MULT) ); vers_revision /= VERS_REVISION_MULT; - vers_stage = vers - + vers_stage = aVersion - ( (vers_major * VERS_MAJOR_MULT) + (vers_minor * VERS_MINOR_MULT) + (vers_revision * VERS_REVISION_MULT)); vers_stage /= VERS_STAGE_MULT; - vers_stage_level = vers - + vers_stage_level = aVersion - ( (vers_major * VERS_MAJOR_MULT) + (vers_minor * VERS_MINOR_MULT) + (vers_revision * VERS_REVISION_MULT) + (vers_stage * VERS_STAGE_MULT)); + if (vers_stage_level > VERS_STAGE_LEVEL_MAX) { + strlcpy(buffer, "(invalid)", bufferLength); + return TRUE; + } - cpos = snprintf(buffer, length, "%lu", (UInt32)vers_major); + cpos = snprintf(buffer, bufferLength, "%u", (uint32_t)vers_major); /* Always include the minor version; it just looks weird without. */ buffer[cpos] = '.'; cpos++; - cpos += snprintf(buffer+cpos, length - cpos, "%lu", (UInt32)vers_minor); + cpos += snprintf(buffer+cpos, bufferLength - cpos, "%u", (uint32_t)vers_minor); /* The revision is displayed only if nonzero. 
*/ if (vers_revision) { buffer[cpos] = '.'; cpos++; - cpos += snprintf(buffer+cpos, length - cpos, "%lu", - (UInt32)vers_revision); + cpos += snprintf(buffer+cpos, bufferLength - cpos, "%u", + (uint32_t)vers_revision); } - stage_string = __VERS_string_for_stage(vers_stage); - if (stage_string && stage_string[0]) { - strlcat(buffer, stage_string, length); + stage_string = __OSKextVersionStringForStage(vers_stage); + if (!stage_string) { + strlcpy(buffer, "(invalid)", bufferLength); + return TRUE; + } + if (stage_string[0]) { + strlcat(buffer, stage_string, bufferLength); cpos += strlen(stage_string); } - if (vers_stage < VERS_release) { - snprintf(buffer+cpos, length - cpos, "%lu", (UInt32)vers_stage_level); + if (vers_stage < kOSKextVersionStageRelease) { + snprintf(buffer+cpos, bufferLength - cpos, "%u", (uint32_t)vers_stage_level); } - return 1; + return TRUE; +} + +/********************************************************************* +*********************************************************************/ +#ifndef KERNEL +OSKextVersion OSKextParseVersionCFString(CFStringRef versionString) +{ + OSKextVersion result = -1; + char versBuffer[kOSKextVersionMaxLength]; + + if (CFStringGetCString(versionString, versBuffer, + sizeof(versBuffer), kCFStringEncodingASCII)) { + + result = OSKextParseVersionString(versBuffer); + } + return result; } +#endif diff --git a/libkern/c++/OSArray.cpp b/libkern/c++/OSArray.cpp index c1b7ff66c..92370e81f 100644 --- a/libkern/c++/OSArray.cpp +++ b/libkern/c++/OSArray.cpp @@ -218,8 +218,9 @@ void OSArray::flushCollection() unsigned int i; haveUpdated(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { array[i]->taggedRelease(OSTypeID(OSCollection)); + } count = 0; } diff --git a/libkern/c++/OSBoolean.cpp b/libkern/c++/OSBoolean.cpp index 732ce17a3..55f4c86b2 100644 --- a/libkern/c++/OSBoolean.cpp +++ b/libkern/c++/OSBoolean.cpp @@ -34,7 +34,7 @@ #define super OSObject -OSDefineMetaClassAndStructors(OSBoolean, OSObject) 
+OSDefineMetaClassAndStructorsWithInit(OSBoolean, OSObject, OSBoolean::initialize()) OSMetaClassDefineReservedUnused(OSBoolean, 0); OSMetaClassDefineReservedUnused(OSBoolean, 1); OSMetaClassDefineReservedUnused(OSBoolean, 2); @@ -82,8 +82,8 @@ void OSBoolean::free() assert(false); } -void OSBoolean::taggedRetain(const void *tag) const { } -void OSBoolean::taggedRelease(const void *tag, const int when) const { } +void OSBoolean::taggedRetain(__unused const void *tag) const { } +void OSBoolean::taggedRelease(__unused const void *tag, __unused const int when) const { } OSBoolean *OSBoolean::withBoolean(bool inValue) { diff --git a/libkern/c++/OSCollection.cpp b/libkern/c++/OSCollection.cpp index dc27d1d87..4da177f16 100644 --- a/libkern/c++/OSCollection.cpp +++ b/libkern/c++/OSCollection.cpp @@ -60,9 +60,21 @@ bool OSCollection::init() void OSCollection::haveUpdated() { - if ( (gIOKitDebug & kOSLogRegistryMods) && (fOptions & kImmutable) ) - OSReportWithBacktrace("Trying to change a collection in the registry"); - + if (fOptions & kImmutable) + { +#if __LP64__ + if (!(gIOKitDebug & kOSRegistryModsMode)) +#else + if (gIOKitDebug & kOSRegistryModsMode) +#endif + { + panic("Trying to change a collection in the registry"); + } + else + { + OSReportWithBacktrace("Trying to change a collection in the registry"); + } + } updateStamp++; } diff --git a/libkern/c++/OSData.cpp b/libkern/c++/OSData.cpp index 47def143b..b7cd6852c 100644 --- a/libkern/c++/OSData.cpp +++ b/libkern/c++/OSData.cpp @@ -38,7 +38,7 @@ #define super OSObject OSDefineMetaClassAndStructors(OSData, OSObject) -OSMetaClassDefineReservedUnused(OSData, 0); +OSMetaClassDefineReservedUsed(OSData, 0); // setDeallocFunction OSMetaClassDefineReservedUnused(OSData, 1); OSMetaClassDefineReservedUnused(OSData, 2); OSMetaClassDefineReservedUnused(OSData, 3); @@ -190,7 +190,12 @@ void OSData::free() if (capacity != EXTERNAL && data && capacity) { kfree(data, capacity); ACCUMSIZE( -capacity ); - } + } else if (capacity == 
EXTERNAL) { + DeallocFunction freemem = (DeallocFunction)reserved; + if (freemem && data && length) { + freemem(data, length); + } + } super::free(); } @@ -207,6 +212,8 @@ unsigned int OSData::setCapacityIncrement(unsigned increment) return capacityIncrement = increment; } +// xx-review: does not check for capacity == EXTERNAL + unsigned int OSData::ensureCapacity(unsigned int newCapacity) { unsigned char * newData; @@ -321,11 +328,11 @@ bool OSData::isEqualTo(const void *someData, unsigned int inLength) const bool OSData::isEqualTo(const OSMetaClassBase *obj) const { - OSData * data; + OSData * otherData; OSString * str; - if ((data = OSDynamicCast(OSData, obj))) - return isEqualTo(data); + if ((otherData = OSDynamicCast(OSData, obj))) + return isEqualTo(otherData); else if ((str = OSDynamicCast (OSString, obj))) return isEqualTo(str); else @@ -423,3 +430,12 @@ bool OSData::serialize(OSSerialize *s) const return s->addXMLEndTag("data"); } + +/* Note I am just using the reserved pointer here instead of allocating a whole buffer + * to hold one pointer. 
+ */ +void OSData::setDeallocFunction(DeallocFunction func) +{ + reserved = (ExpansionData *)func; + return; +} diff --git a/libkern/c++/OSDictionary.cpp b/libkern/c++/OSDictionary.cpp index d09743c1d..eaa1483df 100644 --- a/libkern/c++/OSDictionary.cpp +++ b/libkern/c++/OSDictionary.cpp @@ -87,7 +87,7 @@ bool OSDictionary::initWithObjects(const OSObject *objects[], unsigned int theCount, unsigned int theCapacity) { - unsigned int capacity = theCount; + unsigned int newCapacity = theCount; if (!objects || !keys) return false; @@ -96,10 +96,10 @@ bool OSDictionary::initWithObjects(const OSObject *objects[], if (theCount > theCapacity) return false; - capacity = theCapacity; + newCapacity = theCapacity; } - if (!initWithCapacity(capacity)) + if (!initWithCapacity(newCapacity)) return false; for (unsigned int i = 0; i < theCount; i++) { @@ -117,7 +117,7 @@ bool OSDictionary::initWithObjects(const OSObject *objects[], unsigned int theCount, unsigned int theCapacity) { - unsigned int capacity = theCount; + unsigned int newCapacity = theCount; if (!objects || !keys) return false; @@ -126,10 +126,10 @@ bool OSDictionary::initWithObjects(const OSObject *objects[], if (theCount > theCapacity) return false; - capacity = theCapacity; + newCapacity = theCapacity; } - if (!initWithCapacity(capacity)) + if (!initWithCapacity(newCapacity)) return false; for (unsigned int i = 0; i < theCount; i++) { @@ -153,21 +153,21 @@ bool OSDictionary::initWithObjects(const OSObject *objects[], bool OSDictionary::initWithDictionary(const OSDictionary *dict, unsigned int theCapacity) { - unsigned int capacity; + unsigned int newCapacity; if ( !dict ) return false; - capacity = dict->count; + newCapacity = dict->count; if ( theCapacity ) { if ( dict->count > theCapacity ) return false; - capacity = theCapacity; + newCapacity = theCapacity; } - if (!initWithCapacity(capacity)) + if (!initWithCapacity(newCapacity)) return false; count = dict->count; @@ -371,7 +371,7 @@ bool 
OSDictionary::merge(const OSDictionary *srcDict) if ( !OSDynamicCast(OSDictionary, srcDict) ) return false; - iter = OSCollectionIterator::withCollection((OSDictionary *)srcDict); + iter = OSCollectionIterator::withCollection(const_cast(srcDict)); if ( !iter ) return false; @@ -397,7 +397,7 @@ OSObject *OSDictionary::getObject(const OSSymbol *aKey) const // if the key exists, remove the object for (unsigned int i = 0; i < count; i++) if (aKey == dictionary[i].key) - return (OSObject *) dictionary[i].value; + return (const_cast ((const OSObject *)dictionary[i].value)); return 0; } diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp new file mode 100644 index 000000000..e4c15bfc2 --- /dev/null +++ b/libkern/c++/OSKext.cpp @@ -0,0 +1,9200 @@ +/* + * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +extern "C" { +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +}; + +#include +#include +#include + +#include +#include +#include +#include + +#if PRAGMA_MARK +#pragma mark External & Internal Function Protos +#endif +/********************************************************************* +*********************************************************************/ +extern "C" { +// in libkern/OSKextLib.cpp, not in header for a reason. +extern kern_return_t OSKextPingKextd(void); + +extern int IODTGetLoaderInfo(const char * key, void ** infoAddr, int * infoSize); +extern void IODTFreeLoaderInfo(const char * key, void * infoAddr, int infoSize); +extern void OSRuntimeUnloadCPPForSegment(kernel_segment_command_t * segment); +extern void OSRuntimeUnloadCPP(kmod_info_t * ki, void * data); + +extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); /* osfmk/machine/pmap.h */ +} + +static OSReturn _OSKextCreateRequest( + const char * predicate, + OSDictionary ** requestP); +static OSString * _OSKextGetRequestPredicate(OSDictionary * requestDict); +static OSObject * _OSKextGetRequestArgument( + OSDictionary * requestDict, + const char * argName); +static bool _OSKextSetRequestArgument( + OSDictionary * requestDict, + const char * argName, + OSObject * value); +static void * _OSKextExtractPointer(OSData * wrapper); +static OSReturn _OSDictionarySetCStringValue( + OSDictionary * dict, + const char * key, + const char * value); +#if CONFIG_MACF_KEXT +static void * MACFCopyModuleDataForKext( + OSKext * theKext, + mach_msg_type_number_t * datalen); +#endif /* CONFIG_MACF_KEXT */ + +#if PRAGMA_MARK +#pragma mark Constants & Macros +#endif +/********************************************************************* +* Constants & Macros 
+*********************************************************************/ + +/* A typical Snow Leopard system has a bit under 120 kexts loaded. + * Use this number to create containers. + */ +#define kOSKextTypicalLoadCount (120) + +/* Any kext will have at least 1 retain for the internal lookup-by-ID dict. + * A loaded kext will no dependents or external retains will have 2 retains. + */ +#define kOSKextMinRetainCount (1) +#define kOSKextMinLoadedRetainCount (2) + +/********** + * Strings and substrings used in dependency resolution. + */ +#define APPLE_KEXT_PREFIX "com.apple." +#define KERNEL_LIB "com.apple.kernel" + +#define PRIVATE_KPI "com.apple.kpi.private" + +/* Version for compatbility pseudokexts (com.apple.kernel.*), + * compatible back to v6.0. + */ +#define KERNEL6_LIB "com.apple.kernel.6.0" +#define KERNEL6_VERSION "7.9.9" + +#define KERNEL_LIB_PREFIX "com.apple.kernel." +#define KPI_LIB_PREFIX "com.apple.kpi." + +#define STRING_HAS_PREFIX(s, p) (strncmp((s), (p), strlen(p)) == 0) + +/********************************************************************* +* infoDict keys for internally-stored data. Saves on ivar slots for +* objects we don't keep around past boot time or during active load. +*********************************************************************/ + +/* A usable, uncompressed file is stored under this key. + */ +#define _kOSKextExecutableKey "_OSKextExecutable" + +/* An indirect reference to the executable file from an mkext + * is stored under this key. + */ +#define _kOSKextMkextExecutableReferenceKey "_OSKextMkextExecutableReference" + +/* If the file is contained in a larger buffer laid down by the booter or + * sent from user space, the OSKext stores that OSData under this key so that + * references are properly tracked. This is always an mkext, right now. 
+ */ +#define _kOSKextExecutableExternalDataKey "_OSKextExecutableExternalData" + +#if PRAGMA_MARK +#pragma mark Typedefs +#endif +/********************************************************************* +* Typedefs +*********************************************************************/ + +/********************************************************************* +* MkextEntryRef describes the contents of an OSData object +* referencing a file entry from an mkext so that we can uncompress +* (if necessary) and extract it on demand. +* +* It contains the mkextVersion in case we ever wind up supporting +* multiple mkext formats. Mkext format 1 is officially retired as of +* Snow Leopard. +*********************************************************************/ +typedef struct MkextEntryRef { + mkext_basic_header * mkext; // beginning of whole mkext file + void * fileinfo; // mkext2_file_entry or equiv; see mkext.h +} MkextEntryRef; + +#if PRAGMA_MARK +#pragma mark Global and static Module Variables +#endif +/********************************************************************* +* Global & static variables, used to keep track of kexts. +*********************************************************************/ + +static bool sPrelinkBoot = false; +static bool sSafeBoot = false; + +/****** +* sKextLock is the principal lock for OSKext. Below, there is also an +* sKextInnerLock used to guard access to data accessed on in-calls from +* IOService. This 2nd lock is required to prevent a deadlock +* with IOService calling back into OSKext::considerUnloads() +* on a separate thread during a kext load operation. +*/ +static IORecursiveLock * sKextLock = NULL; + +static OSDictionary * sKextsByID = NULL; +static OSArray * sLoadedKexts = NULL; + +static OSArray * sPrelinkedPersonalities = NULL; + +// Requests to kextd waiting to be picked up. 
+static OSArray * sKernelRequests = NULL; +// Identifier of kext load requests in sKernelRequests +static OSSet * sPostedKextLoadIdentifiers = NULL; +static OSArray * sRequestCallbackRecords = NULL; + +// Identifiers of all kexts ever requested in kernel; used for prelinked kernel +static OSSet * sAllKextLoadIdentifiers = NULL; +static KXLDContext * sKxldContext = NULL; +static uint32_t sNextLoadTag = 0; +static uint32_t sNextRequestTag = 0; + +static bool sUserLoadsActive = false; +static bool sKextdActive = false; +static bool sDeferredLoadSucceeded = false; +static bool sConsiderUnloadsExecuted = false; + +static bool sKernelRequestsEnabled = true; +static bool sLoadEnabled = true; +static bool sUnloadEnabled = true; + +/********************************************************************* +* Stuff for the OSKext representing the kernel itself. +**********/ +static OSKext * sKernelKext = NULL; + +/* Set up a fake kmod_info struct for the kernel. + * It's used in OSRuntime.cpp to call OSRuntimeInitializeCPP() + * before OSKext is initialized; that call only needs the name + * and address to be set correctly. + * + * We don't do much else with the kerne's kmod_info; we never + * put it into the kmod list, never adjust the reference count, + * and never have kernel components reference it. + * For that matter, we don't do much with kmod_info structs + * at all anymore! We just keep them filled in for gdb and + * binary compability. 
+ */ +kmod_info_t g_kernel_kmod_info = { + /* next */ 0, + /* info_version */ KMOD_INFO_VERSION, + /* id */ 0, // loadTag: kernel is always 0 + /* name */ kOSKextKernelIdentifier, // bundle identifier + /* version */ "0", // filled in in OSKext::initialize() + /* reference_count */ -1, // never adjusted; kernel never unloads + /* reference_list */ NULL, + /* address */ (vm_address_t)&_mh_execute_header, + /* size */ 0, // filled in in OSKext::initialize() + /* hdr_size */ 0, + /* start */ 0, + /* stop */ 0 +}; + +extern "C" { +// symbol 'kmod' referenced in: model_dep.c, db_trace.c, symbols.c, db_low_trace.c, +// dtrace.c, dtrace_glue.h, OSKext.cpp, locore.s, lowmem_vectors.s, +// misc_protos.h, db_low_trace.c, kgmacros +// 'kmod' is a holdover from the old kmod system, we can't rename it. +kmod_info_t * kmod = NULL; + +#define KEXT_PANICLIST_SIZE (2 * PAGE_SIZE) + +static char * unloaded_kext_paniclist = NULL; +static uint32_t unloaded_kext_paniclist_size = 0; +static uint32_t unloaded_kext_paniclist_length = 0; +AbsoluteTime last_loaded_timestamp; + +static char * loaded_kext_paniclist = NULL; +static uint32_t loaded_kext_paniclist_size = 0; +static uint32_t loaded_kext_paniclist_length = 0; +AbsoluteTime last_unloaded_timestamp; +static void * last_unloaded_address = NULL; +#if __LP64__ +static uint64_t last_unloaded_size = 0; +#else +static uint32_t last_unloaded_size = 0; +#endif /* __LP64__ */ + +}; + +/********************************************************************* +* Because we can start IOService matching from OSKext (via IOCatalogue) +* and IOService can call into OSKext, there is potential for cross-lock +* contention, so OSKext needs two locks. The regular sKextLock above +* guards most OSKext class/static variables, and sKextInnerLock guards +* variables that can be accessed on in-calls from IOService, currently: +* +* * OSKext::considerUnloads() +* +* Note that sConsiderUnloadsExecuted above belongs to sKextLock! 
+*
+* When both sKextLock and sKextInnerLock need to be taken,
+* always lock sKextLock first and unlock it second. Never take both
+* locks in an entry point to OSKext; if you need to do so, you must
+* spawn an independent thread to avoid potential deadlocks for threads
+* calling into OSKext.
+*
+* All static variables from here to the closing comment block fall
+* under sKextInnerLock.
+**********/
+// Allocated in OSKext::initialize() right after sKextLock.
+static IORecursiveLock * sKextInnerLock = NULL;
+
+// Read/written by getAutounloadEnabled()/setAutounloadsEnabled().
+static bool sAutounloadEnabled = true;
+static bool sConsiderUnloadsCalled = false;
+static bool sConsiderUnloadsPending = false;
+
+static unsigned int sConsiderUnloadDelay = 60; // seconds
+// Cancelled by setAutounloadsEnabled(false) when non-NULL.
+static thread_call_t sUnloadCallout = 0;
+static thread_call_t sDestroyLinkContextThread = 0; // one-shot, one-at-a-time thread
+static bool sSystemSleep = false; // true when system going to sleep
+
+static const OSKextLogSpec kDefaultKernelLogFilter = kOSKextLogBasicLevel |
+    kOSKextLogVerboseFlagsMask;
+// Replaced by the "kextlog" boot-arg value in OSKext::initialize() when that
+// boot-arg is present; also exported read/write through the sysctl below.
+static OSKextLogSpec sKernelLogFilter = kDefaultKernelLogFilter;
+// Set to true in OSKext::initialize() when the "kextlog" boot-arg was parsed.
+static bool sBootArgLogFilterFound = false;
+SYSCTL_INT(_debug, OID_AUTO, kextlog, CTLFLAG_RW, &sKernelLogFilter,
+    sKernelLogFilter, "kernel kext logging");
+
+static OSKextLogSpec sUserSpaceKextLogFilter = kOSKextLogSilentFilter;
+static OSArray * sUserSpaceLogSpecArray = NULL;
+static OSArray * sUserSpaceLogMessageArray = NULL;
+
+/*********
+* End scope for sKextInnerLock-protected variables.
+*********************************************************************/
+
+#if PRAGMA_MARK
+#pragma mark OSData callbacks (need to move to OSData)
+#endif
+/*********************************************************************
+* C functions used for callbacks.
+*********************************************************************/ +extern "C" { +void osdata_kmem_free(void * ptr, unsigned int length) { + kmem_free(kernel_map, (vm_address_t)ptr, length); + return; +} + +void osdata_phys_free(void * ptr, unsigned int length) { + ml_static_mfree((vm_offset_t)ptr, length); + return; +} + +void osdata_vm_deallocate(void * ptr, unsigned int length) +{ + (void)vm_deallocate(kernel_map, (vm_offset_t)ptr, length); + return; +} +}; + +#if PRAGMA_MARK +#pragma mark KXLD Allocation Callback +#endif +/********************************************************************* +* KXLD Allocation Callback +*********************************************************************/ +kxld_addr_t +kern_allocate( + u_long size, + KXLDAllocateFlags * flags, + void * user_data) +{ + vm_address_t result = 0; // returned + kern_return_t mach_result = KERN_FAILURE; + bool success = false; + OSKext * theKext = (OSKext *)user_data; + u_long roundSize = round_page(size); + OSData * linkBuffer = NULL; // must release + + mach_result = kext_alloc(&result, roundSize, /* fixed */ FALSE); + if (mach_result != KERN_SUCCESS) { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't allocate kernel memory to link %s.", + theKext->getIdentifierCString()); + goto finish; + } + + /* Create an OSData wrapper for the allocated buffer. + * Note that we do not set a dealloc function on it here. + * We have to call vm_map_unwire() on it in OSKext::unload() + * and an OSData dealloc function can't take all those parameters. 
+ */ + linkBuffer = OSData::withBytesNoCopy((void *)result, roundSize); + if (!linkBuffer) { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't allocate linked executable wrapper for %s.", + theKext->getIdentifierCString()); + goto finish; + } + + OSKextLog(theKext, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag | kOSKextLogLinkFlag, + "Allocated link buffer for kext %s at %p (%lu bytes).", + theKext->getIdentifierCString(), + (void *)result, (unsigned long)roundSize); + + theKext->setLinkedExecutable(linkBuffer); + + *flags = kKxldAllocateWritable; + success = true; + +finish: + if (!success && result) { + kext_free(result, roundSize); + result = 0; + } + + OSSafeRelease(linkBuffer); + + return (kxld_addr_t)result; +} + +/********************************************************************* +*********************************************************************/ +void +kxld_log_callback( + KXLDLogSubsystem subsystem, + KXLDLogLevel level, + const char * format, + va_list argList, + void * user_data) +{ + OSKext *theKext = (OSKext *) user_data; + OSKextLogSpec logSpec = 0; + + switch (subsystem) { + case kKxldLogLinking: + logSpec |= kOSKextLogLinkFlag; + break; + case kKxldLogPatching: + logSpec |= kOSKextLogPatchFlag; + break; + } + + switch (level) { + case kKxldLogExplicit: + logSpec |= kOSKextLogExplicitLevel; + break; + case kKxldLogErr: + logSpec |= kOSKextLogErrorLevel; + break; + case kKxldLogWarn: + logSpec |= kOSKextLogWarningLevel; + break; + case kKxldLogBasic: + logSpec |= kOSKextLogProgressLevel; + break; + case kKxldLogDetail: + logSpec |= kOSKextLogDetailLevel; + break; + case kKxldLogDebug: + logSpec |= kOSKextLogDebugLevel; + break; + } + + OSKextVLog(theKext, logSpec, format, argList); +} + +#if PRAGMA_MARK +#pragma mark Module Config (Startup & Shutdown) +#endif +/********************************************************************* +* Module Config (Class Definition & Class Methods) 
+*********************************************************************/ +#define super OSObject +OSDefineMetaClassAndStructors(OSKext, OSObject) + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::initialize(void) +{ + OSData * kernelExecutable = NULL; // do not release + u_char * kernelStart = NULL; // do not free + size_t kernelLength = 0; + OSString * scratchString = NULL; // must release + IORegistryEntry * registryRoot = NULL; // do not release + OSNumber * kernelCPUType = NULL; // must release + OSNumber * kernelCPUSubtype = NULL; // must release + OSKextLogSpec bootLogFilter = kOSKextLogSilentFilter; + bool setResult = false; + uint64_t * timestamp = 0; + char bootArgBuffer[16]; // for PE_parse_boot_argn w/strings + + /* This must be the first thing allocated. Everything else grabs this lock. + */ + sKextLock = IORecursiveLockAlloc(); + sKextInnerLock = IORecursiveLockAlloc(); + assert(sKextLock); + assert(sKextInnerLock); + + sKextsByID = OSDictionary::withCapacity(kOSKextTypicalLoadCount); + sLoadedKexts = OSArray::withCapacity(kOSKextTypicalLoadCount); + sKernelRequests = OSArray::withCapacity(0); + sPostedKextLoadIdentifiers = OSSet::withCapacity(0); + sAllKextLoadIdentifiers = OSSet::withCapacity(kOSKextTypicalLoadCount); + sRequestCallbackRecords = OSArray::withCapacity(0); + assert(sKextsByID && sLoadedKexts && sKernelRequests && + sPostedKextLoadIdentifiers && sAllKextLoadIdentifiers && + sRequestCallbackRecords); + + /* Read the log flag boot-args and set the log flags. 
+ */ + if (PE_parse_boot_argn("kextlog", &bootLogFilter, sizeof("kextlog=0x00000000 "))) { + sBootArgLogFilterFound = true; + sKernelLogFilter = bootLogFilter; + // log this if any flags are set + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogFlagsMask, + "Kernel kext log filter 0x%x per kextlog boot arg.", + (unsigned)sKernelLogFilter); + } + + sSafeBoot = PE_parse_boot_argn("-x", bootArgBuffer, + sizeof(bootArgBuffer)) ? true : false; + + if (sSafeBoot) { + OSKextLog(/* kext */ NULL, + kOSKextLogWarningLevel | + kOSKextLogGeneralFlag, + "SAFE BOOT DETECTED - " + "only valid OSBundleRequired kexts will be loaded."); + } + + /* Set up an OSKext instance to represent the kernel itself. + */ + sKernelKext = new OSKext; + assert(sKernelKext); + + kernelStart = (u_char *)&_mh_execute_header; + kernelLength = getlastaddr() - (vm_offset_t)kernelStart; + kernelExecutable = OSData::withBytesNoCopy( + kernelStart, kernelLength); + assert(kernelExecutable); + + sKernelKext->loadTag = sNextLoadTag++; // the kernel is load tag 0 + sKernelKext->bundleID = OSSymbol::withCString(kOSKextKernelIdentifier); + + sKernelKext->version = OSKextParseVersionString(osrelease); + sKernelKext->compatibleVersion = sKernelKext->version; + sKernelKext->linkedExecutable = kernelExecutable; + // linkState will be set first time we do a link + + sKernelKext->flags.hasAllDependencies = 1; + sKernelKext->flags.kernelComponent = 1; + sKernelKext->flags.prelinked = 0; + sKernelKext->flags.loaded = 1; + sKernelKext->flags.started = 1; + sKernelKext->flags.CPPInitialized = 0; + + sKernelKext->kmod_info = &g_kernel_kmod_info; + strlcpy(g_kernel_kmod_info.version, osrelease, + sizeof(g_kernel_kmod_info.version)); + g_kernel_kmod_info.size = kernelLength; + g_kernel_kmod_info.id = sKernelKext->loadTag; + + /* Cons up an info dict, so we don't have to have special-case + * checking all over. 
+ */ + sKernelKext->infoDict = OSDictionary::withCapacity(5); + assert(sKernelKext->infoDict); + setResult = sKernelKext->infoDict->setObject(kCFBundleIdentifierKey, + sKernelKext->bundleID); + assert(setResult); + setResult = sKernelKext->infoDict->setObject(kOSKernelResourceKey, + kOSBooleanTrue); + assert(setResult); + + scratchString = OSString::withCStringNoCopy(osrelease); + assert(scratchString); + setResult = sKernelKext->infoDict->setObject(kCFBundleVersionKey, + scratchString); + assert(setResult); + OSSafeReleaseNULL(scratchString); + + scratchString = OSString::withCStringNoCopy("mach_kernel"); + assert(scratchString); + setResult = sKernelKext->infoDict->setObject(kCFBundleNameKey, + scratchString); + assert(setResult); + OSSafeReleaseNULL(scratchString); + + /* Add the kernel kext to the bookkeeping dictionaries. Note that + * the kernel kext doesn't have a kmod_info struct. copyInfo() + * gathers info from other places anyhow. + */ + setResult = sKextsByID->setObject(sKernelKext->bundleID, sKernelKext); + assert(setResult); + setResult = sLoadedKexts->setObject(sKernelKext); + assert(setResult); + sKernelKext->release(); + + registryRoot = IORegistryEntry::getRegistryRoot(); + kernelCPUType = OSNumber::withNumber( + (long long unsigned int)_mh_execute_header.cputype, + 8 * sizeof(_mh_execute_header.cputype)); + kernelCPUSubtype = OSNumber::withNumber( + (long long unsigned int)_mh_execute_header.cpusubtype, + 8 * sizeof(_mh_execute_header.cpusubtype)); + assert(registryRoot && kernelCPUSubtype && kernelCPUType); + + registryRoot->setProperty(kOSKernelCPUTypeKey, kernelCPUType); + registryRoot->setProperty(kOSKernelCPUSubtypeKey, kernelCPUSubtype); + + OSSafeRelease(kernelCPUType); + OSSafeRelease(kernelCPUSubtype); + + timestamp = __OSAbsoluteTimePtr(&last_loaded_timestamp); + *timestamp = 0; + timestamp = __OSAbsoluteTimePtr(&last_unloaded_timestamp); + *timestamp = 0; + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + 
kOSKextLogGeneralFlag, + "Kext system initialized."); + + return; +} + +/********************************************************************* +* This could be in OSKextLib.cpp but we need to hold a lock +* while removing all the segments and sKextLock will do. +*********************************************************************/ +/* static */ +OSReturn +OSKext::removeKextBootstrap(void) +{ + OSReturn result = kOSReturnError; + + static bool alreadyDone = false; + boolean_t keepsyms = FALSE; + + const char * dt_kernel_header_name = "Kernel-__HEADER"; + const char * dt_kernel_symtab_name = "Kernel-__SYMTAB"; + kernel_mach_header_t * dt_mach_header = NULL; + int dt_mach_header_size = 0; + struct symtab_command * dt_symtab = NULL; + int dt_symtab_size = 0; + int dt_result = 0; + + kernel_segment_command_t * seg_to_remove = NULL; +#if __ppc__ || __arm__ + const char * dt_segment_name = NULL; + void * segment_paddress = NULL; + int segment_size = 0; +#endif + + /* This must be the very first thing done by this function. + */ + IORecursiveLockLock(sKextLock); + + /* If we already did this, it's a success. + */ + if (alreadyDone) { + result = kOSReturnSuccess; + goto finish; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogGeneralFlag, + "Jettisoning kext bootstrap segments."); + + PE_parse_boot_argn("keepsyms", &keepsyms, sizeof(keepsyms)); + + /***** + * Dispose of unnecessary stuff that the booter didn't need to load. 
+ */ + dt_result = IODTGetLoaderInfo(dt_kernel_header_name, + (void **)&dt_mach_header, &dt_mach_header_size); + if (dt_result == 0 && dt_mach_header) { + IODTFreeLoaderInfo(dt_kernel_header_name, (void *)dt_mach_header, + round_page_32(dt_mach_header_size)); + } + dt_result = IODTGetLoaderInfo(dt_kernel_symtab_name, + (void **)&dt_symtab, &dt_symtab_size); + if (dt_result == 0 && dt_symtab) { + IODTFreeLoaderInfo(dt_kernel_symtab_name, (void *)dt_symtab, + round_page_32(dt_symtab_size)); + } + + /***** + * KLD bootstrap segment. + */ + // xxx - should rename KLD segment + seg_to_remove = getsegbyname("__KLD"); + if (seg_to_remove) { + OSRuntimeUnloadCPPForSegment(seg_to_remove); + } + +#if __ppc__ || __arm__ + /* Free the memory that was set up by bootx. + */ + dt_segment_name = "Kernel-__KLD"; + if (0 == IODTGetLoaderInfo(dt_segment_name, &segment_paddress, &segment_size)) { + IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress, + (int)segment_size); + } +#elif __i386__ || __x86_64__ + /* On x86, use the mapping data from the segment load command to + * unload KLD directly. + * This may invalidate any assumptions about "avail_start" + * defining the lower bound for valid physical addresses. + */ + if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) { + ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize); + } +#else +#error arch +#endif + + seg_to_remove = NULL; + + /***** + * Prelinked kernel's symtab (if there is one). + */ + kernel_section_t * sect; + sect = getsectbyname("__PRELINK", "__symtab"); + if (sect && sect->addr && sect->size) { + ml_static_mfree(sect->addr, sect->size); + } + + /***** + * Dump the LINKEDIT segment, unless keepsyms is set. 
+ */ + if (!keepsyms) { + seg_to_remove = (kernel_segment_command_t *)getsegbyname("__LINKEDIT"); + if (seg_to_remove) { + OSRuntimeUnloadCPPForSegment(seg_to_remove); + } + +#if __ppc__ || __arm__ + dt_segment_name = "Kernel-__LINKEDIT"; + if (0 == IODTGetLoaderInfo(dt_segment_name, + &segment_paddress, &segment_size)) { + + IODTFreeLoaderInfo(dt_segment_name, (void *)segment_paddress, + (int)segment_size); + } +#elif __i386__ || __x86_64__ + if (seg_to_remove && seg_to_remove->vmaddr && seg_to_remove->vmsize) { + ml_static_mfree(seg_to_remove->vmaddr, seg_to_remove->vmsize); + } +#else +#error arch +#endif + } else { + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogGeneralFlag, + "keepsyms boot arg specified; keeping linkedit segment for symbols."); + } + + seg_to_remove = NULL; + + alreadyDone = true; + result = kOSReturnSuccess; + +finish: + + /* This must be the very last thing done before returning. + */ + IORecursiveLockUnlock(sKextLock); + + return result; +} + +/********************************************************************* +*********************************************************************/ +void +OSKext::flushNonloadedKexts( + Boolean flushPrelinkedKexts) +{ + OSSet * prelinkedKexts = NULL; // must release + OSCollectionIterator * kextIterator = NULL; // must release + OSCollectionIterator * prelinkIterator = NULL; // must release + const OSSymbol * thisID = NULL; // do not release + OSKext * thisKext = NULL; // do not release + uint32_t count, i; + + IORecursiveLockLock(sKextLock); + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogKextBookkeepingFlag, + "Flushing nonloaded kexts and other unused data."); + + OSKext::considerDestroyingLinkContext(); + + /* If we aren't flushing unused prelinked kexts, we have to put them + * aside while we flush everything else so make a container for them. 
+ */ + if (!flushPrelinkedKexts) { + prelinkedKexts = OSSet::withCapacity(0); + if (!prelinkedKexts) { + goto finish; + } + } + + /* Set aside prelinked kexts (in-use or not) and break + * any lingering inter-kext references for nonloaded kexts + * so they have min. retain counts. + */ + kextIterator = OSCollectionIterator::withCollection(sKextsByID); + if (!kextIterator) { + goto finish; + } + + while ((thisID = OSDynamicCast(OSSymbol, + kextIterator->getNextObject()))) { + + thisKext = OSDynamicCast(OSKext, sKextsByID->getObject(thisID)); + + if (thisKext) { + if (prelinkedKexts && thisKext->isPrelinked()) { + prelinkedKexts->setObject(thisKext); + } + thisKext->flushDependencies(/* forceIfLoaded */ false); + } + } + + /* Dump all the kexts in the ID dictionary; we'll repopulate it shortly. + */ + sKextsByID->flushCollection(); + + /* Now put the loaded kexts back into the ID dictionary. + */ + count = sLoadedKexts->getCount(); + for (i = 0; i < count; i++) { + thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + sKextsByID->setObject(thisKext->getIdentifierCString(), thisKext); + } + + /* Finally, put back the prelinked kexts if we saved any. 
+ */ + if (prelinkedKexts) { + prelinkIterator = OSCollectionIterator::withCollection(prelinkedKexts); + if (!prelinkIterator) { + goto finish; + } + + while ((thisKext = OSDynamicCast(OSKext, + prelinkIterator->getNextObject()))) { + + sKextsByID->setObject(thisKext->getIdentifierCString(), + thisKext); + } + } + +finish: + IORecursiveLockUnlock(sKextLock); + + OSSafeRelease(prelinkedKexts); + OSSafeRelease(kextIterator); + OSSafeRelease(prelinkIterator); + + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::setKextdActive(Boolean active) +{ + IORecursiveLockLock(sKextLock); + sKextdActive = active; + if (sPrelinkedPersonalities) { + gIOCatalogue->removePersonalities(sPrelinkedPersonalities); + OSSafeReleaseNULL(sPrelinkedPersonalities); + } + IORecursiveLockUnlock(sKextLock); + + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::setDeferredLoadSucceeded(Boolean succeeded) +{ + IORecursiveLockLock(sKextLock); + sDeferredLoadSucceeded = succeeded; + IORecursiveLockUnlock(sKextLock); + + return; +} + +/********************************************************************* +* Called from IOSystemShutdownNotification. 
+*********************************************************************/ +/* static */ +void +OSKext::willShutdown(void) +{ + OSReturn checkResult = kOSReturnError; + OSDictionary * exitRequest = NULL; // must release + + IORecursiveLockLock(sKextLock); + + OSKext::setLoadEnabled(false); + OSKext::setUnloadEnabled(false); + OSKext::setAutounloadsEnabled(false); + OSKext::setKernelRequestsEnabled(false); + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogGeneralFlag, + "System shutdown; requesting immediate kextd exit."); + + checkResult = _OSKextCreateRequest(kKextRequestPredicateRequestKextdExit, + &exitRequest); + if (checkResult != kOSReturnSuccess) { + goto finish; + } + if (!sKernelRequests->setObject(exitRequest)) { + goto finish; + } + + OSKextPingKextd(); + +finish: + IORecursiveLockUnlock(sKextLock); + + OSSafeRelease(exitRequest); + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::getLoadEnabled(void) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sLoadEnabled; + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::setLoadEnabled(bool flag) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sLoadEnabled; + sLoadEnabled = (flag ? true : false); + + if (sLoadEnabled != result) { + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogLoadFlag, + "Kext loading now %sabled.", sLoadEnabled ? 
"en" : "dis"); + } + + IORecursiveLockUnlock(sKextLock); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::getUnloadEnabled(void) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sUnloadEnabled; + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::setUnloadEnabled(bool flag) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sUnloadEnabled; + sUnloadEnabled = (flag ? true : false); + IORecursiveLockUnlock(sKextLock); + + if (sUnloadEnabled != result) { + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogGeneralFlag | kOSKextLogLoadFlag, + "Kext unloading now %sabled.", sUnloadEnabled ? "en" : "dis"); + } + + return result; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! +*********************************************************************/ +/* static */ +bool +OSKext::getAutounloadEnabled(void) +{ + bool result; + + IORecursiveLockLock(sKextInnerLock); + result = sAutounloadEnabled ? true : false; + IORecursiveLockUnlock(sKextInnerLock); + return result; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! +*********************************************************************/ +/* static */ +bool +OSKext::setAutounloadsEnabled(bool flag) +{ + bool result; + + IORecursiveLockLock(sKextInnerLock); + + result = sAutounloadEnabled; + sAutounloadEnabled = (flag ? 
true : false); + if (!sAutounloadEnabled && sUnloadCallout) { + thread_call_cancel(sUnloadCallout); + } + + if (sAutounloadEnabled != result) { + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogGeneralFlag | kOSKextLogLoadFlag, + "Kext autounloading now %sabled.", + sAutounloadEnabled ? "en" : "dis"); + } + + IORecursiveLockUnlock(sKextInnerLock); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* instance method operating on OSKext field */ +bool +OSKext::setAutounloadEnabled(bool flag) +{ + bool result = flags.autounloadEnabled ? true : false; + flags.autounloadEnabled = flag ? 1 : 0; + + if (result != (flag ? true : false)) { + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag, + "Autounloading for kext %s now %sabled.", + getIdentifierCString(), + flags.autounloadEnabled ? "en" : "dis"); + } + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::setKernelRequestsEnabled(bool flag) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sKernelRequestsEnabled; + sKernelRequestsEnabled = flag ? true : false; + + if (sKernelRequestsEnabled != result) { + OSKextLog(/* kext */ NULL, + kOSKextLogBasicLevel | + kOSKextLogGeneralFlag, + "Kernel requests now %sabled.", + sKernelRequestsEnabled ? 
"en" : "dis"); + } + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool +OSKext::getKernelRequestsEnabled(void) +{ + bool result; + + IORecursiveLockLock(sKextLock); + result = sKernelRequestsEnabled; + IORecursiveLockUnlock(sKextLock); + return result; +} + +#if PRAGMA_MARK +#pragma mark Kext Life Cycle +#endif +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::withPrelinkedInfoDict( + OSDictionary * anInfoDict) +{ + OSKext * newKext = new OSKext; + + if (newKext && !newKext->initWithPrelinkedInfoDict(anInfoDict)) { + newKext->release(); + return NULL; + } + + return newKext; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::initWithPrelinkedInfoDict( + OSDictionary * anInfoDict) +{ + bool result = false; + kern_return_t alloc_result = KERN_SUCCESS; + OSString * kextPath = NULL; // do not release + OSNumber * addressNum = NULL; // reused; do not release + OSNumber * lengthNum = NULL; // reused; do not release + void * data = NULL; // do not free + void * srcData = NULL; // do not free + OSData * prelinkedExecutable = NULL; // must release + void * linkStateCopy = NULL; // kmem_free on error + uint32_t linkStateLength = 0; + uint32_t length = 0; // reused + + if (!super::init()) { + goto finish; + } + + /* Get the path. Don't look for an arch-specific path property. + */ + kextPath = OSDynamicCast(OSString, + anInfoDict->getObject(kPrelinkBundlePathKey)); + + if (!setInfoDictionaryAndPath(anInfoDict, kextPath)) { + goto finish; + } + + /* Don't need the path to be in the info dictionary any more. 
+ */ + anInfoDict->removeObject(kPrelinkBundlePathKey); + + /* If we have a link state, create an OSData wrapper for it. + */ + addressNum = OSDynamicCast(OSNumber, + anInfoDict->getObject(kPrelinkLinkStateKey)); + if (addressNum) { + lengthNum = OSDynamicCast(OSNumber, + anInfoDict->getObject(kPrelinkLinkStateSizeKey)); + if (!lengthNum) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s can't find prelinked kext link state size.", + getIdentifierCString()); + goto finish; + } + + data = (void *) (intptr_t) (addressNum->unsigned64BitValue()); + linkStateLength = (uint32_t) (lengthNum->unsigned32BitValue()); + + anInfoDict->removeObject(kPrelinkLinkStateKey); + anInfoDict->removeObject(kPrelinkLinkStateSizeKey); + + /* Copy the link state out of the booter-provided memory so it is in + * the VM system and we can page it out. + */ + alloc_result = kmem_alloc_pageable(kernel_map, + (vm_offset_t *)&linkStateCopy, linkStateLength); + if (alloc_result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s failed to copy prelinked link state.", + getIdentifierCString()); + goto finish; + } + memcpy(linkStateCopy, data, linkStateLength); + + linkState = OSData::withBytesNoCopy(linkStateCopy, linkStateLength); + if (!linkState) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s failed to create link state wrapper.", + getIdentifierCString()); + goto finish; + } + linkState->setDeallocFunction(osdata_kmem_free); + + /* Clear linkStateCopy; the OSData owns it now so we mustn't free it. + */ + linkStateCopy = NULL; + } + + /* Create an OSData wrapper around the linked executable. 
+ */ + addressNum = OSDynamicCast(OSNumber, + anInfoDict->getObject(kPrelinkExecutableLoadKey)); + if (addressNum) { + lengthNum = OSDynamicCast(OSNumber, + anInfoDict->getObject(kPrelinkExecutableSizeKey)); + if (!lengthNum) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s can't find prelinked kext executable size.", + getIdentifierCString()); + goto finish; + } + + data = (void *) (intptr_t) (addressNum->unsigned64BitValue()); + length = (uint32_t) (lengthNum->unsigned32BitValue()); + + anInfoDict->removeObject(kPrelinkExecutableLoadKey); + anInfoDict->removeObject(kPrelinkExecutableSizeKey); + + /* If the kext's load address differs from its source address, allocate + * space in the kext map at the load address and copy the kext over. + */ + addressNum = OSDynamicCast(OSNumber, anInfoDict->getObject(kPrelinkExecutableSourceKey)); + if (addressNum) { + srcData = (void *) (intptr_t) (addressNum->unsigned64BitValue()); + + if (data != srcData) { +#if __LP64__ + alloc_result = kext_alloc((vm_offset_t *)&data, length, /* fixed */ TRUE); + if (alloc_result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Failed to allocate space for prelinked kext %s.", + getIdentifierCString()); + goto finish; + } + memcpy(data, srcData, length); +#else + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Error: prelinked kext %s - source and load addresses " + "differ on ILP32 architecture.", + getIdentifierCString()); + goto finish; +#endif /* __LP64__ */ + } + + anInfoDict->removeObject(kPrelinkExecutableSourceKey); + } + + /* We don't need to set a dealloc function for the linked executable + * because it is freed separately in OSKext::unload(), which must unwire + * part of the memory. + * xxx - do we *have* to do it that way? 
+ */ + prelinkedExecutable = OSData::withBytesNoCopy(data, length); + if (!prelinkedExecutable) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogArchiveFlag, + "Kext %s failed to create executable wrapper.", + getIdentifierCString()); + goto finish; + } + setLinkedExecutable(prelinkedExecutable); + + addressNum = OSDynamicCast(OSNumber, + anInfoDict->getObject(kPrelinkKmodInfoKey)); + if (!addressNum) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s can't find prelinked kext kmod_info address.", + getIdentifierCString()); + goto finish; + } + + kmod_info = (kmod_info_t *) (intptr_t) (addressNum->unsigned64BitValue()); + + anInfoDict->removeObject(kPrelinkKmodInfoKey); + } + + /* If the plist has a UUID for an interface, save that off. + */ + if (isInterface()) { + interfaceUUID = OSDynamicCast(OSData, + anInfoDict->getObject(kPrelinkInterfaceUUIDKey)); + if (interfaceUUID) { + interfaceUUID->retain(); + anInfoDict->removeObject(kPrelinkInterfaceUUIDKey); + } + } + + flags.prelinked = true; + + /* If we created a kext from prelink info, + * we must be booting from a prelinked kernel. + */ + sPrelinkBoot = true; + + result = registerIdentifier(); + +finish: + + /* If we didn't hand linkStateCopy off to an OSData, free it. 
+ */ + if (linkStateCopy) { + kmem_free(kernel_map, (vm_offset_t)linkStateCopy, linkStateLength); + } + + OSSafeRelease(prelinkedExecutable); + + return result; +} + +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::withBooterData( + OSString * deviceTreeName, + OSData * booterData) +{ + OSKext * newKext = new OSKext; + + if (newKext && !newKext->initWithBooterData(deviceTreeName, booterData)) { + newKext->release(); + return NULL; + } + + return newKext; +} + +/********************************************************************* +*********************************************************************/ +typedef struct _BooterKextFileInfo { + uint32_t infoDictPhysAddr; + uint32_t infoDictLength; + uint32_t executablePhysAddr; + uint32_t executableLength; + uint32_t bundlePathPhysAddr; + uint32_t bundlePathLength; +} _BooterKextFileInfo; + +bool +OSKext::initWithBooterData( + OSString * deviceTreeName, + OSData * booterData) +{ + bool result = false; + _BooterKextFileInfo * kextFileInfo = NULL; // do not free + char * infoDictAddr = NULL; // do not free + void * executableAddr = NULL; // do not free + char * bundlePathAddr = NULL; // do not free + + OSObject * parsedXML = NULL; // must release + OSDictionary * theInfoDict = NULL; // do not release + OSString * kextPath = NULL; // must release + OSString * errorString = NULL; // must release + OSData * executable = NULL; // must release + + if (!super::init()) { + goto finish; + } + + kextFileInfo = (_BooterKextFileInfo *)booterData->getBytesNoCopy(); + if (!kextFileInfo) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "No booter-provided data for kext device tree entry %s.", + deviceTreeName->getCStringNoCopy()); + goto finish; + } + + /* The info plist must exist or we can't read the kext. 
+ */ + if (!kextFileInfo->infoDictPhysAddr || !kextFileInfo->infoDictLength) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "No kext info dictionary for booter device tree entry %s.", + deviceTreeName->getCStringNoCopy()); + goto finish; + } + + infoDictAddr = (char *)ml_static_ptovirt(kextFileInfo->infoDictPhysAddr); + if (!infoDictAddr) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't translate physical address 0x%x of kext info dictionary " + "for device tree entry %s.", + (int)kextFileInfo->infoDictPhysAddr, + deviceTreeName->getCStringNoCopy()); + goto finish; + } + + parsedXML = OSUnserializeXML(infoDictAddr, &errorString); + if (parsedXML) { + theInfoDict = OSDynamicCast(OSDictionary, parsedXML); + } + if (!theInfoDict) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not a dictionary"; + } + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Error unserializing info dictionary for device tree entry %s: %s.", + deviceTreeName->getCStringNoCopy(), errorCString); + goto finish; + } + + /* A bundle path is not mandatory. + */ + if (kextFileInfo->bundlePathPhysAddr && kextFileInfo->bundlePathLength) { + bundlePathAddr = (char *)ml_static_ptovirt(kextFileInfo->bundlePathPhysAddr); + if (!bundlePathAddr) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't translate physical address 0x%x of kext bundle path " + "for device tree entry %s.", + (int)kextFileInfo->bundlePathPhysAddr, + deviceTreeName->getCStringNoCopy()); + goto finish; + } + bundlePathAddr[kextFileInfo->bundlePathLength-1] = '\0'; // just in case! 
+ + kextPath = OSString::withCString(bundlePathAddr); + if (!kextPath) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Failed to create wrapper for device tree entry %s kext path %s.", + deviceTreeName->getCStringNoCopy(), bundlePathAddr); + goto finish; + } + } + + if (!setInfoDictionaryAndPath(theInfoDict, kextPath)) { + goto finish; + } + + /* An executable is not mandatory. + */ + if (kextFileInfo->executablePhysAddr && kextFileInfo->executableLength) { + executableAddr = (void *)ml_static_ptovirt(kextFileInfo->executablePhysAddr); + if (!executableAddr) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't translate physical address 0x%x of kext executable " + "for device tree entry %s.", + (int)kextFileInfo->executablePhysAddr, + deviceTreeName->getCStringNoCopy()); + goto finish; + } + + executable = OSData::withBytesNoCopy(executableAddr, + kextFileInfo->executableLength); + if (!executable) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Failed to create executable wrapper for device tree entry %s.", + deviceTreeName->getCStringNoCopy()); + goto finish; + } + + /* A kext with an executable needs to retain the whole booterData + * object to keep the executable in memory. 
+ */ + if (!setExecutable(executable, booterData)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Failed to set kext executable for device tree entry %s.", + deviceTreeName->getCStringNoCopy()); + goto finish; + } + } + + result = registerIdentifier(); + +finish: + OSSafeRelease(parsedXML); + OSSafeRelease(kextPath); + OSSafeRelease(errorString); + OSSafeRelease(executable); + + return result; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::registerIdentifier(void) +{ + bool result = false; + OSKext * existingKext = NULL; // do not release + bool existingIsLoaded = false; + bool existingIsPrelinked = false; + OSKextVersion newVersion = -1; + OSKextVersion existingVersion = -1; + char newVersionCString[kOSKextVersionMaxLength]; + char existingVersionCString[kOSKextVersionMaxLength]; + OSData * newUUID = NULL; // must release + OSData * existingUUID = NULL; // must release + + /* Get the new kext's version for checks & log messages. + */ + newVersion = getVersion(); + OSKextVersionGetString(newVersion, newVersionCString, + kOSKextVersionMaxLength); + + /* If we don't have an existing kext with this identifier, + * just record the new kext and we're done! + */ + existingKext = OSDynamicCast(OSKext, sKextsByID->getObject(bundleID)); + if (!existingKext) { + sKextsByID->setObject(bundleID, this); + result = true; + goto finish; + } + + /* Get the existing kext's version for checks & log messages. 
+ */ + existingVersion = existingKext->getVersion(); + OSKextVersionGetString(existingVersion, + existingVersionCString, kOSKextVersionMaxLength); + + existingIsLoaded = existingKext->isLoaded(); + existingIsPrelinked = existingKext->isPrelinked(); + + /* If we have a kext with this identifier that's already loaded/prelinked, + * we can't use the new one, but let's be really thorough and check how + * the two are related for a precise diagnostic log message. + * + * Note that user space can't find out about nonloaded prelinked kexts, + * so in this case we log a message when new & existing are equivalent + * at the step rather than warning level, because we are always going + * be getting a copy of the kext in the user load request mkext. + */ + if (existingIsLoaded || existingIsPrelinked) { + bool sameVersion = (newVersion == existingVersion); + bool sameExecutable = true; // assume true unless we have UUIDs + + /* Only get the UUID if the existing kext is loaded. Doing so + * might have to uncompress an mkext executable and we shouldn't + * take that hit when neither kext is loaded. + */ + newUUID = copyUUID(); + existingUUID = existingKext->copyUUID(); + + /* I'm entirely too paranoid about checking equivalence of executables, + * but I remember nasty problems with it in the past. + * + * - If we have UUIDs for both kexts, compare them. + * - If only one kext has a UUID, they're definitely different. + */ + if (newUUID && existingUUID) { + sameExecutable = newUUID->isEqualTo(existingUUID); + } else if (newUUID || existingUUID) { + sameExecutable = false; + } + + if (!newUUID && !existingUUID) { + + /* If there are no UUIDs, we can't really tell that the executables + * are *different* without a lot of work; the loaded kext's + * unrelocated executable is no longer around (and we never had it + * in-kernel for a prelinked kext). We certainly don't want to do + * a whole fake link for the new kext just to compare, either. 
+ */ + + OSKextVersionGetString(version, newVersionCString, + sizeof(newVersionCString)); + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogKextBookkeepingFlag, + "Notice - new kext %s, v%s matches %s kext " + "but can't determine if executables are the same (no UUIDs).", + getIdentifierCString(), + newVersionCString, + (existingIsLoaded ? "loaded" : "prelinked")); + } + + if (sameVersion && sameExecutable) { + OSKextLog(this, + (existingIsLoaded ? kOSKextLogWarningLevel : kOSKextLogStepLevel) | + kOSKextLogKextBookkeepingFlag, + "Refusing new kext %s, v%s: a %s copy is already present " + "(same version and executable).", + getIdentifierCString(), newVersionCString, + (existingIsLoaded ? "loaded" : "prelinked")); + } else { + if (!sameVersion) { + /* This condition is significant so log it under warnings. + */ + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogKextBookkeepingFlag, + "Refusing new kext %s, v%s: already have %s v%s.", + getIdentifierCString(), + newVersionCString, + (existingIsLoaded ? "loaded" : "prelinked"), + existingVersionCString); + } else { + /* This condition is significant so log it under warnings. + */ + OSKextLog(this, + kOSKextLogWarningLevel | kOSKextLogKextBookkeepingFlag, + "Refusing new kext %s, v%s: a %s copy with a different " + "executable UUID is already present.", + getIdentifierCString(), newVersionCString, + (existingIsLoaded ? "loaded" : "prelinked")); + } + } + goto finish; + } /* if (existingIsLoaded || existingIsPrelinked) */ + + /* We have two nonloaded/nonprelinked kexts, so our decision depends on whether + * user loads are happening or if we're still in early boot. User agents are + * supposed to resolve dependencies topside and include only the exact + * kexts needed; so we always accept the new kext (in fact we should never + * see an older unloaded copy hanging around). 
+ */ + if (sUserLoadsActive) { + sKextsByID->setObject(bundleID, this); + result = true; + + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogKextBookkeepingFlag, + "Dropping old copy of kext %s (v%s) for newly-added (v%s).", + getIdentifierCString(), + existingVersionCString, + newVersionCString); + + goto finish; + } + + /* During early boot, the kext with the highest version always wins out. + * Prelinked kernels will never hit this, but mkexts and booter-read + * kexts might have duplicates. + */ + if (newVersion > existingVersion) { + sKextsByID->setObject(bundleID, this); + result = true; + + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogKextBookkeepingFlag, + "Dropping lower version (v%s) of registered kext %s for higher (v%s).", + existingVersionCString, + getIdentifierCString(), + newVersionCString); + + } else { + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogKextBookkeepingFlag, + "Kext %s is already registered with a higher/same version (v%s); " + "dropping newly-added (v%s).", + getIdentifierCString(), + existingVersionCString, + newVersionCString); + } + + /* result has been set appropriately by now. */ + +finish: + + if (result) { + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogKextBookkeepingFlag, + "Kext %s, v%s registered and available for loading.", + getIdentifierCString(), newVersionCString); + } + + OSSafeRelease(newUUID); + OSSafeRelease(existingUUID); + + return result; +} + +/********************************************************************* +* Does the bare minimum validation to look up a kext. +* All other validation is done on the spot as needed. 
+* +* No need for lock, only called from init +**********************************************************************/ +bool +OSKext::setInfoDictionaryAndPath( + OSDictionary * aDictionary, + OSString * aPath) +{ + bool result = false; + OSString * bundleIDString = NULL; // do not release + OSString * versionString = NULL; // do not release + OSString * compatibleVersionString = NULL; // do not release + const char * versionCString = NULL; // do not free + const char * compatibleVersionCString = NULL; // do not free + OSBoolean * scratchBool = NULL; // do not release + + if (infoDict) { + panic("Attempt to set info dictionary on a kext " + "that already has one (%s).", + getIdentifierCString()); + } + + if (!aDictionary || !OSDynamicCast(OSDictionary, aDictionary)) { + goto finish; + } + + infoDict = aDictionary; + infoDict->retain(); + + /* Check right away if the info dictionary has any log flags. + */ + scratchBool = OSDynamicCast(OSBoolean, + getPropertyForHostArch(kOSBundleEnableKextLoggingKey)); + if (scratchBool == kOSBooleanTrue) { + flags.loggingEnabled = 1; + } + + /* The very next thing to get is the bundle identifier. Unlike + * in user space, a kext with no bundle identifier gets axed + * immediately. + */ + bundleIDString = OSDynamicCast(OSString, + getPropertyForHostArch(kCFBundleIdentifierKey)); + if (!bundleIDString) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "CFBundleIdentifier missing/invalid type in kext %s.", + aPath ? aPath->getCStringNoCopy() : "(unknown)"); + goto finish; + } + bundleID = OSSymbol::withString(bundleIDString); + if (!bundleID) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Can't copy bundle identifier as symbol for kext %s.", + bundleIDString->getCStringNoCopy()); + goto finish; + } + + /* Save the path if we got one (it should always be available but it's + * just something nice to have for bookkeeping). 
+ */ + if (aPath) { + path = aPath; + path->retain(); + } + + /***** + * Minimal validation to initialize. We'll do other validation on the spot. + */ + if (bundleID->getLength() >= KMOD_MAX_NAME) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Kext %s error - CFBundleIdentifier over max length %d.", + getIdentifierCString(), KMOD_MAX_NAME - 1); + goto finish; + } + + version = compatibleVersion = -1; + + versionString = OSDynamicCast(OSString, + getPropertyForHostArch(kCFBundleVersionKey)); + if (!versionString) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Kext %s error - CFBundleVersion missing/invalid type.", + getIdentifierCString()); + goto finish; + } + versionCString = versionString->getCStringNoCopy(); + version = OSKextParseVersionString(versionCString); + if (version < 0) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Kext %s error - CFBundleVersion bad value '%s'.", + getIdentifierCString(), versionCString); + goto finish; + } + + compatibleVersion = -1; // set to illegal value for kexts that don't have + + compatibleVersionString = OSDynamicCast(OSString, + getPropertyForHostArch(kOSBundleCompatibleVersionKey)); + if (compatibleVersionString) { + compatibleVersionCString = compatibleVersionString->getCStringNoCopy(); + compatibleVersion = OSKextParseVersionString(compatibleVersionCString); + if (compatibleVersion < 0) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Kext %s error - OSBundleCompatibleVersion bad value '%s'.", + getIdentifierCString(), compatibleVersionCString); + goto finish; + } + + if (compatibleVersion > version) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag, + "Kext %s error - %s %s > %s %s (must be <=).", + getIdentifierCString(), + kOSBundleCompatibleVersionKey, compatibleVersionCString, + kCFBundleVersionKey, versionCString); + goto finish; + } + } + + /* Set flags for later use if the 
infoDict gets flushed. We only + * check for true values, not false ones(!) + */ + scratchBool = OSDynamicCast(OSBoolean, + getPropertyForHostArch(kOSBundleIsInterfaceKey)); + if (scratchBool && scratchBool->isTrue()) { + flags.interface = 1; + } + + scratchBool = OSDynamicCast(OSBoolean, + getPropertyForHostArch(kOSKernelResourceKey)); + if (scratchBool && scratchBool->isTrue()) { + flags.kernelComponent = 1; + flags.interface = 1; // xxx - hm. the kernel itself isn't an interface... + flags.started = 1; + + /* A kernel component has one implicit dependency on the kernel. + */ + flags.hasAllDependencies = 1; + } + + result = true; + +finish: + + return result; +} + +/********************************************************************* +* Not used for prelinked kernel boot as there is no unrelocated +* executable. +*********************************************************************/ +bool +OSKext::setExecutable( + OSData * anExecutable, + OSData * externalData, + bool externalDataIsMkext) +{ + bool result = false; + const char * executableKey = NULL; // do not free + + if (!anExecutable) { + infoDict->removeObject(_kOSKextExecutableKey); + infoDict->removeObject(_kOSKextMkextExecutableReferenceKey); + infoDict->removeObject(_kOSKextExecutableExternalDataKey); + result = true; + goto finish; + } + + if (infoDict->getObject(_kOSKextExecutableKey) || + infoDict->getObject(_kOSKextMkextExecutableReferenceKey)) { + + panic("Attempt to set an executable on a kext " + "that already has one (%s).", + getIdentifierCString()); + goto finish; + } + + if (externalDataIsMkext) { + executableKey = _kOSKextMkextExecutableReferenceKey; + } else { + executableKey = _kOSKextExecutableKey; + } + + if (anExecutable) { + infoDict->setObject(executableKey, anExecutable); + if (externalData) { + infoDict->setObject(_kOSKextExecutableExternalDataKey, externalData); + } + } + + result = true; + +finish: + return result; +} + 
+/********************************************************************* +*********************************************************************/ +void +OSKext::free(void) +{ + if (isLoaded()) { + panic("Attempt to free loaded kext %s.", getIdentifierCString()); + } + + OSSafeRelease(infoDict); + OSSafeRelease(bundleID); + OSSafeRelease(path); + OSSafeRelease(dependencies); + OSSafeRelease(linkState); + OSSafeRelease(linkedExecutable); + OSSafeRelease(metaClasses); + OSSafeRelease(interfaceUUID); + + if (isInterface() && kmod_info) { + kfree(kmod_info, sizeof(kmod_info_t)); + } + + super::free(); + return; +} + +#if PRAGMA_MARK +#pragma mark Mkext files +#endif +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::readMkextArchive(OSData * mkextData, + uint32_t * checksumPtr) +{ + OSReturn result = kOSKextReturnBadData; + uint32_t mkextLength = 0; + mkext_header * mkextHeader = 0; // do not free + uint32_t mkextVersion = 0; + + /* Note default return of kOSKextReturnBadData above. 
+ */ + mkextLength = mkextData->getLength(); + if (mkextLength < sizeof(mkext_basic_header)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive too small to be valid."); + goto finish; + } + + mkextHeader = (mkext_header *)mkextData->getBytesNoCopy(); + + if (MKEXT_GET_MAGIC(mkextHeader) != MKEXT_MAGIC || + MKEXT_GET_SIGNATURE(mkextHeader) != MKEXT_SIGN) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive has invalid magic or signature."); + goto finish; + } + + if (MKEXT_GET_LENGTH(mkextHeader) != mkextLength) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive recorded length doesn't match actual file length."); + goto finish; + } + + mkextVersion = MKEXT_GET_VERSION(mkextHeader); + + if (mkextVersion == MKEXT_VERS_2) { + result = OSKext::readMkext2Archive(mkextData, NULL, checksumPtr); + } else if (mkextVersion == MKEXT_VERS_1) { + result = OSKext::readMkext1Archive(mkextData, checksumPtr); + } else { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive of unsupported mkext version 0x%x.", mkextVersion); + result = kOSKextReturnUnsupported; + } + +finish: + return result; +} + +/********************************************************************* +* Assumes magic, signature, version, length have been checked. +* +* Doesn't do as much bounds-checking as it should, but we're dropping +* mkext1 support from the kernel for SnowLeopard soon. +* +* Should keep track of all kexts created so far, and if we hit a +* fatal error halfway through, remove those kexts. If we've dropped +* an older version that had already been read, whoops! Might want to +* add a level of buffering? 
+*********************************************************************/ +/* static */ +OSReturn +OSKext::readMkext1Archive( + OSData * mkextData, + uint32_t * checksumPtr) +{ + OSReturn result = kOSReturnError; + uint32_t mkextLength; + mkext1_header * mkextHeader = 0; // do not free + void * mkextEnd = 0; // do not free + uint32_t mkextVersion; + uint8_t * crc_address = 0; + uint32_t checksum; + uint32_t numKexts = 0; + + OSData * infoDictDataObject = NULL; // must release + OSObject * parsedXML = NULL; // must release + OSDictionary * infoDict = NULL; // do not release + OSString * errorString = NULL; // must release + OSData * mkextExecutableInfo = NULL; // must release + OSKext * theKext = NULL; // must release + + mkextLength = mkextData->getLength(); + mkextHeader = (mkext1_header *)mkextData->getBytesNoCopy(); + mkextEnd = (char *)mkextHeader + mkextLength; + mkextVersion = OSSwapBigToHostInt32(mkextHeader->version); + + crc_address = (u_int8_t *)&mkextHeader->version; + checksum = mkext_adler32(crc_address, + (uintptr_t)mkextHeader + + OSSwapBigToHostInt32(mkextHeader->length) - (uintptr_t)crc_address); + + if (OSSwapBigToHostInt32(mkextHeader->adler32) != checksum) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Kext archive has a bad checksum."); + result = kOSKextReturnBadData; + goto finish; + } + + if (checksumPtr) { + *checksumPtr = checksum; + } + + /* Check that the CPU type & subtype match that of the running kernel. 
*/ + if (OSSwapBigToHostInt32(mkextHeader->cputype) != (UInt32)CPU_TYPE_ANY) { + if ((UInt32)_mh_execute_header.cputype != + OSSwapBigToHostInt32(mkextHeader->cputype)) { + + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Kext archive doesn't contain software " + "for this computer's CPU type."); + result = kOSKextReturnArchNotFound; + goto finish; + } + } + + numKexts = OSSwapBigToHostInt32(mkextHeader->numkexts); + + for (uint32_t i = 0; i < numKexts; i++) { + + OSSafeReleaseNULL(infoDictDataObject); + OSSafeReleaseNULL(infoDict); + OSSafeReleaseNULL(mkextExecutableInfo); + OSSafeReleaseNULL(errorString); + OSSafeReleaseNULL(theKext); + + mkext_kext * kextEntry = &mkextHeader->kext[i]; + mkext_file * infoDictPtr = &kextEntry->plist; + mkext_file * executablePtr = &kextEntry->module; + if (kextEntry >= mkextEnd) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Mkext file overrun."); + result = kOSKextReturnBadData; + goto finish; + } + + /* Note that we're pretty tolerant of errors in individual entries. + * As long as we can keep processing, we do. 
+ */ + infoDictDataObject = OSKext::extractMkext1Entry( + mkextHeader, infoDictPtr); + if (!infoDictDataObject) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Can't uncompress info dictionary " + "from mkext archive entry %d.", i); + continue; + } + + parsedXML = OSUnserializeXML( + (const char *)infoDictDataObject->getBytesNoCopy(), + &errorString); + if (parsedXML) { + infoDict = OSDynamicCast(OSDictionary, parsedXML); + } + if (!infoDict) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not a dictionary"; + } + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Error: Can't read XML property list " + "for mkext archive entry %d: %s.", i, errorCString); + continue; + } + + theKext = new OSKext; + if (!theKext) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Kext allocation failure."); + continue; + } + + /***** + * Prepare an entry to hold the mkext entry info for the + * compressed binary module, if there is one. If all four fields + * of the module entry are zero, there isn't one. 
+ */ + if ((OSSwapBigToHostInt32(executablePtr->offset) || + OSSwapBigToHostInt32(executablePtr->compsize) || + OSSwapBigToHostInt32(executablePtr->realsize) || + OSSwapBigToHostInt32(executablePtr->modifiedsecs))) { + + MkextEntryRef entryRef; + + mkextExecutableInfo = OSData::withCapacity(sizeof(entryRef)); + if (!mkextExecutableInfo) { + panic("Error: Couldn't allocate data object " + "for mkext archive entry %d.\n", i); + } + + entryRef.mkext = (mkext_basic_header *)mkextHeader; + entryRef.fileinfo = (uint8_t *)executablePtr; + if (!mkextExecutableInfo->appendBytes(&entryRef, + sizeof(entryRef))) { + + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Couldn't record executable info " + "for mkext archive entry %d.", i); + // we might hit a load error later but oh well + // xxx - should probably remove theKext + continue; + } + + } + + /* Init can fail because of a data/runtime error, or because the + * kext is a dup. Either way, we don't care here. + */ + if (!theKext->initWithMkext1Info(infoDict, mkextExecutableInfo, + mkextData)) { + + // theKext is released at the top of the loop or in the finish block + continue; + } + + /* If we got even one kext out of the mkext archive, + * we have successfully read the archive, in that we + * have data references into its mapped memory. 
+ */ + result = kOSReturnSuccess; + } + +finish: + + OSSafeRelease(infoDictDataObject); + OSSafeRelease(parsedXML); + OSSafeRelease(errorString); + OSSafeRelease(mkextExecutableInfo); + OSSafeRelease(theKext); + + return result; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::initWithMkext1Info( + OSDictionary * anInfoDict, + OSData * executableWrapper, + OSData * mkextData) +{ + bool result = false; + + // mkext1 doesn't allow for path (might stuff in info dict) + if (!setInfoDictionaryAndPath(anInfoDict, /* path */ NULL)) { + goto finish; + } + + if (!registerIdentifier()) { + goto finish; + } + + if (!setExecutable(executableWrapper, mkextData, true)) { + goto finish; + } + + result = true; + +finish: + + /* If we can't init, remove the kext from the lookup dictionary. + * This is safe to call in init because there's an implicit retain. + */ + if (!result) { + OSKext::removeKext(this, /* removePersonalities? */ false); + } + + return result; +} + +/********************************************************************* +* xxx - this should take the input data length +*********************************************************************/ +/* static */ +OSData * +OSKext::extractMkext1Entry( + const void * mkextFileBase, + const void * entry) +{ + OSData * result = NULL; + OSData * uncompressedData = NULL; // release on error + const char * errmsg = NULL; + + mkext_file * fileinfo; + uint8_t * uncompressedDataBuffer = 0; // do not free (panic on alloc. 
fail) + size_t uncompressed_size = 0; + kern_return_t kern_result; + + fileinfo = (mkext_file *)entry; + + size_t offset = OSSwapBigToHostInt32(fileinfo->offset); + size_t compressed_size = OSSwapBigToHostInt32(fileinfo->compsize); + size_t expected_size = OSSwapBigToHostInt32(fileinfo->realsize); + + // Add 1 for '\0' to terminate XML string (for plists) + // (we really should have the archive format include that). + size_t alloc_size = expected_size + 1; + time_t modifiedsecs = OSSwapBigToHostInt32(fileinfo->modifiedsecs); + + /* If these four fields are zero there's no file, but it's up to + * the calling context to decide if that's an error. + */ + if (offset == 0 && compressed_size == 0 && + expected_size == 0 && modifiedsecs == 0) { + goto finish; + } + + kern_result = kmem_alloc(kernel_map, + (vm_offset_t *)&uncompressedDataBuffer, + alloc_size); + if (kern_result != KERN_SUCCESS) { + panic(ALLOC_FAIL); + goto finish; + } + + uncompressedData = OSData::withBytesNoCopy(uncompressedDataBuffer, + alloc_size); + if (uncompressedData == NULL) { + /* No need to free uncompressedDataBuffer here, either. */ + panic(ALLOC_FAIL); + goto finish; + } + uncompressedData->setDeallocFunction(&osdata_kmem_free); + + /* Do the decompression if necessary. Note that even if the file isn't + * compressed, we want to make a copy so that we don't have the tie to + * the larger mkext file buffer any more. + * xxx - need to detect decompression overflow too + */ + if (compressed_size != 0) { + errmsg = "OSKext::uncompressMkext - " + "uncompressed file shorter than expected"; + uncompressed_size = decompress_lzss(uncompressedDataBuffer, + expected_size, + ((uint8_t *)mkextFileBase) + offset, + compressed_size); + if (uncompressed_size != expected_size) { + goto finish; + } + } else { + memcpy(uncompressedDataBuffer, + ((uint8_t *)mkextFileBase) + offset, + expected_size); + } + + // Add a terminating nul character in case the data is XML. 
+ // (we really should have the archive format include that). + uncompressedDataBuffer[expected_size] = '\0'; + + result = uncompressedData; + errmsg = NULL; + +finish: + if (!result) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "%s", errmsg); + + if (uncompressedData) { + uncompressedData->release(); + } + } + return result; +} + +/********************************************************************* +* Assumes magic, signature, version, length have been checked. +* xxx - need to add further bounds checking for each file entry +* +* Should keep track of all kexts created so far, and if we hit a +* fatal error halfway through, remove those kexts. If we've dropped +* an older version that had already been read, whoops! Might want to +* add a level of buffering? +*********************************************************************/ +/* static */ +OSReturn +OSKext::readMkext2Archive( + OSData * mkextData, + OSDictionary ** mkextPlistOut, + uint32_t * checksumPtr) +{ + OSReturn result = kOSReturnError; + uint32_t mkextLength; + mkext2_header * mkextHeader = NULL; // do not free + void * mkextEnd = NULL; // do not free + uint32_t mkextVersion; + uint8_t * crc_address = NULL; + uint32_t checksum; + uint32_t mkextPlistOffset; + uint32_t mkextPlistCompressedSize; + char * mkextPlistEnd = NULL; // do not free + uint32_t mkextPlistFullSize; + OSString * errorString = NULL; // must release + OSData * mkextPlistUncompressedData = NULL; // must release + const char * mkextPlistDataBuffer = NULL; // do not free + OSObject * parsedXML = NULL; // must release + OSDictionary * mkextPlist = NULL; // do not release + OSArray * mkextInfoDictArray = NULL; // do not release + uint32_t count, i; + + mkextLength = mkextData->getLength(); + mkextHeader = (mkext2_header *)mkextData->getBytesNoCopy(); + mkextEnd = (char *)mkextHeader + mkextLength; + mkextVersion = MKEXT_GET_VERSION(mkextHeader); + + crc_address = (u_int8_t *)&mkextHeader->version; + 
checksum = mkext_adler32(crc_address, + (uintptr_t)mkextHeader + + MKEXT_GET_LENGTH(mkextHeader) - (uintptr_t)crc_address); + + if (MKEXT_GET_CHECKSUM(mkextHeader) != checksum) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive has bad checksum."); + result = kOSKextReturnBadData; + goto finish; + } + + if (checksumPtr) { + *checksumPtr = checksum; + } + + /* Check that the CPU type & subtype match that of the running kernel. */ + if (MKEXT_GET_CPUTYPE(mkextHeader) == (UInt32)CPU_TYPE_ANY) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive must have a specific CPU type."); + result = kOSKextReturnBadData; + goto finish; + } else { + if ((UInt32)_mh_execute_header.cputype != + MKEXT_GET_CPUTYPE(mkextHeader)) { + + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive does not match the running kernel's CPU type."); + result = kOSKextReturnArchNotFound; + goto finish; + } + } + + mkextPlistOffset = MKEXT2_GET_PLIST(mkextHeader); + mkextPlistCompressedSize = MKEXT2_GET_PLIST_COMPSIZE(mkextHeader); + mkextPlistEnd = (char *)mkextHeader + mkextPlistOffset + + mkextPlistCompressedSize; + if (mkextPlistEnd > mkextEnd) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive file overrun."); + result = kOSKextReturnBadData; + } + + mkextPlistFullSize = MKEXT2_GET_PLIST_FULLSIZE(mkextHeader); + if (mkextPlistCompressedSize) { + mkextPlistUncompressedData = sKernelKext->extractMkext2FileData( + (UInt8 *)mkextHeader + mkextPlistOffset, + "plist", + mkextPlistCompressedSize, mkextPlistFullSize); + if (!mkextPlistUncompressedData) { + goto finish; + } + mkextPlistDataBuffer = (const char *) + mkextPlistUncompressedData->getBytesNoCopy(); + } else { + mkextPlistDataBuffer = (const char *)mkextHeader + mkextPlistOffset; + } + + /* IOCFSerialize added a nul byte to the end of the string. Very nice of it. 
+ */ + parsedXML = OSUnserializeXML(mkextPlistDataBuffer, &errorString); + if (parsedXML) { + mkextPlist = OSDynamicCast(OSDictionary, parsedXML); + } + if (!mkextPlist) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not a dictionary"; + } + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Error unserializing mkext plist: %s.", errorCString); + goto finish; + } + + /* If the caller needs the plist, hand it back and retain it. + * (This function releases it at the end.) + */ + if (mkextPlistOut) { + *mkextPlistOut = mkextPlist; + (*mkextPlistOut)->retain(); + } + + mkextInfoDictArray = OSDynamicCast(OSArray, + mkextPlist->getObject(kMKEXTInfoDictionariesKey)); + if (!mkextInfoDictArray) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext archive contains no kext info dictionaries."); + goto finish; + } + + count = mkextInfoDictArray->getCount(); + for (i = 0; i < count; i++) { + OSDictionary * infoDict; + + + infoDict = OSDynamicCast(OSDictionary, + mkextInfoDictArray->getObject(i)); + + /* Create the kext for the entry, then release it, because the + * kext system keeps them around until explicitly removed. + * Any creation/registration failures are already logged for us. + */ + OSKext * newKext = OSKext::withMkext2Info(infoDict, mkextData); + OSSafeRelease(newKext); + } + + /* Even if we didn't keep any kexts from the mkext, we may have a load + * request to process, so we are successful (no errors occurred). 
+ */ + result = kOSReturnSuccess; + +finish: + + OSSafeRelease(parsedXML); + OSSafeRelease(mkextPlistUncompressedData); + OSSafeRelease(errorString); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSKext * +OSKext::withMkext2Info( + OSDictionary * anInfoDict, + OSData * mkextData) +{ + OSKext * newKext = new OSKext; + + if (newKext && !newKext->initWithMkext2Info(anInfoDict, mkextData)) { + newKext->release(); + return NULL; + } + + return newKext; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::initWithMkext2Info( + OSDictionary * anInfoDict, + OSData * mkextData) +{ + bool result = false; + OSString * kextPath = NULL; // do not release + OSNumber * executableOffsetNum = NULL; // do not release + OSCollectionIterator * iterator = NULL; // must release + OSData * executable = NULL; // must release + + if (!super::init()) { + goto finish; + } + + /* Get the path. Don't look for an arch-specific path property. + */ + kextPath = OSDynamicCast(OSString, + anInfoDict->getObject(kMKEXTBundlePathKey)); + + if (!setInfoDictionaryAndPath(anInfoDict, kextPath)) { + goto finish; + } + + /* Don't need the path to be in the info dictionary any more. 
+ */ + anInfoDict->removeObject(kMKEXTBundlePathKey); + + executableOffsetNum = OSDynamicCast(OSNumber, + infoDict->getObject(kMKEXTExecutableKey)); + if (executableOffsetNum) { + executable = createMkext2FileEntry(mkextData, + executableOffsetNum, "executable"); + infoDict->removeObject(kMKEXTExecutableKey); + if (!executable) { + goto finish; + } + if (!setExecutable(executable, mkextData, true)) { + goto finish; + } + } + + result = registerIdentifier(); + +finish: + + OSSafeRelease(executable); + OSSafeRelease(iterator); + return result; +} + +/********************************************************************* +*********************************************************************/ +OSData * +OSKext::createMkext2FileEntry( + OSData * mkextData, + OSNumber * offsetNum, + const char * name) +{ + OSData * result = NULL; + MkextEntryRef entryRef; + uint8_t * mkextBuffer = (uint8_t *)mkextData->getBytesNoCopy(); + uint32_t entryOffset = offsetNum->unsigned32BitValue(); + + result = OSData::withCapacity(sizeof(entryRef)); + if (!result) { + goto finish; + } + + entryRef.mkext = (mkext_basic_header *)mkextBuffer; + entryRef.fileinfo = mkextBuffer + entryOffset; + if (!result->appendBytes(&entryRef, sizeof(entryRef))) { + OSSafeReleaseNULL(result); + goto finish; + } + +finish: + if (!result) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Can't create wrapper for mkext file entry '%s' of kext %s.", + name, getIdentifierCString()); + } + return result; +} + +/********************************************************************* +*********************************************************************/ +extern "C" { +static void * z_alloc(void *, u_int items, u_int size); +static void z_free(void *, void *ptr); + +typedef struct z_mem { + uint32_t alloc_size; + uint8_t data[0]; +} z_mem; + +/* + * Space allocation and freeing routines for use by zlib routines. 
+ */ +void * +z_alloc(void * notused __unused, u_int num_items, u_int size) +{ + void * result = NULL; + z_mem * zmem = NULL; + uint32_t total = num_items * size; + uint32_t allocSize = total + sizeof(zmem); + + zmem = (z_mem *)kalloc(allocSize); + if (!zmem) { + goto finish; + } + zmem->alloc_size = allocSize; + result = (void *)&(zmem->data); +finish: + return result; +} + +void +z_free(void * notused __unused, void * ptr) +{ + uint32_t * skipper = (uint32_t *)ptr - 1; + z_mem * zmem = (z_mem *)skipper; + kfree((void *)zmem, zmem->alloc_size); + return; +} +}; + +OSData * +OSKext::extractMkext2FileData( + UInt8 * data, + const char * name, + uint32_t compressedSize, + uint32_t fullSize) +{ + OSData * result = NULL; + + OSData * uncompressedData = NULL; // release on error + + uint8_t * uncompressedDataBuffer = 0; // do not free + unsigned long uncompressedSize; + z_stream zstream; + bool zstream_inited = false; + int zlib_result; + + /* If the file isn't compressed, we want to make a copy + * so that we don't have the tie to the larger mkext file buffer any more. + */ + if (!compressedSize) { + uncompressedData = OSData::withBytes(data, fullSize); + // xxx - no check for failure? + result = uncompressedData; + goto finish; + } + + if (KERN_SUCCESS != kmem_alloc(kernel_map, + (vm_offset_t*)&uncompressedDataBuffer, fullSize)) { + + /* How's this for cheesy? The kernel is only asked to extract + * kext plists so we tailor the log messages. 
+ */ + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Allocation failure extracting %s from mkext.", name); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Allocation failure extracting %s from mkext for kext %s.", + name, getIdentifierCString()); + } + + goto finish; + } + uncompressedData = OSData::withBytesNoCopy(uncompressedDataBuffer, fullSize); + if (!uncompressedData) { + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Allocation failure extracting %s from mkext.", name); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Allocation failure extracting %s from mkext for kext %s.", + name, getIdentifierCString()); + } + goto finish; + } + uncompressedData->setDeallocFunction(&osdata_kmem_free); + + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogArchiveFlag, + "Kernel extracted %s from mkext - compressed size %d, uncompressed size %d.", + name, compressedSize, fullSize); + } else { + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogArchiveFlag, + "Kext %s extracted %s from mkext - compressed size %d, uncompressed size %d.", + getIdentifierCString(), name, compressedSize, fullSize); + } + + bzero(&zstream, sizeof(zstream)); + zstream.next_in = (UInt8 *)data; + zstream.avail_in = compressedSize; + + zstream.next_out = uncompressedDataBuffer; + zstream.avail_out = fullSize; + + zstream.zalloc = z_alloc; + zstream.zfree = z_free; + + zlib_result = inflateInit(&zstream); + if (Z_OK != zlib_result) { + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext error; zlib inflateInit failed (%d) for %s.", + zlib_result, name); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s - mkext error; zlib inflateInit failed (%d) for %s .", + getIdentifierCString(), zlib_result, name); + } + 
goto finish; + } else { + zstream_inited = true; + } + + zlib_result = inflate(&zstream, Z_FINISH); + + if (zlib_result == Z_STREAM_END || zlib_result == Z_OK) { + uncompressedSize = zstream.total_out; + } else { + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext error; zlib inflate failed (%d) for %s.", + zlib_result, name); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s - mkext error; zlib inflate failed (%d) for %s .", + getIdentifierCString(), zlib_result, name); + } + if (zstream.msg) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "zlib error: %s.", zstream.msg); + } + goto finish; + } + + if (uncompressedSize != fullSize) { + if (this == sKernelKext) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Mkext error; zlib inflate discrepancy for %s, " + "uncompressed size != original size.", name); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s - mkext error; zlib inflate discrepancy for %s, " + "uncompressed size != original size.", + getIdentifierCString(), name); + } + goto finish; + } + + result = uncompressedData; + +finish: + /* Don't bother checking return, nothing we can do on fail. 
+ */ + if (zstream_inited) inflateEnd(&zstream); + + if (!result) { + OSSafeRelease(uncompressedData); + } + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::loadFromMkext( + OSKextLogSpec clientLogFilter, + char * mkextBuffer, + uint32_t mkextBufferLength, + char ** logInfoOut, + uint32_t * logInfoLengthOut) +{ + OSReturn result = kOSReturnError; + OSReturn tempResult = kOSReturnError; + + OSData * mkextData = NULL; // must release + OSDictionary * mkextPlist = NULL; // must release + + OSArray * logInfoArray = NULL; // must release + OSSerialize * serializer = NULL; // must release + + OSString * predicate = NULL; // do not release + OSDictionary * requestArgs = NULL; // do not release + + OSString * kextIdentifier = NULL; // do not release + OSNumber * startKextExcludeNum = NULL; // do not release + OSNumber * startMatchingExcludeNum = NULL; // do not release + OSBoolean * delayAutounloadBool = NULL; // do not release + OSArray * personalityNames = NULL; // do not release + + /* Default values for these two options: regular autounload behavior, + * load all kexts, send no personalities. + */ + Boolean delayAutounload = false; + OSKextExcludeLevel startKextExcludeLevel = kOSKextExcludeNone; + OSKextExcludeLevel startMatchingExcludeLevel = kOSKextExcludeAll; + + IORecursiveLockLock(sKextLock); + + if (logInfoOut) { + *logInfoOut = NULL; + *logInfoLengthOut = 0; + } + + OSKext::setUserSpaceLogFilter(clientLogFilter, logInfoOut ? true : false); + + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Received kext load request from user space."); + + /* Regardless of processing, the fact that we have gotten here means some + * user-space program is up and talking to us, so we'll switch our kext + * registration to reflect that. 
+ */ + if (!sUserLoadsActive) { + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogGeneralFlag | kOSKextLogLoadFlag, + "Switching to late startup (user-space) kext loading policy."); + + sUserLoadsActive = true; + } + + if (!sLoadEnabled) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext loading is disabled."); + result = kOSKextReturnDisabled; + goto finish; + } + + /* Note that we do not set a dealloc function on this OSData + * object! No references to it can remain after the loadFromMkext() + * call since we are in a MIG function, and will vm_deallocate() + * the buffer. + */ + mkextData = OSData::withBytesNoCopy(mkextBuffer, + mkextBufferLength); + if (!mkextData) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogIPCFlag, + "Failed to create wrapper for kext load request."); + result = kOSKextReturnNoMemory; + goto finish; + } + + result = readMkext2Archive(mkextData, &mkextPlist, NULL); + if (result != kOSReturnSuccess) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Failed to read kext load request."); + goto finish; + } + + predicate = _OSKextGetRequestPredicate(mkextPlist); + if (!predicate || !predicate->isEqualTo(kKextRequestPredicateLoad)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Received kext load request with no predicate; skipping."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + requestArgs = OSDynamicCast(OSDictionary, + mkextPlist->getObject(kKextRequestArgumentsKey)); + if (!requestArgs || !requestArgs->getCount()) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Received kext load request with no arguments."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + kextIdentifier = OSDynamicCast(OSString, + requestArgs->getObject(kKextRequestArgumentBundleIdentifierKey)); + if (!kextIdentifier) { + OSKextLog(/* kext */ NULL, 
+ kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Received kext load request with no kext identifier."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + startKextExcludeNum = OSDynamicCast(OSNumber, + requestArgs->getObject(kKextKextRequestArgumentStartExcludeKey)); + startMatchingExcludeNum = OSDynamicCast(OSNumber, + requestArgs->getObject(kKextRequestArgumentStartMatchingExcludeKey)); + delayAutounloadBool = OSDynamicCast(OSBoolean, + requestArgs->getObject(kKextRequestArgumentDelayAutounloadKey)); + personalityNames = OSDynamicCast(OSArray, + requestArgs->getObject(kKextRequestArgumentPersonalityNamesKey)); + + if (delayAutounloadBool) { + delayAutounload = delayAutounloadBool->getValue(); + } + if (startKextExcludeNum) { + startKextExcludeLevel = startKextExcludeNum->unsigned8BitValue(); + } + if (startMatchingExcludeNum) { + startMatchingExcludeLevel = startMatchingExcludeNum->unsigned8BitValue(); + } + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogIPCFlag, + "Received request from user space to load kext %s.", + kextIdentifier->getCStringNoCopy()); + + /* Load the kext, with no deferral, since this is a load from outside + * the kernel. + * xxx - Would like a better way to handle the default values for the + * xxx - start/match opt args. + */ + result = OSKext::loadKextWithIdentifier( + kextIdentifier, + /* allowDefer */ false, + delayAutounload, + startKextExcludeLevel, + startMatchingExcludeLevel, + personalityNames); + if (result != kOSReturnSuccess) { + goto finish; + } + /* If the load came down from kextd, it will shortly inform IOCatalogue + * for matching via a separate IOKit calldown. + */ + +finish: + + /* Gather up the collected log messages for user space. Any + * error messages past this call will not make it up as log messages + * but will be in the system log. 
+ */ + logInfoArray = OSKext::clearUserSpaceLogFilter(); + + if (logInfoArray && logInfoOut && logInfoLengthOut) { + tempResult = OSKext::serializeLogInfo(logInfoArray, + logInfoOut, logInfoLengthOut); + if (tempResult != kOSReturnSuccess) { + result = tempResult; + } + } + + OSKext::flushNonloadedKexts(/* flushPrelinkedKexts */ false); + + /* Note: mkextDataObject will have been retained by every kext w/an + * executable in it. That should all have been flushed out at the + * and of the load operation, but you never know.... + */ + if (mkextData && mkextData->getRetainCount() > 1) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogIPCFlag, + "Kext load request buffer from user space still retained by a kext; " + "probable memory leak."); + } + + IORecursiveLockUnlock(sKextLock); + + OSSafeRelease(mkextData); + OSSafeRelease(mkextPlist); + OSSafeRelease(serializer); + OSSafeRelease(logInfoArray); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::serializeLogInfo( + OSArray * logInfoArray, + char ** logInfoOut, + uint32_t * logInfoLengthOut) +{ + OSReturn result = kOSReturnError; + char * buffer = NULL; + kern_return_t kmem_result = KERN_FAILURE; + OSSerialize * serializer = NULL; // must release; reused + char * logInfo = NULL; // returned by reference + uint32_t logInfoLength = 0; + + if (!logInfoArray || !logInfoOut || !logInfoLengthOut) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Internal error; invalid arguments to OSKext::serializeLogInfo()."); + /* Bad programmer. 
*/ + result = kOSKextReturnInvalidArgument; + goto finish; + } + + serializer = OSSerialize::withCapacity(0); + if (!serializer) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to create serializer on log info for request from user space."); + /* Incidental error; we're going to (try to) allow the request + * itself to succeed. */ + } + + if (!logInfoArray->serialize(serializer)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to serialize log info for request from user space."); + /* Incidental error; we're going to (try to) allow the request + * itself to succeed. */ + } else { + logInfo = serializer->text(); + logInfoLength = serializer->getLength(); + + kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, logInfoLength); + if (kmem_result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to copy log info for request from user space."); + /* Incidental error; we're going to (try to) allow the request + * to succeed. 
*/ + } else { + memcpy(buffer, logInfo, logInfoLength); + *logInfoOut = buffer; + *logInfoLengthOut = logInfoLength; + } + } + + result = kOSReturnSuccess; +finish: + OSSafeRelease(serializer); + return result; +} + +#if PRAGMA_MARK +#pragma mark Instance Management Methods +#endif +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::lookupKextWithIdentifier(const char * kextIdentifier) +{ + OSKext * foundKext = NULL; + + IORecursiveLockLock(sKextLock); + foundKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextIdentifier)); + if (foundKext) { + foundKext->retain(); + } + IORecursiveLockUnlock(sKextLock); + + return foundKext; +} + +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::lookupKextWithIdentifier(OSString * kextIdentifier) +{ + return OSKext::lookupKextWithIdentifier(kextIdentifier->getCStringNoCopy()); +} + +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::lookupKextWithLoadTag(uint32_t aTag) +{ + OSKext * foundKext = NULL; // returned + uint32_t count, i; + + IORecursiveLockLock(sKextLock); + + count = sLoadedKexts->getCount(); + for (i = 0; i < count; i++) { + OSKext * thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (thisKext->getLoadTag() == aTag) { + foundKext = thisKext; + foundKext->retain(); + goto finish; + } + } + +finish: + IORecursiveLockUnlock(sKextLock); + + return foundKext; +} + +/********************************************************************* +*********************************************************************/ +OSKext * +OSKext::lookupKextWithAddress(vm_address_t address) +{ + OSKext * foundKext = NULL; // returned + uint32_t count, i; + + IORecursiveLockLock(sKextLock); + + 
count = sLoadedKexts->getCount(); + for (i = 0; i < count; i++) { + OSKext * thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (thisKext->linkedExecutable) { + vm_address_t kext_start = + (vm_address_t)thisKext->linkedExecutable->getBytesNoCopy(); + vm_address_t kext_end = kext_start + + thisKext->linkedExecutable->getLength(); + + if ((kext_start <= address) && (address < kext_end)) { + foundKext = thisKext; + foundKext->retain(); + goto finish; + } + } + } + +finish: + IORecursiveLockUnlock(sKextLock); + + return foundKext; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +bool OSKext::isKextWithIdentifierLoaded(const char * kextIdentifier) +{ + bool result = false; + OSKext * foundKext = NULL; // returned + + IORecursiveLockLock(sKextLock); + + foundKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextIdentifier)); + if (foundKext && foundKext->isLoaded()) { + result = true; + } + + IORecursiveLockUnlock(sKextLock); + + return result; +} + +/********************************************************************* +* xxx - should spawn a separate thread so a kext can safely have +* xxx - itself unloaded. +*********************************************************************/ +/* static */ +OSReturn +OSKext::removeKext( + OSKext * aKext, + bool terminateServicesAndRemovePersonalitiesFlag) + { + OSReturn result = kOSKextReturnInUse; + OSKext * checkKext = NULL; // do not release + + IORecursiveLockLock(sKextLock); + + /* If the kext has no identifier, it failed to init + * so isn't in sKextsByID and it isn't loaded. 
+ */ + if (!aKext->getIdentifier()) { + result = kOSReturnSuccess; + goto finish; + } + + checkKext = OSDynamicCast(OSKext, + sKextsByID->getObject(aKext->getIdentifier())); + if (checkKext != aKext) { + result = kOSKextReturnNotFound; + goto finish; + } + + if (aKext->isLoaded()) { + /* If we are terminating, send the request to the IOCatalogue + * (which will actually call us right back but that's ok we have + * a recursive lock don't you know) but do not ask the IOCatalogue + * to call back with an unload, we'll do that right here. + */ + if (terminateServicesAndRemovePersonalitiesFlag) { + result = gIOCatalogue->terminateDriversForModule( + aKext->getIdentifierCString(), /* unload */ false); + if (result != kOSReturnSuccess) { + OSKextLog(aKext, + kOSKextLogProgressLevel | + kOSKextLogKextBookkeepingFlag, + "Can't remove kext %s; services failed to terminate - 0x%x.", + aKext->getIdentifierCString(), result); + goto finish; + } + } + + result = aKext->unload(); + if (result != kOSReturnSuccess) { + goto finish; + } + } + + /* Remove personalities as requested. This is a bit redundant for a loaded + * kext as IOCatalogue::terminateDriversForModule() removes driver + * personalities, but it doesn't restart matching, which we always want + * coming from here, and OSKext::removePersonalitiesFromCatalog() ensures + * that happens. 
+ */ + if (terminateServicesAndRemovePersonalitiesFlag) { + aKext->removePersonalitiesFromCatalog(); + } + + OSKextLog(aKext, + kOSKextLogProgressLevel | + kOSKextLogKextBookkeepingFlag, + "Removing kext %s.", + aKext->getIdentifierCString()); + + sKextsByID->removeObject(aKext->getIdentifier()); + result = kOSReturnSuccess; + +finish: + IORecursiveLockUnlock(sKextLock); + return result; + } + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::removeKextWithIdentifier( + const char * kextIdentifier, + bool terminateServicesAndRemovePersonalitiesFlag) +{ + OSReturn result = kOSReturnError; + + IORecursiveLockLock(sKextLock); + + OSKext * aKext = OSDynamicCast(OSKext, + sKextsByID->getObject(kextIdentifier)); + if (!aKext) { + result = kOSKextReturnNotFound; + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogKextBookkeepingFlag, + "Can't remove kext %s - not found.", + kextIdentifier); + goto finish; + } + + result = OSKext::removeKext(aKext, + terminateServicesAndRemovePersonalitiesFlag); + +finish: + IORecursiveLockUnlock(sKextLock); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::removeKextWithLoadTag( + OSKextLoadTag loadTag, + bool terminateServicesAndRemovePersonalitiesFlag) +{ + OSReturn result = kOSReturnError; + OSKext * foundKext = NULL; + uint32_t count, i; + + IORecursiveLockLock(sKextLock); + + count = sLoadedKexts->getCount(); + for (i = 0; i < count; i++) { + OSKext * thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (thisKext->loadTag == loadTag) { + foundKext = thisKext; + break; + } + } + + if (!foundKext) { + result = kOSKextReturnNotFound; + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | 
kOSKextLogKextBookkeepingFlag, + "Can't remove kext with load tag %d - not found.", + loadTag); + goto finish; + } + + result = OSKext::removeKext(foundKext, + terminateServicesAndRemovePersonalitiesFlag); + +finish: + IORecursiveLockUnlock(sKextLock); + + return result; + } + +/********************************************************************* +*********************************************************************/ +OSDictionary * +OSKext::copyKexts(void) +{ + OSDictionary * result; + + IORecursiveLockLock(sKextLock); + result = OSDynamicCast(OSDictionary, sKextsByID->copyCollection()); + IORecursiveLockUnlock(sKextLock); + + return result; +} + +#if PRAGMA_MARK +#pragma mark Accessors +#endif +/********************************************************************* +*********************************************************************/ +const OSSymbol * +OSKext::getIdentifier(void) +{ + return bundleID; +} + +/********************************************************************* +* A kext must have a bundle identifier to even survive initialization; +* this is guaranteed to exist past then. 
+*********************************************************************/ +const char * +OSKext::getIdentifierCString(void) +{ + return bundleID->getCStringNoCopy(); +} + +/********************************************************************* +*********************************************************************/ +OSKextVersion +OSKext::getVersion(void) +{ + return version; +} + +/********************************************************************* +*********************************************************************/ +OSKextVersion +OSKext::getCompatibleVersion(void) +{ + return compatibleVersion; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isCompatibleWithVersion(OSKextVersion aVersion) +{ + if ((compatibleVersion > -1 && version > -1) && + (compatibleVersion <= version && aVersion <= version)) { + return true; + } + return false; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::declaresExecutable(void) +{ + if (getPropertyForHostArch(kCFBundleExecutableKey)) { + return true; + } + return false; +} + +/********************************************************************* +*********************************************************************/ +OSData * +OSKext::getExecutable(void) +{ + OSData * result = NULL; + OSData * extractedExecutable = NULL; // must release + OSData * mkextExecutableRef = NULL; // do not release + + result = OSDynamicCast(OSData, infoDict->getObject(_kOSKextExecutableKey)); + if (result) { + goto finish; + } + + mkextExecutableRef = OSDynamicCast(OSData, + getPropertyForHostArch(_kOSKextMkextExecutableReferenceKey)); + + if (mkextExecutableRef) { + + MkextEntryRef * mkextEntryRef = (MkextEntryRef *) + mkextExecutableRef->getBytesNoCopy(); + uint32_t mkextVersion = MKEXT_GET_VERSION(mkextEntryRef->mkext); + if 
(mkextVersion == MKEXT_VERS_2) { + mkext2_file_entry * fileinfo = + (mkext2_file_entry *)mkextEntryRef->fileinfo; + uint32_t compressedSize = MKEXT2_GET_ENTRY_COMPSIZE(fileinfo); + uint32_t fullSize = MKEXT2_GET_ENTRY_FULLSIZE(fileinfo); + extractedExecutable = extractMkext2FileData( + MKEXT2_GET_ENTRY_DATA(fileinfo), "executable", + compressedSize, fullSize); + } else if (mkextVersion == MKEXT_VERS_1) { + extractedExecutable = extractMkext1Entry( + mkextEntryRef->mkext, mkextEntryRef->fileinfo); + } else { + OSKextLog(this, kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Kext %s - unknown mkext version 0x%x for executable.", + getIdentifierCString(), mkextVersion); + } + + /* Regardless of success, remove the mkext executable, + * and drop one reference on the mkext. (setExecutable() does not + * replace, it removes, or panics if asked to replace.) + */ + infoDict->removeObject(_kOSKextMkextExecutableReferenceKey); + infoDict->removeObject(_kOSKextExecutableExternalDataKey); + + if (extractedExecutable && extractedExecutable->getLength()) { + if (!setExecutable(extractedExecutable)) { + goto finish; + } + result = extractedExecutable; + } else { + goto finish; + } + } + +finish: + + OSSafeRelease(extractedExecutable); + + return result; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isInterface(void) +{ + return flags.interface; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isKernelComponent(void) +{ + return flags.kernelComponent ? true : false; +} + +/********************************************************************* +* We might want to check this recursively for all dependencies, +* since a subtree of dependencies could get loaded before we hit +* a dependency that isn't safe-boot-loadable. 
+* +* xxx - Might want to return false if OSBundleEnableKextLogging or +* OSBundleDebugLevel +* or IOKitDebug is nonzero too (we used to do that, but I don't see +* the point except it's usually development drivers, which might +* cause panics on startup, that have those properties). Heh; could +* use a "kx" boot-arg! +*********************************************************************/ +bool +OSKext::isLoadableInSafeBoot(void) +{ + bool result = false; + OSString * required = NULL; // do not release + + + required = OSDynamicCast(OSString, + getPropertyForHostArch(kOSBundleRequiredKey)); + if (!required) { + goto finish; + } + if (required->isEqualTo(kOSBundleRequiredRoot) || + required->isEqualTo(kOSBundleRequiredLocalRoot) || + required->isEqualTo(kOSBundleRequiredNetworkRoot) || + required->isEqualTo(kOSBundleRequiredSafeBoot) || + required->isEqualTo(kOSBundleRequiredConsole)) { + + result = true; + } + +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isPrelinked(void) +{ + return flags.prelinked ? true : false; +} + +/********************************************************************* +*********************************************************************/ +bool OSKext::isLoaded(void) +{ + return flags.loaded ? true : false; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isStarted(void) +{ + return flags.started ? 
true : false; +} + +/********************************************************************* +*********************************************************************/ +bool +OSKext::isCPPInitialized(void) +{ + return flags.CPPInitialized; +} + +/********************************************************************* +*********************************************************************/ +void +OSKext::setCPPInitialized(bool initialized) +{ + flags.CPPInitialized = initialized; +} + +/********************************************************************* +*********************************************************************/ +uint32_t +OSKext::getLoadTag(void) +{ + return loadTag; +} + +/********************************************************************* +*********************************************************************/ +OSData * +OSKext::copyUUID(void) +{ + OSData * result = NULL; + OSData * theExecutable = NULL; // do not release + const kernel_mach_header_t * header = NULL; + const struct load_command * load_cmd = NULL; + const struct uuid_command * uuid_cmd = NULL; + uint32_t i; + + /* An interface kext doesn't have a linked executable with an LC_UUID, + * we create one when it's linked. + */ + if (interfaceUUID) { + result = interfaceUUID; + result->retain(); + goto finish; + } + + /* For real kexts, try to get the UUID from the linked executable, + * or if is hasn't been linked yet, the unrelocated executable. 
+ */ + theExecutable = linkedExecutable; + if (!theExecutable) { + theExecutable = getExecutable(); + } + if (!theExecutable) { + goto finish; + } + + header = (const kernel_mach_header_t *)theExecutable->getBytesNoCopy(); + load_cmd = (const struct load_command *)&header[1]; + + for (i = 0; i < header->ncmds; i++) { + if (load_cmd->cmd == LC_UUID) { + uuid_cmd = (struct uuid_command *)load_cmd; + result = OSData::withBytes(uuid_cmd->uuid, sizeof(uuid_cmd->uuid)); + goto finish; + } + load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize); + } + +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +#if defined (__ppc__) +#define ARCHNAME "ppc" +#elif defined (__i386__) +#define ARCHNAME "i386" +#elif defined (__x86_64__) +#define ARCHNAME "x86_64" +#else +#error architecture not supported +#endif + +#define ARCH_SEPARATOR_CHAR '_' + +static char * makeHostArchKey(const char * key, uint32_t * keySizeOut) +{ + char * result = NULL; + uint32_t keyLength = strlen(key); + uint32_t keySize; + + /* Add 1 for the ARCH_SEPARATOR_CHAR, and 1 for the '\0'. + */ + keySize = 1 + 1 + strlen(key) + strlen(ARCHNAME); + result = (char *)kalloc(keySize); + if (!result) { + goto finish; + } + strlcpy(result, key, keySize); + result[keyLength++] = ARCH_SEPARATOR_CHAR; + result[keyLength] = '\0'; + strlcat(result, ARCHNAME, keySize); + *keySizeOut = keySize; + +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +OSObject * +OSKext::getPropertyForHostArch(const char * key) +{ + OSObject * result = NULL; // do not release + uint32_t hostArchKeySize = 0; + char * hostArchKey = NULL; // must kfree + + if (!key || !infoDict) { + goto finish; + } + + /* Some properties are not allowed to be arch-variant: + * - Any CFBundle... 
property. + * - OSBundleIsInterface. + * - OSKernelResource. + */ + if (STRING_HAS_PREFIX(key, "OS") || + STRING_HAS_PREFIX(key, "IO")) { + + hostArchKey = makeHostArchKey(key, &hostArchKeySize); + if (!hostArchKey) { + OSKextLog(/* kext (this isn't about a kext) */ NULL, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Allocation failure."); + goto finish; + } + result = infoDict->getObject(hostArchKey); + } + + if (!result) { + result = infoDict->getObject(key); + } + +finish: + if (hostArchKey) kfree(hostArchKey, hostArchKeySize); + return result; +} + +#if PRAGMA_MARK +#pragma mark Load/Start/Stop/Unload +#endif +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::loadKextWithIdentifier( + const char * kextIdentifierCString, + Boolean allowDeferFlag, + Boolean delayAutounloadFlag, + OSKextExcludeLevel startOpt, + OSKextExcludeLevel startMatchingOpt, + OSArray * personalityNames) +{ + OSReturn result = kOSReturnError; + OSString * kextIdentifier = NULL; // must release + + kextIdentifier = OSString::withCString(kextIdentifierCString); + if (!kextIdentifier) { + result = kOSKextReturnNoMemory; + goto finish; + } + result = OSKext::loadKextWithIdentifier(kextIdentifier, + allowDeferFlag, delayAutounloadFlag, + startOpt, startMatchingOpt, personalityNames); + +finish: + OSSafeRelease(kextIdentifier); + return result; +} + + +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::loadKextWithIdentifier( + OSString * kextIdentifier, + Boolean allowDeferFlag, + Boolean delayAutounloadFlag, + OSKextExcludeLevel startOpt, + OSKextExcludeLevel startMatchingOpt, + OSArray * personalityNames) +{ + OSReturn result = kOSReturnError; + OSKext * theKext = NULL; // do not release + OSDictionary * loadRequest = NULL; // must release + const OSSymbol * 
kextIdentifierSymbol = NULL; // must release + + IORecursiveLockLock(sKextLock); + + if (!kextIdentifier) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + + OSKext::recordIdentifierRequest(kextIdentifier); + + theKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextIdentifier)); + if (!theKext) { + if (!allowDeferFlag) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - not found.", + kextIdentifier->getCStringNoCopy()); + goto finish; + } + + if (!sKernelRequestsEnabled) { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - requests to user space are disabled.", + kextIdentifier->getCStringNoCopy()); + result = kOSKextReturnDisabled; + goto finish; + } + + /* Create a new request unless one is already sitting + * in sKernelRequests for this bundle identifier + */ + kextIdentifierSymbol = OSSymbol::withString(kextIdentifier); + if (!sPostedKextLoadIdentifiers->containsObject(kextIdentifierSymbol)) { + result = _OSKextCreateRequest(kKextRequestPredicateRequestLoad, + &loadRequest); + if (result != kOSReturnSuccess) { + goto finish; + } + if (!_OSKextSetRequestArgument(loadRequest, + kKextRequestArgumentBundleIdentifierKey, kextIdentifier)) { + + result = kOSKextReturnNoMemory; + goto finish; + } + if (!sKernelRequests->setObject(loadRequest)) { + result = kOSKextReturnNoMemory; + goto finish; + } + + if (!sPostedKextLoadIdentifiers->setObject(kextIdentifierSymbol)) { + result = kOSKextReturnNoMemory; + goto finish; + } + + OSKextLog(theKext, + kOSKextLogDebugLevel | + kOSKextLogLoadFlag, + "Kext %s not found; queued load request to user space.", + kextIdentifier->getCStringNoCopy()); + } + + if (sKextdActive) { + OSKextPingKextd(); + } else { + OSKextLog(/* kext */ NULL, + ((sPrelinkBoot) ? 
kOSKextLogDebugLevel : kOSKextLogErrorLevel) | + kOSKextLogLoadFlag, + "Not loading kext %s - not found and kextd not available in early boot.", + kextIdentifier->getCStringNoCopy()); + } + + result = kOSKextReturnDeferred; + goto finish; + } + + result = theKext->load(startOpt, startMatchingOpt, personalityNames); + + if (result != kOSReturnSuccess) { + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Failed to load kext %s (error 0x%x).", + kextIdentifier->getCStringNoCopy(), (int)result); + + OSKext::removeKext(theKext, + /* terminateService/removePersonalities */ true); + goto finish; + } + + if (delayAutounloadFlag) { + OSKextLog(theKext, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag, + "Setting delayed autounload for %s.", + kextIdentifier->getCStringNoCopy()); + theKext->flags.delayAutounload = 1; + } + +finish: + OSSafeRelease(loadRequest); + OSSafeRelease(kextIdentifierSymbol); + + IORecursiveLockUnlock(sKextLock); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::recordIdentifierRequest( + OSString * kextIdentifier) +{ + const OSSymbol * kextIdentifierSymbol = NULL; // must release + bool fail = false; + + if (!sAllKextLoadIdentifiers || !kextIdentifier) { + goto finish; + } + + kextIdentifierSymbol = OSSymbol::withString(kextIdentifier); + if (!kextIdentifierSymbol) { + // xxx - this is really a basic alloc failure + fail = true; + goto finish; + } + + if (!sAllKextLoadIdentifiers->containsObject(kextIdentifierSymbol)) { + if (!sAllKextLoadIdentifiers->setObject(kextIdentifierSymbol)) { + fail = true; + } else { + // xxx - need to find a way to associate this whole func w/the kext + OSKextLog(/* kext */ NULL, + // xxx - check level + kOSKextLogStepLevel | + kOSKextLogArchiveFlag, + "Recorded kext %s as a candidate for inclusion in prelinked kernel.", + 
kextIdentifier->getCStringNoCopy()); + } + } +finish: + + if (fail) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Failed to record kext %s as a candidate for inclusion in prelinked kernel.", + kextIdentifier->getCStringNoCopy()); + } + OSSafeRelease(kextIdentifierSymbol); + return; +} + +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::load( + OSKextExcludeLevel startOpt, + OSKextExcludeLevel startMatchingOpt, + OSArray * personalityNames) +{ + OSReturn result = kOSReturnError; + kern_return_t kxldResult; + OSKextExcludeLevel dependenciesStartOpt = startOpt; + OSKextExcludeLevel dependenciesStartMatchingOpt = startMatchingOpt; + unsigned int i, count; + Boolean alreadyLoaded = false; + OSKext * lastLoadedKext = NULL; + + if (!sLoadEnabled) { + if (!isLoaded() || (!isStarted() && startOpt != kOSKextExcludeNone) || + (startMatchingOpt != kOSKextExcludeNone)) { + + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext loading is disabled " + "(attempt to load/start/start matching for kext %s).", + getIdentifierCString()); + } + result = kOSKextReturnDisabled; + goto finish; + } + + if (isLoaded()) { + alreadyLoaded = true; + result = kOSReturnSuccess; + + OSKextLog(this, + kOSKextLogDebugLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag, + "Kext %s is already loaded.", + getIdentifierCString()); + goto loaded; + } + + /* If we've pushed the next available load tag to the invalid value, + * we can't load any more kexts. + */ + if (sNextLoadTag == kOSKextInvalidLoadTag) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - no more load tags to assign.", + getIdentifierCString()); + result = kOSKextReturnNoResources; + goto finish; + } + + /* This is a bit of a hack, because we shouldn't be handling + * personalities within the load function. 
+ */ + if (!declaresExecutable()) { + result = kOSReturnSuccess; + goto loaded; + } + + /* Are we in safe boot? + */ + if (sSafeBoot && !isLoadableInSafeBoot()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - not loadable during safe boot.", + getIdentifierCString()); + result = kOSKextReturnBootLevel; + goto finish; + } + + OSKextLog(this, + kOSKextLogProgressLevel | kOSKextLogLoadFlag, + "Loading kext %s.", + getIdentifierCString()); + + + if (!sKxldContext) { + kxldResult = kxld_create_context(&sKxldContext, &kern_allocate, + &kxld_log_callback, /* Flags */ (KXLDFlags) 0, + /* cputype */ 0, /* cpusubtype */ 0); + if (kxldResult) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogLinkFlag, + "Can't load kext %s - failed to create link context.", + getIdentifierCString()); + result = kOSKextReturnNoMemory; + goto finish; + } + } + + /* We only need to resolve dependencies once for the whole graph, but + * resolveDependencies will just return if there's no work to do, so it's + * safe to call it more than once. + */ + if (!resolveDependencies()) { + // xxx - check resolveDependencies() for log msg + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogDependenciesFlag, + "Can't load kext %s - failed to resolve library dependencies.", + getIdentifierCString()); + result = kOSKextReturnDependencies; + goto finish; + } + + /* If we are excluding just the kext being loaded now (and not its + * dependencies), drop the exclusion level to none so dependencies + * start and/or add their personalities. + */ + if (dependenciesStartOpt == kOSKextExcludeKext) { + dependenciesStartOpt = kOSKextExcludeNone; + } + + if (dependenciesStartMatchingOpt == kOSKextExcludeKext) { + dependenciesStartMatchingOpt = kOSKextExcludeNone; + } + + /* Load the dependencies, recursively. 
+ */ + count = getNumDependencies(); + for (i = 0; i < count; i++) { + OSKext * dependency = OSDynamicCast(OSKext, + dependencies->getObject(i)); + if (dependency == NULL) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogDependenciesFlag, + "Internal error loading kext %s; dependency disappeared.", + getIdentifierCString()); + result = kOSKextReturnInternalError; + goto finish; + } + + /* Dependencies must be started accorting to the opt, + * but not given the personality names of the main kext. + */ + result = dependency->load(dependenciesStartOpt, + dependenciesStartMatchingOpt, + /* personalityNames */ NULL); + if (result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogDependenciesFlag, + "Dependency %s of kext %s failed to load.", + dependency->getIdentifierCString(), + getIdentifierCString()); + + OSKext::removeKext(dependency, + /* terminateService/removePersonalities */ true); + result = kOSKextReturnDependencyLoadError; + + goto finish; + } + } + + result = loadExecutable(); + if (result != KERN_SUCCESS) { + goto finish; + } + + flags.loaded = true; + + /* Add the kext to the list of loaded kexts and update the kmod_info + * struct to point to that of the last loaded kext (which is the way + * it's always been done, though I'd rather do them in order now). + */ + lastLoadedKext = OSDynamicCast(OSKext, sLoadedKexts->getLastObject()); + sLoadedKexts->setObject(this); + + /* Keep the kernel itself out of the kmod list. + */ + if (lastLoadedKext == sKernelKext) { + lastLoadedKext = NULL; + } + + if (lastLoadedKext) { + kmod_info->next = lastLoadedKext->kmod_info; + } + + /* Make the global kmod list point at the just-loaded kext. Note that the + * __kernel__ kext isn't in this list, as it wasn't before SnowLeopard, + * although we do report it in kextstat these days by using the newer + * OSArray of loaded kexts, which does contain it. 
+ * + * (The OSKext object representing the kernel doesn't even have a kmod_info + * struct, though I suppose we could stick a pointer to it from the + * static struct in OSRuntime.cpp.) + */ + kmod = kmod_info; + + /* Save the list of loaded kexts in case we panic. + */ + clock_get_uptime(&last_loaded_timestamp); + OSKext::saveLoadedKextPanicList(); + +loaded: + /* This is a bit of a hack, because we shouldn't be handling + * personalities within the load function. + */ + if (declaresExecutable() && (startOpt == kOSKextExcludeNone)) { + result = start(); + if (result != kOSReturnSuccess) { + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogLoadFlag, + "Kext %s start failed (result 0x%x).", + getIdentifierCString(), result); + result = kOSKextReturnStartStopError; + } + } + + /* If not excluding matching, send the personalities to the kernel. + * This never affects the result of the load operation. + */ + if (result == kOSReturnSuccess && startMatchingOpt == kOSKextExcludeNone) { + sendPersonalitiesToCatalog(true, personalityNames); + } + +finish: + if (result != kOSReturnSuccess) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to load (0x%x).", + getIdentifierCString(), (int)result); + } else if (!alreadyLoaded) { + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s loaded.", + getIdentifierCString()); + } + return result; +} + +/********************************************************************* +* called only by load() +*********************************************************************/ +OSReturn +OSKext::loadExecutable() +{ + OSReturn result = kOSReturnError; + kern_return_t kxldResult; + u_char ** kxlddeps = NULL; // must kfree + uint32_t num_kxlddeps = 0; + uint32_t num_kmod_refs = 0; + u_char * linkStateBytes = NULL; // do not free + u_long linkStateLength = 0; + u_char ** linkStateBytesPtr = NULL; // do not free + u_long * linkStateLengthPtr = NULL; // do not free + struct mach_header ** 
kxldHeaderPtr = NULL; // do not free + struct mach_header * kxld_header = NULL; // xxx - need to free here? + OSData * theExecutable = NULL; // do not release + OSString * versString = NULL; // do not release + const char * versCString = NULL; // do not free + const char * string = NULL; // do not free + unsigned int i; + + /* We need the version string for a variety of bits below. + */ + versString = OSDynamicCast(OSString, + getPropertyForHostArch(kCFBundleVersionKey)); + if (!versString) { + goto finish; + } + versCString = versString->getCStringNoCopy(); + + if (isKernelComponent()) { + if (STRING_HAS_PREFIX(versCString, KERNEL_LIB_PREFIX)) { + if (strncmp(versCString, KERNEL6_VERSION, strlen(KERNEL6_VERSION))) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kernel component %s has incorrect version %s; " + "expected %s.", + getIdentifierCString(), + versCString, KERNEL6_VERSION); + result = kOSKextReturnInternalError; + goto finish; + } else if (strcmp(versCString, osrelease)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kernel component %s has incorrect version %s; " + "expected %s.", + getIdentifierCString(), + versCString, osrelease); + result = kOSKextReturnInternalError; + goto finish; + } + } + } + + if (isPrelinked()) { + goto register_kmod; + } + + theExecutable = getExecutable(); + if (!theExecutable) { + if (declaresExecutable()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - executable is missing.", + getIdentifierCString()); + result = kOSKextReturnValidation; + goto finish; + } + goto register_kmod; + } + + if (isKernelComponent()) { + num_kxlddeps = 1; // the kernel itself + } else { + num_kxlddeps = getNumDependencies(); + } + if (!num_kxlddeps) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogDependenciesFlag, + "Can't load kext %s - it has no library dependencies.", + getIdentifierCString()); + goto finish; + } + kxlddeps = 
(u_char **)kalloc(num_kxlddeps * sizeof(*kxlddeps)); + if (!kxlddeps) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogLinkFlag, + "Can't allocate link context to load kext %s.", + getIdentifierCString()); + goto finish; + } + + if (isKernelComponent()) { + OSData * kernelLinkState = OSKext::getKernelLinkState(); + kxlddeps[0] = (u_char *)kernelLinkState->getBytesNoCopy(); + } else for (i = 0; i < num_kxlddeps; i++) { + OSKext * dependency = OSDynamicCast(OSKext, dependencies->getObject(i)); + if (!dependency->linkState) { + // xxx - maybe we should panic here + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogLinkFlag, + "Can't load kext %s - link state missing.", + getIdentifierCString()); + goto finish; + } + kxlddeps[i] = (u_char *)dependency->linkState->getBytesNoCopy(); + assert(kxlddeps[i]); + } + + /* We only need link state for a library kext. + */ + if (compatibleVersion > -1 && (declaresExecutable() || isKernelComponent())) { + linkStateBytesPtr = &linkStateBytes; + linkStateLengthPtr = &linkStateLength; + } + + /* We only need the linked executable for a real kext. + */ + if (!isInterface()) { + kxldHeaderPtr = &kxld_header; + } + +#if DEBUG + OSKextLog(this, + kOSKextLogExplicitLevel | + kOSKextLogLoadFlag | kOSKextLogLinkFlag, + "Kext %s - calling kxld_link_file:\n" + " kxld_context: %p\n" + " executable: %p executable_length: %d\n" + " user_data: %p\n" + " kxld_dependencies: %p num_dependencies: %d\n" + " kxld_header_ptr: %p kmod_info_ptr: %p\n" + " link_state_ptr: %p link_state_length_ptr: %p", + getIdentifierCString(), kxldContext, + theExecutable->getBytesNoCopy(), theExecutable->getLength(), + this, kxlddeps, num_kxlddeps, + kxldHeaderPtr, kernelKmodInfoPtr, + linkStateBytesPtr, linkStateLengthPtr); +#endif + + /* After this call, the linkedExecutable instance variable + * should exist. 
+ */ + kxldResult = kxld_link_file(sKxldContext, + (u_char *)theExecutable->getBytesNoCopy(), + theExecutable->getLength(), + getIdentifierCString(), this, kxlddeps, num_kxlddeps, + (u_char **)kxldHeaderPtr, (kxld_addr_t *)&kmod_info, + linkStateBytesPtr, linkStateLengthPtr, + /* symbolFile */ NULL, /* symbolFileSize */ NULL); + + if (kxldResult != KERN_SUCCESS) { + // xxx - add kxldResult here? + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Can't load kext %s - link failed.", + getIdentifierCString()); + result = kOSKextReturnLinkError; + goto finish; + } + + /* If we got a link state, wrap it in an OSData and keep it + * around for later use linking other kexts that depend on this kext. + */ + if (linkStateBytes && linkStateLength > 0) { + linkState = OSData::withBytesNoCopy(linkStateBytes, linkStateLength); + assert(linkState); + linkState->setDeallocFunction(&osdata_kmem_free); + } + + /* If this isn't an interface, We've written data & instructions into kernel + * memory, so flush the data cache and invalidate the instruction cache. + */ + if (!isInterface()) { + flush_dcache(kmod_info->address, kmod_info->size, false); + invalidate_icache(kmod_info->address, kmod_info->size, false); + } + +register_kmod: + + if (isInterface()) { + + /* Whip up a fake kmod_info entry for the interface kext. + */ + kmod_info = (kmod_info_t *)kalloc(sizeof(kmod_info_t)); + if (!kmod_info) { + result = KERN_MEMORY_ERROR; + goto finish; + } + + /* A pseudokext has almost nothing in its kmod_info struct. + */ + bzero(kmod_info, sizeof(kmod_info_t)); + + kmod_info->info_version = KMOD_INFO_VERSION; + + /* An interface kext doesn't have a linkedExecutable, so save a + * copy of the UUID out of the original executable via copyUUID() + * while we still have the original executable. + */ + interfaceUUID = copyUUID(); + } + + kmod_info->id = loadTag = sNextLoadTag++; + kmod_info->reference_count = 0; // KMOD_DECL... sets it to -1 (invalid). 
+ + /* Stamp the bundle ID and version from the OSKext over anything + * resident inside the kmod_info. + */ + string = getIdentifierCString(); + strlcpy(kmod_info->name, string, sizeof(kmod_info->name)); + + string = versCString; + strlcpy(kmod_info->version, string, sizeof(kmod_info->version)); + + /* Add the dependencies' kmod_info structs as kmod_references. + */ + num_kmod_refs = getNumDependencies(); + if (num_kmod_refs) { + kmod_info->reference_list = (kmod_reference_t *)kalloc( + num_kmod_refs * sizeof(kmod_reference_t)); + if (!kmod_info->reference_list) { + result = KERN_MEMORY_ERROR; + goto finish; + } + bzero(kmod_info->reference_list, + num_kmod_refs * sizeof(kmod_reference_t)); + for (uint32_t refIndex = 0; refIndex < num_kmod_refs; refIndex++) { + kmod_reference_t * ref = &(kmod_info->reference_list[refIndex]); + OSKext * refKext = OSDynamicCast(OSKext, dependencies->getObject(refIndex)); + ref->info = refKext->kmod_info; + ref->info->reference_count++; + + if (refIndex + 1 < num_kmod_refs) { + ref->next = kmod_info->reference_list + refIndex + 1; + } + } + } + + if (!isInterface() && linkedExecutable) { + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s executable loaded; %u pages at 0x%lx (load tag %u).", + kmod_info->name, + (unsigned)kmod_info->size / PAGE_SIZE, + (unsigned long)kmod_info->address, + (unsigned)kmod_info->id); + } + + result = setVMProtections(); + if (result != KERN_SUCCESS) { + goto finish; + } + + result = kOSReturnSuccess; + +finish: + if (kxlddeps) kfree(kxlddeps, (num_kxlddeps * sizeof(void *))); + + /* We no longer need the unrelocated executable (which the linker + * has altered anyhow). 
+ */ + setExecutable(NULL); + + if (result != kOSReturnSuccess) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Failed to load executable for kext %s.", + getIdentifierCString()); + + if (kmod_info && kmod_info->reference_list) { + kfree(kmod_info->reference_list, + num_kmod_refs * sizeof(kmod_reference_t)); + } + if (isInterface()) { + kfree(kmod_info, sizeof(kmod_info_t)); + } + kmod_info = NULL; + if (linkedExecutable) { + linkedExecutable->release(); + linkedExecutable = NULL; + } + } + + return result; +} + +/********************************************************************* +* xxx - initWithPrelinkedInfoDict doesn't use this +*********************************************************************/ +void +OSKext::setLinkedExecutable(OSData * anExecutable) +{ + if (linkedExecutable) { + panic("Attempt to set linked executable on kext " + "that already has one (%s).\n", + getIdentifierCString()); + } + linkedExecutable = anExecutable; + linkedExecutable->retain(); + return; +} + +/********************************************************************* +* called only by loadExecutable() +*********************************************************************/ +OSReturn +OSKext::setVMProtections(void) +{ + vm_map_t kext_map = NULL; + kernel_segment_command_t * seg = NULL; + vm_map_offset_t start = 0; + vm_map_offset_t end = 0; + OSReturn result = kOSReturnError; + + if (!kmod_info->address && !kmod_info->size) { + result = kOSReturnSuccess; + goto finish; + } + + /* Get the kext's vm map */ + kext_map = kext_get_vm_map(kmod_info); + if (!kext_map) { + result = KERN_MEMORY_ERROR; + goto finish; + } + + /* XXX: On arm, the vme covering the prelinked kernel (really, the whole + * range from 0xc0000000 to a little over 0xe0000000) has maxprot set to 0 + * so the vm_map_protect calls below fail + * I believe this happens in the call to vm_map_enter in kmem_init but I + * need to confirm. 
+ */ + /* Protect the headers as read-only; they do not need to be wired */ + result = vm_map_protect(kext_map, kmod_info->address, + kmod_info->address + kmod_info->hdr_size, VM_PROT_READ, TRUE); + if (result != KERN_SUCCESS) { + goto finish; + } + + /* Set the VM protections and wire down each of the segments */ + seg = firstsegfromheader((kernel_mach_header_t *)kmod_info->address); + while (seg) { + start = round_page(seg->vmaddr); + end = trunc_page(seg->vmaddr + seg->vmsize); + + result = vm_map_protect(kext_map, start, end, seg->maxprot, TRUE); + if (result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to set maximum VM protections " + "for segment %s - 0x%x.", + getIdentifierCString(), seg->segname, (int)result); + goto finish; + } + + result = vm_map_protect(kext_map, start, end, seg->initprot, FALSE); + if (result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to set initial VM protections " + "for segment %s - 0x%x.", + getIdentifierCString(), seg->segname, (int)result); + goto finish; + } + + result = vm_map_wire(kext_map, start, end, seg->initprot, FALSE); + if (result != KERN_SUCCESS) { + goto finish; + } + + seg = nextsegfromheader((kernel_mach_header_t *) kmod_info->address, seg); + } + +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::validateKextMapping(bool startFlag) +{ + OSReturn result = kOSReturnError; + const char * whichOp = startFlag ? 
"start" : "stop"; + kern_return_t kern_result = 0; + vm_map_t kext_map = NULL; + mach_vm_address_t address = 0; + mach_vm_size_t size = 0; + uint32_t depth = 0; + mach_msg_type_number_t count; + vm_region_submap_short_info_data_64_t info; + + count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; + bzero(&info, sizeof(info)); + + // xxx - do we need a distinct OSReturn value for these or is "bad data" + // xxx - sufficient? + + /* Verify that the kmod_info and start/stop pointers are non-NULL. + */ + if (!kmod_info) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - NULL kmod_info pointer.", + getIdentifierCString()); + result = kOSKextReturnBadData; + goto finish; + } + + if (startFlag) { + address = (mach_vm_address_t)kmod_info->start; + } else { + address = (mach_vm_address_t)kmod_info->stop; + } + + if (!address) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - NULL module %s pointer.", + getIdentifierCString(), whichOp); + result = kOSKextReturnBadData; + goto finish; + } + + kext_map = kext_get_vm_map(kmod_info); + depth = (kernel_map == kext_map) ? 1 : 2; + + /* Verify that the start/stop function lies within the kext's address range. + */ + if (address < kmod_info->address + kmod_info->hdr_size || + kmod_info->address + kmod_info->size <= address) + { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s module %s pointer is outside of kext range " + "(%s %p - kext at %p-%p)..", + getIdentifierCString(), + whichOp, + whichOp, + (void *)address, + (void *)kmod_info->address, + (void *)(kmod_info->address + kmod_info->size)); + result = kOSKextReturnBadData; + goto finish; + } + + /* Only do these checks before calling the start function; + * If anything goes wrong with the mapping while the kext is running, + * we'll likely have panicked well before any attempt to stop the kext. + */ + if (startFlag) { + + /* Verify that the start/stop function is executable. 
+ */ + kern_result = mach_vm_region_recurse(kernel_map, &address, &size, &depth, + (vm_region_recurse_info_t)&info, &count); + if (kern_result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - bad %s pointer %p.", + getIdentifierCString(), + whichOp, (void *)address); + result = kOSKextReturnBadData; + goto finish; + } + + if (!(info.protection & VM_PROT_EXECUTE)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - memory region containing module %s function " + "is not executable.", + getIdentifierCString(), whichOp); + result = kOSKextReturnBadData; + goto finish; + } + + /* Verify that the kext is backed by physical memory. + */ + for (address = kmod_info->address; + address < round_page(kmod_info->address + kmod_info->size); + address += PAGE_SIZE) + { + if (!pmap_find_phys(kernel_pmap, (vm_offset_t)address)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - page %p is not backed by physical memory.", + getIdentifierCString(), + (void *)address); + result = kOSKextReturnBadData; + goto finish; + } + } + } + + result = kOSReturnSuccess; +finish: + return result; +} + +/********************************************************************* +*********************************************************************/ +OSReturn +OSKext::start(bool startDependenciesFlag) +{ + OSReturn result = kOSReturnError; + kern_return_t (* startfunc)(kmod_info_t *, void *); + unsigned int i, count; + void * kmodStartData = NULL; // special handling needed +#if CONFIG_MACF_KEXT + mach_msg_type_number_t kmodStartDataCount = 0; +#endif /* CONFIG_MACF_KEXT */ + + if (isStarted() || isInterface() || isKernelComponent()) { + result = kOSReturnSuccess; + goto finish; + } + + if (!isLoaded()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Attempt to start nonloaded kext %s.", + getIdentifierCString()); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + 
result = validateKextMapping(/* start? */ true); + if (result != kOSReturnSuccess) { + goto finish; + } + + startfunc = kmod_info->start; + + count = getNumDependencies(); + for (i = 0; i < count; i++) { + OSKext * dependency = OSDynamicCast(OSKext, dependencies->getObject(i)); + if (dependency == NULL) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s start - internal error, dependency disappeared.", + getIdentifierCString()); + goto finish; + } + if (!dependency->isStarted()) { + if (startDependenciesFlag) { + OSReturn dependencyResult = + dependency->start(startDependenciesFlag); + if (dependencyResult != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s start - dependency %s failed to start (error 0x%x).", + getIdentifierCString(), + dependency->getIdentifierCString(), + dependencyResult); + goto finish; + } + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Not starting %s - dependency %s not started yet.", + getIdentifierCString(), + dependency->getIdentifierCString()); + result = kOSKextReturnStartStopError; // xxx - make new return? + goto finish; + } + } + } + +#if CONFIG_MACF_KEXT + /* See if the kext has any MAC framework module data in its plist. + * This is passed in as arg #2 of the kext's start routine, + * which is otherwise reserved for any other kext. 
+ */ + kmodStartData = MACFCopyModuleDataForKext(this, &kmodStartDataCount); +#endif /* CONFIG_MACF_KEXT */ + + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Kext %s calling module start function.", + getIdentifierCString()); + + flags.starting = 1; + +#if !__i386__ && !__ppc__ + result = OSRuntimeInitializeCPP(kmod_info, NULL); + if (result == KERN_SUCCESS) { +#endif + + result = startfunc(kmod_info, kmodStartData); + +#if !__i386__ && !__ppc__ + if (result != KERN_SUCCESS) { + (void) OSRuntimeFinalizeCPP(kmod_info, NULL); + } + } +#endif + + flags.starting = 0; + + /* On success overlap the setting of started/starting. On failure just + * clear starting. + */ + if (result == KERN_SUCCESS) { + flags.started = 1; + + // xxx - log start error from kernel? + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s is now started.", + getIdentifierCString()); + } else { + invokeOrCancelRequestCallbacks( + /* result not actually used */ kOSKextReturnStartStopError, + /* invokeFlag */ false); + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s did not start (return code 0x%x).", + getIdentifierCString(), result); + } + +finish: +#if CONFIG_MACF_KEXT + /* Free the module data for a MAC framework kext. When we start using + * param #2 we'll have to distinguish and free/release appropriately. + * + * xxx - I'm pretty sure the old codepath freed the data and that it's + * xxx - up to the kext to copy it. 
+ */ + if (kmodStartData) { + kmem_free(kernel_map, (vm_offset_t)kmodStartData, kmodStartDataCount); + } +#endif /* CONFIG_MACF_KEXT */ + + return result; +} + +/********************************************************************* +* Returns true if the kext registered under kextIdentifier in +* sKextsByID may be unloaded: either it is not loaded, or it is loaded, +* its retain count does not exceed kOSKextMinLoadedRetainCount and +* (only when checkClassesFlag is set) none of its classes have +* instances. Returns false if no kext is registered under that +* identifier. sKextLock is held for the duration of the check. +*********************************************************************/ +/* static */ +bool OSKext::canUnloadKextWithIdentifier( + OSString * kextIdentifier, + bool checkClassesFlag) +{ + bool result = false; + OSKext * aKext = NULL; // do not release + + IORecursiveLockLock(sKextLock); + + aKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextIdentifier)); + + if (!aKext) { + goto finish; // can't unload what's not loaded + } + + if (aKext->isLoaded()) { + if (aKext->getRetainCount() > kOSKextMinLoadedRetainCount) { + goto finish; + } + if (checkClassesFlag && aKext->hasOSMetaClassInstances()) { + goto finish; + } + } + + result = true; + +finish: + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +* Calls the kext's module stop function, if it has one, and clears +* flags.started when that succeeds (on non-i386/ppc builds the C++ +* runtime is finalized after a successful stop function as well). +* +* Returns kOSReturnSuccess immediately for a kext that is not started +* or is an interface. Returns kOSKextReturnInvalidArgument for a kext +* that is not loaded, kOSKextReturnInUse when its classes still have +* instances or its retain count exceeds kOSKextMinLoadedRetainCount, +* and kOSKextReturnStartStopError when stopping fails. +* +* If the kext has no stop function, the result of validateKextMapping() +* is returned and flags.started is left unchanged. +*********************************************************************/ +OSReturn +OSKext::stop(void) +{ + OSReturn result = kOSReturnError; + kern_return_t (*stopfunc)(kmod_info_t *, void *); + + if (!isStarted() || isInterface()) { + result = kOSReturnSuccess; + goto finish; + } + + if (!isLoaded()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Attempt to stop nonloaded kext %s.", + getIdentifierCString()); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + /* Refuse to stop if we have clients or instances. It is up to + * the caller to make sure those aren't true. 
+ */ /* Instances are checked first; outstanding references are checked separately just below. */ + if (hasOSMetaClassInstances()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - C++ instances; can't stop.", + getIdentifierCString()); + result = kOSKextReturnInUse; + goto finish; + } + + if (getRetainCount() > kOSKextMinLoadedRetainCount) { + + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s - has references (linkage or tracking object); " + "can't stop.", + getIdentifierCString()); + result = kOSKextReturnInUse; + goto finish; + } + + /* Note: If validateKextMapping fails on the stop & unload path, + * we are in serious trouble and a kernel panic is likely whether + * we stop & unload the kext or not. + */ + result = validateKextMapping(/* start? */ false); + if (result != kOSReturnSuccess) { + goto finish; + } + + /* Save the list of loaded kexts in case we panic. + */ + OSKext::saveUnloadedKextPanicList(this); + + stopfunc = kmod_info->stop; + if (stopfunc) { + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Kext %s calling module stop function.", + getIdentifierCString()); + + flags.stopping = 1; + + result = stopfunc(kmod_info, /* userData */ NULL); +#if !__i386__ && !__ppc__ + if (result == KERN_SUCCESS) { + result = OSRuntimeFinalizeCPP(kmod_info, NULL); + } +#endif + + flags.stopping = 0; + + if (result == KERN_SUCCESS) { + flags.started = 0; + + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Kext %s is now stopped and ready to unload.", + getIdentifierCString()); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s did not stop (return code 0x%x).", + getIdentifierCString(), result); + result = kOSKextReturnStartStopError; + } + } + +finish: + return result; +} + +/********************************************************************* +* Unloads this kext: stops it if it is started, removes it from +* sLoadedKexts and patches the kmod_info list, drops the kmod reference +* counts kept for its dependencies, unwires and frees its linked +* executable, and flushes its dependencies. +* +* Refuses with kOSKextReturnDisabled when unloading is disabled, with +* kOSKextReturnInUse when the kext has outstanding references or class +* instances, and with kOSKextReturnInvalidArgument for a kernel +* component. A kext that is not loaded yields kOSReturnSuccess. +*********************************************************************/ +OSReturn +OSKext::unload(void) +{ + OSReturn result = kOSReturnError; + unsigned int index; + 
uint32_t num_kmod_refs = 0; + + /* Unloading can be switched off globally through sUnloadEnabled. */ if (!sUnloadEnabled) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext unloading is disabled (%s).", + this->getIdentifierCString()); + + result = kOSKextReturnDisabled; + goto finish; + } + + /* Refuse to unload if we have clients or instances. It is up to + * the caller to make sure those aren't true. + */ + if (getRetainCount() > kOSKextMinLoadedRetainCount) { + // xxx - Don't log under errors? this is more of an info thing + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogKextBookkeepingFlag, + "Can't unload kext %s; outstanding references (linkage or tracking object).", + getIdentifierCString()); + result = kOSKextReturnInUse; + goto finish; + } + + + if (hasOSMetaClassInstances()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag, + "Can't unload kext %s; classes have instances:", + getIdentifierCString()); + reportOSMetaClassInstances(kOSKextLogErrorLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag); + result = kOSKextReturnInUse; + goto finish; + } + + /* A kext that is not loaded has nothing to tear down; report success. */ if (!isLoaded()) { + result = kOSReturnSuccess; + goto finish; + } + + /* Kernel components are rejected as an invalid argument. */ if (isKernelComponent()) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + + /* Note that the kext is unloading before running any code that + * might be in the kext (request callbacks, module stop function). + * We will deny certain requests made against a kext in the process + * of unloading. 
+ */ + flags.unloading = 1; + + /* Any failure from stop() is reported to the caller as kOSKextReturnStartStopError. */ if (isStarted()) { + result = stop(); + if (result != KERN_SUCCESS) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s can't unload - module stop returned 0x%x.", + getIdentifierCString(), (unsigned)result); + result = kOSKextReturnStartStopError; + goto finish; + } + } + + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s unloading.", + getIdentifierCString()); + + /* Even if we don't call the stop function, we want to be sure we + * have no OSMetaClass references before unloading the kext executable + * from memory. OSMetaClasses may have pointers into the kext executable + * and that would cause a panic on OSKext::free() when metaClasses is freed. + */ + if (metaClasses) { + metaClasses->flushCollection(); + } + + /* Remove the kext from the list of loaded kexts, patch the gap + * in the kmod_info_t linked list, and reset "kmod" to point to the + * last loaded kext that isn't the fake kernel kext (sKernelKext). + */ + index = sLoadedKexts->getNextIndexOfObject(this, 0); + if (index != (unsigned int)-1) { + + sLoadedKexts->removeObject(index); + + /* After the removal, the kext that followed this one (if any) sits at the same index. */ OSKext * nextKext = OSDynamicCast(OSKext, + sLoadedKexts->getObject(index)); + + if (nextKext) { + if (index > 0) { + /* NOTE(review): gapKext is dereferenced below without a NULL check - confirm the cast cannot fail here. */ OSKext * gapKext = OSDynamicCast(OSKext, + sLoadedKexts->getObject(index - 1)); + + nextKext->kmod_info->next = gapKext->kmod_info; + + } else /* index == 0 */ { + nextKext->kmod_info->next = NULL; + } + } + + OSKext * lastKext = OSDynamicCast(OSKext, sLoadedKexts->getLastObject()); + if (lastKext && lastKext != sKernelKext) { + kmod = lastKext->kmod_info; + } else { + kmod = NULL; // clear the global kmod variable + } + } + + /* Clear out the kmod references that we're keeping for compatibility + * with current panic backtrace code & kgmacros. + * xxx - will want to update those bits sometime and remove this. 
+ */ + num_kmod_refs = getNumDependencies(); + if (num_kmod_refs && kmod_info && kmod_info->reference_list) { + /* Drop one reference on each entry, then free the whole list. */ for (uint32_t refIndex = 0; refIndex < num_kmod_refs; refIndex++) { + kmod_reference_t * ref = &(kmod_info->reference_list[refIndex]); + ref->info->reference_count--; + } + kfree(kmod_info->reference_list, + num_kmod_refs * sizeof(kmod_reference_t)); + } + + /* If we have a linked executable, release & clear it, and then + * unwire & deallocate the buffer the OSData wrapped. + */ + if (linkedExecutable) { + vm_map_t kext_map; + + /* linkedExecutable is just a wrapper for the executable and doesn't + * free it. + */ + linkedExecutable->release(); + linkedExecutable = NULL; + + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s unwiring and unmapping linked executable.", + getIdentifierCString()); + + kext_map = kext_get_vm_map(kmod_info); + if (kext_map) { + // xxx - do we have to do this before freeing? Why can't we just free it? + // xxx - we should be able to set a dealloc func on the linkedExecutable + /* The function result is overwritten with the unwire status; the executable is freed only if unwiring succeeded. */ result = vm_map_unwire(kext_map, + kmod_info->address + kmod_info->hdr_size, + kmod_info->address + kmod_info->size, FALSE); + if (result == KERN_SUCCESS) { + kext_free(kmod_info->address, kmod_info->size); + } + } + } + + /* An interface kext has a fake kmod_info that was allocated, + * so we have to free it. 
+ */ + if (isInterface()) { + kfree(kmod_info, sizeof(kmod_info_t)); + } + + kmod_info = NULL; + + flags.loaded = false; + flushDependencies(); + + OSKextLog(this, + kOSKextLogProgressLevel | kOSKextLogLoadFlag, + "Kext %s unloaded.", getIdentifierCString()); + +finish: + /* Runs on every exit path, including the early refusals above. */ OSKext::saveLoadedKextPanicList(); + + flags.unloading = 0; + return result; +} + +/********************************************************************* +* Thread-call function queued by OSKext::considerDestroyingLinkContext(). +* Takes sKextLock and then sKextInnerLock, destroys sKxldContext if +* sConsiderUnloadsCalled is set, and frees the thread_call stored in +* sDestroyLinkContextThread. Both parameters are unused. +*********************************************************************/ +static void +_OSKextConsiderDestroyingLinkContext( + __unused thread_call_param_t p0, + __unused thread_call_param_t p1) +{ + /* Once both recursive locks are taken in correct order, we shouldn't + * have to worry about further recursive lock takes. + */ + IORecursiveLockLock(sKextLock); + IORecursiveLockLock(sKextInnerLock); + + /* The first time we destroy the kxldContext is in the first + * OSKext::considerUnloads() call, which sets sConsiderUnloadsCalled + * before calling this function. Thereafter any call to this function + * will actually destroy the context. + */ + if (sConsiderUnloadsCalled && sKxldContext) { + kxld_destroy_context(sKxldContext); + sKxldContext = NULL; + } + + /* Free the thread_call that was allocated to execute this function. + */ + if (sDestroyLinkContextThread) { + if (!thread_call_free(sDestroyLinkContextThread)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "thread_call_free() failed for kext link context."); + } + sDestroyLinkContextThread = 0; + } + + IORecursiveLockUnlock(sKextInnerLock); + IORecursiveLockUnlock(sKextLock); + + return; +} + +/********************************************************************* +* Destroying the kxldContext requires checking variables under both +* sKextInnerLock and sKextLock, so we do it on a separate thread +* to avoid deadlocks with IOService, with which OSKext has a reciprocal +* call relationship. +* +* Do not call any function that takes sKextLock here! 
This function +* can be invoked with sKextInnerLock, and the two must always +* be taken in the order: sKextLock -> sKextInnerLock. +*********************************************************************/ +/* static */ +void +OSKext::considerDestroyingLinkContext(void) +{ + IORecursiveLockLock(sKextInnerLock); + + /* If we have already queued a thread to destroy the link context, + * don't bother resetting; that thread will take care of it. + */ + if (sDestroyLinkContextThread) { + goto finish; + } + + /* The function to be invoked in the thread will deallocate + * this thread_call, so don't share it around. + */ + sDestroyLinkContextThread = thread_call_allocate( + &_OSKextConsiderDestroyingLinkContext, 0); + if (!sDestroyLinkContextThread) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag | kOSKextLogLinkFlag, + "Can't create thread to destroy kext link context."); + goto finish; + } + + thread_call_enter(sDestroyLinkContextThread); + +finish: + IORecursiveLockUnlock(sKextInnerLock); + return; +} + +/********************************************************************* +* Returns sKernelKext->linkState, generating it on first use by running +* kxld_link_file() over the kernel image, which is taken to span from +* _mh_execute_header to getlastaddr(). Panics if the link state cannot +* be generated. The returned object is not retained for the caller. +* +* NOTE(review): sKernelKext is dereferenced unconditionally at finish, +* and the OSData allocation is not NULL-checked - confirm both are +* guaranteed by the callers. +*********************************************************************/ +OSData * +OSKext::getKernelLinkState() +{ + kern_return_t kxldResult; + u_char * kernel = NULL; + size_t kernelLength; + u_char * linkStateBytes = NULL; + u_long linkStateLength; + OSData * linkState = NULL; + + if (sKernelKext && sKernelKext->linkState) { + goto finish; + } + + kernel = (u_char *)&_mh_execute_header; + kernelLength = getlastaddr() - (vm_offset_t)kernel; + + kxldResult = kxld_link_file(sKxldContext, + kernel, + kernelLength, + kOSKextKernelIdentifier, + /* callbackData */ NULL, + /* dependencies */ NULL, + /* numDependencies */ 0, + /* linkedObjectOut */ NULL, + /* kmod_info_kern out */ NULL, + &linkStateBytes, + &linkStateLength, + /* symbolFile */ NULL, + /* symbolFileSize */ NULL); + if (kxldResult) { + panic("Can't generate kernel link state; no kexts can be loaded."); + goto 
finish; + } + + linkState = OSData::withBytesNoCopy(linkStateBytes, linkStateLength); + linkState->setDeallocFunction(&osdata_kmem_free); + sKernelKext->linkState = linkState; + +finish: + return sKernelKext->linkState; +} + +#if PRAGMA_MARK +#pragma mark Autounload +#endif +/********************************************************************* +* This is a static method because the kext will be deallocated if it +* does unload! +* +* Returns the result of OSKext::removeKext() when the kext qualifies for +* autounload, and kOSKextReturnInUse when it does not. +*********************************************************************/ +OSReturn +OSKext::autounloadKext(OSKext * aKext) +{ + OSReturn result = kOSKextReturnInUse; + + /* Check for external references to this kext (usu. dependents), + * instances of defined classes (or classes derived from them), + * outstanding requests. + */ + if ((aKext->getRetainCount() > kOSKextMinLoadedRetainCount) || + !aKext->flags.autounloadEnabled || + aKext->isKernelComponent()) { + + goto finish; + } + + /* Skip a delay-autounload kext, once. + */ + if (aKext->flags.delayAutounload) { + OSKextLog(aKext, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag | kOSKextLogKextBookkeepingFlag, + "Kext %s has delayed autounload set; skipping and clearing flag.", + aKext->getIdentifierCString()); + aKext->flags.delayAutounload = 0; + goto finish; + } + + if (aKext->hasOSMetaClassInstances() || + aKext->countRequestCallbacks()) { + goto finish; + } + + result = OSKext::removeKext(aKext); + +finish: + + return result; +} + +/********************************************************************* +* Thread-call function scheduled by OSKext::considerUnloads(). Flushes +* nonloaded kexts, expires request callbacks already marked stale (and +* marks the remaining ones stale), then sweeps sLoadedKexts backwards, +* autounloading what it can, until a complete sweep unloads nothing. +* The callback expiry and the sweep are skipped while sSystemSleep is +* set. Both parameters are unused. +*********************************************************************/ +void +_OSKextConsiderUnloads( + __unused thread_call_param_t p0, + __unused thread_call_param_t p1) +{ + bool didUnload = false; + unsigned int count, i; + + /* Once both recursive locks are taken in correct order, we shouldn't + * have to worry about further recursive lock takes. 
+ */ + IORecursiveLockLock(sKextLock); + IORecursiveLockLock(sKextInnerLock); + + OSKext::flushNonloadedKexts(/* flushPrelinkedKexts */ true); + + /* If the system is powering down, don't try to unload anything. + */ + if (sSystemSleep) { + goto finish; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Checking for unused kexts to autounload."); + + /***** + * Remove any request callbacks marked as stale, + * and mark as stale any currently in flight. + */ + count = sRequestCallbackRecords->getCount(); + if (count) { + i = count - 1; + do { + /* NOTE(review): callbackRecord is used without a NULL check - confirm every entry is an OSDictionary. */ OSDictionary * callbackRecord = OSDynamicCast(OSDictionary, + sRequestCallbackRecords->getObject(i)); + OSBoolean * stale = OSDynamicCast(OSBoolean, + callbackRecord->getObject(kKextRequestStaleKey)); + + if (stale && stale->isTrue()) { + OSKext::invokeRequestCallback(callbackRecord, + kOSKextReturnTimeout); + } else { + callbackRecord->setObject(kKextRequestStaleKey, + kOSBooleanTrue); + } + } while (i--); + } + + /***** + * Make multiple passes through the array of loaded kexts until + * we don't unload any. This handles unwinding of dependency + * chains. We have to go *backwards* through the array because + * kexts are removed from it when unloaded, and we cannot make + * a copy or we'll mess up the retain counts we rely on to + * check whether a kext will unload. If only we could have + * nonretaining collections like CF has.... 
+ */ + do { + didUnload = false; + + count = sLoadedKexts->getCount(); + if (count) { + i = count - 1; + do { + OSKext * thisKext = OSDynamicCast(OSKext, + sLoadedKexts->getObject(i)); + /* Accumulate across the whole sweep: an unload anywhere in the array must trigger another pass, not just one at index 0. */ didUnload |= (kOSReturnSuccess == OSKext::autounloadKext(thisKext)); + } while (i--); + } + } while (didUnload); + +finish: + sConsiderUnloadsPending = false; + sConsiderUnloadsExecuted = true; + + (void) OSKext::considerRebuildOfPrelinkedKernel(); + + IORecursiveLockUnlock(sKextInnerLock); + IORecursiveLockUnlock(sKextLock); + + return; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! +* +* Schedules (or reschedules) the delayed scan for unused kexts. When +* rescheduleOnlyFlag is set, nothing is scheduled unless a scan is +* already pending. +*********************************************************************/ +void OSKext::considerUnloads(Boolean rescheduleOnlyFlag) +{ + AbsoluteTime when; + + IORecursiveLockLock(sKextInnerLock); + + if (!sUnloadCallout) { + sUnloadCallout = thread_call_allocate(&_OSKextConsiderUnloads, 0); + } + + if (rescheduleOnlyFlag && !sConsiderUnloadsPending) { + goto finish; + } + + thread_call_cancel(sUnloadCallout); + if (OSKext::getAutounloadEnabled() && !sSystemSleep) { + clock_interval_to_deadline(sConsiderUnloadDelay, + 1000 * 1000 * 1000, &when); + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "%scheduling %sscan for unused kexts in %lu seconds.", + sConsiderUnloadsPending ? "Res" : "S", + sConsiderUnloadsCalled ? "" : "initial ", + (unsigned long)sConsiderUnloadDelay); + + sConsiderUnloadsPending = true; + thread_call_enter_delayed(sUnloadCallout, when); + } + +finish: + /* The kxld context should be reused throughout boot. We mark the end of + * period as the first time considerUnloads() is called, and we destroy + * the first kxld context in that function. Afterwards, it will be + * destroyed in flushNonloadedKexts. 
+ */ + if (!sConsiderUnloadsCalled) { + sConsiderUnloadsCalled = true; + OSKext::considerDestroyingLinkContext(); + } + + IORecursiveLockUnlock(sKextInnerLock); + return; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! +* +* Handles system sleep/wake messages: on kIOMessageSystemWillSleep the +* pending unload callout is cancelled and sSystemSleep is set; on +* kIOMessageSystemHasPoweredOn sSystemSleep is cleared. Other message +* types are ignored. Always returns kIOReturnSuccess. +*********************************************************************/ +extern "C" { + +IOReturn OSKextSystemSleepOrWake(UInt32 messageType) +{ + IORecursiveLockLock(sKextInnerLock); + + /* If the system is going to sleep, cancel the reaper thread timer, + * and note that we're in a sleep state in case it just fired but hasn't + * taken the lock yet. If we are coming back from sleep, just + * clear the sleep flag; IOService's normal operation will cause + * unloads to be considered soon enough. + */ + if (messageType == kIOMessageSystemWillSleep) { + if (sUnloadCallout) { + thread_call_cancel(sUnloadCallout); + } + sSystemSleep = true; + } else if (messageType == kIOMessageSystemHasPoweredOn) { + sSystemSleep = false; + } + IORecursiveLockUnlock(sKextInnerLock); + + return kIOReturnSuccess; +} + +}; + + +#if PRAGMA_MARK +#pragma mark Prelinked Kernel +#endif +/********************************************************************* +* Do not access sConsiderUnloads... variables other than +* sConsiderUnloadsExecuted in this function. They are guarded by a +* different lock. 
+* +* Queues a single request on sKernelRequests asking for a prelinked +* kernel to be built, then pings kextd. Nothing is queued unless a +* deferred load has succeeded and the unload scan has executed, nor in +* safe boot, nor once a request has already been queued. +*********************************************************************/ +/* static */ +void +OSKext::considerRebuildOfPrelinkedKernel(void) +{ + OSReturn checkResult = kOSReturnError; + /* Function-local static: set once a request has been queued so it is not queued again. */ static bool requestedPrelink = false; + OSDictionary * prelinkRequest = NULL; // must release + + IORecursiveLockLock(sKextLock); + + if (!sDeferredLoadSucceeded || !sConsiderUnloadsExecuted || + sSafeBoot || requestedPrelink) + { + goto finish; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogArchiveFlag, + "Requesting build of prelinked kernel."); + + checkResult = _OSKextCreateRequest(kKextRequestPredicateRequestPrelink, + &prelinkRequest); + if (checkResult != kOSReturnSuccess) { + goto finish; + } + + if (!sKernelRequests->setObject(prelinkRequest)) { + goto finish; + } + + OSKextPingKextd(); + requestedPrelink = true; + +finish: + IORecursiveLockUnlock(sKextLock); + OSSafeRelease(prelinkRequest); + return; +} + +#if PRAGMA_MARK +#pragma mark Dependencies +#endif +/********************************************************************* +* Resolves the libraries named in this kext's OSBundleLibraries +* property into the dependencies array, recursing into each library. +* loopStack holds the kexts currently being resolved and is used to +* detect dependency loops; when it is NULL a local stack is created and +* released before returning. Returns true on success, in which case +* flags.hasAllDependencies is set. +*********************************************************************/ +bool +OSKext::resolveDependencies( + OSArray * loopStack) +{ + bool result = false; + OSArray * localLoopStack = NULL; // must release + bool addedToLoopStack = false; + OSDictionary * libraries = NULL; // do not release + OSCollectionIterator * libraryIterator = NULL; // must release + OSString * libraryID = NULL; // do not release + OSString * infoString = NULL; // do not release + OSString * readableString = NULL; // do not release + OSKext * libraryKext = NULL; // do not release + bool hasRawKernelDependency = false; + bool hasKernelDependency = false; + bool hasKPIDependency = false; + bool hasPrivateKPIDependency = false; + unsigned int count; + + /* A kernel component will automatically have this flag set, + * and a loaded kext should also have it set (as should all its + * loaded dependencies). 
+ */ + if (flags.hasAllDependencies) { + result = true; + goto finish; + } + + /* Check for loops in the dependency graph. + */ + if (loopStack) { + if (loopStack->getNextIndexOfObject(this, 0) != (unsigned int)-1) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s has a dependency loop; can't resolve dependencies.", + getIdentifierCString()); + goto finish; + } + } else { + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogDependenciesFlag, + "Kext %s resolving dependencies.", + getIdentifierCString()); + + loopStack = OSArray::withCapacity(6); // any small capacity will do + if (!loopStack) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s can't create bookkeeping stack to resolve dependencies.", + getIdentifierCString()); + goto finish; + } + localLoopStack = loopStack; + } + if (!loopStack->setObject(this)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - internal error resolving dependencies.", + getIdentifierCString()); + goto finish; + } + addedToLoopStack = true; + + /* Purge any existing kexts in the dependency list and start over. + */ + flushDependencies(); + /* NOTE(review): a surviving array is logged as an error but resolution continues and the array pointer is overwritten below - confirm this is intended. */ if (dependencies) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - internal error resolving dependencies.", + getIdentifierCString()); + } + + libraries = OSDynamicCast(OSDictionary, + getPropertyForHostArch(kOSBundleLibrariesKey)); + if (libraries == NULL || libraries->getCount() == 0) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag | kOSKextLogDependenciesFlag, + "Kext %s - can't resolve dependencies; %s missing/invalid type.", + getIdentifierCString(), kOSBundleLibrariesKey); + goto finish; + } + + /* Make a new array to hold the dependencies (flush freed the old one). 
+ */ + dependencies = OSArray::withCapacity(libraries->getCount()); + if (!dependencies) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - can't allocate dependencies array.", + getIdentifierCString()); + goto finish; + } + + // xxx - compat: We used to add an implicit dependency on kernel 6.0 + // xxx - compat: if none were declared. + + libraryIterator = OSCollectionIterator::withCollection(libraries); + if (!libraryIterator) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - can't allocate dependencies iterator.", + getIdentifierCString()); + goto finish; + } + + /* Each key of the libraries dictionary is a library identifier; its value is the requested version string. */ while ((libraryID = OSDynamicCast(OSString, + libraryIterator->getNextObject()))) { + + const char * library_id = libraryID->getCStringNoCopy(); + + OSString * libraryVersion = OSDynamicCast(OSString, + libraries->getObject(libraryID)); + if (libraryVersion == NULL) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag | kOSKextLogDependenciesFlag, + "Kext %s - illegal type in OSBundleLibraries.", + getIdentifierCString()); + goto finish; + } + + OSKextVersion libraryVers = + OSKextParseVersionString(libraryVersion->getCStringNoCopy()); + if (libraryVers == -1) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag | kOSKextLogDependenciesFlag, + "Kext %s - invalid library version %s.", + getIdentifierCString(), + libraryVersion->getCStringNoCopy()); + goto finish; + } + + libraryKext = OSDynamicCast(OSKext, sKextsByID->getObject(libraryID)); + if (libraryKext == NULL) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - library kext %s not found.", + getIdentifierCString(), library_id); + goto finish; + } + + if (!libraryKext->isCompatibleWithVersion(libraryVers)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - library kext %s not compatible " + "with requested version %s.", + getIdentifierCString(), library_id, + 
libraryVersion->getCStringNoCopy()); + goto finish; + } + + /* Recurse with the shared loop stack so that a cycle through this kext is detected. */ if (!libraryKext->resolveDependencies(loopStack)) { + goto finish; + } + + /* Add the library directly only if it has an executable to link. + * Otherwise it's just used to collect other dependencies, so put + * *its* dependencies on the list for this kext. + */ + // xxx - We are losing info here; would like to make fake entries or + // xxx - keep these in the dependency graph for loaded kexts. + // xxx - I really want to make kernel components not a special case! + if (libraryKext->declaresExecutable() || + libraryKext->isInterface()) { + + if (dependencies->getNextIndexOfObject(libraryKext, 0) == (unsigned)-1) { + dependencies->setObject(libraryKext); + + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogDependenciesFlag, + "Kext %s added dependency %s.", + getIdentifierCString(), + libraryKext->getIdentifierCString()); + } + } else { + int numLibDependencies = libraryKext->getNumDependencies(); + OSArray * libraryDependencies = libraryKext->getDependencies(); + int index; + + if (numLibDependencies) { + // xxx - this msg level should be 1 lower than the per-kext one + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogDependenciesFlag, + "Kext %s pulling %d dependencies from codeless library %s.", + getIdentifierCString(), + numLibDependencies, + libraryKext->getIdentifierCString()); + } + for (index = 0; index < numLibDependencies; index++) { + /* NOTE(review): thisLibDependency is used without a NULL check - confirm every entry is an OSKext. */ OSKext * thisLibDependency = OSDynamicCast(OSKext, + libraryDependencies->getObject(index)); + if (dependencies->getNextIndexOfObject(thisLibDependency, 0) == (unsigned)-1) { + dependencies->setObject(thisLibDependency); + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogDependenciesFlag, + "Kext %s added dependency %s from codeless library %s.", + getIdentifierCString(), + thisLibDependency->getIdentifierCString(), + libraryKext->getIdentifierCString()); + } + } + } + + /* Classify the library: exact KERNEL_LIB match (same length and same leading bytes), kernel-prefixed, or KPI-prefixed. */ if ((strlen(library_id) == strlen(KERNEL_LIB)) && + 0 == strncmp(library_id, 
KERNEL_LIB, sizeof(KERNEL_LIB)-1)) { + + hasRawKernelDependency = true; + } else if (STRING_HAS_PREFIX(library_id, KERNEL_LIB_PREFIX)) { + hasKernelDependency = true; + } else if (STRING_HAS_PREFIX(library_id, KPI_LIB_PREFIX)) { + hasKPIDependency = true; + if (!strncmp(library_id, PRIVATE_KPI, sizeof(PRIVATE_KPI)-1)) { + hasPrivateKPIDependency = true; + } + } + } + +#if __LP64__ + /* 64-bit: kernel-style dependencies are a hard error; a missing KPI dependency is only a warning. */ if (hasRawKernelDependency || hasKernelDependency) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag | kOSKextLogDependenciesFlag, + "Error - kext %s declares %s dependencies. " + "Only %s* dependencies are supported for 64-bit kexts.", + getIdentifierCString(), KERNEL_LIB, KPI_LIB_PREFIX); + goto finish; + } + if (!hasKPIDependency) { + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogDependenciesFlag, + "Warning - kext %s declares no %s* dependencies. " + "If it uses any KPIs, the link may fail with undefined symbols.", + getIdentifierCString(), KPI_LIB_PREFIX); + } +#else /* __LP64__ */ + // xxx - will change to flatly disallow "kernel" dependencies at some point + // xxx - is it invalid to do both "com.apple.kernel" and any + // xxx - "com.apple.kernel.*"? + + if (hasRawKernelDependency && hasKernelDependency) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogValidationFlag | kOSKextLogDependenciesFlag, + "Error - kext %s declares dependencies on both " + "%s and %s.", + getIdentifierCString(), KERNEL_LIB, KERNEL6_LIB); + goto finish; + } + + if ((hasRawKernelDependency || hasKernelDependency) && hasKPIDependency) { + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogDependenciesFlag, + "Warning - kext %s has immediate dependencies on both " + "%s* and %s* components; use only one style.", + getIdentifierCString(), KERNEL_LIB, KPI_LIB_PREFIX); + } + + if (!hasRawKernelDependency && !hasKernelDependency && !hasKPIDependency) { + // xxx - do we want to use validation flag for these too? 
+ OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogDependenciesFlag, + "Warning - %s declares no kernel dependencies; using %s.", + getIdentifierCString(), KERNEL6_LIB); + OSKext * kernelKext = OSDynamicCast(OSKext, + sKextsByID->getObject(KERNEL6_LIB)); + if (kernelKext) { + dependencies->setObject(kernelKext); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Error - Library %s not found for %s.", + KERNEL6_LIB, getIdentifierCString()); + } + } + + /* If the kext doesn't have a raw kernel or KPI dependency, then add all of + * its indirect dependencies to simulate old-style linking. XXX - Should + * check for duplicates. + */ + if (!hasRawKernelDependency && !hasKPIDependency) { + unsigned int i; + + count = getNumDependencies(); + + /* We add to the dependencies array in this loop, but do not iterate + * past its original count. + */ + for (i = 0; i < count; i++) { + /* NOTE(review): dependencyKext is used without a NULL check - confirm every entry is an OSKext. */ OSKext * dependencyKext = OSDynamicCast(OSKext, + dependencies->getObject(i)); + dependencyKext->addBleedthroughDependencies(dependencies); + } + } +#endif /* __LP64__ */ + + /* A private-KPI dependency requires the Apple identifier prefix plus at least one valid copyright string. */ if (hasPrivateKPIDependency) { + bool hasApplePrefix = false; + bool infoCopyrightIsValid = false; + bool readableCopyrightIsValid = false; + + hasApplePrefix = STRING_HAS_PREFIX(getIdentifierCString(), + APPLE_KEXT_PREFIX); + + infoString = OSDynamicCast(OSString, + getPropertyForHostArch("CFBundleGetInfoString")); + if (infoString) { + infoCopyrightIsValid = + kxld_validate_copyright_string(infoString->getCStringNoCopy()); + } + + readableString = OSDynamicCast(OSString, + getPropertyForHostArch("NSHumanReadableCopyright")); + if (readableString) { + readableCopyrightIsValid = + kxld_validate_copyright_string(readableString->getCStringNoCopy()); + } + + if (!hasApplePrefix || (!infoCopyrightIsValid && !readableCopyrightIsValid)) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Error - kext %s declares a dependency on %s. 
" + "Only Apple kexts may declare a dependency on %s.", + getIdentifierCString(), PRIVATE_KPI, PRIVATE_KPI); + goto finish; + } + } + + result = true; + flags.hasAllDependencies = 1; + +finish: + + /* Pop this kext off the loop stack; it is expected to be the top entry. */ if (addedToLoopStack) { + count = loopStack->getCount(); + if (count > 0 && (this == loopStack->getObject(count - 1))) { + loopStack->removeObject(count - 1); + } else { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - internal error resolving dependencies.", + getIdentifierCString()); + } + } + + if (result && localLoopStack) { + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogDependenciesFlag, + "Kext %s successfully resolved dependencies.", + getIdentifierCString()); + } + + OSSafeRelease(localLoopStack); + OSSafeRelease(libraryIterator); + + return result; +} + +/********************************************************************* +* Recursively appends this kext's dependencies, and their dependencies, +* to anArray, skipping any kext already present in it. Returns false +* if an entry of the dependencies array is not an OSKext, true +* otherwise. +* +* NOTE(review): the return value of the recursive call is ignored. +*********************************************************************/ +bool +OSKext::addBleedthroughDependencies(OSArray * anArray) +{ + bool result = false; + unsigned int dependencyIndex, dependencyCount; + + dependencyCount = getNumDependencies(); + + for (dependencyIndex = 0; + dependencyIndex < dependencyCount; + dependencyIndex++) { + + OSKext * dependency = OSDynamicCast(OSKext, + dependencies->getObject(dependencyIndex)); + if (!dependency) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogDependenciesFlag, + "Kext %s - internal error propagating compatibility dependencies.", + getIdentifierCString()); + goto finish; + } + if (anArray->getNextIndexOfObject(dependency, 0) == (unsigned int)-1) { + anArray->setObject(dependency); + } + dependency->addBleedthroughDependencies(anArray); + } + + result = true; + +finish: + return result; +} + +/********************************************************************* +* Releases the dependencies array and, except for kernel components, +* clears flags.hasAllDependencies. This is done only when the kext is +* not loaded or forceFlag is set. Returns true if the flush was +* performed, false if it was refused. +*********************************************************************/ +bool +OSKext::flushDependencies(bool forceFlag) +{ + bool result = false; + + /* Only clear the 
dependencies if the kext isn't loaded; + * we need the info for loaded kexts to track references. + */ + if (!isLoaded() || forceFlag) { + if (dependencies) { + // xxx - check level + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogDependenciesFlag, + "Kext %s flushing dependencies.", + getIdentifierCString()); + OSSafeReleaseNULL(dependencies); + + } + if (!isKernelComponent()) { + flags.hasAllDependencies = 0; + } + result = true; + } + + return result; +} + +/********************************************************************* +* Returns the number of entries in the dependencies array, or 0 when +* there is no array. +*********************************************************************/ +uint32_t +OSKext::getNumDependencies(void) +{ + if (!dependencies) { + return 0; + } + return dependencies->getCount(); +} + +/********************************************************************* +* Returns the dependencies array itself, without retaining it; the +* result is NULL when there is no array. +*********************************************************************/ +OSArray * +OSKext::getDependencies(void) +{ + return dependencies; +} + +#if PRAGMA_MARK +#pragma mark OSMetaClass Support +#endif +/********************************************************************* +* Registers aClass in this kext's metaClasses set, creating the set +* with capacity numClasses on first use. Registering a class that is +* already present logs a notice and succeeds. Autounload is enabled +* the first time a registered class is IOService or has IOService +* among its superclasses. +* +* Returns kOSReturnSuccess, or kOSMetaClassNoInsKModSet when the set +* cannot be created or the class cannot be added to it. +*********************************************************************/ +OSReturn +OSKext::addClass( + OSMetaClass * aClass, + uint32_t numClasses) +{ + OSReturn result = kOSMetaClassNoInsKModSet; + + if (!metaClasses) { + metaClasses = OSSet::withCapacity(numClasses); + if (!metaClasses) { + goto finish; + } + } + + if (metaClasses->containsObject(aClass)) { + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogLoadFlag, + "Notice - kext %s has already registered class %s.", + getIdentifierCString(), + aClass->getClassName()); + result = kOSReturnSuccess; + goto finish; + } + + if (!metaClasses->setObject(aClass)) { + goto finish; + } else { + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Kext %s registered class %s.", + getIdentifierCString(), + aClass->getClassName()); + } + + if (!flags.autounloadEnabled) { + const OSMetaClass * metaScan = NULL; // do not release + + 
/* Walk from aClass up through its superclasses looking for IOService. */ for (metaScan = aClass; metaScan; metaScan = metaScan->getSuperClass()) { + if (metaScan == OSTypeID(IOService)) { + + OSKextLog(this, + kOSKextLogProgressLevel | + kOSKextLogLoadFlag, + "Kext %s has IOService subclass %s; enabling autounload.", + getIdentifierCString(), + aClass->getClassName()); + + flags.autounloadEnabled = 1; + break; + } + } + } + + result = kOSReturnSuccess; + +finish: + if (result != kOSReturnSuccess) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s failed to register class %s.", + getIdentifierCString(), + aClass->getClassName()); + } + + return result; +} + +/********************************************************************* +* Removes aClass from this kext's metaClasses set. Asking to remove a +* class that is not in the set logs a notice and succeeds. Returns +* kOSMetaClassNoKModSet when the kext has no metaClasses set at all, +* kOSReturnSuccess otherwise. +*********************************************************************/ +OSReturn +OSKext::removeClass( + OSMetaClass * aClass) +{ + OSReturn result = kOSMetaClassNoKModSet; + + if (!metaClasses) { + goto finish; + } + + if (!metaClasses->containsObject(aClass)) { + OSKextLog(this, + kOSKextLogWarningLevel | + kOSKextLogLoadFlag, + "Notice - kext %s asked to unregister unknown class %s.", + getIdentifierCString(), + aClass->getClassName()); + result = kOSReturnSuccess; + goto finish; + } + + OSKextLog(this, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Kext %s unregistering class %s.", + getIdentifierCString(), + aClass->getClassName()); + + metaClasses->removeObject(aClass); + + result = kOSReturnSuccess; + +finish: + if (result != kOSReturnSuccess) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Failed to unregister kext %s class %s.", + getIdentifierCString(), + aClass->getClassName()); + } + return result; +} + +/********************************************************************* +* Returns the metaClasses set itself, without retaining it; the result +* is NULL when no class has been registered. +*********************************************************************/ +OSSet * +OSKext::getMetaClasses(void) +{ + return metaClasses; +} + +/********************************************************************* 
+* Returns true if any class in metaClasses reports a nonzero instance +* count. Returns false when there is no metaClasses set or when the +* iterator cannot be allocated. +*********************************************************************/ +bool +OSKext::hasOSMetaClassInstances(void) +{ + bool result = false; + OSCollectionIterator * classIterator = NULL; // must release + OSMetaClass * checkClass = NULL; // do not release + + if (!metaClasses) { + goto finish; + } + + classIterator = OSCollectionIterator::withCollection(metaClasses); + if (!classIterator) { + // xxx - log alloc failure? + goto finish; + } + while ((checkClass = (OSMetaClass *)classIterator->getNextObject())) { + if (checkClass->getInstanceCount()) { + result = true; + goto finish; + } + } + +finish: + + OSSafeRelease(classIterator); + return result; +} + +/********************************************************************* +* Looks up the kext with the given identifier and, if one is found, +* logs the instance counts of its classes using msgLogSpec. The +* reference obtained from the lookup is released before returning. +*********************************************************************/ +/* static */ +void +OSKext::reportOSMetaClassInstances( + const char * kextIdentifier, + OSKextLogSpec msgLogSpec) +{ + OSKext * theKext = NULL; // must release + + theKext = OSKext::lookupKextWithIdentifier(kextIdentifier); + if (!theKext) { + goto finish; + } + + theKext->reportOSMetaClassInstances(msgLogSpec); +finish: + OSSafeRelease(theKext); + return; +} + +/********************************************************************* +*********************************************************************/ +void +OSKext::reportOSMetaClassInstances(OSKextLogSpec msgLogSpec) +{ + OSCollectionIterator * classIterator = NULL; // must release + OSMetaClass * checkClass = NULL; // do not release + + if (!metaClasses) { + goto finish; + } + + classIterator = OSCollectionIterator::withCollection(metaClasses); + if (!classIterator) { + goto finish; + } + while ((checkClass = (OSMetaClass *)classIterator->getNextObject())) { + if (checkClass->getInstanceCount()) { + OSKextLog(this, + msgLogSpec, + " Kext %s class %s has %d instance%s.", + getIdentifierCString(), + checkClass->getClassName(), + checkClass->getInstanceCount(), + checkClass->getInstanceCount() == 1 ? 
"" : "s"); + } + } + +finish: + OSSafeRelease(classIterator); + return; +} + +#if PRAGMA_MARK +#pragma mark User-Space Requests +#endif +/********************************************************************* +* XXX - this function is a big ugly mess +*********************************************************************/ +/* static */ +OSReturn +OSKext::handleRequest( + host_priv_t hostPriv, + OSKextLogSpec clientLogFilter, + char * requestBuffer, + uint32_t requestLength, + char ** responseOut, + uint32_t * responseLengthOut, + char ** logInfoOut, + uint32_t * logInfoLengthOut) +{ + OSReturn result = kOSReturnError; + kern_return_t kmem_result = KERN_FAILURE; + + char * response = NULL; // returned by reference + uint32_t responseLength = 0; + + OSObject * parsedXML = NULL; // must release + OSDictionary * requestDict = NULL; // do not release + OSString * errorString = NULL; // must release + + OSData * responseData = NULL; // must release + OSObject * responseObject = NULL; // must release + + OSSerialize * serializer = NULL; // must release + + OSArray * logInfoArray = NULL; // must release + + OSString * predicate = NULL; // do not release + OSString * kextIdentifier = NULL; // do not release + OSArray * kextIdentifiers = NULL; // do not release + OSKext * theKext = NULL; // do not release + OSBoolean * boolArg = NULL; // do not release + + IORecursiveLockLock(sKextLock); + + if (responseOut) { + *responseOut = NULL; + *responseLengthOut = 0; + } + if (logInfoOut) { + *logInfoOut = NULL; + *logInfoLengthOut = 0; + } + + OSKext::setUserSpaceLogFilter(clientLogFilter, logInfoOut ? true : false); + + /* XML must be nul-terminated. 
+ */ + if (requestBuffer[requestLength - 1] != '\0') { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Invalid request from user space (not nul-terminated)."); + result = kOSKextReturnBadData; + goto finish; + } + parsedXML = OSUnserializeXML((const char *)requestBuffer, &errorString); + if (parsedXML) { + requestDict = OSDynamicCast(OSDictionary, parsedXML); + } + if (!requestDict) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not a dictionary"; + } + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Error unserializing request from user space: %s.", + errorCString); + result = kOSKextReturnSerialization; + goto finish; + } + + predicate = _OSKextGetRequestPredicate(requestDict); + if (!predicate) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Recieved kext request from user space with no predicate."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Received '%s' request from user space.", + predicate->getCStringNoCopy()); + + result = kOSKextReturnNotPrivileged; + if (hostPriv == HOST_PRIV_NULL) { + if (!predicate->isEqualTo(kKextRequestPredicateGetLoaded) && + !predicate->isEqualTo(kKextRequestPredicateGetKernelLinkState) && + !predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) { + + goto finish; + } + } + + /* Get common args in anticipation of use. 
+ */ + kextIdentifier = OSDynamicCast(OSString, _OSKextGetRequestArgument( + requestDict, kKextRequestArgumentBundleIdentifierKey)); + kextIdentifiers = OSDynamicCast(OSArray, _OSKextGetRequestArgument( + requestDict, kKextRequestArgumentBundleIdentifierKey)); + if (kextIdentifier) { + theKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextIdentifier)); + } + boolArg = OSDynamicCast(OSBoolean, _OSKextGetRequestArgument( + requestDict, kKextRequestArgumentValueKey)); + + result = kOSKextReturnInvalidArgument; + + if (predicate->isEqualTo(kKextRequestPredicateStart)) { + if (!kextIdentifier) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Invalid arguments to kext start request."); + } else if (!theKext) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Kext %s not found for start request.", + kextIdentifier->getCStringNoCopy()); + result = kOSKextReturnNotFound; + } else { + result = theKext->start(); + } + + } else if (predicate->isEqualTo(kKextRequestPredicateStop)) { + if (!kextIdentifier) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Invalid arguments to kext stop request."); + } else if (!theKext) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Kext %s not found for stop request.", + kextIdentifier->getCStringNoCopy()); + result = kOSKextReturnNotFound; + } else { + result = theKext->stop(); + } + + } else if (predicate->isEqualTo(kKextRequestPredicateUnload)) { + if (!kextIdentifier) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Invalid arguments to kext unload request."); + } else if (!theKext) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Kext %s not found for unload request.", + kextIdentifier->getCStringNoCopy()); + result = kOSKextReturnNotFound; + } else { + OSBoolean * terminateFlag = OSDynamicCast(OSBoolean, + _OSKextGetRequestArgument(requestDict, + 
kKextRequestArgumentTerminateIOServicesKey)); + result = OSKext::removeKext(theKext, terminateFlag == kOSBooleanTrue); + } + + } else if (predicate->isEqualTo(kKextRequestPredicateSendResource)) { + result = OSKext::dispatchResource(requestDict); + + } else if (predicate->isEqualTo(kKextRequestPredicateGetLoaded)) { + OSBoolean * delayAutounloadBool = NULL; + + delayAutounloadBool = OSDynamicCast(OSBoolean, + _OSKextGetRequestArgument(requestDict, + kKextRequestArgumentDelayAutounloadKey)); + + /* If asked to delay autounload, reset the timer if it's currently set. + * (That is, don't schedule an unload if one isn't already pending. + */ + if (delayAutounloadBool == kOSBooleanTrue) { + OSKext::considerUnloads(/* rescheduleOnly? */ true); + } + + responseObject = OSDynamicCast(OSObject, + OSKext::copyLoadedKextInfo(kextIdentifiers)); + if (!responseObject) { + result = kOSKextReturnInternalError; + } else { + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Returning loaded kext info."); + result = kOSReturnSuccess; + } + + } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) { + OSNumber * addressNum = NULL; // released as responseObject + kernel_segment_command_t * textseg = getsegbyname("__TEXT"); + + if (!textseg) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogIPCFlag, + "Can't find text segment for kernel load address."); + result = kOSReturnError; + goto finish; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Returning kernel load address 0x%llx.", + (unsigned long long)textseg->vmaddr); + addressNum = OSNumber::withNumber((long long unsigned int)textseg->vmaddr, + 8 * sizeof(long long unsigned int)); + responseObject = OSDynamicCast(OSObject, addressNum); + result = kOSReturnSuccess; + + } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelLinkState)) { + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + 
kOSKextLogIPCFlag, + "Returning kernel link state."); + responseData = sKernelKext->linkState; + responseData->retain(); + result = kOSReturnSuccess; + + } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelRequests)) { + + /* Hand the current sKernelRequests array to the caller + * (who must release it), and make a new one. + */ + responseObject = OSDynamicCast(OSObject, sKernelRequests); + sKernelRequests = OSArray::withCapacity(0); + sPostedKextLoadIdentifiers->flushCollection(); + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Returning kernel requests."); + result = kOSReturnSuccess; + + } else if (predicate->isEqualTo(kKextRequestPredicateGetAllLoadRequests)) { + + /* Return the set of all requested bundle identifiers */ + responseObject = OSDynamicCast(OSObject, sAllKextLoadIdentifiers); + responseObject->retain(); + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Returning load requests."); + result = kOSReturnSuccess; + } + + /********** + * Now we have handle the request, or not. Gather up the response & logging + * info to ship to user space. + *********/ + + /* Note: Nothing in OSKext is supposed to retain requestDict, + * but you never know.... 
+ */ + if (requestDict->getRetainCount() > 1) { + OSKextLog(/* kext */ NULL, + kOSKextLogWarningLevel | + kOSKextLogIPCFlag, + "Request from user space still retained by a kext; " + "probable memory leak."); + } + + if (responseData && responseObject) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Mistakenly generated both data & plist responses to user request " + "(returning only data)."); + } + + if (responseData && responseData->getLength() && responseOut) { + + response = (char *)responseData->getBytesNoCopy(); + responseLength = responseData->getLength(); + } else if (responseOut && responseObject) { + serializer = OSSerialize::withCapacity(0); + if (!serializer) { + result = kOSKextReturnNoMemory; + goto finish; + } + + if (!responseObject->serialize(serializer)) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to serialize response to request from user space."); + result = kOSKextReturnSerialization; + goto finish; + } + + response = (char *)serializer->text(); + responseLength = serializer->getLength(); + } + + if (responseOut && response) { + char * buffer; + + /* This kmem_alloc sets the return value of the function. + */ + kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, + responseLength); + if (kmem_result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogIPCFlag, + "Failed to copy response to request from user space."); + result = kmem_result; + goto finish; + } else { + memcpy(buffer, response, responseLength); + *responseOut = buffer; + *responseLengthOut = responseLength; + } + } + +finish: + + /* Gather up the collected log messages for user space. Any messages + * messages past this call will not make it up as log messages but + * will be in the system log. Note that we ignore the return of the + * serialize; it has no bearing on the operation at hand even if we + * fail to get the log messages. 
+ */ + logInfoArray = OSKext::clearUserSpaceLogFilter(); + + if (logInfoArray && logInfoOut && logInfoLengthOut) { + (void)OSKext::serializeLogInfo(logInfoArray, + logInfoOut, logInfoLengthOut); + } + + IORecursiveLockUnlock(sKextLock); + + OSSafeRelease(requestDict); + OSSafeRelease(errorString); + OSSafeRelease(responseData); + OSSafeRelease(responseObject); + OSSafeRelease(serializer); + OSSafeRelease(logInfoArray); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSArray * +OSKext::copyLoadedKextInfo(OSArray * kextIdentifiers) +{ + OSArray * result = NULL; + OSDictionary * kextInfo = NULL; // must release + uint32_t count, i; + uint32_t idCount = 0; + uint32_t idIndex = 0; + + IORecursiveLockLock(sKextLock); + + /* Empty list of bundle ids is equivalent to no list (get all). + */ + if (kextIdentifiers && !kextIdentifiers->getCount()) { + kextIdentifiers = NULL; + } else if (kextIdentifiers) { + idCount = kextIdentifiers->getCount(); + } + + count = sLoadedKexts->getCount(); + result = OSArray::withCapacity(count); + if (!result) { + goto finish; + } + for (i = 0; i < count; i++) { + OSKext * thisKext = NULL; // do not release + Boolean includeThis = true; + + if (kextInfo) { + kextInfo->release(); + kextInfo = NULL; + } + thisKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + if (!thisKext) { + continue; + } + + /* Skip current kext if we have a list of bundle IDs and + * it isn't in the list. 
+ */ + if (kextIdentifiers) { + const OSString * thisKextID = thisKext->getIdentifier(); + + includeThis = false; + + for (idIndex = 0; idIndex < idCount; idIndex++) { + const OSString * thisRequestID = OSDynamicCast(OSString, + kextIdentifiers->getObject(idIndex)); + if (thisKextID->isEqualTo(thisRequestID)) { + includeThis = true; + break; + } + } + } + + if (!includeThis) { + continue; + } + + kextInfo = thisKext->copyInfo(); + result->setObject(kextInfo); + } + +finish: + IORecursiveLockUnlock(sKextLock); + + if (kextInfo) kextInfo->release(); + + return result; +} + +/********************************************************************* +Load Tag +Bundle ID +Bundle Version +Path +Load Address +Load Size +Wired Size +Version +Dependency Load Tags +# Dependent References +UUID +RetainCount +*********************************************************************/ +#define _OSKextLoadInfoDictCapacity (12) + +OSDictionary * +OSKext::copyInfo(void) +{ + OSDictionary * result = NULL; + bool success = false; + OSNumber * cpuTypeNumber = NULL; // must release + OSNumber * cpuSubtypeNumber = NULL; // must release + OSString * versionString = NULL; // do not release + OSData * uuid = NULL; // must release + OSNumber * scratchNumber = NULL; // must release + OSArray * dependencyLoadTags = NULL; // must release + OSCollectionIterator * metaClassIterator = NULL; // must release + OSArray * metaClassInfo = NULL; // must release + OSDictionary * metaClassDict = NULL; // must release + OSMetaClass * thisMetaClass = NULL; // do not release + OSString * metaClassName = NULL; // must release + OSString * superclassName = NULL; // must release + uint32_t count, i; + + result = OSDictionary::withCapacity(_OSKextLoadInfoDictCapacity); + if (!result) { + goto finish; + } + + /* CPU Type & Subtype. + * Use the CPU type of the kernel for all (loaded) kexts. + * xxx - should we not include this for the kernel components, + * xxx - or for any interface? 
they have mach-o files, they're just weird. + */ + if (linkedExecutable || (this == sKernelKext)) { + + cpuTypeNumber = OSNumber::withNumber( + (long long unsigned int)_mh_execute_header.cputype, + 8 * sizeof(_mh_execute_header.cputype)); + if (cpuTypeNumber) { + result->setObject(kOSBundleCPUTypeKey, cpuTypeNumber); + } + } + + // I don't want to rely on a mach header for nonkernel kexts, yet + if (this == sKernelKext) { + cpuSubtypeNumber = OSNumber::withNumber( + (long long unsigned int)_mh_execute_header.cputype, + 8 * sizeof(_mh_execute_header.cputype)); + if (cpuSubtypeNumber) { + result->setObject(kOSBundleCPUSubtypeKey, cpuSubtypeNumber); + } + } + + /* CFBundleIdentifier. + */ + result->setObject(kCFBundleIdentifierKey, bundleID); + + /* CFBundleVersion. + */ + versionString = OSDynamicCast(OSString, + getPropertyForHostArch(kCFBundleVersionKey)); + if (versionString) { + result->setObject(kCFBundleVersionKey, versionString); + } + + /* OSBundleCompatibleVersion. + */ + versionString = OSDynamicCast(OSString, + getPropertyForHostArch(kOSBundleCompatibleVersionKey)); + if (versionString) { + result->setObject(kOSBundleCompatibleVersionKey, versionString); + } + + /* Path. + */ + if (path) { + result->setObject(kOSBundlePathKey, path); + } + + /* UUID. + */ + uuid = copyUUID(); + if (uuid) { + result->setObject(kOSBundleUUIDKey, uuid); + } + + /***** + * OSKernelResource, OSBundleIsInterface, OSBundlePrelinked, OSBundleStarted. + */ + result->setObject(kOSKernelResourceKey, + isKernelComponent() ? kOSBooleanTrue : kOSBooleanFalse); + + result->setObject(kOSBundleIsInterfaceKey, + isInterface() ? kOSBooleanTrue : kOSBooleanFalse); + + result->setObject(kOSBundlePrelinkedKey, + isPrelinked() ? kOSBooleanTrue : kOSBooleanFalse); + + result->setObject(kOSBundleStartedKey, + isStarted() ? kOSBooleanTrue : kOSBooleanFalse); + + /* LoadTag (Index). 
+ */ + scratchNumber = OSNumber::withNumber((unsigned long long)loadTag, + /* numBits */ 8 * sizeof(loadTag)); + if (scratchNumber) { + result->setObject(kOSBundleLoadTagKey, scratchNumber); + OSSafeReleaseNULL(scratchNumber); + } + + /* LoadAddress, LoadSize. + */ + if (isInterface() || linkedExecutable) { + /* These go to userspace via serialization, so we don't want any doubts + * about their size. + */ + uint64_t loadAddress = 0; + uint32_t loadSize = 0; + uint32_t wiredSize = 0; + + /* Interfaces always report 0 load address & size. + * Just the way they roll. + * + * xxx - leaving in # when we have a linkedExecutable...a kernelcomp + * xxx - shouldn't have one! + */ + if (linkedExecutable /* && !isInterface() */) { + loadAddress = (uint64_t)linkedExecutable->getBytesNoCopy(); + loadSize = linkedExecutable->getLength(); + + /* If we have a kmod_info struct, calculated the wired size + * from that. Otherwise it's the full load size. + */ + if (kmod_info) { + wiredSize = loadSize - kmod_info->hdr_size; + } else { + wiredSize = loadSize; + } + } + + scratchNumber = OSNumber::withNumber( + (unsigned long long)(loadAddress), + /* numBits */ 8 * sizeof(loadAddress)); + if (scratchNumber) { + result->setObject(kOSBundleLoadAddressKey, scratchNumber); + OSSafeReleaseNULL(scratchNumber); + } + scratchNumber = OSNumber::withNumber( + (unsigned long long)(loadSize), + /* numBits */ 8 * sizeof(loadSize)); + if (scratchNumber) { + result->setObject(kOSBundleLoadSizeKey, scratchNumber); + OSSafeReleaseNULL(scratchNumber); + } + scratchNumber = OSNumber::withNumber( + (unsigned long long)(wiredSize), + /* numBits */ 8 * sizeof(wiredSize)); + if (scratchNumber) { + result->setObject(kOSBundleWiredSizeKey, scratchNumber); + OSSafeReleaseNULL(scratchNumber); + } + } + + /* OSBundleDependencies. In descending order for + * easy compatibility with kextstat(8). 
+ */ + if ((count = getNumDependencies())) { + dependencyLoadTags = OSArray::withCapacity(count); + result->setObject(kOSBundleDependenciesKey, dependencyLoadTags); + + i = count - 1; + do { + OSKext * dependency = OSDynamicCast(OSKext, + dependencies->getObject(i)); + + OSSafeReleaseNULL(scratchNumber); + + if (!dependency) { + continue; + } + scratchNumber = OSNumber::withNumber( + (unsigned long long)dependency->getLoadTag(), + /* numBits*/ 8 * sizeof(loadTag)); + if (scratchNumber) { + dependencyLoadTags->setObject(scratchNumber); + } + } while (i--); + } + + OSSafeReleaseNULL(scratchNumber); + + /* OSBundleMetaClasses. + */ + if (metaClasses && metaClasses->getCount()) { + metaClassIterator = OSCollectionIterator::withCollection(metaClasses); + metaClassInfo = OSArray::withCapacity(metaClasses->getCount()); + if (!metaClassIterator || !metaClassInfo) { + goto finish; + } + result->setObject(kOSBundleClassesKey, metaClassInfo); + + while ( (thisMetaClass = OSDynamicCast(OSMetaClass, + metaClassIterator->getNextObject())) ) { + + OSSafeReleaseNULL(metaClassDict); + OSSafeReleaseNULL(metaClassName); + OSSafeReleaseNULL(superclassName); + OSSafeReleaseNULL(scratchNumber); + + metaClassDict = OSDictionary::withCapacity(3); + if (!metaClassDict) { + goto finish; + } + + metaClassName = OSString::withCString(thisMetaClass->getClassName()); + if (thisMetaClass->getSuperClass()) { + superclassName = OSString::withCString( + thisMetaClass->getSuperClass()->getClassName()); + } + scratchNumber = OSNumber::withNumber(thisMetaClass->getInstanceCount(), + 8 * sizeof(unsigned int)); + if (!metaClassDict || !metaClassName || !superclassName || + !scratchNumber) { + + goto finish; + } + + metaClassInfo->setObject(metaClassDict); + metaClassDict->setObject(kOSMetaClassNameKey, metaClassName); + metaClassDict->setObject(kOSMetaClassSuperclassNameKey, superclassName); + metaClassDict->setObject(kOSMetaClassTrackingCountKey, scratchNumber); + } + } + + /* OSBundleRetainCount. 
+ */ + OSSafeReleaseNULL(scratchNumber); + { + int extRetainCount = getRetainCount() - 1; + if (isLoaded()) { + extRetainCount--; + } + scratchNumber = OSNumber::withNumber( + (int)extRetainCount, + /* numBits*/ 8 * sizeof(int)); + if (scratchNumber) { + result->setObject(kOSBundleRetainCountKey, scratchNumber); + } + } + + success = true; +finish: + OSSafeRelease(cpuTypeNumber); + OSSafeRelease(cpuSubtypeNumber); + OSSafeRelease(uuid); + OSSafeRelease(scratchNumber); + OSSafeRelease(dependencyLoadTags); + OSSafeRelease(metaClassIterator); + OSSafeRelease(metaClassInfo); + OSSafeRelease(metaClassDict); + OSSafeRelease(metaClassName); + OSSafeRelease(superclassName); + if (!success) { + OSSafeReleaseNULL(result); + } + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::requestResource( + const char * kextIdentifierCString, + const char * resourceNameCString, + OSKextRequestResourceCallback callback, + void * context, + OSKextRequestTag * requestTagOut) +{ + OSReturn result = kOSReturnError; + OSKext * callbackKext = NULL; // must release (looked up) + + OSKextRequestTag requestTag = -1; + OSNumber * requestTagNum = NULL; // must release + + OSDictionary * requestDict = NULL; // must release + OSString * kextIdentifier = NULL; // must release + OSString * resourceName = NULL; // must release + + OSDictionary * callbackRecord = NULL; // must release + OSData * callbackWrapper = NULL; // must release + + OSData * contextWrapper = NULL; // must release + + IORecursiveLockLock(sKextLock); + + if (requestTagOut) { + *requestTagOut = kOSKextRequestTagInvalid; + } + + if (!kextIdentifierCString || !resourceNameCString || !callback) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + + callbackKext = OSKext::lookupKextWithAddress((vm_address_t)callback); + if (!callbackKext) { + OSKextLog(/* kext */ NULL, + 
kOSKextLogErrorLevel | kOSKextLogIPCFlag, + "Resource request has bad callback address."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + if (!callbackKext->flags.starting && !callbackKext->flags.started) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogIPCFlag, + "Resource request callback is in a kext that is not started."); + result = kOSKextReturnInvalidArgument; + goto finish; + } + + /* Do not allow any new requests to be made on a kext that is unloading. + */ + if (callbackKext->flags.stopping) { + result = kOSKextReturnStopping; + goto finish; + } + + /* If we're wrapped the next available request tag around to the negative + * numbers, we can't service any more requests. + */ + if (sNextRequestTag == kOSKextRequestTagInvalid) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogIPCFlag, + "No more request tags available; restart required."); + result = kOSKextReturnNoResources; + goto finish; + } + requestTag = sNextRequestTag++; + + result = _OSKextCreateRequest(kKextRequestPredicateRequestResource, + &requestDict); + if (result != kOSReturnSuccess) { + goto finish; + } + + kextIdentifier = OSString::withCString(kextIdentifierCString); + resourceName = OSString::withCString(resourceNameCString); + requestTagNum = OSNumber::withNumber((long long unsigned int)requestTag, + 8 * sizeof(requestTag)); + if (!kextIdentifier || + !resourceName || + !requestTagNum || + !_OSKextSetRequestArgument(requestDict, + kKextRequestArgumentBundleIdentifierKey, kextIdentifier) || + !_OSKextSetRequestArgument(requestDict, + kKextRequestArgumentNameKey, resourceName) || + !_OSKextSetRequestArgument(requestDict, + kKextRequestArgumentRequestTagKey, requestTagNum)) { + + result = kOSKextReturnNoMemory; + goto finish; + } + + callbackRecord = OSDynamicCast(OSDictionary, requestDict->copyCollection()); + if (!callbackRecord) { + result = kOSKextReturnNoMemory; + goto finish; + } + // we validate callback address at call time + 
callbackWrapper = OSData::withBytes((void *)&callback, sizeof(void *)); + if (context) { + contextWrapper = OSData::withBytes((void *)&context, sizeof(void *)); + } + if (!callbackWrapper || !_OSKextSetRequestArgument(callbackRecord, + kKextRequestArgumentCallbackKey, callbackWrapper)) { + + result = kOSKextReturnNoMemory; + goto finish; + } + + if (context) { + if (!contextWrapper || !_OSKextSetRequestArgument(callbackRecord, + kKextRequestArgumentContextKey, contextWrapper)) { + + result = kOSKextReturnNoMemory; + goto finish; + } + } + + /* Only post the requests after all the other potential failure points + * have been passed. + */ + if (!sKernelRequests->setObject(requestDict) || + !sRequestCallbackRecords->setObject(callbackRecord)) { + + result = kOSKextReturnNoMemory; + goto finish; + } + + OSKextPingKextd(); + + result = kOSReturnSuccess; + if (requestTagOut) { + *requestTagOut = requestTag; + } + +finish: + + /* If we didn't succeed, yank the request & callback + * from their holding arrays. + */ + if (result != kOSReturnSuccess) { + unsigned int index; + + index = sKernelRequests->getNextIndexOfObject(requestDict, 0); + if (index != (unsigned int)-1) { + sKernelRequests->removeObject(index); + } + index = sRequestCallbackRecords->getNextIndexOfObject(callbackRecord, 0); + if (index != (unsigned int)-1) { + sRequestCallbackRecords->removeObject(index); + } + } + + OSKext::considerUnloads(/* rescheduleOnly? 
*/ true); + + IORecursiveLockUnlock(sKextLock); + + if (callbackKext) callbackKext->release(); + if (requestTagNum) requestTagNum->release(); + + if (requestDict) requestDict->release(); + if (kextIdentifier) kextIdentifier->release(); + if (resourceName) resourceName->release(); + + if (callbackRecord) callbackRecord->release(); + if (callbackWrapper) callbackWrapper->release(); + if (contextWrapper) contextWrapper->release(); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::dequeueCallbackForRequestTag( + OSKextRequestTag requestTag, + OSDictionary ** callbackRecordOut) +{ + OSReturn result = kOSReturnError; + OSNumber * requestTagNum = NULL; // must release + + requestTagNum = OSNumber::withNumber((long long unsigned int)requestTag, + 8 * sizeof(requestTag)); + if (!requestTagNum) { + goto finish; + } + + result = OSKext::dequeueCallbackForRequestTag(requestTagNum, + callbackRecordOut); + +finish: + OSSafeRelease(requestTagNum); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::dequeueCallbackForRequestTag( + OSNumber * requestTagNum, + OSDictionary ** callbackRecordOut) +{ + OSReturn result = kOSKextReturnInvalidArgument; + OSDictionary * callbackRecord = NULL; // retain if matched! + OSNumber * callbackTagNum = NULL; // do not release + unsigned int count, i; + + IORecursiveLockLock(sKextLock); + + result = kOSReturnError; + count = sRequestCallbackRecords->getCount(); + for (i = 0; i < count; i++) { + callbackRecord = OSDynamicCast(OSDictionary, + sRequestCallbackRecords->getObject(i)); + if (!callbackRecord) { + goto finish; + } + + /* If we don't find a tag, we basically have a leak here. Maybe + * we should just remove it. 
+ */ + callbackTagNum = OSDynamicCast(OSNumber, _OSKextGetRequestArgument( + callbackRecord, kKextRequestArgumentRequestTagKey)); + if (!callbackTagNum) { + goto finish; + } + + /* We could be even more paranoid and check that all the incoming + * args match what's in the callback record. + */ + if (callbackTagNum->isEqualTo(requestTagNum)) { + if (callbackRecordOut) { + *callbackRecordOut = callbackRecord; + callbackRecord->retain(); + } + sRequestCallbackRecords->removeObject(i); + result = kOSReturnSuccess; + goto finish; + } + } + result = kOSKextReturnNotFound; + +finish: + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::dispatchResource(OSDictionary * requestDict) +{ + OSReturn result = kOSReturnError; + OSDictionary * callbackRecord = NULL; // must release + OSNumber * requestTag = NULL; // do not release + OSNumber * requestResult = NULL; // do not release + OSData * dataObj = NULL; // do not release + uint32_t dataLength = 0; + const void * dataPtr = NULL; // do not free + OSData * callbackWrapper = NULL; // do not release + OSKextRequestResourceCallback callback = NULL; + OSData * contextWrapper = NULL; // do not release + void * context = NULL; // do not free + OSKext * callbackKext = NULL; // must release (looked up) + + IORecursiveLockLock(sKextLock); + + /* Get the args from the request. Right now we need the tag + * to look up the callback record, and the result for invoking the callback. 
+ */ + requestTag = OSDynamicCast(OSNumber, _OSKextGetRequestArgument(requestDict, + kKextRequestArgumentRequestTagKey)); + requestResult = OSDynamicCast(OSNumber, _OSKextGetRequestArgument(requestDict, + kKextRequestArgumentResultKey)); + if (!requestTag || !requestResult) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + + /* Look for a callback record matching this request's tag. + */ + result = dequeueCallbackForRequestTag(requestTag, &callbackRecord); + if (result != kOSReturnSuccess) { + goto finish; + } + + /***** + * Get the context pointer of the callback record (if there is one). + */ + contextWrapper = OSDynamicCast(OSData, _OSKextGetRequestArgument(callbackRecord, + kKextRequestArgumentContextKey)); + context = _OSKextExtractPointer(contextWrapper); + if (contextWrapper && !context) { + goto finish; + } + + callbackWrapper = OSDynamicCast(OSData, + _OSKextGetRequestArgument(callbackRecord, + kKextRequestArgumentCallbackKey)); + callback = (OSKextRequestResourceCallback) + _OSKextExtractPointer(callbackWrapper); + if (!callback) { + goto finish; + } + + /* Check for a data obj. We might not have one and that's ok, that means + * we didn't find the requested resource, and we still have to tell the + * caller that via the callback. 
+ */ + dataObj = OSDynamicCast(OSData, _OSKextGetRequestArgument(requestDict, + kKextRequestArgumentValueKey)); + if (dataObj) { + dataPtr = dataObj->getBytesNoCopy(); + dataLength = dataObj->getLength(); + } + + callbackKext = OSKext::lookupKextWithAddress((vm_address_t)callback); + if (!callbackKext) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogIPCFlag, + "Can't invoke callback for resource request; " + "no kext loaded at callback address %p.", + callback); + goto finish; + } + if (!callbackKext->flags.starting && !callbackKext->flags.started) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogIPCFlag, + "Can't invoke kext resource callback; " + "kext at callback address %p is not running.", + callback); + goto finish; + } + + (void)callback(requestTag->unsigned32BitValue(), + (OSReturn)requestResult->unsigned32BitValue(), + dataPtr, dataLength, context); + + result = kOSReturnSuccess; + +finish: + if (callbackKext) callbackKext->release(); + if (callbackRecord) callbackRecord->release(); + + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::invokeRequestCallback( + OSDictionary * callbackRecord, + OSReturn callbackResult) +{ + OSString * predicate = _OSKextGetRequestPredicate(callbackRecord); + OSNumber * resultNum = NULL; // must release + + if (!predicate) { + goto finish; + } + + resultNum = OSNumber::withNumber((long long unsigned int)callbackResult, + 8 * sizeof(callbackResult)); + if (!resultNum) { + goto finish; + } + + /* Insert the result into the callback record and dispatch it as if it + * were the reply coming down from user space. + */ + _OSKextSetRequestArgument(callbackRecord, kKextRequestArgumentResultKey, + resultNum); + + if (predicate->isEqualTo(kKextRequestPredicateRequestResource)) { + /* This removes the pending callback record. 
+ */ + OSKext::dispatchResource(callbackRecord); + } + +finish: + if (resultNum) resultNum->release(); + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +OSReturn +OSKext::cancelRequest( + OSKextRequestTag requestTag, + void ** contextOut) +{ + OSReturn result = kOSKextReturnNoMemory; + OSDictionary * callbackRecord = NULL; // must release + OSData * contextWrapper = NULL; // do not release + + result = OSKext::dequeueCallbackForRequestTag(requestTag, + &callbackRecord); + + if (result == kOSReturnSuccess && contextOut) { + contextWrapper = OSDynamicCast(OSData, + _OSKextGetRequestArgument(callbackRecord, + kKextRequestArgumentContextKey)); + *contextOut = _OSKextExtractPointer(contextWrapper); + } + + if (callbackRecord) callbackRecord->release(); + + return result; +} + +/********************************************************************* +*********************************************************************/ +void +OSKext::invokeOrCancelRequestCallbacks( + OSReturn callbackResult, + bool invokeFlag) +{ + unsigned int count, i; + + IORecursiveLockLock(sKextLock); + + count = sRequestCallbackRecords->getCount(); + if (!count) { + goto finish; + } + + i = count - 1; + do { + OSDictionary * request = OSDynamicCast(OSDictionary, + sRequestCallbackRecords->getObject(i)); + + if (!request) { + continue; + } + OSData * callbackWrapper = OSDynamicCast(OSData, + _OSKextGetRequestArgument(request, + kKextRequestArgumentCallbackKey)); + + if (!callbackWrapper) { + sRequestCallbackRecords->removeObject(i); + continue; + } + + vm_address_t callbackAddress = (vm_address_t) + _OSKextExtractPointer(callbackWrapper); + + if ((kmod_info->address <= callbackAddress) && + (callbackAddress < (kmod_info->address + kmod_info->size))) { + + if (invokeFlag) { + /* This removes the callback record. 
+ */ + invokeRequestCallback(request, callbackResult); + } else { + sRequestCallbackRecords->removeObject(i); + } + } + } while (i--); + +finish: + IORecursiveLockUnlock(sKextLock); + return; +} + +/********************************************************************* +*********************************************************************/ +uint32_t +OSKext::countRequestCallbacks(void) +{ + uint32_t result = 0; + unsigned int count, i; + + IORecursiveLockLock(sKextLock); + + count = sRequestCallbackRecords->getCount(); + if (!count) { + goto finish; + } + + i = count - 1; + do { + OSDictionary * request = OSDynamicCast(OSDictionary, + sRequestCallbackRecords->getObject(i)); + + if (!request) { + continue; + } + OSData * callbackWrapper = OSDynamicCast(OSData, + _OSKextGetRequestArgument(request, + kKextRequestArgumentCallbackKey)); + + if (!callbackWrapper) { + continue; + } + + vm_address_t callbackAddress = (vm_address_t) + _OSKextExtractPointer(callbackWrapper); + + if ((kmod_info->address <= callbackAddress) && + (callbackAddress < (kmod_info->address + kmod_info->size))) { + + result++; + } + } while (i--); + +finish: + IORecursiveLockUnlock(sKextLock); + return result; +} + +/********************************************************************* +*********************************************************************/ +static OSReturn _OSKextCreateRequest( + const char * predicate, + OSDictionary ** requestP) +{ + OSReturn result = kOSKextReturnNoMemory; + OSDictionary * request = NULL; // must release on error + OSDictionary * args = NULL; // must release + + request = OSDictionary::withCapacity(2); + if (!request) { + goto finish; + } + result = _OSDictionarySetCStringValue(request, + kKextRequestPredicateKey, predicate); + if (result != kOSReturnSuccess) { + goto finish; + } + result = kOSReturnSuccess; + +finish: + if (result != kOSReturnSuccess) { + if (request) request->release(); + } else { + *requestP = request; + } + if (args) args->release(); + + return 
result;
+}
+
+/*********************************************************************
+* Returns the request's predicate string, or NULL when the value stored
+* under kKextRequestPredicateKey is missing or is not an OSString.
+* The returned object is not retained here.
+*********************************************************************/
+static OSString * _OSKextGetRequestPredicate(OSDictionary * requestDict)
+{
+    return OSDynamicCast(OSString,
+        requestDict->getObject(kKextRequestPredicateKey));
+}
+
+/*********************************************************************
+* Looks up argName in the request's arguments dictionary (the value
+* stored under kKextRequestArgumentsKey). Returns NULL when the request
+* has no arguments dictionary or the argument is absent.
+*********************************************************************/
+static OSObject * _OSKextGetRequestArgument(
+    OSDictionary * requestDict,
+    const char   * argName)
+{
+    OSDictionary * args = OSDynamicCast(OSDictionary,
+        requestDict->getObject(kKextRequestArgumentsKey));
+    if (args) {
+        return args->getObject(argName);
+    }
+    return NULL;
+}
+
+/*********************************************************************
+* Stores value under argName in the request's arguments dictionary,
+* creating that dictionary on first use. Returns false if the arguments
+* dictionary cannot be created or attached to the request, or if the
+* insertion of value fails.
+*********************************************************************/
+static bool _OSKextSetRequestArgument(
+    OSDictionary * requestDict,
+    const char   * argName,
+    OSObject     * value)
+{
+    OSDictionary * args = OSDynamicCast(OSDictionary,
+        requestDict->getObject(kKextRequestArgumentsKey));
+    if (!args) {
+        args = OSDictionary::withCapacity(2);
+        if (!args) {
+            return false;
+        }
+
+        /* Our creation reference is dropped whether or not the request
+         * accepted the dictionary. If the insertion failed, that release
+         * may have destroyed args, so it must not be used afterwards.
+         */
+        bool attached = requestDict->setObject(kKextRequestArgumentsKey, args);
+        args->release();
+        if (!attached) {
+            return false;
+        }
+    }
+    return args->setObject(argName, value);
+}
+
+/*********************************************************************
+* Returns the pointer value stored in the first sizeof(void *) bytes of
+* wrapper. Returns NULL when wrapper is NULL, holds fewer bytes than a
+* pointer, or has no backing buffer.
+*********************************************************************/
+static void * _OSKextExtractPointer(OSData * wrapper)
+{
+    void       * result    = NULL;
+    const void * resultPtr = NULL;
+
+    /* Never read a pointer out of a buffer too small to hold one.
+     */
+    if (!wrapper || wrapper->getLength() < sizeof(void *)) {
+        goto finish;
+    }
+    resultPtr = wrapper->getBytesNoCopy();
+    if (!resultPtr) {
+        goto finish;
+    }
+    result = *(void **)resultPtr;
+finish:
+    return result;
+}
+
+/*********************************************************************
+*********************************************************************/
+static OSReturn _OSDictionarySetCStringValue(
+    OSDictionary * dict,
+    const
char * cKey, + const char * cValue) +{ + OSReturn result = kOSKextReturnNoMemory; + const OSSymbol * key = NULL; // must release + OSString * value = NULL; // must release + + key = OSSymbol::withCString(cKey); + value = OSString::withCString(cValue); + if (!key || !value) { + goto finish; + } + if (dict->setObject(key, value)) { + result = kOSReturnSuccess; + } + +finish: + if (key) key->release(); + if (value) value->release(); + + return result; +} + +#if PRAGMA_MARK +#pragma mark Personalities (IOKit Drivers) +#endif +/********************************************************************* +*********************************************************************/ +/* static */ +OSArray * +OSKext::copyAllKextPersonalities(bool filterSafeBootFlag) +{ + OSArray * result = NULL; // returned + OSCollectionIterator * kextIterator = NULL; // must release + OSArray * personalities = NULL; // must release + OSCollectionIterator * personalitiesIterator = NULL; // must release + + OSString * kextID = NULL; // do not release + OSKext * theKext = NULL; // do not release + + IORecursiveLockLock(sKextLock); + + /* Let's conservatively guess that any given kext has around 3 + * personalities for now. 
+ */ + result = OSArray::withCapacity(sKextsByID->getCount() * 3); + if (!result) { + goto finish; + } + + kextIterator = OSCollectionIterator::withCollection(sKextsByID); + if (!kextIterator) { + goto finish; + } + + while ((kextID = OSDynamicCast(OSString, kextIterator->getNextObject()))) { + if (personalitiesIterator) { + personalitiesIterator->release(); + personalitiesIterator = NULL; + } + if (personalities) { + personalities->release(); + personalities = NULL; + } + + theKext = OSDynamicCast(OSKext, sKextsByID->getObject(kextID)); + if (!sSafeBoot || !filterSafeBootFlag || theKext->isLoadableInSafeBoot()) { + personalities = theKext->copyPersonalitiesArray(); + if (!personalities) { + continue; + } + result->merge(personalities); + } else { + // xxx - check for better place to put this log msg + OSKextLog(theKext, + kOSKextLogWarningLevel | + kOSKextLogLoadFlag, + "Kext %s is not loadable during safe boot; " + "omitting its personalities.", + theKext->getIdentifierCString()); + } + + } + +finish: + IORecursiveLockUnlock(sKextLock); + + if (kextIterator) kextIterator->release(); + if (personalitiesIterator) personalitiesIterator->release(); + if (personalities) personalities->release(); + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::setPrelinkedPersonalities(OSArray * personalitiesArray) +{ + sPrelinkedPersonalities = personalitiesArray; + if (sPrelinkedPersonalities) { + sPrelinkedPersonalities->retain(); + gIOCatalogue->addDrivers(sPrelinkedPersonalities); + } + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::sendAllKextPersonalitiesToCatalog(bool startMatching) +{ + int numPersonalities = 0; + + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + 
"Sending all eligible registered kexts' personalities " + "to the IOCatalogue %s.", + startMatching ? "and starting matching" : "but not starting matching"); + + OSArray * personalities = OSKext::copyAllKextPersonalities( + /* filterSafeBootFlag */ true); + + if (personalities) { + gIOCatalogue->addDrivers(personalities, startMatching); + numPersonalities = personalities->getCount(); + personalities->release(); + } + + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "%d kext personalit%s sent to the IOCatalogue; %s.", + numPersonalities, numPersonalities > 0 ? "ies" : "y", + startMatching ? "matching started" : "matching not started"); + return; +} + +/********************************************************************* +* Do not make a deep copy, just convert the IOKitPersonalities dict +* to an array for sending to the IOCatalogue. +*********************************************************************/ +OSArray * +OSKext::copyPersonalitiesArray(void) +{ + OSArray * result = NULL; + OSDictionary * personalities = NULL; // do not release + OSCollectionIterator * personalitiesIterator = NULL; // must release + + OSString * personalityName = NULL; // do not release + OSString * personalityBundleIdentifier = NULL; // do not release + + personalities = OSDynamicCast(OSDictionary, + getPropertyForHostArch(kIOKitPersonalitiesKey)); + if (!personalities) { + goto finish; + } + + result = OSArray::withCapacity(personalities->getCount()); + if (!result) { + goto finish; + } + + personalitiesIterator = + OSCollectionIterator::withCollection(personalities); + if (!personalitiesIterator) { + goto finish; + } + while ((personalityName = OSDynamicCast(OSString, + personalitiesIterator->getNextObject()))) { + + OSDictionary * personality = OSDynamicCast(OSDictionary, + personalities->getObject(personalityName)); + + /****** + * If the personality doesn't have a CFBundleIdentifier, or if it + * differs from the kext's, insert the kext's ID so we can 
find it. + * The publisher ID is used to remove personalities from bundles + * correctly. + */ + personalityBundleIdentifier = OSDynamicCast(OSString, + personality->getObject(kCFBundleIdentifierKey)); + + if (!personalityBundleIdentifier) { + personality->setObject(kCFBundleIdentifierKey, bundleID); + } else if (!personalityBundleIdentifier->isEqualTo(bundleID)) { + personality->setObject(kIOPersonalityPublisherKey, bundleID); + } + + result->setObject(personality); + } + +finish: + if (personalitiesIterator) personalitiesIterator->release(); + + return result; +} + +/********************************************************************* +Might want to change this to a bool return? +*********************************************************************/ +void +OSKext::sendPersonalitiesToCatalog( + bool startMatching, + OSArray * personalityNames) +{ + OSArray * personalitiesToSend = NULL; // must release + OSDictionary * kextPersonalities = NULL; // do not release + int count, i; + + if (sSafeBoot && !isLoadableInSafeBoot()) { + OSKextLog(this, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Kext %s is not loadable during safe boot; " + "not sending personalities to the IOCatalogue.", + getIdentifierCString()); + return; + } + + if (!personalityNames || !personalityNames->getCount()) { + personalitiesToSend = copyPersonalitiesArray(); + } else { + kextPersonalities = OSDynamicCast(OSDictionary, + getPropertyForHostArch(kIOKitPersonalitiesKey)); + if (!kextPersonalities || !kextPersonalities->getCount()) { + goto finish; + } + personalitiesToSend = OSArray::withCapacity(0); + if (!personalitiesToSend) { + goto finish; + } + count = personalityNames->getCount(); + for (i = 0; i < count; i++) { + OSString * name = OSDynamicCast(OSString, + personalityNames->getObject(i)); + if (!name) { + continue; + } + OSDictionary * personality = OSDynamicCast(OSDictionary, + kextPersonalities->getObject(name)); + if (personality) { + personalitiesToSend->setObject(personality); + 
} + } + } + if (personalitiesToSend) { + unsigned numPersonalities = personalitiesToSend->getCount(); + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Kext %s sending %d personalit%s to the IOCatalogue%s.", + getIdentifierCString(), + numPersonalities, + numPersonalities > 1 ? "ies" : "y", + startMatching ? " and starting matching" : " but not starting matching"); + gIOCatalogue->addDrivers(personalitiesToSend, startMatching); + } +finish: + if (personalitiesToSend) { + personalitiesToSend->release(); + } + return; +} + +/********************************************************************* +*********************************************************************/ +void +OSKext::removePersonalitiesFromCatalog(void) +{ + OSDictionary * personality = NULL; // do not release + + personality = OSDictionary::withCapacity(1); + if (!personality) { + goto finish; + } + personality->setObject(kCFBundleIdentifierKey, getIdentifier()); + + OSKextLog(this, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Kext %s removing all personalities naming it from the IOCatalogue.", + getIdentifierCString()); + + /* Have the IOCatalog remove all personalities matching this kext's + * bundle ID and trigger matching anew. + */ + gIOCatalogue->removeDrivers(personality, /* startMatching */ true); + + finish: + if (personality) personality->release(); + + return; +} + + +#if PRAGMA_MARK +#pragma mark Logging +#endif +/********************************************************************* +* Do not call any function that takes sKextLock here! 
+*********************************************************************/ +/* static */ +OSKextLogSpec +OSKext::setUserSpaceLogFilter( + OSKextLogSpec userLogFilter, + bool captureFlag) +{ + OSKextLogSpec result; + + IORecursiveLockLock(sKextInnerLock); + + result = sUserSpaceKextLogFilter; + sUserSpaceKextLogFilter = userLogFilter; + + /* If the config flag itself is changing, log the state change + * going both ways, before setting up the user-space log arrays, + * so that this is only logged in the kernel. + */ + if (sUserSpaceKextLogFilter != result) { + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogGeneralFlag, + "User-space log flags changed from 0x%x to 0x%x.", + result, sUserSpaceKextLogFilter); + } + + if (userLogFilter && captureFlag && + !sUserSpaceLogSpecArray && !sUserSpaceLogMessageArray) { + + // xxx - do some measurements for a good initial capacity? + sUserSpaceLogSpecArray = OSArray::withCapacity(0); + sUserSpaceLogMessageArray = OSArray::withCapacity(0); + + if (!sUserSpaceLogSpecArray || !sUserSpaceLogMessageArray) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Failed to allocate user-space log message arrays."); + OSSafeReleaseNULL(sUserSpaceLogSpecArray); + OSSafeReleaseNULL(sUserSpaceLogMessageArray); + } + } + + IORecursiveLockUnlock(sKextInnerLock); + + return result; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! 
+*********************************************************************/ +/* static */ +OSArray * +OSKext::clearUserSpaceLogFilter(void) +{ + OSArray * result = NULL; + OSKextLogSpec oldLogFilter; + + IORecursiveLockLock(sKextInnerLock); + + result = OSArray::withCapacity(2); + if (result) { + result->setObject(sUserSpaceLogSpecArray); + result->setObject(sUserSpaceLogMessageArray); + } + OSSafeReleaseNULL(sUserSpaceLogSpecArray); + OSSafeReleaseNULL(sUserSpaceLogMessageArray); + + oldLogFilter = sUserSpaceKextLogFilter; + sUserSpaceKextLogFilter = kOSKextLogSilentFilter; + + /* If the config flag itself is changing, log the state change + * going both ways, after tearing down the user-space log + * arrays, so this is only logged within the kernel. + */ + if (oldLogFilter != sUserSpaceKextLogFilter) { + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogGeneralFlag, + "User-space log flags changed from 0x%x to 0x%x.", + oldLogFilter, sUserSpaceKextLogFilter); + } + + IORecursiveLockUnlock(sKextInnerLock); + + return result; +} + +/********************************************************************* +* Do not call any function that takes sKextLock here! +*********************************************************************/ +/* static */ +OSKextLogSpec +OSKext::getUserSpaceLogFilter(void) +{ + OSKextLogSpec result; + + IORecursiveLockLock(sKextInnerLock); + result = sUserSpaceKextLogFilter; + IORecursiveLockUnlock(sKextInnerLock); + + return result; +} + +/********************************************************************* +* This function is called by OSMetaClass during kernel C++ setup. +* Be careful what you access here; assume only OSKext::initialize() +* has been called. +* +* Do not call any function that takes sKextLock here! 
+*********************************************************************/ +#define VTRESET "\033[0m" + +#define VTBOLD "\033[1m" +#define VTUNDER "\033[4m" + +#define VTRED "\033[31m" +#define VTGREEN "\033[32m" +#define VTYELLOW "\033[33m" +#define VTBLUE "\033[34m" +#define VTMAGENTA "\033[35m" +#define VTCYAN "\033[36m" + +inline const char * colorForFlags(OSKextLogSpec flags) +{ + OSKextLogSpec logLevel = flags & kOSKextLogLevelMask; + + switch (logLevel) { + case kOSKextLogErrorLevel: + return VTRED VTBOLD; + break; + case kOSKextLogWarningLevel: + return VTRED; + break; + case kOSKextLogBasicLevel: + return VTYELLOW VTUNDER; + break; + case kOSKextLogProgressLevel: + return VTYELLOW; + break; + case kOSKextLogStepLevel: + return VTGREEN; + break; + case kOSKextLogDetailLevel: + return VTCYAN; + break; + case kOSKextLogDebugLevel: + return VTMAGENTA; + break; + default: + return ""; // white + break; + } + return ""; +} + +inline bool logSpecMatch( + OSKextLogSpec msgLogSpec, + OSKextLogSpec logFilter) +{ + OSKextLogSpec filterKextGlobal = logFilter & kOSKextLogKextOrGlobalMask; + OSKextLogSpec filterLevel = logFilter & kOSKextLogLevelMask; + OSKextLogSpec filterFlags = logFilter & kOSKextLogFlagsMask; + + OSKextLogSpec msgKextGlobal = msgLogSpec & kOSKextLogKextOrGlobalMask; + OSKextLogSpec msgLevel = msgLogSpec & kOSKextLogLevelMask; + OSKextLogSpec msgFlags = msgLogSpec & kOSKextLogFlagsMask; + + /* Explicit messages always get logged. + */ + if (msgLevel == kOSKextLogExplicitLevel) { + return true; + } + + /* Warnings and errors are logged regardless of the flags. + */ + if (msgLevel <= kOSKextLogBasicLevel && (msgLevel <= filterLevel)) { + return true; + } + + /* A verbose message that isn't for a logging-enabled kext and isn't global + * does *not* get logged. + */ + if (!msgKextGlobal && !filterKextGlobal) { + return false; + } + + /* Warnings and errors are logged regardless of the flags. 
+ * All other messages must fit the flags and + * have a level at or below the filter. + * + */ + if ((msgFlags & filterFlags) && (msgLevel <= filterLevel)) { + return true; + } + return false; +} + +extern "C" { + +void +OSKextLog( + OSKext * aKext, + OSKextLogSpec msgLogSpec, + const char * format, ...) +{ + va_list argList; + + va_start(argList, format); + OSKextVLog(aKext, msgLogSpec, format, argList); + va_end(argList); +} + +void +OSKextVLog( + OSKext * aKext, + OSKextLogSpec msgLogSpec, + const char * format, + va_list srcArgList) +{ + extern int disableConsoleOutput; + + bool logForKernel = false; + bool logForUser = false; + va_list argList; + char stackBuffer[120]; + uint32_t length = 0; + char * allocBuffer = NULL; // must kfree + OSNumber * logSpecNum = NULL; // must release + OSString * logString = NULL; // must release + char * buffer = stackBuffer; // do not free + + IORecursiveLockLock(sKextInnerLock); + + /* Set the kext/global bit in the message spec if we have no + * kext or if the kext requests logging. + */ + if (!aKext || aKext->flags.loggingEnabled) { + msgLogSpec = msgLogSpec | kOSKextLogKextOrGlobalMask; + } + + logForKernel = logSpecMatch(msgLogSpec, sKernelLogFilter); + if (sUserSpaceLogSpecArray && sUserSpaceLogMessageArray) { + logForUser = logSpecMatch(msgLogSpec, sUserSpaceKextLogFilter); + } + + if (! (logForKernel || logForUser) ) { + goto finish; + } + + /* No goto from here until past va_end()! + */ + va_copy(argList, srcArgList); + length = vsnprintf(stackBuffer, sizeof(stackBuffer), format, argList); + va_end(argList); + + if (length + 1 >= sizeof(stackBuffer)) { + allocBuffer = (char *)kalloc((length + 1) * sizeof(char)); + if (!allocBuffer) { + goto finish; + } + + /* No goto from here until past va_end()! + */ + va_copy(argList, srcArgList); + vsnprintf(allocBuffer, length + 1, format, argList); + va_end(argList); + + buffer = allocBuffer; + } + + /* If user space wants the log message, queue it up. 
+ */ + if (logForUser && sUserSpaceLogSpecArray && sUserSpaceLogMessageArray) { + logSpecNum = OSNumber::withNumber(msgLogSpec, 8 * sizeof(msgLogSpec)); + logString = OSString::withCString(buffer); + if (logSpecNum && logString) { + sUserSpaceLogSpecArray->setObject(logSpecNum); + sUserSpaceLogMessageArray->setObject(logString); + } + } + + /* Always log messages from the kernel according to the kernel's + * log flags. + */ + if (logForKernel) { + + /* If we are in console mode and have a custom log filter, + * colorize the log message. + */ + if (!disableConsoleOutput && sBootArgLogFilterFound) { + const char * color = ""; // do not free + color = colorForFlags(msgLogSpec); + printf("%s%s%s\n", colorForFlags(msgLogSpec), + buffer, color[0] ? VTRESET : ""); + } else { + printf("%s\n", buffer); + } + } + +finish: + if (allocBuffer) { + kfree(allocBuffer, (length + 1) * sizeof(char)); + } + OSSafeRelease(logString); + OSSafeRelease(logSpecNum); + IORecursiveLockUnlock(sKextInnerLock); + return; +} + +}; /* extern "C" */ + +#if PRAGMA_MARK +#pragma mark Backtrace Dump & kmod_get_info() support +#endif +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::printKextsInBacktrace( + vm_offset_t * addr, + unsigned int cnt, + int (* printf_func)(const char *fmt, ...), + bool lockFlag) +{ + vm_offset_t * kscan_addr = NULL; + kmod_info_t * k = NULL; + kmod_reference_t * r = NULL; + unsigned int i; + int found_kmod = 0; + + if (lockFlag) { + IORecursiveLockLock(sKextLock); + } + + for (k = kmod; k; k = k->next) { + if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)k)) == 0) { + (*printf_func)(" kmod scan stopped due to missing " + "kmod page: %p\n", k); + break; + } + if (!k->address) { + continue; // skip fake entries for built-in kernel components + } + for (i = 0, kscan_addr = addr; i < cnt; i++, kscan_addr++) { + if ((*kscan_addr >= k->address) && + 
(*kscan_addr < (k->address + k->size))) { + + if (!found_kmod) { + (*printf_func)(" Kernel Extensions in backtrace " + "(with dependencies):\n"); + } + found_kmod = 1; + (*printf_func)(" %s(%s)@%p->%p\n", + k->name, k->version, k->address, k->address + k->size - 1); + + for (r = k->reference_list; r; r = r->next) { + kmod_info_t * rinfo; + + if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)r)) == 0) { + (*printf_func)(" kmod dependency scan stopped " + "due to missing dependency page: %p\n", r); + break; + } + + rinfo = r->info; + + if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)rinfo)) == 0) { + (*printf_func)(" kmod dependency scan stopped " + "due to missing kmod page: %p\n", rinfo); + break; + } + + if (!rinfo->address) { + continue; // skip fake entries for built-ins + } + + (*printf_func)(" dependency: %s(%s)@%p\n", + rinfo->name, rinfo->version, rinfo->address); + } + + break; // only report this kmod for one backtrace address + } + } + } + + if (lockFlag) { + IORecursiveLockUnlock(sKextLock); + } + + return; +} + +/******************************************************************************* +* substitute() looks at an input string (a pointer within a larger buffer) +* for a match to a substring, and on match it writes the marker & substitution +* character to an output string, updating the scan (from) and +* output (to) indexes as appropriate. +*******************************************************************************/ +static int substitute( + const char * scan_string, + char * string_out, + uint32_t * to_index, + uint32_t * from_index, + const char * substring, + char marker, + char substitution); + +/* string_out must be at least KMOD_MAX_NAME bytes. 
+ */ +static int +substitute( + const char * scan_string, + char * string_out, + uint32_t * to_index, + uint32_t * from_index, + const char * substring, + char marker, + char substitution) +{ + uint32_t substring_length = strnlen(substring, KMOD_MAX_NAME - 1); + + /* On a substring match, append the marker (if there is one) and then + * the substitution character, updating the output (to) index accordingly. + * Then update the input (from) length by the length of the substring + * that got replaced. + */ + if (!strncmp(scan_string, substring, substring_length)) { + if (marker) { + string_out[(*to_index)++] = marker; + } + string_out[(*to_index)++] = substitution; + (*from_index) += substring_length; + return 1; + } + return 0; +} + +/******************************************************************************* +* compactIdentifier() takes a CFBundleIdentifier in a buffer of at least +* KMOD_MAX_NAME characters and performs various substitutions of common +* prefixes & substrings as defined by tables in kext_panic_report.h. +*******************************************************************************/ +static void compactIdentifier( + const char * identifier, + char * identifier_out, + char ** identifier_out_end); + +static void +compactIdentifier( + const char * identifier, + char * identifier_out, + char ** identifier_out_end) +{ + uint32_t from_index, to_index; + uint32_t scan_from_index = 0; + uint32_t scan_to_index = 0; + subs_entry_t * subs_entry = NULL; + int did_sub = 0; + + from_index = to_index = 0; + identifier_out[0] = '\0'; + + /* Replace certain identifier prefixes with shorter @+character sequences. + * Check the return value of substitute() so we only replace the prefix. 
+ */ + for (subs_entry = &kext_identifier_prefix_subs[0]; + subs_entry->substring && !did_sub; + subs_entry++) { + + did_sub = substitute(identifier, identifier_out, + &scan_to_index, &scan_from_index, + subs_entry->substring, /* marker */ '\0', subs_entry->substitute); + } + did_sub = 0; + + /* Now scan through the identifier looking for the common substrings + * and replacing them with shorter !+character sequences via substitute(). + */ + for (/* see above */; + scan_from_index < KMOD_MAX_NAME - 1 && identifier[scan_from_index]; + /* see loop */) { + + const char * scan_string = &identifier[scan_from_index]; + + did_sub = 0; + + if (scan_from_index) { + for (subs_entry = &kext_identifier_substring_subs[0]; + subs_entry->substring && !did_sub; + subs_entry++) { + + did_sub = substitute(scan_string, identifier_out, + &scan_to_index, &scan_from_index, + subs_entry->substring, '!', subs_entry->substitute); + } + } + + /* If we didn't substitute, copy the input character to the output. + */ + if (!did_sub) { + identifier_out[scan_to_index++] = identifier[scan_from_index++]; + } + } + + identifier_out[scan_to_index] = '\0'; + if (identifier_out_end) { + *identifier_out_end = &identifier_out[scan_to_index]; + } + + return; +} + +/******************************************************************************* +* assemble_identifier_and_version() adds to a string buffer a compacted +* bundle identifier followed by a version string. +*******************************************************************************/ + +/* identPlusVers must be at least 2*KMOD_MAX_NAME in length. 
+ */ +static int assemble_identifier_and_version( + kmod_info_t * kmod_info, + char * identPlusVers); +static int +assemble_identifier_and_version( + kmod_info_t * kmod_info, + char * identPlusVers) +{ + int result = 0; + + compactIdentifier(kmod_info->name, identPlusVers, NULL); + result = strnlen(identPlusVers, KMOD_MAX_NAME - 1); + identPlusVers[result++] = '\t'; // increment for real char + identPlusVers[result] = '\0'; // don't increment for nul char + result = strlcat(identPlusVers, kmod_info->version, KMOD_MAX_NAME); + + return result; +} + +/******************************************************************************* +*******************************************************************************/ +#define LAST_LOADED " - last loaded " +#define LAST_LOADED_TS_WIDTH (16) + +/* static */ +uint32_t +OSKext::saveLoadedKextPanicListTyped( + const char * prefix, + int invertFlag, + int libsFlag, + char * paniclist, + uint32_t list_size, + uint32_t * list_length_ptr) +{ + uint32_t result = 0; + int error = 0; + unsigned int count, i; + + count = sLoadedKexts->getCount(); + if (!count) { + goto finish; + } + + i = count - 1; + do { + OSKext * theKext = OSDynamicCast(OSKext, sLoadedKexts->getObject(i)); + kmod_info_t * kmod_info = theKext->kmod_info; + int match; + char identPlusVers[2*KMOD_MAX_NAME]; + uint32_t identPlusVersLength; + char timestampBuffer[17]; // enough for a uint64_t + + /* Skip all built-in kexts. + */ + if (theKext->isKernelComponent()) { + continue; + } + + /* Filter for kmod name (bundle identifier). + */ + match = !strncmp(kmod_info->name, prefix, strnlen(prefix, KMOD_MAX_NAME)); + if ((match && invertFlag) || (!match && !invertFlag)) { + continue; + } + + /* Filter for libraries (kexts that have a compatible version). 
+ */ + if ((libsFlag == 0 && theKext->getCompatibleVersion() > 1) || + (libsFlag == 1 && theKext->getCompatibleVersion() < 1)) { + + continue; + } + + if (!kmod_info || + !pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)kmod_info))) { + + printf("kext scan stopped due to missing kmod_info page: %p\n", + kmod_info); + error = 1; + goto finish; + } + + identPlusVersLength = assemble_identifier_and_version(kmod_info, + identPlusVers); + if (!identPlusVersLength) { + printf("error saving loaded kext info\n"); + goto finish; + } + + /* We're going to note the last-loaded kext in the list. + */ + if (i + 1 == count) { + snprintf(timestampBuffer, sizeof(timestampBuffer), "%llu", + AbsoluteTime_to_scalar(&last_loaded_timestamp)); + identPlusVersLength += sizeof(LAST_LOADED) - 1 + + strnlen(timestampBuffer, sizeof(timestampBuffer)); + } + + /* Adding 1 for the newline. + */ + if (*list_length_ptr + identPlusVersLength + 1 >= list_size) { + goto finish; + } + + *list_length_ptr = strlcat(paniclist, identPlusVers, list_size); + if (i + 1 == count) { + *list_length_ptr = strlcat(paniclist, LAST_LOADED, list_size); + *list_length_ptr = strlcat(paniclist, timestampBuffer, list_size); + } + *list_length_ptr = strlcat(paniclist, "\n", list_size); + + } while (i--); + +finish: + if (!error) { + if (*list_length_ptr + 1 <= list_size) { + result = list_size - (*list_length_ptr + 1); + } + } + + return result; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::saveLoadedKextPanicList(void) +{ + char * newlist = NULL; + uint32_t newlist_size = 0; + uint32_t newlist_length = 0; + + IORecursiveLockLock(sKextLock); + + newlist_length = 0; + newlist_size = KEXT_PANICLIST_SIZE; + newlist = (char *)kalloc(newlist_size); + + if (!newlist) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Couldn't allocate kext panic log buffer."); + 
goto finish; + } + + newlist[0] = '\0'; + + // non-"com.apple." kexts + if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 1, + /* libs? */ -1, newlist, newlist_size, &newlist_length)) { + + goto finish; + } + // "com.apple." nonlibrary kexts + if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 0, + /* libs? */ 0, newlist, newlist_size, &newlist_length)) { + + goto finish; + } + // "com.apple." library kexts + if (!OSKext::saveLoadedKextPanicListTyped("com.apple.", /* invert? */ 0, + /* libs? */ 1, newlist, newlist_size, &newlist_length)) { + + goto finish; + } + + if (loaded_kext_paniclist) { + kfree(loaded_kext_paniclist, loaded_kext_paniclist_size); + } + loaded_kext_paniclist = newlist; + loaded_kext_paniclist_size = newlist_size; + loaded_kext_paniclist_length = newlist_length; + +finish: + IORecursiveLockUnlock(sKextLock); + return; +} + +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSKext::saveUnloadedKextPanicList(OSKext * aKext) +{ + char * newlist = NULL; + uint32_t newlist_size = 0; + uint32_t newlist_length = 0; + char identPlusVers[2*KMOD_MAX_NAME]; + uint32_t identPlusVersLength; + + if (!aKext->kmod_info) { + return; // do not goto finish here b/c of lock + } + + IORecursiveLockLock(sKextLock); + + clock_get_uptime(&last_unloaded_timestamp); + last_unloaded_address = (void *)aKext->kmod_info->address; + last_unloaded_size = aKext->kmod_info->size; + + + identPlusVersLength = assemble_identifier_and_version(aKext->kmod_info, + identPlusVers); + if (!identPlusVersLength) { + printf("error saving unloaded kext info\n"); + goto finish; + } + + newlist_length = identPlusVersLength; + newlist_size = newlist_length + 1; + newlist = (char *)kalloc(newlist_size); + + if (!newlist) { + printf("couldn't allocate kext panic log buffer\n"); + goto finish; + } + + newlist[0] = '\0'; + + strlcpy(newlist, 
identPlusVers, newlist_size); + + if (unloaded_kext_paniclist) { + kfree(unloaded_kext_paniclist, unloaded_kext_paniclist_size); + } + unloaded_kext_paniclist = newlist; + unloaded_kext_paniclist_size = newlist_size; + unloaded_kext_paniclist_length = newlist_length; + +finish: + IORecursiveLockUnlock(sKextLock); + return; +} + +/********************************************************************* +*********************************************************************/ +/* The size is printed after a literal "0x", so the conversion must be + * hexadecimal; a decimal conversion yields a misleading value in the log. + */ +#if __LP64__ +#define __kLoadSizeEscape "0x%llx" +#else +#define __kLoadSizeEscape "0x%lx" +#endif /* __LP64__ */ + +/* static */ +void +OSKext::printKextPanicLists(int (*printf_func)(const char *fmt, ...)) +{ + printf_func("unloaded kexts:\n"); + if (unloaded_kext_paniclist && + pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) unloaded_kext_paniclist) && + unloaded_kext_paniclist[0]) { + + printf_func( + "%.*s (addr %p, size " __kLoadSizeEscape ") - last unloaded %llu\n", + unloaded_kext_paniclist_length, unloaded_kext_paniclist, + last_unloaded_address, last_unloaded_size, + AbsoluteTime_to_scalar(&last_unloaded_timestamp)); + } else { + printf_func("(none)\n"); + } + printf_func("loaded kexts:\n"); + if (loaded_kext_paniclist && + pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) loaded_kext_paniclist) && + loaded_kext_paniclist[0]) { + + printf_func("%.*s", loaded_kext_paniclist_length, loaded_kext_paniclist); + } else { + printf_func("(none)\n"); + } + return; +} + +/********************************************************************* +*********************************************************************/ +#if __ppc__ || __i386__ +/* static */ +kern_return_t +OSKext::getKmodInfo( + kmod_info_array_t * kmodList, + mach_msg_type_number_t * kmodCount) +{ + kern_return_t result = KERN_FAILURE; + vm_offset_t data = 0; /* initialized so the cleanup path never tests an indeterminate value */ + kmod_info_t * k, * kmod_info_scan_ptr; + kmod_reference_t * r, * ref_scan_ptr; + int ref_count; + unsigned size = 0; + + *kmodList = (kmod_info_t *)0; + *kmodCount = 0; + + 
IORecursiveLockLock(sKextLock); + + k = kmod; + while (k) { + size += sizeof(kmod_info_t); + r = k->reference_list; + while (r) { + size +=sizeof(kmod_reference_t); + r = r->next; + } + k = k->next; + } + if (!size) { + result = KERN_SUCCESS; + goto finish; + } + + result = kmem_alloc(kernel_map, &data, size); + if (result != KERN_SUCCESS) { + goto finish; + } + + /* Copy each kmod_info struct sequentially into the data buffer. + * Set each struct's nonzero 'next' pointer back to itself as a sentinel; + * the kernel space address is used to match refs, and a zero 'next' flags + * the end of kmod_infos in the data buffer and the beginning of references. + */ + k = kmod; + kmod_info_scan_ptr = (kmod_info_t *)data; + while (k) { + *kmod_info_scan_ptr = *k; + if (k->next) { + kmod_info_scan_ptr->next = k; + } + kmod_info_scan_ptr++; + k = k->next; + } + + /* Now add references after the kmod_info structs in the same buffer. + * Update each kmod_info with the ref_count so we can associate + * references with kmod_info structs. + */ + k = kmod; + ref_scan_ptr = (kmod_reference_t *)kmod_info_scan_ptr; + kmod_info_scan_ptr = (kmod_info_t *)data; + while (k) { + r = k->reference_list; + ref_count = 0; + while (r) { + /* Note the last kmod_info in the data buffer has its next == 0. + * Since there can only be one like that, + * this case is handled by the caller. + */ + *ref_scan_ptr = *r; + ref_scan_ptr++; + r = r->next; + ref_count++; + } + /* Stuff the # of refs into the 'reference_list' field of the kmod_info + * struct for the client to interpret. 
+ */ + kmod_info_scan_ptr->reference_list = (kmod_reference_t *)(long)ref_count; + kmod_info_scan_ptr++; + k = k->next; + } + + result = vm_map_copyin(kernel_map, data, size, TRUE, (vm_map_copy_t *)kmodList); + if (result != KERN_SUCCESS) { + goto finish; + } + + *kmodCount = size; + result = KERN_SUCCESS; + +finish: + IORecursiveLockUnlock(sKextLock); + + if (result != KERN_SUCCESS && data) { + kmem_free(kernel_map, data, size); + *kmodList = (kmod_info_t *)0; + *kmodCount = 0; + } + return result; +} +#endif /* __ppc__ || __i386__ */ +#if PRAGMA_MARK +#pragma mark MAC Framework Support +#endif +/********************************************************************* +*********************************************************************/ +#if CONFIG_MACF_KEXT +/* MAC Framework support */ + +/* + * define IOC_DEBUG to display run-time debugging information + * #define IOC_DEBUG 1 + */ + +#ifdef IOC_DEBUG +#define DPRINTF(x) printf x +#else +#define IOC_DEBUG +#define DPRINTF(x) +#endif + +/********************************************************************* +*********************************************************************/ +static bool +MACFObjectIsPrimitiveType(OSObject * obj) +{ + const OSMetaClass * typeID = NULL; // do not release + + typeID = OSTypeIDInst(obj); + if (typeID == OSTypeID(OSString) || typeID == OSTypeID(OSNumber) || + typeID == OSTypeID(OSBoolean) || typeID == OSTypeID(OSData)) { + + return true; + } + return false; +} + +/********************************************************************* +*********************************************************************/ +static int +MACFLengthForObject(OSObject * obj) +{ + const OSMetaClass * typeID = NULL; // do not release + int len; + + typeID = OSTypeIDInst(obj); + if (typeID == OSTypeID(OSString)) { + OSString * stringObj = OSDynamicCast(OSString, obj); + len = stringObj->getLength() + 1; + } else if (typeID == OSTypeID(OSNumber)) { + len = sizeof("4294967295"); /* UINT32_MAX */ + } else if 
(typeID == OSTypeID(OSBoolean)) { + OSBoolean * boolObj = OSDynamicCast(OSBoolean, obj); + len = boolObj->isTrue() ? sizeof("true") : sizeof("false"); + } else if (typeID == OSTypeID(OSData)) { + OSData * dataObj = OSDynamicCast(OSData, obj); + len = dataObj->getLength(); + } else { + len = 0; + } + return len; +} + +/********************************************************************* +*********************************************************************/ +static void +MACFInitElementFromObject( + struct mac_module_data_element * element, + OSObject * value) +{ + const OSMetaClass * typeID = NULL; // do not release + + typeID = OSTypeIDInst(value); + if (typeID == OSTypeID(OSString)) { + OSString * stringObj = OSDynamicCast(OSString, value); + element->value_type = MAC_DATA_TYPE_PRIMITIVE; + element->value_size = stringObj->getLength() + 1; + DPRINTF(("osdict: string %s size %d\n", + stringObj->getCStringNoCopy(), element->value_size)); + memcpy(element->value, stringObj->getCStringNoCopy(), + element->value_size); + } else if (typeID == OSTypeID(OSNumber)) { + OSNumber * numberObj = OSDynamicCast(OSNumber, value); + element->value_type = MAC_DATA_TYPE_PRIMITIVE; + element->value_size = sprintf(element->value, "%u", + numberObj->unsigned32BitValue()) + 1; + } else if (typeID == OSTypeID(OSBoolean)) { + OSBoolean * boolObj = OSDynamicCast(OSBoolean, value); + element->value_type = MAC_DATA_TYPE_PRIMITIVE; + if (boolObj->isTrue()) { + strcpy(element->value, "true"); + element->value_size = 5; + } else { + strcpy(element->value, "false"); + element->value_size = 6; + } + } else if (typeID == OSTypeID(OSData)) { + OSData * dataObj = OSDynamicCast(OSData, value); + element->value_type = MAC_DATA_TYPE_PRIMITIVE; + element->value_size = dataObj->getLength(); + DPRINTF(("osdict: data size %d\n", dataObj->getLength())); + memcpy(element->value, dataObj->getBytesNoCopy(), + element->value_size); + } + return; +} + 
+/********************************************************************* +* This function takes an OSDictionary and returns a struct mac_module_data +* list. +*********************************************************************/ +static struct mac_module_data * +MACFEncodeOSDictionary(OSDictionary * dict) +{ + struct mac_module_data * result = NULL; // do not free + const OSMetaClass * typeID = NULL; // do not release + OSString * key = NULL; // do not release + OSCollectionIterator * keyIterator = NULL; // must release + struct mac_module_data_element * element = NULL; // do not free + unsigned int strtabsize = 0; + unsigned int listtabsize = 0; + unsigned int dicttabsize = 0; + unsigned int nkeys = 0; + unsigned int datalen = 0; + char * strtab = NULL; // do not free + char * listtab = NULL; // do not free + char * dicttab = NULL; // do not free + vm_offset_t data_addr = 0; + + keyIterator = OSCollectionIterator::withCollection(dict); + if (!keyIterator) { + goto finish; + } + + /* Iterate over OSModuleData to figure out total size */ + while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) { + + // Get the key's value and determine its type + OSObject * value = dict->getObject(key); + if (!value) { + continue; + } + + typeID = OSTypeIDInst(value); + if (MACFObjectIsPrimitiveType(value)) { + strtabsize += MACFLengthForObject(value); + } + else if (typeID == OSTypeID(OSArray)) { + unsigned int k, cnt, nents; + OSArray * arrayObj = OSDynamicCast(OSArray, value); + + nents = 0; + cnt = arrayObj->getCount(); + for (k = 0; k < cnt; k++) { + value = arrayObj->getObject(k); + typeID = OSTypeIDInst(value); + if (MACFObjectIsPrimitiveType(value)) { + listtabsize += MACFLengthForObject(value); + nents++; + } + else if (typeID == OSTypeID(OSDictionary)) { + unsigned int dents = 0; + OSDictionary * dictObj = NULL; // do not release + OSString * dictkey = NULL; // do not release + OSCollectionIterator * dictIterator = NULL; // must release + + dictObj = 
OSDynamicCast(OSDictionary, value); + dictIterator = OSCollectionIterator::withCollection(dictObj); + if (!dictIterator) { + goto finish; + } + while ((dictkey = OSDynamicCast(OSString, + dictIterator->getNextObject()))) { + + OSObject * dictvalue = NULL; // do not release + + dictvalue = dictObj->getObject(dictkey); + if (!dictvalue) { + continue; + } + if (MACFObjectIsPrimitiveType(dictvalue)) { + strtabsize += MACFLengthForObject(dictvalue); + } else { + continue; /* Only handle primitive types here. */ + } + /* + * Allow for the "arraynnn/" prefix in the key length. + */ + strtabsize += dictkey->getLength() + 1; + dents++; + } + dictIterator->release(); + if (dents-- > 0) { + dicttabsize += sizeof(struct mac_module_data_list) + + dents * sizeof(struct mac_module_data_element); + nents++; + } + } + else { + continue; /* Skip everything else. */ + } + } + if (nents == 0) { + continue; + } + listtabsize += sizeof(struct mac_module_data_list) + + (nents - 1) * sizeof(struct mac_module_data_element); + } else { + continue; /* skip anything else */ + } + strtabsize += key->getLength() + 1; + nkeys++; + } + if (nkeys == 0) { + goto finish; + } + + /* + * Allocate and fill in the module data structures. 
+ */ + datalen = sizeof(struct mac_module_data) + + sizeof(mac_module_data_element) * (nkeys - 1) + + strtabsize + listtabsize + dicttabsize; + DPRINTF(("osdict: datalen %d strtabsize %d listtabsize %d dicttabsize %d\n", + datalen, strtabsize, listtabsize, dicttabsize)); + if (kmem_alloc(kernel_map, &data_addr, datalen) != KERN_SUCCESS) { + goto finish; + } + result = (mac_module_data *)data_addr; + result->base_addr = data_addr; + result->size = datalen; + result->count = nkeys; + strtab = (char *)&result->data[nkeys]; + listtab = strtab + strtabsize; + dicttab = listtab + listtabsize; + DPRINTF(("osdict: data_addr %p strtab %p listtab %p dicttab %p end %p\n", + data_addr, strtab, listtab, dicttab, data_addr + datalen)); + + keyIterator->reset(); + nkeys = 0; + element = &result->data[0]; + DPRINTF(("osdict: element %p\n", element)); + while ( (key = OSDynamicCast(OSString, keyIterator->getNextObject())) ) { + + // Get the key's value and determine its type + OSObject * value = dict->getObject(key); + if (!value) { + continue; + } + + /* Store key */ + DPRINTF(("osdict: element @%p\n", element)); + element->key = strtab; + element->key_size = key->getLength() + 1; + DPRINTF(("osdict: key %s size %d @%p\n", key->getCStringNoCopy(), + element->key_size, strtab)); + memcpy(element->key, key->getCStringNoCopy(), element->key_size); + + typeID = OSTypeIDInst(value); + if (MACFObjectIsPrimitiveType(value)) { + /* Store value */ + element->value = element->key + element->key_size; + DPRINTF(("osdict: primitive element value %p\n", element->value)); + MACFInitElementFromObject(element, value); + strtab += element->key_size + element->value_size; + DPRINTF(("osdict: new strtab %p\n", strtab)); + } else if (typeID == OSTypeID(OSArray)) { + unsigned int k, cnt, nents; + char *astrtab; + struct mac_module_data_list *arrayhd; + struct mac_module_data_element *ele; + OSArray *arrayObj = OSDynamicCast(OSArray, value); + + element->value = listtab; + DPRINTF(("osdict: array 
element value %p\n", element->value)); + element->value_type = MAC_DATA_TYPE_ARRAY; + arrayhd = (struct mac_module_data_list *)element->value; + arrayhd->type = 0; + DPRINTF(("osdict: arrayhd %p\n", arrayhd)); + nents = 0; + astrtab = strtab + element->key_size; + ele = &(arrayhd->list[0]); + cnt = arrayObj->getCount(); + for (k = 0; k < cnt; k++) { + value = arrayObj->getObject(k); + DPRINTF(("osdict: array ele %d @%p\n", nents, ele)); + ele->key = NULL; + ele->key_size = 0; + typeID = OSTypeIDInst(value); + if (MACFObjectIsPrimitiveType(value)) { + if (arrayhd->type != 0 && + arrayhd->type != MAC_DATA_TYPE_PRIMITIVE) { + + continue; + } + arrayhd->type = MAC_DATA_TYPE_PRIMITIVE; + ele->value = astrtab; + MACFInitElementFromObject(ele, value); + astrtab += ele->value_size; + DPRINTF(("osdict: array new astrtab %p\n", astrtab)); + } else if (typeID == OSTypeID(OSDictionary)) { + unsigned int dents; + char * dstrtab = NULL; // do not free + OSDictionary * dictObj = NULL; // do not release + OSString * dictkey = NULL; // do not release + OSCollectionIterator * dictIterator = NULL; // must release + struct mac_module_data_list * dicthd = NULL; // do not free + struct mac_module_data_element * dele = NULL; // do not free + + if (arrayhd->type != 0 && + arrayhd->type != MAC_DATA_TYPE_DICT) { + + continue; + } + dictObj = OSDynamicCast(OSDictionary, value); + dictIterator = OSCollectionIterator::withCollection(dictObj); + if (!dictIterator) { + goto finish; + } + DPRINTF(("osdict: dict\n")); + ele->value = dicttab; + ele->value_type = MAC_DATA_TYPE_DICT; + dicthd = (struct mac_module_data_list *)ele->value; + DPRINTF(("osdict: dicthd %p\n", dicthd)); + dstrtab = astrtab; + dents = 0; + while ((dictkey = OSDynamicCast(OSString, + dictIterator->getNextObject()))) { + + OSObject * dictvalue = NULL; // do not release + + dictvalue = dictObj->getObject(dictkey); + if (!dictvalue) { + continue; + } + dele = &(dicthd->list[dents]); + DPRINTF(("osdict: dict ele %d @%p\n", dents, 
dele)); + if (MACFObjectIsPrimitiveType(dictvalue)) { + dele->key = dstrtab; + dele->key_size = dictkey->getLength() + 1; + DPRINTF(("osdict: dictkey %s size %d @%p\n", + dictkey->getCStringNoCopy(), dictkey->getLength(), dstrtab)); + memcpy(dele->key, dictkey->getCStringNoCopy(), + dele->key_size); + dele->value = dele->key + dele->key_size; + MACFInitElementFromObject(dele, dictvalue); + dstrtab += dele->key_size + dele->value_size; + DPRINTF(("osdict: dict new dstrtab %p\n", dstrtab)); + } else { + continue; /* Only handle primitive types here. */ + } + dents++; + } + dictIterator->release(); + if (dents == 0) { + continue; + } + arrayhd->type = MAC_DATA_TYPE_DICT; + ele->value_size = sizeof(struct mac_module_data_list) + + (dents - 1) * sizeof(struct mac_module_data_element); + DPRINTF(("osdict: dict ele size %d ents %d\n", ele->value_size, dents)); + dicttab += ele->value_size; + DPRINTF(("osdict: new dicttab %p\n", dicttab)); + dicthd->count = dents; + astrtab = dstrtab; + } else { + continue; /* Skip everything else. */ + } + nents++; + ele++; + } + if (nents == 0) { + continue; + } + element->value_size = sizeof(struct mac_module_data_list) + + (nents - 1) * sizeof(struct mac_module_data_element); + listtab += element->value_size; + DPRINTF(("osdict: new listtab %p\n", listtab)); + arrayhd->count = nents; + strtab = astrtab; + DPRINTF(("osdict: new strtab %p\n", strtab)); + } else { + continue; /* skip anything else */ + } + element++; + } + DPRINTF(("result list @%p, key %p value %p\n", + result, result->data[0].key, result->data[0].value)); +finish: + if (keyIterator) keyIterator->release(); + return result; +} + +/********************************************************************* +* This function takes a plist and looks for an OSModuleData dictionary. +* If it is found, an encoded copy is returned. The value must be +* kmem_free()'d. 
+*********************************************************************/ +static void * +MACFCopyModuleDataForKext( + OSKext * theKext, + mach_msg_type_number_t * datalen) + +{ + struct mac_module_data * result = NULL; + OSDictionary * kextModuleData = NULL; // do not release + + kextModuleData = OSDynamicCast(OSDictionary, + theKext->getPropertyForHostArch("OSModuleData")); + if (!kextModuleData) { + goto finish; + } + + result = MACFEncodeOSDictionary(kextModuleData); + if (!result) { + goto finish; + } + *datalen = result->size; /* total encoded size recorded by MACFEncodeOSDictionary() */ + +finish: + return (void *)result; +} +#endif /* CONFIG_MACF_KEXT */ diff --git a/libkern/c++/OSMetaClass.cpp b/libkern/c++/OSMetaClass.cpp index f7f1e0a3b..bc67307d2 100644 --- a/libkern/c++/OSMetaClass.cpp +++ b/libkern/c++/OSMetaClass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,143 +32,208 @@ #include #include - #include +#include + #include #include #include -#include +#include #include #include #include + #include #include #include #include +#include __BEGIN_DECLS #include #include -#include +#include #include #include #include -#include #include -extern void OSRuntimeUnloadCPP(kmod_info_t *ki, void *); - +#if PRAGMA_MARK +#pragma mark Macros +#endif /* PRAGMA_MARK */ +/********************************************************************* +* Macros +*********************************************************************/ #if OSALLOCDEBUG extern int debug_container_malloc_size; -#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while(0) +#define ACCUMSIZE(s) do { debug_container_malloc_size += (s); } while (0) #else #define ACCUMSIZE(s) #endif /* OSALLOCDEBUG */ __END_DECLS +#if PRAGMA_MARK +#pragma mark Internal constants & data structs +#endif /* PRAGMA_MARK */ 
+/********************************************************************* +* Internal constants & data structs +*********************************************************************/ +OSKextLogSpec kOSMetaClassLogSpec = + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | + kOSKextLogKextBookkeepingFlag; + static enum { kCompletedBootstrap = 0, - kNoDictionaries = 1, + kNoDictionaries = 1, kMakingDictionaries = 2 } sBootstrapState = kNoDictionaries; -static const int kClassCapacityIncrement = 40; -static const int kKModCapacityIncrement = 10; -static OSDictionary *sAllClassesDict, *sKModClassesDict, *sSortedByClassesDict; +static const int kClassCapacityIncrement = 40; +static const int kKModCapacityIncrement = 10; +static OSDictionary * sAllClassesDict; +IOLock * sAllClassesLock = NULL; -static mutex_t *loadLock = 0; +/* + * While loading a kext and running all its constructors to register + * all OSMetaClass classes, the classes are queued up here. Only one + * kext can be in flight at a time, guarded by sStalledClassesLock + */ static struct StalledData { - const char *kmodName; - OSReturn result; - unsigned int capacity; - unsigned int count; - OSMetaClass **classes; -} *sStalled; - -static unsigned int sConsiderUnloadDelay = 60; /* secs */ -static bool unloadsEnabled = true; // set to false when system going to sleep -static thread_call_t unloadCallout = 0; - -static const char OSMetaClassBasePanicMsg[] = - "OSMetaClassBase::_RESERVEDOSMetaClassBase%d called\n"; - + const char * kextIdentifier; + OSReturn result; + unsigned int capacity; + unsigned int count; + OSMetaClass ** classes; +} * sStalled; +IOLock * sStalledClassesLock = NULL; + +#if PRAGMA_MARK +#pragma mark OSMetaClassBase +#endif /* PRAGMA_MARK */ +/********************************************************************* +* OSMetaClassBase. 
+*********************************************************************/ + +/********************************************************************* +* Reserved vtable functions. +*********************************************************************/ #if SLOT_USED void OSMetaClassBase::_RESERVEDOSMetaClassBase0() - { panic(OSMetaClassBasePanicMsg, 0); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 0); } void OSMetaClassBase::_RESERVEDOSMetaClassBase1() - { panic(OSMetaClassBasePanicMsg, 1); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 1); } void OSMetaClassBase::_RESERVEDOSMetaClassBase2() - { panic(OSMetaClassBasePanicMsg, 2); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 2); } #endif /* SLOT_USED */ // As these slots are used move them up inside the #if above void OSMetaClassBase::_RESERVEDOSMetaClassBase3() - { panic(OSMetaClassBasePanicMsg, 3); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 3); } void OSMetaClassBase::_RESERVEDOSMetaClassBase4() - { panic(OSMetaClassBasePanicMsg, 4); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 4); } void OSMetaClassBase::_RESERVEDOSMetaClassBase5() - { panic(OSMetaClassBasePanicMsg, 5); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 5); } void OSMetaClassBase::_RESERVEDOSMetaClassBase6() - { panic(OSMetaClassBasePanicMsg, 6); } + { panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 6); } -/* - * These used to be inline in the header but gcc didn't believe us - * Now we MUST pull the inline out at least until the compiler is - * repaired. - */ -// Helper inlines for runtime type preprocessor macros -OSMetaClassBase *OSMetaClassBase:: -safeMetaCast(const OSMetaClassBase *me, const OSMetaClass *toType) - { return (me)? 
me->metaCast(toType) : 0; } +/********************************************************************* +* These used to be inline in the header but gcc didn't believe us +* Now we MUST pull the inline out at least until the compiler is +* repaired. +* +* Helper inlines for runtime type preprocessor macros +*********************************************************************/ + +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClassBase::safeMetaCast( + const OSMetaClassBase * me, + const OSMetaClass * toType) +{ + return (me)? me->metaCast(toType) : 0; +} -bool OSMetaClassBase:: -checkTypeInst(const OSMetaClassBase *inst, const OSMetaClassBase *typeinst) +/********************************************************************* +*********************************************************************/ +bool +OSMetaClassBase::checkTypeInst( + const OSMetaClassBase * inst, + const OSMetaClassBase * typeinst) { - const OSMetaClass *toType = OSTypeIDInst(typeinst); + const OSMetaClass * toType = OSTypeIDInst(typeinst); return typeinst && inst && (0 != inst->metaCast(toType)); } +/********************************************************************* +*********************************************************************/ +void OSMetaClassBase:: +initialize() +{ + sAllClassesLock = IOLockAlloc(); + sStalledClassesLock = IOLockAlloc(); +} -// If you need this slot you had better setup an IOCTL style interface. -// 'Cause the whole kernel world depends on OSMetaClassBase and YOU -// CANT change the VTABLE size ever. -void OSMetaClassBase::_RESERVEDOSMetaClassBase7() - { panic(OSMetaClassBasePanicMsg, 7); } - +/********************************************************************* +* If you need this slot you had better setup an IOCTL style interface. +* 'Cause the whole kernel world depends on OSMetaClassBase and YOU +* CANT change the VTABLE size ever. 
+*********************************************************************/ +void +OSMetaClassBase::_RESERVEDOSMetaClassBase7() +{ panic("OSMetaClassBase::_RESERVEDOSMetaClassBase%d called.", 7); } + +/********************************************************************* +*********************************************************************/ OSMetaClassBase::OSMetaClassBase() { } +/********************************************************************* +*********************************************************************/ OSMetaClassBase::~OSMetaClassBase() { - void **thisVTable; + void ** thisVTable; thisVTable = (void **) this; *thisVTable = (void *) -1UL; } -bool OSMetaClassBase::isEqualTo(const OSMetaClassBase *anObj) const +/********************************************************************* +*********************************************************************/ +bool +OSMetaClassBase::isEqualTo(const OSMetaClassBase * anObj) const { return this == anObj; } -OSMetaClassBase *OSMetaClassBase::metaCast(const OSMetaClass *toMeta) const +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClassBase::metaCast(const OSMetaClass * toMeta) const { return toMeta->checkMetaCast(this); } -OSMetaClassBase *OSMetaClassBase::metaCast(const OSSymbol *toMetaSymb) const +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClassBase::metaCast(const OSSymbol * toMetaSymb) const { return OSMetaClass::checkMetaCastWithName(toMetaSymb, this); } -OSMetaClassBase *OSMetaClassBase::metaCast(const OSString *toMetaStr) const +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClassBase::metaCast(const OSString * toMetaStr) const { - const 
OSSymbol *tempSymb = OSSymbol::withString(toMetaStr); - OSMetaClassBase *ret = 0; + const OSSymbol * tempSymb = OSSymbol::withString(toMetaStr); + OSMetaClassBase * ret = 0; if (tempSymb) { ret = metaCast(tempSymb); tempSymb->release(); @@ -176,10 +241,13 @@ OSMetaClassBase *OSMetaClassBase::metaCast(const OSString *toMetaStr) const return ret; } -OSMetaClassBase *OSMetaClassBase::metaCast(const char *toMetaCStr) const +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClassBase::metaCast(const char * toMetaCStr) const { - const OSSymbol *tempSymb = OSSymbol::withCString(toMetaCStr); - OSMetaClassBase *ret = 0; + const OSSymbol * tempSymb = OSSymbol::withCString(toMetaCStr); + OSMetaClassBase * ret = 0; if (tempSymb) { ret = metaCast(tempSymb); tempSymb->release(); @@ -187,16 +255,22 @@ OSMetaClassBase *OSMetaClassBase::metaCast(const char *toMetaCStr) const return ret; } +#if PRAGMA_MARK +#pragma mark OSMetaClassMeta +#endif /* PRAGMA_MARK */ +/********************************************************************* +* OSMetaClassMeta - the bootstrap metaclass of OSMetaClass +*********************************************************************/ class OSMetaClassMeta : public OSMetaClass { public: OSMetaClassMeta(); - OSObject *alloc() const; + OSObject * alloc() const; }; OSMetaClassMeta::OSMetaClassMeta() : OSMetaClass("OSMetaClass", 0, sizeof(OSMetaClass)) { } -OSObject *OSMetaClassMeta::alloc() const { return 0; } +OSObject * OSMetaClassMeta::alloc() const { return 0; } static OSMetaClassMeta sOSMetaClassMeta; @@ -204,692 +278,741 @@ const OSMetaClass * const OSMetaClass::metaClass = &sOSMetaClassMeta; const OSMetaClass * OSMetaClass::getMetaClass() const { return &sOSMetaClassMeta; } -static const char OSMetaClassPanicMsg[] = - "OSMetaClass::_RESERVEDOSMetaClass%d called\n"; +#if PRAGMA_MARK +#pragma mark OSMetaClass +#endif /* PRAGMA_MARK */ 
+/********************************************************************* +* OSMetaClass +*********************************************************************/ +/********************************************************************* +* Reserved functions. +*********************************************************************/ void OSMetaClass::_RESERVEDOSMetaClass0() - { panic(OSMetaClassPanicMsg, 0); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 0); } void OSMetaClass::_RESERVEDOSMetaClass1() - { panic(OSMetaClassPanicMsg, 1); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 1); } void OSMetaClass::_RESERVEDOSMetaClass2() - { panic(OSMetaClassPanicMsg, 2); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 2); } void OSMetaClass::_RESERVEDOSMetaClass3() - { panic(OSMetaClassPanicMsg, 3); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 3); } void OSMetaClass::_RESERVEDOSMetaClass4() - { panic(OSMetaClassPanicMsg, 4); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 4); } void OSMetaClass::_RESERVEDOSMetaClass5() - { panic(OSMetaClassPanicMsg, 5); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 5); } void OSMetaClass::_RESERVEDOSMetaClass6() - { panic(OSMetaClassPanicMsg, 6); } + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 6); } void OSMetaClass::_RESERVEDOSMetaClass7() - { panic(OSMetaClassPanicMsg, 7); } - -void OSMetaClass::logError(OSReturn result) + { panic("OSMetaClass::_RESERVEDOSMetaClass%d called", 7); } + +/********************************************************************* +*********************************************************************/ +static void +OSMetaClassLogErrorForKext( + OSReturn error, + OSKext * aKext) { - const char *msg; + const char * message = NULL; - switch (result) { - case kOSMetaClassNoInit: - msg="OSMetaClass::preModLoad wasn't called, runtime internal error"; - break; + switch (error) { + case kOSReturnSuccess: + return; + case kOSMetaClassNoInit: // xxx - never 
returned; logged at fail site + message = "OSMetaClass: preModLoad() wasn't called (runtime internal error)."; + break; case kOSMetaClassNoDicts: - msg="Allocation failure for Metaclass internal dictionaries"; break; + message = "OSMetaClass: Allocation failure for OSMetaClass internal dictionaries."; + break; case kOSMetaClassNoKModSet: - msg="Allocation failure for internal kmodule set"; break; + message = "OSMetaClass: Allocation failure for internal kext recording set/set missing."; + break; case kOSMetaClassNoInsKModSet: - msg="Can't insert the KMod set into the module dictionary"; break; + message = "OSMetaClass: Failed to record class in kext."; + break; case kOSMetaClassDuplicateClass: - msg="Duplicate class"; break; - case kOSMetaClassNoSuper: - msg="Can't associate a class with its super class"; break; - case kOSMetaClassInstNoSuper: - msg="Instance construction, unknown super class."; break; - default: + message = "OSMetaClass: Duplicate class encountered."; + break; + case kOSMetaClassNoSuper: // xxx - never returned + message = "OSMetaClass: Can't associate a class with its superclass."; + break; + case kOSMetaClassInstNoSuper: // xxx - never returned + message = "OSMetaClass: Instance construction error; unknown superclass."; + break; + case kOSMetaClassNoKext: + message = "OSMetaClass: Kext not found for metaclass."; + break; case kOSMetaClassInternal: - msg="runtime internal error"; break; - case kOSReturnSuccess: - return; + default: + message = "OSMetaClass: Runtime internal error."; + break; + } + + if (message) { + OSKextLog(aKext, kOSMetaClassLogSpec, "%s", message); } - printf("%s\n", msg); + return; } -OSMetaClass::OSMetaClass(const char *inClassName, - const OSMetaClass *inSuperClass, - unsigned int inClassSize) +void +OSMetaClass::logError(OSReturn error) +{ + OSMetaClassLogErrorForKext(error, NULL); +} + +/********************************************************************* +* The core constructor for a MetaClass (defined with this name 
always +* but within the scope of its represented class). +* +* MetaClass constructors are invoked in OSRuntimeInitializeCPP(), +* in between calls to OSMetaClass::preModLoad(), which sets up for +* registration, and OSMetaClass::postModLoad(), which actually +* records all the class/kext relationships of the new MetaClasses. +*********************************************************************/ +OSMetaClass::OSMetaClass( + const char * inClassName, + const OSMetaClass * inSuperClass, + unsigned int inClassSize) { instanceCount = 0; classSize = inClassSize; superClassLink = inSuperClass; - className = (const OSSymbol *) inClassName; + /* Hack alert: We are just casting inClassName and storing it in + * an OSString * instance variable. This may be because you can't + * create C++ objects in static constructors, but I really don't know! + */ + className = (const OSSymbol *)inClassName; + // sStalledClassesLock taken in preModLoad if (!sStalled) { - printf("OSMetaClass::preModLoad wasn't called for %s, " - "runtime internal error\n", inClassName); + /* There's no way we can look up the kext here, unfortunately. 
+ */ + OSKextLog(/* kext */ NULL, kOSMetaClassLogSpec, + "OSMetaClass: preModLoad() wasn't called for class %s " + "(runtime internal error).", + inClassName); } else if (!sStalled->result) { - // Grow stalled array if neccessary - if (sStalled->count >= sStalled->capacity) { - OSMetaClass **oldStalled = sStalled->classes; - int oldSize = sStalled->capacity * sizeof(OSMetaClass *); - int newSize = oldSize - + kKModCapacityIncrement * sizeof(OSMetaClass *); - - sStalled->classes = (OSMetaClass **) kalloc(newSize); - if (!sStalled->classes) { - sStalled->classes = oldStalled; - sStalled->result = kOSMetaClassNoTempData; - return; - } - - sStalled->capacity += kKModCapacityIncrement; - memmove(sStalled->classes, oldStalled, oldSize); - kfree(oldStalled, oldSize); - ACCUMSIZE(newSize - oldSize); - } - - sStalled->classes[sStalled->count++] = this; + // Grow stalled array if neccessary + if (sStalled->count >= sStalled->capacity) { + OSMetaClass **oldStalled = sStalled->classes; + int oldSize = sStalled->capacity * sizeof(OSMetaClass *); + int newSize = oldSize + + kKModCapacityIncrement * sizeof(OSMetaClass *); + + sStalled->classes = (OSMetaClass **)kalloc(newSize); + if (!sStalled->classes) { + sStalled->classes = oldStalled; + sStalled->result = kOSMetaClassNoTempData; + return; + } + + sStalled->capacity += kKModCapacityIncrement; + memmove(sStalled->classes, oldStalled, oldSize); + kfree(oldStalled, oldSize); + ACCUMSIZE(newSize - oldSize); + } + + sStalled->classes[sStalled->count++] = this; } } +/********************************************************************* +*********************************************************************/ OSMetaClass::~OSMetaClass() { - do { - OSCollectionIterator *iter; - - if (sAllClassesDict) { - sAllClassesDict->removeObject(className); - className->release(); - } - - iter = OSCollectionIterator::withCollection(sKModClassesDict); - if (!iter) - break; - - OSSymbol *iterKey; - while ( (iterKey = (OSSymbol *) 
iter->getNextObject()) ) { - OSSet *kmodClassSet; - kmodClassSet = (OSSet *) sKModClassesDict->getObject(iterKey); - if (kmodClassSet && kmodClassSet->containsObject(this)) { - kmodClassSet->removeObject(this); - break; - } - } - iter->release(); - } while (false); + OSKext * myKext = (OSKext *)reserved; // do not release + + /* Hack alert: 'className' is a C string during early C++ init, and + * is converted to a real OSSymbol only when we record the OSKext in + * OSMetaClass::postModLoad(). So only do this bit if we have an OSKext. + * We can't safely cast or check 'className'. + * + * Also, release className *after* calling into the kext, + * as removeClass() may access className. + */ + IOLockLock(sAllClassesLock); + if (sAllClassesDict) { + if (myKext) { + sAllClassesDict->removeObject(className); + } else { + sAllClassesDict->removeObject((char *)className); + } + } + IOLockUnlock(sAllClassesLock); + + if (myKext) { + if (myKext->removeClass(this) != kOSReturnSuccess) { + // xxx - what can we do? + } + className->release(); + } + // sStalledClassesLock taken in preModLoad if (sStalled) { - unsigned int i; - - // First pass find class in stalled list - for (i = 0; i < sStalled->count; i++) - if (this == sStalled->classes[i]) - break; - - if (i < sStalled->count) { - sStalled->count--; - if (i < sStalled->count) - memmove(&sStalled->classes[i], &sStalled->classes[i+1], - (sStalled->count - i) * sizeof(OSMetaClass *)); - } + unsigned int i; + + /* First pass find class in stalled list. If we find it that means + * we started C++ init with constructors but now we're tearing down + * because of some failure. + */ + for (i = 0; i < sStalled->count; i++) { + if (this == sStalled->classes[i]) { + break; + } + } + + /* Remove this metaclass from the stalled list so postModLoad() doesn't + * try to register it. 
+ */ + if (i < sStalled->count) { + sStalled->count--; + if (i < sStalled->count) { + memmove(&sStalled->classes[i], &sStalled->classes[i+1], + (sStalled->count - i) * sizeof(OSMetaClass *)); + } + } } } -void *OSMetaClass::operator new(__unused size_t size) { return 0; } +/********************************************************************* +* Empty overrides. +*********************************************************************/ +void * OSMetaClass::operator new(__unused size_t size) { return 0; } void OSMetaClass::retain() const { } void OSMetaClass::release() const { } void OSMetaClass::release(__unused int when) const { } -void OSMetaClass::taggedRetain(__unused const void *tag) const { } -void OSMetaClass::taggedRelease(__unused const void *tag) const { } -void OSMetaClass::taggedRelease(__unused const void *tag, __unused const int when) const { } +void OSMetaClass::taggedRetain(__unused const void * tag) const { } +void OSMetaClass::taggedRelease(__unused const void * tag) const { } +void OSMetaClass::taggedRelease(__unused const void * tag, __unused const int when) const { } int OSMetaClass::getRetainCount() const { return 0; } -const char *OSMetaClass::getClassName() const +/********************************************************************* +*********************************************************************/ +const char * +OSMetaClass::getClassName() const { return className->getCStringNoCopy(); } -unsigned int OSMetaClass::getClassSize() const +/********************************************************************* +*********************************************************************/ +unsigned int +OSMetaClass::getClassSize() const { return classSize; } -void *OSMetaClass::preModLoad(const char *kmodName) +/********************************************************************* +*********************************************************************/ +void * +OSMetaClass::preModLoad(const char * kextIdentifier) { - if (!loadLock) { - loadLock = 
mutex_alloc(0); - mutex_lock(loadLock); - } - else - mutex_lock(loadLock); + IOLockLock(sStalledClassesLock); - sStalled = (StalledData *) kalloc(sizeof(*sStalled)); + assert (sStalled == NULL); + sStalled = (StalledData *)kalloc(sizeof(* sStalled)); if (sStalled) { - sStalled->classes = (OSMetaClass **) - kalloc(kKModCapacityIncrement * sizeof(OSMetaClass *)); - if (!sStalled->classes) { - kfree(sStalled, sizeof(*sStalled)); - return 0; - } - ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) + sizeof(*sStalled)); + sStalled->classes = (OSMetaClass **) + kalloc(kKModCapacityIncrement * sizeof(OSMetaClass *)); + if (!sStalled->classes) { + kfree(sStalled, sizeof(*sStalled)); + return 0; + } + ACCUMSIZE((kKModCapacityIncrement * sizeof(OSMetaClass *)) + + sizeof(*sStalled)); sStalled->result = kOSReturnSuccess; - sStalled->capacity = kKModCapacityIncrement; - sStalled->count = 0; - sStalled->kmodName = kmodName; - bzero(sStalled->classes, kKModCapacityIncrement * sizeof(OSMetaClass *)); + sStalled->capacity = kKModCapacityIncrement; + sStalled->count = 0; + sStalled->kextIdentifier = kextIdentifier; + bzero(sStalled->classes, kKModCapacityIncrement * sizeof(OSMetaClass *)); } + // keep sStalledClassesLock locked until postModLoad + return sStalled; } -bool OSMetaClass::checkModLoad(void *loadHandle) +/********************************************************************* +*********************************************************************/ +bool +OSMetaClass::checkModLoad(void * loadHandle) { - return sStalled && loadHandle == sStalled - && sStalled->result == kOSReturnSuccess; + return sStalled && loadHandle == sStalled && + sStalled->result == kOSReturnSuccess; } -OSReturn OSMetaClass::postModLoad(void *loadHandle) +/********************************************************************* +*********************************************************************/ +OSReturn +OSMetaClass::postModLoad(void * loadHandle) { - OSReturn result = kOSReturnSuccess; - 
OSSet *kmodSet = 0; - OSSymbol *myname = 0; + OSReturn result = kOSReturnSuccess; + OSSymbol * myKextName = 0; // must release + OSKext * myKext = 0; // must release if (!sStalled || loadHandle != sStalled) { - logError(kOSMetaClassInternal); - return kOSMetaClassInternal; + result = kOSMetaClassInternal; + goto finish; } + + if (sStalled->result) { + result = sStalled->result; + } else switch (sBootstrapState) { + + case kNoDictionaries: + sBootstrapState = kMakingDictionaries; + // No break; fall through + + case kMakingDictionaries: + sAllClassesDict = OSDictionary::withCapacity(kClassCapacityIncrement); + if (!sAllClassesDict) { + result = kOSMetaClassNoDicts; + break; + } - if (sStalled->result) - result = sStalled->result; - else switch (sBootstrapState) { - case kNoDictionaries: - sBootstrapState = kMakingDictionaries; - // No break; fall through - - case kMakingDictionaries: - sKModClassesDict = OSDictionary::withCapacity(kKModCapacityIncrement); - sAllClassesDict = OSDictionary::withCapacity(kClassCapacityIncrement); - sSortedByClassesDict = OSDictionary::withCapacity(kClassCapacityIncrement); - if (!sAllClassesDict || !sKModClassesDict || !sSortedByClassesDict) { - result = kOSMetaClassNoDicts; - break; - } - // No break; fall through - - case kCompletedBootstrap: - { - unsigned int i; - myname = (OSSymbol *)OSSymbol::withCStringNoCopy(sStalled->kmodName); - - if (!sStalled->count) - break; // Nothing to do so just get out - - // First pass checking classes aren't already loaded - for (i = 0; i < sStalled->count; i++) { - OSMetaClass *me = sStalled->classes[i]; + // No break; fall through - if (0 != sAllClassesDict->getObject((const char *) me->className)) { - printf("Class \"%s\" is duplicate\n", (const char *) me->className); - result = kOSMetaClassDuplicateClass; + case kCompletedBootstrap: + { + unsigned int i; + myKextName = const_cast(OSSymbol::withCStringNoCopy( + sStalled->kextIdentifier)); + + if (!sStalled->count) { + break; // Nothing to do so 
just get out + } + + myKext = OSKext::lookupKextWithIdentifier(myKextName); + if (!myKext) { + result = kOSMetaClassNoKext; + + /* Log this error here so we can include the kext name. + */ + OSKextLog(/* kext */ NULL, kOSMetaClassLogSpec, + "OSMetaClass: Can't record classes for kext %s - kext not found.", + sStalled->kextIdentifier); + break; + } + + /* First pass checking classes aren't already loaded. If any already + * exist, we don't register any, and so we don't technically have + * to do any C++ teardown. + * + * Hack alert: me->className has been a C string until now. + * We only release the OSSymbol if we store the kext. + */ + IOLockLock(sAllClassesLock); + for (i = 0; i < sStalled->count; i++) { + OSMetaClass * me = sStalled->classes[i]; + OSMetaClass * orig = OSDynamicCast(OSMetaClass, + sAllClassesDict->getObject((const char *)me->className)); + + if (orig) { + + /* Log this error here so we can include the class name. + * xxx - we should look up the other kext that defines the class + */ + OSKextLog(myKext, kOSMetaClassLogSpec, + "OSMetaClass: Kext %s class %s is a duplicate;" + "kext %s already has a class by that name.", + sStalled->kextIdentifier, (const char *)me->className, + ((OSKext *)orig->reserved)->getIdentifierCString()); + result = kOSMetaClassDuplicateClass; + break; + } + } + IOLockUnlock(sAllClassesLock); + + /* Bail if we didn't go through the entire list of new classes + * (if we hit a duplicate). 
+ */ + if (i != sStalled->count) { break; } - } - if (i != sStalled->count) - break; - - kmodSet = OSSet::withCapacity(sStalled->count); - if (!kmodSet) { - result = kOSMetaClassNoKModSet; - break; - } - - if (!sKModClassesDict->setObject(myname, kmodSet)) { - result = kOSMetaClassNoInsKModSet; - break; - } - - // Second pass symbolling strings and inserting classes in dictionary - for (i = 0; i < sStalled->count; i++) { - OSMetaClass *me = sStalled->classes[i]; - me->className = - OSSymbol::withCStringNoCopy((const char *) me->className); - - sAllClassesDict->setObject(me->className, me); - kmodSet->setObject(me); - sSortedByClassesDict->setObject((const OSSymbol *)me, myname); - } - sBootstrapState = kCompletedBootstrap; - break; - } - default: - result = kOSMetaClassInternal; - break; + // Second pass symbolling strings and inserting classes in dictionary + IOLockLock(sAllClassesLock); + for (i = 0; i < sStalled->count; i++) { + OSMetaClass * me = sStalled->classes[i]; + + /* Hack alert: me->className has been a C string until now. + * We only release the OSSymbol in ~OSMetaClass() + * if we set the reference to the kext. + */ + me->className = + OSSymbol::withCStringNoCopy((const char *)me->className); + + // xxx - I suppose if these fail we're going to panic soon.... + sAllClassesDict->setObject(me->className, me); + + /* Do not retain the kext object here. + */ + me->reserved = (ExpansionData *)myKext; + if (myKext) { + result = myKext->addClass(me, sStalled->count); + if (result != kOSReturnSuccess) { + /* OSKext::addClass() logs with kOSMetaClassNoInsKModSet. */ + break; + } + } + } + IOLockUnlock(sAllClassesLock); + sBootstrapState = kCompletedBootstrap; + break; + } + + default: + result = kOSMetaClassInternal; + break; } + +finish: + /* Don't call logError() for success or the conditions logged above + * or by called function. 
+ */ + if (result != kOSReturnSuccess && + result != kOSMetaClassNoInsKModSet && + result != kOSMetaClassDuplicateClass && + result != kOSMetaClassNoKext) { - if (kmodSet) - kmodSet->release(); + OSMetaClassLogErrorForKext(result, myKext); + } - if (myname) - myname->release(); + OSSafeRelease(myKextName); + OSSafeRelease(myKext); if (sStalled) { - ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) - + sizeof(*sStalled))); - kfree(sStalled->classes, - sStalled->capacity * sizeof(OSMetaClass *)); - kfree(sStalled, sizeof(*sStalled)); - sStalled = 0; + ACCUMSIZE(-(sStalled->capacity * sizeof(OSMetaClass *) + + sizeof(*sStalled))); + kfree(sStalled->classes, sStalled->capacity * sizeof(OSMetaClass *)); + kfree(sStalled, sizeof(*sStalled)); + sStalled = 0; } + + IOLockUnlock(sStalledClassesLock); - logError(result); - mutex_unlock(loadLock); return result; } -void OSMetaClass::instanceConstructed() const +/********************************************************************* +*********************************************************************/ +void +OSMetaClass::instanceConstructed() const { - // if ((0 == OSIncrementAtomic((SInt32 *)&(((OSMetaClass *) this)->instanceCount))) && superClassLink) - if ((0 == OSIncrementAtomic((SInt32 *) &instanceCount)) && superClassLink) - superClassLink->instanceConstructed(); -} - -void OSMetaClass::instanceDestructed() const -{ - if ((1 == OSDecrementAtomic((SInt32 *) &instanceCount)) && superClassLink) - superClassLink->instanceDestructed(); - - if( ((int) instanceCount) < 0) - printf("%s: bad retain(%d)", getClassName(), instanceCount); -} - -bool OSMetaClass::modHasInstance(const char *kmodName) -{ - bool result = false; - - if (!loadLock) { - loadLock = mutex_alloc(0); - mutex_lock(loadLock); + // if ((0 == OSIncrementAtomic(&(((OSMetaClass *) this)->instanceCount))) && superClassLink) + if ((0 == OSIncrementAtomic(&instanceCount)) && superClassLink) { + superClassLink->instanceConstructed(); } - else - 
mutex_lock(loadLock); - - do { - OSSet *kmodClasses; - OSCollectionIterator *iter; - OSMetaClass *checkClass; - - kmodClasses = OSDynamicCast(OSSet, - sKModClassesDict->getObject(kmodName)); - if (!kmodClasses) - break; - - iter = OSCollectionIterator::withCollection(kmodClasses); - if (!iter) - break; - - while ( (checkClass = (OSMetaClass *) iter->getNextObject()) ) - if (checkClass->getInstanceCount()) { - result = true; - break; - } - - iter->release(); - } while (false); - - mutex_unlock(loadLock); - - return result; } -void OSMetaClass::reportModInstances(const char *kmodName) +/********************************************************************* +*********************************************************************/ +void +OSMetaClass::instanceDestructed() const { - OSSet *kmodClasses; - OSCollectionIterator *iter; - OSMetaClass *checkClass; - - kmodClasses = OSDynamicCast(OSSet, - sKModClassesDict->getObject(kmodName)); - if (!kmodClasses) - return; - - iter = OSCollectionIterator::withCollection(kmodClasses); - if (!iter) - return; - - while ( (checkClass = (OSMetaClass *) iter->getNextObject()) ) - if (checkClass->getInstanceCount()) { - printf("%s: %s has %d instance(s)\n", - kmodName, - checkClass->getClassName(), - checkClass->getInstanceCount()); - } - - iter->release(); -} - - -extern "C" { + if ((1 == OSDecrementAtomic(&instanceCount)) && superClassLink) { + superClassLink->instanceDestructed(); + } -IOReturn OSMetaClassSystemSleepOrWake(UInt32 messageType) -{ - mutex_lock(loadLock); + if (((int)instanceCount) < 0) { + OSKext * myKext = (OSKext *)reserved; - /* If the system is going to sleep, cancel the reaper thread timer - * and mark unloads disabled in case it just fired but hasn't - * taken the lock yet. If we are coming back from sleep, just - * set unloads enabled; IOService's normal operation will cause - * unloads to be considered soon enough. 
- */ - if (messageType == kIOMessageSystemWillSleep) { - if (unloadCallout) { - thread_call_cancel(unloadCallout); - } - unloadsEnabled = false; - } else if (messageType == kIOMessageSystemHasPoweredOn) { - unloadsEnabled = true; + OSKextLog(myKext, kOSMetaClassLogSpec, + // xxx - this phrasing is rather cryptic + "OSMetaClass: Class %s - bad retain (%d)", + getClassName(), instanceCount); } - mutex_unlock(loadLock); - - return kIOReturnSuccess; } -}; - -extern "C" kern_return_t kmod_unload_cache(void); - -static void _OSMetaClassConsiderUnloads(__unused thread_call_param_t p0, - __unused thread_call_param_t p1) +/********************************************************************* +*********************************************************************/ +bool +OSMetaClass::modHasInstance(const char * kextIdentifier) { - OSSet *kmodClasses; - OSSymbol *kmodName; - OSCollectionIterator *kmods; - OSCollectionIterator *classes; - OSMetaClass *checkClass; - kmod_info_t *ki = 0; - kern_return_t ret; - bool didUnload; - - mutex_lock(loadLock); - - if (!unloadsEnabled) { - mutex_unlock(loadLock); - return; + bool result = false; + OSKext * theKext = NULL; // must release + + theKext = OSKext::lookupKextWithIdentifier(kextIdentifier); + if (!theKext) { + goto finish; } - - do { - - kmods = OSCollectionIterator::withCollection(sKModClassesDict); - if (!kmods) - break; - - didUnload = false; - while ( (kmodName = (OSSymbol *) kmods->getNextObject()) ) { - - if (ki) { - kfree(ki, sizeof(kmod_info_t)); - ki = 0; - } - - ki = kmod_lookupbyname_locked((char *)kmodName->getCStringNoCopy()); - if (!ki) - continue; - - if (ki->reference_count) { - continue; - } - - kmodClasses = OSDynamicCast(OSSet, - sKModClassesDict->getObject(kmodName)); - classes = OSCollectionIterator::withCollection(kmodClasses); - if (!classes) - continue; - while ((checkClass = (OSMetaClass *) classes->getNextObject()) - && (0 == checkClass->getInstanceCount())) - {} - classes->release(); - - if (0 == 
checkClass) { - record_kext_unload(ki->id); - OSRuntimeUnloadCPP(ki, 0); // call destructors - ret = kmod_destroy(host_priv_self(), ki->id); - didUnload = true; - } - - } - - kmods->release(); - - } while (didUnload); + result = theKext->hasOSMetaClassInstances(); - mutex_unlock(loadLock); - - kmod_unload_cache(); +finish: + OSSafeRelease(theKext); + return result; } -void OSMetaClass::considerUnloads() +/********************************************************************* +*********************************************************************/ +void +OSMetaClass::reportModInstances(const char * kextIdentifier) { - AbsoluteTime when; - - mutex_lock(loadLock); - - if (!unloadCallout) - unloadCallout = thread_call_allocate(&_OSMetaClassConsiderUnloads, 0); - - thread_call_cancel(unloadCallout); - clock_interval_to_deadline(sConsiderUnloadDelay, 1000 * 1000 * 1000, &when); - thread_call_enter_delayed(unloadCallout, when); - - mutex_unlock(loadLock); + OSKext::reportOSMetaClassInstances(kextIdentifier, + kOSKextLogExplicitLevel); + return; } -const OSMetaClass *OSMetaClass::getMetaClassWithName(const OSSymbol *name) +/********************************************************************* +*********************************************************************/ +void +OSMetaClass::considerUnloads() { - OSMetaClass *retMeta = 0; - - if (!name) - return 0; - - if (sAllClassesDict) - retMeta = (OSMetaClass *) sAllClassesDict->getObject(name); + OSKext::considerUnloads(); +} - if (!retMeta && sStalled) - { - // Oh dear we have to scan the stalled list and walk the - // the stalled list manually. 
- const char *cName = name->getCStringNoCopy(); - unsigned int i; +/********************************************************************* +*********************************************************************/ +const OSMetaClass * +OSMetaClass::getMetaClassWithName(const OSSymbol * name) +{ + OSMetaClass * retMeta = 0; - // find class in stalled list - for (i = 0; i < sStalled->count; i++) { - retMeta = sStalled->classes[i]; - if (0 == strcmp(cName, (const char *) retMeta->className)) - break; - } + if (!name) { + return 0; + } - if (i < sStalled->count) - retMeta = 0; + IOLockLock(sAllClassesLock); + if (sAllClassesDict) { + retMeta = (OSMetaClass *) sAllClassesDict->getObject(name); } + IOLockUnlock(sAllClassesLock); return retMeta; } -OSObject *OSMetaClass::allocClassWithName(const OSSymbol *name) +/********************************************************************* +*********************************************************************/ +OSObject * +OSMetaClass::allocClassWithName(const OSSymbol * name) { - OSObject * result; - mutex_lock(loadLock); + OSObject * result = 0; const OSMetaClass * const meta = getMetaClassWithName(name); - if (meta) - result = meta->alloc(); - else - result = 0; - - mutex_unlock(loadLock); + if (meta) { + result = meta->alloc(); + } return result; } -OSObject *OSMetaClass::allocClassWithName(const OSString *name) +/********************************************************************* +*********************************************************************/ +OSObject * +OSMetaClass::allocClassWithName(const OSString * name) { - const OSSymbol *tmpKey = OSSymbol::withString(name); - OSObject *result = allocClassWithName(tmpKey); + const OSSymbol * tmpKey = OSSymbol::withString(name); + OSObject * result = allocClassWithName(tmpKey); tmpKey->release(); return result; } -OSObject *OSMetaClass::allocClassWithName(const char *name) +/********************************************************************* 
+*********************************************************************/ +OSObject * +OSMetaClass::allocClassWithName(const char * name) { - const OSSymbol *tmpKey = OSSymbol::withCStringNoCopy(name); - OSObject *result = allocClassWithName(tmpKey); + const OSSymbol * tmpKey = OSSymbol::withCStringNoCopy(name); + OSObject * result = allocClassWithName(tmpKey); tmpKey->release(); return result; } -OSMetaClassBase *OSMetaClass:: -checkMetaCastWithName(const OSSymbol *name, const OSMetaClassBase *in) +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * +OSMetaClass::checkMetaCastWithName( + const OSSymbol * name, + const OSMetaClassBase * in) { - OSMetaClassBase * result; - mutex_lock(loadLock); + OSMetaClassBase * result = 0; + const OSMetaClass * const meta = getMetaClassWithName(name); - if (meta) - result = meta->checkMetaCast(in); - else - result = 0; + if (meta) { + result = meta->checkMetaCast(in); + } - mutex_unlock(loadLock); return result; } -OSMetaClassBase *OSMetaClass:: -checkMetaCastWithName(const OSString *name, const OSMetaClassBase *in) +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * OSMetaClass:: +checkMetaCastWithName( + const OSString * name, + const OSMetaClassBase * in) { - const OSSymbol *tmpKey = OSSymbol::withString(name); - OSMetaClassBase *result = checkMetaCastWithName(tmpKey, in); + const OSSymbol * tmpKey = OSSymbol::withString(name); + OSMetaClassBase * result = checkMetaCastWithName(tmpKey, in); + tmpKey->release(); return result; } -OSMetaClassBase *OSMetaClass:: -checkMetaCastWithName(const char *name, const OSMetaClassBase *in) +/********************************************************************* +*********************************************************************/ +OSMetaClassBase * 
+OSMetaClass::checkMetaCastWithName( + const char * name, + const OSMetaClassBase * in) { - const OSSymbol *tmpKey = OSSymbol::withCStringNoCopy(name); - OSMetaClassBase *result = checkMetaCastWithName(tmpKey, in); + const OSSymbol * tmpKey = OSSymbol::withCStringNoCopy(name); + OSMetaClassBase * result = checkMetaCastWithName(tmpKey, in); + tmpKey->release(); return result; } -/* -OSMetaClass::checkMetaCast - checkMetaCast(const OSMetaClassBase *check) - -Check to see if the 'check' object has this object in it's metaclass chain. Returns check if it is indeed a kind of the current meta class, 0 otherwise. - -Generally this method is not invoked directly but is used to implement the OSMetaClassBase::metaCast member function. - -See also OSMetaClassBase::metaCast - - */ -OSMetaClassBase *OSMetaClass::checkMetaCast(const OSMetaClassBase *check) const +/********************************************************************* + * OSMetaClass::checkMetaCast() + * Check to see if the 'check' object has this object in its metaclass chain. + * Returns check if it is indeed a kind of the current meta class, 0 otherwise. + * + * Generally this method is not invoked directly but is used to implement + * the OSMetaClassBase::metaCast member function. 
+ * + * See also OSMetaClassBase::metaCast +*********************************************************************/ +OSMetaClassBase * OSMetaClass::checkMetaCast( + const OSMetaClassBase * check) const { - const OSMetaClass * const toMeta = this; - const OSMetaClass *fromMeta; + const OSMetaClass * const toMeta = this; + const OSMetaClass * fromMeta; for (fromMeta = check->getMetaClass(); ; fromMeta = fromMeta->superClassLink) { - if (toMeta == fromMeta) - return (OSMetaClassBase *) check; // Discard const - - if (!fromMeta->superClassLink) - break; + if (toMeta == fromMeta) { + return const_cast(check); // Discard const + } + if (!fromMeta->superClassLink) { + break; + } } return 0; } -void OSMetaClass::reservedCalled(int ind) const +/********************************************************************* +*********************************************************************/ +void +OSMetaClass::reservedCalled(int ind) const { - const char *cname = className->getCStringNoCopy(); - panic("%s::_RESERVED%s%d called\n", cname, cname, ind); + const char * cname = className->getCStringNoCopy(); + panic("%s::_RESERVED%s%d called.", cname, cname, ind); } -const OSMetaClass *OSMetaClass::getSuperClass() const +/********************************************************************* +*********************************************************************/ +const +OSMetaClass * +OSMetaClass::getSuperClass() const { return superClassLink; } -const OSSymbol *OSMetaClass::getKmodName() const -{ - return (const OSSymbol *)sSortedByClassesDict->getObject((OSSymbol *)this); +/********************************************************************* +* xxx - I want to rename this :-/ +*********************************************************************/ +const OSSymbol * +OSMetaClass::getKmodName() const +{ + OSKext * myKext = (OSKext *)reserved; + if (myKext) { + return myKext->getIdentifier(); + } + return OSSymbol::withCStringNoCopy("unknown"); } -unsigned int 
OSMetaClass::getInstanceCount() const +/********************************************************************* +*********************************************************************/ +unsigned int +OSMetaClass::getInstanceCount() const { return instanceCount; } -void OSMetaClass::printInstanceCounts() +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSMetaClass::printInstanceCounts() { - OSCollectionIterator *classes; - OSSymbol *className; - OSMetaClass *meta; + OSCollectionIterator * classes; + OSSymbol * className; + OSMetaClass * meta; + IOLockLock(sAllClassesLock); classes = OSCollectionIterator::withCollection(sAllClassesDict); - if (!classes) - return; + assert(classes); while( (className = (OSSymbol *)classes->getNextObject())) { - meta = (OSMetaClass *) sAllClassesDict->getObject(className); - assert(meta); - - printf("%24s count: %03d x 0x%03x = 0x%06x\n", - className->getCStringNoCopy(), - meta->getInstanceCount(), - meta->getClassSize(), - meta->getInstanceCount() * meta->getClassSize() ); + meta = (OSMetaClass *)sAllClassesDict->getObject(className); + assert(meta); + + printf("%24s count: %03d x 0x%03x = 0x%06x\n", + className->getCStringNoCopy(), + meta->getInstanceCount(), + meta->getClassSize(), + meta->getInstanceCount() * meta->getClassSize() ); } printf("\n"); classes->release(); + IOLockUnlock(sAllClassesLock); + return; } -OSDictionary * OSMetaClass::getClassDictionary() +/********************************************************************* +*********************************************************************/ +OSDictionary * +OSMetaClass::getClassDictionary() { - panic("OSMetaClass::getClassDictionary(): Obsoleted\n"); + panic("OSMetaClass::getClassDictionary() is obsoleted.\n"); return 0; } -bool OSMetaClass::serialize(__unused OSSerialize *s) const +/********************************************************************* 
+*********************************************************************/ +bool +OSMetaClass::serialize(__unused OSSerialize * s) const { panic("OSMetaClass::serialize(): Obsoleted\n"); return false; } -void OSMetaClass::serializeClassDictionary(OSDictionary *serializeDictionary) +/********************************************************************* +*********************************************************************/ +/* static */ +void +OSMetaClass::serializeClassDictionary(OSDictionary * serializeDictionary) { - OSDictionary *classDict; + OSDictionary * classDict = NULL; + + IOLockLock(sAllClassesLock); classDict = OSDictionary::withCapacity(sAllClassesDict->getCount()); - if (!classDict) - return; + if (!classDict) { + goto finish; + } - mutex_lock(loadLock); do { - OSCollectionIterator *classes; - const OSSymbol *className; + OSCollectionIterator * classes; + const OSSymbol * className; classes = OSCollectionIterator::withCollection(sAllClassesDict); - if (!classes) + if (!classes) { break; + } - while ((className = (const OSSymbol *) classes->getNextObject())) { - const OSMetaClass *meta; - OSNumber *count; + while ((className = (const OSSymbol *)classes->getNextObject())) { + const OSMetaClass * meta; + OSNumber * count; - meta = (OSMetaClass *) sAllClassesDict->getObject(className); + meta = (OSMetaClass *)sAllClassesDict->getObject(className); count = OSNumber::withNumber(meta->getInstanceCount(), 32); if (count) { classDict->setObject(className, count); @@ -901,7 +1024,10 @@ void OSMetaClass::serializeClassDictionary(OSDictionary *serializeDictionary) serializeDictionary->setObject("Classes", classDict); } while (0); - mutex_unlock(loadLock); +finish: + OSSafeRelease(classDict); + + IOLockUnlock(sAllClassesLock); - classDict->release(); + return; } diff --git a/libkern/c++/OSNumber.cpp b/libkern/c++/OSNumber.cpp index 87f96c780..5b3b31a91 100644 --- a/libkern/c++/OSNumber.cpp +++ b/libkern/c++/OSNumber.cpp @@ -29,10 +29,6 @@ #include -__BEGIN_DECLS 
-extern unsigned long strtoul(const char *, char **, int); -__END_DECLS - #include #include #include @@ -53,30 +49,30 @@ OSMetaClassDefineReservedUnused(OSNumber, 5); OSMetaClassDefineReservedUnused(OSNumber, 6); OSMetaClassDefineReservedUnused(OSNumber, 7); -bool OSNumber::init(unsigned long long inValue, unsigned int numberOfBits) +bool OSNumber::init(unsigned long long inValue, unsigned int newNumberOfBits) { if (!super::init()) return false; - size = numberOfBits; + size = newNumberOfBits; value = (inValue & sizeMask); return true; } -bool OSNumber::init(const char *value, unsigned int numberOfBits) +bool OSNumber::init(const char *newValue, unsigned int newNumberOfBits) { - return init((unsigned long long)strtoul(value, NULL, 0), numberOfBits); + return init((unsigned long long)strtoul(newValue, NULL, 0), newNumberOfBits); } void OSNumber::free() { super::free(); } OSNumber *OSNumber::withNumber(unsigned long long value, - unsigned int numberOfBits) + unsigned int newNumberOfBits) { OSNumber *me = new OSNumber; - if (me && !me->init(value, numberOfBits)) { + if (me && !me->init(value, newNumberOfBits)) { me->release(); return 0; } @@ -84,11 +80,11 @@ OSNumber *OSNumber::withNumber(unsigned long long value, return me; } -OSNumber *OSNumber::withNumber(const char *value, unsigned int numberOfBits) +OSNumber *OSNumber::withNumber(const char *value, unsigned int newNumberOfBits) { OSNumber *me = new OSNumber; - if (me && !me->init(value, numberOfBits)) { + if (me && !me->init(value, newNumberOfBits)) { me->release(); return 0; } diff --git a/libkern/c++/OSObject.cpp b/libkern/c++/OSObject.cpp index f92783f84..814fbbdf6 100644 --- a/libkern/c++/OSObject.cpp +++ b/libkern/c++/OSObject.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -165,7 +165,7 @@ void OSObject::taggedRetain(const void *tag) const #if !DEBUG break; // Break out of update loop which pegs the reference -#else DEBUG +#else /* DEBUG */ // @@@ gvdl: eventually need to make this panic optional // based on a boot argument i.e. debug= boot flag msg = "About to wrap the reference count, reference leak?"; @@ -175,7 +175,7 @@ void OSObject::taggedRetain(const void *tag) const } newCount = origCount + inc; - } while (!OSCompareAndSwap(origCount, newCount, (UInt32 *) countP)); + } while (!OSCompareAndSwap(origCount, newCount, const_cast(countP))); } void OSObject::taggedRelease(const void *tag) const @@ -212,7 +212,7 @@ void OSObject::taggedRelease(const void *tag, const int when) const #if !DEBUG return; // return out of function which pegs the reference -#else DEBUG +#else /* DEBUG */ // @@@ gvdl: eventually need to make this panic optional // based on a boot argument i.e. debug= boot flag panic("OSObject::refcount: %s", @@ -226,23 +226,28 @@ void OSObject::taggedRelease(const void *tag, const int when) const else newCount = actualCount; - } while (!OSCompareAndSwap(origCount, newCount, (UInt32 *) countP)); + } while (!OSCompareAndSwap(origCount, newCount, const_cast(countP))); // // This panic means that we have just attempted to release an object - // who's retain count has gone to less than the number of collections + // whose retain count has gone to less than the number of collections // it is a member off. Take a panic immediately. - // In Fact the panic MAY not be a registry corruption but it is + // In fact the panic MAY not be a registry corruption but it is // ALWAYS the wrong thing to do. I call it a registry corruption 'cause // the registry is the biggest single use of a network of collections. 
// - if ((UInt16) actualCount < (actualCount >> 16)) - panic("A driver releasing a(n) %s has corrupted the registry\n", - getClassName(this)); +// xxx - this error message is overly-specific; +// xxx - any code in the kernel could trip this, +// xxx - and it applies as noted to all collections, not just the registry + if ((UInt16) actualCount < (actualCount >> 16)) { + panic("A kext releasing a(n) %s has corrupted the registry.", + getClassName(this)); + } // Check for a 'free' condition and that if we are first through - if (newCount == 0xffff) - ((OSObject *) this)->free(); + if (newCount == 0xffff) { + (const_cast(this))->free(); + } } void OSObject::release() const diff --git a/libkern/c++/OSOrderedSet.cpp b/libkern/c++/OSOrderedSet.cpp index cabf763bb..47d5f4970 100644 --- a/libkern/c++/OSOrderedSet.cpp +++ b/libkern/c++/OSOrderedSet.cpp @@ -205,7 +205,7 @@ bool OSOrderedSet::setLastObject(const OSMetaClassBase *anObject) #define ORDER(obj1,obj2) \ - (ordering ? ((*ordering)( (OSObject *) obj1, (OSObject *) obj2, orderingRef)) : 0) + (ordering ? 
((*ordering)( (const OSObject *) obj1, (const OSObject *) obj2, orderingRef)) : 0) bool OSOrderedSet::setObject(const OSMetaClassBase *anObject ) { @@ -264,13 +264,13 @@ OSObject *OSOrderedSet::getObject( unsigned int index ) const // if( pri) // *pri = array[index].pri; - return( (OSObject *) array[index].obj ); + return( const_cast((const OSObject *) array[index].obj) ); } OSObject *OSOrderedSet::getFirstObject() const { if( count) - return( (OSObject *) array[0].obj ); + return( const_cast((const OSObject *) array[0].obj) ); else return( 0 ); } @@ -278,7 +278,7 @@ OSObject *OSOrderedSet::getFirstObject() const OSObject *OSOrderedSet::getLastObject() const { if( count) - return( (OSObject *) array[count-1].obj ); + return( const_cast((const OSObject *) array[count-1].obj) ); else return( 0 ); } @@ -342,7 +342,7 @@ getNextObjectForIterator(void *inIterator, OSObject **ret) const unsigned int index = (*iteratorP)++; if (index < count) - *ret = (OSObject *) array[index].obj; + *ret = const_cast((const OSObject *) array[index].obj); else *ret = 0; diff --git a/libkern/c++/OSRuntime.cpp b/libkern/c++/OSRuntime.cpp index ecbaaaadf..88df070d0 100644 --- a/libkern/c++/OSRuntime.cpp +++ b/libkern/c++/OSRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000,2008-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,259 +26,408 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1997 Apple Computer, Inc. + * Copyright (c) 1997 Apple Inc. * */ #include +#include #include #include -#include #include __BEGIN_DECLS #include - -struct mach_header; - #include -#include +#include #include +#if PRAGMA_MARK +#pragma mark Constants &c. 
+#endif /* PRAGMA_MARK */ +OSKextLogSpec kOSRuntimeLogSpec = + kOSKextLogErrorLevel | + kOSKextLogLoadFlag | + kOSKextLogKextBookkeepingFlag; + +#if PRAGMA_MARK +#pragma mark Logging Bootstrap +#endif /* PRAGMA_MARK */ +/********************************************************************* +* kern_os Logging Bootstrap +* +* We can't call in to OSKext until the kernel's C++ environment is up +* and running, so let's mask those references with a check variable. +* We print unconditionally if C++ isn't up, but if that's the case +* we've generally hit a serious error in kernel init! +*********************************************************************/ +static bool gKernelCPPInitialized = false; + +#define OSRuntimeLog(kext, flags, format, args...) \ + do { \ + if (gKernelCPPInitialized) { \ + OSKextLog((kext), (flags), (format), ## args); \ + } else { \ + printf((format), ## args); \ + } \ + } while (0) + + +#if PRAGMA_MARK +#pragma mark kern_os Allocator Package +#endif /* PRAGMA_MARK */ +/********************************************************************* +* kern_os Allocator Package +*********************************************************************/ + +/********************************************************************* +*********************************************************************/ #if OSALLOCDEBUG extern int debug_iomalloc_size; #endif struct _mhead { - size_t mlen; - char dat[0]; + size_t mlen; + char dat[0]; }; -void *kern_os_malloc( - size_t size) +/********************************************************************* +*********************************************************************/ +void * +kern_os_malloc(size_t size) { - struct _mhead *mem; - size_t memsize = sizeof (*mem) + size ; + struct _mhead * mem; + size_t memsize = sizeof (*mem) + size ; - if (size == 0) - return (0); + if (size == 0) { + return (0); + } - mem = (struct _mhead *)kalloc(memsize); - if (!mem) - return (0); + mem = (struct _mhead *)kalloc(memsize); + if (!mem) { 
+ return (0); + } #if OSALLOCDEBUG - debug_iomalloc_size += memsize; + debug_iomalloc_size += memsize; #endif - mem->mlen = memsize; - bzero( mem->dat, size); + mem->mlen = memsize; + bzero(mem->dat, size); - return (mem->dat); + return mem->dat; } -void kern_os_free( - void *addr) +/********************************************************************* +*********************************************************************/ +void +kern_os_free(void * addr) { - struct _mhead *hdr; + struct _mhead * hdr; - if (!addr) - return; + if (!addr) { + return; + } - hdr = (struct _mhead *) addr; hdr--; + hdr = (struct _mhead *)addr; + hdr--; #if OSALLOCDEBUG - debug_iomalloc_size -= hdr->mlen; + debug_iomalloc_size -= hdr->mlen; #endif #if 0 - memset((vm_offset_t)hdr, 0xbb, hdr->mlen); + memset((vm_offset_t)hdr, 0xbb, hdr->mlen); #else - kfree(hdr, hdr->mlen); + kfree(hdr, hdr->mlen); #endif } -void *kern_os_realloc( - void *addr, - size_t nsize) +/********************************************************************* +*********************************************************************/ +void * +kern_os_realloc( + void * addr, + size_t nsize) { - struct _mhead *ohdr; - struct _mhead *nmem; - size_t nmemsize, osize; - - if (!addr) - return (kern_os_malloc(nsize)); - - ohdr = (struct _mhead *) addr; ohdr--; - osize = ohdr->mlen - sizeof (*ohdr); - if (nsize == osize) - return (addr); - - if (nsize == 0) { - kern_os_free(addr); - return (0); - } - - nmemsize = sizeof (*nmem) + nsize ; - nmem = (struct _mhead *) kalloc(nmemsize); - if (!nmem){ - kern_os_free(addr); - return (0); - } + struct _mhead * ohdr; + struct _mhead * nmem; + size_t nmemsize, osize; + + if (!addr) { + return (kern_os_malloc(nsize)); + } + + ohdr = (struct _mhead *)addr; + ohdr--; + osize = ohdr->mlen - sizeof(*ohdr); + if (nsize == osize) { + return (addr); + } + + if (nsize == 0) { + kern_os_free(addr); + return (0); + } + + nmemsize = sizeof (*nmem) + nsize ; + nmem = (struct _mhead *) kalloc(nmemsize); + 
if (!nmem){ + kern_os_free(addr); + return (0); + } #if OSALLOCDEBUG - debug_iomalloc_size += (nmemsize - ohdr->mlen); + debug_iomalloc_size += (nmemsize - ohdr->mlen); #endif - nmem->mlen = nmemsize; - if (nsize > osize) - (void) memset(&nmem->dat[osize], 0, nsize - osize); - (void) memcpy(nmem->dat, ohdr->dat, - (nsize > osize) ? osize : nsize); - kfree(ohdr, ohdr->mlen); + nmem->mlen = nmemsize; + if (nsize > osize) { + (void) memset(&nmem->dat[osize], 0, nsize - osize); + } + (void)memcpy(nmem->dat, ohdr->dat, (nsize > osize) ? osize : nsize); + kfree(ohdr, ohdr->mlen); - return (nmem->dat); + return (nmem->dat); } -size_t kern_os_malloc_size( - void *addr) +/********************************************************************* +*********************************************************************/ +size_t +kern_os_malloc_size(void * addr) { - struct _mhead *hdr; + struct _mhead * hdr; - if (!addr) - return( 0); + if (!addr) { + return(0); + } - hdr = (struct _mhead *) addr; hdr--; - return( hdr->mlen - sizeof (struct _mhead)); + hdr = (struct _mhead *) addr; hdr--; + return hdr->mlen - sizeof (struct _mhead); } +#if PRAGMA_MARK +#pragma mark C++ Runtime Load/Unload +#endif /* PRAGMA_MARK */ +/********************************************************************* +* kern_os C++ Runtime Load/Unload +*********************************************************************/ + +/********************************************************************* +*********************************************************************/ #if __GNUC__ >= 3 -void __cxa_pure_virtual( void ) { panic(__FUNCTION__); } +void __cxa_pure_virtual( void ) { panic("%s", __FUNCTION__); } #else -void __pure_virtual( void ) { panic(__FUNCTION__); } +void __pure_virtual( void ) { panic("%s", __FUNCTION__); } #endif typedef void (*structor_t)(void); -// Given a pointer to a 32 bit mach object segment, iterate the segment to -// obtain a 32 bit destructor section for C++ objects, and call each of the -// 
destructors there. +/********************************************************************* +* OSRuntimeUnloadCPPForSegment() +* +* Given a pointer to a mach object segment, iterate the segment to +* obtain a destructor section for C++ objects, and call each of the +* destructors there. +*********************************************************************/ + void -OSRuntimeUnloadCPPForSegment(struct segment_command * segment) { +OSRuntimeUnloadCPPForSegmentInKmod( + kernel_segment_command_t * segment, + kmod_info_t * kmodInfo) +{ + + kernel_section_t * section = NULL; // do not free + OSKext * theKext = NULL; // must release - struct section * section; + if (gKernelCPPInitialized && kmodInfo) { + theKext = OSKext::lookupKextWithIdentifier(kmodInfo->name); + } for (section = firstsect(segment); section != 0; section = nextsect(segment, section)) { - if (strcmp(section->sectname, "__destructor") == 0) { + if (strncmp(section->sectname, SECT_DESTRUCTOR, + sizeof(SECT_DESTRUCTOR)) == 0) { + structor_t * destructors = (structor_t *)section->addr; if (destructors) { int num_destructors = section->size / sizeof(structor_t); + int hit_null_destructor = 0; for (int i = 0; i < num_destructors; i++) { - (*destructors[i])(); + if (destructors[i]) { + (*destructors[i])(); + } else if (!hit_null_destructor) { + hit_null_destructor = 1; + OSRuntimeLog(theKext, kOSRuntimeLogSpec, + "Null destructor in kext %s segment %s!", + kmodInfo ? kmodInfo->name : "(unknown)", + section->segname); + } } } /* if (destructors) */ - } /* if (strcmp...) */ + } /* if (strncmp...) */ } /* for (section...) 
*/ + OSSafeRelease(theKext); return; } -// This function will only operate on 32 bit kmods -void OSRuntimeUnloadCPP(kmod_info_t *ki, void *) +void +OSRuntimeUnloadCPPForSegment(kernel_segment_command_t * segment) { + OSRuntimeUnloadCPPForSegmentInKmod(segment, NULL); +} + +/********************************************************************* +*********************************************************************/ +void +OSRuntimeUnloadCPP( + kmod_info_t * kmodInfo, + void * data __unused) { - if (ki && ki->address) { + if (kmodInfo && kmodInfo->address) { - struct segment_command * segment; - struct mach_header *header; + kernel_segment_command_t * segment; + kernel_mach_header_t * header; - OSSymbol::checkForPageUnload((void *) ki->address, - (void *) (ki->address + ki->size)); + OSSymbol::checkForPageUnload((void *)kmodInfo->address, + (void *)(kmodInfo->address + kmodInfo->size)); - header = (struct mach_header *)ki->address; + header = (kernel_mach_header_t *)kmodInfo->address; segment = firstsegfromheader(header); for (segment = firstsegfromheader(header); segment != 0; - segment = nextseg(segment)) { + segment = nextsegfromheader(header, segment)) { - OSRuntimeUnloadCPPForSegment(segment); + OSRuntimeUnloadCPPForSegmentInKmod(segment, kmodInfo); } } + + return; } -kern_return_t OSRuntimeFinalizeCPP(kmod_info_t *ki, void *) +/********************************************************************* +*********************************************************************/ +kern_return_t +OSRuntimeFinalizeCPP( + kmod_info_t * kmodInfo, + void * data __unused) { - void *metaHandle; + kern_return_t result = KMOD_RETURN_FAILURE; + void * metaHandle = NULL; // do not free + OSKext * theKext = NULL; // must release - if (OSMetaClass::modHasInstance(ki->name)) { - // @@@ gvdl should have a verbose flag - printf("Can't unload %s due to -\n", ki->name); - OSMetaClass::reportModInstances(ki->name); - return kOSMetaClassHasInstances; + if (gKernelCPPInitialized) { + theKext = 
OSKext::lookupKextWithIdentifier(kmodInfo->name); } - // Tell the meta class system that we are starting to unload - metaHandle = OSMetaClass::preModLoad(ki->name); - OSRuntimeUnloadCPP(ki, 0); // Do the actual unload - (void) OSMetaClass::postModLoad(metaHandle); + if (theKext && !theKext->isCPPInitialized()) { + result = KMOD_RETURN_SUCCESS; + goto finish; + } + + /* OSKext checks for this condition now, but somebody might call + * this function directly (the symbol is exported....). + */ + if (OSMetaClass::modHasInstance(kmodInfo->name)) { + // xxx - Don't log under errors? this is more of an info thing + OSRuntimeLog(theKext, kOSRuntimeLogSpec, + "Can't tear down kext %s C++; classes have instances:", + kmodInfo->name); + OSKext::reportOSMetaClassInstances(kmodInfo->name, kOSRuntimeLogSpec); + result = kOSMetaClassHasInstances; + goto finish; + } + + /* Tell the meta class system that we are starting to unload. + * metaHandle isn't actually needed on the finalize path, + * so we don't check it here, even though OSMetaClass::postModLoad() will + * return a failure (it only does actual work on the init path anyhow). 
+ */ + metaHandle = OSMetaClass::preModLoad(kmodInfo->name); + OSRuntimeUnloadCPP(kmodInfo, 0); + (void)OSMetaClass::postModLoad(metaHandle); - return KMOD_RETURN_SUCCESS; + if (theKext) { + theKext->setCPPInitialized(false); + } + result = KMOD_RETURN_SUCCESS; +finish: + OSSafeRelease(theKext); + return result; } // Functions used by the extenTools/kmod library project -// This function will only operate on 32 bit kmods -kern_return_t OSRuntimeInitializeCPP(kmod_info_t *ki, void *) + +/********************************************************************* +*********************************************************************/ +kern_return_t +OSRuntimeInitializeCPP( + kmod_info_t * kmodInfo, + void * data __unused) { - struct mach_header *header; - void *metaHandle; - bool load_success; - struct segment_command * segment; - struct segment_command * failure_segment; - - if (!ki || !ki->address) - return KMOD_RETURN_FAILURE; - else - header = (struct mach_header *) ki->address; - - // Tell the meta class system that we are starting the load - metaHandle = OSMetaClass::preModLoad(ki->name); + kern_return_t result = KMOD_RETURN_FAILURE; + OSKext * theKext = NULL; // must release + kernel_mach_header_t * header = NULL; + void * metaHandle = NULL; // do not free + bool load_success = true; + kernel_segment_command_t * segment = NULL; // do not free + kernel_segment_command_t * failure_segment = NULL; // do not free + + if (!kmodInfo || !kmodInfo->address || !kmodInfo->name) { + result = kOSKextReturnInvalidArgument; + goto finish; + } + + if (gKernelCPPInitialized) { + theKext = OSKext::lookupKextWithIdentifier(kmodInfo->name); + } + + if (theKext && theKext->isCPPInitialized()) { + result = KMOD_RETURN_SUCCESS; + goto finish; + } + + header = (kernel_mach_header_t *)kmodInfo->address; + + /* Tell the meta class system that we are starting the load + */ + metaHandle = OSMetaClass::preModLoad(kmodInfo->name); assert(metaHandle); - if (!metaHandle) - return 
KMOD_RETURN_FAILURE; + if (!metaHandle) { + goto finish; + } - load_success = true; - failure_segment = 0; + /* NO GOTO PAST HERE. */ - /* Scan the header for all sections named "__constructor", in any + /* Scan the header for all constructor sections, in any * segment, and invoke the constructors within those sections. */ for (segment = firstsegfromheader(header); - segment != 0 && load_success; - segment = nextseg(segment)) { + segment != NULL && load_success; + segment = nextsegfromheader(header, segment)) { - struct section * section; + kernel_section_t * section; /* Record the current segment in the event of a failure. */ failure_segment = segment; for (section = firstsect(segment); - section != 0 && load_success; + section != NULL; section = nextsect(segment, section)) { - if (strcmp(section->sectname, "__constructor") == 0) { + if (strncmp(section->sectname, SECT_CONSTRUCTOR, + sizeof(SECT_CONSTRUCTOR)) == 0) { + structor_t * constructors = (structor_t *)section->addr; if (constructors) { - // FIXME: can we break here under the assumption that - // section names are unique within a segment? - int num_constructors = section->size / sizeof(structor_t); int hit_null_constructor = 0; @@ -291,66 +440,146 @@ kern_return_t OSRuntimeInitializeCPP(kmod_info_t *ki, void *) (*constructors[i])(); } else if (!hit_null_constructor) { hit_null_constructor = 1; - printf("Error! Null constructor in segment %s.\n", - section->segname); + OSRuntimeLog(theKext, kOSRuntimeLogSpec, + "Null constructor in kext %s segment %s!", + kmodInfo->name, section->segname); } } load_success = OSMetaClass::checkModLoad(metaHandle); + break; } /* if (constructors) */ - } /* if (strcmp...) */ + } /* if (strncmp...) */ } /* for (section...) */ } /* for (segment...) */ - - // We failed so call all of the destructors + /* We failed so call all of the destructors. 
We must do this before + * calling OSMetaClass::postModLoad() as the OSMetaClass destructors + * will alter state (in the metaHandle) used by that function. + */ if (!load_success) { - /* Scan the header for all sections named "__constructor", in any + /* Scan the header for all destructor sections, in any * segment, and invoke the constructors within those sections. */ for (segment = firstsegfromheader(header); segment != failure_segment && segment != 0; - segment = nextseg(segment)) { + segment = nextsegfromheader(header, segment)) { OSRuntimeUnloadCPPForSegment(segment); } /* for (segment...) */ } - return OSMetaClass::postModLoad(metaHandle); + /* Now, regardless of success so far, do the post-init registration + * and cleanup. If we had to call the unloadCPP function, static + * destructors have removed classes from the stalled list so no + * metaclasses will actually be registered. + */ + result = OSMetaClass::postModLoad(metaHandle); + + /* If we've otherwise been fine up to now, but OSMetaClass::postModLoad() + * fails (typically due to a duplicate class), tear down all the C++ + * stuff from the kext. This isn't necessary for libkern/OSMetaClass stuff, + * but may be necessary for other C++ code. We ignore the return value + * because it's only a fail when there are existing instances of libkern + * classes, and there had better not be any created on the C++ init path. 
+ */ + if (load_success && result != KMOD_RETURN_SUCCESS) { + (void)OSRuntimeFinalizeCPP(kmodInfo, NULL); + } + + if (theKext && load_success && result == KMOD_RETURN_SUCCESS) { + theKext->setCPPInitialized(true); + } +finish: + OSSafeRelease(theKext); + return result; } -static KMOD_LIB_DECL(__kernel__, 0); +#if PRAGMA_MARK +#pragma mark Libkern Init +#endif /* PRAGMA_MARK */ +/********************************************************************* +* Libkern Init +*********************************************************************/ +/********************************************************************* +*********************************************************************/ extern lck_spin_t gOSObjectTrackLock; extern lck_grp_t * IOLockGroup; +extern kmod_info_t g_kernel_kmod_info; void OSlibkernInit(void) { lck_spin_init(&gOSObjectTrackLock, IOLockGroup, LCK_ATTR_NULL); + + // This must be called before calling OSRuntimeInitializeCPP. + OSMetaClassBase::initialize(); + + if (kOSReturnSuccess != OSRuntimeInitializeCPP(&g_kernel_kmod_info, 0)) { + panic("OSRuntime: C++ runtime failed to initialize."); + } + + gKernelCPPInitialized = true; - vm_address_t *headerArray = (vm_address_t *) getmachheaders(); - - KMOD_INFO_NAME.address = headerArray[0]; assert(!headerArray[1]); - if (kOSReturnSuccess != OSRuntimeInitializeCPP(&KMOD_INFO_NAME, 0)) - panic("OSRuntime: C++ runtime failed to initialize"); - - OSBoolean::initialize(); + return; } __END_DECLS -void * operator new( size_t size) +#if PRAGMA_MARK +#pragma mark C++ Allocators & Deallocators +#endif /* PRAGMA_MARK */ +/********************************************************************* +* C++ Allocators & Deallocators +*********************************************************************/ +void * +operator new(size_t size) { void * result; - result = (void *) kern_os_malloc( size); - return( result); + result = (void *) kern_os_malloc(size); + return result; } -void operator delete( void * addr) +void +operator 
delete(void * addr) +{ + kern_os_free(addr); + return; +} + +void * +operator new[](unsigned long sz) +{ + if (sz == 0) sz = 1; + return kern_os_malloc(sz); +} + +void +operator delete[](void * ptr) +{ + if (ptr) { + kern_os_free(ptr); + } + return; +} + +/* PR-6481964 - The compiler is going to check for size overflows in calls to + * new[], and if there is an overflow, it will call __throw_length_error. + * This is an unrecoverable error by the C++ standard, so we must panic here. + * + * We have to put the function inside the std namespace because of how the + * compiler expects the name to be mangled. + */ +namespace std { + +void +__throw_length_error(const char *msg __unused) { - kern_os_free( addr); + panic("Size of array created by new[] has overflowed"); } +}; + diff --git a/libkern/c++/OSRuntimeSupport.c b/libkern/c++/OSRuntimeSupport.c index 77bcc2bc4..6ed505f73 100644 --- a/libkern/c++/OSRuntimeSupport.c +++ b/libkern/c++/OSRuntimeSupport.c @@ -3,6 +3,7 @@ // systems. // Note that I have had to manually mangle the symbols names. #if __GNUC__ >= 3 - void _ZN11OSMetaClassdlEPvm(void *mem, unsigned long size) { } + void _ZN11OSMetaClassdlEPvm(void *mem, unsigned long size); + void _ZN11OSMetaClassdlEPvm(__attribute__((__unused__)) void *mem, __attribute__((__unused__)) unsigned long size) { } #endif diff --git a/libkern/c++/OSSerialize.cpp b/libkern/c++/OSSerialize.cpp index 6db4c90ee..90a0b6054 100644 --- a/libkern/c++/OSSerialize.cpp +++ b/libkern/c++/OSSerialize.cpp @@ -79,6 +79,7 @@ bool OSSerialize::previouslySerialized(const OSMetaClassBase *o) // look it up tagString = (OSString *)tags->getObject((const OSSymbol *) o); +// xx-review: no error checking here for addString calls! // does it exist? 
if (tagString) { addString("getCount() < inCapacity) setObject(inObjects[i]); else @@ -208,6 +209,8 @@ bool OSSet::merge(const OSArray *array) const OSMetaClassBase *anObject; bool retVal = false; +// xx-review: if any setObject fails due to memory allocation failure, +// xx-review: this function should return false for (int i = 0; (anObject = array->getObject(i)); i++) if (setObject(anObject)) retVal = true; diff --git a/libkern/c++/OSString.cpp b/libkern/c++/OSString.cpp index 4b2cd6da4..f5095fa01 100644 --- a/libkern/c++/OSString.cpp +++ b/libkern/c++/OSString.cpp @@ -94,7 +94,7 @@ bool OSString::initWithCStringNoCopy(const char *cString) length = strlen(cString) + 1; flags |= kOSStringNoCopy; - string = (char *) cString; + string = const_cast(cString); return true; } @@ -208,7 +208,7 @@ bool OSString::isEqualTo(const OSString *aString) const bool OSString::isEqualTo(const char *aCString) const { - return strcmp(string, aCString) == 0; + return strncmp(string, aCString, length) == 0; } bool OSString::isEqualTo(const OSMetaClassBase *obj) const diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp index a5852153b..1d6e6c2f0 100644 --- a/libkern/c++/OSSymbol.cpp +++ b/libkern/c++/OSSymbol.cpp @@ -79,7 +79,7 @@ class OSSymbolPool Bucket *buckets; unsigned int nBuckets; unsigned int count; - mutex_t *poolGate; + lck_mtx_t *poolGate; static inline void hashSymbol(const char *s, unsigned int *hashP, @@ -115,8 +115,8 @@ class OSSymbolPool bool init(); - inline void closeGate() { mutex_lock(poolGate); }; - inline void openGate() { mutex_unlock(poolGate); }; + inline void closeGate() { lck_mtx_lock(poolGate); }; + inline void openGate() { lck_mtx_unlock(poolGate); }; OSSymbol *findSymbol(const char *cString) const; OSSymbol *insertSymbol(OSSymbol *sym); @@ -142,6 +142,8 @@ void OSSymbolPool::operator delete(void *mem, size_t size) ACCUMSIZE(-size); } +extern lck_grp_t *IOLockGroup; + bool OSSymbolPool::init() { count = 0; @@ -153,7 +155,7 @@ bool 
OSSymbolPool::init() bzero(buckets, nBuckets * sizeof(Bucket)); - poolGate = mutex_alloc(0); + poolGate = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); return poolGate != 0; } @@ -175,7 +177,7 @@ OSSymbolPool::~OSSymbolPool() } if (poolGate) - kfree(poolGate, 36 * 4); + lck_mtx_free(poolGate, IOLockGroup); } unsigned long OSSymbolPool::log2(unsigned int x) @@ -273,7 +275,7 @@ OSSymbol *OSSymbolPool::findSymbol(const char *cString) const probeSymbol = (OSSymbol *) thisBucket->symbolP; if (inLen == probeSymbol->length - && (strcmp(probeSymbol->string, cString) == 0)) + && (strncmp(probeSymbol->string, cString, probeSymbol->length) == 0)) return probeSymbol; return 0; } @@ -281,7 +283,7 @@ OSSymbol *OSSymbolPool::findSymbol(const char *cString) const for (list = thisBucket->symbolP; j--; list++) { probeSymbol = *list; if (inLen == probeSymbol->length - && (strcmp(probeSymbol->string, cString) == 0)) + && (strncmp(probeSymbol->string, cString, probeSymbol->length) == 0)) return probeSymbol; } @@ -310,7 +312,7 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym) probeSymbol = (OSSymbol *) thisBucket->symbolP; if (inLen == probeSymbol->length - && strcmp(probeSymbol->string, cString) == 0) + && strncmp(probeSymbol->string, cString, probeSymbol->length) == 0) return probeSymbol; list = (OSSymbol **) kalloc(2 * sizeof(OSSymbol *)); @@ -329,7 +331,7 @@ OSSymbol *OSSymbolPool::insertSymbol(OSSymbol *sym) for (list = thisBucket->symbolP; j--; list++) { probeSymbol = *list; if (inLen == probeSymbol->length - && strcmp(probeSymbol->string, cString) == 0) + && strncmp(probeSymbol->string, cString, probeSymbol->length) == 0) return probeSymbol; } diff --git a/libkern/c++/OSUnserialize.cpp b/libkern/c++/OSUnserialize.cpp index 4568e6f8a..6b32a76ee 100644 --- a/libkern/c++/OSUnserialize.cpp +++ b/libkern/c++/OSUnserialize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,32 +48,101 @@ // // // DO NOT EDIT OSUnserialize.tab.cpp! -// -// this means you! -// -// -// -// +/* A Bison parser, made by GNU Bison 2.3. */ + +/* Skeleton implementation for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ -/* A Bison parser, made from OSUnserialize.y - by GNU Bison version 1.28 */ +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ -#define YYBISON 1 /* Identify Bison output. 
*/ +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "2.3" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 0 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + +/* Substitute the variable and function names. */ #define yyparse OSUnserializeparse -#define yylex OSUnserializelex +#define yylex OSUnserializelex #define yyerror OSUnserializeerror -#define yylval OSUnserializelval -#define yychar OSUnserializechar +#define yylval OSUnserializelval +#define yychar OSUnserializechar #define yydebug OSUnserializedebug #define yynerrs OSUnserializenerrs -#define NUMBER 257 -#define STRING 258 -#define DATA 259 -#define BOOLEAN 260 -#define SYNTAX_ERROR 261 -#line 54 "OSUnserialize.y" + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + NUMBER = 258, + STRING = 259, + DATA = 260, + BOOLEAN = 261, + SYNTAX_ERROR = 262 + }; +#endif +/* Tokens. */ +#define NUMBER 258 +#define STRING 259 +#define DATA 260 +#define BOOLEAN 261 +#define SYNTAX_ERROR 262 + + + + +/* Copy the first part of user declarations. 
*/ +#line 60 "OSUnserialize.y" #include #include @@ -91,8 +160,7 @@ typedef struct object { } object_t; -static int yyparse(); -static int yyerror(char *s); +static int yyerror(const char *s); static int yylex(); static object_t * newObject(); @@ -110,7 +178,7 @@ static void rememberObject(int, object_t *); static OSObject *retrieveObject(int); // temp variable to use during parsing -static object_t *o; +static object_t *oo; // resultant object of parsed text static OSObject *parsedObject; @@ -127,451 +195,1071 @@ extern void kern_os_free(void * addr); #define realloc(a, s) kern_os_realloc(a, s) #define free(a) kern_os_free(a) -#ifndef YYSTYPE -#define YYSTYPE int + + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 #endif -#ifndef __cplusplus -#ifndef __STDC__ -#define const +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 #endif + +/* Enabling the token table. */ +#ifndef YYTOKEN_TABLE +# define YYTOKEN_TABLE 0 #endif +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif -#define YYFINAL 43 -#define YYFLAG -32768 -#define YYNTBASE 19 - -#define YYTRANSLATE(x) ((unsigned)(x) <= 261 ? 
yytranslate[x] : 31) - -static const char yytranslate[] = { 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 13, - 14, 2, 2, 17, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 18, 12, 2, - 11, 2, 2, 8, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 15, 2, 16, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 9, 2, 10, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 3, 4, 5, 6, - 7 -}; -#if YYDEBUG != 0 -static const short yyprhs[] = { 0, - 0, 1, 3, 5, 7, 9, 11, 13, 15, 17, - 19, 22, 26, 29, 33, 35, 38, 43, 46, 50, - 53, 57, 59, 63, 67, 69, 71 -}; +/* Copy the second part of user declarations. */ -static const short yyrhs[] = { -1, - 20, 0, 7, 0, 21, 0, 24, 0, 25, 0, - 29, 0, 28, 0, 27, 0, 30, 0, 8, 3, - 0, 20, 8, 3, 0, 9, 10, 0, 9, 22, - 10, 0, 23, 0, 22, 23, 0, 20, 11, 20, - 12, 0, 13, 14, 0, 13, 26, 14, 0, 15, - 16, 0, 15, 26, 16, 0, 20, 0, 26, 17, - 20, 0, 3, 18, 3, 0, 5, 0, 4, 0, - 6, 0 -}; +/* Line 216 of yacc.c. 
*/ +#line 182 "OSUnserialize.tab.c" + +#ifdef short +# undef short #endif -#if YYDEBUG != 0 -static const short yyrline[] = { 0, - 116, 117, 118, 121, 122, 123, 124, 125, 126, 127, - 128, 137, 145, 146, 149, 150, 153, 163, 164, 167, - 168, 171, 176, 187, 195, 200, 205 -}; +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; #endif +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#elif (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +typedef signed char yytype_int8; +#else +typedef short int yytype_int8; +#endif -#if YYDEBUG != 0 || defined (YYERROR_VERBOSE) +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif -static const char * const yytname[] = { "$","error","$undefined.","NUMBER", -"STRING","DATA","BOOLEAN","SYNTAX_ERROR","'@'","'{'","'}'","'='","';'","'('", -"')'","'['","']'","','","':'","input","object","dict","pairs","pair","array", -"set","elements","offset","data","string","boolean", NULL -}; +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; #endif -static const short yyr1[] = { 0, - 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 21, 21, 22, 22, 23, 24, 24, 25, - 25, 26, 26, 27, 28, 29, 30 -}; +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! 
defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(msgid) dgettext ("bison-runtime", msgid) +# endif +# endif +# ifndef YY_ +# define YY_(msgid) msgid +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YYUSE(e) ((void) (e)) +#else +# define YYUSE(e) /* empty */ +#endif + +/* Identity function, used to suppress warnings about constant conditions. */ +#ifndef lint +# define YYID(n) (n) +#else +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static int +YYID (int i) +#else +static int +YYID (i) + int i; +#endif +{ + return i; +} +#endif + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. 
*/ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined _STDLIB_H \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss; + YYSTYPE yyvs; + }; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. 
*/ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +/* Copy COUNT objects from FROM to TO. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (YYID (0)) +# endif +# endif + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack, Stack, yysize); \ + Stack = &yyptr->Stack; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (YYID (0)) -static const short yyr2[] = { 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 3, 2, 3, 1, 2, 4, 2, 3, 2, - 3, 1, 3, 3, 1, 1, 1 +#endif + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 30 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 80 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 19 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 13 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 28 +/* YYNRULES -- Number of states. */ +#define YYNSTATES 43 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 262 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 13, 14, 2, 2, 17, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 18, 12, + 2, 11, 2, 2, 8, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 15, 2, 16, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 9, 2, 10, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7 }; -static const short yydefact[] = { 1, - 0, 26, 25, 27, 3, 0, 0, 0, 0, 2, - 4, 5, 6, 9, 8, 7, 10, 0, 11, 13, - 0, 0, 15, 18, 22, 0, 20, 0, 0, 24, - 0, 14, 16, 19, 0, 21, 12, 0, 23, 17, - 0, 0, 0 +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. */ +static const yytype_uint8 yyprhs[] = +{ + 0, 0, 3, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 22, 25, 29, 32, 36, 38, 41, 46, + 49, 53, 56, 60, 62, 66, 70, 72, 74 }; -static const short yydefgoto[] = { 41, - 21, 11, 22, 23, 12, 13, 26, 14, 15, 16, - 17 +/* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ +static const yytype_int8 yyrhs[] = +{ + 20, 0, -1, -1, 21, -1, 7, -1, 22, -1, + 25, -1, 26, -1, 30, -1, 29, -1, 28, -1, + 31, -1, 8, 3, -1, 21, 8, 3, -1, 9, + 10, -1, 9, 23, 10, -1, 24, -1, 23, 24, + -1, 21, 11, 21, 12, -1, 13, 14, -1, 13, + 27, 14, -1, 15, 16, -1, 15, 27, 16, -1, + 21, -1, 27, 17, 21, -1, 3, 18, 3, -1, + 5, -1, 4, -1, 6, -1 }; -static const short yypact[] = { 12, - -13,-32768,-32768,-32768,-32768, 9, 33, 46, -2, 2, --32768,-32768,-32768,-32768,-32768,-32768,-32768, 25,-32768,-32768, - 21, 59,-32768,-32768, 2, 16,-32768, 7, 31,-32768, - 72,-32768,-32768,-32768, 72,-32768,-32768, 14, 2,-32768, - 40, 44,-32768 +/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ +static const yytype_uint8 yyrline[] = +{ + 0, 121, 121, 122, 123, 126, 127, 128, 129, 130, + 131, 132, 133, 142, 150, 151, 154, 155, 158, 168, + 169, 172, 173, 176, 181, 192, 200, 205, 210 }; +#endif -static const short yypgoto[] = {-32768, - 0,-32768,-32768, 23,-32768,-32768, 38,-32768,-32768,-32768, --32768 +#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "NUMBER", "STRING", "DATA", "BOOLEAN", + "SYNTAX_ERROR", "'@'", "'{'", "'}'", "'='", "';'", "'('", "')'", "'['", + "']'", "','", "':'", "$accept", "input", "object", "dict", "pairs", + "pair", "array", "set", "elements", "offset", "data", "string", + "boolean", 0 }; +#endif +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to + token YYLEX-NUM. */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 64, 123, + 125, 61, 59, 40, 41, 91, 93, 44, 58 +}; +# endif -#define YYLAST 87 +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ +static const yytype_uint8 yyr1[] = +{ + 0, 19, 20, 20, 20, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 22, 22, 23, 23, 24, 25, + 25, 26, 26, 27, 27, 28, 29, 30, 31 +}; +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 3, 2, 3, 1, 2, 4, 2, + 3, 2, 3, 1, 3, 3, 1, 1, 1 +}; -static const short yytable[] = { 10, - 1, 2, 3, 4, 18, 6, 7, 25, 25, 29, - 8, 19, 9, 27, 1, 2, 3, 4, 5, 6, - 7, 29, 36, 35, 8, 40, 9, 30, 29, 34, - 38, 31, 35, 37, 39, 1, 2, 3, 4, 42, - 6, 7, 20, 43, 33, 8, 28, 9, 1, 2, - 3, 4, 0, 6, 7, 0, 0, 0, 8, 24, - 9, 1, 2, 3, 4, 0, 6, 7, 32, 0, - 0, 8, 0, 9, 1, 2, 3, 4, 0, 6, - 7, 0, 0, 0, 8, 0, 9 +/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state + STATE-NUM when YYTABLE doesn't specify something else to do. Zero + means the default is an error. */ +static const yytype_uint8 yydefact[] = +{ + 2, 0, 27, 26, 28, 4, 0, 0, 0, 0, + 0, 3, 5, 6, 7, 10, 9, 8, 11, 0, + 12, 14, 0, 0, 16, 19, 23, 0, 21, 0, + 1, 0, 25, 0, 15, 17, 20, 0, 22, 13, + 0, 24, 18 }; -static const short yycheck[] = { 0, - 3, 4, 5, 6, 18, 8, 9, 8, 9, 8, - 13, 3, 15, 16, 3, 4, 5, 6, 7, 8, - 9, 8, 16, 17, 13, 12, 15, 3, 8, 14, - 31, 11, 17, 3, 35, 3, 4, 5, 6, 0, - 8, 9, 10, 0, 22, 13, 9, 15, 3, 4, - 5, 6, -1, 8, 9, -1, -1, -1, 13, 14, - 15, 3, 4, 5, 6, -1, 8, 9, 10, -1, - -1, 13, -1, 15, 3, 4, 5, 6, -1, 8, - 9, -1, -1, -1, 13, -1, 15 +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 10, 22, 12, 23, 24, 13, 14, 27, 15, + 16, 17, 18 }; -/* -*-C-*- Note some compilers choke on comments on `#line' lines. */ -#line 3 "/usr/share/bison.simple" -/* This file comes from bison-1.28. */ -/* Skeleton output parser for bison, - Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc. +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. 
*/ +#define YYPACT_NINF -14 +static const yytype_int8 yypact[] = +{ + 12, -13, -14, -14, -14, -14, 9, 26, 39, -2, + 10, 20, -14, -14, -14, -14, -14, -14, -14, 35, + -14, -14, 38, 52, -14, -14, 20, 49, -14, 7, + -14, 37, -14, 65, -14, -14, -14, 65, -14, -14, + 14, 20, -14 +}; - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -14, -14, 0, -14, -14, 27, -14, -14, 42, -14, + -14, -14, -14 +}; - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If zero, do what YYDEFACT says. + If YYTABLE_NINF, syntax error. */ +#define YYTABLE_NINF -1 +static const yytype_uint8 yytable[] = +{ + 11, 1, 2, 3, 4, 19, 6, 7, 26, 26, + 30, 8, 20, 9, 28, 1, 2, 3, 4, 5, + 6, 7, 31, 38, 37, 8, 42, 9, 31, 1, + 2, 3, 4, 40, 6, 7, 21, 41, 32, 8, + 39, 9, 1, 2, 3, 4, 31, 6, 7, 33, + 35, 29, 8, 25, 9, 1, 2, 3, 4, 0, + 6, 7, 34, 36, 0, 8, 37, 9, 1, 2, + 3, 4, 0, 6, 7, 0, 0, 0, 8, 0, + 9 +}; - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* As a special exception, when this file is copied by Bison into a - Bison output file, you may use that output file without restriction. - This special exception was added by the Free Software Foundation - in version 1.24 of Bison. 
*/ - -/* This is the parser code that is written into each bison parser - when the %semantic_parser declaration is not specified in the grammar. - It was written by Richard Stallman by simplifying the hairy parser - used when %semantic_parser is specified. */ - -#ifndef YYSTACK_USE_ALLOCA -#ifdef alloca -#define YYSTACK_USE_ALLOCA -#else /* alloca not defined */ -#ifdef __GNUC__ -#define YYSTACK_USE_ALLOCA -#define alloca __builtin_alloca -#else /* not GNU C. */ -#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) || (defined (__sun) && defined (__i386)) || defined (__arm) -#define YYSTACK_USE_ALLOCA -#include -#else /* not sparc */ -/* We think this test detects Watcom and Microsoft C. */ -/* This used to test MSDOS, but that is a bad idea - since that symbol is in the user namespace. */ -#if (defined (_MSDOS) || defined (_MSDOS_)) && !defined (__TURBOC__) -#if 0 /* No need for malloc.h, which pollutes the namespace; - instead, just don't use alloca. */ -#include -#endif -#else /* not MSDOS, or __TURBOC__ */ -#if defined(_AIX) -/* I don't know what this was needed for, but it pollutes the namespace. - So I turned it off. rms, 2 May 1997. */ -/* #include */ - #pragma alloca -#define YYSTACK_USE_ALLOCA -#else /* not MSDOS, or __TURBOC__, or _AIX */ -#if 0 -#ifdef __hpux /* haible@ilog.fr says this works for HPUX 9.05 and up, - and on HPUX 10. Eventually we can turn this on. 
*/ -#define YYSTACK_USE_ALLOCA -#define alloca __builtin_alloca -#endif /* __hpux */ -#endif -#endif /* not _AIX */ -#endif /* not MSDOS, or __TURBOC__ */ -#endif /* not sparc */ -#endif /* not GNU C */ -#endif /* alloca not defined */ -#endif /* YYSTACK_USE_ALLOCA not defined */ - -#ifdef YYSTACK_USE_ALLOCA -#define YYSTACK_ALLOC alloca -#else -#define YYSTACK_ALLOC malloc -#endif +static const yytype_int8 yycheck[] = +{ + 0, 3, 4, 5, 6, 18, 8, 9, 8, 9, + 0, 13, 3, 15, 16, 3, 4, 5, 6, 7, + 8, 9, 8, 16, 17, 13, 12, 15, 8, 3, + 4, 5, 6, 33, 8, 9, 10, 37, 3, 13, + 3, 15, 3, 4, 5, 6, 8, 8, 9, 11, + 23, 9, 13, 14, 15, 3, 4, 5, 6, -1, + 8, 9, 10, 14, -1, 13, 17, 15, 3, 4, + 5, 6, -1, 8, 9, -1, -1, -1, 13, -1, + 15 +}; -/* Note: there must be only one dollar sign in this file. - It is replaced by the list of actions, each action - as one case of the switch. */ +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_uint8 yystos[] = +{ + 0, 3, 4, 5, 6, 7, 8, 9, 13, 15, + 20, 21, 22, 25, 26, 28, 29, 30, 31, 18, + 3, 10, 21, 23, 24, 14, 21, 27, 16, 27, + 0, 8, 3, 11, 10, 24, 14, 17, 16, 3, + 21, 21, 12 +}; #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) -#define YYEMPTY -2 +#define YYEMPTY (-2) #define YYEOF 0 + #define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrlab1 -/* Like YYERROR except do call yyerror. - This remains here temporarily to ease the - transition to the new meaning of YYERROR, for GCC. +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. Once GCC version 2 has supplanted version 1, this can go. 
*/ + #define YYFAIL goto yyerrlab + #define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(token, value) \ + +#define YYBACKUP(Token, Value) \ do \ if (yychar == YYEMPTY && yylen == 1) \ - { yychar = (token), yylval = (value); \ - yychar1 = YYTRANSLATE (yychar); \ - YYPOPSTACK; \ + { \ + yychar = (Token); \ + yylval = (Value); \ + yytoken = YYTRANSLATE (yychar); \ + YYPOPSTACK (1); \ goto yybackup; \ } \ else \ - { yyerror ("syntax error: cannot back up"); YYERROR; } \ -while (0) + { \ + yyerror (YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (YYID (0)) + #define YYTERROR 1 #define YYERRCODE 256 -#ifndef YYPURE -#define YYLEX yylex() + +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (YYID (N)) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (YYID (0)) #endif -#ifdef YYPURE -#ifdef YYLSP_NEEDED -#ifdef YYLEX_PARAM -#define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM) -#else -#define YYLEX yylex(&yylval, &yylloc) + +/* YY_LOCATION_PRINT -- Print the location on the stream. + This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. 
*/ + +#ifndef YY_LOCATION_PRINT +# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL +# define YY_LOCATION_PRINT(File, Loc) \ + fprintf (File, "%d.%d-%d.%d", \ + (Loc).first_line, (Loc).first_column, \ + (Loc).last_line, (Loc).last_column) +# else +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif #endif -#else /* not YYLSP_NEEDED */ + + +/* YYLEX -- calling `yylex' with the right arguments. */ + #ifdef YYLEX_PARAM -#define YYLEX yylex(&yylval, YYLEX_PARAM) +# define YYLEX yylex (YYLEX_PARAM) #else -#define YYLEX yylex(&yylval) +# define YYLEX yylex () #endif -#endif /* not YYLSP_NEEDED */ + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (YYID (0)) + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (YYID (0)) + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +#else +static void +yy_symbol_value_print (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; #endif +{ + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# else + YYUSE (yyoutput); +# endif + switch (yytype) + { + default: + break; + } +} -/* If nonreentrant, generate the variables here */ -#ifndef YYPURE +/*--------------------------------. +| Print this symbol on YYOUTPUT. 
| +`--------------------------------*/ -int yychar; /* the lookahead symbol */ -YYSTYPE yylval; /* the semantic value of the */ - /* lookahead symbol */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +#else +static void +yy_symbol_print (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; +#endif +{ + if (yytype < YYNTOKENS) + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep); + YYFPRINTF (yyoutput, ")"); +} -#ifdef YYLSP_NEEDED -YYLTYPE yylloc; /* location data for the lookahead */ - /* symbol */ +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_stack_print (yytype_int16 *bottom, yytype_int16 *top) +#else +static void +yy_stack_print (bottom, top) + yytype_int16 *bottom; + yytype_int16 *top; #endif +{ + YYFPRINTF (stderr, "Stack now"); + for (; bottom <= top; ++bottom) + YYFPRINTF (stderr, " %d", *bottom); + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (YYID (0)) + -int yynerrs; /* number of parse errors so far */ -#endif /* not YYPURE */ +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ -#if YYDEBUG != 0 -int yydebug; /* nonzero means print parse trace */ -/* Since this is uninitialized, it does not stop multiple parsers - from coexisting. 
*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_reduce_print (YYSTYPE *yyvsp, int yyrule) +#else +static void +yy_reduce_print (yyvsp, yyrule) + YYSTYPE *yyvsp; + int yyrule; #endif +{ + int yynrhs = yyr2[yyrule]; + int yyi; + unsigned long int yylno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + fprintf (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], + &(yyvsp[(yyi + 1) - (yynrhs)]) + ); + fprintf (stderr, "\n"); + } +} -/* YYINITDEPTH indicates the initial size of the parser's stacks */ +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyvsp, Rule); \ +} while (YYID (0)) +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ #ifndef YYINITDEPTH -#define YYINITDEPTH 200 +# define YYINITDEPTH 200 #endif -/* YYMAXDEPTH is the maximum size the stacks can grow to - (effective only if the built-in stack extension method is used). */ +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). -#if YYMAXDEPTH == 0 -#undef YYMAXDEPTH -#endif + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ #ifndef YYMAXDEPTH -#define YYMAXDEPTH 10000 +# define YYMAXDEPTH 10000 #endif + -/* Define __yy_memcpy. 
Note that the size argument - should be passed with type unsigned int, because that is what the non-GCC - definitions require. With GCC, __builtin_memcpy takes an arg - of type size_t, but it can handle unsigned int. */ - -#if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ -#define __yy_memcpy(TO,FROM,COUNT) __builtin_memcpy(TO,FROM,COUNT) -#else /* not GNU C or C++ */ -#ifndef __cplusplus - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_memcpy (to, from, count) - char *to; - char *from; - unsigned int count; + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static YYSIZE_T +yystrlen (const char *yystr) +#else +static YYSIZE_T +yystrlen (yystr) + const char *yystr; +#endif +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static char * +yystpcpy (char *yydest, const char *yysrc) +#else +static char * +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +#endif { - register char *f = from; - register char *t = to; - register int i = count; + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; - while (i-- > 0) - *t++ = *f++; + return yyd - 1; } +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. 
The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } -#else /* __cplusplus */ + if (! yyres) + return yystrlen (yystr); -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_memcpy (char *to, char *from, unsigned int count) + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into YYRESULT an error message about the unexpected token + YYCHAR while in state YYSTATE. Return the number of bytes copied, + including the terminating null byte. If YYRESULT is null, do not + copy anything; just return the number of bytes that would be + copied. As a special case, return 0 if an ordinary "syntax error" + message will do. Return YYSIZE_MAXIMUM if overflow occurs during + size calculation. */ +static YYSIZE_T +yysyntax_error (char *yyresult, int yystate, int yychar) { - register char *t = to; - register char *f = from; - register int i = count; + int yyn = yypact[yystate]; - while (i-- > 0) - *t++ = *f++; + if (! 
(YYPACT_NINF < yyn && yyn <= YYLAST)) + return 0; + else + { + int yytype = YYTRANSLATE (yychar); + YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); + YYSIZE_T yysize = yysize0; + YYSIZE_T yysize1; + int yysize_overflow = 0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + int yyx; + +# if 0 + /* This is so xgettext sees the translatable formats that are + constructed on the fly. */ + YY_("syntax error, unexpected %s"); + YY_("syntax error, unexpected %s, expecting %s"); + YY_("syntax error, unexpected %s, expecting %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); +# endif + char *yyfmt; + char const *yyf; + static char const yyunexpected[] = "syntax error, unexpected %s"; + static char const yyexpecting[] = ", expecting %s"; + static char const yyor[] = " or %s"; + char yyformat[sizeof yyunexpected + + sizeof yyexpecting - 1 + + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) + * (sizeof yyor - 1))]; + char const *yyprefix = yyexpecting; + + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? 
yychecklim : YYNTOKENS; + int yycount = 1; + + yyarg[0] = yytname[yytype]; + yyfmt = yystpcpy (yyformat, yyunexpected); + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + yyformat[sizeof yyunexpected - 1] = '\0'; + break; + } + yyarg[yycount++] = yytname[yyx]; + yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + yyfmt = yystpcpy (yyfmt, yyprefix); + yyprefix = yyor; + } + + yyf = YY_(yyformat); + yysize1 = yysize + yystrlen (yyf); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + + if (yysize_overflow) + return YYSIZE_MAXIMUM; + + if (yyresult) + { + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + char *yyp = yyresult; + int yyi = 0; + while ((*yyp = *yyf) != '\0') + { + if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyf += 2; + } + else + { + yyp++; + yyf++; + } + } + } + return yysize; + } } +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) +#else +static void +yydestruct (yymsg, yytype, yyvaluep) + const char *yymsg; + int yytype; + YYSTYPE *yyvaluep; #endif -#endif +{ + YYUSE (yyvaluep); + + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + switch (yytype) + { + + default: + break; + } +} -#line 217 "/usr/share/bison.simple" -/* The user can define YYPARSE_PARAM as the name of an argument to be passed - into yyparse. The argument should have type void *. - It should actually point to an object. - Grammar actions can access the variable by casting it - to the proper pointer type. */ +/* Prevent warnings from -Wmissing-prototypes. */ #ifdef YYPARSE_PARAM -#ifdef __cplusplus -#define YYPARSE_PARAM_ARG void *YYPARSE_PARAM -#define YYPARSE_PARAM_DECL -#else /* not __cplusplus */ -#define YYPARSE_PARAM_ARG YYPARSE_PARAM -#define YYPARSE_PARAM_DECL void *YYPARSE_PARAM; -#endif /* not __cplusplus */ -#else /* not YYPARSE_PARAM */ -#define YYPARSE_PARAM_ARG -#define YYPARSE_PARAM_DECL -#endif /* not YYPARSE_PARAM */ - -/* Prevent warning if -Wstrict-prototypes. */ -#ifdef __GNUC__ -#ifdef YYPARSE_PARAM -int yyparse (void *); +#if defined __STDC__ || defined __cplusplus +int yyparse (void *YYPARSE_PARAM); #else -int yyparse (void); +int yyparse (); #endif +#else /* ! YYPARSE_PARAM */ +#if defined __STDC__ || defined __cplusplus +int yyparse (void); +#else +int yyparse (); #endif +#endif /* ! 
YYPARSE_PARAM */ -int -yyparse(YYPARSE_PARAM_ARG) - YYPARSE_PARAM_DECL -{ - register int yystate; - register int yyn; - register short *yyssp; - register YYSTYPE *yyvsp; - int yyerrstatus; /* number of tokens to shift before error messages enabled */ - int yychar1 = 0; /* lookahead token as an internal (translated) token number */ - short yyssa[YYINITDEPTH]; /* the state stack */ - YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ - short *yyss = yyssa; /* refer to the stacks thru separate pointers */ - YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ +/* The look-ahead symbol. */ +int yychar; + +/* The semantic value of the look-ahead symbol. */ +YYSTYPE yylval; + +/* Number of syntax errors so far. */ +int yynerrs; -#ifdef YYLSP_NEEDED - YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ - YYLTYPE *yyls = yylsa; - YYLTYPE *yylsp; -#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) + +/*----------. +| yyparse. | +`----------*/ + +#ifdef YYPARSE_PARAM +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void *YYPARSE_PARAM) #else -#define YYPOPSTACK (yyvsp--, yyssp--) +int +yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; #endif +#else /* ! YYPARSE_PARAM */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void) +#else +int +yyparse () - int yystacksize = YYINITDEPTH; - int yyfree_stacks = 0; - -#ifdef YYPURE - int yychar; - YYSTYPE yylval; - int yynerrs; -#ifdef YYLSP_NEEDED - YYLTYPE yylloc; #endif +#endif +{ + + int yystate; + int yyn; + int yyresult; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + /* Look-ahead token as an internal (translated) token number. */ + int yytoken = 0; +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. 
*/ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; #endif - YYSTYPE yyval; /* the variable used to return */ - /* semantic values from the action */ - /* routines */ + /* Three stacks and their tools: + `yyss': related to states, + `yyvs': related to semantic values, + `yyls': related to locations. - int yylen; + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Starting parse\n"); -#endif + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss = yyssa; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp; + + + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + YYSIZE_T yystacksize = YYINITDEPTH; + + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); yystate = 0; yyerrstatus = 0; @@ -583,590 +1271,551 @@ yyparse(YYPARSE_PARAM_ARG) so that they stay on the same level as the state stack. The wasted elements are never initialized. */ - yyssp = yyss - 1; + yyssp = yyss; yyvsp = yyvs; -#ifdef YYLSP_NEEDED - yylsp = yyls; -#endif -/* Push a new state, which is found in yystate . */ -/* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. */ -yynewstate: + goto yysetstate; - *++yyssp = yystate; +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. 
So pushing a state here evens the stacks. */ + yyssp++; - if (yyssp >= yyss + yystacksize - 1) - { - /* Give user a chance to reallocate the stack */ - /* Use copies of these so that the &'s don't force the real ones into memory. */ - YYSTYPE *yyvs1 = yyvs; - short *yyss1 = yyss; -#ifdef YYLSP_NEEDED - YYLTYPE *yyls1 = yyls; -#endif + yysetstate: + *yyssp = yystate; + if (yyss + yystacksize - 1 <= yyssp) + { /* Get the current used size of the three stacks, in elements. */ - int size = yyssp - yyss + 1; + YYSIZE_T yysize = yyssp - yyss + 1; #ifdef yyoverflow - /* Each stack pointer address is followed by the size of - the data in use in that stack, in bytes. */ -#ifdef YYLSP_NEEDED - /* This used to be a conditional around just the two extra args, - but that might be undefined if yyoverflow is a macro. */ - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yyls1, size * sizeof (*yylsp), - &yystacksize); -#else - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yystacksize); -#endif - - yyss = yyss1; yyvs = yyvs1; -#ifdef YYLSP_NEEDED - yyls = yyls1; -#endif + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } #else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else /* Extend the stack our own way. 
*/ - if (yystacksize >= YYMAXDEPTH) - { - yyerror("parser stack overflow"); - if (yyfree_stacks) - { - free (yyss); - free (yyvs); -#ifdef YYLSP_NEEDED - free (yyls); -#endif - } - return 2; - } + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; yystacksize *= 2; - if (yystacksize > YYMAXDEPTH) + if (YYMAXDEPTH < yystacksize) yystacksize = YYMAXDEPTH; -#ifndef YYSTACK_USE_ALLOCA - yyfree_stacks = 1; -#endif - yyss = (short *) YYSTACK_ALLOC (yystacksize * sizeof (*yyssp)); - __yy_memcpy ((char *)yyss, (char *)yyss1, - size * (unsigned int) sizeof (*yyssp)); - yyvs = (YYSTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp)); - __yy_memcpy ((char *)yyvs, (char *)yyvs1, - size * (unsigned int) sizeof (*yyvsp)); -#ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp)); - __yy_memcpy ((char *)yyls, (char *)yyls1, - size * (unsigned int) sizeof (*yylsp)); -#endif + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss); + YYSTACK_RELOCATE (yyvs); + +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif #endif /* no yyoverflow */ - yyssp = yyss + size - 1; - yyvsp = yyvs + size - 1; -#ifdef YYLSP_NEEDED - yylsp = yyls + size - 1; -#endif + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Stack size increased to %d\n", yystacksize); -#endif - if (yyssp >= yyss + yystacksize - 1) + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) YYABORT; } -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Entering state %d\n", yystate); -#endif + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); goto yybackup; - yybackup: -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. 
*/ -/* yyresume: */ +/*-----------. +| yybackup. | +`-----------*/ +yybackup: - /* First try to decide what to do without reference to lookahead token. */ + /* Do appropriate processing given the current state. Read a + look-ahead token if we need one and don't already have one. */ + /* First try to decide what to do without reference to look-ahead token. */ yyn = yypact[yystate]; - if (yyn == YYFLAG) + if (yyn == YYPACT_NINF) goto yydefault; - /* Not known => get a lookahead token if don't already have one. */ - - /* yychar is either YYEMPTY or YYEOF - or a valid token in external form. */ + /* Not known => get a look-ahead token if don't already have one. */ + /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */ if (yychar == YYEMPTY) { -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Reading a token: "); -#endif + YYDPRINTF ((stderr, "Reading a token: ")); yychar = YYLEX; } - /* Convert token to internal form (in yychar1) for indexing tables with */ - - if (yychar <= 0) /* This means end of input. */ + if (yychar <= YYEOF) { - yychar1 = 0; - yychar = YYEOF; /* Don't call YYLEX any more */ - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Now at end of input.\n"); -#endif + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); } else { - yychar1 = YYTRANSLATE(yychar); - -#if YYDEBUG != 0 - if (yydebug) - { - fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); - /* Give the individual parser a way to print the precise meaning - of a token, for further debugging info. */ -#ifdef YYPRINT - YYPRINT (stderr, yychar, yylval); -#endif - fprintf (stderr, ")\n"); - } -#endif + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); } - yyn += yychar1; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. 
*/ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) goto yydefault; - yyn = yytable[yyn]; - - /* yyn is what to do for this token type in this state. - Negative => reduce, -yyn is rule number. - Positive => shift, yyn is new state. - New state is final state => don't bother to shift, - just return success. - 0, or most negative number => error. */ - - if (yyn < 0) + if (yyn <= 0) { - if (yyn == YYFLAG) + if (yyn == 0 || yyn == YYTABLE_NINF) goto yyerrlab; yyn = -yyn; goto yyreduce; } - else if (yyn == 0) - goto yyerrlab; if (yyn == YYFINAL) YYACCEPT; - /* Shift the lookahead token. */ + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]); -#endif + /* Shift the look-ahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - /* Discard the token being shifted unless it is eof. */ + /* Discard the shifted token unless it is eof. */ if (yychar != YYEOF) yychar = YYEMPTY; + yystate = yyn; *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - /* count tokens shifted since error; after three, turn off error status. */ - if (yyerrstatus) yyerrstatus--; - yystate = yyn; goto yynewstate; -/* Do the default action for the current state. */ -yydefault: +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: yyn = yydefact[yystate]; if (yyn == 0) goto yyerrlab; + goto yyreduce; + -/* Do a reduction. yyn is the number of a rule to reduce with. */ +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ yyreduce: + /* yyn is the number of a rule to reduce with. 
*/ yylen = yyr2[yyn]; - if (yylen > 0) - yyval = yyvsp[1-yylen]; /* implement default value of the action */ - -#if YYDEBUG != 0 - if (yydebug) - { - int i; - fprintf (stderr, "Reducing via rule %d (line %d), ", - yyn, yyrline[yyn]); + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. - /* Print the symbols being reduced, and their result. */ - for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) - fprintf (stderr, "%s ", yytname[yyrhs[i]]); - fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]); - } -#endif + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; - switch (yyn) { - -case 1: -#line 116 "OSUnserialize.y" -{ parsedObject = (OSObject *)NULL; YYACCEPT; ; - break;} -case 2: -#line 117 "OSUnserialize.y" -{ parsedObject = (OSObject *)yyvsp[0]; YYACCEPT; ; - break;} -case 3: -#line 118 "OSUnserialize.y" -{ yyerror("syntax error"); YYERROR; ; - break;} -case 4: + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: #line 121 "OSUnserialize.y" -{ yyval = (object_t *)buildOSDictionary(yyvsp[0]); ; - break;} -case 5: + { parsedObject = (OSObject *)NULL; YYACCEPT; ;} + break; + + case 3: #line 122 "OSUnserialize.y" -{ yyval = (object_t *)buildOSArray(yyvsp[0]); ; - break;} -case 6: + { parsedObject = (OSObject *)(yyvsp[(1) - (1)]); YYACCEPT; ;} + break; + + case 4: #line 123 "OSUnserialize.y" -{ yyval = (object_t *)buildOSSet(yyvsp[0]); ; - break;} -case 7: -#line 124 "OSUnserialize.y" -{ yyval = (object_t *)buildOSString(yyvsp[0]); ; - break;} -case 8: -#line 125 "OSUnserialize.y" -{ yyval = (object_t *)buildOSData(yyvsp[0]); ; - break;} -case 9: + { yyerror("syntax error"); YYERROR; ;} + break; + + case 5: #line 126 "OSUnserialize.y" -{ yyval = (object_t *)buildOSOffset(yyvsp[0]); ; - break;} -case 10: + { 
(yyval) = (object_t *)buildOSDictionary((yyvsp[(1) - (1)])); ;} + break; + + case 6: #line 127 "OSUnserialize.y" -{ yyval = (object_t *)buildOSBoolean(yyvsp[0]); ; - break;} -case 11: + { (yyval) = (object_t *)buildOSArray((yyvsp[(1) - (1)])); ;} + break; + + case 7: #line 128 "OSUnserialize.y" -{ yyval = (object_t *)retrieveObject(yyvsp[0]->u.offset); - if (yyval) { - ((OSObject *)yyval)->retain(); + { (yyval) = (object_t *)buildOSSet((yyvsp[(1) - (1)])); ;} + break; + + case 8: +#line 129 "OSUnserialize.y" + { (yyval) = (object_t *)buildOSString((yyvsp[(1) - (1)])); ;} + break; + + case 9: +#line 130 "OSUnserialize.y" + { (yyval) = (object_t *)buildOSData((yyvsp[(1) - (1)])); ;} + break; + + case 10: +#line 131 "OSUnserialize.y" + { (yyval) = (object_t *)buildOSOffset((yyvsp[(1) - (1)])); ;} + break; + + case 11: +#line 132 "OSUnserialize.y" + { (yyval) = (object_t *)buildOSBoolean((yyvsp[(1) - (1)])); ;} + break; + + case 12: +#line 133 "OSUnserialize.y" + { (yyval) = (object_t *)retrieveObject((yyvsp[(2) - (2)])->u.offset); + if ((yyval)) { + ((OSObject *)(yyval))->retain(); } else { yyerror("forward reference detected"); YYERROR; } - freeObject(yyvsp[0]); - ; - break;} -case 12: -#line 137 "OSUnserialize.y" -{ yyval = yyvsp[-2]; - rememberObject(yyvsp[0]->u.offset, yyvsp[-2]); - freeObject(yyvsp[0]); - ; - break;} -case 13: -#line 145 "OSUnserialize.y" -{ yyval = NULL; ; - break;} -case 14: -#line 146 "OSUnserialize.y" -{ yyval = yyvsp[-1]; ; - break;} -case 16: + freeObject((yyvsp[(2) - (2)])); + ;} + break; + + case 13: +#line 142 "OSUnserialize.y" + { (yyval) = (yyvsp[(1) - (3)]); + rememberObject((yyvsp[(3) - (3)])->u.offset, (yyvsp[(1) - (3)])); + freeObject((yyvsp[(3) - (3)])); + ;} + break; + + case 14: #line 150 "OSUnserialize.y" -{ yyvsp[0]->next = yyvsp[-1]; yyvsp[-1]->prev = yyvsp[0]; yyval = yyvsp[0]; ; - break;} -case 17: -#line 153 "OSUnserialize.y" -{ yyval = newObject(); - yyval->next = NULL; - yyval->prev = NULL; - yyval->u.key = yyvsp[-3]; - 
yyval->object = yyvsp[-1]; - ; - break;} -case 18: -#line 163 "OSUnserialize.y" -{ yyval = NULL; ; - break;} -case 19: -#line 164 "OSUnserialize.y" -{ yyval = yyvsp[-1]; ; - break;} -case 20: -#line 167 "OSUnserialize.y" -{ yyval = NULL; ; - break;} -case 21: + { (yyval) = NULL; ;} + break; + + case 15: +#line 151 "OSUnserialize.y" + { (yyval) = (yyvsp[(2) - (3)]); ;} + break; + + case 17: +#line 155 "OSUnserialize.y" + { (yyvsp[(2) - (2)])->next = (yyvsp[(1) - (2)]); (yyvsp[(1) - (2)])->prev = (yyvsp[(2) - (2)]); (yyval) = (yyvsp[(2) - (2)]); ;} + break; + + case 18: +#line 158 "OSUnserialize.y" + { (yyval) = newObject(); + (yyval)->next = NULL; + (yyval)->prev = NULL; + (yyval)->u.key = (yyvsp[(1) - (4)]); + (yyval)->object = (yyvsp[(3) - (4)]); + ;} + break; + + case 19: #line 168 "OSUnserialize.y" -{ yyval = yyvsp[-1]; ; - break;} -case 22: -#line 171 "OSUnserialize.y" -{ yyval = newObject(); - yyval->object = yyvsp[0]; - yyval->next = NULL; - yyval->prev = NULL; - ; - break;} -case 23: -#line 176 "OSUnserialize.y" -{ o = newObject(); - o->object = yyvsp[0]; - o->next = yyvsp[-2]; - o->prev = NULL; - yyvsp[-2]->prev = o; - yyval = o; - ; - break;} -case 24: -#line 187 "OSUnserialize.y" -{ yyval = yyvsp[-2]; - yyval->size = yyvsp[0]->u.offset; - freeObject(yyvsp[0]); - ; - break;} -} - /* the action file gets copied in in place of this dollarsign */ -#line 543 "/usr/share/bison.simple" - - yyvsp -= yylen; - yyssp -= yylen; -#ifdef YYLSP_NEEDED - yylsp -= yylen; -#endif + { (yyval) = NULL; ;} + break; -#if YYDEBUG != 0 - if (yydebug) - { - short *ssp1 = yyss - 1; - fprintf (stderr, "state stack now"); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); + case 20: +#line 169 "OSUnserialize.y" + { (yyval) = (yyvsp[(2) - (3)]); ;} + break; + + case 21: +#line 172 "OSUnserialize.y" + { (yyval) = NULL; ;} + break; + + case 22: +#line 173 "OSUnserialize.y" + { (yyval) = (yyvsp[(2) - (3)]); ;} + break; + + case 23: +#line 176 
"OSUnserialize.y" + { (yyval) = newObject(); + (yyval)->object = (yyvsp[(1) - (1)]); + (yyval)->next = NULL; + (yyval)->prev = NULL; + ;} + break; + + case 24: +#line 181 "OSUnserialize.y" + { oo = newObject(); + oo->object = (yyvsp[(3) - (3)]); + oo->next = (yyvsp[(1) - (3)]); + oo->prev = NULL; + (yyvsp[(1) - (3)])->prev = oo; + (yyval) = oo; + ;} + break; + + case 25: +#line 192 "OSUnserialize.y" + { (yyval) = (yyvsp[(1) - (3)]); + (yyval)->size = (yyvsp[(3) - (3)])->u.offset; + freeObject((yyvsp[(3) - (3)])); + ;} + break; + + +/* Line 1267 of yacc.c. */ +#line 1555 "OSUnserialize.tab.c" + default: break; } -#endif + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); *++yyvsp = yyval; -#ifdef YYLSP_NEEDED - yylsp++; - if (yylen == 0) - { - yylsp->first_line = yylloc.first_line; - yylsp->first_column = yylloc.first_column; - yylsp->last_line = (yylsp-1)->last_line; - yylsp->last_column = (yylsp-1)->last_column; - yylsp->text = 0; - } - else - { - yylsp->last_line = (yylsp+yylen-1)->last_line; - yylsp->last_column = (yylsp+yylen-1)->last_column; - } -#endif - /* Now "shift" the result of the reduction. - Determine what state that goes to, - based on the state we popped back to - and the rule number reduced by. */ + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ yyn = yyr1[yyn]; - yystate = yypgoto[yyn - YYNTBASE] + *yyssp; - if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) yystate = yytable[yystate]; else - yystate = yydefgoto[yyn - YYNTBASE]; + yystate = yydefgoto[yyn - YYNTOKENS]; goto yynewstate; -yyerrlab: /* here on detecting error */ - if (! yyerrstatus) - /* If not already recovering from an error, report this error. 
*/ +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) { ++yynerrs; - -#ifdef YYERROR_VERBOSE - yyn = yypact[yystate]; - - if (yyn > YYFLAG && yyn < YYLAST) - { - int size = 0; - char *msg; - int x, count, len; - - count = 0; - /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - size += strlen(yytname[x]) + 15, count++; - len = size + 15; - msg = (char *) malloc(len); - if (msg != 0) - { - strlcpy(msg, "parse error", len); - - if (count < 5) - { - count = 0; - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - { - strlcat(msg, count == 0 ? ", expecting `" : " or `", - len); - strlcat(msg, yytname[x], len); - strlcat(msg, "'", len); - count++; - } - } - yyerror(msg); - free(msg); - } - else - yyerror ("parse error; also virtual memory exceeded"); - } - else -#endif /* YYERROR_VERBOSE */ - yyerror("parse error"); +#if ! YYERROR_VERBOSE + yyerror (YY_("syntax error")); +#else + { + YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); + if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) + { + YYSIZE_T yyalloc = 2 * yysize; + if (! 
(yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) + yyalloc = YYSTACK_ALLOC_MAXIMUM; + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yyalloc); + if (yymsg) + yymsg_alloc = yyalloc; + else + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + } + } + + if (0 < yysize && yysize <= yymsg_alloc) + { + (void) yysyntax_error (yymsg, yystate, yychar); + yyerror (yymsg); + } + else + { + yyerror (YY_("syntax error")); + if (yysize != 0) + goto yyexhaustedlab; + } + } +#endif } - goto yyerrlab1; -yyerrlab1: /* here on error raised explicitly by an action */ + if (yyerrstatus == 3) { - /* if just tried and failed to reuse lookahead token after an error, discard it. */ - - /* return failure if at end of input */ - if (yychar == YYEOF) - YYABORT; - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); -#endif + /* If just tried and failed to reuse look-ahead token after an + error, discard it. */ - yychar = YYEMPTY; + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval); + yychar = YYEMPTY; + } } - /* Else will try to reuse lookahead token - after shifting the error token. */ + /* Else will try to reuse look-ahead token after shifting the error + token. */ + goto yyerrlab1; - yyerrstatus = 3; /* Each real token shifted decrements this */ - goto yyerrhandle; +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: -yyerrdefault: /* current state does not do anything special for the error token. */ + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. 
*/ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; -#if 0 - /* This is wrong; only states that explicitly want error tokens - should shift them. */ - yyn = yydefact[yystate]; /* If its default is to accept any token, ok. Otherwise pop it.*/ - if (yyn) goto yydefault; -#endif + /* Do not reclaim the symbols of the rule which action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; -yyerrpop: /* pop the current state because it cannot handle the error token */ - if (yyssp == yyss) YYABORT; - yyvsp--; - yystate = *--yyssp; -#ifdef YYLSP_NEEDED - yylsp--; -#endif +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ -#if YYDEBUG != 0 - if (yydebug) + for (;;) { - short *ssp1 = yyss - 1; - fprintf (stderr, "Error: state stack now"); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); - } -#endif - -yyerrhandle: + yyn = yypact[yystate]; + if (yyn != YYPACT_NINF) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yyerrdefault; + /* Pop the current state because it cannot handle the error token. 
*/ + if (yyssp == yyss) + YYABORT; - yyn += YYTERROR; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) - goto yyerrdefault; - yyn = yytable[yyn]; - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrpop; - yyn = -yyn; - goto yyreduce; + yydestruct ("Error: popping", + yystos[yystate], yyvsp); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); } - else if (yyn == 0) - goto yyerrpop; if (yyn == YYFINAL) YYACCEPT; -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Shifting error token, "); -#endif - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); yystate = yyn; goto yynewstate; - yyacceptlab: - /* YYACCEPT comes here. */ - if (yyfree_stacks) - { - free (yyss); - free (yyvs); -#ifdef YYLSP_NEEDED - free (yyls); + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#ifndef yyoverflow +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ #endif - } - return 0; - yyabortlab: - /* YYABORT comes here. */ - if (yyfree_stacks) +yyreturn: + if (yychar != YYEOF && yychar != YYEMPTY) + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval); + /* Do not reclaim the symbols of the rule which action triggered + this YYABORT or YYACCEPT. 
*/ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) { - free (yyss); - free (yyvs); -#ifdef YYLSP_NEEDED - free (yyls); -#endif + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp); + YYPOPSTACK (1); } - return 1; +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + /* Make sure YYID is used. */ + return YYID (yyresult); } -#line 208 "OSUnserialize.y" + + +#line 213 "OSUnserialize.y" static int lineNumber = 0; @@ -1187,9 +1836,9 @@ static int parseBufferIndex; static char yyerror_message[128]; int -yyerror(char *s) /* Called by yyparse on error */ +yyerror(const char *s) /* Called by yyparse on error */ { - sprintf(yyerror_message, "OSUnserialize: %s near line %d\n", s, lineNumber); + snprintf(yyerror_message, sizeof(yyerror_message), "OSUnserialize: %s near line %d\n", s, lineNumber); return 0; } @@ -1403,14 +2052,14 @@ yylex() // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!# // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!# -#ifdef DEBUG +#if DEBUG int debugUnserializeAllocCount = 0; #endif object_t * newObject() { -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount++; #endif return (object_t *)malloc(sizeof(object_t)); @@ -1419,7 +2068,7 @@ newObject() void freeObject(object_t *o) { -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount--; #endif free(o); @@ -1431,7 +2080,7 @@ static void rememberObject(int tag, object_t *o) { char key[16]; - sprintf(key, "%u", tag); + snprintf(key, sizeof(key), "%u", tag); tags->setObject(key, (OSObject *)o); } @@ -1440,7 +2089,7 @@ static OSObject * retrieveObject(int tag) { char key[16]; - sprintf(key, "%u", tag); + snprintf(key, sizeof(key), "%u", tag); return tags->getObject(key); } @@ -1559,10 +2208,11 @@ buildOSBoolean(object_t *o) }; __BEGIN_DECLS -#include +#include __END_DECLS -static mutex_t *lock = 0; +static lck_mtx_t *lock = 0; +extern lck_grp_t 
*IOLockGroup; OSObject* OSUnserialize(const char *buffer, OSString **errorString) @@ -1570,14 +2220,14 @@ OSUnserialize(const char *buffer, OSString **errorString) OSObject *object; if (!lock) { - lock = mutex_alloc(0); - mutex_lock(lock); + lock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); + lck_mtx_lock(lock); } else { - mutex_lock(lock); + lck_mtx_lock(lock); } -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount = 0; #endif yyerror_message[0] = 0; //just in case @@ -1594,13 +2244,13 @@ OSUnserialize(const char *buffer, OSString **errorString) } tags->release(); -#ifdef DEBUG +#if DEBUG if (debugUnserializeAllocCount) { printf("OSUnserialize: allocation check failed, count = %d.\n", debugUnserializeAllocCount); } #endif - mutex_unlock(lock); + lck_mtx_unlock(lock); return object; } @@ -1619,3 +2269,4 @@ OSUnserialize(const char *buffer, OSString **errorString) // // // + diff --git a/libkern/c++/OSUnserialize.y b/libkern/c++/OSUnserialize.y index 34b2ee963..450ce0811 100644 --- a/libkern/c++/OSUnserialize.y +++ b/libkern/c++/OSUnserialize.y @@ -74,8 +74,7 @@ typedef struct object { } object_t; -static int yyparse(); -static int yyerror(char *s); +static int yyerror(const char *s); static int yylex(); static object_t * newObject(); @@ -93,7 +92,7 @@ static void rememberObject(int, object_t *); static OSObject *retrieveObject(int); // temp variable to use during parsing -static object_t *o; +static object_t *oo; // resultant object of parsed text static OSObject *parsedObject; @@ -179,12 +178,12 @@ elements: object { $$ = newObject(); $$->next = NULL; $$->prev = NULL; } - | elements ',' object { o = newObject(); - o->object = $3; - o->next = $1; - o->prev = NULL; - $1->prev = o; - $$ = o; + | elements ',' object { oo = newObject(); + oo->object = $3; + oo->next = $1; + oo->prev = NULL; + $1->prev = oo; + $$ = oo; } ; @@ -231,9 +230,9 @@ static int parseBufferIndex; static char yyerror_message[128]; int -yyerror(char *s) /* Called by yyparse on error */ 
+yyerror(const char *s) /* Called by yyparse on error */ { - sprintf(yyerror_message, "OSUnserialize: %s near line %d\n", s, lineNumber); + snprintf(yyerror_message, sizeof(yyerror_message), "OSUnserialize: %s near line %d\n", s, lineNumber); return 0; } @@ -447,14 +446,14 @@ yylex() // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!# // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!# -#ifdef DEBUG +#if DEBUG int debugUnserializeAllocCount = 0; #endif object_t * newObject() { -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount++; #endif return (object_t *)malloc(sizeof(object_t)); @@ -463,7 +462,7 @@ newObject() void freeObject(object_t *o) { -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount--; #endif free(o); @@ -475,7 +474,7 @@ static void rememberObject(int tag, object_t *o) { char key[16]; - sprintf(key, "%u", tag); + snprintf(key, sizeof(key), "%u", tag); tags->setObject(key, (OSObject *)o); } @@ -484,7 +483,7 @@ static OSObject * retrieveObject(int tag) { char key[16]; - sprintf(key, "%u", tag); + snprintf(key, sizeof(key), "%u", tag); return tags->getObject(key); } @@ -603,10 +602,11 @@ buildOSBoolean(object_t *o) }; __BEGIN_DECLS -#include +#include __END_DECLS -static mutex_t *lock = 0; +static lck_mtx_t *lock = 0; +extern lck_grp_t *IOLockGroup; OSObject* OSUnserialize(const char *buffer, OSString **errorString) @@ -614,14 +614,14 @@ OSUnserialize(const char *buffer, OSString **errorString) OSObject *object; if (!lock) { - lock = mutex_alloc(ETAP_IO_AHA); - _mutex_lock(lock); + lock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); + lck_mtx_lock(lock); } else { - _mutex_lock(lock); + lck_mtx_lock(lock); } -#ifdef DEBUG +#if DEBUG debugUnserializeAllocCount = 0; #endif yyerror_message[0] = 0; //just in case @@ -638,13 +638,13 @@ OSUnserialize(const char *buffer, OSString **errorString) } tags->release(); -#ifdef DEBUG +#if DEBUG if (debugUnserializeAllocCount) { printf("OSUnserialize: allocation check failed, count = %d.\n", 
debugUnserializeAllocCount); } #endif - mutex_unlock(lock); + lck_mtx_unlock(lock); return object; } diff --git a/libkern/c++/OSUnserializeXML.cpp b/libkern/c++/OSUnserializeXML.cpp index e543deec5..7a424634b 100644 --- a/libkern/c++/OSUnserializeXML.cpp +++ b/libkern/c++/OSUnserializeXML.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -195,10 +195,9 @@ typedef struct parser_state { #undef yyerror #define yyerror(s) OSUnserializeerror(STATE, (s)) -static int OSUnserializeerror(parser_state_t *state, char *s); +static int OSUnserializeerror(parser_state_t *state, const char *s); static int yylex(YYSTYPE *lvalp, parser_state_t *state); -static int yyparse(void * state); static object_t *newObject(parser_state_t *state); static void freeObject(parser_state_t *state, object_t *o); @@ -219,10 +218,6 @@ extern void *kern_os_malloc(size_t size); extern void *kern_os_realloc(void * addr, size_t size); extern void kern_os_free(void * addr); -//XXX shouldn't have to define these -extern long strtol(const char *, char **, int); -extern unsigned long strtoul(const char *, char **, int); - } /* extern "C" */ #define malloc(s) kern_os_malloc(s) @@ -262,7 +257,7 @@ typedef int YYSTYPE; /* Line 216 of yacc.c. */ -#line 216 "OSUnserializeXML.tab.c" +#line 211 "OSUnserializeXML.tab.c" #ifdef short # undef short @@ -312,7 +307,7 @@ typedef short int yytype_int16; #define YYSIZE_MAXIMUM ((YYSIZE_T) -1) #ifndef YY_ -# if YYENABLE_NLS +# if defined YYENABLE_NLS && YYENABLE_NLS # if ENABLE_NLS # include /* INFRINGES ON USER NAME SPACE */ # define YY_(msgid) dgettext ("bison-runtime", msgid) @@ -554,10 +549,10 @@ static const yytype_int8 yyrhs[] = /* YYRLINE[YYN] -- source line where rule number YYN was defined. 
*/ static const yytype_uint8 yyrline[] = { - 0, 150, 150, 153, 158, 163, 164, 165, 166, 167, - 168, 169, 170, 183, 186, 189, 192, 193, 198, 207, - 212, 215, 218, 221, 224, 227, 230, 233, 240, 243, - 246, 249, 252 + 0, 145, 145, 148, 153, 158, 159, 160, 161, 162, + 163, 164, 165, 178, 181, 184, 187, 188, 193, 202, + 207, 210, 213, 216, 219, 222, 225, 228, 235, 238, + 241, 244, 247 }; #endif @@ -754,7 +749,7 @@ while (YYID (0)) we won't break user code: when these are the locations we know. */ #ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL +# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL # define YY_LOCATION_PRINT(File, Loc) \ fprintf (File, "%d.%d-%d.%d", \ (Loc).first_line, (Loc).first_column, \ @@ -1495,14 +1490,14 @@ int yynerrs; switch (yyn) { case 2: -#line 150 "OSUnserializeXML.y" +#line 145 "OSUnserializeXML.y" { yyerror("unexpected end of buffer"); YYERROR; ;} break; case 3: -#line 153 "OSUnserializeXML.y" +#line 148 "OSUnserializeXML.y" { STATE->parsedObject = (yyvsp[(1) - (1)])->object; (yyvsp[(1) - (1)])->object = 0; freeObject(STATE, (yyvsp[(1) - (1)])); @@ -1511,49 +1506,49 @@ int yynerrs; break; case 4: -#line 158 "OSUnserializeXML.y" +#line 153 "OSUnserializeXML.y" { yyerror("syntax error"); YYERROR; ;} break; case 5: -#line 163 "OSUnserializeXML.y" +#line 158 "OSUnserializeXML.y" { (yyval) = buildDictionary(STATE, (yyvsp[(1) - (1)])); ;} break; case 6: -#line 164 "OSUnserializeXML.y" +#line 159 "OSUnserializeXML.y" { (yyval) = buildArray(STATE, (yyvsp[(1) - (1)])); ;} break; case 7: -#line 165 "OSUnserializeXML.y" +#line 160 "OSUnserializeXML.y" { (yyval) = buildSet(STATE, (yyvsp[(1) - (1)])); ;} break; case 8: -#line 166 "OSUnserializeXML.y" +#line 161 "OSUnserializeXML.y" { (yyval) = buildString(STATE, (yyvsp[(1) - (1)])); ;} break; case 9: -#line 167 "OSUnserializeXML.y" +#line 162 "OSUnserializeXML.y" { (yyval) = buildData(STATE, (yyvsp[(1) - (1)])); ;} break; case 10: -#line 168 "OSUnserializeXML.y" +#line 163 "OSUnserializeXML.y" { 
(yyval) = buildNumber(STATE, (yyvsp[(1) - (1)])); ;} break; case 11: -#line 169 "OSUnserializeXML.y" +#line 164 "OSUnserializeXML.y" { (yyval) = buildBoolean(STATE, (yyvsp[(1) - (1)])); ;} break; case 12: -#line 170 "OSUnserializeXML.y" +#line 165 "OSUnserializeXML.y" { (yyval) = retrieveObject(STATE, (yyvsp[(1) - (1)])->idref); if ((yyval)) { (yyval)->object->retain(); @@ -1566,28 +1561,28 @@ int yynerrs; break; case 13: -#line 183 "OSUnserializeXML.y" +#line 178 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 14: -#line 186 "OSUnserializeXML.y" +#line 181 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; case 17: -#line 193 "OSUnserializeXML.y" +#line 188 "OSUnserializeXML.y" { (yyval) = (yyvsp[(2) - (2)]); (yyval)->next = (yyvsp[(1) - (2)]); ;} break; case 18: -#line 198 "OSUnserializeXML.y" +#line 193 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->key = (OSString *)(yyval)->object; (yyval)->object = (yyvsp[(2) - (2)])->object; @@ -1598,47 +1593,47 @@ int yynerrs; break; case 19: -#line 207 "OSUnserializeXML.y" +#line 202 "OSUnserializeXML.y" { (yyval) = buildString(STATE, (yyvsp[(1) - (1)])); ;} break; case 20: -#line 212 "OSUnserializeXML.y" +#line 207 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 21: -#line 215 "OSUnserializeXML.y" +#line 210 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; case 23: -#line 221 "OSUnserializeXML.y" +#line 216 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 24: -#line 224 "OSUnserializeXML.y" +#line 219 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; case 26: -#line 230 "OSUnserializeXML.y" +#line 225 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (1)]); (yyval)->next = NULL; ;} break; case 27: -#line 233 
"OSUnserializeXML.y" +#line 228 "OSUnserializeXML.y" { (yyval) = (yyvsp[(2) - (2)]); (yyval)->next = (yyvsp[(1) - (2)]); ;} @@ -1646,7 +1641,7 @@ int yynerrs; /* Line 1267 of yacc.c. */ -#line 1600 "OSUnserializeXML.tab.c" +#line 1595 "OSUnserializeXML.tab.c" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -1860,19 +1855,18 @@ int yynerrs; } -#line 255 "OSUnserializeXML.y" +#line 250 "OSUnserializeXML.y" int -OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on errors */ +OSUnserializeerror(parser_state_t * state, const char *s) /* Called by yyparse on errors */ { - char tempString[128]; - if (state->errorString) { + char tempString[128]; snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber); *(state->errorString) = OSString::withCString(tempString); } - + return 0; } @@ -1882,7 +1876,7 @@ OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on err #define TAG_START 1 #define TAG_END 2 #define TAG_EMPTY 3 -#define TAG_COMMENT 4 +#define TAG_IGNORE 4 #define currentChar() (state->parseBuffer[state->parseBufferIndex]) #define nextChar() (state->parseBuffer[++state->parseBufferIndex]) @@ -1911,16 +1905,50 @@ getTag(parser_state_t *state, if (c != '<') return TAG_BAD; c = nextChar(); // skip '<' - if (c == '?' 
|| c == '!') { - while ((c = nextChar()) != 0) { - if (c == '\n') state->lineNumber++; - if (c == '>') { - (void)nextChar(); - return TAG_COMMENT; - } - } - } + // + // + if (c == '!') { + c = nextChar(); + bool isComment = (c == '-') && ((c = nextChar()) != 0) && (c == '-'); + if (!isComment && !isAlpha(c)) return TAG_BAD; // lineNumber++; + if (isComment) { + if (c != '-') continue; + c = nextChar(); + if (c != '-') continue; + c = nextChar(); + } + if (c == '>') { + (void)nextChar(); + return TAG_IGNORE; + } + if (isComment) break; + } + return TAG_BAD; + } + + else + + // + if (c == '?') { + while ((c = nextChar()) != 0) { + if (c == '\n') state->lineNumber++; + if (c != '?') continue; + c = nextChar(); + if (c == '>') { + (void)nextChar(); + return TAG_IGNORE; + } + } + return TAG_BAD; + } + + else + + // if (c == '/') { c = nextChar(); // skip '/' tagType = TAG_END; @@ -2255,7 +2283,7 @@ yylex(YYSTYPE *lvalp, parser_state_t *state) tagType = getTag(STATE, tag, &attributeCount, attributes, values); if (tagType == TAG_BAD) return SYNTAX_ERROR; - if (tagType == TAG_COMMENT) goto top; + if (tagType == TAG_IGNORE) goto top; // handle allocation and check for "ID" and "IDREF" tags up front *lvalp = object = newObject(STATE); @@ -2658,7 +2686,7 @@ buildNumber(parser_state_t *state, object_t *o) }; object_t * -buildBoolean(parser_state_t *state, object_t *o) +buildBoolean(parser_state_t *state __unused, object_t *o) { o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue); o->object->retain(); diff --git a/libkern/c++/OSUnserializeXML.y b/libkern/c++/OSUnserializeXML.y index ca44aa099..abcc20484 100644 --- a/libkern/c++/OSUnserializeXML.y +++ b/libkern/c++/OSUnserializeXML.y @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * Copyright (c) 1999-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,10 +100,9 @@ typedef struct parser_state { #undef yyerror #define yyerror(s) OSUnserializeerror(STATE, (s)) -static int OSUnserializeerror(parser_state_t *state, char *s); +static int OSUnserializeerror(parser_state_t *state, const char *s); static int yylex(YYSTYPE *lvalp, parser_state_t *state); -static int yyparse(void * state); static object_t *newObject(parser_state_t *state); static void freeObject(parser_state_t *state, object_t *o); @@ -124,10 +123,6 @@ extern void *kern_os_malloc(size_t size); extern void *kern_os_realloc(void * addr, size_t size); extern void kern_os_free(void * addr); -//XXX shouldn't have to define these -extern long strtol(const char *, char **, int); -extern unsigned long strtoul(const char *, char **, int); - } /* extern "C" */ #define malloc(s) kern_os_malloc(s) @@ -255,15 +250,14 @@ string: STRING %% int -OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on errors */ +OSUnserializeerror(parser_state_t * state, const char *s) /* Called by yyparse on errors */ { - char tempString[128]; - if (state->errorString) { + char tempString[128]; snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber); *(state->errorString) = OSString::withCString(tempString); } - + return 0; } @@ -273,7 +267,7 @@ OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on err #define TAG_START 1 #define TAG_END 2 #define TAG_EMPTY 3 -#define TAG_COMMENT 4 +#define TAG_IGNORE 4 #define currentChar() (state->parseBuffer[state->parseBufferIndex]) #define nextChar() (state->parseBuffer[++state->parseBufferIndex]) @@ -302,16 +296,50 @@ getTag(parser_state_t *state, if (c != '<') return TAG_BAD; c = nextChar(); // skip '<' - if (c == '?' 
|| c == '!') { - while ((c = nextChar()) != 0) { - if (c == '\n') state->lineNumber++; - if (c == '>') { - (void)nextChar(); - return TAG_COMMENT; - } - } - } + // + // + if (c == '!') { + c = nextChar(); + bool isComment = (c == '-') && ((c = nextChar()) != 0) && (c == '-'); + if (!isComment && !isAlpha(c)) return TAG_BAD; // lineNumber++; + if (isComment) { + if (c != '-') continue; + c = nextChar(); + if (c != '-') continue; + c = nextChar(); + } + if (c == '>') { + (void)nextChar(); + return TAG_IGNORE; + } + if (isComment) break; + } + return TAG_BAD; + } + + else + + // + if (c == '?') { + while ((c = nextChar()) != 0) { + if (c == '\n') state->lineNumber++; + if (c != '?') continue; + c = nextChar(); + if (c == '>') { + (void)nextChar(); + return TAG_IGNORE; + } + } + return TAG_BAD; + } + + else + + // if (c == '/') { c = nextChar(); // skip '/' tagType = TAG_END; @@ -646,7 +674,7 @@ yylex(YYSTYPE *lvalp, parser_state_t *state) tagType = getTag(STATE, tag, &attributeCount, attributes, values); if (tagType == TAG_BAD) return SYNTAX_ERROR; - if (tagType == TAG_COMMENT) goto top; + if (tagType == TAG_IGNORE) goto top; // handle allocation and check for "ID" and "IDREF" tags up front *lvalp = object = newObject(STATE); @@ -1049,7 +1077,7 @@ buildNumber(parser_state_t *state, object_t *o) }; object_t * -buildBoolean(parser_state_t *state, object_t *o) +buildBoolean(parser_state_t *state __unused, object_t *o) { o->object = ((o->number == 0) ? 
kOSBooleanFalse : kOSBooleanTrue); o->object->retain(); diff --git a/libkern/c++/Tests/TestSerialization/test1/Info.plist b/libkern/c++/Tests/TestSerialization/test1/Info.plist new file mode 100644 index 000000000..c6c23a5e6 --- /dev/null +++ b/libkern/c++/Tests/TestSerialization/test1/Info.plist @@ -0,0 +1,33 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleName + ${PRODUCT_NAME} + CFBundleIconFile + + CFBundleIdentifier + com.apple.kext.${PRODUCT_NAME:identifier} + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.0d1 + OSBundleLibraries + + com.apple.kpi.iokit + 9.0.0d7 + com.apple.kpi.libkern + 9.0.0d7 + com.apple.kpi.mach + 9.0.0d7 + + + diff --git a/libkern/c++/Tests/TestSerialization/test1/test1.pbproj/project.pbxproj b/libkern/c++/Tests/TestSerialization/test1/test1.pbproj/project.pbxproj deleted file mode 100644 index 453a5a07d..000000000 --- a/libkern/c++/Tests/TestSerialization/test1/test1.pbproj/project.pbxproj +++ /dev/null @@ -1,260 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 38; - objects = { - 051B4E2F03823AF402CA299A = { - isa = PBXFileReference; - path = test1_main.cpp; - refType = 4; - }; - 051B4E3003823AF402CA299A = { - fileRef = 051B4E2F03823AF402CA299A; - isa = PBXBuildFile; - settings = { - }; - }; -//050 -//051 -//052 -//053 -//054 -//060 -//061 -//062 -//063 -//064 - 06AA1261FFB20DD611CA28AA = { - buildActionMask = 2147483647; - files = ( - ); - generatedFileNames = ( - ); - isa = PBXShellScriptBuildPhase; - neededFileNames = ( - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - OPTIMIZATION_CFLAGS = "-O0"; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - }; - isa = PBXBuildStyle; - name = Deployment; - }; - 06AA1268FFB211EB11CA28AA = { - buildActionMask = 2147483647; - files = ( - ); - generatedFileNames = ( - ); - isa = PBXShellScriptBuildPhase; - neededFileNames = ( - ); - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 089C1673FE841209C02AAC07, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = test1; - refType = 4; - }; - 089C1673FE841209C02AAC07 = { - buildPhases = ( - 06AA1268FFB211EB11CA28AA, - 089C1674FE841209C02AAC07, - 089C1675FE841209C02AAC07, - 089C1676FE841209C02AAC07, - 089C1677FE841209C02AAC07, - 089C1679FE841209C02AAC07, - 06AA1261FFB20DD611CA28AA, - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - HEADER_SEARCH_PATHS = ""; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - KERNEL_MODULE = YES; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.MySoftwareCompany.kext.test1; - MODULE_START = test1_start; - MODULE_STOP = test1_stop; - MODULE_VERSION = 1.0.0d1; - OTHER_CFLAGS = ""; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = test1; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost -Wno-four-char-constants 
-Wno-unknown-pragmas"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXBundleTarget; - name = test1; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = test1; - productReference = 0A5A7D55FFB780D811CA28AA; - productSettingsXML = " - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - test1 - CFBundleIconFile - - CFBundleIdentifier - com.MySoftwareCompany.kext.test1 - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - KEXT - CFBundleSignature - ???? - CFBundleVersion - 1.0.0d1 - OSBundleLibraries - - com.apple.kernel.libkern - 1.1 - - - -"; - shouldUseHeadermap = 1; - }; - 089C1674FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1675FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1676FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - 051B4E3003823AF402CA299A, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1677FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1679FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; -//080 -//081 -//082 -//083 -//084 -//0A0 -//0A1 -//0A2 -//0A3 -//0A4 - 0A5A7D55FFB780D811CA28AA = { - isa = PBXBundleReference; - path = test1.kext; - refType = 3; - }; -//0A0 -//0A1 -//0A2 -//0A3 -//0A4 -//190 -//191 -//192 -//193 -//194 - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 0A5A7D55FFB780D811CA28AA, - ); - isa = PBXGroup; - name = Products; - refType = 4; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 051B4E2F03823AF402CA299A, - ); - isa = PBXGroup; 
- name = Source; - path = ""; - refType = 4; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/project.pbxproj b/libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/project.pbxproj new file mode 100644 index 000000000..3f4a3744e --- /dev/null +++ b/libkern/c++/Tests/TestSerialization/test1/test1.xcodeproj/project.pbxproj @@ -0,0 +1,240 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 00420FB80F57B71E000C8EB0 /* test1_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00420FB70F57B71E000C8EB0 /* test1_main.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 00420FB70F57B71E000C8EB0 /* test1_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = test1_main.cpp; sourceTree = ""; }; + 32A4FEC30562C75700D090E7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 32A4FEC40562C75800D090E7 /* test1.kext */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = test1.kext; sourceTree = BUILT_PRODUCTS_DIR; }; + D27513B306A6225300ADB3A4 /* Kernel.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Kernel.framework; path = /System/Library/Frameworks/Kernel.framework; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 32A4FEBF0562C75700D090E7 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 089C166AFE841209C02AAC07 /* test1 */ = { + isa = PBXGroup; + children = ( + 247142CAFF3F8F9811CA285C /* Source */, + 089C167CFE841241C02AAC07 /* Resources */, + 
D27513B306A6225300ADB3A4 /* Kernel.framework */, + 19C28FB6FE9D52B211CA2CBB /* Products */, + ); + name = test1; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 /* Resources */ = { + isa = PBXGroup; + children = ( + 32A4FEC30562C75700D090E7 /* Info.plist */, + ); + name = Resources; + sourceTree = ""; + }; + 19C28FB6FE9D52B211CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 32A4FEC40562C75800D090E7 /* test1.kext */, + ); + name = Products; + sourceTree = ""; + }; + 247142CAFF3F8F9811CA285C /* Source */ = { + isa = PBXGroup; + children = ( + 00420FB70F57B71E000C8EB0 /* test1_main.cpp */, + ); + name = Source; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 32A4FEBA0562C75700D090E7 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 32A4FEB80562C75700D090E7 /* test1 */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB91C308733DAC0010E9CD /* Build configuration list for PBXNativeTarget "test1" */; + buildPhases = ( + 32A4FEBA0562C75700D090E7 /* Headers */, + 32A4FEBB0562C75700D090E7 /* Resources */, + 32A4FEBD0562C75700D090E7 /* Sources */, + 32A4FEBF0562C75700D090E7 /* Frameworks */, + 32A4FEC00562C75700D090E7 /* Rez */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = test1; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = test1; + productReference = 32A4FEC40562C75800D090E7 /* test1.kext */; + productType = "com.apple.product-type.kernel-extension"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 089C1669FE841209C02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB91C708733DAC0010E9CD /* Build configuration list for PBXProject "test1" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 
089C166AFE841209C02AAC07 /* test1 */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 32A4FEB80562C75700D090E7 /* test1 */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 32A4FEBB0562C75700D090E7 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXRezBuildPhase section */ + 32A4FEC00562C75700D090E7 /* Rez */ = { + isa = PBXRezBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXRezBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 32A4FEBD0562C75700D090E7 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 00420FB80F57B71E000C8EB0 /* test1_main.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB91C408733DAC0010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + MODULE_NAME = com.yourcompany.kext.test1; + MODULE_START = test1_start; + MODULE_STOP = test1_stop; + MODULE_VERSION = 1.0.0d1; + ONLY_ACTIVE_ARCH = NO; + PRODUCT_NAME = test1; + SDKROOT = ""; + WRAPPER_EXTENSION = kext; + }; + name = Debug; + }; + 1DEB91C508733DAC0010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + MODULE_NAME = 
com.yourcompany.kext.test1; + MODULE_START = test1_start; + MODULE_STOP = test1_stop; + MODULE_VERSION = 1.0.0d1; + ONLY_ACTIVE_ARCH = NO; + PRODUCT_NAME = test1; + SDKROOT = ""; + WRAPPER_EXTENSION = kext; + }; + name = Release; + }; + 1DEB91C808733DAC0010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + }; + name = Debug; + }; + 1DEB91C908733DAC0010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB91C308733DAC0010E9CD /* Build configuration list for PBXNativeTarget "test1" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91C408733DAC0010E9CD /* Debug */, + 1DEB91C508733DAC0010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB91C708733DAC0010E9CD /* Build configuration list for PBXProject "test1" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91C808733DAC0010E9CD /* Debug */, + 1DEB91C908733DAC0010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 089C1669FE841209C02AAC07 /* Project object */; +} diff --git a/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp b/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp index 80a132574..1efad504b 100644 --- a/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp +++ 
b/libkern/c++/Tests/TestSerialization/test1/test1_main.cpp @@ -39,26 +39,26 @@ __END_DECLS #include #include -char *testBuffer = " -{ string = \"this is a 'string' with spaces\"; - string2 = 'this is also a \"string\" with spaces'; - offset = 16384:32; - true = .true.; - false = .false.; - data = <0123 4567 89abcdef>; - array = (1:8, 2:16, 3:32, 4:64 ); - set = [ one, two, three, four ]; - emptydict = { }@1; - emptyarray = ( )@2; - emptyset = [ ]@3; - emptydata = < >@4; - emptydict2 = @1; - emptyarray2 = @2; - emptyset2 = @3; - emptydata2 = @4; - dict2 = { string = asdfasdf; }; - dict3 = { string = asdfasdf; }; -}@0"; +const char *testBuffer = "" +"{ string = \"this is a 'string' with spaces\";" +" string2 = 'this is also a \"string\" with spaces';" +" offset = 16384:32;" +" true = .true.;" +" false = .false.;" +" data = <0123 4567 89abcdef>;" +" array = (1:8, 2:16, 3:32, 4:64 );" +" set = [ one, two, three, four ];" +" emptydict = { }@1;" +" emptyarray = ( )@2;" +" emptyset = [ ]@3;" +" emptydata = < >@4;" +" emptydict2 = @1;" +" emptyarray2 = @2;" +" emptyset2 = @3;" +" emptydata2 = @4;" +" dict2 = { string = asdfasdf; };" +" dict3 = { string = asdfasdf; };" +"}@0"; kern_return_t test1_start(struct kmod_info *ki, void *data) diff --git a/libkern/c++/Tests/TestSerialization/test2/Info.plist b/libkern/c++/Tests/TestSerialization/test2/Info.plist new file mode 100644 index 000000000..1c5782fe7 --- /dev/null +++ b/libkern/c++/Tests/TestSerialization/test2/Info.plist @@ -0,0 +1,33 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ${EXECUTABLE_NAME} + CFBundleName + ${PRODUCT_NAME} + CFBundleIconFile + + CFBundleIdentifier + com.apple.kext.${PRODUCT_NAME:identifier} + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.0d1 + OSBundleLibraries + + com.apple.kpi.iokit + 9.0.0d7 + com.apple.kpi.libkern + 9.0.0d7 + com.apple.kpi.mach + 9.0.0d7 + + + diff --git a/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj b/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj deleted file mode 100644 index 431decfcd..000000000 --- a/libkern/c++/Tests/TestSerialization/test2/test2.pbproj/project.pbxproj +++ /dev/null @@ -1,270 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 05D29F900382361902CA299A = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.cpp.cpp; - path = test2_main.cpp; - refType = 4; - sourceTree = ""; - }; - 05D29F910382361902CA299A = { - fileRef = 05D29F900382361902CA299A; - isa = PBXBuildFile; - settings = { - }; - }; -//050 -//051 -//052 -//053 -//054 -//060 -//061 -//062 -//063 -//064 - 06AA1261FFB20DD611CA28AA = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; - 06AA1268FFB211EB11CA28AA = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 089C1673FE841209C02AAC07, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = test2; - refType = 4; - sourceTree = ""; - }; - 089C1673FE841209C02AAC07 = { - buildPhases = ( - 06AA1268FFB211EB11CA28AA, - 089C1674FE841209C02AAC07, - 089C1675FE841209C02AAC07, - 089C1676FE841209C02AAC07, - 089C1677FE841209C02AAC07, - 089C1679FE841209C02AAC07, - 06AA1261FFB20DD611CA28AA, - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - HEADER_SEARCH_PATHS = ""; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - KERNEL_MODULE = YES; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.MySoftwareCompany.kext.test2; - MODULE_START = test2_start; - MODULE_STOP = 
test2_stop; - MODULE_VERSION = 1.0.0d1; - OTHER_CFLAGS = ""; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = test2; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost -Wno-four-char-constants -Wno-unknown-pragmas"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXBundleTarget; - name = test2; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = test2; - productReference = 0A5A7D55FFB780D811CA28AA; - productSettingsXML = " - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - test2 - CFBundleIconFile - - CFBundleIdentifier - com.MySoftwareCompany.kext.test2 - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - KEXT - CFBundleSignature - ???? - CFBundleVersion - 1.0.0d1 - OSBundleLibraries - - com.apple.kernel.libkern - 1.1 - - - -"; - }; - 089C1674FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1675FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1676FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - 05D29F910382361902CA299A, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1677FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 089C1679FE841209C02AAC07 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; -//080 -//081 -//082 -//083 -//084 -//0A0 -//0A1 -//0A2 -//0A3 -//0A4 - 0A5A7D55FFB780D811CA28AA = { - explicitFileType = wrapper.cfbundle; - fallbackIsa = PBXFileReference; - isa = PBXBundleReference; - path = test2.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; -//0A0 -//0A1 -//0A2 -//0A3 -//0A4 -//190 -//191 -//192 -//193 -//194 - 
19C28FB6FE9D52B211CA2CBB = { - children = ( - 0A5A7D55FFB780D811CA28AA, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = ""; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 05D29F900382361902CA299A, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = ""; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/libkern/c++/Tests/TestSerialization/test2/test2.xcodeproj/project.pbxproj b/libkern/c++/Tests/TestSerialization/test2/test2.xcodeproj/project.pbxproj new file mode 100644 index 000000000..8984a83a7 --- /dev/null +++ b/libkern/c++/Tests/TestSerialization/test2/test2.xcodeproj/project.pbxproj @@ -0,0 +1,240 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + 00420FC60F57B813000C8EB0 /* test2_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00420FC50F57B813000C8EB0 /* test2_main.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 00420FC50F57B813000C8EB0 /* test2_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = test2_main.cpp; sourceTree = ""; }; + 32A4FEC30562C75700D090E7 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 32A4FEC40562C75800D090E7 /* test2.kext */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = test2.kext; sourceTree = BUILT_PRODUCTS_DIR; }; + D27513B306A6225300ADB3A4 /* Kernel.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Kernel.framework; path = /System/Library/Frameworks/Kernel.framework; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 32A4FEBF0562C75700D090E7 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + 
buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 089C166AFE841209C02AAC07 /* test2 */ = { + isa = PBXGroup; + children = ( + 247142CAFF3F8F9811CA285C /* Source */, + 089C167CFE841241C02AAC07 /* Resources */, + D27513B306A6225300ADB3A4 /* Kernel.framework */, + 19C28FB6FE9D52B211CA2CBB /* Products */, + ); + name = test2; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 /* Resources */ = { + isa = PBXGroup; + children = ( + 32A4FEC30562C75700D090E7 /* Info.plist */, + ); + name = Resources; + sourceTree = ""; + }; + 19C28FB6FE9D52B211CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 32A4FEC40562C75800D090E7 /* test2.kext */, + ); + name = Products; + sourceTree = ""; + }; + 247142CAFF3F8F9811CA285C /* Source */ = { + isa = PBXGroup; + children = ( + 00420FC50F57B813000C8EB0 /* test2_main.cpp */, + ); + name = Source; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 32A4FEBA0562C75700D090E7 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 32A4FEB80562C75700D090E7 /* test2 */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB91C308733DAC0010E9CD /* Build configuration list for PBXNativeTarget "test2" */; + buildPhases = ( + 32A4FEBA0562C75700D090E7 /* Headers */, + 32A4FEBB0562C75700D090E7 /* Resources */, + 32A4FEBD0562C75700D090E7 /* Sources */, + 32A4FEBF0562C75700D090E7 /* Frameworks */, + 32A4FEC00562C75700D090E7 /* Rez */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = test2; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = test2; + productReference = 32A4FEC40562C75800D090E7 /* test2.kext */; + productType = "com.apple.product-type.kernel-extension"; + }; +/* 
End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 089C1669FE841209C02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB91C708733DAC0010E9CD /* Build configuration list for PBXProject "test2" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 089C166AFE841209C02AAC07 /* test2 */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 32A4FEB80562C75700D090E7 /* test2 */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 32A4FEBB0562C75700D090E7 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXRezBuildPhase section */ + 32A4FEC00562C75700D090E7 /* Rez */ = { + isa = PBXRezBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXRezBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 32A4FEBD0562C75700D090E7 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 00420FC60F57B813000C8EB0 /* test2_main.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB91C408733DAC0010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + MODULE_NAME = com.yourcompany.kext.test2; + MODULE_START = test2_start; + MODULE_STOP = test2_stop; + MODULE_VERSION = 1.0.0d1; + ONLY_ACTIVE_ARCH = NO; + PRODUCT_NAME = test2; + SDKROOT = ""; + WRAPPER_EXTENSION = kext; + }; + name = Debug; + }; + 1DEB91C508733DAC0010E9CD /* 
Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + MODULE_NAME = com.yourcompany.kext.test2; + MODULE_START = test2_start; + MODULE_STOP = test2_stop; + MODULE_VERSION = 1.0.0d1; + ONLY_ACTIVE_ARCH = NO; + PRODUCT_NAME = test2; + SDKROOT = ""; + WRAPPER_EXTENSION = kext; + }; + name = Release; + }; + 1DEB91C808733DAC0010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + }; + name = Debug; + }; + 1DEB91C908733DAC0010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_BIT)"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = macosx10.5; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB91C308733DAC0010E9CD /* Build configuration list for PBXNativeTarget "test2" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91C408733DAC0010E9CD /* Debug */, + 1DEB91C508733DAC0010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB91C708733DAC0010E9CD /* Build configuration list for PBXProject "test2" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB91C808733DAC0010E9CD /* Debug */, + 1DEB91C908733DAC0010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 089C1669FE841209C02AAC07 
/* Project object */; +} diff --git a/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp b/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp index a8fe830e9..58ac14e1b 100644 --- a/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp +++ b/libkern/c++/Tests/TestSerialization/test2/test2_main.cpp @@ -39,12 +39,17 @@ __END_DECLS #include #include -char *testBuffer = +const char *testBuffer = " \n" " \n" " \n" " \n" " \n" +" \n" +" \n" + " \n" " key true \n" @@ -64,7 +69,7 @@ char *testBuffer = " key s0 \n" " key s1 string 1 \n" " key s2 string 2 \n" -" key mr � mac roman copyright � \n" +" key mr � mac roman copyright � \n" " key uft8 \xc2\xa9 utf-8 copyright \xc2\xa9 \n" " key <&> <&> \n" diff --git a/libkern/conf/MASTER b/libkern/conf/MASTER index 611d12999..2ca7aafb2 100644 --- a/libkern/conf/MASTER +++ b/libkern/conf/MASTER @@ -64,3 +64,6 @@ options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # options IPSEC # IP security # + +options CONFIG_KXLD # kxld/runtime linking of kexts # + diff --git a/libkern/conf/MASTER.i386 b/libkern/conf/MASTER.i386 index c8a93d539..2bf2e22da 100644 --- a/libkern/conf/MASTER.i386 +++ b/libkern/conf/MASTER.i386 @@ -1,8 +1,9 @@ ###################################################################### # -# RELEASE = [ intel mach libkerncpp networking config_dtrace crypto zlib ] +# RELEASE = [ intel mach libkerncpp networking config_dtrace crypto zlib config_kxld ] # PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] +# DEBUG = [ RELEASE debug mach_kdb ] +# # # EMBEDDED = [ intel mach libkerncpp networking crypto zlib ] # DEVELOPMENT = [ EMBEDDED config_dtrace ] @@ -11,3 +12,7 @@ machine "i386" # cpu "i386" # + +options MACH_KDB # # + +options NO_NESTED_PMAP # diff --git a/libkern/conf/MASTER.ppc b/libkern/conf/MASTER.ppc index 2b8994142..21e317660 100644 --- a/libkern/conf/MASTER.ppc +++ b/libkern/conf/MASTER.ppc @@ -4,10 +4,10 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- 
--------------- # -# RELEASE = [ppc mach libkerncpp networking config_dtrace crypto zlib] +# RELEASE = [ppc mach libkerncpp networking config_dtrace crypto zlib config_kxld ] # DEVELOPMENT = [ RELEASE ] # PROFILE = [RELEASE profile] -# DEBUG = [RELEASE debug] +# DEBUG = [RELEASE debug mach_kdb ] # RELEASE_TRACE = [ RELEASE kdebug ] # DEBUG_TRACE = [ DEBUG kdebug ] # @@ -16,3 +16,4 @@ machine "ppc" # cpu "ppc" # +options MACH_KDB # # diff --git a/libkern/conf/MASTER.x86_64 b/libkern/conf/MASTER.x86_64 new file mode 100644 index 000000000..da71fbe23 --- /dev/null +++ b/libkern/conf/MASTER.x86_64 @@ -0,0 +1,18 @@ +###################################################################### +# +# RELEASE = [ intel mach libkerncpp networking config_dtrace crypto zlib config_kxld ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ RELEASE debug mach_kdb ] +# +# +# EMBEDDED = [ intel mach libkerncpp networking crypto zlib ] +# DEVELOPMENT = [ EMBEDDED ] +# +###################################################################### + +machine "x86_64" # +cpu "x86_64" # + +options MACH_KDB # # + +options NO_NESTED_PMAP # diff --git a/libkern/conf/Makefile b/libkern/conf/Makefile index f499460ea..8f54b1af4 100644 --- a/libkern/conf/Makefile +++ b/libkern/conf/Makefile @@ -35,7 +35,6 @@ $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/libkern/conf/Makefile.i386 b/libkern/conf/Makefile.i386 index 3dab7b368..3695a666c 100644 --- a/libkern/conf/Makefile.i386 +++ b/libkern/conf/Makefile.i386 @@ -2,40 +2,6 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### -# Enable -Werror for i386 builds -CFLAGS+= $(WERROR) -CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) - 
-# Objects that don't compile cleanly in libkern/c++: -OBJS_NO_WERROR= \ - ioconf.o \ - OSRuntimeSupport.o \ - OSMetaClass.cpo \ - OSArray.cpo \ - OSBoolean.cpo \ - OSCollectionIterator.cpo \ - OSCollection.cpo \ - OSData.cpo \ - OSDictionary.cpo \ - OSNumber.cpo \ - OSObject.cpo \ - OSOrderedSet.cpo \ - OSRuntime.cpo \ - OSSerialize.cpo \ - OSString.cpo \ - OSSymbol.cpo \ - OSUnserialize.cpo \ - OSIterator.cpo \ - OSSet.cpo \ - scanf.o \ - OSUnserializeXML.cpo \ - zlib.o \ - uuid.o - -OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) - -$(OBJS_WERROR): WERROR=-Werror - ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/libkern/conf/Makefile.ppc b/libkern/conf/Makefile.ppc index 2b438f2fa..cd79f229a 100644 --- a/libkern/conf/Makefile.ppc +++ b/libkern/conf/Makefile.ppc @@ -2,7 +2,6 @@ #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### - ###################################################################### #END Machine dependent Makefile fragment for ppc ###################################################################### diff --git a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template index 0a4000c19..005aa9ca3 100644 --- a/libkern/conf/Makefile.template +++ b/libkern/conf/Makefile.template @@ -11,7 +11,7 @@ export IDENT # -# XXX: INCFLAGS +# INCFLAGS # INCFLAGS_MAKEFILE= @@ -24,12 +24,22 @@ include $(MakeInc_cmd) include $(MakeInc_def) # -# XXX: CFLAGS +# CFLAGS # -CFLAGS+= -imacros meta_features.h -DKERNEL -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \ - -Wall -Wno-four-char-constants -fno-common $(CFLAGS_INLINE_CONFIG) +CFLAGS+= -imacros meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \ + -Werror $(CFLAGS_INLINE_CONFIG) -SFLAGS+= -DKERNEL +# zlib is 3rd party source +compress.o_CWARNFLAGS_ADD = -Wno-cast-qual 
+deflate.o_CWARNFLAGS_ADD = -Wno-cast-qual +infback.o_CWARNFLAGS_ADD = -Wno-cast-qual +inffast.o_CWARNFLAGS_ADD = -Wno-cast-qual +inflate.o_CWARNFLAGS_ADD = -Wno-cast-qual +trees.o_CWARNFLAGS_ADD = -Wno-cast-qual +uncompr.o_CWARNFLAGS_ADD = -Wno-cast-qual + +# warnings in bison-generated code +OSUnserializeXML.cpo_CXXWARNFLAGS_ADD = -Wno-uninitialized # # Directories for mig generated files @@ -76,8 +86,10 @@ ${OBJS}: ${OBJSDEPS} LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_all: $(COMPONENT).o diff --git a/libkern/conf/Makefile.x86_64 b/libkern/conf/Makefile.x86_64 new file mode 100644 index 000000000..7b0de925d --- /dev/null +++ b/libkern/conf/Makefile.x86_64 @@ -0,0 +1,7 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### diff --git a/libkern/conf/files b/libkern/conf/files index f1c59a5a8..c763d0ac4 100644 --- a/libkern/conf/files +++ b/libkern/conf/files @@ -23,6 +23,7 @@ libkern/c++/OSCollectionIterator.cpp optional libkerncpp libkern/c++/OSData.cpp optional libkerncpp libkern/c++/OSDictionary.cpp optional libkerncpp libkern/c++/OSIterator.cpp optional libkerncpp +libkern/c++/OSKext.cpp optional libkerncpp libkern/c++/OSNumber.cpp optional libkerncpp libkern/c++/OSOrderedSet.cpp optional libkerncpp libkern/c++/OSRuntime.cpp optional libkerncpp @@ -34,14 +35,48 @@ libkern/c++/OSSymbol.cpp optional libkerncpp libkern/c++/OSUnserialize.cpp optional libkerncpp 
libkern/c++/OSUnserializeXML.cpp optional libkerncpp +libkern/OSKextLib.cpp standard libkerncpp +libkern/mkext.c standard +libkern/OSKextVersion.c standard + libkern/stdio/scanf.c standard libkern/uuid/uuid.c standard -libkern/zlib.c optional zlib networking -libkern/zlib.c optional ipsec +libkern/kernel_mach_header.c standard + +libkern/zlib/adler32.c optional zlib +libkern/zlib/compress.c optional zlib +libkern/zlib/crc32.c optional zlib +libkern/zlib/deflate.c optional zlib +#libkern/zlib/gzio.c not needed for kernel optional zlib +libkern/zlib/infback.c optional zlib +libkern/zlib/inffast.c optional zlib +libkern/zlib/inflate.c optional zlib +libkern/zlib/inftrees.c optional zlib +libkern/zlib/trees.c optional zlib +libkern/zlib/uncompr.c optional zlib +libkern/zlib/zutil.c optional zlib libkern/crypto/md5.c optional crypto libkern/crypto/md5.c optional networking libkern/crypto/sha1.c optional crypto libkern/crypto/sha1.c optional ipsec + +libkern/stack_protector.c standard + +libkern/kxld/kxld.c optional config_kxld +libkern/kxld/kxld_array.c optional config_kxld +libkern/kxld/kxld_copyright.c optional config_kxld +libkern/kxld/kxld_dict.c optional config_kxld +libkern/kxld/kxld_kext.c optional config_kxld +libkern/kxld/kxld_reloc.c optional config_kxld +libkern/kxld/kxld_sect.c optional config_kxld +libkern/kxld/kxld_seg.c optional config_kxld +libkern/kxld/kxld_state.c optional config_kxld +libkern/kxld/kxld_sym.c optional config_kxld +libkern/kxld/kxld_symtab.c optional config_kxld +libkern/kxld/kxld_util.c optional config_kxld +libkern/kxld/kxld_uuid.c optional config_kxld +libkern/kxld/kxld_vtable.c optional config_kxld +libkern/kxld/kxld_stubs.c standard diff --git a/libkern/conf/files.x86_64 b/libkern/conf/files.x86_64 new file mode 100644 index 000000000..bcf047445 --- /dev/null +++ b/libkern/conf/files.x86_64 @@ -0,0 +1 @@ +libkern/x86_64/OSAtomic.s standard diff --git a/libkern/crypto/sha1.c b/libkern/crypto/sha1.c index 7924e6382..69e9eec42 100644 
--- a/libkern/crypto/sha1.c +++ b/libkern/crypto/sha1.c @@ -231,9 +231,9 @@ void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref) { if(option) { // Establish the hook. The hardware is ready. - OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)ref, (uintptr_t *)&SHA1Ref); + OSCompareAndSwapPtr((void*)NULL, (void*)ref, (void * volatile*)&SHA1Ref); - if(!OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)func, (uintptr_t *)&performSHA1WithinKernelOnly)) { + if(!OSCompareAndSwapPtr((void *)NULL, (void *)func, (void * volatile *)&performSHA1WithinKernelOnly)) { panic("sha1_hardware_hook: Called twice.. Should never happen\n"); } } diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c index bbdb0d970..aeeb09364 100644 --- a/libkern/gen/OSAtomicOperations.c +++ b/libkern/gen/OSAtomicOperations.c @@ -54,52 +54,63 @@ enum { * Like standards, there are a lot of atomic ops to choose from! */ -#ifndef __ppc__ +#if !defined(__ppc__) && !defined(__i386__) && !defined(__x86_64__) +/* Implemented in assembly for ppc and i386 and x86_64 */ +#undef OSAddAtomic +SInt32 +OSAddAtomic(SInt32 amount, volatile SInt32 * value) +{ + SInt32 oldValue; + SInt32 newValue; + do { + oldValue = *value; + newValue = oldValue + amount; + } while (!OSCompareAndSwap((UInt32)oldValue, + (UInt32)newValue, + (volatile UInt32 *) value)); + return oldValue; +} -SInt32 OSIncrementAtomic(volatile SInt32 * value) +long +OSAddAtomicLong(long theAmount, volatile long *address) { - return OSAddAtomic(1, value); +#if __LP64__ +#error Unimplemented +#else + return (long)OSAddAtomic((SInt32)theAmount, address); +#endif } -SInt32 OSDecrementAtomic(volatile SInt32 * value) +/* Implemented as an assembly alias for i386 and linker alias for ppc */ +#undef OSCompareAndSwapPtr +Boolean OSCompareAndSwapPtr(void *oldValue, void *newValue, + void * volatile *address) { - return OSAddAtomic(-1, value); +#if __LP64__ + return OSCompareAndSwap64((UInt64)oldValue, (UInt64)newValue, + 
(volatile UInt64 *)address); +#else + return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, + (volatile UInt32 *)address); +#endif } +#endif + +#ifndef __ppc__ +/* Implemented as assembly for ppc */ -#ifdef CMPXCHG8B -void * OSDequeueAtomic(void * volatile * inList, SInt32 inOffset) +#undef OSIncrementAtomic +SInt32 OSIncrementAtomic(volatile SInt32 * value) { - /* The _pointer_ is volatile, not the listhead itself */ - void * volatile oldListHead; - void * volatile newListHead; - - do { - oldListHead = *inList; - if (oldListHead == NULL) { - break; - } - - newListHead = *(void * volatile *) (((char *) oldListHead) + inOffset); - } while (! OSCompareAndSwap((UInt32)oldListHead, - (UInt32)newListHead, (volatile UInt32 *)inList)); - return oldListHead; + return OSAddAtomic(1, value); } -void OSEnqueueAtomic(void * volatile * inList, void * inNewLink, SInt32 inOffset) +#undef OSDecrementAtomic +SInt32 OSDecrementAtomic(volatile SInt32 * value) { - /* The _pointer_ is volatile, not the listhead itself */ - void * volatile oldListHead; - void * volatile newListHead = inNewLink; - void * volatile * newLinkNextPtr = (void * volatile *) (((char *) inNewLink) + inOffset); - - do { - oldListHead = *inList; - *newLinkNextPtr = oldListHead; - } while (! 
OSCompareAndSwap((UInt32)oldListHead, (UInt32)newListHead, - (volatile UInt32 *)inList)); + return OSAddAtomic(-1, value); } -#endif /* CMPXCHG8B */ #endif /* !__ppc__ */ static UInt32 OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt32 * value) @@ -115,16 +126,19 @@ static UInt32 OSBitwiseAtomic(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, return oldValue; } +#undef OSBitAndAtomic UInt32 OSBitAndAtomic(UInt32 mask, volatile UInt32 * value) { return OSBitwiseAtomic(mask, 0, 0, value); } +#undef OSBitOrAtomic UInt32 OSBitOrAtomic(UInt32 mask, volatile UInt32 * value) { return OSBitwiseAtomic((UInt32) -1, mask, 0, value); } +#undef OSBitXorAtomic UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * value) { return OSBitwiseAtomic((UInt32) -1, 0, mask, value); @@ -133,10 +147,10 @@ UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * value) static Boolean OSCompareAndSwap8(UInt8 oldValue8, UInt8 newValue8, volatile UInt8 * value8) { UInt32 mask = 0x000000ff; - UInt32 alignment = ((UInt32) value8) & (sizeof(UInt32) - 1); + UInt32 alignment = (UInt32)((unsigned long) value8) & (sizeof(UInt32) - 1); UInt32 shiftValues = (24 << 24) | (16 << 16) | (8 << 8); int shift = (UInt32) *(((UInt8 *) &shiftValues) + alignment); - volatile UInt32 * value32 = (volatile UInt32 *) (value8 - alignment); + volatile UInt32 * value32 = (volatile UInt32 *) ((uintptr_t)value8 - alignment); UInt32 oldValue; UInt32 newValue; @@ -237,10 +251,10 @@ UInt8 OSBitXorAtomic8(UInt32 mask, volatile UInt8 * value) static Boolean OSCompareAndSwap16(UInt16 oldValue16, UInt16 newValue16, volatile UInt16 * value16) { UInt32 mask = 0x0000ffff; - UInt32 alignment = ((UInt32) value16) & (sizeof(UInt32) - 1); + UInt32 alignment = (UInt32)((unsigned long) value16) & (sizeof(UInt32) - 1); UInt32 shiftValues = (16 << 24) | (16 << 16); UInt32 shift = (UInt32) *(((UInt8 *) &shiftValues) + alignment); - volatile UInt32 * value32 = (volatile UInt32 *) (((UInt32) value16) - alignment); 
+ volatile UInt32 * value32 = (volatile UInt32 *) (((unsigned long) value16) - alignment); UInt32 oldValue; UInt32 newValue; diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp index 3690714e8..de1d99372 100644 --- a/libkern/gen/OSDebug.cpp +++ b/libkern/gen/OSDebug.cpp @@ -34,12 +34,14 @@ #include #include #include -#include +#include #include // From bsd's libkern directory #include #include +#include + extern int etext; __BEGIN_DECLS // From osmfk/kern/thread.h but considered to be private @@ -53,11 +55,13 @@ extern addr64_t kvtophys(vm_offset_t va); __END_DECLS -static mutex_t *sOSReportLock = mutex_alloc(0); +extern lck_grp_t *IOLockGroup; + +static lck_mtx_t *sOSReportLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); /* Use kernel_debug() to log a backtrace */ void -trace_backtrace(unsigned int debugid, unsigned int debugid2, int size, int data) { +trace_backtrace(uint32_t debugid, uint32_t debugid2, uintptr_t size, uintptr_t data) { void *bt[16]; const unsigned cnt = sizeof(bt) / sizeof(bt[0]); unsigned i; @@ -78,7 +82,7 @@ trace_backtrace(unsigned int debugid, unsigned int debugid2, int size, int data) */ if (!found) i=2; -#define safe_bt(a) (int)(a VM_MIN_KERNEL_ADDRESS) && + return ((raddr > VM_MIN_KERNEL_AND_KEXT_ADDRESS) && + (raddr < VM_MAX_KERNEL_ADDRESS)); +} +#endif + +#if __x86_64__ +#define x86_64_RETURN_OFFSET 8 +static unsigned int +x86_64_validate_raddr(vm_offset_t raddr) +{ + return ((raddr > VM_MIN_KERNEL_AND_KEXT_ADDRESS) && (raddr < VM_MAX_KERNEL_ADDRESS)); } +static unsigned int +x86_64_validate_stackptr(vm_offset_t stackptr) +{ + /* Existence and alignment check + */ + if (!stackptr || (stackptr & 0x7) || !x86_64_validate_raddr(stackptr)) + return 0; + + /* Is a virtual->physical translation present? + */ + if (!kvtophys(stackptr)) + return 0; + + /* Check if the return address lies on the same page; + * If not, verify that a translation exists. 
+ */ + if (((PAGE_SIZE - (stackptr & PAGE_MASK)) < x86_64_RETURN_OFFSET) && + !kvtophys(stackptr + x86_64_RETURN_OFFSET)) + return 0; + return 1; +} #endif + unsigned OSBacktrace(void **bt, unsigned maxAddrs) { unsigned frame; @@ -165,7 +201,7 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) stackptr_prev = stackptr; stackptr = mem[stackptr_prev >> 2]; - if ((stackptr_prev ^ stackptr) > 8 * 1024) // Sanity check + if ((stackptr - stackptr_prev) > 8 * 1024) // Sanity check break; vm_offset_t addr = mem[(stackptr >> 2) + 2]; @@ -178,7 +214,7 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) for ( ; i < maxAddrs; i++) bt[i] = (void *) 0; #elif __i386__ -#define SANE_i386_FRAME_SIZE 8*1024 +#define SANE_i386_FRAME_SIZE (kernel_stack_size >> 1) vm_offset_t stackptr, stackptr_prev, raddr; unsigned frame_index = 0; /* Obtain current frame pointer */ @@ -204,7 +240,7 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) if (stackptr < stackptr_prev) break; - if ((stackptr_prev ^ stackptr) > SANE_i386_FRAME_SIZE) + if ((stackptr - stackptr_prev) > SANE_i386_FRAME_SIZE) break; raddr = *((vm_offset_t *) (stackptr + i386_RETURN_OFFSET)); @@ -217,6 +253,49 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) pad: frame = frame_index; + for ( ; frame_index < maxAddrs; frame_index++) + bt[frame_index] = (void *) 0; +#elif __x86_64__ +#define SANE_x86_64_FRAME_SIZE (kernel_stack_size >> 1) + vm_offset_t stackptr, stackptr_prev, raddr; + unsigned frame_index = 0; +/* Obtain current frame pointer */ + + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); + + if (!x86_64_validate_stackptr(stackptr)) + goto pad; + + raddr = *((vm_offset_t *) (stackptr + x86_64_RETURN_OFFSET)); + + if (!x86_64_validate_raddr(raddr)) + goto pad; + + bt[frame_index++] = (void *) raddr; + + for ( ; frame_index < maxAddrs; frame_index++) { + stackptr_prev = stackptr; + stackptr = *((vm_offset_t *) stackptr_prev); + + if (!x86_64_validate_stackptr(stackptr)) + break; + /* Stack grows downwards 
*/ + if (stackptr < stackptr_prev) + break; + + if ((stackptr - stackptr_prev) > SANE_x86_64_FRAME_SIZE) + break; + + raddr = *((vm_offset_t *) (stackptr + x86_64_RETURN_OFFSET)); + + if (!x86_64_validate_raddr(raddr)) + break; + + bt[frame_index] = (void *) raddr; + } +pad: + frame = frame_index; + for ( ; frame_index < maxAddrs; frame_index++) bt[frame_index] = (void *) 0; #else @@ -224,3 +303,4 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) #endif return frame; } + diff --git a/libkern/i386/OSAtomic.s b/libkern/i386/OSAtomic.s index 2be21d1a8..72adbb29d 100644 --- a/libkern/i386/OSAtomic.s +++ b/libkern/i386/OSAtomic.s @@ -31,8 +31,10 @@ #;*************************************************************************** .globl _OSCompareAndSwap + .globl _OSCompareAndSwapPtr _OSCompareAndSwap: +_OSCompareAndSwapPtr: movl 4(%esp), %eax #; oldValue movl 8(%esp), %edx #; newValue movl 12(%esp), %ecx #; ptr @@ -96,7 +98,9 @@ _OSAddAtomic64: #;******************************************************* .globl _OSAddAtomic + .globl _OSAddAtomicLong _OSAddAtomic: +_OSAddAtomicLong: movl 4(%esp), %eax #; Load addend movl 8(%esp), %ecx #; Load address of operand lock diff --git a/libkern/kernel_mach_header.c b/libkern/kernel_mach_header.c new file mode 100644 index 000000000..e0830d99d --- /dev/null +++ b/libkern/kernel_mach_header.c @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * File: libkern/kernel_mach_header.c + * + * Functions for accessing Mach-O headers. + * + * NOTE: This file supports only kernel mach headers at the present + * time; its primary use is by kld, and all externally + * referenced routines at the present time operate against + * the kernel mach header _mh_execute_header, which is the + * header for the currently executing kernel. 
+ * + */ + +#include +#include +#include +#include // from libsa + +/* + * Return the last address (first avail): the largest vmaddr + vmsize found + * among the segment load commands of the header, or 0 if there are none. + * + * This routine operates against the currently executing kernel only + */ +vm_offset_t +getlastaddr(void) +{ + kernel_segment_command_t *sgp; + vm_offset_t last_addr = 0; + kernel_mach_header_t *header = &_mh_execute_header; + unsigned long i; + + sgp = (kernel_segment_command_t *) + ((uintptr_t)header + sizeof(kernel_mach_header_t)); + for (i = 0; i < header->ncmds; i++){ + if ( sgp->cmd == LC_SEGMENT_KERNEL) { + if (sgp->vmaddr + sgp->vmsize > last_addr) + last_addr = sgp->vmaddr + sgp->vmsize; + } + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + return last_addr; +} + +/* + * This routine returns a pointer to the data for the named section in the + * named segment if it exists in the mach header passed to it. It also returns + * the size of the section data indirectly through the pointer size. Otherwise + * it returns zero for the pointer and the size. + * + * This routine can operate against any kernel mach header. + */ +void * +getsectdatafromheader( + kernel_mach_header_t *mhp, + const char *segname, + const char *sectname, + unsigned long *size) +{ + const kernel_section_t *sp; + void *result; + + sp = getsectbynamefromheader(mhp, segname, sectname); + if(sp == (kernel_section_t *)0){ + *size = 0; + return((char *)0); + } + *size = sp->size; + result = (void *)sp->addr; + return result; +} + +/* + * This routine returns a pointer to the data for the named segment + * if it exists in the mach header passed to it. It also returns + * the size of the segment data indirectly through the pointer size. + * Otherwise it returns zero for the pointer and the size. 
+ */ +void * +getsegdatafromheader( + kernel_mach_header_t *mhp, + const char *segname, + unsigned long *size) +{ + const kernel_segment_command_t *sc; + void *result; + + sc = getsegbynamefromheader(mhp, segname); + if(sc == (kernel_segment_command_t *)0){ + *size = 0; + return((char *)0); + } + *size = sc->vmsize; + result = (void *)sc->vmaddr; + return result; +} + +/* + * This routine returns the section structure for the named section in the + * named segment for the mach_header pointer passed to it, if it exists. + * Otherwise it returns NULL. + * + * For MH_OBJECT headers every segment command is searched regardless of its + * own name; sections are always matched on both their sectname and segname. + * + * This routine can operate against any kernel mach header. + */ +kernel_section_t * +getsectbynamefromheader( + kernel_mach_header_t *mhp, + const char *segname, + const char *sectname) +{ + kernel_segment_command_t *sgp; + kernel_section_t *sp; + unsigned long i, j; + + sgp = (kernel_segment_command_t *) + ((uintptr_t)mhp + sizeof(kernel_mach_header_t)); + for(i = 0; i < mhp->ncmds; i++){ + if(sgp->cmd == LC_SEGMENT_KERNEL) + if(strncmp(sgp->segname, segname, sizeof(sgp->segname)) == 0 || + mhp->filetype == MH_OBJECT){ + sp = (kernel_section_t *)((uintptr_t)sgp + + sizeof(kernel_segment_command_t)); + for(j = 0; j < sgp->nsects; j++){ + if(strncmp(sp->sectname, sectname, + sizeof(sp->sectname)) == 0 && + strncmp(sp->segname, segname, + sizeof(sp->segname)) == 0) + return(sp); + sp = (kernel_section_t *)((uintptr_t)sp + + sizeof(kernel_section_t)); + } + } + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + return((kernel_section_t *)NULL); +} + +/* + * This routine can operate against any kernel mach header. 
+ */ +kernel_segment_command_t * +getsegbynamefromheader( + kernel_mach_header_t *header, + const char *seg_name) +{ + kernel_segment_command_t *sgp; + unsigned long i; + + sgp = (kernel_segment_command_t *) + ((uintptr_t)header + sizeof(kernel_mach_header_t)); + /* Walk the load commands that follow the header, stepping by cmdsize */ + for (i = 0; i < header->ncmds; i++){ + if ( sgp->cmd == LC_SEGMENT_KERNEL + && !strncmp(sgp->segname, seg_name, sizeof(sgp->segname))) + return sgp; + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + /* No segment load command with that name */ + return (kernel_segment_command_t *)NULL; +} + +/* + * Return the first segment_command in the header of the currently + * executing kernel (_mh_execute_header), or NULL if it has none. + */ +kernel_segment_command_t * +firstseg(void) +{ + return firstsegfromheader(&_mh_execute_header); +} + +/* + * Return the first segment_command in the given header, or NULL if the + * header has no segment load commands. + */ +kernel_segment_command_t * +firstsegfromheader(kernel_mach_header_t *header) +{ + u_int i = 0; + kernel_segment_command_t *sgp = (kernel_segment_command_t *) + ((uintptr_t)header + sizeof(*header)); + + for (i = 0; i < header->ncmds; i++){ + if (sgp->cmd == LC_SEGMENT_KERNEL) + return sgp; + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + return (kernel_segment_command_t *)NULL; +} + +/* + * This routine operates against any kernel mach segment_command structure + * pointer and the provided kernel header, to obtain the sequentially next + * segment_command structure in that header. 
+ */ +kernel_segment_command_t * +nextsegfromheader( + kernel_mach_header_t *header, + kernel_segment_command_t *seg) +{ + u_int i = 0; + kernel_segment_command_t *sgp = (kernel_segment_command_t *) + ((uintptr_t)header + sizeof(*header)); + + /* Find the index of the passed-in segment */ + for (i = 0; sgp != seg && i < header->ncmds; i++) { + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + + /* seg is not one of this header's load commands: sgp now points past the + * last command, so return NULL instead of reading its cmdsize. */ + if (i >= header->ncmds) + return (kernel_segment_command_t *)NULL; + + /* Increment to the next load command */ + i++; + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + + /* Return the next segment command, if any */ + for (; i < header->ncmds; i++) { + if (sgp->cmd == LC_SEGMENT_KERNEL) return sgp; + + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + + /* seg was the last segment command in the header */ + return (kernel_segment_command_t *)NULL; +} + + +/* + * Return the address of the named Mach-O segment from the currently + * executing kernel, or NULL. + */ +kernel_segment_command_t * +getsegbyname(const char *seg_name) +{ + return(getsegbynamefromheader(&_mh_execute_header, seg_name)); +} + +/* + * This routine returns a pointer to the section structure of the named + * section in the named segment if it exists in the currently executing + * kernel, which it is presumed to be linked into. Otherwise it returns NULL. + */ +kernel_section_t * +getsectbyname( + const char *segname, + const char *sectname) +{ + return(getsectbynamefromheader( + (kernel_mach_header_t *)&_mh_execute_header, segname, sectname)); +} + +/* + * This routine can operate against any kernel segment_command structure to + * return the first kernel section immediately following that structure. If + * there are no sections associated with the segment_command structure, it + * returns NULL. 
+ */ +kernel_section_t * +firstsect(kernel_segment_command_t *sgp) +{ + if (!sgp || sgp->nsects == 0) + return (kernel_section_t *)NULL; + + return (kernel_section_t *)(sgp+1); +} + +/* + * This routine can operate against any kernel segment_command structure and + * kernel section to return the next consecutive kernel section immediately + * following the kernel section provided. If there are no sections following + * the provided section, it returns NULL. + */ +kernel_section_t * +nextsect(kernel_segment_command_t *sgp, kernel_section_t *sp) +{ + kernel_section_t *fsp = firstsect(sgp); + + if (((uintptr_t)(sp - fsp) + 1) >= sgp->nsects) + return (kernel_section_t *)NULL; + + return sp+1; +} + +#ifdef MACH_KDB +/* + * This routine returns the section command for the symbol table in the + * named segment for the mach_header pointer passed to it if it exist. + * Otherwise it returns zero. + */ +static struct symtab_command * +getsectcmdsymtabfromheader( + kernel_mach_header_t *mhp) +{ + kernel_segment_command_t *sgp; + unsigned long i; + + sgp = (kernel_segment_command_t *) + ((uintptr_t)mhp + sizeof(kernel_mach_header_t)); + for(i = 0; i < mhp->ncmds; i++){ + if(sgp->cmd == LC_SYMTAB) + return((struct symtab_command *)sgp); + sgp = (kernel_segment_command_t *)((uintptr_t)sgp + sgp->cmdsize); + } + return((struct symtab_command *)NULL); +} + +boolean_t getsymtab(kernel_mach_header_t *header, + vm_offset_t *symtab, + int *nsyms, + vm_offset_t *strtab, + vm_size_t *strtabsize) +{ + kernel_segment_command_t *seglink_cmd; + struct symtab_command *symtab_cmd; + + seglink_cmd = NULL; + + if((header->magic != MH_MAGIC) + && (header->magic != MH_MAGIC_64)) { /* Check if this is a valid header format */ + return (FALSE); /* Bye y'all... 
*/ + } + + seglink_cmd = getsegbynamefromheader(header,"__LINKEDIT"); + if (seglink_cmd == NULL) { + return(FALSE); + } + + symtab_cmd = NULL; + symtab_cmd = getsectcmdsymtabfromheader(header); + if (symtab_cmd == NULL) + return(FALSE); + + *nsyms = symtab_cmd->nsyms; + if(symtab_cmd->nsyms == 0) return (FALSE); /* No symbols */ + + *strtabsize = symtab_cmd->strsize; + if(symtab_cmd->strsize == 0) return (FALSE); /* Symbol length is 0 */ + + *symtab = seglink_cmd->vmaddr + symtab_cmd->symoff - + seglink_cmd->fileoff; + + *strtab = seglink_cmd->vmaddr + symtab_cmd->stroff - + seglink_cmd->fileoff; + + return(TRUE); +} +#endif diff --git a/libkern/kmod/Makefile b/libkern/kmod/Makefile index 7a50b88bd..8ffce509e 100644 --- a/libkern/kmod/Makefile +++ b/libkern/kmod/Makefile @@ -14,26 +14,23 @@ export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) endif do_all: - @echo "[ $(SOURCE) ] Starting do_all $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ - ($(MKDIR) $(COMPOBJROOT)/kmod; \ + $(_v)($(MKDIR) $(COMPOBJROOT)/kmod; \ cd $(COMPOBJROOT)/kmod; \ ${MAKE} MAKEFILES=$(SOURCE)/Makefile.kmod \ TARGET=$(TARGET) \ do_build_all \ - ); \ - echo "[ $(SOURCE) ] Returning do_all $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; + ) do_build_all: do_all do_install: - @echo "[ $(SOURCE) ] Starting do_install $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ + @echo "[ $(SOURCE) ] make do_install $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \ ($(MKDIR) $(COMPOBJROOT)/kmod; \ cd $(COMPOBJROOT)/kmod; \ ${MAKE} MAKEFILES=$(SOURCE)/Makefile.kmod \ TARGET=$(TARGET) \ do_build_install \ - ); \ - echo "[ $(SOURCE) ] Returning do_install $(COMPONENT) $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; + ) do_build_install: do_install diff --git a/libkern/kmod/Makefile.kmod b/libkern/kmod/Makefile.kmod index bab4de575..0c35bdf4d 100644 --- a/libkern/kmod/Makefile.kmod +++ b/libkern/kmod/Makefile.kmod @@ -7,6 +7,7 @@ export 
MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) @@ -15,9 +16,10 @@ KMOD_NAME = libkmod KMODCPP_NAME = libkmodc++ LIB_INSTALL_FLAGS = -p -m 444 -CFLAGS += -Wall -Wno-four-char-constants -CFLAGS_PPC += -mlong-branch -CFLAGS_ARM += -mlong-calls +# -mkernel implies -mlong-branch/-mlong-calls/-mno-red-zone as needed for +# code linked into kexts +CFLAGS_KMOD = $(filter-out -O0 -O1 -O2 -O3 -O4 -Os -Oz,$(CFLAGS)) \ + -Os -mkernel -Wall ifneq ($(MACHINE_CONFIG), DEFAULT) COMPOBJROOT = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)/kmod @@ -27,6 +29,7 @@ COMPOBJROOT = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)/kmod INSTOBJROOT = $(OBJROOT)/$(INSTALL_TYPE)_$(ARCH_CONFIG)/$(COMPONENT)/kmod endif + KMOD_CFILES = c_start.c c_stop.c KMODCPP_CFILES = cplus_start.c cplus_stop.c @@ -37,23 +40,23 @@ ALL_OFILES = $(KMOD_OFILES) $(KMODCPP_OFILES) $(ALL_OFILES): %.o : %.c @echo CC $@ - $(_v)${KCC} -c ${CFLAGS} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $< + $(_v)${KCC} -c ${CFLAGS_KMOD} ${${join $@,_CFLAGS}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -o $(COMPOBJROOT)/$(*F).o $< $(COMPOBJROOT)/$(KMOD_NAME).a: $(KMOD_OFILES) - @echo LIBTOOL $@ - $(_v)libtool -static -o $@ $^ + @echo LIBTOOL $(notdir $@) + $(_v)$(LIBTOOL) -static -o $@ $^ $(_vstdout) 2>&1 $(COMPOBJROOT)/$(KMODCPP_NAME).a: $(KMODCPP_OFILES) - @echo LIBTOOL $@ - $(_v)libtool -static -o $@ $^ + @echo LIBTOOL $(notdir $@) + $(_v)$(LIBTOOL) -static -o $@ $^ $(_vstdout) 2>&1 do_build_all: $(COMPOBJROOT)/$(KMOD_NAME).a $(COMPOBJROOT)/$(KMODCPP_NAME).a $(INSTALL_DIR)/%.a: $(INSTOBJROOT)/%.a @echo Installing $< in $@; - $(RM) $@ || true; \ + $(_v)$(RM) $@ || true; \ ${MKDIR} $(INSTALL_DIR) $(SYMROOT); \ - $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ + if [ $(MACHINE_CONFIG) = DEFAULT ]; then \ allarchs=""; \ 
for onearch in $(INSTALL_ARCHS); do \ archdir=$(OBJROOT)/$(KERNEL_CONFIG)_$${onearch}/$(COMPONENT); \ diff --git a/libkern/kmod/c_start.c b/libkern/kmod/c_start.c index 720a70926..a859e223f 100644 --- a/libkern/kmod/c_start.c +++ b/libkern/kmod/c_start.c @@ -43,10 +43,14 @@ *.o -lkmodc++ kmod_info.o -lkmod */ #include +#include // These global symbols will be defined by CreateInfo script's info.c file. extern kmod_start_func_t *_realmain; +extern kmod_info_t KMOD_INFO_NAME; +/********************************************************************* +*********************************************************************/ __private_extern__ kern_return_t _start(kmod_info_t *ki, void *data) { if (_realmain) @@ -54,3 +58,24 @@ __private_extern__ kern_return_t _start(kmod_info_t *ki, void *data) else return KERN_SUCCESS; } + +/********************************************************************* +*********************************************************************/ +__private_extern__ const char * OSKextGetCurrentIdentifier(void) +{ + return KMOD_INFO_NAME.name; +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ const char * OSKextGetCurrentVersionString(void) +{ + return KMOD_INFO_NAME.version; +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ OSKextLoadTag OSKextGetCurrentLoadTag(void) +{ + return (OSKextLoadTag)KMOD_INFO_NAME.id; +} diff --git a/libkern/kmod/cplus_start.c b/libkern/kmod/cplus_start.c index 3bfb64198..8ae7a0193 100644 --- a/libkern/kmod/cplus_start.c +++ b/libkern/kmod/cplus_start.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000,2008-2009 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,23 +42,58 @@ The linkline must look like this. *.o -lkmodc++ kmod_info.o -lkmod */ +#if __i386__ || __ppc__ #include +#include asm(".constructors_used = 0"); asm(".private_extern .constructors_used"); // This global symbols will be defined by CreateInfo script's info.c file. extern kmod_start_func_t *_realmain; +extern kmod_info_t KMOD_INFO_NAME; // Functions defined in libkern/c++/OSRuntime.cpp extern kern_return_t OSRuntimeInitializeCPP(kmod_info_t *ki, void *data); +extern kern_return_t OSRuntimeFinalizeCPP(kmod_info_t *ki, void *data); +/********************************************************************* +*********************************************************************/ __private_extern__ kern_return_t _start(kmod_info_t *ki, void *data) { - kern_return_t res = OSRuntimeInitializeCPP(ki, data); + kern_return_t result = OSRuntimeInitializeCPP(ki, data); - if (!res && _realmain) - res = (*_realmain)(ki, data); + if ((result == KERN_SUCCESS) && _realmain) { + result = (*_realmain)(ki, data); - return res; + /* If _realmain failed, tear down C++. 
+ */ + if (result != KERN_SUCCESS) { + (void)OSRuntimeFinalizeCPP(ki, data); + } + } + + return result; +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ const char * OSKextGetCurrentIdentifier(void) +{ + return KMOD_INFO_NAME.name; +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ const char * OSKextGetCurrentVersionString(void) +{ + return KMOD_INFO_NAME.version; +} + +/********************************************************************* +*********************************************************************/ +__private_extern__ OSKextLoadTag OSKextGetCurrentLoadTag(void) +{ + return (OSKextLoadTag)KMOD_INFO_NAME.id; } +#endif diff --git a/libkern/kmod/cplus_stop.c b/libkern/kmod/cplus_stop.c index fffd04044..b4ce5236a 100644 --- a/libkern/kmod/cplus_stop.c +++ b/libkern/kmod/cplus_stop.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000,2008-2009 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,6 +42,7 @@ The linkline must look like this. 
*.o -lkmodc++ kmod_info.o -lkmod */ +#if __i386__ || __ppc__ #include asm(".destructors_used = 0"); @@ -55,10 +56,16 @@ extern kmod_stop_func_t *_antimain; __private_extern__ kern_return_t _stop(kmod_info_t *ki, void *data) { - kern_return_t res = OSRuntimeFinalizeCPP(ki, data); + kern_return_t result = KERN_SUCCESS; - if (!res && _antimain) - res = (*_antimain)(ki, data); - - return res; + if (_antimain) { + result = (*_antimain)(ki, data); + } + + if (result == KERN_SUCCESS) { + result = OSRuntimeFinalizeCPP(ki, data); + } + + return result; } +#endif diff --git a/libkern/kxld/Makefile b/libkern/kxld/Makefile new file mode 100644 index 000000000..84412e08f --- /dev/null +++ b/libkern/kxld/Makefile @@ -0,0 +1,171 @@ +################################################################################ +# Note: this makefile is used only for the libkxld build alias. It is not part +# of the regular kernel build process. +################################################################################ + +# Version +COMPATIBILITY_VERSION=1 +CURRENT_VERSION=1 + +# Paths +ifndef DSTROOT +DSTROOT=./BUILD/dst +endif +ifndef OBJROOT +OBJROOT=./BUILD/obj +endif +ifndef SYMROOT +SYMROOT=./BUILD/sym +endif +ifdef SRCROOT +HDRSRC=$(SRCROOT)/libkern/libkern +OBJSRC=$(SRCROOT)/libkern/kxld +else +SRCROOT=. 
+HDRSRC=$(SRCROOT)/../libkern +OBJSRC=$(SRCROOT) +ROOT=BUILD +endif +ifdef RC_CFLAGS +ARCHS=$(addprefix -arch , $(RC_ARCHS)) +else +ARCHS=-arch i386 -arch x86_64 -arch ppc +RC_CFLAGS=$(ARCHS) -pipe +endif + +PRODUCT_TYPE ?= DYLIB + +HDRDST=$(DSTROOT)/usr/local/include +LIBDST=$(DSTROOT)/usr/lib/system +ARCHIVEDST=$(DSTROOT)/usr/local/lib +LIBOBJ=$(OBJROOT)/libkxld.o +LIBKXLD_DYLIB=libkxld.dylib +LIBKXLD_ARCHIVE=libkxld.a +LIBKXLDNAME=/usr/lib/system/$(LIBKXLD_DYLIB) +LIBKXLDOBJ_DYLIB=$(OBJROOT)/$(LIBKXLD_DYLIB) +LIBKXLDOBJ_ARCHIVE=$(OBJROOT)/$(LIBKXLD_ARCHIVE) +LIBKXLDDST_DYLIB=$(LIBDST)/$(LIBKXLD_DYLIB) +LIBKXLDDST_ARCHIVE=$(ARCHIVEDST)/$(LIBKXLD_ARCHIVE) +TESTSRC=$(SRCROOT)/tests +TESTDST=./BUILD/tests + +# Flags +SDKROOT ?= / +CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \ + -Wwrite-strings -Wshorten-64-to-32 -Wshadow -Winit-self -Wpointer-arith \ + -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes -Wreturn-type \ + -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wbad-function-cast \ + -Wchar-subscripts -Winline -Wnested-externs -Wredundant-decls -g \ + -isysroot $(SDKROOT) +LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLDNAME) \ + -compatibility_version $(COMPATIBILITY_VERSION) \ + -current_version $(CURRENT_VERSION) -isysroot $(SDKROOT) +INCLUDES=-I$(HDRSRC) $(INCFLAGS_EXTERN) + +# Tools +CC = xcrun -sdk $(SDKROOT) cc +LIBTOOL = xcrun -sdk $(SDKROOT) libtool +STRIP = xcrun -sdk $(SDKROOT) strip + +# Turn on -Wno-cast-align for arm since it won't build without it +ifeq ($(findstring arm, $(ARCHS)),arm) +CFLAGS+=-Wno-cast-align +endif + +# Files +HDR_NAMES=kxld.h kxld_types.h +OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_dict.o kxld_kext.o kxld_reloc.o \ + kxld_sect.o kxld_seg.o kxld_sym.o kxld_state.o kxld_symtab.o kxld_util.o \ + kxld_uuid.o kxld_vtable.o +HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES)) +OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES)) + +$(shell [ -d $(OBJROOT) ] || mkdir -p $(OBJROOT)) + +# Implicit 
rules +%.o : %.c +$(OBJROOT)/%.o : $(OBJSRC)/%.c + $(CC) $(RC_CFLAGS) $(CFLAGS) $(OPTIM) $(INCLUDES) -c $< -o $@ +$(OBJROOT)/%.o : $(TESTSRC)/%.c + $(CC) $(RC_CFLAGS) $(CFLAGS) -O0 -DDEBUG $(INCLUDES) -I $(SRCROOT) -c $< -o $@ + +SRCROOTESC=$(subst /,\/,$(SRCROOT)) +OBJROOTESC=$(subst /,\/,$(OBJROOT)) +SEDOBJS=sed -E 's/(^[a-z_]+)\.o/$(OBJROOTESC)\/\1\.o $(OBJROOTESC)\/\1\.d/' +SEDSRCS=sed -E 's/([a-z_]+\.[ch])/$(SRCROOTESC)\/\1/g' +$(OBJROOT)/%.d: $(OBJSRC)/%.c + @set -e; rm -f $@; \ + $(CC) $(INCLUDES) -MM $< | $(SEDOBJS) | $(SEDSRCS) > $@; + +# Rules +release: OPTIM=-Os -dynamic +release: build + +debug: OPTIM=-O0 -DDEBUG -dynamic +debug: build + +profile: OPTIM=-Os -pg -dynamic +profile: build + +tests: OPTIM=-O0 -DDEBUG +tests: kxld_dict_test copyrighttest + +build: $(LIBKXLDOBJ_$(PRODUCT_TYPE)) + @[ -d $(SYMROOT) ] || mkdir -p $(SYMROOT) + install -c -m 644 $< $(SYMROOT) + +$(LIBKXLDOBJ_DYLIB): $(OBJS) + $(CC) $(LDFLAGS) -o $@ $^ + +$(LIBKXLDOBJ_ARCHIVE): $(OBJS) + $(LIBTOOL) -static -o $@ $^ + +installhdrs: + @[ -d $(HDRDST) ] || mkdir -p $(HDRDST) + install -o 0 -g 0 -c -m 444 $(HDRS) $(HDRDST) + +install: release installhdrs $(LIBKXLDDST_$(PRODUCT_TYPE)) + +$(LIBKXLDDST_DYLIB): + @[ -d $(LIBDST) ] || mkdir -p $(LIBDST) + install -o 0 -g 0 -c -m 555 $(SYMROOT)/$(LIBKXLD_DYLIB) $(LIBDST) + $(STRIP) -S -x $@ + +$(LIBKXLDDST_ARCHIVE): + @[ -d $(ARCHIVEDST) ] || mkdir -p $(ARCHIVEDST) + install -o 0 -g 0 -c -m 555 $(SYMROOT)/$(LIBKXLD_ARCHIVE) $(ARCHIVEDST) + + +KEXTCOPYOBJS=$(OBJROOT)/kextcopyright.o $(OBJROOT)/kxld_copyright.o $(OBJROOT)/kxld_util.o +kextcopyright: $(KEXTCOPYOBJS) $(TESTDST) + $(CC) $(ARCHS) $(KEXTCOPYOBJS) -framework CoreFoundation -o $(OBJROOT)/kextcopyright + install -c -m 755 $(OBJROOT)/kextcopyright $(TESTDST) + +DICTOBJS=$(OBJROOT)/kxld_dict_test.o $(OBJROOT)/kxld_dict.o $(OBJROOT)/kxld_array.o $(OBJROOT)/kxld_util.o +kxld_dict_test: $(DICTOBJS) $(TESTDST) + $(CC) $(ARCHS) $(DICTOBJS) -o $(OBJROOT)/kxld_dict_test + install -c -m 755 
$(OBJROOT)/kxld_dict_test $(TESTDST) + +COPYTESTOBJS=$(OBJROOT)/kxld_copyright.o $(OBJROOT)/kxld_util.o +copyrighttest: OPTIM+=-DTEST +copyrighttest: $(KEXTCOPYOBJS) $(TESTDST) + $(CC) $(ARCHS) $(COPYTESTOBJS) -framework CoreFoundation -framework IOKit -o $(OBJROOT)/copyrighttest + install -c -m 755 $(OBJROOT)/copyrighttest $(TESTDST) + +$(TESTDST): + @[ -d $(TESTDST) ] || mkdir -p $(TESTDST) + +clean: + @rm -rf $(OBJROOT)/* + +fullclean: +ifdef ROOT + @rm -rf $(ROOT) +else + @rm -rf $(OBJROOT) $(DSTROOT) $(SYMROOT) +endif + +# Automatically build dependency information when .c or .h files change based +# on implicit rule for .d:.c +-include $(OBJS:.o=.d) + diff --git a/libkern/kxld/kxld.c b/libkern/kxld/kxld.c new file mode 100644 index 000000000..3d9de9588 --- /dev/null +++ b/libkern/kxld/kxld.c @@ -0,0 +1,470 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include /* For PAGE_SIZE */ + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#if !KERNEL + #include "kxld.h" + #include "kxld_types.h" +#else + #include + #include +#endif /* KERNEL */ + +#include "kxld_array.h" +#include "kxld_dict.h" +#include "kxld_kext.h" +#include "kxld_state.h" +#include "kxld_sym.h" +#include "kxld_symtab.h" +#include "kxld_util.h" +#include "kxld_vtable.h" + +struct kxld_vtable; + +struct kxld_context { + KXLDKext *kext; + KXLDArray *section_order; + KXLDArray deps; + KXLDArray tmps; + KXLDDict defined_symbols; + KXLDDict obsolete_symbols; + KXLDDict vtables; + KXLDFlags flags; + KXLDAllocateCallback allocate_callback; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; +}; + +/******************************************************************************* +* Globals +*******************************************************************************/ + +/* Certain architectures alter the order of a kext's sections from its input + * binary, so we track that order in a dictionary of arrays, with one array for + * each architecture. Since the kernel only has one architecture, we can + * eliminate the dictionary and use a simple array. + * XXX: If we ever use the linker in a multithreaded environment, we will need + * locks around these global structures. 
+ */ +#if KXLD_USER_OR_OBJECT +#if KERNEL +static KXLDArray *s_section_order; +#else +static KXLDDict *s_order_dict; +#endif +#endif + +/******************************************************************************* +* Prototypes +*******************************************************************************/ + +static void clear_context(KXLDContext *context); + +/******************************************************************************* +*******************************************************************************/ +/* + * Allocates a zeroed linker context together with its kext object, records the + * allocation callback, flags and CPU type, installs the logging callback, and + * (when KXLD_USER_OR_OBJECT is set) attaches the section-order array for this + * architecture, creating it if needed. On success the context is stored in + * *_context and KERN_SUCCESS is returned; on failure *_context stays NULL and + * the partially built context is freed. + */ +kern_return_t +kxld_create_context(KXLDContext **_context, + KXLDAllocateCallback allocate_callback, KXLDLoggingCallback logging_callback, + KXLDFlags flags, cpu_type_t cputype, cpu_subtype_t cpusubtype) +{ + kern_return_t rval = KERN_FAILURE; + KXLDContext *context = NULL; + KXLDArray *section_order = NULL; +#if !KERNEL + cpu_type_t *cputype_p = NULL; +#endif + + check(_context); + check(allocate_callback); + check(logging_callback); + *_context = NULL; + + context = kxld_alloc(sizeof(*context)); + require_action(context, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(context, sizeof(*context)); + + context->flags = flags; + context->allocate_callback = allocate_callback; + context->cputype = cputype; + context->cpusubtype = cpusubtype; + + kxld_set_logging_callback(logging_callback); + + context->kext = kxld_alloc(kxld_kext_sizeof()); + require_action(context->kext, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(context->kext, kxld_kext_sizeof()); + + /* Check if we already have an order array for this arch */ + +#if KXLD_USER_OR_OBJECT +#if KERNEL + context->section_order = s_section_order; +#else + /* In userspace, create the dictionary if it doesn't already exist */ + if (!s_order_dict) { + s_order_dict = kxld_alloc(sizeof(*s_order_dict)); + require_action(s_order_dict, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(s_order_dict, sizeof(*s_order_dict)); + + rval = kxld_dict_init(s_order_dict, kxld_dict_uint32_hash, + kxld_dict_uint32_cmp, 
0); + require_noerr(rval, finish); + } + + context->section_order = kxld_dict_find(s_order_dict, &cputype); +#endif /* KERNEL */ + + /* Create an order array for this arch if needed */ + + if (!context->section_order) { + + section_order = kxld_alloc(sizeof(*section_order)); + require_action(section_order, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(section_order, sizeof(*section_order)); + +#if KERNEL + s_section_order = section_order; +#else + /* In userspace, add the new array to the order dictionary */ + cputype_p = kxld_alloc(sizeof(*cputype_p)); + require_action(cputype_p, finish, rval=KERN_RESOURCE_SHORTAGE); + *cputype_p = cputype; + + rval = kxld_dict_insert(s_order_dict, cputype_p, section_order); + require_noerr(rval, finish); + + cputype_p = NULL; +#endif /* KERNEL */ + + context->section_order = section_order; + + section_order = NULL; + } +#endif /* KXLD_USER_OR_OBJECT */ + + rval = KERN_SUCCESS; + *_context = context; + context = NULL; + +finish: + /* context is only still set here on failure. It was zeroed right after + * allocation, so kext is either NULL or the object allocated above; free + * it too, otherwise a failure after that allocation leaks it. */ + if (context) { + if (context->kext) kxld_free(context->kext, kxld_kext_sizeof()); + kxld_free(context, sizeof(*context)); + } + if (section_order) kxld_free(section_order, sizeof(*section_order)); +#if !KERNEL + if (cputype_p) kxld_free(cputype_p, sizeof(*cputype_p)); +#endif + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +/* + * Tears down a context made by kxld_create_context: deinitializes the kext, + * every dependency slot, the arrays and the dictionaries, then frees the kext + * object and the context itself. + */ +void +kxld_destroy_context(KXLDContext *context) +{ + KXLDState *dep = NULL; + u_int i = 0; + + check(context); + + kxld_kext_deinit(context->kext); + + /* Walk every slot up to maxitems, not just the items currently in use */ + for (i = 0; i < context->deps.maxitems; ++i) { + dep = kxld_array_get_slot(&context->deps, i); + kxld_state_deinit(dep); + } + + kxld_array_deinit(&context->deps); + kxld_array_deinit(&context->tmps); + + kxld_dict_deinit(&context->defined_symbols); + kxld_dict_deinit(&context->obsolete_symbols); + kxld_dict_deinit(&context->vtables); + + kxld_free(context->kext, kxld_kext_sizeof()); + kxld_free(context, sizeof(*context)); + + kxld_print_memory_report(); +} + 
+/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_link_file( + KXLDContext *context, + u_char *file, + u_long size, + const char *name, + void *callback_data, + u_char **deps, + u_int ndeps, + u_char **_linked_object, + kxld_addr_t *kmod_info_kern, + u_char **_link_state, + u_long *_link_state_size, + u_char **_symbol_file __unused, + u_long *_symbol_file_size __unused) +{ + kern_return_t rval = KERN_FAILURE; + KXLDState *state = NULL; + KXLDAllocateFlags flags = 0; + kxld_addr_t vmaddr = 0; + u_long header_size = 0; + u_long vmsize = 0; + u_int nsyms = 0; + u_int nvtables = 0; + u_int i = 0; + u_char *linked_object = NULL; + u_char *linked_object_alloc = NULL; + u_char *link_state = NULL; + u_char *symbol_file = NULL; + u_long link_state_size = 0; + u_long symbol_file_size = 0; + + kxld_set_logging_callback_data(name, callback_data); + + require_action(context, finish, rval=KERN_INVALID_ARGUMENT); + require_action(file, finish, rval=KERN_INVALID_ARGUMENT); + require_action(size, finish, rval=KERN_INVALID_ARGUMENT); + + rval = kxld_array_init(&context->deps, sizeof(struct kxld_state), ndeps); + require_noerr(rval, finish); + + if (deps) { + /* Initialize the dependencies */ + for (i = 0; i < ndeps; ++i) { + state = kxld_array_get_item(&context->deps, i); + + rval = kxld_state_init_from_file(state, deps[i], + context->section_order); + require_noerr(rval, finish); + } + } + + rval = kxld_kext_init(context->kext, file, size, name, + context->flags, (deps == 0) /* is_kernel */, context->section_order, + context->cputype, context->cpusubtype); + require_noerr(rval, finish); + + if (deps) { + + /* Calculate the base number of symbols and vtables in the kext */ + + nsyms += kxld_kext_get_num_symbols(context->kext); + nvtables += kxld_kext_get_num_vtables(context->kext); + + /* Extract the symbol and vtable counts from the dependencies. 
+ */ + + for (i = 0; i < ndeps; ++i) { + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + + state = kxld_array_get_item(&context->deps, i); + + kxld_state_get_cputype(state, &cputype, &cpusubtype); + + rval = kxld_kext_validate_cputype(context->kext, + cputype, cpusubtype); + require_noerr(rval, finish); + + nsyms += kxld_state_get_num_symbols(state); + nvtables += kxld_state_get_num_vtables(state); + } + + /* Create the global symbol and vtable tables */ + + rval = kxld_dict_init(&context->defined_symbols, kxld_dict_string_hash, + kxld_dict_string_cmp, nsyms); + require_noerr(rval, finish); + + rval = kxld_dict_init(&context->obsolete_symbols, kxld_dict_string_hash, + kxld_dict_string_cmp, 0); + require_noerr(rval, finish); + + rval = kxld_dict_init(&context->vtables, kxld_dict_string_hash, + kxld_dict_string_cmp, nvtables); + require_noerr(rval, finish); + + /* Populate the global tables */ + + for (i = 0; i < ndeps; ++i) { + state = kxld_array_get_item(&context->deps, i); + + rval = kxld_state_get_symbols(state, &context->defined_symbols, + &context->obsolete_symbols); + require_noerr(rval, finish); + + rval = kxld_state_get_vtables(state, &context->vtables); + require_noerr(rval, finish); + } + + if (kxld_kext_is_true_kext(context->kext)) { + + /* Allocate the kext object */ + + kxld_kext_get_vmsize(context->kext, &header_size, &vmsize); + vmaddr = context->allocate_callback(vmsize, &flags, callback_data); + require_action(!(vmaddr & (PAGE_SIZE-1)), finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + "Load address %p is not page-aligned.", + (void *) (uintptr_t) vmaddr)); + + if (flags & kKxldAllocateWritable) { + linked_object = (u_char *) (u_long) vmaddr; + } else { + linked_object_alloc = kxld_page_alloc_untracked(vmsize); + require_action(linked_object_alloc, finish, rval=KERN_RESOURCE_SHORTAGE); + linked_object = linked_object_alloc; + } + + /* Zero out the memory before we fill it. 
We fill this buffer in a + * sparse fashion, and it's simpler to clear it now rather than + * track and zero any pieces we didn't touch after we've written + * all of the sections to memory. + */ + bzero(linked_object, vmsize); + + /* Relocate to the new link address */ + + rval = kxld_kext_relocate(context->kext, vmaddr, &context->vtables, + &context->defined_symbols, &context->obsolete_symbols); + require_noerr(rval, finish); + + /* Generate linked object if requested */ + + if (_linked_object) { + check(kmod_info_kern); + *_linked_object = NULL; + *kmod_info_kern = 0; + + rval = kxld_kext_export_linked_object(context->kext, linked_object, + kmod_info_kern); + require_noerr(rval, finish); + } + + } else { + /* Resolve the pseudokext's symbols */ + + rval = kxld_kext_resolve(context->kext, &context->vtables, + &context->defined_symbols); + require_noerr(rval, finish); + } + } + + /* Generate link state if requested */ + + if (_link_state) { + check(_link_state_size); + *_link_state = NULL; + *_link_state_size = 0; + + kxld_dict_clear(&context->defined_symbols); + rval = kxld_state_export_kext_to_file(context->kext, &link_state, + &link_state_size, &context->defined_symbols, &context->tmps); + require_noerr(rval, finish); + } + +#if !KERNEL + /* Generate symbol file if requested */ + + if (_symbol_file) { + check(_symbol_file_size); + *_symbol_file = NULL; + *_symbol_file_size = 0; + + rval = kxld_kext_export_symbol_file(context->kext, &symbol_file, + &symbol_file_size); + require_noerr(rval, finish); + } +#endif /* !KERNEL */ + + /* Commit output to return variables */ + + if (_linked_object) { + *_linked_object = linked_object; + linked_object = NULL; + linked_object_alloc = NULL; + } + + if (_link_state) { + *_link_state = link_state; + *_link_state_size = link_state_size; + link_state = NULL; + } + +#if !KERNEL + if (_symbol_file) { + *_symbol_file = symbol_file; + *_symbol_file_size = symbol_file_size; + symbol_file = NULL; + } +#endif + + rval = KERN_SUCCESS; 
+ +finish: + + if (linked_object_alloc) kxld_page_free_untracked(linked_object_alloc, vmsize); + if (link_state) kxld_page_free_untracked(link_state, link_state_size); + if (symbol_file) kxld_page_free_untracked(symbol_file, symbol_file_size); + + clear_context(context); + + kxld_set_logging_callback_data(NULL, NULL); + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static void +clear_context(KXLDContext *context) +{ + KXLDState *state = NULL; + u_int i = 0; + + check(context); + + kxld_kext_clear(context->kext); + for (i = 0; i < context->deps.nitems; ++i) { + state = kxld_array_get_item(&context->deps, i); + kxld_state_clear(state); + } + kxld_array_reset(&context->deps); + + kxld_array_clear(&context->tmps); + kxld_dict_clear(&context->defined_symbols); + kxld_dict_clear(&context->obsolete_symbols); + kxld_dict_clear(&context->vtables); +} + diff --git a/libkern/kxld/kxld_array.c b/libkern/kxld/kxld_array.c new file mode 100644 index 000000000..b04a6045a --- /dev/null +++ b/libkern/kxld/kxld_array.c @@ -0,0 +1,483 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include + +#if KERNEL + #include +#else + #include +#endif + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_array.h" +#include "kxld_util.h" + +static kern_return_t array_init(KXLDArray *array, size_t itemsize, u_int nitems); +static KXLDArrayPool * pool_create(size_t capacity); +static void pool_destroy(KXLDArrayPool *pool, size_t capacity); +static u_int reinit_pools(KXLDArray *array, u_int nitems); + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_array_init(KXLDArray *array, size_t itemsize, u_int nitems) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *dstpool = NULL, *srcpool = NULL, *tmp = NULL; + KXLDArrayHead srcpools = STAILQ_HEAD_INITIALIZER(srcpools); + size_t srcpool_capacity = 0; + u_long offset = 0; + + check(array); + + if (!nitems) { + kxld_array_reset(array); + rval = KERN_SUCCESS; + goto finish; + } + + require_action(itemsize, finish, rval=KERN_INVALID_ARGUMENT); + + /* If the array has some pools, we need to see if there is enough space in + * those pools to accomodate the requested size array. If there isn't + * enough space, we save the existing pools to a temporary STAILQ and zero + * out the array structure. This will cause a new pool of sufficient size + * to be created, and we then copy the data from the old pools into the new + * pool. 
+ */ + if (array->npools) { + /* Update the array's maxitems based on the new itemsize */ + array->pool_maxitems = (u_int) (array->pool_capacity / itemsize); + array->maxitems = 0; + STAILQ_FOREACH(srcpool, &array->pools, entries) { + array->maxitems += array->pool_maxitems; + } + + /* If there's not enough space, save the pools to a temporary STAILQ + * and zero out the array structure. Otherwise, rescan the pools to + * update their internal nitems counts. + */ + if (array->maxitems < nitems) { + STAILQ_FOREACH_SAFE(srcpool, &array->pools, entries, tmp) { + STAILQ_INSERT_TAIL(&srcpools, srcpool, entries); + STAILQ_REMOVE(&array->pools, srcpool, kxld_array_pool, entries); + } + srcpool_capacity = array->pool_capacity; + bzero(array, sizeof(*array)); + } else { + nitems = reinit_pools(array, nitems); + require_action(nitems == 0, finish, rval=KERN_FAILURE); + } + } + + array->itemsize = itemsize; + + /* If array->maxitems is zero, it means we are either rebuilding an array + * that was too small, or we're initializing an array for the first time. + * In either case, we need to set up a pool of the requested size, and + * if we're rebuilding an old array, we'll also copy the data from the old + * pools into the new pool. 
+ */ + if (array->maxitems == 0) { + + rval = array_init(array, itemsize, nitems); + require_noerr(rval, finish); + + dstpool = STAILQ_FIRST(&array->pools); + require_action(dstpool, finish, rval=KERN_FAILURE); + + STAILQ_FOREACH_SAFE(srcpool, &srcpools, entries, tmp) { + memcpy(dstpool->buffer + offset, srcpool->buffer, srcpool_capacity); + offset += srcpool_capacity; + + STAILQ_REMOVE(&srcpools, srcpool, kxld_array_pool, entries); + pool_destroy(srcpool, srcpool_capacity); + } + + } + + rval = KERN_SUCCESS; +finish: + if (rval) kxld_array_deinit(array); + return rval; +} + +/******************************************************************************* +* This may only be called to initialize (or reinitialize) an array with exactly +* zero or one pool. Calling this on an array with more than one pool is an +* error. +*******************************************************************************/ +static kern_return_t +array_init(KXLDArray *array, size_t itemsize, u_int nitems) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *pool = NULL; + + array->itemsize = itemsize; + + pool = STAILQ_FIRST(&array->pools); + if (pool) { + require_action(itemsize * nitems < array->pool_capacity, + finish, rval=KERN_FAILURE); + require_action(array->npools == 1, finish, rval=KERN_FAILURE); + bzero(pool->buffer, array->pool_capacity); + } else { + array->pool_capacity = round_page(array->itemsize * nitems); + + pool = pool_create(array->pool_capacity); + require_action(pool, finish, rval=KERN_RESOURCE_SHORTAGE); + STAILQ_INSERT_HEAD(&array->pools, pool, entries); + } + pool->nitems = nitems; + + array->pool_maxitems = (u_int) (array->pool_capacity / array->itemsize); + array->maxitems = array->pool_maxitems; + array->nitems = nitems; + array->npools = 1; + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ 
+static KXLDArrayPool * +pool_create(size_t capacity) +{ + KXLDArrayPool *pool = NULL, *rval = NULL; + + pool = kxld_alloc(sizeof(*pool)); + require(pool, finish); + + pool->buffer = kxld_page_alloc(capacity); + require(pool->buffer, finish); + bzero(pool->buffer, capacity); + + rval = pool; + pool = NULL; + +finish: + if (pool) pool_destroy(pool, capacity); + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static void +pool_destroy(KXLDArrayPool *pool, size_t capacity) +{ + if (pool) { + if (pool->buffer) kxld_page_free(pool->buffer, capacity); + kxld_free(pool, sizeof(*pool)); + } +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_array_copy(KXLDArray *dstarray, const KXLDArray *srcarray) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *dstpool = NULL, *srcpool = NULL; + u_long needed_capacity = 0; + u_long current_capacity = 0; + u_long copysize = 0; + u_long offset = 0; + + check(dstarray); + check(srcarray); + + /* When copying array, we only want to copy to an array with a single + * pool. If the array has more than one pool or the array is too small, + * we destroy the array and build it from scratch for the copy. + */ + needed_capacity = round_page(srcarray->nitems * srcarray->itemsize); + current_capacity = dstarray->npools * dstarray->pool_capacity; + if (dstarray->npools > 1 || needed_capacity > current_capacity) { + kxld_array_deinit(dstarray); + } + + rval = array_init(dstarray, srcarray->itemsize, srcarray->nitems); + require_noerr(rval, finish); + + dstpool = STAILQ_FIRST(&dstarray->pools); + require_action(dstpool, finish, rval=KERN_FAILURE); + + /* Copy the data from the source pools to the single destination pool. 
*/ + STAILQ_FOREACH(srcpool, &srcarray->pools, entries) { + copysize = srcpool->nitems * srcarray->itemsize; + memcpy(dstpool->buffer + offset, srcpool->buffer, copysize); + offset += copysize; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_array_reset(KXLDArray *array) +{ + KXLDArrayPool *pool = NULL; + + if (array) { + STAILQ_FOREACH(pool, &array->pools, entries) { + pool->nitems = 0; + } + array->nitems = 0; + } +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_array_clear(KXLDArray *array) +{ + KXLDArrayPool *pool = NULL; + + if (array) { + kxld_array_reset(array); + STAILQ_FOREACH(pool, &array->pools, entries) { + bzero(pool->buffer, array->pool_capacity); + } + } +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_array_deinit(KXLDArray *array) +{ + KXLDArrayPool *pool = NULL, *tmp = NULL; + + if (array) { + STAILQ_FOREACH_SAFE(pool, &array->pools, entries, tmp) { + STAILQ_REMOVE(&array->pools, pool, kxld_array_pool, entries); + pool_destroy(pool, array->pool_capacity); + } + bzero(array, sizeof(*array)); + } +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_array_get_item(const KXLDArray *array, u_int idx) +{ + KXLDArrayPool *pool = NULL; + void *item = NULL; + + check(array); + + if (idx >= array->nitems) goto finish; + + STAILQ_FOREACH(pool, &array->pools, entries) { + if (idx < pool->nitems) { + item = (void *) (pool->buffer + (array->itemsize * idx)); + break; + } + + idx -= 
array->pool_maxitems; + } + +finish: + return item; +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_array_get_slot(const KXLDArray *array, u_int idx) +{ + KXLDArrayPool *pool = NULL; + void *item = NULL; + + check(array); + + if (idx >= array->maxitems) goto finish; + + STAILQ_FOREACH(pool, &array->pools, entries) { + if (idx < array->pool_maxitems) { + item = (void *) (pool->buffer + (array->itemsize * idx)); + break; + } + + idx -= array->pool_maxitems; + } + +finish: + return item; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_array_get_index(const KXLDArray *array, const void *item, u_int *_idx) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *pool = NULL; + u_long diff = 0; + u_int idx = 0; + u_int base_idx = 0; + const u_char *it; + + check(array); + check(item); + check(_idx); + + it = item; + + STAILQ_FOREACH(pool, &array->pools, entries) { + if (pool->buffer <= it && it < pool->buffer + array->pool_capacity) { + diff = it - pool->buffer; + idx = (u_int) (diff / array->itemsize); + + idx += base_idx; + *_idx = idx; + + rval = KERN_SUCCESS; + goto finish; + } + + base_idx += array->pool_maxitems; + } + + rval = KERN_FAILURE; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_array_resize(KXLDArray *array, u_int nitems) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *pool = NULL; + + /* Grow the list of pools until we have enough to fit all of the entries */ + + while (nitems > array->maxitems) { + pool = pool_create(array->pool_capacity); + require_action(pool, finish, rval=KERN_FAILURE); + + 
STAILQ_INSERT_TAIL(&array->pools, pool, entries); + + array->maxitems += array->pool_maxitems; + array->npools += 1; + } + + nitems = reinit_pools(array, nitems); + require_action(nitems == 0, finish, rval=KERN_FAILURE); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +* Sets the number of items for the array and each pool. Returns zero if there +* is enough space for all items, and the number of additional items needed +* if there is not enough space. +*******************************************************************************/ +static u_int +reinit_pools(KXLDArray *array, u_int nitems) +{ + KXLDArrayPool *pool = NULL; + u_int pool_nitems = 0; + + /* Set the number of items for each pool */ + + pool_nitems = nitems; + STAILQ_FOREACH(pool, &array->pools, entries) { + if (pool_nitems > array->pool_maxitems) { + pool->nitems = array->pool_maxitems; + pool_nitems -= array->pool_maxitems; + } else { + pool->nitems = pool_nitems; + pool_nitems = 0; + } + } + array->nitems = nitems; + + return pool_nitems; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_array_remove(KXLDArray *array, u_int idx) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArrayPool *pool = NULL; + u_char *dst = NULL; + u_char *src = NULL; + u_int nitems = 0; + + check(array); + + if (idx >= array->nitems) { + rval = KERN_SUCCESS; + goto finish; + } + + /* We only support removing an item if all the items are contained in a + * single pool (for now). 
+ */ + require_action(array->npools < 2 || array->nitems < array->pool_maxitems, + finish, rval=KERN_NOT_SUPPORTED); + + pool = STAILQ_FIRST(&array->pools); + require_action(pool, finish, rval=KERN_FAILURE); + + dst = pool->buffer; + dst += idx * array->itemsize; + + src = pool->buffer; + src += ((idx + 1) * array->itemsize); + + nitems = pool->nitems - idx - 1; + memmove(dst, src, array->itemsize * nitems); + + --pool->nitems; + --array->nitems; + + dst = pool->buffer; + dst += pool->nitems * array->itemsize; + bzero(dst, array->itemsize); + + rval = KERN_SUCCESS; +finish: + return rval; +} + diff --git a/libkern/kxld/kxld_array.h b/libkern/kxld/kxld_array.h new file mode 100644 index 000000000..f73bc8607 --- /dev/null +++ b/libkern/kxld/kxld_array.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_ARRAY_H_ +#define _KXLD_ARRAY_H_ + +#include +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +/******************************************************************************* +* This is a resizeable array implementation designed primarily to maximize +* memory reuse. The array should only be allocated once, but it can be +* initialized many times. It persists its memory across initializations, and +* reallocates only if it needs to grow the internal array, such that memory +* allocation churn is eliminated. Growth is accommodated by building a linked +* list of identically sized arrays. These arrays can be consolidated into +* one large array in the init function. +* +* A technique commonly used in kxld is to make an array of objects that +* themselves contain kxld_arrays. To minimize memory churn across links, only +* the individual objects contained in an array should be cleared at the end of +* each link, such that they are in a state ready for reinitialization with the +* memory they have already allocated. The array that contains them should not +* be cleared. After all links are complete, to ensure that all memory is +* properly freed, one should call kxld_array_get_slot to walk the entire +* allocated space of the array and clean up all potential instances contained +* therein. Since this technique is somewhat fragile, there are certain +* requirements that must be met, and guarantees that the array implementation +* provides. +* +* Requirements: +* - A newly allocated, uninitialized array object must be zeroed out before +* it is initialized +* - The objects stored in the array that will be reused must consider +* being bzeroed a valid initial state. 
Specifically, they must check that +* pointers they contain are nonnull before they are freed or followed +* at both construction and destruction time. +* +* Guarantees: +* - The init function will always bzero newly allocated memory. If memory +* is added by resizing, it will bzero only the newly allocated portion. +* - clear, deinit, copy, and remove are the only functions that will change +* the contents of initialized memory (remove shifts the items that follow +* the removed one and zeroes the vacated slot). +* - The reset, clear, deinit functions will accept a NULL pointer to an array. +*******************************************************************************/ + +STAILQ_HEAD(kxld_array_head, kxld_array_pool); + +struct kxld_array { + struct kxld_array_head pools; + size_t itemsize; /* The size of the items that the array contains */ + size_t pool_capacity; /* The size in bytes of each pool's internal buffer */ + u_int pool_maxitems; /* The maximum number of items each pool can hold + * given the current size of each pool's buffer. + */ + u_int nitems; /* The current number of items this array contains */ + u_int maxitems; /* The maximum number of items this array can contain */ + u_int npools; /* The number of pools in the pool list */ +}; + +struct kxld_array_pool { + STAILQ_ENTRY(kxld_array_pool) entries; + u_char *buffer; /* The internal memory buffer */ + u_int nitems; /* The number of items this pool currently holds */ +}; + +typedef struct kxld_array KXLDArray; +typedef struct kxld_array_head KXLDArrayHead; +typedef struct kxld_array_pool KXLDArrayPool; + +/******************************************************************************* +* Constructors and Destructors +*******************************************************************************/ + +/* Initializes the array's capacity to a minimum of nitems * itemsize */ +kern_return_t kxld_array_init(KXLDArray *array, size_t itemsize, u_int nitems) + __attribute__((nonnull, visibility("hidden"))); + +/* Performs a deep copy of the array; the destination ends up with one pool */ +kern_return_t kxld_array_copy(KXLDArray *array, const KXLDArray 
*src) + __attribute__((nonnull, visibility("hidden"))); + +/* Sets the number of items in the array to 0 */ +void kxld_array_reset(KXLDArray *array) + __attribute__((visibility("hidden"))); + +/* Zeroes out the array and sets nitems to 0 */ +void kxld_array_clear(KXLDArray *array) + __attribute__((visibility("hidden"))); + +/* Frees all of the array's pools and zeroes the array structure */ +void kxld_array_deinit(KXLDArray *array) + __attribute__((visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +/* Returns the item at the specified index, or NULL if idx >= nitems */ +void *kxld_array_get_item(const KXLDArray *array, u_int idx) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* Returns the slot at the specified index, or NULL if idx >= maxitems */ +void *kxld_array_get_slot(const KXLDArray *array, u_int idx) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* Returns the index of a specified item in the array */ +kern_return_t kxld_array_get_index(const KXLDArray *array, const void *item, + u_int *idx) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +/* Grows the array to contain a minimum of nitems. If extra memory is needed, + * it will allocate a pool and add it to the list of pools maintained by this + * array. + */ +kern_return_t kxld_array_resize(KXLDArray *array, u_int nitems) + __attribute__((nonnull, visibility("hidden"))); + +/* Removes an element from the array. This is only supported for arrays with + * a single pool. 
+ */ +kern_return_t kxld_array_remove(KXLDArray *array, u_int idx) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_ARRAY_H_ */ diff --git a/libkern/kxld/kxld_copyright.c b/libkern/kxld/kxld_copyright.c new file mode 100644 index 000000000..9b70348e8 --- /dev/null +++ b/libkern/kxld/kxld_copyright.c @@ -0,0 +1,289 @@ +#include +#include +#include + +#if !KERNEL + #include + #include + #include "kxld.h" + #include "kxld_types.h" +#else + #include + #include + #include +#endif /* KERNEL */ + +#include "kxld_util.h" + +/****************************************************************************** +* Macros +******************************************************************************/ + +#define kCopyrightToken "Copyright © " +#define kRightsToken " Apple Inc. All rights reserved." + +/****************************************************************************** +* Globals +******************************************************************************/ + +#if TEST + +#include + +CFStringRef passes[] = { + CFSTR("Copyright © 2008 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2004-2008 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2004,2006 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2004,2006-2008 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2004 , 2006-2008 Apple Inc. All rights reserved."), + CFSTR("Copyright © 1998,2000-2002,2004,2006-2008 Apple Inc. All rights reserved."), + CFSTR("IOPCIFamily 2.1; Copyright © 2004,2006-2008 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2004,2006-2008 Apple Inc. All rights reserved. The quick brown fox jumped over the lazy dog."), + CFSTR("IOPCIFamily 2.1; Copyright © 2004,2006-2008 Apple Inc. All rights reserved. The quick brown fox jumped over the lazy dog.") +}; + +CFStringRef fails[] = { + CFSTR("Copyright © 2007-08 Apple Inc. All rights reserved."), + CFSTR("Copyright (c) 2007 Apple Inc. All rights reserved."), + CFSTR("Copyright © 2007- Apple Inc. 
All rights reserved."), + CFSTR("Copyright © 2007 - 2008 Apple Inc. All rights reserved.") +}; + +extern char *createUTF8CStringForCFString(CFStringRef aString); + +#endif /* TEST */ + +/****************************************************************************** +* Prototypes +******************************************************************************/ + +static boolean_t is_space(const char c) + __attribute__((const)); +static boolean_t is_token_delimiter(const char c) + __attribute__((const)); +static boolean_t is_token_break(const char *str) + __attribute__((pure, nonnull)); +static boolean_t token_is_year(const char *str) + __attribute__((pure, nonnull)); +static boolean_t token_is_yearRange(const char *str) + __attribute__((pure, nonnull)); +static boolean_t dates_are_valid(const char *str, const u_long len) + __attribute__((pure, nonnull)); + +/****************************************************************************** +******************************************************************************/ +static boolean_t +is_space(const char c) +{ + switch (c) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return TRUE; + } + + return FALSE; +} + +/****************************************************************************** +******************************************************************************/ +static boolean_t +is_token_delimiter(const char c) +{ + return (is_space(c) || (',' == c) || ('\0' == c)); +} + +/****************************************************************************** +* A token break is defined to be the boundary where the current character is +* not a token delimiter and the next character is a token delimiter. +******************************************************************************/ +static boolean_t +is_token_break(const char *str) +{ + /* This is safe because '\0' is a token delimiter, so the second check + * will not execute if we reach the end of the string. 
+ */ + return (!is_token_delimiter(str[0]) && is_token_delimiter(str[1])); +} + +/****************************************************************************** +* A year is defined by the following regular expression: +* /[0-9]{4}$/ +******************************************************************************/ +#define kYearLen 5 +static boolean_t +token_is_year(const char *str) +{ + boolean_t result = FALSE; + u_int i = 0; + + for (i = 0; i < kYearLen - 1; ++i) { + if (str[i] < '0' || str[i] > '9') goto finish; + } + + if (str[i] != '\0') goto finish; + + result = TRUE; +finish: + return result; +} + +/****************************************************************************** +* A year range is defined by the following regular expression: +* /[0-9]{4}[-][0-9]{4}$/ +******************************************************************************/ +#define kYearRangeLen 10 +static boolean_t +token_is_yearRange(const char *str) +{ + boolean_t result = FALSE; + u_int i = 0; + + for (i = 0; i < kYearLen - 1; ++i) { + if (str[i] < '0' || str[i] > '9') goto finish; + } + + if (str[i] != '-') goto finish; + + for (i = kYearLen; i < kYearRangeLen - 1; ++i) { + if (str[i] < '0' || str[i] > '9') goto finish; + } + + if (str[i] != '\0') goto finish; + + result = TRUE; +finish: + return result; +} + +/****************************************************************************** +* The dates_are_valid function takes as input a comma-delimited list of years +* and year ranges, and returns TRUE if all years and year ranges are valid +* and well-formed. 
+******************************************************************************/ +static boolean_t +dates_are_valid(const char *str, const u_long len) +{ + boolean_t result = FALSE; + const char *token_ptr = NULL; + char token_buffer[kYearRangeLen]; + u_int token_index = 0; + + /* Walk str[0..len), collecting each run of non-delimiter characters into + * token_buffer and validating it as soon as the run ends. Note that + * is_token_break() looks one character ahead, so str[len] must be + * readable (the caller passes a NUL-terminated copy). + */ + token_index = 0; + for (token_ptr = str; token_ptr < str + len; ++token_ptr) { + /* Skip delimiters that appear between tokens */ + if (is_token_delimiter(*token_ptr) && !token_index) continue; + + /* The longest well-formed token is a year range, which is + * kYearRangeLen - 1 characters long, and token_buffer must also hold + * the terminating NUL. Fail before storing a character that would + * leave no room for the terminator; a longer token can never be a + * valid year or year range anyway. (Checking against kYearRangeLen + * here let a 10-character token push the NUL one byte past the end + * of token_buffer.) + */ + if (token_index >= kYearRangeLen - 1) goto finish; + + token_buffer[token_index++] = *token_ptr; + if (is_token_break(token_ptr)) { + if (!token_index) continue; + + /* End of the token: terminate it and check its format */ + token_buffer[token_index++] = '\0'; + + if (!token_is_year(token_buffer) && + !token_is_yearRange(token_buffer)) + { + goto finish; + } + + token_index = 0; + } + } + + result = TRUE; +finish: + return result; +} + +/****************************************************************************** +* The copyright string is composed of three parts: +* 1) A copyright notice, "Copyright ©" +* 2) One or more years or year ranges, e.g., "2004,2006-2008" +* 3) A rights reserved notice, "Apple Inc. All Rights Reserved." +* We check the validity of the string by searching for both the copyright + +* notice and the rights reserved notice. If both are found, we then check that +* the text between the two notices contains only valid years and year ranges. 
+******************************************************************************/ +boolean_t +kxld_validate_copyright_string(const char *str) +{ + boolean_t result = FALSE; + const char *copyright = NULL; + const char *rights = NULL; + char *date_str = NULL; + u_long len = 0; + u_long alloc_len = 0; + + /* Locate the first occurrence of each notice; both must be present and + * the copyright notice must come first. + */ + copyright = kxld_strstr(str, kCopyrightToken); + rights = kxld_strstr(str, kRightsToken); + + if (!copyright || !rights || copyright > rights) goto finish; + + /* Advance to the first character after the copyright notice */ + str = copyright + const_strlen(kCopyrightToken); + + /* Both tokens are bounded by a space, so the rights notice can match on + * the last character of the copyright notice. In that case there is no + * date text at all and (rights - str) would wrap to a huge unsigned + * length, so reject the string here. + */ + if (rights < str) goto finish; + + /* Copy the text between the two notices into a NUL-terminated buffer. + * The buffer needs len + 1 bytes: len characters plus the terminator + * written below. The same size must be handed back to kxld_free(). + */ + len = rights - str; + alloc_len = len + 1; + date_str = kxld_alloc(alloc_len); + if (!date_str) goto finish; + + strncpy(date_str, str, len); + date_str[len] = '\0'; + + if (!dates_are_valid(date_str, len)) goto finish; + + result = TRUE; +finish: + if (date_str) kxld_free(date_str, alloc_len); + return result; +} + +#if TEST + +/****************************************************************************** +******************************************************************************/ +int +main(int argc __unused, char *argv[] __unused) +{ + int result = 1; + CFStringRef the_string = NULL; + const char *str = NULL; + u_int i = 0; + + printf("The following %lu strings should pass\n", + const_array_len(passes)); + + for (i = 0; i < const_array_len(passes); ++i) { + the_string = passes[i]; + str = createUTF8CStringForCFString(the_string); + if (!str) goto finish; + + printf("%s: %s\n", + (kxld_validate_copyright_string(str)) ? "pass" : "fail", str); + } + + printf("\nThe following %lu strings should fail\n", + const_array_len(fails)); + + for (i = 0; i < const_array_len(fails); ++i) { + the_string = fails[i]; + str = createUTF8CStringForCFString(the_string); + if (!str) goto finish; + + printf("%s: %s\n", + (kxld_validate_copyright_string(str)) ? 
"pass" : "fail", str); + } + + result = 0; + +finish: + return result; +} +#endif /* TEST */ + diff --git a/libkern/kxld/kxld_dict.c b/libkern/kxld/kxld_dict.c new file mode 100644 index 000000000..95588a523 --- /dev/null +++ b/libkern/kxld/kxld_dict.c @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_dict.h" +#include "kxld_util.h" + +/******************************************************************************* +* Types and macros +*******************************************************************************/ + +/* Ratio of num_entries:num_buckets that will cause a resize */ +#define RESIZE_NUMER 7 +#define RESIZE_DENOM 10 +#define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM) +#define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER) + +/* Selected for good scaling qualities when resizing dictionary + * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm + */ +#define DEFAULT_DICT_SIZE 89 + +typedef struct dict_entry DictEntry; + +typedef enum { + EMPTY = 0, + USED = 1, + DELETED = 2 +} DictEntryState; + +struct dict_entry { + const void *key; + void *value; + DictEntryState state; +}; + +/******************************************************************************* +* Function prototypes +*******************************************************************************/ + +static kern_return_t get_locate_index(const KXLDDict *dict, const void *key, + u_int *idx); +static kern_return_t get_insert_index(const KXLDDict *dict, const void *key, + u_int *idx); +static kern_return_t resize_dict(KXLDDict *dict); + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp, + u_int num_entries) +{ + kern_return_t rval = KERN_FAILURE; + u_int min_buckets = MIN_BUCKETS(num_entries); + u_int num_buckets = DEFAULT_DICT_SIZE; + + check(dict); + check(hash); + check(cmp); + + /* We want the number of allocated buckets to be at least twice that of the + * number to be inserted. 
+ */ + while (min_buckets > num_buckets) { + num_buckets *= 2; + num_buckets++; + } + + /* Allocate enough buckets for the anticipated number of entries */ + rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets); + require_noerr(rval, finish); + + /* Initialize */ + dict->hash = hash; + dict->cmp = cmp; + dict->num_entries = 0; + dict->resize_threshold = RESIZE_THRESHOLD(num_buckets); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_clear(KXLDDict *dict) +{ + check(dict); + + dict->hash = NULL; + dict->cmp = NULL; + dict->num_entries = 0; + dict->resize_threshold = 0; + kxld_array_clear(&dict->buckets); + kxld_array_clear(&dict->resize_buckets); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict) +{ + check(iter); + check(dict); + + iter->idx = 0; + iter->dict = dict; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_deinit(KXLDDict *dict) +{ + check(dict); + + kxld_array_deinit(&dict->buckets); + kxld_array_deinit(&dict->resize_buckets); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_dict_get_num_entries(const KXLDDict *dict) +{ + check(dict); + + return dict->num_entries; +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_dict_find(const KXLDDict *dict, const void *key) +{ + 
kern_return_t rval = KERN_FAILURE; + DictEntry *entry = NULL; + u_int idx = 0; + + check(dict); + check(key); + + rval = get_locate_index(dict, key, &idx); + if (rval) return NULL; + + entry = kxld_array_get_item(&dict->buckets, idx); + + return entry->value; +} + +/******************************************************************************* +* This dictionary uses linear probing, which means that when there is a +* collision, we just walk along the buckets until a free bucket shows up. +* A consequence of this is that when looking up an item, items that lie between +* its hash value and its actual bucket may have been deleted since it was +* inserted. Thus, we should only stop a lookup when we've wrapped around the +* dictionary or encountered an EMPTY bucket. +********************************************************************************/ +static kern_return_t +get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx) +{ + kern_return_t rval = KERN_FAILURE; + DictEntry *entry = NULL; + u_int base, idx; + + base = idx = dict->hash(dict, key); + + /* Iterate until we match the key, wrap, or hit an empty bucket */ + entry = kxld_array_get_item(&dict->buckets, idx); + while (!dict->cmp(entry->key, key)) { + if (entry->state == EMPTY) goto finish; + + idx = (idx + 1) % dict->buckets.nitems; + if (idx == base) goto finish; + + entry = kxld_array_get_item(&dict->buckets, idx); + } + + check(idx < dict->buckets.nitems); + + *_idx = idx; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_dict_insert(KXLDDict *dict, const void *key, void *value) +{ + kern_return_t rval = KERN_FAILURE; + DictEntry *entry = NULL; + u_int idx = 0; + + check(dict); + check(key); + check(value); + + /* Resize if we are greater than the capacity threshold. 
+ * Note: this is expensive, but the dictionary can be sized correctly at + * construction to avoid ever having to do this. + */ + while (dict->num_entries > dict->resize_threshold) { + rval = resize_dict(dict); + require_noerr(rval, finish); + } + + /* If this function returns FULL after we've already resized appropriately + * something is very wrong and we should return an error. + */ + rval = get_insert_index(dict, key, &idx); + require_noerr(rval, finish); + + /* Insert the new key-value pair into the bucket, but only count it as a + * new entry if we are not overwriting an existing entry. + */ + entry = kxld_array_get_item(&dict->buckets, idx); + if (entry->state != USED) { + dict->num_entries++; + entry->key = key; + entry->state = USED; + } + entry->value = value; + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Increases the hash table's capacity by 2N+1. Uses dictionary API. Not +* fast; just correct. 
+*******************************************************************************/ +static kern_return_t +resize_dict(KXLDDict *dict) +{ + kern_return_t rval = KERN_FAILURE; + KXLDArray tmparray; + DictEntry *entry = NULL; + u_int nbuckets = (dict->buckets.nitems * 2 + 1); + u_int i = 0; + + check(dict); + + /* Initialize a new set of buckets to hold more entries */ + rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets); + require_noerr(rval, finish); + + /* Swap the new buckets with the old buckets */ + tmparray = dict->buckets; + dict->buckets = dict->resize_buckets; + dict->resize_buckets = tmparray; + + /* Reset dictionary parameters */ + dict->num_entries = 0; + dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems); + + /* Rehash all of the entries */ + for (i = 0; i < dict->resize_buckets.nitems; ++i) { + entry = kxld_array_get_item(&dict->resize_buckets, i); + if (entry->state == USED) { + rval = kxld_dict_insert(dict, entry->key, entry->value); + require_noerr(rval, finish); + } + } + + /* Clear the old buckets */ + kxld_array_clear(&dict->resize_buckets); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Finds the bucket that an insert of 'key' should use and stores its index in +* *r_index. If a USED bucket on the probe sequence already holds the key, that +* bucket is returned so the caller overwrites the value in place. Otherwise +* the first DELETED or EMPTY bucket met on the probe sequence is returned. +* Returns KERN_FAILURE if every bucket is USED by some other key. +*******************************************************************************/ +static kern_return_t +get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index) +{ + kern_return_t rval = KERN_FAILURE; + DictEntry *entry = NULL; + u_int base, idx; + u_int free_idx = 0; + u_int have_free = 0; + + base = idx = dict->hash(dict, key); + + /* Probe until we find a key match, reach an EMPTY bucket, or wrap around. + * A DELETED bucket must not end the search: the key may have been stored + * further along the probe sequence before this bucket was deleted, and + * stopping here would insert a second copy of the key. We only remember + * the first free bucket so that it can be reused if no match turns up. + */ + do { + entry = kxld_array_get_item(&dict->buckets, idx); + + if (entry->state == USED) { + if (dict->cmp(entry->key, key)) { + /* The key is already present; reuse its bucket */ + *r_index = idx; + rval = KERN_SUCCESS; + goto finish; + } + } else { + if (!have_free) { + free_idx = idx; + have_free = 1; + } + + /* Nothing was ever stored past an EMPTY bucket */ + if (entry->state == EMPTY) break; + } + + idx = (idx + 1) % dict->buckets.nitems; + } while (idx != base); + + /* No match: fall back to the first free bucket, if there was one */ + require_action(have_free, finish, rval=KERN_FAILURE); + + *r_index = free_idx; + rval = KERN_SUCCESS; + +finish: + return rval; +}
 + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_remove(KXLDDict *dict, const void *key, void **value) +{ + kern_return_t rval = KERN_FAILURE; + DictEntry *entry = NULL; + u_int idx = 0; + + check(dict); + check(key); + + /* Find the item */ + rval = get_locate_index(dict, key, &idx); + if (rval) { + if (value) *value = NULL; + return; + } + + entry = kxld_array_get_item(&dict->buckets, idx); + + /* Save the value if requested */ + if (value) *value = entry->value; + + /* Delete the item from the dictionary */ + entry->key = NULL; + entry->value = NULL; + entry->state = DELETED; + dict->num_entries--; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key, + void **value) +{ + DictEntry *entry = NULL; + + check(iter); + check(key); + check(value); + + *key = NULL; + *value = NULL; + + /* Walk over the dictionary looking for USED buckets */ + for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) { + entry = kxld_array_get_item(&iter->dict->buckets, iter->idx); + if (entry->state == USED) { + *key = entry->key; + *value = entry->value; + ++(iter->idx); + break; + } + } +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_dict_iterator_reset(KXLDDictIterator *iter) +{ + 
iter->idx = 0; +} + +/******************************************************************************* +* This is Daniel Bernstein's hash algorithm from comp.lang.c +* It's fast and distributes well. Returns an idx into the symbol hash table. +* NOTE: Will not check for a valid pointer - performance +*******************************************************************************/ +u_int +kxld_dict_string_hash(const KXLDDict *dict, const void *_key) +{ + const char *key = _key; + u_int c = 0; + u_int hash_val = 5381; + + check(dict); + check(_key); + + while ((c = *key++)) { + /* hash(i) = hash(i-1) *33 ^ name[i] */ + hash_val = ((hash_val << 5) + hash_val) ^ c; + } + + return (hash_val % dict->buckets.nitems); +} + +u_int +kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key) +{ + uint32_t key = *(const uint32_t *) _key; + + check(_key); + + return (u_int) (key % dict->buckets.nitems); +} + +u_int +kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key) +{ + kxld_addr_t key = *(const kxld_addr_t *) _key; + + check(_key); + + return (u_int) (key % dict->buckets.nitems); +} + +u_int +kxld_dict_string_cmp(const void *key1, const void *key2) +{ + return streq(key1, key2); +} + +u_int +kxld_dict_uint32_cmp(const void *key1, const void *key2) +{ + const uint32_t *a = key1; + const uint32_t *b = key2; + + return (a && b && (*a == *b)); +} + +u_int +kxld_dict_kxldaddr_cmp(const void *key1, const void *key2) +{ + const kxld_addr_t *a = key1; + const kxld_addr_t *b = key2; + + return (a && b && (*a == *b)); +} + diff --git a/libkern/kxld/kxld_dict.h b/libkern/kxld/kxld_dict.h new file mode 100644 index 000000000..739adfe6a --- /dev/null +++ b/libkern/kxld/kxld_dict.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_DICT_H_ +#define _KXLD_DICT_H_ + +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +#include "kxld_array.h" + +/******************************************************************************* +* This is a dictionary implementation for hash tables with c-string keys. It +* uses linear probing for collision resolution and supports hints for hash +* table size as well as automatic resizing. All possible sizes for it are +* prime or 'pseudoprime' - good choices for number of buckets. +* NOTE: NULL is NOT a valid key or value! +* +* The dictionary also provides a basic iterator interface. The iterator +* supports a basic walk through the dictionary in unsorted order. 
If the +* dictionary is changed in any way while an iterator is being used, the +* iterator's behavior is undefined. +*******************************************************************************/ + +struct kxld_dict; +typedef struct kxld_dict KXLDDict; +typedef struct kxld_dict_iterator KXLDDictIterator; + +typedef u_int (*kxld_dict_hash)(const KXLDDict *dict, const void *key); +typedef u_int (*kxld_dict_cmp)(const void *key1, const void *key2); + +struct kxld_dict { + KXLDArray buckets; // The array of buckets + KXLDArray resize_buckets; // A helper array for resizing + kxld_dict_hash hash; // Hash function + kxld_dict_cmp cmp; // Comparison function + u_int num_entries; // Num entries in the dictionary + u_int resize_threshold; // Num entries we must reach to cause a resize +}; + +struct kxld_dict_iterator { + u_int idx; + const KXLDDict *dict; +}; + +/******************************************************************************* +* Constructors and Destructors +*******************************************************************************/ + +/* Initializes a new dictionary object. + * num_entries is a hint to the maximum number of entries that will be inserted + */ +kern_return_t kxld_dict_init(KXLDDict *dict, kxld_dict_hash hash, + kxld_dict_cmp cmp, u_int num_entries) + __attribute__((nonnull, visibility("hidden"))); + +/* Initializes a new dictionary iterator */ +void kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict) + __attribute__((nonnull, visibility("hidden"))); + +/* Removes all entries from the dictionary. The dictionary must be + * reinitialized before it can be used again. 
+ */ +void kxld_dict_clear(KXLDDict *dict) + __attribute__((nonnull, visibility("hidden"))); + +/* Destroys a dictionary and all of its entries */ +void kxld_dict_deinit(KXLDDict *dict) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +/* Returns the number of entries in the dictionary */ +u_int kxld_dict_get_num_entries(const KXLDDict *dict) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* Finds a key-value pair and assigns the value to the 'value' pointer, or NULL + * when not found. + */ +void * kxld_dict_find(const KXLDDict *dict, const void *key) + __attribute__((pure, nonnull, visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +/* Inserts a key-value pair, and will overwrite the value for a key if that key + * is already in the table. + */ +kern_return_t kxld_dict_insert(KXLDDict *dict, const void *key, void *value) + __attribute__((nonnull, visibility("hidden"))); + +/* Removes a key-value pair and assigns the value to the 'value' pointer. + * 'value' pointer will be set to NULL if value to be removed is not found. + * 'value pointer may be NULL if removed value is not needed. 
+ */ +void kxld_dict_remove(KXLDDict *dict, const void *key, void **value) + __attribute__((nonnull(1,2),visibility("hidden"))); + +/* Gets the next item in the dictionary */ +void kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key, + void **value) + __attribute__((nonnull, visibility("hidden"))); + +/* Resets the iterator to the first item in the dictionary */ +void kxld_dict_iterator_reset(KXLDDictIterator *iter) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Helpers +*******************************************************************************/ + +u_int kxld_dict_string_hash(const KXLDDict *dict, const void *key) + __attribute__((pure, nonnull, visibility("hidden"))); +u_int kxld_dict_uint32_hash(const KXLDDict *dict, const void *key) + __attribute__((pure, nonnull, visibility("hidden"))); +u_int kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *key) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_int kxld_dict_string_cmp(const void *key1, const void *key2) + __attribute__((pure, visibility("hidden"))); +u_int kxld_dict_uint32_cmp(const void *key1, const void *key2) + __attribute__((pure, visibility("hidden"))); +u_int kxld_dict_kxldaddr_cmp(const void *key1, const void *key2) + __attribute__((pure, visibility("hidden"))); + +#endif /* _KXLD_DICT_H_ */ diff --git a/libkern/kxld/kxld_kext.c b/libkern/kxld/kxld_kext.c new file mode 100644 index 000000000..7b5623003 --- /dev/null +++ b/libkern/kxld/kxld_kext.c @@ -0,0 +1,2868 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if KERNEL + #include + #include + #include + #include + #include +#else /* !KERNEL */ + #include + #include + #include + #include +#endif /* KERNEL */ + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_dict.h" +#include "kxld_kext.h" +#include "kxld_reloc.h" +#include "kxld_sect.h" +#include "kxld_seg.h" +#include "kxld_state.h" +#include "kxld_symtab.h" +#include "kxld_util.h" +#include "kxld_uuid.h" +#include "kxld_vtable.h" + +struct symtab_command; + +enum kxld_link_type { + KXLD_LINK_KERNEL, + KXLD_LINK_PSEUDO_KEXT, + KXLD_LINK_KEXT, + KXLD_LINK_UNKNOWN +}; + +typedef enum kxld_link_type KXLDLinkType; + +struct kxld_kext { + u_char *file; + u_long size; + const char *name; + uint32_t filetype; + KXLDArray segs; + KXLDArray sects; + KXLDArray vtables; + KXLDArray extrelocs; + KXLDArray locrelocs; + KXLDDict vtable_index; + KXLDRelocator relocator; + KXLDuuid uuid; + 
KXLDSymtab *symtab; + kxld_addr_t link_addr; + kmod_info_t *kmod_info; + kxld_addr_t kmod_link_addr; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + KXLDLinkType link_type; + KXLDFlags flags; + boolean_t is_final_image; + boolean_t got_is_created; + struct dysymtab_command *dysymtab_hdr; +#if KXLD_USER_OR_OBJECT + KXLDArray *section_order; +#endif +#if !KERNEL + enum NXByteOrder host_order; + enum NXByteOrder target_order; +#endif +}; + +/******************************************************************************* +* Prototypes +*******************************************************************************/ + +static kern_return_t get_target_machine_info(KXLDKext *kext, cpu_type_t cputype, + cpu_subtype_t cpusubtype); +static kern_return_t get_file_for_arch(KXLDKext *kext, u_char *file, u_long size); + +static u_long get_macho_header_size(const KXLDKext *kext); +static u_long get_macho_data_size(const KXLDKext *kext); +static kern_return_t export_macho_header(const KXLDKext *kext, u_char *buf, + u_int ncmds, u_long *header_offset, u_long header_size); + +static kern_return_t init_from_execute(KXLDKext *kext); +static kern_return_t init_from_final_linked_image(KXLDKext *kext, u_int *filetype_out, + struct symtab_command **symtab_hdr_out); + +static boolean_t target_supports_protected_segments(const KXLDKext *kext) + __attribute__((pure)); + +#if KXLD_USER_OR_OBJECT +static boolean_t target_supports_object(const KXLDKext *kext) __attribute((pure)); +static kern_return_t init_from_object(KXLDKext *kext); +static kern_return_t process_relocs_from_sections(KXLDKext *kext); +#endif /* KXLD_USER_OR_OBJECT */ + +#if KXLD_USER_OR_BUNDLE +static boolean_t target_supports_bundle(const KXLDKext *kext) __attribute((pure)); +static kern_return_t init_from_bundle(KXLDKext *kext); +static kern_return_t process_relocs_from_tables(KXLDKext *kext); +static kern_return_t process_symbol_pointers(KXLDKext *kext); +static void add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t 
is_32_bit); +#endif /* KXLD_USER_OR_BUNDLE */ + +static kern_return_t get_metaclass_symbol_from_super_meta_class_pointer_symbol( + KXLDKext *kext, KXLDSym *super_metaclass_pointer_sym, KXLDSym **meta_class); + +static kern_return_t resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols); +static kern_return_t patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables, + KXLDDict *defined_symbols); +static kern_return_t validate_symbols(KXLDKext *kext); +static kern_return_t populate_kmod_info(KXLDKext *kext); +static kern_return_t copy_vtables(KXLDKext *kext, const KXLDDict *patched_vtables); +static kern_return_t create_vtables(KXLDKext *kext); +static void restrict_private_symbols(KXLDKext *kext); + +#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON +static kern_return_t add_section(KXLDKext *kext, KXLDSect **sect); +#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */ + +#if KXLD_USER_OR_GOT +static boolean_t target_has_got(const KXLDKext *kext) __attribute__((pure)); +static kern_return_t create_got(KXLDKext *kext); +static kern_return_t populate_got(KXLDKext *kext); +#endif /* KXLD_USER_OR_GOT */ + +static boolean_t target_supports_common(const KXLDKext *kext) __attribute((pure)); +#if KXLD_USER_OR_COMMON +static kern_return_t resolve_common_symbols(KXLDKext *kext); +#endif /* KXLD_USER_OR_COMMON */ + +static boolean_t target_supports_strict_patching(KXLDKext *kext) + __attribute__((pure)); + +#if KXLD_USER_OR_ILP32 +static u_long get_macho_cmd_data_32(u_char *file, u_long offset, + u_int *filetype, u_int *ncmds); +static kern_return_t export_macho_header_32(const KXLDKext *kext, u_char *buf, + u_int ncmds, u_long *header_offset, u_long header_size); +#endif /* KXLD_USER_OR_ILP32 */ +#if KXLD_USER_OR_LP64 +static u_long get_macho_cmd_data_64(u_char *file, u_long offset, + u_int *filetype, u_int *ncmds); +static kern_return_t export_macho_header_64(const KXLDKext *kext, u_char *buf, + u_int ncmds, u_long *header_offset, u_long 
header_size); +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +size_t +kxld_kext_sizeof(void) +{ + return sizeof(KXLDKext); +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_kext_init(KXLDKext *kext, u_char *file, u_long size, + const char *name, KXLDFlags flags, boolean_t is_kernel, + KXLDArray *section_order __unused, + cpu_type_t cputype, cpu_subtype_t cpusubtype) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + u_int i = 0; + + check(kext); + check(file); + check(size); + + kext->name = name; + kext->flags = flags; +#if KXLD_USER_OR_OBJECT + kext->section_order = section_order; +#endif + + /* Find the local architecture */ + + rval = get_target_machine_info(kext, cputype, cpusubtype); + require_noerr(rval, finish); + + /* Find the Mach-O file for the target architecture */ + + rval = get_file_for_arch(kext, file, size); + require_noerr(rval, finish); + + /* Build the relocator */ + + rval = kxld_relocator_init(&kext->relocator, kext->cputype, + kext->cpusubtype, kxld_kext_target_needs_swap(kext)); + require_noerr(rval, finish); + + /* Allocate the symbol table */ + + if (!kext->symtab) { + kext->symtab = kxld_alloc(kxld_symtab_sizeof()); + require_action(kext->symtab, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(kext->symtab, kxld_symtab_sizeof()); + } + + if (is_kernel) { + kext->link_type = KXLD_LINK_KERNEL; + } else { + kext->link_type = KXLD_LINK_UNKNOWN; + } + + /* There are four types of Mach-O files that we can support: + * 1) 32-bit MH_OBJECT - All pre-SnowLeopard systems + * 2) 32-bit MH_KEXT_BUNDLE - Not supported + * 3) 64-bit MH_OBJECT - Needed for K64 bringup + * 4) 64-bit MH_KEXT_BUNDLE - The likely 64-bit kext filetype + */ + + if 
(kxld_kext_is_32_bit(kext)) { + struct mach_header *mach_hdr = (struct mach_header *) kext->file; + kext->filetype = mach_hdr->filetype; + } else { + struct mach_header_64 *mach_hdr = (struct mach_header_64 *) kext->file; + kext->filetype = mach_hdr->filetype; + } + + switch (kext->filetype) { +#if KXLD_USER_OR_OBJECT + case MH_OBJECT: + rval = init_from_object(kext); + require_noerr(rval, finish); + break; +#endif /* KXLD_USER_OR_OBJECT */ +#if KXLD_USER_OR_BUNDLE + case MH_KEXT_BUNDLE: + rval = init_from_bundle(kext); + require_noerr(rval, finish); + break; +#endif /* KXLD_USER_OR_BUNDLE */ + case MH_EXECUTE: + rval = init_from_execute(kext); + require_noerr(rval, finish); + break; + default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogFiletypeNotSupported, kext->filetype); + goto finish; + } + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + kxld_seg_set_vm_protections(seg, target_supports_protected_segments(kext)); + } + + switch (kext->link_type) { + case KXLD_LINK_KEXT: + (void) restrict_private_symbols(kext); + /* Fallthrough */ + case KXLD_LINK_KERNEL: + rval = create_vtables(kext); + require_noerr(rval, finish); + break; + default: + break; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +get_target_machine_info(KXLDKext *kext, cpu_type_t cputype __unused, + cpu_subtype_t cpusubtype __unused) +{ +#if KERNEL + + /* Because the kernel can only link for its own architecture, we know what + * the host and target architectures are at compile time, so we can use + * a vastly simplified version of this function. 
+ */ + + check(kext); + +#if defined(__i386__) + kext->cputype = CPU_TYPE_I386; + kext->cpusubtype = CPU_SUBTYPE_I386_ALL; + return KERN_SUCCESS; +#elif defined(__ppc__) + kext->cputype = CPU_TYPE_POWERPC; + kext->cpusubtype = CPU_SUBTYPE_POWERPC_ALL; + return KERN_SUCCESS; +#elif defined(__x86_64__) + kext->cputype = CPU_TYPE_X86_64; + kext->cpusubtype = CPU_SUBTYPE_X86_64_ALL; + return KERN_SUCCESS; +#else + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogArchNotSupported, _mh_execute_header->cputype); + return KERN_NOT_SUPPORTED; +#endif /* Supported architecture defines */ + + +#else /* !KERNEL */ + + /* User-space must look up the architecture it's running on and the target + * architecture at run-time. + */ + + kern_return_t rval = KERN_FAILURE; + const NXArchInfo *host_arch = NULL; + + check(kext); + + host_arch = NXGetLocalArchInfo(); + require_action(host_arch, finish, rval=KERN_FAILURE); + + kext->host_order = host_arch->byteorder; + + /* If the user did not specify a cputype, use the local architecture. + */ + + if (cputype) { + kext->cputype = cputype; + kext->cpusubtype = cpusubtype; + } else { + kext->cputype = host_arch->cputype; + kext->target_order = kext->host_order; + + switch (kext->cputype) { + case CPU_TYPE_I386: + kext->cpusubtype = CPU_SUBTYPE_I386_ALL; + break; + case CPU_TYPE_POWERPC: + kext->cpusubtype = CPU_SUBTYPE_POWERPC_ALL; + break; + case CPU_TYPE_X86_64: + kext->cpusubtype = CPU_SUBTYPE_X86_64_ALL; + break; + case CPU_TYPE_ARM: + kext->cpusubtype = CPU_SUBTYPE_ARM_ALL; + break; + default: + kext->cpusubtype = 0; + } + } + + /* Validate that we support the target architecture and record its + * endianness. 
+ */ + + switch(kext->cputype) { + case CPU_TYPE_ARM: + case CPU_TYPE_I386: + case CPU_TYPE_X86_64: + kext->target_order = NX_LittleEndian; + break; + case CPU_TYPE_POWERPC: + kext->target_order = NX_BigEndian; + break; + default: + rval = KERN_NOT_SUPPORTED; + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogArchNotSupported, kext->cputype); + goto finish; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +#endif /* KERNEL */ +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +get_file_for_arch(KXLDKext *kext, u_char *file, u_long size) +{ + kern_return_t rval = KERN_FAILURE; + struct mach_header *mach_hdr = NULL; +#if !KERNEL + struct fat_header *fat = (struct fat_header *) file; + struct fat_arch *archs = (struct fat_arch *) &fat[1]; + boolean_t swap = FALSE; +#endif /* KERNEL */ + + check(kext); + check(file); + check(size); + + kext->file = file; + kext->size = size; + + /* We are assuming that we will never receive a fat file in the kernel */ + +#if !KERNEL + require_action(size >= sizeof(*fat), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* The fat header is always big endian, so swap if necessary */ + if (fat->magic == FAT_CIGAM) { + (void) swap_fat_header(fat, kext->host_order); + swap = TRUE; + } + + if (fat->magic == FAT_MAGIC) { + struct fat_arch *arch = NULL; + + require_action(size >= (sizeof(*fat) + (fat->nfat_arch * sizeof(*archs))), + finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Swap the fat_arch structures if necessary */ + if (swap) { + (void) swap_fat_arch(archs, fat->nfat_arch, kext->host_order); + } + + /* Locate the Mach-O for the requested architecture */ + + arch = NXFindBestFatArch(kext->cputype, kext->cpusubtype, archs, + fat->nfat_arch); + require_action(arch, finish, 
rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogArchNotFound)); + require_action(size >= arch->offset + arch->size, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + kext->file = file + arch->offset; + kext->size = arch->size; + } +#endif /* !KERNEL */ + + /* Swap the Mach-O's headers to this architecture if necessary */ + if (kxld_kext_is_32_bit(kext)) { + rval = validate_and_swap_macho_32(kext->file, kext->size +#if !KERNEL + , kext->host_order +#endif /* !KERNEL */ + ); + } else { + rval = validate_and_swap_macho_64(kext->file, kext->size +#if !KERNEL + , kext->host_order +#endif /* !KERNEL */ + ); + } + require_noerr(rval, finish); + + mach_hdr = (struct mach_header *) kext->file; + require_action(kext->cputype == mach_hdr->cputype, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_kext_is_32_bit(const KXLDKext *kext) +{ + check(kext); + + return kxld_is_32_bit(kext->cputype); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_kext_get_cputype(const KXLDKext *kext, cpu_type_t *cputype, + cpu_subtype_t *cpusubtype) +{ + check(kext); + check(cputype); + check(cpusubtype); + + *cputype = kext->cputype; + *cpusubtype = kext->cpusubtype; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_kext_validate_cputype(const KXLDKext *kext, cpu_type_t cputype, + cpu_subtype_t cpusubtype __unused) +{ + if (kext->cputype != cputype) return KERN_FAILURE; + return 
KERN_SUCCESS; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +target_supports_protected_segments(const KXLDKext *kext) +{ + return (kext->is_final_image && + kext->cputype == CPU_TYPE_X86_64); +} + +#if KXLD_USER_OR_OBJECT +/******************************************************************************* +*******************************************************************************/ +static boolean_t target_supports_object(const KXLDKext *kext) +{ + return (kext->cputype == CPU_TYPE_POWERPC || + kext->cputype == CPU_TYPE_I386 || + kext->cputype == CPU_TYPE_ARM); +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_from_object(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + struct load_command *cmd_hdr = NULL; + struct symtab_command *symtab_hdr = NULL; + struct uuid_command *uuid_hdr = NULL; + KXLDSect *sect = NULL; + u_long offset = 0; + u_long sect_offset = 0; + u_int filetype = 0; + u_int ncmds = 0; + u_int nsects = 0; + u_int i = 0; + boolean_t has_segment = FALSE; + + check(kext); + + require_action(target_supports_object(kext), + finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogFiletypeNotSupported, MH_OBJECT)); + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), offset, + get_macho_cmd_data_32, get_macho_cmd_data_64, + kext->file, offset, &filetype, &ncmds); + + require_action(filetype == MH_OBJECT, finish, rval=KERN_FAILURE); + + /* MH_OBJECTs use one unnamed segment to contain all of the sections. We + * loop over all of the load commands to initialize the structures we + * expect. Then, we'll use the unnamed segment to get to all of the + * sections, and then use those sections to create the actual segments. 
+ */ + + for (; i < ncmds; ++i, offset += cmd_hdr->cmdsize) { + cmd_hdr = (struct load_command *) (kext->file + offset); + + switch(cmd_hdr->cmd) { +#if KXLD_USER_OR_ILP32 + case LC_SEGMENT: + { + struct segment_command *seg_hdr = + (struct segment_command *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + require_action(kxld_kext_is_32_bit(kext), finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "LC_SEGMENT in 64-bit kext.")); + require_action(!has_segment, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Multiple segments in an MH_OBJECT kext.")); + + nsects = seg_hdr->nsects; + sect_offset = offset + sizeof(*seg_hdr); + has_segment = TRUE; + } + break; +#endif /* KXLD_USER_OR_ILP32 */ +#if KXLD_USER_OR_LP64 + case LC_SEGMENT_64: + { + struct segment_command_64 *seg_hdr = + (struct segment_command_64 *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + require_action(!kxld_kext_is_32_bit(kext), finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "LC_SEGMENT_64 in a 32-bit kext.")); + require_action(!has_segment, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Multiple segments in an MH_OBJECT kext.")); + + nsects = seg_hdr->nsects; + sect_offset = offset + sizeof(*seg_hdr); + has_segment = TRUE; + } + break; +#endif /* KXLD_USER_OR_LP64 */ + case LC_SYMTAB: + symtab_hdr = (struct symtab_command *) cmd_hdr; + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64, + kext->symtab, kext->file, symtab_hdr, 0); + require_noerr(rval, finish); + break; + case LC_UUID: + uuid_hdr = (struct uuid_command *) cmd_hdr; + kxld_uuid_init_from_macho(&kext->uuid, uuid_hdr); + break; + case LC_UNIXTHREAD: + /* Don't need to do anything with UNIXTHREAD */ + break; + 
default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid segment type in MH_OBJECT kext: %u.", cmd_hdr->cmd); + goto finish; + } + } + + if (has_segment) { + + /* Get the number of sections from the segment and build the section index */ + + rval = kxld_array_init(&kext->sects, sizeof(KXLDSect), nsects); + require_noerr(rval, finish); + + /* Loop over all of the sections to initialize the section index */ + + for (i = 0; i < nsects; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + kxld_sect_init_from_macho_32, kxld_sect_init_from_macho_64, + sect, kext->file, §_offset, i, &kext->relocator); + require_noerr(rval, finish); + } + + /* Create special sections */ + +#if KXLD_USER_OR_GOT + rval = create_got(kext); + require_noerr(rval, finish); +#endif /* KXLD_USER_OR_GOT */ + +#if KXLD_USER_OR_COMMON + rval = resolve_common_symbols(kext); + require_noerr(rval, finish); +#endif /* KXLD_USER_OR_COMMON */ + + /* Create the segments from the section index */ + + rval = kxld_seg_create_seg_from_sections(&kext->segs, &kext->sects); + require_noerr(rval, finish); + + rval = kxld_seg_finalize_object_segment(&kext->segs, + kext->section_order, get_macho_header_size(kext)); + require_noerr(rval, finish); + + kext->link_type = KXLD_LINK_KEXT; + } else { + kext->link_type = KXLD_LINK_PSEUDO_KEXT; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} +#endif /* KXLD_USER_OR_OBJECT */ + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_from_final_linked_image(KXLDKext *kext, u_int *filetype_out, + struct symtab_command **symtab_hdr_out) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + struct load_command *cmd_hdr = NULL; + struct symtab_command *symtab_hdr = NULL; + struct uuid_command *uuid_hdr = 
NULL; + u_long base_offset = 0; + u_long offset = 0; + u_long sect_offset = 0; + u_int filetype = 0; + u_int i = 0; + u_int j = 0; + u_int segi = 0; + u_int secti = 0; + u_int nsegs = 0; + u_int nsects = 0; + u_int ncmds = 0; + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), base_offset, + get_macho_cmd_data_32, get_macho_cmd_data_64, + kext->file, offset, &filetype, &ncmds); + + /* First pass to count segments and sections */ + + offset = base_offset; + for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) { + cmd_hdr = (struct load_command *) (kext->file + offset); + + switch(cmd_hdr->cmd) { +#if KXLD_USER_OR_ILP32 + case LC_SEGMENT: + { + struct segment_command *seg_hdr = + (struct segment_command *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + ++nsegs; + nsects += seg_hdr->nsects; + } + break; +#endif /* KXLD_USER_OR_ILP32 */ +#if KXLD_USER_OR_LP64 + case LC_SEGMENT_64: + { + struct segment_command_64 *seg_hdr = + (struct segment_command_64 *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + ++nsegs; + nsects += seg_hdr->nsects; + } + break; +#endif /* KXLD_USER_OR_LP64 */ + default: + continue; + } + } + + /* Allocate the segments and sections */ + + if (nsegs) { + rval = kxld_array_init(&kext->segs, sizeof(KXLDSeg), nsegs); + require_noerr(rval, finish); + + rval = kxld_array_init(&kext->sects, sizeof(KXLDSect), nsects); + require_noerr(rval, finish); + } + + /* Initialize the segments and sections */ + + offset = base_offset; + for (i = 0; i < ncmds; ++i, offset += cmd_hdr->cmdsize) { + cmd_hdr = (struct load_command *) (kext->file + offset); + seg = NULL; + + switch(cmd_hdr->cmd) { +#if KXLD_USER_OR_ILP32 + case LC_SEGMENT: + { + struct segment_command *seg_hdr = + (struct segment_command *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + seg = kxld_array_get_item(&kext->segs, segi++); + + rval = kxld_seg_init_from_macho_32(seg, 
seg_hdr); + require_noerr(rval, finish); + + sect_offset = offset + sizeof(*seg_hdr); + } + break; +#endif /* KXLD_USER_OR_ILP32 */ +#if KXLD_USER_OR_LP64 + case LC_SEGMENT_64: + { + struct segment_command_64 *seg_hdr = + (struct segment_command_64 *) cmd_hdr; + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + seg = kxld_array_get_item(&kext->segs, segi++); + + rval = kxld_seg_init_from_macho_64(seg, seg_hdr); + require_noerr(rval, finish); + + sect_offset = offset + sizeof(*seg_hdr); + } + break; +#endif /* KXLD_USER_OR_LP64 */ + case LC_SYMTAB: + symtab_hdr = (struct symtab_command *) cmd_hdr; + break; + case LC_UUID: + uuid_hdr = (struct uuid_command *) cmd_hdr; + kxld_uuid_init_from_macho(&kext->uuid, uuid_hdr); + break; + case LC_DYSYMTAB: + kext->dysymtab_hdr = (struct dysymtab_command *) cmd_hdr; + + rval = kxld_reloc_create_macho(&kext->extrelocs, &kext->relocator, + (struct relocation_info *) (kext->file + kext->dysymtab_hdr->extreloff), + kext->dysymtab_hdr->nextrel); + require_noerr(rval, finish); + + rval = kxld_reloc_create_macho(&kext->locrelocs, &kext->relocator, + (struct relocation_info *) (kext->file + kext->dysymtab_hdr->locreloff), + kext->dysymtab_hdr->nlocrel); + require_noerr(rval, finish); + + break; + case LC_UNIXTHREAD: + /* Don't need to do anything with UNIXTHREAD for the kernel */ + require_action(kext->link_type == KXLD_LINK_KERNEL, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "LC_UNIXTHREAD segment is not valid in a kext.")); + break; + default: + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid segment type in MH_KEXT_BUNDLE kext: %u.", cmd_hdr->cmd); + goto finish; + } + + if (seg) { + + /* Initialize the sections */ + for (j = 0; j < seg->sects.nitems; ++j, ++secti) { + sect = kxld_array_get_item(&kext->sects, secti); + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + kxld_sect_init_from_macho_32, 
kxld_sect_init_from_macho_64, + sect, kext->file, §_offset, secti, &kext->relocator); + require_noerr(rval, finish); + + /* Add the section to the segment. This will also make sure + * that the sections and segments have the same segname. + */ + rval = kxld_seg_add_section(seg, sect); + require_noerr(rval, finish); + } + rval = kxld_seg_finish_init(seg); + require_noerr(rval, finish); + } + } + + if (filetype_out) *filetype_out = filetype; + if (symtab_hdr_out) *symtab_hdr_out = symtab_hdr; + kext->is_final_image = TRUE; + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_from_execute(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + struct symtab_command *symtab_hdr = NULL; + kxld_addr_t linkedit_offset = 0; + u_int filetype = 0; +#if KERNEL + KXLDSeg *textseg = NULL; + KXLDSeg *linkeditseg = NULL; +#endif /*KERNEL */ +#if KXLD_USER_OR_OBJECT + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + KXLDSectionName *sname = NULL; + u_int i = 0, j = 0, k = 0; +#endif /* KXLD_USER_OR_OBJECT */ + + check(kext); + + require_action(kext->link_type == KXLD_LINK_KERNEL, finish, + rval=KERN_FAILURE); + + rval = init_from_final_linked_image(kext, &filetype, &symtab_hdr); + require_noerr(rval, finish); + + require_action(filetype == MH_EXECUTE, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "The kernel file is not of type MH_EXECUTE.")); + +#if KERNEL + /* When we're in the kernel, the symbol table can no longer be found by the + * symtab_command alone because the command specifies offsets for the file + * on disk, not the file mapped into memory. We can find the additional + * offset necessary by finding the difference between the linkedit segment's + * vm address and the text segment's vm address. 
+ */ + + textseg = kxld_kext_get_seg_by_name(kext, SEG_TEXT); + require_action(textseg, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO)); + + linkeditseg = kxld_kext_get_seg_by_name(kext, SEG_LINKEDIT); + require_action(linkeditseg, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO)); + + linkedit_offset = linkeditseg->base_addr - textseg->base_addr - + linkeditseg->fileoff; +#endif /* KERNEL */ + + /* Initialize the symbol table */ + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64, + kext->symtab, kext->file, symtab_hdr, linkedit_offset); + require_noerr(rval, finish); + +#if KXLD_USER_OR_OBJECT + /* Save off the order of section names so that we can lay out kext + * sections for MH_OBJECT-based systems. + */ + if (target_supports_object(kext)) { + + rval = kxld_array_init(kext->section_order, sizeof(KXLDSectionName), + kext->sects.nitems); + require_noerr(rval, finish); + + /* Copy the section names into the section_order array for future kext + * section ordering. 
+ */ + for (i = 0, k = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + + for (j = 0; j < seg->sects.nitems; ++j, ++k) { + sect = *(KXLDSect **) kxld_array_get_item(&seg->sects, j); + sname = kxld_array_get_item(kext->section_order, k); + + strlcpy(sname->segname, sect->segname, sizeof(sname->segname)); + strlcpy(sname->sectname, sect->sectname, sizeof(sname->sectname)); + } + } + } +#endif /* KXLD_USER_OR_OBJECT */ + + rval = KERN_SUCCESS; +finish: + return rval; +} + +#if KXLD_USER_OR_BUNDLE +/******************************************************************************* +*******************************************************************************/ +static boolean_t +target_supports_bundle(const KXLDKext *kext) +{ + return (kext->cputype == CPU_TYPE_X86_64); +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_from_bundle(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + struct symtab_command *symtab_hdr = NULL; + u_int filetype = 0; + u_int idx = 0; + + check(kext); + + require_action(target_supports_bundle(kext), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogFiletypeNotSupported, MH_KEXT_BUNDLE)); + + rval = init_from_final_linked_image(kext, &filetype, &symtab_hdr); + require_noerr(rval, finish); + + require_action(filetype == MH_KEXT_BUNDLE, finish, + rval=KERN_FAILURE); + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + kxld_symtab_init_from_macho_32, kxld_symtab_init_from_macho_64, + kext->symtab, kext->file, symtab_hdr, /* linkedit offset */ 0); + require_noerr(rval, finish); + + if (kext->segs.nitems) { + /* Remove the __LINKEDIT segment, since we never keep the symbol + * table around in memory for kexts. 
+ */ + seg = kxld_kext_get_seg_by_name(kext, SEG_LINKEDIT); + if (seg) { + rval = kxld_array_get_index(&kext->segs, seg, &idx); + require_noerr(rval, finish); + + kxld_seg_deinit(seg); + + rval = kxld_array_remove(&kext->segs, idx); + require_noerr(rval, finish); + } + + kext->link_type = KXLD_LINK_KEXT; + } else { + kext->link_type = KXLD_LINK_PSEUDO_KEXT; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} +#endif /* KXLD_USER_OR_BUNDLE */ + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static u_long +get_macho_cmd_data_32(u_char *file, u_long offset, u_int *filetype, u_int *ncmds) +{ + struct mach_header *mach_hdr = (struct mach_header *) (file + offset); + + if (filetype) *filetype = mach_hdr->filetype; + if (ncmds) *ncmds = mach_hdr->ncmds; + + return sizeof(*mach_hdr); +} + +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static u_long +get_macho_cmd_data_64(u_char *file, u_long offset, u_int *filetype, u_int *ncmds) +{ + struct mach_header_64 *mach_hdr = (struct mach_header_64 *) (file + offset); + + if (filetype) *filetype = mach_hdr->filetype; + if (ncmds) *ncmds = mach_hdr->ncmds; + + return sizeof(*mach_hdr); +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +create_vtables(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + KXLDSym *vtable_sym = NULL; + KXLDSym *meta_vtable_sym = NULL; + KXLDSect *vtable_sect = NULL; + KXLDSect *meta_vtable_sect = NULL; + KXLDVTable *vtable = NULL; + KXLDVTable *meta_vtable = 
NULL; + char class_name[KXLD_MAX_NAME_LEN]; + char vtable_name[KXLD_MAX_NAME_LEN]; + char meta_vtable_name[KXLD_MAX_NAME_LEN]; + u_int i = 0; + u_int nvtables = 0; + + if (kext->link_type == KXLD_LINK_KERNEL) { + /* Create a vtable object for every vtable symbol */ + kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_vtable, FALSE); + nvtables = kxld_symtab_iterator_get_num_remaining(&iter); + } else { + /* We walk over the super metaclass pointer symbols, because classes + * with them are the only ones that need patching. Then we double the + * number of vtables we're expecting, because every pointer will have a + * class vtable and a MetaClass vtable. + */ + kxld_symtab_iterator_init(&iter, kext->symtab, + kxld_sym_is_super_metaclass_pointer, FALSE); + nvtables = kxld_symtab_iterator_get_num_remaining(&iter) * 2; + } + + /* Allocate the array of vtable objects. + */ + rval = kxld_array_init(&kext->vtables, sizeof(KXLDVTable), nvtables); + require_noerr(rval, finish); + + /* Initialize from each vtable symbol */ + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + + if (kext->link_type == KXLD_LINK_KERNEL) { + vtable_sym = sym; + } else { + /* Get the class name from the smc pointer */ + rval = kxld_sym_get_class_name_from_super_metaclass_pointer( + sym, class_name, sizeof(class_name)); + require_noerr(rval, finish); + + /* Get the vtable name from the class name */ + rval = kxld_sym_get_vtable_name_from_class_name(class_name, + vtable_name, sizeof(vtable_name)); + require_noerr(rval, finish); + + /* Get the vtable symbol */ + vtable_sym = kxld_symtab_get_symbol_by_name(kext->symtab, vtable_name); + require_action(vtable_sym, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable, + vtable_name, class_name)); + + /* Get the meta vtable name from the class name */ + rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name, + meta_vtable_name, sizeof(meta_vtable_name)); + require_noerr(rval, finish); + + /* 
Get the meta vtable symbol */ + meta_vtable_sym = kxld_symtab_get_symbol_by_name(kext->symtab, + meta_vtable_name); + if (!meta_vtable_sym) { + /* If we don't support strict patching and we can't find the vtable, + * log a warning and reduce the expected number of vtables by 1. + */ + if (target_supports_strict_patching(kext)) { + kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable, + meta_vtable_name, class_name); + rval = KERN_FAILURE; + goto finish; + } else { + kxld_log(kKxldLogPatching, kKxldLogErr, + "Warning: " kKxldLogMissingVtable, + meta_vtable_name, class_name); + kxld_array_resize(&kext->vtables, --nvtables); + } + } + } + + /* Get the vtable's section */ + vtable_sect = kxld_array_get_item(&kext->sects, vtable_sym->sectnum); + require_action(vtable_sect, finish, rval=KERN_FAILURE); + + vtable = kxld_array_get_item(&kext->vtables, i++); + + if (kext->link_type == KXLD_LINK_KERNEL) { + /* Initialize the kernel vtable */ + rval = kxld_vtable_init_from_kernel_macho(vtable, vtable_sym, + vtable_sect, kext->symtab, &kext->relocator); + require_noerr(rval, finish); + } else { + /* Initialize the class vtable */ + if (kext->is_final_image) { + rval = kxld_vtable_init_from_final_macho(vtable, vtable_sym, + vtable_sect, kext->symtab, &kext->relocator, &kext->extrelocs); + require_noerr(rval, finish); + } else { + rval = kxld_vtable_init_from_object_macho(vtable, vtable_sym, + vtable_sect, kext->symtab, &kext->relocator); + require_noerr(rval, finish); + } + + /* meta_vtable_sym will be null when we don't support strict patching + * and can't find the metaclass vtable. 
+ */ + if (meta_vtable_sym) { + /* Get the vtable's section */ + meta_vtable_sect = kxld_array_get_item(&kext->sects, + meta_vtable_sym->sectnum); + require_action(vtable_sect, finish, rval=KERN_FAILURE); + + meta_vtable = kxld_array_get_item(&kext->vtables, i++); + + /* Initialize the metaclass vtable */ + if (kext->is_final_image) { + rval = kxld_vtable_init_from_final_macho(meta_vtable, meta_vtable_sym, + meta_vtable_sect, kext->symtab, &kext->relocator, &kext->extrelocs); + require_noerr(rval, finish); + } else { + rval = kxld_vtable_init_from_object_macho(meta_vtable, meta_vtable_sym, + meta_vtable_sect, kext->symtab, &kext->relocator); + require_noerr(rval, finish); + } + } + } + } + require_action(i == kext->vtables.nitems, finish, + rval=KERN_FAILURE); + + /* Map vtable names to the vtable structures */ + rval = kxld_dict_init(&kext->vtable_index, kxld_dict_string_hash, + kxld_dict_string_cmp, kext->vtables.nitems); + require_noerr(rval, finish); + + for (i = 0; i < kext->vtables.nitems; ++i) { + vtable = kxld_array_get_item(&kext->vtables, i); + rval = kxld_dict_insert(&kext->vtable_index, vtable->name, vtable); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Temporary workaround for PR-6668105 +* new, new[], delete, and delete[] may be overridden globally in a kext. +* We should do this with some sort of weak symbols, but we'll use a whitelist +* for now to minimize risk. 
+*******************************************************************************/ +static void +restrict_private_symbols(KXLDKext *kext) +{ + const char *private_symbols[] = { + KXLD_KMOD_INFO_SYMBOL, + KXLD_OPERATOR_NEW_SYMBOL, + KXLD_OPERATOR_NEW_ARRAY_SYMBOL, + KXLD_OPERATOR_DELETE_SYMBOL, + KXLD_OPERATOR_DELETE_ARRAY_SYMBOL + }; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + const char *name = NULL; + u_int i = 0; + + kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_exported, FALSE); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + for (i = 0; i < const_array_len(private_symbols); ++i) { + name = private_symbols[i]; + if (!streq(sym->name, name)) { + continue; + } + + kxld_sym_mark_private(sym); + } + } +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_kext_clear(KXLDKext *kext) +{ + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + KXLDVTable *vtable = NULL; + u_int i; + + check(kext); + +#if !KERNEL + if (kext->link_type == KXLD_LINK_KERNEL) { + unswap_macho(kext->file, kext->host_order, kext->target_order); + } +#endif /* !KERNEL */ + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + kxld_seg_clear(seg); + } + kxld_array_reset(&kext->segs); + + for (i = 0; i < kext->sects.nitems; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + kxld_sect_clear(sect); + } + kxld_array_reset(&kext->sects); + + for (i = 0; i < kext->vtables.nitems; ++i) { + vtable = kxld_array_get_item(&kext->vtables, i); + kxld_vtable_clear(vtable); + } + kxld_array_reset(&kext->vtables); + + kxld_array_reset(&kext->extrelocs); + kxld_array_reset(&kext->locrelocs); + kxld_dict_clear(&kext->vtable_index); + kxld_relocator_clear(&kext->relocator); + kxld_uuid_clear(&kext->uuid); + + if (kext->symtab) kxld_symtab_clear(kext->symtab); + + kext->link_addr = 0; + kext->kmod_link_addr = 0; + 
kext->cputype = 0; + kext->cpusubtype = 0; + kext->link_type = KXLD_LINK_UNKNOWN; + kext->is_final_image = FALSE; + kext->got_is_created = FALSE; +} + + + +/******************************************************************************* +*******************************************************************************/ +void +kxld_kext_deinit(KXLDKext *kext) +{ + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + KXLDVTable *vtable = NULL; + u_int i; + + check(kext); + +#if !KERNEL + if (kext->link_type == KXLD_LINK_KERNEL) { + unswap_macho(kext->file, kext->host_order, kext->target_order); + } +#endif /* !KERNEL */ + + for (i = 0; i < kext->segs.maxitems; ++i) { + seg = kxld_array_get_slot(&kext->segs, i); + kxld_seg_deinit(seg); + } + kxld_array_deinit(&kext->segs); + + for (i = 0; i < kext->sects.maxitems; ++i) { + sect = kxld_array_get_slot(&kext->sects, i); + kxld_sect_deinit(sect); + } + kxld_array_deinit(&kext->sects); + + for (i = 0; i < kext->vtables.maxitems; ++i) { + vtable = kxld_array_get_slot(&kext->vtables, i); + kxld_vtable_deinit(vtable); + } + kxld_array_deinit(&kext->vtables); + + kxld_array_deinit(&kext->extrelocs); + kxld_array_deinit(&kext->locrelocs); + kxld_dict_deinit(&kext->vtable_index); + + if (kext->symtab) { + kxld_symtab_deinit(kext->symtab); + kxld_free(kext->symtab, kxld_symtab_sizeof()); + } + + bzero(kext, sizeof(*kext)); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_kext_is_true_kext(const KXLDKext *kext) +{ + return (kext->link_type == KXLD_LINK_KEXT); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_kext_get_vmsize(const KXLDKext *kext, u_long *header_size, u_long *vmsize) +{ + check(kext); + check(header_size); + check(vmsize); + *header_size = 0; + *vmsize = 0; + 
+ /* vmsize is the padded header page(s) + segment vmsizes */ + + *header_size = (kext->is_final_image) ? + 0 : round_page(get_macho_header_size(kext)); + *vmsize = *header_size + get_macho_data_size(kext); + +} + +/******************************************************************************* +*******************************************************************************/ +const struct kxld_symtab * +kxld_kext_get_symtab(const KXLDKext *kext) +{ + check(kext); + + return kext->symtab; +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_kext_get_num_symbols(const KXLDKext *kext) +{ + check(kext); + + return kxld_symtab_get_num_symbols(kext->symtab); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_kext_get_vtables(KXLDKext *kext, const KXLDArray **vtables) +{ + check(kext); + check(vtables); + + *vtables = &kext->vtables; +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_kext_get_num_vtables(const KXLDKext *kext) +{ + check(kext); + + return kext->vtables.nitems; +} + +/******************************************************************************* +*******************************************************************************/ +KXLDSeg * +kxld_kext_get_seg_by_name(const KXLDKext *kext, const char *segname) +{ + KXLDSeg *seg = NULL; + u_int i = 0; + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + + if (streq(segname, seg->segname)) break; + + seg = NULL; + } + + return seg; +} + +/******************************************************************************* 
+*******************************************************************************/ +KXLDSect * +kxld_kext_get_sect_by_name(const KXLDKext *kext, const char *segname, + const char *sectname) +{ + KXLDSect *sect = NULL; + u_int i = 0; + + for (i = 0; i < kext->sects.nitems; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + + if (streq(segname, sect->segname) && streq(sectname, sect->sectname)) { + break; + } + + sect = NULL; + } + + return sect; +} + +/******************************************************************************* +*******************************************************************************/ +int +kxld_kext_get_sectnum_for_sect(const KXLDKext *kext, const KXLDSect *sect) +{ + kern_return_t rval = KERN_FAILURE; + u_int idx = -1; + + rval = kxld_array_get_index(&kext->sects, sect, &idx); + if (rval) idx = -1; + + return idx; +} + +/******************************************************************************* +*******************************************************************************/ +const KXLDArray * +kxld_kext_get_section_order(const KXLDKext *kext __unused) +{ +#if KXLD_USER_OR_OBJECT + if (kext->link_type == KXLD_LINK_KERNEL && target_supports_object(kext)) { + return kext->section_order; + } +#endif /* KXLD_USER_OR_OBJECT */ + + return NULL; +} + +/******************************************************************************* +*******************************************************************************/ +static u_long +get_macho_header_size(const KXLDKext *kext) +{ + KXLDSeg *seg = NULL; + u_long header_size = 0; + u_int i = 0; + + check(kext); + + /* Mach, segment, and UUID headers */ + + if (kxld_kext_is_32_bit(kext)) { + header_size += sizeof(struct mach_header); + } else { + header_size += sizeof(struct mach_header_64); + } + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + header_size += kxld_seg_get_macho_header_size(seg, kxld_kext_is_32_bit(kext)); + } + + if 
(kext->uuid.has_uuid) { + header_size += kxld_uuid_get_macho_header_size(); + } + + return header_size; +} + +/******************************************************************************* +*******************************************************************************/ +static u_long +get_macho_data_size(const KXLDKext *kext) +{ + KXLDSeg *seg = NULL; + u_long data_size = 0; + u_int i = 0; + + check(kext); + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + data_size += (u_long) kxld_seg_get_vmsize(seg); + } + + return data_size; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t kxld_kext_export_linked_object(const KXLDKext *kext, + u_char *linked_object, kxld_addr_t *kmod_info_kern) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + u_long size = 0; + u_long header_size = 0; + u_long header_offset = 0; + u_long data_offset = 0; + u_int ncmds = 0; + u_int i = 0; + + check(kext); + check(linked_object); + check(kmod_info_kern); + *kmod_info_kern = 0; + + /* Calculate the size of the headers and data */ + + header_size = get_macho_header_size(kext); + data_offset = (kext->is_final_image) ? 
header_size : round_page(header_size); + size = data_offset + get_macho_data_size(kext); + + /* Copy data to the file */ + + ncmds = kext->segs.nitems + (kext->uuid.has_uuid == TRUE); + + rval = export_macho_header(kext, linked_object, ncmds, + &header_offset, header_size); + require_noerr(rval, finish); + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + + rval = kxld_seg_export_macho_to_vm(seg, linked_object, &header_offset, + header_size, size, kext->link_addr, kxld_kext_is_32_bit(kext)); + require_noerr(rval, finish); + } + + if (kext->uuid.has_uuid) { + rval = kxld_uuid_export_macho(&kext->uuid, linked_object, + &header_offset, header_size); + require_noerr(rval, finish); + } + + *kmod_info_kern = kext->kmod_link_addr; + +#if !KERNEL + unswap_macho(linked_object, kext->host_order, kext->target_order); +#endif /* KERNEL */ + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if !KERNEL +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_kext_export_symbol_file(const KXLDKext *kext, + u_char **_symbol_file, u_long *_filesize) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + u_char *file = NULL; + u_long size = 0; + u_long header_size = 0; + u_long header_offset = 0; + u_long data_offset = 0; + u_int ncmds = 0; + u_int i = 0; + + check(kext); + check(_symbol_file); + *_symbol_file = NULL; + + /* Calculate the size of the file */ + + if (kxld_kext_is_32_bit(kext)) { + header_size += sizeof(struct mach_header); + } else { + header_size += sizeof(struct mach_header_64); + } + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + header_size += kxld_seg_get_macho_header_size(seg, kxld_kext_is_32_bit(kext)); + size += kxld_seg_get_macho_data_size(seg); + } + + header_size += kxld_symtab_get_macho_header_size(); + size += 
kxld_symtab_get_macho_data_size(kext->symtab, FALSE, + kxld_kext_is_32_bit(kext)); + + if (kext->uuid.has_uuid) { + header_size += kxld_uuid_get_macho_header_size(); + } + + data_offset = round_page(header_size); + size += data_offset; + + /* Allocate the symbol file */ + + file = kxld_page_alloc_untracked(size); + require_action(file, finish, rval=KERN_RESOURCE_SHORTAGE); + bzero(file, size); + + /* Copy data to the file */ + + ncmds = kext->segs.nitems + (kext->uuid.has_uuid == TRUE) + 1; /* +1 for symtab */ + rval = export_macho_header(kext, file, ncmds, &header_offset, header_size); + require_noerr(rval, finish); + + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + rval = kxld_seg_export_macho_to_file_buffer(seg, file, &header_offset, + header_size, &data_offset, size, kxld_kext_is_32_bit(kext)); + require_noerr(rval, finish); + } + + rval = kxld_symtab_export_macho(kext->symtab, file, &header_offset, + header_size, &data_offset, size, FALSE, kxld_kext_is_32_bit(kext)); + require_noerr(rval, finish); + + if (kext->uuid.has_uuid) { + rval = kxld_uuid_export_macho(&kext->uuid, file, &header_offset, + header_size); + require_noerr(rval, finish); + } + + header_offset = header_size; + + /* Commit */ + + unswap_macho(file, kext->host_order, kext->target_order); + + *_filesize = size; + *_symbol_file = file; + file = NULL; + rval = KERN_SUCCESS; + +finish: + + if (file) { + kxld_page_free_untracked(file, size); + file = NULL; + } + + check(!file); + check((!rval) ^ (!*_symbol_file)); + + return rval; +} +#endif + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_kext_target_needs_swap(const KXLDKext *kext __unused) +{ +#if KERNEL + return FALSE; +#else + return (kext->target_order != kext->host_order); +#endif /* KERNEL */ +} + 
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +export_macho_header(const KXLDKext *kext, u_char *buf, u_int ncmds, + u_long *header_offset, u_long header_size) +{ + kern_return_t rval = KERN_FAILURE; + + check(kext); + check(buf); + check(header_offset); + + KXLD_3264_FUNC(kxld_kext_is_32_bit(kext), rval, + export_macho_header_32, export_macho_header_64, + kext, buf, ncmds, header_offset, header_size); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +export_macho_header_32(const KXLDKext *kext, u_char *buf, u_int ncmds, + u_long *header_offset, u_long header_size) +{ + kern_return_t rval = KERN_FAILURE; + struct mach_header *mach = NULL; + + check(kext); + check(buf); + check(header_offset); + + require_action(sizeof(*mach) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + mach = (struct mach_header *) (buf + *header_offset); + + mach->magic = MH_MAGIC; + mach->cputype = kext->cputype; + mach->filetype = kext->filetype; + mach->ncmds = ncmds; + mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach)); + mach->flags = MH_NOUNDEFS; + + *header_offset += sizeof(*mach); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +export_macho_header_64(const KXLDKext *kext, u_char *buf, u_int ncmds, + u_long *header_offset, u_long header_size) +{ + kern_return_t rval = KERN_FAILURE; + struct mach_header_64 *mach = NULL; + + check(kext); 
+ check(buf); + check(header_offset); + + require_action(sizeof(*mach) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + mach = (struct mach_header_64 *) (buf + *header_offset); + + mach->magic = MH_MAGIC_64; + mach->cputype = kext->cputype; + mach->cpusubtype = kext->cpusubtype; + mach->filetype = kext->filetype; + mach->ncmds = ncmds; + mach->sizeofcmds = (uint32_t) (header_size - sizeof(*mach)); + mach->flags = MH_NOUNDEFS; + + *header_offset += sizeof(*mach); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_kext_resolve(KXLDKext *kext, struct kxld_dict *patched_vtables, + struct kxld_dict *defined_symbols) +{ + kern_return_t rval = KERN_FAILURE; + + require_action(kext->link_type == KXLD_LINK_PSEUDO_KEXT, finish, + rval=KERN_FAILURE); + + /* Resolve symbols */ + rval = resolve_symbols(kext, defined_symbols, NULL); + require_noerr(rval, finish); + + /* Validate symbols */ + rval = validate_symbols(kext); + require_noerr(rval, finish); + + /* Pseudokexts re-export their dependencies' vtables */ + rval = copy_vtables(kext, patched_vtables); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_kext_relocate(KXLDKext *kext, kxld_addr_t link_address, + KXLDDict *patched_vtables, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + u_int i = 0; + + check(kext); + check(patched_vtables); + check(defined_symbols); + + require_action(kext->link_type == KXLD_LINK_KEXT, finish, rval=KERN_FAILURE); + + kext->link_addr = link_address; + + /* Relocate 
segments (which relocates the sections) */ + for (i = 0; i < kext->segs.nitems; ++i) { + seg = kxld_array_get_item(&kext->segs, i); + kxld_seg_relocate(seg, link_address); + } + + /* Relocate symbols */ + rval = kxld_symtab_relocate(kext->symtab, &kext->sects); + require_noerr(rval, finish); + + /* Populate kmod info structure */ + rval = populate_kmod_info(kext); + require_noerr(rval, finish); + + /* Resolve symbols */ + rval = resolve_symbols(kext, defined_symbols, obsolete_symbols); + require_noerr(rval, finish); + + /* Patch vtables */ + rval = patch_vtables(kext, patched_vtables, defined_symbols); + require_noerr(rval, finish); + + /* Validate symbols */ + rval = validate_symbols(kext); + require_noerr(rval, finish); + + /* Process relocation entries and populate the global offset table. + * + * For final linked images: the relocation entries are contained in a couple + * of tables hanging off the end of the symbol table. The GOT has its own + * section created by the linker; we simply need to fill it. + * + * For object files: the relocation entries are bound to each section. + * The GOT, if it exists for the target architecture, is created by kxld, + * and we must populate it according to our internal structures. 
+ */ + if (kext->is_final_image) { +#if KXLD_USER_OR_BUNDLE + rval = process_symbol_pointers(kext); + require_noerr(rval, finish); + + rval = process_relocs_from_tables(kext); + require_noerr(rval, finish); +#else + require_action(FALSE, finish, rval=KERN_FAILURE); +#endif /* KXLD_USER_OR_BUNDLE */ + } else { +#if KXLD_USER_OR_GOT + /* Populate GOT */ + rval = populate_got(kext); + require_noerr(rval, finish); +#endif /* KXLD_USER_OR_GOT */ +#if KXLD_USER_OR_OBJECT + rval = process_relocs_from_sections(kext); + require_noerr(rval, finish); +#else + require_action(FALSE, finish, rval=KERN_FAILURE); +#endif /* KXLD_USER_OR_OBJECT */ + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +resolve_symbols(KXLDKext *kext, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + void *addrp = NULL; + kxld_addr_t addr = 0; + const char *name = NULL; + boolean_t tests_for_weak = FALSE; + boolean_t error = FALSE; + boolean_t warning = FALSE; + + check(kext); + check(defined_symbols); + + /* Check if the kext tests for weak symbols */ + sym = kxld_symtab_get_symbol_by_name(kext->symtab, KXLD_WEAK_TEST_SYMBOL); + tests_for_weak = (sym != NULL); + + /* Check for duplicate symbols */ + kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_exported, FALSE); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + addrp = kxld_dict_find(defined_symbols, sym->name); + if (addrp) { + /* Convert to a kxld_addr_t */ + if (kxld_kext_is_32_bit(kext)) { + addr = (kxld_addr_t) (*(uint32_t*)addrp); + } else { + addr = (kxld_addr_t) (*(uint64_t*)addrp); + } + + /* Not a problem if the symbols have the same address */ + if (addr == sym->link_addr) { + continue; + } + + if (!error) { + error = TRUE; + 
kxld_log(kKxldLogLinking, kKxldLogErr, + "The following symbols were defined more than once:"); + } + + kxld_log(kKxldLogLinking, kKxldLogErr, + "\t%s: %p - %p", sym->name, + (void *) (uintptr_t) sym->link_addr, + (void *) (uintptr_t) addr); + } + } + require_noerr_action(error, finish, rval=KERN_FAILURE); + + /* Resolve undefined and indirect symbols */ + + /* Iterate over all unresolved symbols */ + kxld_symtab_iterator_init(&iter, kext->symtab, + kxld_sym_is_unresolved, FALSE); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + + /* Common symbols are not supported */ + if (kxld_sym_is_common(sym)) { + + if (!error) { + error = TRUE; + if (target_supports_common(kext)) { + kxld_log(kKxldLogLinking, kKxldLogErr, + "The following common symbols were not resolved:"); + } else { + kxld_log(kKxldLogLinking, kKxldLogErr, + "Common symbols are not supported in kernel extensions. " + "Use -fno-common to build your kext. " + "The following are common symbols:"); + } + } + kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name); + + } else { + + /* Find the address of the defined symbol */ + if (kxld_sym_is_undefined(sym)) { + name = sym->name; + } else { + name = sym->alias; + } + addrp = kxld_dict_find(defined_symbols, name); + + /* Resolve the symbol. If a definition cannot be found, then: + * 1) Psuedokexts log a warning and proceed + * 2) Actual kexts delay the error until validation in case vtable + * patching replaces the undefined symbol. 
+ */ + + if (addrp) { + + /* Convert to a kxld_addr_t */ + if (kxld_kext_is_32_bit(kext)) { + addr = (kxld_addr_t) (*(uint32_t*)addrp); + } else { + addr = (kxld_addr_t) (*(uint64_t*)addrp); + } + + boolean_t is_exported = (kext->link_type == KXLD_LINK_PSEUDO_KEXT); + + rval = kxld_sym_resolve(sym, addr, is_exported); + require_noerr(rval, finish); + + if (obsolete_symbols && kxld_dict_find(obsolete_symbols, name)) { + kxld_log(kKxldLogLinking, kKxldLogWarn, + "This kext uses obsolete symbol %s.", name); + } + + } else if (kext->link_type == KXLD_LINK_PSEUDO_KEXT) { + /* Pseudokexts ignore undefined symbols, because any actual + * kexts that need those symbols will fail to link anyway, so + * there's no need to block well-behaved kexts. + */ + if (!warning) { + kxld_log(kKxldLogLinking, kKxldLogWarn, + "This symbol set has the following unresolved symbols:"); + warning = TRUE; + } + kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name); + kxld_sym_delete(sym); + + } else if (kxld_sym_is_weak(sym)) { + /* Make sure that the kext has referenced gOSKextUnresolved. + */ + require_action(tests_for_weak, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + "This kext has weak references but does not test for " + "them. Test for weak references with " + "OSKextIsSymbolResolved().")); + +#if KERNEL + /* Get the address of the default weak address. + */ + addr = (kxld_addr_t) &kext_weak_symbol_referenced; +#else + /* This is run during symbol generation only, so we only + * need a filler value here. 
+ */ + addr = kext->link_addr; +#endif /* KERNEL */ + + rval = kxld_sym_resolve(sym, addr, /* exported */ FALSE); + require_noerr(rval, finish); + } + } + } + require_noerr_action(error, finish, rval=KERN_FAILURE); + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +target_supports_strict_patching(KXLDKext *kext) +{ + check(kext); + + return (kext->cputype != CPU_TYPE_I386 && + kext->cputype != CPU_TYPE_POWERPC); +} + +/******************************************************************************* +* We must patch vtables to ensure binary compatibility, and to perform that +* patching, we have to determine the vtables' inheritance relationships. The +* MetaClass system gives us a way to do that: +* 1) Iterate over all of the super MetaClass pointer symbols. Every class +* that inherits from OSObject will have a pointer in its MetaClass that +* points to the MetaClass's super MetaClass. +* 2) Derive the name of the class from the super MetaClass pointer. +* 3) Derive the name of the class's vtable from the name of the class +* 4) Follow the super MetaClass pointer to get the address of the super +* MetaClass's symbol +* 5) Look up the super MetaClass symbol by address +* 6) Derive the super class's name from the super MetaClass name +* 7) Derive the super class's vtable from the super class's name +* This procedure will allow us to find all of the OSObject-derived classes and +* their super classes, and thus patch all of the vtables. +* +* We also have to take care to patch up the MetaClass's vtables. The +* MetaClasses follow a parallel hierarchy to the classes, so once we have the +* class name and super class name, we can also derive the MetaClass name and +* the super MetaClass name, and thus find and patch their vtables as well. 
*******************************************************************************/

/* Name of the vtable that every class's metaclass vtable is patched against */
#define kOSMetaClassVTableName "__ZTV11OSMetaClass"

/* Patches the vtable of every class that has a super metaclass pointer symbol
 * in the kext, together with the class's metaclass vtable when there is one.
 *
 * A class can only be patched once its super class's vtable is present in
 * patched_vtables, so the symbol list is walked repeatedly until a pass counts
 * every vtable as patched.  A pass that patches nothing sets `failure'; the
 * next pass then only logs the vtables that are still unpatched, and the
 * function returns KERN_FAILURE.
 *
 * kext             kext whose vtables are patched
 * patched_vtables  in/out: vtables patched so far, keyed by vtable name
 * defined_symbols  externally defined symbols, consulted for final-class
 *                  marker symbols
 */
static kern_return_t
patch_vtables(KXLDKext *kext, KXLDDict *patched_vtables,
    KXLDDict *defined_symbols)
{
    kern_return_t rval = KERN_FAILURE;
    KXLDSymtabIterator iter;
    KXLDSym *metaclass = NULL;
    KXLDSym *super_metaclass_pointer = NULL;
    KXLDSym *final_sym = NULL;
    KXLDVTable *vtable = NULL;
    KXLDVTable *super_vtable = NULL;
    char class_name[KXLD_MAX_NAME_LEN];
    char super_class_name[KXLD_MAX_NAME_LEN];
    char vtable_name[KXLD_MAX_NAME_LEN];
    char super_vtable_name[KXLD_MAX_NAME_LEN];
    char final_sym_name[KXLD_MAX_NAME_LEN];
    size_t len = 0;
    u_int nvtables = 0;     /* number of super metaclass pointer symbols */
    u_int npatched = 0;     /* classes counted as patched in this pass */
    u_int nprogress = 0;    /* classes newly patched in this pass */
    boolean_t failure = FALSE;  /* set once a pass makes no progress */

    check(kext);
    check(patched_vtables);

    /* Find each super meta class pointer symbol */

    kxld_symtab_iterator_init(&iter, kext->symtab,
        kxld_sym_is_super_metaclass_pointer, FALSE);
    nvtables = kxld_symtab_iterator_get_num_remaining(&iter);

    /* Each iteration is one full pass over the symbols */
    while (npatched < nvtables) {
        npatched = 0;
        nprogress = 0;
        kxld_symtab_iterator_reset(&iter);
        while((super_metaclass_pointer = kxld_symtab_iterator_get_next(&iter)))
        {
            /* Get the class name from the smc pointer */
            rval = kxld_sym_get_class_name_from_super_metaclass_pointer(
                super_metaclass_pointer, class_name, sizeof(class_name));
            require_noerr(rval, finish);

            /* Get the vtable name from the class name */
            rval = kxld_sym_get_vtable_name_from_class_name(class_name,
                vtable_name, sizeof(vtable_name));
            require_noerr(rval, finish);

            /* Get the vtable and make sure it hasn't been patched */
            vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
            require_action(vtable, finish, rval=KERN_FAILURE;
                kxld_log(kKxldLogPatching, kKxldLogErr, kKxldLogMissingVtable,
                    vtable_name, class_name));

            if (!vtable->is_patched) {

                /* Find the SMCP's meta class symbol */
                rval = get_metaclass_symbol_from_super_meta_class_pointer_symbol(
                    kext, super_metaclass_pointer, &metaclass);
                require_noerr(rval, finish);

                /* Get the super class name from the super metaclass */
                rval = kxld_sym_get_class_name_from_metaclass(metaclass,
                    super_class_name, sizeof(super_class_name));
                require_noerr(rval, finish);

                /* Get the super vtable name from the class name */
                rval = kxld_sym_get_vtable_name_from_class_name(super_class_name,
                    super_vtable_name, sizeof(super_vtable_name));
                require_noerr(rval, finish);

                /* On the reporting pass, just list this unpatched vtable.
                 * The `continue' skips the ++npatched below.
                 */
                if (failure) {
                    kxld_log(kKxldLogPatching, kKxldLogErr,
                        "\t%s (super vtable %s)", vtable_name, super_vtable_name);
                    continue;
                }

                /* Get the super vtable if it's been patched */
                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
                /* Parent not patched yet: retry this class on a later pass */
                if (!super_vtable) continue;

                /* Get the final symbol's name from the super vtable */
                rval = kxld_sym_get_final_sym_name_from_class_name(super_class_name,
                    final_sym_name, sizeof(final_sym_name));
                require_noerr(rval, finish);

                /* Verify that the final symbol does not exist.  First check
                 * all the externally defined symbols, then check locally.
                 */
                final_sym = kxld_dict_find(defined_symbols, final_sym_name);
                if (!final_sym) {
                    final_sym = kxld_symtab_get_symbol_by_name(kext->symtab,
                        final_sym_name);
                }
                require_action(!final_sym, finish,
                    rval=KERN_FAILURE;
                    kxld_log(kKxldLogPatching, kKxldLogErr,
                        "Class %s is a subclass of final class %s.",
                        class_name, super_class_name));

                /* Patch the class's vtable */
                rval = kxld_vtable_patch(vtable, super_vtable, kext->symtab,
                    target_supports_strict_patching(kext));
                require_noerr(rval, finish);

                /* Add the class's vtable to the set of patched vtables */
                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
                require_noerr(rval, finish);

                /* Get the meta vtable name from the class name.  From here on
                 * vtable_name and vtable refer to the metaclass vtable.
                 */
                rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name,
                    vtable_name, sizeof(vtable_name));
                require_noerr(rval, finish);

                /* Get the meta vtable.  Whether or not it should exist has already
                 * been tested in create_vtables(), so if it doesn't exist and we're
                 * still running, we can safely skip it.
                 */
                vtable = kxld_dict_find(&kext->vtable_index, vtable_name);
                if (!vtable) {
                    /* The class itself was patched above, so count it here
                     * because the `continue' bypasses the counters below.
                     */
                    ++nprogress;
                    ++npatched;
                    continue;
                }
                require_action(!vtable->is_patched, finish, rval=KERN_FAILURE);

                /* There is no way to look up a metaclass vtable at runtime, but
                 * we know that every class's metaclass inherits directly from
                 * OSMetaClass, so we just hardcode that vtable name here.
                 */
                len = strlcpy(super_vtable_name, kOSMetaClassVTableName,
                    sizeof(super_vtable_name));
                require_action(len == const_strlen(kOSMetaClassVTableName),
                    finish, rval=KERN_FAILURE);

                /* Get the super meta vtable */
                super_vtable = kxld_dict_find(patched_vtables, super_vtable_name);
                require_action(super_vtable && super_vtable->is_patched,
                    finish, rval=KERN_FAILURE);

                /* Patch the meta class's vtable */
                rval = kxld_vtable_patch(vtable, super_vtable,
                    kext->symtab, target_supports_strict_patching(kext));
                require_noerr(rval, finish);

                /* Add the MetaClass's vtable to the set of patched vtables */
                rval = kxld_dict_insert(patched_vtables, vtable->name, vtable);
                require_noerr(rval, finish);

                ++nprogress;
            }

            /* Reached for classes that were already patched on entry and for
             * classes fully patched in this pass.
             */
            ++npatched;
        }

        /* A reporting pass has just listed the stragglers: give up */
        require_action(!failure, finish, rval=KERN_FAILURE);
        if (!nprogress) {
            /* Nothing was patched in this pass.  Log the heading now; the
             * next pass lists the unpatchable vtables under it.
             */
            failure = TRUE;
            kxld_log(kKxldLogPatching, kKxldLogErr,
                "The following vtables were unpatchable because each one's "
                "parent vtable either was not found or also was not patchable:");
        }
    }

    rval = KERN_SUCCESS;
finish:
    return rval;
}

/*******************************************************************************
* Logs every symbol that is still unresolved in the kext's symbol table.
* Returns KERN_FAILURE if there is at least one, KERN_SUCCESS otherwise.
*******************************************************************************/
static kern_return_t
validate_symbols(KXLDKext *kext)
{
    kern_return_t rval = KERN_FAILURE;
    KXLDSymtabIterator iter;
    KXLDSym *sym = NULL;
    u_int error = FALSE;

    /* Check for any unresolved symbols */
    kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_unresolved, FALSE);
    while ((sym = kxld_symtab_iterator_get_next(&iter))) {
        /* Emit the heading once, before the first symbol */
        if (!error) {
            error = TRUE;
            kxld_log(kKxldLogLinking, kKxldLogErr,
                "The following symbols are unresolved for this kext:");
        }
        kxld_log(kKxldLogLinking, kKxldLogErr, "\t%s", sym->name);
    }
    require_noerr_action(error, finish, rval=KERN_FAILURE);

    rval = KERN_SUCCESS;

finish:
    return rval;
}

#if KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +add_section(KXLDKext *kext, KXLDSect **sect) +{ + kern_return_t rval = KERN_FAILURE; + u_int nsects = kext->sects.nitems; + + rval = kxld_array_resize(&kext->sects, nsects + 1); + require_noerr(rval, finish); + + *sect = kxld_array_get_item(&kext->sects, nsects); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_GOT || KXLD_USER_OR_COMMON */ + +#if KXLD_USER_OR_GOT +/******************************************************************************* +*******************************************************************************/ +static boolean_t +target_has_got(const KXLDKext *kext) +{ + return FALSE: +} + +/******************************************************************************* +* Create and initialize the Global Offset Table +*******************************************************************************/ +static kern_return_t +create_got(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + u_int ngots = 0; + u_int i = 0; + + if (!target_has_got(kext)) { + rval = KERN_SUCCESS; + goto finish; + } + + for (i = 0; i < kext->sects.nitems; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + ngots += kxld_sect_get_ngots(sect, &kext->relocator, + kext->symtab); + } + + rval = add_section(kext, §); + require_noerr(rval, finish); + + rval = kxld_sect_init_got(sect, ngots); + require_noerr(rval, finish); + + kext->got_is_created = TRUE; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +populate_got(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + u_int i = 0; + + if (!target_has_got(kext) || 
!kext->got_is_created) { + rval = KERN_SUCCESS; + goto finish; + } + + for (i = 0; i < kext->sects.nitems; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + if (streq_safe(sect->segname, KXLD_SEG_GOT, sizeof(KXLD_SEG_GOT)) && + streq_safe(sect->sectname, KXLD_SECT_GOT, sizeof(KXLD_SECT_GOT))) + { + kxld_sect_populate_got(sect, kext->symtab, + kxld_kext_target_needs_swap(kext)); + break; + } + } + + require_action(i < kext->sects.nitems, finish, rval=KXLD_MISSING_GOT); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_GOT */ + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +target_supports_common(const KXLDKext *kext) +{ + check(kext); + return (kext->cputype == CPU_TYPE_I386 || + kext->cputype == CPU_TYPE_POWERPC); +} + +#if KXLD_USER_OR_COMMON +/******************************************************************************* +* If there are common symbols, calculate how much space they'll need +* and create/grow the __DATA __common section to accommodate them. +* Then, resolve them against that section. 
+*******************************************************************************/ +static kern_return_t +resolve_common_symbols(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + KXLDSect *sect = NULL; + kxld_addr_t base_addr = 0; + kxld_size_t size = 0; + kxld_size_t total_size = 0; + u_int align = 0; + u_int max_align = 0; + u_int sectnum = 0; + + if (!target_supports_common(kext)) { + rval = KERN_SUCCESS; + goto finish; + } + + /* Iterate over the common symbols to calculate their total aligned size */ + kxld_symtab_iterator_init(&iter, kext->symtab, kxld_sym_is_common, FALSE); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + align = kxld_sym_get_common_align(sym); + size = kxld_sym_get_common_size(sym); + + if (align > max_align) max_align = align; + + total_size = kxld_align_address(total_size, align) + size; + } + + /* If there are common symbols, grow or create the __DATA __common section + * to hold them. + */ + if (total_size) { + sect = kxld_kext_get_sect_by_name(kext, SEG_DATA, SECT_COMMON); + if (sect) { + base_addr = sect->base_addr + sect->size; + + kxld_sect_grow(sect, total_size, max_align); + } else { + base_addr = 0; + + rval = add_section(kext, §); + require_noerr(rval, finish); + + kxld_sect_init_zerofill(sect, SEG_DATA, SECT_COMMON, + total_size, max_align); + } + + /* Resolve the common symbols against the new section */ + rval = kxld_array_get_index(&kext->sects, sect, §num); + require_noerr(rval, finish); + + kxld_symtab_iterator_reset(&iter); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + align = kxld_sym_get_common_align(sym); + size = kxld_sym_get_common_size(sym); + + base_addr = kxld_align_address(base_addr, align); + kxld_sym_resolve_common(sym, sectnum, base_addr); + + base_addr += size; + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_COMMON */ + 
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +get_metaclass_symbol_from_super_meta_class_pointer_symbol(KXLDKext *kext, + KXLDSym *super_metaclass_pointer_sym, KXLDSym **metaclass) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + KXLDReloc *reloc = NULL; + uint32_t offset = 0; + + check(kext); + check(super_metaclass_pointer_sym); + check(metaclass); + *metaclass = NULL; + + sect = kxld_array_get_item(&kext->sects, super_metaclass_pointer_sym->sectnum); + require_action(sect, finish, rval=KERN_FAILURE); + + /* Find the relocation entry for the super metaclass pointer and get the + * symbol associated with that relocation entry + */ + + if (kext->is_final_image) { + /* The relocation entry could be in either the external or local + * relocation entries. kxld_reloc_get_symbol() can handle either + * type. + */ + reloc = kxld_reloc_get_reloc_by_offset(&kext->extrelocs, + super_metaclass_pointer_sym->base_addr); + if (!reloc) { + reloc = kxld_reloc_get_reloc_by_offset(&kext->locrelocs, + super_metaclass_pointer_sym->base_addr); + } + require_action(reloc, finish, rval=KERN_FAILURE); + + *metaclass = kxld_reloc_get_symbol(&kext->relocator, reloc, kext->file, + kext->symtab); + } else { + offset = kxld_sym_get_section_offset(super_metaclass_pointer_sym, sect); + + reloc = kxld_reloc_get_reloc_by_offset(§->relocs, offset); + require_action(reloc, finish, rval=KERN_FAILURE); + + *metaclass = kxld_reloc_get_symbol(&kext->relocator, reloc, sect->data, + kext->symtab); + } + require_action(*metaclass, finish, rval=KERN_FAILURE); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +copy_vtables(KXLDKext *kext, const KXLDDict 
*patched_vtables) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + KXLDVTable *vtable = NULL, *src = NULL; + u_int i = 0; + u_int nvtables = 0; + char class_name[KXLD_MAX_NAME_LEN]; + char meta_vtable_name[KXLD_MAX_NAME_LEN]; + + kxld_symtab_iterator_init(&iter, kext->symtab, + kxld_sym_is_class_vtable, FALSE); + + /* The iterator tracks all the class vtables, so we double the number of + * vtables we're expecting because we use the class vtables to find the + * MetaClass vtables. + */ + nvtables = kxld_symtab_iterator_get_num_remaining(&iter) * 2; + rval = kxld_array_init(&kext->vtables, sizeof(KXLDVTable), nvtables); + require_noerr(rval, finish); + + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + src = kxld_dict_find(patched_vtables, sym->name); + require_action(src, finish, rval=KERN_FAILURE); + + vtable = kxld_array_get_item(&kext->vtables, i++); + rval = kxld_vtable_copy(vtable, src); + require_noerr(rval, finish); + + rval = kxld_sym_get_class_name_from_vtable(sym, + class_name, sizeof(class_name)); + require_noerr(rval, finish); + + rval = kxld_sym_get_meta_vtable_name_from_class_name(class_name, + meta_vtable_name, sizeof(meta_vtable_name)); + require_noerr(rval, finish); + + /* Some classes don't have a MetaClass, so when we run across one + * of those, we shrink the vtable array by 1. 
+ */ + src = kxld_dict_find(patched_vtables, meta_vtable_name); + if (src) { + vtable = kxld_array_get_item(&kext->vtables, i++); + rval = kxld_vtable_copy(vtable, src); + require_noerr(rval, finish); + } else { + kxld_array_resize(&kext->vtables, kext->vtables.nitems - 1); + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_OBJECT +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +process_relocs_from_sections(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + u_int i = 0; + + for (i = 0; i < kext->sects.nitems; ++i) { + sect = kxld_array_get_item(&kext->sects, i); + rval = kxld_sect_process_relocs(sect, &kext->relocator, + &kext->sects, kext->symtab); + require_noerr_action(rval, finish, + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidSectReloc, + i, sect->segname, sect->sectname)); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_OBJECT */ + +#if KXLD_USER_OR_BUNDLE +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +process_relocs_from_tables(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + KXLDSeg *seg = NULL; + u_int i = 0; + + /* Offsets for relocations in relocation tables are based on the vm + * address of the first segment. 
+ */ + seg = kxld_array_get_item(&kext->segs, 0); + + /* Process external relocations */ + for (i = 0; i < kext->extrelocs.nitems; ++i) { + reloc = kxld_array_get_item(&kext->extrelocs, i); + + rval = kxld_relocator_process_table_reloc(&kext->relocator, reloc, seg, + kext->file, &kext->sects, kext->symtab); + require_noerr_action(rval, finish, + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidExtReloc, i)); + } + + /* Process local relocations */ + for (i = 0; i < kext->locrelocs.nitems; ++i) { + reloc = kxld_array_get_item(&kext->locrelocs, i); + + rval = kxld_relocator_process_table_reloc(&kext->relocator, reloc, seg, + kext->file, &kext->sects, kext->symtab); + require_noerr_action(rval, finish, + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogInvalidIntReloc, i)); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static void +add_to_ptr(u_char *symptr, kxld_addr_t val, boolean_t is_32_bit) +{ + if (is_32_bit) { + uint32_t *ptr = (uint32_t *) symptr; + *ptr += (uint32_t) val; + } else { + uint64_t *ptr = (uint64_t *) symptr; + *ptr += (uint64_t) val; + } +} + +#define SECT_SYM_PTRS "__nl_symbol_ptr" + +/******************************************************************************* +* Final linked images create an __nl_symbol_ptr section for the global offset +* table and for symbol pointer lookups in general. Rather than use relocation +* entries, the linker creates an "indirect symbol table" which stores indexes +* into the symbol table corresponding to the entries of this section. This +* function populates the section with the relocated addresses of those symbols. 
+*******************************************************************************/ +static kern_return_t +process_symbol_pointers(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + KXLDSym *sym = NULL; + int32_t *symidx = NULL; + u_char *symptr = NULL; + u_long symptrsize = 0; + u_int nsyms = 0; + u_int firstsym = 0; + u_int i = 0; + + check(kext); + + require_action(kext->is_final_image && kext->dysymtab_hdr, + finish, rval=KERN_FAILURE); + + /* Get the __DATA,__nl_symbol_ptr section. If it doesn't exist, we have + * nothing to do. + */ + + sect = kxld_kext_get_sect_by_name(kext, SEG_DATA, SECT_SYM_PTRS); + if (!sect) { + rval = KERN_SUCCESS; + goto finish; + } + + require_action(sect->flags & S_NON_LAZY_SYMBOL_POINTERS, + finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Section %s,%s does not have S_NON_LAZY_SYMBOL_POINTERS flag.", + SEG_DATA, SECT_SYM_PTRS)); + + /* Calculate the table offset and number of entries in the section */ + + if (kxld_kext_is_32_bit(kext)) { + symptrsize = sizeof(uint32_t); + } else { + symptrsize = sizeof(uint64_t); + } + + nsyms = (u_int) (sect->size / symptrsize); + firstsym = sect->reserved1; + + require_action(firstsym + nsyms <= kext->dysymtab_hdr->nindirectsyms, + finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO)); + + /* Iterate through the indirect symbol table and fill in the section of + * symbol pointers. There are three cases: + * 1) A normal symbol - put its value directly in the table + * 2) An INDIRECT_SYMBOL_LOCAL - symbols that are local and already have + * their offset from the start of the file in the section. Simply + * add the file's link address to fill this entry. + * 3) An INDIRECT_SYMBOL_ABS - prepopulated absolute symbols. No + * action is required. 
+ */ + + symidx = (int32_t *) (kext->file + kext->dysymtab_hdr->indirectsymoff); + symidx += firstsym; + symptr = sect->data; + for (i = 0; i < nsyms; ++i, ++symidx, symptr+=symptrsize) { + if (*symidx & INDIRECT_SYMBOL_LOCAL) { + if (*symidx & INDIRECT_SYMBOL_ABS) continue; + + add_to_ptr(symptr, kext->link_addr, kxld_kext_is_32_bit(kext)); + } else { + sym = kxld_symtab_get_symbol_by_index(kext->symtab, *symidx); + require_action(sym, finish, rval=KERN_FAILURE); + + add_to_ptr(symptr, sym->link_addr, kxld_kext_is_32_bit(kext)); + } + } + + rval = KERN_SUCCESS; +finish: + return rval; +} +#endif /* KXLD_USER_OR_BUNDLE */ + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +populate_kmod_info(KXLDKext *kext) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *kmodsect = NULL; + KXLDSym *kmodsym = NULL; + u_long kmod_offset = 0; + u_long header_size; + u_long size; + + if (kext->link_type != KXLD_LINK_KEXT) { + rval = KERN_SUCCESS; + goto finish; + } + + kxld_kext_get_vmsize(kext, &header_size, &size); + + kmodsym = kxld_symtab_get_symbol_by_name(kext->symtab, KXLD_KMOD_INFO_SYMBOL); + require_action(kmodsym, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogNoKmodInfo)); + + kmodsect = kxld_array_get_item(&kext->sects, kmodsym->sectnum); + kmod_offset = (u_long) (kmodsym->base_addr - kmodsect->base_addr); + + kext->kmod_info = (kmod_info_t *) (kmodsect->data + kmod_offset); + kext->kmod_link_addr = kmodsym->link_addr; + + if (kxld_kext_is_32_bit(kext)) { + kmod_info_32_v1_t *kmod = (kmod_info_32_v1_t *) (kext->kmod_info); + kmod->address = (uint32_t) kext->link_addr; + kmod->size = (uint32_t) size; + kmod->hdr_size = (uint32_t) header_size; + +#if !KERNEL + if (kxld_kext_target_needs_swap(kext)) { + kmod->address = OSSwapInt32(kmod->address); + kmod->size = OSSwapInt32(kmod->size); + kmod->hdr_size = 
OSSwapInt32(kmod->hdr_size); + } +#endif /* !KERNEL */ + } else { + kmod_info_64_v1_t *kmod = (kmod_info_64_v1_t *) (kext->kmod_info); + kmod->address = kext->link_addr; + kmod->size = size; + kmod->hdr_size = header_size; + +#if !KERNEL + if (kxld_kext_target_needs_swap(kext)) { + kmod->address = OSSwapInt64(kmod->address); + kmod->size = OSSwapInt64(kmod->size); + kmod->hdr_size = OSSwapInt64(kmod->hdr_size); + } +#endif /* !KERNEL */ + } + + + rval = KERN_SUCCESS; + +finish: + return rval; +} + diff --git a/libkern/kxld/kxld_kext.h b/libkern/kxld/kxld_kext.h new file mode 100644 index 000000000..20eeaf501 --- /dev/null +++ b/libkern/kxld/kxld_kext.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_KEXT_H_ +#define _KXLD_KEXT_H_ + +#include +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +struct kxld_array; +struct kxld_kext; +struct kxld_dict; +struct kxld_sect; +struct kxld_seg; +struct kxld_symtab; +struct kxld_vtable; +typedef struct kxld_kext KXLDKext; + +/******************************************************************************* +* Constructors and Destructors +*******************************************************************************/ + +size_t kxld_kext_sizeof(void) + __attribute__((const, nonnull, visibility("hidden"))); + +kern_return_t kxld_kext_init(KXLDKext *kext, u_char *file, u_long size, + const char *name, KXLDFlags flags, boolean_t is_kernel, KXLDArray *seg_order, + cpu_type_t cputype, cpu_subtype_t cpusubtype) + __attribute__((nonnull(1,2,4), visibility("hidden"))); + +void kxld_kext_clear(KXLDKext *kext) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_kext_deinit(KXLDKext *kext) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +boolean_t kxld_kext_is_true_kext(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_kext_is_32_bit(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +void kxld_kext_get_cputype(const KXLDKext *kext, cpu_type_t *cputype, + cpu_subtype_t *cpusubtype) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_kext_validate_cputype(const KXLDKext *kext, cpu_type_t cputype, + cpu_subtype_t cpusubtype) + __attribute__((pure, nonnull, visibility("hidden"))); + +void kxld_kext_get_vmsize(const KXLDKext *kext, u_long *header_size, + u_long *vmsize) + __attribute__((nonnull, visibility("hidden"))); + +const struct kxld_symtab * 
kxld_kext_get_symtab(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_int kxld_kext_get_num_symbols(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +void kxld_kext_get_vtables(KXLDKext *kext, const struct kxld_array **vtables) + __attribute__((nonnull, visibility("hidden"))); + +u_int kxld_kext_get_num_vtables(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +struct kxld_seg * kxld_kext_get_seg_by_name(const KXLDKext *kext, + const char *segname) + __attribute__((pure, nonnull, visibility("hidden"))); + +struct kxld_sect * kxld_kext_get_sect_by_name(const KXLDKext *kext, + const char *segname, const char *sectname) + __attribute__((pure, nonnull, visibility("hidden"))); + +int kxld_kext_get_sectnum_for_sect(const KXLDKext *kext, + const struct kxld_sect *sect) + __attribute__((pure, nonnull, visibility("hidden"))); + +const struct kxld_array * kxld_kext_get_section_order(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* This will be the same size as kxld_kext_get_vmsize */ +kern_return_t kxld_kext_export_linked_object(const KXLDKext *kext, + u_char *linked_object, kxld_addr_t *kmod_info_kern) + __attribute__((nonnull, visibility("hidden"))); + +#if !KERNEL +kern_return_t kxld_kext_export_symbol_file(const KXLDKext *kext, + u_char **symbol_file, u_long *filesize) + __attribute__((nonnull, visibility("hidden"))); +#endif + +boolean_t kxld_kext_target_needs_swap(const KXLDKext *kext) + __attribute__((pure, nonnull, visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +kern_return_t kxld_kext_resolve(KXLDKext *kext, struct kxld_dict *patched_vtables, + struct kxld_dict *defined_symbols) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_kext_relocate(KXLDKext *kext, 
kxld_addr_t link_address, + struct kxld_dict *patched_vtables, struct kxld_dict *defined_symbols, + struct kxld_dict *obsolete_symbols) + __attribute__((nonnull(1,3,4), visibility("hidden"))); + +#endif /* _KXLD_KEXT_H_ */ diff --git a/libkern/kxld/kxld_reloc.c b/libkern/kxld/kxld_reloc.c new file mode 100644 index 000000000..c781d6dc2 --- /dev/null +++ b/libkern/kxld/kxld_reloc.c @@ -0,0 +1,1428 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#if KERNEL + #include +#else + #include + #include +#endif + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_array.h" +#include "kxld_reloc.h" +#include "kxld_sect.h" +#include "kxld_seg.h" +#include "kxld_sym.h" +#include "kxld_symtab.h" +#include "kxld_util.h" + +/* include target-specific relocation prototypes */ +#include +#if KXLD_USER_OR_PPC +#include +#endif +#if KXLD_USER_OR_X86_64 +#include +#endif +#if KXLD_USER_OR_ARM +#include +#endif + +#define KXLD_TARGET_NONE (u_int) 0x0 +#define KXLD_TARGET_VALUE (u_int) 0x1 +#define KXLD_TARGET_SECTNUM (u_int) 0x2 +#define KXLD_TARGET_SYMBOLNUM (u_int) 0x3 +#define KXLD_TARGET_LOOKUP (u_int) 0x4 +#define KXLD_TARGET_GOT (u_int) 0x5 + +#define ABSOLUTE_VALUE(x) (((x) < 0) ? -(x) : (x)) + +#define LO16(x) (0x0000FFFF & x) +#define LO16S(x) ((0x0000FFFF & x) << 16) +#define HI16(x) (0xFFFF0000 & x) +#define HI16S(x) ((0xFFFF0000 & x) >> 16) +#define BIT15(x) (0x00008000 & x) +#define BR14I(x) (0xFFFF0003 & x) +#define BR14D(x) (0x0000FFFC & x) +#define BR24I(x) (0xFC000003 & x) +#define BR24D(x) (0x03FFFFFC & x) +#define HADISP 0x00010000 +#define BR14_LIMIT 0x00008000 +#define BR24_LIMIT 0x02000000 +#define IS_COND_BR_INSTR(x) ((x & 0xFC000000) == 0x40000000) +#define IS_NOT_ALWAYS_TAKEN(x) ((x & 0x03E00000) != 0x02800000) +#define FLIP_PREDICT_BIT(x) x ^= 0x00200000 + +#define SIGN_EXTEND_MASK(n) (1 << ((n) - 1)) +#define SIGN_EXTEND(x,n) (((x) ^ SIGN_EXTEND_MASK(n)) - SIGN_EXTEND_MASK(n)) +#define BR14_NBITS_DISPLACEMENT 16 +#define BR24_NBITS_DISPLACEMENT 26 + +#define X86_64_RIP_RELATIVE_LIMIT 0x80000000UL + +/******************************************************************************* +* Prototypes +*******************************************************************************/ +#if KXLD_USER_OR_I386 +static boolean_t generic_reloc_has_pair(u_int _type) + __attribute__((const)); 
+static boolean_t generic_reloc_is_pair(u_int _type, u_int _prev_type) + __attribute__((const)); +static boolean_t generic_reloc_has_got(u_int _type) + __attribute__((const)); +static kern_return_t generic_process_reloc(u_char *instruction, u_int length, + u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, + u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap); +#endif /* KXLD_USER_OR_I386 */ + +#if KXLD_USER_OR_PPC +static boolean_t ppc_reloc_has_pair(u_int _type) + __attribute__((const)); +static boolean_t ppc_reloc_is_pair(u_int _type, u_int _prev_type) + __attribute__((const)); +static boolean_t ppc_reloc_has_got(u_int _type) + __attribute__((const)); +static kern_return_t ppc_process_reloc(u_char *instruction, u_int length, + u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, + u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap); +#endif /* KXLD_USER_OR_PPC */ + +#if KXLD_USER_OR_X86_64 +static boolean_t x86_64_reloc_has_pair(u_int _type) + __attribute__((const)); +static boolean_t x86_64_reloc_is_pair(u_int _type, u_int _prev_type) + __attribute__((const)); +static boolean_t x86_64_reloc_has_got(u_int _type) + __attribute__((const)); +static kern_return_t x86_64_process_reloc(u_char *instruction, u_int length, + u_int pcrel, kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, + u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap); +static kern_return_t calculate_displacement_x86_64(uint64_t target, + uint64_t adjustment, int32_t *instr32); +#endif /* KXLD_USER_OR_X86_64 */ + +#if KXLD_USER_OR_ARM +static boolean_t arm_reloc_has_pair(u_int _type) + __attribute__((const)); +static boolean_t arm_reloc_is_pair(u_int _type, u_int _prev_type) + __attribute__((const)); +static boolean_t arm_reloc_has_got(u_int _type) + __attribute__((const)); +static kern_return_t arm_process_reloc(u_char *instruction, u_int length, + u_int pcrel, 
kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, + u_int type, kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap); +#endif /* KXLD_USER_OR_ARM */ + +#if KXLD_USER_OR_ILP32 +static kxld_addr_t get_pointer_at_addr_32(u_char *data, u_long offset, + const KXLDRelocator *relocator __unused) + __attribute__((pure, nonnull)); +#endif /* KXLD_USER_OR_ILP32 */ +#if KXLD_USER_OR_LP64 +static kxld_addr_t get_pointer_at_addr_64(u_char *data, u_long offset, + const KXLDRelocator *relocator __unused) + __attribute__((pure, nonnull)); +#endif /* KXLD_USER_OR_LP64 */ + +static u_int count_relocatable_relocs(const KXLDRelocator *relocator, + const struct relocation_info *relocs, u_int nrelocs) + __attribute__((pure)); + +static kern_return_t calculate_targets(kxld_addr_t *_target, + kxld_addr_t *_pair_target, const KXLDReloc *reloc, + const KXLDArray *sectarray, const KXLDSymtab *symtab); +static kern_return_t get_target_by_address_lookup(kxld_addr_t *target, + kxld_addr_t addr, const KXLDArray *sectarray); + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype, + cpu_subtype_t cpusubtype __unused, boolean_t swap) +{ + kern_return_t rval = KERN_FAILURE; + + check(relocator); + + switch(cputype) { +#if KXLD_USER_OR_I386 + case CPU_TYPE_I386: + relocator->reloc_has_pair = generic_reloc_has_pair; + relocator->reloc_is_pair = generic_reloc_is_pair; + relocator->reloc_has_got = generic_reloc_has_got; + relocator->process_reloc = generic_process_reloc; + relocator->is_32_bit = TRUE; + break; +#endif /* KXLD_USER_OR_I386 */ +#if KXLD_USER_OR_PPC + case CPU_TYPE_POWERPC: + relocator->reloc_has_pair = ppc_reloc_has_pair; + relocator->reloc_is_pair = ppc_reloc_is_pair; + relocator->reloc_has_got = ppc_reloc_has_got; + relocator->process_reloc = ppc_process_reloc; + 
relocator->is_32_bit = TRUE; + break; +#endif /* KXLD_USER_OR_PPC */ +#if KXLD_USER_OR_X86_64 + case CPU_TYPE_X86_64: + relocator->reloc_has_pair = x86_64_reloc_has_pair; + relocator->reloc_is_pair = x86_64_reloc_is_pair; + relocator->reloc_has_got = x86_64_reloc_has_got; + relocator->process_reloc = x86_64_process_reloc; + relocator->is_32_bit = FALSE; + break; +#endif /* KXLD_USER_OR_X86_64 */ +#if KXLD_USER_OR_ARM + case CPU_TYPE_ARM: + relocator->reloc_has_pair = arm_reloc_has_pair; + relocator->reloc_is_pair = arm_reloc_is_pair; + relocator->reloc_has_got = arm_reloc_has_got; + relocator->process_reloc = arm_process_reloc; + relocator->is_32_bit = TRUE; + break; +#endif /* KXLD_USER_OR_ARM */ + default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, + kKxldLogArchNotSupported, cputype); + goto finish; + } + + relocator->is_32_bit = kxld_is_32_bit(cputype); + relocator->swap = swap; + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_reloc_create_macho(KXLDArray *relocarray, const KXLDRelocator *relocator, + const struct relocation_info *srcs, u_int nsrcs) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + u_int nrelocs = 0; + const struct relocation_info *src = NULL, *prev_src = NULL; + const struct scattered_relocation_info *scatsrc = NULL, *prev_scatsrc = NULL; + u_int i = 0; + u_int reloc_index = 0; + + check(relocarray); + check(srcs); + + /* If there are no relocation entries, just return */ + if (!nsrcs) { + rval = KERN_SUCCESS; + goto finish; + } + + /* Count the number of non-pair relocs */ + nrelocs = count_relocatable_relocs(relocator, srcs, nsrcs); + + if (nrelocs) { + + /* Allocate the array of relocation entries */ + + rval = kxld_array_init(relocarray, sizeof(KXLDReloc), nrelocs); + require_noerr(rval, finish); + + /* 
Initialize the relocation entries */ + + for (i = 0; i < nsrcs; ++i) { + src = srcs + i; + scatsrc = (const struct scattered_relocation_info *) src; + + /* A section-based relocation entry can be skipped for absolute + * symbols. + */ + + if (!(src->r_address & R_SCATTERED) && !(src->r_extern) && + (R_ABS == src->r_symbolnum)) + { + continue; + } + + /* Pull out the data from the relocation entries. The target_type + * depends on the r_extern bit: + * Scattered -> Section Lookup by Address + * Local (not extern) -> Section by Index + * Extern -> Symbolnum by Index + */ + reloc = kxld_array_get_item(relocarray, reloc_index++); + if (src->r_address & R_SCATTERED) { + reloc->address = scatsrc->r_address; + reloc->pcrel = scatsrc->r_pcrel; + reloc->length = scatsrc->r_length; + reloc->reloc_type = scatsrc->r_type; + reloc->target = scatsrc->r_value; + reloc->target_type = KXLD_TARGET_LOOKUP; + } else { + reloc->address = src->r_address; + reloc->pcrel = src->r_pcrel; + reloc->length = src->r_length; + reloc->reloc_type = src->r_type; + reloc->target = src->r_symbolnum; + + if (0 == src->r_extern) { + reloc->target_type = KXLD_TARGET_SECTNUM; + reloc->target -= 1; + } else { + reloc->target_type = KXLD_TARGET_SYMBOLNUM; + } + } + + /* Find the pair entry if it exists */ + + if (relocator->reloc_has_pair(reloc->reloc_type)) { + ++i; + require_action(i < nsrcs, finish, rval=KERN_FAILURE); + + prev_src = src; + src = srcs + i; + prev_scatsrc = (const struct scattered_relocation_info *) prev_src; + scatsrc = (const struct scattered_relocation_info *) src; + + if (src->r_address & R_SCATTERED) { + require_action(relocator->reloc_is_pair( + scatsrc->r_type, reloc->reloc_type), + finish, rval=KERN_FAILURE); + reloc->pair_target = scatsrc->r_value; + reloc->pair_target_type = KXLD_TARGET_LOOKUP; + } else { + require_action(relocator->reloc_is_pair(src->r_type, + reloc->reloc_type), finish, rval=KERN_FAILURE); + + if (src->r_extern) { + reloc->pair_target = src->r_symbolnum; + 
reloc->pair_target_type = KXLD_TARGET_SYMBOLNUM; + } else { + reloc->pair_target = src->r_address; + reloc->pair_target_type = KXLD_TARGET_VALUE; + } + } + } else { + reloc->pair_target = 0; + if (relocator->reloc_has_got(reloc->reloc_type)) { + reloc->pair_target_type = KXLD_TARGET_GOT; + } else { + reloc->pair_target_type = KXLD_TARGET_NONE; + } + } + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + + +/******************************************************************************* +* Relocatable relocs : +* 1) Are not _PAIR_ relocs +* 2) Don't reference N_ABS symbols +*******************************************************************************/ +static u_int +count_relocatable_relocs(const KXLDRelocator *relocator, + const struct relocation_info *relocs, u_int nrelocs) +{ + u_int num_nonpair_relocs = 0; + u_int i = 0; + u_int prev_type = 0; + const struct relocation_info *reloc = NULL; + const struct scattered_relocation_info *sreloc = NULL; + + check(relocator); + check(relocs); + + /* Loop over all of the relocation entries */ + + num_nonpair_relocs = 1; + prev_type = relocs->r_type; + for (i = 1; i < nrelocs; ++i) { + reloc = relocs + i; + + if (reloc->r_address & R_SCATTERED) { + /* A scattered relocation entry is relocatable as long as it's not a + * pair. + */ + sreloc = (const struct scattered_relocation_info *) reloc; + + num_nonpair_relocs += + (!relocator->reloc_is_pair(sreloc->r_type, prev_type)); + + prev_type = sreloc->r_type; + } else { + /* A normal relocation entry is relocatable if it is not a pair and + * if it is not a section-based relocation for an absolute symbol. 
+ */ + num_nonpair_relocs += + !(relocator->reloc_is_pair(reloc->r_type, prev_type) + || (0 == reloc->r_extern && R_ABS == reloc->r_symbolnum)); + + prev_type = reloc->r_type; + } + + } + + return num_nonpair_relocs; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_relocator_clear(KXLDRelocator *relocator) +{ + bzero(relocator, sizeof(*relocator)); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_relocator_has_pair(const KXLDRelocator *relocator, u_int r_type) +{ + check(relocator); + + return relocator->reloc_has_pair(r_type); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_relocator_is_pair(const KXLDRelocator *relocator, u_int r_type, + u_int prev_r_type) +{ + check(relocator); + + return relocator->reloc_is_pair(r_type, prev_r_type); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_relocator_has_got(const KXLDRelocator *relocator, u_int r_type) +{ + check(relocator); + + return relocator->reloc_has_got(r_type); +} + +/******************************************************************************* +*******************************************************************************/ +KXLDSym * +kxld_reloc_get_symbol(const KXLDRelocator *relocator, const KXLDReloc *reloc, + u_char *data, const KXLDSymtab *symtab) +{ + KXLDSym *sym = NULL; + kxld_addr_t value = 0; + + check(reloc); + check(symtab); + + switch (reloc->target_type) { + case KXLD_TARGET_SYMBOLNUM: + sym = kxld_symtab_get_symbol_by_index(symtab, reloc->target); + break; + 
case KXLD_TARGET_SECTNUM: + if (data) { + KXLD_3264_FUNC(relocator->is_32_bit, value, + get_pointer_at_addr_32, get_pointer_at_addr_64, + data, reloc->address, relocator); + sym = kxld_symtab_get_cxx_symbol_by_value(symtab, value); + } + break; + default: + sym = NULL; + break; + } + + return sym; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_reloc_get_reloc_index_by_offset(const KXLDArray *relocs, + kxld_size_t offset, u_int *idx) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + u_int i = 0; + + for (i = 0; i < relocs->nitems; ++i) { + reloc = kxld_array_get_item(relocs, i); + if (reloc->address == offset) break; + } + + if (i >= relocs->nitems) { + rval = KERN_FAILURE; + goto finish; + } + + *idx = i; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +KXLDReloc * +kxld_reloc_get_reloc_by_offset(const KXLDArray *relocs, kxld_addr_t offset) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + u_int i = 0; + + rval = kxld_reloc_get_reloc_index_by_offset(relocs, offset, &i); + if (rval) goto finish; + + reloc = kxld_array_get_item(relocs, i); + +finish: + return reloc; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kxld_addr_t +get_pointer_at_addr_32(u_char *data, u_long offset, + const KXLDRelocator *relocator __unused) +{ + uint32_t addr = 0; + + check(relocator); + check(data); + + addr = *(uint32_t *) (data + offset); +#if !KERNEL + if (relocator->swap) { + addr = OSSwapInt32(addr); + } +#endif + + return (kxld_addr_t) addr; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if 
KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kxld_addr_t +get_pointer_at_addr_64(u_char *data, u_long offset, + const KXLDRelocator *relocator __unused) +{ + uint64_t addr = 0; + + check(relocator); + check(data); + + addr = *(uint64_t *) (data + offset); +#if !KERNEL + if (relocator->swap) { + addr = OSSwapInt64(addr); + } +#endif + + return (kxld_addr_t) addr; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator, + const KXLDReloc *reloc, const struct kxld_sect *sect, + const KXLDArray *sectarray, const struct kxld_symtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + u_char *instruction = NULL; + kxld_addr_t target = 0; + kxld_addr_t pair_target = 0; + kxld_addr_t base_pc = 0; + kxld_addr_t link_pc = 0; + kxld_addr_t link_disp = 0; + + check(relocator); + check(reloc); + check(sect); + check(sectarray); + check(symtab); + + /* Find the instruction */ + + instruction = sect->data + reloc->address; + + /* Calculate the target */ + + rval = calculate_targets(&target, &pair_target, reloc, sectarray, symtab); + require_noerr(rval, finish); + + base_pc = reloc->address; + link_pc = base_pc + sect->link_addr; + link_disp = sect->link_addr - sect->base_addr; + + /* Relocate */ + + rval = relocator->process_reloc(instruction, reloc->length, reloc->pcrel, + base_pc, link_pc, link_disp, reloc->reloc_type, target, pair_target, + relocator->swap); + require_noerr(rval, finish); + + /* Return */ + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* 
+*******************************************************************************/ +kern_return_t +kxld_reloc_update_symindex(KXLDReloc *reloc, u_int symindex) +{ + kern_return_t rval = KERN_FAILURE; + + require_action(reloc->target_type == KXLD_TARGET_SYMBOLNUM, + finish, rval = KERN_FAILURE); + + reloc->target = symindex; + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_relocator_process_table_reloc(const KXLDRelocator *relocator, + const KXLDReloc *reloc, const KXLDSeg *seg, u_char *file, + const struct kxld_array *sectarray, const struct kxld_symtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + u_char *instruction = NULL; + kxld_addr_t target = 0; + kxld_addr_t pair_target = 0; + kxld_addr_t base_pc = 0; + kxld_addr_t link_pc = 0; + kxld_addr_t link_disp = 0; + + check(relocator); + check(reloc); + check(file); + check(sectarray); + check(symtab); + + /* Find the instruction */ + + instruction = file + seg->fileoff + reloc->address; + + /* Calculate the target */ + + rval = calculate_targets(&target, &pair_target, reloc, sectarray, symtab); + require_noerr(rval, finish); + + base_pc = reloc->address; + link_pc = base_pc + seg->link_addr; + link_disp = seg->link_addr - seg->base_addr; + + /* Relocate */ + + rval = relocator->process_reloc(instruction, reloc->length, reloc->pcrel, + base_pc, link_pc, link_disp, reloc->reloc_type, target, pair_target, + relocator->swap); + require_noerr(rval, finish); + + /* Return */ + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +calculate_targets(kxld_addr_t *_target, kxld_addr_t *_pair_target, + const KXLDReloc *reloc, const KXLDArray 
*sectarray, const KXLDSymtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + const KXLDSect *sect = NULL; + const KXLDSym *sym = NULL; + kxld_addr_t target = 0; + kxld_addr_t pair_target = 0; + + check(_target); + check(_pair_target); + check(sectarray); + check(symtab); + *_target = 0; + *_pair_target = 0; + + /* Find the target based on the lookup type */ + + switch(reloc->target_type) { + case KXLD_TARGET_LOOKUP: + require_action(reloc->pair_target_type == KXLD_TARGET_NONE || + reloc->pair_target_type == KXLD_TARGET_LOOKUP || + reloc->pair_target_type == KXLD_TARGET_VALUE, + finish, rval=KERN_FAILURE); + + rval = get_target_by_address_lookup(&target, reloc->target, sectarray); + require_noerr(rval, finish); + + if (reloc->pair_target_type == KXLD_TARGET_LOOKUP) { + rval = get_target_by_address_lookup(&pair_target, + reloc->pair_target, sectarray); + require_noerr(rval, finish); + } else if (reloc->pair_target_type == KXLD_TARGET_VALUE) { + pair_target = reloc->pair_target; + } + break; + case KXLD_TARGET_SECTNUM: + require_action(reloc->pair_target_type == KXLD_TARGET_NONE || + reloc->pair_target_type == KXLD_TARGET_VALUE, + finish, rval=KERN_FAILURE); + + /* Get the target's section by section number */ + sect = kxld_array_get_item(sectarray, reloc->target); + require_action(sect, finish, rval=KERN_FAILURE); + + /* target is the change in the section's address */ + target = sect->link_addr - sect->base_addr; + + if (reloc->pair_target_type) { + pair_target = reloc->pair_target; + } else { + /* x86_64 needs to know when we have a non-external relocation, + * so we hack that information in here. 
+ */ + pair_target = TRUE; + } + break; + case KXLD_TARGET_SYMBOLNUM: + require_action(reloc->pair_target_type == KXLD_TARGET_NONE || + reloc->pair_target_type == KXLD_TARGET_GOT || + reloc->pair_target_type == KXLD_TARGET_SYMBOLNUM || + reloc->pair_target_type == KXLD_TARGET_VALUE, finish, + rval=KERN_FAILURE); + + /* Get the target's symbol by symbol number */ + sym = kxld_symtab_get_symbol_by_index(symtab, reloc->target); + require_action(sym, finish, rval=KERN_FAILURE); + target = sym->link_addr; + + /* Some relocation types need the GOT entry address instead of the + * symbol's actual address. These types don't have pair relocation + * entries, so we store the GOT entry address as the pair target. + */ + if (reloc->pair_target_type == KXLD_TARGET_VALUE) { + pair_target = reloc->pair_target; + } else if (reloc->pair_target_type == KXLD_TARGET_SYMBOLNUM ) { + sym = kxld_symtab_get_symbol_by_index(symtab, reloc->pair_target); + require_action(sym, finish, rval=KERN_FAILURE); + pair_target = sym->link_addr; + } else if (reloc->pair_target_type == KXLD_TARGET_GOT) { + pair_target = sym->got_addr; + } + break; + default: + rval = KERN_FAILURE; + goto finish; + } + + *_target = target; + *_pair_target = pair_target; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +get_target_by_address_lookup(kxld_addr_t *target, kxld_addr_t addr, + const KXLDArray *sectarray) +{ + kern_return_t rval = KERN_FAILURE; + const KXLDSect *sect = NULL; + kxld_addr_t start = 0; + kxld_addr_t end = 0; + u_int i = 0; + + check(target); + check(sectarray); + *target = 0; + + for (i = 0; i < sectarray->nitems; ++i) { + sect = kxld_array_get_item(sectarray, i); + start = sect->base_addr; + end = start + sect->size; + + if (start <= addr && addr < end) break; + } + require_action(i < sectarray->nitems, 
finish, + rval=KERN_FAILURE); + + *target = sect->link_addr - sect->base_addr; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_I386 +/******************************************************************************* +*******************************************************************************/ +static boolean_t +generic_reloc_has_pair(u_int _type) +{ + enum reloc_type_generic type = _type; + + return (type == GENERIC_RELOC_SECTDIFF || + type == GENERIC_RELOC_LOCAL_SECTDIFF); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +generic_reloc_is_pair(u_int _type, u_int _prev_type __unused) +{ + enum reloc_type_generic type = _type; + + return (type == GENERIC_RELOC_PAIR); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t generic_reloc_has_got(u_int _type __unused) +{ + return FALSE; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +generic_process_reloc(u_char *instruction, u_int length, u_int pcrel, + kxld_addr_t _base_pc, kxld_addr_t _link_pc, kxld_addr_t _link_disp __unused, + u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target, + boolean_t swap __unused) +{ + kern_return_t rval = KERN_FAILURE; + uint32_t base_pc = (uint32_t) _base_pc; + uint32_t link_pc = (uint32_t) _link_pc; + uint32_t *instr_addr = NULL; + uint32_t instr_data = 0; + uint32_t target = (uint32_t) _target; + uint32_t pair_target = (uint32_t) _pair_target; + enum reloc_type_generic type = _type; + + check(instruction); + require_action(length == 2, finish, rval=KERN_FAILURE); + + if (pcrel) target = target + base_pc - link_pc; + + instr_addr = (uint32_t 
*)instruction; + instr_data = *instr_addr; + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + switch (type) { + case GENERIC_RELOC_VANILLA: + instr_data += target; + break; + case GENERIC_RELOC_SECTDIFF: + case GENERIC_RELOC_LOCAL_SECTDIFF: + instr_data = instr_data + target - pair_target; + break; + case GENERIC_RELOC_PB_LA_PTR: + rval = KERN_FAILURE; + goto finish; + case GENERIC_RELOC_PAIR: + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + *instr_addr = instr_data; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_I386 */ + +#if KXLD_USER_OR_PPC +/******************************************************************************* +*******************************************************************************/ +static boolean_t +ppc_reloc_has_pair(u_int _type) +{ + enum reloc_type_ppc type = _type; + + switch(type) { + case PPC_RELOC_HI16: + case PPC_RELOC_LO16: + case PPC_RELOC_HA16: + case PPC_RELOC_LO14: + case PPC_RELOC_JBSR: + case PPC_RELOC_SECTDIFF: + return TRUE; + default: + return FALSE; + } +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +ppc_reloc_is_pair(u_int _type, u_int _prev_type __unused) +{ + enum reloc_type_ppc type = _type; + + return (type == PPC_RELOC_PAIR); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t ppc_reloc_has_got(u_int _type __unused) +{ + return FALSE; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +ppc_process_reloc(u_char *instruction, u_int length, u_int pcrel, + kxld_addr_t _base_pc, 
kxld_addr_t _link_pc, kxld_addr_t _link_disp __unused, + u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target __unused, + boolean_t swap __unused) +{ + kern_return_t rval = KERN_FAILURE; + uint32_t *instr_addr = NULL; + uint32_t instr_data = 0; + uint32_t base_pc = (uint32_t) _base_pc; + uint32_t link_pc = (uint32_t) _link_pc; + uint32_t target = (uint32_t) _target; + uint32_t pair_target = (uint32_t) _pair_target; + int32_t addend = 0; + int32_t displacement = 0; + uint32_t difference = 0; + uint32_t br14_disp_sign = 0; + enum reloc_type_ppc type = _type; + + check(instruction); + require_action(length == 2 || length == 3, finish, + rval=KERN_FAILURE); + + if (pcrel) displacement = target + base_pc - link_pc; + + instr_addr = (uint32_t *)instruction; + instr_data = *instr_addr; + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + switch (type) { + case PPC_RELOC_VANILLA: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + instr_data += target; + break; + case PPC_RELOC_BR14: + require_action(pcrel, finish, rval=KERN_FAILURE); + + addend = BR14D(instr_data); + displacement += SIGN_EXTEND(addend, BR14_NBITS_DISPLACEMENT); + difference = ABSOLUTE_VALUE(displacement); + require_action(difference < BR14_LIMIT, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow)); + + + br14_disp_sign = BIT15(instr_data); + instr_data = BR14I(instr_data) | BR14D(displacement); + + /* If this is a predicted conditional branch (signified by an + * instruction length of 3) that is not branch-always, and the sign of + * the displacement is different after relocation, then flip the y-bit + * to preserve the branch prediction + */ + if ((length == 3) && + IS_COND_BR_INSTR(instr_data) && + IS_NOT_ALWAYS_TAKEN(instr_data) && + (BIT15(instr_data) != br14_disp_sign)) + { + FLIP_PREDICT_BIT(instr_data); + } + break; + case PPC_RELOC_BR24: + require_action(pcrel, finish, rval=KERN_FAILURE); + + addend = 
BR24D(instr_data); + displacement += SIGN_EXTEND(addend, BR24_NBITS_DISPLACEMENT); + difference = ABSOLUTE_VALUE(displacement); + require_action(difference < BR24_LIMIT, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow)); + + instr_data = BR24I(instr_data) | BR24D(displacement); + break; + case PPC_RELOC_HI16: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + target += LO16S(instr_data) | LO16(pair_target); + instr_data = HI16(instr_data) | HI16S(target); + break; + case PPC_RELOC_LO16: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + target += LO16S(pair_target) | LO16(instr_data); + instr_data = HI16(instr_data) | LO16(target); + break; + case PPC_RELOC_HA16: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + instr_data -= BIT15(pair_target) ? 1 : 0; + target += LO16S(instr_data) | LO16(pair_target); + instr_data = HI16(instr_data) | HI16S(target); + instr_data += BIT15(target) ? 1 : 0; + break; + case PPC_RELOC_JBSR: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + /* The generated code as written branches to an island that loads the + * absolute address of the target. If we can branch to the target + * directly with less than 24 bits of displacement, we modify the branch + * instruction to do so which avoids the cost of the island. 
+ */ + + displacement = target + pair_target - link_pc; + difference = ABSOLUTE_VALUE(displacement); + if (difference < BR24_LIMIT) { + instr_data = BR24I(instr_data) | BR24D(displacement); + } + break; + case PPC_RELOC_SECTDIFF: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + instr_data = instr_data + target - pair_target; + break; + case PPC_RELOC_LO14: + case PPC_RELOC_PB_LA_PTR: + case PPC_RELOC_HI16_SECTDIFF: + case PPC_RELOC_LO16_SECTDIFF: + case PPC_RELOC_HA16_SECTDIFF: + case PPC_RELOC_LO14_SECTDIFF: + case PPC_RELOC_LOCAL_SECTDIFF: + rval = KERN_FAILURE; + goto finish; + case PPC_RELOC_PAIR: + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + *instr_addr = instr_data; + + rval = KERN_SUCCESS; +finish: + + return rval; +} +#endif /* KXLD_USER_OR_PPC */ + +#if KXLD_USER_OR_X86_64 +/******************************************************************************* +*******************************************************************************/ +static boolean_t +x86_64_reloc_has_pair(u_int _type) +{ + enum reloc_type_x86_64 type = _type; + + return (type == X86_64_RELOC_SUBTRACTOR); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +x86_64_reloc_is_pair(u_int _type, u_int _prev_type) +{ + enum reloc_type_x86_64 type = _type; + enum reloc_type_x86_64 prev_type = _prev_type; + + return (x86_64_reloc_has_pair(prev_type) && type == X86_64_RELOC_UNSIGNED); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +x86_64_reloc_has_got(u_int _type) +{ + enum reloc_type_x86_64 type = _type; + + return (type == X86_64_RELOC_GOT_LOAD || type == X86_64_RELOC_GOT); +} + 
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +x86_64_process_reloc(u_char *instruction, u_int length, u_int pcrel, + kxld_addr_t _base_pc __unused, kxld_addr_t _link_pc, kxld_addr_t _link_disp, + u_int _type, kxld_addr_t _target, kxld_addr_t _pair_target, + boolean_t swap __unused) +{ + kern_return_t rval = KERN_FAILURE; + enum reloc_type_x86_64 type = _type; + int32_t *instr32p = NULL; + int32_t instr32 = 0; + uint64_t *instr64p = NULL; + uint64_t instr64 = 0; + uint64_t target = _target; + uint64_t pair_target = _pair_target; + uint64_t link_pc = (uint64_t) _link_pc; + uint64_t link_disp = (uint64_t) _link_disp; + uint64_t adjustment = 0; + + check(instruction); + require_action(length == 2 || length == 3, + finish, rval=KERN_FAILURE); + + if (length == 2) { + instr32p = (int32_t *) instruction; + instr32 = *instr32p; + +#if !KERNEL + if (swap) instr32 = OSSwapInt32(instr32); +#endif + + /* There are a number of different small adjustments for pc-relative + * relocation entries. The general case is to subtract the size of the + * relocation (represented by the length parameter), and it applies to + * the GOT types and external SIGNED types. The non-external signed types + * have a different adjustment corresponding to the specific type. 
+ */ + switch (type) { + case X86_64_RELOC_SIGNED: + if (pair_target) { + adjustment = 0; + break; + } + /* Fall through */ + case X86_64_RELOC_SIGNED_1: + if (pair_target) { + adjustment = 1; + break; + } + /* Fall through */ + case X86_64_RELOC_SIGNED_2: + if (pair_target) { + adjustment = 2; + break; + } + /* Fall through */ + case X86_64_RELOC_SIGNED_4: + if (pair_target) { + adjustment = 4; + break; + } + /* Fall through */ + case X86_64_RELOC_BRANCH: + case X86_64_RELOC_GOT: + case X86_64_RELOC_GOT_LOAD: + adjustment = (1 << length); + break; + default: + break; + } + + /* Perform the actual relocation. All of the 32-bit relocations are + * pc-relative except for SUBTRACTOR, so a good chunk of the logic is + * stuck in calculate_displacement_x86_64. The signed relocations are + * a special case, because when they are non-external, the instruction + * already contains the pre-relocation displacement, so we only need to + * find the difference between how far the PC was relocated, and how + * far the target is relocated. Since the target variable already + * contains the difference between the target's base and link + * addresses, we add the difference between the PC's base and link + * addresses to the adjustment variable. This will yield the + * appropriate displacement in calculate_displacement. + */ + switch (type) { + case X86_64_RELOC_BRANCH: + require_action(pcrel, finish, rval=KERN_FAILURE); + adjustment += link_pc; + break; + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: + require_action(pcrel, finish, rval=KERN_FAILURE); + adjustment += (pair_target) ? 
(link_disp) : (link_pc); + break; + case X86_64_RELOC_GOT: + case X86_64_RELOC_GOT_LOAD: + require_action(pcrel, finish, rval=KERN_FAILURE); + adjustment += link_pc; + target = pair_target; + break; + case X86_64_RELOC_SUBTRACTOR: + require_action(!pcrel, finish, rval=KERN_FAILURE); + instr32 = (int32_t) (target - pair_target); + break; + case X86_64_RELOC_UNSIGNED: + default: + rval = KERN_FAILURE; + goto finish; + } + + /* Call calculate_displacement for the pc-relative relocations */ + if (pcrel) { + rval = calculate_displacement_x86_64(target, adjustment, &instr32); + require_noerr(rval, finish); + } + +#if !KERNEL + if (swap) instr32 = OSSwapInt32(instr32); +#endif + + *instr32p = instr32; + } else { + instr64p = (uint64_t *) instruction; + instr64 = *instr64p; + +#if !KERNEL + if (swap) instr64 = OSSwapInt64(instr64); +#endif + + switch (type) { + case X86_64_RELOC_UNSIGNED: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + instr64 += target; + break; + case X86_64_RELOC_SUBTRACTOR: + require_action(!pcrel, finish, rval=KERN_FAILURE); + + instr64 = target - pair_target; + break; + case X86_64_RELOC_SIGNED_1: + case X86_64_RELOC_SIGNED_2: + case X86_64_RELOC_SIGNED_4: + case X86_64_RELOC_GOT_LOAD: + case X86_64_RELOC_BRANCH: + case X86_64_RELOC_SIGNED: + case X86_64_RELOC_GOT: + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr64 = OSSwapInt64(instr64); +#endif + + *instr64p = instr64; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +calculate_displacement_x86_64(uint64_t target, uint64_t adjustment, + int32_t *instr32) +{ + kern_return_t rval = KERN_FAILURE; + int64_t displacement; + uint64_t difference; + + displacement = *instr32 + target - adjustment; + difference = ABSOLUTE_VALUE(displacement); + 
require_action(difference < X86_64_RIP_RELATIVE_LIMIT, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogRelocationOverflow)); + + *instr32 = (int32_t) displacement; + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_X86_64 */ + +#if KXLD_USER_OR_ARM +/******************************************************************************* +*******************************************************************************/ +static boolean_t +arm_reloc_has_pair(u_int _type) +{ + enum reloc_type_arm type = _type; + + switch(type) { + case ARM_RELOC_SECTDIFF: + return TRUE; + default: + return FALSE; + } + return FALSE; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +arm_reloc_is_pair(u_int _type, u_int _prev_type __unused) +{ + enum reloc_type_arm type = _type; + + return (type == ARM_RELOC_PAIR); +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +arm_reloc_has_got(u_int _type __unused) +{ + return FALSE; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +arm_process_reloc(u_char *instruction, u_int length, u_int pcrel, + kxld_addr_t _base_pc __unused, kxld_addr_t _link_pc __unused, kxld_addr_t _link_disp __unused, + u_int _type __unused, kxld_addr_t _target __unused, kxld_addr_t _pair_target __unused, + boolean_t swap __unused) +{ + kern_return_t rval = KERN_FAILURE; + uint32_t *instr_addr = NULL; + uint32_t instr_data = 0; + uint32_t base_pc = (uint32_t) _base_pc; + uint32_t link_pc = (uint32_t) _link_pc; + uint32_t target = (uint32_t) _target; + int32_t displacement = 0; + enum reloc_type_arm type = _type; + + 
check(instruction); + require_action(length == 2, finish, rval=KERN_FAILURE); + + if (pcrel) displacement = target + base_pc - link_pc; + + instr_addr = (uint32_t *)instruction; + instr_data = *instr_addr; + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + switch (type) { + case ARM_RELOC_VANILLA: + require_action(!pcrel, finish, rval=KERN_FAILURE); + instr_data += target; + break; + + /* + * If the displacement is 0 (the offset between the pc and the target has + * not changed), then we don't need to do anything for BR24 and BR22 + * relocs. As it turns out, because kexts build with -mlong-calls all + * relocations currently end up being either vanilla (handled above) or + * BR22/BR24 with a displacement of 0. + * We could handle other displacements here but to keep things simple, we + * won't until it is needed (at which point the kernelcache will fail to + * link) + */ + case ARM_RELOC_BR24: + require_action(pcrel, finish, rval=KERN_FAILURE); + require_action(displacement == 0, finish, rval=KERN_FAILURE); + break; + case ARM_THUMB_RELOC_BR22: + require_action(pcrel, finish, rval=KERN_FAILURE); + require_action(displacement == 0, finish, rval=KERN_FAILURE); + break; + + case ARM_RELOC_SECTDIFF: + case ARM_RELOC_LOCAL_SECTDIFF: + case ARM_RELOC_PB_LA_PTR: + rval = KERN_FAILURE; + goto finish; + + case ARM_RELOC_PAIR: + default: + rval = KERN_FAILURE; + goto finish; + } + +#if !KERNEL + if (swap) instr_data = OSSwapInt32(instr_data); +#endif + + *instr_addr = instr_data; + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#endif /* KXLD_USER_OR_ARM */ + diff --git a/libkern/kxld/kxld_reloc.h b/libkern/kxld/kxld_reloc.h new file mode 100644 index 000000000..679a95870 --- /dev/null +++ b/libkern/kxld/kxld_reloc.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KXLD_RELOC_H
+#define _KXLD_RELOC_H
+
+#include <mach/machine.h>
+#include <sys/types.h>
+#if KERNEL
+    #include <libkern/kxld_types.h>
+#else
+    #include "kxld_types.h"
+#endif
+
+struct kxld_array;
+struct kxld_sect;
+struct kxld_seg;
+struct kxld_sym;
+struct kxld_symtab;
+struct relocation_info;
+typedef struct kxld_relocator KXLDRelocator;
+typedef struct kxld_reloc KXLDReloc;
+
+/* Architecture-specific callbacks installed in a relocator */
+typedef boolean_t (*RelocHasPair)(u_int r_type);
+typedef boolean_t (*RelocIsPair)(u_int r_type, u_int prev_r_type);
+typedef boolean_t (*RelocHasGot)(u_int r_type);
+typedef kern_return_t(*ProcessReloc)(u_char *instruction, u_int length, u_int pcrel,
+    kxld_addr_t base_pc, kxld_addr_t link_pc, kxld_addr_t link_disp, u_int type,
+    kxld_addr_t target, kxld_addr_t pair_target, boolean_t swap);
+
+struct kxld_relocator {
+    RelocHasPair reloc_has_pair;    /* does this reloc type have a pair entry? */
+    RelocIsPair reloc_is_pair;      /* is this entry the pair of the previous one? */
+    RelocHasGot reloc_has_got;      /* does this reloc type use a GOT entry? */
+    ProcessReloc process_reloc;     /* patches one fixup site */
+    boolean_t is_32_bit;            /* selects 32- vs 64-bit pointer reads */
+    boolean_t swap;                 /* byte-swap values (non-kernel builds only) */
+};
+
+struct kxld_reloc {
+    u_int address;                  /* offset of the fixup site in its section/segment */
+    u_int target;                   /* meaning depends on target_type */
+    u_int pair_target;              /* meaning depends on pair_target_type */
+    u_int target_type:3;            /* one of the KXLD_TARGET_* values */
+    u_int pair_target_type:3;       /* one of the KXLD_TARGET_* values */
+    u_int reloc_type:4;             /* architecture-specific relocation type */
+    u_int length:2;                 /* fixup size code handed to process_reloc */
+    u_int pcrel:1;                  /* non-zero for pc-relative relocations */
+};
+
+/*******************************************************************************
+* Constructors and Destructors
+*******************************************************************************/
+
+kern_return_t kxld_relocator_init(KXLDRelocator *relocator, cpu_type_t cputype,
+    cpu_subtype_t cpusubtype, boolean_t swap)
+    __attribute__((nonnull,visibility("hidden")));
+
+kern_return_t kxld_reloc_create_macho(struct kxld_array *relocarray,
+    const KXLDRelocator *relocator, const struct relocation_info *srcs,
+    u_int nsrcs) __attribute__((nonnull, visibility("hidden")));
+
+void kxld_relocator_clear(KXLDRelocator *relocator)
+    __attribute__((nonnull, visibility("hidden")));
+
+/*******************************************************************************
+* Accessors
+*******************************************************************************/
+
+boolean_t kxld_relocator_has_pair(const KXLDRelocator *relocator, u_int r_type)
+    __attribute__((pure, nonnull,visibility("hidden")));
+
+boolean_t kxld_relocator_is_pair(const KXLDRelocator *relocator, u_int r_type,
+    u_int prev_r_type)
+    __attribute__((pure, nonnull,visibility("hidden")));
+
+boolean_t kxld_relocator_has_got(const KXLDRelocator *relocator, u_int r_type)
+    __attribute__((pure, nonnull,visibility("hidden")));
+
+/* data may be NULL; see the nonnull indices below */
+struct kxld_sym * kxld_reloc_get_symbol(const KXLDRelocator *relocator,
+    const KXLDReloc *reloc, u_char *data,
+    const struct kxld_symtab *symtab)
+    __attribute__((pure, nonnull(1,2,4), visibility("hidden")));
+
+kern_return_t kxld_reloc_get_reloc_index_by_offset(const struct kxld_array *relocs,
+    kxld_size_t offset, u_int *idx)
+    __attribute__((nonnull, visibility("hidden")));
+
+KXLDReloc * kxld_reloc_get_reloc_by_offset(const struct kxld_array *relocs,
+    kxld_addr_t offset)
+    __attribute__((pure, nonnull, visibility("hidden")));
+
+/*******************************************************************************
+* Modifiers
+*******************************************************************************/
+
+kern_return_t kxld_reloc_update_symindex(KXLDReloc *reloc, u_int symindex)
+    __attribute__((nonnull,visibility("hidden")));
+
+kern_return_t kxld_relocator_process_sect_reloc(const KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const struct kxld_sect *sect,
+    const struct kxld_array *sectarray, const struct kxld_symtab *symtab)
+    __attribute__((nonnull,visibility("hidden")));
+
+kern_return_t kxld_relocator_process_table_reloc(const KXLDRelocator *relocator,
+    const KXLDReloc *reloc, const struct kxld_seg *seg, u_char *file,
+    const struct kxld_array *sectarray,
+    const struct kxld_symtab *symtab)
+    __attribute__((nonnull,visibility("hidden")));
+
+#endif /* _KXLD_RELOC_H */
+
diff --git a/libkern/kxld/kxld_sect.c b/libkern/kxld/kxld_sect.c new
file mode 100644 index 000000000..0c286b5b6 --- /dev/null +++ b/libkern/kxld/kxld_sect.c @@ -0,0 +1,629 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_reloc.h" +#include "kxld_sect.h" +#include "kxld_seg.h" +#include "kxld_symtab.h" +#include "kxld_util.h" + +static kern_return_t export_macho(const KXLDSect *sect, u_char *buf, u_long offset, + u_long bufsize, boolean_t is_32_bit); +#if KXLD_USER_OR_ILP32 +static kern_return_t sect_export_macho_header_32(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset); +#endif +#if KXLD_USER_OR_LP64 +static kern_return_t sect_export_macho_header_64(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset); +#endif + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_init_from_macho_32(KXLDSect *sect, u_char *macho, u_long *sect_offset, + u_int sectnum, const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + struct section *src = (struct section *) (macho + *sect_offset); + struct relocation_info *relocs = NULL; + + check(sect); + check(macho); + check(src); + + strlcpy(sect->segname, src->segname, sizeof(sect->segname)); + strlcpy(sect->sectname, src->sectname, sizeof(sect->sectname)); + sect->base_addr = src->addr; + sect->link_addr = src->addr; + sect->size = src->size; + sect->sectnum = sectnum; + sect->flags = src->flags; + sect->align = src->align; + sect->reserved1 = src->reserved1; + sect->reserved2 = src->reserved2; + + if (src->offset) { + sect->data = macho + src->offset; + } else { + sect->data = NULL; + } + + relocs = (struct relocation_info *) (macho + src->reloff); + + rval = kxld_reloc_create_macho(§->relocs, relocator, + relocs, src->nreloc); + require_noerr(rval, finish); + + *sect_offset += sizeof(*src); + rval = 
KERN_SUCCESS; + +finish: + if (rval) kxld_sect_deinit(sect); + + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_init_from_macho_64(KXLDSect *sect, u_char *macho, u_long *sect_offset, + u_int sectnum, const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + struct section_64 *src = (struct section_64 *) (macho + *sect_offset); + struct relocation_info *relocs = NULL; + + check(sect); + check(macho); + check(src); + + strlcpy(sect->segname, src->segname, sizeof(sect->segname)); + strlcpy(sect->sectname, src->sectname, sizeof(sect->sectname)); + sect->base_addr = src->addr; + sect->link_addr = src->addr; + sect->size = src->size; + sect->sectnum = sectnum; + sect->flags = src->flags; + sect->align = src->align; + sect->reserved1 = src->reserved1; + sect->reserved2 = src->reserved2; + + if (src->offset) { + sect->data = macho + src->offset; + } else { + sect->data = NULL; + } + + relocs = (struct relocation_info *) (macho + src->reloff); + + rval = kxld_reloc_create_macho(§->relocs, relocator, + relocs, src->nreloc); + require_noerr(rval, finish); + + *sect_offset += sizeof(*src); + rval = KERN_SUCCESS; + +finish: + if (rval) kxld_sect_deinit(sect); + + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_GOT +/******************************************************************************* +* Assumes GOT is comprised of kxld_addr_t entries +*******************************************************************************/ +kern_return_t +kxld_sect_init_got(KXLDSect *sect, u_int ngots) +{ + kern_return_t rval = KERN_FAILURE; + + check(sect); + + strlcpy(sect->segname, KXLD_SEG_GOT, sizeof(sect->segname)); + strlcpy(sect->sectname, KXLD_SECT_GOT, sizeof(sect->sectname)); + sect->base_addr = 0; + sect->link_addr = 0; + 
sect->flags = 0; + sect->align = 4; + sect->reserved1 = 0; + sect->reserved2 = 0; + + sect->size = ngots * sizeof(kxld_addr_t); + sect->data = kxld_alloc((u_long) sect->size); + require_action(sect->data, finish, rval=KERN_RESOURCE_SHORTAGE); + + sect->allocated = TRUE; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_GOT */ + +#if KXLD_USER_OR_COMMON +/******************************************************************************* +*******************************************************************************/ +void +kxld_sect_init_zerofill(KXLDSect *sect, const char *segname, + const char *sectname, kxld_size_t size, u_int align) +{ + check(sect); + check(segname); + check(sectname); + + strlcpy(sect->segname, segname, sizeof(sect->segname)); + strlcpy(sect->sectname, sectname, sizeof(sect->sectname)); + sect->size = size; + sect->align = align; + sect->base_addr = 0; + sect->link_addr = 0; + sect->flags = S_ZEROFILL; +} +#endif /* KXLD_USER_OR_COMMON */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sect_clear(KXLDSect *sect) +{ + check(sect); + + if (sect->allocated) { + kxld_free(sect->data, (u_long) sect->size); + sect->allocated = FALSE; + } + + bzero(sect->sectname, sizeof(sect->sectname)); + bzero(sect->segname, sizeof(sect->segname)); + sect->data = NULL; + sect->base_addr = 0; + sect->link_addr = 0; + sect->size = 0; + sect->flags = 0; + sect->align = 0; + sect->reserved1 = 0; + sect->reserved2 = 0; + kxld_array_clear(§->relocs); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sect_deinit(KXLDSect *sect) +{ + check(sect); + + if (streq_safe(sect->sectname, KXLD_SECT_GOT, sizeof(KXLD_SECT_GOT))) { + kxld_free(sect->data, (u_long) sect->size); + } + + 
kxld_array_deinit(§->relocs); + bzero(sect, sizeof(*sect)); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_sect_get_num_relocs(const KXLDSect *sect) +{ + check(sect); + + return sect->relocs.nitems; +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_sect_get_macho_header_size(boolean_t is_32_bit) +{ + if (is_32_bit) { + return sizeof(struct section); + } else { + return sizeof(struct section_64); + } +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_sect_get_macho_data_size(const KXLDSect *sect) +{ + u_long size = 0; + + check(sect); + + if (sect->data) { + size = (u_long) sect->size; + } + + return size; +} + +#if KXLD_USER_OR_GOT +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_sect_get_ngots(const KXLDSect *sect, const KXLDRelocator *relocator, + const KXLDSymtab *symtab) +{ + const KXLDReloc *reloc = NULL; + KXLDSym *sym = NULL; + u_int ngots = 0; + u_int i = 0; + + for (i = 0; i < sect->relocs.nitems; ++i) { + reloc = kxld_array_get_item(§->relocs, i); + + if (relocator->reloc_has_got(reloc->reloc_type)) { + /* @TODO This assumes 64-bit symbols (which is valid at the + * moment since only x86_64 has a GOT) + */ + sym = kxld_reloc_get_symbol(relocator, reloc, sect->data, symtab); + if (!kxld_sym_is_got(sym)) { + kxld_sym_set_got(sym); + ++ngots; + } + } + } + + return ngots; +} +#endif /* KXLD_USER_OR_GOT */ + +/******************************************************************************* +* Each section must be aligned at a certain power of 
two. To figure out that +* alignment, we mask for the low bits that may need to be adjusted. If they are +* non zero, we then subtract them from the target alignment to find the offset, +* and then add that offset to the link address. +*******************************************************************************/ +kxld_addr_t +kxld_sect_align_address(const KXLDSect *sect, kxld_addr_t address) +{ + return kxld_align_address(address, sect->align); +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_export_macho_to_file_buffer(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long *data_offset, + u_long data_size, boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + + check(sect); + check(buf); + check(header_offset); + check(data_offset); + + /* If there is no data to export, we only need to write the header. We + * make it a separate call so that we don't modify data_offset. 
+ */ + if (!sect->data) { + KXLD_3264_FUNC(is_32_bit, rval, + sect_export_macho_header_32, sect_export_macho_header_64, + sect, buf, header_offset, header_size, /* data_offset */ 0); + require_noerr(rval, finish); + } else { + *data_offset = (u_long) kxld_sect_align_address(sect, *data_offset); + + KXLD_3264_FUNC(is_32_bit, rval, + sect_export_macho_header_32, sect_export_macho_header_64, + sect, buf, header_offset, header_size, *data_offset); + require_noerr(rval, finish); + + rval = export_macho(sect, buf, *data_offset, data_size, is_32_bit); + require_noerr(rval, finish); + + *data_offset += (u_long) sect->size; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_export_macho_to_vm(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, + kxld_addr_t link_addr, u_long data_size, + boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + u_long data_offset = (u_long) (sect->link_addr - link_addr); + + check(sect); + check(buf); + check(header_offset); + + KXLD_3264_FUNC(is_32_bit, rval, + sect_export_macho_header_32, sect_export_macho_header_64, + sect, buf, header_offset, header_size, data_offset); + require_noerr(rval, finish); + + rval = export_macho(sect, buf, data_offset, data_size, is_32_bit); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +export_macho(const KXLDSect *sect, u_char *buf, u_long offset, u_long bufsize, + boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + + check(sect); + check(buf); + + if (!sect->data) { + rval = KERN_SUCCESS; + goto finish; + } + + /* Verify that the section is 
properly aligned */ + + require_action(kxld_sect_align_address(sect, offset) == offset, finish, + rval = KERN_FAILURE); + + /* Verify that we have enough space to copy */ + + require_action(sect->size <= bufsize - offset, finish, + rval=KERN_FAILURE); + + /* Copy section data */ + + switch (sect->flags & SECTION_TYPE) { + case S_NON_LAZY_SYMBOL_POINTERS: + case S_MOD_INIT_FUNC_POINTERS: + case S_MOD_TERM_FUNC_POINTERS: + require_action(!is_32_bit, finish, rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid section type in 32-bit kext: %u.", + sect->flags & SECTION_TYPE)); + /* Fall through */ + case S_REGULAR: + case S_CSTRING_LITERALS: + case S_4BYTE_LITERALS: + case S_8BYTE_LITERALS: + case S_LITERAL_POINTERS: + case S_COALESCED: + case S_16BYTE_LITERALS: + memcpy(buf + offset, sect->data, (size_t)sect->size); + break; + case S_ZEROFILL: /* sect->data should be NULL, so we'll never get here */ + case S_LAZY_SYMBOL_POINTERS: + case S_SYMBOL_STUBS: + case S_GB_ZEROFILL: + case S_INTERPOSING: + case S_DTRACE_DOF: + default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid section type: %u.", sect->flags & SECTION_TYPE); + goto finish; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +sect_export_macho_header_32(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset) +{ + kern_return_t rval = KERN_FAILURE; + struct section *secthdr = NULL; + + check(sect); + check(buf); + check(header_offset); + + require_action(sizeof(*secthdr) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + secthdr = (struct section *) (buf + *header_offset); + *header_offset += sizeof(*secthdr); + + /* Initalize header */ + + 
strlcpy(secthdr->sectname, sect->sectname, sizeof(secthdr->sectname)); + strlcpy(secthdr->segname, sect->segname, sizeof(secthdr->segname)); + secthdr->addr = (uint32_t) sect->link_addr; + secthdr->size = (uint32_t) sect->size; + secthdr->offset = (uint32_t) ((sect->data) ? data_offset : 0); + secthdr->align = sect->align; + secthdr->reloff = 0; + secthdr->nreloc = 0; + secthdr->flags = sect->flags; + secthdr->reserved1 = sect->reserved1; + secthdr->reserved2 = sect->reserved2; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +sect_export_macho_header_64(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset) +{ + kern_return_t rval = KERN_FAILURE; + struct section_64 *secthdr = NULL; + + check(sect); + check(buf); + check(header_offset); + + require_action(sizeof(*secthdr) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + secthdr = (struct section_64 *) (buf + *header_offset); + *header_offset += sizeof(*secthdr); + + /* Initalize header */ + + strlcpy(secthdr->sectname, sect->sectname, sizeof(secthdr->sectname)); + strlcpy(secthdr->segname, sect->segname, sizeof(secthdr->segname)); + secthdr->addr = (uint64_t) sect->link_addr; + secthdr->size = (uint64_t) sect->size; + secthdr->offset = (uint32_t) ((sect->data) ? 
data_offset : 0); + secthdr->align = sect->align; + secthdr->reloff = 0; + secthdr->nreloc = 0; + secthdr->flags = sect->flags; + secthdr->reserved1 = sect->reserved1; + secthdr->reserved2 = sect->reserved2; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_COMMON +/******************************************************************************* +*******************************************************************************/ +kxld_size_t +kxld_sect_grow(KXLDSect *sect, kxld_size_t nbytes, u_int align) +{ + kxld_size_t size = kxld_align_address(sect->size, align); + + if (align > sect->align) sect->align = align; + sect->size = size + nbytes; + + return size; +} +#endif /* KXLD_USER_OR_COMMON */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sect_relocate(KXLDSect *sect, kxld_addr_t link_addr) +{ + sect->link_addr = kxld_sect_align_address(sect, + sect->link_addr + link_addr); +} + +#if KXLD_USER_OR_GOT +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_populate_got(KXLDSect *sect, KXLDSymtab *symtab, + boolean_t swap __unused) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + kxld_addr_t *entry = NULL; + kxld_addr_t entry_addr = 0; + + check(sect); + check(symtab); + require(streq_safe(sect->segname, KXLD_SEG_GOT, sizeof(KXLD_SEG_GOT)), + finish); + require(streq_safe(sect->sectname, KXLD_SECT_GOT, sizeof(KXLD_SECT_GOT)), + finish); + + kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_got, FALSE); + + entry = (kxld_addr_t *) sect->data; + entry_addr = sect->link_addr; + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + *entry = sym->link_addr; + sym->got_addr = entry_addr; + +#if !KERNEL + if 
(swap) *entry = OSSwapInt64(*entry); +#endif /* !KERNEL */ + + ++entry; + entry_addr += sizeof(*entry); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_GOT */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sect_process_relocs(KXLDSect *sect, const KXLDRelocator *relocator, + const KXLDArray *sectarray, const KXLDSymtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + u_int i = 0; + + for (i = 0; i < sect->relocs.nitems; ++i) { + reloc = kxld_array_get_item(§->relocs, i); + rval = kxld_relocator_process_sect_reloc(relocator, reloc, sect, + sectarray, symtab); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + diff --git a/libkern/kxld/kxld_sect.h b/libkern/kxld/kxld_sect.h new file mode 100644 index 000000000..cf79fde75 --- /dev/null +++ b/libkern/kxld/kxld_sect.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_SECT_H_ +#define _KXLD_SECT_H_ + +#include +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +#include "kxld_array.h" + +struct kxld_array; +struct kxld_relocator; +struct kxld_reloc; +struct kxld_seg; +struct kxld_symtab; +struct relocation_info; +struct section; +struct section_64; +typedef struct kxld_sect KXLDSect; + +struct kxld_sect { + char sectname[16]; // The name of the section + char segname[16]; // The segment to which the section belongs + u_char *data; // The start of the section in memory + KXLDArray relocs; // The section's relocation entries + kxld_addr_t base_addr; // The base address of the section + kxld_addr_t link_addr; // The relocated address of the section + kxld_size_t size; // The size of the section + u_int sectnum; // The number of the section (for relocation) + u_int flags; // Flags describing the section + u_int align; // The section's alignment as a power of 2 + u_int reserved1; // Dependent on the section type + u_int reserved2; // Dependent on the section type + boolean_t allocated; // This section's data is allocated internally +}; + +/******************************************************************************* +* Constructors and destructors +*******************************************************************************/ + +#if KXLD_USER_OR_ILP32 +/* Initializes a section object from a Mach-O section header and modifies the + * section offset to point to the next 
section header. + */ +kern_return_t kxld_sect_init_from_macho_32(KXLDSect *sect, u_char *macho, + u_long *sect_offset, u_int sectnum, const struct kxld_relocator *relocator) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/* Initializes a section object from a Mach-O64 section header and modifies the + * section offset to point to the next section header. + */ +kern_return_t kxld_sect_init_from_macho_64(KXLDSect *sect, u_char *macho, + u_long *sect_offset, u_int sectnum, const struct kxld_relocator *relocator) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_GOT +/* Initializes a GOT section from the number of entries that the section should + * have. + */ +kern_return_t kxld_sect_init_got(KXLDSect *sect, u_int ngots) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_GOT */ + +#if KXLD_USER_OR_COMMON +/* Initializes a zerofill section of the specified size and alignment */ +void kxld_sect_init_zerofill(KXLDSect *sect, const char *segname, + const char *sectname, kxld_size_t size, u_int align) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_COMMON */ + +/* Clears the section object */ +void kxld_sect_clear(KXLDSect *sect) + __attribute__((nonnull, visibility("hidden"))); + +/* Denitializes the section object and frees its array of relocs */ +void kxld_sect_deinit(KXLDSect *sect) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +/* Gets the number of relocation entries in the section */ +u_int kxld_sect_get_num_relocs(const KXLDSect *sect) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* Returns the address parameter adjusted to the minimum alignment required by + * the section. 
+ */ +kxld_addr_t kxld_sect_align_address(const KXLDSect *sect, kxld_addr_t address) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* Returns the space required by the exported Mach-O header */ +u_long kxld_sect_get_macho_header_size(boolean_t is_32_bit) + __attribute__((const, nonnull, visibility("hidden"))); + +/* Returns the space required by the exported Mach-O data */ +u_long kxld_sect_get_macho_data_size(const KXLDSect *sect) + __attribute__((pure, nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_LP64 +/* Returns the number of GOT entries required by relocation entries in the + * given section. + */ +u_int kxld_sect_get_ngots(const KXLDSect *sect, + const struct kxld_relocator *relocator, const struct kxld_symtab *symtab) + __attribute__((pure, nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_LP64 */ + +kern_return_t kxld_sect_export_macho_to_file_buffer(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, u_long *data_offset, + u_long data_size, boolean_t is_32_bit) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sect_export_macho_to_vm(const KXLDSect *sect, u_char *buf, + u_long *header_offset, u_long header_size, + kxld_addr_t link_addr, u_long data_size, + boolean_t is_32_bit) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Mutators +*******************************************************************************/ + +/* Relocates the section to the given link address */ +void kxld_sect_relocate(KXLDSect *sect, kxld_addr_t link_addr) + __attribute__((nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_COMMON +/* Adds a number of bytes to the section's size. Returns the size of the + * section before it was grown. 
+ */ +kxld_size_t kxld_sect_grow(KXLDSect *sect, kxld_size_t nbytes, u_int align) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_COMMON */ + +#if KXLD_USER_OR_GOT +/* Popluates the entries of a GOT section */ +kern_return_t kxld_sect_populate_got(KXLDSect *sect, struct kxld_symtab *symtab, + boolean_t swap) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_GOT */ + +/* Processes all of a section's relocation entries */ +kern_return_t kxld_sect_process_relocs(KXLDSect *sect, + const struct kxld_relocator *relocator, const KXLDArray *sectarray, + const struct kxld_symtab *symtab) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_SECT_H_ */ + diff --git a/libkern/kxld/kxld_seg.c b/libkern/kxld/kxld_seg.c new file mode 100644 index 000000000..5c11a1f9a --- /dev/null +++ b/libkern/kxld/kxld_seg.c @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#if KERNEL + #include +#else + #include +#endif /* KERNEL */ + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_sect.h" +#include "kxld_seg.h" +#include "kxld_util.h" + +#define MAX_SEGS 20 + +#define TEXT_SEG_PROT (VM_PROT_READ | VM_PROT_EXECUTE) +#define DATA_SEG_PROT (VM_PROT_READ | VM_PROT_WRITE) + +#if KXLD_USER_OR_OBJECT +static kern_return_t reorder_sections(KXLDSeg *seg, KXLDArray *section_order); +static void reorder_section(KXLDArray *sects, u_int *sect_reorder_index, + KXLDSect **reorder_buffer, u_int reorder_buffer_index); +#endif /* KXLD_USER_OR_OBJECT */ + +#if 0 +static KXLDSeg * get_segment_by_name(KXLDArray *segarray, const char *name); +#endif + +#if KXLD_USER_OR_ILP32 +static kern_return_t seg_export_macho_header_32(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset); +#endif +#if KXLD_USER_OR_LP64 +static kern_return_t seg_export_macho_header_64(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset); +#endif + +static KXLDSect * get_sect_by_index(const KXLDSeg *seg, u_int idx); + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_init_from_macho_32(KXLDSeg *seg, struct segment_command *src) +{ + kern_return_t 
rval = KERN_FAILURE; + check(seg); + check(src); + + strlcpy(seg->segname, src->segname, sizeof(seg->segname)); + seg->base_addr = src->vmaddr; + seg->link_addr = src->vmaddr; + seg->vmsize = src->vmsize; + seg->fileoff = src->fileoff; + seg->maxprot = src->maxprot; + seg->initprot = src->initprot; + seg->flags = src->flags; + + rval = kxld_array_init(&seg->sects, sizeof(KXLDSect *), src->nsects); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_init_from_macho_64(KXLDSeg *seg, struct segment_command_64 *src) +{ + kern_return_t rval = KERN_FAILURE; + check(seg); + check(src); + + strlcpy(seg->segname, src->segname, sizeof(seg->segname)); + seg->base_addr = src->vmaddr; + seg->link_addr = src->vmaddr; + seg->vmsize = src->vmsize; + seg->fileoff = src->fileoff; + seg->maxprot = src->maxprot; + seg->initprot = src->initprot; + seg->flags = src->flags; + + rval = kxld_array_init(&seg->sects, sizeof(KXLDSect *), src->nsects); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_OBJECT +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_create_seg_from_sections(KXLDArray *segarray, KXLDArray *sectarray) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + KXLDSect **sectp = NULL; + u_int i = 0; + + /* Initialize the segment array to one segment */ + + rval = kxld_array_init(segarray, sizeof(KXLDSeg), 1); + require_noerr(rval, finish); + + /* Initialize the segment */ + + seg = kxld_array_get_item(segarray, 0); + seg->initprot = VM_PROT_ALL; 
+ seg->maxprot = VM_PROT_ALL; + seg->link_addr = 0; + + /* Add the sections to the segment */ + + rval = kxld_array_init(&seg->sects, sizeof(KXLDSect *), sectarray->nitems); + require_noerr(rval, finish); + + for (i = 0; i < sectarray->nitems; ++i) { + sect = kxld_array_get_item(sectarray, i); + sectp = kxld_array_get_item(&seg->sects, i); + + *sectp = sect; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_finalize_object_segment(KXLDArray *segarray, KXLDArray *section_order, + u_long hdrsize) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSeg *seg = NULL; + KXLDSect *sect = NULL; + u_long sect_offset = 0; + u_int i = 0; + + check(segarray); + check(section_order); + require_action(segarray->nitems == 1, finish, rval=KERN_FAILURE); + + seg = kxld_array_get_item(segarray, 0); + + /* Reorder the sections */ + + rval = reorder_sections(seg, section_order); + require_noerr(rval, finish); + + /* Set the initial link address at the end of the header pages */ + + seg->link_addr = round_page(hdrsize); + + /* Fix up all of the section addresses */ + + sect_offset = (u_long) seg->link_addr; + for (i = 0; i < seg->sects.nitems; ++i) { + sect = *(KXLDSect **)kxld_array_get_item(&seg->sects, i); + + sect->link_addr = kxld_sect_align_address(sect, sect_offset); + sect_offset = (u_long) (sect->link_addr + sect->size); + } + + /* Finish initializing the segment */ + + seg->vmsize = round_page(sect_offset) - seg->link_addr; + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +* The legacy section ordering used by kld was based of the order of sections +* in the kernel file. 
To achieve the same layout, we save the kernel's +* section ordering as an array of section names when the kernel file itself +* is linked. Then, when kexts are linked with the KXLD_LEGACY_LAYOUT flag, +* we refer to the kernel's section layout to order the kext's sections. +* +* The algorithm below is as follows. We iterate through all of the kernel's +* sections grouped by segment name, so that we are processing all of the __TEXT +* sections, then all of the __DATA sections, etc. We then iterate through the +* kext's sections with a similar grouping, looking for sections that match +* the current kernel's section. In this way, we order all of the matching +* kext sections in the order in which they appear in the kernel, and then place +* all remaining kext sections at the end of the current segment grouping in +* the order in which they originally appeared. Sections that only appear in +* the kernel are not created. segments that only appear in the kext are +* left in their original ordering. +* +* An example: +* +* Kernel sections: +* __TEXT,__text +* __TEXT,__initcode +* __TEXT,__const +* __DATA,__data +* +* Kext sections: +* __TEXT,__const +* __TEXT,__literal4 +* __TEXT,__text +* __DATA,__const +* __DATA,__data +* +* Reordered kext sections: +* __TEXT,__text +* __TEXT,__const +* __TEXT,__literal4 +* __DATA,__data +* __DATA,__const +* +* In the implementation below, we use a reorder buffer to hold pointers to the +* sections of the current working segment. We scan this buffer looking for +* matching sections, placing them in the segment's section index as we find them. +* If this function must exit early, the segment's section index is left in an +* unusable state. 
+*******************************************************************************/ +static kern_return_t +reorder_sections(KXLDSeg *seg, KXLDArray *section_order) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + KXLDSect **reorder_buffer = NULL; + KXLDSectionName *section_name = NULL; + const char *segname = NULL; + u_int sect_index = 0, legacy_index = 0, sect_reorder_index = 0; + u_int i = 0, j = 0; + u_int sect_start = 0, sect_end = 0, legacy_start = 0, legacy_end = 0; + u_int nsects = 0; + + check(seg); + check(section_order); + + /* Allocate the reorder buffer with enough space to hold all of the + * sections. + */ + + reorder_buffer = kxld_alloc( + seg->sects.nitems * sizeof(*reorder_buffer)); + require_action(reorder_buffer, finish, rval=KERN_RESOURCE_SHORTAGE); + + while (legacy_index < section_order->nitems) { + + /* Find the next group of sections with a common segment in the + * section_order array. + */ + + legacy_start = legacy_index++; + legacy_end = legacy_index; + + section_name = kxld_array_get_item(section_order, legacy_start); + segname = section_name->segname; + while (legacy_index < section_order->nitems) { + section_name = kxld_array_get_item(section_order, legacy_index); + if (!streq_safe(segname, section_name->segname, + sizeof(section_name->segname))) + { + break; + } + + ++legacy_index; + ++legacy_end; + } + + /* Find a group of sections in the kext that match the current + * section_order segment. 
+ */ + + sect_start = sect_index; + sect_end = sect_index; + + while (sect_index < seg->sects.nitems) { + sect = *(KXLDSect **) kxld_array_get_item(&seg->sects, sect_index); + if (!streq_safe(segname, sect->segname, sizeof(sect->segname))) { + break; + } + + ++sect_index; + ++sect_end; + } + nsects = sect_end - sect_start; + + if (!nsects) continue; + + /* Populate the reorder buffer with the current group of kext sections */ + + for (i = sect_start; i < sect_end; ++i) { + reorder_buffer[i - sect_start] = + *(KXLDSect **) kxld_array_get_item(&seg->sects, i); + } + + /* For each section_order section, scan the reorder buffer for a matching + * kext section. If one is found, copy it into the next slot in the + * segment's section index. + */ + + sect_reorder_index = sect_start; + for (i = legacy_start; i < legacy_end; ++i) { + section_name = kxld_array_get_item(section_order, i); + sect = NULL; + + for (j = 0; j < nsects; ++j) { + sect = reorder_buffer[j]; + if (!sect) continue; + + if (streq_safe(section_name->sectname, sect->sectname, + sizeof(section_name->sectname))) + { + break; + } + + sect = NULL; + } + + if (sect) { + (void) reorder_section(&seg->sects, §_reorder_index, + reorder_buffer, j); + } + } + + /* If any sections remain in the reorder buffer, they are not specified + * in the section_order array, so append them to the section index in + * in the order they are found. 
+ */ + + for (i = 0; i < nsects; ++i) { + if (!reorder_buffer[i]) continue; + reorder_section(&seg->sects, §_reorder_index, reorder_buffer, i); + } + } + + rval = KERN_SUCCESS; + +finish: + + if (reorder_buffer) { + kxld_free(reorder_buffer, seg->sects.nitems * sizeof(*reorder_buffer)); + reorder_buffer = NULL; + } + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static void +reorder_section(KXLDArray *sects, u_int *sect_reorder_index, + KXLDSect **reorder_buffer, u_int reorder_buffer_index) +{ + KXLDSect **tmp = NULL; + + tmp = kxld_array_get_item(sects, *sect_reorder_index); + + *tmp = reorder_buffer[reorder_buffer_index]; + reorder_buffer[reorder_buffer_index]->sectnum = *sect_reorder_index; + reorder_buffer[reorder_buffer_index] = NULL; + + ++(*sect_reorder_index); +} +#endif /* KXLD_USER_OR_OBJECT */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_seg_clear(KXLDSeg *seg) +{ + check(seg); + + bzero(seg->segname, sizeof(seg->segname)); + seg->base_addr = 0; + seg->link_addr = 0; + seg->vmsize = 0; + seg->flags = 0; + seg->maxprot = 0; + seg->initprot = 0; + + /* Don't clear the individual sections here because kxld_kext.c will take + * care of that. 
+ */ + kxld_array_clear(&seg->sects); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_seg_deinit(KXLDSeg *seg) +{ + check(seg); + + kxld_array_deinit(&seg->sects); + bzero(seg, sizeof(*seg)); +} + +/******************************************************************************* +*******************************************************************************/ +kxld_size_t +kxld_seg_get_vmsize(const KXLDSeg *seg) +{ + check(seg); + + return seg->vmsize; +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_seg_get_macho_header_size(const KXLDSeg *seg, boolean_t is_32_bit) +{ + u_long size = 0; + + check(seg); + + if (is_32_bit) { + size += sizeof(struct segment_command); + } else { + size += sizeof(struct segment_command_64); + } + size += seg->sects.nitems * kxld_sect_get_macho_header_size(is_32_bit); + + return size; +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_seg_get_macho_data_size(const KXLDSeg *seg) +{ + u_long size = 0; + u_int i = 0; + KXLDSect *sect = NULL; + + check(seg); + + for (i = 0; i < seg->sects.nitems; ++i) { + sect = get_sect_by_index(seg, i); + size = (u_long) kxld_sect_align_address(sect, size); + size += kxld_sect_get_macho_data_size(sect); + } + + return round_page(size); +} + +/******************************************************************************* +*******************************************************************************/ +static KXLDSect * +get_sect_by_index(const KXLDSeg *seg, u_int idx) +{ + check(seg); + + return *(KXLDSect **) kxld_array_get_item(&seg->sects, idx); +} + 
+/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, + u_long *data_offset, u_long data_size, + boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + u_long base_data_offset = *data_offset; + u_int i = 0; + struct segment_command *hdr32 = + (struct segment_command *) (buf + *header_offset); + struct segment_command_64 *hdr64 = + (struct segment_command_64 *) (buf + *header_offset); + + check(seg); + check(buf); + check(header_offset); + check(data_offset); + + /* Write out the header */ + + KXLD_3264_FUNC(is_32_bit, rval, + seg_export_macho_header_32, seg_export_macho_header_64, + seg, buf, header_offset, header_size, *data_offset); + require_noerr(rval, finish); + + /* Write out each section */ + + for (i = 0; i < seg->sects.nitems; ++i) { + sect = get_sect_by_index(seg, i); + + rval = kxld_sect_export_macho_to_file_buffer(sect, buf, header_offset, + header_size, data_offset, data_size, is_32_bit); + require_noerr(rval, finish); + } + + /* Update the filesize */ + + if (is_32_bit) { + hdr32->filesize = (uint32_t) (*data_offset - base_data_offset); + } else { + hdr64->filesize = (uint64_t) (*data_offset - base_data_offset); + } + + *data_offset = round_page(*data_offset); + + rval = KERN_SUCCESS; + +finish: + return rval; + +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_export_macho_to_vm(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, + u_long data_size, kxld_addr_t file_link_addr, + boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect *sect = NULL; + u_long data_offset = (u_long) (seg->link_addr - 
file_link_addr); + u_int i = 0; + + check(seg); + check(buf); + check(header_offset); + + /* Write out the header */ + + KXLD_3264_FUNC(is_32_bit, rval, + seg_export_macho_header_32, seg_export_macho_header_64, + seg, buf, header_offset, header_size, data_offset); + require_noerr(rval, finish); + + /* Write out each section */ + + for (i = 0; i < seg->sects.nitems; ++i) { + sect = get_sect_by_index(seg, i); + + rval = kxld_sect_export_macho_to_vm(sect, buf, header_offset, + header_size, file_link_addr, data_size, is_32_bit); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +seg_export_macho_header_32(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset) +{ + kern_return_t rval = KERN_FAILURE; + struct segment_command *seghdr = NULL; + + check(seg); + check(buf); + check(header_offset); + + require_action(sizeof(*seghdr) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + seghdr = (struct segment_command *) (buf + *header_offset); + *header_offset += sizeof(*seghdr); + + seghdr->cmd = LC_SEGMENT; + seghdr->cmdsize = (uint32_t) sizeof(*seghdr); + seghdr->cmdsize += + (uint32_t) (seg->sects.nitems * kxld_sect_get_macho_header_size(TRUE)); + strlcpy(seghdr->segname, seg->segname, sizeof(seghdr->segname)); + seghdr->vmaddr = (uint32_t) seg->link_addr; + seghdr->vmsize = (uint32_t) seg->vmsize; + seghdr->fileoff = (uint32_t) data_offset; + seghdr->filesize = (uint32_t) seg->vmsize; + seghdr->maxprot = seg->maxprot; + seghdr->initprot = seg->initprot; + seghdr->nsects = seg->sects.nitems; + seghdr->flags = 0; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +seg_export_macho_header_64(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, u_long data_offset) +{ + kern_return_t rval = KERN_FAILURE; + struct segment_command_64 *seghdr = NULL; + + check(seg); + check(buf); + check(header_offset); + + require_action(sizeof(*seghdr) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + seghdr = (struct segment_command_64 *) (buf + *header_offset); + *header_offset += sizeof(*seghdr); + + seghdr->cmd = LC_SEGMENT_64; + seghdr->cmdsize = (uint32_t) sizeof(*seghdr); + seghdr->cmdsize += + (uint32_t) (seg->sects.nitems * kxld_sect_get_macho_header_size(FALSE)); + strlcpy(seghdr->segname, seg->segname, sizeof(seghdr->segname)); + seghdr->vmaddr = (uint64_t) seg->link_addr; + seghdr->vmsize = (uint64_t) seg->vmsize; + seghdr->fileoff = (uint64_t) data_offset; + seghdr->filesize = (uint64_t) seg->vmsize; + seghdr->maxprot = seg->maxprot; + seghdr->initprot = seg->initprot; + seghdr->nsects = seg->sects.nitems; + seghdr->flags = 0; + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_add_section(KXLDSeg *seg, KXLDSect *sect) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSect **sectp = NULL; + u_int i; + + check(seg); + check(sect); + require_action(streq_safe(seg->segname, sect->segname, sizeof(seg->segname)), + finish, rval=KERN_FAILURE); + + /* Add the section into the section index */ + + for (i = 0; i < seg->sects.nitems; ++i) { + sectp = kxld_array_get_item(&seg->sects, i); + if (NULL == *sectp) { + *sectp = sect; + break; + } + } + require_action(i < seg->sects.nitems, finish, 
rval=KERN_FAILURE); + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_seg_finish_init(KXLDSeg *seg) +{ + kern_return_t rval = KERN_FAILURE; + u_int i = 0; + KXLDSect *sect = NULL; + kxld_addr_t maxaddr = 0; + kxld_size_t maxsize = 0; + + if (seg->sects.nitems) { + for (i = 0; i < seg->sects.nitems; ++i) { + sect = get_sect_by_index(seg, i); + require_action(sect, finish, rval=KERN_FAILURE); + if (sect->base_addr > maxaddr) { + maxaddr = sect->base_addr; + maxsize = sect->size; + } + } + + /* XXX Cross architecture linking will fail if the page size ever differs + * from 4096. (As of this writing, we're fine on ppc, i386, x86_64, and + * arm.) + */ + seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections) +{ + if (strict_protections) { + if (streq_safe(seg->segname, SEG_TEXT, sizeof(SEG_TEXT))) { + seg->initprot = TEXT_SEG_PROT; + seg->maxprot = VM_PROT_ALL; + } else { + seg->initprot = DATA_SEG_PROT; + seg->maxprot = DATA_SEG_PROT; + } + } else { + seg->initprot = VM_PROT_ALL; + seg->maxprot = VM_PROT_ALL; + } +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_seg_relocate(KXLDSeg *seg, kxld_addr_t link_addr) +{ + KXLDSect *sect = NULL; + u_int i = 0; + + seg->link_addr += link_addr; + for (i = 0; i < seg->sects.nitems; ++i) { + sect = get_sect_by_index(seg, i); + kxld_sect_relocate(sect, link_addr); + } +} + diff --git a/libkern/kxld/kxld_seg.h 
b/libkern/kxld/kxld_seg.h new file mode 100644 index 000000000..e6484bf1a --- /dev/null +++ b/libkern/kxld/kxld_seg.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_SEG_H_ +#define _KXLD_SEG_H_ + +#include +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +#include "kxld_array.h" + +struct kxld_sect; +struct segment_command; +struct segment_command_64; +typedef struct kxld_seg KXLDSeg; + +struct kxld_seg { + char segname[16]; + kxld_addr_t base_addr; + kxld_addr_t link_addr; + kxld_size_t vmsize; + kxld_size_t fileoff; + KXLDArray sects; + u_int flags; + vm_prot_t maxprot; + vm_prot_t initprot; +}; + +/******************************************************************************* +* Constructors and Destructors +*******************************************************************************/ + +#if KXLD_USER_OR_ILP32 +kern_return_t kxld_seg_init_from_macho_32(KXLDSeg *seg, struct segment_command *src) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +kern_return_t kxld_seg_init_from_macho_64(KXLDSeg *seg, struct segment_command_64 *src) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_OBJECT +kern_return_t kxld_seg_create_seg_from_sections(KXLDArray *segarray, + KXLDArray *sectarray) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_seg_finalize_object_segment(KXLDArray *segarray, + KXLDArray *section_order, u_long hdrsize) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_OBJECT */ + +void kxld_seg_clear(KXLDSeg *seg) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_seg_deinit(KXLDSeg *seg) + __attribute__((nonnull, visibility("hidden"))); + + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +kxld_size_t kxld_seg_get_vmsize(const KXLDSeg *seg) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_long 
kxld_seg_get_macho_header_size(const KXLDSeg *seg, boolean_t is_32_bit) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_long kxld_seg_get_macho_data_size(const KXLDSeg *seg) + __attribute__((pure, nonnull, visibility("hidden"))); + +kern_return_t +kxld_seg_export_macho_to_file_buffer(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, + u_long *data_offset, u_long data_size, + boolean_t is_32_bit) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t +kxld_seg_export_macho_to_vm(const KXLDSeg *seg, u_char *buf, + u_long *header_offset, u_long header_size, + u_long data_size, kxld_addr_t file_link_addr, + boolean_t is_32_bit) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +kern_return_t kxld_seg_add_section(KXLDSeg *seg, struct kxld_sect *sect) + __attribute__((nonnull, visibility("hidden"))); + +/* To be called after all sections are added */ +kern_return_t kxld_seg_finish_init(KXLDSeg *seg) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_seg_relocate(KXLDSeg *Seg, kxld_addr_t link_addr) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_SEG_H_ */ + diff --git a/libkern/kxld/kxld_state.c b/libkern/kxld/kxld_state.c new file mode 100644 index 000000000..d3a06da19 --- /dev/null +++ b/libkern/kxld/kxld_state.c @@ -0,0 +1,1072 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include + +#if !KERNEL + #include +#endif + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_array.h" +#include "kxld_dict.h" +#include "kxld_kext.h" +#include "kxld_state.h" +#include "kxld_sym.h" +#include "kxld_symtab.h" +#include "kxld_util.h" +#include "kxld_vtable.h" + +#define LINK_STATE_MAGIC 0xF00DD00D +#define CIGAM_ETATS_KNIL 0x0DD00DF0 + +#define LINK_STATE_MAGIC_64 0xCAFEF00D +#define CIGAM_ETATS_KNIL_64 0x0DF0FECA + +#define LINK_STATE_VERSION 1 + +static kern_return_t init_string_index(KXLDDict *strings, KXLDArray *tmps, + KXLDSymtabIterator *iter, const KXLDArray *vtables, u_int nsymentries, + u_long *strsize); +static kern_return_t add_string_to_index(KXLDDict *strings, const char *str, + KXLDArray *tmps, u_int *tmpi, u_long *stroff); +static kern_return_t create_link_state(u_char **_file, u_long *_filesize, + const KXLDKext *kext, KXLDSymtabIterator *iter, const KXLDArray *vtables, + 
KXLDDict *strings, u_int nsyms, u_int nsymentries, u_long strsize); +static boolean_t state_is_32_bit(KXLDLinkStateHdr *state); + +#if KXLD_USER_OR_ILP32 +static kern_return_t get_symbols_32(KXLDState *state, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols); +static kern_return_t copy_symbols_32(u_char *file, u_long *data_offset, + KXLDSymtabIterator *iter, const KXLDDict *strings); +static kern_return_t copy_vtables_32(u_char *file, u_long *header_offset, + u_long *data_offset, const KXLDArray *vtables, const KXLDDict *strings); +#endif /* KXLD_USER_OR_ILP32*/ +#if KXLD_USER_OR_LP64 +static kern_return_t get_symbols_64(KXLDState *state, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols); +static kern_return_t copy_symbols_64(u_char *file, u_long *data_offset, + KXLDSymtabIterator *iter, const KXLDDict *strings); +static kern_return_t copy_vtables_64(u_char *file, u_long *header_offset, + u_long *data_offset, const KXLDArray *vtables, const KXLDDict *strings); +#endif /* KXLD_USER_OR_ILP64 */ + +#if !KERNEL +static boolean_t swap_link_state(u_char *state); +static void swap_link_state_32(u_char *state); +static void swap_link_state_64(u_char *state); +static boolean_t unswap_link_state(u_char *state); +static void unswap_link_state_32(u_char *state); +static void unswap_link_state_64(u_char *state); +static void swap_state_hdr(KXLDLinkStateHdr *state_hdr); +static void swap_vtable_hdr(KXLDVTableHdr *vtable_hdr); +static void swap_sym_entry_32(KXLDSymEntry32 *entry); +static void swap_sym_entry_64(KXLDSymEntry64 *entry); +#endif + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_state_init_from_file(KXLDState *state, u_char *file, + KXLDArray *section_order __unused) +{ + kern_return_t rval = KERN_FAILURE; + KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) file; +#if KXLD_USER_OR_OBJECT + KXLDSectionName *dstname = NULL; + 
KXLDSectionName *srcname = NULL; +#endif + KXLDVTableHdr *vhdr = NULL; + KXLDVTable *vtable = NULL; + u_int i = 0; + + check(state); + check(file); + +#if !KERNEL + /* Swap the link state file to host byte order for as long this kxld_state + * object owns the file. + */ + state->swap = swap_link_state(file); +#endif + require_action(hdr->magic == LINK_STATE_MAGIC || + hdr->magic == LINK_STATE_MAGIC_64, + finish, rval=KERN_FAILURE); + + state->file = file; + +#if KXLD_USER_OR_OBJECT + if (section_order && !section_order->nitems && hdr->nsects) { + rval = kxld_array_init(section_order, sizeof(*dstname), hdr->nsects); + require_noerr(rval, finish); + + srcname = (KXLDSectionName *) (file + hdr->sectoff); + for (i = 0; i < hdr->nsects; ++i, ++srcname) { + dstname = kxld_array_get_item(section_order, i); + memcpy(dstname, srcname, sizeof(*srcname)); + } + } +#endif + + rval = kxld_array_init(&state->vtables, sizeof(*vtable), hdr->nvtables); + require_noerr(rval, finish); + + vhdr = (KXLDVTableHdr *) (file + hdr->voff); + for (i = 0; i < hdr->nvtables; ++i, ++vhdr) { + vtable = kxld_array_get_item(&state->vtables, i); + KXLD_3264_FUNC(kxld_is_32_bit(hdr->cputype), rval, + kxld_vtable_init_from_link_state_32, + kxld_vtable_init_from_link_state_64, + vtable, file, vhdr); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_state_clear(KXLDState *state) +{ + KXLDVTable *vtable = NULL; + u_int i = 0; + + check(state); + +#if !KERNEL + /* We use kxld_state objects to wrap the link state files. Whenever the + * file is wrapped by a kxld_state object, the file is kept in host byte + * order. Once we are done, we must return it to target byte order. 
+ */ + if (state->swap) (void)unswap_link_state(state->file); +#endif + + state->file = NULL; + state->swap = FALSE; + for (i = 0; i < state->vtables.nitems; ++i) { + vtable = kxld_array_get_item(&state->vtables, i); + kxld_vtable_clear(vtable); + } + kxld_array_reset(&state->vtables); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_state_deinit(KXLDState *state) +{ + KXLDVTable *vtable = NULL; + u_int i = 0; + + check(state); + +#if !KERNEL + if (state->file && state->swap) (void)unswap_link_state(state->file); +#endif + + for (i = 0; i < state->vtables.maxitems; ++i) { + vtable = kxld_array_get_slot(&state->vtables, i); + kxld_vtable_deinit(vtable); + } + kxld_array_deinit(&state->vtables); + bzero(state, sizeof(*state)); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_state_get_num_symbols(KXLDState *state) +{ + KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file; + + return hdr->nsyms; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_state_get_symbols(KXLDState *state, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols) +{ + KXLDLinkStateHdr * hdr = (KXLDLinkStateHdr *) state->file; + kern_return_t rval = KERN_FAILURE; + + check(state); + check(defined_symbols); + check(obsolete_symbols); + + require_action(hdr->magic == LINK_STATE_MAGIC || + hdr->magic == LINK_STATE_MAGIC_64, + finish, rval=KERN_FAILURE); + + KXLD_3264_FUNC(state_is_32_bit(hdr), rval, + get_symbols_32, get_symbols_64, + state, defined_symbols, obsolete_symbols); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_ILP32 
+/******************************************************************************* +*******************************************************************************/ +static kern_return_t +get_symbols_32(KXLDState *state, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols) +{ + kern_return_t rval = KERN_FAILURE; + KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file; + KXLDSymEntry32 *entry = NULL; + const char *name = NULL; + u_int i = 0; + + entry = (KXLDSymEntry32 *) (state->file + hdr->symoff); + for (i = 0; i < hdr->nsyms; ++i, ++entry) { + name = (const char *) (state->file + entry->nameoff); + rval = kxld_dict_insert(defined_symbols, name, &entry->addr); + require_noerr(rval, finish); + + if (entry->flags & KXLD_SYM_OBSOLETE) { + rval = kxld_dict_insert(obsolete_symbols, name, &entry->addr); + require_noerr(rval, finish); + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +get_symbols_64(KXLDState *state, KXLDDict *defined_symbols, + KXLDDict *obsolete_symbols) +{ + kern_return_t rval = KERN_FAILURE; + KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file; + KXLDSymEntry64 *entry = NULL; + const char *name = NULL; + u_int i = 0; + + entry = (KXLDSymEntry64 *) (state->file + hdr->symoff); + for (i = 0; i < hdr->nsyms; ++i, ++entry) { + name = (const char *) (state->file + entry->nameoff); + rval = kxld_dict_insert(defined_symbols, name, &entry->addr); + require_noerr(rval, finish); + + if (entry->flags & KXLD_SYM_OBSOLETE) { + rval = kxld_dict_insert(obsolete_symbols, name, &entry->addr); + require_noerr(rval, finish); + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* 
+*******************************************************************************/ +u_int +kxld_state_get_num_vtables(KXLDState *state) +{ + return state->vtables.nitems; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_state_get_vtables(KXLDState *state, KXLDDict *patched_vtables) +{ + kern_return_t rval = KERN_FAILURE; + KXLDVTable *vtable = NULL; + u_int i = 0; + + check(state); + check(patched_vtables); + + for (i = 0; i < state->vtables.nitems; ++i) { + vtable = kxld_array_get_item(&state->vtables, i); + rval = kxld_dict_insert(patched_vtables, vtable->name, vtable); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_state_get_cputype(const KXLDState *state, cpu_type_t *cputype, + cpu_subtype_t *cpusubtype) +{ + KXLDLinkStateHdr *hdr = (KXLDLinkStateHdr *) state->file; + + check(state); + check(cputype); + check(cpusubtype); + + *cputype = hdr->cputype; + *cpusubtype = hdr->cpusubtype; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_state_export_kext_to_file(KXLDKext *kext, u_char **file, u_long *filesize, + KXLDDict *strings, KXLDArray *tmps) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + const KXLDSymtab *symtab = NULL; + const KXLDArray *vtables = NULL; + const KXLDVTable *vtable = NULL; + u_int nsyms = 0; + u_int nsymentries = 0; + u_int i = 0; + u_long strsize = 0; + + check(kext); + check(file); + check(tmps); + + bzero(&iter, sizeof(iter)); + + /* Get the vtables and symbol tables from the kext */ + + kxld_kext_get_vtables(kext, &vtables); + 
symtab = kxld_kext_get_symtab(kext);
+    require_action(symtab, finish, rval=KERN_FAILURE);
+
+    /* Count the number of symentries we'll need in the linkstate: one per
+     * exported symbol plus one per entry of every vtable.
+     */
+
+    kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE);
+
+    nsyms = kxld_symtab_iterator_get_num_remaining(&iter);
+    nsymentries = nsyms;
+    for (i = 0; i < vtables->nitems; ++i) {
+        vtable = kxld_array_get_item(vtables, i);
+        nsymentries += vtable->entries.nitems;
+    }
+
+    /* Initialize the string index (also yields the string table size) */
+
+    rval = init_string_index(strings, tmps, &iter, vtables, nsymentries,
+        &strsize);
+    require_noerr(rval, finish);
+
+    /* Create the linkstate file */
+
+    rval = create_link_state(file, filesize, kext, &iter, vtables,
+        strings, nsyms, nsymentries, strsize);
+    require_noerr(rval, finish);
+
+    /* Swap if necessary: user-space builds only, when the kext's target
+     * needs swapping.
+     */
+
+#if !KERNEL
+    if (kxld_kext_target_needs_swap(kext)) unswap_link_state(*file);
+#endif /* !KERNEL */
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+
+/*******************************************************************************
+* Prepares the string index used to lay out the link state's string table.
+* Initializes strings (a string-keyed dictionary) and tmps (an array of
+* nsymentries u_long slots), then indexes the name of every symbol yielded by
+* iter, every vtable name, and every patched vtable entry name. *_strsize is
+* zeroed on entry and receives the accumulated string size on success.
+*******************************************************************************/
+static kern_return_t
+init_string_index(KXLDDict *strings, KXLDArray *tmps, KXLDSymtabIterator *iter,
+    const KXLDArray *vtables, u_int nsymentries, u_long *_strsize)
+{
+    kern_return_t rval = KERN_SUCCESS;
+    const KXLDSym *sym = NULL;
+    const KXLDVTable *vtable = NULL;
+    const KXLDVTableEntry *ventry = NULL;
+    u_long strsize = 0;     /* running size of the string table, in bytes */
+    u_int tmpi = 0;         /* next unused slot in tmps */
+    u_int i = 0;
+    u_int j = 0;
+
+    check(strings);
+    check(tmps);
+    check(iter);
+    check(vtables);
+    check(_strsize);
+
+    *_strsize = 0;
+
+    /* Initialize the string dictionary and string offset array */
+
+    rval = kxld_dict_init(strings, kxld_dict_string_hash, kxld_dict_string_cmp,
+        nsymentries);
+    require_noerr(rval, finish);
+
+    rval = kxld_array_init(tmps, sizeof(u_long), nsymentries);
+    require_noerr(rval, finish);
+
+    /* Add all of the strings from the symbol table to the dictionary */
+
+
kxld_symtab_iterator_reset(iter); + while ((sym = kxld_symtab_iterator_get_next(iter))) { + rval = add_string_to_index(strings, sym->name, tmps, &tmpi, &strsize); + require_noerr(rval, finish); + } + + /* Add all of the strings from the vtables entries to the dictionary */ + + for (i = 0; i < vtables->nitems; ++i) { + vtable = kxld_array_get_item(vtables, i); + rval = add_string_to_index(strings, vtable->name, tmps, &tmpi, &strsize); + require_noerr(rval, finish); + + for (j = 0; j < vtable->entries.nitems; ++j) { + ventry = kxld_array_get_item(&vtable->entries, j); + if (ventry->patched.name) { + rval = add_string_to_index(strings, ventry->patched.name, tmps, + &tmpi, &strsize); + require_noerr(rval, finish); + } + } + } + + *_strsize = strsize; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +add_string_to_index(KXLDDict *strings, const char *str, KXLDArray *tmps, + u_int *tmpi, u_long *stroff) +{ + kern_return_t rval = KERN_FAILURE; + u_long *tmpp = NULL; + + if (!kxld_dict_find(strings, str)) { + tmpp = kxld_array_get_item(tmps, (*tmpi)++); + *tmpp = *stroff; + + rval = kxld_dict_insert(strings, str, tmpp); + require_noerr(rval, finish); + + *stroff += strlen(str) + 1; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +state_is_32_bit(KXLDLinkStateHdr *state) +{ + return kxld_is_32_bit(state->cputype); +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +create_link_state(u_char **_file, u_long *_filesize, const KXLDKext *kext, + KXLDSymtabIterator 
*iter, const KXLDArray *vtables, KXLDDict *strings,
+    u_int nsyms, u_int nsymentries, u_long strsize)
+{
+    kern_return_t rval = KERN_SUCCESS;
+    u_char *file = NULL;
+    KXLDLinkStateHdr *hdr = NULL;
+    KXLDDictIterator striter;
+#if KXLD_USER_OR_OBJECT
+    KXLDSectionName *dstsectname = NULL;
+    KXLDSectionName *srcsectname = NULL;
+    const KXLDArray *section_order = NULL;
+    u_int i = 0;
+#endif
+    const char *name = NULL;
+    char *dstname = NULL;
+    u_long *stridx = 0;
+    u_long hsize = 0;       /* size of all headers */
+    u_long dsize = 0;       /* size of all symbol entries */
+    u_long filesize = 0;
+    u_long hoff = 0;        /* write cursor within the header area */
+    u_long doff = 0;        /* write cursor within the symbol-entry area */
+    u_long stroff = 0;      /* file offset of the string table */
+
+    check(_file);
+    check(iter);
+    check(vtables);
+    check(strings);
+
+    *_file = NULL;
+    *_filesize = 0;
+
+#if KXLD_USER_OR_OBJECT
+    section_order = kxld_kext_get_section_order(kext);
+#endif
+
+    /* Calculate header and data size */
+
+    hsize = sizeof(KXLDLinkStateHdr);
+    hsize += vtables->nitems * sizeof(KXLDVTableHdr);
+#if KXLD_USER_OR_OBJECT
+    if (section_order) {
+        hsize += section_order->nitems * sizeof(KXLDSectionName);
+    }
+#endif
+
+    if (kxld_kext_is_32_bit(kext)) {
+        dsize = nsymentries * sizeof(KXLDSymEntry32);
+    } else {
+        dsize = nsymentries * sizeof(KXLDSymEntry64);
+    }
+
+    filesize = hsize + dsize + strsize;
+
+    hoff = 0;
+    doff = hsize;
+    stroff = hsize + dsize;
+
+    /* Allocate the link state */
+
+    file = kxld_alloc_pageable(filesize);
+    require_action(file, finish, rval=KERN_RESOURCE_SHORTAGE);
+
+    /* Initialize link state header */
+
+    hdr = (KXLDLinkStateHdr *) file;
+    hoff += sizeof(*hdr);
+
+    /* The cputype must be stored before state_is_32_bit() is consulted:
+     * that helper reads hdr->cputype, and hdr points at freshly allocated
+     * memory whose contents are not yet meaningful.
+     */
+    kxld_kext_get_cputype(kext, &hdr->cputype, &hdr->cpusubtype);
+
+    if (state_is_32_bit(hdr)) {
+        hdr->magic = LINK_STATE_MAGIC;
+    } else {
+        hdr->magic = LINK_STATE_MAGIC_64;
+    }
+    hdr->version = LINK_STATE_VERSION;
+    hdr->nsects = 0;
+    hdr->sectoff = 0;       /* stays 0 unless a section order is written */
+    hdr->nvtables = vtables->nitems;
+    hdr->nsyms = nsyms;
+
+#if KXLD_USER_OR_OBJECT
+    if (section_order) {
+        hdr->nsects = section_order->nitems;
+        hdr->sectoff = (uint32_t) hoff;
+
+        dstsectname = (KXLDSectionName *) (file + hoff);
+        hoff +=
section_order->nitems * sizeof(*dstsectname);
+
+        for (i = 0; i < section_order->nitems; ++i, ++dstsectname) {
+            srcsectname = kxld_array_get_item(section_order, i);
+            memcpy(dstsectname, srcsectname, sizeof(*srcsectname));
+        }
+    }
+#endif
+
+    hdr->voff = (uint32_t) hoff;    /* vtable headers follow what was written so far */
+    hdr->symoff = (uint32_t) doff;  /* symbol entries start the data area */
+
+    /* Copy strings. Each dictionary value holds a string's offset within the
+     * string table; it is rebased in place to an offset from the start of the
+     * file (by adding stroff) and the string is copied to that position.
+     */
+
+    kxld_dict_iterator_init(&striter, strings);
+    kxld_dict_iterator_get_next(&striter, (const void **) &name, (void **) &stridx);
+    while (name) {
+        *stridx += stroff;
+        dstname = (char *) (file + *stridx);
+        strlcpy(dstname, name, filesize - *stridx);
+        kxld_dict_iterator_get_next(&striter, (const void **) &name, (void **) &stridx);
+    }
+
+    /* Copy symbols (advances doff past the entries written) */
+
+    KXLD_3264_FUNC(state_is_32_bit(hdr), rval,
+        copy_symbols_32, copy_symbols_64,
+        file, &doff, iter, strings);
+    require_noerr(rval, finish);
+
+    /* Copy vtables (headers at hoff, entries at doff) */
+
+    KXLD_3264_FUNC(state_is_32_bit(hdr), rval,
+        copy_vtables_32, copy_vtables_64,
+        file, &hoff, &doff, vtables, strings);
+    require_noerr(rval, finish);
+
+    *_file = file;
+    *_filesize = filesize;
+    file = NULL;    /* ownership handed to the caller; skip the free below */
+    rval = KERN_SUCCESS;
+
+finish:
+
+    if (file) {
+        kxld_page_free(file, filesize);
+        file = NULL;
+    }
+
+    return rval;
+}
+
+#if KXLD_USER_OR_ILP32
+/*******************************************************************************
+* Writes one KXLDSymEntry32 at file + *data_offset for every symbol yielded by
+* iter (the iterator is reset first), advancing *data_offset by the entry size
+* each time. The entry's nameoff is the value stored for the symbol's name in
+* strings; a name missing from strings fails with KERN_FAILURE.
+*******************************************************************************/
+static kern_return_t
+copy_symbols_32(u_char *file, u_long *data_offset, KXLDSymtabIterator *iter,
+    const KXLDDict *strings)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSymEntry32 *symentry = NULL;
+    const KXLDSym *sym = NULL;
+    u_long *stridx = 0;
+
+    kxld_symtab_iterator_reset(iter);
+    while ((sym = kxld_symtab_iterator_get_next(iter))) {
+        symentry = (KXLDSymEntry32 *) (file + *data_offset);
+        stridx = kxld_dict_find(strings, sym->name);
+        require_action(stridx, finish, rval=KERN_FAILURE);
+
+        /* Initialize the symentry */
+
+        symentry->nameoff = (uint32_t) *stridx;
+        if (sym->predicates.is_thumb)
{   /* symbols marked is_thumb are recorded with the low address bit set */
+            symentry->addr = (uint32_t) sym->link_addr | 1;
+        } else {
+            symentry->addr = (uint32_t) sym->link_addr;
+        }
+        symentry->flags = 0;
+
+        /* Set any flags */
+
+        symentry->flags |= (kxld_sym_is_obsolete(sym)) ? KXLD_SYM_OBSOLETE : 0;
+
+        *data_offset += sizeof(*symentry);
+    }
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_ILP32 */
+
+#if KXLD_USER_OR_LP64
+/*******************************************************************************
+* Writes one KXLDSymEntry64 at file + *data_offset for every symbol yielded by
+* iter (the iterator is reset first), advancing *data_offset by the entry size
+* each time. nameoff comes from the value stored for the symbol's name in
+* strings, addr is the symbol's link_addr, and flags carries KXLD_SYM_OBSOLETE
+* when kxld_sym_is_obsolete() reports so. A name missing from strings fails
+* with KERN_FAILURE.
+*******************************************************************************/
+static kern_return_t
+copy_symbols_64(u_char *file, u_long *data_offset, KXLDSymtabIterator *iter,
+    const KXLDDict *strings)
+{
+    kern_return_t rval = KERN_FAILURE;
+    KXLDSymEntry64 *symentry = NULL;
+    const KXLDSym *sym = NULL;
+    u_long *stridx = 0;
+
+    kxld_symtab_iterator_reset(iter);
+    while ((sym = kxld_symtab_iterator_get_next(iter))) {
+        symentry = (KXLDSymEntry64 *) (file + *data_offset);
+        stridx = kxld_dict_find(strings, sym->name);
+        require_action(stridx, finish, rval=KERN_FAILURE);
+
+        /* Initialize the symentry */
+
+        symentry->nameoff = (uint32_t) *stridx;
+        symentry->addr = (uint64_t) sym->link_addr;
+        symentry->flags = 0;
+
+        /* Set any flags */
+
+        symentry->flags |= (kxld_sym_is_obsolete(sym)) ?
KXLD_SYM_OBSOLETE : 0; + + *data_offset += sizeof(*symentry); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +copy_vtables_32(u_char *file, u_long *header_offset, u_long *data_offset, + const KXLDArray *vtables, const KXLDDict *strings) +{ + kern_return_t rval = KERN_FAILURE; + KXLDVTable *vtable = NULL; + KXLDVTableHdr *vhdr = NULL; + KXLDVTableEntry *ventry = NULL; + KXLDSymEntry32 *symentry = NULL; + u_long *stridx = 0; + u_int i = 0; + u_int j = 0; + + for (i = 0; i < vtables->nitems; ++i) { + vtable = kxld_array_get_item(vtables, i); + stridx = kxld_dict_find(strings, vtable->name); + require_action(stridx, finish, rval=KERN_FAILURE); + + vhdr = (KXLDVTableHdr *) (file + *header_offset); + vhdr->nameoff = (uint32_t) *stridx; + vhdr->nentries = vtable->entries.nitems; + vhdr->vtableoff = (uint32_t) (*data_offset); + + *header_offset += sizeof(*vhdr); + + for(j = 0; j < vtable->entries.nitems; ++j) { + + ventry = kxld_array_get_item(&vtable->entries, j); + symentry = (KXLDSymEntry32 *) (file + *data_offset); + + if (ventry->patched.name) { + stridx = kxld_dict_find(strings, ventry->patched.name); + require_action(stridx, finish, rval=KERN_FAILURE); + + symentry->nameoff = (uint32_t) *stridx; + symentry->addr = (uint32_t) ventry->patched.addr; + } else { + symentry->nameoff = 0; + symentry->addr = 0; + } + + *data_offset += sizeof(*symentry); + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +copy_vtables_64(u_char *file, u_long *header_offset, u_long 
*data_offset, + const KXLDArray *vtables, const KXLDDict *strings) +{ + kern_return_t rval = KERN_FAILURE; + KXLDVTable *vtable = NULL; + KXLDVTableHdr *vhdr = NULL; + KXLDVTableEntry *ventry = NULL; + KXLDSymEntry64 *symentry = NULL; + u_long *stridx = 0; + u_int i = 0; + u_int j = 0; + + for (i = 0; i < vtables->nitems; ++i) { + vtable = kxld_array_get_item(vtables, i); + stridx = kxld_dict_find(strings, vtable->name); + require_action(stridx, finish, rval=KERN_FAILURE); + + vhdr = (KXLDVTableHdr *) (file + *header_offset); + vhdr->nameoff = (uint32_t) *stridx; + vhdr->nentries = vtable->entries.nitems; + vhdr->vtableoff = (uint32_t) (*data_offset); + + *header_offset += sizeof(*vhdr); + + for(j = 0; j < vtable->entries.nitems; ++j) { + + ventry = kxld_array_get_item(&vtable->entries, j); + symentry = (KXLDSymEntry64 *) (file + *data_offset); + + if (ventry->patched.name) { + stridx = kxld_dict_find(strings, ventry->patched.name); + require_action(stridx, finish, rval=KERN_FAILURE); + + symentry->nameoff = (uint32_t) *stridx; + symentry->addr = (uint64_t) ventry->patched.addr; + } else { + symentry->nameoff = 0; + symentry->addr = 0; + } + + *data_offset += sizeof(*symentry); + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +#if !KERNEL +/******************************************************************************* +*******************************************************************************/ +static boolean_t +swap_link_state(u_char *state) +{ + KXLDLinkStateHdr *state_hdr = (KXLDLinkStateHdr *) state; + + if (state_hdr->magic == CIGAM_ETATS_KNIL) { + swap_link_state_32(state); + return TRUE; + } else if (state_hdr->magic == CIGAM_ETATS_KNIL_64) { + swap_link_state_64(state); + return TRUE; + } + + return FALSE; +} + +/******************************************************************************* +*******************************************************************************/ +static void 
+swap_link_state_32(u_char *state) +{ + KXLDLinkStateHdr *state_hdr = NULL; + KXLDVTableHdr *vtable_hdr = NULL; + KXLDSymEntry32 *entry = NULL; + u_int i = 0; + u_int j = 0; + + state_hdr = (KXLDLinkStateHdr *) state; + + if (state_hdr->magic != CIGAM_ETATS_KNIL) return; + + /* Swap the header */ + swap_state_hdr(state_hdr); + + /* Swap the symbols */ + entry = (KXLDSymEntry32 *) (state + state_hdr->symoff); + for (i = 0; i < state_hdr->nsyms; ++i, ++entry) { + swap_sym_entry_32(entry); + } + + /* Swap the vtable headers and entries */ + vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff); + for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) { + swap_vtable_hdr(vtable_hdr); + + entry = (KXLDSymEntry32 *) (state + vtable_hdr->vtableoff); + for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) { + swap_sym_entry_32(entry); + } + } +} + +/******************************************************************************* +*******************************************************************************/ +static void +swap_link_state_64(u_char *state) +{ + KXLDLinkStateHdr *state_hdr = NULL; + KXLDVTableHdr *vtable_hdr = NULL; + KXLDSymEntry64 *entry = NULL; + u_int i = 0; + u_int j = 0; + + state_hdr = (KXLDLinkStateHdr *) state; + + if (state_hdr->magic != CIGAM_ETATS_KNIL_64) return; + + /* Swap the header */ + swap_state_hdr(state_hdr); + + /* Swap the symbols */ + entry = (KXLDSymEntry64 *) (state + state_hdr->symoff); + for (i = 0; i < state_hdr->nsyms; ++i, ++entry) { + swap_sym_entry_64(entry); + } + + /* Swap the vtable headers and entries */ + vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff); + for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) { + swap_vtable_hdr(vtable_hdr); + + entry = (KXLDSymEntry64 *) (state + vtable_hdr->vtableoff); + for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) { + swap_sym_entry_64(entry); + } + } +} + +/******************************************************************************* 
+*******************************************************************************/
+static boolean_t    /* TRUE if the magic was LINK_STATE_MAGIC[_64] and the state was unswapped */
+unswap_link_state(u_char *state)
+{
+    KXLDLinkStateHdr *state_hdr = (KXLDLinkStateHdr *) state;
+
+    if (state_hdr->magic == LINK_STATE_MAGIC) {
+        unswap_link_state_32(state);
+        return TRUE;
+    } else if (state_hdr->magic == LINK_STATE_MAGIC_64) {
+        unswap_link_state_64(state);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*******************************************************************************
+* Byte-swaps a 32-bit link state that is currently in host byte order (magic
+* equals LINK_STATE_MAGIC). The traversal runs in the reverse order of
+* swap_link_state_32(): vtable entries before their header, and the link
+* state header last, because every count and offset must be read while it is
+* still in host byte order.
+*******************************************************************************/
+static void
+unswap_link_state_32(u_char *state)
+{
+    KXLDLinkStateHdr *state_hdr = NULL;
+    KXLDVTableHdr *vtable_hdr = NULL;
+    KXLDSymEntry32 *entry = NULL;
+    u_int i = 0;
+    u_int j = 0;
+
+    state_hdr = (KXLDLinkStateHdr *) state;
+
+    if (state_hdr->magic != LINK_STATE_MAGIC) return;
+
+    /* Unswap the vtables and their headers */
+    vtable_hdr = (KXLDVTableHdr *) (state + state_hdr->voff);
+    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
+        entry = (KXLDSymEntry32 *) (state + vtable_hdr->vtableoff);
+        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
+            swap_sym_entry_32(entry);
+        }
+
+        swap_vtable_hdr(vtable_hdr);
+    }
+
+    /* Unswap the symbols themselves */
+    entry = (KXLDSymEntry32 *) (state + state_hdr->symoff);
+    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
+        swap_sym_entry_32(entry);
+    }
+
+    /* Unswap the header */
+    swap_state_hdr(state_hdr);
+}
+
+/*******************************************************************************
+* 64-bit counterpart of unswap_link_state_32(): acts only on link states
+* whose magic is LINK_STATE_MAGIC_64 and treats entries as KXLDSymEntry64.
+*******************************************************************************/
+static void
+unswap_link_state_64(u_char *state)
+{
+    KXLDLinkStateHdr *state_hdr = NULL;
+    KXLDVTableHdr *vtable_hdr = NULL;
+    KXLDSymEntry64 *entry = NULL;
+    u_int i = 0;
+    u_int j = 0;
+
+    state_hdr = (KXLDLinkStateHdr *) state;
+
+    if (state_hdr->magic != LINK_STATE_MAGIC_64) return;
+
+    /* Unswap the vtables and their headers */
+    vtable_hdr = (KXLDVTableHdr
*) (state + state_hdr->voff);
+    for (i = 0; i < state_hdr->nvtables; ++i, ++vtable_hdr) {
+        /* Entries first: vtableoff and nentries are still in host order */
+        entry = (KXLDSymEntry64 *) (state + vtable_hdr->vtableoff);
+        for (j = 0; j < vtable_hdr->nentries; ++j, ++entry) {
+            swap_sym_entry_64(entry);
+        }
+
+        swap_vtable_hdr(vtable_hdr);
+    }
+
+    /* Unswap the symbols themselves */
+    entry = (KXLDSymEntry64 *) (state + state_hdr->symoff);
+    for (i = 0; i < state_hdr->nsyms; ++i, ++entry) {
+        swap_sym_entry_64(entry);
+    }
+
+    /* Unswap the header (last, since the loops above read its fields) */
+    swap_state_hdr(state_hdr);
+}
+
+/*******************************************************************************
+* Reverses the byte order of every field of the link state header in place.
+*******************************************************************************/
+static void
+swap_state_hdr(KXLDLinkStateHdr *state_hdr)
+{
+    state_hdr->magic = OSSwapInt32(state_hdr->magic);
+    state_hdr->version = OSSwapInt32(state_hdr->version);
+    state_hdr->cputype = OSSwapInt32(state_hdr->cputype);
+    state_hdr->cpusubtype = OSSwapInt32(state_hdr->cpusubtype);
+    state_hdr->nsects = OSSwapInt32(state_hdr->nsects);
+    state_hdr->sectoff = OSSwapInt32(state_hdr->sectoff);
+    state_hdr->nvtables = OSSwapInt32(state_hdr->nvtables);
+    state_hdr->voff = OSSwapInt32(state_hdr->voff);
+    state_hdr->nsyms = OSSwapInt32(state_hdr->nsyms);
+    state_hdr->symoff = OSSwapInt32(state_hdr->symoff);
+}
+
+/*******************************************************************************
+* Reverses the byte order of every field of a vtable header in place.
+*******************************************************************************/
+static void
+swap_vtable_hdr(KXLDVTableHdr *vtable_hdr)
+{
+    vtable_hdr->nameoff = OSSwapInt32(vtable_hdr->nameoff);
+    vtable_hdr->vtableoff = OSSwapInt32(vtable_hdr->vtableoff);
+    vtable_hdr->nentries = OSSwapInt32(vtable_hdr->nentries);
+}
+
+/*******************************************************************************
+* Reverses the byte order of a 32-bit symbol entry in place.
+*******************************************************************************/
+static void
+swap_sym_entry_32(KXLDSymEntry32 *entry)
+{
+    entry->nameoff = OSSwapInt32(entry->nameoff);
+    entry->addr =
OSSwapInt32(entry->addr); +} + +/******************************************************************************* +*******************************************************************************/ +static void +swap_sym_entry_64(KXLDSymEntry64 *entry) +{ + entry->nameoff = OSSwapInt32(entry->nameoff); + entry->addr = OSSwapInt64(entry->addr); +} +#endif /* !KERNEL */ + diff --git a/libkern/kxld/kxld_state.h b/libkern/kxld/kxld_state.h new file mode 100644 index 000000000..22878159c --- /dev/null +++ b/libkern/kxld/kxld_state.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KXLD_STATE_H_
+#define _KXLD_STATE_H_
+
+#include
+#if KERNEL
+    #include
+#else
+    #include "kxld_types.h"
+#endif
+
+#include "kxld_array.h"
+#include "kxld_util.h"
+
+struct kxld_dict;
+struct kxld_kext;
+struct kxld_link_state_hdr;
+typedef struct kxld_state KXLDState;
+typedef struct kxld_link_state_hdr KXLDLinkStateHdr;
+typedef struct kxld_vtable_hdr KXLDVTableHdr;
+typedef struct kxld_sym_entry_32 KXLDSymEntry32;
+typedef struct kxld_sym_entry_64 KXLDSymEntry64;
+
+struct kxld_state {
+    u_char *file;           /* link state image; begins with a KXLDLinkStateHdr */
+    KXLDArray vtables;      /* vtables exposed through kxld_state_get_vtables() */
+    boolean_t swap;
+};
+
+/*
+ * The format of the link state object is as follows:
+
+ *      Field                ***    Type                *
+ **************************************************
+ * Link state header     ***    KXLDLinkStateHdr    *
+ **************************************************
+ * Section order entries ***    KXLDSectionName     *
+ **************************************************
+ * Vtable headers        ***    KXLDVTableHdr       *
+ **************************************************
+ * VTables               ***    KXLDSymEntry[32|64] *
+ **************************************************
+ * Exported symbols      ***    KXLDSymEntry[32|64] *
+ **************************************************
+ * String table          ***    char[]              *
+ **************************************************
+
+ * Each region is located through the offsets stored in the headers below,
+ * all of which are measured from the start of the file.
+ */
+
+struct kxld_link_state_hdr {
+    uint32_t magic;             /* LINK_STATE_MAGIC or LINK_STATE_MAGIC_64 */
+    uint32_t version;           /* LINK_STATE_VERSION */
+    cpu_type_t cputype;
+    cpu_subtype_t cpusubtype;
+    uint32_t nsects;            /* number of section order entries */
+    uint32_t sectoff;           /* file offset of the section order entries */
+    uint32_t nvtables;          /* number of vtable headers */
+    uint32_t voff;              /* file offset of the vtable headers */
+    uint32_t nsyms;             /* number of exported symbol entries */
+    uint32_t symoff;            /* file offset of the exported symbol entries */
+};
+
+struct kxld_vtable_hdr {
+    uint32_t nameoff;           /* file offset of the vtable's name string */
+    uint32_t vtableoff;         /* file offset of the vtable's symbol entries */
+    uint32_t nentries;          /* number of symbol entries in the vtable */
+};
+
+struct kxld_sym_entry_32 {
+    uint32_t addr;
+    uint32_t nameoff;           /* file offset of the symbol's name string */
+    uint32_t flags;             /* KXLD_SYM_* bits */
+};
+
+struct kxld_sym_entry_64 {
+    uint64_t addr;
+    uint32_t nameoff;           /* file offset of the symbol's name string */
+    uint32_t flags;             /* KXLD_SYM_* bits */
+} __attribute__((aligned(16)));
+
+#define KXLD_SYM_OBSOLETE 0x1   /* entry is also reported as an obsolete symbol */
+
+/*******************************************************************************
+* Constructors
and destructors +*******************************************************************************/ + +kern_return_t kxld_state_init_from_file(KXLDState *state, u_char *file, + KXLDArray *section_order) + __attribute__((nonnull(1,2), visibility("hidden"))); + +void kxld_state_clear(KXLDState *state) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_state_deinit(KXLDState *state) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +u_int kxld_state_get_num_symbols(KXLDState *state) + __attribute__((pure, nonnull, visibility("hidden"))); + +kern_return_t kxld_state_get_symbols(KXLDState *state, + struct kxld_dict *defined_symbols, + struct kxld_dict *obsolete_symbols) + __attribute__((nonnull, visibility("hidden"))); + +u_int kxld_state_get_num_vtables(KXLDState *state) + __attribute__((pure, nonnull, visibility("hidden"))); + +kern_return_t kxld_state_get_vtables(KXLDState *state, + struct kxld_dict *patched_vtables) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_state_get_cputype(const KXLDState *state, + cpu_type_t *cputype, cpu_subtype_t *cpusubtype) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Exporters +*******************************************************************************/ + +kern_return_t kxld_state_export_kext_to_file(struct kxld_kext *kext, u_char **file, + u_long *filesize, struct kxld_dict *tmpdict, KXLDArray *tmps) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_STATE_H_ */ + diff --git a/libkern/kxld/kxld_stubs.c b/libkern/kxld/kxld_stubs.c new file mode 100644 index 000000000..511e82a10 --- /dev/null +++ b/libkern/kxld/kxld_stubs.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * These kxld stubs panic if the kernel is built without kxld support but + * something tries to use it anyway. 
+ */
+
+#if !CONFIG_KXLD
+
+#include
+#include
+
+#include
+
+kern_return_t   /* stub: ignores every argument and reports success */
+kxld_create_context(KXLDContext **_context __unused,
+    KXLDAllocateCallback allocate_callback __unused,
+    KXLDLoggingCallback logging_callback __unused,
+    KXLDFlags flags __unused, cpu_type_t cputype __unused,
+    cpu_subtype_t cpusubtype __unused)
+{
+    return KERN_SUCCESS;
+}
+
+void
+kxld_destroy_context(KXLDContext *context __unused)
+{
+    /* Do nothing */
+}
+
+kern_return_t   /* stub: panics, naming the kext it was asked to link */
+kxld_link_file(
+    KXLDContext *context __unused,
+    u_char *file __unused,
+    u_long size __unused,
+    const char *name,
+    void *callback_data __unused,
+    u_char **deps __unused,
+    u_int ndeps __unused,
+    u_char **_linked_object __unused,
+    kxld_addr_t *kmod_info_kern __unused,
+    u_char **_link_state __unused,
+    u_long *_link_state_size __unused,
+    u_char **_symbol_file __unused,
+    u_long *_symbol_file_size __unused)
+{
+    panic("%s (%s) called in kernel without kxld support", __PRETTY_FUNCTION__, name);
+    return KERN_SUCCESS;    /* follows the panic() call; kept for the return type */
+}
+
+boolean_t       /* stub: accepts any string */
+kxld_validate_copyright_string(const char *str __unused)
+{
+    return TRUE;
+}
+
+#endif
+
diff --git a/libkern/kxld/kxld_sym.c b/libkern/kxld/kxld_sym.c
new file mode 100644
index 000000000..52c5e1d90
--- /dev/null
+++ b/libkern/kxld/kxld_sym.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_sect.h" +#include "kxld_sym.h" +#include "kxld_util.h" + +#define CXX_PREFIX "__Z" +#define VTABLE_PREFIX CXX_PREFIX "TV" +#define OSOBJ_PREFIX CXX_PREFIX "N" +#define RESERVED_TOKEN "_RESERVED" +#define METACLASS_TOKEN "10gMetaClassE" +#define SUPER_METACLASS_POINTER_TOKEN "10superClassE" +#define METACLASS_VTABLE_PREFIX VTABLE_PREFIX "N" +#define METACLASS_VTABLE_SUFFIX "9MetaClassE" +#define CXX_PURE_VIRTUAL "___cxa_pure_virtual" +#define FINAL_CLASS_TOKEN "14__OSFinalClassEv" + +/******************************************************************************* +* Prototypes +*******************************************************************************/ + +static kern_return_t init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc) + __attribute__((nonnull)); +static void init_sym_sectnum(KXLDSym *sym, u_int n_sect) + __attribute__((nonnull)); +static kern_return_t extract_inner_string(const char *str, const char *prefix, + const char *suffix, char *buf, u_long len); + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t 
+kxld_sym_init_from_macho32(KXLDSym *sym, char *strtab, const struct nlist *src)
+{   /* Initializes sym from a 32-bit Mach-O nlist entry; names point into strtab */
+    kern_return_t rval = KERN_FAILURE;
+
+    check(sym);
+    check(strtab);
+    check(src);
+
+    bzero(sym, sizeof(*sym));
+    sym->name = strtab + src->n_un.n_strx;
+    sym->type = src->n_type;
+    sym->desc = src->n_desc;
+    sym->base_addr = src->n_value;
+    sym->link_addr = sym->base_addr;
+
+    rval = init_predicates(sym, src->n_type, src->n_desc);
+    require_noerr(rval, finish);
+
+    (void) init_sym_sectnum(sym, src->n_sect);
+
+    if (kxld_sym_is_indirect(sym)) {
+        sym->alias = strtab + src->n_value;     /* n_value is used as a string table offset here */
+    }
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_ILP32 */
+
+#if KXLD_USER_OR_LP64
+/*******************************************************************************
+* Initializes sym from a 64-bit Mach-O nlist_64 entry. The symbol is zeroed,
+* its name is taken from strtab at n_strx, type/desc/address are copied from
+* the entry (link_addr starts equal to base_addr), the predicates and section
+* number are derived, and for indirect symbols the alias is taken from strtab
+* at n_value. Returns the error from init_predicates() if the type is invalid.
+*******************************************************************************/
+kern_return_t
+kxld_sym_init_from_macho64(KXLDSym *sym, char *strtab, const struct nlist_64 *src)
+{
+    kern_return_t rval = KERN_FAILURE;
+
+    check(sym);
+    check(strtab);
+    check(src);
+
+    bzero(sym, sizeof(*sym));
+    sym->name = strtab + src->n_un.n_strx;
+    sym->type = src->n_type;
+    sym->desc = src->n_desc;
+    sym->base_addr = src->n_value;
+    sym->link_addr = sym->base_addr;
+
+    rval = init_predicates(sym, src->n_type, src->n_desc);
+    require_noerr(rval, finish);
+
+    (void) init_sym_sectnum(sym, src->n_sect);
+
+    if (kxld_sym_is_indirect(sym)) {
+        sym->alias = strtab + src->n_value;     /* n_value is used as a string table offset here */
+    }
+
+    rval = KERN_SUCCESS;
+
+finish:
+    return rval;
+}
+#endif /* KXLD_USER_OR_LP64 */
+
+/*******************************************************************************
+* Initializes sym as an external absolute symbol (N_ABS | N_EXT, NO_SECT)
+* named name whose link address is link_addr, and marks it resolved.
+*******************************************************************************/
+void
+kxld_sym_init_absolute(KXLDSym *sym, char *name, kxld_addr_t link_addr)
+{
+    check(sym);
+    check(name);
+
+    bzero(sym, sizeof(*sym));
+
+    sym->name = name;
+    sym->link_addr = link_addr;
+    sym->type = N_ABS | N_EXT;
+    sym->sectnum = NO_SECT;
+
+    init_predicates(sym, N_ABS | N_EXT, 0);     /* result unused; N_ABS takes a valid switch case */
+
sym->predicates.is_resolved = TRUE; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_predicates(KXLDSym *sym, u_char n_type, u_short n_desc) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + + /* The type field is interpreted differently for normal symbols and stabs */ + if (n_type & N_STAB) { + sym->predicates.is_stab = 1; + + switch (n_type) { + /* Labeled as NO_SECT in stab.h */ + case N_GSYM: + case N_FNAME: + case N_RSYM: + case N_SSYM: + case N_LSYM: + case N_BINCL: + case N_PARAMS: + case N_VERSION: + case N_OLEVEL: + case N_PSYM: + case N_EINCL: + case N_LBRAC: + case N_EXCL: + case N_RBRAC: + case N_BCOMM: + case N_LENG: + case N_OPT: + case N_OSO: + sym->predicates.is_absolute = 1; + break; + /* Labeled as n_sect in stab.h */ + case N_FUN: + case N_STSYM: + case N_LCSYM: + case N_BNSYM: + case N_SLINE: + case N_ENSYM: + case N_SO: + case N_SOL: + case N_ENTRY: + case N_ECOMM: + case N_ECOML: + sym->predicates.is_section = 1; + break; + default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid N_STAB symbol type: %u.", n_type); + goto finish; + } + + /* Don't care about the C++ predicates for stabs */ + + } else { + u_char type = n_type & N_TYPE; + + /* Set the type-independent fields */ + if ((n_type & N_EXT) && !(n_type & N_PEXT)) { + sym->predicates.is_external = 1; + } + + if (n_desc & N_DESC_DISCARDED) { + sym->predicates.is_obsolete = 1; + } + + if (n_desc & N_WEAK_REF) { + sym->predicates.is_weak = 1; + } + + if (n_desc & N_ARM_THUMB_DEF) { + sym->predicates.is_thumb = 1; + } + + /* The first set of type fields are mutually exclusive, so they can be + * set with a switch statement. 
+ */ + switch (type) { + case N_ABS: + sym->predicates.is_absolute = 1; + break; + case N_SECT: + sym->predicates.is_section = 1; + break; + case N_UNDF: + if (sym->base_addr) { + sym->predicates.is_common = 1; + } else { + sym->predicates.is_undefined = 1; + } + break; + case N_INDR: + sym->predicates.is_indirect = 1; + break; + default: + rval = KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid symbol type: %u.", type); + goto finish; + } + + /* Set the C++-specific fields */ + if ((0 == strncmp(CXX_PREFIX, sym->name, const_strlen(CXX_PREFIX)))) { + sym->predicates.is_cxx = 1; + + if (0 == strncmp(sym->name, METACLASS_VTABLE_PREFIX, + const_strlen(METACLASS_VTABLE_PREFIX))) + { + sym->predicates.is_meta_vtable = 1; + } else if (0 == strncmp(sym->name, VTABLE_PREFIX, + const_strlen(VTABLE_PREFIX))) + { + sym->predicates.is_class_vtable = 1; + } else if (kxld_strstr(sym->name, RESERVED_TOKEN)) { + sym->predicates.is_padslot = 1; + } else if (kxld_strstr(sym->name, METACLASS_TOKEN)) { + sym->predicates.is_metaclass = 1; + } else if (kxld_strstr(sym->name, SUPER_METACLASS_POINTER_TOKEN)) { + sym->predicates.is_super_metaclass_pointer = 1; + } + } else if (streq_safe(CXX_PURE_VIRTUAL, sym->name, sizeof(CXX_PURE_VIRTUAL))) { + sym->predicates.is_cxx = 1; + sym->predicates.is_pure_virtual = 1; + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static void +init_sym_sectnum(KXLDSym *sym, u_int n_sect) +{ + /* The n_sect field is set to 0 when the symbol is not section-based, and + * the number of the section in which the symbol exists otherwise. + * Sometimes, symbols can be labeled as section-based, so we make sure that + * they have a valid section number, and set them as absolute if they don't. 
+ */ + + if (kxld_sym_is_section(sym)) { + if (n_sect) { + /* Convert the section number to an index into the section index */ + sym->sectnum = n_sect - 1; + } else { + sym->predicates.is_absolute = 1; + sym->predicates.is_section = 0; + } + } + +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_deinit(KXLDSym *sym __unused) +{ + check(sym); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_destroy(KXLDSym *sym) +{ + check(sym); + kxld_sym_deinit(sym); + kxld_free(sym, sizeof(*sym)); +} + + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_absolute(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_absolute); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_section(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_section); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_defined(const KXLDSym *sym) +{ + check(sym); + + return ((kxld_sym_is_absolute(sym) || kxld_sym_is_section(sym)) && + !sym->predicates.is_replaced); +} + + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_defined_locally(const KXLDSym *sym) +{ + check(sym); + + return (kxld_sym_is_defined(sym) && !sym->predicates.is_resolved); +} + 
+/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_external(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_external); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_exported(const KXLDSym *sym) +{ + check(sym); + + return (kxld_sym_is_defined_locally(sym) && kxld_sym_is_external(sym)); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_undefined(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_undefined); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_indirect(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_indirect); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_common(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_common); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_unresolved(const KXLDSym *sym) +{ + return ((kxld_sym_is_undefined(sym) && !sym->predicates.is_replaced) || + kxld_sym_is_indirect(sym) || kxld_sym_is_common(sym)); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t 
+kxld_sym_is_obsolete(const KXLDSym *sym) +{ + return (0 != sym->predicates.is_obsolete); +} + +#if KXLD_USER_OR_GOT +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_got(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_got); +} +#endif /* KXLD_USER_OR_GOT */ + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_stab(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_stab); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_weak(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_weak); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_cxx(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_cxx); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_pure_virtual(const KXLDSym *sym) +{ + return (0 != sym->predicates.is_pure_virtual); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_vtable(const KXLDSym *sym) +{ + check(sym); + + return kxld_sym_is_class_vtable(sym) || kxld_sym_is_metaclass_vtable(sym); +} + +/******************************************************************************* 
+*******************************************************************************/ +boolean_t +kxld_sym_is_class_vtable(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_class_vtable); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_metaclass_vtable(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_meta_vtable); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_padslot(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_padslot); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_metaclass(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_metaclass); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_is_super_metaclass_pointer(const KXLDSym *sym) +{ + check(sym); + + return (0 != sym->predicates.is_super_metaclass_pointer); +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_sym_name_is_padslot(const char *name) +{ + check(name); + + return (kxld_strstr(name, RESERVED_TOKEN) != 0); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_sym_get_section_offset(const KXLDSym *sym, const KXLDSect *sect) +{ + check(sym); + + return (u_int) (sym->base_addr - 
sect->base_addr); +} + +#if KXLD_USER_OR_COMMON +/******************************************************************************* +*******************************************************************************/ +kxld_size_t +kxld_sym_get_common_size(const KXLDSym *sym) +{ + return sym->base_addr; +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_sym_get_common_align(const KXLDSym *sym) +{ + u_int align = GET_COMM_ALIGN(sym->desc); + if (!align) align = 3; + + return align; +} +#endif /* KXLD_USER_OR_COMMON */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_get_class_name_from_metaclass(const KXLDSym *sym, + char class_name[], u_long class_name_len) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + require_action(kxld_sym_is_metaclass(sym), finish, rval=KERN_FAILURE); + + rval = extract_inner_string(sym->name, OSOBJ_PREFIX, METACLASS_TOKEN, + class_name, class_name_len); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_get_class_name_from_super_metaclass_pointer(const KXLDSym *sym, + char class_name[], u_long class_name_len) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + require_action(kxld_sym_is_super_metaclass_pointer(sym), finish, + rval=KERN_FAILURE); + + rval = extract_inner_string(sym->name, OSOBJ_PREFIX, + SUPER_METACLASS_POINTER_TOKEN, class_name, class_name_len); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* 
+*******************************************************************************/ +kern_return_t +kxld_sym_get_class_name_from_vtable(const KXLDSym *sym, + char class_name[], u_long class_name_len) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + require_action(kxld_sym_is_class_vtable(sym), finish, rval=KERN_FAILURE); + + rval = kxld_sym_get_class_name_from_vtable_name(sym->name, + class_name, class_name_len); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_get_class_name_from_vtable_name(const char *vtable_name, + char class_name[], u_long class_name_len) +{ + kern_return_t rval = KERN_FAILURE; + + check(vtable_name); + + rval = extract_inner_string(vtable_name, VTABLE_PREFIX, NULL, + class_name, class_name_len); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_get_vtable_name_from_class_name(const char *class_name, + char vtable_name[], u_long vtable_name_len) +{ + kern_return_t rval = KERN_FAILURE; + u_long outlen = 0; + + check(class_name); + check(vtable_name); + + outlen = strlcpy(vtable_name, VTABLE_PREFIX, vtable_name_len); + require_action(outlen < vtable_name_len, finish, + rval=KERN_FAILURE); + + outlen = strlcat(vtable_name, class_name, vtable_name_len); + require_action(outlen < vtable_name_len, finish, + rval=KERN_FAILURE); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t 
+kxld_sym_get_meta_vtable_name_from_class_name(const char *class_name, + char meta_vtable_name[], u_long meta_vtable_name_len) +{ + kern_return_t rval = KERN_FAILURE; + u_long outlen = 0; + + check(class_name); + check(meta_vtable_name); + + outlen = strlcpy(meta_vtable_name, METACLASS_VTABLE_PREFIX, + meta_vtable_name_len); + require_action(outlen < meta_vtable_name_len, finish, + rval=KERN_FAILURE); + + outlen = strlcat(meta_vtable_name, class_name, meta_vtable_name_len); + require_action(outlen < meta_vtable_name_len, finish, + rval=KERN_FAILURE); + + outlen = strlcat(meta_vtable_name, METACLASS_VTABLE_SUFFIX, + meta_vtable_name_len); + require_action(outlen < meta_vtable_name_len, finish, + rval=KERN_FAILURE); + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_get_final_sym_name_from_class_name(const char *class_name, + char final_sym_name[], u_long final_sym_name_len) +{ + kern_return_t rval = KERN_FAILURE; + u_long outlen = 0; + + check(class_name); + check(final_sym_name); + + outlen = strlcpy(final_sym_name, OSOBJ_PREFIX, final_sym_name_len); + require_action(outlen < final_sym_name_len, finish, + rval=KERN_FAILURE); + + outlen = strlcat(final_sym_name, class_name, final_sym_name_len); + require_action(outlen < final_sym_name_len, finish, + rval=KERN_FAILURE); + + outlen = strlcat(final_sym_name, FINAL_CLASS_TOKEN, final_sym_name_len); + require_action(outlen < final_sym_name_len, finish, + rval=KERN_FAILURE); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_sym_get_function_prefix_from_class_name(const char *class_name, + char function_prefix[], u_long function_prefix_len) +{ + u_long 
rval = 0; + u_long outlen = 0; + + check(class_name); + check(function_prefix); + + outlen = strlcpy(function_prefix, OSOBJ_PREFIX, function_prefix_len); + require(outlen < function_prefix_len, finish); + + outlen = strlcat(function_prefix, class_name, function_prefix_len); + require(outlen < function_prefix_len, finish); + + rval = outlen; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +extract_inner_string(const char *str, const char *prefix, const char *suffix, + char *buf, u_long len) +{ + kern_return_t rval = KERN_FAILURE; + u_long prelen = 0, suflen = 0, striplen = 0; + + check(str); + check(buf); + + prelen = (prefix) ? strlen(prefix) : 0; + suflen = (suffix) ? strlen(suffix) : 0; + striplen = strlen(str) - prelen - suflen; + + require_action(striplen < len, finish, rval=KERN_FAILURE); + + strncpy(buf, str + prelen, striplen); + buf[striplen] = '\0'; + + rval = KERN_SUCCESS; +finish: + return rval; +} + +#if KXLD_USER_OR_GOT +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_set_got(KXLDSym *sym) +{ + sym->predicates.is_got = 1; +} +#endif /* KXLD_USER_OR_GOT */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_relocate(KXLDSym *sym, const KXLDSect *sect) +{ + if (kxld_sym_is_section(sym)) { + sym->link_addr = sym->base_addr - sect->base_addr + sect->link_addr; + sym->relocated_sectnum = sect->sectnum; + } +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t 
+kxld_sym_export_macho_32(const KXLDSym *sym, u_char *_nl, char *strtab, + u_long *stroff, u_long strsize, boolean_t is_link_state) +{ + kern_return_t rval = KERN_FAILURE; + struct nlist *nl = (struct nlist *) _nl; + char *str = NULL; + long bytes = 0; + + check(sym); + check(nl); + check(strtab); + check(stroff); + + bytes = strlen(sym->name) + 1; + require_action((u_long)bytes <= strsize - *stroff, finish, + rval = KERN_FAILURE); + + if (is_link_state) { + nl->n_type = N_ABS | N_EXT; + nl->n_sect = NO_SECT; + nl->n_desc = 0; + } else { + nl->n_type = sym->type; + nl->n_sect = (kxld_sym_is_section(sym)) ? sym->relocated_sectnum + 1 : 0; + nl->n_desc = sym->desc; + } + nl->n_un.n_strx = (uint32_t) *stroff; + nl->n_value = (uint32_t) sym->link_addr; + + str = (char *) (strtab + *stroff); + strlcpy(str, sym->name, strsize - *stroff); + + *stroff += bytes; + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_export_macho_64(const KXLDSym *sym, u_char *_nl, char *strtab, + u_long *stroff, u_long strsize, boolean_t is_link_state) +{ + kern_return_t rval = KERN_FAILURE; + struct nlist_64 *nl = (struct nlist_64 *) _nl; + char *str = NULL; + long bytes = 0; + + check(sym); + check(nl); + check(strtab); + check(stroff); + + bytes = strlen(sym->name) + 1; + require_action((u_long)bytes <= strsize - *stroff, finish, + rval = KERN_FAILURE); + + if (is_link_state) { + nl->n_type = N_ABS | N_EXT; + nl->n_sect = NO_SECT; + nl->n_desc = 0; + } else { + nl->n_type = sym->type; + nl->n_sect = (kxld_sym_is_section(sym)) ? 
sym->relocated_sectnum + 1 : 0; + nl->n_desc = sym->desc; + } + nl->n_un.n_strx = (uint32_t) *stroff; + nl->n_value = (uint64_t) sym->link_addr; + + str = (char *) (strtab + *stroff); + strlcpy(str, sym->name, strsize - *stroff); + + *stroff += bytes; + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_resolve(KXLDSym *sym, kxld_addr_t addr, boolean_t export_sym) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + + require_action(kxld_sym_is_undefined(sym) || kxld_sym_is_indirect(sym), + finish, rval=KERN_FAILURE); + + /* Set the n_list data types */ + + sym->link_addr = addr; + sym->type = N_ABS | N_EXT; + sym->sectnum = NO_SECT; + + /* Set the predicate bits for an externally resolved symbol. We re-export + * indirect symbols and any symbols that the caller wants re-exported (for + * example, symbols from a pseudo-kext). 
*/ + + sym->predicates.is_external = TRUE; + sym->predicates.is_absolute = TRUE; + sym->predicates.is_resolved = !(kxld_sym_is_indirect(sym) || export_sym); + + /* Clear the predicate bits for types that can be resolved */ + + sym->predicates.is_undefined = FALSE; + sym->predicates.is_indirect = FALSE; + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +#if KXLD_USER_OR_COMMON +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_sym_resolve_common(KXLDSym *sym, u_int sectnum, kxld_addr_t base_addr) +{ + kern_return_t rval = KERN_FAILURE; + + check(sym); + + require_action(kxld_sym_is_common(sym), finish, + rval=KERN_FAILURE); + + sym->base_addr = base_addr; + sym->link_addr = base_addr; + sym->type = N_SECT | N_EXT; + sym->sectnum = sectnum; + sym->desc = 0; + + sym->predicates.is_absolute = FALSE; + sym->predicates.is_section = TRUE; + sym->predicates.is_undefined = FALSE; + sym->predicates.is_indirect = FALSE; + sym->predicates.is_common = FALSE; + sym->predicates.is_external = TRUE; + + rval = KERN_SUCCESS; + +finish: + + return rval; +} +#endif /* KXLD_USER_OR_COMMON */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_delete(KXLDSym *sym) +{ + check(sym); + + bzero(sym, sizeof(*sym)); + sym->predicates.is_replaced = TRUE; +} + + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_patch(KXLDSym *sym) +{ + check(sym); + + sym->predicates.is_replaced = TRUE; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_sym_mark_private(KXLDSym *sym) +{ + 
check(sym); + + sym->type |= N_PEXT; + sym->predicates.is_external = FALSE; +} + diff --git a/libkern/kxld/kxld_sym.h b/libkern/kxld/kxld_sym.h new file mode 100644 index 000000000..237586263 --- /dev/null +++ b/libkern/kxld/kxld_sym.h @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
/* In-memory representation of one symbol table entry. The predicates
 * bitfield caches the classification derived from the Mach-O n_type and
 * n_desc fields (and from the symbol's name for the C++ bits) so the
 * kxld_sym_is_*() accessors are simple bit tests.
 */
struct kxld_sym {
    char *name;                       // The symbol's name
    char *alias;                      // The indirect symbol's alias name
    kxld_addr_t base_addr;            // The symbol's base address
    kxld_addr_t link_addr;            // The relocated address
    kxld_addr_t got_addr;             // The address of this symbol's GOT entry
    uint8_t type;                     // The Mach-O n_type value
    uint8_t sectnum;                  // Zero-based section index (n_sect - 1)
                                      // for section symbols
    uint8_t relocated_sectnum;        // Section index recorded at relocation;
                                      // exported as relocated_sectnum + 1
    uint16_t desc;                    // The Mach-O n_desc value
    struct {
        u_int is_absolute:1,          // Set for absolute symbols
        is_section:1,                 // Set for section symbols
        is_undefined:1,               // Set for undefined symbols
        is_indirect:1,                // Set for indirect symbols
        is_common:1,                  // Set for common symbols
        is_external:1,                // Set for external symbols
        is_stab:1,                    // Set for stab symbols
        is_weak:1,                    // Set when N_WEAK_REF is present in
                                      // n_desc (weak reference)
        is_resolved:1,                // For symbols that have been resolved
                                      // externally and should not be exported
        is_obsolete:1,                // For symbols marked as obsolete
        is_replaced:1,                // Set for symbols replaced by patching
        is_got:1,                     // Has an entry in the GOT
        is_cxx:1,                     // Set for C++ symbols
        is_pure_virtual:1,            // Set for pure virtual symbols
        is_class_vtable:1,            // Set for vtable symbols of classes
        is_meta_vtable:1,             // Set for vtable symbols of MetaClasses
        is_padslot:1,                 // Set for pad slot symbols
        is_metaclass:1,               // Set for metaclass symbols
        is_super_metaclass_pointer:1, // Set for super metaclass pointer syms
        is_thumb:1;                   // Set for thumb symbols (ARM only)
    } predicates;
};
KXLD_USER_OR_ILP32 +kern_return_t kxld_sym_init_from_macho32(KXLDSym *sym, char *strtab, + const struct nlist *src) __attribute__((nonnull, visibility("hidden"))); +#endif + +#if KXLD_USER_OR_LP64 +kern_return_t kxld_sym_init_from_macho64(KXLDSym *sym, char *strtab, + const struct nlist_64 *src) __attribute__((nonnull, visibility("hidden"))); +#endif + +void kxld_sym_init_absolute(KXLDSym *sym, char *name, kxld_addr_t link_addr) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_sym_deinit(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_sym_destroy(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +boolean_t kxld_sym_is_absolute(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_section(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_defined(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_defined_locally(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_external(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_exported(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_undefined(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_indirect(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +/* We don't wrap this in KXLD_USER_OR_COMMON because even though common symbols + * aren't always supported, we always need to be able to detect them. 
+ */ +boolean_t kxld_sym_is_common(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_unresolved(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_obsolete(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_GOT +boolean_t kxld_sym_is_got(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_GOT */ + +boolean_t kxld_sym_is_stab(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_weak(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_cxx(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_pure_virtual(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_vtable(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_class_vtable(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_metaclass_vtable(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_padslot(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_metaclass(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_is_super_metaclass_pointer(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); + +boolean_t kxld_sym_name_is_padslot(const char *name) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_int kxld_sym_get_section_offset(const KXLDSym *sym, + const struct kxld_sect *sect) + __attribute__((pure, nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_COMMON +kxld_size_t kxld_sym_get_common_size(const KXLDSym *sym) + __attribute__((pure, nonnull, 
visibility("hidden"))); + +u_int kxld_sym_get_common_align(const KXLDSym *sym) + __attribute__((pure, nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_COMMON */ + +kern_return_t kxld_sym_get_class_name_from_metaclass(const KXLDSym *sym, + char class_name[], u_long class_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_class_name_from_super_metaclass_pointer( + const KXLDSym *sym, char class_name[], u_long class_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_class_name_from_vtable(const KXLDSym *sym, + char class_name[], u_long class_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_class_name_from_vtable_name(const char *vtable_name, + char class_name[], u_long class_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_vtable_name_from_class_name(const char *class_name, + char vtable_name[], u_long vtable_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_meta_vtable_name_from_class_name(const char *class_name, + char meta_vtable_name[], u_long meta_vtable_name_len) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_sym_get_final_sym_name_from_class_name(const char *class_name, + char final_sym_name[], u_long final_sym_name_len) + __attribute__((nonnull, visibility("hidden"))); + +u_long kxld_sym_get_function_prefix_from_class_name(const char *class_name, + char function_prefix[], u_long function_prefix_len) + __attribute__((nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_ILP32 +kern_return_t kxld_sym_export_macho_32(const KXLDSym *sym, u_char *nl, + char *strtab, u_long *stroff, u_long strsize, boolean_t is_link_state) + __attribute__((nonnull, visibility("hidden"))); +#endif + +#if KXLD_USER_OR_LP64 +kern_return_t kxld_sym_export_macho_64(const KXLDSym *sym, u_char *nl, + char *strtab, u_long *stroff, u_long strsize, boolean_t 
is_link_state) + __attribute__((nonnull, visibility("hidden"))); +#endif + +/******************************************************************************* +* Mutators +*******************************************************************************/ + +void kxld_sym_relocate(KXLDSym *sym, const struct kxld_sect *sect) + __attribute__((nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_GOT +void kxld_sym_set_got(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_GOT */ + +kern_return_t kxld_sym_resolve(KXLDSym *sym, const kxld_addr_t addr, + boolean_t export_sym) + __attribute__((nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_COMMON +kern_return_t kxld_sym_resolve_common(KXLDSym *sym, u_int sectnum, + kxld_addr_t base_addr) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_COMMON */ + +void kxld_sym_delete(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_sym_patch(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_sym_mark_private(KXLDSym *sym) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_SYMBOL_H_ */ + diff --git a/libkern/kxld/kxld_symtab.c b/libkern/kxld/kxld_symtab.c new file mode 100644 index 000000000..569bd1bbe --- /dev/null +++ b/libkern/kxld/kxld_symtab.c @@ -0,0 +1,640 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_array.h" +#include "kxld_dict.h" +#include "kxld_sect.h" +#include "kxld_sym.h" +#include "kxld_symtab.h" +#include "kxld_util.h" + +struct kxld_symtab { + KXLDArray syms; + KXLDDict cxx_index; + KXLDDict name_index; + char *strings; + u_int strsize; +}; + +/******************************************************************************* +* Prototypes +*******************************************************************************/ + +static kern_return_t init_macho(KXLDSymtab *symtab, u_char *macho, + struct symtab_command *src, kxld_addr_t linkedit_offset, boolean_t is_32_bit) + __attribute__((nonnull)); + +#if KXLD_USER_OR_ILP32 +static kern_return_t init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset, + u_int nsyms); +#endif +#if KXLD_USER_OR_LP64 +static kern_return_t init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset, + u_int nsyms); +#endif + 
+static kern_return_t make_cxx_index(KXLDSymtab *symtab) + __attribute__((nonnull)); +static boolean_t sym_is_defined_cxx(const KXLDSym *sym); +static kern_return_t make_name_index(KXLDSymtab *symtab) + __attribute__((nonnull)); +static boolean_t sym_is_name_indexed(const KXLDSym *sym); + + +/******************************************************************************* +*******************************************************************************/ +size_t +kxld_symtab_sizeof() +{ + return sizeof(KXLDSymtab); +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, u_char *macho, + struct symtab_command *src, kxld_addr_t linkedit_offset) +{ + return init_macho(symtab, macho, src, linkedit_offset, TRUE); +} +#endif /* KXLD_USER_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_symtab_init_from_macho_64(KXLDSymtab *symtab, u_char *macho, + struct symtab_command *src, kxld_addr_t linkedit_offset) +{ + return init_macho(symtab, macho, src, linkedit_offset, FALSE); +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_macho(KXLDSymtab *symtab, u_char *macho, struct symtab_command *src, + kxld_addr_t linkedit_offset, boolean_t is_32_bit __unused) +{ + kern_return_t rval = KERN_FAILURE; + + check(symtab); + check(macho); + check(src); + + /* Initialize the symbol array */ + + rval = kxld_array_init(&symtab->syms, sizeof(KXLDSym), src->nsyms); + require_noerr(rval, finish); + + /* Initialize the string table */ + + symtab->strings = 
(char *) (macho + src->stroff + linkedit_offset); + symtab->strsize = src->strsize; + + /* Initialize the symbols */ + + KXLD_3264_FUNC(is_32_bit, rval, + init_syms_32, init_syms_64, + symtab, macho, (u_long) (src->symoff + linkedit_offset), src->nsyms); + require_noerr(rval, finish); + + /* Create the C++ index */ + + rval = make_cxx_index(symtab); + require_noerr(rval, finish); + + /* Create the name index */ + + rval = make_name_index(symtab); + require_noerr(rval, finish); + + /* Save the output */ + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_syms_32(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSym *sym = NULL; + u_int i = 0; + struct nlist *src_syms = (struct nlist *) (macho + offset); + + for (i = 0; i < nsyms; ++i) { + sym = kxld_array_get_item(&symtab->syms, i); + require_action(sym, finish, rval=KERN_FAILURE); + + rval = kxld_sym_init_from_macho32(sym, symtab->strings, &src_syms[i]); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +init_syms_64(KXLDSymtab *symtab, u_char *macho, u_long offset, u_int nsyms) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSym *sym = NULL; + u_int i = 0; + struct nlist_64 *src_syms = (struct nlist_64 *) (macho + offset); + + for (i = 0; i < nsyms; ++i) { + sym = kxld_array_get_item(&symtab->syms, i); + require_action(sym, finish, rval=KERN_FAILURE); + + rval = kxld_sym_init_from_macho64(sym, symtab->strings, &src_syms[i]); + require_noerr(rval, finish); + } + + 
rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +void +kxld_symtab_iterator_init(KXLDSymtabIterator *iter, const KXLDSymtab *symtab, + KXLDSymPredicateTest test, boolean_t negate) +{ + check(iter); + check(symtab); + check(test); + + iter->symtab = symtab; + iter->idx = 0; + iter->test = test; + iter->negate = negate; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_symtab_clear(KXLDSymtab *symtab) +{ + check(symtab); + + kxld_array_clear(&symtab->syms); + kxld_dict_clear(&symtab->cxx_index); + kxld_dict_clear(&symtab->name_index); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_symtab_deinit(KXLDSymtab *symtab) +{ + check(symtab); + + kxld_array_deinit(&symtab->syms); + kxld_dict_deinit(&symtab->cxx_index); + kxld_dict_deinit(&symtab->name_index); +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_symtab_get_num_symbols(const KXLDSymtab *symtab) +{ + check(symtab); + + return symtab->syms.nitems; +} + +/******************************************************************************* +*******************************************************************************/ +KXLDSym * +kxld_symtab_get_symbol_by_index(const KXLDSymtab *symtab, u_int idx) +{ + check(symtab); + + return kxld_array_get_item(&symtab->syms, idx); +} + +/******************************************************************************* +*******************************************************************************/ 
+KXLDSym * +kxld_symtab_get_symbol_by_name(const KXLDSymtab *symtab, const char *name) +{ + check(symtab); + check(name); + + return kxld_dict_find(&symtab->name_index, name); +} + +/******************************************************************************* +*******************************************************************************/ +KXLDSym * +kxld_symtab_get_cxx_symbol_by_value(const KXLDSymtab *symtab, kxld_addr_t value) +{ + check(symtab); + + /* + * value may hold a THUMB address (with bit 0 set to 1) but the index will + * have the real address (bit 0 set to 0). So if bit 0 is set here, + * we clear it (should impact no architectures but ARM). + */ + kxld_addr_t v = value & ~1; + + return kxld_dict_find(&symtab->cxx_index, &v); +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_symtab_get_sym_index(const KXLDSymtab *symtab, const KXLDSym *sym, + u_int *symindex) +{ + kern_return_t rval = KERN_FAILURE; + + rval = kxld_array_get_index(&symtab->syms, sym, symindex); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_symtab_get_macho_header_size(void) +{ + return sizeof(struct symtab_command); +} + +/******************************************************************************* +*******************************************************************************/ +u_long +kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, + boolean_t is_link_state, boolean_t is_32_bit) +{ + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + u_long size = 1; /* strtab start padding */ + u_int nsyms = 0; + + check(symtab); + + if (is_link_state) { + kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE); + } 
else { + kxld_symtab_iterator_init(&iter, symtab, + kxld_sym_is_defined_locally, FALSE); + } + + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + size += strlen(sym->name) + 1; + ++nsyms; + } + + if (is_32_bit) { + size += nsyms * sizeof(struct nlist); + } else { + size += nsyms * sizeof(struct nlist_64); + } + + return size; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf, + u_long *header_offset, u_long header_size, + u_long *data_offset, u_long data_size, + boolean_t is_link_state, boolean_t is_32_bit) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + struct symtab_command *symtabhdr = NULL; + u_char *nl = NULL; + u_long nlistsize = 0; + char *strtab = NULL; + u_long stroff = 1; /* strtab start padding */ + + check(symtab); + check(buf); + check(header_offset); + check(data_offset); + + require_action(sizeof(*symtabhdr) <= header_size - *header_offset, + finish, rval=KERN_FAILURE); + symtabhdr = (struct symtab_command *) (buf + *header_offset); + *header_offset += sizeof(*symtabhdr); + + /* Initialize the symbol table header */ + + symtabhdr->cmd = LC_SYMTAB; + symtabhdr->cmdsize = (uint32_t) sizeof(*symtabhdr); + symtabhdr->symoff = (uint32_t) *data_offset; + symtabhdr->strsize = 1; /* strtab start padding */ + + /* Find the size of the symbol and string tables */ + + if (is_link_state) { + kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_exported, FALSE); + } else { + kxld_symtab_iterator_init(&iter, symtab, + kxld_sym_is_defined_locally, FALSE); + } + + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + symtabhdr->nsyms++; + symtabhdr->strsize += (uint32_t) (strlen(sym->name) + 1); + } + + if (is_32_bit) { + nlistsize = sizeof(struct nlist); + } else { + nlistsize = sizeof(struct nlist_64); + } + + 
symtabhdr->stroff = (uint32_t) (symtabhdr->symoff + + (symtabhdr->nsyms * nlistsize)); + require_action(symtabhdr->stroff + symtabhdr->strsize <= data_size, finish, + rval=KERN_FAILURE); + + /* Get pointers to the symbol and string tables */ + + nl = buf + symtabhdr->symoff; + strtab = (char *) (buf + symtabhdr->stroff); + + /* Copy over the symbols */ + + kxld_symtab_iterator_reset(&iter); + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + + KXLD_3264_FUNC(is_32_bit, rval, + kxld_sym_export_macho_32, kxld_sym_export_macho_64, + sym, nl, strtab, &stroff, symtabhdr->strsize, is_link_state); + require_noerr(rval, finish); + + nl += nlistsize; + stroff += rval; + } + + /* Update the data offset */ + *data_offset += (symtabhdr->nsyms * nlistsize) + stroff; + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +u_int +kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter) +{ + u_int idx = 0; + u_int count = 0; + + check(iter); + + idx = iter->idx; + + for (idx = iter->idx; idx < iter->symtab->syms.nitems; ++idx) { + count += iter->test(kxld_array_get_item(&iter->symtab->syms, idx)); + } + + return count; +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +make_cxx_index(KXLDSymtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + u_int nsyms = 0; + + check(symtab); + + /* Count the number of C++ symbols */ + kxld_symtab_iterator_init(&iter, symtab, sym_is_defined_cxx, FALSE); + nsyms = kxld_symtab_iterator_get_num_remaining(&iter); + + /* Create the dictionary */ + rval = kxld_dict_init(&symtab->cxx_index, kxld_dict_kxldaddr_hash, + kxld_dict_kxldaddr_cmp, nsyms); + require_noerr(rval, finish); + 
+ /* Insert the non-stab symbols */ + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + rval = kxld_dict_insert(&symtab->cxx_index, &sym->base_addr, sym); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +sym_is_defined_cxx(const KXLDSym *sym) +{ + return (kxld_sym_is_defined_locally(sym) && kxld_sym_is_cxx(sym)); +} + +/******************************************************************************* +*******************************************************************************/ +static kern_return_t +make_name_index(KXLDSymtab *symtab) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + u_int nsyms = 0; + + check(symtab); + + /* Count the number of symbols we need to index by name */ + kxld_symtab_iterator_init(&iter, symtab, sym_is_name_indexed, FALSE); + nsyms = kxld_symtab_iterator_get_num_remaining(&iter); + + /* Create the dictionary */ + rval = kxld_dict_init(&symtab->name_index, kxld_dict_string_hash, + kxld_dict_string_cmp, nsyms); + require_noerr(rval, finish); + + /* Insert the non-stab symbols */ + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + rval = kxld_dict_insert(&symtab->name_index, sym->name, sym); + require_noerr(rval, finish); + } + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static boolean_t +sym_is_name_indexed(const KXLDSym *sym) +{ + return (kxld_sym_is_vtable(sym) || + streq_safe(sym->name, KXLD_KMOD_INFO_SYMBOL, + const_strlen(KXLD_KMOD_INFO_SYMBOL)) || + streq_safe(sym->name, KXLD_WEAK_TEST_SYMBOL, + const_strlen(KXLD_WEAK_TEST_SYMBOL))); +} + 
+/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_symtab_relocate(KXLDSymtab *symtab, const KXLDArray *sectarray) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymtabIterator iter; + KXLDSym *sym = NULL; + const KXLDSect *sect = NULL; + + check(symtab); + check(sectarray); + + kxld_symtab_iterator_init(&iter, symtab, kxld_sym_is_section, FALSE); + + while ((sym = kxld_symtab_iterator_get_next(&iter))) { + sect = kxld_array_get_item(sectarray, sym->sectnum); + require_action(sect, finish, rval=KERN_FAILURE); + kxld_sym_relocate(sym, sect); + } + + rval = KERN_SUCCESS; + +finish: + + return rval; +} + +/******************************************************************************* +* This extends the symbol table and initializes the new symbol. We insert the +* symbol into the name index, but we don't bother with the c++ value index +* because it is based on the base_addr of the symbol, and the base_addr of +* all synthesized symbols will be 0. 
+*******************************************************************************/ +kern_return_t +kxld_symtab_add_symbol(KXLDSymtab *symtab, char *name, kxld_addr_t link_addr, + KXLDSym **symout) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSym *sym = NULL; + u_int symindex = symtab->syms.nitems; + + rval = kxld_array_resize(&symtab->syms, symindex + 1); + require_noerr(rval, finish); + + sym = kxld_array_get_item(&symtab->syms, symindex); + kxld_sym_init_absolute(sym, name, link_addr); + + rval = kxld_dict_insert(&symtab->name_index, sym->name, sym); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + *symout = sym; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +KXLDSym * +kxld_symtab_iterator_get_next(KXLDSymtabIterator *iter) +{ + KXLDSym *sym = NULL; + KXLDSym *tmp = NULL; + boolean_t cmp = FALSE; + + check(iter); + + for (; iter->idx < iter->symtab->syms.nitems; ++iter->idx) { + tmp = kxld_array_get_item(&iter->symtab->syms, iter->idx); + cmp = iter->test(tmp); + if (iter->negate) cmp = !cmp; + + if (cmp) { + sym = tmp; + ++iter->idx; + break; + } + } + + return sym; +} + + +/******************************************************************************* +*******************************************************************************/ +void +kxld_symtab_iterator_reset(KXLDSymtabIterator *iter) +{ + check(iter); + iter->idx = 0; +} + diff --git a/libkern/kxld/kxld_symtab.h b/libkern/kxld/kxld_symtab.h new file mode 100644 index 000000000..cc2d91cec --- /dev/null +++ b/libkern/kxld/kxld_symtab.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_SYMTAB_H_ +#define _KXLD_SYMTAB_H_ + +#include +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +#include "kxld_sym.h" + +struct kxld_array; +struct symtab_command; +typedef struct kxld_symtab KXLDSymtab; +typedef struct kxld_symtab_iterator KXLDSymtabIterator; + +struct kxld_symtab_iterator { + const KXLDSymtab *symtab; + u_int idx; + KXLDSymPredicateTest test; + boolean_t negate; +}; + +/******************************************************************************* +* Constructors and Destructors +*******************************************************************************/ + +size_t kxld_symtab_sizeof(void) + __attribute__((const, nonnull, visibility("hidden"))); + +#if KXLD_USER_OR_ILP32 +kern_return_t kxld_symtab_init_from_macho_32(KXLDSymtab *symtab, u_char *macho, + struct symtab_command *src, kxld_addr_t linkedit_offset) + __attribute__((nonnull, visibility("hidden"))); +#endif 
/* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +kern_return_t kxld_symtab_init_from_macho_64(KXLDSymtab *symtab, u_char *macho, + struct symtab_command *src, kxld_addr_t linkedit_offset) + __attribute__((nonnull, visibility("hidden"))); +#endif /* KXLD_USER_OR_ILP64 */ + +void kxld_symtab_iterator_init(KXLDSymtabIterator *iter, + const KXLDSymtab *symtab, KXLDSymPredicateTest test, boolean_t negate) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_symtab_clear(KXLDSymtab *symtab) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_symtab_deinit(KXLDSymtab *symtab) + __attribute__((nonnull, visibility("hidden"))); + +/******************************************************************************* +* Accessors +*******************************************************************************/ + +u_int kxld_symtab_get_num_symbols(const KXLDSymtab *symtab) + __attribute__((pure, nonnull, visibility("hidden"))); + +KXLDSym * kxld_symtab_get_symbol_by_index(const KXLDSymtab *symtab, u_int idx) + __attribute__((pure, nonnull, visibility("hidden"))); + +KXLDSym * kxld_symtab_get_symbol_by_name(const KXLDSymtab *symtab, + const char *name) + __attribute__((pure, nonnull, visibility("hidden"))); + +KXLDSym * kxld_symtab_get_cxx_symbol_by_value(const KXLDSymtab *symtab, + kxld_addr_t value) + __attribute__((pure, nonnull, visibility("hidden"))); + +kern_return_t kxld_symtab_get_sym_index(const KXLDSymtab *symtab, + const KXLDSym * sym, u_int *idx) + __attribute__((pure, nonnull, visibility("hidden"))); + +u_long kxld_symtab_get_macho_header_size(void) + __attribute__((pure, visibility("hidden"))); + +u_long kxld_symtab_get_macho_data_size(const KXLDSymtab *symtab, + boolean_t is_link_state, boolean_t is_32_bit) + __attribute__((pure, nonnull, visibility("hidden"))); + +kern_return_t +kxld_symtab_export_macho(const KXLDSymtab *symtab, u_char *buf, + u_long *header_offset, u_long header_size, + u_long *data_offset, u_long data_size, + boolean_t 
is_link_state, boolean_t is_32_bit) + __attribute__((nonnull, visibility("hidden"))); + +u_int kxld_symtab_iterator_get_num_remaining(const KXLDSymtabIterator *iter) + __attribute__((pure, nonnull, visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +kern_return_t kxld_symtab_relocate(KXLDSymtab *symtab, + const struct kxld_array *sectarray) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_symtab_add_symbol(KXLDSymtab *symtab, char *name, + kxld_addr_t link_addr, KXLDSym **symout) + __attribute__((nonnull, visibility("hidden"))); + +KXLDSym * kxld_symtab_iterator_get_next(KXLDSymtabIterator *iter) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_symtab_iterator_reset(KXLDSymtabIterator *iter) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_SYMTAB_H_ */ + diff --git a/libkern/kxld/kxld_util.c b/libkern/kxld/kxld_util.c new file mode 100644 index 000000000..35dc1066b --- /dev/null +++ b/libkern/kxld/kxld_util.c @@ -0,0 +1,782 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#if KERNEL + #include + #include + #include + #include +#else + #include + #include + #include + #include +#endif + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_util.h" + +#if !KERNEL +static void unswap_macho_32(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order); +static void unswap_macho_64(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order); +#endif /* !KERNEL */ + +#if DEBUG +static unsigned long num_allocations = 0; +static unsigned long num_frees = 0; +static unsigned long bytes_allocated = 0; +static unsigned long bytes_freed = 0; +#endif + +static KXLDLoggingCallback s_logging_callback = NULL; +static const char *s_callback_name = NULL; +static void *s_callback_data = NULL; + +/******************************************************************************* +*******************************************************************************/ +void +kxld_set_logging_callback(KXLDLoggingCallback logging_callback) +{ + s_logging_callback = logging_callback; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_set_logging_callback_data(const char *name, void *user_data) +{ + s_callback_name = name; + s_callback_data = user_data; +} + 
+/******************************************************************************* +*******************************************************************************/ +void +kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level, + const char *in_format, ...) +{ + char stack_buffer[256]; + char *alloc_buffer = NULL; + char *format = stack_buffer; + const char *name = (s_callback_name) ? s_callback_name : "internal"; + u_int length = 0; + va_list ap; + + if (s_logging_callback) { + + length = snprintf(stack_buffer, sizeof(stack_buffer), "kxld[%s]: %s", + name, in_format); + + if (length >= sizeof(stack_buffer)) { + length += 1; + alloc_buffer = kxld_alloc(length); + if (!alloc_buffer) return; + + snprintf(alloc_buffer, sizeof(alloc_buffer), "kxld[%s]: %s", + name, format); + format = alloc_buffer; + } + + va_start(ap, in_format); + s_logging_callback(subsystem, level, format, ap, s_callback_data); + va_end(ap); + + if (alloc_buffer) { + kxld_free(alloc_buffer, length); + } + } +} + +/* We'll use kalloc for any page-based allocations under this threshold, and + * kmem_alloc otherwise. 
+ */ +#define KALLOC_MAX 16 * 1024 + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_alloc(size_t size) +{ + void * ptr = NULL; + +#if KERNEL + ptr = kalloc(size); +#else + ptr = malloc(size); +#endif + +#if DEBUG + if (ptr) { + ++num_allocations; + bytes_allocated += size; + } +#endif + + return ptr; +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_page_alloc_untracked(size_t size) +{ + void * ptr = NULL; +#if KERNEL + kern_return_t rval = 0; + vm_offset_t addr = 0; +#endif /* KERNEL */ + + size = round_page(size); + +#if KERNEL + if (size < KALLOC_MAX) { + ptr = kalloc(size); + } else { + rval = kmem_alloc(kernel_map, &addr, size); + if (!rval) ptr = (void *) addr; + } +#else /* !KERNEL */ + ptr = malloc(size); +#endif /* KERNEL */ + + return ptr; +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_page_alloc(size_t size) +{ + void * ptr = NULL; + + ptr = kxld_page_alloc_untracked(size); +#if DEBUG + if (ptr) { + ++num_allocations; + bytes_allocated += round_page(size); + } +#endif /* DEBUG */ + + return ptr; +} + +/******************************************************************************* +*******************************************************************************/ +void * +kxld_alloc_pageable(size_t size) +{ + size = round_page(size); + +#if KERNEL + kern_return_t rval = 0; + vm_offset_t ptr = 0; + + rval = kmem_alloc_pageable(kernel_map, &ptr, size); + if (rval) ptr = 0; + + return (void *) ptr; +#else + return kxld_page_alloc_untracked(size); +#endif +} + +/******************************************************************************* 
+*******************************************************************************/ +void +kxld_free(void *ptr, size_t size __unused) +{ +#if DEBUG + ++num_frees; + bytes_freed += size; +#endif + +#if KERNEL + kfree(ptr, size); +#else + free(ptr); +#endif +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_page_free_untracked(void *ptr, size_t size __unused) +{ +#if KERNEL + size = round_page(size); + + if (size < KALLOC_MAX) { + kfree(ptr, size); + } else { + kmem_free(kernel_map, (vm_offset_t) ptr, size); + } +#else /* !KERNEL */ + free(ptr); +#endif /* KERNEL */ +} + + +/******************************************************************************* +*******************************************************************************/ +void +kxld_page_free(void *ptr, size_t size) +{ +#if DEBUG + ++num_frees; + bytes_freed += round_page(size); +#endif /* DEBUG */ + kxld_page_free_untracked(ptr, size); +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +validate_and_swap_macho_32(u_char *file, u_long size +#if !KERNEL + , enum NXByteOrder host_order +#endif /* !KERNEL */ + ) +{ + kern_return_t rval = KERN_FAILURE; + struct mach_header *mach_hdr = (struct mach_header *) file; + struct load_command *load_hdr = NULL; + struct segment_command *seg_hdr = NULL; + struct section *sects = NULL; + struct relocation_info *relocs = NULL; + struct symtab_command *symtab_hdr = NULL; + struct nlist *symtab = NULL; + u_long offset = 0; + u_int cmd = 0; + u_int cmdsize = 0; + u_int i = 0; + u_int j = 0; +#if !KERNEL + boolean_t swap = FALSE; +#endif /* !KERNEL */ + + check(file); + check(size); + + /* Verify that the file is big enough for the mach header */ + require_action(size >= sizeof(*mach_hdr), finish, + rval=KERN_FAILURE; 
+ kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + offset = sizeof(*mach_hdr); + +#if !KERNEL + /* Swap the mach header if necessary */ + if (mach_hdr->magic == MH_CIGAM) { + swap = TRUE; + (void) swap_mach_header(mach_hdr, host_order); + } +#endif /* !KERNEL */ + + /* Validate the mach_header's magic number */ + require_action(mach_hdr->magic == MH_MAGIC, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid magic number: 0x%x.", mach_hdr->magic)); + + /* Validate and potentially swap the load commands */ + for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) { + + /* Verify that the generic load command header lies inside the file + * before reading it. offset is at most size here (the mach header + * check and the per-command check below guarantee it), so the + * subtraction cannot wrap. + */ + require_action(size - offset >= sizeof(*load_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get the load command and size */ + load_hdr = (struct load_command *) (file + offset); + cmd = load_hdr->cmd; + cmdsize = load_hdr->cmdsize; + +#if !KERNEL + if (swap) { + cmd = OSSwapInt32(load_hdr->cmd); + cmdsize = OSSwapInt32(load_hdr->cmdsize); + } +#endif /* !KERNEL */ + + /* Verify that the file is big enough to contain the load command. + * The comparison is made against the bytes remaining after offset + * because offset + cmdsize can wrap, and a command smaller than its + * own header is rejected so every iteration advances. + */ + require_action(cmdsize >= sizeof(*load_hdr) && + cmdsize <= size - offset, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + switch(cmd) { + case LC_SEGMENT: + /* The command must be large enough to hold a segment header */ + require_action(cmdsize >= sizeof(*seg_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the segment header */ + seg_hdr = (struct segment_command *) load_hdr; +#if !KERNEL + if (swap) swap_segment_command(seg_hdr, host_order); +#endif /* !KERNEL */ + + /* The section headers must fit inside the load command */ + require_action(seg_hdr->nsects <= + (cmdsize - sizeof(*seg_hdr)) / sizeof(*sects), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the section headers */ + sects = (struct section *) &seg_hdr[1]; +#if !KERNEL + if (swap) swap_section(sects, seg_hdr->nsects, host_order); +#endif /* !KERNEL */ + + /* Ignore segments with no vm size */ + if (!seg_hdr->vmsize) continue; + + /* Verify that the file is big enough for the segment data. The + * range checks below compare each length against size minus the + * starting offset so that they cannot be defeated by wraparound. 
*/ + require_action(seg_hdr->fileoff <= size && + seg_hdr->filesize <= size - seg_hdr->fileoff, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + for (j = 0; j < seg_hdr->nsects; ++j) { + + /* Verify that, if the section is not to be zero filled on + * demand, the file is big enough for the section's data. + */ + require_action((sects[j].flags & S_ZEROFILL) || + (sects[j].offset <= size && + sects[j].size <= size - sects[j].offset), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Verify that the file is big enough for the section's + * relocation entries. Dividing the remaining bytes by the entry + * size avoids overflowing nreloc * sizeof(*relocs). + */ + require_action(sects[j].reloff <= size && + sects[j].nreloc <= + (size - sects[j].reloff) / sizeof(*relocs), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Swap the relocation entries */ + relocs = (struct relocation_info *) (file + sects[j].reloff); +#if !KERNEL + if (swap) { + swap_relocation_info(relocs, sects[j].nreloc, + host_order); + } +#endif /* !KERNEL */ + } + + break; + case LC_SYMTAB: + /* The command must be large enough to hold a symtab header */ + require_action(cmdsize >= sizeof(*symtab_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the symtab header */ + symtab_hdr = (struct symtab_command *) load_hdr; +#if !KERNEL + if (swap) swap_symtab_command(symtab_hdr, host_order); +#endif /* !KERNEL */ + + /* Verify that the file is big enough for the symbol table */ + require_action(symtab_hdr->symoff <= size && + symtab_hdr->nsyms <= + (size - symtab_hdr->symoff) / sizeof(*symtab), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Verify that the file is big enough for the string table */ + require_action(symtab_hdr->stroff <= size && + symtab_hdr->strsize <= size - symtab_hdr->stroff, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + +#if !KERNEL + /* Swap the symbol table entries */ + symtab = (struct nlist *) (file + symtab_hdr->symoff); + if (swap) swap_nlist(symtab, symtab_hdr->nsyms, host_order); +#endif /* !KERNEL */ + + break; + default: +#if !KERNEL + /* Swap the load command */ + if (swap) swap_load_command(load_hdr, 
host_order); +#endif /* !KERNEL */ + break; + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Checks that every load command, segment, section, relocation table, symbol +* table and string table described by the 64-bit Mach-O image in file lies +* within its size bytes. In non-kernel builds an image whose magic is +* MH_CIGAM_64 is byte-swapped in place, using host_order, while it is being +* validated. Returns KERN_SUCCESS when all checks pass; otherwise logs the +* problem and returns KERN_FAILURE. +*******************************************************************************/ +kern_return_t +validate_and_swap_macho_64(u_char *file, u_long size +#if !KERNEL + , enum NXByteOrder host_order +#endif /* !KERNEL */ + ) +{ + kern_return_t rval = KERN_FAILURE; + struct mach_header_64 *mach_hdr = (struct mach_header_64 *) file; + struct load_command *load_hdr = NULL; + struct segment_command_64 *seg_hdr = NULL; + struct section_64 *sects = NULL; + struct relocation_info *relocs = NULL; + struct symtab_command *symtab_hdr = NULL; + struct nlist_64 *symtab = NULL; + u_long offset = 0; + u_int cmd = 0; + u_int cmdsize = 0; + u_int i = 0; + u_int j = 0; +#if !KERNEL + boolean_t swap = FALSE; +#endif /* !KERNEL */ + + check(file); + check(size); + + /* Verify that the file is big enough for the mach header */ + require_action(size >= sizeof(*mach_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + offset = sizeof(*mach_hdr); + +#if !KERNEL + /* Swap the mach header if necessary */ + if (mach_hdr->magic == MH_CIGAM_64) { + swap = TRUE; + (void) swap_mach_header_64(mach_hdr, host_order); + } +#endif /* !KERNEL */ + + /* Validate the mach_header's magic number */ + require_action(mach_hdr->magic == MH_MAGIC_64, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO + "Invalid magic number: 0x%x.", mach_hdr->magic)); + + /* Validate and potentially swap the load commands */ + for(i = 0; i < mach_hdr->ncmds; ++i, offset += cmdsize) { + /* Verify that the generic load command header lies inside the file + * before reading it. offset is at most size here (the mach header + * check and the per-command check below guarantee it), so the + * subtraction cannot wrap. + */ + require_action(size - offset >= sizeof(*load_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get the load command and size */ + load_hdr = (struct load_command *) (file + offset); + cmd = load_hdr->cmd; + cmdsize = load_hdr->cmdsize; + +#if !KERNEL + if (swap) { + cmd = OSSwapInt32(load_hdr->cmd); + cmdsize = OSSwapInt32(load_hdr->cmdsize); + } +#endif /* 
!KERNEL */ + + /* Verify that the file is big enough to contain the load command. + * The comparison is made against the bytes remaining after offset + * because offset + cmdsize can wrap, and a command smaller than its + * own header is rejected so every iteration advances. + */ + require_action(cmdsize >= sizeof(*load_hdr) && + cmdsize <= size - offset, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + switch(cmd) { + case LC_SEGMENT_64: + /* The command must be large enough to hold a segment header */ + require_action(cmdsize >= sizeof(*seg_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the segment header */ + seg_hdr = (struct segment_command_64 *) load_hdr; +#if !KERNEL + if (swap) swap_segment_command_64(seg_hdr, host_order); +#endif /* !KERNEL */ + + /* The section headers must fit inside the load command */ + require_action(seg_hdr->nsects <= + (cmdsize - sizeof(*seg_hdr)) / sizeof(*sects), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the section headers */ + sects = (struct section_64 *) &seg_hdr[1]; +#if !KERNEL + if (swap) swap_section_64(sects, seg_hdr->nsects, host_order); +#endif /* !KERNEL */ + + /* If the segment has no vm footprint, skip it */ + if (!seg_hdr->vmsize) continue; + + /* Verify that the file is big enough for the segment data. The + * range checks below compare each length against size minus the + * starting offset so that they cannot be defeated by wraparound. + */ + require_action(seg_hdr->fileoff <= size && + seg_hdr->filesize <= size - seg_hdr->fileoff, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + for (j = 0; j < seg_hdr->nsects; ++j) { + + /* Verify that, if the section is not to be zero filled on + * demand, the file is big enough for the section's data. + */ + require_action((sects[j].flags & S_ZEROFILL) || + (sects[j].offset <= size && + sects[j].size <= size - sects[j].offset), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Verify that the file is big enough for the section's + * relocation entries. Dividing the remaining bytes by the entry + * size avoids overflowing nreloc * sizeof(*relocs). 
+ */ + require_action(sects[j].reloff <= size && + sects[j].nreloc <= + (size - sects[j].reloff) / sizeof(*relocs), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Swap the relocation entries */ + relocs = (struct relocation_info *) (file + sects[j].reloff); +#if !KERNEL + if (swap) { + swap_relocation_info(relocs, sects[j].nreloc, + host_order); + } +#endif /* !KERNEL */ + } + + break; + case LC_SYMTAB: + /* The command must be large enough to hold a symtab header */ + require_action(cmdsize >= sizeof(*symtab_hdr), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Get and swap the symtab header */ + symtab_hdr = (struct symtab_command *) load_hdr; +#if !KERNEL + if (swap) swap_symtab_command(symtab_hdr, host_order); +#endif /* !KERNEL */ + + /* Verify that the file is big enough for the symbol table */ + require_action(symtab_hdr->symoff <= size && + symtab_hdr->nsyms <= + (size - symtab_hdr->symoff) / sizeof(*symtab), finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + + /* Verify that the file is big enough for the string table */ + require_action(symtab_hdr->stroff <= size && + symtab_hdr->strsize <= size - symtab_hdr->stroff, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogTruncatedMachO)); + +#if !KERNEL + /* Swap the symbol table entries */ + symtab = (struct nlist_64 *) (file + symtab_hdr->symoff); + if (swap) swap_nlist_64(symtab, symtab_hdr->nsyms, host_order); +#endif /* !KERNEL */ + + break; + default: +#if !KERNEL + /* Swap the load command */ + if (swap) swap_load_command(load_hdr, host_order); +#endif /* !KERNEL */ + break; + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +#if !KERNEL +/******************************************************************************* +*******************************************************************************/ +void unswap_macho(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order) +{ + struct mach_header *hdr = (struct mach_header *) file; + + if (!hdr) return; + + if (hdr->magic == MH_MAGIC) { + unswap_macho_32(file, host_order, target_order); + } else if (hdr->magic == MH_MAGIC_64) { + 
unswap_macho_64(file, host_order, target_order); + } +} + +/******************************************************************************* +*******************************************************************************/ +static void +unswap_macho_32(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order) +{ + struct mach_header *mach_hdr = (struct mach_header *) file; + struct load_command *load_hdr = NULL; + struct segment_command *seg_hdr = NULL; + struct section *sects = NULL; + struct symtab_command *symtab_hdr = NULL; + struct nlist *symtab = NULL; + u_long offset = 0; + u_int cmd = 0; + u_int size = 0; + u_int i = 0; + + check(file); + + if (target_order == host_order) return; + + offset = sizeof(*mach_hdr); + for(i = 0; i < mach_hdr->ncmds; ++i, offset += size) { + load_hdr = (struct load_command *) (file + offset); + cmd = load_hdr->cmd; + size = load_hdr->cmdsize; + + switch(cmd) { + case LC_SEGMENT: + seg_hdr = (struct segment_command *) load_hdr; + sects = (struct section *) &seg_hdr[1]; + + /* We don't need to unswap relocations because this function is + * called when linking is completed (so there are no relocations). 
+ */ + + swap_section(sects, seg_hdr->nsects, target_order); + swap_segment_command(seg_hdr, target_order); + break; + case LC_SYMTAB: + symtab_hdr = (struct symtab_command *) load_hdr; + symtab = (struct nlist*) (file + symtab_hdr->symoff); + + swap_nlist(symtab, symtab_hdr->nsyms, target_order); + swap_symtab_command(symtab_hdr, target_order); + + break; + default: + swap_load_command(load_hdr, target_order); + break; + } + } + + (void) swap_mach_header(mach_hdr, target_order); +} + +/******************************************************************************* +*******************************************************************************/ +static void +unswap_macho_64(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order) +{ + struct mach_header_64 *mach_hdr = (struct mach_header_64 *) file; + struct load_command *load_hdr = NULL; + struct segment_command_64 *seg_hdr = NULL; + struct section_64 *sects = NULL; + struct symtab_command *symtab_hdr = NULL; + struct nlist_64 *symtab = NULL; + u_long offset = 0; + u_int cmd = 0; + u_int size = 0; + u_int i = 0; + + check(file); + + if (target_order == host_order) return; + + offset = sizeof(*mach_hdr); + for(i = 0; i < mach_hdr->ncmds; ++i, offset += size) { + load_hdr = (struct load_command *) (file + offset); + cmd = load_hdr->cmd; + size = load_hdr->cmdsize; + + switch(cmd) { + case LC_SEGMENT_64: + seg_hdr = (struct segment_command_64 *) load_hdr; + sects = (struct section_64 *) &seg_hdr[1]; + + /* We don't need to unswap relocations because this function is + * called when linking is completed (so there are no relocations). 
+ */ + + swap_section_64(sects, seg_hdr->nsects, target_order); + swap_segment_command_64(seg_hdr, target_order); + break; + case LC_SYMTAB: + symtab_hdr = (struct symtab_command *) load_hdr; + symtab = (struct nlist_64 *) (file + symtab_hdr->symoff); + + swap_nlist_64(symtab, symtab_hdr->nsyms, target_order); + swap_symtab_command(symtab_hdr, target_order); + + break; + default: + swap_load_command(load_hdr, target_order); + break; + } + } + + (void) swap_mach_header_64(mach_hdr, target_order); +} +#endif /* !KERNEL */ + +/******************************************************************************* +*******************************************************************************/ +kxld_addr_t +kxld_align_address(kxld_addr_t address, u_int align) +{ + kxld_addr_t alignment = (1 << align); + kxld_addr_t low_bits = 0; + + low_bits = (address) & (alignment - 1); + if (low_bits) { + address += (alignment - low_bits); + } + + return address; +} + +/******************************************************************************* +*******************************************************************************/ +boolean_t +kxld_is_32_bit(cpu_type_t cputype) +{ + return !(cputype & CPU_ARCH_ABI64); +} + +/******************************************************************************* +* Borrowed (and slightly modified) the libc implementation for the kernel +* until the kernel has a supported strstr(). +* Find the first occurrence of find in s. 
+*******************************************************************************/ +const char * +kxld_strstr(const char *s, const char *find) +{ +#if KERNEL + char c, sc; + size_t len; + + if ((c = *find++) != 0) { + len = strlen(find); + do { + do { + if ((sc = *s++) == 0) + return (NULL); + } while (sc != c); + } while (strncmp(s, find, len) != 0); + s--; + } + return s; +#else + return strstr(s, find); +#endif /* KERNEL */ +} + +/******************************************************************************* +* Logs the allocation counters in DEBUG builds and does nothing otherwise. +* The average allocation size is reported as 0 when no allocation has been +* counted, so the report never divides by zero. +*******************************************************************************/ +void +kxld_print_memory_report(void) +{ +#if DEBUG + kxld_log(kKxldLogLinking, kKxldLogExplicit, "kxld memory usage report:\n" + "\tNumber of allocations: %8lu\n" + "\tNumber of frees: %8lu\n" + "\tAverage allocation size: %8lu\n" + "\tTotal bytes allocated: %8lu\n" + "\tTotal bytes freed: %8lu\n" + "\tTotal bytes leaked: %8lu", + num_allocations, num_frees, + (num_allocations ? bytes_allocated / num_allocations : 0), + bytes_allocated, bytes_freed, bytes_allocated - bytes_freed); +#endif +} + diff --git a/libkern/kxld/kxld_util.h b/libkern/kxld/kxld_util.h new file mode 100644 index 000000000..9c387e670 --- /dev/null +++ b/libkern/kxld/kxld_util.h @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_UTIL_H_ +#define _KXLD_UTIL_H_ + +#include +#include +#if KERNEL + #include +#else + #include + #include "kxld_types.h" +#endif + +/* 64-bit helpers */ +#if !defined(KERNEL) + + #define KXLD_3264_FUNC(cond32, rval, func32, func64, ...) \ + do { \ + if (cond32) { \ + (rval) = (func32)(__VA_ARGS__); \ + } else { \ + (rval) = (func64)(__VA_ARGS__); \ + } \ + } while(0) + +#elif defined(__LP64__) + + #define KXLD_3264_FUNC(cond32, rval, func32, func64, ...) \ + do { \ + (rval) = (func64)(__VA_ARGS__); \ + } while(0) + +#else + + #define KXLD_3264_FUNC(cond32, rval, func32, func64, ...) \ + do { \ + (rval) = (func32)(__VA_ARGS__); \ + } while(0) \ + +#endif + +/* Misc string functions */ +#define streq(str1, str2) (((str1) && (str2)) ? !strcmp((str1), (str2)) : 0) +#define streq_safe(str1, str2, len) (((str1) && (str2)) ? 
\ + !strncmp((str1), (str2), (len)) : 0) +#define const_strlen(str) (sizeof(str) - 1) + +#define const_array_len(array) sizeof(array) / sizeof(*array) + +/* Timing */ +#define DECL_TIMER() struct timeval start, end; +#define START_TIMER() gettimeofday(&start, NULL); +#define END_TIMER() gettimeofday(&end, NULL); +#define PRINT_TIMER(msg) kxld_log("%s: %ds, %dus\n", (msg), \ + (end.tv_sec - start.tv_sec), (end.tv_usec - start.tv_usec)); + +/* Misc definitions */ +#define KXLD_MAX_NAME_LEN 256 +#define KXLD_SEG_GOT "__DATA" +#define KXLD_SECT_GOT "__kxld_got" +#define KXLD_KMOD_INFO_SYMBOL "_kmod_info" +#define KXLD_WEAK_TEST_SYMBOL "_gOSKextUnresolved" +#define KXLD_OPERATOR_NEW_SYMBOL "__Znwm" +#define KXLD_OPERATOR_NEW_ARRAY_SYMBOL "__Znam" +#define KXLD_OPERATOR_DELETE_SYMBOL "__ZdlPv" +#define KXLD_OPERATOR_DELETE_ARRAY_SYMBOL "__ZdaPv" + +struct kxld_section_name { + char segname[16]; + char sectname[16]; +}; +typedef struct kxld_section_name KXLDSectionName; + +/******************************************************************************* +* Logging +*******************************************************************************/ + +void kxld_set_logging_callback(KXLDLoggingCallback logging_callback) + __attribute__((visibility("hidden"))); + +void kxld_set_logging_callback_data(const char * name, void *user_data) + __attribute__((visibility("hidden"))); + +void kxld_log(KXLDLogSubsystem subsystem, KXLDLogLevel level, + const char *format, ...) + __attribute__((visibility("hidden"), format(printf, 3, 4))); + +/* Common logging strings */ +#define kKxldLogArchNotSupported "The target architecture (cputype 0x%x) is not supported by kxld." +#define kKxldLogArchNotFound "The kext does not contain a fat slice for the target architecture." +#define kKxldLogFiletypeNotSupported "The Mach-O filetype 0x%x is not supported on the target architecture." +#define kKxldLogTruncatedMachO "The Mach-O file has been truncated. 
Make sure the Mach-O header structures are correct." +#define kKxldLogMalformedMachO "The Mach-O file is malformed: " +#define kKxldLogMalformedVTable "The vtable %s is malformed. Make sure your kext has been built against the correct headers." +#define kKxldLogMissingVtable "Cannot find the vtable %s for class %s. This vtable symbol is required for binary compatibility, and it may have been stripped." +#define kKxldLogParentOutOfDate "The super class vtable %s for vtable %s is out of date. Make sure your kext has been built against the correct headers." +#define kKxldLogNoKmodInfo "The kext is missing its kmod_info structure." +#define kKxldLogInvalidSectReloc "Relocation entry %u from section %s,%s cannot be processed." +#define kKxldLogInvalidExtReloc "External relocation entry %u cannot be processed." +#define kKxldLogInvalidIntReloc "Internal relocation entry %u cannot be processed." +#define kKxldLogRelocationOverflow "A relocation entry has overflowed. The kext may be too far from one " \ + "of its dependencies. Check your kext's load address." 
+ +/******************************************************************************* +* Allocators +*******************************************************************************/ + +void * kxld_alloc(size_t size) + __attribute__((malloc, visibility("hidden"))); + +void * kxld_page_alloc(size_t size) + __attribute__((malloc, visibility("hidden"))); + +void * kxld_page_alloc_untracked(size_t size) + __attribute__((malloc, visibility("hidden"))); + +void * kxld_alloc_pageable(size_t size) + __attribute__((malloc, visibility("hidden"))); + +/******************************************************************************* +* Deallocators +*******************************************************************************/ + +void kxld_free(void *ptr, size_t size) + __attribute__((visibility("hidden"))); + +void kxld_page_free(void *ptr, size_t size) + __attribute__((visibility("hidden"))); + +void kxld_page_free_untracked(void *ptr, size_t size) + __attribute__((visibility("hidden"))); + +/******************************************************************************* +* Mach-O Functions +*******************************************************************************/ + +kern_return_t validate_and_swap_macho_32(u_char *file, u_long size +#if !KERNEL + , enum NXByteOrder host_order +#endif /* !KERNEL */ + ) __attribute__((visibility("hidden"))); + +kern_return_t validate_and_swap_macho_64(u_char *file, u_long size +#if !KERNEL + , enum NXByteOrder host_order +#endif /* !KERNEL */ + ) __attribute__((visibility("hidden"))); + +#if !KERNEL +void unswap_macho(u_char *file, enum NXByteOrder host_order, + enum NXByteOrder target_order) + __attribute__((visibility("hidden"))); +#endif /* !KERNEL */ + +/******************************************************************************* +* Miscellaneous +*******************************************************************************/ + +kxld_addr_t kxld_align_address(kxld_addr_t address, u_int align) + __attribute__((const, nonnull, 
visibility("hidden"))); + +boolean_t kxld_is_32_bit(cpu_type_t) + __attribute__((const, nonnull, visibility("hidden"))); + +const char * kxld_strstr(const char *s, const char *find) + __attribute__((pure, nonnull, visibility("hidden"))); + +/******************************************************************************* +* Debugging +*******************************************************************************/ + +void kxld_print_memory_report(void) + __attribute__((visibility("hidden"))); + +#endif /* _KXLD_UTIL_H_ */ diff --git a/libkern/kxld/kxld_uuid.c b/libkern/kxld/kxld_uuid.c new file mode 100644 index 000000000..ce64c343e --- /dev/null +++ b/libkern/kxld/kxld_uuid.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_util.h" +#include "kxld_uuid.h" + +/******************************************************************************* +* Copies the UUID bytes out of the Mach-O uuid_command src and marks uuid as +* holding a UUID. +*******************************************************************************/ +void +kxld_uuid_init_from_macho(KXLDuuid *uuid, struct uuid_command *src) +{ + check(uuid); + check(src); + + memcpy(uuid->uuid, src->uuid, sizeof(uuid->uuid)); + uuid->has_uuid = TRUE; +} + +/******************************************************************************* +* Zeroes the whole object, which also resets has_uuid. +*******************************************************************************/ +void +kxld_uuid_clear(KXLDuuid *uuid) +{ + bzero(uuid, sizeof(*uuid)); +} + +/******************************************************************************* +* Returns the number of header bytes kxld_uuid_export_macho() writes. +*******************************************************************************/ +u_long +kxld_uuid_get_macho_header_size(void) +{ + return sizeof(struct uuid_command); +} + +/******************************************************************************* +* Writes an LC_UUID load command carrying uuid into buf at *header_offset and +* advances *header_offset past it. header_size is the total number of header +* bytes available in buf. Returns KERN_FAILURE, leaving buf and *header_offset +* untouched, when the command does not fit. +*******************************************************************************/ +kern_return_t +kxld_uuid_export_macho(const KXLDuuid *uuid, u_char *buf, + u_long *header_offset, u_long header_size) +{ + kern_return_t rval = KERN_FAILURE; + struct uuid_command *uuidhdr = NULL; + + check(uuid); + check(buf); + check(header_offset); + + /* Check the offset first: header_size - *header_offset is unsigned and + * would wrap to a huge value if the offset were already past the end. + */ + require_action(*header_offset <= header_size && + sizeof(*uuidhdr) <= header_size - *header_offset, finish, + rval=KERN_FAILURE); + uuidhdr = (struct uuid_command *) (buf + *header_offset); + *header_offset += sizeof(*uuidhdr); + + uuidhdr->cmd = LC_UUID; + uuidhdr->cmdsize = (uint32_t) sizeof(*uuidhdr); + memcpy(uuidhdr->uuid, uuid->uuid, sizeof(uuidhdr->uuid)); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + diff --git a/libsa/kld_patch.h b/libkern/kxld/kxld_uuid.h similarity index 52% rename from libsa/kld_patch.h rename to 
libkern/kxld/kxld_uuid.h index 13cb363f6..2a180a59c 100644 --- a/libsa/kld_patch.h +++ b/libkern/kxld/kxld_uuid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,36 +25,45 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * History: - * 2001-05-30 gvdl Initial implementation of the vtable patcher. - */ - -#include - -__BEGIN_DECLS +#ifndef _KXLD_UUID_H_ +#define _KXLD_UUID_H_ +#include #if KERNEL -extern Boolean kld_file_map(const char *pathName, - unsigned char *map, size_t mapSize, - Boolean isKmem); + #include #else -extern Boolean kld_file_map(const char *pathName); + #include "kxld_types.h" +#endif + +struct uuid_command; +typedef struct kxld_uuid KXLDuuid; + +struct kxld_uuid { + u_char uuid[16]; + boolean_t has_uuid; +}; -Boolean kld_file_debug_dump(const char *pathName, const char *outName); -#endif /* KERNEL */ +/******************************************************************************* +* Constructors and destructors +*******************************************************************************/ -extern void * - kld_file_lookupsymbol(const char *pathName, const char *symbolname); +void kxld_uuid_init_from_macho(KXLDuuid *uuid, struct uuid_command *src) + __attribute__((nonnull, visibility("hidden"))); -extern void *kld_file_getaddr(const char *pathName, unsigned long *size); +void kxld_uuid_clear(KXLDuuid *uuid) + __attribute__((nonnull, visibility("hidden"))); -extern Boolean kld_file_merge_OSObjects(const char *pathName); +/******************************************************************************* +* Accessors +*******************************************************************************/ -extern Boolean kld_file_patch_OSObjects(const char *pathName); +u_long kxld_uuid_get_macho_header_size(void) + __attribute__((pure, visibility("hidden"))); -extern Boolean kld_file_prepare_for_link(void); 
+kern_return_t +kxld_uuid_export_macho(const KXLDuuid *uuid, u_char *buf, + u_long *header_offset, u_long header_size) + __attribute__((pure, nonnull, visibility("hidden"))); -extern void kld_file_cleanup_all_resources(void); +#endif /* _KXLD_UUID_H_ */ -__END_DECLS diff --git a/libkern/kxld/kxld_vtable.c b/libkern/kxld/kxld_vtable.c new file mode 100644 index 000000000..78e647e6b --- /dev/null +++ b/libkern/kxld/kxld_vtable.c @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include + +#define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" +#include + +#include "kxld_reloc.h" +#include "kxld_sect.h" +#include "kxld_state.h" +#include "kxld_sym.h" +#include "kxld_symtab.h" +#include "kxld_util.h" +#include "kxld_vtable.h" + +#define VTABLE_ENTRY_SIZE_32 4 +#define VTABLE_HEADER_LEN_32 2 +#define VTABLE_HEADER_SIZE_32 (VTABLE_HEADER_LEN_32 * VTABLE_ENTRY_SIZE_32) + +#define VTABLE_ENTRY_SIZE_64 8 +#define VTABLE_HEADER_LEN_64 2 +#define VTABLE_HEADER_SIZE_64 (VTABLE_HEADER_LEN_64 * VTABLE_ENTRY_SIZE_64) + +static kern_return_t init_by_relocs(KXLDVTable *vtable, const KXLDSym *sym, + const KXLDSect *sect, const KXLDSymtab *symtab, + const KXLDRelocator *relocator); + +static kern_return_t init_by_entries_and_relocs(KXLDVTable *vtable, + const KXLDSym *sym, const KXLDSymtab *symtab, + const KXLDRelocator *relocator, const KXLDArray *relocs); + +static kxld_addr_t get_entry_value(u_char *entry, const KXLDRelocator *relocator) + __attribute__((pure)); +#if !KERNEL +static kxld_addr_t swap_entry_value(kxld_addr_t entry_value, + const KXLDRelocator *relocator) __attribute__((const)); +#endif /* !KERNEL */ +static kern_return_t init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab, + const KXLDRelocator *relocator); + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, const KXLDSym *sym, + const KXLDSect *sect, const KXLDSymtab *symtab, + const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + + check(vtable); + check(sym); + check(sect); + check(symtab); + + vtable->name = sym->name; + vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect); + vtable->is_patched = FALSE; + + require_action(kxld_sect_get_num_relocs(sect) == 0, finish, + rval=KERN_FAILURE; + 
kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogMalformedVTable, vtable->name)); + + rval = init_by_entries(vtable, symtab, relocator); + require_noerr(rval, finish); + + vtable->is_patched = TRUE; + + rval = KERN_SUCCESS; + +finish: + + if (rval) kxld_vtable_deinit(vtable); + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_init_from_object_macho(KXLDVTable *vtable, const KXLDSym *sym, + const KXLDSect *sect, const KXLDSymtab *symtab, + const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + + check(vtable); + check(sym); + check(sect); + check(symtab); + + vtable->name = sym->name; + vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect); + vtable->is_patched = FALSE; + + require_action(kxld_sect_get_num_relocs(sect) > 0, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogMalformedVTable, vtable->name)); + + rval = init_by_relocs(vtable, sym, sect, symtab, relocator); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + + if (rval) kxld_vtable_deinit(vtable); + + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_init_from_final_macho(KXLDVTable *vtable, const KXLDSym *sym, + const KXLDSect *sect, const KXLDSymtab *symtab, + const KXLDRelocator *relocator, const KXLDArray *relocs) +{ + kern_return_t rval = KERN_FAILURE; + + check(vtable); + check(sym); + check(sect); + check(symtab); + + vtable->name = sym->name; + vtable->vtable = sect->data + kxld_sym_get_section_offset(sym, sect); + vtable->is_patched = FALSE; + + require_action(kxld_sect_get_num_relocs(sect) == 0, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogMalformedVTable, 
vtable->name)); + + rval = init_by_entries_and_relocs(vtable, sym, symtab, + relocator, relocs); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + if (rval) kxld_vtable_deinit(vtable); + + return rval; +} + +#if KXLD_USER_OR_ILP32 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_init_from_link_state_32(KXLDVTable *vtable, u_char *file, + KXLDVTableHdr *hdr) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymEntry32 *sym = NULL; + KXLDVTableEntry *entry = NULL; + u_int i = 0; + + check(vtable); + check(file); + check(hdr); + + vtable->name = (char *) (file + hdr->nameoff); + vtable->is_patched = TRUE; + + rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), + hdr->nentries); + require_noerr(rval, finish); + + sym = (KXLDSymEntry32 *) (file + hdr->vtableoff); + for (i = 0; i < vtable->entries.nitems; ++i, ++sym) { + entry = kxld_array_get_item(&vtable->entries, i); + entry->patched.name = (char *) (file + sym->nameoff); + entry->patched.addr = sym->addr; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_ILP32 */ + +#if KXLD_USER_OR_LP64 +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_init_from_link_state_64(KXLDVTable *vtable, u_char *file, + KXLDVTableHdr *hdr) +{ + kern_return_t rval = KERN_FAILURE; + KXLDSymEntry64 *sym = NULL; + KXLDVTableEntry *entry = NULL; + u_int i = 0; + + check(vtable); + check(file); + check(hdr); + + vtable->name = (char *) (file + hdr->nameoff); + vtable->is_patched = TRUE; + + rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), + hdr->nentries); + require_noerr(rval, finish); + + sym = (KXLDSymEntry64 *) (file + hdr->vtableoff); + for (i = 0; i < vtable->entries.nitems; ++i, 
++sym) { + entry = kxld_array_get_item(&vtable->entries, i); + entry->patched.name = (char *) (file + sym->nameoff); + entry->patched.addr = sym->addr; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} +#endif /* KXLD_USER_OR_LP64 */ + +/******************************************************************************* +*******************************************************************************/ +kern_return_t +kxld_vtable_copy(KXLDVTable *vtable, const KXLDVTable *src) +{ + kern_return_t rval = KERN_FAILURE; + + check(vtable); + check(src); + + vtable->vtable = src->vtable; + vtable->name = src->name; + vtable->is_patched = src->is_patched; + + rval = kxld_array_copy(&vtable->entries, &src->entries); + require_noerr(rval, finish); + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Initializes a vtable object by matching up relocation entries to the vtable's +* entries and finding the corresponding symbols. 
+*******************************************************************************/ +static kern_return_t +init_by_relocs(KXLDVTable *vtable, const KXLDSym *sym, const KXLDSect *sect, + const KXLDSymtab *symtab, const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + KXLDVTableEntry *entry = NULL; + KXLDSym *tmpsym = NULL; + kxld_addr_t vtable_base_offset = 0; + kxld_addr_t entry_offset = 0; + u_int i = 0; + u_int nentries = 0; + u_int vtable_entry_size = 0; + u_int base_reloc_index = 0; + u_int reloc_index = 0; + + check(vtable); + check(sym); + check(sect); + check(symtab); + check(relocator); + + /* Find the first entry past the vtable padding */ + + vtable_base_offset = kxld_sym_get_section_offset(sym, sect); + if (relocator->is_32_bit) { + vtable_entry_size = VTABLE_ENTRY_SIZE_32; + vtable_base_offset += VTABLE_HEADER_SIZE_32; + } else { + vtable_entry_size = VTABLE_ENTRY_SIZE_64; + vtable_base_offset += VTABLE_HEADER_SIZE_64; + } + + /* Find the relocation entry at the start of the vtable */ + + rval = kxld_reloc_get_reloc_index_by_offset(§->relocs, + vtable_base_offset, &base_reloc_index); + require_noerr(rval, finish); + + /* Count the number of consecutive relocation entries to find the number of + * vtable entries. For some reason, the __TEXT,__const relocations are + * sorted in descending order, so we have to walk backwards. Also, make + * sure we don't run off the end of the section's relocs. 
+ */ + + reloc_index = base_reloc_index; + entry_offset = vtable_base_offset; + reloc = kxld_array_get_item(§->relocs, reloc_index); + while (reloc->address == entry_offset) { + ++nentries; + if (!reloc_index) break; + + --reloc_index; + + reloc = kxld_array_get_item(§->relocs, reloc_index); + entry_offset += vtable_entry_size; + } + + /* Allocate the symbol index */ + + rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), nentries); + require_noerr(rval, finish); + + /* Find the symbols for each vtable entry */ + + for (i = 0; i < vtable->entries.nitems; ++i) { + reloc = kxld_array_get_item(§->relocs, base_reloc_index - i); + entry = kxld_array_get_item(&vtable->entries, i); + + /* If we can't find a symbol, it means it is a locally-defined, + * non-external symbol that has been stripped. We don't patch over + * locally-defined symbols, so we leave the symbol as NULL and just + * skip it. We won't be able to patch subclasses with this symbol, + * but there isn't much we can do about that. 
+ */ + tmpsym = kxld_reloc_get_symbol(relocator, reloc, sect->data, symtab); + + entry->unpatched.sym = tmpsym; + entry->unpatched.reloc = reloc; + } + + rval = KERN_SUCCESS; +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +static kxld_addr_t +get_entry_value(u_char *entry, const KXLDRelocator *relocator) +{ + kxld_addr_t entry_value; + + if (relocator->is_32_bit) { + entry_value = *(uint32_t *)entry; + } else { + entry_value = *(uint64_t *)entry; + } + + return entry_value; +} + +#if !KERNEL +/******************************************************************************* +*******************************************************************************/ +static kxld_addr_t +swap_entry_value(kxld_addr_t entry_value, const KXLDRelocator *relocator) +{ + if (relocator->is_32_bit) { + entry_value = OSSwapInt32((uint32_t) entry_value); + } else { + entry_value = OSSwapInt64((uint64_t) entry_value); + } + + return entry_value; +} +#endif /* KERNEL */ + +/******************************************************************************* +* Initializes a vtable object by reading the symbol values out of the vtable +* entries and performing reverse symbol lookups on those values. 
+*******************************************************************************/ +static kern_return_t +init_by_entries(KXLDVTable *vtable, const KXLDSymtab *symtab, + const KXLDRelocator *relocator) +{ + kern_return_t rval = KERN_FAILURE; + KXLDVTableEntry *tmpentry = NULL; + KXLDSym *sym = NULL; + u_char *base_entry = NULL; + u_char *entry = NULL; + kxld_addr_t entry_value = 0; + u_int vtable_entry_size = 0; + u_int vtable_header_size = 0; + u_int nentries = 0; + u_int i = 0; + + if (relocator->is_32_bit) { + vtable_entry_size = VTABLE_ENTRY_SIZE_32; + vtable_header_size = VTABLE_HEADER_SIZE_32; + } else { + vtable_entry_size = VTABLE_ENTRY_SIZE_64; + vtable_header_size = VTABLE_HEADER_SIZE_64; + } + + base_entry = vtable->vtable + vtable_header_size; + + /* Count the number of entries (the vtable is null-terminated) */ + + entry = base_entry; + entry_value = get_entry_value(entry, relocator); + while (entry_value) { + ++nentries; + entry += vtable_entry_size; + entry_value = get_entry_value(entry, relocator); + } + + /* Allocate the symbol index */ + + rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), nentries); + require_noerr(rval, finish); + + /* Look up the symbols for each entry */ + + entry = base_entry; + rval = KERN_SUCCESS; + for (i = 0; i < vtable->entries.nitems; ++i) { + entry = base_entry + (i * vtable_entry_size); + entry_value = get_entry_value(entry, relocator); + +#if !KERNEL + if (relocator->swap) { + entry_value = swap_entry_value(entry_value, relocator); + } +#endif /* !KERNEL */ + + /* If we can't find the symbol, it means that the virtual function was + * defined inline. There's not much I can do about this; it just means + * I can't patch this function. 
+ */ + tmpentry = kxld_array_get_item(&vtable->entries, i); + sym = kxld_symtab_get_cxx_symbol_by_value(symtab, entry_value); + + if (sym) { + tmpentry->patched.name = sym->name; + tmpentry->patched.addr = sym->link_addr; + } else { + tmpentry->patched.name = NULL; + tmpentry->patched.addr = 0; + } + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +* Initializes vtables by performing a reverse lookup on symbol values when +* they exist in the vtable entry, and by looking through a matching relocation +* entry when the vtable entry is NULL. +* +* Final linked images require this hybrid vtable initialization approach +* because they are already internally resolved. This means that the vtables +* contain valid entries to local symbols, but still have relocation entries for +* external symbols. +*******************************************************************************/ +static kern_return_t +init_by_entries_and_relocs(KXLDVTable *vtable, const KXLDSym *sym, + const KXLDSymtab *symtab, const KXLDRelocator *relocator, + const KXLDArray *relocs) +{ + kern_return_t rval = KERN_FAILURE; + KXLDReloc *reloc = NULL; + KXLDVTableEntry *tmpentry = NULL; + KXLDSym *tmpsym = NULL; + u_int vtable_entry_size = 0; + u_int vtable_header_size = 0; + u_char *base_entry = NULL; + u_char *entry = NULL; + kxld_addr_t entry_value = 0; + kxld_addr_t base_entry_offset = 0; + kxld_addr_t entry_offset = 0; + u_int nentries = 0; + u_int i = 0; + + check(vtable); + check(sym); + check(symtab); + check(relocs); + + /* Find the first entry and its offset past the vtable padding */ + + if (relocator->is_32_bit) { + vtable_entry_size = VTABLE_ENTRY_SIZE_32; + vtable_header_size = VTABLE_HEADER_SIZE_32; + } else { + vtable_entry_size = VTABLE_ENTRY_SIZE_64; + vtable_header_size = VTABLE_HEADER_SIZE_64; + } + + base_entry = vtable->vtable + vtable_header_size; + + base_entry_offset = sym->base_addr; + 
base_entry_offset += vtable_header_size; + + /* In a final linked image, a vtable slot is valid if it is nonzero + * (meaning the userspace linker has already resolved it, or if it has + * a relocation entry. We'll know the end of the vtable when we find a + * slot that meets neither of these conditions. + */ + entry = base_entry; + entry_value = get_entry_value(entry, relocator); + entry_offset = base_entry_offset; + while (1) { + entry_value = get_entry_value(entry, relocator); + if (!entry_value) { + reloc = kxld_reloc_get_reloc_by_offset(relocs, entry_offset); + if (!reloc) break; + } + + ++nentries; + entry += vtable_entry_size; + entry_offset += vtable_entry_size; + } + + /* Allocate the symbol index */ + + rval = kxld_array_init(&vtable->entries, sizeof(KXLDVTableEntry), nentries); + require_noerr(rval, finish); + + /* Find the symbols for each vtable entry */ + + entry = base_entry; + entry_value = get_entry_value(entry, relocator); + entry_offset = base_entry_offset; + for (i = 0; i < vtable->entries.nitems; ++i) { + entry_value = get_entry_value(entry, relocator); + + /* If we can't find a symbol, it means it is a locally-defined, + * non-external symbol that has been stripped. We don't patch over + * locally-defined symbols, so we leave the symbol as NULL and just + * skip it. We won't be able to patch subclasses with this symbol, + * but there isn't much we can do about that. 
+ */ + if (entry_value) { +#if !KERNEL + if (relocator->swap) { + entry_value = swap_entry_value(entry_value, relocator); + } +#endif /* !KERNEL */ + + reloc = NULL; + tmpsym = kxld_symtab_get_cxx_symbol_by_value(symtab, entry_value); + } else { + reloc = kxld_reloc_get_reloc_by_offset(relocs, entry_offset); + require_action(reloc, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogMalformedVTable, vtable->name)); + + tmpsym = kxld_reloc_get_symbol(relocator, reloc, + /* data */ NULL, symtab); + } + + tmpentry = kxld_array_get_item(&vtable->entries, i); + tmpentry->unpatched.reloc = reloc; + tmpentry->unpatched.sym = tmpsym; + + entry += vtable_entry_size; + entry_offset += vtable_entry_size; + } + + rval = KERN_SUCCESS; + +finish: + return rval; +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_vtable_clear(KXLDVTable *vtable) +{ + check(vtable); + + vtable->vtable = NULL; + vtable->name = NULL; + vtable->is_patched = FALSE; + kxld_array_clear(&vtable->entries); +} + +/******************************************************************************* +*******************************************************************************/ +void +kxld_vtable_deinit(KXLDVTable *vtable) +{ + check(vtable); + + kxld_array_deinit(&vtable->entries); + bzero(vtable, sizeof(*vtable)); +} + +/******************************************************************************* +* Patching vtables allows us to preserve binary compatibility across releases. 
+*******************************************************************************/ +kern_return_t +kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable, + KXLDSymtab *symtab, boolean_t strict_patching __unused) +{ + kern_return_t rval = KERN_FAILURE; + KXLDVTableEntry *child_entry = NULL; + KXLDVTableEntry *parent_entry = NULL; + KXLDSym *sym = NULL; + u_int symindex = 0; + u_int i = 0; + + check(vtable); + check(super_vtable); + + require_action(!vtable->is_patched, finish, rval=KERN_SUCCESS); + require_action(vtable->entries.nitems >= super_vtable->entries.nitems, finish, + rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogMalformedVTable, vtable->name)); + + for (i = 0; i < super_vtable->entries.nitems; ++i) { + child_entry = kxld_array_get_item(&vtable->entries, i); + parent_entry = kxld_array_get_item(&super_vtable->entries, i); + + /* The child entry can be NULL when a locally-defined, non-external + * symbol is stripped. We wouldn't patch this entry anyway, so we + * just skip it. + */ + + if (!child_entry->unpatched.sym) continue; + + /* It's possible for the patched parent entry not to have a symbol + * (e.g. when the definition is inlined). We can't patch this entry no + * matter what, so we'll just skip it and die later if it's a problem + * (which is not likely). + */ + + if (!parent_entry->patched.name) continue; + + /* 1) If the symbol is defined locally, do not patch */ + + if (kxld_sym_is_defined_locally(child_entry->unpatched.sym)) continue; + + /* 2) If the child is a pure virtual function, do not patch. + * In general, we want to proceed with patching when the symbol is + * externally defined because pad slots fall into this category. + * The pure virtual function symbol is special case, as the pure + * virtual property itself overrides the parent's implementation. 
+ */ + + if (kxld_sym_is_pure_virtual(child_entry->unpatched.sym)) continue; + + /* 3) If the symbols are the same, do not patch */ + + if (streq(child_entry->unpatched.sym->name, + parent_entry->patched.name)) + { + continue; + } + + /* 4) If the parent vtable entry is a pad slot, and the child does not + * match it, then the child was built against a newer version of the + * libraries, so it is binary-incompatible. + */ + + require_action(!kxld_sym_name_is_padslot(parent_entry->patched.name), + finish, rval=KERN_FAILURE; + kxld_log(kKxldLogPatching, kKxldLogErr, + kKxldLogParentOutOfDate, super_vtable->name, vtable->name)); + +#if KXLD_USER_OR_STRICT_PATCHING + /* 5) If we are doing strict patching, we prevent kexts from declaring + * virtual functions and not implementing them. We can tell if a + * virtual function is declared but not implemented because we resolve + * symbols before patching; an unimplemented function will still be + * undefined at this point. We then look at whether the symbol has + * the same class prefix as the vtable. If it does, the symbol was + * declared as part of the class and not inherited, which means we + * should not patch it. + */ + + if (strict_patching && !kxld_sym_is_defined(child_entry->unpatched.sym)) + { + char class_name[KXLD_MAX_NAME_LEN]; + char function_prefix[KXLD_MAX_NAME_LEN]; + u_long function_prefix_len = 0; + + rval = kxld_sym_get_class_name_from_vtable_name(vtable->name, + class_name, sizeof(class_name)); + require_noerr(rval, finish); + + function_prefix_len = + kxld_sym_get_function_prefix_from_class_name(class_name, + function_prefix, sizeof(function_prefix)); + require(function_prefix_len, finish); + + if (!strncmp(child_entry->unpatched.sym->name, + function_prefix, function_prefix_len)) + { + continue; + } + } +#endif /* KXLD_USER_OR_STRICT_PATCHING */ + + /* 6) The child symbol is unresolved and different from its parent, so + * we need to patch it up. 
We do this by modifying the relocation + * entry of the vtable entry to point to the symbol of the parent + * vtable entry. If that symbol does not exist (i.e. we got the data + * from a link state object's vtable representation), then we create a + * new symbol in the symbol table and point the relocation entry to + * that. + */ + + sym = kxld_symtab_get_symbol_by_name(symtab, parent_entry->patched.name); + if (!sym) { + rval = kxld_symtab_add_symbol(symtab, parent_entry->patched.name, + parent_entry->patched.addr, &sym); + require_noerr(rval, finish); + } + require_action(sym, finish, rval=KERN_FAILURE); + + rval = kxld_symtab_get_sym_index(symtab, sym, &symindex); + require_noerr(rval, finish); + + rval = kxld_reloc_update_symindex(child_entry->unpatched.reloc, symindex); + require_noerr(rval, finish); + + kxld_log(kKxldLogPatching, kKxldLogDetail, + "In vtable %s, patching %s with %s.", + vtable->name, child_entry->unpatched.sym->name, sym->name); + + kxld_sym_patch(child_entry->unpatched.sym); + child_entry->unpatched.sym = sym; + } + + /* Change the vtable representation from the unpatched layout to the + * patched layout. + */ + for (i = 0; i < vtable->entries.nitems; ++i) { + char *name; + kxld_addr_t addr; + + child_entry = kxld_array_get_item(&vtable->entries, i); + if (child_entry->unpatched.sym) { + name = child_entry->unpatched.sym->name; + addr = child_entry->unpatched.sym->link_addr; + } else { + name = NULL; + addr = 0; + } + + child_entry->patched.name = name; + child_entry->patched.addr = addr; + } + + vtable->is_patched = TRUE; + rval = KERN_SUCCESS; + +finish: + return rval; +} + diff --git a/libkern/kxld/kxld_vtable.h b/libkern/kxld/kxld_vtable.h new file mode 100644 index 000000000..124756994 --- /dev/null +++ b/libkern/kxld/kxld_vtable.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_VTABLE_H_ +#define _KXLD_VTABLE_H_ + +#include +#if KERNEL + #include +#else + #include "kxld_types.h" +#endif + +#include "kxld_array.h" + +struct kxld_array; +struct kxld_reloc; +struct kxld_relocator; +struct kxld_sect; +struct kxld_sym; +struct kxld_symtab; +struct kxld_vtable_hdr; +struct section; + +typedef struct kxld_vtable KXLDVTable; +typedef union kxld_vtable_entry KXLDVTableEntry; + +struct kxld_vtable { + u_char *vtable; + const char *name; + KXLDArray entries; + boolean_t is_patched; +}; + +struct kxld_vtable_patched_entry { + char *name; + kxld_addr_t addr; +}; + +struct kxld_vtable_unpatched_entry { + struct kxld_sym *sym; + struct kxld_reloc *reloc; +}; + +union kxld_vtable_entry { + struct kxld_vtable_patched_entry patched; + struct kxld_vtable_unpatched_entry unpatched; +}; + +/******************************************************************************* +* Constructors and destructors +*******************************************************************************/ + +kern_return_t kxld_vtable_init_from_kernel_macho(KXLDVTable *vtable, + const struct kxld_sym *sym, const struct kxld_sect *sect, + const struct kxld_symtab *symtab, const struct kxld_relocator *relocator) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_vtable_init_from_final_macho(KXLDVTable *vtable, + const struct kxld_sym *sym, const struct kxld_sect *sect, + const struct kxld_symtab *symtab, const struct kxld_relocator *relocator, + const struct kxld_array *relocs) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_vtable_init_from_object_macho(KXLDVTable *vtable, + const struct kxld_sym *sym, const struct kxld_sect *sect, + const struct kxld_symtab *symtab, const struct kxld_relocator *relocator) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_vtable_init_from_link_state_32(KXLDVTable *vtable, u_char *state, + struct kxld_vtable_hdr *hdr) + 
__attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_vtable_init_from_link_state_64(KXLDVTable *vtable, u_char *state, + struct kxld_vtable_hdr *hdr) + __attribute__((nonnull, visibility("hidden"))); + +kern_return_t kxld_vtable_copy(KXLDVTable *vtable, const KXLDVTable *src) + __attribute__((nonnull, visibility("hidden"))); + +void kxld_vtable_clear(KXLDVTable *vtable) + __attribute__((visibility("hidden"))); + +void kxld_vtable_deinit(KXLDVTable *vtable) + __attribute__((visibility("hidden"))); + +/******************************************************************************* +* Modifiers +*******************************************************************************/ + +/* With strict patching, the vtable patcher with only patch pad slots */ +kern_return_t kxld_vtable_patch(KXLDVTable *vtable, const KXLDVTable *super_vtable, + struct kxld_symtab *symtab, boolean_t strict_patching) + __attribute__((nonnull, visibility("hidden"))); + +#endif /* _KXLD_VTABLE_H_ */ + diff --git a/libkern/kxld/tests/kextcopyright.c b/libkern/kxld/tests/kextcopyright.c new file mode 100644 index 000000000..dffbdbc22 --- /dev/null +++ b/libkern/kxld/tests/kextcopyright.c @@ -0,0 +1,179 @@ +#include + +#include + +#include + +#define kCFBundleGetInfoStringKey CFSTR("CFBundleGetInfoString") +#define kNSHumanReadableCopyrightKey CFSTR("NSHumanReadableCopyright") + +const char *gProgname = NULL; + +static void usage(void); +static void printFormat(void); +static char *convert_cfstring(CFStringRef the_string); + +/****************************************************************************** +******************************************************************************/ +static void +usage(void) +{ + printf("usage: %s [path to kext]\n\n" + "This program validates the copyright string in a kext's info " + "dictionary.\n\n", gProgname); + + printFormat(); +} + +/****************************************************************************** 
+******************************************************************************/ +static void +printFormat(void) +{ + fprintf(stderr, + "The copyright string should be contained in the NSHumanReadableCopyright key.\n" + "It should be of the format:\n" + "\tCopyright © [year(s) of publication] Apple Inc. All rights reserved.\n\n" + "where [year(s) of publication] is a comma-separated list of years and/or\n" + "year ranges, e.g., 2004, 2006-2008. Years must be four digits. Year ranges\n" + "may not contain spaces and must use four digits for both years.\n\n" + "The following are examples of valid copyright strings:\n" + "\tCopyright © 2008 Apple Inc. All rights reserved.\n" + "\tCopyright © 2004-2008 Apple Inc. All rights reserved.\n" + "\tCopyright © 1998,2000-2002,2004,2006-2008 Apple Inc. All rights reserved.\n"); +} + +/****************************************************************************** +******************************************************************************/ +char * +convert_cfstring(CFStringRef the_string) +{ + char *result = NULL; + CFDataRef the_data = NULL; + const UInt8 *data_bytes = NULL; + char *converted_string = NULL; + u_long converted_len = 0; + u_long bytes_copied = 0; + + the_data = CFStringCreateExternalRepresentation(kCFAllocatorDefault, + the_string, kCFStringEncodingUTF8, 0); + if (!the_data) { + fprintf(stderr, "Failed to convert string\n"); + goto finish; + } + + data_bytes = CFDataGetBytePtr(the_data); + if (!data_bytes) { + fprintf(stderr, "Failed to get converted string bytes\n"); + goto finish; + } + + converted_len = strlen((const char *)data_bytes) + 1; // +1 for nul + converted_string = malloc(converted_len); + if (!converted_string) { + fprintf(stderr, "Failed to allocate memory\n"); + goto finish; + } + + bytes_copied = strlcpy(converted_string, (const char *) data_bytes, + converted_len) + 1; // +1 for nul + if (bytes_copied != converted_len) { + fprintf(stderr, "Failed to copy converted string\n"); + goto finish; + 
} + + result = converted_string; +finish: + return result; +} + +/****************************************************************************** +******************************************************************************/ +int +main(int argc, const char *argv[]) +{ + int result = 1; + boolean_t infoCopyrightIsValid = false; + boolean_t readableCopyrightIsValid = false; + CFURLRef anURL = NULL; // must release + CFBundleRef aBundle = NULL; // must release + CFDictionaryRef aDict = NULL; // do not release + CFStringRef infoCopyrightString = NULL; // do not release + CFStringRef readableCopyrightString = NULL; // do not release + char *infoStr = NULL; // must free + char *readableStr = NULL; // must free + + gProgname = argv[0]; + + if (argc != 2) { + usage(); + goto finish; + } + + anURL = CFURLCreateFromFileSystemRepresentation(kCFAllocatorDefault, + (const UInt8 *) argv[1], strlen(argv[1]), /* isDirectory */ FALSE); + if (!anURL) { + fprintf(stderr, "Can't create path from %s\n", argv[1]); + goto finish; + } + + aBundle = CFBundleCreate(kCFAllocatorDefault, anURL); + if (!aBundle) { + fprintf(stderr, "Can't create bundle at path %s\n", argv[1]); + goto finish; + } + + aDict = CFBundleGetInfoDictionary(aBundle); + if (!aDict) { + fprintf(stderr, "Can't get info dictionary from bundle\n"); + goto finish; + } + + infoCopyrightString = CFDictionaryGetValue(aDict, kCFBundleGetInfoStringKey); + readableCopyrightString = CFDictionaryGetValue(aDict, kNSHumanReadableCopyrightKey); + + if (!infoCopyrightString && !readableCopyrightString) { + fprintf(stderr, "This kext does not have a value for NSHumanReadableCopyright"); + goto finish; + } + + if (infoCopyrightString) { + fprintf(stderr, "Warning: This kext has a value for CFBundleGetInfoString.\n" + "This key is obsolete, and may be removed from the kext's Info.plist.\n" + "It has been replaced by CFBundleVersion and NSHumanReadableCopyright.\n\n"); + + infoStr = convert_cfstring(infoCopyrightString); + if (!infoStr) 
goto finish; + + infoCopyrightIsValid = kxld_validate_copyright_string(infoStr); + } + + if (readableCopyrightString) { + readableStr = convert_cfstring(readableCopyrightString); + if (!readableStr) goto finish; + + readableCopyrightIsValid = kxld_validate_copyright_string(readableStr); + } + + if (!readableCopyrightIsValid) { + if (infoCopyrightIsValid) { + fprintf(stderr, "Warning: The copyright string in NSHumanReadableCopyright is invalid,\n" + "but the string in CFBundleGetInfoString is valid. CFBundleGetInfoString is\n" + "obsolete. Please migrate your copyright string to NSHumanReadableCopyright.\n\n"); + } else { + fprintf(stderr, "Error: There is no valid copyright string for this kext.\n\n"); + printFormat(); + goto finish; + } + } + + result = 0; +finish: + if (anURL) CFRelease(anURL); + if (aBundle) CFRelease(aBundle); + if (infoStr) free(infoStr); + if (readableStr) free(readableStr); + + return result; +} + diff --git a/libkern/kxld/tests/kxld_dict_test.c b/libkern/kxld/tests/kxld_dict_test.c new file mode 100644 index 000000000..d831a44ed --- /dev/null +++ b/libkern/kxld/tests/kxld_dict_test.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#include "kxld_dict.h" +#include "kxld_util.h" + +#define KEYLEN 40 +#define STRESSNUM 10000 + +typedef struct { + char * key; + int * value; +} Stress; + + +void kxld_test_log(KXLDLogSubsystem sys, KXLDLogLevel level, + const char *format, va_list ap, void *user_data); + +void +kxld_test_log(KXLDLogSubsystem sys __unused, KXLDLogLevel level __unused, + const char *format, va_list ap, void *user_data __unused) +{ + va_list args; + + va_copy(args, ap); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); + va_end(args); +} + +int +main(int argc __unused, char *argv[] __unused) +{ + kern_return_t result = KERN_SUCCESS; + KXLDDict dict; + int a1 = 1, a2 = 3, i = 0, j = 0; + void * b = NULL; + u_int test_num = 0; + u_long size = 0; + Stress stress_test[STRESSNUM]; + + kxld_set_logging_callback(kxld_test_log); + + bzero(&dict, sizeof(dict)); + + fprintf(stderr, "%d: Initialize\n", ++test_num); + result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10); + assert(result == KERN_SUCCESS); + size = kxld_dict_get_num_entries(&dict); + assert(size == 0); + + fprintf(stderr, "%d: Find nonexistant key\n", ++test_num); + b = kxld_dict_find(&dict, "hi"); + assert(b == NULL); + + fprintf(stderr, "%d: Insert and find\n", ++test_num); + result = kxld_dict_insert(&dict, "hi", &a1); + assert(result == KERN_SUCCESS); + b = kxld_dict_find(&dict, "hi"); + assert(b && *(int*)b == a1); + size = 
kxld_dict_get_num_entries(&dict); + assert(size == 1); + + fprintf(stderr, "%d: Insert same key with different values\n", ++test_num); + result = kxld_dict_insert(&dict, "hi", &a2); + assert(result == KERN_SUCCESS); + b = kxld_dict_find(&dict, "hi"); + assert(b && *(int*)b == a2); + size = kxld_dict_get_num_entries(&dict); + assert(size == 1); + + fprintf(stderr, "%d: Clear and find of nonexistant key\n", ++test_num); + kxld_dict_clear(&dict); + result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10); + b = kxld_dict_find(&dict, "hi"); + assert(b == NULL); + size = kxld_dict_get_num_entries(&dict); + assert(size == 0); + + fprintf(stderr, "%d: Insert multiple keys\n", ++test_num); + result = kxld_dict_insert(&dict, "hi", &a1); + assert(result == KERN_SUCCESS); + result = kxld_dict_insert(&dict, "hello", &a2); + assert(result == KERN_SUCCESS); + b = kxld_dict_find(&dict, "hi"); + assert(result == KERN_SUCCESS); + assert(b && *(int*)b == a1); + b = kxld_dict_find(&dict, "hello"); + assert(b && *(int*)b == a2); + size = kxld_dict_get_num_entries(&dict); + assert(size == 2); + + fprintf(stderr, "%d: Remove keys\n", ++test_num); + kxld_dict_remove(&dict, "hi", &b); + assert(b && *(int*)b == a1); + b = kxld_dict_find(&dict, "hi"); + assert(b == NULL); + kxld_dict_remove(&dict, "hi", &b); + assert(b == NULL); + size = kxld_dict_get_num_entries(&dict); + assert(size == 1); + + fprintf(stderr, "%d: Stress test - %d insertions and finds\n", ++test_num, STRESSNUM); + + kxld_dict_clear(&dict); + result = kxld_dict_init(&dict, kxld_dict_string_hash, kxld_dict_string_cmp, 10); + for (i = 0; i < STRESSNUM; ++i) { + int * tmp_value = kxld_alloc(sizeof(int)); + char * tmp_key = kxld_alloc(sizeof(char) * (KEYLEN + 1)); + + *tmp_value = i; + for (j = 0; j < KEYLEN; ++j) { + tmp_key[j] = (rand() % 26) + 'a'; + } + tmp_key[KEYLEN] = '\0'; + + kxld_dict_insert(&dict, tmp_key, tmp_value); + stress_test[i].key = tmp_key; + stress_test[i].value = tmp_value; + } + + 
for (i = 0; i < STRESSNUM; ++i) { + int target_value; + void * tmp_value; + char * key = stress_test[i].key; + + target_value = *stress_test[i].value; + tmp_value = kxld_dict_find(&dict, key); + assert(target_value == *(int *)tmp_value); + + kxld_free(stress_test[i].key, sizeof(char) * (KEYLEN + 1)); + kxld_free(stress_test[i].value, sizeof(int)); + } + + fprintf(stderr, "%d: Destroy\n", ++test_num); + kxld_dict_deinit(&dict); + + fprintf(stderr, "\nAll tests passed! Now check for memory leaks...\n"); + + kxld_print_memory_report(); + + return 0; +} + diff --git a/libkern/kxld/tests/loadtest.py b/libkern/kxld/tests/loadtest.py new file mode 100644 index 000000000..def56cfed --- /dev/null +++ b/libkern/kxld/tests/loadtest.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +import sys +from subprocess import call, Popen, PIPE + +kexts = [] +pipe = Popen("/usr/sbin/kextfind \( -l -and -x -and -arch i386 \)", shell=True, stdout=PIPE).stdout + +line = pipe.readline() +while line: + kexts.append(line.strip()) + line = pipe.readline() + +NULL = open("/dev/null") + +for kext in kexts: + try: + print "Processing", kext +#cmd = "/sbin/kextload -ns /tmp/syms \"%s\"" % kext + cmd = "/sbin/kextload \"%s\"" % kext + kextload = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE) + for i in range(20): + kextload.stdin.write("0x1000\n"); + retcode = kextload.wait() + if retcode < 0: + print >>sys.stderr, "*** kextload of %s was terminated by signal %d" % (kext, -retcode) + elif retcode > 0: + print >>sys.stderr, "*** kextload of %s failed with return code %d" % (kext, retcode) + except OSError, e: + print >>sys.stderr, "Execution failed:", e + sys.exit(1) + diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index f9f411672..76e4d9f99 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -18,12 +18,16 @@ INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} \ INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} \ i386 +INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} \ + i386 + 
INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} \ arm EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} DATAFILES = \ @@ -32,27 +36,58 @@ DATAFILES = \ OSByteOrder.h \ _OSByteOrder.h \ OSDebug.h \ + OSKextLib.h \ OSMalloc.h \ OSReturn.h \ OSTypes.h \ locks.h \ sysctl.h \ + zconf.h \ zlib.h -INSTALL_MI_LIST = OSByteOrder.h _OSByteOrder.h OSDebug.h OSReturn.h OSTypes.h +PRIVATE_DATAFILES = \ + OSKextLibPrivate.h \ + OSKextLibPrivate.h \ + kext_request_keys.h \ + mkext.h \ + prelink.h + +INSTALL_MI_LIST = \ + OSByteOrder.h \ + _OSByteOrder.h \ + OSDebug.h \ + OSKextLib.h \ + OSReturn.h \ + OSTypes.h INSTALL_MI_DIR = libkern -INSTALL_MI_LCL_GEN_LIST = OSCrossEndian.h +INSTALL_MI_LCL_LIST = \ + ${INSTALL_MI_LIST} \ + ${PRIVATE_DATAFILES} \ + kext_panic_report.h \ + OSCrossEndian.h + + + +INSTALL_KF_MI_LIST = \ + ${DATAFILES} -EXPORT_MI_LIST = ${DATAFILES} +INSTALL_KF_MI_LCL_LIST = \ + ${DATAFILES} \ + ${PRIVATE_DATAFILES} + +EXPORT_MI_LIST = \ + ${DATAFILES} \ + ${PRIVATE_DATAFILES} \ + kernel_mach_header.h \ + kxld.h \ + kxld_types.h EXPORT_MI_GEN_LIST = version.h EXPORT_MI_DIR = libkern -NEWVERS = $(SRCROOT)/config/newvers.pl - version.h: version.h.template $(SRCROOT)/config/MasterVersion @echo "Generating libkern/$@ from $<"; $(_v)install $(DATA_INSTALL_FLAGS) $< $@ @@ -60,5 +95,3 @@ version.h: version.h.template $(SRCROOT)/config/MasterVersion include $(MakeInc_rule) include $(MakeInc_dir) - - diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index 17e5dbd2d..d8e157483 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -2,7 +2,7 @@ * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -41,308 +41,590 @@ extern "C" { #endif -#if defined(__i386__) +/*! + * @header + * + * @abstract + * This header declares the OSAtomic group of functions for atomic + * reading and updating of values. + */ + +#if defined(__i386__) || defined(__x86_64__) + +/*! + * @function OSCompareAndSwap64 + * + * @abstract + * 64-bit compare and swap operation. + * + * @discussion + * See OSCompareAndSwap. + */ +extern Boolean OSCompareAndSwap64( + UInt64 oldValue, + UInt64 newValue, + volatile UInt64 * address); -/*! @function OSCompareAndSwap64 - @abstract 64-bit compare and swap operation. - @discussion See OSCompareAndSwap. -*/ -extern Boolean OSCompareAndSwap64(UInt64 oldValue, UInt64 newValue, - volatile UInt64 *address); +#endif /* defined(__i386__) || defined(__x86_64__) */ -/*! @function OSAddAtomic64 - @abstract 64-bit atomic add operation. - @discussion See OSAddAtomic. 
-*/ -extern SInt64 OSAddAtomic64(SInt64 theAmount, volatile SInt64 *address); +#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) -/*! @function OSIncrementAtomic64 - @abstract 64-bit increment. - @discussion See OSIncrementAtomic. +/*! + * @function OSAddAtomic64 + * + * @abstract + * 64-bit atomic add operation. + * + * @discussion + * See OSAddAtomic. + */ +extern SInt64 OSAddAtomic64( + SInt64 theAmount, + volatile SInt64 * address); + +/*! + * @function OSIncrementAtomic64 + * + * @abstract + * 64-bit increment. + * + * @discussion + * See OSIncrementAtomic. */ -inline static SInt64 OSIncrementAtomic64(volatile SInt64 *address) +inline static SInt64 OSIncrementAtomic64(volatile SInt64 * address) { - return OSAddAtomic64(1, address); + return OSAddAtomic64(1LL, address); } -/*! @function OSDecrementAtomic64 - @abstract 64-bit decrement. - @discussion See OSDecrementAtomic. +/*! + * @function OSDecrementAtomic64 + * + * @abstract + * 64-bit decrement. + * + * @discussion + * See OSDecrementAtomic. */ -inline static SInt64 OSDecrementAtomic64(volatile SInt64 *address) +inline static SInt64 OSDecrementAtomic64(volatile SInt64 * address) { - return OSAddAtomic64(-1, address); + return OSAddAtomic64(-1LL, address); } -#endif /* defined(__i386__) */ - -/*! @function OSCompareAndSwap - @abstract Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSCompareAndSwap function compares the value at the specified address with oldVal. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap returns true if newValue is written to the address; otherwise, it returns false. - - This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param oldValue The value to compare at address. - @param newValue The value to write to address if oldValue compares true. - @param address The 4-byte aligned address of the data to update atomically. - @result true if newValue was written to the address. */ - -extern Boolean OSCompareAndSwap(UInt32 oldValue, UInt32 newValue, - volatile UInt32 *address); - -/*! @function OSAddAtomic - @abstract 32-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic function adds the specified amount to the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param amount The amount to add. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the addition */ - -extern SInt32 OSAddAtomic(SInt32 amount, volatile SInt32 * address); - -/*! @function OSAddAtomic16 - @abstract 16-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param amount The amount to add. - @param address The 2-byte aligned address of the value to update atomically. 
- @result The value before the addition */ - -extern SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 * address); - -/*! @function OSAddAtomic8 - @abstract 8-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param amount The amount to add. - @param address The address of the value to update atomically. - @result The value before the addition */ - -extern SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 * address); - -/*! @function OSIncrementAtomic - @abstract 32-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic function increments the value at the specified address by one and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the increment. */ - -extern SInt32 OSIncrementAtomic(volatile SInt32 * address); - -/*! @function OSIncrementAtomic16 - @abstract 16-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic16 function increments the value at the specified address by one and returns the original value. 
- - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The 2-byte aligned address of the value to update atomically. - @result The value before the increment. */ - -extern SInt16 OSIncrementAtomic16(volatile SInt16 * address); - -/*! @function OSIncrementAtomic8 - @abstract 8-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSIncrementAtomic8 function increments the value at the specified address by one and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The address of the value to update atomically. - @result The value before the increment. */ - -extern SInt8 OSIncrementAtomic8(volatile SInt8 * address); - -/*! @function OSDecrementAtomic - @abstract 32-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic function decrements the value at the specified address by one and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the decrement. */ - -extern SInt32 OSDecrementAtomic(volatile SInt32 * address); - -/*! 
@function OSDecrementAtomic16 - @abstract 16-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The 2-byte aligned address of the value to update atomically. - @result The value before the decrement. */ - -extern SInt16 OSDecrementAtomic16(volatile SInt16 * address); - -/*! @function OSDecrementAtomic8 - @abstract 8-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the original value. +#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) */ - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param address The address of the value to update atomically. - @result The value before the decrement. */ - -extern SInt8 OSDecrementAtomic8(volatile SInt8 * address); - -/*! @function OSBitAndAtomic - @abstract 32-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically and with the value. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the bitwise operation */ - -extern UInt32 OSBitAndAtomic(UInt32 mask, volatile UInt32 * address); - -/*! @function OSBitAndAtomic16 - @abstract 16-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic16 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically and with the value. - @param address The 2-byte aligned address of the value to update atomically. - @result The value before the bitwise operation. */ - -extern UInt16 OSBitAndAtomic16(UInt32 mask, volatile UInt16 * address); - -/*! @function OSBitAndAtomic8 - @abstract 8-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitAndAtomic8 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically and with the value. 
- @param address The address of the value to update atomically. - @result The value before the bitwise operation. */ - -extern UInt8 OSBitAndAtomic8(UInt32 mask, volatile UInt8 * address); - -/*! @function OSBitOrAtomic - @abstract 32-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitOrAtomic function logically ors the bits of the specified mask into the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically or with the value. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the bitwise operation. */ - -extern UInt32 OSBitOrAtomic(UInt32 mask, volatile UInt32 * address); - -/*! @function OSBitOrAtomic16 - @abstract 16-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitOrAtomic16 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically or with the value. - @param address The 2-byte aligned address of the value to update atomically. - @result The value before the bitwise operation. */ +#if XNU_KERNEL_PRIVATE +/* Not to be included in headerdoc. + * + * @function OSAddAtomicLong + * + * @abstract + * 32/64-bit atomic add operation, depending on sizeof(long). 
+ * + * @discussion + * See OSAddAtomic. + */ +extern long OSAddAtomicLong( + long theAmount, + volatile long * address); -extern UInt16 OSBitOrAtomic16(UInt32 mask, volatile UInt16 * address); +/* Not to be included in headerdoc. + * + * @function OSIncrementAtomicLong + * + * @abstract + * 32/64-bit increment, depending on sizeof(long) + * + * @discussion + * See OSIncrementAtomic. +*/ +inline static long OSIncrementAtomicLong(volatile long * address) +{ + return OSAddAtomicLong(1L, address); +} -/*! @function OSBitOrAtomic8 - @abstract 8-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. +/* Not to be included in headerdoc. + * + * @function OSDecrementAtomicLong + * + * @abstract + * 32/64-bit decrement, depending on sizeof(long) + * @discussion See OSDecrementAtomic. + */ +inline static long OSDecrementAtomicLong(volatile long * address) +{ + return OSAddAtomicLong(-1L, address); +} +#endif /* XNU_KERNEL_PRIVATE */ - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSBitOrAtomic8 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. - @param mask The mask to logically or with the value. - @param address The address of the value to update atomically. - @result The value before the bitwise operation. */ +/* + * The macro SAFE_CAST_PTR() casts one type of pointer to another type, making sure + * the data the pointer is referencing is the same size. If it is not, it will cause + * a division by zero compiler warning.
This is to work around "SInt32" being defined + * as "long" on ILP32 and as "int" on LP64, which would require an explicit cast to + * "SInt32*" when for instance passing an "int*" to OSAddAtomic() - which masks size + * mismatches. + * -- var is used twice, but sizeof does not evaluate the + * argument, i.e. we're safe against "++" etc. in var -- + */ +#ifdef XNU_KERNEL_PRIVATE +#define SAFE_CAST_PTR(type, var) (((type)(var))+(0/(sizeof(*var) == sizeof(*(type)0) ? 1 : 0))) +#else +#define SAFE_CAST_PTR(type, var) ((type)(var)) +#endif -extern UInt8 OSBitOrAtomic8(UInt32 mask, volatile UInt8 * address); +/*! + * @function OSCompareAndSwap + * + * @abstract + * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSCompareAndSwap function compares the value at the specified address with oldValue. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwap returns true if newValue is written to the address; otherwise, it returns false. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @param oldValue The value to compare at address. + * @param newValue The value to write to address if oldValue compares true. + * @param address The 4-byte aligned address of the data to update atomically. + * @result true if newValue was written to the address. + */ +extern Boolean OSCompareAndSwap( + UInt32 oldValue, + UInt32 newValue, + volatile UInt32 * address); +#define OSCompareAndSwap(a, b, c) \ + (OSCompareAndSwap(a, b, SAFE_CAST_PTR(volatile UInt32*,c))) + +/*!
+ * @function OSCompareAndSwapPtr + * + * @abstract + * Compare and swap operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSCompareAndSwapPtr function compares the pointer-sized value at the specified address with oldValue. The value of newValue is written to the address only if oldValue and the value at the address are equal. OSCompareAndSwapPtr returns true if newValue is written to the address; otherwise, it returns false. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param oldValue The pointer value to compare at address. + * @param newValue The pointer value to write to address if oldValue compares true. + * @param address The pointer-size aligned address of the data to update atomically. + * @result true if newValue was written to the address. + */ +extern Boolean OSCompareAndSwapPtr( + void * oldValue, + void * newValue, + void * volatile * address); +#define OSCompareAndSwapPtr(a, b, c) \ + (OSCompareAndSwapPtr(a, b, SAFE_CAST_PTR(void * volatile *,c))) + +/*! + * @function OSAddAtomic + * + * @abstract + * 32-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSAddAtomic function adds the specified amount to the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device.
Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param amount The amount to add. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the addition + */ +extern SInt32 OSAddAtomic( + SInt32 amount, + volatile SInt32 * address); +#define OSAddAtomic(a, b) \ + (OSAddAtomic(a, SAFE_CAST_PTR(volatile SInt32*,b))) + +/*! + * @function OSAddAtomic16 + * + * @abstract + * 16-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param amount The amount to add. + * @param address The 2-byte aligned address of the value to update atomically. + * @result The value before the addition + */ +extern SInt16 OSAddAtomic16( + SInt32 amount, + volatile SInt16 * address); -/*! @function OSBitXorAtomic - @abstract 32-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. +/*! + * @function OSAddAtomic8 + * + * @abstract + * 8-bit add operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param amount The amount to add. + * @param address The address of the value to update atomically. + * @result The value before the addition. + */ +extern SInt8 OSAddAtomic8( + SInt32 amount, + volatile SInt8 * address); - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSBitXorAtomic function logically xors the bits of the specified mask into the value at the specified address and returns the original value. - @param mask The mask to logically or with the value. - @param address The 4-byte aligned address of the value to update atomically. - @result The value before the bitwise operation. */ +/*! + * @function OSIncrementAtomic + * + * @abstract + * 32-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSIncrementAtomic function increments the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the increment. 
+ */ +extern SInt32 OSIncrementAtomic(volatile SInt32 * address); +#define OSIncrementAtomic(a) \ + (OSIncrementAtomic(SAFE_CAST_PTR(volatile SInt32*,a))) -extern UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * address); +/*! + * @function OSIncrementAtomic16 + * + * @abstract + * 16-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSIncrementAtomic16 function increments the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param address The 2-byte aligned address of the value to update atomically. + * @result The value before the increment. + */ +extern SInt16 OSIncrementAtomic16(volatile SInt16 * address); -/*! @function OSBitXorAtomic16 - @abstract 16-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSBitXorAtomic16 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. +/*! + * @function OSIncrementAtomic8 + * + * @abstract + * 8-bit increment operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSIncrementAtomic8 function increments the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param address The address of the value to update atomically. + * @result The value before the increment. + */ +extern SInt8 OSIncrementAtomic8(volatile SInt8 * address); - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param mask The mask to logically or with the value. - @param address The 2-byte aligned address of the value to update atomically. - @result The value before the bitwise operation. */ +/*! + * @function OSDecrementAtomic + * + * @abstract + * 32-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSDecrementAtomic function decrements the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the decrement. + */ +extern SInt32 OSDecrementAtomic(volatile SInt32 * address); +#define OSDecrementAtomic(a) \ + (OSDecrementAtomic(SAFE_CAST_PTR(volatile SInt32*,a))) -extern UInt16 OSBitXorAtomic16(UInt32 mask, volatile UInt16 * address); +/*! 
+ * @function OSDecrementAtomic16 + * + * @abstract + * 16-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param address The 2-byte aligned address of the value to update atomically. + * @result The value before the decrement. + */ +extern SInt16 OSDecrementAtomic16(volatile SInt16 * address); -/*! @function OSBitXorAtomic8 - @abstract 8-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. +/*! + * @function OSDecrementAtomic8 + * + * @abstract + * 8-bit decrement operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param address The address of the value to update atomically. + * @result The value before the decrement. 
+ */ +extern SInt8 OSDecrementAtomic8(volatile SInt8 * address); - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSBitXorAtomic8 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. - @param mask The mask to logically or with the value. - @param address The address of the value to update atomically. - @result The value before the bitwise operation. */ +/*! + * @function OSBitAndAtomic + * + * @abstract + * 32-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param mask The mask to logically and with the value. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the bitwise operation + */ +extern UInt32 OSBitAndAtomic( + UInt32 mask, + volatile UInt32 * address); +#define OSBitAndAtomic(a, b) \ + (OSBitAndAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b))) + +/*! + * @function OSBitAndAtomic16 + * + * @abstract + * 16-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. 
+ * + * @discussion + * The OSBitAndAtomic16 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param mask The mask to logically and with the value. + * @param address The 2-byte aligned address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt16 OSBitAndAtomic16( + UInt32 mask, + volatile UInt16 * address); -extern UInt8 OSBitXorAtomic8(UInt32 mask, volatile UInt8 * address); +/*! + * @function OSBitAndAtomic8 + * + * @abstract + * 8-bit logical and operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSBitAndAtomic8 function logically ands the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param mask The mask to logically and with the value. + * @param address The address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt8 OSBitAndAtomic8( + UInt32 mask, + volatile UInt8 * address); -/*! 
@function OSTestAndSet - @abstract Bit test and set operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. +/*! + * @function OSBitOrAtomic + * + * @abstract + * 32-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSBitOrAtomic function logically ors the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param mask The mask to logically or with the value. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt32 OSBitOrAtomic( + UInt32 mask, + volatile UInt32 * address); +#define OSBitOrAtomic(a, b) \ + (OSBitOrAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b))) + +/*! + * @function OSBitOrAtomic16 + * + * @abstract + * 16-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSBitOrAtomic16 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. 
+ * @param mask The mask to logically or with the value. + * @param address The 2-byte aligned address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt16 OSBitOrAtomic16( + UInt32 mask, + volatile UInt16 * address); - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @discussion The OSTestAndSet function sets a single bit in a byte at a specified address. It returns true if the bit was already set, false otherwise. - @param bit The bit number in the range 0 through 7. - @param address The address of the byte to update atomically. - @result true if the bit was already set, false otherwise. */ +/*! + * @function OSBitOrAtomic8 + * + * @abstract + * 8-bit logical or operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @discussion + * The OSBitOrAtomic8 function logically ors the bits of the specified mask into the value at the specified address and returns the original value. + * @param mask The mask to logically or with the value. + * @param address The address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt8 OSBitOrAtomic8( + UInt32 mask, + volatile UInt8 * address); -extern Boolean OSTestAndSet(UInt32 bit, volatile UInt8 * startAddress); +/*! 
+ * @function OSBitXorAtomic + * + * @abstract + * 32-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @discussion + * The OSBitXorAtomic function logically xors the bits of the specified mask into the value at the specified address and returns the original value. + * @param mask The mask to logically xor with the value. + * @param address The 4-byte aligned address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt32 OSBitXorAtomic( + UInt32 mask, + volatile UInt32 * address); +#define OSBitXorAtomic(a, b) \ + (OSBitXorAtomic(a, SAFE_CAST_PTR(volatile UInt32*,b))) + +/*! + * @function OSBitXorAtomic16 + * + * @abstract + * 16-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSBitXorAtomic16 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param mask The mask to logically xor with the value. + * @param address The 2-byte aligned address of the value to update atomically.
+ * @result The value before the bitwise operation. + */ +extern UInt16 OSBitXorAtomic16( + UInt32 mask, + volatile UInt16 * address); -/*! @function OSTestAndClear - @abstract Bit test and clear operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSTestAndClear function clears a single bit in a byte at a specified address. It returns true if the bit was already clear, false otherwise. +/*! + * @function OSBitXorAtomic8 + * + * @abstract + * 8-bit logical xor operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @discussion + * The OSBitXorAtomic8 function logically xors the bits of the specified mask into the value at the specified address and returns the original value. + * @param mask The mask to logically xor with the value. + * @param address The address of the value to update atomically. + * @result The value before the bitwise operation. + */ +extern UInt8 OSBitXorAtomic8( + UInt32 mask, + volatile UInt8 * address); - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param bit The bit number in the range 0 through 7. - @param address The address of the byte to update atomically. - @result true if the bit was already clear, false otherwise. */ +/*!
+ * @function OSTestAndSet + * + * @abstract + * Bit test and set operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * + * @discussion + * The OSTestAndSet function sets a single bit in a byte at a specified address. It returns true if the bit was already set, false otherwise. + * @param bit The bit number in the range 0 through 7. + * @param startAddress The address of the byte to update atomically. + * @result true if the bit was already set, false otherwise. + */ +extern Boolean OSTestAndSet( + UInt32 bit, + volatile UInt8 * startAddress); -extern Boolean OSTestAndClear(UInt32 bit, volatile UInt8 * startAddress); +/*! + * @function OSTestAndClear + * + * @abstract + * Bit test and clear operation, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSTestAndClear function clears a single bit in a byte at a specified address. It returns true if the bit was already clear, false otherwise. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param bit The bit number in the range 0 through 7. + * @param startAddress The address of the byte to update atomically. + * @result true if the bit was already clear, false otherwise. 
+ */ +extern Boolean OSTestAndClear( + UInt32 bit, + volatile UInt8 * startAddress); #ifdef __ppc__ -/*! @function OSEnqueueAtomic - @abstract Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param listHead The address of a head pointer for the list . - @param element The list element to insert at the head of the list. - @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored. */ - -extern void OSEnqueueAtomic(void * volatile * listHead, void * element, - SInt32 elementNextFieldOffset); - -/*! @function OSDequeueAtomic - @abstract Singly linked list element head removal, performed atomically with respect to all devices that participate in the coherency architecture of the platform. - @discussion The OSDequeueAtomic function removes an element from the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. - - This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. - @param listHead The address of a head pointer for the list . - @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored. 
- @result A removed element, or zero if the list is empty. */ +/*! + * @function OSEnqueueAtomic + * + * @abstract + * Singly linked list head insertion, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSEnqueueAtomic function places an element at the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. + * + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param listHead The address of a head pointer for the list . + * @param element The list element to insert at the head of the list. + * @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored. + */ +extern void OSEnqueueAtomic( + void * volatile * listHead, + void * element, + SInt32 elementNextFieldOffset); -extern void * OSDequeueAtomic(void * volatile * listHead, - SInt32 elementNextFieldOffset); +/*! + * @function OSDequeueAtomic + * + * @abstract + * Singly linked list element head removal, performed atomically with respect to all devices that participate in the coherency architecture of the platform. + * + * @discussion + * The OSDequeueAtomic function removes an element from the head of a single linked list, which is specified with the address of a head pointer, listHead. The element structure has a next field whose offset is specified. + * + * This function guarantees atomicity only with main system memory. 
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * @param listHead The address of a head pointer for the list . + * @param elementNextFieldOffset The byte offset into the element where a pointer to the next element in the list is stored. + * @result A removed element, or zero if the list is empty. + */ +extern void * OSDequeueAtomic( + void * volatile * listHead, + SInt32 elementNextFieldOffset); #endif /* __ppc__ */ -/*! @function OSSynchronizeIO - @abstract The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. - @discussion The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. It executes the eieio instruction on PowerPC processors. */ - -#if defined(__arm__) && defined(__thumb__) -extern void OSSynchronizeIO(void); -#else +/*! + * @function OSSynchronizeIO + * + * @abstract + * The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. + * + * @discussion + * The OSSynchronizeIO routine ensures orderly load and store operations to noncached memory mapped I/O devices. It executes the eieio instruction on PowerPC processors. + */ static __inline__ void OSSynchronizeIO(void) { #if defined(__ppc__) __asm__ ("eieio"); #endif } -#endif #if defined(__cplusplus) } diff --git a/libkern/libkern/OSByteOrder.h b/libkern/libkern/OSByteOrder.h index 48659ca20..cb12cb31a 100644 --- a/libkern/libkern/OSByteOrder.h +++ b/libkern/libkern/OSByteOrder.h @@ -179,9 +179,9 @@ _OSWriteInt64( /* Host endianess to big endian byte swapping macros for constants. 
*/ -#define OSSwapHostToBigConstInt16(x) (x) -#define OSSwapHostToBigConstInt32(x) (x) -#define OSSwapHostToBigConstInt64(x) (x) +#define OSSwapHostToBigConstInt16(x) ((uint16_t)(x)) +#define OSSwapHostToBigConstInt32(x) ((uint32_t)(x)) +#define OSSwapHostToBigConstInt64(x) ((uint64_t)(x)) /* Generic host endianess to big endian byte swapping functions. */ @@ -203,9 +203,9 @@ _OSWriteInt64( /* Big endian to host endianess byte swapping macros for constants. */ -#define OSSwapBigToHostConstInt16(x) (x) -#define OSSwapBigToHostConstInt32(x) (x) -#define OSSwapBigToHostConstInt64(x) (x) +#define OSSwapBigToHostConstInt16(x) ((uint16_t)(x)) +#define OSSwapBigToHostConstInt32(x) ((uint32_t)(x)) +#define OSSwapBigToHostConstInt64(x) ((uint64_t)(x)) /* Generic big endian to host endianess byte swapping functions. */ @@ -265,9 +265,9 @@ _OSWriteInt64( /* Host endianess to little endian byte swapping macros for constants. */ -#define OSSwapHostToLittleConstInt16(x) (x) -#define OSSwapHostToLittleConstInt32(x) (x) -#define OSSwapHostToLittleConstInt64(x) (x) +#define OSSwapHostToLittleConstInt16(x) ((uint16_t)(x)) +#define OSSwapHostToLittleConstInt32(x) ((uint32_t)(x)) +#define OSSwapHostToLittleConstInt64(x) ((uint64_t)(x)) /* Generic host endianess to little endian byte swapping functions. */ @@ -289,9 +289,9 @@ _OSWriteInt64( /* Little endian to host endianess byte swapping macros for constants. */ -#define OSSwapLittleToHostConstInt16(x) (x) -#define OSSwapLittleToHostConstInt32(x) (x) -#define OSSwapLittleToHostConstInt64(x) (x) +#define OSSwapLittleToHostConstInt16(x) ((uint16_t)(x)) +#define OSSwapLittleToHostConstInt32(x) ((uint32_t)(x)) +#define OSSwapLittleToHostConstInt64(x) ((uint64_t)(x)) /* Generic little endian to host endianess byte swapping functions. 
*/ diff --git a/libkern/libkern/OSDebug.h b/libkern/libkern/OSDebug.h index afa0af160..eaeefc129 100644 --- a/libkern/libkern/OSDebug.h +++ b/libkern/libkern/OSDebug.h @@ -36,17 +36,25 @@ #define _OS_OSDEBBUG_H #include +#include __BEGIN_DECLS extern int log_leaks; /* Use kernel_debug() to log a backtrace */ -extern void trace_backtrace(unsigned int debugid, unsigned int debugid2, int size, int data); +extern void trace_backtrace(unsigned int debugid, unsigned int debugid2, unsigned long size, unsigned long data); /* Report a message with a 4 entry backtrace - very slow */ extern void OSReportWithBacktrace(const char *str, ...); extern unsigned OSBacktrace(void **bt, unsigned maxAddrs); +/*! @function OSKernelStackRemaining + @abstract Returns bytes available below the current stack frame. + @discussion Returns bytes available below the current stack frame. Safe for interrupt or thread context. + @result Approximate byte count available. */ + +vm_offset_t OSKernelStackRemaining( void ); + __END_DECLS #define TRACE_MACHLEAKS(a,b,c,d) \ diff --git a/libkern/libkern/OSKextLib.h b/libkern/libkern/OSKextLib.h new file mode 100644 index 000000000..9842b7546 --- /dev/null +++ b/libkern/libkern/OSKextLib.h @@ -0,0 +1,903 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _LIBKERN_OSKEXTLIB_H +#define _LIBKERN_OSKEXTLIB_H + +#include +__BEGIN_DECLS + +#include +#include +#include + +#ifdef KERNEL +#include +#include +#else +#include +#include +#endif /* KERNEL */ + +/*! + * @header + * + * Declares functions, basic return values, and other constants + * related to kernel extensions (kexts). + */ + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark OSReturn Values for Kernel Extensions +/********************************************************************/ +#endif +/*! + * @group OSReturn Values for Kernel Extensions + * Many kext-related functions return these values, + * as well as those defined under + * @link //apple_ref/c/tdef/OSReturn OSReturn@/link + * and other variants of kern_return_t. + */ + +#ifdef XNU_KERNEL_PRIVATE +/********************************************************************* +* Check libsyscall/mach/err_libkern.sub when editing or adding +* result codes! +*********************************************************************/ +#endif /* XNU_KERNEL_PRIVATE */ + +#define sub_libkern_kext err_sub(2) +#define libkern_kext_err(code) (sys_libkern|sub_libkern_kext|(code)) + + +/*! + * @define kOSKextReturnInternalError + * @abstract An internal error in the kext library. 
+ * Contrast with @link //apple_ref/c/econst/OSReturnError + * OSReturnError@/link. + */ +#define kOSKextReturnInternalError libkern_kext_err(0x1) + +/*! + * @define kOSKextReturnNoMemory + * @abstract Memory allocation failed. + */ +#define kOSKextReturnNoMemory libkern_kext_err(0x2) + +/*! + * @define kOSKextReturnNoResources + * @abstract Some resource other than memory (such as available load tags) + * is exhausted. + */ +#define kOSKextReturnNoResources libkern_kext_err(0x3) + +/*! + * @define kOSKextReturnNotPrivileged + * @abstract The caller lacks privileges to perform the requested operation. + */ +#define kOSKextReturnNotPrivileged libkern_kext_err(0x4) + +/*! + * @define kOSKextReturnInvalidArgument + * @abstract Invalid argument. + */ +#define kOSKextReturnInvalidArgument libkern_kext_err(0x5) + +/*! + * @define kOSKextReturnNotFound + * @abstract Search item not found. + */ +#define kOSKextReturnNotFound libkern_kext_err(0x6) + +/*! + * @define kOSKextReturnBadData + * @abstract Malformed data (not used for XML). + */ +#define kOSKextReturnBadData libkern_kext_err(0x7) + +/*! + * @define kOSKextReturnSerialization + * @abstract Error converting or (un)serializing URL, string, or XML. + */ +#define kOSKextReturnSerialization libkern_kext_err(0x8) + +/*! + * @define kOSKextReturnUnsupported + * @abstract Operation is no longer or not yet supported. + */ +#define kOSKextReturnUnsupported libkern_kext_err(0x9) + +/*! + * @define kOSKextReturnDisabled + * @abstract Operation is currently disabled. + */ +#define kOSKextReturnDisabled libkern_kext_err(0xa) + +/*! + * @define kOSKextReturnNotAKext + * @abstract Bundle is not a kernel extension. + */ +#define kOSKextReturnNotAKext libkern_kext_err(0xb) + +/*! + * @define kOSKextReturnValidation + * @abstract Validation failures encountered; check diagnostics for details. + */ +#define kOSKextReturnValidation libkern_kext_err(0xc) + +/*! 
+ * @define kOSKextReturnAuthentication + * @abstract Authentication failures encountered; check diagnostics for details. + */ +#define kOSKextReturnAuthentication libkern_kext_err(0xd) + +/*! + * @define kOSKextReturnDependencies + * @abstract Dependency resolution failures encountered; check diagnostics for details. + */ +#define kOSKextReturnDependencies libkern_kext_err(0xe) + +/*! + * @define kOSKextReturnArchNotFound + * @abstract Kext does not contain code for the requested architecture. + */ +#define kOSKextReturnArchNotFound libkern_kext_err(0xf) + +/*! + * @define kOSKextReturnCache + * @abstract An error occurred processing a system kext cache. + */ +#define kOSKextReturnCache libkern_kext_err(0x10) + +/*! + * @define kOSKextReturnDeferred + * @abstract Operation has been posted asynchronously to user space (kernel only). + */ +#define kOSKextReturnDeferred libkern_kext_err(0x11) + +/*! + * @define kOSKextReturnBootLevel + * @abstract Kext not loadable or operation not allowed at current boot level. + */ +#define kOSKextReturnBootLevel libkern_kext_err(0x12) + +/*! + * @define kOSKextReturnNotLoadable + * @abstract Kext cannot be loaded; check diagnostics for details. + */ +#define kOSKextReturnNotLoadable libkern_kext_err(0x13) + +/*! + * @define kOSKextReturnLoadedVersionDiffers + * @abstract A different version (or executable UUID, or executable by checksum) + * of the requested kext is already loaded. + */ +#define kOSKextReturnLoadedVersionDiffers libkern_kext_err(0x14) + +/*! + * @define kOSKextReturnDependencyLoadError + * @abstract A load error occurred on a dependency of the kext being loaded. + */ +#define kOSKextReturnDependencyLoadError libkern_kext_err(0x15) + +/*! + * @define kOSKextReturnLinkError + * @abstract A link failure occurred with this kext or a dependency. + */ +#define kOSKextReturnLinkError libkern_kext_err(0x16) + +/*! + * @define kOSKextReturnStartStopError + * @abstract The kext start or stop routine returned an error.
+ */ +#define kOSKextReturnStartStopError libkern_kext_err(0x17) + +/*! + * @define kOSKextReturnInUse + * @abstract The kext is currently in use or has outstanding references, + * and cannot be unloaded. + */ +#define kOSKextReturnInUse libkern_kext_err(0x18) + +/*! + * @define kOSKextReturnTimeout + * @abstract A kext request has timed out. + */ +#define kOSKextReturnTimeout libkern_kext_err(0x19) + +/*! + * @define kOSKextReturnStopping + * @abstract The kext is in the process of stopping; requests cannot be made. + */ +#define kOSKextReturnStopping libkern_kext_err(0x1a) + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext/OSBundle Property List Keys +/********************************************************************/ +#endif +/*! + * @group Kext Property List Keys + * These constants cover CFBundle properties defined for kernel extensions. + * Because they are used in the kernel, if you want to use one with + * CFBundle APIs you'll need to wrap it in a CFSTR() macro. + */ + +#ifdef KERNEL +/* Define C-string versions of the CFBundle keys for use in the kernel. + */ +#define kCFBundleIdentifierKey "CFBundleIdentifier" +#define kCFBundleVersionKey "CFBundleVersion" +#define kCFBundleNameKey "CFBundleName" +#define kCFBundleExecutableKey "CFBundleExecutable" +#endif /* KERNEL */ + +/*! + * @define kOSBundleCompatibleVersionKey + * @abstract A string giving the backwards-compatible version of a library kext + * in extended Mac OS 'vers' format (####.##.##s{1-255} where 's' + * is a build stage 'd', 'a', 'b', 'f' or 'fc'). + */ +#define kOSBundleCompatibleVersionKey "OSBundleCompatibleVersion" + +/*! + * @define kOSBundleEnableKextLoggingKey + * @abstract Set to true to have the kernel kext logging spec applied + * to the kext. + * See @link //apple_ref/c/econst/OSKextLogSpec + * OSKextLogSpec@/link. + */ +#define kOSBundleEnableKextLoggingKey "OSBundleEnableKextLogging" + +/*! 
+ * @define kOSBundleIsInterfaceKey + * @abstract A boolean value indicating whether the kext executable + * contains only symbol references. + */ +#define kOSBundleIsInterfaceKey "OSBundleIsInterface" + +/*! + * @define kOSBundleLibrariesKey + * @abstract A dictionary listing link dependencies for this kext. + * Keys are bundle identifiers, values are version strings. + */ +#define kOSBundleLibrariesKey "OSBundleLibraries" + +/*! + * @define kOSBundleRequiredKey + * @abstract A string indicating in which kinds of startup this kext + * may need to load during early startup (before + * @link //apple_ref/doc/man/8/kextd kextcache(8)@/link). + * @discussion + * The value is one of: + *
    + *
  • @link kOSBundleRequiredRoot "OSBundleRequiredRoot"@/link
  • + *
  • @link kOSBundleRequiredLocalRoot "OSBundleRequiredLocalRoot"@/link
  • + *
  • @link kOSBundleRequiredNetworkRoot "OSBundleRequiredNetworkRoot"@/link
  • + *
  • @link kOSBundleRequiredSafeBoot "OSBundleRequiredSafeBoot"@/link
  • + *
  • @link kOSBundleRequiredConsole "OSBundleRequiredConsole"@/link
  • + *
+ * + * Use this property judiciously. + * Every kext that declares a value other than "OSBundleRequiredSafeBoot" + * increases startup time, as the booter must read it into memory, + * or startup kext caches must include it. + */ +#define kOSBundleRequiredKey "OSBundleRequired" + +/*! + * @define kOSBundleAllowUserLoadKey + * @abstract A boolean value indicating whether + * @link //apple_ref/doc/man/8/kextd kextcache(8)@/link + * will honor a non-root process's request to load a kext. + * @discussion + * See @link //apple_ref/doc/compositePage/c/func/KextManagerLoadKextWithURL + * KextManagerLoadKextWithURL@/link + * and @link //apple_ref/doc/compositePage/c/func/KextManagerLoadKextWithIdentifier + * KextManagerLoadKextWithIdentifier@/link. + */ +#define kOSBundleAllowUserLoadKey "OSBundleAllowUserLoad" + +/*! + * @define kOSKernelResourceKey + * @abstract A boolean value indicating whether the kext represents a built-in + * component of the kernel. + */ +#define kOSKernelResourceKey "OSKernelResource" + +/*! + * @define kIOKitPersonalitiesKey + * @abstract A dictionary of dictionaries used in matching for I/O Kit drivers. + */ +#define kIOKitPersonalitiesKey "IOKitPersonalities" + +/* + * @define kIOPersonalityPublisherKey + * @abstract Used in personalities sent to the I/O Kit, + * contains the CFBundleIdentifier of the kext + * that the personality originated in. + */ +#define kIOPersonalityPublisherKey "IOPersonalityPublisher" + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Kext/OSBundle Property Deprecated Keys +/********************************************************************/ +#endif +/* + * @define kOSBundleDebugLevelKey + * @abstract + * Deprecated (used on some releases of Mac OS X prior to 10.6 Snow Leopard). + * Value is an integer from 1-6, corresponding to the verbose levels + * of kext tools on those releases. 
+ * On 10.6 Snow Leopard, use @link OSKextEnableKextLogging + * OSKextEnableKextLogging@/link. + */ +#define kOSBundleDebugLevelKey "OSBundleDebugLevel" + +/*! + * @define kOSBundleSharedExecutableIdentifierKey + * @abstract Deprecated (used on some releases of Mac OS X + * prior to 10.6 Snow Leopard). + * Value is the bundle identifier of the pseudokext + * that contains an executable shared by this kext. + */ +#define kOSBundleSharedExecutableIdentifierKey "OSBundleSharedExecutableIdentifier" + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Kext/OSBundle Property List Values +/********************************************************************/ +#endif + +/*! + * @group Kext Property List Values + * These constants encompass established values + * for kernel extension bundle properties. + */ + +/*! +* @define kOSKextKernelIdentifier +* @abstract +* This is the CFBundleIdentifier used for the kernel itself. +*/ +#define kOSKextKernelIdentifier "__kernel__" + +/*! +* @define kOSBundleRequiredRoot +* @abstract +* This @link kOSBundleRequiredKey OSBundleRequired@/link +* value indicates that the kext may be needed to mount the root filesystem +* whether starting from a local or a network volume. +*/ +#define kOSBundleRequiredRoot "Root" + +/*! +* @define kOSBundleRequiredLocalRoot +* @abstract +* This @link kOSBundleRequiredKey OSBundleRequired@/link +* value indicates that the kext may be needed to mount the root filesystem +* when starting from a local disk. +*/ +#define kOSBundleRequiredLocalRoot "Local-Root" + +/*! +* @define kOSBundleRequiredNetworkRoot +* @abstract +* This @link kOSBundleRequiredKey OSBundleRequired@/link +* value indicates that the kext may be needed to mount the root filesystem +* when starting over a network connection. +*/ +#define kOSBundleRequiredNetworkRoot "Network-Root" + +/*!
+* @define kOSBundleRequiredSafeBoot +* @abstract +* This @link kOSBundleRequiredKey OSBundleRequired@/link +* value indicates that the kext can be loaded during a safe startup. +* This value does not normally cause the kext to be read by the booter +* or included in startup kext caches. +*/ +#define kOSBundleRequiredSafeBoot "Safe Boot" + +/*! +* @define kOSBundleRequiredConsole +* @abstract +* This @link kOSBundleRequiredKey OSBundleRequired@/link +* value indicates that the kext may be needed for console access +* (specifically in a single-user startup when +* @link //apple_ref/doc/man/8/kextd kextd(8)@/link +* does not run) +* and should be loaded during early startup. +*/ +#define kOSBundleRequiredConsole "Console" + + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext Information +/********************************************************************/ +#endif +/*! + * @group Kext Information + * Types, constants, and macros providing a kext with information + * about itself. + */ + +/*! + * @typedef OSKextLoadTag + * + * @abstract + * A unique identifier assigned to a loaded instance of a kext. + * + * @discussion + * If a kext is unloaded and later reloaded, the new instance + * has a different load tag. + * + * A kext can get its own load tag in the kmod_info_t + * structure passed into its module start routine, as the + * id field (cast to this type). + * You can use the load tag with the functions + * @link OSKextRetainKextWithLoadTag + * OSKextRetainKextWithLoadTag@/link and + * @link OSKextReleaseKextWithLoadTag + * OSKextReleaseKextWithLoadTag@/link. + */ +typedef uint32_t OSKextLoadTag; + +/*! + * @define kOSKextInvalidLoadTag + * + * @abstract + * A load tag value that will never be used for a loaded kext; + * indicates kext not found. + */ +#define kOSKextInvalidLoadTag ((OSKextLoadTag)(-1)) + + +#ifdef KERNEL + +/* Make these visible to kexts only and *not* the kernel.
+ */ +#if !XNU_KERNEL_PRIVATE + +/*! + * @function OSKextGetCurrentLoadTag + * + * @abstract + * Returns the run-time load tag for the calling kext as an + * @link OSKextLoadTag OSKextLoadTag@/link. + * + * @result + * The run-time load tag for the calling kext as an + * @link OSKextLoadTag@/link. + * + * @discussion + * The load tag identifies this loaded instance of the kext to the kernel + * and to kernel functions that operate on kexts. + */ +OSKextLoadTag OSKextGetCurrentLoadTag(void); + +/*! + * @function OSKextGetCurrentIdentifier + * + * @abstract + * Returns the CFBundleIdentifier for the calling kext as a C string. + * + * @result + * The CFBundleIdentifier for the calling kext as a C string. + */ +const char * OSKextGetCurrentIdentifier(void); + +/*! + * @function OSKextGetCurrentVersionString + * + * @abstract + * Returns the CFBundleVersion for the calling kext as a C string. + * + * @result + * The CFBundleVersion for the calling kext as a C string. + */ +const char * OSKextGetCurrentVersionString(void); + +#endif /* !XNU_KERNEL_PRIVATE */ + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext Loading C Functions +/********************************************************************/ +#endif +/*! + * @group Kext Loading C Functions + * Functions for loading and tracking kexts in the kernel. + */ + +/*! + * @function OSKextLoadKextWithIdentifier + * + * @abstract + * Request that a kext be loaded. + * + * @param kextIdentifier The bundle identifier of the kext to be loaded. + * + * @result + * @link //apple_ref/c/macro/kOSReturnSuccess kOSReturnSuccess@/link + * if the kext was loaded (or was already loaded). + * @link //apple_ref/c/macro/kOSKextReturnDeferred kOSKextReturnDeferred@/link + * if the kext was not found and a request + * was queued to @link //apple_ref/doc/man/8/kextd kextd(8)@/link. + * Other return values indicate a failure to load the kext. 
+ * + * @discussion + * If a kext is already in the kernel but not loaded, it is loaded immediately. + * If it isn't found, an asynchronous load request is + * made to @link //apple_ref/doc/man/8/kextd kextd(8)@/link + * and @link //apple_ref/c/macro/kOSKextReturnDeferred kOSKextReturnDeferred@/link is returned. + * There is no general notification or callback mechanism for load requests. + */ +OSReturn OSKextLoadKextWithIdentifier(const char * kextIdentifier); + + +/*! + * @function OSKextRetainKextWithLoadTag + * + * @abstract + * Retain a loaded kext based on its load tag, + * and enable autounload for that kext. + * + * @param loadTag The load tag of the kext to be retained. + * See @link OSKextGetCurrentLoadTag@/link. + * + * @result + * @link //apple_ref/c/macro/kOSReturnSuccess kOSReturnSuccess@/link + * if the kext was retained. + * @link //apple_ref/c/macro/kOSKextReturnNotFound kOSKextReturnNotFound@/link + * if the kext was not found. + * @link //apple_ref/c/macro/kOSKextReturnInvalidArgument + * kOSKextReturnInvalidArgument@/link + * if loadTag is + * @link kOSKextInvalidLoadTag kOSKextInvalidLoadTag@/link. + * + * @discussion + * Retaining a kext prevents it from being unloaded, + * either explicitly or automatically, and enables autounload for the kext. + * When autounload is enabled, then shortly after the kext's last reference + * is dropped, it will be unloaded if there are no outstanding references to it + * and there are no instances of its Libkern C++ subclasses (if any). + * + * Kexts that define subclasses of + * @link //apple_ref/doc/class/IOService IOService@/link + * have autounload enabled automatically. + * Other kexts can use the reference count to manage automatic unload + * without having to define and create Libkern C++ objects. + * For example, a filesystem kext can retain itself whenever a new mount + * is created, and release itself when a mount is removed. 
+ * When the last mount is removed, the kext will be unloaded after a brief delay. + * + * A kext can get its own load tag using the + * @link OSKextGetCurrentLoadTag@/link. + * + * Kexts should not retain and release other kexts; linkage references + * are accounted for internally. + */ +OSReturn OSKextRetainKextWithLoadTag(OSKextLoadTag loadTag); + + +/*! + * @function OSKextReleaseKextWithLoadTag + * + * @abstract + * Release a loaded kext based on its load tag. + * + * @param loadTag The load tag of the kext to be released. + * See @link OSKextGetCurrentLoadTag@/link. + * + * @result + * @link //apple_ref/c/macro/kOSReturnSuccess kOSReturnSuccess@/link + * if the kext was released. + * @link //apple_ref/c/macro/kOSKextReturnNotFound + * kOSKextReturnNotFound@/link + * if the kext was not found. + * @link //apple_ref/c/macro/kOSKextReturnInvalidArgument + * kOSKextReturnInvalidArgument@/link + * if loadTag is + * @link kOSKextInvalidLoadTag kOSKextInvalidLoadTag@/link. + * + * @discussion + * The kext should have been retained previously via + * @link OSKextRetainKextWithLoadTag@/link. + * + * This function schedules an autounload scan for all kexts. + * When that scan occurs, if a kext has autounload enabled, + * it will be unloaded if there are no outstanding references to it + * and there are no instances of its Libkern C++ classes (if any). + * + * Kexts that define subclasses of + * @link //apple_ref/doc/class/IOService IOService@/link + * have autounload enabled automatically. + * Other kexts can use the reference count to manage automatic unload + * without having to define and create Libkern C++ objects. + * For example, a filesystem kext can be retained whenever a new mount + * is created, and released when a mount is removed. + * When the last mount is removed, the kext will be unloaded after a brief delay. 
+ * + * While the autounload scan takes place after a delay of at least a minute, + * a kext that manages its own reference counts for autounload should + * be prepared to have its module stop function called even while the function + * calling this function is still running. + * + * A kext can get its own load tag using the + * @link OSKextGetCurrentLoadTag@/link. + * + * Kexts should not retain and release other kexts; linkage references + * are accounted for internally. + */ +OSReturn OSKextReleaseKextWithLoadTag(OSKextLoadTag loadTag); + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark - +#pragma mark Kext Requests +/********************************************************************/ +#endif +/*! + * @typedef OSKextRequestTag + * + * @abstract + * Identifies a kext request made to user space. + */ +typedef uint32_t OSKextRequestTag; + +#define kOSKextRequestTagInvalid ((OSKextRequestTag)-1) + + +/*! + * @typedef OSKextRequestResourceCallback + * + * @abstract + * Invoked to provide results for a kext resource request. + * + * @param requestTag The tag of the request that the callback pertains to. + * @param result The result of the request: + * @link kOSReturnSuccess + * kOSReturnSuccess@/link + * if the request was fulfilled; + * @link kOSKextReturnTimeout + * kOSKextReturnTimeout@/link + * if the request has timed out; + * @link kOSKextReturnStopping + * kOSKextReturnStopping@/link + * if the kext containing the callback + * address for the kext is being unloaded; + * or other values on error. + * @param resourceData A pointer to the requested resource data. + * Owned by the system; the kext should make a copy + * if it needs to keep the data past the callback. + * @param resourceDataLength The length of resourceData. + * @param context The context pointer originally passed to + * @link OSKextRequestResource + * OSKextRequestResource@/link. 
+ */ +typedef void (* OSKextRequestResourceCallback)( + OSKextRequestTag requestTag, + OSReturn result, + const void * resourceData, + uint32_t resourceDataLength, + void * context); + +/*! + * @function OSKextRequestResource + * + * @abstract + * Requests data from a nonlocalized resource file in a kext bundle on disk. + * + * @param kextIdentifier The CFBundleIdentifier of the kext + * from which to read the file. + * @param resourceName The name of the resource file to read. + * @param callback A pointer to a callback function; the address + * must be within a currently-loaded kext. + * @param context A pointer to arbitrary run-time data + * that will be passed to the callback + * when it is invoked. May be NULL. + * @param requestTagOut If non-NULL, + * filled on success with a tag identifying the + * pending request; can be used with + * @link OSKextCancelRequest + * OSKextCancelRequest@/link. + * + * @result + * @link kOSReturnSuccess kOSReturnSuccess@/link + * if the request is successfully queued. + * @link kOSKextReturnInvalidArgument kOSKextReturnInvalidArgument@/link + * if kextIdentifier or resourceName or if + * callback is not an address within a loaded kext executable. + * @link kOSKextReturnStopping kOSKextReturnStopping@/link + * if an unload attempt is being made + * on the kext containing callback. + * Other OSKextReturn... errors are possible. + * + * @discussion + * This function queues a request to the user-space kext daemon + * @link //apple_ref/doc/man/8/kextd kextd(8)@/link; + * requests for resources early in system startup + * will not be fulfilled until that daemon starts. + * Note also that the localization context of the kext daemon + * (namely that of the superuser) + * will be used in retrieving resources; + * kext resources intended for use in the kernel + * should generally not be localized. + * + * callback is guaranteed to be invoked except when: + *
    + *
  • @link OSKextCancelRequest OSKextCancelRequest@/link + * is used to cancel the request. + * In this case the kext gets the context pointer + * and can clean it up.
  • + *
  • The request is made during a kext's module start routine + * and the start routine returns an error. + * In this case, callbacks cannot be safely invoked, so + * the kext should clean up all request contexts + * when returning the error from the start routine.
  • + *
+ * + * Kexts with pending requests are not subject to autounload, + * but requests are subject to timeout after a few minutes. + * If that amount of time passes with no response from user space, + * callback is invoked with a result of + * @link kOSKextReturnTimeout kOSKextReturnTimeout@/link. + * + * Kexts that are explicitly unloaded have all pending request callbacks + * invoked with a result of + * @link kOSKextReturnStopping kOSKextReturnStopping@/link. + * The kext must handle these callbacks, + * even if its stop routine will prevent unloading. + * If the kext does prevent unloading, it can reissue resource requests + * outside of the stop function. + */ +OSReturn OSKextRequestResource( + const char * kextIdentifier, + const char * resourceName, + OSKextRequestResourceCallback callback, + void * context, + OSKextRequestTag * requestTagOut); + +/*! + * @function OSKextCancelRequest + * + * @abstract + * Cancels a pending user-space kext request without invoking the callback. + * + * @param requestTag A tag identifying a pending request. + * @param contextOut If non-NULL, filled with the context pointer + * originally passed with the request. + * + * @result + * @link kOSReturnSuccess kOSReturnSuccess@/link + * if the request is successfully canceled. + * @link kOSKextReturnNotFound kOSKextReturnNotFound@/link + * if requestTag does not identify any pending request. + * Other OSKextReturn... errors are possible. + * + * @discussion + * This function cancels a pending request if it exists, + * so that its callback will not be invoked. + * It returns in contextOut + * the context pointer used to create the request + * so that any resources allocated for the request can be cleaned up.
+ * + * Kexts do not need to cancel outstanding requests + * in their module stop functions; + * when a kext is unloaded, all pending request callbacks + * are invoked with a result of + * @link kOSKextReturnStopping kOSKextReturnStopping@/link + * before the stop function is called. + */ +OSReturn OSKextCancelRequest( + OSKextRequestTag requestTag, + void ** contextOut); + + +#if (__x86_64__) + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Weak linking +/********************************************************************/ +#endif + +/*! + * @group Weak Linking + * Support for weak references to symbols in kexts. + */ + +/*! + * @var gOSKextUnresolved + * + * @abstract + * The value to which a kext's unresolved, weakly-referenced symbols are bound. + * + * @discussion + * A kext must test a weak symbol before using it. A weak symbol + * is only safe to use if it is not equal to gOSKextUnresolved. + * + * Example for a weak symbol named foo: + *
+ * @textblock
+ *      if (&foo != gOSKextUnresolved) {
+ *          foo();
+ *      } else {
+ *          printf("foo() is not supported\n");
+ *      }
+ * @/textblock
+ * 
+ */ +extern const void * gOSKextUnresolved; + +/*! + * @define OSKextSymbolIsResolved + * + * @abstract + * Checks whether a weakly-referenced symbol has been resolved. + * + * @param weak_sym The weak symbol to be tested for resolution. + * + * @result + * TRUE if weak_sym is resolved, or FALSE + * if weak_sym is unresolved. + * + * @discussion + * This is a convenience macro for testing if weak symbols are resolved. + * + * Example for a weak symbol named foo: + *
+ * @textblock
+ *      if (OSKextSymbolIsResolved(foo)) {
+ *          foo();
+ *      } else {
+ *          printf("foo() is not resolved\n");
+ *      }
+ * @/textblock
+ * 
+ */ +#define OSKextSymbolIsResolved(weak_sym) \ + (&(weak_sym) != gOSKextUnresolved) + +#endif /* (__x86_64__) */ + +#endif /* KERNEL */ + +__END_DECLS + +#endif /* _LIBKERN_OSKEXTLIB_H */ diff --git a/libkern/libkern/OSKextLibPrivate.h b/libkern/libkern/OSKextLibPrivate.h new file mode 100644 index 000000000..cc4f3aa29 --- /dev/null +++ b/libkern/libkern/OSKextLibPrivate.h @@ -0,0 +1,792 @@ +/* + * Copyright (c) 1998-2000 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _LIBKERN_OSKEXTLIBPRIVATE_H +#define _LIBKERN_OSKEXTLIBPRIVATE_H + +#include + +__BEGIN_DECLS +#ifdef KERNEL +#include +#include +#include +#else +#include +#endif /* KERNEL */ +__END_DECLS + +#include + +__BEGIN_DECLS + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Misc Constants +/********************************************************************/ +#endif + +typedef uint8_t OSKextExcludeLevel; +#define kOSKextExcludeNone (0) +#define kOSKextExcludeKext (1) +#define kOSKextExcludeAll (2) + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext/OSBundle Property List Keys +/********************************************************************/ +#endif + +/*! + * @define kOSBundleHelperKey + * @abstract Used by loginwindow. + */ +#define kOSBundleHelperKey "OSBundleHelper" + +// properties found in the registry root +#define kOSKernelCPUTypeKey "OSKernelCPUType" +#define kOSKernelCPUSubtypeKey "OSKernelCPUSubtype" +#define kOSStartupMkextCRC "OSStartupMkextCRC" /* value is 32-bit OSData */ +#define kOSPrelinkKextCountKey "OSPrelinkKextCount" /* value is 32-bit OSNumber */ +#define kOSPrelinkPersonalityCountKey "OSPrelinkPersonalityCount" /* value is 32-bit OSNumber */ + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Load Info Keys +/********************************************************************/ +#endif +/********************************************************************* +* In addition to the keys defined here, you will find: +* CFBundleIdentifier +* CFBundleVersion +* OSBundleCompatibleVersion +* OSKernelResource +* OSBundleInterface +*********************************************************************/ +#define kOSBundleCPUTypeKey "OSBundleCPUType" +#define kOSBundleCPUSubtypeKey 
"OSBundleCPUSubtype" +#define kOSBundlePathKey "OSBundlePath" +#define kOSBundleUUIDKey "OSBundleUUID" +#define kOSBundleStartedKey "OSBundleStarted" +#define kOSBundlePrelinkedKey "OSBundlePrelinked" +#define kOSBundleLoadTagKey "OSBundleLoadTag" +#define kOSBundleLoadAddressKey "OSBundleLoadAddress" +#define kOSBundleLoadSizeKey "OSBundleLoadSize" +#define kOSBundleWiredSizeKey "OSBundleWiredSize" +#define kOSBundleDependenciesKey "OSBundleDependencies" +#define kOSBundleRetainCountKey "OSBundleRetainCount" + +#define kOSBundleClassesKey "OSBundleClasses" + +#define kOSMetaClassNameKey "OSMetaClassName" +#define kOSMetaClassSuperclassNameKey "OSMetaClassSuperclassName" +#define kOSMetaClassTrackingCountKey "OSMetaClassTrackingCount" + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext Log Specification +/********************************************************************/ +#endif +/*! + * @group Kext Log Specification + * Logging levels & flags for kernel extensions. + * See @link //apple_ref/c/tdef/OSKextLogSpec OSKextLogSpec@/link + * for an overview. + */ + +/*! + * @typedef OSKextLogSpec + * @abstract Describes what a log message applies to, + * or a filter that determines which log messages are displayed. + * + * @discussion + * A kext log specification is a 32-bit value used as a desription of + * what a given log message applies to, or as a filter + * indicating which log messages are desired and which are not. + * A log specification has three parts (described in detail shortly): + *
    + *
  • A level from 0-7 in the lowest-order nibble (0x7).
  • + *
  • A flag bit in the lowest-order nibble (0x8) indicating whether + * log messages tied to individual kexts are always printed (1) + * or printed only if the kext has an + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link set to true. + *
  • A set of activity flags in the remaining nibbles (0xFFFFFFF0), + * which describe areas of activity related to kernel extensions.
  • + *
+ * + * You can specify a log spec to most of the kext tools with the -v option + * and a hex number (rather than the escalating decimal levels 0-6). + * You can also specify a log spec to the kernel with the "kextlog" boot arg + * or "debug.kextlog" sysctl. + * + * Log Levels + * + * The log level spans a range from silent (no log messages) + * to debugging information: + * + *
    + *
  1. Silent - Not applicable to messages; as a filter, do not print any log messages.
  2. + *
  3. Errors - Log message is an error. + *
  4. Warnings - Log message is a warning. + *
  5. Basic information - Log message is basic success/failure.
  6. + *
  7. Progress - Provides high-level information about stages in processing.
  8. + *
  9. Step - Provides low-level information about complex operations, + * typically about individual kexts.
  10. + *
  11. Detail - Provides very low-level information about parts of kexts, + * including individual Libkern classes and operations on bundle files.
  12. + *
  13. Debug - Very verbose logging about internal activities.
  14. + *
+ * + * Log messages at + * @link kOSKextLogErrorLevel kOSKextLogErrorLevel@/link or + * @link kOSKextLogWarningLevel kOSKextLogWarningLevel@/link + * ignore activity flags and the + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link property; + * that is, only the filter level is checked for messages at these levels. + * Log messages at levels above + * @link kOSKextLogWarningLevel kOSKextLogWarningLevel@/link + * are filtered according both to the activity flags in the current filter + * and to whether the log message is associated with a kext or not. + * Normally log messages associated with kexts are not printed + * unless the kext has a + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link property set to true. + * If you set the high-order bit of the log level (that is, add 8 to the level), + * then all kext-specific log messages matching the activity flags are printed. + * This can be very verbose. + * + * Activity Flags + * + * Each flag governs a category of activity, + * such as loading, IPC, or archives; by combining them with bitwise OR, + * you can choose which messages you wish to see (or when logging messages, + * which bit flags select your message). + * + * Byte 1: 0xF0 - Basic activities + * (@link kOSKextLogGeneralFlag kOSKextLogGeneralFlag@/link, + * @link kOSKextLogLoadFlag kOSKextLogLoadFlag@/link, and + * @link kOSKextLogArchiveFlag kOSKextLogArchiveFlag@/link). + * + * Byte 2: 0xF00 - Reserved. + * + * Byte 4: 0xF000 - Kext diagnostics + * (@link kOSKextLogValidationFlag kOSKextLogValidationFlag@/link, + * @link kOSKextLogAuthenticationFlag kOSKextLogAuthenticationFlag@/link, and + * @link kOSKextLogDependenciesFlag kOSKextLogDependenciesFlag@/link). 
+ * + * Byte 5: 0xF00000 - Kext access & bookkeeping + * (@link kOSKextLogDirectoryScanFlag kOSKextLogDirectoryScanFlag@/link, + * @link kOSKextLogFileAccessFlag kOSKextLogFileAccessFlag@/link, + * @link kOSKextLogKextBookkeepingFlag kOSKextLogKextBookkeepingFlag@/link ). + * + * Byte 6: 0xF000000 - Linking & patching + * (@link kOSKextLogLinkFlag kOSKextLogLinkFlag@/link and + * @link kOSKextLogPatchFlag kOSKextLogPatchFlag@/link). + * + * Byte 7: 0xF0000000 - Reserved. + */ +typedef uint32_t OSKextLogSpec; + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Masks +/********************************************************************/ +#endif +/*! + * @define kOSKextLogLevelMask + * @abstract Masks the bottom 3 bits of an + * @link OSKextLogSpec OSKextLogSpec@/link to extract + * the raw level. + */ +#define kOSKextLogLevelMask ((OSKextLogSpec) 0x00000007) + +/*! + * @define kOSKextLogKextOrGlobalMask + * @abstract Determines whether per-kext log messages are output. + * + * @discussion + * In filter specifications, if unset (the usual default), + * then log messages associated with a kext are only output + * if the kext has an + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link + * property set to true. + * If set, then all log messages associated with kexts + * are output. + * + * In message specifications, if set it indicates that the message is either + * not associated with a kext, or is associated with a kext that has an + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link + * property set to true. + */ +#define kOSKextLogKextOrGlobalMask ((OSKextLogSpec) 0x00000008) + + +/*! + * @define kOSKextLogFlagsMask + * @abstract Masks the flag bits of an + * @link OSKextLogSpec OSKextLogSpec@/link. + */ +#define kOSKextLogFlagsMask ((OSKextLogSpec) 0x0ffffff0) + +/*! 
+ * @define kOSKextLogVerboseFlagsMask + * @abstract Masks the flag bits of an + * @link OSKextLogSpec OSKextLogSpec@/link + * to which command-line -v levels apply. + */ +#define kOSKextLogVerboseFlagsMask ((OSKextLogSpec) 0x00000ff0) + +/*! + * @define kOSKextLogConfigMask + * @abstract Masks the config bits of an + * @link OSKextLogSpec OSKextLogSpec@/link. + */ +#define kOSKextLogConfigMask ((OSKextLogSpec) 0xf0000000) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF - Log Level +/********************************************************************/ +#endif + +/*! + * @define kOSKextLogExplicitLevel + * @abstract Used when logging a message to override the current log filter, + * even if it's set to silent for log messages. + * This is essentially a pass-through for + * unconditional print messages to go + * through the logging engine. + */ +#define kOSKextLogExplicitLevel ((OSKextLogSpec) 0x0) + +/*! + * @define kOSKextLogErrorLevel + * @abstract Log messages concerning error conditions in any category. + */ +#define kOSKextLogErrorLevel ((OSKextLogSpec) 0x1) + + +/*! + * @define kOSKextLogWarningLevel + * @abstract Log messages concerning warning conditions in any category, + * which indicate potential error conditions, + * and notices, which may explain unexpected but correct behavior. + */ +#define kOSKextLogWarningLevel ((OSKextLogSpec) 0x2) + + +/*! + * @define kOSKextLogBasicLevel + * @abstract Log messages concerning top-level outcome in any category + * (kext load/unload, kext cache creation/extraction w/# kexts). + */ +#define kOSKextLogBasicLevel ((OSKextLogSpec) 0x3) + + +/*! + * @define kOSKextLogProgressLevel + * @abstract Log messages concerning high-level progress in any category, + * such as sending a load request to the kernel, + * allocation/link/map/start (load operation), + * stop/unmap (unload operation), kext added/extracted (archive).
+ */ +#define kOSKextLogProgressLevel ((OSKextLogSpec) 0x4) + + +/*! + * @define kOSKextLogStepLevel + * @abstract Log messages concerning major steps in any category, + * such as sending personalities to the IOCatalogue when loading, + * detailed IPC with the kernel, or filtering of kexts for an archive. + */ +#define kOSKextLogStepLevel ((OSKextLogSpec) 0x5) + + +/*! + * @define kOSKextLogDetailLevel + * @abstract Log messages concerning specific details in any category, + * such as classes being registered/unregistered or + * operations on individual files in a kext. + */ +#define kOSKextLogDetailLevel ((OSKextLogSpec) 0x6) + + +/*! + * @define kOSKextLogDebugLevel + * @abstract Log messages concerning very low-level actions that are + * useful mainly for debugging the kext system itself. + */ +#define kOSKextLogDebugLevel ((OSKextLogSpec) 0x7) + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF0 - General Activity, Load, Kernel IPC, Personalities +/********************************************************************/ +#endif + +/*! + * @define kOSKextLogGeneralFlag + * @abstract Log messages about general activity in the kext system. + */ +#define kOSKextLogGeneralFlag ((OSKextLogSpec) 0x10) + +/*! + * @define kOSKextLogLoadFlag + * @abstract Log messages regarding kernel extension load, start/stop, or unload activity + * in the kernel. + */ +#define kOSKextLogLoadFlag ((OSKextLogSpec) 0x20) + +/*! + * @define kOSKextLogIPCFlag + * @abstract Log messages about any interaction between kernel and user space + * regarding kernel extensions. + */ +#define kOSKextLogIPCFlag ((OSKextLogSpec) 0x40) + +/*! + * @define kOSKextLogArchiveFlag + * @abstract Log messages about creating or processing a kext startup cache file + * (mkext or prelinked kernel).
+ */ +#define kOSKextLogArchiveFlag ((OSKextLogSpec) 0x80) + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF00 - Reserved Verbose Area +/********************************************************************/ +#endif +// reserved slot for group ((OSKextLogSpec) 0x100) +// reserved slot for group ((OSKextLogSpec) 0x200) +// reserved slot for group ((OSKextLogSpec) 0x400) +// reserved slot for group ((OSKextLogSpec) 0x800) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF000 - Kext diagnostic activity +/********************************************************************/ +#endif + +/*! + * @define kOSKextLogValidationFlag + * @abstract Log messages when validating kernel extensions. + */ +#define kOSKextLogValidationFlag ((OSKextLogSpec) 0x1000) + +/*! + * @define kOSKextLogAuthenticationFlag + * @abstract Log messages when authenticating kernel extension files. + * Irrelevant in the kernel. + */ +#define kOSKextLogAuthenticationFlag ((OSKextLogSpec) 0x2000) + +/*! + * @define kOSKextLogDependenciesFlag + * @abstract Log messages when resolving dependencies for a kernel extension. + */ +#define kOSKextLogDependenciesFlag ((OSKextLogSpec) 0x4000) + +// reserved slot for group ((OSKextLogSpec) 0x8000) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF0000 - Archives, caches, directory scan, file access +/********************************************************************/ +#endif + +/*! + * @define kOSKextLogDirectoryScanFlag + * @abstract Log messages when scanning directories for kernel extensions. + * In the kernel logs every booter kext entry processed. + */ +#define kOSKextLogDirectoryScanFlag ((OSKextLogSpec) 0x10000) + +/*! + * @define kOSKextLogFileAccessFlag + * @abstract Log messages when performing any filesystem access (very verbose). + * Irrelevant in the kernel.
+ */ +#define kOSKextLogFileAccessFlag ((OSKextLogSpec) 0x20000) + +/*! + * @define kOSKextLogKextBookkeepingFlag + * @abstract Log messages about internal tracking of kexts. Can be very verbose. + */ +#define kOSKextLogKextBookkeepingFlag ((OSKextLogSpec) 0x40000) + +// reserved slot for group ((OSKextLogSpec) 0x80000) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF00000 - Linking & Patching +/********************************************************************/ +#endif + +/*! + * @define kOSKextLogLinkFlag + * @abstract Log messages about linking. + */ +#define kOSKextLogLinkFlag ((OSKextLogSpec) 0x100000) + +/*! + * @define kOSKextLogPatchFlag + * @abstract Log messages about patching. + */ +#define kOSKextLogPatchFlag ((OSKextLogSpec) 0x200000) + +// reserved slot for group ((OSKextLogSpec) 0x400000) +// reserved slot for group ((OSKextLogSpec) 0x800000) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF000000 - Reserved +/********************************************************************/ +#endif + +// reserved slot for grouping ((OSKextLogSpec) 0x1000000) +// reserved slot for grouping ((OSKextLogSpec) 0x2000000) +// reserved slot for grouping ((OSKextLogSpec) 0x4000000) +// reserved slot for grouping ((OSKextLogSpec) 0x8000000) + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark 0xF0000000 - Config Flags +/********************************************************************/ +#endif + +// reserved slot for grouping ((OSKextLogSpec) 0x10000000) +// reserved slot for grouping ((OSKextLogSpec) 0x20000000) +// reserved slot for grouping ((OSKextLogSpec) 0x40000000) + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Predefined Specifications +/********************************************************************/ +#endif + +/*! 
+ * @define kOSKextLogSilentFilter + * @abstract For use in filter specs: + * Ignore all log messages with a log level greater than + * @link kOSKextLogExplicitLevel kOSKextLogExplicitLevel@/link. + */ +#define kOSKextLogSilentFilter ((OSKextLogSpec) 0x0) + +/*! + * @define kOSKextLogShowAllFilter + * @abstract For use in filter specs: + * Print all log messages not associated with a kext or + * associated with a kext that has + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link + * set to true. + */ +#define kOSKextLogShowAllFilter ((OSKextLogSpec) 0x0ffffff7) + +/*! + * @define kOSKextLogShowAllKextsFilter + * @abstract For use in filter specs: + * Print all log messages, including those for kexts that do not have + * @link //apple_ref/c/macro/kOSBundleEnableKextLoggingKey + * OSBundleEnableKextLogging@/link + * set to true. + */ +#define kOSKextLogShowAllKextsFilter ((OSKextLogSpec) \ + (kOSKextLogShowAllFilter | \ + kOSKextLogKextOrGlobalMask)) + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext Version String Processing +/********************************************************************/ +#endif +/*! + * @group Kext Version String Processing + * Functions for working with kext versions and compatible versions. + */ + +/*! + * @typedef OSKextVersion + * @abstract An encoded kext version that can be compared arithmetically. + * + * @discussion + * A value of zero (@link kOSKextVersionUndefined kOSKextVersionUndefined@/link) + * is not equivalent to a version string of "0.0", + * and typically means there is no version specified + * (for example, that there is no CFBundleVersion property at all). + * Values below zero are invalid. + * + * The encoding used is subject to change, + * and should never be saved to permanent storage. + * Always use proper version strings in files and interprocess communication. + */ +typedef int64_t OSKextVersion; + +/*!
+ * @define kOSKextVersionMaxLength + * @abstract The length of a string buffer + * guaranteed to be able to hold a kext version. + * + * @discussion + * Kext versions use an extended Mac OS 'vers' format with double the number + * of digits before the build stage: ####.##.##s{1-255} where 's' + * is a build stage 'd', 'a', 'b', 'f' or 'fc'. + */ +#define kOSKextVersionMaxLength (20) +// with a few bytes to spare including a nul byte +// xx-review: Should we make this much bigger in case we ever need longer strings? + +/*! + * @define kOSKextVersionUndefined + * @abstract The undefined version. + * + * @discussion + * This value of @link OSKextVersion OSKextVersion@/link represents the + * lack of a version + * (for example, that there is no CFBundleVersion property at all). + */ +#define kOSKextVersionUndefined (0) + +/*! + * @function OSKextParseVersionString + * + * @abstract + * Parses a kext version string into an @link OSKextVersion OSKextVersion@/link. + * + * @param versionString The kext version string to parse. + * + * @result + * An encoded kext version that can be compared numerically + * against other encoded kext versions, + * <0 if versionString is NULL, empty, + * or cannot be parsed. + * + * @discussion + * Kext versions use an extended Mac OS 'vers' format with double the number + * of digits before the build stage: ####.##.##s{1-255} where 's' + * is a build stage 'd', 'a', 'b', 'f' or 'fc'. + */ +OSKextVersion OSKextParseVersionString(const char * versionString); + + +/*! + * @function OSKextVersionGetString + * + * @abstract + * Formats an encoded @link OSKextVersion OSKextVersion@/link into a string + * representation. + * + * @param aVersion + * The encoded version to format. + * @param buffer + * A C string buffer of at least + * @link kOSKextVersionMaxLength kOSKextVersionMaxLength@/link bytes. + * @param bufferSize The size in bytes of buffer. + * + * @result + * TRUE if the encoded version is formatted successfully. 
+ * FALSE if buffer is NULL or + * bufferSize is less than + * @link kOSKextVersionMaxLength kOSKextVersionMaxLength@/link. + * + * @discussion + * The return value strictly indicates whether buffer + * is large enough to hold the result. + * If aVersion is 0, the resulting string is "(missing)". + * If aVersion is less than 0 + * or is not a valid kext version encoding, + * the resulting string is "(invalid)". + */ +Boolean OSKextVersionGetString( + OSKextVersion aVersion, + char * buffer, + uint32_t bufferSize); + + +#ifdef KERNEL + + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark - +#pragma mark Weak linking +/********************************************************************/ +#endif +#ifdef XNU_KERNEL_PRIVATE +void kext_weak_symbol_referenced(void); +#endif /* XNU_KERNEL_PRIVATE */ + +#if !(__x86_64__) + +extern const void *gOSKextUnresolved; + +#define OSKextSymbolIsResolved(weak_sym) \ + (&(weak_sym) != gOSKextUnresolved) + +#endif /* !(__x86_64__) */ + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Miscellaneous Kernel-Only Kext Functions +/********************************************************************/ +#endif + +/*! + * @function kext_get_vm_map + * @abstract Returns the vm_map from which the kext was allocated. + * + * @param info The kmod_info_t structure of the kext. + * @result The vm_map from which the kext was allocated. This function + * cannot return NULL. + */ +vm_map_t kext_get_vm_map(kmod_info_t * info); + +#ifdef XNU_KERNEL_PRIVATE + +/*! + * @function kext_dump_panic_lists + * @abstract Prints compacted lists of last unloaded & all loaded kexts + * during a panic. + * + * @param printf_func The printf-style function to use for output. 
+ */ +void kext_dump_panic_lists(int (*printf_func)(const char *fmt, ...)); + +#endif /* XNU_KERNEL_PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE + +#if PRAGMA_MARK +#pragma mark - +/********************************************************************/ +#pragma mark Kext Loading C Functions +/********************************************************************/ +#endif +/*! + * @function OSKextGetLoadTagForBundleIdentifier + * @abstract Look up the load tag for a kext. + * + * @param kextIdentifier The bundle identifier of the kext to look up. + * @result + * The load tag of the requested kext, or + * @link //apple_ref/c/macro/kOSKextInvalidLoadTag kOSKextInvalidLoadTag@/link + * if the kext was not found. + * + * @discussion + * A load tag uniquely identifies a loaded kext. + * It can be found as the id field of a loaded kext's + * kmod_info_t struct. + * + * Note that a load tag represents a specific loaded instance of a kext. + * If that kext is unloaded, the load tag is no longer a valid reference. + * If the same kext is later reloaded, it will have a new load tag. + * + * You can use the load tag to adjust a kext's reference count + * via + * @link //apple_ref/c/func/OSKextRetainKextWithLoadTag + * OSKextRetainKextWithLoadTag@/link + * and + * @link //apple_ref/c/func/OSKextReleaseKextWithLoadTag + * OSKextReleaseKextWithLoadTag@/link, + * so that the kext is automatically unloaded when no references remain, + * or to unload the kext immediately + * with @link //apple_ref/c/func/OSKextUnloadKextWithLoadTag OSKextUnloadKextWithLoadTag@/link. + * + * Those functions are intended for use with non-IOKit kexts + * (specifically, kexts that define no subclasses of + * @link //apple_ref/doc/class/IOService IOService@/link). + * Pure IOKit kexts are managed via instance counts + * of their libkern C++ object classes; + * using those functions on them will only interfere with that mechanism.
+ * If you have a hybrid kext with both IOService subclasses and non-IOKit code, + * however, you may want to use reference counting for the non-IOKit portions: + * that way the kext will only unload automatically + * when there are no C++ objects and the kext reference count is zero. + */ +uint32_t OSKextGetLoadTagForBundleIdentifier( + const char * kextIdentifier); + + +/*! + * @function OSKextUnloadKextWithLoadTag + * @abstract Stop and unload a kext based on its load tag. + * + * @param loadTag The load tag of the kext to unload. + * @result + * @link //apple_ref/c/macro/kOSReturnSuccess kOSReturnSuccess@/link + * if the kext was found and unloaded. + * @link //apple_ref/c/macro/kOSKextReturnNotFound + * kOSKextReturnNotFound@/link + * if the kext was not found. + * @link //apple_ref/c/macro/kOSKextReturnInUse + * kOSKextReturnInUse@/link + * if the kext has outstanding references + * or if there are instances of its libkern C++ subclasses. + * Other return values indicate a failure to unload the kext, + * typically because the module stop routine failed. + * + * @discussion + * A panic will occur if a kext calls this function to unload itself. + * The safest way for a kext to unload itself is to call + * @link //apple_ref/c/func/OSKextRetainKextWithLoadTag + * OSKextRetainKextWithLoadTag@/link + * with its own load tag + * (the id field of its kmod_info_t struct), + * followed by + * @link //apple_ref/c/func/OSKextReleaseKextWithLoadTag + * OSKextReleaseKextWithLoadTag@/link; + * this will schedule the kext for unload on a separate thread. + * + * This function can be used when reference-based autounloading is not + * appropriate. + * If a kernel system or kext is already monitoring + * the need for a kext, + * it can simply call this function when it's known that the kext is not needed. + */ +OSReturn OSKextUnloadKextWithLoadTag(uint32_t loadTag); + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* KERNEL */ + +__END_DECLS + +#endif /* !
_LIBKERN_OSKEXTLIBPRIVATE_H */ diff --git a/libkern/libkern/OSMalloc.h b/libkern/libkern/OSMalloc.h index c54b90070..a638b97ae 100644 --- a/libkern/libkern/OSMalloc.h +++ b/libkern/libkern/OSMalloc.h @@ -38,41 +38,227 @@ __BEGIN_DECLS #include #endif -#ifdef MACH_KERNEL_PRIVATE +/*! + * @header + * + * @abstract + * This header declares the OSMalloc memory-allocation KPI. + * + * @discussion + * Kernel extensions can use these functions to allocate and deallocate + * memory blocks that are tracked under named tags. + * A kernel extension can create whatever tags it needs, + * but typically just creates one with its bundle identifier. + * + * Tags are required; attempting to use these functions without one + * will result in a panic. + * + * Use Restrictions + * + * None of the OSMalloc functions are safe to call + * in a primary interrupt handler. + */ + +#ifdef MACH_KERNEL_PRIVATE + +#define OSMT_MAX_NAME (64) -#define OSMT_MAX_NAME 64 +typedef struct _OSMallocTag_ { + queue_chain_t OSMT_link; + uint32_t OSMT_refcnt; + uint32_t OSMT_state; + uint32_t OSMT_attr; + char OSMT_name[OSMT_MAX_NAME]; +} * OSMallocTag; -typedef struct _OSMallocTag_ { - queue_chain_t OSMT_link; - uint32_t OSMT_refcnt; - uint32_t OSMT_state; - uint32_t OSMT_attr; - char OSMT_name[OSMT_MAX_NAME]; -} *OSMallocTag; +#define OSMT_VALID_MASK 0xFFFF0000 +#define OSMT_VALID 0xDEAB0000 +#define OSMT_RELEASED 0x00000001 -#define OSMT_VALID_MASK 0xFFFF0000 -#define OSMT_VALID 0xDEAB0000 -#define OSMT_RELEASED 0x00000001 +/*! @parseOnly */ +#define OSMT_ATTR_PAGEABLE 0x01 -#define OSMT_ATTR_PAGEABLE 0x01 #else -typedef struct __OSMallocTag__ *OSMallocTag, *OSMallocTag_t; +/*! + * @typedef OSMallocTag + * + * @abstract + * An opaque type used to track memory allocations. + */ +typedef struct __OSMallocTag__ * OSMallocTag; + + +/*! + * @typedef OSMallocTag_t + * + * @abstract + * See @link OSMallocTag OSMallocTag@/link. 
+ */ +typedef struct __OSMallocTag__ * OSMallocTag_t; #endif -#define OSMT_DEFAULT 0x00 -#define OSMT_PAGEABLE 0x01 +/*! + * @define OSMT_DEFAULT + * + * @abstract + * Indicates that an @link OSMallocTag OSMallocTag@/link + * be created with default attributes. + * + * @discussion + * An @link OSMallocTag OSMallocTag@/link created + * with this attribute allocates all blocks in wired memory. + */ +#define OSMT_DEFAULT 0x00 -extern OSMallocTag OSMalloc_Tagalloc(const char * str, uint32_t flags); -extern void OSMalloc_Tagfree(OSMallocTag tag); +/*! + * @define OSMT_PAGEABLE + * + * @abstract + * Indicates that an @link OSMallocTag OSMallocTag@/link + * should allocate pageable memory when possible. + * + * @discussion + * An @link OSMallocTag OSMallocTag@/link created + * with this attribute allocates blocks of a full page size or larger + * in pageable memory, + * and blocks smaller than a full page size in wired memory. + */ +#define OSMT_PAGEABLE 0x01 -extern void * OSMalloc(uint32_t size, OSMallocTag tag); -extern void * OSMalloc_nowait(uint32_t size, OSMallocTag tag); +/*! + * @function OSMalloc_Tagalloc + * + * @abstract + * Creates a tag for use with OSMalloc functions. + * + * @param name The name of the tag to create. + * @param flags A bitmask that controls allocation behavior; see description. + * + * @result + * An opaque tag to be used with OSMalloc functions for tracking memory usage. + * + * @discussion + * OSMalloc tags can have arbitrary names of a length up to 63 characters. + * Calling this function twice with the same name + * creates two tags, which share that name. + * + * flags can be the bitwise OR of the following flags: + *
<ul> + * <li>@link OSMT_DEFAULT OSMT_DEFAULT@/link - + * allocations are wired. This is the 'zero' bitmask value and + * is overridden by any other flag specified.</li> + * <li>@link OSMT_PAGEABLE OSMT_PAGEABLE@/link - + * allocations of a full page size or greater are pageable; + * allocations smaller than a page are wired.</li> + * </ul>
+ */ +extern OSMallocTag OSMalloc_Tagalloc( + const char * name, + uint32_t flags); -extern void * OSMalloc_noblock(uint32_t size, OSMallocTag tag); -extern void OSFree(void * addr, uint32_t size, OSMallocTag tag); +/*! + * @function OSMalloc_Tagfree + * + * @abstract + * Frees a tag used with OSMalloc functions. + * + * @param tag The @link OSMallocTag OSMallocTag@/link to free. + * + * @discussion + * OSMalloc tags must not be freed + * while any memory blocks allocated + * with them still exist. + * Any OSMalloc function called on those blocks + * will result in a panic. + */ +extern void OSMalloc_Tagfree(OSMallocTag tag); + + +/*! + * @function OSMalloc + * + * @abstract + * Allocates a block of memory associated + * with a given @link OSMallocTag OSMallocTag@/link. + * + * @param size The size of the memory block to allocate. + * @param tag The @link OSMallocTag OSMallocTag@/link + * under which to allocate the memory. + * + * @result + * A pointer to the memory on success, NULL on failure. + * + * @discussion + * If tag was created with the + * @link OSMT_PAGEABLE OSMT_PAGEABLE@/link + * attribute and size + * is a full page or larger, the allocated memory is pageable; + * otherwise it is wired. + */ +extern void * OSMalloc( + uint32_t size, + OSMallocTag tag); + + +/*! + * @function OSMalloc_nowait + * + * @abstract + * Equivalent to @link OSMalloc_noblock OSMalloc_noblock@/link. + */ +extern void * OSMalloc_nowait( + uint32_t size, + OSMallocTag tag); + + +/*! + * @function OSMalloc_noblock + * + * @abstract + * Allocates a block of memory associated + * with a given @link OSMallocTag OSMallocTag@/link, + * returning NULL if it would block. + * + * @param size The size of the memory block to allocate. + * @param tag The @link OSMallocTag OSMallocTag@/link + * under which to allocate the memory. + * + * @result + * A pointer to the memory on success, NULL on failure + * or if allocation would block. 
+ * + * @discussion + * If tag was created with the + * @link OSMT_PAGEABLE OSMT_PAGEABLE@/link + * attribute and size + * is a full page or larger, the allocated memory is pageable; + * otherwise it is wired. + * + * This function is guaranteed not to block. + */ +extern void * OSMalloc_noblock( + uint32_t size, + OSMallocTag tag); + + +/*! + * @function OSFree + * + * @abstract + * Frees a block of memory allocated by @link OSMalloc OSMalloc@/link. + * + * @param addr A pointer to the memory block to free. + * @param size The size of the memory block to free. + * @param tag The @link OSMallocTag OSMallocTag@/link + * with which addr was originally allocated. + */ +extern void OSFree( + void * addr, + uint32_t size, + OSMallocTag tag); __END_DECLS diff --git a/libkern/libkern/OSReturn.h b/libkern/libkern/OSReturn.h index ace65fcc0..723fd6256 100644 --- a/libkern/libkern/OSReturn.h +++ b/libkern/libkern/OSReturn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,14 +26,14 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998 Apple Inc. All rights reserved. * * HISTORY * */ /* - * Core IOReturn values. Others may be family defined. + * Core OSReturn values. */ #ifndef __LIBKERN_OSRETURN_H @@ -45,36 +45,159 @@ __BEGIN_DECLS #include -typedef kern_return_t OSReturn; + +/*! + * @header + * + * Declares functions, basic return values, and other constants + * related to kernel extensions (kexts). + */ + +#if PRAGMA_MARK +#pragma mark Core OSReturn Values for Libkern +#endif +/********************************************************************* +* Core OSReturn Values for Libkern +*********************************************************************/ +/*! 
+ * @group Core OSReturn Values for Libkern + * Some kext and I/O Kit functions can return these values, + * as well as other values of + * kern_return_t. + * + * Many of these return values represent internal errors + * in the Libkern C++ run-time typing information system + * based on @link //apple_ref/doc/class/OSMetaClass OSMetaClass@/link; + * you are unlikely to ever see them. + * + */ + +#ifdef XNU_KERNEL_PRIVATE +/********************************************************************* +* Check libsyscall/mach/err_libkern.sub when editing or adding +* result codes! +*********************************************************************/ +#endif /* XNU_KERNEL_PRIVATE */ + +/*! + * @typedef OSReturn + * @abstract The return type for many Libkern functions. + */ +typedef kern_return_t OSReturn; #ifndef sys_libkern -#define sys_libkern err_system(0x37) +#define sys_libkern err_system(0x37) #endif /* sys_libkern */ -#define sub_libkern_common err_sub(0) -#define sub_libkern_metaclass err_sub(1) -#define sub_libkern_reserved err_sub(-1) - -#define libkern_common_err(return) \ - (sys_libkern|sub_libkern_common|(return)) -#define libkern_metaclass_err(return) \ - (sys_libkern|sub_libkern_metaclass|(return)) - -#define kOSReturnSuccess KERN_SUCCESS // OK -#define kOSReturnError libkern_common_err(1) // general error - - -// OSMetaClass subsystem error's -#define kOSMetaClassInternal libkern_metaclass_err(1) // runtime internal error -#define kOSMetaClassHasInstances libkern_metaclass_err(2) // Can't unload outstanding instances -#define kOSMetaClassNoInit libkern_metaclass_err(3) // kmodInitializeLoad wasn't called, runtime internal error -#define kOSMetaClassNoTempData libkern_metaclass_err(4) // Allocation failure internal data -#define kOSMetaClassNoDicts libkern_metaclass_err(5) // Allocation failure for Metaclass internal dictionaries -#define kOSMetaClassNoKModSet libkern_metaclass_err(6) // Allocation failure for internal kmodule set -#define 
kOSMetaClassNoInsKModSet libkern_metaclass_err(7) // Can't insert the KMod set into the module dictionary -#define kOSMetaClassNoSuper libkern_metaclass_err(8) // Can't associate a class with its super class -#define kOSMetaClassInstNoSuper libkern_metaclass_err(9) // During instance construction can't find a super class -#define kOSMetaClassDuplicateClass libkern_metaclass_err(10) // Found class duplicate during module load +#define sub_libkern_common err_sub(0) +#define sub_libkern_metaclass err_sub(1) +#define sub_libkern_reserved err_sub(-1) + +#define libkern_common_err(return) (sys_libkern|sub_libkern_common|(return)) +#define libkern_metaclass_err(return) (sys_libkern|sub_libkern_metaclass|(return)) + +/* See OSKextLib.h for these + * #define sub_libkern_kext err_sub(2) + * #define libkern_kext_err(code) (sys_libkern|sub_libkern_kext|(code)) + */ + +/*! + * @define kOSReturnSuccess + * @abstract Operation successful. + * Equal to @link //apple_ref/c/econst/KERN_SUCCESS + * KERN_SUCCESS@/link. + */ +#define kOSReturnSuccess KERN_SUCCESS + +/*! + * @define kOSReturnError + * @abstract Unspecified Libkern error. + * Not equal to + * @link //apple_ref/c/econst/KERN_FAILURE + * KERN_FAILURE@/link. + */ +#define kOSReturnError libkern_common_err(1) + +/*! + * @define kOSMetaClassInternal + * @abstract Internal OSMetaClass run-time error. + */ +#define kOSMetaClassInternal libkern_metaclass_err(1) + +/*! + * @define kOSMetaClassHasInstances + * @abstract A kext cannot be unloaded because there are instances + * derived from Libkern C++ classes that it defines. + */ +#define kOSMetaClassHasInstances libkern_metaclass_err(2) + +/*! + * @define kOSMetaClassNoInit + * @abstract Internal error: The Libkern C++ class registration system + * was not properly initialized during kext loading. + */ +#define kOSMetaClassNoInit libkern_metaclass_err(3) +// OSMetaClass::preModLoad wasn't called, runtime internal error + +/*! 
+ * @define kOSMetaClassNoTempData + * @abstract Internal error: An allocation failure occurred + * registering Libkern C++ classes during kext loading. + */ +#define kOSMetaClassNoTempData libkern_metaclass_err(4) +// Allocation failure internal data + +/*! + * @define kOSMetaClassNoDicts + * @abstract Internal error: An allocation failure occurred + * registering Libkern C++ classes during kext loading. + */ +#define kOSMetaClassNoDicts libkern_metaclass_err(5) +// Allocation failure for Metaclass internal dictionaries + +/*! + * @define kOSMetaClassNoKModSet + * @abstract Internal error: An allocation failure occurred + * registering Libkern C++ classes during kext loading. + */ +#define kOSMetaClassNoKModSet libkern_metaclass_err(6) +// Allocation failure for internal kmodule set + +/*! + * @define kOSMetaClassNoInsKModSet + * @abstract Internal error: An error occurred registering + * a specific Libkern C++ class during kext loading. + */ +#define kOSMetaClassNoInsKModSet libkern_metaclass_err(7) +// Can't insert the KMod set into the module dictionary + +/*! + * @define kOSMetaClassNoSuper + * @abstract Internal error: No superclass can be found + * for a specific Libkern C++ class during kext loading. + */ +#define kOSMetaClassNoSuper libkern_metaclass_err(8) + +/*! + * @define kOSMetaClassInstNoSuper + * @abstract Internal error: No superclass can be found when constructing + * an instance of a Libkern C++ class. + */ +#define kOSMetaClassInstNoSuper libkern_metaclass_err(9) + +/*! + * @define kOSMetaClassDuplicateClass + * @abstract A duplicate Libkern C++ classname was encountered + * during kext loading. + */ +#define kOSMetaClassDuplicateClass libkern_metaclass_err(10) + +/*! + * @define kOSMetaClassNoKext + * @abstract Internal error: The kext for a Libkern C++ class + * can't be found during kext loading. 
+ */ +#define kOSMetaClassNoKext libkern_metaclass_err(11) __END_DECLS diff --git a/libkern/libkern/OSTypes.h b/libkern/libkern/OSTypes.h index 3366f1447..6a03a2740 100644 --- a/libkern/libkern/OSTypes.h +++ b/libkern/libkern/OSTypes.h @@ -35,6 +35,8 @@ #ifndef _OS_OSTYPES_H #define _OS_OSTYPES_H +#define OSTYPES_K64_REV 2 + typedef unsigned int UInt; typedef signed int SInt; @@ -86,10 +88,38 @@ typedef struct wide { #endif typedef SInt32 OSStatus; + +#if defined(__LP64__) && defined(KERNEL) +#ifndef ABSOLUTETIME_SCALAR_TYPE +#define ABSOLUTETIME_SCALAR_TYPE 1 +#endif +typedef UInt64 AbsoluteTime; +#else typedef UnsignedWide AbsoluteTime; +#endif + typedef UInt32 OptionBits; +#if defined(KERNEL) && defined(__LP64__) +/* + * Use intrinsic boolean types for the LP64 kernel, otherwise maintain + * source and binary backward compatibility. This attempts to resolve + * the "(x == true)" vs. "(x)" conditional issue. + */ +#ifdef __cplusplus +typedef bool Boolean; +#else /* !__cplusplus */ +#if defined(__STDC_VERSION__) && ((__STDC_VERSION__ - 199901L) > 0L) +/* only use this if we are sure we are using a c99 compiler */ +typedef _Bool Boolean; +#else /* !c99 */ +/* Fall back to previous definition unless c99 */ +typedef unsigned char Boolean; +#endif /* !c99 */ +#endif /* !__cplusplus */ +#else /* !(KERNEL && __LP64__) */ typedef unsigned char Boolean; +#endif /* !(KERNEL && __LP64__) */ #endif /* __TYPES__ */ #endif /* __MACTYPES__ */ diff --git a/libkern/libkern/c++/Makefile b/libkern/libkern/c++/Makefile index ccb46b4a8..4d2eb7d29 100644 --- a/libkern/libkern/c++/Makefile +++ b/libkern/libkern/c++/Makefile @@ -12,6 +12,8 @@ INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = + INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} @@ -20,6 +22,8 @@ EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} + EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} DATAFILES = \ 
@@ -33,6 +37,7 @@ DATAFILES = \ OSDictionary.h \ OSEndianTypes.h \ OSIterator.h \ + OSKext.h \ OSLib.h \ OSMetaClass.h \ OSNumber.h \ diff --git a/libkern/libkern/c++/OSArray.h b/libkern/libkern/c++/OSArray.h index 7cc92dfaf..dce9f84e9 100644 --- a/libkern/libkern/c++/OSArray.h +++ b/libkern/libkern/c++/OSArray.h @@ -36,14 +36,55 @@ class OSSerialize; /*! - @class OSArray - @abstract A collection class whose instances maintain a list of object references. - @discussion - An instance of an OSArray is a mutable collection which maintains a list of references to OSMetaClassBase derived objects. Objects are referenced by index, where the index is an integer with a value of 0 to N-1 where N is the number of objects contained within the array. - - Objects placed into an array are automatically retained and objects removed or replaced are automatically released. All objects are released when the array is freed. -*/ - + * @header + * + * @abstract + * This header declares the OSArray collection class. + */ + + +/*! + * @class OSArray + * + * @abstract + * OSArray provides an indexed store of objects. + * + * @discussion + * OSArray is a container for Libkern C++ objects + * (those derived from + * @link //apple_ref/doc/class/OSMetaClassBase OSMetaClassBase@/link, + * in particular + * @link //apple_ref/doc/class/OSObject OSObject@/link). + * Storage and access are by array index. + * + * You must generally cast retrieved objects from + * @link //apple_ref/cpp/cl/OSObject OSObject@/link + * to the desired class using + * @link //apple_ref/cpp/macro/OSDynamicCast OSDynamicCast@/link. + * This macro returns the object cast to the desired class, + * or NULL if the object isn't derived from that class. + * + * As with all Libkern collection classes, + * OSArray retains objects added to it, + * and releases objects removed from it (or replaced). 
+ * An OSArray also grows as necessary to accommodate new objects, + * unlike Core Foundation collections (it does not, however, shrink). + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSArray provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSArray : public OSCollection { friend class OSSet; @@ -51,227 +92,625 @@ class OSArray : public OSCollection OSDeclareDefaultStructors(OSArray) protected: - const OSMetaClassBase **array; - unsigned int count; - unsigned int capacity; - unsigned int capacityIncrement; + const OSMetaClassBase ** array; + unsigned int count; + unsigned int capacity; + unsigned int capacityIncrement; struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; - /* - * OSCollectionIterator interfaces. - */ + /* OSCollectionIterator interfaces. */ virtual unsigned int iteratorSize() const; - virtual bool initIterator(void *iterator) const; - virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const; + virtual bool initIterator(void * iterator) const; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; public: - /*! - @function withCapacity - @abstract A static constructor function to create and initialize a new instance of OSArray with a given capacity. - @param capacity The initial capacity (number of refernces) of the OSArray instance. 
- @result Returns a reference to an instance of OSArray or 0 if an error occurred. + + /*! + * @function withCapacity + * + * @abstract + * Creates and initializes an empty OSArray. + * + * @param capacity The initial storage capacity of the array object. + * + * @result + * An empty instance of OSArray with a retain count of 1; + * NULL on failure. + * + * @discussion + * capacity must be nonzero. + * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). */ - static OSArray *withCapacity(unsigned int capacity); - /*! - @function withObjects - @abstract A static constructor function to create and initialize a new instance of OSArray and populates it with a list of objects provided. - @param objects A static array of references to OSMetaClassBase derived objects. - @param count The number of objects provided. - @param capacity The initial storage size of the OSArray instance. If 0, the capacity will be set to the size of count, else the capacity must be greater than or equal to count. - @result Returns a reference to a new instance of OSArray or 0 if an error occurred. + static OSArray * withCapacity(unsigned int capacity); + + + /*! + * @function withObjects + * + * @abstract + * Creates and initializes an OSArray populated with objects provided. + * + * @param objects A C array of OSObject-derived instances. + * @param count The number of objects to be placed into the array. + * @param capacity The initial storage capacity of the array object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * An instance of OSArray containing the objects provided, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * objects must be non-NULL, and count must be nonzero. + * If capacity is nonzero, + * it must be greater than or equal to count. 
+ * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). */ - static OSArray *withObjects(const OSObject *objects[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function withArray - @abstract A static constructor function to create and initialize an instance of OSArray of a given capacity and populate it with the contents of the supplied OSArray object. - @param array An instance of OSArray from which the new instance will aquire its contents. - @param capacity The capacity of the new OSArray. If 0, the capacity will be set to the number of elements in the array, else the capacity must be greater than or equal to the number of elements in the array. - @result Returns a reference to an new instance of OSArray or 0 if an error occurred. + static OSArray * withObjects( + const OSObject * objects[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function withArray + * + * @abstract + * Creates and initializes an OSArray populated with the contents of another array. + * + * @param array An OSArray whose contents will be stored + * in the new instance. + * @param capacity The initial storage capacity of the array object. + * If 0, the capacity is set to the number of objects + * in array; + * otherwise capacity must be + * greater than or equal to the number of objects + * in array. + * + * @result + * An instance of OSArray containing the objects of array, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * array must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). + * + * The objects in array are retained + * for storage in the new OSArray, + * not copied. 
*/ - static OSArray *withArray(const OSArray *array, - unsigned int capacity = 0); - - /*! - @function initWithCapacity - @abstract A member function which initializes an instance of OSArray. - @param capacity The initial capacity of the new instance of OSArray. - @result Returns a true if initialization succeeded or false if not. + static OSArray * withArray( + const OSArray * array, + unsigned int capacity = 0); + + + /*! + * @function initWithCapacity + * + * @abstract + * Initializes a new instance of OSArray. + * + * @param capacity The initial storage capacity of the array object. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link //apple_ref/cpp/clm/OSArray/withCapacity/staticOSArray*\/(unsignedint) + * withCapacity@/link + * instead. + * + * capacity must be nonzero. + * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). */ virtual bool initWithCapacity(unsigned int capacity); - /*! - @function initWithObjects - @abstract A member function which initializes an instance of OSArray and populates it with the given list of objects. - @param objects A static array containing references to OSMetaClassBase derived objects. - @param count The number of objects to added to the array. - @param capacity The initial capacity of the new instance of OSArray. If 0, the capacity will be set to the same value as the 'count' parameter, else capacity must be greater than or equal to the value of 'count'. - @result Returns a true if initialization succeeded or false if not. + + + /*! + * @function initWithObjects + * + * @abstract + * Initializes a new OSArray populated with objects provided. + * + * @param objects A C array of OSObject-derived objects. + * @param count The number of objects to be placed into the array. 
+ * @param capacity The initial storage capacity of the array object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSArray/withObjects/staticOSArray*\/(constOSObject*,unsignedint,unsignedint) + * withObjects@/link + * instead. + * + * objects must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). */ - virtual bool initWithObjects(const OSObject *objects[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function initWithArray - @abstract A member function which initializes an instance of OSArray and populates it with the contents of the supplied OSArray object. - @param anArray An instance of OSArray containing the references to objects which will be copied to the new instances of OSArray. - @param capacity The initial capacity of the new instance of OSArray. If 0, the capacity will be set to the number of elements in the array, else the capacity must be greater than or equal to the number of elements in the array. - @result Returns a true if initialization succeeded or false if not. + virtual bool initWithObjects( + const OSObject * objects[], + unsigned int count, + unsigned int capacity = 0); + + /*! + * @function initWithArray + * + * @abstract + * Initializes a new OSArray populated with the contents of another array. + * + * @param anArray The array whose contents will be placed + * in the new instance. + * @param capacity The initial storage capacity of the array object. 
+ * If 0, the capacity is set to the number of objects + * in array; + * otherwise capacity must be + * greater than or equal to the number of objects + * in array. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link //apple_ref/cpp/clm/OSArray/withArray/staticOSArray*\/(constOSArray*,unsignedint) + * withArray@/link instead. + * + * array must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new array will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001502 CFMutableArray@/link, + * for which the initial capacity is a hard limit). + * + * The objects in array are retained for storage in the new OSArray, + * not copied. */ - virtual bool initWithArray(const OSArray *anArray, - unsigned int theCapacity = 0); - /*! - @function free - @abstract Deallocates and releases all resources used by the OSArray instance. Normally, this is not called directly. - @discussion This function should not be called directly, use release() instead. + virtual bool initWithArray( + const OSArray * anArray, + unsigned int capacity = 0); + + + /*! + * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSArray instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getCount - @abstract A member function which returns the number of references contained within the OSArray object. - @result Returns the number of items within the OSArray object. + + /*! + * @function getCount + * + * @abstract + * Returns the current number of objects within the array. + * + * @result + * The current number of objects within the array. */ virtual unsigned int getCount() const; - /*! 
- @function getCapacity - @abstract A member function which returns the storage capacity of the OSArray object. - @result Returns the storage capacity of the OSArray object. + + + /*! + * @function getCapacity + * + * @abstract + * Returns the number of objects the array can store + * without reallocating. + * + * @result + * The number of objects the array can store + * without reallocating. + * + * @discussion + * OSArray objects grow when full to accommodate additional objects. + * See + * @link + * //apple_ref/cpp/instm/OSArray/getCapacityIncrement/virtualunsignedint/() + * getCapacityIncrement@/link + * and + * @link + * //apple_ref/cpp/instm/OSArray/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link. */ virtual unsigned int getCapacity() const; - /*! - @function getCapacityIncrement - @abstract A member function which returns the size by which the array will grow. - @result Returns the size by which the array will grow. + + + /*! + * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the array. + * + * @result + * The storage increment of the array. + * + * @discussion + * An OSArray allocates storage for objects in multiples + * of the capacity increment. */ virtual unsigned int getCapacityIncrement() const; - /*! - @function setCapacityIncrement - @abstract A member function which sets the growth size of the array. - @result Returns the new growth size. + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the array. + * + * @result + * The new storage increment of the array, + * which may be different from the number requested. + * + * @discussion + * An OSArray allocates storage for objects in multiples + * of the capacity increment. + * Calling this function does not immediately reallocate storage. */ virtual unsigned int setCapacityIncrement(unsigned increment); - /*!
- @function ensureCapacity - @abstract A member function which will expand the size of the collection to a given storage capacity. - @param newCapacity The new capacity for the array. - @result Returns the new capacity of the array or the previous capacity upon error. + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the array has enough space + * to store the requested number of objects. + * + * @param newCapacity The total number of objects the array + * should be able to store. + * + * @result + * The new capacity of the array, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * This function immediately resizes the array, if necessary, + * to accommodate at least newCapacity objects. + * If newCapacity is not greater than the current capacity, + * or if an allocation error occurs, the original capacity is returned. + * + * There is no way to reduce the capacity of an OSArray. */ virtual unsigned int ensureCapacity(unsigned int newCapacity); - /*! - @function flushCollection - @abstract A member function which removes and releases all items within the array. + + /*! + * @function flushCollection + * + * @abstract + * Removes and releases all objects within the array. + * + * @discussion + * The array's capacity (and therefore direct memory consumption) + * is not reduced by this function. */ virtual void flushCollection(); - /*! - @function setObject - @abstract A member function which appends an object onto the end of the array. - @param anObject The object to add to the OSArray instance. The object will be retained automatically. - @result Returns true if the addition of 'anObject' was successful, false if not; failure usually results from failing to allocate the necessary memory. + + /*! + * @function setObject + * + * @abstract + * Appends an object onto the end of the array, + * increasing storage if necessary. 
+ * + * @param anObject The object to add to the OSArray instance. + * + * @result + * true if the addition of anObject was successful, + * false if not. + * + * @discussion + * The array adds storage to accommodate the new object, if necessary. + * If successfully added, the object is retained. */ - virtual bool setObject(const OSMetaClassBase *anObject); - /*! - @function setObject - @abstract A member function which inserts an object into the array at a particular index. - @param index The index into the array to insert the object. - @param anObject The object to add to the OSArray instance. The object will be retained automatically. - @result Returns true if the addition of 'anObject' was successful, false if not. + virtual bool setObject(const OSMetaClassBase * anObject); + + + /*! + * @function setObject + * + * @abstract + * Inserts or appends an object into the array + * at a particular index. + * + * @param index The index in the array at which to insert the object. + * Must be less than or equal to the array's count. + * @param anObject The object to add to the array. + * + * @result + * true if the addition of anObject + * was successful, false if not. + * + * @discussion + * This function moves existing objects from index on, + * in order to accommodate the new object; + * it does not replace an existing object at index. See + * @link + * //apple_ref/cpp/instm/OSArray/replaceObject/virtualvoid/(unsignedint,constOSMetaClassBase*) + * replaceObject@/link. + * If successfully added, the object is retained. + * + * The array adds storage to accommodate the new object, if necessary. + * Note, however, that this function does not allow for arbitrary growth + * of an array by specifying an index larger than the current count. + * If you need to immediately grow an array by an arbitrary amount, + * use + * @link + * //apple_ref/cpp/instm/OSArray/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link.
*/ - virtual bool setObject(unsigned int index, const OSMetaClassBase *anObject); + virtual bool setObject( + unsigned int index, + const OSMetaClassBase * anObject); - /*! - @function merge - @abstract A member function which appends the contents of an array onto the receiving array. - @param otherArray The array whose contents will be appended to the reveiving array. - @result Returns true when merging was successful, false otherwise. + + /*! + * @function merge + * + * @abstract + * Appends the contents of an array onto the receiving array. + * + * @param otherArray The array whose contents will be appended + * to the receiving array. + * @result + * true if merging was successful, false otherwise. + * + * @discussion + * This function merely appends one array onto another. + * Duplicates are not avoided and no sorting is performed. + * Objects successfully added to the receiver are retained. */ - virtual bool merge(const OSArray *otherArray); + virtual bool merge(const OSArray * otherArray); + - /*! - @function replaceObject - @abstract A member function which will replace an object in an array at a given index. The original object will be released and the new object will be retained. - @param index The index into the array at which the new object will be placed. - @param anObject The object to be placed into the array. + /*! + * @function replaceObject + * + * @abstract + * Replaces an object in an array at a given index. + * + * @param index The index of the object to be replaced. + * Must be less than the array's count. + * @param anObject The object to be placed into the array. + * + * @discussion + * The original object is released and the new object is retained. */ - virtual void replaceObject(unsigned int index, const OSMetaClassBase *anObject); - /*! - @function removeObject - @abstract A member function which removes an object from the array. - @param index The index of the object to be removed. 
- @discussion This function removes an object from the array which is located at a given index. Once removed the contents of the array will shift to fill in the vacated spot. The removed object is automatically released. + virtual void replaceObject( + unsigned int index, + const OSMetaClassBase * anObject); + + + /*! + * @function removeObject + * + * @abstract + * Removes an object from the array. + * + * @param index The index of the object to be removed. + * + * @discussion + * This function moves existing objects to fill the vacated index + * so that there are no gaps. + * The object removed is released. */ virtual void removeObject(unsigned int index); - - /*! - @function isEqualTo - @abstract A member function which tests the equality of the values of two OSArray objects. - @param anArray The array object being compared against the receiver. - @result Returns true if the two arrays are equivalent or false otherwise. + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSArray objects. + * + * @param anArray The array object being compared against the receiver. + * + * @result + * true if the two arrays are equivalent, + *false otherwise. + * + * @discussion + * Two OSArray objects are considered equal if they have same count + * and if the objects at corresponding indices compare as equal using + * @link + * //apple_ref/cpp/instm/OSMetaClassBase/isEqualTo/virtualbool/(constOSMetaClassBase*) + * isEqualTo@/link. */ - virtual bool isEqualTo(const OSArray *anArray) const; - /*! - @function isEqualTo - @abstract A member function which compares the equality of the values of a receiving array to an arbitrary object. - @param anObject The object to be compared against the receiver. - @result Returns true if the two objects are equivalent, that is they are either the same object or they are both arrays containing the same or equivalent objects, or false otherwise. + virtual bool isEqualTo(const OSArray * anArray) const; + + + /*! 
+ * @function isEqualTo + * + * @abstract + * Tests the equality of an OSArray to an arbitrary object. + * + * @param anObject The object to be compared against the receiver. + * + * @result + * true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * An OSArray is considered equal to another object + * if that object is derived from OSArray + * and contains the same or equivalent objects. */ - virtual bool isEqualTo(const OSMetaClassBase *anObject) const; - - /*! - @function getObject - @abstract A member function which returns a reference to an object located within the array at a given index. The caller should not release the returned object. - @param index The index into the array from which the reference to an object is taken. - @result Returns a reference to an object or 0 if the index is beyond the bounds of the array. + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function getObject + * + * @abstract + * Return the object stored at a given index. + * + * @param index The index of the object to be returned to caller. + * + * @result + * The object stored at index, + * or NULL if index lies past the end of the array. + * + * @discussion + * The returned object will be released if removed from the array; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. + */ + virtual OSObject * getObject(unsigned int index) const; + + + /*! + * @function getLastObject + * + * @abstract + * Returns the last object in the array. + * + * @result + * The last object in the array, + * or NULL if the array is empty. + * + * @discussion + * The returned object will be released if removed from the array; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. 
*/ - virtual OSObject *getObject(unsigned int index) const; - /*! - @function getLastObject - @abstract A member function which returns a reference to the last object in the array. The caller should not release the returned object. - @result Returns a reference to the last object in the array or 0 if the array is empty. + virtual OSObject * getLastObject() const; + + + /*! + * @function getNextIndexOfObject + * + * @abstract + * Scans the array for the next instance of a specific object + * at or beyond a given index. + * + * @param anObject The object to scan for. + * @param index The index at which to begin the scan. + * + * @result + * The next index of anObject in the array or (-1) + * if none is found. + * + * @discussion + * This function uses pointer equivalence, and does not use + * @link + * //apple_ref/cpp/instm/OSMetaClassBase/isEqualTo/virtualbool/(constOSMetaClassBase*) + * isEqualTo@/link. */ - virtual OSObject *getLastObject() const; + virtual unsigned int getNextIndexOfObject( + const OSMetaClassBase * anObject, + unsigned int index) const; - /*! - @function getNextIndexOfObject - @abstract A member function which searches the array for the next instance of a specific object, at or beyond the supplied index. - @result Returns the next index of the object in the array or (-1) if none is found. + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * @result + * true if serialization succeeds, false if not. */ - virtual unsigned int getNextIndexOfObject(const OSMetaClassBase * anObject, - unsigned int index) const; - - /*! - @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + virtual bool serialize(OSSerialize * serializer) const; + + + /*! 
+ * @function setOptions + * + * @abstract + * Recursively sets option bits in an array + * and all child collections. + * + * @param options A bitfield whose values turn the options on (1) or off (0). + * @param mask A mask indicating which bits + * in options to change. + * Pass 0 to get the whole current options bitfield + * without changing any settings. + * @param context Unused. + * + * @result + * The options bitfield as it was before the set operation. + * + * @discussion + * Kernel extensions should not call this function. + * + * Child collections' options are changed only if the receiving array's + * options actually change. */ - virtual bool serialize(OSSerialize *s) const; - - /*! - @function setOptions - @abstract This function is used to recursively set option bits in this array and all child collections. - @param options Set the (options & mask) bits. - @param mask The mask of bits which need to be set, 0 to get the current value. - @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. - */ - virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); - - /*! - @function copyCollection - @abstract Do a deep copy of this array and its collections. - @discussion This function copies this array included collections recursively. Objects that don't derive from OSContainter are NOT copied, that is objects like OSString and OSData. - @param cycleDict Is a dictionary of all of the collections that have been, to start the copy at the top level just leave this field 0. - @result The newly copied collecton or 0 if insufficient memory + virtual unsigned setOptions( + unsigned options, + unsigned mask, + void * context = 0); + + + /*! + * @function copyCollection + * + * @abstract + * Creates a deep copy of an array and its child collections. + * + * @param cycleDict A dictionary of all of the collections + * that have been copied so far, + * which is used to track circular references. 
+ * To start the copy at the top level, + * pass NULL. + * + * @result + * The newly copied array, with a retain count of 1, + * or NULL if there is insufficient memory to do the copy. + * + * @discussion + * The receiving array, and any collections it contains, + * recursively, are copied. + * Objects that are not derived from OSCollection are retained + * rather than copied. */ - OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSCollection * copyCollection(OSDictionary * cycleDict = 0); OSMetaClassDeclareReservedUnused(OSArray, 0); OSMetaClassDeclareReservedUnused(OSArray, 1); diff --git a/libkern/libkern/c++/OSBoolean.h b/libkern/libkern/c++/OSBoolean.h index 107e53631..cd99b47c9 100644 --- a/libkern/libkern/c++/OSBoolean.h +++ b/libkern/libkern/c++/OSBoolean.h @@ -35,9 +35,31 @@ class OSString; /*! - @class OSBoolean - @abstract Container class for boolean values. -*/ + * @header + * + * @abstract + * This header declares the OSBoolean container class. + */ + + +/*! + * @class OSBoolean + * + * @abstract + * OSBoolean wraps a boolean value in a C++ object + * for use in Libkern collections. + * + * @discussion + * OSBoolean represents a boolean true/false value + * as a Libkern C++ object. + * There are only two instances of OSBoolean, + * @link kOSBooleanTrue kOSBooleanTrue@/link + * and @link kOSBooleanFalse kOSBooleanFalse@/link. + * These are shared globally and returned by the instance-creation function + * @link withBoolean withBoolean@/link. + * Thus, you can use pointer comparison + * to test whether two OSBoolean objects are equal. + */ class OSBoolean : public OSObject { OSDeclareDefaultStructors(OSBoolean) @@ -45,76 +67,164 @@ class OSBoolean : public OSObject protected: bool value; - /*D @function taggedRelease - @abstract Overrides tagged release mechanism. - @param when Unused. + /*! + * @function taggedRelease + * + * @abstract + * Overrides the reference counting mechanism + * for the shared global instances. 
+ * + * @param tag Unused. + * @param when Unused. */ - virtual void taggedRelease(const void *tag, const int when) const; + virtual void taggedRelease( + const void * tag, + const int when) const; public: static void initialize(); - /*D - @function withBoolean - @abstract A static constructor function to create and initialize an instance of OSBoolean. - @param value A boolean value. - @result Returns and instance of OSBoolean, or 0 if an error occurred. + /*! + * @function withBoolean + * + * @abstract + * Returns one of the global instances of OSBoolean. + * + * @param value A boolean value. + * + * @result + * The global instance of OSBoolean with the boolean value. + * + * @discussion + * This function actually returns either + * @link kOSBooleanTrue kOSBooleanTrue@/link or + * @link kOSBooleanFalse kOSBooleanFalse@/link, + * so that you can always use pointer comparison with OSBoolean objects. */ - static OSBoolean *withBoolean(bool value); + static OSBoolean * withBoolean(bool value); - /*D - @function free - @abstract A member function to release all resources used by the OSBoolean instance. - @discussion This function should not be called directly, use release() instead. + /*! + * @function free + * + * @abstract + * Overridden to prevent deallocation of the shared global instances. + * + * @discussion + * This function should never be called. */ virtual void free(); - /*D @function taggedRetain - @abstract Override tagged retain mechanism. */ - virtual void taggedRetain(const void *tag) const; - /*! - @function isTrue - @abstract A member function to test if the boolean object is true. - @result Returns true if the OSBoolean object is true, false otherwise. + /*! + * @function taggedRetain + * + * @abstract + * Overrides the reference counting mechanism for the shared global instances. + * + * @param tag Unused. + */ + virtual void taggedRetain(const void * tag) const; + + + /*! 
+ * @function isTrue + * + * @abstract + * Checks whether the OSBoolean object + * represents a true bool value. + * + * @result + * true if the OSBoolean object is true, + * false otherwise. + * + * @discussion + * You can also use == against + * @link kOSBooleanTrue kOSBooleanTrue@/link. */ virtual bool isTrue() const; - /*! - @function isFalse - @abstract A member function to test if the boolean object is false. - @result Returns true if the OSBoolean object is false, false otherwise. + + + /*! + * @function isFalse + * + * @abstract + * Checks whether the OSBoolean object + * represents a false bool value. + * + * @result + * true if the OSBoolean object is false, + * false otherwise. + * + * @discussion + * You can also use == against + * @link kOSBooleanFalse kOSBooleanFalse@/link. */ virtual bool isFalse() const; - /*! - @function getValue - @abstract Obtains the value of the OSBoolean object as the standard C++ type bool. - @result The value of the OSBoolean object. + + /*! + * @function getValue + * + * @abstract + * Returns the C++ bool value for the OSBoolean object. + * + * @result + * Returns the C++ bool value of the OSBoolean object. */ virtual bool getValue() const; - /*! - @function isEqualTo - @abstract A member function to test the equality of two OSBoolean objects. - @param boolean An OSBoolean object to be compared against the receiver. - @result Returns true if the two objects are equivalent. + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSBoolean objects. + * + * @param aBoolean The OSBoolean to be compared against the receiver. + * + * @result + * true if the OSBoolean objects are equal, + * false if not. + * + * @discussion + * Two OSBoolean objects are considered equal + * if they are the same exact object (pointer equality). */ - virtual bool isEqualTo(const OSBoolean *boolean) const; - /*! 
- @function isEqualTo - @abstract A member function to test the equality between an arbitrary OSObject derived object and an OSBoolean object. - @param obj An OSObject derived object to be compared against the receiver. - @result Returns true if the two objects are equivalent. + virtual bool isEqualTo(const OSBoolean * aBoolean) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSBoolean to an arbitrary object. + * + * @param anObject An object to be compared against the receiver. + * + * @result + * true if the objects are equal, false if not. + * + * @discussion + * An OSBoolean is considered equal to another object + * if that object is derived from OSBoolean + * and represents the same C++ bool value. */ - virtual bool isEqualTo(const OSMetaClassBase *obj) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + - /*! - @function serialize - @abstract A member function that archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize * serializer) const; OSMetaClassDeclareReservedUnused(OSBoolean, 0); OSMetaClassDeclareReservedUnused(OSBoolean, 1); @@ -127,17 +237,35 @@ class OSBoolean : public OSObject }; /*! - @const kOSBooleanTrue - @abstract The OSBoolean constant for "true". - @discussion The OSBoolean constant for "true". The object does not need to be retained or released. Comparisons of the form (booleanObject == kOSBooleanTrue) are acceptable and would be equivalent to (booleanObject->getValue() == true). 
-*/ + * @const kOSBooleanTrue + * + * @abstract + * The OSBoolean constant for true. + * + * @discussion + * The OSBoolean constant for true. + * This object does not need to be retained or released (but it can be). + * Comparisons of the form + * booleanObject == kOSBooleanTrue are acceptable + * and are equivalent to + * booleanObject->getValue() == true. + */ extern OSBoolean * const & kOSBooleanTrue; /*! - @const kOSBooleanFalse - @abstract The OSBoolean constant for "false". - @discussion The OSBoolean constant for "false". The object does not need to be retained or released. Comparisons of the form (booleanObject == kOSBooleanFalse) are acceptable and would be equivalent to (booleanObject->getValue() == false). -*/ + * @const kOSBooleanFalse + * + * @abstract + * The OSBoolean constant for false. + * + * @discussion + * The OSBoolean constant for false. + * This object does not need to be retained or released (but it can be). + * Comparisons of the form + * booleanObject == kOSBooleanFalse + * are acceptable and are equivalent to + * booleanObject->getValue() == false. + */ extern OSBoolean * const & kOSBooleanFalse; #endif /* !_OS_OSBOOLEAN_H */ diff --git a/libkern/libkern/c++/OSCPPDebug.h b/libkern/libkern/c++/OSCPPDebug.h index 1f750741d..f532c7726 100644 --- a/libkern/libkern/c++/OSCPPDebug.h +++ b/libkern/libkern/c++/OSCPPDebug.h @@ -26,6 +26,9 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#ifndef _OSCPPDEBUG_H +#define _OSCPPDEBUG_H + #include #define OSCPP_DEBUG @@ -34,14 +37,21 @@ __BEGIN_DECLS -extern int debug_malloc_size; -extern int debug_iomalloc_size; -extern int debug_container_malloc_size; -extern int debug_ivars_size; +// xx-review: Do we want to document these? 
+ +// xx-review: exported in IOKit.kext +extern int debug_malloc_size; +extern int debug_iomalloc_size; +extern int debug_container_malloc_size; + +// xx-review: exported in Libkern.kext +extern int debug_ivars_size; -void OSPrintMemory( void ); +// xx-review: exported in IOKit.kext +void OSPrintMemory(void); __END_DECLS #endif +#endif /* _OSCPPDEBUG_H */ diff --git a/libkern/libkern/c++/OSCollection.h b/libkern/libkern/c++/OSCollection.h index 15a15fa36..ea0acb648 100644 --- a/libkern/libkern/c++/OSCollection.h +++ b/libkern/libkern/c++/OSCollection.h @@ -34,12 +34,49 @@ class OSDictionary; + +/*! + * @header + * + * @abstract + * This header declares the OSCollection collection class. + */ + + /*! - @class OSCollection - @abstract Abstract super class for all collections. - @discussion - OSCollection is the abstract super class for all OSObject derived collections and provides the necessary interfaces for managing storage space and iteration through a collection. -*/ + * @class OSCollection + * + * @abstract + * The abstract superclass for Libkern collections. + * + * @discussion + * OSCollection is the abstract superclass + * for all Libkern C++ object collections. + * It defines the necessary interfaces for managing storage space + * and iterating through an arbitrary collection + * (see the + * @link //apple_ref/cpp/class/OSIterator OSIterator@/link + * and + * @link //apple_ref/cpp/class/OSCollectionIterator OSCollectionIterator@/link + * classes). + * It is up to concrete subclasses + * to define their specific content management functions. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSCollection provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. 
+ * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSCollection : public OSObject { friend class OSCollectionIterator; @@ -49,180 +86,364 @@ class OSCollection : public OSObject struct ExpansionData { }; protected: + /* Not to be included in headerdoc. + * + * @var updateStamp + * + * @abstract + * A counter for changes to the collection object. + * + * @discussion + * The update stamp is used primarily to track validity + * of iteration contexts. + * See @link //apple_ref/cpp/class/OSIterator OSIterator@/link and + * @link //apple_ref/cpp/class/OSCollectionIterator OSCollectionIterator@/link + * for more information. + */ unsigned int updateStamp; private: /* Reserved for future use. (Internal use only) */ - // ExpansionData *reserved; + // ExpansionData * reserved; unsigned int fOptions; protected: // Member functions used by the OSCollectionIterator class. - /* - @function iteratorSize - @abstract A pure virtual member function to return the size of the iterator context. - @result Returns an integer size for the storage space required to contain context necessary for iterating through a collection. - @discussion - This member function is called by an OSCollectionIterator object to allow it to allocate enough storage space for the iterator context. This context contains the data necessary to iterate through the collection when getNextObjectForIterator() is called. + + + /*! + * @function iteratorSize + * + * @abstract + * Returns the size in bytes of a subclass's iteration context. + * + * @result + * The size in bytes of the iteration context + * needed by the subclass of OSCollection. 
+ * + * @discussion + * This pure virtual member function, which subclasses must implement, + * is called by an + * @link //apple_ref/doc/class/OSCollectionIterator OSCollectionIterator@/link + * object so that it can allocate the storage needed + * for the iteration context. + * An iteration context contains the data necessary + * to iterate through the collection. */ virtual unsigned int iteratorSize() const = 0; - /* - @function initIterator - @abstract Pure virtual member function to allocate and initialize the iterator context data. - @param iterator The iterator context. - @result Returns true if initialization was successful, false otherwise. + + + /*! + * @function initIterator + * + * @abstract + * Initializes the iteration context for a collection subclass. + * + * @param iterationContext The iteration context to initialize. + * + * @result + * true if initialization was successful, + * false otherwise. + * + * @discussion + * This pure virtual member function, which subclasses must implement, + * is called by an + * @link //apple_ref/doc/class/OSCollectionIterator OSCollectionIterator@/link + * object to initialize an iteration context for a collection. + * The collection object should interpret iterationContext appropriately + * and initialize its contents to begin an iteration. + * + * This function can be called repeatedly for a given context, + * whenever the iterator is reset via the + * @link //apple_ref/cpp/instm/OSCollectionIterator/reset/virtualvoid/() + * OSCollectionIterator::reset@/link + * function. */ - virtual bool initIterator(void *iterator) const = 0; - /* - @function getNextObjectForIterator - @abstract A pure virtual member function which returns the next member of a collection. - @param iterator The iterator context. - @param ret The object returned to the caller. - @result Returns true if an object was found, false otherwise. 
- @discussion - This is the entry point used by an OSCollectionIterator object to advance to next object in the collection. The iterator context is passed to the receiver to allow it to find the location of the current object and then advance the iterator context to the next object. + virtual bool initIterator(void * iterationContext) const = 0; + + + /*! + * @function getNextObjectForIterator + * + * @abstract + * Returns the next member of a collection. + * + * @param iterationContext The iteration context. + * @param nextObject The object returned by reference to the caller. + * + * @result + * true if an object was found, false otherwise. + * + * @discussion + * This pure virtual member function, which subclasses must implement, + * is called by an + * @link //apple_ref/doc/class/OSCollectionIterator OSCollectionIterator@/link + * to get the next object for a given iteration context. + * The collection object should interpret + * iterationContext appropriately, + * advance the context from its current object + * to the next object (if it exists), + * return that object by reference in nextObject, + * and return true for the function call. + * If there is no next object, the collection object must return false. + * + * For associative collections, the object returned should be the key + * used to access its associated value, and not the value itself. */ - virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const = 0; - - /* - @function init - @abstract A member function to initialize the OSCollection object. - @result Returns true if an object was initialized successfully, false otherwise. - @discussion - This function is used to initialize state within a newly created OSCollection object. + virtual bool getNextObjectForIterator( + void * iterationContext, + OSObject ** nextObject) const = 0; + + + /*! + * @function init + * + * @abstract + * Initializes the OSCollection object. + * + * @result + * true on success, false otherwise. 
+ * + * @discussion + * This function is used to initialize state + * within a newly created OSCollection object. */ virtual bool init(); public: - enum { - kImmutable = 0x00000001, - kMASK = (unsigned) -1 - }; - - /* - @function haveUpdated - @abstract A member function to track of all updates to the collection. + + /*! + * @typedef _OSCollectionFlags + * + * @const kImmutable + * @discussion + * Used with @link setOptions setOptions@/link + * to indicate the collection's contents should + * or should not change. + * + * An @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link + * object marks collections immutable when set + * as properties of a registry entry that's attached to a plane. + * This is generally an advisory flag, used for debugging; + * setting it does not mean a collection will in fact + * disallow modifications. + */ + typedef enum { + kImmutable = 0x00000001, + kMASK = (unsigned) -1 + } _OSCollectionFlags; + +// xx-review: should be protected, not public + + /*! + * @function haveUpdated + * + * @abstract + * Tracks updates to the collection. + * + * @discussion + * Subclasses call this function before + * making any change to their contents (not after, as the name implies). + * Update tracking is used for collection iterators, + * and to enforce certain protections in the IORegistry. */ void haveUpdated(); - /* - @function getCount - @abstract A pure virtual member function which returns the number of objects in the collection subclass. - @results Returns the number objects in a collection. - */ + + /*! + * @function getCount + * + * @abstract + * Returns the number of objects in the collection. + * + * @result + * The number of objects in the collection. + * + * @discussion + * Subclasses must implement this pure virtual member function. + */ virtual unsigned int getCount() const = 0; - /* - @function getCapacity - @abstract A pure virtual member function which returns the storage space in the collection subclass. 
- @results Returns the number objects in a collection. - */ + + + /*! + * @function getCapacity + * + * @abstract + * Returns the number of objects the collection + * can store without reallocating. + * + * @result + * The number of objects the collection + * can store without reallocating. + * + * @discussion + * Subclasses must implement this pure virtual member function. + */ virtual unsigned int getCapacity() const = 0; - /* - @function getCapacityIncrement - @abstract A pure virtual member function which returns the growth factor of the collection subclass. - @results Returns the size by which the collection subclass should grow. - */ + + + /*! + * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the collection. + * + * @result + * The storage increment of the collection. + * + * @discussion + * Subclasses must implement this pure virtual member function. + * Most collection subclasses allocate their storage + * in multiples of the capacity increment. + * + * See + * @link + * //apple_ref/cpp/instm/OSCollection/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link + * for how the capacity increment is used. + */ virtual unsigned int getCapacityIncrement() const = 0; - /* - @function setCapacityIncrement - @abstract A pure virtual member function which sets the growth factor of the collection subclass. - @param increment The new size by which the capacity of the collection should grow. - @results Returns the new capacity increment. - */ + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the collection. + * + * @result + * The new storage increment of the collection, + * which may be different from the number requested. + * + * @discussion + * Subclasses must implement this pure virtual member function. + * Most collection subclasses allocate their storage + * in multiples of the capacity increment. 
+ * + * Collection subclasses should gracefully handle + * an increment of zero + * by applying (and returning) a positive minimum capacity. + * + * Setting the capacity increment does not trigger an immediate adjustment + * of a collection's storage. + * + * See + * @link + * //apple_ref/cpp/instm/OSCollection/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link + * for how the capacity increment is used. + */ virtual unsigned int setCapacityIncrement(unsigned increment) = 0; - /* - @function ensureCapacity - @abstract A pure virtual member function which - @param newCapacity - @result - */ + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the collection has enough space to store + * the requested number of objects. + * + * @param newCapacity The total number of objects the collection + * should be able to store. + * + * @result + * The new capacity of the collection, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * Subclasses implement this pure virtual member function + * to adjust their storage so that they can hold + * at least newCapacity objects. + * Libkern collections generally allocate storage + * in multiples of their capacity increment. + * + * Subclass methods that add objects to the collection + * should call this function before adding any object, + * and should check the return value for success. + * + * Collection subclasses may reduce their storage + * when the number of contained objects falls below some threshold, + * but no Libkern collections currently do. + */ virtual unsigned int ensureCapacity(unsigned int newCapacity) = 0; - /* - @function flushCollection - @abstract A pure virtual member function which - */ + + /*! + * @function flushCollection + * + * @abstract + * Empties the collection, releasing any objects retained. 
+ * + * @discussion + * Subclasses implement this pure virtual member function + * to remove their entire contents. + * This must not release the collection itself. + */ virtual void flushCollection() = 0; - /*! - @function setOptions - @abstract This function is used to recursively set option bits in this collection and all child collections. - @discussion setOptions is a recursive function but the OSCollection class itself does not know the structure of the particular collection. This means that all derived classes are expected to override this method and recurse if the old value of the option was NOT set, which is why the old value is returned. As this function is a reserved function override it is very multi purpose. It can be used to get & set the options, - @param options Set the (options & mask) bits. - @param mask The mask of bits which need to be set, 0 to get the current value. - @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. - */ - OSMetaClassDeclareReservedUsed(OSCollection, 0); - virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); - /*! - @function copyCollection - @abstract Do a deep copy of a collection tree. - @discussion This function copies this collection and all of the contained collections recursively. Objects that don't derive from OSContainter are NOT copied, that is objects like OSString and OSData. To a derive from OSConnection::copyCollection some code is required to be implemented in the derived class, below is the skeleton pseudo code to copy a collection. + /*! + * @function setOptions + * + * @abstract + * Recursively sets option bits in this collection + * and all child collections. + * + * @param options A bitfield whose values turn the options on (1) or off (0). + * @param mask A mask indicating which bits + * in options to change. + * Pass 0 to get the whole current options bitfield + * without changing any settings. + * @param context Unused. 
+ * + * @result + * The options bitfield as it was before the set operation. + * + * @discussion + * Kernel extensions should not call this function. + * + * The only option currently in use is + * @link //apple_ref/doc/title:econst/OSCollectionFlags/kImmutable + * kImmutable@/link. + * + * Subclasses should override this function to recursively apply + * the options to their contents if the options actually change. + */ + virtual unsigned setOptions( + unsigned options, + unsigned mask, + void * context = 0); + OSMetaClassDeclareReservedUsed(OSCollection, 0); -OSCollection * ::copyCollection(OSDictionary *inCycleDict) -{ - bool allocDict = !cycleDict; - OSCollection *ret = 0; - *newMyColl = 0; - - if (allocDict) - cycleDict = OSDictionary::withCapacity(16); - if (!cycleDict) - return 0; - - do { - // Check to see if we already have a copy of the new dictionary - ret = super::copyCollection(cycleDict); - if (ret) - continue; - - // Your code goes here to copy your collection, - // see OSArray & OSDictionary for examples. - newMyColl = ::with(this); - if (!newMyColl) - continue; - - // Insert object into cycle Dictionary - cycleDict->setObject((const OSSymbol *) this, newMyColl); - - // Duplicate any collections in us - for (unsigned int i = 0; i < count; i++) { - OSObject *obj = getObject(i); - OSCollection *coll = OSDynamicCast(OSCollection, obj); - - if (coll) { - OSCollection *newColl = coll->copyCollection(cycleDict); - if (!newColl) - goto abortCopy; - - newMyColl->replaceObject(i, newColl); - newColl->release(); - }; - }; - - ret = newMyColl; - newMyColl = 0; - - } while (false); - -abortCopy: - if (newMyColl) - newMyColl->release(); - - if (allocDict) - cycleDict->release(); - - return ret; -} - - @param cycleDict Is a dictionary of all of the collections that have been, to start the copy at the top level just leave this field 0. - @result The newly copied collecton or 0 if insufficient memory + /*! 
+ * @function copyCollection + * + * @abstract + * Creates a deep copy of a collection. + * + * @param cycleDict A dictionary of all of the collections + * that have been copied so far, + * to start the copy at the top level + * pass NULL for cycleDict. + * + * @result + * The newly copied collection, + * NULL on failure. + * + * @discussion + * This function copies the collection + * and all of the contained collections recursively. + * Objects that are not derived from OSCollection are retained + * rather than copied. + * + * Subclasses of OSCollection must override this function + * to properly support deep copies. */ - virtual OSCollection *copyCollection(OSDictionary *cycleDict = 0); + virtual OSCollection *copyCollection(OSDictionary * cycleDict = 0); OSMetaClassDeclareReservedUsed(OSCollection, 1); + OSMetaClassDeclareReservedUnused(OSCollection, 2); OSMetaClassDeclareReservedUnused(OSCollection, 3); OSMetaClassDeclareReservedUnused(OSCollection, 4); diff --git a/libkern/libkern/c++/OSCollectionIterator.h b/libkern/libkern/c++/OSCollectionIterator.h index a2d6d70ee..72e8e9792 100644 --- a/libkern/libkern/c++/OSCollectionIterator.h +++ b/libkern/libkern/c++/OSCollectionIterator.h @@ -35,61 +35,174 @@ class OSCollection; /*! - @class OSCollectionIterator - @discussion - OSCollectionIterator objects provide a consistent mechanism to iterate through all OSCollection derived collections. -*/ + * @header + * + * @abstract + * This header declares the OSCollectionIterator collection class. + */ + + +/*! + * @class OSCollectionIterator + * + * @discussion + * OSCollectionIterator defines a consistent mechanism to iterate + * through the objects of an OSCollection. + * It expands on the basic interface of + * @link //apple_ref/cpp/class/OSIterator OSIterator@/link + * to allow association of an iterator with a specific collection. 
+ * + * To use an OSCollectionIterator, you create it with the collection + * to be iterated, then call + * @link //apple_ref/cpp/class/OSIterator OSIterator@/link + * as long as it returns an object: + * + * @textblock + *
+ *     OSCollectionIterator * iterator =
+ *         OSCollectionIterator::withCollection(myCollection);
+ *     OSObject * object;
+ *     while (object = iterator->getNextObject()) {
+ *         // do something with object
+ *     }
+ *     // optional
+ *     if (!iterator->isValid()) {
+ *         // report that collection changed during iteration
+ *     }
+ *     iterator->release();
+ * 
+ * @/textblock + * + * Note that when iterating associative collections, + * the objects returned by getNextObject are keys; + * if you want to work with the associated values, + * simply look them up in the collection with the keys. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSCollectionIterator provides no concurrency protection. + */ class OSCollectionIterator : public OSIterator { OSDeclareDefaultStructors(OSCollectionIterator) protected: - const OSCollection *collection; - void *collIterator; - unsigned int initialUpdateStamp; - bool valid; +// xx-review: Do we want to document these? + const OSCollection * collection; + void * collIterator; + unsigned int initialUpdateStamp; + bool valid; public: - /*! - @function withCollection - @abstract A static constructor function which creates and initializes an instance of OSCollectionIterator for the provided collection object. - @param inColl The OSCollection derived collection object to be iteratated. - @result Returns a new instance of OSCollection or 0 on failure. + /*! + * @function withCollection + * + * @abstract + * Creates and initializes an OSCollectionIterator + * for the provided collection object. + * + * @param inColl The OSCollection-derived collection object to be iterated. + * + * @result + * A new instance of OSCollectionIterator, or NULL on failure. */ - static OSCollectionIterator *withCollection(const OSCollection *inColl); + static OSCollectionIterator * withCollection(const OSCollection * inColl); - /*! - @function withCollection - @abstract A member function to initialize the intance of OSCollectionIterator with the provided colleciton object. - @param inColl The OSCollection derived collection object to be iteratated. 
- @result Returns true if the initialization was successful or false on failure. + + /*! + * @function initWithCollection + * + * @abstract + * Initializes an OSCollectionIterator + * for the provided collection object. + * + * @param inColl The OSCollection-derived collection object to be iterated. + * @result + * true if the initialization was successful, + * or false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withCollection withCollection@/link instead. */ - virtual bool initWithCollection(const OSCollection *inColl); - /*! - @function free - @abstract A member function to release and deallocate all resources created or used by the OSCollectionIterator object. - @discussion This function should not be called directly, use release() instead. + virtual bool initWithCollection(const OSCollection * inColl); + + + /*! + * @function free + * + * @abstract + * Releases or deallocates any resources used + * by the OSCollectionIterator object. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function reset - @abstract A member function which resets the iterator to begin the next iteration from the beginning of the collection. + + /*! + * @function reset + * + * @abstract + * Resets the iterator to the beginning of the collection, + * as if it had just been created. */ virtual void reset(); - /*! - @function isValid - @abstract A member function for determining if the collection was modified during iteration. + + /*! + * @function isValid + * + * @abstract + * Checks that the collection hasn't been modified during iteration. + * + * @return + * true if the iterator is valid for continued use, + * false otherwise + * (typically because the iteration context has been modified). */ virtual bool isValid(); - /*! 
- @function getNextObject - @abstract A member function to get the next object in the collection being iterated. - @result Returns the next object in the collection or 0 when the end has been reached. + + /*! + * @function getNextObject + * + * @abstract + * Advances to and returns the next object in the iteration. + * + * @return + * The next object in the iteration context, + * NULL if there is no next object + * or if the iterator is no longer valid. + * + * @discussion + * This function first calls + * @link //apple_ref/cpp/instm/OSCollectionIterator/isValid/virtualbool/() + * isValid@/link + * and returns NULL if that function + * returns false. + * + * Subclasses must implement this pure virtual function + * to check for validity with + * @link + * //apple_ref/cpp/instm/OSCollectionIterator/isValid/virtualbool/() + * isValid@/link, + * and then to advance the iteration context to the next object (if any) + * and return that next object, or NULL if there is none. */ - virtual OSObject *getNextObject(); + virtual OSObject * getNextObject(); }; #endif /* !_OS_OSCOLLECTIONITERATOR_H */ diff --git a/libkern/libkern/c++/OSData.h b/libkern/libkern/c++/OSData.h index e699dff1d..2b4159604 100644 --- a/libkern/libkern/c++/OSData.h +++ b/libkern/libkern/c++/OSData.h @@ -36,227 +36,693 @@ class OSString; /*! - @class OSData - @abstract A container class to manage an array of bytes. -*/ + * @header + * + * @abstract + * This header declares the OSData container class. + */ + + +/*! + * @class OSData + * + * @abstract + * OSData wraps an array of bytes in a C++ object + * for use in Libkern collections. + * + * @discussion + * OSData represents an array of bytes as a Libkern C++ object. + * OSData objects are mutable: + * You can add bytes to them and + * overwrite portions of the byte array. 
+ * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSData provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSData : public OSObject { OSDeclareDefaultStructors(OSData) protected: - void *data; - unsigned int length; - unsigned int capacity; - unsigned int capacityIncrement; + void * data; + unsigned int length; + unsigned int capacity; + unsigned int capacityIncrement; struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; public: - /*! - @function withCapacity - @abstract A static constructor function to create and initialize an empty instance of OSData with a given capacity. - @param inCapacity The initial capacity of the OSData object in bytes. - @result Returns an instance of OSData or 0 if a failure occurs. + + /*! + * @function withCapacity + * + * @abstract + * Creates and initializes an empty instance of OSData. + * + * @param capacity The initial capacity of the OSData object in bytes. + * + * @result + * An instance of OSData with a reference count of 1; + * NULL on failure. + * + * @discussion + * capacity may be zero. + * The OSData object will allocate a buffer internally + * when necessary, and will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). 
*/ - static OSData *withCapacity(unsigned int inCapacity); - /*! - @function withBytes - @abstract A static constructor function to create and initialize an instance of OSData and copies in the provided data. - @param bytes A buffer of data. - @param inLength The size of the given buffer. - @result Returns an instance of OSData or 0 if a failure occurs. + static OSData * withCapacity(unsigned int capacity); + + + /*! + * @function withBytes + * + * @abstract + * Creates and initializes an instance of OSData + * with a copy of the provided data buffer. + * + * @param bytes The buffer of data to copy. + * @param numBytes The length of bytes. + * + * @result + * An instance of OSData containing a copy of the provided byte array, + * with a reference count of 1; + * NULL on failure. + * + * @discussion + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - static OSData *withBytes(const void *bytes, unsigned int inLength); - /*! - @function withBytesNoCopy - @abstract A static constructor function to create and initialize an instance of OSData which references a buffer of data. - @param bytes A reference to a block of data. - @param inLength The size of the data block. - @result Returns an instance of OSData or 0 if a failure occurs. + static OSData * withBytes( + const void * bytes, + unsigned int numBytes); + + + /*! + * @function withBytesNoCopy + * + * @abstract + * Creates and initializes an instance of OSData + * that shares the provided data buffer. + * + * @param bytes The buffer of data to represent. + * @param numBytes The length of bytes. + * + * @result + * An instance of OSData that shares the provided byte array, + * with a reference count of 1; + * NULL on failure.
+ * + * @discussion + * An OSData object created with this function + * does not claim ownership + * of the data buffer, but shares it with the caller. + * When the caller determines that the OSData object has actually been freed, + * it can safely dispose of the data buffer. + * Conversely, if it frees the shared data buffer, + * it must not attempt to use the OSData object and should release it. + * + * An OSData object created with shared external data cannot append bytes, + * but you can get the byte pointer and + * modify bytes within the shared buffer. */ - static OSData *withBytesNoCopy(void *bytes, unsigned int inLength); - /*! - @function withData - @abstract A static constructor function to create and initialize an instance of OSData with the data provided. - @param inData An OSData object which provides the initial data. - @result Returns an instance of OSData or 0 if a failure occurs. + static OSData * withBytesNoCopy( + void * bytes, + unsigned int numBytes); + + + /*! + * @function withData + * + * @abstract + * Creates and initializes an instance of OSData + * with contents copied from another OSData object. + * + * @param inData An OSData object that provides the initial data. + * + * @result + * An instance of OSData containing a copy of the data in inData, + * with a reference count of 1; + * NULL on failure. + * + * @discussion + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - static OSData *withData(const OSData *inData); - /*! - @function withData - @abstract A static constructor function to create and initialize an instance of OSData with a specific range of the data provided. - @param inData An OSData object which provides the initial data. - @param start The starting index at which the data will be copied. - @param inLength The number of bytes to be copied starting at index 'start'. 
- @result Returns an instance of OSData or 0 if a failure occurs. + static OSData * withData(const OSData * inData); + + + /*! + * @function withData + * + * @abstract + * Creates and initializes an instance of OSData + * with contents copied from a range within another OSData object. + * + * @param inData An OSData object that provides the initial data. + * @param start The starting index from which bytes will be copied. + * @param numBytes The number of bytes to be copied from start. + * + * @result + * An instance of OSData containing a copy + * of the specified data range from inData, + * with a reference count of 1; + * NULL on failure. + * + * @discussion + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - static OSData *withData(const OSData *inData, - unsigned int start, unsigned int inLength); - - /*! - @function initWithCapacity - @abstract A member function to initialize an instance of OSData with a minimum capacity of at least the given size. If this function is called an an object that has been previously used then the length is set down to 0 and a new block of data is allocated if necessary to ensure the given capacity. - @param capacity The length of the allocated block of data. - @result Returns true if initialization was successful, false otherwise. + static OSData * withData( + const OSData * inData, + unsigned int start, + unsigned int numBytes); + + + /*! + * @function initWithCapacity + * + * @abstract + * Initializes an instance of OSData. + * + * @param capacity The initial capacity of the OSData object in bytes. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSData/withCapacity/staticOSData*\/(unsignedint) + * withCapacity@/link instead. + * + * capacity may be zero. 
+ * The OSData object will allocate a buffer internally + * when necessary, and will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ virtual bool initWithCapacity(unsigned int capacity); - /*! - @function initWithBytes - @abstract A member function to initialize an instance of OSData which references a block of data. - @param bytes A reference to a block of data - @param inLength The length of the block of data. - @result Returns true if initialization was successful, false otherwise. + + + /*! + * @function initWithBytes + * + * @abstract + * Initializes an instance of OSData + * with a copy of the provided data buffer. + * + * @param bytes The buffer of data to copy. + * @param numBytes The length of bytes. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withBytes withBytes@/link instead. + * + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - virtual bool initWithBytes(const void *bytes, unsigned int inLength); - /*! - @function initWithBytes - @abstract A member function to initialize an instance of OSData which references a block of data. - @param bytes A reference to a block of data - @param inLength The length of the block of data. - @result Returns true if initialization was successful, false otherwise. + virtual bool initWithBytes( + const void * bytes, + unsigned int numBytes); + + + /*! + * @function initWithBytesNoCopy + * + * @abstract + * Initializes an instance of OSData + * to share the provided data buffer. + * + * @param bytes The buffer of data to represent. + * @param numBytes The length of bytes. + * + * @result + * true on success, false on failure. 
+ * + * @discussion + * Not for general use. Use the static instance creation method + * @link withBytesNoCopy withBytesNoCopy@/link instead. + * + * An OSData object initialized with this function + * does not claim ownership + * of the data buffer, but merely shares it with the caller. + * + * An OSData object created with shared external data cannot append bytes, + * but you can get the byte pointer and + * modify bytes within the shared buffer. */ - virtual bool initWithBytesNoCopy(void *bytes, unsigned int inLength); - /*! - @function initWithData - @abstract A member function to initialize an instance of OSData with the data provided. - @param inData An OSData object which provides the data to be copied. - @result Returns true if initialization was successful, false otherwise. + virtual bool initWithBytesNoCopy( + void * bytes, + unsigned int numBytes); + + + /*! + * @function initWithData + * + * @abstract + * Initializes an instance of OSData + * with contents copied from another OSData object. + * + * @param inData An OSData object that provides the initial data. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSData/withData/staticOSData*\/(constOSData*) + * withData(OSData *)@/link + * instead. + * + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - virtual bool initWithData(const OSData *inData); - /*! - @function initWithData - @abstract A member function to initialize an instance of OSData with a specific range of the data provided - @param inData An OSData object. - @param start The starting range of data to be copied. - @param inLength The length in bytes of the data to be copied. - @result Returns true if initialization was successful, false otherwise.
+ virtual bool initWithData(const OSData * inData); + + + /*! + * @function initWithData + * + * @abstract + * Initializes an instance of OSData + * with contents copied from a range within another OSData object. + * + * @param inData An OSData object that provides the initial data. + * @param start The starting index from which bytes will be copied. + * @param numBytes The number of bytes to be copied from start. + * + * @result + * Returns true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSData/withData/staticOSData*\/(constOSData*,unsignedint,unsignedint) + * withData(OSData *, unsigned int, unsigned int)@/link + * instead. + * + * The new OSData object will grow as needed to accommodate more bytes + * (unlike @link //apple_ref/doc/uid/20001498 CFMutableData@/link, + * for which a nonzero initial capacity is a hard limit). */ - virtual bool initWithData(const OSData *inData, - unsigned int start, unsigned int inLength); - /*! - @function free - @abstract A member function which releases all resources created or used by the OSData object. - @discussion Do not call this function directly, use release() instead. + virtual bool initWithData( + const OSData * inData, + unsigned int start, + unsigned int numBytes); + + + /*! + * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSData instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getLength - @abstract A member function which returns the length of the internal data buffer. - @result Returns an integer value for the length of data in the object's internal data buffer. + + /*! + * @function getLength + * + * @abstract + * Returns the number of bytes in or referenced by the OSData object.
+ * + * @result + * The number of bytes in or referenced by the OSData object. */ virtual unsigned int getLength() const; - /*! - @function getCapacity - @abstract A member function which returns the capacity of the internal data buffer. - @result Returns an integer value for the size of the object's internal data buffer. + + + /*! + * @function getCapacity + * + * @abstract + * Returns the total number of bytes the OSData can store without reallocating. + * + * @result + * The total number of bytes the OSData can store without reallocating. + * + * @discussion + * OSData objects grow when full to accommodate additional bytes. + * See + * @link + * //apple_ref/cpp/instm/OSData/getCapacityIncrement/virtualunsignedint/() + * getCapacityIncrement@/link + * and + * @link + * //apple_ref/cpp/instm/OSData/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link. + * + * OSData objects created or initialized to use a shared buffer + * do not make use of this attribute, and return -1 from this function. */ virtual unsigned int getCapacity() const; - /*! - @function getCapacityIncrement - @abstract A member function which returns the size by which the data buffer will grow. - @result Returns the size by which the data buffer will grow. + + + /*! + * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the OSData object. + * + * @result + * The storage increment of the OSData object. + * + * @discussion + * An OSData object allocates storage for bytes in multiples + * of the capacity increment. + * + * OSData objects created or initialized to use a shared buffer + * do not make use of this attribute. */ virtual unsigned int getCapacityIncrement() const; - /*! - @function setCapacityIncrement - @abstract A member function which sets the growth size of the data buffer. - @result Returns the new growth size. + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the OSData object.
+ * + * @result + * The original storage increment of the OSData object. + * + * @discussion + * An OSData object allocates storage for bytes in multiples + * of the capacity increment. + * + * OSData objects created or initialized to use a shared buffer + * do not make use of this attribute. */ virtual unsigned int setCapacityIncrement(unsigned increment); - /*! - @function ensureCapacity - @abstract A member function which will expand the size of the collection to a given storage capacity. - @param newCapacity The new capacity for the data buffer. - @result Returns the new capacity of the data buffer or the previous capacity upon error. + + +// xx-review: does not check for capacity == EXTERNAL + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the OSData object has enough space + * to store the requested number of bytes. + * + * @param newCapacity The total number of bytes the OSData object + * should be able to store. + * + * @result + * Returns the new capacity of the OSData object, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * This function immediately resizes the OSData's buffer, if necessary, + * to accommodate at least newCapacity bytes. + * If newCapacity is not greater than the current capacity, + * or if an allocation error occurs, the original capacity is returned. + * + * There is no way to reduce the capacity of an OSData. + * + * An OSData object created "NoCopy" does not allow resizing. */ virtual unsigned int ensureCapacity(unsigned int newCapacity); - /*! - @function appendBytes - @abstract A member function which appends a buffer of data onto the end of the object's internal data buffer. - @param bytes A pointer to the block of data. If the value is 0 then append zero-ed memory to the data object. - @param inLength The length of the data block. - @result Returns true if the object was able to append the new data, false otherwise.
- */ - virtual bool appendBytes(const void *bytes, unsigned int inLength); - /*! - @function appendBytes - @abstract A member function which appends the data contained in an OSData object to the receiver. - @param other An OSData object. - @result Returns true if appending the new data was successful, false otherwise. + + + /*! + * @function appendBytes + * + * @abstract + * Appends a buffer of bytes to the OSData object's internal data buffer. + * + * @param bytes A pointer to the data to append. + * If bytes is NULL + * then a zero-filled buffer of length numBytes + * is appended. + * @param numBytes The number of bytes from bytes to append. + * + * @result + * true if the new data was successfully added, + * false on failure. + * + * @discussion + * This function immediately resizes the OSData's buffer, if necessary, + * to accommodate the new total size. + * + * An OSData object created "NoCopy" does not allow bytes + * to be appended. */ - virtual bool appendBytes(const OSData *other); + virtual bool appendBytes( + const void * bytes, + unsigned int numBytes); - /*! - @function getBytesNoCopy - @abstract A member function to return a pointer to the OSData object's internal data buffer. - @result Returns a reference to the OSData object's internal data buffer. + + /*! + * @function appendBytes + * + * @abstract + * Appends the data contained in another OSData object. + * + * @param aDataObj The OSData object whose contents will be appended. + * + * @result + * true if the new data was successfully added, + * false on failure. + * + * @discussion + * This function immediately resizes the OSData's buffer, if necessary, + * to accommodate the new total size. + * + * An OSData object created "NoCopy" does not allow bytes + * to be appended. */ - virtual const void *getBytesNoCopy() const; - /*! - @function getBytesNoCopy - @abstract Returns a reference into the OSData object's internal data buffer at particular offset and with a particular length. 
- @param start The offset from the base of the internal data buffer. - @param inLength The length of window. - @result Returns a pointer at a particular offset into the data buffer, or 0 if the starting offset or length are not valid. + virtual bool appendBytes(const OSData * aDataObj); + + + /*! + * @function getBytesNoCopy + * + * @abstract + * Returns a pointer to the OSData object's internal data buffer. + * + * @result + * A pointer to the OSData object's internal data buffer. + * + * @discussion + * You can modify the existing contents of an OSData object + * via this function. + * It works with OSData objects that have their own data buffers + * as well as with OSData objects that have shared buffers. + * + * If you append bytes or characters to an OSData object, + * it may have to reallocate its internal storage, + * rendering invalid an extracted pointer to that storage. */ - virtual const void *getBytesNoCopy(unsigned int start, - unsigned int inLength) const; - - /*! - @function isEqualTo - @abstract A member function to test the equality of two OSData objects. - @param aData The OSData object to be compared to the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + virtual const void * getBytesNoCopy() const; + + + /*! + * @function getBytesNoCopy + * + * @abstract + * Returns a pointer into the OSData object's internal data buffer + * with a given offset and length. + * + * @param start The offset from the base of the internal data buffer. + * @param numBytes The length of the window. + * + * @result + * A pointer to the bytes in the specified range + * within the OSData object, + * or 0 if that range does not lie completely + * within the object's buffer. + * + * @discussion + * You can modify the existing contents of an OSData object + * via this function. + * It works with OSData objects that have their own data buffers + * as well as with OSData objects that have shared buffers.
+ * + * If you append bytes or characters to an OSData object, + * it may have to reallocate its internal storage, + * rendering invalid an extracted pointer to that storage. */ - virtual bool isEqualTo(const OSData *aData) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of an arbitrary block of data with the OSData object's internal data buffer. - @param someData A pointer to a block of data. - @param inLength The length of the block of data. - @result Returns true if the two blocks of data are equivalent, false otherwise. + virtual const void * getBytesNoCopy( + unsigned int start, + unsigned int numBytes) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSData objects. + * + * @param aDataObj The OSData object being compared against the receiver. + * + * @result + * true if the two OSData objects are equivalent, + * false otherwise. + * + * @discussion + * Two OSData objects are considered equal + * if they have same length and if their + * byte buffers hold the same contents. */ - virtual bool isEqualTo(const void *someData, unsigned int inLength) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between an OSData object and an arbitrary OSObject derived object. - @param obj An OSObject derived object. - @result Returns true if the two objects are equivalent. + virtual bool isEqualTo(const OSData * aDataObj) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSData object's contents + * to a C array of bytes. + * + * @param bytes A pointer to the bytes to compare. + * @param numBytes The number of bytes to compare. + * + * @result + * true if the data buffers are equal + * over the given length, + * false otherwise. */ - virtual bool isEqualTo(const OSMetaClassBase *obj) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between an OSData object and an OSString object.
- @param obj An OSString object - @result Returns true if the two objects are equivalent. + virtual bool isEqualTo( + const void * bytes, + unsigned int numBytes) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSData object to an arbitrary object. + * + * @param anObject The object to be compared against the receiver. + * + * @result + * true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * An OSData is considered equal to another object + * if that object is derived from OSData + * and contains the equivalent bytes of the same length. */ - virtual bool isEqualTo(const OSString *obj) const; - /*! - @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSData object to an OSString. + * + * @param aString The string object to be compared against the receiver. + * + * @result + * true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * This function compares the bytes of the OSData object + * against those of the OSString, + * accounting for the possibility that an OSData + * might explicitly include a nul + * character as part of its total length. + * Thus, for example, an OSData object containing + * either the bytes <'u', 's', 'b', '\0'> + * or <'u', 's', 'b'> + * will compare as equal to the OSString containing "usb". */ - virtual bool serialize(OSSerialize *s) const; - - /*! - @function appendByte - @abstract A member function which appends a buffer of constant data onto the end of the object's internal data buffer. - @param byte A byte value to replicate as the added data. - @param inCount The length of the data to add. 
- @result Returns true if the object was able to append the new data, false otherwise. + virtual bool isEqualTo(const OSString * aString) const; + + + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. */ - virtual bool appendByte(unsigned char byte, unsigned int inCount); + virtual bool serialize(OSSerialize * serializer) const; + /*! + * @function appendByte + * + * @abstract + * Appends a single byte value + * to the OSData object's internal data buffer + * a specified number of times. + * + * @param byte The byte value to append. + * @param numBytes The number of copies of byte to append. + * + * @result + * true if the new data was successfully added, + * false if not. + * + * @discussion + * This function immediately resizes the OSData's buffer, if necessary, + * to accommodate the new total size. + * + * An OSData object created "NoCopy" does not allow bytes + * to be appended. + */ + virtual bool appendByte( + unsigned char byte, + unsigned int numBytes); + +#ifdef XNU_KERNEL_PRIVATE +/* Available within xnu source only */ +public: +#else +private: +#endif + // xxx - DO NOT USE - This interface may change + typedef void (*DeallocFunction)(void * ptr, unsigned int length); + virtual void setDeallocFunction(DeallocFunction func); + OSMetaClassDeclareReservedUsed(OSData, 0); private: - OSMetaClassDeclareReservedUnused(OSData, 0); OSMetaClassDeclareReservedUnused(OSData, 1); OSMetaClassDeclareReservedUnused(OSData, 2); OSMetaClassDeclareReservedUnused(OSData, 3); diff --git a/libkern/libkern/c++/OSDictionary.h b/libkern/libkern/c++/OSDictionary.h index 30efb0010..74ec638e4 100644 --- a/libkern/libkern/c++/OSDictionary.h +++ b/libkern/libkern/c++/OSDictionary.h @@ -44,293 +44,863 @@ class OSSymbol; class OSString; /*!
- @class OSDictionary - @abstract A collection class whose instances maintain a list of object references. Objects in the collection are acquired with unique associative keys. - @discussion - An instance of OSDictionary is a mutable container which contains a list of OSMetaClassBase derived object references and these objects are identified and acquired by unique associative keys. When an object is placed into a dictionary, a unique identifier or key must provided to identify the object within the collection. The key then must be provided to find the object within the collection. If an object is not found within the collection, a 0 is returned. Placing an object into a dictionary for a key, which already identifies an object within that dictionary, will replace the current object with the new object. - - Objects placed into a dictionary are automatically retained and objects removed or replaced are automatically released. All objects are released when the collection is freed. -*/ + * @header + * + * @abstract + * This header declares the OSDictionary collection class. + */ + + +/*! + * @class OSDictionary + * + * @abstract + * OSDictionary provides an associative store using strings for keys. + * + * @discussion + * OSDictionary is a container for Libkern C++ objects + * (those derived from + * @link //apple_ref/doc/class/OSMetaClassBase OSMetaClassBase@/link, + * in particular @link //apple_ref/doc/class/OSObject OSObject@/link). + * Storage and access are associative, based on string-valued keys + * (C string, @link //apple_ref/cpp/cl/OSString OSString@/link, + * or @link //apple_ref/cpp/cl/OSSymbol OSSymbol@/link). + * When adding an object to an OSDictionary, you provide a string identifier, + * which can then be used to retrieve that object or remove it from the dictionary. + * Setting an object with a key that already has an associated object + * replaces the original object.
+ * + * You must generally cast retrieved objects from + * @link //apple_ref/cpp/cl/OSObject OSObject@/link + * to the desired class using + * @link //apple_ref/cpp/macro/OSDynamicCast OSDynamicCast@/link. + * This macro returns the object cast to the desired class, + * or NULL if the object isn't derived from that class. + * + * When iterating an OSDictionary using + * @link //apple_ref/doc/class/OSCollectionIterator OSCollectionIterator@/link, + * the objects returned from + * @link //apple_ref/doc/function/OSCollectionIterator::getNextObject + * getNextObject@/link + * are dictionary keys (not the object values for those keys). + * You can use the keys to retrieve their associated object values. + * + * As with all Libkern collection classes, + * OSDictionary retains keys and objects added to it, + * and releases keys and objects removed from it (or replaced). + * An OSDictionary also grows as necessary to accommodate new key/value pairs, + * unlike Core Foundation collections (it does not, however, shrink). + * + * Note: OSDictionary currently uses a linear search algorithm, + * and is not designed for high-performance access of many values. + * It is intended as a simple associative-storage mechanism only. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSDictionary provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. 
+ */ class OSDictionary : public OSCollection { OSDeclareDefaultStructors(OSDictionary) protected: struct dictEntry { - const OSSymbol *key; - const OSMetaClassBase *value; + const OSSymbol * key; + const OSMetaClassBase * value; }; - dictEntry *dictionary; - unsigned int count; - unsigned int capacity; - unsigned int capacityIncrement; + dictEntry * dictionary; + unsigned int count; + unsigned int capacity; + unsigned int capacityIncrement; struct ExpansionData { }; - - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; + + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; // Member functions used by the OSCollectionIterator class. virtual unsigned int iteratorSize() const; - virtual bool initIterator(void *iterator) const; - virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const; + virtual bool initIterator(void * iterator) const; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; public: - /*! - @function withCapacity - @abstract A static constructor function to create and initialize an instance of OSDictionary. - @param capacity The initial storage capacity of the dictionary object. - @result Returns an instance of OSDictionary or 0 on failure. + + /*! + * @function withCapacity + * + * @abstract + * Creates and initializes an empty OSDictionary. + * + * @param capacity The initial storage capacity of the new dictionary object. + * + * @result + * An empty instance of OSDictionary + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * capacity must be nonzero. + * The new dictionary will grow as needed to accommodate more key/object pairs + * (unlike @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ - static OSDictionary *withCapacity(unsigned int capacity); - /*! 
- @function withObjects - @abstract A static constructor function to create and initialize an instance of OSDictionary and populate it with objects provided. - @param objects A static array of OSMetaClassBase derived objects. - @param keys A static array of OSSymbol keys. - @param count The number of items to be placed into the dictionary. - @param capacity The initial storage capacity of the dictionary object. If 0, the capacity will be set to the size of 'count', else this value must be greater or equal to 'count'. - @result Returns an instance of OSDictionary or 0 on failure. + static OSDictionary * withCapacity(unsigned int capacity); + + + /*! + * @function withObjects + * + * @abstract Creates and initializes an OSDictionary + * populated with keys and objects provided. + * + * @param objects A C array of OSMetaClassBase-derived objects. + * @param keys A C array of OSSymbol keys + * for the corresponding objects in objects. + * @param count The number of keys and objects + * to be placed into the dictionary. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * An instance of OSDictionary + * containing the key/object pairs provided, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * objects and keys must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ - static OSDictionary *withObjects(const OSObject *objects[], - const OSSymbol *keys[], - unsigned int count, - unsigned int capacity = 0); - /*! 
- @function withObjects - @abstract A static constructor function to create and initialize an instance of OSDictionary and populate it with objects provided. - @param objects A static array of OSMetaClassBase derived objects. - @param keys A static array of OSString keys. - @param count The number of items to be placed into the dictionary. - @param capacity The initial storage capacity of the dictionary object. If 0, the capacity will be set to the size of 'count', else this value must be greater or equal to 'count'. - @result Returns an instance of OSDictionary or 0 on failure. + static OSDictionary * withObjects( + const OSObject * objects[], + const OSSymbol * keys[], + unsigned int count, + unsigned int capacity = 0); + + /*! + * @function withObjects + * + * @abstract + * Creates and initializes an OSDictionary + * populated with keys and objects provided. + * + * @param objects A C array of OSMetaClassBase-derived objects. + * @param keys A C array of OSString keys for the corresponding objects + * in objects. + * @param count The number of keys and objects + * to be placed into the dictionary. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * An instance of OSDictionary + * containing the key/object pairs provided, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * objects and keys must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ - static OSDictionary *withObjects(const OSObject *objects[], - const OSString *keys[], - unsigned int count, - unsigned int capacity = 0); - /*! 
- @function withDictionary - @abstract A static constructor function to create and initialize an instance of OSDictionary and populate it with objects from another dictionary. - @param dict A dictionary whose contents will be placed in the new instance. - @param capacity The initial storage capacity of the dictionary object. If 0, the capacity will be set to the number of elements in the dictionary object, else the capacity must be greater than or equal to the number of elements in the dictionary. - @result Returns an instance of OSDictionary or 0 on failure. + static OSDictionary * withObjects( + const OSObject * objects[], + const OSString * keys[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function withDictionary + * + * @abstract + * Creates and initializes an OSDictionary + * populated with the contents of another dictionary. + * + * @param dict A dictionary whose contents will be stored + * in the new instance. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, the capacity is set to the number of key/value pairs + * in dict; + * otherwise capacity must be greater than or equal to + * the number of key/value pairs in dict. + * + * @result + * An instance of OSDictionary + * containing the key/value pairs of dict, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * dict must be non-NULL. + * If capacity is nonzero, it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). + * + * The keys and objects in dict are retained for storage + * in the new OSDictionary, + * not copied. */ - static OSDictionary *withDictionary(const OSDictionary *dict, - unsigned int capacity = 0); - - /*! - @function initWithCapacity - @abstract A member function to initialize an instance of OSDictionary. 
- @param capacity The initial storage capacity of the dictionary object. - @result Returns true if initialization succeeded or false on failure. + static OSDictionary * withDictionary( + const OSDictionary * dict, + unsigned int capacity = 0); + + + /*! + * @function initWithCapacity + * + * @abstract + * Initializes a new instance of OSDictionary. + * + * @param capacity The initial storage capacity of the new dictionary object. + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link //apple_ref/cpp/clm/OSDictionary/withCapacity/staticOSDictionary*\/(unsignedint) + * withCapacity@/link + * instead. + * + * capacity must be nonzero. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ virtual bool initWithCapacity(unsigned int capacity); - /*! - @function initWithObjects - @abstract A member function to initialize an instance of OSDictionary and populate it with the provided objects and keys. - @param objects A static array of OSMetaClassBase derived objects to be placed into the dictionary. - @param keys A static array of OSSymbol keys which identify the corresponding objects provided in the 'objects' parameter. - @param count The number of objects provided to the dictionary. - @param capacity The initial storage capacity of the dictionary object. If 0, the capacity will be set to the size of 'count', else the capacity must be greater than or equal to the value of 'count'. - @result Returns true if initialization succeeded or false on failure. + + + /*! + * @function initWithObjects + * + * @abstract Initializes a new OSDictionary with keys and objects provided. + * + * @param objects A C array of OSMetaClassBase-derived objects. 
+ * @param keys A C array of OSSymbol keys + * for the corresponding objects in objects. + * @param count The number of keys and objects to be placed + * into the dictionary. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSDictionary/withObjects/staticOSDictionary*\/(constOSObject*,constOSString*,unsignedint,unsignedint) + * withObjects@/link + * instead. + * + * objects and keys must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ - virtual bool initWithObjects(const OSObject *objects[], - const OSSymbol *keys[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function initWithObjects - @abstract A member function to initialize an instance of OSDictionary and populate it with the provided objects and keys. - @param objects A static array of OSMetaClassBase derived objects to be placed into the dictionary. - @param keys A static array of OSString keys which identify the corresponding objects provided in the 'objects' parameter. - @param count The number of objects provided to the dictionary. - @param capacity The initial storage capacity of the dictionary object. If 0, the capacity will be set to the size of 'count', else the capacity must be greater than or equal to the value of 'count'. - @result Returns true if initialization succeeded or false on failure.
+ virtual bool initWithObjects( + const OSObject * objects[], + const OSSymbol * keys[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function initWithObjects + * + * @abstract + * Initializes a new OSDictionary with keys and objects provided. + * + * @param objects A C array of OSMetaClassBase-derived objects. + * @param keys A C array of OSString keys + * for the corresponding objects in objects. + * @param count The number of keys and objects + * to be placed into the dictionary. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSDictionary/withObjects/staticOSDictionary*\/(constOSObject*,constOSString*,unsignedint,unsignedint) + * withObjects@/link + * instead. + * + * objects and keys must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). */ - virtual bool initWithObjects(const OSObject *objects[], - const OSString *keys[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function initWithDictionary - @abstract A member function to initialize an instance of OSDictionary and populate it with the contents of another dictionary. - @param dict The dictionary containing the objects to be used to populate the receiving dictionary. - @param capacity The initial storage capacity of the dictionary. 
If 0, the value of capacity will be set to the number of elements in the dictionary object, else the value of capacity must be greater than or equal to the number of elements in the dictionary object. - @result Returns true if initialization succeeded or false on failure. + virtual bool initWithObjects( + const OSObject * objects[], + const OSString * keys[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function initWithDictionary + * + * @abstract + * Initializes a new OSDictionary + * with the contents of another dictionary. + * + * @param dict A dictionary whose contents will be placed + * in the new instance. + * @param capacity The initial storage capacity of the new dictionary object. + * If 0, the capacity is set to the number of key/value pairs + * in dict; + * otherwise capacity must be greater than or equal to + * the number of key/value pairs in dict. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withDictionary withDictionary@/link instead. + * + * dict must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new dictionary will grow as needed + * to accommodate more key/object pairs + * (unlike + * @link //apple_ref/doc/uid/20001497 CFMutableDictionary@/link, + * for which the initial capacity is a hard limit). + * + * The keys and objects in dict are retained for storage + * in the new OSDictionary, + * not copied. */ - virtual bool initWithDictionary(const OSDictionary *dict, - unsigned int capacity = 0); - /*! - @function free - @abstract A member functions to deallocate and release all resources used by the OSDictionary instance. - @discussion This function should not be called directly, use release() instead. + virtual bool initWithDictionary( + const OSDictionary * dict, + unsigned int capacity = 0); + + + /*! 
+ * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSDictionary instance. + * + * @discussion + * This function should not be called directly, + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getCount - @abstract A member function which returns the current number of objects within the collection. - @result Returns the number of objects contained within the dictionary. + + /*! + * @function getCount + * + * @abstract + * Returns the current number of key/object pairs + * contained within the dictionary. + * + * @result + * The current number of key/object pairs + * contained within the dictionary. */ virtual unsigned int getCount() const; - /*! - @function getCapacity - @abstract A member function which returns the storage capacity of the collection. - @result Returns the storage capacity of the dictionary. + + + /*! + * @function getCapacity + * + * @abstract + * Returns the number of objects the dictionary can store without reallocating. + * + * @result + * The number of objects the dictionary can store without reallocating. + * + * @discussion + * OSDictionary objects grow when full + * to accommodate additional key/object pairs. + * See + * @link + * //apple_ref/cpp/instm/OSDictionary/getCapacityIncrement/virtualunsignedint/() + * getCapacityIncrement@/link + * and + * @link + * //apple_ref/cpp/instm/OSDictionary/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link. */ virtual unsigned int getCapacity() const; - /*! - @function getCapacityIncrement - @abstract A member function which returns the growth size for the collection. + + + /*! + * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the dictionary. + * + * @result + * The storage increment of the dictionary.
+ * + * @discussion + * An OSDictionary allocates storage for key/object pairs in multiples + * of the capacity increment. */ virtual unsigned int getCapacityIncrement() const; - /*! - @function setCapacityIncrement - @abstract A member function to set the growth size of the collection. - @param increment The new growth size. - @result Returns the new capacity increment. + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the dictionary. + * + * @result + * The new storage increment of the dictionary, + * which may be different from the number requested. + * + * @discussion + * An OSDictionary allocates storage for key/object pairs in multiples + * of the capacity increment. + * Calling this function does not immediately reallocate storage. */ virtual unsigned int setCapacityIncrement(unsigned increment); - /*! - @function ensureCapacity - @abstract Member function to grow the size of the collection. - @param newCapacity The new capacity for the dictionary to expand to. - @result Returns the new capacity of the dictionary or the previous capacity upon error. + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the dictionary has enough space + * to store the requested number of key/object pairs. + * + * @param newCapacity The total number of key/object pairs the dictionary + * should be able to store. + * + * @result + * The new capacity of the dictionary, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * This function immediately resizes the dictionary, if necessary, + * to accommodate at least newCapacity key/object pairs. + * If newCapacity is not greater than the current capacity, + * or if an allocation error occurs, the original capacity is returned. + * + * There is no way to reduce the capacity of an OSDictionary. */ virtual unsigned int ensureCapacity(unsigned int newCapacity); - /*! 
- @function flushCollection - @abstract A member function which removes and releases all objects within the collection. + + /*! + * @function flushCollection + * + * @abstract + * Removes and releases all keys and objects within the dictionary. + * + * @discussion + * The dictionary's capacity (and therefore direct memory consumption) + * is not reduced by this function. */ virtual void flushCollection(); - /*! - @function setObject - @abstract A member function which places an object into the dictionary and identified by a unique key. - @param aKey A unique OSSymbol identifying the object placed within the collection. - @param anObject The object to be stored in the dictionary. It is automatically retained. - @result Returns true if the addition of an object was successful, false otherwise. + + /*! + * @function setObject + * + * @abstract + * Stores an object in the dictionary under a key. + * + * @param aKey An OSSymbol identifying the object + * placed within the dictionary. + * It is automatically retained. + * @param anObject The object to be stored in the dictionary. + * It is automatically retained. + * + * @result + * true if the addition was successful, + * false otherwise. + * + * @discussion + * An object already stored under aKey is released. */ - virtual bool setObject(const OSSymbol *aKey, const OSMetaClassBase *anObject); - /*! - @function setObject - @abstract A member function which places an object into the dictionary and identified by a unique key. - @param aKey A unique OSString identifying the object placed within the collection. - @param anObject The object to be stored in the dictionary. It is automatically retained. - @result Returns true if the addition of an object was successful, false otherwise. + virtual bool setObject( + const OSSymbol * aKey, + const OSMetaClassBase * anObject); + + + /*! + * @function setObject + * + * @abstract Stores an object in the dictionary under a key. 
+ * + * @param aKey An OSString identifying the object + * placed within the dictionary. + * @param anObject The object to be stored in the dictionary. + * It is automatically retained. + * + * @result + * true if the addition was successful, + * false otherwise. + * + * @discussion + * An OSSymbol for aKey is created internally. + * An object already stored under aKey is released. */ - virtual bool setObject(const OSString *aKey, const OSMetaClassBase *anObject); - /*! - @function setObject - @abstract A member function which places an object into the dictionary and identified by a unique key. - @param aKey A unique string identifying the object placed within the collection. - @param anObject The object to be stored in the dictionary. It is automatically retained. - @result Returns true if the addition of an object was successful, false otherwise. + virtual bool setObject( + const OSString * aKey, + const OSMetaClassBase * anObject); + + + /*! + * @function setObject + * + * @abstract + * Stores an object in the dictionary under a key. + * + * @param aKey A C string identifying the object + * placed within the dictionary. + * @param anObject The object to be stored in the dictionary. + * It is automatically retained. + * + * @result + * true if the addition was successful, + * false otherwise. + * + * @discussion + * An OSSymbol for aKey is created internally. + * An object already stored under aKey is released. */ - virtual bool setObject(const char *aKey, const OSMetaClassBase *anObject); + virtual bool setObject( + const char * aKey, + const OSMetaClassBase * anObject); - /*! - @function removeObject - @abstract A member function which removes an object from the dictionary. The removed object is automatically released. - @param aKey A unique OSSymbol identifying the object to be removed from the dictionary. + + /*! + * @function removeObject + * + * @abstract + * Removes a key/object pair from the dictionary. 
+ * + * @param aKey An OSSymbol identifying the object + * to be removed from the dictionary. + * + * @discussion + * The removed key (not necessarily aKey itself) + * and object are automatically released. */ - virtual void removeObject(const OSSymbol *aKey); - /*! - @function removeObject - @abstract A member function which removes an object from the dictionary. The removed object is automatically released. - @param aKey A unique OSString identifying the object to be removed from the dictionary. + virtual void removeObject(const OSSymbol * aKey); + + + /*! + * @function removeObject + * + * @abstract + * Removes a key/object pair from the dictionary. + * + * @param aKey An OSString identifying the object + * to be removed from the dictionary. + * + * @discussion + * The removed key (not necessarily aKey itself) + * and object are automatically released. */ - virtual void removeObject(const OSString *aKey); - /*! - @function removeObject - @abstract A member function which removes an object from the dictionary. The removed object is automatically released. - @param aKey A unique string identifying the object to be removed from the dictionary. + virtual void removeObject(const OSString * aKey); + + + /*! + * @function removeObject + * + * @abstract + * Removes a key/object pair from the dictionary. + * + * @param aKey A C string identifying the object + * to be removed from the dictionary. + * + * @discussion + * The removed key (internally an OSSymbol) + * and object are automatically released. */ - virtual void removeObject(const char *aKey); - - /*! - @function merge - @abstract A member function which merges the contents of a dictionary into the receiver. - @param aDictionary The dictionary whose contents are to be merged with the receiver. - @result Returns true if the merger is successful, false otherwise.
- @discussion If there are keys in 'aDictionary' which match keys in the receiving dictionary, then the objects in the receiver are replaced by those from 'aDictionary', the replaced objects are released. + virtual void removeObject(const char * aKey); + + + /*! + * @function merge + * + * @abstract + * Merges the contents of a dictionary into the receiver. + * + * @param aDictionary The dictionary whose contents + * are to be merged with the receiver. + * @result + * true if the merge succeeds, false otherwise. + * + * @discussion + * If there are keys in aDictionary that match keys + * in the receiving dictionary, + * then the objects in the receiver are replaced + * by those from aDictionary, + * and the replaced objects are released. */ - virtual bool merge(const OSDictionary *aDictionary); - - /*! - @function getObject - @abstract A member function to find an object in the dictionary associated by a given key. - @param aKey The unique OSSymbol key identifying the object to be returned to caller. - @result Returns a reference to the object corresponding to the given key, or 0 if the key does not exist in the dictionary. + virtual bool merge(const OSDictionary * aDictionary); + + + /*! + * @function getObject + * + * @abstract + * Returns the object stored under a given key. + * + * @param aKey An OSSymbol key identifying the object + * to be returned to the caller. + * + * @result + * The object stored under aKey, + * or NULL if the key does not exist in the dictionary. + * + * @discussion + * The returned object will be released if removed from the dictionary; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual OSObject *getObject(const OSSymbol *aKey) const; - /*! - @function getObject - @abstract A member function to find an object in the dictionary associated by a given key. 
- @param aKey The unique OSString key identifying the object to be returned to caller. - @result Returns a reference to the object corresponding to the given key, or 0 if the key does not exist in the dictionary. + virtual OSObject * getObject(const OSSymbol * aKey) const; + + + /*! + * @function getObject + * + * @abstract Returns the object stored under a given key. + * + * @param aKey An OSString key identifying the object + * to be returned to caller. + * + * @result + * The object stored under aKey, + * or NULL if the key does not exist in the dictionary. + * + * @discussion + * The returned object will be released if removed from the dictionary; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual OSObject *getObject(const OSString *aKey) const; - /*! - @function getObject - @abstract A member function to find an object in the dictionary associated by a given key. - @param aKey The unique string identifying the object to be returned to caller. - @result Returns a reference to the object corresponding to the given key, or 0 if the key does not exist in the dictionary. + virtual OSObject * getObject(const OSString * aKey) const; + + + /*! + * @function getObject + * + * @abstract + * Returns the object stored under a given key. + * + * @param aKey A C string key identifying the object + * to be returned to caller. + * + * @result + * The object stored under aKey, + * or NULL if the key does not exist in the dictionary. + * + * @discussion + * The returned object will be released if removed from the dictionary; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual OSObject *getObject(const char *aKey) const; - - /*! 
- @function isEqualTo - @abstract A member function to test the equality of the intersections of two dictionaries. - @param aDictionary The dictionary to be compared against the receiver. - @param keys An OSArray or OSDictionary containing the keys describing the intersection for the comparison. - @result Returns true if the intersections of the two dictionaries are equal. + virtual OSObject * getObject(const char * aKey) const; + + + /*! + * @function isEqualTo + * + * @abstract Tests the equality of two OSDictionary objects + * over a subset of keys. + * + * @param aDictionary The dictionary to be compared against the receiver. + * @param keys An OSArray or OSDictionary containing the keys + * (as @link //apple_ref/cpp/cl/OSString OSStrings@/link or + * @link //apple_ref/cpp/cl/OSSymbol OSSymbols@/link) + * describing the intersection for the comparison. + * + * @result + * true if the intersections + * of the two dictionaries are equal. + * + * @discussion + * Two OSDictionary objects are considered equal by this function + * if both have objects stored for all keys provided, + * and if the objects stored in each under + * a given key compare as equal using + * @link + * //apple_ref/cpp/instm/OSMetaClassBase/isEqualTo/virtualbool/(constOSMetaClassBase*) + * isEqualTo@/link. + */ + virtual bool isEqualTo( + const OSDictionary * aDictionary, + const OSCollection * keys) const; + + + /*! + * @function isEqualTo + * + * @abstract Tests the equality of two OSDictionary objects. + * + * @param aDictionary The dictionary to be compared against the receiver. + * + * @result + * true if the dictionaries are equal, + * false if not. + * + * @discussion + * Two OSDictionary objects are considered equal if they have the same count, + * the same keys, and if the objects stored in each under + * a given key compare as equal using + * @link + * //apple_ref/cpp/instm/OSMetaClassBase/isEqualTo/virtualbool/(constOSMetaClassBase*) + * isEqualTo@/link.
*/ - virtual bool isEqualTo(const OSDictionary *aDictionary, const OSCollection *keys) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of two dictionaries. - @param aDictionary The dictionary to be compared against the receiver. - @result Returns true if the dictionaries are equal. + virtual bool isEqualTo(const OSDictionary * aDictionary) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSDictionary to an arbitrary object. + * + * @param anObject An object to be compared against the receiver. + * + * @result + * true if the objects are equal. + * + * @discussion + * An OSDictionary is considered equal to another object + * if that object is derived from OSDictionary + * and contains the same or equivalent objects. */ - virtual bool isEqualTo(const OSDictionary *aDictionary) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between the receiver and an unknown object. - @param anObject An object to be compared against the receiver. - @result Returns true if the objects are equal. + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. */ - virtual bool isEqualTo(const OSMetaClassBase *anObject) const; - - /*! - @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + virtual bool serialize(OSSerialize * serializer) const; + + + /*! + * @function setOptions + * + * @abstract + * Recursively sets option bits in the dictionary + * and all child collections. + * + * @param options A bitfield whose values turn the options on (1) or off (0). 
+ * @param mask A mask indicating which bits + * in options to change. + * Pass 0 to get the whole current options bitfield + * without changing any settings. + * @param context Unused. + * + * @result + * The options bitfield as it was before the set operation. + * + * @discussion + * Kernel extensions should not call this function. + * + * Child collections' options are changed only if the receiving dictionary's + * options actually change. */ - virtual bool serialize(OSSerialize *s) const; - - /*! - @function setOptions - @abstract This function is used to recursively set option bits in this dictionary and all child collections. - @param options Set the (options & mask) bits. - @param mask The mask of bits which need to be set, 0 to get the current value. - @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. - */ - virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); - - /*! - @function copyCollection - @abstract Do a deep copy of this dictionary and its collections. - @discussion This function copies this dictionary and all included collections recursively. Objects that don't derive from OSContainter are NOT copied, that is objects like OSString and OSData. - @param cycleDict Is a dictionary of all of the collections that have been, to start the copy at the top level just leave this field 0. - @result The newly copied collecton or 0 if insufficient memory + virtual unsigned setOptions( + unsigned options, + unsigned mask, + void * context = 0); + + + /*! + * @function copyCollection + * + * @abstract + * Creates a deep copy of the dictionary + * and its child collections. + * + * @param cycleDict A dictionary of all of the collections + * that have been copied so far, + * which is used to track circular references. + * To start the copy at the top level, + * pass NULL. 
+ * + * @result + * The newly copied dictionary, with a retain count of 1, + * or NULL if there is insufficient memory to do the copy. + * + * @discussion + * The receiving dictionary, and any collections it contains, recursively, + * are copied. + * Objects that are not derived from OSCollection are retained + * rather than copied. */ - OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSCollection * copyCollection(OSDictionary * cycleDict = 0); OSMetaClassDeclareReservedUnused(OSDictionary, 0); diff --git a/libkern/libkern/c++/OSEndianTypes.h b/libkern/libkern/c++/OSEndianTypes.h index 8a527d460..9bde256fa 100644 --- a/libkern/libkern/c++/OSEndianTypes.h +++ b/libkern/libkern/c++/OSEndianTypes.h @@ -29,69 +29,98 @@ * gvdl 20050620 Created */ +// xx-review: are these even used anywhere? Grep turns up squat. + + /*! -@header OSEndianTypes -@abstract C++ inline types for byte swapping -@discussion -The OSEndianTypes consist of a number of types that are used very similarly to the traditional MacOS C scalar integers types, eg. UInt32 and SInt32. -@copyright 2005 Apple Computer, Inc. All rights reserved. -@updated 2005-07-25 -*/ + * @header OSEndianTypes + * + * @abstract + * C++ inline types for byte-swapping. + * + * @discussion + * The OSEndianTypes consist of a number of types that are used + * very similarly to the traditional MacOS C scalar integers types, + * for example, UInt32 and SInt32. + * @copyright 2005 Apple Computer, Inc. All rights reserved. + * @updated 2005-07-25 + */ // Header doc magic trick for simple documentation #if 0 -/*! @typedef BigUInt16 -@abstract A Big-endian unsigned integer scalar size 16 - UInt16 */ +/*! + * @typedef BigUInt16 + * @abstract A Big-endian unsigned integer scalar size 16 - UInt16 + */ typedef class BigUInt16 BigUInt16; -#endif -#if 0 -/*! @typedef BigSInt16 -@abstract A Big-endian signed integer scalar size 16 - SInt16 */ +/*! 
+ * @typedef BigSInt16 + * @abstract A Big-endian signed integer scalar size 16 - SInt16 + */ typedef class BigSInt16 BigSInt16; -/*! @typedef BigUInt32 -@abstract A Big-endian unsigned integer scalar size 32 - UInt32 */ +/*! + * @typedef BigUInt32 + * @abstract A Big-endian unsigned integer scalar size 32 - UInt32 + */ typedef class BigUInt32 BigUInt32; -/*! @typedef BigSInt32 -@abstract A Big-endian signed integer scalar size 32 - SInt32 */ +/*! + * @typedef BigSInt32 + * @abstract A Big-endian signed integer scalar size 32 - SInt32 + */ typedef class BigSInt32 BigSInt32; -/*! @typedef BigUInt64 -@abstract A Big-endian unsigned integer scalar size 64 - UInt64 */ +/*! + * @typedef BigUInt64 + * @abstract A Big-endian unsigned integer scalar size 64 - UInt64 + */ typedef class BigUInt64 BigUInt64; -/*! @typedef BigSInt64 -@abstract A Big-endian signed integer scalar size 64 - SInt64 */ +/*! + * @typedef BigSInt64 + * @abstract A Big-endian signed integer scalar size 64 - SInt64 + */ typedef class BigSInt64 BigSInt64; -/*! @typedef LittleUInt16 -@abstract A Little-endian unsigned integer scalar size 16 - UInt16 */ +/*! + * @typedef LittleUInt16 + * @abstract A Little-endian unsigned integer scalar size 16 - UInt16 + */ typedef class LittleUInt16 LittleUInt16; -/*! @typedef LittleSInt16 -@abstract A Little-endian signed integer scalar size 16 - SInt16 */ +/*! + * @typedef LittleSInt16 + * @abstract A Little-endian signed integer scalar size 16 - SInt16 + */ typedef class LittleSInt16 LittleSInt16; -/*! @typedef LittleUInt32 -@abstract A Little-endian unsigned integer scalar size 32 - UInt32 */ +/*! + * @typedef LittleUInt32 + * @abstract A Little-endian unsigned integer scalar size 32 - UInt32 + */ typedef class LittleUInt32 LittleUInt32; -/*! @typedef LittleSInt32 -@abstract A Little-endian signed integer scalar size 32 - SInt32 */ +/*! 
+ * @typedef LittleSInt32 + * @abstract A Little-endian signed integer scalar size 32 - SInt32 + */ typedef class LittleSInt32 LittleSInt32; -/*! @typedef LittleUInt64 -@abstract A Little-endian unsigned integer scalar size 64 - UInt64 */ +/*! + * @typedef LittleUInt64 + * @abstract A Little-endian unsigned integer scalar size 64 - UInt64 + */ typedef class LittleUInt64 LittleUInt64; -/*! @typedef LittleSInt64 -@abstract A Little-endian signed integer scalar size 64 - SInt64 */ +/*! + * @typedef LittleSInt64 + * @abstract A Little-endian signed integer scalar size 64 - SInt64 + */ typedef class LittleSInt64 LittleSInt64; -*/ -#endif +#endif /* 0 - headerdoc trick */ #ifndef _OS_OSENDIANHELPER_H #define _OS_OSENDIANHELPER_H @@ -104,33 +133,33 @@ typedef class LittleSInt64 LittleSInt64; // Probably should really be using templates, this is one of the few cases // where they do make sense. But as the kernel is not allowed to export // template based C++ APIs we have to use sophisticated macros instead -#define __OSEndianSignIntSizeDEF(argname, argend, argtype, argsize) { \ -public: \ - typedef argtype ## argsize Value; \ - \ -private: \ - typedef UInt ## argsize UValue; \ - UValue mValue; \ - \ - void writeValue(Value v) { \ - if (__builtin_constant_p(v)) \ - mValue = OSSwapHostTo ## argend ## ConstInt ## argsize(v); \ - else \ - OSWrite ## argend ## Int ## argsize(&mValue, 0, (UValue) v);\ - }; \ - \ - Value readValue() const { \ - return (Value) OSRead ## argend ## Int ## argsize(&mValue, 0); \ - }; \ - \ -public: \ - argname() { }; \ - \ - argname (Value v) { writeValue(v); }; \ - argname &operator = (Value v) { writeValue(v); return *this; } \ - \ - Value get() const { return readValue(); }; \ - operator Value () const { return readValue(); }; \ +#define __OSEndianSignIntSizeDEF(argname, argend, argtype, argsize) { \ +public: \ + typedef argtype ## argsize Value; \ + \ +private: \ + typedef UInt ## argsize UValue; \ + UValue mValue; \ + \ + void writeValue(Value 
v) { \ + if (__builtin_constant_p(v)) \ + mValue = OSSwapHostTo ## argend ## ConstInt ## argsize(v); \ + else \ + OSWrite ## argend ## Int ## argsize(&mValue, 0, (UValue) v); \ + }; \ + \ + Value readValue() const { \ + return (Value) OSRead ## argend ## Int ## argsize(&mValue, 0); \ + }; \ + \ +public: \ + argname() { }; \ + \ + argname (Value v) { writeValue(v); }; \ + argname &operator = (Value v) { writeValue(v); return *this; } \ + \ + Value get() const { return readValue(); }; \ + operator Value () const { return readValue(); }; \ } class BigUInt16 __OSEndianSignIntSizeDEF(BigUInt16, Big, UInt, 16); @@ -148,8 +177,11 @@ class LittleSInt64 __OSEndianSignIntSizeDEF(LittleSInt64, Little, SInt, 64); #undef __OSEndianSignIntSizeDEF -#endif /* __cplusplus */ +#endif /* __cplusplus + */ + +#endif /* ! _OS_OSENDIANHELPER_H + */ -#endif /* ! _OS_OSENDIANHELPER_H */ diff --git a/libkern/libkern/c++/OSIterator.h b/libkern/libkern/c++/OSIterator.h index ad167c9d1..6cbb18905 100644 --- a/libkern/libkern/c++/OSIterator.h +++ b/libkern/libkern/c++/OSIterator.h @@ -38,31 +38,92 @@ #include /*! - @class OSIterator - @abstract Abstract super class for iterator classes. - @discussion - OSIterator is an abstract super class providing a consistent set of API's for subclasses. -*/ + * @header + * + * @abstract + * This header declares the OSIterator collection class. + */ + + +/*! + * @class OSIterator + * @abstract + * The abstract superclass for Libkern iterators. + * + * @discussion + * OSIterator is the abstract superclass for all Libkern C++ object iterators. + * It defines the basic interface for iterating and resetting. + * See @link //apple_ref/cpp/macro/OSCollection OSCollection@/link and + * @link //apple_ref/cpp/macro/OSCollectionIterator OSCollectionIterator@/link + * for more information. + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. 
+ * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSIterator provides no concurrency protection. + */ class OSIterator : public OSObject { OSDeclareAbstractStructors(OSIterator) public: - /*! - @function reset - @abstract A pure virtual member function to be over-ridden by the subclass which reset the iterator to the beginning of the collection. + /*! + * @function reset + * + * @abstract + * Resets the iterator to the beginning of the collection, + * as if it had just been created. + * + * @discussion + * Subclasses must implement this pure virtual member function. */ virtual void reset() = 0; - /*! - @function isValid - @abstract A pure virtual member function to be over-ridden by the subclass which indicates a modification was made to the collection. + + /*! + * @function isValid + * + * @abstract + * Check that the collection hasn't been modified during iteration. + * + * @result + * true if the iterator is valid for continued use, + * false otherwise + * (typically because the collection being iterated has been modified). + * + * @discussion + * Subclasses must implement this pure virtual member function. */ virtual bool isValid() = 0; - /*! - @function getNextObject - @abstract A pure virtual function to be over-ridden by the subclass which returns a reference to the current object in the collection and advances the interator to the next object. + + /*! + * @function getNextObject + * + * @abstract + * Advances to and returns the next object in the iteration. + * + * @return + * The next object in the iteration context, + * NULL if there is no next object + * or if the iterator is no longer valid. + * + * @discussion + * The returned object will be released if removed from the collection; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. 
+ * + * Subclasses must implement this pure virtual function + * to check for validity with + * @link isValid isValid@/link, + * and then to advance the iteration context to the next object (if any) + * and return that next object, or NULL if there is none. */ virtual OSObject *getNextObject() = 0; diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h new file mode 100644 index 000000000..14337f3ab --- /dev/null +++ b/libkern/libkern/c++/OSKext.h @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _LIBKERN_OSKEXT_H +#define _LIBKERN_OSKEXT_H + +extern "C" { +#include +#include +#include +#include + +#ifdef XNU_KERNEL_PRIVATE +#include +#endif /* XNU_KERNEL_PRIVATE */ +} + +#include +#include +#include +#include +#include + +/********************************************************************* +* C functions used for callbacks. +*********************************************************************/ +#ifdef XNU_KERNEL_PRIVATE +extern "C" { +void osdata_kmem_free(void * ptr, unsigned int length); +void osdata_phys_free(void * ptr, unsigned int length); +void osdata_vm_deallocate(void * ptr, unsigned int length); +}; +#endif /* XNU_KERNEL_PRIVATE */ + +/********************************************************************* +* C Function Prototypes for Friend Declarations. +*********************************************************************/ +class OSKext; + +extern "C" { + +void OSKextLog( + OSKext * aKext, + OSKextLogSpec msgLogSpec, + const char * format, ...) 
+ __attribute__((format(printf, 3, 4))); + +void OSKextVLog( + OSKext * aKext, + OSKextLogSpec msgLogSpec, + const char * format, + va_list srcArgList); + +#ifdef XNU_KERNEL_PRIVATE +void OSKextRemoveKextBootstrap(void); +void IOSystemShutdownNotification(void); + +kern_return_t OSRuntimeInitializeCPP( + kmod_info_t * kmodInfo, + void * data); +kern_return_t OSRuntimeFinalizeCPP( + kmod_info_t * kmodInfo, + void * data); + +kern_return_t is_io_catalog_send_data( + mach_port_t masterPort, + uint32_t flag, + io_buf_ptr_t inData, + mach_msg_type_number_t inDataCount, + kern_return_t * result); + +void kmod_dump_log(vm_offset_t*, unsigned int); + +#if __ppc__ || __i386__ +kern_return_t kext_get_kmod_info( + kmod_info_array_t * kmod_list, + mach_msg_type_number_t * kmodCount); +#endif /* __ppc__ || __i386__ */ + +#endif /* XNU_KERNEL_PRIVATE */ +}; + +/********************************************************************/ +#if PRAGMA_MARK +#pragma mark - +#endif +/* + * @class OSKext + */ +/********************************************************************/ +class OSKext : public OSObject +{ + OSDeclareDefaultStructors(OSKext) + +#if PRAGMA_MARK +/**************************************/ +#pragma mark Friend Declarations +/**************************************/ +#endif + friend class IOCatalogue; + friend class IOPMrootDomain; + friend class KLDBootstrap; + friend class OSMetaClass; + +#ifdef XNU_KERNEL_PRIVATE + friend void OSKextVLog( + OSKext * aKext, + OSKextLogSpec msgLogSpec, + const char * format, + va_list srcArgList); + + friend void OSKextRemoveKextBootstrap(void); + friend void IOSystemShutdownNotification(void); + friend OSReturn OSKextUnloadKextWithLoadTag(uint32_t); + + friend kern_return_t kext_request( + host_priv_t hostPriv, + /* in only */ uint32_t clientLogSpec, + /* in only */ vm_offset_t requestIn, + /* in only */ mach_msg_type_number_t requestLengthIn, + /* out only */ vm_offset_t * responseOut, + /* out only */ mach_msg_type_number_t * 
responseLengthOut, + /* out only */ vm_offset_t * logDataOut, + /* out only */ mach_msg_type_number_t * logDataLengthOut, + /* out only */ kern_return_t * op_result); + + friend kxld_addr_t kern_allocate( + u_long size, + KXLDAllocateFlags * flags, + void * user_data); + + friend void kxld_log_shim( + KXLDLogSubsystem subsystem, + KXLDLogLevel level, + const char * format, + va_list argList, + void * user_data); + + friend void _OSKextConsiderUnloads( + __unused thread_call_param_t p0, + __unused thread_call_param_t p1); + + friend kern_return_t OSRuntimeInitializeCPP( + kmod_info_t * kmodInfo, + void * data); + friend kern_return_t OSRuntimeFinalizeCPP( + kmod_info_t * kmodInfo, + void * data); + + friend kern_return_t is_io_catalog_send_data( + mach_port_t masterPort, + uint32_t flag, + io_buf_ptr_t inData, + mach_msg_type_number_t inDataCount, + kern_return_t * result); + + friend void kmod_panic_dump(vm_offset_t*, unsigned int); + friend void kmod_dump_log(vm_offset_t*, unsigned int); + friend void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...)); + +#if __ppc__ || __i386__ + friend kern_return_t kext_get_kmod_info( + kmod_info_array_t * kmod_list, + mach_msg_type_number_t * kmodCount); +#endif /* __ppc__ || __i386__ */ + +#endif /* XNU_KERNEL_PRIVATE */ + +private: + + /************************* + * Instance variables + *************************/ + OSDictionary * infoDict; + + const OSSymbol * bundleID; + OSString * path; // not necessarily correct :-/ + + OSKextVersion version; // parsed + OSKextVersion compatibleVersion; // parsed + + /* These fields are required for tracking loaded kexts and + * will always have values for a loaded kext. 
+ */ + OSKextLoadTag loadTag; // 'id' from old kmod_info; + // kOSKextInvalidLoadTag invalid + kmod_info_t * kmod_info; // address into linkedExec./alloced for interface + + OSArray * dependencies; // kernel resource does not have any; + // links directly to kernel + OSData * linkState; // only kept for libraries + + /* Only real kexts have these; interface kexts do not. + */ + OSData * linkedExecutable; + OSSet * metaClasses; // for C++/OSMetaClass kexts + + /* Only interface kexts have these; interface kexts can get at them + * in the linked Executable. + */ + OSData * interfaceUUID; + + struct { + unsigned int loggingEnabled:1; + + unsigned int hasAllDependencies:1; + + unsigned int interface:1; + unsigned int kernelComponent:1; + unsigned int prelinked:1; + unsigned int loaded:1; + unsigned int starting:1; + unsigned int started:1; + unsigned int stopping:1; + unsigned int unloading:1; + + unsigned int autounloadEnabled:1; + unsigned int delayAutounload:1; // for development + + unsigned int CPPInitialized:1; + } flags; + +#if PRAGMA_MARK +/**************************************/ +#pragma mark Private Functions +/**************************************/ +#endif +private: + + /* Startup/shutdown phases. + */ + static void initialize(void); + static OSDictionary * copyKexts(void); + static OSReturn removeKextBootstrap(void); + static void willShutdown(void); // called by IOPMrootDomain on shutdown + + /* Called by power management at sleep/shutdown. + */ + static bool setLoadEnabled(bool flag); + static bool setUnloadEnabled(bool flag); + static bool setAutounloadsEnabled(bool flag); + static bool setKernelRequestsEnabled(bool flag); + + // all getters subject to race condition, caller beware + static bool getLoadEnabled(void); + static bool getUnloadEnabled(void); + static bool getAutounloadEnabled(void); + static bool getKernelRequestsEnabled(void); + + /* Instance life cycle. 
+ */ + static OSKext * withBooterData( + OSString * deviceTreeName, + OSData * booterData); + virtual bool initWithBooterData( + OSString * deviceTreeName, + OSData * booterData); + + static OSKext * withPrelinkedInfoDict( + OSDictionary * infoDict); + virtual bool initWithPrelinkedInfoDict( + OSDictionary * infoDict); + + static OSKext * withMkext2Info( + OSDictionary * anInfoDict, + OSData * mkextData); + virtual bool initWithMkext2Info( + OSDictionary * anInfoDict, + OSData * mkextData); + + virtual bool setInfoDictionaryAndPath( + OSDictionary * aDictionary, + OSString * aPath); + virtual bool setExecutable( + OSData * anExecutable, + OSData * externalData = NULL, + bool externalDataIsMkext = false); + virtual bool registerIdentifier(void); + + virtual void free(void); + + static OSReturn removeKext( + OSKext * aKext, + bool terminateServicesAndRemovePersonalitiesFlag = false); + + /* Mkexts. + */ + static OSReturn readMkextArchive( + OSData * mkextData, + uint32_t * checksumPtr = NULL); + static OSReturn readMkext2Archive( + OSData * mkextData, + OSDictionary ** mkextPlistOut, + uint32_t * checksumPtr = NULL); + virtual OSData * createMkext2FileEntry( + OSData * mkextData, + OSNumber * offsetNum, + const char * entryName); + virtual OSData * extractMkext2FileData( + UInt8 * data, + const char * name, + uint32_t compressedSize, + uint32_t fullSize); + + static OSReturn readMkext1Archive( + OSData * mkextData, + uint32_t * checksumPtr); + bool initWithMkext1Info( + OSDictionary * anInfoDict, + OSData * executableWrapper, + OSData * mkextData); + static OSData * extractMkext1Entry( + const void * mkextFileBase, + const void * entry); + + + /* Dependencies. 
+ */ + virtual bool resolveDependencies( + OSArray * loopStack = NULL); // priv/prot + virtual bool addBleedthroughDependencies(OSArray * anArray); + virtual bool flushDependencies(bool forceFlag = false); // priv/prot + virtual uint32_t getNumDependencies(void); + virtual OSArray * getDependencies(void); + + /* User-space requests (load/generic). + */ + static OSReturn loadFromMkext( + OSKextLogSpec clientLogSpec, + char * mkextBuffer, + uint32_t mkextBufferLength, + char ** logInfoOut, + uint32_t * logInfoLengthOut); + static OSReturn handleRequest( + host_priv_t hostPriv, + OSKextLogSpec clientLogSpec, + char * requestBuffer, + uint32_t requestLength, + char ** responseOut, + uint32_t * responseLengthOut, + char ** logInfoOut, + uint32_t * logInfoLengthOut); + static OSReturn serializeLogInfo( + OSArray * logInfoArray, + char ** logInfoOut, + uint32_t * logInfoLengthOut); + + /* Loading. + */ + virtual OSReturn load( + OSKextExcludeLevel startOpt = kOSKextExcludeNone, + OSKextExcludeLevel startMatchingOpt = kOSKextExcludeAll, + OSArray * personalityNames = NULL); // priv/prot + virtual OSReturn unload(void); + + static void recordIdentifierRequest( + OSString * kextIdentifier); + + virtual OSReturn loadExecutable(void); + static void considerDestroyingLinkContext(void); + static OSData * getKernelLinkState(void); + virtual OSData * getExecutable(void); + virtual void setLinkedExecutable(OSData * anExecutable); + + virtual OSReturn start(bool startDependenciesFlag = true); + virtual OSReturn stop(void); + virtual OSReturn setVMProtections(void); + virtual OSReturn validateKextMapping(bool startFlag); + + static OSArray * copyAllKextPersonalities( + bool filterSafeBootFlag = false); + + static void setPrelinkedPersonalities(OSArray * personalitiesArray); + + static void sendAllKextPersonalitiesToCatalog( + bool startMatching = false); + virtual void sendPersonalitiesToCatalog( + bool startMatching = false, + OSArray * personalityNames = NULL); + + static bool 
canUnloadKextWithIdentifier( + OSString * kextIdentifier, + bool checkClassesFlag = true); + + static OSReturn autounloadKext(OSKext * aKext); + + /* Getting info about loaded kexts (kextstat). + */ + static OSArray * copyLoadedKextInfo(OSArray * kextIdentifiers); + virtual OSDictionary * copyInfo(void); + + /* Logging to user space. + */ + static OSKextLogSpec setUserSpaceLogFilter( + OSKextLogSpec userLogSpec, + bool captureFlag = false); + static OSArray * clearUserSpaceLogFilter(void); + static OSKextLogSpec getUserSpaceLogFilter(void); + + /* OSMetaClasses defined by kext. + */ + virtual OSReturn addClass( + OSMetaClass * aClass, + uint32_t numClasses); + virtual OSReturn removeClass( + OSMetaClass * aClass); + virtual bool hasOSMetaClassInstances(void); + virtual OSSet * getMetaClasses(void); + static void reportOSMetaClassInstances( + const char * kextIdentifier, + OSKextLogSpec msgLogSpec); + virtual void reportOSMetaClassInstances( + OSKextLogSpec msgLogSpec); + + static OSReturn dispatchResource(OSDictionary * requestDict); + + static OSReturn dequeueCallbackForRequestTag( + OSKextRequestTag requestTag, + OSDictionary ** callbackRecordOut); + static OSReturn dequeueCallbackForRequestTag( + OSNumber * requestTagNum, + OSDictionary ** callbackRecordOut); + static void invokeRequestCallback( + OSDictionary * callbackRecord, + OSReturn requestResult); + virtual void invokeOrCancelRequestCallbacks( + OSReturn callbackResult, + bool invokeFlag = true); + virtual uint32_t countRequestCallbacks(void); + + /* panic() support. 
+ */ + static void printKextsInBacktrace( + vm_offset_t * addr, + unsigned int cnt, + int (* printf_func)(const char *fmt, ...), + bool lockFlag); + static uint32_t saveLoadedKextPanicListTyped( + const char * prefix, + int invertFlag, + int libsFlag, + char * paniclist, + uint32_t list_size, + uint32_t * list_length_ptr); + static void saveLoadedKextPanicList(void); + static void saveUnloadedKextPanicList(OSKext * aKext); + static void printKextPanicLists(int (*printf_func)(const char *fmt, ...)); + + /* C++ Initialization. + */ + + virtual void setCPPInitialized(bool initialized=true); + +#if __ppc__ || __i386__ + /* Backward compatibility for kmod_get_info() MIG call. + */ + static kern_return_t getKmodInfo( + kmod_info_array_t * kmodList, + mach_msg_type_number_t * kmodCount); +#endif /* __ppc__ || __i386__ */ + + +#if PRAGMA_MARK +/**************************************/ +#pragma mark Public Functions +/**************************************/ +#endif +public: + // caller must release + static OSKext * lookupKextWithIdentifier(const char * kextIdentifier); + static OSKext * lookupKextWithIdentifier(OSString * kextIdentifier); + static OSKext * lookupKextWithLoadTag(OSKextLoadTag aTag); + static OSKext * lookupKextWithAddress(vm_address_t address); + + static bool isKextWithIdentifierLoaded(const char * kextIdentifier); + + static OSReturn loadKextWithIdentifier( + const char * kextIdentifier, + Boolean allowDeferFlag = true, + Boolean delayAutounloadFlag = false, + OSKextExcludeLevel startOpt = kOSKextExcludeNone, + OSKextExcludeLevel startMatchingOpt = kOSKextExcludeAll, + OSArray * personalityNames = NULL); + static OSReturn loadKextWithIdentifier( + OSString * kextIdentifier, + Boolean allowDeferFlag = true, + Boolean delayAutounloadFlag = false, + OSKextExcludeLevel startOpt = kOSKextExcludeNone, + OSKextExcludeLevel startMatchingOpt = kOSKextExcludeAll, + OSArray * personalityNames = NULL); + static OSReturn removeKextWithIdentifier( + const char * 
kextIdentifier, + bool terminateServicesAndRemovePersonalitiesFlag = false); + static OSReturn removeKextWithLoadTag( + OSKextLoadTag loadTag, + bool terminateServicesAndRemovePersonalitiesFlag = false); + + static OSReturn requestResource( + const char * kextIdentifier, + const char * resourceName, + OSKextRequestResourceCallback callback, + void * context, + OSKextRequestTag * requestTagOut); + static OSReturn cancelRequest( + OSKextRequestTag requestTag, + void ** contextOut); + + static void considerUnloads(Boolean rescheduleOnlyFlag = false); + static void flushNonloadedKexts(Boolean flushPrelinkedKexts); + static void setKextdActive(Boolean active = true); + static void setDeferredLoadSucceeded(Boolean succeeded = true); + static void considerRebuildOfPrelinkedKernel(void); + + virtual bool setAutounloadEnabled(bool flag); + + virtual const OSSymbol * getIdentifier(void); + virtual const char * getIdentifierCString(void); + virtual OSKextVersion getVersion(void); + virtual OSKextVersion getCompatibleVersion(void); + virtual bool isCompatibleWithVersion(OSKextVersion aVersion); + virtual OSObject * getPropertyForHostArch(const char * key); + + virtual OSKextLoadTag getLoadTag(void); + virtual OSData * copyUUID(void); + virtual OSArray * copyPersonalitiesArray(void); + virtual void removePersonalitiesFromCatalog(void); + + virtual bool declaresExecutable(void); // might be missing + virtual bool isInterface(void); + virtual bool isKernelComponent(void); + virtual bool isLoadableInSafeBoot(void); + virtual bool isPrelinked(void); + virtual bool isLoaded(void); + virtual bool isStarted(void); + virtual bool isCPPInitialized(void); +}; + + +#endif /* !_LIBKERN_OSKEXT_H */ diff --git a/libkern/libkern/c++/OSLib.h b/libkern/libkern/c++/OSLib.h index ad68370ba..90034109b 100644 --- a/libkern/libkern/c++/OSLib.h +++ b/libkern/libkern/c++/OSLib.h @@ -31,7 +31,7 @@ #include #ifdef KERNEL -#define MACH_ASSERT 1 +#define MACH_ASSERT 1 #endif __BEGIN_DECLS @@ -40,7 +40,7 
@@ __BEGIN_DECLS #include #include -#ifdef KERNEL_PRIVATE +#ifdef KERNEL_PRIVATE #include #endif diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index ef527fdd7..85f9553e0 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,74 +40,258 @@ class OSSymbol; class OSDictionary; class OSSerialize; + +/*! + * @header + * + * @abstract + * This header declares the OSMetaClassBase and OSMetaClass classes, + * which together form the basis of the Libkern and I/O Kit C++ class hierarchy + * and run-time type information facility. + */ + + #if !defined(__ppc__) || __GNUC__ < 3 +/*! @parseOnly */ #define APPLE_KEXT_COMPATIBILITY #else #define APPLE_KEXT_COMPATIBILITY __attribute__ ((apple_kext_compatibility)) #endif +/*! @parseOnly */ #define APPLE_KEXT_VTABLE_PADDING 1 #if defined(__LP64__) -#define APPLE_KEXT_LEGACY_ABI 0 +/*! @parseOnly */ +#define APPLE_KEXT_LEGACY_ABI 0 #elif defined(__arm__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) -#define APPLE_KEXT_LEGACY_ABI 0 +#define APPLE_KEXT_LEGACY_ABI 0 #else -#define APPLE_KEXT_LEGACY_ABI 1 +#define APPLE_KEXT_LEGACY_ABI 1 #endif #if APPLE_KEXT_VTABLE_PADDING -#define APPLE_KEXT_PAD_METHOD virtual +/*! @parseOnly */ +#define APPLE_KEXT_PAD_METHOD virtual +/*! @parseOnly */ #define APPLE_KEXT_PAD_IMPL(index) gMetaClass.reservedCalled(index) #else -#define APPLE_KEXT_PAD_METHOD static +#define APPLE_KEXT_PAD_METHOD static #define APPLE_KEXT_PAD_IMPL(index) #endif +#if defined(__LP64__) +/*! @parseOnly */ +#define APPLE_KEXT_COMPATIBILITY_VIRTUAL +#else +// private method made virtual only for binary compatibility +#define APPLE_KEXT_COMPATIBILITY_VIRTUAL virtual +#endif + +/*! 
@parseOnly */ +#define APPLE_KEXT_DEPRECATED __attribute__((deprecated)) + +/*! + * @class OSMetaClassBase + * + * @abstract + * OSMetaClassBase is the abstract bootstrap class + * for the Libkern and I/O Kit run-time type information system. + * + * @discussion + * OSMetaClassBase is the abstract C++ root class + * underlying the entire Libkern and I/O Kit class hierarchy. + * It defines the run-time type information system, + * including dynamic class allocation and safe type-casting, + * as well as the abstract interface for reference counting + * and a few other utility functions. + * OSMetaClassBase is the immediate superclass of + * @link //apple_ref/doc/class/OSObject OSObject@/link and + * @link //apple_ref/doc/class/OSMetaClass OSMetaClass@/link; + * no other class should derive from OSMetaClassBase. + * + * For more information, see + * @link //apple_ref/doc/uid/TP40002799 + * I/O Kit Device Driver Design Guidelines@/link. + * + * Use by Kernel Extensions + * + * Kernel Extensions should never interact directly with OSMetaClassBase, + * but they will find useful several macros that tie in + * to the run-time type information system, specifically: + *
 <ul>
+ * <li>@link OSTypeAlloc OSTypeAlloc@/link - allocation of new instances</li>
+ * <li>@link OSDynamicCast OSDynamicCast@/link - safe type casting</li>
+ * <li>@link OSCheckTypeInst OSCheckTypeInst@/link -
+ *     checking for inheritance/derivation</li>
+ * <li>@link OSMemberFunctionCast OSMemberFunctionCast@/link -
+ *     casting C++ member functions to C function pointers
+ *     for registration as callbacks</li>
+ * </ul>
+ * + * See @link //apple_ref/doc/class/OSMetaClass OSMetaClass@/link + * for more run-time type information interfaces. + * + * Use Restrictions + * + * OSMetaClassBase should not be subclassed by kernel extensions, + * nor should kernel extensions call its run-time type functions directly. + * + * The run-time type functions and macros are not safe + * to call in a primary interrupt context. + * + * Concurrency Protection + * + * The run-time type macros and functions of OSMetaClassBase are thread-safe. + */ class OSMetaClassBase { public: -/*! @function OSTypeAlloc - @abstract Allocate an instance of the desired object. - @discussion The OSTypeAlloc macro can be used to break the binary compatibility difficulties presented by new. The problem is that C++ compiles the knowledge of the size of the class into the cade calling new. If you use the alloc code however the class size is determined by the callee not the caller. - @param type Name of the desired type to be created. - @result 'this' if object cas been successfully created. -*/ -#define OSTypeAlloc(type) ((type *) ((type::metaClass)->alloc())) - -/*! @function OSTypeID - @abstract Given the name of a class return it's typeID - @param type Name of the desired type, eg. OSObject. - @result A unique Type ID for the class. -*/ -#define OSTypeID(type) (type::metaClass) - -/*! @function OSTypeIDInst - @abstract Given a pointer to an object return it's typeID - @param typeinst An instance of an OSObject subclass. - @result The typeID, ie. OSMetaClass *. -*/ -#define OSTypeIDInst(typeinst) ((typeinst)->getMetaClass()) - -/*! @function OSDynamicCast - @abstract Roughly analogous to (type *) inst, but check if valid first. - @discussion OSDynamicCast is an attempt to implement a rudimentary equivalent to rtti's dynamic_cast operator. Embedded-C++ doesn't allow the use of rtti. OSDynamicCast is build on the OSMetaClass mechanism. Note it is safe to call this with a 0 parameter. 
- @param type name of desired class name. Notice that it is assumed that you desire to cast to a pointer to an object of this type. Also type qualifiers, like const, are not recognized and will cause an, usually obscure, compile error. - @param inst Pointer to object that you wish to attempt to type cast. May be 0. - @result inst if object non-zero and it is of the desired type, otherwise 0. -*/ -#define OSDynamicCast(type, inst) \ + + + /*! + * @define OSTypeAlloc + * @hidecontents + * + * @abstract + * Allocates an instance of the named object class. + * + * @param type The name of the desired class to be created, + * as a raw token, not a string or macro. + * + * @result + * A pointer to the new, uninitialized object on success; + * NULL on failure. + * + * @discussion + * See also + * @link + * //apple_ref/cpp/clm/OSMetaClass/allocClassWithName/staticOSObject*\/(constchar*) + * OSMetaClass::allocClassWithName(const char *)@/link + * and + * @link + * //apple_ref/cpp/instm/OSMetaClass/alloc/virtualOSObject*\/() + * OSMetaClass::alloc@/link. + * + * The OSTypeAlloc macro is used to avoid binary compatibility difficulties + * presented by the C++ new operator. + */ +#define OSTypeAlloc(type) ((type *) ((type::metaClass)->alloc())) + + + /*! + * @define OSTypeID + * @hidecontents + * + * @abstract + * Returns the type ID (metaclass) of a class based on its name. + * + * @param type The name of the desired class, as a raw token, + * not a string or macro. + * + * @result + * The unique type ID (metaclass) for the class. + * + * @discussion + * It is typically more useful to determine whether a class is derived + * from another; see + * @link //apple_ref/cpp/macro/OSDynamicCast OSDynamicCast@/link + * and + * @link //apple_ref/cpp/macro/OSCheckTypeInst OSCheckTypeInst@/link. + */ +#define OSTypeID(type) (type::metaClass) + + + /*! + * @define OSTypeIDInst + * @hidecontents + * + * @abstract + * Returns the type ID (metaclass) for the class of an object instance. 
+ * + * @param typeinst An instance of an OSObject subclass. + * + * @result + * The type ID of that object's class; that is, its metaclass. + * + * @discussion + * It is typically more useful to determine whether an object is derived + * from a particular class; see + * @link //apple_ref/cpp/macro/OSDynamicCast OSDynamicCast@/link + * and + * @link //apple_ref/cpp/macro/OSCheckTypeInst OSCheckTypeInst@/link. + */ +#define OSTypeIDInst(typeinst) ((typeinst)->getMetaClass()) + + + /*! + * @define OSDynamicCast + * @hidecontents + * + * @abstract + * Safe type-casting for Libkern C++ objects. + * + * @param type The name of the desired class type, as a raw token, + * not a string or macro. + * It is assumed you intend to cast to a pointer + * to an object of this type. + * Type qualifiers, such as const, + * are not recognized and will cause + * a (usually obscure) compile error. + * @param inst A pointer to the object instance to be cast. + * May be NULL. + * + * @result + * inst if it is non-NULL + * and derived from type; + * otherwise NULL. + * + * @discussion + * OSDynamicCast is a rough equivalent + * to the standard C++ RTTI dynamic_cast<T> operator. + * Your code should use this instead of raw C type-casting, + * and check the resulting value. + * If the result is non-NULL, + * the object is safe to use as the type-cast class; + * if the result is NULL, + * the object does not derive from the type-cast class + * and your code should take appropriate steps to handle the error. + */ +#define OSDynamicCast(type, inst) \ ((type *) OSMetaClassBase::safeMetaCast((inst), OSTypeID(type))) -/*! @function OSCheckTypeInst - @abstract Is the target object a subclass of the reference object? - @param typeinst Reference instance of an object, desired type. - @param inst Instance of object to check for type compatibility. - @result false if typeinst or inst are 0 or inst is not a subclass of typeinst's class. true otherwise. -*/ + + /*! 
+ * @define OSCheckTypeInst + * @hidecontents + * + * @abstract + * Checks whether two objects are type-compatible. + * + * @param typeinst The reference object. + * @param inst The object to check for type compatibility. + * + * @result + * true if both inst and + * typeinst are non-NULL + * and inst is derived from the class of typeinst; + * otherwise false. + */ #define OSCheckTypeInst(typeinst, inst) \ OSMetaClassBase::checkTypeInst(inst, typeinst) - + +/*! @function OSSafeRelease + * @abstract Release an object if not NULL. + * @param inst Instance of an OSObject, may be NULL. + */ +#define OSSafeRelease(inst) do { if (inst) (inst)->release(); } while (0) + +/*! @function OSSafeReleaseNULL + * @abstract Release an object if not NULL, then set it to NULL. + * @param inst Instance of an OSObject, may be NULL. + */ +#define OSSafeReleaseNULL(inst) do { if (inst) (inst)->release(); (inst) = NULL; } while (0) + typedef void (*_ptf_t)(void); #if APPLE_KEXT_LEGACY_ABI @@ -120,39 +304,38 @@ static inline _ptf_t _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void)) { union { - void (OSMetaClassBase::*fIn)(void); - struct { // Pointer to member function 2.95 - unsigned short fToff; - short fVInd; - union { - _ptf_t fPFN; - short fVOff; - } u; - } fptmf2; + void (OSMetaClassBase::*fIn)(void); + struct { // Pointer to member function 2.95 + unsigned short fToff; + short fVInd; + union { + _ptf_t fPFN; + short fVOff; + } u; + } fptmf2; } map; map.fIn = func; if (map.fptmf2.fToff) { - panic("Multiple inheritance is not supported"); - return 0; + panic("Multiple inheritance is not supported"); + return 0; } else if (map.fptmf2.fVInd < 0) { - // Not virtual, i.e. plain member func - return map.fptmf2.u.fPFN; + // Not virtual, i.e. 
plain member func + return map.fptmf2.u.fPFN; } else { - union { - const OSMetaClassBase *fObj; - _ptf_t **vtablep; - } u; - u.fObj = self; - - // Virtual member function so dereference vtable - return (*u.vtablep)[map.fptmf2.fVInd - 1]; + union { + const OSMetaClassBase *fObj; + _ptf_t **vtablep; + } u; + u.fObj = self; + + // Virtual member function so dereference vtable + return (*u.vtablep)[map.fptmf2.fVInd - 1]; } } #else /* !APPLE_KEXT_LEGACY_ABI */ - // Slightly less arcane and slightly less evil code to do // the same for kexts compiled with the standard Itanium C++ // ABI @@ -161,43 +344,62 @@ static inline _ptf_t _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void)) { union { - void (OSMetaClassBase::*fIn)(void); - uintptr_t fVTOffset; - _ptf_t fPFN; + void (OSMetaClassBase::*fIn)(void); + uintptr_t fVTOffset; + _ptf_t fPFN; } map; map.fIn = func; if (map.fVTOffset & 1) { - // virtual - union { - const OSMetaClassBase *fObj; - _ptf_t **vtablep; - } u; - u.fObj = self; - - // Virtual member function so dereference vtable - return *(_ptf_t *)(((uintptr_t)*u.vtablep) + map.fVTOffset - 1); + // virtual + union { + const OSMetaClassBase *fObj; + _ptf_t **vtablep; + } u; + u.fObj = self; + + // Virtual member function so dereference vtable + return *(_ptf_t *)(((uintptr_t)*u.vtablep) + map.fVTOffset - 1); } else { - // Not virtual, i.e. plain member func - return map.fPFN; + // Not virtual, i.e. plain member func + return map.fPFN; } } #endif /* !APPLE_KEXT_LEGACY_ABI */ -/*! @function OSMemberFunctionCast - @abstract Convert a pointer to a member function to a c-style pointer to function. No warnings are generated. - @param type The type of pointer function desired. - @param self The this pointer of the object whose function you wish to cache. - @param func The pointer to member function itself, something like &Base::func. - @result A pointer to function of the given type. 
This function will panic if an attempt is made to call it with a multiply inherited class. -*/ - -#define OSMemberFunctionCast(cptrtype, self, func) \ - (cptrtype) OSMetaClassBase:: \ - _ptmf2ptf(self, (void (OSMetaClassBase::*)(void)) func) + /*! + * @define OSMemberFunctionCast + * @hidecontents + * + * @abstract + * Converts a C++ member function pointer, relative to an instance, + * to a C-style pointer to function. + * + * @param cptrtype The function type declaration to cast to + * (typically provided as a typedef by I/O KitKit classes). + * @param self The this pointer of the object whose function + * you wish to cache. + * @param func The pointer to the member function itself, + * something like &Class::function. + * + * @result + * A pointer to a function of the given type referencing self. + * + * @discussion + * This function is used to generate pointers to C++ functions for instances, + * such that they can be registered as callbacks with I/O Kit objects. + * + * No warnings are generated. + * + * This function will panic if an attempt is made to call it + * with a multiply-inheriting class. + */ +#define OSMemberFunctionCast(cptrtype, self, func) \ + (cptrtype) OSMetaClassBase:: \ + _ptmf2ptf(self, (void (OSMetaClassBase::*)(void)) func) protected: OSMetaClassBase(); @@ -205,115 +407,348 @@ _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void)) private: // Disable copy constructors of OSMetaClassBase based objects -/*! @function operator = - @abstract Disable implicit copy constructor by making private - @param src Reference to source object that isn't allowed to be copied -*/ + /* Not to be included in headerdoc. + * + * @function operator = + * + * @abstract + * Disable implicit copy constructor by making private + * + * @param src Reference to source object that isn't allowed to be copied. + */ void operator =(OSMetaClassBase &src); -/*! 
@function OSMetaClassBase - @abstract Disable implicit copy constructor by making private - @param src Reference to source object that isn't allowed to be copied -*/ + /* Not to be included in headerdoc. + * + * @function OSMetaClassBase + * + * @abstract + * Disable implicit copy constructor by making private + * + * @param src Reference to source object that isn't allowed to be copied. + */ OSMetaClassBase(OSMetaClassBase &src); public: -/*! @function release - @abstract Primary implementation of the release mechanism. - @discussion If $link retainCount <= the when argument then call $link free(). This indirect implementation of $link release allows the developer to break reference circularity. An example of this sort of problem is a parent/child mutual reference, either the parent or child can implement: void release() { release(2); } thus breaking the cirularity. - @param when When retainCount == when then call free(). */ - virtual void release(int when) const = 0; - -/*! @function getRetainCount - @abstract How many times has this object been retained? - @result Current retain count -*/ + +// xx-review: the original comment for this makes it sound to me like we don't +// xx-review: catch over-releasing an object...? + + /*! + * @function release + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/(int) + * release(int freeWhen)@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/(int) + * release(int freeWhen)@/link. + */ + virtual void release(int freeWhen) const = 0; + + + /*! + * @function getRetainCount + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/getRetainCount/virtualint/() + * getRetainCount()@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/getRetainCount/virtualint/() + * OSObject::getRetainCount()@/link. + */ virtual int getRetainCount() const = 0; -/*! 
@function retain - @abstract Retain a reference in this object. -*/ + + /*! + * @function retain + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain()@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * OSObject::retain()@/link. + */ virtual void retain() const = 0; -/*! @function release - @abstract Release a reference to this object -*/ + + + /*! + * @function release + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * OSObject::release@/link. + */ virtual void release() const = 0; -/*! @function serialize - @abstract - @discussion - @param s - @result -*/ - virtual bool serialize(OSSerialize *s) const = 0; + /*! + * @function serialize + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/serialize/virtualbool/(OSSerialize*) + * serialize@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/serialize/virtualbool/(OSSerialize*) + * OSObject::serialize@/link. + */ + virtual bool serialize(OSSerialize * serializer) const = 0; + + + /*! + * @function getMetaClass + * + * @abstract + * Returns the OSMetaClass representing + * an OSMetaClassBase subclass. + * + * @discussion + * OSObject overrides this abstract member function + * to return the OSMetaClass object that represents + * each class for run-time typing. + */ virtual const OSMetaClass * getMetaClass() const = 0; -/*! @function isEqualTo - @abstract Is this == anObj? - @discussion OSMetaClassBase::isEqualTo implements this as a shallow pointer comparison. The OS container classes do a more meaningful comparison. Your mileage may vary. - @param anObj Object to compare 'this' to. - @result true if the objects are equivalent, false otherwise. 
-*/ - virtual bool isEqualTo(const OSMetaClassBase *anObj) const; - -/*! @function metaCast - @abstract Check to see if this object is or inherits from the given type. - @discussion This function is the guts of the OSMetaClass system. IODynamicCast, qv, is implemented using this function. - @param toMeta Pointer to a constant OSMetaClass for the desired target type. - @result 'this' if object is of desired type, otherwise 0. -*/ - OSMetaClassBase *metaCast(const OSMetaClass *toMeta) const; - - -/*! @function metaCast - @abstract See OSMetaClassBase::metaCast(const OSMetaClass *) - @param toMeta OSSymbol of the desired class' name. - @result 'this' if object is of desired type, otherwise 0. -*/ - OSMetaClassBase *metaCast(const OSSymbol *toMeta) const; - -/*! @function metaCast - @abstract See OSMetaClassBase::metaCast(const OSMetaClass *) - @param toMeta OSString of the desired class' name. - @result 'this' if object is of desired type, otherwise 0. -*/ - OSMetaClassBase *metaCast(const OSString *toMeta) const; - -/*! @function metaCast - @abstract See OSMetaClassBase::metaCast(const OSMetaClass *) - @param toMeta const char * C String of the desired class' name. - @result 'this' if object is of desired type, otherwise 0. -*/ - OSMetaClassBase *metaCast(const char *toMeta) const; - - // Helper inlines for runtime type preprocessor macros - static OSMetaClassBase * - safeMetaCast(const OSMetaClassBase *me, const OSMetaClass *toType); - - static bool - checkTypeInst(const OSMetaClassBase *inst, const OSMetaClassBase *typeinst); + + /*! + * @function isEqualTo + * + * @abstract + * Checks whether another object is equal to the receiver. + * + * @param anObject The object to copmare to the receiver. + * + * @result + * true if the objects are equal, false otherwise. + * + * @discussion + * OSMetaClassBase implements this as a direct pointer comparison, + * since it has no other information to judge equality by. 
+ * Subclasses generally override this function + * to do a more meaningful comparison. + * For example, OSString implements it to return + * true if anObject + * is derived from OSString and represents the same C string. + */ + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function metaCast + * + * @abstract + * Casts this object is to the class managed by the given OSMetaClass. + * + * @param toMeta A pointer to a constant OSMetaClass + * for the desired target type. + * + * @result + * this if the object is derived + * from the class managed by toMeta, + * otherwise NULL. + * + * @discussion + * It is far more convenient to use + * @link OSDynamicCast OSDynamicCast@/link. + */ + OSMetaClassBase * metaCast(const OSMetaClass * toMeta) const; + + + /*! + * @function metaCast + * + * @abstract + * Casts this object is to the class managed by the named OSMetaClass. + * + * @param toMeta An OSSymbol naming the desired target type. + * + * @result + * this if the object is derived + * from the class named by toMeta, + * otherwise NULL. + * + * @discussion + * It is far more convenient to use + * @link OSDynamicCast OSDynamicCast@/link. + */ + OSMetaClassBase * metaCast(const OSSymbol * toMeta) const; + + + /*! + * @function metaCast + * + * @abstract + * Casts this object is to the class managed by the named OSMetaClass. + * + * @param toMeta An OSString naming the desired target type. + * @result + * this if the object is derived + * from the class named by toMeta, + * otherwise NULL. + * + * @discussion + * It is far more convenient to use + * @link OSDynamicCast OSDynamicCast@/link. + */ + OSMetaClassBase * metaCast(const OSString * toMeta) const; + + + /*! + * @function metaCast + * + * @abstract + * Casts this object is to the class managed by the named OSMetaClass. + * + * @param toMeta A C string naming the desired target type. + * @result + * this if the object is derived + * from the class named by toMeta, + * otherwise NULL. 
+ * + * @discussion + * It is far more convenient to use + * @link OSDynamicCast OSDynamicCast@/link. + */ + OSMetaClassBase * metaCast(const char * toMeta) const; + + // Helper inlines for run-time type preprocessor macros + /*! + * @function safeMetaCast + * + * @abstract + * Casts an object is to the class managed by the given OSMetaClass. + * + * @param anObject A pointer to the object to be cast. + * @param toMeta A pointer to a constant OSMetaClass + * for the desired target type. + * + * @result + * anObject if the object is derived + * from the class managed by toMeta, + * otherwise NULL. + * + * @discussion + * It is far more convenient to use + * @link OSDynamicCast OSDynamicCast@/link. + */ + static OSMetaClassBase * safeMetaCast( + const OSMetaClassBase * anObject, + const OSMetaClass * toMeta); + + /*! + * @function checkTypeInst + * + * @abstract + * Checks whether an object instance is of the same class + * as another object instance (or a subclass of that class). + * + * @param inst A pointer to the object to check. + * @param typeinst A pointer to an object of the class being checked. + * + * @result + * true if the object is derived + * from the class of typeinst + * or a subclass of that class, + * otherwise false. + * + * @discussion + * It is far more convenient to use + * @link OSCheckTypeInst OSCheckTypeInst@/link. + */ + static bool checkTypeInst( + const OSMetaClassBase * inst, + const OSMetaClassBase * typeinst); + + static void initialize(void); public: -/*! @function taggedRetain - @abstract Retain a tagged reference in this object. -*/ + /*! + * @function taggedRetain + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/taggedRetain/virtualvoid/(constvoid*) + * taggedRetain(const void *)@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/taggedRetain/virtualvoid/(constvoid*) + * OSObject::taggedRetain(const void *)@/link. 
+ */ // WAS: virtual void _RESERVEDOSMetaClassBase0(); - virtual void taggedRetain(const void *tag = 0) const = 0; - -/*! @function taggedRelease - @abstract Release a tagged reference to this object -*/ + virtual void taggedRetain(const void * tag = 0) const = 0; + + + /*! + * @function taggedRelease + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*) + * taggedRelease(const void *)@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*) + * OSObject::taggedRelease(const void *)@/link. + */ // WAS: virtual void _RESERVEDOSMetaClassBase1(); - virtual void taggedRelease(const void *tag = 0) const = 0; + virtual void taggedRelease(const void * tag = 0) const = 0; protected: -/*! @function taggedRelease - @abstract Release a tagged reference to this object and free if retainCount == when on entry -*/ + /*! + * @function taggedRelease + * + * @abstract + * Abstract declaration of + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint) + * taggedRelease(const void *, const int freeWhen)@/link. + * + * @discussion + * See + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint) + * OSObject::taggedRelease(const void *, const int freeWhen)@/link. + */ // WAS: virtual void _RESERVEDOSMetaClassBase2(); - virtual void taggedRelease(const void *tag, const int when) const = 0; + virtual void taggedRelease( + const void * tag, + const int freeWhen) const = 0; private: // Virtual Padding @@ -324,100 +759,327 @@ _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void)) virtual void _RESERVEDOSMetaClassBase7(); } APPLE_KEXT_COMPATIBILITY; + /*! - @class OSMetaClass : OSMetaClassBase - @abstract An instance of a OSMetaClass represents one class then the kernel's runtime type information system is aware of. 
-*/ + * @class OSMetaClass + * + * @abstract + * OSMetaClass manages run-time type information + * for Libkern and I/O Kit C++ classes. + * + * @discussion + * + * OSMetaClass manages run-time type information + * for Libkern and I/O Kit C++ classes. + * An instance of OSMetaClass exists for (nearly) every such C++ class, + * keeping track of inheritance relationships, class lookup by name, + * instance counts, and more. + * OSMetaClass operates almost entirely behind the scenes, + * and kernel extensions should rarely, if ever, + * have to interact directly with OSMetaClass. + * + * Use by Kernel Extensions + * + * While kernel extensions rarely interact directly with OSMetaClass at run time, + * they must register their classes with the metaclass system + * using the macros declared here. + * The class declaration should use one of these macros + * before its first member function declaration: + *
<ul> + *
<li>@link OSDeclareDefaultStructors OSDeclareDefaultStructors@/link - + * for classes with no abstract member function declarations</li>
+ *
<li>@link OSDeclareAbstractStructors OSDeclareAbstractStructors@/link - + * for classes with at least one abstract member function declaration</li>
+ *
<li>@link OSDeclareFinalStructors OSDeclareFinalStructors@/link - + * for classes that should not be subclassable by another kext</li>
+ * </ul>
+ * + * The class implementation should then use one of these macros: + *
<ul> + *
<li>@link OSDefineMetaClassAndStructors + * OSDefineMetaClassAndStructors@/link - + * for classes with no abstract member function declarations</li>
+ *
<li>@link OSDefineMetaClassAndAbstractStructors + * OSDefineMetaClassAndAbstractStructors@/link - + * for classes with at least one abstract member function declaration</li>
+ *
<li>@link OSDefineMetaClassAndFinalStructors + * OSDefineMetaClassAndFinalStructors@/link - + * for classes that should not be subclassable by another kext</li>
+ * </ul>
+ * + * Classes in kernel extensions that are intended for use as libraries + * may need to reserve vtable slots to preserve binary compatibility + * as new functions are added. They may do so with these macros: + *
<ul> + *
<li>@link OSMetaClassDeclareReservedUnused + * OSMetaClassDeclareReservedUnused@/link - + * reserves a vtable slot</li>
+ *
<li>@link OSMetaClassDefineReservedUnused + * OSMetaClassDefineReservedUnused@/link - + * defines the reserved vtable slot as an unimplemented function</li>
+ *
<li>@link OSMetaClassDeclareReservedUsed + * OSMetaClassDeclareReservedUsed@/link - + * documents that a formerly reserved slot is now used</li>
+ *
<li>@link OSMetaClassDefineReservedUsed + * OSMetaClassDefineReservedUsed@/link - + * documents that a formerly reserved slot is now used</li>
+ * </ul>
+ * + * Use Restrictions + * + * OSMetaClass should not be explicitly subclassed by kernel extensions + * (the declare/define macros do that), + * nor should kernel extensions call its run-time type functions directly. + * + * OSMetaClass functions should be considered + * unsafe to call in a primary interrupt context. + * + * Concurrency Protection + * + * Kernel extensions should in general not interact + * with OSMetaClass objects directly, + * instead using the run-time type macros. + * Much of OSMetaClass's interface is intended for use + * by the run-time type information system, + * which handles concurrency and locking internally. + */ class OSMetaClass : private OSMetaClassBase { + friend class OSKext; private: // Can never be allocated must be created at compile time - static void *operator new(size_t size); + static void * operator new(size_t size); struct ExpansionData { }; - -/*! @var reserved Reserved for future use. (Internal use only) */ + + /* Reserved for future use. (Internal use only) */ ExpansionData *reserved; -/*! @var superClass Handle to the superclass' meta class. */ + /* superClass Handle to the superclass's meta class. */ const OSMetaClass *superClassLink; -/*! @var className OSSymbol of the class' name. */ + /* className OSSymbol of the class' name. */ const OSSymbol *className; -/*! @var classSize How big is a single instancde of this class. */ + /* classSize How big is a single instancde of this class. */ unsigned int classSize; -/*! @var instanceCount Roughly number of instances of the object. Used primarily as a code in use flag. */ + /* instanceCount Roughly number of instances of the object, + * +1 for each direct subclass with a nonzero refcount. + * Used primarily as a code-in-use flag. + */ mutable unsigned int instanceCount; -/*! @function OSMetaClass - @abstract Private the default constructor */ + /* Not to be included in headerdoc. + * + * @function OSMetaClass + * + * @abstract + * The default private constructor. 
+ */ OSMetaClass(); // Called by postModLoad -/*! @function logError - @abstract Given an error code log an error string using printf */ + /* Not to be included in headerdoc. + * + * @function logError + * + * @abstract + * Logs an error string for an OSReturn value + * using printf. + * + * @param result The OSReturn value for which to log a message. + * + * @discussion + * This function is used to log errors loading kernel extensions. + * Kernel extensions themselves should not call it. + */ static void logError(OSReturn result); public: -/*! @function getMetaClassWithName - @abstract Lookup a meta-class in the runtime type information system - @param name Name of the desired class's meta-class. - @result pointer to a meta-class object if found, 0 otherwise. */ - - static const OSMetaClass *getMetaClassWithName(const OSSymbol *name); + /*! + * @function getMetaClassWithName + * + * @abstract + * Look up a metaclass in the run-time type information system. + * + * @param name The name of the desired class's metaclass. + * + * @result + * A pointer to the metaclass object if found, NULL otherwise. + */ + static const OSMetaClass * getMetaClassWithName(const OSSymbol * name); protected: -/*! @function retain - @abstract Implement abstract but should no dynamic allocation is allowed */ + /*! + * @function retain + * + * @abstract + * Implements the abstract retain function to do nothing. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ virtual void retain() const; -/*! @function release - @abstract Implement abstract but should no dynamic allocation is allowed */ - virtual void release() const; - -/*! @function release - @abstract Implement abstract but should no dynamic allocation is allowed - @param when ignored. */ - virtual void release(int when) const; - -/*! 
@function taggedRetain - @abstract Retain a tagged reference in this object. -*/ - virtual void taggedRetain(const void *tag = 0) const; -/*! @function release - @abstract Release a tagged reference to this object -*/ - virtual void taggedRelease(const void *tag = 0) const; + /*! + * @function release + * + * @abstract + * Implements the abstract release function to do nothing. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ + virtual void release() const; -/*! @function release - @abstract Release a tagged reference to this object -*/ - virtual void taggedRelease(const void *tag, const int when) const; -/*! @function getRetainCount - @abstract Implement abstract but should no dynamic allocation is allowed */ + /*! + * @function release + * + * @abstract + * Implements the abstract release(int freeWhen) + * function to do nothing. + * + * @param freeWhen Unused. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ + virtual void release(int freeWhen) const; + + + /*! + * @function taggedRetain + * + * @abstract + * Implements the abstract taggedRetain(const void *) + * function to do nothing. + * + * @param tag Unused. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ + virtual void taggedRetain(const void * tag = 0) const; + + + /*! + * @function taggedRelease + * + * @abstract + * Implements the abstract taggedRelease(const void *) + * function to do nothing. + * + * @param tag Unused. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. 
+ */ + virtual void taggedRelease(const void * tag = 0) const; + + + /*! + * @function taggedRelease + * + * @abstract + * Implements the abstract taggedRelease(const void *, cont int) + * function to do nothing. + * + * @param tag Unused. + * @param freeWhen Unused. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ + virtual void taggedRelease( + const void * tag, + const int freeWhen) const; + + + /*! + * @function getRetainCount + * + * @abstract + * Implements the abstract getRetainCount + * function to return 0. + * + * @result + * Always returns 0. + * + * @discussion + * Since an OSMetaClass instance must remain in existence + * for as long as its kernel extension is loaded, + * OSMetaClass does not use reference-counting. + */ virtual int getRetainCount() const; + + /* Not to be included in headerdoc. + * + * @function getMetaClass + * + * @abstract + * Returns the meta-metaclass. + * + * @result + * The metaclass of the OSMetaClass object. + */ virtual const OSMetaClass * getMetaClass() const; -/*! @function OSMetaClass - @abstract Constructor for OSMetaClass objects - @discussion This constructor is protected and cannot not be used to instantiate an OSMetaClass object, i.e. OSMetaClass is an abstract class. This function stores the currently constructing OSMetaClass instance away for later processing. See preModLoad and postModLoad. - @param inClassName cString of the name of the class this meta-class represents. - @param inSuperClassName cString of the name of the super class. - @param inClassSize sizeof the class. */ - OSMetaClass(const char *inClassName, - const OSMetaClass *inSuperClass, - unsigned int inClassSize); - -/*! 
@function ~OSMetaClass - @abstract Destructor for OSMetaClass objects - @discussion If this function is called it means that the object code that implemented this class is actually in the process of unloading. The destructor removes all reference's to the subclass from the runtime type information system. */ + + /*! + * @function OSMetaClass + * + * @abstract + * Constructor for OSMetaClass objects. + * + * @param className A C string naming the C++ class + * that this OSMetaClass represents. + * @param superclass The OSMetaClass object representing the superclass + * of this metaclass's class. + * @param classSize The allocation size of the represented C++ class. + * + * @discussion + * This constructor is protected and cannot be used + * to instantiate OSMetaClass directly, as OSMetaClass is an abstract class. + * This function is called during kext loading + * to queue C++ classes for registration. + * See @link preModLoad preModLoad@/link and + * @link postModLoad postModLoad@/link. + */ + OSMetaClass(const char * className, + const OSMetaClass * superclass, + unsigned int classSize); + + + /*! + * @function ~OSMetaClass + * + * @abstract + * Destructor for OSMetaClass objects. + * + * @discussion + * This function is called when the kernel extension that implements + * the metaclass's class is unloaded. + * The destructor removes all references to the class + * from the run-time type information system. + */ virtual ~OSMetaClass(); // Needs to be overriden as NULL as all OSMetaClass objects are allocated @@ -427,288 +1089,955 @@ class OSMetaClass : private OSMetaClassBase public: static const OSMetaClass * const metaClass; -/*! @function preModLoad - @abstract Prepare the runtime type system for the load of a module. - @discussion Prepare the runtime type information system for the loading of new all meta-classes constructed between now and the next postModLoad. 
preModLoad grab's a lock so that the runtime type information system loading can be protected, the lock is released by the postModLoad function. Any OSMetaClass that is constructed between the bracketing pre and post calls will be assosiated with the module name. - @param kmodName globally unique cString name of the kernel module being loaded. - @result If success full return a handle to be used in later calls 0 otherwise. */ - static void *preModLoad(const char *kmodName); - -/*! @function checkModLoad - @abstract Check if the current load attempt is still OK. - @param loadHandle Handle returned when a successful call to preModLoad is made. - @result true if no error's are outstanding and the system is primed to recieve more objects. */ - static bool checkModLoad(void *loadHandle); - -/*! @function postModLoad - @abstract Finish postprocessing on a kernel module's meta-classes. - @discussion As the order of static object construction is undefined it is necessary to process the constructors in two phases. These phases rely on global information that is created be the preparation step, preModLoad, which also guarantees single threading between multiple modules. Phase one was the static construction of each meta-class object one by one withing the context prepared by the preModLoad call. postModLoad is the second phase of processing. Inserts links all of the super class inheritance chains up, inserts the meta-classes into the global register of classes and records for each meta-class which kernel module caused it's construction. Finally it cleans up the temporary storage and releases the single threading lock and returns whatever error has been recorded in during the construction phase or the post processing phase. - @param loadHandle Handle returned when a successful call to preModLoad is made. - @result Error code of the first error encountered. */ - static OSReturn postModLoad(void *loadHandle); - -/*! 
@function modHasInstance - @abstract Do any of the objects represented by OSMetaClass and associated with the given kernel module name have instances? - @discussion Check all meta-classes associated with the module name and check their instance counts. This function is used to check to see if a module can be unloaded. Obviously if an instance is still outstanding it isn't safe to unload the code that relies on that object. - @param kmodName cString of the kernel module name. - @result true if there are any current instances of any class in the module. -*/ - static bool modHasInstance(const char *kmodName); - -/*! @function reportModInstances - @abstract Log any object that has instances in a module. - @discussion When a developer ask for a module to be unloaded but the unload fails due to outstanding instances. This function will report which classes still have instances. It is intended mostly for developers to find problems with unloading classes and will be called automatically by 'verbose' unloads. - @param kmodName cString of the kernel module name. */ - static void reportModInstances(const char *kmodName); - -/*! @function considerUnloads - @abstract Schedule module unloading. - @discussion Schedule unused modules to be unloaded; called when IOKit matching goes idle. */ - + /*! + * @function preModLoad + * + * @abstract + * Prepares the run-time type system + * for the creation of new metaclasses + * during loading of a kernel extension (module). + * + * @param kextID The bundle ID of the kext being loaded. + * + * @result + * An opaque handle to the load context + * for the kernel extension on success; + * NULL on failure. + * + * @discussion + * Not for use by kernel extensions. + * + * Prepares the run-time type information system to record and register + * metaclasses created by static constructors until a subsequent call to + * @link postModLoad postModLoad@/link. 
+ * preModLoad takes a lock to ensure processing of a single + * load operation at a time; the lock is released by + * @link postModLoad postModLoad@/link. + * Any OSMetaClass constructed between these two function calls + * will be associated with kextID. + */ + static void * preModLoad(const char * kextID); + + + /*! + * @function checkModLoad + * + * @abstract + * Checks whether the current kext load operation can proceed. + * + * @param loadHandle The opaque handle returned + * by @link preModLoad preModLoad@/link. + * @result + * true if no errors are outstanding + * and the system is ready to process more metaclasses. + * + * @discussion + * Not for use by kernel extensions. + */ + static bool checkModLoad(void * loadHandle); + + + /*! + * @function postModLoad + * + * @abstract + * Registers the metaclasses created during loading of a kernel extension. + * + * @param loadHandle The opaque handle returned + * by @link preModLoad preModLoad@/link. + * @result + * The error code of the first error encountered, + * or + * @link + * //apple_ref/cpp/macro/kOSReturnSuccess + * kOSReturnSuccess@/link + * if no error occurred. + * + * @discussion + * Not for use by kernel extensions. + * + * Called after all static constructors in a kernel extension + * have created metaclasses, + * this function checks for duplicate class names, + * then registers the new metaclasses under the kext ID + * that @link preModLoad preModLoad@/link was called with, + * so that they can be dynamically allocated + * and have their instance counts tracked. + * postModLoad releases the lock taken by + * @link preModLoad preModLoad@/link. + */ + static OSReturn postModLoad(void * loadHandle); + + /*! + * @function modHasInstance + * + * @abstract + * Returns whether any classes defined by the named + * kernel extension (or their subclasses) have existing instances. + * + * @param kextID The bundle ID of the kernel extension to check. 
+ * + * @result + * true if the kext is found and + * if any class defined by that kext + * has a nonzero instance count, + * false otherwise. + * + * @discussion + * This function is called before a kernel extension's static destructors + * are invoked, prior to unloading the extension. + * If any classes still have instances or subclasses with instances, + * those classes are logged + * (using @link reportModInstances reportModInstances@/link) and + * the kernel extension is not unloaded. + */ + static bool modHasInstance(const char * kextID); + + + /*! + * @function reportModInstances + * + * @abstract + * Logs the instance counts for classes + * defined by a kernel extension. + * + * @param kextID The bundle ID of the kernel extension to report on. + * + * @discussion + * This function prints the names and instance counts + * of any class defined by kextID + * that has a nonzero instance count. + * It's called by @link modHasInstance modHasInstance@/link + * to help diagnose problems unloading kernel extensions. + */ + static void reportModInstances(const char * kextID); + + + /*! + * @function considerUnloads + * + * @abstract + * Schedules automatic unloading of unused kernel extensions. + * + * @discussion + * This function schedules a check for kernel extensions + * that can be automatically unloaded, + * canceling any currently scheduled check. + * At that time, any such kexts with no Libkern C++ instances + * and no external references are unloaded. + * + * The I/O Kit calls this function when matching goes idle. + * + * Kernel extensions that define subclasses of + * @link //apple_ref/doc/class/IOService IOService@/link + * are eligible for automatic unloading. + * + * (On releases of Mac OS X prior to Snow Leopard (10.6), + * any kernel extension defining any Libkern C++ class + * was eligible for automatic unloading, + * but that unload did not call the module stop routine.
+ * Non-I/O Kit kernel extensions that define Libkern C++ subclasses + * should be sure to have OSBundleLibraries declarations that ensure + * they will not load on releases prior to Snow Leopard.) + */ static void considerUnloads(); -/*! @function allocClassWithName - @abstract Lookup a meta-class in the runtime type information system and return the results of an alloc call. - @param name Name of the desired class. - @result pointer to an new object, 0 if not found or so memory. */ - static OSObject *allocClassWithName(const OSSymbol *name); - -/*! @function allocClassWithName - @abstract Lookup a meta-class in the runtime type information system and return the results of an alloc call. - @param name Name of the desired class. - @result pointer to an new object, 0 if not found or so memory. */ - static OSObject *allocClassWithName(const OSString *name); - -/*! @function allocClassWithName - @abstract Lookup a meta-class in the runtime type information system and return the results of an alloc call. - @param name Name of the desired class. - @result pointer to an new object, 0 if not found or so memory. */ - static OSObject *allocClassWithName(const char *name); - -/*! @function checkMetaCastWithName - @abstract Introspect an objects inheritance tree looking for a class of the given name. Basis of MacOSX's kernel dynamic casting mechanism. - @param name Name of the desired class or super class. - @param in object to be introspected. - @result in parameter if cast valid, 0 otherwise. */ - static OSMetaClassBase * - checkMetaCastWithName(const OSSymbol *name, const OSMetaClassBase *in); - -/*! @function checkMetaCastWithName - @abstract Introspect an objects inheritance tree looking for a class of the given name. Basis of MacOSX's kernel dynamic casting mechanism. - @param name Name of the desired class or super class. - @param in object to be introspected. - @result in parameter if cast valid, 0 otherwise. 
*/ - static OSMetaClassBase * - checkMetaCastWithName(const OSString *name, const OSMetaClassBase *in); - -/*! @function checkMetaCastWithName - @abstract Introspect an objects inheritance tree looking for a class of the given name. Basis of MacOSX's kernel dynamic casting mechanism. - @param name Name of the desired class or super class. - @param in object to be introspected. - @result in parameter if cast valid, 0 otherwise. */ - static OSMetaClassBase * - checkMetaCastWithName(const char *name, const OSMetaClassBase *in); - - -/*! @function instanceConstructed - @abstract Counts the instances of the class behind this metaclass. - @discussion Every non-abstract class that inherits from OSObject has a default constructor that calls it's own meta-class' instanceConstructed function. This constructor is defined by the OSDefineMetaClassAndStructors macro (qv) that all OSObject subclasses must use. Also if the instance count goes from 0 to 1, ie the first instance, then increment the instance count of the super class */ - void instanceConstructed() const; -/*! @function instanceDestructed - @abstract Removes one instance of the class behind this metaclass. - @discussion OSObject's free function calls this method just before it does a 'delete this' on itself. If the instance count transitions from 1 to 0, i.e. the last object, then one instance of the superclasses is also removed. */ - void instanceDestructed() const; + /*! + * @function allocClassWithName + * + * @abstract + * Allocates an instance of a named OSObject-derived class. + * + * @param name The name of the desired class. + * + * @result + * A pointer to the newly-allocated, uninitialized object on success; + * NULL on failure. + * + * @discussion + * Kernel extensions should not need to use this function + * directly, instead using static instance-creation functions + * defined by classes. + * + * This function consults the run-time type information system + * to find the metaclass for the named class. 
+ * If it exists, it calls the metaclass's @link alloc alloc@/link + * function and returns the result. + */ + static OSObject * allocClassWithName(const OSSymbol * name); + + + /*! + * @function allocClassWithName + * + * @abstract + * Allocates an instance of a named OSObject-derived class. + * + * @param name The name of the desired class. + * + * @result + * A pointer to the newly-allocated, uninitialized object on success; + * NULL on failure. + * + * @discussion + * Kernel extensions should not need to use this function + * directly, instead using static instance-creation functions + * defined by classes. + * + * This function consults the run-time type information system + * to find the metaclass for the named class. + * If it exists, it calls the metaclass's @link alloc alloc@/link + * function and returns the result. + */ + static OSObject * allocClassWithName(const OSString * name); + + + /*! + * @function allocClassWithName + * + * @abstract + * Allocates an instance of a named OSObject-derived class. + * + * @param name The name of the desired class. + * + * @result + * A pointer to the newly-allocated, uninitialized object on success; + * NULL on failure. + * + * @discussion + * Kernel extensions should not need to use this function + * directly, instead using static instance-creation functions + * defined by classes. + * + * This function consults the run-time type information system + * to find the metaclass for the named class. + * If it exists, it calls the metaclass's @link alloc alloc@/link + * function and returns the result. + */ + static OSObject * allocClassWithName(const char * name); + + + /*! + * @function checkMetaCastWithName + * + * @abstract + * Search the metaclass inheritance hierarchy by name for an object instance. + * + * @param className The name of the desired class or superclass. + * @param object The object whose metaclass begins the search. + * + * @result + * object if it's derived from className; + * NULL otherwise.
+ * + * @discussion + * This function is the basis of the Libkern run-time type-checking system. + * Kernel extensions should not use it directly, + * instead using @link OSDynamicCast OSDynamicCast@/link or + * @link OSCheckTypeInst OSCheckTypeInst@/link. + */ + static OSMetaClassBase * checkMetaCastWithName( + const OSSymbol * className, + const OSMetaClassBase * object); + + /*! + * @function checkMetaCastWithName + * + * @abstract + * Search the metaclass inheritance hierarchy by name for an object instance. + * + * @param className The name of the desired class or superclass. + * @param object The object whose metaclass begins the search. + * + * @result + * object if it's derived from className; + * NULL otherwise. + * + * @discussion + * Kernel extensions should not use this function directly, + * instead using @link OSDynamicCast OSDynamicCast@/link or + * @link OSCheckTypeInst OSCheckTypeInst@/link. + */ + static OSMetaClassBase * checkMetaCastWithName( + const OSString * className, + const OSMetaClassBase * object); + + /*! + * @function checkMetaCastWithName + * + * @abstract + * Search the metaclass inheritance hierarchy by name for an object instance. + * + * @param className The name of the desired class or superclass. + * @param object The object whose metaclass begins the search. + * + * @result + * object if it's derived from className; + * NULL otherwise. + * + * @discussion + * Kernel extensions should not use this function directly, + * instead using @link OSDynamicCast OSDynamicCast@/link or + * @link OSCheckTypeInst OSCheckTypeInst@/link. + */ + static OSMetaClassBase * checkMetaCastWithName( + const char * className, + const OSMetaClassBase * object); + + + /*! + * @function instanceConstructed + * + * @abstract + * Counts the instances of the class managed by this metaclass. + * + * @discussion + * Not for use by kernel extensions. 
+ * + * Every non-abstract class that inherits from OSObject + * has a default constructor that calls its own metaclass's + * instanceConstructed function. + * This constructor is defined by the + * @link + * OSDefineMetaClassAndStructors + * OSDefineMetaClassAndStructors@/link + * macro that all OSObject subclasses must use. + * + * If a class's instance count goes from 0 to 1--that is, + * upon the creation of the first instance of that class--the + * superclass's instance count is also incremented. + * This propagates reference counts up the inheritance chain so that + * superclasses are counted as "in use" when subclasses have instances. + */ + void instanceConstructed() const; -/*! @function checkMetaCast - @abstract Ask a OSMetaClass instance if the given object is either an instance of it or an instance of a subclass of it. - @param check Pointer of object to introspect. - @result check parameter if cast valid, 0 otherwise. */ - OSMetaClassBase *checkMetaCast(const OSMetaClassBase *check) const; + /*! + * @function instanceDestructed + * + * @abstract + * Decrements the instance count of the class managed by this metaclass. + * + * @discussion + * Every non-abstract class that inherits from OSObject + * has a default destructor that calls its own metaclass's + * instanceDestructed function. + * This destructor is defined by the + * @link OSDefineMetaClassAndStructors OSDefineMetaClassAndStructors@/link + * macro that all OSObject subclasses must use. + * + * If a class's instance count goes from 1 to 0--that is, + * upon the destruction of the last instance of that class--the + * superclass's instance count is also decremented. + * This reduces "in use" counts from superclasses when their subclasses + * no longer have instances. + */ + void instanceDestructed() const; -/*! @function getInstanceCount - @abstract How many instances of the class have been created. - @result Count of the number of instances. */ + /*!
+ * @function checkMetaCast + * + * @abstract + * Check whether a given object is an instance of the receiving + * metaclass's class or one derived from it. + * + * @param object The object to check for inheritance. + * + * @result + * object if it is derived from the receiver's class, + * NULL if not. + */ + OSMetaClassBase * checkMetaCast(const OSMetaClassBase * object) const; + + + /*! + * @function getInstanceCount + * + * @abstract + * Returns the number of existing instances of the metaclass's class. + * + * @result + * The number of existing instances of the metaclass's class, + * plus 1 for each subclass with any instance. + */ unsigned int getInstanceCount() const; -/*! @function getSuperClass - @abstract 'Get'ter for the super class. - @result Pointer to superclass, chain ends with 0 for OSObject. */ - const OSMetaClass *getSuperClass() const; - -/*! @function getKmodName - @abstract 'Get'ter for the name of the kmod. - @result OSSymbol representing the kmod name. */ - const OSSymbol *getKmodName() const; - -/*! @function getClassName - @abstract 'Get'ter for class name. - @result cString of the class name. */ - const char *getClassName() const; - -/*! @function getClassSize - @abstract 'Get'ter for sizeof(class). - @result sizeof of class that this OSMetaClass instance represents. */ + /*! + * @function getSuperClass + * + * @abstract + * Returns the super-metaclass of the receiver. + * + * @result + * Returns a pointer to the super-metaclass of the receiving + * OSMetaClass, or NULL for OSObject's metaclass. + */ + const OSMetaClass * getSuperClass() const; + + + /*! + * @function getKmodName + * + * @abstract + * Returns the bundle identifier of the kernel extension + * that defines this metaclass. + * + * @result + * The bundle identifier of the kernel extension that defines this metaclass. + * + * @discussion + * "Kmod" is an older term for kernel extension. + */ + const OSSymbol * getKmodName() const; + + + /*! 
+ * @function getClassName + * + * @abstract + * Returns the name of the C++ class managed by this metaclass. + * + * @result + * Returns the name of the C++ class managed by this metaclass. + */ + const char * getClassName() const; + + + /*! + * @function getClassSize + * + * @abstract + * Returns the allocation size of the C++ class managed by this metaclass. + * + * @result + * The allocation size of the C++ class managed by this metaclass. + */ unsigned int getClassSize() const; -/*! @function alloc - @abstract Allocate an instance of the class that this OSMetaClass instance represents. - @discussion This alloc function is analogous to the old ObjC class alloc method. Typically not used by clients as the static function allocClassWithName is more generally useful. Infact that function is implemented in terms of this virtual function. All subclass's of OSMetaClass must implement this function but that is what the OSDefineMetaClassAndStructor's families of macros does for the developer automatically. - @result Pointer to a new object with a retain count of 1. */ - virtual OSObject *alloc() const = 0; - -/*! @function OSDeclareCommonStructors - @abstract Basic helper macro for the OSDeclare for Default and Abstract macros, qv. DO NOT USE. - @param className Name of class. NO QUOTES. */ -#define OSDeclareCommonStructors(className) \ - private: \ - static const OSMetaClass * const superClass; \ - public: \ - static const OSMetaClass * const metaClass; \ - static class MetaClass : public OSMetaClass { \ - public: \ - MetaClass(); \ - virtual OSObject *alloc() const; \ - } gMetaClass; \ - friend class className ::MetaClass; \ - virtual const OSMetaClass * getMetaClass() const; \ - protected: \ - className (const OSMetaClass *); \ - virtual ~ className () - - -/*! @function OSDeclareDefaultStructors - @abstract One of the macro's used in the class declaration of all subclasses of OSObject, declares runtime type information data and interfaces. 
- @discussion Macro used in the class declaration all subclasses of OSObject, declares runtime type information data and interfaces. By convention it should be 'called' immediately after the opening brace in a class declaration. It leaves the current privacy state as 'protected:'. - @param className Name of class. NO QUOTES. */ -#define OSDeclareDefaultStructors(className) \ - OSDeclareCommonStructors(className); \ - public: \ - className (); \ + + /*! + * @function alloc + * + * @abstract + * Allocates an instance of the C++ class managed by this metaclass. + * + * @result + * A pointer to the newly allocated, uninitialized instance, + * with a retain count of 1; NULL on allocation failure. + * + * @discussion + * This function is automatically created by the metaclass-registration macros + * to enable dynamic instance allocation. + */ + virtual OSObject * alloc() const = 0; + + + /* Not to be included in headerdoc. + * + * @define OSDeclareCommonStructors + * @hidecontents + * + * @abstract + * Helper macro for for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + */ +#define OSDeclareCommonStructors(className) \ + private: \ + static const OSMetaClass * const superClass; \ + public: \ + static const OSMetaClass * const metaClass; \ + static class MetaClass : public OSMetaClass { \ + public: \ + MetaClass(); \ + virtual OSObject *alloc() const; \ + } gMetaClass; \ + friend class className ::MetaClass; \ + virtual const OSMetaClass * getMetaClass() const; \ + protected: \ + className (const OSMetaClass *); \ + virtual ~ className () + + + /*! + * @define OSDeclareDefaultStructors + * @hidecontents + * + * @abstract + * Declares run-time type information and functions + * for a concrete Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. 
+ * + * @discussion + * Concrete Libkern C++ classes should "call" this macro + * immediately after the opening brace in a class declaration. + * It leaves the current privacy state as protected:. + */ +#define OSDeclareDefaultStructors(className) \ + OSDeclareCommonStructors(className); \ + public: \ + className (); \ protected: -/*! @function OSDeclareAbstractStructors - @abstract One of the macro's used in the class declaration of all subclasses of OSObject, declares runtime type information data and interfaces. - @discussion This macro is used when the class being declared has one or more '= 0' pure virtual methods and thus it is illegal to create an instance of this class. It leaves the current privacy state as 'protected:'. - @param className Name of class. NO QUOTES. */ -#define OSDeclareAbstractStructors(className) \ - OSDeclareCommonStructors(className); \ - private: \ - className (); /* Make primary constructor private in abstract */ \ + /*! + * @define OSDeclareAbstractStructors + * @hidecontents + * + * @abstract + * Declares run-time type information and functions + * for an abstract Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * + * @discussion + * Abstract Libkern C++ classes--those with at least one + * pure virtual method--should "call" this macro + * immediately after the opening brace in a class declaration. + * It leaves the current privacy state as protected:. + */ +#define OSDeclareAbstractStructors(className) \ + OSDeclareCommonStructors(className); \ + private: \ + className (); /* Make primary constructor private in abstract */ \ protected: -/*! @function OSDefineMetaClassWithInit - @abstract Basic helper macro for the OSDefineMetaClass for the default and Abstract macros, qv. DO NOT USE. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. 
- @param init Name of a function to call after the OSMetaClass is constructed. */ -#define OSDefineMetaClassWithInit(className, superClassName, init) \ - /* Class global data */ \ - className ::MetaClass className ::gMetaClass; \ - const OSMetaClass * const className ::metaClass = \ - & className ::gMetaClass; \ - const OSMetaClass * const className ::superClass = \ - & superClassName ::gMetaClass; \ - /* Class member functions */ \ - className :: className(const OSMetaClass *meta) \ - : superClassName (meta) { } \ - className ::~ className() { } \ - const OSMetaClass * className ::getMetaClass() const \ - { return &gMetaClass; } \ - /* The ::MetaClass constructor */ \ - className ::MetaClass::MetaClass() \ - : OSMetaClass(#className, className::superClass, sizeof(className)) \ + /*! + * @define OSDeclareFinalStructors + * @hidecontents + * + * @abstract + * Declares run-time type information and functions + * for a final (non-subclassable) Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * + * @discussion + * Final Libkern C++ classes--those that do not allow subclassing--should + * "call" this macro immediately after the opening brace in a class declaration. + * (Final classes in the kernel may actually have subclasses in the kernel, + * but kexts cannot define any subclasses of a final class.) + * It leaves the current privacy state as protected:. + * + * Note: If the class is exported by a pseudokext (symbol set), + * the final symbol generated by this macro must be exported + * for the final-class attribute to be enforced. + * + * Warning: Changing a class from "Default" to "Final" will break + * binary compatibility. + */ +#define OSDeclareFinalStructors(className) \ + OSDeclareDefaultStructors(className) \ + private: \ + void __OSFinalClass(void); \ + protected: + + + /* Not to be included in headerdoc. 
+ * + * @define OSDefineMetaClassWithInit + * @hidecontents + * + * @abstract + * Helper macro for for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * @param init A function to call in the constructor + * of the class's OSMetaClass. + */ +#define OSDefineMetaClassWithInit(className, superclassName, init) \ + /* Class global data */ \ + className ::MetaClass className ::gMetaClass; \ + const OSMetaClass * const className ::metaClass = \ + & className ::gMetaClass; \ + const OSMetaClass * const className ::superClass = \ + & superclassName ::gMetaClass; \ + /* Class member functions */ \ + className :: className(const OSMetaClass *meta) \ + : superclassName (meta) { } \ + className ::~ className() { } \ + const OSMetaClass * className ::getMetaClass() const \ + { return &gMetaClass; } \ + /* The ::MetaClass constructor */ \ + className ::MetaClass::MetaClass() \ + : OSMetaClass(#className, className::superClass, sizeof(className)) \ { init; } -/*! @function OSDefineAbstractStructors - @abstract Basic helper macro for the OSDefineMetaClass for the default and Abstract macros, qv. DO NOT USE. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. */ -#define OSDefineAbstractStructors(className, superClassName) \ + + /* Not to be included in headerdoc. + * + * @define OSDefineAbstractStructors + * @hidecontents + * + * @abstract + * Helper macro for for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. 
+ */ +#define OSDefineAbstractStructors(className, superclassName) \ OSObject * className ::MetaClass::alloc() const { return 0; } -/*! @function OSDefineDefaultStructors - @abstract Basic helper macro for the OSDefineMetaClass for the default and Abstract macros, qv. DO NOT USE. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. */ -#define OSDefineDefaultStructors(className, superClassName) \ - OSObject * className ::MetaClass::alloc() const \ - { return new className; } \ - className :: className () : superClassName (&gMetaClass) \ - { gMetaClass.instanceConstructed(); } - - -/*! @function OSDefineMetaClassAndAbstractStructorsWithInit - @abstract Primary definition macro for all abstract classes that a subclasses of OSObject. - @discussion Define an OSMetaClass subclass and the primary constructors and destructors for a subclass of OSObject that is an abstract class. In general this 'function' is 'called' at the top of the file just before the first function is implemented for a particular class. Once the OSMetaClass has been constructed, at load time, call the init routine. NB you can not rely on the order of execution of the init routines. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. - @param init Name of a function to call after the OSMetaClass is constructed. */ -#define OSDefineMetaClassAndAbstractStructorsWithInit(className, superClassName, init) \ - OSDefineMetaClassWithInit(className, superClassName, init) \ - OSDefineAbstractStructors(className, superClassName) - -/*! @function OSDefineMetaClassAndStructorsWithInit - @abstract See OSDefineMetaClassAndStructors - @discussion Define an OSMetaClass subclass and the primary constructors and destructors for a subclass of OSObject that isn't an abstract class. 
In general this 'function' is 'called' at the top of the file just before the first function is implemented for a particular class. Once the OSMetaClass has been constructed, at load time, call the init routine. NB you can not rely on the order of execution of the init routines. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. - @param init Name of a function to call after the OSMetaClass is constructed. */ -#define OSDefineMetaClassAndStructorsWithInit(className, superClassName, init) \ - OSDefineMetaClassWithInit(className, superClassName, init) \ - OSDefineDefaultStructors(className, superClassName) - -/* Helpers */ -/*! @function OSDefineMetaClass - @abstract Define an OSMetaClass instance, used for backward compatiblility only. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. */ -#define OSDefineMetaClass(className, superClassName) \ - OSDefineMetaClassWithInit(className, superClassName, ) - -/*! @function OSDefineMetaClassAndStructors - @abstract Define an OSMetaClass subclass and the runtime system routines. - @discussion Define an OSMetaClass subclass and the primary constructors and destructors for a subclass of OSObject that isn't an abstract class. In general this 'function' is 'called' at the top of the file just before the first function is implemented for a particular class. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. */ -#define OSDefineMetaClassAndStructors(className, superClassName) \ - OSDefineMetaClassAndStructorsWithInit(className, superClassName, ) - -/*! @function OSDefineMetaClassAndAbstractStructors - @abstract Define an OSMetaClass subclass and the runtime system routines. 
- @discussion Define an OSMetaClass subclass and the primary constructors and destructors for a subclass of OSObject that is an abstract class. In general this 'function' is 'called' at the top of the file just before the first function is implemented for a particular class. - @param className Name of class. NO QUOTES and NO MACROS. - @param superClassName Name of super class. NO QUOTES and NO MACROS. */ -#define OSDefineMetaClassAndAbstractStructors(className, superClassName) \ - OSDefineMetaClassAndAbstractStructorsWithInit (className, superClassName, ) + + /* Not to be included in headerdoc. + * + * @define OSDefineDefaultStructors + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + */ +#define OSDefineDefaultStructors(className, superclassName) \ + OSObject * className ::MetaClass::alloc() const \ + { return new className; } \ + className :: className () : superclassName (&gMetaClass) \ + { gMetaClass.instanceConstructed(); } + + /* Not to be included in headerdoc. + * + * @define OSDefineFinalStructors + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + */ +#define OSDefineFinalStructors(className, superclassName) \ + OSDefineDefaultStructors(className, superclassName) \ + void className ::__OSFinalClass(void) { } + + + /* Not to be included in headerdoc.
+ * + * @define OSDefineMetaClassAndStructorsWithInit + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * @param init A function to call in the constructor + * of the class's OSMetaClass. + */ +#define OSDefineMetaClassAndStructorsWithInit(className, superclassName, init) \ + OSDefineMetaClassWithInit(className, superclassName, init) \ + OSDefineDefaultStructors(className, superclassName) + + + /* Not to be included in headerdoc. + * + * @define OSDefineMetaClassAndAbstractStructorsWithInit + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * @param init A function to call in the constructor + * of the class's OSMetaClass. + */ +#define OSDefineMetaClassAndAbstractStructorsWithInit(className, superclassName, init) \ + OSDefineMetaClassWithInit(className, superclassName, init) \ + OSDefineAbstractStructors(className, superclassName) + + + /* Not to be included in headerdoc. + * + * @define OSDefineMetaClassAndFinalStructorsWithInit + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * @param init A function to call in the constructor + * of the class's OSMetaClass. 
+ */ +#define OSDefineMetaClassAndFinalStructorsWithInit(className, superclassName, init) \ + OSDefineMetaClassWithInit(className, superclassName, init) \ + OSDefineFinalStructors(className, superclassName) + + + /* Helpers */ + + /* Not to be included in headerdoc. + * + * @define OSDefineMetaClass + * @hidecontents + * + * @abstract + * Helper macro for the standard metaclass-registration macros. + * DO NOT USE. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * @param init A function to call in the constructor + * of the class's OSMetaClass. + */ +#define OSDefineMetaClass(className, superclassName) \ + OSDefineMetaClassWithInit(className, superclassName, ) + + + /*! + * @define OSDefineMetaClassAndStructors + * @hidecontents + * + * @abstract + * Defines an OSMetaClass and associated routines + * for a concrete Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * + * @discussion + * Concrete Libkern C++ classes should "call" this macro + * at the beginning of their implementation files, + * before any function implementations for the class. + */ +#define OSDefineMetaClassAndStructors(className, superclassName) \ + OSDefineMetaClassAndStructorsWithInit(className, superclassName, ) + + + /*! + * @define OSDefineMetaClassAndAbstractStructors + * @hidecontents + * + * @abstract + * Defines an OSMetaClass and associated routines + * for an abstract Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. 
+ * + * @discussion + * Abstract Libkern C++ classes--those with at least one + * pure virtual method--should "call" this macro + * at the beginning of their implementation files, + * before any function implementations for the class. + */ +#define OSDefineMetaClassAndAbstractStructors(className, superclassName) \ + OSDefineMetaClassAndAbstractStructorsWithInit (className, superclassName, ) + + + /*! + * @define OSDefineMetaClassAndFinalStructors + * @hidecontents + * + * @abstract + * Defines an OSMetaClass and associated routines + * for a final (non-subclassable) Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param superclassName The name of the superclass of the C++ class, + * as a raw token, + * not a string or macro. + * + * @discussion + * Final Libkern C++ classes--those that do not allow + * subclassing--should "call" this macro at the beginning + * of their implementation files, + * before any function implementations for the class. + * (Final classes in the kernel may actually have subclasses in the kernel, + * but kexts cannot define any subclasses of a final class.) + * + * Note: If the class is exported by a pseudokext (symbol set), + * the final symbol generated by this macro must be exported + * for the final-class attribute to be enforced. + * + * Warning: Changing a class from "Default" to "Final" will break + * binary compatibility. 
+ */ +#define OSDefineMetaClassAndFinalStructors(className, superclassName) \ + OSDefineMetaClassAndFinalStructorsWithInit(className, superclassName, ) + // Dynamic vtable patchup support routines and types void reservedCalled(int ind) const; -#define OSMetaClassDeclareReservedUnused(classname, index) \ - private: \ - APPLE_KEXT_PAD_METHOD void _RESERVED ## classname ## index () - -#define OSMetaClassDeclareReservedUsed(classname, index) -#define OSMetaClassDefineReservedUnused(classname, index) \ -void classname ::_RESERVED ## classname ## index () \ + /*! + * @define OSMetaClassDeclareReservedUnused + * @hidecontents + * + * @abstract + * Reserves vtable space for new virtual functions + * in a Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param index The numeric index of the vtable slot, + * as a raw constant, beginning from 0. + * + * @discussion + * Libkern C++ classes in kernel extensions that can be used as libraries + * can provide for backward compatibility by declaring a number + * of reserved vtable slots + * that can be replaced with new functions as they are added. + * Each reserved declaration must be accompanied in the implementation + * by a corresponding reference to + * @link OSMetaClassDefineReservedUnused + * OSMetaClassDefineReservedUnused@/link. + * + * When replacing a reserved slot, change the macro from "Unused" + * to "Used" to document the fact that the slot used to be reserved, + * and declare the new function immediately after the "Used" macro + * to preserve vtable ordering. + * See + * @link OSMetaClassDeclareReservedUsed + * OSMetaClassDeclareReservedUsed@/link. + */ +#define OSMetaClassDeclareReservedUnused(className, index) \ + private: \ + APPLE_KEXT_PAD_METHOD void _RESERVED ## className ## index () + + + /*! 
+ * @define OSMetaClassDeclareReservedUsed + * @hidecontents + * + * @abstract + * Documents use of reserved vtable space for new virtual functions + * in a Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param index The numeric index of the vtable slot, + * as a raw constant, beginning from 0. + * + * @discussion + * This macro evaluates to nothing, and is used to document reserved + * vtable slots as they are filled. + * See + * @link OSMetaClassDeclareReservedUnused + * OSMetaClassDeclareReservedUnused@/link. + */ +#define OSMetaClassDeclareReservedUsed(className, index) + + + /*! + * @define OSMetaClassDefineReservedUnused + * @hidecontents + * + * @abstract + * Defines a reserved vtable slot for a Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param index The numeric index of the vtable slot, + * as a raw constant, beginning from 0. + * + * @discussion + * Libkern C++ classes in kernel extensions that can be used as libraries + * can provide for backward compatibility by declaring a number + * of reserved vtable slots + * that can be replaced with new functions as they are added. + * Each reserved definition accompanies + * a corresponding declaration created with + * @link OSMetaClassDeclareReservedUnused + * OSMetaClassDeclareReservedUnused@/link. + * + * This macro is used in the implementation file + * to provide a placeholder definition for the reserved vtable slot, + * as a function that calls panic with an error message. + * + * When replacing a reserved slot, change the macro from "Unused" + * to "Used" to document the fact that the slot used to be reserved, + * and declare the new function immediately after the "Used" macro + * to preserve vtable ordering. + * See + * @link OSMetaClassDefineReservedUsed + * OSMetaClassDefineReservedUsed@/link. 
+ */ +#define OSMetaClassDefineReservedUnused(className, index) \ +void className ::_RESERVED ## className ## index () \ { APPLE_KEXT_PAD_IMPL(index); } -#define OSMetaClassDefineReservedUsed(classname, index) - // IOKit debug internal routines. + /*! + * @define OSMetaClassDefineReservedUsed + * @hidecontents + * + * @abstract + * Documents use of reserved vtable space for new virtual functions in a Libkern C++ class. + * + * @param className The name of the C++ class, as a raw token, + * not a string or macro. + * @param index The numeric index of the vtable slot, + * as a raw constant, beginning from 0. + * + * @discussion + * This macro evaluates to nothing, and is used to document reserved + * vtable slots as they are filled. + * See + * @link OSMetaClassDefineReservedUnused + * OSMetaClassDefineReservedUnused@/link. + */ +#define OSMetaClassDefineReservedUsed(className, index) + + // I/O Kit debug internal routines. static void printInstanceCounts(); - static void serializeClassDictionary(OSDictionary *dict); + static void serializeClassDictionary(OSDictionary * dict); private: // Obsolete APIs - static OSDictionary *getClassDictionary(); - virtual bool serialize(OSSerialize *s) const; + static OSDictionary * getClassDictionary(); + virtual bool serialize(OSSerialize * serializer) const; // Virtual Padding functions for MetaClass's OSMetaClassDeclareReservedUnused(OSMetaClass, 0); diff --git a/libkern/libkern/c++/OSNumber.h b/libkern/libkern/c++/OSNumber.h index df90e9ddd..0b206784e 100644 --- a/libkern/libkern/c++/OSNumber.h +++ b/libkern/libkern/c++/OSNumber.h @@ -34,9 +34,40 @@ #include /*! - @class OSNumber - @abstract A container class for numeric values. -*/ + * @header + * + * @abstract + * This header declares the OSNumber container class. + */ + + +/*! + * @class OSNumber + * + * @abstract + * OSNumber wraps an integer value in a C++ object + * for use in Libkern collections. 
+ * + * @discussion + * OSNumber represents an integer of 8, 16, 32, or 64 bits + * as a Libkern C++ object. + * OSNumber objects are mutable: you can add to or set their values. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSNumber provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSNumber : public OSObject { OSDeclareDefaultStructors(OSNumber) @@ -47,126 +78,340 @@ class OSNumber : public OSObject struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; public: - /*! - @function withNumber - @abstract A static constructor function to create and initialize an instance of OSNumber with a given value. - @param value The numeric integer value. - @param numberOfBits The number of bit required to represent the value. - @result Returns an instance of OSNumber or 0 if an error occurred. - */ - static OSNumber *withNumber(unsigned long long value, - unsigned int numberOfBits); - /*! - @function withNumber - @abstract A static constructor function to create and initialize an instance of OSNumber with a given value represented as a simple c-string. - @discussion This function does not work on IOKit versions prior to 8.0 (prior to 10.4). 
For IOKit version 8.0 and later, it works but is limited to parsing unsigned 32 bit quantities The format of the c-string may be decimal, hexadecimal ("0x" prefix), binary ("0b" prefix, or octal ("0" prefix). - @param value A c-string representing a numeric value. - @param numberOfBits The number of bit required to represent the value. - @result Returns an instance of OSNumber or 0 if an error occurred. - */ - static OSNumber *withNumber(const char *value, unsigned int numberOfBits); - - /*! - @function init - @abstract A member function to initialize an instance of OSNumber. - @param value An integer value. - @param numberOfBits The number of bit required to represent the value. - @result Returns true if instance was successfully initialized, false otherwise. - */ - virtual bool init(unsigned long long value, unsigned int numberOfBits); - /*! - @function init - @abstract A member function to initialize an instance of OSNumber. - @param value A c-string representation of a numeric value. - @param numberOfBits The number of bit required to represent the value. - @result Returns true if instance was successfully initialized, false otherwise. - */ - virtual bool init(const char *value, unsigned int numberOfBits); - /*! - @function free - @abstract Releases and deallocates resources created by the OSNumber instances. - @discussion This function should not be called directly, use release() instead. + + + /*! + * @function withNumber + * + * @abstract + * Creates and initializes an instance of OSNumber + * with an integer value. + * + * @param value The numeric integer value for the OSNumber to store. + * @param numberOfBits The number of bits to limit storage to. + * + * @result + * An instance of OSNumber with a reference count of 1; + * NULL on failure. + * + * @discussion + * value is masked to the provided numberOfBits + * when the OSNumber object is initialized. 
+ * + * You can change the value of an OSNumber later + * using @link setValue setValue@/link + * and @link addValue addValue@/link, + * but you can't change the bit size. + */ + static OSNumber * withNumber( + unsigned long long value, + unsigned int numberOfBits); + + + /*! + * @function withNumber + * + * @abstract + * Creates and initializes an instance of OSNumber + * with an unsigned integer value represented as a C string. + * + * @param valueString A C string representing a numeric value + * for the OSNumber to store. + * @param numberOfBits The number of bits to limit storage to. + * + * @result + * An instance of OSNumber with a reference count of 1; + * NULL on failure. + * + * @discussion + * This function does not work in I/O Kit versions prior to 8.0 (Mac OS X 10.4). + * In I/O Kit version 8.0 and later, it works + * but is limited to parsing unsigned 32 bit quantities. + * The format of the C string may be decimal, hexadecimal ("0x" prefix), + * binary ("0b" prefix), or octal ("0" prefix). + * + * The parsed value is masked to the provided numberOfBits + * when the OSNumber object is initialized. + * + * You can change the value of an OSNumber later + * using @link setValue setValue@/link + * and @link addValue addValue@/link, + * but you can't change the bit size. + */ + static OSNumber * withNumber( + const char * valueString, + unsigned int numberOfBits); + + + /*! + * @function init + * + * @abstract + * Initializes an instance of OSNumber with an integer value. + * + * @param value The numeric integer value for the OSNumber to store. + * @param numberOfBits The number of bits to limit storage to. + * + * @result + * true if initialization succeeds, + * false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSNumber/withNumber/staticOSNumber*\/(unsignedlonglong,unsignedint) + * withNumber(unsigned long long, unsigned int)@/link + * instead. 
+ */ + virtual bool init( + unsigned long long value, + unsigned int numberOfBits); + + + /*! + * @function init + * + * @abstract + * Initializes an instance of OSNumber + * with an unsigned integer value represented as a C string. + * + * @param valueString A C string representing a numeric value + * for the OSNumber to store. + * @param numberOfBits The number of bits to limit storage to. + * + * @result + * true if initialization succeeds, + * false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSNumber/withNumber/staticOSNumber*\/(constchar*,unsignedint) + * withNumber(const char *, unsigned int)@/link + * instead. + */ + virtual bool init( + const char * valueString, + unsigned int numberOfBits); + + + /*! + * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSNumber instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function numberOfBits - @abstract A member function which returns the number of bits used to represent the value. - @result Returns the number of bits required to represent the value. + + /*! + * @function numberOfBits + * + * @abstract + * Returns the number of bits used to represent + * the OSNumber object's integer value. + * + * @result + * The number of bits used to represent + * the OSNumber object's integer value. + * + * @discussion + * The number of bits is used to limit the stored value of the OSNumber. + * Any change to its value is performed as an unsigned long long + * and then truncated to the number of bits. */ virtual unsigned int numberOfBits() const; - /*! - @function numberOfBytes - @abstract A member function which returns the number of bytes used to represent the value. 
- @result Returns the number of bytes required to represent the value. + + + /*! + * @function numberOfBytes + * + * @abstract + * Returns the number of bytes used to represent + * the OSNumber object's integer value. + * + * @result + * The number of bytes used to represent + * the OSNumber object's integer value. + * See @link numberOfBits numberOfBits@/link. */ virtual unsigned int numberOfBytes() const; - /*! - @function unsigned8BitValue - @abstract A member function which returns its internal value as an 8-bit value. - @result Returns the internal value as an 8-bit value. + +// xx-review: should switch to explicitly-sized int types +// xx-review: but that messes up C++ mangled symbols :-( + + + /*! + * @function unsigned8BitValue + * + * @abstract + * Returns the OSNumber object's integer value + * cast as an unsigned 8-bit integer. + * + * @result + * The OSNumber object's integer value + * cast as an unsigned 8-bit integer. + * + * @discussion + * This function merely casts the internal integer value, + * giving no indication of truncation or other potential conversion problems. */ virtual unsigned char unsigned8BitValue() const; - /*! - @function unsigned16BitValue - @abstract A member function which returns its internal value as an 16-bit value. - @result Returns the internal value as an 16-bit value. + + + /*! + * @function unsigned16BitValue + * + * @abstract + * Returns the OSNumber object's integer value + * cast as an unsigned 16-bit integer. + * + * @result + * Returns the OSNumber object's integer value + * cast as an unsigned 16-bit integer. + * + * @discussion + * This function merely casts the internal integer value, + * giving no indication of truncation or other potential conversion problems. */ virtual unsigned short unsigned16BitValue() const; - /*! - @function unsigned32BitValue - @abstract A member function which returns its internal value as an 32-bit value. - @result Returns the internal value as an 32-bit value. + + + /*! 
+ * @function unsigned32BitValue + * + * @abstract + * Returns the OSNumber object's integer value + * cast as an unsigned 32-bit integer. + * + * @result + * Returns the OSNumber object's integer value + * cast as an unsigned 32-bit integer. + * + * @discussion + * This function merely casts the internal integer value, + * giving no indication of truncation or other potential conversion problems. */ virtual unsigned int unsigned32BitValue() const; - /*! - @function unsigned64BitValue - @abstract A member function which returns its internal value as an 64-bit value. - @result Returns the internal value as an 64-bit value. + + + /*! + * @function unsigned64BitValue + * + * @abstract + * Returns the OSNumber object's integer value + * cast as an unsigned 64-bit integer. + * + * @result + * Returns the OSNumber object's integer value + * cast as an unsigned 64-bit integer. + * + * @discussion + * This function merely casts the internal integer value, + * giving no indication of truncation or other potential conversion problems. */ virtual unsigned long long unsigned64BitValue() const; - /*! - @function addValue - @abstract A member function which adds an integer value to the internal numeric value of the OSNumber object. - @param value The value to be added. +// xx-review: wow, there's no addNumber(OSNumber *)! + + /*! + * @function addValue + * + * @abstract + * Adds a signed integer value to the internal integer value + * of the OSNumber object. + * + * @param value The value to be added. + * + * @discussion + * This function adds values as 64-bit integers, + * but masks the result by the bit size + * (see @link numberOfBits numberOfBits@/link), + * so addition overflows will not necessarily + * be the same as for plain C integers. */ virtual void addValue(signed long long value); - /*! - @function setValue - @abstract Replaces the current internal numeric value of the OSNumber object by the value given. - @param value The new value for the OSNumber object. 
+ + + /*! + * @function setValue + * + * @abstract + * Replaces the current internal integer value + * of the OSNumber object by the value given. + * + * @param value The new value for the OSNumber object, + * which is truncated by the bit size of the OSNumber object + * (see @link numberOfBits numberOfBits@/link). */ virtual void setValue(unsigned long long value); - /*! - @function isEqualTo - @abstract A member function to test the equality of two OSNumber objects. - @param integer The OSNumber object to be compared against the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSNumber objects. + * + * @param aNumber The OSNumber to be compared against the receiver. + * + * @result + * true if the OSNumber objects are equal, + * false if not. + * + * @discussion + * Two OSNumber objects are considered equal + * if they represent the same C integer value. */ - virtual bool isEqualTo(const OSNumber *integer) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of an arbitrary OSObject derived object and an OSNumber object. - @param obj The OSObject derived object to be compared to the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + virtual bool isEqualTo(const OSNumber * aNumber) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSNumber to an arbitrary object. + * + * @param anObject An object to be compared against the receiver. + * + * @result + * true if the objects are equal, + * false if not. + * + * @discussion + * An OSNumber is considered equal to another object if that object is + * derived from OSNumber and represents the same C integer value. */ - virtual bool isEqualTo(const OSMetaClassBase *obj) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + - /*! 
- @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. */ - virtual bool serialize(OSSerialize *s) const; + virtual bool serialize(OSSerialize * serializer) const; OSMetaClassDeclareReservedUnused(OSNumber, 0); diff --git a/libkern/libkern/c++/OSObject.h b/libkern/libkern/c++/OSObject.h index 82fae0a6a..cfd75269c 100644 --- a/libkern/libkern/c++/OSObject.h +++ b/libkern/libkern/c++/OSObject.h @@ -26,10 +26,10 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* -Copyright (c) 1998 Apple Computer, Inc. All rights reserved. +Copyright (c) 1998 Apple Computer, Inc. All rights reserved. HISTORY - 1998-10-30 Godfrey van der Linden(gvdl) - Created + 1998-10-30 Godfrey van der Linden(gvdl) + Created */ #ifndef _LIBKERN_OSOBJECT_H #define _LIBKERN_OSOBJECT_H @@ -38,118 +38,384 @@ HISTORY class OSSymbol; class OSString; -/*! - @class OSObject : OSMetaClassBase - @abstract The root base class for Mac OS X kernel and just generally all-round useful class to have around. - @discussion -Defines the minimum functionality that an object can expect. Implements reference counting, type safe object casting, allocation primitives & serialisation among other functionality. This object is an abstract base class and can not be copied, nor can it be constructed by itself. - -

Construction

- -As Mac OS X's C++ is based upon Embedded C++ we have a problem with the typical C++ method of using constructors. Embedded C++ does not allow exceptions. This means that the standard constructors can not report a failure. Well obviously initialisation of a new object can fail so we have had to work around this language limitation. In the Mac OS X kernel we have chosen to break object construction into two phases. Phase one is the familiar C++ new operator, the only initialisation is the object has exactly one reference after creation. Once the new is called the client MUST call init and check it's return value. If the init call fails then the object MUST be immediately released. IOKit usually implements factory methods to make construction a one step process for clients. -

Reference Counting

-OSObject provides reference counting services using the $link retain(), $link release(), $link release(int when) and $link free() functions. The public interface to the reference counting is retain() & release(). release() is implemented as a simple call to release(1). The actual implementation of release(when) is a little subtle. If the current reference count is less than or equal to the 'when' parameter the object will call free on itself. -
-In general a subclass is expected to only override $link free(). It may also choose to override release() if the object has a circular retain count, see $link release(int when); - -

Runtime Type Information System

+/*! + * @header + * + * @abstract + * This header declares the OSObject class, + * which is the concrete root of the Libkern C++ class hierarchy. + */ + -The Mac OS X C++ implements a basic runtime type information system using meta class information and a number of macros, $link OSDynamicCast, $link OSTypeID, $link OSTypeIDInst, $link OSCheckTypeInst and $link OSMetaClass. -*/ +/*! + * @class OSObject + * + * @abstract + * OSObject is the concrete root class + * of the Libkern and I/O Kit C++ class hierarchy. + * + * @discussion + * OSObject defines the minimal functionality + * required of Libkern and I/O Kit C++ classes: + * tie-in to the run-time type information facility, + * the dynamic allocation/initialization paradigm, + * and reference counting. + * While kernel extensions are free to use their own C++ classes internally, + * any interaction they have with Libkern or the I/O Kit will require + * classes ultimately derived from OSObject. + * + * Run-Time Type Information + * + * OSObject is derived from the abstract root class + * @link //apple_ref/doc/class/OSMetaClassBase OSMetaClassBase@/link, + * which declares (and defines many of) the primitives + * on which the run-time type information facility is based. + * A parallel inheritance hierarchy of metaclass objects + * provides run-time introspection, including access to class names, + * inheritance, and safe type-casting. + * See @link //apple_ref/doc/class/OSMetaClass OSMetaClass@/link + * for more information. + * + * Dynamic Allocation/Initialization + * + * The kernel-resident C++ runtime does not support exceptions, + * so Libkern classes cannot use standard C++ object + * constructors and destructors, + * which use exceptions to report errors. + * To support error-handling during instance creation, then, + * OSObject separates object allocation from initialization. 
+ * You can create a new OSObject-derived instance + * with the new operator, + * but this does nothing more than allocate memory + * and initialize the reference count to 1. + * Following this, you must call a designated initialization function + * and check its bool return value. + * If the initialization fails, + * you must immediately call + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * on the instance and handle the failure in whatever way is appropriate. + * Many Libkern and I/O Kit classes define static instance-creation functions + * (beginning with the word "with") + * to make construction a one-step process for clients. + * + * Reference Counting + * + * OSObject provides reference counting services using the + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link, + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release()@/link, + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/(int) + * release(int freeWhen)@/link + * and + * @link + * //apple_ref/cpp/instm/OSObject/free/virtualvoid/() + * free@/link + * functions. + * The public interface to the reference counting is + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link, + * and + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link; + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/(int) + * release(int freeWhen)@/link + * is provided + * for objects that have internal retain cycles. + * + * In general, a subclass is expected to only override + * @link + * //apple_ref/cpp/instm/OSObject/free/virtualvoid/() + * free@/link. + * It may also choose to override + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/(int) + * release(int freeWhen)@/link + * if the object has a circular retain count, as noted above. 
+ * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * Concurrency Protection + * + * The basic features of OSObject are thread-safe. + * Most Libkern subclasses are not, and require locking or other protection + * if instances are shared between threads. + * I/O Kit driver objects are either designed for use within thread-safe contexts + * or designed to inherently be thread-safe. + * Always check the individual class documentation to see what + * steps are necessary for concurrent use of instances. + */ class OSObject : public OSMetaClassBase { OSDeclareAbstractStructors(OSObject) private: -/*! @var retainCount Number of references held on this instance. */ + /* Not to be included in headerdoc. + * + * @var retainCount Number of references held on this instance. + */ mutable int retainCount; protected: -/*! @function release - @abstract untagged release(when) mechansim. - @param when Pass through to taggedRelease. */ - virtual void release(int when) const; - -/*! @function taggedRelease - @abstract Primary implementation of the tagged release mechanism. - @discussion If $link retainCount <= the when argument then call $link free(). This indirect implementation of $link release allows the developer to break reference circularity. An example of this sort of problem is a parent/child mutual reference, either the parent or child can implement: void taggedRelease(tag) { taggedRelease(tag, 2); } thus breaking the cirularity. - @param when If retainCount == when then call free(). */ - virtual void taggedRelease(const void *tag, const int when) const; - -/*! @function init - @abstract Mac OS X kernel's primary mechanism for constructing objects. 
- @discussion Your responsibility as a subclass author is to override the init method of your parent. In general most of our implementations call ::init() before doing local initialisation, if the parent fails then return false immediately. If you have a failure during you local initialisation then return false. - @result OSObject::init Always returns true, but subclasses will return false on init failure. -*/ +// xx-review: seems not to be used, should we deprecate? + + /*! + * @function release + * + * @abstract + * Releases a reference to an object, + * freeing it immediately if the reference count + * drops to or below the specified threshold. + * + * @param freeWhen If decrementing the reference count makes it + * <= freeWhen, the object is immediately freed. + * + * @discussion + * If the receiver has freeWhen or fewer references + * after its reference count is decremented, + * it is immediately freed. + * + * This version of release + * can be used to break certain retain cycles in object graphs. + * In general, however, it should be avoided. + */ + virtual void release(int freeWhen) const; + + /*! + * @function taggedRelease + * + * @abstract + * Releases a tagged reference to an object, + * freeing it immediately if the reference count + * drops to or below the specified threshold. + * + * @param tag Used for tracking collection references. + * @param freeWhen If decrementing the reference count makes it + * <= freeWhen, the object is immediately freed. + * + * @discussion + * Kernel extensions should not use this function. + * It is for use by OSCollection and subclasses to track + * inclusion in collections. + * + * If the receiver has freeWhen or fewer references + * after its reference count is decremented, + * it is immediately freed. + * + * This version of release + * can be used to break certain retain cycles in object graphs. + * In general, however, it should be avoided. 
+ */ + virtual void taggedRelease(const void * tag, const int freeWhen) const; + + + /*! + * @function init + * + * @abstract + * Initializes a newly-allocated object. + * + * @result + * true on success, false on failure. + * + * @discussion + * Classes derived from OSObject must override the primary init method + * of their parent. + * In general most implementations call + * super::init() + * before doing local initialisation. + * If the superclass call fails then return false immediately. + * If the subclass encounters a failure then it should return false. + */ virtual bool init(); -/*! @function free - @abstract The last reference is gone so clean up your resources. - @discussion Release all resources held by the object, then call your parent's free(). -

Caution: -
1> You can not assume that you have completed initialization before your free is called, so be very careful in your implementation. -
2> The implementation is OSObject::free() { delete this; } so do not call super::free() until just before you return. -
3> Free is not allowed to fail all resource must be released on completion. */ + /*! + * @function free + * + * @abstract + * Deallocates/releases resources held by the object. + * + * @discussion + * Classes derived from OSObject should override this function + * to deallocate or release all dynamic resources held by the instance, + * then call the superclass's implementation. + * + * Caution: + *
    + *
  1. You can not assume that you have completed initialization + * before free is called, + * so be very careful in your implementation.
  2. + *
  3. OSObject's implementation performs the C++ delete + * of the instance, so be sure that you call the superclass + * implementation last in your implementation.
  4. + *
  5. free must not fail; + * all resources must be deallocated or released on completion.
  6. + *
+ */ virtual void free(); -/*! @function operator delete - @abstract Release the 'operator new'ed memory. - @discussion Never attempt to delete an object that inherits from OSObject directly use $link release(). - @param mem pointer to block of memory - @param size size of block of memory -*/ - static void operator delete(void *mem, size_t size); + + /*! + * @function operator delete + * + * @abstract + * Frees the memory of the object itself. + * + * @param mem A pointer to the object's memory. + * @param size The size of the object's block of memory. + * + * @discussion + * Never use delete on objects derived from OSObject; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. + */ + static void operator delete(void * mem, size_t size); public: -/*! @function operator new - @abstract Allocator for all objects that inherit from OSObject - @param size number of bytes to allocate - @result returns pointer to block of memory if available, 0 otherwise. -*/ - static void *operator new(size_t size); + /*! + * @function operator new + * + * @abstract + * Allocates memory for an instance of the class. + * + * @param size The number of bytes to allocate + * + * @result + * A pointer to block of memory if available, NULL otherwise. + */ + static void * operator new(size_t size); -/*! @function getRetainCount - @abstract How many times has this object been retained? - @result Current retain count -*/ + + /*! + * @function getRetainCount + * + * @abstract + * Returns the reference count of the object. + * + * @result + * The reference count of the object. + */ virtual int getRetainCount() const; -/*! @function retain - @abstract Retain a reference in this object. - @discussion Takes a reference that is NULL tagged. See taggedRetain(). -*/ + + /*! + * @function retain + * + * @abstract + * Retains a reference to the object. + * + * @discussion + * This function increments the reference count of the receiver by 1. 
+ * If you need to maintain a reference to an object + * outside the context in which you received it, + * you should always retain it immediately. + */ virtual void retain() const; -/*! @function release - @abstract Release a reference to this object - @discussion Removes a reference that is NULL tagged. See taggedRelease(). -*/ + + /*! + * @function release + * + * @abstract + * Releases a reference to the object, + * freeing it immediately if the reference count drops to zero. + * + * @discussion + * This function decrements the reference count of the receiver by 1. + * If the reference count drops to zero, + * the object is immediately freed using + * @link + * //apple_ref/cpp/instm/OSObject/free/virtualvoid/() + * free@/link. + */ virtual void release() const; -/*! @function taggedRetain - @abstract Retain a tagged reference in this object. - @param tag Retain a reference on this object with this tag, see taggedRelease. -*/ - virtual void taggedRetain(const void *tag = 0) const; -/*! @function taggedRelease - @abstract Release a tagged reference to this object - @param tag Remove a reference on this object with this tag, if an attempt is made to remove a reference that isn't associated with this tag the kernel will panic immediately. -*/ - virtual void taggedRelease(const void *tag = 0) const; + /*! + * @function taggedRetain + * + * @abstract + * Retains a reference to the object with an optional + * tag used for reference-tracking. + * + * @param tag Used for tracking collection references. + * + * @discussion + * Kernel extensions should not use this function. + * It is for use by OSCollection and subclasses to track + * inclusion in collections. + * + * If you need to maintain a reference to an object + * outside the context in which you received it, + * you should always retain it immediately. + */ + virtual void taggedRetain(const void * tag = 0) const; -/*! 
@function serialize - @abstract - @discussion - @param s - @result -*/ - virtual bool serialize(OSSerialize *s) const; + + /*! + * @function taggedRelease + * + * @abstract + * Releases a tagged reference to an object, + * freeing it immediately if the reference count + * drops to zero. + * + * @param tag Used for tracking collection references. + * + * @discussion + * Kernel extensions should not use this function. + * It is for use by OSCollection and subclasses to track + * inclusion in collections. + */ + virtual void taggedRelease(const void * tag = 0) const; + // xx-review: used to say, "Remove a reference on this object with this tag, if an attempt is made to remove a reference that isn't associated with this tag the kernel will panic immediately", but I don't see that in the implementation + + + /*! + * @function serialize + * + * @abstract + * Overridden by subclasses to archive the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. + * + * @discussion + * OSObject's implementation writes a string indicating that + * the class of the object receiving the function call + * is not serializable. + * Subclasses that can meaningfully encode themselves + * in I/O Kit-style property list XML can override this function to do so. + * See + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link + * for more information. + */ + virtual bool serialize(OSSerialize * serializer) const; // Unused Padding OSMetaClassDeclareReservedUnused(OSObject, 0); diff --git a/libkern/libkern/c++/OSOrderedSet.h b/libkern/libkern/c++/OSOrderedSet.h index c457dbf6a..64609d863 100644 --- a/libkern/libkern/c++/OSOrderedSet.h +++ b/libkern/libkern/c++/OSOrderedSet.h @@ -35,241 +35,731 @@ class OSOffset; /*! 
- @class OSOrderedSet - @abstract A collection class for maintaining and sorting a set of OSMetaClassBase derived objects. - @discussion - An instance of OSOrderedSet maintains and sorts a collection of OSMetaClassBase derived objects. The sorting algorithm is supplied to the instance via the OSOrderFunction. -*/ + * @header + * + * @abstract + * This header declares the OSOrderedSet collection class. + */ + + +/*! + * @class OSOrderedSet + * + * @abstract + * OSOrderedSet provides an ordered set store of objects. + * + * @discussion + * OSOrderedSet is a container for Libkern C++ objects + * (those derived from + * @link //apple_ref/doc/class/OSMetaClassBase OSMetaClassBase@/link, + * in particular @link //apple_ref/doc/class/OSObject OSObject@/link). + * Storage and access follow ordered set logic. + * A given object is stored in the set only once, but you can: + *
    + *
  • Define a sorting function for automated ordering + * (upon addition only)
  • + *
  • Manually insert new objects in the set (overriding sorting)
  • + *
  • Add and remove objects in the set
  • + *
  • Test whether the set contains a particular object
  • + *
  • Get the object stored at a particular index.
  • + * + * + * Note that automated ordering is performed only upon addition of objects + * and depends on the existing objects being properly sorted. + * There is no function to re-sort the contents of an OSOrderedSet + * or to change the ordering function. + * In general, you should either use the one ordered-insertion function, + * or the indexed-insertion functions, and not mix the two. + * + * As with all Libkern collection classes, + * OSOrderedSet retains objects added to it, + * and releases objects removed from it. + * An OSOrderedSet also grows as necessary to accommodate new objects, + * unlike Core Foundation collections (it does not, however, shrink). + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSOrderedSet provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSOrderedSet : public OSCollection { OSDeclareDefaultStructors(OSOrderedSet) public: - /*! - @typedef OSOrderFunction - @abstract The sorting function used by the collection to order objects. - @param obj1 An object from the collection. - @param obj2 An object to be compared to obj1. - @param ref The ordering context used by the sorting function as a hint for sorting. - @result Returns a comparison result of the object, a negative value if obj1 > obj2, 0 if obj1 == obj2, and a positive value if obj1 < obj2. - */ + /*! + * @typedef OSOrderFunction + * + * @abstract + * The sorting function used by an OSOrderedSet to order objects. + * + * @param obj1 An object from the ordered set. May be NULL. 
+ * @param obj2 The object being ordered within the ordered set. + * May be NULL. + * @param context A pointer to a user-provided context. May be NULL. + * + * @result + * A comparison result of the object: + *
      + *
    • a positive value if obj2 should precede obj1,
    • + *
    • a negative value if obj1 should precede obj2,
    • + *
    • and 0 if obj1 and obj2 have an equivalent ordering.
    • + *
    + */ typedef SInt32 (*OSOrderFunction)(const OSMetaClassBase * obj1, const OSMetaClassBase * obj2, - void * ref ); + void * context); protected: - struct _Element * array; - OSOrderFunction ordering; - void * orderingRef; - unsigned int count; - unsigned int capacity; - unsigned int capacityIncrement; + struct _Element * array; + OSOrderFunction ordering; + void * orderingRef; + unsigned int count; + unsigned int capacity; + unsigned int capacityIncrement; struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ + /* Reserved for future use. (Internal use only) */ ExpansionData *reserved; protected: - /* - * OSCollectionIterator interfaces. - */ + /* OSCollectionIterator interfaces. */ virtual unsigned int iteratorSize() const; virtual bool initIterator(void *iterator) const; virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const; public: - /*! - @function withCapacity - @abstract A static constructor function for creating and initializing an instance of OSOrderedSet. - @param capacity The initial storage size in number of objects of the set. - @param orderFunc A c-style function which implements the sorting algorithm for the set. - @param orderingRef A ordering context used as a hint for ordering objects within the set. - @result Returns an instance of OSSet, or 0 if a failure occurred. + /*! + * @function withCapacity + * + * @abstract + * Creates and initializes an empty OSOrderedSet. + * + * @param capacity The initial storage capacity + * of the new ordered set object. + * @param orderFunc A C function that implements the sorting algorithm + * for the set. + * @param orderingContext An ordering context, + * which is passed to orderFunc. + * @result + * An empty instance of OSOrderedSet + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * capacity must be nonzero. 
+ * The new OSOrderedSet will grow as needed + * to accommodate more objects + * (unlike Core Foundation collections, + * for which the initial capacity is a hard limit). + * + * If orderFunc is provided, it is used by + * @link + * //apple_ref/cpp/instm/OSOrderedSet/setObject/virtualbool/(constOSMetaClassBase*) + * setObject(const OSMetaClassBase *)@/link + * to determine where to insert a new object. + * Other object-setting functions ignore ordering. + * + * orderingContext is not retained or otherwise memory-managed + * by the ordered set. + * If it needs to be deallocated, + * you must track references to it and the ordered set + * in order to deallocate it appropriately. + * See + * @link getOrderingRef getOrderingRef@/link. */ - static OSOrderedSet *withCapacity(unsigned int capacity, - OSOrderFunction orderFunc = 0, - void * orderingRef = 0); - - /*! - @function initWithCapacity - @abstract A member function for initializing an instance of OSOrderedSet. - @param capacity The initial storage size in number of objects of the set. - @param orderFunc A c-style function which implements the sorting algorithm for the set. - @param orderingRef A ordering context used as a hint for ordering objects within the set. - @result Returns true if initialization was successful, or false if a failure occurred. + static OSOrderedSet * withCapacity( + unsigned int capacity, + OSOrderFunction orderFunc = 0, + void * orderingContext = 0); + + + /*! + * @function initWithCapacity + * + * @abstract + * Initializes a new instance of OSOrderedSet. + * + * @param capacity The initial storage capacity + * of the new ordered set object. + * @param orderFunc A C function that implements the sorting algorithm + * for the set. + * @param orderingContext An ordering context, + * which is passed to orderFunc. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. 
Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSOrderedSet/withCapacity/staticOSOrderedSet*\/(unsignedint,OSOrderFunction,void*) + * withCapacity@/link + * instead. + * + * capacity must be nonzero. + * The new set will grow as needed to accommodate more objects + * (unlike Core Foundation collections, + * for which the initial capacity is a hard limit). + * + * If orderFunc is provided, it is used by + * @link + * //apple_ref/cpp/instm/OSOrderedSet/setObject/virtualbool/(constOSMetaClassBase*) + * setObject(const OSMetaClassBase *)@/link + * to determine where to insert a new object. + * Other object-setting functions ignore ordering. + * + * orderingContext is not retained or otherwise memory-managed + * by the ordered set. + * If it needs to be deallocated, + * you must track references to it and the ordered set + * in order to deallocate it appropriately. + * See + * @link getOrderingRef getOrderingRef@/link. */ - virtual bool initWithCapacity(unsigned int capacity, - OSOrderFunction orderFunc = 0, - void * orderingRef = 0); - /*! - @function free - @abstract A member function to release and deallocate any resources used by the instance of OSOrderedSet. + virtual bool initWithCapacity( + unsigned int capacity, + OSOrderFunction orderFunc = 0, + void * orderingContext = 0); + + + /*! + * @function free + * + * @abstract + * Deallocates and releases any resources + * used by the OSOrderedSet instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getCount - @abstract A member function to return the number of objects within the collection. - @result Returns the number of items in the set. + + /*! + * @function getCount + * + * @abstract + * Returns the current number of objects within the ordered set. 
+ * + * @result + * The current number of objects within the ordered set. */ virtual unsigned int getCount() const; - /*! - @function getCapacity - @abstract A member function to return the storage capacity of the collection. - @result Returns the total storage capacity of the set. + + + /*! + * @function getCapacity + * + * @abstract + * Returns the number of objects the ordered set + * can store without reallocating. + * + * @result + * The number of objects the ordered set + * can store without reallocating. + * + * @discussion + * OSOrderedSet objects grow when full to accommodate additional objects. + * See + * @link + * //apple_ref/cpp/instm/OSOrderedSet/getCapacityIncrement/virtualunsignedint/() + * getCapacityIncrement@/link + * and + * @link + * //apple_ref/cpp/instm/OSOrderedSet/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link. */ virtual unsigned int getCapacity() const; - /*! - @function getCapacityIncrement - @abstract A member function to get the size by which the collection will grow. - @result Returns the current growth size. + + + /*! + * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the ordered set. + * + * @result + * The storage increment of the ordered set. + * + * @discussion + * An OSOrderedSet allocates storage for objects in multiples + * of the capacity increment. */ virtual unsigned int getCapacityIncrement() const; - /*! - @function setCapacityIncrement - @abstract A member function to set the size by which the collection will grow. - @param increment The new growth factor for the set. - @result Returns new growth size. + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the ordered set. + * + * @result + * The new storage increment of the ordered set, + * which may be different from the number requested. + * + * @discussion + * An OSOrderedSet allocates storage for objects in multiples + * of the capacity increment. 
+ * Calling this function does not immediately reallocate storage. */ virtual unsigned int setCapacityIncrement(unsigned increment); - /*! - @function ensureCapacity - @abstract A member function to expand the size of the collection. - @param newCapacity The new size capacity for the collection. - @result Returns new capacity of the set when successful or the old capacity on failure. + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the set has enough space + * to store the requested number of distinct objects. + * + * @param newCapacity The total number of distinct objects the ordered set + * should be able to store. + * + * @result + * The new capacity of the ordered set, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * This function immediately resizes the ordered set, if necessary, + * to accommodate at least newCapacity distinct objects. + * If newCapacity is not greater than the current capacity, + * or if an allocation error occurs, the original capacity is returned. + * + * There is no way to reduce the capacity of an OSOrderedSet. */ virtual unsigned int ensureCapacity(unsigned int newCapacity); - /*! - @function flushCollection - @abstract A member function to remove and release all items in the set. + + /*! + * @function flushCollection + * + * @abstract + * Removes and releases all objects within the ordered set. + * + * @discussion + * The ordered set's capacity (and therefore direct memory consumption) + * is not reduced by this function. */ virtual void flushCollection(); - /*! - @function setObject - @abstract A member function to place an OSMetaClassBase derived object into the set. The object will be automatically sorted in the set. - @param anObject The object to be placed into the collection. - @result Returns true if object was successfully added to the collection, false otherwise. + + /*! 
+ * @function setObject + * + * @abstract + * Adds an object to the OSOrderedSet if it is not already present, + * storing it in sorted order if there is an order function. + * + * @param anObject The OSMetaClassBase-derived object to be added + * to the ordered set. + * @result + * true if anObject was successfully + * added to the ordered set, false otherwise + * (including if it was already in the ordered set). + * + * @discussion + * The set adds storage to accommodate the new object, if necessary. + * If successfully added, the object is retained. + * + * If anObject is not already in the ordered set + * and there is an order function, + * this function loops through the existing objects, + * calling the @link OSOrderFunction order function@/link + * with arguments each existingObject, anObject, + * and the ordering context + * (or NULL if none was set), + * until the order function returns + * a value greater than or equal to 0. + * It then inserts anObject at the index of the existing object. + * + * If there is no order function, the object is inserted at index 0. + * + * A false return value can mean either + * that anObject is already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the object + * is already present, use + * @link + * //apple_ref/cpp/instm/OSOrderedSet/containsObject/virtualbool/(constOSMetaClassBase*) + * containsObject(const OSMetaClassBase *)@/link. */ - virtual bool setObject(const OSMetaClassBase *anObject); - /*! - @function setFirstObject - @abstract A member function to place an OSMetaClassBase derived object order it first in the set. - @param anObject The object to be placed into the collection. - @result Returns true if object was successfully added to the collection, false otherwise. + virtual bool setObject(const OSMetaClassBase * anObject); + + + /*! 
+ * @function setFirstObject + * + * @abstract + * Adds an object to the OSOrderedSet at index 0 + * if it is not already present. + * + * @param anObject The OSMetaClassBase-derived object + * to be added to the ordered set. + * @result + * true if anObject was successfully added + * to the ordered set, false otherwise + * (including if it was already in the ordered set at any index). + * + * @discussion + * The set adds storage to accommodate the new object, if necessary. + * If successfully added, the object is retained. + * + * This function ignores any ordering function of the ordered set, + * and can disrupt the automatic sorting mechanism. + * Only call this function if you are managing the ordered set directly. + * + * A false return value can mean either that anObject + * is already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the object + * is already present, use + * @link + * //apple_ref/cpp/instm/OSOrderedSet/containsObject/virtualbool/(constOSMetaClassBase*) + * containsObject(const OSMetaClassBase *)@/link. */ - virtual bool setFirstObject(const OSMetaClassBase *anObject); - /*! - @function setLastObject - @abstract A member function to place an OSMetaClassBase derived object order it last in the set. - @param anObject The object to be placed into the collection. - @result Returns true if object was successfully added to the collection, false otherwise. + virtual bool setFirstObject(const OSMetaClassBase * anObject); + + + /*! + * @function setLastObject + * + * @abstract + * Adds an object at the end of the OSOrderedSet + * if it is not already present. + * + * @param anObject The OSMetaClassBase-derived object to be added + * to the ordered set. + * @result + * true if anObject was successfully added + * to the ordered set, false otherwise + * (including if it was already in the ordered set at any index). + * + * @discussion + * The set adds storage to accommodate the new object, if necessary. 
+ * If successfully added, the object is retained. + * + * This function ignores any ordering function of the ordered set, + * and can disrupt the automatic sorting mechanism. + * Only call this function if you are managing the ordered set directly. + * + * A false return value can mean either that anObject + * is already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the object + * is already present, use + * @link + * //apple_ref/cpp/instm/OSOrderedSet/containsObject/virtualbool/(constOSMetaClassBase*) + * containsObject(const OSMetaClassBase *)@/link. */ - virtual bool setLastObject(const OSMetaClassBase *anObject); + virtual bool setLastObject(const OSMetaClassBase * anObject); - /*! - @function removeObject - @abstract A member function to remove and release an object in the set. - @param anObject The object to remove from the set. + + /*! + * @function removeObject + * + * @abstract + * Removes an object from the ordered set. + * + * @param anObject The OSMetaClassBase-derived object + * to be removed from the ordered set. + * + * @discussion + * The object removed from the ordered set is released. */ - virtual void removeObject(const OSMetaClassBase *anObject); + virtual void removeObject(const OSMetaClassBase * anObject); + - /*! - @function containsObject - @abstract A member function to query the set for the presence of a particular object. - @param anObject The object to be located. - @result Returns true if the object is present in the set, false otherwise. + /*! + * @function containsObject + * + * @abstract + * Checks the ordered set for the presence of an object. + * + * @param anObject The OSMetaClassBase-derived object to check for + * in the ordered set. + * + * @result + * true if anObject is present + * within the ordered set, false otherwise. + * + * @discussion + * Pointer equality is used. + * This function returns false if passed NULL. 
*/ - virtual bool containsObject(const OSMetaClassBase *anObject) const; - /*! - @function member - @abstract A member function to query the set for the presence of a particular object. - @param anObject The object to be located. - @result Returns true if the object is present in the set, false otherwise. + virtual bool containsObject(const OSMetaClassBase * anObject) const; + + + /*! + * @function member + * + * @abstract + * Checks the ordered set for the presence of an object. + * + * @param anObject The OSMetaClassBase-derived object to check for + * in the ordered set. + * + * @result + * true if anObject is present + * within the ordered set, false otherwise. + * + * @discussion + * Pointer equality is used. + * Returns false if passed NULL. + * + * @link + * //apple_ref/cpp/instm/OSOrderedSet/containsObject/virtualbool/(constOSMetaClassBase*) + * containsObject(const OSMetaClassBase *)@/link + * checks for NULL before scanning the contents, + * and is therefore more efficient than this function. + */ + virtual bool member(const OSMetaClassBase * anObject) const; + + + /*! + * @function getFirstObject + * + * @abstract + * Returns the object at index 0 in the ordered set if there is one. + * + * @result + * The object at index 0 in the ordered set if there is one, + * otherwise NULL. + * + * @discussion + * The returned object will be released if removed from the ordered set; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual bool member(const OSMetaClassBase *anObject) const; + virtual OSObject * getFirstObject() const; + - /*! - @function getFirstObject - @abstract A member function to return the first object in the set. - @result Returns the object ordered first in the set or 0 if none exist. + /*! + * @function getLastObject + * + * @abstract + * Returns the last object in the ordered set if there is one. 
+ * + * @result + * The last object in the ordered set if there is one, + * otherwise NULL. + * + * @discussion + * The returned object will be released if removed from the ordered set; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual OSObject *getFirstObject() const; - /*! - @function getLastObject - @abstract A member function to return the last object in the set. - @result Returns the object ordered last in the set or 0 if none exist. + virtual OSObject * getLastObject() const; + + + /*! + * @function orderObject + * + * @abstract + * Calls the ordered set's order function against a NULL object. + * + * @param anObject The object to be ordered. + * + * @result + * The ordering value for the object. + * + * @discussion + * This function calls the ordered set's + * @link OSOrderFunction order function@/link + * with anObject, NULL, and the ordering context + * (or NULL if none was set), + * and returns the result of that function. */ - virtual OSObject *getLastObject() const; + virtual SInt32 orderObject(const OSMetaClassBase * anObject); + - /*! - @function orderObject - @abstract A member function to return the ordering value of an object. - @param anObject The object to be queried. - @result Returns the ordering value for an object. + /*! + * @function setObject + * + * @abstract + * Adds an object to an OSOrderedSet at a specified index + * if it is not already present. + * + * @param index The index at which to insert the new object. + * @param anObject The OSMetaClassBase-derived object to be added + * to the ordered set. + * + * @result + * true if the object was successfully added + * to the ordered set, false otherwise + * (including if it was already in the set). + * + * @discussion + * The set adds storage to accommodate the new object, if necessary. + * If successfully added, the object is retained. 
+ * + * This function ignores any ordering function of the ordered set, + * and can disrupt the automatic sorting mechanism. + * Only call this function if you are managing the ordered set directly. + * + * A false return value can mean either that the object + * is already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the object + * is already present, use + * @link //apple_ref/cpp/instm/OSOrderedSet/containsObject/virtualbool/(constOSMetaClassBase*) + * containsObject containsObject@/link. */ - virtual SInt32 orderObject( const OSMetaClassBase * anObject ); - - /*! - @function setObject - @abstract A member function to place an object into the set at a particular index. - @param index The index in the set to place the object. - @param anObject The object to be placed into the set. - @result Returns true if the object was successfully placed into the collection, false otherwise. + virtual bool setObject( + unsigned int index, + const OSMetaClassBase * anObject); + + + /*! + * @function getObject + * + * @abstract + * Gets the object at a particular index. + * + * @param index The index into the set. + * @result + * The object at the given index, + * or NULL if none exists at that location. + * + * @discussion + * The returned object will be released if removed from the set; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. */ - virtual bool setObject(unsigned int index, const OSMetaClassBase *anObject); - /*! - @function getObject - @abstract A member function to return a reference to an object at a particular index. - @param index The index into the set. - @result Returns a reference to the object at the given index, 0 if none exist at that location. + virtual OSObject * getObject(unsigned int index) const; + + + /*! 
+ * @function getOrderingRef + * + * @abstract + * Returns the ordering context the ordered set was created with. + * + * @result + * The ordered set's ordering context, + * or NULL if it doesn't have one. */ - virtual OSObject *getObject( unsigned int index) const; - /*! - @function getOrderingRef - @abstract A member function to return a the ordering context. - @result Returns the ordering context, or NULL if none exist. + virtual void * getOrderingRef(); + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSOrderedSet objects. + * + * @param anOrderedSet The ordered set object being compared + * against the receiver. + * @result + * true if the two sets are equivalent, + * false otherwise. + * + * @discussion + * Two OSOrderedSet objects are considered equal if they have same count + * and the same object pointer values in the same order. */ - virtual void *getOrderingRef(); + virtual bool isEqualTo(const OSOrderedSet * anOrderedSet) const; + - /*! - @function isEqualTo - @abstract A member function to test the equality between an OSOrderedSet object and the receiver. - @param anOrderedSet The OSOrderedSet object to be compared against the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSOrderedSet + * against an arbitrary object. + * + * @param anObject The object being compared against the receiver. + * @result + * true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * An OSOrderedSet object is considered equal to another object + * if the other object is derived from OSOrderedSet + * and compares equal as an OSOrderedSet. */ - virtual bool isEqualTo(const OSOrderedSet *anOrderedSet) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between an arbitrary OSMetaClassBase derived object and the receiver. 
- @param anObject The OSMetaClassBase derived object to be compared against the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function setOptions + * + * @abstract + * Recursively sets option bits in the ordered set + * and all child collections. + * + * @param options A bitfield whose values turn the options on (1) or off (0). + * @param mask A mask indicating which bits + * in options to change. + * Pass 0 to get the whole current options bitfield + * without changing any settings. + * @param context Unused. + * + * @result + * The options bitfield as it was before the set operation. + * + * @discussion + * Kernel extensions should not call this function. + * + * Child collections' options are changed only if the receiving ordered set's + * options actually change. */ - virtual bool isEqualTo(const OSMetaClassBase *anObject) const; - - - /*! - @function setOptions - @abstract This function is used to recursively set option bits in this set and all child collections. - @param options Set the (options & mask) bits. - @param mask The mask of bits which need to be set, 0 to get the current value. - @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. - */ - virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); - - /*! - @function copyCollection - @abstract Do a deep copy of this ordered set and its collections. - @discussion This function copies this set and all included collections recursively. Objects that don't derive from OSContainter are NOT copied, that is objects like OSString and OSData. - @param cycleDict Is a dictionary of all of the collections that have been, to start the copy at the top level just leave this field 0. 
- @result The newly copied collecton or 0 if insufficient memory + virtual unsigned setOptions( + unsigned options, + unsigned mask, + void * context = 0); + + + /*! + * @function copyCollection + * + * @abstract + * Creates a deep copy of this ordered set and its child collections. + * + * @param cycleDict A dictionary of all of the collections + * that have been copied so far, + * which is used to track circular references. + * To start the copy at the top level, + * pass NULL. + * + * @result + * The newly copied ordered set, with a retain count of 1, + * or NULL if there is insufficient memory to do the copy. + * + * @discussion + * The receiving ordered set, and any collections it contains, + * recursively, are copied. + * Objects that are not derived from OSCollection are retained + * rather than copied. */ - OSCollection *copyCollection(OSDictionary *cycleDict = 0); + OSCollection *copyCollection(OSDictionary * cycleDict = 0); OSMetaClassDeclareReservedUnused(OSOrderedSet, 0); OSMetaClassDeclareReservedUnused(OSOrderedSet, 1); diff --git a/libkern/libkern/c++/OSSerialize.h b/libkern/libkern/c++/OSSerialize.h index f8e6a0de8..51d822fc5 100644 --- a/libkern/libkern/c++/OSSerialize.h +++ b/libkern/libkern/c++/OSSerialize.h @@ -36,48 +36,238 @@ class OSSet; class OSDictionary; /*! - @class OSSerialize - @abstract A class used by the OS Container classes to serialize their instance data. - @discussion This class is for the most part internal to the OS Container classes and should not be used directly. Each class inherits a serialize() method from OSObject which is used to actually serialize an object. -*/ - + * @header + * + * @abstract + * This header declares the OSSerialize class. + */ + + +/*! + * @class OSSerialize + * + * @abstract + * OSSerialize coordinates serialization of Libkern C++ objects + * into an XML stream. 
+ * + * @discussion + * This class is for the most part internal to the OSContainer classes, + * used for transferring property tables between the kernel and user space. + * It should not be used directly. + * Classes that participate in serialization + * override the + * @link + * //apple_ref/cpp/instm/OSObject/serialize/virtualbool/(OSSerialize*) + * OSObject::serialize@/link . + * function. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSSerialize provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions + * for serializing properties. + */ class OSSerialize : public OSObject { OSDeclareDefaultStructors(OSSerialize) protected: - char *data; // container for serialized data - unsigned int length; // of serialized data (counting NULL) - unsigned int capacity; // of container - unsigned int capacityIncrement; // of container + char * data; // container for serialized data + unsigned int length; // of serialized data (counting NULL) + unsigned int capacity; // of container + unsigned int capacityIncrement; // of container - unsigned int tag; - OSDictionary *tags; // tags for all objects seen + unsigned int tag; + OSDictionary * tags; // tags for all objects seen struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ + /* Reserved for future use. (Internal use only) */ ExpansionData *reserved; public: - static OSSerialize *withCapacity(unsigned int capacity); - virtual char *text() const; + /*! 
+ * @function withCapacity + * + * @abstract + * Creates and initializes an empty OSSerialize object. + * + * @param capacity The initial size of the XML buffer. + * + * @result + * A new instance of OSSerialize + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * The serializer will grow as needed to accommodate more data. + */ + static OSSerialize * withCapacity(unsigned int capacity); - virtual void clearText(); // using this can be a great speedup - // if you are serializing the same object - // over and over again + /*! + * @function text + * + * @abstract + * Returns the XML text serialized so far. + * + * @result + * The nul-terminated XML data serialized so far. + */ + virtual char * text() const; + + + /*! + * @function clearText + * + * @abstract + * Resets the OSSerialize object. + * + * @discussion + * This function is a useful optimization if you are serializing + * the same object repeatedly. + */ + virtual void clearText(); // stuff to serialize your object - virtual bool previouslySerialized(const OSMetaClassBase *); - virtual bool addXMLStartTag(const OSMetaClassBase *o, const char *tagString); - virtual bool addXMLEndTag(const char *tagString); + /*! + * @function previouslySerialized + * + * @abstract + * Checks whether the object has already been serialized + * into the XML stream, emitting a reference if it has. + * + * @param object The object to check. + * + * @result + * true if object has already been serialized + * by this OSSerialize object and a reference + * to it is successfully added to the XML stream, + * false otherwise. + * + * + * @discussion + * This function both reduces the size of generated XML + * by emitting shorter references to existing objects with the same + * value (particularly for OSString, OSSymbol, and OSData), + * and also preserves instance references + * so that the user-space I/O Kit library can reconstruct + * an identical graph of object relationships. 
+ * + * All classes that override + * @link + * //apple_ref/cpp/instm/OSObject/serialize/virtualbool/(OSSerialize*) + * OSObject::serialize@/link. + * should call this function before doing any actual serialization; + * if it returns true, the serialize implementation + * can immediately return true. + */ + virtual bool previouslySerialized(const OSMetaClassBase * object); + + + /*! + * @function addXMLStartTag + * + * @abstract + * Appends an XML start tag to the XML stream. + * + * @param object The object being serialized. + * @param tagString The name of the XML tag to emit; for example, "string". + * + * @result + * true if an XML start tag for tagString + * is successfully added to the XML stream, false otherwise. + * + * @discussion + * This function emits the named tag, + * enclosed within a pair of angle brackets. + * + * A class that implements serialization should call this function + * with the name of the XML tag that best represents the serialized + * contents of the object. + * A limited number of tags are supported by the user-space + * I/O Kit library: + *
<ul> + * <li>array</li> + * <li>dict</li> + * <li>integer</li> + * <li>key</li> + * <li>set</li> + * <li>string</li> + * </ul>
    + * + * A call to this function must be balanced with one to + * @link addXMLEndTag addXMLEndTag@/link + * using the same tagString. + */ + virtual bool addXMLStartTag( + const OSMetaClassBase * object, + const char * tagString); + - virtual bool addChar(const char); - virtual bool addString(const char *); + /*! + * @function addXMLEndTag + * + * @abstract + * Appends an XML end tag to the XML stream. + * + * @param tagString The name of the XML tag to emit; for example, "string". + * + * @result + * true if an XML end tag for tagString + * is successfully added to the XML stream, false otherwise. + * + * @discussion + * This function emits the named tag, + * preceded by a slash character to indicate the closing of an entity, + * all enclosed within a pair of angle brackets. + * + * A call to this function must balance an earlier call to + * @link addXMLStartTag addXMLStartTag@/link + * using the same tagString. + */ + virtual bool addXMLEndTag(const char * tagString); + + + /*! + * @function addChar + * + * @abstract + * Appends a single character to the XML stream. + * + * @param aChar The character to append to the XML stream. + * + * @result + * true if aChar + * is successfully added to the XML stream, false otherwise. + */ + virtual bool addChar(const char aChar); + + + /*! + * @function addString + * + * @abstract + * Appends a C string to the XML stream. + * + * @param cString The C string to append to the XML stream. + * + * @result + * true if cString + * is successfully added to the XML stream, false otherwise. + */ + virtual bool addString(const char * cString); // stuff you should never have to use (in theory) @@ -99,8 +289,10 @@ class OSSerialize : public OSObject OSMetaClassDeclareReservedUnused(OSSerialize, 7); }; +// xx-review: this whole class seems to be unused! 
+ typedef bool (*OSSerializerCallback)(void * target, void * ref, - OSSerialize * s); + OSSerialize * serializer); class OSSerializer : public OSObject { @@ -112,10 +304,12 @@ class OSSerializer : public OSObject public: - static OSSerializer * forTarget(void * target, - OSSerializerCallback callback, void * ref = 0); + static OSSerializer * forTarget( + void * target, + OSSerializerCallback callback, + void * ref = 0); - virtual bool serialize(OSSerialize * s) const; + virtual bool serialize(OSSerialize * serializer) const; }; #endif /* _OS_OSSERIALIZE_H */ diff --git a/libkern/libkern/c++/OSSet.h b/libkern/libkern/c++/OSSet.h index 800dad227..65fd45d6e 100644 --- a/libkern/libkern/c++/OSSet.h +++ b/libkern/libkern/c++/OSSet.h @@ -36,242 +36,736 @@ class OSArray; /*! - @class OSSet - @abstract A collection class for storing OSMetaClassBase derived objects. - @discussion - Instances of OSSet store unique OSMetaClassBase derived objects in a non-ordered manner. -*/ + * @header + * + * @abstract + * This header declares the OSSet collection class. + */ + + +/*! + * @class OSSet + * + * @abstract + * OSSet provides an unordered set store of objects. + * + * @discussion + * OSSet is a container for Libkern C++ objects + * (those derived from + * @link //apple_ref/doc/class/OSMetaClassBase OSMetaClassBase@/link, + * in particular @link //apple_ref/doc/class/OSObject OSObject@/link). + * Storage and access follow basic set logic: you can add or remove an object, + * and test whether the set contains a particular object. + * A given object is only stored in the set once, + * and there is no ordering of objects in the set. + * A subclass @link //apple_ref/doc/class/OSOrderedSet OSOrderedSet@/link, + * provides for ordered set logic. + * + * As with all Libkern collection classes, + * OSSet retains objects added to it, + * and releases objects removed from it. 
+ * An OSSet also grows as necessary to accommodate new objects, + * unlike Core Foundation collections (it does not, however, shrink). + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSSet provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSSet : public OSCollection { OSDeclareDefaultStructors(OSSet) private: - OSArray *members; + OSArray * members; protected: /* * OSCollectionIterator interfaces. */ virtual unsigned int iteratorSize() const; - virtual bool initIterator(void *iterator) const; - virtual bool getNextObjectForIterator(void *iterator, OSObject **ret) const; + virtual bool initIterator(void * iterator) const; + virtual bool getNextObjectForIterator(void * iterator, OSObject ** ret) const; struct ExpansionData { }; - /*! @var reserved - Reserved for future use. (Internal use only) */ - ExpansionData *reserved; + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; public: - /*! - @function withCapacity - @abstract A static constructor function to create and initialize an instance of OSSet with a given capacity. - @param capacity The initial capacity of the collection. The capacity is the total number of objects that can be stored in the collection. - @result Returns an instance of OSSet or 0 on failure. + + + /*! + * @function withCapacity + * + * @abstract + * Creates and initializes an empty OSSet. + * + * @param capacity The initial storage capacity of the new set object. 
+ * + * @result + * An empty instance of OSSet + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * capacity must be nonzero. + * The new OSSet will grow as needed to accommodate more key/object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). */ - static OSSet *withCapacity(unsigned int capacity); - /*! - @function withObjects - @abstract A static constructor function to create and initialize an instance of OSSet and populate it with the objects provided. - @param objects A static array of OSMetaClassBase derived objects which are used to populate the collection. - @param count The number of objects passed to the collection. - @param capacity The initial storage size of the collection. The capacity is the total number of objects that can be stored in the collection. This value must be equal to or larger than the count parameter. - @result Returns an instance of OSSet or 0 on failure. + static OSSet * withCapacity(unsigned int capacity); + + + /*! + * @function withObjects + * + * @abstract + * Creates and initializes an OSSet + * populated with objects provided. + * + * @param objects A C array of OSMetaClassBase-derived objects. + * @param count The number of objects to be placed into the set. + * @param capacity The initial storage capacity of the new set object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * An instance of OSSet + * containing the objects provided, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * objects must be non-NULL, + * and count must be nonzero. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new OSSet will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). 
+ * + * The objects in objects are retained for storage in the new set, + * not copied. */ - static OSSet *withObjects(const OSObject *objects[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function withArray - @abstract A static constructor function to create and initialize an instance of OSSet and populate it with the objects from an OSSArray object. - @param array An OSArray object containing a list of OSMetaClassBase derived objects which are used to initially populate the OSSet object. - @param capacity The initial storage size of the collection. This value must be equal to or larger than the number of objects provided by the OSArray object passed as the first parameter. - @result Returns an instance of OSSet or 0 on failure. + static OSSet * withObjects( + const OSObject * objects[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function withArray + * + * @abstract + * Creates and initializes an OSSet + * populated with the contents of an OSArray. + * + * @param array An array whose objects will be stored in the new OSSet. + * @param capacity The initial storage capacity of the new set object. + * If 0, the capacity is set to the number of objects + * in array; + * otherwise capacity must be greater than or equal to + * the number of objects in array. + * @result + * An instance of OSSet containing + * the objects of array, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * Each distinct object in array is added to the new set. + * + * array must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new OSSet will grow as needed to accommodate more key-object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). + * + * The objects in array are retained for storage in the new set, + * not copied. */ - static OSSet *withArray(const OSArray *array, - unsigned int capacity = 0); - /*! 
- @function withSet - @abstract A static constructor function to create an instance of OSSet and populate it with the objects from another OSSet object. - @param array An OSSet object containing OSMetaClassBase derived objects which are used to initially populate the new OSSet object. - @param capacity The initial storage size of the collection. This value must be equal to or larger than the number of objects provided by the OSSet object passed as the first parameter. - @result Returns an instance of OSSet or 0 on failure. + static OSSet * withArray( + const OSArray * array, + unsigned int capacity = 0); + + + /*! + * @function withSet + * + * @abstract + * Creates and initializes an OSSet + * populated with the contents of another OSSet. + * + * @param set An OSSet whose contents will be stored + * in the new instance. + * @param capacity The initial storage capacity of the set object. + * If 0, the capacity is set to the number of objects + * in set; + * otherwise capacity must be greater than or equal to + * the number of objects in set. + * @result + * An instance of OSSet + * containing the objects of set, + * with a retain count of 1; + * NULL on failure. + * + * @discussion + * set must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to the number of objects in set. + * The new set will grow as needed to accommodate more objects + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). + * + * The objects in set are retained for storage in the new set, + * not copied. */ - static OSSet *withSet(const OSSet *set, + static OSSet * withSet(const OSSet * set, unsigned int capacity = 0); - /*! - @function initWithCapacity - @abstract A member function to initialize an instance of OSSet with a given capacity. - @param capacity The initial storage size of the collection. - @result Returns true if initialization successful or false on failure. + + /*! 
+ * @function initWithCapacity + * + * @abstract + * Initializes a new instance of OSSet. + * + * @param capacity The initial storage capacity of the new set object. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSSet/withCapacity/staticOSSet*\/(unsignedint) + * withCapacity@/link + * instead. + * + * capacity must be nonzero. + * The new set will grow as needed to accommodate more key/object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). */ virtual bool initWithCapacity(unsigned int capacity); - /*! - @function initWithObjects - @abstract A member function to initialize an instance of OSSet with a given capacity and populate the collection with the objects provided. - @param object A static array containing OSMetaClassBase derived objects used to populate the collection. - @param count The number of objects provided. - @param capacity The initial storage size of the collection. This value must be equal to or larger than the 'count' parameter. - @result Returns true if initialization successful or false on failure. + + + /*! + * @function initWithObjects + * + * @abstract + * Initializes a new OSSet populated with objects provided. + * + * @param objects A C array of OSObject-derived objects. + * @param count The number of objects to be placed into the set. + * @param capacity The initial storage capacity of the new set object. + * If 0, count is used; otherwise this value + * must be greater than or equal to count. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSSet/withObjects/staticOSSet*\/(constOSObject*,unsignedint,unsignedint) + * withObjects@/link + * instead. + * + * objects must be non-NULL, + * and count must be nonzero. 
+ * If capacity is nonzero, it must be greater than or equal to count. + * The new array will grow as needed to accommodate more key-object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). + * + * The objects in objects are retained for storage in the new set, + * not copied. */ - virtual bool initWithObjects(const OSObject *objects[], - unsigned int count, - unsigned int capacity = 0); - /*! - @function initWithArray - @abstract A member function to initialize a new instance of OSSet and populate it with the contents of the OSArray object provided. - @param array The OSArray object containing OSMetaClassBase derived objects used to populate the new OSSet object. - @param capacity The initial storage capacity of the object. This value must be equal to or larger than the number of objects provided by the OSArray object passed as the first parameter. - @result Returns true if initialization successful or false on failure. + virtual bool initWithObjects( + const OSObject * objects[], + unsigned int count, + unsigned int capacity = 0); + + + /*! + * @function initWithArray + * + * @abstract Initializes a new OSSet + * populated with the contents of an OSArray. + * + * @param array An OSAray whose contents will be placed + * in the new instance. + * @param capacity The initial storage capacity of the new set object. + * If 0, the capacity is set + * to the number of objects in array; + * otherwise capacity must be greater than or equal to + * the number of objects in array. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link + * //apple_ref/cpp/clm/OSSet/withArray/staticOSSet*\/(constOSArray*,unsignedint) + * withArray@/link + * instead. + * + * array must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. 
+ * The new array will grow as needed to accommodate more key-object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). + * + * The objects in array are retained for storage in the new set, + * not copied. */ - virtual bool initWithArray(const OSArray *array, - unsigned int capacity = 0); - /*! - @function initWithSet - @abstract A member function to initialize a new instance of OSSet and populate it with the contents of the OSSet object provided. - @param array The OSSet object containing OSMetaClassBase derived objects used to populate the new OSSet object. - @param capacity The initial storage capacity of the object. This value must be equal to or larger than the number of objects provided by the OSSet object passed as the first parameter. - @result Returns true if initialization successful or false on failure. - @discussion This function should not be called directly, use release() instead. + virtual bool initWithArray( + const OSArray * array, + unsigned int capacity = 0); + + + /*! + * @function initWithSet + * + * @abstract + * Initializes a new OSSet + * populated with the contents of another OSSet. + * + * @param set A set whose contents will be placed in the new instance. + * @param capacity The initial storage capacity of the new set object. + * If 0, the capacity is set + * to the number of objects in set; + * otherwise capacity must be greater than or equal to + * the number of objects in set. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withSet withSet@/link instead. + * + * set must be non-NULL. + * If capacity is nonzero, + * it must be greater than or equal to count. + * The new set will grow as needed to accommodate more key-object pairs + * (unlike @link //apple_ref/doc/uid/20001503 CFMutableSet@/link, + * for which the initial capacity is a hard limit). 
+ * + * The objects in set are retained for storage in the new set, + * not copied. */ virtual bool initWithSet(const OSSet *set, unsigned int capacity = 0); - /*! - @function free - @abstract A member function to release all resources created or used by the OSArray instance. + + + /*! + * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSSet instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getCount - @abstract A member function which returns the number of objects current in the collection. - @result Returns the number of objects in the collection. + + /*! + * @function getCount + * + * @abstract + * Returns the current number of objects within the set. + * + * @result + * The current number of objects within the set. */ virtual unsigned int getCount() const; - /*! - @function getCapacity - @abstract A member function which returns the storage capacity of the collection. - @result Returns the storage size of the collection. + + + /*! + * @function getCapacity + * + * @abstract + * Returns the number of objects the set + * can store without reallocating. + * + * @result + * The number of objects the set + * can store without reallocating. + * + * @discussion + * OSSet objects grow when full to accommodate additional objects. + * See + * @link + * //apple_ref/cpp/instm/OSSet/getCapacityIncrement/virtualunsignedint/() + * getCapacityIncrement@/link + * and + * @link + * //apple_ref/cpp/instm/OSSet/ensureCapacity/virtualunsignedint/(unsignedint) + * ensureCapacity@/link. */ virtual unsigned int getCapacity() const; - /*! - @function getCapacityIncrement - @abstract A member function which returns the growth factor of the collection. - @result Returns the size by which the collection will grow. + + + /*! 
+ * @function getCapacityIncrement + * + * @abstract + * Returns the storage increment of the set. + * + * @result + * The storage increment of the set. + * + * @discussion + * An OSSet allocates storage for objects in multiples + * of the capacity increment. */ virtual unsigned int getCapacityIncrement() const; - /*! - @function setCapacityIncrement - @abstract A member function which sets the growth factor of the collection. - @result Returns the new increment. + + + /*! + * @function setCapacityIncrement + * + * @abstract + * Sets the storage increment of the set. + * + * @result + * The new storage increment of the set, + * which may be different from the number requested. + * + * @discussion + * An OSSet allocates storage for objects in multiples + * of the capacity increment. + * Calling this function does not immediately reallocate storage. */ virtual unsigned int setCapacityIncrement(unsigned increment); - /*! - @function ensureCapacity - @abstract A member function to grow the size of the collection. - @param newCapacity The new capacity for the collection to expand to. - @result Returns the new capacity of the collection or the previous capacity upon error. + + /*! + * @function ensureCapacity + * + * @abstract + * Ensures the set has enough space + * to store the requested number of distinct objects. + * + * @param newCapacity The total number of distinct objects the set + * should be able to store. + * @result + * The new capacity of the set, + * which may be different from the number requested + * (if smaller, reallocation of storage failed). + * + * @discussion + * This function immediately resizes the set, if necessary, + * to accommodate at least newCapacity distinct objects. + * If newCapacity is not greater than the current capacity, + * or if an allocation error occurs, the original capacity is returned. + * + * There is no way to reduce the capacity of an OSSet. */ virtual unsigned int ensureCapacity(unsigned int newCapacity); - /*! 
- @function flushCollection - @abstract A member function which removes and releases all objects within the collection. + + /*! + * @function flushCollection + * + * @abstract + * Removes and releases all objects within the set. + * + * @discussion + * The set's capacity (and therefore direct memory consumption) + * is not reduced by this function. */ virtual void flushCollection(); - /*! - @function setObject - @abstract A member function to place objects into the collection. - @param anObject The OSMetaClassBase derived object to be placed into the collection. - @result Returns true if the object was successfully placed into the collection, false otherwise. - @discussion The object added to the collection is automatically retained. + + /*! + * @function setObject + * + * @abstract + * Adds an object to the OSSet if it is not already present. + * + * @param anObject The OSMetaClassBase-derived object to be added to the set. + * + * @result + * true if anObject was successfully + * added to the set, false otherwise + * (including if it was already in the set). + * + * @discussion + * The set adds storage to accommodate the new object, if necessary. + * If successfully added, the object is retained. + * + * A false return value can mean either + * that anObject is already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the object + * is already present, use + * @link containsObject containsObject@/link. */ - virtual bool setObject(const OSMetaClassBase *anObject); - /*! - @function merge - @abstract A member function to merge the contents of an OSArray object with set. - @param array The OSArray object which contains the objects to be merged. - @result Returns true if the contents of the OSArray were successfully merged into the receiver. + virtual bool setObject(const OSMetaClassBase * anObject); + + + /*! + * @function merge + * + * @abstract + * Adds the contents of an OSArray to the set.
+ * + * @param array The OSArray object containing the objects to be added. + * + * @result + * true if any object from array + * was successfully added to the receiver, + * false otherwise. + * + * @discussion + * This function adds to the receiving set + * all objects from array + * that are not already in the set. + * Objects successfully added to the receiver are retained. + * + * A false return value can mean either + * that all the objects in array are already present in the set, + * or that a memory allocation failure occurred. + * If you need to know whether the objects + * are already present, use + * @link containsObject containsObject@/link + * for each object. */ - virtual bool merge(const OSArray *array); - /*! - @function merge - @abstract A member function to merge the contents of an OSSet object with receiver. - @param set The OSSet object which contains the objects to be merged. - @result Returns true if the contents of the OSSet were successfully merged into the receiver. + virtual bool merge(const OSArray * array); + + + /*! + * @function merge + * + * @abstract + * Adds the contents of an OSSet to the set. + * + * @param set The OSSet object containing the objects to be added. + * + * @result + * true if any object from set + * was successfully added to the receiver, + * false otherwise. + * + * @discussion + * This function adds to the receiving set + * all objects from set + * that are not already in the receiving set. + * Objects successfully added to the receiver are retained. + * + * A false return value can mean either + * that all the objects in set are already present in the receiving set, + * or that a memory allocation failure occurred. + * If you need to know whether the objects + * are already present, use + * @link containsObject containsObject@/link + * for each object. */ - virtual bool merge(const OSSet *set); + virtual bool merge(const OSSet * set); + - /*!
- @function removeObject - @abstract A member function to remove objects from the collection. - @param anObject The OSMetaClassBase derived object to be removed from the collection. - @discussion The object removed from the collection is automatically released. + /*! + * @function removeObject + * + * @abstract + * Removes an object from the set. + * + * @param anObject The OSMetaClassBase-derived object + * to be removed from the set. + * + * @discussion + * The object removed from the set is released. */ virtual void removeObject(const OSMetaClassBase * anObject); - /*! - @function containsObject - @abstract A member function to query the collection for the presence of an object. - @param anObject The OSMetaClassBase derived object to be queried for in the collecion. - @result Returns true if the object is present within the set, false otherwise. + + /*! + * @function containsObject + * + * @abstract + * Checks the set for the presence of an object. + * + * @param anObject The OSMetaClassBase-derived object + * to check for in the set. + * + * @result + * true if anObject is present within the set, + * false otherwise. + * + * @discussion + * Pointer equality is used. + * This function returns false if passed NULL. */ - virtual bool containsObject(const OSMetaClassBase *anObject) const; - /*! - @function member - @abstract A member function to query the collection for the presence of an object. - @param anObject The OSMetaClassBase derived object to be queried for in the collecion. - @result Returns true if the object is present within the set, false otherwise. + virtual bool containsObject(const OSMetaClassBase * anObject) const; + + + /*! + * @function member + * + * @abstract + * Checks the set for the presence of an object. + * + * @param anObject The OSMetaClassBase-derived object + * to check for in the set. + * + * @result + * true if anObject is present + * within the set, false otherwise. + * + * @discussion + * Pointer equality is used. 
This function returns false + * if passed NULL. + * + * @link containsObject containsObject@/link + * checks for NULL first, + * and is therefore more efficient than this function. */ - virtual bool member(const OSMetaClassBase *anObject) const; - /*! - @function getAnyObject - @abstract A member function which returns an object from the set. - @result Returns an object if one exists within the set. + virtual bool member(const OSMetaClassBase * anObject) const; + + + /*! + * @function getAnyObject + * + * @abstract + * Returns an arbitrary (not random) object from the set. + * + * @result + * An arbitrary (not random) object + * if one exists within the set. + * + * @discussion + * The returned object will be released if removed from the set; + * if you plan to store the reference, you should call + * @link + * //apple_ref/cpp/instm/OSObject/retain/virtualvoid/() + * retain@/link + * on that object. + */ + virtual OSObject * getAnyObject() const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSSet objects. + * + * @param aSet The set object being compared against the receiver. + * @result + * true if the two sets are equivalent, + * false otherwise. + * + * @discussion + * Two OSSet objects are considered equal if they have same count + * and the same object pointer values. */ - virtual OSObject *getAnyObject() const; + virtual bool isEqualTo(const OSSet * aSet) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between the receiver and an OSSet object. - @param aSet An OSSet object to be compared against the receiver. - @result Returns true if the objects are equivalent. + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSSet against an arbitrary object. + * + * @param anObject The object being compared against the receiver. + * @result + * true if the two objects are equivalent, + * false otherwise. 
+ * + * @discussion + * An OSSet object is considered equal to another object if the other object + * is derived from OSSet and compares equal as a set. */ - virtual bool isEqualTo(const OSSet *aSet) const; - /*! - @function isEqualTo - @abstract A member function to test the equality between the receiver and an unknown object. - @param anObject An object to be compared against the receiver. - @result Returns true if the objects are equal. + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. */ - virtual bool isEqualTo(const OSMetaClassBase *anObject) const; + virtual bool serialize(OSSerialize * serializer) const; + - /*! - @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. + /*! + * @function setOptions + * + * @abstract + * Recursively sets option bits in the set + * and all child collections. + * + * @param options A bitfield whose values turn the options on (1) or off (0). + * @param mask A mask indicating which bits + * in options to change. + * Pass 0 to get the whole current options bitfield + * without changing any settings. + * @param context Unused. + * + * @result + * The options bitfield as it was before the set operation. + * + * @discussion + * Kernel extensions should not call this function. + * + * Child collections' options are changed only if the receiving set's + * options actually change. */ - virtual bool serialize(OSSerialize *s) const; - - /*! - @function setOptions - @abstract This function is used to recursively set option bits in this set and all child collections. - @param options Set the (options & mask) bits. 
- @param mask The mask of bits which need to be set, 0 to get the current value. - @result The options before the set operation, NB setOptions(?,0) returns the current value of this collection. - */ - virtual unsigned setOptions(unsigned options, unsigned mask, void * = 0); - - /*! - @function copyCollection - @abstract Do a deep copy of this ordered set. - @discussion This function copies this set and all of included containers recursively. Objects that don't derive from OSContainter are NOT copied, that is objects like OSString and OSData. - @param cycleDict Is a dictionary of all of the collections that have been, to start the copy at the top level just leave this field 0. - @result The newly copied collecton or 0 if insufficient memory + virtual unsigned setOptions(unsigned options, unsigned mask, void * context = 0); + + + /*! + * @function copyCollection + * + * @abstract + * Creates a deep copy of this set and its child collections. + * + * @param cycleDict A dictionary of all of the collections + * that have been copied so far, + * which is used to track circular references. + * To start the copy at the top level, + * pass NULL. + * + * @result + * The newly copied set, with a retain count of 1, + * or NULL if there is insufficient memory to do the copy. + * + * @discussion + * The receiving set, and any collections it contains, + * recursively, are copied. + * Objects that are not derived from OSCollection are retained + * rather than copied. */ OSCollection *copyCollection(OSDictionary *cycleDict = 0); diff --git a/libkern/libkern/c++/OSString.h b/libkern/libkern/c++/OSString.h index deadb3b07..2a5474027 100644 --- a/libkern/libkern/c++/OSString.h +++ b/libkern/libkern/c++/OSString.h @@ -35,138 +35,385 @@ class OSData; -enum { kOSStringNoCopy = 0x00000001 }; /*! - @class OSString - @abstract A container class for managing strings. - @discussion - OSString is a container class for managing arrays of characters. 
Strings come in two varieties, mutable and immutable. An immutable OSString string is one which was created or initialized with the "NoCopy" functions, all other strings are mutable. When modifying an immutable string, the function called to perform the action will fail. -*/ + * @header + * + * @abstract + * This header declares the OSString container class. + */ + + + /* Not to be included in headerdoc. + * + * For internal use. + */ + enum { kOSStringNoCopy = 0x00000001 }; + + +/*! + * @class OSString + * + * @abstract + * OSString wraps a C string in a C++ object for use in Libkern collections. + * + * @discussion + * OSString is a container class for managing arrays of characters. + * An OSString normally maintains its own character buffer and allows changes, + * but you can create an "immutable" OSString + * that references an external C string + * buffer using the "NoCopy" creator functions. + * Functions called to change the contents of an immutable OSString will fail. + * + * Encodings + * + * OSString makes no provisions for different character encodings and + * assumes that a string is a nul-terminated sequence of single-byte characters. + * User-space code must either assume an encoding (typically ASCII or UTF-8) + * or determine it in some other way (such as an IORegistryEntry property). + * + * Altering Strings + * + * OSString's intended use is as a reference-counted object container + * for a C string and little more. + * While OSString provides full access to the underlying C string, + * it provides little in the way of string object manipulation; + * there are no append or insert functions, + * only a set-character function. + * If you need to manipulate OSStrings, + * it's generally best to get the C strings, + * alter them as necessary, and create a new OSString object + * from the resulting C string.
+ * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSString provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSString : public OSObject { OSDeclareDefaultStructors(OSString) protected: - unsigned int flags; - unsigned int length; - char *string; + unsigned int flags; + unsigned int length; + char * string; public: - /*! - @function withString - @abstract Static constructor function to create and initialize an instance of OSString from another OSString. - @param aString An OSString object. - @result Returns an instance of OSString or 0 on error. - */ - static OSString *withString(const OSString *aString); - /*! - @function withCString - @abstract Static constructor function to create and initialize an instance of OSString. - @param cString A simple c-string. - @result Returns an instance of OSString or 0 on error. - */ - static OSString *withCString(const char *cString); - /*! - @function withCStringNoCopy - @abstract Static constructor function to create and initialize an instance of OSString but does not copy the original c-string into container. - @param cString A simple c-string. - @result Returns an instance of OSString or 0 on error. - */ - static OSString *withCStringNoCopy(const char *cString); - - /*! - @function initWithString - @abstract Member function to initialize an instance of OSString from another OSString object. - @param aString An OSString object. - @result Returns true on success, false otherwise. 
- */ - virtual bool initWithString(const OSString *aString); - /*! - @function initWithCString - @abstract Member function to initialize an instance of OSString with a simple c-string. - @param cString A simple c-string. - @result Returns true on success, false otherwise. - */ - virtual bool initWithCString(const char *cString); - /*! - @function initWithCStringNoCopy - @abstract Member function to initialize an instance of OSString with a simple c-string but does not copy the string into the container. - @param cString A simple c-string. - @result Returns true on success, false otherwise. - */ - virtual bool initWithCStringNoCopy(const char *cString); - /*! - @function free - @abstract Releases all resources used by the OSString object. - @discussion This function should not be called directly, use release() instead. + + + /*! + * @function withString + * + * @abstract + * Creates and initializes an OSString from another OSString. + * + * @param aString The OSString object whose contents to copy. + * + * @result + * An instance of OSString representing + * the same characters as aString, + * and with a reference count of 1; + * NULL on failure. + * + * @discussion + * The new OSString is a distinct instance from aString, + * and is not merely the original object + * with the reference count incremented. + * Changes to one will not be reflected in the other. + */ + static OSString * withString(const OSString * aString); + + + /*! + * @function withCString + * + * @abstract + * Creates and initializes an OSString from a C string. + * + * @param cString The C string to copy into the new OSString. + * + * @result + * An instance of OSString representing + * the same characters as cString, + * and with a reference count of 1; + * NULL on failure. + */ + static OSString * withCString(const char * cString); + + + /*! + * @function withCStringNoCopy + * + * @abstract + * Creates and initializes an immutable OSString + * that shares the provided C string buffer.
+ * + * @param cString The C string to reference. + * + * @result + * An instance of OSString containing cString, + * and with a reference count of 1; + * NULL on failure. + * + * @discussion + * An OSString object created with this function + * does not claim ownership of the C string, + * but shares it with the caller. + * When the caller determines that the OSString object has actually been freed, + * it can safely dispose of the data buffer. + * Conversely, if it frees the shared data buffer, + * it must not attempt to use the OSString object and should release it. + * + * An OSString object created with this function does not + * allow changing the string via @link setChar setChar@/link. + */ + static OSString * withCStringNoCopy(const char * cString); + + + /*! + * @function initWithString + * + * @abstract + * Initializes an OSString from another OSString. + * + * @param aString The OSString object whose contents to copy. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withString withString@/link instead. + */ + virtual bool initWithString(const OSString * aString); + + + /*! + * @function initWithCString + * + * @abstract + * Initializes an OSString from a C string. + * + * @param cString The C string to copy into the new OSString. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. Use the static instance creation method + * @link withCString withCString@/link instead. + */ + virtual bool initWithCString(const char * cString); + + + /*! + * @function initWithCStringNoCopy + * + * @abstract + * Initializes an immutable OSString + * to share the provided C string buffer. + * + * @param cString The C string to reference. + * + * @result + * true on success, false on failure. + * + * @discussion + * Not for general use. 
Use the static instance creation method + * @link withCStringNoCopy withCStringNoCopy@/link instead. + * + * An OSString object initialized with this function + * does not claim ownership of the C string, + * but shares it with the caller. + * When the caller determines that the OSString object has actually been freed, + * it can safely dispose of the data buffer. + * Conversely, if it frees the shared data buffer, + * it must not attempt to use the OSString object and should release it. + * + * An OSString object created with this function does not + * allow changing the string via @link setChar setChar@/link. + */ + virtual bool initWithCStringNoCopy(const char * cString); + + + /*! + * @function free + * + * @abstract + * Deallocates or releases any resources + * used by the OSString instance. + * + * @discussion + * This function should not be called directly; + * use + * @link + * //apple_ref/cpp/instm/OSObject/release/virtualvoid/() + * release@/link + * instead. */ virtual void free(); - /*! - @function getLength - @abstract A member function to return the length of the string. - @result Returns the length of the string. + + /*! + * @function getLength + * + * @abstract + * Returns the number of characters in the OSString object. + * + * @result + * The number of characters in the OSString object. */ virtual unsigned int getLength() const; - /*! - @function getChar - @abstract Returns a character at a particular index in the string object. - @param index The index into the string. - @result Returns a character. + + + /*! + * @function getChar + * + * @abstract + * Returns the character at a given index in the string object. + * + * @param index The index into the string. + * + * @result + * The character at index within the string, + * or '\0' if index is past the end of the string. */ virtual char getChar(unsigned int index) const; - /*! - @function setChar - @abstract Replaces a character at a particular index in the string object. 
- @param index The index into the string. - @result Returns true if the character was successfully replaced or false if the string is immutable or index was beyond the bounds of the character array. + + + /*! + * @function setChar + * + * @abstract + * Replaces a character at a given index in the string object. + * + * @param aChar The character value to set. + * @param index The index into the string. + * + * @result + * true if the character was replaced, + * false if the string was created "NoCopy" + * or index is past the end of the string. */ virtual bool setChar(char aChar, unsigned int index); - /*! - @function getCStringNoCopy - @abstract Returns a pointer to the internal c-string array. - @result Returns a pointer to the internal c-string array. - */ - virtual const char *getCStringNoCopy() const; - - /*! - @function isEqualTo - @abstract A member function to test the equality of two OSString objects. - @param aString An OSString object. - @result Returns true if the two strings are equal, false otherwise. - */ - virtual bool isEqualTo(const OSString *aString) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of c-string and the internal string array of the receiving OSString object. - @param aCString A simple c-string. - @result Returns true if the two strings are equal, false otherwise. - */ - virtual bool isEqualTo(const char *aCString) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of an unknown OSObject derived object and the OSString instance. - @param obj An OSObject derived object. - @result Returns true if the two objects are equivalent, false otherwise. - */ - virtual bool isEqualTo(const OSMetaClassBase *obj) const; - /*! - @function isEqualTo - @abstract A member function to test the equality of an unknown OSData object and the OSString instance. - @param obj An OSData object. - @result Returns true if the two objects are equivalent, false otherwise.
- */ - virtual bool isEqualTo(const OSData *obj) const; - - /*! - @function serialize - @abstract A member function which archives the receiver. - @param s The OSSerialize object. - @result Returns true if serialization was successful, false if not. - */ - virtual bool serialize(OSSerialize *s) const; + + /*! + * @function getCStringNoCopy + * + * @abstract + * Returns a pointer to the internal C string buffer. + * + * @result + * A pointer to the internal C string buffer. + */ + virtual const char * getCStringNoCopy() const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSString objects. + * + * @param aString The OSString object being compared against the receiver. + * + * @result + * true if the two OSString objects are equivalent, + * false otherwise. + * + * @discussion + * Two OSString objects are considered equal if they have same length + * and if their byte buffers hold the same contents. + */ + virtual bool isEqualTo(const OSString * aString) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSString object with a C string. + * + * @param cString The C string to compare against the receiver. + * + * @result + * true if the OSString's characters + * are equivalent to the C string's, + * false otherwise. + */ + virtual bool isEqualTo(const char * cString) const; + + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of an OSString object to an arbitrary object. + * + * @param anObject The object to be compared against the receiver. + * + * @result + * Returns true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * An OSString is considered equal to another object + * if that object is derived from OSString + * and contains the equivalent bytes of the same length. + */ + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + + + /*! 
+ * @function isEqualTo + * + * @abstract + * Tests the equality of an OSData object and the OSString instance. + * + * @param aDataObject An OSData object. + * + * @result + * true if the two objects are equivalent, false otherwise. + * + * @discussion + * This function compares the bytes of the OSData object + * against those of the OSString, + * accounting for the possibility that an OSData + * might explicitly include a nul + * character as part of its total length. + * Thus, for example, an OSData object containing + * either the bytes <'u', 's', 'b', '\0'> + * or <'u', 's', 'b'> + * will compare as equal to the OSString containing "usb". + */ + virtual bool isEqualTo(const OSData * aDataObject) const; + + + /*! + * @function serialize + * + * @abstract + * Archives the receiver into the provided + * @link //apple_ref/doc/class/OSSerialize OSSerialize@/link object. + * + * @param serializer The OSSerialize object. + * + * @result + * true if serialization succeeds, false if not. + */ + virtual bool serialize(OSSerialize * serializer) const; OSMetaClassDeclareReservedUnused(OSString, 0); OSMetaClassDeclareReservedUnused(OSString, 1); diff --git a/libkern/libkern/c++/OSSymbol.h b/libkern/libkern/c++/OSSymbol.h index da3d2534a..5067423a9 100644 --- a/libkern/libkern/c++/OSSymbol.h +++ b/libkern/libkern/c++/OSSymbol.h @@ -26,7 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* IOSymbol.h created by gvdl on Fri 1998-10-30 */ -/* IOSymbol must be created through the factory methods and thus is not subclassable. */ +/* OSSymbol must be created through the factory methods and thus is not subclassable. */ #ifndef _OS_OSSYMBOL_H #define _OS_OSSYMBOL_H @@ -34,11 +34,50 @@ #include /*! - @class OSSymbol - @abstract A container class whose instances represent unique string values. - @discussion - An OSSymbol object represents a unique string value. 
When creating an OSSymbol, a string is given and an OSSymbol representing this string is created if none exist for this string. If a symbol for this string already exists, then a reference to an existing symbol is returned. -*/ + * @header + * + * @abstract + * This header declares the OSSymbol container class. + */ + +// xx-review: OSSymbol does not override setChar + +/*! + * @class OSSymbol + * + * @abstract + * OSSymbol wraps a C string in a unique C++ object + * for use as keys in Libkern collections. + * + * @discussion + * OSSymbol is a container class for managing uniqued strings, + * for example, those used as dictionary keys. + * Its static instance-creation functions check + * for an existing instance of OSSymbol + * with the requested C string value before creating a new object. + * If an instance already exists in the pool of unique symbols, + * its reference count is incremented + * and the existing instance is returned. + * + * While OSSymbol provides for uniquing of a given string value, + * it makes no effort to enforce immutability of that value. + * Altering the contents of an OSSymbol should be avoided. + * + * Use Restrictions + * + * With very few exceptions in the I/O Kit, all Libkern-based C++ + * classes, functions, and macros are unsafe + * to use in a primary interrupt context. + * Consult the I/O Kit documentation related to primary interrupts + * for more information. + * + * OSSymbol provides no concurrency protection; + * it's up to the usage context to provide any protection necessary. + * Some portions of the I/O Kit, such as + * @link //apple_ref/doc/class/IORegistryEntry IORegistryEntry@/link, + * handle synchronization via defined member functions for setting + * properties. + */ class OSSymbol : public OSString { friend class OSSymbolPool; @@ -48,89 +87,291 @@ class OSSymbol : public OSString private: struct ExpansionData { }; - /*! @var reserved - Reserved for future use. 
(Internal use only) */ - ExpansionData *reserved; + /* Reserved for future use. (Internal use only) */ + ExpansionData * reserved; static void initialize(); + // xx-review: not in xnu, delete? friend void checkModuleForSymbols(void); /* in catalogue? */ - // The string init methods have to be removed from the inheritance. - virtual bool initWithString(const OSString *aString); - virtual bool initWithCString(const char *cString); + // xx-review: these should be removed from the symbol set. + + /*! + * @function initWithString + * + * @abstract + * Overridden to prevent creation of duplicate symbols. + * + * @param aString Unused. + * + * @result + * false. + * + * @discussion + * Overrides OSString's implementation to prevent creation + * of distinct OSSymbols with the same string value. + */ + virtual bool initWithString(const OSString * aString); + + + /*! + * @function initWithCString + * + * @abstract + * Overridden to prevent creation of duplicate symbols. + * + * @param cString Unused. + * + * @result + * false. + * + * @discussion + * Overrides OSString's implementation to prevent creation + * of distinct OSSymbols with the same string value. + */ + virtual bool initWithCString(const char * cString); + + + /*! + * @function initWithCStringNoCopy + * + * @abstract + * Overridden to prevent creation of duplicate symbols. + * + * @param cString Unused. + * + * @result + * false. + * + * @discussion + * Overrides OSString's implementation to prevent creation + * of distinct OSSymbols with the same string value. + */ virtual bool initWithCStringNoCopy(const char *cString); protected: - /*! - @function taggedRelease - @abstract Overriden super class release method so we can synchronise with the symbol pool. - @discussion When we release an symbol we need to synchronise the destruction of the object with any potential searches that may be occuring through the family factor methods. See OSObject::taggedRelease + +// xx-review: should we just omit this from headerdoc? 
+ /*! + * @function taggedRelease + * + * @abstract + * Overrides + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint) + * OSObject::taggedRelease(const void *, const int)@/link + * to synchronize with the symbol pool. + * + * @param tag Used for tracking collection references. + * @param freeWhen If decrementing the reference count makes it + * >= freeWhen, the object is immediately freed. + * + * @discussion + * Because OSSymbol shares instances, the reference-counting functions + * must synchronize access to the class-internal tables + * used to track those instances. */ - virtual void taggedRelease(const void *tag, const int when) const; + virtual void taggedRelease( + const void * tag, + const int freeWhen) const; - /*! - @function free - @abstract Overriden super class release method so we can synchronise with the symbol pool. - @discussion When we release an symbol we need to synchronise the destruction of the object with any potential searches that may be occuring through the family factor methods. See OSObject::free + +// xx-review: should we just omit this from headerdoc? + /*! + * @function free + * + * @abstract + * Overrides + * @link + * //apple_ref/cpp/instm/OSObject/free/virtualvoid/() + * OSObject::free@/link + * to synchronize with the symbol pool. + * + * @discussion + * Because OSSymbol shares instances, the reference-counting functions + * must synchronize access to the class-internal tables + * used to track those instances. */ virtual void free(); public: - /*! - @function taggedRelease - @abstract Release a tag. - @discussion The C++ language has forced me to override this method even though I have implemented it as { super::taggedRelease(tag) }. It seems that C++ is confused about the appearance of the protected taggedRelease with 2 args and refuses to only inherit one function. See OSObject::taggedRelease + +// xx-review: should we just omit this from headerdoc? + /*! 
+ * @function taggedRelease + * + * @abstract + * Overrides + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*) + * OSObject::taggedRelease(const void *)@/link + * to synchronize with the symbol pool. + * + * @param tag Used for tracking collection references. + * + * @discussion + * Because OSSymbol shares instances, the reference-counting functions + * must synchronize access to the class-internal tables + * used to track those instances. */ - virtual void taggedRelease(const void *tag) const; + + /* Original note (not for headerdoc): + * The C++ language has forced me to override this method + * even though I have implemented it as + * { super::taggedRelease(tag) }. + * It seems that C++ is confused about the appearance of the protected + * taggedRelease with 2 parameters and refuses to only inherit one function. + * See + * @link + * //apple_ref/cpp/instm/OSObject/taggedRelease/virtualvoid/(constvoid*,constint) + * OSObject::taggedRelease(const void *, const int)@/link. + */ + virtual void taggedRelease(const void * tag) const; - /*! - @function withString - @abstract A static constructor function to create an OSSymbol instance from an OSString object or returns an existing OSSymbol object based on the OSString object given. - @param aString An OSString object. - @result Returns a unique OSSymbol object for the string given. + /*! + * @function withString + * + * @abstract + * Returns an OSSymbol created from an OSString, + * or the existing unique instance of the same value. + * + * @param aString The OSString object to look up or copy. + * + * @result + * An instance of OSSymbol + * representing the same characters as aString; + * NULL on failure. + * + * @discussion + * This function creates or returns the unique OSSymbol instance + * representing the string value of aString. + * You can compare it with other OSSymbols using the == operator. + * + * OSSymbols are reference-counted normally. 
+ * This function either returns a + * new OSSymbol with a retain count of 1, + * or increments the retain count of the existing instance. */ - static const OSSymbol *withString(const OSString *aString); - /*! - @function withCString - @abstract A static constructor function to create an OSSymbol instance from a simple c-string returns an existing OSSymbol object based on the string object given. - @param cString A c-string. - @result Returns a unique OSSymbol object for the string given. + static const OSSymbol * withString(const OSString * aString); + + + /*! + * @function withCString + * + * @abstract + * Returns an OSSymbol created from a C string, + * or the existing unique instance of the same value. + * + * @param cString The C string to look up or copy. + * + * @result + * An instance of OSSymbol representing + * the same characters as cString; + * NULL on failure. + * + * @discussion + * This function returns the unique OSSymbol instance + * representing the string value of cString. + * You can compare it with other OSSymbols using the == operator. + * + * OSSymbols are reference-counted normally. + * This function either returns a + * new OSSymbol with a retain count of 1, + * or increments the retain count of the existing instance. */ - static const OSSymbol *withCString(const char *cString); - /*! - @function withCStringNoCopy - @abstract A static constructor function to create an OSSymbol instance from a simple c-string, but does not copy the string to the container. - @param cString A c-string. - @result Returns a unique OSSymbol object for the string given. + static const OSSymbol * withCString(const char * cString); + + + /*! + * @function withCStringNoCopy + * + * @abstract + * Returns an OSSymbol created from a C string, + * without copying that string, + * or the existing unique instance of the same value. + * + * @param cString The C string to look up or use. 
+ * @result + * An instance of OSSymbol representing + * the same characters as cString; + * NULL on failure. + * + * @discussion + * Avoid using this function; + * OSSymbols should own their internal string buffers. + * + * This function returns the unique OSSymbol instance + * representing the string value of cString. + * You can compare it with other OSSymbols using the == operator. + * + * OSSymbols are reference-counted normally. + * This function either returns a + * new OSSymbol with a retain count of 1, + * or increments the retain count of the existing instance. */ - static const OSSymbol *withCStringNoCopy(const char *cString); + static const OSSymbol * withCStringNoCopy(const char * cString); - /*! - @function isEqualTo - @abstract A member function which tests the equality between two OSSymbol objects. Two OSSymbol objects are only equivalent when their references are identical - @param aSymbol The OSSymbol object to be compared against the receiver. - @result Returns true if the two objects are equivalent, false otherwise. + + /*! + * @function isEqualTo + * + * @abstract + * Tests the equality of two OSSymbol objects. + * + * @param aSymbol The OSSymbol object being compared against the receiver. + * + * @result + * true if the two OSSymbol objects are equivalent, + * false otherwise. + * + * @discussion + * Two OSSymbol objects are considered equal if they have the same address; + * that is, this function is equivalent to the == operator. */ - virtual bool isEqualTo(const OSSymbol *aSymbol) const; - /*! - @function isEqualTo - @abstract A member function which tests the equality between an OSSymbol object and a simple c-string. - @param aCString The c-string to be compared against the receiver. - @result Returns true if the OSSymbol's internal string representation is equivalent to the c-string it is being compared against, false otherwise. + virtual bool isEqualTo(const OSSymbol * aSymbol) const; + + + /*! 
+ * @function isEqualTo + * + * @abstract Tests the equality of an OSSymbol object with a C string. + * + * @param cString The C string to compare against the receiver. + * + * @result + * true if the OSSymbol's characters + * are equivalent to the C string's, + * false otherwise. */ - virtual bool isEqualTo(const char *aCString) const; - /*! - @function isEqualTo - @abstract A member function which tests the equality between an OSSymbol object and and arbitrary OSObject derived object. - @param obj The OSObject derived object to be compared against the receiver. - @result Returns true if the OSSymbol and the OSObject objects are equivalent. + virtual bool isEqualTo(const char * cString) const; + + + /*! + * @function isEqualTo + * + * @abstract Tests the equality of an OSSymbol object to an arbitrary object. + * + * @param anObject The object to be compared against the receiver. + * @result Returns true if the two objects are equivalent, + * false otherwise. + * + * @discussion + * An OSSymbol is considered equal to another object + * if that object is derived from + * @link //apple_ref/doc/class/OSMetaClassBase OSString@/link + * and contains the equivalent bytes of the same length. */ - virtual bool isEqualTo(const OSMetaClassBase *obj) const; + virtual bool isEqualTo(const OSMetaClassBase * anObject) const; + - /* OSRuntime only INTERNAL API - DO NOT USE */ - static void checkForPageUnload(void *startAddr, void *endAddr); + /* OSRuntime only INTERNAL API - DO NOT USE */ + /* Not to be included in headerdoc. */ + // xx-review: this should be removed from the symbol set. + static void checkForPageUnload( + void * startAddr, + void * endAddr); OSMetaClassDeclareReservedUnused(OSSymbol, 0); diff --git a/libkern/libkern/c++/OSUnserialize.h b/libkern/libkern/c++/OSUnserialize.h index 52c4d04ea..073fb86c6 100644 --- a/libkern/libkern/c++/OSUnserialize.h +++ b/libkern/libkern/c++/OSUnserialize.h @@ -35,13 +35,37 @@ class OSObject; class OSString; -/*! 
@function OSUnserializeXML - @abstract Recreates an OS Container object from its previously serialized OS Container class instance data. - @param buffer pointer to buffer containing XML data representing the object to be recreated. - @param errorString if this is a valid pointer and the XML parser finds a error in buffer, errorString contains text indicating the line number and type of error encountered. - @result Pointer to the recreated object, or zero on failure. */ +/*! + * @header + * + * @abstract + * This header declares the OSUnserializeXML function. + */ + -extern OSObject* OSUnserializeXML(const char *buffer, OSString **errorString = 0); +/*! + * @function OSUnserializeXML + * + * @abstract + * Recreates an OSContainer object + * from its previously serialized OSContainer class instance data. + * + * @param buffer A buffer containing nul-terminated XML data + * representing the object to be recreated. + * @param errorString If non-NULL, and the XML parser + * finds an error in buffer, + * *errorString indicates the line number + * and type of error encountered. + * + * @result + * The recreated object, or NULL on failure. + * + * @discussion + * Not safe to call in a primary interrupt handler. + */ +extern OSObject * OSUnserializeXML( + const char * buffer, + OSString ** errorString = 0); #ifdef __APPLE_API_OBSOLETE extern OSObject* OSUnserialize(const char *buffer, OSString **errorString = 0); diff --git a/libkern/libkern/crypto/sha1.h b/libkern/libkern/crypto/sha1.h index 8ecb9e2f7..9acc46b91 100644 --- a/libkern/libkern/crypto/sha1.h +++ b/libkern/libkern/crypto/sha1.h @@ -60,7 +60,7 @@ typedef struct sha1_ctxt { extern void SHA1Init(SHA1_CTX *); extern void SHA1Update(SHA1_CTX *, const void *, size_t); -extern void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen); +extern void SHA1UpdateUsePhysicalAddress(SHA1_CTX *, const void *, size_t); extern void SHA1Final(void *, SHA1_CTX *); #ifdef __cplusplus diff --git a/libkern/libkern/i386/Makefile b/libkern/libkern/i386/Makefile index 3fc89f6c4..728ba4601 100644 --- a/libkern/libkern/i386/Makefile +++ b/libkern/libkern/i386/Makefile @@ -8,16 +8,16 @@ include $(MakeInc_def) INSTINC_SUBDIRS = -INSTINC_SUBDIRS_PPC = - INSTINC_SUBDIRS_I386 = -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_X86_64 = -EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} +EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} + DATAFILES = \ OSByteOrder.h \ _OSByteOrder.h diff --git a/libkern/libkern/i386/_OSByteOrder.h b/libkern/libkern/i386/_OSByteOrder.h index d84ce8f32..51477bb61 100644 --- a/libkern/libkern/i386/_OSByteOrder.h +++ b/libkern/libkern/i386/_OSByteOrder.h @@ -56,11 +56,25 @@ _OSSwapInt32( __uint32_t _data ) { +#if defined(__llvm__) + return __builtin_bswap32(_data); +#else __asm__ ("bswap %0" : "+r" (_data)); return _data; +#endif +} + +#if
defined(__llvm__) +__DARWIN_OS_INLINE +__uint64_t +_OSSwapInt64( + __uint64_t _data +) +{ + return __builtin_bswap64(_data); } -#if defined(__i386__) +#elif defined(__i386__) __DARWIN_OS_INLINE __uint64_t _OSSwapInt64( diff --git a/libkern/libkern/kernel_mach_header.h b/libkern/libkern/kernel_mach_header.h new file mode 100644 index 000000000..03e95969f --- /dev/null +++ b/libkern/libkern/kernel_mach_header.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * File: libkern/kernel_mach_header.h + * + * Definitions for accessing mach-o headers. 
+ * + * NOTE: These functions work on Mach-O headers compatible with + * the currently running kernel, and cannot be used against mach + * headers other than that of the currently running kernel. + * + */ + +#ifndef _KERNEL_MACH_HEADER_ +#define _KERNEL_MACH_HEADER_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#if !KERNEL +#error this header for kernel use only +#endif + +#if defined(__LP64__) + +typedef struct mach_header_64 kernel_mach_header_t; +typedef struct segment_command_64 kernel_segment_command_t; +typedef struct section_64 kernel_section_t; + +#define LC_SEGMENT_KERNEL LC_SEGMENT_64 +#define SECT_CONSTRUCTOR "__mod_init_func" +#define SECT_DESTRUCTOR "__mod_term_func" + +#else + +typedef struct mach_header kernel_mach_header_t; +typedef struct segment_command kernel_segment_command_t; +typedef struct section kernel_section_t; + +#define LC_SEGMENT_KERNEL LC_SEGMENT +#define SECT_CONSTRUCTOR "__constructor" +#define SECT_DESTRUCTOR "__destructor" + +#endif + +extern kernel_mach_header_t _mh_execute_header; + +vm_offset_t getlastaddr(void); + +kernel_segment_command_t *firstseg(void); +kernel_segment_command_t *firstsegfromheader(kernel_mach_header_t *header); +kernel_segment_command_t *nextsegfromheader( + kernel_mach_header_t *header, + kernel_segment_command_t *seg); +kernel_segment_command_t *getsegbyname(const char *seg_name); +kernel_segment_command_t *getsegbynamefromheader( + kernel_mach_header_t *header, + const char *seg_name); +void *getsegdatafromheader(kernel_mach_header_t *, const char *, unsigned long *); +kernel_section_t *getsectbyname(const char *seg_name, const char *sect_name); +kernel_section_t *getsectbynamefromheader( + kernel_mach_header_t *header, + const char *seg_name, + const char *sect_name); +void *getsectdatafromheader(kernel_mach_header_t *, const char *, const char *, unsigned long *); +kernel_section_t *firstsect(kernel_segment_command_t *sgp); +kernel_section_t *nextsect(kernel_segment_command_t 
*sgp, kernel_section_t *sp); + +#if MACH_KDB +boolean_t getsymtab(kernel_mach_header_t *header, + vm_offset_t *symtab, + int *nsyms, + vm_offset_t *strtab, + vm_size_t *strtabsize); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _KERNEL_MACH_HEADER_ */ diff --git a/osfmk/mach/kext_panic_report.h b/libkern/libkern/kext_panic_report.h similarity index 100% rename from osfmk/mach/kext_panic_report.h rename to libkern/libkern/kext_panic_report.h diff --git a/libkern/libkern/kext_request_keys.h b/libkern/libkern/kext_request_keys.h new file mode 100644 index 000000000..aa5c1da4a --- /dev/null +++ b/libkern/libkern/kext_request_keys.h @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _LIBKERN_KEXT_REQUEST_KEYS_H +#define _LIBKERN_KEXT_REQUEST_KEYS_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/********************************************************************* +* This file defines keys (and values) for properties in kext_request +* collections and mkext archives used for loading in the kernel. +* An incoming request is always a serialized XML plist with at least +* a predicate, and optionally a dictionary of arguments. +* +* Some requests generate serialized XML plist responses, while others +* return raw data. See the predicate description for more. +* +* All of these keys are strictly for internal run-time communication +* between IOKitUser's OSKext module and xnu's OSKext class. +* Keys and values may change at any time without notice. +*********************************************************************/ + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Top-Level Request Properties +/********************************************************************/ +#endif + +/* The Predicate Key + * The value of this key indicates the operation to perform or the + * information desired. + */ +#define kKextRequestPredicateKey "Kext Request Predicate" + +/* The Arguments Key + * The value of this key is a dictionary containing the arguments + * for the request. + */ +#define kKextRequestArgumentsKey "Kext Request Arguments" + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Request Predicates - User-Space to Kernel +/********************************************************************/ +#endif + +/********************************************************************* + * Nonprivileged requests from user -> kernel + * + * These requests do not require a privileged host port, as they just + * return information about loaded kexts. 
+ **********/ + +/* Predicate: Get Loaded Kext Info + * Argument: (None) + * Response: An array of information about loaded kexts (see OSKextLib.h). + * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) + * + * Retrieves an array of dictionaries whose properties describe every kext + * loaded at the time of the call. + */ +#define kKextRequestPredicateGetLoaded "Get Loaded Kext Info" + +/* Predicate: Get Kernel Link State + * Argument: None + * Response: Raw bytes + length containing the link state of the kernel. + * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) + * + * Retrieves the link state of the running kernel for use in generating + * debug symbols in user space. + * + * xxx - Should this allow retrieval of any kext's link state (maybe for + * xxx - debugging)? Could rename "Get Kext Link State" and take a bundle ID + * xxx - or none for kernel, just like with "Get Kext UUID". + */ +#define kKextRequestPredicateGetKernelLinkState "Get Kernel Link State" + +/* Predicate: Get Kernel Load Address + * Argument: None + * Response: OSNumber containing kernel load address. + * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) + * + * Retrieves the base load address of the running kernel for use in generating + * debug symbols in user space. + */ +#define kKextRequestPredicateGetKernelLoadAddress "Get Kernel Load Address" + +/* Predicate: Get All Load Requests + * Argument: None + * Response: A set of bundle identifiers of all requested kext loads. + * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) + * + * Retrieves the bundle identifiers of all kexts that were requested to be + * loaded since power on. 
+ * + */ +#define kKextRequestPredicateGetAllLoadRequests "Get All Load Requests" + + +/********************************************************************* + * Privileged requests from user -> kernel + * + * These requests all do something with kexts in the kernel or to + * the OSKext system overall. The user-space caller of kext_request() + * must have access to a privileged host port or these requests result + * in an op_result of kOSKextReturnNotPrivileged. + **********/ + +/* Predicate: Get Kernel Requests + * Argument: (None) + * Response: An array of kernel requests (see below). + * Op result: OSReturn indicating any errors in processing (see OSKextLib.h) + * + * Retrieve the list of deferred load (and other) requests from OSKext. + * This predicate is reserved for kextd, and we may be enforcing access + * to the kextd process only. + */ +#define kKextRequestPredicateGetKernelRequests "Get Kernel Requests" + +/* Predicate: Load + * Argument: kKextRequestArgumentLoadRequestsKey + * Response: None (yet, may become an array of log message strings) + * Op result: OSReturn indicating processing/load+start result (see OSKextLib.h) + * + * Load one or more kexts per the load requests in the arguments dict. + * See kKextRequestArgumentLoadRequestsKey for more info. + */ +#define kKextRequestPredicateLoad "Load" + +/* Predicate: Start + * Argument: kKextRequestArgumentBundleIdentifierKey (CFBundleIdentifier) + * Response: None (yet, may become an array of log message strings) + * Op result: OSReturn indicating start result (see OSKextLib.h) + * + * Start a kext by bundle id. If it's already started, returns success. + * If a kext's dependencies need to be started, they are also started. 
+ */ +#define kKextRequestPredicateStart "Start" + +/* Predicate: Stop + * Argument: kKextRequestArgumentBundleIdentifierKey (CFBundleIdentifier) + * Response: None (yet, may become an array of log message strings) + * Op result: OSReturn indicating stop result (see OSKextLib.h) + * + * Stop a kext by bundle id if it can be stopped. + * If it's already stopped, returns success. + * Does not attempt to stop dependents; that will return an error. + */ +#define kKextRequestPredicateStop "Stop" + +/* Predicate: Unload + * Argument: kKextRequestArgumentBundleIdentifierKey (CFBundleIdentifier) + * Response: None (yet, may become an array of log message strings) + * Op result: OSReturn indicating stop+unload result (see OSKextLib.h) + * + * Stop and unload a kext by bundle id if it can be. + * Does not attempt to stop dependents; that will return an error. + */ +#define kKextRequestPredicateUnload "Unload" + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark Requests Predicates - Kernel to User Space (kextd) +/********************************************************************/ +#endif +/* Predicate: Send Resource + * Argument: kKextRequestArgumentRequestTagKey + * Argument: kKextRequestArgumentBundleIdentifierKey + * Argument: kKextRequestArgumentNameKey + * Argument: kKextRequestArgumentValueKey + * Argument: kKextRequestArgumentResultKey + * Response: None + * Op result: OSReturn indicating result (see OSKextLib.h) + * + * Retrieves a resource file from a kext bundle. The callback corresponding + * to the request will be invoked. + */ +#define kKextRequestPredicateSendResource "Send Resource" + +/********************************************************************* + * Kernel Requests: from the kernel or loaded kexts up to kextd + * + * These requests come from within the kernel, and kextd retrieves + * them using kKextRequestPredicateGetKernelRequests. 
+ **********/ + +/* Predicate: Kext Load Request + * Argument: kKextRequestArgumentBundleIdentifierKey + * Response: Asynchronous via a kKextRequestPredicateLoad from kextd + * Op result: OSReturn indicating result (see OSKextLib.h) + * + * Requests that kextd load the kext with the given identifier. + * When kextd loads the kext, it informs the IOCatalogue of the load. + * If the kext cannot be loaded, kextd or OSKext removes its personalities + * from the kernel. + */ +#define kKextRequestPredicateRequestLoad "Kext Load Request" + +/* Predicate: Kext Prelinked Kernel Request + * Argument: None + * Response: None + * Op result: OSReturn indicating result (see OSKextLib.h) + * + * Notifies kextd that the kernel we booted from was not prelinked, therefore + * that kextd should try to create a prelinked kernel now. + */ +#define kKextRequestPredicateRequestPrelink "Kext Prelinked Kernel Request" + +/* Predicate: Kext Resource Request + * Argument: kKextRequestArgumentRequestTagKey + * Argument: kKextRequestArgumentBundleIdentifierKey + * Argument: kKextRequestArgumentNameKey + * Response: Asynchronous via a kKextRequestPredicateSendResource from kextd + * Op result: OSReturn indicating result (see OSKextLib.h) + * + * Requests a resource file from a kext bundle by identifier + filename. + */ +#define kKextRequestPredicateRequestResource "Kext Resource Request" + +/* Predicate: Kextd Exit + * Argument: None + * Response: None + * Op result: OSReturn indicating result (see OSKextLib.h) + * + * Requests kextd exit for system shutdown. 
+ */ +#define kKextRequestPredicateRequestKextdExit "Kextd Exit" + +#if PRAGMA_MARK +/********************************************************************/ +#pragma mark - +#pragma mark Generic Request Arguments +/********************************************************************/ +#endif +/* Argument: Kext Load Requests + * Type: Array of dictionaries (see Load Request Arguments below) + * Used by: kKextRequestPredicateLoad + * + * An array of dictionaries, each describing a single load operation to + * be performed with its options. A kext load request is effectively a + * nested series of requests. Currently only one load request is embedded + * in a user-space Load request, so the result is unambiguous. We might + * change this, specifically for kextd, to allow all pending kernel + * load requests to be rolled up into one blob. Might not be much win + * in that, however. The nested logic makes the code difficult to read. + */ +#define kKextRequestArgumentLoadRequestsKey "Kext Load Requests" + +/* Argument: CFBundleIdentifier + * Type: String + * Used by: several + * + * Any request that takes a bundle identifier uses this key. + */ +#define kKextRequestArgumentBundleIdentifierKey "CFBundleIdentifier" + +/* Argument: OSReturn + * Type: Number (OSReturn) + * Used by: several + * + * Contains the OSReturn/kern_return_t result of the request. + */ +#define kKextRequestArgumentResultKey "Kext Request Result Code" + +/* Argument: Value + * Type: Varies with the predicate + * Used by: several + * + * Used for all the Set-Enabled predicates, and also for Send Resource (OSData). + */ +#define kKextRequestArgumentValueKey "Value" + +/* Argument: Filename + * Type: String + * Used by: kKextRequestPredicateSendResource + * + * Names the resource being sent to the kext. + */ +#define kKextRequestArgumentNameKey "Name" + +/* Argument: File Contents + * Type: Data + * Used by: kKextRequestPredicateSendResource + * + * Contains the contents of the resource file being sent. 
+ */ +#define kKextRequestArgumentFileContentsKey "File Contents" + +/* Argument: Delay Autounload + * Type: Boolean + * Default: false + * + * Normally all kexts loaded are subject to normal autounload behavior: + * when no OSMetaClass instances remain for a kext that defines an IOService + * subclass, or when a non-IOService kext turns on autounload and its reference + * count drops to zero (external) references. + * + * Setting this property to true in a load request makes the kext being loaded + * skip ONE autounload pass, giving about an extra minute of time before the + * kext is subject to autounload. This is how kextutil(8) delays autounload + * so that there's more time to set up a debug session. + * + * Setting this property in any other request causes OSKext::considerUnloads() + * to be called before processing the request, ensuring a window where kexts + * will not be unloaded. The user-space kext library uses this so that it can + * avoid including kexts that are already loaded in a load request. + */ +#define kKextRequestArgumentDelayAutounloadKey "Delay Autounload" + +#if PRAGMA_MARK +#pragma mark Load Request Arguments +#endif + +/********************************************************************* + * Kext Load Request Properties + * + * In addition to a bundle identifier, load requests can contain + * these optional keys. + * + * These properties are used primarily by kextutil(8) to alter default + * load behavior, but the OSKext user-level library makes them all + * available in OSKextLoadWithOptions(). + **********/ + +/* Argument: Start Exclude Level + * Type: Integer, corresponding to OSKextExcludeLevel + * Default: kOSKextExcludeNone if not specified + * + * Normally all kexts in the load list for a load request are started. + * This property is used primarily by kextutil(8) to delay start of + * either the primary kext, or the whole load list (any that weren't + * already loaded & started). 
+ */ +#define kKextKextRequestArgumentStartExcludeKey "Start Exclude Level" + +/* Argument: Start Matching Exclude Level + * Type: Integer, corresponding to OSKextExcludeLevel + * Default: kOSKextExcludeAll if not specified + * + * Normally no personalities are sent to the IOCatalogue for a regular + * kext load; the assumption is that they are already there and triggered + * the load request in the first place. + * + * This property is used primarily by kextutil(8) to delay matching for + * either the primary kext, or the whole load list (any that didn't + * already have personalities in the IOCatalogue). + */ +#define kKextRequestArgumentStartMatchingExcludeKey "Start Matching Exclude Level" + +// see also Delay Autounload + +/* Argument: Personality Names + * Type: Array of strings + * Default: All personalities are used + * + * Normally when personalities are sent to the IOCatalogue, they are all sent. + * This property restricts the personalities sent, for the primary kext + * being loaded, to those named. Personalities for dependencies are all sent, + * and there is currently no mechanism to filter them. + * + * This property is used primarily by kextutil(8) to help debug matching + * problems. + */ +#define kKextRequestArgumentPersonalityNamesKey "Personality Names" + +#if PRAGMA_MARK +#pragma mark Unload Request Arguments +#endif + +/* Argument: Terminate + * Type: Boolean + * Default: false + * + * An unload request may optionally specify via this key that all IOService + * objects are to be terminated before attempting to unload. Kexts with + * dependents will not attempt to terminate and will return kOSKextReturnInUse. 
+ */ +#define kKextRequestArgumentTerminateIOServicesKey "Terminate IOServices" + +#if PRAGMA_MARK +#pragma mark Internal Tracking Properties +#endif +/********************************************************************* + * Internal Tracking Properties + **********/ + +/* Argument: Request Tag + * Type: Number (uint32_t) + * Used by: internal tracking for requests with callbacks + * + * Most requests to get resources (files) use this. + */ +#define kKextRequestArgumentRequestTagKey "Request Tag" + +/* Argument: Request Callback + * Type: Data (pointer) + * Used by: internal tracking + * + * Most requests to get resources (files) use this. + */ +#define kKextRequestArgumentCallbackKey "Request Callback" + +/* Argument: Request context. + * Type: OSData (wraps a void *) + * Used by: several + */ +#define kKextRequestArgumentContextKey "Context" + +/* Argument: Request Stale + * Type: Boolean + * Used by: internal tracking + * + * _OSKextConsiderUnloads sets this on any callback record lacking + * it, and deletes any callback record that has it. + */ +#define kKextRequestStaleKey "Request Stale" + +#ifdef __cplusplus +}; +#endif /* __cplusplus */ + +#endif /* _LIBKERN_KEXT_REQUEST_KEYS_H */ diff --git a/libkern/libkern/kxld.h b/libkern/libkern/kxld.h new file mode 100644 index 000000000..6fa11e422 --- /dev/null +++ b/libkern/libkern/kxld.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_H +#define _KXLD_H + +#include +#include // boolean_t +#include // kern_return_t +#include // cpu_type_t and cpu_subtype_t +#include + +#include "kxld_types.h" + +/******************************************************************************* +* API +*******************************************************************************/ + +/******************************************************************************* +* Creates a state object for the linker. A context must be created for each +* link thread and destroyed at the end of the thread's life. A context should +* be reused for all links occurring in that link thread.
+* context Returns a pointer to the new context object +* allocate_callback Callback to allocate memory for the linked kext +* log_callback Callback for all kxld logging output +* flags Flags to control the behavior of kxld +* cputype The target arch's CPU type (0 for host arch) +* cpusubtype The target arch's CPU subtype (0 for host subtype) +*******************************************************************************/ +kern_return_t kxld_create_context( + KXLDContext **context, + KXLDAllocateCallback allocate_callback, + KXLDLoggingCallback log_callback, + KXLDFlags flags, + cpu_type_t cputype, + cpu_subtype_t cpusubtype) + __attribute__((nonnull(1,2),visibility("default"))); + +/******************************************************************************* +* Destroys a link context and frees all associated memory. Should be called at +* the end of a link thread's life. +*******************************************************************************/ +void kxld_destroy_context( + KXLDContext *context) + __attribute__((nonnull,visibility("default"))); + +/******************************************************************************* +* Links a kext against its dependencies, using a callback to allocate the memory +* at which it will be located. +* NOTE: The object data itself must be mmapped with PROT_WRITE and MAP_PRIVATE +* context The KXLDContext object for the current link thread. +* file The kext object file read into memory. +* Supported formats: Mach-O, Mach-O64, Fat. +* size The size of the kext in memory. Must be nonzero. +* name The name, usually the bundle identifier, of the kext +* callback_data Data that is to be passed to the callback functions. +* deps An array of pointers to the link state of kexts upon +* which this kext is dependent. +* ndeps Number of entries in the 'deps' array. +* linked_object If this is not null, it will be set to the address of +* the linked kext object.
If the address provided by the +* kxld_alloc_callback is considered writable, this pointer +* will be set to that address. Otherwise, the linked +* object will be written to a temporary buffer that should +* be freed by the caller. +* kmod_info_kern Kernel address of the kmod_info_t structure. +* link_state If this is not null, it will be set to the address of a +* block of memory that contains state generated by the +* linking process for use by links of dependent kexts. +* The link state object is serialized and can be written +* directly to disk. This memory should be freed by the +* caller when no longer needed. +* link_state_size The size of the returned link state buffer. +* symbol_file If this is not null, it will be set to the address of a +* buffer containing a Mach-O symbol file that may be +* written to disk. This should be freed by the caller +* when no longer needed. +* Note: symbol files are never generated in the kernel +* symbol_file_size The size of the returned symbol file buffer. 
+*******************************************************************************/ +kern_return_t kxld_link_file( + KXLDContext *context, + u_char *file, + u_long size, + const char *name, + void *callback_data, + u_char **deps, + u_int ndeps, + u_char **linked_object, + kxld_addr_t *kmod_info_kern, + u_char **link_state, + u_long *link_state_size, + u_char **symbol_file, + u_long *symbol_file_size) + __attribute__((nonnull(1, 2), visibility("default"))); + +/******************************************************************************* +*******************************************************************************/ +boolean_t kxld_validate_copyright_string(const char *str) + __attribute__((pure, nonnull, visibility("default"))); + +#endif // _KXLD_H + diff --git a/libkern/libkern/kxld_types.h b/libkern/libkern/kxld_types.h new file mode 100644 index 000000000..cd7153c8b --- /dev/null +++ b/libkern/libkern/kxld_types.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KXLD_TYPES_H +#define _KXLD_TYPES_H + +#include +#include +#include + +/******************************************************************************* +* Macros +*******************************************************************************/ + +/* For 32-bit-specific linking code */ +#if (!KERNEL || !__LP64__) + #define KXLD_USER_OR_ILP32 1 +#endif + +/* For 64-bit-specific linking code */ +#if (!KERNEL || __LP64__) + #define KXLD_USER_OR_LP64 1 +#endif + +/* For ppc-specific linking code */ +#if (!KERNEL || __ppc__) + #define KXLD_USER_OR_PPC 1 +#endif + +/* For i386-specific linking code */ +#if (!KERNEL || __i386__) + #define KXLD_USER_OR_I386 1 +#endif + +/* For x86_64-specific linking code */ +#if (!KERNEL || __x86_64__) + #define KXLD_USER_OR_X86_64 1 +#endif + +/* For arm-specific linking code */ +#if (!KERNEL || __arm__) + #define KXLD_USER_OR_ARM 1 +#endif + +/* For linking code specific to architectures that support common symbols */ +#if (!KERNEL || __i386__ || __ppc__) + #define KXLD_USER_OR_COMMON 1 +#endif + +/* For linking code specific to architectures that support strict patching */ +#if (!KERNEL || !(__i386__ || __ppc__)) + #define KXLD_USER_OR_STRICT_PATCHING 1 +#endif + +/* For linking code specific to architectures that use MH_OBJECT */ +#if (!KERNEL || __i386__ || __ppc__ || __arm__) + #define KXLD_USER_OR_OBJECT 1 +#endif + +/* For linking code specific to architectures that use MH_KEXT_BUNDLE */ +#if (!KERNEL || 
__x86_64__) + #define KXLD_USER_OR_BUNDLE 1 +#endif + +/* We no longer need to generate our own GOT for any architectures, but the code + * required to do this will be saved inside this macro. + */ +#define KXLD_USER_OR_GOT 0 + +/******************************************************************************* +* Types +*******************************************************************************/ + +/* Maintains linker state across links. One context should be allocated for + * each link thread. + */ +typedef struct kxld_context KXLDContext; + +/* Unless we're in a 32-bit kernel, all internal math is performed in 64 bits + * and cast to smaller values as needed by the architecture for which we are + * linking. All returned arguments should be handled similarly. + * Note: This size can be increased for future architectural size increases + */ +#if KERNEL && !__LP64__ +typedef uint32_t kxld_addr_t; +typedef uint32_t kxld_size_t; +#else +typedef uint64_t kxld_addr_t; +typedef uint64_t kxld_size_t; +#endif /* KERNEL && !__LP64__ */ + +/* Flags for general linker behavior */ +enum kxld_flags { + kKxldFlagDefault = 0x0 +}; +typedef enum kxld_flags KXLDFlags; + +/* Flags for the allocation callback */ +enum kxld_allocate_flags { + kKxldAllocateDefault = 0x0, + kKxldAllocateWritable = 0x1 /* kxld may write into the allocated memory */ +}; +typedef enum kxld_allocate_flags KXLDAllocateFlags; + +/* This specifies the function type of the callback that the linker uses to get + * the base address and allocated memory for relocation and linker output, + * respectively. Note that it is compatible with the standard allocators (e.g. + * malloc).
+ */ +typedef kxld_addr_t (*KXLDAllocateCallback)(size_t size, + KXLDAllocateFlags *flags, void *user_data); + +/* Flags for the logging callback */ +typedef enum kxld_log_subsystem { + kKxldLogLinking = 0x0, + kKxldLogPatching = 0x01 +} KXLDLogSubsystem; + +typedef enum kxld_log_level { + kKxldLogExplicit = 0x0, + kKxldLogErr = 0x1, + kKxldLogWarn = 0x2, + kKxldLogBasic = 0x3, + kKxldLogDetail = 0x4, + kKxldLogDebug = 0x5 +} KXLDLogLevel; + +typedef void (*KXLDLoggingCallback) (KXLDLogSubsystem sys, KXLDLogLevel level, + const char *format, va_list ap, void *user_data); + +#endif /* _KXLD_TYPES_H */ + diff --git a/libkern/libkern/machine/Makefile b/libkern/libkern/machine/Makefile index 9dbd25dce..0a072f9f9 100644 --- a/libkern/libkern/machine/Makefile +++ b/libkern/libkern/machine/Makefile @@ -12,6 +12,8 @@ INSTINC_SUBDIRS_PPC = INSTINC_SUBDIRS_I386 = +INSTINC_SUBDIRS_X86_64 = + INSTINC_SUBDIRS_ARM = EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} @@ -20,6 +22,8 @@ EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} + EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} DATAFILES = \ diff --git a/libkern/libkern/mkext.h b/libkern/libkern/mkext.h new file mode 100644 index 000000000..6b43a6b82 --- /dev/null +++ b/libkern/libkern/mkext.h @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _MKEXT_H_ +#define _MKEXT_H_ 1 + +#include +#include + +#include + +/********************************************************************* +* Mkext File Format +* +* ALL BINARY VALUES ARE BIG-ENDIAN. +*********************************************************************/ +#if PRAGMA_MARK +#pragma mark Constants +#endif + +#define MKEXT_MAGIC 0x4D4B5854 /* 'MKXT' */ +#define MKEXT_SIGN 0x4D4F5358 /* 'MOSX' */ + +#define MKEXT_EXTN ".mkext" + +#define MKEXT_VERS_1 (0x01008000) + +// Used during development/bringup: v 2.0d1 +#define MKEXT_VERS_2 (0x02002001) +// xxx - Will use this when format is final +// #define MKEXT_VERS_2 (0x02008000) + +#if PRAGMA_MARK +#pragma mark Core Header +#endif +/********************************************************************* +* Core Header +* +* All versions of mkext files have this basic header: +* +* - magic & signature - always 'MKXT' and 'MOSX' as defined above. 
+* - length - the length of the whole file +* - adler32 - checksum from &version to end of file +* - version - a 'vers' style value +* - numkexts - how many kexts are in the archive (only needed in v.1) +* - cputype & cpusubtype - in version 1 could be CPU_TYPE_ANY +* and CPU_SUBTYPE_MULTIPLE if the archive contained fat kexts; +* version 2 does not allow this and all kexts must be of a single +* arch. For either version, mkexts of specific arches can be +* embedded in a fat Mach-O file to combine them. +*********************************************************************/ + +#define MKEXT_HEADER_CORE \ + uint32_t magic; \ + uint32_t signature; \ + uint32_t length; \ + uint32_t adler32; \ + uint32_t version; \ + uint32_t numkexts; \ + cpu_type_t cputype; \ + cpu_subtype_t cpusubtype; + +typedef struct mkext_basic_header { + MKEXT_HEADER_CORE +} mkext_basic_header; + +#define MKEXT_HDR_CAST(hdr) ((mkext_basic_header *)(hdr)) + +#define MKEXT_SWAP(num) OSSwapBigToHostInt32((uint32_t)(num)) + +#define MKEXT_GET_MAGIC(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->magic)) +#define MKEXT_GET_SIGNATURE(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->signature)) +#define MKEXT_GET_LENGTH(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->length)) +#define MKEXT_GET_CHECKSUM(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->adler32)) +#define MKEXT_GET_VERSION(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->version)) +#define MKEXT_GET_COUNT(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->numkexts)) +#define MKEXT_GET_CPUTYPE(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->cputype)) +#define MKEXT_GET_CPUSUBTYPE(hdr) (MKEXT_SWAP(MKEXT_HDR_CAST(hdr)->cpusubtype)) + +#if PRAGMA_MARK +#pragma mark Mkext Version 2 Format Definitions +#endif +/********************************************************************* +* Mkext Version 2 Format Definitions +* +* The version field here will be some variant of 0x0200....; that is +* the major version byte will be 0x02. +* +* Version 2 uses zlib for compression, not the lzss compressor used +* by version 1. 
+* +* In version 2, all executable & resource files are stored in sequence +* followed by the combined info dictionaries of all kexts at the end. +* This XML plist should be nul-terminated and stored at a page-aligned +* offset in the file so that kernel code can unmap it as soon as it's +* parsed. +* +* The info dict for each kext will have inserted into it these +* additional properties: +* +* - _MKEXTBundlePath (string) - full path to the original bundle, +* relative to volume. +* - _MKEXTExecutable (integer) - offset to the executable entry. +* - _MKEXTResources (dict) - keyed by filename, values integer offsets +* to file entries. +* +* Mkext2 buffers are used to send load requests to the kernel. When +* this is done, the plist will contain an _MKEXTLoadRequest dictionary +* whose keys are the bundle IDs to load, and whose values are +* dictionaries of flags: +* +* - "Load Kext" - boolean, whether to load the kext or not (default true). +* May be used to send just personalities for already-loaded kexts, +* but we do have a mechanism to do that from userland already. +* - "Start Kext" - boolean, whether to start the kext or not +* (used when debugging). Defaults to true. +* - "Add Personalities" - boolean, whether to send personalities to +* the IOCatalogue (used when debugging). Defaults to true. +* - "Disable Autounload" - boolean, whether to prevent the reaper +* thread from unloading the kext, so the dev. has time to set up +* the debug session. (Predefined window, or maybe this will be a +* number of seconds to wait.) Defaults to false. 
+*********************************************************************/ +#define kMKEXTInfoDictionariesKey "_MKEXTInfoDictionaries" + +#define kMKEXTBundlePathKey "_MKEXTBundlePath" +#define kMKEXTExecutableKey "_MKEXTExecutable" + +#define kMKEXTLoadRequestKey "_MKEXTLoadRequest" +#define kMKEXTLoadRequestLoadKey "Load Kext" +#define kMKEXTLoadRequestStartKey "Start Kext" +#define kMKEXTLoadRequestAddPersonalitiesKey "Add Personalities" +#define kMKEXTLoadRequestDisableAutounloadKey "Disable Autounload" + +typedef struct mkext2_file_entry { + uint32_t compressed_size; // if zero, file is not compressed + uint32_t full_size; // full size of data w/o this struct + uint8_t data[0]; // data is inline to this struct +} mkext2_file_entry; + +typedef struct mkext2_header { + MKEXT_HEADER_CORE + uint32_t plist_offset; + uint32_t plist_compressed_size; + uint32_t plist_full_size; +} mkext2_header; + +#define MKEXT2_GET_ENTRY_COMPSIZE(ptr) MKEXT_SWAP((ptr)->compressed_size) +#define MKEXT2_GET_ENTRY_FULLSIZE(ptr) MKEXT_SWAP((ptr)->full_size) +#define MKEXT2_GET_ENTRY_DATA(ptr) ((ptr)->data) + +#define MKEXT2_HDR_CAST(hdr) ((mkext2_header *)(hdr)) +#define MKEXT2_GET_PLIST(hdr) MKEXT_SWAP(MKEXT2_HDR_CAST(hdr)->plist_offset) +#define MKEXT2_GET_PLIST_COMPSIZE(hdr) MKEXT_SWAP(MKEXT2_HDR_CAST(hdr)->plist_compressed_size) +#define MKEXT2_GET_PLIST_FULLSIZE(hdr) MKEXT_SWAP(MKEXT2_HDR_CAST(hdr)->plist_full_size) + +#if PRAGMA_MARK +#pragma mark Mkext Version 1 Format Definitions +#endif +/********************************************************************* +* Mkext Version 1 Format Definitions +* +* The original mkext format has version 0x01008000 ("1.0"). +* +* In version 1, plists were not nul-terminated, so it's up to the +* reader to add that '\0' on the end if it's needed. +* +* Original bad names preserved for source compatibility. 
+*********************************************************************/ + +// If all fields are 0 then this file slot is empty (see MKEXT1_ENTRY_EXISTS). +// If compsize is zero then the file isn't compressed. +typedef struct mkext_file { + uint32_t offset; // 4 bytes + uint32_t compsize; // 4 bytes + uint32_t realsize; // 4 bytes + uint32_t modifiedsecs; // 4 bytes; cast to time_t to use +} mkext_file; + +// The plist file entry is mandatory, but module may be empty +typedef struct mkext_kext { + mkext_file plist; // 16 bytes + mkext_file module; // 16 bytes +} mkext_kext; + +typedef struct mkext_header { + MKEXT_HEADER_CORE + mkext_kext kext[1]; // 32 bytes/entry +} mkext_header; + +typedef mkext_header mkext1_header; + +#define MKEXT1_ENTRY_CAST(ptr) ((mkext_file *)(ptr)) +#define MKEXT1_GET_ENTRY_OFFSET(ptr) (MKEXT_SWAP(MKEXT1_ENTRY_CAST(ptr)->offset)) +#define MKEXT1_GET_ENTRY_COMPSIZE(ptr) (MKEXT_SWAP(MKEXT1_ENTRY_CAST(ptr)->compsize)) +#define MKEXT1_GET_ENTRY_FULLSIZE(ptr) (MKEXT_SWAP(MKEXT1_ENTRY_CAST(ptr)->realsize)) +#define MKEXT1_GET_ENTRY_MODTIME(ptr) ((time_t)MKEXT_SWAP(MKEXT1_ENTRY_CAST(ptr)->modifiedsecs)) +#define MKEXT1_ENTRY_EXISTS(ptr) (MKEXT1_GET_ENTRY_OFFSET(ptr) || \ + MKEXT1_GET_ENTRY_FULLSIZE(ptr) || \ + MKEXT1_GET_ENTRY_COMPSIZE(ptr) || \ + MKEXT1_GET_ENTRY_MODTIME(ptr)) + +#define MKEXT1_GET_KEXT(hdr, i) ((mkext_kext *)&(MKEXT1_HDR_CAST(hdr)->kext[(i)])) +#define MKEXT1_GET_KEXT_PLIST(hdr, i) (MKEXT1_ENTRY_CAST(&(MKEXT1_GET_KEXT((hdr), (i))->plist))) +#define MKEXT1_GET_KEXT_EXEC(hdr, i) (MKEXT1_ENTRY_CAST(&(MKEXT1_GET_KEXT((hdr), (i))->module))) + +#define MKEXT1_HDR_CAST(hdr) ((mkext1_header *)(hdr)) + +/* These functions are only used for version 1 mkexts.
+ */ +__BEGIN_DECLS +u_int8_t * +compress_lzss(u_int8_t * dst, u_int32_t dstlen, + u_int8_t * src, u_int32_t srclen); + +int +decompress_lzss(u_int8_t * dst, u_int32_t dstlen, + u_int8_t * src, u_int32_t srclen); + +u_int32_t +mkext_adler32(u_int8_t * src, int32_t length); +__END_DECLS + +#endif /* _MKEXT_H_ */ diff --git a/libkern/libkern/ppc/Makefile b/libkern/libkern/ppc/Makefile index 723f47dfa..e892ce42f 100644 --- a/libkern/libkern/ppc/Makefile +++ b/libkern/libkern/ppc/Makefile @@ -10,14 +10,10 @@ INSTINC_SUBDIRS = INSTINC_SUBDIRS_PPC = -INSTINC_SUBDIRS_I386 = - EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - DATAFILES = \ OSByteOrder.h diff --git a/libkern/libkern/ppc/OSByteOrder.h b/libkern/libkern/ppc/OSByteOrder.h index 3241ba668..c6666859d 100644 --- a/libkern/libkern/ppc/OSByteOrder.h +++ b/libkern/libkern/ppc/OSByteOrder.h @@ -53,9 +53,15 @@ OSReadSwapInt16( uint16_t result; volatile uint16_t *addr = (volatile uint16_t *)((uintptr_t)base + byteOffset); +#if defined(__llvm__) + result = *addr; + result = ((result << 8) | (result >> 8)); +#else __asm__ ("lhbrx %0, %2, %1" : "=r" (result) : "r" (base), "bO" (byteOffset), "m" (*addr)); +#endif + return result; } @@ -69,9 +75,14 @@ OSReadSwapInt32( uint32_t result; volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset); +#if defined(__llvm__) + result = __builtin_bswap32(*addr); +#else __asm__ ("lwbrx %0, %2, %1" : "=r" (result) : "r" (base), "bO" (byteOffset), "m" (*addr)); +#endif + return result; } @@ -88,10 +99,15 @@ OSReadSwapInt64( uint32_t u32[2]; } u; +#if defined(__llvm__) + u.u64 = __builtin_bswap64(*addr); +#else __asm__ ("lwbrx %0, %3, %2\n\t" "lwbrx %1, %4, %2" : "=&r" (u.u32[1]), "=r" (u.u32[0]) : "r" (base), "bO" (byteOffset), "b" (byteOffset + 4), "m" (*addr)); +#endif + return u.u64; } @@ -107,9 +123,13 @@ OSWriteSwapInt16( { volatile uint16_t *addr = (volatile uint16_t 
*)((uintptr_t)base + byteOffset); +#if defined(__llvm__) + *addr = ((data >> 8) | (data << 8)); +#else __asm__ ("sthbrx %1, %3, %2" : "=m" (*addr) : "r" (data), "r" (base), "bO" (byteOffset)); +#endif } OS_INLINE @@ -122,9 +142,13 @@ OSWriteSwapInt32( { volatile uint32_t *addr = (volatile uint32_t *)((uintptr_t)base + byteOffset); +#if defined(__llvm__) + *addr = __builtin_bswap32(data); +#else __asm__ ("stwbrx %1, %3, %2" : "=m" (*addr) : "r" (data), "r" (base), "bO" (byteOffset)); +#endif } OS_INLINE @@ -136,6 +160,10 @@ OSWriteSwapInt64( ) { volatile uint64_t *addr = (volatile uint64_t *)((uintptr_t)base + byteOffset); + +#if defined(__llvm__) + *addr = __builtin_bswap64(data); +#else uint32_t hi = (uint32_t)(data >> 32); uint32_t lo = (uint32_t)(data & 0xffffffff); @@ -143,6 +171,7 @@ OSWriteSwapInt64( "stwbrx %2, %5, %3" : "=m" (*addr) : "r" (lo), "r" (hi), "r" (base), "bO" (byteOffset), "b" (byteOffset + 4)); +#endif } /* Generic byte swapping functions. */ diff --git a/iokit/Kernel/PMmisc.cpp b/libkern/libkern/prelink.h similarity index 51% rename from iokit/Kernel/PMmisc.cpp rename to libkern/libkern/prelink.h index 6f3f84827..e8f37e1f0 100644 --- a/iokit/Kernel/PMmisc.cpp +++ b/libkern/libkern/prelink.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,41 +25,30 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include -static char rootDomain[ ] = "IOPMrootDomain"; -static char displayDevice[ ] = "IODisplayWrangler"; -static bool rootRegistered; -static bool displayRegistered; -static IOService * root; -static IOService * display; +#ifndef _PRELINK_H_ +#define _PRELINK_H_ -void IOPMLog(const char * who,unsigned long event,unsigned long param1, unsigned long param2) -{ -// kprintf("%s %02d %08x %08x\n",who,event,param1,param2); -} +#define kPrelinkTextSegment "__PRELINK_TEXT" +#define kPrelinkTextSection "__text" +#define kPrelinkLinkStateSegment "__PRELINK_STATE" +#define kPrelinkKernelLinkStateSection "__kernel" +#define kPrelinkKextsLinkStateSection "__kexts" -void IOPMRegisterDevice(const char * who, IOService * theDevice) -{ +#define kPrelinkInfoSegment "__PRELINK_INFO" +#define kPrelinkInfoSection "__info" - if ( strcmp(rootDomain,who) == 0 ) { // root power domain is registering - theDevice->youAreRoot(); - rootRegistered = true; - root = theDevice; - if ( displayRegistered ) { - root->addChild ( display ); - } - } - else{ - if ( strcmp(displayDevice,who) == 0 ) { // somebody else is registering - displayRegistered = true; // save pointer to display wrangler - display = theDevice; - } - if ( rootRegistered ) { // if not root domain, then it's - root->addChild ( theDevice ); // one of its children - } - } -} +#define kPrelinkBundlePathKey "_PrelinkBundlePath" +#define kPrelinkExecutableLoadKey "_PrelinkExecutableLoadAddr" +#define kPrelinkExecutableSourceKey "_PrelinkExecutableSourceAddr" +#define kPrelinkExecutableSizeKey "_PrelinkExecutableSize" +#define kPrelinkInfoDictionaryKey "_PrelinkInfoDictionary" +#define kPrelinkInterfaceUUIDKey "_PrelinkInterfaceUUID" +#define kPrelinkKmodInfoKey "_PrelinkKmodInfo" +#define kPrelinkLinkStateKey "_PrelinkLinkState" +#define kPrelinkLinkStateSizeKey "_PrelinkLinkStateSize" +#define 
kPrelinkPersonalitiesKey "_PrelinkPersonalities" + +#endif /* _PRELINK_H_ */ diff --git a/libkern/libkern/zconf.h b/libkern/libkern/zconf.h new file mode 100644 index 000000000..ce944764a --- /dev/null +++ b/libkern/libkern/zconf.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. 
The "standard" zlib should be compiled without it. + */ +#ifdef Z_PREFIX +# define deflateInit_ z_deflateInit_ +# define deflate z_deflate +# define deflateEnd z_deflateEnd +# define inflateInit_ z_inflateInit_ +# define inflate z_inflate +# define inflateEnd z_inflateEnd +# define deflateInit2_ z_deflateInit2_ +# define deflateSetDictionary z_deflateSetDictionary +# define deflateCopy z_deflateCopy +# define deflateReset z_deflateReset +# define deflateParams z_deflateParams +# define deflateBound z_deflateBound +# define deflatePrime z_deflatePrime +# define inflateInit2_ z_inflateInit2_ +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateCopy z_inflateCopy +# define inflateReset z_inflateReset +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# define uncompress z_uncompress +# define adler32 z_adler32 +# define crc32 z_crc32 +# define get_crc_table z_get_crc_table +# define zError z_zError + +# define alloc_func z_alloc_func +# define free_func z_free_func +# define in_func z_in_func +# define out_func z_out_func +# define Byte z_Byte +# define uInt z_uInt +# define uLong z_uLong +# define Bytef z_Bytef +# define charf z_charf +# define intf z_intf +# define uIntf z_uIntf +# define uLongf z_uLongf +# define voidpf z_voidpf +# define voidp z_voidp +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef 
SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +/* Some Mac compilers merge all .h files incorrectly: */ +#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) +# define NO_DUMMY_DECL +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus a few kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. 
+ */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */ +# include <sys/types.h> /* for off_t */ +# include <unistd.h> /* for SEEK_* and off_t */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# define z_off_t off_t +#endif +#ifndef SEEK_SET +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position.
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif +#ifndef z_off_t +# define z_off_t long +#endif + +#if defined(__OS400__) +# define NO_vsnprintf +#endif + +#if defined(__MVS__) +# define NO_vsnprintf +# ifdef FAR +# undef FAR +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) +# pragma map(deflateInit_,"DEIN") +# pragma map(deflateInit2_,"DEIN2") +# pragma map(deflateEnd,"DEEND") +# pragma map(deflateBound,"DEBND") +# pragma map(inflateInit_,"ININ") +# pragma map(inflateInit2_,"ININ2") +# pragma map(inflateEnd,"INEND") +# pragma map(inflateSync,"INSY") +# pragma map(inflateSetDictionary,"INSEDI") +# pragma map(compressBound,"CMBND") +# pragma map(inflate_table,"INTABL") +# pragma map(inflate_fast,"INFA") +# pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/libkern/libkern/zlib.h b/libkern/libkern/zlib.h index db7366f53..3f5a5c2c3 100644 --- a/libkern/libkern/zlib.h +++ b/libkern/libkern/zlib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,20 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - -/* $FreeBSD: src/sys/net/zlib.h,v 1.7 1999/12/29 04:38:38 peter Exp $ */ - -/* - * This file is derived from zlib.h and zconf.h from the zlib-1.1.4 - * distribution by Jean-loup Gailly and Mark Adler. The interface - * described in this file refers to the kernel zlib implementation - * of Mac OS X. - */ - /* zlib.h -- interface of the 'zlib' general purpose compression library - version 1.1.4, March 11th, 2002 + version 1.2.3, July 18th, 2005 - Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages @@ -61,66 +51,23 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt + Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). */ -#ifndef _ZLIB_H -#define _ZLIB_H +#ifndef ZLIB_H +#define ZLIB_H -#if __cplusplus +#ifdef __cplusplus extern "C" { #endif -/* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-2002 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifndef _ZCONF_H -#define _ZCONF_H - -/* Maximum value for memLevel in deflateInit2 */ -#define MAX_MEM_LEVEL 9 - -/* Maximum value for windowBits in deflateInit2 and inflateInit2 */ -#define MAX_WBITS 15 /* 32K LZ77 window */ - -/* The memory requirements for deflate are (in bytes): - (1 << (windowBits+2)) + (1 << (memLevel+9)) - that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) - plus a few kilobytes for small objects. +#include "zconf.h" - The memory requirements for inflate are (in bytes) 1 << windowBits - that is, 32K for windowBits=15 (default value) plus a few kilobytes - for small objects. 
-*/ - - /* Type declarations */ - -#define OF(args) args -#define ZEXPORT -#define ZEXPORTVA -#define ZEXTERN extern -#define FAR -#define z_off_t off_t - -typedef unsigned char Byte; /* 8 bits */ -typedef unsigned int uInt; /* 16 bits or more */ -typedef unsigned long uLong; /* 32 bits or more */ -typedef Byte Bytef; -typedef char charf; -typedef int intf; -typedef uInt uIntf; -typedef uLong uLongf; -typedef void *voidpf; -typedef void *voidp; - -#endif /* _ZCONF_H */ - -#define ZLIB_VERSION "1.1.4" +#define ZLIB_VERSION "1.2.3" +#define ZLIB_VERNUM 0x1230 -/* +/* The 'zlib' compression library provides in-memory compression and decompression functions, including integrity checks of the uncompressed data. This version of the library supports only one compression method @@ -133,6 +80,22 @@ typedef void *voidp; application must provide more input and/or consume the output (providing more output space) before each call. + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip streams in memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + The library does not install any signal handler. 
The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. @@ -159,13 +122,36 @@ typedef struct z_stream_s { free_func zfree; /* used to free the internal state */ voidpf opaque; /* private data object passed to zalloc and zfree */ - int data_type; /* best guess about the data type: ascii or binary */ + int data_type; /* best guess about the data type: binary or text */ uLong adler; /* adler32 value of the uncompressed data */ uLong reserved; /* reserved for future use */ } z_stream; typedef z_stream FAR *z_streamp; +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + /* The application must update next_in and avail_in when avail_in has dropped to zero. It must update next_out and avail_out when avail_out @@ -182,6 +168,15 @@ typedef z_stream FAR *z_streamp; If zlib is used in a multi-threaded application, zalloc and zfree must be thread safe. 
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + The fields total_in and total_out can be used for statistics or progress reports. After compression, total_in holds the total size of the uncompressed data and may be saved for use in the decompressor @@ -193,11 +188,13 @@ typedef z_stream FAR *z_streamp; #define Z_NO_FLUSH 0 #define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ -#define Z_PACKET_FLUSH 2 +/* 2 is a reserved value (in zlib 1.2.3, Z_PACKET_FLUSH was removed) */ #define Z_SYNC_FLUSH 3 #define Z_FULL_FLUSH 4 #define Z_FINISH 5 -/* Allowed flush values; see deflate() below for details */ +#define Z_BLOCK 6 + +/* Allowed flush values; see deflate() and inflate() below for details */ #define Z_OK 0 #define Z_STREAM_END 1 @@ -220,13 +217,16 @@ typedef z_stream FAR *z_streamp; #define Z_FILTERED 1 #define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 #define Z_DEFAULT_STRATEGY 0 /* compression strategy; see deflateInit2() below for details */ #define Z_BINARY 0 -#define Z_ASCII 1 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ #define Z_UNKNOWN 2 -/* Possible values of the data_type field */ +/* Possible values of the data_type field (though see inflate()) */ #define Z_DEFLATED 8 /* The deflate compression method (the only one supported in this version) */ @@ -245,7 +245,7 @@ ZEXTERN const char * ZEXPORT zlibVersion OF((void)); This check is automatically made 
by deflateInit and inflateInit. */ -/* +/* ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); Initializes the internal stream state for compression. The fields @@ -298,6 +298,10 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); and with zero avail_out, it must be called again after making room in the output buffer because there might be more output pending. + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is flushed to the output buffer and the output is aligned on a byte boundary, so that the decompressor can get all input data available so far. (In particular @@ -309,12 +313,14 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); Z_SYNC_FLUSH, and the compression state is reset so that decompression can restart from this point if previous compressed data has been damaged or if random access is desired. Using Z_FULL_FLUSH too often can seriously degrade - the compression. + compression. If deflate returns with avail_out == 0, this function must be called again with the same value of the flush parameter and more output space (updated avail_out), until the flush is complete (deflate returns with non-zero - avail_out). + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. If the parameter flush is set to Z_FINISH, pending input is processed, pending output is flushed and deflate returns with Z_STREAM_END if there @@ -323,17 +329,17 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); more input data, until it returns with Z_STREAM_END or an error. After deflate has returned Z_STREAM_END, the only possible operations on the stream are deflateReset or deflateEnd.
- + Z_FINISH can be used immediately after deflateInit if all the compression is to be done in a single step. In this case, avail_out must be at least - 0.1% larger than avail_in plus 12 bytes. If deflate does not return + the value returned by deflateBound (see below). If deflate does not return Z_STREAM_END, then it must be called again as described above. deflate() sets strm->adler to the adler32 checksum of all input read so far (that is, total_in bytes). - deflate() may update data_type if it can make a good guess about - the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered binary. This field is only for information purposes and does not affect the compression algorithm in any manner. @@ -342,7 +348,9 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); consumed and all output has been produced (only when flush is set to Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible - (for example avail_in or avail_out was zero). + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + fatal, and deflate() can be called again with more input and more output + space to continue compressing. */ @@ -360,7 +368,7 @@ ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); */ -/* +/* ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); Initializes the internal stream state for decompression. The fields @@ -384,9 +392,9 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); /* inflate decompresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. 
It may some - introduce some output latency (reading input without producing any output) - except when forced to flush. + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. The detailed semantics are as follows. inflate performs one or both of the following actions: @@ -410,11 +418,26 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); must be called again after making room in the output buffer because there might be more output pending. - If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much - output as possible to the output buffer. The flushing behavior of inflate is - not specified for values of the flush parameter other than Z_SYNC_FLUSH - and Z_FINISH, but the current implementation actually flushes as much output - as possible anyway. + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, + Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() stop + if and when it gets to the next deflate block boundary. When decoding the + zlib or gzip format, this will cause inflate() to return immediately after + the header and before the first block. When doing a raw inflate, inflate() + will go ahead and process the first block, and will return when it gets to + the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 + if inflate() is currently decoding the last block in the deflate stream, + plus 128 if inflate() returned immediately after decoding an end-of-block + code or decoding the complete header up to just before the first byte of the + deflate stream. 
The end-of-block will not be indicated until all of the + uncompressed data from that block has been written to strm->next_out. The + number of unused bits may in general be greater than seven, except when + bit 7 of data_type is set, in which case the number of unused bits will be + less than eight. inflate() should normally be called until it returns Z_STREAM_END or an error. However if all decompression is to be performed in a single step @@ -424,29 +447,44 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); uncompressed data. (The size of the uncompressed data may have been saved by the compressor for this purpose.) The next operation on this stream must be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster routine + is never required, but can be used to inform inflate that a faster approach may be used for the single inflate() call. - If a preset dictionary is needed at this point (see inflateSetDictionary - below), inflate sets strm-adler to the adler32 checksum of the - dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise - it sets strm->adler to the adler32 checksum of all output produced - so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or - an error code as described below. At the end of the stream, inflate() - checks that its computed adler32 checksum is equal to that saved by the - compressor and returns Z_STREAM_END only if the checksum is correct. + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the only effect of the flush parameter in this implementation + is on the return value of inflate(), as noted below, or when it returns early + because Z_BLOCK is used. 
+ + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the adler32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() will decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically. Any information + contained in the gzip header is not retained, so applications that need that + information should instead use raw inflate, see inflateInit2() below, or + inflateBack() and perform their own processing of the gzip header and + trailer. inflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if the end of the compressed data has been reached and all uncompressed output has been produced, Z_NEED_DICT if a preset dictionary is needed at this point, Z_DATA_ERROR if the input data was - corrupted (input stream not conforming to the zlib format or incorrect - adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent - (for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not - enough memory, Z_BUF_ERROR if no progress is possible or if there was not - enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR - case, the application may then call inflateSync to look for a good - compression block. 
+ corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may then + call inflateSync() to look for a good compression block if a partial recovery + of the data is desired. */ @@ -467,7 +505,7 @@ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); The following functions are needed only in some special applications. */ -/* +/* ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, int level, int method, @@ -483,11 +521,22 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, this version of the library. The windowBits parameter is the base two logarithm of the window size - (the size of the history buffer). It should be in the range 8..15 for this + (the size of the history buffer). It should be in the range 8..15 for this version of the library. Larger values of this parameter result in better compression at the expense of memory usage. The default value is 15 if deflateInit is used instead. + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute an adler32 check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), + no header crc, and the operating system will be set to 255 (unknown). 
If a + gzip stream is being written, strm->adler is a crc32 instead of an adler32. + The memLevel parameter specifies how much memory should be allocated for the internal compression state. memLevel=1 uses minimum memory but is slow and reduces compression ratio; memLevel=9 uses maximum memory @@ -496,21 +545,25 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, The strategy parameter is used to tune the compression algorithm. Use the value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a - filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no - string match). Filtered data consists mostly of small values with a - somewhat random distribution. In this case, the compression algorithm is - tuned to compress them better. The effect of Z_FILTERED is to force more - Huffman coding and less string matching; it is somewhat intermediate - between Z_DEFAULT and Z_HUFFMAN_ONLY. The strategy parameter only affects - the compression ratio but not the correctness of the compressed output even - if it is not set appropriately. + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as + Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy + parameter only affects the compression ratio but not the correctness of the + compressed output even if it is not set appropriately. Z_FIXED prevents the + use of dynamic Huffman codes, allowing for a simpler decoder for special + applications. 
deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid method). msg is set to null if there is no error message. deflateInit2 does not perform any compression: this will be done by deflate(). */ - + ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, const Bytef *dictionary, uInt dictLength)); @@ -532,13 +585,16 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, deflateInit or deflateInit2, a part of the dictionary may in effect be discarded, for example if the dictionary is larger than the window size in deflate or deflate2. Thus the strings most likely to be useful should be - put at the end of the dictionary, not at the front. + put at the end of the dictionary, not at the front. In addition, the + current implementation of deflate will use at most the window size minus + 262 bytes of the provided dictionary. - Upon return of this function, strm->adler is set to the Adler32 value + Upon return of this function, strm->adler is set to the adler32 value of the dictionary; the decompressor may later use this value to determine - which dictionary has been used by the compressor. (The Adler32 value + which dictionary has been used by the compressor. (The adler32 value applies to the whole dictionary even if only a subset of the dictionary is - actually used by the compressor.) + actually used by the compressor.) If a raw deflate was requested, then the + adler32 value is not computed and strm->adler is not set. deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a parameter is invalid (such as NULL dictionary) or the stream state is @@ -577,8 +633,8 @@ ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); */ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, - int level, - int strategy)); + int level, + int strategy)); /* Dynamically update the compression level and compression strategy. 
The interpretation of level and strategy is as in deflateInit2. This can be @@ -597,7 +653,73 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, if strm->avail_out was zero. */ -/* +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() + or deflateInit2(). This would be used to allocate an output buffer + for deflation in a single pass, and so would be called before deflate(). +*/ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the + bits leftover from a previous deflate stream when appending to it. As such, + this function can only be used for raw deflate, and must be used before the + first deflate() call after a deflateInit2() or deflateReset(). bits must be + less than or equal to 16, and that many of the least significant bits of + value will be inserted in the output. + + deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. 
+*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflags is + ignored -- the extra flags are set according to the compression level). The + caller must ensure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, int windowBits)); @@ -608,16 +730,36 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, The windowBits parameter is the base two logarithm of the maximum window size (the size of the history buffer). It should be in the range 8..15 for this version of the library. The default value is 15 if inflateInit is used - instead. If a compressed stream with a larger window size is given as - input, inflate() will return with the error code Z_DATA_ERROR instead of - trying to allocate a larger window.
- - inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as a negative - memLevel). msg is set to null if there is no error message. inflateInit2 - does not perform any decompression apart from reading the zlib header if - present: this will be done by inflate(). (So next_in and avail_in may be - modified, but next_out and avail_out are unchanged.) + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that the comments + above on the use in deflateInit2() apply to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is + a crc32 instead of an adler32.
+ + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg + is set to null if there is no error message. inflateInit2 does not perform + any decompression apart from reading the zlib header if present: this will + be done by inflate(). (So next_in and avail_in may be modified, but next_out + and avail_out are unchanged.) */ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, @@ -625,22 +767,25 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, uInt dictLength)); /* Initializes the decompression dictionary from the given uncompressed byte - sequence. This function must be called immediately after a call of inflate - if this call returned Z_NEED_DICT. The dictionary chosen by the compressor - can be determined from the Adler32 value returned by this call of - inflate. The compressor and decompressor must use exactly the same - dictionary (see deflateSetDictionary). + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the adler32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called + immediately after inflateInit2() or inflateReset() and before any call of + inflate() to set the dictionary. The application must insure that the + dictionary that was used for compression is provided. inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a parameter is invalid (such as NULL dictionary) or the stream state is inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the - expected one (incorrect Adler32 value). inflateSetDictionary does not + expected one (incorrect adler32 value). 
inflateSetDictionary does not perform any decompression: this will be done by subsequent calls of inflate(). */ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); -/* +/* Skips invalid compressed data until a full flush point (see above the description of deflate with Z_FULL_FLUSH) can be found, or until all available input is skipped. No output is provided. @@ -654,6 +799,22 @@ ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); until success or end of the input data. */ +ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being NULL). msg is left unchanged in both source and + destination. +*/ + ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); /* This function is equivalent to inflateEnd followed by inflateInit, @@ -664,6 +825,462 @@ ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); stream state was inconsistent (such as zalloc or state being NULL). */ +ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. 
+ + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, + gz_headerp head)); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK can be used to + force inflate() to return immediately after header processing is complete + and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When + any of extra, name, or comment are not Z_NULL and the respective field is + not present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. 
However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not + be allocated, or Z_VERSION_ERROR if the version of the library does not + match the version of the header file.
+*/ + +typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc)); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is more efficient than inflate() for + file i/o applications in that it avoids copying between the output and the + sliding window by simply making the window itself the output buffer. This + function trusts the application to not change the output buffer passed by + the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free + the allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects + only the raw deflate stream to decompress. This is different from the + normal behavior of inflate(), which expects either a zlib or gzip header and + trailer around the deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. 
inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero--buf is ignored in that + case--and inflateBack() will return a buffer error. inflateBack() will call + out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out() + should return zero on success, or non-zero on failure. If out() returns + non-zero, inflateBack() will return with an error. Neither in() nor out() + are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() are passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call.
The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format + error in the deflate stream (in which case strm->msg is set to indicate the + nature of the error), or Z_STREAM_ERROR if the stream was not properly + initialized. In the case of Z_BUF_ERROR, an input or output error can be + distinguished using strm->next_in which will be Z_NULL only if in() returned + an error. If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to + out() returning non-zero. (in() will always be called before out(), so + strm->next_in is assured to be defined if out() returns non-zero.) Note + that inflateBack() cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); +/* Return flags indicating compile-time options.
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + + + /* utility functions */ + +/* + The following utility functions are implemented on top of the + basic stream-oriented functions. To simplify the interface, some + default options are assumed (compression level and memory usage, + standard memory allocation functions). The source code of these + utility functions can easily be modified if you need special options. +*/ + +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Compresses the source buffer into the destination buffer. 
sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least the value returned + by compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level)); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before + a compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. 
(The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. +*/ + +#if !KERNEL + +typedef voidp gzFile; + +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); +/* + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb") but can also include a compression level + ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for + Huffman only compression as in "wb1h", or 'R' for run-length encoding + as in "wb1R". (See the description of deflateInit2 for more information + about the strategy parameter.) + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. + + gzopen returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). */ + +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen() associates a gzFile with the file descriptor fd. File + descriptors are obtained from calls like open, dup, creat, pipe or + fileno (if the file has been previously opened with fopen). + The mode parameter is as in gzopen. + The next call of gzclose on the returned gzFile will also close the + file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file + descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+ gzdopen returns NULL if there was insufficient memory to allocate + the (de)compression state. +*/ + +ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); +/* + Dynamically update the compression level or strategy. See the description + of deflateInit2 for the meaning of these parameters. + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not + opened for writing. +*/ + +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +/* + Reads the given number of uncompressed bytes from the compressed file. + If the input file was not in gzip format, gzread copies the given number + of bytes into the buffer. + gzread returns the number of uncompressed bytes actually read (0 for + end of file, -1 for error). */ + +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); +/* + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of uncompressed bytes actually written + (0 in case of error). +*/ + +ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); +/* + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). The number of + uncompressed bytes written is limited to 4095. The caller should assure that + this limit is not exceeded. If it is exceeded, then gzprintf() will return + an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf() + because the secure snprintf() or vsnprintf() functions were not available. +*/ + +ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); +/* + Writes the given null-terminated string to the compressed file, excluding + the terminating null character.
+ gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); +/* + Reads bytes from the compressed file until len-1 characters are read, or + a newline character is read and transferred to buf, or an end-of-file + condition is encountered. The string is then terminated with a null + character. + gzgets returns buf, or Z_NULL in case of error. +*/ + +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +/* + Writes c, converted to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +/* + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ + +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +/* + Push one character back onto the stream to be read again later. + Only one character of push-back is allowed. gzungetc() returns the + character pushed, or -1 on failure. gzungetc() will fail if a + character has been pushed but not read yet, or if c is -1. The pushed + character will be discarded if the stream is repositioned with gzseek() + or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +/* + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. The return value is the zlib + error number (see function gzerror below). gzflush returns Z_OK if + the flush parameter is Z_FINISH and all output could be flushed. + gzflush should be called only when strictly necessary because it can + degrade compression. +*/ + +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); +/* + Sets the starting position for the next gzread or gzwrite on the + given compressed file. The offset represents a number of bytes in the + uncompressed data stream. 
The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); +/* + Rewinds the given file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) +*/ + +ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); +/* + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +ZEXTERN int ZEXPORT gzeof OF((gzFile file)); +/* + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ + +ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); +/* + Returns 1 if file is being read directly without decompression, otherwise + zero. +*/ + +ZEXTERN int ZEXPORT gzclose OF((gzFile file)); +/* + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. The return value is the zlib + error number (see function gzerror below). +*/ + +ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); +/* + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. 
If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. +*/ + +ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); +/* + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !KERNEL */ + /* checksum functions */ /* @@ -688,21 +1305,61 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); if (adler != original_adler) error(); */ +ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, + z_off_t len2)); +/* + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. +*/ + +ZEXTERN uLong ZEXPORT z_crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is NULL, this function returns the required initial + value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + Usage example: + + uLong crc = z_crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = z_crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT z_crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); + +/* + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. z_crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2.
+*/ + + /* various hacks, don't look :) */ /* deflateInit and inflateInit are macros to allow checking the zlib version * and the compiler's view of z_stream: */ ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level, - const char *vers, int stream_size)); + const char *version, int stream_size)); ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, - const char *vers, int stream_size)); + const char *version, int stream_size)); ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method, int windowBits, int memLevel, - int strategy, const char *vers, + int strategy, const char *version, int stream_size)); ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, - const char *vers, int stream_size)); + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); #define deflateInit(strm, level) \ deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) #define inflateInit(strm) \ @@ -712,12 +1369,21 @@ ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, (strategy), ZLIB_VERSION, sizeof(z_stream)) #define inflateInit2(strm, windowBits) \ inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) +#define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + + +#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; /* hack for buggy compilers */ +#endif -ZEXTERN const char * ZEXPORT zError OF((int err)); +ZEXTERN const char * ZEXPORT zError OF((int)); ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); +ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); #ifdef __cplusplus } #endif -#endif /* _ZLIB_H */ +#endif /* ZLIB_H */ diff --git a/libkern/mach-o/loader.h b/libkern/mach-o/loader.h deleted file mode 100644 index 9a3f8b58a..000000000 --- 
a/libkern/mach-o/loader.h +++ /dev/null @@ -1,819 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _MACHO_LOADER_H_ -#define _MACHO_LOADER_H_ - -/* - * This file describes the format of mach object files. - */ - -/* - * is needed here for the cpu_type_t and cpu_subtype_t types - * and contains the constants for the possible values of these types. - */ -#include - -/* - * is needed here for the vm_prot_t type and contains the - * constants that are or'ed together for the possible values of this type. - */ -#include - -/* - * is expected to define the flavors of the thread - * states and the structures of those flavors for each machine. 
- */ -#include - -/* - * The mach header appears at the very beginning of the object file; it - * is the same for both 32-bit and 64-bit architectures. - */ -struct mach_header { - uint32_t magic; /* mach magic number identifier */ - cpu_type_t cputype; /* cpu specifier */ - cpu_subtype_t cpusubtype; /* machine specifier */ - uint32_t filetype; /* type of file */ - uint32_t ncmds; /* number of load commands */ - uint32_t sizeofcmds; /* the size of all the load commands */ - uint32_t flags; /* flags */ -}; - -/* Constant for the magic field of the mach_header (32-bit architectures) */ -#define MH_MAGIC 0xfeedface /* the mach magic number */ -#define MH_CIGAM 0xcefaedfe - -/* Constant for the magic field of the mach_header_64 (64-bit architectures) */ -#define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ -#define MH_CIGAM_64 0xcffaedfe - -/* Constants for the cmd field of new load commands, the type */ -#define LC_SEGMENT_64 0x19 /* 64-bit segment of this file to be mapped */ -#define LC_ROUTINES_64 0x1a /* 64-bit image routines */ - - -/* - * The layout of the file depends on the filetype. For all but the MH_OBJECT - * file type the segments are padded out and aligned on a segment alignment - * boundary for efficient demand pageing. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB, - * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part - * of their first segment. - * - * The file type MH_OBJECT is a compact format intended as output of the - * assembler and input (and possibly output) of the link editor (the .o - * format). All sections are in one unnamed segment with no segment padding. - * This format is used as an executable format when the file is so small the - * segment padding greatly increases it's size. - * - * The file type MH_PRELOAD is an executable format intended for things that - * not executed under the kernel (proms, stand alones, kernels, etc). 
The - * format can be executed under the kernel but may demand paged it and not - * preload it before execution. - * - * A core file is in MH_CORE format and can be any in an arbritray legal - * Mach-O file. - * - * Constants for the filetype field of the mach_header - */ -#define MH_OBJECT 0x1 /* relocatable object file */ -#define MH_EXECUTE 0x2 /* demand paged executable file */ -#define MH_FVMLIB 0x3 /* fixed VM shared library file */ -#define MH_CORE 0x4 /* core file */ -#define MH_PRELOAD 0x5 /* preloaded executable file */ -#define MH_DYLIB 0x6 /* dynamicly bound shared library file*/ -#define MH_DYLINKER 0x7 /* dynamic link editor */ -#define MH_BUNDLE 0x8 /* dynamicly bound bundle file */ - -/* Constants for the flags field of the mach_header */ -#define MH_NOUNDEFS 0x1 /* the object file has no undefined - references, can be executed */ -#define MH_INCRLINK 0x2 /* the object file is the output of an - incremental link against a base file - and can't be link edited again */ -#define MH_DYLDLINK 0x4 /* the object file is input for the - dynamic linker and can't be staticly - link edited again */ -#define MH_BINDATLOAD 0x8 /* the object file's undefined - references are bound by the dynamic - linker when loaded. */ -#define MH_PREBOUND 0x10 /* the file has it's dynamic undefined - references prebound. */ - -/* - * The load commands directly follow the mach_header. The total size of all - * of the commands is given by the sizeofcmds field in the mach_header. All - * load commands must have as their first two fields cmd and cmdsize. The cmd - * field is filled in with a constant for that command type. Each command type - * has a structure specifically for it. The cmdsize field is the size in bytes - * of the particular load command structure plus anything that follows it that - * is a part of the load command (i.e. section structures, strings, etc.). 
To - * advance to the next load command the cmdsize can be added to the offset or - * pointer of the current load command. The cmdsize for 32-bit architectures - * MUST be a multiple of 4 bytes and for 64-bit architectures MUST be a multiple - * of 8 bytes (these are forever the maximum alignment of any load commands). - * sizeof(long) (this is forever the maximum alignment of any load commands). - * The padded bytes must be zero. All tables in the object file must also - * follow these rules so the file can be memory mapped. Otherwise the pointers - * to these tables will not work well or at all on some machines. With all - * padding zeroed like objects will compare byte for byte. - */ -struct load_command { - unsigned long cmd; /* type of load command */ - unsigned long cmdsize; /* total size of command in bytes */ -}; - -/* Constants for the cmd field of all load commands, the type */ -#define LC_SEGMENT 0x1 /* segment of this file to be mapped */ -#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */ -#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */ -#define LC_THREAD 0x4 /* thread */ -#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */ -#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */ -#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */ -#define LC_IDENT 0x8 /* object identification info (obsolete) */ -#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */ -#define LC_PREPAGE 0xa /* prepage command (internal use) */ -#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */ -#define LC_LOAD_DYLIB 0xc /* load a dynamicly linked shared library */ -#define LC_ID_DYLIB 0xd /* dynamicly linked shared lib identification */ -#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */ -#define LC_ID_DYLINKER 0xf /* dynamic linker identification */ -#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamicly */ - /* linked shared library */ - -#define LC_UUID 0x1b 
/* the uuid */ - -/* - * A variable length string in a load command is represented by an lc_str - * union. The strings are stored just after the load command structure and - * the offset is from the start of the load command structure. The size - * of the string is reflected in the cmdsize field of the load command. - * Once again any padded bytes to bring the cmdsize field to a multiple - * of sizeof(long) must be zero. - */ -union lc_str { - unsigned long offset; /* offset to the string */ - char *ptr; /* pointer to the string */ -}; - -/* - * The segment load command indicates that a part of this file is to be - * mapped into the task's address space. The size of this segment in memory, - * vmsize, maybe equal to or larger than the amount to map from this file, - * filesize. The file is mapped starting at fileoff to the beginning of - * the segment in memory, vmaddr. The rest of the memory of the segment, - * if any, is allocated zero fill on demand. The segment's maximum virtual - * memory protection and initial virtual memory protection are specified - * by the maxprot and initprot fields. If the segment has sections then the - * section structures directly follow the segment command and their size is - * reflected in cmdsize. 
- */ -struct segment_command { /* for 32-bit architectures */ - unsigned long cmd; /* LC_SEGMENT */ - unsigned long cmdsize; /* includes sizeof section structs */ - char segname[16]; /* segment name */ - unsigned long vmaddr; /* memory address of this segment */ - unsigned long vmsize; /* memory size of this segment */ - unsigned long fileoff; /* file offset of this segment */ - unsigned long filesize; /* amount to map from the file */ - vm_prot_t maxprot; /* maximum VM protection */ - vm_prot_t initprot; /* initial VM protection */ - unsigned long nsects; /* number of sections in segment */ - unsigned long flags; /* flags */ -}; - -/* - * The 64-bit segment load command indicates that a part of this file is to be - * mapped into a 64-bit task's address space. If the 64-bit segment has - * sections then section_64 structures directly follow the 64-bit segment - * command and their size is reflected in cmdsize. - */ -struct segment_command_64 { /* for 64-bit architectures */ - uint32_t cmd; /* LC_SEGMENT_64 */ - uint32_t cmdsize; /* includes sizeof section_64 structs */ - char segname[16]; /* segment name */ - uint64_t vmaddr; /* memory address of this segment */ - uint64_t vmsize; /* memory size of this segment */ - uint32_t fileoff; /* file offset of this segment */ - uint32_t filesize; /* amount to map from the file */ - vm_prot_t maxprot; /* maximum VM protection */ - vm_prot_t initprot; /* initial VM protection */ - uint32_t nsects; /* number of sections in segment */ - uint32_t flags; /* flags */ -}; - - -/* Constants for the flags field of the segment_command */ -#define SG_HIGHVM 0x1 /* the file contents for this segment is for - the high part of the VM space, the low part - is zero filled (for stacks in core files) */ -#define SG_FVMLIB 0x2 /* this segment is the VM that is allocated by - a fixed VM library, for overlap checking in - the link editor */ -#define SG_NORELOC 0x4 /* this segment has nothing that was relocated - in it and nothing relocated to 
it, that is - it maybe safely replaced without relocation*/ - -/* - * A segment is made up of zero or more sections. Non-MH_OBJECT files have - * all of their segments with the proper sections in each, and padded to the - * specified segment alignment when produced by the link editor. The first - * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header - * and load commands of the object file before it's first section. The zero - * fill sections are always last in their segment (in all formats). This - * allows the zeroed segment padding to be mapped into memory where zero fill - * sections might be. The gigabyte zero fill sections, those with the section - * type S_GB_ZEROFILL, can only be in a segment with sections of this type. - * These segments are then placed after all other segments. - * - * The MH_OBJECT format has all of it's sections in one segment for - * compactness. There is no padding to a specified segment boundary and the - * mach_header and load commands are not part of the segment. - * - * Sections with the same section name, sectname, going into the same segment, - * segname, are combined by the link editor. The resulting section is aligned - * to the maximum alignment of the combined sections and is the new section's - * alignment. The combined sections are aligned to their original alignment in - * the combined section. Any padded bytes to get the specified alignment are - * zeroed. - * - * The format of the relocation entries referenced by the reloff and nreloc - * fields of the section structure for mach object files is described in the - * header file . 
- */ -struct section { /* for 32-bit architectures */ - char sectname[16]; /* name of this section */ - char segname[16]; /* segment this section goes in */ - unsigned long addr; /* memory address of this section */ - unsigned long size; /* size in bytes of this section */ - unsigned long offset; /* file offset of this section */ - unsigned long align; /* section alignment (power of 2) */ - unsigned long reloff; /* file offset of relocation entries */ - unsigned long nreloc; /* number of relocation entries */ - unsigned long flags; /* flags (section type and attributes)*/ - unsigned long reserved1; /* reserved */ - unsigned long reserved2; /* reserved */ -}; - -struct section_64 { /* for 64-bit architectures */ - char sectname[16]; /* name of this section */ - char segname[16]; /* segment this section goes in */ - uint64_t addr; /* memory address of this section */ - uint64_t size; /* size in bytes of this section */ - uint32_t offset; /* file offset of this section */ - uint32_t align; /* section alignment (power of 2) */ - uint32_t reloff; /* file offset of relocation entries */ - uint32_t nreloc; /* number of relocation entries */ - uint32_t flags; /* flags (section type and attributes)*/ - uint32_t reserved1; /* reserved (for offset or index) */ - uint32_t reserved2; /* reserved (for count or sizeof) */ - uint32_t reserved3; /* reserved */ -}; - - -/* - * The flags field of a section structure is separated into two parts a section - * type and section attributes. The section types are mutually exclusive (it - * can only have one type) but the section attributes are not (it may have more - * than one attribute). 
- */ -#define SECTION_TYPE 0x000000ff /* 256 section types */ -#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */ - -/* Constants for the type of a section */ -#define S_REGULAR 0x0 /* regular section */ -#define S_ZEROFILL 0x1 /* zero fill on demand section */ -#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/ -#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */ -#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */ -#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */ - /* literals */ -/* - * For the two types of symbol pointers sections and the symbol stubs section - * they have indirect symbol table entries. For each of the entries in the - * section the indirect symbol table entries, in corresponding order in the - * indirect symbol table, start at the index stored in the reserved1 field - * of the section structure. Since the indirect symbol table entries - * correspond to the entries in the section the number of indirect symbol table - * entries is inferred from the size of the section divided by the size of the - * entries in the section. For symbol pointers sections the size of the entries - * in the section is 4 bytes and for symbol stubs sections the byte size of the - * stubs is stored in the reserved2 field of the section structure. - */ -#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy - symbol pointers */ -#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol - pointers */ -#define S_SYMBOL_STUBS 0x8 /* section with only symbol - stubs, byte size of stub in - the reserved2 field */ -#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function - pointers for initialization*/ -/* - * Constants for the section attributes part of the flags field of a section - * structure. 
- */ -#define SECTION_ATTRIBUTES_USR 0xff000000 /* User setable attributes */ -#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section contains only true - machine instructions */ -#define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */ -#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some - machine instructions */ -#define S_ATTR_EXT_RELOC 0x00000200 /* section has external - relocation entries */ -#define S_ATTR_LOC_RELOC 0x00000100 /* section has local - relocation entries */ - - -/* - * The names of segments and sections in them are mostly meaningless to the - * link-editor. But there are few things to support traditional UNIX - * executables that require the link-editor and assembler to use some names - * agreed upon by convention. - * - * The initial protection of the "__TEXT" segment has write protection turned - * off (not writeable). - * - * The link-editor will allocate common symbols at the end of the "__common" - * section in the "__DATA" segment. It will create the section and segment - * if needed. 
- */ - -/* The currently known segment names and the section names in those segments */ - -#define SEG_PAGEZERO "__PAGEZERO" /* the pagezero segment which has no */ - /* protections and catches NULL */ - /* references for MH_EXECUTE files */ - - -#define SEG_TEXT "__TEXT" /* the tradition UNIX text segment */ -#define SECT_TEXT "__text" /* the real text part of the text */ - /* section no headers, and no padding */ -#define SECT_FVMLIB_INIT0 "__fvmlib_init0" /* the fvmlib initialization */ - /* section */ -#define SECT_FVMLIB_INIT1 "__fvmlib_init1" /* the section following the */ - /* fvmlib initialization */ - /* section */ - -#define SEG_DATA "__DATA" /* the tradition UNIX data segment */ -#define SECT_DATA "__data" /* the real initialized data section */ - /* no padding, no bss overlap */ -#define SECT_BSS "__bss" /* the real uninitialized data section*/ - /* no padding */ -#define SECT_COMMON "__common" /* the section common symbols are */ - /* allocated in by the link editor */ - -#define SEG_OBJC "__OBJC" /* objective-C runtime segment */ -#define SECT_OBJC_SYMBOLS "__symbol_table" /* symbol table */ -#define SECT_OBJC_MODULES "__module_info" /* module information */ -#define SECT_OBJC_STRINGS "__selector_strs" /* string table */ -#define SECT_OBJC_REFS "__selector_refs" /* string table */ - -#define SEG_ICON "__ICON" /* the NeXT icon segment */ -#define SECT_ICON_HEADER "__header" /* the icon headers */ -#define SECT_ICON_TIFF "__tiff" /* the icons in tiff format */ - -#define SEG_LINKEDIT "__LINKEDIT" /* the segment containing all structs */ - /* created and maintained by the link */ - /* editor. Created with -seglinkedit */ - /* option to ld(1) for MH_EXECUTE and */ - /* FVMLIB file types only */ - -#define SEG_UNIXSTACK "__UNIXSTACK" /* the unix stack segment */ - -/* - * Fixed virtual memory shared libraries are identified by two things. The - * target pathname (the name of the library as found for execution), and the - * minor version number. 
The address of where the headers are loaded is in - * header_addr. - */ -struct fvmlib { - union lc_str name; /* library's target pathname */ - unsigned long minor_version; /* library's minor version number */ - unsigned long header_addr; /* library's header address */ -}; - -/* - * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) - * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. - * An object that uses a fixed virtual shared library also contains a - * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. - */ -struct fvmlib_command { - unsigned long cmd; /* LC_IDFVMLIB or LC_LOADFVMLIB */ - unsigned long cmdsize; /* includes pathname string */ - struct fvmlib fvmlib; /* the library identification */ -}; - -/* - * Dynamicly linked shared libraries are identified by two things. The - * pathname (the name of the library as found for execution), and the - * compatibility version number. The pathname must match and the compatibility - * number in the user of the library must be greater than or equal to the - * library being used. The time stamp is used to record the time a library was - * built and copied into user so it can be use to determined if the library used - * at runtime is exactly the same as used to built the program. - */ -struct dylib { - union lc_str name; /* library's path name */ - unsigned long timestamp; /* library's build time stamp */ - unsigned long current_version; /* library's current version number */ - unsigned long compatibility_version;/* library's compatibility vers number*/ -}; - -/* - * A dynamicly linked shared library (filetype == MH_DYLIB in the mach header) - * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. - * An object that uses a dynamicly linked shared library also contains a - * dylib_command (cmd == LC_LOAD_DYLIB) for each library it uses. 
- */ -struct dylib_command { - unsigned long cmd; /* LC_ID_DYLIB or LC_LOAD_DYLIB */ - unsigned long cmdsize; /* includes pathname string */ - struct dylib dylib; /* the library identification */ -}; - -/* - * A program (filetype == MH_EXECUTE) or bundle (filetype == MH_BUNDLE) that is - * prebound to it's dynamic libraries has one of these for each library that - * the static linker used in prebinding. It contains a bit vector for the - * modules in the library. The bits indicate which modules are bound (1) and - * which are not (0) from the library. The bit for module 0 is the low bit - * of the first byte. So the bit for the Nth module is: - * (linked_modules[N/8] >> N%8) & 1 - */ -struct prebound_dylib_command { - unsigned long cmd; /* LC_PREBOUND_DYLIB */ - unsigned long cmdsize; /* includes strings */ - union lc_str name; /* library's path name */ - unsigned long nmodules; /* number of modules in library */ - union lc_str linked_modules; /* bit vector of linked modules */ -}; - -/* - * A program that uses a dynamic linker contains a dylinker_command to identify - * the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker - * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). - * A file can have at most one of these. - */ -struct dylinker_command { - unsigned long cmd; /* LC_ID_DYLINKER or LC_LOAD_DYLINKER */ - unsigned long cmdsize; /* includes pathname string */ - union lc_str name; /* dynamic linker's path name */ -}; - -/* - * Thread commands contain machine-specific data structures suitable for - * use in the thread state primitives. The machine specific data structures - * follow the struct thread_command as follows. - * Each flavor of machine specific data structure is preceded by an unsigned - * long constant for the flavor of that data structure, an unsigned long - * that is the count of longs of the size of the state data structure and then - * the state data structure follows. 
This triple may be repeated for many - * flavors. The constants for the flavors, counts and state data structure - * definitions are expected to be in the header file . - * These machine specific data structures sizes must be multiples of - * sizeof(long). The cmdsize reflects the total size of the thread_command - * and all of the sizes of the constants for the flavors, counts and state - * data structures. - * - * For executable objects that are unix processes there will be one - * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. - * This is the same as a LC_THREAD, except that a stack is automatically - * created (based on the shell's limit for the stack size). Command arguments - * and environment variables are copied onto that stack. - */ -struct thread_command { - unsigned long cmd; /* LC_THREAD or LC_UNIXTHREAD */ - unsigned long cmdsize; /* total size of this command */ - /* unsigned long flavor flavor of thread state */ - /* unsigned long count count of longs in thread state */ - /* struct XXX_thread_state state thread state for this flavor */ - /* ... */ -}; - -/* - * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD - * "stab" style symbol table information as described in the header files - * and . - */ -struct symtab_command { - unsigned long cmd; /* LC_SYMTAB */ - unsigned long cmdsize; /* sizeof(struct symtab_command) */ - unsigned long symoff; /* symbol table offset */ - unsigned long nsyms; /* number of symbol table entries */ - unsigned long stroff; /* string table offset */ - unsigned long strsize; /* string table size in bytes */ -}; - -/* - * This is the second set of the symbolic information which is used to support - * the data structures for the dynamicly link editor. - * - * The original set of symbolic information in the symtab_command which contains - * the symbol and string tables must also be present when this load command is - * present. 
When this load command is present the symbol table is organized - * into three groups of symbols: - * local symbols (static and debugging symbols) - grouped by module - * defined external symbols - grouped by module (sorted by name if not lib) - * undefined external symbols (sorted by name) - * In this load command there are offsets and counts to each of the three groups - * of symbols. - * - * This load command contains a the offsets and sizes of the following new - * symbolic information tables: - * table of contents - * module table - * reference symbol table - * indirect symbol table - * The first three tables above (the table of contents, module table and - * reference symbol table) are only present if the file is a dynamicly linked - * shared library. For executable and object modules, which are files - * containing only one module, the information that would be in these three - * tables is determined as follows: - * table of contents - the defined external symbols are sorted by name - * module table - the file contains only one module so everything in the - * file is part of the module. - * reference symbol table - is the defined and undefined external symbols - * - * For dynamicly linked shared library files this load command also contains - * offsets and sizes to the pool of relocation entries for all sections - * separated into two groups: - * external relocation entries - * local relocation entries - * For executable and object modules the relocation entries continue to hang - * off the section structures. 
- */ -struct dysymtab_command { - unsigned long cmd; /* LC_DYSYMTAB */ - unsigned long cmdsize; /* sizeof(struct dysymtab_command) */ - - /* - * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command - * are grouped into the following three groups: - * local symbols (further grouped by the module they are from) - * defined external symbols (further grouped by the module they are from) - * undefined symbols - * - * The local symbols are used only for debugging. The dynamic binding - * process may have to use them to indicate to the debugger the local - * symbols for a module that is being bound. - * - * The last two groups are used by the dynamic binding process to do the - * binding (indirectly through the module table and the reference symbol - * table when this is a dynamicly linked shared library file). - */ - unsigned long ilocalsym; /* index to local symbols */ - unsigned long nlocalsym; /* number of local symbols */ - - unsigned long iextdefsym; /* index to externally defined symbols */ - unsigned long nextdefsym; /* number of externally defined symbols */ - - unsigned long iundefsym; /* index to undefined symbols */ - unsigned long nundefsym; /* number of undefined symbols */ - - /* - * For the for the dynamic binding process to find which module a symbol - * is defined in the table of contents is used (analogous to the ranlib - * structure in an archive) which maps defined external symbols to modules - * they are defined in. This exists only in a dynamicly linked shared - * library file. For executable and object modules the defined external - * symbols are sorted by name and is use as the table of contents. - */ - unsigned long tocoff; /* file offset to table of contents */ - unsigned long ntoc; /* number of entries in table of contents */ - - /* - * To support dynamic binding of "modules" (whole object files) the symbol - * table must reflect the modules that the file was created from. 
This is - * done by having a module table that has indexes and counts into the merged - * tables for each module. The module structure that these two entries - * refer to is described below. This exists only in a dynamicly linked - * shared library file. For executable and object modules the file only - * contains one module so everything in the file belongs to the module. - */ - unsigned long modtaboff; /* file offset to module table */ - unsigned long nmodtab; /* number of module table entries */ - - /* - * To support dynamic module binding the module structure for each module - * indicates the external references (defined and undefined) each module - * makes. For each module there is an offset and a count into the - * reference symbol table for the symbols that the module references. - * This exists only in a dynamicly linked shared library file. For - * executable and object modules the defined external symbols and the - * undefined external symbols indicates the external references. - */ - unsigned long extrefsymoff; /* offset to referenced symbol table */ - unsigned long nextrefsyms; /* number of referenced symbol table entries */ - - /* - * The sections that contain "symbol pointers" and "routine stubs" have - * indexes and (implied counts based on the size of the section and fixed - * size of the entry) into the "indirect symbol" table for each pointer - * and stub. For every section of these two types the index into the - * indirect symbol table is stored in the section header in the field - * reserved1. An indirect symbol table entry is simply a 32bit index into - * the symbol table to the symbol that the pointer or stub is referring to. - * The indirect symbol table is ordered to match the entries in the section. 
- */ - unsigned long indirectsymoff; /* file offset to the indirect symbol table */ - unsigned long nindirectsyms; /* number of indirect symbol table entries */ - - /* - * To support relocating an individual module in a library file quickly the - * external relocation entries for each module in the library need to be - * accessed efficiently. Since the relocation entries can't be accessed - * through the section headers for a library file they are separated into - * groups of local and external entries further grouped by module. In this - * case the presents of this load command who's extreloff, nextrel, - * locreloff and nlocrel fields are non-zero indicates that the relocation - * entries of non-merged sections are not referenced through the section - * structures (and the reloff and nreloc fields in the section headers are - * set to zero). - * - * Since the relocation entries are not accessed through the section headers - * this requires the r_address field to be something other than a section - * offset to identify the item to be relocated. In this case r_address is - * set to the offset from the vmaddr of the first LC_SEGMENT command. - * - * The relocation entries are grouped by module and the module table - * entries have indexes and counts into them for the group of external - * relocation entries for that the module. - * - * For sections that are merged across modules there must not be any - * remaining external relocation entries for them (for merged sections - * remaining relocation entries must be local). - */ - unsigned long extreloff; /* offset to external relocation entries */ - unsigned long nextrel; /* number of external relocation entries */ - - /* - * All the local relocation entries are grouped together (they are not - * grouped by their module since they are only used if the object is moved - * from it staticly link edited address). 
- */ - unsigned long locreloff; /* offset to local relocation entries */ - unsigned long nlocrel; /* number of local relocation entries */ - -}; - -/* - * An indirect symbol table entry is simply a 32bit index into the symbol table - * to the symbol that the pointer or stub is refering to. Unless it is for a - * non-lazy symbol pointer section for a defined symbol which strip(1) as - * removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the - * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. - */ -#define INDIRECT_SYMBOL_LOCAL 0x80000000 -#define INDIRECT_SYMBOL_ABS 0x40000000 - - -/* a table of contents entry */ -struct dylib_table_of_contents { - unsigned long symbol_index; /* the defined external symbol - (index into the symbol table) */ - unsigned long module_index; /* index into the module table this symbol - is defined in */ -}; - -/* a module table entry */ -struct dylib_module { - unsigned long module_name; /* the module name (index into string table) */ - - unsigned long iextdefsym; /* index into externally defined symbols */ - unsigned long nextdefsym; /* number of externally defined symbols */ - unsigned long irefsym; /* index into reference symbol table */ - unsigned long nrefsym; /* number of reference symbol table entries */ - unsigned long ilocalsym; /* index into symbols for local symbols */ - unsigned long nlocalsym; /* number of local symbols */ - - unsigned long iextrel; /* index into external relocation entries */ - unsigned long nextrel; /* number of external relocation entries */ - - unsigned long iinit; /* index into the init section */ - unsigned long ninit; /* number of init section entries */ - - unsigned long /* for this module address of the start of */ - objc_module_info_addr; /* the (__OBJC,__module_info) section */ - unsigned long /* for this module size of */ - objc_module_info_size; /* the (__OBJC,__module_info) section */ -}; - -/* a 64-bit module table entry */ -struct dylib_module_64 { - uint32_t 
module_name; /* the module name (index into string table) */ - - uint32_t iextdefsym; /* index into externally defined symbols */ - uint32_t nextdefsym; /* number of externally defined symbols */ - uint32_t irefsym; /* index into reference symbol table */ - uint32_t nrefsym; /* number of reference symbol table entries */ - uint32_t ilocalsym; /* index into symbols for local symbols */ - uint32_t nlocalsym; /* number of local symbols */ - - uint32_t iextrel; /* index into external relocation entries */ - uint32_t nextrel; /* number of external relocation entries */ - - uint32_t iinit_iterm; /* low 16 bits are the index into the init - section, high 16 bits are the index into - the term section */ - uint32_t ninit_nterm; /* low 16 bits are the number of init section - entries, high 16 bits are the number of - term section entries */ - - uint32_t /* for this module size of the */ - objc_module_info_size; /* (__OBJC,__module_info) section */ - uint64_t /* for this module address of the start of */ - objc_module_info_addr; /* the (__OBJC,__module_info) section */ -}; - - -/* - * The entries in the reference symbol table are used when loading the module - * (both by the static and dynamic link editors) and if the module is unloaded - * or replaced. Therefore all external symbols (defined and undefined) are - * listed in the module's reference table. The flags describe the type of - * reference that is being made. The constants for the flags are defined in - * as they are also used for symbol table entries. - */ -struct dylib_reference { - unsigned long isym:24, /* index into the symbol table */ - flags:8; /* flags to indicate the type of reference */ -}; - -/* - * The uuid load command contains a single 128-bit unique random number that - * identifies an object produced by the static link editor. 
- */ -struct uuid_command { - uint32_t cmd; /* LC_UUID */ - uint32_t cmdsize; /* sizeof(struct uuid_command) */ - uint8_t uuid[16]; /* the 128-bit uuid */ -}; - -/* - * The symseg_command contains the offset and size of the GNU style - * symbol table information as described in the header file . - * The symbol roots of the symbol segments must also be aligned properly - * in the file. So the requirement of keeping the offsets aligned to a - * multiple of a sizeof(long) translates to the length field of the symbol - * roots also being a multiple of a long. Also the padding must again be - * zeroed. (THIS IS OBSOLETE and no longer supported). - */ -struct symseg_command { - unsigned long cmd; /* LC_SYMSEG */ - unsigned long cmdsize; /* sizeof(struct symseg_command) */ - unsigned long offset; /* symbol segment offset */ - unsigned long size; /* symbol segment size in bytes */ -}; - -/* - * The ident_command contains a free format string table following the - * ident_command structure. The strings are null terminated and the size of - * the command is padded out with zero bytes to a multiple of sizeof(long). - * (THIS IS OBSOLETE and no longer supported). - */ -struct ident_command { - unsigned long cmd; /* LC_IDENT */ - unsigned long cmdsize; /* strings that follow this command */ -}; - -/* - * The fvmfile_command contains a reference to a file to be loaded at the - * specified virtual address. (Presently, this command is reserved for NeXT - * internal use. The kernel ignores this command when loading a program into - * memory). 
- */ -struct fvmfile_command { - unsigned long cmd; /* LC_FVMFILE */ - unsigned long cmdsize; /* includes pathname string */ - union lc_str name; /* files pathname */ - unsigned long header_addr; /* files virtual address */ -}; - -#endif /*_MACHO_LOADER_H_*/ diff --git a/libkern/mach-o/mach_header.h b/libkern/mach-o/mach_header.h deleted file mode 100644 index 991dde94b..000000000 --- a/libkern/mach-o/mach_header.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.h - * - * Definitions for accessing mach-o headers. 
This header wraps the - * routines defined in osfmk/mach-o/mach_header.c; this is made clear - * by the existance of the getsectcmdsymtabfromheader() prototype. - * - * NOTE: The functions prototyped by this header only operate againt - * 32 bit mach headers. Many of these functions imply the - * currently running kernel, and cannot be used against mach - * headers other than that of the currently running kernel. - * - * HISTORY - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.h. - * Ifdef'ed out most of this since I couldn't find any references. - */ - -#ifndef _KERN_MACH_HEADER_ -#define _KERN_MACH_HEADER_ - -#include -#include - -#if KERNEL -struct mach_header **getmachheaders(void); -vm_offset_t getlastaddr(void); - -struct segment_command *firstseg(void); -struct segment_command *firstsegfromheader(struct mach_header *header); -struct segment_command *nextseg(struct segment_command *sgp); -struct segment_command *nextsegfromheader( - struct mach_header *header, - struct segment_command *seg); -struct segment_command *getsegbyname(const char *seg_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - const char *seg_name); -void *getsegdatafromheader(struct mach_header *, const char *, int *); -struct section *getsectbyname(const char *seg_name, const char *sect_name); -struct section *getsectbynamefromheader( - struct mach_header *header, - const char *seg_name, - const char *sect_name); -void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); -struct section *firstsect(struct segment_command *sgp); -struct section *nextsect(struct segment_command *sgp, struct section *sp); -struct fvmlib_command *fvmlib(void); -struct fvmlib_command *fvmlibfromheader(struct mach_header *header); -struct segment_command *getfakefvmseg(void); -#ifdef MACH_KDB -struct symtab_command *getsectcmdsymtabfromheader(struct mach_header *); -boolean_t 
getsymtab(struct mach_header *, vm_offset_t *, int *, - vm_offset_t *, vm_size_t *); -#endif - -#endif /* KERNEL */ - -#endif /* _KERN_MACH_HEADER_ */ diff --git a/libsa/mkext.c b/libkern/mkext.c similarity index 64% rename from libsa/mkext.c rename to libkern/mkext.c index a5f1096e7..e1fc062e1 100644 --- a/libsa/mkext.c +++ b/libkern/mkext.c @@ -26,13 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include -#if KERNEL -#include -#include -#else -#include -#include -#endif /* KERNEL */ +#include #define BASE 65521L /* largest prime smaller than 65536 */ #define NMAX 5000 @@ -44,8 +38,8 @@ #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); #define DO16(buf) DO8(buf,0); DO8(buf,8); -__private_extern__ u_int32_t -adler32(uint8_t *buf, int32_t len) +u_int32_t +mkext_adler32(uint8_t *buf, int32_t len) { unsigned long s1 = 1; // adler & 0xffff; unsigned long s2 = 0; // (adler >> 16) & 0xffff; @@ -105,12 +99,13 @@ struct encode_state { }; -__private_extern__ int -decompress_lzss(u_int8_t *dst, u_int8_t *src, u_int32_t srclen) +int +decompress_lzss(u_int8_t *dst, u_int32_t dstlen, u_int8_t *src, u_int32_t srclen) { /* ring buffer of size N, with extra F-1 bytes to aid string comparison */ u_int8_t text_buf[N + F - 1]; u_int8_t *dststart = dst; + u_int8_t *dstend = dst + dstlen; u_int8_t *srcend = src + srclen; int i, j, k, r, c; unsigned int flags; @@ -129,6 +124,9 @@ decompress_lzss(u_int8_t *dst, u_int8_t *src, u_int32_t srclen) if (flags & 1) { if (src < srcend) c = *src++; else break; *dst++ = c; + if (dst >= dstend) { + goto finish; + } text_buf[r++] = c; r &= (N - 1); } else { @@ -139,12 +137,15 @@ decompress_lzss(u_int8_t *dst, u_int8_t *src, u_int32_t srclen) for (k = 0; k <= j; k++) { c = text_buf[(i + k) & (N - 1)]; *dst++ = c; + if (dst >= dstend) { + goto finish; + } text_buf[r++] = c; r &= (N - 1); } } } - +finish: return dst - dststart; } @@ -264,119 +265,5 @@ static void delete_node(struct encode_state *sp, int p) sp->parent[p] = NIL; } -__private_extern__ 
u_int8_t * -compress_lzss(u_int8_t *dst, u_int32_t dstlen, u_int8_t *src, u_int32_t srcLen) -{ - /* Encoding state, mostly tree but some current match stuff */ - struct encode_state *sp; - - int i, c, len, r, s, last_match_length, code_buf_ptr; - u_int8_t code_buf[17], mask; - u_int8_t *srcend = src + srcLen; - u_int8_t *dstend = dst + dstlen; - - /* initialize trees */ - sp = (struct encode_state *) malloc(sizeof(*sp)); - init_state(sp); - - /* - * code_buf[1..16] saves eight units of code, and code_buf[0] works - * as eight flags, "1" representing that the unit is an unencoded - * letter (1 byte), "0" a position-and-length pair (2 bytes). - * Thus, eight units require at most 16 bytes of code. - */ - code_buf[0] = 0; - code_buf_ptr = mask = 1; - - /* Clear the buffer with any character that will appear often. */ - s = 0; r = N - F; - - /* Read F bytes into the last F bytes of the buffer */ - for (len = 0; len < F && src < srcend; len++) - sp->text_buf[r + len] = *src++; - if (!len) - return (void *) 0; /* text of size zero */ - - /* - * Insert the F strings, each of which begins with one or more - * 'space' characters. Note the order in which these strings are - * inserted. This way, degenerate trees will be less likely to occur. - */ - for (i = 1; i <= F; i++) - insert_node(sp, r - i); - - /* - * Finally, insert the whole string just read. - * The global variables match_length and match_position are set. - */ - insert_node(sp, r); - do { - /* match_length may be spuriously long near the end of text. */ - if (sp->match_length > len) - sp->match_length = len; - if (sp->match_length <= THRESHOLD) { - sp->match_length = 1; /* Not long enough match. Send one byte. */ - code_buf[0] |= mask; /* 'send one byte' flag */ - code_buf[code_buf_ptr++] = sp->text_buf[r]; /* Send uncoded. */ - } else { - /* Send position and length pair. Note match_length > THRESHOLD. 
*/ - code_buf[code_buf_ptr++] = (u_int8_t) sp->match_position; - code_buf[code_buf_ptr++] = (u_int8_t) - ( ((sp->match_position >> 4) & 0xF0) - | (sp->match_length - (THRESHOLD + 1)) ); - } - if ((mask <<= 1) == 0) { /* Shift mask left one bit. */ - /* Send at most 8 units of code together */ - for (i = 0; i < code_buf_ptr; i++) - if (dst < dstend) - *dst++ = code_buf[i]; - else - return (void *) 0; - code_buf[0] = 0; - code_buf_ptr = mask = 1; - } - last_match_length = sp->match_length; - for (i = 0; i < last_match_length && src < srcend; i++) { - delete_node(sp, s); /* Delete old strings and */ - c = *src++; - sp->text_buf[s] = c; /* read new bytes */ - - /* - * If the position is near the end of buffer, extend the buffer - * to make string comparison easier. - */ - if (s < F - 1) - sp->text_buf[s + N] = c; - - /* Since this is a ring buffer, increment the position modulo N. */ - s = (s + 1) & (N - 1); - r = (r + 1) & (N - 1); - - /* Register the string in text_buf[r..r+F-1] */ - insert_node(sp, r); - } - while (i++ < last_match_length) { - delete_node(sp, s); - - /* After the end of text, no need to read, */ - s = (s + 1) & (N - 1); - r = (r + 1) & (N - 1); - /* but buffer may not be empty. */ - if (--len) - insert_node(sp, r); - } - } while (len > 0); /* until length of string to be processed is zero */ - - if (code_buf_ptr > 1) { /* Send remaining code. 
*/ - for (i = 0; i < code_buf_ptr; i++) - if (dst < dstend) - *dst++ = code_buf[i]; - else - return (void *) 0; - } - - return dst; -} - #endif /* !KERNEL */ diff --git a/libkern/ppc/OSAtomic.s b/libkern/ppc/OSAtomic.s index aaa20f2dd..82b1f3c99 100644 --- a/libkern/ppc/OSAtomic.s +++ b/libkern/ppc/OSAtomic.s @@ -93,6 +93,7 @@ SInt32 OSAddAtomic(SInt32 amount, SInt32 * value) */ ENTRY _OSAddAtomic + ENTRY _OSAddAtomicLong mr r5,r3 /* Save the increment */ .L_AAretry: diff --git a/libkern/stack_protector.c b/libkern/stack_protector.c new file mode 100644 index 000000000..dad8a7e2e --- /dev/null +++ b/libkern/stack_protector.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * + * %Begin-Header% + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * %End-Header% + */ + +/******************************************************************************* +* NOTE: This implementation of the stack check routines required by the GCC +* -fstack-protector flag is only safe for kernel extensions. +*******************************************************************************/ + +#include +#include +#include + +long __stack_chk_guard[8]; +void __stack_chk_fail(void); + +static void __guard_setup(void) __attribute__((constructor)); + +static void +__guard_setup(void) +{ + /* Cannot report failure. 
*/ + read_random(__stack_chk_guard, sizeof(__stack_chk_guard)); +} + +void +__stack_chk_fail(void) +{ + panic("Kernel stack memory corruption detected"); +} + diff --git a/libkern/uuid/Makefile b/libkern/uuid/Makefile index 8b26dcefe..c7c467538 100644 --- a/libkern/uuid/Makefile +++ b/libkern/uuid/Makefile @@ -13,6 +13,8 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ @@ -21,6 +23,8 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS_ARM = \ # uuid.h is now installed by bsd/uuid/Makefile diff --git a/libkern/uuid/uuid.c b/libkern/uuid/uuid.c index 351de623a..ac9db3f84 100644 --- a/libkern/uuid/uuid.c +++ b/libkern/uuid/uuid.c @@ -144,16 +144,16 @@ uuid_is_null(const uuid_t uu) } int -uuid_parse(const char *in, uuid_t uu) +uuid_parse(const uuid_string_t in, uuid_t uu) { int n = 0; sscanf(in, - "%hh2x%hh2x%hh2x%hh2x-" - "%hh2x%hh2x-" - "%hh2x%hh2x-" - "%hh2x%hh2x-" - "%hh2x%hh2x%hh2x%hh2x%hh2x%hh2x%n", + "%2hhx%2hhx%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx-" + "%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx%n", &uu[0], &uu[1], &uu[2], &uu[3], &uu[4], &uu[5], &uu[6], &uu[7], @@ -164,9 +164,10 @@ uuid_parse(const char *in, uuid_t uu) } void -uuid_unparse_lower(const uuid_t uu, char *out) +uuid_unparse_lower(const uuid_t uu, uuid_string_t out) { - sprintf(out, + snprintf(out, + sizeof(uuid_string_t), "%02x%02x%02x%02x-" "%02x%02x-" "%02x%02x-" @@ -180,9 +181,10 @@ uuid_unparse_lower(const uuid_t uu, char *out) } void -uuid_unparse_upper(const uuid_t uu, char *out) +uuid_unparse_upper(const uuid_t uu, uuid_string_t out) { - sprintf(out, + snprintf(out, + sizeof(uuid_string_t), "%02X%02X%02X%02X-" "%02X%02X-" "%02X%02X-" @@ -196,7 +198,7 @@ uuid_unparse_upper(const uuid_t uu, char *out) } void -uuid_unparse(const uuid_t uu, char *out) +uuid_unparse(const uuid_t uu, uuid_string_t out) { uuid_unparse_upper(uu, out); } diff --git a/libkern/x86_64/OSAtomic.s 
b/libkern/x86_64/OSAtomic.s new file mode 100644 index 000000000..b3b26164a --- /dev/null +++ b/libkern/x86_64/OSAtomic.s @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#;*************************************************************************** +#;* Boolean OSCompareAndSwap(SInt32 oldValue, SInt32 newValue, SInt32 *ptr) * +#;*************************************************************************** + + .globl _OSCompareAndSwap + +// TODO FIXME!! +_OSCompareAndSwap: #;oldValue, newValue, ptr + movl %edi, %eax + lock + cmpxchgl %esi, 0(%rdx) #; CAS (eax is an implicit operand) + sete %al #; did CAS succeed? 
(TZ=1) + movzbq %al, %rax #; clear out the high bytes + ret + +#;***************************************************************************** +#;* Boolean OSCompareAndSwap64(SInt64 oldValue, SInt64 newValue, SInt64 *ptr) * +#;***************************************************************************** + + .globl _OSCompareAndSwap64 + .globl _OSCompareAndSwapPtr + +_OSCompareAndSwap64: +_OSCompareAndSwapPtr: #;oldValue, newValue, ptr + movq %rdi, %rax + lock + cmpxchgq %rsi, 0(%rdx) #; CAS (eax is an implicit operand) + sete %al #; did CAS succeed? (TZ=1) + movzbq %al, %rax #; clear out the high bytes + ret + +#;******************************************************* +#;* SInt64 OSAddAtomic64(SInt64 theAmount, SInt64 *ptr) * +#;******************************************************* + + .globl _OSAddAtomicLong + .globl _OSAddAtomic64 +_OSAddAtomic64: +_OSAddAtomicLong: + lock + xaddq %rdi, 0(%rsi) #; Atomic exchange and add + movq %rdi, %rax; + ret + + +#;******************************************************* +#; SInt32 OSAddAtomic(SInt32 delta, SInt32 *address) +#;******************************************************* + + .globl _OSAddAtomic +_OSAddAtomic: + lock + xaddl %edi, 0(%rsi) #; Atomic exchange and add + movl %edi, %eax; + ret diff --git a/libkern/zlib.c b/libkern/zlib.c deleted file mode 100644 index 0377edb1f..000000000 --- a/libkern/zlib.c +++ /dev/null @@ -1,5778 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * This file is derived from various .h and .c files from the zlib-1.0.4 - * distribution by Jean-loup Gailly and Mark Adler, with some additions - * by Paul Mackerras to aid in implementing Deflate compression and - * decompression for PPP packets. See zlib.h for conditions of - * distribution and use. - * - * Changes that have been made include: - * - added Z_PACKET_FLUSH (see zlib.h for details) - * - added inflateIncomp and deflateOutputPending - * - allow strm->next_out to be NULL, meaning discard the output - * - * $FreeBSD: src/sys/net/zlib.c,v 1.10 1999/12/29 04:38:38 peter Exp $ - */ - -#define STDC -#define NO_DUMMY_DECL -#define NO_ZCFUNCS -#define MY_ZCALLOC - -/* +++ zutil.h */ -/* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2002 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. 
It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -#ifndef _Z_UTIL_H -#define _Z_UTIL_H - -#ifdef KERNEL -#include -#else -#include "zlib.h" -#endif - -#ifdef KERNEL -/* Assume this is a *BSD or SVR4 kernel */ -#include -#include -#include -# define HAVE_MEMCPY -# define memcpy(d, s, n) bcopy((s), (d), (n)) -# define memset(d, v, n) bzero((d), (n)) -# define memcmp bcmp - -#else -#if defined(__KERNEL__) -/* Assume this is a Linux kernel */ -#include -#define HAVE_MEMCPY - -#else /* not kernel */ -#ifdef STDC -# include -# include -# include -#endif -#ifdef NO_ERRNO_H - extern int errno; -#else -# include -#endif -#endif /* __KERNEL__ */ -#endif /* KERNEL */ - -typedef unsigned char uch; -typedef uch FAR uchf; -typedef unsigned short ush; -typedef ush FAR ushf; -typedef unsigned long ulg; - -/* (size given to avoid silly warnings with Visual C++) */ -static const char *z_errmsg[10] = { /* indexed by 2-zlib_error */ -"need dictionary", /* Z_NEED_DICT 2 */ -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -"incompatible version",/* Z_VERSION_ERROR (-6) */ -""}; - -#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] - -#define ERR_RETURN(strm,err) \ - return (strm->msg = (char*)ERR_MSG(err), (err)) -/* To be used only when the state is known to be valid */ - - /* common constants */ - -#ifndef DEF_WBITS -# define DEF_WBITS MAX_WBITS -#endif -/* default windowBits for decompression. 
MAX_WBITS is for compression only */ - -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif -/* default memLevel */ - -#define STORED_BLOCK 0 -#define STATIC_TREES 1 -#define DYN_TREES 2 -/* The three kinds of block type */ - -#define MIN_MATCH 3 -#define MAX_MATCH 258 -/* The minimum and maximum match lengths */ - -#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ - - /* target dependencies */ - -#ifdef MSDOS -# define OS_CODE 0x00 -# if defined(__TURBOC__) || defined(__BORLANDC__) -# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) - /* Allow compilation with ANSI keywords only enabled */ - void _Cdecl farfree( void *block ); - void *_Cdecl farmalloc( unsigned long nbytes ); -# else -# include -# endif -# else /* MSC or DJGPP */ -# include -# endif -#endif - -#ifdef OS2 -# define OS_CODE 0x06 -#endif - -#ifdef WIN32 /* Window 95 & Windows NT */ -# define OS_CODE 0x0b -#endif - -#if defined(VAXC) || defined(VMS) -# define OS_CODE 0x02 -# define F_OPEN(name, mode) \ - fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") -#endif - -#ifdef AMIGA -# define OS_CODE 0x01 -#endif - -#if defined(ATARI) || defined(atarist) -# define OS_CODE 0x05 -#endif - -#if defined(MACOS) || defined(TARGET_OS_MAC) -# define OS_CODE 0x07 -# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include /* for fdopen */ -# else -# ifndef fdopen -# define fdopen(fd,mode) NULL /* No fdopen() */ -# endif -# endif -#endif - -#ifdef __50SERIES /* Prime/PRIMOS */ -# define OS_CODE 0x0F -#endif - -#ifdef TOPS20 -# define OS_CODE 0x0a -#endif - -#if defined(_BEOS_) || defined(RISCOS) -# define fdopen(fd,mode) NULL /* No fdopen() */ -#endif - -#if (defined(_MSC_VER) && (_MSC_VER > 600)) -# define fdopen(fd,type) _fdopen(fd,type) -#endif - - - /* Common defaults */ - -#ifndef OS_CODE -# define OS_CODE 0x03 /* assume Unix */ -#endif - -#ifndef F_OPEN -# define F_OPEN(name, mode) 
fopen((name), (mode)) -#endif - - /* functions */ - -#ifdef HAVE_STRERROR - extern char *strerror OF((int)); -# define zstrerror(errnum) strerror(errnum) -#else -# define zstrerror(errnum) "" -#endif - -#if defined(pyr) -# define NO_MEMCPY -#endif -#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) - /* Use our own functions for small and medium model with MSC <= 5.0. - * You may have to use the same strategy for Borland C (untested). - * The __SC__ check is for Symantec. - */ -# define NO_MEMCPY -#endif -#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) -# define HAVE_MEMCPY -#endif -#ifdef HAVE_MEMCPY -# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ -# define zmemcpy _fmemcpy -# define zmemcmp _fmemcmp -# define zmemzero(dest, len) _fmemset(dest, 0, len) -# else -# define zmemcpy memcpy -# define zmemcmp memcmp -# define zmemzero(dest, len) memset(dest, 0, len) -# endif -#else - extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); - extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); - extern void zmemzero OF((Bytef* dest, uInt len)); -#endif - -/* Diagnostic functions */ -#ifdef DEBUG_ZLIB -# include - extern int z_verbose; - extern void z_error OF((char *m)); -# define Assert(cond,msg) {if(!(cond)) z_error(msg);} -# define Trace(x) {if (z_verbose>=0) fprintf x ;} -# define Tracev(x) {if (z_verbose>0) fprintf x ;} -# define Tracevv(x) {if (z_verbose>1) fprintf x ;} -# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} -# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} -#else -# define Assert(cond,msg) do {} while(0) -# define Trace(x) do {} while(0) -# define Tracev(x) do {} while(0) -# define Tracevv(x) do {} while(0) -# define Tracec(c,x) do {} while(0) -# define Tracecv(c,x) do {} while(0) -#endif - - -typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf, - uInt len)); -voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); -void zcfree 
OF((voidpf opaque, voidpf ptr)); - -#define ZALLOC(strm, items, size) \ - (*((strm)->zalloc))((strm)->opaque, (items), (size)) -#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) -#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} - -#endif /* _Z_UTIL_H */ -/* --- zutil.h */ - -/* +++ deflate.h */ -/* deflate.h -- internal compression state - * Copyright (C) 1995-2002 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -#ifndef _DEFLATE_H -#define _DEFLATE_H - -/* #include "zutil.h" */ - -/* =========================================================================== - * Internal compression state. - */ - -#define LENGTH_CODES 29 -/* number of length codes, not counting the special END_BLOCK code */ - -#define LITERALS 256 -/* number of literal bytes 0..255 */ - -#define L_CODES (LITERALS+1+LENGTH_CODES) -/* number of Literal or Length codes, including the END_BLOCK code */ - -#define D_CODES 30 -/* number of distance codes */ - -#define BL_CODES 19 -/* number of codes used to transfer the bit lengths */ - -#define HEAP_SIZE (2*L_CODES+1) -/* maximum heap size */ - -#define MAX_BITS 15 -/* All codes must not exceed MAX_BITS bits */ - -#define INIT_STATE 42 -#define BUSY_STATE 113 -#define FINISH_STATE 666 -/* Stream status */ - - -/* Data structure describing a single value and its code string. 
*/ -typedef struct ct_data_s { - union { - ush freq; /* frequency count */ - ush code; /* bit string */ - } fc; - union { - ush dad; /* father node in Huffman tree */ - ush len; /* length of bit string */ - } dl; -} FAR ct_data; - -#define Freq fc.freq -#define Code fc.code -#define Dad dl.dad -#define Len dl.len - -typedef struct static_tree_desc_s static_tree_desc; - -typedef struct tree_desc_s { - ct_data *dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - static_tree_desc *stat_desc; /* the corresponding static tree */ -} FAR tree_desc; - -typedef ush Pos; -typedef Pos FAR Posf; -typedef unsigned IPos; - -/* A Pos is an index in the character window. We use short instead of int to - * save space in the various tables. IPos is used only for parameter passing. - */ - -typedef struct deflate_state { - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Bytef *pending_buf; /* output still pending */ - ulg pending_buf_size; /* size of pending_buf */ - Bytef *pending_out; /* next pending byte to output to the stream */ - int pending; /* nb of bytes in the pending buffer */ - int noheader; /* suppress zlib header and adler32 */ - Byte data_type; /* UNKNOWN, BINARY or ASCII */ - Byte method; /* STORED (for zip only) or DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - - /* used by deflate.c: */ - - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ - - Bytef *window; - /* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least wSize - * bytes. With this organization, matches are limited to a distance of - * wSize-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. 
Also, it limits - * the window size to 64K, which is quite useful on MSDOS. - * To do: use the user input buffer as sliding window. - */ - - ulg window_size; - /* Actual size of window: 2*wSize, except when the user input buffer - * is directly used as sliding window. - */ - - Posf *prev; - /* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - - Posf *head; /* Heads of the hash chains or NIL. */ - - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ - - uInt hash_shift; - /* Number of bits by which ins_h must be shifted at each input - * step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * hash_shift * MIN_MATCH >= hash_bits - */ - - long block_start; - /* Window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ - - uInt prev_length; - /* Length of the best match at previous step. Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - - uInt max_chain_length; - /* To speed up deflation, hash chains are never searched beyond this - * length. A higher limit improves compression ratio but degrades the - * speed. - */ - - uInt max_lazy_match; - /* Attempt to find a better match only when the current match is strictly - * smaller than this value. 
This mechanism is used only for compression - * levels >= 4. - */ -# define max_insert_length max_lazy_match - /* Insert new strings in the hash table only if the match length is not - * greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. - */ - - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ - - uInt good_match; - /* Use a faster search when the previous match is longer than this */ - - int nice_match; /* Stop searching when current match exceeds this */ - - /* used by trees.c: */ - /* Didn't use ct_data typedef below to supress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ - struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ - - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ - - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. - * The same heap array is used to build all trees. - */ - - uch depth[2*L_CODES+1]; - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - - uchf *l_buf; /* buffer for literals or lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for - * limiting lit_bufsize to 64K: - * - frequencies can be kept in 16 bit counters - * - if compression is not successful for the first block, all input - * data is still in the window so we can still emit a stored block even - * when input comes from standard input. 
(This can also be done for - * all blocks if lit_bufsize is not greater than 32K.) - * - if compression is not successful for a file smaller than 64K, we can - * even emit a stored file instead of a stored block (saving 5 bytes). - * This is applicable only for zip (not gzip or zlib). - * - creating new Huffman trees less frequently may not provide fast - * adaptation to changes in the input data statistics. (Take for - * example a binary file with poorly compressible code followed by - * a highly compressible string table.) Smaller buffer sizes give - * fast adaptation but have of course the overhead of transmitting - * trees more frequently. - * - I can't count above 4 - */ - - uInt last_lit; /* running index in l_buf */ - - ushf *d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ - uInt matches; /* number of string matches in current block */ - int last_eob_len; /* bit length of EOB code for last block */ - -#ifdef DEBUG_ZLIB - ulg compressed_len; /* total bit length of compressed file mod 2^32 */ - ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ -#endif - - ush bi_buf; - /* Output buffer. bits are inserted starting at the bottom (least - * significant bits). - */ - int bi_valid; - /* Number of valid bits in bi_buf. All bits above the last valid bit - * are always zero. - */ - -} FAR deflate_state; - -/* Output a byte on the stream. - * IN assertion: there is enough room in pending_buf. - */ -#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} - - -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. 
- */ - -#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) -/* In order to simplify the code, particularly on 16 bit machines, match - * distances are limited to MAX_DIST instead of WSIZE. - */ - - /* in trees.c */ -static void _tr_init OF((deflate_state *s)); -static int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); -static void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); -static void _tr_align OF((deflate_state *s)); -static void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); - -#define d_code(dist) \ - ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) -/* Mapping from a distance to a distance code. dist is the distance - 1 and - * must not have side effects. _dist_code[256] and _dist_code[257] are never - * used. - */ - -#ifndef DEBUG_ZLIB -/* Inline versions of _tr_tally for speed: */ - -#if defined(GEN_TREES_H) || !defined(STDC) - extern uch _length_code[]; - extern uch _dist_code[]; -#else - extern const uch _length_code[]; - extern const uch _dist_code[]; -#endif - -# define _tr_tally_lit(s, c, flush) \ - { uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ - s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ - } -# define _tr_tally_dist(s, distance, length, flush) \ - { uch len = (length); \ - ush dist = (distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ - dist--; \ - s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize-1); \ - } -#else -# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -# define _tr_tally_dist(s, distance, length, flush) \ - flush = _tr_tally(s, distance, length) -#endif - -#endif -/* --- deflate.h */ - -/* +++ deflate.c */ -/* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2002 Jean-loup Gailly. 
- * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process depends on being able to identify portions - * of the input text which are identical to earlier input (within a - * sliding window trailing behind the input currently being processed). - * - * The most straightforward technique turns out to be the fastest for - * most input files: try all possible matches and select the longest. - * The key feature of this algorithm is that insertions into the string - * dictionary are very simple and thus fast, and deletions are avoided - * completely. Insertions are performed at each input character, whereas - * string matches are performed only when the previous match ends. So it - * is preferable to spend more time in matches to allow very fast string - * insertions and avoid deletions. The matching algorithm for small - * strings is inspired from that of Rabin & Karp. A brute force approach - * is used to find longer strings when a small match has been found. - * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze - * (by Leonid Broukhis). - * A previous version of this file used a more sophisticated algorithm - * (by Fiala and Greene) which is guaranteed to run in linear amortized - * time, but has a larger average cost, uses more memory and is patented. - * However the F&G algorithm may be faster for some highly redundant - * files if the parameter max_chain_length (described below) is too large. - * - * ACKNOWLEDGEMENTS - * - * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and - * I found it in 'freeze' written by Leonid Broukhis. - * Thanks to many people for bug reports and testing. - * - * REFERENCES - * - * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in ftp://ds.internic.net/rfc/rfc1951.txt - * - * A description of the Rabin and Karp algorithm is given in the book - * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. 
- * - * Fiala,E.R., and Greene,D.H. - * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 - * - */ - -/* #include "deflate.h" */ - -const char deflate_copyright[] = - " deflate 1.1.4 Copyright 1995-2002 Jean-loup Gailly "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -/* =========================================================================== - * Function prototypes. - */ -typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ -} block_state; - -typedef block_state (*compress_func) OF((deflate_state *s, int flush)); -/* Compression function. Returns the block state after the call. 
*/ - -static void fill_window OF((deflate_state *s)); -static block_state deflate_stored OF((deflate_state *s, int flush)); -static block_state deflate_fast OF((deflate_state *s, int flush)); -static block_state deflate_slow OF((deflate_state *s, int flush)); -static void lm_init OF((deflate_state *s)); -static void putShortMSB OF((deflate_state *s, uInt b)); -static void flush_pending OF((z_streamp strm)); -static int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -#ifdef ASMV - void match_init OF((void)); /* asm code initialization */ - uInt longest_match OF((deflate_state *s, IPos cur_match)); -#else -static uInt longest_match OF((deflate_state *s, IPos cur_match)); -#endif - -#ifdef DEBUG_ZLIB -static void check_match OF((deflate_state *s, IPos start, IPos match, - int length)); -#endif - -/* =========================================================================== - * Local data - */ - -#define NIL 0 -/* Tail of hash chains */ - -#ifndef TOO_FAR -# define TOO_FAR 4096 -#endif -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ - -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - -/* Values for max_lazy_match, good_match and max_chain_length, depending on - * the desired pack level (0..9). The values given below have been tuned to - * exclude worst case performance for pathological files. Better values may be - * found for specific files. 
- */ -typedef struct config_s { - ush good_length; /* reduce lazy search above this match length */ - ush max_lazy; /* do not perform lazy search above this match length */ - ush nice_length; /* quit search above this match length */ - ush max_chain; - compress_func func; -} config; - -static const config configuration_table[10] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ -/* 2 */ {4, 5, 16, 8, deflate_fast}, -/* 3 */ {4, 6, 32, 32, deflate_fast}, - -/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ -/* 5 */ {8, 16, 32, 32, deflate_slow}, -/* 6 */ {8, 16, 128, 128, deflate_slow}, -/* 7 */ {8, 32, 128, 256, deflate_slow}, -/* 8 */ {32, 128, 258, 1024, deflate_slow}, -/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ - -/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 - * For deflate_fast() (levels <= 3) good is ignored and lazy has a different - * meaning. - */ - -#define EQUAL 0 -/* result of memcmp for equal strings */ - -#ifndef NO_DUMMY_DECL -struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ -#endif - -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to to UPDATE_HASH are made with consecutive - * input characters, so that a running hash key can be computed from the - * previous key instead of complete recalculation each time. - */ -#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) - - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. 
- * If this file is compiled with -DFASTEST, the compression level is forced - * to 1, and no hash chains are maintained. - * IN assertion: all calls to to INSERT_STRING are made with consecutive - * input characters and the first MIN_MATCH bytes of str are valid - * (except for the last MIN_MATCH-1 bytes of the input file). - */ -#ifdef FASTEST -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - match_head = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#else -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#endif - -/* =========================================================================== - * Initialize the hash table (avoiding 64K overflow for 16 bit systems). - * prev[] will be initialized on the fly. - */ -#define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -/* ========================================================================= */ -int ZEXPORT -deflateInit_(z_streamp strm, int level, const char *ver, int stream_size) -{ - return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, - Z_DEFAULT_STRATEGY, ver, stream_size); - /* To do: ignore strm->next_in if we use it as window */ -} - -/* ========================================================================= */ -int ZEXPORT -deflateInit2_(z_streamp strm, int level, int method, int windowBits, - int memLevel, int strategy, const char *ver, int stream_size) -{ - deflate_state *s; - int noheader = 0; - static const char* my_version = ZLIB_VERSION; - - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits. 
- */ - - if (ver == Z_NULL || ver[0] != my_version[0] || - stream_size != sizeof(z_stream)) { - return Z_VERSION_ERROR; - } - if (strm == Z_NULL) return Z_STREAM_ERROR; - - strm->msg = Z_NULL; -#ifndef NO_ZCFUNCS - if (strm->zalloc == Z_NULL) { - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; - } - if (strm->zfree == Z_NULL) strm->zfree = zcfree; -#endif - - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#ifdef FASTEST - level = 1; -#endif - - if (windowBits < 0) { /* undocumented feature: suppress zlib header */ - noheader = 1; - windowBits = -windowBits; - } - if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || - windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); - if (s == Z_NULL) return Z_MEM_ERROR; - strm->state = (struct internal_state FAR *)s; - s->strm = strm; - - s->noheader = noheader; - s->w_bits = windowBits; - s->w_size = 1 << s->w_bits; - s->w_mask = s->w_size - 1; - - s->hash_bits = memLevel + 7; - s->hash_size = 1 << s->hash_bits; - s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); - - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); - s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); - s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { - strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); - deflateEnd (strm); - return Z_MEM_ERROR; - } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; - - s->level = level; - 
s->strategy = strategy; - s->method = (Byte)method; - - return deflateReset(strm); -} - -/* ========================================================================= */ -int ZEXPORT -deflateSetDictionary(z_streamp strm, const Bytef *dictionary, uInt dictLength) -{ - deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; - - if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || - ((deflate_state*)strm->state)->status != INIT_STATE) return Z_STREAM_ERROR; - - s = (deflate_state*)strm->state; - strm->adler = adler32(strm->adler, dictionary, dictLength); - - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); -#ifndef USE_DICT_HEAD - dictionary += dictLength - length; /* use the tail of the dictionary */ -#endif - } - zmemcpy(s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. - */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); - } - if (hash_head) hash_head = 0; /* to make compiler happy */ - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT -deflateReset(z_streamp strm) -{ - deflate_state *s; - - if (strm == Z_NULL || strm->state == Z_NULL || - strm->zalloc == Z_NULL || strm->zfree == Z_NULL) return Z_STREAM_ERROR; - - strm->total_in = strm->total_out = 0; - strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ - strm->data_type = Z_UNKNOWN; - - s = (deflate_state *)strm->state; - s->pending = 0; - s->pending_out = s->pending_buf; - - if (s->noheader < 0) { - s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ - } - s->status = s->noheader ? 
BUSY_STATE : INIT_STATE; - strm->adler = 1; - s->last_flush = Z_NO_FLUSH; - - _tr_init(s); - lm_init(s); - - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT -deflateParams(z_streamp strm, int level, int strategy) -{ - deflate_state *s; - compress_func func; - int err = Z_OK; - - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - s = (deflate_state*)strm->state; - - if (level == Z_DEFAULT_COMPRESSION) { - level = 6; - } - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if (func != configuration_table[level].func && strm->total_in != 0) { - /* Flush the last buffer: */ - err = deflate(strm, Z_PARTIAL_FLUSH); - } - if (s->level != level) { - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return err; -} - -/* ========================================================================= - * Put a short in the pending buffer. The 16-bit value is put in MSB order. - * IN assertion: the stream state is correct and there is enough room in - * pending_buf. - */ -static void -putShortMSB(deflate_state *s, uInt b) -{ - put_byte(s, (Byte)(b >> 8)); - put_byte(s, (Byte)(b & 0xff)); -} - -/* ========================================================================= - * Flush as much pending output as possible. All deflate() output goes - * through this function so some applications may wish to modify it - * to avoid allocating a large strm->next_out buffer and copying into it. - * (See also read_buf()). 
- */ -static void -flush_pending(z_streamp strm) -{ - deflate_state* s = (deflate_state*)strm->state; - unsigned len = s->pending; - - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; - - zmemcpy(strm->next_out, s->pending_out, len); - strm->next_out += len; - s->pending_out += len; - strm->total_out += len; - strm->avail_out -= len; - s->pending -= len; - if (s->pending == 0) { - s->pending_out = s->pending_buf; - } -} - -/* ========================================================================= */ -int ZEXPORT -deflate (z_streamp strm, int flush) -{ - int old_flush; /* value of flush param for previous deflate call */ - deflate_state *s; - - if (strm == Z_NULL || strm->state == Z_NULL || - flush > Z_FINISH || flush < 0) { - return Z_STREAM_ERROR; - } - s = (deflate_state*)strm->state; - - if (strm->next_out == Z_NULL || - (strm->next_in == Z_NULL && strm->avail_in != 0) || - (s->status == FINISH_STATE && flush != Z_FINISH)) { - ERR_RETURN(strm, Z_STREAM_ERROR); - } - if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); - - s->strm = strm; /* just in case */ - old_flush = s->last_flush; - s->last_flush = flush; - - /* Write the zlib header */ - if (s->status == INIT_STATE) { - - uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; - uInt level_flags = (s->level-1) >> 1; - - if (level_flags > 3) level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); - - s->status = BUSY_STATE; - putShortMSB(s, header); - - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - strm->adler = 1L; - } - - /* Flush as much pending output as possible */ - if (s->pending != 0) { - flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal 
to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ - s->last_flush = -1; - return Z_OK; - } - - /* Make sure there is something to do and avoid duplicate consecutive - * flushes. For repeated and useless calls with Z_FINISH, we keep - * returning Z_STREAM_END instead of Z_BUFF_ERROR. - */ - } else if (strm->avail_in == 0 && flush <= old_flush && - flush != Z_FINISH) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* Start a new block or continue the current one. - */ - if (strm->avail_in != 0 || s->lookahead != 0 || - (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { - block_state bstate; - - bstate = (*(configuration_table[s->level].func))(s, flush); - - if (bstate == finish_started || bstate == finish_done) { - s->status = FINISH_STATE; - } - if (bstate == need_more || bstate == finish_started) { - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ - } - return Z_OK; - /* If flush != Z_NO_FLUSH && avail_out == 0, the next call - * of deflate should use the same flush parameter to make sure - * that the flush is complete. So we don't have to output an - * empty block here, this will be done at next call. This also - * ensures that for a very small output buffer, we emit at most - * one empty block. - */ - } - if (bstate == block_done) { - if (flush == Z_PARTIAL_FLUSH) { - _tr_align(s); - } else { /* FULL_FLUSH or SYNC_FLUSH */ - _tr_stored_block(s, (char*)0, 0L, 0); - /* For a full flush, this empty block will be recognized - * as a special marker by inflate_sync(). 
- */ - if (flush == Z_FULL_FLUSH) { - CLEAR_HASH(s); /* forget history */ - } - } - flush_pending(strm); - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ - return Z_OK; - } - } - } - Assert(strm->avail_out > 0, "bug2"); - - if (flush != Z_FINISH) return Z_OK; - if (s->noheader) return Z_STREAM_END; - - /* Write the zlib trailer (adler32) */ - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - flush_pending(strm); - /* If avail_out is zero, the application will call deflate again - * to flush the rest. - */ - s->noheader = -1; /* write the trailer only once! */ - return s->pending != 0 ? Z_OK : Z_STREAM_END; -} - -/* ========================================================================= */ -int ZEXPORT -deflateEnd(z_streamp strm) -{ - deflate_state* s; - int status; - - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - - s = (deflate_state*)strm->state; - status = s->status; - if (status != INIT_STATE && status != BUSY_STATE && - status != FINISH_STATE) { - return Z_STREAM_ERROR; - } - - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, s->pending_buf); - TRY_FREE(strm, s->head); - TRY_FREE(strm, s->prev); - TRY_FREE(strm, s->window); - - ZFREE(strm, s); - strm->state = Z_NULL; - - return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; -} - -/* ========================================================================= - * Copy the source state to the destination state. - * To simplify the source, this is not supported for 16-bit MSDOS (which - * doesn't have enough memory anyway to duplicate compression states). 
- */ -int ZEXPORT -deflateCopy(z_streamp dest, z_streamp source) -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ushf *overlay; - - - if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { - return Z_STREAM_ERROR; - } - - ss = (deflate_state*)source->state; - - *dest = *source; - - ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); - if (ds == Z_NULL) return Z_MEM_ERROR; - dest->state = (struct internal_state FAR *) ds; - *ds = *ss; - ds->strm = dest; - - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); - ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; - - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { - deflateEnd (dest); - return Z_MEM_ERROR; - } - /* following zmemcpy do not work for 16-bit MSDOS */ - zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); - zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif -} - -/* =========================================================================== - * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through - * this function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. 
- * (See also flush_pending()). - */ -static int -read_buf(z_streamp strm, Bytef *buf, unsigned int size) -{ - unsigned len = strm->avail_in; - - if (len > size) len = size; - if (len == 0) return 0; - - strm->avail_in -= len; - - if (!((deflate_state*)strm->state)->noheader) { - strm->adler = adler32(strm->adler, strm->next_in, len); - } - zmemcpy(buf, strm->next_in, len); - strm->next_in += len; - strm->total_in += len; - - return (int)len; -} - -/* =========================================================================== - * Initialize the "longest match" routines for a new zlib stream - */ -static void -lm_init(deflate_state *s) -{ - s->window_size = (ulg)2L*s->w_size; - - CLEAR_HASH(s); - - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; - - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - s->ins_h = 0; -#ifdef ASMV - match_init(); /* initialize the asm code */ -#endif -} - -/* =========================================================================== - * Set match_start to the longest match starting at the given string and - * return its length. Matches shorter or equal to prev_length are discarded, - * in which case the result is equal to prev_length and match_start is - * garbage. - * IN assertions: cur_match is the head of the hash chain for the current - * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 - * OUT assertion: the match length is not greater than s->lookahead. - */ -#ifndef ASMV -/* For 80x86 and 680x0, an optimized version will be provided in match.asm or - * match.S. The code will be functionally equivalent. 
- */ -#ifndef FASTEST -static uInt -longest_match(deflate_state *s, IPos cur_match) -{ - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - Bytef *scan = s->window + s->strstart; /* current string */ - Bytef *match; /* matched string */ - int len; /* length of current match */ - int best_len = s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0. - */ - Posf *prev = s->prev; - uInt wmask = s->w_mask; - -#ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - ush scan_start = *(ushf*)scan; - ush scan_end = *(ushf*)(scan+best_len-1); -#else - Bytef *strend = s->window + s->strstart + MAX_MATCH; - Byte scan_end1 = scan[best_len-1]; - Byte scan_end = scan[best_len]; -#endif - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. 
- */ - if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2: - */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. - */ - if (*(ushf*)(match+best_len-1) != scan_end || - *(ushf*)match != scan_start) continue; - - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - Assert(scan[2] == match[2], "scan[2]?"); - scan++, match++; - do { - } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - if (*scan == *match) scan++; - - len = (MAX_MATCH - 1) - (int)(strend-scan); - scan = strend - (MAX_MATCH-1); - -#else /* UNALIGNED_OK */ - - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; - - /* The check at best_len-1 can be removed because it will be made - * again later. 
(This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; - -#endif /* UNALIGNED_OK */ - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(ushf*)(scan+best_len-1); -#else - scan_end1 = scan[best_len-1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit - && --chain_length != 0); - - if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - return s->lookahead; -} - -#else /* FASTEST */ -/* --------------------------------------------------------------------------- - * Optimized version for level == 1 only - */ -static uInt -longest_match(deflate_state *s, IPos cur_match) -{ - Bytef *scan = s->window + s->strstart; /* current string */ - Bytef *match; /* matched string */ - int len; /* length of current match */ - Bytef *strend = s->window + s->strstart + MAX_MATCH; - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. 
- */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - Assert(cur_match < s->strstart, "no future"); - - match = s->window + cur_match; - - /* Return failure if the match length is less than 2: - */ - if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match += 2; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - - if (len < MIN_MATCH) return MIN_MATCH - 1; - - s->match_start = cur_match; - return len <= s->lookahead ? len : s->lookahead; -} -#endif /* FASTEST */ -#endif /* ASMV */ - -#ifdef DEBUG_ZLIB -/* =========================================================================== - * Check that the match at match_start is indeed a match. 
- */ -static void -check_match(deflate_state *s, IPos start, IPos match, int length) -{ - /* check that the match is indeed a match */ - if (zmemcmp(s->window + match, - s->window + start, length) != EQUAL) { - fprintf(stderr, " start %u, match %u, length %d\n", - start, match, length); - do { - fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); - z_error("invalid match"); - } - if (z_verbose > 1) { - fprintf(stderr,"\\[%d,%d]", start-match, length); - do { putc(s->window[start++], stderr); } while (--length != 0); - } -} -#else -# define check_match(s, start, match, length) -#endif - -/* =========================================================================== - * Fill the window when the lookahead becomes insufficient. - * Updates strstart and lookahead. - * - * IN assertion: lookahead < MIN_LOOKAHEAD - * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD - * At least one byte has been read, or avail_in == 0; reads are - * performed for at least two bytes (required for the zip translate_eol - * option -- not supported here). - */ -static void -fill_window(deflate_state *s) -{ - unsigned n, m; - Posf *p; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - do { - more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if strstart == 0 - * and lookahead == 1 (input done one byte at time) - */ - more--; - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. 
- */ - } else if (s->strstart >= wsize+MAX_DIST(s)) { - - zmemcpy(s->window, s->window+wsize, (unsigned)wsize); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long) wsize; - - /* Slide the hash table (could be avoided with 32 bit values - at the expense of memory usage). We slide even when level == 0 - to keep the hash table consistent if we switch back to level > 0 - later. (Using level 0 permanently is not an optimal usage of - zlib, so we don't care about this pathological case.) - */ - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - } while (--n); - - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m-wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. - */ - } while (--n); -#endif - more += wsize; - } - if (s->strm->avail_in == 0) return; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
- */ - Assert(more >= 2, "more < 2"); - - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead >= MIN_MATCH) { - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); -} - -/* =========================================================================== - * Flush the current block, with given end-of-file flag. - * IN assertion: strstart is set to the end of the current match. - */ -#define FLUSH_BLOCK_ONLY(s, eof) { \ - _tr_flush_block(s, (s->block_start >= 0L ? \ - (charf *)&s->window[(unsigned)s->block_start] : \ - (charf *)Z_NULL), \ - (ulg)((long)s->strstart - s->block_start), \ - (eof)); \ - s->block_start = s->strstart; \ - flush_pending(s->strm); \ - Tracev((stderr,"[FLUSH]")); \ -} - -/* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, eof) { \ - FLUSH_BLOCK_ONLY(s, eof); \ - if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ -} - -/* =========================================================================== - * Copy without compression as much as possible from the input stream, return - * the current block state. - * This function does not insert new strings in the dictionary since - * uncompressible data is probably not useful. This function is used - * only for the level=0 compression option. - * NOTE: this function should be optimized to avoid extra copying from - * window to pending_buf. 
- */ -static block_state -deflate_stored(deflate_state *s, int flush) -{ - /* Stored blocks are limited to 0xffff bytes, pending_buf is limited - * to pending_buf_size, and each stored block has a 5 byte header: - */ - ulg max_block_size = 0xffff; - ulg max_start; - - if (max_block_size > s->pending_buf_size - 5) { - max_block_size = s->pending_buf_size - 5; - } - - /* Copy as much as possible from input to output: */ - for (;;) { - /* Fill the window as much as possible: */ - if (s->lookahead <= 1) { - - Assert(s->strstart < s->w_size+MAX_DIST(s) || - s->block_start >= (long)s->w_size, "slide too late"); - - fill_window(s); - if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; - - if (s->lookahead == 0) break; /* flush the current block */ - } - Assert(s->block_start >= 0L, "block gone"); - - s->strstart += s->lookahead; - s->lookahead = 0; - - /* Emit a stored block if pending_buf will be full: */ - max_start = s->block_start + max_block_size; - if (s->strstart == 0 || (ulg)s->strstart >= max_start) { - /* strstart == 0 is possible when wraparound on 16-bit machine */ - s->lookahead = (uInt)(s->strstart - max_start); - s->strstart = (uInt)max_start; - FLUSH_BLOCK(s, 0); - } - /* Flush if we may have to slide, otherwise block_start may become - * negative and the data will be gone: - */ - if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { - FLUSH_BLOCK(s, 0); - } - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; -} - -/* =========================================================================== - * Compress as much as possible from the input stream, return the current - * block state. - * This function does not perform lazy evaluation of matches and inserts - * new strings in the dictionary only for unmatched strings or for short - * matches. It is used only for the fast compression options. 
- */ -static block_state -deflate_fast(deflate_state *s, int flush) -{ - IPos hash_head = NIL; /* head of the hash chain */ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - * At this point we have always match_length < MIN_MATCH - */ - if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - if (s->strategy != Z_HUFFMAN_ONLY) { - s->match_length = longest_match (s, hash_head); - } - /* longest_match() sets match_start */ - } - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->match_start, s->match_length); - - _tr_tally_dist(s, s->strstart - s->match_start, - s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. 
- */ -#ifndef FASTEST - if (s->match_length <= s->max_insert_length && - s->lookahead >= MIN_MATCH) { - s->match_length--; /* string at strstart already in hash table */ - do { - s->strstart++; - INSERT_STRING(s, s->strstart, hash_head); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ - } while (--s->match_length != 0); - s->strstart++; - } else -#endif - { - s->strstart += s->match_length; - s->match_length = 0; - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ - } - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; -} - -/* =========================================================================== - * Same as above, but achieves better compression. We use a lazy - * evaluation for matches: a match is finally adopted only if there is - * no better match at the next window position. - */ -static block_state -deflate_slow(deflate_state *s, int flush) -{ - IPos hash_head = NIL; /* head of hash chain */ - int bflush; /* set if current block must be flushed */ - - /* Process the input block. */ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. 
- */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - */ - s->prev_length = s->match_length, s->prev_match = s->match_start; - s->match_length = MIN_MATCH-1; - - if (hash_head != NIL && s->prev_length < s->max_lazy_match && - s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - if (s->strategy != Z_HUFFMAN_ONLY) { - s->match_length = longest_match (s, hash_head); - } - /* longest_match() sets match_start */ - - if (s->match_length <= 5 && (s->strategy == Z_FILTERED || - (s->match_length == MIN_MATCH && - s->strstart - s->match_start > TOO_FAR))) { - - /* If prev_match is also MIN_MATCH, match_start is garbage - * but we will ignore the current match anyway. - */ - s->match_length = MIN_MATCH-1; - } - } - /* If there was a match at the previous step and the current - * match is not better, output the previous match: - */ - if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { - uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; - /* Do not insert strings in hash table beyond this. */ - - check_match(s, s->strstart-1, s->prev_match, s->prev_length); - - _tr_tally_dist(s, s->strstart -1 - s->prev_match, - s->prev_length - MIN_MATCH, bflush); - - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. 
If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ - s->lookahead -= s->prev_length-1; - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { - INSERT_STRING(s, s->strstart, hash_head); - } - } while (--s->prev_length != 0); - s->match_available = 0; - s->match_length = MIN_MATCH-1; - s->strstart++; - - if (bflush) FLUSH_BLOCK(s, 0); - - } else if (s->match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. - */ - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - if (bflush) { - FLUSH_BLOCK_ONLY(s, 0); - } - s->strstart++; - s->lookahead--; - if (s->strm->avail_out == 0) return need_more; - } else { - /* There is no previous match to compare with, wait for - * the next step to decide. - */ - s->match_available = 1; - s->strstart++; - s->lookahead--; - } - } - Assert (flush != Z_NO_FLUSH, "no flush?"); - if (s->match_available) { - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - s->match_available = 0; - } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; -} -/* --- deflate.c */ - -/* +++ trees.c */ -/* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2002 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process uses several Huffman trees. The more - * common source values are represented by shorter bit sequences. - * - * Each code tree is stored in a compressed form which is itself - * a Huffman encoding of the lengths of all the code strings (in - * ascending order by source values). 
The actual code strings are - * reconstructed from the lengths in the inflate process, as described - * in the deflate specification. - * - * REFERENCES - * - * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". - * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc - * - * Storer, James A. - * Data Compression: Methods and Theory, pp. 49-50. - * Computer Science Press, 1988. ISBN 0-7167-8156-5. - * - * Sedgewick, R. - * Algorithms, p290. - * Addison-Wesley, 1983. ISBN 0-201-06672-6. - */ - -/* #define GEN_TREES_H */ - -/* #include "deflate.h" */ - -#ifdef DEBUG_ZLIB -# include -#endif - -/* =========================================================================== - * Constants - */ - -#define MAX_BL_BITS 7 -/* Bit length codes must not exceed MAX_BL_BITS bits */ - -#define END_BLOCK 256 -/* end of block literal code */ - -#define REP_3_6 16 -/* repeat previous bit length 3-6 times (2 bits of repeat count) */ - -#define REPZ_3_10 17 -/* repeat a zero length 3-10 times (3 bits of repeat count) */ - -#define REPZ_11_138 18 -/* repeat a zero length 11-138 times (7 bits of repeat count) */ - -static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ - = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; - -static const int extra_dbits[D_CODES] /* extra bits for each distance code */ - = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; - -static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ - = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; - -static const uch bl_order[BL_CODES] - = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; -/* The lengths of the bit length codes are sent in order of decreasing - * probability, to avoid transmitting the lengths for unused bit length codes. - */ - -#define Buf_size (8 * 2*sizeof(char)) -/* Number of bits used within bi_buf. (bi_buf might be implemented on - * more than 16 bits on some systems.) 
- */ - -/* =========================================================================== - * Local data. These are initialized only once. - */ - -#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ - -#if defined(GEN_TREES_H) || !defined(STDC) -/* non ANSI compilers may not accept trees.h */ - -static ct_data *static_ltree = Z_NULL; -/* The static literal tree. Since the bit lengths are imposed, there is no - * need for the L_CODES extra codes used during heap construction. However - * The codes 286 and 287 are needed to build a canonical tree (see _tr_init - * below). - */ - -static ct_data *static_dtree = Z_NULL; -/* The static distance tree. (Actually a trivial tree since all codes use - * 5 bits.) - */ - -uch *_dist_code = Z_NULL; -/* Distance codes. The first 256 values correspond to the distances - * 3 .. 258, the last 256 values correspond to the top 8 bits of - * the 15 bit distances. - */ - -uch *_length_code = Z_NULL; -/* length code for each normalized match length (0 == MIN_MATCH) */ - -static int *base_length = Z_NULL; -/* First normalized length for each code (0 = MIN_MATCH) */ - -static int *base_dist = Z_NULL; -/* First normalized distance for each code (0 = distance of 1) */ - -#else -/* +++ trees.h */ -/* header created automatically with -DGEN_TREES_H */ - -static const ct_data static_ltree[L_CODES+2] = { -{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, -{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, -{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, -{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, -{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, -{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, -{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, -{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, -{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 
8}}, -{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, -{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, -{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, -{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, -{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, -{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, -{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, -{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, -{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, -{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, -{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, -{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, -{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, -{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, -{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, -{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, -{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, -{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, -{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, -{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, -{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, -{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, -{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, -{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, -{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, -{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, -{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, -{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, -{{315},{ 
9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, -{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, -{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, -{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, -{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, -{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, -{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, -{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, -{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, -{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, -{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, -{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, -{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, -{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, -{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, -{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, -{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, -{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, -{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, -{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, -{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} -}; - -static const ct_data static_dtree[D_CODES] = { -{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, -{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, -{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, -{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, -{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, -{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} -}; - -const uch _dist_code[DIST_CODE_LEN] = { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 
8, 8, - 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, -10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, -11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, -12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, -13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, -13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, -14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, -15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, -15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, -15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, -18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, -23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, -24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, -26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, -27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, -27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, -28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, -28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, -28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, -29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, -29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, -29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 -}; - -const 
uch _length_code[MAX_MATCH-MIN_MATCH+1]= { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, -13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, -17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, -19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, -21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, -22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, -23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, -24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, -25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, -25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, -26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, -26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, -27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 -}; - -static const int base_length[LENGTH_CODES] = { -0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, -64, 80, 96, 112, 128, 160, 192, 224, 0 -}; - -static const int base_dist[D_CODES] = { - 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, - 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, - 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 -}; - -/* --- trees.h */ -#endif /* GEN_TREES_H */ - -struct static_tree_desc_s { - const ct_data *static_tree; /* static tree or NULL */ - const intf *extra_bits; /* extra bits for each code or NULL */ - int extra_base; /* base index for extra_bits */ - int elems; /* max number of elements in the tree */ - int max_length; /* max bit length for the codes */ -}; - -static static_tree_desc static_l_desc = -{NULL, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; - -static static_tree_desc static_d_desc = -{NULL, extra_dbits, 0, D_CODES, MAX_BITS}; - -static 
static_tree_desc static_bl_desc = -{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; - -/* =========================================================================== - * Local (static) routines in this file. - */ - -static int tr_static_init OF((z_streamp z)); -static void init_block OF((deflate_state *s)); -static void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); -static void gen_bitlen OF((deflate_state *s, tree_desc *desc)); -static void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); -static void build_tree OF((deflate_state *s, tree_desc *desc)); -static void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); -static void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); -static int build_bl_tree OF((deflate_state *s)); -static void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, - int blcodes)); -static void compress_block OF((deflate_state *s, ct_data *ltree, - ct_data *dtree)); -static void set_data_type OF((deflate_state *s)); -static unsigned bi_reverse OF((unsigned value, int length)); -static void bi_windup OF((deflate_state *s)); -static void bi_flush OF((deflate_state *s)); -static void copy_block OF((deflate_state *s, charf *buf, unsigned len, - int header)); - -#ifdef GEN_TREES_H -static void gen_trees_header OF((void)); -#endif - -#ifndef DEBUG_ZLIB -# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) - /* Send a code of the given tree. c and tree must not have side effects */ - -#else /* DEBUG_ZLIB */ -# define send_code(s, c, tree) \ - { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ - send_bits(s, tree[c].Code, tree[c].Len); } -#endif - -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. 
- */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - -/* =========================================================================== - * Send a value on a given number of bits. - * IN assertion: length <= 16 and value fits in length bits. - */ -#ifdef DEBUG_ZLIB -static void send_bits OF((deflate_state *s, int value, int length)); - -static void send_bits(deflate_state *s, int value, int length) -{ - Tracevv((stderr," l %2d v %4x ", length, value)); - Assert(length > 0 && length <= 15, "invalid length"); - s->bits_sent += (ulg)length; - - /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) - * unused bits in value. - */ - if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (value << s->bi_valid); - put_short(s, s->bi_buf); - s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); - s->bi_valid += length - Buf_size; - } else { - s->bi_buf |= value << s->bi_valid; - s->bi_valid += length; - } -} -#else /* !DEBUG_ZLIB */ - -#define send_bits(s, value, length) \ -{ int len = length;\ - if (s->bi_valid > (int)Buf_size - len) {\ - int val = value;\ - s->bi_buf |= (val << s->bi_valid);\ - put_short(s, s->bi_buf);\ - s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ - s->bi_valid += len - Buf_size;\ - } else {\ - s->bi_buf |= (value) << s->bi_valid;\ - s->bi_valid += len;\ - }\ -} -#endif /* DEBUG_ZLIB */ - - -#ifndef MAX -#define MAX(a,b) (a >= b ? 
a : b) -#endif -/* the arguments must not have side effects */ - -typedef struct { - ct_data static_ltree[L_CODES+2]; - ct_data static_dtree[D_CODES]; - uch _dist_code[DIST_CODE_LEN]; - uch _length_code[MAX_MATCH-MIN_MATCH+1]; - int base_length[LENGTH_CODES]; - int base_dist[D_CODES]; -} __used_to_be_static; - -#if defined(GEN_TREES_H) || !defined(STDC) -static __used_to_be_static *static_storage = Z_NULL; -#endif /* defined(GEN_TREES_H) || !defined(STDC) */ - -/* =========================================================================== - * Initialize the various 'constant' tables. - */ -static int tr_static_init( - z_streamp z) -{ -#if defined(GEN_TREES_H) || !defined(STDC) - static int static_init_done = 0; - int n; /* iterates over tree elements */ - int bits; /* bit counter */ - int length; /* length value */ - int code; /* code value */ - int dist; /* distance index */ - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - if (static_init_done) return Z_OK; - - /* allocate storage for static structures */ - if (static_storage == Z_NULL) { - static_storage = (__used_to_be_static*)ZALLOC(z, 1, sizeof(__used_to_be_static)); - if (static_storage == Z_NULL) - return Z_MEM_ERROR; - } - - static_ltree = static_storage->static_ltree; - static_dtree = static_storage->static_dtree; - _dist_code = static_storage->_dist_code; - _length_code = static_storage->_length_code; - base_length = static_storage->base_length; - base_dist = static_storage->base_dist; - - /* For some embedded targets, global variables are not initialized: */ - static_l_desc.static_tree = static_ltree; - static_l_desc.extra_bits = extra_lbits; - static_d_desc.static_tree = static_dtree; - static_d_desc.extra_bits = extra_dbits; - static_bl_desc.extra_bits = extra_blbits; - - /* Initialize the mapping length (0..255) -> length code (0..28) */ - length = 0; - for (code = 0; code < LENGTH_CODES-1; code++) { - base_length[code] = length; - for (n = 0; n < (1< dist 
code (0..29) */ - dist = 0; - for (code = 0 ; code < 16; code++) { - base_dist[code] = dist; - for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ - for ( ; code < D_CODES; code++) { - base_dist[code] = dist << 7; - for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { - _dist_code[256 + dist++] = (uch)code; - } - } - Assert (dist == 256, "tr_static_init: 256+dist != 512"); - - /* Construct the codes of the static literal tree */ - for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; - n = 0; - while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; - while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; - while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; - while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; - /* Codes 286 and 287 do not exist, but we must include them in the - * tree construction to get a canonical Huffman tree (longest code - * all ones) - */ - gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); - - /* The static distance tree is trivial: */ - for (n = 0; n < D_CODES; n++) { - static_dtree[n].Len = 5; - static_dtree[n].Code = bi_reverse((unsigned)n, 5); - } - static_init_done = 1; - -# ifdef GEN_TREES_H - gen_trees_header(); -# endif -#endif /* defined(GEN_TREES_H) || !defined(STDC) */ - return Z_OK; -} - -/* =========================================================================== - * Genererate the file trees.h describing the static trees. - */ -#ifdef GEN_TREES_H -# ifndef DEBUG_ZLIB -# include -# endif - -# define SEPARATOR(i, last, width) \ - ((i) == (last)? "\n};\n\n" : \ - ((i) % (width) == (width)-1 ? 
",\n" : ", ")) - -void gen_trees_header() -{ - FILE *header = fopen("trees.h", "w"); - int i; - - Assert (header != NULL, "Can't open trees.h"); - fprintf(header, - "/* header created automatically with -DGEN_TREES_H */\n\n"); - - fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); - for (i = 0; i < L_CODES+2; i++) { - fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, - static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); - } - - fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, - static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); - } - - fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); - for (i = 0; i < DIST_CODE_LEN; i++) { - fprintf(header, "%2u%s", _dist_code[i], - SEPARATOR(i, DIST_CODE_LEN-1, 20)); - } - - fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); - for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { - fprintf(header, "%2u%s", _length_code[i], - SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); - } - - fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); - for (i = 0; i < LENGTH_CODES; i++) { - fprintf(header, "%1u%s", base_length[i], - SEPARATOR(i, LENGTH_CODES-1, 20)); - } - - fprintf(header, "local const int base_dist[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "%5u%s", base_dist[i], - SEPARATOR(i, D_CODES-1, 10)); - } - - fclose(header); -} -#endif /* GEN_TREES_H */ - -/* =========================================================================== - * Initialize the tree data structures for a new zlib stream. 
- */ -static void -_tr_init(deflate_state *s) -{ - tr_static_init(s->strm); - - s->l_desc.dyn_tree = s->dyn_ltree; - s->l_desc.stat_desc = &static_l_desc; - - s->d_desc.dyn_tree = s->dyn_dtree; - s->d_desc.stat_desc = &static_d_desc; - - s->bl_desc.dyn_tree = s->bl_tree; - s->bl_desc.stat_desc = &static_bl_desc; - - s->bi_buf = 0; - s->bi_valid = 0; - s->last_eob_len = 8; /* enough lookahead for inflate */ -#ifdef DEBUG_ZLIB - s->compressed_len = 0L; - s->bits_sent = 0L; -#endif - - /* Initialize the first block of the first file: */ - init_block(s); -} - -/* =========================================================================== - * Initialize a new block. - */ -static void -init_block(deflate_state *s) -{ - int n; /* iterates over tree elements */ - - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; -} - -#define SMALLEST 1 -/* Index within the heap array of least frequent node in the Huffman tree */ - - -/* =========================================================================== - * Remove the smallest element from the heap and recreate the heap with - * one less element. Updates heap and heap_len. - */ -#define pqremove(s, tree, top) \ -{\ - top = s->heap[SMALLEST]; \ - s->heap[SMALLEST] = s->heap[s->heap_len--]; \ - pqdownheap(s, tree, SMALLEST); \ -} - -/* =========================================================================== - * Compares to subtrees, using the tree depth as tie breaker when - * the subtrees have equal frequency. This minimizes the worst case length. 
- */ -#define smaller(tree, n, m, depth) \ - (tree[n].Freq < tree[m].Freq || \ - (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) - -/* =========================================================================== - * Restore the heap property by moving down the tree starting at node k, - * exchanging a node with the smallest of its two sons if necessary, stopping - * when the heap property is re-established (each father smaller than its - * two sons). - * ct_data *tree; the tree to restore - * int k; node to move down - */ -static void -pqdownheap(deflate_state *s, ct_data *tree, int k) -{ - int v = s->heap[k]; - int j = k << 1; /* left son of k */ - while (j <= s->heap_len) { - /* Set j to the smallest of the two sons: */ - if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { - j++; - } - /* Exit if v is smaller than both sons */ - if (smaller(tree, v, s->heap[j], s->depth)) break; - - /* Exchange v with the smallest son */ - s->heap[k] = s->heap[j]; k = j; - - /* And continue down the tree, setting j to the left son of k */ - j <<= 1; - } - s->heap[k] = v; -} - -/* =========================================================================== - * Compute the optimal bit lengths for a tree and update the total bit length - * for the current block. - * IN assertion: the fields freq and dad are set, heap[heap_max] and - * above are the tree nodes sorted by increasing frequency. - * OUT assertions: the field len is set to the optimal bit length, the - * array bl_count contains the frequencies for each bit length. - * The length opt_len is updated; static_len is also updated if stree is - * not null. 
- */ -static void -gen_bitlen(deflate_state *s, tree_desc *desc) -{ - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - const ct_data *stree = desc->stat_desc->static_tree; - const intf *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; - int h; /* heap index */ - int n, m; /* iterate over the tree elements */ - int bits; /* bit length */ - int xbits; /* extra bits */ - ush f; /* frequency */ - int overflow = 0; /* number of elements with bit length too large */ - - for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; - - /* In a first pass, compute the optimal bit lengths (which may - * overflow in the case of the bit length tree). - */ - tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - - for (h = s->heap_max+1; h < HEAP_SIZE; h++) { - n = s->heap[h]; - bits = tree[tree[n].Dad].Len + 1; - if (bits > max_length) bits = max_length, overflow++; - tree[n].Len = (ush)bits; - /* We overwrite tree[n].Dad which is no longer needed */ - - if (n > max_code) continue; /* not a leaf node */ - - s->bl_count[bits]++; - xbits = 0; - if (n >= base) xbits = extra[n-base]; - f = tree[n].Freq; - s->opt_len += (ulg)f * (bits + xbits); - if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); - } - if (overflow == 0) return; - - Trace((stderr,"\nbit length overflow\n")); - /* This happens for example on obj2 and pic of the Calgary corpus */ - - /* Find the first bit length which could increase: */ - do { - bits = max_length-1; - while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ - s->bl_count[max_length]--; - /* The brother of the overflow item also moves one step up, - * but this does not affect bl_count[max_length] - */ - overflow -= 2; - } while (overflow > 0); - - /* Now recompute all bit lengths, scanning in increasing frequency. 
- * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all - * lengths instead of fixing only the wrong ones. This idea is taken - * from 'ar' written by Haruhiko Okumura.) - */ - for (bits = max_length; bits != 0; bits--) { - n = s->bl_count[bits]; - while (n != 0) { - m = s->heap[--h]; - if (m > max_code) continue; - if (tree[m].Len != (unsigned) bits) { - Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((long)bits - (long)tree[m].Len) - *(long)tree[m].Freq; - tree[m].Len = (ush)bits; - } - n--; - } - } -} - -/* =========================================================================== - * Generate the codes for a given tree and bit counts (which need not be - * optimal). - * IN assertion: the array bl_count contains the bit length statistics for - * the given tree and the field len is set for all tree elements. - * OUT assertion: the field code is set for all tree elements of non - * zero code length. - * - * ct_data *tree; the tree to decorate - * int max_code; largest code with non zero frequency - * ushf *bl_count; number of codes at each bit length - */ -static void -gen_codes(ct_data *tree, int max_code, ushf *bl_count) -{ - ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - ush code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ - - /* The distribution counts are first used to generate the code values - * without bit reversal. - */ - for (bits = 1; bits <= MAX_BITS; bits++) { - next_code[bits] = code = (code + bl_count[bits-1]) << 1; - } - /* Check that the bit counts in bl_count are consistent. The last code - * must be all ones. 
- */ - Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; - const ct_data *stree = desc->stat_desc->static_tree; - int elems = desc->stat_desc->elems; - int n, m; /* iterate over heap elements */ - int max_code = -1; /* largest code with non zero frequency */ - int node; /* new node being created */ - - /* Construct the initial heap, with least frequent element in - * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. - * heap[0] is not used. - */ - s->heap_len = 0, s->heap_max = HEAP_SIZE; - - for (n = 0; n < elems; n++) { - if (tree[n].Freq != 0) { - s->heap[++(s->heap_len)] = max_code = n; - s->depth[n] = 0; - } else { - tree[n].Len = 0; - } - } - - /* The pkzip format requires that at least one distance code exists, - * and that at least one bit should be sent even if there is only one - * possible code. So to avoid special checks later on we force at least - * two codes of non zero frequency. - */ - while (s->heap_len < 2) { - node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); - tree[node].Freq = 1; - s->depth[node] = 0; - s->opt_len--; if (stree) s->static_len -= stree[node].Len; - /* node is 0 or 1 so it does not have extra bits */ - } - desc->max_code = max_code; - - /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, - * establish sub-heaps of increasing lengths: - */ - for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); - - /* Construct the Huffman tree by repeatedly combining the least two - * frequent nodes. 
- */ - node = elems; /* next internal node of the tree */ - do { - pqremove(s, tree, n); /* n = node of least frequency */ - m = s->heap[SMALLEST]; /* m = node of next least frequency */ - - s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ - s->heap[--(s->heap_max)] = m; - - /* Create a new node father of n and m */ - tree[node].Freq = tree[n].Freq + tree[m].Freq; - s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); - tree[n].Dad = tree[m].Dad = (ush)node; -#ifdef DUMP_BL_TREE - if (tree == s->bl_tree) { - fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", - node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); - } -#endif - /* and insert the new node in the heap */ - s->heap[SMALLEST] = node++; - pqdownheap(s, tree, SMALLEST); - - } while (s->heap_len >= 2); - - s->heap[--(s->heap_max)] = s->heap[SMALLEST]; - - /* At this point, the fields freq and dad are set. We can now - * generate the bit lengths. - */ - gen_bitlen(s, (tree_desc *)desc); - - /* The field len is now set, we can generate the bit codes */ - gen_codes ((ct_data *)tree, max_code, s->bl_count); -} - -/* =========================================================================== - * Scan a literal or distance tree to determine the frequencies of the codes - * in the bit length tree. 
- * - * ct_data *tree; the tree to be scanned - * int max_code; and its largest code of non zero frequency - */ -static void -scan_tree(deflate_state *s, ct_data *tree, int max_code) -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code+1].Len = (ush)0xffff; /* guard */ - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - s->bl_tree[curlen].Freq += count; - } else if (curlen != 0) { - if (curlen != prevlen) s->bl_tree[curlen].Freq++; - s->bl_tree[REP_3_6].Freq++; - } else if (count <= 10) { - s->bl_tree[REPZ_3_10].Freq++; - } else { - s->bl_tree[REPZ_11_138].Freq++; - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Send a literal or distance tree in compressed form, using the codes in - * bl_tree. 
- * - * ct_data *tree; the tree to be scanned - * int max_code; and its largest code of non zero frequency - */ -static void -send_tree(deflate_state *s, ct_data *tree, int max_code) -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - /* tree[max_code+1].Len = -1; */ /* guard already set */ - if (nextlen == 0) max_count = 138, min_count = 3; - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - do { send_code(s, curlen, s->bl_tree); } while (--count != 0); - - } else if (curlen != 0) { - if (curlen != prevlen) { - send_code(s, curlen, s->bl_tree); count--; - } - Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); - - } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); - - } else { - send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Construct the Huffman tree for the bit lengths and return the index in - * bl_order of the last bit length code to send. 
- */ -static int -build_bl_tree(deflate_state *s) -{ - int max_blindex; /* index of last bit length code of non zero freq */ - - /* Determine the bit length frequencies for literal and distance trees */ - scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); - scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); - - /* Build the bit length tree: */ - build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. - */ - - /* Determine the number of bit length codes to send. The pkzip format - * requires that at least 4 bit length codes be sent. (appnote.txt says - * 3 but the actual value used is 4.) - */ - for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { - if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; - } - /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*(max_blindex+1) + 5+5+4; - Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", - s->opt_len, s->static_len)); - - return max_blindex; -} - -/* =========================================================================== - * Send the header for a block using dynamic Huffman trees: the counts, the - * lengths of the bit length codes, the literal tree and the distance tree. - * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
- */ -static void -send_all_trees(deflate_state *s, int lcodes, int dcodes, int blcodes) -{ - int rank; /* index in bl_order */ - - Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); - Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, - "too many codes"); - Tracev((stderr, "\nbl counts: ")); - send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes-1, 5); - send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ - for (rank = 0; rank < blcodes; rank++) { - Tracev((stderr, "\nbl code %2d ", bl_order[rank])); - send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); - } - Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ - Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ - Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); -} - -/* =========================================================================== - * Send a stored block - * - * charf *buf; input block - * ulg stored_len; length of input block - * int eof; true if this is the last block for a file - */ -static void -_tr_stored_block(deflate_state *s, charf *buf, ulg stored_len, int eof) -{ - send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ -#ifdef DEBUG_ZLIB - s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; - s->compressed_len += (stored_len + 4) << 3; -#endif - copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ -} - -/* =========================================================================== - * Send one empty static block to give enough lookahead for inflate. - * This takes 10 bits, of which 7 may remain in the bit buffer. - * The current inflate code requires 9 bits of lookahead. 
If the - * last two codes for the previous block (real code plus EOB) were coded - * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode - * the last real code. In this case we send two empty static blocks instead - * of one. (There are no problems if the previous block is stored or fixed.) - * To simplify the code, we assume the worst case of last real code encoded - * on one bit only. - */ -static void -_tr_align(deflate_state *s) -{ - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef DEBUG_ZLIB - s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ -#endif - bi_flush(s); - /* Of the 10 bits for the empty block, we have already sent - * (10 - bi_valid) bits. The lookahead for the last real code (before - * the EOB of the previous block) was thus at least one plus the length - * of the EOB plus what we have just sent of the empty static block. - */ - if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef DEBUG_ZLIB - s->compressed_len += 10L; -#endif - bi_flush(s); - } - s->last_eob_len = 7; -} - -/* =========================================================================== - * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and output the encoded block to the zip file. 
- * - * charf *buf; input block, or NULL if too old - * ulg stored_len; length of input block - * int eof; true if this is the last block for a file - */ -static void -_tr_flush_block(deflate_state *s, charf *buf, ulg stored_len, int eof) -{ - ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex = 0; /* index of last bit length code of non zero freq */ - - /* Build the Huffman trees unless a stored block is forced */ - if (s->level > 0) { - - /* Check if the file is ascii or binary */ - if (s->data_type == Z_UNKNOWN) set_data_type(s); - - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ - - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); - - /* Determine the best encoding. Compute first the block length in bytes*/ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - - } else { - Assert(buf != (char*)0, "lost buf"); - opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ - } - -#ifdef FORCE_STORED - if (buf != (char*)0) { /* force stored block */ -#else - if (stored_len+4 <= opt_lenb && buf != (char*)0) { - /* 4: two words for the lengths */ -#endif - /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
- * Otherwise we can't have processed more than WSIZE input bytes since - * the last block flush, because compression would have been - * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to - * transform a block into a stored block. - */ - _tr_stored_block(s, buf, stored_len, eof); - -#ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ -#else - } else if (static_lenb == opt_lenb) { -#endif - send_bits(s, (STATIC_TREES<<1)+eof, 3); - compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); -#ifdef DEBUG_ZLIB - s->compressed_len += 3 + s->static_len; -#endif - } else { - send_bits(s, (DYN_TREES<<1)+eof, 3); - send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); -#ifdef DEBUG_ZLIB - s->compressed_len += 3 + s->opt_len; -#endif - } - Assert (s->compressed_len == s->bits_sent, "bad compressed size"); - /* The above check is made mod 2^32, for files larger than 512 MB - * and uLong implemented on 32 bits. - */ - init_block(s); - - if (eof) { - bi_windup(s); -#ifdef DEBUG_ZLIB - s->compressed_len += 7; /* align on byte boundary */ -#endif - } - Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*eof)); -} - -/* =========================================================================== - * Save the match info and tally the frequency counts. Return true if - * the current block must be flushed. 
- * - * unsigned dist; distance of matched string - * unsigned lc; match length-MIN_MATCH or unmatched char (if dist==0) - */ -static int -_tr_tally(deflate_state *s, unsigned int dist, unsigned int lc) -{ - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; - } else { - s->matches++; - /* Here, lc is the match length - MIN_MATCH */ - dist--; /* dist = match distance - 1 */ - Assert((ush)dist < (ush)MAX_DIST(s) && - (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && - (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - - s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. 
- */ -} - -/* =========================================================================== - * Send the block data compressed using the given Huffman trees - * - * ct_data *ltree; literal tree - * ct_data *dtree; distance tree - */ -static void -compress_block(deflate_state *s, ct_data *ltree, ct_data *dtree) -{ - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = _length_code[lc]; - send_code(s, code+LITERALS+1, ltree); /* send the length code */ - extra = extra_lbits[code]; - if (extra != 0) { - lc -= base_length[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert (code < D_CODES, "bad d_code"); - - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= base_dist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ - - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); - - } while (lx < s->last_lit); - - send_code(s, END_BLOCK, ltree); - s->last_eob_len = ltree[END_BLOCK].Len; -} - -/* =========================================================================== - * Set the data type to ASCII or BINARY, using a crude approximation: - * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. 
- * IN assertion: the fields freq of dyn_ltree are set and the total of all - * frequencies does not exceed 64K (to fit in an int on 16 bit machines). - */ -static void -set_data_type(deflate_state *s) -{ - int n = 0; - unsigned ascii_freq = 0; - unsigned bin_freq = 0; - while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; - while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; - while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; - s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); -} - -/* =========================================================================== - * Reverse the first len bits of a code, using straightforward code (a faster - * method would use a table) - * IN assertion: 1 <= len <= 15 - */ -static unsigned -bi_reverse(unsigned code, int len) -{ - unsigned res = 0; - do { - res |= code & 1; - code >>= 1, res <<= 1; - } while (--len > 0); - return res >> 1; -} - -/* =========================================================================== - * Flush the bit buffer, keeping at most 7 bits in it. - */ -static void -bi_flush(deflate_state *s) -{ - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/* =========================================================================== - * Flush the bit buffer and align the output on a byte boundary - */ -static void -bi_windup(deflate_state *s) -{ - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef DEBUG_ZLIB - s->bits_sent = (s->bits_sent+7) & ~7; -#endif -} - -/* =========================================================================== - * Copy a stored block, storing first the length and its - * one's complement if requested. 
- * - * charf *buf; the input data - * unsigned len; its length - * int header; true if block header must be written - */ -static void -copy_block(deflate_state *s, charf *buf, unsigned int len, int header) -{ - bi_windup(s); /* align on byte boundary */ - s->last_eob_len = 8; /* enough lookahead for inflate */ - - if (header) { - put_short(s, (ush)len); - put_short(s, (ush)~len); -#ifdef DEBUG_ZLIB - s->bits_sent += 2*16; -#endif - } -#ifdef DEBUG_ZLIB - s->bits_sent += (ulg)len<<3; -#endif - while (len--) { - put_byte(s, *buf++); - } -} -/* --- trees.c */ - -/* +++ inflate.c */ -/* inflate.c -- zlib interface to inflate modules - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ - -/* +++ infblock.h */ -/* infblock.h -- header to use infblock.c - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -struct inflate_blocks_state; -typedef struct inflate_blocks_state FAR inflate_blocks_statef; - -static inflate_blocks_statef * inflate_blocks_new OF(( - z_streamp z, - check_func c, /* check function */ - uInt w)); /* window size */ - -static int inflate_blocks OF(( - inflate_blocks_statef *, - z_streamp , - int)); /* initial return code */ - -static void inflate_blocks_reset OF(( - inflate_blocks_statef *, - z_streamp , - uLongf *)); /* check value on output */ - -static int inflate_blocks_free OF(( - inflate_blocks_statef *, - z_streamp)); - -static void inflate_set_dictionary OF(( - inflate_blocks_statef *s, - const Bytef *d, /* dictionary */ - uInt n)); /* dictionary length */ - -static int inflate_blocks_sync_point OF(( - inflate_blocks_statef *s)); -/* --- infblock.h */ - -#ifndef NO_DUMMY_DECL -struct inflate_blocks_state {int dummy;}; /* for buggy compilers */ -#endif - -/* inflate private state */ -typedef struct inflate_state { - - /* mode */ - enum { - METHOD, /* waiting for method byte */ - FLAG, /* waiting for flag byte */ - DICT4, /* four dictionary check bytes to go */ - DICT3, /* three dictionary check bytes to go */ - DICT2, /* two dictionary check bytes to go */ - DICT1, /* one dictionary check byte to go */ - DICT0, /* waiting for inflateSetDictionary */ - BLOCKS, /* decompressing blocks */ - CHECK4, /* four check bytes to go */ - CHECK3, /* three check bytes to go */ - CHECK2, /* two check bytes to go */ - CHECK1, /* one check byte to go */ - DONE, /* finished check, done */ - BAD} /* got an error--stay here */ - mode; /* current inflate mode */ - - /* mode dependent information */ - union { - uInt method; /* if FLAGS, method byte */ - struct { - uLong was; /* computed check value */ - uLong need; /* stream check value */ - } check; /* if CHECK, check values to compare */ - uInt marker; /* if BAD, inflateSync's marker bytes count */ - } sub; /* submode */ - - /* mode independent information */ - int nowrap; /* flag for no wrapper */ - 
uInt wbits; /* log2(window size) (8..15, defaults to 15) */ - inflate_blocks_statef - *blocks; /* current inflate_blocks state */ - -}inflate_state; - - -int ZEXPORT -inflateReset(z_streamp z) -{ - inflate_state* s; - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - - s = (inflate_state*)z->state; - z->total_in = z->total_out = 0; - z->msg = Z_NULL; - s->mode = s->nowrap ? BLOCKS : METHOD; - inflate_blocks_reset(s->blocks, z, Z_NULL); - Tracev((stderr, "inflate: reset\n")); - return Z_OK; -} - - -int ZEXPORT -inflateEnd(z_streamp z) -{ - if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL) - return Z_STREAM_ERROR; - if (((inflate_state*)z->state)->blocks != Z_NULL) - inflate_blocks_free(((inflate_state*)z->state)->blocks, z); - ZFREE(z, z->state); - z->state = Z_NULL; - Tracev((stderr, "inflate: end\n")); - return Z_OK; -} - - -int ZEXPORT -inflateInit2_(z_streamp z, int w, const char *ver, int stream_size) -{ - inflate_state* s; - if (ver == Z_NULL || ver[0] != ZLIB_VERSION[0] || - stream_size != sizeof(z_stream)) - return Z_VERSION_ERROR; - - /* initialize state */ - if (z == Z_NULL) - return Z_STREAM_ERROR; - z->msg = Z_NULL; -#ifndef NO_ZCFUNCS - if (z->zalloc == Z_NULL) - { - z->zalloc = zcalloc; - z->opaque = (voidpf)0; - } - if (z->zfree == Z_NULL) z->zfree = zcfree; -#endif - if ((z->state = (struct internal_state FAR *) - ZALLOC(z,1,sizeof(struct inflate_state))) == Z_NULL) - return Z_MEM_ERROR; - s = (inflate_state*)z->state; - s->blocks = Z_NULL; - - /* handle undocumented nowrap option (no zlib header or check) */ - s->nowrap = 0; - if (w < 0) - { - w = - w; - s->nowrap = 1; - } - - /* set window size */ - if (w < 8 || w > 15) - { - inflateEnd(z); - return Z_STREAM_ERROR; - } - s->wbits = (uInt)w; - - /* create inflate_blocks state */ - if ((s->blocks = - inflate_blocks_new(z, s->nowrap ? 
Z_NULL : adler32, (uInt)1 << w)) - == Z_NULL) - { - inflateEnd(z); - return Z_MEM_ERROR; - } - Tracev((stderr, "inflate: allocated\n")); - - /* reset state */ - inflateReset(z); - return Z_OK; -} - - -int ZEXPORT -inflateInit_(z_streamp z, const char *ver, int stream_size) -{ - return inflateInit2_(z, DEF_WBITS, ver, stream_size); -} - - -#define NEEDBYTE {if(z->avail_in==0)return r;r=f;} -#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++) - -int ZEXPORT -inflate(z_streamp z, int f) -{ - int r; - uInt b; - inflate_state* s; - - if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL) - return Z_STREAM_ERROR; - f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK; - r = Z_BUF_ERROR; - s = (inflate_state*)z->state; - while (1) switch (s->mode) - { - case METHOD: - NEEDBYTE - if (((s->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED) - { - s->mode = BAD; - z->msg = (char*)"unknown compression method"; - s->sub.marker = 5; /* can't try inflateSync */ - break; - } - if ((s->sub.method >> 4) + 8 > s->wbits) - { - s->mode = BAD; - z->msg = (char*)"invalid window size"; - s->sub.marker = 5; /* can't try inflateSync */ - break; - } - s->mode = FLAG; - case FLAG: - NEEDBYTE - b = NEXTBYTE; - if (((s->sub.method << 8) + b) % 31) - { - s->mode = BAD; - z->msg = (char*)"incorrect header check"; - s->sub.marker = 5; /* can't try inflateSync */ - break; - } - Tracev((stderr, "inflate: zlib header ok\n")); - if (!(b & PRESET_DICT)) - { - s->mode = BLOCKS; - break; - } - s->mode = DICT4; - case DICT4: - NEEDBYTE - s->sub.check.need = (uLong)NEXTBYTE << 24; - s->mode = DICT3; - case DICT3: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE << 16; - s->mode = DICT2; - case DICT2: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE << 8; - s->mode = DICT1; - case DICT1: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE; - z->adler = s->sub.check.need; - s->mode = DICT0; - return Z_NEED_DICT; - case DICT0: - s->mode = BAD; - z->msg = (char*)"need dictionary"; - s->sub.marker = 0; /* can try 
inflateSync */ - return Z_STREAM_ERROR; - case BLOCKS: - r = inflate_blocks(s->blocks, z, r); - if (r == Z_DATA_ERROR) - { - s->mode = BAD; - s->sub.marker = 0; /* can try inflateSync */ - break; - } - if (r == Z_OK) - r = f; - if (r != Z_STREAM_END) - return r; - r = f; - inflate_blocks_reset(s->blocks, z, &s->sub.check.was); - if (s->nowrap) - { - s->mode = DONE; - break; - } - s->mode = CHECK4; - case CHECK4: - NEEDBYTE - s->sub.check.need = (uLong)NEXTBYTE << 24; - s->mode = CHECK3; - case CHECK3: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE << 16; - s->mode = CHECK2; - case CHECK2: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE << 8; - s->mode = CHECK1; - case CHECK1: - NEEDBYTE - s->sub.check.need += (uLong)NEXTBYTE; - - if (s->sub.check.was != s->sub.check.need) - { - s->mode = BAD; - z->msg = (char*)"incorrect data check"; - s->sub.marker = 5; /* can't try inflateSync */ - break; - } - Tracev((stderr, "inflate: zlib check ok\n")); - s->mode = DONE; - case DONE: - return Z_STREAM_END; - case BAD: - return Z_DATA_ERROR; - default: - return Z_STREAM_ERROR; - } -#ifdef NEED_DUMMY_RETURN - return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ -#endif -} - - -int ZEXPORT inflateSetDictionary(z, dictionary, dictLength) -z_streamp z; -const Bytef *dictionary; -uInt dictLength; -{ - uInt length = dictLength; - inflate_state* s; - - if (z == Z_NULL || z->state == Z_NULL || ((inflate_state*)z->state)->mode != DICT0) - return Z_STREAM_ERROR; - s = (inflate_state*)z->state; - - if (adler32(1L, dictionary, dictLength) != z->adler) return Z_DATA_ERROR; - z->adler = 1L; - - if (length >= ((uInt)1<<s->wbits)) - { - length = (1<<s->wbits)-1; - dictionary += dictLength - length; - } - inflate_set_dictionary(s->blocks, dictionary, length); - s->mode = BLOCKS; - return Z_OK; -} - - -int ZEXPORT inflateSync(z) -z_streamp z; -{ - uInt n; /* number of bytes to look at */ - Bytef *p; /* pointer to bytes */ - uInt m; /* number of marker bytes found in a row */ -
uLong r, w; /* temporaries to save total_in and total_out */ - inflate_state* s; - - /* set up */ - if (z == Z_NULL || z->state == Z_NULL) - return Z_STREAM_ERROR; - s = (inflate_state*)z->state; - if (s->mode != BAD) - { - s->mode = BAD; - s->sub.marker = 0; - } - if ((n = z->avail_in) == 0) - return Z_BUF_ERROR; - p = z->next_in; - m = s->sub.marker; - - /* search */ - while (n && m < 4) - { - static const Byte mark[4] = {0, 0, 0xff, 0xff}; - if (*p == mark[m]) - m++; - else if (*p) - m = 0; - else - m = 4 - m; - p++, n--; - } - - /* restore */ - z->total_in += p - z->next_in; - z->next_in = p; - z->avail_in = n; - s->sub.marker = m; - - /* return no joy or set up to restart on a new block */ - if (m != 4) - return Z_DATA_ERROR; - r = z->total_in; w = z->total_out; - inflateReset(z); - z->total_in = r; z->total_out = w; - s->mode = BLOCKS; - return Z_OK; -} - - -/* Returns true if inflate is currently at the end of a block generated - * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP - * implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH - * but removes the length bytes of the resulting empty stored block. When - * decompressing, PPP checks that at the end of input packet, inflate is - * waiting for these length bytes. 
- */ -int ZEXPORT inflateSyncPoint(z) -z_streamp z; -{ - if (z == Z_NULL || z->state == Z_NULL || ((inflate_state*)z->state)->blocks == Z_NULL) - return Z_STREAM_ERROR; - return inflate_blocks_sync_point(((inflate_state*)z->state)->blocks); -} -#undef NEEDBYTE -#undef NEXTBYTE -/* --- inflate.c */ - -/* +++ infblock.c */ -/* infblock.c -- interpret and process block types to last block - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ -/* #include "infblock.h" */ - -/* +++ inftrees.h */ -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Huffman code lookup table entry--this entry is four bytes for machines - that have 16-bit pointers (e.g. PC's in the small or medium model). */ - -typedef struct inflate_huft_s FAR inflate_huft; - -struct inflate_huft_s { - union { - struct { - Byte Exop; /* number of extra bits or operation */ - Byte Bits; /* number of bits in this code or subcode */ - } what; - uInt pad; /* pad structure to a power of 2 (4 bytes for */ - } word; /* 16-bit, 8 bytes for 32-bit int's) */ - uInt base; /* literal, length base, distance base, - or table offset */ -}; - -/* Maximum size of dynamic tree. The maximum found in a long but non- - exhaustive search was 1004 huft structures (850 for length/literals - and 154 for distances, the latter actually the result of an - exhaustive search). The actual maximum is not known, but the - value below is more than safe. 
*/ -#define MANY 1440 - -static int inflate_trees_bits OF(( - uIntf *, /* 19 code lengths */ - uIntf *, /* bits tree desired/actual depth */ - inflate_huft * FAR *, /* bits tree result */ - inflate_huft *, /* space for trees */ - z_streamp)); /* for messages */ - -static int inflate_trees_dynamic OF(( - uInt, /* number of literal/length codes */ - uInt, /* number of distance codes */ - uIntf *, /* that many (total) code lengths */ - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *, /* distance tree result */ - inflate_huft *, /* space for trees */ - z_streamp)); /* for messages */ - -static int inflate_trees_fixed OF(( - uIntf *, /* literal desired/actual bit depth */ - uIntf *, /* distance desired/actual bit depth */ - inflate_huft * FAR *, /* literal/length tree result */ - inflate_huft * FAR *, /* distance tree result */ - z_streamp)); /* for memory allocation */ -/* --- inftrees.h */ - -/* +++ infcodes.h */ -/* infcodes.h -- header to use infcodes.c - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -struct inflate_codes_state; -typedef struct inflate_codes_state FAR inflate_codes_statef; - -static inflate_codes_statef *inflate_codes_new OF(( - uInt, uInt, - inflate_huft *, inflate_huft *, - z_streamp )); - -static int inflate_codes OF(( - inflate_blocks_statef *, - z_streamp , - int)); - -static void inflate_codes_free OF(( - inflate_codes_statef *, - z_streamp )); - -/* --- infcodes.h */ - -/* +++ infutil.h */ -/* infutil.h -- types and macros common to blocks and codes - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -#ifndef _INFUTIL_H -#define _INFUTIL_H - -typedef enum { - TYPE, /* get type bits (3, including end bit) */ - LENS, /* get lengths for stored */ - STORED, /* processing stored block */ - TABLE, /* get table lengths */ - BTREE, /* get bit lengths tree for a dynamic block */ - DTREE, /* get length, distance trees for a dynamic block */ - CODES, /* processing fixed or dynamic block */ - DRY, /* output remaining window bytes */ - DONEB, /* finished last block, done */ - BADB} /* got a data error--stuck here */ -inflate_block_mode; - -/* inflate blocks semi-private state */ -struct inflate_blocks_state { - - /* mode */ - inflate_block_mode mode; /* current inflate_block mode */ - - /* mode dependent information */ - union { - uInt left; /* if STORED, bytes left to copy */ - struct { - uInt table; /* table lengths (14 bits) */ - uInt index; /* index into blens (or border) */ - uIntf *blens; /* bit lengths of codes */ - uInt bb; /* bit length tree depth */ - inflate_huft *tb; /* bit length decoding tree */ - } trees; /* if DTREE, decoding info for trees */ - struct { - inflate_codes_statef - *codes; - } decode; /* if CODES, current state */ - } sub; /* submode */ - uInt last; /* 
true if this block is the last block */ - - /* mode independent information */ - uInt bitk; /* bits in bit buffer */ - uLong bitb; /* bit buffer */ - inflate_huft *hufts; /* single malloc for tree space */ - Bytef *window; /* sliding window */ - Bytef *end; /* one byte after sliding window */ - Bytef *read; /* window read pointer */ - Bytef *write; /* window write pointer */ - check_func checkfn; /* check function */ - uLong check; /* check on output */ - -}; - - -/* defines for inflate input/output */ -/* update pointers and return */ -#define UPDBITS {s->bitb=b;s->bitk=k;} -#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;} -#define UPDOUT {s->write=q;} -#define UPDATE {UPDBITS UPDIN UPDOUT} -#define LEAVE {UPDATE return inflate_flush(s,z,r);} -/* get bytes and bits */ -#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;} -#define NEEDBYTE {if(n)r=Z_OK;else LEAVE} -#define NEXTBYTE (n--,*p++) -#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}} -#define DUMPBITS(j) {b>>=(j);k-=(j);} -/* output bytes */ -#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q) -#define LOADOUT {q=s->write;m=(uInt)WAVAIL;} -#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}} -#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT} -#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;} -#define OUTBYTE(a) {*q++=(Byte)(a);m--;} -/* load local pointers */ -#define LOAD {LOADIN LOADOUT} - -/* masks for lower bits (size given to avoid silly warnings with Visual C++) */ -/* And'ing with mask[n] masks the lower n bits */ -static uInt inflate_mask[17] = { - 0x0000, - 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, - 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff -}; - -/* copy as much as possible from the sliding window to the output area */ -static int inflate_flush OF(( - inflate_blocks_statef *, - z_streamp , - int)); - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy
compilers */ -#endif - -#endif -/* --- infutil.h */ - -#ifndef NO_DUMMY_DECL -struct inflate_codes_state {int dummy;}; /* for buggy compilers */ -#endif - -/* simplify the use of the inflate_huft type with some defines */ -#define exop word.what.Exop -#define bits word.what.Bits - -/* Table for deflate from PKZIP's appnote.txt. */ -static const uInt border[] = { /* Order of the bit length code lengths */ - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -/* - Notes beyond the 1.93a appnote.txt: - - 1. Distance pointers never point before the beginning of the output - stream. - 2. Distance pointers can point back across blocks, up to 32k away. - 3. There is an implied maximum of 7 bits for the bit length table and - 15 bits for the actual data. - 4. If only one code exists, then it is encoded using one bit. (Zero - would be more efficient, but perhaps a little confusing.) If two - codes exist, they are coded using one bit each (0 and 1). - 5. There is no way of sending zero distance codes--a dummy must be - sent if there are none. (History: a pre 2.0 version of PKZIP would - store blocks with no distance codes, but this was discovered to be - too harsh a criterion.) Valid only for 1.93a. 2.04c does allow - zero distance codes, which is sent as one code of zero bits in - length. - 6. There are up to 286 literal/length codes. Code 256 represents the - end-of-block. Note however that the static length tree defines - 288 codes just to fill out the Huffman codes. Codes 286 and 287 - cannot be used though, since there is no length base or extra bits - defined for them. Similarily, there are up to 30 distance codes. - However, static trees define 32 codes (all 5 bits) to fill out the - Huffman codes, but the last two had better not show up in the data. - 7. Unzip can check dynamic Huffman blocks for complete code sets. - The exception is that a single code would not be complete (see #4). - 8. 
The five bits following the block type is really the number of - literal codes sent minus 257. - 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits - (1+6+6). Therefore, to output three times the length, you output - three codes (1+1+1), whereas to output four times the same length, - you only need two codes (1+3). Hmm. - 10. In the tree reconstruction algorithm, Code = Code + Increment - only if BitLength(i) is not zero. (Pretty obvious.) - 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) - 12. Note: length code 284 can represent 227-258, but length code 285 - really is 258. The last length deserves its own, short code - since it gets used a lot in very redundant files. The length - 258 is special since 258 - 3 (the min match length) is 255. - 13. The literal/length and distance code bit lengths are read as a - single stream of lengths. It is possible (and advantageous) for - a repeat code (16, 17, or 18) to go across the boundary between - the two sets of lengths. 
- */ - - -static void inflate_blocks_reset(s, z, c) -inflate_blocks_statef *s; -z_streamp z; -uLongf *c; -{ - if (c != Z_NULL) - *c = s->check; - if (s->mode == BTREE || s->mode == DTREE) - ZFREE(z, s->sub.trees.blens); - if (s->mode == CODES) - inflate_codes_free(s->sub.decode.codes, z); - s->mode = TYPE; - s->bitk = 0; - s->bitb = 0; - s->read = s->write = s->window; - if (s->checkfn != Z_NULL) - z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0); - Tracev((stderr, "inflate: blocks reset\n")); -} - - -static inflate_blocks_statef *inflate_blocks_new(z, c, w) -z_streamp z; -check_func c; -uInt w; -{ - inflate_blocks_statef *s; - - if ((s = (inflate_blocks_statef *)ZALLOC - (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL) - return s; - if ((s->hufts = - (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY)) == Z_NULL) - { - ZFREE(z, s); - return Z_NULL; - } - if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL) - { - ZFREE(z, s->hufts); - ZFREE(z, s); - return Z_NULL; - } - s->end = s->window + w; - s->checkfn = c; - s->mode = TYPE; - Tracev((stderr, "inflate: blocks allocated\n")); - inflate_blocks_reset(s, z, Z_NULL); - return s; -} - - -static int inflate_blocks(s, z, r) -inflate_blocks_statef *s; -z_streamp z; -int r; -{ - uInt t; /* temporary storage */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - - /* copy input/output information to locals (UPDATE macro restores) */ - LOAD - - /* process input based on current state */ - while (1) switch (s->mode) - { - case TYPE: - NEEDBITS(3) - t = (uInt)b & 7; - s->last = t & 1; - switch (t >> 1) - { - case 0: /* stored */ - Tracev((stderr, "inflate: stored block%s\n", - s->last ? 
" (last)" : "")); - DUMPBITS(3) - t = k & 7; /* go to byte boundary */ - DUMPBITS(t) - s->mode = LENS; /* get length of stored block */ - break; - case 1: /* fixed */ - Tracev((stderr, "inflate: fixed codes block%s\n", - s->last ? " (last)" : "")); - { - uInt bl, bd; - inflate_huft *tl, *td; - - inflate_trees_fixed(&bl, &bd, &tl, &td, z); - s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z); - if (s->sub.decode.codes == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - } - DUMPBITS(3) - s->mode = CODES; - break; - case 2: /* dynamic */ - Tracev((stderr, "inflate: dynamic codes block%s\n", - s->last ? " (last)" : "")); - DUMPBITS(3) - s->mode = TABLE; - break; - case 3: /* illegal */ - DUMPBITS(3) - s->mode = BADB; - z->msg = (char*)"invalid block type"; - r = Z_DATA_ERROR; - LEAVE - } - break; - case LENS: - NEEDBITS(32) - if ((((~b) >> 16) & 0xffff) != (b & 0xffff)) - { - s->mode = BADB; - z->msg = (char*)"invalid stored block lengths"; - r = Z_DATA_ERROR; - LEAVE - } - s->sub.left = (uInt)b & 0xffff; - b = k = 0; /* dump bits */ - Tracev((stderr, "inflate: stored length %u\n", s->sub.left)); - s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE); - break; - case STORED: - if (n == 0) - LEAVE - NEEDOUT - t = s->sub.left; - if (t > n) t = n; - if (t > m) t = m; - zmemcpy(q, p, t); - p += t; n -= t; - q += t; m -= t; - if ((s->sub.left -= t) != 0) - break; - Tracev((stderr, "inflate: stored end, %lu total out\n", - z->total_out + (q >= s->read ? q - s->read : - (s->end - s->read) + (q - s->window)))); - s->mode = s->last ? 
DRY : TYPE; - break; - case TABLE: - NEEDBITS(14) - s->sub.trees.table = t = (uInt)b & 0x3fff; -#ifndef PKZIP_BUG_WORKAROUND - if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29) - { - s->mode = BADB; - z->msg = (char*)"too many length or distance symbols"; - r = Z_DATA_ERROR; - LEAVE - } -#endif - t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); - if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - DUMPBITS(14) - s->sub.trees.index = 0; - Tracev((stderr, "inflate: table sizes ok\n")); - s->mode = BTREE; - case BTREE: - while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10)) - { - NEEDBITS(3) - s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7; - DUMPBITS(3) - } - while (s->sub.trees.index < 19) - s->sub.trees.blens[border[s->sub.trees.index++]] = 0; - s->sub.trees.bb = 7; - t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb, - &s->sub.trees.tb, s->hufts, z); - if (t != Z_OK) - { - r = t; - if (r == Z_DATA_ERROR) - { - ZFREE(z, s->sub.trees.blens); - s->mode = BADB; - } - LEAVE - } - s->sub.trees.index = 0; - Tracev((stderr, "inflate: bits tree ok\n")); - s->mode = DTREE; - case DTREE: - while (t = s->sub.trees.table, - s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f)) - { - inflate_huft *h; - uInt i, j, c; - - t = s->sub.trees.bb; - NEEDBITS(t) - h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]); - t = h->bits; - c = h->base; - if (c < 16) - { - DUMPBITS(t) - s->sub.trees.blens[s->sub.trees.index++] = c; - } - else /* c == 16..18 */ - { - i = c == 18 ? 7 : c - 14; - j = c == 18 ? 11 : 3; - NEEDBITS(t + i) - DUMPBITS(t) - j += (uInt)b & inflate_mask[i]; - DUMPBITS(i) - i = s->sub.trees.index; - t = s->sub.trees.table; - if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) || - (c == 16 && i < 1)) - { - ZFREE(z, s->sub.trees.blens); - s->mode = BADB; - z->msg = (char*)"invalid bit length repeat"; - r = Z_DATA_ERROR; - LEAVE - } - c = c == 16 ? 
s->sub.trees.blens[i - 1] : 0; - do { - s->sub.trees.blens[i++] = c; - } while (--j); - s->sub.trees.index = i; - } - } - s->sub.trees.tb = Z_NULL; - { - uInt bl, bd; - inflate_huft *tl, *td; - inflate_codes_statef *c; - - bl = 9; /* must be <= 9 for lookahead assumptions */ - bd = 6; /* must be <= 9 for lookahead assumptions */ - t = s->sub.trees.table; - t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f), - s->sub.trees.blens, &bl, &bd, &tl, &td, - s->hufts, z); - if (t != Z_OK) - { - if (t == (uInt)Z_DATA_ERROR) - { - ZFREE(z, s->sub.trees.blens); - s->mode = BADB; - } - r = t; - LEAVE - } - Tracev((stderr, "inflate: trees ok\n")); - if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL) - { - r = Z_MEM_ERROR; - LEAVE - } - s->sub.decode.codes = c; - } - ZFREE(z, s->sub.trees.blens); - s->mode = CODES; - case CODES: - UPDATE - if ((r = inflate_codes(s, z, r)) != Z_STREAM_END) - return inflate_flush(s, z, r); - r = Z_OK; - inflate_codes_free(s->sub.decode.codes, z); - LOAD - Tracev((stderr, "inflate: codes end, %lu total out\n", - z->total_out + (q >= s->read ? q - s->read : - (s->end - s->read) + (q - s->window)))); - if (!s->last) - { - s->mode = TYPE; - break; - } - s->mode = DRY; - case DRY: - FLUSH - if (s->read != s->write) - LEAVE - s->mode = DONEB; - case DONEB: - r = Z_STREAM_END; - LEAVE - case BADB: - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -} - - -static int inflate_blocks_free(s, z) -inflate_blocks_statef *s; -z_streamp z; -{ - inflate_blocks_reset(s, z, Z_NULL); - ZFREE(z, s->window); - ZFREE(z, s->hufts); - ZFREE(z, s); - Tracev((stderr, "inflate: blocks freed\n")); - return Z_OK; -} - - -static void inflate_set_dictionary(s, d, n) -inflate_blocks_statef *s; -const Bytef *d; -uInt n; -{ - zmemcpy(s->window, d, n); - s->read = s->write = s->window + n; -} - - -/* Returns true if inflate is currently at the end of a block generated - * by Z_SYNC_FLUSH or Z_FULL_FLUSH. 
- * IN assertion: s != Z_NULL - */ -static int inflate_blocks_sync_point(s) -inflate_blocks_statef *s; -{ - return s->mode == LENS; -} -/* --- infblock.c */ - -/* +++ inftrees.c */ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ -/* #include "inftrees.h" */ - -#if !defined(BUILDFIXED) && !defined(STDC) -# define BUILDFIXED /* non ANSI compilers may not accept inffixed.h */ -#endif - -const char inflate_copyright[] = - " inflate 1.1.4 Copyright 1995-2002 Mark Adler "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ -#endif - -/* simplify the use of the inflate_huft type with some defines */ -#define exop word.what.Exop -#define bits word.what.Bits - - -static int -huft_build OF(( - uIntf *, /* code lengths in bits */ - uInt, /* number of codes */ - uInt, /* number of "simple" codes */ - const uIntf *, /* list of base values for non-simple codes */ - const uIntf *, /* list of extra bits for non-simple codes */ - inflate_huft * FAR*,/* result: starting table */ - uIntf *, /* maximum lookup bits (returns actual) */ - inflate_huft *, /* space for trees */ - uInt *, /* hufts used in space */ - uIntf * )); /* space for values */ - -/* Tables for deflate from PKZIP's appnote.txt. 
*/ -static const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - /* see note #13 above about 258 */ -static const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */ - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, - 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */ -static const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577}; -static const uInt cpdext[30] = { /* Extra bits for distance codes */ - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, - 12, 12, 13, 13}; - -/* - Huffman code decoding is performed using a multi-level table lookup. - The fastest way to decode is to simply build a lookup table whose - size is determined by the longest code. However, the time it takes - to build this table can also be a factor if the data being decoded - is not very long. The most common codes are necessarily the - shortest codes, so those codes dominate the decoding time, and hence - the speed. The idea is you can have a shorter table that decodes the - shorter, more probable codes, and then point to subsidiary tables for - the longer codes. The time it costs to decode the longer codes is - then traded against the time it takes to make longer tables. - - This results of this trade are in the variables lbits and dbits - below. lbits is the number of bits the first level table for literal/ - length codes can decode in one step, and dbits is the same thing for - the distance codes. Subsequent tables are also less than or equal to - those sizes. 
These values may be adjusted either when all of the - codes are shorter than that, in which case the longest code length in - bits is used, or when the shortest code is *longer* than the requested - table size, in which case the length of the shortest code in bits is - used. - - There are two different values for the two tables, since they code a - different number of possibilities each. The literal/length table - codes 286 possible values, or in a flat code, a little over eight - bits. The distance table codes 30 possible values, or a little less - than five bits, flat. The optimum values for speed end up being - about one bit more than those, so lbits is 8+1 and dbits is 5+1. - The optimum values may differ though from machine to machine, and - possibly even between compilers. Your mileage may vary. - */ - - -/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ -#define BMAX 15 /* maximum bit length of any code */ - -/* Given a list of code lengths and a maximum table size, make a set of - tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR - if the given code set is incomplete (the tables are still built in this - case), or Z_DATA_ERROR if the input is invalid. 
*/ -#if 0 -uIntf *b; /* code lengths in bits (all assumed <= BMAX) */ -uInt n; /* number of codes (assumed <= 288) */ -uInt s; /* number of simple-valued codes (0..s-1) */ -const uIntf *d; /* list of base values for non-simple codes */ -const uIntf *e; /* list of extra bits for non-simple codes */ -inflate_huft * FAR *t; /* result: starting table */ -uIntf *m; /* maximum lookup bits, returns actual */ -inflate_huft *hp; /* space for trees */ -uInt *hn; /* hufts used in space */ -uIntf *v; /* working area: values in order of bit length */ -#endif - -static int -huft_build(uIntf *b, uInt n, uInt s, const uIntf *d, const uIntf *e, - inflate_huft * FAR *t, uIntf *m, inflate_huft *hp, uInt *hn, - uIntf *v) -{ - uInt a; /* counter for codes of length k */ - uInt c[BMAX+1]; /* bit length count table */ - uInt f; /* i repeats in table every f entries */ - int g; /* maximum code length */ - int h; /* table level */ - uInt i; /* counter, current code */ - uInt j; /* counter */ - int k; /* number of bits in current code */ - int l; /* bits per table (returned in m) */ - uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */ - uIntf *p; /* pointer into c[], b[], or v[] */ - inflate_huft *q; /* points to current table */ - struct inflate_huft_s r; /* table entry for structure assignment */ - inflate_huft *u[BMAX]; /* table stack */ - int w; /* bits before this table == (l * h) */ - uInt x[BMAX+1]; /* bit offsets, then code stack */ - uIntf *xp; /* pointer into x */ - int y; /* number of dummy codes added */ - uInt z; /* number of entries in current table */ - - - /* Generate counts for each bit length */ - p = c; -#define C0 *p++ = 0; -#define C2 C0 C0 C0 C0 -#define C4 C2 C2 C2 C2 - C4 /* clear c[]--assume BMAX+1 is 16 */ - p = b; i = n; - do { - c[*p++]++; /* assume all entries <= BMAX */ - } while (--i); - if (c[0] == n) /* null input--all zero length codes */ - { - *t = (inflate_huft *)Z_NULL; - *m = 0; - return Z_OK; - } - - - /* Find minimum and maximum length, bound *m 
by those */ - l = *m; - for (j = 1; j <= BMAX; j++) - if (c[j]) - break; - k = j; /* minimum code length */ - if ((uInt)l < j) - l = j; - for (i = BMAX; i; i--) - if (c[i]) - break; - g = i; /* maximum code length */ - if ((uInt)l > i) - l = i; - *m = l; - - - /* Adjust last length count to fill out codes, if needed */ - for (y = 1 << j; j < i; j++, y <<= 1) - if ((y -= c[j]) < 0) - return Z_DATA_ERROR; - if ((y -= c[i]) < 0) - return Z_DATA_ERROR; - c[i] += y; - - - /* Generate starting offsets into the value table for each length */ - x[1] = j = 0; - p = c + 1; xp = x + 2; - while (--i) { /* note that i == g from above */ - *xp++ = (j += *p++); - } - - - /* Make a table of values in order of bit lengths */ - p = b; i = 0; - do { - if ((j = *p++) != 0) - v[x[j]++] = i; - } while (++i < n); - n = x[g]; /* set n to length of v */ - - - /* Generate the Huffman codes and for each, make the table entries */ - x[0] = i = 0; /* first Huffman code is zero */ - p = v; /* grab values in bit order */ - h = -1; /* no tables yet--level -1 */ - w = -l; /* bits decoded == (l * h) */ - u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */ - q = (inflate_huft *)Z_NULL; /* ditto */ - z = 0; /* ditto */ - - /* go through the bit lengths (k already is bits in shortest code) */ - for (; k <= g; k++) - { - a = c[k]; - while (a--) - { - /* here i is the Huffman code of length k bits for value *p */ - /* make tables up to required level */ - while (k > w + l) - { - h++; - w += l; /* previous table always l bits */ - - /* compute minimum size table less than or equal to l bits */ - z = g - w; - z = z > (uInt)l ? 
l : z; /* table size upper limit */ - if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ - { /* too few codes for k-w bit table */ - f -= a + 1; /* deduct codes from patterns left */ - xp = c + k; - if (j < z) - while (++j < z) /* try smaller tables up to z bits */ - { - if ((f <<= 1) <= *++xp) - break; /* enough codes to use up j bits */ - f -= *xp; /* else deduct codes from patterns */ - } - } - z = 1 << j; /* table entries for j-bit table */ - - /* allocate new table */ - if (*hn + z > MANY) /* (note: doesn't matter for fixed) */ - return Z_DATA_ERROR; /* overflow of MANY */ - u[h] = q = hp + *hn; - *hn += z; - - /* connect to last table, if there is one */ - if (h) - { - x[h] = i; /* save pattern for backing up */ - r.bits = (Byte)l; /* bits to dump before this table */ - r.exop = (Byte)j; /* bits in this table */ - j = i >> (w - l); - r.base = (uInt)(q - u[h-1] - j); /* offset to this table */ - u[h-1][j] = r; /* connect to last table */ - } - else - *t = q; /* first table is returned result */ - } - - /* set up table entry in r */ - r.bits = (Byte)(k - w); - if (p >= v + n) - r.exop = 128 + 64; /* out of values--invalid code */ - else if (*p < s) - { - r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ - r.base = *p++; /* simple code is just the value */ - } - else - { - r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ - r.base = d[*p++ - s]; - } - - /* fill code-like entries with r */ - f = 1 << (k - w); - for (j = i >> w; j < z; j += f) - q[j] = r; - - /* backwards increment the k-bit code i */ - for (j = 1 << (k - 1); i & j; j >>= 1) - i ^= j; - i ^= j; - - /* backup over finished tables */ - mask = (1 << w) - 1; /* needed on HP, cc -O bug */ - while ((i & mask) != x[h]) - { - h--; /* don't need to update q */ - w -= l; - mask = (1 << w) - 1; - } - } - } - - - /* Return Z_BUF_ERROR if we were given an incomplete table */ - return y != 0 && g != 1 ? 
Z_BUF_ERROR : Z_OK; -} - - -static int inflate_trees_bits(c, bb, tb, hp, z) -uIntf *c; /* 19 code lengths */ -uIntf *bb; /* bits tree desired/actual depth */ -inflate_huft * FAR *tb; /* bits tree result */ -inflate_huft *hp; /* space for trees */ -z_streamp z; /* for messages */ -{ - int r; - uInt hn = 0; /* hufts used in space */ - uIntf *v; /* work area for huft_build */ - - if ((v = (uIntf*)ZALLOC(z, 19, sizeof(uInt))) == Z_NULL) - return Z_MEM_ERROR; - r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, - tb, bb, hp, &hn, v); - if (r == Z_DATA_ERROR) - z->msg = (char*)"oversubscribed dynamic bit lengths tree"; - else if (r == Z_BUF_ERROR || *bb == 0) - { - z->msg = (char*)"incomplete dynamic bit lengths tree"; - r = Z_DATA_ERROR; - } - ZFREE(z, v); - return r; -} - - -static int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z) -uInt nl; /* number of literal/length codes */ -uInt nd; /* number of distance codes */ -uIntf *c; /* that many (total) code lengths */ -uIntf *bl; /* literal desired/actual bit depth */ -uIntf *bd; /* distance desired/actual bit depth */ -inflate_huft * FAR *tl; /* literal/length tree result */ -inflate_huft * FAR *td; /* distance tree result */ -inflate_huft *hp; /* space for trees */ -z_streamp z; /* for messages */ -{ - int r; - uInt hn = 0; /* hufts used in space */ - uIntf *v; /* work area for huft_build */ - - /* allocate work area */ - if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) - return Z_MEM_ERROR; - - /* build literal/length tree */ - r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v); - if (r != Z_OK || *bl == 0) - { - if (r == Z_DATA_ERROR) - z->msg = (char*)"oversubscribed literal/length tree"; - else if (r != Z_MEM_ERROR) - { - z->msg = (char*)"incomplete literal/length tree"; - r = Z_DATA_ERROR; - } - ZFREE(z, v); - return r; - } - - /* build distance tree */ - r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v); - if (r != Z_OK || (*bd == 0 && nl > 257)) - { - if (r == 
Z_DATA_ERROR) - z->msg = (char*)"oversubscribed distance tree"; - else if (r == Z_BUF_ERROR) { -#ifdef PKZIP_BUG_WORKAROUND - r = Z_OK; - } -#else - z->msg = (char*)"incomplete distance tree"; - r = Z_DATA_ERROR; - } - else if (r != Z_MEM_ERROR) - { - z->msg = (char*)"empty distance tree with lengths"; - r = Z_DATA_ERROR; - } - ZFREE(z, v); - return r; -#endif - } - - /* done */ - ZFREE(z, v); - return Z_OK; -} - - -/* build fixed tables only once--keep them here */ -#ifdef BUILDFIXED -static int fixed_built = 0; -#define FIXEDH 544 /* number of hufts used by fixed tables */ -static inflate_huft *fixed_mem = NULL; -static uInt fixed_bl; -static uInt fixed_bd; -static inflate_huft *fixed_tl; -static inflate_huft *fixed_td; -#else -/* +++ inffixed.h */ -/* inffixed.h -- table for decoding fixed codes - * Generated automatically by the maketree.c program - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
- */ - -static uInt fixed_bl = 9; -static uInt fixed_bd = 5; -static inflate_huft fixed_tl[] = { - {{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115}, - {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192}, - {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160}, - {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224}, - {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144}, - {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208}, - {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176}, - {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240}, - {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, - {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200}, - {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168}, - {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232}, - {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152}, - {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216}, - {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184}, - {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248}, - {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, - {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196}, - {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164}, - {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228}, - {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148}, - {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212}, - {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180}, - {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244}, - {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, - {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204}, - {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172}, - {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236}, - {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156}, - {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220}, - {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188}, - {{{0,8}},14}, {{{0,8}},142}, 
{{{0,8}},78}, {{{0,9}},252}, - {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, - {{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194}, - {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162}, - {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226}, - {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146}, - {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210}, - {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178}, - {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242}, - {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, - {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202}, - {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170}, - {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234}, - {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154}, - {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218}, - {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186}, - {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250}, - {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, - {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198}, - {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166}, - {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230}, - {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150}, - {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214}, - {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182}, - {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246}, - {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, - {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206}, - {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174}, - {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238}, - {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158}, - {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222}, - {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190}, - {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},254}, - {{{96,7}},256}, {{{0,8}},80}, 
{{{0,8}},16}, {{{84,8}},115}, - {{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193}, - {{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161}, - {{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225}, - {{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145}, - {{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209}, - {{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177}, - {{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241}, - {{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227}, - {{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201}, - {{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169}, - {{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233}, - {{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153}, - {{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217}, - {{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185}, - {{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249}, - {{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163}, - {{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197}, - {{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165}, - {{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229}, - {{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149}, - {{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213}, - {{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181}, - {{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245}, - {{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0}, - {{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205}, - {{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173}, - {{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237}, - {{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157}, - {{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221}, - {{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189}, - {{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253}, - {{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131}, - {{{82,7}},31}, {{{0,8}},113}, 
{{{0,8}},49}, {{{0,9}},195}, - {{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163}, - {{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227}, - {{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147}, - {{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211}, - {{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179}, - {{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243}, - {{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258}, - {{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203}, - {{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171}, - {{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235}, - {{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155}, - {{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219}, - {{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187}, - {{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251}, - {{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195}, - {{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199}, - {{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167}, - {{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231}, - {{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151}, - {{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215}, - {{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183}, - {{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247}, - {{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0}, - {{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207}, - {{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175}, - {{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239}, - {{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159}, - {{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223}, - {{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191}, - {{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255} - }; -static inflate_huft fixed_td[] = { - {{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097}, - {{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, 
{{{93,5}},16385}, - {{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193}, - {{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577}, - {{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145}, - {{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577}, - {{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289}, - {{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577} - }; -/* --- inffixed.h */ -#endif - - -static int inflate_trees_fixed(bl, bd, tl, td, z) -uIntf *bl; /* literal desired/actual bit depth */ -uIntf *bd; /* distance desired/actual bit depth */ -inflate_huft * FAR *tl; /* literal/length tree result */ -inflate_huft * FAR *td; /* distance tree result */ -z_streamp z; /* for memory allocation */ -{ -#ifdef BUILDFIXED - /* build fixed tables if not already */ - if (!fixed_built) - { - int k; /* temporary variable */ - uInt f = 0; /* number of hufts used in fixed_mem */ - uIntf *c; /* length list for huft_build */ - uIntf *v; /* work area for huft_build */ - - /* allocate memory */ - if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) - return Z_MEM_ERROR; - if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL) - { - ZFREE(z, c); - return Z_MEM_ERROR; - } - - if ((fixed_mem = (inflate_huft*)ZALLOC(z, FIXEDH, sizeof(inflate_huft))) == Z_NULL) - { - ZFREE(z, c); - ZFREE(z, v); - return Z_MEM_ERROR; - } - - /* literal table */ - for (k = 0; k < 144; k++) - c[k] = 8; - for (; k < 256; k++) - c[k] = 9; - for (; k < 280; k++) - c[k] = 7; - for (; k < 288; k++) - c[k] = 8; - fixed_bl = 9; - huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, - fixed_mem, &f, v); - - /* distance table */ - for (k = 0; k < 30; k++) - c[k] = 5; - fixed_bd = 5; - huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, - fixed_mem, &f, v); - - /* done */ - ZFREE(z, v); - ZFREE(z, c); - fixed_built = 1; - } -#endif - *bl = fixed_bl; - *bd = fixed_bd; - *tl = fixed_tl; - *td = fixed_td; - return Z_OK; -} -/* --- inftrees.c 
*/ - -/* +++ infcodes.c */ -/* infcodes.c -- process literals and length/distance pairs - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ -/* #include "inftrees.h" */ -/* #include "infblock.h" */ -/* #include "infcodes.h" */ -/* #include "infutil.h" */ - -/* +++ inffast.h */ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -static int inflate_fast OF(( - uInt, - uInt, - inflate_huft *, - inflate_huft *, - inflate_blocks_statef *, - z_streamp )); -/* --- inffast.h */ - -/* simplify the use of the inflate_huft type with some defines */ -#define exop word.what.Exop -#define bits word.what.Bits - -typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - START, /* x: set up for LEN */ - LEN, /* i: get length/literal/eob next */ - LENEXT, /* i: getting length extra (have base) */ - DIST, /* i: get distance next */ - DISTEXT, /* i: getting distance extra */ - COPY, /* o: copying bytes in window, waiting for space */ - LIT, /* o: got literal, waiting for output space */ - WASH, /* o: got eob, possibly still output waiting */ - END, /* x: got eob and all data flushed */ - BADCODE} /* x: got error */ -inflate_codes_mode; - -/* inflate codes private state */ -struct inflate_codes_state { - - /* mode */ - inflate_codes_mode mode; /* current inflate_codes mode */ - - /* mode dependent information */ - uInt len; - union { - struct { - inflate_huft *tree; /* pointer into tree */ - uInt need; /* bits needed */ - } code; /* if LEN or DIST, where in tree */ - uInt lit; /* if LIT, literal */ - struct { - uInt get; /* bits to get for extra */ - uInt dist; /* distance back 
to copy from */ - } copy; /* if EXT or COPY, where and how much */ - } sub; /* submode */ - - /* mode independent information */ - Byte lbits; /* ltree bits decoded per branch */ - Byte dbits; /* dtree bits decoder per branch */ - inflate_huft *ltree; /* literal/length/eob tree */ - inflate_huft *dtree; /* distance tree */ - -}; - - -static inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z) -uInt bl, bd; -inflate_huft *tl; -inflate_huft *td; /* need separate declaration for Borland C++ */ -z_streamp z; -{ - inflate_codes_statef *c; - - if ((c = (inflate_codes_statef *) - ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL) - { - c->mode = START; - c->lbits = (Byte)bl; - c->dbits = (Byte)bd; - c->ltree = tl; - c->dtree = td; - Tracev((stderr, "inflate: codes new\n")); - } - return c; -} - - -static int inflate_codes(s, z, r) -inflate_blocks_statef *s; -z_streamp z; -int r; -{ - uInt j; /* temporary storage */ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - Bytef *f; /* pointer to copy strings from */ - inflate_codes_statef *c = s->sub.decode.codes; /* codes state */ - - /* copy input/output information to locals (UPDATE macro restores) */ - LOAD - - /* process input and output based on current state */ - while (1) switch (c->mode) - { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */ - case START: /* x: set up for LEN */ -#ifndef SLOW - if (m >= 258 && n >= 10) - { - UPDATE - r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z); - LOAD - if (r != Z_OK) - { - c->mode = r == Z_STREAM_END ? 
WASH : BADCODE; - break; - } - } -#endif /* !SLOW */ - c->sub.code.need = c->lbits; - c->sub.code.tree = c->ltree; - c->mode = LEN; - case LEN: /* i: get length/literal/eob next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e == 0) /* literal */ - { - c->sub.lit = t->base; - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? - "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", t->base)); - c->mode = LIT; - break; - } - if (e & 16) /* length */ - { - c->sub.copy.get = e & 15; - c->len = t->base; - c->mode = LENEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t + t->base; - break; - } - if (e & 32) /* end of block */ - { - Tracevv((stderr, "inflate: end of block\n")); - c->mode = WASH; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = (char*)"invalid literal/length code"; - r = Z_DATA_ERROR; - LEAVE - case LENEXT: /* i: getting length extra (have base) */ - j = c->sub.copy.get; - NEEDBITS(j) - c->len += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - c->sub.code.need = c->dbits; - c->sub.code.tree = c->dtree; - Tracevv((stderr, "inflate: length %u\n", c->len)); - c->mode = DIST; - case DIST: /* i: get distance next */ - j = c->sub.code.need; - NEEDBITS(j) - t = c->sub.code.tree + ((uInt)b & inflate_mask[j]); - DUMPBITS(t->bits) - e = (uInt)(t->exop); - if (e & 16) /* distance */ - { - c->sub.copy.get = e & 15; - c->sub.copy.dist = t->base; - c->mode = DISTEXT; - break; - } - if ((e & 64) == 0) /* next table */ - { - c->sub.code.need = e; - c->sub.code.tree = t + t->base; - break; - } - c->mode = BADCODE; /* invalid code */ - z->msg = (char*)"invalid distance code"; - r = Z_DATA_ERROR; - LEAVE - case DISTEXT: /* i: getting distance extra */ - j = c->sub.copy.get; - NEEDBITS(j) - c->sub.copy.dist += (uInt)b & inflate_mask[j]; - DUMPBITS(j) - Tracevv((stderr, "inflate: distance %u\n", 
c->sub.copy.dist)); - c->mode = COPY; - case COPY: /* o: copying bytes in window, waiting for space */ - f = q - c->sub.copy.dist; - while (f < s->window) /* modulo window size-"while" instead */ - f += s->end - s->window; /* of "if" handles invalid distances */ - while (c->len) - { - NEEDOUT - OUTBYTE(*f++) - if (f == s->end) - f = s->window; - c->len--; - } - c->mode = START; - break; - case LIT: /* o: got literal, waiting for output space */ - NEEDOUT - OUTBYTE(c->sub.lit) - c->mode = START; - break; - case WASH: /* o: got eob, possibly more output */ - if (k > 7) /* return unused byte, if any */ - { - Assert(k < 16, "inflate_codes grabbed too many bytes"); - k -= 8; - n++; - p--; /* can always return one */ - } - FLUSH - if (s->read != s->write) - LEAVE - c->mode = END; - case END: - r = Z_STREAM_END; - LEAVE - case BADCODE: /* x: got error */ - r = Z_DATA_ERROR; - LEAVE - default: - r = Z_STREAM_ERROR; - LEAVE - } -#ifdef NEED_DUMMY_RETURN - return Z_STREAM_ERROR; /* Some dumb compilers complain without this */ -#endif -} - - -static void inflate_codes_free(c, z) -inflate_codes_statef *c; -z_streamp z; -{ - ZFREE(z, c); - Tracev((stderr, "inflate: codes free\n")); -} -/* --- infcodes.c */ - -/* +++ infutil.c */ -/* inflate_util.c -- data and routines common to blocks and codes - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ -/* #include "infblock.h" */ -/* #include "inftrees.h" */ -/* #include "infcodes.h" */ -/* #include "infutil.h" */ - -#ifndef NO_DUMMY_DECL -struct inflate_codes_state {int dummy;}; /* for buggy compilers */ -#endif - -/* copy as much as possible from the sliding window to the output area */ -static int inflate_flush(s, z, r) -inflate_blocks_statef *s; -z_streamp z; -int r; -{ - uInt n; - Bytef *p; - Bytef *q; - - /* local copies of source and destination pointers */ - p = z->next_out; - q = s->read; - - /* compute number of bytes to copy as 
far as end of window */ - n = (uInt)((q <= s->write ? s->write : s->end) - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - z->adler = s->check = (*s->checkfn)(s->check, q, n); - - /* copy as far as end of window */ - zmemcpy(p, q, n); - p += n; - q += n; - - /* see if more to copy at beginning of window */ - if (q == s->end) - { - /* wrap pointers */ - q = s->window; - if (s->write == s->end) - s->write = s->window; - - /* compute bytes to copy */ - n = (uInt)(s->write - q); - if (n > z->avail_out) n = z->avail_out; - if (n && r == Z_BUF_ERROR) r = Z_OK; - - /* update counters */ - z->avail_out -= n; - z->total_out += n; - - /* update check information */ - if (s->checkfn != Z_NULL) - z->adler = s->check = (*s->checkfn)(s->check, q, n); - - /* copy */ - zmemcpy(p, q, n); - p += n; - q += n; - } - - /* update pointers */ - z->next_out = p; - s->read = q; - - /* done */ - return r; -} -/* --- infutil.c */ - -/* +++ inffast.c */ -/* inffast.c -- process literals and length/distance pairs fast - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ -/* #include "inftrees.h" */ -/* #include "infblock.h" */ -/* #include "infcodes.h" */ -/* #include "infutil.h" */ -/* #include "inffast.h" */ - -#ifndef NO_DUMMY_DECL -struct inflate_codes_state {int dummy;}; /* for buggy compilers */ -#endif - -/* simplify the use of the inflate_huft type with some defines */ -#define exop word.what.Exop -#define bits word.what.Bits - -/* macros for bit input with no checking and for returning unused bytes */ -#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<avail_in-n;c=(k>>3)>3:c;n+=c;p-=c;k-=c<<3;} - -/* Called with number of bytes left to write in window at least 258 - (the maximum string length) and number of input bytes 
available - at least ten. The ten bytes are six bytes for the longest length/ - distance pair plus four bytes for overloading the bit buffer. */ - -static int inflate_fast(bl, bd, tl, td, s, z) -uInt bl, bd; -inflate_huft *tl; -inflate_huft *td; /* need separate declaration for Borland C++ */ -inflate_blocks_statef *s; -z_streamp z; -{ - inflate_huft *t; /* temporary pointer */ - uInt e; /* extra bits or operation */ - uLong b; /* bit buffer */ - uInt k; /* bits in bit buffer */ - Bytef *p; /* input data pointer */ - uInt n; /* bytes available there */ - Bytef *q; /* output window write pointer */ - uInt m; /* bytes to end of window or read pointer */ - uInt ml; /* mask for literal/length tree */ - uInt md; /* mask for distance tree */ - uInt c; /* bytes to copy */ - uInt d; /* distance back to copy from */ - Bytef *r; /* copy source pointer */ - - /* load input, output, bit values */ - LOAD - - /* initialize masks */ - ml = inflate_mask[bl]; - md = inflate_mask[bd]; - - /* do until not enough input or output space for fast loop */ - do { /* assume called with m >= 258 && n >= 10 */ - /* get literal/length code */ - GRABBITS(20) /* max bits for literal/length code */ - if ((e = (t = tl + ((uInt)b & ml))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - continue; - } - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits for length */ - e &= 15; - c = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * length %u\n", c)); - - /* decode distance base of block to copy */ - GRABBITS(15); /* max bits for distance code */ - e = (t = td + ((uInt)b & md))->exop; - do { - DUMPBITS(t->bits) - if (e & 16) - { - /* get extra bits to add to distance base */ - e &= 15; - GRABBITS(e) /* get extra bits (up to 13) */ - d = t->base + ((uInt)b & inflate_mask[e]); - DUMPBITS(e) - Tracevv((stderr, "inflate: * distance %u\n", d)); - - /* do the copy */ - m -= c; - r = q - d; - if (r < s->window) /* wrap if needed */ - { - do { - r += s->end - s->window; /* force pointer in window */ - } while (r < s->window); /* covers invalid distances */ - e = s->end - r; - if (c > e) - { - c -= e; /* wrapped copy */ - do { - *q++ = *r++; - } while (--e); - r = s->window; - do { - *q++ = *r++; - } while (--c); - } - else /* normal copy */ - { - *q++ = *r++; c--; - *q++ = *r++; c--; - do { - *q++ = *r++; - } while (--c); - } - } - else /* normal copy */ - { - *q++ = *r++; c--; - *q++ = *r++; c--; - do { - *q++ = *r++; - } while (--c); - } - break; - } - else if ((e & 64) == 0) - { - t += t->base; - e = (t += ((uInt)b & inflate_mask[e]))->exop; - } - else - { - z->msg = (char*)"invalid distance code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - break; - } - if ((e & 64) == 0) - { - t += t->base; - if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0) - { - DUMPBITS(t->bits) - Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ? 
- "inflate: * literal '%c'\n" : - "inflate: * literal 0x%02x\n", t->base)); - *q++ = (Byte)t->base; - m--; - break; - } - } - else if (e & 32) - { - Tracevv((stderr, "inflate: * end of block\n")); - UNGRAB - UPDATE - return Z_STREAM_END; - } - else - { - z->msg = (char*)"invalid literal/length code"; - UNGRAB - UPDATE - return Z_DATA_ERROR; - } - } while (1); - } while (m >= 258 && n >= 10); - - /* not enough input or output--restore pointers and return */ - UNGRAB - UPDATE - return Z_OK; -} -/* --- inffast.c */ - -/* +++ zutil.c */ -/* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995-2002 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zutil.h" */ - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ -#endif - -#ifndef STDC -extern void exit OF((int)); -#endif - -const char * ZEXPORT zlibVersion() -{ - return ZLIB_VERSION; -} - -#ifdef DEBUG_ZLIB - -# ifndef verbose -# define verbose 0 -# endif -int z_verbose = verbose; - -void z_error (m) - char *m; -{ - fprintf(stderr, "%s\n", m); - exit(1); -} -#endif - -/* exported to allow conversion of error code to string for compress() and - * uncompress() - */ -const char * ZEXPORT zError(err) - int err; -{ - return ERR_MSG(err); -} - - -#ifndef HAVE_MEMCPY - -void zmemcpy(dest, source, len) - Bytef* dest; - const Bytef* source; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = *source++; /* ??? to be unrolled */ - } while (--len != 0); -} - -int zmemcmp(s1, s2, len) - const Bytef* s1; - const Bytef* s2; - uInt len; -{ - uInt j; - - for (j = 0; j < len; j++) { - if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; - } - return 0; -} - -void zmemzero(dest, len) - Bytef* dest; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = 0; /* ??? 
to be unrolled */ - } while (--len != 0); -} -#endif - -#ifdef __TURBOC__ -#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__) -/* Small and medium model in Turbo C are for now limited to near allocation - * with reduced MAX_WBITS and MAX_MEM_LEVEL - */ -# define MY_ZCALLOC - -/* Turbo C malloc() does not allow dynamic allocation of 64K bytes - * and farmalloc(64K) returns a pointer with an offset of 8, so we - * must fix the pointer. Warning: the pointer must be put back to its - * original form in order to free it, use zcfree(). - */ - -#define MAX_PTR 10 -/* 10*64K = 640K */ - -static int next_ptr = 0; - -typedef struct ptr_table_s { - voidpf org_ptr; - voidpf new_ptr; -} ptr_table; - -static ptr_table table[MAX_PTR]; -/* This table is used to remember the original form of pointers - * to large buffers (64K). Such pointers are normalized with a zero offset. - * Since MSDOS is not a preemptive multitasking OS, this table is not - * protected from concurrent access. This hack doesn't work anyway on - * a protected system like OS/2. Use Microsoft C instead. - */ - -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - voidpf buf = opaque; /* just to make some compilers happy */ - ulg bsize = (ulg)items*size; - - /* If we allocate less than 65520 bytes, we assume that farmalloc - * will return a usable pointer which doesn't have to be normalized. 
- */ - if (bsize < 65520L) { - buf = farmalloc(bsize); - if (*(ush*)&buf != 0) return buf; - } else { - buf = farmalloc(bsize + 16L); - } - if (buf == NULL || next_ptr >= MAX_PTR) return NULL; - table[next_ptr].org_ptr = buf; - - /* Normalize the pointer to seg:0 */ - *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; - *(ush*)&buf = 0; - table[next_ptr++].new_ptr = buf; - return buf; -} - -void zcfree (voidpf opaque, voidpf ptr) -{ - int n; - if (*(ush*)&ptr != 0) { /* object < 64K */ - farfree(ptr); - return; - } - /* Find the original pointer */ - for (n = 0; n < next_ptr; n++) { - if (ptr != table[n].new_ptr) continue; - - farfree(table[n].org_ptr); - while (++n < next_ptr) { - table[n-1] = table[n]; - } - next_ptr--; - return; - } - ptr = opaque; /* just to make some compilers happy */ - Assert(0, "zcfree: ptr not found"); -} -#endif -#endif /* __TURBOC__ */ - - -#if defined(M_I86) && !defined(__32BIT__) -/* Microsoft C in 16-bit mode */ - -# define MY_ZCALLOC - -#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) -# define _halloc halloc -# define _hfree hfree -#endif - -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - return _halloc((long)items, size); -} - -void zcfree (voidpf opaque, voidpf ptr) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - _hfree(ptr); -} - -#endif /* MSC */ - - -#ifndef MY_ZCALLOC /* Any system without a special alloc function */ - -#ifndef STDC -extern voidp calloc OF((uInt items, uInt size)); -extern void free OF((voidpf ptr)); -#endif - -voidpf zcalloc (opaque, items, size) - voidpf opaque; - unsigned items; - unsigned size; -{ - if (opaque) items += size - size; /* make compiler happy */ - return (voidpf)calloc(items, size); -} - -void zcfree (opaque, ptr) - voidpf opaque; - voidpf ptr; -{ - _FREE(ptr); - if (opaque) return; /* make compiler happy */ -} - -#endif /* MY_ZCALLOC */ -/* --- zutil.c */ - -/* +++ adler32.c */ -/* adler32.c -- compute the 
Adler-32 checksum of a data stream - * Copyright (C) 1995-2002 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* #include "zlib.h" */ - -#define BASE 65521L /* largest prime smaller than 65536 */ -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - -#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} -#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); -#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); -#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); -#define DO16(buf) DO8(buf,0); DO8(buf,8); - -/* ========================================================================= */ -uLong ZEXPORT -adler32(uLong adler, const Bytef *buf, uInt len) -{ - unsigned long s1 = adler & 0xffff; - unsigned long s2 = (adler >> 16) & 0xffff; - int k; - - if (buf == Z_NULL) return 1L; - - while (len > 0) { - k = len < NMAX ? len : NMAX; - len -= k; - while (k >= 16) { - DO16(buf); - buf += 16; - k -= 16; - } - if (k != 0) do { - s1 += *buf++; - s2 += s1; - } while (--k); - s1 %= BASE; - s2 %= BASE; - } - return (s2 << 16) | s1; -} -/* --- adler32.c */ diff --git a/libkern/zlib/README b/libkern/zlib/README new file mode 100644 index 000000000..758cc5002 --- /dev/null +++ b/libkern/zlib/README @@ -0,0 +1,125 @@ +ZLIB DATA COMPRESSION LIBRARY + +zlib 1.2.3 is a general purpose data compression library. All the code is +thread safe. The data format used by the zlib library is described by RFCs +(Request for Comments) 1950 to 1952 in the files +http://www.ietf.org/rfc/rfc1950.txt (zlib format), rfc1951.txt (deflate format) +and rfc1952.txt (gzip format). These documents are also available in other +formats from ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html + +All functions of the compression library are documented in the file zlib.h +(volunteer to write man pages welcome, contact zlib@gzip.org). 
A usage example +of the library is given in the file example.c which also tests that the library +is working correctly. Another example is given in the file minigzip.c. The +compression library itself is composed of all source files except example.c and +minigzip.c. + +To compile all files and run the test program, follow the instructions given at +the top of Makefile. In short "make test; make install" should work for most +machines. For Unix: "./configure; make test; make install". For MSDOS, use one +of the special makefiles such as Makefile.msc. For VMS, use make_vms.com. + +Questions about zlib should be sent to zlib@gzip.org, or to Gilles Vollant +for the Windows DLL version. The zlib home page is +http://www.zlib.org or http://www.gzip.org/zlib/ Before reporting a problem, +please check this site to verify that you have the latest version of zlib; +otherwise get the latest version and check whether the problem still exists or +not. + +PLEASE read the zlib FAQ http://www.gzip.org/zlib/zlib_faq.html before asking +for help. + +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available in +http://dogma.net/markn/articles/zlibtool/zlibtool.htm + +The changes made in version 1.2.3 are documented in the file ChangeLog. + +Unsupported third party contributions are provided in directory "contrib". + +A Java implementation of zlib is available in the Java Development Kit +http://java.sun.com/j2se/1.4.2/docs/api/java/util/zip/package-summary.html +See the zlib home page http://www.zlib.org for details. + +A Perl interface to zlib written by Paul Marquess is in the +CPAN (Comprehensive Perl Archive Network) sites +http://www.cpan.org/modules/by-module/Compress/ + +A Python interface to zlib written by A.M.
Kuchling is +available in Python 1.5 and later versions, see +http://www.python.org/doc/lib/module-zlib.html + +A zlib binding for TCL written by Andreas Kupries is +available at http://www.oche.de/~akupries/soft/trf/trf_zip.html + +An experimental package to read and write files in .zip format, written on top +of zlib by Gilles Vollant, is available in the +contrib/minizip directory of zlib. + + +Notes for some targets: + +- For Windows DLL versions, please see win32/DLL_FAQ.txt + +- For 64-bit Irix, deflate.c must be compiled without any optimization. With + -O, one libpng test fails. The test works in 32 bit mode (with the -n32 + compiler flag). The compiler bug has been reported to SGI. + +- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it works + when compiled with cc. + +- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is + necessary to get gzprintf working correctly. This is done by configure. + +- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with + other compilers. Use "make test" to check your compiler. + +- gzdopen is not supported on RISCOS, BEOS and by some Mac compilers. + +- For PalmOs, see http://palmzlib.sourceforge.net/ + +- When building a shared, i.e. dynamic library on Mac OS X, the library must be + installed before testing (do "make install" before "make test"), since the + library location is specified in the library. + + +Acknowledgments: + + The deflate format used by zlib was defined by Phil Katz. The deflate + and zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; + they are too numerous to cite here. + +Copyright notice: + + (C) 1995-2004 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software.
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* +receiving lengthy legal documents to sign. The sources are provided +for free but without warranty of any kind. The library has been +entirely written by Jean-loup Gailly and Mark Adler; it does not +include third-party code. + +If you redistribute modified sources, we would appreciate that you include +in the file ChangeLog history information documenting your changes. Please +read the FAQ for more information on the distribution of modified source +versions. diff --git a/libkern/zlib/adler32.c b/libkern/zlib/adler32.c new file mode 100644 index 000000000..c94fde187 --- /dev/null +++ b/libkern/zlib/adler32.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#if KERNEL + #include +#else + #include "zlib.h" +#endif /* KERNEL */ + +#define BASE 65521UL /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware */ +#ifdef NO_DIVIDE +# define MOD(a) \ + do { \ + if (a >= (BASE << 16)) a -= (BASE << 16); \ + if (a >= (BASE << 15)) a -= (BASE << 15); \ + if (a >= (BASE << 14)) a -= (BASE << 14); \ + if (a >= (BASE << 13)) a -= (BASE << 13); \ + if (a >= 
(BASE << 12)) a -= (BASE << 12); \ + if (a >= (BASE << 11)) a -= (BASE << 11); \ + if (a >= (BASE << 10)) a -= (BASE << 10); \ + if (a >= (BASE << 9)) a -= (BASE << 9); \ + if (a >= (BASE << 8)) a -= (BASE << 8); \ + if (a >= (BASE << 7)) a -= (BASE << 7); \ + if (a >= (BASE << 6)) a -= (BASE << 6); \ + if (a >= (BASE << 5)) a -= (BASE << 5); \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD4(a) \ + do { \ + if (a >= (BASE << 4)) a -= (BASE << 4); \ + if (a >= (BASE << 3)) a -= (BASE << 3); \ + if (a >= (BASE << 2)) a -= (BASE << 2); \ + if (a >= (BASE << 1)) a -= (BASE << 1); \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD4(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD4(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; 
+ } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* the derivation of this formula is left as an exercise for the reader */ + rem = (unsigned)(len2 % BASE); + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 > BASE) sum1 -= BASE; + if (sum1 > BASE) sum1 -= BASE; + if (sum2 > (BASE << 1)) sum2 -= (BASE << 1); + if (sum2 > BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} diff --git a/libkern/zlib/compress.c b/libkern/zlib/compress.c new file mode 100644 index 000000000..274008101 --- /dev/null +++ b/libkern/zlib/compress.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2003 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#if KERNEL + #include +#else + #include "zlib.h" +#endif /* KERNEL */ + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. 
+*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11; +} diff --git a/libkern/zlib/crc32.c b/libkern/zlib/crc32.c new file mode 100644 index 000000000..d707bdc5a --- /dev/null +++ b/libkern/zlib/crc32.c @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. 
+ */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + */ + +#ifdef MAKECRCH +# include +# ifndef DYNAMIC_CRC_TABLE +# define DYNAMIC_CRC_TABLE +# endif /* !DYNAMIC_CRC_TABLE */ +#endif /* MAKECRCH */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +#define local static + +/* Find a four-byte integer type for crc32_little() and crc32_big(). */ +#ifndef NOBYFOUR +# ifdef STDC /* need ANSI C limits.h to determine sizes */ +# include +# define BYFOUR +# if (UINT_MAX == 0xffffffffUL) + typedef unsigned int u4; +# else +# if (ULONG_MAX == 0xffffffffUL) + typedef unsigned long u4; +# else +# if (USHRT_MAX == 0xffffffffUL) + typedef unsigned short u4; +# else +# undef BYFOUR /* can't find a four-byte integer type! */ +# endif +# endif +# endif +# endif /* STDC */ +#endif /* !NOBYFOUR */ + +/* Definitions for doing the crc four data bytes at a time. 
*/ +#ifdef BYFOUR +# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ + (((w)&0xff00)<<8)+(((w)&0xff)<<24)) + local unsigned long crc32_little OF((unsigned long, + const unsigned char FAR *, unsigned)); + local unsigned long crc32_big OF((unsigned long, + const unsigned char FAR *, unsigned)); +# define TBLS 8 +#else +# define TBLS 1 +#endif /* BYFOUR */ + +/* Local functions for crc concatenation */ +local unsigned long gf2_matrix_times OF((unsigned long *mat, + unsigned long vec)); +local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); + +#ifdef DYNAMIC_CRC_TABLE + +local volatile int crc_table_empty = 1; +local unsigned long FAR crc_table[TBLS][256]; +local void make_crc_table OF((void)); +#ifdef MAKECRCH + local void write_table OF((FILE *, const unsigned long FAR *)); +#endif /* MAKECRCH */ +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by + x (which is shifting right by one and adding x^32 mod p if the bit shifted + out is a one). We start with the highest power (least significant bit) of + q and repeat for all eight bits of q. 
+ + The first table is simply the CRC of all possible eight bit values. This is + all the information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. The remaining tables + allow for word-at-a-time CRC calculation for both big-endian and little- + endian machines, where a word is four bytes. +*/ +local void make_crc_table() +{ + unsigned long c; + int n, k; + unsigned long poly; /* polynomial exclusive-or pattern */ + /* terms of polynomial defining this crc (except x^32): */ + static volatile int first = 1; /* flag to limit concurrent making */ + static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* See if another task is already doing this (not thread-safe, but better + than nothing -- significantly reduces duration of vulnerability in + case the advice about DYNAMIC_CRC_TABLE is ignored) */ + if (first) { + first = 0; + + /* make exclusive-or pattern from polynomial (0xedb88320UL) */ + poly = 0UL; + for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) + poly |= 1UL << (31 - p[n]); + + /* generate a crc for every 8-bit value */ + for (n = 0; n < 256; n++) { + c = (unsigned long)n; + for (k = 0; k < 8; k++) + c = c & 1 ? 
poly ^ (c >> 1) : c >> 1; + crc_table[0][n] = c; + } + +#ifdef BYFOUR + /* generate crc for each value followed by one, two, and three zeros, + and then the byte reversal of those as well as the first table */ + for (n = 0; n < 256; n++) { + c = crc_table[0][n]; + crc_table[4][n] = REV(c); + for (k = 1; k < 4; k++) { + c = crc_table[0][c & 0xff] ^ (c >> 8); + crc_table[k][n] = c; + crc_table[k + 4][n] = REV(c); + } + } +#endif /* BYFOUR */ + + crc_table_empty = 0; + } + else { /* not first */ + /* wait for the other guy to finish (not efficient, but rare) */ + while (crc_table_empty) + ; + } + +#ifdef MAKECRCH + /* write out CRC tables to crc32.h */ + { + FILE *out; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); + fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); + fprintf(out, "local const unsigned long FAR "); + fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); + write_table(out, crc_table[0]); +# ifdef BYFOUR + fprintf(out, "#ifdef BYFOUR\n"); + for (k = 1; k < 8; k++) { + fprintf(out, " },\n {\n"); + write_table(out, crc_table[k]); + } + fprintf(out, "#endif\n"); +# endif /* BYFOUR */ + fprintf(out, " }\n};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH +local void write_table(out, table) + FILE *out; + const unsigned long FAR *table; +{ + int n; + + for (n = 0; n < 256; n++) + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], + n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); +} +#endif /* MAKECRCH */ + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). 
+ */ +#include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32() + */ +const unsigned long FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + return (const unsigned long FAR *)crc_table; +} + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT z_crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + if (buf == Z_NULL) return 0UL; + +#ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +#endif /* DYNAMIC_CRC_TABLE */ + +#ifdef BYFOUR + if (sizeof(void *) == sizeof(ptrdiff_t)) { + u4 endian; + + endian = 1; + if (*((unsigned char *)(&endian))) + return crc32_little(crc, buf, len); + else + return crc32_big(crc, buf, len); + } +#endif /* BYFOUR */ + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} + +#ifdef BYFOUR + +/* ========================================================================= */ +#define DOLIT4 c ^= *buf4++; \ + c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ + crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] +#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 + +/* ========================================================================= */ +local unsigned long crc32_little(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = (u4)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = 
crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + while (len >= 32) { + DOLIT32; + len -= 32; + } + while (len >= 4) { + DOLIT4; + len -= 4; + } + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); + } while (--len); + c = ~c; + return (unsigned long)c; +} + +/* ========================================================================= */ +#define DOBIG4 c ^= *++buf4; \ + c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] +#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + +/* ========================================================================= */ +local unsigned long crc32_big(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + unsigned len; +{ + register u4 c; + register const u4 FAR *buf4; + + c = REV((u4)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + len--; + } + + buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4--; + while (len >= 32) { + DOBIG32; + len -= 32; + } + while (len >= 4) { + DOBIG4; + len -= 4; + } + buf4++; + buf = (const unsigned char FAR *)buf4; + + if (len) do { + c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); + } while (--len); + c = ~c; + return (unsigned long)(REV(c)); +} + +#endif /* BYFOUR */ + +#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ + +/* ========================================================================= */ +local unsigned long gf2_matrix_times(mat, vec) + unsigned long *mat; + unsigned long vec; +{ + unsigned long sum; + + sum = 0; + while (vec) { + if (vec & 1) + sum ^= *mat; + vec >>= 1; + mat++; + } + return sum; +} + +/* ========================================================================= */ +local void gf2_matrix_square(square, mat) + unsigned long *square; + unsigned long *mat; +{ + int n; 
+ + for (n = 0; n < GF2_DIM; n++) + square[n] = gf2_matrix_times(mat, mat[n]); +} + +/* ========================================================================= */ +uLong ZEXPORT z_crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + int n; + unsigned long row; + unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ + unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ + + /* degenerate case */ + if (len2 == 0) + return crc1; + + /* put operator for one zero bit in odd */ + odd[0] = 0xedb88320L; /* CRC-32 polynomial */ + row = 1; + for (n = 1; n < GF2_DIM; n++) { + odd[n] = row; + row <<= 1; + } + + /* put operator for two zero bits in even */ + gf2_matrix_square(even, odd); + + /* put operator for four zero bits in odd */ + gf2_matrix_square(odd, even); + + /* apply len2 zeros to crc1 (first square will put the operator for one + zero byte, eight zero bits, in even) */ + do { + /* apply zeros operator for this bit of len2 */ + gf2_matrix_square(even, odd); + if (len2 & 1) + crc1 = gf2_matrix_times(even, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + if (len2 == 0) + break; + + /* another iteration of the loop with odd and even swapped */ + gf2_matrix_square(odd, even); + if (len2 & 1) + crc1 = gf2_matrix_times(odd, crc1); + len2 >>= 1; + + /* if no more bits set, then done */ + } while (len2 != 0); + + /* return combined crc */ + crc1 ^= crc2; + return crc1; +} diff --git a/libkern/zlib/crc32.h b/libkern/zlib/crc32.h new file mode 100644 index 000000000..9b29c6d03 --- /dev/null +++ b/libkern/zlib/crc32.h @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const unsigned long FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 
0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 
0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 
0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 
0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 
0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 
0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 
0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 
0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 
0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 
0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, 
+ 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 
0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 
0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 
0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 
0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/libkern/zlib/deflate.c b/libkern/zlib/deflate.c new file mode 100644 index 000000000..069331e7f --- /dev/null +++ b/libkern/zlib/deflate.c @@ -0,0 +1,1765 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. 
+ * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in http://www.ietf.org/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. 
*/ + +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); +#ifndef FASTEST +local block_state deflate_slow OF((deflate_state *s, int flush)); +#endif +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifndef FASTEST +#ifdef ASMV + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); +#endif +#endif +local uInt longest_match_fast OF((deflate_state *s, IPos cur_match)); + +#ifdef DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
 */
typedef struct config_s {
   ush good_length; /* reduce lazy search above this match length */
   ush max_lazy;    /* do not perform lazy search above this match length */
   ush nice_length; /* quit search above this match length */
   ush max_chain;   /* copied into s->max_chain_length by lm_init/deflateParams */
   compress_func func; /* compression routine invoked by deflate() for the level */
} config;

/* One row per compression level; the table is indexed by s->level. */
#ifdef FASTEST
local const config configuration_table[2] = {
/*      good lazy nice chain */
/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
/* 1 */ {4,    4,  8,    4, deflate_fast}}; /* max speed, no lazy matches */
#else
local const config configuration_table[10] = {
/*      good lazy nice chain */
/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
/* 2 */ {4,    5, 16,    8, deflate_fast},
/* 3 */ {4,    6, 32,   32, deflate_fast},

/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
/* 5 */ {8,   16, 32,   32, deflate_slow},
/* 6 */ {8,   16, 128, 128, deflate_slow},
/* 7 */ {8,   32, 128, 256, deflate_slow},
/* 8 */ {32, 128, 258, 1024, deflate_slow},
/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
#endif

/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
 * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
 * meaning.
 */

#define EQUAL 0
/* result of memcmp for equal strings */

#ifndef NO_DUMMY_DECL
struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
#endif

/* ===========================================================================
 * Update a hash value with the given input byte
 * IN  assertion: all calls to UPDATE_HASH are made with consecutive
 *    input characters, so that a running hash key can be computed from the
 *    previous key instead of complete recalculation each time.
+ */ +#define UPDATE_HASH(s,h,c) (h = (((h)<hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. 
+ */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int ZEXPORT deflateInit_(strm, level, version, stream_size) + z_streamp strm; + int level; + const char *version; + int stream_size; +{ + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + version, stream_size) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; + const char *version; + int stream_size; +{ + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + ushf *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. 
+ */ + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; +#ifndef NO_ZCFUNCS + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; +#endif /* NO_ZCFUNCS */ + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); + s->pending_buf = (uchf *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == 
Z_NULL) { + s->status = FINISH_STATE; + strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || + strm->state->wrap == 2 || + (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + return Z_STREAM_ERROR; + + s = strm->state; + if (s->wrap) + strm->adler = adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); + dictionary += dictLength - length; /* use the tail of the dictionary */ + } + zmemcpy(s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. 
+ */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = s->wrap ? INIT_STATE : BUSY_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? z_crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = Z_NO_FLUSH; + + _tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (strm->state->wrap != 2) return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + strm->state->bi_valid = bits; + strm->state->bi_buf = (ush)(value & ((1 << bits) - 1)); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + 
deflate_state *s; + compress_func func; + int err = Z_OK; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return err; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = good_length; + s->max_lazy_match = max_lazy; + s->nice_match = nice_length; + s->max_chain_length = max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. 
+ * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds + * for every combination of windowBits and memLevel, as well as wrap. + * But even the conservative upper bound of about 14% expansion does not + * seem onerous for output buffer allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong destLen; + + /* conservative upper bound */ + destLen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11; + + /* if can't get parameters, return conservative bound */ + if (strm == Z_NULL || strm->state == Z_NULL) + return destLen; + + /* if not default parameters, return conservative bound */ + s = strm->state; + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return destLen; + + /* default settings: return tight bound for that case */ + return compressBound(sourceLen); +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). 
 */
local void flush_pending(strm)
    z_streamp strm;
{
    unsigned len = strm->state->pending;

    /* Copy no more than the caller's output buffer can take. */
    if (len > strm->avail_out) len = strm->avail_out;
    if (len == 0) return;

    zmemcpy(strm->next_out, strm->state->pending_out, len);
    strm->next_out  += len;
    strm->state->pending_out  += len;
    strm->total_out += len;
    strm->avail_out  -= len;
    strm->state->pending -= len;
    /* Once everything is drained, rewind to the start of pending_buf. */
    if (strm->state->pending == 0) {
        strm->state->pending_out = strm->state->pending_buf;
    }
}

/* =========================================================================
 * Main compression entry point.
 * In order: validates the arguments, writes the zlib or gzip header if the
 * stream is still in a header state, drains pending output, runs the
 * compression function selected by s->level, emits the flush block requested
 * by `flush`, and for Z_FINISH on a wrapped stream appends the trailer.
 * Returns Z_STREAM_ERROR for bad arguments or inconsistent state,
 * Z_BUF_ERROR when no progress is possible, Z_STREAM_END once a Z_FINISH
 * request has been fully written, and Z_OK otherwise.
 */
int ZEXPORT deflate (strm, flush)
    z_streamp strm;
    int flush;
{
    int old_flush; /* value of flush param for previous deflate call */
    deflate_state *s;

    if (strm == Z_NULL || strm->state == Z_NULL ||
        flush > Z_FINISH || flush < 0) {
        return Z_STREAM_ERROR;
    }
    s = strm->state;

    if (strm->next_out == Z_NULL ||
        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
        (s->status == FINISH_STATE && flush != Z_FINISH)) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);

    s->strm = strm; /* just in case */
    old_flush = s->last_flush;
    s->last_flush = flush;

    /* Write the header */
    if (s->status == INIT_STATE) {
#ifdef GZIP
        if (s->wrap == 2) {
            /* gzip: magic bytes 31, 139, then method 8 */
            strm->adler = z_crc32(0L, Z_NULL, 0);
            put_byte(s, 31);
            put_byte(s, 139);
            put_byte(s, 8);
            if (s->gzhead == NULL) {
                /* No user header: flags and the four time bytes are zero. */
                put_byte(s, 0);
                put_byte(s, 0);
                put_byte(s, 0);
                put_byte(s, 0);
                put_byte(s, 0);
                put_byte(s, s->level == 9 ? 2 :
                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
                             4 : 0));
                put_byte(s, OS_CODE);
                s->status = BUSY_STATE;
            }
            else {
                /* Flag byte built from which optional fields are present. */
                put_byte(s, (s->gzhead->text ? 1 : 0) +
                            (s->gzhead->hcrc ? 2 : 0) +
                            (s->gzhead->extra == Z_NULL ? 0 : 4) +
                            (s->gzhead->name == Z_NULL ? 0 : 8) +
                            (s->gzhead->comment == Z_NULL ? 0 : 16)
                        );
                put_byte(s, (Byte)(s->gzhead->time & 0xff));
                put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
                put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
                put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
                put_byte(s, s->level == 9 ? 2 :
                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
                             4 : 0));
                put_byte(s, s->gzhead->os & 0xff);
                if (s->gzhead->extra != NULL) {
                    put_byte(s, s->gzhead->extra_len & 0xff);
                    put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
                }
                if (s->gzhead->hcrc)
                    strm->adler = z_crc32(strm->adler, s->pending_buf,
                                        s->pending);
                s->gzindex = 0;
                s->status = EXTRA_STATE;
            }
        }
        else
#endif
        {
            /* zlib header: method and window size, then level flags,
             * adjusted so the 16-bit value is a multiple of 31.
             */
            uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
            uInt level_flags;

            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
                level_flags = 0;
            else if (s->level < 6)
                level_flags = 1;
            else if (s->level == 6)
                level_flags = 2;
            else
                level_flags = 3;
            header |= (level_flags << 6);
            if (s->strstart != 0) header |= PRESET_DICT;
            header += 31 - (header % 31);

            s->status = BUSY_STATE;
            putShortMSB(s, header);

            /* Save the adler32 of the preset dictionary: */
            if (s->strstart != 0) {
                putShortMSB(s, (uInt)(strm->adler >> 16));
                putShortMSB(s, (uInt)(strm->adler & 0xffff));
            }
            strm->adler = adler32(0L, Z_NULL, 0);
        }
    }
#ifdef GZIP
    /* The optional gzip fields are written in resumable states: if
     * pending_buf fills up and cannot be drained, the loop breaks and the
     * status is left unchanged so the next call continues from gzindex.
     */
    if (s->status == EXTRA_STATE) {
        if (s->gzhead->extra != NULL) {
            uInt beg = s->pending;  /* start of bytes to update crc */

            while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
                if (s->pending == s->pending_buf_size) {
                    if (s->gzhead->hcrc && s->pending > beg)
                        strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                            s->pending - beg);
                    flush_pending(strm);
                    beg = s->pending;
                    if (s->pending == s->pending_buf_size)
                        break;
                }
                put_byte(s, s->gzhead->extra[s->gzindex]);
                s->gzindex++;
            }
            if (s->gzhead->hcrc && s->pending > beg)
                strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                    s->pending - beg);
            if (s->gzindex == s->gzhead->extra_len) {
                s->gzindex = 0;
                s->status = NAME_STATE;
            }
        }
        else
            s->status = NAME_STATE;
    }
    if (s->status == NAME_STATE) {
        if (s->gzhead->name != NULL) {
            uInt beg = s->pending;  /* start of bytes to update crc */
            int val;

            do {
                if (s->pending == s->pending_buf_size) {
                    if (s->gzhead->hcrc && s->pending > beg)
                        strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                            s->pending - beg);
                    flush_pending(strm);
                    beg = s->pending;
                    if (s->pending == s->pending_buf_size) {
                        val = 1;    /* non-zero: the name is not finished */
                        break;
                    }
                }
                val = s->gzhead->name[s->gzindex++];
                put_byte(s, val);
            } while (val != 0);
            if (s->gzhead->hcrc && s->pending > beg)
                strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                    s->pending - beg);
            if (val == 0) {
                s->gzindex = 0;
                s->status = COMMENT_STATE;
            }
        }
        else
            s->status = COMMENT_STATE;
    }
    if (s->status == COMMENT_STATE) {
        if (s->gzhead->comment != NULL) {
            uInt beg = s->pending;  /* start of bytes to update crc */
            int val;

            do {
                if (s->pending == s->pending_buf_size) {
                    if (s->gzhead->hcrc && s->pending > beg)
                        strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                            s->pending - beg);
                    flush_pending(strm);
                    beg = s->pending;
                    if (s->pending == s->pending_buf_size) {
                        val = 1;    /* non-zero: the comment is not finished */
                        break;
                    }
                }
                val = s->gzhead->comment[s->gzindex++];
                put_byte(s, val);
            } while (val != 0);
            if (s->gzhead->hcrc && s->pending > beg)
                strm->adler = z_crc32(strm->adler, s->pending_buf + beg,
                                    s->pending - beg);
            if (val == 0)
                s->status = HCRC_STATE;
        }
        else
            s->status = HCRC_STATE;
    }
    if (s->status == HCRC_STATE) {
        if (s->gzhead->hcrc) {
            /* The two CRC bytes are written only if both fit. */
            if (s->pending + 2 > s->pending_buf_size)
                flush_pending(strm);
            if (s->pending + 2 <= s->pending_buf_size) {
                put_byte(s, (Byte)(strm->adler & 0xff));
                put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
                strm->adler = z_crc32(0L, Z_NULL, 0);
                s->status = BUSY_STATE;
            }
        }
        else
            s->status = BUSY_STATE;
    }
#endif

    /* Flush as much pending output as possible */
    if (s->pending != 0) {
        flush_pending(strm);
        if (strm->avail_out == 0) {
            /* Since avail_out is 0, deflate will be called again with
             * more output space, but possibly with both pending and
             * avail_in equal to zero. There won't be anything to do,
             * but this is not an error situation so make sure we
             * return OK instead of BUF_ERROR at next call of deflate:
             */
            s->last_flush = -1;
            return Z_OK;
        }

    /* Make sure there is something to do and avoid duplicate consecutive
     * flushes. For repeated and useless calls with Z_FINISH, we keep
     * returning Z_STREAM_END instead of Z_BUF_ERROR.
     */
    } else if (strm->avail_in == 0 && flush <= old_flush &&
               flush != Z_FINISH) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    /* User must not provide more input after the first FINISH: */
    if (s->status == FINISH_STATE && strm->avail_in != 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    /* Start a new block or continue the current one.
     */
    if (strm->avail_in != 0 || s->lookahead != 0 ||
        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
        block_state bstate;

        bstate = (*(configuration_table[s->level].func))(s, flush);

        if (bstate == finish_started || bstate == finish_done) {
            s->status = FINISH_STATE;
        }
        if (bstate == need_more || bstate == finish_started) {
            if (strm->avail_out == 0) {
                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
            }
            return Z_OK;
            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
             * of deflate should use the same flush parameter to make sure
             * that the flush is complete. So we don't have to output an
             * empty block here, this will be done at next call. This also
             * ensures that for a very small output buffer, we emit at most
             * one empty block.
             */
        }
        if (bstate == block_done) {
            if (flush == Z_PARTIAL_FLUSH) {
                _tr_align(s);
            } else { /* FULL_FLUSH or SYNC_FLUSH */
                _tr_stored_block(s, (char*)0, 0L, 0);
                /* For a full flush, this empty block will be recognized
                 * as a special marker by inflate_sync().
                 */
                if (flush == Z_FULL_FLUSH) {
                    CLEAR_HASH(s);             /* forget history */
                }
            }
            flush_pending(strm);
            if (strm->avail_out == 0) {
              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
              return Z_OK;
            }
        }
    }
    Assert(strm->avail_out > 0, "bug2");

    if (flush != Z_FINISH) return Z_OK;
    if (s->wrap <= 0) return Z_STREAM_END;

    /* Write the trailer */
#ifdef GZIP
    if (s->wrap == 2) {
        /* gzip trailer: check value then total_in, each as four bytes,
         * least significant byte first.
         */
        put_byte(s, (Byte)(strm->adler & 0xff));
        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
        put_byte(s, (Byte)(strm->total_in & 0xff));
        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
    }
    else
#endif
    {
        /* zlib trailer: the check value as two MSB-first 16-bit halves. */
        putShortMSB(s, (uInt)(strm->adler >> 16));
        putShortMSB(s, (uInt)(strm->adler & 0xffff));
    }
    flush_pending(strm);
    /* If avail_out is zero, the application will call deflate again
     * to flush the rest.
     */
    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
    return s->pending != 0 ? Z_OK : Z_STREAM_END;
}

/* =========================================================================
 * Release all memory owned by the deflate state and clear strm->state.
 * Returns Z_STREAM_ERROR if the stream/state is null or the status is not
 * one of the known states, Z_DATA_ERROR if the stream was still in
 * BUSY_STATE when freed, and Z_OK otherwise.
 */
int ZEXPORT deflateEnd (strm)
    z_streamp strm;
{
    int status;

    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;

    status = strm->state->status;
    if (status != INIT_STATE &&
        status != EXTRA_STATE &&
        status != NAME_STATE &&
        status != COMMENT_STATE &&
        status != HCRC_STATE &&
        status != BUSY_STATE &&
        status != FINISH_STATE) {
      return Z_STREAM_ERROR;
    }

    /* Deallocate in reverse order of allocations: */
    TRY_FREE(strm, strm->state->pending_buf);
    TRY_FREE(strm, strm->state->head);
    TRY_FREE(strm, strm->state->prev);
    TRY_FREE(strm, strm->state->window);

    ZFREE(strm, strm->state);
    strm->state = Z_NULL;

    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
}

/* =========================================================================
 * Copy the source state to the destination state.
 * To simplify the source, this is not supported for 16-bit MSDOS (which
 * doesn't have enough memory anyway to duplicate compression states).
+ */ +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy(dest, source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy(ds, ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); + ds->pending_buf = (uchf *) overlay; + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. 
All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local int read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, strm->next_in, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = z_crc32(strm->adler, strm->next_in, len); + } +#endif + zmemcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init (s) + deflate_state *s; +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +#ifndef FASTEST +#ifdef ASMV + match_init(); /* initialize the asm code */ +#endif +#endif +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. 
 * IN assertions: cur_match is the head of the hash chain for the current
 *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
 * OUT assertion: the match length is not greater than s->lookahead.
 */
#ifndef ASMV
/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
 * match.S. The code will be functionally equivalent.
 */
local uInt longest_match(s, cur_match)
    deflate_state *s;
    IPos cur_match;                             /* current match */
{
    unsigned chain_length = s->max_chain_length;/* max hash chain length */
    register Bytef *scan = s->window + s->strstart; /* current string */
    register Bytef *match;                       /* matched string */
    register int len;                           /* length of current match */
    int best_len = s->prev_length;              /* best match length so far */
    int nice_match = s->nice_match;             /* stop if match long enough */
    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
        s->strstart - (IPos)MAX_DIST(s) : NIL;
    /* Stop when cur_match becomes <= limit. To simplify the code,
     * we prevent matches with the string of window index 0.
     */
    Posf *prev = s->prev;
    uInt wmask = s->w_mask;

#ifdef UNALIGNED_OK
    /* Compare two bytes at a time. Note: this is not always beneficial.
     * Try with and without -DUNALIGNED_OK to check.
     */
    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
    register ush scan_start = *(ushf*)scan;
    register ush scan_end   = *(ushf*)(scan+best_len-1);
#else
    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
    register Byte scan_end1  = scan[best_len-1];
    register Byte scan_end   = scan[best_len];
#endif

    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
     * It is easy to get rid of this optimization if necessary.
     */
    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

    /* Do not waste too much time if we already have a good match: */
    if (s->prev_length >= s->good_match) {
        chain_length >>= 2;
    }
    /* Do not look for matches beyond the end of the input. This is necessary
     * to make deflate deterministic.
     */
    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;

    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");

    /* Walk the hash chain; the loop condition at the bottom follows prev[]
     * and stops at the distance limit or when chain_length runs out.
     */
    do {
        Assert(cur_match < s->strstart, "no future");
        match = s->window + cur_match;

        /* Skip to next match if the match length cannot increase
         * or if the match length is less than 2.  Note that the checks below
         * for insufficient lookahead only occur occasionally for performance
         * reasons.  Therefore uninitialized memory will be accessed, and
         * conditional jumps will be made that depend on those values.
         * However the length of the match is limited to the lookahead, so
         * the output of deflate is not affected by the uninitialized values.
         */
#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
        /* This code assumes sizeof(unsigned short) == 2. Do not use
         * UNALIGNED_OK if your compiler uses a different size.
         */
        if (*(ushf*)(match+best_len-1) != scan_end ||
            *(ushf*)match != scan_start) continue;

        /* It is not necessary to compare scan[2] and match[2] since they are
         * always equal when the other bytes match, given that the hash keys
         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
         * strstart+3, +5, ... up to strstart+257. We check for insufficient
         * lookahead only every 4th comparison; the 128th check will be made
         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
         * necessary to put more guard bytes at the end of the window, or
         * to check more often for insufficient lookahead.
         */
        Assert(scan[2] == match[2], "scan[2]?");
        scan++, match++;
        do {
        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
                 scan < strend);
        /* The funny "do {}" generates better code on most compilers */

        /* Here, scan <= window+strstart+257 */
        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
        if (*scan == *match) scan++;

        len = (MAX_MATCH - 1) - (int)(strend-scan);
        scan = strend - (MAX_MATCH-1);  /* rewind scan to the string start */

#else /* UNALIGNED_OK */

        if (match[best_len]   != scan_end  ||
            match[best_len-1] != scan_end1 ||
            *match            != *scan     ||
            *++match          != scan[1])      continue;

        /* The check at best_len-1 can be removed because it will be made
         * again later. (This heuristic is not always a win.)
         * It is not necessary to compare scan[2] and match[2] since they
         * are always equal when the other bytes match, given that
         * the hash keys are equal and that HASH_BITS >= 8.
         */
        scan += 2, match++;
        Assert(*scan == *match, "match[2]?");

        /* We check for insufficient lookahead only every 8th comparison;
         * the 256th check will be made at strstart+258.
         */
        do {
        } while (*++scan == *++match && *++scan == *++match &&
                 *++scan == *++match && *++scan == *++match &&
                 *++scan == *++match && *++scan == *++match &&
                 *++scan == *++match && *++scan == *++match &&
                 scan < strend);

        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");

        len = MAX_MATCH - (int)(strend - scan);
        scan = strend - MAX_MATCH;      /* rewind scan to the string start */

#endif /* UNALIGNED_OK */

        if (len > best_len) {
            s->match_start = cur_match;
            best_len = len;
            if (len >= nice_match) break;
            /* Refresh the cached end bytes used by the quick reject test. */
#ifdef UNALIGNED_OK
            scan_end = *(ushf*)(scan+best_len-1);
#else
            scan_end1  = scan[best_len-1];
            scan_end   = scan[best_len];
#endif
        }
    } while ((cur_match = prev[cur_match & wmask]) > limit
             && --chain_length != 0);

    /* Never report a match longer than the available lookahead. */
    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
    return s->lookahead;
}
#endif /* ASMV */
#endif /* FASTEST */

/* ---------------------------------------------------------------------------
 * Optimized version for level == 1 or strategy == Z_RLE only
 * Tests only the single candidate cur_match (the hash chain is not
 * followed). Returns MIN_MATCH-1 when no usable match is found, otherwise
 * the match length capped at s->lookahead, with s->match_start set.
 */
local uInt longest_match_fast(s, cur_match)
    deflate_state *s;
    IPos cur_match;                             /* current match */
{
    register Bytef *scan = s->window + s->strstart; /* current string */
    register Bytef *match;                       /* matched string */
    register int len;                           /* length of current match */
    register Bytef *strend = s->window + s->strstart + MAX_MATCH;

    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
     * It is easy to get rid of this optimization if necessary.
     */
    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");

    Assert(cur_match < s->strstart, "no future");

    match = s->window + cur_match;

    /* Return failure if the match length is less than 2:
     */
    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;

    /* The check at best_len-1 can be removed because it will be made
     * again later. (This heuristic is not always a win.)
     * It is not necessary to compare scan[2] and match[2] since they
     * are always equal when the other bytes match, given that
     * the hash keys are equal and that HASH_BITS >= 8.
     */
    scan += 2, match += 2;
    Assert(*scan == *match, "match[2]?");

    /* We check for insufficient lookahead only every 8th comparison;
     * the 256th check will be made at strstart+258.
     */
    do {
    } while (*++scan == *++match && *++scan == *++match &&
             *++scan == *++match && *++scan == *++match &&
             *++scan == *++match && *++scan == *++match &&
             *++scan == *++match && *++scan == *++match &&
             scan < strend);

    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");

    len = MAX_MATCH - (int)(strend - scan);

    if (len < MIN_MATCH) return MIN_MATCH - 1;

    s->match_start = cur_match;
    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
}

#ifdef DEBUG
/* ===========================================================================
 * Check that the match at match_start is indeed a match.
 * On a mismatch the offending bytes are dumped to stderr and z_error is
 * called; with z_verbose > 1 every verified match is traced to stderr.
 */
local void check_match(s, start, match, length)
    deflate_state *s;
    IPos start, match;
    int length;
{
    /* check that the match is indeed a match */
    if (zmemcmp(s->window + match,
                s->window + start, length) != EQUAL) {
        fprintf(stderr, " start %u, match %u, length %d\n",
                start, match, length);
        do {
            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
        } while (--length != 0);
        z_error("invalid match");
    }
    if (z_verbose > 1) {
        fprintf(stderr,"\\[%d,%d]", start-match, length);
        do { putc(s->window[start++], stderr); } while (--length != 0);
    }
}
#else
   /* Without DEBUG the check compiles away to nothing. */
#  define check_match(s, start, match, length)
#endif /* DEBUG */

/* ===========================================================================
 * Fill the window when the lookahead becomes insufficient.
 * Updates strstart and lookahead.
+ * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(s) + deflate_state *s; +{ + register unsigned n, m; + register Posf *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + /* %%% avoid this when Z_RLE */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. 
+ */ + } while (--n); +#endif + more += wsize; + } + if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? 
finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. + */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ +#ifdef FASTEST + if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) || + (s->strategy == Z_RLE && s->strstart - hash_head == 1)) { + s->match_length = longest_match_fast (s, hash_head); + } +#else + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } +#endif + /* longest_match() or longest_match_fast() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? 
finish_done : block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). 
+ */ + if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { + s->match_length = longest_match (s, hash_head); + } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { + s->match_length = longest_match_fast (s, hash_head); + } + /* longest_match() or longest_match_fast() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. 
+ */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif /* FASTEST */ + +#if 0 +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) + */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt run; /* length of run */ + uInt max; /* maximum length of run */ + uInt prev; /* byte at distance one to match */ + Bytef *scan; /* scan for end of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest encodable run. + */ + if (s->lookahead < MAX_MATCH) { + fill_window(s); + if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + run = 0; + if (s->strstart > 0) { /* if there is a previous byte, that is */ + max = s->lookahead < MAX_MATCH ? 
s->lookahead : MAX_MATCH; + scan = s->window + s->strstart - 1; + prev = *scan++; + do { + if (*scan++ != prev) + break; + } while (++run < max); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (run >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, run); + _tr_tally_dist(s, 1, run - MIN_MATCH, bflush); + s->lookahead -= run; + s->strstart += run; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} +#endif diff --git a/libkern/zlib/deflate.h b/libkern/zlib/deflate.h new file mode 100644 index 000000000..6378b20ab --- /dev/null +++ b/libkern/zlib/deflate.h @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2004 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. 
+ */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define EXTRA_STATE 69 +#define NAME_STATE 73 +#define COMMENT_STATE 91 +#define HCRC_STATE 103 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
+ */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + uInt pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + uInt gzindex; /* where in extra, name, or comment */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. 
*/ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
+ */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uchf *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib).
+ * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ushf *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. 
+ */ + + /* in trees.c */ +void _tr_init OF((deflate_state *s)); +int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); +void _tr_align OF((deflate_state *s)); +void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, + int eof)); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch _length_code[]; + extern uch _dist_code[]; +#else + extern const uch _length_code[]; + extern const uch _dist_code[]; +#endif + +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->last_lit] = 0; \ + s->l_buf[s->last_lit++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ + s->d_buf[s->last_lit] = dist; \ + s->l_buf[s->last_lit++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->last_lit == s->lit_bufsize-1); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff --git a/libkern/zlib/gzio.c b/libkern/zlib/gzio.c new file mode 100644 index 000000000..c6ca199c3 --- /dev/null +++ b/libkern/zlib/gzio.c @@ -0,0 +1,1053 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* gzio.c -- IO on .gz files + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Compile this file with -DNO_GZCOMPRESS to avoid the compression code. 
+ */ + +/* @(#) $Id$ */ + +#include + +#include "zutil.h" + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#ifndef NO_DUMMY_DECL +struct internal_state {int dummy;}; /* for buggy compilers */ +#endif + +#ifndef Z_BUFSIZE +# ifdef MAXSEG_64K +# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */ +# else +# define Z_BUFSIZE 16384 +# endif +#endif +#ifndef Z_PRINTF_BUFSIZE +# define Z_PRINTF_BUFSIZE 4096 +#endif + +#ifdef __MVS__ +# pragma map (fdopen , "\174\174FDOPEN") + FILE *fdopen(int, const char *); +#endif + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern void free OF((voidpf ptr)); +#endif + +#define ALLOC(size) malloc(size) +#define TRYFREE(p) {if (p) free(p);} + +static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +typedef struct gz_stream { + z_stream stream; + int z_err; /* error code for last stream operation */ + int z_eof; /* set if end of input file */ + FILE *file; /* .gz file */ + Byte *inbuf; /* input buffer */ + Byte *outbuf; /* output buffer */ + uLong crc; /* crc32 of uncompressed data */ + char *msg; /* error message */ + char *path; /* path name for debugging only */ + int transparent; /* 1 if input file is not a .gz file */ + char mode; /* 'w' or 'r' */ + z_off_t start; /* start of compressed data in file (header skipped) */ + z_off_t in; /* bytes into deflate or inflate */ + z_off_t out; /* bytes out of deflate or inflate */ + int back; /* one character push-back */ + int last; /* true if push-back is last character */ +} gz_stream; + + +local gzFile gz_open 
OF((const char *path, const char *mode, int fd)); +local int do_flush OF((gzFile file, int flush)); +local int get_byte OF((gz_stream *s)); +local void check_header OF((gz_stream *s)); +local int destroy OF((gz_stream *s)); +local void putLong OF((FILE *file, uLong x)); +local uLong getLong OF((gz_stream *s)); + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. The mode parameter + is as in fopen ("rb" or "wb"). The file is given either by file descriptor + or path name (if fd == -1). + gz_open returns NULL if the file could not be opened or if there was + insufficient memory to allocate the (de)compression state; errno + can be checked to distinguish the two cases (if errno is zero, the + zlib error is Z_MEM_ERROR). +*/ +local gzFile gz_open (path, mode, fd) + const char *path; + const char *mode; + int fd; +{ + int err; + int level = Z_DEFAULT_COMPRESSION; /* compression level */ + int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */ + char *p = (char*)mode; + gz_stream *s; + char fmode[80]; /* copy of mode, without the compression level */ + char *m = fmode; + + if (!path || !mode) return Z_NULL; + + s = (gz_stream *)ALLOC(sizeof(gz_stream)); + if (!s) return Z_NULL; + + s->stream.zalloc = (alloc_func)0; + s->stream.zfree = (free_func)0; + s->stream.opaque = (voidpf)0; + s->stream.next_in = s->inbuf = Z_NULL; + s->stream.next_out = s->outbuf = Z_NULL; + s->stream.avail_in = s->stream.avail_out = 0; + s->file = NULL; + s->z_err = Z_OK; + s->z_eof = 0; + s->in = 0; + s->out = 0; + s->back = EOF; + s->crc = z_crc32(0L, Z_NULL, 0); + s->msg = NULL; + s->transparent = 0; + + s->path = (char*)ALLOC(strlen(path)+1); + if (s->path == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + strcpy(s->path, path); /* do this early for debugging */ + + s->mode = '\0'; + do { + if (*p == 'r') s->mode = 'r'; + if (*p == 'w' || *p == 'a') s->mode = 'w'; + if (*p >= '0' && *p <= '9') { + level = 
*p - '0'; + } else if (*p == 'f') { + strategy = Z_FILTERED; + } else if (*p == 'h') { + strategy = Z_HUFFMAN_ONLY; + } else if (*p == 'R') { + strategy = Z_RLE; + } else { + *m++ = *p; /* copy the mode */ + } + } while (*p++ && m != fmode + sizeof(fmode)); + if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL; + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + err = Z_STREAM_ERROR; +#else + err = deflateInit2(&(s->stream), level, + Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy); + /* windowBits is passed < 0 to suppress zlib header */ + + s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); +#endif + if (err != Z_OK || s->outbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } else { + s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); + + err = inflateInit2(&(s->stream), -MAX_WBITS); + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are + * present after the compressed stream. + */ + if (err != Z_OK || s->inbuf == Z_NULL) { + return destroy(s), (gzFile)Z_NULL; + } + } + s->stream.avail_out = Z_BUFSIZE; + + errno = 0; + s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode); + + if (s->file == NULL) { + return destroy(s), (gzFile)Z_NULL; + } + if (s->mode == 'w') { + /* Write a very simple .gz header: + */ + fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1], + Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE); + s->start = 10L; + /* We use 10L instead of ftell(s->file) to because ftell causes an + * fflush on some systems. This version of the library doesn't use + * start anyway in write mode, so this initialization is not + * necessary. 
+ */ + } else { + check_header(s); /* skip the .gz header */ + s->start = ftell(s->file) - s->stream.avail_in; + } + + return (gzFile)s; +} + +/* =========================================================================== + Opens a gzip (.gz) file for reading or writing. +*/ +gzFile ZEXPORT gzopen (path, mode) + const char *path; + const char *mode; +{ + return gz_open (path, mode, -1); +} + +/* =========================================================================== + Associate a gzFile with the file descriptor fd. fd is not dup'ed here + to mimic the behavio(u)r of fdopen. +*/ +gzFile ZEXPORT gzdopen (fd, mode) + int fd; + const char *mode; +{ + char name[46]; /* allow for up to 128-bit integers */ + + if (fd < 0) return (gzFile)Z_NULL; + sprintf(name, "", fd); /* for debugging */ + + return gz_open (name, mode, fd); +} + +/* =========================================================================== + * Update the compression level and strategy + */ +int ZEXPORT gzsetparams (file, level, strategy) + gzFile file; + int level; + int strategy; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + /* Make room to allow flushing */ + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + } + s->stream.avail_out = Z_BUFSIZE; + } + + return deflateParams (&(s->stream), level, strategy); +} + +/* =========================================================================== + Read a byte from a gz_stream; update next_in and avail_in. Return EOF + for end of file. + IN assertion: the stream s has been sucessfully opened for reading. 
+*/ +local int get_byte(s) + gz_stream *s; +{ + if (s->z_eof) return EOF; + if (s->stream.avail_in == 0) { + errno = 0; + s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) s->z_err = Z_ERRNO; + return EOF; + } + s->stream.next_in = s->inbuf; + } + s->stream.avail_in--; + return *(s->stream.next_in)++; +} + +/* =========================================================================== + Check the gzip header of a gz_stream opened for reading. Set the stream + mode to transparent if the gzip magic header is not present; set s->err + to Z_DATA_ERROR if the magic header is present but the rest of the header + is incorrect. + IN assertion: the stream s has already been created sucessfully; + s->stream.avail_in is zero for the first time, but may be non-zero + for concatenated .gz files. +*/ +local void check_header(s) + gz_stream *s; +{ + int method; /* method byte */ + int flags; /* flags byte */ + uInt len; + int c; + + /* Assure two bytes in the buffer so we can peek ahead -- handle case + where first byte of header is at the end of the buffer after the last + gzip segment */ + len = s->stream.avail_in; + if (len < 2) { + if (len) s->inbuf[0] = s->stream.next_in[0]; + errno = 0; + len = (uInt)fread(s->inbuf + len, 1, Z_BUFSIZE >> len, s->file); + if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO; + s->stream.avail_in += len; + s->stream.next_in = s->inbuf; + if (s->stream.avail_in < 2) { + s->transparent = s->stream.avail_in; + return; + } + } + + /* Peek ahead to check the gzip magic header */ + if (s->stream.next_in[0] != gz_magic[0] || + s->stream.next_in[1] != gz_magic[1]) { + s->transparent = 1; + return; + } + s->stream.avail_in -= 2; + s->stream.next_in += 2; + + /* Check the rest of the gzip header */ + method = get_byte(s); + flags = get_byte(s); + if (method != Z_DEFLATED || (flags & RESERVED) != 0) { + s->z_err = Z_DATA_ERROR; + return; + } + + /* Discard time, 
xflags and OS code: */ + for (len = 0; len < 6; len++) (void)get_byte(s); + + if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ + len = (uInt)get_byte(s); + len += ((uInt)get_byte(s))<<8; + /* len is garbage if EOF but the loop below will quit anyway */ + while (len-- != 0 && get_byte(s) != EOF) ; + } + if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ + while ((c = get_byte(s)) != 0 && c != EOF) ; + } + if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ + for (len = 0; len < 2; len++) (void)get_byte(s); + } + s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; +} + + /* =========================================================================== + * Cleanup then free the given gz_stream. Return a zlib error code. + Try freeing in the reverse order of allocations. + */ +local int destroy (s) + gz_stream *s; +{ + int err = Z_OK; + + if (!s) return Z_STREAM_ERROR; + + TRYFREE(s->msg); + + if (s->stream.state != NULL) { + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + err = Z_STREAM_ERROR; +#else + err = deflateEnd(&(s->stream)); +#endif + } else if (s->mode == 'r') { + err = inflateEnd(&(s->stream)); + } + } + if (s->file != NULL && fclose(s->file)) { +#ifdef ESPIPE + if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */ +#endif + err = Z_ERRNO; + } + if (s->z_err < 0) err = s->z_err; + + TRYFREE(s->inbuf); + TRYFREE(s->outbuf); + TRYFREE(s->path); + TRYFREE(s); + return err; +} + +/* =========================================================================== + Reads the given number of uncompressed bytes from the compressed file. + gzread returns the number of bytes actually read (0 for end of file). 
+*/ +int ZEXPORT gzread (file, buf, len) + gzFile file; + voidp buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + Bytef *start = (Bytef*)buf; /* starting point for crc computation */ + Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */ + + if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR; + + if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1; + if (s->z_err == Z_STREAM_END) return 0; /* EOF */ + + next_out = (Byte*)buf; + s->stream.next_out = (Bytef*)buf; + s->stream.avail_out = len; + + if (s->stream.avail_out && s->back != EOF) { + *next_out++ = s->back; + s->stream.next_out++; + s->stream.avail_out--; + s->back = EOF; + s->out++; + start++; + if (s->last) { + s->z_err = Z_STREAM_END; + return 1; + } + } + + while (s->stream.avail_out != 0) { + + if (s->transparent) { + /* Copy first the lookahead bytes: */ + uInt n = s->stream.avail_in; + if (n > s->stream.avail_out) n = s->stream.avail_out; + if (n > 0) { + zmemcpy(s->stream.next_out, s->stream.next_in, n); + next_out += n; + s->stream.next_out = next_out; + s->stream.next_in += n; + s->stream.avail_out -= n; + s->stream.avail_in -= n; + } + if (s->stream.avail_out > 0) { + s->stream.avail_out -= + (uInt)fread(next_out, 1, s->stream.avail_out, s->file); + } + len -= s->stream.avail_out; + s->in += len; + s->out += len; + if (len == 0) s->z_eof = 1; + return (int)len; + } + if (s->stream.avail_in == 0 && !s->z_eof) { + + errno = 0; + s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file); + if (s->stream.avail_in == 0) { + s->z_eof = 1; + if (ferror(s->file)) { + s->z_err = Z_ERRNO; + break; + } + } + s->stream.next_in = s->inbuf; + } + s->in += s->stream.avail_in; + s->out += s->stream.avail_out; + s->z_err = inflate(&(s->stream), Z_NO_FLUSH); + s->in -= s->stream.avail_in; + s->out -= s->stream.avail_out; + + if (s->z_err == Z_STREAM_END) { + /* Check CRC and original size */ + s->crc = z_crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + 
start = s->stream.next_out; + + if (getLong(s) != s->crc) { + s->z_err = Z_DATA_ERROR; + } else { + (void)getLong(s); + /* The uncompressed length returned by above getlong() may be + * different from s->out in case of concatenated .gz files. + * Check for such files: + */ + check_header(s); + if (s->z_err == Z_OK) { + inflateReset(&(s->stream)); + s->crc = z_crc32(0L, Z_NULL, 0); + } + } + } + if (s->z_err != Z_OK || s->z_eof) break; + } + s->crc = z_crc32(s->crc, start, (uInt)(s->stream.next_out - start)); + + if (len == s->stream.avail_out && + (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO)) + return -1; + return (int)(len - s->stream.avail_out); +} + + +/* =========================================================================== + Reads one byte from the compressed file. gzgetc returns this byte + or -1 in case of end of file or error. +*/ +int ZEXPORT gzgetc(file) + gzFile file; +{ + unsigned char c; + + return gzread(file, &c, 1) == 1 ? c : -1; +} + + +/* =========================================================================== + Push one byte back onto the stream. +*/ +int ZEXPORT gzungetc(c, file) + int c; + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r' || c == EOF || s->back != EOF) return EOF; + s->back = c; + s->out--; + s->last = (s->z_err == Z_STREAM_END); + if (s->last) s->z_err = Z_OK; + s->z_eof = 0; + return c; +} + + +/* =========================================================================== + Reads bytes from the compressed file until len-1 characters are + read, or a newline character is read and transferred to buf, or an + end-of-file condition is encountered. The string is then terminated + with a null character. + gzgets returns buf, or Z_NULL in case of error. + + The current implementation is not optimized at all. 
+*/ +char * ZEXPORT gzgets(file, buf, len) + gzFile file; + char *buf; + int len; +{ + char *b = buf; + if (buf == Z_NULL || len <= 0) return Z_NULL; + + while (--len > 0 && gzread(file, buf, 1) == 1 && *buf++ != '\n') ; + *buf = '\0'; + return b == buf && len > 0 ? Z_NULL : b; +} + + +#ifndef NO_GZCOMPRESS +/* =========================================================================== + Writes the given number of uncompressed bytes into the compressed file. + gzwrite returns the number of bytes actually written (0 in case of error). +*/ +int ZEXPORT gzwrite (file, buf, len) + gzFile file; + voidpc buf; + unsigned len; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.next_in = (Bytef*)buf; + s->stream.avail_in = len; + + while (s->stream.avail_in != 0) { + + if (s->stream.avail_out == 0) { + + s->stream.next_out = s->outbuf; + if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) { + s->z_err = Z_ERRNO; + break; + } + s->stream.avail_out = Z_BUFSIZE; + } + s->in += s->stream.avail_in; + s->out += s->stream.avail_out; + s->z_err = deflate(&(s->stream), Z_NO_FLUSH); + s->in -= s->stream.avail_in; + s->out -= s->stream.avail_out; + if (s->z_err != Z_OK) break; + } + s->crc = z_crc32(s->crc, (const Bytef *)buf, len); + + return (int)(len - s->stream.avail_in); +} + + +/* =========================================================================== + Converts, formats, and writes the args to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written (0 in case of error). +*/ +#ifdef STDC +#include + +int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...) 
+{ + char buf[Z_PRINTF_BUFSIZE]; + va_list va; + int len; + + buf[sizeof(buf) - 1] = 0; + va_start(va, format); +#ifdef NO_vsnprintf +# ifdef HAS_vsprintf_void + (void)vsprintf(buf, format, va); + va_end(va); + for (len = 0; len < sizeof(buf); len++) + if (buf[len] == 0) break; +# else + len = vsprintf(buf, format, va); + va_end(va); +# endif +#else +# ifdef HAS_vsnprintf_void + (void)vsnprintf(buf, sizeof(buf), format, va); + va_end(va); + len = strlen(buf); +# else + len = vsnprintf(buf, sizeof(buf), format, va); + va_end(va); +# endif +#endif + if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0) + return 0; + return gzwrite(file, buf, (unsigned)len); +} +#else /* not ANSI C */ + +int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20) + gzFile file; + const char *format; + int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, + a11, a12, a13, a14, a15, a16, a17, a18, a19, a20; +{ + char buf[Z_PRINTF_BUFSIZE]; + int len; + + buf[sizeof(buf) - 1] = 0; +#ifdef NO_snprintf +# ifdef HAS_sprintf_void + sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + for (len = 0; len < sizeof(buf); len++) + if (buf[len] == 0) break; +# else + len = sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#else +# ifdef HAS_snprintf_void + snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); + len = strlen(buf); +# else + len = snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8, + a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20); +# endif +#endif + if (len <= 0 || len >= sizeof(buf) || buf[sizeof(buf) - 1] != 0) + return 0; + return gzwrite(file, buf, len); +} +#endif + +/* =========================================================================== + Writes c, converted 
to an unsigned char, into the compressed file. + gzputc returns the value that was written, or -1 in case of error. +*/ +int ZEXPORT gzputc(file, c) + gzFile file; + int c; +{ + unsigned char cc = (unsigned char) c; /* required for big endian systems */ + + return gzwrite(file, &cc, 1) == 1 ? (int)cc : -1; +} + + +/* =========================================================================== + Writes the given null-terminated string to the compressed file, excluding + the terminating null character. + gzputs returns the number of characters written, or -1 in case of error. +*/ +int ZEXPORT gzputs(file, s) + gzFile file; + const char *s; +{ + return gzwrite(file, (char*)s, (unsigned)strlen(s)); +} + + +/* =========================================================================== + Flushes all pending output into the compressed file. The parameter + flush is as in the deflate() function. +*/ +local int do_flush (file, flush) + gzFile file; + int flush; +{ + uInt len; + int done = 0; + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; + + s->stream.avail_in = 0; /* should be zero already anyway */ + + for (;;) { + len = Z_BUFSIZE - s->stream.avail_out; + + if (len != 0) { + if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) { + s->z_err = Z_ERRNO; + return Z_ERRNO; + } + s->stream.next_out = s->outbuf; + s->stream.avail_out = Z_BUFSIZE; + } + if (done) break; + s->out += s->stream.avail_out; + s->z_err = deflate(&(s->stream), flush); + s->out -= s->stream.avail_out; + + /* Ignore the second of two consecutive flushes: */ + if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK; + + /* deflate has finished flushing only when it hasn't used up + * all the available space in the output buffer: + */ + done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END); + + if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break; + } + return s->z_err == Z_STREAM_END ? 
Z_OK : s->z_err; +} + +int ZEXPORT gzflush (file, flush) + gzFile file; + int flush; +{ + gz_stream *s = (gz_stream*)file; + int err = do_flush (file, flush); + + if (err) return err; + fflush(s->file); + return s->z_err == Z_STREAM_END ? Z_OK : s->z_err; +} +#endif /* NO_GZCOMPRESS */ + +/* =========================================================================== + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error. + SEEK_END is not implemented, returns error. + In this version of the library, gzseek can be extremely slow. +*/ +z_off_t ZEXPORT gzseek (file, offset, whence) + gzFile file; + z_off_t offset; + int whence; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || whence == SEEK_END || + s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) { + return -1L; + } + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + return -1L; +#else + if (whence == SEEK_SET) { + offset -= s->in; + } + if (offset < 0) return -1L; + + /* At this point, offset is the number of zero bytes to write. 
*/ + if (s->inbuf == Z_NULL) { + s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */ + if (s->inbuf == Z_NULL) return -1L; + zmemzero(s->inbuf, Z_BUFSIZE); + } + while (offset > 0) { + uInt size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (uInt)offset; + + size = gzwrite(file, s->inbuf, size); + if (size == 0) return -1L; + + offset -= size; + } + return s->in; +#endif + } + /* Rest of function is for reading only */ + + /* compute absolute position */ + if (whence == SEEK_CUR) { + offset += s->out; + } + if (offset < 0) return -1L; + + if (s->transparent) { + /* map to fseek */ + s->back = EOF; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + if (fseek(s->file, offset, SEEK_SET) < 0) return -1L; + + s->in = s->out = offset; + return offset; + } + + /* For a negative seek, rewind and use positive seek */ + if (offset >= s->out) { + offset -= s->out; + } else if (gzrewind(file) < 0) { + return -1L; + } + /* offset is now the number of bytes to skip. */ + + if (offset != 0 && s->outbuf == Z_NULL) { + s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); + if (s->outbuf == Z_NULL) return -1L; + } + if (offset && s->back != EOF) { + s->back = EOF; + s->out++; + offset--; + if (s->last) s->z_err = Z_STREAM_END; + } + while (offset > 0) { + int size = Z_BUFSIZE; + if (offset < Z_BUFSIZE) size = (int)offset; + + size = gzread(file, s->outbuf, (uInt)size); + if (size <= 0) return -1L; + offset -= size; + } + return s->out; +} + +/* =========================================================================== + Rewinds input file. 
+*/ +int ZEXPORT gzrewind (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return -1; + + s->z_err = Z_OK; + s->z_eof = 0; + s->back = EOF; + s->stream.avail_in = 0; + s->stream.next_in = s->inbuf; + s->crc = z_crc32(0L, Z_NULL, 0); + if (!s->transparent) (void)inflateReset(&s->stream); + s->in = 0; + s->out = 0; + return fseek(s->file, s->start, SEEK_SET); +} + +/* =========================================================================== + Returns the starting position for the next gzread or gzwrite on the + given compressed file. This position represents a number of bytes in the + uncompressed data stream. +*/ +z_off_t ZEXPORT gztell (file) + gzFile file; +{ + return gzseek(file, 0L, SEEK_CUR); +} + +/* =========================================================================== + Returns 1 when EOF has previously been detected reading the given + input stream, otherwise zero. +*/ +int ZEXPORT gzeof (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + /* With concatenated compressed files that can have embedded + * crc trailers, z_eof is no longer the only/best indicator of EOF + * on a gz_stream. Handle end-of-stream error explicitly here. + */ + if (s == NULL || s->mode != 'r') return 0; + if (s->z_eof) return 1; + return s->z_err == Z_STREAM_END; +} + +/* =========================================================================== + Returns 1 if reading and doing so transparently, otherwise zero. 
+*/ +int ZEXPORT gzdirect (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL || s->mode != 'r') return 0; + return s->transparent; +} + +/* =========================================================================== + Outputs a long in LSB order to the given file +*/ +local void putLong (file, x) + FILE *file; + uLong x; +{ + int n; + for (n = 0; n < 4; n++) { + fputc((int)(x & 0xff), file); + x >>= 8; + } +} + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets z_err in case + of error. +*/ +local uLong getLong (s) + gz_stream *s; +{ + uLong x = (uLong)get_byte(s); + int c; + + x += ((uLong)get_byte(s))<<8; + x += ((uLong)get_byte(s))<<16; + c = get_byte(s); + if (c == EOF) s->z_err = Z_DATA_ERROR; + x += ((uLong)c)<<24; + return x; +} + +/* =========================================================================== + Flushes all pending output if necessary, closes the compressed file + and deallocates all the (de)compression state. +*/ +int ZEXPORT gzclose (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return Z_STREAM_ERROR; + + if (s->mode == 'w') { +#ifdef NO_GZCOMPRESS + return Z_STREAM_ERROR; +#else + if (do_flush (file, Z_FINISH) != Z_OK) + return destroy((gz_stream*)file); + + putLong (s->file, s->crc); + putLong (s->file, (uLong)(s->in & 0xffffffff)); +#endif + } + return destroy((gz_stream*)file); +} + +#ifdef STDC +# define zstrerror(errnum) strerror(errnum) +#else +# define zstrerror(errnum) "" +#endif + +/* =========================================================================== + Returns the error message for the last error which occurred on the + given compressed file. errnum is set to zlib error number. If an + error occurred in the file system and not in the compression library, + errnum is set to Z_ERRNO and the application may consult errno + to get the exact error code. 
+*/ +const char * ZEXPORT gzerror (file, errnum) + gzFile file; + int *errnum; +{ + char *m; + gz_stream *s = (gz_stream*)file; + + if (s == NULL) { + *errnum = Z_STREAM_ERROR; + return (const char*)ERR_MSG(Z_STREAM_ERROR); + } + *errnum = s->z_err; + if (*errnum == Z_OK) return (const char*)""; + + m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg); + + if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); + + TRYFREE(s->msg); + s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); + if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR); + strcpy(s->msg, s->path); + strcat(s->msg, ": "); + strcat(s->msg, m); + return (const char*)s->msg; +} + +/* =========================================================================== + Clear the error and end-of-file flags, and do the same for the real file. +*/ +void ZEXPORT gzclearerr (file) + gzFile file; +{ + gz_stream *s = (gz_stream*)file; + + if (s == NULL) return; + if (s->z_err != Z_STREAM_END) s->z_err = Z_OK; + s->z_eof = 0; + clearerr(s->file); +} diff --git a/libkern/zlib/infback.c b/libkern/zlib/infback.c new file mode 100644 index 000000000..5433556ed --- /dev/null +++ b/libkern/zlib/infback.c @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. 
+ */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ +#ifndef NO_ZCFUNCS + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; +#endif /* NO_ZCFUNCS */ + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->write = 0; + state->whave = 0; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. 
+ */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). 
*/ +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflateBack() + with an error if there is no input available. */ +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflateBack() with + an error. */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Assure that some output space is available, by writing out the window + if it's full. If the write fails, return from inflateBack() with a + Z_BUF_ERROR. */ +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) + +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). 
When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, then inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized.
+ */ +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), 
state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: 
codes ok\n")); + state->mode = LEN; + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + + /* process literal */ + if (this.op == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(this.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + 
DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(this.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left)) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff --git a/libkern/zlib/inffast.c b/libkern/zlib/inffast.c new file mode 100644 index 000000000..82d2795c0 --- /dev/null +++ b/libkern/zlib/inffast.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2008 Apple Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifndef ASMINF + +/* Allow machine dependent optimization for post-increment or pre-increment. 
+ Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +#else +# define OFF 1 +# define PUP(a) *++(a) +#endif + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. 
+ */ +void inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + unsigned char FAR *in; /* local strm->next_in */ + unsigned char FAR *last; /* while in < last, enough input available */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code this; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in - OFF; + last = in + (strm->avail_in - 5); + out = strm->next_out - OFF; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + write = state->write; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* 
decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = lcode[hold & lmask]; + dolen: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op == 0) { /* literal */ + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + PUP(out) = (unsigned char)(this.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = dcode[hold & dmask]; + dodist: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + from = window - OFF; + if 
(write == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (write < op) { /* wrap around window */ + from += wsize + write - op; + op -= write; + if (op < len) { /* some from end of window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = window - OFF; + if (write < len) { /* some from start of window */ + op = write; + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += write - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } while (len > 2); + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + this = dcode[this.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + this = lcode[this.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go 
too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in + OFF; + strm->next_out = out + OFF; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and write == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/bsd/dev/i386/sel.h b/libkern/zlib/inffast.h similarity index 73% rename from bsd/dev/i386/sel.h rename to libkern/zlib/inffast.h index 4f4e4889b..c275b8de3 100644 --- a/bsd/dev/i386/sel.h +++ b/libkern/zlib/inffast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,28 +25,14 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Intel386 Family: Segment selector. - * - * HISTORY - * - * 29 March 1992 ? at NeXT - * Created. +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h */ -/* - * Segment selector. +/* WARNING: this file should *not* be used by applications. 
It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. */ -typedef struct sel { - unsigned short rpl :2, -#define KERN_PRIV 0 -#define USER_PRIV 3 - ti :1, -#define SEL_GDT 0 -#define SEL_LDT 1 - index :13; -} sel_t; - +void inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/libkern/zlib/inffixed.h b/libkern/zlib/inffixed.h new file mode 100644 index 000000000..72f28a120 --- /dev/null +++ b/libkern/zlib/inffixed.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). 
+ */ + + /* WARNING: this file should *not* be used by applications. It + is part of the implementation of the compression library and + is subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + 
{0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + 
{0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/libkern/zlib/inflate.c b/libkern/zlib/inflate.c new file mode 100644 index 000000000..fe12d1684 --- /dev/null +++ b/libkern/zlib/inflate.c @@ -0,0 +1,1397 @@ +/* + * Copyright 
(c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) 
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common write == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. 
+ * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, unsigned out)); +#ifdef BUILDFIXED + void makefixed OF((void)); +#endif +local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf, + unsigned len)); + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + strm->adler = 1; /* to support ill-conceived Java test suite */ + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->head = Z_NULL; + state->wsize = 0; + state->whave = 0; + state->write = 0; + state->hold = 0; + state->bits = 0; + state->lencode = 
state->distcode = state->next = state->codes; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += value << state->bits; + state->bits += bits; + return Z_OK; +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ +#ifndef NO_ZCFUNCS + if (strm->zalloc == (alloc_func)0) { + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; + } + if (strm->zfree == (free_func)0) strm->zfree = zcfree; +#endif /* NO_ZCFUNCS */ + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + if (windowBits < 0) { + state->wrap = 0; + windowBits = -windowBits; + } + else { + state->wrap = (windowBits >> 4) + 1; +#ifdef GUNZIP + if (windowBits < 48) windowBits &= 15; +#endif + } + if (windowBits < 8 || windowBits > 15) { + ZFREE(strm, state); + strm->state = Z_NULL; + return Z_STREAM_ERROR; + } + state->wbits = (unsigned)windowBits; + state->window = Z_NULL; + return inflateReset(strm); +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +/* + Return state with length and distance 
decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(state) +struct inflate_state FAR *state; +{ +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. 
A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits, + state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. 
+
+   Providing output buffers larger than 32K to inflate() should provide a speed
+   advantage, since only the last 32K of output is copied to the sliding window
+   upon return from inflate(), and since all distances after the first 32K of
+   output will fall in the output data, making match copies simpler and faster.
+   The advantage may be dependent on the size of the processor's data caches.
+
+   strm is the stream whose inflate state owns the window. out is compared
+   with strm->avail_out: out - strm->avail_out is taken as the number of bytes
+   just written, and those bytes are read from the memory ending at
+   strm->next_out.
+
+   Returns 0 on success, or 1 if the window buffer could not be allocated.
+ */
+local int updatewindow(strm, out)
+z_streamp strm;
+unsigned out;
+{
+    struct inflate_state FAR *state;
+    unsigned copy, dist;
+
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* if it hasn't been done already, allocate space for the window
+       (1 << wbits bytes, obtained with ZALLOC) */
+    if (state->window == Z_NULL) {
+        state->window = (unsigned char FAR *)
+                        ZALLOC(strm, 1U << state->wbits,
+                               sizeof(unsigned char));
+        if (state->window == Z_NULL) return 1;
+    }
+
+    /* if window not in use yet, initialize: wsize is still 0 in that case,
+       so take the size from wbits and start with an empty window */
+    if (state->wsize == 0) {
+        state->wsize = 1U << state->wbits;
+        state->write = 0;
+        state->whave = 0;
+    }
+
+    /* copy state->wsize or less output bytes into the circular window;
+       when a full window or more was written, only the last wsize bytes are
+       kept and the write position goes back to the start */
+    copy = out - strm->avail_out;
+    if (copy >= state->wsize) {
+        zmemcpy(state->window, strm->next_out - state->wsize, state->wsize);
+        state->write = 0;
+        state->whave = state->wsize;
+    }
+    else {
+        dist = state->wsize - state->write;     /* room up to window end */
+        if (dist > copy) dist = copy;
+        zmemcpy(state->window + state->write, strm->next_out - copy, dist);
+        copy -= dist;
+        if (copy) {                             /* rest wraps to the front */
+            zmemcpy(state->window, strm->next_out - copy, copy);
+            state->write = copy;
+            state->whave = state->wsize;
+        }
+        else {                                  /* everything fit, no wrap */
+            state->write += dist;
+            if (state->write == state->wsize) state->write = 0;
+            if (state->whave < state->wsize) state->whave += dist;
+        }
+    }
+    return 0;
+}
+
+/* Macros for inflate(): they refer by name to the local variables of
+   inflate() (state, strm, next, put, have, left, hold, bits, hbuf) and to its
+   inf_leave label, so they can only be expanded inside that function. */
+
+/* check function to use adler32() for zlib or z_crc32() for gzip; with
+   GUNZIP the choice is made at run time from state->flags, which inflate()
+   sets to 0 when it expects a zlib header */
+#ifdef GUNZIP
+#  define UPDATE(check, buf, len) \
+    (state->flags ? z_crc32(check, buf, len) : adler32(check, buf, len))
+#else
+#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#endif
+
+/* check macros for header crc: store the low 2 or 4 bytes of word in
+   hbuf[], least significant byte first, and run them through z_crc32(),
+   assigning the result back to check */
+#ifdef GUNZIP
+#  define CRC2(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        check = z_crc32(check, hbuf, 2); \
+    } while (0)
+
+#  define CRC4(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        hbuf[2] = (unsigned char)((word) >> 16); \
+        hbuf[3] = (unsigned char)((word) >> 24); \
+        check = z_crc32(check, hbuf, 4); \
+    } while (0)
+#endif
+
+/* Load registers with state in inflate() for speed: the stream's in/out
+   pointers and counts and the saved bit accumulator are copied to locals */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Restore state from registers in inflate(): the exact inverse of LOAD(),
+   writing the locals back to strm and state */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflate()
+   if there is no input available. The new byte is added above the bits
+   already held; the return is a goto to the inf_leave label. */
+#define PULLBYTE() \
+    do { \
+        if (have == 0) goto inf_leave; \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator. If there is
+   not enough available input to do that, then return from inflate().
*/
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16). The bits are only
+   examined: hold and bits are left unchanged. */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator. bits is unsigned in inflate(), so
+   at least n bits should already be present, e.g. after NEEDBITS(n). */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary, i.e. drop
+   bits & 7 bits so that the remaining count is a multiple of eight */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Reverse the bytes in a 32-bit value. q is evaluated four times, so it
+   must not have side effects. inflate() applies this to the 32-bit values it
+   reads in the DICTID state and, for non-gzip streams, in the CHECK state. */
+#define REVERSE(q) \
+    ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+     (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+/*
+   inflate() uses a state machine to process as much input data and generate as
+   much output data as possible before returning. The state machine is
+   structured roughly as follows:
+
+    for (;;) switch (state) {
+    ...
+    case STATEn:
+        if (not enough input data or output space to make progress)
+            return;
+        ... make progress ...
+        state = STATEm;
+        break;
+    ...
+    }
+
+   so when inflate() is called again, the same case is attempted again, and
+   if the appropriate resources are provided, the machine proceeds to the
+   next state. The NEEDBITS() macro is usually the way the state evaluates
+   whether it can proceed or should return. NEEDBITS() does the return if
+   the requested bits are not available. The typical use of the BITS macros
+   is:
+
+        NEEDBITS(n);
+        ... do something with BITS(n) ...
+        DROPBITS(n);
+
+   where NEEDBITS(n) either returns from inflate() if there isn't enough
+   input left to load n bits into the accumulator, or it continues. BITS(n)
+   gives the low n bits in the accumulator. When done, DROPBITS(n) drops
+   the low n bits off the accumulator. INITBITS() clears the accumulator
+   and sets the number of available bits to zero. BYTEBITS() discards just
+   enough bits to put the accumulator on a byte boundary. After BYTEBITS()
+   and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+   NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+   if there is no input available. The decoding of variable length codes uses
+   PULLBYTE() directly in order to pull just enough bytes to decode the next
+   code, and no more.
+
+   Some states loop until they get enough input, making sure that enough
+   state information is maintained to continue the loop where it left off
+   if NEEDBITS() returns in the loop. For example, want, need, and keep
+   would all have to actually be part of the saved state in case NEEDBITS()
+   returns:
+
+    case STATEw:
+        while (want < need) {
+            NEEDBITS(n);
+            keep[want++] = BITS(n);
+            DROPBITS(n);
+        }
+        state = STATEx;
+    case STATEx:
+
+   As shown above, if the next state is also the next case, then the break
+   is omitted.
+
+   A state may also return if there is not enough output space available to
+   complete that state. Those states are copying stored data, writing a
+   literal byte, and copying a matching string.
+
+   When returning, a "goto inf_leave" is used to update the total counters,
+   update the check value, and determine whether any progress has been made
+   during that inflate() call in order to return the proper return code.
+   Progress is defined as a change in either strm->avail_in or strm->avail_out.
+   When there is a window, goto inf_leave will update the window with the last
+   output written. If a goto inf_leave occurs in the middle of decompression
+   and there is no window currently, goto inf_leave will create one and copy
+   output to the window for the next call of inflate().
+
+   In this implementation, the flush parameter of inflate() only affects the
+   return code (per zlib.h). inflate() always writes as much as possible to
+   strm->next_out, given the space available and the provided input--the effect
+   documented in zlib.h of Z_SYNC_FLUSH.
Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + state->check = z_crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + 
break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if (state->flags & 0x0200) CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if (state->flags & 0x0200) CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = 
EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if (state->flags & 0x0200) + state->check = z_crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = z_crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = len; + } while (len && copy < have); + if (state->flags & 0x0200) + state->check = z_crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if (hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + 
state->head->done = 1; + } + strm->adler = state->check = z_crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = REVERSE(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { 
+ this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + break; + } + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + 
last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + if ((int)(this.op) == 0) { + Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", this.val)); + state->mode = LIT; + break; + } + if (this.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(this.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->mode = DIST; + case DIST: + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + state->extra = (unsigned)(this.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + if (state->offset > state->whave + out - left) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + 
Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->write) { + copy -= state->write; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->write - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if (out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if (( +#ifdef GUNZIP + state->flags ? hold : +#endif + REVERSE(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. 
Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) + if (updatewindow(strm, out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if (state->wrap && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long id; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary id */ + if (state->mode == DICT) { + id = adler32(0L, Z_NULL, 0); + id = adler32(id, dictionary, dictLength); + if (id != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window */ + if (updatewindow(strm, strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + if (dictLength > state->wsize) { + zmemcpy(state->window, dictionary + dictLength - state->wsize, + state->wsize); + state->whave 
= state->wsize; + } + else { + zmemcpy(state->window + state->wsize - dictLength, dictionary, + dictLength); + state->whave = dictLength; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. 
+ */ +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL || + source->zalloc == (alloc_func)0 || source->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy(dest, source, sizeof(z_stream)); + zmemcpy(copy, state, sizeof(struct inflate_state)); + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} diff --git a/libkern/zlib/inflate.h b/libkern/zlib/inflate.h new file mode 100644 index 000000000..473cff0fb --- /dev/null +++ b/libkern/zlib/inflate.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. 
*/ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN, /* i: waiting for length/lit code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to the BAD or MEM mode -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) + 
(gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME + NAME -> COMMENT -> HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + Read deflate blocks: + TYPE -> STORED or TABLE or LEN or CHECK + STORED -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN + Read deflate codes: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* state maintained between inflate() calls. With ENOUGH == 2048 four-byte code entries, codes[] alone is 8K bytes, so the whole structure is somewhat over 9K bytes. */ +struct inflate_state { + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits currently held in "hold" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* 
dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[]; inflateSync() reuses it as the count of sync pattern bytes matched so far */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths; 320 == 288 + 32, the largest nlen + ndist the TABLE header bits can encode */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ +}; diff --git a/libkern/zlib/inftrees.c b/libkern/zlib/inftrees.c new file mode 100644 index 000000000..338c455c8 --- /dev/null +++ b/libkern/zlib/inftrees.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.3 Copyright 1995-2005 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. 
+ */ +int inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 
0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. 
+ */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)1; + this.val = (unsigned short)0; + *(*table)++ = this; /* make a table to force an error */ + *(*table)++ = this; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. 
When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked when a LENS table is being made + against the space in *table, ENOUGH, minus the maximum space needed by + the worst case distance code, MAXD. This should never happen, but the + sufficiency of ENOUGH has not been proven exhaustively, hence the check. + This assumes that when type == LENS, bits == 9. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to 
sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. + */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/libkern/zlib/inftrees.h b/libkern/zlib/inftrees.h new file mode 100644 index 000000000..fccea249a --- /dev/null +++ b/libkern/zlib/inftrees.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. 
op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1444 code structures (852 for length/literals + and 592 for distances, the latter actually the result of an + exhaustive search). The true maximum is not known, but the value + below is more than safe. */ +#define ENOUGH 2048 +#define MAXD 592 + +/* Type of code to build for inftable() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +extern int inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff --git a/libkern/zlib/trees.c b/libkern/zlib/trees.c new file mode 100644 index 000000000..a64436848 --- /dev/null +++ b/libkern/zlib/trees.c @@ -0,0 +1,1246 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2005 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. 
+ * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +#include "deflate.h" + +#ifdef DEBUG +# include +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +#define Buf_size (8 * 2*sizeof(char)) +/* Number of bits used within bi_buf. (bi_buf might be implemented on + * more than 16 bits on some systems.) + */ + +/* =========================================================================== + * Local data. These are initialized only once. 
+ */ + +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +local static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +local static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +local static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. 
+ */ + +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, ct_data *ltree, + ct_data *dtree)); +local void set_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned value, int length)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); +local void copy_block OF((deflate_state *s, charf *buf, unsigned len, + int header)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
+ */ +#ifdef DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (value << s->bi_valid); + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (val << s->bi_valid);\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. 
+ */ +local void tr_static_init() +{ +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + 
static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ +} + +/* =========================================================================== + * Genererate the file trees.h describing the static trees. + */ +#ifdef GEN_TREES_H +# ifndef DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } + + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* 
=========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void _tr_init(s) + deflate_state *s; +{ + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. 
+ */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. 
+ */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in 
increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. 
+ */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 
+ s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + 
max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to 
send. + */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
+ */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void _tr_stored_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ +#ifdef DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; +#endif + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. 
If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +void _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef DEBUG + s->compressed_len += 10L; +#endif + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. 
+ */ +void _tr_flush_block(s, buf, stored_len, eof) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int eof; /* true if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) + set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 
+ * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); +#ifdef DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ + init_block(s); + + if (eof) { + bi_windup(s); +#ifdef DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. 
+ */ +int _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + +#ifdef TRUNCATE_BLOCK + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } +#endif + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + ct_data *ltree; /* literal tree */ + ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, + "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to BINARY or TEXT, using a crude approximation: + * set it to Z_TEXT if all symbols are either printable characters (33 to 255) + * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. 
+ * IN assertion: the fields Freq of dyn_ltree are set. + */ +local void set_data_type(s) + deflate_state *s; +{ + int n; + + for (n = 0; n < 9; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + if (n == 9) + for (n = 14; n < 32; n++) + if (s->dyn_ltree[n].Freq != 0) + break; + s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. 
+ */ +local void copy_block(s, buf, len, header) + deflate_state *s; + charf *buf; /* the input data */ + unsigned len; /* its length */ + int header; /* true if block header must be written */ +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG + s->bits_sent += (ulg)len<<3; +#endif + while (len--) { + put_byte(s, *buf++); + } +} diff --git a/libkern/zlib/trees.h b/libkern/zlib/trees.h new file mode 100644 index 000000000..ced99a6e8 --- /dev/null +++ b/libkern/zlib/trees.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, 
+{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, 
{{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 
112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/libkern/zlib/uncompr.c b/libkern/zlib/uncompr.c new file mode 100644 index 000000000..00b0b7e0a --- /dev/null +++ b/libkern/zlib/uncompr.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#if KERNEL + #include +#else + #include "zlib.h" +#endif /* KERNEL */ + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the uncompressed data. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/ +int ZEXPORT uncompress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; + + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + err = inflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + inflateEnd(&stream); + if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) + return Z_DATA_ERROR; + return err; + } + *destLen = stream.total_out; + + err = inflateEnd(&stream); + return err; +} diff --git a/libkern/zlib/zutil.c b/libkern/zlib/zutil.c new file mode 100644 index 000000000..020291bc1 --- /dev/null +++ b/libkern/zlib/zutil.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* zutil.c -- target dependent utility functions for the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +#ifndef NO_DUMMY_DECL +struct internal_state {int dummy;}; /* for buggy compilers */ +#endif + +const char * const z_errmsg[10] = { +"need dictionary", /* Z_NEED_DICT 2 */ +"stream end", /* Z_STREAM_END 1 */ +"", /* Z_OK 0 */ +"file error", /* Z_ERRNO (-1) */ +"stream error", /* Z_STREAM_ERROR (-2) */ +"data error", /* Z_DATA_ERROR (-3) */ +"insufficient memory", /* Z_MEM_ERROR (-4) */ +"buffer error", /* Z_BUF_ERROR (-5) */ +"incompatible version",/* Z_VERSION_ERROR (-6) */ +""}; + + +const char * ZEXPORT zlibVersion() +{ + return ZLIB_VERSION; +} + +uLong ZEXPORT zlibCompileFlags() +{ + uLong flags; + + flags = 0; + switch (sizeof(uInt)) { + case 2: break; + case 4: flags += 1; break; + case 8: flags += 2; break; + default: flags += 3; + } + switch (sizeof(uLong)) { + case 2: break; + case 4: flags += 1 << 2; break; + case 8: flags += 2 << 2; break; + default: flags += 3 << 2; + } + switch (sizeof(voidpf)) { + case 2: break; + case 4: flags += 1 << 4; break; + case 8: flags += 2 << 4; break; + default: flags += 3 << 4; + } + switch (sizeof(z_off_t)) { + case 2: break; + case 4: flags += 1 << 6; break; + case 8: flags += 2 << 6; break; + default: flags += 3 << 6; + } +#ifdef DEBUG + flags += 1 << 8; 
+#endif +#if defined(ASMV) || defined(ASMINF) + flags += 1 << 9; +#endif +#ifdef ZLIB_WINAPI + flags += 1 << 10; +#endif +#ifdef BUILDFIXED + flags += 1 << 12; +#endif +#ifdef DYNAMIC_CRC_TABLE + flags += 1 << 13; +#endif +#ifdef NO_GZCOMPRESS + flags += 1L << 16; +#endif +#ifdef NO_GZIP + flags += 1L << 17; +#endif +#ifdef PKZIP_BUG_WORKAROUND + flags += 1L << 20; +#endif +#ifdef FASTEST + flags += 1L << 21; +#endif +#ifdef STDC +# ifdef NO_vsnprintf + flags += 1L << 25; +# ifdef HAS_vsprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_vsnprintf_void + flags += 1L << 26; +# endif +# endif +#else + flags += 1L << 24; +# ifdef NO_snprintf + flags += 1L << 25; +# ifdef HAS_sprintf_void + flags += 1L << 26; +# endif +# else +# ifdef HAS_snprintf_void + flags += 1L << 26; +# endif +# endif +#endif + return flags; +} + +#ifdef DEBUG + +# ifndef verbose +# define verbose 0 +# endif +int z_verbose = verbose; + +void z_error (m) + char *m; +{ + fprintf(stderr, "%s\n", m); + exit(1); +} +#endif + +/* exported to allow conversion of error code to string for compress() and + * uncompress() + */ +const char * ZEXPORT zError(err) + int err; +{ + return ERR_MSG(err); +} + +#if defined(_WIN32_WCE) + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. + */ + int errno = 0; +#endif + +#ifndef HAVE_MEMCPY + +void zmemcpy(dest, source, len) + Bytef* dest; + const Bytef* source; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = *source++; /* ??? to be unrolled */ + } while (--len != 0); +} + +int zmemcmp(s1, s2, len) + const Bytef* s1; + const Bytef* s2; + uInt len; +{ + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void zmemzero(dest, len) + Bytef* dest; + uInt len; +{ + if (len == 0) return; + do { + *dest++ = 0; /* ??? 
to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef NO_ZCFUNCS + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + voidpf buf = opaque; /* just to make some compilers happy */ + ulg bsize = (ulg)items*size; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. 
+ */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + int n; + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + ptr = opaque; /* just to make some compilers happy */ + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + return _halloc((long)items, size); +} + +void zcfree (voidpf opaque, voidpf ptr) +{ + if (opaque) opaque = 0; /* to make compiler happy */ + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc OF((uInt size)); +extern voidp calloc OF((uInt items, uInt size)); +extern void free OF((voidpf ptr)); +#endif + +voidpf zcalloc (opaque, items, size) + voidpf opaque; + unsigned items; + unsigned size; +{ + if (opaque) items += size - size; /* make compiler happy */ + return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) : + (voidpf)calloc(items, size); +} + +void zcfree (opaque, ptr) + voidpf opaque; + voidpf ptr; +{ + free(ptr); + if (opaque) return; /* make compiler happy */ +} + +#endif /* MY_ZCALLOC */ + +#endif /* NO_CZFUNCS */ diff --git a/libkern/zlib/zutil.h b/libkern/zlib/zutil.h new file mode 100644 index 000000000..14c2f30d7 --- /dev/null +++ b/libkern/zlib/zutil.h @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. 
+ * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#define ZLIB_INTERNAL + +#if KERNEL +# include +# include +# ifndef NO_ERRNO_H +# define NO_ERRNO_H 1 +# endif +# ifndef NO_ZCFUNCS +# define NO_ZCFUNCS 1 +# endif +#else +# include "zlib.h" +# ifdef STDC +# ifndef _WIN32_WCE +# include +# endif +# include +# include +# endif +#endif /* KERNEL */ + +#ifdef NO_ERRNO_H +# ifdef _WIN32_WCE + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. We rename it to + * avoid conflict with other libraries that use the same workaround. + */ +# define errno z_errno +# endif + extern int errno; +#else +# ifndef _WIN32_WCE +# include +# endif +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. 
MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 0x05 +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +# ifdef M_I86 + #include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#ifdef WIN32 +# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ +# define OS_CODE 0x0b +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0f +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if KERNEL + typedef long ptrdiff_t; +#elif (defined(_MSC_VER) && (_MSC_VER > 600)) +# if defined(_WIN32_WCE) +# define 
fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS + /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 + /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# define vsnprintf _vsnprintf +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +#endif +#ifdef VMS +# define NO_vsnprintf +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. 
+ */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef DEBUG +# include + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +#ifndef NO_ZCFUNCS +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); +#endif /* NO_ZCFUNCS */ + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +#endif /* ZUTIL_H */ diff --git a/libsa/Makefile b/libsa/Makefile index 8f7a3d4a4..eea21bddd 100644 --- a/libsa/Makefile +++ b/libsa/Makefile @@ -10,12 +10,14 @@ include $(MakeInc_def) INSTINC_SUBDIRS = libsa INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} 
EXPINC_SUBDIRS = libsa EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} diff --git a/libsa/bootstrap.cpp b/libsa/bootstrap.cpp index 7db64a172..3808b953a 100644 --- a/libsa/bootstrap.cpp +++ b/libsa/bootstrap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,79 +25,924 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include +extern "C" { #include -#include - -#include -#include -#include - -#include "kld_patch.h" - -/***** - * This function is used by IOCatalogue to load a kernel - * extension. libsa initially sets it to be a function - * that uses libkld to load and link the extension from - * within the kernel. Once the root filesystem is up, - * this gets switch to the kmod_load_extension() function, - * which merely queues the extension for loading by the - * kmodload utility. - */ -extern kern_return_t (*kmod_load_function)(char *extension_name); -extern bool (*record_startup_extensions_function)(void); -extern bool (*add_from_mkext_function)(OSData * mkext); -extern void (*remove_startup_extension_function)(const char * name); - -/**** - * IOCatalogue uses this variable to make a few decisions - * about loading and matching drivers. - */ -extern int kernelLinkerPresent; +#include +#include +} + +#include +#include +#include +#include +#include +#include +#include +#include + +#if PRAGMA_MARK +#pragma mark Bootstrap Declarations +#endif +/********************************************************************* +* Bootstrap Declarations +* +* The ENTIRE point of the libsa/KLD segment is to isolate bootstrap +* code from other parts of the kernel, so function symbols are not +* exported; rather pointers to those functions are exported. +* +* xxx - need to think about locking for handling the 'weak' refs. 
+* xxx - do export a non-KLD function that says you've called a +* xxx - bootstrap function that has been removed. +* +* ALL call-ins to this segment of the kernel must be done through +* exported pointers. The symbols themselves are private and not to +* be linked against. +*********************************************************************/ +extern "C" { + extern void (*record_startup_extensions_function)(void); + extern void (*load_security_extensions_function)(void); +}; +static void bootstrapRecordStartupExtensions(void); +static void bootstrapLoadSecurityExtensions(void); + +#if PRAGMA_MARK +#pragma mark Macros +#endif +/********************************************************************* +* Macros +*********************************************************************/ +#define CONST_STRLEN(str) (sizeof(str) - 1) + +#if PRAGMA_MARK +#pragma mark Kernel Component Kext Identifiers +#endif +/********************************************************************* +* Kernel Component Kext Identifiers +* +* We could have each kernel resource kext automatically "load" as +* it's created, but it's nicer to have them listed in kextstat in +* the order of this list. We'll walk through this after setting up +* all the boot kexts and have them load up. +*********************************************************************/ +static const char * sKernelComponentNames[] = { + // The kexts for these IDs must have a version matching 'osrelease'. + "com.apple.kernel", + "com.apple.kpi.bsd", + "com.apple.kpi.dsep", + "com.apple.kpi.iokit", + "com.apple.kpi.libkern", + "com.apple.kpi.mach", + "com.apple.kpi.private", + "com.apple.kpi.unsupported", + "com.apple.iokit.IONVRAMFamily", + "com.apple.driver.AppleNMI", + "com.apple.iokit.IOSystemManagementFamily", + "com.apple.iokit.ApplePlatformFamily", + +#if defined(__ppc__) || defined(__i386__) || defined(__arm__) + /* These ones are not supported on x86_64 or any newer platforms. 
+ * They must be version 7.9.9; check by "com.apple.kernel.", with + * the trailing period; "com.apple.kernel" always represents the + * current kernel version. + */ + "com.apple.kernel.6.0", + "com.apple.kernel.bsd", + "com.apple.kernel.iokit", + "com.apple.kernel.libkern", + "com.apple.kernel.mach", +#endif + + NULL +}; +#if PRAGMA_MARK +#pragma mark KLDBootstrap Class +#endif +/********************************************************************* +* KLDBootstrap Class +* +* We use a C++ class here so that it can be a friend of OSKext and +* get at private stuff. We can't hide the class itself, but we can +* hide the instance through which we invoke the functions. +*********************************************************************/ class KLDBootstrap { + friend void bootstrapRecordStartupExtensions(void); + friend void bootstrapLoadSecurityExtensions(void); + +private: + void readStartupExtensions(void); + + void readPrelinkedExtensions( + kernel_section_t * prelinkInfoSect); + void readBooterExtensions(void); + OSReturn readMkextExtensions( + OSString * deviceTreeName, + OSData * deviceTreeData); + + OSReturn loadKernelComponentKexts(void); + void readBuiltinPersonalities(void); + + void loadSecurityExtensions(void); + public: - KLDBootstrap(); - ~KLDBootstrap(); + KLDBootstrap(void); + ~KLDBootstrap(void); }; +static KLDBootstrap sBootstrapObject; -static KLDBootstrap bootstrap_obj; +/********************************************************************* +* Set the function pointers for the entry points into the bootstrap +* segment upon C++ static constructor invocation. 
+*********************************************************************/ +KLDBootstrap::KLDBootstrap(void) +{ + if (this != &sBootstrapObject) { + panic("Attempt to access bootstrap segment."); + } + record_startup_extensions_function = &bootstrapRecordStartupExtensions; + load_security_extensions_function = &bootstrapLoadSecurityExtensions; + OSKext::initialize(); +} + +/********************************************************************* +* Clear the function pointers for the entry points into the bootstrap +* segment upon C++ static destructor invocation. +*********************************************************************/ +KLDBootstrap::~KLDBootstrap(void) +{ + if (this != &sBootstrapObject) { + panic("Attempt to access bootstrap segment."); + } + record_startup_extensions_function = 0; + load_security_extensions_function = 0; +} +/********************************************************************* +*********************************************************************/ +void +KLDBootstrap::readStartupExtensions(void) +{ + kernel_section_t * prelinkInfoSect = NULL; // do not free -/* The constructor creates a lock and puts entries into a dispatch - * table for functions used to record and load kmods. - */ -KLDBootstrap::KLDBootstrap() { + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogGeneralFlag | kOSKextLogDirectoryScanFlag | + kOSKextLogKextBookkeepingFlag, + "Reading startup extensions."); + + /* If the prelink info segment has a nonzero size, we are prelinked + * and won't have any individual kexts or mkexts to read. + * Otherwise, we need to read kexts or the mkext from what the booter + * has handed us. 
+ */ + prelinkInfoSect = getsectbyname(kPrelinkInfoSegment, kPrelinkInfoSection); + if (prelinkInfoSect->size) { + readPrelinkedExtensions(prelinkInfoSect); + } else { + readBooterExtensions(); + } + + loadKernelComponentKexts(); + readBuiltinPersonalities(); + OSKext::sendAllKextPersonalitiesToCatalog(); + + return; +} + +/********************************************************************* +*********************************************************************/ +void +KLDBootstrap::readPrelinkedExtensions( + kernel_section_t * prelinkInfoSect) +{ + OSArray * infoDictArray = NULL; // do not release + OSArray * personalitiesArray = NULL; // do not release + OSObject * parsedXML = NULL; // must release + OSDictionary * prelinkInfoDict = NULL; // do not release + OSString * errorString = NULL; // must release + OSKext * theKernel = NULL; // must release + +#if CONFIG_KXLD + kernel_section_t * kernelLinkStateSection = NULL; // see code +#endif + kernel_segment_command_t * prelinkLinkStateSegment = NULL; // see code + kernel_segment_command_t * prelinkTextSegment = NULL; // see code + kernel_segment_command_t * prelinkInfoSegment = NULL; // see code + + /* We make some copies of data, but if anything fails we're basically + * going to fail the boot, so these won't be cleaned up on error. + */ + void * prelinkData = NULL; // see code + void * prelinkCopy = NULL; // see code + vm_size_t prelinkLength = 0; +#if !__LP64__ && !defined(__arm__) + vm_map_offset_t prelinkDataMapOffset = 0; +#endif + + kern_return_t mem_result = KERN_SUCCESS; + + OSDictionary * infoDict = NULL; // do not release + + IORegistryEntry * registryRoot = NULL; // do not release + OSNumber * prelinkCountObj = NULL; // must release + + u_int i = 0; + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag, + "Starting from prelinked kernel."); + + /***** + * Wrap the kernel link state in-place in an OSData. 
+ * This is unnecessary (and the link state may not be present) if the kernel + * does not have kxld support because this information is only used for + * runtime linking. + */ +#if CONFIG_KXLD + kernelLinkStateSection = getsectbyname(kPrelinkLinkStateSegment, + kPrelinkKernelLinkStateSection); + if (!kernelLinkStateSection) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Can't find prelinked kernel link state."); + goto finish; + } - malloc_init(); + theKernel = OSKext::lookupKextWithIdentifier(kOSKextKernelIdentifier); + if (!theKernel) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogArchiveFlag, + "Can't find kernel kext object in prelinked kernel."); + goto finish; + } - kmod_load_function = &load_kernel_extension; + prelinkData = (void *) kernelLinkStateSection->addr; + prelinkLength = kernelLinkStateSection->size; - record_startup_extensions_function = &recordStartupExtensions; - add_from_mkext_function = &addExtensionsFromArchive; - remove_startup_extension_function = &removeStartupExtension; + mem_result = kmem_alloc_pageable(kernel_map, + (vm_offset_t *) &prelinkCopy, prelinkLength); + if (mem_result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogArchiveFlag, + "Can't copy prelinked kernel link state."); + goto finish; + } + memcpy(prelinkCopy, prelinkData, prelinkLength); -#ifndef CONFIG_NOKLD - kernelLinkerPresent = 1; + theKernel->linkState = OSData::withBytesNoCopy(prelinkCopy, prelinkLength); + if (!theKernel->linkState) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogArchiveFlag, + "Can't create prelinked kernel link state wrapper."); + goto finish; + } + theKernel->linkState->setDeallocFunction(osdata_kmem_free); #endif + + prelinkTextSegment = getsegbyname(kPrelinkTextSegment); + if (!prelinkTextSegment) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag 
| kOSKextLogArchiveFlag, + "Can't find prelinked kexts' text segment."); + goto finish; + } + + prelinkData = (void *) prelinkTextSegment->vmaddr; + prelinkLength = prelinkTextSegment->vmsize; + +#if !__LP64__ + /* To enable paging and write/execute protections on the kext + * executables, we need to copy them out of the booter-created + * memory, reallocate that space with VM, then prelinkCopy them back in. + * This isn't necessary on LP64 because kexts have their own VM + * region on that architecture model. + */ + + mem_result = kmem_alloc(kernel_map, (vm_offset_t *)&prelinkCopy, + prelinkLength); + if (mem_result != KERN_SUCCESS) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogArchiveFlag, + "Can't copy prelinked kexts' text for VM reassign."); + goto finish; + } + + /* Copy it out. + */ + memcpy(prelinkCopy, prelinkData, prelinkLength); + + /* Dump the booter memory. + */ + ml_static_mfree((vm_offset_t)prelinkData, prelinkLength); + + /* Set up the VM region. + */ + prelinkDataMapOffset = (vm_map_offset_t)(uintptr_t)prelinkData; + mem_result = vm_map_enter_mem_object( + kernel_map, + &prelinkDataMapOffset, + prelinkLength, /* mask */ 0, + VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, + (ipc_port_t)NULL, + (vm_object_offset_t) 0, + /* copy */ FALSE, + /* cur_protection */ VM_PROT_ALL, + /* max_protection */ VM_PROT_ALL, + /* inheritance */ VM_INHERIT_DEFAULT); + if ((mem_result != KERN_SUCCESS) || + (prelinkTextSegment->vmaddr != prelinkDataMapOffset)) + { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogArchiveFlag, + "Can't create kexts' text VM entry at 0x%llx, length 0x%x (error 0x%x).", + (unsigned long long) prelinkDataMapOffset, prelinkLength, mem_result); + goto finish; + } + prelinkData = (void *)(uintptr_t)prelinkDataMapOffset; + + /* And copy it back. 
+ */ + memcpy(prelinkData, prelinkCopy, prelinkLength); + + kmem_free(kernel_map, (vm_offset_t)prelinkCopy, prelinkLength); +#endif /* !__LP64__ */ + + /* Unserialize the info dictionary from the prelink info section. + */ + parsedXML = OSUnserializeXML((const char *)prelinkInfoSect->addr, + &errorString); + if (parsedXML) { + prelinkInfoDict = OSDynamicCast(OSDictionary, parsedXML); + } + if (!prelinkInfoDict) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not a dictionary"; + } + OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "Error unserializing prelink plist: %s.", errorCString); + goto finish; + } + + infoDictArray = OSDynamicCast(OSArray, + prelinkInfoDict->getObject(kPrelinkInfoDictionaryKey)); + if (!infoDictArray) { + OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "The prelinked kernel has no kext info dictionaries"); + goto finish; + } + + /* Create OSKext objects for each info dictionary. + */ + for (i = 0; i < infoDictArray->getCount(); ++i) { + infoDict = OSDynamicCast(OSDictionary, infoDictArray->getObject(i)); + if (!infoDict) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag, + "Can't find info dictionary for prelinked kext #%d.", i); + continue; + } + + /* Create the kext for the entry, then release it, because the + * kext system keeps them around until explicitly removed. + * Any creation/registration failures are already logged for us. + */ + OSKext * newKext = OSKext::withPrelinkedInfoDict(infoDict); + OSSafeReleaseNULL(newKext); + } + + /* Get all of the personalities for kexts that were not prelinked and + * add them to the catalogue. 
+ */ + personalitiesArray = OSDynamicCast(OSArray, + prelinkInfoDict->getObject(kPrelinkPersonalitiesKey)); + if (!personalitiesArray) { + OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogArchiveFlag, + "The prelinked kernel has no personalities array"); + goto finish; + } + + if (personalitiesArray->getCount()) { + OSKext::setPrelinkedPersonalities(personalitiesArray); + } + + /* Store the number of prelinked kexts in the registry so we can tell + * when the system has been started from a prelinked kernel. + */ + registryRoot = IORegistryEntry::getRegistryRoot(); + assert(registryRoot); + + prelinkCountObj = OSNumber::withNumber( + (unsigned long long)infoDictArray->getCount(), + 8 * sizeof(uint32_t)); + assert(prelinkCountObj); + if (prelinkCountObj) { + registryRoot->setProperty(kOSPrelinkKextCountKey, prelinkCountObj); + } + + OSSafeReleaseNULL(prelinkCountObj); + prelinkCountObj = OSNumber::withNumber( + (unsigned long long)personalitiesArray->getCount(), + 8 * sizeof(uint32_t)); + assert(prelinkCountObj); + if (prelinkCountObj) { + registryRoot->setProperty(kOSPrelinkPersonalityCountKey, prelinkCountObj); + } + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogGeneralFlag | kOSKextLogKextBookkeepingFlag | + kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag, + "%u prelinked kexts, and %u additional personalities.", + infoDictArray->getCount(), personalitiesArray->getCount()); + +#if __LP64__ + /* On LP64 systems, kexts are copied to their own special VM region + * during OSKext init time, so we can free the whole segment now. + */ + ml_static_mfree((vm_offset_t) prelinkData, prelinkLength); +#endif /* __LP64__ */ + + /* Free the link state segment, kexts have copied out what they need. 
+ */ + prelinkLinkStateSegment = getsegbyname(kPrelinkLinkStateSegment); + if (prelinkLinkStateSegment) { + ml_static_mfree((vm_offset_t)prelinkLinkStateSegment->vmaddr, + (vm_size_t)prelinkLinkStateSegment->vmsize); + } + + /* Free the prelink info segment, we're done with it. + */ + prelinkInfoSegment = getsegbyname(kPrelinkInfoSegment); + if (prelinkInfoSegment) { + ml_static_mfree((vm_offset_t)prelinkInfoSegment->vmaddr, + (vm_size_t)prelinkInfoSegment->vmsize); + } + +finish: + OSSafeRelease(errorString); + OSSafeRelease(parsedXML); + OSSafeRelease(theKernel); + OSSafeRelease(prelinkCountObj); + return; } -/* The destructor frees all wired memory regions held - * by libsa's malloc package and disposes of the lock. - */ -KLDBootstrap::~KLDBootstrap() { +/********************************************************************* +*********************************************************************/ +#define BOOTER_KEXT_PREFIX "Driver-" +#define BOOTER_MKEXT_PREFIX "DriversPackage-" + +typedef struct _DeviceTreeBuffer { + uint32_t paddr; + uint32_t length; +} _DeviceTreeBuffer; + +void +KLDBootstrap::readBooterExtensions(void) +{ + IORegistryEntry * booterMemoryMap = NULL; // must release + OSDictionary * propertyDict = NULL; // must release + OSCollectionIterator * keyIterator = NULL; // must release + OSString * deviceTreeName = NULL; // do not release + + const _DeviceTreeBuffer * deviceTreeBuffer = NULL; // do not free + char * booterDataPtr = NULL; // do not free + OSData * booterData = NULL; // must release + + OSKext * aKext = NULL; // must release + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogDirectoryScanFlag | kOSKextLogKextBookkeepingFlag, + "Reading startup extensions/mkexts from booter memory."); + + booterMemoryMap = IORegistryEntry::fromPath( "/chosen/memory-map", gIODTPlane); + + if (!booterMemoryMap) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag | kOSKextLogDirectoryScanFlag, + "Can't read 
booter memory map."); + goto finish; + } + + propertyDict = booterMemoryMap->dictionaryWithProperties(); + if (!propertyDict) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag, + "Can't get property dictionary from memory map."); + goto finish; + } + + keyIterator = OSCollectionIterator::withCollection(propertyDict); + if (!keyIterator) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Can't allocate iterator for driver images."); + goto finish; + } + + while ( ( deviceTreeName = + OSDynamicCast(OSString, keyIterator->getNextObject() ))) { + + boolean_t isMkext = FALSE; + const char * devTreeNameCString = deviceTreeName->getCStringNoCopy(); + OSData * deviceTreeEntry = OSDynamicCast(OSData, + propertyDict->getObject(deviceTreeName)); + + /* Clear out the booterData from the prior iteration. + */ + OSSafeReleaseNULL(booterData); + + /* If there is no entry for the name, we can't do much with it. */ + if (!deviceTreeEntry) { + continue; + } + + /* Make sure it is either a kext or an mkext */ + if (!strncmp(devTreeNameCString, BOOTER_KEXT_PREFIX, + CONST_STRLEN(BOOTER_KEXT_PREFIX))) { + + isMkext = FALSE; + + } else if (!strncmp(devTreeNameCString, BOOTER_MKEXT_PREFIX, + CONST_STRLEN(BOOTER_MKEXT_PREFIX))) { + + isMkext = TRUE; + + } else { + continue; + } + + deviceTreeBuffer = (const _DeviceTreeBuffer *) + deviceTreeEntry->getBytesNoCopy(0, sizeof(deviceTreeBuffer)); + if (!deviceTreeBuffer) { + /* We can't get to the data, so we can't do anything, + * not even free it from physical memory (if it's there). + */ + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag, + "Device tree entry %s has NULL pointer.", + devTreeNameCString); + goto finish; // xxx - continue, panic? 
+ } + + booterDataPtr = (char *)ml_static_ptovirt(deviceTreeBuffer->paddr); + if (!booterDataPtr) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag, + "Can't get virtual address for device tree mkext entry %s.", + devTreeNameCString); + goto finish; + } + + /* Wrap the booter data buffer in an OSData and set a dealloc function + * so it will take care of the physical memory when freed. Kexts will + * retain the booterData for as long as they need it. Remove the entry + * from the booter memory map after this is done. + */ + booterData = OSData::withBytesNoCopy(booterDataPtr, + deviceTreeBuffer->length); + if (!booterData) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Error - Can't allocate OSData wrapper for device tree entry %s.", + devTreeNameCString); + goto finish; + } + booterData->setDeallocFunction(osdata_phys_free); + + if (isMkext) { + readMkextExtensions(deviceTreeName, booterData); + } else { + /* Create the kext for the entry, then release it, because the + * kext system keeps them around until explicitly removed. + * Any creation/registration failures are already logged for us. + */ + OSKext * newKext = OSKext::withBooterData(deviceTreeName, booterData); + OSSafeRelease(newKext); + } + + booterMemoryMap->removeProperty(deviceTreeName); + + } /* while ( (deviceTreeName = OSDynamicCast(OSString, ...) 
) ) */ + +finish: + + OSSafeRelease(booterMemoryMap); + OSSafeRelease(propertyDict); + OSSafeRelease(keyIterator); + OSSafeRelease(booterData); + OSSafeRelease(aKext); + return; +} + +/********************************************************************* +*********************************************************************/ +OSReturn +KLDBootstrap::readMkextExtensions( + OSString * deviceTreeName, + OSData * booterData) +{ + OSReturn result = kOSReturnError; - kld_file_cleanup_all_resources(); + uint32_t checksum; + IORegistryEntry * registryRoot = NULL; // do not release + OSData * checksumObj = NULL; // must release - /* Dump all device-tree entries for boot drivers, and all - * info on startup extensions. The IOCatalogue will now - * get personalities from kextd. + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogDirectoryScanFlag | kOSKextLogArchiveFlag, + "Reading startup mkext archive from device tree entry %s.", + deviceTreeName->getCStringNoCopy()); + + /* If we successfully read the archive, + * then save the mkext's checksum in the IORegistry. + * assumes we'll only ever have one mkext to boot */ - clearStartupExtensionsAndLoaderInfo(); + result = OSKext::readMkextArchive(booterData, &checksum); + if (result == kOSReturnSuccess) { + + OSKextLog(/* kext */ NULL, + kOSKextLogProgressLevel | + kOSKextLogArchiveFlag, + "Startup mkext archive has checksum 0x%x.", (int)checksum); + + registryRoot = IORegistryEntry::getRegistryRoot(); + assert(registryRoot); + checksumObj = OSData::withBytes((void *)&checksum, sizeof(checksum)); + assert(checksumObj); + if (checksumObj) { + registryRoot->setProperty(kOSStartupMkextCRC, checksumObj); + } + } + + return result; +} + +/********************************************************************* +*********************************************************************/ +#define COM_APPLE "com.apple." 
+ +void +KLDBootstrap::loadSecurityExtensions(void) +{ + OSDictionary * extensionsDict = NULL; // must release + OSCollectionIterator * keyIterator = NULL; // must release + OSString * bundleID = NULL; // don't release + OSKext * theKext = NULL; // don't release + OSBoolean * isSecurityKext = NULL; // don't release + + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Loading security extensions."); + + extensionsDict = OSKext::copyKexts(); + if (!extensionsDict) { + return; + } + + keyIterator = OSCollectionIterator::withCollection(extensionsDict); + if (!keyIterator) { + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogGeneralFlag, + "Failed to allocate iterator for security extensions."); + goto finish; + } + + while ((bundleID = OSDynamicCast(OSString, keyIterator->getNextObject()))) { + + const char * bundle_id = bundleID->getCStringNoCopy(); + + /* Skip extensions whose bundle IDs don't start with "com.apple.". + */ + if (!bundle_id || + (strncmp(bundle_id, COM_APPLE, CONST_STRLEN(COM_APPLE)) != 0)) { + + continue; + } + + theKext = OSDynamicCast(OSKext, extensionsDict->getObject(bundleID)); + if (!theKext) { + continue; + } + + isSecurityKext = OSDynamicCast(OSBoolean, + theKext->getPropertyForHostArch("AppleSecurityExtension")); + if (isSecurityKext && isSecurityKext->isTrue()) { + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Loading security extension %s.", bundleID->getCStringNoCopy()); + OSKext::loadKextWithIdentifier(bundleID->getCStringNoCopy(), + /* allowDefer */ false); + } + } + +finish: + OSSafeRelease(keyIterator); + OSSafeRelease(extensionsDict); + + return; +} + +/********************************************************************* +* We used to require that all listed kernel components load, but +* nowadays we can get them from userland so we only try to load the +* ones we have. If an error occurs later, such is life. 
+* +* Note that we look the kexts up first, so we can avoid spurious +* (in this context, anyhow) log messages about kexts not being found. +* +* xxx - do we even need to do this any more? Check if the kernel +* xxx - compoonents just load in the regular paths +*********************************************************************/ +OSReturn +KLDBootstrap::loadKernelComponentKexts(void) +{ + OSReturn result = kOSReturnSuccess; // optimistic + OSKext * theKext = NULL; // must release + const char ** kextIDPtr = NULL; // do not release + + for (kextIDPtr = &sKernelComponentNames[0]; *kextIDPtr; kextIDPtr++) { + + OSSafeReleaseNULL(theKext); + theKext = OSKext::lookupKextWithIdentifier(*kextIDPtr); + + if (theKext) { + if (kOSReturnSuccess != OSKext::loadKextWithIdentifier( + *kextIDPtr, /* allowDefer */ false)) { + + // xxx - check KextBookkeeping, might be redundant + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogDirectoryScanFlag | kOSKextLogKextBookkeepingFlag, + "Failed to initialize kernel component %s.", *kextIDPtr); + result = kOSReturnError; + } + } + } + + OSSafeRelease(theKext); + return result; +} + +/********************************************************************* + *********************************************************************/ +void +KLDBootstrap::readBuiltinPersonalities(void) +{ + OSObject * parsedXML = NULL; // must release + OSArray * builtinExtensions = NULL; // do not release + OSArray * allPersonalities = NULL; // must release + OSString * errorString = NULL; // must release + kernel_section_t * infosect = NULL; // do not free + OSCollectionIterator * personalitiesIterator = NULL; // must release + unsigned int count, i; + + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Reading built-in kernel personalities for I/O Kit drivers."); - /* Free all temporary malloc memory. + /* Look in the __BUILTIN __info segment for an array of Info.plist + * entries. 
For each one, extract the personalities dictionary, add + * it to our array, then push them all (without matching) to + * the IOCatalogue. This can be used to augment the personalities + * in gIOKernelConfigTables, especially when linking entire kexts into + * the mach_kernel image. */ - malloc_reset(); + infosect = getsectbyname("__BUILTIN", "__info"); + if (!infosect) { + // this isn't fatal + goto finish; + } + + parsedXML = OSUnserializeXML((const char *) (uintptr_t)infosect->addr, + &errorString); + if (parsedXML) { + builtinExtensions = OSDynamicCast(OSArray, parsedXML); + } + if (!builtinExtensions) { + const char * errorCString = "(unknown error)"; + + if (errorString && errorString->getCStringNoCopy()) { + errorCString = errorString->getCStringNoCopy(); + } else if (parsedXML) { + errorCString = "not an array"; + } + OSKextLog(/* kext */ NULL, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Error unserializing built-in personalities: %s.", errorCString); + goto finish; + } + + // estimate 3 personalities per Info.plist/kext + count = builtinExtensions->getCount(); + allPersonalities = OSArray::withCapacity(count * 3); + + for (i = 0; i < count; i++) { + OSDictionary * infoDict = NULL; // do not release + OSString * moduleName = NULL; // do not release + OSDictionary * personalities; // do not release + OSString * personalityName; // do not release + + OSSafeReleaseNULL(personalitiesIterator); + + infoDict = OSDynamicCast(OSDictionary, + builtinExtensions->getObject(i)); + if (!infoDict) { + continue; + } + + moduleName = OSDynamicCast(OSString, + infoDict->getObject(kCFBundleIdentifierKey)); + if (!moduleName) { + continue; + } + + OSKextLog(/* kext */ NULL, + kOSKextLogStepLevel | + kOSKextLogLoadFlag, + "Adding personalities for built-in driver %s:", + moduleName->getCStringNoCopy()); + + personalities = OSDynamicCast(OSDictionary, + infoDict->getObject("IOKitPersonalities")); + if (!personalities) { + continue; + } + + personalitiesIterator = 
OSCollectionIterator::withCollection(personalities); + if (!personalitiesIterator) { + continue; // xxx - well really, what can we do? should we panic? + } + + while ((personalityName = OSDynamicCast(OSString, + personalitiesIterator->getNextObject()))) { + + OSDictionary * personality = OSDynamicCast(OSDictionary, + personalities->getObject(personalityName)); + + OSKextLog(/* kext */ NULL, + kOSKextLogDetailLevel | + kOSKextLogLoadFlag, + "Adding built-in driver personality %s.", + personalityName->getCStringNoCopy()); + + if (personality && !personality->getObject(kCFBundleIdentifierKey)) { + personality->setObject(kCFBundleIdentifierKey, moduleName); + } + allPersonalities->setObject(personality); + } + } + + gIOCatalogue->addDrivers(allPersonalities, false); + +finish: + OSSafeRelease(parsedXML); + OSSafeRelease(allPersonalities); + OSSafeRelease(errorString); + OSSafeRelease(personalitiesIterator); + return; +} + +#if PRAGMA_MARK +#pragma mark Bootstrap Functions +#endif +/********************************************************************* +* Bootstrap Functions +*********************************************************************/ +static void bootstrapRecordStartupExtensions(void) +{ + sBootstrapObject.readStartupExtensions(); + return; +} + +static void bootstrapLoadSecurityExtensions(void) +{ + sBootstrapObject.loadSecurityExtensions(); + return; } diff --git a/libsa/bsearch.c b/libsa/bsearch.c deleted file mode 100644 index 03b81228d..000000000 --- a/libsa/bsearch.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)bsearch.c 8.1 (Berkeley) 6/4/93"; -#endif /* LIBC_SCCS and not lint */ - -#include - -/* - * Perform a binary search. - * - * The code below is a bit sneaky. After a comparison fails, we - * divide the work in half by moving either left or right. If lim - * is odd, moving left simply involves halving lim: e.g., when lim - * is 5 we look at item 2, so we change lim to 2 so that we will - * look at items 0 & 1. If lim is even, the same applies. If lim - * is odd, moving right again involes halving lim, this time moving - * the base up one item past p: e.g., when lim is 5 we change base - * to item 3 and make lim 2 so that we will look at items 3 and 4. - * If lim is even, however, we have to shrink it by one before - * halving: e.g., when lim is 4, we still looked at item 2, so we - * have to make lim 3, then halve, obtaining 1, so that we will only - * look at item 3. 
- */ -__private_extern__ -const void * bsearch( - register const void *key, - const void *base0, - size_t nmemb, - register size_t size, - register int (*compar)(const void *, const void *)) { - - register const char *base = base0; - register size_t lim; - register int cmp; - register const void *p; - - for (lim = nmemb; lim != 0; lim >>= 1) { - p = base + (lim >> 1) * size; - cmp = (*compar)(key, p); - if (cmp == 0) - return p; - if (cmp > 0) { /* key > p: move right */ - base = (const char *)p + size; - lim--; - } /* else move left */ - } - return (NULL); -} diff --git a/libsa/c++rem3.c b/libsa/c++rem3.c deleted file mode 100644 index 44d76d683..000000000 --- a/libsa/c++rem3.c +++ /dev/null @@ -1,2215 +0,0 @@ -// 45678901234567890123456789012345678901234567890123456789012345678901234567890 -/* - -Rules specification by -Stan Shebs of Apple Computer, Inc 2002 - -Parse and remangle implemented by -Godfrey van der Linden of Apple Computer, Inc 2002 - -Rules for demangling IOKit symbols - -In Darwin versions 1.0 through at least 5.2, IOKit is compiled using -GCC version 2. GCC 2's C++ symbol mangling algorithm ultimately -derives from the basic scheme described in the Annotated C++ Reference -Manual (ARM), section 7.2.1c, with a number of changes, mostly due to -the expansion of the language since the ARM was published in 1990. - -This description is not complete. It omits RTTI, thunks, and -templates, since they are not allowed in IOKit. The description also -mentions mangled name constructs that are not disallowed in IOKit, but -that as of Jan 2002, did actually appear in any symbol in the base -system. - -A mangled name basically consists of a function name followed -by two underscores, optionally followed by a signature computed -from the function's argument types. (Note that in Darwin, the -compiler adds an additional underscore to all C and C++ symbols. -The description assumes this has been removed.) 
- - ::= - | - - ::= [ ] - - ::= [ "_GLOBAL_" [ID] "__" ] "__" [ ] - - ::= * - | NULL - -Questions for Stan (@@@Stan@@@) -1> A valid implies a null function name. -2> I wonder if an is mutually exclusive with a perhaps something like :- - ::= [ "_GLOBAL_" ("I"|"D") "__" ] (( "__") | ) -3> Do constructors turn up as an opinfo or a NULL function name? - -The optional "_GLOBAL_"("I"|"D")"__" sequence indicates global constructors -and destructors, but in practice these do not appear with the mach-o Apple 2.95 - -A Null indicates a constructor or an operator. - -Since may include trailing underscores, the demangler -should scan forward until a non-underscore is seen, and then take the -last two as the separator between name and signature. - - may also include any number of leading underscores, so -the demangler needs to add those to and look for the -"__" following the name. - - ::= ("_._"|"_$_" ) ; destructor - | "__vt_" ; virtual table - | "_" ("."|"$") ; Variable - - ::= - | "Q" - | "K" ; ignored and illegal - - ::= - - ::= * - - ::= "type" - | "__op" - | "__" - | "a" - - ::= "aa" # && - | "aad" # &= - | "ad" # & - | "adv" # /= - | "aer" # ^= - | "als" # <<= - | "amd" # %= - | "ami" # -= - | "aml" # *= - | "aor" # |= - | "apl" # += - | "ars" # >>= - | "as" # = - | "cl" # () - | "cm" # , - | "cn" # ?: - | "co" # ~ - | "dl" # delete - | "dv" # / - | "eq" # == - | "er" # ^ - | "ge" # >= - | "gt" # > - | "le" # <= - | "ls" # << - | "lt" # < - | "md" # % - | "mi" # - - | "ml" # * - | "mm" # -- - | "mn" # ? - | "ne" # != - | "nt" # ! - | "nw" # new - | "oo" # || - | "or" # | - | "pl" # + - | "pp" # ++ - | "rf" # -> - | "rm" # ->* - | "rs" # >> - | "sz" # sizeof - | "vc" # [] - | "vd" # delete[] - | "vn" # new[] - -Questions for Stan (@@@Stan@@@) -1> What the hell is The "type" & "__op" stuff? - -IOKit has so far only been observed to use operations new ("nw") and -delete ("dl"). 
- -The signature is a concatenated list of elements, which are usually -argument types, but may include other sorts of things. - - ::= * - - ::= - | "S" - | "F" [ "_" ] - -Questions for Stan (@@@Stan@@@) -1> I think the 'B' phrase should probably read '| "B" '? -2> Ambiguous productions for signature - OSObject::func(struct timeval fred) => _func__8OSObject7timeval - signature could be parsed as - or - I believe the second one must be the valid production. - - ::= * - - :: - - ::= * - -The is the number of characters in . - -Argument types are a concatenated sequence of types. - - ::= # Empty - | + - ::= [ "n" ] - | "N" - | "T" - -The "N" repeats and "T" references to already-seen typescan only -appear if -fno-squangle (no squashed mangling), and in practice aren't -seen in IOKit symbols. - - ::= | * "_" - -Return types are just like any other sort of type. - - ::= - -Types consist of a variable number of declarators in front of a basic -type. - - ::= * - - ::= "P" ; pointer - | "p" ; pointer (but never occurs?) - | "R" ; reference (&) - | "A" ; array - | "T" - | "O" - | - -The "A" production can produce an ambigous output if it is followed by a counted class name or structure name. - -The "T" reference to a type does not appear in IOKit symbols, nor do -the "M" and "O" declarators. - - ::= ; function - | ; method - | * - - ::= "F" "_" - - ::= "M" - -A qualified name consists of a count of types, followed by all the -types concatenated together. For instance, Namespace::Class is -Q29Namespace5Class. For more than 9 types (which has not yet occurred -in IOKit), the multi-digit count is surrounded by underscores. - -Questions for Stan (@@@Stan@@@) -1> Can the types in a qualified name really be generic types or can the set be restricted to just counted class names? - - ::= | "_" * "_" - -Fundamental types are single letters representing standard built-in -types, optionally preceded by type qualifiers for properties like -signedness and constness. 
For instance, CUi is a const unsigned int. - - ::= "S" ; signed (chars only) - | "U" ; unsigned (any integral type) - | "J" ; __complex - | - - ::= - | "b" ; bool - | "c" ; char - | "d" ; double - | "f" ; float - | "i" ; int - | "l" ; long - | "r" ; long double - | "s" ; short - | "v" ; void - | "w" ; wchar_t - | "x" ; long long - | "G" ; ????? - | "e" ; ellipsis - -"G" does not appear in IOKit symbols in this context. - - ::= "C" ; const - | "V" ; volatile - | "u" ; restrict (C99) - | "G" ; struct/union/enum unused by gcc3 - -The restrict qualifier has not appeared in IOKit symbols. - -*/ -#if KERNEL - -#include -#include - -#include - -#include - -#include - -enum { false = 0, true = 1 }; - -#else /* !KERNEL */ - -#include - -#include -#include -#include - -#include - -#endif /* KERNEL */ - -#include "c++rem3.h" - -#define STRLEN(s) (sizeof(s)-1) -#define APPENDSTR(c, str) do { appendNStr(c, str, STRLEN(str)); } while (0) - -#define MAX_COMPOUND_TYPES 128 -#define MAX_ENTRIES 256 -#define MAX_SDICT_ENTRIES 256 -#define MAX_BDICT_ENTRIES 64 -#define MAX_RETURN_BUFFER 256 - -// Can't be bigger that 16 entries -typedef enum NameTypes { - kNTUndefined, kNTClass, kNTFunction, kNTFuncEnd, - kNTMethod, kNTBuiltIn, kNTDeclarator, kNTArray, - kNTKName, kNTSubstitute, kNTSubQualClass -} NameTypes; - -typedef struct TypeData { - short fStartEntry, fNumEntries; -} TypeData; - -typedef struct BaseTypeData { - const char *fFundTypeID; // May contain the type itself for kNTBuiltIt - unsigned int fLen:16; - unsigned int fType:4; // Must fit a NameType - unsigned int fVolatile:1; - unsigned int fConst:1; - unsigned int fSigned:1; - unsigned int fUnsigned:1; - unsigned int fPseudo:1; - unsigned int fQualified:1; -} BaseTypeData; - -typedef struct CheckPoint { - const char *fInChar; - unsigned char fNumI, fNumO, fNumT, fNumB, fNumS; -} CheckPoint; - -typedef struct ParseContext { - CheckPoint fP; - BaseTypeData fInEntries[MAX_ENTRIES]; // Input parsed elements - BaseTypeData 
fOutEntries[MAX_ENTRIES]; // Output parsed elements - TypeData fTypeList[MAX_COMPOUND_TYPES]; // Table of types - TypeData fSubDict[MAX_SDICT_ENTRIES]; - TypeData fBDict[MAX_BDICT_ENTRIES]; // B dictionary types - BaseTypeData *fCurBaseP; - const char *fInStr; - char *fOutStrEnd; - char *fOutChar; - int fInSize; - Rem3Return fRetCode; -} ParseContext; - -// -// The only forward declaration necessary -// -static Boolean parse_type(ParseContext *c); - -// Helper functions for walking through the string -static __inline__ char getNext(ParseContext *c) -{ - return *c->fP.fInChar++; -} - -static __inline__ CheckPoint *checkPoint(ParseContext *c) -{ - return &c->fP; -} - -static __inline__ void resetTo(ParseContext *c, CheckPoint *chk) -{ - c->fP = *chk; -} - -static __inline__ const char *inCharFromCheck(CheckPoint *chk) -{ - return chk->fInChar; -} - -static __inline__ void advance(ParseContext *c, int len) -{ - c->fP.fInChar += len; -} - -static __inline__ Boolean retard(ParseContext *c, int len) -{ - const char *cp = c->fP.fInChar - len; - if (cp < c->fInStr) - return false; - - c->fP.fInChar = cp; - return true; -} - -static __inline__ char peekAt(ParseContext *c, int index) -{ - return c->fP.fInChar[index]; -} - -static __inline__ char peekNext(ParseContext *c) -{ - return peekAt(c, 0); -} - -static __inline__ Boolean atEnd(ParseContext *c) -{ - return '\0' == peekNext(c); -} - -static __inline__ Boolean hasRemain(ParseContext *c, int len) -{ - return (c->fP.fInChar - c->fInStr + len <= c->fInSize); -} - -// -// Routines for allocating entries in the various -// -static __inline__ BaseTypeData *newIn(ParseContext *c) -{ - BaseTypeData *iP; - - if (c->fP.fNumI < MAX_ENTRIES) { - iP = &c->fInEntries[c->fP.fNumI++]; - bzero(iP, sizeof(*iP)); - c->fCurBaseP = iP; - return iP; - } - else { - c->fRetCode = kR3InternalNotRemangled; - return NULL; - } -} - -static __inline__ BaseTypeData *newOut(ParseContext *c) -{ - BaseTypeData *oP; - - if (c->fP.fNumO < MAX_ENTRIES) { - 
oP = &c->fOutEntries[c->fP.fNumO++]; - return oP; - } - else { - c->fRetCode = kR3InternalNotRemangled; - return NULL; - } -} - -static __inline__ TypeData * -newSub(ParseContext *c, int start, int num) -{ - TypeData *sP; - - if (c->fP.fNumS < MAX_SDICT_ENTRIES) { - sP = &c->fSubDict[c->fP.fNumS++]; - sP->fStartEntry = start; - sP->fNumEntries = num; - return sP; - } - else { - c->fRetCode = kR3InternalNotRemangled; - return NULL; - } -} - -static __inline__ TypeData * -newBDict(ParseContext *c, int start, int num) -{ - TypeData *bP; - - if (c->fP.fNumB < MAX_BDICT_ENTRIES) { - bP = &c->fBDict[c->fP.fNumB++]; - bP->fStartEntry = start; - bP->fNumEntries = num; - return bP; - } - else { - c->fRetCode = kR3InternalNotRemangled; - return NULL; - } -} - -static __inline__ TypeData * -newType(ParseContext *c, int start) -{ - TypeData *tP; - - if (c->fP.fNumT < MAX_COMPOUND_TYPES) { - tP = &c->fTypeList[c->fP.fNumT++]; - tP->fStartEntry = start; - return tP; - } - else - return NULL; -} - -static __inline__ TypeData * -dupType(ParseContext *c, TypeData *iTP, int offset) -{ - TypeData *tP = newType(c, iTP->fStartEntry + offset); - if (tP) - tP->fNumEntries = iTP->fNumEntries; - - return tP; -} - -// -// Identifier character recognition helpers, can be optimised -// -static __inline__ Boolean isValidFirstAlphabetic(char c) -{ - if ('a' <= c && c <= 'z') - return true; - else if ('A' <= c && c <= 'Z') - return true; - else - return false; -} - -static __inline__ Boolean isValidFirstChar(char c) -{ - if (isValidFirstAlphabetic(c)) - return true; - else if (c == '_') - return true; - else - return false; -} - -static __inline__ Boolean isValidChar(char c) -{ - if (isValidFirstChar(c)) - return true; - else if ('0' <= c && c <= '9') - return true; - else - return false; -} - -// -// Helper function for recognising characters and strings -// - -// Check the current input is the given character -static __inline__ Boolean isNext(ParseContext *c, char ch) -{ - if (peekNext(c) == 
ch) { - advance(c, 1); - return true; - } - else - return false; -} - -// Check the current input is ONE of the characters in str -static Boolean charNext(ParseContext *c, const char *str) -{ - if (hasRemain(c, 1)) { - char ch = peekNext(c); - char next; - - while ( (next = *str++) ) - if (next == ch) { - advance(c, 1); - return true; - } - } - - return false; -} - -// Check the current input for 'str' -static Boolean strNext(ParseContext *c, const char *str) -{ - const char *cp = c->fP.fInChar; - - do { - if (!*str) { - c->fP.fInChar = (const char *) cp; - return true; - } - else if (!*cp) - return false; - - } while (*cp++ == *str++); - - return false; -} - -// -// Qualifier re-encoding -// -static void -decodeQual(BaseTypeData *typeP, int *qualLenP, const char **qualP) -{ - const char *qual; - int qualLen; - - if (typeP->fConst && typeP->fVolatile) - { qual = "VK"; qualLen = 2; } - else if (typeP->fConst) - { qual = "K"; qualLen = 1; } - else if (typeP->fVolatile) - { qual = "V"; qualLen = 1; } - else - { qual = NULL; qualLen = 0; } - - *qualLenP = qualLen; - *qualP = qual; -} - - -// -// Output functions -// - -static void appendChar(ParseContext *c, char ch) -{ - char *outAddr = c->fOutChar++; - if (outAddr < c->fOutStrEnd) - *outAddr = ch; -} - -static void appendNStr(ParseContext *c, const char *str, int len) -{ - char *outAddr = c->fOutChar; - - c->fOutChar += len; - if (c->fOutChar < c->fOutStrEnd) - bcopy(str, outAddr, len); -} - -static __inline__ void appendStr(ParseContext *c, const char *str) -{ - appendNStr(c, str, strlen(str)); -} - -static void appendSub(ParseContext *c, int ls) -{ - appendChar(c, 'S'); - if (ls) { - if (--ls >= 36) { - int ms; - - ms = ls / 36; - appendChar(c, (ms < 10)? '0' + ms : 'A' + ms - 10); - ls -= (ms * 36); - } - appendChar(c, (ls < 10)? 
'0' + ls : 'A' + ls - 10); - } - appendChar(c, '_'); -} - -static Boolean compareTypes(ParseContext *c, int sub, int entry, int numEntries) -{ - TypeData *subP = &c->fSubDict[sub]; - BaseTypeData *bSP, *bIP; - int i; - - if (subP->fNumEntries != numEntries) - return false; - - bSP = &c->fInEntries[subP->fStartEntry]; - bIP = &c->fInEntries[entry]; - - for (i = 0; i < numEntries; i++, bSP++, bIP++) { - if (bSP->fType != bIP->fType) - return false; - - switch (bSP->fType) { - case kNTClass: - if (bSP->fLen != bIP->fLen) - return false; - else if (strncmp(bSP->fFundTypeID, bIP->fFundTypeID, bSP->fLen)) - return false; - break; - - case kNTArray: - case kNTBuiltIn: - case kNTDeclarator: - if (bSP->fFundTypeID != bIP->fFundTypeID) - return false; - break; - - case kNTMethod: - case kNTFunction: - case kNTUndefined: - case kNTKName: - break; // OK so far - - default: - return false; // Fatal errors - } - } - - return true; -} - -static int searchDict(ParseContext *c, int entry, int numE) -{ - int sub, numSubs = c->fP.fNumS; - - // don't try to substitute the last builtin - if (numE == 1 && kNTBuiltIn == c->fInEntries[entry].fType) - return -1; - - for (sub = 0; sub < numSubs; sub++) - if (compareTypes(c, sub, entry, numE)) - return sub; - - return -1; -} - -static int searchDictClass(ParseContext *c, const char *qname, int len) -{ - TypeData *subP; - int sub, numSubs = c->fP.fNumS; - - for (sub = 0, subP = c->fSubDict; sub < numSubs; sub++, subP++) { - BaseTypeData *iP = &c->fInEntries[subP->fStartEntry]; - - if (kNTClass != iP->fType || iP->fLen != len) - continue; - if (!strncmp(iP->fFundTypeID, qname, len)) - return sub; - } - - return -1; -} - -static Boolean -appendQualifiedClass(ParseContext *c, int entry) -{ - BaseTypeData *iP, *oP, *sP, *endSP; - const char *cp, *typeID; - char *cp_new; - int sub, subEntry, prefixLen; - int q_count; - - int decodeStart = c->fP.fNumI; - - // Scan through the incom - iP = &c->fInEntries[entry]; - endSP = 
&c->fInEntries[MAX_ENTRIES]; - sP = &c->fInEntries[decodeStart]; - - prefixLen = iP->fLen; - typeID = cp = iP->fFundTypeID; - for (q_count = 0; sP < endSP && (cp-typeID) < prefixLen; q_count++, sP++) { - int count; - - count = strtoul(cp, &cp_new, 10); - cp = cp_new + count; - - sP->fType = kNTClass; - sP->fFundTypeID = typeID; - sP->fLen = cp - typeID; - } - if (sP >= endSP) - return false; - - // Search backwards until I find the first substitution - sub = -1; - for (subEntry = q_count, sP--; subEntry > 0; subEntry--, sP--) { - sub = searchDictClass(c, sP->fFundTypeID, sP->fLen); - if (-1 != sub) - break; - } - - // Now drop the symbol into the output buffer - oP = newOut(c); - if (!oP) - return false; - - if (sub < 0) - *oP = *iP; // No sub copy original - else { - // Substitution found - prefixLen = sP->fLen; // Length of substitution - - oP->fType = kNTSubstitute; // Assume complete substitution - oP->fLen = sub; - oP->fFundTypeID = NULL; - - // We have a partial substitution so tag on the unmatched bit - if (prefixLen != iP->fLen) { - oP->fType = kNTSubQualClass; // Re-characterise as 2 part sub - - oP = newOut(c); - if (!oP) - return false; - - *oP = *iP; // Duplicate the original - oP->fType = kNTSubQualClass; - oP->fFundTypeID += prefixLen; // Skip leading substituted text - oP->fLen -= prefixLen; - } - } - - // Finally insert the qualified class names into the dictionary - for (subEntry++, sP++; subEntry < q_count; subEntry++, decodeStart++) { - c->fInEntries[decodeStart] = *sP++; - if (!newSub(c, decodeStart, 1)) - return false; - } - c->fP.fNumI = decodeStart; - - if (!newSub(c, entry, 1)) - return false; - - return true; -} - -static int -appendType(ParseContext *c, int type) -{ - BaseTypeData *iP, *oP; - TypeData *tP; - int i, sub; - int entry, numE, lastEntry; - Boolean found; - - if (type >= c->fP.fNumT) - return -1; - - tP = &c->fTypeList[type++]; - entry = tP->fStartEntry; - numE = tP->fNumEntries; - lastEntry = entry + numE; - iP = NULL; - for (i 
= 0, found = false, sub = -1; i < numE; i++) { - iP = &c->fInEntries[entry + i]; - switch (iP->fType) { - - // Function & Builtin can't be compressed alone - case kNTFunction: - case kNTBuiltIn: - i++; // Copy the current entry - found = true; - break; - - case kNTClass: - case kNTMethod: - sub = searchDict(c, entry + i, numE - i); - if (sub < 0 && !iP->fQualified) - i++; - found = true; - break; - - case kNTDeclarator: - case kNTArray: - sub = searchDict(c, entry + i, numE - i); - found = (sub >= 0); - break; - - // Internal error's should never occur - case kNTKName: - case kNTSubstitute: - case kNTSubQualClass: - case kNTUndefined: - default: - return -1; - } - if (found) - break; - } - - if (!found) - return -1; // Internal error: no terminal symbol? - - // Copy the already input buffer to the output - oP = &c->fOutEntries[c->fP.fNumO]; - if (i) { - if (c->fP.fNumO + i >= MAX_ENTRIES) - return -1; - - bcopy(&c->fInEntries[entry], oP, i * sizeof(*oP)); - c->fP.fNumO += i; - oP += i; - } - - if (sub >= 0) { - // We found a substitution - oP->fType = kNTSubstitute; - oP->fLen = sub; - c->fP.fNumO++; // Increment output for the substitution - - // Walk over types that have been substituted - while (type < c->fP.fNumT - && c->fTypeList[type].fStartEntry < lastEntry) - type++; - } - else switch (iP->fType) - { - case kNTMethod: - type = appendType(c, type); // Class Name - if (type < 0) - return type; - type = appendType(c, type); // Pointer to function - if (type < 0) - return type; - break; - - case kNTFunction: - type = appendType(c, type); // Return type - if (type < 0) - return type; - - // process the argument list - do { - tP = &c->fTypeList[type]; - if (tP->fStartEntry < lastEntry) { - type = appendType(c, type); - if (type < 0) - return type; - } - else - break; - } while (type < c->fP.fNumT); - oP = newOut(c); - if (!oP) - return -1; - oP->fType = kNTFuncEnd; - break; - - case kNTBuiltIn: - i--; // Do not store the buildit in the dictionary - break; - - 
case kNTClass: // Nothing more to do - if (!iP->fQualified) - break; - else if (appendQualifiedClass(c, entry + i)) - break; - else - return -1; - } - - // No further substititions to be had update the dictionary - for (i += entry; --i >= entry; ) { - if (!newSub(c, i, lastEntry - i)) - return -1; - } - - return type; -} - -static Boolean appendArgumentList(ParseContext *c) -{ - int i, num; - - c->fRetCode = kR3InternalNotRemangled; - // Setup the output entry array - num = c->fP.fNumT; - for (i = 0; i < num; ) { - i = appendType(c, i); - if (i < 0) - return false; - } - - // First pass output uncompressed types - for (i = 0, num = c->fP.fNumO; i < num; i++) { - BaseTypeData *bP; - - bP = &c->fOutEntries[i]; - - if (bP->fPseudo) - continue; // Pseudo entry do not output; - - switch (bP->fType) { - - case kNTSubstitute: appendSub(c, bP->fLen); break; - - case kNTSubQualClass: - appendChar(c, 'N'); - appendSub(c, bP->fLen); - i++; bP = &c->fOutEntries[i]; - appendNStr(c, bP->fFundTypeID, bP->fLen); - appendChar(c, 'E'); - break; - - case kNTClass: - if (bP->fQualified) { - appendChar(c, 'N'); - appendNStr(c, bP->fFundTypeID, bP->fLen); - appendChar(c, 'E'); - } - else - appendNStr(c, bP->fFundTypeID, bP->fLen); - break; - - case kNTArray: { - char numbuf[16]; // Bigger than MAX_LONG + 3 - int len; - len = snprintf(numbuf, sizeof(numbuf), - "A%lu_", (unsigned long) bP->fFundTypeID); - appendNStr(c, numbuf, len); - break; - } - - case kNTBuiltIn: - case kNTDeclarator: appendChar(c, (int) bP->fFundTypeID); break; - case kNTMethod: appendChar(c, 'M'); break; - case kNTFunction: appendChar(c, 'F'); break; - case kNTFuncEnd: appendChar(c, 'E'); break; - - case kNTUndefined: - case kNTKName: - default: - return false; // Fatal errors - } - } - - // Successful remangle - c->fRetCode = kR3Remangled; - return true; -} - -// -// Parse routines -// - -// ::= * -static Boolean parse_count(ParseContext *c, int *countP) -{ - int count = 0; - char ch; - char *newp; - - ch = 
peekNext(c); - if (ch < '1' || ch > '9') - return false; - - count = strtol(c->fP.fInChar, &newp, 10); - c->fP.fInChar = newp; - if (countP) - *countP = count; - - return true; -} - - -// "n" can cause the following type to be ambiguous as -// n23_Pc... can be -// "n" ... -// | "n" '_' ... -// However as the class '_Pc' is probably going to be unlikely a quick -// check to see if the next field is a valid type would probably clear -// up the abiguity for the majority of cases. -// -// ::= | * "_" -static Boolean parse_index(ParseContext *c, int *indexP) -{ - CheckPoint chk = *checkPoint(c); - char ch0, ch1; - int index; - - ch0 = peekAt(c, 0); - ch1 = peekAt(c, 1); - - if ( !('0' <= ch0 && ch0 <= '9') ) - goto abandonParse; - if ('0' <= ch1 && ch1 <= '9') { - if (!parse_count(c, &index)) - goto abandonParse; - if (isNext(c, '_')) { - // @@@ gvdl: Ambiguity check one day - if (indexP) - *indexP = index; - return true; - } - else - resetTo(c, &chk); // Must be the one digit case - } - - // One digit case - advance(c, 1); - index = ch0 - '0'; - - if (indexP) - *indexP = index; - return true; - -abandonParse: - return false; -} - - -// ::= "C" ; const -// | "V" ; volatile -// | "u" ; restrict (C99) unsupported -// | "G" ; struct/union/enum ; unused in gcc3 -static Boolean parse_qualifiers(ParseContext *c) -{ - BaseTypeData *bP = c->fCurBaseP; - - for (;;) { - if (isNext(c, 'C')) - bP->fConst = true; // "C" ; const - else if (isNext(c, 'V')) - bP->fVolatile = true; // "V" ; volatile - else if (isNext(c, 'u')) - return false; // "u" ; restrict (C99) - else if (isNext(c, 'G')) - continue; // "G" ; struct/union/enum ; unused - else - break; - } - - return true; -} - -// Assumes we have an open fInEntry in fCurBaseP -static Boolean duplicateEntries(ParseContext *c, int start, int numE) -{ - BaseTypeData *bIP = &c->fInEntries[start]; // First duplicate entry - BaseTypeData *bP = c->fCurBaseP; - int i; - - // Duplicating a method - if (kNTMethod == bIP->fType) { - bP--; // 
Strip leading 'P' declarator - c->fP.fNumI--; - } - - numE--; - - // do we have room available for duplication - if (c->fP.fNumI + numE >= MAX_ENTRIES) - return false; - - // Copy the parse entries over - bcopy(bIP, bP, (numE + 1) * sizeof(*bP)); - - // Now we have to duplicate the types for the new entry - for (i = 0; i < c->fP.fNumT; i++) { - TypeData *tP = &c->fTypeList[i]; - if (tP->fStartEntry < start) - continue; - else if (tP->fStartEntry <= start + numE) - dupType(c, tP, bP - bIP); - else - break; - } - - c->fP.fNumI += numE; - bP += numE; - c->fCurBaseP = bP; - - return true; -} - -// Must have a valid c->fCurBaseP pointer on entry -// ::= ; plain class name -// | "Q" ; qualified name -// | "B" ; compressed name -// | "K" ; ignored and illegal -// ::= + -// ::= | "_" * "_" -// ::= -// ::= * -static Boolean -parse_class_name(ParseContext *c) -{ - BaseTypeData *bP = c->fCurBaseP; - const char *typeId = c->fP.fInChar; - char ch; - int count; - - if (parse_count(c, &count)) { - - // ::= - if (!hasRemain(c, count)) - goto abandonParse; - - bP->fType = kNTClass; - advance(c, count); - - bP->fFundTypeID = typeId; - bP->fLen = c->fP.fInChar - typeId; - } - else { - switch (peekNext(c)) { - - case 'Q': { - int i, q_count; - - advance(c, 1); - - // | "Q" ; qualified name - // ::= + - // ::= | "_" * "_" - if ('_' == (ch = getNext(c))) { - advance(c, 1); - if (!parse_count(c, &q_count) || !isNext(c, '_')) - goto abandonParse; - } - else if ('1' <= ch && ch <= '9') - q_count = ch - '0'; - - if (!q_count) - goto abandonParse; - - typeId = c->fP.fInChar; - bP->fType = kNTClass; - bP->fQualified = true; - i = 0; - for (i = 0; i < q_count; i++) { - if (parse_count(c, &count)) - advance(c, count); - else - goto abandonParse; - } - bP->fLen = c->fP.fInChar - typeId; - bP->fFundTypeID = typeId; - break; - } - - case 'B': - // | "B" - advance(c, 1); - - if (!parse_index(c, &count) || count >= c->fP.fNumB) - goto abandonParse; - - if (!duplicateEntries(c, 
c->fBDict[count].fStartEntry, - c->fBDict[count].fNumEntries)) - goto abandonParse; - return true; - - case 'K': default: - goto abandonParse; - } - } - - if (newBDict(c, bP - c->fInEntries, 1)) - return true; - -abandonParse: - return false; -} - -// ::= -// | "b" ; bool -// | "c" ; char -// | "d" ; double -// | "e" ; ellipsis -// | "f" ; float -// | "i" ; int -// | "l" ; long -// | "r" ; long double -// | "s" ; short -// | "v" ; void -// | "w" ; wchar_t -// | "x" ; long long -// | "G" ; ??? -static Boolean parse_fund_type_id(ParseContext *c) -{ - BaseTypeData *bP = c->fCurBaseP; - - if (!parse_class_name(c)) { - // Use the TypeID pointer as a 4 character buffer - char ch = peekNext(c); - - if (bP->fSigned && 'c' != ch) - goto abandonParse; // illegal only chars can be signed - - switch (ch) { - - case 'b': case 'd': case 'f': case 'v': case 'w': // No map types - break; - - case 'c': // character - if (bP->fSigned) ch = 'a'; - else if (bP->fUnsigned) ch = 'h'; - break; - case 'e': // ellipsis - ch = 'z'; - break; - case 'i': // int - if (bP->fUnsigned) ch = 'j'; - break; - case 'l': // long - if (bP->fUnsigned) ch = 'm'; - break; - case 'r': // long double - ch = 'e'; - break; - case 's': // short - if (bP->fUnsigned) ch = 't'; - break; - case 'x': // long long - if (bP->fUnsigned) ch = 'y'; - break; - - case 'G': // Don't understand "G" - default: - goto abandonParse; - } - - advance(c, 1); // Consume the input character - bP->fFundTypeID = (void *) (int) ch; - bP->fLen = 0; - bP->fType = kNTBuiltIn; - } - - return true; - -abandonParse: - return false; -} - -// ::= [ "n" ] -// | "N" ; Not implemented -// | "T" ; Not implemented -static Boolean parse_arg_type(ParseContext *c) -{ - // Don't bother to check point as parse_argument_types does it for us - - TypeData *typeP; - int repeat = 0; - - typeP = &c->fTypeList[c->fP.fNumT]; // Cache type for later repeat - if (!parse_type(c)) - return false; - - // Now check for a repeat count on this type - if (isNext(c, 
'n')) { - if (!parse_index(c, &repeat)) - return false; - - do { - c->fCurBaseP = newIn(c); // Duplicate requires a fresh type - if (!c->fCurBaseP) - return false; - if (!duplicateEntries(c, typeP->fStartEntry, typeP->fNumEntries)) - return false; - } while (--repeat); - } - - return true; -} - -// ::= # Empty -// | + -static Boolean parse_argument_types(ParseContext *c) -{ - if (atEnd(c)) - return true; - - if (!parse_arg_type(c)) - goto abandonParse; - - while (!atEnd(c) && parse_arg_type(c)) - ; - - return true; - - // Not a counted class name so reset to checkPoint -abandonParse: - return false; -} - -// leaf function so the copy aside buffer isn't on the primary -// recursion stack. -static Boolean -rotateFunction(ParseContext *c, int argStart, int retStart) -{ - char returnTypeBuffer[MAX_RETURN_BUFFER]; - unsigned int numArg, numRet; - unsigned int lenArg, lenRet; - char *sArgP, *sRetP; - unsigned int i; - - TypeData *argTP = &c->fTypeList[argStart]; - TypeData *retTP = &c->fTypeList[retStart]; - - // Rotate around the entries first - numArg = retTP->fStartEntry - argTP->fStartEntry; - numRet = retTP->fNumEntries; - lenArg = numArg * sizeof(BaseTypeData); - lenRet = numRet * sizeof(BaseTypeData); - - // Copy the return type into a buffer - if (lenRet > sizeof(returnTypeBuffer)) - return false; - - sArgP = (char *) (&c->fInEntries[argTP->fStartEntry]); - sRetP = (char *) (&c->fInEntries[retTP->fStartEntry]); - - bcopy(sRetP, returnTypeBuffer, lenRet); - bcopy(sArgP, sArgP + lenRet, lenArg); - bcopy(returnTypeBuffer, sArgP, lenRet); - - // Retarget the argument and return types for the new entry positions - lenArg = numArg; - lenRet = numRet; - numArg = retStart - argStart; - numRet = c->fP.fNumT - retStart; - for (i = 0; i < numArg; i++) - c->fTypeList[argStart+i].fStartEntry += lenRet; - for (i = 0; i < numRet; i++) - c->fTypeList[retStart+i].fStartEntry -= lenArg; - - // Rotate the BDictionary - for (i = 0; i < c->fP.fNumB; i++) { - TypeData *bDP = 
&c->fBDict[i]; - int start = bDP->fStartEntry; - - if (start >= argTP->fStartEntry) - bDP->fStartEntry = start + lenRet; - else if (start >= retTP->fStartEntry) - bDP->fStartEntry = start - lenArg; - } - - // Finally rotate the retargeted type structures. - lenArg = numArg * sizeof(TypeData); - lenRet = numRet * sizeof(TypeData); - - sArgP = (char *) (&c->fTypeList[argStart]); - sRetP = (char *) (&c->fTypeList[retStart]); - - bcopy(sRetP, returnTypeBuffer, lenRet); - bcopy(sArgP, sArgP + lenRet, lenArg); - bcopy(returnTypeBuffer, sArgP, lenRet); - - return true; -} - -// ::= "F" "_" -static Boolean parse_function_type(ParseContext *c, Boolean forMethod) -{ - TypeData *bDictP = NULL; - BaseTypeData *bP = c->fCurBaseP; - - int argTypeStart, retTypeStart; - - if (!forMethod) { - bDictP = newBDict(c, c->fP.fNumI-1, 0); - if (!bDictP) - goto abandonParse; - } - - if (!isNext(c, 'F')) - goto abandonParse; - - bP->fType = kNTFunction; - - // Note that the argument types will advance the Entry list - argTypeStart = c->fP.fNumT; - if (!parse_argument_types(c)) - goto abandonParse; - - if (!isNext(c, '_')) - goto abandonParse; - - // Parse the return type - retTypeStart = c->fP.fNumT; - if (!parse_type(c)) - goto abandonParse; - - // gcc3 puts the return code just after the 'F' declaration - // as this impacts the order of the compression I need to rotate - // the return type and the argument types. - if (!rotateFunction(c, argTypeStart, retTypeStart)) - goto abandonParse; - - if (!forMethod) - bDictP->fNumEntries = c->fP.fNumI - bDictP->fStartEntry; - - return true; - -abandonParse: - return false; -} - -// To convert 2.95 method to a 3.0 method I need to prune the -// first argument of the function type out of the parse tree. -static Boolean cleanMethodFunction(ParseContext *c, int type) -{ - TypeData *typeP, *startTP, *endTP; - BaseTypeData *bP; - int i, thisStart, thisEnd, thisLen, funcRemain; - - // Get pointer for the return value's type. 
- startTP = &c->fTypeList[type+1]; - endTP = &c->fTypeList[c->fP.fNumT]; - - // Now look for the first type that starts after the return type - thisEnd = startTP->fStartEntry + startTP->fNumEntries; - for (startTP++; startTP < endTP; startTP++) - if (startTP->fStartEntry >= thisEnd) - break; - - if (startTP >= endTP) { - c->fRetCode = kR3InternalNotRemangled; - return false; // Internal error: should never happen - } - - // We now have a pointer to the 1st argument in the input list - // we will need to excise the entries from the input list and don't forget - // to remove the associated types from the type list. - - thisLen = startTP->fNumEntries; - thisStart = startTP->fStartEntry; - thisEnd = thisStart + thisLen; - funcRemain = c->fP.fNumI - thisEnd; - bP = &c->fInEntries[thisStart]; - - // If we have no arguments then replace the pointer with a void - if (!funcRemain) { - c->fP.fNumI -= (thisLen - 1); - - bP->fFundTypeID = (void *) (int) 'v'; // Void arg list - bP->fLen = 0; - bP->fType = kNTBuiltIn; - - // Update the type entry for the void argument list - startTP->fNumEntries = 1; - return true; - } - - // Move the argument list down to replace the 'this' pointer - bcopy(bP + thisLen, bP, funcRemain * sizeof(*bP)); - c->fP.fNumI -= thisLen; - - // And remove the 'this' pointers type - - // First walk over all of the types that have to be removed - for (typeP = startTP + 1; typeP < endTP; typeP++) - if (typeP->fStartEntry >= thisEnd) - break; - - if (typeP >= endTP) { - c->fRetCode = kR3InternalNotRemangled; - return false; // Internal error Can't be a void argument list. 
- } - - bcopy(typeP, startTP, (char *) endTP - (char *) typeP); - - c->fP.fNumT -= typeP - startTP; - endTP = &c->fTypeList[c->fP.fNumT]; - for (typeP = startTP ; typeP < endTP; typeP++) - typeP->fStartEntry -= thisLen; - - // Finally we can retarget the BDictionary lists - for (i = 0; i < c->fP.fNumB; i++) { - TypeData *bDP = &c->fBDict[i]; - int start = bDP->fStartEntry; - - if (start < thisStart) - continue; - if (start >= thisEnd) - break; - - bDP->fStartEntry = start - thisLen; - } - - return true; -} - -// ::= "M" -// -// Note this is a very bad function. Gcc3 doesn't doesn't use pointer that -// is immediately before this entry. We will have to delete the 'P' declarator -// that is before the method declaration. -// We will also have to prune the first type in the argument list as Gcc3 -// doesn't register the 'this' pointer within the function list. -static Boolean parse_method_type(ParseContext *c) -{ - TypeData *bDictP; - TypeData *typeP; - BaseTypeData *bP; - - bDictP = newBDict(c, c->fP.fNumI-2, 0); - if (!bDictP) - goto abandonParse; - - // Replace 'P' declarator - c->fP.fNumI--; - bP = c->fCurBaseP - 1; - - if (!isNext(c, 'M')) - goto abandonParse; - - if (bP->fFundTypeID != (void *) (int) 'P') - goto abandonParse; - - // Replace the previous 'Pointer' declarator - bP->fType = kNTMethod; - bP->fFundTypeID = NULL; - bP->fLen = 0; - - // Grab the method's 'this' type specification - typeP = newType(c, c->fP.fNumI); - if (!newIn(c) || !typeP) - goto abandonParse; - - if (!parse_class_name(c)) - goto abandonParse; - typeP->fNumEntries = c->fP.fNumI - typeP->fStartEntry; - - // Grab the specifier - typeP = newType(c, c->fP.fNumI); - if (!newIn(c) || !typeP) - goto abandonParse; - - if (!parse_function_type(c, /* forMethod */ true)) - goto abandonParse; - - if (!cleanMethodFunction(c, typeP - c->fTypeList)) - goto abandonParse; - typeP->fNumEntries = c->fP.fNumI - typeP->fStartEntry; - - // Finally update the dictionary with the M & 'this' - 
bDictP->fNumEntries = c->fP.fNumI - bDictP->fStartEntry; - - return true; - -abandonParse: - return false; -} - -static Boolean emitQualifiers(ParseContext *c) -{ - BaseTypeData *bP = c->fCurBaseP; - - if (bP->fVolatile || bP->fConst) { - Boolean isConst, isVolatile, isSigned, isUnsigned; - - isVolatile = bP->fVolatile; - isConst = bP->fConst; - isSigned = bP->fSigned; - isUnsigned = bP->fUnsigned; - bP->fConst = bP->fVolatile = bP->fSigned = bP->fUnsigned = 0; - - if (isVolatile) { - bP->fType = kNTDeclarator; - bP->fFundTypeID = (void *) (int) 'V'; - bP->fLen = 0; - bP = newIn(c); - if (!bP) - return false; - } - if (isConst) { - bP->fType = kNTDeclarator; - bP->fFundTypeID = (void *) (int) 'K'; - bP->fLen = 0; - bP = newIn(c); - if (!bP) - return false; - } - bP->fSigned = isSigned; - bP->fUnsigned = isUnsigned; - } - - return true; -} - - -// ::= ; function -// | ; method -// | * -// ::= "S" ; signed (chars only) -// | "U" ; unsigned (any integral type) -// | "J" ; __complex -// | -static Boolean parse_base_type(ParseContext *c) -{ - if ('F' == peekNext(c)) { - if (!parse_function_type(c, /* forMethod */ false)) - goto abandonParse; - } - else if ('M' == peekNext(c)) { - if (!parse_method_type(c)) - goto abandonParse; - } - else { - // | * - BaseTypeData *bP = c->fCurBaseP; - for (;;) { - if (isNext(c, 'S')) - // ::= "S" ; signed (chars only) - { bP->fSigned = true; continue; } - else if (isNext(c, 'U')) - // | "U" ; unsigned (any integral type) - { bP->fUnsigned = true; continue; } - else if (isNext(c, 'C')) - // | - // ::= "C" ; const - { bP->fConst = true; continue; } - else if (isNext(c, 'V')) - // | "V" ; volatile - { bP->fVolatile = true; continue; } - else if (charNext(c, "Ju")) - goto abandonParse; // Don't support these qualifiers - // | "J" ; __complex - // | "u" ; restrict (C99) - else - break; - } - - if (!emitQualifiers(c)) - goto abandonParse; - - if (!parse_fund_type_id(c)) - goto abandonParse; - } - return true; - -abandonParse: - return false; 
-} - -// Use the top SDict as a stack of declarators. -// parses * -// ::= "P" ; pointer -// | "p" ; pointer (but never occurs?) -// | "R" ; reference (&) -// | "A" ; array -// | "T" -// | "O" -// | -// -// As a side-effect the fCurBaseP is setup with any qualifiers on exit -static Boolean parse_declarators(ParseContext *c) -{ - int count; - unsigned long l; - BaseTypeData *dP; - char *newp; - - // Note we MUST go through the for loop at least once - for (count = 0; ; count++) { - const char *curDecl; - char ch; - - if (!newIn(c)) - goto abandonParse; - - // ::= production - if (!parse_qualifiers(c) || !emitQualifiers(c)) - goto abandonParse; - - dP = c->fCurBaseP; // Find the current base type pointer - - curDecl = c->fP.fInChar; - - switch (peekNext(c)) { - - case 'P': case 'p': case 'R': - // ::= "P" ; pointer - // | "p" ; pointer (but never occurs?) - // | "R" ; reference (&) - - dP->fType = kNTDeclarator; - advance(c, 1); - - ch = *curDecl; - if ('p' == ch) ch = 'P'; - dP->fFundTypeID = (void *) (int) ch; - dP->fLen = 0; - continue; // Go around again - - case 'A': - // | "A" ; array - dP->fType = kNTArray; - - advance(c, 1); curDecl++; - l = strtoul(curDecl, &newp, 10); - c->fP.fInChar = newp; - curDecl = (const char *)l; - if (!curDecl) - goto abandonParse; - dP->fFundTypeID = curDecl; - dP->fLen = 0; - continue; // Go around again - - case 'T': case 'O': - // | "T" Unsupported - // | "O" Unsupported - goto abandonParse; - - default: - break; - } - - break; - } - - dP->fLen = 0; - return true; - -abandonParse: - return false; -} - -// ::= * -static Boolean parse_type(ParseContext *c) -{ - CheckPoint chk = *checkPoint(c); - TypeData *typeP = newType(c, c->fP.fNumI); - if (!typeP) - goto abandonParse; - - // As a side-effect the fCurBaseP is setup with any qualifiers on exit - if (!parse_declarators(c)) - goto abandonParse; - - // Merge the last qualifiers into the base type - if (!parse_base_type(c) || kNTUndefined == c->fCurBaseP->fType) - goto abandonParse; 
- - typeP->fNumEntries = c->fP.fNumI - typeP->fStartEntry; - return true; - -abandonParse: - resetTo(c, &chk); - return false; -} - -// ::= * -// No need to check point as an invalid function name is fatal -// Consumes trailing "__". -static Boolean -parse_function_name(ParseContext *c) -{ - char ch; - - while ( (ch = peekNext(c)) ) - { - advance(c, 1); - if ('_' == ch && '_' == peekNext(c)) { - do { - advance(c, 1); - } while ('_' == peekNext(c)); - return true; - } - } - - return false; -} - -// ::= "type" -// | "__op" -// | "__" ; Implies null function name -// | "a" -// ::= "aa" # && ==> "aa" -// | "aad" # &= ==> "aN" -// | "ad" # & ==> "ad" -// | "adv" # /= ==> "dV" -// | "aer" # ^= ==> "eO" -// | "als" # <<= ==> "lS" -// | "amd" # %= ==> "rM" -// | "ami" # -= ==> "mI" -// | "aml" # *= ==> "mL -// | "aor" # |= ==> "oR -// | "apl" # += ==> "pL -// | "ars" # >>= ==> "rS -// | "as" # = ==> "aS -// | "cl" # () ==> "cl -// | "cm" # , ==> "cm -// | "cn" # ?: ==> "qu -// | "co" # ~ ==> "co -// | "dl" # delete ==> "dl -// | "dv" # / ==> "dv -// | "eq" # == ==> "eq -// | "er" # ^ ==> "eo -// | "ge" # >= ==> "ge -// | "gt" # > ==> "gt -// | "le" # <= ==> "le -// | "ls" # << ==> "ls -// | "lt" # < ==> "lt -// | "md" # % ==> "rm -// | "mi" # - ==> "mi -// | "ml" # * ==> "ml -// | "mm" # -- ==> "mm -// | "mn" # "???????????????? -// | "mx" # >? ==> "???????????????? -// | "ne" # != ==> "ne -// | "nt" # ! 
==> "nt -// | "nw" # new ==> "nw -// | "oo" # || ==> "oo" -// | "or" # | ==> "or -// | "pl" # + ==> "pl -// | "pp" # ++ ==> "pp -// | "rf" # -> ==> "pt -// | "rm" # ->* ==> "pm -// | "rs" # >> ==> "rs -// | "sz" # sizeof ==> "sz -// | "vc" # [] ==> "ix -// | "vd" # delete[] ==> "da -// | "vn" # new[] ==> "na -static struct opMap { - const char *op295, *op3; -} opMapTable[] = { - {"aad", "aN" }, {"adv", "dV" }, {"aer", "eO" }, {"als", "lS" }, - {"amd", "rM" }, {"ami", "mI" }, {"aml", "mL" }, {"aor", "oR" }, - {"apl", "pL" }, {"ars", "rS" }, {"aa", "aa" }, {"ad", "ad" }, - {"as", "aS" }, {"cl", "cl" }, {"cm", "cm" }, {"cn", "qu" }, - {"co", "co" }, {"dl", "dl" }, {"dv", "dv" }, {"eq", "eq" }, - {"er", "eo" }, {"ge", "ge" }, {"gt", "gt" }, {"le", "le" }, - {"ls", "ls" }, {"lt", "lt" }, {"md", "rm" }, {"mi", "mi" }, - {"ml", "ml" }, {"mm", "mm" }, {"mn", NULL }, {"mx", NULL }, - {"ne", "ne" }, {"nt", "nt" }, {"nw", "nw" }, {"oo", "oo" }, - {"or", "or" }, {"pl", "pl" }, {"pp", "pp" }, {"rf", "pt" }, - {"rm", "pm" }, {"rs", "rs" }, {"sz", "sz" }, {"vc", "ix" }, - {"vd", "da" }, {"vn", "na" }, -}; - -static Boolean parse_opinfo(ParseContext *c, const char **opInfoP) -{ - CheckPoint chk = *checkPoint(c); - const char *op; - char ch; - unsigned int i; - - if ('a' == (ch = peekNext(c))) { - goto abandonParse; - } - else if (strNext(c, "type")) { - goto abandonParse; - } - else if (retard(c, 4) && strNext(c, "____op")) { - // @@@ gvdl: check this out it may change - // ::= "__op" - goto abandonParse; - } - - // Failed till now so reset and see if we have an operator - resetTo(c, &chk); - - // quick check to see if we may have an operator - if (!strrchr("acdeglmnoprsv", peekNext(c))) - goto abandonParse; - - op = NULL; - for (i = 0; i < sizeof(opMapTable)/sizeof(opMapTable[0]); i++) { - if (strNext(c, opMapTable[i].op295)) { - op = opMapTable[i].op3; - break; - } - } - if (!op) - goto abandonParse; - - if (!strNext(c, "__")) // Trailing underbars - goto abandonParse; - - if 
(opInfoP) - *opInfoP = op; - return true; - -abandonParse: - return false; -} - -// ::= * -// ::= -// | "K" -// | "S" -// | "F" [ "_" ] -// ::= -// Treat the prefix's s_element as a full type -static Boolean -parse_signature(ParseContext *c, - const char *func, int funcLen, const char *op) -{ - BaseTypeData *bP; - TypeData *tP; - - Boolean isFunction = false; - - if (isNext(c, 'F')) { - // | "F" [ "_" ] - - char numbuf[16]; // Bigger than MAX_INT + 4 - int len; - isFunction = true; - if (!funcLen) - goto abandonParse; - - len = snprintf(numbuf, sizeof(numbuf), "__Z%d", funcLen); - - appendNStr(c, numbuf, len); - appendNStr(c, func, funcLen); - } - else if (isNext(c, 'S')) { - // | "S" ; Ignored - goto abandonParse; - } - else { - const char *qual; - int qualLen; - - // See if we can find a qualified class reference - tP = newType(c, c->fP.fNumI); - if (!tP) - goto abandonParse; - - bP = newIn(c); - if (!bP) - goto abandonParse; - - // Parse any qualifiers, store results in *fCurBaseP - bP->fPseudo = true; - if (!parse_qualifiers(c)) - goto abandonParse; - - if (!parse_class_name(c)) - goto abandonParse; - - bP = c->fCurBaseP; // class name may have redifined current - tP->fNumEntries = c->fP.fNumI - tP->fStartEntry; - - APPENDSTR(c, "__ZN"); - decodeQual(bP, &qualLen, &qual); - if (qualLen) - appendNStr(c, qual, qualLen); - appendNStr(c, bP->fFundTypeID, bP->fLen); - - if (funcLen) { - char numbuf[16]; // Bigger than MAX_INT + 1 - int len; - - len = snprintf(numbuf, sizeof(numbuf), "%d", funcLen); - appendNStr(c, numbuf, len); - appendNStr(c, func, funcLen); - } - else if (op) - appendStr(c, op); - else { - // No function & no op means constructor choose one of C1 & C2 - APPENDSTR(c, "C2"); - } - appendChar(c, 'E'); - } - - if (atEnd(c)) { - appendChar(c, 'v'); // void argument list - c->fRetCode = kR3Remangled; - return true; - } - - c->fCurBaseP = NULL; - if (!parse_argument_types(c)) - goto abandonParse; - - if (isFunction) { - if (isNext(c, '_')) { - // && 
!parse_type(c) @@@ gvdl: Unsupported return - c->fRetCode = kR3InternalNotRemangled; - goto abandonParse; - } - } - - if (!atEnd(c)) - goto abandonParse; - - // OK we have a complete and successful parse now output the - // argument list - return appendArgumentList(c); - -abandonParse: - return false; -} - -// ::= [ ] -// ::= [ "_GLOBAL_" [ID] "__" ] "__" [ ] -static Boolean parse_mangled_name(ParseContext *c) -{ - CheckPoint chk; - CheckPoint dubBarChk; - const char *func; - - // parse - if (strNext(c, "_GLOBAL_")) { // Is this GLOBAL static constructor? - // gvdl: can't deal with _GLOBAL_ - c->fRetCode = kR3InternalNotRemangled; - return false; // Can't deal with these - } - - func = c->fP.fInChar; - for (chk = *checkPoint(c); ; resetTo(c, &dubBarChk)) { - int funcLen; - const char *op = NULL; - - if (!parse_function_name(c)) - goto abandonParse; - dubBarChk = *checkPoint(c); - - // Note that the opInfo may be earlier than the curDoubleBar - // in which case the function name may need to be shrunk later on. - (void) parse_opinfo(c, &op); - - if (atEnd(c)) - goto abandonParse; // No Signature? 
- - funcLen = inCharFromCheck(&dubBarChk) - func - 2; - if (parse_signature(c, func, funcLen, op)) - return true; - - if (kR3NotRemangled != c->fRetCode) - goto abandonParse; - - // If no error then try again maybe another '__' exists - } - -abandonParse: - resetTo(c, &chk); - return false; -} - -// ::= ("_._" | "_$_" ) ; destructor -// | "__vt_" ; virtual table -// | "_" ("."|"$") -static Boolean parse_gnu_special(ParseContext *c) -{ - CheckPoint chk = *checkPoint(c); - BaseTypeData *bP = newIn(c); - - if (!bP) - return false; - - // What do the intel desctructors look like - if (strNext(c, "_._") || strNext(c, "_$_") ) // Is this a destructor - { - if (!parse_class_name(c) || !atEnd(c)) - goto abandonParse; - APPENDSTR(c, "__ZN"); - appendNStr(c, bP->fFundTypeID, bP->fLen); - APPENDSTR(c, "D2Ev"); - c->fRetCode = kR3Remangled; - return true; - } - else if (strNext(c, "__vt_")) // Is it's a vtable? - { - if (!parse_class_name(c) || !atEnd(c)) - goto abandonParse; - - APPENDSTR(c, "__ZTV"); - if (kNTClass != bP->fType) - goto abandonParse; - else if (bP->fQualified) { - appendChar(c, 'N'); - appendNStr(c, bP->fFundTypeID, bP->fLen); - appendChar(c, 'E'); - } - else - appendNStr(c, bP->fFundTypeID, bP->fLen); - - c->fRetCode = kR3Remangled; - return true; - } - else if (isNext(c, '_')) // Maybe it's a variable - { - const char *varname; - int varlen, len; - char numbuf[16]; // Bigger than MAX_INT + 1 - - if (!parse_class_name(c)) // Loads up the bP structure - goto abandonParse; - - if (!isNext(c, '.') && !isNext(c, '$')) - goto abandonParse; - - // Parse the variable name now. 
- varname = c->fP.fInChar; - if (atEnd(c) || !isValidFirstChar(getNext(c))) - goto abandonParse; - - while ( !atEnd(c) ) - if (!isValidChar(getNext(c))) - goto abandonParse; - - varlen = c->fP.fInChar - varname; - len = snprintf(numbuf, sizeof(numbuf), "%d", varlen); - - APPENDSTR(c, "__ZN"); - appendNStr(c, bP->fFundTypeID, bP->fLen); - - appendNStr(c, numbuf, len); - appendNStr(c, varname, varlen); - appendChar(c, 'E'); - - c->fRetCode = kR3Remangled; - return true; - } - - // Oh well it is none of those so give up but reset scan -abandonParse: - resetTo(c, &chk); - return false; -} - -// ::= -// | -static Boolean parse_special_or_name(ParseContext *c) -{ - Boolean res; - - - res = (parse_gnu_special(c) || parse_mangled_name(c)); - appendChar(c, '\0'); - - return res; -} - -Rem3Return rem3_remangle_name(char *gcc3, int *gcc3size, const char *gcc295) -{ - ParseContext *c; - Rem3Return result; - int size; - - if (!gcc295 || !gcc3 || !gcc3size) - return kR3BadArgument; - - size = strlen(gcc295); - if (size < 2) - return kR3NotRemangled; // Not a valid C++ symbol - else if (*gcc295 != '_') - return kR3NotRemangled; // no leading '_', not valid - - c = (ParseContext *) malloc(sizeof(*c)); - if (!c) - return kR3InternalNotRemangled; - bzero(c, sizeof(*c)); - - c->fInSize = size; - c->fInStr = gcc295 + 1; // Strip leading '_' - c->fP.fInChar = c->fInStr; - - c->fOutStrEnd = gcc3 + *gcc3size; - c->fOutChar = gcc3; - - c->fRetCode = kR3NotRemangled; - (void) parse_special_or_name(c); - - result = c->fRetCode; - if (kR3Remangled == result) { - if (c->fOutChar > c->fOutStrEnd) - result = kR3BufferTooSmallRemangled; - *gcc3size = c->fOutChar - gcc3 - 1; // Remove nul from len - } - - free(c); - - return result; -} diff --git a/libsa/catalogue.cpp b/libsa/catalogue.cpp deleted file mode 100644 index 6b650c0bd..000000000 --- a/libsa/catalogue.cpp +++ /dev/null @@ -1,2041 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include -#include -#include -#include -#include -#include -#include -#include - -extern "C" { -#include -#include -#include -#include -#include -#include -}; - -#include - -#include - -extern "C" { -extern void IODTFreeLoaderInfo( char *key, void *infoAddr, int infoSize ); -// extern kern_return_t host_info(host_t host, -// host_flavor_t flavor, -// host_info_t info, -// mach_msg_type_number_t *count); -extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype); -// Return the address of the named Mach-O segment from the currently -// executing 32 bit kernel, or NULL. 
-extern struct segment_command *getsegbyname(char *seg_name); -// Return the address of the named section from the named Mach-O segment -// from the currently executing 32 bit kernel, or NULL. -extern struct section *getsectbyname(const char *segname, const char *sectname); -}; - -#define LOG_DELAY() - -#if 0 -#define VTYELLOW "\033[33m" -#define VTRESET "\033[0m" -#else -#define VTYELLOW "" -#define VTRESET "" -#endif - -/********************************************************************* -*********************************************************************/ -static OSDictionary * gStartupExtensions = 0; -static OSArray * gBootLoaderObjects = 0; -extern OSArray * gIOPrelinkedModules; - -OSDictionary * getStartupExtensions(void) { - if (gStartupExtensions) { - return gStartupExtensions; - } - gStartupExtensions = OSDictionary::withCapacity(1); - assert (gStartupExtensions); - - return gStartupExtensions; -} - -/* This array holds objects that are needed to be held around during - * boot before kextd starts up. Currently it contains OSData objects - * copied from OF entries for mkext archives in device ROMs. Because - * the Device Tree support code dumps these after initially handing - * them to us, we have to be able to clean them up later. - */ -OSArray * getBootLoaderObjects(void) { - if (gBootLoaderObjects) { - return gBootLoaderObjects; - } - gBootLoaderObjects = OSArray::withCapacity(1); - assert (gBootLoaderObjects); - - return gBootLoaderObjects; -} - -/********************************************************************* -* This function checks that a driver dict has all the required -* entries and does a little bit of value checking too. -* -* index is nonnegative if the index of an entry from an mkext -* archive. 
-*********************************************************************/ -bool validateExtensionDict(OSDictionary * extension, int index) { - - bool result = true; - bool not_a_dict = false; - bool id_missing = false; - bool is_kernel_resource = false; - bool has_executable = false; - bool ineligible_for_safe_boot = false; - OSString * bundleIdentifier = NULL; // do not release - OSObject * rawValue = NULL; // do not release - OSString * stringValue = NULL; // do not release - OSBoolean * booleanValue = NULL; // do not release - OSDictionary * personalities = NULL; // do not release - OSDictionary * libraries = NULL; // do not release - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key = NULL; // do not release - VERS_version vers; - VERS_version compatible_vers; - char namep[16]; // unused but needed for PE_parse_boot_arg() - - // Info dict is a dictionary - if (!OSDynamicCast(OSDictionary, extension)) { - not_a_dict = true; - result = false; - goto finish; - } - - // CFBundleIdentifier is a string - REQUIRED - bundleIdentifier = OSDynamicCast(OSString, - extension->getObject("CFBundleIdentifier")); - if (!bundleIdentifier) { - id_missing = true; - result = false; - goto finish; - } - - // Length of CFBundleIdentifier is not >= KMOD_MAX_NAME - if (bundleIdentifier->getLength() >= KMOD_MAX_NAME) { - result = false; - goto finish; - } - - // CFBundlePackageType is "KEXT" - REQUIRED - stringValue = OSDynamicCast(OSString, - extension->getObject("CFBundlePackageType")); - if (!stringValue) { - result = false; - goto finish; - } - if (!stringValue->isEqualTo("KEXT")) { - result = false; - goto finish; - } - - // CFBundleVersion is a string - REQUIRED - stringValue = OSDynamicCast(OSString, - extension->getObject("CFBundleVersion")); - if (!stringValue) { - result = false; - goto finish; - } - // CFBundleVersion is of valid form - vers = VERS_parse_string(stringValue->getCStringNoCopy()); - if (vers < 0) { - result = false; - goto finish; - } - 
- // OSBundleCompatibleVersion is a string - OPTIONAL - rawValue = extension->getObject("OSBundleCompatibleVersion"); - if (rawValue) { - stringValue = OSDynamicCast(OSString, rawValue); - if (!stringValue) { - result = false; - goto finish; - } - - // OSBundleCompatibleVersion is of valid form - compatible_vers = VERS_parse_string(stringValue->getCStringNoCopy()); - if (compatible_vers < 0) { - result = false; - goto finish; - } - - // OSBundleCompatibleVersion <= CFBundleVersion - if (compatible_vers > vers) { - result = false; - goto finish; - } - } - - // CFBundleExecutable is a string - OPTIONAL - rawValue = extension->getObject("CFBundleExecutable"); - if (rawValue) { - stringValue = OSDynamicCast(OSString, rawValue); - if (!stringValue || stringValue->getLength() == 0) { - result = false; - goto finish; - } - has_executable = true; - } - - // OSKernelResource is a boolean value - OPTIONAL - rawValue = extension->getObject("OSKernelResource"); - if (rawValue) { - booleanValue = OSDynamicCast(OSBoolean, rawValue); - if (!booleanValue) { - result = false; - goto finish; - } - is_kernel_resource = booleanValue->isTrue(); - } - - // IOKitPersonalities is a dictionary - OPTIONAL - rawValue = extension->getObject("IOKitPersonalities"); - if (rawValue) { - personalities = OSDynamicCast(OSDictionary, rawValue); - if (!personalities) { - result = false; - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(personalities); - if (!keyIterator) { - IOLog("Error: Failed to allocate iterator for personalities.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - while ((key = OSDynamicCast(OSString, keyIterator->getNextObject()))) { - OSDictionary * personality = NULL; // do not release - - // Each personality is a dictionary - personality = OSDynamicCast(OSDictionary, - personalities->getObject(key)); - if (!personality) { - result = false; - goto finish; - } - - // IOClass exists as a string - REQUIRED - if (!OSDynamicCast(OSString, 
personality->getObject("IOClass"))) { - result = false; - goto finish; - } - - // IOProviderClass exists as a string - REQUIRED - if (!OSDynamicCast(OSString, - personality->getObject("IOProviderClass"))) { - - result = false; - goto finish; - } - - // CFBundleIdentifier is a string - OPTIONAL - INSERT IF ABSENT! - rawValue = personality->getObject("CFBundleIdentifier"); - if (!rawValue) { - personality->setObject("CFBundleIdentifier", bundleIdentifier); - } else { - OSString * personalityID = NULL; // do not release - personalityID = OSDynamicCast(OSString, rawValue); - if (!personalityID) { - result = false; - goto finish; - } else { - // Length of CFBundleIdentifier is not >= KMOD_MAX_NAME - if (personalityID->getLength() >= KMOD_MAX_NAME) { - result = false; - goto finish; - } - } - } - - // IOKitDebug is a number - OPTIONAL - rawValue = personality->getObject("IOKitDebug"); - if (rawValue && !OSDynamicCast(OSNumber, rawValue)) { - result = false; - goto finish; - } - } - - keyIterator->release(); - keyIterator = NULL; - } - - - // OSBundleLibraries is a dictionary - REQUIRED if - // not kernel resource & has executable - // - rawValue = extension->getObject("OSBundleLibraries"); - if (!rawValue && !is_kernel_resource && has_executable) { - result = false; - goto finish; - } - - if (rawValue) { - libraries = OSDynamicCast(OSDictionary, rawValue); - if (!libraries) { - result = false; - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(libraries); - if (!keyIterator) { - IOLog("Error: Failed to allocate iterator for libraries.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - while ((key = OSDynamicCast(OSString, - keyIterator->getNextObject()))) { - - OSString * libraryVersion = NULL; // do not release - - // Each key's length is not >= KMOD_MAX_NAME - if (key->getLength() >= KMOD_MAX_NAME) { - result = false; - goto finish; - } - - libraryVersion = OSDynamicCast(OSString, - libraries->getObject(key)); - if (!libraryVersion) { - 
result = false; - goto finish; - } - - // Each value is a valid version string - vers = VERS_parse_string(libraryVersion->getCStringNoCopy()); - if (vers < 0) { - result = false; - goto finish; - } - } - - keyIterator->release(); - keyIterator = NULL; - } - - // OSBundleRequired, if present, must have a legal value. - // If it is not present and if we are safe-booting, - // then the kext is not eligible. - // - rawValue = extension->getObject("OSBundleRequired"); - if (rawValue) { - stringValue = OSDynamicCast(OSString, rawValue); - if (!stringValue) { - result = false; - goto finish; - } - if (!stringValue->isEqualTo("Root") && - !stringValue->isEqualTo("Local-Root") && - !stringValue->isEqualTo("Network-Root") && - !stringValue->isEqualTo("Safe Boot") && - !stringValue->isEqualTo("Console")) { - - result = false; - goto finish; - } - - } else if (PE_parse_boot_argn("-x", namep, sizeof (namep))) { /* safe boot */ - ineligible_for_safe_boot = true; - result = false; - goto finish; - } - - -finish: - if (keyIterator) keyIterator->release(); - - if (!result) { - if (ineligible_for_safe_boot) { - IOLog(VTYELLOW "Skipping extension \"%s\" during safe boot " - "(no OSBundleRequired property)\n" - VTRESET, - bundleIdentifier->getCStringNoCopy()); - } else if (not_a_dict) { - if (index > -1) { - IOLog(VTYELLOW "mkext entry %d: " VTRESET, index); - } else { - IOLog(VTYELLOW "kernel extension " VTRESET); - } - IOLog(VTYELLOW "info dictionary isn't a dictionary\n" - VTRESET); - } else if (id_missing) { - if (index > -1) { - IOLog(VTYELLOW "mkext entry %d: " VTRESET, index); - } else { - IOLog(VTYELLOW "kernel extension " VTRESET); - } - IOLog(VTYELLOW "\"CFBundleIdentifier\" property is " - "missing or not a string\n" - VTRESET); - } else { - IOLog(VTYELLOW "kernel extension \"%s\": info dictionary is invalid\n" - VTRESET, bundleIdentifier->getCStringNoCopy()); - } - LOG_DELAY(); - } - - return result; -} - - 
-/********************************************************************* -*********************************************************************/ -OSDictionary * compareExtensionVersions( - OSDictionary * incumbent, - OSDictionary * candidate) { - - OSDictionary * winner = NULL; - - OSDictionary * incumbentPlist = NULL; - OSDictionary * candidatePlist = NULL; - OSString * incumbentName = NULL; - OSString * candidateName = NULL; - OSString * incumbentVersionString = NULL; - OSString * candidateVersionString = NULL; - VERS_version incumbent_vers = 0; - VERS_version candidate_vers = 0; - - incumbentPlist = OSDynamicCast(OSDictionary, - incumbent->getObject("plist")); - candidatePlist = OSDynamicCast(OSDictionary, - candidate->getObject("plist")); - - if (!incumbentPlist || !candidatePlist) { - IOLog("compareExtensionVersions() called with invalid " - "extension dictionaries.\n"); - LOG_DELAY(); - winner = NULL; - goto finish; - } - - incumbentName = OSDynamicCast(OSString, - incumbentPlist->getObject("CFBundleIdentifier")); - candidateName = OSDynamicCast(OSString, - candidatePlist->getObject("CFBundleIdentifier")); - incumbentVersionString = OSDynamicCast(OSString, - incumbentPlist->getObject("CFBundleVersion")); - candidateVersionString = OSDynamicCast(OSString, - candidatePlist->getObject("CFBundleVersion")); - - if (!incumbentName || !candidateName || - !incumbentVersionString || !candidateVersionString) { - - IOLog("compareExtensionVersions() called with invalid " - "extension dictionaries.\n"); - LOG_DELAY(); - winner = NULL; - goto finish; - } - - if (strcmp(incumbentName->getCStringNoCopy(), - candidateName->getCStringNoCopy())) { - - IOLog("compareExtensionVersions() called with different " - "extension names (%s and %s).\n", - incumbentName->getCStringNoCopy(), - candidateName->getCStringNoCopy()); - LOG_DELAY(); - winner = NULL; - goto finish; - } - - incumbent_vers = VERS_parse_string(incumbentVersionString->getCStringNoCopy()); - if (incumbent_vers < 0) { - 
- IOLog(VTYELLOW "Error parsing version string for extension %s (%s)\n" - VTRESET, - incumbentName->getCStringNoCopy(), - incumbentVersionString->getCStringNoCopy()); - LOG_DELAY(); - winner = NULL; - goto finish; - } - - candidate_vers = VERS_parse_string(candidateVersionString->getCStringNoCopy()); - if (candidate_vers < 0) { - - IOLog(VTYELLOW "Error parsing version string for extension %s (%s)\n" - VTRESET, - candidateName->getCStringNoCopy(), - candidateVersionString->getCStringNoCopy()); - LOG_DELAY(); - winner = NULL; - goto finish; - } - - if (candidate_vers > incumbent_vers) { - IOLog(VTYELLOW "Replacing extension \"%s\" with newer version " - "(%s -> %s).\n" VTRESET, - incumbentName->getCStringNoCopy(), - incumbentVersionString->getCStringNoCopy(), - candidateVersionString->getCStringNoCopy()); - LOG_DELAY(); - winner = candidate; - goto finish; - } else { - IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with older/same " - " version (%s -> %s).\n" VTRESET, - candidateName->getCStringNoCopy(), - candidateVersionString->getCStringNoCopy(), - incumbentVersionString->getCStringNoCopy()); - LOG_DELAY(); - winner = incumbent; - goto finish; - } - -finish: - - // no cleanup, how nice - return winner; -} - - -/********************************************************************* -* This function merges entries in the mergeFrom dictionary into the -* mergeInto dictionary. If it returns false, the two dictionaries are -* not altered. If it returns true, then mergeInto may have new -* entries; any keys that were already present in mergeInto are -* removed from mergeFrom, so that the caller can see what was -* actually merged. 
-*********************************************************************/ -bool mergeExtensionDictionaries(OSDictionary * mergeInto, - OSDictionary * mergeFrom) { - - bool result = true; - OSDictionary * mergeIntoCopy = NULL; // must release - OSDictionary * mergeFromCopy = NULL; // must release - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key; // don't release - - /* Add 1 to count to guarantee copy can grow (grr). - */ - mergeIntoCopy = OSDictionary::withDictionary(mergeInto, - mergeInto->getCount() + 1); - if (!mergeIntoCopy) { - IOLog("Error: Failed to copy 'into' extensions dictionary " - "for merge.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - /* Add 1 to count to guarantee copy can grow (grr). - */ - mergeFromCopy = OSDictionary::withDictionary(mergeFrom, - mergeFrom->getCount() + 1); - if (!mergeFromCopy) { - IOLog("Error: Failed to copy 'from' extensions dictionary " - "for merge.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(mergeFrom); - if (!keyIterator) { - IOLog("Error: Failed to allocate iterator for extensions.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - - /***** - * Loop through "from" dictionary, checking if the identifier already - * exists in the "into" dictionary and checking versions if it does. - */ - while ((key = OSDynamicCast(OSString, keyIterator->getNextObject()))) { - OSDictionary * incumbentExt = OSDynamicCast(OSDictionary, - mergeIntoCopy->getObject(key)); - OSDictionary * candidateExt = OSDynamicCast(OSDictionary, - mergeFrom->getObject(key)); - - if (!incumbentExt) { - if (!mergeIntoCopy->setObject(key, candidateExt)) { - - /* This is a fatal error, so bail. 
- */ - IOLog("mergeExtensionDictionaries(): Failed to add " - "identifier %s\n", - key->getCStringNoCopy()); - LOG_DELAY(); - result = false; - goto finish; - } - } else { - OSDictionary * mostRecentExtension = - compareExtensionVersions(incumbentExt, candidateExt); - - if (mostRecentExtension == incumbentExt) { - mergeFromCopy->removeObject(key); - } else if (mostRecentExtension == candidateExt) { - - if (!mergeIntoCopy->setObject(key, candidateExt)) { - - /* This is a fatal error, so bail. - */ - IOLog("mergeExtensionDictionaries(): Failed to add " - "identifier %s\n", - key->getCStringNoCopy()); - LOG_DELAY(); - result = false; - goto finish; - } - } else /* should be NULL */ { - - /* This is a nonfatal error, so continue doing others. - */ - IOLog("mergeExtensionDictionaries(): Error comparing " - "versions of duplicate extensions %s.\n", - key->getCStringNoCopy()); - LOG_DELAY(); - continue; - } - } - } - -finish: - - /* If successful, replace the contents of the original - * dictionaries with those of the modified copies. - */ - if (result) { - mergeInto->flushCollection(); - mergeInto->merge(mergeIntoCopy); - mergeFrom->flushCollection(); - mergeFrom->merge(mergeFromCopy); - } - - if (mergeIntoCopy) mergeIntoCopy->release(); - if (mergeFromCopy) mergeFromCopy->release(); - if (keyIterator) keyIterator->release(); - - return result; -} - - -/**** - * These bits are used to parse data made available by bootx. 
- */ -#define BOOTX_KEXT_PREFIX "Driver-" -#define BOOTX_MULTIKEXT_PREFIX "DriversPackage-" - -typedef struct MemoryMapFileInfo { - UInt32 paddr; - UInt32 length; -} MemoryMapFileInfo; - -typedef struct BootxDriverInfo { - char *plistAddr; - long plistLength; - void *moduleAddr; - long moduleLength; -} BootxDriverInfo; - -typedef struct MkextEntryInfo { - vm_address_t base_address; - mkext_file * fileinfo; -} MkextEntryInfo; - - -/********************************************************************* -* This private function reads the data for a single extension from -* the bootx memory-map's propery dict, returning a dictionary with -* keys "plist" for the extension's Info.plist as a parsed OSDictionary -* and "code" for the extension's executable code as an OSData. -*********************************************************************/ -OSDictionary * readExtension(OSDictionary * propertyDict, - const char * memory_map_name) { - - int error = 0; - OSData * bootxDriverDataObject = NULL; - OSDictionary * driverPlist = NULL; - OSString * driverName = NULL; - OSData * driverCode = NULL; - OSString * errorString = NULL; - OSDictionary * driverDict = NULL; - - const MemoryMapFileInfo * driverInfo = 0; - BootxDriverInfo * dataBuffer; - - kmod_info_t * loaded_kmod = NULL; - - bootxDriverDataObject = OSDynamicCast(OSData, - propertyDict->getObject(memory_map_name)); - // don't release bootxDriverDataObject - - if (!bootxDriverDataObject) { - IOLog("Error: No driver data object " - "for device tree entry \"%s\".\n", - memory_map_name); - LOG_DELAY(); - error = 1; - goto finish; - } - - driverDict = OSDictionary::withCapacity(2); - if (!driverDict) { - IOLog("Error: Couldn't allocate dictionary " - "for device tree entry \"%s\".\n", memory_map_name); - LOG_DELAY(); - error = 1; - goto finish; - } - - driverInfo = (const MemoryMapFileInfo *) - bootxDriverDataObject->getBytesNoCopy(0, - sizeof(MemoryMapFileInfo)); -#if defined (__ppc__) || defined (__arm__) - dataBuffer = 
(BootxDriverInfo *)ml_static_ptovirt(driverInfo->paddr); -#elif defined (__i386__) - dataBuffer = (BootxDriverInfo *)ml_boot_ptovirt(driverInfo->paddr); - dataBuffer->plistAddr = (char *)ml_boot_ptovirt((vm_address_t)dataBuffer->plistAddr); - if (dataBuffer->moduleAddr) - dataBuffer->moduleAddr = (void *)ml_boot_ptovirt((vm_address_t)dataBuffer->moduleAddr); -#else -#error unsupported architecture -#endif - if (!dataBuffer) { - IOLog("Error: No data buffer " - "for device tree entry \"%s\".\n", memory_map_name); - LOG_DELAY(); - error = 1; - goto finish; - } - - driverPlist = OSDynamicCast(OSDictionary, - OSUnserializeXML(dataBuffer->plistAddr, &errorString)); - if (!driverPlist) { - IOLog("Error: Couldn't read XML property list " - "for device tree entry \"%s\".\n", memory_map_name); - LOG_DELAY(); - if (errorString) { - IOLog("XML parse error: %s.\n", - errorString->getCStringNoCopy()); - LOG_DELAY(); - } - error = 1; - goto finish; - } - - - driverName = OSDynamicCast(OSString, - driverPlist->getObject("CFBundleIdentifier")); // do not release - if (!driverName) { - IOLog("Error: Device tree entry \"%s\" has " - "no \"CFBundleIdentifier\" property.\n", memory_map_name); - LOG_DELAY(); - error = 1; - goto finish; - } - - /* Check if kmod is already loaded and is a real loadable one (has - * an address). - */ - loaded_kmod = kmod_lookupbyname_locked(driverName->getCStringNoCopy()); - if (loaded_kmod && loaded_kmod->address) { - IOLog("Skipping new extension \"%s\"; an extension named " - "\"%s\" is already loaded.\n", - driverName->getCStringNoCopy(), - loaded_kmod->name); - LOG_DELAY(); - error = 1; - goto finish; - } - - if (!validateExtensionDict(driverPlist, -1)) { - // validateExtensionsDict() logs an error - error = 1; - goto finish; - } - - driverDict->setObject("plist", driverPlist); - - /* It's perfectly okay for a KEXT to have no executable. - * Check that moduleAddr is nonzero before attempting to - * get one. 
- * - * NOTE: The driverCode object is created "no-copy", so - * it doesn't own that memory. The memory must be freed - * separately from the OSData object (see - * clearStartupExtensionsAndLoaderInfo() at the end of this file). - */ - if (dataBuffer->moduleAddr && dataBuffer->moduleLength) { - driverCode = OSData::withBytesNoCopy(dataBuffer->moduleAddr, - dataBuffer->moduleLength); - if (!driverCode) { - IOLog("Error: Couldn't allocate data object " - "to hold code for device tree entry \"%s\".\n", - memory_map_name); - LOG_DELAY(); - error = 1; - goto finish; - } - - if (driverCode) { - driverDict->setObject("code", driverCode); - } - } - -finish: - - if (loaded_kmod) { - kfree(loaded_kmod, sizeof(kmod_info_t)); - } - - // do not release bootxDriverDataObject - // do not release driverName - - if (driverPlist) { - driverPlist->release(); - } - if (errorString) { - errorString->release(); - } - if (driverCode) { - driverCode->release(); - } - if (error) { - if (driverDict) { - driverDict->release(); - driverDict = NULL; - } - } - return driverDict; -} - - -/********************************************************************* -* Used to uncompress a single file entry in an mkext archive. -* -* The OSData returned does not own its memory! You must deallocate -* that memory using kmem_free() before releasing the OSData(). 
-*********************************************************************/ -static bool uncompressFile(u_int8_t *base_address, mkext_file * fileinfo, - /* out */ OSData ** file) { - - bool result = true; - kern_return_t kern_result; - u_int8_t * uncompressed_file = 0; // kmem_free() on error - OSData * uncompressedFile = 0; // returned - size_t uncompressed_size = 0; - - size_t offset = OSSwapBigToHostInt32(fileinfo->offset); - size_t compsize = OSSwapBigToHostInt32(fileinfo->compsize); - size_t realsize = OSSwapBigToHostInt32(fileinfo->realsize); - time_t modifiedsecs = OSSwapBigToHostInt32(fileinfo->modifiedsecs); - - *file = 0; - - /* If these four fields are zero there's no file, but that isn't - * an error. - */ - if (offset == 0 && compsize == 0 && - realsize == 0 && modifiedsecs == 0) { - goto finish; - } - - // Add 1 for '\0' to terminate XML string! - kern_result = kmem_alloc(kernel_map, (vm_offset_t *)&uncompressed_file, - realsize + 1); - if (kern_result != KERN_SUCCESS) { - IOLog("Error: Couldn't allocate data buffer " - "to uncompress file.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - uncompressedFile = OSData::withBytesNoCopy(uncompressed_file, - realsize + 1); - if (!uncompressedFile) { - IOLog("Error: Couldn't allocate data object " - "to uncompress file.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - if (compsize != 0) { - uncompressed_size = decompress_lzss(uncompressed_file, - base_address + offset, - compsize); - if (uncompressed_size != realsize) { - IOLog("Error: Uncompressed file is not the length " - "recorded.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - uncompressed_file[uncompressed_size] = '\0'; - } else { - bcopy(base_address + offset, uncompressed_file, - realsize); - uncompressed_file[realsize] = '\0'; - } - - *file = uncompressedFile; - -finish: - if (!result) { - if (uncompressed_file) { - kmem_free(kernel_map, (vm_address_t)uncompressed_file, - realsize + 1); - } - if (uncompressedFile) { - 
uncompressedFile->release(); - *file = 0; - } - } - return result; -} - -bool uncompressModule(OSData *compData, /* out */ OSData ** file) { - - const MkextEntryInfo *info = (const MkextEntryInfo *) compData->getBytesNoCopy(); - - return uncompressFile((u_int8_t *) info->base_address, - info->fileinfo, file); -} - - -/********************************************************************* -* Does the work of pulling extensions out of an mkext archive located -* in memory. -*********************************************************************/ -bool extractExtensionsFromArchive(const MemoryMapFileInfo * mkext_file_info, - bool vaddr, - OSDictionary * extensions) { - - bool result = true; - - u_int8_t * crc_address = 0; - u_int32_t checksum; - mkext_header * mkext_data = 0; // don't free - mkext_kext * onekext_data = 0; // don't free - mkext_file * plist_file = 0; // don't free - mkext_file * module_file = 0; // don't free - kmod_info_t * loaded_kmod = 0; // must free - - OSData * driverPlistDataObject = 0; // must release - OSDictionary * driverPlist = 0; // must release - OSData * driverCode = 0; // must release - OSDictionary * driverDict = 0; // must release - OSString * moduleName = 0; // don't release - OSString * errorString = NULL; // must release - - OSData * moduleInfo = 0; // must release - MkextEntryInfo module_info; - - IORegistryEntry * root; - OSData * checksumObj; - - if (vaddr) { - // addExtensionsFromArchive passes a kernel virtual address - mkext_data = (mkext_header *)mkext_file_info->paddr; - } else { -#if defined (__ppc__) || defined (__arm__) - mkext_data = (mkext_header *)ml_static_ptovirt(mkext_file_info->paddr); -#elif defined (__i386__) - mkext_data = (mkext_header *)ml_boot_ptovirt(mkext_file_info->paddr); -#else -#error unsupported architecture -#endif - } - - if (OSSwapBigToHostInt32(mkext_data->magic) != MKEXT_MAGIC || - OSSwapBigToHostInt32(mkext_data->signature) != MKEXT_SIGN) { - IOLog("Error: Extension archive has invalid magic or 
signature.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - if (OSSwapBigToHostInt32(mkext_data->length) != mkext_file_info->length) { - IOLog("Error: Mismatch between extension archive & " - "recorded length.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - crc_address = (u_int8_t *)&mkext_data->version; - checksum = adler32(crc_address, - (unsigned int)mkext_data + - OSSwapBigToHostInt32(mkext_data->length) - (unsigned int)crc_address); - - if (OSSwapBigToHostInt32(mkext_data->adler32) != checksum) { - IOLog("Error: Extension archive has a bad checksum.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - root = IORegistryEntry::getRegistryRoot(); - assert(root); - checksumObj = OSData::withBytes((void *)&checksum, - sizeof(checksum)); - assert(checksumObj); - if (checksumObj) { - root->setProperty(kIOStartupMkextCRC, checksumObj); - checksumObj->release(); - } - - /* If the MKEXT archive isn't fat, check that the CPU type & subtype - * match that of the running kernel. 
- */ - if (OSSwapBigToHostInt32(mkext_data->cputype) != (UInt32)CPU_TYPE_ANY) { - kern_return_t kresult = KERN_FAILURE; - host_basic_info_data_t hostinfo; - host_info_t hostinfo_ptr = (host_info_t)&hostinfo; - mach_msg_type_number_t count = sizeof(hostinfo)/sizeof(integer_t); - - kresult = host_info((host_t)1, HOST_BASIC_INFO, - hostinfo_ptr, &count); - if (kresult != KERN_SUCCESS) { - IOLog("Error: Couldn't get current host info.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - if ((UInt32)hostinfo.cpu_type != - OSSwapBigToHostInt32(mkext_data->cputype)) { - - IOLog("Error: Extension archive doesn't contain software " - "for this computer's CPU type.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - if (!grade_binary(OSSwapBigToHostInt32(mkext_data->cputype), - OSSwapBigToHostInt32(mkext_data->cpusubtype))) { - IOLog("Error: Extension archive doesn't contain software " - "for this computer's CPU subtype.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - } - - for (unsigned int i = 0; - i < OSSwapBigToHostInt32(mkext_data->numkexts); - i++) { - - if (loaded_kmod) { - kfree(loaded_kmod, sizeof(kmod_info_t)); - loaded_kmod = 0; - } - - if (driverPlistDataObject) { - kmem_free(kernel_map, - (unsigned int)driverPlistDataObject->getBytesNoCopy(), - driverPlistDataObject->getLength()); - - driverPlistDataObject->release(); - driverPlistDataObject = NULL; - } - if (driverPlist) { - driverPlist->release(); - driverPlist = NULL; - } - if (driverCode) { - driverCode->release(); - driverCode = NULL; - } - if (driverDict) { - driverDict->release(); - driverDict = NULL; - } - if (errorString) { - errorString->release(); - errorString = NULL; - } - - onekext_data = &mkext_data->kext[i]; - plist_file = &onekext_data->plist; - module_file = &onekext_data->module; - - if (!uncompressFile((u_int8_t *)mkext_data, plist_file, - &driverPlistDataObject)) { - - IOLog("Error: couldn't uncompress plist file " - "from multikext archive entry %d.\n", i); - 
LOG_DELAY(); - continue; - } - - if (!driverPlistDataObject) { - IOLog("Error: No property list present " - "for multikext archive entry %d.\n", i); - LOG_DELAY(); - continue; - } else { - driverPlist = OSDynamicCast(OSDictionary, - OSUnserializeXML( - (const char *)driverPlistDataObject->getBytesNoCopy(), - &errorString)); - if (!driverPlist) { - IOLog("Error: Couldn't read XML property list " - "for multikext archive entry %d.\n", i); - LOG_DELAY(); - if (errorString) { - IOLog("XML parse error: %s.\n", - errorString->getCStringNoCopy()); - LOG_DELAY(); - } - continue; - } - - if (!validateExtensionDict(driverPlist, i)) { - // validateExtensionsDict() logs an error - continue; - } - - } - - /* Get the extension's module name. This is used to record - * the extension. - */ - moduleName = OSDynamicCast(OSString, - driverPlist->getObject("CFBundleIdentifier")); // do not release - if (!moduleName) { - IOLog("Error: Multikext archive entry %d has " - "no \"CFBundleIdentifier\" property.\n", i); - LOG_DELAY(); - continue; // assume a kext config error & continue - } - - /* Check if kmod is already loaded and is a real loadable one (has - * an address). - */ - loaded_kmod = kmod_lookupbyname_locked(moduleName->getCStringNoCopy()); - if (loaded_kmod && loaded_kmod->address) { - IOLog("Skipping new extension \"%s\"; an extension named " - "\"%s\" is already loaded.\n", - moduleName->getCStringNoCopy(), - loaded_kmod->name); - continue; - } - - - driverDict = OSDictionary::withCapacity(2); - if (!driverDict) { - IOLog("Error: Couldn't allocate dictionary " - "for multikext archive entry %d.\n", i); - LOG_DELAY(); - result = false; - goto finish; - } - - driverDict->setObject("plist", driverPlist); - - /***** - * Prepare an entry to hold the mkext entry info for the - * compressed binary module, if there is one. If all four fields - * of the module entry are zero, there isn't one. 
- */ - if (!(loaded_kmod && loaded_kmod->address) && (OSSwapBigToHostInt32(module_file->offset) || - OSSwapBigToHostInt32(module_file->compsize) || - OSSwapBigToHostInt32(module_file->realsize) || - OSSwapBigToHostInt32(module_file->modifiedsecs))) { - - moduleInfo = OSData::withCapacity(sizeof(MkextEntryInfo)); - if (!moduleInfo) { - IOLog("Error: Couldn't allocate data object " - "for multikext archive entry %d.\n", i); - LOG_DELAY(); - result = false; - goto finish; - } - - module_info.base_address = (vm_address_t)mkext_data; - module_info.fileinfo = module_file; - - if (!moduleInfo->appendBytes(&module_info, sizeof(module_info))) { - IOLog("Error: Couldn't record info " - "for multikext archive entry %d.\n", i); - LOG_DELAY(); - result = false; - goto finish; - } - - driverDict->setObject("compressedCode", moduleInfo); - } - - OSDictionary * incumbentExt = OSDynamicCast(OSDictionary, - extensions->getObject(moduleName)); - - if (!incumbentExt) { - extensions->setObject(moduleName, driverDict); - } else { - OSDictionary * mostRecentExtension = - compareExtensionVersions(incumbentExt, driverDict); - - if (mostRecentExtension == incumbentExt) { - /* Do nothing, we've got the most recent. */ - } else if (mostRecentExtension == driverDict) { - if (!extensions->setObject(moduleName, driverDict)) { - - /* This is a fatal error, so bail. - */ - IOLog("extractExtensionsFromArchive(): Failed to add " - "identifier %s\n", - moduleName->getCStringNoCopy()); - LOG_DELAY(); - result = false; - goto finish; - } - } else /* should be NULL */ { - - /* This is a nonfatal error, so continue. 
- */ - IOLog("extractExtensionsFromArchive(): Error comparing " - "versions of duplicate extensions %s.\n", - moduleName->getCStringNoCopy()); - LOG_DELAY(); - continue; - } - } - } - -finish: - - if (loaded_kmod) kfree(loaded_kmod, sizeof(kmod_info_t)); - if (driverPlistDataObject) { - kmem_free(kernel_map, - (unsigned int)driverPlistDataObject->getBytesNoCopy(), - driverPlistDataObject->getLength()); - driverPlistDataObject->release(); - } - if (driverPlist) driverPlist->release(); - if (driverCode) driverCode->release(); - if (moduleInfo) moduleInfo->release(); - if (driverDict) driverDict->release(); - if (errorString) errorString->release(); - - return result; -} - -/********************************************************************* -* -*********************************************************************/ -bool readExtensions(OSDictionary * propertyDict, - const char * memory_map_name, - OSDictionary * extensions) { - - bool result = true; - OSData * mkextDataObject = 0; // don't release - const MemoryMapFileInfo * mkext_file_info = 0; // don't free - - mkextDataObject = OSDynamicCast(OSData, - propertyDict->getObject(memory_map_name)); - // don't release mkextDataObject - - if (!mkextDataObject) { - IOLog("Error: No mkext data object " - "for device tree entry \"%s\".\n", - memory_map_name); - LOG_DELAY(); - result = false; - goto finish; - } - - mkext_file_info = (const MemoryMapFileInfo *)mkextDataObject->getBytesNoCopy(); - if (!mkext_file_info) { - result = false; - goto finish; - } - - result = extractExtensionsFromArchive(mkext_file_info, false /*physical*/, extensions); - -finish: - - if (!result && extensions) { - extensions->flushCollection(); - } - - return result; -} - - -/********************************************************************* -* Adds the personalities for an extensions dictionary to the global -* IOCatalogue. 
-*********************************************************************/ -bool addPersonalities(OSDictionary * extensions) { - bool result = true; - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key; // don't release - OSDictionary * driverDict = NULL; // don't release - OSDictionary * driverPlist = NULL; // don't release - OSDictionary * thisDriverPersonalities = NULL; // don't release - OSArray * allDriverPersonalities = NULL; // must release - - allDriverPersonalities = OSArray::withCapacity(1); - if (!allDriverPersonalities) { - IOLog("Error: Couldn't allocate personality dictionary.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - /* Record all personalities found so that they can be - * added to the catalogue. - * Note: Not all extensions have personalities. - */ - - keyIterator = OSCollectionIterator::withCollection(extensions); - if (!keyIterator) { - IOLog("Error: Couldn't allocate iterator to record personalities.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - while ( ( key = OSDynamicCast(OSString, - keyIterator->getNextObject() ))) { - - driverDict = OSDynamicCast(OSDictionary, - extensions->getObject(key)); - driverPlist = OSDynamicCast(OSDictionary, - driverDict->getObject("plist")); - thisDriverPersonalities = OSDynamicCast(OSDictionary, - driverPlist->getObject("IOKitPersonalities")); - - if (thisDriverPersonalities) { - OSCollectionIterator * pIterator; - OSString * locakKey; - pIterator = OSCollectionIterator::withCollection( - thisDriverPersonalities); - if (!pIterator) { - IOLog("Error: Couldn't allocate iterator " - "to record extension personalities.\n"); - LOG_DELAY(); - continue; - } - while ( (locakKey = OSDynamicCast(OSString, - pIterator->getNextObject())) ) { - - OSDictionary * personality = OSDynamicCast( - OSDictionary, - thisDriverPersonalities->getObject(locakKey)); - if (personality) { - allDriverPersonalities->setObject(personality); - } - } - pIterator->release(); - } - } /* 
extract personalities */ - - - /* Add all personalities found to the IOCatalogue, - * but don't start matching. - */ - gIOCatalogue->addDrivers(allDriverPersonalities, false); - -finish: - - if (allDriverPersonalities) allDriverPersonalities->release(); - if (keyIterator) keyIterator->release(); - - return result; -} - - -/********************************************************************* -* Called from IOCatalogue to add extensions from an mkext archive. -* This function makes a copy of the mkext object passed in because -* the device tree support code dumps it after calling us (indirectly -* through the IOCatalogue). -*********************************************************************/ -bool addExtensionsFromArchive(OSData * mkextDataObject) { - bool result = true; - - OSDictionary * startupExtensions = NULL; // don't release - OSArray * bootLoaderObjects = NULL; // don't release - OSDictionary * extensions = NULL; // must release - MemoryMapFileInfo mkext_file_info; - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key = NULL; // don't release - - startupExtensions = getStartupExtensions(); - if (!startupExtensions) { - IOLog("Can't record extension archive; there is no" - " extensions dictionary.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - bootLoaderObjects = getBootLoaderObjects(); - if (! bootLoaderObjects) { - IOLog("Error: Couldn't allocate array to hold temporary objects.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - extensions = OSDictionary::withCapacity(2); - if (!extensions) { - IOLog("Error: Couldn't allocate dictionary to unpack " - "extension archive.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - mkext_file_info.paddr = (UInt32)mkextDataObject->getBytesNoCopy(); - mkext_file_info.length = mkextDataObject->getLength(); - - /* Save the local mkext data object so that we can deallocate it later. 
- */ - bootLoaderObjects->setObject(mkextDataObject); - - result = extractExtensionsFromArchive(&mkext_file_info, true /*virtual*/, extensions); - if (!result) { - IOLog("Error: Failed to extract extensions from archive.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - result = mergeExtensionDictionaries(startupExtensions, extensions); - if (!result) { - IOLog("Error: Failed to merge new extensions into existing set.\n"); - LOG_DELAY(); - goto finish; - } - - result = addPersonalities(extensions); - if (!result) { - IOLog("Error: Failed to add personalities for extensions extracted " - "from archive.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - -finish: - - if (!result) { - IOLog("Error: Failed to record extensions from archive.\n"); - LOG_DELAY(); - } else { - keyIterator = OSCollectionIterator::withCollection( - extensions); - - if (keyIterator) { - while ( (key = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - - IOLog("Added extension \"%s\" from archive.\n", - key->getCStringNoCopy()); - LOG_DELAY(); - } - keyIterator->release(); - } - } - - if (extensions) extensions->release(); - - return result; -} - - -/********************************************************************* -* This function builds dictionaries for the startup extensions -* put into memory by bootx, recording each in the startup extensions -* dictionary. The dictionary format is this: -* -* { -* "plist" = (the extension's Info.plist as an OSDictionary) -* "code" = (an OSData containing the executable file) -* } -* -* This function returns true if any extensions were found and -* recorded successfully, or if there are no start extensions, -* and false if an unrecoverable error occurred. An error reading -* a single extension is not considered fatal, and this function -* will simply skip the problematic extension to try the next one. 
-*********************************************************************/ - -bool recordStartupExtensions(void) { - bool result = true; - OSDictionary * startupExtensions = NULL; // must release - OSDictionary * existingExtensions = NULL; // don't release - OSDictionary * mkextExtensions = NULL; // must release - IORegistryEntry * bootxMemoryMap = NULL; // must release - OSDictionary * propertyDict = NULL; // must release - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key = NULL; // don't release - - OSDictionary * newDriverDict = NULL; // must release - OSDictionary * driverPlist = NULL; // don't release - - struct section * infosect; - struct section * symsect; - unsigned int prelinkedCount = 0; - - existingExtensions = getStartupExtensions(); - if (!existingExtensions) { - IOLog("Error: There is no dictionary for startup extensions.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - startupExtensions = OSDictionary::withCapacity(1); - if (!startupExtensions) { - IOLog("Error: Couldn't allocate dictionary " - "to record startup extensions.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - // -- - // add any prelinked modules as startup extensions - - infosect = getsectbyname("__PRELINK", "__info"); - symsect = getsectbyname("__PRELINK", "__symtab"); - if (infosect && infosect->addr && infosect->size - && symsect && symsect->addr && symsect->size) do - { - gIOPrelinkedModules = OSDynamicCast(OSArray, - OSUnserializeXML((const char *) infosect->addr, NULL)); - - if (!gIOPrelinkedModules) - break; - for( unsigned int idx = 0; - (propertyDict = OSDynamicCast(OSDictionary, gIOPrelinkedModules->getObject(idx))); - idx++) - { - enum { kPrelinkReservedCount = 4 }; - - /* Get the extension's module name. This is used to record - * the extension. Do *not* release the moduleName. 
- */ - OSString * moduleName = OSDynamicCast(OSString, - propertyDict->getObject("CFBundleIdentifier")); - if (!moduleName) { - IOLog("Error: Prelinked module entry has " - "no \"CFBundleIdentifier\" property.\n"); - LOG_DELAY(); - continue; - } - - /* Add the kext, & its plist. - */ - newDriverDict = OSDictionary::withCapacity(4); - assert(newDriverDict); - newDriverDict->setObject("plist", propertyDict); - startupExtensions->setObject(moduleName, newDriverDict); - newDriverDict->release(); - - /* Add the code if present. - */ - OSData * data = OSDynamicCast(OSData, propertyDict->getObject("OSBundlePrelink")); - if (data) { - if (data->getLength() < (kPrelinkReservedCount * sizeof(UInt32))) { - IOLog("Error: Prelinked module entry has " - "invalid \"OSBundlePrelink\" property.\n"); - LOG_DELAY(); - continue; - } - const UInt32 * prelink; - prelink = (const UInt32 *) data->getBytesNoCopy(); - kmod_info_t * kmod_info = (kmod_info_t *) OSReadBigInt32(prelink, 0); - // end of "file" is end of symbol sect - data = OSData::withBytesNoCopy((void *) kmod_info->address, - symsect->addr + symsect->size - kmod_info->address); - newDriverDict->setObject("code", data); - data->release(); - prelinkedCount++; - continue; - } - /* Add the symbols if present. 
- */ - OSNumber * num = OSDynamicCast(OSNumber, propertyDict->getObject("OSBundlePrelinkSymbols")); - if (num) { - UInt32 offset = num->unsigned32BitValue(); - data = OSData::withBytesNoCopy((void *) (symsect->addr + offset), symsect->size - offset); - newDriverDict->setObject("code", data); - data->release(); - prelinkedCount++; - continue; - } - } - if (gIOPrelinkedModules) - IOLog("%d prelinked modules\n", prelinkedCount); - - // free __info - vm_offset_t - virt = ml_static_ptovirt(infosect->addr); - if( virt) { - ml_static_mfree(virt, infosect->size); - } - newDriverDict = NULL; - } - while (false); - // -- - - bootxMemoryMap = - IORegistryEntry::fromPath( - "/chosen/memory-map", // path - gIODTPlane // plane - ); - // return value is retained so be sure to release it - - if (!bootxMemoryMap) { - IOLog("Error: Couldn't read booter memory map.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - propertyDict = bootxMemoryMap->dictionaryWithProperties(); - if (!propertyDict) { - IOLog("Error: Couldn't get property dictionary " - "from memory map.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(propertyDict); - if (!keyIterator) { - IOLog("Error: Couldn't allocate iterator for driver images.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - while ( (key = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - /* Clear newDriverDict & mkextExtensions upon entry to the loop, - * handling both successful and unsuccessful iterations. - */ - if (newDriverDict) { - newDriverDict->release(); - newDriverDict = NULL; - } - if (mkextExtensions) { - mkextExtensions->release(); - mkextExtensions = NULL; - } - - const char * keyValue = key->getCStringNoCopy(); - - if ( !strncmp(keyValue, BOOTX_KEXT_PREFIX, - strlen(BOOTX_KEXT_PREFIX)) ) { - - /* Read the extension from the bootx-supplied memory. 
- */ - newDriverDict = readExtension(propertyDict, keyValue); - if (!newDriverDict) { - IOLog("Error: Couldn't read data " - "for device tree entry \"%s\".\n", keyValue); - LOG_DELAY(); - continue; - } - - - /* Preprare to record the extension by getting its info plist. - */ - driverPlist = OSDynamicCast(OSDictionary, - newDriverDict->getObject("plist")); - if (!driverPlist) { - IOLog("Error: Extension in device tree entry \"%s\" " - "has no property list.\n", keyValue); - LOG_DELAY(); - continue; - } - - - /* Get the extension's module name. This is used to record - * the extension. Do *not* release the moduleName. - */ - OSString * moduleName = OSDynamicCast(OSString, - driverPlist->getObject("CFBundleIdentifier")); - if (!moduleName) { - IOLog("Error: Device tree entry \"%s\" has " - "no \"CFBundleIdentifier\" property.\n", keyValue); - LOG_DELAY(); - continue; - } - - - /* All has gone well so far, so record the extension under - * its module name, checking for an existing duplicate. - * - * Do not release moduleName, as it's part of the extension's - * plist. - */ - OSDictionary * incumbentExt = OSDynamicCast(OSDictionary, - startupExtensions->getObject(moduleName)); - - if (!incumbentExt) { - startupExtensions->setObject(moduleName, newDriverDict); - } else { - OSDictionary * mostRecentExtension = - compareExtensionVersions(incumbentExt, newDriverDict); - - if (mostRecentExtension == incumbentExt) { - /* Do nothing, we've got the most recent. */ - } else if (mostRecentExtension == newDriverDict) { - if (!startupExtensions->setObject(moduleName, - newDriverDict)) { - - /* This is a fatal error, so bail. - */ - IOLog("recordStartupExtensions(): Failed to add " - "identifier %s\n", - moduleName->getCStringNoCopy()); - LOG_DELAY(); - result = false; - goto finish; - } - } else /* should be NULL */ { - - /* This is a nonfatal error, so continue. 
- */ - IOLog("recordStartupExtensions(): Error comparing " - "versions of duplicate extensions %s.\n", - moduleName->getCStringNoCopy()); - LOG_DELAY(); - continue; - } - } - - - } else if ( !strncmp(keyValue, BOOTX_MULTIKEXT_PREFIX, - strlen(BOOTX_MULTIKEXT_PREFIX)) ) { - - mkextExtensions = OSDictionary::withCapacity(10); - if (!mkextExtensions) { - IOLog("Error: Couldn't allocate dictionary to unpack " - "multi-extension archive.\n"); - LOG_DELAY(); - result = false; - goto finish; // allocation failure is fatal for this routine - } - if (!readExtensions(propertyDict, keyValue, mkextExtensions)) { - IOLog("Error: Couldn't unpack multi-extension archive.\n"); - LOG_DELAY(); - continue; - } else { - if (!mergeExtensionDictionaries(startupExtensions, - mkextExtensions)) { - - IOLog("Error: Failed to merge new extensions into " - "existing set.\n"); - LOG_DELAY(); - result = false; - goto finish; // merge error is fatal for this routine - } - } - } - - // Do not release key. - - } /* while ( (key = OSDynamicCast(OSString, ...) ) ) */ - - if (!mergeExtensionDictionaries(existingExtensions, startupExtensions)) { - IOLog("Error: Failed to merge new extensions into existing set.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - - result = addPersonalities(startupExtensions); - if (!result) { - IOLog("Error: Failed to add personalities for extensions extracted " - "from archive.\n"); - LOG_DELAY(); - result = false; - goto finish; - } - -finish: - - // reused so clear first! 
- if (keyIterator) { - keyIterator->release(); - keyIterator = 0; - } - - if (!result) { - IOLog("Error: Failed to record startup extensions.\n"); - LOG_DELAY(); - } else { -#if DEBUG - keyIterator = OSCollectionIterator::withCollection( - startupExtensions); - - if (keyIterator) { - while ( (key = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - - IOLog("Found extension \"%s\".\n", - key->getCStringNoCopy()); - LOG_DELAY(); - } - keyIterator->release(); - keyIterator = 0; - } -#endif /* DEBUG */ - } - - if (newDriverDict) newDriverDict->release(); - if (propertyDict) propertyDict->release(); - if (bootxMemoryMap) bootxMemoryMap->release(); - if (mkextExtensions) mkextExtensions->release(); - if (startupExtensions) startupExtensions->release(); - - return result; -} - - -/********************************************************************* -* This function removes an entry from the dictionary of startup -* extensions. It's used when an extension can't be loaded, for -* whatever reason. For drivers, this allows another matching driver -* to be loaded, so that, for example, a driver for the root device -* can be found. -*********************************************************************/ -void removeStartupExtension(const char * extensionName) { - OSDictionary * startupExtensions = NULL; // don't release - OSDictionary * extensionDict = NULL; // don't release - OSDictionary * extensionPlist = NULL; // don't release - OSDictionary * extensionPersonalities = NULL; // don't release - OSDictionary * personality = NULL; // don't release - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key = NULL; // don't release - - startupExtensions = getStartupExtensions(); - if (!startupExtensions) goto finish; - - - /* Find the extension's entry in the dictionary of - * startup extensions. 
- */ - extensionDict = OSDynamicCast(OSDictionary, - startupExtensions->getObject(extensionName)); - if (!extensionDict) goto finish; - - extensionPlist = OSDynamicCast(OSDictionary, - extensionDict->getObject("plist")); - if (!extensionPlist) goto finish; - - extensionPersonalities = OSDynamicCast(OSDictionary, - extensionPlist->getObject("IOKitPersonalities")); - if (!extensionPersonalities) goto finish; - - /* If it was there, remove it from the catalogue proper - * by calling removeDrivers(). Pass true for the second - * argument to trigger a new round of matching, and - * then remove the extension from the dictionary of startup - * extensions. - */ - keyIterator = OSCollectionIterator::withCollection( - extensionPersonalities); - if (!keyIterator) { - IOLog("Error: Couldn't allocate iterator to scan" - " personalities for %s.\n", extensionName); - LOG_DELAY(); - } - - while ((key = OSDynamicCast(OSString, keyIterator->getNextObject()))) { - personality = OSDynamicCast(OSDictionary, - extensionPersonalities->getObject(key)); - - - if (personality) { - gIOCatalogue->removeDrivers(personality, true); - } - } - - startupExtensions->removeObject(extensionName); - -finish: - - if (keyIterator) keyIterator->release(); - return; -} - -/********************************************************************* -* FIXME: This function invalidates the globals gStartupExtensions and -* FIXME: ...gBootLoaderObjects without setting them to NULL. Since -* FIXME: ...the code itself is immediately unloaded, there may not be -* FIXME: ...any reason to worry about that! 
-*********************************************************************/ -void clearStartupExtensionsAndLoaderInfo(void) -{ - OSDictionary * startupExtensions = NULL; // must release - OSArray * bootLoaderObjects = NULL; // must release - - IORegistryEntry * bootxMemoryMap = NULL; // must release - OSDictionary * propertyDict = NULL; // must release - OSCollectionIterator * keyIterator = NULL; // must release - OSString * key = NULL; // don't release - - /***** - * Drop any temporarily held data objects. - */ - bootLoaderObjects = getBootLoaderObjects(); - if (bootLoaderObjects) { - bootLoaderObjects->release(); - } - - /**** - * If any "code" entries in driver dictionaries are accompanied - * by "compressedCode" entries, then those data objects were - * created based of of kmem_alloc()'ed memory, which must be - * freed specially. - */ - startupExtensions = getStartupExtensions(); - if (startupExtensions) { - keyIterator = - OSCollectionIterator::withCollection(startupExtensions); - if (!keyIterator) { - IOLog("Error: Couldn't allocate iterator for startup " - "extensions.\n"); - LOG_DELAY(); - goto memory_map; // bail to the memory_map label - } - - while ( (key = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - - OSDictionary * driverDict = 0; - OSData * codeData = 0; - - driverDict = OSDynamicCast(OSDictionary, - startupExtensions->getObject(key)); - if (driverDict) { - codeData = OSDynamicCast(OSData, - driverDict->getObject("code")); - - if (codeData && - driverDict->getObject("compressedCode")) { - - kmem_free(kernel_map, - (unsigned int)codeData->getBytesNoCopy(), - codeData->getLength()); - } - } - } - - keyIterator->release(); - startupExtensions->release(); - } - -memory_map: - - /**** - * Go through the device tree's memory map and remove any driver - * data entries. 
- */ - bootxMemoryMap = - IORegistryEntry::fromPath( - "/chosen/memory-map", // path - gIODTPlane // plane - ); - // return value is retained so be sure to release it - - if (!bootxMemoryMap) { - IOLog("Error: Couldn't read booter memory map.\n"); - LOG_DELAY(); - goto finish; - } - - propertyDict = bootxMemoryMap->dictionaryWithProperties(); - if (!propertyDict) { - IOLog("Error: Couldn't get property dictionary " - "from memory map.\n"); - LOG_DELAY(); - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(propertyDict); - if (!keyIterator) { - IOLog("Error: Couldn't allocate iterator for driver images.\n"); - LOG_DELAY(); - goto finish; - } - - while ( (key = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - - const char * keyValue = key->getCStringNoCopy(); - - if ( !strncmp(keyValue, BOOTX_KEXT_PREFIX, - strlen(BOOTX_KEXT_PREFIX)) || - !strncmp(keyValue, BOOTX_MULTIKEXT_PREFIX, - strlen(BOOTX_MULTIKEXT_PREFIX)) ) { - - OSData * bootxDriverDataObject = NULL; - const MemoryMapFileInfo * driverInfo = 0; - - bootxDriverDataObject = OSDynamicCast(OSData, - propertyDict->getObject(keyValue)); - // don't release bootxDriverDataObject - - if (!bootxDriverDataObject) { - continue; - } - driverInfo = (const MemoryMapFileInfo *) - bootxDriverDataObject->getBytesNoCopy(0, - sizeof(MemoryMapFileInfo)); - IODTFreeLoaderInfo((char *)keyValue, - (void *)driverInfo->paddr, - (int)driverInfo->length); - } - } - -finish: - if (bootxMemoryMap) bootxMemoryMap->release(); - if (propertyDict) propertyDict->release(); - if (keyIterator) keyIterator->release(); - - return; -} diff --git a/libsa/conf/MASTER b/libsa/conf/MASTER index a8ce6d996..c2690d207 100644 --- a/libsa/conf/MASTER +++ b/libsa/conf/MASTER @@ -55,17 +55,13 @@ options KDEBUG # kernel tracing # options GPROF # kernel profiling # options CONFIG_NOLIBKLD # kernel linker # -makeoptions LIBKLD_PATH = "/usr/local/lib/libkld.a" # -makeoptions LIBKLD = " " # -makeoptions LIBKLD = 
"${LIBKLD_PATH}" # options MALLOC_RESET_GC # -options MALLOC_KLD_VM_ALLOCATE # -# Use mach_vm_* calls for libsa kernel code, since we redefine vm_* for libkld -options vm_allocate = mach_vm_allocate # -options vm_deallocate = mach_vm_deallocate # options CONFIG_DTRACE # options CONFIG_NO_PANIC_STRINGS # options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # + +options CONFIG_KXLD # kxld/runtime linking of kexts # + diff --git a/libsa/conf/MASTER.i386 b/libsa/conf/MASTER.i386 index 383d26f5a..66fe402b0 100644 --- a/libsa/conf/MASTER.i386 +++ b/libsa/conf/MASTER.i386 @@ -1,9 +1,10 @@ ###################################################################### # -# RELEASE = [ intel mach libkerncpp config_dtrace ] +# RELEASE = [ intel mach libkerncpp config_dtrace config_kxld ] # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug ] # +# # EMBEDDED = [ intel mach libkerncpp ] # DEVELOPMENT = [ EMBEDDED config_dtrace ] # @@ -11,3 +12,5 @@ machine "i386" # cpu "i386" # + +options NO_NESTED_PMAP # diff --git a/libsa/conf/MASTER.ppc b/libsa/conf/MASTER.ppc index 9995642ed..5119c4062 100644 --- a/libsa/conf/MASTER.ppc +++ b/libsa/conf/MASTER.ppc @@ -4,7 +4,7 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- --------------- # -# RELEASE = [ppc mach libkerncpp config_dtrace] +# RELEASE = [ppc mach libkerncpp config_dtrace config_kxld ] # DEVELOPMENT = [ RELEASE ] # PROFILE = [RELEASE profile] # DEBUG = [ppc mach libkerncpp debug] diff --git a/libsa/conf/MASTER.x86_64 b/libsa/conf/MASTER.x86_64 new file mode 100644 index 000000000..68218d47d --- /dev/null +++ b/libsa/conf/MASTER.x86_64 @@ -0,0 +1,16 @@ +###################################################################### +# +# RELEASE = [ intel mach libkerncpp config_dtrace config_kxld ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ RELEASE debug ] +# +# +# EMBEDDED = [ intel mach libkerncpp ] +# DEVELOPMENT = [ EMBEDDED ] +# 
+###################################################################### + +machine "x86_64" # +cpu "x86_64" # + +options NO_NESTED_PMAP # diff --git a/libsa/conf/Makefile b/libsa/conf/Makefile index c7a74f71e..f2daf7618 100644 --- a/libsa/conf/Makefile +++ b/libsa/conf/Makefile @@ -35,7 +35,6 @@ $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/libsa/conf/Makefile.i386 b/libsa/conf/Makefile.i386 index 56a4eff64..3695a666c 100644 --- a/libsa/conf/Makefile.i386 +++ b/libsa/conf/Makefile.i386 @@ -2,34 +2,6 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### -# Enable -Werror for i386 builds -CFLAGS+= $(WERROR) -CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) - -# Objects that don't compile cleanly in libsa: -OBJS_NO_WERROR= \ - ioconf.o \ - UNDRequest.o \ - bootstrap.cpo \ - bsearch.o \ - c++rem3.o \ - catalogue.cpo \ - dgraph.o \ - kext.cpo \ - kld_patch.o \ - load.o \ - mach.o \ - malloc.o \ - misc.o \ - sort.o \ - strrchr.o \ - strstr.o \ - vers_rsrc.o - -OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) - -$(OBJS_WERROR): WERROR=-Werror - ###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/libsa/conf/Makefile.ppc b/libsa/conf/Makefile.ppc index 2b438f2fa..cd79f229a 100644 --- a/libsa/conf/Makefile.ppc +++ b/libsa/conf/Makefile.ppc @@ -2,7 +2,6 @@ #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### - ###################################################################### #END Machine dependent Makefile fragment for 
ppc ###################################################################### diff --git a/libsa/conf/Makefile.template b/libsa/conf/Makefile.template index 9d966e91a..a975da2a5 100644 --- a/libsa/conf/Makefile.template +++ b/libsa/conf/Makefile.template @@ -11,7 +11,7 @@ export IDENT # -# XXX: INCFLAGS +# INCFLAGS # INCFLAGS_MAKEFILE= @@ -25,12 +25,10 @@ include $(MakeInc_cmd) include $(MakeInc_def) # -# XXX: CFLAGS +# CFLAGS # -CFLAGS+= -imacros meta_features.h -DKERNEL -DLIBSA_KERNEL_PRIVATE \ - -Wall -Wno-four-char-constants -fno-common $(CFLAGS_INLINE_CONFIG) - -SFLAGS+= -DKERNEL +CFLAGS+= -imacros meta_features.h -DLIBSA_KERNEL_PRIVATE \ + -Werror $(CFLAGS_INLINE_CONFIG) # # Directories for mig generated files @@ -77,10 +75,14 @@ ${OBJS}: ${OBJSDEPS} LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} $(LIBKLD) - $(_v)$(SEG_HACK) __KLD $(COMPONENT).o -o $(COMPONENT)_kld.o - $(_v)$(LD) $(LDFLAGS_COMPONENT) $(COMPONENT)_kld.o -o $(COMPONENT).o + $(_v)for kld_file in ${LDOBJS}; do \ + $(SEG_HACK) __KLD $${kld_file} -o $${kld_file}__; \ + mv $${kld_file}__ $${kld_file} ; \ + done; + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_all: $(COMPONENT).o diff --git a/libsa/conf/Makefile.x86_64 b/libsa/conf/Makefile.x86_64 new file mode 100644 index 000000000..7b0de925d --- /dev/null +++ b/libsa/conf/Makefile.x86_64 @@ -0,0 +1,7 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### diff --git a/libsa/conf/files b/libsa/conf/files index 
9aa04c95b..425949662 100644 --- a/libsa/conf/files +++ b/libsa/conf/files @@ -8,20 +8,3 @@ OPTIONS/config_dtrace optional config_dtrace # libsa libsa/bootstrap.cpp standard -libsa/catalogue.cpp standard -libsa/c++rem3.c standard -libsa/dgraph.c standard -libsa/kld_patch.c standard -libsa/kext.cpp standard -libsa/load.c standard -libsa/mach.c standard -libsa/misc.c standard -libsa/mkext.c standard -libsa/vers_rsrc.c standard - -# pseudo libc -libsa/bsearch.c standard -libsa/malloc.c standard -libsa/sort.c standard -libsa/strrchr.c standard -libsa/strstr.c standard diff --git a/libsa/conf/files.i386 b/libsa/conf/files.i386 index a5f24a43f..e69de29bb 100644 --- a/libsa/conf/files.i386 +++ b/libsa/conf/files.i386 @@ -1 +0,0 @@ -libsa/i386/setjmp.s standard diff --git a/libsa/conf/files.ppc b/libsa/conf/files.ppc index 79390ba2e..8b1378917 100644 --- a/libsa/conf/files.ppc +++ b/libsa/conf/files.ppc @@ -1,2 +1 @@ -libsa/ppc/setjmp.s standard diff --git a/libsa/conf/files.x86_64 b/libsa/conf/files.x86_64 new file mode 100644 index 000000000..e69de29bb diff --git a/libsa/dgraph.c b/libsa/dgraph.c deleted file mode 100644 index 03ecb3889..000000000 --- a/libsa/dgraph.c +++ /dev/null @@ -1,787 +0,0 @@ -/* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#ifdef KERNEL -#include -#else -#include -#include -#include -#include -#include -#include - -#include "KXKext.h" -#include "vers_rsrc.h" -#endif /* KERNEL */ - -#include "dgraph.h" -#include "load.h" - -#ifdef KERNEL -#include -#endif - -static void __dgraph_entry_free(dgraph_entry_t * entry); - -#ifdef KERNEL -#define dgstrdup(string) STRDUP(string, M_TEMP) -#define dgfree(string) FREE(string, M_TEMP) -#else -#define dgstrdup strdup -#define dgfree(string) free(string) -#endif /* KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -dgraph_error_t dgraph_init(dgraph_t * dgraph) -{ - bzero(dgraph, sizeof(dgraph_t)); - - dgraph->capacity = (5); // pulled from a hat - - /* Make sure list is big enough & graph has a good start size. 
- */ - dgraph->graph = (dgraph_entry_t **)malloc( - dgraph->capacity * sizeof(dgraph_entry_t *)); - - if (!dgraph->graph) { - return dgraph_error; - } - - return dgraph_valid; -} - -#ifndef KERNEL -/******************************************************************************* -* -*******************************************************************************/ -dgraph_error_t dgraph_init_with_arglist( - dgraph_t * dgraph, - int expect_addresses, - const char * dependency_delimiter, - const char * kernel_dependency_delimiter, - int argc, - char * argv[]) -{ - dgraph_error_t result = dgraph_valid; - unsigned int i; - int found_zero_load_address = 0; - int found_nonzero_load_address = 0; - dgraph_entry_t * current_dependent = NULL; - char kernel_dependencies = 0; - - result = dgraph_init(dgraph); - if (result != dgraph_valid) { - return result; - } - - for (i = 0; i < argc; i++) { - vm_address_t load_address = 0; - - if (0 == strcmp(argv[i], dependency_delimiter)) { - kernel_dependencies = 0; - current_dependent = NULL; - continue; - } else if (0 == strcmp(argv[i], kernel_dependency_delimiter)) { - kernel_dependencies = 1; - current_dependent = NULL; - continue; - } - - if (expect_addresses) { - char * address = rindex(argv[i], '@'); - if (address) { - *address++ = 0; // snip the address from the filename - load_address = strtoul(address, NULL, 0); - } - } - - if (!current_dependent) { - current_dependent = dgraph_add_dependent(dgraph, argv[i], - /* expected kmod name */ NULL, /* expected vers */ 0, - load_address, 0); - if (!current_dependent) { - return dgraph_error; - } - } else { - if (!dgraph_add_dependency(dgraph, current_dependent, argv[i], - /* expected kmod name */ NULL, /* expected vers */ 0, - load_address, kernel_dependencies)) { - - return dgraph_error; - } - } - } - - dgraph->root = dgraph_find_root(dgraph); - dgraph_establish_load_order(dgraph); - - if (!dgraph->root) { - kload_log_error("dependency graph has no root" KNL); - return dgraph_invalid; - } 
- - if (dgraph->root->is_kernel_component && !dgraph->root->is_symbol_set) { - kload_log_error("dependency graph root is a kernel component" KNL); - return dgraph_invalid; - } - - for (i = 0; i < dgraph->length; i++) { - if (dgraph->graph[i]->loaded_address == 0) { - found_zero_load_address = 1; - } else { - found_nonzero_load_address = 1; - } - if ( (i > 0) && - (found_zero_load_address && found_nonzero_load_address)) { - - kload_log_error( - "load addresses must be specified for all module files" KNL); - return dgraph_invalid; - } - } - - return dgraph_valid; -} -#endif /* not KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -static void __dgraph_entry_free(dgraph_entry_t * entry) -{ - if (entry->name) { - dgfree(entry->name); - entry->name = NULL; - } - if (entry->expected_kmod_name) { - dgfree(entry->expected_kmod_name); - entry->expected_kmod_name = NULL; - } - if (entry->expected_kmod_vers) { - dgfree(entry->expected_kmod_vers); - entry->expected_kmod_vers = NULL; - } - if (entry->dependencies) { - free(entry->dependencies); - entry->dependencies = NULL; - } - if (entry->symbols_malloc) { - free((void *) entry->symbols_malloc); - entry->symbols_malloc = 0; - } - free(entry); - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -void dgraph_free( - dgraph_t * dgraph, - int free_graph) -{ - unsigned int entry_index; - - if (!dgraph) { - return; - } - - for (entry_index = 0; entry_index < dgraph->length; entry_index++) { - dgraph_entry_t * current = dgraph->graph[entry_index]; - __dgraph_entry_free(current); - } - - if (dgraph->graph) { - free(dgraph->graph); - dgraph->graph = NULL; - } - - if (dgraph->load_order) { - free(dgraph->load_order); - dgraph->load_order = NULL; - } - - if (free_graph && dgraph) { 
- free(dgraph); - } - - return; -} - - -/******************************************************************************* -* -*******************************************************************************/ -dgraph_entry_t * dgraph_find_root(dgraph_t * dgraph) { - dgraph_entry_t * root = NULL; - dgraph_entry_t * candidate = NULL; - unsigned int candidate_index; - unsigned int scan_index; - unsigned int dep_index; - - - /* Scan each entry in the graph for one that isn't in any other entry's - * dependencies. - */ - for (candidate_index = 0; candidate_index < dgraph->length; - candidate_index++) { - - candidate = dgraph->graph[candidate_index]; - - for (scan_index = 0; scan_index < dgraph->length; scan_index++) { - - dgraph_entry_t * scan_entry = dgraph->graph[scan_index]; - if (candidate == scan_entry) { - // don't check yourself - continue; - } - for (dep_index = 0; dep_index < scan_entry->num_dependencies; - dep_index++) { - - /* If the dependency being checked is the candidate, - * then the candidate can't be the root. - */ - dgraph_entry_t * check = scan_entry->dependencies[dep_index]; - - if (check == candidate) { - candidate = NULL; - break; - } - } - - /* If the candidate was rejected, then hop out of this loop. - */ - if (!candidate) { - break; - } - } - - /* If we got here, the candidate is a valid one. However, if we already - * found another, that means we have two possible roots (or more), which - * is NOT ALLOWED. 
- */ - if (candidate) { - if (root) { - kload_log_error("dependency graph has multiple roots " - "(%s and %s)" KNL, root->name, candidate->name); - return NULL; // two valid roots, illegal - } else { - root = candidate; - } - } - } - - if (!root) { - kload_log_error("dependency graph has no root node" KNL); - } - - return root; -} - -/******************************************************************************* -* -*******************************************************************************/ -dgraph_entry_t ** fill_backward_load_order( - dgraph_entry_t ** backward_load_order, - unsigned int * list_length, - dgraph_entry_t * first_entry, - unsigned int * last_index /* out param */) -{ - unsigned int i; - unsigned int scan_index = 0; - unsigned int add_index = 0; - dgraph_entry_t * scan_entry; - - if (*list_length == 0) { - if (backward_load_order) { - free(backward_load_order); - backward_load_order = NULL; - } - goto finish; - } - - backward_load_order[add_index++] = first_entry; - - while (scan_index < add_index) { - - if (add_index > 255) { - kload_log_error( - "dependency list for %s ridiculously long; probably a loop" KNL, - first_entry->name); - if (backward_load_order) { - free(backward_load_order); - backward_load_order = NULL; - } - goto finish; - } - - scan_entry = backward_load_order[scan_index++]; - - /* Increase the load order list if needed. - */ - if (add_index + scan_entry->num_dependencies > (*list_length)) { - (*list_length) *= 2; - backward_load_order = (dgraph_entry_t **)realloc( - backward_load_order, - (*list_length) * sizeof(dgraph_entry_t *)); - if (!backward_load_order) { - goto finish; - } - } - - /* Put the dependencies of the scanning entry into the list. 
- */ - for (i = 0; i < scan_entry->num_dependencies; i++) { - backward_load_order[add_index++] = - scan_entry->dependencies[i]; - } - } - -finish: - - if (last_index) { - *last_index = add_index; - } - return backward_load_order; -} - -/******************************************************************************* -* -*******************************************************************************/ -int dgraph_establish_load_order(dgraph_t * dgraph) { - unsigned int total_dependencies; - unsigned int entry_index; - unsigned int list_index; - unsigned int backward_index; - unsigned int forward_index; - size_t load_order_size; - size_t backward_load_order_size; - dgraph_entry_t ** backward_load_order; - - /* Lose the old load_order list. Size can change, so it's easier to just - * recreate from scratch. - */ - if (dgraph->load_order) { - free(dgraph->load_order); - dgraph->load_order = NULL; - } - - /* Figure how long the list needs to be to accommodate the max possible - * entries from the graph. Duplicates get weeded out, but the list - * initially has to accommodate them all. - */ - total_dependencies = dgraph->length; - - for (entry_index = 0; entry_index < dgraph->length; entry_index ++) { - dgraph_entry_t * curdep = dgraph->graph[entry_index]; - total_dependencies += curdep->num_dependencies; - } - - /* Hmm, nothing to do! 
- */ - if (!total_dependencies) { - return 1; - } - - backward_load_order_size = total_dependencies * sizeof(dgraph_entry_t *); - - backward_load_order = (dgraph_entry_t **)malloc(backward_load_order_size); - if (!backward_load_order) { - kload_log_error("malloc failure" KNL); - return 0; - } - bzero(backward_load_order, backward_load_order_size); - - backward_load_order = fill_backward_load_order(backward_load_order, - &total_dependencies, dgraph->root, &list_index); - if (!backward_load_order) { - kload_log_error("error establishing load order" KNL); - return 0; - } - - load_order_size = dgraph->length * sizeof(dgraph_entry_t *); - dgraph->load_order = (dgraph_entry_t **)malloc(load_order_size); - if (!dgraph->load_order) { - kload_log_error("malloc failure" KNL); - return 0; - } - bzero(dgraph->load_order, load_order_size); - - - /* Reverse the list into the dgraph's load_order list, - * removing any duplicates. - */ - backward_index = list_index; - // - // the required 1 is taken off in loop below! - - forward_index = 0; - do { - dgraph_entry_t * current_entry; - unsigned int already_got_it = 0; - - backward_index--; - - /* Get the entry to check. - */ - current_entry = backward_load_order[backward_index]; - - /* Did we already get it? - */ - for (list_index = 0; list_index < forward_index; list_index++) { - if (current_entry == dgraph->load_order[list_index]) { - already_got_it = 1; - break; - } - } - - if (already_got_it) { - continue; - } - - /* Haven't seen it before; tack it onto the load-order list. 
- */ - dgraph->load_order[forward_index++] = current_entry; - - } while (backward_index > 0); - - free(backward_load_order); - - return 1; -} - -/******************************************************************************* -* -*******************************************************************************/ -void dgraph_log(dgraph_t * depgraph) -{ - unsigned int i, j; - - kload_log_message("flattened dependency list: " KNL); - for (i = 0; i < depgraph->length; i++) { - dgraph_entry_t * current = depgraph->graph[i]; - - kload_log_message(" %s" KNL, current->name); - kload_log_message(" is kernel component: %s" KNL, - current->is_kernel_component ? "yes" : "no"); - kload_log_message(" expected kmod name: [%s]" KNL, - current->expected_kmod_name); - kload_log_message(" expected kmod vers: [%s]" KNL, - current->expected_kmod_vers); - } - kload_log_message("" KNL); - - kload_log_message("load order dependency list: " KNL); - for (i = 0; i < depgraph->length; i++) { - dgraph_entry_t * current = depgraph->load_order[i]; - kload_log_message(" %s" KNL, current->name); - } - kload_log_message("" KNL); - - kload_log_message("dependency graph: " KNL); - for (i = 0; i < depgraph->length; i++) { - dgraph_entry_t * current = depgraph->graph[i]; - for (j = 0; j < current->num_dependencies; j++) { - dgraph_entry_t * cdep = current->dependencies[j]; - kload_log_message(" %s -> %s" KNL, current->name, cdep->name); - } - } - kload_log_message("" KNL); - - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -dgraph_entry_t * dgraph_find_dependent(dgraph_t * dgraph, const char * name) -{ - unsigned int i; - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * current_entry = dgraph->graph[i]; - if (0 == strcmp(name, current_entry->name)) { - return current_entry; - } - } - - return NULL; -} - 
-/******************************************************************************* -* -*******************************************************************************/ -dgraph_entry_t * dgraph_add_dependent( - dgraph_t * dgraph, - const char * name, -#ifdef KERNEL - void * object, - size_t object_length, - bool object_is_kmem, -#if CONFIG_MACF_KEXT - kmod_args_t user_data, - mach_msg_type_number_t user_data_length, -#endif -#endif /* KERNEL */ - const char * expected_kmod_name, - const char * expected_kmod_vers, - vm_address_t load_address, - char is_kernel_component) -{ - int error = 0; - dgraph_entry_t * found_entry = NULL; - dgraph_entry_t * new_entry = NULL; // free on error - dgraph_entry_t * the_entry = NULL; // returned - - /* Already got it? Great! - */ - found_entry = dgraph_find_dependent(dgraph, name); - if (found_entry) { - if (found_entry->is_kernel_component != is_kernel_component) { - kload_log_error( - "%s is already defined as a %skernel component" KNL, - name, found_entry->is_kernel_component ? "" : "non-"); - error = 1; - goto finish; - } - - if (load_address != 0) { - if (found_entry->loaded_address == 0) { - found_entry->do_load = 0; - found_entry->loaded_address = load_address; - } else if (found_entry->loaded_address != load_address) { - kload_log_error( - "%s has been assigned two different addresses (0x%x, 0x%x) KNL", - found_entry->name, - found_entry->loaded_address, - load_address); - error = 1; - goto finish; - } - } - the_entry = found_entry; - goto finish; - } - - /* If the graph is full, make it bigger. 
- */ - if (dgraph->length == dgraph->capacity) { - unsigned int old_capacity = dgraph->capacity; - dgraph_entry_t ** newgraph; - - dgraph->capacity *= 2; - newgraph = (dgraph_entry_t **)malloc(dgraph->capacity * - sizeof(dgraph_entry_t *)); - if (!newgraph) { - return NULL; - } - memcpy(newgraph, dgraph->graph, old_capacity * sizeof(dgraph_entry_t *)); - free(dgraph->graph); - dgraph->graph = newgraph; - } - - if (strlen(expected_kmod_name) > KMOD_MAX_NAME - 1) { - kload_log_error("expected kmod name \"%s\" is too long" KNL, - expected_kmod_name); - error = 1; - goto finish; - } - - /* Fill it. - */ - new_entry = (dgraph_entry_t *)malloc(sizeof(dgraph_entry_t)); - if (!new_entry) { - error = 1; - goto finish; - } - bzero(new_entry, sizeof(dgraph_entry_t)); - new_entry->expected_kmod_name = dgstrdup(expected_kmod_name); - if (!new_entry->expected_kmod_name) { - error = 1; - goto finish; - } - new_entry->expected_kmod_vers = dgstrdup(expected_kmod_vers); - if (!new_entry->expected_kmod_vers) { - error = 1; - goto finish; - } - new_entry->is_kernel_component = is_kernel_component; - - // /hacks - new_entry->is_symbol_set = (2 & is_kernel_component); - - new_entry->opaques = 0; - if (!strncmp(new_entry->expected_kmod_name, - "com.apple.kpi", strlen("com.apple.kpi"))) - new_entry->opaques |= kOpaqueLink; - if (!strcmp(new_entry->expected_kmod_name, - "com.apple.kernel")) - new_entry->opaques |= kOpaqueLink | kRawKernelLink; - // hacks/ - - dgraph->has_symbol_sets |= new_entry->is_symbol_set; - - new_entry->do_load = !is_kernel_component; - -#ifndef KERNEL - new_entry->object = NULL; // provided elswehere in userland - new_entry->object_length = 0; -#else - new_entry->object = object; - new_entry->object_length = object_length; - new_entry->object_is_kmem = object_is_kmem; -#if CONFIG_MACF_KEXT - new_entry->user_data = user_data; - new_entry->user_data_length = user_data_length; -#endif -#endif /* KERNEL */ - new_entry->name = dgstrdup(name); - if (!new_entry->name) { - 
error = 1; - goto finish; - } - dgraph->graph[dgraph->length++] = new_entry; - - - /* Create a dependency list for the entry. Start with 5 slots. - */ - new_entry->dependencies_capacity = 5; - new_entry->num_dependencies = 0; - new_entry->dependencies = (dgraph_entry_t **)malloc( - new_entry->dependencies_capacity * sizeof(dgraph_entry_t *)); - if (!new_entry->dependencies) { - error = 1; - goto finish; - } - - if (new_entry->loaded_address == 0) { - new_entry->loaded_address = load_address; - if (load_address != 0) { - new_entry->do_load = 0; - } - } - - the_entry = new_entry; - -finish: - if (error) { - if (new_entry) __dgraph_entry_free(new_entry); - the_entry = new_entry = NULL; - } - return the_entry; -} - -/******************************************************************************* -* -*******************************************************************************/ -dgraph_entry_t * dgraph_add_dependency( - dgraph_t * dgraph, - dgraph_entry_t * current_dependent, - const char * name, -#ifdef KERNEL - void * object, - size_t object_length, - bool object_is_kmem, -#if CONFIG_MACF_KEXT - kmod_args_t user_data, - mach_msg_type_number_t user_data_length, -#endif -#endif /* KERNEL */ - const char * expected_kmod_name, - const char * expected_kmod_vers, - vm_address_t load_address, - char is_kernel_component) -{ - dgraph_entry_t * dependency = NULL; - unsigned int i = 0; - - /* If the dependent's dependency list is full, make it bigger. 
- */ - if (current_dependent->num_dependencies == - current_dependent->dependencies_capacity) { - - unsigned int old_capacity = current_dependent->dependencies_capacity; - dgraph_entry_t ** newlist; - - current_dependent->dependencies_capacity *= 2; - newlist = (dgraph_entry_t **)malloc( - (current_dependent->dependencies_capacity * - sizeof(dgraph_entry_t *)) ); - - if (!newlist) { - return NULL; - } - memcpy(newlist, current_dependent->dependencies, - old_capacity * sizeof(dgraph_entry_t *)); - free(current_dependent->dependencies); - current_dependent->dependencies = newlist; - } - - - /* Find or add the entry for the new dependency. - */ - dependency = dgraph_add_dependent(dgraph, name, -#ifdef KERNEL - object, object_length, object_is_kmem, -#if CONFIG_MACF_KEXT - user_data, user_data_length, -#endif -#endif /* KERNEL */ - expected_kmod_name, expected_kmod_vers, load_address, - is_kernel_component); - if (!dependency) { - return NULL; - } - - if (dependency == current_dependent) { - kload_log_error("attempt to set dependency on itself: %s" KNL, - current_dependent->name); - return NULL; - } - - for (i = 0; i < current_dependent->num_dependencies; i++) { - dgraph_entry_t * this_dependency = current_dependent->dependencies[i]; - if (this_dependency == dependency) { - return dependency; - } - } - - /* Fill in the dependency. - */ - current_dependent->dependencies[current_dependent->num_dependencies] = - dependency; - current_dependent->num_dependencies++; - - current_dependent->opaque_link |= dependency->opaques; - dgraph->has_opaque_links |= current_dependent->opaque_link; - - return dependency; -} diff --git a/libsa/dgraph.h b/libsa/dgraph.h deleted file mode 100644 index 9d7340e2e..000000000 --- a/libsa/dgraph.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ -#ifndef __DGRAPH_H__ -#define __DGRAPH_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef KERNEL -#include -#include -#else -#include -#include -#endif /* KERNEL */ - -typedef struct dgraph_entry_t { - - char is_kernel_component; // means that name is a CFBundleIdentifier!!! - char is_symbol_set; - char opaques; - char opaque_link; - - // What we have to start from - char * name; // filename if user space, bundleid if kernel or kernel comp. 
- - void * object; // In kernel we keep track of the object file - size_t object_length; // we don't own this, however; it's just a ref -#ifdef KERNEL - bool object_is_kmem; // Only used when mapping a file! -#endif /* KERNEL */ - - /* If is_kernel_component is true then the do_load field is cleared and - * the kmod_id field gets set. - */ - - // Immediate dependencies of this entry - unsigned int dependencies_capacity; - unsigned int num_dependencies; - struct dgraph_entry_t ** dependencies; - - // These are filled in when the entry is created, and are written into - // the kmod linked image at load time. - char * expected_kmod_name; - char * expected_kmod_vers; - - bool is_mapped; // kld_file_map() has been called for this entry - - // For tracking already-loaded kmods or for doing symbol generation only - int do_load; // actually loading - vm_address_t loaded_address; // address loaded at or being faked at for symbol generation -#ifndef KERNEL - char * link_output_file; - bool link_output_file_alloc; -#endif - struct mach_header * linked_image; - vm_size_t linked_image_length; - - vm_address_t symbols; - vm_size_t symbols_length; - vm_address_t symbols_malloc; - - // for loading into kernel - vm_address_t kernel_alloc_address; - unsigned long kernel_alloc_size; - vm_address_t kernel_load_address; - unsigned long kernel_load_size; - unsigned long kernel_hdr_size; - unsigned long kernel_hdr_pad; - int need_cleanup; // true if load failed with kernel memory allocated - kmod_t kmod_id; // the id assigned by the kernel to a loaded kmod - -#if CONFIG_MACF_KEXT - // module-specific data from the plist - kmod_args_t user_data; - mach_msg_type_number_t user_data_length; -#endif - -} dgraph_entry_t; - -typedef struct { - unsigned int capacity; - unsigned int length; - dgraph_entry_t ** graph; - dgraph_entry_t ** load_order; - dgraph_entry_t * root; - char have_loaded_symbols; - char has_symbol_sets; - char has_opaque_links; - vm_address_t opaque_base_image; - vm_size_t 
opaque_base_length; -} dgraph_t; - -typedef enum { - dgraph_error = -1, - dgraph_invalid = 0, - dgraph_valid = 1 -} dgraph_error_t; - - -enum { kOpaqueLink = 0x01, kRawKernelLink = 0x02 }; - -dgraph_error_t dgraph_init(dgraph_t * dgraph); - -#ifndef KERNEL -/********** - * Initialize a dependency graph passed in. Returns nonzero on success, zero - * on failure. - * - * dependency_graph: a pointer to the dgraph to initialize. - * argc: the number of arguments in argv - * argv: an array of strings defining the dependency graph. This is a - * series of dependency lists, delimited by "-d" (except before - * the first list, naturally). Each list has as its first entry - * the dependent, followed by any number of DIRECT dependencies. - * The lists may be given in any order, but the first item in each - * list must be the dependent. Also, there can only be one root - * item (an item with no dependents upon it), and it must not be - * a kernel component. - */ -dgraph_error_t dgraph_init_with_arglist( - dgraph_t * dgraph, - int expect_addresses, - const char * dependency_delimiter, - const char * kernel_dependency_delimiter, - int argc, - char * argv[]); -#endif /* not KERNEL */ - -void dgraph_free( - dgraph_t * dgraph, - int free_graph); - -dgraph_entry_t * dgraph_find_root(dgraph_t * dgraph); - -int dgraph_establish_load_order(dgraph_t * dgraph); - -#ifndef KERNEL -void dgraph_print(dgraph_t * dgraph); -#endif /* not kernel */ -void dgraph_log(dgraph_t * depgraph); - - -/***** - * These functions are useful for hand-building a dgraph. 
- */ -dgraph_entry_t * dgraph_find_dependent(dgraph_t * dgraph, const char * name); - -dgraph_entry_t * dgraph_add_dependent( - dgraph_t * dgraph, - const char * name, -#ifdef KERNEL - void * object, - size_t object_length, - bool object_is_kmem, -#if CONFIG_MACF_KEXT - kmod_args_t user_data, - mach_msg_type_number_t user_data_length, -#endif -#endif /* KERNEL */ - const char * expected_kmod_name, - const char * expected_kmod_vers, - vm_address_t load_address, - char is_kernel_component); - -dgraph_entry_t * dgraph_add_dependency( - dgraph_t * dgraph, - dgraph_entry_t * current_dependent, - const char * name, -#ifdef KERNEL - void * object, - size_t object_length, - bool object_is_kmem, -#if CONFIG_MACF_KEXT - kmod_args_t user_data, - mach_msg_type_number_t user_data_length, -#endif -#endif /* KERNEL */ - const char * expected_kmod_name, - const char * expected_kmod_vers, - vm_address_t load_address, - char is_kernel_component); - -dgraph_entry_t ** fill_backward_load_order( - dgraph_entry_t ** backward_load_order, - unsigned int * list_length, - dgraph_entry_t * first_entry, - unsigned int * last_index /* out param */); - -#ifdef __cplusplus -} -#endif - -#endif /* __DGRAPH_H__ */ diff --git a/libsa/i386/setjmp.s b/libsa/i386/setjmp.s deleted file mode 100644 index cb52668e3..000000000 --- a/libsa/i386/setjmp.s +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * C library -- _setjmp, _longjmp - * - * _longjmp(a,v) - * will generate a "return(v)" from - * the last call to - * _setjmp(a) - * by restoring registers from the stack, - * The previous signal state is NOT restored. - * - */ - -#include - -.private_extern _setjmp -.private_extern _longjmp - -ENTRY(setjmp) - movl 4(%esp),%ecx # fetch buffer - movl %ebx,0(%ecx) - movl %esi,4(%ecx) - movl %edi,8(%ecx) - movl %ebp,12(%ecx) # save frame pointer of caller - popl %edx - movl %esp,16(%ecx) # save stack pointer of caller - movl %edx,20(%ecx) # save pc of caller - xorl %eax,%eax - jmp *%edx - - -ENTRY(longjmp) - movl 8(%esp),%eax # return(v) - movl 4(%esp),%ecx # fetch buffer - movl 0(%ecx),%ebx - movl 4(%ecx),%esi - movl 8(%ecx),%edi - movl 12(%ecx),%ebp - movl 16(%ecx),%esp - orl %eax,%eax - jnz 0f - incl %eax -0: jmp *20(%ecx) # done, return.... diff --git a/libsa/kext.cpp b/libsa/kext.cpp deleted file mode 100644 index 48ef63d16..000000000 --- a/libsa/kext.cpp +++ /dev/null @@ -1,889 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. 
- */ - -#include -#include -#include -#include -#include - -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include - -#include "kld_patch.h" -#include "dgraph.h" -#include "load.h" -}; - - -extern "C" { -extern kern_return_t -kmod_create_internal( - kmod_info_t *info, - kmod_t *id); - -extern kern_return_t -kmod_destroy_internal(kmod_t id); - -extern kern_return_t -kmod_start_or_stop( - kmod_t id, - int start, - kmod_args_t *data, - mach_msg_type_number_t *dataCount); - -extern kern_return_t kmod_retain(kmod_t id); -extern kern_return_t kmod_release(kmod_t id); - -extern Boolean kmod_load_request(const char * moduleName, Boolean make_request); -}; - -extern kmod_args_t -get_module_data(OSDictionary * kextPlist, mach_msg_type_number_t * datalen); - -extern struct mac_module_data *osdict_encode(OSDictionary *dict); - -#define DEBUG -#ifdef DEBUG -#define LOG_DELAY(x) IODelay((x) * 1000000) -#define VTYELLOW "\033[33m" -#define VTRESET "\033[0m" -#else -#define LOG_DELAY(x) -#define VTYELLOW -#define VTRESET -#endif /* DEBUG */ - - -#define KERNEL_PREFIX "com.apple.kernel" -#define KPI_PREFIX "com.apple.kpi" - - -/********************************************************************* -* -*********************************************************************/ -static -bool getKext( - const char * bundleid, - OSDictionary ** plist, - unsigned char ** code, - unsigned long * code_size, - bool * caller_owns_code) -{ - bool result = true; - OSDictionary * extensionsDict; // don't release - OSDictionary * extDict; // don't release - OSDictionary * extPlist; // don't release - unsigned long code_size_local; - - /* Get the dictionary of startup extensions. - * This is keyed by module name. 
- */ - extensionsDict = getStartupExtensions(); - if (!extensionsDict) { - IOLog("startup extensions dictionary is missing\n"); - result = false; - goto finish; - } - - /* Get the requested extension's dictionary entry and its property - * list, containing module dependencies. - */ - extDict = OSDynamicCast(OSDictionary, - extensionsDict->getObject(bundleid)); - - if (!extDict) { - IOLog("extension \"%s\" cannot be found\n", - bundleid); - result = false; - goto finish; - } - - if (plist) { - extPlist = OSDynamicCast(OSDictionary, extDict->getObject("plist")); - if (!extPlist) { - IOLog("extension \"%s\" has no info dictionary\n", - bundleid); - result = false; - goto finish; - } - *plist = extPlist; - } - - if (code) { - - /* If asking for code, the caller must provide a return buffer - * for ownership! - */ - if (!caller_owns_code) { - IOLog("getKext(): invalid usage (caller_owns_code not provided)\n"); - result = false; - goto finish; - } - - *code = 0; - if (code_size) { - *code_size = 0; - } - *caller_owns_code = false; - - *code = (unsigned char *)kld_file_getaddr(bundleid, - (unsigned long *)&code_size_local); - if (*code) { - if (code_size) { - *code_size = code_size_local; - } - } else { - OSData * driverCode = 0; // release only if uncompressing! 
- - driverCode = OSDynamicCast(OSData, extDict->getObject("code")); - if (driverCode) { - *code = (unsigned char *)driverCode->getBytesNoCopy(); - if (code_size) { - *code_size = driverCode->getLength(); - } - } else { // Look for compressed code and uncompress it - OSData * compressedCode = 0; - compressedCode = OSDynamicCast(OSData, - extDict->getObject("compressedCode")); - if (compressedCode) { - if (!uncompressModule(compressedCode, &driverCode)) { - IOLog("extension \"%s\": couldn't uncompress code\n", - bundleid); - result = false; - goto finish; - } - *caller_owns_code = true; - *code = (unsigned char *)driverCode->getBytesNoCopy(); - if (code_size) { - *code_size = driverCode->getLength(); - } - driverCode->release(); - } - } - } - } - -finish: - - return result; -} - - -/********************************************************************* -* -*********************************************************************/ -static -bool verifyCompatibility(OSString * extName, OSString * requiredVersion) -{ - OSDictionary * extPlist; // don't release - OSString * extVersion; // don't release - OSString * extCompatVersion; // don't release - VERS_version ext_version; - VERS_version ext_compat_version; - VERS_version required_version; - - if (!getKext(extName->getCStringNoCopy(), &extPlist, NULL, NULL, NULL)) { - return false; - } - - extVersion = OSDynamicCast(OSString, - extPlist->getObject("CFBundleVersion")); - if (!extVersion) { - IOLog("verifyCompatibility(): " - "Extension \"%s\" has no \"CFBundleVersion\" property.\n", - extName->getCStringNoCopy()); - return false; - } - - extCompatVersion = OSDynamicCast(OSString, - extPlist->getObject("OSBundleCompatibleVersion")); - if (!extCompatVersion) { - IOLog("verifyCompatibility(): " - "Extension \"%s\" has no \"OSBundleCompatibleVersion\" property.\n", - extName->getCStringNoCopy()); - return false; - } - - required_version = VERS_parse_string(requiredVersion->getCStringNoCopy()); - if (required_version < 0) { - 
IOLog("verifyCompatibility(): " - "Can't parse required version \"%s\" of dependency %s.\n", - requiredVersion->getCStringNoCopy(), - extName->getCStringNoCopy()); - return false; - } - ext_version = VERS_parse_string(extVersion->getCStringNoCopy()); - if (ext_version < 0) { - IOLog("verifyCompatibility(): " - "Can't parse version \"%s\" of dependency %s.\n", - extVersion->getCStringNoCopy(), - extName->getCStringNoCopy()); - return false; - } - ext_compat_version = VERS_parse_string(extCompatVersion->getCStringNoCopy()); - if (ext_compat_version < 0) { - IOLog("verifyCompatibility(): " - "Can't parse compatible version \"%s\" of dependency %s.\n", - extCompatVersion->getCStringNoCopy(), - extName->getCStringNoCopy()); - return false; - } - - if (required_version > ext_version || required_version < ext_compat_version) { - return false; - } - - return true; -} - -/********************************************************************* -*********************************************************************/ -static -bool kextIsDependency(const char * kext_name, char * is_kernel) { - bool result = true; - OSDictionary * extensionsDict = 0; // don't release - OSDictionary * extDict = 0; // don't release - OSDictionary * extPlist = 0; // don't release - OSBoolean * isKernelResourceObj = 0; // don't release - OSData * driverCode = 0; // don't release - OSData * compressedCode = 0; // don't release - - if (is_kernel) { - *is_kernel = 0; - } - - /* Get the dictionary of startup extensions. - * This is keyed by module name. - */ - extensionsDict = getStartupExtensions(); - if (!extensionsDict) { - IOLog("startup extensions dictionary is missing\n"); - result = false; - goto finish; - } - - /* Get the requested extension's dictionary entry and its property - * list, containing module dependencies. 
- */ - extDict = OSDynamicCast(OSDictionary, - extensionsDict->getObject(kext_name)); - - if (!extDict) { - IOLog("extension \"%s\" cannot be found\n", - kext_name); - result = false; - goto finish; - } - - extPlist = OSDynamicCast(OSDictionary, extDict->getObject("plist")); - if (!extPlist) { - IOLog("extension \"%s\" has no info dictionary\n", - kext_name); - result = false; - goto finish; - } - - /* A kext that is a kernel component is still a dependency, as there - * are fake kmod entries for them. - */ - isKernelResourceObj = OSDynamicCast(OSBoolean, - extPlist->getObject("OSKernelResource")); - if (isKernelResourceObj && isKernelResourceObj->isTrue()) { - if (is_kernel) { - *is_kernel = 1; - } - } - - driverCode = OSDynamicCast(OSData, extDict->getObject("code")); - compressedCode = OSDynamicCast(OSData, - extDict->getObject("compressedCode")); - - /* A kernel component that has code represents a KPI. - */ - if ((driverCode || compressedCode) && is_kernel && *is_kernel) { - *is_kernel = 2; - } - - if (!driverCode && !compressedCode && !isKernelResourceObj) { - result = false; - goto finish; - } - -finish: - - return result; -} - -/********************************************************************* -*********************************************************************/ -static bool -addDependenciesForKext(OSDictionary * kextPlist, - OSArray * dependencyList, - OSString * trueParent, - Boolean skipKernelDependencies) -{ - bool result = true; - bool hasDirectKernelDependency = false; - bool hasKernelStyleDependency = false; - bool hasKPIStyleDependency = false; - OSString * kextName = 0; // don't release - OSDictionary * libraries = 0; // don't release - OSCollectionIterator * keyIterator = 0; // must release - OSString * libraryName = 0; // don't release - OSString * dependentName = 0; // don't release - - kextName = OSDynamicCast(OSString, - kextPlist->getObject("CFBundleIdentifier")); - if (!kextName) { - // XXX: Add log message - result = false; - goto 
finish; - } - - libraries = OSDynamicCast(OSDictionary, - kextPlist->getObject("OSBundleLibraries")); - if (!libraries) { - result = true; - goto finish; - } - - keyIterator = OSCollectionIterator::withCollection(libraries); - if (!keyIterator) { - // XXX: Add log message - result = false; - goto finish; - } - - dependentName = trueParent ? trueParent : kextName; - - while ( (libraryName = OSDynamicCast(OSString, - keyIterator->getNextObject())) ) { - - OSString * libraryVersion = OSDynamicCast(OSString, - libraries->getObject(libraryName)); - if (!libraryVersion) { - // XXX: Add log message - result = false; - goto finish; - } - if (!verifyCompatibility(libraryName, libraryVersion)) { - result = false; - goto finish; - } else { - char is_kernel_component; - - if (!kextIsDependency(libraryName->getCStringNoCopy(), - &is_kernel_component)) { - - is_kernel_component = 0; - } - - if (!skipKernelDependencies || !is_kernel_component) { - dependencyList->setObject(dependentName); - dependencyList->setObject(libraryName); - } - if (!hasDirectKernelDependency && is_kernel_component) { - hasDirectKernelDependency = true; - } - - /* We already know from the kextIsDependency() call whether - * the dependency *itself* is kernel- or KPI-style, but since - * the declaration semantic is by bundle ID, we check that here - * instead. 
- */ - if (strncmp(libraryName->getCStringNoCopy(), - KERNEL_PREFIX, strlen(KERNEL_PREFIX)) == 0) { - - hasKernelStyleDependency = true; - - } else if (strncmp(libraryName->getCStringNoCopy(), - KPI_PREFIX, strlen(KPI_PREFIX)) == 0) { - - hasKPIStyleDependency = true; - } - } - } - - if (!hasDirectKernelDependency) { - const OSSymbol * kernelName = 0; - - /* a kext without any kernel dependency is assumed dependent on 6.0 */ - dependencyList->setObject(dependentName); - - kernelName = OSSymbol::withCString("com.apple.kernel.libkern"); - if (!kernelName) { - // XXX: Add log message - result = false; - goto finish; - } - dependencyList->setObject(kernelName); - kernelName->release(); - - IOLog("Extension \"%s\" has no explicit kernel dependency; using version 6.0.\n", - kextName->getCStringNoCopy()); - - } else if (hasKernelStyleDependency && hasKPIStyleDependency) { - IOLog("Extension \"%s\" has immediate dependencies " - "on both com.apple.kernel and com.apple.kpi components; use only one style.\n", - kextName->getCStringNoCopy()); - } - -finish: - if (keyIterator) keyIterator->release(); - return result; -} - -/********************************************************************* -*********************************************************************/ -static -bool getVersionForKext(OSDictionary * kextPlist, char ** version) -{ - OSString * kextName = 0; // don't release - OSString * kextVersion; // don't release - - kextName = OSDynamicCast(OSString, - kextPlist->getObject("CFBundleIdentifier")); - if (!kextName) { - // XXX: Add log message - return false; - } - - kextVersion = OSDynamicCast(OSString, - kextPlist->getObject("CFBundleVersion")); - if (!kextVersion) { - IOLog("getVersionForKext(): " - "Extension \"%s\" has no \"CFBundleVersion\" property.\n", - kextName->getCStringNoCopy()); - return false; - } - - if (version) { - *version = (char *)kextVersion->getCStringNoCopy(); - } - - return true; -} - 
-/********************************************************************* -*********************************************************************/ -static -bool add_dependencies_for_kmod(const char * kmod_name, dgraph_t * dgraph) -{ - bool result = true; - OSDictionary * kextPlist = 0; // don't release - unsigned int index = 0; - OSArray * dependencyList = 0; // must release - unsigned char * code = 0; - unsigned long code_length = 0; - bool code_is_kmem = false; - char * kmod_vers = 0; // from plist, don't free - char is_kernel_component = 0; - dgraph_entry_t * dgraph_entry = 0; // don't free - dgraph_entry_t * dgraph_dependency = 0; // don't free - bool kext_is_dependency = true; - -#if CONFIG_MACF_KEXT - kmod_args_t user_data = 0; - mach_msg_type_number_t user_data_length; -#endif - - /***** - * Set up the root kmod. - */ - if (!getKext(kmod_name, &kextPlist, &code, &code_length, - &code_is_kmem)) { - IOLog("can't find extension %s\n", kmod_name); - result = false; - goto finish; - } - - if (!kextIsDependency(kmod_name, &is_kernel_component)) { - IOLog("extension %s is not loadable\n", kmod_name); - result = false; - goto finish; - } - - if (!getVersionForKext(kextPlist, &kmod_vers)) { - IOLog("can't get version for extension %s\n", kmod_name); - result = false; - goto finish; - } - -#if CONFIG_MACF_KEXT - // check kext for module data in the plist - user_data = get_module_data(kextPlist, &user_data_length); -#endif - - dgraph_entry = dgraph_add_dependent(dgraph, kmod_name, - code, code_length, code_is_kmem, -#if CONFIG_MACF_KEXT - user_data, user_data_length, -#endif - kmod_name, kmod_vers, - 0 /* load_address not yet known */, is_kernel_component); - if (!dgraph_entry) { - IOLog("can't record %s in dependency graph\n", kmod_name); - result = false; - // kmem_alloc()ed code is freed in finish: block. 
- goto finish; - } - - // pass ownership of code to kld patcher - if (code) { - if (kload_map_entry(dgraph_entry) != kload_error_none) { - IOLog("can't map %s in preparation for loading\n", kmod_name); - result = false; - // kmem_alloc()ed code is freed in finish: block. - goto finish; - } - } - // clear local record of code - code = 0; - code_length = 0; - code_is_kmem = false; - - /***** - * Now handle all the dependencies. - */ - dependencyList = OSArray::withCapacity(5); - if (!dependencyList) { - IOLog("memory allocation failure\n"); - result = false; - goto finish; - } - - index = 0; - if (!addDependenciesForKext(kextPlist, dependencyList, NULL, false)) { - IOLog("can't determine immediate dependencies for extension %s\n", - kmod_name); - result = false; - goto finish; - } - - /* IMPORTANT: loop condition gets list count every time through, as the - * array CAN change each iteration. - */ - for (index = 0; index < dependencyList->getCount(); index += 2) { - OSString * dependentName = 0; - OSString * libraryName = 0; - const char * dependent_name = 0; - const char * library_name = 0; - - /* 255 is an arbitrary limit. Multiplied by 2 because the dependency - * list is stocked with pairs (dependent -> dependency). 
- */ - if (index > (2 * 255)) { - IOLog("extension dependency graph ridiculously long, indicating a loop\n"); - result = false; - goto finish; - } - - dependentName = OSDynamicCast(OSString, - dependencyList->getObject(index)); - libraryName = OSDynamicCast(OSString, - dependencyList->getObject(index + 1)); - - if (!dependentName || !libraryName) { - IOLog("malformed dependency list\n"); - result = false; - goto finish; - } - - dependent_name = dependentName->getCStringNoCopy(); - library_name = libraryName->getCStringNoCopy(); - - if (!getKext(library_name, &kextPlist, NULL, NULL, NULL)) { - - IOLog("can't find extension %s\n", library_name); - result = false; - goto finish; - } - - OSString * string = OSDynamicCast(OSString, - kextPlist->getObject("OSBundleSharedExecutableIdentifier")); - if (string) { - library_name = string->getCStringNoCopy(); - if (!getKext(library_name, &kextPlist, NULL, NULL, NULL)) { - IOLog("can't find extension %s\n", library_name); - result = false; - goto finish; - } - } - - kext_is_dependency = kextIsDependency(library_name, - &is_kernel_component); - - if (kext_is_dependency) { - dgraph_entry = dgraph_find_dependent(dgraph, dependent_name); - if (!dgraph_entry) { - IOLog("internal error with dependency graph\n"); - LOG_DELAY(1); - result = false; - goto finish; - } - - if (!getVersionForKext(kextPlist, &kmod_vers)) { - IOLog("can't get version for extension %s\n", library_name); - result = false; - goto finish; - } - - /* It's okay for code to be zero, as for a pseudokext - * representing a kernel component. - */ - if (!getKext(library_name, NULL /* already got it */, - &code, &code_length, &code_is_kmem)) { - IOLog("can't find extension %s\n", library_name); - result = false; - goto finish; - } - -#if CONFIG_MACF_KEXT - // check kext for module data in the plist - // XXX - is this really needed? 
- user_data = get_module_data(kextPlist, &user_data_length); -#endif - dgraph_dependency = dgraph_add_dependency(dgraph, dgraph_entry, - library_name, code, code_length, code_is_kmem, -#if CONFIG_MACF_KEXT - user_data, user_data_length, -#endif - library_name, kmod_vers, - 0 /* load_address not yet known */, is_kernel_component); - - if (!dgraph_dependency) { - IOLog("can't record dependency %s -> %s\n", dependent_name, - library_name); - result = false; - // kmem_alloc()ed code is freed in finish: block. - goto finish; - } - - // pass ownership of code to kld patcher - if (code) { - if (kload_map_entry(dgraph_dependency) != kload_error_none) { - IOLog("can't map %s in preparation for loading\n", library_name); - result = false; - // kmem_alloc()ed code is freed in finish: block. - goto finish; - } - } - // clear local record of code - code = 0; - code_length = 0; - code_is_kmem = false; - } - - /* Now put the library's dependencies onto the pending set. - */ - if (!addDependenciesForKext(kextPlist, dependencyList, - kext_is_dependency ? NULL : dependentName, !kext_is_dependency)) { - - IOLog("can't determine immediate dependencies for extension %s\n", - library_name); - result = false; - goto finish; - } - } - -finish: - if (code && code_is_kmem) { - kmem_free(kernel_map, (unsigned int)code, code_length); - } - if (dependencyList) dependencyList->release(); - -#if CONFIG_MACF_KEXT - if (user_data && !result) { - vm_map_copy_discard((vm_map_copy_t)user_data); - } -#endif - - return result; -} - -/********************************************************************* -* This is the function that IOCatalogue calls in order to load a kmod. -* It first checks whether the kmod is already loaded. If the kmod -* isn't loaded, this function builds a dependency list and calls -* load_kmod() repeatedly to guarantee that each dependency is in fact -* loaded. 
-*********************************************************************/ -__private_extern__ -kern_return_t load_kernel_extension(char * kmod_name) -{ - kern_return_t result = KERN_SUCCESS; - kload_error load_result = kload_error_none; - dgraph_t dgraph; - bool free_dgraph = false; - kmod_info_t * kmod_info; - -// Put this in for lots of messages about kext loading. -#if 0 - kload_set_log_level(kload_log_level_load_details); -#endif - - /* See if the kmod is already loaded. - */ - if ((kmod_info = kmod_lookupbyname_locked(kmod_name))) { - kfree(kmod_info, sizeof(kmod_info_t)); - return KERN_SUCCESS; - } - - if (dgraph_init(&dgraph) != dgraph_valid) { - IOLog("Can't initialize dependency graph to load %s.\n", - kmod_name); - result = KERN_FAILURE; - goto finish; - } - - free_dgraph = true; - if (!add_dependencies_for_kmod(kmod_name, &dgraph)) { - IOLog("Can't determine dependencies for %s.\n", - kmod_name); - result = KERN_FAILURE; - goto finish; - } - - dgraph.root = dgraph_find_root(&dgraph); - - if (!dgraph.root) { - IOLog("Dependency graph to load %s has no root.\n", - kmod_name); - result = KERN_FAILURE; - goto finish; - } - - /* A kernel component is built in and need not be loaded. - */ - if (dgraph.root->is_kernel_component) { - result = KERN_SUCCESS; - goto finish; - } - - dgraph_establish_load_order(&dgraph); - - load_result = kload_load_dgraph(&dgraph); - if (load_result != kload_error_none && - load_result != kload_error_already_loaded) { - - IOLog(VTYELLOW "Failed to load extension %s.\n" VTRESET, kmod_name); - - result = KERN_FAILURE; - goto finish; - } - -finish: - - if (free_dgraph) { - dgraph_free(&dgraph, 0 /* don't free dgraph itself */); - } - return result; -} - -#define COM_APPLE "com.apple." 
- -__private_extern__ void -load_security_extensions (void) -{ - OSDictionary * extensionsDict = NULL; // don't release - OSCollectionIterator* keyIterator = NULL; // must release - OSString * key = NULL; // don't release - OSDictionary * extDict; // don't release - OSDictionary * extPlist; // don't release - OSBoolean * isSec = 0; // don't release - Boolean ret; - - extensionsDict = getStartupExtensions(); - if (!extensionsDict) { - IOLog("startup extensions dictionary is missing\n"); - LOG_DELAY(1); - return; - } - - keyIterator = OSCollectionIterator::withCollection(extensionsDict); - if (!keyIterator) { - IOLog("Error: Failed to allocate iterator for extensions.\n"); - LOG_DELAY(1); - return; - } - - while ((key = OSDynamicCast(OSString, keyIterator->getNextObject()))) { - - const char * bundle_id = key->getCStringNoCopy(); - - /* Skip extensions whose bundle IDs don't start with "com.apple.". - */ - if (!bundle_id || (strncmp(bundle_id, COM_APPLE, strlen(COM_APPLE)) != 0)) { - continue; - } - - extDict = OSDynamicCast(OSDictionary, extensionsDict->getObject(key)); - if (!extDict) { - IOLog("extension \"%s\" cannot be found\n", - key->getCStringNoCopy()); - continue; - } - - extPlist = OSDynamicCast(OSDictionary, extDict->getObject("plist")); - if (!extPlist) { - IOLog("extension \"%s\" has no info dictionary\n", - key->getCStringNoCopy()); - continue; - } - - isSec = OSDynamicCast(OSBoolean, - extPlist->getObject("AppleSecurityExtension")); - if (isSec && isSec->isTrue()) { - printf("Loading security extension %s\n", key->getCStringNoCopy()); - ret = kmod_load_request(key->getCStringNoCopy(), false); - if (!ret) { - load_kernel_extension((char *)key->getCStringNoCopy()); - } - } - } - - if (keyIterator) - keyIterator->release(); - - return; -} diff --git a/libsa/kld_patch.c b/libsa/kld_patch.c deleted file mode 100644 index 811d0259e..000000000 --- a/libsa/kld_patch.c +++ /dev/null @@ -1,2884 +0,0 @@ -/* - * Copyright (c) 2001-2007 Apple Inc. 
All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * History: - * 2001-05-30 gvdl Initial implementation of the vtable patcher. 
- */ -// 45678901234567890123456789012345678901234567890123456789012345678901234567890 - -#include -#include -#include -#include -#if !KERNEL -#include -#include -#endif - -#ifdef CONFIG_NOLIBKLD -int kld_address_func = 0; -int kld_forget_symbol = 0; -int kld_load_basefile_from_memory = 0; -int kld_load_from_memory = 0; -int kld_lookup = 0; -int kld_set_link_options = 0; -int kld_unload_all = 0; -#endif - -#if KERNEL - -#include -//#include - -#include - -#include - -#include -#include - -#include "mach_loader.h" - -#include - -enum { false = 0, true = 1 }; - -#define vm_page_size page_size - -extern void kld_error_vprintf(const char *format, va_list ap); - -extern struct mach_header _mh_execute_header; -extern struct segment_command *getsegbyname(char *seg_name); // 32 bit only - -#else /* !KERNEL */ - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include - -#define PAGE_SIZE vm_page_size -#define PAGE_MASK (PAGE_SIZE - 1) - -#endif /* KERNEL */ - -#include "kld_patch.h" -#include "c++rem3.h" - -#if 0 -#define DIE() do { for (;;) ; } while(0) - -#if KERNEL -# define LOG_DELAY() /* IODelay(200000) */ -# define DEBUG_LOG(x) do { IOLog x; LOG_DELAY(); } while(0) -#else -# define LOG_DELAY() -# define DEBUG_LOG(x) do { printf x; } while(0) -#endif - -#else - -#define DIE() -#define LOG_DELAY() -#define DEBUG_LOG(x) - -#endif - -// OSObject symbol prefixes and suffixes -#define kCPPSymbolPrefix "_Z" -#define kVTablePrefix "_" kCPPSymbolPrefix "TV" -#define kOSObjPrefix "_" kCPPSymbolPrefix "N" -#define kReservedNamePrefix "_RESERVED" -#define k29SuperClassSuffix "superClass" -#define k31SuperClassSuffix "10superClassE" -#define kGMetaSuffix "10gMetaClassE" -#define kLinkEditSegName SEG_LINKEDIT - -// GCC 2.95 drops 2 leading constants in the vtable -#define kVTablePreambleLen 2 - -// Last address that I'm willing to try find vm in -#define kTopAddr ((unsigned char *) (1024 * 1024 * 
1024)) - -// Size in bytes that Data Ref object's get increased in size -// Must be a power of 2 -#define kDataCapacityIncrement 128 - -// My usual set of helper macros. I personally find these macros -// easier to read in the code rather than an explicit error condition -// check. If I don't make it easy then I may get lazy ond not check -// everything. I'm sorry if you find this code harder to read. - -// break_if will evaluate the expression and if it is true -// then it will print the msg, which is enclosed in parens -// and then break. Usually used in loops are do { } while (0) -#define break_if(expr, msg) \ - if (expr) { \ - errprintf msg; \ - break; \ - } - -// return_if will evaluate expr and if true it will log the -// msg, which is enclosed in parens, and then it will return -// with the return code of ret. -#define return_if(expr, ret, msg) do { \ - if (expr) { \ - errprintf msg; \ - return ret; \ - } \ -} while (0) - -#ifndef MIN -#define MIN(a,b) (((a)<(b))?(a):(b)) -#endif /* MIN */ -#ifndef MAX -#define MAX(a,b) (((a)>(b))?(a):(b)) -#endif /* MAX */ - -typedef struct Data { - unsigned long fLength, fCapacity; - char *fData; -} Data, *DataRef; - -struct sectionRecord { - const struct section *fSection; // 32 bit mach object section - DataRef fRelocCache; -}; - -enum patchState { - kSymbolIdentical, - kSymbolLocal, - kSymbolPadUpdate, - kSymbolSuperUpdate, - kSymbolMismatch -}; - -struct patchRecord { - struct nlist *fSymbol; - const struct fileRecord *fFile; - enum patchState fType; -}; - -struct relocRecord { - void *fValue; - struct nlist *fSymbol; - struct relocation_info *fRInfo; - void *reserved; -}; - -struct metaClassRecord { - char *fSuperName; - struct fileRecord *fFile; - const struct nlist *fVTableSym; - struct patchRecord *fPatchedVTable; - char fClassName[1]; -}; - -struct fileRecord { - size_t fMapSize, fMachOSize; - unsigned char *fMap, *fMachO, *fPadEnd; - DataRef fClassList; - DataRef fSectData; - DataRef fNewSymbols, 
fNewStringBlocks; - DataRef fSym2Strings; - struct symtab_command *fSymtab; - struct sectionRecord *fSections; - vm_offset_t fVMAddr, fVMEnd; - struct segment_command *fLinkEditSeg; - char **fSymbToStringTable; - char *fStringBase; - struct nlist *fSymbolBase; - struct nlist *fLocalSyms; - unsigned int fNSects; - int fNLocal; - Boolean fIsKernel, fIsReloc, fIsIncrLink, fNoKernelExecutable, fIsKmem; - Boolean fImageDirty, fSymbolsDirty; - Boolean fRemangled, fFoundOSObject; - Boolean fIgnoreFile; -#if !KERNEL - Boolean fSwapped; -#endif - char fPath[1]; -}; - -static DataRef sFilesTable; -static struct fileRecord *sKernelFile; - -static DataRef sMergedFiles; -static DataRef sMergeMetaClasses; -static Boolean sMergedKernel; -#if !KERNEL -static const NXArchInfo * sPreferArchInfo; -#endif -static const struct nlist * -findSymbolByName(struct fileRecord *file, const char *symname); - -static void errprintf(const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - kld_error_vprintf(fmt, ap); - va_end(ap); - -DIE(); -} - -static __inline__ unsigned long DataGetLength(DataRef data) -{ - return data->fLength; -} - -static __inline__ char *DataGetPtr(DataRef data) -{ - return data->fData; -} - -static __inline__ char *DataGetEndPtr(DataRef data) -{ - return data->fData + data->fLength; -} - -static __inline__ unsigned long DataRemaining(DataRef data) -{ - return data->fCapacity - data->fLength; -} - -static __inline__ Boolean DataContainsAddr(DataRef data, void *vAddr) -{ - vm_offset_t offset = (vm_address_t) vAddr; - - if (!data) - return false; - - offset = (vm_address_t) vAddr - (vm_address_t) data->fData; - return (offset < data->fLength); -} - -static Boolean DataEnsureCapacity(DataRef data, unsigned long capacity) -{ - // Don't bother to ever shrink a data object. 
- if (capacity > data->fCapacity) { - char *newData; - - capacity += kDataCapacityIncrement - 1; - capacity &= ~(kDataCapacityIncrement - 1); - newData = (char *) realloc(data->fData, capacity); - if (!newData) - return false; - - bzero(newData + data->fCapacity, capacity - data->fCapacity); - data->fData = newData; - data->fCapacity = capacity; - } - - return true; -} - -static __inline__ Boolean DataSetLength(DataRef data, unsigned long length) -{ - if (DataEnsureCapacity(data, length)) { - data->fLength = length; - return true; - } - else - return false; -} - -static __inline__ Boolean DataAddLength(DataRef data, unsigned long length) -{ - return DataSetLength(data, data->fLength + length); -} - -static __inline__ Boolean -DataAppendBytes(DataRef data, const void *addr, unsigned int len) -{ - unsigned long size = DataGetLength(data); - - if (!DataAddLength(data, len)) - return false; - - bcopy(addr, DataGetPtr(data) + size, len); - return true; -} - -static __inline__ Boolean DataAppendData(DataRef dst, DataRef src) -{ - return DataAppendBytes(dst, DataGetPtr(src), DataGetLength(src)); -} - -static DataRef DataCreate(unsigned long capacity) -{ - DataRef data = (DataRef) malloc(sizeof(Data)); - - if (data) { - if (!capacity) - data->fCapacity = kDataCapacityIncrement; - else { - data->fCapacity = capacity + kDataCapacityIncrement - 1; - data->fCapacity &= ~(kDataCapacityIncrement - 1); - } - - data->fData = (char *) malloc(data->fCapacity); - if (!data->fData) { - free(data); - return NULL; - } - - bzero(data->fData, data->fCapacity); - data->fLength = 0; - } - return data; -} - -static void DataRelease(DataRef data) -{ - if (data) { - if (data->fData) - free(data->fData); - data->fData = 0; - free(data); - } -} - -static __inline__ char * -symNameByIndex(const struct fileRecord *file, unsigned int symInd) -{ - return file->fSymbToStringTable[symInd]; -} - -static __inline__ char * -symbolname(const struct fileRecord *file, const struct nlist *sym) -{ - unsigned 
int index; - - index = sym - file->fSymbolBase; - - if (index && !sym->n_un.n_strx) - return file->fStringBase + sym->n_value; - - if (index < file->fSymtab->nsyms) - return symNameByIndex(file, index); - - if (-1 == sym->n_un.n_strx) - return (char *) sym->n_value; - - // If the preceding tests fail then we have a getNewSymbol patch and - // the file it refers to has already been patched as the n_strx is set - // to -1 temporarily while we are still processing a file. - // Once we have finished with a file then we repair the 'strx' offset - // to be valid for the repaired file's string table. - return file->fStringBase + sym->n_un.n_strx; -} - -static struct fileRecord * -getFile(const char *path) -{ - if (sFilesTable) { - int i, nfiles; - struct fileRecord **files; - - // Check to see if we have already merged this file - nfiles = DataGetLength(sFilesTable) / sizeof(struct fileRecord *); - files = (struct fileRecord **) DataGetPtr(sFilesTable); - for (i = 0; i < nfiles; i++) { - if (!strcmp(path, files[i]->fPath)) - return files[i]; - } - } - - return NULL; -} - -static struct fileRecord * -addFile(struct fileRecord *file, const char *path) -{ - struct fileRecord *newFile; - - if (!sFilesTable) { - sFilesTable = DataCreate(0); - if (!sFilesTable) - return NULL; - } - - newFile = (struct fileRecord *) - malloc(sizeof(struct fileRecord) + strlen(path)); - if (!newFile) - return NULL; - - if (!DataAppendBytes(sFilesTable, &newFile, sizeof(newFile))) { - free(newFile); - return NULL; - } - - bcopy(file, newFile, sizeof(struct fileRecord) - 1); - strlcpy((char *) newFile->fPath, path, strlen(path) + 1); - - return newFile; -} - -// @@@ gvdl: need to clean up the sMergeMetaClasses -// @@@ gvdl: I had better fix the object file up again -static void unmapFile(struct fileRecord *file) -{ - if (file->fSectData) { - struct sectionRecord *section; - unsigned int i, nsect; - - nsect = file->fNSects; - section = file->fSections; - for (i = 0; i < nsect; i++, section++) { - if 
(section->fRelocCache) { - DataRelease(section->fRelocCache); - section->fRelocCache = 0; - } - } - - DataRelease(file->fSectData); - file->fSectData = 0; - file->fSections = 0; - file->fNSects = 0; - } - - if (file->fSym2Strings) { - DataRelease(file->fSym2Strings); - file->fSym2Strings = 0; - } - - if (file->fMap) { -#if KERNEL - if (file->fIsKmem) - kmem_free(kernel_map, (vm_address_t) file->fMap, file->fMapSize); -#else /* !KERNEL */ - if (file->fPadEnd) { - vm_address_t padVM; - vm_size_t padSize; - - padVM = round_page((vm_address_t) file->fMap + file->fMapSize); - padSize = (vm_size_t) ((vm_address_t) file->fPadEnd - padVM); - (void) vm_deallocate(mach_task_self(), padVM, padSize); - file->fPadEnd = 0; - } - - (void) munmap((caddr_t) file->fMap, file->fMapSize); -#endif /* !KERNEL */ - file->fMap = 0; - } -} - -static void removeFile(struct fileRecord *file) -{ - int i, count; - - if (file->fClassList) { - struct metaClassRecord ** fileClasses = - (struct metaClassRecord **)DataGetPtr(file->fClassList); - - count = DataGetLength(file->fClassList) / sizeof(struct metaClassRecord *); - - for (i = 0; i < count; i++) { - struct metaClassRecord * thisClass = fileClasses[i]; - - if (thisClass->fSuperName) { - free(thisClass->fSuperName); - } - if (thisClass->fPatchedVTable) { - free(thisClass->fPatchedVTable); - } - - free(thisClass); - } - - DataRelease(file->fClassList); - file->fClassList = 0; - } - - // unmapFile() releases file->fSectData - - if (file->fNewSymbols) { - struct nlist ** syms = - (struct nlist **)DataGetPtr(file->fNewSymbols); - - count = DataGetLength(file->fNewSymbols) / sizeof(struct nlist *); - - for (i = 0; i < count; i++) { - free(syms[i]); - } - DataRelease(file->fNewSymbols); - file->fNewSymbols = 0; - } - - if (file->fNewStringBlocks) { - DataRef * stringBlocks = (DataRef *)DataGetPtr(file->fNewStringBlocks); - count = DataGetLength(file->fNewStringBlocks) / sizeof(DataRef); - - for (i = 0; i < count; i++) { - 
DataRelease(stringBlocks[i]); - } - - DataRelease(file->fNewStringBlocks); - file->fNewStringBlocks = 0; - } - - // unmapFile() releases file->fSym2Strings - - unmapFile(file); - - free(file); -} - - -#if !KERNEL -static Boolean -mapObjectFile(struct fileRecord *file, const char *pathName) -{ - Boolean result = false; - static unsigned char *sFileMapBaseAddr = 0; - - int fd = 0; - - if (!sFileMapBaseAddr) { - kern_return_t ret; - vm_address_t probeAddr; - - // If we don't already have a base addr find any random chunk - // of 32 meg of VM and to use the 16 meg boundrary as a base. - ret = vm_allocate(mach_task_self(), &probeAddr, - 32 * 1024 * 1024, VM_FLAGS_ANYWHERE); - return_if(KERN_SUCCESS != ret, false, - ("Unable to allocate base memory %s\n", mach_error_string(ret))); - (void) vm_deallocate(mach_task_self(), probeAddr, 32 * 1024 * 1024); - - // Now round to the next 16 Meg boundrary - probeAddr = (probeAddr + (16 * 1024 * 1024 - 1)) - & ~(16 * 1024 * 1024 - 1); - sFileMapBaseAddr = (unsigned char *) probeAddr; - } - - fd = open(pathName, O_RDONLY, 0); - return_if(fd == -1, false, ("Can't open %s for reading - %s\n", - pathName, strerror(errno))); - - do { - kern_return_t ret; - struct stat sb; - int retaddr = -1; - - break_if(fstat(fd, &sb) == -1, - ("Can't stat %s - %s\n", file->fPath, strerror(errno))); - - file->fMapSize = sb.st_size; - file->fMap = sFileMapBaseAddr; - ret = KERN_SUCCESS; - while (file->fMap < kTopAddr) { - vm_address_t padVM; - vm_address_t padVMEnd; - vm_size_t padSize; - - padVM = round_page((vm_address_t) file->fMap + file->fMapSize); - retaddr = (int) mmap(file->fMap, file->fMapSize, - PROT_READ|PROT_WRITE, - MAP_FIXED|MAP_FILE|MAP_PRIVATE, - fd, 0); - if (-1 == retaddr) { - break_if(ENOMEM != errno, - ("mmap failed %d - %s\n", errno, strerror(errno))); - - file->fMap = (unsigned char *) padVM; - continue; - } - - - // Round up padVM to the next page after the file and assign at - // least another fMapSize more room rounded up to the 
next page - // boundary. - padVMEnd = round_page(padVM + file->fMapSize); - padSize = padVMEnd - padVM; - ret = vm_allocate( - mach_task_self(), &padVM, padSize, VM_FLAGS_FIXED); - if (KERN_SUCCESS == ret) { - file->fPadEnd = (unsigned char *) padVMEnd; - break; - } - else { - munmap(file->fMap, file->fMapSize); - break_if(KERN_INVALID_ADDRESS != ret, - ("Unable to allocate pad vm for %s - %s\n", - pathName, mach_error_string(ret))); - - file->fMap = (unsigned char *) padVMEnd; - continue; // try again wherever the vm system wants - } - } - - if (-1 == retaddr || KERN_SUCCESS != ret) - break; - - break_if(file->fMap >= kTopAddr, - ("Unable to map memory %s\n", file->fPath)); - - sFileMapBaseAddr = file->fPadEnd; - result = true; - } while(0); - - close(fd); - return result; -} - -void -kld_set_architecture(const NXArchInfo * arch) -{ - sPreferArchInfo = arch; -} - -// This function can only operate on 32 bit mach-o files -Boolean -kld_macho_swap(struct mach_header * mh) -{ - struct segment_command * seg; - struct section * section; - CFIndex ncmds, cmd, sect; - enum NXByteOrder hostOrder = NXHostByteOrder(); - - if (MH_CIGAM != mh->magic) - return (false); - - swap_mach_header(mh, hostOrder); - - ncmds = mh->ncmds; - seg = (struct segment_command *)(mh + 1); - for (cmd = 0; - cmd < ncmds; - cmd++, seg = (struct segment_command *)(((vm_offset_t)seg) + seg->cmdsize)) - { - if (OSSwapConstInt32(LC_SYMTAB) == seg->cmd) { - swap_symtab_command((struct symtab_command *) seg, hostOrder); - swap_nlist((struct nlist *) (((vm_offset_t) mh) + ((struct symtab_command *) seg)->symoff), - ((struct symtab_command *) seg)->nsyms, hostOrder); - continue; - } - if (OSSwapConstInt32(LC_SEGMENT) != seg->cmd) { - swap_load_command((struct load_command *) seg, hostOrder); - continue; - } - swap_segment_command(seg, hostOrder); - swap_section((struct section *) (seg + 1), seg->nsects, hostOrder); - - section = (struct section *) (seg + 1); - for (sect = 0; sect < seg->nsects; sect++, 
section++) { - if (section->nreloc) - swap_relocation_info((struct relocation_info *) (((vm_offset_t) mh) + section->reloff), - section->nreloc, hostOrder); - } - } - - return (true); -} - -// This function can only operate on 32 bit mach-o files -void -kld_macho_unswap(struct mach_header * mh, Boolean didSwap, int symbols) -{ - // symbols == 0 => everything - // symbols == 1 => just nlists - // symbols == -1 => everything but nlists - - struct segment_command * seg; - struct section * section; - unsigned long cmdsize; - CFIndex ncmds, cmd, sect; - enum NXByteOrder hostOrder = (NXHostByteOrder() == NX_LittleEndian) - ? NX_BigEndian : NX_LittleEndian; - if (!didSwap) - return; - - ncmds = mh->ncmds; - seg = (struct segment_command *)(mh + 1); - for (cmd = 0; - cmd < ncmds; - cmd++, seg = (struct segment_command *)(((vm_offset_t)seg) + cmdsize)) - { - cmdsize = seg->cmdsize; - if (LC_SYMTAB == seg->cmd) { - if (symbols >= 0) - swap_nlist((struct nlist *) (((vm_offset_t) mh) + ((struct symtab_command *) seg)->symoff), - ((struct symtab_command *) seg)->nsyms, hostOrder); - if (symbols > 0) - break; - swap_symtab_command((struct symtab_command *) seg, hostOrder); - continue; - } - if (symbols > 0) - continue; - if (LC_SEGMENT != seg->cmd) { - swap_load_command((struct load_command *) seg, hostOrder); - continue; - } - - section = (struct section *) (seg + 1); - for (sect = 0; sect < seg->nsects; sect++, section++) { - if (section->nreloc) - swap_relocation_info((struct relocation_info *) (((vm_offset_t) mh) + section->reloff), - section->nreloc, hostOrder); - } - swap_section((struct section *) (seg + 1), seg->nsects, hostOrder); - swap_segment_command(seg, hostOrder); - } - if (symbols <= 0) - swap_mach_header(mh, hostOrder); -} - -#endif /* !KERNEL */ - -// Note: This functions is only called from kld_file_map() -// This function can only operate on 32 bit mach-o files -static Boolean findBestArch(struct fileRecord *file, const char *pathName) -{ - unsigned long 
magic; - struct fat_header *fat; - - - file->fMachOSize = file->fMapSize; - file->fMachO = file->fMap; - magic = ((const struct mach_header *) file->fMachO)->magic; - fat = (struct fat_header *) file->fMachO; - - // Try to figure out what type of file this is - return_if(file->fMapSize < sizeof(unsigned long), false, - ("%s isn't a valid object file - no magic\n", pathName)); - -#if KERNEL - - // CIGAM is byte-swapped MAGIC - if (magic == FAT_MAGIC || magic == FAT_CIGAM) { - - load_return_t load_return; - struct fat_arch fatinfo; - - load_return = fatfile_getarch(NULL, (vm_address_t) fat, &fatinfo); - return_if(load_return != LOAD_SUCCESS, false, - ("Extension \"%s\": has no code for this computer\n", pathName)); - - file->fMachO = file->fMap + fatinfo.offset; - file->fMachOSize = fatinfo.size; - magic = ((const struct mach_header *) file->fMachO)->magic; - } - -#else /* !KERNEL */ - - // Do we need to in-place swap the endianness of the fat header? - if (magic == FAT_CIGAM) { - unsigned long i; - struct fat_arch *arch; - - fat->nfat_arch = OSSwapBigToHostInt32(fat->nfat_arch); - return_if(file->fMapSize < sizeof(struct fat_header) - + fat->nfat_arch * sizeof(struct fat_arch), - false, ("%s is too fat\n", file->fPath)); - - arch = (struct fat_arch *) &fat[1]; - for (i = 0; i < fat->nfat_arch; i++) { - arch[i].cputype = OSSwapBigToHostInt32(arch[i].cputype); - arch[i].cpusubtype = OSSwapBigToHostInt32(arch[i].cpusubtype); - arch[i].offset = OSSwapBigToHostInt32(arch[i].offset); - arch[i].size = OSSwapBigToHostInt32(arch[i].size); - arch[i].align = OSSwapBigToHostInt32(arch[i].align); - } - - magic = OSSwapBigToHostInt32(fat->magic); - } - - // Now see if we can find any valid architectures - if (magic == FAT_MAGIC) { - const NXArchInfo *myArch; - unsigned long fatsize; - struct fat_arch *arch; - - fatsize = sizeof(struct fat_header) - + fat->nfat_arch * sizeof(struct fat_arch); - return_if(file->fMapSize < fatsize, - false, ("%s isn't a valid fat file\n", 
pathName)); - - if (sPreferArchInfo) - myArch = sPreferArchInfo; - else - myArch = NXGetLocalArchInfo(); - - arch = NXFindBestFatArch(myArch->cputype, myArch->cpusubtype, - (struct fat_arch *) &fat[1], fat->nfat_arch); - return_if(!arch, - false, ("%s hasn't got arch for %s\n", pathName, myArch->name)); - return_if(arch->offset + arch->size > file->fMapSize, - false, ("%s's %s arch is incomplete\n", pathName, myArch->name)); - file->fMachO = file->fMap + arch->offset; - file->fMachOSize = arch->size; - magic = ((const struct mach_header *) file->fMachO)->magic; - } - - file->fSwapped = kld_macho_swap((struct mach_header *) file->fMachO); - if (file->fSwapped) - magic = ((const struct mach_header *) file->fMachO)->magic; - -#endif /* KERNEL */ - - return_if(magic != MH_MAGIC, - false, ("%s isn't a valid mach-o (magic is %08x)\n", pathName, magic)); - - return true; -} - -// This function can only operate on segments from 32 bit mach-o files -static Boolean -parseSegments(struct fileRecord *file, struct segment_command *seg) -{ - struct sectionRecord *sections; - int i, nsects = seg->nsects; - const struct segmentMap { - struct segment_command seg; - const struct section sect[1]; - } *segMap; - - if (!file->fSectData) { - file->fSectData = DataCreate(0); - if (!file->fSectData) - return false; - } - - // Increase length of section DataRef and cache data pointer - if (!DataAddLength(file->fSectData, nsects * sizeof(struct sectionRecord))) - return false; - file->fSections = (struct sectionRecord *) DataGetPtr(file->fSectData); - - // Initialise the new sections - sections = &file->fSections[file->fNSects]; - file->fNSects += nsects; - for (i = 0, segMap = (struct segmentMap *) seg; i < nsects; i++) - { - sections[i].fSection = &segMap->sect[i]; - file->fIsReloc |= (0 != segMap->sect[i].nreloc); - } - - return true; -} - -static Boolean -remangleExternSymbols(struct fileRecord *file, const char *pathName) -{ - const struct nlist *sym; - int i, nsyms, len; - DataRef 
strings = NULL; - - DEBUG_LOG(("Remangling %s\n", pathName)); - - file->fNewStringBlocks = DataCreate(0); - return_if(!file->fNewStringBlocks, false, - ("Unable to allocate new string table for %s\n", pathName)); - - nsyms = file->fSymtab->nsyms; - for (i = 0, sym = file->fSymbolBase; i < nsyms; i++, sym++) { - Rem3Return ret; - const char *symname; - char *newname; - unsigned char n_type = sym->n_type; - - // Not an external symbol or it is a stab in any case don't bother - if ((n_type ^ N_EXT) & (N_STAB | N_EXT)) - continue; - - symname = symNameByIndex(file, i); - -tryRemangleAgain: - if (!strings) { - strings = DataCreate(16 * 1024); // Arbitrary block size - return_if(!strings, false, - ("Unable to allocate new string block for %s\n", pathName)); - } - - len = DataRemaining(strings); - newname = DataGetEndPtr(strings); - ret = rem3_remangle_name(newname, &len, symname); - switch (ret) { - case kR3InternalNotRemangled: - errprintf("Remangler fails on %s in %s\n", symname, pathName); - /* No break */ - case kR3NotRemangled: - break; - - case kR3Remangled: - file->fSymbToStringTable[i] = newname; - file->fRemangled = file->fSymbolsDirty = true; - DataAddLength(strings, len + 1); // returns strlen - break; - - case kR3BufferTooSmallRemangled: - return_if(!DataAppendBytes - (file->fNewStringBlocks, &strings, sizeof(strings)), - false, ("Unable to allocate string table for %s\n", pathName)); - strings = NULL; - goto tryRemangleAgain; - - case kR3BadArgument: - default: - return_if(true, false, - ("Internal error - remangle of %s\n", pathName)); - } - } - - if (strings) { - return_if(!DataAppendBytes - (file->fNewStringBlocks, &strings, sizeof(strings)), - false, ("Unable to allocate string table for %s\n", pathName)); - } - - return true; -} - -// This function can only operate on symbol table files from 32 bit -// mach-o files -static Boolean parseSymtab(struct fileRecord *file, const char *pathName) -{ - struct nlist *sym; - unsigned int i, firstlocal, nsyms; - 
unsigned long strsize; - char *strbase; - Boolean foundOSObject, found295CPP, havelocal; - - // we found a link edit segment so recompute the bases - if (file->fLinkEditSeg) { - struct segment_command *link = file->fLinkEditSeg; - - file->fSymbolBase = (struct nlist *) - (link->vmaddr + (file->fSymtab->symoff - link->fileoff)); - file->fStringBase = (char *) - (link->vmaddr + (file->fSymtab->stroff - link->fileoff)); - return_if( ( (caddr_t) file->fStringBase + file->fSymtab->strsize - > (caddr_t) link->vmaddr + link->vmsize ), false, - ("%s isn't a valid mach-o le, bad symbols\n", pathName)); - } - else { - file->fSymbolBase = (struct nlist *) - (file->fMachO + file->fSymtab->symoff); - file->fStringBase = (char *) - (file->fMachO + file->fSymtab->stroff); - return_if( ( file->fSymtab->stroff + file->fSymtab->strsize - > file->fMachOSize ), false, - ("%s isn't a valid mach-o, bad symbols\n", pathName)); - } - - nsyms = file->fSymtab->nsyms; - - // If this file the kernel and do we have an executable image - file->fNoKernelExecutable = (vm_page_size == file->fSymtab->symoff) - && (file->fSections[0].fSection->size == 0); - - // Generate a table of pointers to strings indexed by the symbol number - - file->fSym2Strings = DataCreate(nsyms * sizeof(const char *)); - DataSetLength(file->fSym2Strings, nsyms * sizeof(const char *)); - return_if(!file->fSym2Strings, false, - ("Unable to allocate memory - symbol string trans\n", pathName)); - file->fSymbToStringTable = (char **) DataGetPtr(file->fSym2Strings); - - // Search for the first non-stab symbol in table - strsize = file->fSymtab->strsize; - strbase = file->fStringBase; - firstlocal = 0; - havelocal = false; - found295CPP = foundOSObject = false; - for (i = 0, sym = file->fSymbolBase; i < nsyms; i++, sym++) { - long strx = sym->n_un.n_strx; - char *symname = strbase + strx; - unsigned char n_type; - - return_if(((unsigned long) strx > strsize), false, - ("%s has an illegal string offset in symbol %d\n", pathName, 
i)); -#if 0 - // Make all syms abs - if (file->fIsIncrLink) { - if ( (sym->n_type & N_TYPE) == N_SECT) { - sym->n_sect = NO_SECT; - sym->n_type = (sym->n_type & ~N_TYPE) | N_ABS; - } - } -#endif - - if (file->fIsIncrLink && !file->fNSects) - { - // symbol set - struct nlist *patchsym = sym; - const char * lookname; - const struct nlist * realsym; - - if ( (patchsym->n_type & N_TYPE) == N_INDR) - lookname = strbase + patchsym->n_value; - else - lookname = symname; - realsym = findSymbolByName(sKernelFile, lookname); - - patchsym->n_sect = NO_SECT; - if (realsym) - { - patchsym->n_type = realsym->n_type; - patchsym->n_desc = realsym->n_desc; - patchsym->n_value = realsym->n_value; - if ((patchsym->n_type & N_TYPE) == N_SECT) - patchsym->n_type = (patchsym->n_type & ~N_TYPE) | N_ABS; - } - else - { - errprintf("%s: Undefined in symbol set: %s\n", pathName, symname); - patchsym->n_type = N_ABS; - patchsym->n_desc = 0; - patchsym->n_value = patchsym->n_un.n_strx; - patchsym->n_un.n_strx = 0; - } - - if (!havelocal && (patchsym->n_type & N_EXT)) { - firstlocal = i; - havelocal = true; - file->fLocalSyms = patchsym; - } - continue; - } /* symbol set */ - - // Load up lookup symbol look table with sym names - file->fSymbToStringTable[i] = symname; - - n_type = sym->n_type & (N_TYPE | N_EXT); - - // Find the first exported symbol - if ( !firstlocal && (n_type & N_EXT) ) { - firstlocal = i; - havelocal = true; - file->fLocalSyms = sym; - } - - // Find the a OSObject based subclass by searching for symbols - // that have a suffix of '10superClassE' - symname++; // Skip leading '_' - - if (!foundOSObject - && (n_type == (N_SECT | N_EXT) || n_type == (N_ABS | N_EXT)) - && strx) { - const char *suffix, *endSym; - - endSym = symname + strlen(symname); - - // Find out if this symbol has the superclass suffix. 
- if (symname[0] == kCPPSymbolPrefix[0] - && symname[1] == kCPPSymbolPrefix[1]) { - - suffix = endSym - sizeof(k31SuperClassSuffix) + 1; - - // Check for a gcc3 OSObject subclass - if (suffix > symname - && !strcmp(suffix, k31SuperClassSuffix)) - foundOSObject = true; - } - else { - suffix = endSym - sizeof(k29SuperClassSuffix); - - // Check for a gcc295 OSObject subclass - if (suffix > symname - && ('.' == *suffix || '$' == *suffix) - && !strcmp(suffix+1, k29SuperClassSuffix)) { - found295CPP = foundOSObject = true; - } - else if (!found295CPP) { - // Finally just check if we need to remangle - symname++; // skip leading '__' - while (*symname) { - if ('_' == symname[0] && '_' == symname[1]) { - found295CPP = true; - break; - } - symname++; - } - } - } - } - else if (sym->n_type == (N_EXT | N_UNDF)) { - if ( !file->fNLocal) // Find the last local symbol - file->fNLocal = i - firstlocal; - if (!found295CPP) { - symname++; // Skip possible second '_' at start. - while (*symname) { - if ('_' == symname[0] && '_' == symname[1]) { - found295CPP = true; - break; - } - symname++; - } - } - } - // Note symname is trashed at this point - } - return_if(i < nsyms, false, - ("%s isn't a valid mach-o, bad symbol strings\n", pathName)); - - return_if(!file->fLocalSyms, false, ("%s has no symbols?\n", pathName)); - - // If we don't have any undefined symbols then all symbols - // must be local so just compute it now if necessary. - if ( !file->fNLocal ) - file->fNLocal = i - firstlocal; - - file->fFoundOSObject = foundOSObject; - - if (found295CPP && !remangleExternSymbols(file, pathName)) - return false; - - return true; -} - -// @@@ gvdl: These functions need to be hashed they are -// going to be way too slow for production code. 
-static struct nlist * -findSymbolByAddress(const struct fileRecord *file, void *entry) -{ - // not quite so dumb linear search of all symbols - struct nlist *sym; - int i, nsyms; - - // First try to find the symbol in the most likely place which is the - // extern symbols - sym = file->fLocalSyms; - for (i = 0, nsyms = file->fNLocal; i < nsyms; i++, sym++) { - if (sym->n_value == (unsigned long) entry && !(sym->n_type & N_STAB) ) - return sym; - } - - // Didn't find it in the external symbols so try to local symbols before - // giving up. - sym = file->fSymbolBase; - for (i = 0, nsyms = file->fSymtab->nsyms; i < nsyms; i++, sym++) { - if ( (sym->n_type & N_EXT) ) - return NULL; - if ( sym->n_value == (unsigned long) entry && !(sym->n_type & N_STAB) ) - return sym; - } - - return NULL; -} - -static struct nlist * -findSymbolByAddressInAllFiles(__unused const struct fileRecord * fromFile, - void *entry, const struct fileRecord **resultFile) -{ - int i, nfiles = 0; - struct fileRecord **files; - - if (sFilesTable) { - - // Check to see if we have already merged this file - nfiles = DataGetLength(sFilesTable) / sizeof(struct fileRecord *); - files = (struct fileRecord **) DataGetPtr(sFilesTable); - for (i = 0; i < nfiles; i++) { - if ((((vm_offset_t)entry) >= files[i]->fVMAddr) - && (((vm_offset_t)entry) < files[i]->fVMEnd)) - { - struct nlist * result; - if (resultFile) - *resultFile = files[i]; - result = findSymbolByAddress(files[i], entry); - return result; - } - } - } - - return NULL; -} - -struct searchContext { - const char *fSymname; - const struct fileRecord *fFile; -}; - -static int symbolSearch(const void *vKey, const void *vSym) -{ - const struct searchContext *key = (const struct searchContext *) vKey; - const struct nlist *sym = (const struct nlist *) vSym; - - return strcmp(key->fSymname, symbolname(key->fFile, sym)); -} - -static const struct nlist * -findSymbolByName(struct fileRecord *file, const char *symname) -{ - if (file->fRemangled) { - // @@@ 
gvdl: Performance problem - // Linear search as we don't sort after remangling - const struct nlist *sym; - int i = file->fLocalSyms - file->fSymbolBase; - int nLocal = file->fNLocal + i; - - for (sym = file->fLocalSyms; i < nLocal; i++, sym++) - if (!strcmp(symNameByIndex(file, i), symname)) - return sym; - return NULL; - } - else { - struct searchContext context; - - context.fSymname = symname; - context.fFile = file; - return (const struct nlist *) - bsearch(&context, - file->fLocalSyms, file->fNLocal, sizeof(struct nlist), - symbolSearch); - } -} - -static Boolean -relocateSection(struct fileRecord *file, struct sectionRecord *sectionRec) -{ - struct nlist *symbol; - const struct section *section; - struct relocRecord *rec; - struct relocation_info *rinfo; - unsigned long i; - unsigned long r_address, r_symbolnum, r_length; - enum reloc_type_generic r_type; - UInt8 *sectionBase; - void **entry; - - sectionRec->fRelocCache = DataCreate( - sectionRec->fSection->nreloc * sizeof(struct relocRecord)); - if (!sectionRec->fRelocCache) - return false; - - section = sectionRec->fSection; - sectionBase = file->fMachO + section->offset; - - rec = (struct relocRecord *) DataGetPtr(sectionRec->fRelocCache); - rinfo = (struct relocation_info *) (file->fMachO + section->reloff); - for (i = 0; i < section->nreloc; i++, rec++, rinfo++) { - - // Totally uninterested in scattered relocation entries - if ( (rinfo->r_address & R_SCATTERED) ) - continue; - - r_address = rinfo->r_address; - entry = (void **) (sectionBase + r_address); - - /* - * The r_address field is really an offset into the contents of the - * section and must reference something inside the section (Note - * that this is not the case for PPC_RELOC_PAIR entries but this - * can't be one with the above checks). 
- */ - return_if(r_address >= section->size, false, - ("Invalid relocation entry in %s - not in section\n", file->fPath)); - - // If we don't have a VANILLA entry or the Vanilla entry isn't - // a 'long' then ignore the entry and try the next. - r_type = (enum reloc_type_generic) rinfo->r_type; - r_length = rinfo->r_length; - if (r_type != GENERIC_RELOC_VANILLA || r_length != 2) - continue; - - r_symbolnum = rinfo->r_symbolnum; - - /* - * If rinfo->r_extern is set this relocation entry is an external entry - * else it is a local entry. - */ - if (rinfo->r_extern) { - /* - * This is an external relocation entry. - * r_symbolnum is an index into the input file's symbol table - * of the symbol being refered to. The symbol must be - * undefined to be used in an external relocation entry. - */ - return_if(r_symbolnum >= file->fSymtab->nsyms, false, - ("Invalid relocation entry in %s - no symbol\n", file->fPath)); - - /* - * If this is an indirect symbol resolve indirection (all chains - * of indirect symbols have been resolved so that they point at - * a symbol that is not an indirect symbol). - */ - symbol = file->fSymbolBase; - if ((symbol[r_symbolnum].n_type & N_TYPE) == N_INDR) - r_symbolnum = symbol[r_symbolnum].n_value; - symbol = &symbol[r_symbolnum]; - - return_if(symbol->n_type != (N_EXT | N_UNDF), false, - ("Invalid relocation entry in %s - extern\n", file->fPath)); - } - else { - void * addr = *entry; - /* - * If the symbol is not in any section then it can't be a - * pointer to a local segment and I don't care about it. - */ - if (r_symbolnum == R_ABS) - continue; - - // Note segment references are offset by 1 from 0. 
- return_if(r_symbolnum > file->fNSects, false, - ("Invalid relocation entry in %s - local\n", file->fPath)); - - // Find the symbol, if any, that backs this entry -#if !KERNEL - if (file->fSwapped) - addr = (void *) OSSwapInt32((uint32_t) addr); -#endif - symbol = findSymbolByAddress(file, addr); - } - - rec->fValue = *entry; // Save the previous value - rec->fRInfo = rinfo; // Save a pointer to the reloc - rec->fSymbol = symbol; // Record the current symbol - - *entry = (void *) rec; // Save pointer to record in object image - } - - DataSetLength(sectionRec->fRelocCache, i * sizeof(struct relocRecord)); - file->fImageDirty = true; - - return true; -} - -static const struct nlist * -findSymbolRefAtLocation(struct fileRecord *file, - struct sectionRecord *sctn, void **loc, const struct fileRecord **foundInFile) -{ - const struct nlist * result; - - *foundInFile = file; - - if (!file->fIsReloc) { - if (*loc) { - void * addr = *loc; -#if !KERNEL - if (file->fSwapped) - addr = (void *) OSSwapInt32((uint32_t) addr); -#endif - result = findSymbolByAddress(file, addr); - if (!result) - result = findSymbolByAddressInAllFiles(file, addr, foundInFile); - return result; - } - } - else if (sctn->fRelocCache || relocateSection(file, sctn)) { - struct relocRecord *reloc = (struct relocRecord *) *loc; - - if (DataContainsAddr(sctn->fRelocCache, reloc)) - return reloc->fSymbol; - } - - return NULL; -} - -static Boolean -addClass(struct fileRecord *file, - struct metaClassRecord *inClass, - const char *cname) -{ - Boolean result = false; - struct metaClassRecord *newClass = NULL; - struct metaClassRecord **fileClasses = NULL; - int len; - - if (!file->fClassList) { - file->fClassList = DataCreate(0); - if (!file->fClassList) - return false; - } - - do { - // Attempt to allocate all necessary resource first - len = strlen(cname) + 1 - + (int) (&((struct metaClassRecord *) 0)->fClassName); - newClass = (struct metaClassRecord *) malloc(len); - if (!newClass) - break; - - if 
(!DataAddLength(file->fClassList, sizeof(struct metaClassRecord *))) - break; - fileClasses = (struct metaClassRecord **) - (DataGetPtr(file->fClassList) + DataGetLength(file->fClassList)); - - // Copy the meta Class structure and string name into newClass and - // insert object at end of the file->fClassList and sMergeMetaClasses - memcpy(newClass, inClass, sizeof(*inClass)); - // metaClassRecord declares fClassName[1] - strlcpy(newClass->fClassName, cname, strlen(cname) + sizeof(newClass->fClassName)); - fileClasses[-1] = newClass; - - return true; - } while (0); - - if (fileClasses) - DataAddLength(file->fClassList, -sizeof(struct metaClassRecord *)); - - if (newClass) - free(newClass); - - return result; -} - -static struct metaClassRecord *getClass(DataRef classList, const char *cname) -{ - if (classList) { - int i, nclass; - struct metaClassRecord **classes, *thisClass; - - nclass = DataGetLength(classList) / sizeof(struct metaClassRecord *); - classes = (struct metaClassRecord **) DataGetPtr(classList); - for (i = 0; i < nclass; i++) { - thisClass = classes[i]; - if (!strcmp(thisClass->fClassName, cname)) - return thisClass; - } - } - - return NULL; -} - -// Add the class 'cname' to the list of known OSObject based classes -// Note 'sym' is the 10superClassE symbol. -static Boolean -recordClass(struct fileRecord *file, const char *cname, const struct nlist *sym) -{ - Boolean result = false; - char *supername = NULL; - const char *classname = NULL; - struct metaClassRecord newClass; - char strbuffer[1024]; - - // Only do the work to find the super class if we are - // not currently working on the kernel. The kernel is the end - // of all superclass chains by definition as the kernel must be binary - // compatible with itself. 
- if (file->fIsReloc) { - const char *suffix; - const struct fileRecord *superfile; - const struct nlist *supersym; - const struct section *section; - struct sectionRecord *sectionRec; - unsigned char sectind = sym->n_sect; - const char *superstr; - void **location; - int snamelen; - - // We can't resolve anything that isn't in a real section - // Note that the sectind is starts at one to make room for the - // NO_SECT flag but the fNSects field isn't offset so we have a - // '>' test. Which means this isn't an OSObject based class - if (sectind == NO_SECT || sectind > file->fNSects) { - result = true; - goto finish; - } - sectionRec = file->fSections + sectind - 1; - section = sectionRec->fSection; - location = (void **) ( file->fMachO + section->offset - + sym->n_value - section->addr ); - - supersym = findSymbolRefAtLocation(file, sectionRec, location, &superfile); - if (!supersym) { - result = true; // No superclass symbol then it isn't an OSObject. - goto finish; - } - - // Find string in file and skip leading '_' and then find the suffix - superstr = symbolname(superfile, supersym) + 1; - suffix = superstr + strlen(superstr) - sizeof(kGMetaSuffix) + 1; - if (suffix <= superstr || strcmp(suffix, kGMetaSuffix)) { - result = true; // Not an OSObject superclass so ignore it.. - goto finish; - } - - // Got a candidate so hand it over for class processing. 
- snamelen = suffix - superstr - sizeof(kOSObjPrefix) + 2; - supername = (char *) malloc(snamelen + 1); - bcopy(superstr + sizeof(kOSObjPrefix) - 2, supername, snamelen); - supername[snamelen] = '\0'; - } - - do { - break_if(getClass(file->fClassList, cname), - ("Duplicate class %s in %s\n", cname, file->fPath)); - - snprintf(strbuffer, sizeof(strbuffer), "%s%s", kVTablePrefix, cname); - newClass.fVTableSym = findSymbolByName(file, strbuffer); - break_if(!newClass.fVTableSym, - ("Can't find vtable %s in %s\n", cname, file->fPath)); - - newClass.fFile = file; - newClass.fSuperName = supername; - newClass.fPatchedVTable = NULL; - - // Can't use cname as it may be a stack variable - // However the vtable's string has the class name as a suffix - // so why don't we use that rather than mallocing a string. - classname = symbolname(file, newClass.fVTableSym) - + sizeof(kVTablePrefix) - 1; - break_if(!addClass(file, &newClass, classname), - ("recordClass - no memory?\n")); - - supername = NULL; - result = true; - } while (0); - -finish: - if (supername) - free(supername); - - return result; -} - - -static Boolean getMetaClassGraph(struct fileRecord *file) -{ - const struct nlist *sym; - int i, nsyms; - - // Search the symbol table for the local symbols that are generated - // by the metaclass system. There are three metaclass variables - // that are relevant. - // - // .metaClass A pointer to the meta class structure. - // .superClass A pointer to the super class's meta class. - // .gMetaClass The meta class structure itself. - // ___vt The VTable for the class . - // - // In this code I'm going to search for any symbols that - // ends in k31SuperClassSuffix as this indicates this class is a conforming - // OSObject subclass and will need to be patched, and it also - // contains a pointer to the super class's meta class structure. 
- sym = file->fLocalSyms; - for (i = 0, nsyms = file->fNLocal; i < nsyms; i++, sym++) { - const char *symname; - const char *suffix; - char classname[1024]; - unsigned char n_type = sym->n_type & (N_TYPE | N_EXT); - int cnamelen; - - // Check that the symbols is a global and that it has a name. - if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type) - || !sym->n_un.n_strx) - continue; - - // Only search from the last *sep* in the symbol. - // but skip the leading '_' in all symbols first. - symname = symbolname(file, sym) + 1; - if (symname[0] != kCPPSymbolPrefix[0] - || symname[1] != kCPPSymbolPrefix[1]) - continue; - - suffix = symname + strlen(symname) - sizeof(k31SuperClassSuffix) + 1; - if (suffix <= symname || strcmp(suffix, k31SuperClassSuffix)) - continue; - - // Got a candidate so hand it over for class processing. - cnamelen = suffix - symname - sizeof(kOSObjPrefix) + 2; - return_if(cnamelen + 1 >= (int) sizeof(classname), - false, ("Symbol %s is too long", symname)); - - bcopy(symname + sizeof(kOSObjPrefix) - 2, classname, cnamelen); - classname[cnamelen] = '\0'; - if (!recordClass(file, classname, sym)) - return false; - } - - return_if(!file->fClassList, false, ("Internal error, " - "getMetaClassGraph(%s) found no classes", file->fPath)); - - DEBUG_LOG(("Found %ld classes in %p for %s\n", - DataGetLength(file->fClassList)/sizeof(void*), - file->fClassList, file->fPath)); - - return true; -} - -static Boolean mergeOSObjectsForFile(const struct fileRecord *file) -{ - int i, nmerged; - Boolean foundDuplicates = false; - - DEBUG_LOG(("Merging file %s\n", file->fPath)); // @@@ gvdl: - - if (!file->fClassList) - return true; - - if (!sMergedFiles) { - sMergedFiles = DataCreate(0); - return_if(!sMergedFiles, false, - ("Unable to allocate memory metaclass list\n", file->fPath)); - } - - // Check to see if we have already merged this file - nmerged = DataGetLength(sMergedFiles) / sizeof(struct fileRecord *); - for (i = 0; i < nmerged; i++) { - if (file 
== ((void **) DataGetPtr(sMergedFiles))[i]) - return true; - } - - if (!sMergeMetaClasses) { - sMergeMetaClasses = DataCreate(0); - return_if(!sMergeMetaClasses, false, - ("Unable to allocate memory metaclass list\n", file->fPath)); - } - else { /* perform a duplicate check */ - int k, j, cnt1, cnt2; - struct metaClassRecord **list1, **list2; - - list1 = (struct metaClassRecord **) DataGetPtr(file->fClassList); - cnt1 = DataGetLength(file->fClassList) / sizeof(*list1); - list2 = (struct metaClassRecord **) DataGetPtr(sMergeMetaClasses); - cnt2 = DataGetLength(sMergeMetaClasses) / sizeof(*list2); - - for (k = 0; k < cnt1; k++) { - for (j = 0; j < cnt2; j++) { - if (!strcmp(list1[k]->fClassName, list2[j]->fClassName)) { - errprintf("duplicate class %s in %s & %s\n", - list1[k]->fClassName, - file->fPath, list2[j]->fFile->fPath); - - foundDuplicates = true; - } - } - } - } - if (foundDuplicates) - return false; - - return_if(!DataAppendBytes(sMergedFiles, &file, sizeof(file)), false, - ("Unable to allocate memory to merge %s\n", file->fPath)); - - return_if(!DataAppendData(sMergeMetaClasses, file->fClassList), false, - ("Unable to allocate memory to merge %s\n", file->fPath)); - - if (file == sKernelFile) - sMergedKernel = true; - - return true; -} - -// Returns a pointer to the base of the section offset by the sections -// base address. The offset is so that we can add nlist::n_values directly -// to this address and get a valid pointer in our memory. 
-static unsigned char * -getSectionForSymbol(const struct fileRecord *file, const struct nlist *symb, - void ***endP) -{ - const struct section *section; - unsigned char sectind; - unsigned char *base; - - sectind = symb->n_sect; // Default to symbols section - if ((symb->n_type & N_TYPE) == N_ABS && !file->fIsReloc) { - // Absolute symbol so we have to iterate over our sections - for (sectind = 1; sectind <= file->fNSects; sectind++) { - unsigned long start, end; - - section = file->fSections[sectind - 1].fSection; - start = section->addr; - end = start + section->size; - if (start <= symb->n_value && symb->n_value < end) { - // Found the relevant section - break; - } - } - } - - // Is the vtable in a valid section? - return_if(sectind == NO_SECT || sectind > file->fNSects, - (unsigned char *) -1, - ("%s isn't a valid kext, bad section reference\n", file->fPath)); - - section = file->fSections[sectind - 1].fSection; - - // for when we start walking the vtable so compute offset's now. - base = file->fMachO + section->offset; - *endP = (void **) (base + section->size); - - return base - section->addr; // return with addr offset -} - -static Boolean resolveKernelVTable(struct metaClassRecord *metaClass) -{ - const struct fileRecord *file; - struct patchRecord *patchedVTable; - void **curEntry, **vtableEntries, **endSection; - unsigned char *sectionBase; - struct patchRecord *curPatch; - int classSize; - - // Should never occur but it doesn't cost us anything to check. - if (metaClass->fPatchedVTable) - return true; - - DEBUG_LOG(("Kernel vtable %s\n", metaClass->fClassName)); // @@@ gvdl: - - // Do we have a valid vtable to patch? 
- return_if(!metaClass->fVTableSym, - false, ("Internal error - no class vtable symbol?\n")); - - file = metaClass->fFile; - - // If the metaClass we are being to ask is in the kernel then we - // need to do a quick scan to grab the fPatchList in a reliable format - // however we don't need to check the superclass in the kernel - // as the kernel vtables are always correct wrt themselves. - // Note this ends the superclass chain recursion. - return_if(file->fIsReloc, - false, ("Internal error - resolveKernelVTable is relocateable\n")); - - if (file->fNoKernelExecutable) { - // Oh dear attempt to map the kernel's VM into my memory space - return_if(file->fNoKernelExecutable, false, - ("Internal error - fNoKernelExecutable not implemented yet\n")); - } - - // We are going to need the base and the end - sectionBase = getSectionForSymbol(file, metaClass->fVTableSym, &endSection); - if (-1 == (long) sectionBase) - return false; - - vtableEntries = (void **) (sectionBase + metaClass->fVTableSym->n_value); - curEntry = vtableEntries + kVTablePreambleLen; - for (classSize = 0; curEntry < endSection && *curEntry; classSize++) - curEntry++; - - return_if(*curEntry, false, ("Bad kernel image, short section\n")); - - patchedVTable = (struct patchRecord *) - malloc((classSize + 1) * sizeof(struct patchRecord)); - return_if(!patchedVTable, false, ("resolveKernelVTable - no memory\n")); - - // Copy the vtable of this class into the patch table - curPatch = patchedVTable; - curEntry = vtableEntries + kVTablePreambleLen; - for (; *curEntry; curEntry++, curPatch++) { - void * addr = *curEntry; -#if !KERNEL - if (file->fSwapped) - addr = (void *) OSSwapInt32((uint32_t) addr); -#endif - curPatch->fSymbol = - findSymbolByAddress(file, addr); - if (curPatch->fSymbol) - { - curPatch->fType = kSymbolLocal; - curPatch->fFile = file; - } - else - { - curPatch->fSymbol = - findSymbolByAddressInAllFiles(file, addr, &curPatch->fFile); - if (!curPatch->fSymbol) { - errprintf("%s: 
!findSymbolByAddressInAllFiles(%p)\n", - file->fPath, addr); - return false; - } - curPatch->fType = kSymbolLocal; - } - } - - // Tag the end of the patch vtable - curPatch->fSymbol = NULL; - metaClass->fPatchedVTable = patchedVTable; - - return true; -} - -static char *addNewString(struct fileRecord *file, - const char *strname, unsigned int namelen) -{ - DataRef strings = 0; - char *newStr; - - namelen++; // Include terminating '\0'; - - // Make sure we have a string table as well for this symbol - if (file->fNewStringBlocks) { - DataRef *blockTable = (DataRef *) DataGetPtr(file->fNewStringBlocks); - int index = DataGetLength(file->fNewStringBlocks) / sizeof(DataRef*); - strings = blockTable[index - 1]; - if (DataRemaining(strings) < namelen) - strings = 0; - } - else - { - file->fNewStringBlocks = DataCreate(0); - return_if(!file->fNewStringBlocks, NULL, - ("Unable to allocate new string table %s\n", file->fPath)); - } - - if (!strings) { - int size = (namelen + 1023) & ~1023; - if (size < 16 * 1024) - size = 16 * 1024; - strings = DataCreate(size); - return_if(!strings, NULL, - ("Unable to allocate new string block %s\n", file->fPath)); - return_if( - !DataAppendBytes(file->fNewStringBlocks, &strings, sizeof(strings)), - false, ("Unable to allocate string table for %s\n", file->fPath)); - } - - newStr = DataGetEndPtr(strings); - DataAppendBytes(strings, strname, namelen); - return newStr; -} - -// reloc->fPatch must contain a valid pointer -static struct nlist * -getNewSymbol(struct fileRecord *file, - struct relocRecord *reloc, const char *supername) -{ - unsigned int size, i; - struct nlist **sym; - struct nlist *msym; - struct relocation_info *rinfo; - const char *newStr; - - if (!file->fNewSymbols) { - file->fNewSymbols = DataCreate(0); - return_if(!file->fNewSymbols, NULL, - ("Unable to allocate new symbol table for %s\n", file->fPath)); - } - - rinfo = (struct relocation_info *) reloc->fRInfo; - size = DataGetLength(file->fNewSymbols) / sizeof(struct 
nlist *); - sym = (struct nlist **) DataGetPtr(file->fNewSymbols); - for (i = 0; i < size; i++, sym++) { - int symnum = i + file->fSymtab->nsyms; - newStr = symNameByIndex(file, symnum); - if (!strcmp(newStr, supername)) { - rinfo->r_symbolnum = symnum; - file->fSymbolsDirty = true; - return *sym; - } - } - - if (reloc->fSymbol->n_un.n_strx >= 0) { - // This symbol has not been previously processed, so assert that it - // is a valid non-local symbol. I need this condition to be true for - // the later code to set to -1. Now, being the first time through, - // I'd better make sure that n_sect is NO_SECT. - - return_if(reloc->fSymbol->n_sect != NO_SECT, NULL, - ("Undefined symbol entry with non-zero section %s:%s\n", - file->fPath, symbolname(file, reloc->fSymbol))); - - // Mark the original symbol entry as having been processed. - // This means that we wont attempt to create the symbol again - // in the future if we come through a different path. - reloc->fSymbol->n_un.n_strx = - -reloc->fSymbol->n_un.n_strx; - - // Mark the old symbol as being potentially deletable I can use the - // n_sect field as the input symbol must be of type N_UNDF which means - // that the n_sect field must be set to NO_SECT otherwise it is an - // invalid input file. 
- reloc->fSymbol->n_sect = (unsigned char) -1; - } - - // If we are here we didn't find the symbol so create a new one now - msym = (struct nlist *) malloc(sizeof(struct nlist)); - return_if(!msym, - NULL, ("Unable to create symbol table entry for %s", file->fPath)); - return_if(!DataAppendBytes(file->fNewSymbols, &msym, sizeof(msym)), - NULL, ("Unable to grow symbol table for %s\n", file->fPath)); - - newStr = addNewString(file, supername, strlen(supername)); - if (!newStr) - return NULL; - - // If we are here we didn't find the symbol so create a new one now - return_if(!DataAppendBytes(file->fSym2Strings, &newStr, sizeof(newStr)), - NULL, ("Unable to grow symbol table for %s\n", file->fPath)); - file->fSymbToStringTable = (char **) DataGetPtr(file->fSym2Strings); - - // Offset the string index by the original string table size - // and negate the address to indicate that this is a 'new' symbol - msym->n_un.n_strx = -1; - msym->n_type = (N_EXT | N_UNDF); - msym->n_sect = NO_SECT; - msym->n_desc = 0; - msym->n_value = (unsigned long) newStr; - - rinfo->r_symbolnum = i + file->fSymtab->nsyms; - file->fSymbolsDirty = true; - return msym; -} - -static struct nlist * -fixOldSymbol(struct fileRecord *file, - const struct relocRecord *reloc, const char *supername) -{ - unsigned int namelen, oldnamelen; - struct nlist *sym = (struct nlist *) reloc->fSymbol; - char *oldname = symbolname(file, sym); - - // assert(sym->n_un.n_strx >= 0); - - namelen = strlen(supername); - - sym->n_un.n_strx = -sym->n_un.n_strx; - if (oldname && namelen < (oldnamelen = strlen(oldname))) - { - // Overwrite old string in string table - strlcpy((char *) oldname, supername, oldnamelen + 1); - file->fSymbolsDirty = true; - return sym; - } - - oldname = addNewString(file, supername, namelen); - if (!oldname) - return NULL; - - file->fSymbToStringTable[sym - file->fSymbolBase] = oldname; - file->fSymbolsDirty = true; - return sym; -} - -static enum patchState -symbolCompare(const struct fileRecord 
*file, - const struct nlist *classsym, - const char *supername) -{ - const char *classname; - - - // Check to see if the target function is locally defined - // if it is then we can assume this is a local vtable override - if ((classsym->n_type & N_TYPE) != N_UNDF) - return kSymbolLocal; - - // Check to see if both symbols point to the same symbol name - // if so then we are still identical. - classname = symbolname(file, classsym); - if (!strcmp(classname, supername)) - return kSymbolIdentical; - - // We know that the target's vtable entry is different from the - // superclass' vtable entry. This means that we will have to apply a - // patch to the current entry, however before returning lets check to - // see if we have a _RESERVEDnnn field 'cause we can use this as a - // registration point that must align between vtables. - if (strstr(supername, kReservedNamePrefix)) - return kSymbolMismatch; - - // OK, we have a superclass difference where the superclass doesn't - // reference a pad function so assume that the superclass is correct. - if (strstr(classname, kReservedNamePrefix)) - return kSymbolPadUpdate; - else - return kSymbolSuperUpdate; -} - -static Boolean patchVTable(struct metaClassRecord *metaClass) -{ - struct metaClassRecord *super = NULL; - struct fileRecord *file; - struct patchRecord *patchedVTable; - struct relocRecord **curReloc, **vtableRelocs, **endSection; - unsigned char *sectionBase; - int classSize; - - // Should never occur but it doesn't cost us anything to check. - if (metaClass->fPatchedVTable) - return true; - - // Do we have a valid vtable to patch? 
- return_if(!metaClass->fVTableSym, - false, ("Internal error - no class vtable symbol?\n")); - - file = metaClass->fFile; - - if (!file->fIsReloc) - { - // If the metaClass we are being to ask is already relocated then we - // need to do a quick scan to grab the fPatchList in a reliable format - // however we don't need to check the superclass in the already linked - // modules as the vtables are always correct wrt themselves. - // Note this ends the superclass chain recursion. - Boolean res; - res = resolveKernelVTable(metaClass); - return res; - } - - if (!metaClass->fSuperName) - return false; - - // The class isn't in the kernel so make sure that the super class - // is patched before patching ouselves. - super = getClass(sMergeMetaClasses, metaClass->fSuperName); - return_if(!super, false, ("Can't find superclass for %s : %s\n", - metaClass->fClassName, metaClass->fSuperName)); - - // Superclass recursion if necessary - if (!super->fPatchedVTable) { - Boolean res; - res = patchVTable(super); - if (!res) - return false; - } - - DEBUG_LOG(("Patching %s\n", metaClass->fClassName)); // @@@ gvdl: - - // We are going to need the base and the end - - sectionBase = getSectionForSymbol(file, - metaClass->fVTableSym, (void ***) &endSection); - if (-1 == (long) sectionBase) - return false; - - vtableRelocs = (struct relocRecord **) - (sectionBase + metaClass->fVTableSym->n_value); - curReloc = vtableRelocs + kVTablePreambleLen; - for (classSize = 0; curReloc < endSection && *curReloc; classSize++) - curReloc++; - - return_if(*curReloc, false, - ("%s isn't a valid kext, short section\n", file->fPath)); - - patchedVTable = (struct patchRecord *) - malloc((classSize + 1) * sizeof(struct patchRecord)); - return_if(!patchedVTable, false, ("patchedVTable - no memory\n")); - - do { - struct patchRecord *curPatch; - struct nlist *symbol; - - curPatch = patchedVTable; - curReloc = vtableRelocs + kVTablePreambleLen; - - // Grab the super table patches if necessary - // Can't be 
patching a kernel table as we don't walk super - // class chains in the kernel symbol space. - if (super && super->fPatchedVTable) { - const struct patchRecord *spp; - - spp = super->fPatchedVTable; - - for ( ; spp->fSymbol; curReloc++, spp++, curPatch++) { - const char *supername = - symbolname(spp->fFile, spp->fSymbol); - - symbol = (struct nlist *) (*curReloc)->fSymbol; - - curPatch->fType = symbolCompare(file, symbol, supername); - switch (curPatch->fType) { - case kSymbolIdentical: - case kSymbolLocal: - break; - - case kSymbolSuperUpdate: - symbol = getNewSymbol(file, (*curReloc), supername); - break; - - case kSymbolPadUpdate: - symbol = fixOldSymbol(file, (*curReloc), supername); - break; - - case kSymbolMismatch: - errprintf("%s is not compatible with its superclass, " - "%s superclass changed?\n", - metaClass->fClassName, super->fClassName); - goto abortPatch; - - default: - errprintf("Internal error - unknown patch type\n"); - goto abortPatch; - } - if (symbol) { - curPatch->fSymbol = symbol; - (*curReloc)->fSymbol = symbol; - curPatch->fFile = file; - } - else - goto abortPatch; - } - } - - // Copy the remainder of this class' vtable into the patch table - for (; *curReloc; curReloc++, curPatch++) { - // Local reloc symbols - curPatch->fType = kSymbolLocal; - curPatch->fSymbol = (struct nlist *) (*curReloc)->fSymbol; - curPatch->fFile = file; - } - - // Tag the end of the patch vtable - curPatch->fSymbol = NULL; - - metaClass->fPatchedVTable = patchedVTable; - return true; - } while(0); - -abortPatch: - if (patchedVTable) - free(patchedVTable); - - return false; -} - -static Boolean growImage(struct fileRecord *file, vm_size_t delta) -{ -#if !KERNEL - file->fMachOSize += delta; - return (file->fMachO + file->fMachOSize <= file->fPadEnd); -#else /* KERNEL */ - vm_address_t startMachO, endMachO, endMap; - vm_offset_t newMachO; - vm_size_t newsize; - unsigned long i, last = 0; - struct metaClassRecord **classes = NULL; - struct sectionRecord *section; - 
kern_return_t ret; - - startMachO = (vm_address_t) file->fMachO; - endMachO = startMachO + file->fMachOSize + delta; - endMap = (vm_address_t) file->fMap + file->fMapSize; - - // Do we have room in the current mapped image - if (endMachO < round_page_32(endMap)) { - file->fMachOSize += delta; - return true; - } - - newsize = endMachO - startMachO; - if (newsize < round_page_32(file->fMapSize)) { - DEBUG_LOG(("Growing image %s by moving\n", file->fPath)); - - // We have room in the map if we shift the macho image within the - // current map. We will have to patch up pointers into the object. - newMachO = (vm_offset_t) file->fMap; - bcopy((char *) startMachO, (char *) newMachO, file->fMachOSize); - } - else if (file->fIsKmem) { - // kmem_alloced mapping so we can try a kmem_realloc - ret = kmem_realloc(kernel_map, - (vm_address_t) file->fMap, - (vm_size_t) file->fMapSize, - &newMachO, - newsize); - if (KERN_SUCCESS != ret) - return false; - - // If the mapping didn't move then just return - if ((vm_address_t) file->fMap == newMachO) { - file->fMachOSize = file->fMapSize = newsize; - return true; - } - - DEBUG_LOG(("Growing image %s by reallocing\n", file->fPath)); - // We have relocated the kmem image so we are going to have to - // move all of the pointers into the image around. 
- } - else { - DEBUG_LOG(("Growing image %s by allocating\n", file->fPath)); - // The image doesn't have room for us and I can't kmem_realloc - // then I just have to bite the bullet and copy the object code - // into a bigger memory segment - ret = kmem_alloc(kernel_map, &newMachO, newsize); - - if (KERN_SUCCESS != ret) - return false; - bcopy((char *) startMachO, (void *) newMachO, file->fMachOSize); - file->fIsKmem = true; - } - - - file->fMap = file->fMachO = (unsigned char *) newMachO; - file->fMapSize = newsize; - file->fMachOSize += delta; // Increment the image size - - // If we are here then we have shifted the object image in memory - // I really should change all of my pointers into the image to machO offsets - // but I have run out of time. So I'm going to very quickly go over the - // cached data structures and add adjustments to the addresses that are - // affected. I wonder how long it will take me to get them all. - // - // For every pointer into the MachO I need to add an adjustment satisfying - // the following simultanous equations - // addr_old = macho_old + fixed_offset - // addr_new = macho_new + fixed_offset therefore: - // addr_new = addr_old + (macho_new - macho_old) -#define REBASE(addr, delta) ( *(vm_address_t*)(&addr) += (delta) ) - delta = newMachO - startMachO; - - // Rebase the cached-in object 'struct symtab_command' pointer - REBASE(file->fSymtab, delta); - - // Rebase the cached-in object 'struct nlist' pointer for all symbols - REBASE(file->fSymbolBase, delta); - - // Rebase the cached-in object 'struct nlist' pointer for local symbols - REBASE(file->fLocalSyms, delta); - - // Rebase the cached-in object 'char' pointer for the string table - REBASE(file->fStringBase, delta); - - // Ok now we have to go over all of the relocs one last time - // to clean up the pad updates which had their string index negated - // to indicate that we have finished with them. 
- section = file->fSections; - for (i = 0, last = file->fNSects; i < last; i++, section++) - REBASE(section->fSection, delta); - - // We only ever grow images that contain class lists so dont bother - // the check if file->fClassList is non-zero 'cause it can't be - // assert(file->fClassList); - last = DataGetLength(file->fClassList) - / sizeof(struct metaClassRecord *); - classes = (struct metaClassRecord **) DataGetPtr(file->fClassList); - for (i = 0; i < last; i++) { - struct patchRecord *patch; - - for (patch = classes[i]->fPatchedVTable; patch->fSymbol; patch++) { - vm_address_t symAddr = (vm_address_t) patch->fSymbol; - - // Only need to rebase if the symbol is part of the image - // If this is a new symbol then it was independantly allocated - if (symAddr >= startMachO && symAddr < endMachO) - REBASE(patch->fSymbol, delta); - } - } - - // Finally rebase all of the string table pointers - last = file->fSymtab->nsyms; - for (i = 0; i < last; i++) - REBASE(file->fSymbToStringTable[i], delta); - -#undef REBASE - - return true; - -#endif /* KERNEL */ -} - -// Note: This function is only called from kld_file_prepare_for_link() -// This function can only operate on 32 bit mach-o files -static Boolean -prepareFileForLink(struct fileRecord *file) -{ - unsigned long i, last, numnewsyms, newsymsize, newstrsize; - struct sectionRecord *section; - struct nlist **symp, *sym; - DataRef newStrings, *stringBlocks; - - // If we didn't even do a pseudo 'relocate' and dirty the image - // then we can just return now. 
- if (!file->fImageDirty) { -#if !KERNEL - if (file->fSwapped) { - kld_macho_unswap((struct mach_header *) file->fMachO, file->fSwapped, false); - file->fSwapped = false; - } -#endif - return true; - } - -DEBUG_LOG(("Linking 2 %s\n", file->fPath)); // @@@ gvdl: - - // We have to go over all of the relocs to repair the damage - // that we have done to the image when we did our 'relocation' - section = file->fSections; - for (i = 0, last = file->fNSects; i < last; i++, section++) { - unsigned char *sectionBase; - struct relocRecord *rec; - unsigned long j, nreloc; - - if (section->fRelocCache) { - sectionBase = file->fMachO + section->fSection->offset; - nreloc = section->fSection->nreloc; - rec = (struct relocRecord *) DataGetPtr(section->fRelocCache); - - // We will need to repair the reloc list - for (j = 0; j < nreloc; j++, rec++) { - void **entry; - struct nlist *repairSym; - - return_if(!rec->fRInfo, false, - ("Bad Mach-O file; cannot link\n")); - - // Repair Damage to object image - entry = (void **) (sectionBase + rec->fRInfo->r_address); - *entry = rec->fValue; - - // Check if the symbol that this relocation entry points - // to is marked as erasable - repairSym = (struct nlist *) rec->fSymbol; - if (repairSym && repairSym->n_type == (N_EXT | N_UNDF) - && repairSym->n_sect == (unsigned char) -1) { - // It is in use so we better clear the mark - repairSym->n_un.n_strx = -repairSym->n_un.n_strx; - repairSym->n_sect = NO_SECT; - } - } - - // Clean up the fRelocCache we don't need it any more. 
- DataRelease(section->fRelocCache); - section->fRelocCache = 0; - } - } - file->fImageDirty = false; // Image is clean - - // If we didn't dirty the symbol table then just return - if (!file->fSymbolsDirty) { -#if !KERNEL - if (file->fSwapped) { - kld_macho_unswap((struct mach_header *) file->fMachO, file->fSwapped, false); - file->fSwapped = false; - } -#endif - return true; - } - - // calculate total file size increase and check against padding - if (file->fNewSymbols) { - numnewsyms = DataGetLength(file->fNewSymbols); - symp = (struct nlist **) DataGetPtr(file->fNewSymbols); - } - else { - numnewsyms = 0; - symp = 0; - } - numnewsyms /= sizeof(struct nlist *); - file->fSymtab->nsyms += numnewsyms; - - // old sting size + 30% rounded up to nearest page - newstrsize = file->fSymtab->strsize * 21 / 16; - newstrsize = (newstrsize + PAGE_MASK) & ~PAGE_MASK; - newStrings = DataCreate(newstrsize); - return_if(!newStrings, false, - ("Unable to allocate a copy aside buffer, no memory\n")); - - newsymsize = numnewsyms * sizeof(struct nlist); - file->fStringBase += newsymsize; - file->fSymtab->stroff += newsymsize; - - last = file->fSymtab->nsyms - numnewsyms; - newstrsize = 0; - DataAppendBytes(newStrings, &newstrsize, 4); // Leading nuls - sym = file->fSymbolBase; - - // Pre-compute an already offset new symbol pointer. The offset is the - // orignal symbol table. - symp -= last; - for (i = 0; i < file->fSymtab->nsyms; i++, sym++) { - const char *str = symNameByIndex(file, i); - int len = strlen(str) + 1; - unsigned int strx; - - // Rebase sym in the new symbol region - if (i >= last) - sym = symp[i]; - - if (sym->n_un.n_strx < 0 && sym->n_type == (N_EXT | N_UNDF) - && (unsigned char) -1 == sym->n_sect) { - // after patching we find that this symbol is no longer in - // use. So invalidate it by converting it into an N_ABS - // symbol, remove the external bit and null out the name. 
- bzero(sym, sizeof(*sym)); - sym->n_type = N_ABS; - } - else { - // Repair the symbol for the getNewSymbol case. - if (-1 == sym->n_un.n_strx) - sym->n_value = 0; - - // Record the offset of the string in the new table - strx = DataGetLength(newStrings); - return_if(!DataAppendBytes(newStrings, str, len), false, - ("Unable to append string, no memory\n")); - - sym->n_un.n_strx = strx; - file->fSymbToStringTable[i] = file->fStringBase + strx; - } - } - - // Don't need the new strings any more - - if (file->fNewStringBlocks){ - last = DataGetLength(file->fNewStringBlocks) / sizeof(DataRef); - stringBlocks = (DataRef *) DataGetPtr(file->fNewStringBlocks); - } - else{ - last =0; - stringBlocks=0; - } - - for (i = 0; i < last; i++) - DataRelease(stringBlocks[i]); - - DataRelease(file->fNewStringBlocks); - file->fNewStringBlocks = 0; - - newstrsize = DataGetLength(newStrings); - newstrsize = (newstrsize + 3) & ~3; // Round to nearest word - return_if( - !growImage(file, newsymsize + newstrsize - file->fSymtab->strsize), - false, ("Unable to patch the extension, no memory\n", file->fPath)); - - // Push out the new symbol table if necessary - if (numnewsyms) { - caddr_t base; - - // Append the new symbols to the original symbol table. 
- base = (caddr_t) file->fSymbolBase - + (file->fSymtab->nsyms - numnewsyms) * sizeof(struct nlist); - symp = (struct nlist **) DataGetPtr(file->fNewSymbols); - for (i = 0; i < numnewsyms; i++, base += sizeof(struct nlist), symp++) - bcopy(*symp, base, sizeof(struct nlist)); - - DataRelease(file->fNewSymbols); - file->fNewSymbols = 0; - } - - // Push out the new string table if necessary - if (newStrings) { - unsigned long *base = (unsigned long *) file->fStringBase; - unsigned long actuallen = DataGetLength(newStrings); - - // Set the last word in string table to zero before copying data - base[(newstrsize / sizeof(unsigned long)) - 1] = 0; - - // Now copy the new strings back to the end of the file - bcopy((caddr_t) DataGetPtr(newStrings), file->fStringBase, actuallen); - - file->fSymtab->strsize = newstrsize; - - DataRelease(newStrings); - } - - file->fSymbolsDirty = false; -#if !KERNEL - if (file->fSwapped) { - kld_macho_unswap((struct mach_header *) file->fMachO, file->fSwapped, false); - file->fSwapped = false; - } -#endif - return true; -} - -// This function can only operate on 32 bit mach-o files -Boolean -#if KERNEL -kld_file_map(const char *pathName, - unsigned char *map, - size_t mapSize, - Boolean isKmem) -#else -kld_file_map(const char *pathName) -#endif /* KERNEL */ -{ - struct fileRecord file, *fp = 0; - - // Already done no need to repeat - fp = getFile(pathName); - if (fp) - return true; - - bzero(&file, sizeof(file)); - -#if KERNEL - file.fMap = map; - file.fMapSize = mapSize; - file.fIsKmem = isKmem; -#else - if (!mapObjectFile(&file, pathName)) - return false; -#endif /* KERNEL */ - - do { - struct machOMapping { - struct mach_header h; - struct load_command c[1]; - } *machO; - struct load_command *cmd; - boolean_t lookVMRange; - unsigned long i; - - if (!findBestArch(&file, pathName)) - break; - - machO = (struct machOMapping *) file.fMachO; - if (file.fMachOSize < machO->h.sizeofcmds) - break; - - // If the file type is MH_EXECUTE then this 
must be a kernel - // as all Kernel extensions must be of type MH_OBJECT - file.fIsKernel = (MH_EXECUTE == machO->h.filetype); - - for (i = 0, cmd = &machO->c[0], lookVMRange = true; i < machO->h.ncmds; i++) { - if (cmd->cmd == LC_SYMTAB) - file.fSymtab = (struct symtab_command *) cmd; - else if (cmd->cmd == LC_SEGMENT) { - struct segment_command *seg = (struct segment_command *)cmd; - int nsects = seg->nsects; - - if (lookVMRange) { - if (!strcmp("__PRELINK", seg->segname)) - // segments following __PRELINK are going to move, so ignore them - lookVMRange = false; - else if (!file.fVMAddr && !file.fVMEnd) { - file.fVMAddr = seg->vmaddr; - file.fVMEnd = seg->vmaddr + seg->vmsize; - } else { - if (seg->vmaddr < file.fVMAddr) - file.fVMAddr = seg->vmaddr; - if ((seg->vmaddr + seg->vmsize) > file.fVMEnd) - file.fVMEnd = seg->vmaddr + seg->vmsize; - } - } - - if (nsects) - return_if(!parseSegments(&file, seg), - false, ("%s isn't a valid mach-o, bad segment", - pathName)); - - if (file.fIsKernel) { -#if KERNEL - // We don't need to look for the LinkEdit segment unless - // we are running in the kernel environment. - if (!strcmp(kLinkEditSegName, seg->segname)) - file.fLinkEditSeg = seg; -#endif - } - } - cmd = (struct load_command *) ((UInt8 *) cmd + cmd->cmdsize); - } - break_if(!file.fSymtab, - ("%s isn't a valid mach-o, no symbols\n", pathName)); - - if (machO->h.flags & MH_INCRLINK) { - - file.fIsIncrLink = true; - machO->h.flags &= ~MH_INCRLINK; - -#if !KERNEL - // the symtab fileoffset is the end of seg0's vmsize, - // which can be (rarely) unaligned. 
- unsigned int - align = file.fSymtab->symoff % sizeof(long); - if (align != 0) { - align = sizeof(long) - align; - growImage(&file, align); - bcopy(file.fMachO + file.fSymtab->symoff, - file.fMachO + file.fSymtab->symoff + align, - file.fSymtab->stroff + file.fSymtab->strsize - file.fSymtab->symoff); - file.fSymtab->symoff += align; - file.fSymtab->stroff += align; - } -#endif - } - - if (!parseSymtab(&file, pathName)) - break; - - fp = addFile(&file, pathName); - if (!fp) - break; - - if (file.fFoundOSObject && !getMetaClassGraph(fp)) - break; - - if (file.fIsKernel) - sKernelFile = fp; - -#if KERNEL - // Automatically load the kernel's link edit segment if we are - // attempting to load a driver. - if (!sKernelFile) { - struct segment_command *sg; - size_t kernelSize; - Boolean ret; - - sg = (struct segment_command *) getsegbyname(kLinkEditSegName); - break_if(!sg, ("Can't find kernel link edit segment\n")); - - kernelSize = sg->vmaddr + sg->vmsize - (size_t) &_mh_execute_header; - ret = kld_file_map(kld_basefile_name, - (unsigned char *) &_mh_execute_header, kernelSize, - /* isKmem */ false); - break_if(!ret, ("kld can't map kernel file")); - } -#endif /* KERNEL */ - - return true; - } while(0); - - // Failure path, then clean up - if (fp) - // @@@ gvdl: for the time being leak the file ref in the file table - removeFile(fp); - else - unmapFile(&file); - - return false; -} - -void *kld_file_getaddr(const char *pathName, unsigned long *size) -{ - struct fileRecord *file = getFile(pathName); - - if (!file) - return 0; - - if (size) - *size = file->fMachOSize; - - return file->fMachO; -} - -void *kld_file_lookupsymbol(const char *pathName, const char *symname) -{ - struct fileRecord *file = getFile(pathName); - const struct nlist *sym; - const struct section *section; - unsigned char *sectionBase; - unsigned char sectind; - - return_if(!file, - NULL, ("Unknown file %s\n", pathName)); - - sym = findSymbolByName(file, symname); - - // May be a non-extern symbol so 
look for it there - if (!sym) { - unsigned int i, nsyms; - - sym = file->fSymbolBase; - for (i = 0, nsyms = file->fSymtab->nsyms; i < nsyms; i++, sym++) { - if ( (sym->n_type & N_EXT) ) { - sym = 0; - break; // Terminate search when we hit an extern - } - if ( (sym->n_type & N_STAB) ) - continue; - if ( !strcmp(symname, symNameByIndex(file, i)) ) - break; - } - } - - return_if(!sym, - NULL, ("Unknown symbol %s in %s\n", symname, pathName)); - - // Is the vtable in a valid section? - sectind = sym->n_sect; - return_if(sectind == NO_SECT || sectind > file->fNSects, NULL, - ("Malformed object file, invalid section reference for %s in %s\n", - symname, pathName)); - - section = file->fSections[sectind - 1].fSection; - sectionBase = file->fMachO + section->offset - section->addr; - - return (void *) (sectionBase + sym->n_value); -} - -Boolean kld_file_merge_OSObjects(const char *pathName) -{ - struct fileRecord *file = getFile(pathName); - - return_if(!file, - false, ("Internal error - unable to find file %s\n", pathName)); - - return mergeOSObjectsForFile(file); -} - -Boolean kld_file_patch_OSObjects(const char *pathName) -{ - struct fileRecord *file = getFile(pathName); - struct metaClassRecord **classes; - unsigned long i, last; - - return_if(!file, - false, ("Internal error - unable to find file %s\n", pathName)); - - DEBUG_LOG(("Patch file %s\n", pathName)); // @@@ gvdl: - - // If we don't have any classes we can return now. 
- if (!file->fClassList) - return true; - - // If we haven't alread merged the kernel then do it now - if (!sMergedKernel && sKernelFile) - mergeOSObjectsForFile(sKernelFile); - return_if(!sMergedKernel, false, ("Internal error no kernel?\n")); - - if (!mergeOSObjectsForFile(file)) - return false; - - // Patch all of the classes in this executable - last = DataGetLength(file->fClassList) / sizeof(void *); - classes = (struct metaClassRecord **) DataGetPtr(file->fClassList); - for (i = 0; i < last; i++) { - if (!patchVTable(classes[i])) { - // RY: Set a flag in the file list to invalidate this data. - // I would remove the file from the list, but that seems to be - // not worth the effort. - file->fIgnoreFile = TRUE; - - return false; - } - } - - return true; -} - -Boolean kld_file_prepare_for_link(void) -{ - if (sMergedFiles) { - unsigned long i, nmerged = 0; - struct fileRecord **files; - - // Check to see if we have already merged this file - nmerged = DataGetLength(sMergedFiles) / sizeof(struct fileRecord *); - files = (struct fileRecord **) DataGetPtr(sMergedFiles); - for (i = 0; i < nmerged; i++) { - if (!files[i]->fIgnoreFile && !prepareFileForLink(files[i])) - return false; - } - } - - // Clear down the meta class table and merged file lists - DataRelease(sMergeMetaClasses); - DataRelease(sMergedFiles); - sMergedFiles = sMergeMetaClasses = NULL; - sMergedKernel = false; - - return true; -} - -void kld_file_cleanup_all_resources(void) -{ - unsigned long i, nfiles; - -#if KERNEL // @@@ gvdl: - // Debugger("kld_file_cleanup_all_resources"); -#endif - - if (!sFilesTable || !(nfiles = DataGetLength(sFilesTable))) - return; // Nothing to do just return now - - nfiles /= sizeof(struct fileRecord *); - for (i = 0; i < nfiles; i++) - removeFile(((void **) DataGetPtr(sFilesTable))[i]); - - DataRelease(sFilesTable); - sFilesTable = NULL; - - // Don't really have to clean up anything more as the whole - // malloc engine is going to be released and I couldn't be 
bothered. -} - - -#if !KERNEL -#if 0 -static const struct fileRecord *sortFile; -static int symCompare(const void *vSym1, const void *vSym2) -{ - const struct nlist *sym1 = vSym1; - const struct nlist *sym2 = vSym2; - - { - unsigned int ind1, ind2; - - ind1 = sym1->n_type & N_TYPE; - ind2 = sym2->n_type & N_TYPE; - if (ind1 != ind2) { - // if sym1 is undefined then sym1 must come later than sym2 - if (ind1 == N_UNDF) - return 1; - // if sym2 is undefined then sym1 must come earlier than sym2 - if (ind2 == N_UNDF) - return -1; - /* drop out if neither are undefined */ - } - } - - { - const struct fileRecord *file = sortFile; - const char *name1, *name2; - - name1 = file->fStringBase + sym1->n_un.n_strx; - name2 = file->fStringBase + sym2->n_un.n_strx; - return strcmp(name1, name2); - } -} -#endif /* 0 */ - -Boolean kld_file_debug_dump(const char *pathName, const char *outName) -{ - const struct fileRecord *file = getFile(pathName); - int fd; - Boolean ret = false; - - return_if(!file, false, ("Unknown file %s for dumping\n", pathName)); - - fd = open(outName, O_WRONLY|O_CREAT|O_TRUNC, 0666); - return_if(-1 == fd, false, ("Can't create output file %s - %s(%d)\n", - outName, strerror(errno), errno)); - - do { -#if 0 - // Sorting doesn't work until I fix the relocs too? - - // sort the symbol table appropriately - unsigned int nsyms = file->fSymtab->nsyms - - (file->fLocalSyms - file->fSymbolBase); - sortFile = file; - heapsort((void *) file->fLocalSyms, nsyms, sizeof(struct nlist), - symCompare); -#endif - - break_if(-1 == write(fd, file->fMachO, file->fMachOSize), - ("Can't dump output file %s - %s(%d)\n", - outName, strerror(errno), errno)); - ret = true; - } while(0); - - close(fd); - - return ret; -} - -#endif /* !KERNEL */ - diff --git a/libsa/lastkernelconstructor.c b/libsa/lastkernelconstructor.c new file mode 100644 index 000000000..97980f080 --- /dev/null +++ b/libsa/lastkernelconstructor.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2008 Apple Inc. 
All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +static void last_kernel_constructor(void) __attribute__ ((constructor,section("__TEXT, initcode"))); + +extern void iokit_post_constructor_init(void); + +static void last_kernel_constructor(void) +{ + iokit_post_constructor_init(); +} + +__asm__(".zerofill __LAST, __last, _last_kernel_symbol, 0"); diff --git a/libsa/libsa/Makefile b/libsa/libsa/Makefile index f495a088a..c96349565 100644 --- a/libsa/libsa/Makefile +++ b/libsa/libsa/Makefile @@ -7,24 +7,26 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = mach -INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} ppc -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} i386 - -EXPINC_SUBDIRS = mach -EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} ppc -EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} i386 +INSTINC_SUBDIRS = +INSTINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} +INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} + +EXPINC_SUBDIRS = +EXPINC_SUBDIRS_PPC = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} +EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} INSTALL_MI_LIST = INSTALL_MI_DIR = libsa -EXPORT_MI_LIST = mkext.h setjmp.h stdlib.h unistd.h +EXPORT_MI_LIST = EXPORT_MI_DIR = libsa -INSTALL_KF_MI_LIST = - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libsa/libsa/catalogue.h b/libsa/libsa/catalogue.h deleted file mode 100644 index 0eb1e8200..000000000 --- a/libsa/libsa/catalogue.h +++ /dev/null @@ -1,10 +0,0 @@ -extern bool recordStartupExtensions(void); -extern bool addExtensionsFromArchive(OSData * mkext); -extern void removeStartupExtension(const char * extensionName); - -extern OSDictionary * getStartupExtensions(void); -extern OSArray * getPrelinkedModules(void); - -extern void clearStartupExtensionsAndLoaderInfo(void); - -extern bool uncompressModule(OSData *compressed, /* out */ OSData ** 
file); diff --git a/libsa/libsa/i386/Makefile b/libsa/libsa/i386/Makefile deleted file mode 100644 index 053aa5abd..000000000 --- a/libsa/libsa/i386/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_PPC = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -INSTALL_MD_LIST = - -INSTALL_MD_DIR = libsa/i386 - -EXPORT_MD_LIST = setjmp.h - -EXPORT_MD_DIR = libsa/i386 - -INSTALL_KF_MD_LIST = - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/libsa/libsa/kext.h b/libsa/libsa/kext.h deleted file mode 100644 index 8c10b1122..000000000 --- a/libsa/libsa/kext.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#include - -__private_extern__ kern_return_t load_kernel_extension(char * kmod_name); -__private_extern__ void load_security_extensions (void); - - -#ifdef __cplusplus -}; -#endif /* __cplusplus */ - diff --git a/libsa/libsa/mach/Makefile b/libsa/libsa/mach/Makefile deleted file mode 100644 index 099adeccd..000000000 --- a/libsa/libsa/mach/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_PPC = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -INSTALL_MI_LIST = - -INSTALL_MI_DIR = libsa/mach - -EXPORT_MI_LIST = mach.h - 
-EXPORT_MI_DIR = libsa/mach - -INSTALL_KF_MI_LIST = - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/libsa/libsa/mach/mach.h b/libsa/libsa/mach/mach.h deleted file mode 100644 index 95bd9f4ed..000000000 --- a/libsa/libsa/mach/mach.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _LIBSA_MACH_MACH_H_ -#define _LIBSA_MACH_MACH_H_ - -#include -#include - -__private_extern__ vm_map_t mach_task_self(void); - -char *mach_error_string(kern_return_t); - - -#endif /* _LIBSA_MACH_MACH_H_ */ diff --git a/libsa/libsa/malloc.h b/libsa/libsa/malloc.h deleted file mode 100644 index 4183eaff7..000000000 --- a/libsa/libsa/malloc.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef _LIBSA_MALLOC_H_ -#define _LIBSA_MALLOC_H_ - -#include - -__BEGIN_DECLS - -/***** - * These functions are the minimum necessary for use - * by kld and its client. - */ -void * malloc(size_t size); -void * realloc(void * address, size_t new_size); -void free(void * address); - -void malloc_init(void); -void malloc_reset(void); // Destroy all memory regions - - -/***** - * These functions aren't compiled into the kernel. - * Their definitions are in the files malloc_debug - * and malloc_unused, in case they're ever needed. 
- */ -#if 0 -void free_all(void); // "Free" all memory blocks -size_t malloc_size(void * address); -int malloc_is_valid(void * address); - -#ifdef DEBUG -size_t malloc_hiwat(void); -size_t malloc_current_usage(void); -size_t malloc_region_usage(void); -double malloc_peak_usage(void); -double malloc_min_usage(void); -size_t malloc_unused(void); -double malloc_current_efficiency(void); -void malloc_clear_hiwat(void); -void malloc_report(void); -int malloc_sanity_check(void); -#endif /* DEBUG */ -#endif /* 0 */ - -__END_DECLS - -#endif /* defined _LIBSA_MALLOC_H_ */ diff --git a/libsa/libsa/mkext.h b/libsa/libsa/mkext.h deleted file mode 100644 index 062786d94..000000000 --- a/libsa/libsa/mkext.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _MKEXT_H_ -#define _MKEXT_H_ 1 - -#include -#include - -#include - -#define MKEXT_MAGIC 'MKXT' -#define MKEXT_SIGN 'MOSX' - -#define MKEXT_EXTN ".mkext" - -// All binary values are big-endian - -// If all fields are 0 then this file slot is empty -// If compsize is zero then the file isn't compressed. 
-typedef struct mkext_file { - size_t offset; // 4 bytes - size_t compsize; // 4 bytes - size_t realsize; // 4 bytes - time_t modifiedsecs; // 4 bytes -} mkext_file; - -// The plist file entry is mandatory, but module may be empty -typedef struct mkext_kext { - mkext_file plist; // 16 bytes - mkext_file module; // 16 bytes -} mkext_kext; - -typedef struct mkext_header { - u_int32_t magic; // 'MKXT' - u_int32_t signature; // 'MOSX' - u_int32_t length; - u_int32_t adler32; - u_int32_t version; // vers resource, currently '1.0.0', 0x01008000 - u_int32_t numkexts; - cpu_type_t cputype; // CPU_TYPE_ANY for fat executables - cpu_subtype_t cpusubtype; // CPU_SUBTYPE_MULITPLE for executables - mkext_kext kext[1]; // 64 bytes/entry -} mkext_header; - -__BEGIN_DECLS -__private_extern__ u_int8_t * -compress_lzss(u_int8_t *dst, u_int32_t dstlen, u_int8_t *src, u_int32_t srclen); - -__private_extern__ int -decompress_lzss(u_int8_t *dst, u_int8_t *src, u_int32_t srclen); - -__private_extern__ u_int32_t -adler32(u_int8_t *src, int32_t length); - -__END_DECLS - -#endif /* _MKEXT_H_ */ diff --git a/libsa/libsa/ppc/Makefile b/libsa/libsa/ppc/Makefile deleted file mode 100644 index 6fcdf5278..000000000 --- a/libsa/libsa/ppc/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_PPC = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -INSTALL_MD_LIST = - -INSTALL_MD_DIR = libsa/ppc - -EXPORT_MD_LIST = setjmp.h - -EXPORT_MD_DIR = libsa/ppc - -INSTALL_KF_MD_LIST = - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/libsa/libsa/ppc/setjmp.h b/libsa/libsa/ppc/setjmp.h deleted file 
mode 100644 index a5c0492ae..000000000 --- a/libsa/libsa/ppc/setjmp.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ - -#ifndef _PPC_SETJMP_H_ -#define _PPC_SETJMP_H_ - -/* - * We save the following registers (marked as non-volatile in the ELF spec) - * - * r1 - stack pointer - * r13 - small data area pointer - * r14-r30 - local variables - * r31 - local variable/environment pointer - * - * cr - condition register - * lr - link register (to know where to jump back to) - * xer - fixed point exception register - * - * fpscr - floating point status and control - * f14-f31 - local variables - * - * which comes to 57 words. 
We round up to 64 for good measure. - */ - -typedef int jmp_buf[64]; - -#endif /* _PPC_SETJMP_H_ */ diff --git a/libsa/libsa/stdlib.h b/libsa/libsa/stdlib.h deleted file mode 100644 index 304f0807e..000000000 --- a/libsa/libsa/stdlib.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _LIBSA_STDLIB_H_ -#define _LIBSA_STDLIB_H_ - -#include -#include - -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef NULL -#if defined (__cplusplus) -#define NULL 0 -#else -#define NULL ((void *)0) -#endif -#endif - - -__private_extern__ const char *kld_basefile_name; - - -__BEGIN_DECLS - - -__private_extern__ void * malloc(size_t size); -__private_extern__ void free(void * address); -__private_extern__ void free_all(void); // "Free" all memory blocks -__private_extern__ void malloc_reset(void); // Destroy all memory regions -__private_extern__ void * realloc(void * address, size_t new_size); - -__private_extern__ char * strrchr(const char *cp, int ch); -__private_extern__ char * strstr(const char *in, const char *str); - -__private_extern__ void qsort( - void * array, - size_t nmembers, - size_t member_size, - int (*)(const void *, const void *)); - -__private_extern__ const void * bsearch( - register const void *key, - const void *base0, - size_t nmemb, - register size_t size, - register int (*compar)(const void *, const void *)); - -__END_DECLS - -#endif /* _LIBSA_STDLIB_H_ */ diff --git a/libsa/libsa/unistd.h b/libsa/libsa/unistd.h deleted file mode 100644 index b85c89a34..000000000 --- a/libsa/libsa/unistd.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _LIBSA_UNISTD_H_ -#define _LIBSA_UNISTD_H_ - - -#define getpagesize() PAGE_SIZE - - -#endif /* _LIBSA_UNISTD_H_ */ diff --git a/libsa/libsa/vers_rsrc.h b/libsa/libsa/vers_rsrc.h deleted file mode 100644 index 9ef27afa0..000000000 --- a/libsa/libsa/vers_rsrc.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _LIBSA_VERS_H_ -#define _LIBSA_VERS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef KERNEL -#include 
-#include -#include -#else -#include -#endif KERNEL - -typedef SInt64 VERS_version; -VERS_version VERS_parse_string(const char * vers_string); -int VERS_string(char * buffer, UInt32 length, VERS_version vers); - -#ifdef __cplusplus -} -#endif - -#endif _LIBSA_VERS_H_ diff --git a/libsa/load.c b/libsa/load.c deleted file mode 100644 index 89486e1bb..000000000 --- a/libsa/load.c +++ /dev/null @@ -1,2822 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/*************** -* HEADERS -***************/ -#ifndef KERNEL - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "vers_rsrc.h" - -#else - -#include -#include -#include -#include - -#endif /* not KERNEL */ - -#include "load.h" -#include "dgraph.h" -#include "kld_patch.h" - -/*************** -* MACROS -***************/ - -#ifndef KERNEL - -#define PRIV_EXT - -#else - -#define PRIV_EXT __private_extern__ - -#ifdef DEBUG -#define LOG_DELAY(x) IODelay((x) * 1000000) -#define VTYELLOW "\033[33m" -#define VTRESET "\033[0m" -#else -#define LOG_DELAY() -#define VTYELLOW -#define VTRESET -#endif /* DEBUG */ - -#endif /* not KERNEL */ - -/*************** -* FUNCTION PROTOS -***************/ - -#ifdef KERNEL -extern kern_return_t -kmod_create_internal( - kmod_info_t *info, - kmod_t *id); - -extern kern_return_t -kmod_destroy_internal(kmod_t id); - -extern kern_return_t -kmod_start_or_stop( - kmod_t id, - int start, - kmod_args_t *data, - mach_msg_type_number_t *dataCount); - -extern kern_return_t kmod_retain(kmod_t id); -extern kern_return_t kmod_release(kmod_t id); - -extern struct mach_header _mh_execute_header; -#endif /* KERNEL */ - - -// Used to pass info between kld library and callbacks -static dgraph_entry_t * G_current_load_entry = NULL; - -#ifndef KERNEL -static mach_port_t G_kernel_port = PORT_NULL; -static mach_port_t G_kernel_priv_port = PORT_NULL; -static int G_syms_only; - -static kload_error -register_prelink(dgraph_entry_t * entry, - kmod_info_t * local_kmod_info, vm_offset_t kernel_kmod_info); - -struct PrelinkState -{ - kmod_info_t modules[1]; -}; -struct PrelinkState * G_prelink; -CFMutableDataRef G_prelink_data; -CFMutableDataRef G_prelink_dependencies; - -#endif /* not KERNEL 
*/ - -// used by dgraph.c so can't be static -kload_log_level log_level = 0; - -#ifndef KERNEL -static void __kload_null_log(const char * format, ...); -static void __kload_null_err_log(const char * format, ...); -static int __kload_null_approve(int default_answer, const char * format, ...); -static int __kload_null_veto(int default_answer, const char * format, ...); -static const char * __kload_null_input(const char * format, ...); - -void (*__kload_log_func)(const char * format, ...) = - &__kload_null_log; -void (*__kload_err_log_func)(const char * format, ...) = &__kload_null_err_log; -int (*__kload_approve_func)(int default_answer, - const char * format, ...) = &__kload_null_approve; -int (*__kload_veto_func)(int default_answer, - const char * format, ...) = &__kload_null_veto; -const char * (*__kload_input_func)(const char * format, ...) = - &__kload_null_input; -#endif /* not KERNEL */ - -static unsigned long __kload_linkedit_address( - unsigned long size, - unsigned long headers_size); -static void __kload_clean_up_entry(dgraph_entry_t * entry); -static void __kload_clear_kld_globals(void); -static kload_error __kload_patch_dgraph(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file -#endif /* not KERNEL */ - ); -static kload_error __kload_load_modules(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file, - const char * patch_file, const char * patch_dir, - const char * symbol_file, const char * symbol_dir, - int do_load, int do_start_kmod, int do_prelink, - int interactive_level, - int ask_overwrite_symbols, int overwrite_symbols -#endif /* not KERNEL */ - ); - -static kload_error __kload_check_module_loaded( - dgraph_t * dgraph, - dgraph_entry_t * entry, -#ifndef KERNEL - kmod_info_t * kmod_list, -#endif /* not KERNEL */ - int log_if_already); - -static kload_error __kload_load_module(dgraph_t * dgraph, - dgraph_entry_t * entry, - int is_root -#ifndef KERNEL - , - const char * symbol_file, - const char * symbol_dir, - int do_load, - 
int interactive_level, - int ask_overwrite_symbols, - int overwrite_symbols -#endif /* not KERNEL */ - ); -static kload_error __kload_set_module_dependencies(dgraph_entry_t * entry); -static kload_error __kload_start_module(dgraph_entry_t * entry); - -#ifndef KERNEL -static kload_error __kload_output_patches( - dgraph_t * dgraph, - const char * patch_file, - const char * patch_dir, - int ask_overwrite_symbols, - int overwrite_symbols); - -Boolean _IOReadBytesFromFile(CFAllocatorRef alloc, const char *path, void **bytes, - CFIndex *length, CFIndex maxLength); -Boolean _IOWriteBytesToFile(const char *path, const void *bytes, CFIndex length); - -#endif /* not KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error kload_load_dgraph(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file, - const char * patch_file, const char * patch_dir, - const char * symbol_file, const char * symbol_dir, - int do_load, int do_start_kmod, int do_prelink, - int interactive_level, - int ask_overwrite_symbols, int overwrite_symbols -#endif /* not KERNEL */ - ) -{ - kload_error result = kload_error_none; - int one_has_address = 0; - int one_lacks_address = 0; - unsigned int i; -#ifndef KERNEL - int syms_only; - - syms_only = (!do_load) && (symbol_dir || symbol_file); - - if (log_level >= kload_log_level_load_details) { - kload_log_message("loading dependency graph:" KNL); - dgraph_log(dgraph); - } - - if (syms_only && log_level >= kload_log_level_load_details) { - kload_log_message("loading for symbol generation only" KNL); - } - - /***** - * If we're not loading and have no request to emit a symbol - * or patch file, there's nothing to do! 
- */ - if (!do_load && !symbol_dir && !symbol_file && - !patch_dir && !patch_file) { - - if (syms_only && log_level >= kload_log_level_load_details) { - kload_log_message("loader has no work to do" KNL); - } - - result = kload_error_none; // fixme: should this be USAGE error? - goto finish; - } - - /***** - * If we're doing symbols only, then all entries in the dgraph must - * have addresses assigned, or none must. - */ - if (syms_only) { - if (log_level >= kload_log_level_load_details) { - kload_log_message("checking whether modules have addresses assigned" KNL); - } - for (i = 0; i < dgraph->length; i++) { - struct dgraph_entry_t * entry = dgraph->load_order[i]; - if (entry->is_kernel_component) { - continue; - } - if (entry->loaded_address != 0) { - one_has_address = 1; - } else { - one_lacks_address = 1; - } - } - } -#endif /* not KERNEL */ - - if (one_has_address && one_lacks_address) { - kload_log_error( - "either all modules must have addresses set to nonzero values or " - "none must" KNL); - result = kload_error_invalid_argument; - goto finish; - } - -#ifndef KERNEL - /* we need the priv port to check/load modules in the kernel. - */ - if (PORT_NULL == G_kernel_priv_port) { - G_kernel_priv_port = mach_host_self(); /* if we are privileged */ - } -#endif /* not KERNEL */ - -/***** - * In the kernel, ALWAYS get load addresses of existing loaded kmods. - */ -#ifndef KERNEL - /***** - * If we don't have addresses, then get them from the kernel. 
- */ - if (!one_has_address && !do_prelink && (do_load || symbol_file || symbol_dir)) { -#endif /* not KERNEL */ - if (log_level >= kload_log_level_load_details) { - kload_log_message("getting module addresses from kernel" KNL); - } -#ifndef KERNEL - result = kload_set_load_addresses_from_kernel(dgraph, kernel_file, - do_load); -#else - result = kload_set_load_addresses_from_kernel(dgraph); -#endif /* not KERNEL */ - if (result == kload_error_already_loaded) { - -#ifndef KERNEL - if (do_load) { - goto finish; - } -#else - goto finish; -#endif /* not KERNEL */ - - } else if (result != kload_error_none) { - kload_log_error("can't check load addresses of modules" KNL); - goto finish; - } -#ifndef KERNEL - } -#endif /* not KERNEL */ - -#ifndef KERNEL - /***** - * At this point, if we're doing symbols only, it's an error to not - * have a load address for every module. - */ - if (syms_only && !do_prelink) { - if (log_level >= kload_log_level_load_details) { - kload_log_message("checking that all modules have addresses assigned" KNL); - } - for (i = 0; i < dgraph->length; i++) { - struct dgraph_entry_t * entry = dgraph->load_order[i]; - if (entry->is_kernel_component) { - continue; - } - if (!entry->loaded_address) { - kload_log_error( - "missing load address during symbol generation: %s" KNL, - entry->name); - result = kload_error_unspecified; - goto finish; - } - } - } - - if (do_prelink) - { - void * bytes; - CFIndex length; - CFAllocatorRef alloc; - - // We need a real allocator to pass to _IOReadBytesFromFile - alloc = CFRetain(CFAllocatorGetDefault()); - if (_IOReadBytesFromFile(alloc, "prelinkstate", &bytes, &length, 0)) - { - G_prelink_data = CFDataCreateMutable(alloc, 0); - CFDataAppendBytes(G_prelink_data, (UInt8 *) bytes, length); - CFAllocatorDeallocate(alloc, bytes); - } - G_prelink_dependencies = CFDataCreateMutable(alloc, 0); - if (_IOReadBytesFromFile(alloc, "prelinkdependencies", &bytes, &length, 0)) - { - CFDataAppendBytes(G_prelink_dependencies, (UInt8 
*) bytes, length); - CFAllocatorDeallocate(alloc, bytes); - } - CFRelease(alloc); - - if (!G_prelink_data) { - kload_log_error( - "can't get load address for prelink %s" KNL, kernel_file); - result = kload_error_link_load; - goto finish; - } - else - G_prelink = (struct PrelinkState *) CFDataGetMutableBytePtr(G_prelink_data); - } - else - G_prelink = 0; -#endif /* not KERNEL */ - -#ifndef KERNEL - - result = __kload_load_modules(dgraph, kernel_file, - patch_file, patch_dir, symbol_file, symbol_dir, - do_load, do_start_kmod, do_prelink, interactive_level, - ask_overwrite_symbols, overwrite_symbols); -#else - result = __kload_load_modules(dgraph); -#endif /* not KERNEL */ - -finish: - -#ifndef KERNEL - /* Dispose of the host port to prevent security breaches and port - * leaks. We don't care about the kern_return_t value of this - * call for now as there's nothing we can do if it fails. - */ - if (PORT_NULL != G_kernel_priv_port) { - mach_port_deallocate(mach_task_self(), G_kernel_priv_port); - G_kernel_priv_port = PORT_NULL; - } -#endif /* not KERNEL */ - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * current_entry = dgraph->graph[i]; - __kload_clean_up_entry(current_entry); - } - -#ifndef KERNEL - if (G_prelink) - { - SInt32 length; - const void * bytes; - Boolean success; - - length = CFDataGetLength(G_prelink_data); - bytes = (0 == length) ? (const void *)"" : CFDataGetBytePtr(G_prelink_data); - success = _IOWriteBytesToFile("prelinkstate", bytes, length); - if (!success) - { - kload_log_error("write prelinkstate" KNL); - result = kload_error_link_load; - } - length = CFDataGetLength(G_prelink_dependencies); - bytes = (0 == length) ? 
(const void *)"" : CFDataGetBytePtr(G_prelink_dependencies); - success = _IOWriteBytesToFile("prelinkdependencies", bytes, length); - if (!success) - { - kload_log_error("write prelinkdependencies" KNL); - result = kload_error_link_load; - } - } -#endif /* not KERNEL */ - - return result; -} - -#ifndef KERNEL -/******************************************************************************* -* This function claims the option flags d and D for object file dependencies -* and in-kernel dependencies, respectively. -*******************************************************************************/ -kload_error kload_load_with_arglist( - int argc, char **argv, - const char * kernel_file, - const char * patch_file, const char * patch_dir, - const char * symbol_file, const char * symbol_dir, - int do_load, int do_start_kmod, - int interactive_level, - int ask_overwrite_symbols, int overwrite_symbols) -{ - kload_error result = kload_error_none; - dgraph_error_t dgraph_result; - int syms_only = (!do_load) && (symbol_file || symbol_dir); - - static dgraph_t dependency_graph; - - /* Zero out fields in dependency graph for proper error handling later. - */ - bzero(&dependency_graph, sizeof(dependency_graph)); - - dgraph_result = dgraph_init_with_arglist(&dependency_graph, - syms_only, "-d", "-D", argc, argv); - if (dgraph_result == dgraph_error) { - kload_log_error("error processing dependency list" KNL); - result = kload_error_unspecified; - goto finish; - } else if (dgraph_result == dgraph_invalid) { - // anything to print here, or did init call print something? 
- result = kload_error_invalid_argument; - goto finish; - } - - result = kload_load_dgraph(&dependency_graph, kernel_file, - patch_file, patch_dir, symbol_file, symbol_dir, - do_load, do_start_kmod, false /* do_prelink */, interactive_level, - ask_overwrite_symbols, overwrite_symbols); - -finish: - return result; -} -#endif /* not KERNEL */ -/******************************************************************************* -* This function can only operate on 32 bit mach object file symbol table -* entries. -*******************************************************************************/ -static -kload_error __kload_keep_symbols(dgraph_entry_t * entry) -{ - struct mach_header * hdr; - struct segment_command * seg; - struct nlist * sym; - struct symtab_command * symcmd; - unsigned long idx, ncmds; - vm_size_t size; - vm_address_t mem; - struct load_cmds { - struct mach_header hdr; - struct segment_command seg; - struct symtab_command symcmd; - }; - struct load_cmds * cmd; - unsigned int symtabsize; - - if (entry->symbols) - return kload_error_none; - - hdr = entry->linked_image; - ncmds = hdr->ncmds; - seg = (struct segment_command *)(hdr + 1); - for (idx = 0; - idx < ncmds; - idx++, seg = (struct segment_command *)(((vm_offset_t)seg) + seg->cmdsize)) - { - if (LC_SYMTAB == seg->cmd) - break; - } - if (idx >= ncmds) - { - kload_log_error("no LC_SYMTAB" KNL); - return kload_error_unspecified; - } - - symcmd = (struct symtab_command *) seg; - - symtabsize = symcmd->stroff + symcmd->strsize - symcmd->symoff; - - size = sizeof(struct load_cmds) + symtabsize; - - mem = (vm_offset_t) malloc(size); - - cmd = (struct load_cmds *) mem; - sym = (struct nlist *) (cmd + 1); - - cmd->hdr = *hdr; - cmd->symcmd = *symcmd; - bcopy((const void *) (((vm_offset_t) hdr) + symcmd->symoff), - sym, - symtabsize); - - hdr = (struct mach_header *) mem; - cmd->hdr.ncmds = 2; - cmd->hdr.sizeofcmds = sizeof(struct load_cmds) - sizeof(struct mach_header); - cmd->hdr.flags &= ~MH_INCRLINK; - - 
cmd->symcmd.stroff -= (symcmd->symoff - sizeof(struct load_cmds)); - cmd->symcmd.symoff = sizeof(struct load_cmds); - - cmd->seg.cmd = LC_SEGMENT; - cmd->seg.cmdsize = sizeof(struct segment_command); - strlcpy(cmd->seg.segname, SEG_LINKEDIT, sizeof(cmd->seg.segname)); - cmd->seg.vmaddr = 0; - cmd->seg.vmsize = 0; - cmd->seg.fileoff = cmd->symcmd.symoff; - cmd->seg.filesize = symtabsize; - cmd->seg.maxprot = 7; - cmd->seg.initprot = 1; - cmd->seg.nsects = 0; - cmd->seg.flags = 0; - - sym = (struct nlist *) (cmd + 1); - for (idx = 0; idx < symcmd->nsyms; idx++, sym++) - { - if ( (sym->n_type & N_STAB) != 0) - { - sym->n_type = N_ABS; - sym->n_desc = 0; - sym->n_value = sym->n_un.n_strx; - sym->n_un.n_strx = 0; - sym->n_sect = NO_SECT; - } - else if ( (sym->n_type & N_TYPE) == N_SECT) - { - sym->n_sect = NO_SECT; - sym->n_type = (sym->n_type & ~N_TYPE) | N_ABS; - } - } - if (log_level >= kload_log_level_load_details) - { - kload_log_message("__kload_keep_symbols %s, nsyms %ld, 0x%x bytes" KNL, - entry->name, (unsigned long)symcmd->nsyms, size); - } - - entry->symbols = mem; - entry->symbols_malloc = mem; - entry->symbols_length = size; - - return kload_error_none; -} - - -/******************************************************************************* -* This function can only operate on 32 bit mach object files -*******************************************************************************/ -static -kload_error __kload_make_opaque_basefile(dgraph_t * dgraph, struct mach_header * hdr) -{ - struct segment_command * seg; - struct segment_command * data_seg; - struct segment_command * text_seg; - struct section * sec; - unsigned int j; - vm_offset_t offset; - unsigned long idx, ncmds; - vm_size_t size; - vm_address_t mem, out; - static vm_address_t keep_base_image; - static vm_size_t keep_base_size; - - if (dgraph->opaque_base_image) - return kload_error_none; - - if (keep_base_image) - { - dgraph->opaque_base_image = keep_base_image; - dgraph->opaque_base_length = 
keep_base_size; - return kload_error_none; - } - - data_seg = text_seg = NULL; - ncmds = hdr->ncmds; - seg = (struct segment_command *)(hdr + 1); - for (idx = 0; - idx < ncmds; - idx++, seg = (struct segment_command *)(((vm_offset_t)seg) + seg->cmdsize)) - { - if (LC_SEGMENT != seg->cmd) - continue; - if (!strcmp(SEG_TEXT, seg->segname)) - text_seg = seg; - else if (!strcmp(SEG_DATA, seg->segname)) - data_seg = seg; - } - if (!text_seg || !data_seg) - { - kload_log_error("no SEG_TEXT or SEG_DATA" KNL); - return kload_error_unspecified; - } - - size = sizeof(struct mach_header) + text_seg->cmdsize + data_seg->cmdsize; - mem = (vm_offset_t) malloc(size); - - out = mem; - bcopy(hdr, (void *) out, sizeof(struct mach_header)); - hdr = (struct mach_header *) out; - out += sizeof(struct mach_header); - - bcopy(text_seg, (void *) out, text_seg->cmdsize); - text_seg = (struct segment_command *) out; - out += text_seg->cmdsize; - - bcopy(data_seg, (void *) out, data_seg->cmdsize); - data_seg = (struct segment_command *) out; - out += data_seg->cmdsize; - - hdr->ncmds = 2; - hdr->sizeofcmds = text_seg->cmdsize + data_seg->cmdsize; - - offset = hdr->sizeofcmds; - - text_seg->fileoff = offset; - text_seg->filesize = 0; - - sec = (struct section *)(text_seg + 1); - for (j = 0; j < text_seg->nsects; j++, sec++) - { -// sec->addr = (unsigned long) addr; - sec->size = 0; - sec->offset = offset; - sec->nreloc = 0; - } - - data_seg->fileoff = offset; - data_seg->filesize = 0; - - sec = (struct section *)(data_seg + 1); - for (j = 0; j < data_seg->nsects; j++, sec++) - { -// sec->addr = (unsigned long) addr; - sec->size = 0; - sec->offset = offset; - sec->nreloc = 0; - } - - dgraph->opaque_base_image = mem; - dgraph->opaque_base_length = size; - keep_base_image = mem; - keep_base_size = size; - - return kload_error_none; -} - -/******************************************************************************* -* 
-*******************************************************************************/ -static -kload_error __kload_load_modules(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file, - const char * patch_file, - const char * patch_dir, - const char * symbol_file, - const char * symbol_dir, - int do_load, - int do_start_kmod, - int do_prelink, - int interactive_level, - int ask_overwrite_symbols, - int overwrite_symbols -#endif /* not KERNEL */ - ) -{ - kload_error result = kload_error_none; -#ifndef KERNEL - unsigned long int kernel_size = 0; - kern_return_t mach_result = KERN_SUCCESS; -#else - const char *kernel_file = "(kernel)"; -#endif /* not KERNEL */ - char *kernel_base_addr = NULL; - int kld_result; - Boolean cleanup_kld_loader = false; - unsigned int i; - char opaque_now = false; - - /* We have to map all object files to get their CFBundleIdentifier - * names. - */ -#ifndef KERNEL - result = kload_map_dgraph(dgraph, kernel_file); -#else - result = kload_map_dgraph(dgraph); -#endif /* not KERNEL */ - if (result != kload_error_none) { - kload_log_error("error mapping object files" KNL); - goto finish; - } - -#ifndef KERNEL - result = __kload_patch_dgraph(dgraph, kernel_file); -#else - result = __kload_patch_dgraph(dgraph); -#endif /* not KERNEL */ - if (result != kload_error_none) { - // FIXME: print an error message here? - goto finish; - } - -#ifndef KERNEL - // FIXME: check error return - __kload_output_patches(dgraph, patch_file, patch_dir, - ask_overwrite_symbols, overwrite_symbols); - - /***** - * If we're not loading or writing symbols, we're done. 
- */ - if (!do_load && !do_prelink && !symbol_file && !symbol_dir) { - goto finish; - } - - if (do_load && PORT_NULL == G_kernel_port) { - mach_result = task_for_pid(mach_task_self(), 0, &G_kernel_port); - if (mach_result != KERN_SUCCESS) { - kload_log_error("unable to get kernel task port: %s" KNL, - mach_error_string(mach_result)); - kload_log_error("you must be running as root to load " - "modules into the kernel" KNL); - result = kload_error_kernel_permission; - goto finish; - } - } -#endif /* not KERNEL */ - - kld_address_func(&__kload_linkedit_address); - -#ifndef KERNEL - G_syms_only = (!do_load) && (symbol_file || symbol_dir || patch_dir); - - kernel_base_addr = kld_file_getaddr(kernel_file, &kernel_size); - if (!kernel_base_addr) { - kload_log_error( - "can't get load address for kernel %s" KNL, kernel_file); - result = kload_error_link_load; - goto finish; - } -#else /* KERNEL */ - - kernel_base_addr = (char *) &_mh_execute_header; - -#endif /* not KERNEL */ - - kld_result = true; - if (dgraph->has_symbol_sets) - { - result = __kload_make_opaque_basefile(dgraph, (struct mach_header *) kernel_base_addr); - if (result != kload_error_none) { - kload_log_error("can't construct opaque base image from %s" KNL, kernel_file); - goto finish; - } - - kld_result = kld_load_basefile_from_memory(kernel_file, - (char *) dgraph->opaque_base_image, - dgraph->opaque_base_length); - } -#ifndef KERNEL - else - kld_result = kld_load_basefile_from_memory(kernel_file, - (char *) kernel_base_addr, kernel_size); -#endif /* not KERNEL */ - - if (!kld_result) { - kload_log_error("can't link base image %s" KNL, kernel_file); - result = kload_error_link_load; - goto finish; - } - - cleanup_kld_loader = true; - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * current_entry = dgraph->load_order[i]; - - opaque_now |= current_entry->opaque_link; - - if (kOpaqueLink & opaque_now) - { - unsigned int k, j; - - if (log_level >= kload_log_level_load_details) - { - 
kload_log_message("opaque link for %s" KNL, current_entry->name); - } - - kld_set_link_options(KLD_STRIP_ALL); // KLD_STRIP_NONE - - if (dgraph->have_loaded_symbols) - { - kld_unload_all(1); - if (kRawKernelLink & current_entry->opaque_link) { -#ifndef KERNEL - kld_result = kld_load_basefile_from_memory(kernel_file, - (char *) kernel_base_addr, kernel_size); -#endif - } else { - kld_result = kld_load_basefile_from_memory(kernel_file, - (char *) dgraph->opaque_base_image, - dgraph->opaque_base_length); - dgraph->have_loaded_symbols = false; - } - if (!kld_result) { - kload_log_error("can't link base image %s" KNL, kernel_file); - result = kload_error_link_load; - goto finish; - } - } - - for (j = 0; j < i; j++) - { - - dgraph_entry_t * image_dep = dgraph->load_order[j]; - - if (current_entry->opaque_link) - { - for (k = 0; - (k < current_entry->num_dependencies) - && (current_entry->dependencies[k] != image_dep); - k++) {} - - if (k == current_entry->num_dependencies) - continue; - } - - if (!current_entry->opaque_link && image_dep->opaques) - { - // kpi not on direct dependency list - continue; - } - if (kRawKernelLink & image_dep->opaques) - { - // raw kernel already in base image - continue; - } - - if (!image_dep->symbols) - { - kload_log_error("internal error; no dependent symbols" KNL); - result = kload_error_link_load; - goto finish; - } - else - { - struct mach_header * kld_header; - -#ifndef KERNEL - kld_result = kld_load_from_memory(&kld_header, image_dep->name, - (char *) image_dep->symbols, image_dep->symbols_length, NULL); -#else - kld_result = kld_load_from_memory(&kld_header, image_dep->name, - (char *) image_dep->symbols, image_dep->symbols_length); -#endif /* not KERNEL */ - if (!kld_result) { - kload_log_error("can't link dependent image %s" KNL, image_dep->name); - result = kload_error_link_load; - goto finish; - } - kld_forget_symbol("_kmod_info"); - dgraph->have_loaded_symbols = true; - } - } - } /* opaque_now */ - - if (dgraph->has_opaque_links 
-#ifndef KERNEL - || symbol_file || symbol_dir -#endif - ) - kld_set_link_options(KLD_STRIP_NONE); - else - kld_set_link_options(KLD_STRIP_ALL); - -#ifndef KERNEL - result = __kload_load_module(dgraph, current_entry, - (current_entry == dgraph->root), - symbol_file, symbol_dir, do_load, - interactive_level, ask_overwrite_symbols, overwrite_symbols); -#else - result = __kload_load_module(dgraph, current_entry, - (current_entry == dgraph->root)); -#endif /* not KERNEL */ - if (result != kload_error_none) { - goto finish; - } - - if (dgraph->has_opaque_links && (current_entry != dgraph->root)) - { - if (!(kRawKernelLink & current_entry->opaques)) { - result = __kload_keep_symbols(current_entry); - } - if (result != kload_error_none) { - kload_log_error("__kload_keep_symbols() failed for module %s" KNL, - current_entry->name); - goto finish; - } - } - -#ifndef KERNEL - if (do_load && current_entry->do_load) { -#else - if (current_entry->do_load) { -#endif /* not KERNEL */ - result = __kload_set_module_dependencies(current_entry); - if ( ! 
(result == kload_error_none || - result == kload_error_already_loaded) ) { - goto finish; - } - -#ifndef KERNEL - if ( (interactive_level == 1 && current_entry == dgraph->root) || - (interactive_level == 2) ) { - - int approve = (*__kload_approve_func)(1, - "\nStart module %s (answering no will abort the load)", - current_entry->name); - - if (approve > 0) { - do_start_kmod = true; // override 'cause user said so - } else { - kern_return_t mach_result; - if (approve < 0) { - kload_log_message("error reading user response; " - "destroying loaded module" KNL); - } else { - kload_log_message("user canceled module start; " - "destroying loaded module" KNL); - } - mach_result = kmod_destroy(G_kernel_priv_port, current_entry->kmod_id); - if (mach_result != KERN_SUCCESS) { - kload_log_error("kmod_destroy() failed" KNL); - } - if (approve < 0) { - result = kload_error_unspecified; - goto finish; - } else { - result = kload_error_user_abort; - goto finish; - } - } - } -#endif /* not KERNEL */ - -#ifndef KERNEL - if (current_entry != dgraph->root || - (current_entry == dgraph->root && do_start_kmod)) { -#endif /* not KERNEL */ - - result = __kload_start_module(current_entry); - if ( ! (result == kload_error_none || - result == kload_error_already_loaded) ) { - goto finish; -#ifndef KERNEL - } else if (interactive_level || - log_level >= kload_log_level_load_details) { -#else - } else if (log_level >= kload_log_level_load_details) { -#endif /* not KERNEL */ - - kload_log_message("started module %s" KNL, - current_entry->name); - } /* log_level */ -#ifndef KERNEL - } /* current_entry... */ -#endif /* not KERNEL */ - - -#ifndef KERNEL - } /* if do_load */ -#else - } /* if do_load */ -#endif /* not KERNEL */ - } /* for i, dgraph->length */ - -finish: - -#ifndef KERNEL - /* Dispose of the kernel port to prevent security breaches and port - * leaks. We don't care about the kern_return_t value of this - * call for now as there's nothing we can do if it fails. 
- */ - if (PORT_NULL != G_kernel_port) { - mach_port_deallocate(mach_task_self(), G_kernel_port); - G_kernel_port = PORT_NULL; - } -#endif /* not KERNEL */ - - if (cleanup_kld_loader) { - kld_unload_all(1); - } - - return result; -} - - -/******************************************************************************* -* -*******************************************************************************/ - -#ifndef KERNEL -#define __KLOAD_SYMBOL_EXTENSION ".sym" -#endif /* not KERNEL */ - -static -kload_error __kload_load_module(dgraph_t * dgraph, - dgraph_entry_t * entry, -#ifdef KERNEL - __unused int is_root -#else /* not KERNEL */ - int is_root, - const char * symbol_file, - const char * symbol_dir, - int do_load, - int interactive_level, - int ask_overwrite_symbols, - int overwrite_symbols - #endif /* not KERNEL */ - ) -{ - kload_error result = kload_error_none; - - int kld_result; - int mach_result; - struct mach_header * kld_header; - const char * kmod_symbol = "_kmod_info"; - unsigned long kernel_kmod_info; - kmod_info_t * local_kmod_info = NULL; - char * dest_address = 0; -#ifndef KERNEL - char * allocated_filename = NULL; - char * symbol_filename = NULL; - int file_check; - vm_address_t vm_buffer = 0; -#endif /* not KERNEL */ - - /* A kernel component is by nature already linked and loaded and has - * no work to be done upon it. 
- */ - if (entry->is_kernel_component && !entry->is_symbol_set) { - result = kload_error_none; - goto finish; - } - - G_current_load_entry = entry; - - if (log_level >= kload_log_level_load_basic) { -#ifndef KERNEL - if (do_load) { -#endif /* not KERNEL */ - kload_log_message("link/loading file %s" KNL, entry->name); -#ifndef KERNEL - } else { - kload_log_message("linking file %s" KNL, entry->name); - } -#endif /* not KERNEL */ - } - -#ifndef KERNEL - if (entry->link_output_file != entry->name) { - symbol_filename = entry->link_output_file; - } - - if (symbol_filename) { - file_check = kload_file_exists(symbol_filename); - if (file_check < 0) { - kload_log_error("error checking existence of file %s" KNL, - symbol_filename); - } else if (file_check > 0 && !overwrite_symbols) { - - if (!ask_overwrite_symbols) { - kload_log_message("symbol file %s exists; not overwriting" KNL, - symbol_filename); - symbol_filename = NULL; - } else { - int approve = (*__kload_approve_func)(1, - "\nSymbol file %s exists; overwrite", symbol_filename); - - if (approve < 0) { - result = kload_error_unspecified; - goto finish; - } else if (approve == 0) { - if (allocated_filename) free(allocated_filename); - allocated_filename = NULL; - symbol_filename = NULL; - } - } - } - } - - if (symbol_filename && - (interactive_level || - log_level >= kload_log_level_basic) ) { - - kload_log_message("writing symbol file %s" KNL, symbol_filename); - } - - if (do_load) { - if (interactive_level && entry->loaded_address) { - kload_log_message( - "module %s is already loaded as %s at address 0x%08x" KNL, - entry->name, entry->expected_kmod_name, - entry->loaded_address); - } else if ( (interactive_level == 1 && is_root) || - (interactive_level == 2) ) { - - int approve = (*__kload_approve_func)(1, - "\nLoad module %s", entry->name); - - if (approve < 0) { - result = kload_error_unspecified; - goto finish; - } else if (approve == 0) { - result = kload_error_user_abort; - goto finish; - } - } - } -#endif /* 
not KERNEL */ - - entry->object = kld_file_getaddr(entry->name, &entry->object_length); - if (!entry->object) { - kload_log_error("kld_file_getaddr() failed for module %s" KNL, - entry->name); - __kload_clear_kld_globals(); - result = kload_error_link_load; - goto finish; - } - - if (entry->is_symbol_set) { - entry->symbols = (vm_address_t) entry->object; - entry->symbols_length = entry->object_length; - -#ifndef KERNEL - if (symbol_filename) { - if (!_IOWriteBytesToFile(symbol_filename, (void *) entry->symbols, entry->symbols_length)) { - kload_log_error("write symbol file failed for module %s" KNL, - entry->name); - __kload_clear_kld_globals(); - result = kload_error_link_load; - goto finish; - } - symbol_filename = 0; - if (G_prelink && (entry->name != entry->link_output_file)) - { - kload_log_error("prelink %s %s %s" KNL, - entry->name, entry->link_output_file, entry->expected_kmod_name); - register_prelink(entry, NULL, NULL); - } - } -#endif /* not KERNEL */ - if (entry->opaques) { - result = kload_error_none; - goto finish; - } - } - -#ifndef KERNEL - kld_result = kld_load_from_memory(&kld_header, entry->name, - entry->object, entry->object_length, symbol_filename); -#else - kld_result = kld_load_from_memory(&kld_header, entry->name, - entry->object, entry->object_length); -#endif /* not KERNEL */ - -#ifndef KERNEL - fflush(stdout); - fflush(stderr); -#endif /* not KERNEL */ - - dgraph->have_loaded_symbols = true; - - if (!kld_result || !entry->kernel_load_address) { - kload_log_error("kld_load_from_memory() failed for module %s" KNL, - entry->name); - __kload_clear_kld_globals(); - entry->need_cleanup = 1; - result = kload_error_link_load; - goto finish; - } - - if (entry->is_symbol_set) { - result = kload_error_none; - goto finish; - } - - entry->linked_image = kld_header; - entry->linked_image_length = -1; // unknown! 
- -/* If we're in the kernel and not loading (as when handling an - * already-loaded dependency), we don't need to waste any CPU - * cycles looking up the kmod_info struct. - */ -#ifdef KERNEL - if (entry->do_load) { -#endif /* KERNEL */ - - kld_result = kld_lookup(kmod_symbol, &kernel_kmod_info); - if (!kld_result) { - kload_log_error("kld_lookup(\"%s\") failed for module %s" KNL, - kmod_symbol, entry->name); - entry->need_cleanup = 1; - result = kload_error_link_load; - goto finish; - } - -#ifdef KERNEL - } -#endif /* KERNEL */ - - kld_result = kld_forget_symbol(kmod_symbol); -#ifndef KERNEL - fflush(stdout); - fflush(stderr); -#endif /* not KERNEL */ - if (!kld_result) { - kload_log_error("kld_forget_symbol(\"%s\") failed for module %s" KNL, - kmod_symbol, entry->name); - entry->need_cleanup = 1; - result = kload_error_link_load; - goto finish; - } - -/* This section is always done in userland, but in kernel space - * only if we're loading the kext, because what we have in kernel - * space for an already-loaded kext is the kext itself, which - * must not be touched again after it's been loaded and started. - */ -#ifdef KERNEL - if (entry->do_load) -#endif /* KERNEL */ - { - - - /* Get the linked image's kmod_info by translating from the - * destined kernel-space address at kernel_kmod_info to an - * offset from kld_header. - */ - local_kmod_info = (kmod_info_t *)((unsigned long)kernel_kmod_info - - (unsigned long)G_current_load_entry->kernel_load_address + - (unsigned long)kld_header); - - /* Stamp the bundle ID and version from the entry over anything - * resident inside the kmod. 
- */ - bzero(local_kmod_info->name, sizeof(local_kmod_info->name)); - strlcpy(local_kmod_info->name, entry->expected_kmod_name, sizeof(local_kmod_info->name)); - - bzero(local_kmod_info->version, sizeof(local_kmod_info->version)); - strlcpy(local_kmod_info->version, entry->expected_kmod_vers, sizeof(local_kmod_info->version)); - - if (log_level >= kload_log_level_details) { - kload_log_message("kmod name: %s" KNL, local_kmod_info->name); - kload_log_message("kmod start @ 0x%x (offset 0x%lx)" KNL, - (vm_address_t)local_kmod_info->start, - (unsigned long)local_kmod_info->start - (unsigned long)G_current_load_entry->kernel_load_address); - kload_log_message("kmod stop @ 0x%x (offset 0x%lx)" KNL, - (vm_address_t)local_kmod_info->stop, - (unsigned long)local_kmod_info->stop - (unsigned long)G_current_load_entry->kernel_load_address); - } - - if (!local_kmod_info->start || !local_kmod_info->start) { - kload_log_error( - "error for module file %s; start or stop address is zero" KNL, - entry->name); - entry->need_cleanup = 1; - result = kload_error_link_load; - goto finish; - } - - /* Record link info into kmod_info struct, rounding the hdr_size - * to fit the adjustment that was made in __kload_linkedit_address(). - */ - if (entry->kernel_alloc_address) { - local_kmod_info->address = entry->kernel_alloc_address; - } else { - local_kmod_info->address = entry->loaded_address; - } - local_kmod_info->size = entry->kernel_alloc_size; - local_kmod_info->hdr_size = round_page(entry->kernel_hdr_size); - - } - -#ifndef KERNEL - if (G_prelink && (entry->name != entry->link_output_file)) - { - register_prelink(entry, local_kmod_info, kernel_kmod_info); - } - - if (do_load && entry->do_load) { - mach_result = vm_allocate(mach_task_self(), &vm_buffer, - entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); - if (mach_result != KERN_SUCCESS) { - kload_log_error("unable to vm_allocate() copy buffer" KNL); - entry->need_cleanup = 1; - result = kload_error_no_memory; // FIXME: kernel error? 
- goto finish; - } - - dest_address = (char *)vm_buffer; - - memcpy(dest_address, kld_header, entry->kernel_hdr_size); - memcpy(dest_address + round_page(entry->kernel_hdr_size), - (void *)((unsigned long)kld_header + entry->kernel_hdr_size), - entry->kernel_load_size - entry->kernel_hdr_size); - - mach_result = vm_write(G_kernel_port, entry->kernel_alloc_address, - vm_buffer, entry->kernel_alloc_size); - if (mach_result != KERN_SUCCESS) { - kload_log_error("unable to write module to kernel memory" KNL); - entry->need_cleanup = 1; - result = kload_error_kernel_error; - goto finish; - } - - mach_result = kmod_create(G_kernel_priv_port, - (vm_address_t)kernel_kmod_info, &(entry->kmod_id)); - -#else - if (entry->do_load) { - dest_address = (char *)entry->kernel_alloc_address; - memcpy(dest_address, kld_header, entry->kernel_hdr_size); - memcpy(dest_address + round_page(entry->kernel_hdr_size), - (void *)((unsigned long)kld_header + entry->kernel_hdr_size), - entry->kernel_load_size - entry->kernel_hdr_size); - - /* We've written data & instructions into kernel memory, so flush - * the data cache and invalidate the instruction cache. 
- */ - flush_dcache(entry->kernel_alloc_address, entry->kernel_alloc_size, false); - invalidate_icache(entry->kernel_alloc_address, entry->kernel_alloc_size, false); - - mach_result = kmod_create_internal( - (kmod_info_t *)kernel_kmod_info, &(entry->kmod_id)); - -#endif /* not KERNEL */ - - if (mach_result != KERN_SUCCESS) { - kload_log_error("unable to register module with kernel" KNL); - entry->need_cleanup = 1; - result = kload_error_kernel_error; - goto finish; - } - -#ifndef KERNEL - if (interactive_level || log_level >= kload_log_level_load_basic) { -#else - if (log_level >= kload_log_level_load_basic) { -#endif /* not KERNEL */ - kload_log_message( - "module %s created as # %d at address 0x%x, size %ld" KNL, - entry->expected_kmod_name, entry->kmod_id, - entry->kernel_alloc_address, - entry->kernel_alloc_size); - -#ifndef KERNEL - } -#else - } -#endif /* not KERNEL */ - -#ifndef KERNEL - if (interactive_level) { - kload_log_message( - "You can now break to the debugger and set breakpoints " - " for this extension." 
KNL); - } -#endif /* not KERNEL */ - -#ifndef KERNEL - } -#else - } -#endif /* not KERNEL */ - -finish: - -#ifndef KERNEL - if (allocated_filename) { - free(allocated_filename); - } - if (vm_buffer) { - vm_deallocate(mach_task_self(), vm_buffer, entry->kernel_alloc_size); - } -#endif /* not KERNEL */ - __kload_clear_kld_globals(); - - return result; -} - -/******************************************************************************* -*******************************************************************************/ - -#ifndef KERNEL -static kload_error -register_prelink(dgraph_entry_t * entry, - kmod_info_t * local_kmod_info, vm_offset_t kernel_kmod_info) -{ - CFIndex i, j, depoffset; - Boolean exists; - kmod_info_t desc; - - depoffset = CFDataGetLength(G_prelink_dependencies) / sizeof(CFIndex); - - for (i = 0; i < entry->num_dependencies; i++) - { - exists = false; - for (j = 1; (j < (1 + G_prelink->modules[0].id)); j++) - { - exists = (0 == strcmp(entry->dependencies[i]->expected_kmod_name, - G_prelink->modules[j].name)); - if (exists) - break; - } - if (!exists) - { - bzero(&desc, sizeof(desc)); - strcpy(desc.name, entry->dependencies[i]->expected_kmod_name); - - if (log_level >= kload_log_level_basic) { - kload_log_message("[%d] (dep)\n %s" KNL, - G_prelink->modules[0].id + 1, desc.name); - } - G_prelink->modules[0].id++; - CFDataAppendBytes(G_prelink_data, (UInt8 *) &desc, sizeof(desc)); - G_prelink = (struct PrelinkState *) CFDataGetMutableBytePtr(G_prelink_data); - } - - G_prelink->modules[0].reference_count++; - OSWriteBigInt32(&j, 0, j); - CFDataAppendBytes(G_prelink_dependencies, (UInt8 *) &j, sizeof(j)); - } - if (log_level >= kload_log_level_basic) { - kload_log_message("[%d] 0x%08x info 0x%08x\n %s,\n %s" KNL, - G_prelink->modules[0].id + 1, entry->kernel_load_address, - kernel_kmod_info, entry->link_output_file, entry->name); - } - - if (local_kmod_info) - desc = *local_kmod_info; - else - { - bzero(&desc, sizeof(desc)); - desc.size = 
entry->symbols_length; - } - - desc.id = kernel_kmod_info; - desc.reference_count = entry->num_dependencies; - desc.reference_list = (kmod_reference_t *) depoffset; - - /* Stamp the bundle ID and version from the entry over anything - * resident inside the kmod. - */ - bzero(desc.name, sizeof(local_kmod_info->name)); - strcpy(desc.name, entry->expected_kmod_name); - bzero(desc.version, sizeof(local_kmod_info->version)); - strcpy(desc.version, entry->expected_kmod_vers); - - G_prelink->modules[0].id++; - CFDataAppendBytes(G_prelink_data, (UInt8 *) &desc, sizeof(desc)); - G_prelink = (struct PrelinkState *) CFDataGetMutableBytePtr(G_prelink_data); - - return kload_error_none; -} - -#endif - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -#ifndef KERNEL -kload_error kload_map_dgraph( - dgraph_t * dgraph, - const char * kernel_file) -#else -kload_error kload_map_dgraph( - dgraph_t * dgraph) -#endif /* not KERNEL */ -{ - kload_error result = kload_error_none; - unsigned int i; - - if (log_level >= kload_log_level_load_details) { -#ifndef KERNEL - kload_log_message("mapping the kernel file %s" KNL, kernel_file); -#else - kload_log_message("mapping the kernel" KNL); -#endif /* not KERNEL */ - } - -#ifndef KERNEL - if (!kld_file_map(kernel_file)) { - result = kload_error_link_load; - goto finish; - } -#endif /* not KERNEL */ - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * entry = dgraph->load_order[i]; - - if (entry->is_kernel_component && !entry->is_symbol_set) { - continue; - } - - result = kload_map_entry(entry); - if (result != kload_error_none) { - goto finish; - } - } - -finish: - return result; - -} - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error kload_map_entry(dgraph_entry_t * 
entry) -{ - kload_error result = kload_error_none; - - if (entry->is_kernel_component && !entry->is_symbol_set) { - kload_log_error("attempt to map kernel component %s" KNL, entry->name); - result = kload_error_invalid_argument; - goto finish; - } - - if (log_level >= kload_log_level_load_details) { - kload_log_message("mapping module file %s" KNL, entry->name); - } - - if (kld_file_getaddr(entry->name, NULL)) { - if (log_level >= kload_log_level_load_details) { - kload_log_message("module file %s is already mapped" KNL, entry->name); - } - result = kload_error_none; - goto finish; - } - -#ifndef KERNEL - if (!kld_file_map(entry->name)) { -#else - if (!kld_file_map(entry->name, entry->object, entry->object_length, - entry->object_is_kmem)) { -#endif /* not KERNEL */ - kload_log_error("error mapping module file %s" KNL, entry->name); - - result = kload_error_link_load; - goto finish; -#ifndef KERNEL - } -#else - } -#endif /* not KERNEL */ - - entry->is_mapped = true; - - /* Clear these bits now, as the kld patch module now owns the info - * and it is subject to change. We reset them in the entry from the - * kld patch module as needed. - */ - entry->object = 0; - entry->object_length = 0; -#ifdef KERNEL - entry->object_is_kmem = false; -#endif /* KERNEL */ - - // FIXME: Stop using this symbol; have the info passed in by - // FIXME: ...the kext management library. 
-#ifndef KERNEL - if (!entry->is_kernel_component && !kld_file_lookupsymbol(entry->name, "_kmod_info")) { - kload_log_error("%s does not not contain kernel extension code" KNL, - entry->name); - result = kload_error_executable_bad; - goto finish; - } -#endif /* not KERNEL */ - -finish: - return result; -} - -#ifndef KERNEL -/******************************************************************************* -* -*******************************************************************************/ -kload_error kload_request_load_addresses( - dgraph_t * dgraph, - const char * kernel_file) -{ - kload_error result = kload_error_none; - int i; - const char * user_response = NULL; // must free - int scan_result; - unsigned int address; - - /* We have to map all object files to get their CFBundleIdentifier - * names. - */ - result = kload_map_dgraph(dgraph, kernel_file); - if (result != kload_error_none) { - kload_log_error("error mapping object files" KNL); - goto finish; - } - - // fixme: this shouldn't be printf, should it? 
- printf("enter the hexadecimal load addresses for these modules:\n"); - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * entry = dgraph->load_order[i]; - - if (!entry) { - result = kload_error_unspecified; - goto finish; - } - - if (entry->is_kernel_component) { - continue; - } - - if (!entry->is_mapped) { - result = kload_error_unspecified; - goto finish; - } - - user_response = __kload_input_func("%s:", - entry->expected_kmod_name); - if (!user_response) { - result = kload_error_unspecified; - goto finish; - } - scan_result = sscanf(user_response, "%x", &address); - if (scan_result < 1 || scan_result == EOF) { - result = kload_error_unspecified; - goto finish; - } - entry->loaded_address = address; - } - -finish: - return result; - -} - -/******************************************************************************* -* addresses is a NULL-terminated list of string of the form "module_id@address" -*******************************************************************************/ -kload_error kload_set_load_addresses_from_args( - dgraph_t * dgraph, - const char * kernel_file, - char ** addresses) -{ - kload_error result = kload_error_none; - int i, j; - - - /* We have to map all object files to get their CFBundleIdentifier - * names. - */ - result = kload_map_dgraph(dgraph, kernel_file); - if (result != kload_error_none) { - kload_log_error("error mapping object files" KNL); - goto finish; - } - - /***** - * Run through and assign all addresses to their relevant module - * entries. 
- */ - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * entry = dgraph->load_order[i]; - - if (!entry) { - result = kload_error_unspecified; - goto finish; - } - - if (entry->is_kernel_component) { - continue; - } - - if (!entry->is_mapped) { - result = kload_error_unspecified; - goto finish; - } - - for (j = 0; addresses[j]; j++) { - char * this_addr = addresses[j]; - char * address_string = NULL; - unsigned int address; - unsigned int module_namelen = strlen(entry->expected_kmod_name); - - if (!this_addr) { - result = kload_error_unspecified; - goto finish; - } - - if (strncmp(this_addr, entry->expected_kmod_name, module_namelen)) { - continue; - } - if (this_addr[module_namelen] != '@') { - continue; - } - - address_string = index(this_addr, '@'); - if (!address_string) { - result = kload_error_unspecified; - goto finish; - } - address_string++; - address = strtoul(address_string, NULL, 16); - entry->loaded_address = address; - } - } - - /***** - * Now that we've done that see that all non-kernel modules do have - * addresses set. If even one doesn't, we can't complete the link - * relocation of symbols, so return a usage error. - */ - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * entry = dgraph->load_order[i]; - - if (entry->is_kernel_component) { - continue; - } - - if (!entry->loaded_address) { - result = kload_error_invalid_argument; - goto finish; - } - } - -finish: - return result; - -} - -/******************************************************************************* -* This function requires G_kernel_priv_port to be set before it will work. 
-*******************************************************************************/ -kload_error kload_set_load_addresses_from_kernel( - dgraph_t * dgraph, - const char * kernel_file, - int do_load) -{ - kload_error result = kload_error_none; - int mach_result; - kmod_info_t * loaded_modules = NULL; - int loaded_bytecount = 0; - unsigned int i; - - - /***** - * We have to map the dgraph's modules before checking whether they've - * been loaded. - */ - result = kload_map_dgraph(dgraph, kernel_file); - if (result != kload_error_none) { - kload_log_error("can't map module files" KNL); - goto finish; - } - - - /* First clear all the load addresses. - */ - for (i = 0; i < dgraph->length; i++) { - struct dgraph_entry_t * entry = dgraph->load_order[i]; - entry->loaded_address = 0; - } - - mach_result = kmod_get_info(G_kernel_priv_port, - (void *)&loaded_modules, &loaded_bytecount); - if (mach_result != KERN_SUCCESS) { - kload_log_error("kmod_get_info() failed" KNL); - result = kload_error_kernel_error; - goto finish; - } - - /***** - * Find out which modules have already been loaded & verify - * that loaded versions are same as requested. - */ - for (i = 0; i < dgraph->length; i++) { - kload_error cresult; - dgraph_entry_t * current_entry = dgraph->load_order[i]; - - /* If necessary, check whether the current module is already loaded. - * (We already did the root module above.) - */ - cresult = __kload_check_module_loaded(dgraph, current_entry, - loaded_modules, do_load); - if ( ! 
(cresult == kload_error_none || - cresult == kload_error_already_loaded) ) { - goto finish; - } - if (current_entry == dgraph->root && - cresult == kload_error_already_loaded) { - - result = cresult; - } - } - -finish: - - if (loaded_modules) { - vm_deallocate(mach_task_self(), (vm_address_t)loaded_modules, - loaded_bytecount); - loaded_modules = 0; - } - - return result; -} - -#else -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error kload_set_load_addresses_from_kernel( - dgraph_t * dgraph) -{ - kload_error result = kload_error_none; -#ifndef KERNEL - int mach_result; - kmod_info_t * loaded_modules = NULL; - int loaded_bytecount = 0; -#endif /* not KERNEL */ - unsigned int i; - - - /***** - * We have to map the dgraph's modules before checking whether they've - * been loaded. - */ - result = kload_map_dgraph(dgraph); - if (result != kload_error_none) { - kload_log_error("can't map module files" KNL); - goto finish; - } - - - /* First clear all the load addresses. - */ - for (i = 0; i < dgraph->length; i++) { - struct dgraph_entry_t * entry = dgraph->load_order[i]; - entry->loaded_address = 0; - } - - /***** - * Find out which modules have already been loaded & verify - * that loaded versions are same as requested. - */ - for (i = 0; i < dgraph->length; i++) { - kload_error cresult; - dgraph_entry_t * current_entry = dgraph->load_order[i]; - - /* If necessary, check whether the current module is already loaded. - * (We already did the root module above.) - */ - cresult = __kload_check_module_loaded(dgraph, current_entry, false); - if ( ! 
(cresult == kload_error_none || - cresult == kload_error_already_loaded) ) { - goto finish; - } - if (current_entry == dgraph->root && - cresult == kload_error_already_loaded) { - - result = cresult; - } - } - -finish: - - return result; -} -#endif /* not KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -#ifdef KERNEL -extern kern_return_t kmod_load_from_cache(const char * kmod_name); -#endif /* KERNEL */ - -static kmod_info_t * __kload_find_kmod_info(const char * kmod_name -#ifndef KERNEL - , - kmod_info_t * kmod_list -#endif /* not KERNEL */ - ) -{ -#ifndef KERNEL - unsigned int i; - - for (i = 0; ; i++) { - kmod_info_t * current_kmod = &(kmod_list[i]); - if (0 == strcmp(current_kmod->name, kmod_name)) { - return current_kmod; - } - if (kmod_list[i].next == 0) { - break; - } - } - return NULL; -#else - kmod_info_t * info; - info = kmod_lookupbyname_locked(kmod_name); - if (!info && (KERN_SUCCESS == kmod_load_from_cache(kmod_name))) { - info = kmod_lookupbyname_locked(kmod_name); - } - return info; -#endif /* not KERNEL */ -} - -/******************************************************************************* -* -*******************************************************************************/ -static -kload_error __kload_check_module_loaded( - dgraph_t * dgraph, - dgraph_entry_t * entry, -#ifndef KERNEL - kmod_info_t * kmod_list, -#endif /* not KERNEL */ - int log_if_already) -{ - kload_error result = kload_error_none; - const char * kmod_name; - kmod_info_t * current_kmod = 0; - - VERS_version entry_vers; - VERS_version loaded_vers; - - if (false && entry->is_kernel_component) { - kmod_name = entry->name; - } else { - kmod_name = entry->expected_kmod_name; - if (log_level >= kload_log_level_load_details) { - kload_log_message("checking whether module file %s is already loaded" KNL, - kmod_name); - } - } - -#ifndef KERNEL - current_kmod 
= __kload_find_kmod_info(kmod_name, kmod_list); -#else - current_kmod = __kload_find_kmod_info(kmod_name); -#endif /* not KERNEL */ - - if (!current_kmod) { - goto finish; - } - - entry->do_load = 0; - entry->kmod_id = current_kmod->id; - entry->loaded_address = current_kmod->address; - - if (entry->is_kernel_component) { - goto finish; - } - - if (log_level >= kload_log_level_load_details) { - kload_log_message("module file %s is loaded; checking status" KNL, - kmod_name); - } - - // We really want to move away from having this info in a kmod.... - // - loaded_vers = VERS_parse_string(current_kmod->version); - if (loaded_vers < 0) { - kload_log_error( - "can't parse version string \"%s\" of loaded module %s" KNL, - current_kmod->version, - current_kmod->name); - result = kload_error_unspecified; - goto finish; - } - - entry_vers = VERS_parse_string(entry->expected_kmod_vers); - if (entry_vers < 0) { - kload_log_error( - "can't parse version string \"%s\" of module file %s" KNL, - entry->expected_kmod_name, - kmod_name); - result = kload_error_unspecified; - goto finish; - } - - if (loaded_vers != entry_vers) { - kload_log_error( - "loaded version %s of module %s differs from " - "requested version %s" KNL, - current_kmod->version, - current_kmod->name, - entry->expected_kmod_name); - if (entry == dgraph->root) { - result = kload_error_loaded_version_differs; - } else { - result = kload_error_dependency_loaded_version_differs; - } - goto finish; - } else { - - if (log_if_already && log_level >= - kload_log_level_load_basic) { - - kload_log_message( - "module %s (identifier %s) is already loaded" KNL, - entry->name, kmod_name); - } - result = kload_error_already_loaded; - goto finish; - } - -finish: -#ifdef KERNEL - // Do this ONLY if in the kernel! 
- if (current_kmod) { - kfree(current_kmod, sizeof(kmod_info_t)); - } -#endif /* KERNEL */ - return result; -} - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error __kload_patch_dgraph(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file -#endif /* not KERNEL */ - ) -{ - kload_error result = kload_error_none; - unsigned int i; - -#ifndef KERNEL - if (!kld_file_merge_OSObjects(kernel_file)) { - result = kload_error_link_load; - goto finish; - } -#endif /* not KERNEL */ - - for (i = 0; i < dgraph->length; i++) { - dgraph_entry_t * current_entry = dgraph->load_order[i]; - - /* The kernel has already been patched. - */ - if (current_entry->is_kernel_component) { - continue; - } - - if (log_level >= kload_log_level_load_details) { - kload_log_message("patching C++ code in module %s" KNL, - current_entry->name); - } - -#ifndef KERNEL - /* In userland, we call the patch function for all kmods, - * loaded or not, because we don't have all the info that - * the kernel environment has. - */ - if (!kld_file_patch_OSObjects(current_entry->name)) { - result = kload_error_link_load; // FIXME: need a "patch" error? - goto finish; - } -#else - /* In the kernel, we call the merge function for already-loaded - * kmods, since the kld patch environment retains info for kmods - * that have already been patched. The patch function does a little - * more work, and is only for kmods that haven't been processed yet. - * NOTE: We are depending here on kload_check_module_loaded() - * having been called, which is guaranteed by kload_load_dgraph() - * is used, but not by its subroutines (such as - * __kload_load_modules()). - */ - if (current_entry->loaded_address) { - if (!kld_file_merge_OSObjects(current_entry->name)) { - result = kload_error_link_load; // FIXME: need a "patch" error? 
- goto finish; - } - } else { - if (!kld_file_patch_OSObjects(current_entry->name)) { - result = kload_error_link_load; // FIXME: need a "patch" error? - goto finish; - } - } -#endif /* not KERNEL */ - - } - - if (!kld_file_prepare_for_link()) { - result = kload_error_link_load; // FIXME: need more specific error? - goto finish; - } - -finish: - return result; -} - -#ifndef KERNEL -/******************************************************************************* -* -*******************************************************************************/ -#define __KLOAD_PATCH_EXTENSION ".patch" - -kload_error __kload_output_patches( - dgraph_t * dgraph, - const char * patch_file, - const char * patch_dir, - int ask_overwrite_symbols, - int overwrite_symbols) -{ - kload_error result = kload_error_none; - unsigned int i; - char * allocated_filename = NULL; - char * patch_filename = NULL; - int file_check; - int output_patch; - - if (patch_dir) { - - for (i = 0; i < dgraph->length; i++) { - - struct dgraph_entry_t * entry = dgraph->load_order[i]; - unsigned long length; - - if (entry->is_kernel_component) { - continue; - } - - length = strlen(patch_dir) + - strlen(entry->expected_kmod_name) + - strlen(__KLOAD_PATCH_EXTENSION) + - 1 + 1 ; // 1 for '/' added, 1 for terminating null - if (length >= MAXPATHLEN) { - kload_log_error( - "output filename \"%s/%s%s\" would be too long" KNL, - patch_dir, entry->expected_kmod_name, - __KLOAD_PATCH_EXTENSION); - result = kload_error_invalid_argument; - goto finish; - } - - allocated_filename = (char *)malloc(length); - if (! 
allocated_filename) { - kload_log_error("malloc failure" KNL); - result = kload_error_no_memory; - goto finish; - } - - patch_filename = allocated_filename; - strlcpy(patch_filename, patch_dir, length); - strlcat(patch_filename, "/", length); - strlcat(patch_filename, entry->expected_kmod_name, length); - strlcat(patch_filename, __KLOAD_PATCH_EXTENSION, length); - - output_patch = 1; - file_check = kload_file_exists(patch_filename); - - if (file_check < 0) { - kload_log_error("error checking existence of file %s" KNL, - patch_filename); - } else if (file_check > 0 && !overwrite_symbols) { - if (!ask_overwrite_symbols) { - kload_log_error( - "patch file %s exists; not overwriting" KNL, - patch_filename); - output_patch = 0; - } else { - int approve = (*__kload_approve_func)(1, - "\nPatch file %s exists; overwrite", patch_filename); - - if (approve < 0) { - result = kload_error_unspecified; - goto finish; - } else { - output_patch = approve; - } - } - } - - if (output_patch) { - if (log_level >= kload_log_level_basic) { - kload_log_message("writing patch file %s" KNL, patch_filename); - } - kld_file_debug_dump(entry->name, patch_filename); - } - - if (allocated_filename) free(allocated_filename); - allocated_filename = NULL; - } - - } else if (patch_file) { - output_patch = 1; - file_check = kload_file_exists(patch_file); - - if (file_check < 0) { - kload_log_error("error checking existence of file %s" KNL, - patch_file); - } else if (file_check > 0 && !overwrite_symbols) { - if (!ask_overwrite_symbols) { - kload_log_error("patch file %s exists; not overwriting" KNL, - patch_filename); - output_patch = 0; - } else { - int approve = (*__kload_approve_func)(1, - "\nPatch file %s exists; overwrite", patch_filename); - - if (approve < 0) { - result = kload_error_unspecified; - goto finish; - } else { - output_patch = approve; - } - } - } - - if (output_patch) { - if (log_level >= kload_log_level_basic) { - kload_log_message("writing patch file %s" KNL, patch_filename); - 
} - kld_file_debug_dump(dgraph->root->name, patch_file); - } - } - -finish: - if (allocated_filename) free(allocated_filename); - - return result; -} -#endif /* not KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error __kload_set_module_dependencies(dgraph_entry_t * entry) { - kload_error result = kload_error_none; - int mach_result; -#ifndef KERNEL - void * kmod_control_args = 0; - int num_args = 0; -#endif /* not KERNEL */ - kmod_t packed_id; - unsigned int i; - dgraph_entry_t * current_dep = NULL; - - if (!entry->do_load) { - result = kload_error_already_loaded; - goto finish; - } - - for (i = 0; i < entry->num_dependencies; i++) { - current_dep = entry->dependencies[i]; - - if (log_level >= kload_log_level_load_details) { - kload_log_message("adding reference from %s (%d) to %s (%d)" KNL, - entry->expected_kmod_name, entry->kmod_id, - current_dep->expected_kmod_name, current_dep->kmod_id); - } - - packed_id = KMOD_PACK_IDS(entry->kmod_id, current_dep->kmod_id); -#ifndef KERNEL - mach_result = kmod_control(G_kernel_priv_port, - packed_id, KMOD_CNTL_RETAIN, &kmod_control_args, &num_args); -#else - mach_result = kmod_retain(packed_id); -#endif /* not KERNEL */ - if (mach_result != KERN_SUCCESS) { - kload_log_error( - "kmod retain failed for %s; destroying kmod" KNL, - entry->expected_kmod_name); -#ifndef KERNEL - mach_result = kmod_destroy(G_kernel_priv_port, entry->kmod_id); -#else - mach_result = kmod_destroy_internal(entry->kmod_id); -#endif /* not KERNEL */ - if (mach_result != KERN_SUCCESS) { - kload_log_error("kmod destroy failed" KNL); - } - result = kload_error_link_load; - goto finish; - } - } - - if (log_level >= kload_log_level_load_basic) { - kload_log_message("module # %d reference counts incremented" KNL, - entry->kmod_id); - } - -finish: - return result; -} - 
-/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -kload_error __kload_start_module(dgraph_entry_t * entry) { - kload_error result = kload_error_none; - int mach_result; -#ifndef KERNEL - void * kmod_control_args = 0; - int num_args = 0; -#elif CONFIG_MACF_KEXT - kmod_args_t kmod_args = entry->user_data; - mach_msg_type_number_t arg_size = entry->user_data_length; -#endif /* not KERNEL */ - - if (!entry->do_load) { - result = kload_error_already_loaded; - goto finish; - } - -#ifndef KERNEL - mach_result = kmod_control(G_kernel_priv_port, - entry->kmod_id, KMOD_CNTL_START, &kmod_control_args, &num_args); -#elif CONFIG_MACF_KEXT - mach_result = kmod_start_or_stop(entry->kmod_id, 1, &kmod_args, &arg_size); -#else - mach_result = kmod_start_or_stop(entry->kmod_id, 1, 0, 0); -#endif /* not KERNEL */ - - if (mach_result != KERN_SUCCESS) { - kload_log_error( - "kmod_control/start failed for %s; destroying kmod" KNL, - entry->expected_kmod_name); -#ifndef KERNEL - mach_result = kmod_destroy(G_kernel_priv_port, entry->kmod_id); -#else - mach_result = kmod_destroy_internal(entry->kmod_id); -#endif /* not KERNEL */ - if (mach_result != KERN_SUCCESS) { - kload_log_error("kmod destroy failed" KNL); - } - result = kload_error_link_load; - goto finish; - } - - if (log_level >= kload_log_level_load_basic) { - kload_log_message("module # %d started" KNL, - entry->kmod_id); - } - -finish: - return result; -} - -/******************************************************************************* -*******************************************************************************/ - -/******************************************************************************* -* This function can only operate on 32 bit mach object file symbol table -* graphs represented by G_current_load_entry. 
-*******************************************************************************/ -static -unsigned long __kload_linkedit_address( - unsigned long size, - unsigned long headers_size) -{ - unsigned long round_segments_size; - unsigned long round_headers_size; - unsigned long round_size; - int mach_result; - const struct machOMapping { - struct mach_header h; - struct segment_command seg[1]; - } *machO; - - if (!G_current_load_entry) { - return 0; - } - - // the actual size allocated by kld_load_from_memory() - G_current_load_entry->kernel_load_size = size; - - round_headers_size = round_page(headers_size); - round_segments_size = round_page(size - headers_size); - round_size = round_headers_size + round_segments_size; - - G_current_load_entry->kernel_alloc_size = round_size; - - // will need to be rounded *after* load/link - G_current_load_entry->kernel_hdr_size = headers_size; - G_current_load_entry->kernel_hdr_pad = round_headers_size - headers_size; - - if (G_current_load_entry->loaded_address) { - G_current_load_entry->kernel_load_address = - G_current_load_entry->loaded_address + - G_current_load_entry->kernel_hdr_pad; - if (log_level >= kload_log_level_load_basic) { - kload_log_message( - "using %s load address 0x%x (0x%x with header pad)" KNL, - G_current_load_entry->kmod_id ? 
"existing" : "provided", - G_current_load_entry->loaded_address, - G_current_load_entry->kernel_load_address); - } - return G_current_load_entry->kernel_load_address; - } - - machO = (const struct machOMapping *) G_current_load_entry->object; - if (machO->seg[0].vmaddr) - { - G_current_load_entry->loaded_address = trunc_page(machO->seg[0].vmaddr - machO->seg[0].fileoff); - - G_current_load_entry->kernel_load_address = G_current_load_entry->loaded_address - + G_current_load_entry->kernel_hdr_pad; - - return G_current_load_entry->kernel_load_address; - } - -#ifndef KERNEL - if (G_prelink) { - G_current_load_entry->kernel_alloc_address = G_prelink->modules[0].address; - G_prelink->modules[0].address += round_page(G_current_load_entry->kernel_alloc_size); - mach_result = KERN_SUCCESS; - - } else if (G_syms_only) { - kload_log_error( - "internal error; asked to allocate kernel memory" KNL); - // FIXME: no provision for cleanup here - return kload_error_unspecified; - - } else -#endif /* not KERNEL */ - - { -#ifndef KERNEL - mach_result = vm_allocate(G_kernel_port, - &G_current_load_entry->kernel_alloc_address, - G_current_load_entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); -#else - mach_result = vm_allocate(kernel_map, - &G_current_load_entry->kernel_alloc_address, - G_current_load_entry->kernel_alloc_size, VM_FLAGS_ANYWHERE); -#endif /* not KERNEL */ - } - - if (mach_result != KERN_SUCCESS) { - kload_log_error("can't allocate kernel memory" KNL); - // FIXME: no provision for cleanup here - return kload_error_kernel_error; - } - - if (log_level >= kload_log_level_load_basic) { - kload_log_message("allocated %ld bytes in kernel space at 0x%x" KNL, - G_current_load_entry->kernel_alloc_size, - G_current_load_entry->kernel_alloc_address); - } - - G_current_load_entry->kernel_load_address = - G_current_load_entry->kernel_alloc_address + - G_current_load_entry->kernel_hdr_pad; - - G_current_load_entry->loaded_address = G_current_load_entry->kernel_alloc_address; - - if 
(log_level >= kload_log_level_load_basic) { - kload_log_message( - "using load address of 0x%x" KNL, - G_current_load_entry->kernel_alloc_address); - } - - return G_current_load_entry->kernel_load_address; -} - -/******************************************************************************* -* -*******************************************************************************/ -static -void __kload_clear_kld_globals(void) { - G_current_load_entry = NULL; - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -static -void __kload_clean_up_entry(dgraph_entry_t * entry) { - int mach_result; - - if (entry->need_cleanup && entry->kernel_alloc_address) { -#ifndef KERNEL - if (G_prelink) { - - if ((entry->kernel_alloc_address + entry->kernel_alloc_size) == G_prelink->modules[0].address) { - G_prelink->modules[0].address = entry->kernel_alloc_address; - } else { - kload_log_error( - "bad free load address of 0x%x (last 0x%x)" KNL, - entry->kernel_alloc_address, G_prelink->modules[0].address); - } - } else { - mach_result = vm_deallocate(G_kernel_port, entry->kernel_alloc_address, - entry->kernel_alloc_size); - } -#else - mach_result = vm_deallocate(kernel_map, entry->kernel_alloc_address, - entry->kernel_alloc_size); -#endif /* not KERNEL */ - entry->kernel_alloc_address = 0; - } - return; -} - -#ifndef KERNEL -/******************************************************************************* -* -*******************************************************************************/ -int kload_file_exists(const char * path) -{ - int result = 0; // assume it doesn't exist - struct stat stat_buf; - - if (stat(path, &stat_buf) == 0) { - result = 1; // the file does exist; we don't care beyond that - goto finish; - } - - switch (errno) { - case ENOENT: - result = 0; // the file doesn't exist - goto finish; - break; - default: - result = -1; // unknown error - 
goto finish; - break; - } - -finish: - return result; -} -#endif /* not KERNEL */ - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -void kload_set_log_level(kload_log_level level) -{ - log_level = level; - return; -} - -#ifndef KERNEL -/******************************************************************************* -* -*******************************************************************************/ -void kload_set_log_function( - void (*func)(const char * format, ...)) -{ - if (!func) { - __kload_log_func = &__kload_null_log; - } else { - __kload_log_func = func; - } - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -void kload_set_error_log_function( - void (*func)(const char * format, ...)) -{ - if (!func) { - __kload_err_log_func = &__kload_null_err_log; - } else { - __kload_err_log_func = func; - } - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -void kload_set_user_approve_function( - int (*func)(int default_answer, const char * format, ...)) -{ - if (!func) { - __kload_approve_func = &__kload_null_approve; - } else { - __kload_approve_func = func; - } - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -void kload_set_user_veto_function( - int (*func)(int default_answer, const char * format, ...)) -{ - if (!func) { - __kload_veto_func = &__kload_null_veto; - } else { - __kload_veto_func = func; - } - return; -} - -/******************************************************************************* -* 
-*******************************************************************************/ -void kload_set_user_input_function( - const char * (*func)(const char * format, ...)) -{ - if (!func) { - __kload_input_func = &__kload_null_input; - } else { - __kload_input_func = func; - } - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -void kload_log_message(const char * format, ...) -{ - va_list ap; - char fake_buffer[2]; - int output_length; - char * output_string; - - if (log_level <= kload_log_level_silent) { - return; - } - - va_start(ap, format); - output_length = vsnprintf(fake_buffer, 1, format, ap); - va_end(ap); - - output_string = (char *)malloc(output_length + 1); - if (!output_string) { - return; - } - - va_start(ap, format); - vsprintf(output_string, format, ap); - va_end(ap); - - __kload_log_func(output_string); - free(output_string); - - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -PRIV_EXT -void kload_log_error(const char * format, ...) -{ - va_list ap; - char fake_buffer[2]; - int output_length; - char * output_string; - - if (log_level <= kload_log_level_silent) { - return; - } - - va_start(ap, format); - output_length = vsnprintf(fake_buffer, 1, format, ap); - va_end(ap); - - output_string = (char *)malloc(output_length + 1); - if (!output_string) { - return; - } - - va_start(ap, format); - vsprintf(output_string, format, ap); - va_end(ap); - - __kload_err_log_func(output_string); - free(output_string); - - return; -} -/******************************************************************************* -* -*******************************************************************************/ -void __kload_null_log(const char * format, ...) 
-{ - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -void __kload_null_err_log(const char * format, ...) -{ - return; -} - -/******************************************************************************* -* -*******************************************************************************/ -int __kload_null_approve(int default_answer, const char * format, ...) -{ - return 0; -} - -/******************************************************************************* -* -*******************************************************************************/ -int __kload_null_veto(int default_answer, const char * format, ...) -{ - return 1; -} - -/******************************************************************************* -* -*******************************************************************************/ -const char * __kload_null_input(const char * format, ...) -{ - return NULL; -} - -/******************************************************************************* -* The kld_patch.c module uses this function, if defined, to print errors. In -* the kernel this function is defined in libsa/misc.c. 
-*******************************************************************************/ -void kld_error_vprintf(const char * format, va_list ap) { - if (log_level <= kload_log_level_silent) return; - vfprintf(stderr, format, ap); - return; -} - -#endif /* not KERNEL */ diff --git a/libsa/load.h b/libsa/load.h deleted file mode 100644 index 8a79050ad..000000000 --- a/libsa/load.h +++ /dev/null @@ -1,162 +0,0 @@ -#ifndef __LOAD_H__ -#define __LOAD_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "dgraph.h" - -#ifdef KERNEL -#else -#include "KXKext.h" -#endif /* KERNEL */ - -#ifndef KERNEL -typedef KXKextManagerError kload_error; -enum { - kload_error_none = kKXKextManagerErrorNone, - kload_error_unspecified = kKXKextManagerErrorUnspecified, - kload_error_invalid_argument = kKXKextManagerErrorInvalidArgument, - kload_error_no_memory = kKXKextManagerErrorNoMemory, - - kload_error_user_abort = kKXKextManagerErrorUserAbort, - kload_error_kernel_error = kKXKextManagerErrorKernelError, - kload_error_kernel_permission = kKXKextManagerErrorKernelPermission, - - kload_error_executable_bad = kKXKextManagerErrorLoadExecutableBad, - kload_error_already_loaded = kKXKextManagerErrorAlreadyLoaded, - kload_error_loaded_version_differs = kKXKextManagerErrorLoadedVersionDiffers, - kload_error_dependency_loaded_version_differs = kKXKextManagerErrorDependencyLoadedVersionDiffers, - kload_error_link_load = kKXKextManagerErrorLinkLoad -}; - -typedef KXKextManagerLogLevel kload_log_level; -enum { - kload_log_level_silent = kKXKextManagerLogLevelSilent, - kload_log_level_errors_only = kKXKextManagerLogLevelErrorsOnly, - kload_log_level_default = kKXKextManagerLogLevelDefault, - kload_log_level_basic = kKXKextManagerLogLevelBasic, - kload_log_level_load_basic = kKXKextManagerLogLevelLoadBasic, - kload_log_level_details = kKXKextManagerLogLevelDetails, - kload_log_level_kexts = kKXKextManagerLogLevelKexts, - kload_log_level_kext_details = kKXKextManagerLogLevelKextDetails, - 
kload_log_level_load_details = kKXKextManagerLogLevelLoadDetails -}; -#else - -typedef enum { - kload_error_none, - kload_error_unspecified, - kload_error_invalid_argument, - kload_error_no_memory, - - kload_error_user_abort, - kload_error_kernel_error, - kload_error_kernel_permission, - - kload_error_executable_bad, - kload_error_already_loaded, - kload_error_loaded_version_differs, - kload_error_dependency_loaded_version_differs, - kload_error_link_load -} kload_error; - -typedef enum { - kload_log_level_silent = -2, // no notices, no errors - kload_log_level_errors_only = -1, - kload_log_level_default = 0, - kload_log_level_basic = 1, - kload_log_level_load_basic = 2, - kload_log_level_details = 3, - kload_log_level_kexts = 4, - kload_log_level_kext_details = 5, - kload_log_level_load_details = 6 -} kload_log_level; - -#endif /* KERNEL */ - - -kload_error kload_load_dgraph(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file, - const char * patch_file, const char * patch_dir, - const char * symbol_file, const char * symbol_dir, - int do_load, int do_start_kmod, int do_prelink, - int interactive_level, - int ask_overwrite_symbols, int overwrite_symbols -#endif /* not KERNEL */ - ); - -#ifndef KERNEL -kload_error kload_load_with_arglist( - int argc, char **argv, - const char * kernel_file, - const char * patch_file, const char * patch_dir, - const char * symbol_file, const char * symbol_dir, - int do_load, int do_start_kmod, - int interactive_level, - int ask_overwrite_symbols, int overwrite_symbols); -#endif /* not KERNEL */ - -kload_error kload_map_dgraph(dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file -#endif /* not KERNEL */ - ); -kload_error kload_map_entry(dgraph_entry_t * entry); - -#ifndef KERNEL -int kload_file_exists(const char * path); -kload_error kload_request_load_addresses( - dgraph_t * dgraph, - const char * kernel_file); -kload_error kload_set_load_addresses_from_args( - dgraph_t * dgraph, - const char * kernel_file, - 
char ** addresses); -#endif /* not KERNEL */ - -kload_error kload_set_load_addresses_from_kernel( - dgraph_t * dgraph -#ifndef KERNEL - , - const char * kernel_file, - int do_load -#endif /* not KERNEL */ - ); - -void kload_set_log_level(kload_log_level level); -#ifndef KERNEL -void kload_set_log_function( - void (*)(const char * format, ...)); -void kload_set_error_log_function( - void (*)(const char * format, ...)); -void kload_set_user_approve_function( - int (*)(int default_answer, const char * format, ...)); -void kload_set_user_veto_function( - int (*)(int default_answer, const char * format, ...)); -void kload_set_user_input_function( - const char * (*)(const char * format, ...)); - -void kload_log_message(const char * format, ...); -void kload_log_error(const char * format, ...); -#define KNL "" - -#else -#define kload_log_message IOLog -#define kload_log_error IOLog -#define KNL "\n" - -#endif /* not KERNEL */ - - - -#endif /* __LOAD_H__ */ - -#ifdef __cplusplus -} -#endif - diff --git a/libsa/mach_loader.h b/libsa/mach_loader.h deleted file mode 100644 index 7f76237f2..000000000 --- a/libsa/mach_loader.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (C) 1992, NeXT, Inc. - * - * File: kern/mach_loader.h - * - * Mach object file loader API. - * - * NOTE: This header is only used by the kld code for loading 32 bit - * kernel modules into a 32 bit mach_kernel. - * - * HISTORY - * 24-Aug-92 Doug Mitchell at NeXT - * Created. - */ - -#ifndef _BSD_KERN_MACH_LOADER_H_ -#define _BSD_KERN_MACH_LOADER_H_ - -#include - -#include - -typedef int load_return_t; - -typedef struct _load_result { - vm_offset_t mach_header; - vm_offset_t entry_point; - vm_offset_t user_stack; - int thread_count; - unsigned int - /* boolean_t */ unixproc :1, - dynlinker :1, - :0; - unsigned int csflags; -} load_result_t; - - -#define LOAD_SUCCESS 0 -#define LOAD_BADARCH 1 /* CPU type/subtype not found */ -#define LOAD_BADMACHO 2 /* malformed mach-o file */ -#define LOAD_SHLIB 3 /* shlib version mismatch */ -#define LOAD_FAILURE 4 /* Miscellaneous error */ -#define LOAD_NOSPACE 5 /* No VM available */ -#define LOAD_PROTECT 6 /* protection violation */ -#define LOAD_RESOURCE 7 /* resource allocation failure */ - -#endif /* _BSD_KERN_MACH_LOADER_H_ */ diff --git a/libsa/malloc.c b/libsa/malloc.c deleted file mode 100644 index 248fd9d94..000000000 --- a/libsa/malloc.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "libsa/malloc.h" - -/********************************************************************* -* Structure for a client memory block. Contains linked-list pointers, -* a size field giving the TOTAL size of the block, including this -* header, and the address of the client's block. The client block -* field is guaranteed to lie on a 16-byte boundary. 
-*********************************************************************/
-typedef struct malloc_block {
-
-    struct malloc_block *malFwd;    /* next block on the allocation list */
-    struct malloc_block *malBwd;    /* previous block on the allocation list */
-    void                *malActl;   /* address kalloc() actually returned */
-    unsigned int         malSize;   /* total number of bytes taken from kalloc() */
-} malloc_block;
-
-/* Anchor of the circular, doubly-linked list of live blocks. An empty
- * list has the anchor pointing at itself in both directions. */
-static malloc_block malAnchor = {&malAnchor, &malAnchor, NULL, 0};
-
-static int malInited = 0;           /* set to 1 by malloc_init() */
-static mutex_t *malloc_lock;        /* guards the malAnchor list */
-
-/*********************************************************************
-* malloc()
-*
-* Gets size bytes from kalloc(), plus room for a malloc_block header
-* and up to 15 bytes of alignment slack, links the header onto the
-* malAnchor list and returns the address immediately after the header.
-* Panics instead of returning NULL when the request cannot be met.
-*********************************************************************/
-__private_extern__
-void * malloc(size_t size) {
-
-    unsigned int  nsize;
-    unsigned long nmem, rmem;       /* wide enough to hold a pointer */
-    malloc_block *amem;
-
-    assert(malInited);
-
-    /* malSize is an unsigned int; refuse a request whose padded size
-     * would wrap around and yield an undersized kalloc() block. */
-    if (size > (size_t)(~0U) - sizeof(malloc_block) - 15) {
-        panic("malloc: request of %lu bytes is too large\n", (unsigned long)size);
-    }
-
-    nsize = (unsigned int)(size + sizeof(malloc_block) + 15);  /* Make sure we get enough to fit */
-
-    nmem = (unsigned long)kalloc(nsize);        /* Get some */
-    if(!nmem) {                                 /* Got any? */
-        panic("malloc: no memory for a %08X sized request\n", nsize);
-    }
-
-    rmem = (nmem + 15) & ~15UL;                 /* Round to 16 byte boundary */
-    amem = (malloc_block *)rmem;                /* Point to the block */
-    amem->malActl = (void *)nmem;               /* Set the actual address */
-    amem->malSize = nsize;                      /* Size */
-
-    mutex_lock(malloc_lock);
-
-    amem->malFwd = malAnchor.malFwd;            /* Move anchor to our forward */
-    amem->malBwd = &malAnchor;                  /* We point back to anchor */
-    malAnchor.malFwd->malBwd = amem;            /* The old forward's back points to us */
-    malAnchor.malFwd = amem;                    /* Now we point the anchor to us */
-
-    mutex_unlock(malloc_lock);                  /* Unlock now */
-
-    /*
-     * The client area starts right after the header. free() and
-     * realloc() step back by sizeof(malloc_block), so use the same
-     * expression here instead of a literal 16. The header size is a
-     * multiple of 16 with 4-byte and with 8-byte pointers, so the
-     * client address stays 16-byte aligned.
-     */
-    return (void *)(rmem + sizeof(malloc_block));   /* Return the block */
-
-} /* malloc() */
-
-
-/*********************************************************************
-* free()
-*
-* Unlinks the block that owns address from the malAnchor list and
-* hands its memory back to kfree(). A NULL address is ignored.
-*********************************************************************/
-__private_extern__
-void free(void * address) {
-
-    malloc_block *amem, *fore, *aft;
-
-    if(!address) return;                        /* Leave if they try to free nothing */
-
-    amem = (malloc_block *)((char *)address - sizeof(malloc_block));   /* Point to the header */
-
-    mutex_lock(malloc_lock);
-
-    fore = amem->malFwd;                        /* Get the guy in front */
-    aft = amem->malBwd;                         /* And the guy behind */
-    fore->malBwd = aft;                         /* The next guy's previous is now my previous */
-    aft->malFwd = fore;                         /* The previous guy's forward is now mine */
-
-    mutex_unlock(malloc_lock);                  /* Unlock now */
-
-    kfree(amem->malActl, amem->malSize);        /* Toss it */
-
-    return;
-
-} /* free() */
-
-/*********************************************************************
-* malloc_init()
-*
-* Allocate the mutual exclusion lock that protects malloc's data and
-* mark the package as initialized. malloc() asserts that this ran.
-*********************************************************************/
-__private_extern__ void
-malloc_init(void)
-{
-    malloc_lock = mutex_alloc(0);
-    malInited = 1;
-}
-
-
-/*********************************************************************
-* malloc_reset()
-*
-* Walks through the list of VM-allocated regions, destroying them
-* all. Any subsequent access by clients to allocated data will cause
-* a segmentation fault.
-*
-* The list anchor is poisoned and the lock is freed, so the package
-* must not be used again after this call.
-*********************************************************************/
-__private_extern__
-void malloc_reset(void) {
-
-    malloc_block *amem, *bmem;
-
-    mutex_lock(malloc_lock);
-
-    amem = malAnchor.malFwd;                    /* Get the first one */
-
-    while(amem != &malAnchor) {                 /* Go until we hit the anchor */
-
-        bmem = amem->malFwd;                    /* Read the link before the block is freed */
-        kfree(amem->malActl, amem->malSize);    /* Toss it */
-        amem = bmem;                            /* Skip to it */
-
-    }
-
-    malAnchor.malFwd = (struct malloc_block *) 0x666;   /* Cause a fault if we try again */
-    malAnchor.malBwd = (struct malloc_block *) 0x666;   /* Cause a fault if we try again */
-
-    mutex_unlock(malloc_lock);                  /* Unlock now */
-
-    mutex_free(malloc_lock);
-
-#ifdef MALLOC_RESET_GC
-    /* Force garbage collection of zones, since we've thrashed through a lot of memory */
-    zone_gc();
-#endif
-
-    return;
-
-} /* malloc_reset() */
-
-
-/*********************************************************************
-* realloc()
-*
-* This function simply allocates a new block and copies the existing
-* data into it. Nothing too clever here, as cleanup and efficient
-* memory usage are not important in this allocator package.
-*********************************************************************/
-/*
- * realloc(NULL, n) behaves like malloc(n). Otherwise a new block is
- * allocated, as much of the old contents as fits is copied across, and
- * the old block is freed. Like malloc(), this panics rather than
- * returning NULL.
- */
-__private_extern__
-void * realloc(void * address, size_t new_client_size) {
-    void * new_address;
-    malloc_block *amem;
-    size_t old_client_size;
-    size_t copy_size;
-
-    if(!address) {                              /* Nothing to copy or free */
-        return malloc(new_client_size);
-    }
-
-    amem = (malloc_block *)((char *)address - sizeof(malloc_block));   /* Point to allocation block */
-
-    /*
-     * Bytes between the client pointer and the end of the kalloc()ed
-     * block. malSize also counts the header and the alignment slack in
-     * front of it, so "malSize - sizeof(malloc_block)" could run past
-     * the end of the old block.
-     */
-    old_client_size = (size_t)(((char *)amem->malActl + amem->malSize) - (char *)address);
-
-    new_address = malloc(new_client_size);      /* get a new one */
-    if(!new_address) {                          /* Did we get it? */
-        panic("realloc: can not reallocate one of %08X size\n", (unsigned int)new_client_size);
-    }
-
-    /* Never write more than the new block can hold. */
-    copy_size = (old_client_size < new_client_size) ? old_client_size : new_client_size;
-    memcpy(new_address, address, copy_size);    /* Copy the old in */
-
-    free(address);                              /* Toss the old one */
-
-    return new_address;
-
-} /* realloc() */
-
-#ifdef MALLOC_KLD_VM_ALLOCATE
-#undef vm_allocate
-#undef vm_deallocate
-
-/*
- * Wrap vm_allocate calls made by kld in malloc/free so that the memory
- * is all released when we jettison kld. Make other VM calls used by kld
- * no-op, since we don't need them.
- */
-__private_extern__
-kern_return_t vm_allocate(vm_map_t target_task, vm_address_t *address, vm_size_t size, int flags)
-{
-    assert(flags & VM_FLAGS_ANYWHERE);
-    assert(target_task == kernel_map);
-
-    *address = (vm_address_t)malloc(size);
-    bzero((void *)*address, size);              /* bzero() takes a pointer, not a vm_address_t */
-
-    return KERN_SUCCESS;
-}
-
-__private_extern__
-kern_return_t vm_deallocate(vm_map_t target_task, vm_address_t address, vm_size_t size)
-{
-    free((void *)address);                      /* free() takes a pointer, not a vm_address_t */
-    return KERN_SUCCESS;
-}
-
-__private_extern__
-kern_return_t vm_protect(vm_map_t target_task, vm_address_t address, vm_size_t size, boolean_t set_maximum, vm_prot_t new_protection)
-{
-    return KERN_SUCCESS;
-}
-
-__private_extern__
-kern_return_t vm_msync(vm_map_t target_task, vm_address_t address, vm_size_t size, vm_sync_t sync_flags)
-{
-    return KERN_SUCCESS;
-}
-#endif
diff --git a/libsa/malloc_debug_stuff b/libsa/malloc_debug_stuff
deleted file mode 100644
index 91ab21044..000000000
--- a/libsa/malloc_debug_stuff
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * NOTE(review): the routines below use malloc_region, queue_entry and
- * malloc_block fields (links, region, block_size) that the malloc_block
- * in libsa/malloc.c does not have; they appear to target a different
- * allocator layout -- confirm before reusing.
- */
-#ifdef DEBUG
-static void print_region_list(void);
-static int check_block_list(queue_entry * block_list, malloc_block * new_block);
-#endif /* DEBUG */
-
-
-/* Prints the list head and then every entry of malloc_region_list. */
-void print_region_list(void) {
-    unsigned int i;
-    malloc_region * cur_region;
-
-    cur_region = (malloc_region *)&malloc_region_list;
-    printf("First region:\n");
-    printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-        (unsigned int)cur_region,
-        (unsigned int)(cur_region->links.prev),
-        (unsigned int)(cur_region->links.next));
-
-    printf("Region list contents:\n");
-
-    i = 0;
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-        if (i > num_regions) {          /* stop if the list is longer than num_regions */
-            break;
-        }
-        printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-            (unsigned int)cur_region,
-            (unsigned int)(cur_region->links.prev),
-            (unsigned int)(cur_region->links.next));
-        i++;
-    }
-    return;
-}
-
-/* Prints the address and both links of every block on block_list. */
-void print_block_list(queue_entry * block_list) {
-    malloc_block * cur_block;
-
-    queue_iterate(block_list, cur_block, malloc_block *, links) {
-        printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-            (unsigned int)cur_block,
-            (unsigned int)(cur_block->links.prev),
-            (unsigned int)(cur_block->links.next));
-    }
-    return;
-}
-
-/* Empty hook called just before check_block_list() exits; always returns 0. */
-int break_here(void) {
-    return 0;
-}
-
-
-/*
- * Checks every block on block_list: its header must not overlap the
- * header of new_block, and the block and both of its neighbours must
- * lie inside the block's region. Reports the first violation and calls
- * exit(1); returns 0 when the whole list passes.
- */
-int check_block_list(queue_entry * block_list, malloc_block * new_block) {
-    void * end_of_new_block;
-    malloc_block * cur_block;
-    unsigned int i = 0;
-
-    /* Byte offset: without the cast the addition is scaled by sizeof(malloc_block). */
-    end_of_new_block = (char *)new_block + sizeof(malloc_block);
-
-    queue_iterate(block_list, cur_block, malloc_block *, links) {
-        malloc_region * cur_region;
-        void * end_of_region;
-        void * scratch_block;
-        void * end_of_block;
-
-        cur_region = cur_block->region;
-        /* Byte offset: without the cast the addition is scaled by sizeof(malloc_region). */
-        end_of_region = (char *)cur_region + cur_region->region_size;
-        scratch_block = cur_block;
-        end_of_block = scratch_block + sizeof(malloc_block);
-
-        if ( ((void *)new_block >= scratch_block && (void *)new_block <= end_of_block) ||
-             (end_of_new_block >= scratch_block && end_of_new_block <= end_of_block) ||
-             (scratch_block >= (void *)new_block && scratch_block <= end_of_new_block) ||
-             (end_of_block >= (void *)new_block && end_of_block <= end_of_new_block) ) {
-
-            printf("New block %p overlaps existing block %p.\n",
-                new_block, scratch_block);
-            break_here();
-            exit(1);
-            return 1;
-
-        }
-
-        /* The block itself must lie inside its region. */
-        if (scratch_block < (void *)cur_region ||
-            end_of_block >= end_of_region) {
-
-            printf("Found invalid block link at block %u.\n", i);
-            printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-                (unsigned int)cur_block,
-                (unsigned int)(cur_block->links.prev),
-                (unsigned int)(cur_block->links.next));
-            break_here();
-            exit(1);
-            return 1;
-        }
-
-        /* So must its predecessor ... */
-        scratch_block = (malloc_block *)cur_block->links.prev;
-        end_of_block = scratch_block + sizeof(malloc_block);
-
-        if (scratch_block < (void *)cur_region ||
-            end_of_block >= end_of_region) {
-
-            printf("Found invalid block link at block %u.\n", i);
-            printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-                (unsigned int)cur_block,
-                (unsigned int)(cur_block->links.prev),
-                (unsigned int)(cur_block->links.next));
-            break_here();
-            exit(1);
-            return 1;
-        }
-
-        /* ... and its successor. */
-        scratch_block = (malloc_block *)cur_block->links.next;
-        end_of_block = scratch_block + sizeof(malloc_block);
-
-        if (scratch_block < (void *)cur_region ||
-            end_of_block >= end_of_region) {
-            printf("Found invalid block link at block %u.\n", i);
-
-            printf("curr: 0x%8x prev: 0x%8x next: 0x%8x\n",
-                (unsigned int)cur_block,
-                (unsigned int)(cur_block->links.prev),
-                (unsigned int)(cur_block->links.next));
-            break_here();
-            exit(1);
-            return 1;
-        }
-
-        i++;
-    }
-    return 0;
-}
-
-
-/*
- * Returns 1 when malloc_region_list holds at most num_regions + 1
- * entries and every non-final entry links forward to a higher address;
- * returns 0 otherwise.
- */
-int malloc_sanity_check(void) {
-    unsigned int i;
-    malloc_region * cur_region;
-
-    i = 0;
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-        if (i > num_regions) {
-            return 0;
-        }
-        if (cur_region->links.next != &malloc_region_list &&
-            cur_region->links.next < (queue_entry *)cur_region) {
-            printf("inconsistency detected\n");
-            return 0;
-        }
-        i++;
-    }
-    return 1;
-}
-
-
-/*********************************************************************
-* malloc_hiwat()
-*
-* Returns the maximum amount of memory ever reserved by this
-* package.
-*********************************************************************/
-size_t malloc_hiwat(void) {
-    return malloc_hiwater_mark;
-}
-
-/* Resets the recorded high-water mark to zero. */
-void malloc_clear_hiwat(void) {
-    malloc_hiwater_mark = 0;
-    return;
-}
-
-/* Returns the current_block_total counter. */
-size_t malloc_current_usage(void)
-{
-    return current_block_total;
-}
-
-/* Sums the usable size (region_size minus the region header) of every region. */
-size_t malloc_region_usage(void) {
-    size_t total = 0;
-    malloc_region * cur_region;
-
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-        total += cur_region->region_size - sizeof(malloc_region);
-
-    }
-    return total;
-}
-
-
-/* Returns the peak_usage statistic. */
-double malloc_peak_usage(void)
-{
-    return peak_usage;
-}
-
-/* Returns the min_usage statistic. */
-double malloc_min_usage(void)
-{
-    return min_usage;
-}
-
-/* Sums the free_size of every region and the block_size of every
- * block on sorted_free_block_list. */
-size_t malloc_unused(void) {
-    size_t total = 0;
-    malloc_region * cur_region;
-    malloc_block * cur_block;
-
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-        total += cur_region->free_size;
-
-    }
-    queue_iterate(&sorted_free_block_list, cur_block, malloc_block *, links) {
-        total += cur_block->block_size;
-    }
-
-    return total;
-}
-
-/*
- * Returns requested bytes divided by usable block bytes over all
- * allocated blocks (1.0 when there are none). Also prints both totals
- * and the largest per-block difference.
- */
-double malloc_current_efficiency(void)
-{
-    double efficiency = 0.0;
-    double total_block_size = 0;
-    double total_request_size = 0;
-    unsigned long total_block_sizeL = 0;
-    unsigned long total_request_sizeL = 0;
-    size_t discrepancy;
-    size_t max_discrepancy = 0;
-    malloc_region * cur_region;
-    malloc_block * cur_block;
-
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-        queue_iterate(&cur_region->block_list, cur_block, malloc_block *, links) {
-            size_t cur_block_size = cur_block->block_size - sizeof(malloc_block);
-            total_block_sizeL += cur_block_size;
-            total_request_sizeL += cur_block->request_size;
-            total_block_size += (double)cur_block_size;
-            total_request_size += (double)cur_block->request_size;
-            discrepancy = cur_block_size - cur_block->request_size;
-            if (discrepancy > max_discrepancy) {
-                max_discrepancy = discrepancy;
-            }
-        }
-    }
-
-    if (total_block_size > 0) {
-        efficiency = (double)total_request_size / (double)total_block_size;
-    } else {
-        efficiency = 1.0;
-    }
-
-    printf("requested %.2f, actual %.2f\n", total_request_size, total_block_size);
-    /* The totals are unsigned long and max_discrepancy is a size_t, so print them unsigned. */
-    printf("requested %lu, actual %lu\n", total_request_sizeL, total_block_sizeL);
-    printf("max discrepancy %lu\n", (unsigned long)max_discrepancy);
-
-    return efficiency;
-}
-
-
-/*********************************************************************
-* malloc_report()
-*
-* Print stats on allocated regions and blocks.
-*
-* NOTE(review): the %d / %ld conversions applied to region_size,
-* free_size, block_size and malloc_hiwater_mark assume types that are
-* declared outside this fragment -- confirm them. The "Block content"
-* line prints each buffer as a C string, so it relies on the buffer
-* being NUL-terminated.
-*********************************************************************/
-void malloc_report(void) {
-    malloc_region * cur_region;
-    malloc_block * cur_block;
-    size_t total_block_size;
-
-    queue_iterate(&malloc_region_list, cur_region, malloc_region *, links) {
-
-        printf("VM Region, size, free: ");
-        printf("%p, %d, %d\n", cur_region,
-            cur_region->region_size,
-            cur_region->free_size);
-
-        total_block_size = 0;
-
-        queue_iterate(&cur_region->block_list, cur_block, malloc_block *, links) {
-
-            total_block_size += cur_block->block_size;
-            printf(" Block address, size: %p, %ld (%ld)\n",
-                cur_block->buffer, cur_block->block_size,
-                cur_block->block_size - sizeof(malloc_block));
-            printf(" Block content: %s\n",
-                (char *)cur_block->buffer);
-        }
-        /* total_block_size is a size_t local, so print it unsigned. */
-        printf(" Total blocks size: %lu\n", (unsigned long)total_block_size);
-#if 0
-        queue_iterate(&cur_region->free_list, cur_block, malloc_block *, links) {
-
-            total_block_size += cur_block->block_size;
-            printf(" Free block address, size: %p, %ld (%ld)\n",
-                cur_block->buffer, cur_block->block_size,
-                cur_block->block_size - sizeof(malloc_block));
-        }
-#endif /* 0 */
-    }
-
-    printf("High water mark: %ld\n", malloc_hiwater_mark);
-
-    return;
-} /* malloc_report() */
-
diff --git a/libsa/malloc_unused b/libsa/malloc_unused
deleted file mode 100644
index 45e581c56..000000000
--- a/libsa/malloc_unused
+++ /dev/null
@@ -1,76 +0,0 @@
-/*********************************************************************
-* free_all()
-*
-* Empties all memory regions so that their entire buffer space is
-* considered
-* unused. This allows the client to restart without
-* having to reallocate memory for the allocator regions, which helps
-* performance when this package gets used serially.
-*********************************************************************/
-__private_extern__
-void free_all(void) {
-    malloc_region * region;
-
-    /* Give each region an empty block list and its whole buffer back. */
-    queue_iterate(&malloc_region_list, region, malloc_region *, links) {
-        queue_init(&region->block_list);
-        region->free_size = region->region_size - sizeof(malloc_region);
-        region->free_address = &region->buffer;
-    }
-
-    queue_init(&sorted_free_block_list);
-
-#ifdef CLIENT_DEBUG
-    current_block_total = 0;
-#endif /* CLIENT_DEBUG */
-
-} /* free_all() */
-
-
-/*********************************************************************
-* malloc_size()
-*
-* Returns the client-usable size (block_size minus the block header)
-* of the block that malloc_find_block() reports for address, or 0
-* when no block is found.
-*********************************************************************/
-__private_extern__
-size_t malloc_size(void * address) {
-    malloc_region * region = NULL;
-    malloc_block * block = NULL;
-
-    malloc_find_block(address, &block, &region);
-
-    if (block != NULL) {
-        return (block->block_size - sizeof(malloc_block));
-    }
-
-    /* No such block: the caller is in error, so report 0. */
-    // FIXME: panic?
-    return 0;
-
-} /* malloc_size() */
-
-
-/*********************************************************************
-* malloc_is_valid()
-*
-* Returns 1 when malloc_find_block() finds a block for address,
-* 0 otherwise.
-*********************************************************************/
-__private_extern__
-int malloc_is_valid(void * address){
-    malloc_region * region = NULL;
-    malloc_block * block = NULL;
-
-    malloc_find_block(address, &block, &region);
-
-    return (block != NULL);
-
-} /* malloc_is_valid() */
-
-
diff --git a/libsa/ppc/setjmp.s b/libsa/ppc/setjmp.s
deleted file mode 100644
index 9e3c71136..000000000
--- a/libsa/ppc/setjmp.s
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- * @OSF_COPYRIGHT@
- */
-
-/*
- * C library -- _setjmp, _longjmp
- *
- *      _longjmp(a,v)
- * will generate a "return(v)" from
- * the last call to
- *      _setjmp(a)
- * by restoring registers from the stack,
- * The previous signal state is NOT restored.
- *
- * NOTE : MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *        (which needs to know where to find the destination address)
- *
- * Save-area layout shared by both routines (byte offsets from ARG0):
- *      0          r1 (stack pointer)
- *      4 .. 76    r13 .. r31
- *      80         CR
- *      84         LR
- *      88         XER
- *      92         FPSCR             (floating-point builds only)
- *      96 .. 232  f14 .. f31, 8 bytes each (floating-point builds only)
- */
-
-/* NOTE(review): the target of this #include was lost when the patch
- * text was extracted; restore it from the original file. */
-#include
-
-.private_extern _longjmp
-.private_extern _setjmp
-
-/*
- * setjmp : ARG0 (r3) contains the address of
- *          the structure where we are to
- *          store the context
- *          Uses r0 as scratch register
- *
- * NOTE : MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *        (which needs to know where to find the destination address)
- */
-
-ENTRY(setjmp,TAG_NO_FRAME_USED)
-    /* first entry is used for r1 - stack ptr */
-    stw     r13, 4(ARG0)    /* GPR context. We avoid multiple-word */
-    stw     r14, 8(ARG0)    /* instructions as they're slower (?) */
-    stw     r15, 12(ARG0)
-    stw     r16, 16(ARG0)
-    stw     r17, 20(ARG0)
-    stw     r18, 24(ARG0)
-    stw     r19, 28(ARG0)
-    stw     r20, 32(ARG0)
-    stw     r21, 36(ARG0)
-    stw     r22, 40(ARG0)
-    stw     r23, 44(ARG0)
-    stw     r24, 48(ARG0)
-    stw     r25, 52(ARG0)
-    stw     r26, 56(ARG0)
-    stw     r27, 60(ARG0)
-    stw     r28, 64(ARG0)
-    stw     r29, 68(ARG0)
-    stw     r30, 72(ARG0)
-    stw     r31, 76(ARG0)
-
-    mfcr    r0
-    stw     r0, 80(ARG0)    /* Condition register */
-
-    mflr    r0
-    stw     r0, 84(ARG0)    /* Link register */
-
-    mfxer   r0
-    stw     r0, 88(ARG0)    /* Fixed point exception register */
-
-/* NOTE(review): this block is guarded with #if but the matching block
- * in longjmp uses #ifdef; the two differ when the macro is defined as
- * 0 -- confirm which is intended. */
-#if FLOATING_POINT_SUPPORT  /* TODO NMGS probably not needed for kern */
-    mffs    f0              /* get FPSCR in low 32 bits of f0 */
-    /* NOTE(review): stfiwx is normally written in indexed form
-     * (frS, rA, rB); confirm that this displacement form assembles. */
-    stfiwx  f0, 92(ARG0)    /* Floating point status register */
-
-    /*
-     * One 8-byte slot per register, at the same offsets longjmp loads
-     * from. f18, f19, f23, f24, f28 and f29 used to be stored at 138,
-     * 146, 178, 186, 218 and 226, which overlapped neighbouring slots
-     * and did not match the restore side.
-     */
-    stfd    f14, 96(ARG0)   /* Floating point context - 8 byte aligned */
-    stfd    f15, 104(ARG0)
-    stfd    f16, 112(ARG0)
-    stfd    f17, 120(ARG0)
-    stfd    f18, 128(ARG0)
-    stfd    f19, 136(ARG0)
-    stfd    f20, 144(ARG0)
-    stfd    f21, 152(ARG0)
-    stfd    f22, 160(ARG0)
-    stfd    f23, 168(ARG0)
-    stfd    f24, 176(ARG0)
-    stfd    f25, 184(ARG0)
-    stfd    f26, 192(ARG0)
-    stfd    f27, 200(ARG0)
-    stfd    f28, 208(ARG0)
-    stfd    f29, 216(ARG0)
-    stfd    f30, 224(ARG0)
-    stfd    f31, 232(ARG0)
-
-#endif
-
-    stw     r1, 0(ARG0)     /* finally, save the stack pointer */
-    li      ARG0, 0         /* setjmp must return zero */
-    blr
-
-/*
- * longjmp : ARG0 (r3) contains the address of
- *           the structure from where we are to
- *           restore the context.
- *           ARG1 (r4) contains the non-zero
- *           value that we must return to
- *           that context.
- *           Uses r0 as scratch register
- *
- * NOTE : MUST BE KEPT CONSISTENT WITH gdb/config/powerpc/tm-ppc-eabi.h
- *        (which needs to know where to find the destination address)
- */
-
-ENTRY(longjmp, TAG_NO_FRAME_USED)   /* TODO NMGS - need correct tag */
-    lwz     r13, 4(ARG0)    /* GPR context. We avoid multiple-word */
-    lwz     r14, 8(ARG0)    /* instructions as they're slower (?) */
-    lwz     r15, 12(ARG0)
-    lwz     r16, 16(ARG0)
-    lwz     r17, 20(ARG0)
-    lwz     r18, 24(ARG0)
-    lwz     r19, 28(ARG0)
-    lwz     r20, 32(ARG0)
-    lwz     r21, 36(ARG0)
-    lwz     r22, 40(ARG0)
-    lwz     r23, 44(ARG0)
-    lwz     r24, 48(ARG0)
-    lwz     r25, 52(ARG0)
-    lwz     r26, 56(ARG0)
-    lwz     r27, 60(ARG0)
-    lwz     r28, 64(ARG0)
-    lwz     r29, 68(ARG0)
-    lwz     r30, 72(ARG0)
-    lwz     r31, 76(ARG0)
-
-    lwz     r0, 80(ARG0)    /* Condition register */
-    mtcr    r0              /* r0 is the scratch register */
-
-    lwz     r0, 84(ARG0)    /* Link register */
-    mtlr    r0
-
-    lwz     r0, 88(ARG0)    /* Fixed point exception register */
-    mtxer   r0
-
-#ifdef FLOATING_POINT_SUPPORT
-    lfd     f0, 92-4(ARG0)  /* get Floating point status register in low 32 bits of f0 */
-    mtfsf   0xFF,f0         /* restore FPSCR */
-
-    lfd     f14, 96(ARG0)   /* Floating point context - 8 byte aligned */
-    lfd     f15, 104(ARG0)
-    lfd     f16, 112(ARG0)
-    lfd     f17, 120(ARG0)
-    lfd     f18, 128(ARG0)
-    lfd     f19, 136(ARG0)
-    lfd     f20, 144(ARG0)
-    lfd     f21, 152(ARG0)
-    lfd     f22, 160(ARG0)
-    lfd     f23, 168(ARG0)
-    lfd     f24, 176(ARG0)
-    lfd     f25, 184(ARG0)
-    lfd     f26, 192(ARG0)
-    lfd     f27, 200(ARG0)
-    lfd     f28, 208(ARG0)
-    lfd     f29, 216(ARG0)
-    lfd     f30, 224(ARG0)
-    lfd     f31, 232(ARG0)
-
-#endif /* FLOATING_POINT_SUPPORT */
-
-
-    lwz     r1, 0(ARG0)     /* finally, restore the stack pointer */
-
-    mr.     ARG0, ARG1      /* set the return value */
-    bnelr                   /* return if non-zero */
-
-    li      ARG0, 1
-    blr                     /* never return 0, return 1 instead */
-
diff --git a/libsa/sort.c b/libsa/sort.c
deleted file mode 100644
index 4c08ad409..000000000
--- a/libsa/sort.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License.
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Ronnie Kon at Mindcraft Inc., Kevin Lew and Elmer Yglesias. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)heapsort.c 8.1 (Berkeley) 6/4/93"; -#endif /* LIBC_SCCS and not lint */ - - -#include - - -/* - * Swap two areas of size number of bytes. Although qsort(3) permits random - * blocks of memory to be sorted, sorting pointers is almost certainly the - * common case (and, were it not, could easily be made so). Regardless, it - * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer - * arithmetic gets lost in the time required for comparison function calls. - */ -#define SWAP(a, b, count, size, tmp) { \ - count = size; \ - do { \ - tmp = *a; \ - *a++ = *b; \ - *b++ = tmp; \ - } while (--count); \ -} - -/* Copy one block of size size to another. */ -#define COPY(a, b, count, size, tmp1, tmp2) { \ - count = size; \ - tmp1 = a; \ - tmp2 = b; \ - do { \ - *tmp1++ = *tmp2++; \ - } while (--count); \ -} - -/* - * Build the list into a heap, where a heap is defined such that for - * the records K1 ... 
- * KN, Kj/2 >= Kj for 1 <= j/2 <= j <= N.
- *
- * There are two cases. If j == nmemb, select largest of Ki and Kj. If
- * j < nmemb, select largest of Ki, Kj and Kj+1.
- *
- * Besides its arguments, the macro uses `base' and `compar' from the
- * scope in which it is expanded.
- */
-#define CREATE(initval, nmemb, par_i, child_i, par, child, size, count, tmp) { \
-    for (par_i = initval; (child_i = par_i * 2) <= nmemb; \
-        par_i = child_i) { \
-        child = base + child_i * size; \
-        if (child_i < nmemb && compar(child, child + size) < 0) { \
-            child += size; \
-            ++child_i; \
-        } \
-        par = base + par_i * size; \
-        if (compar(child, par) <= 0) \
-            break; \
-        SWAP(par, child, count, size, tmp); \
-    } \
-}
-
-/*
- * Select the top of the heap and 'heapify'. Since by far the most expensive
- * action is the call to the compar function, a considerable optimization
- * in the average case can be achieved due to the fact that k, the displaced
- * element, is usually quite small, so it would be preferable to first
- * heapify, always maintaining the invariant that the larger child is copied
- * over its parent's record.
- *
- * Then, starting from the *bottom* of the heap, finding k's correct place,
- * again maintaining the invariant. As a result of the invariant no element
- * is 'lost' when k is assigned its correct place in the heap.
- *
- * The time savings from this optimization are on the order of 15-20% for the
- * average case. See Knuth, Vol. 3, page 158, problem 18.
- *
- * XXX Don't break the #define SELECT line, below. Reiser cpp gets upset.
- */ -#define SELECT(par_i, child_i, nmemb, par, child, size, k, count, tmp1, tmp2) { \ - for (par_i = 1; (child_i = par_i * 2) <= nmemb; par_i = child_i) { \ - child = base + child_i * size; \ - if (child_i < nmemb && compar(child, child + size) < 0) { \ - child += size; \ - ++child_i; \ - } \ - par = base + par_i * size; \ - COPY(par, child, count, size, tmp1, tmp2); \ - } \ - for (;;) { \ - child_i = par_i; \ - par_i = child_i / 2; \ - child = base + child_i * size; \ - par = base + par_i * size; \ - if (child_i == 1 || compar(k, par) < 0) { \ - COPY(child, k, count, size, tmp1, tmp2); \ - break; \ - } \ - COPY(child, par, count, size, tmp1, tmp2); \ - } \ -} - -/* Pass heapsort off as qsort for krld. -- Nik Gervae - * - * Heapsort -- Knuth, Vol. 3, page 145. Runs in O (N lg N), both average - * and worst. While heapsort is faster than the worst case of quicksort, - * the BSD quicksort does median selection so that the chance of finding - * a data set that will trigger the worst case is nonexistent. Heapsort's - * only advantage over quicksort is that it requires little additional memory. - */ -__private_extern__ -void qsort(void * vbase, size_t nmemb, size_t size, - int (*compar)(const void *, const void *)) { - - register unsigned int cnt, i, j, l; - register char tmp, *tmp1, *tmp2; - char *base, *k, *p, *t; - - if (nmemb <= 1) { - return; - } - - if (!size) { - return; - } - - if ((k = (char *)malloc(size)) == NULL) { -// panic(); - return; - } - - /* - * Items are numbered from 1 to nmemb, so offset from size bytes - * below the starting address. - */ - base = (char *)vbase - size; - - for (l = nmemb / 2 + 1; --l;) - CREATE(l, nmemb, i, j, t, p, size, cnt, tmp); - - /* - * For each element of the heap, save the largest element into its - * final slot, save the displaced element (k), then recreate the - * heap. 
- */ - while (nmemb > 1) { - COPY(k, base + nmemb * size, cnt, size, tmp1, tmp2); - COPY(base + nmemb * size, base + size, cnt, size, tmp1, tmp2); - --nmemb; - SELECT(i, j, nmemb, t, p, size, k, cnt, tmp1, tmp2); - } - free(k); - return; -} diff --git a/libsa/strrchr.c b/libsa/strrchr.c deleted file mode 100644 index d41768813..000000000 --- a/libsa/strrchr.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * History: - * 2001-05-30 gvdl Duplicated from bsd/libkern/rindex.c - */ - -#if defined(LIBC_SCCS) && !defined(lint) -/* static char sccsid[] = "@(#)rindex.c 8.1 (Berkeley) 6/4/93"; */ -#endif /* LIBC_SCCS and not lint */ - -#include - -__private_extern__ char * strrchr(const char *cp, int ch) -{ - char *save; - char c; - - for (save = (char *) 0; (c = *cp); cp++) { - if (c == ch) - save = (char *) cp; - } - - return save; -} - diff --git a/libsa/strstr.c b/libsa/strstr.c deleted file mode 100644 index f6ca51ecf..000000000 --- a/libsa/strstr.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * History: - * 2002-01-24 gvdl Initial implementation of strstr - */ - -#include -#include - -__private_extern__ char * -strstr(const char *in, const char *str) -{ - char c; - size_t len; - - c = *str++; - if (!c) - return (char *) in; // Trivial empty string case - - len = strlen(str); - do { - char sc; - - do { - sc = *in++; - if (!sc) - return (char *) 0; - } while (sc != c); - } while (strncmp(in, str, len) != 0); - - return (char *) (in - 1); -} diff --git a/libsyscall/BSDmakefile b/libsyscall/BSDmakefile index 8a6ff2a3b..57eda28ba 100644 --- a/libsyscall/BSDmakefile +++ b/libsyscall/BSDmakefile @@ -15,6 +15,7 @@ ARCH != arch RC_ARCHS = $(ARCH) RC_$(RC_ARCHS) = 1 .endif +SDKROOT ?= / NARCHS != echo $(RC_ARCHS) | wc -w LIBSYS = $(SDKROOT)/usr/local/lib/system NJOBS != perl -e '$$n = `/usr/sbin/sysctl -n hw.ncpu`; printf "%d\n", $$n < 2 ? 2 : ($$n * 1.5)' @@ -61,10 +62,11 @@ LIPOARGS$(F) != perl -e 'printf "%s\n", join(" ", map(qq(-arch $$_ \"$(OBJROOT)/ build-$(F): build-$(A)-$(F) .endfor # RC_ARCHS build-$(F): + mkdir -p $(SYMROOT) .if $(NARCHS) == 1 cp -p "$(OBJROOT)/obj.$(RC_ARCHS)/libsyscall$(SUFFIX$(F)).a" "$(SYMROOT)" .else - lipo -create $(LIPOARGS$(F)) -output $(SYMROOT)/libsyscall$(SUFFIX$(F)).a + xcrun -sdk $(SDKROOT) lipo -create $(LIPOARGS$(F)) -output $(SYMROOT)/libsyscall$(SUFFIX$(F)).a .endif .for A in $(RC_ARCHS) @@ -72,19 +74,20 @@ build-$(A)-$(F): mkdir -p $(OBJROOT)/obj.$(A) && \ MAKEOBJDIR="$(OBJROOT)/obj.$(A)" MACHINE_ARCH="$(A)" \ DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \ - MAKEFLAGS="" CFLAGS="-arch $(A) $(LOCAL_CFLAGS)" $(BSDMAKEJ) libsyscall$(SUFFIX$(F)).a + MAKEFLAGS="" MIGDEFINES="" CFLAGS="-arch $(A) $(LOCAL_CFLAGS)" $(BSDMAKEJ) libsyscall$(SUFFIX$(F)).a .endfor # RC_ARCHS .endfor # FORMS installhdrs: MAKEOBJDIR="$(OBJROOT)" DESTDIR="$(DSTROOT)" MAKEFLAGS="" \ DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \ + MIGDEFINES="-DLIBSYSCALL_INTERFACE=1" \ $(BSDMAKE) installhdrs .for A in 
$(RC_ARCHS) mkdir -p "$(OBJROOT)/obj.$(A)" && \ MAKEOBJDIR="$(OBJROOT)/obj.$(A)" MACHINE_ARCH="$(A)" \ DSTROOT='$(DSTROOT)' OBJROOT='$(OBJROOT)' SYMROOT='$(SYMROOT)' \ - MAKEFLAGS="" $(BSDMAKE) installhdrs-md + MAKEFLAGS="" MIGDEFINES="" $(BSDMAKE) installhdrs-md .endfor # RC_ARCHS .for F in $(FORMS) @@ -92,9 +95,10 @@ BI-install-$(F): build-$(F) mkdir -p $(DSTROOT)/usr/local/lib/system if [ -f "$(SYMROOT)/libsyscall$(SUFFIX$(F)).a" ]; then \ echo "Installing libsyscall$(SUFFIX$(F)).a" && \ - install -c -m 444 "$(SYMROOT)/libsyscall$(SUFFIX$(F)).a" \ + install -c -m 644 "$(SYMROOT)/libsyscall$(SUFFIX$(F)).a" \ $(DSTROOT)/usr/local/lib/system && \ ranlib "$(DSTROOT)/usr/local/lib/system/libsyscall$(SUFFIX$(F)).a"; \ + chmod 444 "$(DSTROOT)/usr/local/lib/system/libsyscall$(SUFFIX$(F)).a"; \ fi .endfor # FORMS diff --git a/libsyscall/Makefile b/libsyscall/Makefile index a40b4fb5e..33f3e99ff 100644 --- a/libsyscall/Makefile +++ b/libsyscall/Makefile @@ -13,26 +13,32 @@ LIB=syscall SHLIB_MAJOR= 1 SHLIB_MINOR= 0 .if (${MACHINE_ARCH} == unknown) +.ifdef RC_ARCHS +MACHINE_ARCH != echo $(RC_ARCHS) | cut -f 1 -d " " +.else MACHINE_ARCH != /usr/bin/arch -.endif +.endif +.endif .if !empty $(MACHINE_ARCH:M*64) LP64 = 1 .endif -CC = gcc +SDKROOT ?= / +CC = xcrun -sdk $(SDKROOT) gcc +MIG = xcrun -sdk $(SDKROOT) mig +MIGCC != xcrun -find -sdk $(SDKROOT) gcc .ifdef ALTFRAMEWORKSPATH PRIVINC = -F${ALTFRAMEWORKSPATH} -I${ALTFRAMEWORKSPATH}/System.framework/PrivateHeaders .else PRIVINC = -I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders .endif CFLAGS += ${PRIVINC} -.if empty $(MACHINE_ARCH:Marm*) -CFLAGS += -force_cpusubtype_ALL -AINC= -force_cpusubtype_ALL -.endif CFLAGS += -no-cpp-precomp CFLAGS += -fno-common -pipe -Wmost -g -AINC+= -no-cpp-precomp +CFLAGS += -DCF_EXCLUDE_CSTD_HEADERS -DCF_OPEN_SOURCE +CFLAGS += -isysroot ${SDKROOT} +AINC= -no-cpp-precomp AINC+= -arch ${MACHINE_ARCH} -g +MIGDEFINES ?= CLEANFILES+=tags INSTALL_PIC_ARCHIVE= yes PRECIOUSLIB= yes 
diff --git a/libsyscall/Makefile.xbs b/libsyscall/Makefile.xbs index 8f6973e6c..556597fef 100644 --- a/libsyscall/Makefile.xbs +++ b/libsyscall/Makefile.xbs @@ -54,7 +54,7 @@ SOBJS+= ${OBJS:.o=.So} #### mig Rules ######################################################## .defs.h .defsUser.c .defsServer.c: - mig -arch ${MACHINE_ARCH} -cc ${CC} -user ${.PREFIX}User.c -server ${.PREFIX}Server.c -header ${.PREFIX}.h ${.IMPSRC} + $(MIG) ${PRIVINC} ${MIGDEFINES} -arch ${MACHINE_ARCH} -cc ${MIGCC} -user ${.PREFIX}User.c -server ${.PREFIX}Server.c -header ${.PREFIX}.h ${.IMPSRC} gen_mig_defs: ${SRVMIGHDRS} ${MIGHDRS} gen_md_mig_defs: ${MD_MIGHDRS} diff --git a/libsyscall/create-syscalls.pl b/libsyscall/create-syscalls.pl index 83bf17c1f..285a170a0 100755 --- a/libsyscall/create-syscalls.pl +++ b/libsyscall/create-syscalls.pl @@ -70,6 +70,7 @@ my $StubFile = 'libsyscall.list'; # size in bytes of known types (only used for i386) my %TypeBytes = ( + 'au_asid_t' => 4, 'caddr_t' => 4, 'gid_t' => 4, 'id_t' => 4, @@ -79,6 +80,7 @@ 'int64_t' => 8, 'key_t' => 4, 'long' => 4, + 'mach_port_name_t' => 4, 'mode_t' => 4, 'off_t' => 8, 'pid_t' => 4, @@ -87,7 +89,6 @@ 'size_t' => 4, 'socklen_t' => 4, 'ssize_t' => 4, - 'time_t' => 4, 'u_int' => 4, 'u_long' => 4, 'uid_t' => 4, diff --git a/libsyscall/custom/SYS.h b/libsyscall/custom/SYS.h index af9074020..a4eb976a2 100644 --- a/libsyscall/custom/SYS.h +++ b/libsyscall/custom/SYS.h @@ -59,26 +59,13 @@ #include -/* From rhapsody kernel mach/ppc/syscall_sw.h */ -#define kernel_trap_args_0 -#define kernel_trap_args_1 -#define kernel_trap_args_2 -#define kernel_trap_args_3 -#define kernel_trap_args_4 -#define kernel_trap_args_5 -#define kernel_trap_args_6 -#define kernel_trap_args_7 -#define kernel_trap_args_8 -/* End of rhapsody kernel mach/ppc/syscall_sw.h */ - /* * Macros. 
*/ #define SYSCALL(name, nargs) \ .globl cerror @\ - MI_ENTRY_POINT(_##name) @\ - kernel_trap_args_##nargs @\ + MI_ENTRY_POINT(_##name) @\ li r0,SYS_##name @\ sc @\ b 1f @\ @@ -88,7 +75,6 @@ #define SYSCALL_NONAME(name, nargs) \ .globl cerror @\ - kernel_trap_args_##nargs @\ li r0,SYS_##name @\ sc @\ b 1f @\ diff --git a/libsyscall/custom/__psynch_cvbroad.s b/libsyscall/custom/__psynch_cvbroad.s new file mode 100644 index 000000000..86d9d8024 --- /dev/null +++ b/libsyscall/custom/__psynch_cvbroad.s @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1999-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* Copyright (c) 1992 NeXT Computer, Inc. All rights reserved. 
*/ + +#include "SYS.h" + +#define __SYSCALL_32BIT_ARG_BYTES 36 + +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) + +__SYSCALL(__psynch_cvbroad, psynch_cvbroad, 8) + +#else +#error Unsupported architecture +#endif diff --git a/osfmk/libsa/machine/stdarg_apple.h b/libsyscall/custom/__psynch_cvwait.s similarity index 79% rename from osfmk/libsa/machine/stdarg_apple.h rename to libsyscall/custom/__psynch_cvwait.s index c48e1cbdb..f29bceab4 100644 --- a/osfmk/libsa/machine/stdarg_apple.h +++ b/libsyscall/custom/__psynch_cvwait.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2007 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,17 +25,16 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _MACH_MACHINE_STDARG_APPLE_H -#define _MACH_MACHINE_STDARG_APPLE_H +/* Copyright (c) 1992 NeXT Computer, Inc. All rights reserved. */ +#include "SYS.h" -#if defined (__ppc__) -#include "ppc/stdarg_apple.h" -#elif defined (__i386__) -#include "i386/stdarg_apple.h" -#else -#error architecture not supported -#endif +#define __SYSCALL_32BIT_ARG_BYTES 40 + +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) +__SYSCALL(__psynch_cvwait, psynch_cvwait, 8) -#endif /* _MACH_MACHINE_STDARG_APPLE_H */ +#else +#error Unsupported architecture +#endif diff --git a/EXTERNAL_HEADERS/i386/_limits.h b/libsyscall/custom/__thread_selfid.s similarity index 84% rename from EXTERNAL_HEADERS/i386/_limits.h rename to libsyscall/custom/__thread_selfid.s index 5eec8cd1f..5e70787cf 100644 --- a/EXTERNAL_HEADERS/i386/_limits.h +++ b/libsyscall/custom/__thread_selfid.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2007 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,9 +25,15 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _I386__LIMITS_H_ -#define _I386__LIMITS_H_ -#define __DARWIN_CLK_TCK 100 /* ticks per second */ +#include "SYS.h" -#endif /* _I386__LIMITS_H_ */ +#if defined(__x86_64__) + +__SYSCALL(__thread_selfid, thread_selfid, 1) + +#elif defined(__i386__) + +__SYSCALL_INT(__thread_selfid, thread_selfid, 1) + +#endif diff --git a/libsyscall/custom/__vfork.s b/libsyscall/custom/__vfork.s index dc201e852..073b90840 100644 --- a/libsyscall/custom/__vfork.s +++ b/libsyscall/custom/__vfork.s @@ -158,6 +158,7 @@ LEAF(___vfork, 0) movq $ SYSCALL_CONSTRUCT_UNIX(SYS_vfork), %rax // code for vfork -> rax UNIX_SYSCALL_TRAP // do the system call jnb L1 // jump if CF==0 + pushq %rdi // put return address back on stack for cerror movq __current_pid@GOTPCREL(%rip), %rcx lock addq $1, (%rcx) diff --git a/libsyscall/mach/err_iokit.sub b/libsyscall/mach/err_iokit.sub index 2230ecc57..02e657aa8 100755 --- a/libsyscall/mach/err_iokit.sub +++ b/libsyscall/mach/err_iokit.sub @@ -27,9 +27,12 @@ * error codes for Mach and Unix kernels */ -#include +#include +#include +#if !TARGET_OS_EMBEDDED #include #include +#endif static struct error_sparse_map err_codes_iokit_common_map[] = { err_code_map_entry(kIOReturnInvalid, kIOReturnInvalid ), @@ -94,6 +97,7 @@ static const char * err_codes_iokit_common[] = { "(iokit/common) data was not found", // 0x2f0 }; +#if !TARGET_OS_EMBEDDED static struct error_sparse_map err_codes_iokit_usb_map[] = { err_code_map_entry(kIOUSBCRCErr, kIOUSBDataToggleErr), err_code_map_entry(kIOUSBPIDCheckErr, kIOUSBWrongPIDErr), @@ -199,6 +203,7 @@ static const char * err_codes_iokit_bluetooth[] = { "(iokit/bluetooth) no HCI controller", // 003 "(iokit/bluetooth) changing power states is unsupported", // 004 }; +#endif /* !TARGET_OS_EMBEDDED */ static const struct error_sparse_map err_iokit_sub_map[] = { err_sub_map_entry(sub_iokit_common, sub_iokit_pmu), @@ -215,6 
+220,7 @@ static struct error_subsystem err_iokit_sub[] = err_codes_iokit_common_map, errlib_count(err_codes_iokit_common_map), }, +#if !TARGET_OS_EMBEDDED /* 1 */ { "(iokit/usb)", // 0xe0004000 errlib_count(err_codes_iokit_usb), @@ -229,17 +235,20 @@ static struct error_subsystem err_iokit_sub[] = err_codes_iokit_fw_map, errlib_count(err_codes_iokit_fw_map), }, +#endif /* !TARGET_OS_EMBEDDED */ /* 3 */ err_iokit_null_sub, // 0xe000c000 /* 4 */ { "(iokit/blkstorage)", 0 }, // 0xe0010000 /* 5 */ { "(iokit/graphics)", 0 }, // 0xe0014000 /* 6 */ err_iokit_null_sub, // 0xe0018000 /* 7 */ err_iokit_null_sub, // 0xe001c000 +#if !TARGET_OS_EMBEDDED /* 8 */ { "(iokit/bluetooth)", // 0xe0020000 errlib_count(err_codes_iokit_bluetooth), err_codes_iokit_bluetooth, NULL, 0, }, +#endif /* !TARGET_OS_EMBEDDED */ /* 9 */ { "(iokit/pmu)", 0 }, // 0xe0024000 /* -2 */ { "(iokit/vendor)", 0 }, // 0xe0028000 /* -1 */ { "(iokit/reserved)", 0 }, // 0xe002c000 diff --git a/libsyscall/mach/err_libkern.sub b/libsyscall/mach/err_libkern.sub new file mode 100644 index 000000000..a9a9c27c2 --- /dev/null +++ b/libsyscall/mach/err_libkern.sub @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * File: err_libkern.sub + * Author: Nik Gervae, Apple Inc. + * Date: October 2008 + * + * error codes for Libkern/C++ + */ + +#include +#include + + +/* These codes are specified in decimal in OSReturn.h. + */ +static const char * err_codes_libkern_common[] = { + NO_SUCH_ERROR, + "(libkern/common) general/unspecified error", /* 1 */ +}; + +/* These codes are specified in decimal in OSReturn.h. + */ +static const char * err_codes_libkern_metaclass[] = { + NO_SUCH_ERROR, + "(libkern/metaclass) runtime internal error", /* 1 */ + "(libkern/metaclass) class has instances", /* 2 */ + "(libkern/metaclass) OSMetaClass::preModLoad() not called, runtime internal error", /* 3 */ + "(libkern/metaclass) allocation failure, internal data", /* 4 */ + "(libkern/metaclass) allocation failure, class tracking dictionaries", /* 5 */ + "(libkern/metaclass) allocation failure, no kext/class set", /* 6 */ + "(libkern/metaclass) failed to insert class into class dictionary", /* 7 */ + "(libkern/metaclass) can't associate class with its superclass", /* 8 */ + "(libkern/metaclass) can't find superclass during instance creation", /* 9 */ + "(libkern/metaclass) duplicate class name encountered", /* 10 */ + "(libkern/metaclass) no kext for metaclass", /* 11 */ +}; + +/* These codes are specified in hexadecimal in OSKextLib.h. 
+ */ +static const char * err_codes_libkern_kext[] = { + NO_SUCH_ERROR, + "(libkern/kext) internal error", /* 0x1 */ + "(libkern/kext) allocation failure", /* 0x2 */ + "(libkern/kext) resource shortage", /* 0x3 */ + "(libkern/kext) not privileged", /* 0x4 */ + "(libkern/kext) invalid argument", /* 0x5 */ + "(libkern/kext) not found", /* 0x6 */ + "(libkern/kext) bad data (mkext/other)", /* 0x7 */ + "(libkern/kext) XML (un)serialization error", /* 0x8 */ + "(libkern/kext) function/version unsupported", /* 0x9 */ + "(libkern/kext) function disabled", /* 0xa */ + + "(libkern/kext) malformed kext (bundle layout/missing plist)", /* 0xb */ + "(libkern/kext) validation failure (plist/executable)", /* 0xc */ + "(libkern/kext) authentication failure (file ownership/permissions)", /* 0xd */ + "(libkern/kext) dependency resolution failure", /* 0xe */ + "(libkern/kext) requested architecture/executable not found", /* 0xf */ + "(libkern/kext) cache error", /* 0x10 */ + + "(libkern/kext) operation deferred (queued to user space)", /* 0x11 */ + "(libkern/kext) operation/kext not allowed at current boot level", /* 0x12 */ + "(libkern/kext) not loadable (reason unspecified)", /* 0x13 */ + "(libkern/kext) different version/uuid already loaded", /* 0x14 */ + + "(libkern/kext) dependency load failed", /* 0x15 */ + "(libkern/kext) link error", /* 0x16 */ + "(libkern/kext) kext (kmod) start/stop routine failed", /* 0x17 */ + "(libkern/kext) kext is in use or retained (cannot unload)", /* 0x18 */ + "(libkern/kext) kext request timed out", /* 0x19 */ + "(libkern/kext) kext is stopping and cannot issue requests", /* 0x1a */ +}; + +/* libkern is err_system(0x37) */ +static const struct error_subsystem err_libkern_sub[] = { + /* subsystem 0 */ + { + "(libkern/common)", + errlib_count(err_codes_libkern_common), + err_codes_libkern_common, + }, + + /* subsystem 1 */ + { + "(libkern/metaclass)", + errlib_count(err_codes_libkern_metaclass), + err_codes_libkern_metaclass, + }, + + /* subsystem 2 
*/ + { + "(libkern/kext)", + errlib_count(err_codes_libkern_kext), + err_codes_libkern_kext, + }, +}; diff --git a/libsyscall/mach/error_codes.c b/libsyscall/mach/error_codes.c index 4fdf8306c..c87e18b8b 100644 --- a/libsyscall/mach/error_codes.c +++ b/libsyscall/mach/error_codes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,6 +64,7 @@ #include #include "errorlib.h" +#include "err_libkern.sub" #include "err_iokit.sub" #include "err_ipc.sub" #include "err_kern.sub" @@ -131,7 +132,14 @@ __private_extern__ struct error_system _mach_errors[err_max_system+1] = { /* 0x30 */ errorlib_system_null, /* 0x31 */ errorlib_system_null, /* 0x32 */ errorlib_system_null, /* 0x33 */ errorlib_system_null, /* 0x34 */ errorlib_system_null, /* 0x35 */ errorlib_system_null, - /* 0x36 */ errorlib_system_null, /* 0x37 */ errorlib_system_null, + /* 0x36 */ errorlib_system_null, + + /* 0x37; err_libkern */ + { + errlib_count(err_libkern_sub), + "(libkern/?) unknown subsystem error", + err_libkern_sub, + }, /* 0x38; err_iokit */ { diff --git a/libsyscall/mach/headers/mach_error.h b/libsyscall/mach/headers/mach_error.h index cb1acb01e..5840bd575 100644 --- a/libsyscall/mach/headers/mach_error.h +++ b/libsyscall/mach/headers/mach_error.h @@ -78,7 +78,7 @@ void mach_error( /* * Prints an appropriate message on the standard error stream */ - char *str, + const char *str, mach_error_t error_value ); diff --git a/libsyscall/mach/headers/mach_init.h b/libsyscall/mach/headers/mach_init.h index 15a3830d1..36a47fac1 100644 --- a/libsyscall/mach/headers/mach_init.h +++ b/libsyscall/mach/headers/mach_init.h @@ -113,6 +113,12 @@ extern int vm_page_shift; #define trunc_page(x) ((x) & (~(vm_page_size - 1))) #define round_page(x) trunc_page((x) + (vm_page_size - 1)) +/* + * Page-size rounding macros for the fixed-width VM types. 
+ */ +#define mach_vm_trunc_page(x) ((mach_vm_offset_t)(x) & ~((signed)PAGE_MASK)) +#define mach_vm_round_page(x) (((mach_vm_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) + /* * fprintf_stderr uses vprintf_stderr_func to produce * error messages, this can be overridden by a user diff --git a/libsyscall/mach/mach_error.c b/libsyscall/mach/mach_error.c index f3722a573..b87c0adbf 100644 --- a/libsyscall/mach/mach_error.c +++ b/libsyscall/mach/mach_error.c @@ -68,9 +68,9 @@ int fprintf_stderr(const char *format, ...); void -mach_error( str, err ) - char *str; - mach_error_t err; +mach_error( str, err ) + const char *str; + mach_error_t err; { char * err_str; char buf[1024]; diff --git a/libsyscall/mach/mach_init_libSystem.c b/libsyscall/mach/mach_init_libSystem.c index 898f82bf2..86ca46aca 100644 --- a/libsyscall/mach/mach_init_libSystem.c +++ b/libsyscall/mach/mach_init_libSystem.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,6 +32,7 @@ extern void pthread_init(void); // from libc.a extern void __libc_init(const struct ProgramVars* vars); // from libc.a extern void __keymgr_initializer(void); // from libkeymgr.a extern void _dyld_initializer(void); // from libdyld.a +extern void libdispatch_init(void); // from libdispatch.a /* * libsyscall_initializer() initializes all of libSystem.dylib @@ -44,6 +45,7 @@ void libSystem_initializer(int argc, const char* argv[], const char* envp[], con __libc_init(vars); __keymgr_initializer(); _dyld_initializer(); + libdispatch_init(); } /* diff --git a/libsyscall/mach/vm_map.defs b/libsyscall/mach/vm_map.defs index d0562dbed..c9aefb3c6 100644 --- a/libsyscall/mach/vm_map.defs +++ b/libsyscall/mach/vm_map.defs @@ -25,13 +25,20 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#if defined(__LP64__) +#if !LIBSYSCALL_INTERFACE && (defined(__LP64__) || defined(__arm__)) /* * In an LP64 environment, the traditional Mach VM interface names are * really just a second instance of the "wide" Mach VM interfaces. * + * For ARM, which doesn't support two address space sizes, use the "wide" + * interfaces as well, to reduce the amount of duplicate code compiled + * into the kernel. + * * The _MACH_VM_PUBLISH_AS_LOCAL_ flag triggers mach_vm.defs to export * the local names instead. 
+ * + * LIBSYSCALL_INTERFACE indicates that we are building the + * machine-independent headers for Libsyscall */ #define _MACH_VM_PUBLISH_AS_LOCAL_ #include diff --git a/makedefs/MakeInc.cmd b/makedefs/MakeInc.cmd index 1fa58a067..06457b6f2 100644 --- a/makedefs/MakeInc.cmd +++ b/makedefs/MakeInc.cmd @@ -1,12 +1,47 @@ # # Commands for the build environment # -MIG = $(NEXT_ROOT)/usr/bin/mig +## +# Verbosity +## +ifeq ($(RC_XBS),YES) +VERBOSE = YES +else +VERBOSE = NO +endif +ifeq ($(VERBOSE),YES) +_v = +_vstdout = +else +_v = @ +_vstdout = > /dev/null +endif -MD= /usr/bin/md +ifeq ($(VERBOSE),YES) + XCRUN = /usr/bin/xcrun -verbose -log +else + XCRUN = /usr/bin/xcrun +endif + +SDKROOT ?= / + +CC := $(XCRUN) -sdk $(SDKROOT) cc +CXX := $(XCRUN) -sdk $(SDKROOT) g++ +MIG := $(XCRUN) -sdk $(SDKROOT) mig +ifeq ($(MIGCC),) + export MIGCC := $(shell $(XCRUN) -sdk $(SDKROOT) -find cc) +endif +ifeq ($(RELPATH),) + export RELPATH := $(shell $(XCRUN) -sdk $(SDKROOT) -find relpath) +endif +SEG_HACK := $(XCRUN) -sdk $(SDKROOT) setsegname +KEXT_CREATE_SYMBOL_SET := $(XCRUN) -sdk $(SDKROOT) kextsymboltool + +MD = /usr/bin/md RM = /bin/rm -f CP = /bin/cp +MV = /bin/mv LN = /bin/ln -fs CAT = /bin/cat MKDIR = /bin/mkdir -p @@ -14,20 +49,21 @@ FIND = /usr/bin/find INSTALL = /usr/bin/install TAR = /usr/bin/gnutar -STRIP = /usr/bin/strip -LIPO = /usr/bin/lipo +STRIP = $(XCRUN) -sdk $(SDKROOT) strip +LIPO = $(XCRUN) -sdk $(SDKROOT) lipo +LIBTOOL = $(XCRUN) -sdk $(SDKROOT) libtool +NM = $(XCRUN) -sdk $(SDKROOT) nm BASENAME = /usr/bin/basename -export RELPATH = $(NEXT_ROOT)/usr/local/bin/relpath TR = /usr/bin/tr -SEG_HACK = $(NEXT_ROOT)/usr/local/bin/seg_hack -UNIFDEF = /usr/bin/unifdef +UNIFDEF = $(XCRUN) -sdk $(SDKROOT) unifdef DECOMMENT = /usr/local/bin/decomment +NEWVERS = $(SRCROOT)/config/newvers.pl -DSYMUTIL = /usr/bin/dsymutil -CTFCONVERT = /usr/local/bin/ctfconvert -CTFMERGE = /usr/local/bin/ctfmerge -CTFSCRUB = /usr/local/bin/ctfdump -r +DSYMUTIL = $(XCRUN) -sdk $(SDKROOT) 
dsymutil +CTFCONVERT = $(XCRUN) -sdk $(SDKROOT) ctfconvert +CTFMERGE = $(XCRUN) -sdk $(SDKROOT) ctfmerge +CTFSCRUB = $(XCRUN) -sdk $(SDKROOT) ctfdump -r # vim: set ft=make: diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def index c4b6c21dc..0366b6215 100644 --- a/makedefs/MakeInc.def +++ b/makedefs/MakeInc.def @@ -16,6 +16,7 @@ export INCR_EXPORTHDRS = FALSE endif endif + # # Component List # @@ -36,7 +37,7 @@ endif # Architecture options # ifndef SUPPORTED_ARCH_CONFIGS -export SUPPORTED_ARCH_CONFIGS = PPC I386 ARM +export SUPPORTED_ARCH_CONFIGS = PPC I386 X86_64 ARM endif ifndef ARCH_CONFIGS @@ -60,8 +61,12 @@ export SUPPORTED_KERNEL_CONFIGS = RELEASE DEVELOPMENT DEBUG PROFILE endif ifndef DEFAULT_KERNEL_CONFIG +ifeq ($(RC_ProjectName),xnu_debug) +export DEFAULT_KERNEL_CONFIG = DEBUG +else export DEFAULT_KERNEL_CONFIG = RELEASE endif +endif # If KERNEL_CONFIGS is specified it should override KERNEL_CONFIG. # If KERNEL_CONFIG is specified it will override the default. Will quit with @@ -110,11 +115,12 @@ endif # default architecture configuration = system architecture where you are running make. # default machine configuration for ppc = none at this time. # default machine configuration for i386 = none at this time. +# default machine configuration for x86_64 = none at this time. # default machine configuration for arm = "S5L8900X". 
# ifndef TARGET_CONFIGS_UC ifdef TARGET_CONFIGS - export TARGET_CONFIGS_UC = $(strip $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z)) + export TARGET_CONFIGS_UC := $(strip $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z)) export MACHINE_CONFIG = $(word 3, $(TARGET_CONFIGS_UC)) export DEFAULT_KERNEL_CONFIG = $(word 1, $(TARGET_CONFIGS_UC)) else @@ -133,7 +139,7 @@ export KERNEL_CONFIG_LC := $(shell printf "%s" "$(KERNEL_CONFIG)" | $(TR) A-Z a- # # Kernel Configuration to install # -# supported install architecture : PPC I386 ARM +# supported install architecture : PPC I386 X86_64 ARM # export INSTALL_TYPE = $(DEFAULT_KERNEL_CONFIG) @@ -142,7 +148,7 @@ export INSTALL_ARCHS = $(strip $(foreach my_config, $(SUPPORTED_ARCH_CONFIGS), export INSTALL_ARCHS_LC := $(shell printf "%s" "$(INSTALL_ARCHS)" | $(TR) A-Z a-z) endif -export INSTALL_ARCH_DEFAULT = PPC +export INSTALL_ARCH_DEFAULT = $(firstword $(INSTALL_ARCHS)) # # Standard defines list @@ -162,23 +168,15 @@ KC++ := $(CXX) CWARNFLAGS_STD = \ -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ - -Wshadow -Wcast-align -Wbad-function-cast -Wchar-subscripts -Winline \ + -Wshadow -Wcast-align -Wchar-subscripts -Winline \ -Wnested-externs -Wredundant-decls export CWARNFLAGS ?= $(CWARNFLAGS_STD) -MWARNFLAGS_STD = \ - -Wall -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes \ - -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ - -Wshadow -Wcast-align -Wbad-function-cast -Wchar-subscripts -Winline \ - -Wnested-externs -Wredundant-decls - -export MWARNFLAGS ?= $(MWARNFLAGS_STD) - CXXWARNFLAGS_STD = \ -Wall -Wno-format-y2k -W \ -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch \ - -Wshadow -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls + -Wcast-align -Wchar-subscripts -Wredundant-decls export CXXWARNFLAGS ?= $(CXXWARNFLAGS_STD) @@ -195,10 +193,12 @@ endif ARCH_FLAGS_PPC = 
-arch ppc ARCH_FLAGS_I386 = -arch i386 +ARCH_FLAGS_X86_64 = -arch x86_64 ARCH_FLAGS_ARM = $($(addsuffix $(MACHINE_CONFIG),ARCH_FLAGS_ARM_)) ARCH_FLAGS_ALL_PPC = $(ARCH_FLAGS_PPC) ARCH_FLAGS_ALL_I386 = $(ARCH_FLAGS_I386) +ARCH_FLAGS_ALL_X86_64 = $(ARCH_FLAGS_X86_64) ARCH_FLAGS_ALL_ARM = -arch arm @@ -216,8 +216,8 @@ export DSYMBUILDDIR = ./Contents/Resources/DWARF/ # probes from the kernel. # export CFLAGS_GEN = -static $(DEBUG_CFLAGS) -nostdinc -nostdlib \ - -fno-builtin -finline -msoft-float \ - -fsigned-bitfields $(OTHER_CFLAGS) + -fno-builtin -finline -fno-common -msoft-float \ + -fsigned-bitfields -fno-stack-protector $(OTHER_CFLAGS) ifeq ($(BUILD_STABS),1) export CFLAGS_GEN += -gstabs+ @@ -238,6 +238,8 @@ export CFLAGS_PPC = -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \ -mno-altivec -force_cpusubtype_ALL export CFLAGS_I386 = -Di386 -DI386 -D__I386__ \ -DPAGE_SIZE_FIXED -force_cpusubtype_ALL +export CFLAGS_X86_64 = -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \ + -DPAGE_SIZE_FIXED -mkernel export CFLAGS_ARM = -Darm -DARM -D__ARM__ -DPAGE_SIZE_FIXED \ -fno-strict-aliasing -fno-keep-inline-functions @@ -248,14 +250,13 @@ ifeq (-arch armv6,$(ARCH_FLAGS_ARM)) CFLAGS_ARM += -mthumb endif ifeq (-arch armv5,$(ARCH_FLAGS_ARM)) -#CFLAGS_ARM += -mthumb # +CFLAGS_ARM += -mthumb endif ifeq (-arch xscale,$(ARCH_FLAGS_ARM)) CFLAGS_ARM += -mthumb endif export CFLAGS_RELEASEPPC = -O2 -mcpu=750 -mmultiple -export CFLAGS_RELEASE_TRACEPPC = -O2 -mcpu=750 -mmultiple export CFLAGS_DEVELOPMENTPPC = -O2 -mcpu=750 -mmultiple export CFLAGS_DEBUGPPC = -O2 -mcpu=750 -mmultiple export CFLAGS_PROFILEPPC = -O2 -mcpu=750 -mmultiple @@ -265,6 +266,12 @@ export CFLAGS_DEVELOPMENTI386 = -Os export CFLAGS_DEBUGI386 = -Os export CFLAGS_PROFILEI386 = -Os +export CFLAGS_RELEASEX86_64 = -Os +export CFLAGS_DEVELOPMENTX86_64 = -Os +# No space optimization for the DEBUG kernel for the benefit of gdb: +export CFLAGS_DEBUGX86_64 = -O0 +export CFLAGS_PROFILEX86_64 = -Os + export CFLAGS_RELEASEARM = -O2 export 
CFLAGS_DEVELOPMENTARM = -O2 export CFLAGS_DEBUGARM = -O2 @@ -278,8 +285,6 @@ export CFLAGS = $(CFLAGS_GEN) \ $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),CFLAGS_))) \ $(DEFINES) -export MIGCC = $(CC) - # Default C++ flags # CXXFLAGS_GEN = -fno-rtti -fno-exceptions -fcheck-new -fapple-kext @@ -307,6 +312,7 @@ export SFLAGS_PROFILE = export SFLAGS_PPC = $(CFLAGS_PPC) -force_cpusubtype_ALL export SFLAGS_I386 = $(CFLAGS_I386) export SFLAGS_ARM = $(CFLAGS_ARM) +export SFLAGS_X86_64 = $(CFLAGS_X86_64) export SFLAGS = $(SFLAGS_GEN) \ $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \ @@ -323,21 +329,6 @@ LD = $(KC++) -nostdlib # # Default LDFLAGS # -export LDFLAGS_COMPONENT_GEN = -static -r $(COMP_LDFLAGS_COMPONENT_GEN) - -export LDFLAGS_COMPONENT_RELEASE = $(COMP_LDFLAGS_COMPONENT_RELEASE) -export LDFLAGS_COMPONENT_DEVELOPMENT = $(COMP_LDFLAGS_COMPONENT_DEVELOPMENT) -export LDFLAGS_COMPONENT_DEBUG = $(COMP_LDFLAGS_COMPONENT_DEBUG) -export LDFLAGS_COMPONENT_PROFILE = $(COMP_LDFLAGS_COMPONENT_PROFILE) - -export LDFLAGS_COMPONENT_PPC = $(COMP_LDFLAGS_COMPONENT_PPC) -force_cpusubtype_ALL -export LDFLAGS_COMPONENT_I386 = $(COMP_LDFLAGS_COMPONENT_i386) -export LDFLAGS_COMPONENT_ARM = $(COMP_LDFLAGS_COMPONENT_ARM) -Wl,-new_linker - -export LDFLAGS_COMPONENT = $(LDFLAGS_COMPONENT_GEN) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),LDFLAGS_COMPONENT_)) \ - $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_COMPONENT_)) export LDFLAGS_KERNEL_GEN = \ -static \ @@ -346,38 +337,72 @@ export LDFLAGS_KERNEL_GEN = \ -Wl,-sectalign,__TEXT,__text,0x1000 \ -Wl,-sectalign,__DATA,__common,0x1000 \ -Wl,-sectalign,__DATA,__bss,0x1000 \ - -Wl,-sectcreate,__PRELINK,__text,/dev/null \ - -Wl,-sectcreate,__PRELINK,__symtab,/dev/null \ - -Wl,-sectcreate,__PRELINK,__info,/dev/null + -Wl,-sectcreate,__PRELINK_TEXT,__text,/dev/null \ + -Wl,-sectcreate,__PRELINK_STATE,__kernel,/dev/null \ + -Wl,-sectcreate,__PRELINK_STATE,__kexts,/dev/null \ + 
-Wl,-sectcreate,__PRELINK_INFO,__info,/dev/null + +# Availability of DWARF allows DTrace CTF (compressed type format) to be constructed +ifeq ($(BUILD_DWARF),1) +export LDFLAGS_KERNEL_GEN += \ + -Wl,-sectcreate,__CTF,__ctf,/dev/null +endif export LDFLAGS_KERNEL_RELEASE = export LDFLAGS_KERNEL_DEVELOPMENT = -# -noseglinkedit export LDFLAGS_KERNEL_DEBUG = export LDFLAGS_KERNEL_PROFILE = export LDFLAGS_KERNEL_PPC = \ -force_cpusubtype_ALL \ -Wl,-new_linker \ + -Wl,-pagezero_size,0x0 \ -Wl,-segaddr,__VECTORS,0x0 \ -Wl,-segaddr,__HIB,0x7000 \ -Wl,-segaddr,__TEXT,0xe000 -export LDFLAGS_KERNEL_I386 = \ +export LDFLAGS_KERNEL_RELEASEI386 = \ + -Wl,-new_linker \ + -Wl,-pagezero_size,0x0 \ + -Wl,-segaddr,__INITPT,0x00100000 \ + -Wl,-segaddr,__INITGDT,0x00106000 \ + -Wl,-segaddr,__SLEEP,0x00107000 \ + -Wl,-segaddr,__HIB,0x00108000 \ + -Wl,-image_base,0x200000 \ + -Wl,-seg_page_size,__TEXT,0x200000 + +export LDFLAGS_KERNEL_DEBUGI386 = $(LDFLAGS_KERNEL_RELEASEI386) +export LDFLAGS_KERNEL_DEVELOPMENTI386 = $(LDFLAGS_KERNEL_RELEASEI386) +export LDFLAGS_KERNEL_PROFILEI386 = $(LDFLAGS_KERNEL_RELEASEI386) + +# Keep these constants in sync with the *_SEG_BASE definitions in i386/pmap.h +export LDFLAGS_KERNEL_RELEASEX86_64 = \ -Wl,-new_linker \ - -Wl,-segaddr,__HIB,0x100000 \ - -Wl,-segaddr,__TEXT,0x111000 + -Wl,-pagezero_size,0x0 \ + -Wl,-segaddr,__INITPT,0xffffff8000100000 \ + -Wl,-segaddr,__INITGDT,0xffffff8000106000 \ + -Wl,-segaddr,__SLEEP,0xffffff8000107000 \ + -Wl,-segaddr,__HIB,0xffffff8000108000 \ + -Wl,-image_base,0xffffff8000200000 \ + -Wl,-seg_page_size,__TEXT,0x200000 + +export LDFLAGS_KERNEL_DEBUGX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +export LDFLAGS_KERNEL_DEVELOPMENTX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +export LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) export LDFLAGS_KERNEL_ARM = \ -Wl,-new_linker \ + -Wl,-pagezero_size,0x0 \ -Wl,-segaddr,__HIB,0xC0000000 \ - -Wl,-segaddr,__TEXT,0xC0008000 + -Wl,-image_base,0xC0008000 + export LDFLAGS_KERNEL = 
$(LDFLAGS_KERNEL_GEN) \ $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \ $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ $($(addsuffix $(ARCH_CONFIG),LDFLAGS_KERNEL_)) \ - $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_)) + $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_)) \ + $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_))) \ # @@ -385,6 +410,12 @@ export LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ # export LD_KERNEL_LIBS = -lcc_kext +# +# Command to generate host binaries. Intentionally not +# $(CC), which controls the target compiler +# +HOST_CC = cc + # # Default INCFLAGS # @@ -445,6 +476,7 @@ SINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE KPINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_ KINCFRAME_UNIFDEF = $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_ + # # Compononent Header file destinations # @@ -455,9 +487,7 @@ EXPDIR = EXPORT_HDRS/$(COMPONENT) # export STRIP_FLAGS_RELEASE = -S -x export STRIP_FLAGS_DEVELOPMENT = -S -x -export STRIP_FLAGS_RELEASE_TRACE = -S -x export STRIP_FLAGS_DEBUG = -S -export STRIP_FLAGS_DEBUG_TRACE = -S export STRIP_FLAGS_PROFILE = -S -x export STRIP_FLAGS = $($(addsuffix $(KERNEL_CONFIG),STRIP_FLAGS_)) @@ -466,6 +496,7 @@ export STRIP_FLAGS = $($(addsuffix $(KERNEL_CONFIG),STRIP_FLAGS_)) # dsymutil flags # export DSYMUTIL_FLAGS_I386 = --arch=i386 +export DSYMUTIL_FLAGS_X86_64 = --arch=x86_64 export DSYMUTIL_FLAGS_PPC = --arch=ppc export DSYMUTIL_FLAGS_ARM = --arch=arm @@ -476,20 +507,10 @@ export DSYMUTIL_FLAGS = $($(addsuffix $(ARCH_CONFIG),DSYMUTIL_FLAGS_)) # MANDIR = usr/share/man -## -# Verbosity -## -ifeq ($(RC_XBS),YES) -VERBOSE = YES -else -VERBOSE = NO -endif - -ifeq ($(VERBOSE),YES) -_v = -else -_v = @ -endif +# +# DEBUG alias location +# +DEVELOPER_EXTRAS_DIR = AppleInternal/Developer/Extras # # This must be here before any rules are possibly defined by the diff --git a/makedefs/MakeInc.dir 
b/makedefs/MakeInc.dir index f3e156b2e..7f98650e6 100644 --- a/makedefs/MakeInc.dir +++ b/makedefs/MakeInc.dir @@ -4,6 +4,11 @@ ifeq ($(RC_ProjectName),Libsyscall) installhdrs: bsdmake -C libsyscall installhdrs +else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) +installhdrs: + make -C libkern/kxld/ installhdrs +else ifeq ($(RC_ProjectName),xnu_debug) +installhdrs: else # xnu installhdrs: exporthdrs installhdrs_mi installhdrs_md @echo "[ $(SRCROOT) ] make installhdrs installing Kernel.framework" @@ -14,7 +19,7 @@ installhdrs: exporthdrs installhdrs_mi installhdrs_md [ -d $$kincpath ] || $(MKDIR) $$kincpath; \ cd $(SRCROOT)/EXTERNAL_HEADERS; \ install $(FILE_INSTALL_FLAGS) Info.plist $$krespath; \ - $(SRCROOT)/config/newvers.pl $${krespath}/Info.plist; \ + $(NEWVERS) $${krespath}/Info.plist; \ cd $$kframepath/Versions; \ [ -L Current ] || $(LN) $(KINCVERS) Current; \ cd $$kframepath; \ @@ -39,7 +44,7 @@ installhdrs_mi: arch_config=$(INSTALL_ARCH_DEFAULT); \ installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ [ -d $${installinc_dir} ] ||$(MKDIR) $${installinc_dir}; \ - ${MAKE} -C $${installinc_dir} \ + ${MAKE} ${MAKEJOBS} -C $${installinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MAKEFILES=${SOURCE}/Makefile \ @@ -67,7 +72,7 @@ installhdrs_md: objpath=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path}; \ fi; \ [ -d $${objpath} ] || $(MKDIR) $${objpath}; \ - ${MAKE} -C $${objpath} \ + ${MAKE} ${MAKEJOBS} -C $${objpath} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MAKEFILES=${SOURCE}/Makefile \ @@ -81,36 +86,42 @@ installhdrs_md: # do_installhdrs_mi: -build_installhdrs_mi:: - @echo "[ $(SOURCE) ] make build_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)for installinc_subdir in $(INSTINC_SUBDIRS); \ - do \ - [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ - ${MAKE} -C $${installinc_subdir} \ - 
MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installinc_subdir}/ \ - TARGET=$(TARGET)$${installinc_subdir}/ \ - build_installhdrs_mi; \ - done; \ - ${MAKE} ${MAKEJOBS} do_installhdrs_mi; +BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS = $(addprefix build_installhdrs_mi_,$(INSTINC_SUBDIRS)) + +.PHONY: $(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS) + +$(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS): + $(_v)installinc_subdir="$(patsubst build_installhdrs_mi_%,%,$@)"; \ + [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ + ${MAKE} -C $${installinc_subdir} \ + MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ + SOURCE=$(SOURCE)$${installinc_subdir}/ \ + TARGET=$(TARGET)$${installinc_subdir}/ \ + build_installhdrs_mi; + +build_installhdrs_mi: $(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS) + $(_v)${MAKE} do_installhdrs_mi; # # Install machine dependent kernel header files # do_installhdrs_md: -build_installhdrs_md:: - @echo "[ $(SOURCE) ] make installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)for installinc_subdir in $($(addprefix INSTINC_SUBDIRS_, $(ARCH_CONFIG))); \ - do \ - [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ - ${MAKE} -C $${installinc_subdir} \ - MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installinc_subdir}/ \ - TARGET=$(TARGET)$${installinc_subdir}/ \ - build_installhdrs_md; \ - done; \ - ${MAKE} ${MAKEJOBS} do_installhdrs_md; +BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS = $(addprefix build_installhdrs_md_,$($(addprefix INSTINC_SUBDIRS_, $(ARCH_CONFIG)))) + +.PHONY: $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS) + +$(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS): + $(_v)installinc_subdir="$(patsubst build_installhdrs_md_%,%,$@)"; \ + [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ + ${MAKE} -C $${installinc_subdir} \ + MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ + SOURCE=$(SOURCE)$${installinc_subdir}/ \ + TARGET=$(TARGET)$${installinc_subdir}/ \ + build_installhdrs_md; + 
+build_installhdrs_md: $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS) + $(_v)${MAKE} do_installhdrs_md; # # Install kernel header files @@ -133,7 +144,7 @@ exporthdrs_mi: arch_config=$(INSTALL_ARCH_DEFAULT); \ exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/$${rel_path}; \ [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ - ${MAKE} -C $${exportinc_dir} \ + ${MAKE} ${MAKEJOBS} -C $${exportinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MAKEFILES=${SOURCE}/Makefile \ @@ -186,7 +197,7 @@ exporthdrs_md: exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path}; \ fi; \ [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ - ${MAKE} -C $${exportinc_dir} \ + ${MAKE} ${MAKEJOBS} -C $${exportinc_dir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MAKEFILES=${SOURCE}/Makefile \ @@ -202,36 +213,42 @@ exporthdrs_md: # do_exporthdrs_mi: -build_exporthdrs_mi: - $(_v)_TMP_EXPINC_SUBDIRS="$(EXPINC_SUBDIRS)"; \ - for exportinc_subdir in $${_TMP_EXPINC_SUBDIRS}; \ - do \ - [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ - ${MAKE} -C $${exportinc_subdir} \ - MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${exportinc_subdir}/ \ - TARGET=$(TARGET)$${exportinc_subdir}/ \ - build_exporthdrs_mi; \ - done; \ - ${MAKE} ${MAKEJOBS} do_exporthdrs_mi; +BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS = $(addprefix build_exporthdrs_mi_,$(EXPINC_SUBDIRS)) + +.PHONY: $(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS) + +$(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS): + $(_v)exportinc_subdir="$(patsubst build_exporthdrs_mi_%,%,$@)"; \ + [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ + ${MAKE} -C $${exportinc_subdir} \ + MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ + SOURCE=$(SOURCE)$${exportinc_subdir}/ \ + TARGET=$(TARGET)$${exportinc_subdir}/ \ + build_exporthdrs_mi; + +build_exporthdrs_mi: $(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS) + $(_v)${MAKE} do_exporthdrs_mi; # # Install 
machine dependent kernel header files # do_exporthdrs_md: -build_exporthdrs_md: - $(_v)_TMP_exportinc_subdir="$($(addprefix EXPINC_SUBDIRS_, $(ARCH_CONFIG)))"; \ - for exportinc_subdir in $${_TMP_exportinc_subdir}; \ - do \ - [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ - ${MAKE} -C $${exportinc_subdir} \ - MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${exportinc_subdir}/ \ - TARGET=$(TARGET)$${exportinc_subdir}/ \ - build_exporthdrs_md; \ - done; \ - ${MAKE} ${MAKEJOBS} do_exporthdrs_md; +BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS = $(addprefix build_exporthdrs_md_,$($(addprefix EXPINC_SUBDIRS_, $(ARCH_CONFIG)))) + +.PHONY: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS) + +$(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS): + $(_v)exportinc_subdir="$(patsubst build_exporthdrs_md_%,%,$@)"; \ + [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ + ${MAKE} -C $${exportinc_subdir} \ + MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ + SOURCE=$(SOURCE)$${exportinc_subdir}/ \ + TARGET=$(TARGET)$${exportinc_subdir}/ \ + build_exporthdrs_md; + +build_exporthdrs_md: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS) + $(_v)${MAKE} do_exporthdrs_md; # # Setup pass for all architectures for all Configuration/Architecture options @@ -256,18 +273,21 @@ setup: do_build_setup: -build_setup: - $(_v)_TMP_setup_subdir="$(SETUP_SUBDIRS) $($(addprefix SETUP_SUBDIRS_, $(ARCH_CONFIG)))"; \ - for setup_subdir in $${_TMP_setup_subdir}; \ - do \ - [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ +BUILD_SETUP_SUBDIRS_TARGETS = $(addprefix build_setup_,$(SETUP_SUBDIRS) $($(addprefix SETUP_SUBDIRS_, $(ARCH_CONFIG)))) + +.PHONY: $(BUILD_SETUP_SUBDIRS_TARGETS) + +$(BUILD_SETUP_SUBDIRS_TARGETS): + $(_v)setup_subdir="$(patsubst build_setup_%,%,$@)"; \ + [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ ${MAKE} -C $${setup_subdir} \ MAKEFILES=${SOURCE}/$${setup_subdir}/Makefile \ SOURCE=${SOURCE}/$${setup_subdir}/ \ TARGET=${TARGET}/$${setup_subdir}/ \ - 
build_setup; \ - done; \ - ${MAKE} do_build_setup; + build_setup; + +build_setup: $(BUILD_SETUP_SUBDIRS_TARGETS) + $(_v)${MAKE} do_build_setup; # @@ -280,7 +300,10 @@ build_setup: ifeq ($(RC_ProjectName),Libsyscall) all: bsdmake -C libsyscall install -else # xnu +else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) +all: + make -C libkern/kxld/ install +else # xnu or xnu_debug ifeq ($(COMPONENT), .) all: exporthdrs else @@ -315,9 +338,9 @@ endif build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path}; \ fi; \ [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} -C $${build_subdir} \ + ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ + ARCH_CONFIG=$${arch_config} \ MACHINE_CONFIG=$${machine_config} \ MAKEFILES=${SOURCE}/Makefile \ SOURCE=${SOURCE}/ \ @@ -331,23 +354,31 @@ endif # do_build_all: -build_all: +BUILD_ALL_SUBDIRS_TARGETS = $(addprefix build_all_,$(COMP_SUBDIRS) $($(addprefix COMP_SUBDIRS_, $(ARCH_CONFIG)))) + +.PHONY: $(BUILD_ALL_SUBDIRS_TARGETS) + +$(BUILD_ALL_SUBDIRS_TARGETS): $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ TARGET=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ else \ TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ fi; \ - _TMP_comp_subdir="$(COMP_SUBDIRS) $($(addprefix COMP_SUBDIRS_, $(ARCH_CONFIG)))"; \ - for comp_subdir in $${_TMP_comp_subdir}; \ - do \ - [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ - ${MAKE} -C $${comp_subdir} \ - MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ - SOURCE=${SOURCE}$${comp_subdir}/ \ - TARGET=$${TARGET} \ - build_all; \ - done; \ - ${MAKE} ${MAKEJOBS} INCL_MAKEDEP=TRUE TARGET=$${TARGET} do_build_all; \ + comp_subdir="$(patsubst build_all_%,%,$@)"; \ + [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ + ${MAKE} -C $${comp_subdir} \ + MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ + SOURCE=${SOURCE}$${comp_subdir}/ \ + 
TARGET=$${TARGET} \ + build_all; + +build_all: $(BUILD_ALL_SUBDIRS_TARGETS) + $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ + TARGET=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ + else \ + TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ + fi; \ + ${MAKE} INCL_MAKEDEP=TRUE TARGET=$${TARGET} do_build_all; \ _TMP_comp_subdir="$(CONFIG_SUBDIRS) $($(addprefix CONFIG_SUBDIRS_, $(ARCH_CONFIG)))"; \ for comp_subdir in $${_TMP_comp_subdir}; \ do \ @@ -397,7 +428,7 @@ mach_kernel: build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ fi; \ [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} -C $${build_subdir} \ + ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MACHINE_CONFIG=$${machine_config} \ @@ -414,7 +445,7 @@ mach_kernel: do_build_mach_kernel: build_mach_kernel: - $(_v)${MAKE} ${MAKEJOBS} do_build_mach_kernel; + $(_v)${MAKE} do_build_mach_kernel; # @@ -430,7 +461,9 @@ build_mach_kernel: install: installhdrs all installman installmachinekernels ifeq ($(RC_ProjectName),Libsyscall) # nothing to do -else # xnu +else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) +# nothing to do +else # xnu or xnu_debug $(_v)rel_path=$(shell $(RELPATH) $(SRCROOT) $(SOURCE)); \ machine_config=$(MACHINE_CONFIG); \ for kernel_config in $(INSTALL_TYPE); \ @@ -448,7 +481,7 @@ else # xnu install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/$${rel_path}; \ fi; \ [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ - ${MAKE} -C $${install_subdir} \ + ${MAKE} ${MAKEJOBS} -C $${install_subdir} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ MACHINE_CONFIG=$${machine_config} \ @@ -457,6 +490,15 @@ else # xnu build_install; \ done; \ done; +ifeq ($(RC_ProjectName),xnu_debug) + $(_v)$(MKDIR) $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(MV) $(DSTROOT)/mach_kernel* 
$(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(CP) $(SYMROOT)/kgmacros $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(CP) -r $(SYMROOT)/System.kext $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(CP) -r $(SYMROOT)/mach_kernel.dSYM $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(CP) $(SRCROOT)/config/README.DEBUG-kernel.txt $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) + $(_v)$(MV) $(DSTROOT)/System $(DSTROOT)/usr $(OBJROOT)/ +endif endif installmachinekernels: @@ -483,7 +525,7 @@ installmachinekernels: build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ install_file_list=mach.`printf "%s" "$${kernel_config}" | $(TR) A-Z a-z`.`printf "%s" "$${machine_config}" | $(TR) A-Z a-z`; \ [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} -C $${build_subdir} \ + ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ INSTALL_FILE_LIST=$${install_file_list} \ KERNEL_CONFIG=$${kernel_config} \ ARCH_CONFIG=$${arch_config} \ @@ -503,7 +545,11 @@ setup_build_install: do_build_install: -build_install: +BUILD_INSTALL_SUBDIRS_TARGETS = $(addprefix build_install_,$(INST_SUBDIRS)) + +.PHONY: $(BUILD_INSTALL_SUBDIRS_TARGETS) + +$(BUILD_INSTALL_SUBDIRS_TARGETS): $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ TARGET=${OBJROOT}/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ else \ @@ -511,24 +557,29 @@ build_install: fi; \ ${MAKE} TARGET=$${TARGET} setup_build_install; \ kernel_config=$(KERNEL_CONFIG); \ - for install_subdir in $(INST_SUBDIRS); \ - do \ - [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ - ${MAKE} -C $${install_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - MAKEFILES=${SOURCE}/$${install_subdir}/Makefile \ - SOURCE=${SOURCE}$${install_subdir}/ \ - TARGET=$${TARGET} \ - build_install; \ - done; \ - ${MAKE} ${MAKEJOBS} TARGET=$${TARGET} do_build_install; + install_subdir="$(patsubst build_install_%,%,$@)"; \ + [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ + ${MAKE} -C $${install_subdir} \ + KERNEL_CONFIG=$${kernel_config} \ + 
MAKEFILES=${SOURCE}/$${install_subdir}/Makefile \ + SOURCE=${SOURCE}$${install_subdir}/ \ + TARGET=$${TARGET} \ + build_install; + +build_install: $(BUILD_INSTALL_SUBDIRS_TARGETS) + $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ + TARGET=${OBJROOT}/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ + else \ + TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ + fi; \ + ${MAKE} TARGET=$${TARGET} do_build_install; # # Install source tree # installsrc: - $(_v)(tar -c --mode go=r,+X --no-ignore-case --exclude .svn --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && tar --no-same-owner -xf -) + $(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .svn --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -) # @@ -578,32 +629,36 @@ TAGS: cscope.files installman: ifeq ($(RC_ProjectName),Libsyscall) bsdmake -C libsyscall install-man -else # xnu +else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) +# nothing to do +else # xnu or xnu_debug @echo "[ $(SRCROOT) ] Installing man pages" $(_v)manpath=$(DSTROOT)/$(MANDIR); \ [ -d $$manpath ] || $(MKDIR) $$manpath; \ - ${MAKE} MAKEFILES=${SOURCE}/Makefile \ + ${MAKE} ${MAKEJOBS} MAKEFILES=${SOURCE}/Makefile \ SOURCE=${SOURCE}/ \ TARGET=${DSTROOT}/ \ build_installman - ${SOURCE}/config/compress-man-pages.pl ${DSTROOT}/${MANDIR} + ${SRCROOT}/config/compress-man-pages.pl ${DSTROOT}/${MANDIR} endif do_installman: -build_installman: - @echo "[ $(SOURCE) ] make build_installman" - $(_v)if [ -n "$(strip $(INSTMAN_SUBDIRS))" ]; then \ - for installman_subdir in $(INSTMAN_SUBDIRS); do \ - ${MAKE} -C $${installman_subdir} -r \ - MAKEFILES=$(SOURCE)$${installman_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installman_subdir}/ \ - TARGET=$(TARGET)$${installman_subdir}/ \ - build_installman; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_MAN_LIST))" ]; then \ - ${MAKE} ${MAKEJOBS} do_installman; \ 
+BUILD_INSTALLMAN_SUBDIRS_TARGETS = $(addprefix build_installman_,$(INSTMAN_SUBDIRS)) + +.PHONY: $(BUILD_INSTALLMAN_SUBDIRS_TARGETS) + +$(BUILD_INSTALLMAN_SUBDIRS_TARGETS): + $(_v)installman_subdir="$(patsubst build_installman_%,%,$@)"; \ + ${MAKE} -C $${installman_subdir} -r \ + MAKEFILES=$(SOURCE)$${installman_subdir}/Makefile \ + SOURCE=$(SOURCE)$${installman_subdir}/ \ + TARGET=$(TARGET)$${installman_subdir}/ \ + build_installman; + +build_installman: $(BUILD_INSTALLMAN_SUBDIRS_TARGETS) + $(_v)if [ -n "$(strip $(INSTALL_MAN_LIST))" ]; then \ + ${MAKE} do_installman; \ fi # vim: set ft=make: diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule index c2f11dbb5..618a7849f 100644 --- a/makedefs/MakeInc.rule +++ b/makedefs/MakeInc.rule @@ -513,32 +513,14 @@ endif # Compilation rules to generate .o from .s # -COMP_SOBJ_FILES = $(addprefix $(TARGET)$(COMP_OBJ_DIR), $(COMP_SOBJ_LIST)) - -$(COMP_SOBJ_FILES): $(TARGET)$(COMP_OBJ_DIR)%.o : %.s - ${S_KCC} -E -MD ${SFLAGS} -DASSEMBLER $(INCFLAGS) $< > $(patsubst %.o, %.pp, ${@}); - sed '/^\#/d' $(patsubst %.o, %.pp, ${@}) > $(patsubst %.o, %.s, ${@}); - ${S_KCC} ${SFLAGS} ${_HOST_AS_FLAGS} -c $(patsubst %.o, %.s, ${@}); - ${RM} ${_RMFLAGS_} $(patsubst %.o, %.pp, ${@}) $(patsubst %.o,%.s,${@}); - -S_RULE_1A=@ls / -S_RULE_1B= ${patsubst %.o,%.s,${@}} > /dev/null -S_RULE_2=$(_v)${S_KCC} -E -MD ${SFLAGS} -DASSEMBLER $(INCFLAGS) $< \ - > $(patsubst %.o, %.pp, ${@}); \ - sed '/^\#/d' $(patsubst %.o, %.pp, ${@}) > $(patsubst %.o, %.s, ${@}); -ifeq ($(BUILD_STABS),1) -S_RULE_3=$(_v)${S_KCC} ${SFLAGS} ${_HOST_AS_FLAGS} -c $(patsubst %.o, %.s, ${@});\ - ${RM} ${_RMFLAGS_} $(patsubst %.o, %.pp, ${@}) $(patsubst %.o,%.s,${@}); -else -S_RULE_3=$(_v)${S_KCC} ${SFLAGS} ${_HOST_AS_FLAGS} -c $(patsubst %.o, %.s, ${@});\ - ${RM} ${_RMFLAGS_} $(patsubst %.o, %.pp, ${@}) $(patsubst %.o,%.s,${@});\ - ${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat 
$(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true; -S_RULE_4=@echo ASM $@ -endif +S_RULE_1A=$(_v)${S_KCC} -c -MD ${SFLAGS} -DASSEMBLER ${INCFLAGS} ${$@_INCFLAGS} +S_RULE_1B=$*.s +S_RULE_2=@echo AS $@ +S_RULE_3= # # Compilation rules to generate .o from .c for normal files -C_RULE_1A=$(_v)${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${CWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} +C_RULE_1A=$(_v)${KCC} -c ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CWARNFLAGS}} -MD ${$@_CFLAGS_ADD} ${$@_CWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} C_RULE_1B=$*.c C_RULE_2=@echo CC $@ ifeq ($(BUILD_STABS),1) @@ -557,20 +539,11 @@ C_RULE_2_D=${C_RULE_2} C_RULE_3_D=${C_RULE_3} C_RULE_4_D=${C_RULE_4} -# -# Compilation rules to generate .o from .m -# -M_RULE_1A=$(_v)${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${MWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} -M_RULE_1B=$*.m -M_RULE_2=@echo CC $@ -M_RULE_3= -M_RULE_4= - # # Compilation rules to generate .co from .cp or .cpo from .cpp # The config tool slickly changes the last source filename char to 'o' # for the object filename. 
-P_RULE_1A=$(_v)${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS} ${CXXWARNFLAGS}} -MD ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} +P_RULE_1A=$(_v)${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CXXWARNFLAGS}} -MD ${$@_CFLAGS_ADD} ${$@_CXXWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} P_RULE_1B=$( $(@:.cpo=.d~) && mv $(@:.cpo=.d~) $(@:.cpo=.d) P_RULE_3=@echo C++ $@ @@ -579,46 +552,58 @@ P_RULE_4= else P_RULE_4=$(_v)${CTFCONVERT} -l xnu -v -o $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$@.ctf || true; endif -P_RULE_4= -# -# Linker rule to generate a component -# -LD_COMPONENT_OBJ_FILES = $(addprefix $(TARGET)$(COMP_OBJ_DIR), $(LD_COMPONENT_OBJ_LIST)) - -COMPONENT_IMAGE_FILE = $(addprefix $(TARGET), $(COMPONENT_IMAGE)) - -$(COMPONENT_IMAGE_FILE): $(LD_COMPONENT_OBJ_FILES) - @echo LD $@ - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT_IMAGE_FILE) ${LD_COMPONENT_OBJ_FILES}; - $(_v)(cd $(TARGET)$(COMP_OBJ_DIR); ${MD} -u Makedep -f -d `ls *.d`); setup_build_all: do_build_all: $(COMP_FILES) $(COMP_COBJ_FILES) $(COMP_SOBJ_FILES) $(COMPONENT_IMAGE_FILE) -ifeq ($(COMPONENT), .) -do_build_all: do_build_mach_kernel -endif - # # mach_kernel building rules # -do_build_mach_kernel: $(OBJPATH)/kgmacros - $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJPATH)/version.c; - $(_v)$(SRCROOT)/config/newvers.pl $(OBJPATH)/version.c > /dev/null; - @echo CC version.o - $(_v)${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} $(OBJPATH)/version.c -o $(OBJPATH)/version.o +ifeq ($(COMPONENT), .) 
+do_build_all: do_build_mach_kernel + +STATIC_KMODS = $(SRCROOT)/kmods.a + +do_build_mach_kernel: $(TARGET)/kgmacros $(TARGET)/mach_kernel + +$(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) lastkernelconstructor.o + $(_v)${MAKE} version.o @echo LD mach_kernel.sys - $(_v)$(LD) $(LDFLAGS_KERNEL) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) $(OBJPATH)/version.o -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS) + $(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .o, $(component))))) > mach_kernel.filelist + $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist mach_kernel.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \ + -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS) @echo DSYMUTIL mach_kernel.sys $(_v)if [ $(BUILD_DWARF) -eq 1 ]; then \ $(DSYMUTIL) $(DSYMUTIL_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel.sys.dSYM > /dev/null; \ fi; @echo STRIP mach_kernel $(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel + @echo CTFMERGE mach_kernel + $(_v)if [ $(BUILD_DWARF) -eq 1 ]; then \ + $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ + -exec $(RM) -rf {} \; ; \ + $(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel \ + $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \ + fi; \ + +version.o: $(OBJPATH)/version.c + ${C_RULE_1A}$< + ${C_RULE_2} + ${C_RULE_4} -$(OBJPATH)/kgmacros: $(SRCROOT)/kgmacros +.PHONY: $(OBJPATH)/version.c +$(OBJPATH)/version.c: $(SRCROOT)/config/version.c 
$(NEWVERS) $(SRCROOT)/config/MasterVersion + $(_v)$(CP) $< $@ + $(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null; + +lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c + ${C_RULE_1A}$< + ${C_RULE_2} + ${C_RULE_4} + +$(TARGET)/kgmacros: $(SRCROOT)/kgmacros $(_v)$(INSTALL) $(INSTALL_FLAGS) $? $@ # Special rules to install machine configuration variants @@ -633,36 +618,12 @@ $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TA $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \ else \ if [ ! -e $@ ]; then \ - echo >empty_file_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ + printf "" >empty_file_$(notdir $@); \ + lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \ $(LIPO) $${lipo_arg} -create -output $@; \ - $(RM) $(RMFLAGS) empty_file_$(notdir $@); \ - fi; \ - $(LIPO) $@ -replace $(ARCH_CONFIG_LC) $< -o $@; \ - fi; \ - if [ $(BUILD_DWARF) -eq 1 ]; then \ - if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \ - $(CP) -f $< $<.ctfsys; \ - $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ - -exec $(RM) -rf {} \; ; \ - $(CTFMERGE) -l xnu -o $<.ctfsys \ - $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \ - $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $@.ctfsys; \ - else \ - if [ ! 
-e $@.ctfsys ]; then \ - echo >empty_file_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ - $(LIPO) $${lipo_arg} -create -output $@.ctfsys;\ - $(RM) $(RMFLAGS) empty_file_$(notdir $@);\ - fi; \ - $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ - -exec $(RM) -rf {} \; ; \ - $(CP) -f $< $<.ctfsys; \ - $(CTFMERGE) -l xnu -o $<.ctfsys \ - $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \ - $(LIPO) $@.ctfsys -replace $(ARCH_CONFIG_LC) \ - $<.ctfsys -o $@.ctfsys; \ + $(RM) $(RMFLAGS) empty_file_$(notdir $@); \ fi; \ + $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ fi $(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TARGET)/mach_kernel.sys force_file_install @@ -682,14 +643,15 @@ $(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TA fi; \ else \ if [ ! -e $@ ]; then \ - echo >empty_file_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ + printf "" >empty_file_$(notdir $@); \ + lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \ $(LIPO) $${lipo_arg} -create -output $@; \ $(RM) $(RMFLAGS) empty_file_$(notdir $@); \ fi; \ - $(LIPO) $@ -replace $(ARCH_CONFIG_LC) $< -o $@; \ + $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ fi +endif # mach_kernel-specific build rules # # Generic Install rules @@ -706,43 +668,15 @@ $(INSTALL_FILE_FILES_GENERIC): $(DSTROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/% forc fi; \ if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \ $(RM) $(RMFLAGS) $@; \ - if [ $(MACHINE_CONFIG) = DEFAULT ]; then \ - $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \ - fi; \ + $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \ else \ if [ ! 
-e $@ ]; then \ - echo >empty_file_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ + printf "" >empty_file_$(notdir $@); \ + lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \ $(LIPO) $${lipo_arg} -create -output $@; \ $(RM) $(RMFLAGS) empty_file_$(notdir $@); \ fi; \ - $(LIPO) $@ -replace $(ARCH_CONFIG_LC) $< -o $@; \ - fi; \ - if [ $(BUILD_DWARF) -eq 1 ]; then \ - if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \ - $(CP) -f $< $<.ctfsys; \ - $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ - -exec $(RM) -rf {} \; ; \ - $(CTFMERGE) -l xnu -o $<.ctfsys \ - $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \ - if [ $(MACHINE_CONFIG) = DEFAULT ]; then \ - $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \ - fi; \ - else \ - if [ ! -e $@.ctfsys ]; then \ - echo >empty_file_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_file_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ - $(LIPO) $${lipo_arg} -create -output $@.ctfsys;\ - $(RM) $(RMFLAGS) empty_file_$(notdir $@);\ - fi; \ - $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ - -exec $(RM) -rf {} \; ; \ - $(CP) -f $< $<.ctfsys; \ - $(CTFMERGE) -l xnu -o $<.ctfsys \ - $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \ - $(LIPO) $@.ctfsys -replace $(ARCH_CONFIG_LC) \ - $<.ctfsys -o $@.ctfsys; \ - fi; \ + $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ fi INSTALL_FILESYS_FILES = $(addprefix $(SYMROOT)$(INSTALL_FILE_DIR), $(INSTALL_FILE_LIST)) @@ -770,17 +704,17 @@ $(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.s fi; \ else \ if [ ! 
-e $@ ]; then \ - echo >empty_filesys_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_filesys_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ + printf "" >empty_filesys_$(notdir $@); \ + lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_filesys_$(notdir $@))"; \ $(LIPO) $${lipo_arg} -create -output $@; \ $(RM) $(RMFLAGS) empty_filesys_$(notdir $@); \ fi; \ - $(LIPO) $@ -replace $(ARCH_CONFIG_LC) $< -o $@; \ + $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ \ if [ $(BUILD_DWARF) -eq 1 ]; then \ if [ ! -e $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@) ]; then \ - echo >empty_filesys_$(notdir $@); \ - lipo_arg="$(subst _empty_file, empty_filesys_$(notdir $@),$(foreach lipo_arch,$(INSTALL_ARCHS_LC), $(addprefix -arch , $(addsuffix _empty_file, $(lipo_arch)))))"; \ + printf "" >empty_filesys_$(notdir $@); \ + lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_filesys_$(notdir $@))"; \ $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR); \ $(LIPO) $${lipo_arg} -create \ -output \ @@ -791,7 +725,7 @@ $(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.s $(TARGET)/mach_kernel.sys \ -o $(TARGET)/mach_kernel.sys.dSYM; \ $(LIPO) $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@) \ - -replace $(ARCH_CONFIG_LC) \ + -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) \ $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<) \ -o $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \ fi; \ @@ -850,4 +784,7 @@ ifeq ($(INCL_MAKEDEP), TRUE) -include Makedep endif +help: + @cat README + # vim: set ft=make: diff --git a/osfmk/Makefile b/osfmk/Makefile index b39ff9c49..f07b7e1f3 100644 --- a/osfmk/Makefile +++ b/osfmk/Makefile @@ -3,10 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir -export COMP_LDFLAGS_COMPONENT_PPC = 
-Wl,-i_OSCompareAndSwap:_hw_compare_and_store \ - -Wl,-i_OSDequeueAtomic:_hw_dequeue_atomic \ - -Wl,-i_OSEnqueueAtomic:_hw_queue_atomic - include $(MakeInc_cmd) include $(MakeInc_def) @@ -20,10 +16,12 @@ INSTINC_SUBDIRS = \ machine \ UserNotification \ gssd \ + kextd \ lockd \ vm \ libsa \ - kdp + kdp \ + pmc INSTINC_SUBDIRS_PPC = \ mach \ @@ -33,6 +31,11 @@ INSTINC_SUBDIRS_I386 = \ mach \ i386 +INSTINC_SUBDIRS_X86_64 = \ + mach \ + i386 \ + x86_64 + INSTINC_SUBDIRS_ARM = \ mach \ arm @@ -48,10 +51,12 @@ EXPINC_SUBDIRS = \ machine \ UserNotification \ gssd \ + kextd \ lockd \ vm \ libsa \ - kdp + kdp \ + pmc EXPINC_SUBDIRS_PPC = \ mach \ @@ -61,6 +66,11 @@ EXPINC_SUBDIRS_I386 = \ mach \ i386 +EXPINC_SUBDIRS_X86_64 = \ + mach \ + i386 \ + x86_64 + EXPINC_SUBDIRS_ARM = \ mach \ arm diff --git a/osfmk/UserNotification/KUNCUserNotifications.c b/osfmk/UserNotification/KUNCUserNotifications.c index afd0acc92..740b8f125 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.c +++ b/osfmk/UserNotification/KUNCUserNotifications.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,16 +54,15 @@ */ struct UNDReply { - decl_mutex_data(,lock) /* UNDReply lock */ + decl_lck_mtx_data(,lock) /* UNDReply lock */ int userLandNotificationKey; KUNCUserNotificationCallBack callback; boolean_t inprogress; ipc_port_t self_port; /* Our port */ }; -#define UNDReply_lock(reply) mutex_lock(&reply->lock) -#define UNDReply_lock_try(reply) mutex_lock_try(&(reply)->lock) -#define UNDReply_unlock(reply) mutex_unlock(&(reply)->lock) +#define UNDReply_lock(reply) lck_mtx_lock(&reply->lock) +#define UNDReply_unlock(reply) lck_mtx_unlock(&reply->lock) /* forward declarations */ void UNDReply_deallocate( @@ -149,7 +148,7 @@ UNDAlertCompletedWithResult_rpc ( #endif /* KERNEL_CF */ if (reply->callback) { - (reply->callback)((KUNCUserNotificationID) reply, result, dict); + (reply->callback)((int)(KUNCUserNotificationID)reply, result, dict); } UNDReply_lock(reply); @@ -190,6 +189,7 @@ UNDNotificationCreated_rpc ( * KUNC Functions */ +extern lck_grp_t LockCompatGroup; KUNCUserNotificationID KUNCGetNotificationID(void) @@ -203,7 +203,7 @@ KUNCGetNotificationID(void) kfree(reply, sizeof(struct UNDReply)); reply = UND_REPLY_NULL; } else { - mutex_init(&reply->lock, 0); + lck_mtx_init(&reply->lock, &LockCompatGroup, LCK_ATTR_NULL); reply->userLandNotificationKey = -1; reply->inprogress = FALSE; ipc_kobject_set(reply->self_port, diff --git a/osfmk/UserNotification/KUNCUserNotifications.h b/osfmk/UserNotification/KUNCUserNotifications.h index a7524e64a..8b3e3f0bb 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.h +++ b/osfmk/UserNotification/KUNCUserNotifications.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,7 +50,7 @@ KUNCUserNotificationDisplayNotice( char *localizationPath, char *alertHeader, char *alertMessage, - char *defaultButtonTitle); + char *defaultButtonTitle) __attribute__((deprecated)); /* * ***BLOCKING*** alert call, returned int value corresponds to the @@ -68,7 +68,8 @@ KUNCUserNotificationDisplayAlert( char *defaultButtonTitle, char *alternateButtonTitle, char *otherButtonTitle, - unsigned *responseFlags); + unsigned *responseFlags) __attribute__((deprecated)); + /* * Execute a userland executable with the given path, user and type @@ -86,7 +87,7 @@ kern_return_t KUNCExecute( char *executionPath, int openAsUser, - int pathExecutionType); + int pathExecutionType) __attribute__((deprecated)); /* KUNC User Notification XML Keys @@ -184,7 +185,7 @@ KUNCExecute( * back to the client in the callback pararmeter contextKey */ -typedef int KUNCUserNotificationID; +typedef uintptr_t KUNCUserNotificationID; /* * Reponse value checking & default setting @@ -216,8 +217,7 @@ typedef void /* * Get a notification ID */ -KUNCUserNotificationID KUNCGetNotificationID(void); - +KUNCUserNotificationID KUNCGetNotificationID(void) __attribute__((deprecated)); /* This function currently requires a bundle path, which kexts cannot currently get. In the future, the CFBundleIdentiofier of the kext will be pass in in place of the bundlePath. 
*/ @@ -230,12 +230,12 @@ KUNCUserNotificationDisplayFromBundle( char *messageKey, char *tokenString, KUNCUserNotificationCallBack callback, - int contextKey); + int contextKey) __attribute__((deprecated)); kern_return_t KUNCUserNotificationCancel( - KUNCUserNotificationID notification); + KUNCUserNotificationID notification) __attribute__((deprecated)); __END_DECLS diff --git a/osfmk/chud/chud_cpu.c b/osfmk/chud/chud_cpu.c index 9652dcf92..19b639cd7 100644 --- a/osfmk/chud/chud_cpu.c +++ b/osfmk/chud/chud_cpu.c @@ -38,7 +38,9 @@ #include +#if 0 #pragma mark **** cpu count **** +#endif __private_extern__ int chudxnu_logical_cpu_count(void) @@ -67,7 +69,9 @@ chudxnu_cpu_number(void) return cpu_number(); } +#if 0 #pragma mark **** interrupts enable/disable **** +#endif __private_extern__ boolean_t chudxnu_get_interrupts_enabled(void) @@ -93,7 +97,9 @@ chudxnu_cause_interrupt(void) ml_cause_interrupt(); } +#if 0 #pragma mark **** preemption enable/disable **** +#endif __private_extern__ void chudxnu_enable_preemption(void) @@ -113,12 +119,3 @@ chudxnu_get_preemption_level(void) return get_preemption_level(); } -#pragma mark *** deprecated *** - -//DEPRECATED -__private_extern__ int -chudxnu_avail_cpu_count(void) -{ - return machine_info.logical_cpu; -} - diff --git a/osfmk/chud/chud_memory.c b/osfmk/chud/chud_memory.c index f71c0333e..21209b074 100644 --- a/osfmk/chud/chud_memory.c +++ b/osfmk/chud/chud_memory.c @@ -66,18 +66,3 @@ uint64_t chudxnu_inactive_memory_size(void) return (uint64_t)vm_page_inactive_count * (uint64_t)page_size; } -#pragma mark *** DEPRECATED *** - -// DEPRECATED -__private_extern__ -vm_offset_t chudxnu_io_map(uint64_t phys_addr, vm_size_t size) -{ - return ml_io_map(phys_addr, size); // XXXXX limited to first 2GB XXXXX -} - -// DEPRECATED -__private_extern__ uint32_t -chudxnu_phys_addr_wimg(uint64_t phys_addr) -{ - return IODefaultCacheBits(phys_addr); -} diff --git a/osfmk/chud/chud_osfmk_callback.c b/osfmk/chud/chud_osfmk_callback.c index 
4afe955b4..98e1fcee1 100644 --- a/osfmk/chud/chud_osfmk_callback.c +++ b/osfmk/chud/chud_osfmk_callback.c @@ -45,24 +45,26 @@ #include #include +#if 0 #pragma mark **** timer **** +#endif __private_extern__ chud_timer_t -chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0) +chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, thread_call_param_t param0) { - return (chud_timer_t)thread_call_allocate((thread_call_func_t)func, (thread_call_param_t)param0); + return (chud_timer_t)thread_call_allocate((thread_call_func_t)func, param0); } __private_extern__ kern_return_t chudxnu_timer_callback_enter( chud_timer_t timer, - uint32_t param1, + thread_call_param_t param1, uint32_t time, uint32_t units) { uint64_t t_delay; clock_interval_to_deadline(time, units, &t_delay); - thread_call_enter1_delayed((thread_call_t)timer, (thread_call_param_t)param1, t_delay); + thread_call_enter1_delayed((thread_call_t)timer, param1, t_delay); return KERN_SUCCESS; } @@ -81,40 +83,4 @@ chudxnu_timer_free(chud_timer_t timer) return KERN_SUCCESS; } -static chudxnu_dtrace_callback_t - dtrace_callback = (chudxnu_dtrace_callback_t) NULL; - -kern_return_t -chudxnu_dtrace_callback(uint64_t selector, uint64_t *args, uint32_t count) -{ - /* it's not an error if no callback is hooked up */ - kern_return_t ret = KERN_SUCCESS; - - /* Make a local stack copy of the function ptr */ - chudxnu_dtrace_callback_t fn = dtrace_callback; - - if(fn) { - ret = fn(selector, args, count); - } - - return ret; -} - -__private_extern__ void -chudxnu_dtrace_callback_enter(chudxnu_dtrace_callback_t fn) -{ - chudxnu_dtrace_callback_t old_fn = dtrace_callback; - - /* Atomically clear the call back */ - while(!OSCompareAndSwap((UInt32)old_fn, (UInt32)fn, - (volatile UInt32 *) &dtrace_callback)) { - old_fn = dtrace_callback; - } -} - -__private_extern__ void -chudxnu_dtrace_callback_cancel(void) -{ - chudxnu_dtrace_callback_enter(NULL); -} diff --git a/osfmk/chud/chud_thread.c 
b/osfmk/chud/chud_thread.c index 6af57de1e..0f955bb6e 100644 --- a/osfmk/chud/chud_thread.c +++ b/osfmk/chud/chud_thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,7 +56,9 @@ extern unsigned int real_ncpus; // Mask for supported options #define T_CHUD_BIND_OPT_MASK (-1UL) +#if 0 #pragma mark **** thread binding **** +#endif /* * This method will bind a given thread to the requested CPU starting at the @@ -94,7 +96,7 @@ chudxnu_bind_thread(thread_t thread, int cpu, __unused int options) * reschedule on the target CPU. */ if(thread == current_thread() && - !(ml_at_interrupt_context() && cpu_number() == cpu)) { + !ml_at_interrupt_context() && cpu_number() != cpu) { (void)thread_block(THREAD_CONTINUE_NULL); } return KERN_SUCCESS; @@ -122,7 +124,9 @@ chudxnu_thread_get_idle(thread_t thread) { return ((thread->state & TH_IDLE) == TH_IDLE); } +#if 0 #pragma mark **** task and thread info **** +#endif __private_extern__ boolean_t chudxnu_is_64bit_task(task_t task) @@ -154,7 +158,7 @@ chudxnu_private_processor_set_things( size = 0; addr = NULL; for (;;) { - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); if (type == THING_TASK) maxthings = tasks_count; @@ -167,7 +171,7 @@ chudxnu_private_processor_set_things( if (size_needed <= size) break; - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (size != 0) kfree(addr, size); @@ -214,7 +218,7 @@ chudxnu_private_processor_set_things( } } - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (actual < maxthings) size_needed = actual * sizeof (mach_port_t); @@ -475,13 +479,6 @@ chudxnu_thread_info( } -__private_extern__ kern_return_t -chudxnu_thread_last_context_switch(thread_t thread, uint64_t *timestamp) -{ - *timestamp = thread->last_switch; - return KERN_SUCCESS; -} - /* thread marking stuff */ 
__private_extern__ boolean_t @@ -500,10 +497,10 @@ chudxnu_thread_set_marked(thread_t thread, boolean_t new_value) if(thread) { if(new_value) { // set the marked bit - old_val = OSBitOrAtomic(T_CHUD_MARKED, (UInt32 *) &(thread->t_chud)); + old_val = OSBitOrAtomic(T_CHUD_MARKED, &(thread->t_chud)); } else { // clear the marked bit - old_val = OSBitAndAtomic(~T_CHUD_MARKED, (UInt32 *) &(thread->t_chud)); + old_val = OSBitAndAtomic(~T_CHUD_MARKED, &(thread->t_chud)); } return (old_val & T_CHUD_MARKED) == T_CHUD_MARKED; } diff --git a/osfmk/chud/chud_thread.h b/osfmk/chud/chud_thread.h index ab335816f..86976de11 100644 --- a/osfmk/chud/chud_thread.h +++ b/osfmk/chud/chud_thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,8 +29,7 @@ #ifndef _CHUD_THREAD_H_ #define _CHUD_THREAD_H_ -/* Flags for the t_chud element of a thread_t structure. */ -#define T_CHUD_MARKED 0x1 /* this thread is marked by CHUD */ -#define T_IN_CHUD 0x2 /* this thread is already in a CHUD handler */ + +#include #endif /* _CHUD_THREAD_H_ */ diff --git a/osfmk/chud/chud_xnu.h b/osfmk/chud/chud_xnu.h index 9577e8088..91465bd61 100644 --- a/osfmk/chud/chud_xnu.h +++ b/osfmk/chud/chud_xnu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,17 +32,23 @@ #include #include #include +#include +#if 0 #pragma mark **** version **** +#endif extern uint32_t chudxnu_version(void); +#if 0 #pragma mark **** task **** +#endif // ******************************************************************************** // task // ******************************************************************************** -extern int chudxnu_pid_for_task(task_t task); extern task_t chudxnu_task_for_pid(int pid); +extern int chudxnu_pid_for_task(task_t task); extern int chudxnu_current_pid(void); +extern task_t chudxnu_current_task(void); extern kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size); extern kern_return_t chudxnu_task_write(task_t task, uint64_t useraddr, void *kernaddr, vm_size_t size); @@ -51,10 +57,15 @@ extern kern_return_t chudxnu_kern_write(vm_offset_t destaddr, void *srcaddr, vm_ extern boolean_t chudxnu_is_64bit_task(task_t task); +#if 0 #pragma mark **** thread **** +#endif // ******************************************************************************** // thread // ******************************************************************************** +extern thread_t chudxnu_current_thread(void); +extern task_t chudxnu_task_for_thread(thread_t thread); + extern kern_return_t chudxnu_bind_thread(thread_t thread, int cpu, int options); extern kern_return_t chudxnu_unbind_thread(thread_t thread, int options); @@ -64,11 +75,6 @@ extern kern_return_t chudxnu_thread_user_state_available(thread_t thread); extern kern_return_t chudxnu_thread_get_callstack64(thread_t thread, uint64_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); -extern task_t chudxnu_current_task(void); -extern thread_t chudxnu_current_thread(void); - -extern task_t chudxnu_task_for_thread(thread_t thread); - extern kern_return_t chudxnu_all_tasks(task_array_t *task_list, mach_msg_type_number_t *count); extern kern_return_t 
chudxnu_free_task_list(task_array_t *task_list, mach_msg_type_number_t *count); @@ -84,7 +90,9 @@ extern boolean_t chudxnu_thread_set_marked(thread_t thread, boolean_t marked); extern boolean_t chudxnu_thread_get_marked(thread_t thread); extern boolean_t chudxnu_thread_get_idle(thread_t thread); +#if 0 #pragma mark **** memory **** +#endif // ******************************************************************************** // memory // ******************************************************************************** @@ -94,7 +102,9 @@ extern uint64_t chudxnu_phys_memory_size(void); extern uint64_t chudxnu_free_memory_size(void); extern uint64_t chudxnu_inactive_memory_size(void); +#if 0 #pragma mark **** cpu **** +#endif // ******************************************************************************** // cpu // ******************************************************************************** @@ -113,9 +123,6 @@ extern void chudxnu_enable_preemption(void); extern void chudxnu_disable_preemption(void); extern int chudxnu_get_preemption_level(void); -extern kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val); -extern kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val); - extern kern_return_t chudxnu_perfmon_acquire_facility(task_t); extern kern_return_t chudxnu_perfmon_release_facility(task_t); @@ -142,12 +149,14 @@ typedef struct { uint32_t hwSoftPatches; uint32_t hwMaintenances; uint32_t hwInstrumentations; -} rupt_counters_t; +} interrupt_counters_t; -extern kern_return_t chudxnu_get_cpu_interrupt_counters(int cpu, rupt_counters_t *rupts); +extern kern_return_t chudxnu_get_cpu_interrupt_counters(int cpu, interrupt_counters_t *rupts); extern kern_return_t chudxnu_clear_cpu_interrupt_counters(int cpu); +#if 0 #pragma mark **** callbacks **** +#endif // ******************************************************************************** // callbacks // ******************************************************************************** 
@@ -203,39 +212,36 @@ extern kern_return_t chudxnu_cpusig_callback_cancel(void); extern kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request); // kdebug callback - one callback for system -typedef kern_return_t (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); +typedef kern_return_t (*chudxnu_kdebug_callback_func_t)(uint32_t debugid, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); extern kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t func); extern kern_return_t chudxnu_kdebug_callback_cancel(void); // timer callback - multiple callbacks -typedef kern_return_t (*chudxnu_timer_callback_func_t)(uint32_t param0, uint32_t param1); +typedef kern_return_t (*chudxnu_timer_callback_func_t)(thread_call_param_t param0, thread_call_param_t param1); typedef void * chud_timer_t; -extern chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, uint32_t param0); -extern kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, uint32_t param1, uint32_t time, uint32_t units); +extern chud_timer_t chudxnu_timer_alloc(chudxnu_timer_callback_func_t func, thread_call_param_t param0); +extern kern_return_t chudxnu_timer_callback_enter(chud_timer_t timer, thread_call_param_t param1, uint32_t time, uint32_t units); extern kern_return_t chudxnu_timer_callback_cancel(chud_timer_t timer); extern kern_return_t chudxnu_timer_free(chud_timer_t timer); // CHUD systemcall callback - one callback for system -typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint32_t code, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); +typedef kern_return_t (*chudxnu_syscall_callback_func_t)(uint64_t code, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5); extern kern_return_t chudxnu_syscall_callback_enter(chudxnu_syscall_callback_func_t func); extern kern_return_t 
chudxnu_syscall_callback_cancel(void); // DTrace Triggering typedef kern_return_t (*chudxnu_dtrace_callback_t)(uint64_t selector, uint64_t *args, uint32_t count); -extern kern_return_t chudxnu_dtrace_callback(uint64_t selector, uint64_t *args, uint32_t count); -extern void chudxnu_dtrace_callback_enter(chudxnu_dtrace_callback_t fn); +extern int chudxnu_dtrace_callback(uint64_t selector, uint64_t *args, uint32_t count); +extern kern_return_t chudxnu_dtrace_callback_enter(chudxnu_dtrace_callback_t fn); extern void chudxnu_dtrace_callback_cancel(void); // ******************************************************************************** // DEPRECATED // ******************************************************************************** -extern kern_return_t chudxnu_perfmon_ast_send(void); extern kern_return_t chudxnu_thread_get_callstack(thread_t thread, uint32_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); -extern vm_offset_t chudxnu_io_map(uint64_t phys_addr, vm_size_t size); -extern uint32_t chudxnu_phys_addr_wimg(uint64_t phys_addr); - -extern int chudxnu_avail_cpu_count(void); +extern kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val); +extern kern_return_t chudxnu_set_shadowed_spr64(int cpu, int spr, uint64_t val); extern kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable); extern boolean_t chudxnu_cpu_nap_enabled(int cpu); @@ -248,14 +254,6 @@ extern kern_return_t chudxnu_read_spr64(int cpu, int spr, uint64_t *val_p); extern kern_return_t chudxnu_write_spr(int cpu, int spr, uint32_t val); extern kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val); -extern kern_return_t chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts); -extern kern_return_t chudxnu_clear_cpu_rupt_counters(int cpu); - -extern kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable); - -extern kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data); -extern kern_return_t chudxnu_scom_write(uint32_t reg, 
uint64_t data); - extern void chudxnu_flush_caches(void); extern void chudxnu_enable_caches(boolean_t enable); diff --git a/osfmk/chud/chud_xnu_glue.h b/osfmk/chud/chud_xnu_glue.h index 437c21a01..20626c064 100644 --- a/osfmk/chud/chud_xnu_glue.h +++ b/osfmk/chud/chud_xnu_glue.h @@ -28,7 +28,7 @@ #if defined (__ppc__) #include "ppc/chud_xnu_glue.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) #include "i386/chud_xnu_glue.h" #else #error architecture not supported diff --git a/osfmk/chud/chud_xnu_private.h b/osfmk/chud/chud_xnu_private.h index 6951cbee3..0932a6497 100644 --- a/osfmk/chud/chud_xnu_private.h +++ b/osfmk/chud/chud_xnu_private.h @@ -35,7 +35,7 @@ #if defined (__ppc__) #include "chud/ppc/chud_xnu_private.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) #include "chud/i386/chud_xnu_private.h" #else #error architecture not supported diff --git a/osfmk/chud/i386/chud_cpu_i386.c b/osfmk/chud/i386/chud_cpu_i386.c index 2ee2e428e..564a82c4a 100644 --- a/osfmk/chud/i386/chud_cpu_i386.c +++ b/osfmk/chud/i386/chud_cpu_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,7 +34,6 @@ #include #include -#include #include #include #include @@ -42,7 +41,9 @@ #include +#if 0 #pragma mark **** cpu enable/disable **** +#endif extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c extern kern_return_t processor_exit(processor_t processor); // osfmk/kern/processor.c @@ -70,24 +71,28 @@ kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable) return KERN_FAILURE; } +#if 0 #pragma mark **** perfmon facility **** +#endif __private_extern__ kern_return_t -chudxnu_perfmon_acquire_facility(task_t task) +chudxnu_perfmon_acquire_facility(task_t task __unused) { - return pmc_acquire(task); + return KERN_SUCCESS; } __private_extern__ kern_return_t -chudxnu_perfmon_release_facility(task_t task) +chudxnu_perfmon_release_facility(task_t task __unused) { - return pmc_release(task); + return KERN_SUCCESS; } +#if 0 #pragma mark **** interrupt counters **** +#endif __private_extern__ kern_return_t -chudxnu_get_cpu_interrupt_counters(int cpu, rupt_counters_t *rupts) +chudxnu_get_cpu_interrupt_counters(int cpu, interrupt_counters_t *rupts) { if(cpu < 0 || (unsigned int)cpu >= real_ncpus) { // sanity check return KERN_FAILURE; @@ -158,18 +163,3 @@ chudxnu_clear_cpu_interrupt_counters(int cpu) return KERN_SUCCESS; } -#pragma mark *** deprecated *** - -//DEPRECATED -__private_extern__ kern_return_t -chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts) -{ - return chudxnu_get_cpu_interrupt_counters(cpu, rupts); -} - -//DEPRECATED -__private_extern__ kern_return_t -chudxnu_clear_cpu_rupt_counters(int cpu) -{ - return chudxnu_clear_cpu_interrupt_counters(cpu); -} diff --git a/osfmk/chud/i386/chud_osfmk_callback_i386.c b/osfmk/chud/i386/chud_osfmk_callback_i386.c index 0c0fafa35..b3fc4d685 100644 --- a/osfmk/chud/i386/chud_osfmk_callback_i386.c +++ b/osfmk/chud/i386/chud_osfmk_callback_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,13 +37,14 @@ #include #include +#include + #include #include #include #include #include -#include #include #include @@ -129,11 +130,6 @@ chudxnu_private_cpu_timer_callback( FALSE) == KERN_SUCCESS) { fn = chud_proc_info->cpu_timer_callback_fn; if (fn) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_TIMER_CALLBACK) | DBG_FUNC_NONE, - (uint32_t)fn, 0,0,0,0); - //state.eip, state.cs, 0, 0); (fn)( x86_THREAD_STATE, (thread_state_t)&state, @@ -167,11 +163,6 @@ chudxnu_cpu_timer_callback_enter( timer_call_enter(&(chud_proc_info->cpu_timer_call), chud_proc_info->t_deadline); - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_TIMER_CALLBACK_ENTER) | DBG_FUNC_NONE, - (uint32_t) func, time, units, 0, 0); - ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; } @@ -187,11 +178,6 @@ chudxnu_cpu_timer_callback_cancel(void) timer_call_cancel(&(chud_proc_info->cpu_timer_call)); - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_TIMER_CALLBACK_CANCEL) | DBG_FUNC_NONE, - 0, 0, 0, 0, 0); - // set to max value: chud_proc_info->t_deadline |= ~(chud_proc_info->t_deadline); chud_proc_info->cpu_timer_callback_fn = NULL; @@ -217,8 +203,17 @@ chudxnu_cpu_timer_callback_cancel_all(void) return KERN_SUCCESS; } +#if 0 #pragma mark **** trap **** -static chudxnu_trap_callback_func_t trap_callback_fn = NULL; +#endif +static kern_return_t chud_null_trap(uint32_t trapentry, thread_flavor_t flavor, + thread_state_t tstate, mach_msg_type_number_t count); +static chudxnu_trap_callback_func_t trap_callback_fn = chud_null_trap; + +static kern_return_t chud_null_trap(uint32_t trapentry __unused, thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; +} static kern_return_t chudxnu_private_trap_callback( @@ -279,22 +274,46 @@ chudxnu_private_trap_callback( __private_extern__ 
kern_return_t chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func) { - trap_callback_fn = func; - perfTrapHook = chudxnu_private_trap_callback; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_trap_callback, + (void * volatile *)&perfTrapHook)) { + + chudxnu_trap_callback_func_t old = trap_callback_fn; + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&trap_callback_fn)) { + old = trap_callback_fn; + } + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_trap_callback_cancel(void) { - trap_callback_fn = NULL; - perfTrapHook = NULL; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_trap_callback, NULL, + (void * volatile *)&perfTrapHook)) { + + chudxnu_trap_callback_func_t old = trap_callback_fn; + while(!OSCompareAndSwapPtr(old, chud_null_trap, + (void * volatile *)&trap_callback_fn)) { + old = trap_callback_fn; + } + return KERN_SUCCESS; + } + return KERN_FAILURE; } +#if 0 #pragma mark **** ast **** -static -chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; +#endif +static kern_return_t chud_null_ast(thread_flavor_t flavor, thread_state_t tstate, + mach_msg_type_number_t count); +static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = chud_null_ast; + +static kern_return_t chud_null_ast(thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; +} static kern_return_t chudxnu_private_chud_ast_callback( @@ -333,11 +352,6 @@ chudxnu_private_chud_ast_callback( (thread_state_t) &state, &count, TRUE) == KERN_SUCCESS) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_AST_CALLBACK) | DBG_FUNC_NONE, - (uint32_t) fn, 0, 0, 0, 0); - (fn)( x86_THREAD_STATE, (thread_state_t) &state, @@ -352,17 +366,35 @@ chudxnu_private_chud_ast_callback( __private_extern__ kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func) { - 
perfmon_ast_callback_fn = func; - perfASTHook = chudxnu_private_chud_ast_callback; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_chud_ast_callback, + (void * volatile *)&perfASTHook)) { + chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn; + + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&perfmon_ast_callback_fn)) { + old = perfmon_ast_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_perfmon_ast_callback_cancel(void) { - perfmon_ast_callback_fn = NULL; - perfASTHook = NULL; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_chud_ast_callback, NULL, + (void * volatile *)&perfASTHook)) { + chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_ast, + (void * volatile *)&perfmon_ast_callback_fn)) { + old = perfmon_ast_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t @@ -377,22 +409,21 @@ chudxnu_perfmon_ast_send_urgent(boolean_t urgent) *myast |= (AST_CHUD); } - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, CHUD_AST_SEND) | DBG_FUNC_NONE, - urgent, 0, 0, 0, 0); - ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; } -__private_extern__ kern_return_t -chudxnu_perfmon_ast_send(void) -{ - return chudxnu_perfmon_ast_send_urgent(TRUE); -} - +#if 0 #pragma mark **** interrupt **** -static chudxnu_interrupt_callback_func_t interrupt_callback_fn = NULL; +#endif +static kern_return_t chud_null_int(uint32_t trapentry, thread_flavor_t flavor, + thread_state_t tstate, mach_msg_type_number_t count); +static chudxnu_interrupt_callback_func_t interrupt_callback_fn = chud_null_int; + +static kern_return_t chud_null_int(uint32_t trapentry __unused, thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; +} static void chudxnu_private_interrupt_callback(void 
*foo) @@ -426,20 +457,32 @@ chudxnu_private_interrupt_callback(void *foo) __private_extern__ kern_return_t chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func) { - interrupt_callback_fn = func; - lapic_set_pmi_func((i386_intr_func_t)chudxnu_private_interrupt_callback); - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chud_null_int, func, + (void * volatile *)&interrupt_callback_fn)) { + lapic_set_pmi_func((i386_intr_func_t)chudxnu_private_interrupt_callback); + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_interrupt_callback_cancel(void) { - interrupt_callback_fn = NULL; + chudxnu_interrupt_callback_func_t old = interrupt_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_int, + (void * volatile *)&interrupt_callback_fn)) { + old = interrupt_callback_fn; + } + lapic_set_pmi_func(NULL); return KERN_SUCCESS; } +#if 0 #pragma mark **** cpu signal **** +#endif static chudxnu_cpusig_callback_func_t cpusig_callback_fn = NULL; static kern_return_t @@ -455,10 +498,6 @@ chudxnu_private_cpu_signal_handler(int request) x86_THREAD_STATE, (thread_state_t) &state, &count, FALSE) == KERN_SUCCESS) { - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_CPUSIG_CALLBACK) | DBG_FUNC_NONE, - (uint32_t)fn, request, 0, 0, 0); return (fn)( request, x86_THREAD_STATE, (thread_state_t) &state, count); @@ -493,14 +532,23 @@ chudxnu_cpu_signal_handler(void) __private_extern__ kern_return_t chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func) { - cpusig_callback_fn = func; - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, func, + (void * volatile *)&cpusig_callback_fn)) { + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_cpusig_callback_cancel(void) { - cpusig_callback_fn = NULL; + chudxnu_cpusig_callback_func_t old = cpusig_callback_fn; + + while(!OSCompareAndSwapPtr(old, NULL, + (void * volatile *)&cpusig_callback_fn)) { + old = cpusig_callback_fn; + 
} + return KERN_SUCCESS; } @@ -531,11 +579,6 @@ chudxnu_cpusig_send(int otherCPU, uint32_t request_code) //request.req_type = CPRQchud; /* set request type */ request.req_code = request_code; /* set request */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_CHUD, - CHUD_CPUSIG_SEND) | DBG_FUNC_NONE, - otherCPU, request_code, 0, 0, 0); - /* * Insert the new request in the target cpu's request queue * and signal target cpu. @@ -562,4 +605,3 @@ chudxnu_cpusig_send(int otherCPU, uint32_t request_code) enable_preemption(); return retval; } - diff --git a/osfmk/chud/i386/chud_thread_i386.c b/osfmk/chud/i386/chud_thread_i386.c index c2b36c794..f5c992fef 100644 --- a/osfmk/chud/i386/chud_thread_i386.c +++ b/osfmk/chud/i386/chud_thread_i386.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -40,11 +41,13 @@ #include #include -#include #include #include +#include +#if 0 #pragma mark **** thread state **** +#endif __private_extern__ kern_return_t chudxnu_thread_user_state_available(thread_t thread) @@ -107,7 +110,9 @@ chudxnu_thread_set_state( return machine_thread_set_state(thread, flavor, tstate, count); } +#if 0 #pragma mark **** task memory read/write **** +#endif __private_extern__ kern_return_t chudxnu_task_read( @@ -197,8 +202,8 @@ chudxnu_kern_write( #define VALID_STACK_ADDRESS(supervisor, addr, minKernAddr, maxKernAddr) (supervisor ? (addr>=minKernAddr && addr<=maxKernAddr) : TRUE) // don't try to read in the hole #define VALID_STACK_ADDRESS64(supervisor, addr, minKernAddr, maxKernAddr) \ -(supervisor ? (addr >= minKernAddr && addr <= maxKernAddr) : \ -(addr != 0 && (addr <= 0x00007FFFFFFFFFFFULL || addr >= 0xFFFF800000000000ULL))) +(supervisor ? 
((uint64_t)addr >= minKernAddr && (uint64_t)addr <= maxKernAddr) : \ +((uint64_t)addr != 0ULL && ((uint64_t)addr <= 0x00007FFFFFFFFFFFULL || (uint64_t)addr >= 0xFFFF800000000000ULL))) typedef struct _cframe64_t { uint64_t prevFP; // can't use a real pointer here until we're a 64 bit kernel @@ -208,7 +213,7 @@ typedef struct _cframe64_t { typedef struct _cframe_t { - struct _cframe_t *prev; // when we go 64 bits, this needs to be capped at 32 bits + uint32_t prev; // this is really a user32-space pointer to the previous frame uint32_t caller; uint32_t args[0]; } cframe_t; @@ -231,7 +236,7 @@ static kern_return_t do_backtrace32( uint64_t prevPC = 0ULL; uint64_t prevFP = 0ULL; uint64_t kernStackMin = thread->kernel_stack; - uint64_t kernStackMax = kernStackMin + KERNEL_STACK_SIZE; + uint64_t kernStackMax = kernStackMin + kernel_stack_size; mach_msg_type_number_t ct = *start_idx; kern_return_t kr = KERN_FAILURE; @@ -242,7 +247,7 @@ static kern_return_t do_backtrace32( // build a backtrace of this 32 bit state. 
while(VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) { - cframe_t *fp = (cframe_t *) (uint32_t) currFP; + cframe_t *fp = (cframe_t *) (uintptr_t) currFP; if(!currFP) { currPC = 0; @@ -309,7 +314,7 @@ static kern_return_t do_backtrace64( uint64_t prevPC = 0ULL; uint64_t prevFP = 0ULL; uint64_t kernStackMin = (uint64_t)thread->kernel_stack; - uint64_t kernStackMax = (uint64_t)kernStackMin + KERNEL_STACK_SIZE; + uint64_t kernStackMax = (uint64_t)kernStackMin + kernel_stack_size; mach_msg_type_number_t ct = *start_idx; kern_return_t kr = KERN_FAILURE; @@ -335,7 +340,7 @@ static kern_return_t do_backtrace64( /* read our caller */ if(supervisor) { - kr = KERN_FAILURE; + kr = chudxnu_kern_read(&currPC, (vm_offset_t)caller, sizeof(uint64_t)); } else { kr = chudxnu_task_read(task, &currPC, caller, sizeof(uint64_t)); } @@ -351,7 +356,7 @@ static kern_return_t do_backtrace64( */ prevFP = 0; if(supervisor) { - kr = KERN_FAILURE; + kr = chudxnu_kern_read(&prevFP, (vm_offset_t)currFP, sizeof(uint64_t)); } else { kr = chudxnu_task_read(task, &prevFP, currFP, sizeof(uint64_t)); } @@ -371,6 +376,107 @@ static kern_return_t do_backtrace64( return KERN_SUCCESS; } +static kern_return_t do_kernel_backtrace( + thread_t thread, + struct x86_kernel_state *regs, + uint64_t *frames, + mach_msg_type_number_t *start_idx, + mach_msg_type_number_t max_idx) +{ + uint64_t kernStackMin = (uint64_t)thread->kernel_stack; + uint64_t kernStackMax = (uint64_t)kernStackMin + kernel_stack_size; + mach_msg_type_number_t ct = *start_idx; + kern_return_t kr = KERN_FAILURE; + +#if __LP64__ + uint64_t currPC = 0ULL; + uint64_t currFP = 0ULL; + uint64_t prevPC = 0ULL; + uint64_t prevFP = 0ULL; + if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_rip), sizeof(uint64_t))) { + return KERN_FAILURE; + } + if(KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_rbp), sizeof(uint64_t))) { + return KERN_FAILURE; + } +#else + uint32_t currPC = 0U; + uint32_t currFP = 
0U; + uint32_t prevPC = 0U; + uint32_t prevFP = 0U; + if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(regs->k_eip), sizeof(uint32_t))) { + return KERN_FAILURE; + } + if(KERN_SUCCESS != chudxnu_kern_read(&currFP, (vm_offset_t)&(regs->k_ebp), sizeof(uint32_t))) { + return KERN_FAILURE; + } +#endif + + if(*start_idx >= max_idx) + return KERN_RESOURCE_SHORTAGE; // no frames traced + + if(!currPC) { + return KERN_FAILURE; + } + + frames[ct++] = (uint64_t)currPC; + + // build a backtrace of this kernel state +#if __LP64__ + while(VALID_STACK_ADDRESS64(TRUE, currFP, kernStackMin, kernStackMax)) { + // this is the address where caller lives in the user thread + uint64_t caller = currFP + sizeof(uint64_t); +#else + while(VALID_STACK_ADDRESS(TRUE, currFP, kernStackMin, kernStackMax)) { + uint32_t caller = (uint32_t)currFP + sizeof(uint32_t); +#endif + + if(!currFP || !currPC) { + currPC = 0; + break; + } + + if(ct >= max_idx) { + *start_idx = ct; + return KERN_RESOURCE_SHORTAGE; + } + + /* read our caller */ + kr = chudxnu_kern_read(&currPC, (vm_offset_t)caller, sizeof(currPC)); + + if(kr != KERN_SUCCESS || !currPC) { + currPC = 0UL; + break; + } + + /* + * retrive contents of the frame pointer and advance to the next stack + * frame if it's valid + */ + prevFP = 0; + kr = chudxnu_kern_read(&prevFP, (vm_offset_t)currFP, sizeof(currPC)); + +#if __LP64__ + if(VALID_STACK_ADDRESS64(TRUE, prevFP, kernStackMin, kernStackMax)) { +#else + if(VALID_STACK_ADDRESS(TRUE, prevFP, kernStackMin, kernStackMax)) { +#endif + frames[ct++] = (uint64_t)currPC; + prevPC = currPC; + } + if(prevFP <= currFP) { + break; + } else { + currFP = prevFP; + } + } + + *start_idx = ct; + return KERN_SUCCESS; +} + + + __private_extern__ kern_return_t chudxnu_thread_get_callstack64( thread_t thread, @@ -380,7 +486,7 @@ kern_return_t chudxnu_thread_get_callstack64( { kern_return_t kr = KERN_FAILURE; task_t task = thread->task; - uint64_t currPC = 0; + uint64_t currPC = 0ULL; boolean_t supervisor 
= FALSE; mach_msg_type_number_t bufferIndex = 0; mach_msg_type_number_t bufferMaxIndex = *count; @@ -389,6 +495,7 @@ kern_return_t chudxnu_thread_get_callstack64( x86_saved_state32_t *regs32 = NULL; x86_saved_state32_t *u_regs32 = NULL; x86_saved_state64_t *u_regs64 = NULL; + struct x86_kernel_state *kregs = NULL; if(ml_at_interrupt_context()) { @@ -417,7 +524,25 @@ kern_return_t chudxnu_thread_get_callstack64( } } - if(!tagged_regs) { + if(!ml_at_interrupt_context() && kernel_task == task) { + + if(!thread->kernel_stack) { + return KERN_FAILURE; + } + + // Kernel thread not at interrupt context + kregs = (struct x86_kernel_state *)NULL; + + // nofault read of the thread->kernel_stack pointer + if(KERN_SUCCESS != chudxnu_kern_read(&kregs, (vm_offset_t)&(thread->kernel_stack), sizeof(void *))) { + return KERN_FAILURE; + } + + // Adjust to find the saved kernel state + kregs = STACK_IKS((vm_offset_t)(uintptr_t)kregs); + + supervisor = TRUE; + } else if(!tagged_regs) { /* * not at interrupt context, or tracing a different thread than * current_thread() at interrupt context @@ -426,7 +551,7 @@ kern_return_t chudxnu_thread_get_callstack64( if(is_saved_state64(tagged_regs)) { /* 64 bit registers */ regs64 = saved_state64(tagged_regs); - supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U); + supervisor = ((regs64->isf.cs & SEL_PL) != SEL_PL_U); } else { /* 32 bit registers */ regs32 = saved_state32(tagged_regs); @@ -466,7 +591,24 @@ kern_return_t chudxnu_thread_get_callstack64( * 32 bit user land state */ - if(regs64) { + if(kregs) { + /* + * nofault read of the registers from the kernel stack (as they can + * disappear on the fly). 
+ */ + +#if __LP64__ + if(KERN_SUCCESS != chudxnu_kern_read(&currPC, (vm_offset_t)&(kregs->k_rip), sizeof(uint64_t))) { + return KERN_FAILURE; + } +#else + uint32_t tmp; + if(KERN_SUCCESS != chudxnu_kern_read(&tmp, (vm_offset_t)&(kregs->k_eip), sizeof(uint32_t))) { + return KERN_FAILURE; + } + currPC = (uint64_t)tmp; +#endif + } else if(regs64) { currPC = regs64->isf.rip; } else if(regs32) { currPC = (uint64_t) regs32->eip; @@ -489,14 +631,43 @@ kern_return_t chudxnu_thread_get_callstack64( } /* backtrace kernel */ - if(regs64) { + if(kregs) { + addr64_t address = 0ULL; + size_t size = 0UL; + + // do the backtrace + kr = do_kernel_backtrace(thread, kregs, callstack, &bufferIndex, bufferMaxIndex); + + // and do a nofault read of (r|e)sp +#if __LP64__ + uint64_t rsp = 0ULL; + size = sizeof(uint64_t); + + if(KERN_SUCCESS != chudxnu_kern_read(&address, (vm_offset_t)&(kregs->k_rsp), size)) { + address = 0ULL; + } +#else + uint32_t rsp = 0ULL, tmp = 0ULL; + size = sizeof(uint32_t); + + if(KERN_SUCCESS != chudxnu_kern_read(&tmp, (vm_offset_t)&(kregs->k_esp), size)) { + address = 0ULL; + } else { + address = (addr64_t)tmp; + } +#endif + + if(address && KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t)address, size) && bufferIndex < bufferMaxIndex) { + callstack[bufferIndex++] = (uint64_t)rsp; + } + } else if(regs64) { uint64_t rsp = 0ULL; // backtrace the 64bit side. 
kr = do_backtrace64(task, thread, regs64, callstack, &bufferIndex, bufferMaxIndex, TRUE); - if(KERN_SUCCESS == chudxnu_kern_read(&rsp, (addr64_t) regs64->isf.rsp, sizeof(uint64_t)) && + if(KERN_SUCCESS == chudxnu_kern_read(&rsp, (vm_offset_t) regs64->isf.rsp, sizeof(uint64_t)) && bufferIndex < bufferMaxIndex) { callstack[bufferIndex++] = rsp; } @@ -508,7 +679,7 @@ kern_return_t chudxnu_thread_get_callstack64( kr = do_backtrace32(task, thread, regs32, callstack, &bufferIndex, bufferMaxIndex, TRUE); - if(KERN_SUCCESS == chudxnu_kern_read(&esp, (addr64_t) regs32->uesp, sizeof(uint32_t)) && + if(KERN_SUCCESS == chudxnu_kern_read(&esp, (vm_offset_t) regs32->uesp, sizeof(uint32_t)) && bufferIndex < bufferMaxIndex) { callstack[bufferIndex++] = (uint64_t) esp; } @@ -540,119 +711,3 @@ kern_return_t chudxnu_thread_get_callstack64( return kr; } -#pragma mark **** DEPRECATED **** - -// DEPRECATED -__private_extern__ kern_return_t -chudxnu_thread_get_callstack( - thread_t thread, - uint32_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only) -{ - kern_return_t kr; - task_t task = thread->task; - uint32_t currPC; - uint32_t currFP; - uint32_t prevFP = 0; - uint32_t prevPC = 0; - uint32_t esp = 0; - uint32_t kernStackMin = thread->kernel_stack; - uint32_t kernStackMax = kernStackMin + KERNEL_STACK_SIZE; - uint32_t *buffer = callStack; - int bufferIndex = 0; - int bufferMaxIndex = *count; - boolean_t supervisor; - x86_saved_state32_t *regs = NULL; - - if (user_only) { - /* We can't get user state for kernel threads */ - if (task == kernel_task) { - return KERN_FAILURE; - } - regs = USER_REGS32(thread); - } else { - regs = saved_state32(current_cpu_datap()->cpu_int_state); - } - - if (regs == NULL) { - *count = 0; - return KERN_FAILURE; - } - - supervisor = ((regs->cs & SEL_PL) != SEL_PL_U); - - currPC = regs->eip; - currFP = regs->ebp; - - bufferIndex = 0; - if(!supervisor) - bufferMaxIndex -= 1; // allot space for saving userland %esp on stack - if (bufferMaxIndex 
< 1) { - *count = 0; - return KERN_RESOURCE_SHORTAGE; - } - buffer[bufferIndex++] = currPC; //save PC in position 0. - - // Now, fill buffer with stack backtraces. - while (VALID_STACK_ADDRESS(supervisor, currFP, kernStackMin, kernStackMax)) { - cframe_t *fp = (cframe_t *) currFP; - - if (bufferIndex >= bufferMaxIndex) { - *count = bufferMaxIndex; - return KERN_RESOURCE_SHORTAGE; - } - - if (supervisor) { - kr = chudxnu_kern_read( - &currPC, - (vm_offset_t) &fp->caller, - sizeof(currPC)); - } else { - kr = chudxnu_task_read( - task, - &currPC, - (vm_offset_t) &fp->caller, - sizeof(currPC)); - } - if (kr != KERN_SUCCESS) - break; - - //retrieve the contents of the frame pointer - // and advance to the prev stack frame if it's valid - prevFP = 0; - if (supervisor) { - kr = chudxnu_kern_read( - &prevFP, - (vm_offset_t) &fp->prev, - sizeof(prevFP)); - } else { - kr = chudxnu_task_read( - task, - &prevFP, - (vm_offset_t) &fp->prev, - sizeof(prevFP)); - } - if (prevFP) { - buffer[bufferIndex++] = currPC; - prevPC = currPC; - } - if (prevFP < currFP) { - break; - } else { - currFP = prevFP; - } - } - - // put the stack pointer on the bottom of the backtrace - if(!supervisor) { - kr = chudxnu_task_read(task, &esp, regs->uesp, sizeof(uint32_t)); - if(kr == KERN_SUCCESS) { - buffer[bufferIndex++] = esp; - } - } - - *count = bufferIndex; - return KERN_SUCCESS; -} - diff --git a/osfmk/chud/i386/chud_xnu_glue.h b/osfmk/chud/i386/chud_xnu_glue.h index 26f1a70ce..7145052d0 100644 --- a/osfmk/chud/i386/chud_xnu_glue.h +++ b/osfmk/chud/i386/chud_xnu_glue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/osfmk/chud/i386/chud_xnu_private.h b/osfmk/chud/i386/chud_xnu_private.h index 4377af6ef..4d0a30383 100644 --- a/osfmk/chud/i386/chud_xnu_private.h +++ b/osfmk/chud/i386/chud_xnu_private.h @@ -31,7 +31,9 @@ #include +#if 0 #pragma mark **** cpu timer **** +#endif /* * Cross-cpu signal request entries are queued on the target cpu's diff --git a/osfmk/chud/ppc/chud_cpu_ppc.c b/osfmk/chud/ppc/chud_cpu_ppc.c index b7e53cd3e..60f279c3f 100644 --- a/osfmk/chud/ppc/chud_cpu_ppc.c +++ b/osfmk/chud/ppc/chud_cpu_ppc.c @@ -57,7 +57,9 @@ #define mtsr(sr, reg) __asm__ volatile("sync" "@" "mtsr sr%0, %1 " "@" "isync" : : "i" (sr), "r" (reg)); #define mfsr(reg, sr) __asm__ volatile("mfsr %0, sr%1" : "=r" (reg) : "i" (sr)); +#if 0 #pragma mark **** cpu enable/disable **** +#endif extern kern_return_t processor_start(processor_t processor); // osfmk/kern/processor.c extern kern_return_t processor_exit(processor_t processor); // osfmk/kern/processor.c @@ -84,7 +86,9 @@ kern_return_t chudxnu_enable_cpu(int cpu, boolean_t enable) return KERN_FAILURE; } +#if 0 #pragma mark **** nap **** +#endif __private_extern__ kern_return_t chudxnu_enable_cpu_nap(int cpu, boolean_t enable) @@ -116,7 +120,9 @@ boolean_t chudxnu_cpu_nap_enabled(int cpu) return prev; } +#if 0 #pragma mark **** shadowed spr **** +#endif __private_extern__ kern_return_t chudxnu_set_shadowed_spr(int cpu, int spr, uint32_t val) @@ -358,7 +364,9 @@ uint32_t chudxnu_get_orig_cpu_l3cr(int cpu) return PerProcTable[cpu].ppe_vaddr->pf.l3crOriginal; } +#if 0 #pragma mark **** spr **** +#endif __private_extern__ kern_return_t chudxnu_read_spr(int cpu, int spr, uint32_t *val_p) @@ -1077,7 +1085,9 @@ kern_return_t chudxnu_write_spr64(int cpu, int spr, uint64_t val) return retval; } +#if 0 #pragma mark **** perfmon facility **** +#endif __private_extern__ kern_return_t chudxnu_perfmon_acquire_facility(task_t task) @@ -1091,10 +1101,12 @@ kern_return_t 
chudxnu_perfmon_release_facility(task_t task) return perfmon_release_facility(task); } +#if 0 #pragma mark **** rupt counters **** +#endif __private_extern__ -kern_return_t chudxnu_get_cpu_interrupt_counters(int cpu, rupt_counters_t *rupts) +kern_return_t chudxnu_get_cpu_interrupt_counters(int cpu, interrupt_counters_t *rupts) { if(cpu<0 || cpu>=chudxnu_phys_cpu_count()) { // check sanity of cpu argument return KERN_FAILURE; @@ -1146,45 +1158,9 @@ kern_return_t chudxnu_clear_cpu_interrupt_counters(int cpu) return KERN_SUCCESS; } -#pragma mark **** alignment exceptions **** - -__private_extern__ -kern_return_t chudxnu_passup_alignment_exceptions(boolean_t enable) -{ - if(enable) { - dgWork.dgFlags |= enaNotifyEM; - } else { - dgWork.dgFlags &= ~enaNotifyEM; - } - return KERN_SUCCESS; -} - -#pragma mark **** scom **** -kern_return_t chudxnu_scom_read(uint32_t reg, uint64_t *data) -{ - ml_scom_read(reg, data); - return KERN_SUCCESS; -} - -kern_return_t chudxnu_scom_write(uint32_t reg, uint64_t data) -{ - ml_scom_write(reg, data); - return KERN_SUCCESS; -} - +#if 0 #pragma mark *** deprecated *** - -//DEPRECATED -__private_extern__ kern_return_t -chudxnu_get_cpu_rupt_counters(int cpu, rupt_counters_t *rupts) { - return chudxnu_get_cpu_interrupt_counters(cpu, rupts); -} - -//DEPRECATED -__private_extern__ kern_return_t -chudxnu_clear_cpu_rupt_counters(int cpu) { - return chudxnu_clear_cpu_interrupt_counters(cpu); -} +#endif //DEPRECATED __private_extern__ diff --git a/osfmk/chud/ppc/chud_osfmk_callback_ppc.c b/osfmk/chud/ppc/chud_osfmk_callback_ppc.c index 44c893c47..3077f07c3 100644 --- a/osfmk/chud/ppc/chud_osfmk_callback_ppc.c +++ b/osfmk/chud/ppc/chud_osfmk_callback_ppc.c @@ -36,6 +36,8 @@ #include #include +#include + #include #include #include @@ -167,8 +169,18 @@ kern_return_t chudxnu_cpu_timer_callback_cancel_all(void) return KERN_SUCCESS; } +#if 0 #pragma mark **** trap **** -static chudxnu_trap_callback_func_t trap_callback_fn = NULL; +#endif +static 
kern_return_t chud_null_trap(uint32_t trapentry, thread_flavor_t flavor, + thread_state_t tstate, mach_msg_type_number_t count); +static chudxnu_trap_callback_func_t trap_callback_fn = chud_null_trap; + +static kern_return_t chud_null_trap(uint32_t trapentry __unused, thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; +} + #define TRAP_ENTRY_POINT(t) ((t==T_RESET) ? 0x100 : \ (t==T_MACHINE_CHECK) ? 0x200 : \ @@ -224,28 +236,52 @@ chudxnu_private_trap_callback(int trapno, struct savearea *ssp, return retval; } -__private_extern__ -kern_return_t chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func) +__private_extern__ kern_return_t +chudxnu_trap_callback_enter(chudxnu_trap_callback_func_t func) { - trap_callback_fn = func; - perfTrapHook = chudxnu_private_trap_callback; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_trap_callback, + (void * volatile *)&perfTrapHook)) { + + chudxnu_trap_callback_func_t old = trap_callback_fn; + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&trap_callback_fn)) { + old = trap_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } -__private_extern__ -kern_return_t chudxnu_trap_callback_cancel(void) +__private_extern__ kern_return_t +chudxnu_trap_callback_cancel(void) { - trap_callback_fn = NULL; - perfTrapHook = NULL; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_trap_callback, NULL, + (void * volatile *)&perfTrapHook)) { + + chudxnu_trap_callback_func_t old = trap_callback_fn; + while(!OSCompareAndSwapPtr(old, chud_null_trap, + (void * volatile *)&trap_callback_fn)) { + old = trap_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } +#if 0 #pragma mark **** ast **** 
-static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = NULL; +#endif +static kern_return_t chud_null_ast(thread_flavor_t flavor, thread_state_t tstate, + mach_msg_type_number_t count); +static chudxnu_perfmon_ast_callback_func_t perfmon_ast_callback_fn = chud_null_ast; + +static kern_return_t chud_null_ast(thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; +} + static kern_return_t chudxnu_private_chud_ast_callback(__unused int trapno, @@ -301,24 +337,38 @@ chudxnu_private_chud_ast_callback(__unused int trapno, return retval; } -__private_extern__ -kern_return_t chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func) +__private_extern__ kern_return_t +chudxnu_perfmon_ast_callback_enter(chudxnu_perfmon_ast_callback_func_t func) { - perfmon_ast_callback_fn = func; - perfASTHook = chudxnu_private_chud_ast_callback; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_chud_ast_callback, + (void * volatile *)&perfASTHook)) { + chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn; + + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&perfmon_ast_callback_fn)) { + old = perfmon_ast_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } -__private_extern__ -kern_return_t chudxnu_perfmon_ast_callback_cancel(void) +__private_extern__ kern_return_t +chudxnu_perfmon_ast_callback_cancel(void) { - perfmon_ast_callback_fn = NULL; - perfASTHook = NULL; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_chud_ast_callback, NULL, + (void * volatile *)&perfASTHook)) { + chudxnu_perfmon_ast_callback_func_t old = perfmon_ast_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_ast, + (void * volatile 
*)&perfmon_ast_callback_fn)) { + old = perfmon_ast_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ @@ -337,15 +387,18 @@ kern_return_t chudxnu_perfmon_ast_send_urgent(boolean_t urgent) return KERN_SUCCESS; } -__private_extern__ -kern_return_t chudxnu_perfmon_ast_send(void) -{ - return chudxnu_perfmon_ast_send_urgent(TRUE); +#if 0 +#pragma mark **** interrupt **** +#endif +static kern_return_t chud_null_int(uint32_t trapentry, thread_flavor_t flavor, + thread_state_t tstate, mach_msg_type_number_t count); +static chudxnu_interrupt_callback_func_t interrupt_callback_fn = chud_null_int; + +static kern_return_t chud_null_int(uint32_t trapentry __unused, thread_flavor_t flavor __unused, + thread_state_t tstate __unused, mach_msg_type_number_t count __unused) { + return KERN_FAILURE; } -#pragma mark **** interrupt **** -static chudxnu_interrupt_callback_func_t interrupt_callback_fn = NULL; -//extern perfCallback perfIntHook; /* function hook into interrupt() */ static kern_return_t chudxnu_private_interrupt_callback(int trapno, struct savearea *ssp, @@ -367,24 +420,40 @@ chudxnu_private_interrupt_callback(int trapno, struct savearea *ssp, __private_extern__ kern_return_t chudxnu_interrupt_callback_enter(chudxnu_interrupt_callback_func_t func) { - interrupt_callback_fn = func; - perfIntHook = chudxnu_private_interrupt_callback; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_interrupt_callback, + (void * volatile *)&perfIntHook)) { + chudxnu_interrupt_callback_func_t old = interrupt_callback_fn; + + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&interrupt_callback_fn)) { + old = interrupt_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_interrupt_callback_cancel(void) { - interrupt_callback_fn = NULL; - perfIntHook = NULL; - __asm__ 
volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_interrupt_callback, NULL, + (void * volatile *)&perfIntHook)) { + chudxnu_interrupt_callback_func_t old = interrupt_callback_fn; + + while(!OSCompareAndSwapPtr(old, chud_null_int, + (void * volatile *)&interrupt_callback_fn)) { + old = interrupt_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } +#if 0 #pragma mark **** cpu signal **** +#endif static chudxnu_cpusig_callback_func_t cpusig_callback_fn = NULL; extern perfCallback perfCpuSigHook; /* function hook into cpu_signal_handler() */ @@ -407,21 +476,35 @@ chudxnu_private_cpu_signal_handler(int request, struct savearea *ssp, __private_extern__ kern_return_t chudxnu_cpusig_callback_enter(chudxnu_cpusig_callback_func_t func) { - cpusig_callback_fn = func; - perfCpuSigHook = chudxnu_private_cpu_signal_handler; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(NULL, chudxnu_private_cpu_signal_handler, + (void * volatile *)&perfCpuSigHook)) { + chudxnu_cpusig_callback_func_t old = cpusig_callback_fn; + + while(!OSCompareAndSwapPtr(old, func, + (void * volatile *)&cpusig_callback_fn)) { + old = cpusig_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } __private_extern__ kern_return_t chudxnu_cpusig_callback_cancel(void) { - cpusig_callback_fn = NULL; - perfCpuSigHook = NULL; - __asm__ volatile("eieio"); /* force order */ - __asm__ volatile("sync"); /* force to memory */ - return KERN_SUCCESS; + if(OSCompareAndSwapPtr(chudxnu_private_cpu_signal_handler, NULL, + (void * volatile *)&perfCpuSigHook)) { + chudxnu_cpusig_callback_func_t old = cpusig_callback_fn; + + while(!OSCompareAndSwapPtr(old, NULL, + (void * volatile *)&cpusig_callback_fn)) { + old = cpusig_callback_fn; + } + + return KERN_SUCCESS; + } + return KERN_FAILURE; } 
__private_extern__ @@ -463,3 +546,4 @@ kern_return_t chudxnu_cpusig_send(int otherCPU, uint32_t request) ml_set_interrupts_enabled(oldlevel); return retval; } + diff --git a/osfmk/chud/ppc/chud_thread_ppc.c b/osfmk/chud/ppc/chud_thread_ppc.c index 36fdc1e19..0bca0ac92 100644 --- a/osfmk/chud/ppc/chud_thread_ppc.c +++ b/osfmk/chud/ppc/chud_thread_ppc.c @@ -46,7 +46,9 @@ #include #include +#if 0 #pragma mark **** thread state **** +#endif __private_extern__ kern_return_t chudxnu_copy_savearea_to_threadstate(thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count, struct savearea *sv) @@ -327,7 +329,9 @@ kern_return_t chudxnu_thread_set_state(thread_t thread, } } +#if 0 #pragma mark **** task memory read/write **** +#endif __private_extern__ kern_return_t chudxnu_task_read(task_t task, void *kernaddr, uint64_t usraddr, vm_size_t size) @@ -451,7 +455,7 @@ kern_return_t chudxnu_thread_get_callstack64( thread_t thread, uint64_t framePointer; uint64_t prevPC = 0; uint64_t kernStackMin = thread->kernel_stack; - uint64_t kernStackMax = kernStackMin + KERNEL_STACK_SIZE; + uint64_t kernStackMax = kernStackMin + kernel_stack_size; uint64_t *buffer = callStack; uint32_t tmpWord; int bufferIndex = 0; @@ -580,148 +584,3 @@ kern_return_t chudxnu_thread_get_callstack64( thread_t thread, return KERN_SUCCESS; } -#pragma mark **** DEPRECATED **** - -// DEPRECATED -__private_extern__ -kern_return_t chudxnu_thread_get_callstack( thread_t thread, - uint32_t *callStack, - mach_msg_type_number_t *count, - boolean_t user_only) -{ - kern_return_t kr; - task_t task = get_threadtask(thread); - uint64_t nextFramePointer = 0; - uint64_t currPC, currLR, currR0; - uint64_t framePointer; - uint64_t prevPC = 0; - uint64_t kernStackMin = thread->kernel_stack; - uint64_t kernStackMax = kernStackMin + KERNEL_STACK_SIZE; - uint32_t *buffer = callStack; - uint32_t tmpWord; - int bufferIndex = 0; - int bufferMaxIndex = *count; - boolean_t supervisor; - boolean_t is64Bit; - struct 
savearea *sv; - - if(user_only) { - sv = find_user_regs(thread); - } else { - sv = find_kern_regs(thread); - } - - if(!sv) { - *count = 0; - return KERN_FAILURE; - } - - supervisor = SUPERVISOR_MODE(sv->save_srr1); - if(supervisor) { - is64Bit = FALSE; /* XXX assuming kernel task is always 32-bit */ - } else { - is64Bit = chudxnu_is_64bit_task(task); - } - - bufferMaxIndex = bufferMaxIndex - 2; // allot space for saving the LR and R0 on the stack at the end. - if(bufferMaxIndex<2) { - *count = 0; - return KERN_RESOURCE_SHORTAGE; - } - - currPC = sv->save_srr0; - framePointer = sv->save_r1; /* r1 is the stack pointer (no FP on PPC) */ - currLR = sv->save_lr; - currR0 = sv->save_r0; - - bufferIndex = 0; // start with a stack of size zero - buffer[bufferIndex++] = currPC; // save PC in position 0. - - // Now, fill buffer with stack backtraces. - while(bufferIndex SP - // Here, we'll get the lr from the stack. - uint64_t fp_link; - - if(is64Bit) { - fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint64_t); - } else { - fp_link = framePointer + FP_LINK_OFFSET*sizeof(uint32_t); - } - - // Note that we read the pc even for the first stack frame (which, in theory, - // is always empty because the callee fills it in just before it lowers the - // stack. However, if we catch the program in between filling in the return - // address and lowering the stack, we want to still have a valid backtrace. - // FixupStack correctly disregards this value if necessary. 
- - if(supervisor) { - if(is64Bit) { - kr = chudxnu_kern_read(&pc, fp_link, sizeof(uint64_t)); - } else { - kr = chudxnu_kern_read(&tmpWord, fp_link, sizeof(uint32_t)); - pc = tmpWord; - } - } else { - if(is64Bit) { - kr = chudxnu_task_read(task, &pc, fp_link, sizeof(uint64_t)); - } else { - kr = chudxnu_task_read(task, &tmpWord, fp_link, sizeof(uint32_t)); - pc = tmpWord; - } - } - if(kr!=KERN_SUCCESS) { - pc = 0; - break; - } - - // retrieve the contents of the frame pointer and advance to the next stack frame if it's valid - if(supervisor) { - if(is64Bit) { - kr = chudxnu_kern_read(&nextFramePointer, framePointer, sizeof(uint64_t)); - } else { - kr = chudxnu_kern_read(&tmpWord, framePointer, sizeof(uint32_t)); - nextFramePointer = tmpWord; - } - } else { - if(is64Bit) { - kr = chudxnu_task_read(task, &nextFramePointer, framePointer, sizeof(uint64_t)); - } else { - kr = chudxnu_task_read(task, &tmpWord, framePointer, sizeof(uint32_t)); - nextFramePointer = tmpWord; - } - } - if(kr!=KERN_SUCCESS) { - nextFramePointer = 0; - } - - if(nextFramePointer) { - buffer[bufferIndex++] = pc; - prevPC = pc; - } - - if(nextFramePointer=bufferMaxIndex) { - *count = 0; - return KERN_RESOURCE_SHORTAGE; - } - - // Save link register and R0 at bottom of stack (used for later fixup). - buffer[bufferIndex++] = currLR; - buffer[bufferIndex++] = currR0; - - *count = bufferIndex; - return KERN_SUCCESS; -} - diff --git a/osfmk/chud/ppc/chud_xnu_glue.h b/osfmk/chud/ppc/chud_xnu_glue.h index 26f1a70ce..7145052d0 100644 --- a/osfmk/chud/ppc/chud_xnu_glue.h +++ b/osfmk/chud/ppc/chud_xnu_glue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/osfmk/chud/ppc/chud_xnu_private.h b/osfmk/chud/ppc/chud_xnu_private.h index e3ffaf075..72b2ed663 100644 --- a/osfmk/chud/ppc/chud_xnu_private.h +++ b/osfmk/chud/ppc/chud_xnu_private.h @@ -29,7 +29,9 @@ #ifndef _PPC_CHUD_XNU_PRIVATE_H_ #define _PPC_CHUD_XNU_PRIVATE_H_ +#if 0 #pragma mark **** thread **** +#endif // ***************************************************************************** // thread // ***************************************************************************** @@ -45,7 +47,9 @@ extern kern_return_t chudxnu_copy_threadstate_to_savearea( thread_state_t tstate, mach_msg_type_number_t *count); +#if 0 #pragma mark **** cpu timer **** +#endif typedef struct { timer_call_data_t cpu_timer_call; uint64_t t_deadline; diff --git a/osfmk/conf/MASTER b/osfmk/conf/MASTER index 08c28268c..76e39eb65 100644 --- a/osfmk/conf/MASTER +++ b/osfmk/conf/MASTER @@ -141,7 +141,6 @@ options CONFIG_DTRACE # # # options MACH_COUNTERS # # -options UPL_DEBUG # # ########################################################## # @@ -188,6 +187,10 @@ options CRYPTO # # options HIBERNATION # # +# CONFIG_SLEEP - include sleep power state code +# +options CONFIG_SLEEP # # + # # configurable kernel related resources (CONFIG_THREAD_MAX needs to stay in # sync with bsd/conf/MASTER until we fix the config system... 
todo XXX @@ -227,9 +230,6 @@ options CONFIG_EMBEDDED # # options CONFIG_ENFORCE_SIGNED_CODE # -# jettison_kernel_linker - jettison kernel linker after kernel init; don't wait for kextd to launch -options CONFIG_JETTISON_KERNEL_LINKER # - # vc_progress_white - make the progress gear white instead of black options CONFIG_VC_PROGRESS_WHITE # @@ -241,3 +241,8 @@ options SECURE_KERNEL # # must be set in all the bsd/conf and osfmk/conf MASTER files # options CONFIG_CODE_DECRYPTION # + +# +# Context switched counters +# +options CONFIG_COUNTERS # diff --git a/osfmk/conf/MASTER.i386 b/osfmk/conf/MASTER.i386 index 07289a808..b8cd08e05 100644 --- a/osfmk/conf/MASTER.i386 +++ b/osfmk/conf/MASTER.i386 @@ -9,14 +9,14 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- --------------- # -# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto config_dtrace] -# DEBUG_KDP = [ RELEASE osf_debug debug ] -# DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert ] +# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_counters ] +# DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert] # PROFILE = [ RELEASE profile ] # -# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto ] +# +# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] # EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ] -# DEVELOPMENT = [ 
EMBEDDED_BASE mach_assert config_dtrace ] +# DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace config_counters ] # ###################################################################### # @@ -60,14 +60,20 @@ options X86_64 options DISPATCH_COUNTS # -# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and # security/conf MASTER files. # options CONFIG_MACF # Mandatory Access Control Framework #options CONFIG_MACF_MACH # MACF applied to Mach services +options CONFIG_AUDIT # Kernel auditing # # code decryption... used on i386 for DSMOS # must be set in all the bsd/conf and osfmk/conf MASTER files # options CONFIG_CODE_DECRYPTION + +options CONFIG_MCA # Machine Check Architecture # +options CONFIG_VMX # Virtual Machine Extensions # + +options NO_NESTED_PMAP # diff --git a/osfmk/conf/MASTER.ppc b/osfmk/conf/MASTER.ppc index 09dfbf8ee..98036b366 100644 --- a/osfmk/conf/MASTER.ppc +++ b/osfmk/conf/MASTER.ppc @@ -9,7 +9,7 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- --------------- # -# RELEASE = [ medium mach_bsd mach_kdp iokit mach_pe ppc mach hibernation crypto config_dtrace ] +# RELEASE = [ medium mach_bsd mach_kdp iokit mach_pe ppc mach hibernation crypto config_dtrace config_counters ] # DEVELOPMENT = [ RELEASE ] # RELEASE_TRACE = [ RELEASE kdebug ] # DEBUG = [ RELEASE mach_kdb debug mach_assert ] @@ -59,8 +59,9 @@ options POWERMAC options DISPATCH_COUNTS # -# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and # security/conf MASTER files. 
# options CONFIG_MACF # Mandatory Access Control Framework #options CONFIG_MACF_MACH # MACF applied to Mach services +options CONFIG_AUDIT # Kernel auditing diff --git a/osfmk/conf/MASTER.x86_64 b/osfmk/conf/MASTER.x86_64 new file mode 100644 index 000000000..a3f336c06 --- /dev/null +++ b/osfmk/conf/MASTER.x86_64 @@ -0,0 +1,79 @@ +# +# Mach Operating System +# Copyright (c) 1986 Carnegie-Mellon University +# All rights reserved. The CMU software License Agreement +# specifies the terms and conditions for use and redistribution. +# +###################################################################### +# +# Standard Apple MacOS X Configurations: +# -------- ---- -------- --------------- +# +# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_counters ] +# DEBUG= [ RELEASE osf_debug debug mach_assert ] +# PROFILE = [ RELEASE profile ] +# +# +# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] +# EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ] +# DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_counters ] +# +###################################################################### +# +machine "x86_64" # +cpu "x86_64" # + +pseudo-device com 2 +pseudo-device vc 1 + +# choices for platform_bus are pci at386 sqt and kkt +makeoptions OSFMK_MACHINE = "x86_64" # +makeoptions CCONFIGFLAGS = "-g -O -fno-omit-frame-pointer" # +makeoptions CCONFIGFLAGS = "-O3" # +makeoptions RELOC = "00100000" # +makeoptions SYMADDR = "00780000" # + +options GDB # GNU kernel debugger # +options DEBUG # general debugging code # +options SHOW_SPACE # print size of structures # +options EVENTMETER # event meter 
support # +options FP_EMUL # floating point emulation # +options PC_SUPPORT # virtual PC support # +options PROFILE # kernel profiling # +options UXPR # user-level XPR package # +config mach_kernel swap generic # + +options GPROF # kgmon profiling # + +options EVENT # + +options MACH_BSD +options IOKIT # # +options MACH_PE # # + +options MACH_KDP # KDP # +options CONFIG_SERIAL_KDP # KDP over serial # +options PAE +options X86_64 +options DISPATCH_COUNTS + +# +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and +# security/conf MASTER files. +# +options CONFIG_MACF # Mandatory Access Control Framework +#options CONFIG_MACF_MACH # MACF applied to Mach services +options CONFIG_AUDIT # Kernel auditing + +# +# code decryption... used on i386 for DSMOS +# must be set in all the bsd/conf and osfmk/conf MASTER files +# +options CONFIG_CODE_DECRYPTION + +options CONFIG_MCA # Machine Check Architecture # +options CONFIG_VMX # Virtual Machine Extensions # + +options NO_NESTED_PMAP # +options CONFIG_NO_NESTED_PMAP # +options CONFIG_NESTED_PMAP # diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile index cc38548ba..4010dbcba 100644 --- a/osfmk/conf/Makefile +++ b/osfmk/conf/Makefile @@ -34,7 +34,6 @@ $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile: $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386 index c3c05a7d7..387d4aafb 100644 --- a/osfmk/conf/Makefile.i386 +++ b/osfmk/conf/Makefile.i386 @@ -6,13 +6,12 @@ CFLAGS+= -DAT386=1 SFLAGS+= -DAT386=1 # Enable -Werror for i386 builds -CFLAGS+= $(WERROR) +CFLAGS+= $(WERROR) -Wshorten-64-to-32 CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) # Objects that don't compile cleanly: OBJS_NO_WERROR= \ UNDRequest.o \ - db_examine.o \ db_macro.o \ db_print.o 
\ db_sym.o \ @@ -20,9 +19,7 @@ OBJS_NO_WERROR= \ db_disasm.o \ db_interface.o \ db_trace.o \ - loose_ends.o \ - gssd_mach.o \ - mp.o + gssd_mach.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) @@ -30,9 +27,8 @@ $(OBJS_WERROR): WERROR=-Werror # Files that must go in the __HIB segment: UNCONFIGURED_HIB_FILES= \ - hibernate_restore.o \ - gdt.o \ - idt.o + hibernate_restore.o + HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) ###################################################################### diff --git a/osfmk/conf/Makefile.ppc b/osfmk/conf/Makefile.ppc index d63b86c89..35d7f0dd2 100644 --- a/osfmk/conf/Makefile.ppc +++ b/osfmk/conf/Makefile.ppc @@ -28,12 +28,35 @@ OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) $(OBJS_WERROR): WERROR=-Werror +export bsd_vm.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export device_vm.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export memory_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm32_user.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_apple_protect.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_debug.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_external.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_fault.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_init.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_kern.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_map.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_pageout.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_purgeable.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_resident.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_shared_region.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_swapfile_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export vm_user.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 + +export default_pager.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export dp_backing_store.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 +export 
dp_memory_object.o_CFLAGS_ADD=-Werror -Wshorten-64-to-32 + # # KDB support # makedis: $(SRCROOT)/osfmk/ddb/makedis.c - $(CC) -o $@ $< + $(HOST_CC) -Werror -Wall -o $@ $< ppc_disasm.o_CFLAGS_ADD = -Dperror=db_printf -Dexit=db_error -Dmalloc=db_disasm_malloc diff --git a/osfmk/conf/Makefile.template b/osfmk/conf/Makefile.template index d3596adef..75f1c7f31 100644 --- a/osfmk/conf/Makefile.template +++ b/osfmk/conf/Makefile.template @@ -38,11 +38,15 @@ COMP_SUBDIRS = \ mach \ UserNotification \ gssd \ + kextd \ lockd COMP_SUBDIRS_I386 = \ mach +COMP_SUBDIRS_X86_64 = \ + mach + # # Make sure we don't remove this by accident if interrupted at the wrong # time. @@ -88,8 +92,10 @@ $(COMPONENT).o: $(LDOBJS) assym.s $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \ mv $${hib_file}__ $${hib_file} ; \ done; - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_all: $(COMPONENT).o diff --git a/osfmk/conf/Makefile.x86_64 b/osfmk/conf/Makefile.x86_64 new file mode 100644 index 000000000..405c2089f --- /dev/null +++ b/osfmk/conf/Makefile.x86_64 @@ -0,0 +1,42 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +CFLAGS+= -DAT386=1 +SFLAGS+= -DAT386=1 + +CFLAGS+= $(WERROR) -Wshorten-64-to-32 +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +OBJS_NO_WERROR= \ + UNDRequest.o \ + db_examine.o \ + db_macro.o \ + db_print.o \ + db_sym.o \ + db_variables.o \ + db_disasm.o \ + db_interface.o \ + db_trace.o \ + host_priv_server.o \ + mach_host_server.o \ + security_server.o \ + device_server.o \ + gssd_mach.o \ + mp.o # This is blocked on 6640051 + +OBJS_WERROR=$(filter-out 
$(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + +# Files that must go in the __HIB segment: +UNCONFIGURED_HIB_FILES= \ + hibernate_restore.o \ + hibernate_bootstrap.o + +HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### diff --git a/osfmk/conf/files b/osfmk/conf/files index e5a07a963..cfccb1938 100644 --- a/osfmk/conf/files +++ b/osfmk/conf/files @@ -54,11 +54,6 @@ OPTIONS/mach_mp_debug optional mach_mp_debug OPTIONS/mach_pagemap optional mach_pagemap OPTIONS/mach_rt optional mach_rt OPTIONS/advisory_pageout optional advisory_pageout -# -# MACH_RT is real-time. MACH_TR is debugging. -# Unfortunate choice of letters. -# -OPTIONS/mach_tr optional mach_tr OPTIONS/mach_vm_debug optional mach_vm_debug OPTIONS/mach_page_hash_stats optional mach_page_hash_stats OPTIONS/mig_debug optional mig_debug @@ -75,6 +70,7 @@ OPTIONS/task_swapper optional task_swapper OPTIONS/stack_usage optional stack_usage OPTIONS/config_dtrace optional config_dtrace +OPTIONS/config_counters optional config_counters # Default pager and system pager files, to be moved to separate component @@ -94,6 +90,11 @@ osfmk/default_pager/dp_memory_object.c standard # ./lockd/lockd_mach.c standard +# +# kextd files +# +./kextd/kextd_mach.c standard + # # UserNotification files # @@ -145,6 +146,7 @@ osfmk/ipc/mach_port.c standard osfmk/ipc/mig_log.c optional mig_debug osfmk/kern/affinity.c standard osfmk/kern/ast.c standard +osfmk/kern/audit_sessionport.c optional config_audit osfmk/kern/clock.c standard osfmk/kern/clock_oldops.c standard osfmk/kern/counters.c standard @@ -195,7 +197,7 @@ osfmk/kern/xpr.c optional xpr_debug osfmk/kern/zalloc.c standard osfmk/kern/bsd_kern.c optional mach_bsd osfmk/kern/hibernate.c optional hibernation -osfmk/kern/symbols.c standard +osfmk/pmc/pmc.c standard 
./mach/clock_server.c standard ./mach/clock_priv_server.c standard ./mach/clock_reply_user.c standard @@ -226,14 +228,12 @@ osfmk/kern/symbols.c standard # ./mach/processor_server.c standard ./mach/processor_set_server.c standard -./mach/semaphore_server.c standard ./mach/task_server.c standard ./mach/thread_act_server.c standard -./mach/vm_map_server.c standard +./mach/vm32_map_server.c standard ./mach/security_server.c optional config_macf -osfmk/mach-o/mach_header.c standard - +osfmk/vm/bsd_vm.c optional mach_bsd osfmk/vm/device_vm.c standard osfmk/vm/memory_object.c standard osfmk/vm/vm_debug.c standard @@ -244,11 +244,12 @@ osfmk/vm/vm_kern.c standard osfmk/vm/vm_map.c standard osfmk/vm/vm_object.c standard osfmk/vm/vm_pageout.c standard +osfmk/vm/vm_purgeable.c standard osfmk/vm/vm_resident.c standard osfmk/vm/vm_shared_region.c standard +osfmk/vm/vm_swapfile_pager.c standard osfmk/vm/vm_user.c standard -osfmk/vm/bsd_vm.c optional mach_bsd -osfmk/vm/vm_purgeable.c standard +osfmk/vm/vm32_user.c standard # # IOKit files, for a while @@ -258,6 +259,7 @@ osfmk/device/iokit_rpc.c optional iokit osfmk/device/device_init.c optional iokit # kernel module loader interface +osfmk/kern/kext_alloc.c standard osfmk/kern/kmod.c standard # @@ -277,3 +279,4 @@ osfmk/chud/chud_osfmk_callback.c standard osfmk/chud/chud_thread.c standard osfmk/console/serial_general.c standard + diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386 index 2864ea6b9..9fe585040 100644 --- a/osfmk/conf/files.i386 +++ b/osfmk/conf/files.i386 @@ -21,7 +21,7 @@ osfmk/vm/vm_apple_protect.c standard #osfmk/i386/hi_res_clock_map.c optional hi_res_clock osfmk/i386/pmap.c standard -#osfmk/i386/read_fault.c standard +osfmk/i386/pmap_x86_common.c standard osfmk/ddb/db_aout.c optional mach_kdb @@ -30,7 +30,6 @@ osfmk/i386/bsd_i386.c optional mach_bsd osfmk/i386/machdep_call.c optional mach_bsd osfmk/i386/_setjmp.s standard -osfmk/i386/ast_check.c optional ipsc386 osfmk/i386/bcopy.s standard 
osfmk/i386/bzero.s standard osfmk/i386/cpu.c standard @@ -42,10 +41,10 @@ osfmk/i386/db_interface.c optional mach_kdb osfmk/i386/db_trace.c optional mach_kdb osfmk/i386/etimer.c standard osfmk/i386/fpu.c standard -osfmk/i386/gcc.s standard osfmk/i386/gdt.c standard osfmk/i386/i386_lock.s standard osfmk/i386/i386_init.c standard +osfmk/i386/idle_pt.c standard osfmk/i386/i386_vm_init.c standard osfmk/i386/idt.s standard osfmk/i386/io_map.c standard @@ -59,11 +58,11 @@ osfmk/i386/lowmem_vectors.s standard osfmk/i386/cswitch.s standard osfmk/i386/machine_routines.c standard osfmk/i386/machine_routines_asm.s standard -osfmk/i386/machine_check.c standard +osfmk/i386/machine_check.c optional config_mca +osfmk/i386/machine_task.c standard osfmk/i386/mcount.s optional profile osfmk/i386/mp_desc.c standard #osfmk/i386/ntoh.s standard -osfmk/i386/perfmon.c standard osfmk/i386/pcb.c standard osfmk/i386/phys.c standard osfmk/i386/rtclock.c standard @@ -76,6 +75,7 @@ osfmk/i386/tsc.c standard osfmk/i386/commpage/commpage.c standard osfmk/i386/commpage/commpage_asm.s standard osfmk/i386/commpage/atomic.s standard +osfmk/i386/commpage/cpu_number.s standard osfmk/i386/commpage/commpage_mach_absolute_time.s standard osfmk/i386/commpage/spinlocks.s standard osfmk/i386/commpage/pthreads.s standard @@ -97,13 +97,13 @@ osfmk/i386/commpage/memset_pattern_sse2_64.s standard osfmk/i386/commpage/longcopy_sse3x.s standard osfmk/i386/commpage/longcopy_sse3x_64.s standard osfmk/i386/commpage/commpage_sigs.c standard +osfmk/i386/commpage/fifo_queues.s standard osfmk/i386/AT386/conf.c standard osfmk/i386/AT386/model_dep.c standard osfmk/i386/lapic.c standard osfmk/i386/mp.c standard -osfmk/i386/mp_slave_boot.s standard osfmk/i386/acpi.c standard osfmk/i386/acpi_wakeup.s standard @@ -112,9 +112,6 @@ osfmk/i386/mtrr.c standard osfmk/console/i386/serial_console.c optional com device-driver -osfmk/console/i386/kdasm.s optional vc device-driver -osfmk/console/i386/text_console.c optional vc 
device-driver - osfmk/console/panic_dialog.c optional vc device-driver osfmk/console/video_console.c optional vc device-driver osfmk/console/i386/video_scroll.c optional vc device-driver @@ -126,19 +123,19 @@ osfmk/kern/etap_map.c optional etap device-driver #osfmk/profiling/profile-kgmon.c optional gprof #osfmk/profiling/profile-mk.c optional gprof -osfmk/kdp/ml/i386/kdp_machdep.c optional mach_kdp - -osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp +osfmk/kdp/ml/i386/kdp_machdep.c optional mach_kdp +osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp +osfmk/kdp/ml/i386/kdp_x86_common.c optional mach_kdp osfmk/i386/hibernate_i386.c optional hibernation -osfmk/i386/hibernate_restore.s optional hibernation +osfmk/i386/hibernate_restore.c optional hibernation osfmk/chud/i386/chud_osfmk_callback_i386.c standard osfmk/chud/i386/chud_cpu_i386.c standard osfmk/chud/i386/chud_thread_i386.c standard -osfmk/i386/vmx/vmx_cpu.c standard -osfmk/i386/vmx/vmx_shims.c standard +osfmk/i386/vmx/vmx_cpu.c optional config_vmx +osfmk/i386/vmx/vmx_shims.c optional config_vmx # DUMMIES TO FORCE GENERATION OF .h FILES #osfmk/OPTIONS/ln optional ln diff --git a/osfmk/conf/files.ppc b/osfmk/conf/files.ppc index 070e02f48..2866dd820 100644 --- a/osfmk/conf/files.ppc +++ b/osfmk/conf/files.ppc @@ -55,6 +55,7 @@ osfmk/ppc/PseudoKernel.c standard osfmk/ppc/interrupt.c standard osfmk/ppc/machine_routines.c standard osfmk/ppc/machine_routines_asm.s standard +osfmk/ppc/machine_task.c standard osfmk/ppc/Emulate.s standard osfmk/ppc/Emulate64.s standard osfmk/ppc/AltiAssist.s standard diff --git a/osfmk/conf/files.x86_64 b/osfmk/conf/files.x86_64 new file mode 100644 index 000000000..fbdaf097a --- /dev/null +++ b/osfmk/conf/files.x86_64 @@ -0,0 +1,143 @@ +OPTIONS/show_space optional show_space +OPTIONS/gdb optional gdb +OPTIONS/iplmeas optional iplmeas +OPTIONS/fb optional fb + +OPTIONS/config_nested_pmap optional config_nested_pmap +OPTIONS/config_no_nested_pmap optional config_no_nested_pmap + 
+#machdep/x86_64/unix_signal.c standard +#machdep/x86_64/unix_startup.c standard + +OPTIONS/debug optional debug + + +OPTIONS/gprof optional gprof +OPTIONS/db_machine_commands optional db_machine_commands +OPTIONS/dynamic_num_nodes optional dynamic_num_nodes +OPTIONS/vtoc_compat optional vtoc_compat +OPTIONS/fddi optional fddi + +osfmk/vm/vm_apple_protect.c standard + +#osfmk/x86_64/hi_res_clock_map.c optional hi_res_clock + +osfmk/x86_64/pmap.c standard +osfmk/i386/pmap_x86_common.c standard + + +osfmk/i386/bsd_i386.c optional mach_bsd +osfmk/i386/machdep_call.c optional mach_bsd + +osfmk/x86_64/bcopy.s standard +osfmk/x86_64/bzero.s standard +osfmk/i386/cpu.c standard +osfmk/i386/cpuid.c standard +osfmk/i386/cpu_threads.c standard +osfmk/i386/cpu_topology.c standard +osfmk/i386/etimer.c standard +osfmk/i386/fpu.c standard +osfmk/i386/gdt.c standard +osfmk/i386/i386_lock.s standard +osfmk/i386/i386_init.c standard +osfmk/i386/idle_pt.c standard +osfmk/i386/i386_vm_init.c standard +osfmk/i386/io_map.c standard +osfmk/i386/ktss.c standard +osfmk/i386/ldt.c standard +osfmk/x86_64/loose_ends.c standard +osfmk/i386/locks_i386.c standard +osfmk/x86_64/locore.s standard +osfmk/x86_64/start.s standard +osfmk/x86_64/lowmem_vectors.s standard +osfmk/x86_64/cswitch.s standard +osfmk/i386/machine_routines.c standard +osfmk/x86_64/machine_routines_asm.s standard +osfmk/i386/machine_check.c optional config_mca +osfmk/i386/machine_task.c standard +osfmk/x86_64/mcount.s optional profile +osfmk/i386/mp_desc.c standard +#osfmk/x86_64/ntoh.s standard +osfmk/i386/pcb.c standard +osfmk/i386/phys.c standard +osfmk/i386/rtclock.c standard +osfmk/i386/trap.c standard +osfmk/i386/user_ldt.c standard +osfmk/i386/Diagnostics.c standard +osfmk/i386/pmCPU.c standard +osfmk/i386/tsc.c standard + +osfmk/i386/commpage/commpage.c standard +osfmk/i386/commpage/commpage_asm.s standard +osfmk/i386/commpage/atomic.s standard +osfmk/i386/commpage/cpu_number.s standard 
+osfmk/i386/commpage/commpage_mach_absolute_time.s standard +osfmk/i386/commpage/spinlocks.s standard +osfmk/i386/commpage/pthreads.s standard +osfmk/i386/commpage/cacheflush.s standard +osfmk/i386/commpage/commpage_gettimeofday.s standard +osfmk/i386/commpage/bcopy_scalar.s standard +osfmk/i386/commpage/bcopy_sse2.s standard +osfmk/i386/commpage/bcopy_sse3x.s standard +osfmk/i386/commpage/bcopy_sse3x_64.s standard +osfmk/i386/commpage/bcopy_sse42.s standard +osfmk/i386/commpage/bcopy_sse42_64.s standard +osfmk/i386/commpage/bzero_scalar.s standard +osfmk/i386/commpage/bzero_sse2.s standard +osfmk/i386/commpage/bzero_sse2_64.s standard +osfmk/i386/commpage/bzero_sse42.s standard +osfmk/i386/commpage/bzero_sse42_64.s standard +osfmk/i386/commpage/memset_pattern_sse2.s standard +osfmk/i386/commpage/memset_pattern_sse2_64.s standard +osfmk/i386/commpage/longcopy_sse3x.s standard +osfmk/i386/commpage/longcopy_sse3x_64.s standard +osfmk/i386/commpage/commpage_sigs.c standard +osfmk/i386/commpage/fifo_queues.s standard + +osfmk/i386/AT386/conf.c standard +osfmk/i386/AT386/model_dep.c standard + +osfmk/i386/lapic.c standard +osfmk/i386/mp.c standard + +osfmk/i386/acpi.c standard + +osfmk/i386/mtrr.c standard + +osfmk/console/i386/serial_console.c optional com device-driver + +osfmk/console/panic_dialog.c optional vc device-driver +osfmk/console/video_console.c optional vc device-driver +osfmk/console/i386/video_scroll.c optional vc device-driver + +osfmk/kern/etap_map.c optional etap device-driver + +#osfmk/profiling/x86_64/profile-md.c optional gprof +#osfmk/profiling/x86_64/profile-asm.s optional gprof +#osfmk/profiling/profile-kgmon.c optional gprof +#osfmk/profiling/profile-mk.c optional gprof + +osfmk/kdp/ml/x86_64/kdp_machdep.c optional mach_kdp +osfmk/kdp/ml/x86_64/kdp_vm.c optional mach_kdp +osfmk/kdp/ml/i386/kdp_x86_common.c optional mach_kdp + +osfmk/i386/hibernate_i386.c optional hibernation +osfmk/i386/hibernate_restore.c optional hibernation + 
+osfmk/chud/i386/chud_osfmk_callback_i386.c standard +osfmk/chud/i386/chud_cpu_i386.c standard +osfmk/chud/i386/chud_thread_i386.c standard + +osfmk/i386/vmx/vmx_cpu.c optional config_vmx +osfmk/i386/vmx/vmx_shims.c optional config_vmx + +# DUMMIES TO FORCE GENERATION OF .h FILES +#osfmk/OPTIONS/ln optional ln +#osfmk/OPTIONS/eisa optional eisa +#osfmk/OPTIONS/himem optional himem +#osfmk/OPTIONS/ec optional ec +#osfmk/OPTIONS/hi_res_clock optional hi_res_clock + + +osfmk/i386/startup64.c optional x86_64 +osfmk/x86_64/idt64.s optional x86_64 diff --git a/osfmk/console/i386/kdasm.s b/osfmk/console/i386/kdasm.s deleted file mode 100644 index e52429b33..000000000 --- a/osfmk/console/i386/kdasm.s +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * Some inline code to speed up major block copies to and from the - * screen buffer. - * - * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989. - * All rights reserved. - * - * orc!eugene 28 Oct 1988 - * - */ -/* - * Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc., - * Cupertino, California. 
- * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies and that both the copyright notice and this permission notice - * appear in supporting documentation, and that the name of Olivetti - * not be used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. - * - * OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, - * IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, - * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION - * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* $ Header: $ */ - - - -/* - * Function: kd_slmwd() - * - * This function "slams" a word (char/attr) into the screen memory using - * a block fill operation on the 386. 
- * - */ - -#define start 0x08(%ebp) -#define count 0x0c(%ebp) -#define value 0x10(%ebp) - - .text - .align 2 - .globl _kd_slmwd - -_kd_slmwd: - pushl %ebp - movl %esp, %ebp - pushl %edi - - movl start, %edi - movl count, %ecx - movw value, %ax - cld - rep - stosw - - popl %edi - leave - ret -#undef start -#undef count -#undef value - -/* - * "slam up" - */ - -#define from 0x08(%ebp) -#define to 0x0c(%ebp) -#define count 0x10(%ebp) - .align 2 - .globl _kd_slmscu - -_kd_slmscu: - pushl %ebp - movl %esp, %ebp - pushl %esi - pushl %edi - - movl from, %esi - movl to, %edi - movl count, %ecx - cmpl %edi, %esi - cld - rep - movsw - - popl %edi - popl %esi - leave - ret - -/* - * "slam down" - */ - .align 2 - .globl _kd_slmscd - -_kd_slmscd: - pushl %ebp - movl %esp, %ebp - pushl %esi - pushl %edi - - movl from, %esi - movl to, %edi - movl count, %ecx - cmpl %edi, %esi - std - rep - movsw - cld - - popl %edi - popl %esi - leave - ret -#undef from -#undef to -#undef count diff --git a/osfmk/console/i386/serial_console.c b/osfmk/console/i386/serial_console.c index 37f29e101..36cb50feb 100644 --- a/osfmk/console/i386/serial_console.c +++ b/osfmk/console/i386/serial_console.c @@ -208,6 +208,10 @@ _cnputc(char c) mp_enable_preemption(); } +void cnputc_unbuffered(char c) { + _cnputc(c); +} + void cnputcusr(char c) { diff --git a/osfmk/console/i386/text_console.c b/osfmk/console/i386/text_console.c deleted file mode 100644 index 3a1ff1f7b..000000000 --- a/osfmk/console/i386/text_console.c +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * text_console.c - * - * VGA text console support. - */ - -#include -#include -#include "text_console.h" - -/* - * Macros and typedefs. - */ -typedef short csrpos_t; /* cursor position, ONE_SPACE bytes per char */ - -#define ONE_SPACE 2 /* bytes per character */ -#define ONE_LINE (vga_cols * ONE_SPACE) /* number of bytes in line */ -#define ONE_PAGE (vga_rows * ONE_LINE) /* number of bytes in page */ -#define SPACE_CHAR 0x20 - -#define VGA_FB_START 0x0b8000 -#define VGA_FB_SIZE 0x8000 -#define VGA_IDX_REG 0x3d4 -#define VGA_IO_REG 0x3d5 - -/* - * Commands sent to graphics adapter. - */ -#define VGA_C_START 0x0a /* cursor start position, on/off bit */ -#define VGA_C_LOW 0x0f /* return low byte of cursor addr */ -#define VGA_C_HIGH 0x0e /* high byte */ - -/* - * Attributes for character sent to display. - */ -#define VGA_ATTR_NORMAL 0x07 -#define VGA_ATTR_REVERSE 0x70 - -/* - * Cursor Start Register bit fields. 
- */ -#define VGA_CURSOR_CS 0x1F -#define VGA_CURSOR_ON 0x20 - -/* - * Convert from XY coordinate to a location in display memory. - */ -#define XY_TO_CSRPOS(x, y) (((y) * vga_cols + (x)) * ONE_SPACE) - -/* - * Globals. - */ -static short vga_idx_reg; /* location of VGA index register */ -static short vga_io_reg; /* location of VGA data register */ -static short vga_cols = 80; /* number of columns */ -static short vga_rows = 25; /* number of rows */ -static char vga_attr; /* current character attribute */ -static char vga_attr_rev; /* current reverse attribute */ -static char vga_cursor_start; /* cached cursor start scan line */ -static unsigned char *vram_start; /* VM start of VGA frame buffer */ - -/* - * Functions in kdasm.s. - */ -extern void kd_slmwd(unsigned char * pos, int count, unsigned short val); -extern void kd_slmscu(unsigned char * from, unsigned char * to, int count); -extern void kd_slmscd(unsigned char * from, unsigned char * to, int count); - -/* - * move_up - * - * Block move up for VGA. - */ -static void -move_up( csrpos_t from, - csrpos_t to, - int count) -{ - if (vram_start == 0) return; - kd_slmscu( vram_start + from, vram_start + to, count ); -} - -/* - * move_down - * - * Block move down for VGA. - */ -static void -move_down( csrpos_t from, - csrpos_t to, - int count ) -{ - if (vram_start == 0) return; - kd_slmscd( vram_start + from, vram_start + to, count ); -} - -/* - * clear_block - * - * Fast clear for VGA. - */ -static void -clear_block( csrpos_t start, - int size, - char attr) -{ - if (vram_start == 0) return; - kd_slmwd( vram_start + start, size, - ((unsigned short) attr << 8) + SPACE_CHAR); -} - -/* - * set_cursor_position - * - * This function sets the hardware cursor position - * on the screen. 
- */ -static void -set_cursor_position( csrpos_t newpos ) -{ - short curpos; /* position, not scaled for attribute byte */ - - curpos = newpos / ONE_SPACE; - - outb(vga_idx_reg, VGA_C_HIGH); - outb(vga_io_reg, (unsigned char)(curpos >> 8)); - - outb(vga_idx_reg, VGA_C_LOW); - outb(vga_io_reg, (unsigned char)(curpos & 0xff)); -} - -/* - * set_cursor_enable - * - * Allow the cursor to be turned on or off. - */ -static void -set_cursor_enable( boolean_t enable ) -{ - outb(vga_idx_reg, VGA_C_START); - outb(vga_io_reg, vga_cursor_start | - (enable == TRUE ? VGA_CURSOR_ON : 0)); -} - -/* - * display_char - * - * Display attributed character for VGA (mode 3). - */ -static void -display_char( csrpos_t pos, /* where to put it */ - char ch, /* the character */ - char attr ) /* its attribute */ -{ - if (vram_start == 0) return; - *(vram_start + pos) = ch; - *(vram_start + pos + 1) = attr; -} - -/* - * vga_init - * - * Initialize the VGA text console. - */ -static void -vga_init(int cols, int rows, unsigned char * addr) -{ - vram_start = addr; - vga_idx_reg = VGA_IDX_REG; - vga_io_reg = VGA_IO_REG; - vga_rows = rows; - vga_cols = cols; - vga_attr = VGA_ATTR_NORMAL; - vga_attr_rev = VGA_ATTR_REVERSE; - - /* cache cursor start position */ - outb(vga_idx_reg, VGA_C_START); - vga_cursor_start = inb(vga_io_reg) & VGA_CURSOR_CS; - - /* defaults to a hidden hw cursor */ - set_cursor_enable( FALSE ); -} - -/* - * tc_scroll_up - * - * Scroll the screen up 'n' character lines. - */ -void -tc_scroll_up(int lines, __unused unsigned int top, __unused unsigned int bottom) -{ - csrpos_t to; - csrpos_t from; - int size; - - /* scroll up */ - to = 0; - from = ONE_LINE * lines; - size = ( ONE_PAGE - ( ONE_LINE * lines ) ) / ONE_SPACE; - move_up(from, to, size); - - /* clear bottom line */ - to = ( ( vga_rows - lines) * ONE_LINE ); - size = ( ONE_LINE * lines ) / ONE_SPACE; - clear_block(to, size, vga_attr); -} - -/* - * tc_scroll_down - * - * Scrolls the screen down 'n' character lines. 
- */ -void -tc_scroll_down(int lines, __unused unsigned int top, - __unused unsigned int bottom) -{ - csrpos_t to; - csrpos_t from; - int size; - - /* move down */ - to = ONE_PAGE - ONE_SPACE; - from = ONE_PAGE - ( ONE_LINE * lines ) - ONE_SPACE; - size = ( ONE_PAGE - ( ONE_LINE * lines ) ) / ONE_SPACE; - move_down(from, to, size); - - /* clear top line */ - to = 0; - size = ( ONE_LINE * lines ) / ONE_SPACE; - clear_block(to, size, vga_attr); -} - -/* Default colors for 16-color palette */ -enum { - kVGAColorBlack = 0, - kVGAColorBlue, - kVGAColorGreen, - kVGAColorCyan, - kVGAColorRed, - kVGAColorMagenta, - kVGAColorBrown, - kVGAColorWhite, - kVGAColorGray, - kVGAColorLightBlue, - kVGAColorLightGreen, - kVGAColorLightCyan, - kVGAColorLightRed, - kVGAColorLightMagenta, - kVGAColorLightBrown, - kVGAColorBrightWhite -}; - -/* - * tc_update_color - * - * Update the foreground / background color. - */ -void -tc_update_color( int color, int fore ) -{ - unsigned char mask_on, mask_off; - - switch ( color ) - { - case 1: mask_on = kVGAColorRed; break; - case 3: mask_on = kVGAColorLightBrown; break; - case 4: mask_on = kVGAColorBlue; break; - case 6: mask_on = kVGAColorCyan; break; - default: mask_on = color; break; - } - - if ( fore ) - { - mask_off = 0x0f; - } - else - { - mask_off = 0xf0; - mask_on <<= 4; - } - - vga_attr = (vga_attr & ~mask_off) | mask_on; - - vga_attr_rev = ( ((vga_attr << 4) & 0xf0) | - ((vga_attr >> 4) & 0x0f) ); -} - -/* - * tc_show_cursor - * - * Show the hardware cursor. - */ -void -tc_show_cursor(unsigned int x, unsigned int y) -{ - set_cursor_position( XY_TO_CSRPOS(x, y) ); - set_cursor_enable( TRUE ); -} - -/* - * tc_hide_cursor - * - * Hide the hardware cursor. - */ -void -tc_hide_cursor(__unused unsigned int x, __unused unsigned int y) -{ - set_cursor_enable( FALSE ); -} - -/* - * tc_clear_screen - * - * Clear the entire screen, or a portion of the screen - * relative to the current cursor position. 
- */ -void -tc_clear_screen(unsigned int x, unsigned int y, __unused unsigned int top, - __unused unsigned int bottom, int operation) -{ - csrpos_t start; - int count; - - switch ( operation ) - { - case 0: /* To end of screen */ - start = XY_TO_CSRPOS(x, y); - count = ONE_PAGE - start; - break; - case 1: /* To start of screen */ - start = 0; - count = XY_TO_CSRPOS(x, y) + ONE_SPACE; - break; - default: - case 2: /* Whole screen */ - start = 0; - count = ONE_PAGE; - break; - } - clear_block(start, count, vga_attr); -} - -/* - * tc_paint_char - * - * Display a character on screen with the given coordinates, - * and attributes. - */ -void -tc_paint_char(unsigned int x, unsigned int y, unsigned char ch, int attrs, - __unused unsigned char ch_previous, __unused int attrs_previous) -{ - char my_attr = vga_attr; - - if ( attrs & 4 ) my_attr = vga_attr_rev; - - display_char( XY_TO_CSRPOS(x, y), ch, vga_attr ); -} - -/* - * tc_enable - * - * Enable / disable the console. - */ -void -tc_enable(__unused boolean_t enable) -{ - -} - -/* - * tc_initialize - * - * Must be called before any other exported functions. - */ -void -tc_initialize(struct vc_info * vinfo_p) -{ - vinfo_p->v_rows = vinfo_p->v_height; - vinfo_p->v_columns = vinfo_p->v_width; - - vga_init( vinfo_p->v_columns, - vinfo_p->v_rows, - (unsigned char *) vinfo_p->v_baseaddr); -} diff --git a/osfmk/console/i386/text_console.h b/osfmk/console/i386/text_console.h deleted file mode 100644 index f971aaf19..000000000 --- a/osfmk/console/i386/text_console.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#ifndef _TEXT_CONSOLE_H_ -#define _TEXT_CONSOLE_H_ - -void tc_paint_char(unsigned int x, unsigned int y, unsigned char ch, int attrs, - unsigned char ch_previous, int attrs_previous); -void tc_scroll_down(int lines, unsigned int top, unsigned int bottom); -void tc_scroll_up(int lines, unsigned int top, unsigned int bottom); -void tc_clear_screen(unsigned int x, unsigned int y, unsigned int top, - unsigned int bottom, int operation); -void tc_show_cursor(unsigned int x, unsigned int y); -void tc_hide_cursor(unsigned int x, unsigned int y); -void tc_enable(boolean_t enable); -void tc_initialize(struct vc_info * vinfo_p); -void tc_update_color(int color, int fore); - -#endif /* !_TEXT_CONSOLE_H_ */ diff --git a/osfmk/console/i386/video_scroll.c b/osfmk/console/i386/video_scroll.c index 0f607a651..4c84776c7 100644 --- a/osfmk/console/i386/video_scroll.c +++ b/osfmk/console/i386/video_scroll.c @@ -31,16 +31,16 @@ extern void bcopy(const void *, void *, size_t); -void video_scroll_up(unsigned long 
start, - unsigned long end, - unsigned long dest) +void video_scroll_up(void *start, + void *end, + void *dest) { - bcopy((void *) start, (void *) dest, (end - start) << 2); + bcopy(start, dest, ((char*)end - (char*)start) << 2);; } -void video_scroll_down(unsigned long start, /* HIGH addr */ - unsigned long end, /* LOW addr */ - unsigned long dest) /* HIGH addr */ +void video_scroll_down(void *start, /* HIGH addr */ + void *end, /* LOW addr */ + void *dest) /* HIGH addr */ { - bcopy((void *) end, (void *) dest, (start - end) << 2); + bcopy(end, dest, ((char*)start - (char*)end) << 2); } diff --git a/osfmk/console/panic_dialog.c b/osfmk/console/panic_dialog.c index 78754910b..29fa14890 100644 --- a/osfmk/console/panic_dialog.c +++ b/osfmk/console/panic_dialog.c @@ -158,7 +158,6 @@ panic_dialog_test( void ) unsigned long o_panic_caller = panic_caller; unsigned int o_panicDebugging = panicDebugging; - panicDebugging = TRUE; panic_caller = (unsigned long)(char *)__builtin_return_address(0); logPanicDataToScreen = FALSE; @@ -197,8 +196,8 @@ draw_panic_dialog( void ) /* set up to draw background box */ /* by locating where the upper left corner is placed */ - pd_x = (vinfo.v_width/2) - panic_dialog->pd_width/2; - pd_y = (vinfo.v_height/2) - panic_dialog->pd_height/2; + pd_x = (int)((vinfo.v_width/2) - panic_dialog->pd_width/2); + pd_y = (int)((vinfo.v_height/2) - panic_dialog->pd_height/2); /* draw panic dialog at pd_x/pd_y */ panic_blit_rect(pd_x, pd_y, panic_dialog->pd_width, @@ -233,7 +232,7 @@ draw_panic_dialog( void ) panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( ':' ); for ( count=8; count != 0; count-- ) { - nibble = (panic_caller >> ((count-1)<<2)) &0xF; + nibble = (int)((panic_caller >> ((count-1)<<2)) &0xF); panic_num_chars[indx++] = nibble; panic_dialog_info[panic_dialog_count].pixels += pixels_needed_to_blit_digit( nibble ); } @@ -326,13 +325,13 @@ draw_panic_dialog( void ) /* vertical alignment for information to be displayed 
*/ - panic_info_y = (vinfo.v_height/2) + panic_dialog->pd_height/2 - (panic_dialog->pd_info_height); + panic_info_y = (int)((vinfo.v_height/2) + panic_dialog->pd_height/2 - (panic_dialog->pd_info_height)); /* blit out all the information we gathered */ switch ( panic_dialog_count ) { case 1 : /* one item is centered */ - panic_info_x = (vinfo.v_width/2) - (panic_dialog_info[0].pixels/2); + panic_info_x = (int)((vinfo.v_width/2) - (panic_dialog_info[0].pixels/2)); for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++) blit_digit(panic_dialog_info[0].chars[indx]); @@ -340,13 +339,13 @@ draw_panic_dialog( void ) case 2 : /* left centered and right centered */ x1 = ((panic_dialog->pd_width/2) - panic_dialog_info[0].pixels)/2; - panic_info_x = ((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1; + panic_info_x = (int)(((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1); for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++) blit_digit(panic_dialog_info[0].chars[indx]); x2 = ((panic_dialog->pd_width/2) - panic_dialog_info[1].pixels)/2; - panic_info_x = (vinfo.v_width/2) + x2; + panic_info_x = (int)((vinfo.v_width/2) + x2); for (indx=1; indx < panic_dialog_info[1].chars[0]; indx++) blit_digit(panic_dialog_info[1].chars[indx]); @@ -355,18 +354,18 @@ draw_panic_dialog( void ) case 3 : /* left centered, middle and right centered */ x1 = ((panic_dialog->pd_width/2) - panic_dialog_info[0].pixels - (panic_dialog_info[1].pixels/2))/2; - panic_info_x = ((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1; + panic_info_x = (int)(((vinfo.v_width/2) - (panic_dialog->pd_width/2)) + x1); for (indx=1; indx < panic_dialog_info[0].chars[0]; indx++) blit_digit(panic_dialog_info[0].chars[indx]); - panic_info_x = (vinfo.v_width/2) - (panic_dialog_info[1].pixels/2); + panic_info_x = (int)((vinfo.v_width/2) - (panic_dialog_info[1].pixels/2)); for (indx=1; indx < panic_dialog_info[1].chars[0]; indx++) blit_digit(panic_dialog_info[1].chars[indx]); x2 = ((panic_dialog->pd_width/2) 
- panic_dialog_info[2].pixels - (panic_dialog_info[1].pixels/2))/2; - panic_info_x = (vinfo.v_width/2) + x2 + (panic_dialog_info[1].pixels/2); + panic_info_x = (int)((vinfo.v_width/2) + x2 + (panic_dialog_info[1].pixels/2)); for (indx=1; indx < panic_dialog_info[2].chars[0]; indx++) blit_digit(panic_dialog_info[2].chars[indx]); @@ -402,7 +401,7 @@ panic_dialog_set_image( const unsigned char * ptr, unsigned int size ) /* if ptr is NULL, restore panic image to built-in default */ if ( ptr == NULL ) { newimage = &panic_dialog_default; - newsize = sizeof(struct panicimage) + newimage->pd_dataSize; + newsize = (unsigned int)(sizeof(struct panicimage) + newimage->pd_dataSize); } else { newimage = (const struct panicimage *)ptr; @@ -448,7 +447,7 @@ panic_dialog_verify( const struct panicimage * newimage, unsigned int size ) if ( size < (sizeof(struct panicimage) + newimage->pd_dataSize) ) return EINVAL; - if ( newimage->pd_tag != 'RNMp' ) + if ( newimage->pd_tag != 0x524E4D70 /* 'RNMp' */ ) return EINVAL; size = newimage->pd_dataSize-CLUT_SIZE; @@ -479,6 +478,9 @@ static void panic_blit_rect_16(unsigned int x, unsigned int y, static void panic_blit_rect_32(unsigned int x, unsigned int y, unsigned int width, unsigned int height, int transparent, const unsigned char *dataPtr); +static void panic_blit_rect_30(unsigned int x, unsigned int y, + unsigned int width, unsigned int height, + int transparent, const unsigned char *dataPtr); static int decode_rle(const unsigned char *dataPtr, unsigned int *quantity, unsigned int *depth, const unsigned char **value); @@ -562,6 +564,9 @@ panic_blit_rect(unsigned int x, unsigned int y, unsigned int width, case 32: panic_blit_rect_32( x, y, width, height, transparent, dataPtr); break; + case 30: + panic_blit_rect_30( x, y, width, height, transparent, dataPtr); + break; } } @@ -613,7 +618,7 @@ panic_blit_rect_8(unsigned int x, unsigned int y, unsigned int width, } } - dst = (volatile unsigned char *) (((int)dst) + vinfo.v_rowbytes); + dst 
= (volatile unsigned char *) (((uintptr_t)dst) + vinfo.v_rowbytes); } } @@ -663,7 +668,7 @@ panic_blit_rect_16(unsigned int x, unsigned int y, unsigned int width, } } - dst = (volatile unsigned short *) (((int)dst) + vinfo.v_rowbytes); + dst = (volatile unsigned short *) (((uintptr_t)dst) + vinfo.v_rowbytes); } } @@ -713,10 +718,65 @@ panic_blit_rect_32(unsigned int x, unsigned int y, unsigned int width, } } - dst = (volatile unsigned int *) (((int)dst) + vinfo.v_rowbytes); + dst = (volatile unsigned int *) (((uintptr_t)dst) + vinfo.v_rowbytes); } } +/* + * panic_blit_rect_30 decodes the RLE encoded image data on the fly, and fills + * in each of the three pixel values from the clut (RGB) for each pixel and + * writes it to the screen. + */ + +static void +panic_blit_rect_30(unsigned int x, unsigned int y, unsigned int width, + unsigned int height, __unused int transparent, + const unsigned char *dataPtr) +{ + volatile unsigned int * dst; + unsigned int line, col, i; + unsigned int quantity, index, data, depth; + const unsigned char *value; + unsigned int in; + + dst = (volatile unsigned int *) (vinfo.v_baseaddr + + (y * vinfo.v_rowbytes) + + (x * 4)); + + quantity = 0; + i = 0; + + for( line = 0; line < height; line++) { + for( col = 0; col < width; col++) { + + if (quantity == 0) { + dataPtr += decode_rle(dataPtr, &quantity, &depth, &value); + i = 0; + } + + index = value[i++] * 3; + in = panic_dialog_clut[index + 0]; + data = (in << 2) | (in >> 6); + + in = panic_dialog_clut[index + 1]; + data |= (in << (2 + 10)) | ((3 << 10) & (in << 4)); + + in = panic_dialog_clut[index + 2]; + data |= (in << (2 + 20)) | ((3 << 20) & (in << 14)); + + *(dst + col) = data; + + if ( i == depth ) { + i = 0; + quantity--; + } + } + + dst = (volatile unsigned int *) (((uintptr_t)dst) + vinfo.v_rowbytes); + } +} + + /* decode_rle decodes a single quantity/value run of a "modified-RLE" encoded image. 
The encoding works as follows: @@ -782,24 +842,26 @@ decode_rle(const unsigned char *dataPtr, unsigned int *quantity, void dim_screen(void) { - unsigned long *p, *endp, *row; + unsigned int *p, *endp, *row; int col, rowline, rowlongs; - register unsigned long mask; + register unsigned int mask; if(!vinfo.v_depth) return; if ( vinfo.v_depth == 32 ) mask = 0x007F7F7F; + else if ( vinfo.v_depth == 30 ) + mask = (0x1ff<<20) | (0x1ff<<10) | 0x1ff; else if ( vinfo.v_depth == 16 ) mask = 0x3DEF3DEF; else return; - rowline = vinfo.v_rowscanbytes / 4; - rowlongs = vinfo.v_rowbytes / 4; + rowline = (int)(vinfo.v_rowscanbytes / 4); + rowlongs = (int)(vinfo.v_rowbytes / 4); - p = (unsigned long*) vinfo.v_baseaddr; + p = (unsigned int*) vinfo.v_baseaddr; endp = p + (rowlongs * vinfo.v_height); for (row = p ; row < endp ; row += rowlongs) { diff --git a/osfmk/console/panic_image.c b/osfmk/console/panic_image.c index ac216f747..be95f420e 100644 --- a/osfmk/console/panic_image.c +++ b/osfmk/console/panic_image.c @@ -1,4 +1,4 @@ -/* autogenerated with genimage.c using panic_dialogqt8.qtif as image input */ +/* autogenerated with genimage.c using images/panic_dialog.qtif as image input */ /* and the built-in appleClut8 for the color look up table (CLUT) */ static const struct panicimage { @@ -12,1673 +12,2611 @@ static const struct panicimage { unsigned char pd_info_color[2]; unsigned char data[]; } panic_dialog_default = { - 0x8d8981f2, 0x00006b08, 0x524e4d70, 472, 255, 1, 20, { 0x00, 0xfd }, + 0x22c84a66, 0x0000a5a6, 0x524e4d70, 460, 285, 1, 20, { 0x00, 0xfd }, { -0x97,0xe8,0x05,0xfd,0x02,0xfb,0x81,0x9b,0x0b,0xfd,0x02,0x81,0xfb,0x86,0xfd,0x02, -0xfb,0x81,0x8b,0xfd,0x02,0xf9,0xac,0x94,0x02,0xfd,0x02,0xac,0xf9,0x96,0x0a,0xfd, -0x02,0x2b,0x2b,0x84,0xfd,0x02,0x2b,0xf8,0x90,0x03,0xfd,0x02,0x2b,0x00,0x99,0x0a, -0xfd,0x02,0x81,0x00,0x84,0xfd,0x02,0xf8,0x2b,0x8a,0xfd,0x02,0x00,0x2b,0x86,0xfd, -0x02,0x2b,0x00,0x8b,0xfd,0x02,0x00,0xf9,0x94,0x02,0xfd,0x02,0xf9,0x00,0x92,0x01, 
-0xfd,0x84,0x00,0x01,0xf7,0x9e,0x08,0xfd,0x09,0xfe,0xf9,0x00,0x81,0xfd,0xfd,0x81, -0x00,0xac,0x90,0x03,0xfd,0x02,0x2b,0x00,0x86,0xfd,0x02,0x81,0xfb,0x92,0x02,0xfd, -0x02,0xfb,0x81,0x90,0x01,0xfd,0x02,0xf9,0xac,0x92,0x05,0xfd,0x02,0x81,0xfb,0x97, -0x01,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x02,0x2b,0x00,0x8a,0xfd,0x02,0x00,0x2b,0x86, -0xfd,0x02,0x2b,0x00,0x8b,0xfd,0x02,0x00,0xf9,0x9f,0x01,0xfd,0x02,0xac,0xf9,0x83, -0xfd,0x02,0xf9,0x00,0x92,0x01,0xfd,0x06,0x00,0xf7,0xf9,0xf7,0x00,0x2b,0x9f,0x08, -0xfd,0x06,0x2b,0x00,0xfd,0xfd,0x00,0xf8,0x91,0x03,0xfd,0x02,0x2b,0x00,0x86,0xfd, -0x02,0x00,0x2b,0x92,0x02,0xfd,0x02,0x2b,0x00,0x90,0x01,0xfd,0x02,0x00,0xf9,0x92, -0x05,0xfd,0x02,0x00,0x2b,0x97,0x01,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x02,0x2b,0x00, -0x8a,0xfd,0x02,0x00,0x2b,0x86,0xfd,0x02,0x2b,0x00,0x8b,0xfd,0x03,0x00,0xf9,0xfe, -0x9e,0x01,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xfa,0x00,0x92,0x01,0xfd,0x06,0x00, -0xfa,0xfd,0xfd,0x2b,0x00,0x9f,0x08,0xfd,0x05,0xac,0x00,0xf7,0xf8,0x00,0x83,0xfd, -0x01,0xac,0x84,0x00,0x09,0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd, -0x06,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x83,0xfd,0x04,0x2b,0x00,0x00,0x2b,0x83,0xfd, -0x01,0xf8,0x83,0x00,0x04,0xac,0xfd,0xfd,0x81,0x85,0x00,0x85,0xfd,0x01,0x2b,0x83, -0x00,0x03,0xf9,0xfd,0xac,0x84,0x00,0x01,0xac,0x86,0xfd,0x07,0x00,0x2b,0x00,0x00, -0xfd,0xfd,0xf8,0x83,0x00,0x04,0xac,0xfd,0xfd,0x2b,0x83,0x00,0x02,0xfd,0xf9,0x83, -0x00,0x03,0x2b,0xfd,0x56,0x83,0x00,0x03,0x2b,0xfd,0xfd,0xa2,0x2b,0x00,0x01,0xf9, -0x84,0x00,0x84,0xfd,0x02,0xf8,0x00,0x84,0xfd,0x04,0x00,0xfb,0xfd,0xf8,0x83,0x00, -0x05,0xf8,0xfd,0xfe,0xf9,0x00,0x83,0xfd,0x08,0x00,0xf9,0xfe,0xf9,0x00,0xf8,0x00, -0x2b,0x85,0xfd,0x02,0xac,0x2b,0x83,0x00,0x03,0xfd,0xfd,0xf8,0x83,0x00,0x0a,0xf8, -0xfd,0xfe,0xf9,0x00,0x56,0x00,0x00,0xf6,0xac,0x83,0x00,0x05,0xac,0xfd,0xfd,0x00, -0xf7,0x83,0x00,0x0b,0x81,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0x2b,0x83, -0x00,0x03,0xf9,0xfd,0x81,0x83,0x00,0x07,0x81,0xfd,0xfd,0x00,0x2b,0x00,0x00,0x88, 
-0xfd,0x03,0xfe,0xf9,0x00,0x84,0xfd,0x05,0x2b,0x00,0xfd,0xfd,0xac,0x84,0x00,0x08, -0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x81,0x85,0x00,0x87,0xfd,0x09,0x2b,0x00,0x00, -0x2b,0x00,0xf9,0xfd,0xfd,0x2b,0x83,0x00,0x13,0x81,0xfd,0x2b,0x2b,0xfd,0xfd,0x2b, -0x00,0xfb,0xfd,0x81,0x00,0xfd,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x86,0xfd,0x84,0x00, -0x07,0xfd,0xf9,0x00,0x81,0x00,0x00,0x2b,0x83,0xfd,0x01,0xf8,0x83,0x00,0x01,0xac, -0x86,0xfd,0x08,0x00,0xf9,0xfd,0xfd,0x2b,0x00,0xfd,0xac,0x84,0x00,0x0e,0xac,0xfd, -0x00,0xf8,0xfd,0xfd,0x00,0x00,0xfd,0xfd,0xf7,0x2b,0xfd,0xac,0x83,0x00,0x03,0xf8, -0xfd,0xfd,0xa2,0x2b,0x00,0x01,0xf9,0x91,0x07,0xfd,0x04,0xf8,0x00,0x00,0x81,0x83, -0xfd,0x0e,0x00,0x2b,0xac,0xfc,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00, -0x86,0xfd,0x1d,0x2b,0x00,0x2b,0xac,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xac,0xfc,0x00, -0xf8,0xfd,0xf9,0x00,0x81,0xfd,0x00,0xf6,0xfe,0xfc,0x00,0xf7,0xfd,0xf9,0x00,0x00, -0x86,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x06,0x00,0x2b,0xac,0xfc,0x2b,0x00,0x86,0xfd, -0x0e,0x00,0x00,0xf9,0xfe,0xfd,0xf9,0x00,0x81,0xfd,0x00,0xf6,0xfe,0xfa,0x00,0xa2, -0xfb,0xfd,0x03,0xfd,0x2b,0x00,0x83,0xfd,0x10,0x81,0xfa,0xfe,0xfa,0x00,0x81,0xfd, -0x2b,0x00,0xf7,0xfe,0xfd,0xfd,0x00,0xf9,0xfe,0x85,0xfd,0x11,0x00,0xf8,0xfd,0xfd, -0xf8,0x00,0xfd,0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x07, -0x00,0xf9,0xfd,0xf9,0x00,0x2b,0xac,0x86,0xfd,0x02,0x00,0x00,0xa2,0xfb,0xfd,0x20, -0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xf9,0x00,0x00,0xfb,0x81,0x00,0x00,0xf9, -0xac,0x00,0xf7,0xfd,0xfd,0x00,0x00,0xf9,0xfe,0xf6,0x00,0xfd,0xfd,0x00,0xf9,0xfe, -0xe2,0xfd,0x00,0x2b,0xfd,0x0d,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0x00, -0x00,0xf9,0xfe,0x89,0xfd,0x01,0xf9,0x87,0x00,0x14,0xfd,0xfd,0x00,0x2b,0xac,0xfc, -0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xac,0x00,0xf7,0xfd,0xf9,0x00,0x00,0x86,0xfd, -0x20,0x2b,0x00,0xfb,0xac,0xf6,0x00,0xfa,0xfd,0xf6,0x00,0xfb,0xfd,0xf7,0x00,0xac, -0xf8,0x00,0xfd,0xfd,0x00,0x00,0xf8,0xfd,0xf7,0x2b,0xfd,0x2b,0x00,0x2b,0xac,0xf7, 
-0x00,0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x0f,0xf9,0x00,0x00,0xfb,0x81,0x00,0x81, -0xfd,0xf9,0x00,0x81,0xfd,0x00,0xf6,0xfe,0x85,0xfd,0x24,0x00,0xf9,0xfe,0x2b,0x00, -0xf9,0xfd,0x00,0x2b,0xac,0xfc,0x2b,0x00,0xfd,0x2b,0x2b,0xfd,0xfb,0x00,0x00,0xfb, -0xfd,0x00,0x56,0xfd,0x00,0x2b,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xf7,0xfe,0x93, -0x07,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0f,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81, -0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x1d,0x2b,0x00,0xfd,0xfd,0xf9,0x00, -0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0x2b,0x00,0xfe,0x2b,0x00,0xf9,0xf9,0xf7,0x00,0xfd, -0xf8,0x00,0xfd,0xfd,0xfe,0x2b,0x00,0x86,0xfd,0x0c,0x00,0x2b,0xfd,0xfd,0xfb,0x00, -0xfb,0xfd,0xfd,0xfb,0x00,0x81,0x85,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0b,0x2b,0x00, -0xf9,0xf9,0xf7,0x00,0xfd,0xfb,0x00,0x2b,0xfb,0x83,0xfd,0x02,0x2b,0x00,0x87,0xfd, -0x05,0x00,0xf9,0xfd,0x2b,0x00,0x84,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x09,0x2b,0x00, -0xfd,0xfd,0x00,0x56,0xfd,0x2b,0x2b,0x83,0xfd,0x05,0x2b,0x00,0xfd,0xf9,0x00,0x83, -0xfd,0x06,0x00,0xf9,0xfd,0xf9,0x00,0xac,0x86,0xfd,0x03,0x81,0x00,0xfb,0x84,0xfd, -0x02,0x2b,0x2b,0x83,0xfd,0x1a,0x2b,0x00,0xfd,0xf9,0x00,0xac,0xfd,0xfd,0x00,0x56, -0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xac,0x00,0xfa,0xfe,0x00,0xf9, -0xe2,0xfd,0xfd,0x00,0x2b,0x0c,0xfd,0xfd,0xf8,0x00,0xfa,0xf9,0xf9,0x00,0x81,0xfd, -0x00,0x2b,0x8b,0xfd,0x1c,0xfa,0x00,0xf9,0xfa,0xf9,0xf9,0x2b,0x00,0xfd,0xfb,0x00, -0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xfd,0x00,0x2b,0xfd,0xf8,0x00,0xfd,0xfd,0xfe,0x2b, -0x00,0x86,0xfd,0x0a,0x00,0xf8,0xfd,0xfd,0xfe,0x00,0xf9,0xfd,0x00,0xf8,0x83,0xfd, -0x13,0x00,0xf7,0xfb,0x00,0xfb,0x81,0x00,0x2b,0x2b,0xfd,0x00,0x56,0xfd,0x2b,0x00, -0xfd,0xfd,0xf9,0x00,0x85,0xfd,0x14,0xfe,0xf9,0x00,0xfd,0xfd,0xfe,0xf9,0x00,0xac, -0xfd,0xfd,0x00,0xf9,0xfd,0x2b,0x00,0xf9,0xf9,0xf7,0x00,0x86,0xfd,0x84,0x00,0x1e, -0x56,0xfe,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xf8,0x00,0xfd,0xf7,0x2b,0x00, -0xf8,0xfd,0x00,0xac,0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0xfd,0x2b,0x00,0x95,0x07, 
-0xfd,0x02,0x00,0x2b,0x83,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x09,0x00,0xf9,0xfd,0x00, -0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x08,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, -0x85,0x00,0x02,0x2b,0xfd,0x86,0x00,0x03,0xfd,0x2b,0x00,0x83,0xfd,0x02,0x2b,0x00, -0x86,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x02,0x00,0xf9,0x85,0xfd, -0x02,0x00,0x2b,0x83,0xfd,0x86,0x00,0x0a,0xfd,0xfd,0xf9,0x00,0x00,0x2b,0xfd,0xfd, -0x2b,0x00,0x83,0xfd,0x01,0xfb,0x84,0x00,0x04,0xf9,0xfd,0x2b,0x00,0x84,0xfd,0x02, -0x00,0xf9,0x86,0xfd,0x09,0x81,0x00,0xfb,0x81,0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd, -0x05,0x2b,0x00,0xfd,0xfa,0x00,0x83,0xfd,0x05,0x00,0xfa,0xfd,0xf9,0x00,0x87,0xfd, -0x02,0xf9,0x00,0x85,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x05,0x2b,0x00,0xfd,0xf9,0x00, -0x83,0xfd,0x0d,0x00,0xf9,0xfe,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfe,0xfd,0xfd, -0xc2,0x00,0xf9,0xfd,0xe2,0xfd,0x00,0x2b,0xfd,0x02,0xfd,0x2b,0x85,0x00,0x04,0xfa, -0xfe,0x00,0x2b,0x8b,0xfd,0x0b,0xf9,0x00,0xfd,0xfd,0xfe,0xfd,0x2b,0x00,0xfd,0xf9, -0x00,0x84,0xfd,0x0e,0x00,0xf9,0xfd,0x00,0x2b,0xfd,0xf6,0x00,0xfe,0xfd,0xfd,0xf6, -0x00,0xfe,0x85,0xfd,0x02,0x00,0xf9,0x83,0xfd,0xc2,0x00,0xf9,0xfe,0x15,0xfd,0xfd, -0x00,0x2b,0xfd,0x00,0xf8,0xf6,0x2b,0xf8,0x00,0xac,0x00,0xfd,0xfd,0x2b,0x00,0xfd, -0xfd,0xf9,0x00,0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x03, -0x00,0xf9,0xfe,0x86,0x00,0x86,0xfd,0x03,0x00,0xf9,0xfe,0x83,0xfd,0x02,0xf9,0x00, -0x84,0xfd,0x02,0x00,0xf9,0xa2,0xfb,0x00,0x07,0x56,0x2b,0x2b,0x56,0x00,0xfd,0xf9, -0x85,0x00,0x04,0xf8,0xfd,0x2b,0x00,0x95,0x07,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0f, -0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86, -0xfd,0x0c,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0x00,0xf7,0xfd,0xfe,0x83,0xfd, -0x04,0x2b,0x00,0xfd,0xfe,0x83,0xfd,0x02,0xf7,0x00,0x83,0xfd,0x02,0x2b,0x00,0x86, -0xfd,0x0c,0x00,0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0x85,0xfd, -0x02,0x00,0x2b,0x83,0xfd,0x05,0x2b,0x00,0xfd,0xfd,0xfe,0x84,0xfd,0x07,0xfe,0xf9, 
-0x00,0xf7,0xfd,0x2b,0x00,0x83,0xfd,0x09,0x00,0xf7,0xfd,0xfd,0x00,0xf9,0xfd,0x2b, -0x00,0x84,0xfd,0x02,0x00,0xf9,0x87,0xfd,0x08,0x00,0x2b,0x2b,0xf7,0xfd,0xfd,0x2b, -0x2b,0x83,0xfd,0x0d,0x2b,0x2b,0xfd,0xf9,0x00,0xfd,0xfd,0xac,0x00,0xf9,0xfd,0xfa, -0x00,0x87,0xfd,0x03,0xfb,0x00,0xfb,0x83,0xfd,0x0b,0xfe,0x2b,0xf6,0xac,0xac,0xfc, -0x2b,0xf6,0xfc,0x56,0x00,0x83,0xfb,0x12,0x00,0xf8,0xfb,0xfb,0x00,0x2b,0xfb,0xfc, -0x00,0x56,0xfb,0xfc,0x81,0x00,0x81,0xac,0x00,0xf9,0xe2,0xfd,0xfd,0x00,0x2b,0x07, -0xfd,0xfd,0xf8,0x00,0xfd,0xfd,0xfe,0x83,0xfd,0x02,0x00,0x2b,0x8b,0xfd,0x02,0xf9, -0x00,0x84,0xfd,0x11,0x2b,0x00,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfe, -0x00,0xf6,0xfe,0xf7,0x00,0x83,0xfd,0x02,0x2b,0x00,0x86,0xfd,0x02,0x00,0x56,0x83, -0xfd,0x05,0x00,0xf9,0xfd,0x00,0xf8,0x83,0xfd,0x13,0x00,0xf8,0xfd,0x2b,0x2b,0x00, -0x81,0xfb,0x00,0xf6,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0x86,0xfd,0x02, -0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x07,0x00,0xf9,0xfd,0x2b,0x00,0xfd, -0xfe,0x88,0xfd,0x02,0x00,0xf9,0x84,0xfd,0x17,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00, -0xfb,0xfd,0x00,0xf7,0x00,0xfe,0xf8,0x00,0x2b,0xf7,0xfd,0xfb,0x00,0x81,0xfd,0xfe, -0x83,0xfd,0x02,0x2b,0x00,0x95,0x07,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x0e,0x00,0x2b, -0xac,0xfc,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xfd,0xf7,0x00,0x00,0x86,0xfd,0x1d,0x2b, -0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0x81,0x00, -0xf7,0xfd,0xfd,0x56,0xfd,0xfb,0x00,0x56,0xfd,0xf8,0x00,0x00,0x86,0xfd,0x02,0x00, -0x00,0x83,0xfd,0x06,0x00,0x2b,0xac,0xfc,0x2b,0x00,0x86,0xfd,0x02,0x00,0x2b,0x83, -0xfd,0x1c,0x81,0x00,0xf7,0xfd,0xfd,0x56,0xfd,0xfb,0xf9,0xfd,0xfb,0x00,0xf8,0xfd, -0xf7,0x00,0xac,0xfd,0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xfe,0xf6,0x00,0x84,0xfd, -0x02,0x00,0xf7,0x87,0xfd,0x18,0xf8,0x00,0x00,0xac,0xfd,0xfd,0xf9,0x00,0xf9,0xfd, -0xf9,0x00,0xf9,0xfe,0xfa,0x00,0x81,0xfb,0x00,0x00,0xf9,0xfd,0xf9,0x00,0x88,0xfd, -0x10,0x00,0x00,0x81,0xfc,0xf7,0xfb,0x56,0x00,0xf8,0xfb,0x56,0x00,0x56,0xfb,0xf8, 
-0x00,0x83,0xfb,0x26,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x00,0x00,0xfa,0x81, -0x00,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xf6,0x00,0x2b,0xfd,0xfd,0x00,0x00,0xfd,0xfd, -0xac,0x00,0x2b,0xfc,0xfd,0xfa,0xfb,0xfd,0x00,0x2b,0x83,0xfd,0x02,0xf9,0x2b,0x86, -0xfd,0x02,0xf9,0x00,0x84,0xfd,0x16,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xac,0xfc,0x2b, -0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfb,0x00,0x56,0xfd,0xf8,0x00,0x00,0x86,0xfd,0x20, -0x2b,0x00,0xac,0xfb,0xf6,0x00,0xfa,0xfd,0x2b,0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd, -0xf8,0x00,0x00,0xfd,0xfd,0x00,0x00,0xf8,0xfe,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00, -0x86,0xfd,0x07,0x81,0x00,0x81,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x09,0x00,0xf9,0xfd, -0x81,0x00,0xf7,0xfd,0xfd,0x56,0x86,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x1b,0x00,0x2b, -0xfc,0xac,0xf6,0x00,0xfe,0xfd,0x2b,0x00,0x2b,0xfd,0xfb,0x00,0x00,0xfb,0xfd,0xfd, -0x00,0x00,0xfb,0xfd,0xfb,0xfa,0xfe,0xf6,0x00,0x95,0x07,0xfd,0x02,0x00,0x2b,0x84, -0xfd,0x01,0xac,0x84,0x00,0x09,0xac,0xfd,0xfd,0x81,0x00,0x00,0x2b,0x2b,0x00,0x86, -0xfd,0x0a,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xfe,0xf7,0x83,0x00,0x04,0x2b, -0xfd,0xfd,0xf9,0x84,0x00,0x08,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0x2b,0x00,0x86,0xfd, -0x06,0x81,0x00,0x00,0xf9,0xfd,0xac,0x84,0x00,0x01,0xac,0x86,0xfd,0x02,0x00,0x2b, -0x84,0xfd,0x01,0xf9,0x84,0x00,0x02,0xfd,0xfb,0x83,0x00,0x11,0x2b,0xfd,0xfd,0xac, -0x00,0x00,0x2b,0xfd,0xf8,0x00,0x00,0x2b,0x2b,0x00,0xfa,0x2b,0x00,0x84,0xfd,0x03, -0xf8,0x00,0x00,0x86,0xfd,0x03,0xac,0x00,0x2b,0x83,0xfd,0x02,0xfe,0xf8,0x83,0x00, -0x01,0xf8,0x83,0xfd,0x09,0x2b,0x00,0x00,0x81,0x00,0xf9,0xfe,0xf9,0x00,0x84,0xfd, -0x05,0xac,0xfc,0xfc,0xfb,0x81,0x84,0x00,0x03,0xfb,0xfb,0xf7,0x83,0x00,0x05,0xf7, -0xfb,0xfb,0x56,0x00,0x83,0xfb,0x02,0x00,0xf8,0xe2,0xfb,0xfb,0x00,0x2b,0x83,0x00, -0x11,0x81,0xfb,0xfb,0xf7,0x00,0x00,0xf7,0x00,0x2b,0xfb,0xfc,0xfa,0x00,0x00,0xf9, -0xfd,0xfb,0x84,0x00,0x04,0xfb,0xfd,0x00,0x2b,0x83,0xfd,0x02,0x2b,0x00,0x85,0xfd, -0x03,0xfe,0xf9,0x00,0x84,0xfd,0x05,0x2b,0x00,0xfd,0xfd,0xac,0x84,0x00,0x0d,0xac, 
-0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0x2b,0x00,0x86,0xfd,0x01,0xac, -0x83,0x00,0x06,0xf9,0x00,0xf9,0xfd,0xfd,0x2b,0x83,0x00,0x13,0xfa,0xfd,0xfd,0xfb, -0x00,0x2b,0xfd,0xfe,0x2b,0x00,0xfb,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x87, -0xfd,0x06,0x2b,0x00,0x00,0xfd,0xfa,0x00,0x83,0xfd,0x05,0x00,0xfa,0xfd,0xfd,0xf9, -0x84,0x00,0x86,0xfd,0x02,0x00,0xfa,0x85,0xfd,0x01,0xac,0x84,0x00,0x0a,0xac,0xfd, -0xfd,0xf8,0x00,0x56,0xfd,0xfd,0x00,0x00,0x83,0xfd,0x02,0xac,0x2b,0x83,0x00,0x04, -0x56,0xfd,0x2b,0x00,0x93,0x09,0xfd,0x01,0xfe,0x9a,0x04,0xfd,0xc2,0xfe,0xfd,0xfd, -0x84,0xfd,0x01,0xfe,0x89,0xfd,0x02,0x00,0xf9,0x87,0xfd,0x01,0xfe,0x8d,0xfd,0x03, -0xac,0xac,0xfc,0x99,0x01,0xfb,0x01,0xfc,0x86,0xfb,0x02,0x00,0xf8,0x91,0x01,0xfb, -0x03,0xfc,0xac,0xac,0x92,0x04,0xfd,0x01,0xfe,0x91,0x05,0xfd,0x01,0xfe,0x92,0x0f, -0xfd,0x02,0xf8,0x00,0x93,0x01,0xfd,0x03,0xac,0xfc,0xfc,0x93,0x02,0xfb,0x02,0x00, -0x56,0x94,0x01,0xfb,0x03,0xfc,0xfc,0xac,0x94,0x18,0xfd,0x03,0x56,0xf9,0xfe,0x8f, -0xfd,0x03,0xac,0xfc,0xfc,0x96,0x02,0xfb,0x02,0x2b,0xfa,0x97,0x01,0xfb,0x03,0xfc, -0xfc,0xac,0x91,0x19,0xfd,0x02,0xac,0xfc,0x95,0x04,0xfb,0x02,0xfc,0xac,0x9c,0x18, -0xfd,0x02,0xac,0xfc,0x9b,0x04,0xfb,0x02,0xfc,0xac,0x9e,0x0f,0xfd,0x02,0xfa,0xfb, -0x9e,0x02,0xfd,0x02,0x81,0xf9,0x9b,0x03,0xfd,0x02,0xf9,0xfc,0x9a,0x01,0xfd,0x02, -0xac,0xfc,0x93,0x01,0xfb,0x02,0xf8,0x81,0x9a,0x03,0xfb,0x02,0xfc,0xac,0x8c,0xfd, -0x02,0xac,0xf9,0x92,0x04,0xfd,0x02,0xfb,0x81,0x9a,0x0a,0xfd,0x02,0x00,0x2b,0x9d, -0x02,0xfd,0x83,0x00,0x01,0x2b,0x9a,0x03,0xfd,0x02,0x00,0xf9,0x98,0x01,0xfd,0x02, -0xac,0xfc,0x95,0x01,0xfb,0x02,0x00,0xf8,0x9c,0x03,0xfb,0x02,0xfc,0xac,0x8a,0xfd, -0x02,0xf9,0x00,0x92,0x01,0xfd,0x84,0x00,0x01,0x56,0x9b,0x02,0xfd,0x02,0xf6,0x00, -0x9a,0x0a,0xfd,0x02,0x00,0x2b,0x8e,0xfd,0x02,0xfb,0xfa,0x83,0xfd,0x02,0x81,0xfb, -0x97,0x01,0xfd,0x03,0x81,0x00,0xac,0x9c,0x03,0xfd,0x02,0x00,0xfa,0x96,0x01,0xfd, -0x02,0xac,0xfc,0x97,0x01,0xfb,0x02,0x00,0x56,0x9e,0x03,0xfb,0x02,0xfc,0xac,0x83, 
-0xfd,0x02,0xac,0xf9,0x83,0xfd,0x02,0xf9,0x00,0x92,0x01,0xfd,0x06,0x00,0xf7,0xf9, -0x2b,0x00,0xf9,0x90,0x01,0xfd,0x02,0xfa,0xfb,0x8f,0xfd,0x02,0xfb,0x81,0x86,0xfd, -0x03,0xfe,0x2b,0x00,0x8e,0xfd,0x02,0xac,0xf9,0x83,0xfd,0x02,0xac,0xf9,0x95,0x09, -0xfd,0x02,0x00,0x2b,0x8e,0xfd,0x02,0x2b,0x00,0x83,0xfd,0x02,0x00,0x2b,0x97,0x01, -0xfd,0x02,0xf9,0x00,0x9d,0x03,0xfd,0x02,0x00,0xf9,0x94,0x01,0xfd,0x02,0xac,0xfc, -0x99,0x01,0xfb,0x03,0x00,0xf8,0xfc,0x9f,0x03,0xfb,0x05,0xfc,0xac,0xfd,0xf9,0x00, -0x83,0xfd,0x02,0xf9,0x00,0x92,0x01,0xfd,0x06,0x00,0xf9,0xfe,0xfd,0x00,0x2b,0x90, -0x01,0xfd,0x02,0x00,0x2b,0x8f,0xfd,0x02,0xf6,0x00,0x87,0xfd,0x02,0x2b,0x00,0x8e, -0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x95,0x09,0xfd,0x10,0x00,0x2b,0x2b, -0x00,0x00,0xf8,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xf9,0x83,0x00,0x02, -0x2b,0x2b,0x83,0x00,0x03,0xf9,0xfd,0xac,0x84,0x00,0x09,0xfc,0xfd,0xfd,0x00,0x2b, -0x2b,0x00,0x00,0x81,0x85,0xfd,0x01,0xf9,0x84,0x00,0x02,0xfd,0xac,0x84,0x00,0x07, -0xac,0xfd,0xfd,0x00,0x2b,0x00,0x00,0x85,0xfd,0x01,0x81,0x83,0x00,0x04,0xf8,0xfd, -0xfd,0xac,0x83,0x00,0x04,0xf8,0xfd,0x2b,0x00,0x83,0xfd,0x04,0x56,0x00,0xfd,0xac, -0x83,0x00,0x03,0xf8,0xfd,0xfd,0xa2,0x2b,0x00,0x02,0xf9,0xfb,0x84,0x00,0x05,0xac, -0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x05,0xac,0x2b,0x00,0x00,0x2b,0x83,0xfd,0x08,0x2b, -0x00,0x00,0xf6,0xfd,0xac,0xac,0xf7,0x83,0x00,0x03,0x2b,0xfb,0x81,0x84,0x00,0x09, -0x81,0xfb,0xfb,0x00,0xf6,0x2b,0x00,0x00,0x56,0x83,0xfb,0x08,0x2b,0x00,0x00,0x2b, -0x00,0x56,0xfb,0x56,0x83,0x00,0x01,0xf7,0x86,0xfb,0x01,0x81,0x84,0x00,0x07,0x81, -0xfb,0xfb,0x00,0x2b,0x00,0x00,0x85,0xfb,0xa2,0xf8,0x00,0x0b,0x00,0x00,0x81,0xfb, -0xfb,0x00,0x2b,0x00,0x00,0xfb,0x81,0x83,0x00,0x04,0x2b,0xfb,0xfb,0x56,0x83,0x00, -0x04,0xf7,0xfb,0xfb,0x2b,0x83,0x00,0x85,0xfb,0x01,0xfc,0x84,0x00,0x07,0xfe,0xf9, -0x00,0x81,0x00,0x00,0x2b,0x83,0xfd,0x01,0xf8,0x83,0x00,0x01,0xac,0x86,0xfd,0x09, -0x00,0xf9,0xfd,0xfd,0x00,0xf8,0xfd,0xfd,0xac,0x83,0x00,0x04,0xf8,0xfd,0xfd,0x81, 
-0x83,0x00,0x03,0xf8,0xfd,0x2b,0x83,0x00,0x02,0xf9,0xfd,0x84,0x00,0x08,0xfa,0xfd, -0xfd,0x00,0x2b,0x00,0x00,0xf9,0x83,0x00,0x01,0x2b,0x85,0xfd,0xa2,0x2b,0x00,0x06, -0x00,0x2b,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x03,0x00,0xf9,0xfd,0x84,0x00,0x01,0xfd, -0x84,0x00,0x03,0xfd,0xfd,0x2b,0x83,0x00,0x09,0xfa,0xfd,0xfd,0x2b,0x00,0xf8,0x00, -0x00,0xf8,0x94,0x08,0xfd,0x12,0x00,0x00,0xf8,0xfd,0x56,0x00,0xfb,0xfd,0x2b,0x00, -0xfd,0xfd,0xf9,0x00,0xfe,0xfd,0x2b,0x00,0x83,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0e, -0x00,0x2b,0xac,0xfc,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xf7,0xfd,0x00,0x00,0x86,0xfd, -0x02,0xf9,0x00,0x83,0xfd,0x0c,0x00,0x2b,0xac,0xfc,0x2b,0x00,0xfd,0xfd,0x00,0x00, -0xf9,0xfe,0x85,0xfd,0x0a,0x00,0x2b,0xfd,0xfd,0xfb,0xfd,0xfd,0x00,0x2b,0xfd,0xc2, -0x81,0x00,0xfb,0x1c,0xfd,0xfd,0x00,0xf8,0xfd,0x00,0x2b,0xfd,0x81,0x00,0xfb,0xfd, -0x2b,0x00,0xf7,0xfd,0xfd,0xfb,0xf9,0xfe,0xfc,0x00,0xf7,0xfd,0xfd,0x00,0xf9,0xfe, -0x84,0xfd,0x32,0x2b,0x00,0xac,0xfd,0xfb,0xfd,0xfd,0x2b,0x00,0xac,0xfc,0x00,0xf7, -0xfc,0xf8,0x00,0x2b,0xfb,0xfb,0xfa,0xfb,0x00,0x00,0x81,0x81,0x00,0x00,0xfb,0xfb, -0x00,0x00,0x2b,0xfb,0x00,0x00,0xfb,0xfb,0x2b,0x00,0xfa,0x81,0x2b,0x00,0xf8,0xfb, -0x00,0x00,0xfb,0xfb,0xfa,0x86,0xfb,0x0b,0x00,0x00,0x81,0x81,0x00,0x00,0xfb,0xfb, -0x00,0x00,0xf8,0x86,0xfb,0x1d,0x56,0x00,0x00,0xfa,0x81,0x00,0x00,0xfb,0xfb,0x00, -0x00,0xfa,0xfb,0xfb,0x00,0x00,0xfb,0xf9,0x00,0xfa,0xfb,0x00,0x00,0xfb,0xfb,0xfa, -0xfb,0x56,0x00,0xa2,0xfa,0xfb,0x85,0xfb,0x02,0x56,0x00,0x83,0xfd,0x0f,0xf9,0x00, -0x00,0xfb,0x81,0x00,0x81,0xfd,0xf9,0x00,0x81,0xfd,0x00,0xf6,0xfe,0x85,0xfd,0x05, -0x00,0xf7,0x56,0x00,0x00,0x83,0xfd,0x10,0x00,0x2b,0xfd,0x81,0x00,0xfb,0xfd,0x00, -0x2b,0xfd,0xfd,0xfb,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x0e,0x56,0xac,0xfd,0x00,0x00, -0xfd,0xfd,0x00,0x00,0xf9,0xfe,0xfd,0x2b,0x00,0x87,0xfd,0x0a,0x2b,0x00,0x2b,0xac, -0xfb,0x00,0xf8,0xfd,0xf9,0x00,0x83,0xfd,0x05,0x00,0xf9,0xfe,0xf9,0x00,0x83,0xfd, -0x02,0xf9,0x00,0x83,0xfd,0x0e,0x2b,0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x2b,0x00, 
-0x2b,0xac,0xf7,0x00,0x94,0x08,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0d,0x00,0xf7,0xfd, -0x2b,0x00,0xfd,0xfe,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x13,0x00,0x2b,0xfd, -0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00, -0x86,0xfd,0x0f,0xfa,0x00,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xfd, -0x00,0x2b,0x87,0xfd,0x03,0x2b,0x00,0x56,0x83,0xfd,0x0a,0xfb,0x00,0x56,0xf9,0xf9, -0x00,0xf7,0xfd,0x00,0x2b,0xc2,0xfd,0xfb,0x00,0x08,0x56,0xf9,0xf9,0x00,0xf7,0xfd, -0x2b,0x00,0x87,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x04,0xf8,0x00, -0xf8,0xfc,0x83,0xfd,0x09,0x00,0x2b,0xf9,0xf9,0x2b,0x00,0xfb,0x00,0x00,0x84,0xfb, -0x13,0xfa,0x00,0xfa,0xfb,0xfb,0xfa,0x00,0xf9,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00, -0xfb,0xfb,0x00,0xf7,0x83,0xfb,0x06,0x00,0x56,0xfb,0x00,0x00,0xf8,0x87,0xfb,0x0c, -0xfa,0x00,0xfa,0xfb,0xfb,0xfa,0x00,0xf9,0xfb,0x00,0xf6,0xfc,0x85,0xfb,0x03,0xfc, -0xf8,0x00,0x83,0xfb,0x13,0xf8,0x00,0xfb,0xfb,0x00,0xf7,0xfb,0xfb,0xfa,0x00,0xf7, -0x56,0xf8,0x00,0x2b,0xfb,0x00,0x00,0xf7,0x83,0xfb,0x04,0xfa,0x00,0x2b,0xf9,0x87, -0xfb,0x13,0xf8,0x00,0xfc,0xac,0xfd,0xf9,0x00,0xac,0xfd,0xfd,0x00,0xf9,0xfd,0x2b, -0x00,0xf9,0xf9,0xf7,0x00,0x86,0xfd,0x84,0x00,0x83,0xfd,0x0b,0xfb,0x00,0x56,0xf9, -0xf9,0x00,0xf7,0xfd,0x2b,0x00,0x56,0x84,0xfd,0x02,0x00,0x2b,0x86,0xfd,0x06,0x2b, -0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x02,0x2b,0x00,0x87,0xfd,0x02,0x2b,0x00,0x83, -0xfd,0x12,0x2b,0x00,0xfd,0xfa,0x00,0xfd,0xfd,0xfe,0x00,0xf9,0xfd,0xf9,0x00,0xfd, -0xfd,0xfe,0xf9,0x00,0x83,0xfd,0x0e,0x00,0xf8,0xfd,0xfd,0xfe,0x00,0x2b,0xfd,0x2b, -0x00,0xfd,0xfd,0xf9,0x00,0x94,0x08,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0d,0x00,0x2b, -0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x06,0x00,0x2b, -0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x09,0x00,0xf9,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00, -0x86,0xfd,0xe2,0xf9,0x00,0xfd,0xfd,0x07,0xfd,0xfd,0x00,0xf9,0xfd,0x00,0x2b,0x87, -0xfd,0x07,0xac,0xf6,0x00,0x00,0x81,0xfd,0xf9,0x85,0x00,0x09,0xf8,0xfd,0xf7,0x00, 
-0xfd,0x2b,0x2b,0xfd,0xf9,0x85,0x00,0x04,0xf8,0xfd,0x2b,0x00,0x83,0xfd,0x02,0xac, -0xf6,0x83,0x00,0x05,0x2b,0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x07,0xfe,0xf8,0x00,0x00, -0xf8,0xfd,0xfd,0x85,0x00,0x04,0x2b,0xfb,0x00,0x2b,0x84,0xfb,0x02,0xf8,0x00,0x84, -0xfb,0x0d,0x00,0xf8,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0xf8,0x83, -0xfb,0x08,0x00,0xf8,0xfc,0x81,0xf6,0x00,0x00,0x56,0x85,0xfb,0x02,0xf8,0x00,0x84, -0xfb,0x05,0x00,0xf8,0xfb,0x00,0x2b,0x87,0xfb,0x02,0x56,0x00,0x83,0xfb,0x09,0xf8, -0x00,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0xf8,0x85,0x00,0x0d,0xf7,0xfb,0x81,0x2b,0x00, -0x00,0x56,0xfb,0xfb,0x56,0x00,0x00,0x2b,0x86,0xfb,0x07,0x56,0x00,0xfb,0xfb,0xac, -0xf9,0x00,0x83,0xfd,0x03,0x00,0xf9,0xfd,0x86,0x00,0x86,0xfd,0x08,0x00,0xf9,0xac, -0x00,0xf7,0xfd,0xfd,0xf9,0x85,0x00,0x0b,0xf8,0xfd,0xac,0xf6,0x00,0x00,0x81,0xfd, -0xfd,0x00,0x2b,0x83,0xfd,0x01,0xf7,0x84,0x00,0x04,0xfd,0xfd,0x00,0x2b,0x83,0xfd, -0x02,0x2b,0x00,0x87,0xfd,0x02,0x2b,0x00,0x83,0xfd,0x05,0x2b,0x00,0xfd,0xf9,0x00, -0x83,0xfd,0x05,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02, -0x00,0xf9,0x83,0xfd,0x09,0x00,0x2b,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x94,0x08, -0xfd,0x02,0x00,0x2b,0x83,0xfd,0x0d,0x00,0xf8,0xfd,0x2b,0x00,0xfd,0xfd,0x56,0x00, -0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x13,0x00,0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd, -0xfb,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x0f,0xf9,0x00,0xfd, -0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0x00,0x2b,0x88,0xfd,0x09,0xfe, -0xac,0x2b,0x00,0xfd,0xfb,0x00,0xfa,0xfe,0x84,0xfd,0x0b,0xac,0x00,0x56,0x00,0xfb, -0xfd,0xfb,0x00,0x81,0xfd,0xfe,0x83,0xfd,0x02,0x2b,0x00,0x83,0xfd,0x0a,0x2b,0x00, -0xfd,0xfd,0x00,0x2b,0xfd,0xfe,0x00,0xf9,0x88,0xfd,0x06,0xf7,0x00,0x81,0xfd,0x00, -0xf7,0x85,0xfb,0x02,0x00,0x00,0x84,0xfb,0x13,0xfa,0x00,0xfa,0xfb,0xfb,0xf9,0x00, -0xfa,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0xf8,0x83,0xfb,0x08,0x00, -0xf8,0xfb,0xfb,0xfc,0x81,0x00,0x00,0x85,0xfb,0x0b,0xfa,0x00,0xfa,0xfb,0xfb,0xf9, 
-0x00,0xfa,0xfb,0x00,0x2b,0x87,0xfb,0x02,0xf8,0x00,0x83,0xfb,0x0b,0xf7,0x00,0xfb, -0xfb,0x00,0xf8,0xfb,0xfb,0xfa,0x00,0xf9,0x87,0xfb,0x03,0x81,0x00,0x00,0x84,0xfb, -0x03,0x56,0x00,0x2b,0x84,0xfb,0x03,0xfc,0xf8,0x00,0x83,0xfb,0x02,0x56,0x00,0x83, -0xfd,0x07,0x00,0xf9,0xfd,0x2b,0x00,0xfd,0xfe,0x88,0xfd,0x0b,0x00,0xfa,0xfd,0xf9, -0x00,0xfb,0xfd,0xfb,0x00,0xfa,0xfe,0x85,0xfd,0x14,0xfe,0xac,0x2b,0x00,0xfd,0xfd, -0x00,0x2b,0xfd,0xfd,0xf8,0x00,0xac,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd, -0x02,0x2b,0x00,0x87,0xfd,0x02,0x2b,0x00,0x83,0xfd,0x0d,0x2b,0x2b,0xfd,0xf9,0x00, -0xfd,0xfd,0xfc,0x00,0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x02, -0x00,0xf8,0x83,0xfd,0x09,0x00,0xf8,0xfd,0x2b,0x00,0xfd,0xfe,0xf9,0x00,0x94,0x08, -0xfd,0x17,0x00,0x00,0xf9,0xfe,0x2b,0x00,0xac,0xfd,0xf7,0x00,0xac,0xf9,0x00,0x00, -0xfd,0xfd,0xf7,0x00,0xac,0xfd,0xfd,0x00,0x00,0x83,0xfd,0x0e,0x00,0x2b,0xac,0xfc, -0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x02,0xfa,0x00,0x83, -0xfd,0x0a,0x00,0x2b,0xac,0xfc,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0x87,0xfd,0x0f,0xf9, -0xfb,0xfd,0xf6,0x00,0xfe,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0xfa,0xfd,0xfd,0x83,0x00, -0x83,0xfd,0x09,0x00,0x00,0xfb,0xfd,0xfb,0x81,0xfd,0x2b,0x00,0x83,0xfd,0x0a,0x00, -0x00,0xfd,0xf9,0x00,0x2b,0xfd,0xfd,0x00,0xfa,0x85,0xfd,0x32,0xfa,0x81,0xfd,0xf9, -0x00,0xfb,0xac,0xf6,0x00,0x56,0xfb,0x81,0xf8,0xfb,0x56,0x00,0x2b,0xfb,0xfa,0x56, -0xfb,0x00,0x00,0x81,0x81,0x00,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb, -0xfb,0x00,0x00,0x81,0xfa,0x00,0x00,0x56,0xfb,0xf8,0xf9,0xfb,0x2b,0x00,0x86,0xfb, -0x0a,0x00,0x00,0x81,0x81,0x00,0x00,0xfb,0xfb,0x00,0x2b,0x87,0xfb,0x0b,0x56,0x00, -0x2b,0x81,0xf9,0x00,0x2b,0xfb,0xfb,0x00,0x56,0x83,0xfb,0x13,0x00,0x00,0xfa,0xfb, -0xfa,0x56,0xfb,0x56,0xfa,0xfb,0x2b,0x00,0xfb,0xfa,0xf8,0xfb,0xfa,0x00,0xf7,0x85, -0xfb,0x13,0xf9,0x00,0xf9,0xfb,0xfb,0x56,0x00,0xac,0xfd,0xfd,0x00,0xfa,0xfd,0x81, -0x00,0xf7,0xfd,0xfd,0x56,0x86,0xfd,0x24,0x00,0xf9,0xfd,0xfe,0xf6,0x00,0xfe,0xfd, 
-0x00,0x00,0xfb,0xfd,0xfb,0xfa,0xfd,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x00, -0xfd,0xfd,0xf7,0x00,0xfc,0xfb,0x00,0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x03,0xf7, -0x00,0xac,0x86,0xfd,0x23,0x2b,0x00,0xf8,0xfd,0xf9,0x00,0xf9,0xfe,0xfa,0x00,0x81, -0xfb,0x00,0x00,0xfa,0xfd,0xfa,0x00,0x81,0xfd,0xfd,0xfa,0x00,0x81,0xfd,0xfd,0x2b, -0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x2b,0xe2,0x00,0xfd,0xfd,0xf9,0x01,0x2b,0x90, -0x08,0xfd,0x02,0x00,0x2b,0x83,0x00,0x04,0x81,0xfd,0xfd,0xac,0x83,0x00,0x0f,0x56, -0x00,0xfd,0xfd,0xac,0x00,0x00,0xf6,0xfd,0x81,0x00,0x00,0xf9,0xfd,0xac,0x84,0x00, -0x09,0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x02,0xf9,0x00,0x83, -0xfd,0x01,0xac,0x84,0x00,0x05,0xac,0xfd,0xfd,0x00,0x2b,0x87,0xfd,0x01,0x2b,0x83, -0x00,0x05,0xfa,0xfd,0xfd,0xac,0x2b,0x83,0x00,0x06,0x56,0xfd,0xfd,0x56,0x00,0xf9, -0x83,0xfd,0x02,0xac,0x2b,0x83,0x00,0x04,0x56,0xfd,0x2b,0x00,0x83,0xfd,0x0a,0x81, -0x00,0x00,0x2b,0xf8,0x00,0xf7,0xfd,0x00,0xf9,0x85,0xfd,0x01,0x56,0x83,0x00,0x05, -0xf8,0xfc,0xfb,0xfc,0x2b,0x83,0x00,0x04,0x2b,0xfb,0xfb,0xf8,0x83,0x00,0x03,0xf8, -0xfb,0x81,0x84,0x00,0x0c,0x81,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb, -0x81,0x83,0x00,0x05,0xf8,0x00,0xf8,0xfb,0x2b,0x83,0x00,0x01,0xf9,0x86,0xfb,0x01, -0x81,0x84,0x00,0x05,0x81,0xfb,0xfb,0x00,0x2b,0x87,0xfb,0x0b,0xf8,0x00,0x2b,0x00, -0x00,0x2b,0xfb,0xfb,0xfc,0x00,0xf8,0x83,0xfb,0x02,0x81,0x2b,0x83,0x00,0x03,0xf8, -0xfb,0x2b,0x83,0x00,0x03,0x56,0xfb,0xfa,0x83,0x00,0x01,0x2b,0x87,0xfb,0x0e,0x2b, -0x00,0x00,0xfb,0xf8,0x00,0xfb,0xfc,0xfd,0x00,0xf9,0xfd,0xfd,0xf9,0x84,0x00,0x86, -0xfd,0x0a,0x00,0xf9,0xfd,0xfd,0xac,0x00,0x2b,0xfd,0xac,0x2b,0x83,0x00,0x03,0x56, -0xfd,0x2b,0x83,0x00,0x08,0x81,0xfd,0xfd,0x81,0x00,0x00,0xf9,0xac,0x83,0x00,0x06, -0x56,0x00,0x00,0xfd,0x00,0x2b,0x83,0xfd,0x04,0xac,0x00,0x00,0xf6,0x85,0xfd,0xa2, -0x2b,0x00,0x18,0x00,0xf8,0xfe,0xac,0xfe,0x2b,0x00,0x00,0x81,0x00,0xf9,0xfd,0xfd, -0x2b,0x00,0x00,0xfd,0xfd,0x2b,0x00,0x00,0xfd,0xfd,0x2b,0x83,0x00,0x0d,0x81,0xfd, 
-0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x93,0x09,0xfd,0x01,0xfe, -0x93,0x03,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x92,0x01,0xfd, -0x01,0xfe,0x8e,0xfd,0x02,0xac,0xfc,0x96,0x02,0xfb,0x01,0xfc,0x9d,0x01,0xfb,0x02, -0x56,0x00,0x9d,0x02,0xfb,0x02,0xfc,0xac,0x8f,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe, -0x86,0xfd,0x01,0xfe,0x9c,0x01,0xfd,0x01,0xfe,0x9b,0x02,0xfd,0x01,0xfe,0x9b,0x0f, -0xfd,0x02,0xac,0xfc,0x94,0x04,0xfb,0x03,0xfc,0xf8,0x00,0x9e,0x02,0xfb,0x02,0xfc, -0xac,0x9e,0x15,0xfd,0x01,0xac,0x97,0x04,0xfb,0x02,0xfa,0x2b,0x90,0x03,0xfb,0x01, -0xac,0x9c,0x15,0xfd,0x01,0xfc,0x9b,0x07,0xfb,0x01,0xfc,0x9a,0x15,0xfd,0x01,0xfc, -0x9d,0x07,0xfb,0x01,0xfc,0x98,0x15,0xfd,0x01,0xfc,0x9d,0x03,0xfb,0x01,0xac,0x84, -0xfd,0x01,0xac,0x9c,0x03,0xfb,0x01,0xfc,0x95,0x15,0xfd,0x02,0xac,0xfc,0x9d,0x03, -0xfb,0x88,0xfd,0x9c,0x03,0xfb,0x02,0xfc,0xac,0x92,0x15,0xfd,0x02,0xac,0xfc,0x9d, -0x03,0xfb,0x8a,0xfd,0x9c,0x03,0xfb,0x02,0xfc,0xac,0x90,0x15,0xfd,0x02,0xac,0xfc, -0x9d,0x03,0xfb,0x01,0xfc,0x8a,0xfd,0x01,0xfc,0x9c,0x03,0xfb,0x02,0xfc,0xac,0x9f, -0x14,0xfd,0x01,0xfc,0x9e,0x03,0xfb,0x8c,0xfd,0x9d,0x03,0xfb,0x01,0xfc,0x9e,0x14, -0xfd,0x01,0xfc,0x9f,0x03,0xfb,0x8c,0xfd,0x9e,0x03,0xfb,0x01,0xfc,0x9c,0x14,0xfd, -0x01,0xfc,0x90,0x04,0xfb,0x8c,0xfd,0x9f,0x03,0xfb,0x01,0xfc,0x9a,0x14,0xfd,0x01, -0xfc,0x91,0x04,0xfb,0x8c,0xfd,0x90,0x04,0xfb,0x01,0xfc,0x98,0x14,0xfd,0x01,0xac, -0x92,0x04,0xfb,0x8c,0xfd,0x91,0x04,0xfb,0x01,0xac,0x96,0x14,0xfd,0x01,0xac,0x93, -0x04,0xfb,0x8c,0xfd,0x92,0x04,0xfb,0x01,0xac,0x94,0x14,0xfd,0x01,0xac,0x94,0x04, -0xfb,0x8c,0xfd,0x93,0x04,0xfb,0x01,0xac,0x93,0x14,0xfd,0x01,0xfc,0x94,0x04,0xfb, -0x8c,0xfd,0x93,0x04,0xfb,0x01,0xfc,0x92,0x14,0xfd,0x01,0xfc,0x95,0x04,0xfb,0x8c, -0xfd,0x94,0x04,0xfb,0x01,0xfc,0x90,0x14,0xfd,0x01,0xac,0x96,0x04,0xfb,0x8c,0xfd, -0x95,0x04,0xfb,0x01,0xac,0x9e,0x13,0xfd,0x01,0xac,0x97,0x04,0xfb,0x8c,0xfd,0x96, -0x04,0xfb,0x01,0xac,0x92,0x0f,0xfd,0x07,0xac,0xf9,0xfd,0xfd,0xfb,0xfa,0xfe,0x95, 
-0x02,0xfd,0x02,0xfa,0xfb,0x85,0xfd,0x02,0xf9,0xac,0x96,0x01,0xfd,0x01,0xfc,0x97, -0x04,0xfb,0x88,0xfd,0x04,0xac,0xf9,0xfd,0xfd,0x96,0x04,0xfb,0x01,0xfc,0x93,0x03, -0xfd,0x03,0xfb,0xfa,0xfe,0x98,0x02,0xfd,0x02,0xf9,0xac,0x96,0x07,0xfd,0x02,0xf7, -0x2b,0x85,0xfd,0x02,0x00,0xac,0x90,0x01,0xfd,0x09,0x00,0x81,0xfd,0xf9,0x00,0xfd, -0xfd,0x2b,0x00,0x96,0x02,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x02,0x2b,0x2b,0x96,0x01, -0xfd,0x01,0xac,0x98,0x04,0xfb,0x88,0xfd,0x06,0xf9,0x00,0xfd,0xfd,0xf7,0x2b,0x93, -0x03,0xfb,0x03,0x2b,0x00,0x81,0x84,0xfb,0x02,0x2b,0x00,0x89,0xfb,0x02,0xf8,0xf6, -0x91,0x03,0xfd,0x02,0x2b,0x00,0x99,0x02,0xfd,0x02,0x00,0xf9,0x96,0x07,0xfd,0x03, -0x56,0x00,0xac,0x83,0xfd,0x02,0xf9,0x00,0x91,0x01,0xfd,0x09,0x2b,0xfb,0xfd,0xf9, -0x00,0xfd,0xfd,0x2b,0x00,0x96,0x02,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x02,0xfa,0x2b, -0x96,0x01,0xfd,0x01,0xac,0x9a,0x02,0xfb,0x02,0xfa,0x56,0x9d,0x01,0xfb,0x88,0xfd, -0x06,0xf9,0x00,0xfd,0xfd,0xf8,0xf7,0x91,0x01,0xfb,0x02,0xfa,0x56,0x90,0x02,0xfb, -0x03,0x00,0x00,0x2b,0x83,0xfb,0x03,0xfa,0x00,0x00,0x89,0xfb,0x02,0xf8,0xf8,0x8a, -0xfd,0x02,0xfb,0xfa,0x95,0x02,0xfd,0x02,0x2b,0x00,0x8d,0xfd,0x03,0xfb,0xfa,0xfe, -0x99,0x01,0xfd,0x02,0x00,0xf9,0x96,0x07,0xfd,0x03,0xfe,0x00,0xf7,0x83,0xfd,0x02, -0x2b,0xf7,0x94,0x01,0xfd,0x06,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x96,0x02,0xfd,0x02, -0x00,0x2b,0x83,0xfd,0x01,0xfe,0x97,0x01,0xfd,0x01,0xfc,0x9a,0x02,0xfb,0x02,0x2b, -0x00,0x9d,0x01,0xfb,0x87,0xfd,0x05,0xfe,0xf9,0x00,0xfd,0xfd,0x93,0x01,0xfb,0x02, -0x2b,0x00,0x90,0x02,0xfb,0x83,0x00,0x06,0xfb,0xfb,0xfc,0xf6,0x00,0x00,0x89,0xfb, -0x02,0xfc,0xfc,0x8a,0xfd,0x02,0x2b,0x00,0x95,0x02,0xfd,0x02,0x2b,0x00,0x8d,0xfd, -0x02,0x2b,0x00,0x9a,0x01,0xfd,0x02,0x00,0xf9,0x97,0x07,0xfd,0x02,0x2b,0x00,0x83, -0xfd,0x05,0x00,0xfb,0xfd,0xfd,0xf8,0x83,0x00,0x17,0xac,0xfd,0xfd,0x00,0xf9,0xfd, -0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd, -0xfc,0x83,0x00,0x03,0xf8,0xfd,0xfd,0x86,0x00,0x86,0xfd,0x0a,0x00,0x2b,0x00,0x00, 
-0xfd,0xfd,0x2b,0x00,0x00,0x2b,0x83,0xfd,0x09,0xf8,0x00,0x00,0x2b,0x00,0x2b,0xfd, -0xfd,0xfa,0x83,0x00,0x10,0xfa,0xfd,0xfd,0x00,0x2b,0x2b,0x00,0x00,0xfa,0xf8,0x00, -0x00,0xf8,0xfd,0xfd,0x2b,0x83,0x00,0x0f,0xf8,0xac,0xfb,0xf6,0x00,0x2b,0x00,0xf8, -0x56,0x00,0xf7,0x00,0x2b,0xfb,0x56,0x83,0x00,0x07,0x56,0xfb,0xfb,0x00,0x2b,0x00, -0x00,0x84,0xfb,0x0a,0xf7,0x00,0x81,0xfb,0xfb,0x81,0x00,0x81,0xfb,0x2b,0x83,0x00, -0x03,0x56,0xfb,0x56,0x83,0x00,0x09,0x2b,0xfb,0x00,0x2b,0x00,0x00,0xac,0xac,0xf7, -0x83,0x00,0x01,0x81,0x86,0xfb,0x01,0x56,0x83,0x00,0x01,0x2b,0x83,0xfb,0x06,0x00, -0xf7,0x00,0x00,0xfd,0xac,0x83,0x00,0x11,0x2b,0x00,0xfd,0xfd,0x2b,0x00,0xfb,0xfb, -0x00,0x2b,0x2b,0x00,0x00,0x56,0xfb,0xfb,0x2b,0x83,0x00,0x03,0xf8,0xac,0xf9,0x83, -0x00,0x03,0x2b,0xfb,0x81,0x83,0x00,0x04,0xf7,0xfb,0xfb,0x2b,0xe2,0x00,0xfb,0xfb, -0xf8,0x04,0x00,0xf7,0x00,0x2b,0x89,0xfb,0x0c,0x00,0x2b,0x00,0xfa,0xfb,0xfb,0x00, -0x2b,0x00,0xfb,0xfb,0xf7,0x83,0x00,0x0f,0x2b,0xfb,0xfb,0xf6,0x00,0xac,0xfd,0x00, -0x2b,0x2b,0x00,0x00,0xfa,0xfd,0xf9,0x83,0x00,0x03,0x2b,0xfd,0xac,0x83,0x00,0x09, -0xf8,0xfd,0xfd,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x83,0xfd,0x07,0x2b,0x00,0x00,0x2b, -0xfd,0xfd,0x2b,0x85,0x00,0x01,0xf9,0x85,0xfd,0x05,0x2b,0x00,0xfd,0xfd,0x2b,0x83, -0x00,0x01,0xf8,0x85,0xfd,0x01,0xf9,0x83,0x00,0x03,0x2b,0xfd,0xac,0x84,0x00,0x0d, -0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x04, -0xfd,0xfd,0x00,0x56,0x83,0x00,0x04,0xac,0xfd,0xfd,0xfa,0x83,0x00,0x01,0x81,0x9c, -0x06,0xfd,0x29,0x56,0x00,0xac,0xfd,0x56,0x00,0xfd,0xfd,0xf9,0x00,0xfa,0xfe,0x00, -0xf6,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00, -0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0x81,0x00,0xfb,0x84,0xfd,0x03,0xfb, -0x00,0x2b,0x86,0xfd,0x44,0x00,0x00,0xf9,0xfd,0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8, -0xfd,0xf9,0x00,0xf9,0xfd,0xf8,0x00,0xf6,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xfd, -0xfd,0x00,0x00,0xf7,0xfd,0x00,0x00,0x2b,0xac,0xf7,0x00,0xfd,0xfd,0xf9,0xfb,0xfd, 
-0xf7,0x00,0xfb,0xfb,0x2b,0x00,0x2b,0xfb,0xfb,0x56,0x00,0xf6,0x81,0xfb,0x81,0x00, -0xf7,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x00,0xf8,0x86,0xfb,0x11,0x00,0xf7,0xfb,0xfb, -0x2b,0x00,0xfb,0x2b,0x00,0xfa,0xfb,0x2b,0x00,0x81,0xfb,0x2b,0x00,0x83,0xfb,0x0b, -0x00,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0xfa,0xfe,0x00,0xf6,0x85,0xfb,0x36,0x81,0x00, -0x2b,0xfb,0xfa,0x00,0x2b,0xfb,0xfb,0x00,0x00,0xfb,0xfd,0xfd,0x00,0xf6,0xac,0xfb, -0x00,0x00,0xfd,0xfd,0x2b,0x00,0xfb,0xfb,0x00,0x00,0x2b,0xfb,0x00,0x00,0xfb,0xfb, -0xf8,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0x00,0x00,0xfb,0xf9, -0x00,0xfa,0xfb,0x2b,0xe2,0x00,0xfb,0xfb,0x56,0x03,0x00,0x2b,0x81,0x8a,0xfb,0x05, -0x00,0x56,0x00,0x2b,0xfb,0xa2,0x56,0x00,0x17,0xfb,0xfb,0xf9,0xf9,0xfb,0xf9,0x00, -0xf9,0xfb,0x2b,0x00,0xfc,0xac,0x00,0x00,0xf7,0xfd,0x00,0x00,0xfd,0xfe,0x2b,0x00, -0x83,0xfd,0x15,0x00,0x2b,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xf6,0xac,0xf7,0x00, -0xfd,0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8,0x85,0xfd,0x03,0x00,0x00,0xac,0x85,0xfd, -0x09,0x2b,0x00,0xfd,0xfd,0xf9,0xfb,0xfd,0xf7,0x00,0x86,0xfd,0x02,0x2b,0x00,0x83, -0xfd,0x12,0x00,0x2b,0xfc,0xac,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00, -0xfd,0xfd,0x00,0x00,0xa2,0xfb,0xfd,0x0e,0xfd,0x00,0x00,0xf9,0xac,0x00,0x2b,0xfd, -0xac,0x00,0xf8,0xfd,0xf7,0x00,0x9d,0x06,0xfd,0x28,0x00,0xf8,0xfd,0x2b,0xf8,0xfd, -0xfd,0x2b,0x00,0xf9,0xf9,0xf7,0x00,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0x2b,0xfd, -0xfd,0x00,0xfa,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00,0x56,0xf9,0xf9, -0x00,0xf7,0x83,0xfd,0x03,0xac,0x00,0x2b,0x87,0xfd,0x26,0x00,0x2b,0xfe,0xfd,0xfd, -0x00,0x2b,0xf9,0xf9,0x2b,0x00,0xfd,0x2b,0x2b,0xfe,0xfd,0xfd,0x00,0x2b,0xfd,0xf8, -0x00,0xf9,0xfa,0xf9,0x00,0x81,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9, -0x00,0x85,0xfd,0x06,0xf9,0x00,0xfb,0xfb,0x2b,0x00,0x83,0xfb,0x10,0xf8,0x00,0x81, -0xfb,0xfb,0xf7,0x00,0x56,0xf8,0x56,0x00,0xf9,0xfb,0x00,0x2b,0xfc,0x86,0xfb,0x09, -0x2b,0x00,0xfb,0xfb,0x00,0xf9,0xfb,0x00,0xf7,0x83,0xfb,0x13,0x00,0x2b,0xfb,0x2b, 
-0x00,0xfb,0xfc,0xfd,0x00,0x2b,0xfe,0xfd,0xfd,0x2b,0x00,0xfa,0xf9,0xf7,0x00,0x85, -0xfb,0x02,0xf7,0x00,0x83,0xfb,0x1b,0xf7,0x00,0xfb,0xfb,0x00,0x56,0xfd,0xfd,0xfb, -0x00,0xfb,0xfd,0xfd,0xf9,0x00,0xfe,0xfd,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb, -0x2b,0x00,0x83,0xfb,0x13,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfb, -0x00,0xf8,0xf8,0x56,0x00,0xf6,0xfc,0xf6,0xe2,0x00,0xfb,0xfb,0xf8,0x02,0x00,0x81, -0x8b,0xfb,0x09,0x00,0xf8,0x2b,0x00,0xfb,0x2b,0x2b,0xf8,0x00,0x86,0xfb,0x09,0x00, -0xf8,0xfb,0x2b,0x00,0xfb,0xfc,0x00,0x2b,0xe2,0xfd,0xfd,0x2b,0x00,0x18,0xfd,0xfd, -0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0xfd,0x2b,0x00,0xfe,0xfd,0xf9,0x00,0xfd,0xfd, -0x00,0x2b,0xf9,0xf9,0x2b,0x00,0x84,0xfd,0x03,0x2b,0x00,0xac,0x86,0xfd,0x02,0x2b, -0x00,0x85,0xfd,0x02,0xf9,0x00,0x86,0xfd,0x17,0x2b,0x00,0xfd,0xfd,0xfb,0x00,0xfb, -0xfd,0xfd,0xfb,0x00,0xfa,0xfe,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0x81,0x00,0xfb, -0x85,0xfd,0x0f,0x00,0x56,0xfe,0xfd,0x00,0x2b,0xfd,0xf8,0x00,0xf9,0xf9,0xfa,0x00, -0xfa,0xfe,0x9b,0x06,0xfd,0x01,0x2b,0xa2,0x00,0xac,0x02,0xfd,0xfd,0x86,0x00,0x15, -0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfe,0xf9,0x00,0xfd, -0xfd,0x2b,0x00,0xfd,0xf9,0x85,0x00,0x01,0xf8,0x83,0xfd,0x02,0x00,0x00,0x87,0xfd, -0x03,0xfe,0x00,0xf6,0x83,0xfd,0x85,0x00,0x04,0x2b,0xfd,0x00,0x2b,0x83,0xfd,0x04, -0x00,0x2b,0xfd,0x2b,0x85,0x00,0x0f,0xfa,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd, -0xfd,0xf9,0x00,0xfd,0xfe,0xf9,0x84,0x00,0x04,0xfb,0xfb,0x2b,0x00,0x83,0xfb,0x02, -0x56,0x00,0x83,0xfb,0x01,0x2b,0x85,0x00,0x04,0x56,0xfb,0x00,0x2b,0x87,0xfb,0x09, -0xf9,0x00,0xfa,0xf7,0x00,0xfb,0xfb,0x00,0xf8,0x83,0xfb,0x0a,0x00,0x2b,0xfb,0x2b, -0x00,0xfc,0xfd,0xfe,0x00,0xf6,0x83,0xfd,0x86,0x00,0x01,0xac,0x84,0xfb,0x02,0x2b, -0x00,0x83,0xfb,0x1e,0xf8,0x00,0xfb,0xfb,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, -0xfe,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb, -0xfb,0xf9,0x84,0x00,0x07,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x85,0x00,0x03,0xf7, 
-0xfb,0x2b,0xe2,0x00,0xfb,0xfb,0x56,0x01,0x00,0x8c,0xfb,0x0c,0x00,0x56,0xfa,0x00, -0xf9,0x00,0xfa,0x56,0x00,0xfb,0xfb,0xfa,0x84,0x00,0x08,0x56,0xfb,0x2b,0x00,0xfb, -0xfb,0x00,0x2b,0xe2,0xfd,0xfd,0x2b,0x00,0x03,0xfd,0xfd,0xf9,0x85,0x00,0x0a,0xf8, -0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x85,0x00,0x01,0x2b,0x83,0xfd,0x03, -0x2b,0x00,0xfb,0x87,0xfd,0x05,0x2b,0x00,0xfd,0xfe,0xf9,0x84,0x00,0x86,0xfd,0x06, -0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x0c,0x00,0xf9,0xfd,0x00,0x2b,0xfd,0xfd, -0x2b,0x00,0xfd,0xf9,0x00,0x85,0xfd,0x09,0xfe,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd, -0x2b,0x85,0x00,0x01,0xfa,0x9c,0x06,0xfd,0x04,0x81,0x00,0xf6,0x00,0x83,0xfd,0x03, -0x2b,0x00,0xfe,0x85,0xfd,0x15,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9, -0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00,0x81,0x86,0xfd,0x03,0x2b,0x00, -0xac,0x88,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x03,0x00,0xf7,0xfe,0x84,0xfd,0x02,0x00, -0x2b,0x83,0xfd,0x05,0x00,0x2b,0xfd,0xf8,0x00,0x84,0xfd,0x17,0xfe,0xfd,0x00,0x2b, -0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfb,0x00,0x81,0xfd,0x56,0x00,0xfb, -0xfb,0x2b,0x00,0x83,0xfb,0x02,0x56,0x00,0x83,0xfb,0x02,0xf7,0x00,0x86,0xfb,0x02, -0x00,0x2b,0x88,0xfb,0x02,0x00,0x2b,0xe2,0x00,0xf7,0xfb,0xfb,0x06,0xfb,0x00,0xf7, -0xfb,0x2b,0x00,0x83,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x02,0x2b,0x00,0x85,0xfd,0x84, -0xfb,0x02,0xf7,0x00,0x83,0xfb,0x31,0xf7,0x00,0xfb,0xfb,0x00,0xf9,0xfd,0xfd,0x81, -0x00,0xac,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb, -0x2b,0x00,0xfb,0xfa,0x00,0x81,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfb, -0x00,0x81,0xfd,0xac,0xfc,0xfb,0xfb,0x2b,0xe2,0x00,0xfb,0xfb,0xf8,0x01,0x00,0x8c, -0xfb,0x03,0x00,0xf8,0xfb,0x83,0x00,0x12,0xfb,0xf8,0x00,0xfb,0xfb,0x00,0x2b,0xfb, -0xfb,0x00,0xf8,0xfc,0xf6,0x00,0xfb,0xfb,0x00,0x2b,0xe2,0xfd,0xfd,0x2b,0x00,0x05, -0xfd,0xfd,0xfb,0x00,0x81,0x85,0xfd,0x0b,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, -0x00,0xf7,0xfe,0x85,0xfd,0x03,0xf8,0x00,0x81,0x88,0xfd,0x09,0x2b,0x00,0xfd,0xfb, 
-0x00,0x81,0xfd,0xf9,0x00,0x86,0xfd,0x17,0x2b,0x00,0xfd,0xfd,0xfb,0x00,0xfb,0xfd, -0xfd,0xfb,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00,0xfb,0x85, -0xfd,0x09,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xf8,0x00,0xe2,0xfe,0xfd,0xfd,0xfd, -0x9a,0x06,0xfd,0x03,0x00,0x00,0xf8,0x83,0xfd,0x25,0x81,0x00,0x2b,0xfd,0xfd,0x56, -0xfd,0xfd,0x00,0xf7,0xac,0x2b,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfa,0x00,0xfd, -0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0x81,0xfd,0x2b,0x00,0xfb,0x89, -0xfd,0x02,0x00,0x2b,0x83,0xfd,0x2c,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0xf8,0x00, -0xfb,0xac,0x2b,0x00,0x2b,0xfd,0xac,0x00,0x2b,0xfc,0xfe,0xfa,0xfb,0xfd,0x00,0x2b, -0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0xfa,0x00,0xfa,0xfc,0x2b,0x00,0x81, -0xfb,0x2b,0x00,0x83,0xfb,0x02,0xf8,0x00,0x83,0xfb,0x0a,0x81,0x00,0x00,0x81,0xfb, -0xf9,0xfa,0xfb,0x00,0x2b,0x88,0xfb,0x03,0x2b,0x00,0x00,0x83,0xfb,0x0f,0x2b,0x00, -0xfa,0xfb,0x2b,0x00,0x81,0xac,0xf7,0x00,0xac,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x08, -0x81,0x00,0xf7,0xfd,0xfe,0x56,0xfd,0xfc,0x83,0xfb,0x0b,0x81,0x00,0x2b,0xfb,0xfa, -0x00,0x2b,0xfb,0xfb,0x00,0xf9,0x83,0xfd,0x31,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd, -0xfd,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xf9,0x00,0xfa,0xac, -0x2b,0x00,0xac,0xfd,0xf7,0x00,0xac,0xfd,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0xfa,0xfb, -0x2b,0x00,0x81,0xf8,0x00,0x00,0xfb,0xfb,0x56,0x00,0x84,0xfb,0x02,0x2b,0xf8,0x86, -0xfb,0x3c,0x00,0x56,0xfb,0xf7,0x00,0xf7,0xfb,0x56,0x00,0xfb,0xfb,0x00,0x2b,0xfb, -0xf7,0x00,0xf8,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd, -0xf7,0x00,0xac,0xfd,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0x81,0xfd,0x2b,0x00,0xfd,0xfd, -0xfa,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0xf8,0x00,0x56,0x89,0xfd, -0x0a,0x2b,0x00,0xfd,0xfa,0x00,0xfa,0xac,0x2b,0x00,0xac,0x85,0xfd,0x2a,0xf7,0x00, -0xac,0xfd,0xfd,0x00,0x2b,0xfc,0xac,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xfd,0xf7,0x00, -0x00,0xfd,0xfd,0x00,0x00,0xfb,0xfd,0x56,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b, 
-0xfd,0xac,0x00,0xf6,0xfc,0xfd,0x81,0x81,0x9d,0x06,0xfd,0x03,0x2b,0x00,0xac,0x84, -0xfd,0x01,0xf9,0x84,0x00,0x17,0xfd,0xfd,0xf8,0x00,0x00,0xf8,0x00,0x2b,0xfd,0xfd, -0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x02, -0x56,0xfd,0x86,0x00,0x86,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x01,0xf7,0x83,0x00,0x0c, -0x2b,0xfd,0xfd,0x2b,0x00,0x00,0x2b,0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x17,0xfb, -0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfe,0xf6,0x00,0x00, -0xf8,0x00,0x00,0xfb,0x2b,0x00,0x83,0xfb,0x02,0x56,0x00,0x84,0xfb,0x01,0xfa,0x84, -0x00,0x04,0xfa,0xfb,0x00,0x2b,0x88,0xfb,0x03,0x81,0x00,0x2b,0x84,0xfb,0x01,0x2b, -0x83,0x00,0x0a,0xf9,0xac,0xfd,0xfc,0x00,0x00,0x2b,0xfd,0x00,0x2b,0x84,0xfd,0x01, -0xf9,0x84,0x00,0x02,0xfd,0xfc,0x84,0xfb,0x01,0x56,0x83,0x00,0x01,0x2b,0x83,0xfb, -0x06,0x00,0xf9,0xfe,0xfd,0xfd,0x81,0x83,0x00,0x1e,0xf7,0x00,0xfd,0xfd,0x2b,0x00, -0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfc,0x2b,0x00,0x00,0x56,0x00,0x00, -0xfd,0xac,0x00,0x00,0x2b,0xfd,0xac,0xf6,0x83,0x00,0x03,0x56,0xac,0x81,0x83,0x00, -0xe2,0xf8,0x00,0xfb,0xfb,0x04,0xfb,0xfb,0x00,0x2b,0x86,0xfb,0x25,0x00,0xf8,0xfb, -0x81,0xf8,0x81,0xfb,0xf8,0x00,0xfb,0xfb,0xf7,0x00,0x00,0x2b,0x2b,0x00,0x56,0x2b, -0x00,0xfb,0xfb,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0x00,0x00,0x2b,0xfd, -0xac,0xf6,0x83,0x00,0x08,0x56,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x01, -0xf7,0x83,0x00,0x02,0x2b,0xfd,0x86,0x00,0x01,0xf9,0x85,0xfd,0x0a,0x2b,0x00,0xfd, -0xfd,0x2b,0x00,0x00,0x56,0x00,0x00,0x85,0xfd,0x06,0xac,0x00,0x00,0x2b,0xfd,0xac, -0x84,0x00,0x0d,0xac,0xfd,0xfd,0xfa,0x00,0x00,0x2b,0x2b,0x00,0xfd,0xfd,0xac,0xf6, -0x83,0x00,0x0b,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00, -0x01,0xfb,0x95,0x07,0xfd,0xc2,0xfe,0xfd,0xfd,0x89,0xfd,0x01,0xfe,0x8b,0xfd,0x01, -0xfe,0x98,0x03,0xfd,0x02,0xfe,0xac,0x9c,0x02,0xfb,0x04,0xac,0xfd,0xfd,0xfe,0x90, -0x01,0xfd,0x01,0xfc,0x8d,0xfb,0x8c,0xfd,0x8b,0xfb,0x01,0xac,0x8e,0xfd,0x01,0xfe, 
-0x84,0xfd,0x01,0xac,0x87,0xfb,0x01,0xfc,0x94,0x01,0xfb,0x01,0xfc,0x8a,0xfb,0x01, -0xfc,0x85,0xfb,0x01,0xac,0x8c,0xfd,0x01,0xfe,0x9c,0x03,0xfd,0x01,0xfe,0x88,0xfd, -0x01,0xfe,0x8d,0xfd,0x01,0xfe,0x9e,0x0c,0xfd,0x01,0xfc,0x9b,0x02,0xfb,0x01,0xac, -0x94,0x01,0xfd,0x01,0xfc,0x8d,0xfb,0x8c,0xfd,0x8c,0xfb,0x01,0xfe,0x93,0x01,0xfd, -0x01,0xac,0x9c,0x02,0xfb,0x01,0xfc,0x9e,0x12,0xfd,0x01,0xac,0x9b,0x02,0xfb,0x01, -0xac,0x95,0x01,0xfd,0x8e,0xfb,0x8c,0xfd,0x8c,0xfb,0x95,0x01,0xfd,0x01,0xac,0x9c, -0x02,0xfb,0x01,0xac,0x9d,0x12,0xfd,0x01,0xfc,0x9a,0x02,0xfb,0x01,0xac,0x95,0x01, -0xfd,0x01,0xac,0x8e,0xfb,0x8c,0xfd,0x8c,0xfb,0x01,0xac,0x95,0x01,0xfd,0x01,0xac, -0x9b,0x02,0xfb,0x01,0xfc,0x9c,0x12,0xfd,0x01,0xac,0x9a,0x02,0xfb,0x01,0xac,0x96, -0x01,0xfd,0x8f,0xfb,0x8c,0xfd,0x8d,0xfb,0x96,0x01,0xfd,0x01,0xac,0x9b,0x02,0xfb, -0x01,0xac,0x92,0x0d,0xfd,0x02,0xac,0xf9,0x92,0x01,0xfd,0x02,0xfa,0xfb,0x85,0xfd, -0x02,0xf9,0xac,0x9c,0x03,0xfd,0x01,0xfc,0x8e,0xfb,0x03,0x81,0xf8,0xfa,0x98,0x01, -0xfb,0x04,0xac,0xfd,0xfb,0xfa,0x93,0x01,0xfd,0x90,0x01,0xfb,0x87,0xfd,0x02,0xfa, -0xfb,0x83,0xfd,0x8e,0xfb,0x91,0x01,0xfd,0x02,0xfa,0xfb,0x83,0xfd,0x01,0xac,0x9a, -0x02,0xfb,0x01,0xfc,0x9e,0x02,0xfd,0x02,0xf9,0xac,0x97,0x02,0xfd,0x02,0xfc,0xf9, -0x99,0x07,0xfd,0x02,0xf9,0x00,0x92,0x01,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x02,0x2b, -0x2b,0x9d,0x03,0xfd,0x8e,0xfb,0x01,0xf8,0x83,0x00,0x97,0x01,0xfb,0x05,0xac,0xfd, -0xfb,0x00,0x81,0x91,0x01,0xfd,0x01,0xac,0x91,0x01,0xfb,0x87,0xfd,0x02,0x00,0x2b, -0x83,0xfd,0x8f,0xfb,0x01,0xac,0x8f,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x01,0xac,0x8b, -0xfb,0x02,0xf9,0x00,0x9c,0x01,0xfb,0x01,0xfc,0x9e,0x02,0xfd,0x02,0x00,0xf9,0x97, -0x02,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x02,0x00,0xfa,0x90,0x07,0xfd,0x02,0xf9,0x00, -0x92,0x01,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x03,0x81,0xf6,0xfe,0x9c,0x03,0xfd,0x01, -0xac,0x8e,0xfb,0x02,0x00,0x2b,0x98,0x01,0xfb,0x05,0xac,0xfd,0xfd,0xf6,0x81,0x91, -0x01,0xfd,0x01,0xac,0x92,0x01,0xfb,0x87,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x8e,0xfb, 
-0x03,0x56,0xfa,0xac,0x8e,0xfd,0x02,0x00,0x2b,0x85,0xfd,0x01,0xac,0x8a,0xfb,0x02, -0xf9,0x2b,0x9d,0x01,0xfb,0x01,0xac,0x9d,0x02,0xfd,0x02,0x00,0xf9,0x97,0x02,0xfd, -0x02,0xfa,0x00,0x87,0xfd,0x02,0x2b,0xfb,0x9f,0x06,0xfd,0x03,0xfe,0xf9,0x00,0x92, -0x01,0xfd,0x02,0x00,0x2b,0x92,0x04,0xfd,0x01,0xfc,0x8e,0xfb,0x02,0x00,0x2b,0x97, -0x01,0xfb,0x01,0xac,0x83,0xfd,0x01,0xfe,0x90,0x01,0xfd,0x02,0xac,0xfc,0x93,0x01, -0xfb,0x87,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x8e,0xfb,0x05,0x00,0xf6,0xfb,0xfc,0xac, -0x8c,0xfd,0x02,0x00,0x2b,0x86,0xfd,0x01,0xac,0x99,0x02,0xfb,0x9d,0x02,0xfd,0x02, -0x00,0xfa,0x97,0x02,0xfd,0x02,0xf9,0x00,0x95,0x07,0xfd,0x01,0xac,0x83,0x00,0x02, -0x2b,0x00,0x83,0xfd,0x04,0x2b,0x00,0x00,0x2b,0x87,0xfd,0x09,0xf8,0x00,0x00,0x2b, -0x00,0x2b,0xfd,0xfd,0x81,0x83,0x00,0x10,0xfa,0xfd,0xfd,0x00,0x2b,0x2b,0x00,0x00, -0x81,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0x2b,0x83,0x00,0x03,0xf8,0xfd,0xfd,0xa2,0x2b, -0x00,0x08,0xf9,0xf9,0x00,0xf8,0x00,0x2b,0xfd,0x2b,0x83,0x00,0x04,0xf8,0xfd,0xfd, -0xac,0x85,0x00,0x83,0xfd,0x04,0x2b,0x00,0x00,0x2b,0x86,0xfd,0x02,0xac,0x2b,0x83, -0x00,0x05,0x81,0xfb,0xfb,0x00,0xf8,0x83,0x00,0x02,0x81,0xfb,0x84,0x00,0x03,0x56, -0xfb,0x56,0x83,0x00,0x01,0x2b,0x83,0xfb,0x02,0x00,0xf7,0x83,0x00,0x04,0x81,0xfb, -0xfb,0x56,0x83,0x00,0x07,0xf6,0xfe,0xfd,0xf6,0x00,0x00,0x2b,0x83,0xfd,0x01,0xf8, -0x83,0x00,0x01,0xac,0x85,0xfd,0x03,0xac,0x00,0x2b,0x83,0x00,0x01,0xf7,0x83,0xfb, -0x0c,0x2b,0x00,0x00,0x2b,0xfb,0xfb,0xf8,0x00,0xf9,0x00,0x00,0x2b,0x83,0xfd,0x08, -0xf8,0x00,0x00,0x2b,0x00,0x2b,0xfd,0xfd,0x84,0x00,0x05,0xf9,0xfb,0xfb,0x00,0xf7, -0x83,0x00,0x03,0x81,0xfb,0x2b,0x83,0x00,0x03,0x56,0xfb,0xac,0x83,0xfd,0x02,0x00, -0xf7,0x83,0x00,0x10,0x81,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00, -0x2b,0xfb,0xfb,0x2b,0x83,0x00,0x04,0xfb,0xfb,0x56,0x00,0x83,0xfb,0x08,0x2b,0x00, -0x00,0x2b,0xfb,0xfb,0xf8,0x00,0x83,0xfb,0x0a,0x00,0xf8,0xfb,0x56,0x00,0xf7,0x00, -0x2b,0xfb,0xf7,0x83,0x00,0x0b,0x56,0xfb,0xfb,0xac,0xfd,0xfd,0xac,0xf6,0x00,0x00, 
-0x2b,0x83,0xfd,0x04,0x2b,0x00,0x00,0x2b,0x83,0xfd,0x01,0x56,0x83,0x00,0x04,0xf8, -0xfd,0xfd,0x2b,0x83,0x00,0x09,0x81,0xfd,0xfd,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x83, -0xfd,0x09,0x2b,0x00,0x00,0x2b,0x00,0xf9,0xfd,0xfd,0xf8,0x83,0x00,0x04,0xac,0xfd, -0xfd,0x2b,0x83,0x00,0x87,0xfd,0x01,0x2b,0x83,0x00,0x09,0xfa,0xfd,0xfd,0x2b,0x00, -0xfd,0xfd,0xf9,0x00,0x86,0xfd,0x03,0xf9,0x00,0x56,0x83,0x00,0x08,0xac,0xfd,0xfd, -0x00,0xf9,0xfe,0xfd,0xf8,0x83,0x00,0x05,0xac,0xfd,0xfd,0x00,0x56,0x83,0x00,0x01, -0xac,0x9d,0x05,0xfd,0x0e,0x00,0xf6,0xac,0xfb,0x00,0x00,0xfd,0xfd,0x2b,0x00,0xfc, -0xac,0x00,0xf8,0x85,0xfd,0x43,0xf9,0x00,0xf9,0xfd,0xf8,0x00,0x2b,0xfd,0xac,0x00, -0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0x00,0x00,0xf7,0xfd,0x00,0x00,0xf6,0xac,0xf7,0x00, -0xfd,0xfd,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xf7,0xfd,0xfd,0xfa,0x00, -0xf6,0xac,0xfd,0xfe,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0x00,0xf6,0xac,0xfb,0x00, -0x00,0xfd,0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8,0x85,0xfd,0x0c,0x56,0x00,0xf9,0xfb, -0x00,0x00,0xfb,0xfb,0x00,0x00,0xf8,0x81,0xe2,0x00,0x2b,0xfb,0xfb,0x24,0x81,0x00, -0x2b,0xfb,0xfa,0x00,0x2b,0xfb,0xfb,0x00,0x00,0x56,0x81,0x00,0xf6,0xfb,0x81,0x00, -0x00,0x81,0xac,0xfb,0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8,0xfd,0xf9,0x00,0xfa,0xfe, -0x00,0xf6,0x84,0xfd,0x2d,0xac,0xfb,0x00,0x00,0xf7,0xfb,0xf8,0x00,0xfa,0xfb,0x2b, -0x00,0x81,0x81,0x00,0xf7,0xfb,0x56,0x00,0x00,0xf9,0xf9,0x00,0xfa,0xfd,0xf9,0x00, -0xf9,0xfd,0xf8,0x00,0x2b,0xfd,0xfd,0x56,0x81,0xfb,0x00,0x00,0xfb,0xfb,0x00,0x00, -0x56,0x81,0xe2,0x00,0x2b,0xfb,0xfb,0x1a,0xfb,0xfb,0xac,0xfd,0xfd,0x00,0x00,0xf9, -0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfb, -0xf9,0x00,0xa2,0xfa,0xfb,0x0e,0xfb,0xf8,0x00,0xfb,0xfb,0x2b,0x00,0x81,0x81,0x00, -0xf7,0xfb,0x56,0x00,0x83,0xfb,0x49,0x00,0xf8,0xfc,0xf8,0x00,0x2b,0x81,0xfb,0xfb, -0x00,0xf7,0xfb,0x81,0x81,0xfb,0xfb,0xfc,0xfd,0xfd,0xf7,0x00,0xac,0xfd,0xfb,0xfd, -0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8,0xfd,0xf9,0x00,0xf7,0xfd,0xfd,0xfb,0xfd,0x2b, 
-0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x2b,0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b, -0x00,0xfb,0xac,0x2b,0x00,0xf9,0xfe,0xf9,0x00,0xfa,0xfe,0x00,0xf6,0xfd,0xfa,0x00, -0xa2,0xfb,0xfd,0x85,0xfd,0x0e,0x2b,0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x2b,0x00, -0xfd,0xfd,0xf9,0x00,0x85,0xfd,0x1b,0xfe,0xf9,0x00,0x2b,0xfb,0xac,0x00,0xf6,0xfd, -0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xfa,0xfe,0x00,0xf6,0xfd,0xfd,0x00,0x00,0xf9,0xac, -0x00,0x2b,0x9c,0x05,0xfd,0x0f,0xfb,0x00,0xfb,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00, -0x2b,0xf9,0xf9,0x2b,0x00,0x85,0xfd,0x25,0x2b,0x2b,0xfe,0xfd,0xfd,0x00,0x2b,0xfd, -0xf7,0x00,0xfa,0xf9,0xf9,0x00,0x81,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfe,0xfd, -0xf9,0x00,0xfd,0xfd,0xfe,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x03, -0xf9,0x00,0xac,0x86,0xfd,0x12,0xf9,0x00,0xfe,0xfa,0x00,0xfc,0xfd,0xfd,0x2b,0x00, -0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0x2b,0x00,0x85,0xfd,0x0a,0x00,0x00,0xf8,0x56,0x2b, -0x00,0xfb,0xfb,0x00,0xf8,0xe2,0xfb,0xfb,0x00,0x2b,0x04,0xfb,0xfb,0x2b,0x00,0x83, -0xfb,0x0f,0xf7,0x00,0xfb,0xfb,0x00,0xf8,0xfb,0xfb,0x00,0x2b,0xfb,0x2b,0x00,0xfb, -0xac,0x83,0xfd,0x0d,0x00,0x2b,0xf9,0xf9,0x2b,0x00,0xfd,0x2b,0x00,0xf9,0xf9,0xf7, -0x00,0x83,0xfd,0x05,0xac,0xfb,0xfb,0x00,0x2b,0x83,0xfb,0x19,0x00,0xf6,0xfc,0x00, -0x2b,0xf8,0x56,0x00,0x00,0xfb,0xf8,0x00,0x81,0xfb,0xfb,0x00,0xf9,0xfe,0x2b,0x2b, -0xfe,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x08,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0xf8, -0xe2,0xfb,0xfb,0x00,0x2b,0x85,0xfb,0x19,0xac,0xfd,0x00,0xf9,0xfe,0xfd,0xfc,0x00, -0x81,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfe,0xfd,0x00,0x2b,0xfc,0xfa,0x00,0xf6, -0xfa,0x83,0xfb,0x0d,0x56,0x00,0xfb,0xfb,0x00,0x2b,0xf8,0x56,0x00,0x00,0xfb,0xf8, -0x00,0x83,0xfb,0x06,0x00,0x56,0xfb,0xf8,0x00,0x81,0x83,0xfb,0x03,0x00,0x00,0xf9, -0x84,0xfb,0x07,0xfc,0xfd,0xfd,0xf8,0x00,0xf8,0xac,0x83,0xfd,0x0a,0x00,0x2b,0xf9, -0xf9,0x2b,0x00,0xfd,0x00,0x2b,0xfe,0x84,0xfd,0x02,0x00,0xf8,0x83,0xfd,0x0d,0x00, -0xf7,0xfd,0x2b,0x00,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf8,0x83,0xfd,0x0e,0x00, 
-0xf9,0xfd,0x2b,0x00,0xf9,0xf9,0xf7,0x00,0xfe,0xfb,0x00,0x2b,0xfb,0x87,0xfd,0x02, -0x00,0xf8,0x83,0xfd,0x09,0x00,0xf7,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0x86,0xfd, -0x02,0xf9,0x00,0x83,0xfd,0x15,0x56,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0x2b,0x00,0xf9, -0xf9,0xf7,0x00,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0x00,0x2b,0x9c,0x05,0xfd,0x02,0xf9, -0x00,0x83,0xfd,0x04,0xfa,0x00,0xfd,0xfd,0x85,0x00,0x01,0x2b,0x85,0xfd,0x02,0x00, -0x2b,0x83,0xfd,0x04,0x00,0x2b,0xfd,0x2b,0x85,0x00,0x07,0xfa,0xfd,0x00,0x2b,0xfd, -0xfd,0x2b,0xe2,0x00,0xfd,0xfd,0xf9,0x84,0x00,0x09,0xfd,0xfd,0x2b,0x00,0xfd,0xfd, -0xfe,0xf9,0x00,0x84,0xfd,0x01,0xf9,0x84,0x00,0x03,0xfd,0xf9,0x00,0x83,0xfd,0x04, -0x2b,0x00,0xfd,0xfd,0x85,0x00,0x01,0x2b,0x84,0xfd,0x01,0xac,0x86,0x00,0x04,0xfb, -0xfb,0x00,0xf8,0xe2,0xfb,0xfb,0x00,0x2b,0x04,0xfb,0xfb,0x2b,0x00,0x83,0xfb,0x0e, -0xf8,0x00,0xfb,0xfb,0x00,0xf8,0xfb,0xfb,0x00,0x2b,0xfb,0x2b,0x00,0xfc,0x84,0xfd, -0x85,0x00,0x02,0x2b,0xfd,0x86,0x00,0x03,0xfd,0xfd,0xfc,0x83,0xfb,0x02,0x00,0x2b, -0x83,0xfb,0x03,0x00,0x2b,0xfb,0x85,0x00,0x04,0x2b,0xfb,0x56,0x00,0x83,0xfb,0x05, -0x00,0xf9,0xfd,0x00,0xf6,0x83,0xfd,0x05,0x00,0x2b,0xfd,0xfd,0xf7,0x84,0x00,0x04, -0xfb,0xfb,0x00,0xf8,0xe2,0xfb,0xfb,0x00,0x2b,0x86,0xfb,0x1f,0xfc,0x00,0xf9,0xfd, -0xfd,0xfe,0x00,0xf9,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd, -0xfc,0x56,0x00,0x00,0xf6,0xfb,0xfc,0xf8,0x00,0xfb,0xfb,0x85,0x00,0x04,0x2b,0xfb, -0x56,0x00,0x83,0xfb,0x05,0x00,0xf8,0xfb,0x56,0x00,0x84,0xfb,0x01,0xfa,0x83,0x00, -0x01,0x81,0x83,0xfb,0x09,0xac,0xfd,0xfd,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0x85,0x00, -0x04,0x2b,0xfd,0x00,0x2b,0x85,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x0d,0x00,0x2b,0xfd, -0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf9,0x83,0xfd,0x03,0x00,0xf9,0xfd, -0x86,0x00,0x06,0xfd,0xfd,0xf9,0x00,0x00,0x2b,0x86,0xfd,0x02,0x00,0xf9,0x83,0xfd, -0x09,0x00,0x2b,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x86,0xfd,0x02,0xf9,0x00,0x83, -0xfd,0x07,0xf9,0x00,0xfd,0xfd,0x00,0xfa,0xfd,0x86,0x00,0x08,0xfd,0xfd,0x00,0xf9, 
-0xfd,0xfd,0x00,0x2b,0x9c,0x05,0xfd,0x0c,0x81,0x00,0xac,0xfd,0xfd,0xf9,0x00,0xfd, -0xfd,0x00,0xf7,0xfe,0x88,0xfd,0x02,0x00,0x2b,0x83,0xfd,0x07,0x00,0x2b,0xfd,0xf8, -0x00,0xfd,0xfe,0x84,0xfd,0x15,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00, -0xfd,0xfb,0x00,0x81,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x02,0xf9,0x00, -0x83,0xfd,0x13,0xfb,0x00,0x81,0xfd,0xf9,0x00,0xfd,0x81,0x00,0xfb,0xfd,0xfd,0x2b, -0x00,0xfd,0xfd,0x00,0xf7,0xfe,0x87,0xfd,0x03,0xfc,0x00,0x00,0x86,0xfb,0x02,0x00, -0x56,0xe2,0xfb,0xfb,0x00,0x2b,0x04,0xfb,0xfb,0xf7,0x00,0x83,0xfb,0x0d,0xf7,0x00, -0xfb,0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfb,0xf7,0x00,0x85,0xfd,0x03,0x00,0xf7, -0xfe,0x84,0xfd,0x03,0x2b,0x00,0xfe,0x84,0xfd,0x01,0xac,0x84,0xfb,0x02,0x00,0x2b, -0x83,0xfb,0x05,0x00,0xf7,0xfb,0x00,0x2b,0x85,0xfb,0x02,0x56,0x00,0x83,0xfb,0x05, -0x00,0xf9,0xfd,0x00,0x2b,0x83,0xfd,0x0d,0x00,0x2b,0xfd,0xf8,0x00,0x81,0xfb,0x2b, -0x00,0xfb,0xfb,0x00,0x56,0xe2,0xfb,0xfb,0x00,0x2b,0x87,0xfb,0x20,0x00,0xf9,0xfd, -0xfd,0x81,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd, -0xfd,0xfb,0xfb,0x56,0x00,0x2b,0xfb,0x56,0x00,0xfb,0xfb,0x00,0x2b,0x85,0xfb,0x0a, -0xf8,0x00,0xfb,0xfb,0x81,0x00,0x56,0xfb,0xf8,0x00,0x86,0xfb,0x03,0xfa,0x00,0x00, -0x83,0xfb,0x01,0xfc,0x84,0xfd,0x07,0xf7,0x00,0x81,0xfd,0x00,0xf7,0xfe,0x84,0xfd, -0x02,0x2b,0x2b,0x85,0xfd,0x12,0x00,0xf8,0xfe,0xfd,0xfd,0x00,0xf8,0xfd,0x2b,0x00, -0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0x56,0x83,0xfd,0x06,0x00,0xfa,0xfd,0x2b,0x00, -0xfe,0x87,0xfd,0x03,0xf9,0x00,0xf7,0x85,0xfd,0x0e,0x00,0xf8,0xfe,0xfd,0xfd,0x00, -0xf8,0xfd,0x2b,0x00,0xfd,0xfd,0x56,0x00,0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x0a, -0xf8,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0x2b,0x00,0xfe,0x85,0xfd,0x06,0x00,0xfa,0xfd, -0xfd,0x00,0x2b,0x9d,0x05,0xfd,0x0e,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd,0xfd,0xf7, -0x00,0xf9,0xfd,0xac,0xf9,0x85,0xfd,0x25,0xf8,0x00,0xfb,0xfc,0x2b,0x00,0x2b,0xfd, -0xac,0x00,0x2b,0xfc,0xfd,0x81,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd, 
-0xf9,0x00,0xfd,0x81,0x00,0xfa,0xac,0x2b,0x00,0xac,0xfd,0x2b,0x00,0x83,0xfd,0x02, -0xf9,0x00,0x83,0xfd,0x16,0xfa,0x00,0xfa,0xac,0x2b,0x00,0xac,0xfd,0x00,0x00,0xf9, -0x2b,0x00,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0x84,0xfd,0x06,0xfc,0xf9, -0x00,0x2b,0xfb,0xfb,0xe2,0xf8,0xfb,0xfb,0x00,0x2c,0x2b,0xfb,0xfb,0x00,0x2b,0xfb, -0xfb,0x81,0x00,0x2b,0xfb,0xfa,0x00,0x2b,0xfb,0xfb,0x00,0xf8,0xfb,0xfb,0x00,0x2b, -0xfb,0x81,0x00,0x2b,0xfc,0xac,0xf9,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0x81, -0x00,0x2b,0xfd,0xfd,0x56,0xac,0x85,0xfb,0x11,0x00,0x00,0xf8,0xfb,0x2b,0x00,0x81, -0xfb,0x2b,0x00,0xf8,0xfb,0x81,0x56,0xfb,0xf8,0x00,0x83,0xfb,0x1d,0x00,0xf9,0xfd, -0xf8,0x00,0xfb,0xac,0x2b,0x00,0x2b,0xfd,0xf7,0x00,0x81,0xfa,0x00,0x00,0xfb,0xfb, -0x00,0xf8,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x00,0x00,0x87,0xfb,0x2f,0x00,0x00,0x81, -0xac,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf7,0xac,0x2b,0x00,0x2b,0xfd, -0xfb,0x56,0xfb,0xfa,0x00,0xf7,0xfb,0xf8,0x00,0xfb,0xfb,0x2b,0x00,0x56,0xfb,0x81, -0xf8,0xfb,0xf9,0x00,0xf9,0xfa,0x00,0x00,0xf8,0xfb,0x56,0x00,0x84,0xfb,0x05,0xf7, -0x81,0x81,0x00,0x00,0x83,0xfb,0x3c,0xfc,0xfd,0x81,0xfa,0xfe,0xf9,0x00,0xfb,0xfd, -0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfe,0xf9,0x00,0xf7,0xfd,0xfb,0x81,0xfd,0x2b,0x00, -0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0x00, -0xac,0xfb,0x2b,0x00,0xf9,0xfd,0x81,0x00,0x2b,0xfd,0xfd,0x56,0xfd,0xfb,0xf9,0xfd, -0xfb,0x00,0xf8,0x85,0xfd,0x03,0x2b,0x00,0xfb,0xe2,0xfd,0xf7,0x00,0xac,0x03,0xf9, -0x00,0x00,0x86,0xfd,0x1a,0xfa,0x00,0x2b,0xac,0xfb,0x00,0x2b,0xfd,0xfd,0x00,0xf9, -0xfd,0x81,0x00,0x2b,0xfd,0xfd,0x56,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0x9d, -0x05,0xfd,0x01,0x81,0x83,0x00,0x02,0xf7,0x00,0x83,0xfd,0x01,0xf7,0x83,0x00,0x01, -0x2b,0x86,0xfd,0x09,0x2b,0x00,0x00,0x2b,0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x17, -0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0x00, -0x00,0x56,0x00,0x00,0xfd,0x2b,0x00,0x83,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x0e,0xfe, 
-0xf6,0x00,0x00,0x56,0x00,0x00,0xfd,0xac,0xf6,0x00,0x2b,0x2b,0x00,0x83,0xfd,0x01, -0xf7,0x83,0x00,0x01,0x2b,0x83,0xfd,0x04,0xac,0xfb,0xfb,0xf8,0x84,0x00,0x04,0xfb, -0xfb,0x00,0x56,0xe2,0xfb,0xfb,0x00,0x2b,0x83,0xfb,0x01,0xf9,0x83,0x00,0x01,0x2b, -0x83,0xfb,0x09,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x81,0x83,0x00,0x04,0x2b, -0xfd,0xfd,0xf7,0x83,0x00,0x04,0x2b,0xfd,0xfd,0xf9,0x84,0x00,0x86,0xfb,0x85,0x00, -0x01,0xf9,0x83,0xfb,0x01,0x2b,0x83,0x00,0x04,0x2b,0xfb,0x56,0x00,0x83,0xfb,0x0c, -0x00,0xf9,0xfe,0xfd,0x2b,0x00,0x00,0x2b,0x00,0x2b,0xfd,0xac,0x83,0x00,0x10,0xf8, -0x00,0x00,0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xf9,0x00,0x00,0xf8,0x85, -0xfb,0x02,0x00,0x2b,0x83,0x00,0x0f,0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xf8,0x00, -0x00,0xf8,0x00,0x2b,0xfd,0xfb,0x83,0x00,0x05,0x2b,0xfb,0xfb,0x56,0x00,0x83,0xfb, -0x01,0x2b,0x83,0x00,0x0c,0x2b,0xfb,0xfb,0x2b,0x00,0x00,0xf9,0x00,0x56,0xfb,0xf8, -0x00,0x84,0xfb,0x84,0x00,0x01,0x81,0x84,0xfb,0x02,0xac,0x56,0x83,0x00,0x01,0xf8, -0x83,0xfd,0x01,0xf7,0x83,0x00,0x04,0x2b,0xfd,0xfd,0x56,0x83,0x00,0x04,0x56,0xfd, -0xfd,0x2b,0x83,0x00,0x0c,0x81,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, -0xac,0x83,0x00,0x06,0x56,0x00,0xf9,0xfd,0xfd,0xf9,0x84,0x00,0x02,0xfd,0xfb,0x83, -0x00,0x02,0x2b,0xfe,0x86,0xfd,0x01,0x2b,0x83,0x00,0x04,0x81,0xfd,0xfd,0xac,0x83, -0x00,0x02,0x56,0x00,0x86,0xfd,0x0e,0xf9,0x00,0x2b,0x00,0x00,0x2b,0xfe,0xfd,0xfd, -0x00,0xf9,0xfe,0xfd,0xf9,0x84,0x00,0x08,0xfe,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b, -0x9f,0x09,0xfd,0x01,0xfe,0x87,0xfd,0x04,0xfe,0xfd,0x00,0x2b,0x8b,0xfd,0xc2,0xfc, -0xfb,0xfb,0x90,0x02,0xfb,0x01,0xac,0x8f,0xfd,0x03,0xfe,0xfd,0xac,0x87,0xfb,0x02, -0x00,0x2b,0x93,0x01,0xfb,0x8c,0xfd,0x97,0x01,0xfb,0x04,0x00,0x56,0xfb,0xac,0x91, -0x01,0xfd,0x01,0xac,0x97,0x01,0xfb,0x01,0xfc,0x8e,0xfb,0x01,0xfc,0x9f,0x02,0xfd, -0x01,0xfe,0x85,0xfd,0x01,0xfe,0x95,0x01,0xfd,0x01,0xfe,0x95,0x01,0xfd,0x01,0xfe, -0x86,0xfd,0x01,0xfe,0x99,0x0a,0xfd,0x06,0x00,0x2b,0xf6,0x00,0x00,0xfb,0x8b,0xfd, 
-0x01,0xfc,0x95,0x02,0xfb,0x91,0x01,0xfd,0x01,0xac,0x88,0xfb,0x02,0x00,0x2b,0x93, -0x01,0xfb,0x8c,0xfd,0x97,0x01,0xfb,0x05,0x00,0xf8,0xfb,0xfb,0xac,0x91,0x01,0xfd, -0x96,0x02,0xfb,0x01,0xfc,0x92,0x11,0xfd,0x05,0xfb,0xf7,0x2b,0xf7,0xac,0x8c,0xfd, -0x95,0x02,0xfb,0x01,0xac,0x90,0x01,0xfd,0x01,0xac,0x89,0xfb,0x02,0x2b,0x56,0x93, -0x01,0xfb,0x8c,0xfd,0x97,0x01,0xfb,0x02,0x2b,0xfa,0x83,0xfb,0x01,0xac,0x90,0x01, -0xfd,0x01,0xac,0x96,0x02,0xfb,0x93,0x11,0xfd,0x01,0xfe,0x8e,0xfd,0x01,0xac,0x94, -0x02,0xfb,0x01,0xfc,0x90,0x01,0xfd,0x01,0xac,0x8a,0xfb,0x01,0xfc,0x94,0x01,0xfb, -0x8c,0xfd,0x9d,0x01,0xfb,0x01,0xac,0x90,0x01,0xfd,0x01,0xfc,0x95,0x02,0xfb,0x01, -0xac,0x91,0x12,0xfd,0x01,0xfc,0x94,0x02,0xfb,0x01,0xac,0x90,0x01,0xfd,0x01,0xfc, -0x9f,0x01,0xfb,0x8c,0xfd,0x9d,0x01,0xfb,0x01,0xfc,0x90,0x01,0xfd,0x01,0xac,0x95, -0x02,0xfb,0x01,0xfc,0x9a,0x11,0xfd,0x02,0xfb,0x81,0x85,0xfd,0x01,0xfc,0x87,0xfb, -0x02,0x81,0xf8,0x9b,0x01,0xfb,0x90,0x01,0xfd,0x01,0xac,0x88,0xfb,0x02,0xf9,0xf9, -0x95,0x01,0xfb,0x02,0x81,0xf9,0x8b,0xfd,0x97,0x01,0xfb,0x02,0xfa,0x56,0x85,0xfb, -0x01,0xac,0x90,0x01,0xfd,0x95,0x02,0xfb,0x01,0xfc,0x9a,0x11,0xfd,0x02,0x2b,0x00, -0x85,0xfd,0x88,0xfb,0x02,0x56,0x00,0x9a,0x01,0xfb,0x01,0xfc,0x90,0x01,0xfd,0x89, -0xfb,0x02,0x2b,0x00,0x95,0x01,0xfb,0x02,0x00,0xf8,0x83,0xfd,0x02,0x00,0x81,0x86, -0xfd,0x84,0xfb,0x02,0x00,0xf9,0x86,0xfb,0x02,0xf7,0x2b,0x89,0xfb,0x08,0x2b,0x00, -0xfb,0xfb,0x2b,0x2b,0xfb,0xfb,0x90,0x01,0xfd,0x04,0xfc,0xfb,0x56,0x00,0x92,0x02, -0xfb,0x9a,0x11,0xfd,0x02,0x2b,0x00,0x84,0xfd,0x01,0xac,0x88,0xfb,0x02,0xf8,0x00, -0x97,0x01,0xfb,0x04,0x56,0x81,0xfb,0xac,0x8f,0xfd,0x01,0xac,0x89,0xfb,0x02,0x2b, -0x00,0x94,0x01,0xfb,0x02,0xf7,0xf7,0x84,0xfd,0x02,0x2b,0xfb,0x86,0xfd,0x84,0xfb, -0x05,0x2b,0xf9,0xfb,0x81,0xf8,0x83,0xfb,0x02,0xf8,0xf7,0x89,0xfb,0x09,0x2b,0x00, -0xfb,0xfb,0xf7,0x56,0xfb,0xfb,0xac,0x8c,0xfd,0x07,0xac,0xf9,0xfd,0xac,0xfb,0xfa, -0x2b,0x92,0x02,0xfb,0x01,0xac,0x99,0x11,0xfd,0x02,0x2b,0x00,0x84,0xfd,0x01,0xac, 
-0x88,0xfb,0x02,0x56,0x00,0x97,0x01,0xfb,0x03,0x00,0xf8,0xfc,0x90,0x01,0xfd,0x8a, -0xfb,0x02,0x2b,0x00,0x96,0x01,0xfb,0x8c,0xfd,0x87,0xfb,0x02,0x56,0x00,0x8e,0xfb, -0x02,0x2b,0x00,0x87,0xfb,0x8c,0xfd,0x05,0xf9,0x00,0xfd,0xfd,0xfc,0x94,0x02,0xfb, -0x01,0xac,0x98,0x0c,0xfd,0x01,0x2b,0x83,0x00,0x03,0xf8,0xfd,0xfd,0xa2,0x2b,0x00, -0x04,0x00,0x2b,0xfd,0xfd,0xa2,0x2b,0x00,0x06,0x00,0x2b,0xfd,0xfd,0xf9,0x00,0x83, -0xfd,0x04,0x00,0xf9,0xf8,0x00,0x84,0xfd,0x04,0x00,0xfb,0xfd,0xf8,0x83,0x00,0x03, -0xac,0xfd,0xf9,0x85,0x00,0x01,0x2b,0x85,0xfd,0x01,0x81,0x83,0x00,0x05,0xf8,0xfd, -0xfd,0x00,0xf9,0xe2,0xfd,0xfd,0x00,0x2b,0x02,0x00,0x00,0x85,0xfd,0x05,0x2b,0x00, -0xfd,0xfd,0xac,0x83,0x00,0x01,0xf7,0x86,0xfb,0xa2,0xf8,0x00,0x06,0x00,0x00,0x81, -0xfb,0xfb,0xf9,0x83,0x00,0x05,0x2b,0xfb,0xfb,0xf8,0x00,0x83,0xfb,0x03,0x00,0x56, -0xfb,0x84,0x00,0x03,0xfd,0xfd,0xf8,0x83,0x00,0x0a,0xf8,0xfd,0xfd,0xf9,0x00,0x81, -0x00,0x00,0x2b,0xfc,0x86,0xfb,0x01,0x56,0x85,0x00,0x03,0xfb,0xfb,0x81,0x83,0x00, -0x01,0xf7,0x86,0xfb,0xa2,0x2b,0x00,0x03,0xf8,0xfb,0xf7,0x83,0x00,0x0c,0xac,0xfd, -0xfd,0x00,0xf9,0xfd,0xf9,0x00,0x81,0x00,0x00,0x2b,0x83,0xfb,0x03,0x00,0x56,0xfb, -0x84,0x00,0x06,0xfb,0x2b,0x00,0xfb,0xfb,0x2b,0x83,0x00,0x0c,0xf7,0xfb,0xfb,0x2b, -0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x83,0x00,0x03,0xfd,0xfd,0x56,0x83,0x00, -0x03,0x2b,0xfd,0xfd,0x84,0x00,0x03,0xac,0xf8,0x00,0x83,0xfb,0x01,0x2b,0x83,0x00, -0x09,0xf9,0xfb,0xfb,0x2b,0x00,0xf7,0x00,0x00,0xf7,0x92,0x01,0xfb,0x01,0xfc,0x97, -0x0c,0xfd,0x1a,0xfe,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xf6,0xac,0xfb, -0x00,0xf8,0xfd,0x2b,0x00,0xf6,0xac,0xfb,0x00,0xf8,0xfd,0xfa,0x00,0x83,0xfd,0x10, -0x00,0xf9,0xfd,0x00,0xf8,0xfd,0xfd,0xf8,0x00,0xfd,0xf9,0x00,0xfa,0xfe,0x00,0xf6, -0x85,0xfd,0x03,0x2b,0x00,0x81,0x85,0xfd,0x12,0x00,0x2b,0xfd,0xfd,0xfb,0xfd,0xfd, -0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0x00,0xf9,0x86,0xfd,0x0a,0x2b,0x00, -0xfd,0xfd,0x00,0x2b,0xfc,0xf9,0x00,0xfa,0x85,0xfb,0x12,0x56,0x00,0x00,0xfa,0x81, 
-0x00,0x00,0xfb,0x81,0x00,0x2b,0xfb,0xfa,0x00,0x2b,0xfb,0x56,0x00,0x83,0xfb,0x08, -0x00,0xf8,0xfb,0xfb,0x00,0x56,0xfd,0xfd,0xe2,0xf9,0x00,0xf9,0xfe,0x07,0xf9,0x00, -0x00,0xfb,0x81,0x00,0xf9,0x85,0xfb,0x0f,0x81,0x00,0x2b,0xfb,0xf8,0x00,0x00,0xfb, -0xfb,0x00,0x00,0xfb,0xf9,0x00,0xfa,0x85,0xfb,0x1e,0x2b,0x00,0x2b,0xfb,0xfb,0x56, -0x00,0xf9,0xfb,0x00,0xf6,0xfd,0xfd,0x00,0xf9,0xfe,0xf9,0x00,0x00,0xfb,0xfa,0x00, -0xf9,0xfb,0xfb,0x00,0xf8,0xfb,0x56,0x00,0x83,0xfb,0x06,0x2b,0x00,0xfb,0xfb,0xf8, -0xfa,0xe2,0xfb,0x2b,0x00,0xfb,0x14,0xfb,0x00,0x2b,0xfb,0x56,0x00,0xfa,0xfd,0xfb, -0xfd,0xfd,0x81,0xfa,0xfd,0x81,0x00,0x81,0xfd,0xf9,0x00,0x83,0xfd,0x12,0x56,0x00, -0xfb,0xfb,0x2b,0x00,0xfa,0xfb,0x2b,0x00,0x81,0xfb,0x2b,0x00,0x00,0x81,0x2b,0x00, -0x92,0x01,0xfb,0x01,0xfc,0x9b,0x0c,0xfd,0x16,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfe, -0xfd,0xfd,0x2b,0x00,0xfd,0x2b,0x00,0xfe,0xfd,0xfd,0x2b,0x00,0xfd,0xf9,0x00,0x83, -0xfd,0x10,0x00,0xfa,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x56,0xfe,0xf6,0x00,0xfa,0xf9, -0xf7,0x00,0x84,0xfd,0x03,0xf8,0x00,0x81,0x86,0xfd,0x03,0x2b,0x00,0x56,0x84,0xfd, -0x02,0x00,0xfa,0xe2,0xfd,0xfd,0x00,0x2b,0x01,0xfe,0x86,0xfd,0x0a,0x2b,0x00,0xfd, -0xfb,0x00,0x56,0x56,0xf8,0x00,0x2b,0x85,0xfb,0x02,0xf8,0x00,0x83,0xfb,0xc2,0xf7, -0x00,0xfb,0x07,0xfb,0xfb,0xf7,0x00,0xfb,0xf8,0x00,0x83,0xfb,0x0a,0x00,0x56,0xfb, -0xfb,0x00,0xf8,0xfe,0xfd,0x2b,0x2b,0x83,0xfd,0x0a,0x2b,0x00,0xfd,0xf9,0x00,0xac, -0xfd,0xfd,0x00,0xf8,0x85,0xfb,0x0a,0x2b,0x00,0xfb,0xfb,0xfc,0xf6,0x00,0xfb,0xfa, -0x00,0x83,0xf8,0x02,0x00,0x2b,0x85,0xfb,0x02,0x2b,0x00,0x83,0xfb,0x19,0x00,0x00, -0xf8,0x56,0xf7,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xac,0xfd,0xfd,0x00,0xf8, -0xfb,0xfb,0x00,0x56,0xfb,0xf8,0x00,0x83,0xfb,0x02,0x2b,0x00,0x85,0xfb,0x0f,0xf8, -0x00,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfa,0x00,0x2b,0x81,0x87,0xfd, -0x05,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x06,0xf8,0x00,0xfb,0xfb,0x00,0xf7,0x83, -0xfb,0x09,0x00,0x2b,0xfb,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0x93,0x01,0xfb,0x01,0xfe, 
-0x97,0x0c,0xfd,0x01,0xf9,0x84,0x00,0x04,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0xc2,0x2b, -0x00,0xfd,0x07,0xfd,0xfd,0x2b,0x00,0xfd,0xf9,0x00,0x83,0xfd,0x0a,0x00,0xf9,0xfd, -0x81,0x00,0xfb,0x81,0x00,0xfd,0xfd,0x86,0x00,0x83,0xfd,0x03,0x56,0x00,0x56,0x87, -0xfd,0x11,0xac,0x2b,0x00,0x00,0x81,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0xf6,0xfd, -0xfe,0x00,0xf6,0x87,0xfd,0x04,0x2b,0x00,0xfd,0xf9,0x85,0x00,0x01,0x2b,0x85,0xfb, -0x02,0x56,0x00,0x83,0xfb,0x05,0x56,0x00,0xfb,0x2b,0x00,0x83,0xfb,0x05,0xf8,0x00, -0xfb,0x56,0x00,0x83,0xfb,0x0a,0x00,0xf8,0xfb,0xfb,0x00,0xf9,0xfd,0xfd,0x00,0x2b, -0x83,0xfd,0x05,0x2b,0x00,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0x56,0x85,0xfb,0x02, -0x2b,0x00,0x83,0xfb,0x04,0x2b,0x00,0xfb,0xf8,0x85,0x00,0x01,0xf7,0x85,0xfb,0x02, -0x2b,0x00,0x83,0xfb,0x86,0x00,0x07,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd, -0x09,0x00,0x56,0xfb,0xfb,0x00,0xf8,0xfb,0x56,0x00,0x83,0xfb,0x05,0x2b,0x00,0xfb, -0xfb,0x56,0x84,0x00,0x11,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xf8, -0x00,0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x04,0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x06, -0xf9,0x00,0xfb,0xfb,0x00,0xf8,0x83,0xfb,0x09,0x00,0x2b,0xfb,0x2b,0x00,0xfb,0xfb, -0x56,0x00,0x93,0x01,0xfb,0x97,0x0c,0xfd,0x0a,0xfb,0x00,0x81,0xfd,0xf9,0x00,0xfe, -0xfd,0x2b,0x00,0x83,0xfd,0x05,0x2b,0x2b,0xfd,0x2b,0x00,0x83,0xfd,0x14,0x2b,0x2b, -0xfd,0xfa,0x00,0xfd,0xfd,0xac,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xf6,0xf7,0xfe,0xfd, -0x2b,0x00,0x83,0xfd,0x06,0xfe,0xfd,0xfd,0x81,0x00,0xf8,0x8a,0xfd,0x07,0xac,0xf6, -0x00,0xfd,0xfd,0x00,0xf9,0xe2,0xfd,0xfd,0x00,0x2b,0x87,0xfd,0x06,0x2b,0x00,0xfd, -0xfb,0x00,0xfa,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x02,0xf8,0x00,0x83,0xfb,0xc2,0xf7, -0x00,0xfb,0x14,0xfb,0xfb,0xf7,0x00,0xfb,0x56,0x00,0xfb,0xfb,0x81,0x00,0x56,0xfb, -0xfb,0x00,0xf9,0xfd,0xfd,0x2b,0x2b,0x83,0xfd,0x0a,0x2b,0x2b,0xfd,0xfa,0x00,0xfd, -0xfd,0xac,0x00,0xf8,0x85,0xfb,0x02,0x2b,0x00,0x83,0xfb,0x06,0x2b,0x00,0xfb,0xfa, -0x00,0xf9,0x89,0xfb,0x02,0x2b,0x00,0x83,0xfb,0x04,0x00,0x00,0xfb,0xfb,0x84,0xfd, 
-0x05,0x00,0xfa,0xfd,0xf9,0x00,0x83,0xfd,0x09,0x00,0xf8,0xfc,0xfb,0x00,0x56,0xfb, -0xf8,0x00,0x83,0xfb,0x16,0x2b,0x00,0xfb,0xfa,0x00,0xf9,0xfb,0x56,0x00,0xfb,0xfb, -0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xfc,0xfb,0xf8,0xc2,0x00,0xf7,0xfd,0x06, -0xfd,0x00,0xf9,0xfe,0xf9,0x00,0x83,0xfd,0x06,0xf9,0x00,0xfb,0xfb,0x00,0xf7,0x83, -0xfb,0x09,0x00,0xf7,0xfb,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0x93,0x01,0xfb,0x01,0xac, -0x96,0x0c,0xfd,0x31,0x81,0x00,0xfa,0xac,0x2b,0x00,0xfc,0xfd,0x2b,0x00,0xf8,0xfd, -0xf9,0x00,0xf9,0xfd,0x2b,0x00,0xf8,0xfd,0xf9,0x00,0xf9,0xfe,0xfa,0x00,0x81,0xfb, -0x00,0x00,0xf9,0xfd,0xfd,0xf8,0x00,0x00,0xac,0xfd,0xfd,0x81,0x00,0xf7,0xfd,0xfd, -0x56,0xfd,0xfa,0x00,0x2b,0x89,0xfd,0x0b,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x00, -0xf7,0xac,0x2b,0xe2,0x00,0x2b,0xfd,0xfd,0x85,0xfd,0x0a,0x2b,0x00,0xfd,0xfd,0x00, -0x00,0xfa,0xfb,0xfa,0x56,0x85,0xfb,0x1d,0x56,0x00,0x2b,0x81,0xfa,0x00,0x2b,0xfb, -0x81,0x00,0x2b,0xfb,0xfa,0x00,0x2b,0xfb,0xf9,0x00,0x56,0xfa,0x00,0x00,0xf8,0xfb, -0xfb,0x00,0xf7,0xfd,0xfd,0xe2,0xf9,0x00,0xf9,0xfd,0x07,0xf9,0x00,0xfd,0xfd,0xfb, -0x00,0x56,0x85,0xfb,0x0f,0xfa,0x00,0xf8,0xfb,0xf7,0x00,0x00,0xfb,0xfb,0x00,0x00, -0xfa,0xfb,0xfa,0x56,0x85,0xfb,0x02,0x2b,0x00,0x83,0xfb,0x0d,0xf9,0x00,0x2b,0xfb, -0xfe,0x56,0xfd,0xfd,0x00,0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x47,0x00,0x56,0xfb,0xfb, -0x00,0xf8,0xfb,0xf9,0x00,0xf9,0xfb,0xfb,0x2b,0x00,0xfb,0x56,0x00,0x56,0x81,0x2b, -0x00,0x81,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfa,0xf8,0xfb,0xfa,0x00,0xf8, -0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xfd,0xfa,0x00,0x81,0xfd,0xfd,0xfa,0x00,0xfb, -0xfb,0x2b,0x00,0xfa,0xfb,0x2b,0x00,0x81,0xfb,0x2b,0x00,0xfb,0xfb,0x56,0x00,0xfb, -0xfc,0xf8,0x2b,0x8f,0xfb,0x01,0xac,0x97,0x0c,0xfd,0x07,0x2b,0x00,0x00,0x56,0x00, -0x00,0xfd,0xa2,0x2b,0x00,0x04,0x00,0xf8,0xfe,0xfd,0xa2,0x2b,0x00,0x02,0x00,0xf8, -0x83,0xfd,0x0b,0x2b,0x00,0x00,0x81,0x00,0xf9,0xfe,0xfd,0xfc,0x00,0x2b,0x84,0xfd, -0x01,0xf9,0x84,0x00,0x02,0xfd,0x2b,0x85,0x00,0x01,0x2b,0x85,0xfd,0x01,0x2b,0x83, 
-0x00,0x07,0x81,0xfd,0xfd,0xf8,0x00,0x00,0xf8,0xe2,0x00,0x2b,0xfd,0xfd,0x85,0xfd, -0x06,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x01,0xf8,0x85,0xfb,0x06,0xf8,0x00, -0x2b,0x00,0x00,0x2b,0x83,0xfb,0x01,0x56,0x83,0x00,0x01,0x2b,0x83,0xfb,0x0e,0x2b, -0x00,0x00,0xf9,0x00,0x56,0xfb,0xfb,0xf7,0x00,0x00,0xfd,0xfe,0xf8,0x83,0x00,0x0b, -0xf8,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0xfb,0x00,0xf8,0xfc,0x85,0xfb,0x01,0xf7,0x83, -0x00,0x06,0x2b,0x00,0xfb,0xfb,0x81,0x2b,0x83,0x00,0x01,0xf8,0x85,0xfb,0x02,0x2b, -0x00,0x84,0xfb,0x01,0xf8,0x84,0x00,0x07,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0x83, -0xfd,0x1f,0x00,0xf8,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0x2b,0x00,0x00,0xfb,0x2b,0x00, -0xfb,0xfc,0xf6,0x00,0x00,0xf8,0x00,0x00,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x2b,0xfb, -0xfa,0x83,0x00,0x11,0x2b,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0x2b,0x00,0x81,0xfd,0x2b, -0x00,0x00,0xfd,0xf9,0x00,0x83,0xfb,0x01,0x2b,0x83,0x00,0x0d,0xf9,0xfb,0xfb,0x2b, -0x00,0xfb,0xfb,0xf8,0x00,0xfb,0xfb,0x2b,0x00,0x8f,0xfb,0x01,0xac,0x9a,0x0c,0xfd, -0x01,0xfe,0x83,0xfd,0x02,0x2b,0x00,0x86,0xfd,0x02,0x2b,0x00,0x8f,0xfd,0x03,0xfe, -0x00,0xf9,0x93,0x03,0xfd,0x01,0xac,0x92,0x02,0xfb,0x01,0xac,0x8e,0xfd,0x01,0xac, -0x95,0x02,0xfb,0x85,0xfd,0x01,0xfe,0x86,0xfd,0x90,0x01,0xfb,0x01,0xfc,0x92,0x01, -0xfb,0x04,0xac,0xfd,0xfd,0xfe,0x8b,0xfd,0x01,0xac,0x93,0x02,0xfb,0x01,0xfc,0x9e, -0x0c,0xfd,0x02,0x2b,0x00,0x86,0xfd,0x02,0x2b,0x00,0x8f,0xfd,0x03,0xf7,0x00,0xfe, -0x93,0x03,0xfd,0x01,0xfc,0x92,0x02,0xfb,0x01,0xac,0x8e,0xfd,0x96,0x02,0xfb,0x8c, -0xfd,0x94,0x02,0xfb,0x8e,0xfd,0x01,0xac,0x93,0x02,0xfb,0x01,0xfc,0x9e,0x0c,0xfd, -0x02,0xf9,0xf8,0x86,0xfd,0x02,0xf9,0xf8,0x8f,0xfd,0x02,0x56,0xf9,0x94,0x03,0xfd, -0x01,0xfc,0x92,0x02,0xfb,0x01,0xac,0x8e,0xfd,0x96,0x02,0xfb,0x8c,0xfd,0x94,0x02, -0xfb,0x8e,0xfd,0x01,0xac,0x93,0x02,0xfb,0x01,0xfc,0x9d,0x11,0xfd,0x01,0xfc,0x92, -0x02,0xfb,0x8e,0xfd,0x01,0xac,0x96,0x02,0xfb,0x8c,0xfd,0x94,0x02,0xfb,0x01,0xac, -0x8e,0xfd,0x93,0x02,0xfb,0x01,0xfc,0x9d,0x11,0xfd,0x93,0x02,0xfb,0x8e,0xfd,0x01, 
-0xfc,0x96,0x02,0xfb,0x8c,0xfd,0x94,0x02,0xfb,0x01,0xfc,0x8e,0xfd,0x94,0x02,0xfb, -0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01,0xfc,0x8e,0xfd,0x97,0x02,0xfb,0x8c,0xfd,0x95, -0x02,0xfb,0x8e,0xfd,0x01,0xfc,0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01, -0xac,0x8e,0xfd,0x97,0x02,0xfb,0x8c,0xfd,0x95,0x02,0xfb,0x8e,0xfd,0x01,0xac,0x93, -0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x97,0x02, -0xfb,0x8c,0xfd,0x95,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d, -0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x97,0x02,0xfb,0x01,0xac, -0x8a,0xfd,0x01,0xac,0x95,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb, -0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98,0x02,0xfb,0x8a, -0xfd,0x96,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d,0x11,0xfd, -0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x99,0x02,0xfb,0x88,0xfd,0x97,0x02, -0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02,0xfb, -0x01,0xac,0x8d,0xfd,0x01,0xac,0x9a,0x02,0xfb,0x01,0xac,0x84,0xfd,0x01,0xac,0x98, -0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02, -0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98,0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac, -0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98, -0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02, -0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98,0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac, -0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98, -0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x9d,0x11,0xfd,0x92,0x02, -0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98,0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac, -0x93,0x02,0xfb,0x9d,0x11,0xfd,0x01,0xfc,0x91,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01, -0xac,0x98,0x05,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x92,0x02,0xfb,0x01,0xfc,0x9d, -0x11,0xfd,0x92,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xac,0x98,0x05,0xfb,0x01,0xac, 
-0x8d,0xfd,0x01,0xac,0x93,0x02,0xfb,0x96,0x11,0xfd,0x02,0xf9,0xac,0x85,0xfd,0x01, -0xfc,0x91,0x02,0xfb,0x01,0xfc,0x8e,0xfd,0x98,0x05,0xfb,0x8e,0xfd,0x01,0xfc,0x92, -0x02,0xfb,0x05,0xfc,0xfd,0xfd,0xfa,0xfb,0x95,0x03,0xfd,0x02,0xf9,0xac,0x9c,0x08, -0xfd,0x09,0xfb,0x2b,0x00,0x2b,0x56,0xfd,0xfd,0x2b,0xf8,0x9a,0x01,0xfd,0x04,0xfb, -0xfb,0xfd,0xf9,0x94,0x02,0xfd,0x06,0x2b,0xf8,0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x01, -0xfc,0x9a,0x01,0xfb,0x01,0xf8,0x83,0x00,0x04,0x2b,0xf9,0xfb,0xfc,0x8e,0xfd,0x98, -0x05,0xfb,0x8e,0xfd,0x01,0xfc,0x94,0x01,0xfb,0x02,0xf7,0x2b,0x84,0xfb,0x02,0x2b, -0xf7,0x86,0xfb,0x05,0xfc,0xfd,0xfd,0x00,0x2b,0x9a,0x01,0xfd,0x09,0xfb,0x2b,0x00, -0x2b,0x56,0xfd,0xfd,0x2b,0xf8,0x92,0x01,0xfd,0x02,0x00,0xf9,0x9c,0x08,0xfd,0x09, -0x00,0x2b,0xf9,0xf8,0x2b,0xfd,0xfd,0xf8,0xf9,0x9a,0x01,0xfd,0x04,0xf8,0xf8,0xfd, -0x00,0x94,0x02,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x01,0xfc,0x99, -0x01,0xfb,0x09,0x2b,0x00,0x2b,0x56,0xf7,0x2b,0x00,0xfb,0xfb,0x8e,0xfd,0x01,0xfc, -0x95,0x01,0xfb,0x02,0xf9,0xf9,0x95,0x03,0xfb,0x02,0x81,0xf8,0x88,0xfb,0x01,0xfc, -0x87,0xfd,0x02,0xfa,0xfb,0x85,0xfd,0x95,0x01,0xfb,0x02,0x2b,0x00,0x84,0xfb,0x02, -0x00,0x2b,0x86,0xfb,0x09,0xfc,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xf9,0xac,0x96,0x01, -0xfd,0x09,0x00,0x2b,0xf9,0xf8,0x2b,0xfd,0xfd,0xf8,0xf9,0x92,0x01,0xfd,0x02,0x00, -0xf9,0x9b,0x08,0xfd,0x04,0xf9,0x00,0xfd,0xfe,0x85,0xfd,0x01,0xfe,0x92,0x04,0xfd, -0x06,0x00,0x2b,0xfd,0xfd,0x00,0xfa,0x85,0xfd,0x01,0xfc,0x98,0x01,0xfb,0x06,0xf9, -0x00,0xf9,0xfb,0xfb,0xfc,0x84,0xfb,0x8e,0xfd,0x01,0xac,0x95,0x01,0xfb,0x02,0x00, -0x2b,0x95,0x03,0xfb,0x02,0xf8,0x00,0x88,0xfb,0x01,0xac,0x87,0xfd,0x02,0x00,0x2b, -0x85,0xfd,0x95,0x01,0xfb,0x02,0x2b,0x00,0x84,0xfb,0x02,0x00,0x2b,0x86,0xfb,0x09, -0xfc,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0x95,0x01,0xfd,0x04,0xf9,0x00,0xfd, -0xfe,0x85,0xfd,0x01,0xfe,0x92,0x01,0xfd,0x02,0x00,0xf9,0x9b,0x08,0xfd,0x03,0xfb, -0x00,0x2b,0x85,0xfd,0x05,0x00,0xf6,0xfe,0xfd,0xfa,0x83,0x00,0x01,0x81,0x86,0xfd, 
-0x15,0x00,0x2b,0x2b,0x00,0x00,0x81,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0x2b,0x00,0xfd, -0xfd,0xf9,0x00,0xfd,0xfd,0x81,0x83,0x00,0x04,0xf8,0xfd,0xfd,0x56,0x83,0x00,0x04, -0x81,0xfd,0xfd,0xf8,0x83,0x00,0x05,0xac,0xfd,0xfd,0x00,0x56,0x83,0x00,0x01,0xac, -0x86,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x00,0x56,0x83,0x00,0x15,0xac,0xfd,0xac,0x00, -0x2b,0x00,0x00,0xfb,0xfb,0x2b,0x00,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xf7,0x00,0x00, -0xf7,0x86,0xfb,0x02,0xf6,0x00,0x88,0xfb,0x01,0xf8,0x83,0x00,0x0a,0xf8,0xfd,0xfd, -0xf9,0x00,0x56,0x00,0x00,0x2b,0xfc,0x83,0x00,0x05,0x81,0xfb,0xfb,0x00,0x2b,0x83, -0x00,0x0b,0x56,0xfb,0xfb,0x00,0xf8,0xfb,0xfb,0x00,0x2b,0xfb,0x2b,0x83,0x00,0x03, -0x56,0xfb,0x56,0x83,0x00,0x07,0x56,0xfb,0xfb,0x00,0x2b,0x00,0x00,0x85,0xfb,0x06, -0xf8,0x00,0x56,0x00,0x00,0x2b,0x83,0xfb,0x01,0xf7,0x83,0x00,0x09,0x81,0xfb,0xfb, -0x00,0xf8,0xfb,0xfb,0x00,0x2b,0x86,0xfb,0x01,0xf7,0x83,0x00,0x02,0xf9,0xfb,0x84, -0x00,0x02,0xfb,0xfa,0x84,0x00,0x08,0xfc,0xfd,0xf9,0x00,0xf8,0x00,0x2b,0x2b,0x83, -0x00,0x03,0xf9,0xfd,0xfa,0x83,0x00,0x09,0x56,0xfb,0xfb,0x00,0x2b,0x2b,0x00,0x00, -0x56,0x8a,0xfb,0x02,0x2b,0x00,0x84,0xfb,0x05,0x00,0x2b,0xfb,0xfb,0x2b,0x83,0x00, -0x06,0xf7,0xfe,0xfd,0x00,0x2b,0xfd,0x84,0x00,0x03,0xfd,0xfd,0x81,0x83,0x00,0x09, -0x81,0xfd,0xfd,0x00,0x2b,0x2b,0x00,0x00,0x81,0x85,0xfd,0x03,0xfb,0x00,0x2b,0x85, -0xfd,0x05,0x00,0xf6,0xfe,0xfd,0xfa,0x83,0x00,0x01,0x81,0x87,0xfd,0x08,0x2b,0x00, -0x00,0x2b,0x00,0xfa,0xfd,0xfb,0x84,0x00,0x03,0xac,0xfd,0xf6,0x85,0x00,0x08,0xf9, -0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x96,0x07,0xfd,0x04,0xf8,0x00,0x00,0xf9,0x83, -0xfd,0x09,0x00,0x2b,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0x86,0xfd,0x2f,0x00,0x00, -0xf7,0xfd,0x00,0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfe,0xf9,0x00, -0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xfb,0xfd,0xfd,0x00,0xf8,0xfd,0xac,0xfc,0xfd,0xf9, -0x00,0xfa,0xfe,0x00,0xf6,0xfd,0xfd,0x00,0x00,0xf9,0xac,0x00,0xf7,0x86,0xfd,0x1e, -0x00,0x2b,0xfd,0xfd,0x00,0x00,0xf9,0xac,0x00,0x2b,0xfd,0xac,0x00,0x00,0xfa,0xfb, 
-0xfb,0x2b,0x00,0x81,0x81,0x00,0xf7,0xfb,0x2b,0x00,0x00,0x81,0x2b,0x00,0x86,0xfb, -0x02,0x00,0x2b,0x87,0xfb,0x1f,0xf8,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xf9,0x00, -0x00,0xfb,0x81,0x00,0x00,0xf9,0x81,0x00,0xf6,0xfb,0xfb,0x00,0x00,0x56,0xfb,0x2b, -0x00,0xfb,0xfb,0x00,0x56,0xe2,0xfb,0xfb,0x00,0x2b,0x0d,0xfb,0xfb,0x81,0x00,0xf7, -0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x00,0xf8,0x86,0xfb,0x16,0x56,0x00,0x00,0xfa,0xf9, -0x00,0xf9,0xfb,0xf8,0x00,0xf9,0xfb,0x00,0x00,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0x00, -0x2b,0x86,0xfb,0x08,0x00,0xf7,0xfb,0x81,0x81,0xfb,0x56,0x00,0x83,0xfb,0x1f,0xfa, -0xf8,0xfb,0x81,0x00,0xf7,0xfd,0xf9,0x00,0xf6,0xac,0xfd,0xfd,0x00,0x2b,0xfd,0xfd, -0xac,0x00,0xf8,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x00,0x2b,0xfb,0x00,0x00,0x8a,0xfb, -0x01,0x2b,0x86,0x00,0x20,0x2b,0xfb,0xfb,0xf8,0xfa,0xfb,0x2b,0x00,0xfd,0xfd,0x00, -0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0x00, -0x00,0xf7,0xfd,0x00,0x00,0x86,0xfd,0x04,0xf8,0x00,0x00,0xf9,0x83,0xfd,0x09,0x00, -0x2b,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0x86,0xfd,0x10,0x2b,0x00,0xfb,0xac,0x2b, -0x00,0xf9,0xfd,0xfb,0xf9,0xfd,0xac,0x00,0x2b,0xfd,0xfe,0x83,0xfd,0x0a,0x00,0x00, -0xac,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x97,0x07,0xfd,0x10,0xfb,0x00,0x00,0xf8, -0xfd,0xfd,0x00,0x2b,0xfd,0xf7,0x00,0xfa,0xf9,0xf9,0x00,0x81,0x85,0xfd,0x17,0x00, -0x2b,0xfd,0xfd,0x2b,0x00,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9, -0x00,0xfd,0xfd,0x2b,0x00,0x56,0x84,0xfd,0x03,0x00,0x00,0xfa,0x83,0xfd,0x0e,0x2b, -0x00,0xf9,0xf9,0xf7,0x00,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0x00,0x2b,0x86,0xfd,0x0e, -0x00,0x2b,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0x00,0x2b,0xfd,0xac,0x00,0xf8,0x83,0xfb, -0x0d,0x00,0x2b,0xf8,0x56,0x00,0x00,0xfb,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0x86,0xfb, -0x02,0x00,0x2b,0x87,0xfb,0x02,0x00,0x2b,0x83,0xfd,0x1b,0x2b,0x00,0xfd,0xf9,0x00, -0xac,0xfd,0xfd,0x00,0x56,0xfe,0xac,0x00,0x2b,0xfb,0xfb,0x00,0xf8,0xfb,0xfb,0x81, -0x00,0xf9,0xfb,0x00,0xf8,0xfc,0xe2,0xfb,0x00,0x2b,0xfb,0x0c,0xfb,0x2b,0x00,0x56, 
-0xf8,0x56,0x00,0xf9,0xfb,0x00,0x2b,0xfc,0x86,0xfb,0x16,0xf8,0x00,0x81,0xfb,0xfb, -0x00,0xf8,0xfc,0x00,0x00,0xf8,0x56,0x2b,0x00,0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0x00, -0x2b,0x86,0xfb,0x03,0x00,0x00,0xf9,0x83,0xfb,0x02,0xf8,0x00,0x87,0xfb,0x06,0x00, -0x2b,0xfd,0xfa,0x00,0xac,0x83,0xfd,0x12,0x00,0x2b,0xfd,0xfd,0xf8,0x00,0x56,0x56, -0xf8,0x00,0xf9,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0x8a,0xfb,0x02,0x2b,0x00,0xa2, -0xf8,0x56,0x21,0x00,0x2b,0xfb,0xfb,0xfc,0xfb,0xfb,0xf8,0x00,0xfd,0xfd,0x00,0x2b, -0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf7,0x00,0xfa,0xf9,0xf9,0x00,0x81,0xfd,0x00,0x2b, -0xfd,0xfd,0x2b,0x00,0x87,0xfd,0x10,0xfb,0x00,0x00,0xf8,0xfd,0xfd,0x00,0x2b,0xfd, -0xf7,0x00,0xfa,0xf9,0xf9,0x00,0x81,0x85,0xfd,0x02,0x00,0xf8,0x83,0xfd,0x02,0x00, -0xf9,0x85,0xfd,0x02,0x00,0x2b,0x84,0xfd,0x0b,0x2b,0x00,0xac,0xfd,0xfd,0x00,0x2b, -0xfd,0xfd,0x2b,0x00,0x99,0x07,0xfd,0x08,0xf7,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0x2b, -0x85,0x00,0x01,0xfa,0x85,0xfd,0x21,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9, -0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xac,0x2b,0x00,0x00,0x81, -0xfd,0xfd,0xfb,0x2b,0x00,0x00,0xac,0xfd,0x86,0x00,0x08,0xfd,0xfd,0x00,0xf9,0xfd, -0xfd,0x00,0x2b,0x86,0xfd,0x0e,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b, -0xfd,0xfd,0x00,0xf8,0x83,0xfb,0x85,0x00,0x08,0x2b,0xfb,0x2b,0x00,0xfb,0xfb,0x56, -0x00,0x86,0xfb,0x02,0x00,0x00,0x87,0xfb,0x02,0x00,0x2b,0x83,0xfd,0x05,0x2b,0x00, -0xfd,0xfa,0x00,0x83,0xfd,0x0a,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfb,0xfb,0x00,0x56, -0x83,0xfb,0x05,0x00,0xf8,0xfb,0x00,0x56,0xe2,0xfb,0xfb,0x00,0x2b,0x03,0xfb,0xfb, -0x2b,0x85,0x00,0x04,0x56,0xfb,0x00,0x2b,0x87,0xfb,0x02,0x56,0x00,0x83,0xfb,0x03, -0x00,0x56,0xfb,0x86,0x00,0x08,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b,0x86,0xfb, -0x01,0xfa,0x83,0x00,0x04,0x81,0xfb,0x56,0x00,0x83,0xfb,0x02,0x81,0x2b,0x83,0x00, -0x04,0x2b,0xfd,0xf9,0x00,0x84,0xfd,0x05,0x00,0x2b,0xfd,0xfd,0x2b,0x85,0x00,0x08, -0x56,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0x8a,0xfb,0x03,0x2b,0x00,0xfc,0x83,0xfb, 
-0x05,0x00,0x2b,0xfb,0xfb,0xf8,0x84,0x00,0x0b,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00, -0xf9,0xfd,0xfe,0x2b,0x85,0x00,0x08,0xfa,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x89, -0xfd,0x08,0xf7,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0x2b,0x85,0x00,0x01,0xfa,0x85,0xfd, -0x02,0x00,0xf9,0x83,0xfd,0x05,0x00,0xf9,0xfd,0xac,0x2b,0x83,0x00,0x01,0x2b,0x83, -0xfd,0x03,0x2b,0x00,0xfb,0x83,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x9a,0x07, -0xfd,0x0a,0x00,0xf9,0xfe,0x00,0x2b,0xfd,0xf8,0x00,0xfd,0xfe,0x88,0xfd,0x12,0x00, -0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x56, -0x00,0x84,0xfd,0x03,0xac,0xf6,0x00,0x84,0xfd,0x06,0xfb,0x00,0x00,0xfd,0x2b,0x00, -0xc2,0xfe,0xfd,0xfd,0x06,0x00,0xf9,0xfe,0xfd,0x00,0x2b,0x86,0xfd,0x0e,0x00,0x2b, -0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0x56,0x83,0xfb,0x02,0x00, -0x2b,0x85,0xfb,0x06,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0x86,0xfb,0x03,0xf7,0x00,0xfa, -0x86,0xfb,0x02,0x00,0x00,0x83,0xfd,0x05,0x2b,0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x12, -0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0xfa,0x00,0xfa,0xfb, -0x00,0xf8,0xe2,0xfb,0xfb,0x00,0x2b,0x04,0xfb,0xfb,0xf7,0x00,0x84,0xfb,0x04,0xfc, -0xfb,0x00,0x2b,0x87,0xfb,0x02,0xf8,0x00,0x83,0xfb,0x05,0x00,0xf8,0xfb,0x00,0x00, -0x86,0xfb,0x06,0x00,0xf8,0xfb,0xfb,0x00,0x2b,0x88,0xfb,0x06,0xfa,0x00,0x00,0xfb, -0xf8,0x00,0x83,0xfb,0x09,0x00,0x00,0xfb,0xfb,0x00,0x2b,0xfd,0xf9,0x00,0x84,0xfd, -0x07,0x00,0x2b,0xfd,0xfd,0xf8,0x00,0xfc,0x83,0xfb,0x08,0xfc,0xfb,0x00,0x2b,0xfb, -0xfb,0x2b,0x00,0x8a,0xfb,0x02,0x2b,0x00,0x84,0xfb,0x18,0x00,0x2b,0xfb,0xfa,0x00, -0xf9,0xfb,0x56,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0xf8,0x00, -0xfd,0xfd,0xfe,0x83,0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x8a,0xfd,0x0a,0x00, -0xf9,0xfd,0x00,0x2b,0xfd,0xf8,0x00,0xfd,0xfe,0x88,0xfd,0x02,0x00,0x56,0x83,0xfd, -0x0e,0x00,0xf9,0xfd,0xf6,0x00,0xfe,0xfd,0x00,0x2b,0xfd,0xfd,0xf8,0x00,0x81,0x84, -0xfd,0x06,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x95,0x07,0xfd,0x12,0x81,0xf8,0xfb,0xfd, 
-0xf7,0x00,0xac,0xfd,0x00,0x2b,0xfd,0xac,0x00,0x2b,0xfc,0xfd,0x81,0xfb,0x85,0xfd, -0x2f,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0xf7,0x00,0xac, -0xf9,0x00,0x00,0xfd,0xfd,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x56,0xfc,0xac,0x00, -0x2b,0xfd,0x81,0x00,0x2b,0xfd,0xfd,0x56,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b, -0x86,0xfd,0x0e,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00, -0xf8,0x83,0xfb,0x0d,0x2b,0x00,0x56,0xfb,0x81,0xf8,0xfb,0x2b,0x00,0xfb,0xfb,0x56, -0x00,0x87,0xfb,0x12,0x00,0x00,0xf8,0xfb,0xfb,0xf7,0x2b,0xfb,0x56,0x00,0xf9,0xfe, -0xf9,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x26,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfb, -0xfb,0x00,0x00,0xf9,0x81,0x00,0x00,0xfb,0xfb,0x00,0x2b,0x81,0x00,0x00,0x2b,0xfb, -0xfb,0x00,0x00,0xfb,0xfb,0x81,0x00,0x00,0x81,0xfb,0xf9,0xf9,0xfb,0x00,0x2b,0x87, -0xfb,0x02,0x56,0x00,0x83,0xfb,0x11,0x00,0x56,0xfb,0xf9,0x00,0x2b,0xfb,0xfb,0xf8, -0xfb,0xfb,0x00,0x2b,0x81,0x00,0x00,0x2b,0x86,0xfb,0x14,0xf7,0x81,0x81,0x00,0x00, -0xfb,0xf9,0x00,0xf9,0xfb,0xfb,0x00,0x00,0xfb,0xf9,0x00,0xf6,0xfe,0xf9,0x00,0x84, -0xfd,0x16,0x00,0x00,0xfd,0xfd,0xac,0x00,0x00,0x81,0xfb,0xf9,0xf9,0xfb,0x00,0x2b, -0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0xf7,0xf7,0x86,0xfb,0x02,0x2b,0x00,0x84,0xfb,0x21, -0x00,0xf6,0xfc,0x56,0x00,0x56,0x81,0x2b,0x00,0xac,0xfd,0x00,0x2b,0xfd,0xfe,0x00, -0x2b,0xfd,0xfd,0xac,0x00,0xf6,0xac,0xfd,0xfa,0xfb,0xfd,0x00,0x2b,0xfd,0xfd,0x2b, -0x00,0x85,0xfd,0x12,0x81,0xf8,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x00,0x2b,0xfd,0xac, -0x00,0x2b,0xfc,0xfd,0x81,0xfb,0x85,0xfd,0x12,0x2b,0x00,0xac,0xfb,0x2b,0x00,0xf9, -0xfe,0x00,0x00,0xfd,0xf9,0x00,0x2b,0xfd,0xf8,0x00,0x56,0x85,0xfd,0x06,0x00,0x00, -0xfd,0xf7,0x00,0x00,0x95,0x07,0xfd,0x02,0xfc,0x2b,0x83,0x00,0x08,0x81,0xfd,0xfd, -0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x01,0xfb,0x85,0xfd,0x0d,0x00,0x2b,0xfd,0xfd, -0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xac,0x83,0x00,0x05,0x56,0x00,0xfd,0xfd, -0x2b,0x83,0x00,0x03,0x81,0xfd,0xfd,0x84,0x00,0x04,0xac,0xfd,0xfd,0xf9,0x84,0x00, 
-0x08,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0x86,0xfd,0x0e,0x00,0x2b,0xfd,0xfd, -0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x00,0x56,0x84,0xfb,0x01,0x2b,0x83,0x00, -0x08,0x2b,0xfb,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0x87,0xfb,0x02,0x81,0x2b,0x84,0x00, -0x04,0xf8,0xfb,0xfb,0xf7,0x83,0x00,0x05,0xf8,0xfe,0xfd,0xf9,0x00,0x83,0xfd,0x0a, -0x00,0xfa,0xfd,0xfd,0x00,0x2b,0xfb,0xfb,0x00,0x2b,0x83,0x00,0x11,0x81,0xfb,0xfb, -0xf7,0x00,0x00,0xf7,0x00,0x2b,0xfb,0xfb,0xf9,0x00,0x00,0xf8,0xfb,0xfa,0x84,0x00, -0x04,0xfa,0xfb,0x00,0x2b,0x87,0xfb,0x02,0xf8,0x00,0x83,0xfb,0x05,0x00,0xf8,0xfb, -0xfb,0xf8,0x84,0x00,0x08,0xfb,0xfb,0xf7,0x00,0x00,0xf7,0x00,0x2b,0x86,0xfb,0x84, -0x00,0x10,0x81,0xfb,0xfb,0x2b,0x00,0x00,0xfb,0xf9,0x00,0x00,0x2b,0xf8,0x00,0xf7, -0xf9,0x00,0x84,0xfd,0x06,0x81,0x00,0x00,0xf9,0xfd,0x81,0x84,0x00,0x0c,0xfa,0xfb, -0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x00,0x86,0xfb,0x02,0x2b,0x00,0x84, -0xfb,0x15,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0x00,0xf8,0x00,0x00,0xfd,0x00,0x2b,0xfd, -0xfd,0xf8,0x00,0x00,0xfd,0xfd,0xfb,0x84,0x00,0x08,0xfb,0xfd,0x00,0x2b,0xfd,0xfd, -0x2b,0x00,0x85,0xfd,0x02,0xfc,0x2b,0x83,0x00,0x08,0x81,0xfd,0xfd,0x00,0x2b,0xfd, -0xfd,0xfb,0x84,0x00,0x01,0xfb,0x85,0xfd,0x01,0xac,0x83,0x00,0x0b,0x56,0x00,0xf9, -0xfd,0x81,0x00,0x00,0x2b,0xf8,0x00,0xf7,0x86,0x00,0x08,0xf9,0xfd,0x81,0x00,0x00, -0x2b,0x2b,0x00,0x9c,0x09,0xfd,0x01,0xfe,0x92,0x01,0xfd,0x01,0xfe,0x86,0xfd,0x01, -0xfe,0x8f,0xfd,0x01,0xfe,0x86,0xfd,0x01,0xac,0x8f,0xfb,0x01,0xfc,0x93,0x01,0xfb, -0x85,0xfd,0x01,0xfe,0x89,0xfd,0x05,0xac,0xfb,0xfb,0x00,0x56,0x91,0x01,0xfb,0x01, -0xfc,0x91,0x01,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xfc,0x9f,0x01,0xfb,0x01,0xac,0x84, -0xfd,0x01,0xfe,0x8a,0xfd,0x94,0x02,0xfb,0x01,0xac,0x8d,0xfd,0x01,0xfe,0x91,0x11, -0xfd,0x01,0xac,0x93,0x02,0xfb,0x8f,0xfd,0x05,0xac,0xfb,0xfb,0x00,0xf8,0x9c,0x04, -0xfb,0x01,0xac,0x8f,0xfd,0x94,0x02,0xfb,0x01,0xac,0x90,0x12,0xfd,0x93,0x02,0xfb, -0x01,0xac,0x8f,0xfd,0x04,0xfc,0xfb,0x2b,0xfa,0x9b,0x04,0xfb,0x01,0xfc,0x8f,0xfd, 
-0x01,0xac,0x94,0x02,0xfb,0x91,0x12,0xfd,0x01,0xfc,0x92,0x02,0xfb,0x01,0xfc,0x90, -0x01,0xfd,0x9e,0x04,0xfb,0x90,0x01,0xfd,0x01,0xfc,0x93,0x02,0xfb,0x01,0xfc,0x91, -0x12,0xfd,0x01,0xfc,0x93,0x02,0xfb,0x01,0xac,0x8f,0xfd,0x01,0xac,0x9c,0x04,0xfb, -0x01,0xac,0x8f,0xfd,0x01,0xac,0x94,0x02,0xfb,0x01,0xfc,0x9d,0x0c,0xfd,0x02,0xac, -0xf9,0x91,0x03,0xfd,0x02,0xfc,0xf9,0x8e,0xfd,0x02,0xf9,0xac,0x8f,0xfd,0x01,0xac, -0x93,0x02,0xfb,0x01,0xfc,0x90,0x01,0xfd,0x9e,0x01,0xfb,0x02,0xf8,0x81,0x9a,0x01, -0xfb,0x02,0x56,0xfa,0x90,0x01,0xfb,0x90,0x01,0xfd,0x01,0xfc,0x88,0xfb,0x02,0x56, -0xfa,0x96,0x01,0xfb,0x05,0x56,0x81,0xfb,0xfb,0xac,0x98,0x01,0xfd,0x02,0xf9,0xac, -0x96,0x01,0xfd,0x02,0xac,0xf9,0x96,0x01,0xfd,0x02,0xfb,0x81,0x93,0x08,0xfd,0x06, -0xf9,0x00,0xfd,0xfd,0xf8,0x2b,0x8d,0xfd,0x01,0xf8,0x84,0x00,0x04,0x2b,0xfd,0xf8, -0x2b,0x97,0x01,0xfd,0x02,0xf9,0x00,0x8e,0xfd,0x02,0x00,0xf9,0x90,0x01,0xfd,0x90, -0x02,0xfb,0x04,0x56,0x00,0xfb,0xfb,0x88,0xfd,0x02,0xf8,0x2b,0x86,0xfd,0x01,0xac, -0x8f,0xfb,0x05,0xf7,0x00,0x00,0xf6,0x81,0x89,0xfb,0x02,0x00,0xf8,0x9a,0x01,0xfb, -0x02,0x00,0x2b,0x8f,0xfb,0x01,0xac,0x90,0x01,0xfd,0x89,0xfb,0x02,0x00,0x2b,0x88, -0xfb,0x04,0x56,0x81,0x81,0x56,0x8a,0xfb,0x04,0x00,0xf8,0xfb,0xfb,0x99,0x01,0xfd, -0x02,0x00,0xf9,0x96,0x01,0xfd,0x02,0xf9,0x00,0x88,0xfd,0x05,0xfc,0x81,0xfd,0xfa, -0xac,0x89,0xfd,0x02,0x2b,0x00,0x93,0x08,0xfd,0x06,0xf9,0x00,0xfd,0xfd,0xf9,0xf8, -0x8d,0xfd,0x09,0x2b,0x00,0xf9,0xf9,0xfa,0xfa,0xfd,0xf9,0xf8,0x97,0x01,0xfd,0x02, -0xfa,0x00,0x8e,0xfd,0x05,0x00,0xf9,0xfd,0xac,0xf9,0x83,0xfd,0x02,0xfb,0xfa,0x88, -0xfd,0x01,0xfc,0x88,0xfb,0x02,0xf9,0xf9,0x95,0x01,0xfb,0x05,0xfa,0x2b,0xfb,0xfb, -0xac,0x87,0xfd,0x02,0xf9,0xf8,0x87,0xfd,0x01,0xfc,0x8d,0xfb,0x06,0xf7,0x00,0xf8, -0xf8,0x2b,0xf9,0x89,0xfb,0x02,0x00,0x56,0x9a,0x01,0xfb,0x02,0x00,0x2b,0x8e,0xfb, -0x01,0xfc,0x90,0x01,0xfd,0x01,0xac,0x89,0xfb,0x02,0x00,0x2b,0x88,0xfb,0x04,0x2b, -0xf9,0x56,0x2b,0x8a,0xfb,0x04,0x00,0x56,0xfb,0xfc,0x83,0xfd,0x03,0xfb,0xfa,0xfe, 
-0x93,0x01,0xfd,0x02,0x00,0xf9,0x96,0x01,0xfd,0x02,0xf9,0x00,0x88,0xfd,0x05,0x81, -0xf6,0xfe,0x2b,0xfa,0x89,0xfd,0x02,0x2b,0x00,0x92,0x08,0xfd,0x06,0xfe,0xf9,0x00, -0xfd,0xfd,0xfe,0x8e,0xfd,0x03,0x2b,0x00,0xfe,0x84,0xfd,0x01,0xfe,0x98,0x01,0xfd, -0x02,0xf9,0x00,0x8e,0xfd,0x05,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x2b,0x00, -0x88,0xfd,0x01,0xac,0x88,0xfb,0x02,0x00,0x2b,0x9a,0x01,0xfb,0x90,0x01,0xfd,0x01, -0xac,0x8d,0xfb,0x04,0x00,0xf6,0xfb,0xfc,0x8b,0xfb,0x02,0x00,0xf8,0x9a,0x01,0xfb, -0x02,0x00,0x2b,0x8e,0xfb,0x01,0xac,0x90,0x01,0xfd,0x8a,0xfb,0x02,0x00,0x2b,0x8a, -0xfb,0x01,0xfc,0x8b,0xfb,0x04,0x00,0xf8,0xfb,0xac,0x83,0xfd,0x02,0x2b,0x00,0x94, -0x01,0xfd,0x02,0x00,0xf9,0x95,0x01,0xfd,0x03,0xfe,0xf9,0x00,0x89,0xfd,0x01,0xfe, -0x8c,0xfd,0x02,0x2b,0x00,0x9f,0x07,0xfd,0x01,0xac,0x83,0x00,0x09,0x2b,0x00,0xfd, -0xfd,0xf6,0x00,0xfd,0xfd,0xac,0x83,0x00,0x01,0xf8,0x86,0xfd,0x02,0x2b,0x00,0x85, -0xfd,0x0d,0xf6,0x00,0xfd,0xfd,0x00,0x2b,0x2b,0x00,0x00,0x81,0xfd,0xfd,0xfa,0x83, -0x00,0x04,0xf8,0xfd,0xfd,0x81,0x83,0x00,0x0b,0x2b,0xfd,0xf9,0x00,0x81,0x00,0x00, -0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x06,0xac,0xfd,0xfd,0x00,0xfa,0xfd,0x84,0x00,0x01, -0xf9,0x83,0x00,0x03,0x2b,0xfd,0x56,0x83,0x00,0x04,0x2b,0xac,0xfb,0x56,0x83,0x00, -0x03,0xf7,0xfb,0x2b,0x83,0x00,0x03,0x56,0xfb,0x56,0x83,0x00,0x01,0xf9,0x87,0xfb, -0x17,0x2b,0x00,0x00,0x2b,0xfb,0xfb,0x56,0x00,0xfb,0xfb,0x2b,0x00,0xf8,0x00,0x00, -0xf8,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0x85,0x00,0x07,0xac,0xfb,0xfb,0x2b,0x00, -0x00,0x2b,0x86,0xfb,0x03,0x00,0x00,0xf9,0x85,0xfb,0x01,0xf7,0x83,0x00,0x0b,0x81, -0xfb,0xfb,0x00,0x56,0xfb,0x56,0x00,0x56,0xfb,0x2b,0xe2,0x00,0xfb,0xfb,0xf8,0x05, -0x00,0xf9,0x00,0x00,0x2b,0x83,0xfb,0x01,0xf7,0x84,0x00,0x04,0x2b,0xfb,0xfb,0xf9, -0x83,0x00,0x09,0x56,0xfb,0xfb,0x00,0x2b,0x2b,0x00,0x00,0xfa,0x86,0xfd,0x01,0xac, -0x85,0x00,0x83,0xfd,0x04,0x2b,0x00,0x00,0x2b,0x83,0xfb,0x01,0xf7,0x84,0x00,0x05, -0x2b,0xfb,0x56,0x00,0xf7,0xc2,0x00,0x2b,0xfb,0x06,0xfb,0x2b,0x00,0xfb,0xfb,0x81, 
-0x84,0x00,0x09,0xfb,0xfb,0x00,0x56,0xfb,0xfa,0x00,0x81,0xf9,0x83,0x00,0x01,0x2b, -0x86,0xfd,0x01,0x2b,0x83,0x00,0x01,0x81,0x83,0xfd,0x09,0x2b,0x00,0x00,0x2b,0x00, -0xfa,0xfd,0xfd,0xf8,0x83,0x00,0x07,0xac,0xfd,0xfd,0x00,0xf7,0x00,0x00,0x85,0xfd, -0x01,0xac,0x83,0x00,0x04,0x2b,0x00,0xfd,0xfd,0xa2,0x2b,0x00,0x03,0xf9,0xf9,0x00, -0x83,0xfd,0x05,0x00,0xf9,0xfe,0xfd,0xfa,0x83,0x00,0x0b,0x2b,0xfd,0x2b,0x00,0xfd, -0xfd,0x2b,0x2b,0xfd,0xfd,0xac,0x83,0x00,0x09,0xf8,0xfd,0xfd,0x2b,0x00,0xf8,0x00, -0x00,0xf8,0x9c,0x06,0xfd,0x12,0x00,0xf6,0xac,0xfb,0x00,0x00,0xfd,0xfd,0x2b,0x00, -0xfd,0xfd,0x00,0x2b,0xfd,0x81,0x00,0xfb,0x85,0xfd,0x3a,0x2b,0x00,0xf6,0x2b,0x2b, -0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xf7,0xfd,0x00,0x00,0xfd,0xfd,0x00,0x2b, -0xfd,0xfd,0xfb,0xfd,0xac,0x00,0xf6,0xac,0xfd,0xfb,0xfe,0xf9,0x00,0x00,0xfb,0x81, -0x00,0xfa,0xfd,0xfb,0xf9,0xfd,0xac,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00, -0xfd,0xfd,0xfe,0x2b,0x00,0x83,0xfd,0x18,0x81,0xfa,0xfd,0x81,0x00,0xfa,0xfc,0x00, -0x00,0xfb,0xfb,0xfa,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x81,0x00,0xf7,0xfb,0x2b,0x00, -0x86,0xfb,0x25,0x2b,0x00,0x81,0x81,0x00,0xf7,0xfb,0xf8,0x00,0xfb,0xfb,0x2b,0x00, -0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0xf6,0xac,0xfb,0x00,0x00, -0xfd,0xac,0x2b,0x00,0x81,0x81,0x00,0xf7,0x85,0xfb,0x17,0x81,0x00,0x00,0xf6,0x81, -0xfb,0xfb,0xf8,0x00,0xf9,0xfb,0x00,0x00,0xfb,0xfb,0x00,0xf8,0xfa,0x00,0xf9,0xfb, -0xfb,0x2b,0xe2,0x00,0xfb,0xfb,0x56,0x1d,0x00,0x00,0xfa,0xf9,0x00,0x56,0xfb,0x56, -0x00,0xf8,0xfb,0xf7,0x00,0x2b,0xfb,0x81,0x00,0xf7,0xfb,0x2b,0x00,0xfb,0xfb,0x00, -0x00,0x2b,0xac,0x00,0x00,0x86,0xfd,0x27,0x00,0xf6,0xac,0xfb,0x00,0x00,0xfd,0xfd, -0x2b,0x00,0x81,0x81,0x00,0x2b,0xfb,0x56,0x00,0xf8,0xfb,0xf7,0x00,0x2b,0xfb,0xf8, -0x00,0x2b,0x81,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0x00,0xa2, -0xfa,0xfb,0x0a,0xfb,0x00,0xf8,0xfa,0x00,0xfa,0xfd,0xfd,0x2b,0x00,0x87,0xfd,0x1b, -0x2b,0x00,0xfb,0xfd,0xf7,0x00,0xfc,0xfd,0x2b,0x00,0xfb,0xac,0x2b,0x00,0xf9,0xfd, 
-0xf9,0x00,0xfa,0xfe,0x00,0xf6,0xfd,0xfd,0x00,0x00,0xfb,0x86,0xfd,0x0f,0x00,0xf6, -0xac,0xfb,0x00,0x00,0xfd,0xfd,0x2b,0x00,0xf7,0xfd,0xfe,0xf9,0x00,0x83,0xfd,0x0f, -0x00,0xf9,0xfd,0xfc,0x00,0x2b,0xac,0xfd,0xfb,0xfd,0x2b,0x00,0xfd,0x2b,0x2b,0x83, -0xfd,0x0d,0x00,0x2b,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xf6,0xac,0xf7,0x00,0x9b, -0x06,0xfd,0x13,0xfb,0x00,0xfb,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfb, -0x00,0x56,0xf9,0xf9,0x00,0xf7,0x85,0xfd,0x02,0x2b,0x00,0x83,0x2b,0x08,0xfd,0xfd, -0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xe2,0xfd,0xfd,0x2b,0x00,0x01,0x56,0x83,0xfd,0x03, -0xf7,0x00,0xfe,0x84,0xfd,0x08,0xf9,0x00,0xac,0xfd,0xfd,0x00,0xf9,0xfe,0x84,0xfd, -0x09,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0xfe,0xf9,0x00,0x83,0xfd,0x02,0x2b,0x00,0x87, -0xfd,0x06,0x00,0xf9,0xfc,0x00,0x00,0xf8,0x84,0xfb,0x0b,0x00,0x2b,0xfb,0xfb,0xf7, -0x00,0xf8,0x56,0xf8,0x00,0xf9,0x85,0xfb,0x25,0x00,0x2b,0xf8,0x56,0x00,0x00,0xfb, -0x56,0x00,0xfb,0xfb,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0x81, -0x00,0xfb,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xf8,0x56,0x00,0x00,0x87,0xfb, -0x11,0x2b,0x00,0x00,0x81,0xfb,0x00,0x00,0xf8,0x56,0x2b,0x00,0xfb,0xfb,0x00,0xf8, -0x00,0xf7,0x83,0xfb,0x01,0x2b,0xe2,0x00,0xfb,0xfb,0xf8,0x1d,0x00,0x81,0xfb,0xfb, -0x00,0x56,0xfb,0x00,0x00,0xfc,0xfb,0xfb,0x00,0x2b,0xfb,0x2b,0x00,0x56,0xf8,0x56, -0x00,0xf9,0xfb,0x00,0x2b,0xac,0xfd,0x2b,0x00,0x85,0xfd,0x1b,0xfa,0x00,0xfc,0xfd, -0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xf8,0x56,0x00,0x00,0xfb,0x00,0x00,0xfc,0xfb, -0xfb,0x00,0x2b,0xfb,0x56,0x00,0x81,0x83,0xfb,0x0a,0x00,0x2b,0xfb,0xfb,0x2b,0x00, -0xfb,0xf9,0x00,0xfa,0x85,0xfb,0xa2,0x00,0xf8,0x05,0xfe,0xfd,0xfd,0x2b,0x00,0x87, -0xfd,0x02,0x00,0xf8,0x83,0xfd,0x05,0x00,0xf7,0xfd,0x00,0xf8,0x83,0xfd,0x0d,0x00, -0xf9,0xfe,0xf6,0x00,0xfa,0xf9,0xf7,0x00,0xfd,0xfd,0x00,0x56,0x86,0xfd,0x0b,0xfb, -0x00,0xfb,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83, -0xfd,0x05,0x00,0xf9,0xfd,0xf7,0x00,0x85,0xfd,0x15,0x2b,0x00,0xf8,0x00,0xac,0xfd, 
-0xfd,0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0xfd,0x2b,0x00,0xfe,0xfd,0xf9,0x00,0x9a, -0x06,0xfd,0x03,0xfe,0xf9,0x00,0x83,0xfd,0x08,0xfa,0x00,0xfd,0xfd,0x2b,0x00,0xfe, -0xf9,0x85,0x00,0x01,0xf8,0x85,0xfd,0x03,0x2b,0x00,0xfe,0x84,0xfd,0x14,0x2b,0x00, -0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0x2b,0x00,0x00,0xfa,0xfd, -0x2b,0x00,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x05,0x00,0xf9,0xfd,0xfc,0x2b,0x83, -0x00,0x08,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x2b,0x00,0x83, -0xfd,0x01,0xfb,0x84,0x00,0x0e,0xf9,0xac,0x81,0x2b,0x00,0x00,0x56,0xfb,0xfb,0x00, -0x2b,0xfb,0xfb,0x2b,0x85,0x00,0x01,0x56,0x85,0xfb,0x85,0x00,0x13,0xf6,0xfc,0xf8, -0x00,0xfb,0xfb,0x2b,0x00,0xfb,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xf9,0x00, -0x83,0xfd,0x04,0x2b,0x00,0xfd,0xfd,0x85,0x00,0x01,0x2b,0x88,0xfb,0x04,0x81,0x00, -0x00,0xfb,0x86,0x00,0x02,0xfb,0xfb,0x83,0x00,0x01,0x81,0x83,0xfb,0x01,0x2b,0xe2, -0x00,0xfb,0xfb,0x56,0x01,0x00,0x83,0xfb,0x05,0x00,0xf8,0xfb,0x00,0x2b,0x83,0xfb, -0x04,0x00,0x2b,0xfb,0x2b,0x85,0x00,0x08,0x56,0xfb,0x00,0x2b,0xfd,0xfd,0x2b,0x00, -0x84,0xfd,0x03,0xfe,0xf9,0x00,0x83,0xfd,0x04,0x2b,0x00,0xfd,0xfd,0x85,0x00,0x04, -0x2b,0xfb,0x00,0x2b,0x83,0xfb,0x05,0x00,0xf6,0xfc,0xf8,0x00,0x84,0xfb,0x09,0x00, -0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xf8,0x00,0x86,0xfb,0x83,0x00,0x01,0xfc,0x83,0xfd, -0x02,0x2b,0x00,0x87,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x05,0x00,0x2b,0xfd,0x00,0xf9, -0x83,0xfd,0x03,0x00,0xf9,0xfd,0x86,0x00,0x04,0xfd,0xfd,0x00,0xf9,0x86,0xfd,0x02, -0xf9,0x00,0x83,0xfd,0x06,0xfa,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x02,0xf9,0x00, -0x83,0xfd,0x05,0x00,0xfa,0xfd,0x2b,0x00,0x85,0xfd,0x04,0x2b,0x00,0x00,0xf7,0x83, -0xfd,0x01,0xf9,0x85,0x00,0x08,0xf8,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x9b,0x06, -0xfd,0x0f,0xfa,0x00,0xac,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00, -0x81,0x89,0xfd,0x02,0x2b,0x00,0x85,0xfd,0x0a,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd, -0xfd,0x2b,0x00,0x84,0xfd,0x06,0xac,0x2b,0x00,0xfd,0xf8,0x00,0x85,0xfd,0x02,0xf9, 
-0x00,0x83,0xfd,0x10,0x00,0xfa,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfe,0xfd,0x00, -0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x2b,0x00,0x83,0xfd,0x17,0x00,0xf7,0xfd,0xfd, -0x00,0xfa,0xfd,0xfc,0xfb,0x81,0x00,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xf7,0x00, -0xfb,0xfb,0xfc,0x87,0xfb,0x02,0x00,0x2b,0x83,0xfb,0x1d,0xfc,0xfb,0xf8,0x00,0xfb, -0xfb,0x2b,0x00,0xfb,0xac,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0x81,0x00,0xfb,0xfd, -0xfd,0x2b,0x00,0xfd,0xfd,0x00,0xf7,0xac,0x8c,0xfb,0x05,0x2b,0x00,0xfb,0x00,0x00, -0x86,0xfb,0x11,0x00,0x56,0x00,0x00,0x81,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0xf7,0x00, -0xfb,0xfb,0xf8,0x00,0x83,0xfb,0x05,0x00,0x56,0xfb,0x00,0x2b,0x83,0xfb,0x05,0x00, -0x2b,0xfb,0xf7,0x00,0x86,0xfb,0x06,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x85,0xfd,0x0b, -0x81,0x00,0xfb,0xfd,0xfd,0x2b,0x00,0xfd,0xac,0x00,0x2b,0x85,0xfb,0x02,0x00,0x2b, -0x83,0xfb,0x05,0x00,0x2b,0xfb,0x56,0x00,0x84,0xfb,0x0a,0x00,0x2b,0xfb,0xfb,0x2b, -0x00,0xfb,0xfa,0x00,0xf9,0x85,0xfb,0x09,0x00,0x56,0x00,0x00,0xfc,0xfd,0xfd,0x2b, -0x00,0x87,0xfd,0x0a,0x00,0xf8,0xfe,0xfd,0xfd,0x00,0xf8,0xfd,0x00,0x56,0x83,0xfd, -0x05,0x00,0xfa,0xfd,0x2b,0x00,0x86,0xfd,0x02,0x00,0xfa,0x86,0xfd,0x0b,0x81,0x00, -0xac,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x0a,0xfa,0x00,0xfd,0xfd, -0xac,0x00,0xf9,0xfd,0xf8,0x00,0x85,0xfd,0x0a,0x2b,0x00,0x56,0x00,0xf8,0xfd,0xfd, -0xfb,0x00,0x81,0x85,0xfd,0x06,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x9b,0x06,0xfd,0x13, -0xfe,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xfb, -0xfd,0xfb,0x81,0x85,0xfd,0x02,0x2b,0x00,0x85,0xfd,0x1b,0x2b,0x00,0xfd,0xfd,0x00, -0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xac,0x00,0xf6, -0xac,0xfc,0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x31,0x00,0xf9,0xfd,0x00,0x00,0xfe,0xf9, -0x00,0x2b,0xfd,0xfd,0x00,0xfa,0xfd,0x81,0x00,0x81,0xfd,0xfd,0xf7,0x00,0xac,0xfd, -0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xfd,0x56,0xf9,0xfb,0x2b,0x00,0xfb,0xfb,0x00, -0x00,0xfb,0xfb,0x81,0x00,0x00,0x81,0xfb,0x56,0xfa,0x85,0xfb,0x25,0x2b,0x00,0x56, 
-0xfb,0x81,0xf8,0xfb,0x56,0x00,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0xf9,0x00,0xfd,0xfd, -0x2b,0x00,0xfd,0xfd,0x00,0x00,0xf9,0x2b,0x00,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfc, -0x81,0xf8,0x85,0xfb,0x20,0x2b,0xf8,0xfb,0xfa,0x00,0x2b,0xfb,0xf9,0x00,0x2b,0xfb, -0xfb,0xf8,0xfb,0xfb,0x00,0xf8,0x81,0x00,0x00,0xfb,0xfb,0x2b,0x00,0x81,0x56,0x00, -0x00,0xfb,0xfb,0x56,0x00,0x83,0xfb,0x19,0x00,0xf8,0xfb,0xf7,0x00,0xf9,0x81,0x2b, -0x00,0x2b,0xfb,0x81,0x00,0x00,0x81,0xfb,0xf9,0xf9,0xac,0x00,0x2b,0xfd,0xfd,0x2b, -0x00,0x86,0xfd,0x19,0x00,0x00,0xf9,0x2b,0x00,0x00,0xfd,0xfb,0x2b,0x00,0xf8,0xfb, -0x81,0x56,0xfb,0xf7,0x00,0xf9,0x81,0x2b,0x00,0x2b,0xfb,0xf8,0x00,0x84,0xfb,0x19, -0x00,0x00,0xfb,0x2b,0x00,0x00,0xfb,0xfb,0x00,0x00,0xfa,0xfb,0xf8,0xfb,0xfb,0x00, -0x56,0xac,0x00,0x00,0xfd,0xfd,0xf7,0x00,0xac,0x86,0xfd,0x1a,0x2b,0x00,0xfb,0xfd, -0xf7,0x00,0xac,0xfd,0x2b,0x00,0xac,0xfb,0x2b,0x00,0xf9,0xfd,0x81,0x00,0xf7,0xfd, -0xfe,0x56,0xfd,0xfd,0x00,0xf9,0x87,0xfd,0x31,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd, -0xfd,0x2b,0x00,0xfd,0xfd,0xfe,0xfa,0x00,0xfa,0xfb,0x00,0x00,0xf9,0xfd,0xac,0x00, -0x2b,0xfc,0xac,0xf9,0xfd,0x2b,0x00,0xfd,0xf8,0x00,0xfa,0xfd,0xfd,0x00,0x00,0xfb, -0xfd,0xfb,0x81,0xfd,0x2b,0x00,0xfd,0xfd,0xfa,0x00,0x9c,0x06,0xfd,0x01,0x81,0x83, -0x00,0x0a,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x01,0x56, -0x85,0xfd,0x01,0x2b,0x85,0x00,0x07,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xe2,0xfd, -0xfd,0x2b,0x00,0x06,0x00,0x00,0x81,0xfd,0xfd,0x81,0x83,0x00,0x04,0x2b,0xfd,0xf9, -0x00,0x83,0xfd,0x20,0x00,0xf9,0xfd,0x81,0x00,0x00,0x2b,0xf8,0x00,0xf7,0xfd,0x00, -0xf9,0xfd,0xfd,0x2b,0x00,0x00,0xfd,0xac,0x00,0x00,0x2b,0xfd,0xf8,0x00,0x00,0x2b, -0x2b,0x00,0x81,0x2b,0x83,0x00,0x09,0xf9,0xfb,0xfb,0xf9,0x00,0x00,0xf8,0xfb,0xfa, -0x84,0x00,0x01,0xfa,0x86,0xfb,0x01,0x2b,0x83,0x00,0x18,0x2b,0xfb,0xf8,0x00,0xfb, -0xfb,0x2b,0x00,0xfb,0xfb,0x56,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x00, -0x2b,0x2b,0x00,0x83,0xfd,0x01,0xf7,0x83,0x00,0x01,0x2b,0x85,0xfb,0x01,0xf8,0x83, 
-0x00,0x01,0x2b,0x83,0xfb,0x01,0xf8,0x84,0x00,0x04,0xfb,0xfb,0x00,0x56,0xe2,0xfb, -0x81,0x00,0x00,0x07,0x00,0xf8,0x00,0xfb,0xfb,0x56,0x00,0x83,0xfb,0x0d,0x00,0x56, -0xfb,0xfb,0x2b,0x00,0x00,0x2b,0x00,0x2b,0xfb,0xfb,0xfa,0x84,0x00,0x08,0x81,0xfd, -0x00,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x0a,0xfc,0x2b,0x00,0x2b,0x2b,0x00,0xfc, -0xfb,0xfb,0x2b,0x83,0x00,0x0c,0x2b,0xfb,0xfb,0x2b,0x00,0x00,0x2b,0x00,0x2b,0xfb, -0x56,0x00,0x84,0xfb,0x09,0xf9,0x00,0x00,0x2b,0x2b,0x00,0xfb,0xfb,0x81,0x84,0x00, -0x0d,0xfb,0xfb,0x00,0xf9,0xfd,0xfc,0x00,0x2b,0xfd,0xac,0x00,0x00,0x2b,0x86,0xfd, -0x01,0x2b,0x83,0x00,0x04,0x81,0xfd,0xfd,0xac,0x83,0x00,0x06,0x56,0x00,0xf9,0xfd, -0xfd,0xf9,0x84,0x00,0x04,0xfd,0xfd,0x00,0xf9,0x87,0xfd,0x01,0x81,0x83,0x00,0x06, -0xf7,0x00,0xfd,0xfd,0x2b,0x00,0x84,0xfd,0x09,0x2b,0x00,0x00,0x81,0x00,0xf9,0xfd, -0xfd,0x81,0x83,0x00,0x0c,0x2b,0xfd,0x2b,0x00,0xfd,0xfd,0xf7,0x00,0x81,0xfd,0xac, -0xf6,0x83,0x00,0x08,0x56,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0x99,0x07,0xfd,0x01, -0xfe,0x9e,0x04,0xfd,0x01,0xfc,0x9c,0x01,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xac,0x87, -0xfd,0x05,0xfe,0xfd,0xfe,0x00,0x2b,0x83,0xfd,0x01,0xfe,0x83,0xfd,0x01,0xac,0x8d, -0xfb,0x01,0xfc,0x9a,0x02,0xfb,0x01,0xac,0x8e,0xfd,0x01,0xfe,0x83,0xfd,0x02,0x00, -0x2b,0x96,0x02,0xfb,0x04,0xfc,0xfd,0xfd,0xfe,0x95,0x02,0xfd,0x01,0xfe,0x9a,0x01, -0xfd,0x01,0xfe,0x92,0x01,0xfd,0x01,0xfe,0x92,0x0d,0xfd,0x01,0xac,0x96,0x02,0xfb, -0x01,0xac,0x85,0xfd,0x06,0x00,0xf6,0x2b,0x00,0x00,0xfb,0x88,0xfd,0x01,0xac,0x96, -0x03,0xfb,0x01,0xac,0x8f,0xfd,0x06,0x00,0x2b,0x2b,0x00,0x00,0xfa,0x96,0x02,0xfb, -0x01,0xac,0x9a,0x12,0xfd,0x96,0x02,0xfb,0x01,0xfc,0x85,0xfd,0x05,0xfb,0xf7,0x2b, -0xf7,0xac,0x8a,0xfd,0x01,0xac,0x94,0x03,0xfb,0x01,0xac,0x90,0x01,0xfd,0x04,0xfb, -0xf7,0x2b,0xf7,0x98,0x02,0xfb,0x9b,0x12,0xfd,0x01,0xfc,0x96,0x02,0xfb,0x01,0xac, -0x85,0xfd,0x01,0xfe,0x8e,0xfd,0x02,0xac,0xfc,0x90,0x03,0xfb,0x02,0xfc,0xac,0x94, -0x01,0xfd,0x01,0xac,0x97,0x02,0xfb,0x01,0xfc,0x9b,0x12,0xfd,0x01,0xac,0x97,0x02, 
-0xfb,0x01,0xac,0x95,0x01,0xfd,0x01,0xac,0x9e,0x02,0xfb,0x01,0xac,0x95,0x01,0xfd, -0x01,0xac,0x98,0x02,0xfb,0x01,0xac,0x98,0x0e,0xfd,0x02,0xac,0xf9,0x92,0x04,0xfd, -0x01,0xfc,0x97,0x02,0xfb,0x01,0xac,0x96,0x01,0xfd,0x01,0xac,0x9a,0x02,0xfb,0x01, -0xac,0x96,0x01,0xfd,0x01,0xac,0x98,0x02,0xfb,0x01,0xfc,0x9f,0x0c,0xfd,0x09,0xfb, -0x2b,0x00,0x2b,0x56,0xfd,0xfd,0x2b,0xf8,0x91,0x01,0xfd,0x06,0xf9,0x00,0xfd,0xfd, -0xf8,0x2b,0x8d,0xfd,0x03,0x81,0x00,0xfb,0x83,0xfd,0x02,0x81,0x2b,0x99,0x02,0xfd, -0x01,0xac,0x8b,0xfb,0x87,0x00,0x01,0x2b,0x95,0x01,0xfb,0x01,0xac,0x97,0x01,0xfd, -0x01,0xfc,0x96,0x02,0xfb,0x01,0xfc,0x97,0x01,0xfd,0x01,0xac,0x99,0x02,0xfb,0x01, -0xac,0x9f,0x0c,0xfd,0x09,0x00,0x2b,0xf9,0xf8,0x2b,0xfd,0xfd,0xf8,0xf9,0x91,0x01, -0xfd,0x06,0xf9,0x00,0xfd,0xfd,0xf9,0xf8,0x8d,0xfd,0x03,0xf9,0x00,0x00,0x83,0xfd, -0x02,0xf9,0x00,0x98,0x01,0xfd,0x02,0x81,0xfb,0x8f,0xfd,0x02,0xfb,0xf9,0x8a,0xfb, -0x08,0xf8,0x56,0xf8,0x00,0x00,0x56,0xf8,0xf9,0x8f,0xfb,0x02,0xf8,0x81,0x85,0xfb, -0x01,0xac,0x97,0x01,0xfd,0x02,0xac,0xac,0x92,0x02,0xfb,0x02,0xac,0xac,0x97,0x01, -0xfd,0x01,0xac,0x99,0x02,0xfb,0x01,0xfc,0x9f,0x0c,0xfd,0x05,0xf9,0x00,0xfd,0xfd, -0xfe,0x84,0xfd,0x01,0xfe,0x90,0x01,0xfd,0x03,0xfe,0xf9,0x00,0x83,0xfd,0x01,0xfe, -0x8d,0xfd,0x08,0xf9,0x00,0x00,0x56,0xfd,0xfd,0xf9,0x00,0x98,0x01,0xfd,0x02,0x00, -0xf6,0x8f,0xfd,0x02,0x2b,0x00,0x8c,0xfb,0x03,0xfc,0x00,0x2b,0x92,0x01,0xfb,0x02, -0x00,0xf8,0x86,0xfb,0x01,0xac,0x99,0x01,0xfd,0x01,0xac,0x9e,0x01,0xfb,0x01,0xac, -0x99,0x01,0xfd,0x01,0xac,0x9a,0x02,0xfb,0x01,0xac,0x9f,0x0c,0xfd,0x03,0xfb,0x00, -0x2b,0x85,0xfd,0x05,0x00,0xf6,0xfe,0xfd,0xfa,0x83,0x00,0x01,0x81,0x86,0xfd,0x01, -0xac,0x83,0x00,0xe2,0x2b,0x00,0xfd,0xfd,0x01,0xac,0x83,0x00,0x01,0xf8,0x86,0xfd, -0x0b,0xf9,0x00,0x2b,0x00,0xac,0xfd,0xf9,0x00,0xfd,0xfd,0xac,0x83,0x00,0x0c,0xf8, -0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x81,0x83,0x00,0x03,0xf8,0xfd, -0x2b,0x83,0x00,0x02,0xf9,0xfd,0x84,0x00,0x08,0x81,0xfd,0xfd,0x00,0x2b,0x00,0x00, 
-0xf9,0x83,0x00,0x01,0x2b,0x8b,0xfb,0x02,0x00,0x2b,0x84,0xfb,0x01,0x2b,0x83,0x00, -0x04,0xf7,0xfb,0xfb,0xf7,0x83,0x00,0x02,0xf9,0xfb,0x84,0x00,0x03,0xfb,0xfb,0x2b, -0x83,0x00,0x01,0xfc,0x9b,0x01,0xfd,0x01,0xac,0x98,0x01,0xfb,0x01,0xac,0x9b,0x01, -0xfd,0x01,0xac,0x9a,0x02,0xfb,0x01,0xfc,0x91,0x0d,0xfd,0x04,0xf8,0x00,0x00,0xf9, -0x83,0xfd,0x09,0x00,0x2b,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0x86,0xfd,0x12,0x00, -0xf6,0xac,0xfb,0x00,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfd,0x81,0x00, -0xfb,0x84,0xfd,0x35,0xfe,0xf9,0x00,0xfe,0x00,0xf6,0xfd,0xf9,0x00,0xfd,0xfd,0x00, -0x2b,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xfd,0xfe,0xf9,0x00,0xfd,0xfd,0x00,0x2b, -0xfd,0xfd,0xfb,0xfd,0xfd,0x00,0x2b,0xfd,0xfe,0xfd,0x56,0xac,0xfd,0x00,0x00,0xfd, -0xfd,0x00,0x00,0xf9,0xfd,0xfd,0x2b,0x00,0xac,0x8c,0xfb,0x02,0x00,0x2b,0x84,0xfb, -0x19,0xf8,0xfa,0xfb,0x2b,0x00,0xfb,0xfb,0x00,0xf7,0xfb,0x81,0x81,0xfb,0xfb,0x00, -0x56,0xfb,0xfb,0xf8,0x00,0xf9,0xfb,0x00,0x00,0xac,0x9c,0x01,0xfd,0x03,0xac,0xac, -0xfc,0x90,0x01,0xfb,0x03,0xfc,0xac,0xac,0x9c,0x01,0xfd,0x01,0xac,0x9b,0x02,0xfb, -0x01,0xac,0x92,0x0d,0xfd,0x10,0xfb,0x00,0x00,0xf8,0xfd,0xfd,0x00,0x2b,0xfd,0xf7, -0x00,0xfa,0xf9,0xf9,0x00,0x81,0x84,0xfd,0x13,0xfb,0x00,0xfb,0xfd,0xfd,0xf9,0x00, -0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0x85,0xfd,0x1c,0xf9, -0x00,0xfd,0xf9,0x00,0x81,0xf9,0x00,0xfd,0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0xfd, -0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0x56,0x84,0xfd,0x02,0x00,0x2b, -0x86,0xfd,0x14,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0xfe,0xfd,0xfd,0x2b,0x00,0xfd,0xfc, -0xfb,0x56,0x56,0xf8,0x56,0xf8,0xf9,0x84,0xfb,0x02,0x00,0x2b,0x87,0xfb,0x07,0xf8, -0x00,0xfb,0xfb,0x00,0x00,0x56,0x84,0xfb,0x0c,0x00,0xf8,0xfb,0xfb,0x00,0x00,0xf8, -0x56,0x2b,0x00,0xfb,0xac,0x90,0x02,0xfd,0x8c,0xac,0x90,0x02,0xfd,0x01,0xac,0x9b, -0x02,0xfb,0x01,0xfc,0x95,0x0d,0xfd,0x08,0xf7,0x00,0xfb,0xfd,0x00,0x2b,0xfd,0x2b, -0x85,0x00,0x01,0xfa,0x84,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x08,0xfa,0x00,0xfd,0xfd, 
-0x2b,0x00,0xfe,0xf9,0x85,0x00,0x01,0xf8,0x85,0xfd,0x0a,0xf9,0x00,0xfd,0xfe,0x2b, -0x00,0xfa,0x00,0xfe,0xf9,0x85,0x00,0x13,0xf8,0xfd,0x2b,0x00,0xfd,0xfd,0xf9,0x00, -0xfd,0xfd,0xac,0x2b,0x00,0x00,0x81,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x01,0xf7,0x84, -0x00,0x04,0xfd,0xfe,0x00,0xf6,0x83,0xfd,0x06,0x2b,0x00,0xfd,0xac,0xfc,0x2b,0x84, -0x00,0x01,0x2b,0x84,0xfb,0x02,0x00,0x2b,0x84,0xfb,0x01,0x56,0x84,0x00,0x03,0xfb, -0xfb,0xfa,0x83,0x00,0x07,0x81,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0x86,0x00,0x03,0xfb, -0xfb,0xac,0x9a,0x04,0xfd,0x01,0xac,0x9c,0x02,0xfb,0x01,0xac,0x96,0x0d,0xfd,0x0b, -0x00,0xf9,0xfd,0x00,0x2b,0xfd,0xf8,0x00,0xfd,0xfd,0xfe,0x86,0xfd,0x0f,0x81,0x00, -0xac,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfb,0x00,0x81,0x89,0xfd,0x05, -0xf9,0x00,0xfd,0xfd,0xac,0x83,0x00,0x04,0xfd,0xfb,0x00,0x81,0x85,0xfd,0x06,0x2b, -0x00,0xfd,0xfd,0x56,0x00,0x84,0xfd,0x13,0xac,0xf6,0x00,0xfd,0xfd,0x00,0x2b,0xfd, -0xfd,0xf8,0x00,0xac,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x05,0x2b,0x00, -0xfd,0xfd,0xfc,0x8a,0xfb,0x02,0x00,0x2b,0x83,0xfb,0x06,0xfa,0x00,0xf9,0xfb,0x56, -0x00,0x84,0xfb,0x0b,0xfa,0x00,0x00,0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0x00,0x00,0x87, -0xfb,0x01,0xac,0x98,0x04,0xfd,0x01,0xac,0x9c,0x02,0xfb,0x01,0xac,0x92,0x0d,0xfd, -0x12,0x81,0xf8,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x00,0x2b,0xfd,0xac,0x00,0xf6,0xac, -0xfd,0xfa,0xfb,0x85,0xfd,0x12,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd,0xfd,0x2b,0x00, -0xfd,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0x81,0x84,0xfd,0x03,0xfe,0xf9,0x00,0x83,0xfd, -0x29,0x56,0x00,0x00,0xfd,0xfd,0x00,0x00,0xfb,0xfd,0xfb,0x81,0xfd,0xf7,0x00,0xac, -0xf9,0x00,0x00,0xfd,0xfd,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x00,0x00,0xfd,0xfd, -0xf7,0x00,0xfc,0xfb,0x00,0x00,0xfd,0xfd,0x00,0x2b,0x83,0xfd,0x06,0xf7,0x00,0xfc, -0xfd,0xac,0xfc,0x89,0xfb,0x02,0x00,0x2b,0x83,0xfb,0x1f,0x56,0x00,0x56,0x81,0x2b, -0x00,0x81,0xfb,0xf7,0x81,0x81,0x00,0x00,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xf9,0x00, -0x2b,0xfb,0xfc,0xf7,0xfb,0xfb,0x2b,0xf8,0xfc,0xac,0x94,0x04,0xfd,0x02,0xac,0xfc, 
-0x9d,0x02,0xfb,0x01,0xac,0x92,0x0d,0xfd,0x02,0xfc,0x2b,0x83,0x00,0x08,0x81,0xfd, -0xfd,0x00,0x2b,0xfd,0xfd,0xfb,0x84,0x00,0x01,0xfb,0x85,0xfd,0x01,0x81,0x83,0x00, -0x0a,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x01,0x56,0x85, -0xfd,0x02,0xf9,0x00,0x84,0xfd,0x06,0x00,0x00,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x03, -0x56,0xfd,0xac,0x83,0x00,0x05,0x56,0x00,0xfd,0xfd,0x2b,0x83,0x00,0x08,0x81,0xfd, -0xfd,0x81,0x00,0x00,0xf9,0xac,0x83,0x00,0x06,0x56,0x00,0x00,0xfd,0x00,0x2b,0x83, -0xfd,0x06,0xac,0x00,0x00,0x2b,0xfd,0xac,0x89,0xfb,0x02,0x00,0x2b,0x84,0xfb,0x07, -0x2b,0x00,0x00,0xf7,0x00,0x00,0xfb,0x84,0x00,0x09,0x81,0xfb,0xfb,0xf7,0x00,0x00, -0xfb,0xfb,0xf8,0x84,0x00,0x07,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xfc,0x92,0x04,0xfd, -0x01,0xfc,0x9e,0x02,0xfb,0x01,0xac,0x90,0x0e,0xfd,0x01,0xfe,0x96,0x01,0xfd,0x01, -0xfe,0x94,0x01,0xfd,0x01,0xfe,0x8a,0xfd,0x01,0xfe,0x90,0x02,0xfd,0x01,0xfc,0x91, -0x01,0xfb,0x01,0xfc,0x8f,0xfb,0x01,0xfc,0x8b,0xfb,0x01,0xfc,0x90,0x04,0xfd,0x01, -0xfc,0x9e,0x02,0xfb,0x01,0xfc,0x99,0x13,0xfd,0x01,0xac,0x9f,0x02,0xfb,0x01,0xac, -0x9c,0x03,0xfd,0x01,0xac,0x90,0x03,0xfb,0x01,0xac,0x9a,0x13,0xfd,0x01,0xac,0x90, -0x03,0xfb,0x01,0xac,0x98,0x03,0xfd,0x01,0xac,0x91,0x03,0xfb,0x01,0xac,0x9c,0x13, -0xfd,0x01,0xfc,0x91,0x03,0xfb,0x01,0xac,0x94,0x03,0xfd,0x01,0xac,0x92,0x03,0xfb, -0x01,0xfc,0x9d,0x13,0xfd,0x02,0xac,0xfc,0x92,0x03,0xfb,0x01,0xac,0x90,0x03,0xfd, -0x01,0xac,0x94,0x03,0xfb,0x01,0xac,0x9e,0x13,0xfd,0x01,0xfc,0x94,0x03,0xfb,0x02, -0xfc,0xac,0x9a,0x02,0xfd,0x02,0xac,0xfc,0x95,0x03,0xfb,0x01,0xac,0x90,0x14,0xfd, -0x01,0xac,0x95,0x03,0xfb,0x02,0xfc,0xac,0x96,0x02,0xfd,0x02,0xac,0xfc,0x96,0x03, -0xfb,0x01,0xfc,0x92,0x14,0xfd,0x01,0xfc,0x98,0x03,0xfb,0x01,0xac,0x90,0x02,0xfd, -0x01,0xac,0x99,0x03,0xfb,0x01,0xfc,0x93,0x14,0xfd,0x02,0xac,0xfc,0x9a,0x03,0xfb, -0x04,0xfc,0xfc,0xac,0xac,0x94,0x01,0xfd,0x04,0xac,0xac,0xfc,0xfc,0x9b,0x03,0xfb, -0x02,0xfc,0xac,0x94,0x14,0xfd,0x01,0xac,0x90,0x04,0xfb,0x01,0xfc,0x8d,0xac,0x02, 
-0xfc,0xfc,0x91,0x04,0xfb,0x01,0xac,0x96,0x14,0xfd,0x01,0xac,0x9f,0x08,0xfb,0x01, -0xac,0x98,0x14,0xfd,0x01,0xfc,0x9d,0x08,0xfb,0x01,0xfc,0x9a,0x14,0xfd,0x01,0xfc, -0x9b,0x08,0xfb,0x01,0xfc,0x9c,0x14,0xfd,0x01,0xfc,0x99,0x08,0xfb,0x01,0xfc,0x9e, -0x14,0xfd,0x01,0xfc,0x97,0x08,0xfb,0x01,0xfc,0x9f,0x14,0xfd,0x02,0xac,0xfc,0x95, -0x08,0xfb,0x02,0xfc,0xac,0x90,0x15,0xfd,0x02,0xac,0xfc,0x93,0x08,0xfb,0x02,0xfc, -0xac,0x92,0x15,0xfd,0x02,0xac,0xfc,0x91,0x08,0xfb,0x02,0xfc,0xac,0x95,0x15,0xfd, -0x01,0xfc,0x9f,0x07,0xfb,0x01,0xfc,0x98,0x15,0xfd,0x01,0xfc,0x9d,0x07,0xfb,0x01, -0xfc,0x98,0x10,0xfd,0x03,0x2b,0x2b,0xfb,0x9f,0x01,0xfd,0x02,0xf7,0xfa,0x8a,0xfd, -0x03,0xfe,0xf9,0xfa,0x86,0xfd,0x01,0xac,0x8b,0xf9,0x01,0xfc,0x83,0xfd,0x02,0xf8, -0xfa,0x89,0xfd,0x09,0xac,0xfb,0xfb,0xf8,0x2b,0xf7,0xfb,0xfa,0xf7,0x8a,0xfb,0x01, -0x2b,0x96,0x01,0xfb,0x02,0x00,0x2b,0x87,0xfb,0x01,0x81,0xc2,0xf8,0xf8,0x56,0x83, -0xf8,0x03,0x56,0xf8,0xfa,0x84,0xfb,0x02,0xf8,0x81,0x84,0xfb,0x02,0x81,0xf6,0x85, -0xfb,0x02,0xf8,0xf8,0x8a,0xfb,0x02,0xfa,0xf9,0x8e,0xfb,0x02,0xf7,0xf9,0x8c,0xfb, -0x02,0xf6,0xac,0x9e,0x03,0xfd,0x06,0xf9,0xfd,0xfd,0x56,0xf9,0x00,0x86,0xfd,0x02, -0xf7,0x81,0x98,0x01,0xfd,0x02,0xf9,0x81,0x9e,0x08,0xfd,0x01,0xac,0x8c,0xfd,0x02, -0xf9,0x00,0x85,0xfd,0x04,0xf9,0xf8,0xfb,0x2b,0x9e,0x01,0xfd,0x03,0xac,0x00,0xac, -0x8b,0xfd,0x05,0x00,0xfb,0xfd,0xfd,0xac,0x83,0xfd,0x01,0xfb,0x85,0x2b,0x01,0x00, -0x85,0x2b,0x01,0xfb,0x83,0xfd,0x04,0x2b,0xfa,0xfd,0xfa,0x84,0x00,0x04,0x2b,0xfd, -0xfd,0xf6,0x83,0x00,0x05,0x56,0xfb,0xfb,0xf8,0x2b,0x8a,0xfb,0x01,0x00,0x88,0xfb, -0x01,0x2b,0x85,0x00,0x01,0x56,0x87,0xfb,0x0a,0xfa,0x00,0x00,0xf8,0xfb,0x2b,0x56, -0xfb,0xfb,0xfa,0x83,0x2b,0xc2,0x00,0x00,0x2b,0x03,0x2b,0x2b,0xfa,0x84,0xfb,0x01, -0x00,0x84,0xfb,0x04,0x56,0x00,0x2b,0xf8,0x84,0xfb,0x07,0x2b,0x2b,0xfb,0xfb,0x81, -0xf8,0xfa,0x85,0xfb,0x06,0xf6,0xf8,0xfb,0x2b,0x2b,0xf8,0x8a,0xfb,0x02,0x00,0x56, -0x8b,0xfb,0x02,0xfc,0x00,0x95,0x01,0xfd,0x0d,0xfc,0xf9,0xfd,0xfd,0xfb,0xfa,0xf9, 
-0x2b,0x00,0xac,0xfd,0xfd,0xfa,0x88,0xf9,0x01,0xac,0x94,0x01,0xfd,0x07,0x00,0xfd, -0xfd,0x81,0x00,0x2b,0xf9,0x84,0xfd,0x03,0xac,0x00,0xfc,0x88,0xfd,0x01,0xac,0x8f, -0xfd,0x05,0x00,0xfb,0xfd,0xfd,0xac,0x9e,0x07,0xfd,0x89,0x00,0x84,0xfd,0x03,0x00, -0x2b,0xfc,0x8a,0xfd,0x02,0xf9,0x00,0x85,0xfd,0x04,0xfb,0x2b,0x2b,0xfb,0x9d,0x01, -0xfd,0x02,0xfe,0xf6,0x86,0x00,0x04,0xfd,0xfd,0xfe,0xfa,0x88,0x00,0x01,0xfa,0x87, -0xfd,0x02,0xfe,0x00,0xc2,0xfd,0xfe,0xfd,0x02,0xfd,0x2b,0x84,0x00,0x08,0x81,0xfd, -0xfd,0xfe,0xf9,0x2b,0xfd,0xfb,0x83,0x2b,0x06,0x00,0xf6,0x2b,0xfb,0x56,0x2b,0x84, -0xfb,0x01,0xf7,0x83,0x2b,0x86,0x00,0x01,0x2b,0x84,0xfb,0x06,0xf9,0xf8,0xf9,0xfb, -0x00,0x00,0x8a,0xfb,0x01,0xfa,0xa2,0x00,0x81,0x83,0xfb,0x03,0xfa,0x56,0xf8,0xc2, -0x00,0x2b,0x56,0x02,0xf8,0x81,0x84,0xfb,0x02,0x81,0x00,0x84,0xfb,0x06,0x81,0x2b, -0xf7,0xf8,0xfb,0xf8,0x87,0x00,0x02,0x2b,0xf9,0x85,0xfb,0x07,0x00,0xf9,0x00,0x2b, -0x56,0x00,0xf7,0x84,0xfb,0x02,0x56,0x00,0x83,0x2b,0x85,0x00,0x83,0xfb,0x01,0xf7, -0x83,0x2b,0x86,0x00,0x01,0x2b,0x91,0x01,0xfd,0x0d,0xfa,0x00,0xfd,0xfd,0xf9,0x00, -0xf7,0x81,0xf7,0xfa,0xfd,0xfd,0x00,0x87,0x2b,0x02,0x00,0x56,0x94,0x01,0xfd,0x01, -0x00,0x83,0xfd,0x01,0x56,0x85,0xfd,0x02,0xfe,0xf6,0x86,0x00,0x84,0xfd,0x03,0x00, -0x2b,0xfc,0x89,0xfd,0x01,0x81,0x88,0x00,0x01,0xfa,0x94,0x08,0xfd,0x02,0xf9,0x00, -0x85,0xfd,0x03,0x2b,0x00,0x81,0x84,0xfd,0x02,0x56,0xf8,0x83,0xfd,0x02,0xfa,0x00, -0x83,0xfd,0x05,0xac,0xf6,0x00,0xf9,0xfe,0x9e,0x01,0xfd,0x03,0xf9,0x00,0xac,0x83, -0xfd,0x02,0xf7,0x2b,0x86,0xfd,0x02,0x00,0xfb,0x89,0xfd,0x89,0x00,0x85,0xfd,0x03, -0x2b,0xf9,0xfe,0x84,0xfd,0x0b,0xf9,0x2b,0xfd,0xac,0xf9,0xfa,0xf7,0x2b,0xf9,0xf8, -0x2b,0x84,0x00,0x02,0xfb,0xfb,0xa2,0xf8,0x56,0x03,0xf8,0xfa,0x00,0x83,0xfb,0x01, -0x81,0x87,0xfb,0x02,0x00,0x2b,0x89,0xfb,0x01,0x00,0x83,0xfb,0x02,0x2b,0x2b,0x84, -0xfb,0x01,0xf6,0xc3,0x00,0x2b,0x00,0x0d,0x56,0xfb,0xfb,0x81,0xf8,0x2b,0x00,0x2b, -0xf7,0xfb,0x81,0x00,0x56,0x83,0xfb,0x08,0xfc,0xfb,0xfb,0x2b,0x56,0xfb,0xfb,0x2b, 
-0x87,0xfb,0x07,0x00,0x2b,0xf7,0xfb,0xfb,0xfa,0x00,0x85,0xfb,0x07,0xf9,0xf8,0x56, -0xf8,0x00,0x2b,0xfa,0x85,0xfb,0xa2,0xf8,0x56,0x03,0xf9,0xfb,0x00,0x83,0xfd,0x01, -0xac,0x91,0x01,0xfd,0x0e,0x56,0x2b,0xfd,0xfd,0xfe,0x00,0xac,0x2b,0xf8,0xfe,0xfd, -0xfd,0x00,0xfa,0xa2,0xfd,0xfe,0x04,0xfd,0xfd,0x00,0xac,0x8f,0xfd,0x01,0x2b,0x89, -0x00,0x01,0x2b,0x84,0xfd,0x03,0xf9,0x00,0xac,0x83,0xfd,0x02,0xf7,0x2b,0x85,0xfd, -0x03,0x2b,0x00,0x81,0x84,0xfd,0x02,0x56,0xf8,0x85,0xfd,0x02,0x00,0xfb,0x98,0x08, -0xfd,0x03,0xfe,0xf9,0x00,0x85,0xfd,0x03,0xfe,0xf8,0xf6,0x84,0xfd,0x02,0x00,0xf8, -0x83,0xfd,0x08,0xf9,0x00,0xac,0x56,0x2b,0x00,0x2b,0xac,0x9f,0x01,0xfd,0x04,0xfa, -0x00,0x81,0xfa,0x83,0xfd,0x02,0x00,0x81,0x85,0xfd,0x04,0x2b,0x2b,0xf7,0xf6,0xc2, -0xac,0xfd,0xfd,0xe3,0xfd,0xfd,0x00,0xfd,0x08,0xfd,0xfb,0x2b,0x2b,0x00,0x2b,0xf6, -0xf9,0x84,0x00,0x03,0x2b,0xfd,0xfd,0x86,0x00,0x05,0xfb,0x56,0x2b,0xfb,0x00,0x85, -0xfb,0x04,0xf7,0x00,0x2b,0x00,0x89,0xfb,0x03,0x81,0x00,0xf7,0x88,0xfb,0x09,0x2b, -0xfb,0x00,0xfb,0xfb,0xfa,0x00,0xfa,0xf8,0x83,0xfb,0x0d,0x2b,0xf8,0xfc,0x2b,0xf8, -0xfb,0x2b,0x56,0xfb,0x2b,0xf8,0xfb,0x81,0xa2,0x00,0x2b,0x06,0xf8,0x2b,0x00,0xfb, -0xf9,0x00,0x86,0xfb,0x84,0x2b,0x01,0x00,0x87,0xfb,0x02,0x00,0x00,0x84,0xfb,0x02, -0x00,0x56,0x84,0xfb,0x09,0xfa,0xfb,0xfb,0xfc,0x00,0xf8,0x56,0xf8,0xf7,0x85,0xfb, -0x05,0xac,0xf8,0x00,0x2b,0x00,0x95,0x01,0xfd,0x02,0x2b,0xf7,0x83,0xfd,0x03,0x00, -0x81,0xfe,0x84,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x02,0xfb,0x00,0x95,0x01,0xfd,0x01, -0x00,0x88,0xfd,0x04,0xfa,0x00,0x81,0xfa,0x83,0xfd,0x02,0x00,0x81,0x85,0xfd,0x03, -0xfe,0xf8,0xf6,0x84,0xfd,0x02,0x00,0xf8,0x84,0xfd,0x04,0x2b,0x2b,0xf7,0xf6,0xc2, -0xac,0xfd,0xfd,0x92,0x08,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x01,0xfe,0x83,0xfd,0x02, -0x56,0x00,0x84,0xfd,0x05,0xf9,0x00,0x00,0x2b,0x56,0x89,0xfd,0x01,0x2b,0x85,0x00, -0x85,0xfd,0x01,0xf8,0x89,0x2b,0x0c,0xf8,0xfd,0xfd,0xf9,0x00,0x81,0xfa,0x00,0x2b, -0xfd,0xf8,0x00,0x85,0xfd,0x0a,0x81,0x00,0x00,0x56,0xf8,0x00,0x56,0x2b,0x00,0xf9, 
-0x83,0xfd,0x89,0x00,0x0a,0xfd,0xfd,0xac,0xf9,0xf9,0x2b,0xf7,0xfa,0xfb,0x00,0x86, -0xfd,0x0b,0x00,0x2b,0x2b,0x00,0x2b,0x00,0xac,0xf6,0x2b,0xfb,0x00,0x84,0xfb,0x05, -0x56,0x00,0xfb,0xf8,0x00,0x88,0xfb,0x01,0x81,0x85,0x00,0x01,0xf7,0x84,0xfb,0x01, -0x56,0xa2,0x00,0xfb,0x09,0xfb,0x00,0xf8,0x81,0x00,0x81,0xfb,0xfb,0x2b,0x89,0x00, -0x01,0x56,0x84,0xfb,0x02,0x00,0xfa,0xc2,0xfb,0xfb,0x00,0x01,0xf7,0x84,0xfb,0x04, -0x2b,0x00,0x00,0xf8,0x83,0x00,0x01,0xf7,0x85,0xfb,0x02,0x00,0xf8,0x84,0xfb,0x02, -0x00,0x56,0x84,0xfb,0x86,0x00,0x03,0x2b,0x2b,0xf8,0x83,0xfb,0x07,0xfc,0xac,0xf9, -0x2b,0xfd,0xf9,0x00,0x95,0x01,0xfd,0x02,0x00,0x81,0x83,0xfd,0x02,0x2b,0xf7,0x85, -0xfd,0x02,0x2b,0x81,0x85,0xfd,0x02,0xf8,0x00,0x83,0xfd,0x01,0xf8,0x89,0x2b,0x01, -0xf8,0x84,0xfd,0x07,0x00,0xac,0xfd,0x00,0xfd,0xac,0x00,0x84,0xfd,0x09,0xf9,0x00, -0x81,0xfa,0x00,0x2b,0xfd,0xf8,0x00,0x88,0xfd,0x01,0xfe,0x83,0xfd,0x02,0x56,0x00, -0x84,0xfd,0x0a,0xfa,0x00,0x00,0x56,0x56,0x00,0x56,0x2b,0x00,0xf9,0x93,0x08,0xfd, -0x02,0xf9,0x00,0x8a,0xfd,0x03,0xfa,0x00,0x81,0x84,0xfd,0x02,0xfa,0x00,0x8f,0xfd, -0x03,0xfe,0xf9,0x00,0x85,0xfd,0x01,0xf8,0x89,0x2b,0x01,0xf8,0x83,0xfd,0x08,0xfb, -0xfd,0xfd,0xfe,0x2b,0x00,0x00,0x81,0x84,0xfd,0x09,0xf9,0x00,0x56,0xfd,0xac,0xf7, -0x00,0xf6,0x81,0x85,0xfd,0xe3,0x00,0xfd,0xfd,0xfd,0x07,0x2b,0x56,0x2b,0xf9,0xfd, -0xfd,0x00,0x83,0xfd,0x0e,0xac,0xfd,0xfd,0x00,0xf9,0xf7,0x2b,0xf9,0x00,0xfd,0x2b, -0x56,0xfb,0x00,0x83,0xfb,0x07,0xfc,0xf6,0x56,0xfb,0x81,0x00,0xf9,0x86,0xfb,0x09, -0x56,0x00,0x00,0xf7,0x81,0xfb,0xfa,0x00,0x00,0x83,0xfb,0x0b,0x2b,0x2b,0xfb,0x00, -0xfb,0x2b,0x2b,0xfb,0xfb,0xf7,0x2b,0x85,0xfb,0x03,0xfa,0x00,0x81,0x88,0xfb,0x02, -0x81,0x00,0x83,0xfb,0x05,0x00,0xfb,0xfb,0xfa,0x00,0x83,0xfb,0x0a,0x00,0x2b,0x2b, -0x56,0x81,0x00,0xfb,0x81,0x00,0x56,0x84,0xfb,0x02,0x00,0x81,0x84,0xfb,0x02,0x00, -0xf8,0x88,0xfb,0x02,0x2b,0x56,0xc2,0xfb,0xfb,0xfc,0x08,0xac,0xfd,0x2b,0xf9,0xfd, -0xac,0x00,0x81,0x93,0x01,0xfd,0x02,0xfb,0x00,0x84,0xfd,0x02,0x81,0x00,0x8c,0xfd, 
-0x02,0x00,0xf9,0x83,0xfd,0x01,0xf8,0x89,0x2b,0x01,0xf8,0x83,0xfd,0x01,0x81,0xc2, -0x00,0xfd,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x09,0xfe,0xfb,0xfd,0xfd,0xfe,0x2b,0x00, -0x00,0x81,0x8b,0xfd,0x03,0xfa,0x00,0x81,0x83,0xfd,0x09,0xf9,0x00,0x56,0xfd,0xac, -0xf7,0x00,0xf6,0x81,0x95,0x08,0xfd,0x02,0xfa,0x00,0x89,0xfd,0x03,0xf9,0x00,0xf8, -0x85,0xfd,0x02,0xf9,0x00,0x90,0x01,0xfd,0x02,0x56,0x2b,0x98,0x01,0xfd,0x02,0x00, -0x2b,0x85,0xfd,0x08,0x81,0xf9,0xfd,0x81,0x00,0xf7,0x00,0xfe,0x84,0xfd,0x02,0xfb, -0xf9,0xe2,0x00,0xf9,0xf9,0xfa,0x0b,0x00,0xf9,0xac,0xfd,0x2b,0xf9,0x2b,0x00,0x00, -0xf8,0x00,0x83,0xfd,0x04,0xf6,0xfa,0xfd,0x2b,0x84,0x00,0x06,0x2b,0xfe,0x00,0xfb, -0xac,0x00,0x84,0xfb,0xa2,0x56,0x00,0x02,0x00,0xf8,0x85,0xfb,0x03,0xf9,0x00,0x56, -0x86,0xfb,0x0a,0x00,0x56,0xfb,0xfb,0x00,0xfa,0xfb,0x00,0xf7,0x00,0x83,0xfb,0x04, -0x81,0x00,0xfb,0xf7,0x8b,0x00,0x05,0x56,0xfb,0xfb,0x2b,0x2b,0x83,0xfb,0x01,0x00, -0x83,0xfb,0x0e,0x2b,0x81,0xfb,0x56,0x00,0xfb,0x2b,0xf8,0x00,0xf8,0xfb,0xfb,0x2b, -0x2b,0x84,0xfb,0x01,0x00,0x84,0xfb,0x02,0x81,0x00,0x86,0xfb,0x05,0xf9,0x2b,0x2b, -0x00,0xf7,0x84,0xfb,0x01,0xac,0x83,0xfd,0x06,0xf9,0x00,0xf9,0x2b,0x00,0xf9,0x93, -0x01,0xfd,0x02,0x2b,0x2b,0x85,0xfd,0x02,0x00,0xf9,0x8a,0xfd,0x02,0xf7,0x00,0x92, -0x01,0xfd,0x09,0xf6,0xf7,0xfd,0xfd,0x00,0xfd,0xfd,0x56,0x2b,0x89,0xfd,0x02,0x00, -0x2b,0x8b,0xfd,0x03,0xf9,0x00,0xf8,0x84,0xfd,0x08,0x81,0xf9,0xfe,0xfa,0x00,0xf7, -0x00,0xfe,0x96,0x08,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x04,0xfb,0x2b,0x00,0x81,0x86, -0xfd,0x02,0xf9,0x00,0x90,0x01,0xfd,0x02,0x2b,0xf7,0x97,0x01,0xfd,0x02,0x2b,0x2b, -0x88,0xfd,0x05,0xac,0x00,0xfa,0xfe,0x00,0x84,0xfd,0x04,0xfe,0xf9,0x2b,0x00,0x86, -0x2b,0x05,0xf6,0x00,0x2b,0xfb,0xfd,0x83,0x2b,0x04,0xfa,0xfd,0xfd,0x00,0x83,0xfd, -0x03,0x00,0xfb,0xfd,0x83,0x2b,0x09,0x00,0x2b,0xf8,0xfd,0x00,0xfd,0xfd,0x00,0xfc, -0x84,0xfb,0x05,0x56,0x2b,0xf8,0x00,0x81,0x86,0xfb,0x02,0xfa,0xfb,0x83,0x00,0x0c, -0x56,0xfb,0xfb,0x00,0xf9,0xfb,0x56,0x00,0xfb,0xfb,0x00,0x00,0x83,0xfb,0x0d,0x00, 
-0xfb,0x00,0xf8,0xfb,0xfb,0x81,0x00,0xf8,0xfb,0xfb,0xf9,0x00,0x86,0xfb,0x06,0x00, -0xfa,0xfb,0xfb,0xf8,0x00,0x86,0xfb,0x06,0x2b,0x56,0xfb,0x2b,0x00,0x00,0x83,0xfb, -0x02,0x00,0xf7,0x89,0xfb,0x02,0x2b,0x00,0x85,0xfb,0x04,0x00,0x00,0xf8,0xf8,0x83, -0x00,0x03,0xf8,0xfc,0xac,0x84,0xfd,0x06,0xfe,0xf9,0x2b,0xf9,0x00,0xac,0x85,0xfd, -0x04,0x2b,0x00,0x00,0xfc,0x89,0xfd,0x03,0xfa,0x00,0xfc,0x85,0xfd,0x03,0xf9,0x00, -0xac,0x88,0xfd,0x03,0xf8,0x00,0xfb,0x91,0x01,0xfd,0x06,0xfb,0x00,0xac,0xfd,0xfd, -0x00,0x83,0xfd,0x02,0x00,0x81,0x87,0xfd,0x02,0x2b,0x2b,0x8a,0xfd,0x04,0xfb,0x2b, -0x00,0x81,0x86,0xfd,0x06,0xfe,0xfc,0x00,0x81,0xfd,0x00,0x90,0x08,0xfd,0x89,0x00, -0x83,0xfd,0x07,0xfc,0xf9,0xf8,0x00,0x00,0x2b,0xac,0x87,0xfd,0x03,0xfb,0x00,0x81, -0x83,0xfd,0x03,0xfb,0xf9,0x56,0x89,0xfd,0x02,0x00,0xf9,0x95,0x01,0xfd,0x03,0xfb, -0x00,0x2b,0x89,0xfd,0x05,0xf9,0x00,0xfd,0xfd,0xfa,0x87,0xfd,0x02,0x00,0xfe,0x83, -0xfd,0x04,0xfe,0xfd,0xfe,0x00,0x83,0xfd,0x07,0x00,0x2b,0x00,0xf9,0xfd,0xfd,0x2b, -0x83,0x00,0x10,0x2b,0xfd,0xfd,0xf9,0xfa,0xf7,0x2b,0xf9,0x56,0xf8,0x2b,0xfd,0xfa, -0x00,0xfd,0xac,0x85,0xfb,0x02,0x00,0x2b,0x88,0xfb,0x0f,0xf8,0x2b,0xfb,0xf9,0x00, -0xfb,0x2b,0x00,0xfb,0xfb,0x81,0x56,0xfa,0x00,0x00,0x84,0xfb,0x0d,0x00,0xfb,0xf8, -0xf9,0xfb,0xfb,0x56,0x2b,0x00,0x00,0x2b,0x00,0x81,0x85,0xfb,0x07,0xf7,0x00,0x81, -0x81,0xfb,0x2b,0x2b,0x86,0xfb,0x05,0x2b,0x2b,0x81,0x00,0x00,0x83,0xfb,0x02,0x2b, -0x00,0x89,0xfb,0x03,0x2b,0x00,0x81,0x85,0xfb,0x01,0x00,0x83,0xfb,0x06,0x2b,0x56, -0xf9,0x00,0x00,0xac,0x87,0xfd,0x02,0x2b,0x2b,0x85,0xfd,0x05,0xfa,0x2b,0xfd,0xf8, -0xf7,0x88,0xfd,0x04,0xfb,0x00,0x56,0xfe,0x86,0xfd,0x03,0x2b,0x00,0xac,0x85,0xfd, -0x04,0xfb,0xf6,0x00,0x81,0x91,0x01,0xfd,0x03,0xac,0x00,0x56,0x83,0xfd,0x01,0x00, -0x83,0xfd,0x03,0xf9,0x00,0xac,0x84,0xfd,0x03,0xfb,0x00,0x2b,0x87,0xfd,0x07,0xfc, -0xf9,0xf8,0x00,0x00,0x2b,0xac,0x88,0xfd,0x05,0xf9,0x00,0xfd,0xfd,0x81,0x97,0x08, -0xfd,0x02,0x81,0x2b,0x83,0xfd,0x04,0xfe,0x00,0x2b,0x56,0x8b,0xfd,0x01,0x56,0x85, 
-0x00,0x02,0x2b,0xf8,0x84,0xfd,0x88,0x00,0x01,0x2b,0x91,0x01,0xfd,0x04,0xfb,0x2b, -0x00,0x56,0x8b,0xfd,0x02,0x2b,0x00,0x83,0x2b,0x02,0x00,0x00,0x84,0xfd,0x01,0x00, -0x87,0xfd,0x09,0x00,0xfd,0xfd,0xf9,0x2b,0xfd,0x2b,0x00,0xf7,0x83,0xf9,0x06,0xfa, -0xf9,0xf9,0x56,0xfb,0x2b,0x84,0x00,0x0e,0xf7,0x00,0xfb,0xfd,0xf8,0x2b,0xfd,0xfd, -0xac,0xfc,0xfc,0xf7,0x00,0x2b,0x8a,0xfb,0x03,0x00,0x00,0x2b,0x83,0x00,0x08,0x81, -0xfb,0xfb,0xfa,0x2b,0x00,0xf8,0x00,0x83,0xfb,0x02,0x56,0x00,0x85,0xfb,0x0f,0x81, -0x56,0x2b,0x00,0x2b,0x2b,0x00,0x00,0xf8,0xfb,0xfb,0x81,0x00,0xfa,0x56,0x83,0x00, -0x01,0xfa,0x86,0xfb,0x0a,0x81,0x00,0x00,0xf7,0xfb,0xf8,0x00,0x00,0x2b,0x81,0x86, -0xfb,0x05,0xf8,0x2b,0x00,0x00,0x81,0x86,0xfb,0x0a,0x2b,0x00,0x2b,0x2b,0x00,0xfb, -0xac,0xfd,0x2b,0xfc,0x85,0xfd,0x03,0x56,0x00,0x2b,0x86,0xfd,0x05,0xfb,0x00,0xac, -0x2b,0xf8,0x88,0xfd,0x02,0xac,0xf7,0x88,0xfd,0x02,0xac,0xf6,0x85,0xfd,0x04,0xf9, -0x00,0x2b,0xac,0x93,0x01,0xfd,0x01,0xf9,0x84,0xfd,0x01,0x00,0x84,0xfd,0x01,0x56, -0x83,0xfd,0x04,0xfb,0x2b,0x00,0x56,0x88,0xfd,0x04,0xfe,0x00,0x2b,0x56,0x8c,0xfd, -0x07,0x2b,0x00,0x2b,0x2b,0xf6,0x00,0x00,0x9e,0x0b,0xfd,0x03,0xac,0xf6,0xac,0x8d, -0xfd,0x01,0xac,0x83,0xf9,0x01,0xfb,0x85,0xfd,0x01,0x2b,0x84,0xfd,0x0c,0xfa,0x00, -0x00,0x2b,0xfd,0xfd,0xf9,0x81,0xfd,0xfd,0xac,0xf9,0x86,0x2b,0x0d,0xf9,0xac,0xf9, -0xfd,0xfd,0xfe,0xfd,0x2b,0x56,0xf9,0x00,0x00,0xfb,0x84,0xfd,0x04,0xac,0xf7,0xf8, -0xfc,0x8b,0xfb,0x03,0xf9,0xf8,0xf9,0x85,0xfb,0x05,0x81,0x2b,0x81,0xfb,0x2b,0x84, -0x00,0x01,0xf9,0x83,0xfb,0x06,0xf8,0x00,0x00,0x2b,0xf7,0xfc,0x83,0xfb,0x06,0x56, -0x00,0x00,0xfb,0xfb,0xf9,0x83,0xfb,0x02,0x56,0xf9,0x8c,0xfb,0x02,0x81,0xf8,0x89, -0xfb,0x03,0xfa,0x2b,0xfa,0x89,0xfb,0x04,0x81,0xf8,0x56,0xac,0x8a,0xfd,0x03,0xf7, -0xf9,0xfe,0x87,0xfd,0x03,0x56,0x00,0x2b,0x94,0x01,0xfd,0x01,0xfe,0x86,0xfd,0x01, -0xac,0x9a,0x01,0xfd,0x01,0x2b,0x88,0xfd,0x03,0xfc,0x2b,0xac,0x9a,0x01,0xfd,0x05, -0xac,0xf9,0xf9,0xfa,0xfb,0x90,0x0c,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x01,0xfe,0x98, 
-0x01,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x8a,0xfd,0x01,0xfe,0x87,0xfd,0x02,0xac, -0xfc,0x97,0x05,0xfb,0x03,0xfc,0xac,0xfe,0x9e,0x17,0xfd,0x02,0xac,0xfc,0x93,0x05, -0xfb,0x02,0xfc,0xac,0x93,0x18,0xfd,0x02,0xac,0xfc,0x9f,0x04,0xfb,0x02,0xfc,0xac, -0x97,0x18,0xfd,0x03,0xac,0xfc,0xfc,0x9a,0x04,0xfb,0x02,0xac,0xac,0x9d,0x0f,0xfd, -0x09,0xf8,0xfd,0xf8,0xfa,0x81,0xf9,0xfd,0x2b,0xfc,0x85,0xfd,0x03,0xac,0xf8,0x00, -0x83,0xfd,0x01,0x2b,0x85,0xfd,0x07,0x81,0xf9,0xf9,0xfa,0xfa,0xfd,0xfa,0x83,0xf9, -0x01,0x81,0x83,0xfd,0x04,0x2b,0xfd,0xfd,0xfa,0x83,0xf9,0x03,0xfa,0xf9,0xfa,0x84, -0xfd,0x02,0xf9,0xac,0x8a,0xfd,0x02,0xf9,0xf8,0x84,0xfd,0x01,0x2b,0x86,0xfd,0x01, -0xfb,0x85,0xfd,0x02,0xfb,0xfa,0x94,0x01,0xfd,0x02,0xf8,0xfa,0x92,0x02,0xfd,0x02, -0xfc,0xfc,0x9b,0x01,0xfb,0x01,0xf8,0x8e,0xfb,0x06,0xf8,0xfb,0xfb,0xf8,0xf8,0x00, -0x85,0xfb,0x03,0xfc,0xf6,0xf9,0x8d,0xfb,0x02,0xfc,0xac,0x89,0xfd,0x02,0xf9,0xfa, -0x87,0xfd,0x04,0xfe,0xf6,0xfd,0xfd,0x86,0xf9,0x01,0xfa,0x84,0xfd,0x02,0xf9,0xac, -0x9c,0x01,0xfd,0x02,0xac,0xfa,0x88,0xfd,0x01,0xf8,0x84,0xfd,0x02,0xfb,0x2b,0x88, -0xfd,0x01,0xf7,0x97,0x0a,0xfd,0x08,0xf8,0x2b,0x2b,0xfa,0x00,0x81,0xfd,0x00,0x84, -0xfd,0x05,0xfc,0x00,0x00,0x2b,0xac,0x83,0xfd,0x01,0x00,0x85,0xfd,0x01,0x00,0x83, -0x2b,0x03,0x00,0xfd,0x00,0x83,0x2b,0x01,0x00,0x83,0xfd,0x03,0x00,0xfd,0xfd,0xc2, -0x00,0x2b,0x2b,0x01,0x00,0x84,0xfd,0x02,0x00,0xf9,0x8a,0xfd,0x07,0x2b,0xfb,0xfb, -0xf6,0x2b,0x2b,0x00,0x83,0x2b,0x83,0xfd,0x02,0x2b,0xf8,0x84,0xfd,0x02,0xf8,0x2b, -0x86,0xfd,0x01,0x2b,0x85,0x00,0x01,0x81,0x87,0xfd,0x02,0x00,0xfa,0x97,0x01,0xfd, -0x01,0x00,0x84,0xfd,0x02,0xf9,0x00,0x86,0xfd,0x03,0xac,0x00,0xfc,0x98,0x01,0xfb, -0x01,0x00,0x8e,0xfb,0x07,0x00,0xfb,0xfb,0xf9,0x00,0x2b,0xf8,0x84,0xfb,0x03,0xfa, -0x00,0x81,0x88,0xfb,0x05,0x81,0xfb,0xfc,0xfc,0xac,0x8b,0xfd,0x05,0x00,0xfb,0xfd, -0xfd,0xfc,0x85,0xfd,0x03,0x00,0xfd,0xfd,0xc2,0x00,0x2b,0x2b,0x01,0x00,0x84,0xfd, -0x02,0x00,0xf9,0x93,0x01,0xfd,0x01,0xfb,0x87,0xfd,0x03,0xfc,0x00,0x2b,0x87,0xfd, 
-0x02,0xac,0x00,0x83,0xfd,0x04,0x81,0x00,0x2b,0xf7,0x87,0xfd,0x02,0x00,0x56,0x86, -0xfd,0x02,0xfc,0x81,0x9d,0x09,0xfd,0x02,0xac,0xf9,0x84,0x2b,0x03,0xfa,0x56,0x00, -0x83,0xf9,0x08,0xfb,0xfd,0xfd,0xfa,0xf6,0xfd,0xfe,0xf6,0xa2,0xfd,0x00,0x83,0xfd, -0x85,0x00,0x01,0xfd,0x85,0x00,0x0d,0xfd,0xfb,0xf9,0x00,0xf9,0xfb,0x00,0xfe,0xfd, -0x00,0xfd,0xfe,0x00,0x84,0xfd,0x02,0x00,0xac,0x88,0xfd,0x02,0xfb,0x00,0xa2,0x2b, -0xf9,0x03,0xfa,0xf9,0x00,0x83,0xf9,0x83,0xfd,0x02,0x00,0xfa,0x84,0xfd,0x02,0xf9, -0x00,0x86,0xfd,0x06,0x81,0xf9,0x81,0xfd,0x2b,0x2b,0x88,0xfd,0x01,0x00,0x84,0xfd, -0x01,0x56,0x93,0x01,0xfd,0x01,0x00,0x84,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x05,0x00, -0xfd,0xac,0xfc,0xfc,0x95,0x01,0xfb,0x01,0x00,0x8e,0xfb,0x01,0x00,0x83,0xfb,0x03, -0xf7,0xfb,0xfc,0x84,0xfb,0x01,0x2b,0x86,0x00,0x83,0xfb,0x04,0xfc,0x00,0xf6,0xfc, -0x89,0xfd,0x01,0xfa,0x88,0x00,0x08,0xfa,0xfd,0xfd,0xfb,0xf9,0x00,0xf9,0xfb,0xc2, -0x00,0xfd,0xfe,0x01,0x00,0x84,0xfd,0x02,0x00,0xac,0x89,0xfd,0x02,0xf8,0x2b,0x88, -0x00,0x01,0xf7,0x86,0xfd,0x03,0xfc,0x00,0x2b,0x88,0xfd,0x09,0xf9,0x00,0xf9,0xf8, -0xfd,0xfd,0x2b,0xf9,0xfb,0x86,0xfd,0x06,0xfe,0xf9,0x00,0xf9,0x2b,0x00,0x83,0xfd, -0x02,0xf9,0x00,0x85,0xfd,0x02,0xf9,0xfa,0x96,0x09,0xfd,0x07,0xfb,0x2b,0x2b,0x00, -0x00,0x2b,0xf8,0x83,0x2b,0x08,0x00,0x2b,0xf9,0xfb,0x2b,0xf6,0x00,0x2b,0xa2,0xfd, -0x00,0x03,0xfd,0x2b,0x81,0xe2,0xfd,0xfd,0x00,0xfd,0x0a,0x00,0xfe,0xfd,0xfd,0x00, -0xfd,0xf9,0x2b,0x00,0x2b,0xc2,0xf9,0x00,0xf9,0x02,0xf9,0x00,0x84,0xfd,0x01,0x00, -0x8a,0xfd,0x0b,0xf8,0x2b,0x00,0xac,0xf8,0x2b,0x2b,0x00,0x2b,0x2b,0xf8,0x83,0xfd, -0x03,0x00,0xfe,0xfb,0x83,0xf9,0x04,0xf7,0x00,0x2b,0x2b,0x87,0xfd,0x02,0x2b,0x2b, -0x86,0xfd,0x03,0xf9,0x2b,0x2b,0x83,0x00,0x04,0xf9,0xfd,0x2b,0x2b,0x92,0x01,0xfd, -0x01,0x00,0x84,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x08,0x00,0xfd,0xfa,0xf9,0xf7,0x2b, -0x00,0xf7,0x84,0xfb,0x08,0x81,0xfb,0x2b,0x81,0xfb,0xfb,0x56,0xfa,0x86,0xfb,0x01, -0x00,0x89,0xfb,0x01,0x2b,0x89,0x00,0x01,0x2b,0x84,0xfb,0x03,0xf8,0x00,0x81,0x83, 
-0xfb,0x0a,0x2b,0x00,0xfc,0xac,0xac,0xfd,0xfd,0x2b,0x00,0xfa,0x84,0xfd,0x02,0x56, -0xf8,0x85,0xfd,0x02,0x00,0xfb,0x83,0xfd,0x01,0xfe,0x83,0xfd,0x04,0xf9,0x2b,0x00, -0x2b,0xc2,0xf9,0x00,0xf9,0x02,0xf9,0x00,0x84,0xfd,0x01,0x00,0x8a,0xfd,0x0b,0x56, -0xf9,0xf9,0xac,0xfd,0xfd,0x2b,0x00,0xfb,0xfd,0xfe,0x85,0xfd,0x03,0xfa,0x00,0xf8, -0x87,0xfd,0x01,0x2b,0x83,0x00,0x02,0x2b,0x2b,0x88,0xfd,0x86,0x00,0x03,0x2b,0x56, -0xac,0x83,0xfd,0x02,0xf9,0x00,0x85,0xfd,0x03,0x81,0x00,0xac,0x96,0x09,0xfd,0x1a, -0xac,0x00,0x2b,0x2b,0x00,0x81,0x00,0xf9,0xfe,0x2b,0xf9,0xfe,0xfc,0xfa,0x2b,0x00, -0xf9,0xfa,0x2b,0xfd,0x00,0xfd,0xfa,0xf6,0xfd,0xfd,0x84,0x00,0x02,0x2b,0xfd,0x85, -0x00,0x06,0xfd,0xfe,0xfd,0x00,0xfd,0xfe,0xc2,0x00,0x2b,0x2b,0x01,0x00,0x84,0xfd, -0x01,0x00,0x8b,0xfd,0x04,0x00,0xf7,0x56,0x81,0x83,0xf9,0x03,0xfa,0xf9,0x81,0x83, -0xfd,0x03,0x00,0xfd,0xfb,0x84,0x2b,0x02,0x00,0xf9,0x86,0xfd,0x03,0xac,0x00,0xf8, -0x87,0xfd,0x0b,0xf7,0xf8,0x2b,0x2b,0xfd,0x81,0x00,0xac,0xfd,0x00,0xf9,0x91,0x01, -0xfd,0x01,0x00,0x84,0xfd,0x02,0xfa,0x00,0x84,0xfd,0x02,0x2b,0x2b,0x84,0x00,0x11, -0x2b,0xf7,0xf9,0x00,0xf9,0xfd,0xac,0xac,0xf7,0x00,0xfb,0x2b,0x2b,0xfb,0xfb,0x2b, -0xf7,0x86,0xfb,0x03,0x00,0xf7,0x81,0x8c,0xfb,0x01,0x00,0x88,0xfb,0x09,0xf9,0x00, -0xf9,0xf9,0xfb,0xfc,0xac,0x00,0xfa,0x85,0xfd,0x03,0xfe,0xf8,0x2b,0x84,0xfd,0x02, -0x00,0xf8,0x84,0xfd,0x08,0x2b,0x2b,0xf7,0xf6,0xac,0xfd,0xfd,0xfc,0x84,0xfd,0x03, -0x00,0xfd,0xfe,0xc2,0x00,0x2b,0x2b,0x01,0x00,0x84,0xfd,0x01,0x00,0x8b,0xfd,0x01, -0xfe,0x83,0xfd,0x02,0x2b,0xf8,0x88,0xfd,0x04,0xf8,0x00,0xf9,0xfe,0x89,0xfd,0x05, -0x2b,0xf9,0xfd,0x81,0xf9,0x83,0x2b,0x01,0xfb,0x87,0xfd,0x03,0xfe,0xf8,0x2b,0x85, -0xfd,0x02,0xfb,0x00,0x86,0xfd,0x02,0x2b,0xf6,0x95,0x09,0xfd,0x0c,0xfa,0x00,0x81, -0xf8,0x81,0xfa,0x00,0x2b,0xf7,0xfd,0x00,0xac,0x83,0xfd,0x17,0x00,0x00,0xfc,0x2b, -0xf8,0xfd,0x00,0xfd,0xfc,0x00,0xf9,0xfe,0x00,0xfd,0xfd,0xf9,0xf9,0xfa,0xf9,0xf9, -0xfe,0xfd,0x00,0x83,0xfd,0x05,0x00,0xfc,0xfa,0x00,0xfe,0xc2,0xfd,0x00,0xfd,0x83, 
-0xfd,0x01,0x00,0x8a,0xfd,0x05,0xfb,0x00,0xf9,0x00,0x2b,0x86,0x00,0x83,0xfd,0x01, -0x00,0x83,0xfd,0x05,0xfe,0xfd,0xf9,0x00,0xfe,0x85,0xfd,0x07,0xfc,0x00,0x00,0x2b, -0x00,0x00,0xf8,0x84,0xfd,0x0b,0xfe,0xfd,0xf6,0xf8,0xfd,0xfd,0x00,0xf9,0xfd,0xf9, -0x00,0x91,0x01,0xfd,0x01,0x00,0x84,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x0b,0x56,0xf9, -0xac,0x00,0xfd,0xfd,0xfe,0xfd,0x2b,0x2b,0xfe,0x84,0xfd,0xc2,0x00,0xfc,0xfb,0x02, -0x00,0xfa,0x86,0xfb,0x83,0x00,0x02,0x2b,0xfa,0x87,0xfb,0x07,0x00,0x81,0xfb,0x00, -0xfb,0x81,0x00,0x84,0xfb,0x0a,0x56,0x00,0xf9,0xfa,0x00,0x2b,0xfd,0xf7,0x00,0xfe, -0x8b,0xfd,0x02,0x56,0x00,0x84,0xfd,0x0a,0x81,0x00,0x00,0x56,0x56,0x00,0x56,0x2b, -0x00,0xf9,0x83,0xfd,0x03,0x00,0xac,0xfa,0xc3,0x00,0xfd,0xfd,0x03,0xfd,0xfd,0x00, -0x8e,0xfd,0x02,0x2b,0x2b,0x88,0xfd,0x03,0xfa,0x00,0xac,0x8b,0xfd,0x09,0x00,0xac, -0xfd,0xf8,0x2b,0xf9,0xf9,0xfa,0xfc,0x89,0xfd,0x02,0x00,0x81,0x85,0xfd,0x01,0x00, -0x86,0xfd,0x03,0xac,0x00,0xfc,0x95,0x09,0xfd,0x06,0xac,0xfc,0x00,0xfd,0xfe,0xfa, -0xa2,0xac,0x00,0x83,0xfd,0x06,0xf9,0x2b,0x2b,0x00,0x00,0xac,0xa2,0xfd,0x00,0x0a, -0xf9,0xfd,0xfd,0x00,0xfd,0xfe,0x00,0x2b,0xf6,0x2b,0xc2,0x00,0xfd,0xfd,0x04,0x81, -0x00,0x00,0xf9,0xc3,0x00,0xfd,0xfd,0x03,0xfd,0xfd,0x00,0x86,0xfd,0x04,0x2b,0xfb, -0xfd,0x81,0xa2,0x00,0x2b,0x07,0x00,0xfe,0xfa,0xac,0x81,0xfd,0x00,0x83,0xfd,0x03, -0x00,0xfd,0xfb,0x83,0xfd,0x02,0xf9,0x00,0x85,0xfd,0x09,0xfa,0x00,0x00,0xf8,0xac, -0xfd,0xfb,0x2b,0x00,0x85,0xfd,0x0a,0x00,0xac,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00, -0x81,0x90,0x01,0xfd,0x01,0x2b,0x84,0xfd,0x02,0xf8,0x2b,0x87,0xfd,0x06,0x00,0xfd, -0xfd,0xac,0xf6,0x2b,0x86,0xfd,0x07,0x2b,0xf8,0xfd,0xf9,0xac,0xf9,0x00,0x84,0xac, -0x83,0xfc,0x07,0x00,0xfb,0x81,0x2b,0x00,0xf6,0x81,0x84,0xfb,0x15,0xf9,0x00,0xfb, -0xfc,0x00,0xfc,0xfc,0x00,0x56,0xac,0xac,0xfd,0xfd,0xfb,0xfe,0xfd,0xfd,0x2b,0x00, -0x00,0xfa,0x8b,0xfd,0x0f,0x81,0x00,0xfa,0xfd,0xfd,0xfe,0xf9,0x00,0x56,0xfd,0xac, -0xf7,0x00,0xf6,0x81,0x84,0xfd,0x04,0x81,0x00,0x00,0xf9,0xc3,0x00,0xfd,0xfd,0x03, 
-0xfd,0xfd,0x00,0x85,0xfd,0x03,0xfe,0xf6,0xfb,0x86,0xfd,0x02,0x00,0xfb,0x88,0xfd, -0x03,0xac,0x00,0xf8,0x8a,0xfd,0x02,0xfb,0x00,0x85,0xfd,0x01,0xfe,0x86,0xfd,0x02, -0xfb,0x2b,0x83,0x00,0x02,0x2b,0x00,0x85,0xfd,0x04,0x00,0xfd,0xfd,0xac,0x84,0xfd, -0x02,0x2b,0xf7,0x94,0x09,0xfd,0x01,0xfa,0x86,0x00,0x12,0xfd,0x00,0xf6,0x2b,0xfd, -0xfd,0xac,0x00,0x56,0x2b,0xfb,0xfd,0xf9,0x00,0x00,0xf9,0x00,0xfe,0xc2,0xfd,0xfd, -0x00,0x83,0x2b,0x0a,0x00,0xfd,0xfd,0x00,0xfd,0xf7,0xf6,0x00,0xfe,0xfd,0x87,0x00, -0x84,0xfd,0x01,0x00,0x85,0xfd,0x02,0xac,0x00,0x83,0xfd,0x05,0xf7,0xf9,0x2b,0xf7, -0xf8,0xa2,0xfd,0x00,0x02,0xfd,0xf8,0x83,0xfd,0x08,0x00,0xf8,0xf6,0xfe,0xfd,0xfd, -0xf8,0x2b,0x84,0xfd,0x04,0x81,0x00,0xf9,0xfe,0x85,0xfd,0x02,0x00,0x81,0x83,0xfd, -0x02,0xf9,0x00,0x83,0xfd,0x06,0x00,0x81,0xfd,0xfd,0x56,0xfb,0x95,0x01,0xfd,0x02, -0x2b,0xf7,0x87,0xfd,0x06,0x00,0xfd,0xfd,0xfa,0x2b,0xfe,0x8b,0xfd,0x02,0x00,0xf9, -0x87,0xfd,0x01,0x00,0x83,0xfd,0x03,0xfb,0x00,0x2b,0x84,0xfd,0x09,0x2b,0xf7,0xfd, -0xfd,0x00,0xfd,0xfd,0x56,0x2b,0x89,0xfd,0x03,0x00,0x2b,0xfe,0x8a,0xfd,0x04,0xf9, -0x00,0xf8,0xfe,0x83,0xfd,0x08,0xfa,0xf9,0xfe,0xfa,0x00,0xf7,0x00,0xfe,0x84,0xfd, -0x05,0xf7,0xf6,0x00,0xfe,0xfd,0x87,0x00,0x84,0xfd,0x01,0x00,0x85,0xfd,0x02,0xfc, -0x00,0x87,0xfd,0x01,0x00,0x8a,0xfd,0x03,0xfc,0x00,0x2b,0x89,0xfd,0x06,0xf7,0x2b, -0xfd,0xfd,0x2b,0xac,0x87,0xfd,0x09,0x81,0x00,0x56,0xfd,0xfd,0xac,0x56,0x00,0xfa, -0x84,0xfd,0x05,0x00,0xfa,0xfd,0xf7,0xf8,0x83,0xfd,0x02,0xf9,0x00,0x95,0x09,0xfd, -0x10,0xac,0x00,0xfd,0xf8,0xf7,0xfd,0xfd,0x56,0x00,0xac,0xfd,0xfd,0xf7,0x56,0xf9, -0x2b,0x84,0xfd,0x03,0xac,0x00,0x81,0x83,0xfd,0x07,0x00,0xfd,0xfd,0x00,0xf9,0xfa, -0xf9,0xc2,0x00,0xfd,0xfd,0x0b,0xfe,0x00,0xfd,0xfd,0xf8,0xfd,0xfd,0x00,0xfd,0xfd, -0xf8,0x84,0xfd,0x02,0x00,0xac,0x83,0xfd,0x03,0xfe,0xf6,0x2b,0x83,0xfd,0x06,0x00, -0xfa,0x2b,0x2b,0xfd,0xfd,0xa2,0x00,0xfd,0x84,0xfd,0x03,0x00,0x00,0xac,0x83,0xfd, -0x02,0x2b,0xf8,0x85,0xfd,0x02,0xfb,0xfd,0x83,0x00,0x05,0xfa,0xfd,0xfd,0x00,0xfa, 
-0x83,0xfd,0x02,0x2b,0x56,0x83,0xfd,0x01,0x00,0x85,0xfd,0x02,0xac,0x56,0x92,0x01, -0xfd,0x03,0x81,0x00,0xac,0x87,0xfd,0x01,0x00,0x8f,0xfd,0x02,0xf7,0x2b,0x88,0xfd, -0x01,0x00,0x85,0xfd,0x01,0xac,0x83,0xfd,0x06,0xfb,0x00,0xac,0xfd,0xfd,0x00,0x83, -0xfd,0x02,0x00,0x81,0x87,0xfd,0x02,0x2b,0x2b,0x8a,0xfd,0x04,0xfb,0x2b,0x00,0xfa, -0x86,0xfd,0x06,0xfe,0xfc,0x00,0x81,0xfd,0x00,0x86,0xfd,0x0b,0xfe,0x00,0xfd,0xfd, -0xf8,0xfd,0xfd,0x00,0xfd,0xfd,0xf8,0x84,0xfd,0x02,0x00,0xac,0x84,0xfd,0x02,0x2b, -0x2b,0x87,0xfd,0x02,0x00,0xf7,0x8b,0xfd,0x03,0x2b,0x00,0xac,0x87,0xfd,0x05,0x00, -0x81,0xfd,0xf8,0xf6,0x88,0xfd,0x02,0x00,0x56,0x86,0xfd,0x01,0xfe,0x84,0xfd,0x05, -0xf7,0x2b,0xfd,0x00,0xfb,0x83,0xfd,0x08,0xac,0x00,0xfd,0xfd,0x2b,0x00,0x00,0xfc, -0x9f,0x08,0xfd,0x10,0xfb,0x2b,0x00,0x00,0xac,0xfd,0xfd,0x00,0x00,0xf7,0xfd,0xfd, -0xac,0xfd,0xf9,0x2b,0x83,0xfd,0x03,0xfb,0x00,0xf8,0x84,0xfd,0x04,0x00,0xfd,0xfd, -0x00,0x83,0x2b,0xc2,0x00,0xfd,0xfd,0x02,0xfd,0x00,0x85,0xfd,0x01,0x00,0x87,0xfd, -0x08,0x00,0xf7,0xfd,0xfd,0xfc,0xf6,0x00,0xac,0x83,0xfd,0x10,0x00,0xf9,0x2b,0x2b, -0xfd,0xf8,0x2b,0xfd,0x00,0xfd,0xf7,0xf9,0xfe,0xfd,0x00,0x2b,0x83,0xfd,0x02,0xf8, -0x00,0x87,0xfd,0x08,0xf9,0x2b,0xfd,0x81,0x00,0xfd,0xf7,0x00,0x83,0xfd,0x07,0xfb, -0x00,0xfd,0xfb,0xfd,0x56,0x00,0x85,0xfd,0x03,0xfb,0x00,0xf8,0x90,0x01,0xfd,0x03, -0xf7,0x00,0x81,0x88,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x02,0xfa,0x56,0x88,0xfd,0x02, -0x2b,0x00,0x89,0xfd,0x01,0x00,0x88,0xfd,0x03,0xac,0x00,0x56,0x83,0xfd,0x01,0x00, -0x83,0xfd,0x03,0xf9,0x00,0xac,0x84,0xfd,0x03,0xfb,0x00,0x2b,0x87,0xfd,0x07,0xfc, -0xf9,0xf8,0x00,0x00,0x2b,0xac,0x88,0xfd,0x05,0xf9,0x00,0xfd,0xfd,0x81,0x87,0xfd, -0x01,0x00,0x85,0xfd,0x01,0x00,0x87,0xfd,0x08,0x00,0xf7,0xfd,0xfd,0xfc,0x2b,0x00, -0xac,0x87,0xfd,0x04,0x81,0x00,0x2b,0xfb,0x8a,0xfd,0x03,0xf7,0x00,0xfb,0x85,0xfd, -0x06,0x81,0x00,0xfd,0xfd,0xf7,0x00,0x84,0xfd,0x01,0xfc,0x83,0xfd,0x02,0x2b,0xf7, -0x85,0xfd,0x01,0xfb,0x85,0xfd,0x04,0xac,0x00,0x2b,0x00,0x85,0xfd,0x07,0xfc,0xfd, 
-0x81,0xf6,0xfe,0xf8,0xf7,0x90,0x09,0xfd,0x14,0xf8,0x00,0x2b,0x00,0xfb,0x00,0xf8, -0xfd,0x2b,0x2b,0xfc,0xfd,0xfd,0xfa,0x2b,0xfd,0xfb,0x2b,0x00,0xf9,0x85,0xfd,0x08, -0x00,0xfd,0xfd,0x00,0xf9,0xf9,0xfa,0xf9,0xe2,0xfd,0xfd,0x00,0xfd,0x84,0xfd,0x01, -0x00,0x87,0xfd,0x01,0xac,0x84,0x00,0x02,0x2b,0xac,0x83,0xfd,0x16,0x81,0x2b,0xf9, -0x2b,0xac,0xf9,0x00,0xfb,0xfd,0x00,0xfd,0x2b,0xf9,0xfd,0xfd,0x2b,0xf7,0xfd,0xfd, -0x2b,0x00,0x81,0x87,0xfd,0x08,0xfe,0x00,0x2b,0xf6,0x00,0x00,0x2b,0xac,0x83,0xfd, -0x03,0x00,0xf8,0xfd,0x83,0x00,0x01,0x56,0x86,0xfd,0x03,0xac,0x00,0xf8,0x8d,0xfd, -0x04,0x2b,0x00,0x00,0xfb,0x89,0xfd,0x01,0x56,0x86,0x00,0x86,0xfd,0x03,0x56,0x00, -0x2b,0x8a,0xfd,0x01,0x00,0x89,0xfd,0x01,0xf9,0x84,0xfd,0x01,0x00,0x84,0xfd,0x01, -0x56,0x83,0xfd,0x04,0xfb,0xf6,0x00,0x56,0x89,0xfd,0x03,0x00,0x2b,0x56,0x8c,0xfd, -0x07,0x2b,0x00,0x2b,0x2b,0xf6,0x00,0x00,0x84,0xfd,0x01,0x00,0x85,0xfd,0x01,0x00, -0x87,0xfd,0x01,0xac,0x84,0x00,0x02,0x2b,0xfc,0x89,0xfd,0x04,0xfb,0x2b,0x00,0x00, -0x8a,0xfd,0x03,0xf8,0x00,0x81,0x84,0xfd,0x06,0x00,0xf8,0xfd,0xfd,0xac,0x2b,0x84, -0x00,0x01,0x2b,0x83,0xfd,0x03,0xfb,0x00,0x00,0x83,0x2b,0x02,0x00,0x00,0x86,0xfd, -0x03,0x81,0xf6,0xac,0x85,0xfd,0x07,0xfe,0xfd,0xfb,0x00,0xfc,0x2b,0xf8,0x9e,0x08, -0xfd,0x15,0xfb,0x00,0x2b,0xfd,0xfd,0xf9,0x00,0xf9,0xfe,0xfd,0xfd,0x2b,0x2b,0xfd, -0xfd,0xf9,0x2b,0xf8,0x00,0xf8,0xac,0x86,0xfd,0x01,0x00,0x83,0xfd,0x01,0xfe,0x83, -0xfd,0x07,0x2b,0x00,0xf8,0xfd,0xfb,0x00,0x2b,0x85,0xfd,0x01,0x00,0x89,0xfd,0x03, -0x81,0xf9,0xac,0x86,0xfd,0x0c,0xfb,0xf9,0x2b,0xf9,0x00,0xfb,0xfd,0xfd,0x2b,0x00, -0x2b,0xfe,0x86,0xfd,0x02,0xf9,0xac,0x8a,0xfd,0x04,0x81,0xfa,0xfa,0xfe,0x85,0xfd, -0x01,0x81,0x83,0xfd,0x03,0x81,0xfa,0xfe,0x87,0xfd,0x02,0xfa,0x2b,0x8d,0xfd,0x02, -0xfa,0xfb,0x98,0x01,0xfd,0x02,0x81,0xfb,0x8b,0xfd,0x01,0x2b,0x8e,0xfd,0x01,0x2b, -0x88,0xfd,0x03,0xac,0x2b,0xac,0x9a,0x01,0xfd,0x05,0xac,0xf9,0xf9,0xfa,0xfb,0x83, -0xfd,0x03,0xfb,0x00,0x2b,0x85,0xfd,0x01,0x00,0x89,0xfd,0x03,0x81,0xf9,0xac,0x8e, 
-0xfd,0x01,0xac,0x8b,0xfd,0x02,0xf8,0xfc,0x84,0xfd,0x02,0xfb,0xac,0x8e,0xfd,0x01, -0xac,0x83,0xf9,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x89,0xfd,0x03,0x56,0x00,0x2b,0x95, -0x09,0xfd,0x01,0xac,0x9c,0x03,0xfd,0x02,0xac,0xfb,0x91,0x04,0xfd,0x01,0xfe,0x9f, -0x0b,0xfd,0x01,0xfe,0x93,0xdd,0x06,0xfd, +0x91,0xd5,0x05,0xfd,0x02,0xfb,0x81,0x9b,0x0b,0xfd,0x02,0x81,0xfb,0x86,0xfd,0x02, +0xfb,0x81,0x8b,0xfd,0x02,0xf9,0xac,0x94,0x02,0xfd,0x02,0xac,0xf9,0x9a,0x09,0xfd, +0x03,0xf6,0x2b,0xfe,0x83,0xfd,0x02,0x2b,0xf8,0x90,0x03,0xfd,0x03,0x2b,0x00,0xfe, +0x98,0x0a,0xfd,0x03,0x81,0x00,0xfe,0x83,0xfd,0x03,0xf8,0xf6,0xfe,0x89,0xfd,0x03, +0x00,0x2b,0xfe,0x85,0xfd,0x03,0x2b,0x00,0xfe,0x8a,0xfd,0x02,0x00,0xf9,0x94,0x02, +0xfd,0x02,0xf9,0x00,0x92,0x01,0xfd,0x06,0x00,0xf5,0xf5,0x00,0xf7,0xfe,0x92,0x08, +0xfd,0x08,0xfa,0x00,0xfa,0xfd,0xfd,0x81,0x00,0xac,0x90,0x03,0xfd,0x02,0xf7,0x00, +0x86,0xfd,0x02,0x81,0xfb,0x92,0x02,0xfd,0x02,0xfb,0x81,0x90,0x01,0xfd,0x02,0xf9, +0xac,0x92,0x05,0xfd,0x02,0x81,0xfb,0x97,0x01,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x02, +0xf7,0x00,0x8a,0xfd,0x02,0xf5,0x2b,0x86,0xfd,0x02,0x2b,0xf5,0x8b,0xfd,0x02,0x00, +0xf9,0x9f,0x01,0xfd,0x02,0xac,0xf9,0x83,0xfd,0x02,0xf9,0x00,0x91,0x01,0xfd,0x07, +0xfe,0x00,0xf7,0xfa,0xf7,0x00,0x2b,0x93,0x08,0xfd,0x06,0xf6,0x00,0xfd,0xfd,0x00, +0xf8,0x91,0x03,0xfd,0x02,0x2b,0xf5,0x86,0xfd,0x03,0x00,0x2b,0xfe,0x91,0x02,0xfd, +0x03,0x2b,0x00,0xfe,0x8f,0xfd,0x02,0x00,0xf9,0x92,0x05,0xfd,0x03,0x00,0x2b,0xfe, +0x96,0x01,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x02,0x2b,0xf5,0x8a,0xfd,0x02,0xf5,0x2b, +0x86,0xfd,0x02,0x2b,0xf5,0x8b,0xfd,0x03,0x00,0xf9,0xfe,0x9e,0x01,0xfd,0x02,0xf9, +0x00,0x83,0xfd,0x02,0xfa,0x00,0x92,0x01,0xfd,0x07,0x00,0xf9,0xfd,0xfe,0xf6,0x00, +0xfe,0x92,0x08,0xfd,0x05,0xac,0x00,0xf7,0xf8,0xf5,0x83,0xfd,0x0e,0xac,0xf5,0x00, +0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b,0xfd,0xfe,0x2b,0x00,0x86,0xfd,0x11,0xf7,0x00, +0xf8,0x00,0x00,0xf8,0xfd,0xfd,0xfe,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xfe,0xf8,0x83, 
+0x00,0x06,0xac,0xfd,0xfd,0x81,0x00,0x00,0x83,0xf5,0x85,0xfd,0x01,0x2b,0x83,0x00, +0x08,0xf9,0xfd,0xac,0xf5,0x00,0x00,0xf5,0xac,0x85,0xfd,0x0f,0xfe,0x00,0x2b,0x00, +0x00,0xfd,0xfd,0xf8,0x00,0x00,0xf5,0xfc,0xfd,0xfe,0x2b,0x83,0x00,0x02,0xfd,0xf9, +0x83,0x00,0x03,0x2b,0xfd,0x56,0x83,0x00,0x08,0xf6,0xfd,0xfe,0x2b,0xf5,0xf6,0x00, +0xfa,0x84,0x00,0x84,0xfd,0x02,0xf8,0x00,0x84,0xfd,0x04,0x00,0xfb,0xfd,0xf8,0x83, +0x00,0x05,0xf8,0xfd,0xfe,0xf9,0x00,0x83,0xfd,0x08,0x00,0xf9,0xfe,0xf9,0x00,0xf8, +0x00,0x2b,0x85,0xfd,0x02,0xac,0xf6,0x83,0x00,0x03,0xfd,0xfd,0xf8,0x83,0x00,0x12, +0xf8,0xfd,0xfe,0xf9,0x00,0x56,0x00,0x00,0xf6,0xac,0x00,0x00,0xf5,0xfc,0xfd,0xfe, +0x00,0xf7,0x83,0x00,0x0b,0x81,0xfd,0xfd,0x00,0xf9,0xfd,0xfe,0x00,0x2b,0xfe,0x2b, +0x83,0x00,0x03,0xf9,0xfd,0x81,0x83,0x00,0x07,0xfa,0xfd,0xfe,0x00,0x2b,0x00,0x00, +0x88,0xfd,0x03,0xfe,0xf9,0x00,0x84,0xfd,0x11,0xf7,0x00,0xfd,0xfd,0xac,0xf5,0x00, +0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x81,0x83,0x00,0x02,0xf6,0x00,0x86, +0xfd,0x0a,0xfe,0xf6,0x00,0x00,0x2b,0x00,0xf9,0xfd,0xfd,0xf6,0x83,0x00,0x13,0x81, +0xfd,0xf6,0xf5,0xfd,0xfe,0xf6,0x00,0xfb,0xfd,0x81,0x00,0xfd,0xf7,0x00,0xf8,0x00, +0x00,0xf8,0x86,0xfd,0x84,0x00,0x0b,0xfd,0xf9,0x00,0x81,0x00,0x00,0xf6,0xfd,0xfd, +0xfe,0xf8,0x83,0x00,0x01,0xac,0x86,0xfd,0x25,0x00,0xf9,0xfd,0xfd,0xf6,0xf5,0xfd, +0xac,0xf5,0x00,0x00,0xf5,0xac,0xfd,0x00,0xf8,0xfd,0xfe,0x00,0x00,0xfd,0xfe,0xf7, +0xf5,0xfd,0xac,0xf5,0x00,0x00,0xf8,0xfd,0xfe,0x2b,0xf5,0xf6,0x00,0xf9,0x95,0x06, +0xfd,0x04,0xf8,0x00,0x00,0x81,0x83,0xfd,0x0f,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe, +0xfd,0x00,0x2b,0xfe,0xfd,0x2b,0xf5,0xfe,0x85,0xfd,0x1d,0x2b,0x00,0xf6,0xac,0xf7, +0x00,0xfd,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd,0xf9,0x00,0x81,0xfd,0xf5,0xf5, +0xfe,0xfc,0x00,0xf7,0xfd,0xf9,0x00,0xf5,0x85,0xfd,0x03,0xfe,0x00,0xf7,0x83,0xfd, +0x07,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0x85,0xfd,0x0e,0xf5,0x00,0xfa,0xfd,0xfd, +0xf9,0x00,0x81,0xfd,0xf5,0xf6,0xfd,0xfa,0x00,0xa2,0xfb,0xfd,0x03,0xfe,0x2b,0xf5, 
+0x83,0xfd,0x10,0x81,0x81,0xfd,0x81,0x00,0xfa,0xfd,0x2b,0x00,0xf7,0xfe,0xfd,0xfe, +0x00,0xf9,0xfe,0x85,0xfd,0x11,0x00,0xf8,0xfd,0xfd,0xf8,0xf5,0xfe,0xf9,0x00,0xfa, +0xfd,0xf9,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x07,0x00,0xf9,0xfd,0xf9,0x00,0xf6, +0xac,0x86,0xfd,0x25,0x00,0x00,0xfc,0xfd,0xfb,0xfd,0xf9,0x00,0xfa,0xfd,0xf9,0x00, +0xf9,0xfd,0xf9,0x00,0xf5,0xfb,0x81,0x00,0x00,0xfa,0xac,0x00,0xf7,0xfd,0xfd,0x00, +0xf5,0xfa,0xfd,0xf6,0x00,0xfd,0xfd,0x00,0xf9,0xe2,0xfd,0xfd,0xf5,0x2b,0x0d,0xfe, +0xfd,0xfc,0x00,0xf8,0xfe,0xf7,0x00,0xfd,0xfd,0xf5,0x00,0xfa,0x8a,0xfd,0x03,0xf9, +0x00,0x00,0xa2,0xf5,0x00,0x16,0xf5,0xfd,0xfd,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe, +0xfd,0x00,0xf7,0xfd,0xfc,0x00,0xf7,0xfe,0xf9,0x00,0xf5,0xfe,0x85,0xfd,0x20,0x2b, +0x00,0xfc,0xfc,0x2b,0x00,0xfa,0xfd,0x2b,0x00,0xfc,0xfd,0xf7,0x00,0xac,0xf8,0x00, +0xfd,0xfd,0x00,0x00,0xf8,0xfd,0xf7,0xf6,0xfe,0x2b,0x00,0xf6,0xac,0xf7,0x00,0x86, +0xfd,0x14,0xf9,0x00,0xfe,0xfd,0xfd,0xf9,0x00,0x00,0xfc,0xfa,0x00,0x81,0xfd,0xf9, +0x00,0x81,0xfd,0xf5,0xf5,0xfe,0x85,0xfd,0x24,0x00,0xf9,0xfe,0x2b,0x00,0xfa,0xfd, +0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xf5,0xf6,0xfd,0xfb,0x00,0x00,0xfb,0xfd,0xf5, +0xf9,0xfd,0x00,0xf6,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xf7,0xfe,0x97,0x06,0xfd, +0x02,0xf5,0xf5,0x83,0xfd,0x0f,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfa,0xfd,0xf5, +0x2b,0xfd,0xfd,0xf7,0x00,0x86,0xfd,0x1d,0xf7,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, +0x00,0xf7,0xf9,0xf9,0xf6,0xf5,0xfd,0xf6,0xf5,0xf9,0xfa,0xf7,0x00,0xfd,0xf8,0x00, +0xfd,0xfd,0xfe,0x2b,0xf5,0x86,0xfd,0x0c,0xf5,0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd, +0xfd,0xfb,0x00,0xfa,0x85,0xfd,0x10,0xf5,0xf6,0xfd,0xfd,0xfe,0xf6,0xf5,0xf9,0xf9, +0xf7,0x00,0xfd,0xfb,0x00,0x2b,0xfb,0x83,0xfd,0x02,0x2b,0x00,0x87,0xfd,0x05,0x00, +0xf9,0xfd,0xf7,0x00,0x84,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x11,0x2b,0x00,0xfd,0xfe, +0x00,0x56,0xfd,0xf6,0xf5,0xfd,0xfd,0xfe,0xf6,0xf5,0xfd,0xf9,0x00,0x83,0xfd,0x06, +0x00,0xf9,0xfd,0xf9,0x00,0xac,0x86,0xfd,0x03,0x81,0x00,0xfb,0x83,0xfd,0x20,0xfe, 
+0xf6,0xf5,0xfd,0xfd,0xfe,0xf6,0xf5,0xfd,0xf9,0x00,0xac,0xfd,0xfd,0x00,0x56,0xfd, +0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xac,0x00,0xfa,0xfe,0x00,0xf9,0xe2, +0xfd,0xfd,0xf5,0x2b,0x0c,0xfd,0xfd,0xf8,0x00,0xfa,0xf9,0xf9,0x00,0x81,0xfd,0xf5, +0xf6,0x8b,0xfd,0x02,0xf9,0x00,0xa2,0xfa,0xf9,0x16,0xf6,0xf5,0xfd,0xfb,0x00,0xfb, +0xfd,0xfd,0xfb,0x00,0xfa,0xfd,0xf5,0x2b,0xfd,0xf8,0x00,0xfd,0xfd,0xfe,0xf7,0x00, +0x86,0xfd,0x20,0x00,0xf8,0xfd,0xfd,0xfe,0x00,0xf9,0xfd,0x00,0xf8,0xfd,0xfd,0xfe, +0x00,0xf7,0xfb,0x00,0xfb,0x81,0x00,0xf6,0xf6,0xfd,0xf5,0x56,0xfd,0x2b,0x00,0xfd, +0xfd,0xf9,0x00,0x85,0xfd,0x14,0xfe,0xf9,0x00,0xfd,0xfd,0xfe,0xf9,0x00,0xac,0xfd, +0xfe,0x00,0xf9,0xfe,0xf5,0xf5,0xf9,0xfa,0xf7,0x00,0x86,0xfd,0x84,0x00,0x1e,0x56, +0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfa,0xf8,0x00,0xfd,0xf7,0x2b,0x00,0xf8, +0xfd,0x00,0xfc,0xfb,0x00,0x56,0xfa,0xf9,0x00,0xf7,0xfd,0xf7,0x00,0x99,0x06,0xfd, +0x02,0x00,0xf7,0x83,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x09,0x00,0xf9,0xfe,0x00,0x2b, +0xfd,0xfd,0x2b,0xf5,0x86,0xfd,0x18,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00, +0x00,0xf5,0x00,0xf5,0x2b,0xfd,0x00,0x00,0xf5,0x00,0xf5,0xf5,0xfd,0x2b,0xf5,0x83, +0xfd,0x02,0x2b,0xf5,0x86,0xfd,0x06,0x00,0xf7,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x02, +0x00,0xf9,0x85,0xfd,0x02,0x00,0xf7,0x83,0xfd,0x01,0x00,0x84,0xf5,0x0f,0x00,0xfe, +0xfd,0xf9,0xf5,0x00,0xf6,0xfe,0xfd,0x2b,0xf5,0xfe,0xfd,0xfd,0xfb,0xa2,0xf5,0x00, +0x04,0xf9,0xfd,0x2b,0xf5,0x84,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x09,0x81,0x00,0xfb, +0xfa,0x00,0xfe,0xfd,0x00,0xf7,0x83,0xfd,0x05,0x2b,0xf5,0xfd,0xfa,0x00,0x83,0xfd, +0x05,0x00,0xfa,0xfd,0xf9,0x00,0x87,0xfd,0x02,0xf9,0x00,0x85,0xfd,0x02,0x00,0xf7, +0x83,0xfd,0x05,0x2b,0xf5,0xfd,0xfa,0x00,0x83,0xfd,0x0a,0x00,0xf9,0xfd,0xfd,0x00, +0xf7,0xfd,0xfd,0x00,0xf9,0x83,0xfd,0x05,0x00,0xf9,0xfd,0x00,0xfa,0xe2,0xfd,0xfd, +0x00,0xf7,0x0c,0xfd,0xfd,0x2b,0x00,0x00,0xf5,0xf5,0x00,0x81,0xfd,0x00,0xf7,0x8b, +0xfd,0x0b,0xf9,0x00,0xfd,0xfe,0xfd,0xfd,0x2b,0xf5,0xfd,0xf9,0x00,0x84,0xfd,0x08, 
+0x00,0xf9,0xfe,0x00,0x2b,0xfe,0x2b,0xf5,0x83,0xfd,0x02,0x2b,0xf5,0x86,0xfd,0x02, +0x00,0xf9,0x83,0xfd,0x05,0x00,0xf9,0xfe,0x00,0xf9,0x83,0xfd,0x13,0x00,0x2b,0xfe, +0x00,0xf8,0xf6,0x2b,0xf8,0x00,0xac,0x00,0xfe,0xfd,0x2b,0xf5,0xfe,0xfd,0xf9,0x00, +0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x03,0x00,0xf9,0xfd, +0xa2,0xf5,0x00,0x02,0xf5,0xf5,0x86,0xfd,0x03,0x00,0xf9,0xfe,0x83,0xfd,0x02,0xf9, +0x00,0x84,0xfd,0x02,0x00,0xfa,0xa2,0xfb,0x00,0x09,0x56,0xf6,0xf5,0x56,0xf5,0xfe, +0xf9,0x00,0x00,0x83,0xf5,0x04,0xf8,0xfd,0x2b,0xf5,0x99,0x06,0xfd,0x02,0xf5,0x2b, +0x83,0xfd,0x0f,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xf5,0x2b,0xfd,0xfd, +0x2b,0x00,0x86,0xfd,0x0c,0xf7,0x00,0xfd,0xfe,0xf9,0x00,0xfd,0xfe,0x00,0xf7,0xfd, +0xfe,0x83,0xfd,0x09,0xf6,0xf5,0xfd,0xfe,0xfd,0xfd,0xfe,0xf7,0x00,0x83,0xfd,0x02, +0xf7,0x00,0x86,0xfd,0x0c,0xf5,0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00, +0xfb,0x85,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x02,0xf6,0xf5,0x83,0xfd,0x0b,0xfe,0xfd, +0xfd,0xfe,0xfd,0xfa,0x00,0x2b,0xfd,0xf7,0x00,0x83,0xfd,0x09,0x00,0xf7,0xfe,0xfd, +0x00,0xf9,0xfe,0x2b,0xf5,0x84,0xfd,0x02,0x00,0xf9,0x87,0xfd,0x08,0x00,0x2b,0xf6, +0xf7,0xfd,0xfd,0xf6,0xf5,0x83,0xfd,0x0d,0xf5,0xf6,0xfd,0xf9,0x00,0xfd,0xfd,0xac, +0x00,0xf9,0xfd,0xfa,0x00,0x87,0xfd,0x03,0xfb,0x00,0xfb,0x84,0xfd,0x02,0xf6,0xf5, +0x83,0xfd,0x05,0xf5,0xf6,0xfd,0xf9,0x00,0x83,0xfd,0x12,0x00,0xf9,0xfd,0xfd,0xf5, +0x2b,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0x00,0xf9,0xe2,0xfd,0xfd, +0xf5,0x2b,0x0c,0xfd,0xfd,0xf8,0x00,0xfe,0xfd,0xfd,0xfe,0xfd,0xfe,0xf5,0x2b,0x8b, +0xfd,0x02,0xfa,0x00,0x84,0xfd,0x11,0xf7,0x00,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb, +0x00,0xfb,0xfd,0xf5,0x2b,0xfd,0xf7,0x00,0x83,0xfd,0x02,0x2b,0x00,0x86,0xfd,0x02, +0x00,0x56,0x83,0xfd,0x05,0x00,0xf9,0xfd,0x00,0xf8,0x83,0xfd,0x13,0x00,0xf8,0xfd, +0xf5,0xf6,0x00,0x81,0xfb,0x00,0x2b,0xf5,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf9,0x00, +0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x08,0x00,0xf9,0xfd, 
+0xf6,0xf5,0xfd,0xfd,0xfe,0x87,0xfd,0x02,0x00,0xf9,0x84,0xfd,0x15,0xfb,0x00,0xfb, +0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0x00,0xf7,0x00,0xfe,0xf8,0x00,0xf6,0xf7,0xfd,0xfb, +0x00,0x81,0x85,0xfd,0x02,0xf7,0x00,0x99,0x06,0xfd,0x03,0xf5,0x2b,0xfe,0x83,0xfd, +0x0e,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xfd,0xf5,0xf5,0xfe,0xf7,0x00,0xf5,0x86, +0xfd,0x1e,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9, +0xfd,0x81,0x00,0xf7,0xfd,0xfd,0x56,0xfd,0xfb,0x00,0x56,0xfd,0xf8,0x00,0xf5,0xfe, +0x85,0xfd,0x0c,0xf5,0xf5,0xfe,0xfd,0xfd,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0x85, +0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x1c,0x81,0x00,0xf7,0xfd,0xfd,0xf8,0xfd,0xfb,0xf9, +0xfd,0xfb,0x00,0x56,0xfd,0xf7,0x00,0xac,0xfd,0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56, +0xfd,0x2b,0xf5,0x84,0xfd,0x02,0x00,0xf7,0x87,0xfd,0x06,0xf8,0x00,0x00,0xac,0xfd, +0xfd,0xe2,0xf9,0x00,0xfa,0xfd,0x0a,0xfa,0x00,0x81,0xfb,0x00,0x00,0xf9,0xfd,0xf9, +0x00,0x88,0xfd,0x10,0x00,0x00,0xfb,0xfd,0x56,0xfd,0xfa,0x00,0xf9,0xfd,0xf9,0x00, +0xfa,0xfd,0xf9,0x00,0x83,0xfd,0x26,0x00,0xfa,0xfd,0xfd,0xf5,0x2b,0xfe,0xfd,0x00, +0xf5,0xfb,0xac,0xf5,0xf5,0xfd,0xfd,0x00,0xf7,0xac,0xf5,0x00,0x2b,0xfe,0xfd,0xf5, +0xf5,0xfe,0xfd,0xac,0x00,0xf5,0xac,0xfd,0xfa,0xfb,0xfd,0x00,0xf7,0x83,0xfd,0x03, +0xf9,0x2b,0xfe,0x85,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x17,0x2b,0xf5,0xfe,0xfd,0x00, +0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xfd,0x00,0xf7,0xfd,0xfb,0x00,0x56,0xfd,0xf8,0x00, +0xf5,0xfe,0x85,0xfd,0x20,0xf6,0xf5,0xac,0xfb,0xf5,0x00,0xfa,0xfd,0x2b,0x00,0xfb, +0xfd,0xf7,0x00,0xac,0xfd,0xf8,0x00,0x00,0xfd,0xfd,0x00,0x00,0xf8,0xfd,0xfd,0x2b, +0xf5,0xfd,0xfd,0xf9,0x00,0x86,0xfd,0x07,0x81,0x00,0x81,0xfd,0xfd,0xf9,0x00,0x83, +0xfd,0x09,0x00,0xf9,0xfe,0xfa,0x00,0xf7,0xfd,0xfd,0x56,0x86,0xfd,0x02,0x00,0xfa, +0x85,0xfd,0x1c,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xfd,0xf5,0x00,0xf6,0xfd,0xfb, +0x00,0x00,0xfc,0xfd,0xfd,0xf5,0x00,0xfb,0xfd,0xfb,0xfa,0xfd,0x2b,0xf5,0xfe,0x98, +0x06,0xfd,0x02,0xf5,0x2b,0x84,0xfd,0x0e,0xac,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd, 
+0x81,0x00,0x00,0xf6,0x2b,0xf5,0x86,0xfd,0x0a,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd, +0xfd,0xfe,0xf7,0x83,0x00,0x04,0x2b,0xfd,0xfd,0xfa,0x83,0x00,0x09,0xf5,0xfd,0xfe, +0xf8,0x00,0x00,0xf5,0x2b,0x00,0x86,0xfd,0x0b,0x81,0x00,0x00,0xf9,0xfd,0xac,0xf5, +0x00,0x00,0xf5,0xac,0x86,0xfd,0x02,0xf5,0x2b,0x84,0xfd,0x01,0xfa,0x83,0x00,0x03, +0xf5,0xfd,0xfb,0x83,0x00,0x11,0xf6,0xfd,0xfd,0xac,0xf5,0x00,0x2b,0xfd,0xf8,0x00, +0x00,0x2b,0xf6,0x00,0xfa,0xf7,0x00,0x84,0xfd,0x03,0xf8,0x00,0x00,0x86,0xfd,0x03, +0xac,0x00,0xf5,0x83,0xfd,0x02,0xfe,0xf8,0x83,0x00,0x0d,0xf8,0xfd,0xfd,0xfe,0xf6, +0x00,0x00,0x81,0x00,0xf9,0xfe,0xf9,0x00,0x88,0xfd,0x08,0xac,0xf6,0x00,0x00,0xf5, +0xfd,0xfd,0xf8,0x83,0x00,0x05,0xf8,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x0a,0x00,0xf9, +0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xf6,0x83,0x00,0x12,0xac,0xfd,0xfd,0xf8,0x00, +0x00,0xf8,0xf5,0x2b,0xfd,0xfd,0x81,0x00,0x00,0xf9,0xfd,0xfb,0xf5,0x83,0x00,0x09, +0xfb,0xfd,0xf5,0x2b,0xfd,0xfd,0xfe,0x2b,0x00,0x86,0xfd,0x02,0xf9,0x00,0x83,0xfd, +0x17,0xfe,0x2b,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b, +0xfd,0xfd,0xf8,0x00,0x00,0xf6,0x2b,0x00,0x86,0xfd,0x01,0xac,0x83,0x00,0x06,0x56, +0x00,0xf9,0xfd,0xfe,0x2b,0x83,0x00,0x13,0x81,0xfd,0xfd,0xfb,0x00,0x2b,0xfd,0xfd, +0xf6,0x00,0xfb,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xfa,0x00,0x87,0xfd,0x06,0xf6,0x00, +0x00,0xfd,0xfa,0x00,0x83,0xfd,0x05,0x00,0xf9,0xfd,0xfd,0xfa,0x83,0x00,0x01,0xf5, +0x86,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x0f,0xac,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd, +0x56,0x00,0x56,0xfd,0xfe,0x00,0x00,0x83,0xfd,0x02,0xac,0x2b,0x83,0x00,0x04,0x56, +0xfd,0xf7,0x00,0x90,0x07,0xfd,0x01,0xfe,0x87,0xfd,0xc2,0xfe,0xfd,0xfd,0x85,0xfd, +0xe2,0xfe,0xfd,0xfd,0xfd,0x90,0x01,0xfd,0xa3,0xfe,0xfd,0x86,0xfd,0x01,0xfe,0x84, +0xfd,0x01,0xfe,0x8b,0xfd,0x01,0xfe,0x87,0xfd,0x01,0xfe,0x83,0xfd,0xc2,0xfe,0xfd, +0xfd,0x88,0xfd,0x03,0xfe,0xfd,0xfd,0xa2,0xfe,0xfd,0x84,0xfd,0x01,0xfe,0x88,0xfd, +0x02,0x00,0xfa,0x87,0xfd,0x01,0xfe,0x96,0x01,0xfd,0x01,0xfe,0x85,0xfd,0xc2,0xfe, 
+0xfd,0xfd,0x02,0xfd,0xfe,0x8c,0xfd,0x05,0x00,0xfa,0xfd,0xfd,0xfe,0x86,0xfd,0x01, +0xfe,0x85,0xfd,0x01,0xfe,0x84,0xfd,0xc2,0xfe,0xfd,0xfd,0x94,0x01,0xfd,0x01,0xfe, +0x8a,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x87,0xfd,0xc2,0xfe,0xfd,0xfd,0x85,0xfd, +0x01,0xfe,0x84,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x8b,0xfd, +0x01,0xfe,0x89,0xfd,0x01,0xfe,0x95,0x01,0xfd,0x01,0xfe,0x8c,0xfd,0x01,0xfe,0x86, +0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x94,0x0e,0xfd,0x03,0xfe,0xf8,0x00,0x99,0x03, +0xfd,0x02,0x00,0xf9,0x9f,0x18,0xfd,0x02,0x56,0xfa,0x99,0x03,0xfd,0x02,0x2b,0xfb, +0x9a,0x1c,0xfd,0x01,0xfe,0x9e,0x2d,0xfd,0x02,0x81,0xfb,0x95,0x04,0xfd,0x02,0xfa, +0xfb,0x91,0x04,0xfd,0x07,0xfb,0xf9,0xfc,0xfd,0xfd,0x81,0xf9,0x8e,0xfd,0x02,0xfb, +0xfa,0x95,0x04,0xfd,0x02,0xf9,0xfc,0x9b,0x03,0xfd,0x02,0xfa,0xfb,0x97,0x0a,0xfd, +0x03,0x00,0x2b,0xfe,0x90,0x04,0xfd,0x06,0xf8,0xf6,0xfd,0xfd,0xf5,0x2b,0x86,0xfd, +0x02,0x00,0x81,0x98,0x03,0xfd,0x0a,0x2b,0x00,0xf5,0xf5,0xfd,0x00,0x00,0xf5,0x2b, +0xfe,0x8c,0xfd,0x02,0x2b,0xf5,0x95,0x04,0xfd,0x02,0x00,0xf9,0x91,0x01,0xfd,0x06, +0xf6,0xf5,0x00,0xf5,0xf6,0xac,0x94,0x02,0xfd,0x02,0xf5,0x2b,0x92,0x04,0xfd,0x03, +0xfa,0x00,0xfe,0x92,0x06,0xfd,0x02,0xf5,0x2b,0x8e,0xfd,0x02,0xfb,0xfa,0x83,0xfd, +0x02,0xfa,0xfb,0x97,0x02,0xfd,0x02,0xfb,0xfa,0x83,0xfd,0x06,0xf9,0xf8,0xfd,0xfd, +0xf5,0x2b,0x85,0xfd,0x06,0xfe,0x2b,0xfb,0xfd,0xfc,0xf9,0x87,0xfd,0x02,0xfc,0xf9, +0x9c,0x02,0xfd,0x07,0x00,0x56,0xfd,0xfd,0x81,0x00,0xac,0x8a,0xfd,0x02,0xfb,0xfa, +0x83,0xfd,0x02,0xf7,0x00,0x90,0x04,0xfd,0x02,0xf9,0xac,0x83,0xfd,0x02,0x00,0xf9, +0x90,0x01,0xfd,0x07,0xfe,0xf5,0xf5,0xfa,0xf8,0x00,0x00,0x94,0x02,0xfd,0x02,0xf5, +0x2b,0x8e,0xfd,0x02,0xfb,0xfa,0x83,0xfd,0x02,0xfa,0xfb,0x9d,0x02,0xfd,0x02,0xfc, +0x2b,0x93,0x06,0xfd,0x02,0x00,0xf7,0x8e,0xfd,0x02,0x2b,0xf5,0x83,0xfd,0x02,0x00, +0x2b,0x97,0x02,0xfd,0x02,0x2b,0xf5,0x84,0xfd,0x05,0xfe,0xfd,0xfd,0x00,0xf7,0x89, +0xfd,0x02,0xf9,0x00,0x87,0xfd,0x02,0xf9,0x00,0x9c,0x02,0xfd,0x06,0x00,0xf9,0xfe, 
+0xfd,0xf9,0x00,0x8b,0xfd,0x02,0x2b,0xf5,0x83,0xfd,0x03,0x2b,0xf5,0xfe,0x9f,0x03, +0xfd,0x02,0x00,0xf9,0x83,0xfd,0x02,0x00,0xfa,0x91,0x01,0xfd,0x06,0x00,0xf7,0xfd, +0xfe,0x56,0x00,0x94,0x02,0xfd,0x02,0x00,0xf7,0x8e,0xfd,0x02,0x2b,0xf5,0x83,0xfd, +0x02,0xf5,0x2b,0x92,0x09,0xfd,0x10,0xf5,0x2b,0xf5,0x00,0x00,0xf7,0xfd,0xfe,0x2b, +0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xf9,0x83,0x00,0x02,0x2b,0x2b,0x83,0x00,0x10,0xf9, +0xfd,0xac,0xf5,0x00,0x00,0xf5,0xfc,0xfd,0xfe,0x00,0x2b,0xf6,0x00,0x00,0x81,0x85, +0xfd,0x11,0xfe,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xf8,0x00,0x00, +0xf8,0xfd,0xf9,0x83,0x00,0x08,0x2b,0xfe,0x2b,0x00,0xfd,0xfd,0xf5,0x2b,0x86,0xfd, +0x03,0x00,0xf9,0xfd,0x84,0x00,0x85,0xfd,0x84,0x00,0x02,0xfd,0x2b,0xe2,0x00,0xfd, +0xfd,0xf9,0x07,0x00,0xf8,0x00,0x2b,0xfd,0x00,0x56,0x83,0x00,0x04,0xfc,0xfd,0xfd, +0x2b,0x83,0x00,0x87,0xfd,0x01,0xf8,0x83,0x00,0x03,0xf8,0xfd,0xfd,0x84,0x00,0x01, +0xf9,0x84,0x00,0x88,0xfd,0x01,0xf9,0x83,0x00,0x08,0x2b,0xfe,0x2b,0xf5,0xf7,0x00, +0x00,0xf8,0x83,0xfd,0x0c,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xf9,0x00,0x81,0x00,0x00, +0xf6,0x87,0xfd,0x02,0x00,0xf7,0x83,0x00,0x17,0xfa,0xfd,0xfd,0x00,0xf7,0x00,0x00, +0xfd,0xfd,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xac,0xf5,0x00,0x00,0xf5,0xfd,0xfd,0x56, +0x83,0x00,0x01,0xfa,0x84,0xfd,0x01,0xfe,0x84,0x00,0x04,0xfd,0xfd,0x00,0x56,0x83, +0x00,0x04,0xfc,0xfd,0xfd,0x81,0x83,0x00,0x01,0xfa,0x86,0xfd,0x09,0xf5,0x2b,0xfd, +0xfd,0xf8,0x00,0xfd,0xfd,0xf6,0x83,0x00,0x19,0xfa,0xfd,0xf6,0xf5,0xfd,0xfd,0xf6, +0x00,0xfb,0xfd,0xfa,0x00,0xfd,0xfd,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xf9,0x00,0xf8, +0x00,0x2b,0x85,0xfd,0x10,0xf5,0x2b,0xf5,0x00,0x00,0xf8,0xfd,0xfe,0x2b,0x00,0xfd, +0xfd,0xf9,0x00,0xfd,0xf9,0x83,0x00,0x02,0x2b,0x2b,0x83,0x00,0x10,0xf9,0xfd,0xac, +0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b,0xf6,0x00,0x00,0xfa,0x86,0xfd,0x01, +0x2b,0x83,0x00,0x01,0xf8,0x83,0xfd,0x08,0xf6,0x00,0x00,0xf6,0x00,0xfd,0xfd,0x56, +0x83,0x00,0x0d,0xf6,0xfd,0xfd,0xf9,0x00,0xfd,0xfe,0x2b,0x00,0xf8,0x00,0x00,0xf8, 
+0x9b,0x05,0xfd,0x17,0x00,0x00,0x56,0xfd,0x56,0x00,0xfb,0xfd,0x2b,0xf5,0xfd,0xfd, +0xf9,0x00,0xfd,0xfe,0x2b,0x00,0xfe,0xfd,0xfe,0x00,0xf7,0x83,0xfd,0x0e,0x00,0xf6, +0xac,0xfc,0xf6,0x00,0xfd,0xfd,0xf5,0x00,0xf7,0xfe,0xf5,0xf5,0x86,0xfd,0x12,0x2b, +0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b, +0xf5,0x83,0xfd,0x06,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0x86,0xfd,0x06,0x00,0xf9,0xfe, +0xf9,0x00,0xfe,0x86,0xfd,0x07,0xf9,0x00,0xfe,0xfd,0xfd,0x2b,0xf5,0xe2,0xfd,0xfd, +0xf9,0x00,0x0d,0xf6,0xac,0xfd,0xfd,0x00,0x00,0xfa,0xac,0x00,0xf7,0xfd,0x81,0x00, +0xa2,0xfb,0xfd,0x85,0xfd,0x10,0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xfd,0x00, +0xf9,0xfe,0xfd,0xf9,0x00,0xfe,0x89,0xfd,0x1c,0xfe,0x2b,0x00,0xfe,0xfd,0xfd,0x2b, +0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd,0xf9,0x00, +0x00,0xfc,0xfa,0x00,0x81,0x86,0xfd,0x20,0x00,0xf5,0xfa,0xfd,0xf6,0x00,0xfe,0xfd, +0x00,0xf5,0xfb,0xfe,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xac,0xfd, +0xfb,0xfd,0xfd,0x00,0xf8,0xfd,0xac,0xac,0x86,0xfd,0x12,0x00,0xf9,0xfe,0xfd,0xfd, +0x00,0x00,0xfa,0xac,0x00,0xf7,0xfd,0xac,0x00,0xf8,0xfe,0xf7,0x00,0x86,0xfd,0x24, +0x00,0xf7,0xfd,0xf9,0x00,0x2b,0xfe,0x2b,0x00,0xfb,0xfe,0xf7,0x00,0xac,0xf8,0x00, +0xfd,0xfd,0x00,0x00,0xf8,0xfd,0xf7,0xf6,0xfe,0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd, +0xf9,0x00,0xf6,0xac,0x86,0xfd,0x17,0x00,0x00,0x56,0xfd,0x56,0x00,0xfb,0xfd,0x2b, +0xf5,0xfd,0xfd,0xf9,0x00,0xfe,0xfd,0x2b,0xf5,0xfd,0xfe,0xfd,0x00,0xf7,0x83,0xfd, +0x0f,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xfd,0x00,0x00,0xf8,0xfd,0xf5,0xf5,0xfe, +0x85,0xfd,0x20,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0x2b,0x00,0xfb,0xac,0xf5,0x00, +0xfd,0xfd,0x81,0x81,0xfd,0xfa,0x00,0x81,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xf6, +0xac,0xf7,0x00,0x9a,0x05,0xfd,0x13,0xfe,0xf5,0x2b,0xfd,0xfd,0xfe,0x00,0xf7,0xfd, +0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x13,0xf5,0x2b,0xfd, +0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xfd,0xf5,0xf6,0xfe,0xfd,0x2b,0x00, 
+0x86,0xfd,0x12,0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf9, +0x00,0xfd,0xfd,0xf7,0x00,0x83,0xfd,0x06,0xf7,0x00,0xfd,0xfd,0xf5,0x2b,0x86,0xfd, +0x05,0x00,0xf9,0xfd,0xf9,0x00,0x87,0xfd,0x07,0xfa,0x00,0xfd,0xfd,0xfe,0x2b,0xf5, +0xe2,0xfd,0xfd,0xfa,0x00,0x01,0xac,0x83,0xfd,0x0b,0x00,0x56,0xfd,0xfd,0xf5,0x2b, +0xfd,0xfb,0x00,0x2b,0xfb,0x86,0xfd,0x03,0xfe,0xf5,0xf6,0x83,0xfd,0x0a,0xf6,0xf5, +0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0x8b,0xfd,0x02,0x2b,0xf5,0x83,0xfd,0x16, +0xf7,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf7,0xf9,0xf9,0xf6,0xf5,0xfe,0xf9, +0x00,0xac,0xfd,0xfe,0x00,0xf9,0x86,0xfd,0x0a,0x00,0xf9,0xfd,0xfd,0xac,0x00,0xfa, +0xfd,0x00,0x56,0x83,0xfd,0x11,0x00,0xf7,0xf9,0xf9,0xf6,0xf5,0xfd,0xf8,0x00,0xf7, +0xac,0xfd,0xfd,0xfe,0x00,0xf5,0xfa,0x88,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x09,0x00, +0x56,0xfd,0xfd,0xf5,0x2b,0xfd,0xf8,0x00,0x83,0xf9,0x02,0x00,0x81,0x85,0xfd,0x01, +0xf5,0x83,0x00,0x05,0x2b,0xfe,0xfd,0x00,0xf8,0x83,0xfd,0x17,0x00,0xf7,0xfb,0x00, +0xfb,0x81,0x00,0xf6,0xf5,0xfe,0xf5,0x56,0xfd,0x00,0x2b,0xf9,0xf9,0xf6,0x00,0xfe, +0xf9,0x00,0xac,0x86,0xfd,0x13,0xfe,0xf5,0x2b,0xfd,0xfd,0xfe,0x00,0xf7,0xfd,0xf7, +0x00,0xfd,0xfe,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0x83,0xfd,0x13,0xf5,0x2b,0xfd,0xfd, +0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfa,0xfd,0xf5,0xf6,0xfd,0xfd,0xf7,0x00,0x88, +0xfd,0x0b,0xfe,0xf9,0x00,0xfd,0xfe,0x00,0xf8,0xfd,0xfd,0xfa,0x00,0x85,0xfd,0x0e, +0xfe,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf9,0x00,0x9b,0x05, +0xfd,0x02,0x00,0xf7,0x83,0xfd,0x0d,0x00,0xf7,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00, +0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x06,0x00,0xf7,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x0a, +0x00,0xf9,0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfe,0x85,0xfd,0x1b,0x2b,0xf5,0xfd, +0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfe, +0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf7,0x86,0xfd,0x05,0x00,0xfa,0xfd,0xf9,0x00, +0x87,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0x2b,0xf5,0xe2,0xfd,0xfd,0xf9,0x00,0x84, 
+0xfd,0x0c,0x00,0xf9,0xfe,0xfd,0x00,0xf7,0xfd,0xfd,0xf9,0xf5,0x00,0xf6,0x86,0xfd, +0x02,0xf5,0x2b,0x83,0xfd,0x0a,0x2b,0xf5,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0xf9,0x00, +0x8b,0xfd,0x02,0x2b,0xf5,0x83,0xfd,0x16,0x2b,0xf5,0xfe,0xfd,0xf9,0x00,0xfd,0xfd, +0x00,0x00,0xf5,0x00,0xf5,0xf6,0xfc,0xf9,0x00,0xfc,0xfb,0xfb,0x00,0x56,0x85,0xfb, +0x21,0xfc,0x00,0x56,0xfb,0xfc,0xfc,0x00,0x56,0xfc,0x00,0xf9,0xac,0xac,0xfd,0x00, +0x00,0xf5,0x00,0xf5,0xf6,0xfe,0xfd,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0xfb,0xf6,0x00, +0x00,0xac,0x86,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x09,0x00,0xfa,0xfd,0xfd,0x00,0xf7, +0xfd,0x2b,0x00,0x83,0xf5,0x02,0x00,0x81,0x85,0xfd,0x03,0xf5,0x2b,0xfe,0x84,0xfd, +0x02,0x00,0xfa,0x83,0xfd,0x0f,0x00,0x2b,0xfe,0x00,0xf8,0xf6,0x2b,0xf8,0x00,0xac, +0x00,0xfe,0xfd,0x00,0x00,0x83,0xf5,0x04,0x2b,0xfd,0xf9,0x00,0x88,0xfd,0x02,0x00, +0xf7,0x83,0xfd,0x0d,0x00,0x2b,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b, +0xf5,0x83,0xfd,0x06,0xf5,0x2b,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x09,0x00,0xf9,0xfe, +0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0x86,0xfd,0x01,0xf9,0x84,0x00,0x0b,0xfd,0xfd,0x00, +0xf9,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0xfb,0x84,0x00,0x0c,0xfa,0xfd,0xf9,0x00,0xfd, +0xfd,0x2b,0xf5,0xfe,0xfd,0xf9,0x00,0x9b,0x05,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x0d, +0x00,0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0x56,0x00,0xfd,0xfd,0xf7,0x00,0x83,0xfd,0x13, +0xf5,0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xf5,0x2b,0xfd, +0xfd,0xf7,0x00,0x86,0xfd,0x1b,0xf7,0x00,0xfd,0xfd,0x56,0x00,0xfd,0xfe,0x2b,0xf5, +0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xfe,0x2b,0x00,0xfd,0xfe,0xf5, +0x2b,0x86,0xfd,0x05,0x00,0xf9,0xfd,0xf9,0x00,0x87,0xfd,0x02,0xf9,0x00,0x83,0xfd, +0x0a,0xf7,0x00,0xfd,0xfd,0x56,0x00,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x06,0x00,0xf9, +0xfd,0xfd,0xf5,0x2b,0x83,0xfd,0x04,0xfe,0xf9,0x00,0xf7,0x85,0xfd,0x02,0xf5,0xf6, +0x83,0xfd,0x0a,0xf6,0xf6,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0x8b,0xfd,0x0f, +0xf7,0x00,0xfd,0xfd,0xfe,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xac,0xac,0x00,0x2b,0xa2, 
+0xfc,0xfb,0x0a,0xfc,0xf8,0x00,0xfb,0xfb,0xfc,0x00,0x56,0xfb,0xfb,0xa2,0xfc,0xfb, +0x0a,0x00,0xf8,0xfc,0xfb,0xf9,0x00,0xfa,0xfc,0x00,0x56,0x83,0xfb,0x04,0x00,0x2b, +0xac,0xac,0x85,0xfd,0x04,0xfe,0xf7,0x00,0xfa,0x83,0xfd,0x03,0xfb,0xf5,0x00,0x86, +0xfd,0x02,0x00,0xf9,0x83,0xfd,0x09,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfe,0xf8,0x00, +0x83,0xfd,0x01,0xfe,0x85,0xfd,0x03,0xfe,0x00,0x2b,0x85,0xfd,0x15,0x00,0xf7,0xfd, +0xfd,0xfe,0x00,0xf8,0xfd,0xf6,0xf6,0x00,0x81,0xfb,0x00,0x2b,0xf5,0xfd,0xfd,0x00, +0xf7,0xfe,0x84,0xfd,0x02,0xf9,0x00,0x88,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x0d,0x00, +0xf8,0xfe,0x2b,0xf5,0xfd,0xfd,0x56,0x00,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x13,0xf5, +0x2b,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0xf5,0x2b,0xfd,0xfd, +0x2b,0xf5,0x85,0xfd,0x21,0xfb,0x00,0x81,0xfe,0xfa,0x00,0xfd,0xfd,0x00,0xf8,0xfd, +0xfd,0x56,0x00,0xfd,0xfd,0x00,0xf7,0xfd,0xfe,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd, +0xf7,0x00,0xfd,0xfd,0xf9,0x00,0x9b,0x05,0xfd,0x28,0xf5,0x00,0xf9,0xfe,0x2b,0x00, +0xac,0xfd,0xf7,0x00,0xac,0xf9,0x00,0x00,0xfd,0xfd,0xf7,0x00,0xac,0xfd,0xfd,0xf5, +0xf5,0xfe,0xfd,0xfd,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe,0xfd,0x00,0x2b,0xfe,0xfd, +0x2b,0xf5,0x86,0xfd,0x1b,0xf7,0x00,0xac,0xf9,0x00,0x00,0xfd,0xfd,0x2b,0x00,0xfe, +0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xac,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf7, +0x86,0xfd,0x06,0x00,0xf9,0xfe,0xfa,0x00,0x81,0x86,0xfd,0x0f,0x81,0x00,0xfa,0xfd, +0xfd,0xf7,0x00,0xac,0xf9,0x00,0x00,0xfd,0xfd,0xf9,0x00,0x84,0xfd,0x0d,0x00,0xf9, +0xfd,0xfd,0xf5,0x2b,0xfd,0xfb,0xf9,0xfd,0xfb,0x00,0xf8,0x85,0xfd,0x0f,0xfa,0x00, +0xf9,0xfd,0xf9,0x00,0xf9,0xfe,0xfd,0x00,0xf9,0xfd,0xfd,0xfa,0x00,0x83,0xfd,0x02, +0xf8,0xf8,0x86,0xfd,0x16,0xf7,0x00,0xac,0xfd,0xfd,0x2b,0xf5,0xac,0xfc,0x56,0x00, +0xfb,0xfb,0x2b,0x00,0x56,0xfb,0x81,0x56,0xfb,0x56,0x00,0x83,0xfb,0x04,0x00,0x56, +0xfb,0xfc,0x84,0xfb,0x20,0x00,0xf5,0xfa,0x81,0xf5,0xf5,0xfb,0xfb,0x00,0xf8,0xfc, +0xfb,0xfc,0x2b,0x00,0xf8,0xfc,0x81,0xf8,0xfc,0xf9,0xfa,0xfd,0xf9,0x00,0xfb,0xfd, 
+0x56,0xac,0xfc,0xf5,0xf6,0x86,0xfd,0x13,0x00,0xf7,0xfe,0xfd,0xfd,0x00,0xf9,0xfd, +0xfd,0xf5,0x2b,0xfd,0xac,0x00,0xf6,0xfc,0xfd,0xfa,0xfb,0x85,0xfd,0x02,0xf5,0x2b, +0x85,0xfd,0x1b,0x2b,0xf5,0xfb,0xfd,0x2b,0x00,0xac,0xfd,0xf8,0x00,0x00,0xfe,0xfd, +0x00,0x00,0xf8,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0xfa,0x00,0x88,0xfd, +0x03,0x00,0xf5,0xf9,0xe2,0xfd,0xf7,0x00,0xac,0x1d,0xf9,0x00,0x00,0xfd,0xfd,0xf7, +0x00,0xac,0xfd,0xfd,0xf5,0xf5,0xfe,0xfd,0xfd,0x00,0xf6,0xac,0xfc,0xf6,0x00,0xfe, +0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0x85,0xfd,0x26,0xfa,0x00,0xfa,0xfc,0xf6,0x00, +0xac,0xfd,0x2b,0x00,0x56,0xf8,0x00,0x00,0xfe,0xfd,0x00,0xf7,0xfe,0xf7,0x00,0x56, +0xfe,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xfb,0x2b,0xfb, +0x95,0x05,0xfd,0x29,0xfe,0x00,0xf6,0xf5,0x00,0x00,0x81,0xfd,0xfd,0xac,0xf5,0x00, +0xf5,0x56,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x2b,0xfd,0x81,0x00,0x00,0xf9,0xfd,0xac, +0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0x00,0x86,0xfd,0x1b, +0xac,0xf5,0x00,0xf5,0x56,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, +0xac,0xf5,0x00,0x2b,0xfd,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0x86,0xfd,0x07,0x00,0xf9, +0xfd,0xfe,0xf6,0x00,0x00,0x86,0xfd,0x0e,0x2b,0x00,0x00,0xfd,0xac,0xf5,0x00,0xf5, +0x56,0x00,0xfd,0xfd,0xfa,0x00,0x84,0xfd,0x08,0x00,0xfa,0xfd,0xfd,0xf5,0x2b,0xfd, +0xfc,0x83,0x00,0x02,0xf6,0xfe,0x86,0xfd,0x01,0xf8,0x83,0x00,0x01,0xf8,0x83,0xfd, +0x06,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x02,0xf5,0x00,0x86,0xfd,0x0f,0xac, +0xf5,0x00,0xf6,0xfc,0x2b,0x00,0xfb,0xfb,0xf8,0x00,0xfc,0xfb,0xfc,0x2b,0x83,0x00, +0x09,0xf6,0xfb,0x56,0x00,0xfb,0xfc,0xfb,0x00,0x56,0x85,0xfb,0x03,0xfc,0x00,0xf6, +0x83,0x00,0x05,0x81,0xfc,0xfb,0x00,0x56,0x83,0xfb,0x02,0xfc,0x2b,0x83,0x00,0x03, +0xf6,0xfc,0xf8,0x83,0x00,0x08,0xf7,0xfd,0xfd,0xf5,0x00,0x00,0xf5,0xac,0x86,0xfd, +0x0f,0xf8,0x00,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xfb,0xf5, +0x83,0x00,0x01,0xfb,0x85,0xfd,0x02,0xf5,0x2b,0x85,0xfd,0x02,0xfe,0xf6,0x83,0x00, 
+0x0f,0x81,0xfd,0xfd,0xfb,0x00,0xf6,0xfd,0xfd,0xf6,0x00,0xfb,0xfd,0xfd,0xfe,0xf7, +0x83,0x00,0x04,0x2b,0xfd,0xf9,0x00,0x87,0xfd,0x01,0xfe,0x83,0xf5,0x25,0x00,0x00, +0x81,0xfd,0xfd,0xac,0xf5,0x00,0xf5,0x56,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x2b,0xfd, +0x81,0x00,0x00,0xf9,0xfd,0xac,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x2b,0xfd, +0xfd,0xf7,0x00,0x85,0xfd,0x1c,0xfe,0xf6,0x00,0x00,0xf9,0x00,0x00,0xfd,0xfe,0x2b, +0xf5,0xf6,0x56,0x00,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0x2b,0x00,0xfa,0xf9,0x00,0xfd, +0xfe,0x2b,0xe2,0x00,0xfd,0xfe,0xf9,0x02,0x00,0xf9,0x96,0x05,0xfd,0xe2,0xfe,0xfd, +0xfd,0xfd,0x84,0xfd,0x01,0xfe,0x89,0xfd,0x01,0xfe,0x8b,0xfd,0xe2,0xfe,0xfd,0xfd, +0xfd,0x88,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x89,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x01, +0xfe,0x87,0xfd,0x01,0xfe,0x86,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x01,0xfe,0x8a,0xfd, +0xc2,0xfe,0xfd,0xfd,0x83,0xfd,0x01,0xfe,0x87,0xfd,0x03,0xf9,0xf5,0xfe,0x84,0xfd, +0x03,0xac,0xfc,0xfc,0xc2,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa3,0xfc, +0xfb,0xc3,0xfb,0xfb,0xfc,0x09,0xfb,0xfc,0xfb,0xfb,0x00,0x56,0xfb,0xfc,0xfc,0x83, +0xfb,0xc2,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0xa2,0xfb,0xfc,0x05,0xfb,0xfb,0xfc,0xac, +0xac,0x8a,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x87,0xfd,0xc2,0xfe,0xfd,0xfd,0x85, +0xfd,0x01,0xfe,0x86,0xfd,0xc2,0xfe,0xfd,0xfd,0x03,0xfd,0xfd,0xfe,0x8d,0xfd,0x01, +0xfe,0x8c,0xfd,0x01,0xfe,0x8a,0xfd,0x01,0xfe,0x89,0xfd,0x01,0xfe,0x84,0xfd,0x01, +0xfe,0x8b,0xfd,0x01,0xfe,0x86,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x83,0xfd,0x02,0x2b, +0x00,0xe2,0xfd,0xfd,0xfd,0xfe,0x85,0xfd,0x01,0xfe,0x89,0xfd,0x01,0xfe,0x98,0x0e, +0xfd,0x07,0x2b,0x56,0xfd,0xfd,0xac,0xfc,0xfc,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01, +0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0x05,0xfc,0xfb,0x00,0xf8,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84, +0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x04,0xfb,0xfc,0xac,0xac,0x9d,0x07,0xfd,0x06,0x2b, +0xf5,0x2b,0xf6,0x00,0x56,0x92,0x10,0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0x02, 
+0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85, +0xfb,0x01,0xfc,0x83,0xfb,0x04,0xf6,0xfa,0xfb,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x04,0xfb,0xfb,0xfc, +0xac,0x9b,0x07,0xfd,0x05,0xac,0xf8,0x2b,0x2b,0xfc,0x91,0x10,0xfd,0x02,0xfc,0xfc, +0x83,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb, +0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x84,0xfb,0xa3,0xfc,0xfb,0x83,0xfb,0xa2,0xfc,0xfb, +0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb, +0xfc,0xac,0x9b,0x07,0xfd,0x01,0xfe,0x9f,0x0f,0xfd,0x02,0xac,0xfc,0x84,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb, +0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb, +0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfc,0xac,0x97,0x17,0xfd,0x01, +0xac,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc, +0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc3,0xfc,0xfb,0xfb,0xa2,0xfb, +0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfb,0xfc,0xac,0x93, +0x17,0xfd,0x02,0xac,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0xa2,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc, +0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xa2, +0xfc,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x03,0xfb, +0xfc,0xac,0x9f,0x16,0xfd,0x02,0xac,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc, +0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb, +0xe2,0xfb,0xfb,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xa2, 
+0xfc,0xfb,0x85,0xfb,0x05,0xfc,0xfb,0xfb,0xfc,0xac,0x9c,0x16,0xfd,0x02,0xfc,0xfc, +0xa2,0xfb,0xfc,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb, +0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x02,0xfb, +0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x01,0xfc,0x87,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2, +0xfc,0xfb,0x02,0xac,0xac,0x98,0x16,0xfd,0x01,0xac,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb, +0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb, +0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x86,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2, +0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x02,0xfc,0xac,0x95,0x16,0xfd, +0x02,0xac,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01, +0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb, +0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xc3, +0xfc,0xfb,0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x85,0xfb,0x02,0xfc,0xac, +0x92,0x16,0xfd,0x02,0xac,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xe4,0xfb,0xfc,0xfb,0xfb, +0xc2,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc, +0xfb,0xfb,0x01,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x86,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02, +0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfc,0xac,0x9f,0x15,0xfd,0x01,0xac,0xe2,0xfc, +0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb, +0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x8a,0xfb,0x01,0xfc,0x83,0xfb,0xc2, +0xfc,0xfb,0xfb,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xc2,0xfc, +0xfb,0xfb,0xa2,0xfb,0xfc,0xe2,0xfb,0xfb,0xfb,0xfc,0x87,0xfb,0x01,0xfc,0x83,0xfb, +0x02,0xfc,0xac,0x9c,0x15,0xfd,0x01,0xac,0xe3,0xfc,0xfb,0xfb,0xfb,0xa2,0xfb,0xfc, 
+0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0x01,0xfc,0x85,0xfb,0xc3, +0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb, +0xe3,0xfb,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0x02,0xfc,0xac,0x9a,0x15,0xfd, +0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x83,0xfb, +0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x83, +0xfb,0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x03,0xfb, +0xfb,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc, +0x85,0xfb,0x01,0xfc,0x89,0xfb,0xa3,0xfc,0xfb,0x04,0xfb,0xfb,0xac,0xac,0x96,0x15, +0xfd,0x03,0xac,0xfc,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0xc2,0xfc,0xfb, +0xfb,0xc2,0xfb,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb, +0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x84, +0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xe2,0xfb, +0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x85, +0xfb,0x01,0xfc,0x83,0xfb,0x02,0xfc,0xac,0x94,0x15,0xfd,0x01,0xac,0xe2,0xfc,0xfb, +0xfb,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0xe3,0xfb,0xfb,0xfb,0xfc,0x87,0xfb,0xa2, +0xfc,0xfb,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x83, +0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe5,0xfb,0xfb,0xfb,0xfc,0x84,0xfb,0xe3, +0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0x06,0xfc,0xfb,0xfb,0xfc, +0xfc,0xac,0x92,0x15,0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x84, +0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb, +0x85,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x85, +0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x85,0xfb,0xe4,0xfc,0xfb, +0xfb,0xfb,0x83,0xfb,0xa3,0xfc,0xfb,0xe3,0xfb,0xfb,0xfc,0xfb,0x03,0xfb,0xfb,0xac, 
+0x90,0x15,0xfd,0x01,0xfc,0x87,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb, +0x01,0xfc,0x87,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xc3,0xfb,0xfb,0xfc,0x02,0xfb, +0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x88, +0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01,0xac,0x9e,0x14,0xfd, +0x01,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb, +0x02,0xfb,0xfb,0xa3,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x85, +0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x02,0xfc,0xfc,0x84,0xfd,0x01,0xac,0xc2,0xfb, +0xfc,0xfb,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x84,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb, +0x03,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb, +0xfc,0x01,0xac,0x96,0x0e,0xfd,0x02,0xfa,0xfb,0x85,0xfd,0x02,0xf9,0xac,0x93,0x03, +0xfd,0x06,0xf9,0xac,0xfd,0xfc,0xf9,0xac,0x93,0x01,0xfd,0x02,0xfb,0xfa,0x8e,0xfd, +0xa2,0xfc,0xfb,0x87,0xfb,0xc3,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xc2, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xa3,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x04, +0x81,0x56,0xfa,0xfb,0x88,0xfd,0x84,0xfb,0xa3,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x04,0xfc,0xf9,0x56,0xfc,0x86,0xfb, +0xa2,0xfc,0xfb,0x04,0xfb,0xfb,0xfa,0xf9,0xe2,0xfb,0xfb,0xfc,0xfb,0x03,0xfb,0xac, +0xac,0x99,0x02,0xfd,0x02,0xfa,0xfb,0x91,0x01,0xfd,0x02,0xfc,0xf9,0x85,0xfd,0x02, +0xfb,0xfa,0x9c,0x08,0xfd,0x05,0x2b,0x00,0xf5,0x00,0xf7,0x8f,0xfd,0x02,0xf5,0x2b, +0x83,0xfd,0x03,0xfe,0xf6,0xf5,0x94,0x03,0xfd,0x06,0x00,0xf9,0xfd,0xf9,0x00,0xf9, +0x92,0x01,0xfd,0x07,0xfe,0x2b,0x00,0xfd,0xfe,0xf6,0xf8,0x89,0xfd,0x01,0xfc,0x84, +0xfb,0xc4,0xfc,0xfb,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x03, +0xfb,0xfb,0xfc,0x86,0xfb,0xa2,0xf5,0x00,0x02,0x00,0xf9,0x88,0xfb,0x02,0xfc,0xf8, +0x83,0x00,0x8a,0xfd,0x02,0xfb,0xfc,0x88,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, 
+0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x02,0x2b,0x00,0xc2,0xfb,0xfb, +0xfc,0x86,0xfb,0x03,0xfc,0xf6,0x00,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc, +0x99,0x02,0xfd,0x02,0xf5,0x2b,0x91,0x01,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x03,0xfb, +0x00,0x81,0x9c,0x08,0xfd,0x06,0x00,0xf6,0xfa,0xf8,0x00,0x2b,0x8e,0xfd,0x02,0x00, +0xf7,0x83,0xfd,0x03,0xfa,0x2b,0xfe,0x94,0x03,0xfd,0x06,0x00,0xf9,0xfe,0xfc,0x00, +0xf9,0x93,0x01,0xfd,0x06,0x2b,0xf5,0xfd,0xfd,0xf8,0xf9,0x87,0xfd,0x01,0xac,0xa2, +0xfc,0xfb,0x83,0xfb,0x04,0xfc,0xfb,0x56,0xfa,0x86,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2, +0xfb,0xfc,0xfb,0xfb,0x84,0xfb,0x07,0xfc,0xfb,0xfb,0xfc,0xfb,0x00,0x2b,0x83,0x56, +0x01,0x81,0xa3,0xfb,0xfc,0x83,0xfb,0x04,0xf5,0xf6,0xfc,0xfc,0x8a,0xfd,0xc3,0xfc, +0xfb,0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb, +0x83,0xfb,0x05,0xfc,0xfb,0xf6,0xf5,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc, +0xfb,0x04,0xfb,0xfb,0x2b,0x00,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x01, +0xfc,0x8a,0xfd,0x02,0xfc,0xf9,0x9c,0x01,0xfd,0x02,0x00,0xf7,0x91,0x01,0xfd,0x02, +0xf9,0x00,0x84,0xfd,0x02,0xf6,0x81,0x9d,0x08,0xfd,0x06,0xf5,0x2b,0xfd,0xfe,0x2b, +0x00,0x8e,0xfd,0x02,0xf5,0x2b,0x9a,0x03,0xfd,0x06,0x00,0xf9,0xfd,0xac,0x00,0xfe, +0x93,0x01,0xfd,0x02,0x2b,0xf5,0x8b,0xfd,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x06, +0xfb,0xf5,0xf6,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x07,0xfb,0xfb,0xfc,0xfb,0xfb,0x00, +0x56,0xc2,0xfb,0xfc,0xfb,0x85,0xfb,0x05,0xfc,0xfb,0xf5,0xf6,0xfb,0x8c,0xfd,0xe2, +0xfb,0xfb,0xfb,0xfc,0xc2,0xfb,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc, +0x83,0xfb,0x03,0xfc,0x2b,0x00,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x05,0xfc,0xfb,0xfc, +0xf6,0xf5,0x86,0xfb,0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0xfc,0xac,0x88,0xfd,0x02, +0xf9,0x00,0x9c,0x01,0xfd,0x02,0xf5,0x2b,0x91,0x01,0xfd,0x02,0xfa,0x00,0x93,0x09, +0xfd,0x06,0xf5,0x2b,0xfd,0xfd,0xf6,0xf6,0x83,0xfd,0x04,0xf6,0x00,0x00,0xf6,0x83, 
+0xfd,0x09,0xf8,0x00,0x00,0xf5,0xf5,0x2b,0xfd,0xfd,0xfa,0x83,0x00,0x10,0xfa,0xfd, +0xfd,0x00,0xf6,0xf6,0x00,0x00,0x81,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0x2b,0x83,0x00, +0x03,0xf8,0xfd,0xfd,0xa2,0x2b,0x00,0x08,0xf9,0xf9,0x00,0xf8,0x00,0x2b,0xfd,0xfa, +0x83,0x00,0x03,0x81,0xfd,0xfd,0x86,0x00,0x01,0xfe,0x85,0xfd,0x05,0x00,0xf9,0xfd, +0xfc,0xac,0x83,0xfd,0x01,0xf8,0x83,0x00,0x0a,0xf8,0xfd,0xfd,0xf9,0x00,0xf8,0x00, +0x2b,0xfd,0xfa,0x83,0x00,0x0a,0xf6,0x00,0xfe,0xfd,0x00,0x2b,0xfe,0xfd,0x00,0x56, +0x83,0x00,0x04,0x81,0xfc,0xfb,0xf5,0x83,0x00,0x03,0x56,0xfc,0xf6,0x83,0x00,0x03, +0x56,0xfb,0x81,0x83,0x00,0x0b,0xf7,0xfb,0xfb,0xf5,0xf6,0xfb,0xfb,0xf6,0xf5,0xfc, +0xfb,0xa2,0xf6,0x00,0x01,0x56,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x03,0xfc,0x00,0x56, +0x85,0xfb,0x02,0x00,0xf8,0x83,0x00,0x02,0x81,0xfb,0x84,0x00,0x03,0xf9,0xfd,0xfa, +0x83,0x00,0x0d,0xf6,0xfd,0xfd,0xf9,0x00,0xfa,0x00,0x00,0xf6,0xfb,0xfc,0xfb,0xf9, +0x83,0x00,0x0a,0xf5,0xfc,0xfb,0xf5,0x00,0x00,0xf6,0xfc,0xfb,0xf6,0x85,0x00,0x0b, +0x56,0xfb,0xfb,0xfc,0xfb,0xfb,0xf6,0xf5,0xfc,0xfb,0x81,0x83,0x00,0x03,0xf7,0xfb, +0xfc,0x84,0xfb,0xa2,0x2b,0x00,0x06,0x00,0xf6,0xfc,0xfb,0xfb,0xf7,0x83,0x00,0x05, +0xf7,0xfb,0xfc,0xf9,0x00,0x83,0xfd,0x03,0x00,0xf9,0xfd,0x84,0x00,0x03,0xfe,0xfd, +0xf6,0x83,0x00,0x09,0x81,0xfd,0xfd,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x87,0xfd,0x09, +0xf8,0x00,0x00,0xf5,0xf5,0x2b,0xfd,0xfd,0xfa,0x83,0x00,0x01,0x81,0x86,0xfd,0x01, +0xfc,0x83,0x00,0x12,0xf6,0x00,0xfe,0xfd,0xfd,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xf9, +0x00,0x56,0x00,0x00,0xf6,0xac,0x83,0x00,0x03,0xac,0xfd,0xfb,0x84,0x00,0x0e,0xac, +0xfd,0xfd,0x00,0xf7,0x00,0x00,0xfe,0x00,0xf6,0xf5,0x00,0xfd,0xfb,0x84,0x00,0x0c, +0xac,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0x00,0xf9,0xfd,0xfd,0xfa,0x83,0x00,0x01,0x81, +0x96,0x05,0xfd,0x43,0xfe,0x00,0xf6,0xf9,0xf6,0x00,0xfb,0xfd,0xfe,0x2b,0x00,0xac, +0xac,0x00,0xf8,0xfd,0xf9,0x00,0xf9,0xfe,0xf8,0x00,0x2b,0xfd,0xac,0x00,0xf8,0xfe, +0xf7,0x00,0xfd,0xfd,0xf5,0x00,0xf7,0xfe,0xf5,0x00,0xf5,0xac,0xf7,0x00,0xfd,0xfd, 
+0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfe,0x2b,0x00,0xf7,0xfd,0xfe,0xf9,0x00,0xf6,0xac, +0xfd,0xac,0x00,0xf8,0xfe,0xf7,0x00,0x85,0xfd,0x03,0x56,0x00,0xf8,0x86,0xfd,0x02, +0x00,0xf9,0x85,0xfd,0x40,0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xf9,0x00,0xf6, +0xac,0xfd,0xac,0x00,0xf7,0xfd,0xfa,0x00,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x00, +0x00,0xfa,0xfb,0x00,0x2b,0xfb,0xfb,0xf8,0x81,0xfc,0xf5,0xf5,0xfb,0xfc,0x00,0x2b, +0xfb,0xfb,0xfc,0x00,0xf6,0xfb,0xf9,0x00,0xfa,0xfc,0x00,0x2b,0xfb,0xfc,0x2b,0x00, +0xfb,0xfb,0x2b,0x00,0x2b,0xc3,0xfc,0xfb,0xfb,0x49,0xfc,0xfb,0x00,0xf5,0xf6,0x2b, +0xf8,0xfc,0xfb,0x00,0x00,0x56,0xfb,0x00,0x2b,0xfc,0xfc,0x00,0x2b,0xfb,0xfd,0xac, +0x00,0xf7,0xfd,0xfc,0x00,0x2b,0xfd,0xf9,0x00,0x00,0xfa,0xf9,0x00,0xf9,0xfb,0x81, +0x00,0xf5,0x81,0xfc,0xfa,0xfb,0xf6,0x00,0x81,0xfb,0x00,0x2b,0xfb,0xfc,0xfb,0xfb, +0xfc,0x00,0x00,0x81,0xfb,0xfc,0xfb,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0x00,0xf5,0xfc, +0xf9,0x00,0xfa,0x83,0xfb,0x14,0xfc,0xfb,0x2b,0x00,0xf6,0x81,0xfa,0x00,0x2b,0xfb, +0x56,0x00,0x56,0xfb,0x56,0x00,0xf8,0xfc,0x56,0x00,0x83,0xfd,0x16,0x00,0xf9,0xfe, +0xf9,0x00,0xfd,0xfe,0xfd,0x2b,0x00,0xfb,0xfe,0xf7,0x00,0xfc,0xfe,0x2b,0x00,0xf5, +0xac,0xf7,0x00,0x86,0xfd,0x0e,0xf9,0x00,0xf9,0xfe,0xf8,0x00,0x2b,0xfd,0xac,0x00, +0xf8,0xfd,0xf7,0x00,0x85,0xfd,0x43,0xfe,0x00,0xf6,0xac,0xfc,0xf5,0x00,0xfd,0xfd, +0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd,0xfa,0x00,0x00,0xfb,0x81,0x00,0x00,0xf9,0xac, +0x00,0xf7,0xfd,0xfb,0xfa,0xfd,0xac,0x00,0xf7,0xfd,0xfd,0x00,0x00,0xfb,0xfe,0xfd, +0xf5,0x00,0xf9,0xfd,0xfd,0xfb,0xfa,0xfd,0xac,0x00,0xf7,0xfd,0xf9,0x00,0xf9,0xfd, +0xf7,0x00,0xf9,0xfd,0xac,0x00,0xf8,0xfe,0xf7,0x00,0x97,0x05,0xfd,0xa2,0xf5,0x00, +0x01,0xfb,0x83,0xfd,0x09,0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0xfe,0xf5,0xf6,0x83,0xfd, +0x2f,0xf5,0x2b,0xfd,0xf8,0x00,0xf9,0xf9,0xfa,0x00,0x81,0xfd,0x00,0x2b,0xfd,0xfd, +0x2b,0x00,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0xfe,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b, +0x00,0xfe,0xfd,0xfd,0xf9,0x00,0xac,0xfd,0xfd,0xf7,0x00,0xfa,0xf9,0xf9,0x00,0x81, 
+0x83,0xfd,0x03,0x81,0x00,0xf8,0x87,0xfd,0x02,0x00,0xfa,0x85,0xfd,0x02,0xf5,0xf6, +0x83,0xfd,0x1c,0xf6,0xf5,0xfe,0xf9,0x00,0xac,0xfd,0xfd,0xf7,0x00,0xfe,0xfd,0xfd, +0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0x56,0xfc,0xfc,0x00,0x2b,0xfc,0x84, +0xfb,0x1b,0xf6,0xf5,0xfb,0xfb,0xf5,0xf6,0xfc,0xfb,0xfa,0x00,0xf8,0x56,0xf8,0x00, +0x2b,0xfb,0xf5,0xf6,0xfc,0xfb,0xf6,0xf5,0xfc,0xfb,0xf6,0x00,0xfc,0x87,0xfb,0x01, +0xfc,0x83,0xfb,0x26,0x00,0xf6,0xf6,0x2b,0x56,0xfb,0xfc,0x00,0xf8,0xfb,0xfb,0xf5, +0xf6,0xfb,0xfc,0x00,0x2b,0xfb,0xfd,0xf7,0x00,0xfe,0xfd,0xfd,0xf8,0x00,0xfe,0xf9, +0x00,0xac,0xfb,0xfb,0x00,0xf8,0xfc,0x2b,0x00,0xfc,0x83,0xfb,0x0d,0xfc,0x00,0x2b, +0x56,0xf8,0xf6,0xf5,0xfc,0xfb,0xfb,0xfc,0xf5,0x00,0x87,0xfb,0x0b,0xf6,0xf5,0xfc, +0xf9,0x00,0xf8,0xf8,0x56,0x00,0x2b,0xfc,0x83,0xfb,0x05,0xfc,0xf6,0xf5,0xfc,0xfb, +0xc2,0xfc,0xf5,0xf5,0x08,0xfc,0xfb,0xfb,0xf5,0xf5,0xfb,0x56,0x00,0x83,0xfd,0x05, +0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf8,0x83,0xfd,0x09,0x00,0xf7,0xfd, +0x2b,0xf5,0xfe,0xfd,0xf9,0x00,0x86,0xfd,0x0f,0xf6,0xf5,0xfe,0xfd,0xfd,0xf5,0x2b, +0xfd,0xf8,0x00,0xfa,0xf9,0xf9,0x00,0x81,0x84,0xfd,0x1b,0xfb,0x00,0xfb,0xfd,0xfd, +0xf9,0x00,0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0xfd,0xf9,0x00,0xac,0xfd,0xfd, +0x00,0x56,0xfe,0xfd,0x00,0x2b,0x85,0xfd,0x06,0xf5,0x2b,0xfd,0xfd,0x00,0x56,0x83, +0xfd,0x03,0x00,0x2b,0xfe,0x86,0xfd,0x0d,0xf5,0x2b,0xfd,0xf5,0xf6,0xfe,0xfd,0xfd, +0x00,0xf9,0xfd,0xf8,0x00,0x83,0xf9,0x02,0x00,0x81,0x96,0x05,0xfd,0x05,0xf5,0x2b, +0xfe,0xf5,0xf5,0x83,0xfd,0x02,0x00,0x00,0x83,0xf5,0x20,0xf6,0xfd,0xf5,0x2b,0xfd, +0xfd,0xfe,0x00,0x2b,0xfe,0x2b,0x00,0xf5,0xf5,0x00,0xf5,0xfa,0xfd,0xf5,0x2b,0xfd, +0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf9,0xf5,0x83,0x00,0x04,0xfd,0xfd, +0x2b,0xf5,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x01,0xf7,0xa2,0x00,0xf5,0x07,0xf5, +0xfa,0xfd,0xfd,0xfb,0x00,0xf6,0x88,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x0f,0xf5,0x2b, +0xfd,0xfd,0xfe,0x2b,0xf5,0xfd,0xf9,0x00,0xfd,0xfd,0xfe,0x2b,0xf5,0x83,0xfd,0x2d, 
+0xf7,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xf9,0xfb,0xfb,0xf5,0xf6,0xfb,0xfc, +0x2b,0x00,0xf5,0x00,0x00,0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0x56,0x00,0x00,0xf5,0x00, +0xf5,0xf7,0xfc,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xfb,0x2b,0xf5,0x83,0xfb,0xc2, +0xfc,0xfb,0xfb,0x06,0xfb,0xfb,0xfc,0x00,0xf8,0xfc,0x84,0xfb,0x0e,0x00,0x56,0xfb, +0xfc,0x00,0xf6,0xfc,0xfb,0xf5,0xf6,0xfb,0xfd,0x2b,0xf5,0x83,0xfd,0xc2,0xf9,0x00, +0xfd,0x07,0xfb,0xfc,0x00,0x56,0xfb,0x2b,0x00,0x83,0xfb,0x02,0xfc,0xfb,0x83,0x00, +0x09,0xf5,0x00,0xf6,0xfb,0xfb,0xfc,0xf5,0x00,0xfa,0x84,0xfb,0x14,0xfc,0xfb,0xfc, +0x2b,0x00,0xfb,0x56,0x00,0x00,0xf5,0xf5,0x00,0xf7,0xfb,0xfb,0xfc,0xfb,0xfb,0x2b, +0x00,0x83,0xfb,0x15,0x2b,0xf5,0xfb,0xf5,0xf6,0xfb,0xfc,0xfb,0x2b,0xf5,0xfb,0x56, +0x00,0xfc,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf9,0x83,0xfd, +0x09,0xf5,0x2b,0xfd,0xf7,0x00,0xfd,0xfd,0xf9,0x00,0x86,0xfd,0x0b,0xf5,0x2b,0xfd, +0xfd,0xfe,0x00,0x2b,0xfe,0x2b,0x00,0x00,0x83,0xf5,0x01,0xfa,0x84,0xfd,0x02,0xf9, +0x00,0x83,0xfd,0x06,0xf9,0x00,0xfd,0xfe,0x00,0x00,0x83,0xf5,0x04,0xf6,0xfe,0xf9, +0x00,0x83,0xfd,0x09,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfd,0xac,0xf6,0x83,0x00,0x05, +0x2b,0xfd,0xfd,0x00,0xfa,0x83,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x02,0xac,0xf6,0x83, +0x00,0x04,0x2b,0xfd,0xf5,0x2b,0x83,0xfd,0x04,0x00,0xf9,0xfe,0x2b,0xa2,0x00,0xf5, +0x02,0xf5,0xfa,0x96,0x05,0xfd,0x0a,0x00,0x2b,0xfd,0xac,0x00,0x56,0xfd,0xfd,0x00, +0xf8,0x83,0xfd,0x0d,0xfe,0xfd,0xf5,0xf6,0xfe,0xfd,0xfd,0xf5,0x2b,0xfd,0xf8,0x00, +0xfe,0x83,0xfd,0x17,0xfe,0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00, +0xfd,0xfb,0x00,0x81,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x02,0xf9,0x00, +0x83,0xfd,0x04,0xf8,0x00,0xfd,0xfe,0x84,0xfd,0x04,0xac,0x00,0xf6,0xfe,0x88,0xfd, +0x02,0x00,0xf9,0x85,0xfd,0x02,0xf6,0xf6,0x83,0xfd,0x05,0xf5,0xf6,0xfd,0xf9,0x00, +0x83,0xfd,0x02,0xf7,0x00,0x83,0xfd,0x20,0x2b,0xf5,0xfd,0xfd,0x00,0xf7,0xfd,0xfd, +0x00,0xf8,0xfc,0xfb,0x00,0x2b,0xfb,0xf7,0x00,0xfb,0xfb,0x2b,0xf5,0xfc,0xfb,0x00, 
+0x2b,0xfb,0xfc,0xf9,0x00,0xf9,0xfb,0xfc,0x83,0xfb,0x02,0xf5,0xf6,0xe2,0xfc,0xfb, +0xf6,0xf5,0xc4,0xfb,0xfc,0xfb,0x02,0x00,0x56,0xa2,0xfb,0xfc,0x0f,0xfb,0x00,0xf8, +0xfc,0xfb,0xf5,0x2b,0xfb,0xfb,0x00,0x2b,0xfc,0xfd,0xf8,0x00,0x83,0xfd,0x0e,0xf8, +0x00,0xfd,0xfa,0x00,0xfd,0xfb,0xfb,0x00,0x56,0xfb,0xf7,0x00,0xfc,0x83,0xfb,0x03, +0xfc,0x00,0x2b,0xa2,0xfc,0xfb,0x05,0xfb,0xfc,0x2b,0x00,0xf9,0xa2,0xfc,0xfb,0x84, +0xfb,0x06,0x2b,0xf5,0xfb,0xfa,0x00,0xf9,0xa2,0xfc,0xfb,0x83,0xfb,0x1c,0xfc,0xfb, +0xf6,0xf5,0xfc,0xfb,0xfb,0xf5,0xf5,0xfc,0xf5,0xf6,0xfb,0xfb,0xfc,0xf5,0xf5,0xfc, +0xf8,0x00,0xfc,0xac,0xfc,0x00,0xfa,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf8,0x83, +0xfd,0x09,0x00,0xf8,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0x86,0xfd,0x02,0xf5,0x2b, +0x83,0xfd,0x06,0xf5,0x2b,0xfd,0xf8,0x00,0xfe,0x88,0xfd,0x0c,0xfa,0x00,0xac,0xfd, +0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf7,0xfe,0x84,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x11, +0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfe,0xf5,0xf5,0xfe,0xfe,0x00,0xf7,0xfd,0xfd,0x00, +0xf9,0x83,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x12,0xf5,0xf6,0xfe,0xfd,0xf5,0x2b,0xfe, +0xf5,0xf6,0xfd,0xfd,0xac,0x00,0xf9,0xfd,0xf8,0x00,0xfe,0x99,0x05,0xfd,0x35,0xfe, +0xf5,0x2b,0xfd,0xfd,0xf8,0x00,0xfc,0xfe,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0xfd,0xf8, +0x00,0xfb,0xfc,0x2b,0x00,0x2b,0xfd,0xac,0x00,0xf5,0xfc,0xfe,0xfa,0xfb,0xfd,0xf5, +0x2b,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xfa,0x00,0xfd,0x81,0x00,0xfa,0xac,0xf6,0x00, +0xac,0xfd,0xf7,0x00,0x83,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x0a,0xac,0x00,0xf6,0xfc, +0xfd,0xfa,0xfb,0xac,0x00,0x00,0x8a,0xfd,0x02,0x00,0xf9,0x84,0xfd,0x01,0xfe,0xe2, +0xf9,0x00,0xf9,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x38,0xfb,0x00,0x56,0xfe,0xf8,0x00, +0x00,0xfd,0xfe,0xf5,0x2b,0xfd,0xac,0x00,0x56,0xfb,0xfc,0xf5,0xf6,0xfc,0x2b,0x00, +0x81,0xfa,0x00,0x00,0xfb,0xfc,0xf5,0xf5,0xfb,0xfb,0xfc,0xf5,0x00,0xfa,0xfb,0xfa, +0x56,0xfc,0xf5,0xf5,0xfb,0x2b,0x00,0x00,0xfb,0xfb,0x2b,0x00,0xfc,0xfb,0xfb,0xfa, +0xf6,0xfa,0x83,0xfb,0x06,0xfc,0xfb,0xfb,0x00,0xf8,0xfc,0x84,0xfb,0x2d,0x00,0x56, 
+0xfb,0xfc,0x00,0xf6,0xfb,0xfc,0xf5,0xf6,0xfb,0xac,0xac,0x00,0xf7,0xfd,0xfb,0x00, +0x2b,0xfd,0xf9,0x00,0xfd,0xfb,0xfb,0x00,0xf8,0xfc,0x81,0x00,0xf5,0x81,0x81,0x56, +0xfb,0x2b,0x00,0x56,0xfb,0x81,0x56,0xfb,0xf7,0x00,0xf8,0x84,0xfb,0x0f,0xfc,0xfb, +0xfb,0xfc,0xfb,0x2b,0x00,0xfb,0xfc,0xf5,0x00,0xfa,0xfb,0xfa,0x56,0xa2,0xfc,0xfb, +0x2c,0xfb,0x2b,0x00,0x2b,0xfc,0x56,0x00,0x56,0xfb,0x56,0x00,0xf8,0xfc,0xf8,0x00, +0x56,0xfb,0xf9,0x00,0x56,0xfa,0x00,0x00,0xf9,0xfd,0x81,0x00,0x81,0xfd,0xfd,0xf7, +0x00,0xfb,0xfd,0xf7,0x00,0xfc,0xfe,0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0x86,0xfd,0x0f, +0xf8,0x00,0x81,0xac,0x2b,0x00,0x2b,0xfd,0xac,0x00,0xf5,0xfc,0xfe,0xfa,0xfb,0x84, +0xfd,0x12,0xfe,0x00,0xf6,0xfd,0xf9,0xf5,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac, +0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x11,0x00,0xfa,0xfd,0xfd,0xf5,0x2b,0xfd,0xf5,0xf5, +0xfd,0xf9,0x00,0xf6,0xfd,0xfd,0x00,0xf9,0x83,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x16, +0xf5,0xf5,0xfd,0xfa,0x00,0xf6,0xfd,0xf9,0x00,0xf8,0x56,0xf5,0x00,0xf9,0xfd,0xac, +0x00,0xf5,0xac,0xfd,0x81,0xfb,0x96,0x05,0xfd,0x02,0x00,0xf7,0x83,0xfd,0x05,0xf5, +0xf5,0xfd,0xfd,0xf7,0x83,0x00,0x27,0x2b,0xfd,0xfe,0xf6,0x00,0x00,0x2b,0xf5,0x2b, +0xfe,0xfd,0xfb,0xf5,0x00,0x00,0xf5,0xfb,0xfd,0xf5,0x2b,0xfe,0xfd,0x2b,0xf5,0xfd, +0xfd,0xf9,0x00,0xfd,0xfd,0xf6,0x00,0x00,0x56,0x00,0x00,0xfd,0x2b,0xf5,0x83,0xfd, +0x02,0xf9,0x00,0x84,0xfd,0x07,0xfb,0xf5,0x00,0x00,0xf5,0xfb,0xf9,0x86,0x00,0x01, +0xfe,0x85,0xfd,0x02,0x00,0xfa,0x86,0xfd,0x01,0xf8,0x83,0x00,0x05,0xf8,0xfe,0xfd, +0xf9,0x00,0x84,0xfd,0x14,0xf8,0x00,0x00,0xf6,0x2b,0xf5,0xfd,0xfd,0x00,0xf7,0xac, +0xfc,0x00,0xf8,0xfc,0xfb,0x00,0x2b,0xfb,0x81,0x83,0x00,0x0b,0x56,0x00,0x00,0xfb, +0xf9,0x00,0x00,0x56,0xfb,0x81,0xf6,0x83,0x00,0x12,0xf8,0xfb,0xf9,0x00,0x00,0xf6, +0x2b,0xf5,0xfb,0xfc,0xf6,0xf5,0xfb,0xfb,0xfc,0xf8,0x00,0x56,0x85,0xfb,0x01,0xfc, +0x85,0x00,0x04,0xf6,0xfc,0x00,0x56,0xe2,0xfb,0xfb,0xf5,0x2b,0x04,0xfb,0xfd,0xfd, +0xfa,0x83,0x00,0x0d,0x2b,0xfe,0xfd,0xf9,0x00,0xfd,0xfb,0xfc,0x00,0x56,0xfb,0xfb, 
+0xf9,0x83,0x00,0x04,0xf6,0xfc,0xfb,0x2b,0x83,0x00,0x02,0xf6,0xfc,0x86,0x00,0x03, +0xf8,0xfb,0xfc,0x83,0xfb,0x06,0xf6,0xf5,0xfc,0xfb,0x81,0xf6,0x83,0x00,0x01,0xf8, +0x84,0xfb,0x0b,0xfc,0xf6,0xf5,0xf5,0x00,0x00,0xf7,0xfc,0xfb,0xfb,0xf7,0x83,0x00, +0x12,0xf7,0xfc,0xfb,0xfb,0xf6,0x00,0x00,0xfa,0x00,0xf9,0xfd,0xfd,0xf6,0x00,0x00, +0xfe,0xfd,0xf6,0x83,0x00,0x09,0x81,0xfe,0xac,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0x87, +0xfd,0x0e,0xf6,0x00,0x00,0x2b,0xf5,0x2b,0xfe,0xfd,0xfb,0xf5,0x00,0x00,0xf5,0xfb, +0x85,0xfd,0x0a,0x81,0x00,0x00,0xf5,0xf7,0x00,0xfd,0xfd,0xfe,0xf7,0x83,0x00,0x04, +0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x16,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfe,0x81,0x00, +0x00,0xf6,0xf8,0x00,0xf7,0xfe,0x00,0xf9,0xfe,0xfd,0xfd,0x00,0xf7,0x83,0xfd,0x12, +0x81,0x00,0x00,0xf5,0x56,0x00,0xf7,0xfe,0x56,0x00,0xf5,0x81,0x00,0xf9,0xfd,0xfd, +0xfb,0xf5,0x83,0x00,0x01,0xfb,0x96,0x05,0xfd,0x01,0xfe,0x89,0xfd,0x01,0xfe,0x88, +0xfd,0x01,0xfe,0x85,0xfd,0x01,0xfe,0x92,0x01,0xfd,0xc3,0xfe,0xfd,0xfd,0x90,0x01, +0xfd,0x01,0xfe,0x95,0x01,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x02,0xfe,0xfd,0xc2,0xfc, +0xfb,0xfb,0xe2,0xfc,0xfb,0xfc,0xfb,0xa3,0xfb,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0xa2, +0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0xc2, +0xfb,0xfb,0xfc,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x83,0xfb,0x02,0xfd,0xfd,0xa2, +0xfe,0xfd,0x85,0xfd,0x02,0xac,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfc,0x02,0xfb,0xfb, +0xa3,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc, +0xfb,0xc2,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb, +0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x01,0xfc,0x83,0xfd,0x01,0xfe,0x84,0xfd,0xc2,0xfe, +0xfd,0xfd,0x02,0xfd,0xfe,0x8c,0xfd,0xc2,0xfe,0xfd,0xfd,0x83,0xfd,0x01,0xfe,0x96, +0x01,0xfd,0x01,0xfe,0x8e,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x84,0xfd,0x01,0xfe,0x87, +0xfd,0x01,0xfe,0x86,0xfd,0x04,0xfe,0xf9,0x00,0xac,0x83,0xfd,0xc2,0xfe,0xfd,0xfd, +0x93,0x0c,0xfd,0xa2,0xfc,0xfb,0x88,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc, 
+0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0x85,0xfb, +0xe2,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x02,0xfc,0xfb,0x8c,0xfd,0x03, +0xfb,0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x87,0xfb,0xc2, +0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x84,0xfb,0x02,0xfc,0xfc,0x92,0x06,0xfd,0x07, +0xf9,0x00,0x2b,0xf6,0x00,0xf6,0xfe,0x9b,0x0c,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb, +0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xe3,0xfb,0xfb,0xfc, +0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x85,0xfb,0xa2,0xfc,0xfb,0xe3,0xfb,0xfc,0xfb,0xfb, +0x06,0xfb,0xfb,0xfc,0xfb,0xfb,0xac,0x8b,0xfd,0xc3,0xfb,0xfc,0xfb,0x85,0xfb,0x01, +0xfc,0x87,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x84,0xfb, +0x01,0xfc,0x85,0xfb,0x01,0xfc,0x86,0xfb,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb, +0x03,0xfb,0xfb,0xac,0x92,0x06,0xfd,0x04,0xf9,0x2b,0x2b,0xfa,0x9c,0x0c,0xfd,0x01, +0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0xe4,0xfc,0xfb,0xfb, +0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x86,0xfb, +0xe3,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x8c,0xfd,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xe3, +0xfb,0xfb,0xfc,0xfb,0xa3,0xfb,0xfc,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfc,0xac,0x92,0x06, +0xfd,0x01,0xfe,0x9e,0x0c,0xfd,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0xc3,0xfb, +0xfb,0xfc,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x02, +0xfb,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb, +0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x01,0xfb,0x8c,0xfd,0x83,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xe3,0xfb, +0xfc,0xfb,0xfb,0x87,0xfb,0x01,0xfc,0x86,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb, 
+0xfb,0xfc,0x98,0x0e,0xfd,0x02,0xfa,0x81,0x83,0xfd,0x02,0xf9,0xac,0x88,0xfd,0x06, +0xfb,0xfa,0xfd,0xfd,0xfa,0x81,0x93,0x03,0xfd,0x01,0xac,0x84,0xfb,0xe4,0xfc,0xfb, +0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x88, +0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x86,0xfb,0x03,0xfc,0xfb,0xac,0x8b,0xfd,0xc4,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc, +0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x04,0xfb,0xfc,0x56,0xfa,0xe2,0xfb,0xfb,0xfc,0xfb, +0x02,0xfb,0xfc,0x86,0xfb,0xa2,0xfc,0xfb,0xe3,0xfb,0xfb,0xfb,0xfc,0x84,0xfb,0x04, +0xfc,0xfb,0xfb,0xac,0x93,0x04,0xfd,0x05,0xf9,0xac,0xfd,0xfc,0xf9,0x8f,0xfd,0x03, +0xfb,0xf9,0xac,0x9a,0x06,0xfd,0x02,0xf8,0xf6,0x91,0x02,0xfd,0x02,0xf5,0x00,0x83, +0xfd,0x02,0xf8,0x00,0x88,0xfd,0x06,0x2b,0xf5,0xfd,0xfd,0xf5,0x00,0x96,0x01,0xfd, +0x02,0xf8,0xf6,0x95,0x01,0xfd,0x02,0x81,0x00,0x83,0xfd,0x01,0xac,0xc2,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xe4,0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0x02,0xf6,0x2b,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x86, +0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0x8c,0xfd,0x86,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x89,0xfb,0x06,0xfc,0xfb,0xfb,0xf5, +0xf6,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0xe4, +0xfb,0xfb,0xfb,0xfc,0x02,0xfb,0xac,0x92,0x04,0xfd,0x05,0x00,0xf9,0xfd,0xf9,0x00, +0x8e,0xfd,0x04,0x2b,0x00,0xf5,0x00,0x88,0xfd,0x02,0xf8,0xf6,0x90,0x06,0xfd,0x02, +0xf9,0xf8,0x91,0x02,0xfd,0x02,0xf8,0xf5,0x84,0xfd,0x02,0xf8,0xf8,0x87,0xfd,0x06, +0x2b,0xf5,0xfd,0xfd,0xf8,0xf5,0x91,0x01,0xfd,0x02,0xfc,0xf9,0x83,0xfd,0x02,0xf9, +0xf8,0x91,0x01,0xfd,0x09,0xf9,0xac,0xfd,0xfd,0xfb,0x2b,0xfe,0xfd,0xfd,0xc2,0xfc, +0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xe3,0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0xc2,0xfc, +0xfb,0xfb,0x08,0xfc,0xfb,0xfc,0xf7,0x56,0xfb,0xfb,0xfc,0x85,0xfb,0x02,0xac,0xac, +0xa2,0xfd,0xac,0x03,0xac,0xfc,0xfc,0xc2,0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x84,0xfb, 
+0x03,0xfc,0xfb,0xfb,0x8c,0xfd,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb, +0x05,0xfc,0xfc,0xfd,0xac,0xac,0xa2,0xfd,0xac,0xc2,0xfb,0xfc,0xfb,0x06,0xfb,0xfb, +0x00,0x2b,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x89,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0x83, +0xfb,0x01,0xfc,0x84,0xfb,0x04,0xfc,0xfb,0xfb,0xfc,0x92,0x04,0xfd,0x05,0x00,0xf9, +0xfd,0xf9,0x00,0x8e,0xfd,0x03,0x00,0x56,0xfe,0x89,0xfd,0x02,0xf9,0xf8,0x91,0x06, +0xfd,0x01,0xfe,0x90,0x02,0xfd,0x03,0xfe,0x2b,0xf7,0x8d,0xfd,0x06,0xf7,0x00,0xfd, +0xfe,0x2b,0xf7,0x90,0x01,0xfd,0x03,0xfe,0xf9,0x00,0x84,0xfd,0x01,0xfe,0x91,0x01, +0xfd,0x02,0x00,0xf9,0x86,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x84,0xfb,0xc2,0xfc,0xfb, +0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xa2,0xfb,0xfc,0x03,0xfb, +0xfb,0xfc,0x88,0xfb,0x01,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xac,0x89,0xfd,0x01,0xac, +0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x8c,0xfd,0x02,0xfb,0xfc,0x83, +0xfb,0xa2,0xfc,0xfb,0x05,0xfb,0xfb,0xfc,0xfb,0xac,0x89,0xfd,0x03,0xac,0xfb,0xfc, +0x83,0xfb,0x03,0xfc,0xf5,0xf6,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0xa2, +0xfb,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x01,0xac,0x91,0x04,0xfd,0x05,0x00,0xf9,0xfd,0xfa,0x00,0x8e,0xfd,0x02,0x00,0xf9, +0x9c,0x06,0xfd,0x0d,0x2b,0x00,0xfe,0xfd,0x00,0x2b,0xfe,0xfd,0x2b,0x00,0xfe,0xfd, +0x56,0x83,0x00,0x04,0xfa,0xfd,0xfd,0xfa,0x83,0x00,0x0d,0xf5,0x00,0xfe,0xfd,0x00, +0xf9,0xfd,0xfd,0x00,0x2b,0xfd,0xfd,0xfb,0x83,0xfd,0x01,0x2b,0x83,0x00,0x01,0xf8, +0x86,0xfd,0x05,0x2b,0xf5,0xfd,0xfd,0xfb,0x84,0xfd,0x0e,0xf6,0x00,0x00,0xf6,0xfe, +0xfd,0xf5,0x00,0xac,0xfd,0xfd,0xf5,0xf7,0xfd,0x84,0x00,0x0f,0xfe,0x2b,0x00,0xfd, +0xfd,0x00,0x2b,0xf6,0x00,0x00,0x81,0xfd,0xfd,0xfc,0xf5,0x83,0x00,0x01,0xfd,0x84, +0x00,0x07,0xfd,0xf9,0x00,0xfd,0xac,0xfc,0xf5,0x83,0x00,0x0c,0xf9,0xfc,0xfb,0xf6, +0x00,0xf7,0x00,0x00,0xf7,0xfb,0xfb,0xfc,0x85,0xfb,0x18,0xfc,0xfb,0xfb,0x00,0x2b, +0xf5,0x00,0x00,0xf9,0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0x00,0x2b,0xfb,0xfc,0x00,0xf6, 
+0xfb,0xfc,0xf6,0x83,0x00,0x86,0xfd,0x01,0xfb,0x84,0x00,0x02,0x81,0xfc,0xa2,0xf8, +0x00,0x02,0x00,0x00,0x83,0xfb,0x02,0x00,0x2b,0x83,0x00,0x13,0xfa,0xfd,0xfd,0x00, +0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0x00,0x2b,0xfb,0xfb,0xfc,0x2b,0xf5,0xfb,0x56,0x83, +0x00,0x03,0xf9,0xfb,0xfd,0x86,0x00,0x85,0xfd,0x0a,0xfc,0x2b,0x00,0x00,0xf5,0x00, +0x2b,0xfc,0xfb,0xf9,0x83,0x00,0x04,0xf9,0xfb,0xfb,0xf7,0x83,0x00,0x04,0xf9,0xfb, +0xfb,0xf6,0x83,0x00,0x0b,0xfc,0xfb,0xf6,0x00,0xfc,0xfb,0x56,0x00,0xfb,0xfb,0xf9, +0x83,0x00,0x03,0xf7,0xfb,0xfc,0x84,0xfb,0x0e,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfe, +0xfd,0x2b,0x00,0xf8,0x00,0x00,0xf8,0x83,0xfd,0x04,0xf6,0x00,0x00,0xf6,0x86,0xfd, +0x06,0xf9,0x00,0xfa,0x00,0x00,0xf6,0x83,0xfd,0x01,0xfa,0x83,0x00,0x01,0xf6,0x83, +0xfd,0x09,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xac,0x00,0xf9,0x83,0xfd,0x0f,0x00,0x56, +0xfd,0xf8,0x00,0x00,0xf5,0xac,0xfd,0xfd,0x00,0xfa,0xfd,0xf9,0x00,0x83,0xfd,0x04, +0xf6,0x00,0x00,0xf6,0x86,0xfd,0x84,0x00,0x03,0xfd,0xfd,0xf6,0x83,0x00,0x08,0x81, +0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x56,0x83,0x00,0x01,0xfa,0x99,0x05,0xfd,0x21,0xf7, +0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf8,0xfd,0xfc,0xac, +0xfd,0xac,0x00,0xf7,0xfd,0xfa,0x00,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0xfe,0xf5,0x2b, +0x85,0xfd,0x06,0xfe,0xf9,0xfb,0xfe,0xf7,0x00,0x85,0xfd,0x03,0xfe,0x2b,0x00,0x85, +0xfd,0x23,0xfe,0x2b,0x00,0xac,0xac,0x00,0xf7,0xfd,0xac,0xf5,0xf5,0xfd,0xf7,0x00, +0xfd,0xfd,0xf9,0x00,0xfd,0xfe,0xfd,0x2b,0x00,0xfd,0xfe,0xf5,0x00,0xf7,0xfe,0xf5, +0x00,0xfd,0xfe,0x00,0xf5,0xa2,0xfb,0xfd,0x17,0xfe,0x00,0xf9,0xfd,0xfd,0xf9,0x00, +0xfd,0xac,0xf6,0x00,0xfa,0xfc,0x2b,0x00,0x81,0xfb,0x2b,0x00,0xf5,0x81,0x2b,0x00, +0x84,0xfb,0xa2,0xfc,0xfb,0x1b,0xfb,0xfb,0xfc,0x00,0x00,0x56,0xfb,0xf6,0x00,0xfb, +0xfb,0x00,0x56,0xfb,0xfc,0xf5,0xf6,0xfc,0xfb,0xf5,0xf6,0xfc,0x56,0x00,0x81,0xfd, +0xfb,0x86,0xfd,0x2d,0xfb,0xfa,0xfd,0xac,0xf5,0x2b,0xfb,0x56,0x00,0xf5,0xfa,0x81, +0xf5,0xf5,0xfb,0xfc,0x00,0x00,0x56,0xfc,0xf6,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfe, 
+0x2b,0xf5,0xfd,0xf7,0x00,0xfb,0xfc,0xfb,0x00,0xf9,0x81,0x00,0xf7,0xfc,0x2b,0x00, +0xac,0x84,0xfd,0x03,0x56,0x00,0xf8,0x85,0xfd,0x2a,0xf9,0x00,0x56,0xfc,0xf7,0x00, +0xf6,0xfb,0x81,0x00,0x2b,0xfc,0x2b,0x00,0xfb,0xfc,0x00,0xf7,0xfc,0x81,0x81,0xfb, +0xf9,0x00,0xfa,0xfc,0xfa,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0xf8,0x00,0xfc,0xfb,0xf5, +0xf5,0xfb,0xfc,0xfa,0x85,0xfb,0x17,0xfc,0x00,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd, +0x2b,0x00,0xf5,0xac,0xf7,0x00,0xfd,0xfe,0x2b,0x00,0xac,0xac,0x00,0xf8,0x85,0xfd, +0x34,0xf9,0x00,0xf5,0xfb,0x81,0x00,0x81,0xfd,0xac,0x00,0xf7,0xfd,0xfc,0x00,0x2b, +0xfd,0xfd,0x00,0xf9,0xfd,0xfe,0xf5,0x2b,0xfd,0xf5,0xf5,0xfd,0xfd,0x81,0x00,0xfe, +0xf9,0x00,0xfa,0xfe,0xf5,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0x2b, +0x00,0xac,0xac,0x00,0xf8,0x86,0xfd,0x15,0x00,0xf9,0xfd,0xfe,0x2b,0x00,0xfb,0xfe, +0xf7,0x00,0xfc,0xfd,0xf7,0x00,0xfd,0xfd,0x00,0xf8,0xfd,0xfc,0xac,0x99,0x05,0xfd, +0x0f,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf5,0x81, +0x83,0xfd,0x0f,0xf8,0x00,0xfe,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0xfd, +0x00,0xf7,0x89,0xfd,0x02,0xf9,0x00,0x86,0xfd,0x03,0x2b,0xf5,0xfe,0x85,0xfd,0x11, +0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0xfe,0xfd,0xfb,0x00,0xf6,0x00,0xac,0xfd,0xfd,0xf9, +0x00,0x83,0xfd,0x0e,0x2b,0xf5,0xfd,0xfd,0x00,0x2b,0xfe,0xfd,0x2b,0xf5,0xfd,0xfa, +0x00,0xfb,0x85,0xfd,0x17,0x00,0xfa,0xfd,0xfd,0xf9,0x00,0xac,0xfc,0x00,0xf7,0xfb, +0xfb,0xfc,0x00,0x2b,0xfc,0xf6,0xf5,0xfc,0xfb,0x56,0x00,0xfc,0x86,0xfb,0x01,0xfc, +0x83,0xfb,0x0a,0x00,0x56,0xfb,0xfc,0x81,0x00,0xf9,0xfb,0x00,0x56,0xe2,0xfb,0xfb, +0x00,0x2b,0x05,0xfb,0x81,0x00,0x2b,0xfb,0x8b,0xfd,0x30,0x00,0xf7,0xfb,0x56,0x00, +0xfc,0xfb,0xfb,0xf8,0x00,0xfb,0xfb,0x00,0x56,0xfc,0xfb,0xac,0x00,0x81,0xfd,0x00, +0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfc,0x00,0x56,0xfb,0xf8,0x00,0xfc,0xf7,0x00,0x56, +0xf8,0x56,0x00,0xfa,0xfe,0xfd,0xfd,0x81,0x00,0xf8,0xfe,0x85,0xfd,0x1a,0xf5,0xf6, +0xac,0xfc,0xfc,0x00,0x2b,0xfc,0x2b,0x00,0x56,0xf8,0x56,0x00,0xf9,0xfb,0x00,0xf5, 
+0x56,0xfc,0xfb,0xfb,0xfa,0x00,0xf6,0xfa,0x83,0xfb,0x0d,0xf6,0xf5,0xfb,0xfb,0x56, +0x00,0xfb,0xfc,0xf5,0x00,0xf8,0xfb,0xfb,0xa2,0xfc,0xfb,0x04,0xfb,0xfb,0xf5,0xf6, +0xe2,0xfd,0xfd,0x2b,0xf5,0x0d,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0x2b,0xf9,0xf9, +0xf6,0xf5,0xfe,0x84,0xfd,0x34,0xf9,0x00,0xac,0xfd,0xfd,0x00,0xf9,0xfd,0xf8,0x00, +0xfe,0xfd,0xfd,0xf8,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0xf7,0xfd,0x56,0x00, +0xac,0xfd,0xf5,0xf7,0xfd,0xf6,0xf5,0xfa,0xf9,0xf7,0x00,0xfd,0xfd,0x00,0xf9,0xfe, +0xf9,0x00,0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0x86,0xfd,0x06,0x00,0xf9,0xfd, +0xfd,0x00,0xf8,0x83,0xfd,0x0a,0x00,0xf7,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf5,0x81, +0x9b,0x05,0xfd,0x14,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd,0xfd, +0xfb,0xf6,0x00,0x00,0xac,0xfd,0x2b,0x00,0x83,0xfd,0x0a,0xf7,0x00,0xfd,0xfd,0x00, +0xf9,0xfd,0xfd,0xf5,0x2b,0x86,0xfd,0x02,0xf9,0xf5,0x83,0x00,0x86,0xfd,0x02,0xf7, +0x00,0x86,0xfd,0x02,0x00,0x00,0x83,0xf5,0x01,0xf6,0x83,0xfd,0x18,0x2b,0x00,0xf7, +0xfd,0xfd,0xfe,0xf9,0x00,0xfd,0xfd,0xfe,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd, +0x2b,0xf5,0xfd,0xfa,0x00,0x86,0xfd,0x16,0x00,0xf9,0xfd,0xfd,0xfa,0x00,0xac,0xfb, +0x00,0x56,0xfc,0xfb,0xfb,0xf5,0xf6,0xfb,0x2b,0x00,0xfb,0xfb,0x56,0x00,0x83,0xfb, +0xa2,0xfc,0xfb,0x83,0xfb,0x0b,0xfc,0x00,0xf8,0xfc,0xfb,0xfb,0x00,0xf8,0xfc,0x00, +0xf8,0xe2,0xfb,0xfc,0xf5,0xf6,0x06,0xac,0xfd,0xfa,0x00,0x00,0xf6,0x86,0xfd,0x02, +0xac,0xf6,0x83,0x00,0x04,0x2b,0xfc,0x56,0x00,0x83,0xfb,0x1b,0x56,0x00,0xfc,0xfb, +0x00,0x56,0xfb,0xfb,0xfd,0x00,0xf9,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd,0xfd, +0xf5,0xf5,0xfb,0xf5,0xf7,0xfb,0xf6,0xa2,0x00,0xf5,0x07,0x00,0x81,0xfd,0xfd,0xfb, +0x00,0xf6,0x87,0xfd,0x2a,0xf5,0x2b,0xfd,0xfd,0xac,0xf5,0x2b,0xfb,0x2b,0x00,0x00, +0xf5,0xf5,0x00,0xf9,0xfb,0xfa,0xf5,0x00,0x00,0x81,0xfc,0xfb,0x56,0x00,0x00,0xf6, +0xfb,0xfc,0x2b,0x00,0xfc,0xfb,0x56,0x00,0xfb,0xfb,0x81,0x2b,0x00,0x00,0xf9,0x83, +0xfb,0x06,0xfc,0xfb,0xfc,0x00,0x2b,0xfc,0xe2,0xfd,0xf7,0x00,0xfd,0x07,0xfd,0xf9, 
+0x00,0xfd,0xfd,0x00,0x00,0x83,0xf5,0x01,0xf6,0x84,0xfd,0x03,0xfe,0xf9,0x00,0x83, +0xfd,0x05,0x00,0xf9,0xfe,0x2b,0x00,0x83,0xfd,0x12,0xfa,0x00,0xfd,0xfd,0x00,0xfa, +0xfd,0xfd,0xf5,0x2b,0xfd,0xfe,0x00,0xf7,0xac,0x00,0xac,0xfd,0xa2,0xf5,0x00,0x0d, +0xf5,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfe,0x00,0x00,0x83,0xf5,0x01, +0xf6,0x86,0xfd,0xe2,0x00,0xf9,0xfd,0xfd,0x0d,0xfd,0xf5,0x2b,0xfe,0x2b,0xf5,0xfd, +0xfd,0xfb,0xf6,0x00,0x00,0xac,0x99,0x05,0xfd,0x0b,0x2b,0xf5,0xfd,0xfd,0x00,0xf7, +0xfd,0xfd,0xf6,0xf5,0xfe,0x83,0xfd,0x14,0xfb,0x00,0xf5,0xfe,0xf7,0x00,0xfe,0xfd, +0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0x2b,0xfe,0x84,0xfd,0x06,0xfb, +0x00,0x81,0xfd,0xfa,0x00,0x86,0xfd,0x02,0x2b,0xf5,0x86,0xfd,0x05,0x00,0xf8,0xfd, +0xfd,0xfe,0x83,0xfd,0x09,0x81,0x00,0x2b,0x00,0xfb,0xfd,0xfd,0xf9,0x00,0x83,0xfd, +0x0e,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfb,0x00,0xfb,0x85, +0xfd,0x17,0x00,0xf9,0xfd,0xfd,0xf9,0x00,0xfb,0xfb,0x00,0xf7,0xfb,0xfb,0xfc,0x00, +0xf7,0xfc,0xf6,0xf5,0xfc,0xfb,0xf8,0x00,0xfc,0x85,0xfb,0x17,0xfc,0xfb,0xfb,0xfc, +0xfb,0x00,0x56,0xfb,0xfb,0xfa,0x00,0xfa,0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfb, +0xfb,0xf5,0x2b,0x83,0xfd,0x04,0xfe,0xf9,0x00,0xf7,0x85,0xfd,0x32,0xf5,0xf5,0xfe, +0xfe,0x00,0xf7,0xac,0xf8,0x00,0xfc,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0x00,0xf8,0xfc, +0xfb,0x81,0x00,0xfb,0xfd,0xf5,0x2b,0xfd,0xfd,0xf6,0xf5,0xfe,0xfd,0xf7,0x00,0xf9, +0x00,0xfc,0xfb,0xf7,0x00,0xfc,0xfc,0xfb,0xac,0xfd,0xfd,0xfc,0x00,0xf6,0xfe,0x87, +0xfd,0x02,0xf5,0x2b,0x83,0xfd,0x08,0x00,0x2b,0xfb,0xf7,0x00,0xfc,0xfb,0xfb,0xa2, +0xfc,0xfb,0x1a,0xfc,0xfa,0x00,0xf5,0xfb,0xfb,0xfc,0xfc,0xf8,0x00,0x2b,0xfb,0xf6, +0xf5,0xfb,0xfc,0xf7,0x00,0xfc,0xfb,0xfb,0xfc,0x81,0xf5,0x00,0xfc,0x85,0xfb,0x12, +0xf5,0xf6,0xfc,0xac,0xf6,0xf5,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, +0x00,0xf8,0x83,0xfd,0x01,0xfe,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x05,0x00,0xf9, +0xfd,0xf8,0x00,0x83,0xfd,0x16,0xf7,0x00,0xfe,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0xf7, 
+0xfd,0xfd,0xf6,0xf5,0x2b,0xf6,0xfd,0xfd,0xf5,0xf6,0xfd,0xfe,0x84,0xfd,0x09,0x00, +0xfa,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf7,0xc2,0xfe,0xfd,0xfd,0x84,0xfd,0x0e,0x00, +0xfa,0xfd,0xfd,0x00,0xf8,0xfe,0xfd,0xfd,0x00,0xf8,0xfd,0x2b,0xf5,0x84,0xfd,0x03, +0xfb,0x00,0xf5,0x99,0x05,0xfd,0x21,0xf7,0x00,0xfd,0xfd,0xf6,0x00,0xfe,0xf7,0x00, +0x00,0xfd,0xfd,0x56,0xfc,0xac,0xf6,0xf5,0xfd,0xfb,0x00,0x56,0xfd,0xf8,0x00,0x00, +0xfd,0xfe,0x00,0x2b,0xac,0xf6,0x00,0x2b,0x85,0xfd,0x07,0xfa,0x00,0xfa,0xac,0xf6, +0x00,0xfc,0x84,0xfd,0x03,0xfe,0x2b,0x00,0x86,0xfd,0x42,0xf7,0x00,0xf9,0xfd,0xfc, +0xf9,0xfd,0xac,0x00,0x56,0xfd,0xf6,0x00,0xfc,0xfd,0x81,0x00,0xfa,0xfd,0xfd,0xf7, +0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf5,0x00,0xfb,0xfd,0x56, +0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0xf9,0x00,0xfc,0xfb,0x2b,0x00,0xf9,0xfc,0x2b,0x00, +0x81,0xfb,0x2b,0x00,0xfb,0xfb,0x56,0x00,0xfb,0xfc,0xfa,0xf6,0xfa,0x83,0xfb,0x1c, +0xfc,0xfb,0xfb,0x00,0x00,0xfa,0xfb,0xf5,0x00,0xfc,0xfb,0x00,0x2b,0xfb,0xf5,0x00, +0xf6,0xfc,0xfb,0xf5,0x2b,0xfd,0xfb,0xf9,0xfd,0xfb,0x00,0xf8,0x85,0xfd,0x30,0xf6, +0xf5,0xfd,0xf9,0x00,0xf6,0xfc,0x56,0x00,0xf6,0x81,0xfa,0x00,0x2b,0xfb,0xfb,0x00, +0xf5,0xfa,0x81,0xf6,0x00,0xfd,0xfe,0xf5,0xf5,0xfe,0xf7,0x00,0x00,0xfd,0xfd,0xfc, +0x00,0x00,0xf5,0xfb,0xfc,0x81,0x00,0xf5,0x81,0xfb,0xf9,0xfb,0xac,0x00,0x00,0x89, +0xfd,0x2a,0xf8,0x00,0xfb,0xac,0x2b,0x00,0x2b,0xfb,0xfb,0x00,0xf5,0x81,0xfc,0x56, +0xfa,0xfb,0xf8,0x81,0x81,0xf5,0xf5,0xfb,0xfa,0xf8,0xfb,0xfa,0x00,0xf7,0xfb,0xf7, +0x00,0x81,0x56,0x00,0x00,0xfb,0xfb,0x56,0xfa,0xfb,0x2b,0x00,0xc2,0xfb,0xfb,0xfc, +0x16,0xf5,0xf5,0xfb,0x2b,0x00,0xf5,0xfd,0xfd,0xf7,0x00,0xfd,0xfe,0xf9,0x00,0xfd, +0xfe,0xf7,0x00,0xf9,0xfd,0xfc,0xf9,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x2f,0x00, +0xf9,0xfd,0xac,0x00,0xf7,0xfd,0xfb,0xf5,0x2b,0xfd,0xfd,0x00,0xf7,0xac,0xf6,0x00, +0x2b,0xfd,0xfd,0x81,0x00,0x00,0x81,0xfd,0xfd,0x81,0x00,0xf7,0xfd,0xfd,0x56,0xfd, +0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xac,0xf9,0x86,0xfd, 
+0x19,0x00,0xf9,0xfd,0xfe,0x2b,0x00,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0xf7,0x00,0xfd, +0xfe,0x56,0xfc,0xac,0xf6,0xf5,0xfd,0xfe,0x2b,0xf9,0x95,0x05,0xfd,0x21,0x2b,0xf5, +0xfe,0xfd,0xfa,0x00,0x00,0xf6,0xf7,0xf5,0xfd,0xfd,0xf5,0x00,0x00,0xf5,0xac,0xfd, +0xfd,0xf8,0x00,0x00,0xf6,0x2b,0xf5,0xfd,0xfd,0xf8,0x00,0x00,0xf8,0xf5,0x2b,0x85, +0xfd,0x07,0xfe,0xf6,0x00,0x00,0x56,0x00,0x00,0x85,0xfd,0x02,0x2b,0xf5,0x87,0xfd, +0x01,0xf7,0x83,0x00,0x2c,0x2b,0xfd,0xf5,0xf6,0xfe,0xfd,0xac,0x00,0xf6,0xfd,0xfd, +0xf6,0x00,0x00,0xfe,0x2b,0xf5,0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd, +0xac,0xf5,0x00,0x00,0xf5,0xfe,0xfd,0xf8,0x00,0x00,0xfd,0xf9,0x00,0xfb,0xfb,0xfc, +0xf6,0x83,0x00,0x10,0xf9,0xfb,0xfc,0xf6,0xf5,0xfc,0xfb,0x56,0x00,0xfb,0xfb,0x56, +0x00,0x56,0xfb,0xfc,0x83,0xfb,0x15,0xfc,0x00,0xf6,0x00,0x00,0xf5,0x81,0xfb,0xfc, +0x2b,0x00,0x00,0xf7,0xf5,0x2b,0xfb,0xfd,0x00,0xf7,0xfd,0xfb,0x83,0x00,0x01,0x2b, +0x86,0xfd,0x28,0xfa,0x00,0x00,0xf6,0xf8,0x00,0x2b,0x56,0x00,0xf6,0x00,0x00,0xf6, +0xfc,0xfb,0xfb,0x00,0xf6,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xfa,0x00,0x00,0xf6,0xf7, +0xf5,0xfd,0xfd,0xfb,0x2b,0x00,0xfa,0xfb,0xfb,0xfc,0xfa,0x84,0x00,0x02,0xfb,0xf9, +0x86,0x00,0x86,0xfd,0x0a,0xf6,0x00,0x00,0x2b,0xf5,0x2b,0xfd,0xfb,0xfa,0xf5,0x83, +0x00,0x09,0xfa,0xfb,0xf5,0x00,0x00,0xf5,0xfb,0xfb,0xfa,0x83,0x00,0x04,0xf6,0xfc, +0xfb,0x81,0x83,0x00,0x05,0xf8,0x00,0xfc,0xfb,0xf6,0x83,0x00,0x01,0xf9,0x83,0xfb, +0x07,0xfc,0xfb,0xfb,0xf9,0x00,0x00,0xf6,0xe2,0x2b,0xf5,0xfd,0xfd,0x02,0xf9,0x00, +0x83,0xfd,0x01,0xf7,0x83,0x00,0x01,0x2b,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x05, +0x00,0xf9,0xfd,0xfd,0x81,0x83,0x00,0x0a,0xf6,0xfe,0xfd,0xfd,0xf8,0x00,0x00,0xf8, +0xf5,0x2b,0x83,0xfd,0x02,0x00,0xf5,0x84,0xfd,0x01,0xf9,0x83,0x00,0x0c,0xf5,0xfe, +0xfd,0x00,0xf9,0xfe,0xf9,0x00,0xfd,0xfd,0xfe,0xf7,0x83,0x00,0x01,0x2b,0x86,0xfd, +0x02,0x00,0xf9,0x83,0xfd,0x01,0xf6,0x83,0x00,0x11,0x81,0xfd,0xfd,0x2b,0xf5,0xfd, +0xfd,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0x00,0x2b,0xfe,0x94,0x05,0xfd,0x02,0xf7, 
+0x00,0x83,0xfd,0xa2,0xfe,0xfd,0x84,0xfd,0x01,0xfe,0x88,0xfd,0x03,0xfe,0x2b,0x00, +0xe2,0xfd,0xfd,0xfd,0xfe,0x89,0xfd,0xa2,0xfe,0xfd,0x85,0xfd,0x01,0xfe,0x88,0xfd, +0xc2,0xfe,0xfd,0xfd,0x88,0xfd,0xa2,0xfe,0xfd,0x84,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd, +0x03,0xfd,0xfd,0xfe,0x88,0xfd,0xc2,0xfc,0xfb,0xfb,0xc3,0xfb,0xfc,0xfb,0x83,0xfb, +0x06,0xfc,0xfb,0xfb,0xf5,0x56,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0x00,0xf8,0xc2,0xfc, +0xfb,0xfb,0x09,0xfc,0xfc,0xfb,0xfc,0xfb,0xfb,0xac,0xfd,0xfe,0x84,0xfd,0x01,0xfe, +0x89,0xfd,0xa2,0xfe,0xfd,0x04,0xfd,0xfc,0x56,0x00,0xa2,0xfc,0xfb,0x07,0xfb,0xfb, +0xfc,0x00,0x56,0xfb,0xfb,0x84,0xfd,0xc2,0xfe,0xfd,0xfd,0x07,0xfd,0xfd,0xfb,0xf7, +0x00,0xfb,0xfc,0x83,0xfb,0x04,0xfc,0xfc,0xfb,0xfc,0xe2,0xfd,0xfd,0xfd,0xfe,0x86, +0xfd,0xc2,0xfe,0xfd,0xfd,0x02,0xfd,0xfd,0xc4,0xfb,0xfc,0xfb,0x06,0xfb,0xfb,0xfc, +0xfc,0xfb,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0xc3,0xfb, +0xfc,0xfb,0x02,0xfc,0xfc,0x87,0xfd,0x01,0xfe,0x87,0xfd,0x01,0xfe,0x86,0xfd,0x01, +0xfe,0x85,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x88,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x83, +0xfd,0x01,0xfe,0x9d,0x01,0xfd,0x01,0xfe,0x8b,0xfd,0x01,0xfe,0x9a,0x05,0xfd,0x04, +0xf5,0x2b,0x00,0xf8,0x95,0x01,0xfd,0x02,0x2b,0xf5,0x9b,0x04,0xfd,0x02,0xac,0xfc, +0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x05,0xfb,0xfb,0xfc,0xfa,0x00, +0xc2,0xfc,0xfb,0xfb,0x05,0xfb,0x00,0x56,0xfb,0xfc,0x88,0xfb,0x02,0xfc,0xac,0x95, +0x01,0xfd,0x06,0xac,0xfb,0xf8,0x00,0xfb,0xfc,0x83,0xfb,0x07,0xfc,0xfb,0x00,0x56, +0xfb,0xfc,0xac,0x8b,0xfd,0x04,0xfb,0x00,0xf8,0xfc,0x88,0xfb,0x01,0xac,0x95,0x01, +0xfd,0x01,0xac,0xc2,0xfb,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x85,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x88,0xfb,0x04,0xfc,0xfb,0xfb,0xac, +0x9b,0x0b,0xfd,0x04,0x56,0x2b,0xf9,0xfe,0x95,0x01,0xfd,0x02,0xfa,0xf8,0x9b,0x04, +0xfd,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0x83,0xfb,0x03,0xfc,0xf6,0xfa,0xe2,0xfb,0xfb,0xfc,0xfb,0x03,0xfb,0xfc, 
+0xac,0x96,0x01,0xfd,0x04,0xfb,0xfb,0xfa,0x2b,0x84,0xfb,0x07,0xfc,0xfb,0xfb,0xf6, +0xfa,0xfb,0xfb,0x8c,0xfd,0x05,0x81,0xf6,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x04,0xfb, +0xfc,0xfb,0xfb,0x96,0x01,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb, +0xe3,0xfc,0xfb,0xfb,0xfb,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x83,0xfb,0x02, +0xfc,0xfc,0x9c,0x0b,0xfd,0x01,0xfe,0x94,0x06,0xfd,0xe2,0xfb,0xfb,0xfb,0xfc,0x84, +0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2,0xfb, +0xfb,0xfc,0xfb,0x01,0xac,0x96,0x01,0xfd,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb, +0xfb,0xfc,0x01,0xfb,0x8c,0xfd,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x97, +0x01,0xfd,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc, +0x85,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x02,0xfc,0xac,0x9f,0x11,0xfd,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc4,0xfc,0xfb,0xfb,0x83,0xfb,0xc3,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xac,0x95,0x01,0xfd,0x01,0xac,0xe3,0xfb,0xfb, +0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x8c,0xfd,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x06, +0xfb,0xfb,0xfc,0xfb,0xfb,0xac,0x95,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x02, +0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xac,0x9f,0x11,0xfd,0xa2,0xfc,0xfb,0x83,0xfb, +0x01,0xfc,0x8b,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84, +0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x95,0x01,0xfd,0x01,0xac,0x84,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfb,0x8b,0xfd,0x02,0xac,0xfc,0x84,0xfb, +0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xac,0x95,0x01,0xfd,0x01,0xac, +0xc2,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb, +0xfb,0xa3,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0x9e,0x11, +0xfd,0x01,0xac,0x86,0xfb,0x01,0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb, +0x84,0xfb,0x01,0xfc,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0x01,0xac,0x94,0x01,0xfd,0x02, 
+0xac,0xac,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03, +0xfb,0xfb,0xac,0x8b,0xfd,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xa2, +0xfc,0xfb,0x02,0xfc,0xac,0x94,0x01,0xfd,0x01,0xac,0x86,0xfb,0xe2,0xfc,0xfb,0xfb, +0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x88,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb, +0xac,0x9d,0x11,0xfd,0xa3,0xfc,0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x86, +0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x01,0xac, +0x94,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfb,0xfc,0xe2,0xfb,0xfc,0xfb,0xfb,0x83, +0xfb,0x02,0xfc,0xfb,0x8c,0xfd,0xc4,0xfb,0xfc,0xfb,0x85,0xfb,0x03,0xfc,0xfb,0xac, +0x94,0x01,0xfd,0x02,0xfc,0xfc,0xe3,0xfb,0xfb,0xfc,0xfb,0x01,0xfc,0x86,0xfb,0xc2, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x9c,0x11, +0xfd,0x01,0xac,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xa2, +0xfc,0xfb,0x88,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xac,0x93,0x01,0xfd,0x04, +0xac,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x01, +0xfb,0x8c,0xfd,0xe2,0xfb,0xfb,0xfc,0xfb,0x86,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xac,0x93,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84, +0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x83, +0xfb,0x03,0xfc,0xfb,0xac,0x9b,0x11,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb,0xe2,0xfc, +0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb, +0x03,0xfc,0xfb,0xac,0x93,0x01,0xfd,0x01,0xac,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xe3, +0xfc,0xfb,0xfb,0xfb,0x02,0xfc,0xac,0x8b,0xfd,0x88,0xfb,0xc2,0xfc,0xfb,0xfb,0x84, +0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x93,0x01,0xfd,0xa2,0xfc,0xfb,0xc3,0xfb,0xfb,0xfc, +0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xe2,0xfc, +0xfb,0xfb,0xfb,0x02,0xfb,0xac,0x9b,0x11,0xfd,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xc4,0xfb,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb, 
+0x01,0xfc,0x93,0x01,0xfd,0xe2,0xfc,0xfb,0xfb,0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0xc2, +0xfc,0xfb,0xfb,0x84,0xfb,0x8c,0xfd,0x83,0xfb,0xa3,0xfc,0xfb,0xc4,0xfb,0xfc,0xfb, +0x02,0xfb,0xfc,0x93,0x01,0xfd,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0xc2, +0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85, +0xfb,0x02,0xfc,0xfb,0x9a,0x11,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc, +0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84, +0xfb,0x03,0xfc,0xfb,0xfb,0x93,0x01,0xfd,0x01,0xac,0xc2,0xfb,0xfb,0xfc,0x02,0xfb, +0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0xa3,0xfc,0xfb,0x8b,0xfd,0x02,0xac,0xfc,0x88, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x93,0x01, +0xfd,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xfc,0x84,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x04,0xfb,0xfb,0xfc,0xac,0x99, +0x11,0xfd,0x01,0xac,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x84, +0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x92,0x01, +0xfd,0x01,0xac,0xc2,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x87,0xfb,0x8c,0xfd,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x92,0x01,0xfd,0x03,0xac,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb, +0xfc,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x99,0x11,0xfd,0xc4,0xfb, +0xfc,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02, +0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x92,0x01,0xfd,0x01,0xfc,0x86,0xfb, +0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb, +0x01,0xfb,0x8c,0xfd,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x02,0xfb,0xfb, +0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x92,0x01,0xfd,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc2, +0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc, 
+0xfb,0xfb,0x03,0xfb,0xfc,0xfc,0x98,0x11,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb, +0x86,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x84,0xfb,0x04, +0xfc,0xfb,0xfb,0xac,0x91,0x01,0xfd,0x01,0xac,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01, +0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xc3,0xfb,0xfc,0xfb,0x01,0xfb,0x8c,0xfd,0xc2,0xfb, +0xfc,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03,0xfc, +0xfb,0xac,0x91,0x01,0xfd,0x04,0xac,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc, +0xfb,0xfb,0xe4,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xac,0x97, +0x11,0xfd,0x01,0xac,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb, +0xc2,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x91,0x01,0xfd,0x03, +0xac,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x89,0xfb,0x01,0xfc,0x85,0xfb, +0x02,0xfc,0xfb,0x8b,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc, +0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xac,0x91,0x01,0xfd,0x86, +0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x06,0xfb,0xfb,0xfc,0xfb,0xfb,0xac,0x97,0x11,0xfd, +0xa2,0xfc,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x88,0xfb, +0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x90,0x01,0xfd,0x01,0xac,0x84,0xfb, +0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb, +0x01,0xfc,0x83,0xfb,0x01,0xac,0x8b,0xfd,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb, +0xc3,0xfc,0xfb,0xfb,0xc3,0xfb,0xfc,0xfb,0x02,0xfb,0xac,0x90,0x01,0xfd,0x01,0xac, +0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb, +0xc2,0xfc,0xfb,0xfb,0x86,0xfb,0x01,0xfc,0x83,0xfb,0x01,0xfc,0x97,0x11,0xfd,0x84, +0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0xe2, +0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x90,0x01,0xfd,0x01,0xac,0xa2,0xfc,0xfb,0x83, +0xfb,0x01,0xfc,0x8b,0xfb,0xc3,0xfc,0xfb,0xfb,0x03,0xfc,0xfb,0xfc,0x8c,0xfd,0x03, 
+0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb, +0xfb,0xfc,0x02,0xfb,0xfb,0x91,0x01,0xfd,0xe2,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0xc3, +0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x03,0xfc,0xfb, +0xfb,0x96,0x11,0xfd,0x01,0xac,0x83,0xfb,0x01,0xfc,0x86,0xfb,0xa2,0xfc,0xfb,0xc5, +0xfb,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x83,0xfb,0x02,0xfc,0xac,0x90,0x01,0xfd,0x01, +0xac,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfb,0xe2,0xfc,0xfb,0xfc,0xfb,0x02, +0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xac,0x8b,0xfd,0x85,0xfb,0xc3,0xfc, +0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfc, +0x90,0x01,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc, +0x86,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xac,0x95,0x11, +0xfd,0x02,0xfc,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb, +0x01,0xfc,0x88,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x90,0x01,0xfd,0x04,0xac,0xfb,0xfb, +0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x87,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x05, +0xfb,0xfb,0xfc,0xfb,0xfb,0x8b,0xfd,0x01,0xac,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0xc4, +0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x85,0xfb,0x01,0xac,0x90,0x01,0xfd,0xc2,0xfb,0xfb, +0xfc,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x02,0xfb,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfc,0x95,0x11,0xfd,0xc2,0xfc,0xfb,0xfb, +0xc2,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0xc2,0xfc,0xfb, +0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x90,0x01,0xfd,0x86,0xfb,0xa2,0xfc,0xfb, +0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb, +0x8c,0xfd,0xc3,0xfb,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb, +0x05,0xfc,0xfb,0xfb,0xfc,0xfb,0x90,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb, +0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xc2, +0xfc,0xfb,0xfb,0x01,0xfc,0x99,0x0c,0xfd,0x02,0xfa,0xfb,0x9a,0x04,0xfd,0x02,0xfb, 
+0xfc,0x83,0xfb,0x03,0xfc,0x56,0xfa,0x88,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb, +0xc2,0xfb,0xfc,0xfb,0x06,0xfb,0x81,0x56,0xfb,0xfc,0xfb,0x90,0x01,0xfd,0x01,0xac, +0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x02,0xfa,0xf9,0xe3,0xfb,0xfc,0xfb,0xfb,0x03,0xfb, +0xfb,0xfc,0x85,0xfb,0x02,0xfc,0xfb,0x8c,0xfd,0x85,0xfb,0x03,0xfc,0xfb,0xfb,0xa2, +0xfc,0xfb,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x83,0xfb,0x01,0xac, +0x8f,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc, +0x83,0xfb,0x07,0xfc,0xfb,0xfb,0xfc,0xf8,0x81,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb, +0x02,0xfc,0xfb,0x89,0xfd,0x02,0xf9,0xac,0x95,0x01,0xfd,0x02,0xf9,0xac,0x86,0xfd, +0x02,0xf9,0xac,0x96,0x01,0xfd,0x02,0xfa,0xfb,0x93,0x01,0xfd,0x02,0xfc,0xf9,0x91, +0x06,0xfd,0x01,0x2b,0xa2,0x00,0xf5,0x02,0xf7,0xfc,0x8a,0xfd,0x02,0xf5,0x2b,0x9e, +0x01,0xfd,0x02,0xf6,0xf8,0x8a,0xfd,0x02,0xf6,0xf8,0x88,0xfd,0x02,0xf8,0xf6,0x93, +0x01,0xfd,0x02,0xac,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0xf5,0xf6,0xc3,0xfb,0xfc,0xfb, +0x03,0xfb,0xfb,0xfc,0x8a,0xfb,0x07,0xfc,0xf8,0x00,0xfb,0xfb,0xac,0xac,0x8f,0xfd, +0x84,0xfb,0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0x2b,0x00,0x83,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x05,0xfb,0xfd,0xfa, +0xf5,0x2b,0x84,0xfd,0x04,0x81,0x00,0xf7,0xfd,0x83,0xfb,0x01,0xfc,0x89,0xfb,0xa2, +0xfc,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x01,0xfb,0x90,0x01,0xfd, +0x02,0xfc,0xfc,0x83,0xfb,0x01,0xfc,0x85,0xfb,0xa3,0xfc,0xfb,0x03,0xfb,0xfb,0xfc, +0x84,0xfb,0x02,0x00,0x56,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x02,0xfc,0xac,0x88,0xfd, +0x02,0x00,0xf9,0x95,0x01,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x02,0x00,0xf9,0x95,0x01, +0xfd,0x03,0x56,0x00,0xac,0x93,0x01,0xfd,0x02,0xf9,0x00,0x91,0x06,0xfd,0x08,0x00, +0xf6,0xfa,0xf9,0xf7,0x00,0x00,0xac,0x89,0xfd,0x02,0x00,0xf7,0x9e,0x01,0xfd,0x02, +0xf8,0xf9,0x8a,0xfd,0x02,0xf8,0xf9,0x88,0xfd,0x02,0xf9,0xf8,0x93,0x01,0xfd,0x04, +0xac,0xfb,0xfb,0xfc,0x83,0xfb,0x02,0x00,0x2b,0xc2,0xfb,0xfb,0xfc,0xe2,0xfb,0xfb, 
+0xfb,0xfc,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x05,0x56,0x00,0xfc,0xfb,0xac,0x8f,0xfd, +0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x83,0xfb,0x03,0xf6,0xf5,0xfc,0x83,0xfb,0xc3, +0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x05,0xfc,0xac,0xfa,0x00,0x00,0x84, +0xfd,0x04,0xf5,0x00,0xf7,0xfd,0xc2,0xfb,0xfb,0xfc,0xc2,0xfb,0xfc,0xfb,0x84,0xfb, +0x02,0xf9,0xfa,0xc2,0xfb,0xfb,0xfc,0x85,0xfb,0x04,0xfc,0xfb,0xfb,0xfc,0x8f,0xfd, +0x01,0xac,0x86,0xfb,0x01,0xfc,0x89,0xfb,0x01,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfb, +0x00,0x56,0xc2,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xac,0x88,0xfd,0x02,0x00, +0xf9,0x95,0x01,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x02,0x00,0xf9,0x8e,0xfd,0x02,0xfa, +0xfb,0x84,0xfd,0x03,0xac,0xf5,0xac,0x94,0x01,0xfd,0x02,0xfa,0x00,0x91,0x06,0xfd, +0x08,0xf5,0x2b,0xfd,0xfd,0xfe,0xac,0x00,0x2b,0x89,0xfd,0x02,0xf5,0x2b,0x94,0x03, +0xfd,0x01,0xfe,0x94,0x01,0xfd,0x01,0xac,0x85,0xfb,0x05,0xfc,0xf5,0xf6,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x06,0xfc, +0xfb,0x56,0x00,0xfb,0xfb,0x90,0x01,0xfd,0xe2,0xfb,0xfc,0xfb,0xfb,0x05,0xfb,0xfb, +0xfc,0x2b,0x00,0xe2,0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0x0c,0xfd, +0xf9,0x00,0x00,0xfa,0xfd,0xfd,0xfe,0x00,0x00,0x2b,0xac,0xe2,0xfc,0xfb,0xfb,0xfb, +0x02,0xfb,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfc,0x00,0xf6,0x84,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0x03,0xfc,0xfb,0xfb,0x90,0x01,0xfd,0xa3,0xfb,0xfc,0x83,0xfb,0xe2,0xfc, +0xfb,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x02,0x00,0x56,0x8b,0xfb,0x01,0xfc,0x88,0xfd, +0x02,0x00,0xf9,0x95,0x01,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x02,0x00,0xf9,0x8e,0xfd, +0x02,0xf5,0x2b,0x85,0xfd,0x01,0xfe,0x95,0x01,0xfd,0x02,0xf9,0x00,0x91,0x06,0xfd, +0x02,0xf5,0x2b,0x84,0xfd,0x05,0xf7,0x00,0xfd,0xfd,0xfa,0x83,0x00,0x09,0x81,0xfd, +0xfd,0x00,0x2b,0xf5,0x00,0x00,0xf8,0x83,0xfd,0x04,0xf6,0x00,0x00,0xf6,0x86,0xfd, +0x07,0xf9,0x00,0xf8,0x00,0x2b,0xfd,0xfa,0x83,0x00,0x09,0x81,0xfd,0xfd,0x00,0xf7, +0xfd,0xfd,0x00,0x56,0x83,0x00,0x08,0xac,0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0xfa,0x83, 
+0x00,0x07,0xf5,0xfd,0x2b,0x00,0xfe,0xfd,0x2b,0x83,0x00,0x03,0xf8,0xfd,0xfd,0xa2, +0x2b,0x00,0x01,0xf9,0x85,0xfd,0x01,0xf9,0x83,0x00,0x05,0xf9,0xfb,0xfb,0x00,0x2b, +0xc2,0xfc,0xfb,0xfb,0x01,0x56,0x83,0x00,0x01,0xf6,0x83,0xfb,0x06,0x00,0x2b,0x00, +0x00,0xfc,0x81,0x83,0x00,0x14,0xf6,0x00,0xfb,0xfb,0xfd,0xf6,0x00,0x00,0xf6,0xfd, +0xfd,0xf9,0x00,0xfa,0x00,0x00,0xf6,0xfd,0xfd,0x81,0x84,0x00,0x83,0xfb,0x01,0xf9, +0x83,0x00,0x05,0xf5,0xf5,0xfc,0xfb,0x81,0x84,0x00,0x07,0xfb,0xfb,0xfc,0x00,0xf6, +0x00,0x00,0xe2,0xfc,0xfb,0xfb,0xfb,0x0e,0xfd,0xf9,0xf5,0xf5,0xf6,0xfd,0xfd,0xf7, +0xf6,0xf5,0x2b,0xfd,0xfb,0xf5,0x83,0x00,0x0b,0xf9,0xfb,0xfb,0x00,0xf7,0xf5,0x00, +0x00,0x81,0xfb,0xf6,0x83,0x00,0x03,0x56,0xfb,0xf9,0x83,0x00,0x0c,0xf9,0xfb,0xfc, +0x00,0xf6,0xf6,0x00,0x00,0xfa,0xfd,0xfd,0xfc,0x83,0x00,0x05,0xf5,0xf5,0xfd,0xfd, +0x56,0x83,0x00,0x01,0xf6,0x86,0xfb,0x03,0x56,0x00,0xf8,0x83,0x00,0x16,0x81,0xfb, +0xfb,0x00,0x56,0xfb,0xfb,0x00,0xf6,0xfc,0xfb,0x00,0xf8,0xfc,0x81,0xf5,0x00,0x00, +0xf5,0xfc,0xfb,0xf8,0x83,0x00,0x01,0xf6,0x83,0xfd,0x09,0xf6,0x00,0x00,0x2b,0x00, +0xfa,0xfd,0xfd,0xf8,0x83,0x00,0x01,0xf8,0x87,0xfd,0x01,0xf8,0x83,0x00,0x06,0xac, +0xfd,0xfd,0x00,0xf9,0xfe,0x85,0xfd,0x10,0x00,0xf7,0xf5,0x00,0x00,0x81,0xfd,0xfd, +0xfc,0xf5,0x00,0x00,0xf5,0xac,0xfd,0x2b,0x83,0x00,0x03,0xf9,0xfd,0xfa,0x83,0x00, +0x0a,0xf6,0xfe,0xfd,0xfd,0x00,0x56,0x00,0x00,0xf5,0xac,0x86,0xfd,0x01,0xac,0x83, +0x00,0x09,0xf6,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x00,0xf8,0x9a,0x05,0xfd,0x02,0xf5, +0x2b,0x84,0xfd,0x19,0xf9,0x00,0xfd,0xac,0x00,0xf8,0xfe,0xf7,0x00,0xfd,0xfd,0xf5, +0x00,0xf8,0xfe,0x56,0x00,0xfb,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0x85,0xfd,0x31, +0xf9,0x00,0xf6,0xac,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0xf5,0x2b,0xfd, +0xfd,0x00,0x00,0xfa,0xac,0x00,0xf7,0xfd,0xfd,0xf5,0x2b,0xfd,0xac,0x00,0xf6,0xac, +0xfd,0xfb,0xfe,0x2b,0xf5,0xfd,0xfd,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfe,0x2b,0x00, +0xf7,0x86,0xfd,0x0a,0xfc,0x00,0xf7,0xfc,0x2b,0x00,0xfc,0xfb,0xf5,0x2b,0x85,0xfb, 
+0x40,0x81,0x00,0x2b,0xfc,0xfa,0x00,0xf6,0xfb,0xfc,0x00,0x00,0xfa,0xfc,0xfb,0xf5, +0xf5,0x81,0xfa,0xf5,0x00,0xfb,0xfd,0x2b,0x00,0xfc,0xac,0x00,0xf8,0xfd,0xf9,0x00, +0xf5,0xfb,0x81,0x00,0xfa,0xfd,0xfa,0xf8,0xfc,0x81,0x00,0x2b,0xfb,0x81,0x00,0x2b, +0xfc,0x56,0x00,0x00,0xfb,0xfc,0x00,0xf6,0x81,0x81,0xf6,0x00,0xfb,0xfb,0xf5,0x00, +0x56,0x87,0xfb,0x76,0xfc,0xfb,0xfd,0xfa,0xf5,0xf8,0x00,0xfd,0xfe,0xf5,0xf8,0xf5, +0x2b,0xfd,0xfb,0xf8,0x81,0xfc,0xf5,0x00,0xfb,0xfc,0x00,0x00,0xf8,0xfb,0x00,0x2b, +0xfb,0xfc,0xf5,0x2b,0xfb,0xfc,0x81,0x00,0x2b,0xfc,0x2b,0x00,0xfb,0xfb,0xf5,0x00, +0x2b,0xfc,0xf5,0x00,0xfd,0xfd,0xf5,0xf6,0xfc,0xfc,0xf5,0x00,0xfd,0xfd,0xfa,0x81, +0xfd,0x81,0x00,0xfa,0xfb,0xfc,0xfb,0xfb,0xfc,0xf8,0x00,0xf5,0xfa,0xfb,0x00,0xf5, +0xfc,0xfb,0x00,0x56,0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0x00,0x56,0xfb,0x2b,0x00,0x81, +0xfc,0xfa,0xfb,0xfc,0x56,0xf9,0xfc,0xf9,0x00,0xfa,0xfe,0x2b,0x00,0xfb,0xac,0x2b, +0x00,0xf9,0xfd,0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0x85,0xfd,0x0a,0xf9,0x00,0x81, +0xfd,0xf6,0xf5,0xfd,0xfd,0x00,0xf9,0x85,0xfd,0x24,0xfe,0x00,0x00,0xf9,0xfe,0xf6, +0x00,0xfd,0xfd,0xf5,0xf6,0xfc,0xac,0xf6,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xac, +0x00,0xf7,0xfd,0xfb,0xf5,0xf6,0xfd,0xfd,0x00,0x00,0xfa,0xac,0x00,0x2b,0x85,0xfd, +0x0f,0xfe,0x00,0xf5,0xac,0xfc,0xf5,0x00,0xfd,0xfe,0x00,0xf5,0xfd,0x81,0x00,0xfb, +0x99,0x05,0xfd,0x02,0x00,0xf7,0x84,0xfd,0x05,0xfa,0x00,0xfd,0xf8,0x00,0x83,0xf9, +0x11,0x00,0x81,0xfd,0xf5,0x2b,0xfe,0xfd,0xfd,0x00,0xf7,0xfd,0x00,0x2b,0xf9,0xf9, +0xf6,0xf5,0x85,0xfd,0x1f,0xf9,0x00,0xac,0xfd,0xfd,0xf7,0x00,0xfa,0xf9,0xfa,0x00, +0x81,0xfd,0x00,0xf7,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0xf5,0x2b,0xfe,0xfd,0x00,0x2b, +0xfe,0x2b,0x00,0xfe,0x84,0xfd,0x02,0x2b,0xf5,0x85,0xfd,0x07,0xf9,0x00,0xfd,0xfd, +0x2b,0x00,0xfe,0x86,0xfd,0x0a,0xf8,0x00,0x56,0xf8,0x56,0x00,0x56,0xfc,0x00,0xf6, +0xa2,0xfc,0xfb,0x40,0xfc,0xf7,0x00,0xfc,0xfb,0xfc,0xf7,0x00,0xfc,0xfb,0x00,0xf8, +0xfb,0xfb,0xfa,0x00,0xfa,0xfc,0xfb,0xf8,0x00,0xfc,0xfd,0x00,0x2b,0xfa,0xf9,0xf6, 
+0xf5,0xfe,0xf9,0x00,0xac,0xfd,0xfd,0x00,0xf9,0xac,0xfb,0xfc,0xfb,0xfb,0xf5,0xf6, +0xfc,0xf7,0x00,0xfb,0xfb,0xfc,0xf6,0xf5,0xfb,0xfa,0x00,0xfa,0xfb,0xfc,0xfa,0x00, +0xf9,0xfb,0x00,0xf6,0xa3,0xfc,0xfb,0x84,0xfb,0x0c,0xfd,0xf9,0x00,0xac,0x00,0x56, +0xfc,0x00,0xfe,0x00,0x2b,0xfd,0x84,0xfb,0x29,0x2b,0xf5,0xfb,0xfb,0x00,0xf8,0xfc, +0xfb,0x00,0x2b,0xfc,0xfb,0x00,0xf6,0xfc,0xfb,0xf7,0x00,0x56,0xf8,0x56,0x00,0xf9, +0xfb,0x00,0xf6,0xfc,0xfb,0xf6,0xf5,0xac,0xfa,0x00,0xfb,0xfd,0xfd,0x2b,0xf5,0xfd, +0xfd,0xfe,0x83,0xfd,0x09,0x00,0xf9,0xfc,0xfb,0xfb,0xfc,0xfb,0x56,0x00,0x83,0xfb, +0x13,0xf8,0x00,0xfb,0xfc,0x00,0xf8,0xfc,0xfb,0xf5,0xf6,0xfc,0xfb,0x00,0x56,0xfb, +0xf7,0x00,0xf7,0x81,0x83,0xfb,0x17,0xfc,0xfb,0xfb,0xfc,0x00,0xf9,0xfd,0x00,0xf8, +0xfe,0xfd,0xfd,0x00,0xf9,0xfd,0xf6,0xf5,0xfe,0xfd,0xfd,0xf5,0xf5,0xfe,0x84,0xfd, +0x0a,0xf5,0xf5,0xf9,0xfa,0xf7,0x00,0xfd,0xfd,0x00,0xf9,0x86,0xfd,0x23,0x00,0xfa, +0xfd,0xfd,0xac,0x00,0xfa,0xfb,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0x81,0xfd,0x00,0xf7, +0xfd,0xfd,0xf8,0x00,0xfe,0xfd,0xfd,0xf8,0x00,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0xf5, +0x2b,0x85,0xfd,0x10,0xfb,0x00,0xfc,0xfd,0xfd,0xf9,0x00,0xfd,0xfb,0x00,0x56,0xf9, +0xf9,0x00,0xf7,0xfe,0x97,0x05,0xfd,0x03,0xfe,0xf5,0x2b,0x84,0xfd,0x04,0xf7,0x00, +0xfd,0x2b,0xa2,0x00,0xf5,0x05,0xf5,0xfa,0xfe,0x00,0x2b,0x83,0xfd,0x05,0xf5,0x2b, +0xfe,0x00,0x00,0x83,0xf5,0x01,0xf6,0x85,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x0e,0xf7, +0x00,0x00,0xf5,0x00,0xf5,0xfa,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xf9,0xe2,0xfd,0xfd, +0xf5,0x2b,0x03,0xfd,0xf7,0x00,0x85,0xfd,0x06,0x2b,0x00,0xfd,0xfe,0xf9,0xf5,0x83, +0x00,0x04,0xfd,0xfd,0x2b,0xf5,0x87,0xfd,0x0c,0x2b,0x00,0x00,0xf5,0x00,0x00,0xf9, +0xfc,0x00,0x2b,0xfb,0xfc,0x83,0xfb,0x02,0x2b,0x00,0x83,0xfb,0x1a,0x56,0x00,0xfb, +0xfb,0x00,0xf8,0xfc,0xfb,0x56,0x00,0xfc,0xfb,0xfb,0x56,0x00,0xfb,0xfd,0x00,0x00, +0xf5,0x00,0xf5,0xf6,0xfd,0xf9,0x00,0x83,0xfd,0x05,0x00,0xf9,0xfb,0x81,0xf6,0x83, +0x00,0x17,0x2b,0xfb,0x2b,0x00,0xfb,0xfc,0xfb,0x2b,0x00,0xfc,0xf8,0x00,0xfb,0xfc, 
+0xfb,0xfb,0x00,0xf8,0xfc,0xf5,0xf6,0xfb,0xfc,0x85,0xfb,0x12,0xfc,0xfb,0xfb,0xfd, +0xf9,0xf5,0xfd,0xf6,0xf5,0xf7,0xf6,0xfd,0xf5,0x2b,0xfd,0xfc,0x2b,0xf5,0x83,0x00, +0x1f,0xfb,0xfc,0x00,0x56,0xfb,0xfb,0xf5,0xf6,0xfb,0xfc,0x00,0x2b,0xfb,0xfc,0xf6, +0x00,0xf5,0xf5,0x00,0xf5,0x56,0xfc,0x00,0x2b,0xfb,0xfb,0x2b,0x00,0xfb,0xf9,0x00, +0x83,0xfd,0x06,0x2b,0xf5,0xfd,0xfd,0xfb,0xf5,0x83,0x00,0x23,0xf9,0xfb,0xfb,0xfc, +0xfb,0xfb,0x56,0x00,0xfb,0xfc,0xfb,0x56,0x00,0xfb,0xfb,0x00,0x56,0xfb,0xfc,0x00, +0x2b,0xfb,0xfb,0x00,0x56,0xfb,0xfc,0xf7,0x00,0x00,0xf7,0xfc,0xfb,0xfa,0xf5,0x83, +0x00,0x04,0xfa,0xfd,0x00,0xf9,0x83,0xfd,0x05,0x00,0xfa,0xfd,0xf5,0x2b,0x83,0xfd, +0x02,0xf7,0x00,0x84,0xfd,0x01,0xfe,0xa3,0xf5,0x00,0x04,0xfd,0xfe,0x00,0xf9,0x86, +0xfd,0x02,0x00,0xf9,0x83,0xfd,0x04,0x00,0xfa,0xf9,0x00,0x84,0xfd,0x09,0x00,0xf9, +0xfd,0xf5,0x2b,0xfd,0xfe,0x2b,0x00,0x83,0xfd,0x0a,0xf9,0x00,0xfd,0xfd,0x00,0xf9, +0xfd,0xfd,0xf5,0x2b,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x05,0xfa,0x00,0xfd,0xf9, +0x00,0x84,0xf5,0x01,0xf8,0x99,0x05,0xfd,0x02,0x00,0xf7,0x84,0xfd,0x06,0xf5,0x2b, +0xfe,0xf8,0x00,0xfe,0x85,0xfd,0x02,0xf5,0x2b,0x83,0xfd,0xc2,0x00,0xf8,0xfd,0x03, +0xfd,0xfd,0xfe,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x02,0xf8,0x00,0xa2,0xfe,0xfd, +0x13,0xfd,0xfd,0xf5,0x2b,0xfe,0xfd,0x00,0xf9,0xfe,0xfd,0x00,0x2b,0xfe,0xfd,0x00, +0xf7,0xfd,0xf8,0x00,0x85,0xfd,0x0d,0x2b,0xf5,0xfd,0xfb,0x00,0x81,0xfe,0xf9,0x00, +0xfd,0xfd,0x2b,0xf5,0x87,0xfd,0x0a,0xf8,0x00,0xfc,0xfb,0xfc,0xfc,0xfb,0xfb,0xf5, +0xf6,0x83,0xfb,0x21,0xfc,0xfb,0xf7,0x00,0xfc,0xfb,0xfb,0xf7,0x00,0xfc,0xfb,0x00, +0x56,0xfb,0xfb,0xf9,0x00,0x81,0xfb,0xfc,0xf8,0x00,0xac,0xfd,0x00,0xf7,0xfe,0xfd, +0xfd,0xfe,0xfd,0xf9,0x00,0x83,0xfd,0x20,0x00,0xf9,0xfc,0xf5,0xf5,0xfc,0xfc,0x00, +0xf6,0xfc,0x2b,0x00,0xfc,0xfb,0xfb,0xf6,0xf5,0xfb,0xfa,0x00,0xfa,0xfb,0xfb,0xfa, +0x00,0xfa,0xfb,0x00,0x2b,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x42,0xfb,0xfc,0xfb,0xfd, +0xf9,0xf5,0xfd,0x56,0x00,0x00,0xfa,0xfd,0x00,0xf7,0xfd,0x2b,0x00,0x81,0xfc,0xf6, 
+0xf5,0xfc,0xfb,0x00,0x56,0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0xf5,0xf6,0xfc,0xfb,0xf7, +0x00,0xfb,0xfc,0xfc,0xfb,0xfc,0xfb,0xf5,0xf6,0xfc,0xfb,0xf6,0xf5,0xfc,0xfa,0x00, +0xfb,0xfd,0xfd,0xf6,0xf5,0xfd,0xfd,0x00,0xf7,0xfe,0xfd,0x00,0xfa,0xfc,0x84,0xfb, +0x24,0x56,0x00,0xfb,0xfb,0xfc,0x2b,0x00,0xfc,0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b, +0xfc,0xfb,0x00,0xf8,0xfc,0xfb,0xfb,0xfc,0x2b,0x00,0x56,0xfc,0x00,0x2b,0xfc,0xfb, +0x00,0xf9,0xfd,0x00,0x56,0x83,0xfd,0x05,0x00,0xf9,0xfd,0xf6,0xf6,0x83,0xfd,0x02, +0xf5,0xf6,0x85,0xfd,0x02,0xf5,0xf6,0xc2,0xfe,0xfd,0xfd,0x02,0x00,0xf9,0x86,0xfd, +0x04,0x00,0xf9,0xfd,0xfd,0xc2,0xfb,0x00,0xfb,0x0c,0xfd,0xfd,0xfb,0x00,0xfb,0xfd, +0xf5,0x2b,0xfd,0xfd,0xf8,0x00,0x83,0xfd,0x0a,0xf8,0x00,0xfe,0xfd,0x00,0xf9,0xfe, +0xfd,0x00,0xf7,0x85,0xfd,0x0b,0x81,0x00,0xfc,0xfd,0xfd,0xf9,0x00,0xfd,0xfb,0x00, +0x81,0x9d,0x05,0xfd,0x1f,0xf5,0x2b,0xfd,0xfd,0xfb,0xf5,0x00,0xac,0xfd,0xac,0x00, +0xf5,0xac,0xfd,0x81,0xfb,0xfd,0xf5,0x00,0xf9,0xfd,0xf7,0x00,0xac,0xfd,0xf7,0x00, +0xf9,0xfd,0xac,0xf9,0x85,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x0e,0xac,0x00,0xf5,0xac, +0xfd,0xfa,0xfb,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xf9,0xe2,0xfd,0xfd,0xf5,0x2b,0x15, +0xfd,0xac,0x00,0xf5,0xac,0xfc,0xf9,0xfe,0x2b,0xf5,0xfd,0xfa,0x00,0xfa,0xac,0xf6, +0x00,0xac,0xfd,0xf7,0x00,0x87,0xfd,0x0a,0xfc,0x00,0xf5,0x81,0xfb,0x56,0xfa,0xfb, +0xf5,0xf6,0xa2,0xfc,0xfb,0x20,0xfb,0x81,0x00,0xf6,0xfc,0xfa,0x00,0xf6,0xfb,0xfb, +0x00,0x56,0xfb,0xfc,0xfb,0x00,0xf5,0xfc,0xf8,0xf5,0x00,0xfc,0xfd,0xf7,0x00,0xf9, +0xfd,0xac,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x1d,0x00,0xf9,0xfb,0xf5,0xf5,0xfb,0x56, +0x00,0xf6,0xfb,0xfa,0x00,0xf7,0xfb,0xf7,0xf5,0x00,0xfb,0xfc,0x00,0xf5,0x81,0xfb, +0xf5,0x00,0xfb,0xfc,0xf5,0xf6,0x83,0xfb,0x02,0x56,0xf6,0x85,0xfb,0x74,0xfd,0xfa, +0x00,0xfd,0xfe,0x00,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0x2b,0x00,0x81,0xfa,0xf5,0x00, +0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0xf5,0xf6,0xfb,0xfc,0xf5,0xf5,0xfb,0xfb,0x81,0x00, +0xf5,0x81,0xfb,0x56,0xfa,0xfb,0xf5,0xf6,0xfb,0xfc,0x2b,0x00,0xfb,0xfd,0xf5,0xf5, 
+0xfa,0x2b,0x00,0xf5,0xfd,0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xac,0xfb,0xfb,0xfc, +0xfb,0x56,0x00,0xf6,0x81,0xfa,0x00,0x2b,0xfb,0xfc,0x00,0xf6,0xfb,0xf5,0x00,0xf6, +0xfb,0xfb,0x00,0x56,0xfb,0x56,0xf9,0xfb,0x56,0x00,0xfa,0xfb,0x00,0x2b,0xfb,0xf7, +0x00,0x56,0xfe,0xf6,0x00,0xac,0xfb,0xf5,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0xfd,0xf9, +0x00,0xfa,0x85,0xfd,0x0a,0x81,0x00,0x2b,0xfd,0xfd,0x56,0xfd,0xfd,0x00,0xfa,0x86, +0xfd,0x23,0x00,0xf5,0xfb,0xac,0xf6,0x00,0xfd,0xfd,0xf5,0xf5,0xfc,0xac,0xf6,0x00, +0xfd,0xfe,0xf5,0xf5,0xfd,0xfd,0xac,0x00,0xf7,0xfd,0xfc,0x00,0x2b,0xfd,0xfd,0x00, +0xf9,0xfd,0xfd,0xf5,0x2b,0x86,0xfd,0x0e,0x00,0xf6,0xfd,0xf9,0x00,0x00,0xfd,0xfe, +0xf5,0x00,0xfb,0xfd,0xfb,0xfa,0x99,0x05,0xfd,0x85,0x00,0x02,0xf6,0xac,0x83,0xfd, +0x02,0xfb,0xf5,0x83,0x00,0x08,0xfb,0xfd,0xf5,0xf6,0xf5,0x00,0x00,0x81,0x83,0xfd, +0x01,0xf7,0x83,0x00,0x01,0x2b,0x85,0xfd,0x02,0xf9,0x00,0x84,0xfd,0x0d,0xfb,0xf5, +0x00,0x00,0xf5,0xfb,0xfd,0x00,0xf7,0xfd,0xfd,0x00,0xf9,0xe2,0xfd,0xfd,0xf5,0x2b, +0x16,0xfd,0xfd,0x81,0xf5,0x00,0x00,0x2b,0xfd,0x2b,0xf5,0xfd,0xfe,0xf6,0x00,0x00, +0x56,0x00,0x00,0xfd,0x2b,0xf5,0xfe,0x86,0xfd,0x0a,0xfc,0xfa,0xf5,0x00,0x00,0xf5, +0xfa,0xfb,0x00,0x2b,0x83,0xfb,0x04,0xfc,0xfb,0xfc,0xf9,0x83,0x00,0x0a,0xf6,0xfc, +0xfb,0xfc,0x00,0xf8,0xfc,0xfb,0xfb,0xf9,0x83,0x00,0x06,0x2b,0x00,0xac,0xfd,0xfe, +0xf7,0x83,0x00,0x04,0x2b,0xfd,0xfa,0x00,0x83,0xfd,0x18,0x00,0x56,0xfb,0xf9,0x00, +0x00,0xf5,0xf7,0x00,0x2b,0xfc,0xf7,0x00,0x00,0xf5,0xf6,0xf5,0xfc,0xfb,0x81,0xf5, +0x00,0x00,0xf5,0x83,0xfb,0x07,0x00,0x2b,0xfc,0xfb,0xfb,0xf6,0x00,0xa2,0xfc,0xfb, +0x40,0xfb,0xfd,0xf9,0xf5,0xfd,0xfd,0x81,0x81,0xfd,0xfd,0x00,0xf7,0xfd,0x81,0xf5, +0x00,0x00,0xf8,0x00,0xf5,0xfc,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfc,0xfb,0xf9,0x00, +0x00,0x56,0xfc,0xfa,0xf5,0x00,0x00,0xf5,0xfa,0xfb,0x00,0x2b,0xfb,0xfb,0xf6,0xf5, +0xfb,0xfb,0xfc,0xf6,0x00,0xf7,0x2b,0xf5,0xfd,0xfd,0xf8,0x00,0x00,0x2b,0xf6,0x00, +0xfa,0x83,0xfb,0x07,0xfc,0xf8,0x00,0x2b,0x00,0x00,0xf6,0x83,0xfb,0x0c,0xf7,0x00, 
+0x00,0xf7,0xf5,0x2b,0xfb,0xfc,0x00,0x56,0xfb,0xf8,0x83,0x00,0x14,0xf7,0xfb,0xfc, +0xf7,0x00,0x00,0xf6,0xf6,0x00,0xfa,0xac,0xf5,0x00,0x00,0xf9,0x00,0xf9,0xfd,0xfd, +0xf8,0x83,0x00,0x01,0xf8,0x87,0xfd,0x09,0xf9,0xf5,0x00,0x00,0xf5,0xfd,0xfd,0x00, +0xf9,0x86,0xfd,0x02,0x00,0x2b,0x83,0x00,0x83,0xfd,0x0e,0xac,0xf5,0x00,0x00,0xf5, +0xac,0xfd,0xfd,0x81,0x00,0x00,0xf9,0xfd,0x81,0x83,0x00,0x01,0x2b,0x83,0xfd,0x07, +0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfe,0x85,0xfd,0x0a,0x81,0x00,0x00,0xf5,0xf7,0x00, +0xfd,0xfd,0xac,0xf6,0x83,0x00,0x01,0xf9,0x99,0x05,0xfd,0xa2,0xfe,0xfd,0x8a,0xfd, +0x01,0xfe,0x8c,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x86,0xfd,0x01,0xfe,0x85,0xfd, +0x01,0xfe,0x88,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x85,0xfd,0x01,0xfe,0x87,0xfd,0x01, +0xfe,0x8b,0xfd,0x01,0xac,0xa2,0xfb,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb, +0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x05,0xfc,0xfb,0xfc,0xfc, +0xfb,0x8f,0xfd,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xa3,0xfb,0xfc,0x83, +0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfc,0xfc,0x83,0xfb,0x02,0xfc,0xfc,0x83,0xfb, +0x02,0xfc,0xfb,0x89,0xfd,0x06,0xfe,0xfd,0xfd,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02, +0xfb,0xfc,0x83,0xfb,0x02,0xfc,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0xe2,0xfc,0xfb,0xfc,0xfb,0x06,0xfd,0xfd,0xfe,0xfd,0xf5,0xf6,0x86,0xfd,0x0c, +0xfe,0xfd,0xfd,0xfb,0xfc,0xfb,0xfb,0x56,0x00,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xa3, +0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x02,0xfc,0xac,0x8c, +0xfd,0xc2,0xfe,0xfd,0xfd,0x85,0xfd,0x01,0xfe,0x8f,0xfd,0xc2,0xfe,0xfd,0xfd,0x83, +0xfd,0x01,0xfe,0x93,0x01,0xfd,0x01,0xfe,0x8b,0xfd,0xc2,0xfe,0xfd,0xfd,0x83,0xfd, +0x01,0xfe,0x97,0x0b,0xfd,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb, +0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x89,0xfb,0x02,0xfc,0xac,0x8d,0xfd,0x01, +0xac,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb, +0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x8c,0xfd,0xe3, 
+0xfb,0xfb,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xfc,0x86,0xfb,0x09,0xfc,0xfb,0xfb,0x00,0xf6,0x2b,0x00,0x00,0xfc,0x88,0xfd,0x09, +0xac,0xfb,0xfb,0xfc,0xfb,0xf8,0x00,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xe4, +0xfc,0xfb,0xfb,0xfb,0x01,0xfc,0x91,0x11,0xfd,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01, +0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb, +0x8e,0xfd,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc, +0x83,0xfb,0x01,0xfc,0x8c,0xfd,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xe3,0xfc,0xfb,0xfb, +0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x01,0xfc,0x83,0xfb,0x06,0xfc,0x81,0xf7,0x2b,0xf7, +0xac,0x8a,0xfd,0x06,0xfc,0xfb,0xfb,0xfc,0xfa,0xf6,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2, +0xfc,0xfb,0xfb,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x06,0xfc,0xfb,0xfb,0xfc,0xfb,0xfc, +0x91,0x11,0xfd,0xc3,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x87,0xfb,0xc2,0xfc,0xfb, +0xfb,0xc3,0xfb,0xfb,0xfc,0x01,0xfc,0x8e,0xfd,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0x8c,0xfd,0xe2,0xfb,0xfc,0xfb,0xfb,0xe2,0xfc, +0xfb,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc, +0x83,0xfb,0x04,0xfc,0xfd,0xfd,0xfe,0x8b,0xfd,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc, +0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x04, +0xfc,0xfb,0xfb,0xfc,0x91,0x11,0xfd,0x03,0xfb,0xfb,0xfc,0x87,0xfb,0x01,0xfc,0x83, +0xfb,0xa3,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xac,0x8e,0xfd,0xe2,0xfb, +0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc, +0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x01,0xfb,0x8c,0xfd,0xe2,0xfb,0xfb,0xfb,0xfc, +0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb, +0xfc,0xfb,0x84,0xfb,0x8e,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb, 
+0x01,0xfc,0x8a,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x04,0xfb,0xfb,0xfc,0xfb, +0x93,0x0f,0xfd,0x02,0xf9,0xfc,0x9c,0x01,0xfd,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb, +0xfb,0x02,0xfb,0xfc,0x87,0xfb,0xa3,0xfc,0xfb,0xa2,0xfb,0xfc,0x06,0xfb,0xfb,0xfc, +0xfb,0xfb,0xac,0x8d,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0x01,0xfc, +0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0xc2,0xfb,0xfb, +0xfc,0x83,0xfb,0x8b,0xfd,0x02,0xac,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x05,0xfc, +0xfb,0xfc,0xfa,0x56,0xe2,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb, +0xfc,0x83,0xfb,0x8e,0xfd,0x03,0xac,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x02,0x81,0x56,0x85,0xfb,0x01, +0xfc,0x83,0xfb,0x01,0xfc,0x96,0x02,0xfd,0x02,0xfa,0xfb,0x95,0x01,0xfd,0x02,0xf9, +0xac,0x94,0x0b,0xfd,0x02,0x00,0xf9,0x9c,0x01,0xfd,0x86,0xfb,0xc2,0xfc,0xfb,0xfb, +0xa3,0xfb,0xfc,0x8b,0xfb,0x06,0xfc,0xfb,0xfb,0xfc,0xfb,0xac,0x8d,0xfd,0x01,0xac, +0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x84,0xfb, +0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xfc,0x8a,0xfd,0x01,0xac, +0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x02,0xf6,0x00,0xc3,0xfb,0xfb, +0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xac,0x8d,0xfd,0x02,0xac,0xfc, +0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x04,0xfb,0xfc,0xf8,0x00, +0xc2,0xfc,0xfb,0xfb,0x04,0xfb,0xfc,0xfb,0xfb,0x96,0x02,0xfd,0x02,0xf5,0x2b,0x95, +0x01,0xfd,0x02,0x00,0xf9,0x94,0x0b,0xfd,0x02,0x00,0xfa,0x93,0x01,0xfd,0x02,0xac, +0xf9,0x87,0xfd,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x89,0xfb,0xe2,0xfc,0xfb,0xfc, +0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x02,0xfc,0xac,0x8e,0xfd,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb, +0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x8a,0xfd,0x84,0xfb,0x01,0xfc,0x83,0xfb, +0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0x2b,0xf5,0x86,0xfb,0xc2,0xfc,0xfb,0xfb, 
+0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x8e,0xfd,0x01,0xac,0xc2,0xfb,0xfc,0xfb,0x83,0xfb, +0xc3,0xfc,0xfb,0xfb,0x83,0xfb,0x04,0xfc,0xfb,0x56,0x00,0xc2,0xfb,0xfc,0xfb,0x04, +0xfb,0xfb,0xfc,0xfb,0x96,0x02,0xfd,0x03,0x00,0x2b,0xfe,0x94,0x01,0xfd,0x02,0x00, +0xf9,0x94,0x0b,0xfd,0x02,0x00,0xf9,0x93,0x01,0xfd,0x02,0xf9,0x00,0x87,0xfd,0x84, +0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfc,0x87,0xfb,0x01, +0xfc,0x85,0xfb,0x03,0xfc,0xfb,0xfb,0x8e,0xfd,0x01,0xac,0xa2,0xfb,0xfc,0x83,0xfb, +0x01,0xfc,0x86,0xfb,0x01,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0xc5,0xfb,0xfb,0xfc, +0x88,0xfd,0xe3,0xfc,0xfb,0xfb,0xfb,0x84,0xfb,0x02,0x2b,0x00,0xc2,0xfb,0xfc,0xfb, +0x86,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x01,0xac,0x8e,0xfd,0xe2,0xfb, +0xfb,0xfb,0xfc,0xe2,0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x83,0xfb,0x02,0x56, +0x00,0x86,0xfb,0x01,0xfc,0x83,0xfb,0x96,0x02,0xfd,0x02,0xf5,0x2b,0x95,0x01,0xfd, +0x02,0x00,0xf9,0x97,0x07,0xfd,0x01,0x2b,0x83,0x00,0x03,0xf8,0xfd,0xfd,0xa2,0x2b, +0x00,0x08,0xf9,0xf9,0x00,0xf8,0x00,0x2b,0xfd,0xf5,0x83,0x00,0x0c,0x81,0xfd,0xfd, +0x00,0x2b,0xf6,0x00,0x00,0x81,0xfd,0xfd,0xfc,0x83,0x00,0x0a,0xf6,0x00,0xfd,0xfd, +0x2b,0x00,0xfe,0xfd,0xf9,0x00,0x83,0xfd,0x04,0xf6,0x00,0x00,0xf6,0x87,0xfd,0x09, +0x00,0x56,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf5,0x83,0x00,0x04,0x81,0xfd,0xfd,0x56, +0x83,0x00,0x02,0x81,0xfd,0x84,0x00,0x02,0xfe,0xfb,0x84,0x00,0x86,0xfb,0x02,0xfc, +0x56,0x83,0x00,0x0d,0xf5,0x00,0xfc,0xfb,0x00,0xf6,0xfc,0xfb,0xf6,0xf5,0xfb,0xfc, +0x81,0x83,0x00,0x02,0xf7,0xfc,0x84,0xfb,0x02,0xfd,0xfa,0x83,0x00,0x0a,0xf8,0xfd, +0xfd,0xac,0xf5,0x00,0x00,0xf8,0xfd,0xac,0x84,0xfb,0x01,0xf8,0x83,0x00,0x03,0xf6, +0xfb,0xfc,0xa2,0xf8,0x00,0x02,0x00,0x00,0x83,0xfb,0x84,0x00,0x11,0xf9,0xfb,0xfc, +0x56,0x00,0x00,0xf5,0x00,0xf6,0xfc,0xfb,0x00,0x2b,0xfb,0xfc,0xf6,0x00,0x83,0xfc, +0x10,0xf5,0x00,0x00,0xf7,0xfb,0xfb,0xfc,0xfb,0xfb,0xf5,0xf5,0xfc,0xfb,0xfb,0xf9, +0x00,0xa2,0xfc,0xfb,0x1a,0xfc,0xf6,0xf5,0xfb,0xfb,0x00,0x2b,0xfb,0xfc,0xf6,0xf5, 
+0xfb,0xfc,0x81,0xf5,0x00,0x00,0x2b,0xfc,0xfb,0xfb,0xf5,0x00,0x00,0xf6,0x00,0x83, +0xfd,0x01,0xf8,0x83,0x00,0x01,0xf8,0x84,0xfd,0x24,0xac,0xf8,0x00,0x81,0xfb,0xfb, +0x81,0x00,0x81,0xf6,0x00,0xfb,0xfc,0xf8,0x00,0xfb,0xfb,0xfc,0xf5,0x00,0x00,0xf6, +0xfb,0xfc,0xf8,0x00,0xfc,0xf5,0xf5,0xfc,0xfb,0xfb,0xf7,0x00,0xfc,0xf6,0x83,0x00, +0x01,0xf8,0x86,0xfd,0x01,0x56,0x83,0x00,0x01,0xf6,0x87,0xfd,0x02,0x00,0xf7,0x83, +0x00,0x10,0x81,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0x2b,0xfe,0xfd,0x00,0xf7,0xfd, +0xfd,0x2b,0x83,0x00,0x03,0xfd,0xfd,0x56,0x83,0x00,0x0e,0xf6,0xfd,0xfd,0xf9,0x00, +0xf8,0x00,0x2b,0xfe,0x00,0xf9,0xfe,0xfd,0xf8,0x83,0x00,0x01,0xf8,0x90,0x07,0xfd, +0x37,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfe,0x2b,0x00,0xf7,0xfd,0xfe,0xf9,0x00,0xf6, +0xac,0xfd,0xfe,0x56,0xfc,0xfe,0xf5,0xf5,0xfd,0xfd,0xf5,0x00,0xf7,0xfe,0xf5,0xf5, +0xfd,0xfd,0xf5,0xf6,0xfc,0xfc,0xf5,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00, +0xfd,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0x86,0xfd,0x17,0x00,0x00,0xfa,0xac,0x00, +0x2b,0xfd,0xfd,0x56,0xfc,0xfe,0xf5,0x00,0xfd,0xfd,0x00,0xf8,0xfd,0xac,0xfc,0xfd, +0xf9,0x00,0x83,0xfd,0x71,0xfb,0xfa,0xfd,0xac,0x00,0xf6,0xfc,0xfb,0xfb,0xfc,0xfb, +0x81,0x00,0x2b,0xfc,0x56,0x00,0xf5,0xfb,0xfb,0xf5,0x2b,0xfb,0xfb,0x2b,0x00,0xfc, +0xfb,0x00,0xf5,0xfc,0xf9,0x00,0xf9,0xfb,0xfc,0xfb,0xfb,0xfd,0xf5,0xf5,0xfe,0xfd, +0xfb,0xfd,0xfe,0x00,0xf5,0xfd,0x81,0x00,0xfb,0xac,0xfb,0xfb,0xfc,0xfb,0xf9,0xf9, +0xfb,0xf9,0x00,0xf9,0xfb,0x56,0x00,0xf5,0xfa,0x81,0xf5,0xf5,0xfb,0xfc,0xf8,0x81, +0xfb,0xf5,0xf5,0xfb,0x81,0x00,0x2b,0xfb,0x56,0x00,0x2b,0xfb,0xfb,0xf5,0xf6,0xfb, +0xfb,0x2b,0x00,0xfb,0xfc,0x00,0xf5,0xfb,0xf9,0x00,0xfa,0xfb,0xfb,0xfc,0xfb,0xf9, +0x00,0xf9,0xfb,0xfc,0xf5,0x2b,0x85,0xfb,0x22,0x2b,0x00,0xfb,0xfc,0xf5,0xf6,0xfb, +0xfb,0x2b,0x00,0xfb,0xfc,0x00,0xf5,0xfb,0xf9,0x00,0xfa,0xfb,0x2b,0x00,0xfa,0xfb, +0xf5,0x00,0xfd,0xfd,0xf9,0x00,0xf9,0xfe,0xf9,0x00,0xf9,0x84,0xfd,0x27,0xfb,0x00, +0xf7,0xfb,0xfc,0x2b,0xf5,0xfb,0x2b,0x00,0xfc,0xfb,0x56,0x00,0xfb,0xfc,0xf6,0x00, 
+0x81,0x81,0x00,0xf7,0xfb,0x56,0x00,0xfb,0xf9,0x00,0xf9,0xfb,0xfc,0xf5,0xf7,0xfb, +0x56,0xfb,0xfe,0xf7,0x00,0x86,0xfd,0x06,0xfa,0x81,0xfd,0x81,0x00,0xfa,0x86,0xfd, +0x15,0x00,0xf5,0xf9,0xfe,0xf6,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0xfe,0xf5,0x2b,0xfd, +0xfd,0xf5,0x2b,0xfd,0xfa,0x00,0xa2,0xfb,0xfd,0x18,0xfd,0xfa,0x81,0xfd,0x81,0x00, +0x81,0xfd,0xfa,0x00,0xf6,0xac,0xfd,0xfd,0x00,0xf9,0xfd,0xf9,0x00,0xf9,0xfe,0xf9, +0x00,0xf9,0x9f,0x06,0xfd,0x0f,0xfe,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0x00,0xfe, +0xfd,0xfd,0xf9,0x00,0xac,0x86,0xfd,0x23,0x2b,0xf5,0xfd,0xfd,0xf5,0xf6,0xfe,0xfd, +0x2b,0xf5,0xfd,0xfb,0x00,0xfb,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd, +0xf9,0x00,0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0xfe,0x85,0xfd,0x07,0x00,0x56, +0xfd,0xfd,0xf5,0x2b,0xfe,0x84,0xfd,0x07,0x2b,0xf5,0xfe,0xfd,0x00,0xf5,0x81,0x83, +0xfd,0x02,0xf9,0x00,0x87,0xfd,0x26,0xf5,0x2b,0xfb,0xfb,0xfc,0xfb,0xfb,0xf7,0x00, +0xfc,0xfb,0xfb,0x2b,0x00,0xfc,0xfb,0x00,0x2b,0xfb,0xfc,0xf6,0xf5,0xfb,0xfa,0x00, +0xf8,0xf8,0x56,0x00,0x2b,0xfc,0xfb,0xfb,0xfc,0xac,0xf6,0xf5,0x56,0x83,0xfd,0x08, +0xfb,0x00,0x56,0xf9,0xf9,0x00,0xf7,0xac,0x86,0xfb,0x2c,0xfc,0xfb,0x00,0x56,0xfb, +0x56,0x00,0xfc,0xfb,0xfb,0xf8,0x00,0xfb,0xfb,0xfc,0xfb,0xfc,0xf6,0x00,0xfc,0x2b, +0x00,0xfc,0xfb,0xfb,0x00,0x2b,0xfb,0xfc,0x00,0x2b,0xfc,0xfb,0xf6,0xf5,0xfc,0xf9, +0x00,0xf8,0x56,0xf8,0x00,0x2b,0xfc,0x83,0xfb,0x59,0xfc,0x00,0x2b,0xfb,0x81,0x00, +0x81,0xfc,0xfb,0xfb,0xfc,0xfb,0xf6,0xf5,0xfc,0xfb,0x00,0x2b,0xfc,0xfb,0xf6,0xf5, +0xfc,0xf9,0x00,0xf8,0x56,0xf8,0x00,0x2b,0xfc,0x00,0xf7,0xfb,0xfb,0xf9,0x00,0xfd, +0xfd,0xf6,0xf5,0xfe,0xfd,0xfd,0xf5,0xf5,0xfe,0xfd,0xfd,0xac,0xfb,0xf6,0x00,0xfc, +0xfb,0x00,0xf9,0xfc,0xf6,0xf5,0xfb,0xfb,0x56,0x00,0xfb,0xfb,0x00,0x2b,0x56,0x56, +0xf5,0xf5,0xfb,0x56,0x00,0xfb,0xfc,0x00,0x2b,0xfb,0xfa,0x00,0xfc,0xfb,0xfb,0xfd, +0xfd,0xf9,0x00,0x86,0xfd,0x01,0xfe,0x83,0xfd,0x02,0x00,0xfa,0x86,0xfd,0x17,0x00, +0xf9,0xfe,0xfd,0xac,0x00,0xfa,0xfe,0x00,0xf9,0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0xf5, 
+0x2b,0xfe,0xfb,0x00,0x2b,0xfb,0x87,0xfd,0x06,0x00,0xf9,0xfd,0xf9,0x00,0xac,0x83, +0xfd,0x0b,0x00,0xf9,0xfd,0xf6,0xf5,0xfe,0xfd,0xfd,0xf5,0xf5,0xfe,0x9e,0x06,0xfd, +0x02,0xf9,0xf5,0x83,0x00,0x04,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x02,0xf9,0x00,0x84, +0xfd,0x10,0xf7,0x00,0xf5,0x00,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd, +0xf9,0x00,0x83,0xfd,0x0e,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf9,0x00,0xfd, +0xfe,0x00,0x00,0x83,0xf5,0x01,0xf6,0x86,0xfd,0x17,0x00,0xf9,0xfd,0xfd,0xf5,0x2b, +0xfd,0xfd,0xf7,0x00,0x00,0xf5,0x00,0xfd,0xfd,0xfb,0xf6,0x00,0x00,0xfc,0xfd,0xfa, +0x00,0x83,0xfd,0x08,0xac,0xf6,0x00,0xf5,0x00,0xf6,0xfb,0xfc,0x83,0xfb,0x02,0x2b, +0x00,0x83,0xfb,0x12,0x2b,0x00,0xfb,0xfc,0xf5,0xf6,0xfc,0xfb,0x2b,0x00,0xfb,0x56, +0x00,0x00,0xf5,0x00,0xf5,0xf7,0x84,0xfb,0x09,0xac,0xac,0x2b,0x00,0x00,0xfa,0xfd, +0xf9,0x00,0x84,0xf5,0x02,0xf8,0xac,0xa2,0xfb,0xfc,0x02,0xfa,0xf5,0x83,0x00,0x04, +0xf8,0xfb,0x56,0x00,0x83,0xfb,0x05,0x56,0x00,0xfb,0xfc,0x2b,0x83,0x00,0x13,0xf5, +0xfb,0x2b,0x00,0xfb,0xfb,0xfc,0xf5,0xf6,0xfc,0xfb,0xf5,0xf6,0xfb,0xfc,0x2b,0x00, +0xfb,0x56,0xa2,0x00,0xf5,0x02,0xf5,0xf7,0x83,0xfb,0x07,0xfc,0xfb,0x2b,0x00,0xfc, +0x2b,0xf5,0xc2,0xfc,0xfb,0xfb,0x0c,0x2b,0x00,0xfb,0xfb,0xf5,0x2b,0xfb,0xfb,0x2b, +0x00,0xfb,0x56,0xa2,0x00,0xf5,0x12,0xf5,0xf7,0xfb,0x00,0xf8,0xfc,0xfb,0xf9,0x00, +0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xfe,0x2b,0xf5,0x83,0xfd,0x25,0xac,0xfc,0xf9,0x00, +0xf9,0xf8,0xf5,0xfb,0xfc,0xf6,0xf5,0xfb,0xfb,0x56,0x00,0xfb,0xfc,0x00,0x00,0xf5, +0x00,0xf5,0xf5,0xfc,0xf8,0x00,0xfc,0xfb,0x2b,0x00,0xfb,0xf6,0xf6,0xfb,0xfb,0x56, +0xf5,0x83,0x00,0x86,0xfd,0x02,0xfb,0xf5,0x83,0x00,0x01,0xf9,0x86,0xfd,0x02,0x00, +0xf9,0x83,0xfd,0xc2,0x00,0xf9,0xfd,0xe2,0xfd,0xf5,0x2b,0xfd,0x09,0xfd,0xf9,0xf5, +0x00,0xf6,0xfd,0xfd,0xfb,0xf5,0x83,0x00,0x04,0xf9,0xfd,0xf9,0x00,0x84,0xfd,0x0a, +0x00,0xfa,0xfd,0xf5,0x2b,0xfd,0xfd,0xfe,0x2b,0xf5,0x9e,0x06,0xfd,0x0a,0xfb,0x00, +0x81,0xfd,0xfa,0x00,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x02,0xf9,0x00,0x83,0xfd,0x22, 
+0xf8,0x00,0xac,0xfd,0x2b,0xf5,0xfe,0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0x81, +0x00,0xac,0xfd,0xfd,0xf9,0x00,0xfe,0xfd,0x2b,0xf5,0xfd,0xfd,0x56,0x00,0xfd,0xfd, +0x00,0xf7,0xc2,0xfe,0xfd,0xfd,0x84,0xfd,0x0d,0x00,0xf9,0xfe,0xfd,0x00,0xf7,0xfd, +0xf8,0x00,0xac,0xfe,0x2b,0xf5,0x84,0xfd,0x06,0xfb,0x00,0xf5,0xfe,0xf9,0x00,0x83, +0xfd,0x06,0xf5,0xf5,0xfe,0xfd,0x00,0x2b,0x83,0xfb,0x15,0xfc,0xfb,0x2b,0x00,0xfc, +0xfb,0xfc,0xf6,0xf5,0xfb,0xfb,0x00,0x2b,0xfb,0xfb,0xf6,0xf5,0xfc,0xf9,0x00,0xf9, +0xe2,0xfc,0xfb,0xfc,0xfb,0x83,0xfd,0x07,0xac,0xf5,0x00,0xfe,0xfb,0x00,0x81,0x84, +0xfd,0x01,0xac,0x84,0xfb,0x19,0x00,0x2b,0xfc,0xfc,0x00,0x56,0xfb,0x56,0x00,0xfb, +0xfc,0xfb,0xf7,0x00,0xfc,0x2b,0x00,0xfb,0xfc,0xf6,0xf5,0xfb,0x2b,0x00,0xfc,0xe2, +0xfb,0xfb,0x00,0x2b,0x08,0xfb,0xfb,0xf6,0xf5,0xfb,0xfa,0x00,0xf9,0xc2,0xfc,0xfb, +0xfb,0x83,0xfb,0x05,0x81,0x00,0xf8,0x00,0xf9,0x84,0xfb,0x1f,0xfc,0xfb,0xf6,0xf5, +0xfc,0xfb,0x00,0x2b,0xfb,0xfc,0xf5,0xf5,0xfc,0xf9,0x00,0xf9,0xfc,0xfb,0xfb,0xfc, +0xfb,0x00,0xf7,0xfb,0xfc,0xf8,0x00,0xfd,0xfd,0xf5,0xf6,0x83,0xfd,0x02,0xf6,0xf5, +0x84,0xfd,0x19,0xfb,0xfb,0x00,0x2b,0x00,0xf7,0xfc,0xfb,0xf6,0xf5,0xfc,0xfb,0xf7, +0x00,0xfc,0xfb,0x00,0x2b,0xfc,0xfc,0xfb,0xfc,0xfb,0x56,0x00,0x83,0xfb,0x0b,0x00, +0xf8,0x00,0xfa,0xfc,0xfa,0x00,0x81,0xfd,0xfa,0x00,0x86,0xfd,0x06,0x00,0xf7,0xfe, +0xfd,0x00,0xf9,0x86,0xfd,0x17,0x00,0xf9,0xfd,0xfd,0xfb,0x00,0xfb,0xfd,0x00,0xfa, +0xfd,0xfd,0x00,0x2b,0xfe,0xfd,0x00,0x2b,0xfd,0xfd,0xfe,0xfd,0xf9,0xc2,0x00,0xf7, +0xfe,0x06,0xfe,0x00,0xf9,0xfe,0xf9,0x00,0x84,0xfd,0x05,0x00,0xf9,0xfd,0xf5,0xf6, +0x83,0xfd,0x02,0xf6,0xf5,0x9e,0x06,0xfd,0x0a,0x81,0x00,0xfa,0xac,0xf6,0x00,0xac, +0xfd,0xf7,0x00,0x83,0xfd,0x02,0xfa,0x00,0x83,0xfd,0x26,0xf7,0x00,0xac,0xfb,0xf5, +0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf6,0xfd,0xf9,0xf5, +0x00,0xfd,0xfd,0xf7,0x00,0xac,0xfa,0x00,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xfc, +0xf9,0x86,0xfd,0x3c,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfd,0xf7,0x00,0xfc,0xfb,0xf5, 
+0x00,0xfd,0xfd,0x56,0xac,0xac,0xf5,0xf6,0xfd,0xfa,0x00,0x81,0xfd,0xfd,0xf6,0xf5, +0xfd,0xf9,0x00,0xf6,0xfb,0xfc,0xfb,0xfb,0xfc,0xfa,0x00,0xf8,0xfb,0xf7,0x00,0x00, +0xfb,0xfc,0xf5,0x00,0xfc,0x2b,0x00,0x00,0xfb,0xfc,0xf5,0x00,0xf9,0xfc,0xf9,0xf9, +0x84,0xfb,0x40,0xac,0xf9,0xfb,0xfd,0xf7,0x00,0xfd,0xfd,0xf5,0x00,0xfb,0xfd,0xfb, +0x81,0xac,0xfc,0xfb,0xfb,0xfc,0x00,0x2b,0xfc,0x2b,0x00,0xf8,0xfb,0x56,0x00,0xf6, +0x81,0xfa,0x00,0xf6,0xfb,0x2b,0x00,0x81,0xfa,0xf5,0x00,0xfb,0xfb,0x00,0xf6,0x56, +0xf5,0x00,0xf6,0xfb,0xfb,0xf6,0x00,0xfc,0x2b,0x00,0x00,0xfb,0xfc,0xf5,0x00,0xf9, +0xfc,0xfa,0x56,0xa3,0xfb,0xfc,0x03,0xf5,0x00,0x00,0xc2,0xfc,0xfb,0xfb,0x23,0xfc, +0x2b,0x00,0xfb,0xfc,0xf5,0x00,0xfc,0x2b,0x00,0x00,0xfb,0xfc,0xf5,0x00,0xf9,0xfc, +0xfa,0x56,0xfb,0x2b,0x00,0xf8,0xf7,0x00,0x00,0xfd,0xfd,0xfa,0x00,0xf9,0xfd,0xf9, +0x00,0xfa,0x83,0xfd,0x29,0xac,0xfb,0xfc,0x2b,0x00,0x00,0xfc,0xfb,0xfb,0x2b,0x00, +0x81,0x56,0x00,0x00,0xfb,0xfb,0xf7,0x00,0xf8,0xfb,0x81,0xf8,0xfb,0x56,0x00,0xfb, +0xfc,0xfb,0xf5,0x00,0x00,0xfc,0xfb,0x56,0x00,0xfa,0xac,0xf6,0x00,0xac,0x85,0xfd, +0x06,0x00,0xf7,0xfd,0xf8,0x00,0x56,0x86,0xfd,0x23,0x00,0xf5,0xfb,0xac,0xf6,0x00, +0xfd,0xfd,0x00,0x2b,0xac,0xf6,0x00,0x2b,0xfd,0xfd,0xf5,0x2b,0xfd,0xfb,0xf9,0xfd, +0xfc,0x00,0xf8,0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xfd,0xf9,0x00,0x84,0xfd,0x0e, +0x00,0xf9,0xfd,0xfa,0x00,0xf9,0xfd,0xf9,0x00,0xfa,0xfd,0xfb,0x2b,0xfb,0x9b,0x06, +0xfd,0x09,0xf6,0x00,0x00,0x56,0x00,0x00,0xfd,0x2b,0xf5,0x83,0xfd,0x02,0xf9,0x00, +0x83,0xfd,0x01,0xac,0x83,0x00,0x0d,0xf9,0x00,0xf5,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b, +0xf5,0xfd,0xfd,0x81,0x83,0x00,0x05,0xf7,0x00,0xfd,0xfd,0xac,0x83,0x00,0x06,0xf9, +0x00,0xfd,0xfd,0xfe,0xf7,0x83,0x00,0x01,0x2b,0x86,0xfd,0x23,0x00,0xf9,0xfd,0xfd, +0xf5,0x2b,0xfe,0xfc,0xf5,0x00,0x00,0x56,0x00,0xf5,0xfd,0xf5,0x00,0x00,0xf5,0xac, +0xfd,0xfd,0xf6,0x00,0x00,0xfe,0xfa,0x00,0x00,0xf6,0xf7,0x00,0x2b,0xfb,0xfc,0x83, +0xfb,0x0e,0xf7,0x00,0x00,0xf5,0xf6,0xf5,0xfc,0xfb,0xf9,0x00,0x00,0xf6,0x2b,0xf5, 
+0x83,0xfb,0x01,0xf6,0x83,0x00,0x01,0xf8,0xa2,0xfb,0xfc,0x02,0xac,0x2b,0x83,0x00, +0x05,0x81,0xfd,0xfd,0xac,0xf6,0x83,0x00,0x2f,0x56,0xfd,0xfb,0xfb,0xfc,0xfb,0xf7, +0x00,0x00,0x2b,0xf6,0x00,0xf9,0xf8,0x00,0x2b,0x00,0x00,0xf6,0xfc,0xfb,0x81,0xf5, +0x00,0x00,0xf8,0x00,0xf5,0xfb,0xf9,0xf5,0x00,0xf8,0xf5,0x2b,0xfc,0xfb,0xf9,0x00, +0x00,0xf6,0x2b,0xf5,0xfb,0xfb,0x81,0xf6,0x83,0x00,0x02,0xf8,0xfc,0x83,0xfb,0x07, +0xfc,0xfb,0x56,0x00,0xf7,0xfb,0xfc,0x85,0xfb,0x0e,0xf6,0xf5,0xfc,0xfb,0xf9,0x00, +0x00,0xf6,0x2b,0xf5,0xfb,0xfb,0x81,0xf6,0x83,0x00,0x08,0xf8,0xfc,0xfb,0x2b,0x00, +0xf5,0x56,0x00,0x83,0xfd,0x01,0xf8,0x83,0x00,0x01,0xf8,0x85,0xfd,0x09,0xfb,0xfb, +0x81,0x00,0xf6,0xfc,0xfb,0xfc,0x81,0x83,0x00,0x06,0xf8,0x00,0xfc,0xfb,0xfb,0x2b, +0x83,0x00,0x04,0x2b,0xfc,0xf8,0x00,0x83,0xfb,0x0c,0xf8,0x00,0x56,0xfb,0xfc,0xfb, +0xf6,0x00,0x00,0x56,0x00,0x00,0x85,0xfd,0x07,0xf8,0x00,0x00,0x2b,0x2b,0x00,0x81, +0x85,0xfd,0x02,0x00,0xf6,0x83,0x00,0x83,0xfd,0x0c,0xf8,0x00,0x00,0xf8,0xf5,0x2b, +0xfd,0xfd,0xf5,0x2b,0xfe,0xfb,0x83,0x00,0x0c,0xf6,0xfe,0xfd,0xf8,0x00,0x00,0x2b, +0xf6,0x00,0x81,0xf9,0x00,0x84,0xfd,0x05,0x00,0xf9,0xfe,0xfd,0xf8,0x83,0x00,0x06, +0xf8,0xfd,0xfd,0xf9,0x00,0xf9,0x9b,0x06,0xfd,0x01,0xfe,0x83,0xfd,0xc2,0xfe,0xfd, +0xfd,0x8a,0xfd,0xa2,0xfe,0xfd,0x85,0xfd,0x01,0xfe,0x86,0xfd,0x03,0xfe,0xfa,0x00, +0x85,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x86,0xfd,0x03,0xfe, +0xfd,0xfd,0xa2,0xfe,0xfd,0x83,0xfd,0x01,0xfe,0x85,0xfd,0x01,0xfe,0x83,0xfd,0x02, +0xfe,0xfd,0xa2,0xfc,0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfc,0x2b,0x00,0x83,0xfb, +0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0xa3,0xfc,0xfb,0x04,0xfb,0xfb,0xac,0xfe, +0x87,0xfd,0x01,0xfe,0x85,0xfd,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x05,0x56,0x00,0xfc, +0xfb,0xfb,0xa3,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfc,0x03,0x81,0x00,0xf8,0x83,0xfb, +0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0xe2,0xfb,0xfc,0xfb,0xfb,0x04,0xfb,0xfa,0x00, +0x81,0x84,0xfb,0xa3,0xfc,0xfb,0xc3,0xfb,0xfb,0xfc,0xa3,0xfb,0xfc,0x83,0xfb,0x04, 
+0xfc,0xfc,0x2b,0x00,0x86,0xfd,0x01,0xfe,0x85,0xfd,0x03,0xac,0xfb,0xfb,0xa2,0xfc, +0xfb,0x83,0xfb,0xa3,0xfc,0xfb,0x04,0xfb,0xfb,0xfc,0xfc,0xc2,0xfb,0xfc,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x83,0xfd,0x01,0xfe,0x92,0x01, +0xfd,0x05,0x00,0xfa,0xfd,0xfd,0xfe,0x84,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x86,0xfd, +0x01,0xfe,0x86,0xfd,0xc2,0xfe,0xfd,0xfd,0x88,0xfd,0x01,0xfe,0x85,0xfd,0x01,0xfe, +0x92,0x09,0xfd,0x02,0xf9,0x00,0x94,0x03,0xfd,0xc2,0xfc,0xfb,0xfb,0x86,0xfb,0x04, +0x2b,0xf5,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x84,0xfb,0x03,0xfc,0xfb, +0xfc,0x8d,0xfd,0x01,0xac,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x06,0xfc,0xfb,0x56,0x00, +0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x06,0x00,0xf6,0xf6,0x00,0x00,0xfc,0x86, +0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x04,0xfc,0xfb,0xf5,0xf6, +0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x84,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xf6,0xf5,0x02,0x00,0xf9,0x8c,0xfd,0x01,0xac,0x83,0xfb,0xc2, +0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01, +0xfc,0x84,0xfb,0x04,0xfc,0xfb,0xfb,0xfc,0x96,0x01,0xfd,0x02,0x00,0xf9,0x93,0x0c, +0xfd,0x02,0xfb,0x2b,0x94,0x03,0xfd,0x02,0xfb,0xfc,0x84,0xfb,0xa3,0xfc,0xfb,0x03, +0x56,0x2b,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0x02,0xfb,0xfc,0x8e,0xfd,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb, +0x02,0xfa,0xf6,0xe3,0xfb,0xfb,0xfc,0xfb,0x07,0xfb,0xfc,0xf9,0xf6,0x2b,0xf7,0xfc, +0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0x03,0xfc,0x2b,0xfa,0xe4,0xfb,0xfb,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x04, +0xf7,0xf6,0x2b,0xfb,0x8d,0xfd,0xa2,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x83,0xfb,0xc2, +0xfc,0xfb,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x96,0x01,0xfd,0x02,0x2b,0xfb,0x99,0x0f,0xfd,0xa3,0xfc,0xfb,0x85,0xfb,0xa2,0xfc, +0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x02,0xfc, 
+0xfc,0x8e,0xfd,0x01,0xac,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0xe5,0xfb, +0xfb,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb, +0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb, +0xc2,0xfb,0xfc,0xfb,0x03,0xfc,0xfb,0xac,0x8e,0xfd,0x01,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0xc2,0xfb,0xfc,0xfb,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x03, +0xfc,0xfb,0xfc,0x96,0x01,0xfd,0x01,0xfe,0x9a,0x0f,0xfd,0x01,0xac,0x86,0xfb,0xc2, +0xfc,0xfb,0xfb,0x87,0xfb,0xc3,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x8e,0xfd, +0x04,0xac,0xfb,0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0xe3,0xfb,0xfb,0xfb,0xfc,0xe2,0xfb,0xfc,0xfb,0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0xe2, +0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x02, +0xfb,0xfc,0x84,0xfb,0x01,0xac,0x8e,0xfd,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc, +0x87,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x91, +0x11,0xfd,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc, +0xfb,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x05,0xfb,0xfc,0xfb,0xfb,0xac,0x8e,0xfd,0x84, +0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe4,0xfb,0xfc,0xfb,0xfb, +0xe2,0xfb,0xfb,0xfb,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb, +0x83,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x03, +0xfc,0xfb,0xfb,0x8e,0xfd,0x01,0xac,0xe2,0xfb,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc, +0xe2,0xfb,0xfc,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x02,0xfc,0xfc, +0x91,0x11,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0x01,0xac, +0x8d,0xfd,0x01,0xac,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0xe3,0xfc, +0xfb,0xfb,0xfb,0xc3,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03, +0xfb,0xfb,0xfc,0x84,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb, 
+0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfc,0xac,0x8d,0xfd,0x02,0xac,0xfc,0x85,0xfb, +0xc3,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc, +0xfb,0xfb,0x02,0xfb,0xac,0x91,0x11,0xfd,0x01,0xac,0x83,0xfb,0xc3,0xfc,0xfb,0xfb, +0x03,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x04,0xfb, +0xfc,0xfb,0xac,0x8e,0xfd,0x01,0xac,0x87,0xfb,0x01,0xfc,0x86,0xfb,0xc2,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x85,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xe2, +0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x83,0xfb,0xc4,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb, +0xc2,0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xac,0x8e,0xfd,0x03,0xfc,0xfb,0xfb,0xa2,0xfc, +0xfb,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x85,0xfb,0x03,0xfc, +0xfb,0xac,0x91,0x11,0xfd,0x01,0xac,0xe2,0xfb,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x04,0xfc, +0xfb,0xfb,0xfc,0x8f,0xfd,0xc2,0xfb,0xfc,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb, +0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xe5, +0xfb,0xfc,0xfb,0xfb,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb, +0xfb,0x8f,0xfd,0xa2,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86, +0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xac,0x92, +0x11,0xfd,0xe2,0xfb,0xfb,0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x87,0xfb, +0xc3,0xfc,0xfb,0xfb,0x03,0xfb,0xfc,0xfb,0x8f,0xfd,0x03,0xfb,0xfb,0xfc,0x84,0xfb, +0x01,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85, +0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb, +0xfc,0x87,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x03,0xfc,0xfb, +0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xfb,0x8f,0xfd,0xe2, +0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb, +0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x93,0x11,0xfd,0xa2,0xfc,0xfb,0x84, 
+0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb, +0x01,0xfc,0x84,0xfb,0x8f,0xfd,0x01,0xac,0x84,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0x02, +0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb, +0x85,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03,0xfc, +0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0x01,0xac,0x8f,0xfd,0xe2,0xfb,0xfb,0xfb,0xfc,0x02,0xfb,0xfc,0x85,0xfb,0xc2,0xfc, +0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfb,0xfc, +0xfb,0x93,0x11,0xfd,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xc2,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xac,0x8f,0xfd, +0xe2,0xfb,0xfb,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb, +0xe2,0xfc,0xfb,0xfb,0xfb,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0x03, +0xfc,0xfb,0xfb,0x8f,0xfd,0x01,0xac,0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb, +0x83,0xfb,0x01,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x04,0xfc,0xfb,0xfb,0xfc, +0x93,0x11,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc, +0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x01,0xfb,0x8f, +0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0xa3,0xfc,0xfb,0x83,0xfb,0xc3,0xfc,0xfb, +0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xac,0x8f,0xfd,0xe3,0xfb,0xfc,0xfb,0xfb, +0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfb, +0xfb,0xac,0x93,0x11,0xfd,0x01,0xac,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc, +0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb, +0x03,0xfc,0xfb,0xfb,0x8f,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0xa2, 
+0xfc,0xfb,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xe3,0xfc, +0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2, +0xfb,0xfb,0xfb,0xfc,0x04,0xfb,0xfb,0xfc,0xfb,0x8f,0xfd,0x04,0xac,0xfb,0xfb,0xfc, +0x84,0xfb,0xe4,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0x01,0xac,0x93,0x11,0xfd,0x01,0xac,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x03, +0xfc,0xfb,0xac,0x8f,0xfd,0x02,0xfc,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb, +0xa2,0xfc,0xfb,0x87,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xe4,0xfc,0xfb,0xfb,0xfb,0xa3, +0xfb,0xfc,0x83,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb, +0x01,0xfc,0x8f,0xfd,0x02,0xac,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb, +0xfb,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xac, +0x94,0x11,0xfd,0xc2,0xfb,0xfb,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc, +0xfb,0xfb,0x86,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x90,0x01,0xfd,0x02,0xfb, +0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0xc2,0xfc, +0xfb,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0xe4,0xfb,0xfb,0xfb,0xfc,0x88,0xfb,0xe2,0xfc, +0xfb,0xfb,0xfb,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x90,0x01,0xfd,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0xc3,0xfc,0xfb, +0xfb,0x85,0xfb,0xa2,0xfc,0xfb,0x01,0xfb,0x95,0x11,0xfd,0x02,0xfc,0xfc,0x85,0xfb, +0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc, +0x85,0xfb,0x03,0xfc,0xfb,0xac,0x8f,0xfd,0x02,0xac,0xfc,0x84,0xfb,0x01,0xfc,0x84, +0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x84, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x83,0xfb,0x03,0xfc,0xfb, +0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0x01,0xac, +0x8f,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb, 
+0x03,0xfb,0xfb,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc, +0x95,0x11,0xfd,0xc2,0xfc,0xfb,0xfb,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfb, +0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfc,0xfc,0x90,0x01,0xfd,0x83, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xc3,0xfc, +0xfb,0xfb,0xa2,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0xc3,0xfb, +0xfc,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x84,0xfb,0x05,0xfc,0xfb,0xfb,0xfc, +0xfb,0x90,0x01,0xfd,0x02,0xfc,0xfc,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb, +0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0x04,0xfc, +0xfb,0xfb,0xac,0x95,0x11,0xfd,0x01,0xac,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x87, +0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x83,0xfb,0x90, +0x01,0xfd,0x03,0xac,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb, +0xe2,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x87,0xfb,0xa2,0xfc,0xfb,0x02,0xfb, +0xfc,0x85,0xfb,0x01,0xfc,0x88,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc3,0xfc,0xfb,0xfb, +0x01,0xac,0x90,0x01,0xfd,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb, +0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x01, +0xac,0x96,0x11,0xfd,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc, +0xfb,0xfb,0x83,0xfb,0xc3,0xfc,0xfb,0xfb,0x04,0xfb,0xfc,0xfb,0xac,0x90,0x01,0xfd, +0xc2,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x02,0xfb,0xfb,0xa3,0xfc,0xfb,0x02,0xfb,0xfc, +0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x8a,0xfb,0x01,0xfc,0x85,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe3,0xfb,0xfb,0xfc,0xfb,0x83,0xfb,0x02,0xfc,0xfc, +0x90,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe4,0xfb,0xfb,0xfc,0xfb,0x97,0x10,0xfd,0x02,0xac, +0xf9,0x8e,0xfd,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb, +0xc3,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x83,0xfb,0x91,0x01,0xfd,0x83,0xfb, 
+0xa2,0xfc,0xfb,0x8f,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0xa2,0xfc, +0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01,0xfc, +0x86,0xfb,0x01,0xfc,0x84,0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x90,0x01,0xfd,0xa2,0xfc, +0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xfc,0x84,0xfb,0x03, +0xfc,0x56,0xfa,0xe2,0xfb,0xfb,0xfb,0xfc,0x9d,0x02,0xfd,0x02,0xf9,0xfc,0x99,0x08, +0xfd,0x01,0xfb,0x83,0xf5,0x05,0x56,0xfd,0xfd,0xf6,0xf8,0x9a,0x01,0xfd,0x04,0xfb, +0xfb,0xfd,0xf9,0x98,0x02,0xfd,0x02,0xf9,0x00,0x8e,0xfd,0x01,0xac,0x83,0xfb,0x01, +0xfc,0x84,0xfb,0x06,0x56,0xf5,0x00,0xf5,0x2b,0xfa,0x84,0xfb,0xc3,0xfc,0xfb,0xfb, +0xe2,0xfb,0xfb,0xfb,0xfc,0x01,0xfb,0x91,0x01,0xfd,0x01,0xac,0x86,0xfb,0xa2,0xfc, +0xfb,0xa3,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfb, +0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x02, +0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x01,0xac,0x90,0x01,0xfd, +0x01,0xac,0x83,0xfb,0xa2,0xfc,0xfb,0x06,0xfb,0xfb,0xfc,0xfb,0xf9,0x00,0xa2,0xfb, +0xfc,0x03,0x2b,0xf6,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x04,0xfb,0x00,0x2b,0xfc,0x83, +0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x92,0x01,0xfd,0x09,0xfc,0x2b,0x00,0xf5,0xf7,0xfd, +0xfd,0xf8,0xf6,0x92,0x01,0xfd,0x02,0x00,0xf9,0x99,0x08,0xfd,0x09,0x00,0xf6,0xfa, +0xf8,0x2b,0xfd,0xfe,0xf8,0xf9,0x9a,0x01,0xfd,0x04,0xf8,0xf7,0xfe,0x00,0x98,0x02, +0xfd,0x02,0xf9,0x00,0x8e,0xfd,0x04,0xac,0xfb,0xfb,0xfc,0x83,0xfb,0x08,0xfc,0xf6, +0x00,0x2b,0x56,0xf8,0xf6,0xf5,0xa2,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2, +0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x83,0xfb,0x01,0xac,0x8e,0xfd,0x03,0xfa,0xfb,0xac, +0xa3,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb, +0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc, +0xfb,0xfb,0x06,0xfb,0xfc,0xfb,0xfb,0x56,0x81,0x86,0xfb,0x01,0xfc,0x86,0xfb,0x04, +0xac,0xfe,0xfa,0xfb,0x8e,0xfd,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb, 
+0x04,0xfc,0xf8,0x00,0xfc,0x83,0xfb,0x05,0x2b,0x00,0xfb,0xfb,0xfc,0x84,0xfb,0x0c, +0xfc,0xfb,0xf5,0xf6,0xfb,0xfc,0x56,0xfa,0xfb,0xfc,0xfb,0xac,0x92,0x01,0xfd,0x09, +0xf6,0x00,0xfa,0x56,0xf6,0xfd,0xfd,0xf9,0xf8,0x92,0x01,0xfd,0x02,0x00,0xf9,0x98, +0x08,0xfd,0x05,0xf9,0x00,0xfd,0xfd,0xfe,0x90,0x02,0xfd,0x01,0xfe,0x9a,0x02,0xfd, +0x02,0xf9,0x00,0x8f,0xfd,0x01,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0x56,0x00,0xf9,0xc2, +0xfb,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0x03,0xfc,0xfb,0xac,0x8e,0xfd,0x04,0x00,0x2b,0xfd,0xac,0x85,0xfb,0x01,0xfc, +0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x87,0xfb,0xc2,0xfc,0xfb,0xfb,0xc3,0xfb,0xfc,0xfb, +0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03,0xfc,0x00,0x56,0xa3,0xfb, +0xfc,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x05,0xac,0xfd,0xfd,0x00,0x2b,0x8d,0xfd,0x01, +0xac,0x83,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x08,0x56,0x00,0xfb,0xfb, +0xfc,0xfb,0xf6,0xf5,0xe2,0xfc,0xfb,0xfb,0xfb,0x0a,0xfc,0x00,0x2b,0xfb,0xfb,0xf5, +0xf6,0xfb,0xfb,0xfc,0x93,0x01,0xfd,0x02,0x00,0xfa,0x85,0xfd,0x01,0xfe,0x93,0x01, +0xfd,0x02,0x00,0xfa,0x98,0x08,0xfd,0x03,0xfb,0x00,0xf6,0x85,0xfd,0x05,0xf5,0x2b, +0xfd,0xfd,0x81,0x83,0x00,0x01,0xfa,0x85,0xfd,0x16,0xfe,0x00,0x2b,0xf5,0x00,0x00, +0xfa,0xf8,0x00,0x00,0xf8,0xfd,0xfe,0x2b,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x81, +0x83,0x00,0x04,0xf8,0xfd,0xfd,0x56,0x83,0x00,0x04,0xfa,0xfd,0xfd,0xf8,0x83,0x00, +0x05,0xfc,0xfd,0xfd,0x00,0x56,0x83,0x00,0x01,0xfc,0x86,0xfd,0x01,0xac,0x83,0x00, +0x02,0xf6,0x00,0x83,0xfd,0x0e,0xf6,0x00,0x00,0xf6,0xfd,0xfd,0xf9,0x00,0x81,0x00, +0x00,0xf6,0xfc,0xfc,0x83,0xfb,0x03,0xfc,0xf6,0xf5,0x83,0xfb,0x06,0xfc,0xfb,0xfb, +0xfc,0xfb,0xf7,0x83,0x00,0x0a,0x2b,0xfc,0xfb,0x56,0x00,0xf7,0x00,0x00,0xf6,0x81, +0x83,0x00,0x05,0x81,0xfc,0xfb,0x00,0x2b,0x83,0x00,0x0b,0xfa,0xfd,0xfe,0x00,0x2b, +0xfd,0xfd,0x2b,0x00,0xfd,0x2b,0x83,0x00,0x03,0xf9,0xfc,0xf9,0x83,0x00,0x16,0x56, +0xfb,0xfc,0x00,0xf6,0x00,0x00,0xfb,0xfb,0xfc,0xfb,0xfb,0x56,0x00,0xf9,0x00,0x00, 
+0xf5,0xfb,0xfc,0xfb,0xf7,0x83,0x00,0x0c,0x81,0xfb,0xfb,0x00,0x56,0xfb,0xfb,0xf5, +0xf6,0xfb,0xfb,0xfc,0x83,0xfb,0x01,0xf8,0x83,0x00,0x02,0x56,0xfc,0x84,0x00,0x02, +0xfb,0xfa,0x84,0x00,0x03,0x81,0xfb,0xfb,0xc2,0x00,0x2b,0x00,0x05,0x00,0x00,0xf9, +0xfd,0xfa,0x83,0x00,0x09,0xfa,0xfd,0xfe,0x00,0x2b,0xf6,0x00,0x00,0xf9,0x84,0xfb, +0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0x56,0x00,0x83,0xfb,0x06,0xfc,0x2b,0x00,0xfb, +0xfc,0xf6,0x83,0x00,0x07,0xf7,0xfb,0xfb,0xf5,0xf6,0xfc,0xf6,0x83,0x00,0x03,0x56, +0xfc,0x81,0x83,0x00,0x09,0xfa,0xfd,0xfd,0xf5,0x2b,0xf6,0x00,0x00,0xfa,0x85,0xfd, +0x04,0xfe,0x00,0x00,0xac,0x83,0xfd,0x0a,0xfe,0x2b,0x00,0xfd,0xfd,0xac,0xf5,0x00, +0x00,0xf8,0x87,0xfd,0x09,0xf6,0x00,0x00,0x2b,0x00,0xf9,0xfd,0xfd,0xf8,0x83,0x00, +0x05,0xfc,0xfd,0xfd,0x00,0x56,0x83,0x00,0x01,0xfc,0x9a,0x07,0xfd,0x04,0xf8,0x00, +0x00,0xf9,0x83,0xfd,0x0a,0x00,0xf7,0xfd,0xfc,0x00,0xf8,0xfd,0xf7,0x00,0xfe,0x85, +0xfd,0x2f,0x00,0x00,0xf7,0xfe,0xf5,0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x2b,0xf5, +0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf5,0xf6,0xfd,0xfd,0xfb,0xfd,0xfd,0x00,0xf8,0xfd, +0xac,0xac,0xfd,0xf9,0x00,0x81,0xfd,0xf5,0xf6,0xfd,0xfd,0x00,0x00,0xfa,0xac,0x00, +0xf7,0x86,0xfd,0x18,0x00,0xf6,0xac,0xfb,0xf5,0x00,0xfe,0xfd,0x2b,0x00,0xac,0xac, +0x00,0xf8,0xfd,0xf9,0x00,0x00,0xfc,0xfa,0x00,0xfa,0xfb,0xfc,0x83,0xfb,0x03,0x00, +0x2b,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xe2,0x56,0x00,0x56,0xfb,0x2d,0x56,0x00, +0xf5,0xfa,0xf9,0x00,0x00,0x56,0x81,0x00,0x2b,0xfb,0xfb,0x00,0xf5,0xf9,0xfd,0xf6, +0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfe,0xfd,0xf5,0x2b,0xfe,0xfd,0xfc, +0x00,0xf7,0xfc,0x2b,0x00,0xfc,0xfb,0x00,0x00,0x56,0xfc,0x83,0xfb,0x18,0xfc,0xfb, +0x56,0x00,0x00,0xfa,0xf9,0x00,0xf9,0xfb,0xf8,0x00,0xf9,0xfc,0xf5,0xf5,0xfc,0xfb, +0x00,0xf8,0xfc,0xfb,0x00,0x2b,0xc2,0xfb,0xfc,0xfb,0x2a,0x00,0xf7,0xfb,0x81,0x81, +0xfb,0xfc,0x00,0x56,0xfb,0xfb,0xfa,0x56,0xfb,0x81,0x00,0x2b,0xfc,0xfb,0x00,0xf5, +0xfb,0xfe,0xfd,0xf5,0x2b,0xfe,0xfd,0xac,0x00,0xf8,0xfd,0xf7,0x00,0xfd,0xfd,0xf5, 
+0x00,0xf7,0xfb,0xf5,0xf5,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x05,0xfc,0xf8,0x00,0x00, +0xf5,0x83,0x00,0x21,0xf5,0xfb,0xfb,0x56,0xfa,0xfb,0x2b,0x00,0xfc,0xfb,0x00,0x2b, +0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0xfc,0x00,0xf8,0xfd,0xf7,0x00,0xfe,0xfd,0x00,0x00, +0xf7,0xfe,0xf5,0xf5,0xfe,0x85,0xfd,0x04,0x81,0x00,0x00,0xf8,0x83,0xfd,0x0a,0x2b, +0xf5,0xfd,0xfd,0x00,0xf6,0xfd,0x81,0x00,0xfb,0x85,0xfd,0x16,0x2b,0x00,0xfc,0xfc, +0x2b,0x00,0xf9,0xfe,0xf9,0x00,0x81,0xfd,0xf5,0xf6,0xfd,0xfd,0x00,0x00,0xfa,0xac, +0x00,0xf7,0x9a,0x07,0xfd,0x11,0xfe,0xfb,0xf5,0x00,0xf8,0xfd,0xfe,0xf5,0xf6,0xfe, +0xf8,0x00,0xf9,0xf9,0xfa,0x00,0xfa,0x85,0xfd,0x17,0xf5,0xf6,0xfe,0xfd,0x2b,0x00, +0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0xf6,0x00, +0x56,0x83,0xfd,0x04,0xfe,0x00,0xf5,0xfa,0x83,0xfd,0x0e,0xf6,0xf5,0xf9,0xfa,0xf7, +0x00,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0xf5,0x2b,0x85,0xfd,0x18,0xfb,0x00,0xfb,0xfd, +0xfd,0xfa,0x00,0xfd,0xfd,0x00,0x2b,0xf9,0xf9,0xf6,0xf5,0xfe,0xf9,0x00,0xac,0xfd, +0xfe,0x00,0xf9,0xfc,0x83,0xfb,0x06,0xfc,0xf5,0xf6,0xfb,0xfb,0xfc,0x83,0xfb,0x34, +0xfc,0xf5,0xf5,0xfb,0xfc,0xfb,0xf6,0x00,0xfb,0x56,0x00,0x81,0xfb,0xfb,0x00,0xf8, +0xfb,0xfc,0xf5,0xf6,0xfb,0xfc,0x00,0x56,0xfb,0xfd,0xac,0x00,0xfa,0xfd,0xf5,0x2b, +0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf8,0x00,0xf9,0xf8,0x56,0x00, +0x56,0xfb,0xf5,0xf6,0xc2,0xfc,0xfb,0xfb,0x18,0xfc,0xf8,0x00,0x81,0xfc,0xfb,0x00, +0x56,0xfb,0xf6,0xf5,0x56,0xf8,0x2b,0x00,0xfb,0xfb,0x00,0x56,0xfb,0xfc,0xf5,0xf6, +0xfc,0x84,0xfb,0x04,0xfc,0x00,0xf5,0x56,0xa2,0xfc,0xfb,0x02,0x00,0xf8,0xa2,0xfc, +0xfb,0x08,0xfb,0xfc,0x00,0x2b,0xfb,0xac,0x00,0x56,0x83,0xfd,0x14,0xf5,0x2b,0xfd, +0xfd,0xf8,0x00,0xf9,0xf9,0xfa,0x00,0xfa,0xfd,0xf5,0xf6,0xfb,0xfc,0x2b,0x00,0xfb, +0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0x56,0x00,0x84,0x56,0x21,0xf6,0x00,0xfc, +0xfb,0xfb,0xfc,0xfb,0x56,0x00,0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0xf5,0xf6,0xfb,0xfc, +0xf8,0x00,0xf9,0xf9,0xfa,0x00,0xfa,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0x87,0xfd, 
+0x10,0xac,0xf6,0x00,0xf5,0xfd,0xfd,0x2b,0xf5,0xfd,0xfb,0x00,0x56,0xf9,0xf9,0x00, +0xf7,0x84,0xfd,0x17,0xfe,0x00,0xf8,0xfd,0xfd,0xfe,0x00,0xf9,0xfd,0xf6,0xf5,0xf9, +0xfa,0xf7,0x00,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0xf5,0x2b,0x9d,0x07,0xfd,0x0e,0xf7, +0x00,0xfb,0xfd,0x00,0xf7,0xfd,0x2b,0x00,0xf5,0xf5,0x00,0xf5,0x81,0x85,0xfd,0x2f, +0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfe,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd, +0xf9,0x00,0xfd,0xfd,0xac,0x2b,0x00,0x00,0x81,0xfd,0xfd,0xfb,0xf6,0x00,0x00,0xac, +0xfd,0x00,0x00,0xf5,0x00,0xf5,0xf5,0xfd,0xfd,0x00,0xf9,0xfe,0xfd,0x00,0xf7,0x85, +0xfd,0x02,0xf9,0x00,0x83,0xfd,0x04,0xf9,0x00,0xfd,0xfd,0xa2,0x00,0xf5,0x05,0x00, +0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x0a,0x00,0xf9,0xac,0xfb,0xfc,0xfb,0xfb,0xf5,0xf6, +0xfc,0x86,0xfb,0x35,0xf5,0xf6,0xfc,0xfb,0xfb,0x2b,0xf5,0xfb,0x56,0x00,0xfb,0xfc, +0xfb,0x00,0x56,0xfb,0xfb,0x00,0x2b,0xfc,0xfb,0x00,0x56,0xfb,0xfc,0xfd,0x00,0xfa, +0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0x2b,0x00,0x00, +0xf5,0xf5,0x00,0xf9,0xfc,0x00,0xf6,0xfb,0xfc,0x85,0xfb,0x08,0x56,0x00,0xfb,0xfb, +0xfc,0x00,0xf8,0xfc,0x83,0x00,0x0e,0xf5,0xf5,0x00,0xfc,0xfb,0x00,0x56,0xfb,0xfb, +0x00,0x2b,0xfb,0xfb,0xfc,0x83,0xfb,0x0d,0xfa,0xf5,0x00,0x00,0x81,0xfb,0xfb,0x00, +0x56,0xfb,0xfb,0x81,0xf6,0x83,0x00,0x05,0x2b,0xfc,0xfd,0x00,0xf9,0x83,0xfd,0x15, +0x00,0xf7,0xfd,0xfd,0x2b,0x00,0xf5,0xf5,0x00,0xf5,0x81,0xfd,0x00,0x2b,0xfc,0xfb, +0xf6,0xf5,0xfb,0xfb,0xfc,0x85,0xfb,0x0d,0xfc,0xfb,0xf8,0x00,0xfc,0xfb,0xfb,0xfc, +0xf6,0xf5,0xfb,0xfc,0xf8,0x84,0x00,0x18,0xfb,0xfb,0xf5,0xf6,0xfc,0xfb,0x00,0x2b, +0xfc,0xfc,0x2b,0x00,0xf5,0xf5,0x00,0xf5,0x81,0xfd,0x00,0x2b,0xfe,0xfd,0x2b,0xf5, +0x88,0xfd,0x0a,0xfe,0xfa,0x00,0xf8,0xfd,0x2b,0xf5,0xfd,0xf9,0x00,0x84,0xf5,0x01, +0xf8,0x85,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x11,0x00,0xf9,0xfd,0x00,0x00,0xf5,0x00, +0xf5,0xf5,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0xf7,0x9e,0x07,0xfd,0x0b,0x00,0xf9, +0xfd,0xf5,0x2b,0xfd,0xf8,0x00,0xfd,0xfd,0xfe,0x87,0xfd,0x12,0xf5,0x2b,0xfd,0xfd, 
+0xf7,0x00,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0x56,0x00,0x84,0xfd, +0x03,0xac,0xf6,0x00,0x84,0xfd,0x09,0xfc,0x00,0xf5,0xfd,0xf6,0xf5,0xfd,0xfd,0xfe, +0x83,0xfd,0x06,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0x85,0xfd,0x0b,0x81,0x00,0xac,0xfd, +0xfd,0xf9,0x00,0xfd,0xfe,0x00,0xf7,0xa2,0xfe,0xfd,0x03,0xfd,0xf9,0x00,0x83,0xfd, +0x0c,0x00,0xf9,0xac,0xfb,0xfb,0xfc,0xfb,0xf7,0x00,0xfa,0xfb,0xfb,0xa2,0xfc,0xfb, +0x34,0xf6,0xf5,0xfb,0xfb,0xfc,0xf5,0xf5,0xfc,0xf8,0x00,0xfb,0xfb,0xfc,0x00,0xf8, +0xfb,0xfc,0xf5,0xf6,0xfb,0xfb,0x00,0xf8,0xfc,0xfb,0x81,0x00,0xfb,0xfd,0xf5,0x2b, +0xfd,0xfe,0xf6,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf8,0x00,0xfe,0xfd,0xac,0xfc, +0xfc,0xfb,0xf5,0xf6,0xfc,0x84,0xfb,0x0e,0xfc,0xfb,0x56,0x00,0xfc,0xfb,0xfb,0x00, +0x56,0xfb,0xf6,0x00,0xfc,0xfc,0xa2,0xfb,0xfc,0x06,0x00,0x56,0xfb,0xfb,0xf5,0xf6, +0xe2,0xfb,0xfc,0xfb,0xfb,0x13,0x81,0x00,0xf5,0xfb,0xfc,0x00,0x56,0xfb,0xfc,0xf5, +0x00,0xfc,0xfc,0xf5,0xf6,0xfd,0xfd,0x00,0xfa,0x83,0xfd,0x09,0xf5,0x2b,0xfd,0xfd, +0xf8,0x00,0xfd,0xfd,0xfe,0x83,0xfd,0x07,0xf5,0x2b,0xfb,0xfb,0x2b,0x00,0xfc,0x85, +0xfb,0x01,0xfc,0x83,0xfb,0x1e,0x56,0x00,0xfb,0xfc,0xfb,0xfb,0x2b,0x00,0xfb,0xfa, +0x00,0xf9,0xfc,0x56,0x00,0xfb,0xfc,0x00,0x2b,0xfb,0xfc,0xf5,0xf6,0xfb,0xfd,0xf8, +0x00,0xfd,0xfd,0xfe,0x83,0xfd,0x06,0xf5,0x2b,0xfd,0xfd,0x2b,0x00,0x89,0xfd,0x0a, +0xfe,0xf5,0x2b,0xfd,0xf7,0x00,0xfd,0xfb,0x00,0x81,0x89,0xfd,0x02,0x00,0x56,0x83, +0xfd,0x08,0x00,0xf9,0xfe,0xf6,0xf5,0xfd,0xfd,0xfe,0x83,0xfd,0x06,0x00,0xf9,0xfd, +0xfd,0xf5,0x2b,0x99,0x07,0xfd,0x12,0xfa,0xf8,0xfb,0xfd,0xf7,0x00,0xac,0xfd,0x00, +0xf7,0xfd,0xac,0x00,0xf6,0xfc,0xfd,0xfa,0xfb,0x85,0xfd,0x2f,0x00,0xf7,0xfd,0xfd, +0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xac,0xf9,0x00,0x00,0xfd,0xfd, +0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x56,0xac,0xfc,0xf5,0xf6,0xfd,0x81,0x00,0xf7, +0xfe,0xfd,0x56,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0x00,0xf7,0x86,0xfd,0x11,0x00,0xf6, +0xfe,0xf9,0xf5,0x00,0xfd,0xfd,0xf7,0x00,0xf9,0xfd,0xfc,0xf9,0xfe,0xf9,0x00,0x83, 
+0xfd,0x04,0x00,0xf9,0xfd,0xfc,0x83,0xfb,0x3f,0xfc,0x00,0x00,0x56,0xfb,0xfb,0xf8, +0x2b,0xfb,0x56,0x00,0x56,0xfb,0xf8,0x00,0x56,0xfb,0x56,0x00,0xfc,0xfb,0xfb,0x00, +0x56,0xfb,0xfb,0x00,0x2b,0xfc,0xfb,0x00,0xf5,0xfa,0x81,0xf5,0x00,0xfe,0xfd,0xf5, +0xf5,0xfd,0xf7,0x00,0xf5,0xfd,0xfd,0xf5,0xf5,0xfe,0xfd,0xac,0x00,0xf5,0xac,0xfd, +0xf9,0xfa,0xfb,0x00,0x2b,0xfb,0xfb,0xfc,0x83,0xfb,0x17,0xfc,0xf8,0x00,0xfb,0xfc, +0xfb,0x00,0x56,0xfb,0xf9,0x00,0x2b,0xfb,0xfb,0xf8,0xfb,0xfb,0x00,0x2b,0x81,0xf5, +0x00,0x2b,0x83,0xfb,0x18,0xfc,0xfb,0xfb,0xf8,0x81,0x81,0xf5,0xf5,0xfc,0xfb,0x00, +0x2b,0xfb,0xfb,0xf5,0xf5,0xfb,0x56,0x00,0x2b,0xfd,0xfd,0x00,0xf9,0x83,0xfd,0x16, +0xf5,0xf5,0xfe,0xfd,0xac,0x00,0xf6,0xfc,0xfd,0x81,0xfb,0xac,0x00,0xf6,0xfc,0xfb, +0xf6,0xf5,0xfb,0xfb,0x56,0xf6,0xc2,0xfc,0xfb,0xfb,0x27,0x56,0x00,0xfb,0xfb,0xfc, +0xfb,0xf6,0xf5,0xfc,0x56,0x00,0xf9,0x81,0xf6,0x00,0x81,0xfb,0xf5,0xf6,0xfc,0xfb, +0xf5,0xf5,0xfc,0xfd,0xac,0x00,0xf6,0xfc,0xfd,0xfa,0xfb,0xfd,0xf5,0x2b,0xfd,0xfd, +0x2b,0xf5,0x86,0xfd,0x11,0x2b,0xfa,0xfd,0xf9,0x00,0xfa,0xfd,0x2b,0xf5,0xfe,0xfd, +0xf5,0x00,0xfb,0xfd,0xfb,0x81,0x85,0xfd,0x17,0xf5,0xf5,0xac,0xfb,0xf5,0x00,0xfa, +0xfd,0x81,0x00,0xf7,0xfd,0xfd,0x56,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfe, +0x98,0x07,0xfd,0x02,0xac,0xf6,0x83,0x00,0x09,0x81,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd, +0xfb,0xf5,0x83,0x00,0x01,0xfb,0x85,0xfd,0x15,0xf5,0x2b,0xfd,0xfe,0x2b,0xf5,0xfd, +0xfd,0xfa,0x00,0xfd,0xfd,0xac,0xf5,0x00,0xf5,0x56,0x00,0xfd,0xfe,0x2b,0x83,0x00, +0x0b,0x81,0xfd,0xfd,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xfa,0x83,0x00,0x09,0xf5, +0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0x86,0xfd,0x01,0x81,0x83,0x00,0x06,0xf7, +0x00,0xfd,0xfd,0xfe,0xf7,0x83,0x00,0x04,0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x0a,0x00, +0xfa,0xfd,0xac,0xfb,0xfb,0xfc,0xfb,0x81,0x2b,0x84,0x00,0x04,0xf7,0xfc,0xfb,0xf7, +0x83,0x00,0x05,0xf7,0xfc,0xfb,0x56,0x00,0x83,0xfb,0x0a,0x00,0xf8,0xfc,0xfb,0xf5, +0x2b,0xfb,0xfb,0x00,0xf6,0x83,0x00,0x12,0xfb,0xfd,0xfd,0x81,0x00,0x00,0xf6,0xf7, 
+0x00,0xfd,0xfd,0x81,0x00,0x00,0xf9,0xfd,0xfb,0xf5,0x83,0x00,0x04,0x81,0xfc,0xf5, +0xf6,0xc2,0xfb,0xfc,0xfb,0x03,0xfb,0x56,0x00,0x83,0xfb,0x05,0x00,0xf8,0xfc,0xfb, +0x56,0x84,0x00,0x08,0xfc,0xfb,0xf7,0x00,0x00,0xf7,0xf5,0xf6,0xa2,0xfc,0xfb,0x17, +0xfb,0xfb,0xf5,0x00,0x00,0xf5,0x81,0xfb,0xfb,0xf7,0x00,0x00,0xfc,0xf9,0x00,0x00, +0xf5,0xf8,0x00,0xf7,0xfd,0x00,0xf9,0x83,0xfd,0x07,0x81,0x00,0x00,0xf9,0xfd,0xfb, +0xf5,0x83,0x00,0x0e,0xfb,0xac,0x00,0x2b,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0xf6,0x00, +0xfb,0xfc,0x84,0xfb,0x02,0x56,0x00,0x83,0xfb,0x22,0xfc,0x2b,0x00,0xfb,0xfb,0xf6, +0x00,0x00,0xf8,0x00,0x00,0xfc,0x00,0x2b,0xfb,0xfb,0xf9,0x00,0x00,0xf9,0xfd,0xfb, +0xf5,0x00,0x00,0xf5,0xfb,0xfd,0xf5,0x2b,0xfd,0xfe,0x2b,0xf5,0x86,0xfd,0x01,0x2b, +0x83,0x00,0x09,0x56,0xfd,0xfe,0x2b,0x00,0xfd,0xfd,0xac,0x2b,0x83,0x00,0x01,0x56, +0x85,0xfd,0x0a,0xac,0xf5,0x00,0x00,0x56,0x00,0xf9,0xfd,0xfd,0xfa,0x83,0x00,0x09, +0xf5,0xfd,0xfd,0x00,0xfa,0xfd,0xfd,0xf5,0x2b,0x9c,0x07,0xfd,0x01,0xfe,0x89,0xfd, +0xc2,0xfe,0xfd,0xfd,0x99,0x01,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x84,0xfd,0x01, +0xfe,0x94,0x01,0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x8b,0xfd,0x01,0xfc,0x83,0xfb, +0x09,0xfc,0xfb,0xfb,0xfc,0xfb,0xfc,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb, +0xfc,0xa2,0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x07,0x00,0x56,0xfb,0xfc,0xfc,0xfb,0xac, +0x84,0xfd,0xa2,0xfe,0xfd,0x03,0xfd,0xfd,0xfe,0x84,0xfd,0x09,0xfe,0xfd,0xfd,0xfe, +0xfd,0xac,0xac,0xfb,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xa3,0xfb, +0xfc,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb, +0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x02,0xfb,0xac,0x87,0xfd,0x01,0xfe,0x84,0xfd, +0x01,0xfe,0x87,0xfd,0x01,0xfe,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa3,0xfc,0xfb,0x02, +0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xfc,0xc2,0xfb,0xfc, +0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0xfc,0xfc,0x8b,0xfd,0x01,0xfe,0x8a,0xfd,0x01, +0xfe,0x87,0xfd,0x01,0xfe,0x86,0xfd,0x01,0xfe,0x8a,0xfd,0x01,0xfe,0x88,0xfd,0x01, 
+0xfe,0x95,0x0e,0xfd,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xe3,0xfb,0xfb, +0xfb,0xfc,0x85,0xfb,0x04,0xfc,0xfb,0x00,0x56,0x85,0xfb,0x01,0xac,0x95,0x01,0xfd, +0x01,0xac,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfc, +0xfb,0xfb,0x01,0xac,0x95,0x01,0xfd,0x01,0xac,0xe2,0xfb,0xfb,0xfb,0xfc,0x84,0xfb, +0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x83,0xfb,0x01,0xfc,0x87,0xfb,0x04, +0xfc,0xfb,0xfb,0xfc,0x9f,0x11,0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0x84,0xfb, +0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x04,0xfb,0xfb,0xf6,0xfa, +0xc2,0xfc,0xfb,0xfb,0x97,0x01,0xfd,0x01,0xac,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xc2, +0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb, +0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0xfb,0xac,0x97,0x01,0xfd,0xe5,0xfb,0xfb,0xfc, +0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x91,0x12, +0xfd,0x02,0xfc,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc, +0x83,0xfb,0xa3,0xfc,0xfb,0x85,0xfb,0xa3,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb, +0x01,0xac,0x97,0x01,0xfd,0x01,0xac,0xc4,0xfb,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x83, +0xfb,0xc3,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x98,0x01,0xfd,0xe2, +0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xe2,0xfc, +0xfb,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x01,0xfc,0x91,0x12,0xfd,0x01,0xac,0xe2, +0xfb,0xfb,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x88,0xfb,0xa2,0xfc,0xfb,0x86,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x98,0x01,0xfd,0x02,0xac,0xac,0x84,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb, +0x05,0xfb,0xfb,0xfc,0xfb,0xac,0x98,0x01,0xfd,0x01,0xac,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0xe2,0xfb, +0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xac,0x93,0x10,0xfd,0x02,0xfb, 
+0xfa,0x8d,0xfd,0x02,0xac,0xf9,0x8e,0xfd,0x02,0xfc,0xfc,0x86,0xfb,0xc2,0xfc,0xfb, +0xfb,0xc3,0xfb,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x03,0x81,0xf8,0xfc,0x84,0xfb,0xc2, +0xfc,0xfb,0xfb,0x01,0xac,0x8a,0xfd,0x02,0xfb,0xfa,0x8d,0xfd,0x06,0xac,0xfb,0x56, +0x81,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x87, +0xfb,0x04,0xfc,0x56,0xfa,0xac,0x99,0x01,0xfd,0x04,0xac,0xfc,0xfb,0xfb,0xa2,0xfc, +0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x04,0xfc,0xfb,0xfc,0xfc, +0x96,0x02,0xfd,0x02,0xfb,0xfa,0x8d,0xfd,0x02,0xac,0xf9,0x97,0x08,0xfd,0x01,0xf6, +0xa2,0x00,0xf5,0x04,0xf8,0xfd,0xf6,0xf8,0x94,0x01,0xfd,0x03,0xfc,0x00,0xfe,0x84, +0xfd,0x03,0xf5,0x56,0xfe,0x9f,0x01,0xfd,0x02,0x2b,0xf5,0x8d,0xfd,0x02,0xf9,0x00, +0x8e,0xfd,0x03,0xac,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0xc2, +0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x07,0xfc,0xfb,0x56,0x00,0xfb,0xfb,0xfc, +0x84,0xfb,0x06,0xfc,0x81,0xf9,0xfb,0xf9,0xfc,0x89,0xfd,0x02,0x2b,0xf5,0x8f,0xfd, +0x02,0x00,0xf9,0x86,0xfb,0x02,0xf6,0xf7,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc, +0xfb,0xfb,0x03,0xac,0xf5,0x2b,0x95,0x01,0xfd,0x06,0xfb,0xf6,0x00,0xf5,0xf6,0xf9, +0x87,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x06,0xf9,0x81,0x81,0x56,0xfb,0xfc,0x84,0xfb, +0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xac,0x96, +0x02,0xfd,0x02,0x2b,0xf5,0x8d,0xfd,0x02,0xf9,0x00,0x97,0x01,0xfd,0x02,0xf8,0xf6, +0x9e,0x06,0xfd,0x09,0xf5,0xf6,0xf9,0xfa,0xf9,0xfb,0xfe,0xf8,0xf9,0x94,0x01,0xfd, +0x02,0x56,0xf5,0x84,0xfd,0x03,0x81,0x00,0x00,0x90,0x02,0xfd,0x02,0xf7,0x00,0x8d, +0xfd,0x06,0xf9,0x00,0xfd,0xfd,0xfb,0xfa,0x8b,0xfd,0x02,0xfc,0xfc,0x84,0xfb,0x01, +0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x06,0xfc,0xf8,0x00, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x07,0xfb,0xf9,0xf5,0xfb,0xf6,0x56,0xac,0x87,0xfd, +0x03,0xfe,0x2b,0x00,0x86,0xfd,0x02,0xfa,0xfb,0x87,0xfd,0x0a,0x00,0xf9,0xfd,0xfd, 
+0xac,0xfc,0xfc,0xfb,0xf7,0x56,0xa2,0xfb,0xfc,0x85,0xfb,0x05,0xfc,0xfb,0xfb,0xfc, +0xac,0x83,0xfd,0x02,0xf5,0x2b,0x94,0x01,0xfd,0x09,0x56,0x00,0xf5,0xfa,0xf9,0x2b, +0x00,0xfb,0xfb,0xa3,0xfc,0xfb,0x85,0xfb,0x0b,0xfc,0xfb,0xf6,0x56,0xf9,0xf6,0xfc, +0xfb,0xfb,0x56,0x81,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x05,0xfb,0xfb, +0xfc,0xfb,0xfc,0x96,0x02,0xfd,0x03,0xfe,0x2b,0x00,0x8d,0xfd,0x06,0xf9,0x00,0xfd, +0xfd,0xfb,0xfa,0x8a,0xfd,0x02,0xfb,0xfa,0x87,0xfd,0x02,0xf9,0xf8,0x89,0xfd,0x02, +0xf9,0xfc,0x93,0x06,0xfd,0x03,0xf5,0x2b,0xfe,0x85,0xfd,0x01,0xfe,0x94,0x01,0xfd, +0x02,0xf6,0xf7,0x84,0xfd,0x04,0xf5,0x2b,0x00,0xfb,0x9f,0x01,0xfd,0x02,0x2b,0xf5, +0x8d,0xfd,0x06,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0x8b,0xfd,0x01,0xac,0xe2,0xfb,0xfb, +0xfc,0xfb,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb, +0x03,0xfb,0x56,0x00,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfc,0xfb,0xac,0x87,0xfd, +0x02,0x2b,0xf5,0x86,0xfd,0x02,0x00,0x2b,0x87,0xfd,0x02,0x00,0xf9,0x86,0xfd,0x03, +0xac,0xac,0xfd,0x87,0xac,0x02,0xfd,0xac,0x87,0xfd,0x03,0x00,0x2b,0xfe,0x93,0x01, +0xfd,0x06,0x00,0xf7,0xfe,0xac,0xfb,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0xa2,0xfc,0xfb, +0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x05,0xfc,0x00,0x56,0xfb,0xfc,0x84,0xfb, +0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0x02,0xfc,0xac,0x97,0x02,0xfd,0x02,0x2b, +0xf5,0x8d,0xfd,0x06,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0x8a,0xfd,0x02,0x2b,0xf5,0x87, +0xfd,0x01,0xfe,0x8a,0xfd,0x02,0x00,0xf9,0x93,0x06,0xfd,0x02,0xf5,0x2b,0x85,0xfd, +0x06,0xf5,0xf6,0xfd,0xfd,0x00,0x56,0x83,0x00,0x01,0xfc,0x8b,0xfd,0x03,0xfe,0x00, +0xfb,0x83,0xfd,0xa2,0xac,0x00,0x0d,0xf6,0xfd,0xfd,0xfe,0x2b,0x00,0xfd,0xfd,0xf9, +0x00,0xfd,0xfd,0x81,0x83,0x00,0x04,0xf8,0xfd,0xfd,0x56,0x83,0x00,0x04,0x81,0xfd, +0xfd,0x56,0x83,0x00,0x0b,0xf7,0xfe,0x2b,0xf5,0xf8,0x00,0x00,0xf8,0xfd,0xfd,0x56, +0x83,0x00,0x07,0xf6,0xfd,0xfd,0xf9,0x00,0xfe,0xf9,0x83,0x00,0x10,0x2b,0xfd,0xac, +0xf5,0x00,0x00,0xf8,0xfd,0xfd,0x2b,0x00,0xf6,0x00,0xf8,0xfb,0xfc,0x83,0xfb,0x09, 
+0xf9,0x00,0x00,0xf5,0x00,0xf6,0xfb,0xfb,0xf9,0x83,0x00,0x04,0x56,0xfc,0xfb,0x81, +0x83,0x00,0x13,0xf5,0x00,0xfb,0xfc,0x2b,0x00,0xf6,0x00,0xf8,0x56,0x00,0xfb,0xfb, +0xfc,0x00,0x56,0xfb,0xac,0x56,0x83,0x00,0x0a,0xf8,0xfd,0x2b,0xf5,0xfd,0xfd,0xf6, +0xf5,0xfd,0x2b,0x83,0x00,0x01,0xf9,0x85,0xfd,0x02,0x00,0xf7,0x83,0x00,0x08,0xfa, +0xfd,0xfe,0x00,0x2b,0xfd,0xfd,0x2b,0x83,0x00,0x87,0xfd,0x09,0xf8,0x00,0x00,0xf5, +0xf5,0x2b,0xfd,0xfd,0xf5,0x83,0x00,0x04,0xfa,0xfd,0xfd,0x56,0x83,0x00,0x01,0xfa, +0x85,0xfd,0x07,0x56,0x00,0xfd,0xac,0xfb,0xfb,0xfc,0x83,0xfb,0x0d,0x81,0xf5,0x00, +0x00,0xf7,0xfb,0xfb,0x2b,0x00,0xf6,0x00,0xf8,0xfa,0x84,0x00,0x02,0x81,0xfc,0x84, +0x00,0x83,0xfb,0x04,0xfc,0xfb,0xfb,0xf5,0x83,0x00,0x0c,0xf9,0xfb,0xfc,0x00,0xf6, +0xfc,0xfe,0x2b,0x00,0xfd,0xfd,0x56,0x83,0x00,0x0c,0x81,0xfd,0xfd,0xf8,0x00,0x00, +0x2b,0x00,0xf9,0xfd,0xfd,0x81,0x83,0x00,0x04,0xfa,0xfd,0xfd,0x56,0x83,0x00,0x04, +0xfa,0xfd,0xfd,0x56,0x83,0x00,0x0b,0xf8,0xfd,0x2b,0xf5,0xf8,0x00,0x00,0xf8,0xfd, +0xfd,0x56,0x83,0x00,0x07,0xf6,0xfd,0xfd,0xf9,0x00,0xfe,0xf9,0x83,0x00,0x09,0x2b, +0xfd,0xac,0xf5,0x00,0x00,0xf8,0xfd,0xf9,0x83,0x00,0x01,0x2b,0x85,0xfd,0x05,0x2b, +0x00,0xfd,0xfd,0x56,0x83,0x00,0x02,0xfa,0xfe,0x84,0x00,0x92,0x06,0xfd,0x11,0x00, +0xf5,0x2b,0x2b,0xf8,0xfd,0xfd,0x00,0xf7,0xfd,0xfd,0x00,0x00,0xfa,0xac,0x00,0xf7, +0x8b,0xfd,0x03,0xfa,0x00,0xfe,0x83,0xfd,0x05,0x2b,0xf6,0xfd,0xf8,0x00,0x83,0xfd, +0x31,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf5,0xf6,0xfd,0xfd,0xfb,0xfd,0xfd, +0x00,0xf8,0xfd,0xac,0xfc,0xfd,0xf9,0x00,0xf7,0xfe,0xfd,0xfb,0xfd,0x2b,0x00,0xf6, +0xac,0xf7,0x00,0xfd,0xfd,0x81,0x81,0xfd,0xfa,0x00,0x81,0xfd,0xf9,0x00,0xfd,0xfd, +0x2b,0xf5,0x83,0xfd,0x0b,0x00,0xf6,0xfd,0x81,0x00,0xfb,0xfd,0x2b,0x00,0xf7,0xfc, +0x83,0xfb,0x39,0xfc,0xfb,0x81,0x00,0x2b,0xfc,0x56,0x00,0x2b,0xfb,0x81,0x00,0xf7, +0xfc,0x2b,0x00,0xfb,0xfc,0x00,0xf6,0x81,0xfa,0xf5,0x00,0xfb,0xfb,0xf6,0x00,0x2b, +0xfc,0xfb,0x56,0x00,0xfb,0xfc,0xfb,0x00,0xf8,0xfc,0xf8,0x00,0xf7,0xfe,0xfd,0xfb, 
+0xfd,0x2b,0xf5,0xfd,0xf6,0xf6,0xfe,0xfd,0xfd,0xf5,0x2b,0xfe,0x86,0xfd,0x0d,0x00, +0xf5,0xfa,0xfd,0xf6,0x00,0xfd,0xfd,0xf5,0x2b,0xfd,0xfa,0x00,0xa2,0xfb,0xfd,0x85, +0xfd,0x15,0xf9,0x00,0xf9,0xfe,0xf8,0x00,0x2b,0xfd,0xfd,0x56,0xac,0xfd,0xf5,0xf5, +0xfe,0xfd,0x00,0xf8,0xfd,0xac,0xac,0x85,0xfd,0x05,0x2b,0xf5,0xac,0xfb,0xfc,0x83, +0xfb,0x18,0xfc,0xfb,0x00,0xf5,0xfc,0xf9,0x00,0xfa,0xfb,0xf6,0x00,0x2b,0xfc,0xfc, +0xf9,0x56,0xfc,0x81,0x00,0x2b,0xfb,0xfc,0x00,0x56,0xa2,0xfb,0xfc,0x83,0xfb,0x47, +0xf8,0x81,0xfb,0xf5,0x00,0xfc,0xfb,0xf5,0xf6,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00, +0xf8,0xfd,0xac,0xfc,0xfd,0xf9,0x00,0xf9,0xfe,0xf7,0x00,0xf9,0xfd,0xac,0x00,0xf8, +0xfd,0xf7,0x00,0xfe,0xfd,0x00,0xf8,0xfd,0xac,0xac,0xfd,0xf9,0x00,0xf7,0xfd,0xfd, +0xfb,0xfd,0x2b,0x00,0xf6,0xac,0xf7,0x00,0xfd,0xfd,0x81,0x81,0xfd,0xfa,0x00,0x81, +0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0x83,0xfd,0x0a,0x00,0xf6,0xfd,0x81,0x00,0xfb, +0xfe,0x2b,0x00,0xfe,0x86,0xfd,0x0e,0x2b,0xf5,0xfd,0xfd,0x00,0xf8,0xfd,0xac,0xfc, +0xfd,0xfe,0x00,0xf9,0xfe,0x91,0x06,0xfd,0x14,0xfe,0xf5,0xf5,0x2b,0x2b,0xf8,0xfd, +0xfe,0xf5,0x2b,0xfd,0xfd,0x00,0x56,0xfd,0xfd,0xf5,0x2b,0xfd,0xac,0x85,0xf9,0x01, +0xfc,0x83,0xfd,0x02,0xf7,0xf6,0x84,0xfd,0x13,0x00,0xfb,0xfd,0xac,0x00,0x56,0xfd, +0xfd,0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0xf6,0x00,0x56,0x83,0xfd,0x04,0xfe, +0x00,0xf5,0xfa,0x83,0xfd,0x02,0xf5,0xf6,0x84,0xfd,0x07,0xfe,0x2b,0x00,0xfd,0xfd, +0xf9,0x00,0x85,0xfd,0x3c,0xfe,0x00,0xf9,0xfd,0xfa,0x00,0xfd,0xfe,0x2b,0xf5,0xfd, +0xfd,0xfb,0x00,0x56,0xfa,0xf9,0x00,0xf7,0xfd,0xf7,0x00,0xfd,0xac,0xfb,0xfc,0xfb, +0xfb,0xfc,0x2b,0x00,0xfc,0xfb,0xfc,0x00,0xf6,0xfc,0xf7,0x00,0x56,0xf8,0x56,0x00, +0xf9,0xfa,0x00,0xf9,0xfc,0xfb,0x56,0x00,0xfb,0xfc,0x2b,0x00,0xfc,0xfb,0xfb,0x56, +0x00,0x83,0xfb,0x07,0x00,0x56,0xfc,0xf5,0xf5,0xfc,0xfc,0x83,0xfd,0x05,0x2b,0xf5, +0xf8,0x00,0xac,0x83,0xfd,0x02,0xf5,0x2b,0x87,0xfd,0x0f,0x00,0xf9,0xfd,0xfd,0xac, +0x00,0xfa,0xfd,0xf5,0x2b,0xfd,0xfb,0x00,0x2b,0xfb,0x87,0xfd,0x02,0xf6,0xf5,0x83, 
+0xfd,0x02,0xf5,0x2b,0x85,0xfd,0x07,0xf7,0x00,0xfd,0xfd,0x00,0xf5,0xfa,0x87,0xfd, +0x03,0x2b,0x00,0xfc,0x83,0xfb,0x0d,0x81,0x56,0xfb,0xfa,0x00,0xf8,0xf8,0x56,0x00, +0x2b,0xfc,0x2b,0x00,0xc2,0xfc,0xfb,0xfb,0x08,0xfc,0x00,0x2b,0xfb,0xfb,0x00,0xf8, +0xfc,0x85,0xfb,0x13,0xfc,0xfb,0xfb,0xfc,0xf6,0xf5,0xfb,0xfc,0x00,0x2b,0xfd,0xfd, +0xf7,0x00,0xfd,0xfe,0x00,0xf5,0x81,0x83,0xfd,0x02,0xf5,0xf6,0x83,0xfd,0x0e,0x00, +0xf9,0xfd,0xf8,0x00,0xfa,0xf9,0xfa,0x00,0xfa,0xfd,0x00,0xf5,0xfa,0x83,0xfd,0x02, +0xf5,0xf6,0x85,0xfd,0x06,0xf7,0x00,0xfd,0xfd,0xf9,0x00,0x85,0xfd,0x16,0xfe,0x00, +0xf9,0xfd,0xfa,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xfb,0x00,0x56,0xfa,0xf9,0x00, +0xf7,0xfd,0x2b,0xf5,0x87,0xfd,0x07,0xf7,0x00,0xfd,0xfe,0x00,0xf5,0x81,0x84,0xfd, +0x02,0x00,0xf9,0x93,0x06,0xfd,0x03,0x00,0xf7,0xfe,0x84,0xfd,0x0c,0x00,0xf7,0xfd, +0xfd,0x00,0xf9,0xfe,0xfd,0x00,0xf7,0xfd,0xfa,0x85,0xf5,0x01,0x81,0x83,0xfd,0x02, +0xf5,0x56,0x83,0xfd,0x01,0x56,0xa3,0x00,0xf5,0x19,0xfe,0xfd,0x2b,0x00,0xfe,0xfd, +0xf9,0x00,0xfd,0xfd,0xac,0x2b,0x00,0x00,0x81,0xfd,0xfd,0xfb,0xf6,0x00,0x00,0xac, +0xfd,0xf5,0x2b,0x85,0xfd,0x09,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xfb,0x84, +0x00,0x0b,0xfa,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0xa2,0x00,0xf5, +0x08,0xf5,0xf8,0xfd,0x2b,0xf5,0xfd,0xac,0xfc,0x84,0xfb,0x02,0xf6,0xf5,0x83,0xfb, +0x2b,0xf5,0xf6,0xfc,0xf6,0x00,0x00,0xf5,0x00,0xf5,0x56,0x56,0x00,0xfc,0xfb,0xfb, +0x56,0x00,0xfb,0xfb,0xf6,0xf5,0xfb,0xfc,0xfb,0xf8,0x00,0xfc,0xfb,0xfb,0x00,0xf8, +0xfb,0xf5,0x2b,0xfc,0xfb,0xfc,0xfd,0xfd,0x2b,0x00,0x00,0xf7,0x84,0xfd,0x02,0x00, +0xf7,0x87,0xfd,0x02,0x00,0xf9,0x83,0xfd,0x0c,0x00,0xfa,0xfd,0x00,0xf7,0xfd,0xfd, +0xf9,0xf5,0x00,0xf6,0xfe,0x85,0xfd,0x02,0x00,0xf7,0x83,0xfd,0x05,0x00,0xf7,0xfd, +0xfd,0xf7,0x83,0x00,0x08,0xf5,0xfd,0xfd,0xfb,0xf6,0x00,0x00,0xac,0x84,0xfd,0x14, +0xfc,0xf7,0x00,0xfb,0xfc,0xfb,0xfb,0x56,0x00,0xfb,0x56,0x00,0x00,0xf5,0x00,0xf5, +0xf7,0xfb,0xf6,0xf5,0x83,0xfb,0x02,0x81,0xf6,0x83,0x00,0x05,0x2b,0xfb,0xfc,0x00, 
+0x56,0xc2,0xfb,0xfb,0xfc,0x18,0xfb,0x2b,0x00,0x00,0xf5,0x00,0xfc,0xfb,0x00,0x2b, +0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0xfb,0xf5,0x00,0x00,0xac,0xfd,0xf5,0x2b,0x83,0xfd, +0x14,0x00,0xfa,0xfd,0x2b,0x00,0x00,0xf5,0x00,0xf5,0x81,0xfd,0xfb,0xf6,0x00,0x00, +0xac,0xfd,0x00,0x2b,0xfe,0x84,0xfd,0x09,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, +0xfb,0x84,0x00,0x0d,0xfa,0xfd,0xf9,0x00,0xfd,0xfd,0x2b,0xf5,0xfe,0xfd,0xf9,0x00, +0x00,0x83,0xf5,0x04,0xf8,0xfd,0x2b,0xf5,0x87,0xfd,0x0d,0x2b,0xf5,0xfd,0xfd,0xfb, +0xf6,0x00,0x00,0xac,0xfd,0xfd,0x00,0xf9,0x93,0x06,0xfd,0x02,0xf5,0x2b,0x85,0xfd, +0x0c,0xf5,0x2b,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0xf5,0x2b,0xfd,0xfe,0x88,0xfd,0x03, +0xac,0x00,0xfe,0x83,0xfd,0x0f,0xf5,0x2b,0xfa,0xf9,0xfa,0xf7,0x00,0xfc,0xfd,0x2b, +0xf5,0xfd,0xfd,0x56,0x00,0x84,0xfd,0x03,0xac,0xf6,0x00,0x84,0xfd,0x06,0xfb,0xf5, +0x00,0xfe,0xf5,0xf6,0x85,0xfd,0x1b,0x2b,0xf5,0xfd,0xfd,0xfa,0x00,0xfd,0xfd,0x00, +0xf7,0xfd,0xfe,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xfd,0xfd,0xfb,0x00, +0xfa,0xfe,0x83,0xfd,0x31,0xfe,0x2b,0xf5,0xfd,0xfd,0xac,0xfb,0xfb,0xfc,0xfb,0xf7, +0x00,0xfb,0xfb,0xfc,0x00,0x2b,0xfb,0xf7,0x00,0xfc,0xfc,0xfb,0xfc,0xfb,0xf9,0x00, +0x81,0xfb,0xfc,0xf8,0x00,0xfc,0xfb,0x2b,0x00,0xfc,0xfb,0xfb,0x56,0x00,0xfb,0xfb, +0x81,0x00,0x56,0xfc,0xf5,0xf5,0xa2,0xfb,0xfc,0x0b,0xfd,0xf7,0xf5,0x56,0x00,0xf8, +0xfd,0xfd,0xfe,0xf5,0x2b,0x87,0xfd,0x11,0x00,0xfa,0xfd,0xfd,0xfb,0x00,0xfb,0xfd, +0xf5,0x2b,0xfd,0xfd,0xfe,0xfd,0xfa,0x00,0x2b,0x85,0xfd,0x02,0xf6,0xf6,0x83,0xfd, +0x09,0xf5,0x2b,0xfd,0xf8,0x00,0xac,0xfe,0x2b,0xf5,0x83,0xfd,0x04,0xfe,0xfb,0xf5, +0x00,0x83,0xfd,0x05,0xfc,0xfb,0xfa,0x00,0xf8,0x83,0xfb,0x1a,0x56,0x00,0xfb,0xfa, +0x00,0xf9,0xfb,0xfc,0xfb,0xfb,0xfc,0x2b,0x00,0xfb,0xfb,0xfc,0xf5,0xf5,0xfb,0xfc, +0xf5,0xf6,0xfc,0xfb,0x00,0x56,0x84,0xfb,0x10,0xfc,0xfb,0xf7,0x00,0x81,0xfc,0xf6, +0xf5,0xfb,0xfb,0xf5,0x2b,0xfd,0xfd,0x2b,0x00,0x83,0xfd,0x0f,0xfe,0xfb,0xf5,0xf5, +0xfd,0xf5,0xf6,0xfd,0xfd,0xac,0x00,0xf9,0xfd,0xf8,0x00,0xa2,0xfe,0xfd,0x84,0xfd, 
+0x06,0xfc,0x00,0xf5,0xfd,0xf6,0xf6,0x85,0xfd,0x1a,0xf7,0x00,0xfd,0xfd,0xfa,0x00, +0xfd,0xfd,0x00,0xf7,0xfe,0xfe,0x00,0xf9,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xfd, +0xfd,0xfb,0x00,0x81,0x85,0xfd,0x02,0xf7,0x00,0x87,0xfd,0x02,0x2b,0xf5,0x84,0xfd, +0x07,0xfc,0x00,0xf5,0xfd,0xfd,0x00,0xf9,0x93,0x06,0xfd,0x02,0x00,0x2b,0x85,0xfd, +0x0a,0x00,0xf7,0xfd,0xfd,0x00,0xf9,0xfd,0xfd,0x00,0xf7,0x8a,0xfd,0x02,0xf8,0xf5, +0x83,0xfd,0x03,0xfb,0x00,0xfe,0x84,0xfd,0x40,0x00,0xf7,0xfe,0xf7,0x00,0xac,0xf9, +0x00,0x00,0xfd,0xfd,0xf9,0xfb,0xfd,0x2b,0x00,0xfd,0xfd,0x56,0xac,0xac,0xf5,0xf6, +0xfd,0xfa,0x00,0xf7,0xfd,0xfb,0xfa,0xfd,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd, +0x00,0xf7,0xfe,0xf7,0x00,0x56,0xfd,0xf9,0x00,0xfd,0xfd,0xf7,0x00,0xac,0xfd,0xfd, +0xf5,0x00,0xfb,0xfd,0xfb,0xfa,0xfd,0x2b,0xf5,0x83,0xfd,0x01,0xfc,0x83,0xfb,0x36, +0x81,0x00,0x2b,0x56,0x00,0x00,0x2b,0xfb,0x81,0x00,0xf5,0x81,0xfc,0x56,0xfa,0xfb, +0x00,0xf6,0xfb,0xf8,0xf5,0x00,0xfb,0xfc,0xf6,0xf5,0xfb,0xfb,0xfc,0x56,0x00,0xf9, +0xfa,0x00,0x00,0x56,0xfb,0x56,0x00,0x2b,0xfb,0xfa,0xf9,0xfb,0xf6,0xf5,0xfe,0xf8, +0x00,0xfa,0xfd,0xfd,0xf5,0xf5,0x87,0xfd,0x11,0x00,0xf5,0xfb,0xac,0xf5,0xf5,0xfd, +0xfd,0xf5,0x2b,0xfd,0xfb,0xf9,0xfd,0xfb,0x00,0xf8,0x85,0xfd,0x3b,0xf8,0x00,0xfb, +0xac,0x2b,0x00,0x2b,0xfe,0xf7,0x00,0xfc,0xfb,0xf5,0x00,0xfe,0xfd,0x56,0xfc,0xac, +0xf5,0xf6,0xfd,0xac,0xfb,0xfb,0xfc,0xfb,0xf6,0x00,0xf7,0x81,0xfc,0x2b,0x00,0xfc, +0xfb,0xf5,0x00,0xfa,0xfb,0xfa,0xf9,0xfb,0xf6,0xf5,0xfc,0xfb,0xfb,0xf5,0xf5,0xfb, +0x56,0x00,0xf6,0xfb,0xfb,0x00,0x2b,0xfc,0x85,0xfb,0x55,0x2b,0x00,0x81,0xfa,0x00, +0x00,0xfc,0xfc,0xf5,0xf5,0xfe,0x2b,0x00,0xf5,0xfd,0xfd,0x56,0xfc,0xac,0xf5,0xf6, +0xfd,0xf9,0x00,0xf8,0x56,0xf5,0x00,0xf9,0xfd,0xac,0x00,0xf5,0xac,0xfd,0xfa,0xfb, +0xfd,0x56,0xac,0xfc,0xf5,0xf6,0xfd,0xf9,0x00,0xf7,0xfd,0xfb,0xfa,0xfd,0x2b,0xf5, +0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0x00,0xf7,0xfd,0xf8,0x00,0x56,0xfd,0xf9,0x00,0xfd, +0xfd,0xf7,0x00,0xac,0xfd,0xfd,0xf5,0x00,0xfb,0xfd,0xfb,0x81,0xfd,0xf7,0x00,0xac, 
+0x86,0xfd,0x0e,0x2b,0xf5,0xfd,0xfd,0x56,0xfc,0xfc,0xf5,0xf6,0xfd,0xfd,0x00,0xf7, +0xfe,0x92,0x06,0xfd,0x01,0xf5,0x85,0x00,0x0b,0xfd,0xf5,0x2b,0xfd,0xfd,0x00,0xfa, +0xfd,0xfd,0xf5,0x2b,0x8a,0xfd,0x07,0xf6,0xf7,0xfd,0xfd,0xfe,0xf6,0x2b,0x84,0xfd, +0x0d,0xfe,0x2b,0x00,0xfd,0xac,0xf5,0x00,0x00,0x56,0x00,0xfd,0xfe,0x2b,0x83,0x00, +0x0b,0x81,0xfd,0xfd,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0x56,0x83,0x00,0x1c,0x56, +0xfe,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfe,0xf8,0x00,0x00,0x2b,0x2b,0x00,0xfa, +0xfa,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x2b,0xfd,0xac,0x2b,0x83,0x00,0x04,0x56,0xfd, +0xf7,0x00,0x83,0xfd,0x01,0xac,0xa2,0xfc,0xfb,0x0a,0xf9,0xf5,0x00,0xf8,0xf5,0xf6, +0xfc,0xfb,0xfa,0xf5,0x83,0x00,0x19,0xfa,0xfb,0xf9,0x00,0x00,0xf5,0x2b,0x00,0xfb, +0xfb,0x2b,0x00,0xfc,0xfb,0xfb,0xfc,0xf6,0x00,0x00,0xf9,0x00,0x56,0xfb,0xfb,0xf8, +0x83,0x00,0x0e,0xf8,0xfb,0x2b,0x00,0xac,0xfd,0xf7,0x00,0xfa,0xfd,0x81,0x00,0x00, +0xf9,0x85,0xfd,0x02,0x00,0x2b,0x83,0x00,0x07,0xac,0xfd,0xfd,0xf5,0x2b,0xfd,0xfb, +0x83,0x00,0x02,0xf6,0xfe,0x85,0xfd,0x15,0xfe,0xf6,0x00,0x00,0x2b,0xf5,0x2b,0xfd, +0xac,0xf5,0x00,0x00,0xf9,0x00,0x00,0xfd,0xf5,0x00,0x00,0xf5,0xfc,0x86,0xfb,0x02, +0xfc,0xf7,0x84,0x00,0x05,0xf5,0xfb,0xfb,0x81,0xf6,0x83,0x00,0x12,0xf8,0xfb,0x2b, +0x00,0xfb,0xfc,0xfb,0xf9,0x00,0x00,0xf5,0xf7,0x00,0x2b,0xfc,0xf7,0x00,0x00,0xa2, +0xfc,0xfb,0x22,0xfc,0x81,0xf5,0x00,0x00,0x56,0x00,0x00,0xfd,0x81,0x00,0x00,0x2b, +0x2b,0xf5,0xfd,0xfd,0xf5,0x00,0x00,0xf5,0xfc,0xfd,0xfe,0x56,0xf5,0xf5,0x81,0x00, +0xf9,0xfd,0xfd,0xfb,0xf5,0x83,0x00,0x0a,0xfb,0xfe,0xf5,0x00,0x00,0xf5,0xac,0xfd, +0xfe,0x56,0x83,0x00,0x1c,0x56,0xfe,0x2b,0xf5,0xfd,0xfd,0xf9,0x00,0xfd,0xfd,0xf8, +0x00,0x00,0x2b,0xf6,0x00,0xfa,0xfa,0x00,0xfd,0xfd,0xac,0xf5,0x00,0x2b,0xfd,0xac, +0x2b,0x83,0x00,0x06,0x56,0xfd,0xac,0xf5,0x00,0x2b,0x85,0xfd,0x0e,0xf7,0x00,0xfd, +0xfd,0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfd,0xf8,0x00,0x00,0x92,0x06,0xfd,0x01,0xfe, +0x83,0xfd,0xc2,0xfe,0xfd,0xfd,0x91,0x01,0xfd,0x02,0x00,0xfb,0x8b,0xfd,0x01,0xfe, 
+0x84,0xfd,0x01,0xfe,0x87,0xfd,0x01,0xfe,0x8b,0xfd,0x01,0xfe,0x8d,0xfd,0xe2,0xfe, +0xfd,0xfd,0xfd,0x8a,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xac,0xc2, +0xfb,0xfb,0xfc,0x03,0x81,0x00,0xf8,0xc2,0xfb,0xfc,0xfb,0x03,0xfc,0xfb,0xfb,0xe2, +0xfc,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xc4,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc, +0x02,0xfb,0xfb,0x85,0xfd,0x01,0xfe,0x88,0xfd,0xc2,0xfe,0xfd,0xfd,0xc2,0xfd,0xfd, +0xfe,0x8a,0xfd,0x01,0xfe,0x84,0xfd,0x01,0xfe,0x84,0xfd,0xc2,0xfe,0xfd,0xfd,0x05, +0xfd,0xac,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfc, +0xfc,0xa3,0xfb,0xfc,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb, +0xe2,0xfb,0xfc,0xfb,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfc,0xac,0xc2,0xfd, +0xfd,0xfe,0x86,0xfd,0x01,0xfe,0x86,0xfd,0x03,0xf9,0x00,0xac,0x83,0xfd,0xc2,0xfe, +0xfd,0xfd,0x91,0x01,0xfd,0x01,0xfe,0x84,0xfd,0xc2,0xfe,0xfd,0xfd,0x8c,0xfd,0x01, +0xfe,0x8d,0xfd,0xc3,0xfe,0xfd,0xfd,0x03,0xfd,0xfd,0xfe,0x9d,0x07,0xfd,0x02,0x81, +0x00,0x90,0x05,0xfd,0x08,0xfc,0xfb,0xfb,0x00,0xf6,0xf6,0x00,0x00,0x84,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x8b,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xc2, +0xfb,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xac,0x90,0x03,0xfd,0x01,0xac,0xe2,0xfc,0xfb, +0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x8a,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb, +0x01,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x05,0xfb,0xfb,0xfc,0xfb,0xfc,0x90,0x01, +0xfd,0x06,0xf9,0x00,0x2b,0xf6,0x00,0xf6,0x9c,0x11,0xfd,0x06,0xfc,0xfb,0xf9,0xf6, +0x2b,0xf7,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfc,0xfb, +0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb, +0x02,0xfc,0xac,0x9a,0x02,0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0xe3,0xfb,0xfc, +0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0x02,0xfc,0xac,0x90, +0x01,0xfd,0x06,0xfe,0x56,0x2b,0xf7,0xf9,0xfe,0x9c,0x11,0xfd,0x03,0xac,0xfb,0xfb, 
+0xa2,0xfc,0xfb,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0x87,0xfb,0xa3,0xfc,0xfb,0xe2,0xfb, +0xfc,0xfb,0xfb,0xc3,0xfb,0xfb,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfc,0xac,0xac,0x93, +0x02,0xfd,0x03,0xac,0xfd,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb, +0xc2,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc, +0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x87,0xfb,0x03,0xfc,0xfb,0xac,0x93,0x01,0xfd,0x01, +0xfe,0x90,0x12,0xfd,0x01,0xac,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb, +0xfc,0xa3,0xfb,0xfc,0x89,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb, +0x01,0xfc,0x83,0xfb,0x06,0xfc,0xfb,0xfb,0xfc,0xfb,0xac,0x90,0x02,0xfd,0x02,0xfc, +0xfc,0xe2,0xfb,0xfb,0xfb,0xfc,0xe2,0xfb,0xfb,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb, +0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xe3,0xfc,0xfb, +0xfb,0xfb,0xa2,0xfc,0xfb,0x05,0xfb,0xfc,0xfb,0xfb,0xac,0x9c,0x0d,0xfd,0x02,0xf9, +0xac,0x8a,0xfd,0x02,0xfa,0xfb,0x96,0x01,0xfd,0x02,0xf9,0xac,0x92,0x03,0xfd,0x02, +0xfc,0xf9,0xa2,0xfc,0xfb,0xc3,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x02, +0xfa,0x56,0xc3,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0xe3, +0xfc,0xfb,0xfb,0xfb,0x06,0xfb,0xfb,0xfc,0xfc,0xac,0xac,0x94,0x01,0xfd,0x04,0xac, +0xac,0xfc,0xfc,0x88,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01, +0xfc,0x87,0xfb,0xc3,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb, +0xfb,0xc2,0xfb,0xfb,0xfc,0x01,0xac,0x9c,0x0d,0xfd,0x02,0x00,0xf9,0x8a,0xfd,0x02, +0xf5,0x2b,0x88,0xfd,0x04,0xfa,0xac,0xfc,0xfa,0x8a,0xfd,0x02,0x00,0xf9,0x99,0x01, +0xfd,0x09,0xf9,0xf5,0x00,0x2b,0xfa,0xfd,0xfd,0x00,0x81,0x8d,0xfd,0x07,0x00,0x81, +0xfd,0xf9,0x00,0xac,0xfc,0x87,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x07,0xfc,0xfb, +0xfb,0xfc,0xfb,0xf6,0xf5,0xe3,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0x01, +0xfc,0x88,0xac,0x01,0xfd,0x85,0xac,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2, 
+0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb, +0xa2,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb, +0xfc,0xfb,0xfb,0x01,0xfc,0x83,0xfb,0x01,0xfc,0x9e,0x0d,0xfd,0x02,0x00,0xf9,0x8a, +0xfd,0x02,0xf5,0x2b,0x88,0xfd,0x04,0xf6,0xfa,0x81,0xf6,0x8a,0xfd,0x02,0x00,0xf9, +0x98,0x01,0xfd,0x0a,0xfb,0x00,0xf7,0xfa,0xf7,0xf8,0xfd,0xfe,0x2b,0xfb,0x8c,0xfd, +0x08,0xfe,0x2b,0xfb,0xfd,0xf9,0x00,0xfd,0xac,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x87, +0xfb,0xa2,0xfc,0xfb,0x06,0xfb,0xfc,0xfb,0xfb,0x2b,0x00,0xe4,0xfb,0xfb,0xfb,0xfc, +0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xc2, +0xfc,0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83, +0xfb,0x04,0xfc,0xfb,0xfa,0x56,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0xc2, +0xfc,0xfb,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb, +0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfc,0xac,0x9e,0x0d,0xfd,0x03,0x00,0xf9, +0xfe,0x89,0xfd,0x02,0x00,0xf7,0x88,0xfd,0xc2,0xfe,0xfd,0xfd,0x88,0xfd,0x02,0x00, +0xf9,0x98,0x01,0xfd,0x03,0x2b,0x00,0xfe,0x97,0x01,0xfd,0x05,0xf9,0x00,0xfd,0xfd, +0xac,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa3,0xfc,0xfb,0x87,0xfb,0x03,0xfc,0xf6,0xf5, +0xe3,0xfc,0xfb,0xfb,0xfb,0xe5,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb, +0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb, +0x02,0xfb,0xfc,0x83,0xfb,0x03,0xfc,0xf6,0xf5,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x86, +0xfb,0xc2,0xfc,0xfb,0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc, +0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x03,0xfc,0xfb,0xac,0x99,0x0c,0xfd,0x18,0xfe, +0x00,0x2b,0xfd,0xfe,0x2b,0x00,0xfd,0xfd,0xf5,0x2b,0xf6,0x00,0x00,0xfa,0xfd,0xfd, +0xfe,0xf6,0x00,0x00,0x2b,0x00,0xf9,0x86,0xfd,0x17,0xf8,0x00,0x00,0xf5,0xf5,0x2b, +0xfd,0xf9,0x00,0xf8,0x00,0xf6,0xfe,0x00,0x2b,0xfd,0xfd,0x2b,0x00,0xfd,0xfd,0xac, +0xf5,0x83,0x00,0x16,0xfd,0xfd,0x00,0xf9,0xfd,0x81,0x00,0x81,0xfd,0xfd,0xf6,0x00, 
+0x00,0xf6,0xfd,0xfd,0xf9,0x00,0x81,0x00,0x00,0xf6,0x86,0xfd,0x03,0xf8,0x00,0x56, +0x85,0xfd,0x05,0x00,0xf9,0xfd,0xfd,0xf8,0x83,0x00,0x01,0xfc,0x86,0xfd,0x11,0x00, +0xf9,0xfd,0xfa,0x00,0x81,0x00,0x00,0xf6,0xfb,0xfc,0xf8,0x00,0xf9,0x00,0x00,0xf5, +0x83,0xfb,0x05,0xfc,0xfb,0xfb,0xfc,0xf9,0x83,0x00,0x05,0xf5,0x00,0xfb,0xfc,0xf6, +0x83,0x00,0x12,0xf7,0xfb,0xfb,0x2b,0x00,0xf7,0x00,0x00,0xf7,0xfb,0xfc,0xf6,0x00, +0xf7,0x00,0x00,0x2b,0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xf7,0x83,0x00,0x0e,0x81,0xfb, +0xfc,0x00,0x2b,0x00,0x00,0xfc,0x00,0x2b,0xf5,0x00,0x00,0x56,0x83,0xfb,0x83,0x00, +0x0b,0xf7,0xfb,0xfc,0xf6,0x00,0xfb,0xfb,0x56,0x00,0xfc,0xf8,0x83,0x00,0x02,0xf6, +0xfc,0x86,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x87,0xfb,0x01,0xfc,0x87,0xfb,0x01, +0xfc,0x85,0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x01,0xfc, +0x9b,0x0c,0xfd,0x17,0xf5,0x2b,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0x00,0xf8,0xfd, +0xf5,0xf5,0xfe,0xfd,0xf6,0xf5,0xfb,0xac,0x2b,0x00,0xf9,0x85,0xfd,0x18,0xf9,0x00, +0xf9,0xfe,0xf8,0x00,0x2b,0xfe,0xf9,0x00,0xf6,0xac,0xfe,0xfd,0x00,0x2b,0xfe,0xfd, +0x2b,0xf5,0xfd,0xfd,0x00,0xf5,0xa2,0xfb,0xfd,0x16,0xfd,0x00,0xfa,0xfb,0x00,0x81, +0xfd,0xfd,0x2b,0x00,0xac,0xac,0x00,0xf8,0xfd,0xf9,0x00,0x00,0xfc,0xfa,0x00,0x81, +0x86,0xfd,0x04,0xf6,0x00,0xf5,0xfb,0x83,0xfd,0x09,0x00,0xf9,0xfe,0xf9,0x00,0x81, +0xfd,0xf5,0xf6,0x86,0xfd,0x12,0x00,0xf9,0xfd,0xf9,0x00,0x00,0xfc,0xfa,0x00,0xfa, +0xfb,0x56,0x00,0x00,0xfa,0xf9,0x00,0xf9,0x83,0xfb,0x22,0xfc,0xfb,0x81,0x00,0x2b, +0xfc,0x56,0x00,0xf5,0xfb,0xfb,0x56,0xfa,0xfb,0x2b,0x00,0xfc,0xfb,0xf6,0x00,0xf5, +0xfb,0x2b,0x00,0xfb,0xfb,0x2b,0x00,0xf5,0xfb,0x2b,0x00,0xfb,0xfc,0x84,0xfb,0x29, +0x56,0x00,0xf9,0xfc,0xf5,0xf5,0xfc,0xfb,0x00,0x00,0xfa,0xfc,0xfb,0x00,0x00,0xf7, +0xfb,0xf5,0xf5,0xfc,0xfb,0x00,0xf6,0xfb,0xf9,0x00,0xfa,0xfb,0x2b,0x00,0xfc,0xfb, +0xf8,0x00,0xfb,0xfc,0xf6,0x00,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x85,0xfb,0xc3,0xfc, +0xfb,0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0xa3,0xfc,0xfb, 
+0x83,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xac,0x9c,0x0c,0xfd,0x17,0xf5,0x2b,0xfd,0xfd, +0xf7,0x00,0xfd,0xfe,0xf5,0xf6,0xfd,0xfe,0x2b,0xf5,0xfd,0xfd,0x00,0xf8,0xfd,0xfd, +0xfe,0x00,0xf9,0x85,0xfd,0x02,0xf6,0xf6,0x83,0xfd,0x06,0xf5,0x2b,0xfd,0xf9,0x00, +0xac,0x83,0xfd,0x0a,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd,0x81,0x00,0xfb,0x85,0xfd, +0x04,0x00,0x56,0x00,0xf8,0x83,0xfd,0x0e,0x00,0xf7,0xf9,0xf9,0xf6,0xf5,0xfd,0xfa, +0x00,0xac,0xfd,0xfe,0x00,0xf9,0x86,0xfd,0x10,0xfe,0xf9,0x00,0x00,0x81,0xfd,0xfd, +0x00,0xf9,0xfd,0xf6,0xf5,0xf9,0xfa,0xf7,0x00,0x86,0xfd,0x14,0x00,0xf9,0xfe,0xf9, +0x00,0xac,0xfd,0xfd,0x00,0xf9,0xac,0xf8,0x00,0x81,0xfc,0xfb,0x00,0x56,0xfb,0xfc, +0x83,0xfb,0x1e,0xf7,0x00,0xfb,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0xfb,0xfb,0xfc,0x56, +0x00,0xfb,0xfb,0x2b,0x00,0xfc,0xfb,0x56,0x00,0xfb,0xfc,0xf6,0x00,0xfc,0xfb,0x56, +0x00,0x83,0xfb,0x2c,0xfc,0xfb,0xfc,0xf5,0xf5,0x56,0xf8,0x2b,0x00,0xfb,0xfb,0x00, +0xf8,0xfc,0xfb,0xfb,0xf5,0xf6,0xfb,0xfc,0x2b,0x00,0xfb,0xfa,0x00,0xf8,0x56,0x56, +0x00,0xf6,0xfc,0xf6,0xf5,0xfb,0xfb,0x56,0x00,0xfb,0xfb,0x2b,0xf5,0xfb,0xfb,0xfc, +0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02, +0xfb,0xfc,0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0xa2,0xfb, +0xfc,0x02,0xfc,0xac,0x9c,0x0c,0xfd,0x12,0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd, +0x00,0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xfd,0x00,0xf9,0x83,0xfd,0x02,0x00,0xfa,0x85, +0xfd,0x0a,0x00,0x2b,0xfe,0xfd,0xfd,0x00,0xf7,0xfd,0xf9,0x00,0x84,0xfd,0x09,0x00, +0xf7,0xfd,0xfd,0x2b,0xf5,0xfd,0xf9,0x00,0x86,0xfd,0x83,0x00,0x01,0xac,0x83,0xfd, +0x09,0x00,0x00,0xf5,0xf5,0x00,0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf9,0x89, +0xfd,0x0d,0xf5,0x00,0xfd,0xfd,0x00,0xf9,0xfd,0x00,0x00,0xf5,0x00,0xf5,0xf5,0x86, +0xfd,0x05,0x00,0xf9,0xfd,0xf9,0x00,0x83,0xfd,0x0b,0x00,0xf9,0xfd,0x56,0x00,0xfb, +0xfb,0xfc,0x00,0xf8,0xfc,0x83,0xfb,0x0b,0xfc,0xf6,0xf5,0xfc,0xfb,0xfb,0xf6,0xf5, +0xfc,0xfb,0x56,0x84,0x00,0x11,0xfc,0xfb,0xf6,0xf5,0xfb,0xfb,0x56,0x00,0xfc,0xfb, 
+0x2b,0x00,0xfc,0xfb,0xf8,0x00,0xfc,0x85,0xfb,0x02,0x00,0x00,0xa2,0xf5,0x00,0x0f, +0xfc,0xfb,0x00,0x56,0xfb,0xfb,0xfc,0x00,0x2b,0xfb,0xfb,0xf6,0xf5,0xfc,0xf8,0x83, +0x00,0x0e,0xf5,0xf5,0xf7,0xfb,0x2b,0x00,0xfc,0xfb,0x56,0x00,0xfc,0xfb,0xf6,0xf5, +0xc2,0xfb,0xfc,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x89,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0xc3,0xfb,0xfb,0xfc,0x87,0xfb,0x04,0xfc,0xfb,0xfb,0xfc,0x9e,0x0c,0xfd, +0x12,0xf5,0x2b,0xfd,0xfd,0xf6,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd, +0xfd,0x00,0x56,0x83,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x02,0xf6,0xf6,0x83,0xfd,0x05, +0xf5,0x2b,0xfd,0xf9,0x00,0x84,0xfd,0x0a,0xf5,0x2b,0xfd,0xfd,0xf6,0xf5,0xfd,0xfb, +0x00,0xfb,0x85,0xfd,0x10,0x00,0xfa,0xf5,0x00,0xfc,0xfd,0xfe,0x00,0xf7,0xfd,0xfd, +0xfe,0xfd,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf9,0x89,0xfd,0x0c,0xfa,0x00,0xfd, +0xfd,0x00,0xf9,0xfe,0xf6,0xf5,0xfd,0xfd,0xfe,0x87,0xfd,0x05,0x00,0xf9,0xfd,0xf9, +0x00,0x83,0xfd,0x0a,0x00,0xf9,0xfd,0xf9,0x00,0xfc,0xfb,0xfb,0x00,0x56,0x83,0xfb, +0x04,0xfc,0xfb,0x2b,0x00,0x83,0xfb,0x19,0x2b,0x00,0xfb,0xfa,0x00,0xf9,0xfb,0x56, +0x00,0xfb,0xfc,0x2b,0x00,0xfb,0xfc,0xf8,0x00,0xfb,0xfc,0xf6,0xf5,0xfb,0xfb,0x56, +0x00,0xc2,0xfb,0xfb,0xfc,0x02,0xf6,0xf5,0xa3,0xfb,0xfc,0x0f,0x00,0xf8,0xfb,0xfc, +0xfb,0xf5,0xf6,0xfb,0xfc,0x2b,0x00,0xfb,0xfa,0x00,0xf9,0xa2,0xfc,0xfb,0x0b,0xfc, +0xf6,0xf5,0xfb,0xfb,0xf8,0x00,0xfb,0xfb,0x2b,0x00,0x83,0xfb,0x01,0xfc,0x88,0xfb, +0x01,0xfc,0x83,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc, +0x02,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc, +0x9f,0x0c,0xfd,0x17,0xf5,0xf5,0xfe,0xf7,0x00,0x00,0xfe,0xfd,0x00,0xf7,0xfd,0xfd, +0x2b,0xf5,0xfd,0xfd,0xf6,0xf5,0xac,0xfb,0xf5,0x00,0xf9,0x85,0xfd,0x0a,0xf8,0x00, +0xfb,0xac,0x2b,0x00,0x2b,0xfe,0xf9,0x00,0x84,0xfd,0x1f,0xf5,0xf5,0xfe,0xf7,0x00, +0xf5,0xfd,0xfd,0x00,0xf5,0xfb,0xfd,0x56,0xfd,0xfd,0x00,0xf9,0xac,0x00,0xf5,0xfd, +0xfd,0xf7,0x00,0xf9,0xfd,0xfc,0xf9,0xfe,0xf9,0x00,0x83,0xfd,0x03,0x00,0xf9,0xfe, 
+0x84,0xfd,0x11,0x56,0x56,0xac,0xfc,0xf5,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0x81,0x00, +0xf7,0xfd,0xfd,0x56,0x86,0xfd,0x05,0x00,0xfa,0xfd,0xf9,0x00,0x83,0xfd,0x2d,0x00, +0xf9,0xfd,0xf9,0x00,0xfc,0xfb,0xfb,0x00,0x56,0xfb,0xfc,0xfb,0xfb,0xfc,0xfa,0x00, +0xf8,0xfb,0xf7,0x00,0xf5,0xfb,0xf9,0x00,0xf9,0x81,0xf6,0x00,0x81,0xfb,0xf6,0xf5, +0xfc,0xfb,0x56,0x00,0xfb,0xfb,0xf6,0xf5,0xfb,0xfc,0xf8,0x00,0xc2,0xfc,0xfb,0xfb, +0x0a,0xf9,0x00,0x2b,0xfb,0xfc,0xf7,0xfb,0xfb,0x00,0x56,0x83,0xfb,0x19,0x00,0x2b, +0xfb,0xfb,0xf6,0xf5,0xfc,0xfb,0xf5,0x00,0xfa,0xfb,0xfa,0xf9,0xfb,0x2b,0x00,0x81, +0x56,0x00,0x00,0xfc,0xfb,0x2b,0x00,0x83,0xfb,0x02,0xf8,0xf6,0xa2,0xfc,0xfb,0xe2, +0xfb,0xfc,0xfb,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb, +0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x03,0xfc,0xfb,0xfc,0x90,0x0d,0xfd, +0x11,0x81,0x00,0x00,0xf6,0x2b,0xf5,0xfd,0xfd,0xf5,0x2b,0xfd,0xfd,0xf7,0x00,0xfd, +0xfd,0xac,0x83,0x00,0x03,0x56,0x00,0xf9,0x85,0xfd,0x0a,0xfe,0xf6,0x00,0x00,0x2b, +0xf5,0x2b,0xfd,0xf9,0x00,0x84,0xfd,0x06,0x81,0x00,0x00,0xf6,0xf7,0x00,0x83,0xfd, +0x0f,0xf5,0x00,0x00,0xf5,0xfd,0xfd,0x00,0xf9,0xfd,0xac,0x00,0xf5,0xfd,0xfe,0xf7, +0x83,0x00,0x04,0x2b,0xfd,0xf9,0x00,0x83,0xfd,0x02,0x00,0xf9,0x85,0xfd,0x0d,0x81, +0xf5,0x00,0x00,0xf5,0xac,0xfd,0xfe,0x00,0xf9,0xfd,0xfd,0xfa,0x83,0x00,0x01,0xf5, +0x86,0xfd,0x05,0x00,0xf9,0xfd,0xfa,0x00,0x83,0xfd,0x0d,0x00,0xf9,0xfe,0xf9,0x00, +0xfd,0xfc,0xfc,0x00,0x56,0xfb,0xfb,0xfc,0x83,0xfb,0x1d,0xf7,0x00,0x00,0xf5,0xf6, +0x00,0xfc,0xfb,0xf6,0x00,0x00,0xf8,0x00,0x00,0xfb,0x2b,0x00,0xfb,0xfc,0xf8,0x00, +0xfc,0xfb,0x2b,0x00,0xfc,0xfb,0x56,0x00,0x87,0xfb,0x01,0x56,0x84,0x00,0x11,0xfc, +0xfb,0x00,0x56,0xfb,0xfb,0xfc,0xf5,0xf6,0xfb,0xfc,0x2b,0x00,0xfb,0xfb,0x81,0xf6, +0x83,0x00,0x13,0xf8,0xfb,0x81,0xf5,0x00,0x00,0xf8,0x00,0xfb,0xfc,0x81,0xf5,0x00, +0xf6,0xfb,0x2b,0x00,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x83, 
+0xfb,0x02,0xfc,0xac,0x91,0x0d,0xfd,0xc2,0xfe,0xfd,0xfd,0x86,0xfd,0x01,0xfe,0x83, +0xfd,0xc2,0xfe,0xfd,0xfd,0x89,0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0xe2,0xfd,0xfd,0xfd, +0xfe,0x85,0xfd,0x01,0xfe,0x84,0xfd,0xc2,0xfe,0xfd,0xfd,0xc2,0xfd,0xfd,0xfe,0x87, +0xfd,0x01,0xfe,0x88,0xfd,0x01,0xfe,0x89,0xfd,0x01,0xfe,0x98,0x01,0xfd,0xc2,0xfc, +0xfb,0xfb,0xc2,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0xa2,0xfb,0xfc, +0xe3,0xfb,0xfb,0xfc,0xfb,0xc3,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfc,0xfb, +0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0xe2,0xfb,0xfb,0xfc,0xfb,0xa2,0xfc,0xfb,0x83,0xfb, +0xc3,0xfc,0xfb,0xfb,0xe3,0xfb,0xfc,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x03, +0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x01,0xfc, +0x9b,0x14,0xfd,0x02,0xac,0xac,0x87,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb, +0xe2,0xfb,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc4,0xfc,0xfb,0xfb,0x02, +0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x84,0xfb,0xe4,0xfc,0xfb,0xfb, +0xfb,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb, +0xa3,0xfc,0xfb,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x9e, +0x14,0xfd,0x01,0xac,0xa2,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84, +0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc, +0x85,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb, +0xfc,0x85,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xe3,0xfb,0xfc, +0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x89,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb,0xfc, +0x03,0xfb,0xfb,0xac,0x90,0x15,0xfd,0x01,0xac,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xc3, +0xfb,0xfb,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xa2, +0xfc,0xfb,0x84,0xfb,0xa3,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xa2,0xfc,0xfb, +0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc, +0x85,0xfb,0x01,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0xc3,0xfb,0xfb, 
+0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xac,0x92,0x15,0xfd,0x02,0xac,0xfc,0x83, +0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb, +0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xa2,0xfc,0xfb,0x86,0xfb,0x01,0xfc, +0x83,0xfb,0xa2,0xfc,0xfb,0x86,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xa2, +0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x85,0xfb, +0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x05,0xfc,0xfb,0xfc,0xfc,0xac, +0x94,0x15,0xfd,0x01,0xac,0xa2,0xfc,0xfb,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb, +0xe3,0xfc,0xfb,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0xc3,0xfc,0xfb,0xfb,0xc3,0xfb, +0xfb,0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x03,0xfb, +0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0xc4,0xfc,0xfb, +0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83,0xfb,0x02,0xfc,0xac,0x97,0x15, +0xfd,0xc4,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xe4,0xfb,0xfb,0xfb,0xfc,0xc3,0xfb,0xfc, +0xfb,0xe2,0xfb,0xfb,0xfc,0xfb,0x84,0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0xc3,0xfc,0xfb, +0xfb,0xc2,0xfb,0xfb,0xfc,0xe3,0xfb,0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x86,0xfb, +0x01,0xfc,0x85,0xfb,0xa2,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x83,0xfb,0x01,0xac,0x9a, +0x15,0xfd,0x01,0xac,0xc2,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xe3,0xfb,0xfb, +0xfc,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x8a,0xfb,0x01,0xfc,0x85,0xfb,0x01, +0xfc,0x86,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb, +0xe4,0xfc,0xfb,0xfb,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc, +0x84,0xfb,0x02,0xfc,0xac,0x9c,0x15,0xfd,0x01,0xac,0xe2,0xfc,0xfb,0xfb,0xfb,0x03, +0xfb,0xfb,0xfc,0x86,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb, +0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x83, +0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb, +0xfb,0xfb,0xc2,0xfb,0xfb,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc,0xfb, 
+0xfb,0x04,0xfc,0xfb,0xfc,0xac,0x9f,0x15,0xfd,0x01,0xac,0xc2,0xfb,0xfc,0xfb,0x01, +0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb, +0x01,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x85,0xfb,0x01,0xfc,0x83,0xfb, +0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfb,0xa3,0xfc,0xfb,0xa2,0xfb,0xfc,0x02,0xfb,0xfb, +0xa2,0xfc,0xfb,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0xe2,0xfb,0xfb,0xfb,0xfc,0x02,0xfb,0xac,0x93,0x16,0xfd,0x01,0xfc,0x85,0xfb,0x01, +0xfc,0x86,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0xc2,0xfb,0xfb,0xfc,0x83,0xfb, +0x01,0xfc,0x84,0xfb,0x01,0xfc,0x83,0xfb,0xc3,0xfc,0xfb,0xfb,0xe2,0xfb,0xfb,0xfc, +0xfb,0x8f,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x86, +0xfb,0xa3,0xfc,0xfb,0x84,0xfb,0x02,0xfc,0xac,0x95,0x16,0xfd,0x01,0xac,0xa2,0xfc, +0xfb,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb, +0xc4,0xfb,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x86,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc, +0x85,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0x86,0xfb,0xe3,0xfc, +0xfb,0xfb,0xfb,0xa2,0xfc,0xfb,0x88,0xfb,0x02,0xfc,0xac,0x98,0x16,0xfd,0x02,0xac, +0xac,0xc2,0xfb,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0x83,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb, +0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb, +0xa3,0xfc,0xfb,0xe4,0xfb,0xfb,0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x03,0xfb,0xac,0xac, +0x9b,0x16,0xfd,0x01,0xac,0xa2,0xfc,0xfb,0xe3,0xfb,0xfb,0xfb,0xfc,0x84,0xfb,0xa2, +0xfc,0xfb,0x84,0xfb,0xc6,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x85, +0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0xe4,0xfc,0xfb,0xfb,0xfb,0x05,0xfb, +0xfb,0xfc,0xfb,0xfc,0x90,0x17,0xfd,0x01,0xac,0xa2,0xfc,0xfb,0x83,0xfb,0xc2,0xfc, +0xfb,0xfb,0x02,0xfb,0xfc,0x84,0xfb,0x03,0xfc,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb, +0xfc,0x85,0xfb,0x01,0xfc,0x88,0xfb,0x01,0xfc,0x83,0xfb,0xa2,0xfc,0xfb,0x02,0xfb, 
+0xfb,0xa2,0xfc,0xfb,0x83,0xfb,0x01,0xfc,0x84,0xfb,0xc2,0xfc,0xfb,0xfb,0xe3,0xfb, +0xfc,0xfb,0xfb,0xa2,0xfb,0xfc,0x04,0xfb,0xfb,0xfc,0xac,0x93,0x17,0xfd,0x01,0xac, +0xa3,0xfc,0xfb,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x87,0xfb,0x01,0xfc,0x85, +0xfb,0x01,0xfc,0x85,0xfb,0xc2,0xfc,0xfb,0xfb,0x02,0xfb,0xfc,0x86,0xfb,0x01,0xfc, +0x84,0xfb,0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0xc2,0xfb,0xfb,0xfc,0x83,0xfb,0x01,0xfc, +0x87,0xfb,0x01,0xfc,0x84,0xfb,0x02,0xfc,0xac,0x97,0x17,0xfd,0x01,0xac,0xe2,0xfc, +0xfb,0xfb,0xfb,0xa2,0xfb,0xfc,0x02,0xfb,0xfb,0xa2,0xfc,0xfb,0x02,0xfb,0xfc,0x83, +0xfb,0xa2,0xfc,0xfb,0xa2,0xfb,0xfc,0x83,0xfb,0xc2,0xfc,0xfb,0xfb,0xc2,0xfb,0xfc, +0xfb,0x83,0xfb,0x01,0xfc,0x86,0xfb,0x01,0xfc,0x89,0xfb,0x01,0xfc,0x83,0xfb,0xa3, +0xfc,0xfb,0x06,0xfb,0xfb,0xfc,0xfb,0xfc,0xac,0x9b,0x17,0xfd,0x05,0xac,0xac,0xfc, +0xfb,0xfb,0xa2,0xfc,0xfb,0x87,0xfb,0x01,0xfc,0x84,0xfb,0xe2,0xfc,0xfb,0xfb,0xfb, +0x83,0xfb,0x01,0xfc,0x85,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb,0xc2,0xfc,0xfb,0xfb,0xc3, +0xfb,0xfc,0xfb,0x03,0xfb,0xfb,0xfc,0x88,0xfb,0x04,0xfc,0xfc,0xac,0xfe,0x9e,0x17, +0xfd,0x03,0xac,0xac,0xfc,0x84,0xfb,0xa2,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0xe2, +0xfb,0xfb,0xfc,0xfb,0xe2,0xfb,0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x84,0xfb,0xc2, +0xfc,0xfb,0xfb,0x03,0xfb,0xfb,0xfc,0x85,0xfb,0xc3,0xfc,0xfb,0xfb,0xc2,0xfb,0xfb, +0xfc,0x04,0xfb,0xfc,0xac,0xac,0x94,0x18,0xfd,0x01,0xac,0x83,0xfc,0x83,0xfb,0x01, +0xfc,0x84,0xfb,0x01,0xfc,0x84,0xfb,0x01,0xfc,0x86,0xfb,0xe3,0xfc,0xfb,0xfb,0xfb, +0xc2,0xfb,0xfc,0xfb,0xe2,0xfb,0xfb,0xfb,0xfc,0xc2,0xfb,0xfc,0xfb,0x87,0xfb,0x06, +0xfc,0xfb,0xfb,0xfc,0xfc,0xac,0x9f,0x10,0xfd,0x03,0xf6,0xf6,0xfb,0x9c,0x01,0xfd, +0x02,0xf6,0xac,0x91,0x01,0xfd,0x01,0xfa,0x8a,0xf9,0x01,0x81,0x84,0xfd,0x01,0xf6, +0x8c,0xfd,0x06,0xfa,0xf8,0x2b,0xfd,0xfd,0xf7,0x87,0xfd,0x02,0xfa,0xfb,0x84,0xfd, +0x02,0xfc,0xf7,0x84,0xfd,0x03,0xfb,0xf5,0xf9,0x87,0xfd,0xc2,0xfa,0xf9,0xf9,0x09, +0xf9,0xfa,0xf9,0x56,0x56,0xf9,0xfb,0xfb,0xfc,0x85,0xfb,0x01,0xfc,0x84,0xfb,0xa2, 
+0xfc,0xfb,0x03,0xfb,0xfa,0xf7,0xe2,0xfb,0xfb,0xfb,0xfc,0x03,0xfb,0xfb,0xfc,0x84, +0x00,0xa2,0xf5,0x00,0x0a,0x00,0xf7,0xfc,0xfb,0x2b,0x81,0xfb,0xf9,0xf8,0xf8,0x84, +0x56,0x01,0xf8,0x83,0xfb,0x03,0xfc,0xfb,0xf9,0x85,0xfd,0x05,0xf8,0xfb,0xfd,0xfd, +0xf8,0x8d,0xfd,0x02,0xfa,0xac,0x98,0x01,0xfd,0x02,0xfb,0xf8,0x9a,0x03,0xfd,0x07, +0xfb,0xfc,0xfd,0xfa,0xf9,0xf5,0xac,0x84,0xfd,0x03,0xfc,0xf6,0xfe,0x94,0x01,0xfd, +0x02,0xf9,0xf9,0x9e,0x06,0xfd,0x01,0xfc,0x8a,0xfd,0x02,0xf9,0x2b,0x85,0xfd,0x04, +0xf9,0xf8,0xfc,0x2b,0x9b,0x01,0xfd,0x02,0xf9,0x00,0x89,0xfd,0x05,0xfb,0xf8,0x2b, +0x2b,0x56,0x84,0xfd,0x0c,0xf8,0x2b,0xf7,0x2b,0x2b,0xf5,0xf6,0x2b,0xf7,0x2b,0x2b, +0xf8,0x84,0xfd,0x13,0xf5,0xfd,0xfd,0x2b,0x00,0xf5,0x00,0x00,0xfd,0xfe,0x56,0x00, +0xf5,0x00,0x56,0xac,0xfd,0xfe,0x00,0x87,0xfd,0x02,0xf5,0xf9,0x83,0xfd,0x04,0x81, +0xf6,0xf6,0x56,0x84,0xfd,0x11,0xfa,0xf5,0x00,0xfb,0xfe,0xf6,0xfc,0xfd,0xfd,0xf8, +0xf7,0x2b,0x2b,0x00,0xf7,0x2b,0x00,0x83,0x2b,0x03,0xf8,0xac,0xac,0xa3,0xfc,0xfb, +0x03,0xfb,0xfc,0x56,0xc2,0xfc,0xfb,0xfb,0x03,0xfb,0x56,0x00,0xe2,0xfb,0xfc,0xfb, +0xfb,0x02,0xfb,0xfc,0x85,0x56,0x01,0x00,0x85,0x56,0x13,0x81,0xfb,0x56,0x00,0xf9, +0x00,0x2b,0xf6,0x2b,0x00,0xf6,0xf6,0x2b,0xfa,0xac,0xac,0xfd,0xfa,0x00,0x84,0xfd, +0x02,0xf6,0x56,0xc2,0x00,0xfd,0xfe,0x01,0xf6,0xa3,0xf5,0x00,0x01,0xfa,0x83,0xfd, +0x02,0xf5,0xf9,0x8a,0xfd,0x01,0xf6,0xa2,0xf5,0x00,0x02,0x00,0x56,0x87,0xfd,0x02, +0xf9,0x00,0x8e,0xfd,0x02,0xfb,0xfc,0x84,0xfd,0x0b,0xf9,0xfc,0xfd,0xfc,0xf9,0xac, +0xf5,0xf5,0xf9,0xfd,0x81,0x88,0xf9,0x93,0x01,0xfd,0x07,0x2b,0xf9,0xfd,0xac,0x00, +0x56,0xf6,0x84,0xfd,0x02,0x2b,0x2b,0x87,0xfd,0x01,0xfc,0x8c,0xfd,0x03,0xfe,0x00, +0xf9,0x83,0xfd,0x01,0xac,0x9e,0x05,0xfd,0x01,0xf8,0xe2,0x00,0xf5,0x00,0xf5,0x05, +0xfd,0xfd,0x81,0x00,0xf9,0x89,0xfd,0x03,0xf9,0x2b,0xfe,0x84,0xfd,0x04,0xfb,0xf6, +0xf6,0xfb,0x9b,0x01,0xfd,0x86,0x00,0x01,0xf6,0x83,0xfd,0xa2,0x2b,0x00,0x04,0xf8, +0xf6,0x00,0x81,0x83,0xfd,0x06,0xfe,0xfd,0xfd,0xfe,0x2b,0xf9,0xc2,0xfe,0xfd,0xfd, 
+0x06,0xfa,0x00,0xf5,0x00,0x00,0x2b,0xa2,0xfd,0xfe,0x0c,0x00,0xfd,0xfd,0x2b,0xf7, +0x2b,0x00,0x2b,0x2b,0xfb,0xfd,0xf5,0x87,0xfd,0x02,0x00,0xac,0x83,0xfd,0x04,0xfc, +0x2b,0xf8,0xfa,0x86,0xfd,0x05,0xfa,0x00,0xfb,0xf5,0xfe,0x83,0xfd,0x01,0xf9,0xc2, +0xfa,0xf9,0x00,0x06,0xfa,0xfa,0x81,0xfd,0xfd,0x2b,0xa2,0x00,0xf5,0x85,0x00,0x06, +0xfc,0xfb,0xf6,0xf6,0x2b,0xf6,0xc2,0xf5,0x00,0x00,0x02,0xf7,0xfc,0x83,0xfb,0x03, +0xfc,0xfb,0x00,0xa2,0x2b,0xf6,0x01,0x00,0xa2,0x2b,0xf6,0x0e,0x00,0x56,0xfc,0xfb, +0xf9,0xf9,0x00,0xfc,0x56,0x56,0x00,0xf9,0xf9,0xfb,0x84,0xfd,0x0c,0x56,0xf6,0xfe, +0xfd,0xfd,0xfc,0xac,0x00,0xfa,0xfc,0xfd,0xf5,0x84,0xfd,0x06,0xfa,0x2b,0xfe,0xfd, +0xf5,0xfb,0x83,0xfd,0x02,0x00,0xfa,0x83,0xfd,0x02,0xf9,0xfc,0x85,0xfd,0x07,0x81, +0xf9,0xfa,0xfd,0x2b,0xf6,0xac,0x83,0xfd,0x0c,0xf8,0xf6,0x2b,0x2b,0xf6,0x00,0xf6, +0xf5,0x00,0xf8,0xfd,0xf8,0xa2,0xf5,0x00,0x85,0x00,0x01,0x81,0x84,0xfd,0x0b,0x00, +0xac,0xfd,0xfd,0x00,0xfa,0xf7,0x81,0xf7,0xfd,0x00,0xc2,0xf7,0x2b,0x2b,0x03,0xf7, +0x00,0xac,0x92,0x01,0xfd,0x06,0x2b,0xfa,0xfd,0xfd,0xfa,0xac,0x84,0xfd,0x01,0xfb, +0x86,0x00,0x05,0xf8,0xfd,0x81,0x00,0xf9,0x89,0xfd,0x84,0x00,0x05,0xf5,0xf5,0x00, +0x00,0xf8,0x9f,0x05,0xfd,0xa3,0xfe,0xfd,0x02,0xfe,0x00,0x83,0xfd,0x03,0x81,0x00, +0x2b,0x84,0xfd,0x06,0xfe,0xf5,0xfc,0xfd,0xfa,0x2b,0x83,0xfd,0x05,0xfb,0x2b,0x00, +0xf9,0xfe,0x9a,0x01,0xfd,0x14,0xfe,0x2b,0xf5,0xfe,0xfd,0xfd,0xfe,0xf5,0xf8,0xfd, +0xfd,0x2b,0xf5,0xfd,0xfe,0x00,0xfd,0xfe,0x81,0x00,0x83,0xfd,0x03,0x2b,0x00,0xf5, +0x84,0x00,0x0a,0xf5,0x00,0xf6,0xfd,0xfd,0xfe,0xfd,0xfe,0x00,0xfe,0x85,0xfd,0x0a, +0xf5,0xfd,0xfd,0xfa,0xf9,0xf9,0xf5,0xf9,0xfa,0xf7,0x83,0x00,0x0e,0xf5,0xfa,0xfd, +0xfd,0xf9,0x56,0x00,0x2b,0xf7,0xfc,0xac,0x00,0xf9,0xfe,0x84,0xfd,0x07,0xf9,0x2b, +0xfd,0xfd,0xfe,0xf5,0xf9,0x83,0xfd,0x1a,0xfe,0x00,0x2b,0x2b,0x00,0x2b,0xf7,0x00, +0x2b,0x2b,0x00,0xfd,0xfd,0x56,0xfa,0xfa,0xfe,0xfd,0x81,0x00,0xf8,0xac,0x2b,0x81, +0xfc,0xf8,0x84,0x56,0x06,0xf8,0x00,0xfc,0xfb,0xfc,0x81,0xc2,0xfb,0xfb,0xfc,0x24, 
+0x00,0x56,0x2b,0xf6,0x56,0xf5,0xfa,0x2b,0xf6,0xf7,0xf6,0x56,0x81,0x56,0xfc,0xfc, +0xf5,0xac,0x00,0x2b,0xf7,0x2b,0xf5,0x2b,0xfd,0xfd,0xfb,0xf9,0xf5,0xf5,0xf6,0xf9, +0xfe,0x2b,0xf5,0xac,0x83,0xfd,0x11,0xf5,0xf7,0xf5,0x00,0xfb,0xf9,0x2b,0xfd,0xfe, +0x00,0xfd,0xfe,0xf5,0xf5,0x00,0xf6,0xfd,0xa2,0x2b,0x00,0x87,0xfd,0x03,0xfe,0xf5, +0xf6,0x86,0xfd,0x15,0xfa,0xf9,0xfa,0xf7,0xf5,0xf9,0xfd,0xfe,0xfd,0xfd,0xf9,0xf9, +0xfa,0xfd,0xfd,0xac,0xf5,0x2b,0xac,0xf8,0xfa,0x83,0xfd,0x02,0xac,0x00,0x83,0xfd, +0x07,0xf5,0xfb,0xf8,0xf7,0xac,0xfd,0xf5,0xc2,0xfd,0xfd,0xfe,0x02,0xf9,0x00,0x8e, +0xfd,0x01,0x81,0xa2,0x00,0xf5,0x02,0x00,0x00,0xa2,0xf5,0x00,0x83,0xfd,0x03,0xfe, +0x00,0xf8,0x83,0xfd,0x08,0xfe,0x00,0xfb,0xfd,0xfd,0x81,0x00,0x2b,0x85,0xfd,0x07, +0xf5,0xac,0xfd,0xfd,0xfe,0xf5,0xf9,0xc2,0xfe,0xfd,0xfd,0x95,0x06,0xfd,0x01,0xf5, +0x84,0xfd,0x03,0xac,0x00,0xfb,0x83,0xfd,0x0c,0xf9,0x00,0xfe,0xfd,0xf9,0x2b,0xac, +0xf7,0xf5,0x00,0x2b,0xac,0x9c,0x01,0xfd,0x10,0xf7,0xf5,0x81,0xfc,0xfd,0xfd,0xfb, +0x00,0xfd,0xfd,0xac,0x00,0xfe,0xfd,0xfb,0x00,0x83,0xfd,0x1e,0xf6,0x56,0xfd,0xfd, +0x2b,0xfa,0xfd,0xfe,0x2b,0xfa,0xfe,0xfd,0xfa,0x2b,0xfd,0xfd,0xf7,0x2b,0x2b,0x00, +0x2b,0x2b,0xf8,0x00,0xf5,0x00,0x00,0xfd,0xfd,0x2b,0xa2,0x00,0xf5,0x12,0x00,0xfe, +0xfd,0xf5,0xfe,0x00,0xfa,0xfd,0xf5,0x2b,0xf6,0xf5,0xf9,0xf8,0x00,0xfe,0xfa,0x00, +0x83,0xfd,0x0a,0xfa,0xfa,0xf9,0x2b,0xfd,0xfd,0x56,0xf5,0xfb,0xfa,0x83,0xfd,0x0a, +0x00,0xfe,0xfd,0xf5,0xfd,0xfd,0xf5,0xfe,0xfd,0xf5,0x86,0xfd,0x11,0xfa,0xf5,0xac, +0xfc,0xf5,0x56,0xf6,0xfd,0xfd,0xac,0xfc,0xf6,0x00,0xf6,0xf5,0xfc,0xfc,0xe2,0xfb, +0xfc,0xfb,0xfb,0x31,0xf7,0x56,0xf6,0x2b,0x56,0x00,0x81,0xf6,0x2b,0xf8,0xf9,0x81, +0xfa,0x00,0xf8,0xfd,0x00,0xfe,0x00,0xf9,0xf9,0xfa,0xf7,0x2b,0xfd,0xf9,0xf5,0x2b, +0x00,0x56,0xfa,0x00,0xf9,0xfe,0xf5,0x56,0xfd,0xf8,0x00,0x00,0xf8,0xfc,0xfd,0xfd, +0xf8,0x2b,0xfd,0xfd,0xf5,0x84,0xfd,0x07,0xf5,0xac,0x00,0xf7,0xfe,0xfd,0xf5,0x86, +0xfd,0x03,0xac,0x00,0xf7,0x86,0xfd,0x0a,0xfc,0xfc,0xfe,0xfd,0xf9,0xf5,0xfb,0xf9, 
+0xf8,0xfb,0x85,0xfd,0x12,0xac,0x00,0x81,0xfd,0xf5,0xfa,0x00,0xfe,0xfd,0xfd,0xf8, +0xf5,0xfe,0xfd,0xfd,0xf6,0xf8,0xfe,0x83,0xfd,0x02,0x00,0xfe,0x85,0xfd,0x02,0xf8, +0xf5,0x8f,0xfd,0xa2,0xfe,0xfd,0x04,0x2b,0xfa,0xfd,0xfe,0x85,0xfd,0x03,0xf5,0xf6, +0x81,0x83,0xfd,0x02,0xf8,0xf5,0x84,0xfd,0x03,0xac,0x00,0xfb,0x83,0xfd,0x02,0xf9, +0x00,0x83,0xfd,0x05,0x56,0xf6,0xf8,0x2b,0xfb,0x83,0xfd,0x01,0xac,0x94,0x06,0xfd, +0x02,0xfe,0x00,0x85,0xfd,0x01,0xfe,0x84,0xfd,0x0b,0x00,0x56,0xfd,0xfd,0xf9,0x00, +0x00,0x2b,0xf9,0xfd,0xfe,0x85,0xfd,0x01,0x81,0xa2,0xf5,0x00,0x02,0x00,0x56,0x83, +0xfd,0x01,0xac,0x8a,0xf9,0x13,0xfc,0xfd,0xf7,0x00,0xac,0xf8,0x00,0xf8,0xfe,0xf5, +0x2b,0xfd,0xfd,0xf8,0xf7,0xfd,0xfd,0xf8,0xf6,0x83,0xfd,0x08,0xfa,0x2b,0xfd,0xfd, +0xf7,0x00,0x00,0xf5,0x85,0x00,0x37,0x2b,0xfd,0xfd,0x81,0xfa,0xfa,0x00,0xf9,0xfa, +0x2b,0xfa,0xfd,0xfd,0xfe,0xfd,0xfd,0x2b,0xf6,0xf7,0x00,0x2b,0x00,0xfd,0x81,0x00, +0xfd,0xf5,0xfa,0xfd,0xfd,0xfe,0x2b,0xf8,0xfd,0xfe,0xf5,0xfd,0xfd,0xf6,0xf8,0xfd, +0xfe,0xf7,0x2b,0xfa,0x2b,0xfd,0xac,0x00,0xac,0x81,0x00,0xfd,0xfd,0xfe,0xc3,0x00, +0x00,0xf5,0x01,0x00,0x86,0xfd,0x07,0xf5,0xfc,0xfd,0xfe,0x56,0xf7,0x56,0x83,0xfd, +0x05,0x2b,0xf8,0xfe,0x2b,0xf5,0x8b,0xfd,0x09,0x81,0xf9,0xfa,0xf9,0xf6,0xf9,0xf9, +0xfa,0xf9,0x83,0xfd,0x01,0xac,0xa2,0xf5,0xfd,0x06,0x00,0xf7,0x2b,0x2b,0xf6,0x2b, +0x83,0xfd,0x0d,0xfe,0x00,0xfd,0xfd,0xfa,0x2b,0xfd,0xfb,0x00,0xfd,0xfd,0xfe,0xf5, +0x83,0xfd,0x06,0xfe,0x2b,0xf9,0xfd,0xfe,0x00,0x84,0xfd,0x07,0xf5,0x00,0xf8,0xfd, +0xac,0xfe,0x00,0x85,0xfd,0x14,0xac,0x00,0xf5,0xf6,0xf5,0x00,0xf7,0xfc,0xfd,0xfd, +0x81,0xf5,0x00,0x00,0xf5,0x00,0xf6,0x2b,0x2b,0xac,0x84,0xfd,0x0d,0xfe,0xf6,0xf9, +0xfd,0xfd,0xfb,0xf5,0xfa,0xfc,0xfd,0xfd,0x2b,0xf7,0x83,0xfd,0x02,0x56,0xf5,0x83, +0xfd,0x02,0xfe,0xf6,0x86,0xfd,0x05,0xf6,0xf7,0xfd,0xfd,0x81,0x89,0xf9,0x01,0xfa, +0x83,0xfd,0x14,0xf7,0xf6,0xfe,0x2b,0xf9,0xfd,0x00,0xfb,0xfd,0xfd,0xfe,0xf5,0xf5, +0xfe,0xf6,0x00,0x81,0xfd,0x00,0xf9,0x85,0xfd,0x01,0xfe,0x83,0xfd,0x0f,0xfe,0x00, 
+0x56,0xfd,0xfd,0xac,0x00,0x00,0xf8,0x56,0x00,0x81,0xf6,0x00,0x2b,0x95,0x06,0xfd, +0x01,0xf5,0x89,0xfd,0x08,0xf5,0xf6,0xfe,0xfd,0xfd,0xf9,0x2b,0xfe,0x8b,0xfd,0x05, +0xfe,0xfd,0xfe,0x00,0xf9,0x83,0xfd,0x01,0xfb,0xe2,0x2b,0x2b,0x2b,0xf7,0x15,0x2b, +0x2b,0xfb,0xfd,0xac,0xac,0xfd,0xfd,0xac,0x00,0xf5,0x00,0xac,0xfd,0xfd,0x2b,0xf9, +0xfd,0xfd,0xf5,0xf9,0x83,0xfd,0x02,0x56,0x2b,0xe2,0xfe,0xfd,0x2b,0xfa,0x04,0xfe, +0xfd,0xfa,0x2b,0x83,0xfd,0x07,0x00,0xfd,0xf5,0xfe,0xfd,0x2b,0xf9,0x83,0xfd,0x0e, +0xfc,0xfe,0x2b,0xf7,0xf9,0x00,0xfa,0x00,0xfd,0xf9,0xf7,0xfd,0x00,0xfe,0x83,0xfd, +0x1d,0x00,0xfb,0xfd,0xfd,0x00,0xfe,0xfd,0xfb,0x00,0xfd,0xfd,0xf5,0x81,0xf9,0x2b, +0xfe,0x00,0xf9,0xfd,0xfd,0xf6,0xf9,0xfd,0xfd,0xfe,0xfd,0xfd,0x2b,0xf8,0xa3,0xfe, +0xfd,0x84,0xfd,0x02,0xfa,0xf6,0x84,0xfd,0x01,0xfe,0x84,0xfd,0x05,0x00,0xfe,0xfd, +0x56,0x00,0x8b,0xfd,0xe2,0x00,0xf7,0x2b,0x2b,0x02,0x00,0xfe,0x83,0xfd,0x01,0xfe, +0xa2,0xfd,0x00,0x1e,0xfa,0xf9,0xfa,0xf7,0x2b,0xfe,0xfd,0xfd,0x56,0xf6,0xfd,0xfd, +0xf9,0x2b,0xfd,0xfd,0xf6,0xf8,0xfd,0xfd,0x00,0xfe,0xfd,0x00,0xfd,0xf5,0xac,0xfd, +0xfd,0x00,0x83,0xfd,0x09,0xac,0x00,0xf8,0xfe,0xfd,0xfd,0xfb,0x00,0xfe,0x83,0xfd, +0x09,0xfa,0x00,0xf5,0xf8,0xfb,0xfe,0xac,0xf7,0x00,0x84,0xfd,0x04,0xfe,0xfd,0xfd, +0x00,0xa2,0xfe,0xfd,0x85,0xfd,0x01,0x00,0xe2,0xfe,0xfd,0xfd,0xfd,0x03,0xfd,0x00, +0xfb,0x83,0xfd,0x03,0xfe,0x00,0xfc,0x89,0xfd,0x06,0xfc,0x00,0xac,0xfd,0xfd,0xf8, +0xc3,0x2b,0x2b,0xf7,0x01,0xf8,0x83,0xfd,0x08,0xf5,0x56,0xfd,0x2b,0xfa,0xfd,0xf7, +0x2b,0x83,0xfd,0x09,0xfb,0xfe,0xfd,0xfd,0x81,0x00,0xf5,0xf5,0xfe,0x89,0xfd,0x0e, +0xf5,0xf6,0xfe,0xfd,0xfb,0x00,0x2b,0xfe,0xfd,0xf8,0x00,0xf5,0x56,0xfe,0x96,0x06, +0xfd,0x01,0x00,0x87,0xfd,0x04,0xac,0xf6,0x00,0xac,0x83,0xfd,0x02,0xfa,0x2b,0x8f, +0xfd,0x02,0x00,0xfe,0x86,0xfd,0x01,0xfe,0x83,0xfd,0xc2,0xfe,0xfd,0xfd,0x85,0xfd, +0x0b,0xac,0x00,0xf9,0xfd,0xfd,0xfe,0x2b,0x56,0xfd,0xfb,0x00,0x83,0xfd,0x14,0xfe, +0xf5,0xf9,0xfd,0xfa,0xf5,0xf7,0xf9,0xf9,0xf6,0xf7,0xf9,0xf9,0xf7,0xf5,0x81,0xfd, 
+0xfd,0x00,0xfe,0x83,0x00,0x12,0x2b,0xf9,0xfd,0xfd,0xfe,0x00,0xfd,0xf8,0xf5,0xf5, +0x00,0x00,0xf5,0xfd,0xf7,0xf8,0xfd,0xf5,0x83,0xfd,0x06,0xf9,0xf5,0xfd,0xfd,0xfe, +0x00,0x83,0xfd,0x0f,0xf7,0xfc,0xac,0x00,0xfd,0xf9,0xf7,0xf5,0xf8,0xfd,0xfd,0xfe, +0xf9,0xf5,0xfe,0xc2,0x00,0x00,0xf5,0x83,0x00,0x03,0xf5,0x00,0xf6,0x84,0xfd,0x02, +0xfa,0x2b,0x89,0xfd,0x05,0xf7,0xf6,0xf9,0xf5,0x00,0x8b,0xfd,0x09,0xf5,0x2b,0x2b, +0xf7,0x00,0x2b,0xf7,0x2b,0x00,0x83,0xfd,0x20,0xfe,0xf7,0xfd,0xf5,0xfd,0xf7,0x2b, +0x00,0x2b,0x2b,0xf9,0xfd,0xfd,0xfe,0x00,0xf9,0xfd,0xfd,0xf9,0x2b,0xfd,0xfd,0x81, +0x56,0xfd,0xfe,0x00,0xfd,0xfd,0xf5,0xfb,0x00,0x83,0xfd,0x06,0x00,0xfd,0xfd,0xac, +0x00,0x00,0x84,0xfd,0x02,0xf9,0x2b,0x83,0xfd,0x05,0x81,0x00,0xf9,0xfd,0xfe,0x84, +0xfd,0x0a,0xf6,0xfa,0xfd,0xfd,0xac,0xf8,0x2b,0x2b,0xf5,0xfc,0x87,0xfd,0x02,0xfe, +0x00,0x88,0xfd,0x02,0xf8,0x00,0x85,0xfd,0x02,0x2b,0x2b,0x89,0xfd,0x02,0xf5,0x2b, +0x85,0xfd,0xc3,0xfe,0xfd,0xfd,0x0b,0xfd,0xfd,0xfb,0x00,0xfd,0xfe,0x2b,0xf9,0xfd, +0xac,0x00,0x88,0xfd,0x03,0xf9,0x00,0xac,0x88,0xfd,0x0e,0xac,0xf5,0x00,0xac,0xfd, +0xfd,0xfc,0xf8,0xfe,0xfa,0xf5,0x2b,0xf6,0xf9,0x98,0x06,0xfd,0x01,0xf5,0x86,0xfd, +0x04,0xf7,0x00,0xf6,0xfe,0x84,0xfd,0x02,0xf9,0x2b,0x8f,0xfd,0x01,0x00,0x95,0x01, +0xfd,0x04,0xfc,0x00,0xf8,0xfe,0x83,0xfd,0x05,0x81,0xf5,0xfd,0xf5,0xf8,0x83,0xfd, +0x07,0xfa,0x00,0xfe,0xfd,0xf6,0xf5,0xf6,0x84,0x2b,0x23,0xf7,0x2b,0xf6,0xf5,0xf8, +0xfd,0xfb,0x00,0xfb,0xf5,0xfe,0xfd,0x2b,0x56,0xfd,0xfd,0xf9,0xf6,0xfd,0xf8,0x2b, +0x2b,0xf5,0x2b,0x2b,0xfe,0xf5,0xfb,0xfd,0x00,0xfe,0xfd,0xfd,0xf5,0xf8,0x83,0xfd, +0x01,0x00,0x85,0xfd,0x18,0x2b,0x2b,0xfe,0xf9,0x00,0xf8,0xfe,0xfd,0xfb,0xf6,0xfd, +0x00,0xfc,0xfe,0xfd,0xf8,0xf5,0xfe,0xfd,0xfe,0xf5,0x56,0xfd,0xfe,0x85,0xfd,0x03, +0xfc,0x00,0xac,0x89,0xfd,0x04,0xf8,0x2b,0xfa,0x00,0x83,0xfd,0x05,0xac,0x00,0x00, +0xf6,0xfe,0x83,0xfd,0x09,0xf5,0xf9,0xfa,0xf9,0x00,0xfa,0xf9,0xfa,0x00,0x83,0xfd, +0xa2,0x81,0xf5,0xa3,0xfe,0x00,0x0a,0xac,0xfd,0xfd,0xfb,0x00,0xfe,0xfd,0xfd,0x2b, 
+0xf9,0x86,0xfd,0xa2,0xf5,0x2b,0x0b,0xf6,0xf8,0xfd,0xfd,0xfb,0xf5,0xfd,0xfd,0x00, +0xf8,0xf5,0x84,0xfd,0x1a,0x2b,0x56,0xfd,0xf8,0xfa,0xfd,0xfb,0xfe,0x00,0x00,0xf5, +0xfa,0xfd,0xfd,0xf6,0x81,0xfd,0xfc,0x00,0xf7,0xfa,0x56,0x00,0x00,0xf6,0xac,0x86, +0xfd,0x02,0x00,0x81,0x86,0xfd,0x03,0xfc,0x00,0x81,0x85,0xfd,0x03,0xac,0x00,0xfa, +0x87,0xfd,0x03,0xf5,0xf5,0xfe,0x90,0x01,0xfd,0x0a,0xf5,0x2b,0xfe,0xfd,0x2b,0xf9, +0xfd,0xfd,0xf5,0xf7,0x86,0xfd,0x03,0xfa,0x00,0x81,0x88,0xfd,0x04,0xf8,0x00,0xf6, +0xfe,0x86,0xfd,0x05,0xf5,0x81,0xfd,0x2b,0xfa,0x90,0x06,0xfd,0x01,0xf8,0xc2,0x00, +0xf5,0x00,0x0a,0xf5,0x00,0xfd,0xfd,0x81,0xf9,0xf6,0x00,0xf5,0xf9,0x86,0xfd,0x03, +0x81,0xf5,0xfb,0x83,0xfd,0x03,0xfb,0xf9,0x56,0x87,0xfd,0x02,0x81,0x00,0x94,0x01, +0xfd,0x03,0xf9,0x00,0xf8,0x86,0xfd,0x0e,0x2b,0x00,0xf6,0xfd,0xfd,0xac,0x2b,0x00, +0x81,0xfd,0xfd,0xfe,0x2b,0xf9,0xa2,0xfd,0xfe,0x27,0xfd,0xfd,0xfa,0x2b,0xfd,0xfd, +0xf8,0x2b,0xf5,0x00,0xfd,0xfd,0x81,0xf5,0xf5,0x00,0xf5,0x81,0xfd,0x81,0xf9,0xfa, +0x00,0xfa,0xf8,0xfb,0x00,0xfd,0xfe,0x00,0xfd,0xfd,0xfb,0x00,0xfe,0xfb,0xfd,0x56, +0xf6,0x85,0xfd,0x05,0xfb,0xfb,0xf9,0x00,0xf6,0x83,0xfd,0x0d,0xf9,0xf5,0xfd,0xf8, +0xac,0xfd,0xac,0x56,0x2b,0x00,0xf5,0xf5,0x2b,0x88,0xfd,0x04,0xfe,0x2b,0x00,0x56, +0x89,0xfd,0x03,0xac,0x00,0xf9,0x83,0xfd,0x05,0xf7,0xf8,0xfe,0xf6,0xfa,0x83,0xfd, +0x02,0xf5,0xf5,0xc2,0x00,0xf5,0x00,0x04,0x00,0xfb,0xfb,0xfd,0xa2,0xf7,0xf8,0x10, +0xfa,0xf6,0xfd,0xf5,0xfd,0xf8,0x2b,0xfd,0xfe,0xf5,0xf8,0x81,0xfd,0xfe,0x00,0xfb, +0x86,0xfd,0x0f,0xfb,0xf9,0x81,0xf8,0xf5,0xfe,0xfd,0xfd,0x56,0x2b,0xfd,0xfd,0x81, +0xfe,0x00,0x83,0xfd,0x1c,0xfe,0x2b,0x56,0xac,0x00,0xfb,0xfd,0xfd,0xf9,0xf6,0xfd, +0xac,0x00,0xfb,0xf9,0xf5,0xfd,0xfd,0xfa,0x2b,0xfd,0xfd,0xac,0x00,0xfe,0x2b,0x00, +0x56,0x85,0xfd,0x04,0xfa,0x00,0xf8,0xfc,0x83,0xfd,0x03,0xac,0xf5,0x2b,0x87,0xfd, +0x03,0xf8,0x00,0xfb,0x84,0xfd,0x04,0xfa,0x00,0xf5,0xfc,0x90,0x01,0xfd,0x0c,0xf8, +0x00,0xfe,0xfd,0xfd,0x2b,0xf9,0xfd,0xfd,0xac,0x00,0x81,0x84,0xfd,0x03,0xf7,0x00, 
+0x81,0x85,0xfd,0x06,0x81,0xf9,0xf6,0x00,0x00,0xf9,0x87,0xfd,0x06,0xfe,0x00,0xfd, +0xfd,0xfb,0xac,0x91,0x06,0xfd,0xa2,0xfe,0xfd,0x0b,0xfd,0xfe,0xfd,0x2b,0xfd,0xfd, +0x2b,0xf5,0xf7,0xfb,0xfe,0x88,0xfd,0x01,0xf7,0xa2,0x00,0xf5,0x06,0x00,0xf7,0xf8, +0xfd,0xfd,0xf8,0xa2,0x00,0xf5,0x05,0x00,0x00,0xf5,0x00,0x81,0x8f,0xfd,0x04,0xf9, +0x00,0xf5,0x81,0x89,0xfd,0x06,0xfe,0xfd,0x2b,0x00,0xf6,0xac,0x84,0xfd,0x02,0x2b, +0xfa,0x86,0xfd,0x0a,0xf9,0x2b,0xfe,0xfd,0x00,0xfc,0xfa,0xf5,0xf6,0x56,0xa2,0xf9, +0xfa,0x17,0xf9,0xf8,0xfc,0x2b,0xf5,0xf5,0x00,0xf5,0x2b,0xf5,0x56,0xfd,0xfb,0x00, +0xfe,0xfd,0x00,0x56,0xfd,0xf5,0x00,0x00,0xfa,0x85,0xfd,0x05,0x81,0xf5,0xf5,0xf8, +0x2b,0x83,0xfd,0x03,0x2b,0xf7,0xfe,0x84,0xfd,0x09,0xfa,0xf8,0xf6,0xf5,0xf7,0xf5, +0x00,0xf7,0xac,0x87,0xfd,0x04,0x56,0xf5,0x00,0x56,0x85,0xfd,0x05,0xac,0xf8,0x00, +0xf8,0xfe,0x83,0xfd,0x05,0xf8,0xf6,0xac,0x00,0xfb,0x83,0xfd,0x05,0xf8,0xfe,0xfd, +0xfd,0xf5,0xa2,0xfe,0xfd,0x02,0xf6,0xf8,0xa2,0xac,0x00,0x10,0xfc,0x00,0xfb,0xfd, +0xf5,0xfd,0xfe,0x00,0xf9,0xf9,0x00,0xfe,0x56,0x00,0x00,0xf5,0x88,0xfd,0x09,0xac, +0x2b,0x00,0xac,0xfd,0x56,0xf9,0xf5,0xfa,0x84,0xfd,0x01,0xf5,0x84,0xfd,0x04,0xf9, +0x00,0xf5,0xf8,0x83,0xfd,0x08,0xfe,0x00,0xf6,0x2b,0xf5,0x00,0xf6,0xac,0x83,0xfd, +0x09,0xf5,0xf5,0x2b,0xf5,0xf7,0xfd,0xfe,0xfa,0xf7,0x86,0xfd,0x04,0xfa,0xf6,0x00, +0xf6,0x83,0xfd,0x01,0x2b,0x89,0xfd,0x02,0x2b,0xac,0x83,0xfd,0x04,0x2b,0x00,0xf8, +0xfe,0x91,0x01,0xfd,0x02,0xac,0x81,0x83,0xfd,0x02,0x2b,0xfa,0x83,0xfd,0x08,0xf9, +0xac,0xfd,0xfd,0xf8,0x00,0xf5,0xac,0x86,0xfd,0x05,0x2b,0xf5,0xf7,0xfb,0xfe,0x89, +0xfd,0x02,0x2b,0x00,0x83,0x2b,0x03,0xf6,0x00,0x81,0x96,0x07,0xfd,0xa3,0xfe,0xfd, +0x86,0xfd,0x03,0xfe,0xfd,0xfd,0xa2,0xfe,0xfd,0x8f,0xfd,0x02,0xf9,0xf8,0x95,0x01, +0xfd,0x02,0xf8,0xfa,0x84,0xfd,0x0c,0xf6,0xf5,0x00,0x81,0xfd,0xac,0xf8,0xfd,0xfd, +0xfe,0x81,0xf8,0x86,0x2b,0x0f,0xfe,0xf9,0xac,0xfd,0xfe,0xfd,0xfa,0xf6,0xac,0xf5, +0x00,0xf9,0xfd,0xfd,0xac,0x83,0xfd,0x01,0xfe,0x87,0xfd,0x0a,0x81,0xf8,0xfd,0xac, 
+0xf6,0x00,0xf5,0x00,0xf6,0xac,0x83,0xfd,0x0c,0xf5,0x00,0xf6,0xf7,0xfb,0xfd,0xfd, +0xfe,0xac,0xf7,0x00,0xfa,0x86,0xfd,0x01,0xfe,0x89,0xfd,0x02,0xf6,0xfb,0x86,0xfd, +0x03,0xf7,0x00,0x56,0x87,0xfd,0x02,0xfe,0xf7,0xa2,0x00,0xf5,0x03,0xf6,0xfc,0xfb, +0xa2,0x56,0xfd,0x04,0xac,0xac,0x00,0xf5,0xc2,0xfd,0xfd,0xac,0x84,0xfd,0x01,0xfe, +0x88,0xfd,0x08,0xfa,0x2b,0xfe,0xfd,0xfd,0xf8,0x2b,0xf8,0x85,0xfd,0x01,0xf7,0x85, +0xfd,0x01,0xfe,0x87,0xfd,0x03,0x81,0xfa,0xf9,0x87,0xfd,0x04,0x81,0xf9,0xfb,0xfe, +0x90,0x02,0xfd,0x01,0xac,0x99,0x01,0xfd,0x02,0xf9,0xfb,0x87,0xfd,0x02,0xf8,0xf9, +0x8a,0xfd,0x01,0xfe,0x8c,0xfd,0x05,0xfe,0xf9,0xfa,0xf9,0x81,0x9a,0x09,0xfd,0x01, +0xfe,0x9e,0x01,0xfd,0x01,0xfe,0x88,0xfd,0xc2,0xfe,0xfd,0xfd,0x8b,0xfd,0x01,0xfe, +0x94,0x01,0xfd,0xa2,0xfe,0xfd,0x85,0xfd,0x01,0xfe,0x93,0x02,0xfd,0x01,0xfe,0x8a, +0xfd,0xe2,0xfe,0xfd,0xfd,0xfd,0x85,0xfd,0x01,0xfe,0x9a,0x01,0xfd,0x01,0xfe,0x9f, +0x01,0xfd,0x01,0xfe,0x96,0x04,0xfd,0x01,0xfe,0x90,0x5e,0xfd,0x03,0xf6,0xfd,0xfd, +0x86,0xf9,0x01,0x81,0x8e,0xfd,0x02,0xf9,0xf8,0x84,0xfd,0x01,0xf6,0x85,0xfd,0x02, +0xfc,0xac,0x84,0xfd,0x02,0xfc,0xfa,0x92,0x01,0xfd,0x02,0xf8,0xfa,0x8a,0xfd,0x01, +0xfb,0x90,0x01,0xfd,0x02,0xfc,0xfa,0x94,0x02,0xfd,0x02,0x00,0xfe,0x9e,0x02,0xfd, +0x08,0xfc,0xfb,0xfd,0xfc,0xf8,0xf6,0xf9,0xfe,0x84,0xfd,0x02,0xf6,0xac,0x94,0x01, +0xfd,0x02,0xfb,0xf8,0x87,0xfd,0x04,0xfa,0xf8,0xfd,0xfb,0x86,0xf9,0x01,0xac,0x91, +0x01,0xfd,0x01,0x2b,0x8b,0xfd,0x02,0xf9,0xf9,0x95,0x0c,0xfd,0x0a,0xfe,0x00,0xfd, +0xfe,0x00,0x2b,0x2b,0x00,0xf7,0x2b,0xe2,0x00,0xfd,0xfd,0xfd,0x87,0xfd,0x03,0xf5, +0xfb,0xfb,0x83,0x2b,0x01,0x00,0x83,0x2b,0x04,0xfd,0xfd,0xfa,0xf5,0x84,0xfd,0x02, +0xfb,0xf5,0x8c,0xfd,0x02,0xac,0xfc,0x83,0xfd,0x03,0xfe,0x00,0xf9,0x89,0xfd,0x05, +0xf9,0x00,0xf5,0x56,0xfc,0x8d,0xfd,0x02,0xf9,0xf6,0x88,0xfd,0x07,0xfb,0x00,0x00, +0x2b,0xf7,0xf9,0xfc,0x90,0x01,0xfd,0x01,0xf8,0xa2,0x00,0xf5,0x02,0x00,0x00,0xa2, +0xf5,0x00,0x84,0xfd,0x0b,0xfa,0xfb,0xfd,0xfd,0xf9,0xac,0xf5,0xf6,0xf7,0xfd,0xfb, 
+0x88,0xf9,0x01,0xfc,0x92,0x01,0xfd,0x07,0xf9,0x2b,0xfd,0xfe,0xf5,0x56,0xf5,0x84, +0xfd,0x02,0xfa,0x00,0x87,0xfd,0x01,0xfc,0x8d,0xfd,0x02,0xf6,0x2b,0x83,0xfd,0x01, +0xfc,0x83,0xfd,0x04,0xfa,0x2b,0xfd,0x2b,0xc2,0xf6,0x2b,0xf5,0x05,0xf9,0xfd,0xfd, +0x2b,0x2b,0x8d,0xfd,0x01,0x00,0x8a,0xfd,0x03,0xfe,0x2b,0x2b,0x94,0x0c,0xfd,0x10, +0xfb,0xf9,0x00,0xf9,0xfb,0xf5,0xfe,0xfd,0xf5,0xfd,0xfe,0x00,0xfe,0xfd,0xfd,0x00, +0x88,0xfd,0x10,0xfb,0x00,0x2b,0xf9,0xf6,0xfa,0xf9,0xfa,0x00,0xfa,0xf9,0xfa,0xfd, +0xfd,0x2b,0xf7,0x85,0xfd,0x09,0x00,0xfe,0xfd,0xfd,0xf8,0xf5,0xf5,0x00,0xf5,0x85, +0x00,0x01,0x81,0x84,0xfd,0x06,0x00,0xfe,0xfd,0xfd,0xfc,0xf9,0x85,0xfd,0x06,0xfe, +0xfd,0x56,0xf6,0x00,0x56,0x8a,0xfd,0x05,0xfb,0xfd,0x2b,0x2b,0xfe,0x88,0xfd,0x06, +0xfe,0x81,0xf8,0x2b,0x2b,0xfc,0x90,0x01,0xfd,0x0b,0x2b,0xf7,0xfd,0x81,0xf8,0xfe, +0xfd,0xf8,0x81,0xfd,0xfe,0x84,0xfd,0x0c,0x00,0x56,0xfd,0xfd,0x00,0x81,0xf6,0xac, +0xf6,0xfd,0x2b,0xf6,0xa2,0x2b,0xf7,0x04,0x2b,0x2b,0x00,0x56,0x92,0x01,0xfd,0x06, +0xfa,0x2b,0xfd,0xfd,0xfb,0x81,0x85,0xfd,0x86,0x00,0x05,0xf6,0xfd,0xac,0x00,0xf7, +0x89,0xfd,0x02,0xf6,0xf5,0x83,0x00,0x0a,0xf5,0x00,0xf5,0xf6,0xfe,0xfd,0xf9,0xf7, +0xf6,0xf9,0xc2,0x2b,0xfa,0xfd,0x06,0x2b,0xf9,0xfd,0xfd,0x2b,0xf8,0x88,0xfd,0x02, +0xfa,0xf6,0x83,0x2b,0x07,0x00,0x2b,0xf5,0x00,0xf6,0xfd,0xfb,0x83,0x2b,0x01,0xf6, +0x83,0x00,0x04,0xf5,0x00,0xf5,0xfa,0x90,0x0c,0xfd,0x10,0xf9,0x2b,0xf5,0x2b,0xf9, +0x00,0xf9,0xfa,0x00,0xf9,0xf9,0x00,0xfd,0xfd,0xfe,0x00,0x88,0xfd,0x22,0xfe,0xf8, +0xf6,0xf5,0xac,0xf8,0x2b,0x2b,0x00,0x2b,0x2b,0xf8,0xfd,0xfd,0x2b,0xfa,0xfd,0x56, +0xf9,0xf9,0xfa,0x00,0xf6,0xf5,0xfd,0x56,0xfa,0xf9,0xac,0xfd,0xac,0xf5,0x2b,0xac, +0x83,0xfd,0x0a,0xf9,0x2b,0xf6,0x00,0x00,0xf5,0xfa,0xfe,0x00,0x56,0x83,0xfd,0x02, +0x2b,0xf7,0x83,0xfd,0x01,0xfe,0x8a,0xfd,0x08,0xfe,0x2b,0x00,0x00,0xf5,0x00,0x00, +0x2b,0x89,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x02,0xfe,0x2b,0xe2,0x00,0xf5,0x00,0x00, +0x02,0xf5,0x00,0x84,0xfd,0x0c,0xf5,0xfb,0xfd,0xfd,0xf7,0xf8,0xf9,0x2b,0xfb,0xfe, 
+0x2b,0xf9,0xc2,0xfd,0xfd,0xfe,0x02,0x00,0xac,0x8d,0xfd,0x01,0xfe,0xa2,0x00,0xf5, +0x83,0x00,0x04,0xf5,0x00,0xf5,0xfa,0x83,0xfd,0x02,0x2b,0xf6,0x83,0xfd,0x09,0xfe, +0xf5,0xf8,0xfd,0xfd,0xac,0x00,0xf5,0xfc,0x84,0xfd,0x02,0x2b,0xfa,0x83,0xfd,0x02, +0xf7,0x2b,0xa2,0xfe,0xfd,0x83,0xfd,0x05,0x2b,0xf6,0x00,0xf7,0x2b,0xc2,0xf7,0xf9, +0xf6,0x05,0xf9,0xfd,0xfe,0x2b,0xf9,0x89,0xfd,0x0b,0x81,0xf9,0xfa,0xf9,0x00,0xfa, +0xfc,0xfe,0xfd,0xfd,0xfb,0xa2,0xfa,0xf9,0x07,0xfa,0xf7,0x2b,0xfd,0xfe,0xfd,0xac, +0x92,0x0c,0xfd,0x09,0x00,0xfe,0xfd,0x00,0x2b,0x2b,0x00,0xf7,0x2b,0xe2,0xf5,0xfd, +0xfd,0xfd,0x87,0xfd,0x04,0xf5,0xf7,0x56,0x81,0xa2,0xfa,0xf9,0x07,0xfa,0xfa,0xfd, +0xfd,0xf5,0xfb,0xfd,0x84,0x2b,0x02,0xf5,0xfa,0x86,0xfd,0x03,0xfc,0x00,0x81,0x85, +0xfd,0x0f,0xf7,0xf8,0x2b,0xf6,0xfe,0xfa,0x00,0xfd,0xfb,0x00,0xac,0xfd,0xfd,0x00, +0xfa,0x90,0x01,0xfd,0x04,0xfe,0x00,0xfb,0xfe,0x86,0xfd,0x08,0xf9,0xf8,0x2b,0xf5, +0xf5,0x00,0xf5,0x56,0x90,0x01,0xfd,0x09,0x2b,0x56,0xfe,0xf9,0x2b,0xfe,0xfd,0x2b, +0xfa,0x85,0xfd,0x02,0xfc,0x00,0x83,0xfd,0x02,0x56,0xf6,0x84,0xfd,0x02,0x2b,0xfa, +0x85,0xfd,0x02,0xfb,0x00,0x8f,0xfd,0xa2,0xfe,0xfd,0x02,0xf9,0xf7,0xa2,0xfe,0xfd, +0x83,0xfd,0x08,0xf8,0x00,0x81,0xac,0xfd,0xfd,0xfb,0x00,0x84,0xfd,0x10,0xfe,0xf5, +0x56,0xfe,0xfd,0xfd,0xac,0x00,0xfb,0xfd,0xfd,0xfb,0x00,0xf9,0x2b,0xf9,0x83,0xfd, +0x07,0xfc,0xfd,0xfe,0xf9,0x2b,0xfe,0x2b,0xc2,0xf6,0x2b,0xf5,0x05,0xfa,0xfd,0xfd, +0x2b,0xfa,0x89,0xfd,0x09,0xfb,0xfd,0xfd,0xfe,0x00,0xfc,0xf9,0x56,0xf9,0x84,0xfd, +0x05,0xfb,0xf6,0xf5,0xf5,0xf7,0x95,0x0c,0xfd,0x04,0xfe,0xf5,0xfc,0x81,0xc2,0xf5, +0xfd,0xfe,0x01,0xf5,0x83,0xfd,0x01,0x00,0x89,0xfd,0x05,0xfc,0x00,0xfa,0xf5,0x2b, +0xa3,0x00,0xf5,0x03,0xfd,0xfd,0xf5,0x83,0xfd,0x04,0xfe,0xfd,0xfe,0x00,0x87,0xfd, +0x02,0x2b,0xfa,0x86,0xfd,0x14,0xfe,0xfd,0x2b,0xf8,0xfd,0xfe,0xf5,0xfd,0xfe,0x2b, +0x2b,0xfd,0xfe,0x00,0xfd,0xac,0x56,0x2b,0x2b,0x56,0x89,0xfd,0x04,0xfc,0xfb,0xfd, +0xf5,0x84,0xfd,0x0f,0x56,0xfc,0xfd,0xfd,0x2b,0xf6,0x56,0x81,0xfd,0xfd,0xfc,0x00, 
+0xf9,0xfd,0xfb,0x8b,0x2b,0x0b,0xfb,0xfd,0x2b,0xf9,0xfd,0xfa,0x00,0x00,0xf5,0x00, +0xf9,0x85,0xfd,0x02,0x56,0xf5,0x83,0xfd,0x02,0xac,0x00,0x84,0xfd,0x02,0xf8,0xfa, +0x85,0xfd,0x05,0xf8,0xf5,0xfd,0xfd,0xfb,0x8a,0xf9,0x83,0xfd,0x08,0x81,0x00,0xfd, +0xfa,0x2b,0xfd,0xf6,0xf7,0x83,0xfd,0x09,0xf7,0x00,0xac,0xf8,0x00,0xf7,0xfe,0xf5, +0x2b,0x85,0xfd,0x01,0xfe,0x84,0xfd,0x11,0xf6,0xf6,0xfd,0xfd,0xfe,0x00,0xf5,0x2b, +0xfa,0x00,0x56,0xf7,0x00,0xf5,0xfd,0xfd,0xfa,0xa2,0x2b,0xf9,0xc2,0xfe,0x2b,0xf9, +0x04,0xfd,0xfd,0x2b,0xf9,0x89,0xfd,0xa2,0xf5,0x00,0x05,0x00,0xf5,0x2b,0x2b,0xfb, +0x83,0xfd,0x06,0xfe,0xf5,0xfa,0xac,0xf5,0x2b,0x95,0x0c,0xfd,0x0f,0xf9,0x00,0xf5, +0xf9,0x00,0xfe,0xfd,0x00,0xfd,0xfd,0x00,0xfe,0xfd,0xfd,0xf5,0x86,0xfd,0x03,0x2b, +0xfd,0xfa,0xa2,0x00,0xf6,0x05,0xf5,0xfe,0x81,0xac,0x81,0xc2,0xfd,0x00,0xfe,0x01, +0xfb,0x84,0xfd,0x01,0xf5,0x87,0xfd,0x02,0x00,0xfe,0x88,0xfd,0x14,0x00,0xac,0xfd, +0xfd,0x00,0xfd,0xfd,0xac,0x00,0xfd,0xfd,0x00,0xf6,0x00,0xf7,0xfa,0xf9,0xf6,0xf5, +0xfe,0x87,0xfd,0x01,0xfb,0xc2,0xf5,0x00,0x00,0x04,0xfa,0xf8,0xf6,0xfe,0x88,0xfd, +0x04,0xf9,0x2b,0xfe,0xfb,0xc3,0x2b,0x2b,0xf7,0x06,0x2b,0x2b,0xfb,0xfd,0x2b,0xfa, +0xc2,0xfd,0xfd,0xfe,0x86,0xfd,0x03,0xf6,0xf8,0xfe,0x83,0xfd,0x02,0xf5,0x56,0x83, +0xfd,0x01,0xfe,0x86,0xfd,0x05,0x00,0xfa,0xfd,0xfd,0xf9,0xc3,0x2b,0xf7,0x2b,0x01, +0x2b,0x83,0xfd,0x14,0x2b,0x2b,0xfd,0xf9,0x2b,0xfe,0x81,0x00,0xfe,0xfd,0xfd,0xac, +0xfc,0xfd,0xfd,0xac,0x00,0xf5,0x00,0xac,0x89,0xfd,0x19,0xf8,0x00,0xac,0xfd,0xfc, +0xf5,0xf6,0xac,0xfd,0xf9,0x00,0xf5,0xf8,0xac,0xfe,0xfd,0xfc,0xf6,0x00,0x2b,0x2b, +0xfa,0xfd,0x2b,0xf9,0xe2,0xfd,0x2b,0xfa,0xfd,0x84,0xfd,0x0b,0x56,0xfa,0xfe,0xfd, +0xfd,0xfe,0xfd,0xfe,0x00,0xfe,0xfe,0x86,0xfd,0x05,0x00,0xfe,0xfd,0xf6,0xf5,0x94, +0x0c,0xfd,0x06,0xf7,0xf6,0x00,0xfd,0xfd,0xf5,0x84,0x00,0x06,0xf5,0x00,0xfd,0xfd, +0xfe,0x00,0x85,0xfd,0x09,0x81,0x00,0xfd,0xfe,0xf7,0xf9,0xf7,0xf7,0xf8,0xa2,0xfd, +0x00,0x07,0xfe,0xf8,0xfd,0xfd,0xf5,0xfb,0x00,0x83,0xfd,0x02,0xfb,0xf5,0x87,0xfd, 
+0x01,0xf5,0x88,0xfd,0x02,0xf9,0xf5,0x83,0xfd,0x01,0x00,0x83,0xfd,0x0c,0xf8,0xfc, +0xfb,0x00,0x56,0xfd,0xfe,0xfd,0xfd,0xfe,0xf6,0xfa,0x89,0xfd,0x08,0xfe,0xf5,0xfd, +0xfd,0xfe,0xfd,0xfe,0x00,0x89,0xfd,0x02,0xf8,0xf7,0xc5,0xfd,0xfd,0xfe,0x02,0xf5, +0xfa,0x84,0x00,0x04,0xf5,0x00,0x00,0xfb,0x83,0xfd,0x03,0xfb,0x00,0xfc,0x84,0xfd, +0x02,0xfa,0x00,0x89,0xfd,0x02,0xf7,0xf5,0x84,0xfd,0xc4,0xfe,0xfd,0xfd,0x0a,0xfd, +0x00,0x81,0xfd,0xf9,0x2b,0xfd,0xfd,0x00,0xfb,0x87,0xfd,0x03,0xac,0x00,0xfa,0x89, +0xfd,0x0d,0xf7,0x00,0x81,0xfd,0xfd,0xfe,0xf7,0xfd,0xac,0xf5,0xf6,0xf7,0x2b,0x84, +0xfd,0x05,0x00,0x2b,0x2b,0xfd,0xf7,0x84,0x00,0x07,0xf5,0x00,0xf9,0xfd,0xfe,0x2b, +0xf9,0x85,0xfd,0x02,0xf5,0x56,0x83,0xfd,0x05,0x56,0x2b,0x2b,0x00,0xfb,0x87,0xfd, +0x05,0xf5,0xf8,0x56,0x00,0xf5,0x96,0x0c,0xfd,0x0a,0xf5,0xfd,0xfd,0xf8,0xfe,0xfd, +0xf5,0xfe,0xfd,0xf8,0x83,0xfd,0x01,0xf5,0x85,0xfd,0x0d,0x00,0xf8,0xfd,0xfd,0xf5, +0xfa,0x2b,0xf6,0xfe,0xfd,0x00,0xfd,0xf5,0x84,0xfd,0x03,0xf5,0xf6,0xfa,0x83,0xfd, +0x02,0xf8,0xf7,0x87,0xfd,0x02,0x00,0x81,0x87,0xfd,0x02,0xf5,0x56,0x83,0xfd,0x01, +0xf5,0x87,0xfd,0x01,0xfe,0x85,0xfd,0x04,0xf6,0x81,0xfd,0xf8,0x88,0xfd,0x02,0x00, +0xfe,0x84,0xfd,0x01,0x00,0x88,0xfd,0x03,0xac,0x00,0xac,0x8f,0xfd,0x09,0x00,0xfe, +0xfd,0xfb,0xf5,0xac,0xfd,0xf6,0xf8,0x84,0xfd,0x03,0xf5,0xf7,0xfe,0x85,0xfd,0x02, +0xf5,0xf7,0x87,0xfd,0x03,0xf8,0x00,0xfb,0x90,0x01,0xfd,0x0a,0xf8,0xf5,0xfd,0xfd, +0xfa,0x2b,0xfd,0xfd,0xf8,0xf5,0x86,0xfd,0x03,0xfc,0x00,0xf8,0x88,0xfd,0x04,0xf9, +0x00,0x00,0xac,0x86,0xfd,0x06,0x2b,0xf8,0xfe,0xf9,0x2b,0xfe,0x84,0xfd,0x0f,0xf9, +0x2b,0xfe,0xf9,0xfb,0xfe,0x2b,0xfa,0xfd,0xf9,0xfb,0xfd,0xfd,0x2b,0xf9,0x84,0xfd, +0x02,0x81,0x00,0x83,0xfd,0x08,0xf5,0xf6,0xfa,0xf9,0x00,0x00,0xf5,0xfb,0x85,0xfd, +0x01,0xac,0x83,0xf7,0x01,0x2b,0x83,0xfd,0x04,0xfa,0x00,0x00,0xf8,0x9e,0x0b,0xfd, +0x02,0xfe,0x00,0x84,0xfd,0x02,0xfe,0x00,0x86,0xfd,0x02,0x00,0xac,0x83,0xfd,0x14, +0xf7,0x00,0xfe,0xfd,0xfd,0xf5,0xf9,0x2b,0xf6,0xfd,0xf8,0xf5,0xfe,0x00,0xfd,0xf7, 
+0xf9,0xfe,0x2b,0x00,0x83,0xfd,0x03,0x81,0x00,0xfb,0x86,0xfd,0x05,0xfe,0x56,0x00, +0xf8,0xfc,0x84,0xfd,0x07,0x81,0x00,0xfe,0xfb,0xfd,0x56,0xf6,0x86,0xfd,0x02,0xfb, +0xfa,0x84,0xfd,0x06,0xf7,0x00,0xfd,0xac,0xf6,0xf5,0x87,0xfd,0x02,0xf5,0xfc,0x83, +0xfd,0x02,0xf9,0xf5,0x87,0xfd,0x03,0xf8,0x00,0xf8,0x8f,0xfd,0x02,0xac,0x00,0x83, +0xfd,0x04,0x81,0xf5,0x00,0xf8,0x83,0xfd,0x03,0xfe,0x2b,0x00,0x87,0xfd,0x03,0x81, +0x00,0xf9,0x84,0xfd,0x04,0xfb,0xf5,0x00,0x81,0x90,0x01,0xfd,0x0c,0x81,0x00,0xfb, +0xfd,0xfd,0xf9,0x2b,0xfd,0xfd,0xfe,0x00,0xf8,0x84,0xfd,0x03,0xf9,0x00,0xf8,0x85, +0xfd,0x07,0xfb,0xf9,0xf7,0x00,0x00,0xf8,0xfe,0x86,0xfd,0x06,0xfe,0x00,0xfc,0xfd, +0xac,0xfb,0x85,0xfd,0x02,0xfa,0x2b,0x84,0xfd,0x04,0x2b,0xf9,0xfd,0xfe,0x83,0xfd, +0x02,0xf8,0xf6,0x83,0xfd,0x0f,0xfb,0x00,0x56,0xfd,0xfd,0xfe,0x00,0xfe,0xfd,0xfe, +0x00,0xfe,0x56,0x00,0x2b,0x86,0xfd,0x03,0x81,0x00,0xac,0x83,0xfd,0x04,0xf5,0xfb, +0xfe,0x00,0x9f,0x0b,0xfd,0x01,0xf5,0x85,0xfd,0x01,0xf5,0x86,0xfd,0x1e,0xf8,0x00, +0x2b,0x2b,0x00,0xf6,0xac,0xfd,0xfd,0xfa,0x2b,0xf9,0xf7,0xac,0xf9,0x00,0xfc,0xfd, +0x00,0xfe,0xf6,0xf9,0xfd,0x56,0xf5,0xfd,0xfd,0x56,0x00,0xf8,0x89,0xfd,0x04,0xf9, +0xf5,0x00,0xf6,0x83,0xfd,0x07,0x00,0x56,0xfd,0xf5,0x00,0x00,0xfa,0x86,0xfd,0x01, +0xfb,0x83,0x00,0x04,0xf5,0x00,0xf6,0xfb,0x83,0xfd,0x03,0xf6,0xf5,0xfe,0x85,0xfd, +0x07,0xf9,0x00,0xf6,0x2b,0xf5,0xf5,0xac,0x84,0xfd,0x05,0x56,0xf5,0x00,0xf5,0x81, +0x90,0x01,0xfd,0x0f,0x2b,0x2b,0xfd,0xfb,0xf8,0xf5,0xf6,0xf6,0x00,0xf7,0xf9,0xac, +0xfd,0xf8,0xfb,0x88,0xfd,0x02,0xf8,0x81,0x83,0xfd,0x04,0xf9,0x00,0xf7,0xac,0x92, +0x01,0xfd,0x01,0xf9,0x83,0xfd,0x0e,0xf9,0x2b,0xfe,0xfd,0xfd,0x81,0xfb,0xfd,0xfd, +0xf9,0xf5,0x00,0x81,0xfe,0x85,0xfd,0x04,0xf9,0x00,0x2b,0x81,0x8a,0xfd,0x0c,0x56, +0x00,0xf6,0x2b,0x2b,0xf6,0x00,0xf7,0xfd,0xfd,0xf9,0x2b,0x84,0xfd,0x02,0x2b,0xfa, +0x85,0xfd,0x08,0xac,0x00,0xf6,0x2b,0xf6,0x00,0x56,0xfe,0x83,0xfd,0x09,0xf6,0xf5, +0x2b,0xf5,0xf6,0xfd,0xfd,0xac,0xf6,0x84,0xfd,0x04,0xfb,0x2b,0x00,0x81,0x84,0xfd, 
+0x05,0xf6,0x56,0xfb,0x00,0xfe,0x9b,0x0b,0xfd,0x04,0xfe,0xfa,0xf5,0xf6,0x85,0xfd, +0x01,0x00,0x87,0xfd,0x04,0xfc,0xfa,0xf9,0xac,0x85,0xfd,0x0b,0xfb,0xfa,0x2b,0xfa, +0x00,0xfb,0xfd,0xfd,0xf6,0x00,0xf6,0x83,0xfd,0x05,0xfe,0xfd,0xfd,0xfa,0xfb,0x8b, +0xfd,0x01,0xfe,0xe2,0xfd,0xac,0xfd,0xfd,0x02,0xfd,0xfe,0x89,0xfd,0xa3,0xfe,0xfd, +0x84,0xfd,0x03,0xac,0xf6,0xfc,0x86,0xfd,0x04,0xfe,0xf9,0xfa,0xfb,0x86,0xfd,0x03, +0xfb,0xf9,0xac,0x92,0x01,0xfd,0x0c,0x56,0xac,0xf5,0xf5,0xf7,0xfb,0xfd,0xfd,0xac, +0xf8,0xf6,0xf5,0x91,0x01,0xfd,0x01,0xac,0x98,0x01,0xfd,0x02,0xfc,0xf9,0x87,0xfd, +0x03,0xfa,0xf7,0xfe,0x88,0xfd,0x01,0xfe,0x8d,0xfd,0x0b,0xfe,0x81,0xfa,0xf9,0xfa, +0xfd,0xfe,0xfd,0x2b,0x00,0x81,0x83,0xfd,0x03,0xfe,0x2b,0xf9,0x86,0xfd,0x04,0xfe, +0x81,0xf9,0x81,0x86,0xfd,0x04,0xfe,0xfb,0xf9,0x81,0x84,0xfd,0x01,0xfe,0x84,0xfd, +0x03,0xfb,0x2b,0xfc,0x85,0xfd,0x04,0xac,0xf6,0xf5,0x81,0x9f,0x0b,0xfd,0x01,0xfe, +0x85,0xfd,0x01,0xfe,0x91,0x01,0xfd,0x04,0xfc,0xfb,0xfd,0xfe,0x84,0xfd,0x01,0xfe, +0x87,0xfd,0x01,0xfe,0x9e,0x03,0xfd,0x01,0xfe,0x95,0x01,0xfd,0x01,0xfe,0x88,0xfd, +0x01,0xfe,0x94,0x03,0xfd,0x02,0xfe,0xac,0x9f,0x01,0xfd,0x01,0xfe,0x96,0x02,0xfd, +0x01,0xfe,0x92,0xf5,0x05,0xfd, // 00 0xFF,0xFF,0xFF, 0xFF,0xFF,0xCC, 0xFF,0xFF,0x99, 0xFF,0xFF,0x66, diff --git a/osfmk/console/panic_ui/README b/osfmk/console/panic_ui/README index f013f7b71..f8031a2ad 100644 --- a/osfmk/console/panic_ui/README +++ b/osfmk/console/panic_ui/README @@ -1,8 +1,10 @@ Creating a Panic UI image (either the default or loadable) -The key steps are: create an indexed image using the MacOS X system 8 clut, saved -in QuickTime uncompressed 256 color format. Run it through the genimage tool -to create a C structure or a kernel loadable file. +The key steps are: create an indexed image using the MacOS X system +8 clut, saved in QuickTime uncompressed 256 color format. Run it +through the genimage tool to create a C structure or a kernel +loadable file. 
This code has byte-order dependencies, so all of +these steps must be done on a PowerPC machine. ===== Create the image @@ -33,21 +35,28 @@ Save the completed image as a TIFF (still indexed off the CLUT). ===== Convert the TIFF indexed image to QuickTime RAW -Using Preview, open the TIFF image. Use File:Export to save the TIFF image in -QuickTime image format with options of "None" for compression and "256 Colors" -for the depth. Quality should be "Best". The saved results should be a .qtif +Using the Preview application from 10.3.x, open the TIFF image. Use +File:Export to save the TIFF image in QuickTime image format with +options of "None" for compression and "256 Colors" for the depth. +Quality should be "Best". The saved results should be a .qtif formatted RAW image. ===== Generate an image for the kernel. To generate the default kernel panic image file "panic_image.c", in your working -directory, execute: +directory, build the program genimage: +cc -o genimage genimage.c + +execute: genimage -i -n -fg <24-bit color> -bg <24-bit color> ** options other than -i are optional. -To genertate a kernel loadable panic image file, execute: +To generate a kernel loadable panic image file, build the qtif2kraw binary: +cc -o qtif2kraw qtif2kraw.c + +execute: qtif2kraw -i -o -n -fg <24-bit color> -bg <24-bit color> ** options other than -i and -o are optional. 
diff --git a/osfmk/console/panic_ui/images/panic_dialog.tiff b/osfmk/console/panic_ui/images/panic_dialog.tiff index 9abd78924..f5c96eceb 100644 Binary files a/osfmk/console/panic_ui/images/panic_dialog.tiff and b/osfmk/console/panic_ui/images/panic_dialog.tiff differ diff --git a/osfmk/console/ppc/serial_console.c b/osfmk/console/ppc/serial_console.c index a7ec89d97..648ea791e 100644 --- a/osfmk/console/ppc/serial_console.c +++ b/osfmk/console/ppc/serial_console.c @@ -124,12 +124,15 @@ void console_per_proc_free(void *per_proc_cbfr) kfree(per_proc_cbfr, sizeof(ppcbfr_t)); } - static void _cnputc(char c) { cons_ops[cons_ops_index].putc(console_unit, console_chan, c); } +void cnputc_unbuffered(char c) { + _cnputc(c); +} + void cnputcusr(char c) { /* Echo input character directly */ struct per_proc_info *procinfo; spl_t s; diff --git a/osfmk/console/progress_meter_data.c b/osfmk/console/progress_meter_data.c new file mode 100644 index 000000000..e7a5ef5c6 --- /dev/null +++ b/osfmk/console/progress_meter_data.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#define kProgressBarHeight (18) +#define kProgressBarCapWidth (9) +#define kProgressBarWidth (300 + 2 * kProgressBarCapWidth) + +static const unsigned char +progressmeter_leftcap[2][kProgressBarCapWidth * kProgressBarHeight] = { +{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xd4, 0xb4, 0xa5, + 0xff, 0xff, 0xff, 0xff, 0xd4, 0x99, 0x99, 0x99, 0x99, + 0xff, 0xff, 0xff, 0xc4, 0x99, 0x99, 0xc3, 0xe2, 0xef, + 0xff, 0xff, 0xc4, 0x99, 0x99, 0xd4, 0xfe, 0xfe, 0xfe, + 0xff, 0xd9, 0x99, 0x99, 0xe7, 0xfe, 0xfe, 0xfe, 0xfe, + 0xff, 0xaf, 0x99, 0xd9, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xe8, 0x99, 0xaf, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xcf, 0x99, 0xc7, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xc4, 0x99, 0xd1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xc4, 0x99, 0xd1, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xcf, 0x99, 0xc8, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xe8, 0x99, 0xaf, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xff, 0xaf, 0x99, 0xd9, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xff, 0xd8, 0x99, 0x99, 0xe8, 0xfe, 0xfe, 0xfe, 0xfe, + 0xff, 0xff, 0xc4, 0x99, 0x99, 0xd4, 0xfe, 0xfe, 0xfe, + 0xff, 0xff, 0xff, 0xc4, 0x99, 0x99, 0xc3, 0xe2, 0xef, + 0xff, 0xff, 0xff, 0xff, 0xd4, 0x99, 0x99, 0x99, 0x99, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xd4, 0xb4, 0xa5, +}, +{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xd3, 0xb4, 0xa4, + 0xff, 0xff, 0xff, 0xff, 0xd4, 0x99, 0x9a, 0x99, 0x99, + 0xff, 0xff, 0xff, 0xc4, 0x99, 0x98, 0x99, 0x99, 0x99, + 0xff, 0xff, 0xc4, 0x99, 0x9a, 0x99, 0x9a, 0x99, 
0x99, + 0xff, 0xd9, 0x9a, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, + 0xff, 0xb0, 0x98, 0x98, 0x99, 0x98, 0x99, 0x99, 0x99, + 0xe8, 0x99, 0x9a, 0x99, 0x99, 0x98, 0x98, 0x99, 0x99, + 0xcf, 0x99, 0x99, 0x98, 0x99, 0x9a, 0x98, 0x98, 0x98, + 0xc5, 0x99, 0x99, 0x9a, 0x99, 0x99, 0x9a, 0x98, 0x99, + 0xc4, 0x99, 0x99, 0x99, 0x9a, 0x98, 0x99, 0x99, 0x99, + 0xcf, 0x98, 0x99, 0x99, 0x98, 0x98, 0x98, 0x98, 0x99, + 0xe8, 0x9a, 0x99, 0x99, 0x9a, 0x98, 0x99, 0x99, 0x99, + 0xff, 0xb0, 0x9a, 0x99, 0x99, 0x98, 0x99, 0x9a, 0x99, + 0xff, 0xd8, 0x99, 0x99, 0x99, 0x9a, 0x98, 0x98, 0x99, + 0xff, 0xff, 0xc4, 0x98, 0x99, 0x98, 0x9a, 0x99, 0x9a, + 0xff, 0xff, 0xff, 0xc5, 0x99, 0x9a, 0x9a, 0x99, 0x98, + 0xff, 0xff, 0xff, 0xff, 0xd3, 0x98, 0x99, 0x98, 0x9a, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xd4, 0xb4, 0xa5, +} +}; + + +static const unsigned char +progressmeter_middle[2][1 * kProgressBarHeight] = { +{ + 0x99, + 0x99, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0xfe, + 0x99, + 0x99, +}, +{ + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, + 0x99, +}}; + +static const unsigned char +progressmeter_rightcap[2][kProgressBarCapWidth * kProgressBarHeight] = { +{ + 0xa8, 0xb7, 0xd5, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x99, 0x99, 0x99, 0x99, 0xd8, 0xff, 0xff, 0xff, 0xff, + 0xef, 0xe0, 0xc0, 0x99, 0x99, 0xc5, 0xff, 0xff, 0xff, + 0xfe, 0xfe, 0xfe, 0xd1, 0x99, 0x99, 0xc5, 0xff, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xe2, 0x99, 0x99, 0xdf, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xd4, 0x99, 0xb7, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xa8, 0x99, 0xee, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xc0, 0x99, 0xd5, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xcc, 0x99, 0xcb, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xcc, 0x99, 0xcb, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xc1, 0x99, 0xd5, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xa8, 0x99, 0xee, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 
0xd4, 0x99, 0xb7, 0xff, + 0xfe, 0xfe, 0xfe, 0xfe, 0xe3, 0x99, 0x99, 0xdf, 0xff, + 0xfe, 0xfe, 0xfe, 0xd1, 0x99, 0x99, 0xc5, 0xff, 0xff, + 0xef, 0xe0, 0xc0, 0x99, 0x99, 0xc5, 0xff, 0xff, 0xff, + 0x99, 0x99, 0x99, 0x99, 0xd8, 0xff, 0xff, 0xff, 0xff, + 0xa8, 0xb7, 0xd5, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, +}, +{ + 0xa8, 0xb7, 0xd7, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x99, 0x99, 0x99, 0x99, 0xd7, 0xff, 0xff, 0xff, 0xff, + 0x98, 0x98, 0x98, 0x98, 0x98, 0xc5, 0xff, 0xff, 0xff, + 0x99, 0x99, 0x98, 0x9a, 0x98, 0x9a, 0xc5, 0xff, 0xff, + 0x99, 0x9a, 0x99, 0x99, 0x9a, 0x98, 0x9a, 0xdf, 0xff, + 0x99, 0x99, 0x98, 0x99, 0x98, 0x99, 0x99, 0xb7, 0xff, + 0x9a, 0x98, 0x99, 0x98, 0x99, 0x99, 0x99, 0x9a, 0xee, + 0x9a, 0x99, 0x9a, 0x98, 0x9a, 0x99, 0x9a, 0x9a, 0xd5, + 0x98, 0x99, 0x99, 0x98, 0x9a, 0x99, 0x9a, 0x99, 0xcb, + 0x98, 0x99, 0x9a, 0x98, 0x98, 0x99, 0x99, 0x99, 0xc9, + 0x99, 0x99, 0x98, 0x99, 0x9a, 0x99, 0x99, 0x99, 0xd5, + 0x99, 0x99, 0x99, 0x99, 0x98, 0x99, 0x99, 0x9a, 0xee, + 0x9a, 0x98, 0x98, 0x99, 0x99, 0x99, 0x99, 0xb7, 0xff, + 0x99, 0x9a, 0x9a, 0x98, 0x99, 0x99, 0x9a, 0xdf, 0xff, + 0x99, 0x99, 0x9a, 0x98, 0x99, 0x99, 0xc7, 0xff, 0xff, + 0x98, 0x99, 0x99, 0x98, 0x9a, 0xc5, 0xff, 0xff, 0xff, + 0x99, 0x99, 0x99, 0x9a, 0xd8, 0xff, 0xff, 0xff, 0xff, + 0xa8, 0xb7, 0xd7, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, +}}; + diff --git a/osfmk/console/serial_protos.h b/osfmk/console/serial_protos.h index 89966977b..90b691f1d 100644 --- a/osfmk/console/serial_protos.h +++ b/osfmk/console/serial_protos.h @@ -54,7 +54,7 @@ void switch_to_old_console(int old_console); struct console_ops { void (*putc)(int, int, int); int (*getc)(int, int, boolean_t, boolean_t); -} console_ops; +}; #define SERIAL_CONS_OPS 0 #define VC_CONS_OPS 1 diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 4a040103f..49dc6da91 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,8 +106,12 @@ #include #include +#include #include "iso_font.c" +#if !CONFIG_EMBEDDED +#include "progress_meter_data.c" +#endif #include "sys/msgbuf.h" @@ -145,9 +149,9 @@ static struct { static unsigned char *gc_buffer_attributes; static unsigned char *gc_buffer_characters; static unsigned char *gc_buffer_colorcodes; -static unsigned long gc_buffer_columns; -static unsigned long gc_buffer_rows; -static unsigned long gc_buffer_size; +static uint32_t gc_buffer_columns; +static uint32_t gc_buffer_rows; +static uint32_t gc_buffer_size; #ifdef __i386__ decl_simple_lock_data(static, vcputc_lock); @@ -318,7 +322,7 @@ gc_clear_screen(unsigned int xx, unsigned int yy, int top, unsigned int bottom, if ( xx < gc_buffer_columns && yy < gc_buffer_rows && bottom <= gc_buffer_rows ) { - unsigned long start, end; + uint32_t start, end; switch (which) { case 0: /* To end of screen */ @@ -353,9 +357,9 @@ gc_enable( boolean_t enable ) unsigned char *buffer_attributes = NULL; unsigned char *buffer_characters = NULL; unsigned char *buffer_colorcodes = NULL; - unsigned long buffer_columns = 0; - unsigned long buffer_rows = 0; - unsigned long buffer_size = 0; + uint32_t buffer_columns = 0; + uint32_t buffer_rows = 0; + uint32_t buffer_size = 0; spl_t s; if ( enable == FALSE ) @@ -461,7 +465,7 @@ gc_hide_cursor(unsigned int xx, unsigned int yy) { if ( xx < gc_buffer_columns && yy < gc_buffer_rows ) { - unsigned long index = (yy * gc_buffer_columns) + xx; + uint32_t index = (yy * gc_buffer_columns) + xx; unsigned char attribute = gc_buffer_attributes[index]; unsigned char character = gc_buffer_characters[index]; unsigned char colorcode = gc_buffer_colorcodes[index]; @@ -503,7 +507,7 @@ gc_paint_char(unsigned int xx, unsigned int yy, unsigned char ch, int attrs) { if ( xx < gc_buffer_columns && yy < gc_buffer_rows ) { - unsigned long index = (yy * gc_buffer_columns) + xx; + 
uint32_t index = (yy * gc_buffer_columns) + xx; gc_buffer_attributes[index] = attrs; gc_buffer_characters[index] = ch; @@ -991,8 +995,8 @@ gc_scroll_down(int num, unsigned int top, unsigned int bottom) if ( bottom <= gc_buffer_rows ) { unsigned char colorcodesave = gc_color_code; - unsigned long column, row; - unsigned long index, jump; + uint32_t column, row; + uint32_t index, jump; jump = num * gc_buffer_columns; @@ -1100,8 +1104,8 @@ gc_scroll_up(int num, unsigned int top, unsigned int bottom) if ( bottom <= gc_buffer_rows ) { unsigned char colorcodesave = gc_color_code; - unsigned long column, row; - unsigned long index, jump; + uint32_t column, row; + uint32_t index, jump; jump = num * gc_buffer_columns; @@ -1206,7 +1210,7 @@ gc_show_cursor(unsigned int xx, unsigned int yy) { if ( xx < gc_buffer_columns && yy < gc_buffer_rows ) { - unsigned long index = (yy * gc_buffer_columns) + xx; + uint32_t index = (yy * gc_buffer_columns) + xx; unsigned char attribute = gc_buffer_attributes[index]; unsigned char character = gc_buffer_characters[index]; unsigned char colorcode = gc_buffer_colorcodes[index]; @@ -1262,22 +1266,22 @@ vcputc(__unused int l, __unused int u, int c) */ static unsigned char vc_color_index_table[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 }; - -static unsigned long vc_colors[8][3] = { - { 0xFFFFFFFF, 0x00000000, 0x00000000 }, /* black */ - { 0x23232323, 0x7C007C00, 0x00FF0000 }, /* red */ - { 0xb9b9b9b9, 0x03e003e0, 0x0000FF00 }, /* green */ - { 0x05050505, 0x7FE07FE0, 0x00FFFF00 }, /* yellow */ - { 0xd2d2d2d2, 0x001f001f, 0x000000FF}, /* blue */ -// { 0x80808080, 0x31933193, 0x00666699 }, /* blue */ - { 0x18181818, 0x7C1F7C1F, 0x00FF00FF }, /* magenta */ - { 0xb4b4b4b4, 0x03FF03FF, 0x0000FFFF }, /* cyan */ - { 0x00000000, 0x7FFF7FFF, 0x00FFFFFF } /* white */ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2 }; + +static uint32_t vc_colors[8][4] = { + { 0xFFFFFFFF, 0x00000000, 
0x00000000, 0x00000000 }, /* black */ + { 0x23232323, 0x7C007C00, 0x00FF0000, 0x3FF00000 }, /* red */ + { 0xb9b9b9b9, 0x03e003e0, 0x0000FF00, 0x000FFC00 }, /* green */ + { 0x05050505, 0x7FE07FE0, 0x00FFFF00, 0x3FFFFC00 }, /* yellow */ + { 0xd2d2d2d2, 0x001f001f, 0x000000FF, 0x000003FF }, /* blue */ +// { 0x80808080, 0x31933193, 0x00666699, 0x00000000 }, /* blue */ + { 0x18181818, 0x7C1F7C1F, 0x00FF00FF, 0x3FF003FF }, /* magenta */ + { 0xb4b4b4b4, 0x03FF03FF, 0x0000FFFF, 0x000FFFFF }, /* cyan */ + { 0x00000000, 0x7FFF7FFF, 0x00FFFFFF, 0x3FFFFFFF } /* white */ }; -static unsigned long vc_color_fore = 0; -static unsigned long vc_color_back = 0; +static uint32_t vc_color_fore = 0; +static uint32_t vc_color_back = 0; /* * New Rendering code from Michel Pollet @@ -1287,7 +1291,7 @@ static unsigned long vc_color_back = 0; static unsigned char *vc_rendered_font = NULL; /* Rendered Font Size */ -static unsigned long vc_rendered_font_size = 0; +static uint32_t vc_rendered_font_size = 0; /* Size of a character in the table (bytes) */ static int vc_rendered_char_size = 0; @@ -1299,7 +1303,7 @@ static void vc_clear_screen(unsigned int xx, unsigned int yy, unsigned int scrreg_top, unsigned int scrreg_bottom, int which) { - unsigned long *p, *endp, *row; + uint32_t *p, *endp, *row; int linelongs, col; int rowline, rowlongs; @@ -1310,8 +1314,8 @@ vc_clear_screen(unsigned int xx, unsigned int yy, unsigned int scrreg_top, rowline = vinfo.v_rowscanbytes >> 2; rowlongs = vinfo.v_rowbytes >> 2; - p = (unsigned long*) vinfo.v_baseaddr; - endp = (unsigned long*) vinfo.v_baseaddr; + p = (uint32_t*) vinfo.v_baseaddr; + endp = (uint32_t*) vinfo.v_baseaddr; switch (which) { case 0: /* To end of screen */ @@ -1350,7 +1354,7 @@ vc_initialize(__unused struct vc_info * vinfo_p) vinfo.v_rows = vinfo.v_height / ISO_CHAR_HEIGHT; vinfo.v_columns = vinfo.v_width / ISO_CHAR_WIDTH; - vinfo.v_rowscanbytes = (vinfo.v_depth / 8) * vinfo.v_width; + vinfo.v_rowscanbytes = ((vinfo.v_depth + 7) / 8) * 
vinfo.v_width; } static void @@ -1359,7 +1363,7 @@ vc_render_char(unsigned char ch, unsigned char *renderptr, short newdepth) union { unsigned char *charptr; unsigned short *shortptr; - unsigned long *longptr; + uint32_t *longptr; } current; /* current place in rendered font, multiple types. */ unsigned char *theChar; /* current char in iso_font */ int line; @@ -1378,6 +1382,7 @@ vc_render_char(unsigned char ch, unsigned char *renderptr, short newdepth) *current.shortptr++ = (*theChar & mask) ? 0xFFFF : 0; break; + case 30: case 32: *current.longptr++ = (*theChar & mask) ? 0xFFFFFFFF : 0; break; @@ -1392,35 +1397,35 @@ static void vc_paint_char_8(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, __unused unsigned char ch_previous, __unused int attrs_previous) { - unsigned long *theChar; - unsigned long *where; + uint32_t *theChar; + uint32_t *where; int i; if (vc_rendered_font) { - theChar = (unsigned long*)(vc_rendered_font + (ch * vc_rendered_char_size)); + theChar = (uint32_t*)(vc_rendered_font + (ch * vc_rendered_char_size)); } else { vc_render_char(ch, vc_rendered_char, 8); - theChar = (unsigned long*)(vc_rendered_char); + theChar = (uint32_t*)(vc_rendered_char); } - where = (unsigned long*)(vinfo.v_baseaddr + + where = (uint32_t*)(vinfo.v_baseaddr + (yy * ISO_CHAR_HEIGHT * vinfo.v_rowbytes) + (xx * ISO_CHAR_WIDTH)); if (!attrs) for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* No attr? 
FLY !*/ - unsigned long *store = where; + uint32_t *store = where; int x; for (x = 0; x < 2; x++) { - unsigned long val = *theChar++; + uint32_t val = *theChar++; val = (vc_color_back & ~val) | (vc_color_fore & val); *store++ = val; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } else for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* a little slower */ - unsigned long *store = where, lastpixel = 0; + uint32_t *store = where, lastpixel = 0; int x; for (x = 0 ; x < 2; x++) { - unsigned long val = *theChar++, save = val; + uint32_t val = *theChar++, save = val; if (attrs & ATTR_BOLD) { /* bold support */ if (lastpixel && !(save & 0xFF000000)) val |= 0xff000000; @@ -1439,7 +1444,7 @@ vc_paint_char_8(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, lastpixel = save & 0xff; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } } @@ -1449,35 +1454,35 @@ vc_paint_char_16(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, __unused unsigned char ch_previous, __unused int attrs_previous) { - unsigned long *theChar; - unsigned long *where; + uint32_t *theChar; + uint32_t *where; int i; if (vc_rendered_font) { - theChar = (unsigned long*)(vc_rendered_font + (ch * vc_rendered_char_size)); + theChar = (uint32_t*)(vc_rendered_font + (ch * vc_rendered_char_size)); } else { vc_render_char(ch, vc_rendered_char, 16); - theChar = (unsigned long*)(vc_rendered_char); + theChar = (uint32_t*)(vc_rendered_char); } - where = (unsigned long*)(vinfo.v_baseaddr + + where = (uint32_t*)(vinfo.v_baseaddr + (yy * ISO_CHAR_HEIGHT * vinfo.v_rowbytes) + (xx * ISO_CHAR_WIDTH * 2)); if (!attrs) for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* No attrs ? FLY ! 
*/ - unsigned long *store = where; + uint32_t *store = where; int x; for (x = 0; x < 4; x++) { - unsigned long val = *theChar++; + uint32_t val = *theChar++; val = (vc_color_back & ~val) | (vc_color_fore & val); *store++ = val; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } else for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* a little bit slower */ - unsigned long *store = where, lastpixel = 0; + uint32_t *store = where, lastpixel = 0; int x; for (x = 0 ; x < 4; x++) { - unsigned long val = *theChar++, save = val; + uint32_t val = *theChar++, save = val; if (attrs & ATTR_BOLD) { /* bold support */ if (save == 0xFFFF0000) val |= 0xFFFF; else if (lastpixel && !(save & 0xFFFF0000)) @@ -1492,7 +1497,7 @@ vc_paint_char_16(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, lastpixel = save & 0x7fff; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } } @@ -1501,17 +1506,17 @@ static void vc_paint_char_32(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, unsigned char ch_previous, int attrs_previous) { - unsigned long *theChar; - unsigned long *theCharPrevious; - unsigned long *where; + uint32_t *theChar; + uint32_t *theCharPrevious; + uint32_t *where; int i; if (vc_rendered_font) { - theChar = (unsigned long*)(vc_rendered_font + (ch * vc_rendered_char_size)); - theCharPrevious = (unsigned long*)(vc_rendered_font + (ch_previous * vc_rendered_char_size)); + theChar = (uint32_t*)(vc_rendered_font + (ch * vc_rendered_char_size)); + theCharPrevious = (uint32_t*)(vc_rendered_font + (ch_previous * vc_rendered_char_size)); } else { vc_render_char(ch, vc_rendered_char, 32); - theChar = (unsigned long*)(vc_rendered_char); + theChar = (uint32_t*)(vc_rendered_char); theCharPrevious = NULL; } if (!ch_previous) { @@ -1520,15 +1525,15 @@ vc_paint_char_32(unsigned int xx, unsigned int yy, 
unsigned char ch, int attrs, if (attrs_previous) { theCharPrevious = NULL; } - where = (unsigned long*)(vinfo.v_baseaddr + + where = (uint32_t*)(vinfo.v_baseaddr + (yy * ISO_CHAR_HEIGHT * vinfo.v_rowbytes) + (xx * ISO_CHAR_WIDTH * 4)); if (!attrs) for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* No attrs ? FLY ! */ - unsigned long *store = where; + uint32_t *store = where; int x; for (x = 0; x < 8; x++) { - unsigned long val = *theChar++; + uint32_t val = *theChar++; if (theCharPrevious == NULL || val != *theCharPrevious++ ) { val = (vc_color_back & ~val) | (vc_color_fore & val); *store++ = val; @@ -1537,12 +1542,12 @@ vc_paint_char_32(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, } } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t *)(((unsigned char*)where)+vinfo.v_rowbytes); } else for (i = 0; i < ISO_CHAR_HEIGHT; i++) { /* a little slower */ - unsigned long *store = where, lastpixel = 0; + uint32_t *store = where, lastpixel = 0; int x; for (x = 0 ; x < 8; x++) { - unsigned long val = *theChar++, save = val; + uint32_t val = *theChar++, save = val; if (attrs & ATTR_BOLD) { /* bold support */ if (lastpixel && !save) val = 0xFFFFFFFF; @@ -1555,7 +1560,7 @@ vc_paint_char_32(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, lastpixel = save; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } } @@ -1575,6 +1580,7 @@ vc_paint_char(unsigned int xx, unsigned int yy, unsigned char ch, int attrs, vc_paint_char_16(xx, yy, ch, attrs, ch_previous, attrs_previous); break; + case 30: case 32: vc_paint_char_32(xx, yy, ch, attrs, ch_previous, attrs_previous); @@ -1589,7 +1595,7 @@ vc_render_font(short newdepth) int charindex; /* index in ISO font */ unsigned char *rendered_font; - unsigned long rendered_font_size; + unsigned int rendered_font_size; int rendered_char_size; spl_t s; @@ -1620,7 +1626,7 @@ vc_render_font(short 
newdepth) } if (newdepth) { - rendered_char_size = ISO_CHAR_HEIGHT * ((newdepth / 8) * ISO_CHAR_WIDTH); + rendered_char_size = ISO_CHAR_HEIGHT * (((newdepth + 7) / 8) * ISO_CHAR_WIDTH); rendered_font_size = (ISO_CHAR_MAX-ISO_CHAR_MIN+1) * rendered_char_size; rendered_font = (unsigned char *) kalloc(rendered_font_size); } @@ -1655,13 +1661,13 @@ vc_enable(boolean_t enable) static void vc_reverse_cursor(unsigned int xx, unsigned int yy) { - unsigned long *where; + uint32_t *where; int line, col; if(!vinfo.v_depth) return; - where = (unsigned long*)(vinfo.v_baseaddr + + where = (uint32_t*)(vinfo.v_baseaddr + (yy * ISO_CHAR_HEIGHT * vinfo.v_rowbytes) + (xx /** ISO_CHAR_WIDTH*/ * vinfo.v_depth)); for (line = 0; line < ISO_CHAR_HEIGHT; line++) { @@ -1679,14 +1685,14 @@ vc_reverse_cursor(unsigned int xx, unsigned int yy) where[col] = ~where[col]; break; } - where = (unsigned long*)(((unsigned char*)where)+vinfo.v_rowbytes); + where = (uint32_t*)(((unsigned char*)where)+vinfo.v_rowbytes); } } static void vc_scroll_down(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) { - unsigned long *from, *to, linelongs, i, line, rowline, rowscanline; + uint32_t *from, *to, linelongs, i, line, rowline, rowscanline; if(!vinfo.v_depth) return; @@ -1695,7 +1701,7 @@ vc_scroll_down(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) rowline = vinfo.v_rowbytes >> 2; rowscanline = vinfo.v_rowscanbytes >> 2; - to = (unsigned long *) vinfo.v_baseaddr + (linelongs * scrreg_bottom) + to = (uint32_t *) vinfo.v_baseaddr + (linelongs * scrreg_bottom) - (rowline - rowscanline); from = to - (linelongs * num); /* handle multiple line scroll (Michel Pollet) */ @@ -1706,9 +1712,9 @@ vc_scroll_down(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) /* * Only copy what is displayed */ - video_scroll_down((unsigned int) from, - (unsigned int) (from-(vinfo.v_rowscanbytes >> 2)), - (unsigned int) to); + video_scroll_down(from, + (from-(vinfo.v_rowscanbytes >> 2)), + to); from 
-= rowline; to -= rowline; @@ -1719,7 +1725,7 @@ vc_scroll_down(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) static void vc_scroll_up(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) { - unsigned long *from, *to, linelongs, i, line, rowline, rowscanline; + uint32_t *from, *to, linelongs, i, line, rowline, rowscanline; if(!vinfo.v_depth) return; @@ -1728,7 +1734,7 @@ vc_scroll_up(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) rowline = vinfo.v_rowbytes >> 2; rowscanline = vinfo.v_rowscanbytes >> 2; - to = (unsigned long *) vinfo.v_baseaddr + (scrreg_top * linelongs); + to = (uint32_t *) vinfo.v_baseaddr + (scrreg_top * linelongs); from = to + (linelongs * num); /* handle multiple line scroll (Michel Pollet) */ i = (scrreg_bottom - scrreg_top) - num; @@ -1738,9 +1744,9 @@ vc_scroll_up(int num, unsigned int scrreg_top, unsigned int scrreg_bottom) /* * Only copy what is displayed */ - video_scroll_up((unsigned int) from, - (unsigned int) (from+(vinfo.v_rowscanbytes >> 2)), - (unsigned int) to); + video_scroll_up(from, + (from+(vinfo.v_rowscanbytes >> 2)), + to); from += rowline; to += rowline; @@ -1796,29 +1802,54 @@ static void * vc_saveunder; static vm_size_t vc_saveunder_len; decl_simple_lock_data(,vc_progress_lock) -static void vc_blit_rect( int x, int y, int width, int height, - const unsigned char * dataPtr, const unsigned char * alphaPtr, - void * backBuffer, boolean_t save, boolean_t static_alpha ); -static void vc_blit_rect_8( int x, int y, int width, int height, - const unsigned char * dataPtr, const unsigned char * alphaPtr, - unsigned char * backBuffer, boolean_t save, boolean_t static_alpha ); -static void vc_blit_rect_16( int x, int y, int width, int height, - const unsigned char * dataPtr, const unsigned char * alphaPtr, - unsigned short * backBuffer, boolean_t save, boolean_t static_alpha ); -static void vc_blit_rect_32( int x, int y, int width, int height, - const unsigned char * dataPtr, const unsigned char 
* alphaPtr, - unsigned int * backBuffer, boolean_t save, boolean_t static_alpha ); +enum { + kSave = 0x01, + kDataIndexed = 0x02, + kDataAlpha = 0x04, + kDataBack = 0x08, +}; + +static void vc_blit_rect(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + void * backBuffer, + unsigned int flags); +static void vc_blit_rect_8(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned char * backBuffer, + unsigned int flags); +static void vc_blit_rect_16(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned short * backBuffer, + unsigned int flags); +static void vc_blit_rect_32(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned int * backBuffer, + unsigned int flags); +static void vc_blit_rect_30(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned int * backBuffer, + unsigned int flags); extern void vc_display_icon( vc_progress_element * desc, const unsigned char * data ); extern void vc_progress_initialize( vc_progress_element * desc, const unsigned char * data, const unsigned char * clut ); -static void vc_progress_set(boolean_t enable, uint32_t vc_delay); +void vc_progress_set(boolean_t enable, uint32_t vc_delay); static void vc_progress_task( void * arg0, void * arg ); -static void vc_blit_rect( int x, int y, - int width, int height, - const unsigned char * dataPtr, - const unsigned char * alphaPtr, - void * backBuffer, - boolean_t save, boolean_t static_alpha ) +static void vc_blit_rect(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + void * backBuffer, + unsigned int flags) { if(!vinfo.v_depth) return; @@ -1826,39 +1857,53 @@ static void vc_blit_rect( int x, int y, switch( 
vinfo.v_depth) { case 8: if( vc_clut8 == vc_clut) - vc_blit_rect_8( x, y, width, height, dataPtr, alphaPtr, (unsigned char *) backBuffer, save, static_alpha ); + vc_blit_rect_8( x, y, bx, width, height, sourceRow, backRow, dataPtr, (unsigned char *) backBuffer, flags ); break; case 16: - vc_blit_rect_16( x, y, width, height, dataPtr, alphaPtr, (unsigned short *) backBuffer, save, static_alpha ); + vc_blit_rect_16( x, y, bx, width, height, sourceRow, backRow, dataPtr, (unsigned short *) backBuffer, flags ); break; case 32: - vc_blit_rect_32( x, y, width, height, dataPtr, alphaPtr, (unsigned int *) backBuffer, save, static_alpha ); + vc_blit_rect_32( x, y, bx, width, height, sourceRow, backRow, dataPtr, (unsigned int *) backBuffer, flags ); + break; + case 30: + vc_blit_rect_30( x, y, bx, width, height, sourceRow, backRow, dataPtr, (unsigned int *) backBuffer, flags ); break; } } static void -vc_blit_rect_8(int x, int y, int width, int height, - const unsigned char * dataPtr, const unsigned char * alphaPtr, - __unused unsigned char * backPtr, __unused boolean_t save, - __unused boolean_t static_alpha) +vc_blit_rect_8(int x, int y, __unused int bx, + int width, int height, + int sourceRow, __unused int backRow, + const unsigned char * dataPtr, + __unused unsigned char * backBuffer, + __unused unsigned int flags) { - volatile unsigned char * dst; + volatile unsigned short * dst; int line, col; - unsigned int data; - - dst = (unsigned char *)(vinfo.v_baseaddr + - (y * vinfo.v_rowbytes) + - (x)); - - for( line = 0; line < height; line++) { - for( col = 0; col < width; col++) { - data = 0; - if( dataPtr != 0) data = *dataPtr++; - else if( alphaPtr != 0) data = vc_revclut8[*alphaPtr++]; - *(dst + col) = data; - } - dst = (volatile unsigned char *) (((int)dst) + vinfo.v_rowbytes); + unsigned int data = 0, out = 0; + + if (!sourceRow) + data = (unsigned int)(uintptr_t)dataPtr; + dst = (volatile unsigned short *) (vinfo.v_baseaddr + + (y * vinfo.v_rowbytes) + + (x * 4)); + + 
for( line = 0; line < height; line++) + { + for( col = 0; col < width; col++) + { + if (col < sourceRow) + data = *dataPtr++; + if (kDataAlpha & flags) + out = vc_revclut8[data]; + else + out = data; + *(dst + col) = out; + } + dst = (volatile unsigned short *) (((volatile char*)dst) + vinfo.v_rowbytes); + if (sourceRow > width) + dataPtr += sourceRow - width; } } @@ -1877,151 +1922,178 @@ vc_blit_rect_8(int x, int y, int width, int height, #define MASK_G_8 0x01fe0 #define MASK_B_8 0x000ff -static void vc_blit_rect_16( int x, int y, - int width, int height, - const unsigned char * dataPtr, - const unsigned char * alphaPtr, - unsigned short * backPtr, - boolean_t save, boolean_t static_alpha ) +static void vc_blit_rect_16( int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned short * backPtr, + unsigned int flags) { volatile unsigned short * dst; int line, col; - unsigned int data = 0, index = 0, alpha, back; + unsigned int data = 0, out = 0, back = 0; - dst = (volatile unsigned short *)(vinfo.v_baseaddr + + if (backPtr) + backPtr += bx; + if (!sourceRow) + data = (unsigned int)(uintptr_t)dataPtr; + dst = (volatile unsigned short *) (vinfo.v_baseaddr + (y * vinfo.v_rowbytes) + (x * 2)); - for( line = 0; line < height; line++) { - for( col = 0; col < width; col++) { - if( dataPtr != 0) { - index = *dataPtr++; - index *= 3; - } + for( line = 0; line < height; line++) + { + for( col = 0; col < width; col++) + { + if (col < sourceRow) + data = *dataPtr++; - if( alphaPtr && backPtr) { - - alpha = *alphaPtr++; - data = 0; - if( dataPtr != 0) { - if( vc_clut[index + 0] > alpha) - data |= (((vc_clut[index + 0] - alpha) & CLUT_MASK_R) CLUT_SHIFT_R); - if( vc_clut[index + 1] > alpha) - data |= (((vc_clut[index + 1] - alpha) & CLUT_MASK_G) CLUT_SHIFT_G); - if( vc_clut[index + 2] > alpha) - data |= (((vc_clut[index + 2] - alpha) & CLUT_MASK_B) CLUT_SHIFT_B); - } + if (backPtr) { + if (kSave & flags) { + back = 
*(dst + col); + *backPtr++ = back; + } else + back = *backPtr++; + } + if (kDataIndexed & flags) { + out = ( (CLUT_MASK_R & (vc_clut[data*3 + 0])) CLUT_SHIFT_R) + | ( (CLUT_MASK_G & (vc_clut[data*3 + 1])) CLUT_SHIFT_G) + | ( (CLUT_MASK_B & (vc_clut[data*3 + 2])) CLUT_SHIFT_B); + } else if (kDataAlpha & flags) { + out = (((((back & MASK_R) * data) + MASK_R_8) >> 8) & MASK_R) + | (((((back & MASK_G) * data) + MASK_G_8) >> 8) & MASK_G) + | (((((back & MASK_B) * data) + MASK_B_8) >> 8) & MASK_B); #ifdef CONFIG_VC_PROGRESS_WHITE - else { - data |= (((0xff - alpha) & CLUT_MASK_R) CLUT_SHIFT_R); - data |= (((0xff - alpha) & CLUT_MASK_G) CLUT_SHIFT_G); - data |= (((0xff - alpha) & CLUT_MASK_B) CLUT_SHIFT_B); - } + out += (((0xff - data) & CLUT_MASK_R) CLUT_SHIFT_R) + | (((0xff - data) & CLUT_MASK_G) CLUT_SHIFT_G) + | (((0xff - data) & CLUT_MASK_B) CLUT_SHIFT_B); #endif - - if( save) { - back = *(dst + col); - if ( !static_alpha) - *backPtr++ = back; - back = (((((back & MASK_R) * alpha) + MASK_R_8) >> 8) & MASK_R) - | (((((back & MASK_G) * alpha) + MASK_G_8) >> 8) & MASK_G) - | (((((back & MASK_B) * alpha) + MASK_B_8) >> 8) & MASK_B); - if ( static_alpha) - *backPtr++ = back; - } else { - back = *backPtr++; - if ( !static_alpha) { - back = (((((back & MASK_R) * alpha) + MASK_R_8) >> 8) & MASK_R) - | (((((back & MASK_G) * alpha) + MASK_G_8) >> 8) & MASK_G) - | (((((back & MASK_B) * alpha) + MASK_B_8) >> 8) & MASK_B); - } - } - - data += back; - } else - if( dataPtr != 0) { - data = ( (CLUT_MASK_R & (vc_clut[index + 0])) CLUT_SHIFT_R) - | ( (CLUT_MASK_G & (vc_clut[index + 1])) CLUT_SHIFT_G) - | ( (CLUT_MASK_B & (vc_clut[index + 2])) CLUT_SHIFT_B); - } - - *(dst + col) = data; - } - dst = (volatile unsigned short *) (((int)dst) + vinfo.v_rowbytes); + out = back; + *(dst + col) = out; + } + dst = (volatile unsigned short *) (((volatile char*)dst) + vinfo.v_rowbytes); + if (backPtr) + backPtr += backRow - width; + if (sourceRow > width) + dataPtr += sourceRow - width; } } 
-static void vc_blit_rect_32( int x, int y, - int width, int height, - const unsigned char * dataPtr, - const unsigned char * alphaPtr, - unsigned int * backPtr, - boolean_t save, boolean_t static_alpha ) +static void vc_blit_rect_32(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned int * backPtr, + unsigned int flags) { volatile unsigned int * dst; int line, col; - unsigned int data = 0, index = 0, alpha, back; + unsigned int data = 0, out = 0, back = 0; + if (backPtr) + backPtr += bx; + if (!sourceRow) + data = (unsigned int)(uintptr_t)dataPtr; dst = (volatile unsigned int *) (vinfo.v_baseaddr + (y * vinfo.v_rowbytes) + (x * 4)); - for( line = 0; line < height; line++) { - for( col = 0; col < width; col++) { - if( dataPtr != 0) { - index = *dataPtr++; - index *= 3; - } + for( line = 0; line < height; line++) + { + for( col = 0; col < width; col++) + { + if (col < sourceRow) + data = *dataPtr++; - if( alphaPtr && backPtr) { - - alpha = *alphaPtr++; - data = 0; - if( dataPtr != 0) { - if( vc_clut[index + 0] > alpha) - data |= ((vc_clut[index + 0] - alpha) << 16); - if( vc_clut[index + 1] > alpha) - data |= ((vc_clut[index + 1] - alpha) << 8); - if( vc_clut[index + 2] > alpha) - data |= ((vc_clut[index + 2] - alpha)); - } + if (backPtr) { + if (kSave & flags) { + back = *(dst + col); + *backPtr++ = back; + } else + back = *backPtr++; + } + if (kDataIndexed & flags) { + out = (vc_clut[data*3 + 0] << 16) + | (vc_clut[data*3 + 1] << 8) + | (vc_clut[data*3 + 2]); + } else if (kDataAlpha & flags) { + out = (((((back & 0x00ff00ff) * data) + 0x00ff00ff) >> 8) & 0x00ff00ff) + | (((((back & 0x0000ff00) * data) + 0x0000ff00) >> 8) & 0x0000ff00); #ifdef CONFIG_VC_PROGRESS_WHITE - else { - data |= (0xff - alpha) << 16; - data |= (0xff - alpha) << 8; - data |= (0xff - alpha); - } + out += ((0xff - data) << 16) + | ((0xff - data) << 8) + | (0xff - data); #endif + } else + out = back; + *(dst + col) = out; + } 
+ dst = (volatile unsigned int *) (((volatile char*)dst) + vinfo.v_rowbytes); + if (backPtr) + backPtr += backRow - width; + if (sourceRow > width) + dataPtr += sourceRow - width; + } +} - if( save) { - back = *(dst + col); - if ( !static_alpha) - *backPtr++ = back; - back = (((((back & 0x00ff00ff) * alpha) + 0x00ff00ff) >> 8) & 0x00ff00ff) - | (((((back & 0x0000ff00) * alpha) + 0x0000ff00) >> 8) & 0x0000ff00); - if ( static_alpha) - *backPtr++ = back; - } else { - back = *backPtr++; - if ( !static_alpha) { - back = (((((back & 0x00ff00ff) * alpha) + 0x00ff00ff) >> 8) & 0x00ff00ff) - | (((((back & 0x0000ff00) * alpha) + 0x0000ff00) >> 8) & 0x0000ff00); - } - } +static void vc_blit_rect_30(int x, int y, int bx, + int width, int height, + int sourceRow, int backRow, + const unsigned char * dataPtr, + unsigned int * backPtr, + unsigned int flags) +{ + volatile unsigned int * dst; + int line, col; + unsigned int data = 0, out = 0, back = 0; + unsigned long long exp; - data += back; + if (backPtr) + backPtr += bx; + if (!sourceRow) + data = (unsigned int)(uintptr_t)dataPtr; + dst = (volatile unsigned int *) (vinfo.v_baseaddr + + (y * vinfo.v_rowbytes) + + (x * 4)); - } else - if( dataPtr != 0) { - data = (vc_clut[index + 0] << 16) - | (vc_clut[index + 1] << 8) - | (vc_clut[index + 2]); - } + for( line = 0; line < height; line++) + { + for( col = 0; col < width; col++) + { + if (col < sourceRow) + data = *dataPtr++; - *(dst + col) = data; - } - dst = (volatile unsigned int *) (((int)dst) + vinfo.v_rowbytes); + if (backPtr) { + if (kSave & flags) { + back = *(dst + col); + *backPtr++ = back; + } else + back = *backPtr++; + } + if (kDataIndexed & flags) { + out = (vc_clut[data*3 + 0] << 22) + | (vc_clut[data*3 + 1] << 12) + | (vc_clut[data*3 + 2] << 2); + } else if (kDataAlpha & flags) { + exp = back; + exp = (((((exp & 0x3FF003FF) * data) + 0x0FF000FF) >> 8) & 0x3FF003FF) + | (((((exp & 0x000FFC00) * data) + 0x0003FC00) >> 8) & 0x000FFC00); + out = (unsigned int)exp; 
+#ifdef CONFIG_VC_PROGRESS_WHITE + out += ((0xFF - data) << 22) + | ((0xFF - data) << 12) + | ((0xFF - data) << 2); +#endif + } else + out = back; + *(dst + col) = out; + } + dst = (volatile unsigned int *) (((volatile char*)dst) + vinfo.v_rowbytes); + if (backPtr) + backPtr += backRow - width; + if (sourceRow > width) + dataPtr += sourceRow - width; } } @@ -2040,7 +2112,7 @@ void vc_display_icon( vc_progress_element * desc, x += ((vinfo.v_width - width) / 2); y += ((vinfo.v_height - height) / 2); } - vc_blit_rect( x, y, width, height, data, NULL, NULL, FALSE, TRUE ); + vc_blit_rect( x, y, 0, width, height, width, 0, data, NULL, kDataIndexed ); } } @@ -2069,10 +2141,10 @@ vc_progress_initialize( vc_progress_element * desc, thread_call_setup(&vc_progress_call, vc_progress_task, NULL); clock_interval_to_absolutetime_interval(vc_progress->time, 1000 * 1000, &abstime); - vc_progress_interval = abstime; + vc_progress_interval = (uint32_t)abstime; } -static void +void vc_progress_set(boolean_t enable, uint32_t vc_delay) { spl_t s; @@ -2166,11 +2238,12 @@ vc_progress_set(boolean_t enable, uint32_t vc_delay) kfree( saveBuf, saveLen ); } + static void vc_progress_task(__unused void *arg0, void *arg) { spl_t s; - int count = (int) arg; + int count = (int)(uintptr_t) arg; int x, y, width, height; const unsigned char * data; @@ -2179,6 +2252,8 @@ vc_progress_task(__unused void *arg0, void *arg) if( vc_progress_enable) { + KERNEL_DEBUG_CONSTANT(0x7020008, count, 0, 0, 0, 0); + count++; if( count >= vc_progress->count) count = 0; @@ -2193,13 +2268,14 @@ vc_progress_task(__unused void *arg0, void *arg) x += ((vinfo.v_width - width) / 2); y += ((vinfo.v_height - height) / 2); } - vc_blit_rect( x, y, width, height, - NULL, data, vc_saveunder, - vc_needsave, (0 == (4 & vc_progress->flags)) ); + vc_blit_rect( x, y, 0, + width, height, width, width, + data, vc_saveunder, + kDataAlpha | (vc_needsave ? 
kSave : 0) ); vc_needsave = FALSE; clock_deadline_for_periodic_event(vc_progress_interval, mach_absolute_time(), &vc_progress_deadline); - thread_call_enter1_delayed(&vc_progress_call, (void *)count, vc_progress_deadline); + thread_call_enter1_delayed(&vc_progress_call, (void *)(uintptr_t)count, vc_progress_deadline); } simple_unlock(&vc_progress_lock); splx(s); @@ -2210,25 +2286,24 @@ vc_progress_task(__unused void *arg0, void *arg) * ------------------------------------------- */ -#ifdef __i386__ -#include +#if defined (__i386__) || defined (__x86_64__) #include -#endif /* __i386__ */ +#endif static boolean_t gc_acquired = FALSE; static boolean_t gc_graphics_boot = FALSE; static boolean_t gc_desire_text = FALSE; static unsigned int lastVideoPhys = 0; -static unsigned int lastVideoVirt = 0; -static unsigned int lastVideoSize = 0; +static vm_offset_t lastVideoVirt = 0; +static vm_size_t lastVideoSize = 0; static boolean_t lastVideoMapped = FALSE; void initialize_screen(PE_Video * boot_vinfo, unsigned int op) { unsigned int fbsize = 0; - unsigned int newVideoVirt = 0; + vm_offset_t newVideoVirt = 0; boolean_t graphics_now; ppnum_t fbppage; @@ -2242,34 +2317,21 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) * First, check if we are changing the size and/or location of the framebuffer */ new_vinfo.v_name[0] = 0; - new_vinfo.v_width = boot_vinfo->v_width; - new_vinfo.v_height = boot_vinfo->v_height; - new_vinfo.v_depth = boot_vinfo->v_depth; - new_vinfo.v_rowbytes = boot_vinfo->v_rowBytes; + new_vinfo.v_width = (unsigned int)boot_vinfo->v_width; + new_vinfo.v_height = (unsigned int)boot_vinfo->v_height; + new_vinfo.v_depth = (unsigned int)boot_vinfo->v_depth; + new_vinfo.v_rowbytes = (unsigned int)boot_vinfo->v_rowBytes; new_vinfo.v_physaddr = boot_vinfo->v_baseAddr; /* Get the physical address */ -#ifdef __i386__ - new_vinfo.v_type = boot_vinfo->v_display; +#if defined(__i386__) || defined(__x86_64__) + new_vinfo.v_type = (unsigned 
int)boot_vinfo->v_display; #else new_vinfo.v_type = 0; #endif if (!lastVideoMapped) - kprintf("initialize_screen: b=%08lX, w=%08lX, h=%08lX, r=%08lX, d=%08lX\n", /* (BRINGUP) */ + kprintf("initialize_screen: b=%08lX, w=%08X, h=%08X, r=%08X, d=%08X\n", /* (BRINGUP) */ new_vinfo.v_physaddr, new_vinfo.v_width, new_vinfo.v_height, new_vinfo.v_rowbytes, new_vinfo.v_type); /* (BRINGUP) */ -#ifdef __i386__ - if ( (new_vinfo.v_type == VGA_TEXT_MODE) ) - { - if (new_vinfo.v_physaddr == 0) { - new_vinfo.v_physaddr = 0xb8000; - new_vinfo.v_width = 80; - new_vinfo.v_height = 25; - new_vinfo.v_depth = 8; - new_vinfo.v_rowbytes = 0x8000; - } - } -#endif /* __i386__ */ - if (!new_vinfo.v_physaddr) /* Check to see if we have a framebuffer */ { kprintf("initialize_screen: No video - forcing serial mode\n"); /* (BRINGUP) */ @@ -2296,19 +2358,14 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) } if (boot_vinfo->v_length != 0) - fbsize = round_page_32(boot_vinfo->v_length); + fbsize = (unsigned int) round_page(boot_vinfo->v_length); else - fbsize = round_page_32(new_vinfo.v_height * new_vinfo.v_rowbytes); /* Remember size */ + fbsize = (unsigned int) round_page(new_vinfo.v_height * new_vinfo.v_rowbytes); /* Remember size */ if ((lastVideoPhys != new_vinfo.v_physaddr) || (fbsize > lastVideoSize)) /* Did framebuffer change location or get bigger? */ { - unsigned int -#if FALSE - flags = (new_vinfo.v_type == VGA_TEXT_MODE) ? 
VM_WIMG_IO : VM_WIMG_WCOMB; -#else - flags = VM_WIMG_IO; -#endif + unsigned int flags = VM_WIMG_IO; newVideoVirt = io_map_spec((vm_offset_t)new_vinfo.v_physaddr, fbsize, flags); /* Allocate address space for framebuffer */ } } @@ -2318,6 +2375,11 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) else new_vinfo.v_baseaddr = lastVideoVirt + boot_vinfo->v_offset; /* Set the new framebuffer address */ +#if defined(__x86_64__) + // Adjust the video buffer pointer to point to where it is in high virtual (above the hole) + new_vinfo.v_baseaddr |= VM_MIN_KERNEL_ADDRESS; +#endif + /* Update the vinfo structure atomically with respect to the vc_progress task if running */ if (vc_progress) { @@ -2347,29 +2409,12 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) kmem_free(kernel_map, lastVideoVirt, lastVideoSize); /* Toss kernel addresses */ } } - lastVideoPhys = new_vinfo.v_physaddr; /* Remember the framebuffer address */ + lastVideoPhys = (unsigned int)new_vinfo.v_physaddr; /* Remember the framebuffer address */ lastVideoSize = fbsize; /* Remember the size */ lastVideoVirt = newVideoVirt; /* Remember the virtual framebuffer address */ lastVideoMapped = (NULL != kernel_map); } -#ifdef __i386__ - if ( (vinfo.v_type == VGA_TEXT_MODE) ) - { - // Text mode setup by the booter. - - gc_ops.initialize = tc_initialize; - gc_ops.enable = tc_enable; - gc_ops.paint_char = tc_paint_char; - gc_ops.clear_screen = tc_clear_screen; - gc_ops.scroll_down = tc_scroll_down; - gc_ops.scroll_up = tc_scroll_up; - gc_ops.hide_cursor = tc_hide_cursor; - gc_ops.show_cursor = tc_show_cursor; - gc_ops.update_color = tc_update_color; - } - else -#endif /* __i386__ */ { // Graphics mode setup by the booter. 
@@ -2401,6 +2446,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) case kPETextMode: panicDialogDesired = FALSE; + disable_debug_output = FALSE; gc_graphics_boot = FALSE; break; @@ -2421,6 +2467,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( console_is_serial() ) break; panicDialogDesired = FALSE; + disable_debug_output = FALSE; if ( gc_acquired == FALSE ) { gc_desire_text = TRUE; @@ -2429,6 +2476,9 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( gc_graphics_boot == FALSE ) break; vc_progress_set( FALSE, 0 ); +#if !CONFIG_EMBEDDED + vc_enable_progressmeter( FALSE ); +#endif gc_enable( TRUE ); break; @@ -2441,6 +2491,9 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) gc_desire_text = FALSE; gc_enable( FALSE ); vc_progress_set( FALSE, 0 ); +#if !CONFIG_EMBEDDED + vc_enable_progressmeter( FALSE ); +#endif vc_clut8 = NULL; #ifdef GRATEFULDEBUGGER @@ -2482,3 +2535,139 @@ vcattach(void) } } } + +#if !CONFIG_EMBEDDED + +int vc_progress_meter_enable; +int vc_progress_meter_value; + +static void * vc_progress_meter_backbuffer; +static int vc_progress_meter_drawn; + +static void +vc_draw_progress_meter(int select, unsigned int flags, int x1, int x2) +{ + const unsigned char * data; + int x, w; + int ox, oy; + + ox = ((vinfo.v_width - kProgressBarWidth) / 2); + oy = vinfo.v_height - (((vinfo.v_height / 2) - vc_progress->dy + kProgressBarHeight) / 2); + + if (kDataBack == flags) + { + // restore back bits + vc_blit_rect(ox + x1, oy, x1, + x2, kProgressBarHeight, 0, kProgressBarWidth, + NULL, vc_progress_meter_backbuffer, flags); + return; + } + + for (x = x1; x < x2; x += w) + { + if (x < kProgressBarCapWidth) + { + if (x2 < kProgressBarCapWidth) + w = x2 - x; + else + w = kProgressBarCapWidth - x; + data = &progressmeter_leftcap[select & 1][0]; + data += x; + vc_blit_rect(ox + x, oy, x, + w, kProgressBarHeight, kProgressBarCapWidth, kProgressBarWidth, + data, vc_progress_meter_backbuffer, flags); + } + else if (x 
< (kProgressBarWidth - kProgressBarCapWidth)) + { + if (x2 < (kProgressBarWidth - kProgressBarCapWidth)) + w = x2 - x; + else + w = (kProgressBarWidth - kProgressBarCapWidth) - x; + data = &progressmeter_middle[select & 1][0]; + vc_blit_rect(ox + x, oy, x, + w, kProgressBarHeight, 1, kProgressBarWidth, + data, vc_progress_meter_backbuffer, flags); + } + else + { + w = x2 - x; + data = &progressmeter_rightcap[select & 1][0]; + data += x - (kProgressBarWidth - kProgressBarCapWidth); + vc_blit_rect(ox + x, oy, x, + w, kProgressBarHeight, kProgressBarCapWidth, kProgressBarWidth, + data, vc_progress_meter_backbuffer, flags); + } + } +} + +void +vc_enable_progressmeter(int new_value) +{ + spl_t s; + void * new_buffer = NULL; + + if (new_value) + new_buffer = kalloc(kProgressBarWidth * kProgressBarHeight * sizeof(int)); + + s = splhigh(); + simple_lock(&vc_progress_lock); + + if (gc_enabled || !gc_acquired || !gc_graphics_boot) + new_value = FALSE; + + if (new_value != vc_progress_meter_enable) + { + if (new_value) + { + vc_progress_meter_backbuffer = new_buffer; + vc_draw_progress_meter(FALSE, kDataAlpha | kSave, 0, kProgressBarWidth); + vc_progress_meter_enable = TRUE; + new_buffer = NULL; + vc_progress_meter_drawn = 0; + } + else if (vc_progress_meter_backbuffer) + { + vc_draw_progress_meter(0, kDataBack, 0, kProgressBarWidth); + new_buffer = vc_progress_meter_backbuffer; + vc_progress_meter_backbuffer = NULL; + vc_progress_meter_enable = FALSE; + + } + } + + simple_unlock(&vc_progress_lock); + splx(s); + + if (new_buffer) + kfree(new_buffer, kProgressBarWidth * kProgressBarHeight * sizeof(int)); +} + +void +vc_set_progressmeter(int new_value) +{ + spl_t s; + int x2; + + if ((new_value < 0) | (new_value > 100)) + return; + + s = splhigh(); + simple_lock(&vc_progress_lock); + + if (vc_progress_meter_enable) + { + vc_progress_meter_value = new_value; + x2 = ((kProgressBarWidth - 1) * new_value) / 100; + if (x2 > vc_progress_meter_drawn) + vc_draw_progress_meter(TRUE, 
kDataAlpha, vc_progress_meter_drawn, x2); + else + vc_draw_progress_meter(FALSE, kDataAlpha, x2, vc_progress_meter_drawn); + vc_progress_meter_drawn = x2; + } + + simple_unlock(&vc_progress_lock); + splx(s); +} + +#endif /* !CONFIG_EMBEDDED */ + diff --git a/osfmk/console/video_console.h b/osfmk/console/video_console.h index 1851b8756..39f1a8640 100644 --- a/osfmk/console/video_console.h +++ b/osfmk/console/video_console.h @@ -44,28 +44,28 @@ int vcgetc( int l, boolean_t wait, boolean_t raw ); -void video_scroll_up( unsigned long start, - unsigned long end, - unsigned long dest ); +void video_scroll_up( void *start, + void *end, + void *dest ); -void video_scroll_down( unsigned long start, /* HIGH addr */ - unsigned long end, /* LOW addr */ - unsigned long dest ); /* HIGH addr */ +void video_scroll_down( void *start, /* HIGH addr */ + void *end, /* LOW addr */ + void *dest ); /* HIGH addr */ struct vc_info { - unsigned long v_height; /* pixels */ - unsigned long v_width; /* pixels */ - unsigned long v_depth; - unsigned long v_rowbytes; + unsigned int v_height; /* pixels */ + unsigned int v_width; /* pixels */ + unsigned int v_depth; + unsigned int v_rowbytes; unsigned long v_baseaddr; - unsigned long v_type; + unsigned int v_type; char v_name[32]; unsigned long v_physaddr; - unsigned long v_rows; /* characters */ - unsigned long v_columns; /* characters */ - unsigned long v_rowscanbytes; /* Actualy number of bytes used for display per row*/ - unsigned long v_reserved[5]; + unsigned int v_rows; /* characters */ + unsigned int v_columns; /* characters */ + unsigned int v_rowscanbytes; /* Actualy number of bytes used for display per row*/ + unsigned int v_reserved[5]; }; #endif /* _VIDEO_CONSOLE_H_ */ diff --git a/osfmk/ddb/db_aout.c b/osfmk/ddb/db_aout.c index b55857be5..a6e48c3ee 100644 --- a/osfmk/ddb/db_aout.c +++ b/osfmk/ddb/db_aout.c @@ -77,6 +77,8 @@ #include /* a.out symbol table */ #include +#include + #define private static private int 
aout_db_order_symbols(char *, char *); @@ -133,9 +135,6 @@ aout_db_compare_symbols( int db_sorting_limit = 50000; -extern boolean_t getsymtab(char *, vm_offset_t *, int *, vm_offset_t *, - vm_size_t *); - boolean_t aout_db_sym_init( char * symtab, /* pointer to start of symbol table */ @@ -148,7 +147,7 @@ aout_db_sym_init( struct nlist *sym_start, *sym_end, *dbsym_start, *dbsym_end; struct nlist *sp; char *strtab, *dbstrtab; - int db_strlen; + long db_strlen; char *estrtab, *dbestrtab; unsigned long minsym = ~0; unsigned long maxsym = 0; @@ -157,7 +156,7 @@ aout_db_sym_init( int nsyms; - if (!getsymtab(symtab, + if (!getsymtab((kernel_mach_header_t *)symtab, (vm_offset_t *)&sym_start, &nsyms, (vm_offset_t *)&strtab, (vm_size_t *)&db_strlen)) { return(FALSE); @@ -680,7 +679,7 @@ aout_db_search_symbol( if (symtab->sorted) { struct nlist target; - target.n_value = off; + target.n_value = (vm_offset_t)off; target.n_un.n_name = (char *) 0; target.n_other = (char) 0; db_qsort_limit_search((char *)&target, (char **)&sp, (char **)&ep, @@ -783,7 +782,7 @@ aout_db_search_by_addr( if (stab->sorted) { struct nlist target; - target.n_value = addr; + target.n_value = (vm_offset_t)addr; target.n_un.n_name = (char *) 0; target.n_other = (char) 0; db_qsort_limit_search((char *)&target, (char **)&sp, @@ -800,7 +799,7 @@ aout_db_search_by_addr( if (line_func) line_func = 0; line_sp = cp; - line_diff = addr - cp->n_value; + line_diff = (unsigned long)(addr - cp->n_value); } } if (cp->n_value >= addr && line_sp) @@ -826,14 +825,14 @@ aout_db_search_by_addr( } else if (cp->n_value <= addr && (func_sp == 0 || func_diff > addr - cp->n_value)) { func_sp = cp; - func_diff = addr - cp->n_value; + func_diff = (unsigned long)(addr - cp->n_value); } continue; case N_TEXT|N_EXT: if (cp->n_value <= addr && (func_sp == 0 || func_diff >= addr - cp->n_value)) { func_sp = cp; - func_diff = addr - cp->n_value; + func_diff = (unsigned long)(addr - cp->n_value); if (func_diff == 0 && file_sp && func_sp 
&& line_sp == 0) break; } @@ -872,13 +871,13 @@ aout_db_search_by_addr( file_sp = cp; } else if (func_sp == 0) { func_sp = cp; - func_diff = addr - cp->n_value; + func_diff = (unsigned long)(addr - cp->n_value); } continue; case N_TEXT|N_EXT: if (func_sp == 0) { func_sp = cp; - func_diff = addr - cp->n_value; + func_diff = (unsigned long)(addr - cp->n_value); if (func_diff == 0 && file_sp && func_sp && line_sp == 0) break; @@ -949,7 +948,6 @@ aout_db_line_at_pc( return(found && func && *file); } -extern struct mach_header _mh_execute_header; /* * Initialization routine for a.out files. */ diff --git a/osfmk/ddb/db_break.c b/osfmk/ddb/db_break.c index 91267047b..38c4e232a 100644 --- a/osfmk/ddb/db_break.c +++ b/osfmk/ddb/db_break.c @@ -329,7 +329,7 @@ db_set_breakpoint( return; } } else { - if (!DB_CHECK_ACCESS(addr, BKPT_SIZE, task)) { + if (!DB_CHECK_ACCESS((vm_offset_t)addr, BKPT_SIZE, task)) { if (task) { db_printf("Warning: non-resident page for breakpoint at %llX", (unsigned long long)addr); @@ -428,7 +428,7 @@ db_find_breakpoint_here( && bkpt->address == addr) return(TRUE); if ((bkpt->flags & BKPT_USR_GLOBAL) == 0 && - DB_PHYS_EQ(task, addr, bkpt->task, bkpt->address)) + DB_PHYS_EQ(task, (vm_offset_t)addr, bkpt->task, (vm_offset_t)bkpt->address)) return (TRUE); } return(FALSE); @@ -461,12 +461,12 @@ db_set_breakpoints(void) } else bkpt->flags &= ~BKPT_1ST_SET; } - if (DB_CHECK_ACCESS(bkpt->address, BKPT_SIZE, task)) { + if (DB_CHECK_ACCESS((vm_offset_t)bkpt->address, BKPT_SIZE, task)) { inst = db_get_task_value(bkpt->address, BKPT_SIZE, FALSE, task); if (inst == BKPT_SET(inst)) continue; - bkpt->bkpt_inst = inst; + bkpt->bkpt_inst = (vm_size_t)inst; db_put_task_value(bkpt->address, BKPT_SIZE, BKPT_SET(bkpt->bkpt_inst), task); @@ -501,7 +501,7 @@ db_clear_breakpoints(void) task = cur_task; } if ((bkpt->flags & BKPT_SET_IN_MEM) - && DB_CHECK_ACCESS(bkpt->address, BKPT_SIZE, task)) { + && DB_CHECK_ACCESS((vm_offset_t)bkpt->address, BKPT_SIZE, task)) { inst = 
db_get_task_value(bkpt->address, BKPT_SIZE, FALSE, task); if (inst != BKPT_SET(inst)) { @@ -551,7 +551,7 @@ db_set_temp_breakpoint( db_printf("Too many thread_breakpoints.\n"); return 0; } - bkpt->bkpt_inst = db_get_task_value(bkpt->address, BKPT_SIZE, + bkpt->bkpt_inst = (vm_size_t)db_get_task_value(bkpt->address, BKPT_SIZE, FALSE, task); db_put_task_value(bkpt->address, BKPT_SIZE, BKPT_SET(bkpt->bkpt_inst), task); @@ -706,7 +706,7 @@ db_delete_cmd(void) db_printf("Bad break point number #%s\n", db_tok_string); db_error(0); } - if ((tbp = db_find_breakpoint_number(db_tok_number, &bkpt)) == 0) { + if ((tbp = db_find_breakpoint_number((int)db_tok_number, &bkpt)) == 0) { db_printf("No such break point #%d\n", db_tok_number); db_error(0); } @@ -796,14 +796,14 @@ db_breakpoint_cmd(db_expr_t addr, __unused boolean_t have_addr, db_expr_t count, && thr_act->task != db_current_space()) db_error("Cannot set break point in inactive user space\n"); db_set_breakpoint(db_target_space(thr_act, user_space), - (db_addr_t)addr, count, + (db_addr_t)addr, (int)count, (user_global)? 
THREAD_NULL: thr_act, task_bpt); } } else { db_set_breakpoint(db_target_space(THREAD_NULL, user_space), (db_addr_t)addr, - count, THREAD_NULL, FALSE); + (int)count, THREAD_NULL, FALSE); } } diff --git a/osfmk/ddb/db_command.c b/osfmk/ddb/db_command.c index cf99ace6b..13815b525 100644 --- a/osfmk/ddb/db_command.c +++ b/osfmk/ddb/db_command.c @@ -96,7 +96,6 @@ #include #include #include -#include #include #include @@ -542,12 +541,6 @@ struct db_command db_show_cmds[] = { .fcn = (db_func)xmm_reply_print, }, #endif /* NORMA_VM */ -#if TRACE_BUFFER - { - .name = "tr", - .fcn = db_show_tr, - }, -#endif /* TRACE_BUFFER */ { .name = "space", .fcn = db_show_one_space, @@ -564,10 +557,6 @@ struct db_command db_show_cmds[] = { .name = "lock", .fcn = (db_func)db_show_one_lock, }, - { - .name = "mutex_lock", - .fcn = (db_func)db_show_one_mutex, - }, { .name = "simple_lock", .fcn = (db_func)db_show_one_simple_lock, diff --git a/osfmk/ddb/db_cond.c b/osfmk/ddb/db_cond.c index e5199b914..3209a22c8 100644 --- a/osfmk/ddb/db_cond.c +++ b/osfmk/ddb/db_cond.c @@ -228,7 +228,7 @@ db_cond_cmd(void) db_error(0); return; } - if ((bkpt = db_find_breakpoint_number(db_tok_number, 0)) == 0) { + if ((bkpt = db_find_breakpoint_number((int)db_tok_number, 0)) == 0) { db_printf("No such break point #%d\n", db_tok_number); db_error(0); return; diff --git a/osfmk/ddb/db_examine.c b/osfmk/ddb/db_examine.c index 7093604cf..6ed841857 100644 --- a/osfmk/ddb/db_examine.c +++ b/osfmk/ddb/db_examine.c @@ -115,7 +115,7 @@ db_examine_cmd(db_expr_t addr, __unused boolean_t have_addr, db_expr_t count, if (count == (db_expr_t)-1) count = 1; - db_examine_count = count; + db_examine_count = (int)count; if (db_option(modif, 't')) { if (modif == db_last_modifier) thr_act = db_examine_act; @@ -128,7 +128,7 @@ db_examine_cmd(db_expr_t addr, __unused boolean_t have_addr, db_expr_t count, thr_act = THREAD_NULL; db_examine_act = thr_act; - db_examine((db_addr_t) addr, db_examine_format, count, + db_examine((db_addr_t) 
addr, db_examine_format, (int)count, db_act_to_task(thr_act)); } @@ -240,7 +240,7 @@ db_examine( next_addr = addr; if (db_print_position() == 0) { /* If we hit a new symbol, print it */ - char * name; + const char * name; db_addr_t off; db_find_task_sym_and_offset(addr,&name,&off,task); @@ -256,7 +256,7 @@ db_examine( switch (c) { case 'p': /* Addrs rendered symbolically. */ if( size == sizeof(void *) ) { - char *symName; + const char *symName; db_addr_t offset; items = 1; @@ -461,11 +461,11 @@ db_examine( break; case 'i': /* instruction */ next_addr = db_disasm(addr, FALSE, task); - size = next_addr - addr; + size = (int)(next_addr - addr); break; case 'I': /* instruction, alternate form */ next_addr = db_disasm(addr, TRUE, task); - size = next_addr - addr; + size = (int)(next_addr - addr); break; default: break; @@ -670,7 +670,7 @@ db_search_cmd(void) } else thr_act = THREAD_NULL; - db_search(addr, size, value, mask, count, db_act_to_task(thr_act)); + db_search(addr, size, value, mask, (unsigned int)count, db_act_to_task(thr_act)); } void @@ -705,7 +705,7 @@ db_xcdump( db_expr_t value; int bcount; db_addr_t off; - char *name; + const char *name; char data[DB_XCDUMP_NC]; db_find_task_sym_and_offset(addr, &name, &off, task); @@ -718,10 +718,10 @@ db_xcdump( db_printf("%0*llX:%s", 2*sizeof(db_addr_t),(unsigned long long) addr, (size != 1) ? " " : "" ); bcount = ((n > DB_XCDUMP_NC)? 
DB_XCDUMP_NC: n); - if (trunc_page_32(addr) != trunc_page_32(addr+bcount-1)) { - db_addr_t next_page_addr = trunc_page_32(addr+bcount-1); - if (!DB_CHECK_ACCESS(next_page_addr, sizeof(int), task)) - bcount = next_page_addr - addr; + if (trunc_page(addr) != trunc_page(addr+bcount-1)) { + db_addr_t next_page_addr = trunc_page(addr+bcount-1); + if (!DB_CHECK_ACCESS((vm_offset_t)next_page_addr, (int)sizeof(int), task)) + bcount = (int)(next_page_addr - addr); } db_read_bytes((vm_offset_t)addr, bcount, data, task); for (i = 0; i < bcount && off != 0; i += size) { @@ -743,5 +743,5 @@ db_xcdump( } db_printf("*\n"); } - return(addr); + return((int)addr); } diff --git a/osfmk/ddb/db_lex.c b/osfmk/ddb/db_lex.c index d79305736..50975857d 100644 --- a/osfmk/ddb/db_lex.c +++ b/osfmk/ddb/db_lex.c @@ -220,7 +220,7 @@ db_save_lex_context(register struct db_lex_context *lp) lp->l_ptr = db_lp; lp->l_eptr = db_endlp; lp->l_char = db_look_char; - lp->l_token = db_look_token; + lp->l_token = (int)db_look_token; } void @@ -267,7 +267,7 @@ db_read_token(void) int t; if (db_look_token) { - t = db_look_token; + t = (int)db_look_token; db_look_token = 0; } else { @@ -341,7 +341,7 @@ db_lex(void) int r, digit; if (c > '0') - r = db_radix; + r = (int)db_radix; else { c = db_read_char(); if (c == 'O' || c == 'o') @@ -352,7 +352,7 @@ db_lex(void) r = 16; else { cp--; - r = db_radix; + r = (int)db_radix; db_unread_char(c); } c = db_read_char(); diff --git a/osfmk/ddb/db_output.c b/osfmk/ddb/db_output.c index 6e4b29a8b..69bfeeaef 100644 --- a/osfmk/ddb/db_output.c +++ b/osfmk/ddb/db_output.c @@ -292,7 +292,7 @@ db_printf(const char *fmt, ...) va_list listp; va_start(listp, fmt); - _doprnt(fmt, &listp, db_putchar, db_radix); + _doprnt(fmt, &listp, db_putchar, (int)db_radix); va_end(listp); } @@ -304,7 +304,7 @@ kdbprintf(const char *fmt, ...) 
va_list listp; va_start(listp, fmt); - _doprnt(fmt, &listp, db_putchar, db_radix); + _doprnt(fmt, &listp, db_putchar, (int)db_radix); va_end(listp); } @@ -331,7 +331,7 @@ iprintf(const char *fmt, ...) } va_start(listp, fmt); - _doprnt(fmt, &listp, db_putchar, db_radix); + _doprnt(fmt, &listp, db_putchar, (int)db_radix); va_end(listp); } diff --git a/osfmk/ddb/db_print.h b/osfmk/ddb/db_print.h index aa5d55fff..f02d6979a 100644 --- a/osfmk/ddb/db_print.h +++ b/osfmk/ddb/db_print.h @@ -192,12 +192,6 @@ void db_show_one_simple_lock( db_expr_t count, char * modif); -void db_show_one_mutex( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif); - void db_show_runq( db_expr_t addr, boolean_t have_addr, diff --git a/osfmk/ddb/db_run.c b/osfmk/ddb/db_run.c index c0e98e1ab..6c7d5be98 100644 --- a/osfmk/ddb/db_run.c +++ b/osfmk/ddb/db_run.c @@ -239,8 +239,8 @@ db_restart_at_pc( ins = db_get_task_value(pc, sizeof(int), FALSE, task); db_inst_count++; - db_load_count += db_inst_load(ins); - db_store_count += db_inst_store(ins); + db_load_count += db_inst_load((unsigned long)ins); + db_store_count += db_inst_store((unsigned long)ins); #ifdef SOFTWARE_SSTEP /* Account for instructions in delay slots */ brpc = next_instr_address(pc,1,task); @@ -428,7 +428,7 @@ db_single_step_cmd(__unused db_expr_t addr, __unused boolean_t have_addr, print = TRUE; db_run_mode = STEP_ONCE; - db_loop_count = count; + db_loop_count = (typeof(db_loop_count))count; db_sstep_print = print; db_inst_count = 0; db_last_inst_count = 0; diff --git a/osfmk/ddb/makedis.c b/osfmk/ddb/makedis.c index f25201a43..59afa5290 100644 --- a/osfmk/ddb/makedis.c +++ b/osfmk/ddb/makedis.c @@ -283,18 +283,14 @@ #include #include -#include +#include #include #include #include #include -#ifndef LONG_BIT -#define LONG_BIT (CHAR_BIT * sizeof (long)) -#endif /* LONG_BIT */ - #define MAXfunction 32 /* Max function name length. */ -#define MAXBITS LONG_BIT /* Max bitstring length. 
*/ +#define MAXBITS 32 /* Max bitstring length. */ typedef unsigned long bits; enum type {T_ERROR, T_UNKNOWN, T_INTEGER, T_STRING}; const char *const typename[] = {"error", "unknown", "integer", "string"}; @@ -1257,7 +1253,7 @@ int parsefunctioncall(struct function *fp, char *start, char **stringp, p = *stringp; if (*p != '(') { fprintf(stderr, "%s: %s(%d): missing ( after function %.*s\n", progname, - filename, lineno, p - start, start); + filename, lineno, (int)(p - start), start); return 1; } sp->type = S_FUNCTIONCALL; @@ -1565,7 +1561,7 @@ void dis_done()\n\ const char concatdeclarations[] = "\ #include \n\ #include \n\ -#include \n\ +#include \n\ \n\ extern void *dis_realloc(void *p, size_t size); /* User-provided. */\n\ void *dis_alloc(size_t size);\n\ @@ -1802,7 +1798,7 @@ void functionheader(FILE *f, struct function *fp) { last = ", "; } for (ap = fp->args; ap != NULL; ap = ap->next) { - fprintf(f, last); + fprintf(f, "%s", last); compiletype(f, &ap->type); putc(ap->name, f); last = ", "; @@ -2060,7 +2056,7 @@ int compileconcat(struct string *sp, enum type type) { } last = ""; for (sp1 = sp; sp1 != NULL; sp1 = sp1->next) { - printf(last); + printf("%s", last); if (type != T_INTEGER) last = ", "; if (sp1->type == S_ARRAY) @@ -2167,7 +2163,7 @@ int compilefunctioncall(struct string *sp) { putc('\n', stderr); return 1; } - printf(last); + printf("%s", last); last = ", "; if (compileconcat(actualp->string, formaltype) != 0) return 1; @@ -2255,7 +2251,7 @@ void compilesimplearray(enum type *tp, char *name, int num, struct array *ap) { else compiletype(stdout, tp); if (name != NULL) - printf(name); + printf("%s", name); else compiletemp(num); printf("[]"); @@ -2299,7 +2295,7 @@ void compilebitsplice(struct bitsplice *splicep) { printf("("); for (bsp = splicep->splice; bsp != NULL; bsp = bsp->next) { - printf(last); + printf("%s", last); last = " | "; if (bsp->type == S_PARAMETER) putchar(bsp->value.arg->name); diff --git a/osfmk/ddb/orig/db_print.c 
b/osfmk/ddb/orig/db_print.c index 28aa7908e..7e91ec96d 100644 --- a/osfmk/ddb/orig/db_print.c +++ b/osfmk/ddb/orig/db_print.c @@ -1335,19 +1335,6 @@ db_show_runq( boolean_t showedany = FALSE; queue_iterate(&all_psets, pset, processor_set_t, all_psets) { -#if NCPUS > 1 /* This code has not been tested. */ - queue_iterate(&pset->processors, proc, processor_t, processors) { - runq = &proc->runq; - if (runq->count > 0) { - db_printf("PROCESSOR %x IN SET %x\n", proc, pset); - db_show_one_runq(runq); - showedany = TRUE; - } - } -#endif /* NCPUS > 1 */ -#ifndef NCPUS -#error NCPUS undefined -#endif runq = &pset->runq; if (runq->count > 0) { db_printf("PROCESSOR SET %x\n", pset); diff --git a/osfmk/ddb/tr.c b/osfmk/ddb/tr.c deleted file mode 100644 index c33b0239c..000000000 --- a/osfmk/ddb/tr.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * File: ddb/tr.c - * Authors: Alan Langerman, Jeffrey Heller - * Date: 1992 - * - * Internal trace routines. Like old-style XPRs but - * less formatting. - */ - -#include - -#if TRACE_BUFFER -#include -#include -#include -#include -#include - -extern void fc_get(int *); - -/* - * Primitive event tracing facility for kernel debugging. Yes, - * this has some resemblance to XPRs. However, it is primarily - * intended for post-mortem analysis through ddb. 
- */ - -#define TRACE_MAX (4 * 1024) -#define TRACE_WINDOW 40 - -typedef struct trace_event { - char *funcname; - char *file; - char *fmt; -#if NCPUS > 1 - char cpu_number; -#endif /* NCPUS > 1 */ - unsigned int lineno; - unsigned int tag1; - unsigned int tag2; - unsigned int tag3; - unsigned int tag4; - int indent; - int timestamp[2]; /* largest needed by any clock */ -} trace_event; - -trace_event trace_buffer[TRACE_MAX]; -unsigned long trace_index; -#if NCPUS == 1 -int tr_indent = 0; -#else /* NCPUS == 1 */ -int tr_indent[NCPUS]; -int tr_limit = -1; -#endif /* NCPUS == 1 */ - -decl_simple_lock_data(,trace_lock) - -void -tr_init(void) -{ -#if NCPUS > 1 - int i; - - for(i=0;i 1 */ - - simple_lock_init(&trace_lock, 0); -} - -void -tr( - char *funcname, - char *file, - unsigned int lineno, - char *fmt, - unsigned int tag1, - unsigned int tag2, - unsigned int tag3, - unsigned int tag4) -{ - int s; - register unsigned long ti, tn; -#if NCPUS > 1 - char cpu; -#endif /* NCPUS > 1 */ - -#if PARAGON860 - /* - * The following loop replaces the spl_and_lock sequence that - * would normally be here, as they are too heavy weight. The - * cmpsw (compare-and-swap) call returns -1 if unsuccessful. 
- */ - do { - ti = trace_index; - tn = ti + 1; - if (tn >= TRACE_MAX - 1) - tn = 0; - } while (cmpsw(ti, tn, &trace_index) == -1); - fc_get(trace_buffer[ti].timestamp); -#else /* PARAGON860 */ - /* - * Until someone does a cmpsw for other platforms, do it - * the slow way - */ - s = splimp(); - simple_lock(&trace_lock); - - ti = trace_index++; - if (trace_index >= TRACE_MAX - 1) - trace_index = 0; - - simple_unlock(&trace_lock); - splx(s); - - fc_get(trace_buffer[ti].timestamp); -/* get_uniq_timestamp(trace_buffer[ti].timestamp);*/ -#endif /* PARAGON860 */ - - trace_buffer[ti].funcname = funcname; - trace_buffer[ti].file = file; - trace_buffer[ti].lineno = lineno; - trace_buffer[ti].fmt = fmt; - trace_buffer[ti].tag1 = tag1; - trace_buffer[ti].tag2 = tag2; - trace_buffer[ti].tag3 = tag3; - trace_buffer[ti].tag4 = tag4; -#if NCPUS == 1 - trace_buffer[ti].indent = tr_indent; -#else /* NCPUS == 1 */ - mp_disable_preemption(); - cpu = cpu_number(); - trace_buffer[ti].indent = tr_indent[cpu]; - trace_buffer[ti].cpu_number = cpu; - mp_enable_preemption(); -#endif /* NCPUS == 1 */ -} - -#if MACH_KDB -#include - -/* - * Forward. - */ -void show_tr( - unsigned long index, - unsigned long range, - unsigned long show_extra); - -int matches( - char *pattern, - char *target); - -void parse_tr( - unsigned long index, - unsigned long range); - -/* - * The blank array must be a bit bigger than - * MAX_BLANKS to leave room for a terminating NULL. 
- */ -#define MAX_BLANKS 16 -char blanks[MAX_BLANKS+4]; - -void -show_tr( - unsigned long index, - unsigned long range, - unsigned long show_extra) -{ - char *filename, *cp; -#if PARAGON860 - trace_event *last_trace; -#endif /* PARAGON860 */ - unsigned int level; - int old_history; - int i; - - if (index == -1) { - index = trace_index - (TRACE_WINDOW-4); - range = TRACE_WINDOW; - } else if (index == 0) { - index = trace_index - (TRACE_WINDOW-4); - range = TRACE_WINDOW; - show_extra = 0; - } - if (index + range > TRACE_MAX) - range = TRACE_MAX - index; -#if PARAGON860 - last_trace = &trace_buffer[index-1]; -#endif /* PARAGON860 */ - level = trace_buffer[index-1].indent; - /* - * Set up the indentation buffer - */ - memset(blanks, ' ', trace_buffer[index].indent); - blanks[trace_buffer[index].indent] = '\0'; - for (i = index; i < index + range; ++i) { -#if NCPUS > 1 - if ((tr_limit != -1) && - (trace_buffer[i].cpu_number != tr_limit)) - continue; -#endif /* NCPUS > 1 */ - if (trace_buffer[i].file == (char *) 0 || - trace_buffer[i].funcname == (char *) 0 || - trace_buffer[i].lineno == 0 || - trace_buffer[i].fmt == 0) { - db_printf("[%04x%s]\n", i, - i >= trace_index ? "*" : ""); - continue; - } - - old_history = (i >= trace_index); - - /* - * Adjust the blank count if necessary - */ - if (level != trace_buffer[i].indent) { - level = trace_buffer[i].indent; - if (level >= MAX_BLANKS) - level = MAX_BLANKS; - memset(blanks, ' ', level); - blanks[level] = '\0'; - } - - for (cp = trace_buffer[i].file; *cp; ++cp) - if (*cp == '/') - filename = cp + 1; -#if NCPUS > 1 - db_printf("{%02d}",trace_buffer[i].cpu_number); -#endif /* NCPUS > 1 */ - db_printf("[%04x%s] %s%-16s", i, old_history ? 
"*" : "", - blanks, trace_buffer[i].funcname); - - if (show_extra) { - if (show_extra > 0) { - db_printf(" (%x/%8x)", - trace_buffer[i].timestamp[0], - trace_buffer[i].timestamp[1]); -#if PARAGON860 - /* - * For Paragon only, we compute and - * print out deltas on the timestamps - * accumulated in the tr buffer. One - * interesting case: it is meaningless - * to compute this delta for the last - * current entry in the log. - */ - if (old_history && - ((last_trace - trace_buffer) - < trace_index)) - db_printf("(N/A)"); - else - db_printf("(%d)", - timer_subtime( - trace_buffer[i].timestamp, - last_trace->timestamp)); -#endif /*PARAGON860*/ - db_printf(" "); - } - if (show_extra > 1) { - db_printf("(%s:%05d):\n\t", - filename, trace_buffer[i].lineno); - } - } else - db_printf(": "); - db_printf(trace_buffer[i].fmt, trace_buffer[i].tag1, - trace_buffer[i].tag2, trace_buffer[i].tag3, - trace_buffer[i].tag4); - db_printf("\n"); -#if PARAGON860 - last_trace = &trace_buffer[i]; -#endif /* PARAGON860 */ - } -} - - -int -matches( - char *pattern, - char *target) -{ - char *cp, *cp1, *cp2; - - for (cp = target; *cp; ++cp) { - for (cp2 = pattern, cp1 = cp; *cp2 && *cp1; ++cp2, ++cp1) - if (*cp2 != *cp1) - break; - if (!*cp2) - return 1; - } - return 0; -} - - -char parse_tr_buffer[100] = "KMSG"; - -void -parse_tr( - unsigned long index, - unsigned long range) -{ - int i; - char *filename, *cp; - char *string = parse_tr_buffer; - - if (index == 0) { - index = trace_index - (TRACE_WINDOW-4); - range = TRACE_WINDOW; - } - if (index + range > TRACE_MAX) - range = TRACE_MAX - index; - for (i = index; i < index + range; ++i) { -#if NCPUS > 1 - if ((tr_limit != -1) && - (trace_buffer[i].cpu_number != tr_limit)) - continue; -#endif /* NCPUS > 1 */ - if (trace_buffer[i].file == (char *) 0 || - trace_buffer[i].funcname == (char *) 0 || - trace_buffer[i].lineno == 0 || - trace_buffer[i].fmt == 0) { - db_printf("[%04x%s]\n", i, - i >= trace_index ? 
"*" : ""); - continue; - } - if (!matches(string, trace_buffer[i].fmt)) - continue; - for (cp = trace_buffer[i].file; *cp; ++cp) - if (*cp == '/') - filename = cp + 1; -#if NCPUS > 1 - db_printf("{%02d}",trace_buffer[i].cpu_number); -#endif /* NCPUS > 1 */ - db_printf("[%04x%s] %s", i, i >= trace_index ? "*" : "", - trace_buffer[i].funcname); - db_printf(": "); - db_printf(trace_buffer[i].fmt, trace_buffer[i].tag1, - trace_buffer[i].tag2, trace_buffer[i].tag3, - trace_buffer[i].tag4); - db_printf("\n"); - } -} - - -void -db_show_tr( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif) -{ - int flag, level; - - flag = 0, level = 0; - if (db_option(modif, 'l')) { - flag = 1; - level = -1; - } - if (db_option(modif, 'a')) { - flag = 2; - level = -1; - } - - TR_SHOW(level, 0, flag); -} - -#endif /* MACH_KDB */ - -#endif /* TRACE_BUFFER */ diff --git a/osfmk/ddb/tr.h b/osfmk/ddb/tr.h deleted file mode 100644 index 13ff98fde..000000000 --- a/osfmk/ddb/tr.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:09 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.11.1 1997/03/27 18:47:01 barbou - * Merge smp_shared merges into mainline. - * [1996/09/19 13:55:17 addis] - * Make tr_indent NCPU safe. - * [95/10/09 rwd] - * Added TR_INIT() macro. - * Change from NMK16.1 [93/09/22 paire] - * [94/02/04 paire] - * [97/02/25 barbou] - * - * Revision 1.1.6.1 1995/02/23 16:34:23 alanl - * Taken from DIPC2_SHARED. Change to !FREE Copyright. - * [95/01/05 rwd] - * - * Revision 1.1.4.4 1994/08/18 01:07:26 alanl - * + Allow tracing strictly based on MACH_TR; - * don't also require MACH_ASSERT (alanl). - * + ANSI-fication: cast tr arguments (alanl). - * + Added tr_indent and macros to use it (sjs). - * [1994/08/18 01:06:09 alanl] - * - * Revision 1.1.4.3 1994/08/08 17:59:35 rwd - * Include mach_tr.h - * [94/08/08 rwd] - * - * Revision 1.1.4.2 1994/08/05 19:36:08 mmp - * Added prototype for db_show_tr. - * - * Conditionalize on MACH_TR - * [94/07/20 rwd] - * - * Revision 1.1.4.1 1994/08/04 01:43:04 mmp - * DIPC: moved from norma/ to ddb/. Updated includes. - * [1994/08/03 13:37:46 mmp] - * - * Revision 1.1.9.1 1994/03/07 16:55:24 paire - * Added ANSI prototypes. - * [94/02/15 paire] - * - * Added TR_INIT() macro. 
- * Change from NMK16.1 [93/09/22 paire] - * [94/02/04 paire] - * - * Revision 1.1.2.2 1993/06/02 23:57:10 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:22:08 jeffc] - * - * Revision 1.1 1992/09/30 02:34:09 robert - * Initial revision - * - * $EndLog$ - */ - -/* - * File: ddb/tr.h - * Author: Alan Langerman, Jeffrey Heller - * Date: 1992 - * - * Internal trace routines. Like old-style XPRs but - * less formatting. - */ - -#include -#include - -#include - -/* - * Originally, we only wanted tracing when - * MACH_TR and MACH_ASSERT were turned on - * together. Now, there's no reason why - * MACH_TR and MACH_ASSERT can't be completely - * orthogonal. - */ -#define TRACE_BUFFER (MACH_TR) - -/* - * Log events in a circular trace buffer for future debugging. - * Events are unsigned integers. Each event has a descriptive - * message. - * - * TR_DECL must be used at the beginning of a routine using - * one of the tr calls. The macro should be passed the name - * of the function surrounded by quotation marks, e.g., - * TR_DECL("netipc_recv_intr"); - * and should be terminated with a semi-colon. The TR_DECL - * must be the *last* declaration in the variable declaration - * list, or syntax errors will be introduced when TRACE_BUFFER - * is turned off. 
- */ -#ifndef _DDB_TR_H_ -#define _DDB_TR_H_ - -#if TRACE_BUFFER - -#include - -#define __ui__ (unsigned int) -#define TR_INIT() tr_init() -#define TR_SHOW(a,b,c) show_tr((a),(b),(c)) -#define TR_DECL(funcname) char *__ntr_func_name__ = funcname -#define tr1(msg) \ - tr(__ntr_func_name__, __FILE__, __LINE__, (msg), \ - 0,0,0,0) -#define tr2(msg,tag1) \ - tr(__ntr_func_name__, __FILE__, __LINE__, (msg), \ - __ui__(tag1),0,0,0) -#define tr3(msg,tag1,tag2) \ - tr(__ntr_func_name__, __FILE__, __LINE__, (msg), \ - __ui__(tag1),__ui__(tag2),0,0) -#define tr4(msg,tag1,tag2,tag3) \ - tr(__ntr_func_name__, __FILE__, __LINE__, (msg), \ - __ui__(tag1),__ui__(tag2),__ui__(tag3),0) -#define tr5(msg,tag1,tag2,tag3,tag4) \ - tr(__ntr_func_name__, __FILE__, __LINE__, (msg), \ - __ui__(tag1),__ui__(tag2),__ui__(tag3),__ui__(tag4)) - -/* - * Adjust tr log indentation based on function - * call graph. - */ -#if NCPUS == 1 -extern int tr_indent; -#define tr_start() tr_indent++ -#define tr_stop() tr_indent-- -#else /* NCPUS == 1 */ -extern int tr_indent[NCPUS]; -#define tr_start() tr_indent[cpu_number()]++ -#define tr_stop() (--tr_indent[cpu_number()]<0?tr_indent[cpu_number()]=0:0); -#endif /* NCPUS == 1 */ - -extern void tr_init(void); -extern void tr( - char *funcname, - char *file, - unsigned int lineno, - char *fmt, - unsigned int tag1, - unsigned int tag2, - unsigned int tag3, - unsigned int tag4); - -extern void db_show_tr( - db_expr_t addr, - boolean_t have_addr, - db_expr_t count, - char * modif); - -#else /* TRACE_BUFFER */ - -#define TR_INIT() -#define TR_SHOW(a,b,c) -#define TR_DECL(funcname) -#define tr1(msg) -#define tr2(msg, tag1) -#define tr3(msg, tag1, tag2) -#define tr4(msg, tag1, tag2, tag3) -#define tr5(msg, tag1, tag2, tag3, tag4) -#define tr_start() -#define tr_stop() - -#endif /* TRACE_BUFFER */ - -#endif /* _DDB_TR_H_ */ diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c index 429c4c601..ab1c491a0 100644 --- 
a/osfmk/default_pager/default_pager.c +++ b/osfmk/default_pager/default_pager.c @@ -86,15 +86,17 @@ int debug_mask = 0; vm_size_t cthread_stack_size = 16 *1024; extern vm_size_t cthread_wait_stack_size; +#ifndef MACH_KERNEL unsigned long long vm_page_mask; int vm_page_shift; +#endif int norma_mk; boolean_t verbose; /* task_t default_pager_self; */ /* Our task port. */ -mutex_t dpt_lock; /* lock for the dpt array struct */ +lck_mtx_t dpt_lock; /* lock for the dpt array struct */ default_pager_thread_t **dpt_array; memory_object_default_t default_pager_object; /* for memory_object_create. */ @@ -132,6 +134,7 @@ unsigned int d_to_i(char *); /* forward; */ extern int vstruct_def_clshift; +struct global_stats global_stats; /* * Initialize and Run the default pager @@ -304,8 +307,11 @@ start_def_pager( __unused char *bs_device ) /* setup read buffers, etc */ default_pager_initialize(); + +#ifndef MACH_KERNEL default_pager(); - +#endif + /* start the backing store monitor, it runs on a callout thread */ default_pager_backing_store_monitor_callout = thread_call_allocate(default_pager_backing_store_monitor, NULL); @@ -321,15 +327,15 @@ default_pager_info( memory_object_default_t pager, default_pager_info_t *infop) { - vm_size_t pages_total, pages_free; + uint64_t pages_total, pages_free; if (pager != default_pager_object) return KERN_INVALID_ARGUMENT; bs_global_info(&pages_total, &pages_free); - infop->dpi_total_space = ptoa_32(pages_total); - infop->dpi_free_space = ptoa_32(pages_free); + infop->dpi_total_space = (vm_size_t) ptoa_64(pages_total); + infop->dpi_free_space = (vm_size_t) ptoa_64(pages_free); infop->dpi_page_size = vm_page_size; return KERN_SUCCESS; @@ -341,7 +347,7 @@ default_pager_info_64( memory_object_default_t pager, default_pager_info_64_t *infop) { - vm_size_t pages_total, pages_free; + uint64_t pages_total, pages_free; if (pager != default_pager_object) return KERN_INVALID_ARGUMENT; @@ -359,6 +365,11 @@ default_pager_info_64( return KERN_SUCCESS; } 
+lck_grp_t default_pager_lck_grp; +lck_grp_attr_t default_pager_lck_grp_attr; +lck_attr_t default_pager_lck_attr; + + void default_pager_initialize(void) @@ -366,12 +377,18 @@ default_pager_initialize(void) kern_return_t kr; __unused static char here[] = "default_pager_initialize"; + lck_grp_attr_setdefault(&default_pager_lck_grp_attr); + lck_grp_init(&default_pager_lck_grp, "default_pager", &default_pager_lck_grp_attr); + lck_attr_setdefault(&default_pager_lck_attr); /* * Vm variables. */ +#ifndef MACH_KERNEL vm_page_mask = vm_page_size - 1; - vm_page_shift = local_log2(vm_page_size); + assert((unsigned int) vm_page_size == vm_page_size); + vm_page_shift = local_log2((unsigned int) vm_page_size); +#endif /* * List of all vstructs. @@ -379,6 +396,7 @@ default_pager_initialize(void) vstruct_zone = zinit(sizeof(struct vstruct), 10000 * sizeof(struct vstruct), 8192, "vstruct zone"); + VSL_LOCK_INIT(); queue_init(&vstruct_list.vsl_queue); vstruct_list.vsl_count = 0; @@ -406,11 +424,12 @@ default_pager_initialize(void) } #else /* USER_PAGER */ { - int clsize; + unsigned int clsize; memory_object_default_t dmm; dmm = default_pager_object; - clsize = (vm_page_size << vstruct_def_clshift); + assert((unsigned int) vm_page_size == vm_page_size); + clsize = ((unsigned int) vm_page_size << vstruct_def_clshift); kr = host_default_memory_manager(host_priv_self(), &dmm, clsize); if ((kr != KERN_SUCCESS) || (dmm != MEMORY_OBJECT_DEFAULT_NULL)) diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h index 0d9fb9c9a..d1c4883e1 100644 --- a/osfmk/default_pager/default_pager_internal.h +++ b/osfmk/default_pager/default_pager_internal.h @@ -99,28 +99,11 @@ #define UP(stuff) #endif /* USER_PAGER */ -#ifndef MACH_KERNEL -extern struct mutex dprintf_lock; -#define PRINTF_LOCK_INIT() mutex_init(&dprintf_lock) -#define PRINTF_LOCK() mutex_lock(&dprintf_lock) -#define PRINTF_UNLOCK() mutex_unlock(&dprintf_lock) -#endif - -#ifndef MACH_KERNEL 
-#define dprintf(args) \ - do { \ - PRINTF_LOCK(); \ - printf("%s[%d]: ", my_name, dp_thread_id()); \ - printf args; \ - PRINTF_UNLOCK(); \ - } while (0) -#else #define dprintf(args) \ do { \ printf("%s[KERNEL]: ", my_name); \ printf args; \ } while (0) -#endif /* * Debug. @@ -179,23 +162,25 @@ extern char *mach_error_string(kern_return_t); * VM and IPC globals. */ #ifdef MACH_KERNEL -#define vm_page_size page_size +#define vm_page_size PAGE_SIZE +#define vm_page_mask PAGE_MASK +#define vm_page_shift PAGE_SHIFT #else extern vm_object_size_t vm_page_size; -#endif extern unsigned long long vm_page_mask; extern int vm_page_shift; +#endif #ifndef MACH_KERNEL #define ptoa(p) ((p)*vm_page_size) #define atop(a) ((a)/vm_page_size) #endif -#define howmany(a,b) (((a) + (b) - 1)/(b)) +#define howmany(a,b) ((((a) % (b)) == 0) ? ((a) / (b)) : (((a) / (b)) + 1)) extern memory_object_default_t default_pager_object; #ifdef MACH_KERNEL -extern mutex_t dpt_lock; /* Lock for the dpt array */ +extern lck_mtx_t dpt_lock; /* Lock for the dpt array */ extern int default_pager_internal_count; extern MACH_PORT_FACE default_pager_host_port; /* extern task_t default_pager_self; */ /* dont need or want */ @@ -210,6 +195,10 @@ extern mach_port_t default_pager_external_set; extern mach_port_t default_pager_default_set; #endif +typedef vm32_offset_t dp_offset_t; +typedef vm32_size_t dp_size_t; +typedef vm32_address_t dp_address_t; + typedef struct default_pager_thread { #ifndef MACH_KERNEL cthread_t dpt_thread; /* Server thread. */ @@ -231,7 +220,7 @@ extern default_pager_thread_t **dpt_array; /* * Global statistics. 
*/ -struct { +struct global_stats { unsigned int gs_pageout_calls; /* # pageout calls */ unsigned int gs_pagein_calls; /* # pagein calls */ unsigned int gs_pages_in; /* # pages paged in (total) */ @@ -240,7 +229,8 @@ struct { unsigned int gs_pages_init; /* # page init requests */ unsigned int gs_pages_init_writes; /* # page init writes */ VSTATS_LOCK_DECL(gs_lock) -} global_stats; +}; +extern struct global_stats global_stats; #define GSTAT(clause) VSTATS_ACTION(&global_stats.gs_lock, (clause)) /* @@ -275,16 +265,18 @@ struct { #define BS_NOPRI -1 #define BS_FULLPRI -2 +/* + * Quick way to access the emergency segment backing store structures + * without a full-blown search. + */ +extern MACH_PORT_FACE emergency_segment_backing_store; + /* * Mapping between backing store port and backing store object. */ struct backing_store { queue_chain_t bs_links; /* link in backing_store_list */ -#ifdef MACH_KERNEL - mutex_t bs_lock; /* lock for the structure */ -#else - struct mutex bs_lock; /* lock for the structure */ -#endif + lck_mtx_t bs_lock; /* lock for the structure */ MACH_PORT_FACE bs_port; /* backing store port */ int bs_priority; int bs_clsize; /* cluster size in pages */ @@ -302,31 +294,21 @@ typedef struct backing_store *backing_store_t; #define BS_STAT(bs, clause) VSTATS_ACTION(&(bs)->bs_lock, (clause)) #ifdef MACH_KERNEL -#define BS_LOCK_INIT(bs) mutex_init(&(bs)->bs_lock, 0) -#else -#define BS_LOCK_INIT(bs) mutex_init(&(bs)->bs_lock) -#endif -#define BS_LOCK(bs) mutex_lock(&(bs)->bs_lock) -#define BS_UNLOCK(bs) mutex_unlock(&(bs)->bs_lock) +#define BS_LOCK_INIT(bs) lck_mtx_init(&(bs)->bs_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define BS_LOCK(bs) lck_mtx_lock(&(bs)->bs_lock) +#define BS_UNLOCK(bs) lck_mtx_unlock(&(bs)->bs_lock) struct backing_store_list_head { queue_head_t bsl_queue; -#ifdef MACH_KERNEL - mutex_t bsl_lock; -#else - struct mutex bsl_lock; + lck_mtx_t bsl_lock; #endif }; extern struct backing_store_list_head backing_store_list; 
extern int backing_store_release_trigger_disable; -#ifdef MACH_KERNEL -#define BSL_LOCK_INIT() mutex_init(&backing_store_list.bsl_lock, 0) -#else -#define BSL_LOCK_INIT() mutex_init(&backing_store_list.bsl_lock) -#endif -#define BSL_LOCK() mutex_lock(&backing_store_list.bsl_lock) -#define BSL_UNLOCK() mutex_unlock(&backing_store_list.bsl_lock) +#define BSL_LOCK_INIT() lck_mtx_init(&backing_store_list.bsl_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define BSL_LOCK() lck_mtx_lock(&backing_store_list.bsl_lock) +#define BSL_UNLOCK() lck_mtx_unlock(&backing_store_list.bsl_lock) /* * Paging segment management. @@ -340,8 +322,8 @@ struct paging_segment { } storage_type; unsigned int ps_segtype; /* file type or partition */ MACH_PORT_FACE ps_device; /* Port to device */ - vm_offset_t ps_offset; /* Offset of segment within device */ - vm_offset_t ps_recnum; /* Number of device records in segment*/ + dp_offset_t ps_offset; /* Offset of segment within device */ + dp_offset_t ps_recnum; /* Number of device records in segment*/ unsigned int ps_pgnum; /* Number of pages in segment */ unsigned int ps_record_shift;/* Bit shift: pages to device records */ @@ -350,22 +332,27 @@ struct paging_segment { unsigned int ps_ncls; /* Number of clusters in segment */ unsigned int ps_clcount; /* Number of free clusters */ unsigned int ps_pgcount; /* Number of free pages */ - unsigned long ps_hint; /* Hint of where to look next. */ + unsigned int ps_hint; /* Hint of where to look next. */ + unsigned int ps_special_clusters; /* Clusters that might come in while we've + * released the locks doing a ps_delete. 
+ */ /* bitmap */ -#ifdef MACH_KERNEL - mutex_t ps_lock; /* Lock for contents of struct */ -#else - struct mutex ps_lock; /* Lock for contents of struct */ -#endif + lck_mtx_t ps_lock; /* Lock for contents of struct */ unsigned char *ps_bmap; /* Map of used clusters */ /* backing store */ backing_store_t ps_bs; /* Backing store segment belongs to */ - - boolean_t ps_going_away; /* Destroy attempt in progress */ +#define PS_CAN_USE 0x1 +#define PS_GOING_AWAY 0x2 +#define PS_EMERGENCY_SEGMENT 0x4 + unsigned int ps_state; }; +#define IS_PS_OK_TO_USE(ps) ((ps->ps_state & PS_CAN_USE) == PS_CAN_USE) +#define IS_PS_GOING_AWAY(ps) ((ps->ps_state & PS_GOING_AWAY) == PS_GOING_AWAY) +#define IS_PS_EMERGENCY_SEGMENT(ps) ((ps->ps_state & PS_EMERGENCY_SEGMENT) == PS_EMERGENCY_SEGMENT) + #define ps_vnode storage_type.vnode #define ps_device storage_type.dev #define PS_PARTITION 1 @@ -375,18 +362,14 @@ typedef struct paging_segment *paging_segment_t; #define PAGING_SEGMENT_NULL ((paging_segment_t) 0) -#ifdef MACH_KERNEL -#define PS_LOCK_INIT(ps) mutex_init(&(ps)->ps_lock, 0) -#else -#define PS_LOCK_INIT(ps) mutex_init(&(ps)->ps_lock) -#endif -#define PS_LOCK(ps) mutex_lock(&(ps)->ps_lock) -#define PS_UNLOCK(ps) mutex_unlock(&(ps)->ps_lock) +#define PS_LOCK_INIT(ps) lck_mtx_init(&(ps)->ps_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define PS_LOCK(ps) lck_mtx_lock(&(ps)->ps_lock) +#define PS_UNLOCK(ps) lck_mtx_unlock(&(ps)->ps_lock) typedef unsigned int pseg_index_t; #define INVALID_PSEG_INDEX ((pseg_index_t)-1) -#define NULL_PSEG_INDEX ((pseg_index_t) 0) +#define EMERGENCY_PSEG_INDEX ((pseg_index_t) 0) /* * MAX_PSEG_INDEX value is related to struct vs_map below. * "0" is reserved for empty map entries (no segment). 
@@ -396,22 +379,14 @@ typedef unsigned int pseg_index_t; /* paging segments array */ extern paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS]; -#ifdef MACH_KERNEL -extern mutex_t paging_segments_lock; -#else -extern struct mutex paging_segments_lock; -#endif +extern lck_mtx_t paging_segments_lock; extern int paging_segment_count; /* number of active paging segments */ extern int paging_segment_max; /* highest used paging segment index */ extern int ps_select_array[DEFAULT_PAGER_BACKING_STORE_MAXPRI+1]; -#ifdef MACH_KERNEL -#define PSL_LOCK_INIT() mutex_init(&paging_segments_lock, 0) -#else -#define PSL_LOCK_INIT() mutex_init(&paging_segments_lock) -#endif -#define PSL_LOCK() mutex_lock(&paging_segments_lock) -#define PSL_UNLOCK() mutex_unlock(&paging_segments_lock) +#define PSL_LOCK_INIT() lck_mtx_init(&paging_segments_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define PSL_LOCK() lck_mtx_lock(&paging_segments_lock) +#define PSL_UNLOCK() lck_mtx_unlock(&paging_segments_lock) /* * Vstruct manipulation. The vstruct is the pager's internal @@ -462,7 +437,7 @@ typedef struct vs_map *vs_map_t; * Exported macros for manipulating the vs_map structure -- * checking status, getting and setting bits. */ -#define VSCLSIZE(vs) (1UL << (vs)->vs_clshift) +#define VSCLSIZE(vs) (1U << (vs)->vs_clshift) #define VSM_ISCLR(vsm) (((vsm).vsmap_entry == VSM_ENTRY_NULL) && \ ((vsm).vsmap_error == 0)) #define VSM_ISERR(vsm) ((vsm).vsmap_error) @@ -492,11 +467,11 @@ typedef struct vs_map *vs_map_t; * map vm objects to backing storage (paging files and clusters). 
*/ #define CLMAP_THRESHOLD 512 /* bytes */ -#define CLMAP_ENTRIES (CLMAP_THRESHOLD/sizeof(struct vs_map)) -#define CLMAP_SIZE(ncls) (ncls*sizeof(struct vs_map)) +#define CLMAP_ENTRIES (CLMAP_THRESHOLD/(int)sizeof(struct vs_map)) +#define CLMAP_SIZE(ncls) (ncls*(int)sizeof(struct vs_map)) #define INDIRECT_CLMAP_ENTRIES(ncls) (((ncls-1)/CLMAP_ENTRIES) + 1) -#define INDIRECT_CLMAP_SIZE(ncls) (INDIRECT_CLMAP_ENTRIES(ncls) * sizeof(struct vs_map *)) +#define INDIRECT_CLMAP_SIZE(ncls) (INDIRECT_CLMAP_ENTRIES(ncls) * (int)sizeof(struct vs_map *)) #define INDIRECT_CLMAP(size) (CLMAP_SIZE(size) > CLMAP_THRESHOLD) #define RMAPSIZE(blocks) (howmany(blocks,NBBY)) @@ -545,33 +520,20 @@ typedef struct vstruct_alias { struct vstruct *vs; } vstruct_alias_t; -#ifdef MACH_KERNEL -#define DPT_LOCK_INIT(lock) mutex_init(&(lock), 0) -#define DPT_LOCK(lock) mutex_lock(&(lock)) -#define DPT_UNLOCK(lock) mutex_unlock(&(lock)) -#define DPT_SLEEP(lock, e, i) thread_sleep_mutex(&(lock), (event_t)(e), i) -#define VS_LOCK_TYPE hw_lock_data_t -#define VS_LOCK_INIT(vs) hw_lock_init(&(vs)->vs_lock) -#define VS_TRY_LOCK(vs) (VS_LOCK(vs),TRUE) -#define VS_LOCK(vs) hw_lock_lock(&(vs)->vs_lock) -#define VS_UNLOCK(vs) hw_lock_unlock(&(vs)->vs_lock) -#define VS_MAP_LOCK_TYPE mutex_t -#define VS_MAP_LOCK_INIT(vs) mutex_init(&(vs)->vs_map_lock, 0) -#define VS_MAP_LOCK(vs) mutex_lock(&(vs)->vs_map_lock) -#define VS_MAP_TRY_LOCK(vs) mutex_try(&(vs)->vs_map_lock) -#define VS_MAP_UNLOCK(vs) mutex_unlock(&(vs)->vs_map_lock) -#else -#define VS_LOCK_TYPE struct mutex -#define VS_LOCK_INIT(vs) mutex_init(&(vs)->vs_lock, 0) -#define VS_TRY_LOCK(vs) mutex_try(&(vs)->vs_lock) -#define VS_LOCK(vs) mutex_lock(&(vs)->vs_lock) -#define VS_UNLOCK(vs) mutex_unlock(&(vs)->vs_lock) -#define VS_MAP_LOCK_TYPE struct mutex -#define VS_MAP_LOCK_INIT(vs) mutex_init(&(vs)->vs_map_lock) -#define VS_MAP_LOCK(vs) mutex_lock(&(vs)->vs_map_lock) -#define VS_MAP_TRY_LOCK(vs) mutex_try(&(vs)->vs_map_lock) -#define VS_MAP_UNLOCK(vs) 
mutex_unlock(&(vs)->vs_map_lock) -#endif +#define DPT_LOCK_INIT(lock) lck_mtx_init(&(lock), &default_pager_lck_grp, &default_pager_lck_attr) +#define DPT_LOCK(lock) lck_mtx_lock(&(lock)) +#define DPT_UNLOCK(lock) lck_mtx_unlock(&(lock)) +#define DPT_SLEEP(lock, e, i) lck_mtx_sleep(&(lock), LCK_SLEEP_DEFAULT, (event_t)(e), i) +#define VS_LOCK_TYPE hw_lock_data_t +#define VS_LOCK_INIT(vs) hw_lock_init(&(vs)->vs_lock) +#define VS_TRY_LOCK(vs) (VS_LOCK(vs),TRUE) +#define VS_LOCK(vs) hw_lock_lock(&(vs)->vs_lock) +#define VS_UNLOCK(vs) hw_lock_unlock(&(vs)->vs_lock) +#define VS_MAP_LOCK_TYPE lck_mtx_t +#define VS_MAP_LOCK_INIT(vs) lck_mtx_init(&(vs)->vs_map_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define VS_MAP_LOCK(vs) lck_mtx_lock(&(vs)->vs_map_lock) +#define VS_MAP_TRY_LOCK(vs) lck_mtx_try_lock(&(vs)->vs_map_lock) +#define VS_MAP_UNLOCK(vs) lck_mtx_unlock(&(vs)->vs_map_lock) /* @@ -582,8 +544,8 @@ typedef struct vstruct_alias { * The start of this structure MUST match a "struct memory_object". */ typedef struct vstruct { + struct ipc_object_header vs_pager_header; /* fake ip_kotype() */ memory_object_pager_ops_t vs_pager_ops; /* == &default_pager_ops */ - int vs_mem_obj_ikot;/* JMM:fake ip_kotype() */ memory_object_control_t vs_control; /* our mem obj control ref */ VS_LOCK_TYPE vs_lock; /* data for the lock */ @@ -593,7 +555,6 @@ typedef struct vstruct { unsigned int vs_readers; /* Reads in progress */ unsigned int vs_writers; /* Writes in progress */ -#ifdef MACH_KERNEL unsigned int /* boolean_t */ vs_waiting_seqno:1, /* to wait on seqno */ /* boolean_t */ vs_waiting_read:1, /* waiting on reader? */ @@ -601,14 +562,6 @@ typedef struct vstruct { /* boolean_t */ vs_waiting_async:1, /* waiting on async? */ /* boolean_t */ vs_indirect:1, /* map indirect? */ /* boolean_t */ vs_xfer_pending:1; /* xfer out of seg? 
*/ -#else - event_t vs_waiting_seqno;/* to wait on seqno */ - event_t vs_waiting_read; /* to wait on readers */ - event_t vs_waiting_write;/* to wait on writers */ - event_t vs_waiting_async;/* to wait on async_pending */ - int vs_indirect:1, /* Is the map indirect ? */ - vs_xfer_pending:1; /* xfering out of a seg ? */ -#endif unsigned int vs_async_pending;/* pending async write count */ unsigned int vs_errors; /* Pageout error count */ @@ -618,11 +571,7 @@ typedef struct vstruct { unsigned int vs_clshift; /* Bit shift: clusters->pages */ unsigned int vs_size; /* Object size in clusters */ -#ifdef MACH_KERNEL - mutex_t vs_map_lock; /* to protect map below */ -#else - struct mutex vs_map_lock; /* to protect map below */ -#endif + lck_mtx_t vs_map_lock; /* to protect map below */ union { struct vs_map *vsu_dmap; /* Direct map of clusters */ struct vs_map **vsu_imap; /* Indirect map of clusters */ @@ -677,7 +626,6 @@ struct vs_async { paging_segment_t vsa_ps; /* the paging segment used */ int vsa_flags; /* flags */ int vsa_error; /* error, if there is one */ - mutex_t vsa_lock; MACH_PORT_FACE reply_port; /* associated reply port */ }; @@ -696,11 +644,7 @@ struct vs_async { */ struct vstruct_list_head { queue_head_t vsl_queue; -#ifdef MACH_KERNEL - mutex_t vsl_lock; -#else - struct mutex vsl_lock; -#endif + lck_mtx_t vsl_lock; int vsl_count; /* saves code */ }; @@ -710,15 +654,14 @@ __private_extern__ void vstruct_list_insert(vstruct_t vs); __private_extern__ void vstruct_list_delete(vstruct_t vs); -#ifdef MACH_KERNEL -#define VSL_LOCK_INIT() mutex_init(&vstruct_list.vsl_lock, 0) -#else -#define VSL_LOCK_INIT() mutex_init(&vstruct_list.vsl_lock) -#endif -#define VSL_LOCK() mutex_lock(&vstruct_list.vsl_lock) -#define VSL_LOCK_TRY() mutex_try(&vstruct_list.vsl_lock) -#define VSL_UNLOCK() mutex_unlock(&vstruct_list.vsl_lock) -#define VSL_SLEEP(e,i) thread_sleep_mutex((e), &vstruct_list.vsl_lock, (i)) +extern lck_grp_t default_pager_lck_grp; +extern lck_attr_t 
default_pager_lck_attr; + +#define VSL_LOCK_INIT() lck_mtx_init(&vstruct_list.vsl_lock, &default_pager_lck_grp, &default_pager_lck_attr) +#define VSL_LOCK() lck_mtx_lock(&vstruct_list.vsl_lock) +#define VSL_LOCK_TRY() lck_mtx_try_lock(&vstruct_list.vsl_lock) +#define VSL_UNLOCK() lck_mtx_unlock(&vstruct_list.vsl_lock) +#define VSL_SLEEP(e,i) lck_mtx_sleep(&vstruct_list.vsl_lock, LCK_SLEEP_DEFAULT, (e), (i)) #ifdef MACH_KERNEL __private_extern__ zone_t vstruct_zone; @@ -788,15 +731,15 @@ extern void default_pager_no_senders(memory_object_t, extern int local_log2(unsigned int); extern void bs_initialize(void); -extern void bs_global_info(vm_size_t *, - vm_size_t *); +extern void bs_global_info(uint64_t *, + uint64_t *); extern boolean_t bs_add_device(char *, MACH_PORT_FACE); -extern vstruct_t ps_vstruct_create(vm_size_t); +extern vstruct_t ps_vstruct_create(dp_size_t); extern void ps_vstruct_dealloc(vstruct_t); extern kern_return_t pvs_cluster_read(vstruct_t, - vm_offset_t, - vm_size_t, + dp_offset_t, + dp_size_t, void *); extern kern_return_t vs_cluster_write(vstruct_t, upl_t, @@ -804,16 +747,16 @@ extern kern_return_t vs_cluster_write(vstruct_t, upl_size_t, boolean_t, int); -extern vm_offset_t ps_clmap(vstruct_t, - vm_offset_t, +extern dp_offset_t ps_clmap(vstruct_t, + dp_offset_t, struct clmap *, int, - vm_size_t, + dp_size_t, int); extern vm_size_t ps_vstruct_allocated_size(vstruct_t); -extern size_t ps_vstruct_allocated_pages(vstruct_t, +extern unsigned int ps_vstruct_allocated_pages(vstruct_t, default_pager_page_t *, - size_t); + unsigned int); extern boolean_t bs_set_default_clsize(unsigned int); extern boolean_t verbose; diff --git a/osfmk/default_pager/default_pager_types.h b/osfmk/default_pager/default_pager_types.h index 2c37e4b04..8bd35dc36 100644 --- a/osfmk/default_pager/default_pager_types.h +++ b/osfmk/default_pager/default_pager_types.h @@ -110,6 +110,11 @@ typedef default_pager_page_t *default_pager_page_array_t; #define LO_WAT_ALERT 0x02 #define 
SWAP_ENCRYPT_ON 0x04 #define SWAP_ENCRYPT_OFF 0x08 +#define SWAP_COMPACT_DISABLE 0x10 +#define SWAP_COMPACT_ENABLE 0x20 +#define PROC_RESUME 0x40 +#define SWAP_FILE_CREATION_ERROR 0x80 +#define USE_EMERGENCY_SWAP_FILE_FIRST 0x100 #endif /* __APPLE_API_UNSTABLE */ diff --git a/osfmk/default_pager/diag.h b/osfmk/default_pager/diag.h index ad39f3b0e..de307fdc5 100644 --- a/osfmk/default_pager/diag.h +++ b/osfmk/default_pager/diag.h @@ -54,9 +54,14 @@ #define VSTATS_UNLOCK(l) #define VSTATS_LOCK_INIT(l) #else -#define VSTATS_LOCK_DECL(name) struct mutex name; -#define VSTATS_LOCK(l) mutex_lock(l) -#define VSTATS_UNLOCK(l) mutex_unlock(l) -#define VSTATS_LOCK_INIT(l) mutex_init(l) + +extern lck_grp_t default_pager_lck_grp; +extern lck_attr_t default_pager_lck_attr; + + +#define VSTATS_LOCK_DECL(name) struct lck_mtx_t name; +#define VSTATS_LOCK(l) lck_mtx_lock(l) +#define VSTATS_UNLOCK(l) lck_mtx_unlock(l) +#define VSTATS_LOCK_INIT(l) lck_mtx_init(l, &default_pager_lck_grp, &default_pager_lck_attr ) #endif /* VAGUE_STATS */ diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index 5dfcf6952..33aa79117 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,7 +84,7 @@ #include -/* LP64todo - need large internal object support */ +/* todo - need large internal object support */ /* * ALLOC_STRIDE... 
the maximum number of bytes allocated from @@ -133,7 +133,7 @@ int async_requests_out; #define VS_ASYNC_REUSE 1 struct vs_async *vs_async_free_list; -mutex_t default_pager_async_lock; /* Protects globals above */ +lck_mtx_t default_pager_async_lock; /* Protects globals above */ int vs_alloc_async_failed = 0; /* statistics */ @@ -145,22 +145,25 @@ void vs_free_async(struct vs_async *vsa); /* forward */ #define VS_ALLOC_ASYNC() vs_alloc_async() #define VS_FREE_ASYNC(vsa) vs_free_async(vsa) -#define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock) -#define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock) -#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, 0) +#define VS_ASYNC_LOCK() lck_mtx_lock(&default_pager_async_lock) +#define VS_ASYNC_UNLOCK() lck_mtx_unlock(&default_pager_async_lock) +#define VS_ASYNC_LOCK_INIT() lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr) #define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock) /* * Paging Space Hysteresis triggers and the target notification port * */ - +unsigned int dp_pages_free_drift_count = 0; +unsigned int dp_pages_free_drifted_max = 0; unsigned int minimum_pages_remaining = 0; unsigned int maximum_pages_free = 0; ipc_port_t min_pages_trigger_port = NULL; ipc_port_t max_pages_trigger_port = NULL; +boolean_t use_emergency_swap_file_first = FALSE; boolean_t bs_low = FALSE; int backing_store_release_trigger_disable = 0; +boolean_t backing_store_stop_compaction = FALSE; /* Have we decided if swap needs to be encrypted yet ? */ @@ -178,9 +181,10 @@ vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */ /* * List of all backing store and segments. 
*/ +MACH_PORT_FACE emergency_segment_backing_store; struct backing_store_list_head backing_store_list; paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS]; -mutex_t paging_segments_lock; +lck_mtx_t paging_segments_lock; int paging_segment_max = 0; int paging_segment_count = 0; int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 }; @@ -193,27 +197,30 @@ int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 }; * likely to be deprecated. */ unsigned int dp_pages_free = 0; +unsigned int dp_pages_reserve = 0; unsigned int cluster_transfer_minimum = 100; /* forward declarations */ -kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int); /* forward */ -kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */ +kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int); /* forward */ +kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int); /* forward */ default_pager_thread_t *get_read_buffer( void ); kern_return_t ps_vstruct_transfer_from_segment( vstruct_t vs, paging_segment_t segment, upl_t upl); -kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ -kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ +kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ +kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ kern_return_t vs_cluster_transfer( vstruct_t vs, - upl_offset_t offset, - upl_size_t cnt, + dp_offset_t offset, + dp_size_t cnt, upl_t upl); vs_map_t vs_get_map_entry( vstruct_t vs, - vm_offset_t offset); + dp_offset_t offset); +kern_return_t 
+default_pager_backing_store_delete_internal( MACH_PORT_FACE ); default_pager_thread_t * get_read_buffer( void ) @@ -349,10 +356,10 @@ int default_pager_info_verbose = 1; void bs_global_info( - vm_size_t *totalp, - vm_size_t *freep) + uint64_t *totalp, + uint64_t *freep) { - vm_size_t pages_total, pages_free; + uint64_t pages_total, pages_free; paging_segment_t ps; int i; @@ -571,7 +578,7 @@ default_pager_backing_store_create( if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)bs; alias_struct->name = &default_pager_ops; - port->alias = (int) alias_struct; + port->alias = (uintptr_t) alias_struct; } else { ipc_port_dealloc_kernel((MACH_PORT_FACE)(port)); @@ -660,6 +667,7 @@ default_pager_backing_store_info( } int ps_delete(paging_segment_t); /* forward */ +boolean_t current_thread_aborted(void); int ps_delete( @@ -741,7 +749,7 @@ ps_delete( } vm_object_deallocate(transfer_object); } - if(error) { + if(error || current_thread_aborted() || backing_store_stop_compaction) { VS_LOCK(vs); vs->vs_async_pending -= 1; /* release vs_async_wait */ if (vs->vs_async_pending == 0 && vs->vs_waiting_async) { @@ -792,7 +800,7 @@ ps_delete( kern_return_t -default_pager_backing_store_delete( +default_pager_backing_store_delete_internal( MACH_PORT_FACE backing_store) { backing_store_t bs; @@ -800,28 +808,35 @@ default_pager_backing_store_delete( paging_segment_t ps; int error; int interim_pages_removed = 0; -// kern_return_t kr; + boolean_t dealing_with_emergency_segment = ( backing_store == emergency_segment_backing_store ); if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL) return KERN_INVALID_ARGUMENT; -#if 0 - /* not implemented */ - BS_UNLOCK(bs); - return KERN_FAILURE; -#endif - - restart: +restart: PSL_LOCK(); error = KERN_SUCCESS; for (i = 0; i <= paging_segment_max; i++) { ps = paging_segments[i]; if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs && - ! ps->ps_going_away) { + ! 
IS_PS_GOING_AWAY(ps)) { PS_LOCK(ps); + + if( IS_PS_GOING_AWAY(ps) || !IS_PS_OK_TO_USE(ps)) { + /* + * Someone is already busy reclamining this paging segment. + * If it's the emergency segment we are looking at then check + * that someone has not already recovered it and set the right + * state i.e. online but not activated. + */ + PS_UNLOCK(ps); + continue; + } + /* disable access to this segment */ - ps->ps_going_away = TRUE; + ps->ps_state &= ~PS_CAN_USE; + ps->ps_state |= PS_GOING_AWAY; PS_UNLOCK(ps); /* * The "ps" segment is "off-line" now, @@ -862,10 +877,26 @@ default_pager_backing_store_delete( ps = paging_segments[i]; if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs && - ps->ps_going_away) { + IS_PS_GOING_AWAY(ps)) { PS_LOCK(ps); + + if( !IS_PS_GOING_AWAY(ps)) { + PS_UNLOCK(ps); + continue; + } + /* Handle the special clusters that came in while we let go the lock*/ + if( ps->ps_special_clusters) { + dp_pages_free += ps->ps_special_clusters << ps->ps_clshift; + ps->ps_pgcount += ps->ps_special_clusters << ps->ps_clshift; + ps->ps_clcount += ps->ps_special_clusters; + if ( ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) { + ps_select_array[ps->ps_bs->bs_priority] = 0; + } + ps->ps_special_clusters = 0; + } /* re-enable access to this segment */ - ps->ps_going_away = FALSE; + ps->ps_state &= ~PS_GOING_AWAY; + ps->ps_state |= PS_CAN_USE; PS_UNLOCK(ps); } } @@ -879,12 +910,22 @@ default_pager_backing_store_delete( ps = paging_segments[i]; if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) { - if(ps->ps_going_away) { - paging_segments[i] = PAGING_SEGMENT_NULL; - paging_segment_count--; - PS_LOCK(ps); - kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); - kfree(ps, sizeof *ps); + if(IS_PS_GOING_AWAY(ps)) { + if(IS_PS_EMERGENCY_SEGMENT(ps)) { + PS_LOCK(ps); + ps->ps_state &= ~PS_GOING_AWAY; + ps->ps_special_clusters = 0; + ps->ps_pgcount = ps->ps_pgnum; + ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift; + PS_UNLOCK(ps); + dp_pages_reserve += 
interim_pages_removed; + } else { + paging_segments[i] = PAGING_SEGMENT_NULL; + paging_segment_count--; + PS_LOCK(ps); + kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); + kfree(ps, sizeof *ps); + } } } } @@ -898,6 +939,11 @@ default_pager_backing_store_delete( PSL_UNLOCK(); + if( dealing_with_emergency_segment ) { + BS_UNLOCK(bs); + return KERN_SUCCESS; + } + /* * All the segments have been deleted. * We can remove the backing store. @@ -929,6 +975,16 @@ default_pager_backing_store_delete( return KERN_SUCCESS; } +kern_return_t +default_pager_backing_store_delete( + MACH_PORT_FACE backing_store) +{ + if( backing_store != emergency_segment_backing_store ) { + default_pager_backing_store_delete_internal(emergency_segment_backing_store); + } + return(default_pager_backing_store_delete_internal(backing_store)); +} + int ps_enter(paging_segment_t); /* forward */ int @@ -1031,7 +1087,15 @@ default_pager_add_segment( clrbit(ps->ps_bmap, i); } - ps->ps_going_away = FALSE; + if(paging_segment_count == 0) { + ps->ps_state = PS_EMERGENCY_SEGMENT; + if(use_emergency_swap_file_first) { + ps->ps_state |= PS_CAN_USE; + } + } else { + ps->ps_state = PS_CAN_USE; + } + ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { @@ -1046,7 +1110,11 @@ default_pager_add_segment( BS_UNLOCK(bs); PSL_LOCK(); - dp_pages_free += ps->ps_pgcount; + if(IS_PS_OK_TO_USE(ps)) { + dp_pages_free += ps->ps_pgcount; + } else { + dp_pages_reserve += ps->ps_pgcount; + } PSL_UNLOCK(); bs_more_space(ps->ps_clcount); @@ -1129,7 +1197,7 @@ vs_alloc_async(void) if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)vsa; alias_struct->name = &default_pager_ops; - reply_port->alias = (int) alias_struct; + reply_port->alias = (uintptr_t) alias_struct; vsa->reply_port = reply_port; vs_alloc_async_count++; } @@ -1221,7 +1289,7 @@ zone_t vstruct_zone; vstruct_t ps_vstruct_create( - vm_size_t size) + dp_size_t size) { vstruct_t vs; unsigned int i; @@ -1241,18 +1309,10 @@ ps_vstruct_create( vs->vs_references = 1; 
vs->vs_seqno = 0; -#ifdef MACH_KERNEL vs->vs_waiting_seqno = FALSE; vs->vs_waiting_read = FALSE; vs->vs_waiting_write = FALSE; vs->vs_waiting_async = FALSE; -#else - mutex_init(&vs->vs_waiting_seqno, 0); - mutex_init(&vs->vs_waiting_read, 0); - mutex_init(&vs->vs_waiting_write, 0); - mutex_init(&vs->vs_waiting_refs, 0); - mutex_init(&vs->vs_waiting_async, 0); -#endif vs->vs_readers = 0; vs->vs_writers = 0; @@ -1323,20 +1383,21 @@ ps_select_segment( PSL_LOCK(); if (paging_segment_count == 1) { - paging_segment_t lps; /* used to avoid extra PS_UNLOCK */ + paging_segment_t lps = PAGING_SEGMENT_NULL; /* used to avoid extra PS_UNLOCK */ ipc_port_t trigger = IP_NULL; ps = paging_segments[paging_segment_max]; *psindex = paging_segment_max; PS_LOCK(ps); - if (ps->ps_going_away) { - /* this segment is being turned off */ - lps = PAGING_SEGMENT_NULL; - } else { - ASSERT(ps->ps_clshift >= shift); + if( !IS_PS_EMERGENCY_SEGMENT(ps) ) { + panic("Emergency paging segment missing\n"); + } + ASSERT(ps->ps_clshift >= shift); + if(IS_PS_OK_TO_USE(ps)) { if (ps->ps_clcount) { ps->ps_clcount--; dp_pages_free -= 1 << ps->ps_clshift; + ps->ps_pgcount -= 1 << ps->ps_clshift; if(min_pages_trigger_port && (dp_pages_free < minimum_pages_remaining)) { trigger = min_pages_trigger_port; @@ -1344,10 +1405,21 @@ ps_select_segment( bs_low = TRUE; } lps = ps; - } else - lps = PAGING_SEGMENT_NULL; - } + } + } PS_UNLOCK(ps); + + if( lps == PAGING_SEGMENT_NULL ) { + if(dp_pages_free) { + dp_pages_free_drift_count++; + if(dp_pages_free > dp_pages_free_drifted_max) { + dp_pages_free_drifted_max = dp_pages_free; + } + dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free)); + } + dp_pages_free = 0; + } + PSL_UNLOCK(); if (trigger != IP_NULL) { @@ -1358,6 +1430,14 @@ ps_select_segment( } if (paging_segment_count == 0) { + if(dp_pages_free) { + dp_pages_free_drift_count++; + if(dp_pages_free > dp_pages_free_drifted_max) { + dp_pages_free_drifted_max = dp_pages_free; + } + 
dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free)); + } + dp_pages_free = 0; PSL_UNLOCK(); return PAGING_SEGMENT_NULL; } @@ -1399,35 +1479,36 @@ ps_select_segment( * >= that of the vstruct. */ PS_LOCK(ps); - if (ps->ps_going_away) { - /* this segment is being turned off */ - } else if ((ps->ps_clcount) && - (ps->ps_clshift >= shift)) { - ipc_port_t trigger = IP_NULL; - - ps->ps_clcount--; - dp_pages_free -= 1 << ps->ps_clshift; - if(min_pages_trigger_port && - (dp_pages_free < - minimum_pages_remaining)) { - trigger = min_pages_trigger_port; - min_pages_trigger_port = NULL; - } - PS_UNLOCK(ps); - /* - * found one, quit looking. - */ - ps_select_array[i] = j; - PSL_UNLOCK(); - - if (trigger != IP_NULL) { - default_pager_space_alert( - trigger, - HI_WAT_ALERT); - ipc_port_release_send(trigger); + if (IS_PS_OK_TO_USE(ps)) { + if ((ps->ps_clcount) && + (ps->ps_clshift >= shift)) { + ipc_port_t trigger = IP_NULL; + + ps->ps_clcount--; + dp_pages_free -= 1 << ps->ps_clshift; + ps->ps_pgcount -= 1 << ps->ps_clshift; + if(min_pages_trigger_port && + (dp_pages_free < + minimum_pages_remaining)) { + trigger = min_pages_trigger_port; + min_pages_trigger_port = NULL; + } + PS_UNLOCK(ps); + /* + * found one, quit looking. 
+ */ + ps_select_array[i] = j; + PSL_UNLOCK(); + + if (trigger != IP_NULL) { + default_pager_space_alert( + trigger, + HI_WAT_ALERT); + ipc_port_release_send(trigger); + } + *psindex = j; + return ps; } - *psindex = j; - return ps; } PS_UNLOCK(ps); } @@ -1441,13 +1522,22 @@ ps_select_segment( j++; } } + + if(dp_pages_free) { + dp_pages_free_drift_count++; + if(dp_pages_free > dp_pages_free_drifted_max) { + dp_pages_free_drifted_max = dp_pages_free; + } + dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count,dp_pages_free)); + } + dp_pages_free = 0; PSL_UNLOCK(); return PAGING_SEGMENT_NULL; } -vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/ +dp_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/ -vm_offset_t +dp_offset_t ps_allocate_cluster( vstruct_t vs, int *psindex, @@ -1456,7 +1546,7 @@ ps_allocate_cluster( unsigned int byte_num; int bit_num = 0; paging_segment_t ps; - vm_offset_t cluster; + dp_offset_t cluster; ipc_port_t trigger = IP_NULL; /* @@ -1482,6 +1572,7 @@ ps_allocate_cluster( * This and the ordering of the paging segment "going_away" bit setting * protects us. */ +retry: if (use_ps != PAGING_SEGMENT_NULL) { ps = use_ps; PSL_LOCK(); @@ -1491,6 +1582,7 @@ ps_allocate_cluster( ps->ps_clcount--; dp_pages_free -= 1 << ps->ps_clshift; + ps->ps_pgcount -= 1 << ps->ps_clshift; if(min_pages_trigger_port && (dp_pages_free < minimum_pages_remaining)) { trigger = min_pages_trigger_port; @@ -1505,9 +1597,70 @@ ps_allocate_cluster( } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) == PAGING_SEGMENT_NULL) { - static uint32_t lastnotify = 0; - uint32_t now, nanoseconds_dummy; + static clock_sec_t lastnotify = 0; + clock_sec_t now; + clock_nsec_t nanoseconds_dummy; + + /* + * Don't immediately jump to the emergency segment. Give the + * dynamic pager a chance to create it's first normal swap file. 
+ * Unless, of course the very first normal swap file can't be + * created due to some problem and we didn't expect that problem + * i.e. use_emergency_swap_file_first was never set to true initially. + * It then gets set in the swap file creation error handling. + */ + if(paging_segment_count > 1 || use_emergency_swap_file_first == TRUE) { + + ps = paging_segments[EMERGENCY_PSEG_INDEX]; + if(IS_PS_EMERGENCY_SEGMENT(ps) && !IS_PS_GOING_AWAY(ps)) { + PSL_LOCK(); + PS_LOCK(ps); + + if(IS_PS_GOING_AWAY(ps)) { + /* Someone de-activated the emergency paging segment*/ + PS_UNLOCK(ps); + PSL_UNLOCK(); + } else if(dp_pages_free) { + /* + * Someone has already activated the emergency paging segment + * OR + * Between us having rec'd a NULL segment from ps_select_segment + * and reaching here a new normal segment could have been added. + * E.g. we get NULL segment and another thread just added the + * new swap file. Hence check to see if we have more dp_pages_free + * before activating the emergency segment. + */ + PS_UNLOCK(ps); + PSL_UNLOCK(); + goto retry; + + } else if(!IS_PS_OK_TO_USE(ps) && ps->ps_clcount) { + /* + * PS_CAN_USE is only reset from the emergency segment when it's + * been successfully recovered. So it's legal to have an emergency + * segment that has PS_CAN_USE but no clusters because it's recovery + * failed. + */ + backing_store_t bs = ps->ps_bs; + ps->ps_state |= PS_CAN_USE; + if(ps_select_array[bs->bs_priority] == BS_FULLPRI || + ps_select_array[bs->bs_priority] == BS_NOPRI) { + ps_select_array[bs->bs_priority] = 0; + } + dp_pages_free += ps->ps_pgcount; + dp_pages_reserve -= ps->ps_pgcount; + PS_UNLOCK(ps); + PSL_UNLOCK(); + dprintf(("Switching ON Emergency paging segment\n")); + goto retry; + } + + PS_UNLOCK(ps); + PSL_UNLOCK(); + } + } + /* * Emit a notification of the low-paging resource condition * but don't issue it more than once every five seconds. This @@ -1515,14 +1668,17 @@ ps_allocate_cluster( * repetitions of the message. 
*/ clock_get_system_nanotime(&now, &nanoseconds_dummy); - if (now > lastnotify + 5) { - dprintf(("no space in available paging segments\n")); + if (paging_segment_count > 1 && (now > lastnotify + 5)) { + /* With an activated emergency paging segment we still + * didn't get any clusters. This could mean that the + * emergency paging segment is exhausted. + */ + dprintf(("System is out of paging space.\n")); lastnotify = now; } - /* the count got off maybe, reset to zero */ PSL_LOCK(); - dp_pages_free = 0; + if(min_pages_trigger_port) { trigger = min_pages_trigger_port; min_pages_trigger_port = NULL; @@ -1533,7 +1689,7 @@ ps_allocate_cluster( default_pager_space_alert(trigger, HI_WAT_ALERT); ipc_port_release_send(trigger); } - return (vm_offset_t) -1; + return (dp_offset_t) -1; } /* @@ -1565,15 +1721,15 @@ ps_allocate_cluster( return cluster; } -void ps_deallocate_cluster(paging_segment_t, vm_offset_t); /* forward */ +void ps_deallocate_cluster(paging_segment_t, dp_offset_t); /* forward */ void ps_deallocate_cluster( paging_segment_t ps, - vm_offset_t cluster) + dp_offset_t cluster) { - if (cluster >= (vm_offset_t) ps->ps_ncls) + if (cluster >= ps->ps_ncls) panic("ps_deallocate_cluster: Invalid cluster number"); /* @@ -1583,9 +1739,13 @@ ps_deallocate_cluster( PSL_LOCK(); PS_LOCK(ps); clrbit(ps->ps_bmap, cluster); - ++ps->ps_clcount; - dp_pages_free += 1 << ps->ps_clshift; - PSL_UNLOCK(); + if( IS_PS_OK_TO_USE(ps)) { + ++ps->ps_clcount; + ps->ps_pgcount += 1 << ps->ps_clshift; + dp_pages_free += 1 << ps->ps_clshift; + } else { + ps->ps_special_clusters += 1; + } /* * Move the hint down to the freed cluster if it is @@ -1595,25 +1755,24 @@ ps_deallocate_cluster( ps->ps_hint = (cluster/NBBY); } - PS_UNLOCK(ps); /* * If we're freeing space on a full priority, reset the array. 
*/ - PSL_LOCK(); - if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) + if ( IS_PS_OK_TO_USE(ps) && ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) ps_select_array[ps->ps_bs->bs_priority] = 0; + PS_UNLOCK(ps); PSL_UNLOCK(); return; } -void ps_dealloc_vsmap(struct vs_map *, vm_size_t); /* forward */ +void ps_dealloc_vsmap(struct vs_map *, dp_size_t); /* forward */ void ps_dealloc_vsmap( struct vs_map *vsmap, - vm_size_t size) + dp_size_t size) { unsigned int i; for (i = 0; i < size; i++) @@ -1773,18 +1932,18 @@ int ps_map_extend( return 0; } -vm_offset_t +dp_offset_t ps_clmap( vstruct_t vs, - vm_offset_t offset, + dp_offset_t offset, struct clmap *clmap, int flag, - vm_size_t size, + dp_size_t size, int error) { - vm_offset_t cluster; /* The cluster of offset. */ - vm_offset_t newcl; /* The new cluster allocated. */ - vm_offset_t newoff; + dp_offset_t cluster; /* The cluster of offset. */ + dp_offset_t newcl; /* The new cluster allocated. */ + dp_offset_t newoff; unsigned int i; struct vs_map *vsmap; @@ -1805,11 +1964,11 @@ ps_clmap( if (flag == CL_FIND) { /* Do not allocate if just doing a lookup */ VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } if (ps_map_extend(vs, cluster + 1)) { VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } } @@ -1831,14 +1990,14 @@ ps_clmap( if (vsmap == NULL) { if (flag == CL_FIND) { VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } /* Allocate the indirect block */ vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD); if (vsmap == NULL) { VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } /* Initialize the cluster offsets */ for (i = 0; i < CLMAP_ENTRIES; i++) @@ -1862,7 +2021,7 @@ ps_clmap( if (VSM_ISERR(*vsmap)) { clmap->cl_error = VSM_GETERR(*vsmap); VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } else if (VSM_ISCLR(*vsmap)) { int psindex; @@ -1876,16 +2035,16 @@ ps_clmap( VSM_SETERR(*vsmap, 
error); } VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } else { /* * Attempt to allocate a cluster from the paging segment */ newcl = ps_allocate_cluster(vs, &psindex, PAGING_SEGMENT_NULL); - if (newcl == (vm_offset_t) -1) { + if (newcl == (dp_offset_t) -1) { VS_MAP_UNLOCK(vs); - return (vm_offset_t) -1; + return (dp_offset_t) -1; } VSM_CLR(*vsmap); VSM_SETCLOFF(*vsmap, newcl); @@ -1944,7 +2103,7 @@ ps_clmap( * entire cluster is in error. */ if (size && flag == CL_FIND) { - vm_offset_t off = (vm_offset_t) 0; + dp_offset_t off = (dp_offset_t) 0; if (!error) { for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0; @@ -1964,10 +2123,10 @@ ps_clmap( * Deallocate cluster if error, and no valid pages * already present. */ - if (off != (vm_offset_t) 0) + if (off != (dp_offset_t) 0) ps_deallocate_cluster(clmap->cl_ps, off); VS_MAP_UNLOCK(vs); - return (vm_offset_t) 0; + return (dp_offset_t) 0; } else VS_MAP_UNLOCK(vs); @@ -1982,15 +2141,15 @@ ps_clmap( return (newcl + newoff); } -void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t); /* forward */ +void ps_clunmap(vstruct_t, dp_offset_t, dp_size_t); /* forward */ void ps_clunmap( vstruct_t vs, - vm_offset_t offset, - vm_size_t length) + dp_offset_t offset, + dp_size_t length) { - vm_offset_t cluster; /* The cluster number of offset */ + dp_offset_t cluster; /* The cluster number of offset */ struct vs_map *vsmap; VS_MAP_LOCK(vs); @@ -2000,7 +2159,7 @@ ps_clunmap( * clusters and map entries as encountered. 
*/ while (length > 0) { - vm_offset_t newoff; + dp_offset_t newoff; unsigned int i; cluster = atop_32(offset) >> vs->vs_clshift; @@ -2052,13 +2211,13 @@ ps_clunmap( VS_MAP_UNLOCK(vs); } -void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */ +void ps_vs_write_complete(vstruct_t, dp_offset_t, dp_size_t, int); /* forward */ void ps_vs_write_complete( vstruct_t vs, - vm_offset_t offset, - vm_size_t size, + dp_offset_t offset, + dp_size_t size, int error) { struct clmap clmap; @@ -2076,17 +2235,17 @@ ps_vs_write_complete( (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error); } -void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int); /* forward */ +void vs_cl_write_complete(vstruct_t, paging_segment_t, dp_offset_t, vm_offset_t, dp_size_t, boolean_t, int); /* forward */ void vs_cl_write_complete( - vstruct_t vs, + vstruct_t vs, __unused paging_segment_t ps, - vm_offset_t offset, + dp_offset_t offset, __unused vm_offset_t addr, - vm_size_t size, - boolean_t async, - int error) + dp_size_t size, + boolean_t async, + int error) { // kern_return_t kr; @@ -2112,7 +2271,6 @@ vs_cl_write_complete( if (vs->vs_async_pending == 0 && vs->vs_waiting_async) { vs->vs_waiting_async = FALSE; VS_UNLOCK(vs); - /* mutex_unlock(&vs->vs_waiting_async); */ thread_wakeup(&vs->vs_async_pending); } else { VS_UNLOCK(vs); @@ -2186,7 +2344,7 @@ device_read_reply( vsa->vsa_addr = (vm_offset_t)data; vsa->vsa_size = (vm_size_t)dataCnt; vsa->vsa_error = return_code; - thread_wakeup(&vsa->vsa_lock); + thread_wakeup(&vsa); return KERN_SUCCESS; } @@ -2227,7 +2385,7 @@ device_open_reply( kern_return_t ps_read_device( paging_segment_t ps, - vm_offset_t offset, + dp_offset_t offset, vm_offset_t *bufferp, unsigned int size, unsigned int *residualp, @@ -2242,7 +2400,6 @@ ps_read_device( vm_offset_t buf_ptr; unsigned int records_read; struct vs_async *vsa; - mutex_t vs_waiting_read_reply; device_t device; vm_map_copy_t 
device_data = NULL; @@ -2266,7 +2423,6 @@ ps_read_device( vsa->vsa_size = 0; vsa->vsa_ps = NULL; } - mutex_init(&vsa->vsa_lock, 0); ip_lock(vsa->reply_port); vsa->reply_port->ip_sorights++; ip_reference(vsa->reply_port); @@ -2282,7 +2438,7 @@ ps_read_device( (io_buf_ptr_t *) &dev_buffer, (mach_msg_type_number_t *) &bytes_read); if(kr == MIG_NO_REPLY) { - assert_wait(&vsa->vsa_lock, THREAD_UNINT); + assert_wait(&vsa, THREAD_UNINT); thread_block(THREAD_CONTINUE_NULL); dev_buffer = vsa->vsa_addr; @@ -2366,7 +2522,7 @@ ps_read_device( kern_return_t ps_write_device( paging_segment_t ps, - vm_offset_t offset, + dp_offset_t offset, vm_offset_t addr, unsigned int size, struct vs_async *vsa) @@ -2469,7 +2625,7 @@ ps_write_device( kern_return_t ps_read_device( __unused paging_segment_t ps, - __unused vm_offset_t offset, + __unused dp_offset_t offset, __unused vm_offset_t *bufferp, __unused unsigned int size, __unused unsigned int *residualp, @@ -2482,7 +2638,7 @@ ps_read_device( kern_return_t ps_write_device( __unused paging_segment_t ps, - __unused vm_offset_t offset, + __unused dp_offset_t offset, __unused vm_offset_t addr, __unused unsigned int size, __unused struct vs_async *vsa) @@ -2522,27 +2678,30 @@ static vm_size_t last_length; kern_return_t pvs_cluster_read( vstruct_t vs, - vm_offset_t vs_offset, - vm_size_t cnt, + dp_offset_t vs_offset, + dp_size_t cnt, void *fault_info) { kern_return_t error = KERN_SUCCESS; unsigned int size; unsigned int residual; unsigned int request_flags; + int io_flags = 0; int seg_index; int pages_in_cl; int cl_size; int cl_mask; int cl_index; unsigned int xfer_size; - vm_offset_t orig_vs_offset; - vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; + dp_offset_t orig_vs_offset; + dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; struct clmap clmap; upl_t upl; unsigned int page_list_count; - 
memory_object_offset_t start; + memory_object_offset_t cluster_start; + vm_size_t cluster_length; + uint32_t io_streaming; pages_in_cl = 1 << vs->vs_clshift; cl_size = pages_in_cl * vm_page_size; @@ -2555,7 +2714,7 @@ pvs_cluster_read( #endif cl_index = (vs_offset & cl_mask) / vm_page_size; - if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (vm_offset_t)-1) || + if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) || !CLMAP_ISSET(clmap, cl_index)) { /* * the needed page doesn't exist in the backing store... @@ -2609,22 +2768,30 @@ pvs_cluster_read( } orig_vs_offset = vs_offset; - start = (memory_object_offset_t)vs_offset; assert(cnt != 0); cnt = VM_SUPER_CLUSTER; + cluster_start = (memory_object_offset_t) vs_offset; + cluster_length = (vm_size_t) cnt; + io_streaming = 0; /* * determine how big a speculative I/O we should try for... */ - if (memory_object_cluster_size(vs->vs_control, &start, &cnt, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) { - assert(vs_offset >= (vm_offset_t) start && - vs_offset < (vm_offset_t) (start + cnt)); - vs_offset = (vm_offset_t)start; - } else + if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) { + assert(vs_offset >= (dp_offset_t) cluster_start && + vs_offset < (dp_offset_t) (cluster_start + cluster_length)); + vs_offset = (dp_offset_t) cluster_start; + cnt = (dp_size_t) cluster_length; + } else { + cluster_length = PAGE_SIZE; cnt = PAGE_SIZE; + } + + if (io_streaming) + io_flags |= UPL_IOSTREAMING; - last_start = start; - last_length = cnt; + last_start = cluster_start; + last_length = cluster_length; /* * This loop will be executed multiple times until the entire @@ -2654,7 +2821,7 @@ pvs_cluster_read( int failed_size; int beg_pseg; int beg_indx; - vm_offset_t cur_offset; + dp_offset_t cur_offset; if ( !ps_info_valid) { ps_offset[seg_index] = ps_clmap(vs, vs_offset & 
~cl_mask, &clmap, CL_FIND, 0, 0); @@ -2664,7 +2831,7 @@ pvs_cluster_read( /* * skip over unallocated physical segments */ - if (ps_offset[seg_index] == (vm_offset_t) -1) { + if (ps_offset[seg_index] == (dp_offset_t) -1) { abort_size = cl_size - (vs_offset & cl_mask); abort_size = MIN(abort_size, size); @@ -2803,7 +2970,7 @@ pvs_cluster_read( error = ps_read_file(psp[beg_pseg], upl, (upl_offset_t) 0, ps_offset[beg_pseg] + (beg_indx * vm_page_size), - xfer_size, &residual, 0); + xfer_size, &residual, io_flags); failed_size = 0; @@ -2901,10 +3068,10 @@ vs_cluster_write( int error = 0; struct clmap clmap; - vm_offset_t actual_offset; /* Offset within paging segment */ + dp_offset_t actual_offset; /* Offset within paging segment */ paging_segment_t ps; - vm_offset_t mobj_base_addr; - vm_offset_t mobj_target_addr; + dp_offset_t mobj_base_addr; + dp_offset_t mobj_target_addr; upl_t upl; upl_page_info_t *pl; @@ -2914,6 +3081,7 @@ vs_cluster_write( unsigned int cl_size; int base_index; unsigned int seg_size; + unsigned int upl_offset_in_object; pages_in_cl = 1 << vs->vs_clshift; cl_size = pages_in_cl * vm_page_size; @@ -2927,8 +3095,8 @@ vs_cluster_write( int num_of_pages; int seg_index; upl_offset_t upl_offset; - vm_offset_t seg_offset; - vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; + dp_offset_t seg_offset; + dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; @@ -2973,10 +3141,19 @@ vs_cluster_write( &upl, NULL, &page_list_count, request_flags | UPL_FOR_PAGEOUT); + /* + * The default pager does not handle objects larger than + * 4GB, so it does not deal with offset that don't fit in + * 32-bit. Cast down upl->offset now and make sure we + * did not lose any valuable bits. 
+ */ + upl_offset_in_object = (unsigned int) upl->offset; + assert(upl->offset == upl_offset_in_object); + pl = UPL_GET_INTERNAL_PAGE_LIST(upl); - seg_size = cl_size - (upl->offset % cl_size); - upl_offset = upl->offset & ~(cl_size - 1); + seg_size = cl_size - (upl_offset_in_object % cl_size); + upl_offset = upl_offset_in_object & ~(cl_size - 1); for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) { @@ -2986,7 +3163,7 @@ vs_cluster_write( &clmap, CL_ALLOC, cl_size, 0); - if (ps_offset[seg_index] == (vm_offset_t) -1) { + if (ps_offset[seg_index] == (dp_offset_t) -1) { upl_abort(upl, 0); upl_deallocate(upl); @@ -3012,7 +3189,7 @@ vs_cluster_write( break; num_of_pages = page_index + 1; - base_index = (upl->offset % cl_size) / PAGE_SIZE; + base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE; for (page_index = 0; page_index < num_of_pages; ) { /* @@ -3103,12 +3280,15 @@ vs_cluster_write( while (transfer_size) { if ((seg_size = cl_size - - ((upl->offset + upl_offset) % cl_size)) + ((upl_offset_in_object + + upl_offset) % cl_size)) > transfer_size) seg_size = transfer_size; - ps_vs_write_complete(vs, - upl->offset + upl_offset, + ps_vs_write_complete( + vs, + (upl_offset_in_object + + upl_offset), seg_size, error); transfer_size -= seg_size; @@ -3118,7 +3298,7 @@ vs_cluster_write( transfer_size = num_dirty * vm_page_size; seg_index = (base_index + first_dirty) / pages_in_cl; - seg_offset = (upl->offset + upl_offset) % cl_size; + seg_offset = (upl_offset_in_object + upl_offset) % cl_size; error = ps_write_file(psp[seg_index], upl, upl_offset, @@ -3140,7 +3320,7 @@ vs_cluster_write( } } else { - assert(cnt <= (vm_page_size << vs->vs_clshift)); + assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift)); list_size = cnt; page_index = 0; @@ -3157,12 +3337,12 @@ vs_cluster_write( &clmap, CL_ALLOC, transfer_size < cl_size ? 
transfer_size : cl_size, 0); - if(actual_offset == (vm_offset_t) -1) { + if(actual_offset == (dp_offset_t) -1) { error = 1; break; } cnt = MIN(transfer_size, - CLMAP_NPGS(clmap) * vm_page_size); + (unsigned) CLMAP_NPGS(clmap) * vm_page_size); ps = CLMAP_PS(clmap); /* Assume that the caller has given us contiguous */ /* pages */ @@ -3239,15 +3419,15 @@ ps_vstruct_allocated_size( return ptoa_32(num_pages); } -size_t +unsigned int ps_vstruct_allocated_pages( vstruct_t vs, default_pager_page_t *pages, - size_t pages_size) + unsigned int pages_size) { unsigned int num_pages; struct vs_map *vsmap; - vm_offset_t offset; + dp_offset_t offset; unsigned int i, j, k; num_pages = 0; @@ -3418,10 +3598,10 @@ ps_vstruct_transfer_from_segment( vs_finish_write(vs); VS_LOCK(vs); vs->vs_xfer_pending = TRUE; - VS_UNLOCK(vs); vs_wait_for_sync_writers(vs); vs_start_write(vs); vs_wait_for_readers(vs); + VS_UNLOCK(vs); if (vs->vs_indirect) { goto vs_changed; } @@ -3440,10 +3620,10 @@ ps_vstruct_transfer_from_segment( vs_map_t vs_get_map_entry( vstruct_t vs, - vm_offset_t offset) + dp_offset_t offset) { struct vs_map *vsmap; - vm_offset_t cluster; + dp_offset_t cluster; cluster = atop_32(offset) >> vs->vs_clshift; if (vs->vs_indirect) { @@ -3462,11 +3642,11 @@ vs_get_map_entry( kern_return_t vs_cluster_transfer( vstruct_t vs, - vm_offset_t offset, - vm_size_t cnt, + dp_offset_t offset, + dp_size_t cnt, upl_t upl) { - vm_offset_t actual_offset; + dp_offset_t actual_offset; paging_segment_t ps; struct clmap clmap; kern_return_t error = KERN_SUCCESS; @@ -3512,7 +3692,7 @@ vs_cluster_transfer( vsmap_ptr = vs_get_map_entry(vs, offset); actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0); - if (actual_offset == (vm_offset_t) -1) { + if (actual_offset == (dp_offset_t) -1) { /* * Nothing left to write in this cluster at least @@ -3754,12 +3934,14 @@ default_pager_add_file( ps->ps_vnode = (struct vnode *)vp; ps->ps_offset = 0; ps->ps_record_shift = local_log2(vm_page_size / record_size); - 
ps->ps_recnum = size; - ps->ps_pgnum = size >> ps->ps_record_shift; + assert((dp_size_t) size == size); + ps->ps_recnum = (dp_size_t) size; + ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift; ps->ps_pgcount = ps->ps_pgnum; ps->ps_clshift = local_log2(bs->bs_clsize); ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift; + ps->ps_special_clusters = 0; ps->ps_hint = 0; PS_LOCK_INIT(ps); @@ -3773,7 +3955,16 @@ default_pager_add_file( clrbit(ps->ps_bmap, j); } - ps->ps_going_away = FALSE; + if(paging_segment_count == 0) { + ps->ps_state = PS_EMERGENCY_SEGMENT; + if(use_emergency_swap_file_first) { + ps->ps_state |= PS_CAN_USE; + } + emergency_segment_backing_store = backing_store; + } else { + ps->ps_state = PS_CAN_USE; + } + ps->ps_bs = bs; if ((error = ps_enter(ps)) != 0) { @@ -3786,16 +3977,36 @@ default_pager_add_file( bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift; bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift; PSL_LOCK(); - dp_pages_free += ps->ps_pgcount; + if(IS_PS_OK_TO_USE(ps)) { + dp_pages_free += ps->ps_pgcount; + } else { + dp_pages_reserve += ps->ps_pgcount; + } PSL_UNLOCK(); BS_UNLOCK(bs); bs_more_space(ps->ps_clcount); + /* + * If the paging segment being activated is not the emergency + * segment and we notice that the emergency segment is being + * used then we help recover it. If all goes well, the + * emergency segment will be back to its original state of + * online but not activated (till it's needed the next time). 
+ */ + ps = paging_segments[EMERGENCY_PSEG_INDEX]; + if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) { + if(default_pager_backing_store_delete(emergency_segment_backing_store)) { + dprintf(("Failed to recover emergency paging segment\n")); + } else { + dprintf(("Recovered emergency paging segment\n")); + } + } + DP_DEBUG(DEBUG_BS_INTERNAL, ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", - device, offset, size, record_size, + device, offset, (dp_size_t) size, record_size, ps->ps_record_shift, ps->ps_pgnum)); return KERN_SUCCESS; @@ -3808,7 +4019,7 @@ ps_read_file( paging_segment_t ps, upl_t upl, upl_offset_t upl_offset, - vm_offset_t offset, + dp_offset_t offset, upl_size_t size, unsigned int *residualp, int flags) @@ -3826,7 +4037,8 @@ ps_read_file( /* * for transfer case we need to pass uploffset and flags */ - error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL); + assert((upl_size_t) size == size); + error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL); /* The vnode_pagein semantic is somewhat at odds with the existing */ /* device_read semantic. 
Partial reads are not experienced at this */ @@ -3849,7 +4061,7 @@ ps_write_file( paging_segment_t ps, upl_t upl, upl_offset_t upl_offset, - vm_offset_t offset, + dp_offset_t offset, unsigned int size, int flags) { @@ -3869,7 +4081,8 @@ ps_write_file( */ upl_encrypt(upl, upl_offset, size); } - if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL)) + assert((upl_size_t) size == size); + if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL)) result = KERN_FAILURE; else result = KERN_SUCCESS; @@ -3886,6 +4099,9 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager, { MACH_PORT_FACE release; kern_return_t kr; + clock_sec_t now; + clock_nsec_t nanoseconds_dummy; + static clock_sec_t error_notify = 0; PSL_LOCK(); if (flags == SWAP_ENCRYPT_ON) { @@ -3919,6 +4135,22 @@ default_pager_triggers( __unused MACH_PORT_FACE default_pager, max_pages_trigger_port = trigger_port; maximum_pages_free = lo_wat/vm_page_size; kr = KERN_SUCCESS; + } else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) { + use_emergency_swap_file_first = TRUE; + release = trigger_port; + kr = KERN_SUCCESS; + } else if (flags == SWAP_FILE_CREATION_ERROR) { + release = trigger_port; + kr = KERN_SUCCESS; + if( paging_segment_count == 1) { + use_emergency_swap_file_first = TRUE; + } + no_paging_space_action(); + clock_get_system_nanotime(&now, &nanoseconds_dummy); + if (now > error_notify + 5) { + dprintf(("Swap File Error.\n")); + error_notify = now; + } } else { release = trigger_port; kr = KERN_INVALID_ARGUMENT; diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c index c2e488dce..c85278056 100644 --- a/osfmk/default_pager/dp_memory_object.c +++ b/osfmk/default_pager/dp_memory_object.c @@ -73,7 +73,7 @@ #include /* forward declaration */ -vstruct_t vs_object_create(vm_size_t size); +vstruct_t vs_object_create(dp_size_t size); /* * List of all vstructs. 
A specific vstruct is @@ -301,7 +301,7 @@ vs_finish_write( vstruct_t vs_object_create( - vm_size_t size) + dp_size_t size) { vstruct_t vs; @@ -376,7 +376,7 @@ kern_return_t dp_memory_object_init( memory_object_t mem_obj, memory_object_control_t control, - __unused vm_size_t pager_page_size) + __unused memory_object_cluster_size_t pager_page_size) { vstruct_t vs; @@ -400,7 +400,7 @@ kern_return_t dp_memory_object_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_size_t length, __unused vm_sync_t flags) { vstruct_t vs; @@ -590,11 +590,12 @@ kern_return_t dp_memory_object_data_request( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_cluster_size_t length, __unused vm_prot_t protection_required, memory_object_fault_info_t fault_info) { vstruct_t vs; + kern_return_t kr = KERN_SUCCESS; GSTAT(global_stats.gs_pagein_calls++); @@ -643,11 +644,23 @@ dp_memory_object_data_request( if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0) Panic("bad alignment"); - pvs_cluster_read(vs, (vm_offset_t)offset, length, fault_info); - + assert((dp_offset_t) offset == offset); + kr = pvs_cluster_read(vs, (dp_offset_t) offset, length, fault_info); + + /* Regular data requests have a non-zero length and always return KERN_SUCCESS. + Their actual success is determined by the fact that they provide a page or not, + i.e whether we call upl_commit() or upl_abort(). A length of 0 means that the + caller is only asking if the pager has a copy of that page or not. The answer to + that question is provided by the return value. KERN_SUCCESS means that the pager + does have that page. 
+ */ + if(length) { + kr = KERN_SUCCESS; + } + vs_finish_read(vs); - return KERN_SUCCESS; + return kr; } /* @@ -666,7 +679,7 @@ kern_return_t dp_memory_object_data_initialize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size) + memory_object_cluster_size_t size) { vstruct_t vs; @@ -685,7 +698,8 @@ dp_memory_object_data_initialize( * loop if the address range specified crosses cluster * boundaries. */ - vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0); + assert((upl_offset_t) offset == offset); + vs_cluster_write(vs, 0, (upl_offset_t)offset, size, FALSE, 0); vs_finish_write(vs); @@ -696,7 +710,7 @@ kern_return_t dp_memory_object_data_unlock( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t size, + __unused memory_object_size_t size, __unused vm_prot_t desired_access) { Panic("dp_memory_object_data_unlock: illegal"); @@ -709,7 +723,7 @@ kern_return_t dp_memory_object_data_return( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, + memory_object_cluster_size_t size, __unused memory_object_offset_t *resid_offset, __unused int *io_error, __unused boolean_t dirty, @@ -788,7 +802,8 @@ dp_memory_object_data_return( * loop if the address range specified crosses cluster * boundaries. 
*/ - vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0); + assert((upl_offset_t) offset == offset); + vs_cluster_write(vs, 0, (upl_offset_t) offset, size, FALSE, 0); vs_finish_write(vs); @@ -828,7 +843,12 @@ default_pager_memory_object_create( assert(dmm == default_pager_object); - vs = vs_object_create(new_size); + if ((dp_size_t) new_size != new_size) { + /* 32-bit overflow */ + return KERN_INVALID_ARGUMENT; + } + + vs = vs_object_create((dp_size_t) new_size); if (vs == VSTRUCT_NULL) return KERN_RESOURCE_SHORTAGE; @@ -840,7 +860,7 @@ default_pager_memory_object_create( */ vs->vs_pager_ops = &default_pager_ops; - vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT; + vs->vs_pager_header.io_bits = IKOT_MEMORY_OBJECT; /* * After this, other threads might receive requests @@ -866,7 +886,12 @@ default_pager_object_create( if (default_pager != default_pager_object) return KERN_INVALID_ARGUMENT; - vs = vs_object_create(size); + if ((dp_size_t) size != size) { + /* 32-bit overflow */ + return KERN_INVALID_ARGUMENT; + } + + vs = vs_object_create((dp_size_t) size); if (vs == VSTRUCT_NULL) return KERN_RESOURCE_SHORTAGE; @@ -914,8 +939,8 @@ default_pager_objects( /* * Out out-of-line port arrays are simply kalloc'ed. */ - psize = round_page(actual * sizeof * pagers); - ppotential = psize / sizeof * pagers; + psize = round_page(actual * sizeof (*pagers)); + ppotential = (unsigned int) (psize / sizeof (*pagers)); pagers = (memory_object_t *)kalloc(psize); if (0 == pagers) return KERN_RESOURCE_SHORTAGE; @@ -926,8 +951,8 @@ default_pager_objects( * then "copied in" as if it had been sent by a * user process. 
*/ - osize = round_page(actual * sizeof * objects); - opotential = osize / sizeof * objects; + osize = round_page(actual * sizeof (*objects)); + opotential = (unsigned int) (osize / sizeof (*objects)); kr = kmem_alloc(ipc_kernel_map, &oaddr, osize); if (KERN_SUCCESS != kr) { kfree(pagers, psize); @@ -1095,13 +1120,13 @@ default_pager_object_pages( if (0 != addr) kmem_free(ipc_kernel_map, addr, size); - size = round_page(actual * sizeof * pages); + size = round_page(actual * sizeof (*pages)); kr = kmem_alloc(ipc_kernel_map, &addr, size); if (KERN_SUCCESS != kr) return KERN_RESOURCE_SHORTAGE; pages = (default_pager_page_t *)addr; - potential = size / sizeof * pages; + potential = (unsigned int) (size / sizeof (*pages)); } /* diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index 32052fec1..31da78748 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ -96,6 +96,7 @@ type NDR_record_t = struct[8] of char; type io_user_scalar_t = uint64_t; type io_user_reference_t = uint64_t; type io_scalar_inband_t = array[*:16] of int; +// must be the same type as OSAsyncReference type io_async_ref_t = array[*:8] of natural_t; type io_scalar_inband64_t = array[*:16] of io_user_scalar_t; type io_async_ref64_t = array[*:8] of io_user_reference_t; @@ -242,8 +243,13 @@ routine io_connect_map_memory( connection : io_connect_t; in memory_type : uint32_t; in into_task : task_t; +#if KERNEL_SERVER + inout address : uint32_t; + inout size : uint32_t; +#else inout address : vm_address_t; inout size : vm_size_t; +#endif in flags : uint32_t ); #else @@ -484,7 +490,11 @@ routine io_connect_unmap_memory( connection : io_connect_t; in memory_type : uint32_t; in into_task : task_t; +#if KERNEL_SERVER + in address : uint32_t +#else in address : vm_address_t +#endif ); #else skip; @@ -504,10 +514,11 @@ routine io_registry_entry_get_property_recursively( out properties : io_buf_ptr_t, physicalcopy ); - routine io_service_get_state( service : io_object_t; - out state : 
uint64_t + out state : uint64_t; + out busy_state : uint32_t; + out accumulated_busy_time : uint64_t ); routine io_service_get_matching_services_ool( @@ -657,6 +668,11 @@ routine FUNC_NAME(io_service_add_notification_ool)( #endif /* KERNEL_SERVER || __LP64__ */ +routine io_registry_entry_get_registry_entry_id( + registry_entry : io_object_t; + out entry_id : uint64_t + ); + #endif /* IOKIT */ /* vim: set ft=c : */ diff --git a/osfmk/device/device_types.h b/osfmk/device/device_types.h index dcbd7940d..f71249e12 100644 --- a/osfmk/device/device_types.h +++ b/osfmk/device/device_types.h @@ -91,6 +91,7 @@ typedef char io_struct_inband_t[4096]; typedef uint64_t io_user_scalar_t; typedef uint64_t io_user_reference_t; typedef int io_scalar_inband_t[16]; +// must be the same type as OSAsyncReference typedef natural_t io_async_ref_t[8]; typedef io_user_scalar_t io_scalar_inband64_t[16]; typedef io_user_reference_t io_async_ref64_t[8]; diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index 3e750473e..5990a3e5c 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -66,7 +66,7 @@ #ifdef __ppc__ #include #endif -#ifdef __i386 +#if defined(__i386__) || defined(__x86_64__) #include #endif #include @@ -117,6 +117,7 @@ extern io_object_t iokit_lookup_connect_ref_current_task(io_object_t clientRef); extern void iokit_retain_port( ipc_port_t port ); extern void iokit_release_port( ipc_port_t port ); +extern void iokit_release_port_send( ipc_port_t port ); extern kern_return_t iokit_switch_object_port( ipc_port_t port, io_object_t obj, ipc_kobject_type_t type ); @@ -187,11 +188,11 @@ iokit_lookup_connect_ref(io_object_t connectRef, ipc_space_t space) { io_object_t obj = NULL; - if (connectRef && MACH_PORT_VALID((mach_port_name_t)connectRef)) { + if (connectRef && MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(connectRef))) { ipc_port_t port; kern_return_t kr; - kr = ipc_object_translate(space, (mach_port_name_t)connectRef, MACH_PORT_RIGHT_SEND, (ipc_object_t 
*)&port); + kr = ipc_object_translate(space, CAST_MACH_PORT_TO_NAME(connectRef), MACH_PORT_RIGHT_SEND, (ipc_object_t *)&port); if (kr == KERN_SUCCESS) { assert(IP_VALID(port)); @@ -226,6 +227,12 @@ iokit_release_port( ipc_port_t port ) ipc_port_release( port ); } +EXTERN void +iokit_release_port_send( ipc_port_t port ) +{ + ipc_port_release_send( port ); +} + /* * Get the port for a device. * Consumes a device reference; produces a naked send right. @@ -436,7 +443,7 @@ iokit_notify( mach_msg_header_t * msg ) /* need to create a pmap function to generalize */ unsigned int IODefaultCacheBits(addr64_t pa) { - return(pmap_cache_attributes(pa >> PAGE_SHIFT)); + return(pmap_cache_attributes((ppnum_t)(pa >> PAGE_SHIFT))); } kern_return_t IOMapPages(vm_map_t map, mach_vm_address_t va, mach_vm_address_t pa, @@ -549,7 +556,7 @@ ppnum_t IOGetLastPageNumber(void) for (idx = 0; idx < pmap_mem_regions_count; idx++) { lastPage = pmap_mem_regions[idx].mrEnd; -#elif __i386__ +#elif __i386__ || __x86_64__ for (idx = 0; idx < pmap_memory_region_count; idx++) { lastPage = pmap_memory_regions[idx].end - 1; @@ -566,6 +573,10 @@ ppnum_t IOGetLastPageNumber(void) void IOGetTime( mach_timespec_t * clock_time); void IOGetTime( mach_timespec_t * clock_time) { - clock_get_system_nanotime(&clock_time->tv_sec, (uint32_t *) &clock_time->tv_nsec); + clock_sec_t sec; + clock_nsec_t nsec; + clock_get_system_nanotime(&sec, &nsec); + clock_time->tv_sec = (typeof(clock_time->tv_sec))sec; + clock_time->tv_nsec = nsec; } diff --git a/osfmk/gssd/gssd_mach.defs b/osfmk/gssd/gssd_mach.defs index 5a8cf1f64..014785f3a 100644 --- a/osfmk/gssd/gssd_mach.defs +++ b/osfmk/gssd/gssd_mach.defs @@ -40,6 +40,8 @@ type string_t = c_string[*:1024]; type byte_buffer = array [] of uint8_t; type gssd_verifier = uint64_t; type gid_list = array [*:16] of uint32_t; +type gss_ctx = uint64_t; +type gss_cred = uint64_t; subsystem #if KERNEL_USER @@ -57,9 +59,10 @@ routine mach_gss_init_sec_context( in princ_namestr : string_t; 
in svc_namestr : string_t; in flags : uint32_t; - inout verifier : gssd_verifier; - inout context : uint32_t; - inout cred_handle : uint32_t; + in gssd_flags : uint32_t; + inout context : gss_ctx; + inout cred_handle : gss_cred; + out ret_flags : uint32_t; out key : byte_buffer, dealloc; out outtoken : byte_buffer, dealloc; out major_stat : uint32_t; @@ -70,10 +73,10 @@ routine mach_gss_accept_sec_context( server : mach_port_t; in intoken : byte_buffer; in svc_namestr : string_t; - in flags : uint32_t; - inout verifier : gssd_verifier; - inout context : uint32_t; - inout cred_handle : uint32_t; + in gssd_flags : uint32_t; + inout context : gss_ctx; + inout cred_handle : gss_cred; + out flags : uint32_t; out uid : uint32_t; out gids : gid_list; out key : byte_buffer, dealloc; diff --git a/osfmk/gssd/gssd_mach_types.h b/osfmk/gssd/gssd_mach_types.h index 0d97fb398..c091cc3ef 100644 --- a/osfmk/gssd/gssd_mach_types.h +++ b/osfmk/gssd/gssd_mach_types.h @@ -32,8 +32,9 @@ typedef enum mechtype { DEFAULT_MECH = 0, KRB5_MECH = 0, SPNEGO_MECH } mechtype; typedef char *string_t; typedef uint8_t *byte_buffer; -typedef uint64_t gssd_verifier; typedef uint32_t *gid_list; +typedef uint64_t gss_ctx; +typedef uint64_t gss_cred; #define GSSD_GSS_FLAGS_MASK 0x1FF /* The following need to correspond to GSS_C_*_FLAG in gssapi.h */ @@ -46,12 +47,15 @@ typedef uint32_t *gid_list; #define GSSD_ANON_FLAG 64 #define GSSD_PROT_FLAG 128 #define GSSD_TRANS_FLAG 256 +#define GSSD_C_DELEG_POLICY_FLAG 32768 -#define GSSD_FLAGS_SHIFT 16 +#define GSSD_FLAGS_SHIFT 0 #define GSSD_NO_DEFAULT (1 << GSSD_FLAGS_SHIFT) // Only use principal from uid #define GSSD_NO_CANON (2 << GSSD_FLAGS_SHIFT) // Don't canononicalize host names -#define GSSD_NO_HOME_ACCESS (4 << GSSD_FLAGS_SHIFT) // Dont access home directory -#define GSSD_NO_UI (8 << GSSD_FLAGS_SHIFT) // Don't bring up UI +#define GSSD_HOME_ACCESS_OK (4 << GSSD_FLAGS_SHIFT) // OK to access home directory +#define GSSD_UI_OK (8 << GSSD_FLAGS_SHIFT) // 
OK to bring up UI +#define GSSD_RESTART (16 << GSSD_FLAGS_SHIFT) // Destroy the supplied context and start over +#define GSSD_NFS_1DES (64 << GSSD_FLAGS_SHIFT) // Only get single DES session keys #define GSSD_WIN2K_HACK (128 << GSSD_FLAGS_SHIFT) // Hack for Win2K diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index b0b961b13..204a85ab6 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -84,15 +84,18 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include /* mp_rendezvous_break_lock */ +#include #include #include -#include #include -#include #include -#include -#include -#include /* mp_rendezvous_break_lock */ #include /* inb() */ #include #if MACH_KDB @@ -103,22 +106,16 @@ #include #include -#include -#include -#include - #include #include #include #include -#include #include #include -void enable_bluebox(void); -void disable_bluebox(void); +#include static void machine_conf(void); @@ -133,10 +130,26 @@ volatile int pbtcpu = -1; hw_lock_data_t pbtlock; /* backtrace print lock */ uint32_t pbtcnt = 0; +#if defined (__i386__) +#define PRINT_ARGS_FROM_STACK_FRAME 1 +#elif defined (__x86_64__) +#define PRINT_ARGS_FROM_STACK_FRAME 0 +#else +#error unsupported architecture +#endif + +#ifdef __LP64__ +typedef struct nlist_64 kernel_nlist_t; +#else +typedef struct nlist kernel_nlist_t; +#endif + typedef struct _cframe_t { struct _cframe_t *prev; - unsigned caller; + uintptr_t caller; +#if PRINT_ARGS_FROM_STACK_FRAME unsigned args[0]; +#endif } cframe_t; static unsigned panic_io_port; @@ -242,7 +255,7 @@ machine_startup(void) static void machine_conf(void) { - machine_info.memory_size = mem_size; + machine_info.memory_size = (typeof(machine_info.memory_size))mem_size; } @@ -376,15 +389,26 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table) break; } - gPEEFISystemTable = system_table; + gPEEFISystemTable = system_table; + + + if (!cpu_mode_is64bit()) { + kprintf("Skipping 
64-bit EFI runtime services for 32-bit legacy mode\n"); + break; + } + if(system_table->RuntimeServices == 0) { + kprintf("No runtime table present\n"); + break; + } kprintf("RuntimeServices table at 0x%qx\n", system_table->RuntimeServices); - runtime = (EFI_RUNTIME_SERVICES_64 *) (uintptr_t)system_table->RuntimeServices; // XXX + // 64-bit virtual address is OK for 64-bit EFI and 64/32-bit kernel. + runtime = (EFI_RUNTIME_SERVICES_64 *) (uintptr_t)system_table->RuntimeServices; kprintf("Checking runtime services table %p\n", runtime); - if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { - kprintf("Bad EFI runtime table signature\n"); - break; - } + if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { + kprintf("Bad EFI runtime table signature\n"); + break; + } // Verify signature of runtime services table hdr_cksum = runtime->Hdr.CRC32; @@ -404,16 +428,16 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table) } static void -efi_set_tables_32(EFI_SYSTEM_TABLE * system_table) +efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table) { - EFI_RUNTIME_SERVICES *runtime; + EFI_RUNTIME_SERVICES_32 *runtime; uint32_t hdr_cksum; uint32_t cksum; kprintf("Processing 32-bit EFI tables at %p\n", system_table); do { if (system_table->Hdr.Signature != EFI_SYSTEM_TABLE_SIGNATURE) { - kprintf("Bad EFI system table signature\n"); + kprintf("Bad EFI system table signature\n"); break; } // Verify signature of the system table @@ -428,14 +452,21 @@ efi_set_tables_32(EFI_SYSTEM_TABLE * system_table) break; } - gPEEFISystemTable = system_table; + gPEEFISystemTable = system_table; - kprintf("RuntimeServices table at %p\n", system_table->RuntimeServices); - runtime = (EFI_RUNTIME_SERVICES *) system_table->RuntimeServices; - if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { - kprintf("Bad EFI runtime table signature\n"); - break; - } + + if(system_table->RuntimeServices == 0) { + kprintf("No runtime table present\n"); + break; + } + 
kprintf("RuntimeServices table at 0x%x\n", system_table->RuntimeServices); + // 32-bit virtual address is OK for 32-bit EFI and 32-bit kernel. + // For a 64-bit kernel, booter will ensure pointer is zeroed out + runtime = (EFI_RUNTIME_SERVICES_32 *) (intptr_t)system_table->RuntimeServices; + if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { + kprintf("Bad EFI runtime table signature\n"); + break; + } // Verify signature of runtime services table hdr_cksum = runtime->Hdr.CRC32; @@ -474,26 +505,31 @@ efi_init(void) msize = args->MemoryMapDescriptorSize; mcount = args->MemoryMapSize / msize; - mptr = (EfiMemoryRange *)args->MemoryMap; + mptr = (EfiMemoryRange *)ml_static_ptovirt(args->MemoryMap); for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { if (((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) ) { - vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; phys_addr = (vm_map_offset_t) mptr->PhysicalStart; - pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size), +#if defined(__i386__) + pmap_map +#elif defined(__x86_64__) + pmap_map_bd /* K64todo resolve pmap layer inconsistency */ +#endif + (vm_addr, phys_addr, phys_addr + round_page(vm_size), (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE, (mptr->Type == EfiMemoryMappedIO) ? 
VM_WIMG_IO : VM_WIMG_USE_DEFAULT); } } - if (args->Version > 1) - panic("Incompatible boot args version %d\n", args->Version); + if ((args->Version != kBootArgsVersion1) || (args->Version == kBootArgsVersion1 && args->Revision < kBootArgsRevision1_5 )) + panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision); kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); - if (args->Revision >= 4 && args->efiMode == kBootArgsEfiMode64) { - efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) args->efiSystemTable); + if (args->efiMode == kBootArgsEfiMode64) { + efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) ml_static_ptovirt(args->efiSystemTable)); } else { - efi_set_tables_32((EFI_SYSTEM_TABLE *) args->efiSystemTable); + efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable)); } } while (FALSE); @@ -509,7 +545,7 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o kprintf("Reinitializing EFI runtime services\n"); - if (args->Revision < 3) + if (args->Version != kBootArgsVersion1) return; do { @@ -532,15 +568,15 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o kprintf("Old map:\n"); msize = args->MemoryMapDescriptorSize; mcount = args->MemoryMapSize / msize; - mptr = (EfiMemoryRange *)args->MemoryMap; + mptr = (EfiMemoryRange *)ml_static_ptovirt(args->MemoryMap); for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) { - vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; phys_addr = (vm_map_offset_t) mptr->PhysicalStart; - kprintf("mapping[%u] %qx @ %x, %llu\n", mptr->Type, phys_addr, vm_addr, mptr->NumberOfPages); + kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, 
mptr->NumberOfPages); } } @@ -549,31 +585,36 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o kprintf("New map:\n"); msize = args->MemoryMapDescriptorSize; - mcount = map_size / msize; + mcount = (unsigned int )(map_size / msize); mptr = map; for (i=0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) { - vm_size = i386_ptob((uint32_t)mptr->NumberOfPages); + vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; phys_addr = (vm_map_offset_t) mptr->PhysicalStart; - kprintf("mapping[%u] %qx @ %x, %llu\n", mptr->Type, phys_addr, vm_addr, mptr->NumberOfPages); + kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, mptr->NumberOfPages); - pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size), +#if defined(__i386__) + pmap_map +#elif defined(__x86_64__) + pmap_map_bd /* K64todo resolve pmap layer inconsistency */ +#endif + (vm_addr, phys_addr, phys_addr + round_page(vm_size), (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE, (mptr->Type == EfiMemoryMappedIO) ? 
VM_WIMG_IO : VM_WIMG_USE_DEFAULT); } } - if (args->Version > 1) - panic("Incompatible boot args version %d\n", args->Version); + if ((args->Version != kBootArgsVersion1) || (args->Version == kBootArgsVersion1 && args->Revision < kBootArgsRevision1_5 )) + panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision); kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); - if (args->Revision >= 4 && args->efiMode == kBootArgsEfiMode64) { - efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) args->efiSystemTable); + if (args->efiMode == kBootArgsEfiMode64) { + efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) ml_static_ptovirt(args->efiSystemTable)); } else { - efi_set_tables_32((EFI_SYSTEM_TABLE *) args->efiSystemTable); + efi_set_tables_32((EFI_SYSTEM_TABLE_32 *) ml_static_ptovirt(args->efiSystemTable)); } } while (FALSE); @@ -627,9 +668,9 @@ machine_init(void) pat_init(); /* - * Free lowmem pages + * Free lowmem pages and complete other setup */ - x86_lowmem_free(); + pmap_lowmem_finalize(); } /* @@ -714,10 +755,14 @@ Debugger( panic_io_port_read(); /* Obtain current frame pointer */ +#if defined (__i386__) __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); +#elif defined (__x86_64__) + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); +#endif /* Print backtrace - callee is internally synchronized */ - panic_i386_backtrace(stackptr, 16, NULL, FALSE, NULL); + panic_i386_backtrace(stackptr, 20, NULL, FALSE, NULL); /* everything should be printed now so copy to NVRAM */ @@ -729,8 +774,8 @@ Debugger( if (commit_paniclog_to_nvram) { unsigned int bufpos; uintptr_t cr0; - - debug_putc(0); + + debug_putc(0); /* Now call the compressor */ /* XXX Consider using the WKdm compressor in the @@ -752,6 +797,7 @@ Debugger( * This call must save data synchronously, * since we can subsequently halt the system. 
*/ + kprintf("Attempting to commit panic log to NVRAM\n"); /* The following sequence is a workaround for: * SnowLeopard10A67: AppleEFINVRAM should not invoke @@ -760,11 +806,10 @@ Debugger( */ cr0 = get_cr0(); clear_ts(); - - pi_size = PESavePanicInfo((unsigned char *)debug_buf, - pi_size ); + + pi_size = PESavePanicInfo((unsigned char *)debug_buf, + (uint32_t)pi_size ); set_cr0(cr0); - /* Uncompress in-place, to permit examination of * the panic log by debuggers. */ @@ -799,16 +844,6 @@ Debugger( hw_atomic_sub(&debug_mode, 1); } -void -enable_bluebox(void) -{ -} - -void -disable_bluebox(void) -{ -} - char * machine_boot_info(char *buf, __unused vm_size_t size) { @@ -835,25 +870,22 @@ typedef struct pasc pasc_t; */ static int -panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) +panic_print_macho_symbol_name(kernel_mach_header_t *mh, vm_address_t search) { - struct nlist *sym = NULL; + kernel_nlist_t *sym = NULL; struct load_command *cmd; - struct segment_command *orig_ts = NULL, *orig_le = NULL; + kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; struct symtab_command *orig_st = NULL; unsigned int i; char *strings, *bestsym = NULL; vm_address_t bestaddr = 0, diff, curdiff; - - if (mh->magic != MH_MAGIC) { - /* bad magic number */ - return 0; - } + + /* Assume that if it's loaded and linked into the kernel, it's a valid Mach-O */ cmd = (struct load_command *) &mh[1]; for (i = 0; i < mh->ncmds; i++) { - if (cmd->cmd == LC_SEGMENT) { - struct segment_command *orig_sg = (struct segment_command *) cmd; + if (cmd->cmd == LC_SEGMENT_KERNEL) { + kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; if (strncmp(SEG_TEXT, orig_sg->segname, sizeof(orig_sg->segname)) == 0) @@ -868,7 +900,7 @@ panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) else if (cmd->cmd == LC_SYMTAB) orig_st = (struct symtab_command *) cmd; - cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); + cmd = (struct load_command 
*) ((uintptr_t) cmd + cmd->cmdsize); } if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) @@ -876,7 +908,7 @@ panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) /* kexts don't have a LINKEDIT segment for now, so we'll never get this far for kexts */ - vm_address_t slide = ((vm_address_t)mh) - orig_ts->vmaddr; + vm_offset_t slide = ((vm_address_t)mh) - orig_ts->vmaddr; if (slide != 0) search -= slide; /* adjusting search since the binary has slid */ @@ -886,11 +918,13 @@ panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) return 0; } - sym = (struct nlist *)orig_le->vmaddr; - strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); + sym = (kernel_nlist_t *)(uintptr_t)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + strings = (char *)(uintptr_t)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); diff = search; for (i = 0; i < orig_st->nsyms; i++) { + if (sym[i].n_type & N_STAB) continue; + if (sym[i].n_value <= search) { curdiff = search - (vm_address_t)sym[i].n_value; if (curdiff < diff) { @@ -903,9 +937,9 @@ panic_print_macho_symbol_name(struct mach_header *mh, vm_address_t search) if (bestsym != NULL) { if (diff != 0) { - kdb_printf("%s + 0x%08x \n", bestsym, diff); + kdb_printf("%s + 0x%lx", bestsym, (unsigned long)diff); } else { - kdb_printf("%s \n", bestsym); + kdb_printf("%s", bestsym); } return 1; } @@ -927,12 +961,10 @@ panic_print_kmod_symbol_name(vm_address_t search) } if (current_kmod != NULL) { /* if kexts had symbol table loaded, we'd call search_symbol_name again; alas, they don't */ - kdb_printf("%s + %d \n", current_kmod->name, search - current_kmod->address); + kdb_printf("%s + %lu \n", current_kmod->name, (unsigned long)search - current_kmod->address); } } -extern struct mach_header _mh_execute_header; /* the kernel's mach header */ - static void panic_print_symbol_name(vm_address_t search) { @@ -974,12 +1006,27 @@ panic_i386_backtrace(void *_frame, int nframes, 
const char *msg, boolean_t regdu PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms)); if (msg != NULL) { - kdb_printf(msg); + kdb_printf("%s", msg); } if ((regdump == TRUE) && (regs != NULL)) { +#if defined(__x86_64__) + x86_saved_state64_t *ss64p = saved_state64(regs); + kdb_printf( + "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n" + "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n" + "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n" + "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n" + "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n", + ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, + ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, + ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, + ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, + ss64p->isf.rflags, ss64p->isf.rip, ss64p->isf.cs, + ss64p->isf.ss); + PC = ss64p->isf.rip; +#else x86_saved_state32_t *ss32p = saved_state32(regs); - kdb_printf( "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" @@ -988,10 +1035,15 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu ss32p->cr2,ss32p->ebp,ss32p->esi,ss32p->edi, ss32p->efl,ss32p->eip,ss32p->cs, ss32p->ds); PC = ss32p->eip; +#endif } kdb_printf("Backtrace (CPU %d), " - "Frame : Return Address (4 potential args on stack)\n", cpu_number()); +#if PRINT_ARGS_FROM_STACK_FRAME + "Frame : Return Address (4 potential args on stack)\n", cpu_number()); +#else + "Frame : Return Address\n", cpu_number()); +#endif for (frame_index = 0; frame_index < nframes; frame_index++) { vm_offset_t curframep = (vm_offset_t) frame; @@ -1010,14 +1062,16 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu goto invalid; } - kdb_printf("%p : 0x%x ", frame, frame->caller); + kdb_printf("%p : 0x%lx ", frame, frame->caller); if (frame_index < DUMPFRAMES) raddrs[frame_index] = 
frame->caller; +#if PRINT_ARGS_FROM_STACK_FRAME if (kvtophys((vm_offset_t)&(frame->args[3]))) - kdb_printf("(0x%x 0x%x 0x%x 0x%x) \n", + kdb_printf("(0x%x 0x%x 0x%x 0x%x) ", frame->args[0], frame->args[1], frame->args[2], frame->args[3]); +#endif /* Display address-symbol translation only if the "keepsyms" * boot-arg is suppplied, since we unload LINKEDIT otherwise. @@ -1027,11 +1081,8 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu if (keepsyms) panic_print_symbol_name((vm_address_t)frame->caller); - /* Stack grows downward */ - if (frame->prev < frame) { - frame = frame->prev; - goto invalid; - } + kdb_printf("\n"); + frame = frame->prev; } @@ -1049,14 +1100,12 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu * the kmod list safely. */ if (frame_index) - kmod_dump((vm_offset_t *)&raddrs[0], frame_index); + kmod_panic_dump((vm_offset_t *)&raddrs[0], frame_index); if (PC != 0) - kmod_dump(&PC, 1); + kmod_panic_dump(&PC, 1); panic_display_system_configuration(); - panic_display_zprint(); - dump_kext_info(&kdb_log); /* Release print backtrace lock, to permit other callers in the * event of panics on multiple processors. 
@@ -1069,3 +1118,5 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu bt_tsc_timeout = rdtsc64() + PBT_TIMEOUT_CYCLES; while(*ppbtcnt && (rdtsc64() < bt_tsc_timeout)); } + +void *apic_table = NULL; diff --git a/osfmk/i386/Diagnostics.c b/osfmk/i386/Diagnostics.c index 74f806a63..f9fd283bc 100644 --- a/osfmk/i386/Diagnostics.c +++ b/osfmk/i386/Diagnostics.c @@ -191,7 +191,7 @@ diagCall(x86_saved_state_t * state) durNap = 1; /* This is a very short time, make it * bigger */ - curpos = data + sizeof(real_ncpus); /* Point to the next + curpos = (uint32_t)(data + sizeof(real_ncpus)); /* Point to the next * available spot */ for (i = 0; i < real_ncpus; i++) { /* Move 'em all out */ @@ -200,7 +200,7 @@ diagCall(x86_saved_state_t * state) (void) copyout((char *) &cpu_data_ptr[i]->cpu_hwIntCnt, curpos + 8, 256 * sizeof(uint32_t)); /* Copy out interrupt * data for this * processor */ - curpos = curpos + (256 * sizeof(uint32_t) + 8); /* Point to next out put + curpos = (uint32_t)(curpos + (256 * sizeof(uint32_t) + 8)); /* Point to next out put * slot */ } diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile index 6cae8e5cd..d07d32aac 100644 --- a/osfmk/i386/Makefile +++ b/osfmk/i386/Makefile @@ -42,6 +42,10 @@ INSTALL_MD_LIST = eflags.h user_ldt.h INSTALL_MD_LCL_LIST = cpu_capabilities.h +INSTALL_KF_MD_LIST = asm.h cpuid.h eflags.h locks.h machine_routines.h proc_reg.h vmx.h + +INSTALL_KF_MD_LCL_LIST = $(filter-out cpu_data.h, $(EXPORT_ONLY_FILES)) + EXPORT_MD_LIST = ${EXPORT_ONLY_FILES} EXPORT_MD_DIR = i386 diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index 17143604c..b27d050b2 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -26,19 +26,25 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include +#include +#include #include +#include #include -#include -#include #include +#if CONFIG_VMX #include +#endif #include #include #include #include #include #include +#if CONFIG_MCA #include +#endif #include #include @@ -49,27 +55,22 @@ 
#endif #include +#if CONFIG_SLEEP extern void acpi_sleep_cpu(acpi_sleep_callback, void * refcon); -extern char acpi_wake_start[]; -extern char acpi_wake_end[]; - -extern void set_kbd_leds(int leds); +extern void acpi_wake_prot(void); +#endif extern void fpinit(void); vm_offset_t acpi_install_wake_handler(void) { - /* copy wake code to ACPI_WAKE_ADDR in low memory */ - bcopy_phys(kvtophys((vm_offset_t)acpi_wake_start), - (addr64_t) ACPI_WAKE_ADDR, - acpi_wake_end - acpi_wake_start); - - /* flush cache */ - wbinvd(); - - /* return physical address of the wakeup code */ - return ACPI_WAKE_ADDR; +#if CONFIG_SLEEP + install_real_mode_bootstrap(acpi_wake_prot); + return REAL_MODE_BOOTSTRAP_OFFSET; +#else + return 0; +#endif } #if HIBERNATION @@ -79,6 +80,7 @@ struct acpi_hibernate_callback_data { }; typedef struct acpi_hibernate_callback_data acpi_hibernate_callback_data_t; +#if CONFIG_SLEEP static void acpi_hibernate(void *refcon) { @@ -89,7 +91,9 @@ acpi_hibernate(void *refcon) if (current_cpu_datap()->cpu_hibernate) { +#if defined(__i386__) cpu_IA32e_enable(current_cpu_datap()); +#endif mode = hibernate_write_image(); @@ -114,28 +118,31 @@ acpi_hibernate(void *refcon) cpu_datap(0)->cpu_hibernate = 0; } +#if defined(__i386__) /* * If we're in 64-bit mode, drop back into legacy mode during sleep. */ cpu_IA32e_disable(current_cpu_datap()); - +#endif } (data->func)(data->refcon); /* should never get here! 
*/ } -#endif +#endif /* CONFIG_SLEEP */ +#endif /* HIBERNATION */ static uint64_t acpi_sleep_abstime; +extern void slave_pstart(void); void acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) { #if HIBERNATION acpi_hibernate_callback_data_t data; - boolean_t did_hibernate; #endif + boolean_t did_hibernate; unsigned int cpu; kern_return_t rc; unsigned int my_cpu; @@ -143,15 +150,15 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) kprintf("acpi_sleep_kernel hib=%d\n", current_cpu_datap()->cpu_hibernate); - /* Geta ll CPUs to be in the "off" state */ - my_cpu = cpu_number(); + /* Get all CPUs to be in the "off" state */ + my_cpu = cpu_number(); for (cpu = 0; cpu < real_ncpus; cpu += 1) { if (cpu == my_cpu) continue; rc = pmCPUExitHaltToOff(cpu); if (rc != KERN_SUCCESS) - panic("Error %d trying to transition CPU %d to OFF", - rc, cpu); + panic("Error %d trying to transition CPU %d to OFF", + rc, cpu); } /* shutdown local APIC before passing control to BIOS */ @@ -165,28 +172,41 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* Save power management timer state */ pmTimerSave(); +#if CONFIG_VMX /* * Turn off VT, otherwise switching to legacy mode will fail */ vmx_suspend(); +#endif +#if defined(__i386__) /* * If we're in 64-bit mode, drop back into legacy mode during sleep. */ cpu_IA32e_disable(current_cpu_datap()); +#endif acpi_sleep_abstime = mach_absolute_time(); +#if CONFIG_SLEEP /* * Save master CPU state and sleep platform. * Will not return until platform is woken up, * or if sleep failed. */ +#ifdef __x86_64__ + uint64_t old_cr3 = x86_64_pre_sleep(); +#endif #if HIBERNATION acpi_sleep_cpu(acpi_hibernate, &data); #else acpi_sleep_cpu(func, refcon); #endif +#ifdef __x86_64__ + x86_64_post_sleep(old_cr3); +#endif + +#endif /* CONFIG_SLEEP */ /* Reset UART if kprintf is enabled. 
* However kprintf should not be used before rtc_sleep_wakeup() @@ -198,10 +218,11 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) #if HIBERNATION if (current_cpu_datap()->cpu_hibernate) { +#if defined(__i386__) int i; for (i = 0; i < PMAP_NWINDOWS; i++) *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; - current_cpu_datap()->cpu_hibernate = 0; +#endif did_hibernate = TRUE; } else @@ -213,16 +234,20 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* Re-enable mode (including 64-bit if applicable) */ cpu_mode_init(current_cpu_datap()); +#if CONFIG_MCA /* Re-enable machine check handling */ mca_cpu_init(); +#endif /* restore MTRR settings */ mtrr_update_cpu(); +#if CONFIG_VMX /* * Restore VT mode */ vmx_resume(); +#endif /* set up PAT following boot processor power up */ pat_init(); @@ -236,9 +261,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* let the realtime clock reset */ rtc_sleep_wakeup(acpi_sleep_abstime); - if (did_hibernate) + if (did_hibernate){ hibernate_machine_init(); - + current_cpu_datap()->cpu_hibernate = 0; + } /* re-enable and re-init local apic */ if (lapic_probe()) lapic_configure(); @@ -256,9 +282,52 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) clear_fpu(); #if HIBERNATION +#ifdef __i386__ + /* The image is written out using the copy engine, which disables + * preemption. 
Since the copy engine writes out the page which contains + * the preemption variable when it is disabled, we need to explicitly + * enable it here */ if (did_hibernate) enable_preemption(); +#endif kprintf("ret from acpi_sleep_cpu hib=%d\n", did_hibernate); #endif + +#if CONFIG_SLEEP + /* Becase we don't save the bootstrap page, and we share it + * between sleep and mp slave init, we need to recreate it + * after coming back from sleep or hibernate */ + install_real_mode_bootstrap(slave_pstart); +#endif } + +extern char real_mode_bootstrap_end[]; +extern char real_mode_bootstrap_base[]; + +void +install_real_mode_bootstrap(void *prot_entry) +{ + /* + * Copy the boot entry code to the real-mode vector area REAL_MODE_BOOTSTRAP_OFFSET. + * This is in page 1 which has been reserved for this purpose by + * machine_startup() from the boot processor. + * The slave boot code is responsible for switching to protected + * mode and then jumping to the common startup, _start(). + */ + bcopy_phys(kvtophys((vm_offset_t) real_mode_bootstrap_base), + (addr64_t) REAL_MODE_BOOTSTRAP_OFFSET, + real_mode_bootstrap_end-real_mode_bootstrap_base); + + /* + * Set the location at the base of the stack to point to the + * common startup entry. + */ + ml_phys_write_word( + PROT_MODE_START+REAL_MODE_BOOTSTRAP_OFFSET, + (unsigned int)kvtophys((vm_offset_t)prot_entry)); + + /* Flush caches */ + __asm__("wbinvd"); +} + diff --git a/osfmk/i386/acpi.h b/osfmk/i386/acpi.h index ec5ce3ea4..a64e8127c 100644 --- a/osfmk/i386/acpi.h +++ b/osfmk/i386/acpi.h @@ -34,15 +34,19 @@ */ /* - * Wake up code linear address - * FIXME: borrowed unused memory reserved by MP_BOOT + * Wake up code linear address. Wake and MP startup copy + * code to this physical address and then jump to the + * address started at PROT_MODE_START. 
Some small amount + * below PROT_MODE_START is used as scratch space */ -#define ACPI_WAKE_ADDR 0x2000 +#define PROT_MODE_START 0x800 +#define REAL_MODE_BOOTSTRAP_OFFSET 0x2000 #ifndef ASSEMBLER typedef void (*acpi_sleep_callback)(void * refcon); extern vm_offset_t acpi_install_wake_handler(void); extern void acpi_sleep_kernel(acpi_sleep_callback func, void * refcon); +void install_real_mode_bootstrap(void *prot_entry); #endif /* ASSEMBLER */ #endif /* !_I386_ACPI_H_ */ diff --git a/osfmk/i386/acpi_wakeup.s b/osfmk/i386/acpi_wakeup.s index 0d20c8063..30d40507e 100644 --- a/osfmk/i386/acpi_wakeup.s +++ b/osfmk/i386/acpi_wakeup.s @@ -37,57 +37,10 @@ .text .align 12 /* Page align for single bcopy_phys() */ -#define LJMP(segment, address) \ - .byte 0xea ;\ - .long address - EXT(acpi_wake_start) ;\ - .word segment - #define PA(addr) (addr) -/* - * acpi_wake_start - * - * The code from acpi_wake_start to acpi_wake_end is copied to - * memory below 1MB. The firmware waking vector is updated to - * point at acpi_wake_start in low memory before sleeping. - */ - -ENTRY(acpi_wake_start) - /* - * CPU woke up from sleep, and is back in real mode. - * Initialize it just enough to get back to protected mode. - */ - cli - - POSTCODE(ACPI_WAKE_START_ENTRY) - - /* set up DS to match CS */ - movw %cs, %ax - movw %ax, %ds - - /* - * Must initialize GDTR before entering protected mode. - * Use a temporary GDT that is 0 based, 4GB limit, code and data. - * Restoring the actual GDT will come later. - */ - addr16 - data16 - lgdt EXT(acpi_gdtr) - EXT(acpi_wake_start) - - /* set CR0.PE to enter protected mode */ - mov %cr0, %eax - data16 - or $(CR0_PE), %eax - mov %eax, %cr0 - - /* - * Make intra-segment jump to flush pipeline and reload CS register. - * If GDT is bogus, it will blow up here. 
- */ - data16 - LJMP(0x8, acpi_wake_prot + ACPI_WAKE_ADDR) - -acpi_wake_prot: +#if CONFIG_SLEEP +ENTRY(acpi_wake_prot) /* protected mode, paging disabled */ @@ -103,36 +56,6 @@ acpi_wake_prot: movl PA(saved_eip), %eax jmp *%eax -/* Segment Descriptor - * - * 31 24 19 16 7 0 - * ------------------------------------------------------------ - * | | |B| |A| | | |1|0|E|W|A| | - * | BASE 31..24 |G|/|0|V| LIMIT |P|DPL| TYPE | BASE 23:16 | - * | | |D| |L| 19..16| | |1|1|C|R|A| | - * ------------------------------------------------------------ - * | | | - * | BASE 15..0 | LIMIT 15..0 | - * | | | - * ------------------------------------------------------------ - */ -ENTRY(acpi_gdt) - .word 0, 0 /* 0x0 : null */ - .byte 0, 0, 0, 0 - - .word 0xffff, 0x0000 /* 0x8 : code */ - .byte 0, 0x9e, 0xcf, 0 - - .word 0xffff, 0x0000 /* 0x10 : data */ - .byte 0, 0x92, 0xcf, 0 - -ENTRY(acpi_gdtr) - .word 24 /* limit (8*3 segs) */ - .long EXT(acpi_gdt) - EXT(acpi_wake_start) + ACPI_WAKE_ADDR - -ENTRY(acpi_wake_end) - - /* * acpi_sleep_cpu(acpi_sleep_callback func, void * refcon) * @@ -223,7 +146,7 @@ wake_prot: movl %eax, %cr0 /* switch to kernel code segment */ - ljmpl $(KERNEL_CS), $wake_paged + ljmpl $(KERNEL32_CS), $wake_paged wake_paged: @@ -272,6 +195,29 @@ wake_restore: .globl EXT(acpi_wake_prot_entry) ENTRY(acpi_wake_prot_entry) + mov %cr0, %eax + and $(~CR0_PG), %eax + mov %eax, %cr0 + mov $EXT(IdlePDPT), %eax + mov EXT(IdlePTD), %ecx + or $(INTEL_PTE_VALID), %ecx + mov $0x0, %edx + mov %ecx, (0*8+0)(%eax) + mov %edx, (0*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (1*8+0)(%eax) + mov %edx, (1*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (2*8+0)(%eax) + mov %edx, (2*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (3*8+0)(%eax) + mov %edx, (3*8+4)(%eax) + mov %eax, %cr3 + mov %cr0, %eax + or $(CR0_PG), %eax + mov %eax, %cr0 + /* protected mode, paging enabled */ POSTCODE(ACPI_WAKE_PAGED_ENTRY) @@ -338,13 +284,13 @@ ENTRY(acpi_wake_prot_entry) movl $2, %eax leave - ret - - 
.data - .section __HIB, __data - .align 2 + ret +#endif /* CONFIG_SLEEP */ +.data +.section __SLEEP, __data +.align 2 /* * CPU registers saved across sleep/wake. diff --git a/osfmk/i386/asm.h b/osfmk/i386/asm.h index 280a8fae3..02a5620ae 100644 --- a/osfmk/i386/asm.h +++ b/osfmk/i386/asm.h @@ -72,6 +72,7 @@ #include #endif /* MACH_KERNEL || _KERNEL */ +#if defined(__i386__) #define S_PC (%esp) #define S_ARG0 4(%esp) @@ -90,6 +91,20 @@ #define B_ARG2 16(%ebp) #define B_ARG3 20(%ebp) +#elif defined(__x86_64__) + +#define S_PC (%rsp) + +#define FRAME pushq %rbp; movq %rsp, %rbp +#define EMARF leave + +#define B_LINK (%rbp) +#define B_PC 8(%rbp) + +#else +#error unsupported architecture +#endif + /* There is another definition of ALIGN for .c sources */ #ifdef ASSEMBLER #define ALIGN 4,0x90 diff --git a/osfmk/i386/asm64.h b/osfmk/i386/asm64.h index 4b4c9252a..08afac97a 100644 --- a/osfmk/i386/asm64.h +++ b/osfmk/i386/asm64.h @@ -56,7 +56,7 @@ #define ENTER_COMPAT_MODE() \ ljmp *(%rip) ;\ .long 4f ;\ - .word KERNEL_CS ;\ + .word KERNEL32_CS ;\ .code32 ;\ 4: diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index a870cc503..66939fac9 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,34 +43,36 @@ #include #include #include +#include #include #include #include #include #include -#include #include -#include #include #include -#include #include #include #include #include -#include -#include -#include -#include #include +#include +#include +#include #include +#include #include #include #include #include #include <../bsd/sys/sysent.h> +#ifdef MACH_BSD +extern void mach_kauth_cred_uthread_update(void); +#endif + kern_return_t thread_userstack( thread_t, @@ -98,13 +100,8 @@ unsigned int get_msr_nbits(void); unsigned int get_msr_rbits(void); -kern_return_t -thread_compose_cthread_desc(unsigned int addr, pcb_t pcb); - -void IOSleep(int); extern void throttle_lowpri_io(boolean_t); -void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); /* * thread_userstack: @@ -205,7 +202,6 @@ thread_entrypoint( return (KERN_SUCCESS); } - /* * Duplicate parent state in child * for U**X fork. @@ -270,6 +266,7 @@ void thread_set_child(thread_t child, int pid); void thread_set_child(thread_t child, int pid) { + if (thread_is_64bit(child)) { x86_saved_state64_t *iss64; @@ -295,6 +292,7 @@ void thread_set_parent(thread_t parent, int pid); void thread_set_parent(thread_t parent, int pid) { + if (thread_is_64bit(parent)) { x86_saved_state64_t *iss64; @@ -340,6 +338,9 @@ machdep_syscall(x86_saved_state_t *state) kprintf("machdep_syscall(0x%08x) code=%d\n", regs, trapno); #endif + DEBUG_KPRINT_SYSCALL_MDEP( + "machdep_syscall: trapno=%d\n", trapno); + if (trapno < 0 || trapno >= machdep_call_count) { regs->eax = (unsigned int)kern_invalid(NULL); @@ -395,6 +396,8 @@ machdep_syscall(x86_saved_state_t *state) if (current_thread()->funnel_lock) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax); + throttle_lowpri_io(TRUE); thread_exception_return(); @@ -412,7 +415,10 @@ machdep_syscall64(x86_saved_state_t *state) 
assert(is_saved_state64(state)); regs = saved_state64(state); - trapno = regs->rax & SYSCALL_NUMBER_MASK; + trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK); + + DEBUG_KPRINT_SYSCALL_MDEP( + "machdep_syscall64: trapno=%d\n", trapno); if (trapno < 0 || trapno >= machdep_call_count) { regs->rax = (unsigned int)kern_invalid(NULL); @@ -435,114 +441,76 @@ machdep_syscall64(x86_saved_state_t *state) if (current_thread()->funnel_lock) (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax); + throttle_lowpri_io(TRUE); thread_exception_return(); /* NOTREACHED */ } - -kern_return_t -thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) -{ - struct real_descriptor desc; - - mp_disable_preemption(); - - desc.limit_low = 1; - desc.limit_high = 0; - desc.base_low = addr & 0xffff; - desc.base_med = (addr >> 16) & 0xff; - desc.base_high = (addr >> 24) & 0xff; - desc.access = ACC_P|ACC_PL_U|ACC_DATA_W; - desc.granularity = SZ_32|SZ_G; - pcb->cthread_desc = desc; - *ldt_desc_p(USER_CTHREAD) = desc; - - mp_enable_preemption(); - - return(KERN_SUCCESS); -} - -kern_return_t -thread_set_cthread_self(uint32_t self) -{ - current_thread()->machine.pcb->cthread_self = (uint64_t) self; - - return (KERN_SUCCESS); -} - -kern_return_t -thread_get_cthread_self(void) -{ - return ((kern_return_t)current_thread()->machine.pcb->cthread_self); -} - +/* + * thread_fast_set_cthread_self: Sets the machine kernel thread ID of the + * current thread to the given thread ID; fast version for 32-bit processes + * + * Parameters: self Thread ID to set + * + * Returns: 0 Success + * !0 Not success + */ kern_return_t thread_fast_set_cthread_self(uint32_t self) { - pcb_t pcb; - x86_saved_state32_t *iss; - - pcb = (pcb_t)current_thread()->machine.pcb; - thread_compose_cthread_desc(self, pcb); - pcb->cthread_self = (uint64_t) self; /* preserve old func too */ - iss = saved_state32(pcb->iss); - iss->gs = USER_CTHREAD; + thread_t 
thread = current_thread(); + pcb_t pcb = thread->machine.pcb; + struct real_descriptor desc = { + .limit_low = 1, + .limit_high = 0, + .base_low = self & 0xffff, + .base_med = (self >> 16) & 0xff, + .base_high = (self >> 24) & 0xff, + .access = ACC_P|ACC_PL_U|ACC_DATA_W, + .granularity = SZ_32|SZ_G, + }; + + current_thread()->machine.pcb->cthread_self = (uint64_t) self; /* preserve old func too */ + + /* assign descriptor */ + mp_disable_preemption(); + pcb->cthread_desc = desc; + *ldt_desc_p(USER_CTHREAD) = desc; + saved_state32(pcb->iss)->gs = USER_CTHREAD; + mp_enable_preemption(); return (USER_CTHREAD); } -void -thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64) -{ - if (isLP64 == 0) { - pcb_t pcb; - x86_saved_state32_t *iss; - - pcb = (pcb_t)thread->machine.pcb; - thread_compose_cthread_desc(pself, pcb); - pcb->cthread_self = (uint64_t) pself; /* preserve old func too */ - iss = saved_state32(pcb->iss); - iss->gs = USER_CTHREAD; - } else { - pcb_t pcb; - x86_saved_state64_t *iss; - - pcb = thread->machine.pcb; - - /* check for canonical address, set 0 otherwise */ - if (!IS_USERADDR64_CANONICAL(pself)) - pself = 0ULL; - pcb->cthread_self = pself; - - /* XXX for 64-in-32 */ - iss = saved_state64(pcb->iss); - iss->gs = USER_CTHREAD; - thread_compose_cthread_desc((uint32_t) pself, pcb); - } -} - - +/* + * thread_fast_set_cthread_self64: Sets the machine kernel thread ID of the + * current thread to the given thread ID; fast version for 64-bit processes + * + * Parameters: self Thread ID + * + * Returns: 0 Success + * !0 Not success + */ kern_return_t thread_fast_set_cthread_self64(uint64_t self) { - pcb_t pcb; - x86_saved_state64_t *iss; - - pcb = current_thread()->machine.pcb; + pcb_t pcb = current_thread()->machine.pcb; /* check for canonical address, set 0 otherwise */ if (!IS_USERADDR64_CANONICAL(self)) self = 0ULL; + pcb->cthread_self = self; + mp_disable_preemption(); +#if defined(__x86_64__) + if 
(current_cpu_datap()->cpu_uber.cu_user_gs_base != self) + wrmsr64(MSR_IA32_KERNEL_GS_BASE, self); +#endif current_cpu_datap()->cpu_uber.cu_user_gs_base = self; - - /* XXX for 64-in-32 */ - iss = saved_state64(pcb->iss); - iss->gs = USER_CTHREAD; - thread_compose_cthread_desc((uint32_t) self, pcb); - + mp_enable_preemption(); return (USER_CTHREAD); } @@ -669,6 +637,8 @@ mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call __private_extern__ void mach_call_munger(x86_saved_state_t *state); +extern const char *mach_syscall_name_table[]; + void mach_call_munger(x86_saved_state_t *state) { @@ -683,6 +653,10 @@ mach_call_munger(x86_saved_state_t *state) regs = saved_state32(state); call_number = -(regs->eax); + + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: code=%d(%s)\n", + call_number, mach_syscall_name_table[call_number]); #if DEBUG_TRACE kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number); #endif @@ -694,6 +668,8 @@ mach_call_munger(x86_saved_state_t *state) mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; if (mach_call == (mach_call_t)kern_invalid) { + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: kern_invalid 0x%x\n", regs->eax); i386_exception(EXC_SYSCALL, call_number, 1); /* NOTREACHED */ } @@ -704,15 +680,24 @@ mach_call_munger(x86_saved_state_t *state) if (retval != KERN_SUCCESS) { regs->eax = retval; + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: retval=0x%x\n", retval); + thread_exception_return(); /* NOTREACHED */ } } + +#ifdef MACH_BSD + mach_kauth_cred_uthread_update(); +#endif KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, - (int) args.arg1, (int) args.arg2, (int) args.arg3, (int) args.arg4, 0); + args.arg1, args.arg2, args.arg3, args.arg4, 0); retval = mach_call(&args); + DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, retval, 0, 0, 0, 
0); regs->eax = retval; @@ -737,12 +722,16 @@ mach_call_munger64(x86_saved_state_t *state) assert(is_saved_state64(state)); regs = saved_state64(state); - call_number = regs->rax & SYSCALL_NUMBER_MASK; + call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK); + + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger64: code=%d(%s)\n", + call_number, mach_syscall_name_table[call_number]); KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, - (int) regs->rdi, (int) regs->rsi, - (int) regs->rdx, (int) regs->r10, 0); + regs->rdi, regs->rsi, + regs->rdx, regs->r10, 0); if (call_number < 0 || call_number >= mach_trap_count) { i386_exception(EXC_SYSCALL, regs->rax, 1); @@ -759,7 +748,7 @@ mach_call_munger64(x86_saved_state_t *state) if (argc > 6) { int copyin_count; - copyin_count = (argc - 6) * sizeof(uint64_t); + copyin_count = (argc - 6) * (int)sizeof(uint64_t); if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) { regs->rax = KERN_INVALID_ARGUMENT; @@ -768,11 +757,18 @@ mach_call_munger64(x86_saved_state_t *state) /* NOTREACHED */ } } + +#ifdef MACH_BSD + mach_kauth_cred_uthread_update(); +#endif + regs->rax = (uint64_t)mach_call((void *)(&regs->rdi)); + DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_END, - (int)regs->rax, 0, 0, 0, 0); + regs->rax, 0, 0, 0, 0); throttle_lowpri_io(TRUE); @@ -803,7 +799,7 @@ thread_setuserstack( iss32 = USER_REGS32(thread); - iss32->uesp = CAST_DOWN(unsigned int, user_stack); + iss32->uesp = CAST_DOWN_EXPLICIT(unsigned int, user_stack); } } @@ -857,7 +853,7 @@ thread_setentrypoint(thread_t thread, mach_vm_address_t entry) iss32 = USER_REGS32(thread); - iss32->eip = CAST_DOWN(unsigned int, entry); + iss32->eip = CAST_DOWN_EXPLICIT(unsigned int, entry); } } @@ -879,8 +875,12 @@ thread_setsinglestep(thread_t thread, int on) iss32 = USER_REGS32(thread); - if (on) 
+ if (on) { iss32->efl |= EFL_TF; + /* Ensure IRET */ + if (iss32->cs == SYSENTER_CS) + iss32->cs = SYSENTER_TF_CS; + } else iss32->efl &= ~EFL_TF; } @@ -913,9 +913,9 @@ get_user_regs(thread_t th) * DTrace would like to have a peek at the kernel interrupt state, if available. * Based on osfmk/chud/i386/chud_thread_i386.c:chudxnu_thread_get_state(), which see. */ -x86_saved_state32_t *find_kern_regs(thread_t); +x86_saved_state_t *find_kern_regs(thread_t); -x86_saved_state32_t * +x86_saved_state_t * find_kern_regs(thread_t thread) { if (thread == current_thread() && @@ -923,7 +923,7 @@ find_kern_regs(thread_t thread) !(USER_STATE(thread) == current_cpu_datap()->cpu_int_state && current_cpu_datap()->cpu_interrupt_level == 1)) { - return saved_state32(current_cpu_datap()->cpu_int_state); + return current_cpu_datap()->cpu_int_state; } else { return NULL; } diff --git a/osfmk/i386/bzero.s b/osfmk/i386/bzero.s index 28bdf2217..034a6469c 100644 --- a/osfmk/i386/bzero.s +++ b/osfmk/i386/bzero.s @@ -87,7 +87,7 @@ ENTRY(memset) ret /* - * void bzero(char * addr, unsigned int length) + * void bzero(char * addr, size_t length) */ Entry(blkclr) ENTRY(bzero) diff --git a/osfmk/i386/commpage/atomic.s b/osfmk/i386/commpage/atomic.s index a8d6a6b52..769698b0f 100644 --- a/osfmk/i386/commpage/atomic.s +++ b/osfmk/i386/commpage/atomic.s @@ -32,9 +32,6 @@ /* OSAtomic.h library native implementations. */ - .text - .align 2, 0x90 - // This is a regparm(3) subroutine used by: // bool OSAtomicCompareAndSwap32( int32_t old, int32_t new, int32_t *value); @@ -53,20 +50,18 @@ // TODO: move the .long onto a separate page to reduce icache pollution (?) 
-Lcompare_and_swap32_mp: +COMMPAGE_FUNCTION_START(compare_and_swap32_mp, 32, 4) .long _COMM_PAGE_COMPARE_AND_SWAP32+4 lock cmpxchgl %edx, (%ecx) ret +COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) - COMMPAGE_DESCRIPTOR(compare_and_swap32_mp,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) - -Lcompare_and_swap32_up: +COMMPAGE_FUNCTION_START(compare_and_swap32_up, 32, 4) .long _COMM_PAGE_COMPARE_AND_SWAP32+4 cmpxchgl %edx, (%ecx) ret - - COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) +COMMPAGE_DESCRIPTOR(compare_and_swap32_up,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) // This is a subroutine used by: // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); @@ -75,20 +70,18 @@ Lcompare_and_swap32_up: // on success: returns with ZF set // on failure: returns with *value in %eax/%edx, ZF clear -Lcompare_and_swap64_mp: +COMMPAGE_FUNCTION_START(compare_and_swap64_mp, 32, 4) .long _COMM_PAGE_COMPARE_AND_SWAP64+4 lock cmpxchg8b (%esi) ret +COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) - COMMPAGE_DESCRIPTOR(compare_and_swap64_mp,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) - -Lcompare_and_swap64_up: +COMMPAGE_FUNCTION_START(compare_and_swap64_up, 32, 4) .long _COMM_PAGE_COMPARE_AND_SWAP64+4 cmpxchg8b (%esi) ret - - COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) +COMMPAGE_DESCRIPTOR(compare_and_swap64_up,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) // This is a subroutine used by: // bool OSAtomicTestAndSet( uint32_t n, void *value ); @@ -96,20 +89,18 @@ Lcompare_and_swap64_up: // Returns: old value of bit in CF -Lbit_test_and_set_mp: +COMMPAGE_FUNCTION_START(bit_test_and_set_mp, 32, 4) .long _COMM_PAGE_BTS+4 lock btsl %eax, (%edx) ret +COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) - COMMPAGE_DESCRIPTOR(bit_test_and_set_mp,_COMM_PAGE_BTS,0,kUP) - -Lbit_test_and_set_up: +COMMPAGE_FUNCTION_START(bit_test_and_set_up, 32, 4) .long _COMM_PAGE_BTS+4 
btsl %eax, (%edx) ret - - COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) +COMMPAGE_DESCRIPTOR(bit_test_and_set_up,_COMM_PAGE_BTS,kUP,0) // This is a subroutine used by: // bool OSAtomicTestAndClear( uint32_t n, void *value ); @@ -117,20 +108,18 @@ Lbit_test_and_set_up: // Returns: old value of bit in CF -Lbit_test_and_clear_mp: +COMMPAGE_FUNCTION_START(bit_test_and_clear_mp, 32, 4) .long _COMM_PAGE_BTC+4 lock btrl %eax, (%edx) ret +COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) - COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp,_COMM_PAGE_BTC,0,kUP) - -Lbit_test_and_clear_up: +COMMPAGE_FUNCTION_START(bit_test_and_clear_up, 32, 4) .long _COMM_PAGE_BTC+4 btrl %eax, (%edx) ret - - COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) +COMMPAGE_DESCRIPTOR(bit_test_and_clear_up,_COMM_PAGE_BTC,kUP,0) // This is a subroutine used by: // int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); @@ -139,38 +128,34 @@ Lbit_test_and_clear_up: // Returns: old value in %eax // NB: OSAtomicAdd32 returns the new value, so clients will add amt to %eax -Latomic_add32_mp: +COMMPAGE_FUNCTION_START(atomic_add32_mp, 32, 4) .long _COMM_PAGE_ATOMIC_ADD32+4 lock xaddl %eax, (%edx) ret - - COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP) +COMMPAGE_DESCRIPTOR(atomic_add32_mp,_COMM_PAGE_ATOMIC_ADD32,0,kUP) -Latomic_add32_up: +COMMPAGE_FUNCTION_START(atomic_add32_up, 32, 4) .long _COMM_PAGE_ATOMIC_ADD32+4 xaddl %eax, (%edx) ret - - COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) +COMMPAGE_DESCRIPTOR(atomic_add32_up,_COMM_PAGE_ATOMIC_ADD32,kUP,0) // OSMemoryBarrier() // These are used both in 32 and 64-bit mode. We use a fence even on UP // machines, so this function can be used with nontemporal stores. 
-Lmemory_barrier: +COMMPAGE_FUNCTION_START(memory_barrier, 32, 4) lock addl $0,(%esp) ret - - COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2); +COMMPAGE_DESCRIPTOR(memory_barrier,_COMM_PAGE_MEMORY_BARRIER,0,kHasSSE2); -Lmemory_barrier_sse2: +COMMPAGE_FUNCTION_START(memory_barrier_sse2, 32, 4) mfence ret - - COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0); +COMMPAGE_DESCRIPTOR(memory_barrier_sse2,_COMM_PAGE_MEMORY_BARRIER,kHasSSE2,0); /* @@ -182,7 +167,7 @@ Lmemory_barrier_sse2: * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); */ -LAtomicEnqueue: +COMMPAGE_FUNCTION_START(AtomicEnqueue, 32, 4) pushl %edi pushl %esi pushl %ebx @@ -202,13 +187,12 @@ LAtomicEnqueue: popl %esi popl %edi ret - - COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0) +COMMPAGE_DESCRIPTOR(AtomicEnqueue,_COMM_PAGE_ENQUEUE,0,0) /* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ -LAtomicDequeue: +COMMPAGE_FUNCTION_START(AtomicDequeue, 32, 4) pushl %edi pushl %esi pushl %ebx @@ -230,8 +214,7 @@ LAtomicDequeue: popl %esi popl %edi ret // ptr to 1st element in Q still in %eax - - COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0) +COMMPAGE_DESCRIPTOR(AtomicDequeue,_COMM_PAGE_DEQUEUE,0,0) @@ -251,22 +234,18 @@ LAtomicDequeue: // on success: returns with ZF set // on failure: returns with *value in %eax, ZF clear - .code64 -Lcompare_and_swap32_mp_64: +COMMPAGE_FUNCTION_START(compare_and_swap32_mp_64, 64, 4) movl %edi,%eax // put old value where "cmpxchg" wants it lock cmpxchgl %esi, (%rdx) ret +COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) - COMMPAGE_DESCRIPTOR(compare_and_swap32_mp_64,_COMM_PAGE_COMPARE_AND_SWAP32,0,kUP) - - .code64 -Lcompare_and_swap32_up_64: +COMMPAGE_FUNCTION_START(compare_and_swap32_up_64, 64, 4) movl %edi,%eax // put old value where "cmpxchg" wants it cmpxchgl %esi, (%rdx) ret - - 
COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) +COMMPAGE_DESCRIPTOR(compare_and_swap32_up_64,_COMM_PAGE_COMPARE_AND_SWAP32,kUP,0) // This is a subroutine used by: // bool OSAtomicCompareAndSwap64( int64_t old, int64_t new, int64_t *value); @@ -277,22 +256,18 @@ Lcompare_and_swap32_up_64: // on success: returns with ZF set // on failure: returns with *value in %rax, ZF clear - .code64 -Lcompare_and_swap64_mp_64: +COMMPAGE_FUNCTION_START(compare_and_swap64_mp_64, 64, 4) movq %rdi,%rax // put old value where "cmpxchg" wants it lock cmpxchgq %rsi, (%rdx) ret +COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) - COMMPAGE_DESCRIPTOR(compare_and_swap64_mp_64,_COMM_PAGE_COMPARE_AND_SWAP64,0,kUP) - - .code64 -Lcompare_and_swap64_up_64: +COMMPAGE_FUNCTION_START(compare_and_swap64_up_64, 64, 4) movq %rdi,%rax // put old value where "cmpxchg" wants it cmpxchgq %rsi, (%rdx) ret - - COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) +COMMPAGE_DESCRIPTOR(compare_and_swap64_up_64,_COMM_PAGE_COMPARE_AND_SWAP64,kUP,0) // This is a subroutine used by: // bool OSAtomicTestAndSet( uint32_t n, void *value ); @@ -301,20 +276,16 @@ Lcompare_and_swap64_up_64: // value = %rsi // Returns: old value of bit in CF - .code64 -Lbit_test_and_set_mp_64: +COMMPAGE_FUNCTION_START(bit_test_and_set_mp_64, 64, 4) lock btsl %edi, (%rsi) ret +COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP) - COMMPAGE_DESCRIPTOR(bit_test_and_set_mp_64,_COMM_PAGE_BTS,0,kUP) - - .code64 -Lbit_test_and_set_up_64: +COMMPAGE_FUNCTION_START(bit_test_and_set_up_64, 64, 4) btsl %edi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0) +COMMPAGE_DESCRIPTOR(bit_test_and_set_up_64,_COMM_PAGE_BTS,kUP,0) // This is a subroutine used by: // bool OSAtomicTestAndClear( uint32_t n, void *value ); @@ -323,20 +294,16 @@ Lbit_test_and_set_up_64: // value = %rsi // Returns: old value of bit in CF - 
.code64 -Lbit_test_and_clear_mp_64: +COMMPAGE_FUNCTION_START(bit_test_and_clear_mp_64, 64, 4) lock btrl %edi, (%rsi) ret +COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP) - COMMPAGE_DESCRIPTOR(bit_test_and_clear_mp_64,_COMM_PAGE_BTC,0,kUP) - - .code64 -Lbit_test_and_clear_up_64: +COMMPAGE_FUNCTION_START(bit_test_and_clear_up_64, 64, 4) btrl %edi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0) +COMMPAGE_DESCRIPTOR(bit_test_and_clear_up_64,_COMM_PAGE_BTC,kUP,0) // This is a subroutine used by: // int32_t OSAtomicAdd32( int32_t amt, int32_t *value ); @@ -346,20 +313,16 @@ Lbit_test_and_clear_up_64: // Returns: old value in %edi // NB: OSAtomicAdd32 returns the new value, so clients will add amt to %edi - .code64 -Latomic_add32_mp_64: +COMMPAGE_FUNCTION_START(atomic_add32_mp_64, 64, 4) lock xaddl %edi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP) +COMMPAGE_DESCRIPTOR(atomic_add32_mp_64,_COMM_PAGE_ATOMIC_ADD32,0,kUP) - .code64 -Latomic_add32_up_64: +COMMPAGE_FUNCTION_START(atomic_add32_up_64, 64, 4) xaddl %edi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0) +COMMPAGE_DESCRIPTOR(atomic_add32_up_64,_COMM_PAGE_ATOMIC_ADD32,kUP,0) // This is a subroutine used by: // int64_t OSAtomicAdd64( int64_t amt, int64_t *value ); @@ -369,20 +332,16 @@ Latomic_add32_up_64: // Returns: old value in %rdi // NB: OSAtomicAdd64 returns the new value, so clients will add amt to %rdi - .code64 -Latomic_add64_mp_64: +COMMPAGE_FUNCTION_START(atomic_add64_mp_64, 64, 4) lock xaddq %rdi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP) +COMMPAGE_DESCRIPTOR(atomic_add64_mp_64,_COMM_PAGE_ATOMIC_ADD64,0,kUP) - .code64 -Latomic_add64_up_64: +COMMPAGE_FUNCTION_START(atomic_add64_up_64, 64, 4) xaddq %rdi, (%rsi) ret - - COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0) 
+COMMPAGE_DESCRIPTOR(atomic_add64_up_64,_COMM_PAGE_ATOMIC_ADD64,kUP,0) /* @@ -394,8 +353,9 @@ Latomic_add64_up_64: * void OSAtomicEnqueue( OSQueueHead *list, void *new, size_t offset); */ - .code64 -LAtomicEnqueue_64: // %rdi == list head, %rsi == new, %rdx == offset +// %rdi == list head, %rsi == new, %rdx == offset + +COMMPAGE_FUNCTION_START(AtomicEnqueue_64, 64, 4) pushq %rbx movq %rsi,%rbx // %rbx == new movq %rdx,%rsi // %rsi == offset @@ -410,14 +370,14 @@ LAtomicEnqueue_64: // %rdi == list head, %rsi == new, %rdx == offset jnz 1b popq %rbx ret - - COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0) +COMMPAGE_DESCRIPTOR(AtomicEnqueue_64,_COMM_PAGE_ENQUEUE,0,0) /* void* OSAtomicDequeue( OSQueueHead *list, size_t offset); */ - .code64 -LAtomicDequeue_64: // %rdi == list head, %rsi == offset +// %rdi == list head, %rsi == offset + +COMMPAGE_FUNCTION_START(AtomicDequeue_64, 64, 4) pushq %rbx movq (%rdi),%rax // %rax == ptr to 1st element in Q movq 8(%rdi),%rdx // %rdx == current generation count @@ -433,5 +393,4 @@ LAtomicDequeue_64: // %rdi == list head, %rsi == offset 2: popq %rbx ret // ptr to 1st element in Q still in %rax - - COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0) +COMMPAGE_DESCRIPTOR(AtomicDequeue_64,_COMM_PAGE_DEQUEUE,0,0) diff --git a/osfmk/i386/commpage/bcopy_scalar.s b/osfmk/i386/commpage/bcopy_scalar.s index dfedd73b3..f87242ac6 100644 --- a/osfmk/i386/commpage/bcopy_scalar.s +++ b/osfmk/i386/commpage/bcopy_scalar.s @@ -71,9 +71,7 @@ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ -.text -.align 5, 0x90 -Lbcopy_scalar: +COMMPAGE_FUNCTION_START(bcopy_scalar, 32, 5) pushl %ebp /* set up a frame for backtraces */ movl %esp,%ebp pushl %esi @@ -82,9 +80,9 @@ Lbcopy_scalar: movl 12(%ebp),%edi jmp 1f /* -** These need to be 32 bytes from Lbcopy_scalar -*/ -.align 5, 0x90 + * These need to be 32 bytes from Lbcopy_scalar + */ + .align 5, 0x90 Lmemcpy_scalar: Lmemmove_scalar: pushl %ebp /* set up a frame for 
backtraces */ @@ -135,4 +133,4 @@ Lmemmove_scalar: cld ret - COMMPAGE_DESCRIPTOR(bcopy_scalar,_COMM_PAGE_BCOPY,0,kHasSSE2+kHasSupplementalSSE3) +COMMPAGE_DESCRIPTOR(bcopy_scalar,_COMM_PAGE_BCOPY,0,kHasSSE2+kHasSupplementalSSE3) diff --git a/osfmk/i386/commpage/bcopy_sse2.s b/osfmk/i386/commpage/bcopy_sse2.s index 5e5fa35c6..9e19b3892 100644 --- a/osfmk/i386/commpage/bcopy_sse2.s +++ b/osfmk/i386/commpage/bcopy_sse2.s @@ -44,9 +44,7 @@ // void bcopy(const void *src, void *dst, size_t len); - .text - .align 5, 0x90 -Lbcopy_sse2: // void bcopy(const void *src, void *dst, size_t len) +COMMPAGE_FUNCTION_START(bcopy_sse2, 32, 5) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %esi @@ -472,5 +470,4 @@ LReverseUnalignedLoop: // loop over 64-byte chunks jmp LReverseShort // copy remaining 0..63 bytes and done - - COMMPAGE_DESCRIPTOR(bcopy_sse2,_COMM_PAGE_BCOPY,kHasSSE2+kCache64,kHasSupplementalSSE3) +COMMPAGE_DESCRIPTOR(bcopy_sse2,_COMM_PAGE_BCOPY,kHasSSE2+kCache64,kHasSupplementalSSE3) diff --git a/osfmk/i386/commpage/bcopy_sse3x.s b/osfmk/i386/commpage/bcopy_sse3x.s index 8e42ba042..017895aab 100644 --- a/osfmk/i386/commpage/bcopy_sse3x.s +++ b/osfmk/i386/commpage/bcopy_sse3x.s @@ -40,13 +40,10 @@ #define kVeryLong (500*1024) // large enough for non-temporal stores (must be >= 8192) #define kFastUCode ((16*1024)-15) // cutoff for microcode fastpath for "rep/movsl" - // void bcopy(const void *src, void *dst, size_t len); - .text - .align 5, 0x90 +COMMPAGE_FUNCTION_START(bcopy_sse3x, 32, 5) LZero: -Lbcopy_sse3x: // void bcopy(const void *src, void *dst, size_t len) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %esi @@ -166,27 +163,49 @@ LDestAligned: addl %edx,%esi // point to 1st byte not copied addl %edx,%edi negl %edx // now generate offset to 1st byte to be copied - movl (_COMM_PAGE_BCOPY+LTable-LZero)(,%eax,4),%eax +.set LTableOffset, LTable - LZero + leal (LTableOffset)(,%eax,4), %eax // load jump table entry address, relative to 
LZero + movl _COMM_PAGE_BCOPY(%eax), %eax // load jump table entry + addl $(_COMM_PAGE_BCOPY), %eax // add runtime address of LZero to get final function jmp *%eax .align 2 LTable: // table of copy loop addresses - .long LMod0 + _COMM_PAGE_BCOPY - LZero - .long LMod1 + _COMM_PAGE_BCOPY - LZero - .long LMod2 + _COMM_PAGE_BCOPY - LZero - .long LMod3 + _COMM_PAGE_BCOPY - LZero - .long LMod4 + _COMM_PAGE_BCOPY - LZero - .long LMod5 + _COMM_PAGE_BCOPY - LZero - .long LMod6 + _COMM_PAGE_BCOPY - LZero - .long LMod7 + _COMM_PAGE_BCOPY - LZero - .long LMod8 + _COMM_PAGE_BCOPY - LZero - .long LMod9 + _COMM_PAGE_BCOPY - LZero - .long LMod10 + _COMM_PAGE_BCOPY - LZero - .long LMod11 + _COMM_PAGE_BCOPY - LZero - .long LMod12 + _COMM_PAGE_BCOPY - LZero - .long LMod13 + _COMM_PAGE_BCOPY - LZero - .long LMod14 + _COMM_PAGE_BCOPY - LZero - .long LMod15 + _COMM_PAGE_BCOPY - LZero +// force generation of assembly-time constants. Otherwise assembler +// creates subtractor relocations relative to first external symbol, +// and this file has none +.set LMod0Offset, LMod0 - LZero +.set LMod1Offset, LMod1 - LZero +.set LMod2Offset, LMod2 - LZero +.set LMod3Offset, LMod3 - LZero +.set LMod4Offset, LMod4 - LZero +.set LMod5Offset, LMod5 - LZero +.set LMod6Offset, LMod6 - LZero +.set LMod7Offset, LMod7 - LZero +.set LMod8Offset, LMod8 - LZero +.set LMod9Offset, LMod9 - LZero +.set LMod10Offset, LMod10 - LZero +.set LMod11Offset, LMod11 - LZero +.set LMod12Offset, LMod12 - LZero +.set LMod13Offset, LMod13 - LZero +.set LMod14Offset, LMod14 - LZero +.set LMod15Offset, LMod15 - LZero + .long LMod0Offset + .long LMod1Offset + .long LMod2Offset + .long LMod3Offset + .long LMod4Offset + .long LMod5Offset + .long LMod6Offset + .long LMod7Offset + .long LMod8Offset + .long LMod9Offset + .long LMod10Offset + .long LMod11Offset + .long LMod12Offset + .long LMod13Offset + .long LMod14Offset + .long LMod15Offset // Very long forward moves. These are at least several pages. 
They are special cased @@ -801,5 +820,4 @@ LReverseUnalignedLoop: // loop over 64-byte chunks jmp LReverseShort // copy remaining 0..63 bytes and done - - COMMPAGE_DESCRIPTOR(bcopy_sse3x,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2) +COMMPAGE_DESCRIPTOR(bcopy_sse3x,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2) diff --git a/osfmk/i386/commpage/bcopy_sse3x_64.s b/osfmk/i386/commpage/bcopy_sse3x_64.s index 53f4ed76a..2a0e46be9 100644 --- a/osfmk/i386/commpage/bcopy_sse3x_64.s +++ b/osfmk/i386/commpage/bcopy_sse3x_64.s @@ -40,14 +40,10 @@ #define kVeryLong (500*1024) // large enough for non-temporal stores (>=8192 and <2GB) #define kFastUCode ((16*1024)-15) // cutoff for microcode fastpath for "rep/movsl" - // void bcopy(const void *src, void *dst, size_t len); - .text - .code64 - .align 5, 0x90 +COMMPAGE_FUNCTION_START(bcopy_sse3x_64, 64, 5) LZero: -Lbcopy_sse3x_64: // void bcopy(const void *src, void *dst, size_t len) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp movq %rsi,%rax // copy dest ptr @@ -151,10 +147,19 @@ LDestAligned: andl $63,%edx // get remaining bytes for LShort andl $15,%eax // mask to low 4 bits of source address andq $-64,%rcx // get number of bytes we will copy in inner loop -// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block. -// lea LTable(%rip),%r8 // point to dispatch table - movq $(_COMM_PAGE_32_TO_64(_COMM_PAGE_BCOPY)),%r8 // work around 4586528 - addq $(LTable-LZero),%r8 // work around 4586528 +// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block in +// a 32-bit object file (4586528). Generate the leaq opcode manually. 
+#if defined(__i386__) + .byte 0x4c + .byte 0x8d + .byte 0x05 + .long LTable-LRIP +LRIP: +#elif defined(__x86_64__) + leaq LTable(%rip), %r8 +#else +#error Unsupported architecture +#endif addq %rcx,%rsi // point to 1st byte not copied addq %rcx,%rdi movl (%r8,%rax,4),%eax // get offset of routine @@ -164,22 +169,41 @@ LDestAligned: .align 2 LTable: // table of copy loop addresses - .long (LMod0 - LTable) - .long (LMod1 - LTable) - .long (LMod2 - LTable) - .long (LMod3 - LTable) - .long (LMod4 - LTable) - .long (LMod5 - LTable) - .long (LMod6 - LTable) - .long (LMod7 - LTable) - .long (LMod8 - LTable) - .long (LMod9 - LTable) - .long (LMod10 - LTable) - .long (LMod11 - LTable) - .long (LMod12 - LTable) - .long (LMod13 - LTable) - .long (LMod14 - LTable) - .long (LMod15 - LTable) +// force generation of assembly-time constants. Otherwise assembler +// creates subtractor relocations relative to first external symbol, +// and this file has none +.set LMod0Offset, LMod0 - LTable +.set LMod1Offset, LMod1 - LTable +.set LMod2Offset, LMod2 - LTable +.set LMod3Offset, LMod3 - LTable +.set LMod4Offset, LMod4 - LTable +.set LMod5Offset, LMod5 - LTable +.set LMod6Offset, LMod6 - LTable +.set LMod7Offset, LMod7 - LTable +.set LMod8Offset, LMod8 - LTable +.set LMod9Offset, LMod9 - LTable +.set LMod10Offset, LMod10 - LTable +.set LMod11Offset, LMod11 - LTable +.set LMod12Offset, LMod12 - LTable +.set LMod13Offset, LMod13 - LTable +.set LMod14Offset, LMod14 - LTable +.set LMod15Offset, LMod15 - LTable + .long LMod0Offset + .long LMod1Offset + .long LMod2Offset + .long LMod3Offset + .long LMod4Offset + .long LMod5Offset + .long LMod6Offset + .long LMod7Offset + .long LMod8Offset + .long LMod9Offset + .long LMod10Offset + .long LMod11Offset + .long LMod12Offset + .long LMod13Offset + .long LMod14Offset + .long LMod15Offset // Very long forward moves. These are at least several pages. 
They are special cased @@ -793,5 +817,4 @@ LReverseUnalignedLoop: // loop over 64-byte chunks jmp LReverseShort // copy remaining 0..63 bytes and done - - COMMPAGE_DESCRIPTOR(bcopy_sse3x_64,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2) +COMMPAGE_DESCRIPTOR(bcopy_sse3x_64,_COMM_PAGE_BCOPY,kHasSSE2+kHasSupplementalSSE3+kCache64,kHasSSE4_2) diff --git a/osfmk/i386/commpage/bcopy_sse42.s b/osfmk/i386/commpage/bcopy_sse42.s index 9ddd281ef..6a0bcd528 100644 --- a/osfmk/i386/commpage/bcopy_sse42.s +++ b/osfmk/i386/commpage/bcopy_sse42.s @@ -40,9 +40,7 @@ // void bcopy(const void *src, void *dst, size_t len); - .text - .align 5, 0x90 -Lbcopy_sse42: // void bcopy(const void *src, void *dst, size_t len) +COMMPAGE_FUNCTION_START(bcopy_sse42, 32, 5) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %esi diff --git a/osfmk/i386/commpage/bcopy_sse42_64.s b/osfmk/i386/commpage/bcopy_sse42_64.s index 7de012622..c8817d955 100644 --- a/osfmk/i386/commpage/bcopy_sse42_64.s +++ b/osfmk/i386/commpage/bcopy_sse42_64.s @@ -40,10 +40,7 @@ // void bcopy(const void *src, void *dst, size_t len); - .text - .code64 - .align 5, 0x90 -Lbcopy_sse42_64: // void bcopy(const void *src, void *dst, size_t len) +COMMPAGE_FUNCTION_START(bcopy_sse42_64, 64, 5) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp movq %rsi,%rax // copy dest ptr diff --git a/osfmk/i386/commpage/bzero_scalar.s b/osfmk/i386/commpage/bzero_scalar.s index 7839413d7..6c496b9e9 100644 --- a/osfmk/i386/commpage/bzero_scalar.s +++ b/osfmk/i386/commpage/bzero_scalar.s @@ -68,9 +68,7 @@ * J.T. Conklin (jtc@wimsey.com), Winning Strategies, Inc. 
*/ -.text -.align 5, 0x90 -Lbzero_scalar: +COMMPAGE_FUNCTION_START(bzero_scalar, 32, 4) pushl %ebp /* set up a frame for backtraces */ movl %esp,%ebp pushl %edi @@ -114,4 +112,4 @@ L1: rep popl %ebp ret - COMMPAGE_DESCRIPTOR(bzero_scalar,_COMM_PAGE_BZERO,0,kHasSSE2) +COMMPAGE_DESCRIPTOR(bzero_scalar,_COMM_PAGE_BZERO,0,kHasSSE2) diff --git a/osfmk/i386/commpage/bzero_sse2.s b/osfmk/i386/commpage/bzero_sse2.s index a80418bd9..be5facd29 100644 --- a/osfmk/i386/commpage/bzero_sse2.s +++ b/osfmk/i386/commpage/bzero_sse2.s @@ -41,10 +41,9 @@ #define kShort 80 // too short to bother with SSE (must be >=80) #define kVeryLong (1024*1024) +// void bzero(void *b, size_t len); - .text - .align 5, 0x90 -Lbzero_sse2: // void bzero(void *b, size_t len); +COMMPAGE_FUNCTION_START(bzero_sse2, 32, 5) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %edi @@ -160,5 +159,4 @@ LVeryLong: sfence // required by non-temporal stores jmp Lshort - - COMMPAGE_DESCRIPTOR(bzero_sse2,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2) +COMMPAGE_DESCRIPTOR(bzero_sse2,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2) diff --git a/osfmk/i386/commpage/bzero_sse2_64.s b/osfmk/i386/commpage/bzero_sse2_64.s index ef494cbbf..c0ec8a458 100644 --- a/osfmk/i386/commpage/bzero_sse2_64.s +++ b/osfmk/i386/commpage/bzero_sse2_64.s @@ -41,11 +41,9 @@ #define kShort 80 // too short to bother with SSE (must be >=80) #define kVeryLong (1024*1024) +// void bzero(void *b, size_t len); - .text - .code64 - .align 5, 0x90 -Lbzero_sse2_64: // void bzero(void *b, size_t len); +COMMPAGE_FUNCTION_START(bzero_sse2_64, 64, 5) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp xorl %eax,%eax // set fill data to 0 @@ -160,5 +158,4 @@ LVeryLong: sfence // required by non-temporal stores jmp Lshort - - COMMPAGE_DESCRIPTOR(bzero_sse2_64,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2) +COMMPAGE_DESCRIPTOR(bzero_sse2_64,_COMM_PAGE_BZERO,kHasSSE2,kHasSSE4_2) diff --git a/osfmk/i386/commpage/bzero_sse42.s b/osfmk/i386/commpage/bzero_sse42.s index 
8db6b07a9..32e8ea65f 100644 --- a/osfmk/i386/commpage/bzero_sse42.s +++ b/osfmk/i386/commpage/bzero_sse42.s @@ -43,9 +43,7 @@ #define kShort 80 // too short to bother with SSE (must be >=80) - .text - .align 5, 0x90 -Lbzero_sse42: // void bzero(void *b, size_t len); +COMMPAGE_FUNCTION_START(bzero_sse42, 32, 5) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %edi diff --git a/osfmk/i386/commpage/bzero_sse42_64.s b/osfmk/i386/commpage/bzero_sse42_64.s index 5f869398c..999b9311a 100644 --- a/osfmk/i386/commpage/bzero_sse42_64.s +++ b/osfmk/i386/commpage/bzero_sse42_64.s @@ -44,10 +44,9 @@ #define kShort 80 // too short to bother with SSE (must be >=80) - .text - .code64 - .align 5, 0x90 -Lbzero_sse42_64: // void bzero(void *b, size_t len); +// void bzero(void *b, size_t len); + +COMMPAGE_FUNCTION_START(bzero_sse42_64, 64, 5) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp xorl %eax,%eax // set fill data to 0 diff --git a/osfmk/i386/commpage/cacheflush.s b/osfmk/i386/commpage/cacheflush.s index 00632d656..4d9e98b0b 100644 --- a/osfmk/i386/commpage/cacheflush.s +++ b/osfmk/i386/commpage/cacheflush.s @@ -29,13 +29,10 @@ #include #include - .text - .align 2, 0x90 - // void sysFlushDcache( void *p, size_t len ); // 32-bit version -Lsys_flush_dcache: +COMMPAGE_FUNCTION_START(sys_flush_dcache, 32, 4) movl 8(%esp),%ecx // get length movl 4(%esp),%edx // get ptr testl %ecx,%ecx // length 0? @@ -50,14 +47,13 @@ Lsys_flush_dcache: mfence // make sure memory is updated before we return 2: ret - - COMMPAGE_DESCRIPTOR(sys_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) +COMMPAGE_DESCRIPTOR(sys_flush_dcache,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) // void sysFlushDcache( void *p, size_t len ); // 64-bit version - .code64 -Lsys_flush_dcache_64: // %rdi = ptr, %rsi = length +// %rdi = ptr, %rsi = length +COMMPAGE_FUNCTION_START(sys_flush_dcache_64, 64, 4) testq %rsi,%rsi // length 0? 
jz 2f // yes mfence // ensure previous stores make it to memory @@ -70,17 +66,14 @@ Lsys_flush_dcache_64: // %rdi = ptr, %rsi = length mfence // make sure memory is updated before we return 2: ret - .code32 - COMMPAGE_DESCRIPTOR(sys_flush_dcache_64,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) - +COMMPAGE_DESCRIPTOR(sys_flush_dcache_64,_COMM_PAGE_FLUSH_DCACHE,kCache64,0) // void sysIcacheInvalidate( void *p, size_t len ); -Lsys_icache_invalidate: +COMMPAGE_FUNCTION_START(sys_icache_invalidate, 32, 4) // This is a NOP on intel processors, since the intent of the API // is to make data executable, and Intel L1Is are coherent with L1D. // We can use same routine both in 32 and 64-bit mode, since it is // just a RET instruction. ret - - COMMPAGE_DESCRIPTOR(sys_icache_invalidate,_COMM_PAGE_FLUSH_ICACHE,0,0) +COMMPAGE_DESCRIPTOR(sys_icache_invalidate,_COMM_PAGE_FLUSH_ICACHE,0,0) diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index b2bd5af50..dea334e43 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,15 +48,18 @@ #include #include #include -#include -#include +#include #include +#include #include +#include +#include #include #include #include #include #include + #include #include @@ -78,12 +81,14 @@ int _cpu_capabilities = 0; // define the capability vector int noVMX = 0; /* if true, do not set kHasAltivec in ppc _cpu_capabilities */ -static uintptr_t next; // next available byte in comm page -static int cur_routine; // comm page address of "current" routine -static int matched; // true if we've found a match for "current" routine +typedef uint32_t commpage_address_t; + +static commpage_address_t next; // next available address in comm page +static commpage_address_t cur_routine; // comm page address of "current" routine +static boolean_t matched; // true if we've found a match for "current" routine static char *commPagePtr; // virtual addr in kernel map of commpage we are working on -static size_t commPageBaseOffset; // add to 32-bit runtime address to get offset in commpage +static commpage_address_t commPageBaseOffset; // subtract from 32-bit runtime address to get offset in virtual commpage in kernel map static commpage_time_data *time_data32 = NULL; static commpage_time_data *time_data64 = NULL; @@ -150,16 +155,16 @@ commpage_allocate( ipc_port_release(handle); - return (void*) kernel_addr; // return address in kernel map + return (void*)(intptr_t)kernel_addr; // return address in kernel map } /* Get address (in kernel map) of a commpage field. */ static void* commpage_addr_of( - int addr_at_runtime ) + commpage_address_t addr_at_runtime ) { - return (void*) ((uintptr_t)commPagePtr + addr_at_runtime - commPageBaseOffset); + return (void*) ((uintptr_t)commPagePtr + (addr_at_runtime - commPageBaseOffset)); } /* Determine number of CPUs on this system. 
We cannot rely on @@ -257,23 +262,23 @@ _get_cpu_capabilities(void) static void commpage_stuff( - int address, + commpage_address_t address, const void *source, int length ) { void *dest = commpage_addr_of(address); - if ((uintptr_t)dest < next) - panic("commpage overlap at address 0x%x, %p < 0x%lx", address, dest, next); + if (address < next) + panic("commpage overlap at address 0x%p, 0x%x < 0x%x", dest, address, next); bcopy(source,dest,length); - next = ((uintptr_t)dest + length); + next = address + length; } static void commpage_stuff_swap( - int address, + commpage_address_t address, void *source, int length, int legacy ) @@ -297,7 +302,7 @@ commpage_stuff_swap( static void commpage_stuff2( - int address, + commpage_address_t address, void *source, int length, int legacy ) @@ -312,11 +317,11 @@ static void commpage_stuff_routine( commpage_descriptor *rd ) { - int must,cant; + uint32_t must,cant; if (rd->commpage_address != cur_routine) { if ((cur_routine!=0) && (matched==0)) - panic("commpage no match for last, next address %08lx", rd->commpage_address); + panic("commpage no match for last, next address %08x", rd->commpage_address); cur_routine = rd->commpage_address; matched = 0; } @@ -326,7 +331,7 @@ commpage_stuff_routine( if ((must == rd->musthave) && (cant == 0)) { if (matched) - panic("commpage multiple matches for address %08lx", rd->commpage_address); + panic("commpage multiple matches for address %08x", rd->commpage_address); matched = 1; commpage_stuff(rd->commpage_address,rd->code_address,rd->code_length); @@ -343,31 +348,32 @@ commpage_populate_one( vm_map_t submap, // commpage32_map or compage64_map char ** kernAddressPtr, // &commPagePtr32 or &commPagePtr64 size_t area_used, // _COMM_PAGE32_AREA_USED or _COMM_PAGE64_AREA_USED - size_t base_offset, // will become commPageBaseOffset + commpage_address_t base_offset, // will become commPageBaseOffset commpage_descriptor** commpage_routines, // list of routine ptrs for this commpage boolean_t legacy, 
// true if 32-bit commpage commpage_time_data** time_data, // &time_data32 or &time_data64 const char* signature ) // "commpage 32-bit" or "commpage 64-bit" { short c2; + int c4; static double two52 = 1048576.0 * 1048576.0 * 4096.0; // 2**52 static double ten6 = 1000000.0; // 10**6 commpage_descriptor **rd; short version = _COMM_PAGE_THIS_VERSION; int swapcaps; - next = (uintptr_t) NULL; + next = 0; cur_routine = 0; commPagePtr = (char *)commpage_allocate( submap, (vm_size_t) area_used ); *kernAddressPtr = commPagePtr; // save address either in commPagePtr32 or 64 commPageBaseOffset = base_offset; - + *time_data = commpage_addr_of( _COMM_PAGE_TIME_DATA_START ); /* Stuff in the constants. We move things into the comm page in strictly * ascending order, so we can check for overlap and panic if so. */ - commpage_stuff(_COMM_PAGE_SIGNATURE,signature,strlen(signature)); + commpage_stuff(_COMM_PAGE_SIGNATURE,signature,(int)strlen(signature)); commpage_stuff2(_COMM_PAGE_VERSION,&version,sizeof(short),legacy); commpage_stuff(_COMM_PAGE_CPU_CAPABILITIES,&_cpu_capabilities,sizeof(int)); @@ -391,6 +397,9 @@ commpage_populate_one( else if (_cpu_capabilities & kCache128) c2 = 128; commpage_stuff(_COMM_PAGE_CACHE_LINESIZE,&c2,2); + + c4 = MP_SPIN_TRIES; + commpage_stuff(_COMM_PAGE_SPIN_COUNT,&c4,4); if ( legacy ) { commpage_stuff2(_COMM_PAGE_2_TO_52,&two52,8,legacy); @@ -403,15 +412,15 @@ commpage_populate_one( if (!matched) panic("commpage no match on last routine"); - if (next > (uintptr_t)_COMM_PAGE_END) - panic("commpage overflow: next = 0x%08lx, commPagePtr = 0x%08lx", next, (uintptr_t)commPagePtr); + if (next > _COMM_PAGE_END) + panic("commpage overflow: next = 0x%08x, commPagePtr = 0x%p", next, commPagePtr); if ( legacy ) { - next = (uintptr_t) NULL; + next = 0; for( rd = ba_descriptors; *rd != NULL ; rd++ ) commpage_stuff_routine(*rd); - next = (uintptr_t) NULL; + next = 0; commpage_stuff_routine(&sigdata_descriptor); } } @@ -437,22 +446,25 @@ commpage_populate( void ) 
TRUE, /* legacy (32-bit) commpage */ &time_data32, "commpage 32-bit"); +#ifndef __LP64__ pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, _COMM_PAGE32_AREA_USED/INTEL_PGBYTES); - +#endif time_data64 = time_data32; /* if no 64-bit commpage, point to 32-bit */ if (_cpu_capabilities & k64Bit) { commpage_populate_one( commpage64_map, &commPagePtr64, _COMM_PAGE64_AREA_USED, - _COMM_PAGE32_START_ADDRESS, /* because kernel is built 32-bit */ + _COMM_PAGE32_START_ADDRESS, /* commpage address are relative to 32-bit commpage placement */ commpage_64_routines, FALSE, /* not a legacy commpage */ &time_data64, "commpage 64-bit"); +#ifndef __LP64__ pmap_commpage64_init((vm_offset_t) commPagePtr64, _COMM_PAGE64_BASE_ADDRESS, _COMM_PAGE64_AREA_USED/INTEL_PGBYTES); +#endif } rtc_nanotime_init_commpage(); @@ -555,3 +567,83 @@ commpage_disable_timestamp( void ) p32->gtod_generation = next_gen; /* mark data as valid */ p64->gtod_generation = next_gen; } + + +/* Update _COMM_PAGE_MEMORY_PRESSURE. Called periodically from vm's compute_memory_pressure() */ + +void +commpage_set_memory_pressure( + unsigned int pressure ) +{ + char *cp; + uint32_t *ip; + + cp = commPagePtr32; + if ( cp ) { + cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_BASE_ADDRESS); + ip = (uint32_t*) cp; + *ip = (uint32_t) pressure; + } + + cp = commPagePtr64; + if ( cp ) { + cp += (_COMM_PAGE_MEMORY_PRESSURE - _COMM_PAGE32_START_ADDRESS); + ip = (uint32_t*) cp; + *ip = (uint32_t) pressure; + } + +} + + +/* Update _COMM_PAGE_SPIN_COUNT. We might want to reduce when running on a battery, etc. 
*/ + +void +commpage_set_spin_count( + unsigned int count ) +{ + char *cp; + uint32_t *ip; + + if (count == 0) /* we test for 0 after decrement, not before */ + count = 1; + + cp = commPagePtr32; + if ( cp ) { + cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_BASE_ADDRESS); + ip = (uint32_t*) cp; + *ip = (uint32_t) count; + } + + cp = commPagePtr64; + if ( cp ) { + cp += (_COMM_PAGE_SPIN_COUNT - _COMM_PAGE32_START_ADDRESS); + ip = (uint32_t*) cp; + *ip = (uint32_t) count; + } + +} + + +/* Check to see if a given address is in the Preemption Free Zone (PFZ) */ + +uint32_t +commpage_is_in_pfz32(uint32_t addr32) +{ + if ( (addr32 >= _COMM_PAGE_PFZ_START) && (addr32 < _COMM_PAGE_PFZ_END)) { + return 1; + } + else + return 0; +} + +uint32_t +commpage_is_in_pfz64(addr64_t addr64) +{ + if ( (addr64 >= _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_START)) + && (addr64 < _COMM_PAGE_32_TO_64(_COMM_PAGE_PFZ_END))) { + return 1; + } + else + return 0; +} + diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h index 0288a906c..013ca246e 100644 --- a/osfmk/i386/commpage/commpage.h +++ b/osfmk/i386/commpage/commpage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,11 +31,25 @@ #ifndef __ASSEMBLER__ #include +#include +#include +#include #endif /* __ASSEMBLER__ */ +/* When trying to acquire a spinlock or mutex, we will spin in + * user mode for awhile, before entering the kernel to relinquish. + * MP_SPIN_TRIES is the initial value of _COMM_PAGE_SPIN_COUNT. + * The idea is that _COMM_PAGE_SPIN_COUNT will be adjusted up or + * down as the machine is plugged in/out, etc. + * At present spinlocks do not use _COMM_PAGE_SPIN_COUNT. + * They use MP_SPIN_TRIES directly. + */ +#define MP_SPIN_TRIES 1000 + + /* The following macro is used to generate the 64-bit commpage address for a given * routine, based on its 32-bit address. 
This is used in the kernel to compile - * the 64-bit commpage. Since the kernel is a 32-bit object, cpu_capabilities.h + * the 64-bit commpage. Since the kernel can be a 32-bit object, cpu_capabilities.h * only defines the 32-bit address. */ #define _COMM_PAGE_32_TO_64( ADDRESS ) ( ADDRESS + _COMM_PAGE64_START_ADDRESS - _COMM_PAGE32_START_ADDRESS ) @@ -43,19 +57,69 @@ #ifdef __ASSEMBLER__ +#define COMMPAGE_DESCRIPTOR_NAME(label) _commpage_ ## label + +#if defined (__i386__) + +#define COMMPAGE_DESCRIPTOR_FIELD_POINTER .long +#define COMMPAGE_DESCRIPTOR_REFERENCE(label) \ + .long COMMPAGE_DESCRIPTOR_NAME(label) + +#elif defined (__x86_64__) + +#define COMMPAGE_DESCRIPTOR_FIELD_POINTER .quad +#define COMMPAGE_DESCRIPTOR_REFERENCE(label) \ + .quad COMMPAGE_DESCRIPTOR_NAME(label) + +#else +#error unsupported architecture +#endif + +#define COMMPAGE_FUNCTION_START(label,codetype,alignment) \ +.text ;\ +.code ## codetype ;\ +.align alignment, 0x90 ;\ +L ## label ## : + #define COMMPAGE_DESCRIPTOR(label,address,must,cant) \ -L ## label ## _end: ;\ -.const_data ;\ -L ## label ## _size = L ## label ## _end - L ## label ;\ -.private_extern _commpage_ ## label ;\ -_commpage_ ## label ## : ;\ - .long L ## label ;\ - .long L ## label ## _size ;\ - .long address ;\ - .long must ;\ - .long cant ;\ +L ## label ## _end: ;\ +.set L ## label ## _size, L ## label ## _end - L ## label ;\ +.const_data ;\ +.private_extern COMMPAGE_DESCRIPTOR_NAME(label) ;\ +COMMPAGE_DESCRIPTOR_NAME(label) ## : ;\ + COMMPAGE_DESCRIPTOR_FIELD_POINTER L ## label ;\ + .long L ## label ## _size ;\ + .long address ;\ + .long must ;\ + .long cant ;\ .text + +/* COMMPAGE_CALL(target,from,start) + * + * This macro compiles a relative near call to one + * commpage routine from another. + * The assembler cannot handle this directly because the code + * is not being assembled at the address at which it will execute. 
+ * The alternative to this macro would be to use an + * indirect call, which is slower because the target of an + * indirect branch is poorly predicted. + * The macro arguments are: + * target = the commpage routine we are calling + * from = the commpage routine we are in now + * start = the label at the start of the code for this func + * This is admitedly ugly and fragile. Is there a better way? + */ +#define COMMPAGE_CALL(target,from,start) \ + COMMPAGE_CALL_INTERNAL(target,from,start,__LINE__) + +#define COMMPAGE_CALL_INTERNAL(target,from,start,unique) \ + .byte 0xe8 ;\ +.set UNIQUEID(unique), L ## start - . + target - from - 4 ;\ + .long UNIQUEID(unique) + +#define UNIQUEID(name) L ## name + #else /* __ASSEMBLER__ */ /* Each potential commpage routine is described by one of these. @@ -64,11 +128,11 @@ _commpage_ ## label ## : ;\ */ typedef struct commpage_descriptor { - void *code_address; // address of code - long code_length; // length in bytes - long commpage_address; // put at this address (_COMM_PAGE_BCOPY etc) - long musthave; // _cpu_capability bits we must have - long canthave; // _cpu_capability bits we can't have + void *code_address; // address of code + uint32_t code_length; // length in bytes + uint32_t commpage_address; // put at this address (_COMM_PAGE_BCOPY etc) + uint32_t musthave; // _cpu_capability bits we must have + uint32_t canthave; // _cpu_capability bits we can't have } commpage_descriptor; @@ -91,13 +155,15 @@ extern char *commPagePtr32; // virt address of 32-bit commpage in kernel map extern char *commPagePtr64; // ...and of 64-bit commpage extern void commpage_set_timestamp(uint64_t abstime, uint64_t secs); - extern void commpage_disable_timestamp( void ); - extern void commpage_set_nanotime(uint64_t tsc_base, uint64_t ns_base, uint32_t scale, uint32_t shift); - +extern void commpage_set_memory_pressure(unsigned int pressure); +extern void commpage_set_spin_count(unsigned int count); extern void commpage_sched_gen_inc(void); 
+extern uint32_t commpage_is_in_pfz32(uint32_t); +extern uint32_t commpage_is_in_pfz64(addr64_t); + #endif /* __ASSEMBLER__ */ #endif /* _I386_COMMPAGE_H */ diff --git a/osfmk/i386/commpage/commpage_asm.s b/osfmk/i386/commpage/commpage_asm.s index 6f69fa7b2..4e3ad82e2 100644 --- a/osfmk/i386/commpage/commpage_asm.s +++ b/osfmk/i386/commpage/commpage_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,124 +27,177 @@ */ #include - +#include +#include +#include /* * extern void commpage_sched_gen_inc(void); */ .text - .align 2, 0x90 - .globl _commpage_sched_gen_inc + .globl _commpage_sched_gen_inc _commpage_sched_gen_inc: - push %ebp - mov %esp,%ebp +#if defined (__x86_64__) + FRAME + + /* Increment 32-bit commpage field if present */ + movq _commPagePtr32(%rip),%rdx + testq %rdx,%rdx + je 1f + subq $(ASM_COMM_PAGE32_BASE_ADDRESS),%rdx + lock + incl ASM_COMM_PAGE_SCHED_GEN(%rdx) + /* Increment 64-bit commpage field if present */ + movq _commPagePtr64(%rip),%rdx + testq %rdx,%rdx + je 1f + subq $(ASM_COMM_PAGE32_START_ADDRESS),%rdx + lock + incl ASM_COMM_PAGE_SCHED_GEN(%rdx) +1: + EMARF + ret +#elif defined (__i386__) + FRAME + /* Increment 32-bit commpage field if present */ mov _commPagePtr32,%edx testl %edx,%edx je 1f - sub $(_COMM_PAGE32_BASE_ADDRESS),%edx + sub $(ASM_COMM_PAGE32_BASE_ADDRESS),%edx lock - incl _COMM_PAGE_SCHED_GEN(%edx) + incl ASM_COMM_PAGE_SCHED_GEN(%edx) /* Increment 64-bit commpage field if present */ mov _commPagePtr64,%edx testl %edx,%edx je 1f - sub $(_COMM_PAGE32_START_ADDRESS),%edx + sub $(ASM_COMM_PAGE32_START_ADDRESS),%edx lock - incl _COMM_PAGE_SCHED_GEN(%edx) + incl ASM_COMM_PAGE_SCHED_GEN(%edx) 1: - pop %ebp + EMARF ret - -#define CPN(routine) _commpage_ ## routine +#else +#error unsupported architecture +#endif /* pointers to the 32-bit commpage routine descriptors */ /* WARNING: these must 
be sorted by commpage address! */ .const_data - .align 2 + .align 3 .globl _commpage_32_routines _commpage_32_routines: - .long CPN(compare_and_swap32_mp) - .long CPN(compare_and_swap32_up) - .long CPN(compare_and_swap64_mp) - .long CPN(compare_and_swap64_up) - .long CPN(AtomicEnqueue) - .long CPN(AtomicDequeue) - .long CPN(memory_barrier) - .long CPN(memory_barrier_sse2) - .long CPN(atomic_add32_mp) - .long CPN(atomic_add32_up) - .long CPN(mach_absolute_time) - .long CPN(spin_lock_try_mp) - .long CPN(spin_lock_try_up) - .long CPN(spin_lock_mp) - .long CPN(spin_lock_up) - .long CPN(spin_unlock) - .long CPN(pthread_getspecific) - .long CPN(gettimeofday) - .long CPN(sys_flush_dcache) - .long CPN(sys_icache_invalidate) - .long CPN(pthread_self) -// .long CPN(relinquish) - .long CPN(bit_test_and_set_mp) - .long CPN(bit_test_and_set_up) - .long CPN(bit_test_and_clear_mp) - .long CPN(bit_test_and_clear_up) - .long CPN(bzero_scalar) - .long CPN(bzero_sse2) - .long CPN(bzero_sse42) - .long CPN(bcopy_scalar) - .long CPN(bcopy_sse2) - .long CPN(bcopy_sse3x) - .long CPN(bcopy_sse42) - .long CPN(memset_pattern_sse2) - .long CPN(longcopy_sse3x) - .long CPN(nanotime) - .long CPN(nanotime_slow) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_up) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_up) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicEnqueue) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicDequeue) + COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier) + COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier_sse2) + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_up) + COMMPAGE_DESCRIPTOR_REFERENCE(cpu_number) + COMMPAGE_DESCRIPTOR_REFERENCE(mach_absolute_time) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_up) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_up) 
+ COMMPAGE_DESCRIPTOR_REFERENCE(spin_unlock) + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_getspecific) + COMMPAGE_DESCRIPTOR_REFERENCE(gettimeofday) + COMMPAGE_DESCRIPTOR_REFERENCE(sys_flush_dcache) + COMMPAGE_DESCRIPTOR_REFERENCE(sys_icache_invalidate) + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_self) + COMMPAGE_DESCRIPTOR_REFERENCE(preempt) +// COMMPAGE_DESCRIPTOR_REFERENCE(relinquish) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_up) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_mp) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_up) + COMMPAGE_DESCRIPTOR_REFERENCE(bzero_scalar) + COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse2) + COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse42) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_scalar) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse2) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3x) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse42) + COMMPAGE_DESCRIPTOR_REFERENCE(memset_pattern_sse2) + COMMPAGE_DESCRIPTOR_REFERENCE(longcopy_sse3x) + COMMPAGE_DESCRIPTOR_REFERENCE(backoff) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoEnqueue) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoDequeue) + COMMPAGE_DESCRIPTOR_REFERENCE(nanotime) + COMMPAGE_DESCRIPTOR_REFERENCE(nanotime_slow) + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_mutex_lock) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock) +#if defined (__i386__) .long 0 +#elif defined (__x86_64__) + .quad 0 +#else +#error unsupported architecture +#endif /* pointers to the 64-bit commpage routine descriptors */ /* WARNING: these must be sorted by commpage address! 
*/ .const_data - .align 2 + .align 3 .globl _commpage_64_routines _commpage_64_routines: - .long CPN(compare_and_swap32_mp_64) - .long CPN(compare_and_swap32_up_64) - .long CPN(compare_and_swap64_mp_64) - .long CPN(compare_and_swap64_up_64) - .long CPN(AtomicEnqueue_64) - .long CPN(AtomicDequeue_64) - .long CPN(memory_barrier_sse2) /* same routine as 32-bit version */ - .long CPN(atomic_add32_mp_64) - .long CPN(atomic_add32_up_64) - .long CPN(atomic_add64_mp_64) - .long CPN(atomic_add64_up_64) - .long CPN(mach_absolute_time) - .long CPN(spin_lock_try_mp_64) - .long CPN(spin_lock_try_up_64) - .long CPN(spin_lock_mp_64) - .long CPN(spin_lock_up_64) - .long CPN(spin_unlock_64) - .long CPN(pthread_getspecific_64) - .long CPN(gettimeofday_64) - .long CPN(sys_flush_dcache_64) - .long CPN(sys_icache_invalidate) /* same routine as 32-bit version, just a "ret" */ - .long CPN(pthread_self_64) - .long CPN(bit_test_and_set_mp_64) - .long CPN(bit_test_and_set_up_64) - .long CPN(bit_test_and_clear_mp_64) - .long CPN(bit_test_and_clear_up_64) - .long CPN(bzero_sse2_64) - .long CPN(bzero_sse42_64) - .long CPN(bcopy_sse3x_64) - .long CPN(bcopy_sse42_64) - .long CPN(memset_pattern_sse2_64) - .long CPN(longcopy_sse3x_64) - .long CPN(nanotime_64) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap32_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(compare_and_swap64_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicEnqueue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicDequeue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(memory_barrier_sse2) /* same routine as 32-bit version */ + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add32_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add64_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(atomic_add64_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(cpu_number_64) + COMMPAGE_DESCRIPTOR_REFERENCE(mach_absolute_time) + 
COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_try_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_lock_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(spin_unlock_64) + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_getspecific_64) + COMMPAGE_DESCRIPTOR_REFERENCE(gettimeofday_64) + COMMPAGE_DESCRIPTOR_REFERENCE(sys_flush_dcache_64) + COMMPAGE_DESCRIPTOR_REFERENCE(sys_icache_invalidate) /* same routine as 32-bit version, just a "ret" */ + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_self_64) + COMMPAGE_DESCRIPTOR_REFERENCE(preempt_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_set_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_mp_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_up_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse2_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse42_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3x_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse42_64) + COMMPAGE_DESCRIPTOR_REFERENCE(memset_pattern_sse2_64) + COMMPAGE_DESCRIPTOR_REFERENCE(longcopy_sse3x_64) + COMMPAGE_DESCRIPTOR_REFERENCE(backoff_64) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoEnqueue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(AtomicFifoDequeue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(nanotime_64) + COMMPAGE_DESCRIPTOR_REFERENCE(pthread_mutex_lock_64) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue_64) + COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock_64) +#if defined (__i386__) .long 0 +#elif defined (__x86_64__) + .quad 0 +#else +#error unsupported architecture +#endif diff --git a/osfmk/i386/commpage/commpage_gettimeofday.s b/osfmk/i386/commpage/commpage_gettimeofday.s index a50dd5410..afa87ca02 100644 --- a/osfmk/i386/commpage/commpage_gettimeofday.s +++ b/osfmk/i386/commpage/commpage_gettimeofday.s @@ -33,10 +33,7 @@ #define NSEC_PER_SEC 1000*1000*1000 #define NSEC_PER_USEC 1000 - .text - .align 2, 0x90 - 
-Lgettimeofday: +COMMPAGE_FUNCTION_START(gettimeofday, 32, 4) push %ebp mov %esp,%ebp push %esi @@ -80,15 +77,11 @@ Lgettimeofday: 4: /* fail */ movl $1,%eax jmp 3b - - COMMPAGE_DESCRIPTOR(gettimeofday,_COMM_PAGE_GETTIMEOFDAY,0,0) +COMMPAGE_DESCRIPTOR(gettimeofday,_COMM_PAGE_GETTIMEOFDAY,0,0) - .code64 - .text - .align 2, 0x90 - -Lgettimeofday_64: // %rdi = ptr to timeval +COMMPAGE_FUNCTION_START(gettimeofday_64, 64, 4) + // %rdi = ptr to timeval pushq %rbp // set up a frame for backtraces movq %rsp,%rbp movq %rdi,%r9 // save ptr to timeval @@ -126,5 +119,4 @@ Lgettimeofday_64: // %rdi = ptr to timeval 4: // fail movl $1,%eax jmp 3b - - COMMPAGE_DESCRIPTOR(gettimeofday_64,_COMM_PAGE_GETTIMEOFDAY,0,0) +COMMPAGE_DESCRIPTOR(gettimeofday_64,_COMM_PAGE_GETTIMEOFDAY,0,0) diff --git a/osfmk/i386/commpage/commpage_mach_absolute_time.s b/osfmk/i386/commpage/commpage_mach_absolute_time.s index f10baef8b..590e4d7b6 100644 --- a/osfmk/i386/commpage/commpage_mach_absolute_time.s +++ b/osfmk/i386/commpage/commpage_mach_absolute_time.s @@ -33,19 +33,15 @@ #include - .text - .align 2, 0x90 - -Lmach_absolute_time: +COMMPAGE_FUNCTION_START(mach_absolute_time, 32, 4) int $0x3 ret +COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,0,0) - COMMPAGE_DESCRIPTOR(mach_absolute_time,_COMM_PAGE_ABSOLUTE_TIME,0,0) - /* return nanotime in %edx:%eax */ -Lnanotime: +COMMPAGE_FUNCTION_START(nanotime, 32, 4) pushl %ebp movl %esp,%ebp pushl %esi @@ -83,12 +79,11 @@ Lnanotime: popl %esi popl %ebp ret - - COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,0,kSlow) +COMMPAGE_DESCRIPTOR(nanotime,_COMM_PAGE_NANOTIME,0,kSlow) /* nanotime routine for machines slower than ~1Gz (SLOW_TSC_THRESHOLD) */ -Lnanotime_slow: +COMMPAGE_FUNCTION_START(nanotime_slow, 32, 4) push %ebp mov %esp,%ebp push %esi @@ -146,17 +141,13 @@ Lnanotime_slow: pop %esi pop %ebp ret /* result in edx:eax */ - - COMMPAGE_DESCRIPTOR(nanotime_slow,_COMM_PAGE_NANOTIME,kSlow,0) 
+COMMPAGE_DESCRIPTOR(nanotime_slow,_COMM_PAGE_NANOTIME,kSlow,0) /* The 64-bit version. We return the 64-bit nanotime in %rax, * and by convention we must preserve %r9, %r10, and %r11. */ - .text - .align 2 - .code64 -Lnanotime_64: // NB: must preserve r9, r10, and r11 +COMMPAGE_FUNCTION_START(nanotime_64, 64, 4) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_TIME_DATA_START),%rsi @@ -179,5 +170,4 @@ Lnanotime_64: // NB: must preserve r9, r10, and r11 jne 1b popq %rbp ret - - COMMPAGE_DESCRIPTOR(nanotime_64,_COMM_PAGE_NANOTIME,0,kSlow) +COMMPAGE_DESCRIPTOR(nanotime_64,_COMM_PAGE_NANOTIME,0,kSlow) diff --git a/osfmk/i386/commpage/cpu_number.s b/osfmk/i386/commpage/cpu_number.s new file mode 100644 index 000000000..d86b13ba1 --- /dev/null +++ b/osfmk/i386/commpage/cpu_number.s @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +#include + +/* + * These commpage routines provide fast access to the logical cpu number + * of the calling processor assuming no pre-emption occurs. This number + * is encoded in the bottom 12-bits of the limit field of the IDTR (the + * Interrupt Descriptor Table Register). The SIDT instruction is used in + * userspace to read this register and thus to gain access to the cpu number. + * The IDTR is loaded by the kernel for each processor at startup - see + * osfmk/i386/mp_desc.c. + */ + +/* return logical cpu number in %eax */ + +COMMPAGE_FUNCTION_START(cpu_number, 32, 4) + push %ebp + mov %esp,%ebp + sub $8, %esp // space to read IDTR + + sidt (%esp) // store limit:base on stack + movw (%esp), %ax // get limit + and $0xfff, %eax // mask off lower 12 bits to return + + mov %ebp,%esp + pop %ebp + ret +COMMPAGE_DESCRIPTOR(cpu_number,_COMM_PAGE_CPU_NUMBER,0,0) + + +/* The 64-bit version. 
+ */ +COMMPAGE_FUNCTION_START(cpu_number_64, 64, 4) + push %rbp + mov %rsp,%rbp + sub $16,%rsp // space to read IDTR + + sidt (%rsp) // store limit:base on stack + movw (%rsp), %ax // get 16-bit limit (register width must match the w suffix) + and $0xfff, %rax // mask off lower 12 bits to return; also clears stale upper bits of %rax + + mov %rbp,%rsp + pop %rbp + ret +COMMPAGE_DESCRIPTOR(cpu_number_64,_COMM_PAGE_CPU_NUMBER,0,0) diff --git a/osfmk/i386/commpage/fifo_queues.s b/osfmk/i386/commpage/fifo_queues.s new file mode 100644 index 000000000..e390a3b17 --- /dev/null +++ b/osfmk/i386/commpage/fifo_queues.s @@ -0,0 +1,418 @@ +/* + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + + +/* PREEMPTION FREE ZONE (PFZ) + * + * A portion of the commpage is special-cased by the kernel to be "preemption free", + * i.e. as if we had disabled interrupts in user mode. This facilitates writing + * "nearly-lockless" code, for example code that must be serialized by a spinlock but + * which we do not want to preempt while the spinlock is held. + * + * The PFZ is implemented by collecting all the "preemption-free" code into a single + * contiguous region of the commpage. Register %ebx is used as a flag register; + * before entering the PFZ, %ebx is cleared. If some event occurs that would normally + * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of + * preempting. Then, when the routine leaves the PFZ we check %ebx and + * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption. + * + * PFZ code must bound the amount of time spent in the PFZ, in order to control + * latency. Backward branches are dangerous and must not be used in a way that + * could inadvertently create a long-running loop. + * + * Because they cannot be implemented reasonably without a lock, we put the "atomic" + * FIFO enqueue and dequeue in the PFZ. As long as we don't take a page fault trying to + * access queue elements, these implementations behave nearly-locklessly. + * But we still must take a spinlock to serialize, and in case of page faults. 
+ */ + +/* + * typedef volatile struct { + * void *opaque1; <-- ptr to first queue element or null + * void *opaque2; <-- ptr to last queue element or null + * int opaque3; <-- spinlock + * } OSFifoQueueHead; + * + * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); + */ + +COMMPAGE_FUNCTION_START(AtomicFifoEnqueue, 32, 4) + pushl %edi + pushl %esi + pushl %ebx + xorl %ebx,%ebx // clear "preemption pending" flag + movl 16(%esp),%edi // %edi == ptr to list head + movl 20(%esp),%esi // %esi == new + movl 24(%esp),%edx // %edx == offset + COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue) + testl %ebx,%ebx // pending preemption? + jz 1f + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue) +1: + popl %ebx + popl %esi + popl %edi + ret +COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue,_COMM_PAGE_FIFO_ENQUEUE,0,0) + + +/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */ + +COMMPAGE_FUNCTION_START(AtomicFifoDequeue, 32, 4) + pushl %edi + pushl %esi + pushl %ebx + xorl %ebx,%ebx // clear "preemption pending" flag + movl 16(%esp),%edi // %edi == ptr to list head + movl 20(%esp),%edx // %edx == offset + COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue) + testl %ebx,%ebx // pending preemption? + jz 1f + pushl %eax // save return value across sysenter + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue) + popl %eax +1: + popl %ebx + popl %esi + popl %edi + ret // ptr to 1st element in Q still in %eax +COMMPAGE_DESCRIPTOR(AtomicFifoDequeue,_COMM_PAGE_FIFO_DEQUEUE,0,0) + + +/* Subroutine to make a preempt syscall. Called when we notice %ebx is + * nonzero after returning from a PFZ subroutine. + * When we enter kernel: + * %edx = return address + * %ecx = stack ptr + * Destroys %eax, %ecx, and %edx. 
+ */ +COMMPAGE_FUNCTION_START(preempt, 32, 4) + popl %edx // get return address + movl %esp,%ecx // save stack ptr here + movl $(-58),%eax /* 58 = pfz_exit */ + xorl %ebx,%ebx // clear "preemption pending" flag + sysenter +COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT,0,0) + + +/* Subroutine to back off if we cannot get the spinlock. Called + * after a few attempts inline in the PFZ subroutines. This code is + * not in the PFZ. + * %edi = ptr to queue head structure + * %ebx = preemption flag (nonzero if preemption pending) + * Destroys %eax. + */ +COMMPAGE_FUNCTION_START(backoff, 32, 4) + testl %ebx,%ebx // does kernel want to preempt us? + jz 1f // no + xorl %ebx,%ebx // yes, clear flag + pushl %edx // preserve regs used by preempt syscall + pushl %ecx + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff) + popl %ecx + popl %edx +1: + pause // SMT-friendly backoff + cmpl $0,8(%edi) // sniff the lockword + jnz 1b // loop if still taken + ret // lockword is free, so reenter PFZ +COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF,0,0) + + +/* Preemption-free-zone routine to FIFO Enqueue: + * %edi = ptr to queue head structure + * %esi = ptr to element to enqueue + * %edx = offset of link field in elements + * %ebx = preemption flag (kernel sets nonzero if we should preempt) + */ + +COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4) + movl $0,(%edx,%esi) // zero forward link in new element +1: + xorl %eax, %eax + orl $-1, %ecx + lock + cmpxchgl %ecx, 8(%edi) // try to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock + jz 2f // got it + + COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue) + jmp 1b // loop to try again +2: + movl 4(%edi),%ecx // get ptr to last element in q + testl %ecx,%ecx // q null? 
+ jnz 3f // no + movl %esi,(%edi) // q empty so this is first element + jmp 4f +3: + movl %esi,(%edx,%ecx) // point to new element from last +4: + movl %esi,4(%edi) // new element becomes last in q + movl $0,8(%edi) // unlock spinlock + ret +COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE,0,0) + + +/* Preemption-free-zone routine to FIFO Dequeue: + * %edi = ptr to queue head structure + * %edx = offset of link field in elements + * %ebx = preemption flag (kernel sets nonzero if we should preempt) + * + * Returns with next element (or 0) in %eax. + */ + +COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4) +1: + xorl %eax, %eax + orl $-1, %ecx + lock + cmpxchgl %ecx, 8(%edi) // try to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock + jz 2f // got it + + COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue) + jmp 1b // loop to try again +2: + movl (%edi),%eax // get ptr to first element in q + testl %eax,%eax // q null? + jz 4f // yes + movl (%edx,%eax),%esi// get ptr to 2nd element in q + testl %esi,%esi // is there a 2nd element? 
+ jnz 3f // yes + movl %esi,4(%edi) // clear "last" field of q head +3: + movl %esi,(%edi) // update "first" field of q head +4: + movl $0,8(%edi) // unlock spinlock + ret +COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE,0,0) + + + + +/************************* x86_64 versions follow **************************/ + + +/* + * typedef volatile struct { + * void *opaque1; <-- ptr to first queue element or null + * void *opaque2; <-- ptr to last queue element or null + * int opaque3; <-- spinlock + * } OSFifoQueueHead; + * + * void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset); + */ + +// %rdi == list head, %rsi == new, %rdx == offset + +COMMPAGE_FUNCTION_START(AtomicFifoEnqueue_64, 64, 4) + pushq %rbx + xorl %ebx,%ebx // clear "preemption pending" flag + COMMPAGE_CALL(_COMM_PAGE_PFZ_ENQUEUE,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64) + testl %ebx,%ebx // pending preemption? + jz 1f + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_ENQUEUE,AtomicFifoEnqueue_64) +1: + popq %rbx + ret +COMMPAGE_DESCRIPTOR(AtomicFifoEnqueue_64,_COMM_PAGE_FIFO_ENQUEUE,0,0) + + +/* void* OSAtomicFifoDequeue( OSFifoQueueHead *list, size_t offset); */ + +// %rdi == list head, %rsi == offset + +COMMPAGE_FUNCTION_START(AtomicFifoDequeue_64, 64, 4) + pushq %rbx + xorl %ebx,%ebx // clear "preemption pending" flag + movq %rsi,%rdx // move offset to %rdx to be like the Enqueue case + COMMPAGE_CALL(_COMM_PAGE_PFZ_DEQUEUE,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64) + testl %ebx,%ebx // pending preemption? + jz 1f + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_FIFO_DEQUEUE,AtomicFifoDequeue_64) +1: + popq %rbx + ret // ptr to 1st element in Q (or 0) in %rax +COMMPAGE_DESCRIPTOR(AtomicFifoDequeue_64,_COMM_PAGE_FIFO_DEQUEUE,0,0) + + +/* Subroutine to make a preempt syscall. Called when we notice %ebx is + * nonzero after returning from a PFZ subroutine. Not in PFZ. + * + * All registers preserved (but does clear the %ebx preemption flag). 
+ */ +COMMPAGE_FUNCTION_START(preempt_64, 64, 4) + pushq %rax + pushq %rcx + pushq %r11 + movl $(SYSCALL_CONSTRUCT_MACH(58)),%eax /* 58 = pfz_exit */ + xorl %ebx,%ebx + syscall + popq %r11 + popq %rcx + popq %rax + ret +COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT,0,0) + + +/* Subroutine to back off if we cannot get the spinlock. Called + * after a few attempts inline in the PFZ subroutines. This code is + * not in the PFZ. + * %rdi = ptr to queue head structure + * %ebx = preemption flag (nonzero if preemption pending) + * Uses: %rax. + */ +COMMPAGE_FUNCTION_START(backoff_64, 64, 4) + testl %ebx,%ebx // does kernel want to preempt us? + jz 1f // no + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64) +1: + pause // SMT-friendly backoff + cmpl $0,16(%rdi) // sniff the lockword + jnz 1b // loop if still taken + ret // lockword is free, so reenter PFZ +COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF,0,0) + + +/* Preemption-free-zone routine to FIFO Enqueue: + * %rdi = ptr to queue head structure + * %rsi = ptr to new element to enqueue + * %rdx = offset of link field in elements + * %ebx = preemption flag (kernel sets nonzero if we should preempt) + */ + +COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4) + movq $0,(%rdx,%rsi) // zero forward link in new element +1: + xorl %eax, %eax + orl $-1, %ecx + lock + cmpxchgl %ecx,16(%rdi) // try to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock + jz 2f // got it + + COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64) + jmp 1b // loop to try again +2: + movq 8(%rdi),%rcx // get ptr to last element in q + testq %rcx,%rcx // q null? 
+ jnz 3f // no + movq %rsi,(%rdi) // q empty so this is first element + jmp 4f +3: + movq %rsi,(%rdx,%rcx) // point to new element from last +4: + movq %rsi,8(%rdi) // new element becomes last in q + movl $0,16(%rdi) // unlock spinlock + ret +COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE,0,0) + + + +/* Preemption-free-zone routine to FIFO Dequeue: + * %rdi = ptr to queue head structure + * %rdx = offset of link field in elements + * %ebx = preemption flag (kernel sets nonzero if we should preempt) + * + * Returns with next element (or 0) in %rax. + */ + +COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4) +1: + xorl %eax, %eax + orl $-1, %ecx + lock + cmpxchgl %ecx,16(%rdi) // try to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock + jz 2f // got it + + pause + xorl %eax, %eax + lock + cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock + jz 2f // got it + + COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64) + jmp 1b // loop to try again +2: + movq (%rdi),%rax // get ptr to first element in q + testq %rax,%rax // q null? + jz 4f // yes + movq (%rdx,%rax),%rsi// get ptr to 2nd element in q + testq %rsi,%rsi // is there a 2nd element? + jnz 3f // yes + movq %rsi,8(%rdi) // no - clear "last" field of q head +3: + movq %rsi,(%rdi) // update "first" field of q head +4: + movl $0,16(%rdi) // unlock spinlock + ret +COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE,0,0) diff --git a/osfmk/i386/commpage/longcopy_sse3x.s b/osfmk/i386/commpage/longcopy_sse3x.s index 973eef434..3a1de25ed 100644 --- a/osfmk/i386/commpage/longcopy_sse3x.s +++ b/osfmk/i386/commpage/longcopy_sse3x.s @@ -52,10 +52,10 @@ // operands, with the standard ABI. 
// // void longcopy(const void *dest, void *sou, size_t len) - - .text - .align 5, 0x90 -Llongcopy_sse3x: // void longcopy(const void *dest, void *sou, size_t len) + +// void longcopy(const void *dest, void *sou, size_t len) + +COMMPAGE_FUNCTION_START(longcopy_sse3x, 32, 5) pushl %ebp // set up a frame for backtraces movl %esp,%ebp pushl %esi @@ -217,5 +217,5 @@ LVeryLongChunkEnd: popl %ebp ret - /* always match for now, as commpage_stuff_routine() will panic if no match */ - COMMPAGE_DESCRIPTOR(longcopy_sse3x, _COMM_PAGE_LONGCOPY, 0 ,0) +/* always match for now, as commpage_stuff_routine() will panic if no match */ +COMMPAGE_DESCRIPTOR(longcopy_sse3x, _COMM_PAGE_LONGCOPY, 0 ,0) diff --git a/osfmk/i386/commpage/longcopy_sse3x_64.s b/osfmk/i386/commpage/longcopy_sse3x_64.s index b04592701..439c4447f 100644 --- a/osfmk/i386/commpage/longcopy_sse3x_64.s +++ b/osfmk/i386/commpage/longcopy_sse3x_64.s @@ -54,11 +54,10 @@ // rdi = dest ptr // rsi = source ptr // rdx = length (>= 8kb, probably much bigger) - - .text - .code64 - .align 5, 0x90 -Llongcopy_sse3x_64: // void longcopy(const void *dest, void *sou, size_t len) + +// void longcopy(const void *dest, void *sou, size_t len) + +COMMPAGE_FUNCTION_START(longcopy_sse3x_64, 64, 5) pushq %rbp // set up a frame for backtraces movq %rsp,%rbp movl %edi,%eax // copy dest ptr @@ -207,5 +206,5 @@ LVeryLongChunkEnd: popq %rbp // restore frame ptr ret - /* always match for now, as commpage_stuff_routine() will panic if no match */ - COMMPAGE_DESCRIPTOR(longcopy_sse3x_64, _COMM_PAGE_LONGCOPY, 0 ,0) +/* always match for now, as commpage_stuff_routine() will panic if no match */ +COMMPAGE_DESCRIPTOR(longcopy_sse3x_64, _COMM_PAGE_LONGCOPY, 0 ,0) diff --git a/osfmk/i386/commpage/memset_pattern_sse2.s b/osfmk/i386/commpage/memset_pattern_sse2.s index c3f4ecdd9..3025ef62b 100644 --- a/osfmk/i386/commpage/memset_pattern_sse2.s +++ b/osfmk/i386/commpage/memset_pattern_sse2.s @@ -55,9 +55,7 @@ // Return conditions: // %eax, %edi, %esi, 
%ecx, and %edx all trashed - .text - .align 5, 0x90 -Lmemset_pattern_sse2: +COMMPAGE_FUNCTION_START(memset_pattern_sse2, 32, 5) cmpl $(kShort),%edx // long enough to bother aligning? ja LNotShort // yes jmp LShort // no @@ -182,4 +180,4 @@ LNoMoreChunks: jge LLoopBy16 // yes jmp LLessThan16 // handle up to 15 remaining bytes - COMMPAGE_DESCRIPTOR(memset_pattern_sse2,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0) +COMMPAGE_DESCRIPTOR(memset_pattern_sse2,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0) diff --git a/osfmk/i386/commpage/memset_pattern_sse2_64.s b/osfmk/i386/commpage/memset_pattern_sse2_64.s index b3d64a692..e2d1bb007 100644 --- a/osfmk/i386/commpage/memset_pattern_sse2_64.s +++ b/osfmk/i386/commpage/memset_pattern_sse2_64.s @@ -56,10 +56,7 @@ // %rax, %rdi, %rsi, %rcx, and %rdx all trashed // we preserve %r8, %r9, %r10, and %r11 - .text - .align 5, 0x90 - .code64 -Lmemset_pattern_sse2_64: +COMMPAGE_FUNCTION_START(memset_pattern_sse2_64, 64, 5) cmpq $(kShort),%rdx // long enough to bother aligning? ja LNotShort // yes jmp LShort // no @@ -184,4 +181,4 @@ LNoMoreChunks: jge LLoopBy16 // yes jmp LLessThan16 // handle up to 15 remaining bytes - COMMPAGE_DESCRIPTOR(memset_pattern_sse2_64,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0) +COMMPAGE_DESCRIPTOR(memset_pattern_sse2_64,_COMM_PAGE_MEMSET_PATTERN,kHasSSE2,0) diff --git a/osfmk/i386/commpage/pthreads.s b/osfmk/i386/commpage/pthreads.s index 484857e13..217662445 100644 --- a/osfmk/i386/commpage/pthreads.s +++ b/osfmk/i386/commpage/pthreads.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,6 +29,7 @@ #include #include #include +#include #define _PTHREAD_TSD_OFFSET32 0x48 #define _PTHREAD_TSD_OFFSET64 0x60 @@ -38,33 +39,299 @@ * to avoid revlock, but the code should move to Libc, and we should eventually remove * these. 
*/ - .text - .align 2, 0x90 - -Lpthread_getspecific: +COMMPAGE_FUNCTION_START(pthread_getspecific, 32, 4) movl 4(%esp), %eax movl %gs:_PTHREAD_TSD_OFFSET32(,%eax,4), %eax ret +COMMPAGE_DESCRIPTOR(pthread_getspecific,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) - COMMPAGE_DESCRIPTOR(pthread_getspecific,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) - -Lpthread_self: +COMMPAGE_FUNCTION_START(pthread_self, 32, 4) movl %gs:_PTHREAD_TSD_OFFSET32, %eax ret - - COMMPAGE_DESCRIPTOR(pthread_self,_COMM_PAGE_PTHREAD_SELF,0,0) +COMMPAGE_DESCRIPTOR(pthread_self,_COMM_PAGE_PTHREAD_SELF,0,0) /* the 64-bit versions: */ - - .code64 -Lpthread_getspecific_64: +COMMPAGE_FUNCTION_START(pthread_getspecific_64, 64, 4) movq %gs:_PTHREAD_TSD_OFFSET64(,%rdi,8), %rax ret +COMMPAGE_DESCRIPTOR(pthread_getspecific_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) - COMMPAGE_DESCRIPTOR(pthread_getspecific_64,_COMM_PAGE_PTHREAD_GETSPECIFIC,0,0) - -Lpthread_self_64: +COMMPAGE_FUNCTION_START(pthread_self_64, 64, 4) movq %gs:_PTHREAD_TSD_OFFSET64, %rax ret +COMMPAGE_DESCRIPTOR(pthread_self_64,_COMM_PAGE_PTHREAD_SELF,0,0) + + +/* Temporary definitions. Replace by #including the correct file when available. */ + +#define PTHRW_EBIT 0x01 +#define PTHRW_LBIT 0x02 +#define PTHRW_YBIT 0x04 +#define PTHRW_WBIT 0x08 +#define PTHRW_UBIT 0x10 +#define PTHRW_RETRYBIT 0x20 +#define PTHRW_TRYLKBIT 0x40 + +#define PTHRW_INC 0x100 +#define PTHRW_BIT_MASK 0x000000ff + +#define PTHRW_COUNT_SHIFT 8 +#define PTHRW_COUNT_MASK 0xffffff00 +#define PTHRW_MAX_READERS 0xffffff00 + +#define KSYN_MLWAIT 301 /* mutex lock wait syscall */ + +#define PTHRW_STATUS_ACQUIRED 0 +#define PTHRW_STATUS_SYSCALL 1 +#define PTHRW_STATUS_ERROR 2 + +#define PTHRW_LVAL 0 +#define PTHRW_UVAL 4 + + + +/* PREEMPTION FREE ZONE (PFZ) + * + * A portion of the commpage is special-cased by the kernel to be "preemption free", + * i.e. as if we had disabled interrupts in user mode. 
This facilitates writing + * "nearly-lockless" code, for example code that must be serialized by a spinlock but + * which we do not want to preempt while the spinlock is held. + * + * The PFZ is implemented by collecting all the "preemption-free" code into a single + * contiguous region of the commpage. Register %ebx is used as a flag register; + * before entering the PFZ, %ebx is cleared. If some event occurs that would normally + * result in a preemption while in the PFZ, the kernel sets %ebx nonzero instead of + * preempting. Then, when the routine leaves the PFZ we check %ebx and + * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption. + * + * PFZ code must bound the amount of time spent in the PFZ, in order to control + * latency. Backward branches are dangerous and must not be used in a way that + * could inadvertently create a long-running loop. + * + * Because we need to avoid being preempted between changing the mutex stateword + * and entering the kernel to relinquish, some low-level pthread mutex manipulations + * are located in the PFZ. + */ + + +/* int // we return 0 on acquire, 1 on syscall + * pthread_mutex_lock( uint32_t *lvalp, // ptr to mutex LVAL/UVAL pair + * int flags, // flags to pass kernel if we do syscall + * uint64_t mtid, // my Thread ID + * uint32_t mask, // bits to test in LVAL (ie, EBIT etc) + * uint64_t *tidp, // ptr to TID field of mutex + * int *syscall_return ); // if syscall, return value stored here + */ +COMMPAGE_FUNCTION_START(pthread_mutex_lock, 32, 4) + pushl %ebp // set up frame for backtrace + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + xorl %ebx,%ebx // clear "preemption pending" flag + movl 20(%esp),%edi // %edi == ptr to LVAL/UVAL structure + lea 20(%esp),%esi // %esi == ptr to argument list + movl _COMM_PAGE_SPIN_COUNT, %edx // %edx == max spin count + movl 16(%esi),%ecx // get mask (ie, PTHRW_EBIT etc) +1: + testl PTHRW_LVAL(%edi),%ecx // is mutex available? 
+ jz 2f // yes, it is available + pause + decl %edx // decrement max spin count + jnz 1b // keep spinning +2: + COMMPAGE_CALL(_COMM_PAGE_PFZ_MUTEX_LOCK,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock) + testl %ebx,%ebx // pending preemption? + jz 3f + pushl %eax // save return value across sysenter + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock) + popl %eax +3: + popl %ebx + popl %edi + popl %esi + popl %ebp + ret +COMMPAGE_DESCRIPTOR(pthread_mutex_lock,_COMM_PAGE_MUTEX_LOCK,0,0) + + +/* Internal routine to handle pthread mutex lock operation. This is in the PFZ. + * %edi == ptr to LVAL/UVAL pair + * %esi == ptr to argument list on stack + * %ebx == preempion pending flag (kernel sets nonzero if we should preempt) + */ +COMMPAGE_FUNCTION_START(pfz_mutex_lock, 32, 4) + pushl %ebp // set up frame for backtrace + movl %esp,%ebp +1: + movl 16(%esi),%ecx // get mask (ie, PTHRW_EBIT etc) +2: + movl PTHRW_LVAL(%edi),%eax // get mutex LVAL + testl %eax,%ecx // is mutex available? + jnz 5f // no + + /* lock is available (if we act fast) */ + lea PTHRW_INC(%eax),%edx // copy original lval and bump sequence count + orl $PTHRW_EBIT, %edx // set EBIT + lock + cmpxchgl %edx,PTHRW_LVAL(%edi) // try to acquire lock for real + jz 4f // got it +3: + testl %ebx,%ebx // kernel trying to preempt us? 
+ jz 2b // no, so loop and try again + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_PFZ_MUTEX_LOCK,pfz_mutex_lock) + jmp 1b // loop to try again + + /* we acquired the mutex */ +4: + movl 20(%esi),%eax // get ptr to TID field of mutex + movl 8(%esi),%ecx // get 64-bit mtid + movl 12(%esi),%edx + movl %ecx,0(%eax) // store my TID in mutex structure + movl %edx,4(%eax) + movl $PTHRW_STATUS_ACQUIRED,%eax + popl %ebp + ret + + /* cannot acquire mutex, so update seq count, set "W", and block in kernel */ + /* this is where we cannot tolerate preemption or being killed */ +5: + lea PTHRW_INC(%eax),%edx // copy original lval and bump sequence count + orl $PTHRW_WBIT, %edx // set WBIT + lock + cmpxchgl %edx,PTHRW_LVAL(%edi) // try to update lock status atomically + jnz 3b // failed + movl 20(%esi),%eax // get ptr to TID field of mutex + pushl 4(%esi) // arg 5: flags from arg list + pushl 4(%eax) // arg 4: tid field from mutex + pushl 0(%eax) + pushl PTHRW_UVAL(%edi) // arg 3: uval field from mutex + pushl %edx // arg 2: new value of mutex lval field + pushl %edi // arg 1: ptr to LVAL/UVAL pair in mutex + call 6f // make ksyn_mlwait call + jc 6f // immediately reissue syscall if error + movl 24(%esi),%edx // get ptr to syscall_return arg + movl %eax,(%edx) // save syscall return value + movl $PTHRW_STATUS_SYSCALL,%eax // we had to make syscall + addl $28,%esp // pop off syscall args and return address + popl %ebp // pop off frame ptr + ret + + /* subroutine to make a ksyn_mlwait syscall */ +6: + movl (%esp),%edx // get return address but leave on stack + movl %esp,%ecx // save stack ptr here + movl $KSYN_MLWAIT,%eax // get syscall code + orl $0x00180000,%eax // copy 24 bytes of arguments in trampoline + xorl %ebx,%ebx // clear preemption flag + sysenter +COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0) + + + +/************************* x86_64 versions follow **************************/ + + + +/* int // we return 0 on acquire, 1 on syscall + * 
pthread_mutex_lock( uint32_t *lvalp, // ptr to mutex LVAL/UVAL pair + * int flags, // flags to pass kernel if we do syscall + * uint64_t mtid, // my Thread ID + * uint32_t mask, // bits to test in LVAL (ie, EBIT etc) + * uint64_t *tidp, // ptr to TID field of mutex + * int *syscall_return ); // if syscall, return value stored here + * + * %rdi = lvalp + * %esi = flags + * %rdx = mtid + * %ecx = mask + * %r8 = tidp + * %r9 = &syscall_return + */ +COMMPAGE_FUNCTION_START(pthread_mutex_lock_64, 64, 4) + pushq %rbp // set up frame for backtrace + movq %rsp,%rbp + pushq %rbx + xorl %ebx,%ebx // clear "preemption pending" flag + movl _COMM_PAGE_32_TO_64(_COMM_PAGE_SPIN_COUNT), %eax +1: + testl PTHRW_LVAL(%rdi),%ecx // is mutex available? + jz 2f // yes, it is available + pause + decl %eax // decrement max spin count + jnz 1b // keep spinning +2: + COMMPAGE_CALL(_COMM_PAGE_PFZ_MUTEX_LOCK,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock_64) + testl %ebx,%ebx // pending preemption? + jz 1f // no + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_MUTEX_LOCK,pthread_mutex_lock_64) +1: + popq %rbx + popq %rbp + ret +COMMPAGE_DESCRIPTOR(pthread_mutex_lock_64,_COMM_PAGE_MUTEX_LOCK,0,0) + + +/* Internal routine to handle pthread mutex lock operation. This is in the PFZ. + * %rdi = lvalp + * %esi = flags + * %rdx = mtid + * %ecx = mask + * %r8 = tidp + * %r9 = &syscall_return + * %ebx = preempion pending flag (kernel sets nonzero if we should preempt) + */ +COMMPAGE_FUNCTION_START(pfz_mutex_lock_64, 64, 4) + pushq %rbp // set up frame for backtrace + movq %rsp,%rbp +1: + movl PTHRW_LVAL(%rdi),%eax // get old lval from mutex +2: + testl %eax,%ecx // can we acquire the lock? + jnz 5f // no + + /* lock is available (if we act fast) */ + lea PTHRW_INC(%rax),%r11 // copy original lval and bump sequence count + orl $PTHRW_EBIT, %r11d // set EBIT + lock + cmpxchgl %r11d,PTHRW_LVAL(%rdi) // try to acquire lock + jz 4f // got it +3: + testl %ebx,%ebx // kernel trying to preempt us? 
+ jz 2b // no, so loop and try again + COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_PFZ_MUTEX_LOCK,pfz_mutex_lock_64) + jmp 1b // loop to try again + + /* we acquired the mutex */ +4: + movq %rdx,(%r8) // store mtid in mutex structure + movl $PTHRW_STATUS_ACQUIRED,%eax + popq %rbp + ret + + /* cannot acquire mutex, so update seq count and block in kernel */ + /* this is where we cannot tolerate preemption or being killed */ +5: + lea PTHRW_INC(%rax),%r11 // copy original lval and bump sequence count + orl $PTHRW_WBIT, %r11d // set WBIT + lock + cmpxchgl %r11d,PTHRW_LVAL(%rdi) // try to update lock status atomically + jnz 3b // failed + movq (%r8),%r10 // arg 4: tid field from mutex [NB: passed in R10] + movl %esi,%r8d // arg 5: flags from arg list + movl PTHRW_UVAL(%rdi),%edx // arg 3: uval field from mutex + movl %r11d,%esi // arg 2: new value of mutex lval field + // arg 1: LVAL/UVAL ptr already in %rdi +6: + movl $(SYSCALL_CONSTRUCT_UNIX(KSYN_MLWAIT)),%eax + pushq %rdx // some syscalls destroy %rdx so save it + xorl %ebx,%ebx // clear preemption flag + syscall + popq %rdx // restore in case we need to re-execute syscall + jc 6b // immediately re-execute syscall if error + movl %eax,(%r9) // store kernel return value + movl $PTHRW_STATUS_SYSCALL,%eax // we made syscall + popq %rbp + ret +COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK,0,0) - COMMPAGE_DESCRIPTOR(pthread_self_64,_COMM_PAGE_PTHREAD_SELF,0,0) diff --git a/osfmk/i386/commpage/spinlocks.s b/osfmk/i386/commpage/spinlocks.s index e582635a0..a0e98bcb3 100644 --- a/osfmk/i386/commpage/spinlocks.s +++ b/osfmk/i386/commpage/spinlocks.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,13 +32,7 @@ #include - -#define MP_SPIN_TRIES 1024 - - .text - .align 4, 0x90 - -Lspin_lock_try_up: +COMMPAGE_FUNCTION_START(spin_lock_try_up, 32, 4) movl 4(%esp), %ecx xorl %eax, %eax orl $-1, %edx @@ -46,12 +40,10 @@ Lspin_lock_try_up: setz %dl movzbl %dl, %eax ret - - COMMPAGE_DESCRIPTOR(spin_lock_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,0) - +COMMPAGE_DESCRIPTOR(spin_lock_try_up,_COMM_PAGE_SPINLOCK_TRY,kUP,0) - .align 4, 0x90 -Lspin_lock_try_mp: + +COMMPAGE_FUNCTION_START(spin_lock_try_mp, 32, 4) movl 4(%esp), %ecx xorl %eax, %eax orl $-1, %edx @@ -60,17 +52,15 @@ Lspin_lock_try_mp: setz %dl movzbl %dl, %eax ret - - COMMPAGE_DESCRIPTOR(spin_lock_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,kUP) +COMMPAGE_DESCRIPTOR(spin_lock_try_mp,_COMM_PAGE_SPINLOCK_TRY,0,kUP) - .align 4, 0x90 -Lspin_lock_up: +COMMPAGE_FUNCTION_START(spin_lock_up, 32, 4) movl 4(%esp), %ecx xorl %eax, %eax orl $-1, %edx cmpxchgl %edx, (%ecx) - jnz,pn 1f /* predict not taken */ + jnz 1f ret 1: /* failed to get lock so relinquish the processor immediately on UP */ @@ -82,19 +72,17 @@ Lspin_lock_up: int $(MACH_INT) addl $16, %esp /* adjust stack*/ jmp Lspin_lock_up +COMMPAGE_DESCRIPTOR(spin_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) - COMMPAGE_DESCRIPTOR(spin_lock_up,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) - - .align 4, 0x90 -Lspin_lock_mp: +COMMPAGE_FUNCTION_START(spin_lock_mp, 32, 4) movl 4(%esp), %ecx xorl %eax, %eax 0: orl $-1, %edx lock cmpxchgl %edx, (%ecx) - jnz,pn 1f /* predict not taken */ + jnz 1f ret 1: xorl %eax, %eax @@ -102,9 +90,9 @@ Lspin_lock_mp: 2: pause cmpl %eax, (%ecx) - jz,pt 0b /* favor success and slow down spin loop */ + jz 0b /* favor success and slow down spin loop */ decl %edx - jnz,pn 2b /* slow down spin loop with a mispredict */ + jnz 2b /* failed to get lock after spinning so relinquish */ pushl $1 /* 1 ms */ pushl $1 /* SWITCH_OPTION_DEPRESS */ @@ -114,39 +102,30 @@ Lspin_lock_mp: int $(MACH_INT) addl $16, %esp /* adjust stack*/ jmp 
Lspin_lock_mp - - COMMPAGE_DESCRIPTOR(spin_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) +COMMPAGE_DESCRIPTOR(spin_lock_mp,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) - .align 4, 0x90 -Lspin_unlock: +COMMPAGE_FUNCTION_START(spin_unlock, 32, 4) movl 4(%esp), %ecx movl $0, (%ecx) ret - - COMMPAGE_DESCRIPTOR(spin_unlock,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) +COMMPAGE_DESCRIPTOR(spin_unlock,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) /* ============================ 64-bit versions follow ===================== */ - .text - .code64 - .align 4, 0x90 - -Lspin_lock_try_up_64: +COMMPAGE_FUNCTION_START(spin_lock_try_up_64, 64, 4) xorl %eax, %eax orl $-1, %edx cmpxchgl %edx, (%rdi) setz %dl movzbl %dl, %eax ret +COMMPAGE_DESCRIPTOR(spin_lock_try_up_64,_COMM_PAGE_SPINLOCK_TRY,kUP,0) - COMMPAGE_DESCRIPTOR(spin_lock_try_up_64,_COMM_PAGE_SPINLOCK_TRY,kUP,0) - - .align 4, 0x90 -Lspin_lock_try_mp_64: +COMMPAGE_FUNCTION_START(spin_lock_try_mp_64, 64, 4) xorl %eax, %eax orl $-1, %edx lock @@ -154,18 +133,16 @@ Lspin_lock_try_mp_64: setz %dl movzbl %dl, %eax ret - - COMMPAGE_DESCRIPTOR(spin_lock_try_mp_64,_COMM_PAGE_SPINLOCK_TRY,0,kUP) +COMMPAGE_DESCRIPTOR(spin_lock_try_mp_64,_COMM_PAGE_SPINLOCK_TRY,0,kUP) - .align 4, 0x90 -Lspin_lock_up_64: +COMMPAGE_FUNCTION_START(spin_lock_up_64, 64, 4) movq %rdi,%r8 0: xorl %eax, %eax orl $-1, %edx cmpxchgl %edx, (%r8) - jnz,pn 1f /* predict not taken */ + jnz 1f ret 1: /* failed to get lock so relinquish the processor immediately on UP */ @@ -175,20 +152,17 @@ Lspin_lock_up_64: movl $(SYSCALL_CONSTRUCT_MACH(61)),%eax /* 61 = thread_switch */ syscall jmp 0b - - COMMPAGE_DESCRIPTOR(spin_lock_up_64,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) +COMMPAGE_DESCRIPTOR(spin_lock_up_64,_COMM_PAGE_SPINLOCK_LOCK,kUP,0) - - .align 4, 0x90 -Lspin_lock_mp_64: +COMMPAGE_FUNCTION_START(spin_lock_mp_64, 64, 4) movq %rdi,%r8 0: xorl %eax, %eax orl $-1, %edx lock cmpxchgl %edx, (%r8) - jnz,pn 1f /* predict not taken */ + jnz 1f ret 1: xorl %eax, %eax @@ -206,13 +180,10 @@ Lspin_lock_mp_64: movl 
$(SYSCALL_CONSTRUCT_MACH(61)),%eax /* 61 = thread_switch */ syscall jmp 0b - - COMMPAGE_DESCRIPTOR(spin_lock_mp_64,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) +COMMPAGE_DESCRIPTOR(spin_lock_mp_64,_COMM_PAGE_SPINLOCK_LOCK,0,kUP) - .align 4, 0x90 -Lspin_unlock_64: +COMMPAGE_FUNCTION_START(spin_unlock_64, 64, 4) movl $0, (%rdi) ret - - COMMPAGE_DESCRIPTOR(spin_unlock_64,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) +COMMPAGE_DESCRIPTOR(spin_unlock_64,_COMM_PAGE_SPINLOCK_UNLOCK,0,0) diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index 194a6576b..0bcfbb77f 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -35,14 +35,17 @@ #include #include #include +#include #include #include -#include #include #include #include +#include +#if CONFIG_VMX +#include +#endif #include -#include "cpuid.h" struct processor processor_master; @@ -173,8 +176,10 @@ cpu_machine_init( #endif ml_init_interrupt(); +#if CONFIG_VMX /* for every CPU, get the VT specs */ vmx_get_specs(); +#endif } processor_t @@ -241,6 +246,8 @@ slot_threadtype( return (cpu_datap(slot_num)->cpu_threadtype); } + + cpu_type_t cpu_type(void) { diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index 31aee1589..3fb02d33f 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,9 +86,10 @@ int _NumCPUs( void ) */ #define _COMM_PAGE32_AREA_LENGTH ( 19 * 4096 ) /* reserved length of entire comm area */ -#define _COMM_PAGE32_BASE_ADDRESS ( -20 * 4096 ) /* base address of allocated memory */ -#define _COMM_PAGE32_START_ADDRESS ( -16 * 4096 ) /* address traditional commpage code starts on */ +#define _COMM_PAGE32_BASE_ADDRESS ( 0xfffec000 ) /* base address of allocated memory, -20 pages */ +#define _COMM_PAGE32_START_ADDRESS ( 0xffff0000 ) /* address traditional commpage code starts on, -16 pages */ #define _COMM_PAGE32_AREA_USED ( 19 * 4096 ) /* this is the amt actually allocated */ +#define _COMM_PAGE32_SIGS_OFFSET 0x8000 /* offset to routine signatures */ #define _COMM_PAGE64_AREA_LENGTH ( 2 * 1024 * 1024 ) /* reserved length of entire comm area (2MB) */ #define _COMM_PAGE64_BASE_ADDRESS ( 0x00007fffffe00000ULL ) /* base address of allocated memory */ @@ -101,13 +102,24 @@ int _NumCPUs( void ) #define _COMM_PAGE64_OBJC_SIZE 0ULL #define _COMM_PAGE64_OBJC_BASE 0ULL +#ifdef KERNEL_PRIVATE + +/* Inside the kernel, comm page addresses are absolute addresses + * assuming they are a part of the 32-bit commpage. They may + * be mapped somewhere else, especially for the 64-bit commpage. 
+ */ +#define _COMM_PAGE_START_ADDRESS _COMM_PAGE32_START_ADDRESS +#define _COMM_PAGE_SIGS_OFFSET _COMM_PAGE32_SIGS_OFFSET + +#else /* !KERNEL_PRIVATE */ + #if defined(__i386__) #define _COMM_PAGE_AREA_LENGTH _COMM_PAGE32_AREA_LENGTH #define _COMM_PAGE_BASE_ADDRESS _COMM_PAGE32_BASE_ADDRESS #define _COMM_PAGE_START_ADDRESS _COMM_PAGE32_START_ADDRESS #define _COMM_PAGE_AREA_USED _COMM_PAGE32_AREA_USED -#define _COMM_PAGE_SIGS_OFFSET 0x8000 /* offset to routine signatures */ +#define _COMM_PAGE_SIGS_OFFSET _COMM_PAGE32_SIGS_OFFSET #elif defined(__x86_64__) @@ -120,20 +132,32 @@ int _NumCPUs( void ) #error architecture not supported #endif +#endif /* !KERNEL_PRIVATE */ + /* data in the comm page */ #define _COMM_PAGE_SIGNATURE (_COMM_PAGE_START_ADDRESS+0x000) /* first few bytes are a signature */ #define _COMM_PAGE_VERSION (_COMM_PAGE_START_ADDRESS+0x01E) /* 16-bit version# */ -#define _COMM_PAGE_THIS_VERSION 7 /* version of the commarea format */ +#define _COMM_PAGE_THIS_VERSION 11 /* version of the commarea format */ #define _COMM_PAGE_CPU_CAPABILITIES (_COMM_PAGE_START_ADDRESS+0x020) /* uint32_t _cpu_capabilities */ #define _COMM_PAGE_NCPUS (_COMM_PAGE_START_ADDRESS+0x022) /* uint8_t number of configured CPUs */ #define _COMM_PAGE_CACHE_LINESIZE (_COMM_PAGE_START_ADDRESS+0x026) /* uint16_t cache line size */ #define _COMM_PAGE_SCHED_GEN (_COMM_PAGE_START_ADDRESS+0x028) /* uint32_t scheduler generation number (count of pre-emptions) */ +#define _COMM_PAGE_MEMORY_PRESSURE (_COMM_PAGE_START_ADDRESS+0x02c) /* uint32_t copy of vm_memory_pressure */ +#define _COMM_PAGE_SPIN_COUNT (_COMM_PAGE_START_ADDRESS+0x030) /* uint32_t max spin count for mutex's */ + +#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_START_ADDRESS+0x034) /* 12 unused bytes */ + +#ifdef KERNEL_PRIVATE + +/* slots defined in all cases, but commpage setup code must not populate for 64-bit commpage */ +#define _COMM_PAGE_2_TO_52 (_COMM_PAGE_START_ADDRESS+0x040) /* double float constant 2**52 */ +#define 
_COMM_PAGE_10_TO_6 (_COMM_PAGE_START_ADDRESS+0x048) /* double float constant 10**6 */ + +#else /* !KERNEL_PRIVATE */ -#define _COMM_PAGE_UNUSED1 (_COMM_PAGE_START_ADDRESS+0x02c) /* 20 unused bytes */ - #if defined(__i386__) /* following are not defined in 64-bit */ #define _COMM_PAGE_2_TO_52 (_COMM_PAGE_START_ADDRESS+0x040) /* double float constant 2**52 */ #define _COMM_PAGE_10_TO_6 (_COMM_PAGE_START_ADDRESS+0x048) /* double float constant 10**6 */ @@ -141,6 +165,8 @@ int _NumCPUs( void ) #define _COMM_PAGE_UNUSED2 (_COMM_PAGE_START_ADDRESS+0x040) /* 16 unused bytes */ #endif +#endif /* !KERNEL_PRIVATE */ + #define _COMM_PAGE_TIME_DATA_START (_COMM_PAGE_START_ADDRESS+0x050) /* base of offsets below (_NT_SCALE etc) */ #define _COMM_PAGE_NT_TSC_BASE (_COMM_PAGE_START_ADDRESS+0x050) /* used by nanotime() */ #define _COMM_PAGE_NT_SCALE (_COMM_PAGE_START_ADDRESS+0x058) /* used by nanotime() */ @@ -164,8 +190,9 @@ int _NumCPUs( void ) #define _GTOD_SEC_BASE 40 /* jump table (jmp to this address, which may be a branch to the actual code somewhere else) */ - /* When new jump table entries are added, corresponding symbols should be added below */ - + /* When new jump table entries are added, corresponding symbols should be added below */ + /* New slots should be allocated with at least 16-byte alignment. 
Some like bcopy require */ + /* 32-byte alignment, and should be aligned as such in the assembly source before they are relocated */ #define _COMM_PAGE_COMPARE_AND_SWAP32 (_COMM_PAGE_START_ADDRESS+0x080) /* compare-and-swap word */ #define _COMM_PAGE_COMPARE_AND_SWAP64 (_COMM_PAGE_START_ADDRESS+0x0c0) /* compare-and-swap doubleword */ #define _COMM_PAGE_ENQUEUE (_COMM_PAGE_START_ADDRESS+0x100) /* enqueue */ @@ -174,7 +201,7 @@ int _NumCPUs( void ) #define _COMM_PAGE_ATOMIC_ADD32 (_COMM_PAGE_START_ADDRESS+0x1a0) /* add atomic word */ #define _COMM_PAGE_ATOMIC_ADD64 (_COMM_PAGE_START_ADDRESS+0x1c0) /* add atomic doubleword */ -#define _COMM_PAGE_UNUSED4 (_COMM_PAGE_START_ADDRESS+0x1e0) /* 32 unused bytes */ +#define _COMM_PAGE_CPU_NUMBER (_COMM_PAGE_START_ADDRESS+0x1e0) /* user-level cpu_number() */ #define _COMM_PAGE_ABSOLUTE_TIME (_COMM_PAGE_START_ADDRESS+0x200) /* mach_absolute_time() */ #define _COMM_PAGE_SPINLOCK_TRY (_COMM_PAGE_START_ADDRESS+0x220) /* spinlock_try() */ @@ -186,7 +213,7 @@ int _NumCPUs( void ) #define _COMM_PAGE_FLUSH_ICACHE (_COMM_PAGE_START_ADDRESS+0x520) /* sys_icache_invalidate() */ #define _COMM_PAGE_PTHREAD_SELF (_COMM_PAGE_START_ADDRESS+0x580) /* pthread_self() */ -#define _COMM_PAGE_UNUSED5 (_COMM_PAGE_START_ADDRESS+0x5a0) /* 32 unused bytes */ +#define _COMM_PAGE_PREEMPT (_COMM_PAGE_START_ADDRESS+0x5a0) /* used by PFZ code */ #define _COMM_PAGE_RELINQUISH (_COMM_PAGE_START_ADDRESS+0x5c0) /* used by spinlocks */ #define _COMM_PAGE_BTS (_COMM_PAGE_START_ADDRESS+0x5e0) /* bit test-and-set */ @@ -202,11 +229,25 @@ int _NumCPUs( void ) #define _COMM_PAGE_LONGCOPY (_COMM_PAGE_START_ADDRESS+0x1200) /* used by bcopy() for very long operands */ #define _COMM_PAGE_LONGCOPY_END (_COMM_PAGE_START_ADDRESS+0x15ff) /* used by rosetta */ -#define _COMM_PAGE_UNUSED6 (_COMM_PAGE_START_ADDRESS+0x1600) /* unused */ - +#define _COMM_PAGE_BACKOFF (_COMM_PAGE_START_ADDRESS+0x1600) /* called from PFZ */ +#define _COMM_PAGE_FIFO_ENQUEUE 
(_COMM_PAGE_START_ADDRESS+0x1680) /* FIFO enqueue */ +#define _COMM_PAGE_FIFO_DEQUEUE (_COMM_PAGE_START_ADDRESS+0x16c0) /* FIFO dequeue */ #define _COMM_PAGE_NANOTIME (_COMM_PAGE_START_ADDRESS+0x1700) /* nanotime() */ +#define _COMM_PAGE_MUTEX_LOCK (_COMM_PAGE_START_ADDRESS+0x1780) /* pthread_mutex_lock() */ + +#define _COMM_PAGE_UNUSED5 (_COMM_PAGE_START_ADDRESS+0x17e0) /* unused space for regular code up to 0x1c00 */ + +#define _COMM_PAGE_PFZ_START (_COMM_PAGE_START_ADDRESS+0x1c00) /* start of Preemption Free Zone */ + +#define _COMM_PAGE_PFZ_ENQUEUE (_COMM_PAGE_START_ADDRESS+0x1c00) /* internal routine for FIFO enqueue */ +#define _COMM_PAGE_PFZ_DEQUEUE (_COMM_PAGE_START_ADDRESS+0x1c80) /* internal routine for FIFO dequeue */ +#define _COMM_PAGE_PFZ_MUTEX_LOCK (_COMM_PAGE_START_ADDRESS+0x1d00) /* internal routine for pthread_mutex_lock() */ + +#define _COMM_PAGE_UNUSED6 (_COMM_PAGE_START_ADDRESS+0x1d80) /* unused space for PFZ code up to 0x1fff */ + +#define _COMM_PAGE_PFZ_END (_COMM_PAGE_START_ADDRESS+0x1fff) /* end of Preemption Free Zone */ -#define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1780) /* end of common page - insert new stuff here */ +#define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1fff) /* end of common page - insert new stuff here */ /* _COMM_PAGE_COMPARE_AND_SWAP{32,64}B are not used on x86 and are * maintained here for source compatability. 
These will be removed at @@ -230,6 +271,7 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___memory_barrier, _COMM_PAGE_MEMORY_BARRIER) CREATE_COMM_PAGE_SYMBOL(___atomic_add32, _COMM_PAGE_ATOMIC_ADD32) CREATE_COMM_PAGE_SYMBOL(___atomic_add64, _COMM_PAGE_ATOMIC_ADD64) + CREATE_COMM_PAGE_SYMBOL(___cpu_number, _COMM_PAGE_CPU_NUMBER) CREATE_COMM_PAGE_SYMBOL(___mach_absolute_time, _COMM_PAGE_ABSOLUTE_TIME) CREATE_COMM_PAGE_SYMBOL(___spin_lock_try, _COMM_PAGE_SPINLOCK_TRY) CREATE_COMM_PAGE_SYMBOL(___spin_lock, _COMM_PAGE_SPINLOCK_LOCK) @@ -239,6 +281,7 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___sys_dcache_flush, _COMM_PAGE_FLUSH_DCACHE) CREATE_COMM_PAGE_SYMBOL(___sys_icache_invalidate, _COMM_PAGE_FLUSH_ICACHE) CREATE_COMM_PAGE_SYMBOL(___pthread_self, _COMM_PAGE_PTHREAD_SELF) + CREATE_COMM_PAGE_SYMBOL(___pfz_preempt, _COMM_PAGE_PREEMPT) CREATE_COMM_PAGE_SYMBOL(___spin_lock_relinquish, _COMM_PAGE_RELINQUISH) CREATE_COMM_PAGE_SYMBOL(___bit_test_and_set, _COMM_PAGE_BTS) CREATE_COMM_PAGE_SYMBOL(___bit_test_and_clear, _COMM_PAGE_BTC) @@ -248,7 +291,14 @@ symbol_name: nop /* CREATE_COMM_PAGE_SYMBOL(___memmove, _COMM_PAGE_MEMMOVE) */ CREATE_COMM_PAGE_SYMBOL(___memset_pattern, _COMM_PAGE_MEMSET_PATTERN) CREATE_COMM_PAGE_SYMBOL(___longcopy, _COMM_PAGE_LONGCOPY) + CREATE_COMM_PAGE_SYMBOL(___backoff, _COMM_PAGE_BACKOFF) + CREATE_COMM_PAGE_SYMBOL(___fifo_enqueue, _COMM_PAGE_FIFO_ENQUEUE) + CREATE_COMM_PAGE_SYMBOL(___fifo_dequeue, _COMM_PAGE_FIFO_DEQUEUE) CREATE_COMM_PAGE_SYMBOL(___nanotime, _COMM_PAGE_NANOTIME) + CREATE_COMM_PAGE_SYMBOL(___mutex_lock, _COMM_PAGE_MUTEX_LOCK) + CREATE_COMM_PAGE_SYMBOL(___pfz_enqueue, _COMM_PAGE_PFZ_ENQUEUE) + CREATE_COMM_PAGE_SYMBOL(___pfz_dequeue, _COMM_PAGE_PFZ_DEQUEUE) + CREATE_COMM_PAGE_SYMBOL(___pfz_mutex_lock, _COMM_PAGE_PFZ_MUTEX_LOCK) CREATE_COMM_PAGE_SYMBOL(___end_comm_page, _COMM_PAGE_END) .data /* Required to make a well behaved symbol file */ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index e41f6b8cd..64add963d 100644 --- 
a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -35,19 +35,21 @@ #include -#if defined(__GNUC__) - #include #include +#include #include #include #include #include +#include #include #include #include +#if CONFIG_VMX #include +#endif /* * Data structures referenced (anonymously) from per-cpu data: @@ -68,15 +70,19 @@ typedef struct rtclock_timer { } rtclock_timer_t; +#if defined(__i386__) + typedef struct { struct i386_tss *cdi_ktss; #if MACH_KDB struct i386_tss *cdi_dbtss; #endif /* MACH_KDB */ - struct fake_descriptor *cdi_gdt; - struct fake_descriptor *cdi_idt; - struct fake_descriptor *cdi_ldt; - vm_offset_t cdi_sstk; + struct __attribute__((packed)) { + uint16_t size; + struct fake_descriptor *ptr; + } cdi_gdt, cdi_idt; + struct fake_descriptor *cdi_ldt; + vm_offset_t cdi_sstk; } cpu_desc_index_t; typedef enum { @@ -85,6 +91,31 @@ typedef enum { TASK_MAP_64BIT_SHARED /* 64-bit, kernel-shared addr space */ } task_map_t; +#elif defined(__x86_64__) + + +typedef struct { + struct x86_64_tss *cdi_ktss; +#if MACH_KDB + struct x86_64_tss *cdi_dbtss; +#endif /* MACH_KDB */ + struct __attribute__((packed)) { + uint16_t size; + void *ptr; + } cdi_gdt, cdi_idt; + struct fake_descriptor *cdi_ldt; + vm_offset_t cdi_sstk; +} cpu_desc_index_t; + +typedef enum { + TASK_MAP_32BIT, /* 32-bit user, compatibility mode */ + TASK_MAP_64BIT, /* 64-bit user thread, shared space */ +} task_map_t; + +#else +#error Unsupported architecture +#endif + /* * This structure is used on entry into the (uber-)kernel on syscall from * a 64-bit user. It contains the address of the machine state save area @@ -97,6 +128,7 @@ typedef struct { addr64_t cu_user_gs_base; } cpu_uber_t; + /* * Per-cpu data. 
* @@ -134,15 +166,17 @@ typedef struct cpu_data rtclock_timer_t rtclock_timer; boolean_t cpu_is64bit; task_map_t cpu_task_map; - addr64_t cpu_task_cr3; - addr64_t cpu_active_cr3; + volatile addr64_t cpu_task_cr3; + volatile addr64_t cpu_active_cr3; addr64_t cpu_kernel_cr3; cpu_uber_t cpu_uber; void *cpu_chud; void *cpu_console_buf; struct x86_lcpu lcpu; struct processor *cpu_processor; +#if NCOPY_WINDOWS > 0 struct cpu_pmap *cpu_pmap; +#endif struct cpu_desc_table *cpu_desc_tablep; struct fake_descriptor *cpu_ldtp; cpu_desc_index_t cpu_desc_index; @@ -160,18 +194,27 @@ typedef struct cpu_data boolean_t cpu_boot_complete; int cpu_hibernate; +#if NCOPY_WINDOWS > 0 vm_offset_t cpu_copywindow_base; uint64_t *cpu_copywindow_pdp; vm_offset_t cpu_physwindow_base; uint64_t *cpu_physwindow_ptep; void *cpu_hi_iss; - boolean_t cpu_tlb_invalid; +#endif + + + + volatile boolean_t cpu_tlb_invalid; uint32_t cpu_hwIntCnt[256]; /* Interrupt counts */ uint64_t cpu_dr7; /* debug control register */ uint64_t cpu_int_event_time; /* intr entry/exit time */ +#if CONFIG_VMX vmx_cpu_t cpu_vmx; /* wonderful world of virtualization */ +#endif +#if CONFIG_MCA struct mca_state *cpu_mca_state; /* State at MC fault */ +#endif uint64_t cpu_uber_arg_store; /* Double mapped address * of current thread's * uu_arg array. @@ -182,7 +225,8 @@ typedef struct cpu_data * validity flag. 
*/ rtc_nanotime_t *cpu_nanotime; /* Nanotime info */ - + thread_t csw_old_thread; + thread_t csw_new_thread; } cpu_data_t; extern cpu_data_t *cpu_data_ptr[]; @@ -194,7 +238,7 @@ extern cpu_data_t cpu_data_master; #endif /* offsetof */ #define CPU_DATA_GET(member,type) \ type ret; \ - __asm__ volatile ("movl %%gs:%P1,%0" \ + __asm__ volatile ("mov %%gs:%P1,%0" \ : "=r" (ret) \ : "i" (offsetof(cpu_data_t,member))); \ return ret; @@ -212,12 +256,16 @@ get_active_thread(void) #define current_thread_fast() get_active_thread() #define current_thread() current_thread_fast() +#if defined(__i386__) static inline boolean_t get_is64bit(void) { CPU_DATA_GET(cpu_is64bit, boolean_t) } #define cpu_mode_is64bit() get_is64bit() +#elif defined(__x86_64__) +#define cpu_mode_is64bit() TRUE +#endif static inline int get_preemption_level(void) @@ -245,6 +293,7 @@ get_cpu_phys_number(void) CPU_DATA_GET(cpu_phys_number,int) } + static inline void disable_preemption(void) { @@ -311,8 +360,4 @@ cpu_datap(int cpu) extern cpu_data_t *cpu_data_alloc(boolean_t is_boot_cpu); -#else /* !defined(__GNUC__) */ - -#endif /* defined(__GNUC__) */ - #endif /* I386_CPU_DATA */ diff --git a/osfmk/i386/cpu_number.h b/osfmk/i386/cpu_number.h index eeda73ff3..b0348fce6 100644 --- a/osfmk/i386/cpu_number.h +++ b/osfmk/i386/cpu_number.h @@ -61,23 +61,23 @@ * Machine-dependent definitions for cpu identification. * */ -#ifdef KERNEL_PRIVATE - #ifndef _I386_CPU_NUMBER_H_ #define _I386_CPU_NUMBER_H_ -#ifdef I386_CPU_DATA +#ifdef KERNEL_PRIVATE + +/* Use a function to do this less directly. */ +extern int cpu_number(void); + +#ifdef MACH_KERNEL_PRIVATE +#include /* Get the cpu number directly from the pre-processor data area */ #define cpu_number() get_cpu_number() -#else /* I386_CPU_DATA */ - -/* Use a function to do this less directly. 
*/ -extern int cpu_number(void); +#endif /* !MACH_KERNEL_PRIVATE */ -#endif /* I386_CPU_DATA */ +#endif /* KERNEL_PRIVATE */ #endif /* _I386_CPU_NUMBER_H_ */ -#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index 6d539ffb1..529da7af5 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,9 +31,8 @@ #include #include #include -#include -#include #include +#include //#define TOPO_DEBUG 1 #if TOPO_DEBUG @@ -43,8 +42,11 @@ void debug_topology_print(void); #define DBG(x...) #endif /* TOPO_DEBUG */ + void validate_topology(void); +/* Only for 32bit values */ +#define bit(n) (1U << (n)) #define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) #define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) @@ -314,6 +316,9 @@ x86_cache_list(void) cur->type = bitfield(cache_info[eax], 4, 0); cur->level = bitfield(cache_info[eax], 7, 5); + cur->nlcpus = (bitfield(cache_info[eax], 25, 14) + 1); + if (cpuid_info()->cpuid_model == 26) + cur->nlcpus /= cpu_is_hyperthreaded() ? 1 : 2; cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1); cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1; cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1; @@ -340,7 +345,7 @@ static x86_cpu_cache_t * x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) { x86_cpu_cache_t *cur_cache; - + cur_cache = list; while (cur_cache != NULL) { if (cur_cache->maxcpus == matcher->maxcpus @@ -861,13 +866,6 @@ cpu_thread_alloc(int cpu) x86_lcpu_init(cpu); - /* - * Allocate performance counter structure. - */ - simple_unlock(&x86_topo_lock); - cpup->lcpu.pmc = pmc_alloc(); - simple_lock(&x86_topo_lock); - /* * Assume that all cpus have the same features. 
*/ diff --git a/osfmk/i386/cpu_threads.h b/osfmk/i386/cpu_threads.h index dca8b4016..fc7ef83e6 100644 --- a/osfmk/i386/cpu_threads.h +++ b/osfmk/i386/cpu_threads.h @@ -29,8 +29,8 @@ #define _I386_CPU_THREADS_H_ #include -#include #include +#include /* * These are defined here rather than in cpu_topology.h so as to keep diff --git a/osfmk/i386/cpu_topology.c b/osfmk/i386/cpu_topology.c index 58b15e913..1d8f2ca61 100644 --- a/osfmk/i386/cpu_topology.c +++ b/osfmk/i386/cpu_topology.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,12 +31,12 @@ #include #include #include -#include #include #include -#include #include +#include #include +#include //#define TOPO_DEBUG 1 #if TOPO_DEBUG @@ -60,7 +60,7 @@ x86_affinity_set_t *x86_affinities = NULL; static int x86_affinity_count = 0; /* - * cpu_topology_start() is called after all processors have been registered + * cpu_topology_sort() is called after all processors have been registered * but before any non-boot processor id started. * We establish canonical logical processor numbering - logical cpus must be * contiguous, zero-based and assigned in physical (local apic id) order. @@ -70,18 +70,18 @@ static int x86_affinity_count = 0; * of processors - in particular, for stopping/starting from CHUD. 
*/ void -cpu_topology_start(void) +cpu_topology_sort(int ncpus) { - int ncpus = machine_info.max_cpus; int i; boolean_t istate; + processor_t lprim = NULL; assert(machine_info.physical_cpu == 1); assert(machine_info.logical_cpu == 1); assert(master_cpu == 0); assert(cpu_number() == 0); assert(cpu_datap(0)->cpu_number == 0); - + /* Lights out for this */ istate = ml_set_interrupts_enabled(FALSE); @@ -127,9 +127,9 @@ cpu_topology_start(void) assert(pkg != NULL); if (cpup->cpu_number != i) { - kprintf("cpu_datap(%d):0x%08x local apic id 0x%x " + kprintf("cpu_datap(%d):%p local apic id 0x%x " "remapped from %d\n", - i, (unsigned) cpup, cpup->cpu_phys_number, + i, cpup, cpup->cpu_phys_number, cpup->cpu_number); } cpup->cpu_number = i; @@ -187,17 +187,35 @@ cpu_topology_start(void) if (i != master_cpu) processor_init(cpup->cpu_processor, i, aset->pset); + + if (lcpup->core->num_lcpus > 1) { + if (lcpup->lnum == 0) + lprim = cpup->cpu_processor; + + processor_meta_init(cpup->cpu_processor, lprim); + } } +} - /* - * Finally we start all processors (including the boot cpu we're - * running on). - */ +/* We got a request to start a CPU. Check that this CPU is within the + * max cpu limit set before we do. + */ +kern_return_t +cpu_topology_start_cpu( int cpunum ) +{ + int ncpus = machine_info.max_cpus; + int i = cpunum; + + /* Decide whether to start a CPU, and actually start it */ DBG("cpu_topology_start() processor_start():\n"); - for (i = 0; i < ncpus; i++) { + if( i < ncpus) + { DBG("\tlcpu %d\n", cpu_datap(i)->cpu_number); processor_start(cpu_datap(i)->cpu_processor); + return KERN_SUCCESS; } + else + return KERN_FAILURE; } static int diff --git a/osfmk/i386/cpu_topology.h b/osfmk/i386/cpu_topology.h index d4351e6b8..c9e13f0d4 100644 --- a/osfmk/i386/cpu_topology.h +++ b/osfmk/i386/cpu_topology.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,7 +144,6 @@ typedef struct x86_lcpu uint64_t rtcPop; /* when etimer wants a timer pop */ uint64_t rtcDeadline; x86_cpu_cache_t *caches[MAX_CACHE_DEPTH]; - struct pmc *pmc; /* Pointer to perfmon data */ void *pmStats; /* Power management stats for lcpu */ void *pmState; /* Power management state for lcpu */ } x86_lcpu_t; @@ -154,7 +153,6 @@ typedef struct x86_lcpu #define X86CORE_FL_HAS_HPET 0x10000000 /* core has HPET assigned */ #define X86CORE_FL_HALTED 0x00008000 /* core is halted */ #define X86CORE_FL_IDLE 0x00004000 /* core is idle */ -#define X86CORE_FL_WAKEUP 0x00002000 /* wakeup is pending */ typedef struct x86_core { @@ -212,7 +210,7 @@ typedef struct x86_pkg } x86_pkg_t; extern x86_pkg_t *x86_pkgs; /* root of all CPU packages */ - + typedef struct x86_topology_parameters { uint32_t LLCDepth; @@ -235,7 +233,9 @@ typedef struct x86_topology_parameters } x86_topology_parameters_t; /* Called after cpu discovery */ -extern void cpu_topology_start(void); +extern void cpu_topology_sort(int ncpus); +extern kern_return_t cpu_topology_start_cpu(int cpunum); + #endif /* _I386_CPU_TOPOLOGY_H_ */ #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index 23a27ef29..a5c88990d 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -33,9 +33,9 @@ #include #include -#include "cpuid.h" +#include #if MACH_KDB -#include +#include #include #include #include @@ -48,9 +48,168 @@ #define min(a,b) ((a) < (b) ? (a) : (b)) #define quad(hi,lo) (((uint64_t)(hi)) << 32 | (lo)) -#define bit(n) (1UL << (n)) +/* Only for 32bit values */ +#define bit(n) (1U << (n)) #define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) -#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) +#define bitfield(x,h,l) ((((x) & bitmask(h,l)) >> l)) + +/* + * Leaf 2 cache descriptor encodings. 
+ */ +typedef enum { + _NULL_, /* NULL (empty) descriptor */ + CACHE, /* Cache */ + TLB, /* TLB */ + STLB, /* Shared second-level unified TLB */ + PREFETCH /* Prefetch size */ +} cpuid_leaf2_desc_type_t; + +typedef enum { + NA, /* Not Applicable */ + FULLY, /* Fully-associative */ + TRACE, /* Trace Cache (P4 only) */ + INST, /* Instruction TLB */ + DATA, /* Data TLB */ + DATA0, /* Data TLB, 1st level */ + DATA1, /* Data TLB, 2nd level */ + L1, /* L1 (unified) cache */ + L1_INST, /* L1 Instruction cache */ + L1_DATA, /* L1 Data cache */ + L2, /* L2 (unified) cache */ + L3, /* L3 (unified) cache */ + L2_2LINESECTOR, /* L2 (unified) cache with 2 lines per sector */ + L3_2LINESECTOR, /* L3 (unified) cache with 2 lines per sector */ + SMALL, /* Small page TLB */ + LARGE, /* Large page TLB */ + BOTH /* Small and Large page TLB */ +} cpuid_leaf2_qualifier_t; + +typedef struct cpuid_cache_descriptor { + uint8_t value; /* descriptor code */ + uint8_t type; /* cpuid_leaf2_desc_type_t */ + uint8_t level; /* level of cache/TLB hierarchy (cpuid_leaf2_qualifier_t) */ + uint8_t ways; /* wayness of cache */ + uint16_t size; /* cachesize or TLB pagesize */ + uint16_t entries; /* number of TLB entries or linesize */ +} cpuid_cache_descriptor_t; + +/* + * These multipliers are used to encode 1*K .. 
64*M in a 16 bit size field + */ +#define K (1) +#define M (1024) + +/* + * Intel cache descriptor table: + */ +static cpuid_cache_descriptor_t intel_cpuid_leaf2_descriptor_table[] = { +// ------------------------------------------------------- +// value type level ways size entries +// ------------------------------------------------------- + { 0x00, _NULL_, NA, NA, NA, NA }, + { 0x01, TLB, INST, 4, SMALL, 32 }, + { 0x02, TLB, INST, FULLY, LARGE, 2 }, + { 0x03, TLB, DATA, 4, SMALL, 64 }, + { 0x04, TLB, DATA, 4, LARGE, 8 }, + { 0x05, TLB, DATA1, 4, LARGE, 32 }, + { 0x06, CACHE, L1_INST, 4, 8*K, 32 }, + { 0x08, CACHE, L1_INST, 4, 16*K, 32 }, + { 0x09, CACHE, L1_INST, 4, 32*K, 64 }, + { 0x0A, CACHE, L1_DATA, 2, 8*K, 32 }, + { 0x0B, TLB, INST, 4, LARGE, 4 }, + { 0x0C, CACHE, L1_DATA, 4, 16*K, 32 }, + { 0x0D, CACHE, L1_DATA, 4, 16*K, 64 }, + { 0x0E, CACHE, L1_DATA, 6, 24*K, 64 }, + { 0x21, CACHE, L2, 8, 256*K, 64 }, + { 0x22, CACHE, L3_2LINESECTOR, 4, 512*K, 64 }, + { 0x23, CACHE, L3_2LINESECTOR, 8, 1*M, 64 }, + { 0x25, CACHE, L3_2LINESECTOR, 8, 2*M, 64 }, + { 0x29, CACHE, L3_2LINESECTOR, 8, 4*M, 64 }, + { 0x2C, CACHE, L1_DATA, 8, 32*K, 64 }, + { 0x30, CACHE, L1_INST, 8, 32*K, 64 }, + { 0x40, CACHE, L2, NA, 0, NA }, + { 0x41, CACHE, L2, 4, 128*K, 32 }, + { 0x42, CACHE, L2, 4, 256*K, 32 }, + { 0x43, CACHE, L2, 4, 512*K, 32 }, + { 0x44, CACHE, L2, 4, 1*M, 32 }, + { 0x45, CACHE, L2, 4, 2*M, 32 }, + { 0x46, CACHE, L3, 4, 4*M, 64 }, + { 0x47, CACHE, L3, 8, 8*M, 64 }, + { 0x48, CACHE, L2, 12, 3*M, 64 }, + { 0x49, CACHE, L2, 16, 4*M, 64 }, + { 0x4A, CACHE, L3, 12, 6*M, 64 }, + { 0x4B, CACHE, L3, 16, 8*M, 64 }, + { 0x4C, CACHE, L3, 12, 12*M, 64 }, + { 0x4D, CACHE, L3, 16, 16*M, 64 }, + { 0x4E, CACHE, L2, 24, 6*M, 64 }, + { 0x4F, TLB, INST, NA, SMALL, 32 }, + { 0x50, TLB, INST, NA, BOTH, 64 }, + { 0x51, TLB, INST, NA, BOTH, 128 }, + { 0x52, TLB, INST, NA, BOTH, 256 }, + { 0x55, TLB, INST, FULLY, BOTH, 7 }, + { 0x56, TLB, DATA0, 4, LARGE, 16 }, + { 0x57, TLB, DATA0, 4, SMALL, 16 
}, + { 0x59, TLB, DATA0, FULLY, SMALL, 16 }, + { 0x5A, TLB, DATA0, 4, LARGE, 32 }, + { 0x5B, TLB, DATA, NA, BOTH, 64 }, + { 0x5C, TLB, DATA, NA, BOTH, 128 }, + { 0x5D, TLB, DATA, NA, BOTH, 256 }, + { 0x60, CACHE, L1, 8, 16*K, 64 }, + { 0x61, CACHE, L1, 4, 8*K, 64 }, + { 0x62, CACHE, L1, 4, 16*K, 64 }, + { 0x63, CACHE, L1, 4, 32*K, 64 }, + { 0x70, CACHE, TRACE, 8, 12*K, NA }, + { 0x71, CACHE, TRACE, 8, 16*K, NA }, + { 0x72, CACHE, TRACE, 8, 32*K, NA }, + { 0x78, CACHE, L2, 4, 1*M, 64 }, + { 0x79, CACHE, L2_2LINESECTOR, 8, 128*K, 64 }, + { 0x7A, CACHE, L2_2LINESECTOR, 8, 256*K, 64 }, + { 0x7B, CACHE, L2_2LINESECTOR, 8, 512*K, 64 }, + { 0x7C, CACHE, L2_2LINESECTOR, 8, 1*M, 64 }, + { 0x7D, CACHE, L2, 8, 2*M, 64 }, + { 0x7F, CACHE, L2, 2, 512*K, 64 }, + { 0x80, CACHE, L2, 8, 512*K, 64 }, + { 0x82, CACHE, L2, 8, 256*K, 32 }, + { 0x83, CACHE, L2, 8, 512*K, 32 }, + { 0x84, CACHE, L2, 8, 1*M, 32 }, + { 0x85, CACHE, L2, 8, 2*M, 32 }, + { 0x86, CACHE, L2, 4, 512*K, 64 }, + { 0x87, CACHE, L2, 8, 1*M, 64 }, + { 0xB0, TLB, INST, 4, SMALL, 128 }, + { 0xB1, TLB, INST, 4, LARGE, 8 }, + { 0xB2, TLB, INST, 4, SMALL, 64 }, + { 0xB3, TLB, DATA, 4, SMALL, 128 }, + { 0xB4, TLB, DATA1, 4, SMALL, 256 }, + { 0xBA, TLB, DATA1, 4, BOTH, 64 }, + { 0xCA, STLB, DATA1, 4, BOTH, 512 }, + { 0xD0, CACHE, L3, 4, 512*K, 64 }, + { 0xD1, CACHE, L3, 4, 1*M, 64 }, + { 0xD2, CACHE, L3, 4, 2*M, 64 }, + { 0xD6, CACHE, L3, 8, 1*M, 64 }, + { 0xD7, CACHE, L3, 8, 2*M, 64 }, + { 0xD8, CACHE, L3, 8, 4*M, 64 }, + { 0xDC, CACHE, L3, 12, 1536*K, 64 }, + { 0xDD, CACHE, L3, 12, 3*M, 64 }, + { 0xDE, CACHE, L3, 12, 6*M, 64 }, + { 0xE2, CACHE, L3, 16, 2*M, 64 }, + { 0xE3, CACHE, L3, 16, 4*M, 64 }, + { 0xE4, CACHE, L3, 16, 8*M, 64 }, + { 0xF0, PREFETCH, NA, NA, 64, NA }, + { 0xF1, PREFETCH, NA, NA, 128, NA } +}; +#define INTEL_LEAF2_DESC_NUM (sizeof(intel_cpuid_leaf2_descriptor_table) / \ + sizeof(cpuid_cache_descriptor_t)) + +static inline cpuid_cache_descriptor_t * +cpuid_leaf2_find(uint8_t value) +{ + unsigned int i; + 
+ for (i = 0; i < INTEL_LEAF2_DESC_NUM; i++) + if (intel_cpuid_leaf2_descriptor_table[i].value == value) + return &intel_cpuid_leaf2_descriptor_table[i]; + return NULL; +} /* * CPU identification routines. @@ -59,6 +218,27 @@ static i386_cpu_info_t *cpuid_cpu_infop = NULL; static i386_cpu_info_t cpuid_cpu_info; +#if defined(__x86_64__) +static void _do_cpuid(uint32_t selector, uint32_t *result) +{ + do_cpuid(selector, result); +} +#else +static void _do_cpuid(uint32_t selector, uint32_t *result) +{ + if (cpu_mode_is64bit()) { + asm("call _cpuid64" + : "=a" (result[0]), + "=b" (result[1]), + "=c" (result[2]), + "=d" (result[3]) + : "a"(selector)); + } else { + do_cpuid(selector, result); + } +} +#endif + /* this function is Intel-specific */ static void cpuid_set_cache_info( i386_cpu_info_t * info_p ) @@ -76,7 +256,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) /* Get processor cache descriptor info using leaf 2. We don't use * this internally, but must publish it for KEXTs. */ - do_cpuid(2, cpuid_result); + _do_cpuid(2, cpuid_result); for (j = 0; j < 4; j++) { if ((cpuid_result[j] >> 31) == 1) /* bit31 is validity */ continue; @@ -86,7 +266,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) for (i = 1; i < info_p->cache_info[0]; i++) { if (i*16 > sizeof(info_p->cache_info)) break; - do_cpuid(2, cpuid_result); + _do_cpuid(2, cpuid_result); for (j = 0; j < 4; j++) { if ((cpuid_result[j] >> 31) == 1) continue; @@ -100,7 +280,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) * Most processors Mac OS X supports implement this flavor of CPUID. * Loop over each cache on the processor. 
*/ - do_cpuid(0, cpuid_result); + _do_cpuid(0, cpuid_result); if (cpuid_result[eax] >= 4) cpuid_deterministic_supported = TRUE; @@ -152,10 +332,11 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) } /* The total size of a cache is: - * ( linesize * sets * associativity ) + * ( linesize * sets * associativity * partitions ) */ if (type != Lnone) { - cache_size = cache_linesize * cache_sets * cache_associativity; + cache_size = cache_linesize * cache_sets * + cache_associativity * cache_partitions; info_p->cache_size[type] = cache_size; info_p->cache_sharing[type] = cache_sharing; info_p->cache_partitions[type] = cache_partitions; @@ -215,72 +396,46 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) else panic("no linesize"); /* - * Extract and publish TLB information. + * Extract and publish TLB information from Leaf 2 descriptors. */ for (i = 1; i < sizeof(info_p->cache_info); i++) { - uint8_t desc = info_p->cache_info[i]; + cpuid_cache_descriptor_t *descp; + int id; + int level; + int page; - switch (desc) { - case CPUID_CACHE_ITLB_4K_32_4: - info_p->cpuid_itlb_small = 32; - break; - case CPUID_CACHE_ITLB_4M_2: - info_p->cpuid_itlb_large = 2; - break; - case CPUID_CACHE_DTLB_4K_64_4: - info_p->cpuid_dtlb_small = 64; - break; - case CPUID_CACHE_DTLB_4M_8_4: - info_p->cpuid_dtlb_large = 8; - break; - case CPUID_CACHE_DTLB_4M_32_4: - info_p->cpuid_dtlb_large = 32; - break; - case CPUID_CACHE_ITLB_64: - info_p->cpuid_itlb_small = 64; - info_p->cpuid_itlb_large = 64; - break; - case CPUID_CACHE_ITLB_128: - info_p->cpuid_itlb_small = 128; - info_p->cpuid_itlb_large = 128; - break; - case CPUID_CACHE_ITLB_256: - info_p->cpuid_itlb_small = 256; - info_p->cpuid_itlb_large = 256; - break; - case CPUID_CACHE_DTLB_64: - info_p->cpuid_dtlb_small = 64; - info_p->cpuid_dtlb_large = 64; - break; - case CPUID_CACHE_DTLB_128: - info_p->cpuid_dtlb_small = 128; - info_p->cpuid_dtlb_large = 128; - break; - case CPUID_CACHE_DTLB_256: - info_p->cpuid_dtlb_small = 256; - 
info_p->cpuid_dtlb_large = 256; - break; - case CPUID_CACHE_ITLB_4M2M_7: - info_p->cpuid_itlb_large = 7; - break; - case CPUID_CACHE_DTLB_4K_16_4: - info_p->cpuid_dtlb_small = 16; - break; - case CPUID_CACHE_DTLB_4M2M_32_4: - info_p->cpuid_dtlb_large = 32; - break; - case CPUID_CACHE_ITLB_4K_128_4: - info_p->cpuid_itlb_small = 128; - break; - case CPUID_CACHE_ITLB_4M_8: - info_p->cpuid_itlb_large = 8; - break; - case CPUID_CACHE_DTLB_4K_128_4: - info_p->cpuid_dtlb_small = 128; - break; - case CPUID_CACHE_DTLB_4K_256_4: - info_p->cpuid_dtlb_small = 256; + descp = cpuid_leaf2_find(info_p->cache_info[i]); + if (descp == NULL) + continue; + + switch (descp->type) { + case TLB: + page = (descp->size == SMALL) ? TLB_SMALL : TLB_LARGE; + /* determine I or D: */ + switch (descp->level) { + case INST: + id = TLB_INST; + break; + case DATA: + case DATA0: + case DATA1: + id = TLB_DATA; + break; + default: + continue; + } + /* determine level: */ + switch (descp->level) { + case DATA1: + level = 1; + break; + default: + level = 0; + } + info_p->cpuid_tlb[id][page][level] = descp->entries; break; + case STLB: + info_p->cpuid_stlb = descp->entries; } } } @@ -289,31 +444,31 @@ static void cpuid_set_generic_info(i386_cpu_info_t *info_p) { uint32_t cpuid_reg[4]; - uint32_t max_extid; char str[128], *p; /* do cpuid 0 to get vendor */ - do_cpuid(0, cpuid_reg); + _do_cpuid(0, cpuid_reg); + info_p->cpuid_max_basic = cpuid_reg[eax]; bcopy((char *)&cpuid_reg[ebx], &info_p->cpuid_vendor[0], 4); /* ug */ bcopy((char *)&cpuid_reg[ecx], &info_p->cpuid_vendor[8], 4); bcopy((char *)&cpuid_reg[edx], &info_p->cpuid_vendor[4], 4); info_p->cpuid_vendor[12] = 0; /* get extended cpuid results */ - do_cpuid(0x80000000, cpuid_reg); - max_extid = cpuid_reg[eax]; + _do_cpuid(0x80000000, cpuid_reg); + info_p->cpuid_max_ext = cpuid_reg[eax]; /* check to see if we can get brand string */ - if (max_extid >= 0x80000004) { + if (info_p->cpuid_max_ext >= 0x80000004) { /* * The brand string 48 bytes (max), 
guaranteed to * be NUL terminated. */ - do_cpuid(0x80000002, cpuid_reg); + _do_cpuid(0x80000002, cpuid_reg); bcopy((char *)cpuid_reg, &str[0], 16); - do_cpuid(0x80000003, cpuid_reg); + _do_cpuid(0x80000003, cpuid_reg); bcopy((char *)cpuid_reg, &str[16], 16); - do_cpuid(0x80000004, cpuid_reg); + _do_cpuid(0x80000004, cpuid_reg); bcopy((char *)cpuid_reg, &str[32], 16); for (p = str; *p != '\0'; p++) { if (*p != ' ') break; @@ -333,13 +488,13 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) } /* Get cache and addressing info. */ - if (max_extid >= 0x80000006) { - do_cpuid(0x80000006, cpuid_reg); + if (info_p->cpuid_max_ext >= 0x80000006) { + _do_cpuid(0x80000006, cpuid_reg); info_p->cpuid_cache_linesize = bitfield(cpuid_reg[ecx], 7, 0); info_p->cpuid_cache_L2_associativity = bitfield(cpuid_reg[ecx],15,12); info_p->cpuid_cache_size = bitfield(cpuid_reg[ecx],31,16); - do_cpuid(0x80000008, cpuid_reg); + _do_cpuid(0x80000008, cpuid_reg); info_p->cpuid_address_bits_physical = bitfield(cpuid_reg[eax], 7, 0); info_p->cpuid_address_bits_virtual = @@ -347,7 +502,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) } /* get processor signature and decode */ - do_cpuid(1, cpuid_reg); + _do_cpuid(1, cpuid_reg); info_p->cpuid_signature = cpuid_reg[eax]; info_p->cpuid_stepping = bitfield(cpuid_reg[eax], 3, 0); info_p->cpuid_model = bitfield(cpuid_reg[eax], 7, 4); @@ -370,17 +525,17 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) else info_p->cpuid_logical_per_package = 1; - if (max_extid >= 0x80000001) { - do_cpuid(0x80000001, cpuid_reg); + if (info_p->cpuid_max_ext >= 0x80000001) { + _do_cpuid(0x80000001, cpuid_reg); info_p->cpuid_extfeatures = quad(cpuid_reg[ecx], cpuid_reg[edx]); } /* Fold in the Invariant TSC feature bit, if present */ - if (max_extid >= 0x80000007) { - do_cpuid(0x80000007, cpuid_reg); + if (info_p->cpuid_max_ext >= 0x80000007) { + _do_cpuid(0x80000007, cpuid_reg); info_p->cpuid_extfeatures |= - cpuid_reg[edx] & CPUID_EXTFEATURE_TSCI; + cpuid_reg[edx] & 
(uint32_t)CPUID_EXTFEATURE_TSCI; } /* Find the microcode version number a.k.a. signature a.k.a. BIOS ID */ @@ -393,24 +548,26 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) * (which determines whether SMT/Hyperthreading is active). */ uint64_t msr_core_thread_count = rdmsr64(MSR_CORE_THREAD_COUNT); - info_p->core_count = bitfield(msr_core_thread_count, 31, 16); - info_p->thread_count = bitfield(msr_core_thread_count, 15, 0); + info_p->core_count = bitfield((uint32_t)msr_core_thread_count, 31, 16); + info_p->thread_count = bitfield((uint32_t)msr_core_thread_count, 15, 0); } - if (info_p->cpuid_features & CPUID_FEATURE_MONITOR) { + if (info_p->cpuid_max_basic >= 0x5) { /* * Extract the Monitor/Mwait Leaf info: */ - do_cpuid(5, cpuid_reg); + _do_cpuid(5, cpuid_reg); info_p->cpuid_mwait_linesize_min = cpuid_reg[eax]; info_p->cpuid_mwait_linesize_max = cpuid_reg[ebx]; info_p->cpuid_mwait_extensions = cpuid_reg[ecx]; info_p->cpuid_mwait_sub_Cstates = cpuid_reg[edx]; + } + if (info_p->cpuid_max_basic >= 0x6) { /* - * And the thermal and Power Leaf while we're at it: + * The thermal and Power Leaf: */ - do_cpuid(6, cpuid_reg); + _do_cpuid(6, cpuid_reg); info_p->cpuid_thermal_sensor = bitfield(cpuid_reg[eax], 0, 0); info_p->cpuid_thermal_dynamic_acceleration = @@ -419,11 +576,13 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) bitfield(cpuid_reg[ebx], 3, 0); info_p->cpuid_thermal_ACNT_MCNT = bitfield(cpuid_reg[ecx], 0, 0); + } + if (info_p->cpuid_max_basic >= 0xa) { /* - * And the Architectural Performance Monitoring Leaf: + * Architectural Performance Monitoring Leaf: */ - do_cpuid(0xa, cpuid_reg); + _do_cpuid(0xa, cpuid_reg); info_p->cpuid_arch_perf_version = bitfield(cpuid_reg[eax], 7, 0); info_p->cpuid_arch_perf_number = @@ -438,7 +597,6 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) bitfield(cpuid_reg[edx], 4, 0); info_p->cpuid_arch_perf_fixed_width = bitfield(cpuid_reg[edx],12, 5); - } return; @@ -522,6 +680,7 @@ static struct { {CPUID_FEATURE_SSE4_2, 
"SSE4.2"}, {CPUID_FEATURE_xAPIC, "xAPIC"}, {CPUID_FEATURE_POPCNT, "POPCNT"}, + {CPUID_FEATURE_VMM, "VMM"}, {0, 0} }, extfeature_map[] = { @@ -548,7 +707,7 @@ cpuid_info(void) char * cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) { - int len = -1; + size_t len = -1; char *p = buf; int i; @@ -557,7 +716,7 @@ cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) continue; if (len > 0) *p++ = ' '; - len = min(strlen(feature_map[i].name), (buf_len-1) - (p-buf)); + len = min(strlen(feature_map[i].name), (size_t) ((buf_len-1) - (p-buf))); if (len == 0) break; bcopy(feature_map[i].name, p, len); @@ -570,7 +729,7 @@ cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) char * cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) { - int len = -1; + size_t len = -1; char *p = buf; int i; @@ -579,7 +738,7 @@ cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) continue; if (len > 0) *p++ = ' '; - len = min(strlen(extfeature_map[i].name), (buf_len-1)-(p-buf)); + len = min(strlen(extfeature_map[i].name), (size_t) ((buf_len-1)-(p-buf))); if (len == 0) break; bcopy(extfeature_map[i].name, p, len); @@ -590,25 +749,6 @@ cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) } -#if CONFIG_NO_KPRINTF_STRINGS -void -cpuid_feature_display( - __unused const char *header) -{ -} - -void -cpuid_extfeature_display( - __unused const char *header) -{ -} - -void -cpuid_cpu_display( - __unused const char *header) -{ -} -#else /* CONFIG_NO_KPRINTF_STRINGS */ void cpuid_feature_display( const char *header) @@ -648,7 +788,6 @@ cpuid_cpu_display( kprintf("%s: %s\n", header, cpuid_cpu_info.cpuid_brand_string); } } -#endif /* !CONFIG_NO_KPRINTF_STRINGS */ unsigned int cpuid_family(void) diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index 8e690a71f..e43ec1282 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -100,6 +100,7 @@ #define CPUID_FEATURE_SSE4_2 
_HBit(20) /* Streaming SIMD extensions 4.2 */ #define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */ #define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */ +#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ /* * The CPUID_EXTFEATURE_XXX values define 64-bit values @@ -118,98 +119,7 @@ */ #define CPUID_EXTFEATURE_TSCI _Bit(8) /* TSC Invariant */ - -#define CPUID_CACHE_SIZE 16 /* Number of descriptor vales */ - -#define CPUID_CACHE_NULL 0x00 /* NULL */ -#define CPUID_CACHE_ITLB_4K_32_4 0x01 /* Inst TLB: 4K pages, 32 ents, 4-way */ -#define CPUID_CACHE_ITLB_4M_2 0x02 /* Inst TLB: 4M pages, 2 ents */ -#define CPUID_CACHE_DTLB_4K_64_4 0x03 /* Data TLB: 4K pages, 64 ents, 4-way */ -#define CPUID_CACHE_DTLB_4M_8_4 0x04 /* Data TLB: 4M pages, 8 ents, 4-way */ -#define CPUID_CACHE_DTLB_4M_32_4 0x05 /* Data TLB: 4M pages, 32 ents, 4-way */ -#define CPUID_CACHE_L1I_8K 0x06 /* Icache: 8K */ -#define CPUID_CACHE_L1I_16K 0x08 /* Icache: 16K */ -#define CPUID_CACHE_L1I_32K 0x09 /* Icache: 32K, 4-way, 64 bytes */ -#define CPUID_CACHE_L1D_8K 0x0A /* Dcache: 8K */ -#define CPUID_CACHE_L1D_16K 0x0C /* Dcache: 16K */ -#define CPUID_CACHE_L1D_16K_4_32 0x0D /* Dcache: 16K, 4-way, 64 byte, ECC */ -#define CPUID_CACHE_L2_256K_8_64 0x21 /* L2: 256K, 8-way, 64 bytes */ -#define CPUID_CACHE_L3_512K 0x22 /* L3: 512K */ -#define CPUID_CACHE_L3_1M 0x23 /* L3: 1M */ -#define CPUID_CACHE_L3_2M 0x25 /* L3: 2M */ -#define CPUID_CACHE_L3_4M 0x29 /* L3: 4M */ -#define CPUID_CACHE_L1D_32K_8 0x2C /* Dcache: 32K, 8-way, 64 byte */ -#define CPUID_CACHE_L1I_32K_8 0x30 /* Icache: 32K, 8-way */ -#define CPUID_CACHE_L2_128K_S4 0x39 /* L2: 128K, 4-way, sectored */ -#define CPUID_CACHE_L2_128K_S2 0x3B /* L2: 128K, 2-way, sectored */ -#define CPUID_CACHE_L2_256K_S4 0x3C /* L2: 256K, 4-way, sectored */ -#define CPUID_CACHE_NOCACHE 0x40 /* No 2nd level or 3rd-level cache */ -#define CPUID_CACHE_L2_128K 0x41 /* L2: 128K */ -#define CPUID_CACHE_L2_256K 0x42 /* L2: 256K */ -#define 
CPUID_CACHE_L2_512K 0x43 /* L2: 512K */ -#define CPUID_CACHE_L2_1M_4 0x44 /* L2: 1M, 4-way */ -#define CPUID_CACHE_L2_2M_4 0x45 /* L2: 2M, 4-way */ -#define CPUID_CACHE_L3_4M_4_64 0x46 /* L3: 4M, 4-way, 64 bytes */ -#define CPUID_CACHE_L3_8M_8_64 0x47 /* L3: 8M, 8-way, 64 bytes*/ -#define CPUID_CACHE_L2_3M_12_64 0x48 /* L3: 3M, 8-way, 64 bytes*/ -#define CPUID_CACHE_L2_4M_16_64 0x49 /* L2: 4M, 16-way, 64 bytes */ -#define CPUID_CACHE_L2_6M_12_64 0x4A /* L2: 6M, 12-way, 64 bytes */ -#define CPUID_CACHE_L2_8M_16_64 0x4B /* L2: 8M, 16-way, 64 bytes */ -#define CPUID_CACHE_L2_12M_12_64 0x4C /* L2: 12M, 12-way, 64 bytes */ -#define CPUID_CACHE_L2_16M_16_64 0x4D /* L2: 16M, 16-way, 64 bytes */ -#define CPUID_CACHE_L2_6M_24_64 0x4E /* L2: 6M, 24-way, 64 bytes */ -#define CPUID_CACHE_ITLB_64 0x50 /* Inst TLB: 64 entries */ -#define CPUID_CACHE_ITLB_128 0x51 /* Inst TLB: 128 entries */ -#define CPUID_CACHE_ITLB_256 0x52 /* Inst TLB: 256 entries */ -#define CPUID_CACHE_ITLB_4M2M_7 0x55 /* Inst TLB: 4M/2M, 7 entries */ -#define CPUID_CACHE_DTLB_4M_16_4 0x56 /* Data TLB: 4M, 16 entries, 4-way */ -#define CPUID_CACHE_DTLB_4K_16_4 0x57 /* Data TLB: 4K, 16 entries, 4-way */ -#define CPUID_CACHE_DTLB_4M2M_32_4 0x5A /* Data TLB: 4M/2M, 32 entries */ -#define CPUID_CACHE_DTLB_64 0x5B /* Data TLB: 64 entries */ -#define CPUID_CACHE_DTLB_128 0x5C /* Data TLB: 128 entries */ -#define CPUID_CACHE_DTLB_256 0x5D /* Data TLB: 256 entries */ -#define CPUID_CACHE_L1D_16K_8_64 0x60 /* Data cache: 16K, 8-way, 64 bytes */ -#define CPUID_CACHE_L1D_8K_4_64 0x66 /* Data cache: 8K, 4-way, 64 bytes */ -#define CPUID_CACHE_L1D_16K_4_64 0x67 /* Data cache: 16K, 4-way, 64 bytes */ -#define CPUID_CACHE_L1D_32K_4_64 0x68 /* Data cache: 32K, 4-way, 64 bytes */ -#define CPUID_CACHE_TRACE_12K_8 0x70 /* Trace cache 12K-uop, 8-way */ -#define CPUID_CACHE_TRACE_16K_8 0x71 /* Trace cache 16K-uop, 8-way */ -#define CPUID_CACHE_TRACE_32K_8 0x72 /* Trace cache 32K-uop, 8-way */ -#define CPUID_CACHE_L2_1M_4_64 0x78 
/* L2: 1M, 4-way, 64 bytes */ -#define CPUID_CACHE_L2_128K_8_64_2 0x79 /* L2: 128K, 8-way, 64b, 2 lines/sec */ -#define CPUID_CACHE_L2_256K_8_64_2 0x7A /* L2: 256K, 8-way, 64b, 2 lines/sec */ -#define CPUID_CACHE_L2_512K_8_64_2 0x7B /* L2: 512K, 8-way, 64b, 2 lines/sec */ -#define CPUID_CACHE_L2_1M_8_64_2 0x7C /* L2: 1M, 8-way, 64b, 2 lines/sec */ -#define CPUID_CACHE_L2_2M_8_64 0x7D /* L2: 2M, 8-way, 64 bytes */ -#define CPUID_CACHE_L2_512K_2_64 0x7F /* L2: 512K, 2-way, 64 bytes */ -#define CPUID_CACHE_L2_256K_8_32 0x82 /* L2: 256K, 8-way, 32 bytes */ -#define CPUID_CACHE_L2_512K_8_32 0x83 /* L2: 512K, 8-way, 32 bytes */ -#define CPUID_CACHE_L2_1M_8_32 0x84 /* L2: 1M, 8-way, 32 bytes */ -#define CPUID_CACHE_L2_2M_8_32 0x85 /* L2: 2M, 8-way, 32 bytes */ -#define CPUID_CACHE_L2_512K_4_64 0x86 /* L2: 512K, 4-way, 64 bytes */ -#define CPUID_CACHE_L2_1M_8_64 0x87 /* L2: 1M, 8-way, 64 bytes */ -#define CPUID_CACHE_ITLB_4K_128_4 0xB0 /* ITLB: 4KB, 128 entries, 4-way */ -#define CPUID_CACHE_ITLB_4M_4_4 0xB1 /* ITLB: 4MB, 4 entries, 4-way, or */ -#define CPUID_CACHE_ITLB_2M_8_4 0xB1 /* ITLB: 2MB, 8 entries, 4-way, or */ -#define CPUID_CACHE_ITLB_4M_8 0xB1 /* ITLB: 4MB, 8 entries */ -#define CPUID_CACHE_ITLB_4K_64_4 0xB2 /* ITLB: 4KB, 64 entries, 4-way */ -#define CPUID_CACHE_DTLB_4K_128_4 0xB3 /* DTLB: 4KB, 128 entries, 4-way */ -#define CPUID_CACHE_DTLB_4K_256_4 0xB4 /* DTLB: 4KB, 256 entries, 4-way */ -#define CPUID_CACHE_2TLB_4K_512_4 0xB4 /* 2nd-level TLB: 4KB, 512, 4-way */ -#define CPUID_CACHE_L3_512K_4_64 0xD0 /* L3: 512KB, 4-way, 64 bytes */ -#define CPUID_CACHE_L3_1M_4_64 0xD1 /* L3: 1M, 4-way, 64 bytes */ -#define CPUID_CACHE_L3_2M_4_64 0xD2 /* L3: 2M, 4-way, 64 bytes */ -#define CPUID_CACHE_L3_1M_8_64 0xD6 /* L3: 1M, 8-way, 64 bytes */ -#define CPUID_CACHE_L3_2M_8_64 0xD7 /* L3: 2M, 8-way, 64 bytes */ -#define CPUID_CACHE_L3_4M_8_64 0xD8 /* L3: 4M, 8-way, 64 bytes */ -#define CPUID_CACHE_L3_1M5_12_64 0xDC /* L3: 1.5M, 12-way, 64 bytes */ -#define 
CPUID_CACHE_L3_3M_12_64 0xDD /* L3: 3M, 12-way, 64 bytes */ -#define CPUID_CACHE_L3_6M_12_64 0xDE /* L3: 6M, 12-way, 64 bytes */ -#define CPUID_CACHE_L3_2M_16_64 0xE2 /* L3: 2M, 16-way, 64 bytes */ -#define CPUID_CACHE_L3_4M_16_64 0xE3 /* L3: 4M, 16-way, 64 bytes */ -#define CPUID_CACHE_L3_8M_16_64 0xE4 /* L3: 8M, 16-way, 64 bytes */ -#define CPUID_CACHE_PREFETCH_64 0xF0 /* 64-Byte Prefetching */ -#define CPUID_CACHE_PREFETCH_128 0xF1 /* 128-Byte Prefetching */ +#define CPUID_CACHE_SIZE 16 /* Number of descriptor values */ #define CPUID_MWAIT_EXTENSION _Bit(0) /* enumeration of WMAIT extensions */ #define CPUID_MWAIT_BREAK _Bit(1) /* interrupts are break events */ @@ -336,15 +246,20 @@ typedef struct { uint32_t cpuid_microcode_version; - /* Numbers of tlbs per processor */ - uint32_t cpuid_itlb_small; - uint32_t cpuid_dtlb_small; - uint32_t cpuid_itlb_large; - uint32_t cpuid_dtlb_large; + /* Numbers of tlbs per processor [i|d, small|large, level0|level1] */ + uint32_t cpuid_tlb[2][2][2]; + #define TLB_INST 0 + #define TLB_DATA 1 + #define TLB_SMALL 0 + #define TLB_LARGE 1 + uint32_t cpuid_stlb; uint32_t core_count; uint32_t thread_count; + /* Max leaf ids available from CPUID */ + uint32_t cpuid_max_basic; + uint32_t cpuid_max_ext; } i386_cpu_info_t; #ifdef __cplusplus diff --git a/osfmk/i386/cswitch.s b/osfmk/i386/cswitch.s index 0668c465e..3110cc2c6 100644 --- a/osfmk/i386/cswitch.s +++ b/osfmk/i386/cswitch.s @@ -80,14 +80,15 @@ Entry(Load_context) movl S_ARG0,%ecx /* get thread */ movl TH_KERNEL_STACK(%ecx),%ecx /* get kernel stack */ - lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%edx - /* point to stack top */ + lea -IKS_SIZE-IEL_SIZE(%ecx),%edx + add EXT(kernel_stack_size),%edx /* point to stack top */ movl %ecx,%gs:CPU_ACTIVE_STACK /* store stack address */ movl %edx,%gs:CPU_KERNEL_STACK /* store stack top */ movl %edx,%esp movl %edx,%ebp + subl $12, %esp /* align stack */ xorl %eax,%eax /* return zero (no old thread) */ pushl %eax call EXT(thread_continue) 
@@ -103,7 +104,7 @@ Entry(Switch_context) /* Test for a continuation and skip all state saving if so... */ cmpl $0,4(%esp) jne 5f - movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ + movl %gs:CPU_KERNEL_STACK,%ecx /* get old kernel stack top */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) movl %edi,KSS_EDI(%ecx) @@ -112,14 +113,15 @@ Entry(Switch_context) movl %esp,KSS_ESP(%ecx) /* save SP */ 5: movl 0(%esp),%eax /* return old thread */ - movl 8(%esp),%ebx /* get new thread */ - movl %ebx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ - movl TH_KERNEL_STACK(%ebx),%ecx /* get its kernel stack */ - lea KERNEL_STACK_SIZE-IKS_SIZE-IEL_SIZE(%ecx),%ebx + movl 8(%esp),%ecx /* get new thread */ + movl %ecx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ + movl TH_KERNEL_STACK(%ecx),%ebx /* get its kernel stack */ + lea -IKS_SIZE-IEL_SIZE(%ebx),%ecx + add EXT(kernel_stack_size),%ecx /* point to stack top */ - movl %ecx,%gs:CPU_ACTIVE_STACK /* set current stack */ - movl %ebx,%gs:CPU_KERNEL_STACK /* set stack top */ + movl %ebx,%gs:CPU_ACTIVE_STACK /* set current stack */ + movl %ecx,%gs:CPU_KERNEL_STACK /* set stack top */ movl KSS_ESP(%ecx),%esp /* switch stacks */ @@ -130,6 +132,7 @@ Entry(Switch_context) jmp *KSS_EIP(%ecx) /* return old thread */ Entry(Thread_continue) + subl $12, %esp /* align stack */ pushl %eax /* push the thread argument */ xorl %ebp,%ebp /* zero frame pointer */ call *%ebx /* call real continuation */ @@ -146,7 +149,7 @@ Entry(Thread_continue) * */ Entry(Shutdown_context) - movl %gs:CPU_ACTIVE_STACK,%ecx /* get old kernel stack */ + movl %gs:CPU_KERNEL_STACK,%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) movl %edi,KSS_EDI(%ecx) @@ -154,6 +157,7 @@ Entry(Shutdown_context) popl KSS_EIP(%ecx) /* save return PC */ movl %esp,KSS_ESP(%ecx) /* save SP */ + movl %gs:CPU_ACTIVE_STACK,%ecx /* get old stack */ movl 0(%esp),%eax /* get old thread */ movl 
%ecx,TH_KERNEL_STACK(%eax) /* save old stack */ movl 4(%esp),%ebx /* get routine to run next */ @@ -161,6 +165,7 @@ Entry(Shutdown_context) movl %gs:CPU_INT_STACK_TOP,%esp /* switch to interrupt stack */ + subl $12, %esp /* align stack */ pushl %esi /* push argument */ call *%ebx /* call routine to run */ hlt /* (should never return) */ diff --git a/osfmk/i386/db_gcc_aout.c b/osfmk/i386/db_gcc_aout.c index 93ddcb531..508146b96 100644 --- a/osfmk/i386/db_gcc_aout.c +++ b/osfmk/i386/db_gcc_aout.c @@ -653,7 +653,7 @@ read_symtab_from_file(fp, symtab_name) table_size = sizeof(int) + symsize + strsize; table_size = (table_size + sizeof(int)-1) & ~(sizeof(int)-1); - result = kmem_alloc_wired(kernel_map, &symtab, table_size); + result = kmem_alloc_kobject(kernel_map, &symtab, table_size); if (result) { boot_printf("[ error %d allocating space for %s symbol table ]\n", result, symtab_name); diff --git a/osfmk/i386/db_interface.c b/osfmk/i386/db_interface.c index 7390ddf04..e4c025bdf 100644 --- a/osfmk/i386/db_interface.c +++ b/osfmk/i386/db_interface.c @@ -100,6 +100,7 @@ int db_active = 0; x86_saved_state32_t *i386_last_saved_statep; x86_saved_state32_t i386_nested_saved_state; unsigned i386_last_kdb_sp; +db_regs_t ddb_regs; /* register state */ extern thread_t db_default_act; extern pt_entry_t *DMAP1; @@ -508,7 +509,7 @@ db_user_to_kernel_address( * back since it's been mapped through a per-cpu window */ mp_disable_preemption(); - + ptp = pmap_pte(task->map->pmap, (vm_map_offset_t)addr); if (ptp == PT_ENTRY_NULL || (*ptp & INTEL_PTE_VALID) == 0) { if (flag) { @@ -520,7 +521,6 @@ db_user_to_kernel_address( return(-1); } src = (vm_offset_t)pte_to_pa(*ptp); - mp_enable_preemption(); *(int *) DMAP1 = INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | @@ -1013,7 +1013,7 @@ kdb_on( * system reboot */ -extern void kdp_reboot(void); +extern void kdp_machine_reboot(void); void db_reboot( db_expr_t addr, @@ -1021,5 +1021,5 @@ void db_reboot( db_expr_t count, char *modif) { - 
kdp_reboot(); + kdp_machine_reboot(); } diff --git a/osfmk/i386/db_machdep.h b/osfmk/i386/db_machdep.h index 99577bb30..ca046869d 100644 --- a/osfmk/i386/db_machdep.h +++ b/osfmk/i386/db_machdep.h @@ -66,16 +66,19 @@ #include #include #include +#ifdef __i386__ #include /* for thread_status */ #include #include #include +#endif typedef addr64_t db_addr_t; /* address - unsigned */ typedef uint64_t db_expr_t; /* expression */ +#ifdef __i386__ typedef struct x86_saved_state32 db_regs_t; -db_regs_t ddb_regs; /* register state */ +extern db_regs_t ddb_regs; /* register state */ #define DDB_REGS (&ddb_regs) extern int db_active; /* ddb is active */ @@ -203,5 +206,6 @@ extern void db_chkpmgr(void); #endif /* MACH_KDB */ extern void db_pmgr(db_expr_t addr, int have_addr, db_expr_t count, char * modif); extern void db_nap(db_expr_t addr, int have_addr, db_expr_t count, char * modif); +#endif /* __i386__ */ #endif /* _I386_DB_MACHDEP_H_ */ diff --git a/osfmk/i386/db_trace.c b/osfmk/i386/db_trace.c index b828b615c..a14bb16b5 100644 --- a/osfmk/i386/db_trace.c +++ b/osfmk/i386/db_trace.c @@ -83,7 +83,7 @@ #include extern jmp_buf_t *db_recover; -struct x86_kernel_state32 ddb_null_kregs; +struct x86_kernel_state ddb_null_kregs; extern kmod_info_t *kmod; @@ -116,12 +116,12 @@ struct i386_kregs { char *name; unsigned int offset; } i386_kregs[] = { - { "ebx", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_ebx) }, - { "esp", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_esp) }, - { "ebp", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_ebp) }, - { "edi", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_edi) }, - { "esi", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_esi) }, - { "eip", (unsigned int)(&((struct x86_kernel_state32 *)0)->k_eip) }, + { "ebx", (unsigned int)(&((struct x86_kernel_state *)0)->k_ebx) }, + { "esp", (unsigned int)(&((struct x86_kernel_state *)0)->k_esp) }, + { "ebp", (unsigned int)(&((struct x86_kernel_state *)0)->k_ebp) }, + { "edi", 
(unsigned int)(&((struct x86_kernel_state *)0)->k_edi) }, + { "esi", (unsigned int)(&((struct x86_kernel_state *)0)->k_esi) }, + { "eip", (unsigned int)(&((struct x86_kernel_state *)0)->k_eip) }, { 0 } }; @@ -592,7 +592,7 @@ db_stack_trace_cmd( callpc = (db_addr_t) (iss32->eip); } else { if (cpu == real_ncpus) { - register struct x86_kernel_state32 *iks; + register struct x86_kernel_state *iks; int r; iks = STACK_IKS(th->kernel_stack); @@ -811,7 +811,7 @@ db_stack_trace_cmd( } } -extern int kdp_vm_read(caddr_t, caddr_t, unsigned int ); +extern mach_vm_size_t kdp_machine_vm_read(mach_vm_address_t, caddr_t, mach_vm_size_t); extern boolean_t kdp_trans_off; /* * Print out 256 bytes of real storage @@ -829,11 +829,11 @@ db_display_real(db_expr_t addr, boolean_t have_addr, db_expr_t count, for(i=0; i<8; i++) { /* - * Do a physical read using kdp_vm_read(), rather than replicating the same + * Do a physical read using kdp_machine_vm_read(), rather than replicating the same * facility */ kdp_trans_off = 1; - read_result = kdp_vm_read(addr, &xbuf[0], 32); + read_result = kdp_machine_vm_read(addr, &xbuf[0], 32); kdp_trans_off = 0; if (read_result != 32) diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 618f2d9b0..5c458843b 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -70,18 +70,18 @@ #include #include -#include -#include -#include #include #include -#include +#include #include +#include +#include +#include int fp_kind = FP_NO; /* not inited */ zone_t ifps_zone; /* zone for FPU save area */ -#define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) +#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0) /* Forward */ @@ -113,7 +113,6 @@ configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps) bzero(ifps, sizeof(*ifps)); /* Disable FPU/SSE Device Not Available exceptions */ clear_ts(); - __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state)); mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK; @@ -215,7 +214,7 @@ 
fpu_module_init(void) struct x86_fpsave_state *new_ifps; ifps_zone = zinit(sizeof(struct x86_fpsave_state), - THREAD_MAX * sizeof(struct x86_fpsave_state), + thread_max * sizeof(struct x86_fpsave_state), THREAD_CHUNK * sizeof(struct x86_fpsave_state), "x86 fpsave state"); new_ifps = fp_state_alloc(); @@ -479,7 +478,7 @@ fpinit(void) /* Initialize SSE/SSE2 */ __builtin_ia32_ldmxcsr(0x1f80); - } +} /* * Coprocessor not present. @@ -638,6 +637,7 @@ fp_save( /* registers are in FPU */ ifps->fp_valid = TRUE; +#if defined(__i386__) if (!thread_is_64bit(thr_act)) { /* save the compatibility/legacy mode XMM+x87 state */ fxsave(&ifps->fx_save_state); @@ -647,6 +647,10 @@ fp_save( fxsave64(&ifps->fx_save_state); ifps->fp_save_layout = FXSAVE64; } +#elif defined(__x86_64__) + fxsave(&ifps->fx_save_state); + ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; +#endif } } @@ -675,6 +679,7 @@ fp_load( fpinit(); } else { assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); +#if defined(__i386__) if (ifps->fp_save_layout == FXSAVE32) { /* Restore the compatibility/legacy mode XMM+x87 state */ fxrstor(&ifps->fx_save_state); @@ -682,6 +687,9 @@ fp_load( else if (ifps->fp_save_layout == FXSAVE64) { fxrstor64(&ifps->fx_save_state); } +#elif defined(__x86_64__) + fxrstor(&ifps->fx_save_state); +#endif } ifps->fp_valid = FALSE; /* in FPU */ } @@ -734,7 +742,7 @@ fpSSEexterrflt(void) assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); i386_exception(EXC_ARITHMETIC, EXC_I386_SSEEXTERR, - ifps->fx_save_state.fx_status); + ifps->fx_save_state.fx_MXCSR); /*NOTREACHED*/ } diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index cd0b78906..7b6f86a82 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -62,11 +62,11 @@ * floating-point processor. 
*/ #include -#include #include #include #include #include +#include extern int fp_kind; @@ -89,8 +89,10 @@ extern void fpexterrflt(void); extern void fpSSEexterrflt(void); extern void fpflush(thread_t); extern void fp_setvalid(boolean_t); +#ifdef __i386__ extern void fxsave64(struct x86_fx_save *); extern void fxrstor64(struct x86_fx_save *); +#endif /* * FPU instructions. @@ -135,6 +137,7 @@ static inline void clear_fpu(void) set_ts(); } + /* * Save thread`s FPU context. */ @@ -155,6 +158,7 @@ static inline void fpu_save_context(thread_t thread) /* registers are in FPU - save to memory */ ifps->fp_valid = TRUE; +#if defined(__i386__) if (!thread_is_64bit(thread) || is_saved_state32(thread->machine.pcb->iss)) { /* save the compatibility/legacy mode XMM+x87 state */ fxsave(&ifps->fx_save_state); @@ -167,6 +171,13 @@ static inline void fpu_save_context(thread_t thread) fxsave64(&ifps->fx_save_state); ifps->fp_save_layout = FXSAVE64; } +#elif defined(__x86_64__) + /* for a 64-bit long mode kernel, we can always use plain fxsave */ + fxsave(&ifps->fx_save_state); + ifps->fp_save_layout = thread_is_64bit(thread) ? FXSAVE64 + : FXSAVE32; + +#endif } set_ts(); } diff --git a/osfmk/i386/gdt.c b/osfmk/i386/gdt.c index 4150e7c0d..c3502e06b 100644 --- a/osfmk/i386/gdt.c +++ b/osfmk/i386/gdt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,67 +60,51 @@ /* * Global descriptor table. 
*/ -#include -#include -#include -#include -#include +#include -struct fake_descriptor master_gdt[GDTSZ] __attribute__ ((aligned (4096))) = { - [SEL_TO_INDEX(KERNEL_CS)] { /* kernel code */ +struct real_descriptor master_gdt[GDTSZ] __attribute__ ((section("__INITGDT,__data")))= { + [SEL_TO_INDEX(KERNEL32_CS)] MAKE_REAL_DESCRIPTOR( /* kernel 32-bit code */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_CODE_R, - }, - [SEL_TO_INDEX(KERNEL_DS)] { /* kernel data */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_CODE_R + ), + [SEL_TO_INDEX(KERNEL_DS)] MAKE_REAL_DESCRIPTOR( /* kernel data */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_DATA_W - }, - [SEL_TO_INDEX(KERNEL_LDT)] { /* local descriptor table */ - (uint32_t) &master_ldt, - LDTSZ_MIN*sizeof(struct fake_descriptor)-1, - 0, - ACC_P|ACC_PL_K|ACC_LDT - }, /* The slot KERNEL_LDT_2 is reserved. */ - [SEL_TO_INDEX(KERNEL_TSS)] { /* TSS for this processor */ - (uint32_t) &master_ktss, - sizeof(struct i386_tss)-1, - 0, - ACC_P|ACC_PL_K|ACC_TSS - }, /* The slot KERNEL_TSS_2 is reserved. 
*/ - [SEL_TO_INDEX(CPU_DATA_GS)] { /* per-CPU current thread address */ - (uint32_t) &cpu_data_master, - sizeof(cpu_data_t)-1, - SZ_32, - ACC_P|ACC_PL_K|ACC_DATA_W - }, - [SEL_TO_INDEX(USER_LDT)] { /* user local descriptor table */ - (uint32_t) &master_ldt, - LDTSZ_MIN*sizeof(struct fake_descriptor)-1, - 0, - ACC_P|ACC_PL_K|ACC_LDT - }, - [SEL_TO_INDEX(KERNEL64_CS)] { /* kernel 64-bit code */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_DATA_W + ), + [SEL_TO_INDEX(KERNEL64_CS)] MAKE_REAL_DESCRIPTOR( /* kernel 64-bit code */ 0, 0xfffff, SZ_64|SZ_G, ACC_P|ACC_PL_K|ACC_CODE_R - }, - [SEL_TO_INDEX(KERNEL64_SS)] { /* kernel 64-bit syscall stack */ + ), + [SEL_TO_INDEX(KERNEL64_SS)] MAKE_REAL_DESCRIPTOR( /* kernel 64-bit syscall stack */ 0, 0xfffff, SZ_32|SZ_G, ACC_P|ACC_PL_K|ACC_DATA_W - }, -#if MACH_KDB - [SEL_TO_INDEX(DEBUG_TSS)] { /* TSS for this processor */ - (uint32_t)&master_dbtss, - sizeof(struct i386_tss)-1, - 0, - ACC_P|ACC_PL_K|ACC_TSS - }, -#endif /* MACH_KDB */ + ), +#ifdef __x86_64__ + [SEL_TO_INDEX(USER_CS)] MAKE_REAL_DESCRIPTOR( /* 32-bit user code segment */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_CODE_R + ), + [SEL_TO_INDEX(USER_DS)] MAKE_REAL_DESCRIPTOR( /* 32-bit user data segment */ + 0, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W + ), + [SEL_TO_INDEX(USER64_CS)] MAKE_REAL_DESCRIPTOR( /* user 64-bit code segment */ + 0, + 0xfffff, + SZ_64|SZ_G, + ACC_P|ACC_PL_U|ACC_CODE_R + ), +#endif }; diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index 86b97b4bc..a254013dd 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -73,19 +73,19 @@ #include #include #include -#include -#include -#include #include +#include +#include +#include +#include +#include #include #include #include -#include #include #include #include #include -#include #include #if CONFIG_DTRACE @@ -93,6 +93,7 @@ #include <../bsd/sys/lockstat.h> #endif + /* * genassym.c is used to produce an * assembly file which, intermingled with unuseful assembly 
code, @@ -142,31 +143,39 @@ main( #endif /* MACH_LDEBUG */ /* Mutex structure */ - DECLARE("MUTEX_LOCKED", offsetof(mutex_t *, lck_mtx.lck_mtx_locked)); - DECLARE("MUTEX_WAITERS",offsetof(mutex_t *, lck_mtx.lck_mtx_waiters)); - DECLARE("MUTEX_PROMOTED_PRI",offsetof(mutex_t *, lck_mtx.lck_mtx_pri)); -#if MACH_LDEBUG - DECLARE("MUTEX_TYPE", offsetof(mutex_t *, type)); - DECLARE("MUTEX_PC", offsetof(mutex_t *, pc)); - DECLARE("MUTEX_THREAD", offsetof(mutex_t *, thread)); + DECLARE("MUTEX_OWNER", offsetof(lck_mtx_t *, lck_mtx_owner)); + DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); + DECLARE("MUTEX_STATE", offsetof(lck_mtx_t *, lck_mtx_state)); +#ifdef __i386__ + DECLARE("MUTEX_TYPE", offsetof(lck_mtx_ext_t *, lck_mtx_deb.type)); + DECLARE("MUTEX_PC", offsetof(lck_mtx_ext_t *, lck_mtx_deb.pc)); + DECLARE("MUTEX_THREAD", offsetof(lck_mtx_ext_t *, lck_mtx_deb.thread)); + DECLARE("MUTEX_ATTR", offsetof(lck_mtx_ext_t *, lck_mtx_attr)); + DECLARE("MUTEX_ATTR_DEBUG", LCK_MTX_ATTR_DEBUG); + DECLARE("MUTEX_ATTR_DEBUGb", LCK_MTX_ATTR_DEBUGb); + DECLARE("MUTEX_ATTR_STAT", LCK_MTX_ATTR_STAT); + DECLARE("MUTEX_ATTR_STATb", LCK_MTX_ATTR_STATb); DECLARE("MUTEX_TAG", MUTEX_TAG); -#endif /* MACH_LDEBUG */ +#endif DECLARE("MUTEX_IND", LCK_MTX_TAG_INDIRECT); - DECLARE("MUTEX_DESTROYED", LCK_MTX_TAG_DESTROYED); - DECLARE("MUTEX_LOCKED_AS_SPIN", MUTEX_LOCKED_AS_SPIN); + DECLARE("MUTEX_EXT", LCK_MTX_PTR_EXTENDED); DECLARE("MUTEX_ITAG", offsetof(lck_mtx_t *, lck_mtx_tag)); DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); DECLARE("MUTEX_ASSERT_OWNED", LCK_MTX_ASSERT_OWNED); DECLARE("MUTEX_ASSERT_NOTOWNED",LCK_MTX_ASSERT_NOTOWNED); + DECLARE("GRP_MTX_STAT_UTIL", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt)); + DECLARE("GRP_MTX_STAT_MISS", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt)); + DECLARE("GRP_MTX_STAT_WAIT", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt)); + + /* x86 only */ + 
DECLARE("MUTEX_DESTROYED", LCK_MTX_TAG_DESTROYED); + /* Per-mutex statistic element */ DECLARE("MTX_ACQ_TSC", offsetof(lck_mtx_ext_t *, lck_mtx_stat)); /* Mutex group statistics elements */ DECLARE("MUTEX_GRP", offsetof(lck_mtx_ext_t *, lck_mtx_grp)); - DECLARE("GRP_MTX_STAT_UTIL", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt)); - DECLARE("GRP_MTX_STAT_MISS", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt)); - DECLARE("GRP_MTX_STAT_WAIT", offsetof(lck_grp_t *, lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt)); /* * The use of this field is somewhat at variance with the alias. */ @@ -197,26 +206,42 @@ main( DECLARE("ACT_PCB", offsetof(thread_t, machine.pcb)); DECLARE("ACT_SPF", offsetof(thread_t, machine.specFlags)); DECLARE("ACT_MAP", offsetof(thread_t, map)); - DECLARE("ACT_COPYIO_STATE", offsetof(thread_t, machine.copyio_state)); DECLARE("ACT_PCB_ISS", offsetof(thread_t, machine.xxx_pcb.iss)); DECLARE("ACT_PCB_IDS", offsetof(thread_t, machine.xxx_pcb.ids)); - +#if NCOPY_WINDOWS > 0 + DECLARE("ACT_COPYIO_STATE", offsetof(thread_t, machine.copyio_state)); DECLARE("WINDOWS_CLEAN", WINDOWS_CLEAN); +#endif DECLARE("MAP_PMAP", offsetof(vm_map_t, pmap)); -#define IKS ((size_t) (STACK_IKS(0))) - - DECLARE("KSS_EBX", IKS + offsetof(struct x86_kernel_state32 *, k_ebx)); - DECLARE("KSS_ESP", IKS + offsetof(struct x86_kernel_state32 *, k_esp)); - DECLARE("KSS_EBP", IKS + offsetof(struct x86_kernel_state32 *, k_ebp)); - DECLARE("KSS_EDI", IKS + offsetof(struct x86_kernel_state32 *, k_edi)); - DECLARE("KSS_ESI", IKS + offsetof(struct x86_kernel_state32 *, k_esi)); - DECLARE("KSS_EIP", IKS + offsetof(struct x86_kernel_state32 *, k_eip)); - - DECLARE("IKS_SIZE", sizeof(struct x86_kernel_state32)); - DECLARE("IEL_SIZE", sizeof(struct i386_exception_link)); +#define IEL_SIZE (sizeof(struct i386_exception_link *)) + DECLARE("IEL_SIZE", IEL_SIZE); + DECLARE("IKS_SIZE", sizeof(struct x86_kernel_state)); + /* + * KSS_* are 
offsets from the top of the kernel stack (cpu_kernel_stack) + */ +#if defined(__i386__) + DECLARE("KSS_EBX", IEL_SIZE + offsetof(struct x86_kernel_state *, k_ebx)); + DECLARE("KSS_ESP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_esp)); + DECLARE("KSS_EBP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_ebp)); + DECLARE("KSS_EDI", IEL_SIZE + offsetof(struct x86_kernel_state *, k_edi)); + DECLARE("KSS_ESI", IEL_SIZE + offsetof(struct x86_kernel_state *, k_esi)); + DECLARE("KSS_EIP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_eip)); +#elif defined(__x86_64__) + DECLARE("KSS_RBX", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rbx)); + DECLARE("KSS_RSP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rsp)); + DECLARE("KSS_RBP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rbp)); + DECLARE("KSS_R12", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r12)); + DECLARE("KSS_R13", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r13)); + DECLARE("KSS_R14", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r14)); + DECLARE("KSS_R15", IEL_SIZE + offsetof(struct x86_kernel_state *, k_r15)); + DECLARE("KSS_RIP", IEL_SIZE + offsetof(struct x86_kernel_state *, k_rip)); +#else +#error Unsupported architecture +#endif + DECLARE("PCB_FPS", offsetof(pcb_t, ifps)); DECLARE("PCB_ISS", offsetof(pcb_t, iss)); @@ -245,25 +270,25 @@ main( DECLARE("SS_64", x86_SAVED_STATE64); #define R_(x) offsetof(x86_saved_state_t *, ss_32.x) - DECLARE("R_CS", R_(cs)); - DECLARE("R_SS", R_(ss)); - DECLARE("R_DS", R_(ds)); - DECLARE("R_ES", R_(es)); - DECLARE("R_FS", R_(fs)); - DECLARE("R_GS", R_(gs)); - DECLARE("R_UESP", R_(uesp)); - DECLARE("R_EBP", R_(ebp)); - DECLARE("R_EAX", R_(eax)); - DECLARE("R_EBX", R_(ebx)); - DECLARE("R_ECX", R_(ecx)); - DECLARE("R_EDX", R_(edx)); - DECLARE("R_ESI", R_(esi)); - DECLARE("R_EDI", R_(edi)); - DECLARE("R_TRAPNO", R_(trapno)); - DECLARE("R_ERR", R_(err)); - DECLARE("R_EFLAGS", R_(efl)); - DECLARE("R_EIP", R_(eip)); - DECLARE("R_CR2", R_(cr2)); + 
DECLARE("R32_CS", R_(cs)); + DECLARE("R32_SS", R_(ss)); + DECLARE("R32_DS", R_(ds)); + DECLARE("R32_ES", R_(es)); + DECLARE("R32_FS", R_(fs)); + DECLARE("R32_GS", R_(gs)); + DECLARE("R32_UESP", R_(uesp)); + DECLARE("R32_EBP", R_(ebp)); + DECLARE("R32_EAX", R_(eax)); + DECLARE("R32_EBX", R_(ebx)); + DECLARE("R32_ECX", R_(ecx)); + DECLARE("R32_EDX", R_(edx)); + DECLARE("R32_ESI", R_(esi)); + DECLARE("R32_EDI", R_(edi)); + DECLARE("R32_TRAPNO", R_(trapno)); + DECLARE("R32_ERR", R_(err)); + DECLARE("R32_EFLAGS", R_(efl)); + DECLARE("R32_EIP", R_(eip)); + DECLARE("R32_CR2", R_(cr2)); DECLARE("ISS32_SIZE", sizeof (x86_saved_state32_t)); #define R64_(x) offsetof(x86_saved_state_t *, ss_64.x) @@ -326,51 +351,67 @@ main( DECLARE("PAGE_MASK", I386_PGBYTES-1); DECLARE("PAGE_SHIFT", 12); DECLARE("NKPT", NKPT); +#ifdef __i386__ DECLARE("KPTDI", KPTDI); +#endif DECLARE("VM_MIN_ADDRESS", VM_MIN_ADDRESS); DECLARE("VM_MAX_ADDRESS", VM_MAX_ADDRESS); DECLARE("KERNELBASE", VM_MIN_KERNEL_ADDRESS); DECLARE("LINEAR_KERNELBASE", LINEAR_KERNEL_ADDRESS); DECLARE("KERNEL_STACK_SIZE", KERNEL_STACK_SIZE); +#ifdef __i386__ DECLARE("KERNEL_UBER_BASE_HI32", KERNEL_UBER_BASE_HI32); +#endif - DECLARE("COMM_PAGE_BASE_ADDR", _COMM_PAGE_BASE_ADDRESS); + DECLARE("ASM_COMM_PAGE32_BASE_ADDRESS", _COMM_PAGE32_BASE_ADDRESS); + DECLARE("ASM_COMM_PAGE32_START_ADDRESS", _COMM_PAGE32_START_ADDRESS); + DECLARE("ASM_COMM_PAGE_SCHED_GEN", _COMM_PAGE_SCHED_GEN); DECLARE("PDESHIFT", PDESHIFT); DECLARE("PTEMASK", PTEMASK); DECLARE("PTEINDX", PTEINDX); - DECLARE("PTE_PFN", INTEL_PTE_PFN); - DECLARE("PTE_V", INTEL_PTE_VALID); - DECLARE("PTE_W", INTEL_PTE_WRITE); - DECLARE("PTE_PS", INTEL_PTE_PS); - DECLARE("PTE_U", INTEL_PTE_USER); - DECLARE("PTE_INVALID", ~INTEL_PTE_VALID); + DECLARE("INTEL_PTE_PFN", INTEL_PTE_PFN); + DECLARE("INTEL_PTE_VALID", INTEL_PTE_VALID); + DECLARE("INTEL_PTE_WRITE", INTEL_PTE_WRITE); + DECLARE("INTEL_PTE_PS", INTEL_PTE_PS); + DECLARE("INTEL_PTE_USER", INTEL_PTE_USER); + 
DECLARE("INTEL_PTE_INVALID", INTEL_PTE_INVALID); DECLARE("NPGPTD", NPGPTD); - +#if defined(__x86_64__) + DECLARE("INITPT_SEG_BASE",INITPT_SEG_BASE); + DECLARE("INITGDT_SEG_BASE",INITGDT_SEG_BASE); + DECLARE("SLEEP_SEG_BASE",SLEEP_SEG_BASE); + DECLARE("PROT_MODE_GDT_SIZE",PROT_MODE_GDT_SIZE); + DECLARE("KERNEL_PML4_INDEX",KERNEL_PML4_INDEX); +#endif DECLARE("IDTSZ", IDTSZ); DECLARE("GDTSZ", GDTSZ); DECLARE("LDTSZ", LDTSZ); - DECLARE("KERNEL_CS", KERNEL_CS); DECLARE("KERNEL_DS", KERNEL_DS); DECLARE("USER_CS", USER_CS); DECLARE("USER_DS", USER_DS); + DECLARE("KERNEL32_CS", KERNEL32_CS); DECLARE("KERNEL64_CS", KERNEL64_CS); DECLARE("USER64_CS", USER64_CS); DECLARE("KERNEL_TSS", KERNEL_TSS); DECLARE("KERNEL_LDT", KERNEL_LDT); +#ifdef __i386__ DECLARE("DF_TSS", DF_TSS); DECLARE("MC_TSS", MC_TSS); #if MACH_KDB DECLARE("DEBUG_TSS", DEBUG_TSS); #endif /* MACH_KDB */ - DECLARE("CPU_DATA_GS", CPU_DATA_GS); + DECLARE("CPU_DATA_GS", CPU_DATA_GS); +#endif /* __i386__ */ DECLARE("SYSENTER_CS", SYSENTER_CS); DECLARE("SYSENTER_TF_CS",SYSENTER_TF_CS); DECLARE("SYSENTER_DS", SYSENTER_DS); DECLARE("SYSCALL_CS", SYSCALL_CS); +#ifdef __i386__ DECLARE("USER_WINDOW_SEL", USER_WINDOW_SEL); DECLARE("PHYS_WINDOW_SEL", PHYS_WINDOW_SEL); +#endif DECLARE("CPU_THIS", offsetof(cpu_data_t *, cpu_this)); @@ -386,6 +427,8 @@ main( DECLARE("CPU_PREEMPTION_LEVEL", offsetof(cpu_data_t *, cpu_preemption_level)); #endif /* MACH_RT */ + DECLARE("CPU_HIBERNATE", + offsetof(cpu_data_t *, cpu_hibernate)); DECLARE("CPU_INTERRUPT_LEVEL", offsetof(cpu_data_t *, cpu_interrupt_level)); DECLARE("CPU_SIMPLE_LOCK_COUNT", @@ -413,22 +456,30 @@ main( DECLARE("CPU_INT_EVENT_TIME", offsetof(cpu_data_t *, cpu_int_event_time)); +#ifdef __i386__ DECLARE("CPU_HI_ISS", offsetof(cpu_data_t *, cpu_hi_iss)); +#endif DECLARE("CPU_TASK_CR3", offsetof(cpu_data_t *, cpu_task_cr3)); DECLARE("CPU_ACTIVE_CR3", offsetof(cpu_data_t *, cpu_active_cr3)); DECLARE("CPU_KERNEL_CR3", offsetof(cpu_data_t *, cpu_kernel_cr3)); +#ifdef __x86_64__ 
+ DECLARE("CPU_TLB_INVALID", + offsetof(cpu_data_t *, cpu_tlb_invalid)); +#endif DECLARE("CPU_IS64BIT", offsetof(cpu_data_t *, cpu_is64bit)); DECLARE("CPU_TASK_MAP", offsetof(cpu_data_t *, cpu_task_map)); DECLARE("TASK_MAP_32BIT", TASK_MAP_32BIT); - DECLARE("TASK_MAP_64BIT", TASK_MAP_64BIT); + DECLARE("TASK_MAP_64BIT", TASK_MAP_64BIT); +#ifdef __i386__ DECLARE("TASK_MAP_64BIT_SHARED", TASK_MAP_64BIT_SHARED); +#endif DECLARE("CPU_UBER_USER_GS_BASE", offsetof(cpu_data_t *, cpu_uber.cu_user_gs_base)); DECLARE("CPU_UBER_ISF", @@ -469,13 +520,15 @@ main( DECLARE("dgMisc5", offsetof(struct diagWork *, dgMisc5)); DECLARE("INTEL_PTE_KERNEL", INTEL_PTE_VALID|INTEL_PTE_WRITE); - DECLARE("PTDPTDI", PTDPTDI); DECLARE("PDESHIFT", PDESHIFT); DECLARE("PDESIZE", PDESIZE); DECLARE("PTESIZE", PTESIZE); +#ifdef __i386__ + DECLARE("PTDPTDI", PTDPTDI); DECLARE("APTDPTDI", APTDPTDI); DECLARE("HIGH_MEM_BASE", HIGH_MEM_BASE); DECLARE("HIGH_IDT_BASE", pmap_index_to_virt(HIGH_FIXED_IDT)); +#endif DECLARE("KERNELBASEPDE", (LINEAR_KERNEL_ADDRESS >> PDESHIFT) * @@ -500,9 +553,6 @@ main( DECLARE("USL_INTERLOCK", offsetof(usimple_lock_t, interlock)); DECLARE("INTSTACK_SIZE", INTSTACK_SIZE); - DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); - DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); - DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); DECLARE("KADDR", offsetof(struct boot_args *, kaddr)); DECLARE("KSIZE", offsetof(struct boot_args *, ksize)); DECLARE("MEMORYMAP", offsetof(struct boot_args *, MemoryMap)); @@ -520,12 +570,13 @@ main( offsetof(rtc_nanotime_t *, generation)); /* values from kern/timer.h */ - DECLARE("TIMER_LOW", - offsetof(struct timer *, low_bits)); - DECLARE("TIMER_HIGH", - offsetof(struct timer *, high_bits)); - DECLARE("TIMER_HIGHCHK", - offsetof(struct timer *, high_bits_check)); +#ifdef __LP64__ + DECLARE("TIMER_ALL", offsetof(struct timer *, all_bits)); +#else + DECLARE("TIMER_LOW", offsetof(struct timer *, low_bits)); + 
DECLARE("TIMER_HIGH", offsetof(struct timer *, high_bits)); + DECLARE("TIMER_HIGHCHK", offsetof(struct timer *, high_bits_check)); +#endif #if !STAT_TIME DECLARE("TIMER_TSTAMP", offsetof(struct timer *, tstamp)); @@ -550,6 +601,7 @@ main( DECLARE("OnProc", OnProc); + #if CONFIG_DTRACE DECLARE("LS_LCK_MTX_LOCK_ACQUIRE", LS_LCK_MTX_LOCK_ACQUIRE); DECLARE("LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE", LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE); @@ -560,13 +612,11 @@ main( DECLARE("LS_LCK_MTX_EXT_LOCK_ACQUIRE", LS_LCK_MTX_EXT_LOCK_ACQUIRE); DECLARE("LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE", LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE); DECLARE("LS_LCK_MTX_EXT_UNLOCK_RELEASE", LS_LCK_MTX_EXT_UNLOCK_RELEASE); - - DECLARE("LS_MUTEX_LOCK_ACQUIRE", LS_MUTEX_LOCK_ACQUIRE); - DECLARE("LS_MUTEX_TRY_SPIN_ACQUIRE", LS_MUTEX_TRY_SPIN_ACQUIRE); - DECLARE("LS_MUTEX_TRY_LOCK_ACQUIRE", LS_MUTEX_TRY_LOCK_ACQUIRE); - DECLARE("LS_MUTEX_UNLOCK_RELEASE", LS_MUTEX_UNLOCK_RELEASE); - DECLARE("LS_MUTEX_LOCK_SPIN_ACQUIRE", LS_MUTEX_LOCK_SPIN_ACQUIRE); - DECLARE("LS_MUTEX_CONVERT_SPIN_ACQUIRE", LS_MUTEX_CONVERT_SPIN_ACQUIRE); + DECLARE("LS_LCK_RW_LOCK_EXCL_ACQUIRE", LS_LCK_RW_LOCK_EXCL_ACQUIRE); + DECLARE("LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE", LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE); + DECLARE("LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE", LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE); + DECLARE("LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE", LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE); + DECLARE("LS_LCK_MTX_LOCK_SPIN_ACQUIRE", LS_LCK_MTX_LOCK_SPIN_ACQUIRE); #endif return (0); diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c index 4409b74bc..f82e45e6a 100644 --- a/osfmk/i386/hibernate_i386.c +++ b/osfmk/i386/hibernate_i386.c @@ -42,7 +42,7 @@ #include #include -#include "i386_lowmem.h" +#include #define MAX_BANKS 32 @@ -63,7 +63,7 @@ hibernate_page_list_allocate(void) hibernate_bitmap_t dram_ranges[MAX_BANKS]; boot_args * args = (boot_args *) PE_state.bootArgs; - mptr = (EfiMemoryRange *)args->MemoryMap; + mptr = (EfiMemoryRange 
*)ml_static_ptovirt(args->MemoryMap); if (args->MemoryMapDescriptorSize == 0) panic("Invalid memory map descriptor size"); msize = args->MemoryMapDescriptorSize; @@ -89,14 +89,31 @@ hibernate_page_list_allocate(void) case kEfiACPIMemoryNVS: case kEfiPalCode: - if (!num_banks || (base != (1 + dram_ranges[num_banks - 1].last_page))) + for (bank = 0; bank < num_banks; bank++) + { + if (dram_ranges[bank].first_page <= base) + continue; + if ((base + num) == dram_ranges[bank].first_page) + { + dram_ranges[bank].first_page = base; + num = 0; + } + break; + } + if (!num) break; + + if (bank && (base == (1 + dram_ranges[bank - 1].last_page))) + bank--; + else { num_banks++; - if (num_banks >= MAX_BANKS) - break; - dram_ranges[num_banks - 1].first_page = base; + if (num_banks >= MAX_BANKS) break; + bcopy(&dram_ranges[bank], + &dram_ranges[bank + 1], + (num_banks - bank - 1) * sizeof(hibernate_bitmap_t)); + dram_ranges[bank].first_page = base; } - dram_ranges[num_banks - 1].last_page = base + num - 1; + dram_ranges[bank].last_page = base + num - 1; break; // runtime services will be restarted, so no save @@ -129,7 +146,7 @@ hibernate_page_list_allocate(void) if (!list) return (list); - list->list_size = size; + list->list_size = (uint32_t)size; list->page_count = page_count; list->bank_count = num_banks; @@ -168,10 +185,12 @@ hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, { boot_args * args = (boot_args *) PE_state.bootArgs; +#if !defined(x86_64) hibernate_set_page_state(page_list, page_list_wired, I386_HIB_PAGETABLE, I386_HIB_PAGETABLE_COUNT, kIOHibernatePageStateFree); *pagesOut -= I386_HIB_PAGETABLE_COUNT; +#endif if (args->efiRuntimeServicesPageStart) { @@ -192,6 +211,11 @@ hibernate_processor_setup(IOHibernateImageHeader * header) header->runtimePages = args->efiRuntimeServicesPageStart; header->runtimePageCount = args->efiRuntimeServicesPageCount; + if (args->Version == kBootArgsVersion1 && args->Revision >= kBootArgsRevision1_5) { + 
header->runtimeVirtualPages = args->efiRuntimeServicesVirtualPageStart; + } else { + header->runtimeVirtualPages = 0; + } return (KERN_SUCCESS); } @@ -202,7 +226,19 @@ hibernate_vm_lock(void) if (current_cpu_datap()->cpu_hibernate) { vm_page_lock_queues(); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock(&vm_page_queue_free_lock); + + if (vm_page_local_q) { + uint32_t i; + + for (i = 0; i < vm_page_local_q_count; i++) { + struct vpl *lq; + + lq = &vm_page_local_q[i].vpl_un.vpl; + + VPL_LOCK(&lq->vpl_lock); + } + } } } @@ -211,7 +247,18 @@ hibernate_vm_unlock(void) { if (current_cpu_datap()->cpu_hibernate) { - mutex_unlock(&vm_page_queue_free_lock); + if (vm_page_local_q) { + uint32_t i; + + for (i = 0; i < vm_page_local_q_count; i++) { + struct vpl *lq; + + lq = &vm_page_local_q[i].vpl_un.vpl; + + VPL_UNLOCK(&lq->vpl_lock); + } + } + lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); } } diff --git a/osfmk/i386/hibernate_restore.c b/osfmk/i386/hibernate_restore.c new file mode 100644 index 000000000..c1dfd4e16 --- /dev/null +++ b/osfmk/i386/hibernate_restore.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include
+#include
+#include
+
+extern pd_entry_t BootstrapPTD[2048];
+
+/* Keeps the 2MB-aligned frame bits of a 64-bit address; ~mask is the offset. */
+#define TWO_MEG_MASK 0xFFFFFFFFFFE00000ULL
+
+/* Last slot of BootstrapPTD[]; repointed on demand as a scratch 2MB window. */
+#define DST_INDEX	2047UL
+
+/*
+ * Virtual address covered by BootstrapPTD[DST_INDEX].
+ * NOTE(review): presumably 4GB - 2MB (PDSHIFT == 21) -- confirm.
+ */
+static char *dstPtr = (char *)(DST_INDEX << PDSHIFT);
+
+/*
+ * hibernate_restore_phys_page
+ *
+ * Copy len bytes from src to the physical page dst.
+ *
+ *   src        virtually mapped source address, not necessarily page
+ *              aligned; 0 means "nothing to copy"
+ *   dst        physical, 4K page aligned destination address
+ *   len        number of bytes to copy (callers pass one 4K page)
+ *   procFlags  unused
+ *
+ * src and dst will not overlap.
+ *
+ * Destinations below dstPtr are written through their address directly
+ * (the 1-1 map).  Anything at or above it is reached by repointing the
+ * last page-directory entry at the 2MB frame containing dst.
+ *
+ * NOTE(review): the copy is open-coded rather than calling bcopy();
+ * presumably this runs in the restricted hibernate-restore context where
+ * only this code and its data may be touched -- confirm before changing.
+ */
+void
+hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags)
+{
+	(void)procFlags;
+	uint64_t	* d;
+	uint64_t	* s;
+	char		* dTail;
+	char		* sTail;
+	uint32_t	idx;
+	uint32_t	words;
+	uint32_t	tail;
+
+	if (src == 0)
+		return;
+
+	if (dst < (uint64_t) (uintptr_t)dstPtr)
+	{
+		d = (uint64_t *) (uintptr_t)dst;
+	}
+	else
+	{
+		/* Outside 1-1 4G map so set up the mappings for the dest page using 2MB pages */
+		BootstrapPTD[DST_INDEX] = (dst & TWO_MEG_MASK) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE;
+
+		/* Invalidate the page tables for this */
+		invlpg((uintptr_t) dstPtr);
+
+		/* Mask off the offset from the 2MB window */
+		dst &= ~TWO_MEG_MASK;
+		d = (uint64_t *) (dstPtr + dst);
+	}
+	s = (uint64_t *) (uintptr_t)src;
+
+	/* Bulk of the copy, one 64-bit word at a time. */
+	words = len / (uint32_t)sizeof(uint64_t);
+	for (idx = 0; idx < words; idx++)
+		d[idx] = s[idx];
+
+	/*
+	 * Copy whatever is left when len is not a multiple of 8, so the
+	 * whole request is honoured rather than silently truncated.
+	 */
+	tail  = len % (uint32_t)sizeof(uint64_t);
+	dTail = (char *) (d + words);
+	sTail = (char *) (s + words);
+	for (idx = 0; idx < tail; idx++)
+		dTail[idx] = sTail[idx];
+}
diff --git a/osfmk/i386/hibernate_restore.s b/osfmk/i386/hibernate_restore.s
deleted file mode 100644
index 756de43a4..000000000
--- a/osfmk/i386/hibernate_restore.s
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include - -#include -#include - -/* -This code is linked into the kernel but part of the "__HIB" section, which means -its used by code running in the special context of restoring the kernel text and data -from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything -it calls or references (ie. hibernate_restore_phys_page()) -needs to be careful to only touch memory also in the "__HIB" section. -*/ - -/* - * GAS won't handle an intersegment jump with a relocatable offset. 
- */ -#define LJMP(segment,address) \ - .byte 0xea ;\ - .long address ;\ - .word segment - -/* Location of temporary page tables */ -#define HPTD (0x13000) -#define HPDPT (0x17000) - -#define LAST_PAGE (0xFFE00000) -#define LAST_PAGE_PDE (0x7ff) - -/* - * fillpse - * eax = physical page address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillpse(base, prot) \ - shll $3,%ebx ; \ - addl base,%ebx ; \ - orl $(PTE_V|PTE_PS|0x60), %eax ; \ - orl prot,%eax ; \ - xorl %edx, %edx ; \ -1: movl %eax,(%ebx) ; /* low 32b */ \ - addl $4,%ebx ; \ - movl %edx,(%ebx) ; /* high 32b */ \ - addl $(1 << PDESHIFT),%eax ; /* increment physical address 2Mb */ \ - addl $4,%ebx ; /* next entry */ \ - loop 1b - - - -/* Segment Descriptor - * - * 31 24 19 16 7 0 - * ------------------------------------------------------------ - * | | |B| |A| | | |1|0|E|W|A| | - * | BASE 31..24 |G|/|0|V| LIMIT |P|DPL| TYPE | BASE 23:16 | - * | | |D| |L| 19..16| | |1|1|C|R|A| | - * ------------------------------------------------------------ - * | | | - * | BASE 15..0 | LIMIT 15..0 | - * | | | - * ------------------------------------------------------------ - */ - - .align ALIGN -ENTRY(hib_gdt) - .word 0, 0 /* 0x0 : null */ - .byte 0, 0, 0, 0 - - .word 0xffff, 0x0000 /* 0x8 : code */ - .byte 0, 0x9e, 0xcf, 0 - - .word 0xffff, 0x0000 /* 0x10 : data */ - .byte 0, 0x92, 0xcf, 0 - -ENTRY(hib_gdtr) - .word 24 /* limit (8*3 segs) */ - .long EXT(hib_gdt) - -/* - * Hibernation code restarts here. Steal some pages from 0x10000 - * to 0x90000 for pages tables and directories etc to temporarily - * map the hibernation code (put at 0x100000 (phys) by the booter - * and linked to 0xC0100000 by the linker) to 0xC0100000 so it can - * execute. It's self-contained and won't make any references outside - * of itself. 
- * - * On the way down it has to save IdlePTD (and if PAE also IdlePDPT) - * and after it runs it has to restore those and load IdlePTD (or - * IdlePDPT if PAE) into %cr3 to re-establish the original mappings - */ - - .align ALIGN - .globl EXT(hibernate_machine_entrypoint) -LEXT(hibernate_machine_entrypoint) - cli - - mov %eax, %edi - - POSTCODE(0x1) - - /* Map physical memory from zero to LAST_PAGE */ - xorl %eax, %eax - xorl %ebx, %ebx - movl $(LAST_PAGE_PDE), %ecx - fillpse( $(HPTD), $(PTE_W) ) - - movl $(HPDPT), %ebx - movl $(HPTD), %eax - orl $(PTE_V), %eax - - xorl %edx, %edx ; \ - - movl %eax,(%ebx) ; /* low 32b */ \ - addl $4,%ebx ; \ - movl %edx,(%ebx) ; /* high 32b */ \ - addl $4,%ebx ; \ - addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ - - movl %eax,(%ebx) ; /* low 32b */ \ - addl $4,%ebx ; \ - movl %edx,(%ebx) ; /* high 32b */ \ - addl $4,%ebx ; \ - addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ - - movl %eax,(%ebx) ; /* low 32b */ \ - addl $4,%ebx ; \ - movl %edx,(%ebx) ; /* high 32b */ \ - addl $4,%ebx ; \ - addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ - - movl %eax,(%ebx) ; /* low 32b */ - addl $4,%ebx ; - movl %edx,(%ebx) ; /* high 32b */ \ - addl $4,%ebx ; \ - addl $(1 << 12),%eax ; /* increment physical address 1Gb */ \ - - /* set page dir ptr table addr */ - movl $(HPDPT), %eax - movl %eax, %cr3 - - POSTCODE(0x3) - - movl %cr4,%eax - orl $(CR4_PAE),%eax - movl %eax,%cr4 /* enable page size extensions */ - - movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ - rdmsr /* MSR value return in edx: eax */ - orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ - wrmsr /* Update Extended Feature Enable reg */ - - movl %cr0, %eax - orl $(CR0_PG|CR0_WP|CR0_PE), %eax - movl %eax, %cr0 /* ready paging */ - - POSTCODE(0x4) - - lgdt EXT(gdtptr) /* load GDT */ - lidt EXT(idtptr) /* load IDT */ - - POSTCODE(0x5) - - LJMP (KERNEL_CS,EXT(hstart)) /* paging on and go to correct vaddr */ - -/* Hib restart 
code now running with correct addresses */ -LEXT(hstart) - POSTCODE(0x6) - - mov $(KERNEL_DS),%ax /* set kernel data segment */ - mov %ax,%ds - mov %ax,%es - mov %ax,%ss - - mov $0,%ax /* fs must be zeroed; */ - mov %ax,%fs /* some bootstrappers don`t do this */ - mov %ax,%gs - - lea EXT(gIOHibernateRestoreStackEnd),%esp /* switch to the bootup stack */ - - POSTCODE(0x7) - - xorl %eax, %eax /* Video memory - N/A */ - pushl %eax - pushl %eax - pushl %eax - mov %edi, %eax /* Pointer to hibernate header */ - pushl %eax - call EXT(hibernate_kernel_entrypoint) - /* NOTREACHED */ - hlt - -/* -void -hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); -*/ - - .align 5 - .globl EXT(hibernate_restore_phys_page) - - /* XXX can only deal with exactly one page */ -LEXT(hibernate_restore_phys_page) - pushl %edi - pushl %esi - - movl 8+ 4(%esp),%esi /* source virtual address */ - addl $0, %esi - jz 3f /* If source == 0, nothing to do */ - - movl 8+ 16(%esp),%eax /* destination physical address, high 32 bits */ - movl 8+ 12(%esp),%edi /* destination physical address, low 32 bits */ - addl $0, %eax - jne 1f /* need to map, above LAST_PAGE */ - - cmpl $(LAST_PAGE), %edi - jb 2f /* no need to map, below LAST_PAGE */ -1: - /* Map physical address %eax:%edi to virt. 
address LAST_PAGE (4GB - 2MB) */ - movl %eax, (HPTD + (LAST_PAGE_PDE * 8) + 4) - movl %edi, %eax /* destination physical address */ - andl $(LAST_PAGE), %eax - orl $(PTE_V | PTE_PS | PTE_W), %eax - movl %eax, (HPTD + (LAST_PAGE_PDE * 8)) - orl $(LAST_PAGE), %edi - invlpg (%edi) - -2: - movl 8+ 20(%esp),%edx /* number of bytes */ - cld - /* move longs*/ - movl %edx,%ecx - shrl $2,%ecx - rep - movsl - /* move bytes*/ - movl %edx,%ecx - andl $3,%ecx - rep - movsb -3: - popl %esi - popl %edi - ret diff --git a/osfmk/i386/hpet.c b/osfmk/i386/hpet.c new file mode 100644 index 000000000..994ba06b5 --- /dev/null +++ b/osfmk/i386/hpet.c @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if MACH_KDB +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* MACH_KDB */ + +/* Decimal powers: */ +#define kilo (1000ULL) +#define Mega (kilo * kilo) +#define Giga (kilo * Mega) +#define Tera (kilo * Giga) +#define Peta (kilo * Tera) + +vm_offset_t hpetArea = 0; +uint32_t hpetAreap = 0; +uint64_t hpetFemto = 0; +uint64_t hpetFreq = 0; +uint64_t hpetCvt = 0; /* (TAKE OUT LATER) */ +uint64_t hpetCvtt2n = 0; +uint64_t hpetCvtn2t = 0; +uint64_t tsc2hpet = 0; +uint64_t hpet2tsc = 0; +uint64_t bus2hpet = 0; +uint64_t hpet2bus = 0; + +vm_offset_t rcbaArea = 0; +uint32_t rcbaAreap = 0; + +static int (*hpet_req)(uint32_t apicid, void *arg, hpetRequest_t *hpet) = NULL; +static void *hpet_arg = NULL; + +#if DEBUG +#define DBG(x...) kprintf("DBG: " x) +#else +#define DBG(x...) +#endif + +int +hpet_register_callback(int (*hpet_reqst)(uint32_t apicid, + void *arg, + hpetRequest_t *hpet), + void *arg) +{ + hpet_req = hpet_reqst; + hpet_arg = arg; + return(0); +} + +/* + * This routine is called to obtain an HPET and have it assigned + * to a CPU. It returns 0 if successful and non-zero if one could + * not be assigned. + */ +int +hpet_request(uint32_t cpu) +{ + hpetRequest_t hpetReq; + int rc; + x86_lcpu_t *lcpu; + x86_core_t *core; + x86_pkg_t *pkg; + boolean_t enabled; + + if (hpet_req == NULL) { + return(-1); + } + + /* + * Deal with the case where the CPU # passed in is past the + * value specified in cpus=n in boot-args. 
+ */ + if (cpu >= real_ncpus) { + enabled = ml_set_interrupts_enabled(FALSE); + lcpu = cpu_to_lcpu(cpu); + if (lcpu != NULL) { + core = lcpu->core; + pkg = core->package; + + if (lcpu->primary) { + pkg->flags |= X86PKG_FL_HAS_HPET; + } + } + + ml_set_interrupts_enabled(enabled); + return(0); + } + + rc = (*hpet_req)(ml_get_apicid(cpu), hpet_arg, &hpetReq); + if (rc != 0) { + return(rc); + } + + enabled = ml_set_interrupts_enabled(FALSE); + lcpu = cpu_to_lcpu(cpu); + core = lcpu->core; + pkg = core->package; + + /* + * Compute the address of the HPET. + */ + core->Hpet = (hpetTimer_t *)((uint8_t *)hpetArea + hpetReq.hpetOffset); + core->HpetVec = hpetReq.hpetVector; + + /* + * Enable interrupts + */ + core->Hpet->Config |= Tn_INT_ENB_CNF; + + /* + * Save the configuration + */ + core->HpetCfg = core->Hpet->Config; + core->HpetCmp = 0; + + /* + * If the CPU is the "primary" for the package, then + * add the HPET to the package too. + */ + if (lcpu->primary) { + pkg->Hpet = core->Hpet; + pkg->HpetCfg = core->HpetCfg; + pkg->HpetCmp = core->HpetCmp; + pkg->flags |= X86PKG_FL_HAS_HPET; + } + + ml_set_interrupts_enabled(enabled); + + return(0); +} + +/* + * Map the RCBA area. + */ +static void +map_rcbaArea(void) +{ + /* + * Get RCBA area physical address and map it + */ + outl(cfgAdr, lpcCfg | (0xF0 & 0xFC)); + rcbaAreap = inl(cfgDat | (0xF0 & 0x03)); + rcbaArea = io_map_spec(rcbaAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO); + kprintf("RCBA: vaddr = %lX, paddr = %08X\n", (unsigned long)rcbaArea, rcbaAreap); +} + +/* + * Initialize the HPET + */ +void +hpet_init(void) +{ + unsigned int *xmod; + + map_rcbaArea(); + + /* + * Is the HPET memory already enabled? + * If not, set address and enable. 
+ */ + xmod = (uint32_t *)(rcbaArea + 0x3404); /* Point to the HPTC */ + uint32_t hptc = *xmod; /* Get HPET config */ + DBG(" current RCBA.HPTC: %08X\n", *xmod); + if(!(hptc & hptcAE)) { + DBG("HPET memory is not enabled, " + "enabling and assigning to 0xFED00000 (hope that's ok)\n"); + *xmod = (hptc & ~3) | hptcAE; + } + + /* + * Get physical address of HPET and map it. + */ + hpetAreap = hpetAddr | ((hptc & 3) << 12); + hpetArea = io_map_spec(hpetAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO); + kprintf("HPET: vaddr = %lX, paddr = %08X\n", (unsigned long)hpetArea, hpetAreap); + + /* + * Extract the HPET tick rate. + * The period of the HPET is reported in femtoseconds (10**-15s) + * and convert to frequency in hertz. + */ + hpetFemto = (uint32_t)(((hpetReg_t *)hpetArea)->GCAP_ID >> 32); + hpetFreq = (1 * Peta) / hpetFemto; + + /* + * The conversion factor is the number of nanoseconds per HPET tick + * with about 32 bits of fraction. The value is converted to a + * base-2 fixed point number. To convert from HPET to nanoseconds, + * multiply the value by the conversion factor using 96-bit arithmetic, + * then shift right 32 bits. If the value is known to be small, + * 64-bit arithmetic will work. 
+ */ + + /* + * Begin conversion of base 10 femtoseconds to base 2, calculate: + * - HPET ticks to nanoseconds conversion in base 2 fraction (* 2**32) + * - nanoseconds to HPET ticks conversion + */ + hpetCvtt2n = (uint64_t)hpetFemto << 32; + hpetCvtt2n = hpetCvtt2n / 1000000ULL; + hpetCvtn2t = 0xFFFFFFFFFFFFFFFFULL / hpetCvtt2n; + kprintf("HPET: Frequency = %6d.%06dMHz, " + "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n", + (uint32_t)(hpetFreq / Mega), (uint32_t)(hpetFreq % Mega), + (uint32_t)(hpetCvtt2n >> 32), (uint32_t)hpetCvtt2n, + (uint32_t)(hpetCvtn2t >> 32), (uint32_t)hpetCvtn2t); + + + /* (TAKE OUT LATER) + * Begin conversion of base 10 femtoseconds to base 2 + * HPET ticks to nanoseconds in base 2 fraction (times 1048576) + */ + hpetCvt = (uint64_t)hpetFemto << 20; + hpetCvt = hpetCvt / 1000000ULL; + + /* Calculate conversion from TSC to HPET */ + tsc2hpet = tmrCvt(tscFCvtt2n, hpetCvtn2t); + DBG(" CVT: TSC to HPET = %08X.%08X\n", + (uint32_t)(tsc2hpet >> 32), (uint32_t)tsc2hpet); + + /* Calculate conversion from HPET to TSC */ + hpet2tsc = tmrCvt(hpetCvtt2n, tscFCvtn2t); + DBG(" CVT: HPET to TSC = %08X.%08X\n", + (uint32_t)(hpet2tsc >> 32), (uint32_t)hpet2tsc); + + /* Calculate conversion from BUS to HPET */ + bus2hpet = tmrCvt(busFCvtt2n, hpetCvtn2t); + DBG(" CVT: BUS to HPET = %08X.%08X\n", + (uint32_t)(bus2hpet >> 32), (uint32_t)bus2hpet); + + /* Calculate conversion from HPET to BUS */ + hpet2bus = tmrCvt(hpetCvtt2n, busFCvtn2t); + DBG(" CVT: HPET to BUS = %08X.%08X\n", + (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus); + +#if MACH_KDB + db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */ +#endif +} + +/* + * This routine is used to get various information about the HPET + * without having to export gobs of globals. It fills in a data + * structure with the info. 
+ */ +void +hpet_get_info(hpetInfo_t *info) +{ + info->hpetCvtt2n = hpetCvtt2n; + info->hpetCvtn2t = hpetCvtn2t; + info->tsc2hpet = tsc2hpet; + info->hpet2tsc = hpet2tsc; + info->bus2hpet = bus2hpet; + info->hpet2bus = hpet2bus; + /* + * XXX + * We're repurposing the rcbaArea so we can use the HPET. + * Eventually we'll rename this correctly. + */ + info->rcbaArea = hpetArea; + info->rcbaAreap = hpetAreap; +} + + +/* + * This routine is called by the HPET driver + * when it assigns an HPET timer to a processor. + * + * XXX with the new callback into the HPET driver, + * this routine will be deprecated. + */ +void +ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect) +{ + uint64_t *hpetVaddr; + hpetTimer_t *hpet; + x86_lcpu_t *lcpu; + x86_core_t *core; + x86_pkg_t *pkg; + boolean_t enabled; + + if(cpu > 1) { + panic("ml_hpet_cfg: invalid cpu = %d\n", cpu); + } + + lcpu = cpu_to_lcpu(cpu); + core = lcpu->core; + pkg = core->package; + + /* + * Only deal with the primary CPU for the package. + */ + if (!lcpu->primary) + return; + + enabled = ml_set_interrupts_enabled(FALSE); + + /* Calculate address of the HPET for this processor */ + hpetVaddr = (uint64_t *)(((uintptr_t)&(((hpetReg_t *)hpetArea)->TIM1_CONF)) + (cpu << 5)); + hpet = (hpetTimer_t *)hpetVaddr; + + DBG("ml_hpet_cfg: HPET for cpu %d at %p, vector = %d\n", + cpu, hpetVaddr, hpetVect); + + /* Save the address and vector of the HPET for this processor */ + core->Hpet = hpet; + core->HpetVec = hpetVect; + + /* + * Enable interrupts + */ + core->Hpet->Config |= Tn_INT_ENB_CNF; + + /* Save the configuration */ + core->HpetCfg = core->Hpet->Config; + core->HpetCmp = 0; + + /* + * We're only doing this for the primary CPU, so go + * ahead and add the HPET to the package too. 
+ */ + pkg->Hpet = core->Hpet; + pkg->HpetVec = core->HpetVec; + pkg->HpetCfg = core->HpetCfg; + pkg->HpetCmp = core->HpetCmp; + pkg->flags |= X86PKG_FL_HAS_HPET; + + ml_set_interrupts_enabled(enabled); +} + +/* + * This is the HPET interrupt handler. + * + * It just hands off to the power management code so that the + * appropriate things get done there. + */ +int +HPETInterrupt(void) +{ + + /* All we do here is to bump the count */ + x86_package()->HpetInt++; + + /* + * Let power management do its thing. + */ + pmHPETInterrupt(); + + /* Return and show that the interrupt has been handled... */ + return 1; +} + + +static hpetReg_t saved_hpet; + +void +hpet_save(void) +{ + hpetReg_t *from = (hpetReg_t *) hpetArea; + hpetReg_t *to = &saved_hpet; + + to->GEN_CONF = from->GEN_CONF; + to->TIM0_CONF = from->TIM0_CONF; + to->TIM0_COMP = from->TIM0_COMP; + to->TIM1_CONF = from->TIM1_CONF; + to->TIM1_COMP = from->TIM1_COMP; + to->TIM2_CONF = from->TIM2_CONF; + to->TIM2_COMP = from->TIM2_COMP; + to->MAIN_CNT = from->MAIN_CNT; +} + +void +hpet_restore(void) +{ + hpetReg_t *from = &saved_hpet; + hpetReg_t *to = (hpetReg_t *) hpetArea; + + /* + * Is the HPET memory already enabled? + * If not, set address and enable. 
+ */ + uint32_t *hptcp = (uint32_t *)(rcbaArea + 0x3404); + uint32_t hptc = *hptcp; + if(!(hptc & hptcAE)) { + DBG("HPET memory is not enabled, " + "enabling and assigning to 0xFED00000 (hope that's ok)\n"); + *hptcp = (hptc & ~3) | hptcAE; + } + + to->GEN_CONF = from->GEN_CONF & ~1; + + to->TIM0_CONF = from->TIM0_CONF; + to->TIM0_COMP = from->TIM0_COMP; + to->TIM1_CONF = from->TIM1_CONF; + to->TIM1_COMP = from->TIM1_COMP; + to->TIM2_CONF = from->TIM2_CONF; + to->TIM2_COMP = from->TIM2_COMP; + to->GINTR_STA = -1ULL; + to->MAIN_CNT = from->MAIN_CNT; + + to->GEN_CONF = from->GEN_CONF; +} + +/* + * Read the HPET timer + * + */ +uint64_t +rdHPET(void) +{ + hpetReg_t *hpetp = (hpetReg_t *) hpetArea; + volatile uint32_t *regp = (uint32_t *) &hpetp->MAIN_CNT; + uint32_t high; + uint32_t low; + + do { + high = *(regp + 1); + low = *regp; + } while (high != *(regp + 1)); + + return (((uint64_t) high) << 32) | low; +} + +#if MACH_KDB + +#define HI32(x) ((uint32_t)(((x) >> 32) & 0xFFFFFFFF)) +#define LO32(x) ((uint32_t)((x) & 0xFFFFFFFF)) + +/* + * Displays HPET memory mapped area + * hp + */ +void +db_hpet(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif) +{ + + db_display_hpet((hpetReg_t *) hpetArea); /* Dump out the HPET + * stuff */ + return; +} + +void +db_display_hpet(hpetReg_t *hpt) +{ + uint64_t cmain; + + cmain = hpt->MAIN_CNT; /* Get the main timer */ + + /* General capabilities */ + db_printf(" GCAP_ID = %08X.%08X\n", + HI32(hpt->GCAP_ID), LO32(hpt->GCAP_ID)); + /* General configuration */ + db_printf(" GEN_CONF = %08X.%08X\n", + HI32(hpt->GEN_CONF), LO32(hpt->GEN_CONF)); + /* General Interrupt status */ + db_printf("GINTR_STA = %08X.%08X\n", + HI32(hpt->GINTR_STA), LO32(hpt->GINTR_STA)); + /* Main counter */ + db_printf(" MAIN_CNT = %08X.%08X\n", + HI32(cmain), LO32(cmain)); + /* Timer 0 config and cap */ + db_printf("TIM0_CONF = %08X.%08X\n", + HI32(hpt->TIM0_CONF), LO32(hpt->TIM0_CONF)); + /* Timer 0 comparator */ + 
db_printf("TIM0_COMP = %08X.%08X\n", + HI32(hpt->TIM0_COMP), LO32(hpt->TIM0_COMP)); + /* Timer 1 config and cap */ + db_printf("TIM1_CONF = %08X.%08X\n", + HI32(hpt->TIM1_CONF), LO32(hpt->TIM1_CONF)); + /* Timer 1 comparator */ + db_printf("TIM1_COMP = %08X.%08X\n", + HI32(hpt->TIM1_COMP), LO32(hpt->TIM1_COMP)); + /* Timer 2 config and cap */ + db_printf("TIM2_CONF = %08X.%08X\n", + HI32(hpt->TIM2_CONF), LO32(hpt->TIM2_CONF)); + /* Timer 2 comparator */ + db_printf("TIM2_COMP = %08X.%08X\n", + HI32(hpt->TIM2_COMP), LO32(hpt->TIM2_COMP)); + + db_printf("\nHPET Frequency = %d.%06dMHz\n", + (uint32_t) (hpetFreq / 1000000), (uint32_t) (hpetFreq % 1000000)); +} +#endif diff --git a/osfmk/i386/hpet.h b/osfmk/i386/hpet.h new file mode 100644 index 000000000..6bc829bb5 --- /dev/null +++ b/osfmk/i386/hpet.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL_PRIVATE +#ifndef _I386_HPET_H_ +#define _I386_HPET_H_ + +/* + * HPET kernel functions to support the HPET KEXT and the + * power management KEXT. + */ + + +/* + * Memory mapped registers for the HPET + */ +typedef struct hpetReg { + uint64_t GCAP_ID; /* General capabilities */ + uint64_t rsv1; + uint64_t GEN_CONF; /* General configuration */ + uint64_t rsv2; + uint64_t GINTR_STA; /* General Interrupt status */ + uint64_t rsv3[25]; + uint64_t MAIN_CNT; /* Main counter */ + uint64_t rsv4; + uint64_t TIM0_CONF; /* Timer 0 config and cap */ +#define TIM_CONF 0 +#define Tn_INT_ENB_CNF 4 + uint64_t TIM0_COMP; /* Timer 0 comparator */ +#define TIM_COMP 8 + uint64_t rsv5[2]; + uint64_t TIM1_CONF; /* Timer 1 config and cap */ + uint64_t TIM1_COMP; /* Timer 1 comparator */ + uint64_t rsv6[2]; + uint64_t TIM2_CONF; /* Timer 2 config and cap */ + uint64_t TIM2_COMP; /* Timer 2 comparator */ + uint64_t rsv7[2]; +} hpetReg; +typedef struct hpetReg hpetReg_t; + +typedef struct hpetTimer { + uint64_t Config; /* Timer config and capabilities */ + uint64_t Compare; /* Timer comparator */ +} hpetTimer_t; + +struct hpetInfo +{ + uint64_t hpetCvtt2n; + uint64_t hpetCvtn2t; + uint64_t tsc2hpet; + uint64_t hpet2tsc; + uint64_t bus2hpet; + uint64_t hpet2bus; + uint32_t rcbaArea; + uint32_t rcbaAreap; +}; +typedef struct hpetInfo hpetInfo_t; + +struct hpetRequest +{ + uint32_t flags; + uint32_t hpetOffset; + uint32_t hpetVector; +}; +typedef struct 
hpetRequest hpetRequest_t; + +#define HPET_REQFL_64BIT 0x00000001 /* Timer is 64 bits */ + +extern uint64_t hpetFemto; +extern uint64_t hpetFreq; +extern uint64_t hpetCvtt2n; +extern uint64_t hpetCvtn2t; +extern uint64_t tsc2hpet; +extern uint64_t hpet2tsc; +extern uint64_t bus2hpet; +extern uint64_t hpet2bus; + +extern vm_offset_t rcbaArea; +extern uint32_t rcbaAreap; + +extern void map_rcbaAread(void); +extern void hpet_init(void); + +extern void hpet_save(void); +extern void hpet_restore(void); + +#ifdef XNU_KERNEL_PRIVATE +extern int HPETInterrupt(void); +#endif + +extern int hpet_register_callback(int (*hpet_reqst)(uint32_t apicid, void *arg, hpetRequest_t *hpet), void *arg); +extern int hpet_request(uint32_t cpu); + +extern uint64_t rdHPET(void); +extern void hpet_get_info(hpetInfo_t *info); + +#define hpetAddr 0xFED00000 +#define hptcAE 0x80 + +#endif /* _I386_HPET_H_ */ + +#endif /* KERNEL_PRIVATE */ diff --git a/pexpert/i386/pe_misc.s b/osfmk/i386/hw_defs.h similarity index 76% rename from pexpert/i386/pe_misc.s rename to osfmk/i386/hw_defs.h index a5b224b70..0fac10f3c 100644 --- a/pexpert/i386/pe_misc.s +++ b/osfmk/i386/hw_defs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,26 +25,21 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include +#ifndef _I386_HW_DEFS_H_ +#define _I386_HW_DEFS_H_ -/* -** PE_get_timebase() -** -** Entry - %esp contains pointer to 64 bit structure. -** -** Exit - 64 bit structure filled in. 
-** -*/ -ENTRY(PE_get_timebase) - - movl S_ARG0, %ecx - - lfence - rdtsc - lfence - movl %edx, 0(%ecx) - movl %eax, 4(%ecx) +#define pmMwaitC1 0x00 +#define pmMwaitC2 0x10 +#define pmMwaitC3 0x20 +#define pmMwaitC4 0x30 +#define pmMwaitBrInt 0x1 - ret +#define pmBase 0x400 +#define pmCtl1 0x04 +#define pmCtl2 0x20 +#define pmC3Res 0x54 +#define pmStatus 0x00 +#define msrTSC 0x10 +#endif /* _I386_HW_DEFS_H_ */ diff --git a/osfmk/i386/hw_lock_types.h b/osfmk/i386/hw_lock_types.h index 1cf2d99dc..bfeee9407 100644 --- a/osfmk/i386/hw_lock_types.h +++ b/osfmk/i386/hw_lock_types.h @@ -90,7 +90,7 @@ * later in kern/lock.h.. */ struct hslock { - int lock_data; + long lock_data; }; typedef struct hslock hw_lock_data_t, *hw_lock_t; #define hw_lock_addr(hwl) (&((hwl).lock_data)) diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 8005189dd..445c6afed 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,73 +82,352 @@ #include #include #include +#include #include +#include #include #include +#include #include +#if CONFIG_MCA #include +#endif #include #include #include #include #include /* LcksOpts */ +#ifdef __i386__ +#include +#if MACH_KDB +#include +#endif +#endif + +#if DEBUG +#define DBG(x...) kprintf(x) +#else +#define DBG(x...) 
+#endif #if MACH_KDB #include #endif /* MACH_KDB */ -#include -static boot_args *kernelBootArgs; +int debug_task; + +static boot_args *kernelBootArgs; + +extern int disableConsoleOutput; +extern const char version[]; +extern const char version_variant[]; +extern int nx_enabled; + +extern int noVMX; /* if set, rosetta should not emulate altivec */ + +#ifdef __x86_64__ +extern void *low_eintstack; +#endif + +extern void serial_init(void); + +void *KPTphys; +pd_entry_t *IdlePTD; +#ifdef __i386__ +pd_entry_t *IdlePDPT64; +#endif + + +char *physfree; + +/* + * Note: ALLOCPAGES() can only be used safely within Idle_PTs_init() + * due to the mutation of physfree. + */ +static void * +ALLOCPAGES(int npages) +{ + uintptr_t tmp = (uintptr_t)physfree; + bzero(physfree, npages * PAGE_SIZE); + physfree += npages * PAGE_SIZE; +#ifdef __x86_64__ + tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK; +#endif + return (void *)tmp; +} + +static void +fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count) +{ + int i; + for (i=0; i> PDESHIFT,1); +#endif + // Fill the lowest level with everything up to physfree + fillkpt(KPTphys, + INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT)); + + // Rewrite the 2nd-lowest level to point to pages of KPTphys. + // This was previously filled statically by idle_pt.c, and thus + // must be done after the KPTphys fill since IdlePTD is in use + fillkpt(IdlePTD, + INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT); + + // IdlePDPT entries +#ifdef __i386__ + fillkpt(IdlePDPT, 0, (uintptr_t)IdlePTD, 0, NPGPTD); +#else + fillkpt(IdlePDPT, INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(IdlePTD), 0, NPGPTD); +#endif + + // Flush the TLB now we're done rewriting the page tables.. + set_cr3(get_cr3()); +} + +/* + * vstart() is called in the natural mode (64bit for K64, 32 for K32) + * on a set of bootstrap pagetables which use large, 2MB pages to map + * all of physical memory in both. See idle_pt.c for details. 
+ * + * In K64 this identity mapping is mirrored in the top and bottom 512GB + * slots of PML4. + * + * The bootstrap processor is called with argument boot_args_start pointing to + * the boot-args block. The kernel's (4K page) page tables are allocated and + * initialized before switching to these. + * + * Non-bootstrap processors are called with argument boot_args_start NULL. + * These processors switch immediately to the existing kernel page tables. + */ +void +vstart(vm_offset_t boot_args_start) +{ + boolean_t is_boot_cpu = !(boot_args_start == 0); + int cpu; + uint32_t lphysfree; + + postcode(VSTART_ENTRY); + + if (is_boot_cpu) { + /* + * Get startup parameters. + */ + kernelBootArgs = (boot_args *)boot_args_start; + lphysfree = kernelBootArgs->kaddr + kernelBootArgs->ksize; + physfree = (void *)(uintptr_t)((lphysfree + PAGE_SIZE - 1) &~ (PAGE_SIZE - 1)); +#if DEBUG + serial_init(); +#endif + DBG("revision 0x%x\n", kernelBootArgs->Revision); + DBG("version 0x%x\n", kernelBootArgs->Version); + DBG("command line %s\n", kernelBootArgs->CommandLine); + DBG("memory map 0x%x\n", kernelBootArgs->MemoryMap); + DBG("memory map sz 0x%x\n", kernelBootArgs->MemoryMapSize); + DBG("kaddr 0x%x\n", kernelBootArgs->kaddr); + DBG("ksize 0x%x\n", kernelBootArgs->ksize); + DBG("physfree %p\n", physfree); + DBG("bootargs: %p, &ksize: %p &kaddr: %p\n", + kernelBootArgs, + &kernelBootArgs->ksize, + &kernelBootArgs->kaddr); + + postcode(PSTART_PAGE_TABLES); + + Idle_PTs_init(); + + first_avail = (vm_offset_t)ID_MAP_VTOP(physfree); + + cpu = 0; + } else { + /* Find our logical cpu number */ + cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK]; + } -extern int noVMX; /* if set, rosetta should not emulate altivec */ + if(is_boot_cpu) cpu_data_alloc(TRUE); +#ifdef __x86_64__ + if(is_boot_cpu) + cpu_desc_init64(cpu_datap(cpu)); + cpu_desc_load64(cpu_datap(cpu)); +#else + if(is_boot_cpu) + cpu_desc_init(cpu_datap(cpu)); + cpu_desc_load(cpu_datap(cpu)); +#endif + 
cpu_mode_init(current_cpu_datap()); + + /* enable NX/XD */ + if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) + wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE); + DBG("vstart() NX/XD enabled\n"); + + +#ifdef __x86_64__ + /* Done with identity mapping */ + IdlePML4[0] = 0; +#endif + + postcode(VSTART_EXIT); +#ifdef __i386__ + if (is_boot_cpu) + i386_init(boot_args_start); + else + i386_init_slave(); + /*NOTREACHED*/ +#else + /* We need to switch to a new per-cpu stack, but we must do this atomically with + * the call to ensure the compiler doesn't assume anything about the stack before + * e.g. tail-call optimisations + */ + if (is_boot_cpu) + { + asm volatile( + "mov %1, %%rdi;" + "mov %0, %%rsp;" + "call _i386_init;" : : "r" + (cpu_datap(cpu)->cpu_int_stack_top), "r" (boot_args_start)); + } + else + { + asm volatile( + "mov %0, %%rsp;" + "call _i386_init_slave;" : : "r" + (cpu_datap(cpu)->cpu_int_stack_top)); + } + /*NOTREACHED*/ +#endif +} /* * Cpu initialization. Running virtual, but without MACH VM - * set up. First C routine called. + * set up. */ void i386_init(vm_offset_t boot_args_start) { unsigned int maxmem; uint64_t maxmemtouse; - unsigned int cpus; - boolean_t legacy_mode; + unsigned int cpus = 0; boolean_t fidn; +#ifdef __i386__ + boolean_t legacy_mode; +#endif + boolean_t IA32e = TRUE; postcode(I386_INIT_ENTRY); - i386_macho_zerofill(); - +#if CONFIG_MCA /* Initialize machine-check handling */ mca_cpu_init(); +#endif /* * Setup boot args given the physical start address. 
*/ kernelBootArgs = (boot_args *) ml_static_ptovirt(boot_args_start); - kernelBootArgs->MemoryMap = (uint32_t) - ml_static_ptovirt((vm_offset_t)kernelBootArgs->MemoryMap); - kernelBootArgs->deviceTreeP = (uint32_t) - ml_static_ptovirt((vm_offset_t)kernelBootArgs->deviceTreeP); + DBG("i386_init(0x%lx) kernelBootArgs=%p\n", + (unsigned long)boot_args_start, kernelBootArgs); master_cpu = 0; - (void) cpu_data_alloc(TRUE); cpu_init(); + postcode(CPU_INIT_D); + PE_init_platform(FALSE, kernelBootArgs); postcode(PE_INIT_PLATFORM_D); + printf_init(); /* Init this in case we need debugger */ panic_init(); /* Init this in case we need debugger */ + /* setup debugging output if one has been chosen */ PE_init_kprintf(FALSE); @@ -174,13 +453,14 @@ i386_init(vm_offset_t boot_args_start) if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem))) maxmemtouse = 0; else - maxmemtouse = ((uint64_t)maxmem) * (uint64_t)(1024 * 1024); + maxmemtouse = ((uint64_t)maxmem) * MB; if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) { if ((0 < cpus) && (cpus < max_ncpus)) max_ncpus = cpus; } + /* * debug support for > 4G systems */ @@ -191,21 +471,22 @@ i386_init(vm_offset_t boot_args_start) force_immediate_debugger_NMI = FALSE; else force_immediate_debugger_NMI = fidn; - +#ifdef __i386__ /* * At this point we check whether we are a 64-bit processor * and that we're not restricted to legacy mode, 32-bit operation. 
*/ - boolean_t IA32e = FALSE; if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) { kprintf("EM64T supported"); if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) { kprintf(" but legacy mode forced\n"); + IA32e = FALSE; } else { - IA32e = TRUE; kprintf(" and will be enabled\n"); } - } + } else + IA32e = FALSE; +#endif if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) nx_enabled = 0; @@ -236,3 +517,86 @@ i386_init(vm_offset_t boot_args_start) machine_startup(); } + +static void +do_init_slave(boolean_t fast_restart) +{ + void *init_param = FULL_SLAVE_INIT; + + postcode(I386_INIT_SLAVE); + + if (!fast_restart) { + /* Ensure that caching and write-through are enabled */ + set_cr0(get_cr0() & ~(CR0_NW|CR0_CD)); + + DBG("i386_init_slave() CPU%d: phys (%d) active.\n", + get_cpu_number(), get_cpu_phys_number()); + + assert(!ml_get_interrupts_enabled()); + + cpu_mode_init(current_cpu_datap()); + +#if CONFIG_MCA + mca_cpu_init(); +#endif + + lapic_configure(); + LAPIC_DUMP(); + LAPIC_CPU_MAP_DUMP(); + + init_fpu(); + + mtrr_update_cpu(); + } else + init_param = FAST_SLAVE_INIT; + +#if CONFIG_VMX + /* resume VT operation */ + vmx_resume(); +#endif + + if (!fast_restart) + pat_init(); + + cpu_thread_init(); /* not strictly necessary */ + +#ifdef __x86_64__ + /* Re-zero the identity-map for the idle PT's. This MUST be done before + * cpu_running is set so that other slaves can set up their own + * identity-map */ + if (!fast_restart) + IdlePML4[0] = 0; +#endif + + cpu_init(); /* Sets cpu_running which starter cpu waits for */ + + slave_main(init_param); + + panic("do_init_slave() returned from slave_main()"); +} + +/* + * i386_init_slave() is called from pstart. + * We're in the cpu's interrupt stack with interrupts disabled. + * At this point we are in legacy mode. We need to switch on IA32e + * if the mode is set to 64-bits. + */ +void +i386_init_slave(void) +{ + do_init_slave(FALSE); +} + +/* + * i386_init_slave_fast() is called from pmCPUHalt. 
+ * We're running on the idle thread and need to fix up + * some accounting and get it so that the scheduler sees this + * CPU again. + */ +void +i386_init_slave_fast(void) +{ + do_init_slave(TRUE); +} + + diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s index e4d7f7c1e..267b4b0db 100644 --- a/osfmk/i386/i386_lock.s +++ b/osfmk/i386/i386_lock.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,17 +42,24 @@ #include #include #include - +#include + #include "assym.s" #define PAUSE rep; nop + +#define PUSHF pushf +#define POPF popf +#define CLI cli + + /* * When performance isn't the only concern, it's * nice to build stack frames... */ #define BUILD_STACK_FRAMES (GPROF || \ - ((MACH_LDEBUG || ETAP_LOCK_TRACE) && MACH_KDB)) + ((MACH_LDEBUG) && MACH_KDB)) #if BUILD_STACK_FRAMES @@ -114,20 +121,28 @@ ret -#define M_ILK (%edx) -#define M_LOCKED MUTEX_LOCKED(%edx) -#define M_WAITERS MUTEX_WAITERS(%edx) -#define M_PROMOTED_PRI MUTEX_PROMOTED_PRI(%edx) -#define M_ITAG MUTEX_ITAG(%edx) -#define M_PTR MUTEX_PTR(%edx) -#if MACH_LDEBUG -#define M_TYPE MUTEX_TYPE(%edx) -#define M_PC MUTEX_PC(%edx) -#define M_THREAD MUTEX_THREAD(%edx) -#endif /* MACH_LDEBUG */ +/* For x86_64, the varargs ABI requires that %al indicate + * how many SSE register contain arguments. 
In our case, 0 */ +#if __i386__ +#define LOAD_STRING_ARG0(label) pushl $##label ; +#define LOAD_ARG1(x) pushl x ; +#define CALL_PANIC() call EXT(panic) ; +#else +#define LOAD_STRING_ARG0(label) leaq label(%rip), %rdi ; +#define LOAD_ARG1(x) movq x, %rsi ; +#define CALL_PANIC() xorb %al,%al ; call EXT(panic) ; +#endif -#include -#define CX(addr,reg) addr(,reg,4) +#define CHECK_UNLOCK(current, owner) \ + cmp current, owner ; \ + je 1f ; \ + LOAD_STRING_ARG0(2f) ; \ + CALL_PANIC() ; \ + hlt ; \ + .data ; \ +2: String "Mutex unlock attempted from non-owner thread"; \ + .text ; \ +1: #if MACH_LDEBUG /* @@ -142,8 +157,8 @@ #define CHECK_MUTEX_TYPE() \ cmpl $ MUTEX_TAG,M_TYPE ; \ je 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ + LOAD_STRING_ARG0(2f) ; \ + CALL_PANIC() ; \ hlt ; \ .data ; \ 2: String "not a mutex!" ; \ @@ -158,110 +173,80 @@ */ #if MACH_RT #define CHECK_PREEMPTION_LEVEL() \ + cmpl $0,%gs:CPU_HIBERNATE ; \ + jne 1f ; \ cmpl $0,%gs:CPU_PREEMPTION_LEVEL ; \ je 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ + LOAD_ARG1(%gs:CPU_PREEMPTION_LEVEL) ; \ + LOAD_STRING_ARG0(2f) ; \ + CALL_PANIC() ; \ hlt ; \ .data ; \ -2: String "preemption_level != 0!" ; \ +2: String "preemption_level(%d) != 0!" ; \ .text ; \ 1: #else /* MACH_RT */ #define CHECK_PREEMPTION_LEVEL() #endif /* MACH_RT */ -#define CHECK_NO_SIMPLELOCKS() \ - cmpl $0,%gs:CPU_SIMPLE_LOCK_COUNT ; \ - je 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ - hlt ; \ - .data ; \ -2: String "simple_locks_held!" ; \ - .text ; \ -1: - -/* - * Verifies return to the correct thread in "unlock" situations. - */ -#define CHECK_THREAD(thd) \ - movl %gs:CPU_ACTIVE_THREAD,%ecx ; \ - testl %ecx,%ecx ; \ - je 1f ; \ - cmpl %ecx,thd ; \ - je 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ - hlt ; \ - .data ; \ -2: String "wrong thread!" 
; \ - .text ; \ -1: - -#define CHECK_MYLOCK(thd) \ - movl %gs:CPU_ACTIVE_THREAD,%ecx ; \ - testl %ecx,%ecx ; \ - je 1f ; \ - cmpl %ecx,thd ; \ +#define CHECK_MYLOCK(current, owner) \ + cmp current, owner ; \ jne 1f ; \ - pushl $2f ; \ - call EXT(panic) ; \ + LOAD_STRING_ARG0(2f) ; \ + CALL_PANIC() ; \ hlt ; \ .data ; \ -2: String "mylock attempt!" ; \ +2: String "Attempt to recursively lock a non-recursive lock"; \ .text ; \ 1: -#define METER_SIMPLE_LOCK_LOCK(reg) \ - pushl reg ; \ - call EXT(meter_simple_lock) ; \ - popl reg - -#define METER_SIMPLE_LOCK_UNLOCK(reg) \ - pushl reg ; \ - call EXT(meter_simple_unlock) ; \ - popl reg - #else /* MACH_LDEBUG */ #define CHECK_MUTEX_TYPE() -#define CHECK_SIMPLE_LOCK_TYPE -#define CHECK_THREAD(thd) #define CHECK_PREEMPTION_LEVEL() -#define CHECK_NO_SIMPLELOCKS() #define CHECK_MYLOCK(thd) -#define METER_SIMPLE_LOCK_LOCK(reg) -#define METER_SIMPLE_LOCK_UNLOCK(reg) #endif /* MACH_LDEBUG */ #define PREEMPTION_DISABLE \ - incl %gs:CPU_PREEMPTION_LEVEL + incl %gs:CPU_PREEMPTION_LEVEL #define PREEMPTION_ENABLE \ decl %gs:CPU_PREEMPTION_LEVEL ; \ jne 9f ; \ - pushf ; \ - testl $ EFL_IF,(%esp) ; \ + PUSHF ; \ + testl $ EFL_IF,S_PC ; \ je 8f ; \ - cli ; \ + CLI ; \ movl %gs:CPU_PENDING_AST,%eax ; \ testl $ AST_URGENT,%eax ; \ je 8f ; \ movl %gs:CPU_INTERRUPT_LEVEL,%eax ; \ testl %eax,%eax ; \ jne 8f ; \ - popf ; \ + POPF ; \ int $(T_PREEMPT) ; \ jmp 9f ; \ 8: \ - popf ; \ + POPF ; \ 9: #if CONFIG_DTRACE + + .globl _lockstat_probe + .globl _lockstat_probemap + +/* + * LOCKSTAT_LABEL creates a dtrace symbol which contains + * a pointer into the lock code function body. 
At that + * point is a "ret" instruction that can be patched into + * a "nop" + */ + +#if defined(__i386__) + #define LOCKSTAT_LABEL(lab) \ .data ;\ .globl lab ;\ @@ -270,9 +255,6 @@ .text ;\ 9: - .globl _lockstat_probe - .globl _lockstat_probemap - #define LOCKSTAT_RECORD(id, lck) \ push %ebp ; \ mov %esp,%ebp ; \ @@ -295,29 +277,58 @@ 9: leave /* ret - left to subsequent code, e.g. return values */ -#define LOCKSTAT_RECORD2(id, lck, arg) \ - push %ebp ; \ - mov %esp,%ebp ; \ - sub $0x38,%esp /* size of dtrace_probe args */ ; \ - movl _lockstat_probemap + (id * 4),%eax ; \ - test %eax,%eax ; \ - je 9f ; \ - movl $0,36(%esp) ; \ - movl $0,40(%esp) ; \ - movl $0,28(%esp) ; \ - movl $0,32(%esp) ; \ - movl $0,20(%esp) ; \ - movl $0,24(%esp) ; \ - movl $0,12(%esp) ; \ - movl $0,16(%esp) ; \ - movl lck,4(%esp) /* copy lock pointer to arg 1 */ ; \ - movl arg,8(%esp) ; \ - movl %eax,(%esp) ; \ - call *_lockstat_probe ; \ +#elif defined(__x86_64__) +#define LOCKSTAT_LABEL(lab) \ + .data ;\ + .globl lab ;\ + lab: ;\ + .quad 9f ;\ + .text ;\ + 9: + +#define LOCKSTAT_RECORD(id, lck) \ + push %rbp ; \ + mov %rsp,%rbp ; \ + movl _lockstat_probemap + (id * 4)(%rip),%eax ; \ + test %eax,%eax ; \ + je 9f ; \ + mov lck, %rsi ; \ + mov %rax, %rdi ; \ + mov $0, %rdx ; \ + mov $0, %rcx ; \ + mov $0, %r8 ; \ + mov $0, %r9 ; \ + call *_lockstat_probe(%rip) ; \ 9: leave /* ret - left to subsequent code, e.g. 
return values */ +#else +#error Unsupported architecture #endif +#endif /* CONFIG_DTRACE */ +/* + * For most routines, the hw_lock_t pointer is loaded into a + * register initially, and then either a byte or register-sized + * word is loaded/stored to the pointer + */ + +#if defined(__i386__) +#define HW_LOCK_REGISTER %edx +#define LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER +#define HW_LOCK_THREAD_REGISTER %ecx +#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER +#define HW_LOCK_MOV_WORD movl +#define HW_LOCK_EXAM_REGISTER %eax +#elif defined(__x86_64__) +#define HW_LOCK_REGISTER %rdi +#define LOAD_HW_LOCK_REGISTER +#define HW_LOCK_THREAD_REGISTER %rcx +#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER +#define HW_LOCK_MOV_WORD movq +#define HW_LOCK_EXAM_REGISTER %rax +#else +#error Unsupported architecture +#endif /* * void hw_lock_init(hw_lock_t) @@ -325,8 +336,8 @@ * Initialize a hardware lock. */ LEAF_ENTRY(hw_lock_init) - movl L_ARG0,%edx /* fetch lock pointer */ - movl $0,(%edx) /* clear the lock */ + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ LEAF_RET @@ -336,8 +347,8 @@ LEAF_ENTRY(hw_lock_init) * Initialize a hardware byte lock. */ LEAF_ENTRY(hw_lock_byte_init) - movl L_ARG0,%edx /* fetch lock pointer */ - movb $0,(%edx) /* clear the lock */ + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + movb $0, (HW_LOCK_REGISTER) /* clear the lock */ LEAF_RET /* @@ -347,15 +358,15 @@ LEAF_ENTRY(hw_lock_byte_init) * MACH_RT: also return with preemption disabled. */ LEAF_ENTRY(hw_lock_lock) - movl L_ARG0,%edx /* fetch lock pointer */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + LOAD_HW_LOCK_THREAD_REGISTER /* get thread pointer */ + PREEMPTION_DISABLE 1: - movl (%edx), %eax - testl %eax,%eax /* lock locked? 
*/ + mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER + test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? */ jne 3f /* branch if so */ - lock; cmpxchgl %ecx,(%edx) /* try to acquire the HW lock */ + lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ jne 3f movl $1,%eax /* In case this was a timeout call */ LEAF_RET /* if yes, then nothing left to do */ @@ -371,14 +382,14 @@ LEAF_ENTRY(hw_lock_lock) */ LEAF_ENTRY(hw_lock_byte_lock) - movl L_ARG0,%edx /* Load lock pointer */ + LOAD_HW_LOCK_REGISTER /* Load lock pointer */ PREEMPTION_DISABLE movl $1, %ecx /* Set lock value */ 1: - movb (%edx), %al /* Load byte at address */ + movb (HW_LOCK_REGISTER), %al /* Load byte at address */ testb %al,%al /* lock locked? */ jne 3f /* branch if so */ - lock; cmpxchgb %cl,(%edx) /* attempt atomic compare exchange */ + lock; cmpxchg %cl,(HW_LOCK_REGISTER) /* attempt atomic compare exchange */ jne 3f LEAF_RET /* if yes, then nothing left to do */ 3: @@ -393,17 +404,19 @@ LEAF_ENTRY(hw_lock_byte_lock) */ LEAF_ENTRY(hw_lock_to) 1: - movl L_ARG0,%edx /* fetch lock pointer */ - movl %gs:CPU_ACTIVE_THREAD,%ecx + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + LOAD_HW_LOCK_THREAD_REGISTER + /* * Attempt to grab the lock immediately * - fastpath without timeout nonsense. */ PREEMPTION_DISABLE - movl (%edx), %eax - testl %eax,%eax /* lock locked? */ + + mov (HW_LOCK_REGISTER), HW_LOCK_EXAM_REGISTER + test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER /* lock locked? */ jne 2f /* branch if so */ - lock; cmpxchgl %ecx,(%edx) /* try to acquire the HW lock */ + lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ jne 2f /* branch on failure */ movl $1,%eax LEAF_RET @@ -415,6 +428,7 @@ LEAF_ENTRY(hw_lock_to) * and then spin re-checking the lock but pausing * every so many (INNER_LOOP_COUNT) spins to check for timeout. 
*/ +#if __i386__ movl L_ARG1,%ecx /* fetch timeout */ push %edi push %ebx @@ -427,32 +441,65 @@ LEAF_ENTRY(hw_lock_to) adcl $0,%edx /* add carry */ mov %edx,%ecx mov %eax,%ebx /* %ecx:%ebx is the timeout expiry */ + mov %edi, %edx /* load lock back into %edx */ +#else + push %r9 + lfence + rdtsc /* read cyclecount into %edx:%eax */ + lfence + shlq $32, %rdx + orq %rdx, %rax /* load 64-bit quantity into %rax */ + addq %rax, %rsi /* %rsi is the timeout expiry */ +#endif + 4: /* * The inner-loop spin to look for the lock being freed. */ - mov $(INNER_LOOP_COUNT),%edx +#if __i386__ + mov $(INNER_LOOP_COUNT),%edi +#else + mov $(INNER_LOOP_COUNT),%r9 +#endif 5: PAUSE /* pause for hyper-threading */ - movl (%edi),%eax /* spin checking lock value in cache */ - testl %eax,%eax + mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* spin checking lock value in cache */ + test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER je 6f /* zero => unlocked, try to grab it */ - decl %edx /* decrement inner loop count */ +#if __i386__ + decl %edi /* decrement inner loop count */ +#else + decq %r9 /* decrement inner loop count */ +#endif jnz 5b /* time to check for timeout? 
*/ - + /* * Here after spinning INNER_LOOP_COUNT times, check for timeout */ +#if __i386__ + mov %edx,%edi /* Save %edx */ lfence rdtsc /* cyclecount into %edx:%eax */ lfence - cmpl %ecx,%edx /* compare high-order 32-bits */ + xchg %edx,%edi /* cyclecount into %edi:%eax */ + cmpl %ecx,%edi /* compare high-order 32-bits */ jb 4b /* continue spinning if less, or */ cmpl %ebx,%eax /* compare low-order 32-bits */ jb 4b /* continue if less, else bail */ xor %eax,%eax /* with 0 return value */ pop %ebx pop %edi +#else + lfence + rdtsc /* cyclecount into %edx:%eax */ + lfence + shlq $32, %rdx + orq %rdx, %rax /* load 64-bit quantity into %rax */ + cmpq %rsi, %rax /* compare to timeout */ + jb 4b /* continue spinning if less, or */ + xor %rax,%rax /* with 0 return value */ + pop %r9 +#endif LEAF_RET 6: @@ -460,12 +507,16 @@ LEAF_ENTRY(hw_lock_to) * Here to try to grab the lock that now appears to be free * after contention. */ - movl %gs:CPU_ACTIVE_THREAD,%edx - lock; cmpxchgl %edx,(%edi) /* try to acquire the HW lock */ + LOAD_HW_LOCK_THREAD_REGISTER + lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ jne 4b /* no - spin again */ movl $1,%eax /* yes */ +#if __i386__ pop %ebx pop %edi +#else + pop %r9 +#endif LEAF_RET /* @@ -475,10 +526,11 @@ LEAF_ENTRY(hw_lock_to) * MACH_RT: release preemption level. 
*/ LEAF_ENTRY(hw_lock_unlock) - movl L_ARG0,%edx /* fetch lock pointer */ - movl $0,(%edx) /* clear the lock */ + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + HW_LOCK_MOV_WORD $0, (HW_LOCK_REGISTER) /* clear the lock */ PREEMPTION_ENABLE LEAF_RET + /* * void hw_lock_byte_unlock(uint8_t *lock_byte) * @@ -487,28 +539,8 @@ LEAF_ENTRY(hw_lock_unlock) */ LEAF_ENTRY(hw_lock_byte_unlock) - movl L_ARG0,%edx /* Load lock pointer */ - movb $0,(%edx) /* Clear the lock byte */ - PREEMPTION_ENABLE - LEAF_RET - -/* - * void i386_lock_unlock_with_flush(hw_lock_t) - * - * Unconditionally release lock, followed by a cacheline flush of - * the line corresponding to the lock dword. This routine is currently - * used with certain locks which are susceptible to lock starvation, - * minimizing cache affinity for lock acquisitions. A queued spinlock - * or other mechanism that ensures fairness would obviate the need - * for this routine, but ideally few or no spinlocks should exhibit - * enough contention to require such measures. - * MACH_RT: release preemption level. - */ -LEAF_ENTRY(i386_lock_unlock_with_flush) - movl L_ARG0,%edx /* Fetch lock pointer */ - movl $0,(%edx) /* Clear the lock */ - mfence /* Serialize prior stores */ - clflush (%edx) /* Write back and invalidate line */ + LOAD_HW_LOCK_REGISTER /* Load lock pointer */ + movb $0, (HW_LOCK_REGISTER) /* Clear the lock byte */ PREEMPTION_ENABLE LEAF_RET @@ -517,16 +549,16 @@ LEAF_ENTRY(i386_lock_unlock_with_flush) * MACH_RT: returns with preemption disabled on success. 
*/ LEAF_ENTRY(hw_lock_try) - movl L_ARG0,%edx /* fetch lock pointer */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + LOAD_HW_LOCK_THREAD_REGISTER PREEMPTION_DISABLE - movl (%edx),%eax - testl %eax,%eax + + mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER + test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER jne 1f - lock; cmpxchgl %ecx,(%edx) /* try to acquire the HW lock */ + lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ jne 1f - + movl $1,%eax /* success */ LEAF_RET @@ -541,63 +573,69 @@ LEAF_ENTRY(hw_lock_try) * N.B. Racy, of course. */ LEAF_ENTRY(hw_lock_held) - movl L_ARG0,%edx /* fetch lock pointer */ - - movl (%edx),%eax /* check lock value */ - testl %eax,%eax + LOAD_HW_LOCK_REGISTER /* fetch lock pointer */ + mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* check lock value */ + test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER movl $1,%ecx cmovne %ecx,%eax /* 0 => unlocked, 1 => locked */ LEAF_RET -LEAF_ENTRY(mutex_init) - movl L_ARG0,%edx /* fetch lock pointer */ - xorl %eax,%eax - movl %eax,M_ILK /* clear interlock */ - movl %eax,M_LOCKED /* clear locked flag */ - movw %ax,M_WAITERS /* init waiter count */ - movw %ax,M_PROMOTED_PRI - -#if MACH_LDEBUG - movl $ MUTEX_TAG,M_TYPE /* set lock type */ - movl %eax,M_PC /* init caller pc */ - movl %eax,M_THREAD /* and owning thread */ -#endif - - LEAF_RET /* * Reader-writer lock fastpaths. These currently exist for the - * shared lock acquire and release paths (where they reduce overhead - * considerably)--more can be added as necessary (DRK). + * shared lock acquire, the exclusive lock acquire, the shared to + * exclusive upgrade and the release paths (where they reduce overhead + * considerably) -- these are by far the most frequently used routines + * + * The following should reflect the layout of the bitfield embedded within + * the lck_rw_t structure (see i386/locks.h). 
*/ +#define LCK_RW_INTERLOCK (0x1 << 16) + +#define LCK_RW_PRIV_EXCL (0x1 << 24) +#define LCK_RW_WANT_UPGRADE (0x2 << 24) +#define LCK_RW_WANT_WRITE (0x4 << 24) +#define LCK_R_WAITING (0x8 << 24) +#define LCK_W_WAITING (0x10 << 24) + +#define LCK_RW_SHARED_MASK (0xffff) /* - * These should reflect the layout of the bitfield embedded within - * the lck_rw_t structure (see i386/locks.h). + * For most routines, the lck_rw_t pointer is loaded into a + * register initially, and the flags bitfield loaded into another + * register and examined */ -#define LCK_RW_INTERLOCK 0x1 -#define LCK_RW_WANT_UPGRADE 0x2 -#define LCK_RW_WANT_WRITE 0x4 -#define LCK_R_WAITING 0x8 -#define LCK_W_WAITING 0x10 - -#define RW_LOCK_SHARED_MASK ((LCK_RW_INTERLOCK<<16) | \ - ((LCK_RW_WANT_UPGRADE|LCK_RW_WANT_WRITE) << 24)) + +#if defined(__i386__) +#define LCK_RW_REGISTER %edx +#define LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER +#define LCK_RW_FLAGS_REGISTER %eax +#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER +#elif defined(__x86_64__) +#define LCK_RW_REGISTER %rdi +#define LOAD_LCK_RW_REGISTER +#define LCK_RW_FLAGS_REGISTER %eax +#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER +#else +#error Unsupported architecture +#endif + +#define RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) /* - * void lck_rw_lock_shared(lck_rw_t*) + * void lck_rw_lock_shared(lck_rw_t *) * */ - Entry(lck_rw_lock_shared) - movl S_ARG0, %edx + LOAD_LCK_RW_REGISTER 1: - movl (%edx), %eax /* Load state bitfield and interlock */ + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ testl $(RW_LOCK_SHARED_MASK), %eax /* Eligible for fastpath? 
*/ jne 3f - movl %eax, %ecx + + movl %eax, %ecx /* original value in %eax for cmpxchgl */ incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (%edx) /* Attempt atomic exchange */ + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 2f #if CONFIG_DTRACE @@ -608,11 +646,10 @@ Entry(lck_rw_lock_shared) */ LOCKSTAT_LABEL(_lck_rw_lock_shared_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in %edx */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, %edx) + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif ret - 2: PAUSE jmp 1b @@ -620,1455 +657,1330 @@ Entry(lck_rw_lock_shared) jmp EXT(lck_rw_lock_shared_gen) + +#define RW_TRY_LOCK_SHARED_MASK (LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) /* - * lck_rw_type_t lck_rw_done(lck_rw_t*) + * void lck_rw_try_lock_shared(lck_rw_t *) * */ - -.data -rwl_release_error_str: - .asciz "Releasing non-exclusive RW lock without a reader refcount!" -.text - -#define RW_LOCK_RELEASE_MASK ((LCK_RW_INTERLOCK<<16) | \ - ((LCK_RW_WANT_UPGRADE|LCK_RW_WANT_WRITE|LCK_R_WAITING|LCK_W_WAITING) << 24)) -Entry(lck_rw_done) - movl S_ARG0, %edx +Entry(lck_rw_try_lock_shared) + LOAD_LCK_RW_REGISTER 1: - movl (%edx), %eax /* Load state bitfield and interlock */ - testl $(RW_LOCK_RELEASE_MASK), %eax /* Eligible for fastpath? 
*/ - jne 3f - movl %eax, %ecx - /* Assert refcount */ - testl $(0xFFFF), %ecx - jne 5f - movl $(rwl_release_error_str), S_ARG0 - jmp EXT(panic) -5: - decl %ecx /* Decrement reader count */ + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + testl $(LCK_RW_INTERLOCK), %eax + jne 2f + testl $(RW_TRY_LOCK_SHARED_MASK), %eax + jne 3f /* lock is busy */ + + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + incl %ecx /* Increment reader refcount */ lock - cmpxchgl %ecx, (%edx) + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 2f - movl $(RW_SHARED), %eax /* Indicate that the lock was shared */ + #if CONFIG_DTRACE - /* Dtrace lockstat probe: LS_RW_DONE_RELEASE as reader */ - LOCKSTAT_LABEL(_lck_rw_done_lockstat_patch_point) - ret + movl $1, %eax /* - * Note: Dtrace's convention is 0 ==> reader, which is - * a different absolute value than $(RW_SHARED) - * %edx contains the lock address already from the above + * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE + * Implemented by swapping between return and no-op instructions. + * See bsd/dev/dtrace/lockstat.c. */ - LOCKSTAT_RECORD2(LS_LCK_RW_DONE_RELEASE, %edx, $0) - movl $(RW_SHARED), %eax /* Indicate that the lock was shared */ + LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point) + ret + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif + movl $1, %eax /* return TRUE */ ret - 2: PAUSE jmp 1b 3: - jmp EXT(lck_rw_done_gen) - - -NONLEAF_ENTRY2(mutex_lock_spin,_mutex_lock_spin) - - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ - - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() + xorl %eax, %eax + ret - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - jne Lmls_ilk_loop /* no, go spin */ -Lmls_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lmls_ilk_fail /* branch on failure to spin loop */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Lml_fail /* yes, fall back to a normal mutex lock */ - movl $(MUTEX_LOCKED_AS_SPIN),M_LOCKED /* indicate ownership as a spin lock */ -#if MACH_LDEBUG - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_THREAD - movl B_PC,%ecx - movl %ecx,M_PC -#endif - PREEMPTION_DISABLE - popf /* restore interrupt state */ - leave /* return with the interlock held */ -#if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_lock_spin_lockstat_patch_point) +#define RW_LOCK_EXCLUSIVE_HELD (LCK_RW_WANT_WRITE | LCK_RW_WANT_UPGRADE) +/* + * int lck_rw_grab_shared(lck_rw_t *) + * + */ +Entry(lck_rw_grab_shared) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ + testl $(LCK_RW_INTERLOCK), %eax + jne 5f + testl $(RW_LOCK_EXCLUSIVE_HELD), %eax + jne 3f +2: + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + incl %ecx /* Increment reader refcount */ + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 4f + + movl $1, %eax /* return success */ ret - /* %edx contains the lock address from above */ - LOCKSTAT_RECORD(LS_MUTEX_LOCK_SPIN_ACQUIRE, %edx) -#endif +3: + testl $(LCK_RW_SHARED_MASK), %eax + je 4f + testl $(LCK_RW_PRIV_EXCL), %eax + je 2b +4: + xorl %eax, %eax /* return failure */ ret - -Lmls_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ - -Lmls_ilk_loop: +5: PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Lmls_retry /* yes, go for it */ - jmp Lmls_ilk_loop /* no, keep spinning */ - - -NONLEAF_ENTRY2(mutex_lock,_mutex_lock) - - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ - - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() - - movl M_ILK,%eax /* is interlock held */ - testl %eax,%eax - jne Lml_ilk_loop /* yes, go do the spin loop */ -Lml_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx + jmp 1b - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lml_ilk_fail /* branch on failure to spin loop */ - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Lml_fail /* yes, we lose */ -Lml_acquire: - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_LOCKED + +#define RW_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | \ + LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) +/* + * void lck_rw_lock_exclusive(lck_rw_t*) + * + */ +Entry(lck_rw_lock_exclusive) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + testl $(RW_LOCK_EXCLUSIVE_MASK), %eax /* Eligible for fastpath? */ + jne 3f /* no, go slow */ -#if MACH_LDEBUG - movl %ecx,M_THREAD - movl B_PC,%ecx - movl %ecx,M_PC -#endif - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Lml_waiters /* yes, more work to do */ -Lml_return: - xorl %eax,%eax - movl %eax,M_ILK + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + orl $(LCK_RW_WANT_WRITE), %ecx + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 2f - popf /* restore interrupt state */ - leave #if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_lock_lockstat_patch_point) + /* + * Dtrace lockstat event: LS_LCK_RW_LOCK_EXCL_ACQUIRE + * Implemented by swapping between return and no-op instructions. + * See bsd/dev/dtrace/lockstat.c. 
+ */ + LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point) ret - /* %edx still contains the lock pointer */ - LOCKSTAT_RECORD(LS_MUTEX_LOCK_ACQUIRE, %edx) + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif ret - - /* - * We got the mutex, but there are waiters. Update information - * on waiters. - */ -Lml_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Lml_return - -Lml_restart: -Lml_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ - -Lml_ilk_loop: +2: PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Lml_retry /* yes, go try to grab it */ - jmp Lml_ilk_loop /* no - keep spinning */ - -Lml_fail: - /* - * Check if the owner is on another processor and therefore - * we should try to spin before blocking. - */ - testl $(OnProc),ACT_SPF(%ecx) - jz Lml_block + jmp 1b +3: + jmp EXT(lck_rw_lock_exclusive_gen) - /* - * Here if owner is on another processor: - * - release the interlock - * - spin on the holder until release or timeout - * - in either case re-acquire the interlock - * - if released, acquire it - * - otherwise drop thru to block. 
- */ - xorl %eax,%eax - movl %eax,M_ILK /* zero interlock */ - popf - pushf /* restore interrupt state */ - - push %edx /* lock address */ - call EXT(lck_mtx_lock_spinwait) /* call out to do spinning */ - addl $4,%esp - movl B_ARG0,%edx /* refetch mutex address */ - - /* Re-acquire interlock - interrupts currently enabled */ - movl M_ILK,%eax /* is interlock held */ - testl %eax,%eax - jne Lml_ilk_reloop /* yes, go do the spin loop */ -Lml_reget_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lml_ilk_refail /* branch on failure to spin loop */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex free? */ - je Lml_acquire /* yes, acquire */ - -Lml_block: - CHECK_MYLOCK(M_THREAD) - pushl M_LOCKED - pushl %edx /* push mutex address */ - call EXT(lck_mtx_lock_wait) /* wait for the lock */ - addl $8,%esp /* returns with interlock dropped */ - movl B_ARG0,%edx /* refetch mutex address */ - jmp Lml_restart /* and start over */ - -Lml_ilk_refail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ - -Lml_ilk_reloop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Lml_reget_retry /* yes, go try to grab it */ - jmp Lml_ilk_reloop /* no - keep spinning */ +#define RW_TRY_LOCK_EXCLUSIVE_MASK (LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) +/* + * void lck_rw_try_lock_exclusive(lck_rw_t *) + * + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. 
+ */ +Entry(lck_rw_try_lock_exclusive) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + testl $(LCK_RW_INTERLOCK), %eax + jne 2f + testl $(RW_TRY_LOCK_EXCLUSIVE_MASK), %eax + jne 3f /* can't get it */ -NONLEAF_ENTRY2(mutex_try_spin,_mutex_try_spin) - - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ - - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() - - movl M_ILK,%eax - testl %eax,%eax /* is the interlock held? */ - jne Lmts_ilk_loop /* yes, go to spin loop */ -Lmts_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lmts_ilk_fail /* branch on failure to spin loop */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Lmt_fail /* yes, we lose */ -Lmts_acquire: - movl $(MUTEX_LOCKED_AS_SPIN),M_LOCKED /* indicate ownership as a spin lock */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + orl $(LCK_RW_WANT_WRITE), %ecx + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 2f -#if MACH_LDEBUG - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_THREAD - movl B_PC,%ecx - movl %ecx,M_PC -#endif - PREEMPTION_DISABLE /* no, return with interlock held */ - popf /* restore interrupt state */ - movl $1,%eax - leave #if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_try_spin_lockstat_patch_point) + movl $1, %eax + /* + * Dtrace lockstat event: LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE + * Implemented by swapping between return and no-op instructions. + * See bsd/dev/dtrace/lockstat.c. 
+ */ + LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point) ret - /* %edx inherits the lock pointer from above */ - LOCKSTAT_RECORD(LS_MUTEX_TRY_SPIN_ACQUIRE, %edx) - movl $1,%eax + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif + movl $1, %eax /* return TRUE */ ret - -Lmts_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ - -Lmts_ilk_loop: +2: PAUSE - /* - * need to do this check outside of the interlock in - * case this lock is held as a simple lock which means - * we won't be able to take the interlock - */ - movl M_LOCKED,%eax - testl %eax,%eax /* is the mutex locked? */ - jne Lmt_fail_no_ilk /* yes, go return failure */ - - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Lmts_retry /* yes, go try to grab it */ - jmp Lmts_ilk_loop /* keep spinning */ - - - -NONLEAF_ENTRY2(mutex_try,_mutex_try) - - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ - - CHECK_MUTEX_TYPE() - CHECK_NO_SIMPLELOCKS() + jmp 1b +3: + xorl %eax, %eax /* return FALSE */ + ret - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Lmt_ilk_loop /* yes, go try to grab it */ -Lmt_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lmt_ilk_fail /* branch on failure to spin loop */ - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? 
*/ - jne Lmt_fail /* yes, we lose */ -Lmt_acquire: - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_LOCKED +/* + * void lck_rw_lock_shared_to_exclusive(lck_rw_t*) + * + * fastpath can be taken if + * the current rw_shared_count == 1 + * AND the interlock is clear + * AND RW_WANT_UPGRADE is not set + * + * note that RW_WANT_WRITE could be set, but will not + * be indicative of an exclusive hold since we have + * a read count on the lock that we have not yet released + * we can blow by that state since the lck_rw_lock_exclusive + * function will block until rw_shared_count == 0 and + * RW_WANT_UPGRADE is clear... it does this check behind + * the interlock which we are also checking for + * + * to make the transition we must be able to atomically + * set RW_WANT_UPGRADE and get rid of the read count we hold + */ +Entry(lck_rw_lock_shared_to_exclusive) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ + testl $(LCK_RW_INTERLOCK), %eax + jne 7f + testl $(LCK_RW_WANT_UPGRADE), %eax + jne 2f -#if MACH_LDEBUG - movl %ecx,M_THREAD - movl B_PC,%ecx - movl %ecx,M_PC -#endif - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Lmt_waiters /* yes, more work to do */ -Lmt_return: - xorl %eax,%eax - movl %eax,M_ILK - popf /* restore interrupt state */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + orl $(LCK_RW_WANT_UPGRADE), %ecx /* ask for WANT_UPGRADE */ + decl %ecx /* and shed our read count */ + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 7f + /* we now own the WANT_UPGRADE */ + testl $(LCK_RW_SHARED_MASK), %ecx /* check to see if all of the readers are drained */ + jne 8f /* if not, we need to go wait */ - movl $1,%eax - leave #if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_try_lockstat_patch_point) + movl $1, %eax + /* + * Dtrace lockstat event: LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE + * Implemented by swapping between return and no-op instructions. 
+ * See bsd/dev/dtrace/lockstat.c. + */ + LOCKSTAT_LABEL(_lck_rw_lock_shared_to_exclusive_lockstat_patch_point) ret - /* inherit the lock pointer in %edx from above */ - LOCKSTAT_RECORD(LS_MUTEX_TRY_LOCK_ACQUIRE, %edx) - movl $1,%eax + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif + movl $1, %eax /* return success */ ret + +2: /* someone else already holds WANT_UPGRADE */ + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + decl %ecx /* shed our read count */ + testl $(LCK_RW_SHARED_MASK), %ecx + jne 3f /* we were the last reader */ + andl $(~LCK_W_WAITING), %ecx /* so clear the wait indicator */ +3: + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 7f + +#if __i386__ + pushl %eax /* go check to see if we need to */ + push %edx /* wakeup anyone */ + call EXT(lck_rw_lock_shared_to_exclusive_failure) + addl $8, %esp +#else + mov %eax, %esi /* put old flags as second arg */ + /* lock is alread in %rdi */ + call EXT(lck_rw_lock_shared_to_exclusive_failure) +#endif + ret /* and pass the failure return along */ +7: + PAUSE + jmp 1b +8: + jmp EXT(lck_rw_lock_shared_to_exclusive_success) -Lmt_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Lmt_return -Lmt_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ - -Lmt_ilk_loop: - PAUSE + + .cstring +rwl_release_error_str: + .asciz "Releasing non-exclusive RW lock without a reader refcount!" 
+ .text + +/* + * lck_rw_type_t lck_rw_done(lck_rw_t *) + * + */ +Entry(lck_rw_done) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + testl $(LCK_RW_INTERLOCK), %eax + jne 7f /* wait for interlock to clear */ + + movl %eax, %ecx /* keep original value in %eax for cmpxchgl */ + testl $(LCK_RW_SHARED_MASK), %ecx /* if reader count == 0, must be exclusive lock */ + je 2f + decl %ecx /* Decrement reader count */ + testl $(LCK_RW_SHARED_MASK), %ecx /* if reader count has now gone to 0, check for waiters */ + je 4f + jmp 6f +2: + testl $(LCK_RW_WANT_UPGRADE), %ecx + je 3f + andl $(~LCK_RW_WANT_UPGRADE), %ecx + jmp 4f +3: + testl $(LCK_RW_WANT_WRITE), %ecx + je 8f /* lock is not 'owned', go panic */ + andl $(~LCK_RW_WANT_WRITE), %ecx +4: /* - * need to do this check outside of the interlock in - * case this lock is held as a simple lock which means - * we won't be able to take the interlock - */ - movl M_LOCKED,%eax /* get lock owner */ - testl %eax,%eax /* is the mutex locked? */ - jne Lmt_fail_no_ilk /* yes, go return failure */ - - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Lmt_retry /* yes, go try to grab it */ - jmp Lmt_ilk_loop /* no - keep spinning */ - -Lmt_fail: - xorl %eax,%eax - movl %eax,M_ILK - -Lmt_fail_no_ilk: - xorl %eax,%eax - popf /* restore interrupt state */ - NONLEAF_RET - + * test the original values to match what + * lck_rw_done_gen is going to do to determine + * which wakeups need to happen... 
+ * + * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) + */ + testl $(LCK_W_WAITING), %eax + je 5f + andl $(~LCK_W_WAITING), %ecx + + testl $(LCK_RW_PRIV_EXCL), %eax + jne 6f +5: + andl $(~LCK_R_WAITING), %ecx +6: + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 7f + +#if __i386__ + pushl %eax + push %edx + call EXT(lck_rw_done_gen) + addl $8, %esp +#else + mov %eax,%esi /* old flags in %rsi */ + /* lock is in %rdi already */ + call EXT(lck_rw_done_gen) +#endif + ret +7: + PAUSE + jmp 1b +8: + LOAD_STRING_ARG0(rwl_release_error_str) + CALL_PANIC() + + +/* + * lck_rw_type_t lck_rw_lock_exclusive_to_shared(lck_rw_t *) + * + */ +Entry(lck_rw_lock_exclusive_to_shared) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + testl $(LCK_RW_INTERLOCK), %eax + jne 6f /* wait for interlock to clear */ + + movl %eax, %ecx /* keep original value in %eax for cmpxchgl */ + incl %ecx /* Increment reader count */ + + testl $(LCK_RW_WANT_UPGRADE), %ecx + je 2f + andl $(~LCK_RW_WANT_UPGRADE), %ecx + jmp 3f +2: + andl $(~LCK_RW_WANT_WRITE), %ecx +3: + /* + * test the original values to match what + * lck_rw_lock_exclusive_to_shared_gen is going to do to determine + * which wakeups need to happen... 
+ * + * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) + */ + testl $(LCK_W_WAITING), %eax + je 4f + testl $(LCK_RW_PRIV_EXCL), %eax + jne 5f +4: + andl $(~LCK_R_WAITING), %ecx +5: + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 6f + +#if __i386__ + pushl %eax + push %edx + call EXT(lck_rw_lock_exclusive_to_shared_gen) + addl $8, %esp +#else + mov %eax,%esi + call EXT(lck_rw_lock_exclusive_to_shared_gen) +#endif + ret +6: + PAUSE + jmp 1b -LEAF_ENTRY(mutex_convert_spin) - movl L_ARG0,%edx /* fetch lock pointer */ - movl M_LOCKED,%ecx /* is this the spin variant of the mutex */ - cmpl $(MUTEX_LOCKED_AS_SPIN),%ecx - jne Lmcs_exit /* already owned as a mutex, just return */ - movl M_ILK,%ecx /* convert from spin version to mutex */ - movl %ecx,M_LOCKED /* take control of the mutex */ +/* + * int lck_rw_grab_want(lck_rw_t *) + * + */ +Entry(lck_rw_grab_want) + LOAD_LCK_RW_REGISTER +1: + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + testl $(LCK_RW_INTERLOCK), %eax + jne 3f /* wait for interlock to clear */ + testl $(LCK_RW_WANT_WRITE), %eax /* want_write has been grabbed by someone else */ + jne 2f /* go return failure */ - cmpw $0,M_WAITERS /* are there any waiters? 
*/ - jne Lmcs_waiters /* yes, more work to do */ - -Lmcs_return: - xorl %ecx,%ecx - movl %ecx,M_ILK /* clear interlock */ - PREEMPTION_ENABLE -Lmcs_exit: -#if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_convert_spin_lockstat_patch_point) + movl %eax, %ecx /* original value in %eax for cmpxchgl */ + orl $(LCK_RW_WANT_WRITE), %ecx + lock + cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ + jne 2f + /* we now own want_write */ + movl $1, %eax /* return success */ ret - /* inherit %edx from above */ - LOCKSTAT_RECORD(LS_MUTEX_CONVERT_SPIN_ACQUIRE, %edx) -#endif +2: + xorl %eax, %eax /* return failure */ ret +3: + PAUSE + jmp 1b + +#define RW_LOCK_SHARED_OR_UPGRADE_MASK (LCK_RW_SHARED_MASK | LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE) +/* + * int lck_rw_held_read_or_upgrade(lck_rw_t *) + * + */ +Entry(lck_rw_held_read_or_upgrade) + LOAD_LCK_RW_REGISTER + LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and reader count */ + andl $(RW_LOCK_SHARED_OR_UPGRADE_MASK), %eax + ret -Lmcs_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Lmcs_return +/* + * N.B.: On x86, statistics are currently recorded for all indirect mutexes. + * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained + * as a 64-bit quantity (this matches the existing PowerPC implementation, + * and the new x86 specific statistics are also maintained as 32-bit + * quantities). + * + * + * Enable this preprocessor define to record the first miss alone + * By default, we count every miss, hence multiple misses may be + * recorded for a single lock acquire attempt via lck_mtx_lock + */ +#undef LOG_FIRST_MISS_ALONE -NONLEAF_ENTRY(mutex_unlock) - movl B_ARG0,%edx /* fetch lock pointer */ +/* + * This preprocessor define controls whether the R-M-W update of the + * per-group statistics elements are atomic (LOCK-prefixed) + * Enabled by default. 
+ */ +#define ATOMIC_STAT_UPDATES 1 - movl M_LOCKED,%ecx /* is this the spin variant of the mutex */ - cmpl $(MUTEX_LOCKED_AS_SPIN),%ecx - jne Lmu_enter /* no, go treat like a real mutex */ +#if defined(ATOMIC_STAT_UPDATES) +#define LOCK_IF_ATOMIC_STAT_UPDATES lock +#else +#define LOCK_IF_ATOMIC_STAT_UPDATES +#endif /* ATOMIC_STAT_UPDATES */ - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Lmus_wakeup /* yes, more work to do */ -Lmus_drop_ilk: - xorl %ecx,%ecx - movl %ecx,M_LOCKED /* yes, clear the spin indicator */ - movl %ecx,M_ILK /* release the interlock */ - PREEMPTION_ENABLE /* and re-enable preemption */ - leave -#if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_unlock_lockstat_patch_point) - ret - /* inherit lock pointer in %edx from above */ - LOCKSTAT_RECORD(LS_MUTEX_UNLOCK_RELEASE, %edx) +/* + * For most routines, the lck_mtx_t pointer is loaded into a + * register initially, and the owner field checked for indirection. + * Eventually the lock owner is loaded into a register and examined. 
+ */ + +#define M_OWNER MUTEX_OWNER +#define M_PTR MUTEX_PTR +#define M_STATE MUTEX_STATE + +#if defined(__i386__) + +#define LMTX_ARG0 B_ARG0 +#define LMTX_ARG1 B_ARG1 +#define LMTX_REG %edx +#define LMTX_A_REG %eax +#define LMTX_A_REG32 %eax +#define LMTX_C_REG %ecx +#define LMTX_C_REG32 %ecx +#define LMTX_D_REG %edx +#define LMTX_RET_REG %eax +#define LMTX_LGROUP_REG %esi +#define LMTX_SSTATE_REG %edi +#define LOAD_LMTX_REG(arg) mov arg, LMTX_REG +#define LOAD_REG_ARG0(reg) push reg +#define LOAD_REG_ARG1(reg) push reg +#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_ARG0 +#define LMTX_ASSERT_OWNED cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1 + +#define LMTX_ENTER_EXTENDED \ + mov M_PTR(LMTX_REG), LMTX_REG ; \ + push LMTX_LGROUP_REG ; \ + push LMTX_SSTATE_REG ; \ + xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ + mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + addl $1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) ; \ + jnc 11f ; \ + incl GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG) ; \ +11: + +#define LMTX_EXIT_EXTENDED \ + pop LMTX_SSTATE_REG ; \ + pop LMTX_LGROUP_REG + + +#define LMTX_CHK_EXTENDED_EXIT \ + cmp LMTX_REG, LMTX_ARG0 ; \ + je 12f ; \ + pop LMTX_SSTATE_REG ; \ + pop LMTX_LGROUP_REG ; \ +12: + + +#if LOG_FIRST_MISS_ALONE +#define LMTX_UPDATE_MISS \ + test $1, LMTX_SSTATE_REG ; \ + jnz 11f ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ + or $1, LMTX_SSTATE_REG ; \ +11: +#else +#define LMTX_UPDATE_MISS \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) #endif - ret -Lmus_wakeup: - pushl %edx /* save mutex address */ - pushl %edx /* push mutex address */ - call EXT(lck_mtx_unlockspin_wakeup) /* yes, wake a thread */ - addl $4,%esp - popl %edx /* restore mutex pointer */ - jmp Lmus_drop_ilk + +#if LOG_FIRST_MISS_ALONE +#define LMTX_UPDATE_WAIT \ + test $2, LMTX_SSTATE_REG ; \ + jnz 11f ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ + or $2, 
LMTX_SSTATE_REG ; \ +11: +#else +#define LMTX_UPDATE_WAIT \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) +#endif -Lmu_enter: - pushf /* save interrupt state */ + +/* + * Record the "direct wait" statistic, which indicates if a + * miss proceeded to block directly without spinning--occurs + * if the owner of the mutex isn't running on another processor + * at the time of the check. + */ +#define LMTX_UPDATE_DIRECT_WAIT \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) - CHECK_MUTEX_TYPE() - CHECK_THREAD(M_THREAD) + +#define LMTX_CALLEXT1(func_name) \ + push LMTX_REG ; \ + push LMTX_REG ; \ + call EXT(func_name) ; \ + add $4, %esp ; \ + pop LMTX_REG + +#define LMTX_CALLEXT2(func_name, reg) \ + push LMTX_REG ; \ + push reg ; \ + push LMTX_REG ; \ + call EXT(func_name) ; \ + add $8, %esp ; \ + pop LMTX_REG + +#elif defined(__x86_64__) + +#define LMTX_ARG0 %rdi +#define LMTX_ARG1 %rsi +#define LMTX_REG_ORIG %rdi +#define LMTX_REG %rdx +#define LMTX_A_REG %rax +#define LMTX_A_REG32 %eax +#define LMTX_C_REG %rcx +#define LMTX_C_REG32 %ecx +#define LMTX_D_REG %rdx +#define LMTX_RET_REG %rax +#define LMTX_LGROUP_REG %r10 +#define LMTX_SSTATE_REG %r11 +#define LOAD_LMTX_REG(arg) mov %rdi, %rdx +#define LOAD_REG_ARG0(reg) mov reg, %rdi +#define LOAD_REG_ARG1(reg) mov reg, %rsi +#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_REG_ORIG +#define LMTX_ASSERT_OWNED cmp $(MUTEX_ASSERT_OWNED), LMTX_ARG1 + +#define LMTX_ENTER_EXTENDED \ + mov M_PTR(LMTX_REG), LMTX_REG ; \ + xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ + mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incq GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) + +#define LMTX_EXIT_EXTENDED + +#define LMTX_CHK_EXTENDED_EXIT + + +#if LOG_FIRST_MISS_ALONE +#define LMTX_UPDATE_MISS \ + test $1, LMTX_SSTATE_REG ; \ + jnz 11f ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ + or $1, LMTX_SSTATE_REG ; \ +11: +#else 
+#define LMTX_UPDATE_MISS \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) +#endif + - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Lmu_ilk_loop /* yes, go try to grab it */ -Lmu_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx +#if LOG_FIRST_MISS_ALONE +#define LMTX_UPDATE_WAIT \ + test $2, LMTX_SSTATE_REG ; \ + jnz 11f ; \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ + or $2, LMTX_SSTATE_REG ; \ +11: +#else +#define LMTX_UPDATE_WAIT \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) +#endif - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Lmu_ilk_fail /* branch on failure to spin loop */ - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Lmu_wakeup /* yes, more work to do */ +/* + * Record the "direct wait" statistic, which indicates if a + * miss proceeded to block directly without spinning--occurs + * if the owner of the mutex isn't running on another processor + * at the time of the check. 
+ */ +#define LMTX_UPDATE_DIRECT_WAIT \ + LOCK_IF_ATOMIC_STAT_UPDATES ; \ + incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) -Lmu_doit: -#if MACH_LDEBUG - movl $0,M_THREAD /* disown thread */ -#endif - xorl %ecx,%ecx - movl %ecx,M_LOCKED /* unlock the mutex */ - movl %ecx,M_ILK /* release the interlock */ - popf /* restore interrupt state */ - leave -#if CONFIG_DTRACE - LOCKSTAT_LABEL(_mutex_unlock2_lockstat_patch_point) - ret - /* inherit %edx from above */ - LOCKSTAT_RECORD(LS_MUTEX_UNLOCK_RELEASE, %edx) + +#define LMTX_CALLEXT1(func_name) \ + LMTX_CHK_EXTENDED ; \ + je 12f ; \ + push LMTX_LGROUP_REG ; \ + push LMTX_SSTATE_REG ; \ +12: push LMTX_REG_ORIG ; \ + push LMTX_REG ; \ + mov LMTX_REG, LMTX_ARG0 ; \ + call EXT(func_name) ; \ + pop LMTX_REG ; \ + pop LMTX_REG_ORIG ; \ + LMTX_CHK_EXTENDED ; \ + je 12f ; \ + pop LMTX_SSTATE_REG ; \ + pop LMTX_LGROUP_REG ; \ +12: + +#define LMTX_CALLEXT2(func_name, reg) \ + LMTX_CHK_EXTENDED ; \ + je 12f ; \ + push LMTX_LGROUP_REG ; \ + push LMTX_SSTATE_REG ; \ +12: push LMTX_REG_ORIG ; \ + push LMTX_REG ; \ + mov reg, LMTX_ARG1 ; \ + mov LMTX_REG, LMTX_ARG0 ; \ + call EXT(func_name) ; \ + pop LMTX_REG ; \ + pop LMTX_REG_ORIG ; \ + LMTX_CHK_EXTENDED ; \ + je 12f ; \ + pop LMTX_SSTATE_REG ; \ + pop LMTX_LGROUP_REG ; \ +12: + +#else +#error Unsupported architecture #endif - ret -Lmu_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ -Lmu_ilk_loop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Lmu_retry /* yes, go try to grab it */ - jmp Lmu_ilk_loop /* no - keep spinning */ - -Lmu_wakeup: - pushl M_LOCKED - pushl %edx /* push mutex address */ - call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ - addl $8,%esp - movl B_ARG0,%edx /* restore lock pointer */ - jmp Lmu_doit +#define M_WAITERS_MSK 0x0000ffff +#define M_PRIORITY_MSK 0x00ff0000 +#define M_ILOCKED_MSK 0x01000000 +#define M_MLOCKED_MSK 0x02000000 +#define M_PROMOTED_MSK 0x04000000 +#define M_SPIN_MSK 0x08000000 + + /* * void lck_mtx_assert(lck_mtx_t* l, unsigned int) - * void _mutex_assert(mutex_t, unsigned int) * Takes the address of a lock, and an assertion type as parameters. * The assertion can take one of two forms determine by the type * parameter: either the lock is held by the current thread, and the * type is LCK_MTX_ASSERT_OWNED, or it isn't and the type is - * LCK_MTX_ASSERT_NOT_OWNED. Calls panic on assertion failure. + * LCK_MTX_ASSERT_NOTOWNED. Calls panic on assertion failure. * */ -Entry(lck_mtx_assert) -Entry(_mutex_assert) - movl S_ARG0,%edx /* Load lock address */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* Load current thread */ - - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? */ - cmove M_PTR,%edx /* If so, take indirection */ +NONLEAF_ENTRY(lck_mtx_assert) + LOAD_LMTX_REG(B_ARG0) /* Load lock address */ + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG /* Load current thread */ - movl M_LOCKED,%eax /* Load lock word */ - cmpl $(MUTEX_LOCKED_AS_SPIN),%eax /* check for spin variant */ - cmove M_ILK,%eax /* yes, spin lock owner is in the interlock */ + mov M_OWNER(LMTX_REG), LMTX_C_REG + cmp $(MUTEX_IND), LMTX_C_REG /* Is this an indirect mutex? */ + cmove M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ - cmpl $(MUTEX_ASSERT_OWNED),S_ARG1 /* Determine assert type */ + mov M_OWNER(LMTX_REG), LMTX_C_REG /* Load owner */ + LMTX_ASSERT_OWNED jne 2f /* Assert ownership? */ - cmpl %eax,%ecx /* Current thread match? 
*/ + cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ jne 3f /* no, go panic */ + testl $(M_ILOCKED_MSK | M_MLOCKED_MSK), M_STATE(LMTX_REG) + je 3f 1: /* yes, we own it */ - ret /* just return */ + NONLEAF_RET 2: - cmpl %eax,%ecx /* Current thread match? */ + cmp LMTX_A_REG, LMTX_C_REG /* Current thread match? */ jne 1b /* No, return */ - movl %edx,S_ARG1 /* Prep assertion failure */ - movl $(mutex_assert_owned_str),S_ARG0 + LOAD_REG_ARG1(LMTX_REG) + LOAD_STRING_ARG0(mutex_assert_owned_str) jmp 4f 3: - movl %edx,S_ARG1 /* Prep assertion failure */ - movl $(mutex_assert_not_owned_str),S_ARG0 + LOAD_REG_ARG1(LMTX_REG) + LOAD_STRING_ARG0(mutex_assert_not_owned_str) 4: - jmp EXT(panic) + CALL_PANIC() + + +lck_mtx_destroyed: + LOAD_REG_ARG1(LMTX_REG) + LOAD_STRING_ARG0(mutex_interlock_destroyed_str) + CALL_PANIC() + .data mutex_assert_not_owned_str: .asciz "mutex (%p) not owned\n" mutex_assert_owned_str: .asciz "mutex (%p) owned\n" +mutex_interlock_destroyed_str: + .asciz "trying to interlock destroyed mutex (%p)" .text -/* This preprocessor define controls whether the R-M-W update of the - * per-group statistics elements are atomic (LOCK-prefixed) - * Enabled by default. - */ -#define ATOMIC_STAT_UPDATES 1 - -#if defined(ATOMIC_STAT_UPDATES) -#define LOCK_IF_ATOMIC_STAT_UPDATES lock -#else -#define LOCK_IF_ATOMIC_STAT_UPDATES -#endif /* ATOMIC_STAT_UPDATES */ /* * lck_mtx_lock() * lck_mtx_try_lock() - * lck_mutex_unlock() + * lck_mtx_unlock() * lck_mtx_lock_spin() * lck_mtx_convert_spin() - * - * These are variants of mutex_lock(), mutex_try(), mutex_unlock() - * mutex_lock_spin and mutex_convert_spin without - * DEBUG checks (which require fields not present in lck_mtx_t's). */ - + NONLEAF_ENTRY(lck_mtx_lock_spin) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ - - CHECK_NO_SIMPLELOCKS() CHECK_PREEMPTION_LEVEL() - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - jne Llmls_eval_ilk /* no, go see if indirect */ -Llmls_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ + je Llmls_enter /* no - can't be INDIRECT or DESTROYED */ - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llmls_ilk_fail /* branch on failure to spin loop */ + mov M_OWNER(LMTX_REG), LMTX_A_REG + cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex */ + jne Llmls_loop - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llml_fail /* yes, fall back to a normal mutex */ + LMTX_ENTER_EXTENDED -Llmls_acquire: - movl $(MUTEX_LOCKED_AS_SPIN),M_LOCKED /* indicate ownership as a spin lock */ - PREEMPTION_DISABLE - popf /* restore interrupt state */ - NONLEAF_RET /* return with the interlock held */ - -Llmls_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_SPIN_MSK), LMTX_C_REG + je Llmls_loop -Llmls_ilk_loop: + LMTX_UPDATE_MISS +Llmls_loop: PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Llmls_retry /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llmls_ilk_loop /* no - keep spinning */ - - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llmls_ilk_loop - - -Llmls_eval_ilk: - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? 
*/ - cmove M_PTR,%edx /* If so, take indirection */ - jne Llmls_ilk_loop /* If not, go to spin loop */ - -Llmls_lck_ext: - pushl %esi /* Used to hold the lock group ptr */ - pushl %edi /* Used for stat update records */ - movl MUTEX_GRP(%edx),%esi /* Load lock group */ - xorl %edi,%edi /* Clear stat update records */ - /* 64-bit increment of acquire attempt statistic (per-group) */ - LOCK_IF_ATOMIC_STAT_UPDATES - addl $1, GRP_MTX_STAT_UTIL(%esi) - jnc 1f - incl GRP_MTX_STAT_UTIL+4(%esi) -1: - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Llmls_ext_ilk_loop /* no, go to spin loop */ -Llmls_ext_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llmls_ext_ilk_fail /* branch on failure to retry */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llml_ext_fail /* yes, we lose */ - - popl %edi - popl %esi - jmp Llmls_acquire - -Llmls_ext_ilk_fail: - /* - * Slow path: call out to do the spinning. - */ - movl 8(%esp),%ecx - pushl %ecx - popf /* restore interrupt state */ - -Llmls_ext_ilk_loop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Llmls_ext_retry /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llmls_ext_ilk_loop /* no - keep spinning */ - - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llmls_ext_ilk_loop /* no - keep spinning */ - - - -NONLEAF_ENTRY(lck_mtx_lock) - - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + + test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ + jne Llmls_loop +Llmls_enter: + test $(M_MLOCKED_MSK), LMTX_C_REG /* is the mutex locked */ + jne Llml_contended /* fall back to normal mutex handling */ + + PUSHF /* save interrupt state */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG + CLI /* disable interrupts */ + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 1f - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of interlock */ - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Llml_eval_ilk /* no, go see if indirect */ -Llml_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llml_ilk_fail /* branch on failure to spin loop */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llml_fail /* yes, we lose */ -Llml_acquire: - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_LOCKED - - cmpw $0,M_WAITERS /* are there any waiters? 
*/ - jne Lml_waiters /* yes, more work to do */ -Llml_return: - xorl %eax,%eax - movl %eax,M_ILK + PREEMPTION_DISABLE + POPF /* restore interrupt state */ - popf /* restore interrupt state */ + LMTX_CHK_EXTENDED_EXIT + /* return with the interlock held and preemption disabled */ leave #if CONFIG_DTRACE - LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point) + LOCKSTAT_LABEL(_lck_mtx_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in %edx above */ - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, %edx) + /* inherit lock pointer in LMTX_REG above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, LMTX_REG) #endif ret -Llml_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Llml_return +1: + POPF /* restore interrupt state */ + jmp Llmls_loop -Llml_restart: -Llml_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ -Llml_ilk_loop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Llml_retry /* yes - go try to grab it */ + +NONLEAF_ENTRY(lck_mtx_lock) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llml_ilk_loop /* no - keep spinning */ + CHECK_PREEMPTION_LEVEL() - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llml_ilk_loop /* no - keep spinning */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ + je Llml_enter /* no - can't be INDIRECT or DESTROYED */ -Llml_fail: - /* - * Check if the owner is on another processor and therefore - * we should try to spin before blocking. 
- */ - testl $(OnProc),ACT_SPF(%ecx) - jz Llml_block + mov M_OWNER(LMTX_REG), LMTX_A_REG + cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + jne Llml_loop - /* - * Here if owner is on another processor: - * - release the interlock - * - spin on the holder until release or timeout - * - in either case re-acquire the interlock - * - if released, acquire it - * - otherwise drop thru to block. - */ - xorl %eax,%eax - movl %eax,M_ILK /* zero interlock */ - popf - pushf /* restore interrupt state */ - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_spinwait) - addl $4,%esp - popl %edx /* restore mutex address */ - - /* Re-acquire interlock */ - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Llml_ilk_refail /* no, go to spin loop */ -Llml_reget_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llml_ilk_refail /* branch on failure to retry */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex free? */ - je Llml_acquire /* yes, acquire */ - -Llml_block: - CHECK_MYLOCK(M_THREAD) - pushl %edx /* save mutex address */ - pushl M_LOCKED - pushl %edx /* push mutex address */ - /* - * N.B.: lck_mtx_lock_wait is called here with interrupts disabled - * Consider reworking. - */ - call EXT(lck_mtx_lock_wait) /* wait for the lock */ - addl $8,%esp - popl %edx /* restore mutex address */ - jmp Llml_restart /* and start over */ + LMTX_ENTER_EXTENDED -Llml_ilk_refail: - popf /* restore interrupt state */ - pushf /* resave interrupt state on stack */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_SPIN_MSK), LMTX_C_REG + je Llml_loop -Llml_ilk_reloop: + LMTX_UPDATE_MISS +Llml_loop: PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Llml_reget_retry /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llml_ilk_reloop /* no - keep spinning */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llml_ilk_reloop /* no - keep spinning */ + test $(M_ILOCKED_MSK), LMTX_C_REG + jne Llml_loop +Llml_enter: + test $(M_MLOCKED_MSK), LMTX_C_REG + jne Llml_contended /* mutex owned by someone else, go contend for it */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_MLOCKED_MSK), LMTX_C_REG + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne Llml_loop -Llml_eval_ilk: - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? */ - cmove M_PTR,%edx /* If so, take indirection */ - jne Llml_ilk_loop /* If not, go to spin loop */ + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ -/* - * Entry into statistics codepath for lck_mtx_lock: - * EDX: real lock pointer - * first dword on stack contains flags - */ +Llml_acquired: + testl $(M_WAITERS_MSK), M_STATE(LMTX_REG) + je 1f -/* Enable this preprocessor define to record the first miss alone - * By default, we count every miss, hence multiple misses may be - * recorded for a single lock acquire attempt via lck_mtx_lock - */ -#undef LOG_FIRST_MISS_ALONE - -/* - * N.B.: On x86, statistics are currently recorded for all indirect mutexes. - * Also, only the acquire attempt count (GRP_MTX_STAT_UTIL) is maintained - * as a 64-bit quantity (this matches the existing PowerPC implementation, - * and the new x86 specific statistics are also maintained as 32-bit - * quantities). 
- */ - -Llml_lck_ext: - pushl %esi /* Used to hold the lock group ptr */ - pushl %edi /* Used for stat update records */ - movl MUTEX_GRP(%edx),%esi /* Load lock group */ - xorl %edi,%edi /* Clear stat update records */ - /* 64-bit increment of acquire attempt statistic (per-group) */ - LOCK_IF_ATOMIC_STAT_UPDATES - addl $1, GRP_MTX_STAT_UTIL(%esi) - jnc 1f - incl GRP_MTX_STAT_UTIL+4(%esi) -1: - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Llml_ext_ilk_loop /* no, go to spin loop */ -Llml_ext_get_hw: - cli - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llml_ext_ilk_fail /* branch on failure to retry */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llml_ext_fail /* yes, we lose */ - -Llml_ext_acquire: - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_LOCKED - - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Llml_ext_waiters /* yes, more work to do */ -Llml_ext_return: - xorl %eax,%eax - movl %eax,M_ILK + LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) +1: + LMTX_CHK_EXTENDED /* is this an extended mutex */ + jne 2f - popl %edi - popl %esi - popf /* restore interrupt state */ + leave +#if CONFIG_DTRACE + LOCKSTAT_LABEL(_lck_mtx_lock_lockstat_patch_point) + ret + /* inherit lock pointer in LMTX_REG above */ + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, LMTX_REG) +#endif + ret +2: + LMTX_EXIT_EXTENDED leave #if CONFIG_DTRACE LOCKSTAT_LABEL(_lck_mtx_lock_ext_lockstat_patch_point) ret - /* inherit lock pointer in %edx above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, %edx) + /* inherit lock pointer in LMTX_REG above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, LMTX_REG) #endif ret - -Llml_ext_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Llml_ext_return - -Llml_ext_restart: -Llml_ext_ilk_fail: - 
movl 8(%esp),%ecx - pushl %ecx - popf /* restore interrupt state */ - -Llml_ext_ilk_loop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Llml_ext_get_hw /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llml_ext_ilk_loop /* no - keep spinning */ - - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llml_ext_ilk_loop - - -Llml_ext_fail: -#ifdef LOG_FIRST_MISS_ALONE - testl $1, %edi - jnz 1f -#endif /* LOG_FIRST_MISS_ALONE */ - /* Record that a lock acquire attempt missed (per-group statistic) */ - LOCK_IF_ATOMIC_STAT_UPDATES - incl GRP_MTX_STAT_MISS(%esi) -#ifdef LOG_FIRST_MISS_ALONE - orl $1, %edi -#endif /* LOG_FIRST_MISS_ALONE */ -1: - /* - * Check if the owner is on another processor and therefore - * we should try to spin before blocking. - */ - testl $(OnProc),ACT_SPF(%ecx) - jnz 2f - /* - * Record the "direct wait" statistic, which indicates if a - * miss proceeded to block directly without spinning--occurs - * if the owner of the mutex isn't running on another processor - * at the time of the check. - */ - LOCK_IF_ATOMIC_STAT_UPDATES - incl GRP_MTX_STAT_DIRECT_WAIT(%esi) - jmp Llml_ext_block -2: - /* - * Here if owner is on another processor: - * - release the interlock - * - spin on the holder until release or timeout - * - in either case re-acquire the interlock - * - if released, acquire it - * - otherwise drop thru to block. - */ - xorl %eax,%eax - movl %eax,M_ILK /* zero interlock */ - - pushl 8(%esp) /* Make another copy of EFLAGS image */ - popf /* Restore interrupt state */ - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_spinwait) - addl $4,%esp - popl %edx /* restore mutex address */ - - /* Re-acquire interlock */ - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - jne Llml_ext_ilk_refail /* no, go to spin loop */ -Llml_ext_reget_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llml_ext_ilk_refail /* branch on failure to spin loop */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex free? */ - je Llml_ext_acquire /* yes, acquire */ - -Llml_ext_block: - /* If we wanted to count waits just once per lock acquire, we'd - * skip over the stat update here - */ - LOCK_IF_ATOMIC_STAT_UPDATES - /* Record that a lock miss proceeded to block */ - incl GRP_MTX_STAT_WAIT(%esi) -1: - CHECK_MYLOCK(M_THREAD) - pushl %edx /* save mutex address */ - pushl M_LOCKED - pushl %edx /* push mutex address */ - /* - * N.B.: lck_mtx_lock_wait is called here with interrupts disabled - * Consider reworking. - */ - call EXT(lck_mtx_lock_wait) /* wait for the lock */ - addl $8,%esp - popl %edx /* restore mutex address */ - jmp Llml_ext_restart /* and start over */ - -Llml_ext_ilk_refail: - movl 8(%esp),%ecx - pushl %ecx - popf /* restore interrupt state */ -Llml_ext_ilk_reloop: - PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Llml_ext_reget_retry /* yes - go try to grab it */ - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llml_ext_ilk_reloop /* no - keep spinning */ +Llml_contended: + LMTX_CHK_EXTENDED /* is this an extended mutex */ + je 0f + LMTX_UPDATE_MISS +0: + LMTX_CALLEXT1(lck_mtx_lock_spinwait_x86) + + test LMTX_RET_REG, LMTX_RET_REG + je Llml_acquired /* acquired mutex */ + cmp $1, LMTX_RET_REG /* check for direct wait status */ + je 2f + LMTX_CHK_EXTENDED /* is this an extended mutex */ + je 2f + LMTX_UPDATE_DIRECT_WAIT +2: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_ILOCKED_MSK), LMTX_C_REG + jne 6f + + PUSHF /* save state of interrupt mask */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), LMTX_C_REG /* try to take the interlock */ + CLI /* disable interrupts */ + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 5f - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llml_ext_ilk_reloop + test $(M_MLOCKED_MSK), LMTX_C_REG /* we've got the interlock and */ + jne 3f + or $(M_MLOCKED_MSK), LMTX_C_REG /* the mutex is free... 
grab it directly */ + and $(~M_ILOCKED_MSK), LMTX_C_REG + + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + mov LMTX_C_REG32, M_STATE(LMTX_REG) /* now drop the interlock */ + POPF /* restore interrupt state */ + jmp Llml_acquired +3: /* interlock held, mutex busy */ + PREEMPTION_DISABLE + POPF /* restore interrupt state */ + + LMTX_CHK_EXTENDED /* is this an extended mutex */ + je 4f + LMTX_UPDATE_WAIT +4: + LMTX_CALLEXT1(lck_mtx_lock_wait_x86) + jmp Llml_contended +5: + POPF /* restore interrupt state */ +6: + PAUSE + jmp 2b + NONLEAF_ENTRY(lck_mtx_try_lock_spin) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ + je Llmts_enter /* no - can't be INDIRECT or DESTROYED */ - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() + mov M_OWNER(LMTX_REG), LMTX_A_REG + cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + jne Llmts_enter - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - jne Llmts_eval_ilk /* no, go see if indirect */ -Llmts_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llmts_ilk_fail /* branch on failure to retry */ + LMTX_ENTER_EXTENDED +Llmts_loop: + PAUSE + mov M_STATE(LMTX_REG), LMTX_C_REG32 +Llmts_enter: + test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG + jne Llmts_fail + test $(M_ILOCKED_MSK), LMTX_C_REG + jne Llmts_loop + + PUSHF /* save interrupt state */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK | M_SPIN_MSK), LMTX_C_REG + CLI /* disable interrupts */ + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 3f - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llmt_fail /* yes, we lose */ + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ - movl $(MUTEX_LOCKED_AS_SPIN),M_LOCKED /* no, indicate ownership as a spin lock */ - PREEMPTION_DISABLE /* and return with interlock held */ + PREEMPTION_DISABLE + POPF /* restore interrupt state */ - movl $1,%eax /* return success */ - popf /* restore interrupt state */ + LMTX_CHK_EXTENDED_EXIT leave + #if CONFIG_DTRACE + mov $1, LMTX_RET_REG /* return success */ LOCKSTAT_LABEL(_lck_mtx_try_lock_spin_lockstat_patch_point) ret - /* inherit lock pointer in %edx above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, %edx) - movl $1,%eax /* return success */ + /* inherit lock pointer in LMTX_REG above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, LMTX_REG) #endif + mov $1, LMTX_RET_REG /* return success */ ret +3: + POPF /* restore interrupt state */ + jmp Llmts_loop -Llmts_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state */ -Llmts_ilk_loop: - PAUSE - /* - * need to do this check outside of the interlock in - * case this lock is held as a 
simple lock which means - * we won't be able to take the interlock - */ - movl M_LOCKED,%eax /* get lock owner */ - testl %eax,%eax /* is the mutex locked? */ - jne Llmt_fail_no_ilk /* yes, go return failure */ - - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Llmts_retry /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llmts_ilk_loop /* no - keep spinning */ - - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llmts_ilk_loop - -Llmts_eval_ilk: - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? */ - cmove M_PTR,%edx /* If so, take indirection */ - jne Llmts_ilk_loop /* If not, go to spin loop */ - - /* - * bump counter on indirect lock - */ - pushl %esi /* Used to hold the lock group ptr */ - movl MUTEX_GRP(%edx),%esi /* Load lock group */ - /* 64-bit increment of acquire attempt statistic (per-group) */ - LOCK_IF_ATOMIC_STAT_UPDATES - addl $1, GRP_MTX_STAT_UTIL(%esi) - jnc 1f - incl GRP_MTX_STAT_UTIL+4(%esi) -1: - popl %esi - jmp Llmts_ilk_loop - - NONLEAF_ENTRY(lck_mtx_try_lock) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - movl B_ARG0,%edx /* fetch lock pointer */ - pushf /* save interrupt state */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_ILOCKED_MSK), LMTX_C_REG /* is the interlock held */ + je Llmt_enter /* no - can't be INDIRECT or DESTROYED */ - CHECK_NO_SIMPLELOCKS() - CHECK_PREEMPTION_LEVEL() + mov M_OWNER(LMTX_REG), LMTX_A_REG + cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + je lck_mtx_destroyed + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + jne Llmt_enter - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - jne Llmt_eval_ilk /* no, go see if indirect */ -Llmt_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llmt_ilk_fail /* branch on failure to retry */ - - movl M_LOCKED,%ecx /* get lock owner */ - testl %ecx,%ecx /* is the mutex locked? */ - jne Llmt_fail /* yes, we lose */ -Llmt_acquire: - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl %ecx,M_LOCKED - - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Llmt_waiters /* yes, more work to do */ -Llmt_return: - xorl %eax,%eax - movl %eax,M_ILK + LMTX_ENTER_EXTENDED +Llmt_loop: + PAUSE + mov M_STATE(LMTX_REG), LMTX_C_REG32 +Llmt_enter: + test $(M_MLOCKED_MSK | M_SPIN_MSK), LMTX_C_REG + jne Llmt_fail + test $(M_ILOCKED_MSK), LMTX_C_REG + jne Llmt_loop + + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_MLOCKED_MSK), LMTX_C_REG + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne Llmt_loop - popf /* restore interrupt state */ + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ - movl $1,%eax /* return success */ + LMTX_CHK_EXTENDED_EXIT + + test $(M_WAITERS_MSK), LMTX_C_REG + je 2f + LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) +2: leave + #if CONFIG_DTRACE + mov $1, LMTX_RET_REG /* return success */ /* Dtrace probe: LS_LCK_MTX_TRY_LOCK_ACQUIRE */ LOCKSTAT_LABEL(_lck_mtx_try_lock_lockstat_patch_point) ret - /* inherit lock pointer in %edx from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, %edx) - movl $1,%eax /* return success */ -#endif + /* inherit lock pointer in LMTX_REG from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, LMTX_REG) +#endif + mov $1, LMTX_RET_REG /* return success */ ret -Llmt_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Llmt_return - 
-Llmt_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state */ - -Llmt_ilk_loop: - PAUSE - /* - * need to do this check outside of the interlock in - * case this lock is held as a simple lock which means - * we won't be able to take the interlock - */ - movl M_LOCKED,%eax /* get lock owner */ - testl %eax,%eax /* is the mutex locked? */ - jne Llmt_fail_no_ilk /* yes, go return failure */ - - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - je Llmt_retry /* yes - go try to grab it */ - - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llmt_ilk_loop /* no - keep spinning */ - - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llmt_ilk_loop Llmt_fail: - xorl %eax,%eax /* Zero interlock value */ - movl %eax,M_ILK - -Llmt_fail_no_ilk: - popf /* restore interrupt state */ - - cmpl %edx,B_ARG0 - jne Llmt_fail_indirect - - xorl %eax,%eax - /* Note that we don't record a dtrace event for trying and missing */ - NONLEAF_RET - -Llmt_fail_indirect: - pushl %esi /* Used to hold the lock group ptr */ - movl MUTEX_GRP(%edx),%esi /* Load lock group */ - - /* Record mutex acquire attempt miss statistic */ - LOCK_IF_ATOMIC_STAT_UPDATES - incl GRP_MTX_STAT_MISS(%esi) - - popl %esi - xorl %eax,%eax +Llmts_fail: + LMTX_CHK_EXTENDED /* is this an extended mutex */ + je 0f + LMTX_UPDATE_MISS + LMTX_EXIT_EXTENDED +0: + xor LMTX_RET_REG, LMTX_RET_REG NONLEAF_RET -Llmt_eval_ilk: - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? 
*/ - cmove M_PTR,%edx /* If so, take indirection */ - jne Llmt_ilk_loop /* If not, go to spin loop */ - - /* - * bump counter for indirect lock - */ - pushl %esi /* Used to hold the lock group ptr */ - movl MUTEX_GRP(%edx),%esi /* Load lock group */ - - /* 64-bit increment of acquire attempt statistic (per-group) */ - LOCK_IF_ATOMIC_STAT_UPDATES - addl $1, GRP_MTX_STAT_UTIL(%esi) - jnc 1f - incl GRP_MTX_STAT_UTIL+4(%esi) -1: - pop %esi - jmp Llmt_ilk_loop +NONLEAF_ENTRY(lck_mtx_convert_spin) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ -LEAF_ENTRY(lck_mtx_convert_spin) - movl L_ARG0,%edx /* fetch lock pointer */ + mov M_OWNER(LMTX_REG), LMTX_A_REG + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + cmove M_PTR(LMTX_REG), LMTX_REG /* If so, take indirection */ - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? */ - cmove M_PTR,%edx /* If so, take indirection */ - - movl M_LOCKED,%ecx /* is this the spin variant of the mutex */ - cmpl $(MUTEX_LOCKED_AS_SPIN),%ecx - jne Llmcs_exit /* already owned as a mutex, just return */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_MLOCKED_MSK), LMTX_C_REG /* already owned as a mutex, just return */ + jne 2f +1: + and $(~(M_ILOCKED_MSK | M_SPIN_MSK)), LMTX_C_REG /* convert from spin version to mutex */ + or $(M_MLOCKED_MSK), LMTX_C_REG + mov LMTX_C_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ - movl M_ILK,%ecx /* convert from spin version to mutex */ - movl %ecx,M_LOCKED /* take control of the mutex */ + PREEMPTION_ENABLE /* only %eax is consumed */ - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Llmcs_waiters /* yes, more work to do */ + test $(M_WAITERS_MSK), LMTX_C_REG /* are there any waiters? 
*/ + je 2f -Llmcs_return: - xorl %ecx,%ecx - movl %ecx,M_ILK /* clear interlock */ - PREEMPTION_ENABLE -Llmcs_exit: - LEAF_RET + LMTX_CALLEXT1(lck_mtx_lock_acquire_x86) +2: + NONLEAF_RET -Llmcs_waiters: - pushl %edx /* save mutex address */ - pushl %edx - call EXT(lck_mtx_lock_acquire) - addl $4,%esp - popl %edx /* restore mutex address */ - jmp Llmcs_return - - +#if defined(__i386__) NONLEAF_ENTRY(lck_mtx_unlock) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov M_OWNER(LMTX_REG), LMTX_A_REG + test LMTX_A_REG, LMTX_A_REG + jnz Llmu_prim + leave + ret +NONLEAF_ENTRY(lck_mtx_unlock_darwin10) +#else +NONLEAF_ENTRY(lck_mtx_unlock) +#endif + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + mov M_OWNER(LMTX_REG), LMTX_A_REG +Llmu_prim: + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + je Llmu_ext +0: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_MLOCKED_MSK), LMTX_C_REG /* check for full mutex */ + jne 1f - movl B_ARG0,%edx /* fetch lock pointer */ - - cmpl $(MUTEX_IND),M_ITAG /* Is this an indirect mutex? */ - cmove M_PTR,%edx /* If so, take indirection */ - - movl M_LOCKED,%ecx /* is this the spin variant of the mutex */ - cmpl $(MUTEX_LOCKED_AS_SPIN),%ecx - jne Llmu_enter /* no, go treat like a real mutex */ - - cmpw $0,M_WAITERS /* are there any waiters? 
*/ - jne Llmus_wakeup /* yes, more work to do */ - -Llmu_drop_ilk: - xorl %eax,%eax - movl %eax,M_LOCKED /* clear spin indicator */ - movl %eax,M_ILK /* release the interlock */ + xor LMTX_A_REG, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) + mov LMTX_C_REG, LMTX_A_REG /* keep original state in %ecx for later evaluation */ + and $(~(M_ILOCKED_MSK | M_SPIN_MSK | M_PROMOTED_MSK)), LMTX_A_REG + mov LMTX_A_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ + + PREEMPTION_ENABLE /* need to re-enable preemption - clobbers eax */ + jmp 2f +1: + test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + jne 7f + + PUSHF /* save interrupt state */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + and $(~M_MLOCKED_MSK), LMTX_C_REG /* drop mutex */ + or $(M_ILOCKED_MSK), LMTX_C_REG /* pick up interlock */ + CLI + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 6f /* branch on failure to spin loop */ + + xor LMTX_A_REG, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) + mov LMTX_C_REG, LMTX_A_REG /* keep original state in %ecx for later evaluation */ + and $(~(M_ILOCKED_MSK | M_PROMOTED_MSK)), LMTX_A_REG + mov LMTX_A_REG32, M_STATE(LMTX_REG) /* since I own the interlock, I don't need an atomic update */ + POPF /* restore interrupt state */ +2: + test $(M_PROMOTED_MSK | M_WAITERS_MSK), LMTX_C_REG + je 3f + and $(M_PROMOTED_MSK), LMTX_C_REG + + LMTX_CALLEXT2(lck_mtx_unlock_wakeup_x86, LMTX_C_REG) +3: + LMTX_CHK_EXTENDED + jne 4f - PREEMPTION_ENABLE /* and re-enable preemption */ leave #if CONFIG_DTRACE /* Dtrace: LS_LCK_MTX_UNLOCK_RELEASE */ LOCKSTAT_LABEL(_lck_mtx_unlock_lockstat_patch_point) ret - /* inherit lock pointer in %edx from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %edx) + /* inherit lock pointer in LMTX_REG from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, LMTX_REG) #endif ret - -Llmus_wakeup: - pushl %edx /* save mutex address */ 
- pushl %edx /* push mutex address */ - call EXT(lck_mtx_unlockspin_wakeup) /* yes, wake a thread */ - addl $4,%esp - popl %edx /* restore mutex pointer */ - jmp Llmu_drop_ilk - - -Llmu_enter: - pushf /* save interrupt state */ - - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? */ - jne Llmu_ilk_loop /* no - go to spin loop */ -Llmu_retry: - cli /* disable interrupts */ - movl %gs:CPU_ACTIVE_THREAD,%ecx - - /* eax == 0 at this point */ - lock; cmpxchgl %ecx,M_ILK /* atomic compare and exchange */ - jne Llmu_ilk_fail /* branch on failure to spin loop */ - - cmpw $0,M_WAITERS /* are there any waiters? */ - jne Llmu_wakeup /* yes, more work to do */ - -Llmu_doit: - xorl %ecx,%ecx - movl %ecx,M_LOCKED /* unlock the mutex */ - movl %ecx,M_ILK /* clear the interlock */ - - popf /* restore interrupt state */ +4: leave #if CONFIG_DTRACE - LOCKSTAT_LABEL(_lck_mtx_unlock2_lockstat_patch_point) + /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */ + LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point) ret - /* inherit lock pointer in %edx above */ - LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, %edx) + /* inherit lock pointer in LMTX_REG from above */ + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, LMTX_REG) #endif ret +6: + POPF /* restore interrupt state */ +7: + PAUSE + mov M_STATE(LMTX_REG), LMTX_C_REG32 + jmp 1b +Llmu_ext: + mov M_PTR(LMTX_REG), LMTX_REG + mov M_OWNER(LMTX_REG), LMTX_A_REG + mov %gs:CPU_ACTIVE_THREAD, LMTX_C_REG + CHECK_UNLOCK(LMTX_C_REG, LMTX_A_REG) + jmp 0b + + +LEAF_ENTRY(lck_mtx_lock_decr_waiter) + LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ +1: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + + test $(M_WAITERS_MSK), LMTX_C_REG + je 2f + test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + jne 3f + + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + dec LMTX_C_REG /* decrement waiter count */ + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ 
+ jne 3f /* branch on failure to spin loop */ -Llmu_ilk_fail: - popf /* restore interrupt state */ - pushf /* resave interrupt state */ + mov $1, LMTX_RET_REG + LEAF_RET +2: + xor LMTX_RET_REG, LMTX_RET_REG + LEAF_RET +3: + PAUSE + jmp 1b -Llmu_ilk_loop: + + +LEAF_ENTRY(lck_mtx_lock_get_pri) + LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ +1: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + + test $(M_WAITERS_MSK), LMTX_C_REG + jne 2f + test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + jne 3f + + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + and $(~M_PRIORITY_MSK), LMTX_C_REG /* no waiters, reset mutex priority to 0 */ + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 3f /* branch on failure to spin loop */ + + xor LMTX_RET_REG, LMTX_RET_REG /* return mutex priority == 0 */ + LEAF_RET +2: + mov LMTX_C_REG, LMTX_RET_REG + and $(M_PRIORITY_MSK), LMTX_RET_REG + shr $16, LMTX_RET_REG /* return current mutex priority */ + LEAF_RET +3: PAUSE - movl M_ILK,%eax /* read interlock */ - testl %eax,%eax /* unlocked? 
*/ - je Llmu_retry /* yes - go try to grab it */ + jmp 1b + + - cmpl $(MUTEX_DESTROYED),%eax /* check to see if its marked destroyed */ - jne Llmu_ilk_loop /* no - keep spinning */ - pushl %edx - call EXT(lck_mtx_interlock_panic) - /* - * shouldn't return from here, but just in case - */ - popl %edx - jmp Llmu_ilk_loop +LEAF_ENTRY(lck_mtx_ilk_unlock) + LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ -Llmu_wakeup: - pushl %edx /* save mutex address */ - pushl M_LOCKED - pushl %edx /* push mutex address */ - call EXT(lck_mtx_unlock_wakeup)/* yes, wake a thread */ - addl $8,%esp - popl %edx /* restore mutex pointer */ - xorl %ecx,%ecx - movl %ecx,M_LOCKED /* unlock the mutex */ + andl $(~M_ILOCKED_MSK), M_STATE(LMTX_REG) - movl %ecx,M_ILK + PREEMPTION_ENABLE /* need to re-enable preemption */ - popf /* restore interrupt state */ + LEAF_RET + - leave -#if CONFIG_DTRACE - /* Dtrace: LS_LCK_MTX_EXT_UNLOCK_RELEASE */ - LOCKSTAT_LABEL(_lck_mtx_ext_unlock_lockstat_patch_point) - ret - /* inherit lock pointer in %edx from above */ - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_UNLOCK_RELEASE, %edx) -#endif - ret + +LEAF_ENTRY(lck_mtx_lock_grab_mutex) + LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ + mov M_STATE(LMTX_REG), LMTX_C_REG32 -LEAF_ENTRY(lck_mtx_ilk_unlock) - movl L_ARG0,%edx /* no indirection here */ + test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG /* can't have the mutex yet */ + jne 2f - xorl %eax,%eax - movl %eax,M_ILK + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_MLOCKED_MSK), LMTX_C_REG + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 2f /* branch on failure to spin loop */ + mov %gs:CPU_ACTIVE_THREAD, LMTX_A_REG + mov LMTX_A_REG, M_OWNER(LMTX_REG) /* record owner of mutex */ + + mov $1, LMTX_RET_REG /* return success */ + LEAF_RET +2: + xor LMTX_RET_REG, LMTX_RET_REG /* return failure */ LEAF_RET + + +LEAF_ENTRY(lck_mtx_lock_mark_promoted) + 
LOAD_LMTX_REG(L_ARG0) /* fetch lock pointer - no indirection here */ +1: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + test $(M_PROMOTED_MSK), LMTX_C_REG + jne 3f + test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + jne 2f + + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_PROMOTED_MSK), LMTX_C_REG + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 2f /* branch on failure to spin loop */ + + mov $1, LMTX_RET_REG + LEAF_RET +2: + PAUSE + jmp 1b +3: + xor LMTX_RET_REG, LMTX_RET_REG + LEAF_RET + + + +LEAF_ENTRY(lck_mtx_lock_mark_destroyed) + LOAD_LMTX_REG(L_ARG0) +1: + mov M_OWNER(LMTX_REG), LMTX_A_REG + + cmp $(MUTEX_DESTROYED), LMTX_A_REG /* check to see if its marked destroyed */ + je 3f + cmp $(MUTEX_IND), LMTX_A_REG /* Is this an indirect mutex? */ + jne 2f + + movl $(MUTEX_DESTROYED), M_OWNER(LMTX_REG) /* convert to destroyed state */ + jmp 3f +2: + mov M_STATE(LMTX_REG), LMTX_C_REG32 + + test $(M_ILOCKED_MSK), LMTX_C_REG /* have to wait for interlock to clear */ + jne 5f + + PUSHF /* save interrupt state */ + mov LMTX_C_REG, LMTX_A_REG /* eax contains snapshot for cmpxchgl */ + or $(M_ILOCKED_MSK), LMTX_C_REG + CLI + lock + cmpxchg LMTX_C_REG32, M_STATE(LMTX_REG) /* atomic compare and exchange */ + jne 4f /* branch on failure to spin loop */ + movl $(MUTEX_DESTROYED), M_OWNER(LMTX_REG) /* convert to destroyed state */ + POPF /* restore interrupt state */ +3: + LEAF_RET /* return with M_ILOCKED set */ +4: + POPF /* restore interrupt state */ +5: + PAUSE + jmp 1b + + + LEAF_ENTRY(_disable_preemption) #if MACH_RT _DISABLE_PREEMPTION @@ -2080,12 +1992,17 @@ LEAF_ENTRY(_enable_preemption) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f +#if __i386__ pushl %gs:CPU_PREEMPTION_LEVEL - pushl $2f - call EXT(panic) +#else + movl %gs:CPU_PREEMPTION_LEVEL,%esi +#endif + LOAD_STRING_ARG0(_enable_preemption_less_than_zero) + CALL_PANIC() hlt - .data -2: String "_enable_preemption: 
preemption_level(%d) < 0!" + .cstring +_enable_preemption_less_than_zero: + .asciz "_enable_preemption: preemption_level(%d) < 0!" .text 1: #endif /* MACH_ASSERT */ @@ -2098,11 +2015,12 @@ LEAF_ENTRY(_enable_preemption_no_check) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f - pushl $2f - call EXT(panic) + LOAD_STRING_ARG0(_enable_preemption_no_check_less_than_zero) + CALL_PANIC() hlt - .data -2: String "_enable_preemption_no_check: preemption_level <= 0!" + .cstring +_enable_preemption_no_check_less_than_zero: + .asciz "_enable_preemption_no_check: preemption_level <= 0!" .text 1: #endif /* MACH_ASSERT */ @@ -2122,12 +2040,17 @@ LEAF_ENTRY(_mp_enable_preemption) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f +#if __i386__ pushl %gs:CPU_PREEMPTION_LEVEL - pushl $2f - call EXT(panic) +#else + movl %gs:CPU_PREEMPTION_LEVEL,%esi +#endif + LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero) + CALL_PANIC() hlt - .data -2: String "_mp_enable_preemption: preemption_level (%d) <= 0!" + .cstring +_mp_enable_preemption_less_than_zero: + .asciz "_mp_enable_preemption: preemption_level (%d) <= 0!" .text 1: #endif /* MACH_ASSERT */ @@ -2140,11 +2063,12 @@ LEAF_ENTRY(_mp_enable_preemption_no_check) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f - pushl $2f - call EXT(panic) + LOAD_STRING_ARG0(_mp_enable_preemption_no_check_less_than_zero) + CALL_PANIC() hlt - .data -2: String "_mp_enable_preemption_no_check: preemption_level <= 0!" + .cstring +_mp_enable_preemption_no_check_less_than_zero: + .asciz "_mp_enable_preemption_no_check: preemption_level <= 0!" 
.text 1: #endif /* MACH_ASSERT */ @@ -2152,6 +2076,7 @@ LEAF_ENTRY(_mp_enable_preemption_no_check) #endif /* MACH_RT */ LEAF_RET +#if __i386__ LEAF_ENTRY(i_bit_set) movl L_ARG0,%edx @@ -2261,5 +2186,103 @@ LEAF_ENTRY(hw_atomic_and_noret) movl L_ARG0, %ecx /* Load address of operand */ movl L_ARG1, %edx /* Load mask */ lock - andl %edx, (%ecx) /* Atomic OR */ + andl %edx, (%ecx) /* Atomic AND */ + LEAF_RET + +#else /* !__i386__ */ + +LEAF_ENTRY(i_bit_set) + lock + bts %edi,(%rsi) + LEAF_RET + +LEAF_ENTRY(i_bit_clear) + lock + btr %edi,(%rsi) + LEAF_RET + + +LEAF_ENTRY(bit_lock) +1: + lock + bts %edi,(%rsi) + jb 1b + LEAF_RET + + +LEAF_ENTRY(bit_lock_try) + lock + bts %edi,(%rsi) + jb bit_lock_failed + movl $1, %eax LEAF_RET +bit_lock_failed: + xorl %eax,%eax + LEAF_RET + +LEAF_ENTRY(bit_unlock) + lock + btr %edi,(%rsi) + LEAF_RET + + +/* + * Atomic primitives, prototyped in kern/simple_lock.h + */ +LEAF_ENTRY(hw_atomic_add) + movl %esi, %eax /* Load addend */ + lock + xaddl %eax, (%rdi) /* Atomic exchange and add */ + addl %esi, %eax /* Calculate result */ + LEAF_RET + +LEAF_ENTRY(hw_atomic_sub) + negl %esi + movl %esi, %eax + lock + xaddl %eax, (%rdi) /* Atomic exchange and add */ + addl %esi, %eax /* Calculate result */ + LEAF_RET + +LEAF_ENTRY(hw_atomic_or) + movl (%rdi), %eax +1: + movl %esi, %edx /* Load mask */ + orl %eax, %edx + lock + cmpxchgl %edx, (%rdi) /* Atomic CAS */ + jne 1b + movl %edx, %eax /* Result */ + LEAF_RET +/* + * A variant of hw_atomic_or which doesn't return a value. + * The implementation is thus comparatively more efficient. + */ + +LEAF_ENTRY(hw_atomic_or_noret) + lock + orl %esi, (%rdi) /* Atomic OR */ + LEAF_RET + + +LEAF_ENTRY(hw_atomic_and) + movl (%rdi), %eax +1: + movl %esi, %edx /* Load mask */ + andl %eax, %edx + lock + cmpxchgl %edx, (%rdi) /* Atomic CAS */ + jne 1b + movl %edx, %eax /* Result */ + LEAF_RET +/* + * A variant of hw_atomic_and which doesn't return a value. 
+ * The implementation is thus comparatively more efficient. + */ + +LEAF_ENTRY(hw_atomic_and_noret) + lock + andl %esi, (%rdi) /* Atomic AND */ + LEAF_RET + +#endif /* !__i386__ */ diff --git a/osfmk/i386/i386_lowmem.h b/osfmk/i386/i386_lowmem.h index ffbb00b57..1e571cd59 100644 --- a/osfmk/i386/i386_lowmem.h +++ b/osfmk/i386/i386_lowmem.h @@ -32,10 +32,23 @@ #ifdef __APPLE_API_PRIVATE -#define I386_LOWMEM_RESERVED 0x18 +/* The kernel is linked at VM_MIN_KERNEL_ADDRESS + 0x100000 */ +#define I386_KERNEL_IMAGE_BASE_PAGE 0x100 -#define I386_HIB_PAGETABLE 0x13 -#define I386_HIB_PAGETABLE_COUNT 5 +#if defined(__i386__) +#define I386_LOWMEM_RESERVED 0x18 + +#define I386_HIB_PAGETABLE 0x13 +#define I386_HIB_PAGETABLE_COUNT 5 + +#elif defined(__x86_64__) +/* For K64, only 3 pages are reserved + * - physical page zero, a gap page, and then real-mode-bootstrap/lowGlo. + * Note that the kernel virtual address 0xffffff8000002000 is re-mapped + * to the low globals and that physical page, 0x2000, is used by the bootstrap. + */ +#define I386_LOWMEM_RESERVED 3 +#endif #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index 40086ffd1..f2815aae4 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,13 +74,23 @@ #include #include #include -#include #include -#include +#include #include #include #include -#include "i386_lowmem.h" +#include +#include + +#include +#include + +#if DEBUG +#define DBG(x...) kprintf("DBG: " x) +#define PRINT_PMAP_MEMORY_TABLE +#else +#define DBG(x...)
+#endif vm_size_t mem_size = 0; vm_offset_t first_avail = 0;/* first after page tables */ @@ -92,7 +102,7 @@ uint64_t sane_size = 0; /* Memory size to use for defaults calculations */ #define MAXBOUNCEPOOL (128 * 1024 * 1024) #define MAXLORESERVE ( 32 * 1024 * 1024) -extern int bsd_mbuf_cluster_reserve(void); +extern unsigned int bsd_mbuf_cluster_reserve(void); uint32_t bounce_pool_base = 0; @@ -106,46 +116,34 @@ vm_offset_t virtual_avail, virtual_end; static pmap_paddr_t avail_remaining; vm_offset_t static_memory_end = 0; -#include -vm_offset_t edata, etext, end; - -/* - * _mh_execute_header is the mach_header for the currently executing - * 32 bit kernel - */ -extern struct mach_header _mh_execute_header; -void *sectTEXTB; int sectSizeTEXT; -void *sectDATAB; int sectSizeDATA; -void *sectOBJCB; int sectSizeOBJC; -void *sectLINKB; int sectSizeLINK; -void *sectPRELINKB; int sectSizePRELINK; -void *sectHIBB; int sectSizeHIB; - -extern void *getsegdatafromheader(struct mach_header *, const char *, int *); -extern struct segment_command *getsegbyname(const char *); -extern struct section *firstsect(struct segment_command *); -extern struct section *nextsect(struct segment_command *, struct section *); +vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, end; +boolean_t kernel_text_ps_4K = TRUE; +boolean_t wpkernel = TRUE; -void -i386_macho_zerofill(void) -{ - struct segment_command *sgp; - struct section *sp; - - sgp = getsegbyname("__DATA"); - if (sgp) { - sp = firstsect(sgp); - if (sp) { - do { - if ((sp->flags & S_ZEROFILL)) - bzero((char *) sp->addr, sp->size); - } while ((sp = nextsect(sgp, sp))); - } - } +extern void *KPTphys; - return; -} +/* + * _mh_execute_header is the mach_header for the currently executing kernel + */ +void *sectTEXTB; unsigned long sectSizeTEXT; +void *sectDATAB; unsigned long sectSizeDATA; +void *sectOBJCB; unsigned long sectSizeOBJC; +void *sectLINKB; unsigned long sectSizeLINK; +void *sectPRELINKB; unsigned long sectSizePRELINK; +void 
*sectHIBB; unsigned long sectSizeHIB; +void *sectINITPTB; unsigned long sectSizeINITPT; +extern int srv; + +extern uint64_t firmware_Conventional_bytes; +extern uint64_t firmware_RuntimeServices_bytes; +extern uint64_t firmware_ACPIReclaim_bytes; +extern uint64_t firmware_ACPINVS_bytes; +extern uint64_t firmware_PalCode_bytes; +extern uint64_t firmware_Reserved_bytes; +extern uint64_t firmware_Unusable_bytes; +extern uint64_t firmware_other_bytes; +uint64_t firmware_MMIO_bytes; /* * Basic VM initialization. @@ -184,12 +182,34 @@ i386_vm_init(uint64_t maxmem, &_mh_execute_header, "__LINKEDIT", &sectSizeLINK); sectHIBB = (void *)getsegdatafromheader( &_mh_execute_header, "__HIB", &sectSizeHIB); + sectINITPTB = (void *)getsegdatafromheader( + &_mh_execute_header, "__INITPT", &sectSizeINITPT); sectPRELINKB = (void *) getsegdatafromheader( - &_mh_execute_header, "__PRELINK", &sectSizePRELINK); + &_mh_execute_header, "__PRELINK_TEXT", &sectSizePRELINK); + sHIB = (vm_offset_t) sectHIBB; + eHIB = (vm_offset_t) sectHIBB + sectSizeHIB; + /* Zero-padded from eHIB to stext if text is 2M-aligned */ + stext = (vm_offset_t) sectTEXTB; etext = (vm_offset_t) sectTEXTB + sectSizeTEXT; + /* Zero-padded from etext to sdata if text is 2M-aligned */ + sdata = (vm_offset_t) sectDATAB; edata = (vm_offset_t) sectDATAB + sectSizeDATA; +#if DEBUG + kprintf("sectTEXTB = %p\n", sectTEXTB); + kprintf("sectDATAB = %p\n", sectDATAB); + kprintf("sectOBJCB = %p\n", sectOBJCB); + kprintf("sectLINKB = %p\n", sectLINKB); + kprintf("sectHIBB = %p\n", sectHIBB); + kprintf("sectPRELINKB = %p\n", sectPRELINKB); + kprintf("eHIB = %p\n", (void *) eHIB); + kprintf("stext = %p\n", (void *) stext); + kprintf("etext = %p\n", (void *) etext); + kprintf("sdata = %p\n", (void *) sdata); + kprintf("edata = %p\n", (void *) edata); +#endif + vm_set_page_size(); /* @@ -206,7 +226,7 @@ i386_vm_init(uint64_t maxmem, pmap_memory_region_count = pmap_memory_region_current = 0; fap = (ppnum_t) i386_btop(first_avail); - mptr = (EfiMemoryRange
*)args->MemoryMap; + mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap); if (args->MemoryMapDescriptorSize == 0) panic("Invalid memory map descriptor size"); msize = args->MemoryMapDescriptorSize; @@ -216,13 +236,16 @@ i386_vm_init(uint64_t maxmem, for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { ppnum_t base, top; + uint64_t region_bytes = 0; if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) { kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count); break; } base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); - top = (ppnum_t) ((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1; + top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1); + region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); + pmap_type = mptr->Type; switch (mptr->Type) { case kEfiLoaderCode: @@ -234,31 +257,60 @@ i386_vm_init(uint64_t maxmem, * Consolidate usable memory types into one. */ pmap_type = kEfiConventionalMemory; - sane_size += (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); + sane_size += region_bytes; + firmware_Conventional_bytes += region_bytes; break; + /* + * sane_size should reflect the total amount of physical + * RAM in the system, not just the amount that is + * available for the OS to use. + * FIXME:Consider deriving this value from SMBIOS tables + * rather than reverse engineering the memory map. + * Alternatively, see + * Memory map should + * describe all memory + * Firmware on some systems guarantees that the memory + * map is complete via the "RomReservedMemoryTracked" + * feature field--consult that where possible to + * avoid the "round up to 128M" workaround below. 
+ */ case kEfiRuntimeServicesCode: case kEfiRuntimeServicesData: + firmware_RuntimeServices_bytes += region_bytes; + sane_size += region_bytes; + break; case kEfiACPIReclaimMemory: + firmware_ACPIReclaim_bytes += region_bytes; + sane_size += region_bytes; + break; case kEfiACPIMemoryNVS: + firmware_ACPINVS_bytes += region_bytes; + sane_size += region_bytes; + break; case kEfiPalCode: - /* - * sane_size should reflect the total amount of physical ram - * in the system, not just the amount that is available for - * the OS to use - */ - sane_size += (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); - /* fall thru */ + firmware_PalCode_bytes += region_bytes; + sane_size += region_bytes; + break; + + case kEfiReservedMemoryType: + firmware_Reserved_bytes += region_bytes; + break; case kEfiUnusableMemory: + firmware_Unusable_bytes += region_bytes; + break; case kEfiMemoryMappedIO: case kEfiMemoryMappedIOPortSpace: - case kEfiReservedMemoryType: + firmware_MMIO_bytes += region_bytes; + break; default: - pmap_type = mptr->Type; + firmware_other_bytes += region_bytes; + break; } - kprintf("EFI region: type = %u/%d, base = 0x%x, top = 0x%x\n", mptr->Type, pmap_type, base, top); + kprintf("EFI region %d: type %u/%d, base 0x%x, top 0x%x\n", + i, mptr->Type, pmap_type, base, top); if (maxpg) { if (base >= maxpg) @@ -347,69 +399,65 @@ i386_vm_init(uint64_t maxmem, } } - #ifdef PRINT_PMAP_MEMORY_TABLE { unsigned int j; pmap_memory_region_t *p = pmap_memory_regions; - vm_offset_t region_start, region_end; - vm_offset_t efi_start, efi_end; + addr64_t region_start, region_end; + addr64_t efi_start, efi_end; for (j=0;jtype, - p->base << I386_PGSHIFT, p->alloc << I386_PGSHIFT, p->end << I386_PGSHIFT); - region_start = p->base << I386_PGSHIFT; - region_end = (p->end << I386_PGSHIFT) - 1; - mptr = args->MemoryMap; + kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n", + j, p->type, + (addr64_t) p->base << I386_PGSHIFT, + (addr64_t) p->alloc << I386_PGSHIFT, + 
(addr64_t) p->end << I386_PGSHIFT); + region_start = (addr64_t) p->base << I386_PGSHIFT; + region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1; + mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap); for (i=0; iType != kEfiLoaderCode && mptr->Type != kEfiLoaderData && mptr->Type != kEfiBootServicesCode && mptr->Type != kEfiBootServicesData && mptr->Type != kEfiConventionalMemory) { - efi_start = (vm_offset_t)mptr->PhysicalStart; + efi_start = (addr64_t)mptr->PhysicalStart; efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1; if ((efi_start >= region_start && efi_start <= region_end) || (efi_end >= region_start && efi_end <= region_end)) { kprintf(" *** Overlapping region with EFI runtime region %d\n", i); } - } - + } } - } + } } #endif avail_start = first_avail; mem_actual = sane_size; -#define MEG (1024*1024ULL) -#define GIG (1024*MEG) - /* * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory * not reported by EFI. */ - sane_size = (sane_size + 128 * MEG - 1) & ~((uint64_t)(128 * MEG - 1)); + sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1)); -#if defined(__i386__) -#define K32_MAXMEM (32*GIG) /* - * For K32 we cap at K32_MAXMEM GB (currently 32GB). + * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 64GB for K64). * Unless overriden by the maxmem= boot-arg * -- which is a non-zero maxmem argument to this function. 
*/ - if (maxmem == 0 && sane_size > K32_MAXMEM) { - maxmem = K32_MAXMEM; - printf("Physical memory %lld bytes capped at %dGB for 32-bit kernel\n", - sane_size, (uint32_t) (K32_MAXMEM/GIG)); + if (maxmem == 0 && sane_size > KERNEL_MAXMEM) { + maxmem = KERNEL_MAXMEM; + printf("Physical memory %lld bytes capped at %dGB\n", + sane_size, (uint32_t) (KERNEL_MAXMEM/GB)); } -#endif + /* * if user set maxmem, reduce memory sizes */ if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) { - ppnum_t discarded_pages = (sane_size - maxmem) >> I386_PGSHIFT; + ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT); ppnum_t highest_pn = 0; ppnum_t cur_alloc = 0; uint64_t pages_to_use; @@ -452,18 +500,24 @@ i386_vm_init(uint64_t maxmem, mem_size = (vm_size_t)sane_size; max_mem = sane_size; - kprintf("Physical memory %llu MB\n", sane_size/MEG); + kprintf("Physical memory %llu MB\n", sane_size/MB); if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) - max_valid_dma_address = 1024ULL * 1024ULL * 4096ULL; + max_valid_dma_address = 4 * GB; else - max_valid_dma_address = ((uint64_t) maxdmaaddr) * 1024ULL * 1024ULL; + max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB; if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize))) maxbouncepoolsize = MAXBOUNCEPOOL; else maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024); + /* since bsd_mbuf_cluster_reserve() is going to be called, we need to check for server */ + if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) { + srv = 1; + } + + /* * bsd_mbuf_cluster_reserve depends on sane_size being set * in order to correctly determine the size of the mbuf pool @@ -494,19 +548,36 @@ i386_vm_init(uint64_t maxmem, unsigned int pmap_free_pages(void) { - return avail_remaining; + return (unsigned int)avail_remaining; } +#if defined(__LP64__) +/* On large memory systems, early allocations should prefer memory from the + * last region, which is typically all physical 
memory >4GB. This is used + * by pmap_steal_memory and pmap_pre_expand during init only. */ +boolean_t +pmap_next_page_k64( ppnum_t *pn) +{ + if(max_mem >= (32*GB)) { + pmap_memory_region_t *last_region = &pmap_memory_regions[pmap_memory_region_count-1]; + if (last_region->alloc != last_region->end) { + *pn = last_region->alloc++; + avail_remaining--; + return TRUE; + } + } + return pmap_next_page(pn); +} +#endif boolean_t pmap_next_page( ppnum_t *pn) { - if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { - if (pmap_memory_regions[pmap_memory_region_current].alloc == - pmap_memory_regions[pmap_memory_region_current].end) { - pmap_memory_region_current++; + if (pmap_memory_regions[pmap_memory_region_current].alloc == + pmap_memory_regions[pmap_memory_region_current].end) { + pmap_memory_region_current++; continue; } *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; @@ -525,7 +596,6 @@ pmap_valid_page( unsigned int i; pmap_memory_region_t *pmptr = pmap_memory_regions; - assert(pn); for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { if ( (pn >= pmptr->base) && (pn <= pmptr->end) ) return TRUE; @@ -558,3 +628,205 @@ reserve_bouncepool(uint32_t bounce_pool_wanted) avail_remaining -= pages_needed; } } + +/* + * Called once VM is fully initialized so that we can release unused + * sections of low memory to the general pool. + * Also complete the set-up of identity-mapped sections of the kernel: + * 1) write-protect kernel text + * 2) map kernel text using large pages if possible + * 3) read and write-protect page zero (for K32) + * 4) map the global page at the appropriate virtual address. + * + * Use of large pages + * ------------------ + * To effectively map and write-protect all kernel text pages, the text + * must be 2M-aligned at the base, and the data section above must also be + * 2M-aligned. That is, there's padding below and above. This is achieved + * through linker directives. 
Large pages are used only if this alignment + * exists (and not overridden by the -kernel_text_ps_4K boot-arg). The + * memory layout is: + * + * : : + * | __DATA | + * sdata: ================== 2Meg + * | | + * | zero-padding | + * | | + * etext: ------------------ + * | | + * : : + * | | + * | __TEXT | + * | | + * : : + * | | + * stext: ================== 2Meg + * | | + * | zero-padding | + * | | + * eHIB: ------------------ + * | __HIB | + * : : + * + * Prior to changing the mapping from 4K to 2M, the zero-padding pages + * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the + * 4K pages covering [stext,etext] are coalesced as 2M large pages. + * The now unused level-1 PTE pages are also freed. + */ +void +pmap_lowmem_finalize(void) +{ + spl_t spl; + int i; + + /* Check the kernel is linked at the expected base address */ + if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) != + I386_KERNEL_IMAGE_BASE_PAGE) + panic("pmap_lowmem_finalize() unexpected kernel base address"); + + /* + * Free all pages in pmap regions below the base: + * rdar://6332712 + * We can't free all the pages to VM that EFI reports available. + * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake. + * There's also a size miscalculation here: pend is one page less + * than it should be but this is not fixed to be backwards + * compatible. + * Due to this current EFI limitation, we take only the first + * entry in the memory region table. However, the loop is retained + * (with the intended termination criteria commented out) in the + * hope that some day we can free all low-memory ranges. 
+ */ + for (i = 0; +// pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE; + i < 1; + i++) { + vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base); + vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end); +// vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1); + + DBG("ml_static_mfree(%p,%p) for pmap region %d\n", + (void *) ml_static_ptovirt(pbase), + (void *) (pend - pbase), i); + ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase); + } + + /* + * If text and data are both 2MB-aligned, + * we can map text with large-pages, + * unless the -kernel_text_ps_4K boot-arg overrides. + */ + if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) { + kprintf("Kernel text is 2MB aligned"); + kernel_text_ps_4K = FALSE; + if (PE_parse_boot_argn("-kernel_text_ps_4K", + &kernel_text_ps_4K, + sizeof (kernel_text_ps_4K))) + kprintf(" but will be mapped with 4K pages\n"); + else + kprintf(" and will be mapped with 2M pages\n"); + } + + (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel)); + if (wpkernel) + kprintf("Kernel text %p-%p to be write-protected\n", + (void *) stext, (void *) etext); + + spl = splhigh(); + + /* + * Scan over text if mappings are to be changed: + * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 + * - Change to large-pages if possible and not overridden. + */ + if (kernel_text_ps_4K && wpkernel) { + vm_offset_t myva; + for (myva = stext; myva < etext; myva += PAGE_SIZE) { + pt_entry_t *ptep; + + ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); + if (ptep) + pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); + } + } + + if (!kernel_text_ps_4K) { + vm_offset_t myva; + + /* + * Release zero-filled page padding used for 2M-alignment. 
+ */ + DBG("ml_static_mfree(%p,%p) for padding below text\n", + (void *) eHIB, (void *) (stext - eHIB)); + ml_static_mfree(eHIB, stext - eHIB); + DBG("ml_static_mfree(%p,%p) for padding above text\n", + (void *) etext, (void *) (sdata - etext)); + ml_static_mfree(etext, sdata - etext); + + /* + * Coalesce text pages into large pages. + */ + for (myva = stext; myva < sdata; myva += I386_LPGBYTES) { + pt_entry_t *ptep; + vm_offset_t pte_phys; + pt_entry_t *pdep; + pt_entry_t pde; + + pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva); + ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); + DBG("myva: %p pdep: %p ptep: %p\n", + (void *) myva, (void *) pdep, (void *) ptep); + if ((*ptep & INTEL_PTE_VALID) == 0) + continue; + pte_phys = (vm_offset_t)(*ptep & PG_FRAME); + pde = *pdep & PTMASK; /* page attributes from pde */ + pde |= INTEL_PTE_PS; /* make it a 2M entry */ + pde |= pte_phys; /* take page frame from pte */ + + if (wpkernel) + pde &= ~INTEL_PTE_RW; + DBG("pmap_store_pte(%p,0x%llx)\n", + (void *)pdep, pde); + pmap_store_pte(pdep, pde); + + /* + * Free the now-unused level-1 pte. + * Note: ptep is a virtual address to the pte in the + * recursive map. We can't use this address to free + * the page. Instead we need to compute its address + * in the Idle PTEs in "low memory". 
+ */ + vm_offset_t vm_ptep = (vm_offset_t) KPTphys + + (pte_phys >> PTPGSHIFT); + DBG("ml_static_mfree(%p,0x%x) for pte\n", + (void *) vm_ptep, PAGE_SIZE); + ml_static_mfree(vm_ptep, PAGE_SIZE); + } + + /* Change variable read by sysctl machdep.pmap */ + pmap_kernel_text_ps = I386_LPGBYTES; + } + +#if defined(__i386__) + /* no matter what, kernel page zero is not accessible */ + pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID); +#endif + + /* map lowmem global page into fixed addr */ + pt_entry_t *pte = NULL; + if (0 == (pte = pmap_pte(kernel_pmap, + VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000))) + panic("lowmem pte"); + /* make sure it is defined on page boundary */ + assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); + pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo) + | INTEL_PTE_REF + | INTEL_PTE_MOD + | INTEL_PTE_WIRED + | INTEL_PTE_VALID + | INTEL_PTE_RW); + splx(spl); + flush_tlb(); +} + diff --git a/osfmk/i386/idle_pt.c b/osfmk/i386/idle_pt.c new file mode 100644 index 000000000..ebbfc556d --- /dev/null +++ b/osfmk/i386/idle_pt.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include + +pml4_entry_t IdlePML4[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = { +#ifdef __x86_64__ + [ 0] = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | INTEL_PTE_VALID | INTEL_PTE_WRITE), + [KERNEL_PML4_INDEX] = ((uint64_t)(INITPT_SEG_BASE + PAGE_SIZE) | INTEL_PTE_VALID | INTEL_PTE_WRITE), +#endif + }; + +#if defined(__x86_64__) +#define PDPT_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE) +#elif defined(__i386__) +#define PDPT_PROT (INTEL_PTE_VALID) +#endif +pdpt_entry_t IdlePDPT[PTE_PER_PAGE] __attribute__((section("__INITPT, __data"))) = { + [0] = ((uint64_t)(INITPT_SEG_BASE + 2*PAGE_SIZE) | PDPT_PROT), + [1] = ((uint64_t)(INITPT_SEG_BASE + 3*PAGE_SIZE) | PDPT_PROT), + [2] = ((uint64_t)(INITPT_SEG_BASE + 4*PAGE_SIZE) | PDPT_PROT), + [3] = ((uint64_t)(INITPT_SEG_BASE + 5*PAGE_SIZE) | PDPT_PROT), + }; + +#if NPGPTD != 4 +#error Please update idle_pt.c to reflect the new value of NPGPTD +#endif + +#define ID_MAP_2MEG(x) [(x)] = ((((uint64_t)(x)) << 21) | (INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE)), + +#define L0(x,n) x(n) +#define L1(x,n) L0(x,n-1) L0(x,n) +#define L2(x,n) L1(x,n-2) L1(x,n) +#define L3(x,n) L2(x,n-4) L2(x,n) +#define L4(x,n) L3(x,n-8) L3(x,n) +#define L5(x,n) L4(x,n-16) L4(x,n) +#define L6(x,n) L5(x,n-32) L5(x,n) +#define L7(x,n) L6(x,n-64) L6(x,n) +#define L8(x,n) L7(x,n-128) L7(x,n) +#define L9(x,n) L8(x,n-256) L8(x,n) +#define L10(x,n) L9(x,n-512) L9(x,n) +#define L11(x,n) L10(x,n-1024) L10(x,n) + +#define 
FOR_0_TO_2047(x) L11(x,2047) + +pd_entry_t BootstrapPTD[2048] __attribute__((section("__INITPT, __data"))) = { + FOR_0_TO_2047(ID_MAP_2MEG) +}; diff --git a/osfmk/i386/idt.s b/osfmk/i386/idt.s index bc45d5d4a..17e6331ea 100644 --- a/osfmk/i386/idt.s +++ b/osfmk/i386/idt.s @@ -91,8 +91,8 @@ .byte type ;\ .text -#define IDT_ENTRY(vec,type) IDT_BASE_ENTRY(vec,KERNEL_CS,type) -#define IDT_ENTRY_INT(vec,type) IDT_BASE_ENTRY_INT(vec,KERNEL_CS,type) +#define IDT_ENTRY(vec,type) IDT_BASE_ENTRY(vec,KERNEL32_CS,type) +#define IDT_ENTRY_INT(vec,type) IDT_BASE_ENTRY_INT(vec,KERNEL32_CS,type) /* * No error code. Clear error code and push trap number. @@ -179,6 +179,7 @@ L_ ## n: ;\ Entry(master_idt) Entry(hi_remap_data) .text + .align 12 Entry(hi_remap_text) EXCEPTION(0x00,t_zero_div) @@ -620,7 +621,7 @@ hi_sysenter_2: pushl %eax /* err/eax - syscall code */ pushl $0 /* clear trap number slot */ pusha /* save the general registers */ - orl $(EFL_IF),R_EFLAGS-R_EDI(%esp) /* (edi was last reg pushed) */ + orl $(EFL_IF),R32_EFLAGS-R32_EDI(%esp) /* (edi was last reg pushed) */ movl $ EXT(lo_sysenter),%ebx enter_lohandler: pushl %ds @@ -646,7 +647,7 @@ enter_lohandler1: movl %ecx,%cr3 movl %ecx,%gs:CPU_ACTIVE_CR3 1: - testb $3,R_CS(%esp) + testb $3,R32_CS(%esp) jz 2f movl %esp,%edx /* came from user mode */ subl %gs:CPU_HI_ISS,%edx @@ -658,7 +659,7 @@ enter_lohandler1: movl $0, %ecx /* If so, reset DR7 (the control) */ movl %ecx, %dr7 2: - movl R_TRAPNO(%esp),%ecx // Get the interrupt vector + movl R32_TRAPNO(%esp),%ecx // Get the interrupt vector addl $1,%gs:hwIntCnt(,%ecx,4) // Bump the count jmp *%ebx @@ -670,7 +671,7 @@ Entry(hi_page_fault) pushl $(T_PAGE_FAULT) /* mark a page fault trap */ pusha /* save the general registers */ movl %cr2,%eax /* get the faulting address */ - movl %eax,R_CR2-R_EDI(%esp) /* save in esp save slot */ + movl %eax,R32_CR2-R32_EDI(%esp)/* save in esp save slot */ movl $ EXT(lo_alltraps),%ebx jmp enter_lohandler @@ -834,8 +835,8 @@ push_gs: pushl %gs 
/* restore gs. */ push_none: pushl $(SS_32) /* 32-bit state flavor */ - movl %eax,R_TRAPNO(%esp) /* set trap number */ - movl %edx,R_ERR(%esp) /* set error code */ + movl %eax,R32_TRAPNO(%esp) /* set trap number */ + movl %edx,R32_ERR(%esp) /* set error code */ /* now treat as fault from user */ /* except that segment registers are */ /* already pushed */ diff --git a/osfmk/i386/idt64.s b/osfmk/i386/idt64.s index 64df3f215..8efe5ed44 100644 --- a/osfmk/i386/idt64.s +++ b/osfmk/i386/idt64.s @@ -573,27 +573,27 @@ L_32bit_return: /* * Restore registers into the machine state for iret. */ - movl R_EIP(%rsp), %eax + movl R32_EIP(%rsp), %eax movl %eax, ISC32_RIP(%rsp) - movl R_EFLAGS(%rsp), %eax + movl R32_EFLAGS(%rsp), %eax movl %eax, ISC32_RFLAGS(%rsp) - movl R_CS(%rsp), %eax + movl R32_CS(%rsp), %eax movl %eax, ISC32_CS(%rsp) - movl R_UESP(%rsp), %eax + movl R32_UESP(%rsp), %eax movl %eax, ISC32_RSP(%rsp) - movl R_SS(%rsp), %eax + movl R32_SS(%rsp), %eax movl %eax, ISC32_SS(%rsp) /* * Restore general 32-bit registers */ - movl R_EAX(%rsp), %eax - movl R_EBX(%rsp), %ebx - movl R_ECX(%rsp), %ecx - movl R_EDX(%rsp), %edx - movl R_EBP(%rsp), %ebp - movl R_ESI(%rsp), %esi - movl R_EDI(%rsp), %edi + movl R32_EAX(%rsp), %eax + movl R32_EBX(%rsp), %ebx + movl R32_ECX(%rsp), %ecx + movl R32_EDX(%rsp), %edx + movl R32_EBP(%rsp), %ebp + movl R32_ESI(%rsp), %esi + movl R32_EDI(%rsp), %edi /* * Restore segment registers. 
We make take an exception here but @@ -602,13 +602,13 @@ L_32bit_return: */ swapgs EXT(ret32_set_ds): - movw R_DS(%rsp), %ds + movw R32_DS(%rsp), %ds EXT(ret32_set_es): - movw R_ES(%rsp), %es + movw R32_ES(%rsp), %es EXT(ret32_set_fs): - movw R_FS(%rsp), %fs + movw R32_FS(%rsp), %fs EXT(ret32_set_gs): - movw R_GS(%rsp), %gs + movw R32_GS(%rsp), %gs add $(ISC32_OFFSET)+8+8, %rsp /* pop compat frame + trapno/trapfn and error */ @@ -752,7 +752,7 @@ L_syscall_continue: mov %gs:CPU_UBER_TMP, %rcx mov %rcx, ISF64_RSP(%rsp) /* user stack */ mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */ - movl $(0), ISF64_TRAPNO(%rsp) /* trapno */ + movl $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */ movl $(LO_SYSCALL), ISF64_TRAPFN(%rsp) jmp L_64bit_enter /* this can only be a 64-bit task */ @@ -794,7 +794,7 @@ Entry(hi64_sysenter) L_sysenter_continue: push %rdx /* eip */ push %rax /* err/eax - syscall code */ - push $(0) + push $(T_SYSENTER) orl $(EFL_IF), ISF64_RFLAGS(%rsp) movl $(LO_MACH_SCALL), ISF64_TRAPFN(%rsp) testl %eax, %eax @@ -839,45 +839,49 @@ L_32bit_enter: /* * Save segment regs */ - mov %ds, R_DS(%rsp) - mov %es, R_ES(%rsp) - mov %fs, R_FS(%rsp) - mov %gs, R_GS(%rsp) + mov %ds, R32_DS(%rsp) + mov %es, R32_ES(%rsp) + mov %fs, R32_FS(%rsp) + mov %gs, R32_GS(%rsp) /* * Save general 32-bit registers */ - mov %eax, R_EAX(%rsp) - mov %ebx, R_EBX(%rsp) - mov %ecx, R_ECX(%rsp) - mov %edx, R_EDX(%rsp) - mov %ebp, R_EBP(%rsp) - mov %esi, R_ESI(%rsp) - mov %edi, R_EDI(%rsp) + mov %eax, R32_EAX(%rsp) + mov %ebx, R32_EBX(%rsp) + mov %ecx, R32_ECX(%rsp) + mov %edx, R32_EDX(%rsp) + mov %ebp, R32_EBP(%rsp) + mov %esi, R32_ESI(%rsp) + mov %edi, R32_EDI(%rsp) /* Unconditionally save cr2; only meaningful on page faults */ mov %cr2, %rax - mov %eax, R_CR2(%rsp) + mov %eax, R32_CR2(%rsp) /* * Copy registers already saved in the machine state * (in the interrupt stack frame) into the compat save area. 
*/ mov ISC32_RIP(%rsp), %eax - mov %eax, R_EIP(%rsp) + mov %eax, R32_EIP(%rsp) mov ISC32_RFLAGS(%rsp), %eax - mov %eax, R_EFLAGS(%rsp) + mov %eax, R32_EFLAGS(%rsp) mov ISC32_CS(%rsp), %eax - mov %eax, R_CS(%rsp) + mov %eax, R32_CS(%rsp) + testb $3, %al + jz 1f + xor %ebp, %ebp +1: mov ISC32_RSP(%rsp), %eax - mov %eax, R_UESP(%rsp) + mov %eax, R32_UESP(%rsp) mov ISC32_SS(%rsp), %eax - mov %eax, R_SS(%rsp) + mov %eax, R32_SS(%rsp) L_32bit_enter_after_fault: mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ - mov %ebx, R_TRAPNO(%rsp) + mov %ebx, R32_TRAPNO(%rsp) mov ISC32_ERR(%rsp), %eax - mov %eax, R_ERR(%rsp) + mov %eax, R32_ERR(%rsp) mov ISC32_TRAPFN(%rsp), %edx /* @@ -934,7 +938,6 @@ L_enter_lohandler2: movl %ecx, %dr7 1: addl $1,%gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count - /* Dispatch the designated lo handler */ jmp *%edx @@ -991,6 +994,10 @@ L_64bit_enter_after_fault: mov R64_TRAPNO(%rsp), %ebx mov R64_TRAPFN(%rsp), %edx + testb $3, ISF64_CS+ISS64_OFFSET(%rsp) + jz 1f + xor %rbp, %rbp +1: jmp L_enter_lohandler2 Entry(hi64_page_fault) diff --git a/osfmk/i386/ktss.c b/osfmk/i386/ktss.c index f0b69591c..973fc395a 100644 --- a/osfmk/i386/ktss.c +++ b/osfmk/i386/ktss.c @@ -63,9 +63,10 @@ * only to hold the kernel stack pointer for the current thread. */ #include -#include +#include #include +#ifdef __i386__ struct i386_tss master_ktss __attribute__ ((section ("__DESC, master_ktss"))) __attribute__ ((aligned (4096))) = { @@ -99,6 +100,7 @@ struct i386_tss master_ktss beyond end of TSS segment, so no bitmap */ }; +#endif /* * The transient stack for sysenter. 
@@ -116,6 +118,7 @@ struct x86_64_tss master_ktss64 __attribute__ ((aligned (4096))) = { }; #endif /* X86_64 */ +#ifdef __i386__ /* * Task structure for double-fault handler: */ @@ -129,7 +132,7 @@ struct i386_tss master_dftss 0, /* ss1 */ 0, /* esp2 */ 0, /* ss2 */ - 0, /* cr3 */ + (int) IdlePDPT, /* cr3 */ (int) &df_task_start, /* eip */ 0, /* eflags */ 0, /* eax */ @@ -141,7 +144,7 @@ struct i386_tss master_dftss 0, /* esi */ 0, /* edi */ KERNEL_DS, /* es */ - KERNEL_CS, /* cs */ + KERNEL32_CS, /* cs */ KERNEL_DS, /* ss */ KERNEL_DS, /* ds */ KERNEL_DS, /* fs */ @@ -167,7 +170,7 @@ struct i386_tss master_mctss 0, /* ss1 */ 0, /* esp2 */ 0, /* ss2 */ - 0, /* cr3 */ + (int) IdlePDPT, /* cr3 */ (int) &mc_task_start, /* eip */ 0, /* eflags */ 0, /* eax */ @@ -179,7 +182,7 @@ struct i386_tss master_mctss 0, /* esi */ 0, /* edi */ KERNEL_DS, /* es */ - KERNEL_CS, /* cs */ + KERNEL32_CS, /* cs */ KERNEL_DS, /* ss */ KERNEL_DS, /* ds */ KERNEL_DS, /* fs */ @@ -203,7 +206,7 @@ struct i386_tss master_dbtss 0, /* ss1 */ 0, /* esp2 */ 0, /* ss2 */ - 0, /* cr3 */ + (int) IdlePDPT, /* cr3 */ 0, /* eip */ 0, /* eflags */ 0, /* eax */ @@ -215,7 +218,7 @@ struct i386_tss master_dbtss 0, /* esi */ 0, /* edi */ KERNEL_DS, /* es */ - KERNEL_CS, /* cs */ + KERNEL32_CS, /* cs */ KERNEL_DS, /* ss */ KERNEL_DS, /* ds */ KERNEL_DS, /* fs */ @@ -228,3 +231,4 @@ struct i386_tss master_dbtss }; #endif /* MACH_KDB */ +#endif diff --git a/osfmk/i386/lapic.c b/osfmk/i386/lapic.c index ef37b72fa..0206d0986 100644 --- a/osfmk/i386/lapic.c +++ b/osfmk/i386/lapic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,9 +50,14 @@ #include #include #include -#include #include +#if CONFIG_MCA #include +#endif + +#if CONFIG_COUNTERS +#include +#endif #if MACH_KDB #include @@ -129,6 +134,18 @@ ml_get_apicid(uint32_t cpu) } +uint32_t +ml_get_cpuid(uint32_t lapic_index) +{ + if(lapic_index >= (uint32_t)MAX_CPUS) + return 0xFFFFFFFF; /* Return -1 if cpu too big */ + + /* Return the cpu ID (or -1 if not configured) */ + return (uint32_t)lapic_to_cpu[lapic_index]; + +} + + #ifdef MP_DEBUG static void lapic_cpu_map_dump(void) @@ -166,14 +183,14 @@ lapic_init(void) is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0; is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0; lapic_base = (lo & MSR_IA32_APIC_BASE_BASE); - kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base, + kprintf("MSR_IA32_APIC_BASE %p %s %s\n", (void *) lapic_base, is_lapic_enabled ? "enabled" : "disabled", is_boot_processor ? "BSP" : "AP"); if (!is_boot_processor || !is_lapic_enabled) panic("Unexpected local APIC state\n"); /* Establish a map to the local apic */ - lapic_start = vm_map_min(kernel_map); + lapic_start = (vm_offset_t)vm_map_min(kernel_map); result = vm_map_find_space(kernel_map, (vm_map_address_t *) &lapic_start, round_page(LAPIC_SIZE), 0, @@ -194,7 +211,7 @@ lapic_init(void) lapic_id = (unsigned long)(lapic_start + LAPIC_ID); if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) { - printf("Local APIC version 0x%x, 0x14 or greater expected\n", + panic("Local APIC version 0x%x, 0x14 or more expected\n", (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK)); } @@ -249,9 +266,9 @@ lapic_dump(void) #define IP(lvt) \ (LAPIC_READ(lvt)&LAPIC_LVT_IP_PLRITY_LOW)? 
"Low " : "High" - kprintf("LAPIC %d at 0x%x version 0x%x\n", + kprintf("LAPIC %d at %p version 0x%x\n", (LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, - lapic_start, + (void *) lapic_start, LAPIC_READ(VERSION)&LAPIC_VERSION_MASK); kprintf("Priorities: Task 0x%x Arbitration 0x%x Processor 0x%x\n", LAPIC_READ(TPR)&LAPIC_TPR_MASK, @@ -264,12 +281,14 @@ lapic_dump(void) BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE), BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)), LAPIC_READ(SVR) & LAPIC_SVR_MASK); +#if CONFIG_MCA if (mca_is_cmci_present()) kprintf("LVT_CMCI: Vector 0x%02x [%s] %s %cmasked\n", VEC(LVT_CMCI), DM(LVT_CMCI), DS(LVT_CMCI), MASK(LVT_CMCI)); +#endif kprintf("LVT_TIMER: Vector 0x%02x %s %cmasked %s\n", VEC(LVT_TIMER), DS(LVT_TIMER), @@ -460,9 +479,11 @@ lapic_configure(void) /* Thermal: unmasked */ LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL)); +#if CONFIG_MCA /* CMCI, if available */ if (mca_is_cmci_present()) LAPIC_WRITE(LVT_CMCI, LAPIC_VECTOR(CMCI)); +#endif if (((cpu_number() == master_cpu) && lapic_errors_masked == FALSE) || (cpu_number() != master_cpu)) { @@ -473,7 +494,7 @@ lapic_configure(void) void lapic_set_timer( - boolean_t interrupt, + boolean_t interrupt_unmasked, lapic_timer_mode_t mode, lapic_timer_divide_t divisor, lapic_timer_count_t initial_count) @@ -484,7 +505,7 @@ lapic_set_timer( state = ml_set_interrupts_enabled(FALSE); timer_vector = LAPIC_READ(LVT_TIMER); timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);; - timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED; + timer_vector |= interrupt_unmasked ? 0 : LAPIC_LVT_MASKED; timer_vector |= (mode == periodic) ? 
LAPIC_LVT_PERIODIC : 0; LAPIC_WRITE(LVT_TIMER, timer_vector); LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor); @@ -526,6 +547,10 @@ lapic_end_of_interrupt(void) _lapic_end_of_interrupt(); } +void lapic_unmask_perfcnt_interrupt(void) { + LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT)); +} + void lapic_set_intr_func(int vector, i386_intr_func_t func) { @@ -539,6 +564,7 @@ lapic_set_intr_func(int vector, i386_intr_func_t func) case LAPIC_THERMAL_INTERRUPT: case LAPIC_PERFCNT_INTERRUPT: case LAPIC_CMCI_INTERRUPT: + case LAPIC_PM_INTERRUPT: lapic_intr_func[vector] = func; break; default: @@ -548,14 +574,14 @@ lapic_set_intr_func(int vector, i386_intr_func_t func) } int -lapic_interrupt(int interrupt, x86_saved_state_t *state) +lapic_interrupt(int interrupt_num, x86_saved_state_t *state) { int retval = 0; int esr = -1; - interrupt -= lapic_interrupt_base; - if (interrupt < 0) { - if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) && + interrupt_num -= lapic_interrupt_base; + if (interrupt_num < 0) { + if (interrupt_num == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) && lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) { retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state); _lapic_end_of_interrupt(); @@ -565,22 +591,33 @@ lapic_interrupt(int interrupt, x86_saved_state_t *state) return 0; } - switch(interrupt) { + switch(interrupt_num) { case LAPIC_TIMER_INTERRUPT: case LAPIC_THERMAL_INTERRUPT: - case LAPIC_PERFCNT_INTERRUPT: case LAPIC_INTERPROCESSOR_INTERRUPT: - if (lapic_intr_func[interrupt] != NULL) - (void) (*lapic_intr_func[interrupt])(state); - if (interrupt == LAPIC_PERFCNT_INTERRUPT) - /* Clear interrupt masked */ - LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT)); + case LAPIC_PM_INTERRUPT: + if (lapic_intr_func[interrupt_num] != NULL) + (void) (*lapic_intr_func[interrupt_num])(state); _lapic_end_of_interrupt(); retval = 1; break; + case LAPIC_PERFCNT_INTERRUPT: + /* If a function has been registered, invoke it. Otherwise, + * pass up to IOKit. 
+ */ + if (lapic_intr_func[interrupt_num] != NULL) { + (void) (*lapic_intr_func[interrupt_num])(state); + /* Unmask the interrupt since we don't expect legacy users + * to be responsible for it. + */ + lapic_unmask_perfcnt_interrupt(); + _lapic_end_of_interrupt(); + retval = 1; + } + break; case LAPIC_CMCI_INTERRUPT: - if (lapic_intr_func[interrupt] != NULL) - (void) (*lapic_intr_func[interrupt])(state); + if (lapic_intr_func[interrupt_num] != NULL) + (void) (*lapic_intr_func[interrupt_num])(state); /* return 0 for plaform expert to handle */ break; case LAPIC_ERROR_INTERRUPT: @@ -634,6 +671,19 @@ lapic_interrupt(int interrupt, x86_saved_state_t *state) /* No EOI required here */ retval = 1; break; + case LAPIC_PMC_SW_INTERRUPT: + { +#if CONFIG_COUNTERS + thread_t old, new; + ml_get_csw_threads(&old, &new); + + if (pmc_context_switch(old, new) == TRUE) { + retval = 1; + /* No EOI required for SWI */ + } +#endif /* CONFIG_COUNTERS */ + } + break; } return retval; @@ -672,3 +722,23 @@ lapic_smm_restore(void) ml_set_interrupts_enabled(state); } +void +lapic_send_ipi(int cpu, int vector) +{ + boolean_t state; + + if (vector < lapic_interrupt_base) + vector += lapic_interrupt_base; + + state = ml_set_interrupts_enabled(FALSE); + + /* Wait for pending outgoing send to complete */ + while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) { + cpu_pause(); + } + + LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT); + LAPIC_WRITE(ICR, vector | LAPIC_ICR_DM_FIXED); + + (void) ml_set_interrupts_enabled(state); +} diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h index b37b3a789..e8387bb21 100644 --- a/osfmk/i386/lapic.h +++ b/osfmk/i386/lapic.h @@ -174,12 +174,17 @@ typedef uint32_t lapic_timer_count_t; #define LAPIC_ERROR_INTERRUPT 0xB #define LAPIC_SPURIOUS_INTERRUPT 0xA #define LAPIC_CMCI_INTERRUPT 0x9 +#define LAPIC_PMC_SW_INTERRUPT 0x8 +#define LAPIC_PM_INTERRUPT 0x7 + +#define LAPIC_PMC_SWI_VECTOR (LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_PMC_SW_INTERRUPT) + /* The 
vector field is ignored for NMI interrupts via the LAPIC * or otherwise, so this is not an offset from the interrupt * base. */ #define LAPIC_NMI_INTERRUPT 0x2 -#define LAPIC_FUNC_TABLE_SIZE LAPIC_PERFCNT_INTERRUPT +#define LAPIC_FUNC_TABLE_SIZE (LAPIC_PERFCNT_INTERRUPT + 1) #define LAPIC_WRITE(reg,val) \ *((volatile uint32_t *)(lapic_start + LAPIC_##reg)) = (val) @@ -206,11 +211,15 @@ extern void lapic_dump(void); extern int lapic_interrupt( int interrupt, x86_saved_state_t *state); extern void lapic_end_of_interrupt(void); +extern void lapic_unmask_perfcnt_interrupt(void); +extern void lapic_send_ipi(int cpu, int interupt); + extern int lapic_to_cpu[]; extern int cpu_to_lapic[]; extern int lapic_interrupt_base; extern void lapic_cpu_map(int lapic, int cpu_num); extern uint32_t ml_get_apicid(uint32_t cpu); +extern uint32_t ml_get_cpuid(uint32_t lapic_index); extern void lapic_set_timer( boolean_t interrupt, @@ -243,6 +252,10 @@ static inline void lapic_set_cmci_func(i386_intr_func_t func) { lapic_set_intr_func(LAPIC_VECTOR(CMCI), func); } +static inline void lapic_set_pm_func(i386_intr_func_t func) +{ + lapic_set_intr_func(LAPIC_VECTOR(PM), func); +} #ifdef MP_DEBUG #define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump() diff --git a/osfmk/i386/ldt.c b/osfmk/i386/ldt.c index 428bf9226..91416fd20 100644 --- a/osfmk/i386/ldt.c +++ b/osfmk/i386/ldt.c @@ -57,50 +57,49 @@ */ /* - * "Local" descriptor table. At the moment, all tasks use the + * "Local" descriptor table. At the moment, all tasks use the * same LDT. 
*/ #include -#include -#include -#include -struct fake_descriptor master_ldt[LDTSZ] __attribute__ ((aligned (4096))) = { - [SEL_TO_INDEX(SYSENTER_CS)] { /* kernel code (sysenter) */ +struct real_descriptor master_ldt[LDTSZ] __attribute__ ((aligned (4096))) = { +#ifdef __i386__ + [SEL_TO_INDEX(SYSENTER_CS)] MAKE_REAL_DESCRIPTOR( /* kernel code (sysenter) */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_CODE_R - }, - [SEL_TO_INDEX(SYSENTER_DS)] { /* kernel data (sysenter) */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_CODE_R + ), + [SEL_TO_INDEX(SYSENTER_DS)] MAKE_REAL_DESCRIPTOR( /* kernel data (sysenter) */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_DATA_W - }, - [SEL_TO_INDEX(USER_CS)] { /* user code segment */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_K|ACC_DATA_W + ), + [SEL_TO_INDEX(USER_CS)] MAKE_REAL_DESCRIPTOR( /* user code segment */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_CODE_R - }, - [SEL_TO_INDEX(USER_DS)] { /* user data segment */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_CODE_R + ), + [SEL_TO_INDEX(USER_DS)] MAKE_REAL_DESCRIPTOR( /* user data segment */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_DATA_W - }, - [SEL_TO_INDEX(USER64_CS)] { /* user 64-bit code segment */ + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W + ), + [SEL_TO_INDEX(USER64_CS)] MAKE_REAL_DESCRIPTOR( /* user 64-bit code segment */ 0, 0xfffff, SZ_64|SZ_G, ACC_P|ACC_PL_U|ACC_CODE_R - }, - [SEL_TO_INDEX(USER_CTHREAD)] { /* user cthread segment */ + ), +#endif + [SEL_TO_INDEX(USER_CTHREAD)] MAKE_REAL_DESCRIPTOR( /* user cthread segment */ 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_DATA_W - }, + 0xfffff, + SZ_32|SZ_G, + ACC_P|ACC_PL_U|ACC_DATA_W + ), }; diff --git a/osfmk/i386/lock.h b/osfmk/i386/lock.h index f497a82b2..d353be3c7 100644 --- a/osfmk/i386/lock.h +++ b/osfmk/i386/lock.h @@ -83,16 +83,6 @@ #include #include -typedef struct { - lck_mtx_t lck_mtx; /* inlined lck_mtx, need to be first */ -#if MACH_LDEBUG - int type; -#define MUTEX_TAG 
0x4d4d - vm_offset_t pc; - vm_offset_t thread; -#endif /* MACH_LDEBUG */ -} mutex_t; - typedef lck_rw_t lock_t; extern unsigned int LockTimeOutTSC; /* Lock timeout in TSC ticks */ @@ -140,15 +130,6 @@ extern unsigned int LockTimeOut; /* Lock timeout in absolute time */ : \ "r" (bit), "m" (*(volatile int *)(l))); -static inline unsigned long i_bit_isset(unsigned int test, volatile unsigned long *word) -{ - int bit; - - __asm__ volatile("btl %2,%1\n\tsbbl %0,%0" : "=r" (bit) - : "m" (word), "ir" (test)); - return bit; -} - static inline char xchgb(volatile char * cp, char new); static inline void atomic_incl(volatile long * p, long delta); @@ -177,31 +158,10 @@ static inline char xchgb(volatile char * cp, char new) return (old); } -/* - * Compare and exchange: - * - returns failure (0) if the location did not contain the old value, - * - returns success (1) if the location was set to the new value. - */ -static inline uint32_t -atomic_cmpxchg(uint32_t *p, uint32_t old, uint32_t new) -{ - uint32_t res = old; - - __asm__ volatile( - "lock; cmpxchgl %1,%2; \n\t" - " setz %%al; \n\t" - " movzbl %%al,%0" - : "+a" (res) /* %0: old value to compare, returns success */ - : "r" (new), /* %1: new value to set */ - "m" (*(p)) /* %2: memory address */ - : "memory"); - return (res); -} - static inline void atomic_incl(volatile long * p, long delta) { __asm__ volatile (" lock \n \ - addl %0,%1" : \ + add %0,%1" : \ : \ "r" (delta), "m" (*(volatile long *)p)); } @@ -225,7 +185,7 @@ static inline void atomic_incb(volatile char * p, char delta) static inline void atomic_decl(volatile long * p, long delta) { __asm__ volatile (" lock \n \ - subl %0,%1" : \ + sub %0,%1" : \ : \ "r" (delta), "m" (*(volatile long *)p)); } @@ -235,7 +195,7 @@ static inline int atomic_decl_and_test(volatile long * p, long delta) uint8_t ret; __asm__ volatile ( " lock \n\t" - " subl %1,%2 \n\t" + " sub %1,%2 \n\t" " sete %0" : "=qm" (ret) : "r" (delta), "m" (*(volatile long *)p)); diff --git 
a/osfmk/i386/locks.h b/osfmk/i386/locks.h index 0ad756ed2..d74e94156 100644 --- a/osfmk/i386/locks.h +++ b/osfmk/i386/locks.h @@ -45,7 +45,8 @@ extern unsigned int LcksOpts; #ifdef MACH_KERNEL_PRIVATE typedef struct { - unsigned int lck_spin_data[10]; /* XXX - usimple_lock_data_t */ + unsigned long interlock; + unsigned long lck_spin_pad[9]; /* XXX - usimple_lock_data_t */ } lck_spin_t; #define LCK_SPIN_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ @@ -53,7 +54,7 @@ typedef struct { #else #ifdef KERNEL_PRIVATE typedef struct { - unsigned int opaque[10]; + unsigned long opaque[10]; } lck_spin_t; #else typedef struct __lck_spin_t__ lck_spin_t; @@ -64,37 +65,55 @@ typedef struct __lck_spin_t__ lck_spin_t; typedef struct _lck_mtx_ { union { struct { - unsigned int lck_mtxd_ilk; - unsigned int lck_mtxd_locked; - unsigned short lck_mtxd_waiters; - unsigned short lck_mtxd_pri; + volatile uintptr_t lck_mtxd_owner; + unsigned long lck_mtxd_ptr; + volatile uint32_t lck_mtxd_waiters:16, + lck_mtxd_pri:8, + lck_mtxd_ilocked:1, + lck_mtxd_mlocked:1, + lck_mtxd_promoted:1, + lck_mtxd_spin:1, + lck_mtxd_pad4:4; /* padding */ +#ifdef __x86_64__ + unsigned int lck_mtxd_pad; +#endif } lck_mtxd; struct { - unsigned int lck_mtxi_tag; + unsigned long lck_mtxi_tag; struct _lck_mtx_ext_ *lck_mtxi_ptr; - unsigned int lck_mtxi_pad8; + unsigned long lck_mtxi_pad; } lck_mtxi; } lck_mtx_sw; } lck_mtx_t; -#define lck_mtx_ilk lck_mtx_sw.lck_mtxd.lck_mtxd_ilk -#define lck_mtx_locked lck_mtx_sw.lck_mtxd.lck_mtxd_locked +#define lck_mtx_owner lck_mtx_sw.lck_mtxd.lck_mtxd_owner #define lck_mtx_waiters lck_mtx_sw.lck_mtxd.lck_mtxd_waiters #define lck_mtx_pri lck_mtx_sw.lck_mtxd.lck_mtxd_pri +#define lck_mtx_ilocked lck_mtx_sw.lck_mtxd.lck_mtxd_ilocked +#define lck_mtx_mlocked lck_mtx_sw.lck_mtxd.lck_mtxd_mlocked +#define lck_mtx_promoted lck_mtx_sw.lck_mtxd.lck_mtxd_promoted +#define lck_mtx_spin lck_mtx_sw.lck_mtxd.lck_mtxd_spin #define lck_mtx_tag lck_mtx_sw.lck_mtxi.lck_mtxi_tag #define 
lck_mtx_ptr lck_mtx_sw.lck_mtxi.lck_mtxi_ptr +#define lck_mtx_state lck_mtx_sw.lck_mtxi.lck_mtxi_pad #define LCK_MTX_TAG_INDIRECT 0x00001007 /* lock marked as Indirect */ #define LCK_MTX_TAG_DESTROYED 0x00002007 /* lock marked as Destroyed */ +#define LCK_MTX_PTR_EXTENDED 0x00003007 /* lock is extended version */ -#define MUTEX_LOCKED_AS_SPIN 0x00004001 /* used to indicate that the mutex */ - /* was acquired as a spin lock - stored in lck_mtxd_locked */ /* Adaptive spin before blocking */ extern unsigned int MutexSpin; -extern void lck_mtx_lock_spinwait(lck_mtx_t *lck); +extern int lck_mtx_lock_spinwait_x86(lck_mtx_t *mutex); +extern void lck_mtx_lock_wait_x86(lck_mtx_t *mutex); +extern void lck_mtx_lock_acquire_x86(lck_mtx_t *mutex); +extern void lck_mtx_unlock_wakeup_x86(lck_mtx_t *mutex, int owner_was_promoted); -extern void lck_mtx_interlock_panic(lck_mtx_t *lck); +extern void lck_mtx_lock_mark_destroyed(lck_mtx_t *mutex); +extern int lck_mtx_lock_mark_promoted(lck_mtx_t *mutex); +extern int lck_mtx_lock_decr_waiter(lck_mtx_t *mutex); +extern int lck_mtx_lock_grab_mutex(lck_mtx_t *mutex); +extern integer_t lck_mtx_lock_get_pri(lck_mtx_t *mutex); extern void hw_lock_byte_init(uint8_t *lock_byte); extern void hw_lock_byte_lock(uint8_t *lock_byte); @@ -102,6 +121,9 @@ extern void hw_lock_byte_unlock(uint8_t *lock_byte); typedef struct { unsigned int type; +#ifdef __x86_64__ + unsigned int pad4; +#endif vm_offset_t pc; vm_offset_t thread; } lck_mtx_deb_t; @@ -116,8 +138,14 @@ typedef struct _lck_mtx_ext_ { lck_mtx_t lck_mtx; struct _lck_grp_ *lck_mtx_grp; unsigned int lck_mtx_attr; +#ifdef __x86_64__ + unsigned int lck_mtx_pad1; +#endif lck_mtx_deb_t lck_mtx_deb; uint64_t lck_mtx_stat; +#ifdef __x86_64__ + unsigned int lck_mtx_pad2[2]; +#endif } lck_mtx_ext_t; #define LCK_MTX_ATTR_DEBUG 0x1 @@ -128,18 +156,24 @@ typedef struct _lck_mtx_ext_ { #else #ifdef KERNEL_PRIVATE typedef struct { - unsigned int opaque[3]; + unsigned long opaque[3]; } lck_mtx_t; + +typedef 
struct { + unsigned long opaque[10]; +} lck_mtx_ext_t; + #else -typedef struct __lck_mtx_t__ lck_mtx_t; +typedef struct __lck_mtx_t__ lck_mtx_t; +typedef struct __lck_mtx_ext_t__ lck_mtx_ext_t; #endif #endif #ifdef MACH_KERNEL_PRIVATE #pragma pack(1) /* Make sure the structure stays as we defined it */ -typedef struct { +typedef struct _lck_rw_t_internal_ { volatile uint16_t lck_rw_shared_count; /* No. of accepted readers */ - uint8_t lck_rw_interlock; /* Interlock byte */ + uint8_t lck_rw_interlock; /* Interlock byte */ volatile uint8_t lck_rw_priv_excl:1, /* Writers prioritized if set */ lck_rw_want_upgrade:1, /* Read-to-write upgrade waiting */ @@ -147,12 +181,15 @@ typedef struct { lck_r_waiting:1, /* Reader is sleeping on lock */ lck_w_waiting:1, /* Writer is sleeping on lock */ lck_rw_can_sleep:1, /* Can attempts to lock go to sleep? */ - lck_rw_pad6:2; /* padding */ + lck_rw_padb6:2; /* padding */ - unsigned int lck_rw_tag; /* This can be obsoleted when stats + uint32_t lck_rw_tag; /* This can be obsoleted when stats * are in */ - unsigned int lck_rw_pad8; + uint32_t lck_rw_pad8; +#ifdef __x86_64__ + uint32_t lck_rw_pad12; +#endif } lck_rw_t; #pragma pack() @@ -171,9 +208,14 @@ typedef struct { #else #ifdef KERNEL_PRIVATE +#pragma pack(1) typedef struct { - unsigned int opaque[3]; + uint32_t opaque[3]; +#ifdef __x86_64__ + uint32_t opaque4; +#endif } lck_rw_t; +#pragma pack() #else typedef struct __lck_rw_t__ lck_rw_t; #endif diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index 38d332b00..3d3e5a09f 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,6 +77,7 @@ #include #include +#include /* mp_recent_debugger_activity() */ #if MACH_KDB #include #include @@ -84,7 +85,7 @@ #include #endif /* MACH_KDB */ -#include +#include #include @@ -105,21 +106,25 @@ #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104 #define LCK_RW_LCK_EX_TO_SH_CODE 0x105 -#define LCK_MTX_LCK_SPIN 0x200 +#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106 +#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107 +#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108 +#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109 +#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110 +#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111 +#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112 +#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113 + #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) unsigned int LcksOpts=0; -unsigned int lock_wait_time[2] = { (unsigned int)-1, 0 } ; /* Forwards */ #if MACH_KDB void db_print_simple_lock( simple_lock_t addr); - -void db_print_mutex( - mutex_t * addr); #endif /* MACH_KDB */ @@ -131,9 +136,6 @@ int uslock_check = 1; int max_lock_loops = 100000000; decl_simple_lock_data(extern , printf_lock) decl_simple_lock_data(extern , panic_lock) -#if MACH_KDB -decl_simple_lock_data(extern , kdb_lock) -#endif /* MACH_KDB */ #endif /* USLOCK_DEBUG */ @@ -146,7 +148,7 @@ typedef void *pc_t; #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS) #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS) #if ANY_LOCK_DEBUG -#define OBTAIN_PC(pc,l) ((pc) = (void *) GET_RETURN_PC(&(l))) +#define OBTAIN_PC(pc) ((pc) = GET_RETURN_PC()) #define DECL_PC(pc) pc_t pc; #else /* ANY_LOCK_DEBUG */ #define DECL_PC(pc) @@ -154,9 +156,9 @@ typedef void *pc_t; /* * Eliminate lint complaints about unused local pc variables. 
*/ -#define OBTAIN_PC(pc,l) ++pc +#define OBTAIN_PC(pc) ++pc #else /* lint */ -#define OBTAIN_PC(pc,l) +#define OBTAIN_PC(pc) #endif /* lint */ #endif /* USLOCK_DEBUG */ @@ -178,6 +180,12 @@ int usld_lock_common_checks(usimple_lock_t, char *); #define USLDBG(stmt) #endif /* USLOCK_DEBUG */ + +extern int lck_rw_grab_want(lck_rw_t *lck); +extern int lck_rw_grab_shared(lck_rw_t *lck); +extern int lck_rw_held_read_or_upgrade(lck_rw_t *lck); + + /* * Forward definitions */ @@ -185,9 +193,25 @@ int usld_lock_common_checks(usimple_lock_t, char *); void lck_rw_lock_shared_gen( lck_rw_t *lck); -lck_rw_type_t lck_rw_done_gen( +void lck_rw_lock_exclusive_gen( + lck_rw_t *lck); + +boolean_t lck_rw_lock_shared_to_exclusive_success( lck_rw_t *lck); +boolean_t lck_rw_lock_shared_to_exclusive_failure( + lck_rw_t *lck, + int prior_lock_state); + +void lck_rw_lock_exclusive_to_shared_gen( + lck_rw_t *lck, + int prior_lock_state); + +lck_rw_type_t lck_rw_done_gen( + lck_rw_t *lck, + int prior_lock_state); + + /* * Routine: lck_spin_alloc_init */ @@ -238,9 +262,9 @@ lck_spin_destroy( lck_spin_t *lck, lck_grp_t *grp) { - if (lck->lck_spin_data[0] == LCK_SPIN_TAG_DESTROYED) + if (lck->interlock == LCK_SPIN_TAG_DESTROYED) return; - lck->lck_spin_data[0] = LCK_SPIN_TAG_DESTROYED; + lck->interlock = LCK_SPIN_TAG_DESTROYED; lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN); lck_grp_deallocate(grp); return; @@ -310,12 +334,17 @@ usimple_lock( #ifndef MACHINE_SIMPLE_LOCK DECL_PC(pc); - OBTAIN_PC(pc, l); + OBTAIN_PC(pc); USLDBG(usld_lock_pre(l, pc)); - if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) /* Try to get the lock with a timeout */ - panic("simple lock deadlock detection: lock=%p, cpu=%d, owning thread=0x%x", l, cpu_number(), l->interlock.lock_data); - + if(!hw_lock_to(&l->interlock, LockTimeOutTSC)) {/* Try to get the lock + * with a timeout */ + boolean_t uslock_acquired = FALSE; + while (mp_recent_debugger_activity() && + !(uslock_acquired = hw_lock_to(&l->interlock, LockTimeOutTSC))); + if 
(uslock_acquired == FALSE) + panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p", l, (uintptr_t)l->interlock.lock_data, current_thread()); + } USLDBG(usld_lock_post(l, pc)); #else simple_lock((simple_lock_t)l); @@ -337,7 +366,7 @@ usimple_unlock( #ifndef MACHINE_SIMPLE_LOCK DECL_PC(pc); - OBTAIN_PC(pc, l); + OBTAIN_PC(pc); USLDBG(usld_unlock(l, pc)); hw_lock_unlock(&l->interlock); #else @@ -366,7 +395,7 @@ usimple_lock_try( unsigned int success; DECL_PC(pc); - OBTAIN_PC(pc, l); + OBTAIN_PC(pc); USLDBG(usld_lock_try_pre(l, pc)); if ((success = hw_lock_try(&l->interlock))) { USLDBG(usld_lock_try_post(l, pc)); @@ -430,10 +459,10 @@ usld_lock_common_checks( if (l == USIMPLE_LOCK_NULL) panic("%s: null lock pointer", caller); if (l->lock_type != USLOCK_TAG) - panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l); + panic("%s: 0x%p is not a usimple lock", caller, l); if (!(l->debug.state & USLOCK_INIT)) - panic("%s: 0x%x is not an initialized lock", - caller, (integer_t) l); + panic("%s: %p is not an initialized lock", + caller, l); return USLOCK_CHECKING(l); } @@ -495,11 +524,11 @@ usld_lock_post( return; if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) - panic("%s: lock 0x%x became uninitialized", - caller, (integer_t) l); + panic("%s: lock %p became uninitialized", + caller, l); if ((l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x became TAKEN by someone else", - caller, (integer_t) l); + panic("%s: lock 0x%p became TAKEN by someone else", + caller, l); mycpu = cpu_number(); l->debug.lock_thread = (void *)current_thread(); @@ -534,14 +563,14 @@ usld_unlock( mycpu = cpu_number(); if (!(l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x hasn't been taken", - caller, (integer_t) l); + panic("%s: lock 0x%p hasn't been taken", + caller, l); if (l->debug.lock_thread != (void *) current_thread()) - panic("%s: unlocking lock 0x%x, owned by thread %p", - caller, (integer_t) l, l->debug.lock_thread); + 
panic("%s: unlocking lock 0x%p, owned by thread %p", + caller, l, l->debug.lock_thread); if (l->debug.lock_cpu != mycpu) { - printf("%s: unlocking lock 0x%x on cpu 0x%x", - caller, (integer_t) l, mycpu); + printf("%s: unlocking lock 0x%p on cpu 0x%x", + caller, l, mycpu); printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu); panic("%s", caller); } @@ -596,11 +625,11 @@ usld_lock_try_post( return; if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) - panic("%s: lock 0x%x became uninitialized", - caller, (integer_t) l); + panic("%s: lock 0x%p became uninitialized", + caller, l); if ((l->debug.state & USLOCK_TAKEN)) - panic("%s: lock 0x%x became TAKEN by someone else", - caller, (integer_t) l); + panic("%s: lock 0x%p became TAKEN by someone else", + caller, l); mycpu = cpu_number(); l->debug.lock_thread = (void *) current_thread(); @@ -631,8 +660,8 @@ usl_trace( if (traced_lock == l) { XPR(XPR_SLOCK, "seq %d, cpu %d, %s @ %x\n", - (integer_t) lock_seq, (integer_t) mycpu, - (integer_t) op_name, (integer_t) pc, 0); + (uintptr_t) lock_seq, (uintptr_t) mycpu, + (uintptr_t) op_name, (uintptr_t) pc, 0); lock_seq++; } } @@ -699,6 +728,7 @@ lock_init( l->lck_rw_can_sleep = can_sleep; l->lck_rw_tag = tag; l->lck_rw_priv_excl = 1; + l->lck_r_waiting = l->lck_w_waiting = 0; } @@ -768,9 +798,11 @@ lck_rw_alloc_init( lck_attr_t *attr) { lck_rw_t *lck; - if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) + if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) { + bzero(lck, sizeof(lck_rw_t)); lck_rw_init(lck, grp, attr); - + } + return(lck); } @@ -802,6 +834,7 @@ lck_rw_init( lck->lck_rw_want_upgrade = FALSE; lck->lck_rw_shared_count = 0; lck->lck_rw_can_sleep = TRUE; + lck->lck_r_waiting = lck->lck_w_waiting = 0; lck->lck_rw_tag = 0; lck->lck_rw_priv_excl = ((lck_attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0); @@ -816,7 +849,8 @@ lck_rw_init( void lck_rw_destroy( lck_rw_t *lck, - lck_grp_t *grp) { + lck_grp_t *grp) +{ if (lck->lck_rw_tag == 
LCK_RW_TAG_DESTROYED) return; lck->lck_rw_tag = LCK_RW_TAG_DESTROYED; @@ -876,92 +910,132 @@ lck_rw_lock_pause(boolean_t interrupts_enabled) cpu_pause(); } + +/* + * compute the deadline to spin against when + * waiting for a change of state on a lck_rw_t + */ +static inline uint64_t +lck_rw_deadline_for_spin(lck_rw_t *lck) +{ + if (lck->lck_rw_can_sleep) { + if (lck->lck_r_waiting || lck->lck_w_waiting || lck->lck_rw_shared_count > machine_info.max_cpus) { + /* + * there are already threads waiting on this lock... this + * implies that they have spun beyond their deadlines waiting for + * the desired state to show up so we will not bother spinning at this time... + * or + * the current number of threads sharing this lock exceeds our capacity to run them + * concurrently and since all states we're going to spin for require the rw_shared_count + * to be at 0, we'll not bother spinning since the latency for this to happen is + * unpredictable... + */ + return (mach_absolute_time()); + } + return (mach_absolute_time() + MutexSpin); + } else + return (mach_absolute_time() + (100000LL * 1000000000LL)); +} + + /* * Routine: lck_rw_lock_exclusive */ void -lck_rw_lock_exclusive( +lck_rw_lock_exclusive_gen( lck_rw_t *lck) { - int i; - wait_result_t res; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - boolean_t istate; -#if CONFIG_DTRACE - uint64_t wait_interval = 0; - int slept = 0; - int readers_at_sleep; -#endif + uint64_t deadline = 0; + int slept = 0; + int gotlock = 0; + int lockheld = 0; + wait_result_t res = 0; + boolean_t istate = -1; - istate = lck_interlock_lock(lck); #if CONFIG_DTRACE - readers_at_sleep = lck->lck_rw_shared_count; + boolean_t dtrace_ls_initialized = FALSE; + boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled= FALSE; + uint64_t wait_interval = 0; + int readers_at_sleep = 0; #endif -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - /* * Try to acquire the lck_rw_want_write bit. 
*/ - while (lck->lck_rw_want_write) { + while ( !lck_rw_grab_want(lck)) { - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); - /* - * Either sleeping or spinning is happening, start - * a timing of our delay interval now. - */ #if CONFIG_DTRACE - if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) { - wait_interval = mach_absolute_time(); - } else { - wait_interval = -1; + if (dtrace_ls_initialized == FALSE) { + dtrace_ls_initialized = TRUE; + dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0); + dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0); + dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block; + if (dtrace_ls_enabled) { + /* + * Either sleeping or spinning is happening, + * start a timing of our delay interval now. + */ + readers_at_sleep = lck->lck_rw_shared_count; + wait_interval = mach_absolute_time(); + } } #endif + if (istate == -1) + istate = ml_get_interrupts_enabled(); + deadline = lck_rw_deadline_for_spin(lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); + + while (((gotlock = lck_rw_grab_want(lck)) == 0) && mach_absolute_time() < deadline) + lck_rw_lock_pause(istate); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, gotlock, 0); + + if (gotlock) + break; + /* + * if we get here, the deadline has expired w/o us + * being able to grab the lock exclusively + * check to see if we're allowed to do a thread_block + */ + if (lck->lck_rw_can_sleep) { - i = lock_wait_time[lck->lck_rw_can_sleep ? 
1 : 0]; - if (i != 0) { - lck_interlock_unlock(lck, istate); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - lck_rw_want_write"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && lck->lck_rw_want_write) - lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); - } - if (lck->lck_rw_can_sleep && lck->lck_rw_want_write) { - lck->lck_w_waiting = TRUE; - res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); - if (res == THREAD_WAITING) { - lck_interlock_unlock(lck, istate); - res = thread_block(THREAD_CONTINUE_NULL); -#if CONFIG_DTRACE - slept = 1; -#endif - istate = lck_interlock_lock(lck); - } - } - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0); - } - lck->lck_rw_want_write = TRUE; + if (lck->lck_rw_want_write) { - /* Wait for readers (and upgrades) to finish */ + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) { + lck->lck_w_waiting = TRUE; - i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0]; + res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); + lck_interlock_unlock(lck, istate); - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START, - (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0); + if (res == THREAD_WAITING) { + res = thread_block(THREAD_CONTINUE_NULL); + slept++; + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0); + } else { + lck->lck_rw_want_write = TRUE; + lck_interlock_unlock(lck, istate); + break; + } + } + } + /* + * Wait for readers (and upgrades) to finish... 
+ * the test for these conditions must be done simultaneously with + * a check of the interlock not being held since + * the rw_shared_count will drop to 0 first and then want_upgrade + * will be set to 1 in the shared_to_exclusive scenario... those + * adjustments are done behind the interlock and represent an + * atomic change in state and must be considered as such + * however, once we see the read count at 0, the want_upgrade not set + * and the interlock not held, we are safe to proceed + */ + while (lck_rw_held_read_or_upgrade(lck)) { #if CONFIG_DTRACE /* @@ -970,42 +1044,69 @@ lck_rw_lock_exclusive( * to -1 we don't have accurate data so we cannot later * decide to record a dtrace spin or sleep event. */ - if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) { - wait_interval = mach_absolute_time(); - } else { - wait_interval = (unsigned) -1; + if (dtrace_ls_initialized == FALSE) { + dtrace_ls_initialized = TRUE; + dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0); + dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0); + dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block; + if (dtrace_ls_enabled) { + /* + * Either sleeping or spinning is happening, + * start a timing of our delay interval now. 
+ */ + readers_at_sleep = lck->lck_rw_shared_count; + wait_interval = mach_absolute_time(); + } } #endif + if (istate == -1) + istate = ml_get_interrupts_enabled(); + + deadline = lck_rw_deadline_for_spin(lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); + + while ((lockheld = lck_rw_held_read_or_upgrade(lck)) && mach_absolute_time() < deadline) + lck_rw_lock_pause(istate); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, (int)lck, 0, 0, lockheld, 0); + + if ( !lockheld) + break; + /* + * if we get here, the deadline has expired w/o us + * being able to grab the lock exclusively + * check to see if we're allowed to do a thread_block + */ + if (lck->lck_rw_can_sleep) { - if (i != 0) { - lck_interlock_unlock(lck, istate); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait for readers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && (lck->lck_rw_shared_count != 0 || - lck->lck_rw_want_upgrade)) - lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); - } - if (lck->lck_rw_can_sleep && (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade)) { - lck->lck_w_waiting = TRUE; - res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); - if (res == THREAD_WAITING) { + if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0); + + lck->lck_w_waiting = TRUE; + + res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); lck_interlock_unlock(lck, istate); - res = thread_block(THREAD_CONTINUE_NULL); -#if CONFIG_DTRACE - slept = 1; -#endif - istate = lck_interlock_lock(lck); + + if (res == THREAD_WAITING) { + res = thread_block(THREAD_CONTINUE_NULL); + slept++; + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, (int)lck, res, slept, 0, 0); + } else { + 
lck_interlock_unlock(lck, istate); + /* + * must own the lock now, since we checked for + * readers or upgrade owner behind the interlock + * no need for a call to 'lck_rw_held_read_or_upgrade' + */ + break; } } - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END, - (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0); } - lck_interlock_unlock(lck, istate); #if CONFIG_DTRACE /* * Decide what latencies we suffered that are Dtrace events. @@ -1016,7 +1117,7 @@ lck_rw_lock_exclusive( * If we have set wait_interval to -1, then dtrace was not enabled when we * started sleeping/spinning so we don't record this event. */ - if (wait_interval != 0 && wait_interval != (unsigned) -1) { + if (dtrace_ls_enabled == TRUE) { if (slept == 0) { LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 1); @@ -1039,67 +1140,57 @@ lck_rw_lock_exclusive( /* * Routine: lck_rw_done_gen + * + * called from the assembly language wrapper... + * prior_lock_state is the value in the 1st + * word of the lock at the time of a successful + * atomic compare and exchange with the new value... + * it represents the state of the lock before we + * decremented the rw_shared_count or cleared either + * rw_want_upgrade or rw_want_write and + * the lck_x_waiting bits... since the wrapper + * routine has already changed the state atomically, + * we just need to decide if we should + * wake up anyone and what value to return... 
we do + * this by examining the state of the lock before + * we changed it */ lck_rw_type_t lck_rw_done_gen( - lck_rw_t *lck) + lck_rw_t *lck, + int prior_lock_state) { - boolean_t wakeup_readers = FALSE; - boolean_t wakeup_writers = FALSE; - lck_rw_type_t lck_rw_type; - boolean_t istate; - - istate = lck_interlock_lock(lck); - - if (lck->lck_rw_shared_count != 0) { - lck_rw_type = LCK_RW_TYPE_SHARED; - lck->lck_rw_shared_count--; - } - else { - lck_rw_type = LCK_RW_TYPE_EXCLUSIVE; - if (lck->lck_rw_want_upgrade) - lck->lck_rw_want_upgrade = FALSE; - else - lck->lck_rw_want_write = FALSE; - } + lck_rw_t *fake_lck; + lck_rw_type_t lock_type; /* - * There is no reason to wakeup a waiting thread - * if the read-count is non-zero. Consider: - * we must be dropping a read lock - * threads are waiting only if one wants a write lock - * if there are still readers, they can't proceed + * prior_lock state is a snapshot of the 1st word of the + * lock in question... we'll fake up a pointer to it + * and carefully not access anything beyond whats defined + * in the first word of a lck_rw_t */ + fake_lck = (lck_rw_t *)&prior_lock_state; - if (lck->lck_rw_shared_count == 0) { - if (lck->lck_w_waiting) { - lck->lck_w_waiting = FALSE; - wakeup_writers = TRUE; - } - if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) && - lck->lck_r_waiting) { - lck->lck_r_waiting = FALSE; - wakeup_readers = TRUE; - } - } - - lck_interlock_unlock(lck, istate); + if (fake_lck->lck_rw_shared_count <= 1) { + if (fake_lck->lck_w_waiting) + thread_wakeup(RW_LOCK_WRITER_EVENT(lck)); - if (wakeup_readers) - thread_wakeup(RW_LOCK_READER_EVENT(lck)); - if (wakeup_writers) - thread_wakeup(RW_LOCK_WRITER_EVENT(lck)); + if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) + thread_wakeup(RW_LOCK_READER_EVENT(lck)); + } + if (fake_lck->lck_rw_shared_count) + lock_type = LCK_RW_TYPE_SHARED; + else + lock_type = LCK_RW_TYPE_EXCLUSIVE; #if CONFIG_DTRACE - 
LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE ? 1 : 0)); + LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1); #endif - return(lck_rw_type); + return(lock_type); } - - /* * Routine: lck_rw_unlock */ @@ -1168,82 +1259,98 @@ lck_rw_lock( /* * Routine: lck_rw_lock_shared_gen + * Function: + * assembly fast path code has determined that this lock + * is held exclusively... this is where we spin/block + * until we can acquire the lock in the shared mode */ void lck_rw_lock_shared_gen( lck_rw_t *lck) { - int i; - wait_result_t res; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - boolean_t istate; + uint64_t deadline = 0; + int gotlock = 0; + int slept = 0; + wait_result_t res = 0; + boolean_t istate = -1; + #if CONFIG_DTRACE uint64_t wait_interval = 0; - int slept = 0; - int readers_at_sleep; + int readers_at_sleep = 0; + boolean_t dtrace_ls_initialized = FALSE; + boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE; #endif - istate = lck_interlock_lock(lck); + while ( !lck_rw_grab_shared(lck)) { + #if CONFIG_DTRACE - readers_at_sleep = lck->lck_rw_shared_count; + if (dtrace_ls_initialized == FALSE) { + dtrace_ls_initialized = TRUE; + dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0); + dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0); + dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block; + if (dtrace_ls_enabled) { + /* + * Either sleeping or spinning is happening, + * start a timing of our delay interval now. 
+ */ + readers_at_sleep = lck->lck_rw_shared_count; + wait_interval = mach_absolute_time(); + } + } #endif + if (istate == -1) + istate = ml_get_interrupts_enabled(); -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ - while ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) && - ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) { + deadline = lck_rw_deadline_for_spin(lck); - i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0]; + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0); - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START, - (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, i, 0); -#if CONFIG_DTRACE - if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) { - wait_interval = mach_absolute_time(); - } else { - wait_interval = -1; - } -#endif + while (((gotlock = lck_rw_grab_shared(lck)) == 0) && mach_absolute_time() < deadline) + lck_rw_lock_pause(istate); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, gotlock, 0); + + if (gotlock) + break; + /* + * if we get here, the deadline has expired w/o us + * being able to grab the lock for read + * check to see if we're allowed to do a thread_block + */ + if (lck->lck_rw_can_sleep) { - if (i != 0) { - lck_interlock_unlock(lck, istate); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - wait no writers"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && - (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) && - ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) - lck_rw_lock_pause(istate); istate = lck_interlock_lock(lck); - } - if (lck->lck_rw_can_sleep && - (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) && - 
((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) { - lck->lck_r_waiting = TRUE; - res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT); - if (res == THREAD_WAITING) { + if ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) && + ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) { + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0); + + lck->lck_r_waiting = TRUE; + + res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT); lck_interlock_unlock(lck, istate); - res = thread_block(THREAD_CONTINUE_NULL); -#if CONFIG_DTRACE - slept = 1; -#endif - istate = lck_interlock_lock(lck); + + if (res == THREAD_WAITING) { + res = thread_block(THREAD_CONTINUE_NULL); + slept++; + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END, + (int)lck, res, slept, 0, 0); + } else { + lck->lck_rw_shared_count++; + lck_interlock_unlock(lck, istate); + break; } } - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END, - (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, res, 0); } - lck->lck_rw_shared_count++; - - lck_interlock_unlock(lck, istate); #if CONFIG_DTRACE - if (wait_interval != 0 && wait_interval != (unsigned) -1) { + if (dtrace_ls_enabled == TRUE) { if (slept == 0) { LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0); } else { @@ -1258,114 +1365,137 @@ lck_rw_lock_shared_gen( /* - * Routine: lck_rw_lock_shared_to_exclusive + * Routine: lck_rw_lock_shared_to_exclusive_failure * Function: - * Improves a read-only lock to one with - * write permission. If another reader has - * already requested an upgrade to a write lock, - * no lock is held upon return. - * - * Returns FALSE if the upgrade *failed*. 
+ * assembly fast path code has already dropped our read + * count and determined that someone else owns 'lck_rw_want_upgrade' + * if 'lck_rw_shared_count' == 0, its also already dropped 'lck_w_waiting' + * all we need to do here is determine if a wakeup is needed */ - boolean_t -lck_rw_lock_shared_to_exclusive( - lck_rw_t *lck) +lck_rw_lock_shared_to_exclusive_failure( + lck_rw_t *lck, + int prior_lock_state) { - int i; - boolean_t do_wakeup = FALSE; - wait_result_t res; -#if MACH_LDEBUG - int decrementer; -#endif /* MACH_LDEBUG */ - boolean_t istate; -#if CONFIG_DTRACE - uint64_t wait_interval = 0; - int slept = 0; - int readers_at_sleep = 0; -#endif - - istate = lck_interlock_lock(lck); + lck_rw_t *fake_lck; - lck->lck_rw_shared_count--; - - if (lck->lck_rw_want_upgrade) { - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START, - (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0); + /* + * prior_lock state is a snapshot of the 1st word of the + * lock in question... we'll fake up a pointer to it + * and carefully not access anything beyond whats defined + * in the first word of a lck_rw_t + */ + fake_lck = (lck_rw_t *)&prior_lock_state; + if (fake_lck->lck_w_waiting && fake_lck->lck_rw_shared_count == 1) { /* * Someone else has requested upgrade. - * Since we've released a read lock, wake - * him up. 
+ * Since we've released the read lock, wake + * him up if he's blocked waiting */ - if (lck->lck_w_waiting && (lck->lck_rw_shared_count == 0)) { - lck->lck_w_waiting = FALSE; - do_wakeup = TRUE; - } - - lck_interlock_unlock(lck, istate); + thread_wakeup(RW_LOCK_WRITER_EVENT(lck)); + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE, + (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0); - if (do_wakeup) - thread_wakeup(RW_LOCK_WRITER_EVENT(lck)); + return (FALSE); +} - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END, - (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0); - return (FALSE); - } +/* + * Routine: lck_rw_lock_shared_to_exclusive_failure + * Function: + * assembly fast path code has already dropped our read + * count and successfully acquired 'lck_rw_want_upgrade' + * we just need to wait for the rest of the readers to drain + * and then we can return as the exclusive holder of this lock + */ +boolean_t +lck_rw_lock_shared_to_exclusive_success( + lck_rw_t *lck) +{ + uint64_t deadline = 0; + int slept = 0; + int still_shared = 0; + wait_result_t res; + boolean_t istate = -1; - lck->lck_rw_want_upgrade = TRUE; +#if CONFIG_DTRACE + uint64_t wait_interval = 0; + int readers_at_sleep = 0; + boolean_t dtrace_ls_initialized = FALSE; + boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE; +#endif -#if MACH_LDEBUG - decrementer = DECREMENTER_TIMEOUT; -#endif /* MACH_LDEBUG */ while (lck->lck_rw_shared_count != 0) { + #if CONFIG_DTRACE - if (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] && wait_interval == 0) { - wait_interval = mach_absolute_time(); - readers_at_sleep = lck->lck_rw_shared_count; - } else { - wait_interval = -1; + if (dtrace_ls_initialized == FALSE) { + dtrace_ls_initialized = TRUE; + dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0); + 
dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0); + dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block; + if (dtrace_ls_enabled) { + /* + * Either sleeping or spinning is happening, + * start a timing of our delay interval now. + */ + readers_at_sleep = lck->lck_rw_shared_count; + wait_interval = mach_absolute_time(); + } } #endif - i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0]; - - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START, - (int)lck, lck->lck_rw_shared_count, i, 0, 0); - - if (i != 0) { - lck_interlock_unlock(lck, istate); -#if MACH_LDEBUG - if (!--decrementer) - Debugger("timeout - lck_rw_shared_count"); -#endif /* MACH_LDEBUG */ - while (--i != 0 && lck->lck_rw_shared_count != 0) - lck_rw_lock_pause(istate); + if (istate == -1) + istate = ml_get_interrupts_enabled(); + + deadline = lck_rw_deadline_for_spin(lck); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_shared_count, 0, 0, 0); + + while ((still_shared = lck->lck_rw_shared_count) && mach_absolute_time() < deadline) + lck_rw_lock_pause(istate); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END, + (int)lck, lck->lck_rw_shared_count, 0, 0, 0); + + if ( !still_shared) + break; + /* + * if we get here, the deadline has expired w/o + * the rw_shared_count having drained to 0 + * check to see if we're allowed to do a thread_block + */ + if (lck->lck_rw_can_sleep) { + istate = lck_interlock_lock(lck); - } + + if (lck->lck_rw_shared_count != 0) { + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START, + (int)lck, lck->lck_rw_shared_count, 0, 0, 0); + + lck->lck_w_waiting = TRUE; - if (lck->lck_rw_can_sleep && lck->lck_rw_shared_count != 0) { - lck->lck_w_waiting = TRUE; - res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); - if 
(res == THREAD_WAITING) { + res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT); lck_interlock_unlock(lck, istate); - res = thread_block(THREAD_CONTINUE_NULL); -#if CONFIG_DTRACE - slept = 1; -#endif - istate = lck_interlock_lock(lck); + + if (res == THREAD_WAITING) { + res = thread_block(THREAD_CONTINUE_NULL); + slept++; + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END, + (int)lck, res, slept, 0, 0); + } else { + lck_interlock_unlock(lck, istate); + break; } } - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END, - (int)lck, lck->lck_rw_shared_count, 0, 0, 0); } - - lck_interlock_unlock(lck, istate); #if CONFIG_DTRACE /* * We infer whether we took the sleep/spin path above by checking readers_at_sleep. */ - if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) { + if (dtrace_ls_enabled == TRUE) { if (slept == 0) { LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0); } else { @@ -1374,50 +1504,48 @@ lck_rw_lock_shared_to_exclusive( (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep); } } - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1); #endif return (TRUE); } + /* * Routine: lck_rw_lock_exclusive_to_shared + * Function: + * assembly fast path has already dropped + * our exclusive state and bumped lck_rw_shared_count + * all we need to do here is determine if anyone + * needs to be awakened. 
*/ void -lck_rw_lock_exclusive_to_shared( - lck_rw_t *lck) +lck_rw_lock_exclusive_to_shared_gen( + lck_rw_t *lck, + int prior_lock_state) { - boolean_t wakeup_readers = FALSE; - boolean_t wakeup_writers = FALSE; - boolean_t istate; - - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START, - (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0); - - istate = lck_interlock_lock(lck); + lck_rw_t *fake_lck; - lck->lck_rw_shared_count++; - if (lck->lck_rw_want_upgrade) - lck->lck_rw_want_upgrade = FALSE; - else - lck->lck_rw_want_write = FALSE; - - if (lck->lck_w_waiting) { - lck->lck_w_waiting = FALSE; - wakeup_writers = TRUE; - } - if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) && - lck->lck_r_waiting) { - lck->lck_r_waiting = FALSE; - wakeup_readers = TRUE; - } + /* + * prior_lock_state is a snapshot of the 1st word of the + * lock in question... we'll fake up a pointer to it + * and carefully not access anything beyond what's defined + * in the first word of a lck_rw_t + */ + fake_lck = (lck_rw_t *)&prior_lock_state; - lck_interlock_unlock(lck, istate); + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START, + (int)lck, fake_lck->lck_rw_want_write, fake_lck->lck_rw_want_upgrade, 0, 0); - if (wakeup_readers) + /* + * don't wake up anyone waiting to take the lock exclusively + * since we hold a read count... when the read count drops to 0, + * the writers will be woken. 
+ * + * wake up any waiting readers if we don't have any writers waiting, + * or the lock is NOT marked as rw_priv_excl (writers have privilege) + */ + if (!(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting) && fake_lck->lck_r_waiting) thread_wakeup(RW_LOCK_READER_EVENT(lck)); - if (wakeup_writers) - thread_wakeup(RW_LOCK_WRITER_EVENT(lck)); KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END, (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0); @@ -1445,74 +1573,6 @@ lck_rw_try_lock( return(FALSE); } -/* - * Routine: lck_rw_try_lock_exclusive - * Function: - * Tries to get a write lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t -lck_rw_try_lock_exclusive( - lck_rw_t *lck) -{ - boolean_t istate; - - istate = lck_interlock_lock(lck); - - if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || lck->lck_rw_shared_count) { - /* - * Can't get lock. - */ - lck_interlock_unlock(lck, istate); - return(FALSE); - } - - /* - * Have lock. - */ - - lck->lck_rw_want_write = TRUE; - - lck_interlock_unlock(lck, istate); - -#if CONFIG_DTRACE - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1); -#endif - return(TRUE); -} - -/* - * Routine: lck_rw_try_lock_shared - * Function: - * Tries to get a read lock. - * - * Returns FALSE if the lock is not held on return. - */ - -boolean_t -lck_rw_try_lock_shared( - lck_rw_t *lck) -{ - boolean_t istate; - - istate = lck_interlock_lock(lck); -/* No reader priority check here... 
*/ - if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) { - lck_interlock_unlock(lck, istate); - return(FALSE); - } - - lck->lck_rw_shared_count++; - - lck_interlock_unlock(lck, istate); - -#if CONFIG_DTRACE - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0); -#endif - return(TRUE); -} void lck_rw_assert( @@ -1543,7 +1603,7 @@ lck_rw_assert( break; } - panic("rw lock (%p) not held (mode=%u)\n", lck, type); + panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck); } /* @@ -1594,6 +1654,8 @@ lck_mtx_ext_init( if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT; + + lck->lck_mtx.lck_mtx_ptr = (void *)LCK_MTX_PTR_EXTENDED; } /* @@ -1618,12 +1680,17 @@ lck_mtx_init( lck_mtx_ext_init(lck_ext, grp, lck_attr); lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; lck->lck_mtx_ptr = lck_ext; + lck->lck_mtx_ilocked = 1; } } else { - lck->lck_mtx_ilk = 0; - lck->lck_mtx_locked = 0; + lck->lck_mtx_owner = 0; + lck->lck_mtx_ptr = 0; lck->lck_mtx_waiters = 0; lck->lck_mtx_pri = 0; + lck->lck_mtx_ilocked = 0; + lck->lck_mtx_mlocked = 0; + lck->lck_mtx_promoted = 0; + lck->lck_mtx_spin = 0; } lck_grp_reference(grp); lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); @@ -1650,11 +1717,16 @@ lck_mtx_init_ext( lck_mtx_ext_init(lck_ext, grp, lck_attr); lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; lck->lck_mtx_ptr = lck_ext; + lck->lck_mtx_ilocked = 1; } else { - lck->lck_mtx_ilk = 0; - lck->lck_mtx_locked = 0; + lck->lck_mtx_owner = 0; + lck->lck_mtx_ptr = 0; lck->lck_mtx_waiters = 0; lck->lck_mtx_pri = 0; + lck->lck_mtx_ilocked = 0; + lck->lck_mtx_mlocked = 0; + lck->lck_mtx_promoted = 0; + lck->lck_mtx_spin = 0; } lck_grp_reference(grp); lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); @@ -1673,7 +1745,9 @@ lck_mtx_destroy( if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) return; lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT); - lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED; + + lck_mtx_lock_mark_destroyed(lck); + if (lck_is_indirect) 
kfree(lck->lck_mtx_ptr, sizeof(lck_mtx_ext_t)); lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX); @@ -1681,8 +1755,119 @@ lck_mtx_destroy( return; } + +#define LCK_MTX_LCK_WAIT_CODE 0x20 +#define LCK_MTX_LCK_WAKEUP_CODE 0x21 +#define LCK_MTX_LCK_SPIN_CODE 0x22 +#define LCK_MTX_LCK_ACQUIRE_CODE 0x23 +#define LCK_MTX_LCK_DEMOTE_CODE 0x24 + + +/* + * Routine: lck_mtx_unlock_wakeup_x86 + * + * Invoked on unlock when there is contention. + * + */ +void +lck_mtx_unlock_wakeup_x86 ( + lck_mtx_t *mutex, + int owner_was_promoted) +{ + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_START, (int)mutex, owner_was_promoted, mutex->lck_mtx_waiters, 0, 0); + + if (lck_mtx_lock_decr_waiter(mutex)) + thread_wakeup_one((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int))); + + if (owner_was_promoted) { + thread_t thread = current_thread(); + + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_DEMOTE_CODE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), thread->promotions, + thread->sched_mode & TH_MODE_PROMOTED, 0, 0); + + if (thread->promotions > 0) { + spl_t s = splsched(); + + thread_lock(thread); + + if (--thread->promotions == 0 && (thread->sched_mode & TH_MODE_PROMOTED)) { + + thread->sched_mode &= ~TH_MODE_PROMOTED; + + if (thread->sched_mode & TH_MODE_ISDEPRESSED) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, + thread->sched_pri, DEPRESSPRI, 0, mutex, 0); + + set_sched_pri(thread, DEPRESSPRI); + } + else { + if (thread->priority < thread->sched_pri) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, + thread->sched_pri, thread->priority, 0, mutex, 0); + + compute_priority(thread, FALSE); + } + } + } + thread_unlock(thread); + splx(s); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAKEUP_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0); +} + + +/* + * Routine: lck_mtx_lock_acquire_x86 + * + * Invoked on acquiring 
the mutex when there is + * contention. + * mutex is owned... interlock is not held + */ +void +lck_mtx_lock_acquire_x86( + lck_mtx_t *mutex) +{ + thread_t thread = current_thread(); + integer_t priority; + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_START, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0); + + priority = lck_mtx_lock_get_pri(mutex); + + if (thread->sched_pri < priority) { + + if (lck_mtx_lock_mark_promoted(mutex)) { + spl_t s = splsched(); + + thread_lock(thread); + + if (thread->sched_pri < priority) { + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, + thread->sched_pri, priority, 0, mutex, 0); + + set_sched_pri(thread, priority); + } + thread->promotions++; + thread->sched_mode |= TH_MODE_PROMOTED; + + thread_unlock(thread); + splx(s); + } + } + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_ACQUIRE_CODE) | DBG_FUNC_END, (int)mutex, 0, mutex->lck_mtx_waiters, 0, 0); +} + + + /* - * Routine: lck_mtx_lock_spinwait + * Routine: lck_mtx_lock_spinwait_x86 * * Invoked trying to acquire a mutex when there is contention but * the holder is running on another processor. We spin for up to a maximum @@ -1690,42 +1875,50 @@ lck_mtx_destroy( * * Called with the interlock unlocked. 
*/ -void -lck_mtx_lock_spinwait( - lck_mtx_t *lck) +int +lck_mtx_lock_spinwait_x86( + lck_mtx_t *mutex) { - thread_t holder; - volatile lck_mtx_t *mutex; - uint64_t deadline; - - if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) - mutex = lck; - else - mutex = &lck->lck_mtx_ptr->lck_mtx; + thread_t holder; + uint64_t deadline; + int retval = 1; + int loopcount = 0; KERNEL_DEBUG( - MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_NONE, - (int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START, + (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0); deadline = mach_absolute_time() + MutexSpin; + /* * Spin while: * - mutex is locked, and - * - its locked as a spin lock, or + * - its locked as a spin lock, and * - owner is running on another processor, and * - owner (processor) is not idling, and * - we haven't spun for long enough. */ - while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL) { - if ((holder == (thread_t)MUTEX_LOCKED_AS_SPIN) || - ((holder->machine.specFlags & OnProc) != 0 && - (holder->state & TH_IDLE) == 0 && - mach_absolute_time() < deadline)) { - cpu_pause(); - continue; + do { + if (lck_mtx_lock_grab_mutex(mutex)) { + retval = 0; + break; } - break; - } + if ((holder = (thread_t) mutex->lck_mtx_owner) != NULL) { + + if ( !(holder->machine.specFlags & OnProc) || + (holder->state & TH_IDLE)) { + if (loopcount == 0) + retval = 2; + break; + } + } + cpu_pause(); + + loopcount++; + + } while (mach_absolute_time() < deadline); + + #if CONFIG_DTRACE /* * We've already kept a count via deadline of how long we spun. @@ -1737,27 +1930,113 @@ lck_mtx_lock_spinwait( * penalize only lock groups that have debug/stats enabled * with dtrace processing if desired. 
*/ - if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) { - LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lck, + if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) { + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, mutex, mach_absolute_time() - (deadline - MutexSpin)); } else { - LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lck, + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, mutex, mach_absolute_time() - (deadline - MutexSpin)); } /* The lockstat acquire event is recorded by the assembly code beneath us. */ #endif + + KERNEL_DEBUG( + MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END, + (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, retval, 0); + + return retval; } + + /* - * Called from assembly code when a destroyed mutex is detected - * during a lock/unlock/try/convert + * Routine: lck_mtx_lock_wait_x86 + * + * Invoked in order to wait on contention. + * + * Called with the interlock locked and + * returns it unlocked. */ - void -lck_mtx_interlock_panic( - lck_mtx_t *lck) +lck_mtx_lock_wait_x86 ( + lck_mtx_t *mutex) { - panic("trying to interlock destroyed mutex %p", lck); + thread_t self = current_thread(); + thread_t holder; + integer_t priority; + integer_t old_lck_mtx_pri; + spl_t s; +#if CONFIG_DTRACE + uint64_t sleep_start = 0; + + if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) { + sleep_start = mach_absolute_time(); + } +#endif + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0); + + priority = self->sched_pri; + + if (priority < self->priority) + priority = self->priority; + if (priority < BASEPRI_DEFAULT) + priority = BASEPRI_DEFAULT; + + if (mutex->lck_mtx_waiters == 0) + old_lck_mtx_pri = 0; + else + old_lck_mtx_pri = mutex->lck_mtx_pri; + + if (old_lck_mtx_pri < priority) + mutex->lck_mtx_pri = priority; + + if ( (holder = (thread_t)mutex->lck_mtx_owner) ) { + + s = splsched(); + 
thread_lock(holder); + + if (holder->sched_pri < priority) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE, + holder->sched_pri, priority, holder, mutex, 0); + + set_sched_pri(holder, priority); + + if (mutex->lck_mtx_promoted == 0) { + holder->promotions++; + holder->sched_mode |= TH_MODE_PROMOTED; + + mutex->lck_mtx_promoted = 1; + } + } + thread_unlock(holder); + splx(s); + } + mutex->lck_mtx_waiters++; + + assert_wait((event_t)(((unsigned int*)mutex)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT); + + lck_mtx_ilk_unlock(mutex); + + thread_block(THREAD_CONTINUE_NULL); + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, (int)mutex, (int)mutex->lck_mtx_owner, mutex->lck_mtx_waiters, 0, 0); + +#if CONFIG_DTRACE + /* + * Record the Dtrace lockstat probe for blocking, block time + * measured from when we were entered. + */ + if (sleep_start) { + if (mutex->lck_mtx_ptr != (void *)LCK_MTX_PTR_EXTENDED) { + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, mutex, + mach_absolute_time() - sleep_start); + } else { + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, mutex, + mach_absolute_time() - sleep_start); + } + } +#endif } @@ -1779,56 +2058,12 @@ db_show_one_lock( TRUE, (db_expr_t)0, (char *)0); } -#endif /* MACH_KDB */ - -/* - * The C portion of the mutex package. These routines are only invoked - * if the optimized assembler routines can't do the work. - */ - -/* - * Routine: lock_alloc - * Function: - * Allocate a mutex for external users who cannot - * hard-code the structure definition into their - * objects. - * For now just use kalloc, but a zone is probably - * warranted. - */ -mutex_t * -mutex_alloc( - unsigned short tag) -{ - mutex_t *m; - - if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0) - mutex_init(m, tag); - return(m); -} - -/* - * Routine: mutex_free - * Function: - * Free a mutex allocated for external users. - * For now just use kfree, but a zone is probably - * warranted. 
- */ -void -mutex_free( - mutex_t *m) -{ - kfree(m, sizeof(mutex_t)); -} - - -#if MACH_KDB /* * Routines to print out simple_locks and mutexes in a nicely-formatted * fashion. */ const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; -const char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; void db_show_one_simple_lock ( @@ -1865,37 +2100,4 @@ db_print_simple_lock ( db_printf ("\n"); } -void -db_show_one_mutex ( - db_expr_t addr, - boolean_t have_addr, - __unused db_expr_t count, - __unused char * modif) -{ - mutex_t * maddr = (mutex_t *)((vm_offset_t) addr); - - if (maddr == (mutex_t *)0 || !have_addr) - db_error ("No mutex\n"); -#if MACH_LDEBUG - else if (maddr->type != MUTEX_TAG) - db_error ("Not a mutex\n"); -#endif /* MACH_LDEBUG */ - - db_printf ("%s\n", mutex_labels); - db_print_mutex (maddr); -} - -void -db_print_mutex ( - mutex_t * addr) -{ - db_printf ("%08x %6d %7d", - addr, *addr, addr->lck_mtx.lck_mtx_waiters); -#if MACH_LDEBUG - db_printf (" %08x ", addr->thread); - db_printsym (addr->pc, DB_STGY_ANY); -#endif /* MACH_LDEBUG */ - db_printf ("\n"); -} - #endif /* MACH_KDB */ diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s index 5b57ee4cc..b58b7ece7 100644 --- a/osfmk/i386/locore.s +++ b/osfmk/i386/locore.s @@ -72,12 +72,17 @@ #include #include #include +#include #define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ #include #include + +#define CLI cli +#define STI sti + /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). 
@@ -190,7 +195,16 @@ LEXT(recover_table) ;\ .align 2 ;\ .globl EXT(recover_table_end) ;\ LEXT(recover_table_end) ;\ - .text + .long 0 /* workaround see comment below */ ;\ + .text ; + +/* TODO FIXME + * the .long 0 is to work around a linker bug (insert radar# here) + * basically recover_table_end has zero size and bumps up right against saved_esp in acpi_wakeup.s + * recover_table_end is in __RECOVER,__vectors and saved_esp is in __SLEEP,__data, but they're right next to each + * other and so the linker combines them and incorrectly relocates everything referencing recover_table_end to point + * into the SLEEP section + */ /* * Allocate recovery and table. @@ -245,12 +259,12 @@ Entry(timer_grab) /* * Add 64-bit delta in register dreg : areg to timer pointed to by register treg. */ -#define TIMER_UPDATE(treg,dreg,areg) \ - addl TIMER_LOW(treg),areg /* add low bits */ ; \ - adcl dreg,TIMER_HIGH(treg) /* add carry high bits */ ; \ - movl areg,TIMER_LOW(treg) /* store updated low bit */ ; \ - movl TIMER_HIGH(treg),dreg /* copy high bits */ ; \ - movl dreg,TIMER_HIGHCHK(treg) /* to high check */ +#define TIMER_UPDATE(treg,dreg,areg,offset) \ + addl (TIMER_LOW+(offset))(treg),areg /* add low bits */ ;\ + adcl dreg,(TIMER_HIGH+(offset))(treg) /* add carry high bits */ ;\ + movl areg,(TIMER_LOW+(offset))(treg) /* store updated low bit */ ;\ + movl (TIMER_HIGH+(offset))(treg),dreg /* copy high bits */ ;\ + movl dreg,(TIMER_HIGHCHK+(offset))(treg) /* to high check */ /* * Add time delta to old timer and start new. 
@@ -259,22 +273,21 @@ Entry(timer_grab) NANOTIME /* edx:eax nanosecs */ ; \ movl %eax,%esi /* save timestamp */ ; \ movl %edx,%edi /* save timestamp */ ; \ + movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ ; \ + subl (old##_TIMER)+TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ + sbbl (old##_TIMER)+TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ + TIMER_UPDATE(%ecx,%edx,%eax,old##_TIMER) /* update timer */ ; \ + movl %esi,(new##_TIMER)+TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ + movl %edi,(new##_TIMER)+TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ + leal (new##_TIMER)(%ecx), %ecx /* compute new timer pointer */ ; \ movl %gs:CPU_PROCESSOR,%ebx /* get current processor */ ; \ - movl THREAD_TIMER(%ebx),%ecx /* get current timer */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ - addl $(new##_TIMER-old##_TIMER),%ecx /* point to new timer */ ; \ - movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ movl %ecx,THREAD_TIMER(%ebx) /* set current timer */ ; \ movl %esi,%eax /* restore timestamp */ ; \ movl %edi,%edx /* restore timestamp */ ; \ - movl CURRENT_STATE(%ebx),%ecx /* current state */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ - addl $(new##_STATE-old##_STATE),%ecx /* point to new state */ ; \ + subl (old##_STATE)+TIMER_TSTAMP(%ebx),%eax /* compute elapsed time */ ; \ + sbbl (old##_STATE)+TIMER_TSTAMP+4(%ebx),%edx /* compute elapsed time */ ; \ + TIMER_UPDATE(%ebx,%edx,%eax,old##_STATE) /* update timer */ ; \ + leal (new##_STATE)(%ebx),%ecx /* compute new state pointer */ ; \ movl %ecx,CURRENT_STATE(%ebx) /* set current state */ ; \ movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ movl 
%edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ @@ -306,7 +319,7 @@ Entry(timer_grab) movl THREAD_TIMER(%ebx),%ecx /* get current timer */ ; \ subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ + TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ movl KERNEL_TIMER(%ebx),%ecx /* point to kernel timer */ ; \ movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ @@ -316,7 +329,7 @@ Entry(timer_grab) pushl %ecx /* save state */ ; \ subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ + TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ leal IDLE_STATE(%ebx),%eax /* get idle state */ ; \ cmpl %eax,%ecx /* compare current state */ ; \ je 0f /* skip if equal */ ; \ @@ -340,7 +353,7 @@ Entry(timer_grab) movl KERNEL_TIMER(%ebx),%ecx /* point to kernel timer */ ; \ subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ + TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ movl THREAD_TIMER(%ebx),%ecx /* interrupted timer */ ; \ movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ @@ -349,7 +362,7 @@ Entry(timer_grab) movl CURRENT_STATE(%ebx),%ecx /* get current state */ ; \ subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax) /* update timer */ ; \ + TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ popl %ecx /* restore state */ ; \ movl %ecx,CURRENT_STATE(%ebx) /* set current state */ ; \ movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ @@ -452,8 +465,8 @@ Entry(db_task_start) movl %esp,%edx 
subl $(ISS32_SIZE),%edx movl %edx,%esp /* allocate x86_saved_state on stack */ - movl %eax,R_ERR(%esp) - movl %ebx,R_TRAPNO(%esp) + movl %eax,R32_ERR(%esp) + movl %ebx,R32_TRAPNO(%esp) pushl %edx CPU_NUMBER(%edx) movl CX(EXT(master_dbtss),%edx),%edx @@ -472,15 +485,23 @@ Entry(db_task_start) /* * Called as a function, makes the current thread * return from the kernel as if from an exception. + * We will consult with DTrace if this is a + * newly created thread and we need to fire a probe. */ .globl EXT(thread_exception_return) .globl EXT(thread_bootstrap_return) -LEXT(thread_exception_return) LEXT(thread_bootstrap_return) - cli +#if CONFIG_DTRACE + call EXT(dtrace_thread_bootstrap) +#endif + +LEXT(thread_exception_return) + CLI movl %gs:CPU_KERNEL_STACK,%ecx + movl (%ecx),%esp /* switch back to PCB stack */ + xorl %ecx,%ecx /* don't check if we're in the PFZ */ jmp EXT(return_from_trap) Entry(call_continuation) @@ -509,14 +530,14 @@ Entry(call_continuation) * cr3 -> kernel directory * esp -> low based stack * gs -> CPU_DATA_GS - * cs -> KERNEL_CS + * cs -> KERNEL32_CS * ss/ds/es -> KERNEL_DS * * interrupts disabled * direction flag cleared */ Entry(lo_alltraps) - movl R_CS(%esp),%eax /* assume 32-bit state */ + movl R32_CS(%esp),%eax /* assume 32-bit state */ cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ jne 1f movl R64_CS(%esp),%eax /* 64-bit user mode */ @@ -548,30 +569,63 @@ Entry(lo_alltraps) CCALL1(user_trap, %ebx) /* call user trap routine */ cli /* hold off intrs - critical section */ popl %esp /* switch back to PCB stack */ - + xorl %ecx,%ecx /* don't check if we're in the PFZ */ + /* * Return from trap or system call, checking for ASTs. 
* On lowbase PCB stack with intrs disabled */ LEXT(return_from_trap) - movl %gs:CPU_PENDING_AST,%eax - testl %eax,%eax + movl %gs:CPU_PENDING_AST, %eax + testl %eax, %eax je EXT(return_to_user) /* branch if no AST */ - movl %gs:CPU_KERNEL_STACK,%ebx - xchgl %ebx,%esp /* switch to kernel stack */ - sti /* interrupts always enabled on return to user mode */ +LEXT(return_from_trap_with_ast) + movl %gs:CPU_KERNEL_STACK, %ebx + xchgl %ebx, %esp /* switch to kernel stack */ + testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */ + je 2f /* no, go handle the AST */ + cmpl $(SS_64), SS_FLAVOR(%ebx) /* are we a 64-bit task? */ + je 1f + /* no... 32-bit user mode */ + movl R32_EIP(%ebx), %eax + pushl %ebx /* save PCB stack */ + xorl %ebp, %ebp /* clear frame pointer */ + CCALL1(commpage_is_in_pfz32, %eax) + popl %ebx /* retrieve pointer to PCB stack */ + testl %eax, %eax + je 2f /* not in the PFZ... go service AST */ + movl %eax, R32_EBX(%ebx) /* let the PFZ know we've pended an AST */ + xchgl %ebx, %esp /* switch back to PCB stack */ + jmp EXT(return_to_user) +1: /* 64-bit user mode */ + movl R64_RIP(%ebx), %ecx + movl R64_RIP+4(%ebx), %eax pushl %ebx /* save PCB stack */ - xorl %ebp,%ebp /* Clear framepointer */ + xorl %ebp, %ebp /* clear frame pointer */ + CCALL2(commpage_is_in_pfz64, %ecx, %eax) + popl %ebx /* retrieve pointer to PCB stack */ + testl %eax, %eax + je 2f /* not in the PFZ... 
go service AST */ + movl %eax, R64_RBX(%ebx) /* let the PFZ know we've pended an AST */ + xchgl %ebx, %esp /* switch back to PCB stack */ + jmp EXT(return_to_user) +2: + STI /* interrupts always enabled on return to user mode */ + pushl %ebx /* save PCB stack */ + xorl %ebp, %ebp /* Clear framepointer */ CCALL1(i386_astintr, $0) /* take the AST */ - cli + CLI + popl %esp /* switch back to PCB stack (w/exc link) */ + + xorl %ecx, %ecx /* don't check if we're in the PFZ */ jmp EXT(return_from_trap) /* and check again (rare) */ LEXT(return_to_user) TIME_TRAP_UEXIT - + LEXT(ret_to_user) cmpl $0, %gs:CPU_IS64BIT je EXT(lo_ret_to_user) @@ -586,7 +640,7 @@ LEXT(ret_to_user) */ trap_from_kernel: movl %esp, %eax /* saved state addr */ - pushl R_EIP(%esp) /* Simulate a CALL from fault point */ + pushl R32_EIP(%esp) /* Simulate a CALL from fault point */ pushl %ebp /* Extend framepointer chain */ movl %esp, %ebp CCALL1(kernel_trap, %eax) /* Call kernel trap handler */ @@ -597,16 +651,16 @@ trap_from_kernel: movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ testl $ AST_URGENT,%eax /* any urgent preemption? */ je ret_to_kernel /* no, nothing to do */ - cmpl $ T_PREEMPT,R_TRAPNO(%esp) + cmpl $ T_PREEMPT,R32_TRAPNO(%esp) je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ - testl $ EFL_IF,R_EFLAGS(%esp) /* interrupts disabled? */ + testl $ EFL_IF,R32_EFLAGS(%esp) /* interrupts disabled? */ je ret_to_kernel cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ jne ret_to_kernel movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx - andl $(-KERNEL_STACK_SIZE),%ecx + and EXT(kernel_stack_mask),%ecx testl %ecx,%ecx /* are we on the kernel stack? 
*/ jne ret_to_kernel /* no, skip it */ @@ -627,7 +681,7 @@ ret_to_kernel: * cr3 -> kernel directory * esp -> low based stack * gs -> CPU_DATA_GS - * cs -> KERNEL_CS + * cs -> KERNEL32_CS * ss/ds/es -> KERNEL_DS * * interrupts disabled @@ -675,7 +729,7 @@ Entry(lo_allintrs) incl %gs:CPU_INTERRUPT_LEVEL movl %gs:CPU_INT_STATE, %eax - CCALL1(PE_incoming_interrupt, %eax) /* call generic interrupt routine */ + CCALL1(interrupt, %eax) /* call generic interrupt routine */ cli /* just in case we returned with intrs enabled */ xorl %eax,%eax @@ -704,7 +758,7 @@ Entry(lo_allintrs) popl %esp /* switch back to old stack */ /* Load interrupted code segment into %eax */ - movl R_CS(%esp),%eax /* assume 32-bit state */ + movl R32_CS(%esp),%eax /* assume 32-bit state */ cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ jne 3f movl R64_CS(%esp),%eax /* 64-bit user mode */ @@ -726,7 +780,7 @@ Entry(lo_allintrs) movl %gs:CPU_KERNEL_STACK,%eax movl %esp,%ecx xorl %eax,%ecx - andl $(-KERNEL_STACK_SIZE),%ecx + and EXT(kernel_stack_mask),%ecx testl %ecx,%ecx /* are we on the kernel stack? 
*/ jne ret_to_kernel /* no, skip it */ @@ -748,7 +802,7 @@ int_from_intstack: incl %gs:CPU_INTERRUPT_LEVEL movl %esp, %edx /* x86_saved_state */ - CCALL1(PE_incoming_interrupt, %edx) + CCALL1(interrupt, %edx) decl %gs:CPU_INTERRUPT_LEVEL decl %gs:CPU_PREEMPTION_LEVEL @@ -765,7 +819,8 @@ ast_from_interrupt_user: TIME_TRAP_UENTRY - jmp EXT(return_from_trap) /* return */ + movl $1, %ecx /* check if we're in the PFZ */ + jmp EXT(return_from_trap_with_ast) /* return */ /******************************************************************************************************* @@ -777,7 +832,7 @@ ast_from_interrupt_user: * cr3 -> kernel directory * esp -> low based stack * gs -> CPU_DATA_GS - * cs -> KERNEL_CS + * cs -> KERNEL32_CS * ss/ds/es -> KERNEL_DS * * interrupts disabled @@ -789,7 +844,7 @@ Entry(lo_sysenter) * We can be here either for a mach syscall or a unix syscall, * as indicated by the sign of the code: */ - movl R_EAX(%esp),%eax + movl R32_EAX(%esp),%eax testl %eax,%eax js EXT(lo_mach_scall) /* < 0 => mach */ /* > 0 => unix */ @@ -928,7 +983,7 @@ Entry(lo_diag_scall) * cr3 -> kernel directory * esp -> low based stack * gs -> CPU_DATA_GS - * cs -> KERNEL_CS + * cs -> KERNEL32_CS * ss/ds/es -> KERNEL_DS * * interrupts disabled @@ -1280,103 +1335,6 @@ copyout_fail: #endif /* MACH_ASSERT */ - -#if MACH_KDB || MACH_ASSERT - -/* - * Following routines are also defined as macros in i386/pio.h - * Compile then when MACH_KDB is configured so that they - * can be invoked from the debugger. - */ - -/* - * void outb(unsigned char *io_port, - * unsigned char byte) - * - * Output a byte to an IO port. - */ -ENTRY(outb) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address */ - movl ARG1,%eax /* data to output */ - outb %al,%dx /* send it out */ - POP_FRAME - ret - -/* - * unsigned char inb(unsigned char *io_port) - * - * Input a byte from an IO port. 
- */ -ENTRY(inb) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address */ - xor %eax,%eax /* clear high bits of register */ - inb %dx,%al /* get the byte */ - POP_FRAME - ret - -/* - * void outw(unsigned short *io_port, - * unsigned short word) - * - * Output a word to an IO port. - */ -ENTRY(outw) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address */ - movl ARG1,%eax /* data to output */ - outw %ax,%dx /* send it out */ - POP_FRAME - ret - -/* - * unsigned short inw(unsigned short *io_port) - * - * Input a word from an IO port. - */ -ENTRY(inw) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address */ - xor %eax,%eax /* clear high bits of register */ - inw %dx,%ax /* get the word */ - POP_FRAME - ret - -/* - * void outl(unsigned int *io_port, - * unsigned int byte) - * - * Output an int to an IO port. - */ -ENTRY(outl) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address*/ - movl ARG1,%eax /* data to output */ - outl %eax,%dx /* send it out */ - POP_FRAME - ret - -/* - * unsigned int inl(unsigned int *io_port) - * - * Input an int from an IO port. - */ -ENTRY(inl) - PUSH_FRAME - ILL_ON_SLAVE - movl ARG0,%edx /* IO port address */ - inl %dx,%eax /* get the int */ - POP_FRAME - ret - -#endif /* MACH_KDB || MACH_ASSERT*/ - /* * void loutb(unsigned byte *io_port, * unsigned byte *data, @@ -1721,7 +1679,7 @@ ENTRY(mul_scale) * Double-fault exception handler task. The last gasp... */ Entry(df_task_start) - CCALL1(panic_double_fault, $(T_DOUBLE_FAULT)) + CCALL1(panic_double_fault32, $(T_DOUBLE_FAULT)) hlt @@ -1729,7 +1687,7 @@ Entry(df_task_start) * machine-check handler task. The last gasp... 
*/ Entry(mc_task_start) - CCALL1(panic_machine_check, $(T_MACHINE_CHECK)) + CCALL1(panic_machine_check32, $(T_MACHINE_CHECK)) hlt /* diff --git a/osfmk/i386/loose_ends.c b/osfmk/i386/loose_ends.c index d5ed99e51..6df816c55 100644 --- a/osfmk/i386/loose_ends.c +++ b/osfmk/i386/loose_ends.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -77,6 +78,7 @@ #include #include + #if 0 #undef KERNEL_DEBUG @@ -103,10 +105,13 @@ void machine_callstack(natural_t *buf, vm_size_t callstack_max); #define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL) #define low32(x) ((unsigned int)((x) & 0x00000000FFFFFFFFLL)) + + + void bzero_phys_nc( addr64_t src64, - vm_size_t bytes) + uint32_t bytes) { bzero_phys(src64,bytes); } @@ -114,7 +119,7 @@ bzero_phys_nc( void bzero_phys( addr64_t src64, - vm_size_t bytes) + uint32_t bytes) { mapwindow_t *map; @@ -188,7 +193,7 @@ ovbcopy( /* - * Read data from a physical address. Memory should not be cache inhibited. + * Read data from a physical address. */ @@ -244,8 +249,6 @@ ml_phys_read_long_long(pmap_paddr_t paddr ) return result; } - - unsigned int ml_phys_read( vm_offset_t paddr) { return ml_phys_read_data((pmap_paddr_t)paddr, 4); @@ -299,7 +302,7 @@ unsigned long long ml_phys_read_double_64(addr64_t paddr64) /* - * Write data to a physical address. Memory should not be cache inhibited. + * Write data to a physical address. 
*/ static void @@ -414,7 +417,7 @@ ml_probe_read(vm_offset_t paddr, unsigned int *val) if ((PAGE_SIZE - (paddr & PAGE_MASK)) < 4) return FALSE; - *val = ml_phys_read((pmap_paddr_t)paddr); + *val = ml_phys_read(paddr); return TRUE; } @@ -559,13 +562,13 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) istate = ml_set_interrupts_enabled(FALSE); - offset = pa & (linesize - 1); + offset = (uint32_t)(pa & (linesize - 1)); addr = pa - offset; map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID)); count += offset; - offset = addr & ((addr64_t) (page_size - 1)); + offset = (uint32_t)(addr & ((addr64_t) (page_size - 1))); chunk = page_size - offset; do @@ -735,7 +738,6 @@ static int copyio_phys(addr64_t, addr64_t, vm_size_t, int); #define COPYOUTPHYS 4 - void inval_copy_windows(thread_t thread) { int i; @@ -746,7 +748,7 @@ void inval_copy_windows(thread_t thread) thread->machine.nxt_window = 0; thread->machine.copyio_state = WINDOWS_DIRTY; - KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (int)thread, (int)thread->map, 0, 0, 0); + KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0); } @@ -825,7 +827,7 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, return (error); } user_base = user_addr & ~((user_addr_t)(NBPDE - 1)); - user_offset = user_addr & (NBPDE - 1); + user_offset = (vm_offset_t)(user_addr & (NBPDE - 1)); KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base, (int)user_offset, 0, 0); @@ -892,7 +894,7 @@ copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, kpdp += window_index; if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) { - panic("copyio: user pdp mismatch - kpdp = 0x%x, updp = 0x%x\n", kpdp, updp); + panic("copyio: user pdp mismatch - kpdp = 0x%qx, updp = 0x%qx\n", *kpdp, *updp); } (void) ml_set_interrupts_enabled(istate); } @@ -1079,6 +1081,7 @@ copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which) * 
flushing the tlb after it reloaded the page table from machine.physwindow_pte */ istate = ml_set_interrupts_enabled(FALSE); + pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry); (void) ml_set_interrupts_enabled(istate); @@ -1121,13 +1124,13 @@ copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_ int copyoutmsg(const char *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) { - return (copyio(COPYOUT, user_addr, kernel_addr, nbytes, NULL, 0)); + return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0)); } int copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) { - return (copyio(COPYOUT, user_addr, kernel_addr, nbytes, NULL, 0)); + return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0)); } @@ -1208,3 +1211,15 @@ kdp_register_callout(void) { } #endif + +#if !CONFIG_VMX +int host_vmxon(boolean_t exclusive __unused) +{ + return VMX_UNSUPPORTED; +} + +void host_vmxoff(void) +{ + return; +} +#endif diff --git a/osfmk/i386/lowglobals.h b/osfmk/i386/lowglobals.h index a8e5ddd52..5b8d2ba97 100644 --- a/osfmk/i386/lowglobals.h +++ b/osfmk/i386/lowglobals.h @@ -32,6 +32,12 @@ #ifndef _LOW_MEMORY_GLOBALS_H_ #define _LOW_MEMORY_GLOBALS_H_ +#if defined(__x86_64__) +#include +#elif !defined(__i386__) +#error Wrong architecture - this file is meant for i386 +#endif + #include #include #include diff --git a/osfmk/i386/machdep_call.c b/osfmk/i386/machdep_call.c index a21a9a792..9152b8741 100644 --- a/osfmk/i386/machdep_call.c +++ b/osfmk/i386/machdep_call.c @@ -43,8 +43,8 @@ extern kern_return_t kern_invalid(void); machdep_call_t machdep_call_table[] = { - MACHDEP_CALL_ROUTINE(thread_get_cthread_self,0), - MACHDEP_CALL_ROUTINE(thread_set_cthread_self,1), + MACHDEP_CALL_ROUTINE(kern_invalid,0), + MACHDEP_CALL_ROUTINE(kern_invalid,0), MACHDEP_CALL_ROUTINE(kern_invalid,0), MACHDEP_CALL_ROUTINE(thread_fast_set_cthread_self,1), 
MACHDEP_CALL_ROUTINE(thread_set_user_ldt,3), diff --git a/osfmk/i386/machdep_call.h b/osfmk/i386/machdep_call.h index bfc7c55ec..63cbf08cb 100644 --- a/osfmk/i386/machdep_call.h +++ b/osfmk/i386/machdep_call.h @@ -68,8 +68,6 @@ extern machdep_call_t machdep_call_table64[]; extern int machdep_call_count; -extern kern_return_t thread_get_cthread_self(void); -extern kern_return_t thread_set_cthread_self(uint32_t); extern kern_return_t thread_fast_set_cthread_self(uint32_t); extern kern_return_t thread_fast_set_cthread_self64(uint64_t); extern kern_return_t thread_set_user_ldt(uint32_t,uint32_t,uint32_t); diff --git a/osfmk/i386/machine_check.h b/osfmk/i386/machine_check.h index 7ecf69403..e940fa8c0 100644 --- a/osfmk/i386/machine_check.h +++ b/osfmk/i386/machine_check.h @@ -29,6 +29,10 @@ #ifndef _I386_MACHINE_CHECK_H_ #define _I386_MACHINE_CHECK_H_ +#include + +#include + /* * This header defines the machine check architecture for Pentium4 and Xeon. */ @@ -167,7 +171,6 @@ typedef union { #define MC8_MMM_WRITE 2 #define MC8_MMM_ADDRESS_COMMAND 3 #define MC8_MMM_RESERVED 4 - typedef union { struct { uint64_t reserved1 :BITS(15,0); diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 019d7f82f..d0307ca7f 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,19 +36,18 @@ #include #include #include -#include #include #include #include -#include -#include #include -#include #include #include +#include #include +#include +#include #if MACH_KDB -#include +#include #include #include #include @@ -64,15 +63,15 @@ #define DBG(x...) 
#endif -extern thread_t Shutdown_context(thread_t thread, void (*doshutdown)(processor_t),processor_t processor); + extern void wakeup(void *); -extern unsigned KernelRelocOffset; static int max_cpus_initialized = 0; unsigned int LockTimeOut; unsigned int LockTimeOutTSC; unsigned int MutexSpin; +uint64_t LastDebuggerEntryAllowance; #define MAX_CPUS_SET 0x1 #define MAX_CPUS_WAIT 0x2 @@ -102,18 +101,15 @@ void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) } -vm_offset_t -ml_boot_ptovirt( - vm_offset_t paddr) -{ - return (vm_offset_t)((paddr-KernelRelocOffset) | LINEAR_KERNEL_ADDRESS); -} - vm_offset_t ml_static_ptovirt( vm_offset_t paddr) { - return (vm_offset_t)((unsigned) paddr | LINEAR_KERNEL_ADDRESS); +#if defined(__x86_64__) + return (vm_offset_t)(((unsigned long) paddr) | VM_MIN_KERNEL_ADDRESS); +#else + return (vm_offset_t)((paddr) | LINEAR_KERNEL_ADDRESS); +#endif } @@ -126,17 +122,18 @@ ml_static_mfree( vm_offset_t vaddr, vm_size_t size) { - vm_offset_t vaddr_cur; + addr64_t vaddr_cur; ppnum_t ppn; -// if (vaddr < VM_MIN_KERNEL_ADDRESS) return; + assert(vaddr >= VM_MIN_KERNEL_ADDRESS); assert((vaddr & (PAGE_SIZE-1)) == 0); /* must be page aligned */ + for (vaddr_cur = vaddr; - vaddr_cur < round_page_32(vaddr+size); + vaddr_cur < round_page_64(vaddr+size); vaddr_cur += PAGE_SIZE) { - ppn = pmap_find_phys(kernel_pmap, (addr64_t)vaddr_cur); + ppn = pmap_find_phys(kernel_pmap, vaddr_cur); if (ppn != (vm_offset_t)NULL) { kernel_pmap->stats.resident_count++; if (kernel_pmap->stats.resident_count > @@ -144,7 +141,7 @@ ml_static_mfree( kernel_pmap->stats.resident_max = kernel_pmap->stats.resident_count; } - pmap_remove(kernel_pmap, (addr64_t)vaddr_cur, (addr64_t)(vaddr_cur+PAGE_SIZE)); + pmap_remove(kernel_pmap, vaddr_cur, vaddr_cur+PAGE_SIZE); vm_page_create(ppn,(ppn+1)); vm_page_wire_count--; } @@ -156,7 +153,7 @@ ml_static_mfree( vm_offset_t ml_vtophys( vm_offset_t vaddr) { - return kvtophys(vaddr); + return (vm_offset_t)kvtophys(vaddr); } /* @@ 
-182,11 +179,11 @@ vm_size_t ml_nofault_copy( break; if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) break; - count = PAGE_SIZE - (cur_phys_src & PAGE_MASK); + count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) - count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); + count = (uint32_t)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK)); if (count > size) - count = size; + count = (uint32_t)size; bcopy_phys(cur_phys_src, cur_phys_dst, count); @@ -207,12 +204,14 @@ void ml_init_interrupt(void) (void) ml_set_interrupts_enabled(TRUE); } + + /* Get Interrupts Enabled */ boolean_t ml_get_interrupts_enabled(void) { unsigned long flags; - __asm__ volatile("pushf; popl %0" : "=r" (flags)); + __asm__ volatile("pushf; pop %0" : "=r" (flags)); return (flags & EFL_IF) != 0; } @@ -221,7 +220,7 @@ boolean_t ml_set_interrupts_enabled(boolean_t enable) { unsigned long flags; - __asm__ volatile("pushf; popl %0" : "=r" (flags)); + __asm__ volatile("pushf; pop %0" : "=r" (flags)); if (enable) { ast_t *myast; @@ -296,27 +295,14 @@ void machine_signal_idle( processor_t processor) { - cpu_interrupt(processor->cpu_num); + cpu_interrupt(processor->cpu_id); } -thread_t -machine_processor_shutdown( - thread_t thread, - void (*doshutdown)(processor_t), - processor_t processor) -{ - vmx_suspend(); - fpu_save_context(thread); - return(Shutdown_context(thread, doshutdown, processor)); -} - -kern_return_t -ml_processor_register( - cpu_id_t cpu_id, - uint32_t lapic_id, - processor_t *processor_out, - ipi_handler_t *ipi_handler, - boolean_t boot_cpu) +static kern_return_t +register_cpu( + uint32_t lapic_id, + processor_t *processor_out, + boolean_t boot_cpu ) { int target_cpu; cpu_data_t *this_cpu_datap; @@ -331,7 +317,9 @@ ml_processor_register( lapic_cpu_map(lapic_id, target_cpu); - this_cpu_datap->cpu_id = cpu_id; + /* The cpu_id is not known at registration phase. 
Just do + * lapic_id for now + */ this_cpu_datap->cpu_phys_number = lapic_id; this_cpu_datap->cpu_console_buf = console_cpu_alloc(boot_cpu); @@ -349,9 +337,11 @@ ml_processor_register( pmCPUStateInit(); +#if NCOPY_WINDOWS > 0 this_cpu_datap->cpu_pmap = pmap_cpu_alloc(boot_cpu); if (this_cpu_datap->cpu_pmap == NULL) goto failed; +#endif this_cpu_datap->cpu_processor = cpu_processor_alloc(boot_cpu); if (this_cpu_datap->cpu_processor == NULL) @@ -364,27 +354,73 @@ ml_processor_register( } *processor_out = this_cpu_datap->cpu_processor; - *ipi_handler = NULL; - - if (target_cpu == machine_info.max_cpus - 1) { - /* - * All processors are now registered but not started (except - * for this "in-limbo" boot processor). We call to the machine - * topology code to finalize and activate the topology. - */ - cpu_topology_start(); - } return KERN_SUCCESS; failed: cpu_processor_free(this_cpu_datap->cpu_processor); +#if NCOPY_WINDOWS > 0 pmap_cpu_free(this_cpu_datap->cpu_pmap); +#endif chudxnu_cpu_free(this_cpu_datap->cpu_chud); console_cpu_free(this_cpu_datap->cpu_console_buf); return KERN_FAILURE; } + +kern_return_t +ml_processor_register( + cpu_id_t cpu_id, + uint32_t lapic_id, + processor_t *processor_out, + boolean_t boot_cpu, + boolean_t start ) +{ + static boolean_t done_topo_sort = FALSE; + static uint32_t num_registered = 0; + + /* Register all CPUs first, and track max */ + if( start == FALSE ) + { + num_registered++; + + DBG( "registering CPU lapic id %d\n", lapic_id ); + + return register_cpu( lapic_id, processor_out, boot_cpu ); + } + + /* Sort by topology before we start anything */ + if( !done_topo_sort ) + { + DBG( "about to start CPUs. %d registered\n", num_registered ); + + cpu_topology_sort( num_registered ); + done_topo_sort = TRUE; + } + + /* Assign the cpu ID */ + uint32_t cpunum = -1; + cpu_data_t *this_cpu_datap = NULL; + + /* find cpu num and pointer */ + cpunum = ml_get_cpuid( lapic_id ); + + if( cpunum == 0xFFFFFFFF ) /* never heard of it? 
*/ + panic( "trying to start invalid/unregistered CPU %d\n", lapic_id ); + + this_cpu_datap = cpu_datap(cpunum); + + /* fix the CPU id */ + this_cpu_datap->cpu_id = cpu_id; + + /* output arg */ + *processor_out = this_cpu_datap->cpu_processor; + + /* OK, try and start this CPU */ + return cpu_topology_start_cpu( cpunum ); +} + + void ml_cpu_get_info(ml_cpu_info_t *cpu_infop) { @@ -398,7 +434,7 @@ ml_cpu_get_info(ml_cpu_info_t *cpu_infop) * Are we supporting MMX/SSE/SSE2/SSE3? * As distinct from whether the cpu has these capabilities. */ - os_supports_sse = get_cr4() & CR4_XMM; + os_supports_sse = !!(get_cr4() & CR4_XMM); if ((cpuid_features() & CPUID_FEATURE_SSE4_2) && os_supports_sse) cpu_infop->vector_unit = 8; else if ((cpuid_features() & CPUID_FEATURE_SSE4_1) && os_supports_sse) @@ -454,7 +490,7 @@ ml_init_max_cpus(unsigned long max_cpus) * that the kernel supports or that the "cpus=" * boot-arg has set. Here we take int minimum. */ - machine_info.max_cpus = MIN(max_cpus, max_ncpus); + machine_info.max_cpus = (integer_t)MIN(max_cpus, max_ncpus); } if (max_cpus_initialized == MAX_CPUS_WAIT) wakeup((event_t)&max_cpus_initialized); @@ -486,10 +522,15 @@ void ml_init_lock_timeout(void) { uint64_t abstime; - uint32_t mtxspin; + uint32_t mtxspin; + uint64_t default_timeout_ns = NSEC_PER_SEC>>2; + uint32_t slto; + + if (PE_parse_boot_argn("slto_us", &slto, sizeof (slto))) + default_timeout_ns = slto * NSEC_PER_USEC; /* LockTimeOut is absolutetime, LockTimeOutTSC is in TSC ticks */ - nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime); + nanoseconds_to_absolutetime(default_timeout_ns, &abstime); LockTimeOut = (uint32_t) abstime; LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t); @@ -501,6 +542,8 @@ ml_init_lock_timeout(void) nanoseconds_to_absolutetime(10*NSEC_PER_USEC, &abstime); } MutexSpin = (unsigned int)abstime; + + nanoseconds_to_absolutetime(2 * NSEC_PER_SEC, &LastDebuggerEntryAllowance); } /* @@ -570,6 +613,7 @@ void ml_cpu_set_ldt(int selector) 
current_cpu_datap()->cpu_ldt == KERNEL_LDT) return; +#if defined(__i386__) /* * If 64bit this requires a mode switch (and back). */ @@ -577,7 +621,10 @@ void ml_cpu_set_ldt(int selector) ml_64bit_lldt(selector); else lldt(selector); - current_cpu_datap()->cpu_ldt = selector; +#else + lldt(selector); +#endif + current_cpu_datap()->cpu_ldt = selector; } void ml_fp_setvalid(boolean_t value) @@ -590,6 +637,16 @@ uint64_t ml_cpu_int_event_time(void) return current_cpu_datap()->cpu_int_event_time; } +vm_offset_t ml_stack_remaining(void) +{ + uintptr_t local = (uintptr_t) &local; + + if (ml_at_interrupt_context() != 0) { + return (local - (current_cpu_datap()->cpu_int_stack_top - INTSTACK_SIZE)); + } else { + return (local - current_thread()->kernel_stack); + } +} #if MACH_KDB diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index cf0af4761..0112edf0c 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,6 +42,8 @@ __BEGIN_DECLS +#ifdef XNU_KERNEL_PRIVATE + /* are we a 64 bit platform ? 
*/ boolean_t ml_is64bit(void); @@ -65,21 +67,77 @@ void ml_cpu_set_ldt(int); /* Initialize Interrupts */ void ml_init_interrupt(void); -/* Get Interrupts Enabled */ -boolean_t ml_get_interrupts_enabled(void); - -/* Set Interrupts Enabled */ -boolean_t ml_set_interrupts_enabled(boolean_t enable); - -/* Check if running at interrupt context */ -boolean_t ml_at_interrupt_context(void); /* Generate a fake interrupt */ void ml_cause_interrupt(void); +/* Initialize Interrupts */ +void ml_install_interrupt_handler( + void *nub, + int source, + void *target, + IOInterruptHandler handler, + void *refCon); + void ml_get_timebase(unsigned long long *timestamp); void ml_init_lock_timeout(void); +vm_offset_t +ml_static_ptovirt( + vm_offset_t); + +void ml_static_mfree( + vm_offset_t, + vm_size_t); + +/* boot memory allocation */ +vm_offset_t ml_static_malloc( + vm_size_t size); + +/* virtual to physical on wired pages */ +vm_offset_t ml_vtophys( + vm_offset_t vaddr); + +vm_size_t ml_nofault_copy( + vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size); + +/* Machine topology info */ +uint64_t ml_cpu_cache_size(unsigned int level); +uint64_t ml_cpu_cache_sharing(unsigned int level); + +/* Initialize the maximum number of CPUs */ +void ml_init_max_cpus( + unsigned long max_cpus); + +extern void ml_cpu_up(void); +extern void ml_cpu_down(void); + +void bzero_phys_nc( + addr64_t phys_address, + uint32_t length); + +#if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE) +/* IO memory map services */ + +/* Map memory map IO space */ +vm_offset_t ml_io_map( + vm_offset_t phys_addr, + vm_size_t size); + +extern uint32_t bounce_pool_base; +extern uint32_t bounce_pool_size; + +void ml_get_bouncepool_info( + vm_offset_t *phys_addr, + vm_size_t *size); + + +#endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ + +#endif /* XNU_KERNEL_PRIVATE */ + +#ifdef KERNEL_PRIVATE + /* Type for the Time Base Enable function */ typedef void (*time_base_enable_t)(cpu_id_t cpu_id, 
boolean_t enable); @@ -100,31 +158,13 @@ typedef struct ml_processor_info ml_processor_info_t; /* Register a processor */ -kern_return_t ml_processor_register( - cpu_id_t cpu_id, - uint32_t lapic_id, - processor_t *processor, - ipi_handler_t *ipi_handler, - boolean_t boot_cpu); - -/* Initialize Interrupts */ -void ml_install_interrupt_handler( - void *nub, - int source, - void *target, - IOInterruptHandler handler, - void *refCon); - -#ifdef __APPLE_API_UNSTABLE -vm_offset_t -ml_static_ptovirt( - vm_offset_t); - -#ifdef XNU_KERNEL_PRIVATE -vm_offset_t -ml_boot_ptovirt( - vm_offset_t); -#endif +kern_return_t +ml_processor_register( + cpu_id_t cpu_id, + uint32_t lapic_id, + processor_t *processor_out, + boolean_t boot_cpu, + boolean_t start ); /* PCI config cycle probing */ boolean_t ml_probe_read( @@ -190,17 +230,6 @@ void ml_phys_write_double( void ml_phys_write_double_64( addr64_t paddr, unsigned long long data); -void ml_static_mfree( - vm_offset_t, - vm_size_t); - -/* virtual to physical on wired pages */ -vm_offset_t ml_vtophys( - vm_offset_t vaddr); - -vm_size_t ml_nofault_copy( - vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size); - /* Struct for ml_cpu_get_info */ struct ml_cpu_info { unsigned long vector_unit; @@ -218,45 +247,6 @@ typedef struct ml_cpu_info ml_cpu_info_t; /* Get processor info */ void ml_cpu_get_info(ml_cpu_info_t *ml_cpu_info); -/* Machine topology info */ -uint64_t ml_cpu_cache_size(unsigned int level); -uint64_t ml_cpu_cache_sharing(unsigned int level); - -#endif /* __APPLE_API_UNSTABLE */ - -#ifdef __APPLE_API_PRIVATE -#if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE) -/* IO memory map services */ - -/* Map memory map IO space */ -vm_offset_t ml_io_map( - vm_offset_t phys_addr, - vm_size_t size); - -/* boot memory allocation */ -vm_offset_t ml_static_malloc( - vm_size_t size); - - -extern uint32_t bounce_pool_base; -extern uint32_t bounce_pool_size; - -void ml_get_bouncepool_info( - vm_offset_t *phys_addr, - 
vm_size_t *size); - - -#endif /* PEXPERT_KERNEL_PRIVATE || MACH_KERNEL_PRIVATE */ - -/* Zero bytes starting at a physical address */ -void bzero_phys( - addr64_t phys_address, - uint32_t length); - -void bzero_phys_nc( - addr64_t phys_address, - uint32_t length); - void ml_thread_policy( thread_t thread, unsigned policy_id, @@ -267,17 +257,10 @@ void ml_thread_policy( #define MACHINE_NETWORK_WORKLOOP 0x00000001 #define MACHINE_NETWORK_NETISR 0x00000002 -/* Initialize the maximum number of CPUs */ -void ml_init_max_cpus( - unsigned long max_cpus); - /* Return the maximum number of CPUs set by ml_init_max_cpus() */ int ml_get_max_cpus( void); -extern void ml_cpu_up(void); -extern void ml_cpu_down(void); - /* * The following are in pmCPU.c not machine_routines.c. */ @@ -293,7 +276,28 @@ extern uint64_t tmrCvt(uint64_t time, uint64_t conversion); extern uint64_t ml_cpu_int_event_time(void); -#endif /* __APPLE_API_PRIVATE */ +#endif /* KERNEL_PRIVATE */ + +/* Get Interrupts Enabled */ +boolean_t ml_get_interrupts_enabled(void); + +/* Set Interrupts Enabled */ +boolean_t ml_set_interrupts_enabled(boolean_t enable); + +/* Check if running at interrupt context */ +boolean_t ml_at_interrupt_context(void); + +/* Zero bytes starting at a physical address */ +void bzero_phys( + addr64_t phys_address, + uint32_t length); + +/* Bytes available on current stack */ +vm_offset_t ml_stack_remaining(void); + +#if CONFIG_COUNTERS +void ml_get_csw_threads(thread_t * /*old*/, thread_t * /*new*/); +#endif /* CONFIG_COUNTERS */ __END_DECLS diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s index bb4095af3..249c0ebf7 100644 --- a/osfmk/i386/machine_routines_asm.s +++ b/osfmk/i386/machine_routines_asm.s @@ -135,6 +135,13 @@ LEXT(tmrCvt) ret // Leave... 
+ +/* void _rtc_nanotime_store(uint64_t tsc, + uint64_t nsec, + uint32_t scale, + uint32_t shift, + rtc_nanotime_t *dst) ; +*/ .globl EXT(_rtc_nanotime_store) .align FALIGN diff --git a/osfmk/i386/machine_task.c b/osfmk/i386/machine_task.c new file mode 100644 index 000000000..c05d69bef --- /dev/null +++ b/osfmk/i386/machine_task.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include +#include +#include + +extern zone_t ids_zone; + +kern_return_t +machine_task_set_state( + task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t state_count) +{ + switch (flavor) { + case x86_DEBUG_STATE32: + { + x86_debug_state32_t *tstate = (x86_debug_state32_t*) state; + if ((task_has_64BitAddr(task)) || + (state_count != x86_DEBUG_STATE32_COUNT) || + (!debug_state_is_valid32(tstate))) { + return KERN_INVALID_ARGUMENT; + } + + if (task->task_debug == NULL) { + task->task_debug = zalloc(ids_zone); + } + + copy_debug_state32(tstate, (x86_debug_state32_t*) task->task_debug, FALSE); + + return KERN_SUCCESS; + break; + } + case x86_DEBUG_STATE64: + { + x86_debug_state64_t *tstate = (x86_debug_state64_t*) state; + + if ((!task_has_64BitAddr(task)) || + (state_count != x86_DEBUG_STATE64_COUNT) || + (!debug_state_is_valid64(tstate))) { + return KERN_INVALID_ARGUMENT; + } + + if (task->task_debug == NULL) { + task->task_debug = zalloc(ids_zone); + } + + copy_debug_state64(tstate, (x86_debug_state64_t*) task->task_debug, 
FALSE); + + return KERN_SUCCESS; + break; + } + case x86_DEBUG_STATE: + { + x86_debug_state_t *tstate = (x86_debug_state_t*) state; + + if (state_count != x86_DEBUG_STATE_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + if ((tstate->dsh.flavor == x86_DEBUG_STATE32) && + (tstate->dsh.count == x86_DEBUG_STATE32_COUNT) && + (!task_has_64BitAddr(task)) && + debug_state_is_valid32(&tstate->uds.ds32)) { + + if (task->task_debug == NULL) { + task->task_debug = zalloc(ids_zone); + } + + copy_debug_state32(&tstate->uds.ds32, (x86_debug_state32_t*) task->task_debug, FALSE); + return KERN_SUCCESS; + + } else if ((tstate->dsh.flavor == x86_DEBUG_STATE64) && + (tstate->dsh.count == x86_DEBUG_STATE64_COUNT) && + task_has_64BitAddr(task) && + debug_state_is_valid64(&tstate->uds.ds64)) { + + if (task->task_debug == NULL) { + task->task_debug = zalloc(ids_zone); + } + + copy_debug_state64(&tstate->uds.ds64, (x86_debug_state64_t*) task->task_debug, FALSE); + return KERN_SUCCESS; + } else { + return KERN_INVALID_ARGUMENT; + } + + break; + } + default: + { + return KERN_INVALID_ARGUMENT; + break; + } + } +} + +kern_return_t +machine_task_get_state(task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t *state_count) +{ + switch (flavor) { + case x86_DEBUG_STATE32: + { + x86_debug_state32_t *tstate = (x86_debug_state32_t*) state; + + if ((task_has_64BitAddr(task)) || (*state_count != x86_DEBUG_STATE32_COUNT)) { + return KERN_INVALID_ARGUMENT; + } + + if (task->task_debug == NULL) { + bzero(state, sizeof(*tstate)); + } else { + copy_debug_state32((x86_debug_state32_t*) task->task_debug, tstate, TRUE); + } + + return KERN_SUCCESS; + break; + } + case x86_DEBUG_STATE64: + { + x86_debug_state64_t *tstate = (x86_debug_state64_t*) state; + + if ((!task_has_64BitAddr(task)) || (*state_count != x86_DEBUG_STATE64_COUNT)) { + return KERN_INVALID_ARGUMENT; + } + + if (task->task_debug == NULL) { + bzero(state, sizeof(*tstate)); + } else { + 
copy_debug_state64((x86_debug_state64_t*) task->task_debug, tstate, TRUE); + } + + return KERN_SUCCESS; + break; + } + case x86_DEBUG_STATE: + { + x86_debug_state_t *tstate = (x86_debug_state_t*)state; + + if (*state_count != x86_DEBUG_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (task_has_64BitAddr(task)) { + tstate->dsh.flavor = x86_DEBUG_STATE64; + tstate->dsh.count = x86_DEBUG_STATE64_COUNT; + + if (task->task_debug == NULL) { + bzero(&tstate->uds.ds64, sizeof(tstate->uds.ds64)); + } else { + copy_debug_state64((x86_debug_state64_t*)task->task_debug, &tstate->uds.ds64, TRUE); + } + } else { + tstate->dsh.flavor = x86_DEBUG_STATE32; + tstate->dsh.count = x86_DEBUG_STATE32_COUNT; + + if (task->task_debug == NULL) { + bzero(&tstate->uds.ds32, sizeof(tstate->uds.ds32)); + } else { + copy_debug_state32((x86_debug_state32_t*)task->task_debug, &tstate->uds.ds32, TRUE); + } + } + + return KERN_SUCCESS; + break; + } + default: + { + return KERN_INVALID_ARGUMENT; + break; + } + } +} + +/* + * Set initial default state on a thread as stored in the MACHINE_TASK data. + * Note: currently only debug state is supported. + */ +kern_return_t +machine_thread_inherit_taskwide( + thread_t thread, + task_t parent_task) +{ + if (parent_task->task_debug) { + int flavor; + mach_msg_type_number_t count; + + if (task_has_64BitAddr(parent_task)) { + flavor = x86_DEBUG_STATE64; + count = x86_DEBUG_STATE64_COUNT; + } else { + flavor = x86_DEBUG_STATE32; + count = x86_DEBUG_STATE32_COUNT; + } + + return machine_thread_set_state(thread, flavor, parent_task->task_debug, count); + } + + return KERN_SUCCESS; +} diff --git a/osfmk/i386/mcount.s b/osfmk/i386/mcount.s index 822d643e0..0246ba152 100644 --- a/osfmk/i386/mcount.s +++ b/osfmk/i386/mcount.s @@ -41,6 +41,10 @@ Entry(mcount) // Check that this cpu is ready. // This delays the start of mcounting until a cpu is really prepared. 
// + mov %gs, %ax + test %ax, %ax + jz 1f + movl %gs:CPU_RUNNING,%eax testl %eax,%eax jz 1f diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 93d45455d..025396b31 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -32,24 +32,28 @@ #ifndef _I386_MISC_PROTOS_H_ #define _I386_MISC_PROTOS_H_ -#include +#include struct boot_args; struct cpu_data; +extern void vstart(vm_offset_t); extern void i386_init(vm_offset_t); -extern void i386_macho_zerofill(void); extern void i386_vm_init( uint64_t, boolean_t, struct boot_args *); +#ifdef __i386__ extern void cpu_IA32e_enable(struct cpu_data *); extern void cpu_IA32e_disable(struct cpu_data *); extern void ml_load_desc64(void); -extern void ml_64bit_wrmsr64(uint32_t msr, uint64_t value); extern void ml_64bit_lldt(int); +#endif + +#if NCOPY_WINDOWS > 0 extern void cpu_userwindow_init(int); extern void cpu_physwindow_init(int); +#endif extern void machine_startup(void); @@ -64,7 +68,9 @@ extern void remote_kdb(void); extern void clear_kdb_intr(void); extern void draw_panic_dialog(void); extern void cpu_init(void); +#ifdef __i386__ extern void cpu_shutdown(void); +#endif extern void fix_desc( void * desc, int num_desc); @@ -84,6 +90,7 @@ extern void blkclr( const char *from, int nbytes); +#ifdef __i386__ extern unsigned int div_scale( unsigned int dividend, unsigned int divisor, @@ -93,10 +100,13 @@ extern unsigned int mul_scale( unsigned int multiplicand, unsigned int multiplier, unsigned int *scale); +#endif /* Move arbitrarily-aligned data from one physical address to another */ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes); +extern void ml_copy_phys(addr64_t, addr64_t, vm_size_t); + /* Flush all cachelines for a page. 
*/ extern void cache_flush_page_phys(ppnum_t pa); @@ -128,8 +138,30 @@ extern void rtc_clock_stepped( uint32_t old_frequency); extern void rtc_clock_napped(uint64_t, uint64_t); -extern void x86_lowmem_free(void); +extern void pmap_lowmem_finalize(void); thread_t Switch_context(thread_t, thread_continue_t, thread_t); +thread_t Shutdown_context(thread_t thread, void (*doshutdown)(processor_t),processor_t processor); + +#ifdef __x86_64__ +uint64_t x86_64_pre_sleep(void); +void x86_64_post_sleep(uint64_t new_cr3); +#endif + +boolean_t +debug_state_is_valid32(x86_debug_state32_t *ds); + +boolean_t +debug_state_is_valid64(x86_debug_state64_t *ds); + +void +copy_debug_state32(x86_debug_state32_t *src, x86_debug_state32_t *target, boolean_t all); + +void +copy_debug_state64(x86_debug_state64_t *src, x86_debug_state64_t *target, boolean_t all); + +/* Fast-restart parameters */ +#define FULL_SLAVE_INIT (NULL) +#define FAST_SLAVE_INIT ((void *)(uintptr_t)1) #endif /* _I386_MISC_PROTOS_H_ */ diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 00db14a66..e11c8f6c5 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,33 +54,33 @@ #include +#include +#include +#include +#include +#include +#include +#include #include #include -#include #include #include -#include #include -#include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include #include +#if CONFIG_MCA #include +#endif +#include #include #include #include #if MACH_KDB -#include +#include #include #include #include @@ -98,8 +98,6 @@ #define PAUSE #endif /* MP_DEBUG */ -#define FULL_SLAVE_INIT (NULL) -#define FAST_SLAVE_INIT ((void *)(uintptr_t)1) void slave_boot_init(void); @@ -109,7 +107,7 @@ volatile boolean_t mp_kdb_trap = FALSE; volatile long mp_kdb_ncpus = 0; #endif -static void mp_kdp_wait(boolean_t flush); +static void mp_kdp_wait(boolean_t flush, boolean_t isNMI); static void mp_rendezvous_action(void); static void mp_broadcast_action(void); @@ -117,37 +115,56 @@ static boolean_t cpu_signal_pending(int cpu, mp_event_t event); static int cpu_signal_handler(x86_saved_state_t *regs); static int NMIInterruptHandler(x86_saved_state_t *regs); -boolean_t smp_initialized = FALSE; +boolean_t smp_initialized = FALSE; volatile boolean_t force_immediate_debugger_NMI = FALSE; volatile boolean_t pmap_tlb_flush_timeout = FALSE; - decl_simple_lock_data(,mp_kdp_lock); -decl_mutex_data(static, mp_cpu_boot_lock); +decl_lck_mtx_data(static, mp_cpu_boot_lock); +lck_mtx_ext_t mp_cpu_boot_lock_ext; /* Variables needed for MP rendezvous. 
*/ decl_simple_lock_data(,mp_rv_lock); -static void (*mp_rv_setup_func)(void *arg); -static void (*mp_rv_action_func)(void *arg); -static void (*mp_rv_teardown_func)(void *arg); -static void *mp_rv_func_arg; -static int mp_rv_ncpus; +static void (*mp_rv_setup_func)(void *arg); +static void (*mp_rv_action_func)(void *arg); +static void (*mp_rv_teardown_func)(void *arg); +static void *mp_rv_func_arg; +static volatile int mp_rv_ncpus; /* Cache-aligned barriers: */ static volatile long mp_rv_entry __attribute__((aligned(64))); static volatile long mp_rv_exit __attribute__((aligned(64))); static volatile long mp_rv_complete __attribute__((aligned(64))); +volatile uint64_t debugger_entry_time; +volatile uint64_t debugger_exit_time; +#if MACH_KDP + +static struct _kdp_xcpu_call_func { + kdp_x86_xcpu_func_t func; + void *arg0, *arg1; + volatile long ret; + volatile uint16_t cpu; +} kdp_xcpu_call_func = { + .cpu = KDP_XCPU_NONE +}; + +#endif + /* Variables needed for MP broadcast. */ static void (*mp_bc_action_func)(void *arg); static void *mp_bc_func_arg; static int mp_bc_ncpus; static volatile long mp_bc_count; -decl_mutex_data(static, mp_bc_lock); +decl_lck_mtx_data(static, mp_bc_lock); +lck_mtx_ext_t mp_bc_lock_ext; static volatile int debugger_cpu = -1; static void mp_cpus_call_action(void); static void mp_call_PM(void); +char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init + + #if GPROF /* * Initialize dummy structs for profiling. 
These aren't used but @@ -167,13 +184,20 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars }; #define GPROF_INIT() #endif /* GPROF */ +static lck_grp_t smp_lck_grp; +static lck_grp_attr_t smp_lck_grp_attr; + +extern void slave_pstart(void); + void smp_init(void) { simple_lock_init(&mp_kdp_lock, 0); simple_lock_init(&mp_rv_lock, 0); - mutex_init(&mp_cpu_boot_lock, 0); - mutex_init(&mp_bc_lock, 0); + lck_grp_attr_setdefault(&smp_lck_grp_attr); + lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr); + lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL); + lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL); console_init(); /* Local APIC? */ @@ -190,7 +214,7 @@ smp_init(void) GPROF_INIT(); DBGLOG_CPU_INIT(master_cpu); - slave_boot_init(); + install_real_mode_bootstrap(slave_pstart); smp_initialized = TRUE; @@ -210,11 +234,51 @@ mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay) } } +/* + * Quickly bring a CPU back online which has been halted. + */ +kern_return_t +intel_startCPU_fast(int slot_num) +{ + kern_return_t rc; + + /* + * Try to perform a fast restart + */ + rc = pmCPUExitHalt(slot_num); + if (rc != KERN_SUCCESS) + /* + * The CPU was not eligible for a fast restart. + */ + return(rc); + + /* + * Wait until the CPU is back online. + */ + mp_disable_preemption(); + + /* + * We use short pauses (1us) for low latency. 30,000 iterations is + * longer than a full restart would require so it should be more + * than long enough. + */ + mp_wait_for_cpu_up(slot_num, 30000, 1); + mp_enable_preemption(); + + /* + * Check to make sure that the CPU is really running. If not, + * go through the slow path. 
+ */ + if (cpu_datap(slot_num)->cpu_running) + return(KERN_SUCCESS); + else + return(KERN_FAILURE); +} + typedef struct { - int target_cpu; - int target_lapic; - int starter_cpu; - boolean_t is_nehalem; + int target_cpu; + int target_lapic; + int starter_cpu; } processor_start_info_t; static processor_start_info_t start_info; @@ -231,16 +295,10 @@ start_cpu(void *arg) LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT); LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT); - delay(psip->is_nehalem ? 100 : 10000); + delay(100); LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12)); - - if (!psip->is_nehalem) { - delay(200); - LAPIC_WRITE(ICRD, psip->target_lapic << LAPIC_ICRD_DEST_SHIFT); - LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12)); - } + LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(REAL_MODE_BOOTSTRAP_OFFSET>>12)); #ifdef POSTCODE_DELAY /* Wait much longer if postcodes are displayed for a delay period. */ @@ -249,6 +307,12 @@ start_cpu(void *arg) mp_wait_for_cpu_up(psip->target_cpu, i*100, 100); } +extern char prot_mode_gdt[]; +extern char slave_boot_base[]; +extern char real_mode_bootstrap_base[]; +extern char real_mode_bootstrap_end[]; +extern char slave_boot_end[]; + kern_return_t intel_startCPU( int slot_num) @@ -268,44 +332,39 @@ intel_startCPU( * Propagate processor mode to slave. */ if (cpu_mode_is64bit()) - cpu_desc_init64(cpu_datap(slot_num), FALSE); + cpu_desc_init64(cpu_datap(slot_num)); else - cpu_desc_init(cpu_datap(slot_num), FALSE); + cpu_desc_init(cpu_datap(slot_num)); /* Serialize use of the slave boot stack, etc. 
*/ - mutex_lock(&mp_cpu_boot_lock); + lck_mtx_lock(&mp_cpu_boot_lock); istate = ml_set_interrupts_enabled(FALSE); if (slot_num == get_cpu_number()) { ml_set_interrupts_enabled(istate); - mutex_unlock(&mp_cpu_boot_lock); + lck_mtx_unlock(&mp_cpu_boot_lock); return KERN_SUCCESS; } - start_info.starter_cpu = cpu_number(); - start_info.is_nehalem = (cpuid_info()->cpuid_model - == CPUID_MODEL_NEHALEM); - start_info.target_cpu = slot_num; + start_info.starter_cpu = cpu_number(); + start_info.target_cpu = slot_num; start_info.target_lapic = lapic; /* - * For Nehalem, perform the processor startup with all running + * Perform the processor startup sequence with all running * processors rendezvous'ed. This is required during periods when * the cache-disable bit is set for MTRR/PAT initialization. */ - if (start_info.is_nehalem) - mp_rendezvous_no_intrs(start_cpu, (void *) &start_info); - else - start_cpu((void *) &start_info); + mp_rendezvous_no_intrs(start_cpu, (void *) &start_info); ml_set_interrupts_enabled(istate); - mutex_unlock(&mp_cpu_boot_lock); + lck_mtx_unlock(&mp_cpu_boot_lock); if (!cpu_datap(slot_num)->cpu_running) { kprintf("Failed to start CPU %02d\n", slot_num); printf("Failed to start CPU %02d, rebooting...\n", slot_num); delay(1000000); - cpu_shutdown(); + halt_cpu(); return KERN_SUCCESS; } else { kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic); @@ -313,90 +372,6 @@ intel_startCPU( } } -/* - * Quickly bring a CPU back online which has been halted. - */ -kern_return_t -intel_startCPU_fast(int slot_num) -{ - kern_return_t rc; - - /* - * Try to perform a fast restart - */ - rc = pmCPUExitHalt(slot_num); - if (rc != KERN_SUCCESS) - /* - * The CPU was not eligible for a fast restart. - */ - return(rc); - - /* - * Wait until the CPU is back online. - */ - mp_disable_preemption(); - - /* - * We use short pauses (1us) for low latency. 30,000 iterations is - * longer than a full restart would require so it should be more - * than long enough. 
- */ - mp_wait_for_cpu_up(slot_num, 30000, 1); - mp_enable_preemption(); - - /* - * Check to make sure that the CPU is really running. If not, - * go through the slow path. - */ - if (cpu_datap(slot_num)->cpu_running) - return(KERN_SUCCESS); - else - return(KERN_FAILURE); -} - -extern char slave_boot_base[]; -extern char slave_boot_end[]; -extern void slave_pstart(void); - -void -slave_boot_init(void) -{ - DBG("V(slave_boot_base)=%p P(slave_boot_base)=%p MP_BOOT=%p sz=0x%x\n", - slave_boot_base, - kvtophys((vm_offset_t) slave_boot_base), - MP_BOOT, - slave_boot_end-slave_boot_base); - - /* - * Copy the boot entry code to the real-mode vector area MP_BOOT. - * This is in page 1 which has been reserved for this purpose by - * machine_startup() from the boot processor. - * The slave boot code is responsible for switching to protected - * mode and then jumping to the common startup, _start(). - */ - bcopy_phys(kvtophys((vm_offset_t) slave_boot_base), - (addr64_t) MP_BOOT, - slave_boot_end-slave_boot_base); - - /* - * Zero a stack area above the boot code. - */ - DBG("bzero_phys 0x%x sz 0x%x\n",MP_BOOTSTACK+MP_BOOT-0x400, 0x400); - bzero_phys((addr64_t)MP_BOOTSTACK+MP_BOOT-0x400, 0x400); - - /* - * Set the location at the base of the stack to point to the - * common startup entry. - */ - DBG("writing 0x%x at phys 0x%x\n", - kvtophys((vm_offset_t) &slave_pstart), MP_MACH_START+MP_BOOT); - ml_phys_write_word(MP_MACH_START+MP_BOOT, - (unsigned int)kvtophys((vm_offset_t) &slave_pstart)); - - /* Flush caches */ - __asm__("wbinvd"); -} - #if MP_DEBUG cpu_signal_event_log_t *cpu_signal[MAX_CPUS]; cpu_signal_event_log_t *cpu_handle[MAX_CPUS]; @@ -432,10 +407,9 @@ cpu_signal_handler(x86_saved_state_t *regs) * current thread's stack (if any) is synchronized with the * context at the moment of the interrupt, to facilitate * access through the debugger. - * XXX 64-bit state? 
*/ - sync_iss_to_iks(saved_state32(regs)); - mp_kdp_wait(TRUE); + sync_iss_to_iks(regs); + mp_kdp_wait(TRUE, FALSE); } else #endif /* MACH_KDP */ if (i_bit(MP_TLB_FLUSH, my_word)) { @@ -488,7 +462,11 @@ NMIInterruptHandler(x86_saved_state_t *regs) void *stackptr; sync_iss_to_iks_unconditionally(regs); +#if defined (__i386__) __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); +#elif defined (__x86_64__) + __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); +#endif if (cpu_number() == debugger_cpu) goto NMExit; @@ -496,23 +474,19 @@ NMIInterruptHandler(x86_saved_state_t *regs) if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) { char pstr[128]; snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number()); - panic_i386_backtrace(stackptr, 10, &pstr[0], TRUE, regs); - panic_io_port_read(); - mca_check_save(); - if (pmsafe_debug) - pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); - for(;;) { - cpu_pause(); - } + panic_i386_backtrace(stackptr, 16, &pstr[0], TRUE, regs); } - mp_kdp_wait(FALSE); + +#if MACH_KDP + mp_kdp_wait(FALSE, pmap_tlb_flush_timeout); +#endif NMExit: return 1; } #ifdef MP_DEBUG -int max_lock_loops = 1000000; -int trappedalready = 0; /* (BRINGUP */ +int max_lock_loops = 100000000; +int trappedalready = 0; /* (BRINGUP) */ #endif /* MP_DEBUG */ static void @@ -590,7 +564,7 @@ cpu_NMI_interrupt(int cpu) } } -static volatile void (*mp_PM_func)(void) = NULL; +static void (* volatile mp_PM_func)(void) = NULL; static void mp_call_PM(void) @@ -663,7 +637,7 @@ i386_signal_cpus(mp_event_t event, mp_sync_t mode) unsigned int cpu; unsigned int my_cpu = cpu_number(); - assert(hw_lock_held(&x86_topo_lock)); + assert(hw_lock_held((hw_lock_t)&x86_topo_lock)); for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running) @@ -682,7 +656,7 @@ i386_active_cpus(void) unsigned int cpu; unsigned int ncpus = 0; - assert(hw_lock_held(&x86_topo_lock)); + 
assert(hw_lock_held((hw_lock_t)&x86_topo_lock)); for (cpu = 0; cpu < real_ncpus; cpu++) { if (cpu_datap(cpu)->cpu_running) @@ -716,6 +690,7 @@ mp_rendezvous_action(void) intrs_enabled = ml_get_interrupts_enabled(); + /* spin on entry rendezvous */ atomic_incl(&mp_rv_entry, 1); while (mp_rv_entry < mp_rv_ncpus) { @@ -734,7 +709,6 @@ mp_rendezvous_action(void) handle_pending_TLB_flushes(); cpu_pause(); } - /* teardown function */ if (mp_rv_teardown_func != NULL) mp_rv_teardown_func(mp_rv_func_arg); @@ -981,8 +955,8 @@ mp_broadcast_action(void) mp_bc_action_func(mp_bc_func_arg); /* if we're the last one through, wake up the instigator */ - if (atomic_decl_and_test((volatile long *)&mp_bc_count, 1)) - thread_wakeup(((event_t)(unsigned int *) &mp_bc_count)); + if (atomic_decl_and_test(&mp_bc_count, 1)) + thread_wakeup(((event_t)(uintptr_t) &mp_bc_count)); } /* @@ -1002,13 +976,13 @@ mp_broadcast( } /* obtain broadcast lock */ - mutex_lock(&mp_bc_lock); + lck_mtx_lock(&mp_bc_lock); /* set static function pointers */ mp_bc_action_func = action_func; mp_bc_func_arg = arg; - assert_wait(&mp_bc_count, THREAD_UNINT); + assert_wait((event_t)(uintptr_t)&mp_bc_count, THREAD_UNINT); /* * signal other processors, which will call mp_broadcast_action() @@ -1029,7 +1003,7 @@ mp_broadcast( clear_wait(current_thread(), THREAD_AWAKENED); /* release lock */ - mutex_unlock(&mp_bc_lock); + lck_mtx_unlock(&mp_bc_lock); } void @@ -1104,14 +1078,16 @@ mp_kdp_enter(void) */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); simple_lock(&mp_kdp_lock); - + debugger_entry_time = mach_absolute_time(); if (pmsafe_debug) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); while (mp_kdp_trap) { simple_unlock(&mp_kdp_lock); DBG("mp_kdp_enter() race lost\n"); - mp_kdp_wait(TRUE); +#if MACH_KDP + mp_kdp_wait(TRUE, FALSE); +#endif simple_lock(&mp_kdp_lock); } my_cpu = cpu_number(); @@ -1189,23 +1165,56 @@ cpu_signal_pending(int cpu, mp_event_t event) retval = TRUE; return retval; } - + +long 
kdp_x86_xcpu_invoke(const uint16_t lcpu, kdp_x86_xcpu_func_t func, + void *arg0, void *arg1) +{ + if (lcpu > (real_ncpus - 1)) + return -1; + + if (func == NULL) + return -1; + + kdp_xcpu_call_func.func = func; + kdp_xcpu_call_func.ret = -1; + kdp_xcpu_call_func.arg0 = arg0; + kdp_xcpu_call_func.arg1 = arg1; + kdp_xcpu_call_func.cpu = lcpu; + DBG("Invoking function %p on CPU %d\n", func, (int32_t)lcpu); + while (kdp_xcpu_call_func.cpu != KDP_XCPU_NONE) + cpu_pause(); + return kdp_xcpu_call_func.ret; +} + +static void +kdp_x86_xcpu_poll(void) +{ + if ((uint16_t)cpu_number() == kdp_xcpu_call_func.cpu) { + kdp_xcpu_call_func.ret = + kdp_xcpu_call_func.func(kdp_xcpu_call_func.arg0, + kdp_xcpu_call_func.arg1, + cpu_number()); + kdp_xcpu_call_func.cpu = KDP_XCPU_NONE; + } +} static void -mp_kdp_wait(boolean_t flush) +mp_kdp_wait(boolean_t flush, boolean_t isNMI) { DBG("mp_kdp_wait()\n"); /* If an I/O port has been specified as a debugging aid, issue a read */ panic_io_port_read(); +#if CONFIG_MCA /* If we've trapped due to a machine-check, save MCA registers */ mca_check_save(); +#endif if (pmsafe_debug) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); atomic_incl((volatile long *)&mp_kdp_ncpus, 1); - while (mp_kdp_trap) { + while (mp_kdp_trap || (isNMI == TRUE)) { /* * A TLB shootdown request may be pending--this would result * in the requesting processor waiting in PMAP_UPDATE_TLBS() @@ -1214,6 +1223,8 @@ mp_kdp_wait(boolean_t flush) */ if (flush) handle_pending_TLB_flushes(); + + kdp_x86_xcpu_poll(); cpu_pause(); } @@ -1230,6 +1241,9 @@ mp_kdp_exit(void) DBG("mp_kdp_exit()\n"); debugger_cpu = -1; atomic_decl((volatile long *)&mp_kdp_ncpus, 1); + + debugger_exit_time = mach_absolute_time(); + mp_kdp_trap = FALSE; __asm__ volatile("mfence"); @@ -1255,6 +1269,12 @@ mp_kdp_exit(void) } #endif /* MACH_KDP */ +boolean_t +mp_recent_debugger_activity() { + return (((mach_absolute_time() - debugger_entry_time) < LastDebuggerEntryAllowance) || + ((mach_absolute_time() - 
debugger_exit_time) < LastDebuggerEntryAllowance)); +} + /*ARGSUSED*/ void init_ast_check( @@ -1266,7 +1286,7 @@ void cause_ast_check( processor_t processor) { - int cpu = processor->cpu_num; + int cpu = processor->cpu_id; if (cpu != cpu_number()) { i386_signal_cpu(cpu, MP_AST, ASYNC); @@ -1371,75 +1391,6 @@ mp_kdb_exit(void) #endif /* MACH_KDB */ -static void -do_init_slave(boolean_t fast_restart) -{ - void *init_param = FULL_SLAVE_INIT; - - postcode(I386_INIT_SLAVE); - - if (!fast_restart) { - /* Ensure that caching and write-through are enabled */ - set_cr0(get_cr0() & ~(CR0_NW|CR0_CD)); - - DBG("i386_init_slave() CPU%d: phys (%d) active.\n", - get_cpu_number(), get_cpu_phys_number()); - - assert(!ml_get_interrupts_enabled()); - - cpu_mode_init(current_cpu_datap()); - - mca_cpu_init(); - - lapic_configure(); - LAPIC_DUMP(); - LAPIC_CPU_MAP_DUMP(); - - init_fpu(); - - mtrr_update_cpu(); - } else - init_param = FAST_SLAVE_INIT; - - /* resume VT operation */ - vmx_resume(); - - if (!fast_restart) - pat_init(); - - cpu_thread_init(); /* not strictly necessary */ - - cpu_init(); /* Sets cpu_running which starter cpu waits for */ - - slave_main(init_param); - - panic("do_init_slave() returned from slave_main()"); -} - -/* - * i386_init_slave() is called from pstart. - * We're in the cpu's interrupt stack with interrupts disabled. - * At this point we are in legacy mode. We need to switch on IA32e - * if the mode is set to 64-bits. - */ -void -i386_init_slave(void) -{ - do_init_slave(FALSE); -} - -/* - * i386_init_slave_fast() is called from pmCPUHalt. - * We're running on the idle thread and need to fix up - * some accounting and get it so that the scheduler sees this - * CPU again. 
- */ -void -i386_init_slave_fast(void) -{ - do_init_slave(TRUE); -} - void slave_machine_init(void *param) { @@ -1458,7 +1409,7 @@ slave_machine_init(void *param) } } -#undef cpu_number() +#undef cpu_number int cpu_number(void) { return get_cpu_number(); diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index d4b3551e7..694f7c179 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -61,9 +61,6 @@ #ifndef _I386_MP_H_ #define _I386_MP_H_ -#ifndef DEBUG -#include -#endif //#define MP_DEBUG 1 #include @@ -77,6 +74,7 @@ #include #include #include +#include #include __BEGIN_DECLS @@ -105,12 +103,14 @@ extern int kdb_debug; extern int kdb_active[]; extern volatile boolean_t mp_kdp_trap; -extern volatile boolean_t force_immediate_debugger_NMI; +extern volatile boolean_t force_immediate_debugger_NMI; extern volatile boolean_t pmap_tlb_flush_timeout; +extern uint64_t LastDebuggerEntryAllowance; extern void mp_kdp_enter(void); extern void mp_kdp_exit(void); +extern boolean_t mp_recent_debugger_activity(void); #if MACH_KDB extern void mp_kdb_exit(void); #endif @@ -136,6 +136,14 @@ extern void mp_rendezvous_break_lock(void); extern void mp_broadcast( void (*action_func)(void *), void *arg); +#if MACH_KDP +typedef long (*kdp_x86_xcpu_func_t) (void *arg0, void *arg1, uint16_t lcpu); + +extern long kdp_x86_xcpu_invoke(const uint16_t lcpu, + kdp_x86_xcpu_func_t func, + void *arg0, void *arg1); +typedef enum {KDP_XCPU_NONE = 0xffff, KDP_CURRENT_LCPU = 0xfffe} kdp_cpu_t; +#endif typedef uint32_t cpu_t; typedef uint32_t cpumask_t; @@ -226,7 +234,7 @@ extern cpu_signal_event_log_t *cpu_handle[]; (vm_offset_t *) hdl_logpp, \ sizeof(cpu_signal_event_log_t)) != KERN_SUCCESS)\ panic("DBGLOG_CPU_INIT cpu_handle allocation failed\n");\ - bzero(*sig_logpp, sizeof(cpu_signal_event_log_t)); \ + bzero(*hdl_logpp, sizeof(cpu_signal_event_log_t)); \ } #else /* MP_DEBUG */ #define DBGLOG(log,_cpu,_event) @@ -235,7 +243,16 @@ extern cpu_signal_event_log_t *cpu_handle[]; #endif /* ASSEMBLER */ -#define 
i_bit(bit, word) ((long)(*(word)) & ((long)1 << (bit))) +#ifdef ASSEMBLER +#define i_bit(bit, word) ((long)(*(word)) & (1L << (bit))) +#else +// Workaround for 6640051 +static inline long +i_bit_impl(long word, long bit) { + return word & 1L << bit; +} +#define i_bit(bit, word) i_bit_impl((long)(*(word)), bit) +#endif /* @@ -262,6 +279,9 @@ extern cpu_signal_event_log_t *cpu_handle[]; #define MP_DEV_OP_CALLB 3 /* If lock busy, register a pending callback */ #if MACH_RT + +#if defined(__i386__) + #define _DISABLE_PREEMPTION \ incl %gs:CPU_PREEMPTION_LEVEL @@ -280,7 +300,26 @@ extern cpu_signal_event_log_t *cpu_handle[]; #define _ENABLE_PREEMPTION_NO_CHECK \ decl %gs:CPU_PREEMPTION_LEVEL -#if MACH_ASSERT +#elif defined(__x86_64__) + +#define _DISABLE_PREEMPTION \ + incl %gs:CPU_PREEMPTION_LEVEL + +#define _ENABLE_PREEMPTION \ + decl %gs:CPU_PREEMPTION_LEVEL ; \ + jne 9f ; \ + call EXT(kernel_preempt_check) ; \ +9: + +#define _ENABLE_PREEMPTION_NO_CHECK \ + decl %gs:CPU_PREEMPTION_LEVEL + +#else +#error Unsupported architecture +#endif + +/* x86_64 just calls through to the other macro directly */ +#if MACH_ASSERT && defined(__i386__) #define DISABLE_PREEMPTION \ pushl %eax; \ pushl %ecx; \ diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c index 798a191df..709e2b4b3 100644 --- a/osfmk/i386/mp_desc.c +++ b/osfmk/i386/mp_desc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -63,20 +63,97 @@ #include #include #include +#include #include #include -#include #include +#include #include #include #include +#if CONFIG_MCA #include +#endif #include #include + +#ifdef __x86_64__ +#define K_INTR_GATE (ACC_P|ACC_PL_K|ACC_INTR_GATE) +#define U_INTR_GATE (ACC_P|ACC_PL_U|ACC_INTR_GATE) + +// Declare macros that will declare the externs +#define TRAP(n, name) extern void *name ; +#define TRAP_ERR(n, name) extern void *name ; +#define TRAP_SPC(n, name) extern void *name ; +#define TRAP_IST(n, name) extern void *name ; +#define INTERRUPT(n) extern void *_intr_ ## n ; +#define USER_TRAP(n, name) extern void *name ; +#define USER_TRAP_SPC(n, name) extern void *name ; + +// Include the table to declare the externs +#include "../x86_64/idt_table.h" + +// Undef the macros, then redefine them so we can declare the table +#undef TRAP +#undef TRAP_ERR +#undef TRAP_SPC +#undef TRAP_IST +#undef INTERRUPT +#undef USER_TRAP +#undef USER_TRAP_SPC + +#define TRAP(n, name) \ + [n] { \ + (uintptr_t)&name, \ + KERNEL64_CS, \ + 0, \ + K_INTR_GATE, \ + 0 \ + }, + +#define TRAP_ERR TRAP +#define TRAP_SPC TRAP + +#define TRAP_IST(n, name) \ + [n] { \ + (uintptr_t)&name, \ + KERNEL64_CS, \ + 1, \ + K_INTR_GATE, \ + 0 \ + }, + +#define INTERRUPT(n) \ + [n] { \ + (uintptr_t)&_intr_ ## n,\ + KERNEL64_CS, \ + 0, \ + K_INTR_GATE, \ + 0 \ + }, + +#define USER_TRAP(n, name) \ + [n] { \ + (uintptr_t)&name, \ + KERNEL64_CS, \ + 0, \ + U_INTR_GATE, \ + 0 \ + }, + +#define USER_TRAP_SPC USER_TRAP + + +// Declare the table using the macros we just set up +struct fake_descriptor64 master_idt64[IDTSZ] __attribute__ ((aligned (4096))) = { +#include "../x86_64/idt_table.h" +}; +#endif + /* * The i386 needs an interrupt stack to keep the PCB stack from being * overrun by interrupts. 
All interrupt stacks MUST lie at lower addresses @@ -95,25 +172,43 @@ extern uint32_t low_eintstack[]; /* top */ * others are allocated dynamically and this array is updated at runtime. */ cpu_data_t cpu_data_master = { - .cpu_this = &cpu_data_master, - .cpu_nanotime = &rtc_nanotime_info, - .cpu_is64bit = FALSE, - .cpu_int_stack_top = (vm_offset_t) low_eintstack, - }; + .cpu_this = &cpu_data_master, + .cpu_nanotime = &rtc_nanotime_info, + .cpu_int_stack_top = (vm_offset_t) low_eintstack, +#ifdef __i386__ + .cpu_is64bit = FALSE, +#else + .cpu_is64bit = TRUE +#endif +}; cpu_data_t *cpu_data_ptr[MAX_CPUS] = { [0] &cpu_data_master }; -decl_simple_lock_data(,cpu_lock); /* protects real_ncpus */ +decl_simple_lock_data(,ncpus_lock); /* protects real_ncpus */ unsigned int real_ncpus = 1; unsigned int max_ncpus = MAX_CPUS; +#ifdef __i386__ extern void *hi_remap_text; #define HI_TEXT(lo_text) \ (((uint32_t)&lo_text - (uint32_t)&hi_remap_text) + HIGH_MEM_BASE) extern void hi_sysenter(void); + +typedef struct { + uint16_t length; + uint32_t offset[2]; +} __attribute__((__packed__)) table_descriptor64_t; + +extern table_descriptor64_t gdtptr64; +extern table_descriptor64_t idtptr64; +#endif extern void hi64_sysenter(void); extern void hi64_syscall(void); +#if defined(__x86_64__) && !defined(UBER64) +#define UBER64(x) ((uintptr_t)x) +#endif + /* * Multiprocessor i386/i486 systems use a separate copy of the * GDT, IDT, LDT, and kernel TSS per processor. The first three @@ -169,7 +264,7 @@ struct fake_descriptor physwindow_desc_pattern = { * in the uber-space remapping window on the kernel. */ struct fake_descriptor64 kernel_ldt_desc64 = { - FAKE_UBER64(&master_ldt), + 0, LDTSZ_MIN*sizeof(struct fake_descriptor)-1, 0, ACC_P|ACC_PL_K|ACC_LDT, @@ -181,244 +276,382 @@ struct fake_descriptor64 kernel_ldt_desc64 = { * It is follows pattern of the KERNEL_LDT. 
*/ struct fake_descriptor64 kernel_tss_desc64 = { - FAKE_UBER64(&master_ktss64), + 0, sizeof(struct x86_64_tss)-1, +#ifdef __x86_64__ + SZ_G, +#else 0, +#endif ACC_P|ACC_PL_K|ACC_TSS, 0 }; +/* + * Convert a descriptor from fake to real format. + * + * Fake descriptor format: + * bytes 0..3 base 31..0 + * bytes 4..5 limit 15..0 + * byte 6 access byte 2 | limit 19..16 + * byte 7 access byte 1 + * + * Real descriptor format: + * bytes 0..1 limit 15..0 + * bytes 2..3 base 15..0 + * byte 4 base 23..16 + * byte 5 access byte 1 + * byte 6 access byte 2 | limit 19..16 + * byte 7 base 31..24 + * + * Fake gate format: + * bytes 0..3 offset + * bytes 4..5 selector + * byte 6 word count << 4 (to match fake descriptor) + * byte 7 access byte 1 + * + * Real gate format: + * bytes 0..1 offset 15..0 + * bytes 2..3 selector + * byte 4 word count + * byte 5 access byte 1 + * bytes 6..7 offset 31..16 + */ +void +fix_desc(void *d, int num_desc) { + //early_kprintf("fix_desc(%x, %x)\n", d, num_desc); + uint8_t *desc = (uint8_t*) d; + + do { + if ((desc[7] & 0x14) == 0x04) { /* gate */ + uint32_t offset; + uint16_t selector; + uint8_t wordcount; + uint8_t acc; + + offset = *((uint32_t*)(desc)); + selector = *((uint32_t*)(desc+4)); + wordcount = desc[6] >> 4; + acc = desc[7]; + + *((uint16_t*)desc) = offset & 0xFFFF; + *((uint16_t*)(desc+2)) = selector; + desc[4] = wordcount; + desc[5] = acc; + *((uint16_t*)(desc+6)) = offset >> 16; + + } else { /* descriptor */ + uint32_t base; + uint16_t limit; + uint8_t acc1, acc2; + + base = *((uint32_t*)(desc)); + limit = *((uint16_t*)(desc+4)); + acc2 = desc[6]; + acc1 = desc[7]; + + *((uint16_t*)(desc)) = limit; + *((uint16_t*)(desc+2)) = base & 0xFFFF; + desc[4] = (base >> 16) & 0xFF; + desc[5] = acc1; + desc[6] = acc2; + desc[7] = base >> 24; + } + desc += 8; + } while (--num_desc); +} + +void +fix_desc64(void *descp, int count) +{ + struct fake_descriptor64 *fakep; + union { + struct real_gate64 gate; + struct real_descriptor64 desc; + } real; 
+ int i; + + fakep = (struct fake_descriptor64 *) descp; + + for (i = 0; i < count; i++, fakep++) { + /* + * Construct the real descriptor locally. + */ + + bzero((void *) &real, sizeof(real)); + + switch (fakep->access & ACC_TYPE) { + case 0: + break; + case ACC_CALL_GATE: + case ACC_INTR_GATE: + case ACC_TRAP_GATE: + real.gate.offset_low16 = fakep->offset64 & 0xFFFF; + real.gate.selector16 = fakep->lim_or_seg & 0xFFFF; + real.gate.IST = fakep->size_or_IST & 0x7; + real.gate.access8 = fakep->access; + real.gate.offset_high16 = (fakep->offset64>>16)&0xFFFF; + real.gate.offset_top32 = (uint32_t)(fakep->offset64>>32); + break; + default: /* Otherwise */ + real.desc.limit_low16 = fakep->lim_or_seg & 0xFFFF; + real.desc.base_low16 = fakep->offset64 & 0xFFFF; + real.desc.base_med8 = (fakep->offset64 >> 16) & 0xFF; + real.desc.access8 = fakep->access; + real.desc.limit_high4 = (fakep->lim_or_seg >> 16) & 0xFF; + real.desc.granularity4 = fakep->size_or_IST; + real.desc.base_high8 = (fakep->offset64 >> 24) & 0xFF; + real.desc.base_top32 = (uint32_t)(fakep->offset64>>32); + } + + /* + * Now copy back over the fake structure. + */ + bcopy((void *) &real, (void *) fakep, sizeof(real)); + } +} + +#ifdef __i386__ void -cpu_desc_init( - cpu_data_t *cdp, - boolean_t is_boot_cpu) +cpu_desc_init(cpu_data_t *cdp) { - cpu_desc_table_t *cdt = cdp->cpu_desc_tablep; cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - if (is_boot_cpu) { - /* - * Master CPU uses the tables built at boot time. - * Just set the index pointers to the high shared-mapping space. - * Note that the sysenter stack uses empty space above the ktss - * in the HIGH_FIXED_KTSS page. In this case we don't map the - * the real master_sstk in low memory. - */ - cdi->cdi_ktss = (struct i386_tss *) - pmap_index_to_virt(HIGH_FIXED_KTSS) ; - cdi->cdi_sstk = (vm_offset_t) (cdi->cdi_ktss + 1) + + if (cdp == &cpu_data_master) { + /* + * Fix up the entries in the GDT to point to + * this LDT and this TSS. 
+ */ + struct fake_descriptor temp_fake_desc; + temp_fake_desc = ldt_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) &master_ldt; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = + temp_fake_desc; + *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = + temp_fake_desc; + + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) &master_ktss; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = + temp_fake_desc; + +#if MACH_KDB + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) &master_dbtss; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(DEBUG_TSS)] = + temp_fake_desc; +#endif + + temp_fake_desc = cpudata_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) &cpu_data_master; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(CPU_DATA_GS)] = + temp_fake_desc; + + fix_desc((void *)&master_idt, IDTSZ); + + cdi->cdi_idt.ptr = master_idt; + cdi->cdi_gdt.ptr = (void *)master_gdt; + + + /* + * Master CPU uses the tables built at boot time. + * Just set the index pointers to the high shared-mapping space. + * Note that the sysenter stack uses empty space above the ktss + * in the HIGH_FIXED_KTSS page. In this case we don't map the + * the real master_sstk in low memory. 
+ */ + cdi->cdi_ktss = (struct i386_tss *) + pmap_index_to_virt(HIGH_FIXED_KTSS) ; + cdi->cdi_sstk = (vm_offset_t) (cdi->cdi_ktss + 1) + (vm_offset_t) &master_sstk.top - (vm_offset_t) &master_sstk; -#if MACH_KDB - cdi->cdi_dbtss = (struct i386_tss *) - pmap_index_to_virt(HIGH_FIXED_DBTSS); -#endif /* MACH_KDB */ - cdi->cdi_gdt = (struct fake_descriptor *) - pmap_index_to_virt(HIGH_FIXED_GDT); - cdi->cdi_idt = (struct fake_descriptor *) - pmap_index_to_virt(HIGH_FIXED_IDT); - cdi->cdi_ldt = (struct fake_descriptor *) - pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); } else { + cpu_desc_table_t *cdt = (cpu_desc_table_t *) cdp->cpu_desc_tablep; + + vm_offset_t cpu_hi_desc; + + cpu_hi_desc = pmap_cpu_high_shared_remap( + cdp->cpu_number, + HIGH_CPU_DESC, + (vm_offset_t) cdt, 1); + + /* + * Per-cpu GDT, IDT, LDT, KTSS descriptors are allocated in one + * block (cpu_desc_table) and double-mapped into high shared space + * in one page window. + * Also, a transient stack for the fast sysenter path. The top of + * which is set at context switch time to point to the PCB using + * the high address. + */ + cdi->cdi_gdt.ptr = (struct fake_descriptor *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, gdt[0])); + cdi->cdi_idt.ptr = (struct fake_descriptor *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, idt[0])); + cdi->cdi_ktss = (struct i386_tss *) (cpu_hi_desc + + offsetof(cpu_desc_table_t, ktss)); + cdi->cdi_sstk = cpu_hi_desc + offsetof(cpu_desc_table_t, sstk.top); - vm_offset_t cpu_hi_desc; - - cpu_hi_desc = pmap_cpu_high_shared_remap(cdp->cpu_number, - HIGH_CPU_DESC, - (vm_offset_t) cdt, 1); - - /* - * Per-cpu GDT, IDT, LDT, KTSS descriptors are allocated in one - * block (cpu_desc_table) and double-mapped into high shared space - * in one page window. - * Also, a transient stack for the fast sysenter path. The top of - * which is set at context switch time to point to the PCB using - * the high address. 
- */ - cdi->cdi_gdt = (struct fake_descriptor *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, gdt[0])); - cdi->cdi_idt = (struct fake_descriptor *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, idt[0])); - cdi->cdi_ktss = (struct i386_tss *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, ktss)); - cdi->cdi_sstk = cpu_hi_desc + - offsetof(cpu_desc_table_t, sstk.top); - - /* - * LDT descriptors are mapped into a seperate area. - */ - cdi->cdi_ldt = (struct fake_descriptor *) + /* + * LDT descriptors are mapped into a seperate area. + */ + cdi->cdi_ldt = (struct fake_descriptor *) pmap_cpu_high_shared_remap( - cdp->cpu_number, - HIGH_CPU_LDT_BEGIN, - (vm_offset_t) cdp->cpu_ldtp, - HIGH_CPU_LDT_END - HIGH_CPU_LDT_BEGIN + 1); - - /* - * Copy the tables - */ - bcopy((char *)master_idt, - (char *)cdt->idt, - sizeof(master_idt)); - bcopy((char *)master_gdt, - (char *)cdt->gdt, - sizeof(master_gdt)); - bcopy((char *)master_ldt, - (char *)cdp->cpu_ldtp, - sizeof(master_ldt)); - bzero((char *)&cdt->ktss, - sizeof(struct i386_tss)); + cdp->cpu_number, + HIGH_CPU_LDT_BEGIN, + (vm_offset_t) cdp->cpu_ldtp, + HIGH_CPU_LDT_END - HIGH_CPU_LDT_BEGIN + 1); + /* + * Copy the tables + */ + bcopy((char *)master_idt, (char *)cdt->idt, sizeof(master_idt)); + bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt)); + bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt)); + bzero((char *)&cdt->ktss, sizeof(struct i386_tss)); #if MACH_KDB - cdi->cdi_dbtss = (struct i386_tss *) (cpu_hi_desc + + cdi->cdi_dbtss = (struct i386_tss *) (cpu_hi_desc + offsetof(cpu_desc_table_t, dbtss)); - bcopy((char *)&master_dbtss, - (char *)&cdt->dbtss, - sizeof(struct i386_tss)); + bcopy((char *)&master_dbtss, + (char *)&cdt->dbtss, + sizeof(struct i386_tss)); #endif /* MACH_KDB */ - /* - * Fix up the entries in the GDT to point to - * this LDT and this TSS. 
- */ - cdt->gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - cdt->gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) cdi->cdi_ldt; - fix_desc(&cdt->gdt[sel_idx(KERNEL_LDT)], 1); + /* + * Fix up the entries in the GDT to point to + * this LDT and this TSS. + */ + struct fake_descriptor temp_ldt = ldt_desc_pattern; + temp_ldt.offset = (vm_offset_t)cdi->cdi_ldt; + fix_desc(&temp_ldt, 1); - cdt->gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; - cdt->gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) cdi->cdi_ldt; - fix_desc(&cdt->gdt[sel_idx(USER_LDT)], 1); + cdt->gdt[sel_idx(KERNEL_LDT)] = temp_ldt; + cdt->gdt[sel_idx(USER_LDT)] = temp_ldt; - cdt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - cdt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) cdi->cdi_ktss; - fix_desc(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); + cdt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; + cdt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) cdi->cdi_ktss; + fix_desc(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); - cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; - cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; - fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); + cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; + cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; + fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); #if MACH_KDB - cdt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - cdt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) cdi->cdi_dbtss; - fix_desc(&cdt->gdt[sel_idx(DEBUG_TSS)], 1); - - cdt->dbtss.esp0 = (int)(db_task_stack_store + - (INTSTACK_SIZE * (cdp->cpu_number)) - sizeof (natural_t)); - cdt->dbtss.esp = cdt->dbtss.esp0; - cdt->dbtss.eip = (int)&db_task_start; + cdt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; + cdt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) cdi->cdi_dbtss; + fix_desc(&cdt->gdt[sel_idx(DEBUG_TSS)], 1); + + cdt->dbtss.esp0 = (int)(db_task_stack_store + + (INTSTACK_SIZE * (cdp->cpu_number)) - sizeof (natural_t)); + cdt->dbtss.esp = cdt->dbtss.esp0; + cdt->dbtss.eip 
= (int)&db_task_start; #endif /* MACH_KDB */ - cdt->ktss.ss0 = KERNEL_DS; - cdt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ + cdt->ktss.ss0 = KERNEL_DS; + cdt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ - cpu_userwindow_init(cdp->cpu_number); - cpu_physwindow_init(cdp->cpu_number); + cpu_userwindow_init(cdp->cpu_number); + cpu_physwindow_init(cdp->cpu_number); } - } +#endif /* __i386__ */ void -cpu_desc_init64( - cpu_data_t *cdp, - boolean_t is_boot_cpu) +cpu_desc_init64(cpu_data_t *cdp) { - cpu_desc_table64_t *cdt = (cpu_desc_table64_t *) - cdp->cpu_desc_tablep; cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - if (is_boot_cpu) { + if (cdp == &cpu_data_master) { /* * Master CPU uses the tables built at boot time. * Just set the index pointers to the low memory space. * Note that in 64-bit mode these are addressed in the * double-mapped window (uber-space). */ - cdi->cdi_ktss = (struct i386_tss *) &master_ktss64; + cdi->cdi_ktss = (void *)&master_ktss64; cdi->cdi_sstk = (vm_offset_t) &master_sstk.top; - cdi->cdi_gdt = master_gdt; - cdi->cdi_idt = (struct fake_descriptor *) &master_idt64; - cdi->cdi_ldt = (struct fake_descriptor *) &master_ldt; + cdi->cdi_gdt.ptr = (void *)master_gdt; + cdi->cdi_idt.ptr = (void *)master_idt64; + cdi->cdi_ldt = (struct fake_descriptor *) master_ldt; + - /* Replace the expanded LDT and TSS slots in the GDT: */ + /* Replace the expanded LDTs and TSS slots in the GDT */ + kernel_ldt_desc64.offset64 = UBER64(&master_ldt); *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] = kernel_ldt_desc64; + *(struct fake_descriptor64 *) &master_gdt[sel_idx(USER_LDT)] = + kernel_ldt_desc64; + kernel_tss_desc64.offset64 = UBER64(&master_ktss64); *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_TSS)] = kernel_tss_desc64; - /* - * Fix up the expanded descriptors for 64-bit. - */ + /* Fix up the expanded descriptors for 64-bit. 
*/ fix_desc64((void *) &master_idt64, IDTSZ); fix_desc64((void *) &master_gdt[sel_idx(KERNEL_LDT)], 1); + fix_desc64((void *) &master_gdt[sel_idx(USER_LDT)], 1); fix_desc64((void *) &master_gdt[sel_idx(KERNEL_TSS)], 1); /* * Set the double-fault stack as IST1 in the 64-bit TSS */ - master_ktss64.ist1 = UBER64(df_task_stack_end); + master_ktss64.ist1 = UBER64((uintptr_t) df_task_stack_end); } else { + cpu_desc_table64_t *cdt = (cpu_desc_table64_t *) cdp->cpu_desc_tablep; /* * Per-cpu GDT, IDT, KTSS descriptors are allocated in kernel - * heap (cpu_desc_table) and double-mapped in uber-space - * (over 4GB). + * heap (cpu_desc_table) . + * On K32 they're double-mapped in uber-space (over 4GB). * LDT descriptors are mapped into a separate area. */ - cdi->cdi_gdt = (struct fake_descriptor *)cdt->gdt; - cdi->cdi_idt = (struct fake_descriptor *)cdt->idt; - cdi->cdi_ktss = (struct i386_tss *)&cdt->ktss; + cdi->cdi_gdt.ptr = (struct fake_descriptor *)cdt->gdt; + cdi->cdi_idt.ptr = (void *)cdt->idt; + cdi->cdi_ktss = (void *)&cdt->ktss; cdi->cdi_sstk = (vm_offset_t)&cdt->sstk.top; cdi->cdi_ldt = cdp->cpu_ldtp; /* * Copy the tables */ - bcopy((char *)master_idt64, - (char *)cdt->idt, - sizeof(master_idt64)); - bcopy((char *)master_gdt, - (char *)cdt->gdt, - sizeof(master_gdt)); - bcopy((char *)master_ldt, - (char *)cdp->cpu_ldtp, - sizeof(master_ldt)); - bcopy((char *)&master_ktss64, - (char *)&cdt->ktss, - sizeof(struct x86_64_tss)); + bcopy((char *)master_idt64, (char *)cdt->idt, sizeof(master_idt64)); + bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt)); + bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt)); + bcopy((char *)&master_ktss64, (char *)&cdt->ktss, sizeof(struct x86_64_tss)); /* * Fix up the entries in the GDT to point to * this LDT and this TSS. 
*/ - kernel_ldt_desc64.offset[0] = (vm_offset_t) cdi->cdi_ldt; + kernel_ldt_desc64.offset64 = UBER64(cdi->cdi_ldt); *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_LDT)] = kernel_ldt_desc64; fix_desc64(&cdt->gdt[sel_idx(KERNEL_LDT)], 1); - kernel_ldt_desc64.offset[0] = (vm_offset_t) cdi->cdi_ldt; + kernel_ldt_desc64.offset64 = UBER64(cdi->cdi_ldt); *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(USER_LDT)] = kernel_ldt_desc64; fix_desc64(&cdt->gdt[sel_idx(USER_LDT)], 1); - kernel_tss_desc64.offset[0] = (vm_offset_t) cdi->cdi_ktss; + kernel_tss_desc64.offset64 = UBER64(cdi->cdi_ktss); *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_TSS)] = kernel_tss_desc64; fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); + /* Set double-fault stack as IST1 */ + cdt->ktss.ist1 = UBER64((unsigned long)cdt->dfstk + sizeof(cdt->dfstk)); +#ifdef __i386__ cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); - /* Set double-fault stack as IST1 */ - cdt->ktss.ist1 = UBER64((unsigned long)cdt->dfstk - + sizeof(cdt->dfstk)); - - /* - * Allocate copyio windows. 
- */ + /* Allocate copyio windows */ cpu_userwindow_init(cdp->cpu_number); cpu_physwindow_init(cdp->cpu_number); +#endif } /* Require that the top of the sysenter stack is 16-byte aligned */ @@ -426,16 +659,94 @@ cpu_desc_init64( panic("cpu_desc_init64() sysenter stack not 16-byte aligned"); } +#ifdef __i386__ +void +cpu_desc_load(cpu_data_t *cdp) +{ + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + + cdi->cdi_idt.size = 0x1000 + cdp->cpu_number; + cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1; + + lgdt((unsigned long *) &cdi->cdi_gdt); + lidt((unsigned long *) &cdi->cdi_idt); + lldt(KERNEL_LDT); + + set_tr(KERNEL_TSS); + + __asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(CPU_DATA_GS))); +} +#endif /* __i386__ */ + +void +cpu_desc_load64(cpu_data_t *cdp) +{ + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + +#ifdef __i386__ + /* + * Load up the new descriptors etc + * ml_load_desc64() expects these global pseudo-descriptors: + * gdtptr64 -> per-cpu gdt + * idtptr64 -> per-cpu idt + * These are 10-byte descriptors with 64-bit addresses into + * uber-space. + * + * Refer to commpage/cpu_number.s for the IDT limit trick. 
+ */ + gdtptr64.length = GDTSZ * sizeof(struct real_descriptor64) - 1; + gdtptr64.offset[0] = (uint32_t) cdi->cdi_gdt.ptr; + gdtptr64.offset[1] = KERNEL_UBER_BASE_HI32; + idtptr64.length = 0x1000 + cdp->cpu_number; + idtptr64.offset[0] = (uint32_t) cdi->cdi_idt.ptr; + idtptr64.offset[1] = KERNEL_UBER_BASE_HI32; + + /* Make sure busy bit is cleared in the TSS */ + gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY; + + ml_load_desc64(); +#else + /* Load the GDT, LDT, IDT and TSS */ + cdi->cdi_gdt.size = sizeof(struct real_descriptor64)*GDTSZ - 1; + cdi->cdi_idt.size = 0x1000 + cdp->cpu_number; + lgdt((unsigned long *) &cdi->cdi_gdt); + lidt((unsigned long *) &cdi->cdi_idt); + lldt(KERNEL_LDT); + set_tr(KERNEL_TSS); + + /* Stuff the pre-cpu data area into the MSR and swapgs to activate */ + wrmsr64(MSR_IA32_KERNEL_GS_BASE, (unsigned long)cdp); +#if GPROF // Hack to enable mcount to work on K64 + __asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(KERNEL_DS))); +#endif + swapgs(); + + cpu_mode_init(cdp); +#endif +} + +#ifdef __i386__ +/* + * Set MSRs for sysenter/sysexit for 32-bit. + */ +static void +fast_syscall_init(__unused cpu_data_t *cdp) +{ + wrmsr(MSR_IA32_SYSENTER_CS, SYSENTER_CS, 0); + wrmsr(MSR_IA32_SYSENTER_EIP, HI_TEXT(hi_sysenter), 0); + wrmsr(MSR_IA32_SYSENTER_ESP, current_sstk(), 0); +} +#endif + /* - * Set MSRs for sysenter/sysexit for 64-bit. + * Set MSRs for sysenter/sysexit and syscall/sysret for 64-bit. */ static void -fast_syscall_init64(void) +fast_syscall_init64(__unused cpu_data_t *cdp) { wrmsr64(MSR_IA32_SYSENTER_CS, SYSENTER_CS); - wrmsr64(MSR_IA32_SYSENTER_EIP, UBER64(hi64_sysenter)); + wrmsr64(MSR_IA32_SYSENTER_EIP, UBER64((uintptr_t) hi64_sysenter)); wrmsr64(MSR_IA32_SYSENTER_ESP, UBER64(current_sstk())); - /* Enable syscall/sysret */ wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_SCE); @@ -444,9 +755,9 @@ fast_syscall_init64(void) * Note USER_CS because sysret uses this + 16 when returning to * 64-bit code. 
*/ - wrmsr64(MSR_IA32_LSTAR, UBER64(hi64_syscall)); - wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) | - (((uint64_t)KERNEL64_CS) << 32)); + wrmsr64(MSR_IA32_LSTAR, UBER64((uintptr_t) hi64_syscall)); + wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) | + (((uint64_t)KERNEL64_CS) << 32)); /* * Emulate eflags cleared by sysenter but note that * we also clear the trace trap to avoid the complications @@ -456,28 +767,18 @@ fast_syscall_init64(void) */ wrmsr64(MSR_IA32_FMASK, EFL_DF|EFL_IF|EFL_TF|EFL_NT); +#ifdef __i386__ /* * Set the Kernel GS base MSR to point to per-cpu data in uber-space. * The uber-space handler (hi64_syscall) uses the swapgs instruction. */ - wrmsr64(MSR_IA32_KERNEL_GS_BASE, - UBER64((unsigned long)current_cpu_datap())); + wrmsr64(MSR_IA32_KERNEL_GS_BASE, UBER64(cdp)); #if ONLY_SAFE_FOR_LINDA_SERIAL kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n", - rdmsr64(MSR_IA32_KERNEL_GS_BASE)); + rdmsr64(MSR_IA32_KERNEL_GS_BASE)); +#endif #endif -} - -/* - * Set MSRs for sysenter/sysexit - */ -static void -fast_syscall_init(void) -{ - wrmsr(MSR_IA32_SYSENTER_CS, SYSENTER_CS, 0); - wrmsr(MSR_IA32_SYSENTER_EIP, HI_TEXT(hi_sysenter), 0); - wrmsr(MSR_IA32_SYSENTER_ESP, current_sstk(), 0); } cpu_data_t * @@ -490,26 +791,21 @@ cpu_data_alloc(boolean_t is_boot_cpu) assert(real_ncpus == 1); cdp = &cpu_data_master; if (cdp->cpu_processor == NULL) { - simple_lock_init(&cpu_lock, 0); + simple_lock_init(&ncpus_lock, 0); cdp->cpu_processor = cpu_processor_alloc(TRUE); +#if NCOPY_WINDOWS > 0 cdp->cpu_pmap = pmap_cpu_alloc(TRUE); - cpu_desc_init(cdp, TRUE); - fast_syscall_init(); +#endif queue_init(&cdp->rtclock_timer.queue); cdp->rtclock_timer.deadline = EndOfAllTime; } return cdp; } - /* Check count before making allocations */ - if (real_ncpus >= max_ncpus) - return NULL; - /* * Allocate per-cpu data: */ - ret = kmem_alloc(kernel_map, - (vm_offset_t *) &cdp, sizeof(cpu_data_t)); + ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t)); 
if (ret != KERN_SUCCESS) { printf("cpu_data_alloc() failed, ret=%d\n", ret); goto abort; @@ -533,6 +829,7 @@ cpu_data_alloc(boolean_t is_boot_cpu) bzero((void*) cdp->cpu_int_stack_top, INTSTACK_SIZE); cdp->cpu_int_stack_top += INTSTACK_SIZE; + /* * Allocate descriptor table: * Size depends on cpu mode. @@ -557,18 +854,17 @@ cpu_data_alloc(boolean_t is_boot_cpu) goto abort; } +#if CONFIG_MCA /* Machine-check shadow register allocation. */ mca_cpu_alloc(cdp); +#endif + + simple_lock(&ncpus_lock); - simple_lock(&cpu_lock); - if (real_ncpus >= max_ncpus) { - simple_unlock(&cpu_lock); - goto abort; - } cpu_data_ptr[real_ncpus] = cdp; cdp->cpu_number = real_ncpus; real_ncpus++; - simple_unlock(&cpu_lock); + simple_unlock(&ncpus_lock); cdp->cpu_nanotime = &rtc_nanotime_info; queue_init(&cdp->rtclock_timer.queue); @@ -576,9 +872,9 @@ cpu_data_alloc(boolean_t is_boot_cpu) kprintf("cpu_data_alloc(%d) %p desc_table: %p " "ldt: %p " - "int_stack: 0x%x-0x%x\n", + "int_stack: 0x%lx-0x%lx\n", cdp->cpu_number, cdp, cdp->cpu_desc_tablep, cdp->cpu_ldtp, - cdp->cpu_int_stack_top - INTSTACK_SIZE, cdp->cpu_int_stack_top); + (long)(cdp->cpu_int_stack_top - INTSTACK_SIZE), (long)(cdp->cpu_int_stack_top)); return cdp; @@ -597,44 +893,45 @@ cpu_data_alloc(boolean_t is_boot_cpu) boolean_t valid_user_segment_selectors(uint16_t cs, - uint16_t ss, - uint16_t ds, - uint16_t es, - uint16_t fs, - uint16_t gs) + uint16_t ss, + uint16_t ds, + uint16_t es, + uint16_t fs, + uint16_t gs) { return valid_user_code_selector(cs) && - valid_user_stack_selector(ss) && - valid_user_data_selector(ds) && - valid_user_data_selector(es) && - valid_user_data_selector(fs) && - valid_user_data_selector(gs); + valid_user_stack_selector(ss) && + valid_user_data_selector(ds) && + valid_user_data_selector(es) && + valid_user_data_selector(fs) && + valid_user_data_selector(gs); } +#if NCOPY_WINDOWS > 0 + static vm_offset_t user_window_base = 0; void cpu_userwindow_init(int cpu) { cpu_data_t *cdp = cpu_data_ptr[cpu]; - 
cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - vm_offset_t user_window; - vm_offset_t vaddr; + vm_offset_t user_window; + vm_offset_t vaddr; int num_cpus; num_cpus = ml_get_max_cpus(); if (cpu >= num_cpus) - panic("cpu_userwindow_init: cpu > num_cpus"); + panic("cpu_userwindow_init: cpu > num_cpus"); if (user_window_base == 0) { - if (vm_allocate(kernel_map, &vaddr, - (NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE, - VM_FLAGS_ANYWHERE) != KERN_SUCCESS) - panic("cpu_userwindow_init: " - "couldn't allocate user map window"); + if (vm_allocate(kernel_map, &vaddr, + (NBPDE * NCOPY_WINDOWS * num_cpus) + NBPDE, + VM_FLAGS_ANYWHERE) != KERN_SUCCESS) + panic("cpu_userwindow_init: " + "couldn't allocate user map window"); /* * window must start on a page table boundary @@ -660,23 +957,24 @@ cpu_userwindow_init(int cpu) user_window); } - user_window = user_window_base + (cpu * NCOPY_WINDOWS * NBPDE); + user_window = user_window_base + (cpu * NCOPY_WINDOWS * NBPDE); cdp->cpu_copywindow_base = user_window; cdp->cpu_copywindow_pdp = pmap_pde(kernel_pmap, user_window); - cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)] = userwindow_desc_pattern; - cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)].offset = user_window; - - fix_desc(&cdi->cdi_gdt[sel_idx(USER_WINDOW_SEL)], 1); +#ifdef __i386__ + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)] = userwindow_desc_pattern; + cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)].offset = user_window; + fix_desc(&cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)], 1); +#endif /* __i386__ */ } void cpu_physwindow_init(int cpu) { cpu_data_t *cdp = cpu_data_ptr[cpu]; - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; vm_offset_t phys_window = cdp->cpu_physwindow_base; if (phys_window == 0) { @@ -696,67 +994,35 @@ cpu_physwindow_init(int cpu) cdp->cpu_physwindow_base = phys_window; cdp->cpu_physwindow_ptep = vtopte(phys_window); } +#ifdef __i386__ + cpu_desc_index_t *cdi = &cdp->cpu_desc_index; + cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)] = 
physwindow_desc_pattern; + cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)].offset = phys_window; - cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)] = physwindow_desc_pattern; - cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)].offset = phys_window; - - fix_desc(&cdi->cdi_gdt[sel_idx(PHYS_WINDOW_SEL)], 1); + fix_desc(&cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)], 1); +#endif /* __i386__ */ } +#endif /* NCOPY_WINDOWS > 0 */ - -typedef struct { - uint16_t length; - uint32_t offset[2]; -} __attribute__((__packed__)) table_descriptor64_t; - -extern table_descriptor64_t gdtptr64; -extern table_descriptor64_t idtptr64; /* * Load the segment descriptor tables for the current processor. */ -void -cpu_desc_load64(cpu_data_t *cdp) -{ - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - - /* - * Load up the new descriptors etc - * ml_load_desc64() expects these global pseudo-descriptors: - * gdtptr64 -> master_gdt - * idtptr64 -> master_idt64 - * These are 10-byte descriptors with 64-bit addresses into - * uber-space. - */ - gdtptr64.length = sizeof(master_gdt) - 1; - gdtptr64.offset[0] = (uint32_t) cdi->cdi_gdt; - gdtptr64.offset[1] = KERNEL_UBER_BASE_HI32; - idtptr64.length = sizeof(master_idt64) - 1; - idtptr64.offset[0] = (uint32_t) cdi->cdi_idt; - idtptr64.offset[1] = KERNEL_UBER_BASE_HI32; - - /* Make sure busy bit is cleared in the TSS */ - gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY; - - ml_load_desc64(); - -#if ONLY_SAFE_FOR_LINDA_SERIAL - kprintf("64-bit descriptor tables loaded\n"); -#endif -} - void cpu_mode_init(cpu_data_t *cdp) { +#ifdef __i386__ if (cpu_mode_is64bit()) { cpu_IA32e_enable(cdp); cpu_desc_load64(cdp); - fast_syscall_init64(); + fast_syscall_init64(cdp); } else { - fast_syscall_init(); + fast_syscall_init(cdp); } +#else + fast_syscall_init64(cdp); +#endif /* Call for per-cpu pmap mode initialization */ pmap_cpu_init(); - } diff --git a/osfmk/i386/mp_desc.h b/osfmk/i386/mp_desc.h index d9ada40cb..14d186eb1 100644 --- a/osfmk/i386/mp_desc.h +++ b/osfmk/i386/mp_desc.h @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,8 +99,8 @@ typedef struct cpu_desc_table64 { uint8_t dfstk[PAGE_SIZE] __attribute__ ((aligned (16))); } cpu_desc_table64_t; -#define current_gdt() (current_cpu_datap()->cpu_desc_index.cdi_gdt) -#define current_idt() (current_cpu_datap()->cpu_desc_index.cdi_idt) +#define current_gdt() (current_cpu_datap()->cpu_desc_index.cdi_gdt.ptr) +#define current_idt() (current_cpu_datap()->cpu_desc_index.cdi_idt.ptr) #define current_ldt() (current_cpu_datap()->cpu_desc_index.cdi_ldt) #define current_ktss() (current_cpu_datap()->cpu_desc_index.cdi_ktss) #define current_dbtss() (current_cpu_datap()->cpu_desc_index.cdi_dbtss) @@ -110,20 +110,16 @@ typedef struct cpu_desc_table64 { #define current_sstk64() ((addr64_t *) current_sstk()) #define gdt_desc_p(sel) \ - ((struct real_descriptor *)¤t_gdt()[sel_idx(sel)]) + (&((struct real_descriptor *)current_gdt())[sel_idx(sel)]) #define ldt_desc_p(sel) \ - ((struct real_descriptor *)¤t_ldt()[sel_idx(sel)]) - -extern void cpu_desc_init( - cpu_data_t *cdp, - boolean_t is_boot_cpu); -extern void cpu_desc_init64( - cpu_data_t *cdp, - boolean_t is_boot_cpu); -extern void cpu_desc_load64( - cpu_data_t *cdp); -extern void cpu_mode_init( - cpu_data_t *cdp); + (&((struct real_descriptor *)current_ldt())[sel_idx(sel)]) + +extern void cpu_mode_init(cpu_data_t *cdp); + +extern void cpu_desc_init(cpu_data_t *cdp); +extern void cpu_desc_init64(cpu_data_t *cdp); +extern void cpu_desc_load(cpu_data_t *cdp); +extern void cpu_desc_load64(cpu_data_t *cdp); static inline boolean_t valid_user_data_selector(uint16_t selector) @@ -193,4 +189,4 @@ valid_user_segment_selectors(uint16_t cs, __END_DECLS -#endif /* _I386_MP_DESC_H_ */ +#endif /* _X86_64_MP_DESC_H_ */ diff --git a/osfmk/i386/mp_slave_boot.s b/osfmk/i386/mp_slave_boot.s deleted file mode 100644 index 
15a9d811c..000000000 --- a/osfmk/i386/mp_slave_boot.s +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ - -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#include -#include -#include - -#define CR0_PE_ON 0x1 -#define CR0_PE_OFF 0xfffffffe - - .file "slave_boot.s" - - .text - .align 12 // Page align for single bcopy_phys() - -#define operand_size_prefix .byte 0x66 -#define address_size_prefix .byte 0x67 - -#define LJMP(segment,address) \ - operand_size_prefix ;\ - .byte 0xea ;\ - .long address-EXT(slave_boot_base) ;\ - .word segment - -#define LGDT(address) \ - address_size_prefix ;\ - operand_size_prefix ;\ - .word 0x010f ;\ - .byte 0x15 ;\ - .long address-EXT(slave_boot_base) - -Entry(slave_boot_base) - /* code is loaded at 0x0:0x1000 */ - /* ljmp to the next instruction to set up %cs */ - LJMP(MP_BOOTSEG, EXT(slave_rstart)) - -Entry(slave_rstart) - /* set up %ds */ - mov %cs, %ax - mov %ax, %ds - - POSTCODE(SLAVE_RSTART_ENTRY); - - /* set up %ss and %esp */ - mov %cs, %ax - mov %ax, %ss - mov $(MP_BOOTSTACK), %esp - - /*set up %es */ - mov %ax, %es - - /* change to protected mode */ - operand_size_prefix - call EXT(slave_real_to_prot) - - push MP_MACH_START - call EXT(slave_startprog) - -/* - slave_real_to_prot() - transfer from real mode to protected mode. 
-*/ - -Entry(slave_real_to_prot) - /* guarantee that interrupt is disabled when in prot mode */ - cli - - POSTCODE(SLAVE_REAL_TO_PROT_ENTRY); - - /* load the gdtr */ - LGDT(EXT(gdtr)) - - /* load the gdtr */ - /* set the PE bit of CR0 */ - mov %cr0, %eax - or $(CR0_PE_ON), %eax - mov %eax, %cr0 - - /* make intrasegment jump to flush the processor pipeline and */ - /* reload CS register */ - LJMP(0x08, xprot) -xprot: - - /* we are in USE32 mode now */ - /* set up the protective mode segment registers : DS, SS, ES */ - mov $0x10, %eax - movw %ax, %ds - movw %ax, %ss - movw %ax, %es - - POSTCODE(SLAVE_REAL_TO_PROT_EXIT); - - ret - -/* - slave_startprog(phyaddr) - start the program on protected mode where phyaddr is the entry point -*/ - -Entry(slave_startprog) - push %ebp - movl %esp, %ebp - - POSTCODE(SLAVE_STARTPROG_ENTRY); - - movl 0x8(%ebp), %ecx /* entry offset */ - movl $0x28, %ebx /* segment */ - push %ebx - push %ecx - - /* set up %ds and %es */ - movl $0x20, %ebx - movw %bx, %ds - movw %bx, %es - - POSTCODE(SLAVE_STARTPROG_EXIT); - - lret - - - . 
= MP_BOOTGDT-MP_BOOT /* GDT location */ -Entry(Gdt) - -/* Segment Descriptor - * - * 31 24 19 16 7 0 - * ------------------------------------------------------------ - * | | |B| |A| | | |1|0|E|W|A| | - * | BASE 31..24 |G|/|0|V| LIMIT |P|DPL| TYPE | BASE 23:16 | - * | | |D| |L| 19..16| | |1|1|C|R|A| | - * ------------------------------------------------------------ - * | | | - * | BASE 15..0 | LIMIT 15..0 | - * | | | - * ------------------------------------------------------------ - */ - .word 0,0 /* 0x0 : null */ - .byte 0,0,0,0 - - .word 0xffff,MP_BOOT /* 0x8 : boot code */ - .byte 0,0x9e,0xcf,0 - - .word 0xffff,MP_BOOT /* 0x10 : boot data */ - .byte 0,0x92,0xcf,0 - - .word 0xffff,MP_BOOT /* 0x18 : boot code, 16 bits */ - .byte 0,0x9e,0x0,0 - - .word 0xffff,0 /* 0x20 : init data */ - .byte 0,0x9f,0xcf,0 - - .word 0xffff,0 /* 0x28 : init code */ - .byte 0,0x9f,0xcf,0 - -Entry(gdtr) - .short 48 /* limit (8*6 segs) */ - .short MP_BOOTGDT /* base low */ - .short 0 /* base high */ - -Entry(slave_boot_end) - - - - - - - - - - - - - - diff --git a/osfmk/i386/mtrr.c b/osfmk/i386/mtrr.c index cd818d511..9129f40c6 100644 --- a/osfmk/i386/mtrr.c +++ b/osfmk/i386/mtrr.c @@ -30,8 +30,8 @@ #include #include #include -#include #include +#include #include #include @@ -106,7 +106,7 @@ static int var_range_overlap(mtrr_var_range_t * range, addr64_t address, static uint64_t mtrr_phys_mask = PHYS_BITS_TO_MASK(36); #define IA32_MTRR_PHYMASK_VALID 0x0000000000000800ULL -#define IA32_MTRR_PHYSBASE_MASK (mtrr_phys_mask & ~0xFFF) +#define IA32_MTRR_PHYSBASE_MASK (mtrr_phys_mask & ~0x0000000000000FFFULL) #define IA32_MTRR_PHYSBASE_TYPE 0x00000000000000FFULL /* @@ -186,6 +186,42 @@ mtrr_set_fix_ranges(const struct mtrr_fix_range * range) wrmsr64(MSR_IA32_MTRR_FIX4K_C0000 + i, range[3 + i].types); } +static boolean_t +mtrr_check_fix_ranges(const struct mtrr_fix_range * range) +{ + int i; + boolean_t match = TRUE; + + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); + + /* assume 11 fix 
range registers */ + match = range[0].types == rdmsr64(MSR_IA32_MTRR_FIX64K_00000) && + range[1].types == rdmsr64(MSR_IA32_MTRR_FIX16K_80000) && + range[2].types == rdmsr64(MSR_IA32_MTRR_FIX16K_A0000); + for (i = 0; match && i < 8; i++) { + match = range[3 + i].types == + rdmsr64(MSR_IA32_MTRR_FIX4K_C0000 + i); + } + + return match; +} + +static boolean_t +mtrr_check_var_ranges(mtrr_var_range_t * range, int count) +{ + int i; + boolean_t match = TRUE; + + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); + + for (i = 0; match && i < count; i++) { + match = range[i].base == rdmsr64(MSR_IA32_MTRR_PHYSBASE(i)) && + range[i].mask == rdmsr64(MSR_IA32_MTRR_PHYSMASK(i)); + } + + return match; +} + #if MTRR_DEBUG static void mtrr_msr_dump(void) @@ -241,7 +277,7 @@ mtrr_init(void) mtrr_state.MTRRcap = rdmsr64(MSR_IA32_MTRRCAP); mtrr_state.MTRRdefType = rdmsr64(MSR_IA32_MTRR_DEF_TYPE); - mtrr_state.var_count = mtrr_state.MTRRcap & IA32_MTRRCAP_VCNT; + mtrr_state.var_count = (unsigned int)(mtrr_state.MTRRcap & IA32_MTRRCAP_VCNT); /* allocate storage for variable ranges (can block?) */ if (mtrr_state.var_count) { @@ -266,6 +302,7 @@ mtrr_init(void) #if MTRR_DEBUG mtrr_msr_dump(); /* dump firmware settings */ #endif + } /* @@ -277,8 +314,8 @@ mtrr_init(void) static void mtrr_update_action(void * cache_control_type) { - uint32_t cr0, cr4; - uint32_t tmp; + uintptr_t cr0, cr4; + uintptr_t tmp; cr0 = get_cr0(); cr4 = get_cr4(); @@ -375,20 +412,55 @@ mtrr_update_all_cpus(void) } /* - * Update a single CPU with the current MTRR settings. Can be called - * during slave processor initialization to mirror the MTRR settings + * Verify that a processor has been set with the BSP's MTRR settings. Called + * during slave processor initialization to check and set MTRR settings * discovered on the boot processor by mtrr_init(). 
*/ kern_return_t mtrr_update_cpu(void) { + boolean_t match = TRUE; + if (mtrr_initialized == FALSE) return KERN_NOT_SUPPORTED; + DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__); + MTRR_LOCK(); - mtrr_update_setup(NULL); - mtrr_update_action(NULL); - mtrr_update_teardown(NULL); + + /* Check MSR_IA32_MTRR_DEF_TYPE MSR */ + match = mtrr_state.MTRRdefType == rdmsr64(MSR_IA32_MTRR_DEF_TYPE); + + /* Check MSR_IA32_MTRRCAP MSR */ + if (match) { + match = mtrr_state.MTRRcap == rdmsr64(MSR_IA32_MTRRCAP); + } + + /* Check variable ranges */ + if (match && mtrr_state.var_count) { + match = mtrr_check_var_ranges(mtrr_state.var_range, + mtrr_state.var_count); + } + + /* Check fixed ranges */ + if (match && (mtrr_state.MTRRcap & IA32_MTRRCAP_FIX)) { + match = mtrr_check_fix_ranges(mtrr_state.fix_range); + } + +#if MTRR_DEBUG + if (!match) + mtrr_msr_dump(); +#endif + if (!match) { + DBG("mtrr_update_cpu() setting MTRR for cpu %d\n", + get_cpu_number()); + mtrr_update_action(NULL); + } +#if MTRR_DEBUG + if (!match) + mtrr_msr_dump(); +#endif + MTRR_UNLOCK(); return KERN_SUCCESS; @@ -414,6 +486,7 @@ mtrr_range_add(addr64_t address, uint64_t length, uint32_t type) return KERN_NOT_SUPPORTED; } + /* check memory type (GPF exception for undefined types) */ if ((type != MTRR_TYPE_UNCACHEABLE) && (type != MTRR_TYPE_WRITECOMBINE) && @@ -554,7 +627,7 @@ var_range_encode(mtrr_var_range_t * range, addr64_t address, uint64_t length, uint32_t type, int valid) { range->base = (address & IA32_MTRR_PHYSBASE_MASK) | - (type & IA32_MTRR_PHYSBASE_TYPE); + (type & (uint32_t)IA32_MTRR_PHYSBASE_TYPE); range->mask = LEN_TO_MASK(length) | (valid ? 
IA32_MTRR_PHYMASK_VALID : 0); @@ -569,7 +642,7 @@ var_range_overlap(mtrr_var_range_t * range, addr64_t address, int result = 0; /* no overlap, or overlap ok */ v_address = range->base & IA32_MTRR_PHYSBASE_MASK; - v_type = range->base & IA32_MTRR_PHYSBASE_TYPE; + v_type = (uint32_t)(range->base & IA32_MTRR_PHYSBASE_TYPE); v_length = MASK_TO_LEN(range->mask); /* detect range overlap */ @@ -603,10 +676,20 @@ var_range_overlap(mtrr_var_range_t * range, addr64_t address, void pat_init(void) { - if (cpuid_features() & CPUID_FEATURE_PAT) - { - boolean_t istate = ml_set_interrupts_enabled(FALSE); + boolean_t istate; + uint64_t pat; + + if (!(cpuid_features() & CPUID_FEATURE_PAT)) + return; + + istate = ml_set_interrupts_enabled(FALSE); + + pat = rdmsr64(MSR_IA32_CR_PAT); + DBG("CPU%d PAT: was 0x%016llx\n", get_cpu_number(), pat); + + /* Change PA6 attribute field to WC if required */ + if ((pat & ~(0x0FULL << 48)) != (0x01ULL << 48)) { mtrr_update_action(CACHE_CONTROL_PAT); - ml_set_interrupts_enabled(istate); } + ml_set_interrupts_enabled(istate); } diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index 70d007f52..36d4fec6e 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -64,9 +64,6 @@ #include #include -#include -#include - #include #include #include @@ -86,20 +83,29 @@ #include #include -#include +#include +#include #include #include -#include #include #include #include #include -#include #include +#include +#if defined(__i386__) +#include +#endif +#include #include +#include /* LAPIC_PMC_SWI_VECTOR */ #include +#if CONFIG_COUNTERS +#include +#endif /* CONFIG_COUNTERS */ + /* * Maps state flavor to number of words in the state: */ @@ -153,6 +159,49 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts); static int set_thread_state64(thread_t thread, x86_thread_state64_t *ts); +#if CONFIG_COUNTERS +static inline void +machine_pmc_cswitch(thread_t /* old */, thread_t /* new */); + +static inline boolean_t +machine_thread_pmc_eligible(thread_t); + +static 
inline void +pmc_swi(thread_t /* old */, thread_t /*new */); + +static inline boolean_t +machine_thread_pmc_eligible(thread_t t) { + /* + * NOTE: Task-level reservations are propagated to child threads via + * thread_create_internal. Any mutation of task reservations forces a + * recalculate of t_chud (for the pmc flag) for all threads in that task. + * Consequently, we can simply check the current thread's flag against + * THREAD_PMC_FLAG. If the result is non-zero, we SWI for a PMC switch. + */ + return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE; +} + +static inline void +pmc_swi(thread_t old, thread_t new) { + current_cpu_datap()->csw_old_thread = old; + current_cpu_datap()->csw_new_thread = new; + __asm__ __volatile__("int %0"::"i"(LAPIC_PMC_SWI_VECTOR):"memory"); +} + +static inline void +machine_pmc_cswitch(thread_t old, thread_t new) { + if (machine_thread_pmc_eligible(old) || machine_thread_pmc_eligible(new)) { + pmc_swi(old, new); + } +} + +void ml_get_csw_threads(thread_t *old, thread_t *new) { + *old = current_cpu_datap()->csw_old_thread; + *new = current_cpu_datap()->csw_new_thread; +} + +#endif /* CONFIG_COUNTERS */ + /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns @@ -176,9 +225,13 @@ dr7_is_valid(uint32_t *dr7) return (FALSE); /* - * len0-3 pattern "10B" is ok for len on 64-bit. + * len0-3 pattern "10B" is ok for len on Merom and newer processors + * (it signifies an 8-byte wide region). We use the 64bit capability + * of the processor in lieu of the more laborious model/family checks + * as all 64-bit capable processors so far support this. + * Reject an attempt to use this on 64-bit incapable processors. 
*/ - if (current_cpu_datap()->cpu_is64bit == TRUE) + if (current_cpu_datap()->cpu_is64bit == FALSE) for (i = 0, mask1 = 0x3<<18, mask2 = 0x2<<18; i < 4; i++, mask1 <<= 4, mask2 <<= 4) if ((*dr7 & mask1) == mask2) @@ -246,6 +299,67 @@ set_live_debug_state64(cpu_data_t *cdp, x86_debug_state64_t *ds) cdp->cpu_dr7 = ds->dr7; } +boolean_t +debug_state_is_valid32(x86_debug_state32_t *ds) +{ + if (!dr7_is_valid(&ds->dr7)) + return FALSE; + +#if defined(__i386__) + /* + * Only allow local breakpoints and make sure they are not + * in the trampoline code. + */ + if (ds->dr7 & 0x1) + if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; + + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; + + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; + + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; +#endif + + return TRUE; +} + +boolean_t +debug_state_is_valid64(x86_debug_state64_t *ds) +{ + if (!dr7_is_valid((uint32_t *)&ds->dr7)) + return FALSE; + + /* + * Don't allow the user to set debug addresses above their max + * value + */ + if (ds->dr7 & 0x1) + if (ds->dr0 >= VM_MAX_PAGE_ADDRESS) + return FALSE; + + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= VM_MAX_PAGE_ADDRESS) + return FALSE; + + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= VM_MAX_PAGE_ADDRESS) + return FALSE; + + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= VM_MAX_PAGE_ADDRESS) + return FALSE; + + return TRUE; +} + + static kern_return_t set_debug_state32(thread_t thread, x86_debug_state32_t *ds) { @@ -255,6 +369,10 @@ set_debug_state32(thread_t thread, x86_debug_state32_t *ds) pcb = thread->machine.pcb; ids = pcb->ids; + if (debug_state_is_valid32(ds) != TRUE) { + return KERN_INVALID_ARGUMENT; + } + if (ids == NULL) { ids = zalloc(ids_zone); bzero(ids, sizeof *ids); @@ -270,41 +388,10 @@ set_debug_state32(thread_t thread, x86_debug_state32_t *ds) } } - if (!dr7_is_valid(&ds->dr7)) - goto err; - - /* - * Only 
allow local breakpoints and make sure they are not - * in the trampoline code. - */ - if (ds->dr7 & 0x1) - if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE) - goto err; - - if (ds->dr7 & (0x1<<2)) - if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE) - goto err; - - if (ds->dr7 & (0x1<<4)) - if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE) - goto err; - - if (ds->dr7 & (0x1<<6)) - if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE) - goto err; - - ids->dr0 = ds->dr0; - ids->dr1 = ds->dr1; - ids->dr2 = ds->dr2; - ids->dr3 = ds->dr3; - ids->dr6 = ds->dr6; - ids->dr7 = ds->dr7; + copy_debug_state32(ds, ids, FALSE); return (KERN_SUCCESS); - -err: - return (KERN_INVALID_ARGUMENT); } static kern_return_t @@ -316,6 +403,10 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds) pcb = thread->machine.pcb; ids = pcb->ids; + if (debug_state_is_valid64(ds) != TRUE) { + return KERN_INVALID_ARGUMENT; + } + if (ids == NULL) { ids = zalloc(ids_zone); bzero(ids, sizeof *ids); @@ -331,40 +422,9 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds) } } - if (!dr7_is_valid((uint32_t *)&ds->dr7)) - goto err; - - /* - * Don't allow the user to set debug addresses above their max - * value - */ - if (ds->dr7 & 0x1) - if (ds->dr0 >= VM_MAX_PAGE_ADDRESS) - goto err; - - if (ds->dr7 & (0x1<<2)) - if (ds->dr1 >= VM_MAX_PAGE_ADDRESS) - goto err; - - if (ds->dr7 & (0x1<<4)) - if (ds->dr2 >= VM_MAX_PAGE_ADDRESS) - goto err; - - if (ds->dr7 & (0x1<<6)) - if (ds->dr3 >= VM_MAX_PAGE_ADDRESS) - goto err; - - ids->dr0 = ds->dr0; - ids->dr1 = ds->dr1; - ids->dr2 = ds->dr2; - ids->dr3 = ds->dr3; - ids->dr6 = ds->dr6; - ids->dr7 = ds->dr7; + copy_debug_state64(ds, ids, FALSE); return (KERN_SUCCESS); - -err: - return (KERN_INVALID_ARGUMENT); } static void @@ -375,14 +435,7 @@ get_debug_state32(thread_t thread, x86_debug_state32_t *ds) saved_state = thread->machine.pcb->ids; if (saved_state) { - ds->dr0 = saved_state->dr0; - ds->dr1 = saved_state->dr1; - ds->dr2 = saved_state->dr2; - ds->dr3 = 
saved_state->dr3; - ds->dr4 = saved_state->dr4; - ds->dr5 = saved_state->dr5; - ds->dr6 = saved_state->dr6; - ds->dr7 = saved_state->dr7; + copy_debug_state32(saved_state, ds, TRUE); } else bzero(ds, sizeof *ds); } @@ -395,14 +448,7 @@ get_debug_state64(thread_t thread, x86_debug_state64_t *ds) saved_state = (x86_debug_state64_t *)thread->machine.pcb->ids; if (saved_state) { - ds->dr0 = saved_state->dr0; - ds->dr1 = saved_state->dr1; - ds->dr2 = saved_state->dr2; - ds->dr3 = saved_state->dr3; - ds->dr4 = saved_state->dr4; - ds->dr5 = saved_state->dr5; - ds->dr6 = saved_state->dr6; - ds->dr7 = saved_state->dr7; + copy_debug_state64(saved_state, ds, TRUE); } else bzero(ds, sizeof *ds); } @@ -423,6 +469,147 @@ consider_machine_adjust(void) } extern void *get_bsduthreadarg(thread_t th); +#if defined(__x86_64__) +static void +act_machine_switch_pcb( thread_t new ) +{ + pcb_t pcb = new->machine.pcb; + struct real_descriptor *ldtp; + mach_vm_offset_t pcb_stack_top; + cpu_data_t *cdp = current_cpu_datap(); + + assert(new->kernel_stack != 0); + + if (!cpu_mode_is64bit()) { + panic("K64 is 64bit!"); + } else if (is_saved_state64(pcb->iss)) { + /* + * The test above is performed against the thread save state + * flavor and not task's 64-bit feature flag because of the + * thread/task 64-bit state divergence that can arise in + * task_set_64bit() x86: the task state is changed before + * the individual thread(s). + */ + x86_saved_state64_tagged_t *iss64; + vm_offset_t isf; + + assert(is_saved_state64(pcb->iss)); + + iss64 = (x86_saved_state64_tagged_t *) pcb->iss; + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. 
+ */ + isf = (vm_offset_t) &iss64->state.isf; + cdp->cpu_uber.cu_isf = isf; + pcb_stack_top = (vm_offset_t) (iss64 + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = pcb_stack_top; + + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = pcb_stack_top; + + cdp->cpu_task_map = new->map->pmap->pm_task_map; + + /* + * Enable the 64-bit user code segment, USER64_CS. + * Disable the 32-bit user code segment, USER_CS. + */ + ldt_desc_p(USER64_CS)->access |= ACC_PL_U; + ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; + + /* + * Switch user's GS base if necessary + * by setting the Kernel's GS base MSR + * - this will become the user's on the swapgs when + * returning to user-space. + */ + if (cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) { + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); + } + } else { + x86_saved_state_compat32_t *iss32compat; + vm_offset_t isf; + + assert(is_saved_state32(pcb->iss)); + iss32compat = (x86_saved_state_compat32_t *) pcb->iss; + + pcb_stack_top = (uintptr_t) (iss32compat + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by debug trap handler. + */ + isf = (vm_offset_t) &iss32compat->isf64; + cdp->cpu_uber.cu_isf = isf; + + /* Top of temporary sysenter stack points to pcb stack */ + *current_sstk64() = pcb_stack_top; + + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = pcb_stack_top; + + cdp->cpu_task_map = TASK_MAP_32BIT; + /* Precalculate pointers to syscall argument store, for use + * in the trampolines. 
+ */ + cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new); + cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid; + pcb->arg_store_valid = 0; + + /* + * Disable USER64_CS + * Enable USER_CS + */ + ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; + ldt_desc_p(USER_CS)->access |= ACC_PL_U; + + /* + * Set the thread`s cthread (a.k.a pthread) + * For 32-bit user this involves setting the USER_CTHREAD + * descriptor in the LDT to point to the cthread data. + * The involves copying in the pre-initialized descriptor. + */ + ldtp = (struct real_descriptor *)current_ldt(); + ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; + if (pcb->uldt_selector != 0) + ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + + /* + * Set the thread`s LDT or LDT entry. + */ + if (new->task == TASK_NULL || new->task->i386_ldt == 0) { + /* + * Use system LDT. + */ + ml_cpu_set_ldt(KERNEL_LDT); + } else { + /* + * Task has its own LDT. + */ + user_ldt_set(new); + } + } + + /* + * Bump the scheduler generation count in the commpage. + * This can be read by user code to detect its preemption. + */ + commpage_sched_gen_inc(); +} +#else static void act_machine_switch_pcb( thread_t new ) { @@ -430,7 +617,7 @@ act_machine_switch_pcb( thread_t new ) struct real_descriptor *ldtp; vm_offset_t pcb_stack_top; vm_offset_t hi_pcb_stack_top; - vm_offset_t hi_iss; + vm_offset_t hi_iss; cpu_data_t *cdp = current_cpu_datap(); assert(new->kernel_stack != 0); @@ -557,6 +744,8 @@ act_machine_switch_pcb( thread_t new ) ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; if (pcb->uldt_selector != 0) ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; + + /* * For 64-bit, we additionally set the 64-bit User GS base * address. On return to 64-bit user, the GS.Base MSR will be written. @@ -584,6 +773,7 @@ act_machine_switch_pcb( thread_t new ) */ commpage_sched_gen_inc(); } +#endif /* * Switch to the first thread on a CPU. 
@@ -592,6 +782,9 @@ void machine_load_context( thread_t new) { +#if CONFIG_COUNTERS + machine_pmc_cswitch(NULL, new); +#endif new->machine.specFlags |= OnProc; act_machine_switch_pcb(new); Load_context(new); @@ -611,15 +804,30 @@ machine_switch_context( #if MACH_RT assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack); #endif - +#if CONFIG_COUNTERS + machine_pmc_cswitch(old, new); +#endif /* * Save FP registers if in use. */ fpu_save_context(old); + old->machine.specFlags &= ~OnProc; new->machine.specFlags |= OnProc; + /* + * Monitor the stack depth and report new max, + * not worrying about races. + */ + vm_offset_t depth = current_stack_depth(); + if (depth > kernel_stack_depth_max) { + kernel_stack_depth_max = depth; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH), + (long) depth, 0, 0, 0, 0); + } + /* * Switch address maps if need be, even if not switching tasks. * (A server activation may be "borrowing" a client map.) @@ -634,6 +842,20 @@ machine_switch_context( return(Switch_context(old, continuation, new)); } +thread_t +machine_processor_shutdown( + thread_t thread, + void (*doshutdown)(processor_t), + processor_t processor) +{ +#if CONFIG_VMX + vmx_suspend(); +#endif + fpu_save_context(thread); + PMAP_SWITCH_CONTEXT(thread, processor->idle_thread, cpu_number()); + return(Shutdown_context(thread, doshutdown, processor)); +} + /* * act_machine_sv_free * release saveareas associated with an act. if flag is true, release @@ -658,13 +880,19 @@ machine_thread_state_initialize( * The initialized state will then be lazily faulted-in, if required. * And if we're target, re-arm the no-fpu trap. 
*/ - if (thread->machine.pcb->ifps) { - (void) fpu_set_fxstate(thread, NULL); + if (thread->machine.pcb->ifps) { + (void) fpu_set_fxstate(thread, NULL); - if (thread == current_thread()) - clear_fpu(); - } - return KERN_SUCCESS; + if (thread == current_thread()) + clear_fpu(); + } + + if (thread->machine.pcb->ids) { + zfree(ids_zone, thread->machine.pcb->ids); + thread->machine.pcb->ids = NULL; + } + + return KERN_SUCCESS; } uint32_t @@ -707,7 +935,7 @@ get_exception_state64(thread_t thread, x86_exception_state64_t *es) saved_state = USER_REGS64(thread); es->trapno = saved_state->isf.trapno; - es->err = saved_state->isf.err; + es->err = (typeof(es->err))saved_state->isf.err; es->faultvaddr = saved_state->cr2; } @@ -729,15 +957,28 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts) { x86_saved_state32_t *saved_state; + saved_state = USER_REGS32(thread); /* * Scrub segment selector values: */ - if (ts->cs != USER_CS) ts->cs = USER_CS; + ts->cs = USER_CS; +#ifdef __i386__ if (ts->ss == 0) ts->ss = USER_DS; if (ts->ds == 0) ts->ds = USER_DS; if (ts->es == 0) ts->es = USER_DS; +#else /* __x86_64__ */ + /* + * On a 64 bit kernel, we always override the data segments, + * as the actual selector numbers have changed. This also + * means that we don't support setting the data segments + * manually any more. 
+ */ + ts->ss = USER_DS; + ts->ds = USER_DS; + ts->es = USER_DS; +#endif /* Check segment selectors are safe */ if (!valid_user_segment_selectors(ts->cs, @@ -781,6 +1022,7 @@ set_thread_state64(thread_t thread, x86_thread_state64_t *ts) { x86_saved_state64_t *saved_state; + saved_state = USER_REGS64(thread); if (!IS_USERADDR64_CANONICAL(ts->rsp) || @@ -806,8 +1048,8 @@ set_thread_state64(thread_t thread, x86_thread_state64_t *ts) saved_state->isf.rflags = (ts->rflags & ~EFL_USER_CLEAR) | EFL_USER_SET; saved_state->isf.rip = ts->rip; saved_state->isf.cs = USER64_CS; - saved_state->fs = ts->fs; - saved_state->gs = ts->gs; + saved_state->fs = (uint32_t)ts->fs; + saved_state->gs = (uint32_t)ts->gs; return(KERN_SUCCESS); } @@ -819,6 +1061,7 @@ get_thread_state32(thread_t thread, x86_thread_state32_t *ts) { x86_saved_state32_t *saved_state; + saved_state = USER_REGS32(thread); ts->eax = saved_state->eax; @@ -845,6 +1088,7 @@ get_thread_state64(thread_t thread, x86_thread_state64_t *ts) { x86_saved_state64_t *saved_state; + saved_state = USER_REGS64(thread); ts->r8 = saved_state->r8; @@ -877,13 +1121,19 @@ thread_set_wq_state32(thread_t thread, thread_state_t tstate) x86_thread_state32_t *state; x86_saved_state32_t *saved_state; thread_t curth = current_thread(); + spl_t s=0; + saved_state = USER_REGS32(thread); + state = (x86_thread_state32_t *)tstate; - if (curth != thread) + if (curth != thread) { + s = splsched(); thread_lock(thread); + } + saved_state->ebp = 0; saved_state->eip = state->eip; saved_state->eax = state->eax; saved_state->ebx = state->ebx; @@ -899,8 +1149,11 @@ thread_set_wq_state32(thread_t thread, thread_state_t tstate) saved_state->ds = USER_DS; saved_state->es = USER_DS; - if (curth != thread) + + if (curth != thread) { thread_unlock(thread); + splx(s); + } } @@ -910,13 +1163,18 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) x86_thread_state64_t *state; x86_saved_state64_t *saved_state; thread_t curth = current_thread(); + spl_t 
s=0; + saved_state = USER_REGS64(thread); state = (x86_thread_state64_t *)tstate; - if (curth != thread) + if (curth != thread) { + s = splsched(); thread_lock(thread); + } + saved_state->rbp = 0; saved_state->rdi = state->rdi; saved_state->rsi = state->rsi; saved_state->rdx = state->rdx; @@ -929,8 +1187,11 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) saved_state->isf.cs = USER64_CS; saved_state->isf.rflags = EFL_USER_SET; - if (curth != thread) + + if (curth != thread) { thread_unlock(thread); + splx(s); + } } @@ -971,6 +1232,7 @@ machine_thread_set_state( state->gs)) return KERN_INVALID_ARGUMENT; + saved_state = USER_REGS32(thr_act); /* @@ -1008,6 +1270,7 @@ machine_thread_set_state( saved_state->es = state->es; saved_state->fs = state->fs; saved_state->gs = state->gs; + break; } @@ -1039,6 +1302,7 @@ machine_thread_set_state( !IS_USERADDR64_CANONICAL(state->isf.rip)) return KERN_INVALID_ARGUMENT; + saved_state = USER_REGS64(thr_act); /* @@ -1074,6 +1338,7 @@ machine_thread_set_state( saved_state->isf.ss = state->isf.ss; saved_state->fs = state->fs; saved_state->gs = state->gs; + break; } @@ -1549,93 +1814,163 @@ machine_thread_get_kern_state( thread_state_t tstate, mach_msg_type_number_t *count) { + x86_saved_state_t *int_state = current_cpu_datap()->cpu_int_state; /* * This works only for an interrupted kernel thread */ - if (thread != current_thread() || current_cpu_datap()->cpu_int_state == NULL) + if (thread != current_thread() || int_state == NULL) return KERN_FAILURE; - switch(flavor) { - case x86_THREAD_STATE32: - { - x86_thread_state32_t *state; - x86_saved_state32_t *saved_state; + switch (flavor) { + case x86_THREAD_STATE32: { + x86_thread_state32_t *state; + x86_saved_state32_t *saved_state; + + if (!is_saved_state32(int_state) || + *count < x86_THREAD_STATE32_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state32_t *) tstate; - if (*count < x86_THREAD_STATE32_COUNT) - return(KERN_INVALID_ARGUMENT); + saved_state 
= saved_state32(int_state); + /* + * General registers. + */ + state->eax = saved_state->eax; + state->ebx = saved_state->ebx; + state->ecx = saved_state->ecx; + state->edx = saved_state->edx; + state->edi = saved_state->edi; + state->esi = saved_state->esi; + state->ebp = saved_state->ebp; + state->esp = saved_state->uesp; + state->eflags = saved_state->efl; + state->eip = saved_state->eip; + state->cs = saved_state->cs; + state->ss = saved_state->ss; + state->ds = saved_state->ds & 0xffff; + state->es = saved_state->es & 0xffff; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + + *count = x86_THREAD_STATE32_COUNT; - state = (x86_thread_state32_t *)tstate; + return KERN_SUCCESS; + } + + case x86_THREAD_STATE64: { + x86_thread_state64_t *state; + x86_saved_state64_t *saved_state; + + if (!is_saved_state64(int_state) || + *count < x86_THREAD_STATE64_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state64_t *) tstate; + + saved_state = saved_state64(int_state); + /* + * General registers. 
+ */ + state->rax = saved_state->rax; + state->rbx = saved_state->rbx; + state->rcx = saved_state->rcx; + state->rdx = saved_state->rdx; + state->rdi = saved_state->rdi; + state->rsi = saved_state->rsi; + state->rbp = saved_state->rbp; + state->rsp = saved_state->isf.rsp; + state->r8 = saved_state->r8; + state->r9 = saved_state->r9; + state->r10 = saved_state->r10; + state->r11 = saved_state->r11; + state->r12 = saved_state->r12; + state->r13 = saved_state->r13; + state->r14 = saved_state->r14; + state->r15 = saved_state->r15; + + state->rip = saved_state->isf.rip; + state->rflags = saved_state->isf.rflags; + state->cs = saved_state->isf.cs; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + *count = x86_THREAD_STATE64_COUNT; + + return KERN_SUCCESS; + } + + case x86_THREAD_STATE: { + x86_thread_state_t *state = NULL; + + if (*count < x86_THREAD_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *) tstate; + + if (is_saved_state32(int_state)) { + x86_saved_state32_t *saved_state = saved_state32(int_state); + + state->tsh.flavor = x86_THREAD_STATE32; + state->tsh.count = x86_THREAD_STATE32_COUNT; - assert(is_saved_state32(current_cpu_datap()->cpu_int_state)); - saved_state = saved_state32(current_cpu_datap()->cpu_int_state); /* * General registers. 
*/ - state->eax = saved_state->eax; - state->ebx = saved_state->ebx; - state->ecx = saved_state->ecx; - state->edx = saved_state->edx; - state->edi = saved_state->edi; - state->esi = saved_state->esi; - state->ebp = saved_state->ebp; - state->esp = saved_state->uesp; - state->eflags = saved_state->efl; - state->eip = saved_state->eip; - state->cs = saved_state->cs; - state->ss = saved_state->ss; - state->ds = saved_state->ds & 0xffff; - state->es = saved_state->es & 0xffff; - state->fs = saved_state->fs & 0xffff; - state->gs = saved_state->gs & 0xffff; - - *count = x86_THREAD_STATE32_COUNT; - - return KERN_SUCCESS; - } - break; + state->uts.ts32.eax = saved_state->eax; + state->uts.ts32.ebx = saved_state->ebx; + state->uts.ts32.ecx = saved_state->ecx; + state->uts.ts32.edx = saved_state->edx; + state->uts.ts32.edi = saved_state->edi; + state->uts.ts32.esi = saved_state->esi; + state->uts.ts32.ebp = saved_state->ebp; + state->uts.ts32.esp = saved_state->uesp; + state->uts.ts32.eflags = saved_state->efl; + state->uts.ts32.eip = saved_state->eip; + state->uts.ts32.cs = saved_state->cs; + state->uts.ts32.ss = saved_state->ss; + state->uts.ts32.ds = saved_state->ds & 0xffff; + state->uts.ts32.es = saved_state->es & 0xffff; + state->uts.ts32.fs = saved_state->fs & 0xffff; + state->uts.ts32.gs = saved_state->gs & 0xffff; + } else if (is_saved_state64(int_state)) { + x86_saved_state64_t *saved_state = saved_state64(int_state); - case x86_THREAD_STATE: - { - // wrap a 32 bit thread state into a 32/64bit clean thread state - x86_thread_state_t *state; - x86_saved_state32_t *saved_state; - - if(*count < x86_THREAD_STATE_COUNT) - return (KERN_INVALID_ARGUMENT); - - state = (x86_thread_state_t *)tstate; - assert(is_saved_state32(current_cpu_datap()->cpu_int_state)); - saved_state = saved_state32(current_cpu_datap()->cpu_int_state); - - state->tsh.flavor = x86_THREAD_STATE32; - state->tsh.count = x86_THREAD_STATE32_COUNT; - - /* - * General registers. 
- */ - - state->uts.ts32.eax = saved_state->eax; - state->uts.ts32.ebx = saved_state->ebx; - state->uts.ts32.ecx = saved_state->ecx; - state->uts.ts32.edx = saved_state->edx; - state->uts.ts32.edi = saved_state->edi; - state->uts.ts32.esi = saved_state->esi; - state->uts.ts32.ebp = saved_state->ebp; - state->uts.ts32.esp = saved_state->uesp; - state->uts.ts32.eflags = saved_state->efl; - state->uts.ts32.eip = saved_state->eip; - state->uts.ts32.cs = saved_state->cs; - state->uts.ts32.ss = saved_state->ss; - state->uts.ts32.ds = saved_state->ds & 0xffff; - state->uts.ts32.es = saved_state->es & 0xffff; - state->uts.ts32.fs = saved_state->fs & 0xffff; - state->uts.ts32.gs = saved_state->gs & 0xffff; - - *count = x86_THREAD_STATE_COUNT; - return KERN_SUCCESS; + state->tsh.flavor = x86_THREAD_STATE64; + state->tsh.count = x86_THREAD_STATE64_COUNT; + + /* + * General registers. + */ + state->uts.ts64.rax = saved_state->rax; + state->uts.ts64.rbx = saved_state->rbx; + state->uts.ts64.rcx = saved_state->rcx; + state->uts.ts64.rdx = saved_state->rdx; + state->uts.ts64.rdi = saved_state->rdi; + state->uts.ts64.rsi = saved_state->rsi; + state->uts.ts64.rbp = saved_state->rbp; + state->uts.ts64.rsp = saved_state->isf.rsp; + state->uts.ts64.r8 = saved_state->r8; + state->uts.ts64.r9 = saved_state->r9; + state->uts.ts64.r10 = saved_state->r10; + state->uts.ts64.r11 = saved_state->r11; + state->uts.ts64.r12 = saved_state->r12; + state->uts.ts64.r13 = saved_state->r13; + state->uts.ts64.r14 = saved_state->r14; + state->uts.ts64.r15 = saved_state->r15; + + state->uts.ts64.rip = saved_state->isf.rip; + state->uts.ts64.rflags = saved_state->isf.rflags; + state->uts.ts64.cs = saved_state->isf.cs; + state->uts.ts64.fs = saved_state->fs & 0xffff; + state->uts.ts64.gs = saved_state->gs & 0xffff; + } else { + panic("unknown thread state"); } - break; + + *count = x86_THREAD_STATE_COUNT; + return KERN_SUCCESS; + } } return KERN_FAILURE; } @@ -1650,14 +1985,14 @@ machine_thread_create( 
task_t task) { pcb_t pcb = &thread->machine.xxx_pcb; - struct real_descriptor *ldtp; - pmap_paddr_t paddr; x86_saved_state_t *iss; +#if NCOPY_WINDOWS > 0 inval_copy_windows(thread); thread->machine.physwindow_pte = 0; thread->machine.physwindow_busy = 0; +#endif /* * Allocate pcb only if required. @@ -1696,17 +2031,21 @@ machine_thread_create( iss = (x86_saved_state_t *) &sfc32->ssf.iss32; iss->flavor = x86_SAVED_STATE32; +#if defined(__i386__) #if DEBUG { x86_saved_state_compat32_t *xssc; xssc = (x86_saved_state_compat32_t *) iss; + xssc->pad_for_16byte_alignment[0] = 0x64326432; xssc->pad_for_16byte_alignment[1] = 0x64326432; } -#endif +#endif /* DEBUG */ } else { - x86_sframe32_t *sf32; + x86_sframe32_t *sf32; + struct real_descriptor *ldtp; + pmap_paddr_t paddr; sf32 = (x86_sframe32_t *) pcb->sf; @@ -1714,6 +2053,18 @@ machine_thread_create( iss = (x86_saved_state_t *) &sf32->ssf; iss->flavor = x86_SAVED_STATE32; + pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss)); + if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE)))) + pcb->iss_pte1 = INTEL_PTE_INVALID; + else + pcb->iss_pte1 = pte_kernel_rw(paddr); + + + ldtp = (struct real_descriptor *) + pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); + pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; +#endif /* __i386__ */ } /* * Guarantee that the bootstrapped thread will be in user @@ -1733,18 +2084,10 @@ machine_thread_create( thread->machine.pcb = pcb; simple_lock_init(&pcb->lock, 0); - ldtp = (struct real_descriptor *)pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); - pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_selector = 0; - - pcb->iss_pte0 = (uint64_t)pte_kernel_rw(kvtophys((vm_offset_t)pcb->iss)); pcb->arg_store_valid = 0; + pcb->cthread_self = 0; + pcb->uldt_selector = 0; - if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)(pcb->iss) + PAGE_SIZE)))) - pcb->iss_pte1 = INTEL_PTE_INVALID; - else - pcb->iss_pte1 
= (uint64_t)pte_kernel_rw(paddr); return(KERN_SUCCESS); } @@ -1790,8 +2133,10 @@ machine_thread_switch_addrmode(thread_t thread) /* If we're switching ourselves, reset the pcb addresses etc. */ if (thread == current_thread()) { +#if defined(__i386__) if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3) pmap_load_kernel_cr3(); +#endif /* defined(__i386) */ act_machine_switch_pcb(thread); } enable_preemption(); @@ -1804,13 +2149,14 @@ machine_thread_switch_addrmode(thread_t thread) * when starting up a new processor */ void -machine_set_current_thread( thread_t thread ) +machine_set_current_thread(thread_t thread) { current_cpu_datap()->cpu_active_thread = thread; } /* - * This is called when a task is termianted. + * This is called when a task is terminated, and also on exec(). + * Clear machine-dependent state that is stored on the task. */ void machine_thread_terminate_self(void) @@ -1822,16 +2168,17 @@ machine_thread_terminate_self(void) self_task->i386_ldt = 0; user_ldt_free(user_ldt); } + + if (self_task->task_debug != NULL) { + zfree(ids_zone, self_task->task_debug); + self_task->task_debug = NULL; + } } } void act_machine_return( -#if CONFIG_NO_PANIC_STRINGS - __unused int code -#else int code -#endif ) { /* @@ -1862,22 +2209,22 @@ machine_thread_init(void) if (cpu_mode_is64bit()) { assert(sizeof(x86_sframe_compat32_t) % 16 == 0); iss_zone = zinit(sizeof(x86_sframe64_t), - THREAD_MAX * sizeof(x86_sframe64_t), + thread_max * sizeof(x86_sframe64_t), THREAD_CHUNK * sizeof(x86_sframe64_t), "x86_64 saved state"); ids_zone = zinit(sizeof(x86_debug_state64_t), - THREAD_MAX * sizeof(x86_debug_state64_t), + thread_max * sizeof(x86_debug_state64_t), THREAD_CHUNK * sizeof(x86_debug_state64_t), "x86_64 debug state"); } else { iss_zone = zinit(sizeof(x86_sframe32_t), - THREAD_MAX * sizeof(x86_sframe32_t), + thread_max * sizeof(x86_sframe32_t), THREAD_CHUNK * sizeof(x86_sframe32_t), "x86 saved state"); ids_zone = zinit(sizeof(x86_debug_state32_t), - THREAD_MAX * 
(sizeof(x86_debug_state32_t)), + thread_max * (sizeof(x86_debug_state32_t)), THREAD_CHUNK * (sizeof(x86_debug_state32_t)), "x86 debug state"); } @@ -1885,6 +2232,7 @@ machine_thread_init(void) } +#if defined(__i386__) /* * Some routines for debugging activation code */ @@ -1966,8 +2314,8 @@ dump_act(thread_t thr_act) if (thr_act->kernel_stack) { vm_offset_t stack = thr_act->kernel_stack; - printf("\tk_stk %x eip %x ebx %x esp %x iss %p\n", - stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, + printf("\tk_stk %lx eip %x ebx %x esp %x iss %p\n", + (long)stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, STACK_IKS(stack)->k_esp, STACK_IEL(stack)->saved_state); } @@ -1975,6 +2323,7 @@ dump_act(thread_t thr_act) dump_regs(thr_act); return((int)thr_act); } +#endif user_addr_t get_useraddr(void) @@ -2009,7 +2358,7 @@ machine_stack_detach(thread_t thread) vm_offset_t stack; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DETACH), - thread, thread->priority, + (uintptr_t)thread_tid(thread), thread->priority, thread->sched_pri, 0, 0); @@ -2028,19 +2377,25 @@ machine_stack_attach( thread_t thread, vm_offset_t stack) { - struct x86_kernel_state32 *statep; + struct x86_kernel_state *statep; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_ATTACH), - thread, thread->priority, + (uintptr_t)thread_tid(thread), thread->priority, thread->sched_pri, 0, 0); assert(stack); thread->kernel_stack = stack; statep = STACK_IKS(stack); +#if defined(__x86_64__) + statep->k_rip = (unsigned long) Thread_continue; + statep->k_rbx = (unsigned long) thread_continue; + statep->k_rsp = (unsigned long) STACK_IEL(stack); +#else statep->k_eip = (unsigned long) Thread_continue; statep->k_ebx = (unsigned long) thread_continue; statep->k_esp = (unsigned long) STACK_IEL(stack); +#endif return; } @@ -2058,6 +2413,10 @@ machine_stack_handoff(thread_t old, assert(new); assert(old); +#if CONFIG_COUNTERS + machine_pmc_cswitch(old, new); +#endif + stack = old->kernel_stack; if (stack == 
old->reserved_stack) { assert(new->reserved_stack); @@ -2072,6 +2431,7 @@ machine_stack_handoff(thread_t old, new->kernel_stack = stack; fpu_save_context(old); + old->machine.specFlags &= ~OnProc; new->machine.specFlags |= OnProc; @@ -2236,3 +2596,54 @@ boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); boolean_t x86_sysenter_arg_store_isvalid(thread_t thread) { return (thread->machine.pcb->arg_store_valid); } + +/* + * Duplicate one x86_debug_state32_t to another. "all" parameter + * chooses whether dr4 and dr5 are copied (they are never meant + * to be installed when we do machine_task_set_state() or + * machine_thread_set_state()). + */ +void +copy_debug_state32( + x86_debug_state32_t *src, + x86_debug_state32_t *target, + boolean_t all) +{ + if (all) { + target->dr4 = src->dr4; + target->dr5 = src->dr5; + } + + target->dr0 = src->dr0; + target->dr1 = src->dr1; + target->dr2 = src->dr2; + target->dr3 = src->dr3; + target->dr6 = src->dr6; + target->dr7 = src->dr7; +} + +/* + * Duplicate one x86_debug_state64_t to another. "all" parameter + * chooses whether dr4 and dr5 are copied (they are never meant + * to be installed when we do machine_task_set_state() or + * machine_thread_set_state()). + */ +void +copy_debug_state64( + x86_debug_state64_t *src, + x86_debug_state64_t *target, + boolean_t all) +{ + if (all) { + target->dr4 = src->dr4; + target->dr5 = src->dr5; + } + + target->dr0 = src->dr0; + target->dr1 = src->dr1; + target->dr2 = src->dr2; + target->dr3 = src->dr3; + target->dr6 = src->dr6; + target->dr7 = src->dr7; +} + diff --git a/osfmk/i386/perfmon.c b/osfmk/i386/perfmon.c deleted file mode 100644 index 1dffe6d59..000000000 --- a/osfmk/i386/perfmon.c +++ /dev/null @@ -1,763 +0,0 @@ -/* - * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if DEBUG -#define DBG(x...) kprintf(x) -#else -#define DBG(x...) -#endif - -static decl_simple_lock_data(,pmc_lock) -static task_t pmc_owner = TASK_NULL; -static int pmc_thread_count = 0; -static boolean_t pmc_inited = FALSE; - -/* PMC Facility Owner: - * TASK_NULL - no one owns it - * kernel_task - owned by pmc - * other task - owned by another task - */ - -/* - * Table of ESCRs and addresses associated with performance counters/CCCRs. 
- * See Intel SDM Vol 3, Table 15-4 (section 15.9): - */ -static uint16_t pmc_escr_addr_table[18][8] = { - [MSR_BPU_COUNTER0] { - [MSR_BSU_ESCR0] 0x3a0, - [MSR_FSB_ESCR0] 0x3a2, - [MSR_MOB_ESCR0] 0x3aa, - [MSR_PMH_ESCR0] 0x3ac, - [MSR_BPU_ESCR0] 0x3b2, - [MSR_IS_ESCR0] 0x3b4, - [MSR_ITLB_ESCR0] 0x3b6, - [MSR_IX_ESCR0] 0x3c8, - }, - [MSR_BPU_COUNTER1] { - [MSR_BSU_ESCR0] 0x3a0, - [MSR_FSB_ESCR0] 0x3a2, - [MSR_MOB_ESCR0] 0x3aa, - [MSR_PMH_ESCR0] 0x3ac, - [MSR_BPU_ESCR0] 0x3b2, - [MSR_IS_ESCR0] 0x3b4, - [MSR_ITLB_ESCR0] 0x3b6, - [MSR_IX_ESCR0] 0x3c8, - }, - [MSR_BPU_COUNTER2] { - [MSR_BSU_ESCR1] 0x3a1, - [MSR_FSB_ESCR1] 0x3a3, - [MSR_MOB_ESCR1] 0x3ab, - [MSR_PMH_ESCR1] 0x3ad, - [MSR_BPU_ESCR1] 0x3b3, - [MSR_IS_ESCR1] 0x3b5, - [MSR_ITLB_ESCR1] 0x3b7, - [MSR_IX_ESCR1] 0x3c9, - }, - [MSR_BPU_COUNTER3] { - [MSR_BSU_ESCR1] 0x3a1, - [MSR_FSB_ESCR1] 0x3a3, - [MSR_MOB_ESCR1] 0x3ab, - [MSR_PMH_ESCR1] 0x3ad, - [MSR_BPU_ESCR1] 0x3b3, - [MSR_IS_ESCR1] 0x3b5, - [MSR_ITLB_ESCR1] 0x3b7, - [MSR_IX_ESCR1] 0x3c9, - }, - [MSR_MS_COUNTER0] { - [MSR_MS_ESCR1] 0x3c1, - [MSR_TBPU_ESCR1] 0x3c3, - [MSR_TC_ESCR1] 0x3c5, - }, - [MSR_MS_COUNTER1] { - [MSR_MS_ESCR1] 0x3c1, - [MSR_TBPU_ESCR1] 0x3c3, - [MSR_TC_ESCR1] 0x3c5, - }, - [MSR_MS_COUNTER2] { - [MSR_MS_ESCR1] 0x3c1, - [MSR_TBPU_ESCR1] 0x3c3, - [MSR_TC_ESCR1] 0x3c5, - }, - [MSR_MS_COUNTER3] { - [MSR_MS_ESCR1] 0x3c1, - [MSR_TBPU_ESCR1] 0x3c3, - [MSR_TC_ESCR1] 0x3c5, - }, - [MSR_FLAME_COUNTER0] { - [MSR_FIRM_ESCR0] 0x3a4, - [MSR_FLAME_ESCR0] 0x3a6, - [MSR_DAC_ESCR0] 0x3a8, - [MSR_SAT_ESCR0] 0x3ae, - [MSR_U2L_ESCR0] 0x3b0, - }, - [MSR_FLAME_COUNTER1] { - [MSR_FIRM_ESCR0] 0x3a4, - [MSR_FLAME_ESCR0] 0x3a6, - [MSR_DAC_ESCR0] 0x3a8, - [MSR_SAT_ESCR0] 0x3ae, - [MSR_U2L_ESCR0] 0x3b0, - }, - [MSR_FLAME_COUNTER2] { - [MSR_FIRM_ESCR1] 0x3a5, - [MSR_FLAME_ESCR1] 0x3a7, - [MSR_DAC_ESCR1] 0x3a9, - [MSR_SAT_ESCR1] 0x3af, - [MSR_U2L_ESCR1] 0x3b1, - }, - [MSR_FLAME_COUNTER3] { - [MSR_FIRM_ESCR1] 0x3a5, - [MSR_FLAME_ESCR1] 0x3a7, - [MSR_DAC_ESCR1] 0x3a9, - 
[MSR_SAT_ESCR1] 0x3af, - [MSR_U2L_ESCR1] 0x3b1, - }, - [MSR_IQ_COUNTER0] { - [MSR_CRU_ESCR0] 0x3b8, - [MSR_CRU_ESCR2] 0x3cc, - [MSR_CRU_ESCR4] 0x3e0, - [MSR_IQ_ESCR0] 0x3ba, - [MSR_RAT_ESCR0] 0x3bc, - [MSR_SSU_ESCR0] 0x3be, - [MSR_AFL_ESCR0] 0x3ca, - }, - [MSR_IQ_COUNTER1] { - [MSR_CRU_ESCR0] 0x3b8, - [MSR_CRU_ESCR2] 0x3cc, - [MSR_CRU_ESCR4] 0x3e0, - [MSR_IQ_ESCR0] 0x3ba, - [MSR_RAT_ESCR0] 0x3bc, - [MSR_SSU_ESCR0] 0x3be, - [MSR_AFL_ESCR0] 0x3ca, - }, - [MSR_IQ_COUNTER2] { - [MSR_CRU_ESCR1] 0x3b9, - [MSR_CRU_ESCR3] 0x3cd, - [MSR_CRU_ESCR5] 0x3e1, - [MSR_IQ_ESCR1] 0x3bb, - [MSR_RAT_ESCR1] 0x3bd, - [MSR_AFL_ESCR1] 0x3cb, - }, - [MSR_IQ_COUNTER3] { - [MSR_CRU_ESCR1] 0x3b9, - [MSR_CRU_ESCR3] 0x3cd, - [MSR_CRU_ESCR5] 0x3e1, - [MSR_IQ_ESCR1] 0x3bb, - [MSR_RAT_ESCR1] 0x3bd, - [MSR_AFL_ESCR1] 0x3cb, - }, - [MSR_IQ_COUNTER4] { - [MSR_CRU_ESCR0] 0x3b8, - [MSR_CRU_ESCR2] 0x3cc, - [MSR_CRU_ESCR4] 0x3e0, - [MSR_IQ_ESCR0] 0x3ba, - [MSR_RAT_ESCR0] 0x3bc, - [MSR_SSU_ESCR0] 0x3be, - [MSR_AFL_ESCR0] 0x3ca, - }, - [MSR_IQ_COUNTER5] { - [MSR_CRU_ESCR1] 0x3b9, - [MSR_CRU_ESCR3] 0x3cd, - [MSR_CRU_ESCR5] 0x3e1, - [MSR_IQ_ESCR1] 0x3bb, - [MSR_RAT_ESCR1] 0x3bd, - [MSR_AFL_ESCR1] 0x3cb, - }, -}; -#define PMC_ESCR_ADDR(id,esid) pmc_escr_addr_table[id][esid] - -typedef struct { - pmc_id_t id_max; /* Maximum counter id */ - pmc_machine_t machine_type; /* P6 or P4/Xeon */ - uint32_t msr_counter_base; /* First counter MSR */ - uint32_t msr_control_base; /* First control MSR */ - union { - struct { - boolean_t reserved[2]; - pmc_ovf_func_t *ovf_func[2]; - } P6; - struct { - boolean_t reserved[2]; - pmc_ovf_func_t *ovf_func[2]; - uint32_t msr_global_ctrl; - uint32_t msr_global_ovf_ctrl; - uint32_t msr_global_status; - } Core; - struct { - boolean_t reserved[18]; - pmc_ovf_func_t *ovf_func[18]; -#ifdef DEBUG - pmc_cccr_t cccr_shadow[18]; /* Last cccr set */ - pmc_counter_t counter_shadow[18]; /* Last counter set */ - uint32_t ovfs_unexpected[18]; /* Unexpected intrs */ -#endif - } P4; - }; -} 
pmc_table_t; - -static pmc_machine_t -_pmc_machine_type(void) -{ - i386_cpu_info_t *infop = cpuid_info(); - - if (strncmp(infop->cpuid_vendor, CPUID_VID_INTEL, sizeof(CPUID_VID_INTEL)) != 0) - return pmc_none; - - if (!pmc_is_available()) - return pmc_none; - - switch (infop->cpuid_family) { - case 0x6: - switch (infop->cpuid_model) { - case 15: - return pmc_Core; - default: - return pmc_P6; - } - case 0xf: - return pmc_P4_Xeon; - default: - return pmc_unknown; - } -} - -static void -pmc_p4_intr(void *state) -{ - pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc; - uint32_t cccr_addr; - pmc_cccr_t cccr; - pmc_id_t id; - int my_logical_cpu = cpu_to_logical_cpu(cpu_number()); - - /* - * Scan through table for reserved counters with overflow and - * with a registered overflow function. - */ - for (id = 0; id <= pmc_table->id_max; id++) { - if (!pmc_table->P4.reserved[id]) - continue; - cccr_addr = pmc_table->msr_control_base + id; - cccr.u_u64 = rdmsr64(cccr_addr); -#ifdef DEBUG - pmc_table->P4.cccr_shadow[id] = cccr; - pmc_table->P4.counter_shadow[id].u64 = - rdmsr64(pmc_table->msr_counter_base + id); -#endif - if (cccr.u_htt.ovf == 0) - continue; - if ((cccr.u_htt.ovf_pmi_t0 == 1 && my_logical_cpu == 0) || - (cccr.u_htt.ovf_pmi_t1 == 1 && my_logical_cpu == 1)) { - if (pmc_table->P4.ovf_func[id]) { - (*pmc_table->P4.ovf_func[id])(id, state); - /* func expected to clear overflow */ - continue; - } - } - /* Clear overflow for unexpected interrupt */ -#ifdef DEBUG - pmc_table->P4.ovfs_unexpected[id]++; -#endif - } -} - -static void -pmc_p6_intr(void *state) -{ - pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc; - pmc_id_t id; - - /* - * Can't determine which counter has overflow - * so call all registered functions. 
- */ - for (id = 0; id <= pmc_table->id_max; id++) - if (pmc_table->P6.reserved[id] && pmc_table->P6.ovf_func[id]) - (*pmc_table->P6.ovf_func[id])(id, state); -} - -static void -pmc_core_intr(void *state) -{ - pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc; - pmc_id_t id; - pmc_global_status_t ovf_status; - - ovf_status.u64 = rdmsr64(pmc_table->Core.msr_global_status); - /* - * Scan through table for reserved counters with overflow and - * with a registered overflow function. - */ - for (id = 0; id <= pmc_table->id_max; id++) { - if (!pmc_table->Core.reserved[id]) - continue; - if ((id == 0 && ovf_status.fld.PMC0_overflow) || - (id == 1 && ovf_status.fld.PMC1_overflow)) { - if (pmc_table->Core.ovf_func[id]) { - (*pmc_table->Core.ovf_func[id])(id, state); - /* func expected to clear overflow */ - continue; - } - } - } -} - -void * -pmc_alloc(void) -{ - int ret; - pmc_table_t *pmc_table; - pmc_machine_t pmc_type; - - if (!pmc_inited) { - simple_lock_init(&pmc_lock, 0); - pmc_inited = TRUE; - } - - pmc_type = _pmc_machine_type(); - if (pmc_type == pmc_none) { - return NULL; - } - - ret = kmem_alloc(kernel_map, - (void *) &pmc_table, sizeof(pmc_table_t)); - if (ret != KERN_SUCCESS) - panic("pmc_init() kmem_alloc returned %d\n", ret); - bzero((void *)pmc_table, sizeof(pmc_table_t)); - - pmc_table->machine_type = pmc_type; - switch (pmc_type) { - case pmc_P4_Xeon: - pmc_table->id_max = 17; - pmc_table->msr_counter_base = MSR_COUNTER_ADDR(0); - pmc_table->msr_control_base = MSR_CCCR_ADDR(0); - lapic_set_pmi_func((i386_intr_func_t) &pmc_p4_intr); - break; - case pmc_Core: - pmc_table->id_max = 1; - pmc_table->msr_counter_base = MSR_IA32_PMC(0); - pmc_table->msr_control_base = MSR_IA32_PERFEVTSEL(0); - pmc_table->Core.msr_global_ctrl = MSR_PERF_GLOBAL_CTRL; - pmc_table->Core.msr_global_ovf_ctrl = MSR_PERF_GLOBAL_OVF_CTRL; - pmc_table->Core.msr_global_status = MSR_PERF_GLOBAL_STATUS; - lapic_set_pmi_func((i386_intr_func_t) &pmc_core_intr); - break; - case pmc_P6: - 
pmc_table->id_max = 1; - pmc_table->msr_counter_base = MSR_P6_COUNTER_ADDR(0); - pmc_table->msr_control_base = MSR_P6_PES_ADDR(0); - lapic_set_pmi_func((i386_intr_func_t) &pmc_p6_intr); - break; - default: - break; - } - DBG("pmc_alloc() type=%d msr_counter_base=%p msr_control_base=%p\n", - pmc_table->machine_type, - (void *) pmc_table->msr_counter_base, - (void *) pmc_table->msr_control_base); - return (void *) pmc_table; -} - - -static inline pmc_table_t * -pmc_table_valid(pmc_id_t id) -{ - x86_lcpu_t *my_lcpu = x86_lcpu(); - pmc_table_t *pmc; - - assert(my_lcpu != NULL); - - pmc = (pmc_table_t *) my_lcpu->pmc; - if ((pmc == NULL) || - (id > pmc->id_max) || - (pmc->machine_type == pmc_P4_Xeon && !pmc->P4.reserved[id]) || - (pmc->machine_type == pmc_P6 && !pmc->P6.reserved[id]) || - (pmc->machine_type == pmc_Core && !pmc->Core.reserved[id])) - return NULL; - return pmc; -} - -int -pmc_machine_type(pmc_machine_t *type) -{ - x86_lcpu_t *my_lcpu = x86_lcpu(); - pmc_table_t *pmc_table; - - assert(my_lcpu != NULL); - - pmc_table = (pmc_table_t *) my_lcpu->pmc; - if (pmc_table == NULL) - return KERN_FAILURE; - - *type = pmc_table->machine_type; - - return KERN_SUCCESS; -} - -int -pmc_reserve(pmc_id_t id) -{ - x86_lcpu_t *my_lcpu = x86_lcpu(); - pmc_table_t *pmc_table; - - assert(my_lcpu != NULL); - - pmc_table = (pmc_table_t *) my_lcpu->pmc; - if (pmc_table == NULL) - return KERN_FAILURE; - if (id > pmc_table->id_max) - return KERN_INVALID_ARGUMENT; - switch (pmc_table->machine_type) { - case pmc_P4_Xeon: - if (pmc_table->P4.reserved[id]) - return KERN_FAILURE; - pmc_table->P4.reserved[id] = TRUE; - return KERN_SUCCESS; - case pmc_P6: - if (pmc_table->P6.reserved[id]) - return KERN_FAILURE; - pmc_table->P6.reserved[id] = TRUE; - return KERN_SUCCESS; - case pmc_Core: - if (pmc_table->Core.reserved[id]) - return KERN_FAILURE; - pmc_table->Core.reserved[id] = TRUE; - pmc_global_ctrl_t ctrl; - ctrl.u64 = rdmsr64(pmc_table->Core.msr_global_ctrl); - if (id == 0) - 
ctrl.fld.PMC0_enable = 1; - else - ctrl.fld.PMC1_enable = 1; - wrmsr64(pmc_table->Core.msr_global_ctrl, ctrl.u64); - return KERN_SUCCESS; - default: - return KERN_FAILURE; - } -} - -boolean_t -pmc_is_reserved(pmc_id_t id) -{ - return pmc_table_valid(id) != NULL; -} - -int -pmc_free(pmc_id_t id) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - pmc_cccr_write(id, 0x0ULL); - switch (pmc_table->machine_type) { - case pmc_P4_Xeon: - pmc_table->P4.reserved[id] = FALSE; - pmc_table->P4.ovf_func[id] = NULL; - break; - case pmc_P6: - pmc_table->P6.reserved[id] = FALSE; - pmc_table->P6.ovf_func[id] = NULL; - break; - case pmc_Core: - pmc_table->Core.reserved[id] = FALSE; - pmc_table->Core.ovf_func[id] = NULL; - pmc_global_ctrl_t ctrl; - ctrl.u64 = rdmsr64(pmc_table->Core.msr_global_ctrl); - if (id == 0) - ctrl.fld.PMC0_enable = 0; - else - ctrl.fld.PMC1_enable = 0; - wrmsr64(pmc_table->Core.msr_global_ctrl, ctrl.u64); - break; - default: - return KERN_INVALID_ARGUMENT; - } - - return KERN_SUCCESS; -} - -int -pmc_counter_read(pmc_id_t id, pmc_counter_t *val) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - *(uint64_t *)val = rdmsr64(pmc_table->msr_counter_base + id); - - return KERN_SUCCESS; -} - -int -pmc_counter_write(pmc_id_t id, pmc_counter_t *val) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - wrmsr64(pmc_table->msr_counter_base + id, *(uint64_t *)val); - - return KERN_SUCCESS; -} - -int -pmc_cccr_read(pmc_id_t id, pmc_cccr_t *cccr) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - if (pmc_table->machine_type != pmc_P4_Xeon) - return KERN_FAILURE; - - *(uint64_t *)cccr = rdmsr64(pmc_table->msr_control_base + id); - - return KERN_SUCCESS; -} - -int -pmc_cccr_write(pmc_id_t id, pmc_cccr_t *cccr) -{ - pmc_table_t 
*pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - if (pmc_table->machine_type != pmc_P4_Xeon) - return KERN_FAILURE; - - wrmsr64(pmc_table->msr_control_base + id, *(uint64_t *)cccr); - - return KERN_SUCCESS; -} - -int -pmc_evtsel_read(pmc_id_t id, pmc_evtsel_t *evtsel) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - if (!(pmc_table->machine_type == pmc_P6 || - pmc_table->machine_type == pmc_Core)) - return KERN_FAILURE; - - evtsel->u64 = rdmsr64(pmc_table->msr_control_base + id); - - return KERN_SUCCESS; -} - -int -pmc_evtsel_write(pmc_id_t id, pmc_evtsel_t *evtsel) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - if (!(pmc_table->machine_type == pmc_P6 || - pmc_table->machine_type == pmc_Core)) - return KERN_FAILURE; - - wrmsr64(pmc_table->msr_control_base + id, evtsel->u64); - - return KERN_SUCCESS; -} - -int -pmc_escr_read(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr) -{ - uint32_t addr; - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - if (pmc_table->machine_type != pmc_P4_Xeon) - return KERN_FAILURE; - - if (esid > PMC_ESID_MAX) - return KERN_INVALID_ARGUMENT; - - addr = PMC_ESCR_ADDR(id, esid); - if (addr == 0) - return KERN_INVALID_ARGUMENT; - - *(uint64_t *)escr = rdmsr64(addr); - - return KERN_SUCCESS; -} - -int -pmc_escr_write(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr) -{ - uint32_t addr; - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_FAILURE; - - if (pmc_table->machine_type != pmc_P4_Xeon) - return KERN_FAILURE; - - if (esid > PMC_ESID_MAX) - return KERN_INVALID_ARGUMENT; - - addr = PMC_ESCR_ADDR(id, esid); - if (addr == 0) - return KERN_INVALID_ARGUMENT; - - wrmsr64(addr, *(uint64_t *)escr); - - return KERN_SUCCESS; -} - -int -pmc_set_ovf_func(pmc_id_t id, 
pmc_ovf_func_t func) -{ - pmc_table_t *pmc_table = pmc_table_valid(id); - - if (pmc_table == NULL) - return KERN_INVALID_ARGUMENT; - - switch (pmc_table->machine_type) { - case pmc_P4_Xeon: - pmc_table->P4.ovf_func[id] = func; - break; - case pmc_P6: - pmc_table->P6.ovf_func[id] = func; - break; - case pmc_Core: - pmc_table->Core.ovf_func[id] = func; - break; - default: - return KERN_INVALID_ARGUMENT; - } - - return KERN_SUCCESS; -} - -int -pmc_acquire(task_t task) -{ - kern_return_t retval = KERN_SUCCESS; - - if (!pmc_inited) - return KERN_FAILURE; - - simple_lock(&pmc_lock); - - if(pmc_owner == task) { - DBG("pmc_acquire - " - "ACQUIRED: already owner\n"); - retval = KERN_SUCCESS; - /* already own it */ - } else if(pmc_owner == TASK_NULL) { /* no one owns it */ - pmc_owner = task; - pmc_thread_count = 0; - DBG("pmc_acquire - " - "ACQUIRED: no current owner - made new owner\n"); - retval = KERN_SUCCESS; - } else { /* someone already owns it */ - if(pmc_owner == kernel_task) { - if(pmc_thread_count == 0) { - /* kernel owns it but no threads using it */ - pmc_owner = task; - pmc_thread_count = 0; - DBG("pmc_acquire - " - "ACQUIRED: owned by kernel, no threads\n"); - retval = KERN_SUCCESS; - } else { - DBG("pmc_acquire - " - "DENIED: owned by kernel, in use\n"); - retval = KERN_RESOURCE_SHORTAGE; - } - } else { /* non-kernel owner */ - DBG("pmc_acquire - " - "DENIED: owned by another task\n"); - retval = KERN_RESOURCE_SHORTAGE; - } - } - - simple_unlock(&pmc_lock); - return retval; -} - -int -pmc_release(task_t task) -{ - kern_return_t retval = KERN_SUCCESS; - task_t old_pmc_owner = pmc_owner; - - if (!pmc_inited) - return KERN_FAILURE; - - simple_lock(&pmc_lock); - - if(task != pmc_owner) { - retval = KERN_NO_ACCESS; - } else { - if(old_pmc_owner == kernel_task) { - if(pmc_thread_count>0) { - DBG("pmc_release - " - "NOT RELEASED: owned by kernel, in use\n"); - retval = KERN_NO_ACCESS; - } else { - DBG("pmc_release - " - "RELEASED: was owned by kernel\n"); - 
pmc_owner = TASK_NULL; - retval = KERN_SUCCESS; - } - } else { - DBG("pmc_release - " - "RELEASED: was owned by user\n"); - pmc_owner = TASK_NULL; - retval = KERN_SUCCESS; - } - } - - simple_unlock(&pmc_lock); - return retval; -} - diff --git a/osfmk/i386/perfmon.h b/osfmk/i386/perfmon.h deleted file mode 100644 index b943d1817..000000000 --- a/osfmk/i386/perfmon.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _I386_PERFMON_H_ -#define _I386_PERFMON_H_ - -#include - -/* - * Handy macros for bit/bitfield definition and manipulations: - */ -#define bit(n) (1ULL << (n)) -#define field(n,m) ((bit((m)+1)-1) & ~(bit(n)-1)) -#define field_nbit(fld) (ffs(fld)-1) -#define field_select(fld,x) ((x) & (fld)) -#define field_clear(fld,x) ((x) & ~(fld)) -#define field_unshift(fld,x) ((x) >> field_nbit(fld)) -#define field_shift(fld,x) ((x) << field_nbit(fld)) -#define field_get(fld,x) (field_unshift(fld,field_select(fld,x))) -#define field_set(fld,x,val) (field_clear(fld,x) | field_shift(fld,val)) - -#define PERFMON_AVAILABLE bit(7) -#define BTS_UNAVAILABLE bit(11) - -static inline boolean_t -pmc_is_available(void) -{ - uint32_t lo; - uint32_t hi; - int ret; - - ret = rdmsr_carefully(MSR_IA32_MISC_ENABLE, &lo, &hi); - - return (ret == 0) && ((lo & PERFMON_AVAILABLE) != 0); -} - -/* - * Counter layout: - */ -#define PMC_COUNTER_COUNTER field(0,39) -#define PMC_COUNTER_RESERVED field(40,64) -#define PMC_COUNTER_MAX ((uint64_t) PMC_COUNTER_COUNTER) -typedef union { - struct { - uint64_t counter : 40; - uint64_t reserved : 24; - } fld; - uint64_t u64; -} pmc_counter_t; -#define PMC_COUNTER_ZERO { .u64 = 0 } - - -/* - * There are 2 basic flavors of PMCs: P6 (or Core) and P4/Xeon: - */ -typedef enum { - pmc_none = 0, - pmc_P6, - pmc_P4_Xeon, - pmc_Core, - pmc_unknown -} pmc_machine_t; - -#define MSR_PERF_FIXED_CTR(n) (0x309 + (n)) -#define MSR_PERF_INST_RETIRED MSR_PERF_FIXED_CTR(0) -#define MSR_PERF_CPU_CLK_UNHALTED_CORE MSR_PERF_FIXED_CTR(1) -#define MSR_PERF_CPU_CLK_UNHALTED_REF MSR_PERF_FIXED_CTR(2) - -#define MSR_PERF_FIXED_CTR_CTRL (0x38d) -typedef union { - struct { - uint64_t FIXED_CTR0_enable :2; - uint64_t reserved0 :1; - uint64_t FIXED_CTR0_pmi :1; - uint64_t FIXED_CTR1_enable :2; - uint64_t reserved1 :1; - uint64_t FIXED_CTR1_pmi :1; - uint64_t FIXED_CTR2_enable :2; - uint64_t reserved2 :1; - uint64_t FIXED_CTR2_pmi :1; 
- } fld; - uint64_t u64; -} pmc_fixed_ctr_ctrl_t; - -#define MSR_PERF_GLOBAL_STATUS (0x38e) -typedef union { - struct { - uint64_t PMC0_overflow : 1; - uint64_t PMC1_overflow : 1; - uint64_t reserved1 : 30; - uint64_t FIXED_CTR0_overflow : 1; - uint64_t FIXED_CTR1_overflow : 1; - uint64_t FIXED_CTR2_overflow : 1; - uint64_t reserved2 : 27; - uint64_t ovf_buffer : 1; - uint64_t cond_changed : 1; - } fld; - uint64_t u64; -} pmc_global_status_t; - -#define MSR_PERF_GLOBAL_CTRL (0x38f) -typedef union { - struct { - uint64_t PMC0_enable : 1; - uint64_t PMC1_enable : 1; - uint64_t reserved1 : 30; - uint64_t FIXED_CTR0_enable : 1; - uint64_t FIXED_CTR1_enable : 1; - uint64_t FIXED_CTR2_enable : 1; - } fld; - uint64_t u64; -} pmc_global_ctrl_t; - -#define MSR_PERF_GLOBAL_OVF_CTRL (0x390) -typedef union { - struct { - uint64_t PMC0_clr_overflow : 1; - uint64_t PMC1_clr_overflow : 1; - uint64_t reserved1 : 30; - uint64_t FIXED_CTR0_clr_overflow : 1; - uint64_t FIXED_CTR1_clr_overflow : 1; - uint64_t FIXED_CTR2_clr_overflow : 1; - uint64_t reserved2 : 27; - uint64_t clr_ovf_buffer : 1; - uint64_t clr_cond_changed : 1; - } fld; - uint64_t u64; -} pmc_global_ovf_ctrl; - -/* - * P6 MSRs... - */ -#define MSR_P6_COUNTER_ADDR(n) (0x0c1 + (n)) -#define MSR_P6_PES_ADDR(n) (0x186 + (n)) -#define MSR_IA32_PMC(n) (0x0c1 + (n)) -#define MSR_IA32_PERFEVTSEL(n) (0x186 + (n)) - -typedef union { - struct { - uint64_t event_select : 8; - uint64_t umask : 8; - uint64_t usr : 1; - uint64_t os : 1; - uint64_t e : 1; - uint64_t pc : 1; - uint64_t apic_int : 1; - uint64_t reserved1 : 1; - uint64_t en : 1; - uint64_t inv : 1; - uint64_t cmask : 8; - } fld; - uint64_t u64; -} pmc_evtsel_t; -#define PMC_EVTSEL_ZERO { .u64 = 0ULL } - - -/* - * Non-architectural event selectors. 
See Vol 3b, section 18.13: - */ -#define PMC_EVTSEL_ALLCORES (bit(15)|bit(14)) -#define PMC_EVTSEL_THISCORE (bit(14)) -#define PMC_EVTSEL_ALLAGENTS (bit(14)) -#define PMC_EVTSEL_THISAGENT (0ULL) } -#define PMC_EVTSEL_PREFETCH_ALL (bit(13)|bit(12)) -#define PMC_EVTSEL_PREFETCH_HW (bit(12)) -#define PMC_EVTSEL_PREFETCH_NOHW (0ULL) -#define PMC_EVTSEL_MESI_MOD (bit(11)) -#define PMC_EVTSEL_MESI_EXCL (bit(10)) -#define PMC_EVTSEL_MESI_SHRD (bit(9)) -#define PMC_EVTSEL_MESI_INVAL (bit(8)) - -#define PMC_EVTSEL_SNOOP_HITM (bit(11)) -#define PMC_EVTSEL_SNOOP_HIT (bit(9)) -#define PMC_EVTSEL_SNOOP_CLEAN (bit(8)) -#define PMC_EVTSEL_SNOOP_CMP2I (bit(9)) -#define PMC_EVTSEL_SNOOP_CMP2S (bit(8)) - -#define PMC_CORE_ITLB_MISS_RETIRED \ - { .fld.event_select = 0xc9, .fld.umask = 0 } -#define PMC_CORE_MEM_LOAD_RETIRED_L1D_MISS \ - { .fld.event_select = 0xcb, .fld.umask = 1 } -#define PMC_CORE_MEM_LOAD_RETIRED_L1D_LINE_MISS \ - { .fld.event_select = 0xcb, .fld.umask = 2 } -#define PMC_CORE_MEM_LOAD_RETIRED_L2_MISS \ - { .fld.event_select = 0xcb, .fld.umask = 4 } -#define PMC_CORE_MEM_LOAD_RETIRED_L2_LINE_MISS \ - { .fld.event_select = 0xcb, .fld.umask = 8 } -#define PMC_CORE_MEM_LOAD_RETIRED_DTLB_MISS \ - { .fld.event_select = 0xcb, .fld.umask = 0x10 } - -#define MSR_P6_PERFCTR0 0 -#define MSR_P6_PERFCTR1 1 - -/* - * P4/Xeon MSRs... 
- */ -#define MSR_COUNTER_ADDR(n) (0x300 + (n)) -#define MSR_CCCR_ADDR(n) (0x360 + (n)) - -typedef enum { - MSR_BPU_COUNTER0 = 0, - MSR_BPU_COUNTER1 = 1, - #define MSR_BSU_ESCR0 7 - #define MSR_FSB_ESCR0 6 - #define MSR_MOB_ESCR0 2 - #define MSR_PMH_ESCR0 4 - #define MSR_BPU_ESCR0 0 - #define MSR_IS_ESCR0 1 - #define MSR_ITLB_ESCR0 3 - #define MSR_IX_ESCR0 5 - MSR_BPU_COUNTER2 = 2, - MSR_BPU_COUNTER3 = 3, - #define MSR_BSU_ESCR1 7 - #define MSR_FSB_ESCR1 6 - #define MSR_MOB_ESCR1 2 - #define MSR_PMH_ESCR1 4 - #define MSR_BPU_ESCR1 0 - #define MSR_IS_ESCR1 1 - #define MSR_ITLB_ESCR1 3 - #define MSR_IX_ESCR1 5 - MSR_MS_COUNTER0 = 4, - MSR_MS_COUNTER1 = 5, - #define MSR_MS_ESCR0 0 - #define MSR_TBPU_ESCR0 2 - #define MSR_TC_ESCR0 1 - MSR_MS_COUNTER2 = 6, - MSR_MS_COUNTER3 = 7, - #define MSR_MS_ESCR1 0 - #define MSR_TBPU_ESCR1 2 - #define MSR_TC_ESCR1 1 - MSR_FLAME_COUNTER0 = 8, - MSR_FLAME_COUNTER1 = 9, - #define MSR_FIRM_ESCR0 1 - #define MSR_FLAME_ESCR0 0 - #define MSR_DAC_ESCR0 5 - #define MSR_SAT_ESCR0 2 - #define MSR_U2L_ESCR0 3 - MSR_FLAME_COUNTER2 = 10, - MSR_FLAME_COUNTER3 = 11, - #define MSR_FIRM_ESCR1 1 - #define MSR_FLAME_ESCR1 0 - #define MSR_DAC_ESCR1 5 - #define MSR_SAT_ESCR1 2 - #define MSR_U2L_ESCR1 3 - MSR_IQ_COUNTER0 = 12, - MSR_IQ_COUNTER1 = 13, - MSR_IQ_COUNTER4 = 16, - #define MSR_CRU_ESCR0 4 - #define MSR_CRU_ESCR2 5 - #define MSR_CRU_ESCR4 6 - #define MSR_IQ_ESCR0 0 - #define MSR_RAT_ESCR0 2 - #define MSR_SSU_ESCR0 3 - #define MSR_AFL_ESCR0 1 - MSR_IQ_COUNTER2 = 14, - MSR_IQ_COUNTER3 = 15, - MSR_IQ_COUNTER5 = 17, - #define MSR_CRU_ESCR1 4 - #define MSR_CRU_ESCR3 5 - #define MSR_CRU_ESCR5 6 - #define MSR_IQ_ESCR1 0 - #define MSR_RAT_ESCR1 2 - #define MSR_AFL_ESCR1 1 -} pmc_id_t; - -typedef int pmc_escr_id_t; -#define PMC_ESID_MAX 7 - -/* - * ESCR MSR layout: - */ -#define PMC_ECSR_NOHTT_RESERVED field(0,1) -#define PMC_ECSR_T0_USR bit(0) -#define PMC_ECSR_T0_OS bit(1) -#define PMC_ECSR_T1_USR bit(2) -#define PMC_ECSR_T1_OS bit(3) -#define 
PMC_ECSR_USR bit(2) -#define PMC_ECSR_OS bit(3) -#define PMC_ECSR_TAG_ENABLE bit(4) -#define PMC_ECSR_TAG_VALUE field(5,8) -#define PMC_ECSR_EVENT_MASK field(9,24) -#define PMC_ECSR_EVENT_SELECT field(25,30) -#define PMC_ECSR_RESERVED2 field(30,64) -typedef struct { - uint64_t reserved1 : 2; - uint64_t usr : 1; - uint64_t os : 1; - uint64_t tag_enable : 1; - uint64_t tag_value : 4; - uint64_t event_mask : 16; - uint64_t event_select : 6; - uint64_t reserved2 : 33; -} pmc_escr_nohtt_t; -typedef struct { - uint64_t t0_usr : 1; - uint64_t t0_os : 1; - uint64_t t1_usr : 1; - uint64_t t1_os : 1; - uint64_t tag_enable : 1; - uint64_t tag_value : 4; - uint64_t event_mask : 16; - uint64_t event_select : 6; - uint64_t reserved2 : 33; -} pmc_escr_htt_t; -typedef union { - pmc_escr_nohtt_t u_nohtt; - pmc_escr_htt_t u_htt; - uint64_t u_u64; -} pmc_escr_t; -#define PMC_ESCR_ZERO { .u_u64 = 0ULL } - -/* - * CCCR MSR layout: - */ -#define PMC_CCCR_RESERVED1 field(1,11) -#define PMC_CCCR_ENABLE bit(12) -#define PMC_CCCR_ECSR_SELECT field(13,15) -#define PMC_CCCR_RESERVED2 field(16,17) -#define PMC_CCCR_HTT_ACTIVE field(16,17) -#define PMC_CCCR_COMPARE bit(18) -#define PMC_CCCR_COMPLEMENT bit(19) -#define PMC_CCCR_THRESHOLD field(20,23) -#define PMC_CCCR_EDGE bit(24) -#define PMC_CCCR_FORCE_OVF bit(25) -#define PMC_CCCR_OVF_PMI bit(26) -#define PMC_CCCR_NOHTT_RESERVED2 field(27,29) -#define PMC_CCCR_OVF_PMI_T0 bit(26) -#define PMC_CCCR_OVF_PMI_T1 bit(27) -#define PMC_CCCR_HTT_RESERVED2 field(28,29) -#define PMC_CCCR_CASCADE bit(30) -#define PMC_CCCR_OVF bit(31) -typedef struct { - uint64_t reserved1 : 12; - uint64_t enable : 1; - uint64_t escr_select : 3; - uint64_t reserved2 : 2; - uint64_t compare : 1; - uint64_t complement : 1; - uint64_t threshold : 4; - uint64_t edge : 1; - uint64_t force_ovf : 1; - uint64_t ovf_pmi : 1; - uint64_t reserved3 : 3; - uint64_t cascade : 1; - uint64_t ovf : 1; - uint64_t reserved4 : 32; -} pmc_cccr_nohtt_t; -typedef struct { - uint64_t reserved1 : 
12; - uint64_t enable : 1; - uint64_t escr_select : 3; - uint64_t active_thread : 2; - uint64_t compare : 1; - uint64_t complement : 1; - uint64_t threshold : 4; - uint64_t edge : 1; - uint64_t force_OVF : 1; - uint64_t ovf_pmi_t0 : 1; - uint64_t ovf_pmi_t1 : 1; - uint64_t reserved3 : 2; - uint64_t cascade : 1; - uint64_t ovf : 1; - uint64_t reserved4 : 32; -} pmc_cccr_htt_t; -typedef union { - pmc_cccr_nohtt_t u_nohtt; - pmc_cccr_htt_t u_htt; - uint64_t u_u64; -} pmc_cccr_t; -#define PMC_CCCR_ZERO { .u_u64 = 0ULL } - -typedef void (pmc_ovf_func_t)(pmc_id_t id, void *state); - -/* - * In-kernel PMC access primitives: - */ -/* Generic: */ -extern void *pmc_alloc(void); -extern int pmc_machine_type(pmc_machine_t *type); -extern boolean_t pmc_is_reserved(pmc_id_t id); -extern int pmc_reserve(pmc_id_t id); -extern int pmc_free(pmc_id_t id); -extern int pmc_counter_read(pmc_id_t id, pmc_counter_t *val); -extern int pmc_counter_write(pmc_id_t id, pmc_counter_t *val); - -/* P6-specific: */ -extern int pmc_evtsel_read(pmc_id_t id, pmc_evtsel_t *evtsel); -extern int pmc_evtsel_write(pmc_id_t id, pmc_evtsel_t *evtsel); - -/* P4/Xeon-specific: */ -extern int pmc_cccr_read(pmc_id_t id, pmc_cccr_t *cccr); -extern int pmc_cccr_write(pmc_id_t id, pmc_cccr_t *cccr); -extern int pmc_escr_read(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); -extern int pmc_escr_write(pmc_id_t id, pmc_escr_id_t esid, pmc_escr_t *escr); -extern int pmc_set_ovf_func(pmc_id_t id, pmc_ovf_func_t *func); - -extern int pmc_acquire(task_t); -extern int pmc_release(task_t); - -#endif /* _I386_PERFMON_H_ */ diff --git a/osfmk/i386/phys.c b/osfmk/i386/phys.c index 7269aa283..c8a2f5206 100644 --- a/osfmk/i386/phys.c +++ b/osfmk/i386/phys.c @@ -93,7 +93,7 @@ */ void pmap_zero_page( - ppnum_t pn) + ppnum_t pn) { assert(pn != vm_page_fictitious_addr); assert(pn != vm_page_guard_addr); @@ -106,14 +106,14 @@ pmap_zero_page( */ void pmap_zero_part_page( - ppnum_t pn, + ppnum_t pn, vm_offset_t offset, vm_size_t 
len) { assert(pn != vm_page_fictitious_addr); assert(pn != vm_page_guard_addr); assert(offset + len <= PAGE_SIZE); - bzero_phys((addr64_t)(i386_ptob(pn) + offset), len); + bzero_phys((addr64_t)(i386_ptob(pn) + offset), (uint32_t)len); } /* @@ -137,8 +137,8 @@ pmap_copy_part_page( src = i386_ptob(psrc); dst = i386_ptob(pdst); - assert((((uint32_t)dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); - assert((((uint32_t)src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); + assert((((uintptr_t)dst & PAGE_MASK) + dst_offset + len) <= PAGE_SIZE); + assert((((uintptr_t)src & PAGE_MASK) + src_offset + len) <= PAGE_SIZE); bcopy_phys((addr64_t)src + (src_offset & INTEL_OFFMASK), (addr64_t)dst + (dst_offset & INTEL_OFFMASK), @@ -151,17 +151,20 @@ pmap_copy_part_page( */ void pmap_copy_part_lpage( - vm_offset_t src, - ppnum_t pdst, - vm_offset_t dst_offset, - vm_size_t len) + __unused vm_offset_t src, + __unused ppnum_t pdst, + __unused vm_offset_t dst_offset, + __unused vm_size_t len) { +#ifdef __i386__ mapwindow_t *map; +#endif assert(pdst != vm_page_fictitious_addr); assert(pdst != vm_page_guard_addr); assert((dst_offset + len) <= PAGE_SIZE); +#ifdef __i386__ mp_disable_preemption(); map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(pdst) & PG_FRAME) | @@ -172,6 +175,7 @@ pmap_copy_part_lpage( pmap_put_mapwindow(map); mp_enable_preemption(); +#endif } /* @@ -180,17 +184,20 @@ pmap_copy_part_lpage( */ void pmap_copy_part_rpage( - ppnum_t psrc, - vm_offset_t src_offset, - vm_offset_t dst, - vm_size_t len) + __unused ppnum_t psrc, + __unused vm_offset_t src_offset, + __unused vm_offset_t dst, + __unused vm_size_t len) { +#ifdef __i386__ mapwindow_t *map; +#endif assert(psrc != vm_page_fictitious_addr); assert(psrc != vm_page_guard_addr); assert((src_offset + len) <= PAGE_SIZE); +#ifdef __i386__ mp_disable_preemption(); map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(psrc) & PG_FRAME) | @@ -201,6 +208,7 @@ pmap_copy_part_rpage( 
pmap_put_mapwindow(map); mp_enable_preemption(); +#endif } /* @@ -212,17 +220,56 @@ addr64_t kvtophys( vm_offset_t addr) { - pt_entry_t *ptep; pmap_paddr_t pa; - - mp_disable_preemption(); - if ((ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)addr)) == PT_ENTRY_NULL) { - pa = 0; - } else { - pa = pte_to_pa(*ptep) | (addr & INTEL_OFFMASK); - } - mp_enable_preemption_no_check(); + + pa = ((pmap_paddr_t)pmap_find_phys(kernel_pmap, addr)) << INTEL_PGSHIFT; + if (pa) + pa |= (addr & INTEL_OFFMASK); return ((addr64_t)pa); } +__private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t bytes) { + void *src, *dst; + + mp_disable_preemption(); +#if NCOPY_WINDOWS > 0 + mapwindow_t *src_map, *dst_map; + /* We rely on MTRRs here */ + src_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF)); + dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD)); + src = (void *) ((uintptr_t)src_map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)); + dst = (void *) ((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)); +#elif defined(__x86_64__) + src = PHYSMAP_PTOV(src64); + dst = PHYSMAP_PTOV(dst64); +#endif + /* ensure we stay within a page */ + if (((((uint32_t)src64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) || ((((uint32_t)dst64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) ) { + panic("ml_copy_phys spans pages, src: 0x%llx, dst: 0x%llx", src64, dst64); + } + + switch (bytes) { + case 1: + *((uint8_t *) dst) = *((uint8_t *) src); + break; + case 2: + *((uint16_t *) dst) = *((uint16_t *) src); + break; + case 4: + *((uint32_t *) dst) = *((uint32_t *) src); + break; + /* Should perform two 32-bit reads */ + case 8: + *((uint64_t *) dst) = *((uint64_t *) src); + break; + default: + bcopy(src, dst, bytes); + break; + } +#if NCOPY_WINDOWS > 0 + pmap_put_mapwindow(src_map); + pmap_put_mapwindow(dst_map); +#endif + 
mp_enable_preemption(); +} diff --git a/osfmk/i386/pio.h b/osfmk/i386/pio.h index 48cf259ae..b90616545 100644 --- a/osfmk/i386/pio.h +++ b/osfmk/i386/pio.h @@ -58,66 +58,5 @@ #ifndef I386_PIO_H #define I386_PIO_H #include - -#if !MACH_ASSERT #include -#else -typedef unsigned short i386_ioport_t; - -/* read a longword */ -extern unsigned long inl( - i386_ioport_t port); -/* read a shortword */ -extern unsigned short inw( - i386_ioport_t port); -/* read a byte */ -extern unsigned char inb( - i386_ioport_t port); -/* write a longword */ -extern void outl( - i386_ioport_t port, - unsigned long datum); -/* write a word */ -extern void outw( - i386_ioport_t port, - unsigned short datum); -/* write a longword */ -extern void outb( - i386_ioport_t port, - unsigned char datum); - -/* input an array of longwords */ -extern void linl( - i386_ioport_t port, - int * data, - int count); -/* output an array of longwords */ -extern void loutl( - i386_ioport_t port, - int * data, - int count); - -/* input an array of words */ -extern void linw( - i386_ioport_t port, - int * data, - int count); -/* output an array of words */ -extern void loutw( - i386_ioport_t port, - int * data, - int count); - -/* input an array of bytes */ -extern void linb( - i386_ioport_t port, - char * data, - int count); -/* output an array of bytes */ -extern void loutb( - i386_ioport_t port, - char * data, - int count); -#endif /* !MACH_ASSERT */ - #endif /* I386_PIO_H */ diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 8decbb943..e3142fe4c 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -31,14 +31,14 @@ * * Implements the "wrappers" to the KEXT. 
*/ -#include -#include -#include -#include -#include #include +#include #include +#include #include +#include +#include +#include #include #include #include @@ -46,6 +46,7 @@ #include #include #include +#include /* * Kernel parameter determining whether threads are halted unconditionally @@ -106,7 +107,6 @@ machine_idle(void) goto out; my_cpu->lcpu.state = LCPU_IDLE; - my_cpu->lcpu.flags |= X86CORE_FL_IDLE; DBGLOG(cpu_handle, cpu_number(), MP_IDLE); MARK_CPU_IDLE(cpu_number()); @@ -130,7 +130,6 @@ machine_idle(void) */ MARK_CPU_ACTIVE(cpu_number()); DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); - my_cpu->lcpu.flags &= ~(X86CORE_FL_IDLE | X86CORE_FL_WAKEUP); my_cpu->lcpu.state = LCPU_RUN; /* @@ -164,7 +163,7 @@ pmCPUHalt(uint32_t reason) default: __asm__ volatile ("cli"); - if (pmInitDone + if (pmInitDone && pmDispatch != NULL && pmDispatch->pmCPUHalt != NULL) { /* @@ -284,7 +283,7 @@ pmCPUGetDeadline(cpu_data_t *cpu) { uint64_t deadline = EndOfAllTime; - if (pmInitDone + if (pmInitDone && pmDispatch != NULL && pmDispatch->GetDeadline != NULL) deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu); @@ -299,7 +298,7 @@ pmCPUGetDeadline(cpu_data_t *cpu) uint64_t pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline) { - if (pmInitDone + if (pmInitDone && pmDispatch != NULL && pmDispatch->SetDeadline != NULL) deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline); @@ -327,7 +326,6 @@ pmCPUExitIdle(cpu_data_t *cpu) { boolean_t do_ipi; - cpu->lcpu.flags |= X86CORE_FL_WAKEUP; if (pmInitDone && pmDispatch != NULL && pmDispatch->exitIdle != NULL) @@ -335,9 +333,6 @@ pmCPUExitIdle(cpu_data_t *cpu) else do_ipi = TRUE; - if (do_ipi) - cpu->lcpu.flags &= ~X86CORE_FL_WAKEUP; - return(do_ipi); } @@ -618,6 +613,12 @@ pmReSyncDeadlines(int cpu) cpu_PM_interrupt(cpu); } +static void +pmSendIPI(int cpu) +{ + lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT); +} + /* * Called by the power management kext to register itself and to get the * callbacks it might need into other kernel functions. 
This interface @@ -646,6 +647,7 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, callbacks->LCPUtoProcessor = pmLCPUtoProcessor; callbacks->ThreadBind = thread_bind; callbacks->GetSavedRunCount = pmGetSavedRunCount; + callbacks->pmSendIPI = pmSendIPI; callbacks->topoParms = &topoParms; } else { panic("Version mis-match between Kernel and CPU PM"); @@ -653,6 +655,10 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, if (cpuFuncs != NULL) { pmDispatch = cpuFuncs; + + if (pmDispatch->pmIPIHandler != NULL) { + lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler); + } } } diff --git a/osfmk/i386/pmCPU.h b/osfmk/i386/pmCPU.h index cbfaebe65..65c7b9e9c 100644 --- a/osfmk/i386/pmCPU.h +++ b/osfmk/i386/pmCPU.h @@ -37,19 +37,21 @@ * This value should be changed each time that pmDsipatch_t or pmCallBacks_t * changes. */ -#define PM_DISPATCH_VERSION 16 +#define PM_DISPATCH_VERSION 17 /* * Dispatch table for functions that get installed when the power * management KEXT loads. + * + * pmDispatch_t is the set of functions that the kernel can use to call + * into the power management KEXT. + * + * pmCallBacks_t is the set of functions that the power management kext + * can call to get at specific kernel functions. */ typedef struct { int (*pmCPUStateInit)(void); - - /* - * The following are the 'C' State interfaces. 
- */ void (*cstateInit)(void); uint64_t (*cstateMachineIdle)(uint64_t maxIdleDuration); uint64_t (*GetDeadline)(x86_lcpu_t *lcpu); @@ -72,8 +74,10 @@ typedef struct void (*markAllCPUsOff)(void); void (*pmSetRunCount)(uint32_t count); boolean_t (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu); + int (*pmIPIHandler)(void *state); } pmDispatch_t; + typedef struct { int (*setRTCPop)(uint64_t time); void (*resyncDeadlines)(int cpu); @@ -92,6 +96,7 @@ typedef struct { processor_t (*LCPUtoProcessor)(int lcpu); processor_t (*ThreadBind)(processor_t proc); uint32_t (*GetSavedRunCount)(void); + void (*pmSendIPI)(int cpu); x86_topology_parameters_t *topoParms; } pmCallBacks_t; diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index 75685d57d..311763f1f 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -124,13 +124,14 @@ #include #include #include -#include #include #include #include #include #include #include +#include +#include #if MACH_KDB #include @@ -143,8 +144,8 @@ #include #include +#include -#include /* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */ #ifdef DEBUGINTERRUPTS @@ -160,29 +161,11 @@ #include #endif /* IWANTTODEBUG */ -//#define PMAP_TRACES 1 -#ifdef PMAP_TRACES -boolean_t pmap_trace = FALSE; -#define PMAP_TRACE(x,a,b,c,d,e) \ - if (pmap_trace) { \ - KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e); \ - } -#else -#define PMAP_TRACE(x,a,b,c,d,e) KERNEL_DEBUG(x,a,b,c,d,e) -#endif /* PMAP_TRACES */ - /* * Forward declarations for internal functions. 
*/ -void pmap_expand_pml4( - pmap_t map, - vm_map_offset_t v); -void pmap_expand_pdpt( - pmap_t map, - vm_map_offset_t v); - -void pmap_remove_range( +void pmap_remove_range( pmap_t pmap, vm_map_offset_t va, pt_entry_t *spte, @@ -203,11 +186,6 @@ void phys_attribute_set( void pmap_set_reference( ppnum_t pn); -void pmap_movepage( - unsigned long from, - unsigned long to, - vm_size_t size); - boolean_t phys_page_exists( ppnum_t pn); @@ -218,8 +196,6 @@ void dump_4GB_pdpt(pmap_t p); void dump_4GB_pdpt_thread(thread_t tp); #endif -#define iswired(pte) ((pte) & INTEL_PTE_WIRED) - int nx_enabled = 1; /* enable no-execute protection */ #ifdef CONFIG_EMBEDDED int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */ @@ -228,7 +204,8 @@ int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64 #endif int allow_stack_exec = 0; /* No apps may execute from the stack by default */ -int cpu_64bit = 0; +boolean_t cpu_64bit = FALSE; +boolean_t pmap_trace = FALSE; /* * when spinning through pmap_remove @@ -533,17 +510,6 @@ uint64_t pde_mapped_size; * previously. */ -/* - * pmap locking - */ - -#define PMAP_LOCK(pmap) { \ - simple_lock(&(pmap)->lock); \ -} - -#define PMAP_UNLOCK(pmap) { \ - simple_unlock(&(pmap)->lock); \ -} /* * PV locking @@ -587,15 +553,8 @@ extern int max_lock_loops; #define LOOP_CHECK(msg, pmap) #endif /* USLOCK_DEBUG */ - -static void pmap_flush_tlbs(pmap_t pmap); - -#define PMAP_UPDATE_TLBS(pmap, s, e) \ - pmap_flush_tlbs(pmap) - - -#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ - +unsigned pmap_memory_region_count; +unsigned pmap_memory_region_current; pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; @@ -619,6 +578,7 @@ unsigned int inuse_ptepages_count = 0; addr64_t kernel64_cr3; boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */ + /* * Pmap cache. Cache is threaded through ref_count field of pmap. * Max will eventually be constant -- variable for experimentation. 
@@ -632,7 +592,6 @@ decl_simple_lock_data(,pmap_cache_lock) extern char end; static int nkpt; -extern uint32_t lowGlo; pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; @@ -832,6 +791,8 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) pde = pmap_pde(pmap,vaddr); if (pde && ((*pde & INTEL_PTE_VALID))) { + if (*pde & INTEL_PTE_PS) + return pde; if (pmap == kernel_pmap) return (vtopte(vaddr)); /* compat kernel still has pte's mapped */ #if TESTING @@ -906,7 +867,7 @@ pmap_map_bd( unsigned int flags) { pt_entry_t template; - pt_entry_t *pte; + pt_entry_t *pte; spl_t spl; template = pa_to_pte(start_addr) @@ -924,6 +885,7 @@ pmap_map_bd( if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; + while (start_addr < end_addr) { spl = splhigh(); pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); @@ -935,18 +897,16 @@ pmap_map_bd( pte_increment_pa(template); virt += PAGE_SIZE; start_addr += PAGE_SIZE; - } + } + flush_tlb(); return(virt); } -extern char *first_avail; -extern vm_offset_t virtual_avail, virtual_end; -extern pmap_paddr_t avail_start, avail_end; -extern vm_offset_t etext; -extern void *sectHIBB; -extern int sectSizeHIB; +extern char *first_avail; +extern vm_offset_t virtual_avail, virtual_end; +extern pmap_paddr_t avail_start, avail_end; void pmap_cpu_init(void) @@ -1022,13 +982,13 @@ pmap_init_high_shared(void) { vm_offset_t haddr; - struct __gdt_desc_struct gdt_desc = {0,0,0}; - struct __idt_desc_struct idt_desc = {0,0,0}; spl_t s; #if MACH_KDB struct i386_tss *ttss; #endif + cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index; + kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); s = splhigh(); @@ -1041,46 +1001,48 @@ pmap_init_high_shared(void) haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS, (vm_offset_t) &hi_remap_text, 3); kprintf("tramp: 0x%x, ",haddr); - printf("hi mem tramps at 0x%x\n",haddr); /* map gdt up high and update ptr for reload */ haddr = pmap_high_shared_remap(HIGH_FIXED_GDT, 
(vm_offset_t) master_gdt, 1); - __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); - gdt_desc.address = haddr; + cdi->cdi_gdt.ptr = (void *)haddr; kprintf("GDT: 0x%x, ",haddr); /* map ldt up high */ haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN, (vm_offset_t) master_ldt, HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1); + cdi->cdi_ldt = (struct fake_descriptor *)haddr; kprintf("LDT: 0x%x, ",haddr); /* put new ldt addr into gdt */ - master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1); - master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(USER_LDT)], 1); + struct fake_descriptor temp_fake_desc; + temp_fake_desc = ldt_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc; + *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc; /* map idt up high */ haddr = pmap_high_shared_remap(HIGH_FIXED_IDT, (vm_offset_t) master_idt, 1); - __asm__ __volatile__("sidt %0" : "=m" (idt_desc)); - idt_desc.address = haddr; + cdi->cdi_idt.ptr = (void *)haddr; kprintf("IDT: 0x%x, ", haddr); /* remap ktss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS, (vm_offset_t) &master_ktss, 1); - master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1); + + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc; kprintf("KTSS: 0x%x, ",haddr); #if MACH_KDB /* remap dbtss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS, 
(vm_offset_t) &master_dbtss, 1); - master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc; ttss = (struct i386_tss *)haddr; kprintf("DBTSS: 0x%x, ",haddr); #endif /* MACH_KDB */ @@ -1088,24 +1050,22 @@ pmap_init_high_shared(void) /* remap dftss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, (vm_offset_t) &master_dftss, 1); - master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DF_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc; kprintf("DFTSS: 0x%x\n",haddr); /* remap mctss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, (vm_offset_t) &master_mctss, 1); - master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(MC_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc; kprintf("MCTSS: 0x%x\n",haddr); - __asm__ __volatile__("lgdt %0": "=m" (gdt_desc)); - __asm__ __volatile__("lidt %0": "=m" (idt_desc)); - kprintf("gdt/idt reloaded, "); - set_tr(KERNEL_TSS); - kprintf("tr reset to KERNEL_TSS\n"); + cpu_desc_load(&cpu_data_master); } @@ -1113,19 +1073,6 @@ pmap_init_high_shared(void) * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. 
* Called with mapping OFF. Page_size must already be set. - * - * Parameters: - * load_start: PA where kernel was loaded - * avail_start PA of first available physical page - - * after kernel page tables - * avail_end PA of last available physical page - * virtual_avail VA of first available page - - * after kernel page tables - * virtual_end VA of last available page - - * end of kernel address space - * - * &start_text start of kernel text - * &etext end of kernel text */ void @@ -1136,7 +1083,6 @@ pmap_bootstrap( vm_offset_t va; pt_entry_t *pte; int i; - int wpkernel, boot_arg; pdpt_entry_t *pdpt; spl_t s; @@ -1160,11 +1106,12 @@ pmap_bootstrap( kernel_pmap->pm_pdpt = pdpt; kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT); + va = (vm_offset_t)kernel_pmap->dirbase; /* setup self referential mapping(s) */ for (i = 0; i< NPGPTD; i++, pdpt++) { pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); pmap_store_pte( (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i), (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | @@ -1186,7 +1133,7 @@ pmap_bootstrap( splx(s); nkpt = NKPT; - inuse_ptepages_count += NKPT; + OSAddAtomic(NKPT, &inuse_ptepages_count); virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); @@ -1225,42 +1172,6 @@ pmap_bootstrap( } printf("npvhash=%d\n",npvhash); - wpkernel = 1; - if (PE_parse_boot_argn("wpkernel", &boot_arg, sizeof (boot_arg))) { - if (boot_arg == 0) - wpkernel = 0; - } - - s = splhigh(); - - /* Remap kernel text readonly unless the "wpkernel" boot-arg is present - * and set to 0. 
- */ - if (wpkernel) - { - vm_offset_t myva; - pt_entry_t *ptep; - - for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { - if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB)) - continue; - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); - if (ptep) - pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); - } - } - - /* no matter what, kernel page zero is not accessible */ - pte = pmap_pte(kernel_pmap, 0); - pmap_store_pte(pte, INTEL_PTE_INVALID); - - /* map lowmem global page into fixed addr 0x2000 */ - if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte"); - assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); /* make sure it is defined on page boundary */ - pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW); - splx(s); - flush_tlb(); - simple_lock_init(&kernel_pmap->lock, 0); simple_lock_init(&pv_hashed_free_list_lock, 0); simple_lock_init(&pv_hashed_kern_free_list_lock, 0); @@ -1271,7 +1182,7 @@ pmap_bootstrap( pde_mapped_size = PDE_MAPPED_SIZE; if (cpu_64bit) { - pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT; + pdpt_entry_t *ppdpt = IdlePDPT; pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64; pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4; int istate = ml_set_interrupts_enabled(FALSE); @@ -1304,7 +1215,7 @@ pmap_bootstrap( kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3; /* Re-initialize descriptors and prepare to switch modes */ - cpu_desc_init64(&cpu_data_master, TRUE); + cpu_desc_init64(&cpu_data_master); current_cpu_datap()->cpu_is64bit = TRUE; current_cpu_datap()->cpu_active_cr3 = kernel64_cr3; @@ -1313,8 +1224,11 @@ pmap_bootstrap( ml_set_interrupts_enabled(istate); } - /* Set 64-bit mode if required. */ + /* Sets 64-bit mode if required. 
*/ cpu_mode_init(&cpu_data_master); + /* Update in-kernel CPUID information if we're now in 64-bit mode */ + if (IA32e) + cpuid_set_info(); kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4; @@ -1375,7 +1289,7 @@ pmap_init(void) * so we cover all memory */ - npages = i386_btop(avail_end); + npages = (long)i386_btop(avail_end); s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) + pv_lock_table_size(npages) @@ -1383,7 +1297,9 @@ pmap_init(void) + npages); s = round_page(s); - if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS) + if (kernel_memory_allocate(kernel_map, &addr, s, 0, + KMA_KOBJECT | KMA_PERMANENT) + != KERN_SUCCESS) panic("pmap_init"); memset((char *)addr, 0, s); @@ -1414,10 +1330,11 @@ pmap_init(void) ppnum_t last_pn; pmap_memory_region_t *pmptr = pmap_memory_regions; - last_pn = i386_btop(avail_end); + last_pn = (ppnum_t)i386_btop(avail_end); for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { if (pmptr->type == kEfiConventionalMemory) { + for (pn = pmptr->base; pn <= pmptr->end; pn++) { if (pn < last_pn) { pmap_phys_attributes[pn] |= PHYS_MANAGED; @@ -1473,18 +1390,6 @@ pmap_init(void) } -void -x86_lowmem_free(void) -{ - /* free lowmem pages back to the vm system. we had to defer doing this - until the vm system was fully up. 
- the actual pages that are released are determined by which - pages the memory sizing code puts into the region table */ - - ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base), - (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base)); -} - #define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) ) @@ -1518,8 +1423,8 @@ pmap_verify_free( boolean_t pmap_is_empty( pmap_t pmap, - vm_map_offset_t vstart, - vm_map_offset_t vend) + vm_map_offset_t va_start, + vm_map_offset_t va_end) { vm_map_offset_t offset; ppnum_t phys_page; @@ -1527,8 +1432,20 @@ pmap_is_empty( if (pmap == PMAP_NULL) { return TRUE; } - for (offset = vstart; - offset < vend; + + /* + * Check the resident page count + * - if it's zero, the pmap is completely empty. + * This short-circuit test prevents a virtual address scan which is + * painfully slow for 64-bit spaces. + * This assumes the count is correct + * .. the debug kernel ought to be checking perhaps by page table walk. 
+ */ + if (pmap->stats.resident_count == 0) + return TRUE; + + for (offset = va_start; + offset < va_end; offset += PAGE_SIZE_64) { phys_page = pmap_find_phys(pmap, offset); if (phys_page) { @@ -1548,7 +1465,7 @@ pmap_is_empty( } kprintf("pmap_is_empty(%p,0x%llx,0x%llx): " "page %d at 0x%llx\n", - pmap, vstart, vend, phys_page, offset); + pmap, va_start, va_end, phys_page, offset); return FALSE; } } @@ -1617,8 +1534,8 @@ pmap_create( /* legacy 32 bit setup */ /* in the legacy case the pdpt layer is hardwired to 4 entries and each * entry covers 1GB of addr space */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) - panic("pmap_create kmem_alloc_wired"); + if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) + panic("pmap_create kmem_alloc_kobject"); p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); if ((vm_offset_t)NULL == p->pm_hold) { panic("pdpt zalloc"); @@ -1636,7 +1553,7 @@ pmap_create( template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID; for (i = 0; i< NPGPTD; i++, pdpt++ ) { pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); pmap_store_pte(pdpt, pa | template); } @@ -1649,15 +1566,13 @@ pmap_create( /* 64 bit setup */ /* alloc the pml4 page in kernel vm */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) - panic("pmap_create kmem_alloc_wired pml4"); + if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) + panic("pmap_create kmem_alloc_kobject pml4"); memset((char *)p->pm_hold, 0, PAGE_SIZE); p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold); - vm_page_lock_queues(); - inuse_ptepages_count++; - vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); /* allocate the vm_objs to hold the pdpt, pde and pte pages */ @@ -1852,9 +1767,7 @@ pmap_destroy( * pmap 
structure. */ if (!cpu_64bit) { - vm_page_lock_queues(); - inuse_ptepages_count -= p->pm_obj->resident_page_count; - vm_page_unlock_queues(); + OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count); kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); zfree(pdpt_zone, (void *)p->pm_hold); @@ -1877,9 +1790,7 @@ pmap_destroy( inuse_ptepages += p->pm_obj->resident_page_count; vm_object_deallocate(p->pm_obj); - vm_page_lock_queues(); - inuse_ptepages_count -= inuse_ptepages; - vm_page_unlock_queues(); + OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count); } zfree(pmap_zone, p); @@ -2118,14 +2029,14 @@ pmap_remove_range( panic("pmap_remove_range: resident_count"); #endif assert(pmap->stats.resident_count >= num_removed); - OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count); + OSAddAtomic(-num_removed, &pmap->stats.resident_count); #if TESTING if (pmap->stats.wired_count < num_unwired) panic("pmap_remove_range: wired_count"); #endif assert(pmap->stats.wired_count >= num_unwired); - OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(-num_unwired, &pmap->stats.wired_count); return; } @@ -2214,7 +2125,6 @@ pmap_remove( orig_s64 = s64; while (s64 < e64) { - l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1); if (l64 > e64) l64 = e64; @@ -2307,6 +2217,7 @@ pmap_page_protect( LOCK_PVH(pai); + /* * Walk down PV list, changing or removing all mappings. 
*/ @@ -2323,8 +2234,7 @@ pmap_page_protect( pte = pmap_pte(pmap, vaddr); if (0 == pte) { - kprintf("pmap_page_protect pmap %p pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr); - panic("pmap_page_protect"); + panic("pmap_page_protect: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx, prot: %d kernel_pmap: %p", pmap, pn, vaddr, prot, kernel_pmap); } nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink); /* if there is one */ @@ -2350,7 +2260,7 @@ pmap_page_protect( panic("pmap_page_protect: resident_count"); #endif assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count); + OSAddAtomic(-1, &pmap->stats.resident_count); /* * Deal with the pv_rooted_entry. @@ -2391,6 +2301,7 @@ pmap_page_protect( pvh_e = nexth; } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h); + /* * If pv_head mapping was removed, fix it up. */ @@ -2683,12 +2594,12 @@ pmap_enter( if (wired) { template |= INTEL_PTE_WIRED; if (!iswired(*pte)) - OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(+1, &pmap->stats.wired_count); } else { if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(-1, &pmap->stats.wired_count); } } @@ -2733,13 +2644,12 @@ pmap_enter( pmap_store_pte(pte, 0); if (managed_page(pai)) { - #if TESTING if (pmap->stats.resident_count < 1) panic("pmap_enter: resident_count"); #endif assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count); + OSAddAtomic(-1, &pmap->stats.resident_count); if (iswired(*pte)) { @@ -2748,7 +2658,7 @@ pmap_enter( panic("pmap_enter: wired_count"); #endif assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(-1, &pmap->stats.wired_count); } pmap_phys_attributes[pai] |= oattr; @@ -2820,7 +2730,6 @@ pmap_enter( } } else { - /* * old_pa is not managed. * Do removal part of accounting. 
@@ -2828,7 +2737,7 @@ pmap_enter( if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(-1, &pmap->stats.wired_count); } } } @@ -2913,7 +2822,7 @@ pmap_enter( * only count the mapping * for 'managed memory' */ - OSAddAtomic(+1, (SInt32 *) &pmap->stats.resident_count); + OSAddAtomic(+1, &pmap->stats.resident_count); if (pmap->stats.resident_count > pmap->stats.resident_max) { pmap->stats.resident_max = pmap->stats.resident_count; } @@ -2943,7 +2852,7 @@ pmap_enter( if (wired) { template |= INTEL_PTE_WIRED; - OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count); + OSAddAtomic(+1, &pmap->stats.wired_count); } pmap_store_pte(pte, template); @@ -2999,7 +2908,7 @@ pmap_change_wiring( /* * wiring down mapping */ - OSAddAtomic(+1, (SInt32 *) &map->stats.wired_count); + OSAddAtomic(+1, &map->stats.wired_count); pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED)); } else if (!wired && iswired(*pte)) { @@ -3007,31 +2916,13 @@ pmap_change_wiring( * unwiring mapping */ assert(map->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &map->stats.wired_count); + OSAddAtomic(-1, &map->stats.wired_count); pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED)); } PMAP_UNLOCK(map); } -ppnum_t -pmap_find_phys(pmap_t pmap, addr64_t va) -{ - pt_entry_t *ptp; - ppnum_t ppn; - - mp_disable_preemption(); - - ptp = pmap_pte(pmap, va); - if (PT_ENTRY_NULL == ptp) { - ppn = 0; - } else { - ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp)); - } - mp_enable_preemption(); - - return ppn; -} /* * Routine: pmap_extract @@ -3055,7 +2946,7 @@ pmap_extract( ppn = pmap_find_phys(pmap, vaddr); if (ppn) { - paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK); + paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK); } return (paddr); } @@ -3098,11 +2989,12 @@ pmap_expand_pml4( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; 
vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj_pml4); @@ -3114,11 +3006,9 @@ pmap_expand_pml4( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj_pml4); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3188,11 +3078,12 @@ pmap_expand_pdpt( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj_pdpt); @@ -3204,11 +3095,9 @@ pmap_expand_pdpt( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj_pdpt); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3300,11 +3189,12 @@ pmap_expand( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj); @@ -3317,11 +3207,9 @@ pmap_expand( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3470,10 +3358,9 @@ pmap_collect( if (m == VM_PAGE_NULL) panic("pmap_collect: pte page not in object"); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + + OSAddAtomic(-1, &inuse_ptepages_count); vm_object_unlock(p->pm_obj); } @@ -3555,6 +3442,7 @@ phys_attribute_clear( return; } + PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | 
DBG_FUNC_START, (int) pn, bits, 0, 0, 0); @@ -3655,6 +3543,7 @@ phys_attribute_test( attributes = pmap_phys_attributes[pai] & bits; + /* * Walk down PV list, checking the mappings until we * reach the end or we've found the attributes we've asked for @@ -3683,9 +3572,8 @@ phys_attribute_test( /* * pick up modify and/or reference bits from this mapping */ - pte = pmap_pte(pmap, va); - attributes |= *pte & bits; + attributes |= (int)(*pte & bits); } @@ -4268,176 +4156,14 @@ pmap_put_mapwindow(mapwindow_t *mp) pmap_store_pte(mp->prv_CMAP, 0); } - -/* - * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time, - * on a NBPDE boundary. - */ -uint64_t pmap_nesting_size_min = NBPDE; -uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE; /* no limit, really... */ - -/* - * kern_return_t pmap_nest(grand, subord, vstart, size) - * - * grand = the pmap that we will nest subord into - * subord = the pmap that goes into the grand - * vstart = start of range in pmap to be inserted - * nstart = start of range in pmap nested pmap - * size = Size of nest area (up to 16TB) - * - * Inserts a pmap into another. This is used to implement shared segments. - * - * on x86 this is very limited right now. must be exactly 1 segment. - * - * Note that we depend upon higher level VM locks to insure that things don't change while - * we are doing this. For example, VM should not be doing any pmap enters while it is nesting - * or do 2 nests at once. 
- */ - - -kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) { - - vm_map_offset_t vaddr, nvaddr; - pd_entry_t *pde,*npde; - unsigned int i; - uint64_t num_pde; - - // do validity tests - if (size & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if(vstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if(nstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */ - if(size == 0) { - panic("pmap_nest: size is invalid - %016llX\n", size); - } - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START, - (int) grand, (int) subord, - (int) (vstart>>32), (int) vstart, 0); - - subord->pm_shared = TRUE; - nvaddr = (vm_map_offset_t)nstart; - num_pde = size >> PDESHIFT; - - PMAP_LOCK(subord); - for (i = 0; i < num_pde; i++) { - npde = pmap_pde(subord, nvaddr); - while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) { - PMAP_UNLOCK(subord); - pmap_expand(subord, nvaddr); // pmap_expand handles races - PMAP_LOCK(subord); - npde = pmap_pde(subord, nvaddr); - } - nvaddr += NBPDE; - } - - PMAP_UNLOCK(subord); - - vaddr = (vm_map_offset_t)vstart; - - PMAP_LOCK(grand); - - for (i = 0;i < num_pde; i++) { - pd_entry_t tpde; - - npde = pmap_pde(subord, nstart); - if (npde == 0) - panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart); - tpde = *npde; - nstart += NBPDE; - pde = pmap_pde(grand, vaddr); -/* Legacy mode does not require expansion. - * DRK: consider a debug mode test to verify that no PTEs are extant within - * this range. - */ - if ((0 == pde) && cpu_64bit) { - PMAP_UNLOCK(grand); - pmap_expand_pdpt(grand, vaddr); - PMAP_LOCK(grand); - pde = pmap_pde(grand, vaddr); - } - - if (pde == 0) - panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr); - vaddr += NBPDE; - pmap_store_pte(pde, tpde); - } - - /* XXX FBDP: why do we need to flush here ? 
*/ - PMAP_UPDATE_TLBS(grand, vstart, vstart + size - 1); - - PMAP_UNLOCK(grand); - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0); - - return KERN_SUCCESS; -} - -/* - * kern_return_t pmap_unnest(grand, vaddr) - * - * grand = the pmap that we will nest subord into - * vaddr = start of range in pmap to be unnested - * - * Removes a pmap from another. This is used to implement shared segments. - * On the current PPC processors, this is limited to segment (256MB) aligned - * segment sized ranges. - */ - -kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) { - - pd_entry_t *pde; - unsigned int i; - unsigned int num_pde; - addr64_t vstart, vend; - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START, - (int) grand, - (int) (vaddr>>32), (int) vaddr, 0, 0); - - if ((size & (pmap_nesting_size_min-1)) || - (vaddr & (pmap_nesting_size_min-1))) { - panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n", - grand, vaddr, size); - } - - /* align everything to PDE boundaries */ - vstart = vaddr & ~(NBPDE-1); - vend = (vaddr + size + NBPDE - 1) & ~(NBPDE-1); - size = vend - vstart; - - PMAP_LOCK(grand); - - // invalidate all pdes for segment at vaddr in pmap grand - - num_pde = size >> PDESHIFT; - - vaddr = vstart; - for (i=0;i> 2); + deadline = mach_absolute_time() + (LockTimeOut); while (mach_absolute_time() < deadline) cpu_pause(); } - /* * Called with pmap locked, we: * - scan through per-cpu data to see which other cpus need to flush @@ -4573,6 +4298,8 @@ pmap_flush_tlbs(pmap_t pmap) */ while (cpus_to_respond != 0) { if (mach_absolute_time() > deadline) { + if (mp_recent_debugger_activity()) + continue; if (!panic_active()) { pmap_tlb_flush_timeout = TRUE; pmap_cpuset_NMIPI(cpus_to_respond); @@ -4596,7 +4323,6 @@ pmap_flush_tlbs(pmap_t pmap) } } } - /* * Flush local tlb if required. 
* We need this flush even if the pmap being changed @@ -4606,6 +4332,10 @@ pmap_flush_tlbs(pmap_t pmap) if (flush_self) flush_tlb(); + if ((pmap == kernel_pmap) && (flush_self != TRUE)) { + panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); + } + PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, (int) pmap, cpus_to_signal, flush_self, 0, 0); } @@ -4734,3 +4464,4 @@ void dump_4GB_pdpt_thread(thread_t tp) #endif + diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 90f9ff8d7..9e6d65d20 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -111,10 +111,20 @@ #define PTESHIFT 12 + +#define INITPT_SEG_BASE 0x100000 +#define INITGDT_SEG_BASE 0x106000 +#define SLEEP_SEG_BASE 0x107000 + +#ifdef __x86_64__ +#define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL) +#endif + #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ #define INTEL_OFFMASK (I386_PGBYTES - 1) +#define INTEL_LOFFMASK (I386_LPGBYTES - 1) #define PG_FRAME 0x000FFFFFFFFFF000ULL #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) #define NPTDPG (PAGE_SIZE/(sizeof (pd_entry_t))) @@ -125,6 +135,8 @@ #define NBPDE (1 << PDESHIFT) #define PDEMASK (NBPDE - 1) +#define PTE_PER_PAGE 512 /* number of PTE's per page on any level */ + /* cleanly define parameters for all the page table levels */ typedef uint64_t pml4_entry_t; #define NPML4PG (PAGE_SIZE/(sizeof (pml4_entry_t))) @@ -160,12 +172,20 @@ typedef uint64_t pt_entry_t; typedef uint64_t pmap_paddr_t; +/* superpages */ +#ifdef __x86_64__ +#define SUPERPAGE_NBASEPAGES 512 +#else +#define SUPERPAGE_NBASEPAGES 1 /* we don't support superpages on i386 */ +#endif + /* * Atomic 64-bit store of a page table entry. 
*/ static inline void pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) { +#ifdef __i386__ /* * Load the new value into %ecx:%ebx * Load the old value into %edx:%eax @@ -184,6 +204,13 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) "b" ((uint32_t)value), "c" ((uint32_t)(value >> 32)) : "eax", "edx", "memory"); +#else + /* + * In the 32-bit kernel a compare-and-exchange loop was + * required to provide atomicity. For K64, life is easier: + */ + *entryp = value; +#endif } /* @@ -194,6 +221,7 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) { boolean_t ret; +#ifdef __i386__ /* * Load the old value into %edx:%eax * Load the new value into %ecx:%ebx @@ -212,6 +240,24 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) "b" ((uint32_t)new), "c" ((uint32_t)(new >> 32)) : "memory"); +#else + /* + * Load the old value into %rax + * Load the new value into another register + * Compare-exchange-quad at address entryp + * If the compare succeeds, the new value is stored, return TRUE. + * Otherwise, no swap is made, return FALSE. + */ + asm volatile( + " lock; cmpxchgq %2,(%3) \n\t" + " setz %%al \n\t" + " movzbl %%al,%0" + : "=a" (ret) + : "a" (old), + "r" (new), + "r" (entryp) + : "memory"); +#endif return ret; } @@ -225,6 +271,7 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) #define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t)))) #define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t)))) +#ifdef __i386__ /* * The 64-bit kernel is remapped in uber-space which is at the base * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). 
That is, @@ -233,6 +280,13 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) #define KERNEL_UBER_PML4_INDEX 511 #define KERNEL_UBER_BASE (0ULL - NBPML4) #define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32)) +#else +#define KERNEL_PML4_INDEX 511 +#define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */ +#define KERNEL_PHYSMAP_INDEX 509 /* virtual to physical map */ +#define KERNEL_BASE (0ULL - NBPML4) +#define KERNEL_BASEMENT (KERNEL_BASE - NBPML4) +#endif #define VM_WIMG_COPYBACK VM_MEM_COHERENT #define VM_WIMG_DEFAULT VM_MEM_COHERENT @@ -246,8 +300,18 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) /* * Pte related macros */ +#ifdef __i386__ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<> PDESHIFT) & PDEMASK) +#define pdptnum(pmap, a) (((vm_offset_t)(a) >> PDPTSHIFT) & PDPTMASK) +#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK) +#define PMAP_INVALID_PDPTNUM (~0ULL) +#ifdef __i386__ #define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1)) #define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1)) #define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1)) +#else +#define VAMASK ((1ULL<<48)-1) +#define pml4idx(pmap, a) ((((a) & VAMASK) >> PML4SHIFT) & \ + ((1ULL<<(48 - PML4SHIFT))-1)) +#define pdptidx(pmap, a) ((((a) & PML4MASK) >> PDPTSHIFT) & \ + ((1ULL<<(48 - PDPTSHIFT))-1)) +#define pdeidx(pmap, a) ((((a) & PML4MASK) >> PDSHIFT) & \ + ((1ULL<<(48 - PDSHIFT)) - 1)) +#endif /* * Convert page descriptor index to user virtual address @@ -344,11 +422,12 @@ enum high_fixed_addresses { #define INTEL_PTE_NCACHE 0x00000010 #define INTEL_PTE_REF 0x00000020 #define INTEL_PTE_MOD 0x00000040 -#define INTEL_PTE_PS 0x00000080 -#define INTEL_PTE_GLOBAL 0x00000100 +#define INTEL_PTE_PS 0x00000080 +#define INTEL_PTE_PTA 0x00000080 +#define INTEL_PTE_GLOBAL 0x00000100 #define INTEL_PTE_WIRED 0x00000200 +#define INTEL_PDPTE_NESTED 0x00000400 #define 
INTEL_PTE_PFN PG_FRAME -#define INTEL_PTE_PTA 0x00000080 #define INTEL_PTE_NX (1ULL << 63) @@ -379,31 +458,43 @@ enum high_fixed_addresses { * and directories. */ -extern pt_entry_t PTmap[], APTmap[], Upte; -extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde; - -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ -extern pdpt_entry_t *IdlePDPT; - -extern pmap_paddr_t lo_kernel_cr3; - -extern pml4_entry_t *IdlePML4; -extern pdpt_entry_t *IdlePDPT64; -extern addr64_t kernel64_cr3; -extern boolean_t no_shared_cr3; - -extern uint64_t pmap_pv_hashlist_walks; -extern uint64_t pmap_pv_hashlist_cnts; -extern uint32_t pmap_pv_hashlist_max; - +#ifdef __i386__ +extern pt_entry_t PTmap[], APTmap[], Upte; +extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde; +extern pmap_paddr_t lo_kernel_cr3; +extern pdpt_entry_t *IdlePDPT64; +#else +extern pt_entry_t *PTmap; +#endif +extern boolean_t no_shared_cr3; +extern addr64_t kernel64_cr3; +extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */ +extern pdpt_entry_t IdlePDPT[]; +extern pml4_entry_t IdlePML4[]; + +extern uint64_t pmap_pv_hashlist_walks; +extern uint64_t pmap_pv_hashlist_cnts; +extern uint32_t pmap_pv_hashlist_max; +extern uint32_t pmap_kernel_text_ps; + +#ifdef __i386__ /* + * ** i386 ** * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. 
*/ + #define vtopte(va) (PTmap + i386_btop((vm_offset_t)va)) +#endif + +#ifdef __x86_64__ +#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK)) +#define PHYSMAP_BASE KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0) +#define PHYSMAP_PTOV(x) ((void *)(((uint64_t)(x)) + PHYSMAP_BASE)) +#endif typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ /* changed by other processors */ @@ -422,14 +513,18 @@ struct md_page { struct pmap { pd_entry_t *dirbase; /* page directory pointer */ +#ifdef __i386__ pmap_paddr_t pdirbase; /* phys. address of dirbase */ +#endif vm_object_t pm_obj; /* object to hold pde's */ int ref_count; /* reference count */ int nx_enabled; task_map_t pm_task_map; decl_simple_lock_data(,lock) /* lock on map */ struct pmap_statistics stats; /* map statistics */ +#ifdef __i386__ vm_offset_t pm_hold; /* true pdpt zalloc addr */ +#endif pmap_paddr_t pm_cr3; /* physical addr */ pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */ pml4_entry_t *pm_pml4; /* VKA of top level */ @@ -440,6 +535,7 @@ struct pmap { }; +#if NCOPY_WINDOWS > 0 #define PMAP_PDPT_FIRST_WINDOW 0 #define PMAP_PDPT_NWINDOWS 4 #define PMAP_PDE_FIRST_WINDOW (PMAP_PDPT_NWINDOWS) @@ -466,7 +562,7 @@ typedef struct cpu_pmap { extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry); extern void pmap_put_mapwindow(mapwindow_t *map); - +#endif typedef struct pmap_memory_regions { ppnum_t base; @@ -475,16 +571,31 @@ typedef struct pmap_memory_regions { uint32_t type; } pmap_memory_region_t; -unsigned pmap_memory_region_count; -unsigned pmap_memory_region_current; +extern unsigned pmap_memory_region_count; +extern unsigned pmap_memory_region_current; #define PMAP_MEMORY_REGIONS_SIZE 128 extern pmap_memory_region_t pmap_memory_regions[]; -static inline void set_dirbase(pmap_t tpmap, __unused int tcpu) { - current_cpu_datap()->cpu_task_cr3 = (pmap_paddr_t)((tpmap)->pm_cr3); +static inline void +set_dirbase(pmap_t tpmap, __unused thread_t thread) { + current_cpu_datap()->cpu_task_cr3 = 
tpmap->pm_cr3; current_cpu_datap()->cpu_task_map = tpmap->pm_task_map; +#ifndef __i386__ + /* + * Switch cr3 if necessary + * - unless running with no_shared_cr3 debugging mode + * and we're not on the kernel's cr3 (after pre-empted copyio) + */ + if (!no_shared_cr3) { + if (get_cr3() != tpmap->pm_cr3) + set_cr3(tpmap->pm_cr3); + } else { + if (get_cr3() != current_cpu_datap()->cpu_kernel_cr3) + set_cr3(current_cpu_datap()->cpu_kernel_cr3); + } +#endif } /* @@ -547,6 +658,7 @@ extern int pmap_list_resident_pages( vm_offset_t *listp, int space); +#ifdef __i386__ extern void pmap_commpage32_init( vm_offset_t kernel, vm_offset_t user, @@ -556,10 +668,14 @@ extern void pmap_commpage64_init( vm_map_offset_t user, int count); +#endif + +#if NCOPY_WINDOWS > 0 extern struct cpu_pmap *pmap_cpu_alloc( boolean_t is_boot_cpu); extern void pmap_cpu_free( struct cpu_pmap *cp); +#endif extern void pmap_map_block( pmap_t pmap, @@ -576,6 +692,7 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); extern void pmap_cpu_init(void); extern void pmap_disable_NX(pmap_t pmap); +#ifdef __i386__ extern void pmap_set_4GB_pagezero(pmap_t pmap); extern void pmap_clear_4GB_pagezero(pmap_t pmap); extern void pmap_load_kernel_cr3(void); @@ -584,6 +701,7 @@ extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types); extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types); extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int); extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int); +#endif extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *); @@ -596,54 +714,23 @@ extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_s #include -#if defined(PMAP_ACTIVATE_KERNEL) -#undef PMAP_ACTIVATE_KERNEL -#undef PMAP_DEACTIVATE_KERNEL -#undef PMAP_ACTIVATE_USER -#undef PMAP_DEACTIVATE_USER -#endif - - -#define PMAP_ACTIVATE_KERNEL(my_cpu) { \ - spl_t spl; 
\ - \ - spl = splhigh(); \ - if (current_cpu_datap()->cpu_tlb_invalid) \ - process_pmap_updates(); \ - splx(spl); \ -} - -#define PMAP_DEACTIVATE_KERNEL(my_cpu) { \ - spl_t spl; \ - \ - spl = splhigh(); \ - process_pmap_updates(); \ - splx(spl); \ -} - - -#define PMAP_ACTIVATE_MAP(map, my_cpu) { \ + +#define PMAP_ACTIVATE_MAP(map, thread) { \ register pmap_t tpmap; \ \ tpmap = vm_map_pmap(map); \ - set_dirbase(tpmap, my_cpu); \ + set_dirbase(tpmap, thread); \ } -#define PMAP_DEACTIVATE_MAP(map, my_cpu) \ +#ifdef __i386__ +#define PMAP_DEACTIVATE_MAP(map, thread) \ if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \ pmap_load_kernel_cr3(); - - -#define PMAP_ACTIVATE_USER(th, my_cpu) { \ - spl_t spl; \ - \ - spl = splhigh(); \ - PMAP_ACTIVATE_MAP(th->map, my_cpu) \ - splx(spl); \ -} - -#define PMAP_DEACTIVATE_USER(th, my_cpu) +#else +#define PMAP_DEACTIVATE_MAP(map, my_cpu) +#endif +#if defined(__i386__) #define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ spl_t spl; \ @@ -654,9 +741,9 @@ extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_s \ need_flush = 0; \ spl = splhigh(); \ - if (old_th->map != new_th->map) { \ - PMAP_DEACTIVATE_MAP(old_th->map, my_cpu); \ - PMAP_ACTIVATE_MAP(new_th->map, my_cpu); \ + if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) { \ + PMAP_DEACTIVATE_MAP(old_th->map, old_th); \ + PMAP_ACTIVATE_MAP(new_th->map, new_th); \ } \ kpdp = current_cpu_datap()->cpu_copywindow_pdp; \ for (i = 0; i < NCOPY_WINDOWS; i++) { \ @@ -682,16 +769,41 @@ extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_s flush_tlb(); \ } +#else /* __x86_64__ */ +#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ + spl_t spl; \ + \ + spl = splhigh(); \ + if (old_th->map != new_th->map) { \ + PMAP_DEACTIVATE_MAP(old_th->map, old_th); \ + PMAP_ACTIVATE_MAP(new_th->map, new_th); \ + } \ + splx(spl); \ +} +#endif /* __i386__ */ + +#ifdef __i386__ #define PMAP_SWITCH_USER(th, new_map, 
my_cpu) { \ spl_t spl; \ \ spl = splhigh(); \ - PMAP_DEACTIVATE_MAP(th->map, my_cpu); \ + PMAP_DEACTIVATE_MAP(th->map, th); \ th->map = new_map; \ - PMAP_ACTIVATE_MAP(th->map, my_cpu); \ + PMAP_ACTIVATE_MAP(th->map, th); \ splx(spl); \ inval_copy_windows(th); \ } +#else +#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \ + spl_t spl; \ + \ + spl = splhigh(); \ + PMAP_DEACTIVATE_MAP(th->map, th); \ + th->map = new_map; \ + PMAP_ACTIVATE_MAP(th->map, th); \ + splx(spl); \ +} +#endif /* * Marking the current cpu's cr3 inactive is achieved by setting its lsb. @@ -712,6 +824,24 @@ extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_s #define CPU_GET_ACTIVE_CR3(cpu) \ (cpu_datap(cpu)->cpu_active_cr3 & ~1) +#define CPU_GET_TASK_CR3(cpu) \ + (cpu_datap(cpu)->cpu_task_cr3) + +/* + * Mark this cpu idle, and remove it from the active set, + * since it is not actively using any pmap. Signal_cpus + * will notice that it is idle, and avoid signaling it, + * but will queue the update request for when the cpu + * becomes active. 
+ */ +#if defined(__x86_64__) +#define MARK_CPU_IDLE(my_cpu) { \ + int s = splhigh(); \ + CPU_CR3_MARK_INACTIVE(); \ + __asm__ volatile("mfence"); \ + splx(s); \ +} +#else /* __i386__ native */ #define MARK_CPU_IDLE(my_cpu) { \ /* \ * Mark this cpu idle, and remove it from the active set, \ @@ -729,6 +859,7 @@ extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_s __asm__ volatile("mfence"); \ splx(s); \ } +#endif /* __i386__ */ #define MARK_CPU_ACTIVE(my_cpu) { \ \ @@ -771,6 +902,7 @@ extern boolean_t pmap_is_empty(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end); + #endif /* ASSEMBLER */ diff --git a/iokit/IOKit/pwr_mgt/IOPMPagingPlexus.h b/osfmk/i386/pmap_internal.h similarity index 58% rename from iokit/IOKit/pwr_mgt/IOPMPagingPlexus.h rename to osfmk/i386/pmap_internal.h index 74fb00731..1a1105399 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPagingPlexus.h +++ b/osfmk/i386/pmap_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,44 +25,52 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ - -#ifndef _IOKIT_IOPMPAGINGPLEXUS_H -#define _IOKIT_IOPMPAGINGPLEXUS_H +#include +#include -#include +#ifdef MACH_KERNEL_PRIVATE -/* Deprecated in Mac OS X version 10.0 - - Under no circumstances should any new software use or reference IOPMPagingPlexus. +/* + * pmap locking */ -class IOPMPagingPlexus : public IOService -{ - OSDeclareDefaultStructors(IOPMPagingPlexus) +#define PMAP_LOCK(pmap) { \ + simple_lock(&(pmap)->lock); \ +} -protected: +#define PMAP_UNLOCK(pmap) { \ + simple_unlock(&(pmap)->lock); \ +} - bool systemBooting; // true until preferences received. Then we act. 
- IOLock * ourLock; - -public: +extern void pmap_flush_tlbs(pmap_t pmap); - virtual bool start ( IOService * ); - virtual IOReturn setAggressiveness ( unsigned long, unsigned long ); - -protected: +#define PMAP_UPDATE_TLBS(pmap, s, e) \ + pmap_flush_tlbs(pmap) - virtual IOService * findProvider ( IOService * ); - virtual void processSiblings ( IOService * ); - virtual void processChildren ( void ); +#define iswired(pte) ((pte) & INTEL_PTE_WIRED) -}; +#ifdef PMAP_TRACES +extern boolean_t pmap_trace; +#define PMAP_TRACE(x,a,b,c,d,e) \ + if (pmap_trace) { \ + KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e); \ + } +#else +#define PMAP_TRACE(x,a,b,c,d,e) KERNEL_DEBUG(x,a,b,c,d,e) +#endif /* PMAP_TRACES */ +void pmap_expand_pml4( + pmap_t map, + vm_map_offset_t v); + +void pmap_expand_pdpt( + pmap_t map, + vm_map_offset_t v); +#if defined(__x86_64__) +extern const boolean_t cpu_64bit; +#else +extern boolean_t cpu_64bit; #endif + +#endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c new file mode 100644 index 000000000..de9b75835 --- /dev/null +++ b/osfmk/i386/pmap_x86_common.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +/* + * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time, + * on a NBPDE boundary. + */ + +/* These symbols may be referenced directly by VM */ +uint64_t pmap_nesting_size_min = NBPDE; +uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE; + +/* + * kern_return_t pmap_nest(grand, subord, va_start, size) + * + * grand = the pmap that we will nest subord into + * subord = the pmap that goes into the grand + * va_start = start of range in pmap to be inserted + * nstart = start of range in pmap nested pmap + * size = Size of nest area (up to 16TB) + * + * Inserts a pmap into another. This is used to implement shared segments. + * + * Note that we depend upon higher level VM locks to insure that things don't change while + * we are doing this. For example, VM should not be doing any pmap enters while it is nesting + * or do 2 nests at once. + */ + +/* + * This routine can nest subtrees either at the PDPT level (1GiB) or at the + * PDE level (2MiB). We currently disallow disparate offsets for the "subord" + * container and the "grand" parent. A minor optimization to consider for the + * future: make the "subord" truly a container rather than a full-fledged + * pagetable hierarchy which can be unnecessarily sparse (DRK). 
+ */ + +kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) { + vm_map_offset_t vaddr, nvaddr; + pd_entry_t *pde,*npde; + unsigned int i; + uint64_t num_pde; + + if ((size & (pmap_nesting_size_min-1)) || + (va_start & (pmap_nesting_size_min-1)) || + (nstart & (pmap_nesting_size_min-1)) || + ((size >> 28) > 65536)) /* Max size we can nest is 16TB */ + return KERN_INVALID_VALUE; + + if(size == 0) { + panic("pmap_nest: size is invalid - %016llX\n", size); + } + + if (va_start != nstart) + panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart); + + PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START, + (int) grand, (int) subord, + (int) (va_start>>32), (int) va_start, 0); + + nvaddr = (vm_map_offset_t)nstart; + num_pde = size >> PDESHIFT; + + PMAP_LOCK(subord); + + subord->pm_shared = TRUE; + + for (i = 0; i < num_pde;) { + if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) { + + npde = pmap64_pdpt(subord, nvaddr); + + while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) { + PMAP_UNLOCK(subord); + pmap_expand_pdpt(subord, nvaddr); + PMAP_LOCK(subord); + npde = pmap64_pdpt(subord, nvaddr); + } + *npde |= INTEL_PDPTE_NESTED; + nvaddr += NBPDPT; + i += (uint32_t)NPDEPG; + } + else { + npde = pmap_pde(subord, nvaddr); + + while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) { + PMAP_UNLOCK(subord); + pmap_expand(subord, nvaddr); + PMAP_LOCK(subord); + npde = pmap_pde(subord, nvaddr); + } + nvaddr += NBPDE; + i++; + } + } + + PMAP_UNLOCK(subord); + + vaddr = (vm_map_offset_t)va_start; + + PMAP_LOCK(grand); + + for (i = 0;i < num_pde;) { + pd_entry_t tpde; + + if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) { + npde = pmap64_pdpt(subord, vaddr); + if (npde == 0) + panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr); + tpde = *npde; + pde = pmap64_pdpt(grand, vaddr); + if (0 == pde) { + PMAP_UNLOCK(grand); + pmap_expand_pml4(grand, vaddr); + 
PMAP_LOCK(grand); + pde = pmap64_pdpt(grand, vaddr); + } + if (pde == 0) + panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr); + pmap_store_pte(pde, tpde); + vaddr += NBPDPT; + i += (uint32_t) NPDEPG; + } + else { + npde = pmap_pde(subord, nstart); + if (npde == 0) + panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart); + tpde = *npde; + nstart += NBPDE; + pde = pmap_pde(grand, vaddr); + if ((0 == pde) && cpu_64bit) { + PMAP_UNLOCK(grand); + pmap_expand_pdpt(grand, vaddr); + PMAP_LOCK(grand); + pde = pmap_pde(grand, vaddr); + } + + if (pde == 0) + panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr); + vaddr += NBPDE; + pmap_store_pte(pde, tpde); + i++; + } + } + + PMAP_UNLOCK(grand); + + PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0); + + return KERN_SUCCESS; +} + +/* + * kern_return_t pmap_unnest(grand, vaddr) + * + * grand = the pmap that we will un-nest subord from + * vaddr = start of range in pmap to be unnested + * + * Removes a pmap from another. This is used to implement shared segments. 
+ */
+
+kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
+	/*
+	 * Clears the page-directory level entries of 'grand' that cover
+	 * [vaddr, vaddr + size) and then flushes the TLBs for that range.
+	 *
+	 * Panics if size or vaddr fails the (pmap_nesting_size_min - 1) mask
+	 * test, or if a PDE in the range cannot be found.  Always returns
+	 * KERN_SUCCESS otherwise.
+	 */
+	pd_entry_t *pde;
+	unsigned int i;
+	uint64_t num_pde;
+	addr64_t va_start, va_end;
+	uint64_t npdpt = PMAP_INVALID_PDPTNUM;	/* PDPT index examined most recently; starts invalid so the first iteration always checks */
+
+	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
+		   (int) grand,
+		   (int) (vaddr>>32), (int) vaddr, 0, 0);
+
+	if ((size & (pmap_nesting_size_min-1)) ||
+	    (vaddr & (pmap_nesting_size_min-1))) {
+		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
+		      grand, vaddr, size);
+	}
+
+	/* align everything to PDE boundaries */
+	va_start = vaddr & ~(NBPDE-1);	/* round the start down */
+	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);	/* round the end up */
+	size = va_end - va_start;
+
+	PMAP_LOCK(grand);
+
+	num_pde = size >> PDESHIFT;
+	vaddr = va_start;
+
+	for (i = 0; i < num_pde; ) {
+		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {	/* entered a PDPT slot not examined yet */
+			npdpt = pdptnum(grand, vaddr);
+			pde = pmap64_pdpt(grand, vaddr);
+			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
+				/* The whole PDPT entry was nested: clear it and skip NPDEPG PDEs at once. */
+				pmap_store_pte(pde, (pd_entry_t)0);
+				i += (uint32_t) NPDEPG;
+				vaddr += NBPDPT;
+				continue;
+			}
+		}
+		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);	/* otherwise clear a single PDE */
+		if (pde == 0)
+			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
+		pmap_store_pte(pde, (pd_entry_t)0);
+		i++;
+		vaddr += NBPDE;
+	}
+
+	PMAP_UPDATE_TLBS(grand, va_start, va_end);	/* issued while grand is still locked */
+
+	PMAP_UNLOCK(grand);
+
+	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+	return KERN_SUCCESS;
+}
+
+/* Invoked by the Mach VM to determine the platform specific unnest region
+ *
+ * p = pmap whose PDPT entries are inspected
+ * s = in/out: start of the region; rounded down to an NBPDPT boundary when
+ *     the PDPT entry covering it is flagged INTEL_PDPTE_NESTED
+ * e = in/out: end of the region; replaced by (*e + NBPDPT) rounded down to
+ *     an NBPDPT boundary when the PDPT entry covering it is flagged
+ *     INTEL_PDPTE_NESTED
+ *
+ * Returns TRUE if either bound was adjusted, FALSE otherwise.  Always
+ * returns FALSE without touching *s or *e when cpu_64bit is not set.
+ */
+
+boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
+	pd_entry_t *pdpte;
+	boolean_t rval = FALSE;
+
+	if (!cpu_64bit)
+		return rval;
+
+	PMAP_LOCK(p);
+
+	pdpte = pmap64_pdpt(p, *s);
+	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
+		*s &= ~(NBPDPT -1);
+		rval = TRUE;
+	}
+
+	pdpte = pmap64_pdpt(p, *e);
+	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
+		*e = ((*e + NBPDPT) & ~(NBPDPT -1));	/* NOTE(review): an already NBPDPT-aligned *e still moves up by a full NBPDPT - confirm intended */
+		rval = TRUE;
+	}
+
+	PMAP_UNLOCK(p);
+
+	return rval;
+}
+
+/*
+ * 
pmap_find_phys returns the (4K) physical page number containing a
+ * given virtual address in a given pmap.
+ * Note that pmap_pte may return a pde if this virtual address is
+ * mapped by a large page and this is taken into account in order
+ * to return the correct page number in this case.
+ */
+ppnum_t
+pmap_find_phys(pmap_t pmap, addr64_t va)
+{
+	pt_entry_t	*ptp;
+	pd_entry_t	*pdep;
+	ppnum_t		ppn = 0;	/* stays 0 when no valid mapping is found */
+	pd_entry_t	pde;
+	pt_entry_t	pte;
+
+	mp_disable_preemption();	/* re-enabled at pfp_exit on every path */
+
+	/* This refcount test is a band-aid--several infrastructural changes
+	 * are necessary to eliminate invocation of this routine from arbitrary
+	 * contexts.
+	 */
+
+	if (!pmap->ref_count)
+		goto pfp_exit;	/* pmap has no references: return 0 */
+
+	pdep = pmap_pde(pmap, va);
+
+	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
+		if (pde & INTEL_PTE_PS) {	/* PDE has the page-size bit set: take the frame from the PDE itself */
+			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
+			ppn += (ppnum_t) ptenum(va);	/* presumably the index of va's 4K page within the large page - confirm ptenum() */
+		}
+		else {	/* otherwise look up the PTE for va */
+			ptp = pmap_pte(pmap, va);
+			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
+				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
+			}
+		}
+	}
+pfp_exit:
+	mp_enable_preemption();
+
+	return ppn;
+}
+
diff --git a/osfmk/i386/postcode.h b/osfmk/i386/postcode.h
index 42a4627b0..498a88143 100644
--- a/osfmk/i386/postcode.h
+++ b/osfmk/i386/postcode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -29,10 +29,6 @@ #ifndef _I386_POSTCODE_H_ #define _I386_POSTCODE_H_ -#ifndef DEBUG -#include -#endif - /* Define this to delay about 1 sec after posting each code */ //#define POSTCODE_DELAY 1 @@ -55,15 +51,28 @@ CPU_PAUSE(); \ decl %eax; \ jne 1b +#define POSTCODE_AX \ + outw %ax,$(POSTPORT); \ + movl $(SPINCOUNT), %eax; \ +1: \ + CPU_PAUSE(); \ + decl %eax; \ + jne 1b #else #define POSTCODE_AL \ outb %al,$(POSTPORT) +#define POSTCODE_AX \ + outw %ax,$(POSTPORT) #endif /* POSTCODE_DELAY */ #define POSTCODE(XX) \ mov $(XX), %al; \ POSTCODE_AL +#define POSTCODE2(XXXX) \ + mov $(XXXX), %ax; \ + POSTCODE_AX + /* Output byte value to postcode, without destoying register eax */ #define POSTCODE_SAVE_EAX(XX) \ push %eax; \ @@ -94,7 +103,10 @@ #else /* DEBUG */ #define POSTCODE_AL +#define POSTCODE_AX #define POSTCODE(X) +#define POSTCODE2(X) +#define POSTCODE_SAVE_EAX(X) #define POSTCODE32_EBX #endif /* DEBUG */ @@ -106,13 +118,18 @@ #define _PSTART_RELOC 0xFE #define PSTART_ENTRY 0xFD #define PSTART_PAGE_TABLES 0xFC +#if defined(__x86_64__) +#define PSTART_BEFORE_ID_MAP 0xFB +#else #define PSTART_BEFORE_PAGING 0xFB +#endif #define VSTART_ENTRY 0xFA #define VSTART_STACK_SWITCH 0xF9 -#define VSTART_EXIT 0xF8 -#define I386_INIT_ENTRY 0xF7 -#define CPU_INIT_D 0xF6 -#define PE_INIT_PLATFORM_D 0xF5 +#define VSTART_BEFORE_PAGING 0xF8 +#define VSTART_EXIT 0xF7 +#define I386_INIT_ENTRY 0xF6 +#define CPU_INIT_D 0xF5 +#define PE_INIT_PLATFORM_D 0xF4 #define SLAVE_RSTART_ENTRY 0xEF #define SLAVE_REAL_TO_PROT_ENTRY 0xEE @@ -121,10 +138,12 @@ #define SLAVE_STARTPROG_EXIT 0xEB #define SLAVE_PSTART_ENTRY 0xEA #define SLAVE_PSTART_EXIT 0xE9 +#if defined(__i386__) #define SLAVE_VSTART_ENTRY 0xE8 #define SLAVE_VSTART_DESC_INIT 0xE7 #define SLAVE_VSTART_STACK_SWITCH 0xE6 #define SLAVE_VSTART_EXIT 0xE5 +#endif #define I386_INIT_SLAVE 0xE4 #define PANIC_DOUBLE_FAULT 0xDF /* Double Fault exception */ @@ -162,6 +181,11 @@ _postcode(uint8_t xx) 
{ asm volatile("outb %0, %1" : : "a" (xx), "N" (POSTPORT)); } +inline static void +_postcode2(uint16_t xxxx) +{ + asm volatile("outw %0, %1" : : "a" (xxxx), "N" (POSTPORT)); +} #if DEBUG inline static void postcode(uint8_t xx) @@ -171,8 +195,17 @@ postcode(uint8_t xx) _postcode_delay(SPINCOUNT); #endif } +inline static void +postcode2(uint8_t xxxx) +{ + _postcode2(xxxx); +#if POSTCODE_DELAY + _postcode_delay(SPINCOUNT); +#endif +} #else #define postcode(xx) do {} while(0) +#define postcode2(xxxx) do {} while(0) #endif #endif diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index a8eefb5b1..a22ccd03b 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -160,50 +160,51 @@ #ifndef ASSEMBLER #include +#include + __BEGIN_DECLS -#define set_ts() \ - set_cr0(get_cr0() | CR0_TS) +#define set_ts() set_cr0(get_cr0() | CR0_TS) -static inline unsigned int get_cr0(void) +static inline uintptr_t get_cr0(void) { - register unsigned int cr0; + uintptr_t cr0; __asm__ volatile("mov %%cr0, %0" : "=r" (cr0)); return(cr0); } -static inline void set_cr0(unsigned int value) +static inline void set_cr0(uintptr_t value) { __asm__ volatile("mov %0, %%cr0" : : "r" (value)); } -static inline unsigned int get_cr2(void) +static inline uintptr_t get_cr2(void) { - register unsigned int cr2; + uintptr_t cr2; __asm__ volatile("mov %%cr2, %0" : "=r" (cr2)); return(cr2); } -static inline unsigned int get_cr3(void) +static inline uintptr_t get_cr3(void) { - register unsigned int cr3; + register uintptr_t cr3; __asm__ volatile("mov %%cr3, %0" : "=r" (cr3)); return(cr3); } -static inline void set_cr3(unsigned int value) +static inline void set_cr3(uintptr_t value) { __asm__ volatile("mov %0, %%cr3" : : "r" (value)); } -static inline uint32_t get_cr4(void) +static inline uintptr_t get_cr4(void) { - uint32_t cr4; + uintptr_t cr4; __asm__ volatile("mov %%cr4, %0" : "=r" (cr4)); return(cr4); } -static inline void set_cr4(uint32_t value) +static inline void set_cr4(uintptr_t value) { 
__asm__ volatile("mov %0, %%cr4" : : "r" (value)); } @@ -237,19 +238,51 @@ static inline void lldt(unsigned int seg) __asm__ volatile("lldt %0" : : "rm" ((unsigned short)(seg))); } +static inline void lgdt(uintptr_t *desc) +{ + __asm__ volatile("lgdt %0" : : "m" (*desc)); +} + +static inline void lidt(uintptr_t *desc) +{ + __asm__ volatile("lidt %0" : : "m" (*desc)); +} + +static inline void swapgs(void) +{ + __asm__ volatile("swapgs"); +} + #ifdef MACH_KERNEL_PRIVATE + + + +#ifdef __i386__ + +#include + +extern void cpuid64(uint32_t); extern void flush_tlb64(void); extern uint64_t get64_cr3(void); extern void set64_cr3(uint64_t); static inline void flush_tlb(void) { - unsigned long cr3_temp; if (cpu_mode_is64bit()) { flush_tlb64(); - return; + } else { + set_cr3(get_cr3()); } - __asm__ volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (cr3_temp) :: "memory"); } +#elif defined(__x86_64__) +static inline void flush_tlb(void) +{ + set_cr3(get_cr3()); +} +#else +#error Unsupported architecture +#endif + + #endif /* MACH_KERNEL_PRIVATE */ static inline void wbinvd(void) @@ -257,7 +290,7 @@ static inline void wbinvd(void) __asm__ volatile("wbinvd"); } -static inline void invlpg(unsigned long addr) +static inline void invlpg(uintptr_t addr) { __asm__ volatile("invlpg (%0)" :: "r" (addr) : "memory"); } @@ -282,6 +315,8 @@ static inline void invlpg(unsigned long addr) #define rdpmc(counter,lo,hi) \ __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter)) +#ifdef __i386__ + static inline uint64_t rdmsr64(uint32_t msr) { uint64_t ret; @@ -311,6 +346,41 @@ static inline uint64_t rdtscp64(uint32_t *aux) return ret; } +#elif defined(__x86_64__) + +static inline uint64_t rdmsr64(uint32_t msr) +{ + uint32_t lo=0, hi=0; + rdmsr(msr, lo, hi); + return (((uint64_t)hi) << 32) | ((uint64_t)lo); +} + +static inline void wrmsr64(uint32_t msr, uint64_t val) +{ + wrmsr(msr, (val & 0xFFFFFFFFUL), ((val >> 32) & 0xFFFFFFFFUL)); +} + +static inline uint64_t rdtsc64(void) +{ + uint32_t 
lo, hi; + rdtsc(lo, hi); + return (((uint64_t)hi) << 32) | ((uint64_t)lo); +} + +static inline uint64_t rdtscp64(uint32_t *aux) +{ + uint32_t lo, hi; + __asm__ volatile("rdtscp; mov %%ecx, %1" + : "=a" (lo), "=d" (hi), "=m" (*aux) + : + : "ecx"); + return (((uint64_t)hi) << 32) | ((uint64_t)lo); +} + +#else +#error Unsupported architecture +#endif + /* * rdmsr_carefully() returns 0 when the MSR has been read successfully, * or non-zero (1) if the MSR does not exist. diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 4b06c8a1e..6c1b18483 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -57,15 +57,14 @@ #include /* for kernel_map */ #include #include -#include -#include #include -#include #include -#include #include -#include +#include #include +#include +#include +#include #include #include #include @@ -83,6 +82,8 @@ int rtclock_init(void); uint64_t rtc_decrementer_min; +uint64_t tsc_rebase_abs_time = 0; + void rtclock_intr(x86_saved_state_t *regs); static uint64_t maxDec; /* longest interval our hardware timer can handle (nsec) */ @@ -101,6 +102,7 @@ rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0}; static inline uint64_t _tsc_to_nanoseconds(uint64_t value) { +#if defined(__i386__) asm volatile("movl %%edx,%%esi ;" "mull %%ecx ;" "movl %%edx,%%edi ;" @@ -111,10 +113,60 @@ _tsc_to_nanoseconds(uint64_t value) : "+A" (value) : "c" (current_cpu_datap()->cpu_nanotime->scale) : "esi", "edi"); +#elif defined(__x86_64__) + asm volatile("mul %%rcx;" + "shrq $32, %%rax;" + "shlq $32, %%rdx;" + "orq %%rdx, %%rax;" + : "=a"(value) + : "a"(value), "c"(rtc_nanotime_info.scale) + : "rdx", "cc" ); +#else +#error Unsupported architecture +#endif return (value); } +static inline uint32_t +_absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t 
*microsecs) +{ + uint32_t remain; +#if defined(__i386__) + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (remain) + : "A" (abstime), "r" (NSEC_PER_SEC)); + asm volatile( + "divl %3" + : "=a" (*microsecs) + : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); +#elif defined(__x86_64__) + *secs = abstime / (uint64_t)NSEC_PER_SEC; + remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC); + *microsecs = remain / NSEC_PER_USEC; +#else +#error Unsupported architecture +#endif + return remain; +} + +static inline void +_absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs) +{ +#if defined(__i386__) + asm volatile( + "divl %3" + : "=a" (*secs), "=d" (*nanosecs) + : "A" (abstime), "r" (NSEC_PER_SEC)); +#elif defined(__x86_64__) + *secs = abstime / (uint64_t)NSEC_PER_SEC; + *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC); +#else +#error Unsupported architecture +#endif +} + static uint32_t deadline_to_decrementer( uint64_t deadline, @@ -123,10 +175,10 @@ deadline_to_decrementer( uint64_t delta; if (deadline <= now) - return rtc_decrementer_min; + return (uint32_t)rtc_decrementer_min; else { delta = deadline - now; - return MIN(MAX(rtc_decrementer_min,delta),maxDec); + return (uint32_t)MIN(MAX(rtc_decrementer_min,delta),maxDec); } } @@ -359,13 +411,16 @@ static void rtc_set_timescale(uint64_t cycles) { rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; - rntp->scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles; + rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); if (cycles <= SLOW_TSC_THRESHOLD) - rntp->shift = cycles; + rntp->shift = (uint32_t)cycles; else rntp->shift = 32; + if (tsc_rebase_abs_time == 0) + tsc_rebase_abs_time = mach_absolute_time(); + rtc_nanotime_init(0); } @@ -395,33 +450,22 @@ rtc_export_speed(uint64_t cyc_per_sec) void clock_get_system_microtime( - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) { uint64_t now = rtc_nanotime_read(); - uint32_t remain; - 
asm volatile( - "divl %3" - : "=a" (*secs), "=d" (remain) - : "A" (now), "r" (NSEC_PER_SEC)); - asm volatile( - "divl %3" - : "=a" (*microsecs) - : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); + _absolutetime_to_microtime(now, secs, microsecs); } void clock_get_system_nanotime( - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { uint64_t now = rtc_nanotime_read(); - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (*nanosecs) - : "A" (now), "r" (NSEC_PER_SEC)); + _absolutetime_to_nanotime(now, secs, nanosecs); } void @@ -429,24 +473,15 @@ clock_gettimeofday_set_commpage( uint64_t abstime, uint64_t epoch, uint64_t offset, - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) { - uint64_t now = abstime; + uint64_t now = abstime + offset; uint32_t remain; - now += offset; - - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (remain) - : "A" (now), "r" (NSEC_PER_SEC)); - asm volatile( - "divl %3" - : "=a" (*microsecs) - : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); + remain = _absolutetime_to_microtime(now, secs, microsecs); - *secs += epoch; + *secs += (clock_sec_t)epoch; commpage_set_timestamp(abstime - remain, *secs); } @@ -484,7 +519,8 @@ rtclock_intr( regs = saved_state64(tregs); - user_mode = TRUE; + if (regs->isf.cs & 0x03) + user_mode = TRUE; rip = regs->isf.rip; } else { x86_saved_state32_t *regs; @@ -499,7 +535,7 @@ rtclock_intr( /* Log the interrupt service latency (-ve value expected by tool) */ KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) | DBG_FUNC_NONE, - -latency, (uint32_t)rip, user_mode, 0, 0); + -(int32_t)latency, (uint32_t)rip, user_mode, 0, 0); /* call the generic etimer */ etimer_intr(user_mode, rip); @@ -509,6 +545,7 @@ rtclock_intr( * Request timer pop from the hardware */ + int setPop( uint64_t time) @@ -545,37 +582,25 @@ clock_interval_to_absolutetime_interval( void absolutetime_to_microtime( uint64_t abstime, - uint32_t *secs, - uint32_t *microsecs) + 
clock_sec_t *secs, + clock_usec_t *microsecs) { - uint32_t remain; - - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (remain) - : "A" (abstime), "r" (NSEC_PER_SEC)); - asm volatile( - "divl %3" - : "=a" (*microsecs) - : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); + _absolutetime_to_microtime(abstime, secs, microsecs); } void absolutetime_to_nanotime( uint64_t abstime, - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (*nanosecs) - : "A" (abstime), "r" (NSEC_PER_SEC)); + _absolutetime_to_nanotime(abstime, secs, nanosecs); } void nanotime_to_absolutetime( - uint32_t secs, - uint32_t nanosecs, + clock_sec_t secs, + clock_nsec_t nanosecs, uint64_t *result) { *result = ((uint64_t)secs * NSEC_PER_SEC) + nanosecs; diff --git a/osfmk/i386/rtclock.h b/osfmk/i386/rtclock.h index 6f3406a8c..ec3e922d8 100644 --- a/osfmk/i386/rtclock.h +++ b/osfmk/i386/rtclock.h @@ -57,6 +57,8 @@ typedef struct rtc_nanotime { struct cpu_data; +extern uint64_t tsc_rebase_abs_time; + extern void _rtc_nanotime_store( uint64_t tsc, uint64_t nsec, @@ -68,7 +70,7 @@ extern uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow); -extern rtc_nanotime_t rtc_nanotime_info; +extern rtc_nanotime_t rtc_nanotime_info; #endif #define SLOW_TSC_THRESHOLD 1000067800 /* TSC is too slow for regular nanotime() algorithm */ diff --git a/osfmk/i386/sched_param.h b/osfmk/i386/sched_param.h index 5a5677f53..113049fdd 100644 --- a/osfmk/i386/sched_param.h +++ b/osfmk/i386/sched_param.h @@ -64,4 +64,4 @@ #ifndef _I386_SCHED_PARAM_H_ #define _I386_SCHED_PARAM_H_ -#endif _I386_SCHED_PARAM_H_ +#endif /* _I386_SCHED_PARAM_H_ */ diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h index 9b80f01a0..37d2b48ca 100644 --- a/osfmk/i386/seg.h +++ b/osfmk/i386/seg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -55,9 +55,12 @@ */ #ifndef _I386_SEG_H_ #define _I386_SEG_H_ - +#ifdef MACH_KERNEL #include -#ifndef ASSEMBLER +#else +#define MACH_KDB 0 +#endif /* MACH_KERNEL */ +#ifndef __ASSEMBLER__ #include #include #include @@ -97,9 +100,13 @@ selector_to_sel(uint16_t selector) /* kernel ldt entries */ #if MACH_KDB -#define GDTSZ 19 +#define GDTSZ 20 #else -#define GDTSZ 18 +#define GDTSZ 19 +#endif + +#ifdef __x86_64__ +#define PROT_MODE_GDT_SIZE 48 /* size of prot_mode_gdt in bytes */ #endif /* @@ -150,6 +157,16 @@ struct real_gate64 { reserved32:32; /* reserved/zero */ }; +#define MAKE_REAL_DESCRIPTOR(base,lim,gran,acc) { \ + .limit_low = lim & 0xffff, \ + .limit_high = (lim >> 16) & 0xf, \ + .base_low = base & 0xffff, \ + .base_med = (base >> 16) & 0xff, \ + .base_high = (base >> 24) & 0xff, \ + .access = acc, \ + .granularity = gran \ +} + /* * We build descriptors and gates in a 'fake' format to let the * fields be contiguous. We shuffle them into the real format @@ -164,7 +181,7 @@ struct fake_descriptor { uint32_t access:8; /* access */ }; struct fake_descriptor64 { - uint32_t offset[2]; /* offset [0..31,32..63] */ + uint64_t offset64; /* offset [0..31,32..63] */ uint32_t lim_or_seg:20; /* limit */ /* or segment, for gate */ uint32_t size_or_IST:4; /* size/granularity */ @@ -172,22 +189,22 @@ struct fake_descriptor64 { uint32_t access:8; /* access */ uint32_t reserved:32; /* reserved/zero */ }; +#ifdef __i386__ #define FAKE_UBER64(addr32) { (uint32_t) (addr32), KERNEL_UBER_BASE_HI32 } #define FAKE_COMPAT(addr32) { (uint32_t) (addr32), 0x0 } #define UBER64(addr32) ((addr64_t) (uintptr_t)addr32 + KERNEL_UBER_BASE) +#endif /* * Boot-time data for master (or only) CPU */ extern struct fake_descriptor master_idt[IDTSZ]; -extern struct fake_descriptor master_gdt[GDTSZ]; -extern struct fake_descriptor master_ldt[LDTSZ]; +extern struct real_descriptor master_gdt[GDTSZ]; +extern struct real_descriptor master_ldt[LDTSZ]; extern 
struct i386_tss master_ktss; extern struct sysenter_stack master_sstk; extern struct fake_descriptor64 master_idt64[IDTSZ]; -extern struct fake_descriptor64 kernel_ldt_desc64; -extern struct fake_descriptor64 kernel_tss_desc64; extern struct x86_64_tss master_ktss64; __BEGIN_DECLS @@ -265,6 +282,7 @@ __END_DECLS #define NULL_SEG 0 +#ifdef __i386__ /* * User descriptors for MACH - 32-bit flat address space */ @@ -287,7 +305,7 @@ __END_DECLS /* * Kernel descriptors for MACH - 32-bit flat address space. */ -#define KERNEL_CS 0x08 /* kernel code */ +#define KERNEL32_CS 0x08 /* kernel code */ #define KERNEL_DS 0x10 /* kernel data */ #define KERNEL_LDT 0x18 /* master LDT */ #define KERNEL_LDT_2 0x20 /* master LDT expanded for 64-bit */ @@ -304,28 +322,52 @@ __END_DECLS #define USER_TSS 0x60 #define FPE_CS 0x68 -#define USER_WINDOW_SEL 0x70 /* window for copyin/copyout */ -#define PHYS_WINDOW_SEL 0x78 /* window for copyin/copyout */ +#else // __x86_64__ -#define KERNEL64_CS 0x80 /* kernel 64-bit code */ -#define KERNEL64_SS 0x88 /* kernel 64-bit (syscall) stack */ +/* + * Kernel descriptors for MACH - 64-bit flat address space. 
+ */ +#define KERNEL64_CS 0x08 /* 1: First entry */ +#define SYSENTER_CS 0x0b /* alias to KERNEL64_CS */ +#define KERNEL64_SS 0x10 /* 2: must be SYSENTER_CS + 8 */ +#define USER_CS 0x1b /* 3: must be SYSENTER_CS + 16 */ +#define USER_DS 0x23 /* 4: must be SYSENTER_CS + 24 */ +#define USER64_CS 0x2b /* 5: must be SYSENTER_CS + 32 */ +#define USER64_DS USER_DS /* nothing special about 64bit DS */ +#define KERNEL_LDT 0x30 /* 6: */ + /* 7: other 8 bytes of KERNEL_LDT */ +#define KERNEL_TSS 0x40 /* 8: */ + /* 9: other 8 bytes of KERNEL_TSS */ +#define KERNEL32_CS 0x50 /* 10: */ +#define USER_LDT 0x58 /* 11: */ + /* 12: other 8 bytes of USER_LDT */ +#define KERNEL_DS 0x80 /* 16: */ +#define SYSCALL_CS 0x8f /* 17: 64-bit syscall pseudo-segment */ + +#endif + +#ifdef __i386__ +#define USER_WINDOW_SEL 0x70 /* 14: window for copyin/copyout */ +#define PHYS_WINDOW_SEL 0x78 /* 15: window for copyin/copyout */ + +#define KERNEL64_CS 0x80 /* 16: kernel 64-bit code */ +#define KERNEL64_SS 0x88 /* 17: kernel 64-bit (syscall) stack */ +#else // __x86_64__ +#define SYSENTER_TF_CS (USER_CS|0x10000) +#define SYSENTER_DS KERNEL64_SS /* sysenter kernel data segment */ +#endif #if MACH_KDB -#define DEBUG_TSS 0x90 /* debug TSS (uniprocessor) */ +#define DEBUG_TSS 0x90 /* 18: debug TSS (uniprocessor) */ #endif -#ifndef __ASSEMBLER__ -struct __gdt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); - unsigned short pad; -} __attribute__ ((packed)); - -struct __idt_desc_struct { - unsigned short size; - unsigned long address __attribute__((packed)); - unsigned short pad; -} __attribute__ ((packed)); -#endif /* __ASSEMBLER__ */ +#ifdef __x86_64__ +/* + * 64-bit kernel LDT descriptors + */ +#define USER_CTHREAD 0x0f /* user cthread area */ +#define USER_SETTABLE 0x1f /* start of user settable ldt entries */ +#define USLDTSZ 10 /* number of user settable entries */ +#endif #endif /* _I386_SEG_H_ */ diff --git a/osfmk/i386/simple_lock.h 
b/osfmk/i386/simple_lock.h index ad2c33893..fb30ba83f 100644 --- a/osfmk/i386/simple_lock.h +++ b/osfmk/i386/simple_lock.h @@ -105,7 +105,7 @@ extern void i386_lock_unlock_with_flush( #else typedef struct slock { - unsigned int lock_data[10]; + unsigned long lock_data[10]; } usimple_lock_data_t, *usimple_lock_t; #endif /* defined(MACH_KERNEL_PRIVATE) && defined(__APPLE_API_PRIVATE) */ diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s index e063283aa..b0ba8110a 100644 --- a/osfmk/i386/start.s +++ b/osfmk/i386/start.s @@ -57,7 +57,6 @@ */ #include -#include #include #include @@ -67,28 +66,15 @@ #define CX(addr,reg) addr(,reg,4) #include -#include +#include #include -/* - * GAS won't handle an intersegment jump with a relocatable offset. - */ -#define LJMP(segment,address) \ - .byte 0xea ;\ - .long address ;\ - .word segment - - - -#define PA(addr) (addr) -#define VA(addr) (addr) - /* * Interrupt and bootup stack for initial processor. */ - /* in the __HIB section since the hibernate restore code uses this stack. */ - .section __HIB, __data +/* in the __HIB section since the hibernate restore code uses this stack. */ + .section __HIB, __data .align 12 .globl EXT(low_intstack) @@ -109,22 +95,14 @@ EXT(gIOHibernateRestoreStackEnd): .align ALIGN .globl EXT(gdtptr) /* align below properly */ - .word 0 + .word 0 LEXT(gdtptr) .word Times(8,GDTSZ)-1 .long EXT(master_gdt) - .align ALIGN - .globl EXT(idtptr) - /* align below properly */ - .word 0 -LEXT(idtptr) - .word Times(8,IDTSZ)-1 - .long EXT(master_idt) + /* back to the regular __DATA section. */ - /* back to the regular __DATA section. */ - - .section __DATA, __data + .section __DATA, __data /* * Stack for last-gasp double-fault handler. @@ -147,7 +125,6 @@ EXT(mc_task_stack): .globl EXT(mc_task_stack_end) EXT(mc_task_stack_end): - #if MACH_KDB /* * Kernel debugger stack for each processor. 
@@ -174,78 +151,8 @@ EXT(kgdb_stack_store): .set ., .+(INTSTACK_SIZE*MAX_CPUS) #endif /* MACH_KDB */ - .data -physfree: - .long 0 /* phys addr of next free page */ - - .globl EXT(IdlePTD) -EXT(IdlePTD): - .long 0 /* phys addr of kernel PTD */ -#ifdef PAE - .globl EXT(IdlePDPT) -EXT(IdlePDPT): - .long 0 /* phys addr of kernel PDPT */ -#endif -#ifdef X86_64 - .globl EXT(IdlePML4) -EXT(IdlePML4): - .long 0 - .globl EXT(IdlePDPT64) -EXT(IdlePDPT64): - .long 0 -#endif - -KPTphys: - .long 0 /* phys addr of kernel page tables */ - - .globl EXT(KernelRelocOffset) -EXT(KernelRelocOffset): - .long 0 /* Kernel relocation offset */ - - -/* Some handy macros */ - -#define ALLOCPAGES(npages) \ - movl PA(physfree), %esi ; \ - movl $((npages) * PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, PA(physfree) ; \ - movl %esi, %edi ; \ - movl $((npages) * PAGE_SIZE / 4),%ecx ; \ - xorl %eax,%eax ; \ - cld ; \ - rep ; \ - stosl - -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $(PTEINDX),%ebx ; \ - addl base,%ebx ; \ - orl $(PTE_V) ,%eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $(PAGE_SIZE),%eax ; /* increment physical address */ \ - addl $(PTESIZE),%ebx ; /* next pte */ \ - loop 1b - -/* - * fillkptphys(prot) - * eax = physical address - * ecx = how many pages to map - * prot = protection bits - */ -#define fillkptphys(prot) \ - movl %eax, %ebx ; \ - shrl $(PAGE_SHIFT), %ebx ; \ - fillkpt(PA(KPTphys), prot) + /* * BSP CPU start here. 
* eax points to kernbootstruct @@ -257,330 +164,55 @@ EXT(KernelRelocOffset): .text .align ALIGN .globl EXT(_start) - .globl EXT(_pstart) LEXT(_start) -LEXT(_pstart) - mov %ds, %bx - mov %bx, %es - mov %eax, %ebp // Move kernbootstruct to ebp - POSTCODE(_PSTART_ENTRY) - movl KADDR(%ebp), %ebx // Load boot image phys addr - movl %ebx, %edx // Set edx with boot load phys addr - addl KSIZE(%ebp), %edx // Add boot image size - addl $(NBPG-1), %edx // Round to a page size - andl $(-NBPG), %edx // Set edx to first free page - movl %edx, %esp // Set temporay stack - addl $(NBPG), %esp // add page size - call Ls1 -Ls1: popl %esi // Get return address - cmpl $(PA(Ls1)), %esi // Compare with static physicall addr - je EXT(pstart) // Branch if equal - subl $(PA(Ls1)), %esi // Extract relocation offset - movl %esi, %esp // Store relocation offset in esp - leal (PA(Lreloc_start))(%esp),%esi - // Set esi to reloc_start boot phys addr - movl %edx, %edi // Set edi to first free page - movl $(Lreloc_end-Lreloc_start), %ecx - // Set ecx to copy code size - cld // count up - rep - movsb // copy reloc copy code - wbinvd // Write back and Invalidate cache - movl %ebx, %esi // Set esi to kernbootstruct kaddr - movl KADDR(%ebp), %edi // Load boot image phys addr - subl %esp, %edi // Adjust to static phys addr - movl KSIZE(%ebp), %ecx // Set ecx to kernbootstruct ksize - addl $(NBPG-1), %ecx // Add NBPG-1 to ecx - andl $(-NBPG), %ecx // Truncate ecx to a page aligned addr - shrl $2, %ecx // Divide ecx by 4 - movl %esp, (PA(EXT(KernelRelocOffset)))(%esp) - // Store relocation offset - movl %edi, KADDR(%ebp) // Relocate kaddr in kernbootstruct - subl %esp, MEMORYMAP(%ebp) // And relocate MemoryMap - subl %esp, DEVICETREEP(%ebp) // And relocate deviceTreeP - subl %esp, %ebp // Set ebp with relocated phys addr - jmp *%edx // Branch to relocated copy code -Lreloc_start: - POSTCODE(_PSTART_RELOC) - rep - movsl // Copy boot image at BASE_KERNEL_PADDR - wbinvd // Write back and Invalidate cache - 
movl $(PA(EXT(pstart))), %edx // Set branch target - jmp *%edx // Far jmp to pstart phys addr -Lreloc_end: - /* NOTREACHED */ - hlt + mov %ds, %bx + mov %bx, %es + mov %eax, %ebp /* Move kernbootstruct to ebp */ + mov %eax, %ebx /* get pointer to kernbootstruct */ - .text - .globl __start - .set __start, PA(EXT(_pstart)) - -/* - * BSP CPU continues here after possible relocation. - * ebp points to kernbootstruct - */ - .align ALIGN - .globl EXT(pstart) -LEXT(pstart) - mov %ebp, %ebx /* get pointer to kernbootstruct */ + mov $EXT(low_eintstack),%esp /* switch to the bootup stack */ POSTCODE(PSTART_ENTRY) - mov $0,%ax /* fs must be zeroed; */ - mov %ax,%fs /* some bootstrappers don`t do this */ - mov %ax,%gs + lgdt EXT(gdtptr) /* load GDT */ -/* - * Get startup parameters. - */ - movl KADDR(%ebx), %eax - addl KSIZE(%ebx), %eax - addl $(NBPG-1),%eax - andl $(-NBPG), %eax - movl %eax, PA(physfree) + mov $(KERNEL_DS),%ax /* set kernel data segment */ + mov %ax, %ds + mov %ax, %es + mov %ax, %ss + xor %ax, %ax /* fs must be zeroed; */ + mov %ax, %fs /* some bootstrappers don`t do this */ + mov %ax, %gs cld -/* allocate kernel page table pages */ - ALLOCPAGES(NKPT) - movl %esi,PA(KPTphys) - -#ifdef X86_64 -/* allocate PML4 page */ - ALLOCPAGES(1) - movl %esi,EXT(IdlePML4) -/* allocate new 3rd level directory page */ - ALLOCPAGES(1) - movl %esi,EXT(IdlePDPT64) -#endif - -#ifdef PAE -/* allocate Page Table Directory Page */ - ALLOCPAGES(1) - movl %esi,PA(EXT(IdlePDPT)) -#endif - -/* allocate kernel page directory page */ - ALLOCPAGES(NPGPTD) - movl %esi,PA(EXT(IdlePTD)) - -/* map from zero to end of kernel */ - xorl %eax,%eax - movl PA(physfree),%ecx - shrl $(PAGE_SHIFT),%ecx - fillkptphys( $(PTE_W) ) - -/* map page directory */ -#ifdef PAE - movl PA(EXT(IdlePDPT)), %eax - movl $1, %ecx - fillkptphys( $(PTE_W) ) - - movl PA(EXT(IdlePDPT64)), %eax - movl $1, %ecx - fillkptphys( $(PTE_W) ) -#endif - movl PA(EXT(IdlePTD)),%eax - movl $(NPGPTD), %ecx - fillkptphys( $(PTE_W) ) - 
-/* install a pde for temp double map of bottom of VA */ - movl PA(KPTphys),%eax - xorl %ebx,%ebx - movl $(NKPT), %ecx - fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) - -/* install pde's for page tables */ - movl PA(KPTphys),%eax - movl $(KPTDI),%ebx - movl $(NKPT),%ecx - fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) - -/* install a pde recursively mapping page directory as a page table */ - movl PA(EXT(IdlePTD)),%eax - movl $(PTDPTDI),%ebx - movl $(NPGPTD),%ecx - fillkpt(PA(EXT(IdlePTD)), $(PTE_W)) - -#ifdef PAE - movl PA(EXT(IdlePTD)), %eax - xorl %ebx, %ebx - movl $(NPGPTD), %ecx - fillkpt(PA(EXT(IdlePDPT)), $0) -#endif - -/* install a pde page for commpage use up in high memory */ - - movl PA(physfree),%eax /* grab next phys page */ - movl %eax,%ebx - addl $(PAGE_SIZE),%ebx - movl %ebx,PA(physfree) /* show next free phys pg */ - movl $(COMM_PAGE_BASE_ADDR),%ebx - shrl $(PDESHIFT),%ebx /* index into pde page */ - movl $(1), %ecx /* # pdes to store */ - fillkpt(PA(EXT(IdlePTD)), $(PTE_W|PTE_U)) /* user has access! */ - - movl PA(physfree),%edi - movl %edi,PA(EXT(first_avail)) /* save first available phys addr */ - -#ifdef PAE -/* - * We steal 0x4000 for a temp pdpt and 0x5000-0x8000 - * for temp pde pages in the PAE case. 
Once we are - * running at the proper virtual address we switch to - * the PDPT/PDE's the master is using */ - - /* clear pdpt page to be safe */ - xorl %eax, %eax - movl $(PAGE_SIZE),%ecx - movl $(0x4000),%edi - cld - rep - stosb - - /* build temp pdpt */ - movl $(0x5000), %eax - xorl %ebx, %ebx - movl $(NPGPTD), %ecx - fillkpt($(0x4000), $0) - - /* copy the NPGPTD pages of pdes */ - movl PA(EXT(IdlePTD)),%eax - movl $0x5000,%ebx - movl $((PTEMASK+1)*NPGPTD),%ecx -1: movl 0(%eax),%edx - movl %edx,0(%ebx) - movl 4(%eax),%edx - movl %edx,4(%ebx) - addl $(PTESIZE),%eax - addl $(PTESIZE),%ebx - loop 1b -#else -/* create temp pde for slaves to use - use unused lomem page and copy in IdlePTD */ - movl PA(EXT(IdlePTD)),%eax - movl $0x4000,%ebx - movl $(PTEMASK+1),%ecx -1: movl 0(%eax),%edx - movl %edx,0(%ebx) - addl $(PTESIZE),%eax - addl $(PTESIZE),%ebx - loop 1b -#endif - - POSTCODE(PSTART_PAGE_TABLES) - -/* - * Fix initial descriptor tables. - */ - lea PA(EXT(master_idt)),%esi /* fix IDT */ - movl $(IDTSZ),%ecx - movl $(PA(fix_idt_ret)),%ebx - jmp fix_desc_common /* (cannot use stack) */ -fix_idt_ret: - - lea PA(EXT(master_gdt)),%esi /* fix GDT */ - movl $(GDTSZ),%ecx - movl $(PA(fix_gdt_ret)),%ebx - jmp fix_desc_common /* (cannot use stack) */ -fix_gdt_ret: - - lea PA(EXT(master_ldt)),%esi /* fix LDT */ - movl $(LDTSZ),%ecx - movl $(PA(fix_ldt_ret)),%ebx - jmp fix_desc_common /* (cannot use stack) */ -fix_ldt_ret: - -/* - * - */ - - lgdt PA(EXT(gdtptr)) /* load GDT */ - lidt PA(EXT(idtptr)) /* load IDT */ + /* "The Aussie Maneuver" ("Myria" variant) */ + pushl $(0xcb<<24)|KERNEL32_CS /* reload CS */ + call .-1 +paging: + andl $0xfffffff0, %esp /* align stack */ + subl $0xc, %esp + pushl %ebp /* push boot args addr */ + xorl %ebp, %ebp /* zero frame pointer */ + POSTCODE(PSTART_BEFORE_PAGING) /* * Turn on paging. 
*/ -#ifdef PAE - movl PA(EXT(IdlePDPT)), %eax + movl $EXT(IdlePDPT), %eax /* CR3 */ movl %eax, %cr3 - - movl %cr4, %eax + movl %cr4, %eax /* PAE */ orl $(CR4_PAE), %eax movl %eax, %cr4 - - movl $0x80000001, %eax - cpuid - and $(CPUID_EXTFEATURE_XD), %edx /* clear all but bit 20 */ - cmp $0, %edx /* skip setting NXE if 20 is not set */ - je 1f + movl %cr0,%eax /* paging */ + orl $(CR0_PG|CR0_WP),%eax + movl %eax,%cr0 - movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ - rdmsr /* MSR value return in edx: eax */ - orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ - wrmsr /* Update Extended Feature Enable reg */ -1: - -#else - movl PA(EXT(IdlePTD)), %eax - movl %eax,%cr3 -#endif - - movl %cr0,%eax - orl $(CR0_PG|CR0_WP|CR0_PE),%eax - movl %eax,%cr0 /* to enable paging */ - - LJMP(KERNEL_CS,EXT(vstart)) /* switch to kernel code segment */ - -/* - * BSP is now running with correct addresses. - */ -LEXT(vstart) - POSTCODE(VSTART_ENTRY) ; - - mov $(KERNEL_DS),%ax /* set kernel data segment */ - mov %ax,%ds - mov %ax,%es - mov %ax,%ss - mov %ax,EXT(master_ktss)+TSS_SS0 /* set kernel stack segment */ - /* for traps to kernel */ - -#if MACH_KDB - mov %ax,EXT(master_dbtss)+TSS_SS0 /* likewise for debug task switch */ - mov %cr3,%eax /* get PDBR into debug TSS */ - mov %eax,EXT(master_dbtss)+TSS_PDBR - mov $0,%eax -#endif - mov %cr3,%eax /* get PDBR into DF TSS */ - mov %eax,EXT(master_dftss)+TSS_PDBR - mov %eax,EXT(master_mctss)+TSS_PDBR - - movw $(KERNEL_LDT),%ax /* get LDT segment */ - lldt %ax /* load LDT */ -#if MACH_KDB - mov %ax,EXT(master_ktss)+TSS_LDT /* store LDT in two TSS, as well... 
*/ - mov %ax,EXT(master_dbtss)+TSS_LDT /* ...matters if we switch tasks */ -#endif - movw $(KERNEL_TSS),%ax - ltr %ax /* set up KTSS */ - - mov $(CPU_DATA_GS),%ax - mov %ax,%gs - - POSTCODE(VSTART_STACK_SWITCH) - - lea EXT(low_eintstack),%esp /* switch to the bootup stack */ - pushl %ebp /* push boot args addr */ - xorl %ebp,%ebp /* clear stack frame ptr */ - - POSTCODE(VSTART_EXIT) - - call EXT(i386_init) /* run C code */ + call EXT(vstart) /* run C code */ /*NOTREACHED*/ hlt - /* * AP (slave) CPUs enter here. * @@ -593,237 +225,130 @@ LEXT(vstart) LEXT(slave_pstart) cli /* disable interrupts, so we don`t */ /* need IDT for a while */ + xor %ebp, %ebp // zero boot cpu + mov $EXT(mp_slave_stack)+PAGE_SIZE, %esp; + jmp paging - POSTCODE(SLAVE_PSTART_ENTRY) -/* - * Turn on paging. - */ -#ifdef PAE - movl %cr4, %eax - orl $(CR4_PAE), %eax - movl %eax, %cr4 - - movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ - rdmsr /* MSR value return in edx: eax */ - orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ - wrmsr /* Update Extended Feature Enable reg */ -#endif - movl $(0x4000),%eax /* tmp until we get mapped */ - movl %eax,%cr3 - - movl %cr0,%eax - orl $(CR0_PG|CR0_WP|CR0_PE),%eax - movl %eax,%cr0 /* to enable paging */ - - POSTCODE(SLAVE_PSTART_EXIT) - - movl $(EXT(spag_start)),%edx /* first paged code address */ - jmp *%edx /* flush prefetch queue */ - -/* - * We are now paging, and can run with correct addresses. - */ -LEXT(spag_start) - - lgdt PA(EXT(gdtptr)) /* load GDT */ - lidt PA(EXT(idtptr)) /* load IDT */ - - LJMP(KERNEL_CS,EXT(slave_vstart)) /* switch to kernel code segment */ - - -/* - * Slave is now running with correct addresses. 
- */ -LEXT(slave_vstart) - - POSTCODE(SLAVE_VSTART_ENTRY) - -#ifdef PAE - movl PA(EXT(IdlePDPT)), %eax - movl %eax, %cr3 -#else - movl PA(EXT(IdlePTD)), %eax - movl %eax, %cr3 -#endif - - mov $(KERNEL_DS),%ax /* set kernel data segment */ - mov %ax,%ds - mov %ax,%es - mov %ax,%ss - - /* - * We're not quite through with the boot stack - * but we need to reset the stack pointer to the correct virtual - * address. - * And we need to offset above the address of pstart. - */ - movl $(VA(MP_BOOTSTACK+MP_BOOT+4)), %esp - -/* - * Switch to the per-cpu descriptor tables - */ - POSTCODE(SLAVE_VSTART_DESC_INIT) - - CPU_NUMBER_FROM_LAPIC(%eax) - movl CX(EXT(cpu_data_ptr),%eax),%ecx - - movw $(GDTSZ*8-1),0(%esp) /* set GDT size in GDT descriptor */ - movl CPU_DESC_INDEX+CDI_GDT(%ecx),%edx - movl %edx,2(%esp) /* point to local GDT (linear addr) */ - lgdt 0(%esp) /* load new GDT */ - - movw $(IDTSZ*8-1),0(%esp) /* set IDT size in IDT descriptor */ - movl CPU_DESC_INDEX+CDI_IDT(%ecx),%edx - movl %edx,2(%esp) /* point to local IDT (linear addr) */ - lidt 0(%esp) /* load new IDT */ - movw $(KERNEL_LDT),%ax /* get LDT segment */ - lldt %ax /* load LDT */ - - movw $(KERNEL_TSS),%ax - ltr %ax /* load new KTSS */ +/* Code to get from real mode to protected mode */ - mov $(CPU_DATA_GS),%ax - mov %ax,%gs +#define operand_size_prefix .byte 0x66 +#define address_size_prefix .byte 0x67 +#define cs_base_prefix .byte 0x2e -/* - * Get stack top from pre-cpu data and switch - */ - POSTCODE(SLAVE_VSTART_STACK_SWITCH) - - movl %gs:CPU_INT_STACK_TOP,%esp - xorl %ebp,%ebp /* for completeness */ - - POSTCODE(SLAVE_VSTART_EXIT) - - call EXT(i386_init_slave) /* start MACH */ - /*NOTREACHED*/ - hlt +#undef LJMP +#define LJMP(segment,address) \ + operand_size_prefix ;\ + .byte 0xea ;\ + .long address-EXT(real_mode_bootstrap_base) ;\ + .word segment -/* - * Convert a descriptor from fake to real format. 
- * - * Calls from assembly code: - * %ebx = return address (physical) CANNOT USE STACK - * %esi = descriptor table address (physical) - * %ecx = number of descriptors - * - * Calls from C: - * 0(%esp) = return address - * 4(%esp) = descriptor table address (physical) - * 8(%esp) = number of descriptors - * - * Fake descriptor format: - * bytes 0..3 base 31..0 - * bytes 4..5 limit 15..0 - * byte 6 access byte 2 | limit 19..16 - * byte 7 access byte 1 - * - * Real descriptor format: - * bytes 0..1 limit 15..0 - * bytes 2..3 base 15..0 - * byte 4 base 23..16 - * byte 5 access byte 1 - * byte 6 access byte 2 | limit 19..16 - * byte 7 base 31..24 - * - * Fake gate format: - * bytes 0..3 offset - * bytes 4..5 selector - * byte 6 word count << 4 (to match fake descriptor) - * byte 7 access byte 1 - * - * Real gate format: - * bytes 0..1 offset 15..0 - * bytes 2..3 selector - * byte 4 word count - * byte 5 access byte 1 - * bytes 6..7 offset 31..16 - */ - .globl EXT(fix_desc) -LEXT(fix_desc) - pushl %ebp /* set up */ - movl %esp,%ebp /* stack frame */ - pushl %esi /* save registers */ - pushl %ebx - movl B_ARG0,%esi /* point to first descriptor */ - movl B_ARG1,%ecx /* get number of descriptors */ - lea 0f,%ebx /* get return address */ - jmp fix_desc_common /* call internal routine */ -0: popl %ebx /* restore registers */ - popl %esi - leave /* pop stack frame */ - ret /* return */ - -fix_desc_common: -0: - movw 6(%esi),%dx /* get access byte */ - movb %dh,%al - andb $0x14,%al - cmpb $0x04,%al /* gate or descriptor? 
*/ - je 1f - -/* descriptor */ - movl 0(%esi),%eax /* get base in eax */ - rol $16,%eax /* swap 15..0 with 31..16 */ - /* (15..0 in correct place) */ - movb %al,%dl /* combine bits 23..16 with ACC1 */ - /* in dh/dl */ - movb %ah,7(%esi) /* store bits 31..24 in correct place */ - movw 4(%esi),%ax /* move limit bits 0..15 to word 0 */ - movl %eax,0(%esi) /* store (bytes 0..3 correct) */ - movw %dx,4(%esi) /* store bytes 4..5 */ - jmp 2f - -/* gate */ +#define LGDT(address) \ + cs_base_prefix ;\ + address_size_prefix ;\ + operand_size_prefix ;\ + .word 0x010f ;\ + .byte 0x15 ;\ + .long address-EXT(real_mode_bootstrap_base) + +.section __HIB,__text +.align 12 /* Page align for single bcopy_phys() */ +.code32 +Entry(real_mode_bootstrap_base) + cli + + LGDT(EXT(protected_mode_gdtr)) + + /* set the PE bit of CR0 */ + mov %cr0, %eax + inc %eax + mov %eax, %cr0 + + /* reload CS register */ + LJMP(KERNEL32_CS, 1f + REAL_MODE_BOOTSTRAP_OFFSET) 1: - movw 4(%esi),%ax /* get selector */ - shrb $4,%dl /* shift word count to proper place */ - movw %dx,4(%esi) /* store word count / ACC1 */ - movw 2(%esi),%dx /* get offset 16..31 */ - movw %dx,6(%esi) /* store in correct place */ - movw %ax,2(%esi) /* store selector in correct place */ -2: - addl $8,%esi /* bump to next descriptor */ - loop 0b /* repeat */ - jmp *%ebx /* all done */ - -/* - * put arg in kbd leds and spin a while - * eats eax, ecx, edx - */ -#define K_RDWR 0x60 -#define K_CMD_LEDS 0xed -#define K_STATUS 0x64 -#define K_IBUF_FULL 0x02 /* input (to kbd) buffer full */ -#define K_OBUF_FULL 0x01 /* output (from kbd) buffer full */ - -ENTRY(set_kbd_leds) - mov S_ARG0,%cl /* save led value */ - -0: inb $(K_STATUS),%al /* get kbd status */ - testb $(K_IBUF_FULL),%al /* input busy? */ - jne 0b /* loop until not */ - mov $(K_CMD_LEDS),%al /* K_CMD_LEDS */ - outb %al,$(K_RDWR) /* to kbd */ - -0: inb $(K_STATUS),%al /* get kbd status */ - testb $(K_OBUF_FULL),%al /* output present? 
*/ - je 0b /* loop if not */ + /* we are in protected mode now */ + /* set up the segment registers */ + mov $KERNEL_DS, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + mov $0, %ax + movw %ax, %fs + movw %ax, %gs + + POSTCODE(SLAVE_STARTPROG_ENTRY); + + mov PROT_MODE_START+REAL_MODE_BOOTSTRAP_OFFSET, %ecx + jmp *%ecx + +Entry(protected_mode_gdtr) + .short 160 /* limit: 160 = 8 bytes * 20 descriptors (NOTE(review): confirm vs GDTSZ) */ + .long EXT(master_gdt) - inb $(K_RDWR),%al /* read status (and discard) */ +Entry(real_mode_bootstrap_end) -0: inb $(K_STATUS),%al /* get kbd status */ - testb $(K_IBUF_FULL),%al /* input busy? */ - jne 0b /* loop until not */ - - mov %cl,%al /* move led value */ - outb %al,$(K_RDWR) /* to kbd */ +.section __HIB,__text + .align ALIGN + .globl EXT(hibernate_machine_entrypoint) +LEXT(hibernate_machine_entrypoint) + mov %eax, %edi // save header pointer + /* restore gdt */ + lgdt EXT(protected_mode_gdtr) + + /* setup the protected mode segment registers */ + mov $KERNEL_DS, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + mov $0,%ax /* fs must be zeroed; */ + mov %ax,%fs + mov %ax,%gs + + /* set up the page tables to use BootstrapPTD + * as done in idle_pt.c, but this must be done programmatically */ + mov $EXT(IdlePDPT), %eax + mov $EXT(BootstrapPTD) + (INTEL_PTE_VALID), %ecx + mov $0x0, %edx + mov %ecx, (0*8+0)(%eax) + mov %edx, (0*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (1*8+0)(%eax) + mov %edx, (1*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (2*8+0)(%eax) + mov %edx, (2*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (3*8+0)(%eax) + mov %edx, (3*8+4)(%eax) + mov %eax, %cr3 + + + movl %cr4,%eax + orl $(CR4_PAE),%eax + movl %eax,%cr4 /* enable PAE (physical address extension) */ - movl $10000000,%ecx /* spin */ -0: nop - nop - loop 0b /* a while */ + movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value return in edx: eax */ + orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ - ret + movl %cr0, 
%eax + orl $(CR0_PG|CR0_WP), %eax + movl %eax, %cr0 /* ready paging */ + + mov $EXT(gIOHibernateRestoreStackEnd), %esp /* setup stack */ + xorl %ebp, %ebp /* zero frame pointer */ + + ljmpl $(KERNEL32_CS), $Ltemp +Ltemp: + xorl %eax, %eax /* Video memory - N/A */ + pushl %eax + pushl %eax + pushl %eax + mov %edi, %eax /* Pointer to hibernate header */ + pushl %eax + call EXT(hibernate_kernel_entrypoint) + /* NOTREACHED */ + hlt diff --git a/osfmk/i386/start64.s b/osfmk/i386/start64.s index e822e6c12..9c7188711 100644 --- a/osfmk/i386/start64.s +++ b/osfmk/i386/start64.s @@ -87,25 +87,6 @@ Entry(ml_load_desc64) ret -Entry(ml_64bit_wrmsr64) - /* (uint32_t msr, uint64_t value) */ - /* (uint32_t msr, uint32_t lo, uint32_t hi) */ - - FRAME - - ENTER_64BIT_MODE() - - movl B_ARG0, %ecx - movl B_ARG1, %eax - movl B_ARG2, %edx - wrmsr - - ENTER_COMPAT_MODE() - - EMARF - ret - - Entry(ml_64bit_lldt) /* (int32_t selector) */ @@ -194,16 +175,11 @@ Entry(get64_cr3) /* FXSAVE and FXRSTOR operate in a mode dependent fashion, hence these variants. * Must be called with interrupts disabled. - * We clear pending x87 exceptions here; this is technically incorrect, since we - * should propagate those to the user, but the compatibility mode kernel is - * currently not prepared to handle exceptions originating in 64-bit kernel mode. - * However, it may be possible to work around this should it prove necessary. 
*/ Entry(fxsave64) movl S_ARG0,%eax ENTER_64BIT_MODE() - fnclex fxsave 0(%eax) ENTER_COMPAT_MODE() ret @@ -211,7 +187,13 @@ Entry(fxsave64) Entry(fxrstor64) movl S_ARG0,%eax ENTER_64BIT_MODE() - fnclex fxrstor 0(%rax) ENTER_COMPAT_MODE() ret + +Entry(cpuid64) + ENTER_64BIT_MODE() + cpuid + ENTER_COMPAT_MODE() + ret + diff --git a/osfmk/i386/startup64.c b/osfmk/i386/startup64.c index b252c496a..c85bf1955 100644 --- a/osfmk/i386/startup64.c +++ b/osfmk/i386/startup64.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include @@ -66,6 +65,7 @@ #include +#ifdef __i386__ void cpu_IA32e_enable(cpu_data_t *cdp) { @@ -185,56 +185,7 @@ cpu_IA32e_disable(cpu_data_t *cdp) postcode(CPU_IA32_DISABLE_EXIT); } - -void -fix_desc64(void *descp, int count) -{ - struct fake_descriptor64 *fakep; - union { - struct real_gate64 gate; - struct real_descriptor64 desc; - } real; - int i; - - fakep = (struct fake_descriptor64 *) descp; - - for (i = 0; i < count; i++, fakep++) { - /* - * Construct the real decriptor locally. 
- */ - - bzero((void *) &real, sizeof(real)); - - switch (fakep->access & ACC_TYPE) { - case 0: - break; - case ACC_CALL_GATE: - case ACC_INTR_GATE: - case ACC_TRAP_GATE: - real.gate.offset_low16 = fakep->offset[0] & 0xFFFF; - real.gate.selector16 = fakep->lim_or_seg & 0xFFFF; - real.gate.IST = fakep->size_or_IST & 0x7; - real.gate.access8 = fakep->access; - real.gate.offset_high16 = (fakep->offset[0]>>16)&0xFFFF; - real.gate.offset_top32 = (uint32_t)fakep->offset[1]; - break; - default: /* Otherwise */ - real.desc.limit_low16 = fakep->lim_or_seg & 0xFFFF; - real.desc.base_low16 = fakep->offset[0] & 0xFFFF; - real.desc.base_med8 = (fakep->offset[0] >> 16) & 0xFF; - real.desc.access8 = fakep->access; - real.desc.limit_high4 = (fakep->lim_or_seg >> 16) & 0xFF; - real.desc.granularity4 = fakep->size_or_IST; - real.desc.base_high8 = (fakep->offset[0] >> 24) & 0xFF; - real.desc.base_top32 = (uint32_t) fakep->offset[1]; - } - - /* - * Now copy back over the fake structure. - */ - bcopy((void *) &real, (void *) fakep, sizeof(real)); - } -} +#endif #if DEBUG extern void dump_gdt(void *); @@ -313,7 +264,11 @@ dump_frame64(x86_saved_state64_t *sp) kprintf("%p: 0x%016llx\n", ip, *ip); kprintf("sp->isf.trapno: 0x%08x\n", sp->isf.trapno); +#ifdef __i386__ kprintf("sp->isf.trapfn: 0x%08x\n", sp->isf.trapfn); +#else + kprintf("sp->isf.trapfn: 0x%016llx\n", sp->isf.trapfn); +#endif kprintf("sp->isf.err: 0x%016llx\n", sp->isf.err); kprintf("sp->isf.rip: 0x%016llx\n", sp->isf.rip); kprintf("sp->isf.cs: 0x%016llx\n", sp->isf.cs); diff --git a/osfmk/i386/task.h b/osfmk/i386/task.h index b4218d665..b2bedd46a 100644 --- a/osfmk/i386/task.h +++ b/osfmk/i386/task.h @@ -60,4 +60,7 @@ #include -#define MACHINE_TASK struct user_ldt * i386_ldt; +#define MACHINE_TASK \ + struct user_ldt * i386_ldt; \ + void* task_debug; + diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h index badd5491f..0ac0ee06f 100644 --- a/osfmk/i386/thread.h +++ b/osfmk/i386/thread.h @@ -78,8 +78,11 @@ #include 
#include +#include + + /* - * x86_saved_state32/64: + * i386_saved_state: * * Has been exported to servers. See: mach/i386/thread_status.h * @@ -107,14 +110,14 @@ struct x86_fpsave_state { /* - * x86_kernel_state32: + * x86_kernel_state: * * This structure corresponds to the state of kernel registers * as saved in a context-switch. It lives at the base of the stack. - * kernel only runs in 32 bit mode for now */ -struct x86_kernel_state32 { +#ifdef __i386__ +struct x86_kernel_state { int k_ebx; /* kernel context */ int k_esp; int k_ebp; @@ -123,12 +126,24 @@ struct x86_kernel_state32 { int k_eip; /* * Kernel stacks are 16-byte aligned with a 4-byte i386_exception_link at - * the top, followed by an x86_kernel_state32. After both structs have + * the top, followed by an x86_kernel_state. After both structs have * been pushed, we want to be 16-byte aligned. A dummy int gets us there. */ int dummy; }; - +#else +struct x86_kernel_state { + unsigned long k_rbx; /* kernel context */ + unsigned long k_rsp; + unsigned long k_rbp; + unsigned long k_r12; + unsigned long k_r13; + unsigned long k_r14; + unsigned long k_r15; + unsigned long k_rip; + unsigned long dummy; +}; +#endif typedef struct pcb { void *sf; @@ -147,7 +162,6 @@ typedef struct pcb { uint32_t arg_store_valid; } *pcb_t; - /* * Maps state flavor to number of words in the state: */ @@ -171,11 +185,11 @@ struct machine_thread { pcb_t pcb; uint32_t specFlags; -#define OnProc 0x1 -#if CONFIG_DTRACE -#define CopyIOActive 0x2 /* Checked to ensure DTrace actions do not re-enter copyio(). */ -#endif /* CONFIG_DTRACE */ +#define OnProc 0x1 +#define CopyIOActive 0x2 /* Checked to ensure DTrace actions do not re-enter copyio(). 
*/ +#if NCOPY_WINDOWS > 0 + struct { user_addr_t user_base; } copy_window[NCOPY_WINDOWS]; @@ -187,6 +201,7 @@ struct machine_thread { #define WINDOWS_OPENED 3 uint64_t physwindow_pte; int physwindow_busy; +#endif }; @@ -210,21 +225,43 @@ struct i386_exception_link { /* * On the kernel stack is: * stack: ... - * struct i386_exception_link - * struct i386_kernel_state - * stack+KERNEL_STACK_SIZE + * struct i386_exception_link (pointer to user state) + * struct x86_kernel_state + * stack+kernel_stack_size */ #define STACK_IKS(stack) \ - ((struct x86_kernel_state32 *)((stack) + KERNEL_STACK_SIZE) - 1) + ((struct x86_kernel_state *)((stack) + kernel_stack_size) - 1) #define STACK_IEL(stack) \ ((struct i386_exception_link *)STACK_IKS(stack) - 1) +/* + * Return the current stack depth + * including x86_kernel_state and i386_exception_link + */ +static inline vm_offset_t +current_stack_depth(void) +{ + vm_offset_t stack_ptr; + + assert(get_preemption_level() > 0 || !ml_get_interrupts_enabled()); + +#if defined(__x86_64__) + __asm__ volatile("mov %%rsp, %0" : "=m" (stack_ptr)); +#else + __asm__ volatile("mov %%esp, %0" : "=m" (stack_ptr)); +#endif + return (current_cpu_datap()->cpu_kernel_stack + + sizeof(struct x86_kernel_state) + + sizeof(struct i386_exception_link *) + - stack_ptr); +} + /* * Return address of the function that called current function, given * address of the first parameter of current function. */ -#define GET_RETURN_PC(addr) (*((vm_offset_t *)addr - 1)) +#define GET_RETURN_PC(addr) (__builtin_return_address(0)) /* * Defining this indicates that MD code will supply an exception() diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index b263be9ff..8f27f5e58 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,6 +71,7 @@ #include #include #include /* panic_io_port_read() */ +#include #include #include @@ -89,6 +90,7 @@ #include #include #include +#include #include @@ -109,21 +111,30 @@ #include #include #include +#if CONFIG_MCA #include +#endif #include +#include extern void throttle_lowpri_io(boolean_t); + /* * Forward declarations */ static void user_page_fault_continue(kern_return_t kret); +#ifdef __i386__ static void panic_trap(x86_saved_state32_t *saved_state); static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip); +#else +static void panic_trap(x86_saved_state64_t *saved_state); +static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip); +#endif -perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */ -perfCallback perfASTHook = NULL; /* Pointer to CHUD AST hook routine */ +volatile perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */ +volatile perfCallback perfASTHook = NULL; /* Pointer to CHUD AST hook routine */ #if CONFIG_DTRACE /* See */ @@ -137,34 +148,59 @@ thread_syscall_return( kern_return_t ret) { thread_t thr_act = current_thread(); + boolean_t is_mach; + int code; + if (thread_is_64bit(thr_act)) { x86_saved_state64_t *regs; regs = USER_REGS64(thr_act); - if (kdebug_enable && ((regs->rax & SYSCALL_CLASS_MASK) == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT))) { + code = (int) (regs->rax & SYSCALL_NUMBER_MASK); + is_mach = (regs->rax & SYSCALL_CLASS_MASK) + == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT); + if (kdebug_enable && is_mach) { /* Mach trap */ KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_SC, ((int) (regs->rax & SYSCALL_NUMBER_MASK))) - | DBG_FUNC_END, - ret, 0, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_EXCP_SC,code)|DBG_FUNC_END, + ret, 0, 0, 0, 0); } regs->rax = ret; - +#if DEBUG + if (is_mach) + DEBUG_KPRINT_SYSCALL_MACH( + "thread_syscall_return: 64-bit mach ret=%u\n", + ret); + else + 
DEBUG_KPRINT_SYSCALL_UNIX( + "thread_syscall_return: 64-bit unix ret=%u\n", + ret); +#endif } else { x86_saved_state32_t *regs; regs = USER_REGS32(thr_act); - if (kdebug_enable && ((int) regs->eax < 0)) { + code = ((int) regs->eax); + is_mach = (code < 0); + if (kdebug_enable && is_mach) { /* Mach trap */ KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_SC, -((int) regs->eax)) - | DBG_FUNC_END, - ret, 0, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_EXCP_SC,-code)|DBG_FUNC_END, + ret, 0, 0, 0, 0); } regs->eax = ret; +#if DEBUG + if (is_mach) + DEBUG_KPRINT_SYSCALL_MACH( + "thread_syscall_return: 32-bit mach ret=%u\n", + ret); + else + DEBUG_KPRINT_SYSCALL_UNIX( + "thread_syscall_return: 32-bit unix ret=%u\n", + ret); +#endif } throttle_lowpri_io(TRUE); @@ -185,6 +221,7 @@ thread_kdb_return(void) thread_t thr_act = current_thread(); x86_saved_state_t *iss = USER_STATE(thr_act); + if (is_saved_state64(iss)) { x86_saved_state64_t *regs; @@ -217,6 +254,8 @@ user_page_fault_continue( ast_t *myast; boolean_t intr; user_addr_t vaddr; + + #if MACH_KDB x86_saved_state_t *regs = USER_STATE(thread); int err; @@ -233,7 +272,7 @@ user_page_fault_continue( #if MACH_KDB trapno = uregs->isf.trapno; - err = uregs->isf.err; + err = (int)uregs->isf.err; #endif vaddr = (user_addr_t)uregs->cr2; } else { @@ -282,6 +321,7 @@ user_page_fault_continue( } #endif /* MACH_KDB */ + i386_exception(EXC_BAD_ACCESS, kr, vaddr); /*NOTREACHED*/ } @@ -290,8 +330,8 @@ user_page_fault_continue( * Fault recovery in copyin/copyout routines. 
*/ struct recovery { - uint32_t fault_addr; - uint32_t recover_addr; + uintptr_t fault_addr; + uintptr_t recover_addr; }; extern struct recovery recover_table[]; @@ -300,26 +340,160 @@ extern struct recovery recover_table_end[]; const char * trap_type[] = {TRAP_NAMES}; unsigned TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); +#if defined(__x86_64__) && DEBUG +static void +print_state(x86_saved_state64_t *saved_state) +{ + kprintf("current_cpu_datap() 0x%lx\n", (uintptr_t)current_cpu_datap()); + kprintf("Current GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_GS_BASE)); + kprintf("Kernel GS base MSR 0x%llx\n", rdmsr64(MSR_IA32_KERNEL_GS_BASE)); + kprintf("state at 0x%lx:\n", (uintptr_t) saved_state); + + kprintf(" rdi 0x%llx\n", saved_state->rdi); + kprintf(" rsi 0x%llx\n", saved_state->rsi); + kprintf(" rdx 0x%llx\n", saved_state->rdx); + kprintf(" r10 0x%llx\n", saved_state->r10); + kprintf(" r8 0x%llx\n", saved_state->r8); + kprintf(" r9 0x%llx\n", saved_state->r9); + kprintf(" v_arg6 0x%llx\n", saved_state->v_arg6); + kprintf(" v_arg7 0x%llx\n", saved_state->v_arg7); + kprintf(" v_arg8 0x%llx\n", saved_state->v_arg8); + + kprintf(" cr2 0x%llx\n", saved_state->cr2); + kprintf("real cr2 0x%lx\n", get_cr2()); + kprintf(" r15 0x%llx\n", saved_state->r15); + kprintf(" r14 0x%llx\n", saved_state->r14); + kprintf(" r13 0x%llx\n", saved_state->r13); + kprintf(" r12 0x%llx\n", saved_state->r12); + kprintf(" r11 0x%llx\n", saved_state->r11); + kprintf(" rbp 0x%llx\n", saved_state->rbp); + kprintf(" rbx 0x%llx\n", saved_state->rbx); + kprintf(" rcx 0x%llx\n", saved_state->rcx); + kprintf(" rax 0x%llx\n", saved_state->rax); + + kprintf(" gs 0x%x\n", saved_state->gs); + kprintf(" fs 0x%x\n", saved_state->fs); + + kprintf(" isf.trapno 0x%x\n", saved_state->isf.trapno); + kprintf(" isf._pad 0x%x\n", saved_state->isf._pad); + kprintf(" isf.trapfn 0x%llx\n", saved_state->isf.trapfn); + kprintf(" isf.err 0x%llx\n", saved_state->isf.err); + kprintf(" isf.rip 0x%llx\n", 
saved_state->isf.rip); + kprintf(" isf.cs 0x%llx\n", saved_state->isf.cs); + kprintf(" isf.rflags 0x%llx\n", saved_state->isf.rflags); + kprintf(" isf.rsp 0x%llx\n", saved_state->isf.rsp); + kprintf(" isf.ss 0x%llx\n", saved_state->isf.ss); +} +/* + * K64 debug - fatal handler for debug code in the trap vectors. + */ +extern void +panic_idt64(x86_saved_state_t *rsp); +void +panic_idt64(x86_saved_state_t *rsp) +{ + print_state(saved_state64(rsp)); + panic("panic_idt64"); +} +#endif + +extern void PE_incoming_interrupt(int interrupt); + +/* + * Handle interrupts: + * - local APIC interrupts (IPIs, timers, etc) are handled by the kernel, + * - device interrupts go to the platform expert. + */ +void +interrupt(x86_saved_state_t *state) +{ + uint64_t rip; + uint64_t rsp; + int interrupt_num; + boolean_t user_mode = FALSE; + + + if (is_saved_state64(state) == TRUE) { + x86_saved_state64_t *state64; + + state64 = saved_state64(state); + rip = state64->isf.rip; + rsp = state64->isf.rsp; + interrupt_num = state64->isf.trapno; +#ifdef __x86_64__ + if(state64->isf.cs & 0x03) +#endif + user_mode = TRUE; + } else { + x86_saved_state32_t *state32; + + state32 = saved_state32(state); + if (state32->cs & 0x03) + user_mode = TRUE; + rip = state32->eip; + rsp = state32->uesp; + interrupt_num = state32->trapno; + } + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, + interrupt_num, (long) rip, user_mode, 0, 0); + + /* + * Handle local APIC interrupts + * else call platform expert for devices. + */ + if (!lapic_interrupt(interrupt_num, state)) + PE_incoming_interrupt(interrupt_num); + + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, + 0, 0, 0, 0, 0); + + /* + * Having serviced the interrupt first, look at the interrupted stack depth. 
+ */ + if (!user_mode) { + uint64_t depth = current_cpu_datap()->cpu_kernel_stack + + sizeof(struct x86_kernel_state) + + sizeof(struct i386_exception_link *) + - rsp; + if (depth > kernel_stack_depth_max) { + kernel_stack_depth_max = (vm_offset_t)depth; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH), + (long) depth, (long) rip, 0, 0, 0); + } + } +} static inline void reset_dr7(void) { - uint32_t dr7 = 0x400; /* magic dr7 reset value */ - __asm__ volatile("movl %0,%%dr7" : : "r" (dr7)); + long dr7 = 0x400; /* magic dr7 reset value; 32 bit on i386, 64 bit on x86_64 */ + __asm__ volatile("mov %0,%%dr7" : : "r" (dr7)); } #if MACH_KDP unsigned kdp_has_active_watchpoints = 0; +#define NO_WATCHPOINTS (!kdp_has_active_watchpoints) +#else +#define NO_WATCHPOINTS 1 #endif /* * Trap from kernel mode. Only page-fault errors are recoverable, * and then only in special circumstances. All other errors are * fatal. Return value indicates if trap was handled. */ + void kernel_trap( x86_saved_state_t *state) { +#ifdef __i386__ x86_saved_state32_t *saved_state; +#else + x86_saved_state64_t *saved_state; +#endif int code; user_addr_t vaddr; int type; @@ -331,38 +505,51 @@ kernel_trap( vm_prot_t prot; struct recovery *rp; vm_offset_t kern_ip; +#if NCOPY_WINDOWS > 0 int fault_in_copy_window = -1; +#endif int is_user = 0; -#if MACH_KDB +#if MACH_KDB pt_entry_t *pte; #endif /* MACH_KDB */ - + thread = current_thread(); +#ifdef __i386__ if (is_saved_state64(state)) panic("kernel_trap(%p) with 64-bit state", state); saved_state = saved_state32(state); - vaddr = (user_addr_t)saved_state->cr2; type = saved_state->trapno; code = saved_state->err & 0xffff; intr = (saved_state->efl & EFL_IF) != 0; /* state of ints at trap */ - kern_ip = (vm_offset_t)saved_state->eip; +#else + if (is_saved_state32(state)) + panic("kernel_trap(%p) with 32-bit state", state); + saved_state = saved_state64(state); + vaddr = (user_addr_t)saved_state->cr2; + type = saved_state->isf.trapno; 
+ code = (int)(saved_state->isf.err & 0xffff); + intr = (saved_state->isf.rflags & EFL_IF) != 0; /* state of ints at trap */ + kern_ip = (vm_offset_t)saved_state->isf.rip; +#endif myast = ast_pending(); - if (perfASTHook) { + perfCallback fn = perfASTHook; + if (fn) { if (*myast & AST_CHUD_ALL) - perfASTHook(type, NULL, 0, 0); + fn(type, NULL, 0, 0); } else *myast &= ~AST_CHUD_ALL; /* * Is there a hook? */ - if (perfTrapHook) { - if (perfTrapHook(type, NULL, 0, 0) == KERN_SUCCESS) { + fn = perfTrapHook; + if (fn) { + if (fn(type, NULL, 0, 0) == KERN_SUCCESS) { /* * If it succeeds, we are done... */ @@ -401,12 +588,13 @@ kernel_trap( map = kernel_map; if (thread != THREAD_NULL && thread->map != kernel_map) { - vm_offset_t copy_window_base; +#if NCOPY_WINDOWS > 0 + vm_offset_t copy_window_base; vm_offset_t kvaddr; int window_index; kvaddr = (vm_offset_t)vaddr; - /* + /* * must determine if fault occurred in * the copy window while pre-emption is * disabled for this processor so that @@ -429,10 +617,29 @@ kernel_trap( } is_user = -1; } +#else + if (vaddr < VM_MAX_USER_PAGE_ADDRESS) { + /* fault occurred in userspace */ + map = thread->map; + is_user = -1; + /* + * If we're not sharing cr3 with the user + * and we faulted in copyio, + * then switch cr3 here and dismiss the fault. 
+ */ + if (no_shared_cr3 && + (thread->machine.specFlags&CopyIOActive) && + map->pmap->pm_cr3 != get_cr3()) { + set_cr3(map->pmap->pm_cr3); + return; + } + } +#endif } } - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, - (int)(vaddr >> 32), (int)vaddr, is_user, kern_ip, 0); + KERNEL_DEBUG_CONSTANT( + (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, + (unsigned)(vaddr >> 32), (unsigned)vaddr, is_user, kern_ip, 0); (void) ml_set_interrupts_enabled(intr); @@ -455,11 +662,10 @@ kernel_trap( fpSSEexterrflt(); return; case T_DEBUG: -#if MACH_KDP - if ((saved_state->efl & EFL_TF) == 0 - && !kdp_has_active_watchpoints) +#ifdef __i386__ + if ((saved_state->efl & EFL_TF) == 0 && NO_WATCHPOINTS) #else - if ((saved_state->efl & EFL_TF) == 0) + if ((saved_state->isf.rflags & EFL_TF) == 0 && NO_WATCHPOINTS) #endif { /* We've somehow encountered a debug @@ -471,6 +677,10 @@ kernel_trap( return; } goto debugger_entry; +#ifdef __x86_64__ + case T_INT3: + goto debugger_entry; +#endif case T_PAGE_FAULT: /* * If the current map is a submap of the kernel map, @@ -539,7 +749,7 @@ kernel_trap( #endif /* MACH_KDB */ if (result == KERN_SUCCESS) { - +#if NCOPY_WINDOWS > 0 if (fault_in_copy_window != -1) { pt_entry_t *updp; pt_entry_t *kpdp; @@ -555,8 +765,8 @@ kernel_trap( * for a TLB flush in either case */ - ml_set_interrupts_enabled(FALSE); - updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base); + ml_set_interrupts_enabled(FALSE); + updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base); assert(updp); if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */ kpdp = current_cpu_datap()->cpu_copywindow_pdp; @@ -564,12 +774,13 @@ kernel_trap( #if JOE_DEBUG if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME)) - panic("kernel_fault: user pdp doesn't match - updp = 0x%x, kpdp = 0x%x\n", updp, kpdp); + panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", 
*updp, *kpdp); #endif pmap_store_pte(kpdp, *updp); (void) ml_set_interrupts_enabled(intr); } +#endif /* NCOPY_WINDOWS > 0 */ return; } /* @@ -580,6 +791,9 @@ kernel_trap( #endif /* CONFIG_DTRACE */ case T_GENERAL_PROTECTION: +#if defined(__x86_64__) && DEBUG + print_state(saved_state); +#endif /* * If there is a failure recovery address * for this fault, go there. @@ -595,7 +809,7 @@ kernel_trap( * Check thread recovery address also. */ if (thread->recover) { - set_recovery_ip(saved_state, thread->recover); + set_recovery_ip(saved_state, thread->recover); thread->recover = 0; return; } @@ -621,13 +835,13 @@ kernel_trap( * context at the moment of the trap, to facilitate * access through the debugger. */ - sync_iss_to_iks(saved_state); + sync_iss_to_iks(state); #if MACH_KDB restart_debugger: #endif /* MACH_KDB */ #if MACH_KDP if (current_debugger != KDB_CUR_DB) { - if (kdp_i386_trap(type, saved_state, result, vaddr)) + if (kdp_i386_trap(type, saved_state, result, (vm_offset_t)vaddr)) return; } else { #endif /* MACH_KDP */ @@ -653,13 +867,22 @@ kernel_trap( } +#ifdef __i386__ static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip) { saved_state->eip = ip; } +#else +static void +set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip) +{ + saved_state->isf.rip = ip; +} +#endif +#ifdef __i386__ static void panic_trap(x86_saved_state32_t *regs) { @@ -668,7 +891,6 @@ panic_trap(x86_saved_state32_t *regs) uint32_t cr2 = get_cr2(); uint32_t cr3 = get_cr3(); uint32_t cr4 = get_cr4(); - /* * Issue an I/O port read if one has been requested - this is an * event logic analyzers can use as a trigger point. @@ -699,30 +921,15 @@ panic_trap(x86_saved_state32_t *regs) */ cr0 = 0; } - -extern void kprintf_break_lock(void); - - -/* - * Called from locore on a special reserved stack after a double-fault - * is taken in kernel space. - * Kernel stack overflow is one route here. 
- */ -void -panic_double_fault( -#if CONFIG_NO_PANIC_STRINGS - __unused int code #else - int code -#endif - ) +static void +panic_trap(x86_saved_state64_t *regs) { -#if MACH_KDP || !CONFIG_NO_PANIC_STRINGS - struct i386_tss *my_ktss = current_ktss(); -#endif - - /* Set postcode (DEBUG only) */ - postcode(PANIC_DOUBLE_FAULT); + const char *trapname = "Unknown"; + uint64_t cr0 = get_cr0(); + uint64_t cr2 = get_cr2(); + uint64_t cr3 = get_cr3(); + uint64_t cr4 = get_cr4(); /* * Issue an I/O port read if one has been requested - this is an @@ -730,51 +937,49 @@ panic_double_fault( */ panic_io_port_read(); - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); + kprintf("panic trap number 0x%x, rip 0x%016llx\n", + regs->isf.trapno, regs->isf.rip); + kprintf("cr0 0x%016llx cr2 0x%016llx cr3 0x%016llx cr4 0x%016llx\n", + cr0, cr2, cr3, cr4); -#if MACH_KDP + if (regs->isf.trapno < TRAP_TYPES) + trapname = trap_type[regs->isf.trapno]; +#undef panic + panic("Kernel trap at 0x%016llx, type %d=%s, registers:\n" + "CR0: 0x%016llx, CR2: 0x%016llx, CR3: 0x%016llx, CR4: 0x%016llx\n" + "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n" + "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n" + "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n" + "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n" + "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n" + "Error code: 0x%016llx\n", + regs->isf.rip, regs->isf.trapno, trapname, + cr0, cr2, cr3, cr4, + regs->rax, regs->rbx, regs->rcx, regs->rdx, + regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi, + regs->r8, regs->r9, regs->r10, regs->r11, + regs->r12, regs->r13, regs->r14, regs->r15, + regs->isf.rflags, regs->isf.rip, regs->isf.cs, regs->isf.ss, + regs->isf.err); /* - * Print backtrace leading to first fault: + * This next statement is not executed, + * but it's needed to stop the compiler 
using tail call optimization + * for the panic call - which confuses the subsequent backtrace. */ - panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL); -#endif - - panic("Double fault at 0x%08x, thread:%p, code:0x%x, " - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - my_ktss->eip, current_thread(), code, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, - my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, - my_ktss->eflags, my_ktss->eip); + cr0 = 0; } +#endif +extern void kprintf_break_lock(void); -/* - * Called from locore on a special reserved stack after a machine-check - */ -void -panic_machine_check( -#if CONFIG_NO_PANIC_STRINGS - __unused int code -#else - int code -#endif - ) +#ifdef __i386__ +static void +panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t do_mca_dump, boolean_t do_bt) { -#if !CONFIG_NO_PANIC_STRINGS struct i386_tss *my_ktss = current_ktss(); -#endif /* Set postcode (DEBUG only) */ - postcode(PANIC_MACHINE_CHECK); + postcode(pc); /* * Issue an I/O port read if one has been requested - this is an @@ -788,32 +993,63 @@ panic_machine_check( */ kprintf_break_lock(); - /* - * Dump the contents of the machine check MSRs (if any). - */ - mca_dump(); + if (do_mca_dump) { +#if CONFIG_MCA + /* + * Dump the contents of the machine check MSRs (if any). + */ + mca_dump(); +#endif + } +#if MACH_KDP /* - * And that's all folks, we don't attempt recovery... 
+ * Print backtrace leading to first fault: */ - panic("Machine-check at 0x%08x, thread:%p, code:0x%x, " + if (do_bt) + panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL); +#endif + + panic("%s at 0x%08x, thread:%p, code:0x%x, " "registers:\n" "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" "EFL: 0x%08x, EIP: 0x%08x\n", + msg, my_ktss->eip, current_thread(), code, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), + (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, my_ktss->eflags, my_ktss->eip); } +/* + * Called from locore on a special reserved stack after a double-fault + * is taken in kernel space. + * Kernel stack overflow is one route here. + */ void -panic_double_fault64(x86_saved_state_t *esp) +panic_double_fault32(int code) +{ + panic_32(code, PANIC_DOUBLE_FAULT, "Double fault", FALSE, TRUE); +} + +/* + * Called from locore on a special reserved stack after a machine-check + */ +void +panic_machine_check32(int code) +{ + panic_32(code, PANIC_MACHINE_CHECK, "Machine-check", TRUE, FALSE); +} +#endif /* __i386__ */ + +static void +panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump) { /* Set postcode (DEBUG only) */ - postcode(PANIC_DOUBLE_FAULT); + postcode(pc); /* * Issue an I/O port read if one has been requested - this is an @@ -827,14 +1063,22 @@ panic_double_fault64(x86_saved_state_t *esp) */ kprintf_break_lock(); + if (do_mca_dump) { +#if CONFIG_MCA + /* + * Dump the contents of the machine check MSRs (if any). + */ + mca_dump(); +#endif + } + +#ifdef __i386__ /* * Dump the interrupt stack frame at last kernel entry. 
*/ - if (is_saved_state64(esp)) { -#if !CONFIG_NO_PANIC_STRINGS - x86_saved_state64_t *ss64p = saved_state64(esp); -#endif - panic("Double fault thread:%p, trapno:0x%x, err:0x%qx, " + if (is_saved_state64(sp)) { + x86_saved_state64_t *ss64p = saved_state64(sp); + panic("%s thread:%p, trapno:0x%x, err:0x%qx, " "registers:\n" "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" @@ -842,95 +1086,63 @@ panic_double_fault64(x86_saved_state_t *esp) "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx\n", + msg, current_thread(), ss64p->isf.trapno, ss64p->isf.err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), + (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2); } else { -#if !CONFIG_NO_PANIC_STRINGS - x86_saved_state32_t *ss32p = saved_state32(esp); -#endif - panic("Double fault at 0x%08x, thread:%p, trapno:0x%x, err:0x%x)," + x86_saved_state32_t *ss32p = saved_state32(sp); + panic("%s at 0x%08x, thread:%p, trapno:0x%x, err:0x%x," "registers:\n" "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" "EFL: 0x%08x, EIP: 0x%08x\n", - ss32p->eip, current_thread(), ss32p->trapno, ss32p->err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), + msg, + ss32p->eip, current_thread(), ss32p->trapno, ss32p->err, + (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, ss32p->efl, ss32p->eip); } +#else + 
x86_saved_state64_t *regs = saved_state64(sp); + panic("%s thread:%p at 0x%016llx, registers:\n" + "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n" + "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n" + "RSP: 0x%016llx, RBP: 0x%016llx, RSI: 0x%016llx, RDI: 0x%016llx\n" + "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n" + "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n" + "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n" + "Error code: 0x%016llx\n", + msg, + current_thread(), regs->isf.rip, + get_cr0(), get_cr2(), get_cr3(), get_cr4(), + regs->rax, regs->rbx, regs->rcx, regs->rdx, + regs->isf.rsp, regs->rbp, regs->rsi, regs->rdi, + regs->r8, regs->r9, regs->r10, regs->r11, + regs->r12, regs->r13, regs->r14, regs->r15, + regs->isf.rflags, regs->isf.rip, regs->isf.cs, regs->isf.ss, + regs->isf.err); +#endif } -/* - * Machine check handler for 64-bit. - */ void -panic_machine_check64(x86_saved_state_t *esp) +panic_double_fault64(x86_saved_state_t *sp) { - /* Set postcode (DEBUG only) */ - postcode(PANIC_MACHINE_CHECK); + panic_64(sp, PANIC_DOUBLE_FAULT, "Double fault", FALSE); - /* - * Issue an I/O port read if one has been requested - this is an - * event logic analyzers can use as a trigger point. - */ - panic_io_port_read(); - - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); +} +void - /* - * Dump the contents of the machine check MSRs (if any). - */ - mca_dump(); +panic_machine_check64(x86_saved_state_t *sp) +{ + panic_64(sp, PANIC_MACHINE_CHECK, "Machine Check", TRUE); - /* - * And that's all folks, we don't attempt recovery... 
- */ - if (is_saved_state64(esp)) { -#if !CONFIG_NO_PANIC_STRINGS - x86_saved_state64_t *ss64p = saved_state64(esp); -#endif - panic("Machine Check thread:%p, trapno:0x%x, err:0x%qx, " - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" - "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" - "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" - "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" - "RFL: 0x%016qx, RIP: 0x%016qx\n", - current_thread(), ss64p->isf.trapno, ss64p->isf.err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, - ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, - ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, - ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, - ss64p->isf.rflags, ss64p->isf.rip); - } else { -#if !CONFIG_NO_PANIC_STRINGS - x86_saved_state32_t *ss32p = saved_state32(esp); -#endif - panic("Machine Check at 0x%08x, thread:%p, trapno:0x%x, err:0x%x, " - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - ss32p->eip, current_thread(), ss32p->trapno, ss32p->err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, - ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, - ss32p->efl, ss32p->eip); - } } #if CONFIG_DTRACE @@ -965,7 +1177,7 @@ user_trap( regs = saved_state64(saved_state); type = regs->isf.trapno; - err = regs->isf.err & 0xffff; + err = (int)regs->isf.err & 0xffff; vaddr = (user_addr_t)regs->cr2; rip = (user_addr_t)regs->isf.rip; } else { @@ -979,8 +1191,10 @@ user_trap( rip = (user_addr_t)regs->eip; } - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE, - (int)(vaddr>>32), (int)vaddr, (int)(rip>>32), (int)rip, 0); + 
KERNEL_DEBUG_CONSTANT( + (MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE, + (unsigned)(vaddr>>32), (unsigned)vaddr, + (unsigned)(rip>>32), (unsigned)rip, 0); code = 0; subcode = 0; @@ -991,17 +1205,19 @@ user_trap( saved_state, type, vaddr); #endif myast = ast_pending(); - if (perfASTHook) { + perfCallback fn = perfASTHook; + if (fn) { if (*myast & AST_CHUD_ALL) { - perfASTHook(type, saved_state, 0, 0); + fn(type, saved_state, 0, 0); } } else { *myast &= ~AST_CHUD_ALL; } /* Is there a hook? */ - if (perfTrapHook) { - if (perfTrapHook(type, saved_state, 0, 0) == KERN_SUCCESS) + fn = perfTrapHook; + if (fn) { + if (fn(type, saved_state, 0, 0) == KERN_SUCCESS) return; /* If it succeeds, we are done... */ } @@ -1010,6 +1226,9 @@ user_trap( * Avoid needlessly calling tempDTraceTrapHook here, and let the * INT_3 case handle them. */ + DEBUG_KPRINT_SYSCALL_MASK(1, + "user_trap: type=0x%x(%s) err=0x%x cr2=%p rip=%p\n", + type, trap_type[type], err, (void *)(long) vaddr, (void *)(long) rip); switch (type) { @@ -1021,7 +1240,7 @@ user_trap( case T_DEBUG: { pcb_t pcb; - unsigned int clear = 0; + long clear = 0; /* 32 bit for i386, 64 bit for x86_64 */ /* * get dr6 and set it in the thread's pcb before * returning to userland @@ -1034,17 +1253,16 @@ user_trap( * because the high order bits are not * used on x86_64 */ + unsigned long dr6_temp; /* 32 bit for i386, 64 bit for x86_64 */ + __asm__ volatile ("mov %%db6, %0" : "=r" (dr6_temp)); /* Register constraint by necessity */ if (thread_is_64bit(thread)) { - uint32_t dr6; x86_debug_state64_t *ids = pcb->ids; - dr6 = (uint32_t)ids->dr6; - __asm__ volatile ("movl %%db6, %0" : "=r" (dr6)); - ids->dr6 = dr6; + ids->dr6 = dr6_temp; } else { /* 32 bit thread */ x86_debug_state32_t *ids = pcb->ids; - __asm__ volatile ("movl %%db6, %0" : "=r" (ids->dr6)); + ids->dr6 = (uint32_t) dr6_temp; } - __asm__ volatile ("movl %0, %%db6" : : "r" (clear)); + __asm__ volatile ("mov %0, %%db6" : : "r" (clear)); } exc = EXC_BREAKPOINT; 
code = EXC_I386_SGL; @@ -1222,6 +1440,8 @@ i386_exception( { mach_exception_data_type_t codes[EXCEPTION_CODE_MAX]; + DEBUG_KPRINT_SYSCALL_MACH("i386_exception: exc=%d code=0x%llx subcode=0x%llx\n", + exc, code, subcode); codes[0] = code; /* new exception interface */ codes[1] = subcode; exception_triage(exc, codes, 2); @@ -1229,6 +1449,7 @@ i386_exception( } + void kernel_preempt_check(void) { @@ -1306,22 +1527,24 @@ db_i386_state( */ void -sync_iss_to_iks(x86_saved_state32_t *saved_state) +sync_iss_to_iks(x86_saved_state_t *saved_state) { - struct x86_kernel_state32 *iks; + struct x86_kernel_state *iks; vm_offset_t kstack; boolean_t record_active_regs = FALSE; if ((kstack = current_thread()->kernel_stack) != 0) { - x86_saved_state32_t *regs; - - regs = saved_state; +#ifdef __i386__ + x86_saved_state32_t *regs = saved_state32(saved_state); +#else + x86_saved_state64_t *regs = saved_state64(saved_state); +#endif iks = STACK_IKS(kstack); - /* - * Did we take the trap/interrupt in kernel mode? - */ + + /* Did we take the trap/interrupt in kernel mode? 
*/ +#ifdef __i386__ if (regs == USER_REGS32(current_thread())) record_active_regs = TRUE; else { @@ -1332,21 +1555,47 @@ sync_iss_to_iks(x86_saved_state32_t *saved_state) iks->k_esi = regs->esi; iks->k_eip = regs->eip; } +#else + if (regs == USER_REGS64(current_thread())) + record_active_regs = TRUE; + else { + iks->k_rbx = regs->rbx; + iks->k_rsp = regs->isf.rsp; + iks->k_rbp = regs->rbp; + iks->k_r12 = regs->r12; + iks->k_r13 = regs->r13; + iks->k_r14 = regs->r14; + iks->k_r15 = regs->r15; + iks->k_rip = regs->isf.rip; + } +#endif } if (record_active_regs == TRUE) { - /* - * Show the trap handler path - */ +#ifdef __i386__ + /* Show the trap handler path */ __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* - * "Current" instruction pointer - */ + /* "Current" instruction pointer */ __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); +#else + /* Show the trap handler path */ + __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx)); + __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp)); + __asm__ volatile("movq %%rbp, %0" : "=m" (iks->k_rbp)); + __asm__ volatile("movq %%r12, %0" : "=m" (iks->k_r12)); + __asm__ volatile("movq %%r13, %0" : "=m" (iks->k_r13)); + __asm__ volatile("movq %%r14, %0" : "=m" (iks->k_r14)); + __asm__ volatile("movq %%r15, %0" : "=m" (iks->k_r15)); + /* "Current" instruction pointer */ + __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" + : "=m" (iks->k_rip) + : + : "rax"); +#endif } } @@ -1358,22 +1607,31 @@ sync_iss_to_iks(x86_saved_state32_t *saved_state) */ void sync_iss_to_iks_unconditionally(__unused x86_saved_state_t *saved_state) { - struct x86_kernel_state32 *iks; + struct x86_kernel_state *iks; vm_offset_t kstack; if ((kstack = current_thread()->kernel_stack) != 0) { iks = 
STACK_IKS(kstack); - /* - * Display the trap handler path. - */ +#ifdef __i386__ + /* Display the trap handler path */ __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* - * "Current" instruction pointer. - */ + /* "Current" instruction pointer */ __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); +#else + /* Display the trap handler path */ + __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx)); + __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp)); + __asm__ volatile("movq %%rbp, %0" : "=m" (iks->k_rbp)); + __asm__ volatile("movq %%r12, %0" : "=m" (iks->k_r12)); + __asm__ volatile("movq %%r13, %0" : "=m" (iks->k_r13)); + __asm__ volatile("movq %%r14, %0" : "=m" (iks->k_r14)); + __asm__ volatile("movq %%r15, %0" : "=m" (iks->k_r15)); + /* "Current" instruction pointer */ + __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" : "=m" (iks->k_rip)::"rax"); +#endif } } diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index 9ae4a8b5f..ff00c7476 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -84,6 +84,15 @@ #define T_SSE_FLOAT_ERROR 19 /* 20-126 */ #define T_DTRACE_RET 127 + +/* The SYSENTER and SYSCALL trap numbers are software constructs. + * These exceptions are dispatched directly to the system call handlers. 
+ * See also the "software interrupt codes" section of + * osfmk/mach/i386/syscall_sw.h + */ +#define T_SYSENTER 0x84 +#define T_SYSCALL 0x85 + #define T_PREEMPT 255 #define TRAP_NAMES "divide error", "debug trap", "NMI", "breakpoint", \ @@ -115,7 +124,7 @@ extern void i386_exception( mach_exception_code_t code, mach_exception_subcode_t subcode); -extern void sync_iss_to_iks(x86_saved_state32_t *regs); +extern void sync_iss_to_iks(x86_saved_state_t *regs); extern void sync_iss_to_iks_unconditionally( x86_saved_state_t *regs); @@ -124,12 +133,13 @@ extern void kernel_trap(x86_saved_state_t *regs); extern void user_trap(x86_saved_state_t *regs); -extern void panic_double_fault(int code); +extern void interrupt(x86_saved_state_t *regs); +#ifdef __i386__ +extern void panic_double_fault32(int code); +extern void panic_machine_check32(int code); +#endif extern void panic_double_fault64(x86_saved_state_t *regs); - -extern void panic_machine_check(int code); - extern void panic_machine_check64(x86_saved_state_t *regs); extern void i386_astintr(int preemption); @@ -141,15 +151,19 @@ typedef kern_return_t (*perfCallback)( int unused1, int unused2); -extern perfCallback perfTrapHook; -extern perfCallback perfASTHook; -extern perfCallback perfIntHook; +extern volatile perfCallback perfTrapHook; +extern volatile perfCallback perfASTHook; +extern volatile perfCallback perfIntHook; extern void panic_i386_backtrace(void *, int, const char *, boolean_t, x86_saved_state_t *); #if MACH_KDP extern boolean_t kdp_i386_trap( unsigned int, +#ifdef __i386__ x86_saved_state32_t *, +#else + x86_saved_state64_t *, +#endif kern_return_t, vm_offset_t); #endif /* MACH_KDP */ diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c index 669bc401f..6744a9097 100644 --- a/osfmk/i386/tsc.c +++ b/osfmk/i386/tsc.c @@ -53,19 +53,18 @@ #include /* for kernel_map */ #include #include -#include -#include #include -#include -#include #include +#include #include +#include +#include +#include #include #include 
#include #include #include -#include uint64_t busFCvtt2n = 0; uint64_t busFCvtn2t = 0; @@ -172,14 +171,10 @@ tsc_init(void) * value. See 6036811. */ if (busFreq == 0) - busFreq = BASE_NHM_CLOCK_SOURCE; + busFreq = BASE_NHM_CLOCK_SOURCE; cpu_mhz = tscGranularity * BASE_NHM_CLOCK_SOURCE; - kprintf("[NHM] Maximum Non-Turbo Ratio = [%d]\n", - (uint32_t)tscGranularity); - kprintf("[NHM] CPU: Frequency = %6d.%04dMhz\n", - (uint32_t)(cpu_mhz / Mega), (uint32_t)(cpu_mhz % Mega)); break; } default: { diff --git a/osfmk/i386/tsc.h b/osfmk/i386/tsc.h index e702ec234..f6c5eba78 100644 --- a/osfmk/i386/tsc.h +++ b/osfmk/i386/tsc.h @@ -40,7 +40,7 @@ #ifndef _I386_TSC_H_ #define _I386_TSC_H_ -#define BASE_NHM_CLOCK_SOURCE 139806638ULL +#define BASE_NHM_CLOCK_SOURCE 133333333ULL #define IA32_PERF_STS 0x198 extern uint64_t busFCvtt2n; diff --git a/osfmk/i386/tss.h b/osfmk/i386/tss.h index 1426d5a41..866d2056b 100644 --- a/osfmk/i386/tss.h +++ b/osfmk/i386/tss.h @@ -110,6 +110,7 @@ struct sysenter_stack { uint64_t top; /* Top and pointer to ISS in PCS */ }; +#pragma pack(4) struct x86_64_tss { uint32_t reserved1; uint64_t rsp0; /* stack pointer for CPL0 */ @@ -130,4 +131,5 @@ struct x86_64_tss { uint16_t io_bit_map_offset; /* offset to IO permission bit map */ }; +#pragma pack() #endif /* _I386_TSS_H_ */ diff --git a/osfmk/i386/user_ldt.c b/osfmk/i386/user_ldt.c index e06afda78..6e32ba389 100644 --- a/osfmk/i386/user_ldt.c +++ b/osfmk/i386/user_ldt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,14 +68,14 @@ #include -#include -#include -#include -#include -#include #include +#include #include #include +#include +#include +#include +#include #include @@ -264,7 +264,7 @@ i386_set_ldt( * and we need to make sure the new LDT is in place * throughout the task before returning to the user. 
*/ - mp_rendezvous_no_intrs(user_ldt_set_action, task); + mp_broadcast(user_ldt_set_action, task); task_unlock(task); diff --git a/osfmk/i386/user_ldt.h b/osfmk/i386/user_ldt.h index e83c16dca..8285cb4df 100644 --- a/osfmk/i386/user_ldt.h +++ b/osfmk/i386/user_ldt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/osfmk/i386/vmx.h b/osfmk/i386/vmx.h index 132baf088..e1776c521 100644 --- a/osfmk/i386/vmx.h +++ b/osfmk/i386/vmx.h @@ -29,6 +29,12 @@ #ifndef _I386_VMX_H_ #define _I386_VMX_H_ +#if defined(__cplusplus) +extern "C" { +#endif + +#include + /* * Error codes */ @@ -37,7 +43,11 @@ #define VMX_INUSE 2 /* VT is being exclusively used already */ /* SPI */ -int host_vmxon(int exclusive); +int host_vmxon(boolean_t exclusive); void host_vmxoff(void); +#if defined(__cplusplus) +} +#endif + #endif diff --git a/osfmk/i386/vmx/vmx_asm.h b/osfmk/i386/vmx/vmx_asm.h index 78722ec86..bd0de4688 100644 --- a/osfmk/i386/vmx/vmx_asm.h +++ b/osfmk/i386/vmx/vmx_asm.h @@ -35,10 +35,6 @@ #include #include -#ifndef DEBUG -#include -#endif - #define VMX_FAIL_INVALID -1 #define VMX_FAIL_VALID -2 #define VMX_SUCCEED 0 @@ -61,7 +57,7 @@ static inline void enter_compat_mode(void) { ".word %P0 \n\t" ".code32 \n\t" "5:" - :: "i" (KERNEL_CS) + :: "i" (KERNEL32_CS) ); } @@ -99,6 +95,9 @@ static inline int __vmxoff(void) { int result; +#if defined (__x86_64__) + __VMXOFF(result); +#else if (ml_is64bit()) { /* don't put anything between these lines! */ enter_64bit_mode(); @@ -107,6 +106,7 @@ __vmxoff(void) } else { __VMXOFF(result); } +#endif return result; } @@ -118,6 +118,9 @@ __vmxoff(void) __vmxon(addr64_t *v) { int result; +#if defined (__x86_64__) + __VMXON(v, result); +#else if (ml_is64bit()) { /* don't put anything between these lines! 
*/ enter_64bit_mode(); @@ -126,6 +129,7 @@ __vmxon(addr64_t *v) } else { __VMXON(v, result); } +#endif return result; } diff --git a/osfmk/i386/vmx/vmx_cpu.c b/osfmk/i386/vmx/vmx_cpu.c index c86af004e..34bd07acc 100644 --- a/osfmk/i386/vmx/vmx_cpu.c +++ b/osfmk/i386/vmx/vmx_cpu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,12 +37,11 @@ #include #include #include /* for host_info() */ -#include #define VMX_KPRINTF(x...) /* kprintf("vmx: " x) */ int vmx_use_count = 0; -int vmx_exclusive = 0; +boolean_t vmx_exclusive = FALSE; decl_simple_lock_data(static,vmx_use_count_lock) /* ----------------------------------------------------------------------------- @@ -72,7 +72,7 @@ vmxon_is_enabled(void) static inline boolean_t vmx_is_cr0_valid(vmx_specs_t *specs) { - uint32_t cr0 = get_cr0(); + uintptr_t cr0 = get_cr0(); return (0 == ((~cr0 & specs->cr0_fixed_0)|(cr0 & ~specs->cr0_fixed_1))); } @@ -83,7 +83,7 @@ vmx_is_cr0_valid(vmx_specs_t *specs) static inline boolean_t vmx_is_cr4_valid(vmx_specs_t *specs) { - uint32_t cr4 = get_cr4(); + uintptr_t cr4 = get_cr4(); return (0 == ((~cr4 & specs->cr4_fixed_0)|(cr4 & ~specs->cr4_fixed_1))); } @@ -142,29 +142,29 @@ vmx_get_specs() #define bitfield(x,f) ((x >> f##_BIT) & f##_MASK) /* Obtain and decode VMX general capabilities */ msr_image = rdmsr64(MSR_IA32_VMX_BASIC); - specs->vmcs_id = msr_image & VMX_VCR_VMCS_REV_ID; + specs->vmcs_id = (uint32_t)(msr_image & VMX_VCR_VMCS_REV_ID); specs->vmcs_mem_type = bitfield(msr_image, VMX_VCR_VMCS_MEM_TYPE) != 0; specs->vmcs_size = bitfield(msr_image, VMX_VCR_VMCS_SIZE); /* Obtain allowed settings for pin-based execution controls */ msr_image = rdmsr64(MSR_IA32_VMXPINBASED_CTLS); - specs->pin_exctls_0 = msr_image & 0xFFFFFFFF; - specs->pin_exctls_1 = msr_image >> 32; + specs->pin_exctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); + specs->pin_exctls_1 = (uint32_t)(msr_image >> 32); /* Obtain allowed settings for processor-based execution controls */ 
msr_image = rdmsr64(MSR_IA32_PROCBASED_CTLS); - specs->proc_exctls_0 = msr_image & 0xFFFFFFFF; - specs->proc_exctls_1 = msr_image >> 32; + specs->proc_exctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); + specs->proc_exctls_1 = (uint32_t)(msr_image >> 32); /* Obtain allowed settings for VM-exit controls */ msr_image = rdmsr64(MSR_IA32_VMX_EXIT_CTLS); - specs->exit_ctls_0 = msr_image & 0xFFFFFFFF; - specs->exit_ctls_1 = msr_image >> 32; + specs->exit_ctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); + specs->exit_ctls_1 = (uint32_t)(msr_image >> 32); /* Obtain allowed settings for VM-entry controls */ msr_image = rdmsr64(MSR_IA32_VMX_ENTRY_CTLS); - specs->enter_ctls_0 = msr_image & 0xFFFFFFFF; - specs->enter_ctls_0 = msr_image >> 32; + specs->enter_ctls_0 = (uint32_t)(msr_image & 0xFFFFFFFF); + specs->enter_ctls_0 = (uint32_t)(msr_image >> 32); /* Obtain and decode miscellaneous capabilities */ msr_image = rdmsr64(MSR_IA32_VMX_MISC); @@ -173,16 +173,16 @@ vmx_get_specs() specs->act_SIPI = bitfield(msr_image, VMX_VCR_ACT_SIPI) != 0; specs->act_CSTATE = bitfield(msr_image, VMX_VCR_ACT_CSTATE) != 0; specs->cr3_targs = bitfield(msr_image, VMX_VCR_CR3_TARGS); - specs->max_msrs = 512 * (1 + bitfield(msr_image, VMX_VCR_MAX_MSRS)); - specs->mseg_id = bitfield(msr_image, VMX_VCR_MSEG_ID); + specs->max_msrs = (uint32_t)(512 * (1 + bitfield(msr_image, VMX_VCR_MAX_MSRS))); + specs->mseg_id = (uint32_t)bitfield(msr_image, VMX_VCR_MSEG_ID); /* Obtain VMX-fixed bits in CR0 */ - specs->cr0_fixed_0 = rdmsr64(MSR_IA32_VMX_CR0_FIXED0) & 0xFFFFFFFF; - specs->cr0_fixed_1 = rdmsr64(MSR_IA32_VMX_CR0_FIXED1) & 0xFFFFFFFF; + specs->cr0_fixed_0 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR0_FIXED0) & 0xFFFFFFFF; + specs->cr0_fixed_1 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR0_FIXED1) & 0xFFFFFFFF; /* Obtain VMX-fixed bits in CR4 */ - specs->cr4_fixed_0 = rdmsr64(MSR_IA32_VMX_CR4_FIXED0) & 0xFFFFFFFF; - specs->cr4_fixed_1 = rdmsr64(MSR_IA32_VMX_CR4_FIXED1) & 0xFFFFFFFF; + specs->cr4_fixed_0 = 
(uint32_t)rdmsr64(MSR_IA32_VMX_CR4_FIXED0) & 0xFFFFFFFF; + specs->cr4_fixed_1 = (uint32_t)rdmsr64(MSR_IA32_VMX_CR4_FIXED1) & 0xFFFFFFFF; } /* ----------------------------------------------------------------------------- @@ -299,6 +299,7 @@ int host_vmxon(boolean_t exclusive) { int error; + boolean_t do_it = FALSE; /* do the cpu sync outside of the area holding the lock */ if (!vmx_globally_available()) return VMX_UNSUPPORTED; @@ -307,20 +308,22 @@ host_vmxon(boolean_t exclusive) if (vmx_exclusive) { error = VMX_INUSE; - goto out; - } - vmx_use_count++; - if (vmx_use_count == 1) { /* was turned off before */ - vmx_allocate_vmxon_regions(); - mp_rendezvous(NULL, (void (*)(void *))vmx_on, NULL, NULL); + } else { + vmx_use_count++; + if (vmx_use_count == 1) /* was turned off before */ + do_it = TRUE; + vmx_exclusive = exclusive; + + VMX_KPRINTF("VMX use count: %d\n", vmx_use_count); + error = VMX_OK; } - vmx_exclusive = exclusive; - VMX_KPRINTF("VMX use count: %d\n", vmx_use_count); - error = VMX_OK; -out: simple_unlock(&vmx_use_count_lock); + if (do_it) { + vmx_allocate_vmxon_regions(); + mp_rendezvous(NULL, (void (*)(void *))vmx_on, NULL, NULL); + } return error; } @@ -331,19 +334,24 @@ host_vmxon(boolean_t exclusive) void host_vmxoff() { + boolean_t do_it = FALSE; /* do the cpu sync outside of the area holding the lock */ + simple_lock(&vmx_use_count_lock); if (vmx_use_count) { vmx_use_count--; - vmx_exclusive = 0; - if (!vmx_use_count) { - mp_rendezvous(NULL, (void (*)(void *))vmx_off, NULL, NULL); - vmx_free_vmxon_regions(); - } + vmx_exclusive = FALSE; + if (!vmx_use_count) + do_it = TRUE; } simple_unlock(&vmx_use_count_lock); + if (do_it) { + mp_rendezvous(NULL, (void (*)(void *))vmx_off, NULL, NULL); + vmx_free_vmxon_regions(); + } + VMX_KPRINTF("VMX use count: %d\n", vmx_use_count); } diff --git a/osfmk/i386/vmx/vmx_shims.c b/osfmk/i386/vmx/vmx_shims.c index f0fe27733..2cedc19bd 100644 --- a/osfmk/i386/vmx/vmx_shims.c +++ b/osfmk/i386/vmx/vmx_shims.c @@ -39,7 
+39,7 @@ vmx_pcalloc(void) { char *pptr; kern_return_t ret; - ret = kmem_alloc_wired(kernel_map, (vm_offset_t *)&pptr, PAGE_SIZE); + ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&pptr, PAGE_SIZE); if (ret != KERN_SUCCESS) return (NULL); bzero(pptr, PAGE_SIZE); return (pptr); @@ -48,7 +48,7 @@ vmx_pcalloc(void) addr64_t vmx_paddr(void *va) { - return (ptoa_64(pmap_find_phys(kernel_pmap, (addr64_t)(uint32_t)va))); + return (ptoa_64(pmap_find_phys(kernel_pmap, (addr64_t)(uintptr_t)va))); } void diff --git a/osfmk/ipc/Makefile b/osfmk/ipc/Makefile index 27ef16887..a8dcf06ad 100644 --- a/osfmk/ipc/Makefile +++ b/osfmk/ipc/Makefile @@ -9,15 +9,20 @@ include $(MakeInc_def) DATAFILES = -EXPORT_ONLY_FILES = \ +EXPORT_ONLY_FILES = \ ipc_types.h +EXPORT_PRIVATE_FILES = \ + ipc_port.h + INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = ipc EXPORT_MI_LIST = ${DATAFILES} ${EXPORT_ONLY_FILES} +INSTALL_KF_MI_LCL_LIST = ${EXPORT_ONLY_FILES} ${EXPORT_PRIVATE_FILES} + EXPORT_MI_DIR = ipc include $(MakeInc_rule) diff --git a/osfmk/ipc/ipc_hash.c b/osfmk/ipc/ipc_hash.c index 19f36a890..2f43a63cf 100644 --- a/osfmk/ipc/ipc_hash.c +++ b/osfmk/ipc/ipc_hash.c @@ -211,15 +211,15 @@ ipc_hash_index_t ipc_hash_global_mask; ipc_hash_global_mask) typedef struct ipc_hash_global_bucket { - decl_mutex_data(, ihgb_lock_data) + decl_lck_mtx_data(, ihgb_lock_data) ipc_tree_entry_t ihgb_head; } *ipc_hash_global_bucket_t; #define IHGB_NULL ((ipc_hash_global_bucket_t) 0) -#define ihgb_lock_init(ihgb) mutex_init(&(ihgb)->ihgb_lock_data, 0) -#define ihgb_lock(ihgb) mutex_lock(&(ihgb)->ihgb_lock_data) -#define ihgb_unlock(ihgb) mutex_unlock(&(ihgb)->ihgb_lock_data) +#define ihgb_lock_init(ihgb) lck_mtx_init(&(ihgb)->ihgb_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define ihgb_lock(ihgb) lck_mtx_lock(&(ihgb)->ihgb_lock_data) +#define ihgb_unlock(ihgb) lck_mtx_unlock(&(ihgb)->ihgb_lock_data) ipc_hash_global_bucket_t ipc_hash_global_table; @@ -394,7 +394,7 @@ ipc_hash_global_delete( */ #define 
IH_LOCAL_HASH(obj, size) \ - ((((mach_port_index_t) (obj)) >> 6) % (size)) + ((mach_port_index_t)((((uintptr_t) (obj)) >> 6) % (size))) /* * Routine: ipc_hash_local_lookup diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 1267d06e8..7417911d1 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -111,10 +111,16 @@ vm_map_t ipc_kernel_copy_map; vm_size_t ipc_kernel_copy_map_size = IPC_KERNEL_COPY_MAP_SIZE; vm_size_t ipc_kmsg_max_vm_space = (IPC_KERNEL_COPY_MAP_SIZE * 7)/8; -int ipc_space_max = SPACE_MAX; -int ipc_tree_entry_max = ITE_MAX; -int ipc_port_max = PORT_MAX; -int ipc_pset_max = SET_MAX; +int ipc_space_max; +int ipc_tree_entry_max; +int ipc_port_max; +int ipc_pset_max; + + +lck_grp_t ipc_lck_grp; +lck_attr_t ipc_lck_attr; + +static lck_grp_attr_t ipc_lck_grp_attr; extern void ikm_cache_init(void); @@ -129,7 +135,11 @@ void ipc_bootstrap(void) { kern_return_t kr; - + + lck_grp_attr_setdefault(&ipc_lck_grp_attr); + lck_grp_init(&ipc_lck_grp, "ipc", &ipc_lck_grp_attr); + lck_attr_setdefault(&ipc_lck_attr); + ipc_port_multiple_lock_init(); ipc_port_timestamp_lock_init(); diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index 92a903209..9adfe5b83 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -83,6 +83,7 @@ #include #include +#include #include #include #include @@ -94,6 +95,8 @@ #include #include +#include + #include #include #include @@ -119,6 +122,366 @@ #include #endif +#if DEBUG +#define DEBUG_MSGS_K64 1 +#endif + +#pragma pack(4) + +typedef struct +{ + mach_msg_bits_t msgh_bits; + mach_msg_size_t msgh_size; + uint32_t msgh_remote_port; + uint32_t msgh_local_port; + mach_msg_size_t msgh_reserved; + mach_msg_id_t msgh_id; +} mach_msg_legacy_header_t; + +typedef struct +{ + mach_msg_legacy_header_t header; + mach_msg_body_t body; +} mach_msg_legacy_base_t; + +typedef struct +{ + mach_port_name_t name; + mach_msg_size_t pad1; + uint32_t pad2 : 16; + mach_msg_type_name_t disposition : 8; + 
mach_msg_descriptor_type_t type : 8; +} mach_msg_legacy_port_descriptor_t; + + +typedef union +{ + mach_msg_legacy_port_descriptor_t port; + mach_msg_ool_descriptor32_t out_of_line32; + mach_msg_ool_ports_descriptor32_t ool_ports32; + mach_msg_type_descriptor_t type; +} mach_msg_legacy_descriptor_t; + +#pragma pack() + +#define LEGACY_HEADER_SIZE_DELTA ((mach_msg_size_t)(sizeof(mach_msg_header_t) - sizeof(mach_msg_legacy_header_t))) +// END LP64 fixes + + +#if DEBUG_MSGS_K64 +extern void ipc_pset_print64( + ipc_pset_t pset); + +extern void ipc_kmsg_print64( + ipc_kmsg_t kmsg, + const char *str); + +extern void ipc_msg_print64( + mach_msg_header_t *msgh); + +extern ipc_port_t ipc_name_to_data64( + task_t task, + mach_port_name_t name); + +/* + * Forward declarations + */ +void ipc_msg_print_untyped64( + mach_msg_body_t *body); + +const char * ipc_type_name64( + int type_name, + boolean_t received); + +void ipc_print_type_name64( + int type_name); + +const char * +msgh_bit_decode64( + mach_msg_bits_t bit); + +const char * +mm_copy_options_string64( + mach_msg_copy_options_t option); + +void db_print_msg_uid64(mach_msg_header_t *); + +static void +ipc_msg_body_print64(void *body, int size) +{ + uint32_t *word = (uint32_t *) body; + uint32_t *end = (uint32_t *)(((uintptr_t) body) + size + - sizeof(mach_msg_header_t)); + int i; + + kprintf(" body(%p-%p):\n %p: ", body, end, word); + for (;;) { + for (i = 0; i < 8; i++, word++) { + if (word >= end) { + kprintf("\n"); + return; + } + kprintf("%08x ", *word); + } + kprintf("\n %p: ", word); + } +} + + +const char * +ipc_type_name64( + int type_name, + boolean_t received) +{ + switch (type_name) { + case MACH_MSG_TYPE_PORT_NAME: + return "port_name"; + + case MACH_MSG_TYPE_MOVE_RECEIVE: + if (received) { + return "port_receive"; + } else { + return "move_receive"; + } + + case MACH_MSG_TYPE_MOVE_SEND: + if (received) { + return "port_send"; + } else { + return "move_send"; + } + + case MACH_MSG_TYPE_MOVE_SEND_ONCE: + if 
(received) { + return "port_send_once"; + } else { + return "move_send_once"; + } + + case MACH_MSG_TYPE_COPY_SEND: + return "copy_send"; + + case MACH_MSG_TYPE_MAKE_SEND: + return "make_send"; + + case MACH_MSG_TYPE_MAKE_SEND_ONCE: + return "make_send_once"; + + default: + return (char *) 0; + } +} + +void +ipc_print_type_name64( + int type_name) +{ + const char *name = ipc_type_name64(type_name, TRUE); + if (name) { + kprintf("%s", name); + } else { + kprintf("type%d", type_name); + } +} + +/* + * ipc_kmsg_print64 [ debug ] + */ +void +ipc_kmsg_print64( + ipc_kmsg_t kmsg, + const char *str) +{ + kprintf("%s kmsg=%p:\n", str, kmsg); + kprintf(" next=%p, prev=%p, size=%d", + kmsg->ikm_next, + kmsg->ikm_prev, + kmsg->ikm_size); + kprintf("\n"); + ipc_msg_print64(kmsg->ikm_header); +} + +const char * +msgh_bit_decode64( + mach_msg_bits_t bit) +{ + switch (bit) { + case MACH_MSGH_BITS_COMPLEX: return "complex"; + case MACH_MSGH_BITS_CIRCULAR: return "circular"; + default: return (char *) 0; + } +} + +/* + * ipc_msg_print64 [ debug ] + */ +void +ipc_msg_print64( + mach_msg_header_t *msgh) +{ + mach_msg_bits_t mbits; + unsigned int bit, i; + const char *bit_name; + int needs_comma; + + mbits = msgh->msgh_bits; + kprintf(" msgh_bits=0x%x: l=0x%x,r=0x%x\n", + mbits, + MACH_MSGH_BITS_LOCAL(msgh->msgh_bits), + MACH_MSGH_BITS_REMOTE(msgh->msgh_bits)); + + mbits = MACH_MSGH_BITS_OTHER(mbits) & MACH_MSGH_BITS_USED; + kprintf(" decoded bits: "); + needs_comma = 0; + for (i = 0, bit = 1; i < sizeof(mbits) * 8; ++i, bit <<= 1) { + if ((mbits & bit) == 0) + continue; + bit_name = msgh_bit_decode64((mach_msg_bits_t)bit); + if (bit_name) + kprintf("%s%s", needs_comma ? "," : "", bit_name); + else + kprintf("%sunknown(0x%x),", needs_comma ? "," : "", bit); + ++needs_comma; + } + if (msgh->msgh_bits & ~MACH_MSGH_BITS_USED) { + kprintf("%sunused=0x%x,", needs_comma ? 
"," : "", + msgh->msgh_bits & ~MACH_MSGH_BITS_USED); + } + kprintf("\n"); + + needs_comma = 1; + if (msgh->msgh_remote_port) { + kprintf(" remote=%p(", msgh->msgh_remote_port); + ipc_print_type_name64(MACH_MSGH_BITS_REMOTE(msgh->msgh_bits)); + kprintf(")"); + } else { + kprintf(" remote=null"); + } + + if (msgh->msgh_local_port) { + kprintf("%slocal=%p(", needs_comma ? "," : "", + msgh->msgh_local_port); + ipc_print_type_name64(MACH_MSGH_BITS_LOCAL(msgh->msgh_bits)); + kprintf(")\n"); + } else { + kprintf("local=null\n"); + } + + kprintf(" msgh_id=%d, size=%d\n", + msgh->msgh_id, + msgh->msgh_size); + + if (mbits & MACH_MSGH_BITS_COMPLEX) { + ipc_msg_print_untyped64((mach_msg_body_t *) (msgh + 1)); + } + + ipc_msg_body_print64((void *)(msgh + 1), msgh->msgh_size); +} + + +const char * +mm_copy_options_string64( + mach_msg_copy_options_t option) +{ + const char *name; + + switch (option) { + case MACH_MSG_PHYSICAL_COPY: + name = "PHYSICAL"; + break; + case MACH_MSG_VIRTUAL_COPY: + name = "VIRTUAL"; + break; + case MACH_MSG_OVERWRITE: + name = "OVERWRITE"; + break; + case MACH_MSG_ALLOCATE: + name = "ALLOCATE"; + break; + case MACH_MSG_KALLOC_COPY_T: + name = "KALLOC_COPY_T"; + break; + default: + name = "unknown"; + break; + } + return name; +} + +void +ipc_msg_print_untyped64( + mach_msg_body_t *body) +{ + mach_msg_descriptor_t *saddr, *send; + mach_msg_descriptor_type_t type; + + kprintf(" %d descriptors: \n", body->msgh_descriptor_count); + + saddr = (mach_msg_descriptor_t *) (body + 1); + send = saddr + body->msgh_descriptor_count; + + for ( ; saddr < send; saddr++ ) { + + type = saddr->type.type; + + switch (type) { + + case MACH_MSG_PORT_DESCRIPTOR: { + mach_msg_port_descriptor_t *dsc; + + dsc = &saddr->port; + kprintf(" PORT name = %p disp = ", dsc->name); + ipc_print_type_name64(dsc->disposition); + kprintf("\n"); + break; + } + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_DESCRIPTOR: { + mach_msg_ool_descriptor_t *dsc; + + dsc = 
(mach_msg_ool_descriptor_t *) &saddr->out_of_line; + kprintf(" OOL%s addr = %p size = 0x%x copy = %s %s\n", + type == MACH_MSG_OOL_DESCRIPTOR ? "" : " VOLATILE", + dsc->address, dsc->size, + mm_copy_options_string64(dsc->copy), + dsc->deallocate ? "DEALLOC" : ""); + break; + } + case MACH_MSG_OOL_PORTS_DESCRIPTOR : { + mach_msg_ool_ports_descriptor_t *dsc; + + dsc = (mach_msg_ool_ports_descriptor_t *) &saddr->ool_ports; + + kprintf(" OOL_PORTS addr = %p count = 0x%x ", + dsc->address, dsc->count); + kprintf("disp = "); + ipc_print_type_name64(dsc->disposition); + kprintf(" copy = %s %s\n", + mm_copy_options_string64(dsc->copy), + dsc->deallocate ? "DEALLOC" : ""); + break; + } + + default: { + kprintf(" UNKNOWN DESCRIPTOR 0x%x\n", type); + break; + } + } + } +} + +#define DEBUG_IPC_KMSG_PRINT(kmsg,string) \ + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \ + ipc_kmsg_print64(kmsg, string); \ + } +#define DEBUG_IPC_MSG_BODY_PRINT(body,size) \ + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { \ + ipc_msg_body_print64(body,size);\ + } +#else /* !DEBUG_MSGS_K64 */ +#define DEBUG_IPC_KMSG_PRINT(kmsg,string) +#define DEBUG_IPC_MSG_BODY_PRINT(body,size) +#endif /* !DEBUG_MSGS_K64 */ extern vm_map_t ipc_kernel_copy_map; extern vm_size_t ipc_kmsg_max_vm_space; @@ -136,8 +499,8 @@ extern vm_size_t msg_ool_size_small; #define OTHER_OOL_PORTS_DESCRIPTOR mach_msg_ool_ports_descriptor64_t #endif -#define DESC_SIZE_ADJUSTMENT (sizeof(OTHER_OOL_DESCRIPTOR) - \ - sizeof(mach_msg_ool_descriptor_t)) +#define DESC_SIZE_ADJUSTMENT ((mach_msg_size_t)(sizeof(mach_msg_ool_descriptor64_t) - \ + sizeof(mach_msg_ool_descriptor32_t))) /* scatter list macros */ @@ -214,12 +577,11 @@ ipc_kmsg_alloc( mach_msg_size_t max_expanded_size; ipc_kmsg_t kmsg; -#if !defined(__LP64__) /* * LP64support - * Pad the allocation in case we need to expand the * message descrptors for user spaces with pointers larger than - * the kernel's own. 
We don't know how many descriptors + * the kernel's own, or vice versa. We don't know how many descriptors * there are yet, so just assume the whole body could be * descriptors (if there could be any at all). * @@ -228,17 +590,16 @@ ipc_kmsg_alloc( * forward as we process them than it is to push all the * data backwards. */ - mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE; if (size > sizeof(mach_msg_base_t)) { - mach_msg_size_t max_desc = ((size - sizeof(mach_msg_base_t)) / - sizeof(mach_msg_ool_descriptor_t)) * - DESC_SIZE_ADJUSTMENT; + mach_msg_size_t max_desc = (mach_msg_size_t)(((size - sizeof(mach_msg_base_t)) / + sizeof(mach_msg_ool_descriptor32_t)) * + DESC_SIZE_ADJUSTMENT); if (msg_and_trailer_size >= MACH_MSG_SIZE_MAX - max_desc) return IKM_NULL; + max_expanded_size = msg_and_trailer_size + max_desc; } else -#endif max_expanded_size = msg_and_trailer_size; if (max_expanded_size > ikm_less_overhead(MACH_MSG_SIZE_MAX)) @@ -516,14 +877,12 @@ ipc_kmsg_clean_body( mach_msg_type_number_t number, mach_msg_descriptor_t *saddr) { - mach_msg_descriptor_t *eaddr; + mach_msg_type_number_t i; if ( number == 0 ) return; - eaddr = saddr + number; - - for ( ; saddr < eaddr; saddr++ ) { + for (i = 0 ; i < number; i++, saddr++ ) { switch (saddr->type.type) { @@ -544,7 +903,7 @@ ipc_kmsg_clean_body( case MACH_MSG_OOL_DESCRIPTOR : { mach_msg_ool_descriptor_t *dsc; - dsc = &saddr->out_of_line; + dsc = (mach_msg_ool_descriptor_t *)&saddr->out_of_line; /* * Destroy memory carried in the message @@ -561,7 +920,7 @@ ipc_kmsg_clean_body( mach_msg_type_number_t j; mach_msg_ool_ports_descriptor_t *dsc; - dsc = &saddr->ool_ports; + dsc = (mach_msg_ool_ports_descriptor_t *)&saddr->ool_ports; objects = (ipc_object_t *) dsc->address; if (dsc->count == 0) { @@ -586,7 +945,7 @@ ipc_kmsg_clean_body( assert(dsc->count != 0); kfree(dsc->address, - (vm_size_t) dsc->count * sizeof(mach_port_name_t)); + (vm_size_t) dsc->count * sizeof(mach_port_t)); break; } default : { @@ 
-739,26 +1098,76 @@ ipc_kmsg_get( mach_msg_size_t msg_and_trailer_size; ipc_kmsg_t kmsg; mach_msg_max_trailer_t *trailer; + mach_msg_legacy_base_t legacy_base; + mach_msg_size_t len_copied; + legacy_base.body.msgh_descriptor_count = 0; - if ((size < sizeof(mach_msg_header_t)) || (size & 3)) + if ((size < sizeof(mach_msg_legacy_header_t)) || (size & 3)) return MACH_SEND_MSG_TOO_SMALL; if (size > MACH_MSG_SIZE_MAX - MAX_TRAILER_SIZE) return MACH_SEND_TOO_LARGE; - msg_and_trailer_size = size + MAX_TRAILER_SIZE; + if(size == sizeof(mach_msg_legacy_header_t)) + len_copied = sizeof(mach_msg_legacy_header_t); + else + len_copied = sizeof(mach_msg_legacy_base_t); - kmsg = ipc_kmsg_alloc(msg_and_trailer_size); + if (copyinmsg(msg_addr, (char *)&legacy_base, len_copied)) + return MACH_SEND_INVALID_DATA; + + msg_addr += sizeof(legacy_base.header); +#if defined(__LP64__) + size += LEGACY_HEADER_SIZE_DELTA; +#endif + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { + unsigned int j; + for (j=0; jikm_header, size)) { + kmsg->ikm_header->msgh_size = size; + kmsg->ikm_header->msgh_bits = legacy_base.header.msgh_bits; + kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_remote_port); + kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_local_port); + kmsg->ikm_header->msgh_reserved = legacy_base.header.msgh_reserved; + kmsg->ikm_header->msgh_id = legacy_base.header.msgh_id; + + DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_get header:\n" + " size: 0x%.8x\n" + " bits: 0x%.8x\n" + " remote_port: %p\n" + " local_port: %p\n" + " reserved: 0x%.8x\n" + " id: %.8d\n", + kmsg->ikm_header->msgh_size, + kmsg->ikm_header->msgh_bits, + kmsg->ikm_header->msgh_remote_port, + kmsg->ikm_header->msgh_local_port, + kmsg->ikm_header->msgh_reserved, + kmsg->ikm_header->msgh_id); + + if (copyinmsg(msg_addr, (char *)(kmsg->ikm_header + 1), size - (mach_msg_size_t)sizeof(mach_msg_header_t))) { ipc_kmsg_free(kmsg); return 
MACH_SEND_INVALID_DATA; } - kmsg->ikm_header->msgh_size = size; + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) + { + kprintf("body: size: %lu\n", (size - sizeof(mach_msg_header_t))); + uint32_t i; + for(i=0;i*4 < (size - sizeof(mach_msg_header_t));i++) + { + kprintf("%.4x\n",((uint32_t *)(kmsg->ikm_header + 1))[i]); + } + } + DEBUG_IPC_KMSG_PRINT(kmsg, "ipc_kmsg_get()"); /* * I reserve for the trailer the largest space (MAX_TRAILER_SIZE) @@ -771,7 +1180,7 @@ ipc_kmsg_get( trailer->msgh_audit = current_thread()->task->audit_token; trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; - + #ifdef ppc if(trcWork.traceMask) dbgTrace(0x1100, (unsigned int)kmsg->ikm_header->msgh_id, (unsigned int)kmsg->ikm_header->msgh_remote_port, @@ -787,7 +1196,7 @@ ipc_kmsg_get( } else trailer->msgh_labels.sender = 0; #else - trailer->msgh_labels.sender = 0; + trailer->msgh_labels.sender = 0; #endif *kmsgp = kmsg; @@ -834,6 +1243,9 @@ ipc_kmsg_get_from_kernel( * clients. These are set up for those kernel clients * which cannot afford to wait. */ +#ifndef __LP64__ + /* LP64todo - does the prealloc kmsg need ikm_header padding? + */ if (IP_PREALLOC(dest_port)) { ip_lock(dest_port); if (!ip_active(dest_port)) { @@ -852,7 +1264,10 @@ ipc_kmsg_get_from_kernel( } ikm_prealloc_set_inuse(kmsg, dest_port); ip_unlock(dest_port); - } else { + } + else +#endif /* !__LP64__ */ + { kmsg = ipc_kmsg_alloc(msg_and_trailer_size); if (kmsg == IKM_NULL) return MACH_SEND_NO_BUFFER; @@ -908,6 +1323,8 @@ ipc_kmsg_send( mach_msg_timeout_t send_timeout) { ipc_port_t port; + mach_msg_return_t error = MACH_MSG_SUCCESS; + spl_t s; port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port; assert(IP_VALID(port)); @@ -975,11 +1392,25 @@ ipc_kmsg_send( /* * We have a valid message and a valid reference on the port. - * we can unlock the port and call mqueue_send() on it's message - * queue. 
+ * we can unlock the port and call mqueue_send() on its message + * queue. Lock message queue while port is locked. */ + s = splsched(); + imq_lock(&port->ip_messages); ip_unlock(port); - return (ipc_mqueue_send(&port->ip_messages, kmsg, option, send_timeout)); + error = ipc_mqueue_send(&port->ip_messages, kmsg, option, + send_timeout, s); + + /* + * If the port has been destroyed while we wait, treat the message + * as a successful delivery (like we do for an inactive port). + */ + if (error == MACH_SEND_INVALID_DEST) { + kmsg->ikm_header->msgh_remote_port = MACH_PORT_NULL; + ipc_kmsg_destroy(kmsg); + return MACH_MSG_SUCCESS; + } + return error; } /* @@ -1004,6 +1435,56 @@ ipc_kmsg_put( { mach_msg_return_t mr; + DEBUG_IPC_KMSG_PRINT(kmsg, "ipc_kmsg_put()"); + + + DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_put header:\n" + " size: 0x%.8x\n" + " bits: 0x%.8x\n" + " remote_port: %p\n" + " local_port: %p\n" + " reserved: 0x%.8x\n" + " id: %.8d\n", + kmsg->ikm_header->msgh_size, + kmsg->ikm_header->msgh_bits, + kmsg->ikm_header->msgh_remote_port, + kmsg->ikm_header->msgh_local_port, + kmsg->ikm_header->msgh_reserved, + kmsg->ikm_header->msgh_id); + +#if defined(__LP64__) + if (current_task() != kernel_task) { /* don't if receiver expects fully-cooked in-kernel msg; ux_exception */ + mach_msg_legacy_header_t *legacy_header = + (mach_msg_legacy_header_t *)((vm_offset_t)(kmsg->ikm_header) + LEGACY_HEADER_SIZE_DELTA); + + mach_msg_bits_t bits = kmsg->ikm_header->msgh_bits; + mach_msg_size_t msg_size = kmsg->ikm_header->msgh_size; + mach_port_name_t remote_port = CAST_MACH_PORT_TO_NAME(kmsg->ikm_header->msgh_remote_port); + mach_port_name_t local_port = CAST_MACH_PORT_TO_NAME(kmsg->ikm_header->msgh_local_port); + mach_msg_size_t reserved = kmsg->ikm_header->msgh_reserved; + mach_msg_id_t id = kmsg->ikm_header->msgh_id; + + legacy_header->msgh_id = id; + legacy_header->msgh_reserved = reserved; + legacy_header->msgh_local_port = local_port; + legacy_header->msgh_remote_port = 
remote_port; + legacy_header->msgh_size = msg_size - LEGACY_HEADER_SIZE_DELTA; + legacy_header->msgh_bits = bits; + + size -= LEGACY_HEADER_SIZE_DELTA; + kmsg->ikm_header = (mach_msg_header_t *)legacy_header; + } +#endif + + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) { + kprintf("ipc_kmsg_put header+body: %d\n", (size)); + uint32_t i; + for(i=0;i*4 < size;i++) + { + kprintf("%.4x\n",((uint32_t *)kmsg->ikm_header)[i]); + } + kprintf("type: %d\n", ((mach_msg_type_descriptor_t *)(((mach_msg_base_t *)kmsg->ikm_header)+1))->type); + } if (copyoutmsg((const char *) kmsg->ikm_header, msg_addr, size)) mr = MACH_RCV_INVALID_DATA; else @@ -1078,8 +1559,8 @@ ipc_kmsg_copyin_header( mach_port_name_t notify) { mach_msg_bits_t mbits = msg->msgh_bits & MACH_MSGH_BITS_USER; - mach_port_name_t dest_name = (mach_port_name_t)msg->msgh_remote_port; - mach_port_name_t reply_name = (mach_port_name_t)msg->msgh_local_port; + mach_port_name_t dest_name = CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port); + mach_port_name_t reply_name = CAST_MACH_PORT_TO_NAME(msg->msgh_local_port); kern_return_t kr; mach_msg_type_name_t dest_type = MACH_MSGH_BITS_REMOTE(mbits); @@ -1321,7 +1802,7 @@ ipc_kmsg_copyin_header( if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) ipc_entry_dealloc(space, dest_name, entry); - reply_port = (ipc_object_t) reply_name; + reply_port = (ipc_object_t)CAST_MACH_NAME_TO_PORT(reply_name); reply_soright = IP_NULL; } else { ipc_entry_t dest_entry, reply_entry; @@ -1446,6 +1927,314 @@ ipc_kmsg_copyin_header( return MACH_SEND_INVALID_DEST; } +mach_msg_descriptor_t *ipc_kmsg_copyin_port_descriptor( + volatile mach_msg_port_descriptor_t *dsc, + mach_msg_legacy_port_descriptor_t *user_dsc, + ipc_space_t space, + ipc_object_t dest, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr); + +void ipc_print_type_name( + int type_name); +mach_msg_descriptor_t * +ipc_kmsg_copyin_port_descriptor( + volatile mach_msg_port_descriptor_t *dsc, + 
mach_msg_legacy_port_descriptor_t *user_dsc_in, + ipc_space_t space, + ipc_object_t dest, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr) +{ + volatile mach_msg_legacy_port_descriptor_t *user_dsc = user_dsc_in; + mach_msg_type_name_t user_disp; + mach_msg_type_name_t result_disp; + mach_port_name_t name; + ipc_object_t object; + + user_disp = user_dsc->disposition; + result_disp = ipc_object_copyin_type(user_disp); + + name = (mach_port_name_t)user_dsc->name; + if (MACH_PORT_VALID(name)) { + + kern_return_t kr = ipc_object_copyin(space, name, user_disp, &object); + if (kr != KERN_SUCCESS) { + *mr = MACH_SEND_INVALID_RIGHT; + return NULL; + } + + if ((result_disp == MACH_MSG_TYPE_PORT_RECEIVE) && + ipc_port_check_circularity((ipc_port_t) object, + (ipc_port_t) dest)) { + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + } + dsc->name = (ipc_port_t) object; + } else { + dsc->name = CAST_MACH_NAME_TO_PORT(name); + } + dsc->disposition = result_disp; + dsc->type = MACH_MSG_PORT_DESCRIPTOR; + + dsc->pad_end = 0; // debug, unnecessary + + return (mach_msg_descriptor_t *)(user_dsc_in+1); +} + +mach_msg_descriptor_t * ipc_kmsg_copyin_ool_descriptor( + mach_msg_ool_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_offset_t *paddr, + vm_map_copy_t *copy, + vm_size_t *space_needed, + vm_map_t map, + mach_msg_return_t *mr); +mach_msg_descriptor_t * +ipc_kmsg_copyin_ool_descriptor( + mach_msg_ool_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_offset_t *paddr, + vm_map_copy_t *copy, + vm_size_t *space_needed, + vm_map_t map, + mach_msg_return_t *mr) +{ + vm_size_t length; + boolean_t dealloc; + mach_msg_copy_options_t copy_options; + mach_vm_offset_t addr; + mach_msg_descriptor_type_t dsc_type; + + if (is_64bit) { + mach_msg_ool_descriptor64_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + + addr = (mach_vm_offset_t) user_ool_dsc->address; + length = user_ool_dsc->size; + dealloc = user_ool_dsc->deallocate; + 
copy_options = user_ool_dsc->copy; + dsc_type = user_ool_dsc->type; + + user_dsc = (typeof(user_dsc))(user_ool_dsc+1); + } else { + mach_msg_ool_descriptor32_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + + addr = CAST_USER_ADDR_T(user_ool_dsc->address); + dealloc = user_ool_dsc->deallocate; + copy_options = user_ool_dsc->copy; + dsc_type = user_ool_dsc->type; + length = user_ool_dsc->size; + + user_dsc = (typeof(user_dsc))(user_ool_dsc+1); + } + + dsc->size = (mach_msg_size_t)length; + dsc->deallocate = dealloc; + dsc->copy = copy_options; + dsc->type = dsc_type; + + if (length == 0) { + dsc->address = NULL; + } else if ((length >= MSG_OOL_SIZE_SMALL) && + (copy_options == MACH_MSG_PHYSICAL_COPY) && !dealloc) { + + /* + * If the request is a physical copy and the source + * is not being deallocated, then allocate space + * in the kernel's pageable ipc copy map and copy + * the data in. The semantics guarantee that the + * data will have been physically copied before + * the send operation terminates. Thus if the data + * is not being deallocated, we must be prepared + * to page if the region is sufficiently large. + */ + if (copyin(addr, (char *)*paddr, length)) { + *mr = MACH_SEND_INVALID_MEMORY; + return NULL; + } + + /* + * The kernel ipc copy map is marked no_zero_fill. + * If the transfer is not a page multiple, we need + * to zero fill the balance. + */ + if (!page_aligned(length)) { + (void) memset((void *) (*paddr + length), 0, + round_page(length) - length); + } + if (vm_map_copyin(ipc_kernel_copy_map, (vm_map_address_t)*paddr, + (vm_map_size_t)length, TRUE, copy) != KERN_SUCCESS) { + *mr = MACH_MSG_VM_KERNEL; + return NULL; + } + dsc->address = (void *)*copy; + *paddr += round_page(length); + *space_needed -= round_page(length); + } else { + + /* + * Make a vm_map_copy_t of the of the data. If the + * data is small, this will do an optimized physical + * copy. Otherwise, it will do a virtual copy. 
+ * + * NOTE: A virtual copy is OK if the original is being + * deallocted, even if a physical copy was requested. + */ + kern_return_t kr = vm_map_copyin(map, addr, + (vm_map_size_t)length, dealloc, copy); + if (kr != KERN_SUCCESS) { + *mr = (kr == KERN_RESOURCE_SHORTAGE) ? + MACH_MSG_VM_KERNEL : + MACH_SEND_INVALID_MEMORY; + return NULL; + } + dsc->address = (void *)*copy; + } + return user_dsc; +} + +mach_msg_descriptor_t * ipc_kmsg_copyin_ool_ports_descriptor( + mach_msg_ool_ports_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_map_t map, + ipc_space_t space, + ipc_object_t dest, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr); +mach_msg_descriptor_t * +ipc_kmsg_copyin_ool_ports_descriptor( + mach_msg_ool_ports_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_map_t map, + ipc_space_t space, + ipc_object_t dest, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr) +{ + void *data; + ipc_object_t *objects; + unsigned int i; + mach_vm_offset_t addr; + mach_msg_type_name_t user_disp; + mach_msg_type_name_t result_disp; + mach_msg_type_number_t count; + mach_msg_copy_options_t copy_option; + boolean_t deallocate; + mach_msg_descriptor_type_t type; + vm_size_t ports_length, names_length; + + if (is_64bit) { + mach_msg_ool_ports_descriptor64_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + + addr = (mach_vm_offset_t)user_ool_dsc->address; + count = user_ool_dsc->count; + deallocate = user_ool_dsc->deallocate; + copy_option = user_ool_dsc->copy; + user_disp = user_ool_dsc->disposition; + type = user_ool_dsc->type; + + user_dsc = (typeof(user_dsc))(user_ool_dsc+1); + } else { + mach_msg_ool_ports_descriptor32_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + + addr = CAST_USER_ADDR_T(user_ool_dsc->address); + count = user_ool_dsc->count; + deallocate = user_ool_dsc->deallocate; + copy_option = user_ool_dsc->copy; + user_disp = user_ool_dsc->disposition; + type = user_ool_dsc->type; + + user_dsc = 
(typeof(user_dsc))(user_ool_dsc+1); + } + + dsc->deallocate = deallocate; + dsc->copy = copy_option; + dsc->type = type; + dsc->count = count; + dsc->address = NULL; /* for now */ + + result_disp = ipc_object_copyin_type(user_disp); + dsc->disposition = result_disp; + + if (count > (INT_MAX / sizeof(mach_port_t))) { + *mr = MACH_SEND_TOO_LARGE; + return NULL; + } + + /* calculate length of data in bytes, rounding up */ + ports_length = count * sizeof(mach_port_t); + names_length = count * sizeof(mach_port_name_t); + + if (ports_length == 0) { + return user_dsc; + } + + data = kalloc(ports_length); + + if (data == NULL) { + *mr = MACH_SEND_NO_BUFFER; + return NULL; + } + +#ifdef __LP64__ + mach_port_name_t *names = &((mach_port_name_t *)data)[count]; +#else + mach_port_name_t *names = ((mach_port_name_t *)data); +#endif + + if (copyinmap(map, addr, names, names_length) != KERN_SUCCESS) { + kfree(data, ports_length); + *mr = MACH_SEND_INVALID_MEMORY; + return NULL; + } + + if (deallocate) { + (void) mach_vm_deallocate(map, addr, (mach_vm_size_t)ports_length); + } + + objects = (ipc_object_t *) data; + dsc->address = data; + + for ( i = 0; i < count; i++) { + mach_port_name_t name = names[i]; + ipc_object_t object; + + if (!MACH_PORT_VALID(name)) { + objects[i] = (ipc_object_t)CAST_MACH_NAME_TO_PORT(name); + continue; + } + + kern_return_t kr = ipc_object_copyin(space, name, user_disp, &object); + + if (kr != KERN_SUCCESS) { + unsigned int j; + + for(j = 0; j < i; j++) { + object = objects[j]; + if (IPC_OBJECT_VALID(object)) + ipc_object_destroy(object, result_disp); + } + kfree(data, ports_length); + dsc->address = NULL; + *mr = MACH_SEND_INVALID_RIGHT; + return NULL; + } + + if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && + ipc_port_check_circularity( + (ipc_port_t) object, + (ipc_port_t) dest)) + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + + objects[i] = object; + } + + return user_dsc; +} + /* * Routine: ipc_kmsg_copyin_body * Purpose: @@ 
-1468,8 +2257,6 @@ ipc_kmsg_copyin_header( * MACH_MSG_INVALID_RT_DESCRIPTOR Dealloc and RT are incompatible */ -#define DESC_COUNT_SMALL 64 - mach_msg_return_t ipc_kmsg_copyin_body( ipc_kmsg_t kmsg, @@ -1479,18 +2266,18 @@ ipc_kmsg_copyin_body( ipc_object_t dest; mach_msg_body_t *body; mach_msg_descriptor_t *daddr, *naddr; + mach_msg_descriptor_t *user_addr, *kern_addr; mach_msg_type_number_t dsc_count; - boolean_t differs = MAP_SIZE_DIFFERS(map); + boolean_t is_task_64bit = (map->max_offset > VM_MAX_ADDRESS); boolean_t complex = FALSE; vm_size_t space_needed = 0; - vm_size_t desc_size_space[DESC_COUNT_SMALL]; - vm_size_t *user_desc_sizes = NULL; vm_offset_t paddr = 0; vm_map_copy_t copy = VM_MAP_COPY_NULL; - kern_return_t kr; mach_msg_type_number_t i; mach_msg_return_t mr = MACH_MSG_SUCCESS; - + + vm_size_t descriptor_size = 0; + /* * Determine if the target is a kernel port. */ @@ -1502,15 +2289,6 @@ ipc_kmsg_copyin_body( if (dsc_count == 0) return MACH_MSG_SUCCESS; - if (differs) { - user_desc_sizes = (dsc_count <= DESC_COUNT_SMALL) ? 
- &desc_size_space : kalloc(dsc_count * sizeof(vm_size_t)); - if (user_desc_sizes == NULL) { - ipc_kmsg_clean_partial(kmsg, 0, NULL, 0, 0); - return KERN_RESOURCE_SHORTAGE; - } - } - /* * Make an initial pass to determine kernal VM space requirements for * physical copies and possible contraction of the descriptors from @@ -1521,21 +2299,22 @@ ipc_kmsg_copyin_body( daddr = naddr; /* make sure the descriptor fits in the message */ - if (differs) { + if (is_task_64bit) { switch (daddr->type.type) { case MACH_MSG_OOL_DESCRIPTOR: case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: case MACH_MSG_OOL_PORTS_DESCRIPTOR: - user_desc_sizes[i] = sizeof(OTHER_OOL_DESCRIPTOR); - break; + descriptor_size += 16; + naddr = (typeof(naddr))((vm_offset_t)daddr + 16); + break; default: - user_desc_sizes[i] = sizeof(*daddr); - break; + descriptor_size += 12; + naddr = (typeof(naddr))((vm_offset_t)daddr + 12); + break; } - naddr = (mach_msg_descriptor_t *) - ((vm_offset_t)daddr + user_desc_sizes[i]); } else { - naddr = daddr + 1; + descriptor_size += 12; + naddr = (typeof(naddr))((vm_offset_t)daddr + 12); } if (naddr > (mach_msg_descriptor_t *) @@ -1550,8 +2329,8 @@ ipc_kmsg_copyin_body( case MACH_MSG_OOL_DESCRIPTOR: case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - size = (differs) ? - ((OTHER_OOL_DESCRIPTOR *)daddr)->size : + size = (is_task_64bit) ? + ((mach_msg_ool_descriptor64_t *)daddr)->size : daddr->out_of_line.size; if (daddr->out_of_line.copy != MACH_MSG_PHYSICAL_COPY && @@ -1592,308 +2371,70 @@ ipc_kmsg_copyin_body( * space. */ if (space_needed) { - if (vm_allocate(ipc_kernel_copy_map, &paddr, space_needed, VM_FLAGS_ANYWHERE) != - KERN_SUCCESS) { - ipc_kmsg_clean_partial(kmsg, 0, NULL, 0, 0); - mr = MACH_MSG_VM_KERNEL; - goto out; - } + if (vm_allocate(ipc_kernel_copy_map, &paddr, space_needed, + VM_FLAGS_ANYWHERE) != KERN_SUCCESS) { + ipc_kmsg_clean_partial(kmsg, 0, NULL, 0, 0); + mr = MACH_MSG_VM_KERNEL; + goto out; + } } - /* - * handle the OOL regions and port descriptors. 
- * We process them in reverse order starting with the last one - * scanned above. That way, we can compact them up against - * the message body (if the user-descriptor size is larger than - * the kernel representation). - */ - naddr -= 1; - do { - - switch (daddr->type.type) { - - /* port descriptors are the same size everywhere, how nice */ - case MACH_MSG_PORT_DESCRIPTOR: { - mach_msg_type_name_t user_disp; - mach_msg_type_name_t result_disp; - mach_port_name_t name; - ipc_object_t object; - volatile mach_msg_port_descriptor_t *dsc; - volatile mach_msg_port_descriptor_t *user_dsc; - - user_dsc = &daddr->port; - dsc = &naddr->port; - - user_disp = user_dsc->disposition; - result_disp = ipc_object_copyin_type(user_disp); - - name = (mach_port_name_t)user_dsc->name; - if (MACH_PORT_VALID(name)) { - - kr = ipc_object_copyin(space, name, user_disp, &object); - if (kr != KERN_SUCCESS) { - mr = MACH_SEND_INVALID_RIGHT; - break; - } - - if ((result_disp == MACH_MSG_TYPE_PORT_RECEIVE) && - ipc_port_check_circularity((ipc_port_t) object, - (ipc_port_t) dest)) { - kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; - } - dsc->name = (ipc_port_t) object; - } else { - dsc->name = (mach_port_t)name; - } - dsc->disposition = result_disp; - dsc->type = MACH_MSG_PORT_DESCRIPTOR; - complex = TRUE; - break; - } - - /* out of line descriptors differ in size between 32 and 64 bit processes */ - case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - case MACH_MSG_OOL_DESCRIPTOR: { - vm_size_t length; - boolean_t dealloc; - mach_msg_copy_options_t copy_options; - mach_vm_offset_t addr; - mach_msg_descriptor_type_t dsc_type; - - volatile mach_msg_ool_descriptor_t *dsc; - - if (differs) { - volatile OTHER_OOL_DESCRIPTOR *user_dsc; - - user_dsc = (OTHER_OOL_DESCRIPTOR *)&daddr->out_of_line; - addr = (mach_vm_offset_t) user_dsc->address; - length = user_dsc->size; - dealloc = user_dsc->deallocate; - copy_options = user_dsc->copy; - dsc_type = user_dsc->type; - } else { - volatile 
mach_msg_ool_descriptor_t *user_dsc; - - user_dsc = &daddr->out_of_line; - addr = CAST_USER_ADDR_T(user_dsc->address); - dealloc = user_dsc->deallocate; - copy_options = user_dsc->copy; - dsc_type = user_dsc->type; - length = user_dsc->size; - } - - dsc = &naddr->out_of_line; - dsc->size = length; - dsc->deallocate = dealloc; - dsc->copy = copy_options; - dsc->type = dsc_type; - - if (length == 0) { - dsc->address = NULL; - } else if ((length >= MSG_OOL_SIZE_SMALL) && - (copy_options == MACH_MSG_PHYSICAL_COPY) && !dealloc) { - - /* - * If the request is a physical copy and the source - * is not being deallocated, then allocate space - * in the kernel's pageable ipc copy map and copy - * the data in. The semantics guarantee that the - * data will have been physically copied before - * the send operation terminates. Thus if the data - * is not being deallocated, we must be prepared - * to page if the region is sufficiently large. - */ - if (copyin(addr, (char *) paddr, length)) { - mr = MACH_SEND_INVALID_MEMORY; - break; - } - - /* - * The kernel ipc copy map is marked no_zero_fill. - * If the transfer is not a page multiple, we need - * to zero fill the balance. - */ - if (!page_aligned(length)) { - (void) memset((void *) (paddr + length), 0, - round_page(length) - length); - } - if (vm_map_copyin(ipc_kernel_copy_map, (vm_map_address_t)paddr, - (vm_map_size_t)length, TRUE, ©) != KERN_SUCCESS) { - mr = MACH_MSG_VM_KERNEL; - break; - } - dsc->address = (void *) copy; - paddr += round_page(length); - space_needed -= round_page(length); - } else { - - /* - * Make a vm_map_copy_t of the of the data. If the - * data is small, this will do an optimized physical - * copy. Otherwise, it will do a virtual copy. - * - * NOTE: A virtual copy is OK if the original is being - * deallocted, even if a physical copy was requested. - */ - kr = vm_map_copyin(map, addr, - (vm_map_size_t)length, dealloc, ©); - if (kr != KERN_SUCCESS) { - mr = (kr == KERN_RESOURCE_SHORTAGE) ? 
- MACH_MSG_VM_KERNEL : - MACH_SEND_INVALID_MEMORY; - break; - } - dsc->address = (void *) copy; - } - complex = TRUE; - break; - } - case MACH_MSG_OOL_PORTS_DESCRIPTOR: { - vm_size_t length; - void *data; - ipc_object_t *objects; - unsigned int j; - mach_vm_offset_t addr; - mach_msg_type_name_t user_disp; - mach_msg_type_name_t result_disp; - mach_msg_type_number_t count; - mach_msg_copy_options_t copy_option; - boolean_t deallocate; - - volatile mach_msg_ool_ports_descriptor_t *dsc; - - if (differs) { - volatile OTHER_OOL_PORTS_DESCRIPTOR *user_dsc; - - user_dsc = (OTHER_OOL_PORTS_DESCRIPTOR *)&daddr->ool_ports; - addr = (mach_vm_offset_t)user_dsc->address; - count = user_dsc->count; - deallocate = user_dsc->deallocate; - copy_option = user_dsc->copy; - user_disp = user_dsc->disposition; - } else { - volatile mach_msg_ool_ports_descriptor_t *user_dsc; - - user_dsc = &daddr->ool_ports; - addr = CAST_USER_ADDR_T(user_dsc->address); - count = user_dsc->count; - deallocate = user_dsc->deallocate; - copy_option = user_dsc->copy; - user_disp = user_dsc->disposition; - } - - dsc = &naddr->ool_ports; - dsc->deallocate = deallocate; - dsc->copy = copy_option; - dsc->type = daddr->type.type; - dsc->count = count; - dsc->address = NULL; /* for now */ - - result_disp = ipc_object_copyin_type(user_disp); - dsc->disposition = result_disp; - - /* calculate length of data in bytes, rounding up */ - length = count * sizeof(mach_port_name_t); - - if (length == 0) { - complex = TRUE; - break; - } - - data = kalloc(length); - - if (data == NULL) { - mr = MACH_SEND_NO_BUFFER; - break; - } - - if (copyinmap(map, addr, data, length) != KERN_SUCCESS) { - kfree(data, length); - mr = MACH_SEND_INVALID_MEMORY; - break; - } - - if (deallocate) { - (void) mach_vm_deallocate(map, addr, (mach_vm_size_t)length); - } - - objects = (ipc_object_t *) data; - dsc->address = data; - - for ( j = 0; j < count; j++) { - mach_port_name_t port = (mach_port_name_t) objects[j]; - ipc_object_t object; - - if 
(!MACH_PORT_VALID(port)) - continue; - - kr = ipc_object_copyin(space, port, user_disp, &object); - - if (kr != KERN_SUCCESS) { - unsigned int k; + /* user_addr = just after base as it was copied in */ + user_addr = (mach_msg_descriptor_t *)((vm_offset_t)kmsg->ikm_header + sizeof(mach_msg_base_t)); + /* Shift the mach_msg_base_t down to make for dsc_count*16bytes of descriptors */ + if(descriptor_size != 16*dsc_count) { + vm_offset_t dsc_adjust = 16*dsc_count - descriptor_size; + memmove((char *)(((vm_offset_t)kmsg->ikm_header) - dsc_adjust), kmsg->ikm_header, sizeof(mach_msg_base_t)); + kmsg->ikm_header = (mach_msg_header_t *)((vm_offset_t)kmsg->ikm_header - dsc_adjust); + /* Update the message size for the larger in-kernel representation */ + kmsg->ikm_header->msgh_size += (mach_msg_size_t)dsc_adjust; + } - for(k = 0; k < j; k++) { - object = objects[k]; - if (IPC_OBJECT_VALID(object)) - ipc_object_destroy(object, result_disp); - } - kfree(data, length); - dsc->address = NULL; - mr = MACH_SEND_INVALID_RIGHT; - break; - } - - if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && - ipc_port_check_circularity( - (ipc_port_t) object, - (ipc_port_t) dest)) - kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; - - objects[j] = object; - } - - complex = TRUE; - break; - } - default: { - /* - * Invalid descriptor - */ - mr = MACH_SEND_INVALID_TYPE; - break; - } - } - if (MACH_MSG_SUCCESS != mr) { - ipc_kmsg_clean_partial(kmsg, dsc_count - i, - naddr + 1, paddr, space_needed); - goto out; - } + /* kern_addr = just after base after it has been (conditionally) moved */ + kern_addr = (mach_msg_descriptor_t *)((vm_offset_t)kmsg->ikm_header + sizeof(mach_msg_base_t)); + + /* handle the OOL regions and port descriptors. 
*/ + for(i=0;itype.type) { + case MACH_MSG_PORT_DESCRIPTOR: + user_addr = ipc_kmsg_copyin_port_descriptor((mach_msg_port_descriptor_t *)kern_addr, + (mach_msg_legacy_port_descriptor_t *)user_addr, space, dest, kmsg, &mr); + kern_addr++; + complex = TRUE; + break; + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_DESCRIPTOR: + user_addr = ipc_kmsg_copyin_ool_descriptor((mach_msg_ool_descriptor_t *)kern_addr, + user_addr, is_task_64bit, &paddr, ©, &space_needed, map, &mr); + kern_addr++; + complex = TRUE; + break; + case MACH_MSG_OOL_PORTS_DESCRIPTOR: + user_addr = ipc_kmsg_copyin_ool_ports_descriptor((mach_msg_ool_ports_descriptor_t *)kern_addr, + user_addr, is_task_64bit, map, space, dest, kmsg, &mr); + kern_addr++; + complex = TRUE; + break; + default: + /* Invalid descriptor */ + mr = MACH_SEND_INVALID_TYPE; + break; + } - } while (--i > 0 - && - (daddr = (differs) ? (mach_msg_descriptor_t *)((vm_offset_t)(daddr) - - user_desc_sizes[i - 1]) : daddr - 1) - && - naddr--); + if (MACH_MSG_SUCCESS != mr) { + /* clean from start of message descriptors to i */ + ipc_kmsg_clean_partial(kmsg, i, + (mach_msg_descriptor_t *)((mach_msg_base_t *)kmsg->ikm_header + 1), + paddr, space_needed); + goto out; + } + } /* End of loop */ if (!complex) { kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_COMPLEX; } - - if (differs && naddr != daddr) { - mach_msg_base_t *old_base = (mach_msg_base_t *)kmsg->ikm_header; - mach_msg_base_t *new_base = (mach_msg_base_t *)naddr - 1; - - memmove(new_base, old_base, sizeof(mach_msg_base_t)); - new_base->header.msgh_size -= (vm_offset_t)naddr - (vm_offset_t)daddr; - kmsg->ikm_header = &new_base->header; - } - out: - if (differs && dsc_count > DESC_COUNT_SMALL) - kfree(user_desc_sizes, body->msgh_descriptor_count * sizeof(vm_size_t)); - return mr; } @@ -1936,10 +2477,29 @@ ipc_kmsg_copyin( if (mr != MACH_MSG_SUCCESS) return mr; + DEBUG_KPRINT_SYSCALL_IPC("ipc_kmsg_copyin header:\n%.8x\n%.8x\n%p\n%p\n%.8x\n%.8x\n", + 
kmsg->ikm_header->msgh_size, + kmsg->ikm_header->msgh_bits, + kmsg->ikm_header->msgh_remote_port, + kmsg->ikm_header->msgh_local_port, + kmsg->ikm_header->msgh_reserved, + kmsg->ikm_header->msgh_id); + if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) == 0) return MACH_MSG_SUCCESS; - return( ipc_kmsg_copyin_body( kmsg, space, map) ); + mr = ipc_kmsg_copyin_body( kmsg, space, map); + + if (DEBUG_KPRINT_SYSCALL_PREDICATE(DEBUG_KPRINT_SYSCALL_IPC_MASK)) + { + kprintf("body:\n"); + uint32_t i; + for(i=0;i*4 < (kmsg->ikm_header->msgh_size - sizeof(mach_msg_header_t));i++) + { + kprintf("%.4x\n",((uint32_t *)(kmsg->ikm_header + 1))[i]); + } + } + return mr; } /* @@ -1959,7 +2519,138 @@ ipc_kmsg_copyin( */ void -ipc_kmsg_copyin_from_kernel( +ipc_kmsg_copyin_from_kernel( + ipc_kmsg_t kmsg) +{ + mach_msg_bits_t bits = kmsg->ikm_header->msgh_bits; + mach_msg_type_name_t rname = MACH_MSGH_BITS_REMOTE(bits); + mach_msg_type_name_t lname = MACH_MSGH_BITS_LOCAL(bits); + ipc_object_t remote = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + ipc_object_t local = (ipc_object_t) kmsg->ikm_header->msgh_local_port; + + /* translate the destination and reply ports */ + + ipc_object_copyin_from_kernel(remote, rname); + if (IO_VALID(local)) + ipc_object_copyin_from_kernel(local, lname); + + /* + * The common case is a complex message with no reply port, + * because that is what the memory_object interface uses. 
+ */ + + if (bits == (MACH_MSGH_BITS_COMPLEX | + MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0))) { + bits = (MACH_MSGH_BITS_COMPLEX | + MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, 0)); + + kmsg->ikm_header->msgh_bits = bits; + } else { + bits = (MACH_MSGH_BITS_OTHER(bits) | + MACH_MSGH_BITS(ipc_object_copyin_type(rname), + ipc_object_copyin_type(lname))); + + kmsg->ikm_header->msgh_bits = bits; + if ((bits & MACH_MSGH_BITS_COMPLEX) == 0) + return; + } + { + mach_msg_descriptor_t *saddr; + mach_msg_body_t *body; + mach_msg_type_number_t i, count; + + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + saddr = (mach_msg_descriptor_t *) (body + 1); + count = body->msgh_descriptor_count; + + for (i = 0; i < count; i++, saddr++) { + + switch (saddr->type.type) { + + case MACH_MSG_PORT_DESCRIPTOR: { + mach_msg_type_name_t name; + ipc_object_t object; + mach_msg_port_descriptor_t *dsc; + + dsc = &saddr->port; + + /* this is really the type SEND, SEND_ONCE, etc. */ + name = dsc->disposition; + object = (ipc_object_t) dsc->name; + dsc->disposition = ipc_object_copyin_type(name); + + if (!IO_VALID(object)) { + break; + } + + ipc_object_copyin_from_kernel(object, name); + + /* CDY avoid circularity when the destination is also */ + /* the kernel. This check should be changed into an */ + /* assert when the new kobject model is in place since*/ + /* ports will not be used in kernel to kernel chats */ + + if (((ipc_port_t)remote)->ip_receiver != ipc_space_kernel) { + if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && + ipc_port_check_circularity((ipc_port_t) object, + (ipc_port_t) remote)) { + kmsg->ikm_header->msgh_bits |= + MACH_MSGH_BITS_CIRCULAR; + } + } + break; + } + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_DESCRIPTOR: { + /* + * The sender should supply ready-made memory, i.e. + * a vm_map_copy_t, so we don't need to do anything. 
+ */ + break; + } + case MACH_MSG_OOL_PORTS_DESCRIPTOR: { + ipc_object_t *objects; + unsigned int j; + mach_msg_type_name_t name; + mach_msg_ool_ports_descriptor_t *dsc; + + dsc = (mach_msg_ool_ports_descriptor_t *)&saddr->ool_ports; + + /* this is really the type SEND, SEND_ONCE, etc. */ + name = dsc->disposition; + dsc->disposition = ipc_object_copyin_type(name); + + objects = (ipc_object_t *) dsc->address; + + for ( j = 0; j < dsc->count; j++) { + ipc_object_t object = objects[j]; + + if (!IO_VALID(object)) + continue; + + ipc_object_copyin_from_kernel(object, name); + + if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && + ipc_port_check_circularity( + (ipc_port_t) object, + (ipc_port_t) remote)) + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; + } + break; + } + default: { +#if MACH_ASSERT + panic("ipc_kmsg_copyin_from_kernel: bad descriptor"); +#endif /* MACH_ASSERT */ + } + } + } + } +} + +#if IKM_SUPPORT_LEGACY +void +ipc_kmsg_copyin_from_kernel_legacy( ipc_kmsg_t kmsg) { mach_msg_bits_t bits = kmsg->ikm_header->msgh_bits; @@ -1995,28 +2686,42 @@ ipc_kmsg_copyin_from_kernel( return; } { - mach_msg_descriptor_t *saddr, *eaddr; + mach_msg_legacy_descriptor_t *saddr; + mach_msg_descriptor_t *daddr; mach_msg_body_t *body; + mach_msg_type_number_t i, count; body = (mach_msg_body_t *) (kmsg->ikm_header + 1); - saddr = (mach_msg_descriptor_t *) (body + 1); - eaddr = (mach_msg_descriptor_t *) saddr + body->msgh_descriptor_count; - - for ( ; saddr < eaddr; saddr++) { + saddr = (typeof(saddr)) (body + 1); + count = body->msgh_descriptor_count; + + if(count) { + vm_offset_t dsc_adjust = 4*count; + memmove((char *)(((vm_offset_t)kmsg->ikm_header) - dsc_adjust), kmsg->ikm_header, sizeof(mach_msg_base_t)); + kmsg->ikm_header = (mach_msg_header_t *)((vm_offset_t)kmsg->ikm_header - dsc_adjust); + /* Update the message size for the larger in-kernel representation */ + kmsg->ikm_header->msgh_size += dsc_adjust; + } + daddr = (mach_msg_descriptor_t 
*)((vm_offset_t)kmsg->ikm_header + sizeof(mach_msg_base_t)); + for (i = 0; i < count; i++, saddr++, daddr++) { switch (saddr->type.type) { case MACH_MSG_PORT_DESCRIPTOR: { mach_msg_type_name_t name; ipc_object_t object; - mach_msg_port_descriptor_t *dsc; + mach_msg_legacy_port_descriptor_t *dsc; + mach_msg_port_descriptor_t *dest_dsc; - dsc = &saddr->port; + dsc = (typeof(dsc))&saddr->port; + dest_dsc = &daddr->port; /* this is really the type SEND, SEND_ONCE, etc. */ name = dsc->disposition; - object = (ipc_object_t) dsc->name; - dsc->disposition = ipc_object_copyin_type(name); + object = (ipc_object_t) CAST_MACH_NAME_TO_PORT(dsc->name); + dest_dsc->disposition = ipc_object_copyin_type(name); + dest_dsc->name = (mach_port_t)object; + dest_dsc->type = MACH_MSG_PORT_DESCRIPTOR; if (!IO_VALID(object)) { break; @@ -2030,7 +2735,7 @@ ipc_kmsg_copyin_from_kernel( /* ports will not be used in kernel to kernel chats */ if (((ipc_port_t)remote)->ip_receiver != ipc_space_kernel) { - if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && + if ((dest_dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && ipc_port_check_circularity((ipc_port_t) object, (ipc_port_t) remote)) { kmsg->ikm_header->msgh_bits |= @@ -2040,28 +2745,47 @@ ipc_kmsg_copyin_from_kernel( break; } case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - case MACH_MSG_OOL_DESCRIPTOR: { - /* - * The sender should supply ready-made memory, i.e. - * a vm_map_copy_t, so we don't need to do anything. - */ + case MACH_MSG_OOL_DESCRIPTOR: { + /* The sender should supply ready-made memory, i.e. a vm_map_copy_t + * so we don't need to do anything special. 
*/ + + mach_msg_ool_descriptor32_t *source_dsc = &saddr->out_of_line32; + mach_msg_ool_descriptor_t *dest_dsc = (typeof(dest_dsc))&daddr->out_of_line; + + vm_offset_t address = source_dsc->address; + vm_size_t size = source_dsc->size; + boolean_t deallocate = source_dsc->deallocate; + mach_msg_copy_options_t copy = source_dsc->copy; + mach_msg_descriptor_type_t type = source_dsc->type; + + dest_dsc->address = (void *)address; + dest_dsc->size = size; + dest_dsc->deallocate = deallocate; + dest_dsc->copy = copy; + dest_dsc->type = type; break; } - case MACH_MSG_OOL_PORTS_DESCRIPTOR: { + case MACH_MSG_OOL_PORTS_DESCRIPTOR: { ipc_object_t *objects; unsigned int j; mach_msg_type_name_t name; - mach_msg_ool_ports_descriptor_t *dsc; + mach_msg_ool_ports_descriptor_t *dest_dsc; - dsc = &saddr->ool_ports; + mach_msg_ool_ports_descriptor32_t *source_dsc = &saddr->ool_ports32; + dest_dsc = (typeof(dest_dsc))&daddr->ool_ports; + + boolean_t deallocate = source_dsc->deallocate; + mach_msg_copy_options_t copy = source_dsc->copy; + mach_msg_size_t port_count = source_dsc->count; + mach_msg_type_name_t disposition = source_dsc->disposition; /* this is really the type SEND, SEND_ONCE, etc. 
*/ - name = dsc->disposition; - dsc->disposition = ipc_object_copyin_type(name); + name = disposition; + disposition = ipc_object_copyin_type(name); - objects = (ipc_object_t *) dsc->address; + objects = (ipc_object_t *) (uintptr_t)source_dsc->address; - for ( j = 0; j < dsc->count; j++) { + for ( j = 0; j < port_count; j++) { ipc_object_t object = objects[j]; if (!IO_VALID(object)) @@ -2069,12 +2793,19 @@ ipc_kmsg_copyin_from_kernel( ipc_object_copyin_from_kernel(object, name); - if ((dsc->disposition == MACH_MSG_TYPE_PORT_RECEIVE) && + if ((disposition == MACH_MSG_TYPE_PORT_RECEIVE) && ipc_port_check_circularity( (ipc_port_t) object, (ipc_port_t) remote)) kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_CIRCULAR; } + + dest_dsc->address = objects; + dest_dsc->deallocate = deallocate; + dest_dsc->copy = copy; + dest_dsc->disposition = disposition; + dest_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + dest_dsc->count = port_count; break; } default: { @@ -2086,6 +2817,7 @@ ipc_kmsg_copyin_from_kernel( } } } +#endif /* IKM_SUPPORT_LEGACY */ /* * Routine: ipc_kmsg_copyout_header @@ -2167,7 +2899,6 @@ ipc_kmsg_copyout_header( ipc_port_request_index_t request; if (!space->is_active) { - printf("ipc_kmsg_copyout_header: dead space\n"); is_write_unlock(space); return (MACH_RCV_HEADER_ERROR| MACH_MSG_IPC_SPACE); @@ -2218,7 +2949,7 @@ ipc_kmsg_copyout_header( goto copyout_dest; } - reply_name = (mach_port_name_t)reply; + reply_name = CAST_MACH_PORT_TO_NAME(reply); kr = ipc_entry_get(space, &reply_name, &entry); if (kr != KERN_SUCCESS) { ip_unlock(reply); @@ -2319,7 +3050,6 @@ ipc_kmsg_copyout_header( is_read_lock(space); if (!space->is_active) { - printf("ipc_kmsg_copyout_header: dead space2\n"); is_read_unlock(space); return MACH_RCV_HEADER_ERROR|MACH_MSG_IPC_SPACE; } @@ -2345,7 +3075,7 @@ ipc_kmsg_copyout_header( ip_lock(dest); is_read_unlock(space); - reply_name = (mach_port_name_t) reply; + reply_name = CAST_MACH_PORT_TO_NAME(reply); } /* @@ -2426,8 +3156,8 @@ 
ipc_kmsg_copyout_header( msg->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | MACH_MSGH_BITS(reply_type, dest_type)); - msg->msgh_local_port = (ipc_port_t)dest_name; - msg->msgh_remote_port = (ipc_port_t)reply_name; + msg->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); + msg->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); } return MACH_MSG_SUCCESS; @@ -2460,7 +3190,7 @@ ipc_kmsg_copyout_object( kern_return_t kr; if (!IO_VALID(object)) { - *namep = (mach_port_name_t) object; + *namep = CAST_MACH_PORT_TO_NAME(object); return MACH_MSG_SUCCESS; } @@ -2483,6 +3213,331 @@ ipc_kmsg_copyout_object( return MACH_MSG_SUCCESS; } +mach_msg_descriptor_t * +ipc_kmsg_copyout_port_descriptor(mach_msg_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + ipc_space_t space, + kern_return_t *mr); +mach_msg_descriptor_t * +ipc_kmsg_copyout_port_descriptor(mach_msg_descriptor_t *dsc, + mach_msg_descriptor_t *dest_dsc, + ipc_space_t space, + kern_return_t *mr) +{ + mach_port_t port; + mach_port_name_t name; + mach_msg_type_name_t disp; + + + /* Copyout port right carried in the message */ + port = dsc->port.name; + disp = dsc->port.disposition; + *mr |= ipc_kmsg_copyout_object(space, + (ipc_object_t)port, + disp, + &name); + + if(current_task() == kernel_task) + { + mach_msg_port_descriptor_t *user_dsc = (typeof(user_dsc))dest_dsc; + user_dsc--; // point to the start of this port descriptor + user_dsc->name = CAST_MACH_NAME_TO_PORT(name); + user_dsc->disposition = disp; + user_dsc->type = MACH_MSG_PORT_DESCRIPTOR; + dest_dsc = (typeof(dest_dsc))user_dsc; + } else { + mach_msg_legacy_port_descriptor_t *user_dsc = (typeof(user_dsc))dest_dsc; + user_dsc--; // point to the start of this port descriptor + user_dsc->name = CAST_MACH_PORT_TO_NAME(name); + user_dsc->disposition = disp; + user_dsc->type = MACH_MSG_PORT_DESCRIPTOR; + dest_dsc = (typeof(dest_dsc))user_dsc; + } + + return (mach_msg_descriptor_t *)dest_dsc; +} + +mach_msg_descriptor_t * 
+ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descriptor_t *user_dsc, int is_64bit, vm_map_t map, mach_msg_return_t *mr); +mach_msg_descriptor_t * +ipc_kmsg_copyout_ool_descriptor(mach_msg_ool_descriptor_t *dsc, mach_msg_descriptor_t *user_dsc, int is_64bit, vm_map_t map, mach_msg_return_t *mr) +{ + vm_map_copy_t copy; + mach_vm_offset_t rcv_addr; + mach_msg_copy_options_t copy_options; + mach_msg_size_t size; + mach_msg_descriptor_type_t dsc_type; + + //SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); + + copy = (vm_map_copy_t) dsc->address; + size = dsc->size; + copy_options = dsc->copy; + assert(copy_options != MACH_MSG_KALLOC_COPY_T); + dsc_type = dsc->type; + rcv_addr = 0; + + if (copy != VM_MAP_COPY_NULL) { + /* + * Check to see if there is an overwrite descriptor + * specified in the scatter list for this ool data. + * The descriptor has already been verified. + */ +#if 0 + if (saddr != MACH_MSG_DESCRIPTOR_NULL) { + if (differs) { + OTHER_OOL_DESCRIPTOR *scatter_dsc; + + scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; + if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { + rcv_addr = (mach_vm_offset_t) scatter_dsc->address; + copy_options = MACH_MSG_OVERWRITE; + } else { + copy_options = MACH_MSG_VIRTUAL_COPY; + } + } else { + mach_msg_ool_descriptor_t *scatter_dsc; + + scatter_dsc = &saddr->out_of_line; + if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { + rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); + copy_options = MACH_MSG_OVERWRITE; + } else { + copy_options = MACH_MSG_VIRTUAL_COPY; + } + } + INCREMENT_SCATTER(saddr, sdsc_count, differs); + } +#endif + + + /* + * Whether the data was virtually or physically + * copied we have a vm_map_copy_t for it. + * If there's an overwrite region specified + * overwrite it, otherwise do a virtual copy out. 
+ */ + kern_return_t kr; + if (copy_options == MACH_MSG_OVERWRITE && rcv_addr != 0) { + kr = vm_map_copy_overwrite(map, rcv_addr, + copy, TRUE); + } else { + kr = vm_map_copyout(map, &rcv_addr, copy); + } + if (kr != KERN_SUCCESS) { + if (kr == KERN_RESOURCE_SHORTAGE) + *mr |= MACH_MSG_VM_KERNEL; + else + *mr |= MACH_MSG_VM_SPACE; + vm_map_copy_discard(copy); + rcv_addr = 0; + size = 0; + } + } else { + rcv_addr = 0; + size = 0; + } + + /* + * Now update the descriptor as the user would see it. + * This may require expanding the descriptor to the user + * visible size. There is already space allocated for + * this in what naddr points to. + */ + if(current_task() == kernel_task) + { + mach_msg_ool_descriptor_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = (void *)(uintptr_t)rcv_addr; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->type = dsc_type; + user_ool_dsc->size = size; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } else if (is_64bit) { + mach_msg_ool_descriptor64_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = rcv_addr; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->type = dsc_type; + user_ool_dsc->size = size; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } else { + mach_msg_ool_descriptor32_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = CAST_DOWN_EXPLICIT(uint32_t, rcv_addr); + user_ool_dsc->size = size; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? 
+ TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->type = dsc_type; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } + return user_dsc; +} + +mach_msg_descriptor_t * +ipc_kmsg_copyout_ool_ports_descriptor(mach_msg_ool_ports_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_map_t map, + ipc_space_t space, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr); +mach_msg_descriptor_t * +ipc_kmsg_copyout_ool_ports_descriptor(mach_msg_ool_ports_descriptor_t *dsc, + mach_msg_descriptor_t *user_dsc, + int is_64bit, + vm_map_t map, + ipc_space_t space, + ipc_kmsg_t kmsg, + mach_msg_return_t *mr) +{ + mach_vm_offset_t rcv_addr; + mach_msg_type_name_t disp; + mach_msg_type_number_t count, i; + vm_size_t ports_length, names_length; + + mach_msg_copy_options_t copy_options = MACH_MSG_VIRTUAL_COPY; + + //SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); + + count = dsc->count; + disp = dsc->disposition; + ports_length = count * sizeof(mach_port_t); + names_length = count * sizeof(mach_port_name_t); + + if (ports_length != 0 && dsc->address != 0) { + + /* + * Check to see if there is an overwrite descriptor + * specified in the scatter list for this ool data. + * The descriptor has already been verified. 
+ */ +#if 0 + if (saddr != MACH_MSG_DESCRIPTOR_NULL) { + if (differs) { + OTHER_OOL_DESCRIPTOR *scatter_dsc; + + scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; + rcv_addr = (mach_vm_offset_t) scatter_dsc->address; + copy_options = scatter_dsc->copy; + } else { + mach_msg_ool_descriptor_t *scatter_dsc; + + scatter_dsc = &saddr->out_of_line; + rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); + copy_options = scatter_dsc->copy; + } + INCREMENT_SCATTER(saddr, sdsc_count, differs); + } +#endif + + if (copy_options == MACH_MSG_VIRTUAL_COPY) { + /* + * Dynamically allocate the region + */ + int anywhere = VM_MAKE_TAG(VM_MEMORY_MACH_MSG)| + VM_FLAGS_ANYWHERE; + + kern_return_t kr; + if ((kr = mach_vm_allocate(map, &rcv_addr, + (mach_vm_size_t)names_length, + anywhere)) != KERN_SUCCESS) { + ipc_kmsg_clean_body(kmsg, 1, (mach_msg_descriptor_t *)dsc); + rcv_addr = 0; + + if (kr == KERN_RESOURCE_SHORTAGE){ + *mr |= MACH_MSG_VM_KERNEL; + } else { + *mr |= MACH_MSG_VM_SPACE; + } + } + } + + /* + * Handle the port rights and copy out the names + * for those rights out to user-space. + */ + if (rcv_addr != 0) { + mach_port_t *objects = (mach_port_t *) dsc->address; + mach_port_name_t *names = (mach_port_name_t *) dsc->address; + + /* copyout port rights carried in the message */ + + for ( i = 0; i < count ; i++) { + ipc_object_t object = (ipc_object_t)objects[i]; + + *mr |= ipc_kmsg_copyout_object(space, object, + disp, &names[i]); + } + + /* copyout to memory allocated above */ + void *data = dsc->address; + if (copyoutmap(map, data, rcv_addr, names_length) != KERN_SUCCESS) + *mr |= MACH_MSG_VM_SPACE; + kfree(data, ports_length); + } + } else { + rcv_addr = 0; + } + + /* + * Now update the descriptor based on the information + * calculated above. 
+ */ + if(current_task() == kernel_task) { + mach_msg_ool_ports_descriptor_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = (void *)(uintptr_t)rcv_addr; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->disposition = disp; + user_ool_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + user_ool_dsc->count = count; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } if (is_64bit) { + mach_msg_ool_ports_descriptor64_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = rcv_addr; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? + TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->disposition = disp; + user_ool_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + user_ool_dsc->count = count; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } else { + mach_msg_ool_ports_descriptor32_t *user_ool_dsc = (typeof(user_ool_dsc))user_dsc; + user_ool_dsc--; + + user_ool_dsc->address = CAST_DOWN_EXPLICIT(uint32_t, rcv_addr); + user_ool_dsc->count = count; + user_ool_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? 
+ TRUE : FALSE; + user_ool_dsc->copy = copy_options; + user_ool_dsc->disposition = disp; + user_ool_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; + + user_dsc = (typeof(user_dsc))user_ool_dsc; + } + return user_dsc; +} + /* * Routine: ipc_kmsg_copyout_body * Purpose: @@ -2510,22 +3565,22 @@ ipc_kmsg_copyout_body( mach_msg_body_t *slist) { mach_msg_body_t *body; - mach_msg_descriptor_t *daddr, *naddr; + mach_msg_descriptor_t *kern_dsc, *user_dsc; mach_msg_descriptor_t *saddr; - mach_msg_type_number_t i, dsc_count, sdsc_count; + mach_msg_type_number_t dsc_count, sdsc_count; + int i; mach_msg_return_t mr = MACH_MSG_SUCCESS; - kern_return_t kr; - void *data; - boolean_t differs = MAP_SIZE_DIFFERS(map); + boolean_t is_task_64bit = (map->max_offset > VM_MAX_ADDRESS); body = (mach_msg_body_t *) (kmsg->ikm_header + 1); dsc_count = body->msgh_descriptor_count; - daddr = (mach_msg_descriptor_t *) (body + 1); + kern_dsc = (mach_msg_descriptor_t *) (body + 1); + /* Point user_dsc just after the end of all the descriptors */ + user_dsc = &kern_dsc[dsc_count]; - /* - * Do scatter list setup - */ + /* Do scatter list setup */ if (slist != MACH_MSG_BODY_NULL) { + panic("Scatter lists disabled"); saddr = (mach_msg_descriptor_t *) (slist + 1); sdsc_count = slist->msgh_descriptor_count; } @@ -2534,309 +3589,36 @@ ipc_kmsg_copyout_body( sdsc_count = 0; } - /* - * Compute the true size of the resulting descriptors - * after potential expansion and adjust the header - * and body location accordingly. 
- */ - if (differs) { - mach_msg_size_t dsc_adjust; - - naddr = daddr; - dsc_adjust = 0; - for (i = 0; i < dsc_count; i++, naddr++) - switch (naddr->type.type) { - case MACH_MSG_OOL_DESCRIPTOR: - case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - case MACH_MSG_OOL_PORTS_DESCRIPTOR: - dsc_adjust += DESC_SIZE_ADJUSTMENT; - break; - default: - break; - } - if (dsc_adjust) { - mach_msg_base_t *old_base = (mach_msg_base_t *)kmsg->ikm_header; - mach_msg_base_t *new_base; - - new_base = (mach_msg_base_t *)((vm_offset_t)old_base - dsc_adjust); - memmove(new_base, old_base, sizeof(mach_msg_base_t)); - kmsg->ikm_header = &new_base->header; - kmsg->ikm_header->msgh_size += dsc_adjust; - naddr = (mach_msg_descriptor_t *)(new_base + 1); - } else { - naddr = daddr; - } - } else { - naddr = daddr; + /* Now process the descriptors */ + for (i = dsc_count-1; i >= 0; i--) { + switch (kern_dsc[i].type.type) { + + case MACH_MSG_PORT_DESCRIPTOR: + user_dsc = ipc_kmsg_copyout_port_descriptor(&kern_dsc[i], user_dsc, space, &mr); + break; + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_DESCRIPTOR : + user_dsc = ipc_kmsg_copyout_ool_descriptor( + (mach_msg_ool_descriptor_t *)&kern_dsc[i], user_dsc, is_task_64bit, map, &mr); + break; + case MACH_MSG_OOL_PORTS_DESCRIPTOR : + user_dsc = ipc_kmsg_copyout_ool_ports_descriptor( + (mach_msg_ool_ports_descriptor_t *)&kern_dsc[i], user_dsc, is_task_64bit, map, space, kmsg, &mr); + break; + default : { + panic("untyped IPC copyout body: invalid message descriptor"); + } + } } - /* - * Now process the descriptors - */ - for ( i = 0; i < dsc_count; i++, daddr++ ) { - switch (daddr->type.type) { - - case MACH_MSG_PORT_DESCRIPTOR: { - volatile mach_msg_port_descriptor_t *dsc; - volatile mach_msg_port_descriptor_t *user_dsc; - mach_port_t port; - mach_port_name_t name; - mach_msg_type_name_t disp; - - /* - * Copyout port right carried in the message - */ - dsc = &daddr->port; - user_dsc = &naddr->port; - port = dsc->name; - disp = dsc->disposition; - 
mr |= ipc_kmsg_copyout_object(space, - (ipc_object_t)port, - disp, - &name); - user_dsc->name = (mach_port_t)name; - user_dsc->disposition = disp; - user_dsc->type = MACH_MSG_PORT_DESCRIPTOR; - naddr++; - break; - } - - case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - case MACH_MSG_OOL_DESCRIPTOR : { - vm_map_copy_t copy; - mach_vm_offset_t rcv_addr; - mach_msg_ool_descriptor_t *dsc; - mach_msg_copy_options_t copy_options; - mach_msg_size_t size; - mach_msg_descriptor_type_t dsc_type; - - SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); - - dsc = &daddr->out_of_line; - copy = (vm_map_copy_t) dsc->address; - size = dsc->size; - copy_options = dsc->copy; - assert(copy_options != MACH_MSG_KALLOC_COPY_T); - dsc_type = dsc->type; - - if (copy != VM_MAP_COPY_NULL) { - /* - * Check to see if there is an overwrite descriptor - * specified in the scatter list for this ool data. - * The descriptor has already been verified. - */ - if (saddr != MACH_MSG_DESCRIPTOR_NULL) { - if (differs) { - OTHER_OOL_DESCRIPTOR *scatter_dsc; - - scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; - if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { - rcv_addr = (mach_vm_offset_t) scatter_dsc->address; - copy_options = MACH_MSG_OVERWRITE; - } else { - rcv_addr = 0; - copy_options = MACH_MSG_VIRTUAL_COPY; - } - } else { - mach_msg_ool_descriptor_t *scatter_dsc; - - scatter_dsc = &saddr->out_of_line; - if (scatter_dsc->copy == MACH_MSG_OVERWRITE) { - rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); - copy_options = MACH_MSG_OVERWRITE; - } else { - rcv_addr = 0; - copy_options = MACH_MSG_VIRTUAL_COPY; - } - } - INCREMENT_SCATTER(saddr, sdsc_count, differs); - } - - - /* - * Whether the data was virtually or physically - * copied we have a vm_map_copy_t for it. - * If there's an overwrite region specified - * overwrite it, otherwise do a virtual copy out. 
- */ - if (copy_options == MACH_MSG_OVERWRITE) { - kr = vm_map_copy_overwrite(map, rcv_addr, - copy, TRUE); - } else { - kr = vm_map_copyout(map, &rcv_addr, copy); - } - if (kr != KERN_SUCCESS) { - if (kr == KERN_RESOURCE_SHORTAGE) - mr |= MACH_MSG_VM_KERNEL; - else - mr |= MACH_MSG_VM_SPACE; - vm_map_copy_discard(copy); - rcv_addr = 0; - size = 0; - } - } else { - rcv_addr = 0; - size = 0; - } - - /* - * Now update the descriptor as the user would see it. - * This may require expanding the descriptor to the user - * visible size. There is already space allocated for - * this in what naddr points to. - */ - if (differs) { - volatile OTHER_OOL_DESCRIPTOR *user_dsc; - - user_dsc = (OTHER_OOL_DESCRIPTOR *)naddr; - user_dsc->address = rcv_addr; - user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? - TRUE : FALSE; - user_dsc->copy = copy_options; - user_dsc->type = dsc_type; - user_dsc->size = size; - naddr = (mach_msg_descriptor_t *)((OTHER_OOL_DESCRIPTOR *)naddr + 1); - } else { - volatile mach_msg_ool_descriptor_t *user_dsc; - - user_dsc = &naddr->out_of_line; - user_dsc->address = CAST_DOWN(void *, rcv_addr); - user_dsc->size = size; - user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? - TRUE : FALSE; - user_dsc->copy = copy_options; - user_dsc->type = dsc_type; - naddr++; - } - break; - } - - case MACH_MSG_OOL_PORTS_DESCRIPTOR : { - mach_vm_offset_t rcv_addr; - mach_port_name_t *objects; - mach_msg_type_name_t disp; - mach_msg_type_number_t count, j; - vm_size_t length; - - volatile mach_msg_ool_ports_descriptor_t *dsc; - mach_msg_copy_options_t copy_options = MACH_MSG_VIRTUAL_COPY; - - SKIP_PORT_DESCRIPTORS(saddr, sdsc_count); - - dsc = &daddr->ool_ports; - count = dsc->count; - disp = dsc->disposition; - length = count * sizeof(mach_port_name_t); - - if (length != 0 && dsc->address != 0) { - - /* - * Check to see if there is an overwrite descriptor - * specified in the scatter list for this ool data. 
- * The descriptor has already been verified. - */ - if (saddr != MACH_MSG_DESCRIPTOR_NULL) { - if (differs) { - OTHER_OOL_DESCRIPTOR *scatter_dsc; - - scatter_dsc = (OTHER_OOL_DESCRIPTOR *)saddr; - rcv_addr = (mach_vm_offset_t) scatter_dsc->address; - copy_options = scatter_dsc->copy; - } else { - mach_msg_ool_descriptor_t *scatter_dsc; - - scatter_dsc = &saddr->out_of_line; - rcv_addr = CAST_USER_ADDR_T(scatter_dsc->address); - copy_options = scatter_dsc->copy; - } - INCREMENT_SCATTER(saddr, sdsc_count, differs); - } - - if (copy_options == MACH_MSG_VIRTUAL_COPY) { - /* - * Dynamically allocate the region - */ - int anywhere = VM_MAKE_TAG(VM_MEMORY_MACH_MSG)| - VM_FLAGS_ANYWHERE; - - if ((kr = mach_vm_allocate(map, &rcv_addr, - (mach_vm_size_t)length, - anywhere)) != KERN_SUCCESS) { - ipc_kmsg_clean_body(kmsg, 1, daddr); - rcv_addr = 0; - - if (kr == KERN_RESOURCE_SHORTAGE){ - mr |= MACH_MSG_VM_KERNEL; - } else { - mr |= MACH_MSG_VM_SPACE; - } - } - } - - - /* - * Handle the port rights and copy out the names - * for those rights out to user-space. - */ - if (rcv_addr != 0) { - objects = (mach_port_name_t *) dsc->address ; - - /* copyout port rights carried in the message */ - - for ( j = 0; j < count ; j++) { - ipc_object_t object = - (ipc_object_t) objects[j]; - - mr |= ipc_kmsg_copyout_object(space, object, - disp, &objects[j]); - } - - /* copyout to memory allocated above */ - data = dsc->address; - if (copyoutmap(map, data, rcv_addr, length) != KERN_SUCCESS) - mr |= MACH_MSG_VM_SPACE; - kfree(data, length); - } - } else { - rcv_addr = 0; - } - - /* - * Now update the descriptor based on the information - * calculated above. - */ - if (differs) { - volatile OTHER_OOL_PORTS_DESCRIPTOR *user_dsc; - - user_dsc = (OTHER_OOL_PORTS_DESCRIPTOR *)naddr; - user_dsc->address = rcv_addr; - user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? 
- TRUE : FALSE; - user_dsc->copy = copy_options; - user_dsc->disposition = disp; - user_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; - user_dsc->count = count; - naddr = (mach_msg_descriptor_t *)((OTHER_OOL_PORTS_DESCRIPTOR *)naddr + 1); - } else { - volatile mach_msg_ool_ports_descriptor_t *user_dsc; - - user_dsc = &naddr->ool_ports; - user_dsc->address = CAST_DOWN(void *, rcv_addr); - user_dsc->count = count; - user_dsc->deallocate = (copy_options == MACH_MSG_VIRTUAL_COPY) ? - TRUE : FALSE; - user_dsc->copy = copy_options; - user_dsc->disposition = disp; - user_dsc->type = MACH_MSG_OOL_PORTS_DESCRIPTOR; - naddr++; - } - break; - } - default : { - panic("untyped IPC copyout body: invalid message descriptor"); - } - } + if(user_dsc != kern_dsc) { + vm_offset_t dsc_adjust = (vm_offset_t)user_dsc - (vm_offset_t)kern_dsc; + memmove((char *)((vm_offset_t)kmsg->ikm_header + dsc_adjust), kmsg->ikm_header, sizeof(mach_msg_base_t)); + kmsg->ikm_header = (mach_msg_header_t *)((vm_offset_t)kmsg->ikm_header + dsc_adjust); + /* Update the message size for the smaller user representation */ + kmsg->ikm_header->msgh_size -= (mach_msg_size_t)dsc_adjust; } + return mr; } @@ -2858,35 +3640,42 @@ ipc_kmsg_copyout_size( ipc_kmsg_t kmsg, vm_map_t map) { - mach_msg_size_t send_size; + mach_msg_size_t send_size; - send_size = kmsg->ikm_header->msgh_size; + send_size = kmsg->ikm_header->msgh_size; - if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) && - MAP_SIZE_DIFFERS(map)) { + boolean_t is_task_64bit = (map->max_offset > VM_MAX_ADDRESS); - mach_msg_body_t *body; - mach_msg_descriptor_t *saddr, *eaddr; +#if defined(__LP64__) + send_size -= LEGACY_HEADER_SIZE_DELTA; +#endif - body = (mach_msg_body_t *) (kmsg->ikm_header + 1); - saddr = (mach_msg_descriptor_t *) (body + 1); - eaddr = saddr + body->msgh_descriptor_count; - - for ( ; saddr < eaddr; saddr++ ) { - switch (saddr->type.type) { + if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_COMPLEX) { - case MACH_MSG_OOL_DESCRIPTOR: 
- case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: - case MACH_MSG_OOL_PORTS_DESCRIPTOR: - send_size += DESC_SIZE_ADJUSTMENT; - break; + mach_msg_body_t *body; + mach_msg_descriptor_t *saddr, *eaddr; - default: - break; - } - } - } - return send_size; + body = (mach_msg_body_t *) (kmsg->ikm_header + 1); + saddr = (mach_msg_descriptor_t *) (body + 1); + eaddr = saddr + body->msgh_descriptor_count; + + for ( ; saddr < eaddr; saddr++ ) { + switch (saddr->type.type) { + case MACH_MSG_OOL_DESCRIPTOR: + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_PORTS_DESCRIPTOR: + if(!is_task_64bit) + send_size -= DESC_SIZE_ADJUSTMENT; + break; + case MACH_MSG_PORT_DESCRIPTOR: + send_size -= DESC_SIZE_ADJUSTMENT; + break; + default: + break; + } + } + } + return send_size; } /* @@ -2919,7 +3708,6 @@ ipc_kmsg_copyout( mr = ipc_kmsg_copyout_header(kmsg->ikm_header, space, notify); if (mr != MACH_MSG_SUCCESS) { - printf("ipc_kmsg_copyout: ipc_kmsg_copyout_header failed: %d\n", mr); return mr; } @@ -2974,8 +3762,8 @@ ipc_kmsg_copyout_pseudo( ipc_kmsg_copyout_object(space, reply, reply_type, &reply_name)); kmsg->ikm_header->msgh_bits = mbits &~ MACH_MSGH_BITS_CIRCULAR; - kmsg->ikm_header->msgh_remote_port = (ipc_port_t)dest_name; - kmsg->ikm_header->msgh_local_port = (ipc_port_t)reply_name; + kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(dest_name); + kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(reply_name); if (mbits & MACH_MSGH_BITS_COMPLEX) { mr |= ipc_kmsg_copyout_body(kmsg, space, map, slist); @@ -3027,12 +3815,12 @@ ipc_kmsg_copyout_dest( ipc_object_destroy(reply, reply_type); reply_name = MACH_PORT_NULL; } else - reply_name = (mach_port_name_t) reply; + reply_name = CAST_MACH_PORT_TO_NAME(reply); kmsg->ikm_header->msgh_bits = (MACH_MSGH_BITS_OTHER(mbits) | MACH_MSGH_BITS(reply_type, dest_type)); - kmsg->ikm_header->msgh_local_port = (ipc_port_t)dest_name; - kmsg->ikm_header->msgh_remote_port = (ipc_port_t)reply_name; + 
kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); + kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); if (mbits & MACH_MSGH_BITS_COMPLEX) { mach_msg_body_t *body; @@ -3078,11 +3866,14 @@ ipc_kmsg_get_scatter( mach_msg_descriptor_t *gstart, *gend; mach_msg_descriptor_t *sstart, *send; +#if defined(__LP64__) + panic("ipc_kmsg_get_scatter called!"); +#endif if (slist_size < sizeof(mach_msg_base_t)) return MACH_MSG_BODY_NULL; - slist_size -= sizeof(mach_msg_header_t); + slist_size -= (mach_msg_size_t)sizeof(mach_msg_header_t); slist = (mach_msg_body_t *)kalloc(slist_size); if (slist == MACH_MSG_BODY_NULL) return slist; @@ -3188,7 +3979,11 @@ ipc_kmsg_free_scatter( mach_msg_body_t *slist, mach_msg_size_t slist_size) { - slist_size -= sizeof(mach_msg_header_t); +#if defined(__LP64__) + panic("%s called; halting!", __func__); +#endif + + slist_size -= (mach_msg_size_t)sizeof(mach_msg_header_t); kfree(slist, slist_size); } @@ -3234,14 +4029,134 @@ ipc_kmsg_copyout_to_kernel( dest_name = MACH_PORT_DEAD; } - reply_name = (mach_port_name_t) reply; + reply_name = CAST_MACH_PORT_TO_NAME(reply); + + kmsg->ikm_header->msgh_bits = + (MACH_MSGH_BITS_OTHER(kmsg->ikm_header->msgh_bits) | + MACH_MSGH_BITS(reply_type, dest_type)); + kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); + kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); +} + +#if IKM_SUPPORT_LEGACY +void +ipc_kmsg_copyout_to_kernel_legacy( + ipc_kmsg_t kmsg, + ipc_space_t space) +{ + ipc_object_t dest; + ipc_object_t reply; + mach_msg_type_name_t dest_type; + mach_msg_type_name_t reply_type; + mach_port_name_t dest_name, reply_name; + + dest = (ipc_object_t) kmsg->ikm_header->msgh_remote_port; + reply = (ipc_object_t) kmsg->ikm_header->msgh_local_port; + dest_type = MACH_MSGH_BITS_REMOTE(kmsg->ikm_header->msgh_bits); + reply_type = MACH_MSGH_BITS_LOCAL(kmsg->ikm_header->msgh_bits); + + assert(IO_VALID(dest)); + + io_lock(dest); + if 
(io_active(dest)) { + ipc_object_copyout_dest(space, dest, dest_type, &dest_name); + /* dest is unlocked */ + } else { + io_release(dest); + io_check_unlock(dest); + dest_name = MACH_PORT_DEAD; + } + + reply_name = CAST_MACH_PORT_TO_NAME(reply); kmsg->ikm_header->msgh_bits = (MACH_MSGH_BITS_OTHER(kmsg->ikm_header->msgh_bits) | MACH_MSGH_BITS(reply_type, dest_type)); - kmsg->ikm_header->msgh_local_port = (ipc_port_t)dest_name; - kmsg->ikm_header->msgh_remote_port = (ipc_port_t)reply_name; + kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(dest_name); + kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(reply_name); + + mach_msg_descriptor_t *saddr; + mach_msg_legacy_descriptor_t *daddr; + mach_msg_type_number_t i, count = ((mach_msg_base_t *)kmsg->ikm_header)->body.msgh_descriptor_count; + saddr = (mach_msg_descriptor_t *) (((mach_msg_base_t *)kmsg->ikm_header) + 1); + saddr = &saddr[count-1]; + daddr = (mach_msg_legacy_descriptor_t *)&saddr[count]; + daddr--; + + vm_offset_t dsc_adjust = 0; + + for (i = 0; i < count; i++, saddr--, daddr--) { + switch (saddr->type.type) { + case MACH_MSG_PORT_DESCRIPTOR: { + mach_msg_port_descriptor_t *dsc = &saddr->port; + mach_msg_legacy_port_descriptor_t *dest_dsc = &daddr->port; + + mach_port_t name = dsc->name; + mach_msg_type_name_t disposition = dsc->disposition; + + dest_dsc->name = CAST_MACH_PORT_TO_NAME(name); + dest_dsc->disposition = disposition; + dest_dsc->type = MACH_MSG_PORT_DESCRIPTOR; + break; + } + case MACH_MSG_OOL_VOLATILE_DESCRIPTOR: + case MACH_MSG_OOL_DESCRIPTOR: { + /* The sender should supply ready-made memory, i.e. a vm_map_copy_t + * so we don't need to do anything special. 
*/ + + mach_msg_ool_descriptor_t *source_dsc = (typeof(source_dsc))&saddr->out_of_line; + + mach_msg_ool_descriptor32_t *dest_dsc = &daddr->out_of_line32; + + vm_offset_t address = (vm_offset_t)source_dsc->address; + vm_size_t size = source_dsc->size; + boolean_t deallocate = source_dsc->deallocate; + mach_msg_copy_options_t copy = source_dsc->copy; + mach_msg_descriptor_type_t type = source_dsc->type; + + dest_dsc->address = address; + dest_dsc->size = size; + dest_dsc->deallocate = deallocate; + dest_dsc->copy = copy; + dest_dsc->type = type; + break; + } + case MACH_MSG_OOL_PORTS_DESCRIPTOR: { + mach_msg_ool_ports_descriptor_t *source_dsc = (typeof(source_dsc))&saddr->ool_ports; + + mach_msg_ool_ports_descriptor32_t *dest_dsc = &daddr->ool_ports32; + + vm_offset_t address = (vm_offset_t)source_dsc->address; + vm_size_t port_count = source_dsc->count; + boolean_t deallocate = source_dsc->deallocate; + mach_msg_copy_options_t copy = source_dsc->copy; + mach_msg_descriptor_type_t type = source_dsc->type; + + dest_dsc->address = address; + dest_dsc->count = port_count; + dest_dsc->deallocate = deallocate; + dest_dsc->copy = copy; + dest_dsc->type = type; + break; + } + default: { +#if MACH_ASSERT + panic("ipc_kmsg_copyin_from_kernel: bad descriptor"); +#endif /* MACH_ASSERT */ + } + } + } + + if(count) { + dsc_adjust = 4*count; + memmove((char *)((vm_offset_t)kmsg->ikm_header + dsc_adjust), kmsg->ikm_header, sizeof(mach_msg_base_t)); + kmsg->ikm_header = (mach_msg_header_t *)((vm_offset_t)kmsg->ikm_header + dsc_adjust); + /* Update the message size for the smaller user representation */ + kmsg->ikm_header->msgh_size -= dsc_adjust; + } } +#endif /* IKM_SUPPORT_LEGACY */ + #include #if MACH_KDB @@ -3258,9 +4173,6 @@ const char * ipc_type_name( int type_name, boolean_t received); -void ipc_print_type_name( - int type_name); - const char * msgh_bit_decode( mach_msg_bits_t bit); diff --git a/osfmk/ipc/ipc_kmsg.h b/osfmk/ipc/ipc_kmsg.h index db7e6acf4..db4df8ad5 100644 
--- a/osfmk/ipc/ipc_kmsg.h +++ b/osfmk/ipc/ipc_kmsg.h @@ -104,6 +104,11 @@ struct ipc_kmsg { mach_msg_header_t *ikm_header; }; +#if defined(__i386__) || defined(__arm__) +#define IKM_SUPPORT_LEGACY 1 +#else +#define IKM_SUPPORT_LEGACY 0 +#endif #define IKM_OVERHEAD (sizeof(struct ipc_kmsg)) @@ -320,6 +325,11 @@ extern mach_msg_return_t ipc_kmsg_copyin( extern void ipc_kmsg_copyin_from_kernel( ipc_kmsg_t kmsg); +#if IKM_SUPPORT_LEGACY +extern void ipc_kmsg_copyin_from_kernel_legacy( + ipc_kmsg_t kmsg); +#endif + /* Copyout port rights in the header of a message */ extern mach_msg_return_t ipc_kmsg_copyout_header( mach_msg_header_t *msg, @@ -371,6 +381,12 @@ extern void ipc_kmsg_copyout_to_kernel( ipc_kmsg_t kmsg, ipc_space_t space); +#if IKM_SUPPORT_LEGACY +extern void ipc_kmsg_copyout_to_kernel_legacy( + ipc_kmsg_t kmsg, + ipc_space_t space); +#endif + /* get a scatter list and check consistency */ extern mach_msg_body_t *ipc_kmsg_get_scatter( mach_vm_address_t msg_addr, diff --git a/osfmk/ipc/ipc_labelh.h b/osfmk/ipc/ipc_labelh.h index b9ca1f5bd..5eba16ca3 100644 --- a/osfmk/ipc/ipc_labelh.h +++ b/osfmk/ipc/ipc_labelh.h @@ -65,7 +65,7 @@ typedef struct ipc_labelh int lh_type; struct label lh_label; ipc_port_t lh_port; - decl_mutex_data(, lh_lock_data) + decl_lck_mtx_data(, lh_lock_data) } *ipc_labelh_t; #define LABELH_TYPE_KERN 0 @@ -88,9 +88,9 @@ MACRO_END extern zone_t ipc_labelh_zone; -#define lh_lock_init(lh) mutex_init(&(lh)->lh_lock_data, 0) -#define lh_lock(lh) mutex_lock(&(lh)->lh_lock_data) -#define lh_unlock(lh) mutex_unlock(&(lh)->lh_lock_data) +#define lh_lock_init(lh) lck_mtx_init(&(lh)->lh_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define lh_lock(lh) lck_mtx_lock(&(lh)->lh_lock_data) +#define lh_unlock(lh) lck_mtx_unlock(&(lh)->lh_lock_data) /* * Check the number of references the label handle has left. 
diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c index 316babd8d..9d17b81b9 100644 --- a/osfmk/ipc/ipc_mqueue.c +++ b/osfmk/ipc/ipc_mqueue.c @@ -90,7 +90,9 @@ #include #include -#include +#ifdef __LP64__ +#include +#endif #if CONFIG_MACF_MACH #include @@ -99,8 +101,6 @@ int ipc_mqueue_full; /* address is event for queue space */ int ipc_mqueue_rcv; /* address is event for message arrival */ -#define TR_ENABLE 0 - /* forward declarations */ void ipc_mqueue_receive_results(wait_result_t result); @@ -115,7 +115,7 @@ ipc_mqueue_init( boolean_t is_set) { if (is_set) { - wait_queue_set_init(&mqueue->imq_set_queue, SYNC_POLICY_FIFO); + wait_queue_set_init(&mqueue->imq_set_queue, SYNC_POLICY_FIFO|SYNC_POLICY_PREPOST); } else { wait_queue_init(&mqueue->imq_wait_queue, SYNC_POLICY_FIFO); ipc_kmsg_queue_init(&mqueue->imq_messages); @@ -245,6 +245,7 @@ ipc_mqueue_add( for (;;) { thread_t th; + mach_msg_size_t msize; th = wait_queue_wakeup64_identity_locked( port_waitq, @@ -256,6 +257,17 @@ ipc_mqueue_add( if (th == THREAD_NULL) goto leave; + /* + * If the receiver waited with a facility not directly + * related to Mach messaging, then it isn't prepared to get + * handed the message directly. Just set it running, and + * go look for another thread that can. + */ + if (th->ith_state != MACH_RCV_IN_PROGRESS) { + thread_unlock(th); + continue; + } + /* * Found a receiver. see if they can handle the message * correctly (the message is not too large for them, or @@ -264,15 +276,16 @@ ipc_mqueue_add( * the list and let them go back and figure it out and * just move onto the next. 
*/ + msize = ipc_kmsg_copyout_size(kmsg, th->map); if (th->ith_msize < - kmsg->ikm_header->msgh_size + - REQUESTED_TRAILER_SIZE(th->ith_option)) { + (msize + REQUESTED_TRAILER_SIZE(th->ith_option))) { th->ith_state = MACH_RCV_TOO_LARGE; - th->ith_msize = kmsg->ikm_header->msgh_size; + th->ith_msize = msize; if (th->ith_option & MACH_RCV_LARGE) { /* * let him go without message */ + th->ith_receiver_name = port_mqueue->imq_receiver_name; th->ith_kmsg = IKM_NULL; th->ith_seqno = 0; thread_unlock(th); @@ -344,12 +357,12 @@ ipc_mqueue_changed( mach_msg_return_t ipc_mqueue_send( ipc_mqueue_t mqueue, - ipc_kmsg_t kmsg, + ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t send_timeout) + mach_msg_timeout_t send_timeout, + spl_t s) { int wresult; - spl_t s; /* * Don't block if: @@ -357,9 +370,6 @@ ipc_mqueue_send( * 2) Caller used the MACH_SEND_ALWAYS internal option. * 3) Message is sent to a send-once right. */ - s = splsched(); - imq_lock(mqueue); - if (!imq_full(mqueue) || (!imq_full_kernel(mqueue) && ((option & MACH_SEND_ALWAYS) || @@ -420,6 +430,8 @@ ipc_mqueue_send( return MACH_SEND_INTERRUPTED; case THREAD_RESTART: + /* mqueue is being destroyed */ + return MACH_SEND_INVALID_DEST; default: panic("ipc_mqueue_send"); } @@ -477,7 +489,6 @@ ipc_mqueue_post( register ipc_mqueue_t mqueue, register ipc_kmsg_t kmsg) { - spl_t s; /* @@ -491,6 +502,7 @@ ipc_mqueue_post( for (;;) { wait_queue_t waitq = &mqueue->imq_wait_queue; thread_t receiver; + mach_msg_size_t msize; receiver = wait_queue_wakeup64_identity_locked( waitq, @@ -507,16 +519,28 @@ ipc_mqueue_post( ipc_kmsg_enqueue_macro(&mqueue->imq_messages, kmsg); break; } - + + /* + * If the receiver waited with a facility not directly + * related to Mach messaging, then it isn't prepared to get + * handed the message directly. Just set it running, and + * go look for another thread that can. 
+ */ + if (receiver->ith_state != MACH_RCV_IN_PROGRESS) { + thread_unlock(receiver); + continue; + } + + /* * We found a waiting thread. * If the message is too large or the scatter list is too small * the thread we wake up will get that as its status. */ + msize = ipc_kmsg_copyout_size(kmsg, receiver->map); if (receiver->ith_msize < - (kmsg->ikm_header->msgh_size) + - REQUESTED_TRAILER_SIZE(receiver->ith_option)) { - receiver->ith_msize = kmsg->ikm_header->msgh_size; + (msize + REQUESTED_TRAILER_SIZE(receiver->ith_option))) { + receiver->ith_msize = msize; receiver->ith_state = MACH_RCV_TOO_LARGE; } else { receiver->ith_state = MACH_MSG_SUCCESS; @@ -650,16 +674,47 @@ ipc_mqueue_receive_continue( void ipc_mqueue_receive( - ipc_mqueue_t mqueue, - mach_msg_option_t option, - mach_msg_size_t max_size, - mach_msg_timeout_t rcv_timeout, - int interruptible) + ipc_mqueue_t mqueue, + mach_msg_option_t option, + mach_msg_size_t max_size, + mach_msg_timeout_t rcv_timeout, + int interruptible) +{ + wait_result_t wresult; + thread_t self = current_thread(); + + wresult = ipc_mqueue_receive_on_thread(mqueue, option, max_size, + rcv_timeout, interruptible, + self); + if (wresult == THREAD_NOT_WAITING) + return; + + if (wresult == THREAD_WAITING) { + counter((interruptible == THREAD_ABORTSAFE) ? 
+ c_ipc_mqueue_receive_block_user++ : + c_ipc_mqueue_receive_block_kernel++); + + if (self->ith_continuation) + thread_block(ipc_mqueue_receive_continue); + /* NOTREACHED */ + + wresult = thread_block(THREAD_CONTINUE_NULL); + } + ipc_mqueue_receive_results(wresult); +} + +wait_result_t +ipc_mqueue_receive_on_thread( + ipc_mqueue_t mqueue, + mach_msg_option_t option, + mach_msg_size_t max_size, + mach_msg_timeout_t rcv_timeout, + int interruptible, + thread_t thread) { ipc_kmsg_queue_t kmsgs; wait_result_t wresult; - thread_t self; - uint64_t deadline; + uint64_t deadline; spl_t s; #if CONFIG_MACF_MACH ipc_labelh_t lh; @@ -669,33 +724,40 @@ ipc_mqueue_receive( s = splsched(); imq_lock(mqueue); - self = current_thread(); if (imq_is_set(mqueue)) { - wait_queue_link_t wql; - ipc_mqueue_t port_mq; queue_t q; - q = &mqueue->imq_setlinks; + q = &mqueue->imq_preposts; /* * If we are waiting on a portset mqueue, we need to see if - * any of the member ports have work for us. If so, try to - * deliver one of those messages. By holding the portset's + * any of the member ports have work for us. Ports that + * have (or recently had) messages will be linked in the + * prepost queue for the portset. By holding the portset's * mqueue lock during the search, we tie up any attempts by * mqueue_deliver or portset membership changes that may - * cross our path. But this is a lock order violation, so we - * have to do it "softly." If we don't find a message waiting - * for us, we will assert our intention to wait while still - * holding that lock. When we release the lock, the deliver/ - * change will succeed and find us. + * cross our path. 
*/ search_set: - queue_iterate(q, wql, wait_queue_link_t, wql_setlinks) { + while(!queue_empty(q)) { + wait_queue_link_t wql; + ipc_mqueue_t port_mq; + + queue_remove_first(q, wql, wait_queue_link_t, wql_preposts); + assert(!wql_is_preposted(wql)); + + /* + * This is a lock order violation, so we have to do it + * "softly," putting the link back on the prepost list + * if it fails (at the tail is fine since the order of + * handling messages from different sources in a set is + * not guaranteed and we'd like to skip to the next source + * if one is available). + */ port_mq = (ipc_mqueue_t)wql->wql_queue; - kmsgs = &port_mq->imq_messages; - if (!imq_lock_try(port_mq)) { + queue_enter(q, wql, wait_queue_link_t, wql_preposts); imq_unlock(mqueue); splx(s); mutex_pause(0); @@ -705,42 +767,46 @@ ipc_mqueue_receive( } /* - * If there is still a message to be had, we will - * try to select it (may not succeed because of size - * and options). In any case, we deliver those - * results back to the user. - * - * We also move the port's linkage to the tail of the - * list for this set (fairness). Future versions will - * sort by timestamp or priority. + * If there are no messages on this queue, just skip it + * (we already removed the link from the set's prepost queue). */ + kmsgs = &port_mq->imq_messages; if (ipc_kmsg_queue_first(kmsgs) == IKM_NULL) { imq_unlock(port_mq); continue; } - queue_remove(q, wql, wait_queue_link_t, wql_setlinks); - queue_enter(q, wql, wait_queue_link_t, wql_setlinks); + + /* + * There are messages, so reinsert the link back + * at the tail of the preposted queue (for fairness) + * while we still have the portset mqueue locked. + */ + queue_enter(q, wql, wait_queue_link_t, wql_preposts); imq_unlock(mqueue); - ipc_mqueue_select(port_mq, option, max_size); + /* + * Continue on to handling the message with just + * the port mqueue locked. 
+ */ + ipc_mqueue_select_on_thread(port_mq, option, max_size, thread); imq_unlock(port_mq); #if CONFIG_MACF_MACH - if (self->ith_kmsg != NULL && - self->ith_kmsg->ikm_sender != NULL) { - lh = self->ith_kmsg->ikm_sender->label; - task = current_task(); - tasklabel_lock(task); + if (thread->task != TASK_NULL && + thread->ith_kmsg != NULL && + thread->ith_kmsg->ikm_sender != NULL) { + lh = thread->ith_kmsg->ikm_sender->label; + tasklabel_lock(thread->task); ip_lock(lh->lh_port); - rc = mac_port_check_receive(&task->maclabel, - &lh->lh_label); + rc = mac_port_check_receive(&thread->task->maclabel, + &lh->lh_label); ip_unlock(lh->lh_port); - tasklabel_unlock(task); + tasklabel_unlock(thread->task); if (rc) - self->ith_state = MACH_RCV_INVALID_DATA; + thread->ith_state = MACH_RCV_INVALID_DATA; } #endif splx(s); - return; + return THREAD_NOT_WAITING; } @@ -751,28 +817,28 @@ ipc_mqueue_receive( */ kmsgs = &mqueue->imq_messages; if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) { - ipc_mqueue_select(mqueue, option, max_size); + ipc_mqueue_select_on_thread(mqueue, option, max_size, thread); imq_unlock(mqueue); #if CONFIG_MACF_MACH - if (self->ith_kmsg != NULL && - self->ith_kmsg->ikm_sender != NULL) { - lh = self->ith_kmsg->ikm_sender->label; - task = current_task(); - tasklabel_lock(task); + if (thread->task != TASK_NULL && + thread->ith_kmsg != NULL && + thread->ith_kmsg->ikm_sender != NULL) { + lh = thread->ith_kmsg->ikm_sender->label; + tasklabel_lock(thread->task); ip_lock(lh->lh_port); - rc = mac_port_check_receive(&task->maclabel, - &lh->lh_label); + rc = mac_port_check_receive(&thread->task->maclabel, + &lh->lh_label); ip_unlock(lh->lh_port); - tasklabel_unlock(task); + tasklabel_unlock(thread->task); if (rc) - self->ith_state = MACH_RCV_INVALID_DATA; + thread->ith_state = MACH_RCV_INVALID_DATA; } #endif splx(s); - return; + return THREAD_NOT_WAITING; } } - + /* * Looks like we'll have to block. 
The mqueue we will * block on (whether the set's or the local port's) is @@ -782,15 +848,15 @@ ipc_mqueue_receive( if (rcv_timeout == 0) { imq_unlock(mqueue); splx(s); - self->ith_state = MACH_RCV_TIMED_OUT; - return; + thread->ith_state = MACH_RCV_TIMED_OUT; + return THREAD_NOT_WAITING; } } - thread_lock(self); - self->ith_state = MACH_RCV_IN_PROGRESS; - self->ith_option = option; - self->ith_msize = max_size; + thread_lock(thread); + thread->ith_state = MACH_RCV_IN_PROGRESS; + thread->ith_option = option; + thread->ith_msize = max_size; if (option & MACH_RCV_TIMEOUT) clock_interval_to_deadline(rcv_timeout, 1000*NSEC_PER_USEC, &deadline); @@ -798,55 +864,45 @@ ipc_mqueue_receive( deadline = 0; wresult = wait_queue_assert_wait64_locked(&mqueue->imq_wait_queue, - IPC_MQUEUE_RECEIVE, - interruptible, deadline, - self); - thread_unlock(self); + IPC_MQUEUE_RECEIVE, + interruptible, deadline, + thread); + /* preposts should be detected above, not here */ + if (wresult == THREAD_AWAKENED) + panic("ipc_mqueue_receive_on_thread: sleep walking"); + + thread_unlock(thread); imq_unlock(mqueue); splx(s); - - if (wresult == THREAD_WAITING) { - counter((interruptible == THREAD_ABORTSAFE) ? - c_ipc_mqueue_receive_block_user++ : - c_ipc_mqueue_receive_block_kernel++); - - if (self->ith_continuation) - thread_block(ipc_mqueue_receive_continue); - /* NOTREACHED */ - - wresult = thread_block(THREAD_CONTINUE_NULL); - } - ipc_mqueue_receive_results(wresult); + return wresult; } /* - * Routine: ipc_mqueue_select + * Routine: ipc_mqueue_select_on_thread * Purpose: * A receiver discovered that there was a message on the queue * before he had to block. Pick the message off the queue and - * "post" it to himself. + * "post" it to thread. * Conditions: * mqueue locked. + * thread not locked. * There is a message. * Returns: * MACH_MSG_SUCCESS Actually selected a message for ourselves. 
* MACH_RCV_TOO_LARGE May or may not have pull it, but it is large */ void -ipc_mqueue_select( +ipc_mqueue_select_on_thread( ipc_mqueue_t mqueue, mach_msg_option_t option, - mach_msg_size_t max_size) + mach_msg_size_t max_size, + thread_t thread) { - thread_t self = current_thread(); ipc_kmsg_t kmsg; - mach_msg_return_t mr; + mach_msg_return_t mr = MACH_MSG_SUCCESS; mach_msg_size_t rcv_size; - mr = MACH_MSG_SUCCESS; - - /* * Do some sanity checking of our ability to receive * before pulling the message off the queue. @@ -860,28 +916,79 @@ ipc_mqueue_select( * the queue, instead return the appropriate error * (and size needed). */ - rcv_size = ipc_kmsg_copyout_size(kmsg, self->map); + rcv_size = ipc_kmsg_copyout_size(kmsg, thread->map); if (rcv_size + REQUESTED_TRAILER_SIZE(option) > max_size) { mr = MACH_RCV_TOO_LARGE; if (option & MACH_RCV_LARGE) { - self->ith_kmsg = IKM_NULL; - self->ith_msize = rcv_size; - self->ith_seqno = 0; - self->ith_state = mr; + thread->ith_receiver_name = mqueue->imq_receiver_name; + thread->ith_kmsg = IKM_NULL; + thread->ith_msize = rcv_size; + thread->ith_seqno = 0; + thread->ith_state = mr; return; } } ipc_kmsg_rmqueue_first_macro(&mqueue->imq_messages, kmsg); ipc_mqueue_release_msgcount(mqueue); - self->ith_seqno = mqueue->imq_seqno++; - self->ith_kmsg = kmsg; - self->ith_state = mr; + thread->ith_seqno = mqueue->imq_seqno++; + thread->ith_kmsg = kmsg; + thread->ith_state = mr; current_task()->messages_received++; return; } +/* + * Routine: ipc_mqueue_peek + * Purpose: + * Peek at a message queue to see if it has any messages + * (in it or contained message queues for a set). + * + * Conditions: + * Locks may be held by callers, so this routine cannot block. + * Caller holds reference on the message queue. + */ +int +ipc_mqueue_peek(ipc_mqueue_t mq) +{ + wait_queue_link_t wql; + queue_t q; + spl_t s; + + if (!imq_is_set(mq)) + return (ipc_kmsg_queue_first(&mq->imq_messages) != IKM_NULL); + + /* + * Don't block trying to get the lock. 
+ */ + s = splsched(); + if (!imq_lock_try(mq)) { + splx(s); + return -1; + } + + /* + * peek at the contained port message queues, return as soon as + * we spot a message on one of the message queues linked on the + * prepost list. + */ + q = &mq->imq_preposts; + queue_iterate(q, wql, wait_queue_link_t, wql_preposts) { + ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue; + ipc_kmsg_queue_t kmsgs = &port_mq->imq_messages; + + if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) { + imq_unlock(mq); + splx(s); + return 1; + } + } + imq_unlock(mq); + splx(s); + return 0; +} + /* * Routine: ipc_mqueue_destroy * Purpose: @@ -909,7 +1016,7 @@ ipc_mqueue_destroy( wait_queue_wakeup64_all_locked( &mqueue->imq_wait_queue, IPC_MQUEUE_FULL, - THREAD_AWAKENED, + THREAD_RESTART, FALSE); kmqueue = &mqueue->imq_messages; diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h index 4ef47d969..90d3322cf 100644 --- a/osfmk/ipc/ipc_mqueue.h +++ b/osfmk/ipc/ipc_mqueue.h @@ -73,12 +73,15 @@ #include #include #include +#include #include #include #include #include +#include + typedef struct ipc_mqueue { union { struct { @@ -86,10 +89,14 @@ typedef struct ipc_mqueue { struct ipc_kmsg_queue messages; mach_port_msgcount_t msgcount; mach_port_msgcount_t qlimit; - mach_port_seqno_t seqno; + mach_port_seqno_t seqno; + mach_port_name_t receiver_name; boolean_t fullwaiters; } port; - struct wait_queue_set set_queue; + struct { + struct wait_queue_set set_queue; + mach_port_name_t local_name; + } pset; } data; } *ipc_mqueue_t; @@ -100,10 +107,13 @@ typedef struct ipc_mqueue { #define imq_msgcount data.port.msgcount #define imq_qlimit data.port.qlimit #define imq_seqno data.port.seqno +#define imq_receiver_name data.port.receiver_name #define imq_fullwaiters data.port.fullwaiters -#define imq_set_queue data.set_queue -#define imq_setlinks data.set_queue.wqs_setlinks +#define imq_set_queue data.pset.set_queue +#define imq_setlinks data.pset.set_queue.wqs_setlinks +#define imq_preposts 
data.pset.set_queue.wqs_preposts +#define imq_local_name data.pset.local_name #define imq_is_set(mq) wait_queue_is_set(&(mq)->imq_set_queue) #define imq_lock(mq) wait_queue_lock(&(mq)->imq_wait_queue) @@ -115,10 +125,10 @@ typedef struct ipc_mqueue { #define imq_full_kernel(mq) ((mq)->imq_msgcount >= MACH_PORT_QLIMIT_KERNEL) extern int ipc_mqueue_full; -extern int ipc_mqueue_rcv; +// extern int ipc_mqueue_rcv; #define IPC_MQUEUE_FULL CAST_EVENT64_T(&ipc_mqueue_full) -#define IPC_MQUEUE_RECEIVE CAST_EVENT64_T(&ipc_mqueue_rcv) +#define IPC_MQUEUE_RECEIVE NO_EVENT64 /* * Exported interfaces @@ -165,7 +175,8 @@ extern mach_msg_return_t ipc_mqueue_send( ipc_mqueue_t mqueue, ipc_kmsg_t kmsg, mach_msg_option_t option, - mach_msg_timeout_t timeout_val); + mach_msg_timeout_t timeout_val, + spl_t s); /* Deliver message to message queue or waiting receiver */ extern void ipc_mqueue_post( @@ -180,16 +191,30 @@ extern void ipc_mqueue_receive( mach_msg_timeout_t timeout_val, int interruptible); +/* Receive a message from a message queue using a specified thread */ +extern wait_result_t ipc_mqueue_receive_on_thread( + ipc_mqueue_t mqueue, + mach_msg_option_t option, + mach_msg_size_t max_size, + mach_msg_timeout_t rcv_timeout, + int interruptible, + thread_t thread); + /* Continuation routine for message receive */ extern void ipc_mqueue_receive_continue( void *param, wait_result_t wresult); /* Select a message from a queue and try to post it to ourself */ -extern void ipc_mqueue_select( +extern void ipc_mqueue_select_on_thread( ipc_mqueue_t mqueue, mach_msg_option_t option, - mach_msg_size_t max_size); + mach_msg_size_t max_size, + thread_t thread); + +/* Peek into a messaqe queue to see if there are messages */ +extern int ipc_mqueue_peek( + ipc_mqueue_t mqueue); /* Clear a message count reservation */ extern void ipc_mqueue_release_msgcount( diff --git a/osfmk/ipc/ipc_object.c b/osfmk/ipc/ipc_object.c index 0ca7095ee..4b1dc1c85 100644 --- a/osfmk/ipc/ipc_object.c +++ 
b/osfmk/ipc/ipc_object.c @@ -355,7 +355,7 @@ ipc_object_alloc( } io_lock_init(object); - *namep = (mach_port_name_t)object; + *namep = CAST_MACH_PORT_TO_NAME(object); kr = ipc_entry_alloc(space, namep, &entry); if (kr != KERN_SUCCESS) { io_free(otype, object); @@ -728,7 +728,7 @@ ipc_object_copyout( break; } - name = (mach_port_name_t)object; + name = CAST_MACH_PORT_TO_NAME(object); kr = ipc_entry_get(space, &name, &entry); if (kr != KERN_SUCCESS) { /* unlocks/locks space, so must start again */ @@ -1019,7 +1019,7 @@ struct label *io_getlabel (ipc_object_t objp) return &port->ip_label; } #endif -#if MACH_ASSERT || CONFIG_MACF_MACH + /* * Check whether the object is a port if so, free it. But * keep track of that fact. @@ -1042,9 +1042,9 @@ io_free( mac_port_label_destroy(&port->ip_label); #endif } + io_lock_destroy(object); zfree(ipc_object_zones[otype], object); } -#endif /* MACH_ASSER || MAC */ #include #if MACH_KDB @@ -1093,6 +1093,7 @@ const char *ikot_print_array[IKOT_MAX_TYPE] = { "(IOKIT_OBJECT) ", /* 30 */ "(UPL) ", "(MEM_OBJ_CONTROL) ", + "(AU_SESSIONPORT) ", /* 33 */ #if CONFIG_MACF_MACH "(LABELH) ", #endif diff --git a/osfmk/ipc/ipc_object.h b/osfmk/ipc/ipc_object.h index a3863ffa8..003707f59 100644 --- a/osfmk/ipc/ipc_object.h +++ b/osfmk/ipc/ipc_object.h @@ -87,8 +87,6 @@ typedef natural_t ipc_object_bits_t; typedef natural_t ipc_object_type_t; /* - * There is no lock in the ipc_object; it is in the enclosing kernel - * data structure (rpc_common_data) used by both ipc_port and ipc_pset. * The ipc_object is used to both tag and reference count these two data * structures, and (Noto Bene!) pointers to either of these or the * ipc_object at the head of these are freely cast back and forth; hence @@ -100,17 +98,28 @@ typedef natural_t ipc_object_type_t; * (with which lock size varies). 
*/ struct ipc_object { + ipc_object_bits_t io_bits; ipc_object_refs_t io_references; + decl_lck_mtx_data(, io_lock_data) +}; + +/* + * If another object type needs to participate in io_kotype()-based + * dispatching, it must include a stub structure as the first + * element + */ +struct ipc_object_header { ipc_object_bits_t io_bits; - mach_port_name_t io_receiver_name; - decl_mutex_data(, io_lock_data) +#ifdef __LP64__ + natural_t io_padding; /* pad to natural boundary */ +#endif }; /* * Legacy defines. Should use IPC_OBJECT_NULL, etc... */ #define IO_NULL ((ipc_object_t) 0) -#define IO_DEAD ((ipc_object_t) -1) +#define IO_DEAD ((ipc_object_t) ~0UL) #define IO_VALID(io) (((io) != IO_NULL) && ((io) != IO_DEAD)) /* @@ -147,33 +156,24 @@ extern zone_t ipc_object_zones[IOT_NUMBER]; #define io_alloc(otype) \ ((ipc_object_t) zalloc(ipc_object_zones[(otype)])) -#if MACH_ASSERT || CONFIG_MACF_MACH -/* - * Call the routine for io_free so that checking can be performed. - */ extern void io_free( unsigned int otype, ipc_object_t object); -#else /* MACH_ASSERT || MAC_MACH */ -#define io_free(otype, io) \ - zfree(ipc_object_zones[(otype)], (io)) -#endif /* MACH_ASSERT || MAC_MACH */ - /* - * Here we depend on the ipc_object being first within the ipc_common_data, - * which is first within the rpc_common_data, which in turn must be first - * within any kernel data structure needing to lock an ipc_object + * Here we depend on the ipc_object being first within the kernel struct * (ipc_port and ipc_pset). 
*/ #define io_lock_init(io) \ - mutex_init(&(io)->io_lock_data, 0) + lck_mtx_init(&(io)->io_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define io_lock_destroy(io) \ + lck_mtx_destroy(&(io)->io_lock_data, &ipc_lck_grp) #define io_lock(io) \ - mutex_lock(&(io)->io_lock_data) + lck_mtx_lock(&(io)->io_lock_data) #define io_lock_try(io) \ - mutex_try(&(io)->io_lock_data) + lck_mtx_try_lock(&(io)->io_lock_data) #define io_unlock(io) \ - mutex_unlock(&(io)->io_lock_data) + lck_mtx_unlock(&(io)->io_lock_data) #define _VOLATILE_ volatile diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index b531befff..eaa7bad40 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -102,8 +102,10 @@ #include -decl_mutex_data(, ipc_port_multiple_lock_data) -decl_mutex_data(, ipc_port_timestamp_lock_data) +decl_lck_mtx_data(, ipc_port_multiple_lock_data) +decl_lck_mtx_data(, ipc_port_timestamp_lock_data) +lck_mtx_ext_t ipc_port_multiple_lock_data_ext; +lck_mtx_ext_t ipc_port_timestamp_lock_data_ext; ipc_port_timestamp_t ipc_port_timestamp_data; #if MACH_ASSERT @@ -471,6 +473,7 @@ ipc_port_init( port->ip_pset_count = 0; port->ip_premsg = IKM_NULL; + port->ip_context = 0; #if MACH_ASSERT ipc_port_init_debug(port); @@ -989,7 +992,7 @@ ipc_port_copyout_send( name = MACH_PORT_NULL; } } else - name = (mach_port_name_t) sright; + name = CAST_MACH_PORT_TO_NAME(sright); return name; } @@ -1206,7 +1209,8 @@ ipc_port_dealloc_special( * deallocation is intercepted via io_free. 
*/ queue_head_t port_alloc_queue; -decl_mutex_data(,port_alloc_queue_lock) +decl_lck_mtx_data(,port_alloc_queue_lock) +lck_mtx_ext_t port_alloc_queue_lock_ext; unsigned long port_count = 0; unsigned long port_count_warning = 20000; @@ -1230,7 +1234,7 @@ void ipc_port_debug_init(void) { queue_init(&port_alloc_queue); - mutex_init(&port_alloc_queue_lock, 0); + lck_mtx_init_ext(&port_alloc_queue_lock, &port_alloc_queue_lock_ext, &ipc_lck_grp, &ipc_lck_attr); } @@ -1259,12 +1263,12 @@ ipc_port_init_debug( machine_callstack(&port->ip_callstack[0], IP_CALLSTACK_MAX); #if 0 - mutex_lock(&port_alloc_queue_lock); + lck_mtx_lock(&port_alloc_queue_lock); ++port_count; if (port_count_warning > 0 && port_count >= port_count_warning) assert(port_count < port_count_warning); queue_enter(&port_alloc_queue, port, ipc_port_t, ip_port_links); - mutex_unlock(&port_alloc_queue_lock); + lck_mtx_unlock(&port_alloc_queue_lock); #endif } @@ -1285,11 +1289,11 @@ void ipc_port_track_dealloc( ipc_port_t port) { - mutex_lock(&port_alloc_queue_lock); + lck_mtx_lock(&port_alloc_queue_lock); assert(port_count > 0); --port_count; queue_remove(&port_alloc_queue, port, ipc_port_t, ip_port_links); - mutex_unlock(&port_alloc_queue_lock); + lck_mtx_unlock(&port_alloc_queue_lock); } #endif diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h index 6b409dc2d..7249fe96a 100644 --- a/osfmk/ipc/ipc_port.h +++ b/osfmk/ipc/ipc_port.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,6 +72,8 @@ #ifndef _IPC_IPC_PORT_H_ #define _IPC_IPC_PORT_H_ +#if MACH_KERNEL_PRIVATE + #include #include #include @@ -111,10 +113,12 @@ typedef unsigned int ipc_port_timestamp_t; struct ipc_port { /* - * Initial sub-structure in common with ipc_pset and rpc_port - * First element is an ipc_object + * Initial sub-structure in common with ipc_pset + * First element is an ipc_object second is a + * message queue */ struct ipc_object ip_object; + struct ipc_mqueue ip_messages; union { struct ipc_space *receiver; @@ -132,8 +136,8 @@ struct ipc_port { struct ipc_port_request *ip_dnrequests; unsigned int ip_pset_count; - struct ipc_mqueue ip_messages; struct ipc_kmsg *ip_premsg; + mach_vm_address_t ip_context; #if NORMA_VM /* @@ -154,22 +158,23 @@ struct ipc_port { natural_t ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */ unsigned long ip_spares[IP_NSPARES]; /* for debugging */ #endif /* MACH_ASSERT */ - int alias; + uintptr_t alias; -//#if MAC +#if CONFIG_MACF_MACH struct label ip_label; -//#endif +#endif }; #define ip_references ip_object.io_references #define ip_bits ip_object.io_bits -#define ip_receiver_name ip_object.io_receiver_name #define ip_receiver data.receiver #define ip_destination data.destination #define ip_timestamp data.timestamp +#define ip_receiver_name ip_messages.imq_receiver_name + #define IP_NULL IPC_PORT_NULL #define IP_DEAD IPC_PORT_DEAD #define IP_VALID(port) IPC_PORT_VALID(port) @@ -229,22 +234,26 @@ struct ipc_port_request { #define ipr_soright notify.port #define ipr_name name.name +extern lck_grp_t ipc_lck_grp; +extern lck_attr_t ipc_lck_attr; + /* * Taking the ipc_port_multiple lock grants the privilege * to lock multiple ports at once. No ports must locked * when it is taken. 
*/ -decl_mutex_data(extern,ipc_port_multiple_lock_data) +decl_lck_mtx_data(extern,ipc_port_multiple_lock_data) +extern lck_mtx_ext_t ipc_port_multiple_lock_data_ext; #define ipc_port_multiple_lock_init() \ - mutex_init(&ipc_port_multiple_lock_data, 0) + lck_mtx_init_ext(&ipc_port_multiple_lock_data, &ipc_port_multiple_lock_data_ext, &ipc_lck_grp, &ipc_lck_attr) #define ipc_port_multiple_lock() \ - mutex_lock(&ipc_port_multiple_lock_data) + lck_mtx_lock(&ipc_port_multiple_lock_data) #define ipc_port_multiple_unlock() \ - mutex_unlock(&ipc_port_multiple_lock_data) + lck_mtx_unlock(&ipc_port_multiple_lock_data) /* * The port timestamp facility provides timestamps @@ -252,17 +261,19 @@ decl_mutex_data(extern,ipc_port_multiple_lock_data) * mach_port_names with port death. */ -decl_mutex_data(extern,ipc_port_timestamp_lock_data) +decl_lck_mtx_data(extern,ipc_port_timestamp_lock_data) +extern lck_mtx_ext_t ipc_port_timestamp_lock_data_ext; + extern ipc_port_timestamp_t ipc_port_timestamp_data; #define ipc_port_timestamp_lock_init() \ - mutex_init(&ipc_port_timestamp_lock_data, 0) + lck_mtx_init_ext(&ipc_port_timestamp_lock_data, &ipc_port_timestamp_lock_data_ext, &ipc_lck_grp, &ipc_lck_attr) #define ipc_port_timestamp_lock() \ - mutex_lock(&ipc_port_timestamp_lock_data) + lck_mtx_lock(&ipc_port_timestamp_lock_data) #define ipc_port_timestamp_unlock() \ - mutex_unlock(&ipc_port_timestamp_lock_data) + lck_mtx_unlock(&ipc_port_timestamp_lock_data) /* Retrieve a port timestamp value */ extern ipc_port_timestamp_t ipc_port_timestamp(void); @@ -400,10 +411,18 @@ extern mach_port_name_t ipc_port_copyout_send( ipc_port_t sright, ipc_space_t space); +#endif /* MACH_KERNEL_PRIVATE */ + +#if KERNEL_PRIVATE + /* Release a (valid) naked send right */ extern void ipc_port_release_send( ipc_port_t port); +#endif /* KERNEL_PRIVATE */ + +#if MACH_KERNEL_PRIVATE + /* Make a naked send-once right from a receive right */ extern ipc_port_t ipc_port_make_sonce( ipc_port_t port); @@ -450,4 
+469,6 @@ extern void ipc_port_debug_init(void); #define ipc_port_release(port) \ ipc_object_release(&(port)->ip_object) +#endif /* MACH_KERNEL_PRIVATE */ + #endif /* _IPC_IPC_PORT_H_ */ diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index bf805af55..4e0dd2b68 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -76,6 +76,9 @@ #include #include + +#include + /* * Routine: ipc_pset_alloc * Purpose: @@ -282,18 +285,246 @@ ipc_pset_destroy( */ ipc_mqueue_remove_all(&pset->ips_messages); + /* + * Set all waiters on the portset running to + * discover the change. + */ s = splsched(); imq_lock(&pset->ips_messages); ipc_mqueue_changed(&pset->ips_messages); imq_unlock(&pset->ips_messages); splx(s); - /* XXXX Perhaps ought to verify ips_thread_pool is empty */ - ips_release(pset); /* consume the ref our caller gave us */ ips_check_unlock(pset); } +/* Kqueue EVFILT_MACHPORT support */ + +#include + +static int filt_machportattach(struct knote *kn); +static void filt_machportdetach(struct knote *kn); +static int filt_machport(struct knote *kn, long hint); +static void filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type); +static int filt_machportpeek(struct knote *kn); +struct filterops machport_filtops = { + .f_attach = filt_machportattach, + .f_detach = filt_machportdetach, + .f_event = filt_machport, + .f_touch = filt_machporttouch, + .f_peek = filt_machportpeek, +}; + +static int +filt_machportattach( + struct knote *kn) +{ + mach_port_name_t name = (mach_port_name_t)kn->kn_kevent.ident; + ipc_pset_t pset = IPS_NULL; + int result = ENOSYS; + kern_return_t kr; + + kr = ipc_object_translate(current_space(), name, + MACH_PORT_RIGHT_PORT_SET, + (ipc_object_t *)&pset); + if (kr != KERN_SUCCESS) { + result = (kr == KERN_INVALID_NAME ? 
ENOENT : ENOTSUP); + goto done; + } + /* We've got a lock on pset */ + + /* keep a reference for the knote */ + kn->kn_ptr.p_pset = pset; + ips_reference(pset); + + /* + * Bind the portset wait queue directly to knote/kqueue. + * This allows us to just use wait_queue foo to effect a wakeup, + * rather than having to call knote() from the Mach code on each + * message. + */ + result = knote_link_wait_queue(kn, &pset->ips_messages.imq_wait_queue); + ips_unlock(pset); +done: + return result; +} + +static void +filt_machportdetach( + struct knote *kn) +{ + ipc_pset_t pset = kn->kn_ptr.p_pset; + + /* + * Unlink the portset wait queue from knote/kqueue, + * and release our reference on the portset. + */ + ips_lock(pset); + knote_unlink_wait_queue(kn, &pset->ips_messages.imq_wait_queue); + ips_release(kn->kn_ptr.p_pset); + kn->kn_ptr.p_pset = IPS_NULL; + ips_check_unlock(pset); +} + +static int +filt_machport( + struct knote *kn, + __unused long hint) +{ + mach_port_name_t name = (mach_port_name_t)kn->kn_kevent.ident; + ipc_pset_t pset = IPS_NULL; + wait_result_t wresult; + thread_t self = current_thread(); + kern_return_t kr; + mach_msg_option_t option; + mach_msg_size_t size; + + /* never called from below */ + assert(hint == 0); + + /* + * called from user context. Have to validate the + * name. If it changed, we have an EOF situation. + */ + kr = ipc_object_translate(current_space(), name, + MACH_PORT_RIGHT_PORT_SET, + (ipc_object_t *)&pset); + if (kr != KERN_SUCCESS || pset != kn->kn_ptr.p_pset || !ips_active(pset)) { + kn->kn_data = 0; + kn->kn_flags |= (EV_EOF | EV_ONESHOT); + if (pset != IPS_NULL) + ips_check_unlock(pset); + return(1); + } + + /* just use the reference from here on out */ + ips_reference(pset); + ips_unlock(pset); + + /* + * Only honor supported receive options. If no options are + * provided, just force a MACH_RCV_TOO_LARGE to detect the + * name of the port and sizeof the waiting message. 
+ */ + option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_TRAILER_MASK); + if (option & MACH_RCV_MSG) { + self->ith_msg_addr = (mach_vm_address_t) kn->kn_ext[0]; + size = (mach_msg_size_t)kn->kn_ext[1]; + } else { + option = MACH_RCV_LARGE; + self->ith_msg_addr = 0; + size = 0; + } + + /* + * Set up to receive a message or the notification of a + * too large message. But never allow this call to wait. + * If the user provided aditional options, like trailer + * options, pass those through here. But we don't support + * scatter lists through this interface. + */ + self->ith_object = (ipc_object_t)pset; + self->ith_msize = size; + self->ith_option = option; + self->ith_scatter_list_size = 0; + self->ith_receiver_name = MACH_PORT_NULL; + self->ith_continuation = NULL; + option |= MACH_RCV_TIMEOUT; // never wait + assert((self->ith_state = MACH_RCV_IN_PROGRESS) == MACH_RCV_IN_PROGRESS); + + wresult = ipc_mqueue_receive_on_thread( + &pset->ips_messages, + option, + size, /* max_size */ + 0, /* immediate timeout */ + THREAD_INTERRUPTIBLE, + self); + assert(wresult == THREAD_NOT_WAITING); + assert(self->ith_state != MACH_RCV_IN_PROGRESS); + + /* + * If we timed out, just release the reference on the + * portset and return zero. + */ + if (self->ith_state == MACH_RCV_TIMED_OUT) { + ipc_pset_release(pset); + return 0; + } + + /* + * If we weren't attempting to receive a message + * directly, we need to return the port name in + * the kevent structure. + */ + if ((option & MACH_RCV_MSG) != MACH_RCV_MSG) { + assert(self->ith_state == MACH_RCV_TOO_LARGE); + assert(self->ith_kmsg == IKM_NULL); + kn->kn_data = self->ith_receiver_name; + ipc_pset_release(pset); + return 1; + } + + /* + * Attempt to receive the message directly, returning + * the results in the fflags field. 
+ */ + assert(option & MACH_RCV_MSG); + kn->kn_data = MACH_PORT_NULL; + kn->kn_ext[1] = self->ith_msize; + kn->kn_fflags = mach_msg_receive_results(); + /* kmsg and pset reference consumed */ + return 1; +} + +static void +filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type) +{ + switch (type) { + case EVENT_REGISTER: + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + break; + case EVENT_PROCESS: + *kev = kn->kn_kevent; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_data = 0; + kn->kn_fflags = 0; + } + break; + default: + panic("filt_machporttouch() - invalid type (%ld)", type); + break; + } +} + +/* + * Peek to see if the portset associated with the knote has any + * events. This pre-hook is called when a filter uses the stay- + * on-queue mechanism (as the knote_link_wait_queue mechanism + * does). + * + * This is called with the kqueue that the knote belongs to still + * locked (thus holding a reference on the knote, but restricting + * also restricting our ability to take other locks). + * + * Just peek at the pre-post status of the portset's wait queue + * to determine if it has anything interesting. We can do it + * without holding the lock, as it is just a snapshot in time + * (if this is used as part of really waiting for events, we + * will catch changes in this status when the event gets posted + * up to the knote's kqueue). 
+ */ +static int +filt_machportpeek(struct knote *kn) +{ + ipc_pset_t pset = kn->kn_ptr.p_pset; + ipc_mqueue_t set_mq = &pset->ips_messages; + + return (ipc_mqueue_peek(set_mq)); +} + + #include #if MACH_KDB diff --git a/osfmk/ipc/ipc_pset.h b/osfmk/ipc/ipc_pset.h index c575a71ad..26c1f26c0 100644 --- a/osfmk/ipc/ipc_pset.h +++ b/osfmk/ipc/ipc_pset.h @@ -86,7 +86,7 @@ struct ipc_pset { }; #define ips_references ips_object.io_references -#define ips_local_name ips_object.io_receiver_name +#define ips_local_name ips_messages.imq_local_name #define ips_active(pset) io_active(&(pset)->ips_object) diff --git a/osfmk/ipc/ipc_space.h b/osfmk/ipc/ipc_space.h index 792fe6894..39c2e45a4 100644 --- a/osfmk/ipc/ipc_space.h +++ b/osfmk/ipc/ipc_space.h @@ -111,10 +111,10 @@ typedef natural_t ipc_space_refs_t; struct ipc_space { - decl_mutex_data(,is_ref_lock_data) + decl_lck_mtx_data(,is_ref_lock_data) ipc_space_refs_t is_references; - decl_mutex_data(,is_lock_data) + decl_lck_mtx_data(,is_lock_data) boolean_t is_active; /* is the space alive? */ boolean_t is_growing; /* is the space growing? 
*/ ipc_entry_t is_table; /* an array of entries */ @@ -147,43 +147,50 @@ extern ipc_space_t default_pager_space; #define is_fast_space(is) ((is)->is_fast) -#define is_ref_lock_init(is) mutex_init(&(is)->is_ref_lock_data, 0) +#define is_ref_lock_init(is) lck_mtx_init(&(is)->is_ref_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define is_ref_lock_destroy(is) lck_mtx_destroy(&(is)->is_ref_lock_data, &ipc_lck_grp) #define ipc_space_reference_macro(is) \ MACRO_BEGIN \ - mutex_lock(&(is)->is_ref_lock_data); \ + lck_mtx_lock(&(is)->is_ref_lock_data); \ assert((is)->is_references > 0); \ (is)->is_references++; \ - mutex_unlock(&(is)->is_ref_lock_data); \ + lck_mtx_unlock(&(is)->is_ref_lock_data); \ MACRO_END #define ipc_space_release_macro(is) \ MACRO_BEGIN \ ipc_space_refs_t _refs; \ \ - mutex_lock(&(is)->is_ref_lock_data); \ + lck_mtx_lock(&(is)->is_ref_lock_data); \ assert((is)->is_references > 0); \ _refs = --(is)->is_references; \ - mutex_unlock(&(is)->is_ref_lock_data); \ + lck_mtx_unlock(&(is)->is_ref_lock_data); \ \ - if (_refs == 0) \ + if (_refs == 0) { \ + is_lock_destroy(is); \ + is_ref_lock_destroy(is); \ is_free(is); \ + } \ MACRO_END -#define is_lock_init(is) mutex_init(&(is)->is_lock_data, 0) - -#define is_read_lock(is) mutex_lock(&(is)->is_lock_data) -#define is_read_unlock(is) mutex_unlock(&(is)->is_lock_data) -#define is_read_sleep(is) thread_sleep_mutex((event_t)(is), \ - &(is)->is_lock_data, \ - THREAD_UNINT) - -#define is_write_lock(is) mutex_lock(&(is)->is_lock_data) -#define is_write_lock_try(is) mutex_try(&(is)->is_lock_data) -#define is_write_unlock(is) mutex_unlock(&(is)->is_lock_data) -#define is_write_sleep(is) thread_sleep_mutex((event_t)(is), \ - &(is)->is_lock_data, \ - THREAD_UNINT) +#define is_lock_init(is) lck_mtx_init(&(is)->is_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define is_lock_destroy(is) lck_mtx_destroy(&(is)->is_lock_data, &ipc_lck_grp) + +#define is_read_lock(is) lck_mtx_lock(&(is)->is_lock_data) +#define is_read_unlock(is) 
lck_mtx_unlock(&(is)->is_lock_data) +#define is_read_sleep(is) lck_mtx_sleep(&(is)->is_lock_data, \ + LCK_SLEEP_DEFAULT, \ + (event_t)(is), \ + THREAD_UNINT) + +#define is_write_lock(is) lck_mtx_lock(&(is)->is_lock_data) +#define is_write_lock_try(is) lck_mtx_try_lock(&(is)->is_lock_data) +#define is_write_unlock(is) lck_mtx_unlock(&(is)->is_lock_data) +#define is_write_sleep(is) lck_mtx_sleep(&(is)->is_lock_data, \ + LCK_SLEEP_DEFAULT, \ + (event_t)(is), \ + THREAD_UNINT) #define is_reference(is) ipc_space_reference(is) #define is_release(is) ipc_space_release(is) diff --git a/osfmk/ipc/ipc_table.c b/osfmk/ipc/ipc_table.c index 090bc78b2..bfbac619e 100644 --- a/osfmk/ipc/ipc_table.c +++ b/osfmk/ipc/ipc_table.c @@ -111,7 +111,7 @@ ipc_table_fill( (index < num) && (size < PAGE_SIZE); size <<= 1) { if (size >= minsize) { - its[index].its_size = size / elemsize; + its[index].its_size = (ipc_table_elems_t)(size / elemsize); index++; } } @@ -125,7 +125,7 @@ ipc_table_fill( (period < 15) && (index < num); period++, size += incrsize) { if (size >= minsize) { - its[index].its_size = size / elemsize; + its[index].its_size = (ipc_table_elems_t)(size / elemsize); index++; } } diff --git a/osfmk/ipc/mach_debug.c b/osfmk/ipc/mach_debug.c index ed8745e3f..04442f1fd 100644 --- a/osfmk/ipc/mach_debug.c +++ b/osfmk/ipc/mach_debug.c @@ -329,7 +329,7 @@ mach_port_space_info( if (entry->ie_request) iin->iin_type |= MACH_PORT_TYPE_DNREQUEST; iin->iin_urefs = IE_BITS_UREFS(bits); - iin->iin_object = (vm_offset_t) entry->ie_object; + iin->iin_object = (natural_t)(uintptr_t)entry->ie_object; iin->iin_next = entry->ie_next; iin->iin_hash = entry->ie_index; } @@ -352,7 +352,7 @@ mach_port_space_info( if (entry->ie_request) iin->iin_type |= MACH_PORT_TYPE_DNREQUEST; iin->iin_urefs = IE_BITS_UREFS(bits); - iin->iin_object = (vm_offset_t) entry->ie_object; + iin->iin_object = (natural_t)(uintptr_t)entry->ie_object; iin->iin_next = entry->ie_next; iin->iin_hash = entry->ie_index; @@ -482,10 
+482,13 @@ mach_port_dnrequest_info( #endif /* MACH_IPC_DEBUG */ /* - * Routine: mach_port_kernel_object [kernel call] + * Routine: mach_port_kobject [kernel call] * Purpose: * Retrieve the type and address of the kernel object - * represented by a send or receive right. + * represented by a send or receive right. Returns + * the kernel address in a mach_vm_address_t to + * mask potential differences in kernel address space + * size. * Conditions: * Nothing locked. * Returns: @@ -499,21 +502,21 @@ mach_port_dnrequest_info( #if !MACH_IPC_DEBUG kern_return_t -mach_port_kernel_object( +mach_port_kobject( __unused ipc_space_t space, __unused mach_port_name_t name, - __unused unsigned int *typep, - __unused vm_offset_t *addrp) + __unused natural_t *typep, + __unused mach_vm_address_t *addrp) { return KERN_FAILURE; } #else kern_return_t -mach_port_kernel_object( +mach_port_kobject( ipc_space_t space, mach_port_name_t name, - unsigned int *typep, - vm_offset_t *addrp) + natural_t *typep, + mach_vm_address_t *addrp) { ipc_entry_t entry; ipc_port_t port; @@ -544,9 +547,53 @@ mach_port_kernel_object( } *typep = (unsigned int) ip_kotype(port); - *addrp = (vm_offset_t) port->ip_kobject; + *addrp = (mach_vm_address_t)port->ip_kobject; ip_unlock(port); return KERN_SUCCESS; } #endif /* MACH_IPC_DEBUG */ +/* + * Routine: mach_port_kernel_object [Legacy kernel call] + * Purpose: + * Retrieve the type and address of the kernel object + * represented by a send or receive right. Hard-coded + * to return only the low-order 32-bits of the kernel + * object. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Retrieved kernel object info. + * KERN_INVALID_TASK The space is null. + * KERN_INVALID_TASK The space is dead. + * KERN_INVALID_NAME The name doesn't denote a right. + * KERN_INVALID_RIGHT Name doesn't denote + * send or receive rights. 
+ */ + +#if !MACH_IPC_DEBUG +kern_return_t +mach_port_kernel_object( + __unused ipc_space_t space, + __unused mach_port_name_t name, + __unused unsigned int *typep, + __unused unsigned int *addrp) +{ + return KERN_FAILURE; +} +#else +kern_return_t +mach_port_kernel_object( + ipc_space_t space, + mach_port_name_t name, + unsigned int *typep, + unsigned int *addrp) +{ + mach_vm_address_t addr = 0; + kern_return_t kr; + + kr = mach_port_kobject(space, name, typep, &addr); + *addrp = (unsigned int) addr; + return kr; +} +#endif /* MACH_IPC_DEBUG */ diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index 8d3f17e4b..8137915f0 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -136,8 +136,6 @@ mach_msg_return_t mach_msg_receive( mach_msg_size_t slist_size); -mach_msg_return_t mach_msg_receive_results(void); - mach_msg_return_t msg_receive_error( ipc_kmsg_t kmsg, mach_vm_address_t msg_addr, @@ -316,16 +314,18 @@ mach_msg_receive_results(void) round_msg(kmsg->ikm_header->msgh_size)); if (option & MACH_RCV_TRAILER_MASK) { trailer->msgh_seqno = seqno; + trailer->msgh_context = + kmsg->ikm_header->msgh_remote_port->ip_context; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); - - if (option & MACH_RCV_TRAILER_ELEMENTS (MACH_RCV_TRAILER_AV)) { + if (MACH_RCV_TRAILER_ELEMENTS(option) >= + MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AV)){ #if CONFIG_MACF_MACH if (kmsg->ikm_sender != NULL && IP_VALID(kmsg->ikm_header->msgh_remote_port) && mac_port_check_method(kmsg->ikm_sender, &kmsg->ikm_sender->maclabel, - &((ipc_port_t)kmsg->ikm_header->msgh_remote_port)->ip_label, + &kmsg->ikm_header->msgh_remote_port->ip_label, kmsg->ikm_header->msgh_id) == 0) trailer->msgh_ad = 1; else @@ -452,185 +452,6 @@ mach_msg_receive_continue(void) (*self->ith_continuation)(mach_msg_receive_results()); } -/* - * Toggle this to compile the hotpath in/out - * If compiled in, the run-time toggle "enable_hotpath" below - * eases testing & debugging - */ -#define 
ENABLE_HOTPATH 1 /* Hacked on for now */ - -#if ENABLE_HOTPATH -/* - * These counters allow tracing of hotpath behavior under test loads. - * A couple key counters are unconditional (see below). - */ -#define HOTPATH_DEBUG 0 /* Toggle to include lots of counters */ -#if HOTPATH_DEBUG -#define HOT(expr) expr - -unsigned int c_mmot_FIRST = 0; /* Unused First Counter */ -unsigned int c_mmot_combined_S_R = 0; /* hotpath candidates */ -unsigned int c_mach_msg_trap_switch_fast = 0; /* hotpath successes */ -unsigned int c_mmot_kernel_send = 0; /* kernel server */ -unsigned int c_mmot_cold_000 = 0; /* see below ... */ -unsigned int c_mmot_smallsendsize = 0; -unsigned int c_mmot_oddsendsize = 0; -unsigned int c_mmot_bigsendsize = 0; -unsigned int c_mmot_copyinmsg_fail = 0; -unsigned int c_mmot_g_slow_copyin3 = 0; -unsigned int c_mmot_cold_006 = 0; -unsigned int c_mmot_cold_007 = 0; -unsigned int c_mmot_cold_008 = 0; -unsigned int c_mmot_cold_009 = 0; -unsigned int c_mmot_cold_010 = 0; -unsigned int c_mmot_cold_012 = 0; -unsigned int c_mmot_cold_013 = 0; -unsigned int c_mmot_cold_014 = 0; -unsigned int c_mmot_cold_016 = 0; -unsigned int c_mmot_cold_018 = 0; -unsigned int c_mmot_cold_019 = 0; -unsigned int c_mmot_cold_020 = 0; -unsigned int c_mmot_cold_021 = 0; -unsigned int c_mmot_cold_022 = 0; -unsigned int c_mmot_cold_023 = 0; -unsigned int c_mmot_cold_024 = 0; -unsigned int c_mmot_cold_025 = 0; -unsigned int c_mmot_cold_026 = 0; -unsigned int c_mmot_cold_027 = 0; -unsigned int c_mmot_hot_fSR_ok = 0; -unsigned int c_mmot_cold_029 = 0; -unsigned int c_mmot_cold_030 = 0; -unsigned int c_mmot_cold_031 = 0; -unsigned int c_mmot_cold_032 = 0; -unsigned int c_mmot_cold_033 = 0; -unsigned int c_mmot_bad_rcvr = 0; -unsigned int c_mmot_rcvr_swapped = 0; -unsigned int c_mmot_rcvr_locked = 0; -unsigned int c_mmot_rcvr_tswapped = 0; -unsigned int c_mmot_rcvr_freed = 0; -unsigned int c_mmot_g_slow_copyout6 = 0; -unsigned int c_mmot_g_slow_copyout5 = 0; -unsigned int c_mmot_cold_037 = 
0; -unsigned int c_mmot_cold_038 = 0; -unsigned int c_mmot_cold_039 = 0; -unsigned int c_mmot_g_slow_copyout4 = 0; -unsigned int c_mmot_g_slow_copyout3 = 0; -unsigned int c_mmot_hot_ok1 = 0; -unsigned int c_mmot_hot_ok2 = 0; -unsigned int c_mmot_hot_ok3 = 0; -unsigned int c_mmot_g_slow_copyout1 = 0; -unsigned int c_mmot_g_slow_copyout2 = 0; -unsigned int c_mmot_getback_fast_copyin = 0; -unsigned int c_mmot_cold_048 = 0; -unsigned int c_mmot_getback_FastSR = 0; -unsigned int c_mmot_cold_050 = 0; -unsigned int c_mmot_cold_051 = 0; -unsigned int c_mmot_cold_052 = 0; -unsigned int c_mmot_cold_053 = 0; -unsigned int c_mmot_fastkernelreply = 0; -unsigned int c_mmot_cold_055 = 0; -unsigned int c_mmot_getback_fast_put = 0; -unsigned int c_mmot_LAST = 0; /* End Marker - Unused */ - -void db_mmot_zero_counters(void); /* forward; */ -void db_mmot_show_counters(void); /* forward; */ - -void /* Call from the debugger to clear all counters */ -db_mmot_zero_counters(void) -{ - register unsigned int *ip = &c_mmot_FIRST; - while (ip <= &c_mmot_LAST) - *ip++ = 0; -} - -void /* Call from the debugger to show all counters */ -db_mmot_show_counters(void) -{ -#define xx(str) printf("%s: %d\n", # str, str); - - xx(c_mmot_combined_S_R); - xx(c_mach_msg_trap_switch_fast); - xx(c_mmot_kernel_send); - xx(c_mmot_cold_000); - xx(c_mmot_smallsendsize); - xx(c_mmot_oddsendsize); - xx(c_mmot_bigsendsize); - xx(c_mmot_copyinmsg_fail); - xx(c_mmot_g_slow_copyin3); - xx(c_mmot_cold_006); - xx(c_mmot_cold_007); - xx(c_mmot_cold_008); - xx(c_mmot_cold_009); - xx(c_mmot_cold_010); - xx(c_mmot_cold_012); - xx(c_mmot_cold_013); - xx(c_mmot_cold_014); - xx(c_mmot_cold_016); - xx(c_mmot_cold_018); - xx(c_mmot_cold_019); - xx(c_mmot_cold_020); - xx(c_mmot_cold_021); - xx(c_mmot_cold_022); - xx(c_mmot_cold_023); - xx(c_mmot_cold_024); - xx(c_mmot_cold_025); - xx(c_mmot_cold_026); - xx(c_mmot_cold_027); - xx(c_mmot_hot_fSR_ok); - xx(c_mmot_cold_029); - xx(c_mmot_cold_030); - xx(c_mmot_cold_031); - 
xx(c_mmot_cold_032); - xx(c_mmot_cold_033); - xx(c_mmot_bad_rcvr); - xx(c_mmot_rcvr_swapped); - xx(c_mmot_rcvr_locked); - xx(c_mmot_rcvr_tswapped); - xx(c_mmot_rcvr_freed); - xx(c_mmot_g_slow_copyout6); - xx(c_mmot_g_slow_copyout5); - xx(c_mmot_cold_037); - xx(c_mmot_cold_038); - xx(c_mmot_cold_039); - xx(c_mmot_g_slow_copyout4); - xx(c_mmot_g_slow_copyout3); - xx(c_mmot_g_slow_copyout1); - xx(c_mmot_hot_ok3); - xx(c_mmot_hot_ok2); - xx(c_mmot_hot_ok1); - xx(c_mmot_g_slow_copyout2); - xx(c_mmot_getback_fast_copyin); - xx(c_mmot_cold_048); - xx(c_mmot_getback_FastSR); - xx(c_mmot_cold_050); - xx(c_mmot_cold_051); - xx(c_mmot_cold_052); - xx(c_mmot_cold_053); - xx(c_mmot_fastkernelreply); - xx(c_mmot_cold_055); - xx(c_mmot_getback_fast_put); - -#undef xx -} - -#else /* !HOTPATH_DEBUG */ - -/* - * Duplicate just these few so we can always do a quick sanity check - */ -unsigned int c_mmot_combined_S_R = 0; /* hotpath candidates */ -unsigned int c_mach_msg_trap_switch_fast = 0; /* hotpath successes */ -unsigned int c_mmot_kernel_send = 0; /* kernel server calls */ -#define HOT(expr) /* no optional counters */ - -#endif /* !HOTPATH_DEBUG */ - -#if CONFIG_MACF_MACH -boolean_t enable_hotpath = FALSE; /* XXX - push MAC into HOTPATH too */ -#else -boolean_t enable_hotpath = TRUE; /* Patchable, just in case ... 
*/ -#endif -#endif /* HOTPATH_ENABLE */ /* * Routine: mach_msg_overwrite_trap [mach trap] @@ -658,1183 +479,10 @@ mach_msg_overwrite_trap( __unused mach_port_seqno_t temp_seqno = 0; mach_msg_return_t mr = MACH_MSG_SUCCESS; -#if ENABLE_HOTPATH - /* mask out some of the options before entering the hot path */ - mach_msg_option_t masked_option = - option & ~(MACH_SEND_TRAILER|MACH_RCV_TRAILER_MASK|MACH_RCV_LARGE); - register mach_msg_header_t *hdr; - - if ((masked_option == (MACH_SEND_MSG|MACH_RCV_MSG)) && enable_hotpath) { - thread_t self = current_thread(); - mach_msg_format_0_trailer_t *trailer; - ipc_space_t space = self->task->itk_space; - ipc_kmsg_t kmsg; - register ipc_port_t dest_port; - ipc_object_t rcv_object; - ipc_mqueue_t rcv_mqueue; - mach_msg_size_t reply_size; - - c_mmot_combined_S_R++; - - /* - * This case is divided into ten sections, each - * with a label. There are five optimized - * sections and six unoptimized sections, which - * do the same thing but handle all possible - * cases and are slower. - * - * The five sections for an RPC are - * 1) Get request message into a buffer. - * 2) Copyin request message and rcv_name. - * (fast_copyin or slow_copyin) - * 3) Enqueue request and dequeue reply. - * (fast_send_receive or - * slow_send and slow_receive) - * 4) Copyout reply message. - * (fast_copyout or slow_copyout) - * 5) Put reply message to user's buffer. - * - * Keep the locking hierarchy firmly in mind. - * (First spaces, then ports, then port sets, - * then message queues.) Only a non-blocking - * attempt can be made to acquire locks out of - * order, or acquire two locks on the same level. - * Acquiring two locks on the same level will - * fail if the objects are really the same, - * unless simple locking is disabled. This is OK, - * because then the extra unlock does nothing. - * - * There are two major reasons these RPCs can't use - * ipc_thread_switch, and use slow_send/slow_receive: - * 1) Kernel RPCs. 
- * 2) Servers fall behind clients, so - * client doesn't find a blocked server thread and - * server finds waiting messages and can't block. - */ - - mr = ipc_kmsg_get(msg_addr, send_size, &kmsg); - if (mr != KERN_SUCCESS) { - return mr; - } - hdr = kmsg->ikm_header; - trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr + - send_size); - - /* - * fast_copyin: - * - * optimized ipc_kmsg_copyin/ipc_mqueue_copyin - * - * We have the request message data in kmsg. - * Must still do copyin, send, receive, etc. - * - * If the message isn't simple, we can't combine - * ipc_kmsg_copyin_header and ipc_mqueue_copyin, - * because copyin of the message body might - * affect rcv_name. - */ - - switch (hdr->msgh_bits) { - case MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, - MACH_MSG_TYPE_MAKE_SEND_ONCE): { - register ipc_entry_t table; - register ipc_entry_num_t size; - register ipc_port_t reply_port; - - /* sending a request message */ - - { - register mach_port_index_t index; - register mach_port_gen_t gen; - - { - register mach_port_name_t reply_name = - (mach_port_name_t)hdr->msgh_local_port; - - if (reply_name != rcv_name) { - HOT(c_mmot_g_slow_copyin3++); - goto slow_copyin; - } - - /* optimized ipc_entry_lookup of reply_name */ - - index = MACH_PORT_INDEX(reply_name); - gen = MACH_PORT_GEN(reply_name); - - is_read_lock(space); - assert(space->is_active); - - size = space->is_table_size; - table = space->is_table; - - { - register ipc_entry_t entry; - register ipc_entry_bits_t bits; - - if (index < size) { - entry = &table[index]; - bits = entry->ie_bits; - if (IE_BITS_GEN(bits) != gen || - (bits & IE_BITS_COLLISION)) { - entry = IE_NULL; - } - } else { - entry = IE_NULL; - bits = 0; - } - if (entry == IE_NULL) { - entry = ipc_entry_lookup(space, reply_name); - if (entry == IE_NULL) { - HOT(c_mmot_cold_006++); - goto abort_request_copyin; - } - bits = entry->ie_bits; - } - - /* check type bit */ - - if (! 
(bits & MACH_PORT_TYPE_RECEIVE)) { - HOT(c_mmot_cold_007++); - goto abort_request_copyin; - } - - reply_port = (ipc_port_t) entry->ie_object; - assert(reply_port != IP_NULL); - } - } - } - - /* optimized ipc_entry_lookup of dest_name */ - - { - register mach_port_index_t index; - register mach_port_gen_t gen; - - { - register mach_port_name_t dest_name = - (mach_port_name_t)hdr->msgh_remote_port; - - index = MACH_PORT_INDEX(dest_name); - gen = MACH_PORT_GEN(dest_name); - - { - register ipc_entry_t entry; - register ipc_entry_bits_t bits; - - if (index < size) { - entry = &table[index]; - bits = entry->ie_bits; - if (IE_BITS_GEN(bits) != gen || - (bits & IE_BITS_COLLISION)) { - entry = IE_NULL; - } - } else { - entry = IE_NULL; - bits = 0; - } - if (entry == IE_NULL) { - entry = ipc_entry_lookup(space, dest_name); - if (entry == IE_NULL) { - HOT(c_mmot_cold_008++); - goto abort_request_copyin; - } - bits = entry->ie_bits; - } - - /* check type bit */ - - if (! (bits & MACH_PORT_TYPE_SEND)) { - HOT(c_mmot_cold_009++); - goto abort_request_copyin; - } - - assert(IE_BITS_UREFS(bits) > 0); - - dest_port = (ipc_port_t) entry->ie_object; - assert(dest_port != IP_NULL); - } - } - } - - /* - * To do an atomic copyin, need simultaneous - * locks on both ports and the space. If - * dest_port == reply_port, and simple locking is - * enabled, then we will abort. Otherwise it's - * OK to unlock twice. 
- */ - - ip_lock(dest_port); - if (!ip_active(dest_port) || - !ip_lock_try(reply_port)) { - ip_unlock(dest_port); - HOT(c_mmot_cold_010++); - goto abort_request_copyin; - } - is_read_unlock(space); - - assert(dest_port->ip_srights > 0); - dest_port->ip_srights++; - ip_reference(dest_port); - - assert(ip_active(reply_port)); - assert(reply_port->ip_receiver_name == - (mach_port_name_t)hdr->msgh_local_port); - assert(reply_port->ip_receiver == space); - - reply_port->ip_sorights++; - ip_reference(reply_port); - - hdr->msgh_bits = - MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, - MACH_MSG_TYPE_PORT_SEND_ONCE); - hdr->msgh_remote_port = dest_port; - hdr->msgh_local_port = reply_port; - - /* make sure we can queue to the destination */ - - if (dest_port->ip_receiver == ipc_space_kernel) { - /* - * The kernel server has a reference to - * the reply port, which it hands back - * to us in the reply message. We do - * not need to keep another reference to - * it. - */ - ip_unlock(reply_port); - - assert(ip_active(dest_port)); - dest_port->ip_messages.imq_seqno++; - ip_unlock(dest_port); - goto kernel_send; - } - - if (imq_full(&dest_port->ip_messages)) { - HOT(c_mmot_cold_013++); - goto abort_request_send_receive; - } - - /* optimized ipc_mqueue_copyin */ - - rcv_object = (ipc_object_t) reply_port; - io_reference(rcv_object); - rcv_mqueue = &reply_port->ip_messages; - io_unlock(rcv_object); - HOT(c_mmot_hot_fSR_ok++); - goto fast_send_receive; - - abort_request_copyin: - is_read_unlock(space); - goto slow_copyin; - - abort_request_send_receive: - ip_unlock(dest_port); - ip_unlock(reply_port); - goto slow_send; - } - - case MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0): { - register ipc_entry_num_t size; - register ipc_entry_t table; - - /* sending a reply message */ - - { - register mach_port_name_t reply_name = - (mach_port_name_t)hdr->msgh_local_port; - - if (reply_name != MACH_PORT_NULL) { - HOT(c_mmot_cold_018++); - goto slow_copyin; - } - } - - is_write_lock(space); - 
assert(space->is_active); - - /* optimized ipc_entry_lookup */ - - size = space->is_table_size; - table = space->is_table; - - { - register ipc_entry_t entry; - register mach_port_gen_t gen; - register mach_port_index_t index; - - { - register mach_port_name_t dest_name = - (mach_port_name_t)hdr->msgh_remote_port; - - index = MACH_PORT_INDEX(dest_name); - gen = MACH_PORT_GEN(dest_name); - } - - if (index >= size) { - HOT(c_mmot_cold_019++); - goto abort_reply_dest_copyin; - } - - entry = &table[index]; - - /* check generation, collision bit, and type bit */ - - if ((entry->ie_bits & (IE_BITS_GEN_MASK| - IE_BITS_COLLISION| - MACH_PORT_TYPE_SEND_ONCE)) != - (gen | MACH_PORT_TYPE_SEND_ONCE)) { - HOT(c_mmot_cold_020++); - goto abort_reply_dest_copyin; - } - - /* optimized ipc_right_copyin */ - - assert(IE_BITS_TYPE(entry->ie_bits) == - MACH_PORT_TYPE_SEND_ONCE); - assert(IE_BITS_UREFS(entry->ie_bits) == 1); - - if (entry->ie_request != 0) { - HOT(c_mmot_cold_021++); - goto abort_reply_dest_copyin; - } - - dest_port = (ipc_port_t) entry->ie_object; - assert(dest_port != IP_NULL); - - ip_lock(dest_port); - if (!ip_active(dest_port)) { - ip_unlock(dest_port); - HOT(c_mmot_cold_022++); - goto abort_reply_dest_copyin; - } - - assert(dest_port->ip_sorights > 0); - - /* optimized ipc_entry_dealloc */ - - - entry->ie_bits = gen; - entry->ie_next = table->ie_next; - table->ie_next = index; - entry->ie_object = IO_NULL; - } - - hdr->msgh_bits = - MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, - 0); - hdr->msgh_remote_port = dest_port; - - /* make sure we can queue to the destination */ - - assert(dest_port->ip_receiver != ipc_space_kernel); - - /* optimized ipc_entry_lookup/ipc_mqueue_copyin */ - - { - register ipc_entry_t entry; - register ipc_entry_bits_t bits; - - { - register mach_port_index_t index; - register mach_port_gen_t gen; - - index = MACH_PORT_INDEX(rcv_name); - gen = MACH_PORT_GEN(rcv_name); - - if (index < size) { - entry = &table[index]; - bits = entry->ie_bits; - 
if (IE_BITS_GEN(bits) != gen || - (bits & IE_BITS_COLLISION)) { - entry = IE_NULL; - } - } else { - entry = IE_NULL; - bits = 0; - } - if (entry == IE_NULL) { - entry = ipc_entry_lookup(space, rcv_name); - if (entry == IE_NULL) { - HOT(c_mmot_cold_024++); - goto abort_reply_rcv_copyin; - } - bits = entry->ie_bits; - } - - } - - /* check type bits; looking for receive or set */ -#if 0 - /* - * JMM - The check below for messages in the receive - * mqueue is insufficient to work with port sets, since - * the messages stay in the port queues. For now, don't - * allow portsets (but receiving on portsets when sending - * a message to a send-once right is actually a very - * common case (so we should re-enable). - */ - if (bits & MACH_PORT_TYPE_PORT_SET) { - register ipc_pset_t rcv_pset; - - rcv_pset = (ipc_pset_t) entry->ie_object; - assert(rcv_pset != IPS_NULL); - - ips_lock(rcv_pset); - assert(ips_active(rcv_pset)); - - rcv_object = (ipc_object_t) rcv_pset; - rcv_mqueue = &rcv_pset->ips_messages; - } else -#endif /* 0 */ - if (bits & MACH_PORT_TYPE_RECEIVE) { - register ipc_port_t rcv_port; - - rcv_port = (ipc_port_t) entry->ie_object; - assert(rcv_port != IP_NULL); - - if (!ip_lock_try(rcv_port)) { - HOT(c_mmot_cold_025++); - goto abort_reply_rcv_copyin; - } - assert(ip_active(rcv_port)); - - if (rcv_port->ip_pset_count != 0) { - ip_unlock(rcv_port); - HOT(c_mmot_cold_026++); - goto abort_reply_rcv_copyin; - } - - rcv_object = (ipc_object_t) rcv_port; - rcv_mqueue = &rcv_port->ip_messages; - } else { - HOT(c_mmot_cold_027++); - goto abort_reply_rcv_copyin; - } - } - - is_write_unlock(space); - io_reference(rcv_object); - io_unlock(rcv_object); - HOT(c_mmot_hot_fSR_ok++); - goto fast_send_receive; - - abort_reply_dest_copyin: - is_write_unlock(space); - HOT(c_mmot_cold_029++); - goto slow_copyin; - - abort_reply_rcv_copyin: - ip_unlock(dest_port); - is_write_unlock(space); - HOT(c_mmot_cold_030++); - goto slow_send; - } - - default: - HOT(c_mmot_cold_031++); - goto 
slow_copyin; - } - /*NOTREACHED*/ - - fast_send_receive: - /* - * optimized ipc_mqueue_send/ipc_mqueue_receive - * - * Finished get/copyin of kmsg and copyin of rcv_name. - * space is unlocked, dest_port is locked, - * we can queue kmsg to dest_port, - * rcv_mqueue is set, and rcv_object holds a ref - * so the mqueue cannot go away. - * - * JMM - For now, rcv_object is just a port. Portsets - * are disabled for the time being. - */ - - assert(ip_active(dest_port)); - assert(dest_port->ip_receiver != ipc_space_kernel); -// assert(!imq_full(&dest_port->ip_messages) || -// (MACH_MSGH_BITS_REMOTE(hdr->msgh_bits) == -// MACH_MSG_TYPE_PORT_SEND_ONCE)); - assert((hdr->msgh_bits & MACH_MSGH_BITS_CIRCULAR) == 0); - - { - register ipc_mqueue_t dest_mqueue; - wait_queue_t waitq; - thread_t receiver; - spl_t s; - - s = splsched(); - dest_mqueue = &dest_port->ip_messages; - waitq = &dest_mqueue->imq_wait_queue; - imq_lock(dest_mqueue); - - get_next_receiver: - receiver = wait_queue_wakeup64_identity_locked(waitq, - IPC_MQUEUE_RECEIVE, - THREAD_AWAKENED, - FALSE); - /* queue still locked, receiver thread locked (if any) */ - - if ( receiver == THREAD_NULL ) { - imq_unlock(dest_mqueue); - splx(s); - - ip_unlock(dest_port); - ipc_object_release(rcv_object); - HOT(c_mmot_cold_032++); - goto slow_send; - } - - /* - * Check that the receiver can handle the size of the message. - * If not, and the receiver just wants to be informed of that - * fact, set it running and try to find another thread. - * - * If he didn't want the "too large" message left on the queue, - * give it to him anyway, he'll consume it as part of his receive - * processing. 
- */ - if (receiver->ith_msize < - ipc_kmsg_copyout_size(kmsg, receiver->map) + - REQUESTED_TRAILER_SIZE(receiver->ith_option)) - { - receiver->ith_msize = kmsg->ikm_header->msgh_size; - receiver->ith_state = MACH_RCV_TOO_LARGE; - - if ((receiver->ith_option & MACH_RCV_LARGE) != 0) { - receiver->ith_kmsg = IKM_NULL; - receiver->ith_seqno = 0; - thread_unlock(receiver); - HOT(c_mmot_bad_rcvr++); - goto get_next_receiver; - } - } else { - receiver->ith_state = MACH_MSG_SUCCESS; - } - - /* At this point we are committed to do the message handoff. */ - c_mach_msg_trap_switch_fast++; - - /* - * Store the kmsg and seqno where the receiver can pick it up. - * and set it running. - */ - receiver->ith_kmsg = kmsg; - receiver->ith_seqno = dest_mqueue->imq_seqno++; - thread_unlock(receiver); - - imq_unlock(dest_mqueue); - ip_unlock(dest_port); - current_task()->messages_sent++; - - /* - * Now prepare to wait on our receive queue. But we have to make - * sure the queue doesn't already have messages. If it does, we'll - * have to do a slow receive. - * - * JMM - Need to make this check appropriate for portsets as - * well before re-enabling them. - */ - imq_lock(rcv_mqueue); - - if (ipc_kmsg_queue_first(&rcv_mqueue->imq_messages) != IKM_NULL) { - imq_unlock(rcv_mqueue); - splx(s); - HOT(c_mmot_cold_033++); - goto slow_receive; - } - - /* - * Put self on receive port's queue. - * Also save state that the sender of - * our reply message needs to determine if it - * can hand off directly back to us. - */ - thread_lock(self); - self->ith_msg_addr = (rcv_msg_addr) ? 
rcv_msg_addr : msg_addr; - self->ith_object = rcv_object; /* still holds reference */ - self->ith_msize = rcv_size; - self->ith_option = option; - self->ith_scatter_list_size = scatter_list_size; - self->ith_continuation = thread_syscall_return; - - waitq = &rcv_mqueue->imq_wait_queue; - (void)wait_queue_assert_wait64_locked(waitq, - IPC_MQUEUE_RECEIVE, - THREAD_ABORTSAFE, 0, - self); - thread_unlock(self); - imq_unlock(rcv_mqueue); - splx(s); - thread_block(ipc_mqueue_receive_continue); - /* NOTREACHED */ - } - - fast_copyout: - /* - * Nothing locked and no references held, except - * we have kmsg with msgh_seqno filled in. Must - * still check against rcv_size and do - * ipc_kmsg_copyout/ipc_kmsg_put. - */ - - reply_size = send_size + trailer->msgh_trailer_size; - if (rcv_size < reply_size) { - HOT(c_mmot_g_slow_copyout6++); - goto slow_copyout; - } - - /* optimized ipc_kmsg_copyout/ipc_kmsg_copyout_header */ - - switch (hdr->msgh_bits) { - case MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, - MACH_MSG_TYPE_PORT_SEND_ONCE): { - ipc_port_t reply_port = - (ipc_port_t) hdr->msgh_local_port; - mach_port_name_t dest_name, reply_name; - - /* receiving a request message */ - - if (!IP_VALID(reply_port)) { - HOT(c_mmot_g_slow_copyout5++); - goto slow_copyout; - } - - is_write_lock(space); - assert(space->is_active); - - /* - * To do an atomic copyout, need simultaneous - * locks on both ports and the space. If - * dest_port == reply_port, and simple locking is - * enabled, then we will abort. Otherwise it's - * OK to unlock twice. 
- */ - - ip_lock(dest_port); - if (!ip_active(dest_port) || - !ip_lock_try(reply_port)) { - HOT(c_mmot_cold_037++); - goto abort_request_copyout; - } - - if (!ip_active(reply_port)) { - ip_unlock(reply_port); - HOT(c_mmot_cold_038++); - goto abort_request_copyout; - } - - assert(reply_port->ip_sorights > 0); - ip_unlock(reply_port); - - { - register ipc_entry_t table; - register ipc_entry_t entry; - register mach_port_index_t index; - - /* optimized ipc_entry_get */ - - table = space->is_table; - index = table->ie_next; - - if (index == 0) { - HOT(c_mmot_cold_039++); - goto abort_request_copyout; - } - - entry = &table[index]; - table->ie_next = entry->ie_next; - entry->ie_request = 0; - - { - register mach_port_gen_t gen; - - assert((entry->ie_bits &~ IE_BITS_GEN_MASK) == 0); - gen = IE_BITS_NEW_GEN(entry->ie_bits); - - reply_name = MACH_PORT_MAKE(index, gen); - - /* optimized ipc_right_copyout */ - - entry->ie_bits = gen | (MACH_PORT_TYPE_SEND_ONCE | 1); - } - - assert(MACH_PORT_VALID(reply_name)); - entry->ie_object = (ipc_object_t) reply_port; - is_write_unlock(space); - } - - /* optimized ipc_object_copyout_dest */ - - assert(dest_port->ip_srights > 0); - ip_release(dest_port); - - if (dest_port->ip_receiver == space) - dest_name = dest_port->ip_receiver_name; - else - dest_name = MACH_PORT_NULL; - - if ((--dest_port->ip_srights == 0) && - (dest_port->ip_nsrequest != IP_NULL)) { - ipc_port_t nsrequest; - mach_port_mscount_t mscount; - - /* a rather rare case */ - - nsrequest = dest_port->ip_nsrequest; - mscount = dest_port->ip_mscount; - dest_port->ip_nsrequest = IP_NULL; - ip_unlock(dest_port); - ipc_notify_no_senders(nsrequest, mscount); - } else - ip_unlock(dest_port); - - hdr->msgh_bits = - MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, - MACH_MSG_TYPE_PORT_SEND); - hdr->msgh_remote_port = (mach_port_t)reply_name; - hdr->msgh_local_port = (mach_port_t)dest_name; - HOT(c_mmot_hot_ok1++); - goto fast_put; - - abort_request_copyout: - ip_unlock(dest_port); - 
is_write_unlock(space); - HOT(c_mmot_g_slow_copyout4++); - goto slow_copyout; - } - - case MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0): { - register mach_port_name_t dest_name; - - /* receiving a reply message */ - - ip_lock(dest_port); - if (!ip_active(dest_port)) { - ip_unlock(dest_port); - HOT(c_mmot_g_slow_copyout3++); - goto slow_copyout; - } - - /* optimized ipc_object_copyout_dest */ - - assert(dest_port->ip_sorights > 0); - - if (dest_port->ip_receiver == space) { - ip_release(dest_port); - dest_port->ip_sorights--; - dest_name = dest_port->ip_receiver_name; - ip_unlock(dest_port); - } else { - ip_unlock(dest_port); - - ipc_notify_send_once(dest_port); - dest_name = MACH_PORT_NULL; - } - - hdr->msgh_bits = MACH_MSGH_BITS(0, - MACH_MSG_TYPE_PORT_SEND_ONCE); - hdr->msgh_remote_port = MACH_PORT_NULL; - hdr->msgh_local_port = (ipc_port_t)dest_name; - HOT(c_mmot_hot_ok2++); - goto fast_put; - } - - case MACH_MSGH_BITS_COMPLEX| - MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0): { - register mach_port_name_t dest_name; - - /* receiving a complex reply message */ - - ip_lock(dest_port); - if (!ip_active(dest_port)) { - ip_unlock(dest_port); - HOT(c_mmot_g_slow_copyout1++); - goto slow_copyout; - } - - /* optimized ipc_object_copyout_dest */ - - assert(dest_port->ip_sorights > 0); - - if (dest_port->ip_receiver == space) { - ip_release(dest_port); - dest_port->ip_sorights--; - dest_name = dest_port->ip_receiver_name; - ip_unlock(dest_port); - } else { - ip_unlock(dest_port); - - ipc_notify_send_once(dest_port); - dest_name = MACH_PORT_NULL; - } - - hdr->msgh_bits = - MACH_MSGH_BITS_COMPLEX | - MACH_MSGH_BITS(0, MACH_MSG_TYPE_PORT_SEND_ONCE); - hdr->msgh_remote_port = MACH_PORT_NULL; - hdr->msgh_local_port = (mach_port_t)dest_name; - - mr = ipc_kmsg_copyout_body(kmsg, space, - current_map(), - MACH_MSG_BODY_NULL); - /* hdr and send_size may be invalid now - done use */ - if (mr != MACH_MSG_SUCCESS) { - if (ipc_kmsg_put(msg_addr, kmsg, - 
kmsg->ikm_header->msgh_size + - trailer->msgh_trailer_size) == - MACH_RCV_INVALID_DATA) - return MACH_RCV_INVALID_DATA; - else - return mr | MACH_RCV_BODY_ERROR; - } - HOT(c_mmot_hot_ok3++); - goto fast_put; - } - - default: - HOT(c_mmot_g_slow_copyout2++); - goto slow_copyout; - } - /*NOTREACHED*/ - - fast_put: - mr = ipc_kmsg_put(rcv_msg_addr ? rcv_msg_addr : msg_addr, - kmsg, - kmsg->ikm_header->msgh_size + - trailer->msgh_trailer_size); - if (mr != MACH_MSG_SUCCESS) { - return MACH_RCV_INVALID_DATA; - } - current_task()->messages_received++; - return mr; - - - /* BEGINNING OF WARM PATH */ - - /* - * The slow path has a few non-register temporary - * variables used only for call-by-reference. - */ - - slow_copyin: - { - register mach_port_name_t reply_name = - (mach_port_name_t)hdr->msgh_local_port; - - - /* - * We have the message data in kmsg, but - * we still need to copyin, send it, - * receive a reply, and do copyout. - */ - - mr = ipc_kmsg_copyin(kmsg, space, current_map(), - MACH_PORT_NULL); - if (mr != MACH_MSG_SUCCESS) { - ipc_kmsg_free(kmsg); - return(mr); - } - - /* - * LP64support - We have to recompute the header pointer - * and send_size - as they could have changed during the - * complex copyin. - */ - hdr = kmsg->ikm_header; - send_size = hdr->msgh_size; - - /* try to get back on optimized path */ - if ((reply_name != rcv_name) || - (hdr->msgh_bits & MACH_MSGH_BITS_CIRCULAR)) { - HOT(c_mmot_cold_048++); - goto slow_send; - } - - dest_port = (ipc_port_t) hdr->msgh_remote_port; - assert(IP_VALID(dest_port)); - - ip_lock(dest_port); - if (!ip_active(dest_port)) { - ip_unlock(dest_port); - goto slow_send; - } - - if (dest_port->ip_receiver == ipc_space_kernel) { - dest_port->ip_messages.imq_seqno++; - ip_unlock(dest_port); - goto kernel_send; - } - - if (!imq_full(&dest_port->ip_messages) || - (MACH_MSGH_BITS_REMOTE(hdr->msgh_bits) == - MACH_MSG_TYPE_PORT_SEND_ONCE)) - { - /* - * Try an optimized ipc_mqueue_copyin. 
- * It will work if this is a request message. - */ - - register ipc_port_t reply_port; - - reply_port = (ipc_port_t) hdr->msgh_local_port; - if (IP_VALID(reply_port)) { - if (ip_lock_try(reply_port)) { - if (ip_active(reply_port) && - reply_port->ip_receiver == space && - reply_port->ip_receiver_name == rcv_name && - reply_port->ip_pset_count == 0) - { - /* Grab a reference to the reply port. */ - rcv_object = (ipc_object_t) reply_port; - io_reference(rcv_object); - rcv_mqueue = &reply_port->ip_messages; - io_unlock(rcv_object); - HOT(c_mmot_getback_FastSR++); - goto fast_send_receive; - } - ip_unlock(reply_port); - } - } - } - - ip_unlock(dest_port); - HOT(c_mmot_cold_050++); - goto slow_send; - - kernel_send: - /* - * Special case: send message to kernel services. - * The request message has been copied into the - * kmsg. Nothing is locked. - */ - - { - register ipc_port_t reply_port; - spl_t s; - - /* - * Perform the kernel function. - */ - c_mmot_kernel_send++; - - current_task()->messages_sent++; - - kmsg = ipc_kobject_server(kmsg); - if (kmsg == IKM_NULL) { - /* - * No reply. Take the - * slow receive path. - */ - HOT(c_mmot_cold_051++); - goto slow_get_rcv_port; - } - - /* - * Check that: - * the reply port is alive - * we hold the receive right - * the name has not changed. - * the port is not in a set - * If any of these are not true, - * we cannot directly receive the reply - * message. 
- */ - hdr = kmsg->ikm_header; - send_size = hdr->msgh_size; - trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr + - round_msg(send_size)); - reply_port = (ipc_port_t) hdr->msgh_remote_port; - ip_lock(reply_port); - - if ((!ip_active(reply_port)) || - (reply_port->ip_receiver != space) || - (reply_port->ip_receiver_name != rcv_name) || - (reply_port->ip_pset_count != 0)) - { - /* try to enqueue by sending with an immediate timeout */ - ip_unlock(reply_port); - mr = ipc_kmsg_send(kmsg, MACH_SEND_TIMEOUT, 0); - if (mr != MACH_MSG_SUCCESS) { - ipc_kmsg_destroy(kmsg); - } - HOT(c_mmot_cold_052++); - goto slow_get_rcv_port; - } - - s = splsched(); - rcv_mqueue = &reply_port->ip_messages; - imq_lock(rcv_mqueue); - - /* keep port locked, and don`t change ref count yet */ - - /* - * If there are messages on the port - * or other threads waiting for a message, - * we cannot directly receive the reply. - * Try to enqueue it by sending with an - * immediate timeout. - */ - if (!wait_queue_empty(&rcv_mqueue->imq_wait_queue) || - (ipc_kmsg_queue_first(&rcv_mqueue->imq_messages) != IKM_NULL)) - { - imq_unlock(rcv_mqueue); - splx(s); - ip_unlock(reply_port); - mr = ipc_kmsg_send(kmsg, MACH_SEND_TIMEOUT, 0); - if (mr != MACH_MSG_SUCCESS) { - ipc_kmsg_destroy(kmsg); - } - HOT(c_mmot_cold_053++); - goto slow_get_rcv_port; - } - - /* - * We can directly receive this reply. - * Since there were no messages queued - * on the reply port, there should be - * no threads blocked waiting to send. - */ - dest_port = reply_port; - temp_seqno = rcv_mqueue->imq_seqno++; - imq_unlock(rcv_mqueue); - splx(s); - - /* - * inline ipc_object_release. - * Port is still locked. - * Reference count was not incremented. 
- */ - ip_check_unlock(reply_port); - - if (option & MACH_RCV_TRAILER_MASK) { - trailer->msgh_seqno = temp_seqno; - trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); - } - /* copy out the kernel reply */ - HOT(c_mmot_fastkernelreply++); - goto fast_copyout; - } - - slow_send: - /* - * Nothing is locked. We have acquired kmsg, but - * we still need to send it and receive a reply. - */ - - mr = ipc_kmsg_send(kmsg, MACH_MSG_OPTION_NONE, - MACH_MSG_TIMEOUT_NONE); - if (mr != MACH_MSG_SUCCESS) { - mr |= ipc_kmsg_copyout_pseudo(kmsg, space, - current_map(), - MACH_MSG_BODY_NULL); - - (void) ipc_kmsg_put(msg_addr, kmsg, - kmsg->ikm_header->msgh_size); - return(mr); - } - - slow_get_rcv_port: - /* - * We have sent the message. Copy in the receive port. - */ - mr = ipc_mqueue_copyin(space, rcv_name, - &rcv_mqueue, &rcv_object); - if (mr != MACH_MSG_SUCCESS) { - return(mr); - } - /* hold ref for rcv_object */ - - /* - * - * Now we have sent the request and copied in rcv_name, - * and hold ref for rcv_object (to keep mqueue alive). - * Just receive a reply and try to get back to fast path. - */ - - slow_receive: - self->ith_continuation = (void (*)(mach_msg_return_t))0; - ipc_mqueue_receive(rcv_mqueue, - MACH_MSG_OPTION_NONE, - MACH_MSG_SIZE_MAX, - MACH_MSG_TIMEOUT_NONE, - THREAD_ABORTSAFE); - - mr = self->ith_state; - temp_seqno = self->ith_seqno; - - ipc_object_release(rcv_object); - - if (mr != MACH_MSG_SUCCESS) { - return(mr); - } - - kmsg = self->ith_kmsg; - hdr = kmsg->ikm_header; - send_size = hdr->msgh_size; - trailer = (mach_msg_format_0_trailer_t *) ((vm_offset_t) hdr + - round_msg(send_size)); - if (option & MACH_RCV_TRAILER_MASK) { - trailer->msgh_seqno = temp_seqno; - trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); - } - dest_port = (ipc_port_t) hdr->msgh_remote_port; - HOT(c_mmot_cold_055++); - goto fast_copyout; - - slow_copyout: - /* - * Nothing locked and no references held, except - * we have kmsg with msgh_seqno filled in. 
Must - * still check against rcv_size and do - * ipc_kmsg_copyout/ipc_kmsg_put. - */ - - /* LP64support - have to compute real size as it would be received */ - reply_size = ipc_kmsg_copyout_size(kmsg, current_map()) + - REQUESTED_TRAILER_SIZE(option); - temp_seqno = trailer->msgh_seqno; - if (rcv_size < reply_size) { - if (msg_receive_error(kmsg, msg_addr, option, temp_seqno, - space) == MACH_RCV_INVALID_DATA) { - mr = MACH_RCV_INVALID_DATA; - return(mr); - } - else { - mr = MACH_RCV_TOO_LARGE; - return(mr); - } - } - - mr = ipc_kmsg_copyout(kmsg, space, current_map(), - MACH_PORT_NULL, MACH_MSG_BODY_NULL); - if (mr != MACH_MSG_SUCCESS) { - if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { - if (ipc_kmsg_put(msg_addr, kmsg, reply_size) == - MACH_RCV_INVALID_DATA) - mr = MACH_RCV_INVALID_DATA; - } - else { - if (msg_receive_error(kmsg, msg_addr, option, - temp_seqno, space) == MACH_RCV_INVALID_DATA) - mr = MACH_RCV_INVALID_DATA; - } - - return(mr); - } - - /* try to get back on optimized path */ - HOT(c_mmot_getback_fast_put++); - goto fast_put; - - /*NOTREACHED*/ - } - } /* END OF HOT PATH */ -#endif /* ENABLE_HOTPATH */ - + vm_map_t map = current_map(); + if (option & MACH_SEND_MSG) { ipc_space_t space = current_space(); - vm_map_t map = current_map(); ipc_kmsg_t kmsg; mr = ipc_kmsg_get(msg_addr, send_size, &kmsg); @@ -1892,6 +540,7 @@ mach_msg_overwrite_trap( self->ith_msize = rcv_size; self->ith_option = option; self->ith_scatter_list_size = scatter_list_size; + self->ith_receiver_name = MACH_PORT_NULL; self->ith_continuation = thread_syscall_return; ipc_mqueue_receive(mqueue, option, rcv_size, msg_timeout, THREAD_ABORTSAFE); @@ -1946,7 +595,10 @@ msg_receive_error( mach_port_seqno_t seqno, ipc_space_t space) { - mach_msg_format_0_trailer_t *trailer; + mach_msg_max_trailer_t *trailer; + mach_vm_address_t context; + + context = kmsg->ikm_header->msgh_remote_port->ip_context; /* * Copy out the destination port in the message. 
@@ -1957,7 +609,7 @@ msg_receive_error( /* * Build a minimal message with the requested trailer. */ - trailer = (mach_msg_format_0_trailer_t *) + trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + round_msg(sizeof(mach_msg_header_t))); kmsg->ikm_header->msgh_size = sizeof(mach_msg_header_t); @@ -1965,6 +617,7 @@ msg_receive_error( (char *)trailer, sizeof(trailer_template)); if (option & MACH_RCV_TRAILER_MASK) { + trailer->msgh_context = context; trailer->msgh_seqno = seqno; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); } diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index e220fc9fb..28d0fbc87 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -992,6 +992,88 @@ mach_port_set_seqno( return KERN_SUCCESS; } +/* + * Routine: mach_port_get_context [kernel call] + * Purpose: + * Returns a receive right's context pointer through *context. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Retrieved context pointer. + * KERN_INVALID_TASK The space is null. + * KERN_INVALID_TASK The space is dead. + * KERN_INVALID_NAME The name doesn't denote a right. + * KERN_INVALID_RIGHT Name fails MACH_PORT_VALID or doesn't denote receive rights. + */ + +kern_return_t +mach_port_get_context( + ipc_space_t space, + mach_port_name_t name, + mach_vm_address_t *context) +{ + ipc_port_t port; + kern_return_t kr; + + if (space == IS_NULL) + return KERN_INVALID_TASK; + + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + + /* port is locked and active */ + *context = port->ip_context; + + ip_unlock(port); + return KERN_SUCCESS; +} + + +/* + * Routine: mach_port_set_context [kernel call] + * Purpose: + * Changes a receive right's context pointer. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Set context pointer. + * KERN_INVALID_TASK The space is null. + * KERN_INVALID_TASK The space is dead.
+ * KERN_INVALID_NAME The name doesn't denote a right. + * KERN_INVALID_RIGHT Name doesn't denote receive rights. + */ + +kern_return_t +mach_port_set_context( + ipc_space_t space, + mach_port_name_t name, + mach_vm_address_t context) +{ + ipc_port_t port; + kern_return_t kr; + + if (space == IS_NULL) + return KERN_INVALID_TASK; + + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + + /* port is locked and active */ + port->ip_context = context; + + ip_unlock(port); + return KERN_SUCCESS; +} + + /* * Routine: mach_port_gst_helper * Purpose: @@ -1103,7 +1185,7 @@ mach_port_get_set_status( /* the port set must be active */ names = (mach_port_name_t *) addr; - maxnames = size / sizeof(mach_port_name_t); + maxnames = (ipc_entry_num_t)(size / sizeof(mach_port_name_t)); actual = 0; table = space->is_table; diff --git a/osfmk/kdp/Makefile b/osfmk/kdp/Makefile index 38d35f441..3382d9243 100644 --- a/osfmk/kdp/Makefile +++ b/osfmk/kdp/Makefile @@ -7,26 +7,14 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ +INSTINC_SUBDIRS = -INSTINC_SUBDIRS_PPC = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_ARM = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_PPC = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_ARM = \ +EXPINC_SUBDIRS = DATAFILES = \ kdp_callout.h \ kdp_en_debugger.h - + EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = kdp diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index 357f19ec7..21a29d47a 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -27,6 +27,7 @@ */ #include +#include #include #include @@ -41,9 +42,6 @@ #include #include -int kdp_vm_read( caddr_t, caddr_t, unsigned int); -int kdp_vm_write( caddr_t, caddr_t, unsigned int); - #define DO_ALIGN 1 /* align all packet data accesses */ #define KDP_TEST_HARNESS 0 @@ -54,7 +52,7 @@ int kdp_vm_write( caddr_t, caddr_t, unsigned int); #endif static 
kdp_dispatch_t - dispatch_table[KDP_HOSTREBOOT - KDP_CONNECT +1] = + dispatch_table[KDP_INVALID_REQUEST-KDP_CONNECT] = { /* 0 */ kdp_connect, /* 1 */ kdp_disconnect, @@ -75,23 +73,36 @@ static kdp_dispatch_t /*10 */ kdp_breakpoint_remove, /*11 */ kdp_regions, /*12 */ kdp_reattach, -/*13 */ (kdp_dispatch_t)kdp_reboot +/*13 */ kdp_reboot, +/*14 */ kdp_readmem64, +/*15 */ kdp_writemem64, +/*16 */ kdp_breakpoint64_set, +/*17 */ kdp_breakpoint64_remove, +/*18 */ kdp_kernelversion, +/*19 */ kdp_readphysmem64, +/*20 */ kdp_writephysmem64, +/*21 */ kdp_readioport, +/*22 */ kdp_writeioport, +/*23 */ kdp_readmsr64, +/*24 */ kdp_writemsr64, }; kdp_glob_t kdp; - #define MAX_BREAKPOINTS 100 -#define KDP_MAX_BREAKPOINTS 100 -#define BREAKPOINT_NOT_FOUND 101 -#define BREAKPOINT_ALREADY_SET 102 - -#define KDP_VERSION 10 +/* + * Version 11 of the KDP Protocol adds support for 64-bit wide memory + * addresses (read/write and breakpoints) as well as a dedicated + * kernelversion request. Version 12 adds read/writing of physical + * memory with 64-bit wide memory addresses. 
+ */ +#define KDP_VERSION 12 typedef struct{ - unsigned int address; - unsigned int old_instruction; + mach_vm_address_t address; + uint32_t bytesused; + uint8_t oldbytes[MAX_BREAKINSN_BYTES]; } kdp_breakpoint_record_t; static kdp_breakpoint_record_t breakpoint_list[MAX_BREAKPOINTS]; @@ -101,24 +112,6 @@ int reattach_wait = 0; int noresume_on_disconnect = 0; extern unsigned int return_on_panic; -#define MAXCOMLEN 16 - -struct thread_snapshot { - uint32_t snapshot_magic; - thread_t thread_id; - int32_t state; - wait_queue_t wait_queue; - event64_t wait_event; - vm_offset_t kernel_stack; - vm_offset_t reserved_stack; - thread_continue_t continuation; - uint32_t nkern_frames; - char user64_p; - uint32_t nuser_frames; - int32_t pid; - char p_comm[MAXCOMLEN + 1]; -}; - typedef struct thread_snapshot *thread_snapshot_t; extern int @@ -136,9 +129,23 @@ kdp_snapshot_postflight(void); static int pid_from_task(task_t task); +kdp_error_t +kdp_set_breakpoint_internal( + mach_vm_address_t address + ); + +kdp_error_t +kdp_remove_breakpoint_internal( + mach_vm_address_t address + ); + + int kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced); +boolean_t kdp_copyin(pmap_t, uint64_t, void *, size_t); +extern void bcopy_phys(addr64_t, addr64_t, vm_size_t); + extern char version[]; boolean_t @@ -173,7 +180,7 @@ kdp_packet( } req = rd->hdr.request; - if (req > KDP_HOSTREBOOT) { + if (req >= KDP_INVALID_REQUEST) { printf("kdp_packet bad request %x len %d seq %x key %x\n", rd->hdr.request, rd->hdr.len, rd->hdr.seq, rd->hdr.key); @@ -319,6 +326,8 @@ kdp_hostinfo( if (plen < sizeof (*rq)) return (FALSE); + dprintf(("kdp_hostinfo\n")); + rp->hdr.is_reply = 1; rp->hdr.len = sizeof (*rp); @@ -330,6 +339,35 @@ kdp_hostinfo( return (TRUE); } +static boolean_t +kdp_kernelversion( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_kernelversion_req_t *rq = &pkt->kernelversion_req; + size_t plen = *len; + 
kdp_kernelversion_reply_t *rp = &pkt->kernelversion_reply; + size_t slen; + + if (plen < sizeof (*rq)) + return (FALSE); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + dprintf(("kdp_kernelversion\n")); + slen = strlcpy(rp->version, version, MAX_KDP_DATA_SIZE); + + rp->hdr.len += slen + 1; /* strlcpy returns the amount copied with NUL */ + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + static boolean_t kdp_suspend( kdp_pkt_t *pkt, @@ -394,7 +432,7 @@ kdp_writemem( kdp_writemem_req_t *rq = &pkt->writemem_req; size_t plen = *len; kdp_writemem_reply_t *rp = &pkt->writemem_reply; - int cnt; + mach_vm_size_t cnt; if (plen < sizeof (*rq)) return (FALSE); @@ -404,7 +442,7 @@ kdp_writemem( else { dprintf(("kdp_writemem addr %x size %d\n", rq->address, rq->nbytes)); - cnt = kdp_vm_write((caddr_t)rq->data, (caddr_t)rq->address, rq->nbytes); + cnt = kdp_machine_vm_write((caddr_t)rq->data, (mach_vm_address_t)rq->address, rq->nbytes); rp->error = KDPERR_NO_ERROR; } @@ -417,6 +455,70 @@ kdp_writemem( return (TRUE); } +static boolean_t +kdp_writemem64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_writemem64_req_t *rq = &pkt->writemem64_req; + size_t plen = *len; + kdp_writemem64_reply_t *rp = &pkt->writemem64_reply; + mach_vm_size_t cnt; + + if (plen < sizeof (*rq)) + return (FALSE); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { + dprintf(("kdp_writemem64 addr %llx size %d\n", rq->address, rq->nbytes)); + + cnt = kdp_machine_vm_write((caddr_t)rq->data, (mach_vm_address_t)rq->address, (mach_vm_size_t)rq->nbytes); + rp->error = KDPERR_NO_ERROR; + } + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + +static boolean_t +kdp_writephysmem64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_writephysmem64_req_t *rq = &pkt->writephysmem64_req; + size_t plen = *len; + 
kdp_writephysmem64_reply_t *rp = &pkt->writephysmem64_reply; + + if (plen < sizeof (*rq)) + return (FALSE); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { + dprintf(("kdp_writephysmem64 addr %llx size %d\n", rq->address, rq->nbytes)); + kdp_machine_phys_write(rq, rq->data, rq->lcpu); + rp->error = KDPERR_NO_ERROR; + } + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + static boolean_t kdp_readmem( kdp_pkt_t *pkt, @@ -427,10 +529,11 @@ kdp_readmem( kdp_readmem_req_t *rq = &pkt->readmem_req; size_t plen = *len; kdp_readmem_reply_t *rp = &pkt->readmem_reply; - int cnt; + mach_vm_size_t cnt; #if __i386__ || __arm__ void *pversion = &version; #endif + if (plen < sizeof (*rq)) return (FALSE); @@ -452,12 +555,13 @@ kdp_readmem( * a table) is implemented on these architectures, as with PPC. * N.B.: x86 now has a low global page, and the version indirection * is pinned at 0x201C. We retain the 0x501C address override - * for compatibility. + * for compatibility. Future architectures should instead use + * the KDP_KERNELVERSION request. 
*/ - if (rq->address == (void *)0x501C) - rq->address = &pversion; + if (rq->address == 0x501C) + rq->address = (uintptr_t)&pversion; #endif - cnt = kdp_vm_read((caddr_t)rq->address, (caddr_t)rp->data, n); + cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, n); rp->error = KDPERR_NO_ERROR; rp->hdr.len += cnt; @@ -469,6 +573,78 @@ kdp_readmem( return (TRUE); } +static boolean_t +kdp_readmem64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_readmem64_req_t *rq = &pkt->readmem64_req; + size_t plen = *len; + kdp_readmem64_reply_t *rp = &pkt->readmem64_reply; + mach_vm_size_t cnt; + + if (plen < sizeof (*rq)) + return (FALSE); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { + + dprintf(("kdp_readmem64 addr %llx size %d\n", rq->address, rq->nbytes)); + + cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, rq->nbytes); + rp->error = KDPERR_NO_ERROR; + + rp->hdr.len += cnt; + } + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + +static boolean_t +kdp_readphysmem64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_readphysmem64_req_t *rq = &pkt->readphysmem64_req; + size_t plen = *len; + kdp_readphysmem64_reply_t *rp = &pkt->readphysmem64_reply; + int cnt; + + if (plen < sizeof (*rq)) + return (FALSE); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { + + dprintf(("kdp_readphysmem64 addr %llx size %d\n", rq->address, rq->nbytes)); + + cnt = (int)kdp_machine_phys_read(rq, rp->data, rq->lcpu); + rp->error = KDPERR_NO_ERROR; + + rp->hdr.len += cnt; + } + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + static boolean_t kdp_maxbytes( kdp_pkt_t *pkt, @@ -516,15 +692,11 @@ kdp_version( dprintf(("kdp_version\n")); rp->version = KDP_VERSION; -#if __ppc__ 
if (!(kdp_flag & KDP_BP_DIS)) rp->feature = KDP_FEATURE_BP; else rp->feature = 0; -#else - rp->feature = 0; -#endif - + *reply_port = kdp.reply_port; *len = rp->hdr.len; @@ -554,7 +726,7 @@ kdp_regions( r = rp->regions; rp->nregions = 0; - r->address = NULL; + r->address = 0; r->nbytes = 0xffffffff; r->protection = VM_PROT_ALL; r++; rp->nregions++; @@ -582,7 +754,7 @@ kdp_writeregs( if (plen < sizeof (*rq)) return (FALSE); - size = rq->hdr.len - sizeof(kdp_hdr_t) - sizeof(unsigned int); + size = rq->hdr.len - (unsigned)sizeof(kdp_hdr_t) - (unsigned)sizeof(unsigned int); rp->error = kdp_machine_write_regs(rq->cpu, rq->flavor, rq->data, &size); rp->hdr.is_reply = 1; @@ -621,130 +793,223 @@ kdp_readregs( return (TRUE); } -static boolean_t + +boolean_t kdp_breakpoint_set( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { - kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; - kdp_breakpoint_reply_t *rp = &pkt->breakpoint_reply; - size_t plen = *len; - int cnt, i; - unsigned int old_instruction = 0; - unsigned int breakinstr = kdp_ml_get_breakinsn(); - - if(breakpoints_initialized == 0) - { - for(i=0;(i < MAX_BREAKPOINTS); breakpoint_list[i].address=0, i++); - breakpoints_initialized++; - } - if (plen < sizeof (*rq)) - return (FALSE); - cnt = kdp_vm_read((caddr_t)rq->address, (caddr_t)(&old_instruction), sizeof(int)); - - if (old_instruction==breakinstr) - { - printf("A trap was already set at that address, not setting new breakpoint\n"); - rp->error = BREAKPOINT_ALREADY_SET; - - rp->hdr.is_reply = 1; - rp->hdr.len = sizeof (*rp); - *reply_port = kdp.reply_port; - *len = rp->hdr.len; - - return (TRUE); - } + kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; + kdp_breakpoint_reply_t *rp = &pkt->breakpoint_reply; + size_t plen = *len; + kdp_error_t kerr; + + if (plen < sizeof (*rq)) + return (FALSE); + + dprintf(("kdp_breakpoint_set %x\n", rq->address)); - for(i=0;(i < MAX_BREAKPOINTS) && 
(breakpoint_list[i].address != 0); i++); + kerr = kdp_set_breakpoint_internal((mach_vm_address_t)rq->address); + + rp->error = kerr; + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} - if (i == MAX_BREAKPOINTS) - { - rp->error = KDP_MAX_BREAKPOINTS; - - rp->hdr.is_reply = 1; - rp->hdr.len = sizeof (*rp); - *reply_port = kdp.reply_port; - *len = rp->hdr.len; - - return (TRUE); - } - breakpoint_list[i].address = rq->address; - breakpoint_list[i].old_instruction = old_instruction; +boolean_t +kdp_breakpoint64_set( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_breakpoint64_req_t *rq = &pkt->breakpoint64_req; + kdp_breakpoint64_reply_t *rp = &pkt->breakpoint64_reply; + size_t plen = *len; + kdp_error_t kerr; + + if (plen < sizeof (*rq)) + return (FALSE); + + dprintf(("kdp_breakpoint64_set %llx\n", rq->address)); - cnt = kdp_vm_write((caddr_t)&breakinstr, (caddr_t)rq->address, sizeof(&breakinstr)); + kerr = kdp_set_breakpoint_internal((mach_vm_address_t)rq->address); + + rp->error = kerr; + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} - rp->error = KDPERR_NO_ERROR; - rp->hdr.is_reply = 1; - rp->hdr.len = sizeof (*rp); - *reply_port = kdp.reply_port; - *len = rp->hdr.len; +boolean_t +kdp_breakpoint_remove( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) +{ + kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; + kdp_breakpoint_reply_t *rp = &pkt->breakpoint_reply; + size_t plen = *len; + kdp_error_t kerr; + if (plen < sizeof (*rq)) + return (FALSE); + + dprintf(("kdp_breakpoint_remove %x\n", rq->address)); - return (TRUE); + kerr = kdp_remove_breakpoint_internal((mach_vm_address_t)rq->address); + + rp->error = kerr; + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); } -static boolean_t 
-kdp_breakpoint_remove( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port +boolean_t +kdp_breakpoint64_remove( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { - kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; - kdp_breakpoint_reply_t *rp = &pkt->breakpoint_reply; - size_t plen = *len; - int cnt,i; + kdp_breakpoint64_req_t *rq = &pkt->breakpoint64_req; + kdp_breakpoint64_reply_t *rp = &pkt->breakpoint64_reply; + size_t plen = *len; + kdp_error_t kerr; + + if (plen < sizeof (*rq)) + return (FALSE); + + dprintf(("kdp_breakpoint64_remove %llx\n", rq->address)); - if (plen < sizeof (*rq)) - return (FALSE); + kerr = kdp_remove_breakpoint_internal((mach_vm_address_t)rq->address); + + rp->error = kerr; + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} - for(i=0;(i < MAX_BREAKPOINTS) && (breakpoint_list[i].address != rq->address); i++); - if (i == MAX_BREAKPOINTS) - { - rp->error = BREAKPOINT_NOT_FOUND; - rp->hdr.is_reply = 1; - rp->hdr.len = sizeof (*rp); - *reply_port = kdp.reply_port; - *len = rp->hdr.len; - return (TRUE); /* Check if it needs to be FALSE in case of error */ +kdp_error_t +kdp_set_breakpoint_internal( + mach_vm_address_t address + ) +{ + + uint8_t breakinstr[MAX_BREAKINSN_BYTES], oldinstr[MAX_BREAKINSN_BYTES]; + uint32_t breakinstrsize = sizeof(breakinstr); + mach_vm_size_t cnt; + int i; + + kdp_machine_get_breakinsn(breakinstr, &breakinstrsize); + + if(breakpoints_initialized == 0) + { + for(i=0;(i < MAX_BREAKPOINTS); breakpoint_list[i].address=0, i++); + breakpoints_initialized++; } + + cnt = kdp_machine_vm_read(address, (caddr_t)&oldinstr, (mach_vm_size_t)breakinstrsize); + + if (0 == memcmp(oldinstr, breakinstr, breakinstrsize)) { + printf("A trap was already set at that address, not setting new breakpoint\n"); + + return KDPERR_BREAKPOINT_ALREADY_SET; + } + + for(i=0;(i < MAX_BREAKPOINTS) && (breakpoint_list[i].address != 0); i++); + + if (i == 
MAX_BREAKPOINTS) { + return KDPERR_MAX_BREAKPOINTS; + } + + breakpoint_list[i].address = address; + memcpy(breakpoint_list[i].oldbytes, oldinstr, breakinstrsize); + breakpoint_list[i].bytesused = breakinstrsize; + + cnt = kdp_machine_vm_write((caddr_t)&breakinstr, address, breakinstrsize); + + return KDPERR_NO_ERROR; +} - breakpoint_list[i].address = 0; - cnt = kdp_vm_write((caddr_t)&(breakpoint_list[i].old_instruction), (caddr_t)rq->address, sizeof(int)); - rp->error = KDPERR_NO_ERROR; - rp->hdr.is_reply = 1; - rp->hdr.len = sizeof (*rp); - *reply_port = kdp.reply_port; - *len = rp->hdr.len; - - return (TRUE); +kdp_error_t +kdp_remove_breakpoint_internal( + mach_vm_address_t address + ) +{ + mach_vm_size_t cnt; + int i; + + for(i=0;(i < MAX_BREAKPOINTS) && (breakpoint_list[i].address != address); i++); + + if (i == MAX_BREAKPOINTS) + { + return KDPERR_BREAKPOINT_NOT_FOUND; + } + + breakpoint_list[i].address = 0; + cnt = kdp_machine_vm_write((caddr_t)&breakpoint_list[i].oldbytes, address, breakpoint_list[i].bytesused); + + return KDPERR_NO_ERROR; } boolean_t kdp_remove_all_breakpoints(void) { - int i; - boolean_t breakpoint_found = FALSE; - - if (breakpoints_initialized) + int i; + boolean_t breakpoint_found = FALSE; + + if (breakpoints_initialized) { - for(i=0;i < MAX_BREAKPOINTS; i++) - { - if (breakpoint_list[i].address) - { - kdp_vm_write((caddr_t)&(breakpoint_list[i].old_instruction), (caddr_t)breakpoint_list[i].address, sizeof(int)); - breakpoint_found = TRUE; - breakpoint_list[i].address = 0; - } - } - if (breakpoint_found) - printf("kdp_remove_all_breakpoints: found extant breakpoints, removing them.\n"); + for(i=0;i < MAX_BREAKPOINTS; i++) + { + if (breakpoint_list[i].address) + { + kdp_machine_vm_write((caddr_t)&(breakpoint_list[i].oldbytes), (mach_vm_address_t)breakpoint_list[i].address, (mach_vm_size_t)breakpoint_list[i].bytesused); + breakpoint_found = TRUE; + breakpoint_list[i].address = 0; + } + } + + if (breakpoint_found) + 
printf("kdp_remove_all_breakpoints: found extant breakpoints, removing them.\n"); } - return breakpoint_found; + return breakpoint_found; } +boolean_t +kdp_reboot( + __unused kdp_pkt_t *pkt, + __unused int *len, + __unused unsigned short *reply_port +) +{ + dprintf(("kdp_reboot\n")); + + kdp_machine_reboot(); + + return (TRUE); // no, not really, we won't return +} #define MAX_FRAMES 1000 @@ -758,6 +1023,32 @@ static int pid_from_task(task_t task) return pid; } +boolean_t +kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) { + size_t rem = size; + char *kvaddr = dest; + + while (rem) { + ppnum_t upn = pmap_find_phys(p, uaddr); + uint64_t phys_src = (upn << PAGE_SHIFT) | (uaddr & PAGE_MASK); + uint64_t phys_dest = kvtophys((vm_offset_t)kvaddr); + uint64_t src_rem = PAGE_SIZE - (phys_src & PAGE_MASK); + uint64_t dst_rem = PAGE_SIZE - (phys_dest & PAGE_MASK); + size_t cur_size = (uint32_t) MIN(src_rem, dst_rem); + cur_size = MIN(cur_size, rem); + + if (upn && pmap_valid_page(upn) && phys_dest) { + bcopy_phys(phys_src, phys_dest, cur_size); + } + else + break; + uaddr += cur_size; + kvaddr += cur_size; + rem -= cur_size; + } + return (rem == 0); +} + int kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_options, uint32_t *pbytesTraced) { @@ -771,6 +1062,8 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op int nframes = trace_options; thread_snapshot_t tsnap = NULL; unsigned framesize = 2 * sizeof(vm_offset_t); + boolean_t dispatch_p = ((trace_options & STACKSHOT_GET_DQ) != 0); + uint16_t dispatch_offset = (trace_options & STACKSHOT_DISPATCH_OFFSET_MASK) >> STACKSHOT_DISPATCH_OFFSET_SHIFT; struct task ctask; struct thread cthread; @@ -791,42 +1084,63 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op } /* Populate the thread snapshot header */ tsnap = (thread_snapshot_t) tracepos; - tsnap->thread_id = thread; + tsnap->thread_id = (uint64_t) (uintptr_t)thread; 
tsnap->state = thread->state; - tsnap->wait_queue = thread->wait_queue; tsnap->wait_event = thread->wait_event; - tsnap->kernel_stack = thread->kernel_stack; - tsnap->reserved_stack = thread->reserved_stack; - tsnap->continuation = thread->continuation; + tsnap->continuation = (uint64_t) (uintptr_t) thread->continuation; /* Add the BSD process identifiers */ if ((tsnap->pid = pid_from_task(task)) != -1) - proc_name_kdp(task, tsnap->p_comm, MAXCOMLEN + 1); + proc_name_kdp(task, tsnap->p_comm, sizeof(tsnap->p_comm)); else tsnap->p_comm[0] = '\0'; tsnap->snapshot_magic = 0xfeedface; tracepos += sizeof(struct thread_snapshot); - + tsnap->ss_flags = 0; + + if (dispatch_p && (task != kernel_task) && (task->active) && (task->map)) { + uint64_t dqkeyaddr = thread_dispatchqaddr(thread); + if (dqkeyaddr != 0) { + boolean_t task64 = task_has_64BitAddr(task); + uint64_t dqaddr = 0; + if (kdp_copyin(task->map->pmap, dqkeyaddr, &dqaddr, (task64 ? 8 : 4)) && (dqaddr != 0)) { + uint64_t dqserialnumaddr = dqaddr + dispatch_offset; + uint64_t dqserialnum = 0; + if (kdp_copyin(task->map->pmap, dqserialnumaddr, &dqserialnum, (task64 ? 8 : 4))) { + tsnap->ss_flags |= kHasDispatchSerial; + *(uint64_t *)tracepos = dqserialnum; + tracepos += 8; + } + } + } + } /* Call through to the machine specific trace routines * Frames are added past the snapshot header. */ - if (tsnap->kernel_stack != 0) + if (thread->kernel_stack != 0) { +#if defined(__LP64__) + tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, FALSE); + tsnap->ss_flags |= kKernel64_p; + framesize = 16; +#else tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, FALSE); - tsnap->nkern_frames = tracebytes/(2 * sizeof(vm_offset_t)); + framesize = 8; +#endif + } + tsnap->nkern_frames = tracebytes/framesize; tracepos += tracebytes; tracebytes = 0; - tsnap->user64_p = 0; /* Trace user stack, if any */ if (thread->task->map != kernel_map) { -/* 64-bit task? */ + /* 64-bit task? 
*/ if (task_has_64BitAddr(thread->task)) { tracebytes = machine_trace_thread64(thread, tracepos, tracebound, nframes, TRUE); - tsnap->user64_p = 1; - framesize = 2 * sizeof(addr64_t); + tsnap->ss_flags |= kUser64_p; + framesize = 16; } else { tracebytes = machine_trace_thread(thread, tracepos, tracebound, nframes, TRUE); - framesize = 2 * sizeof(vm_offset_t); + framesize = 8; } } tsnap->nuser_frames = tracebytes/framesize; @@ -839,7 +1153,128 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, unsigned trace_op /* Release stack snapshot wait indicator */ kdp_snapshot_postflight(); - *pbytesTraced = tracepos - (char *) tracebuf; + *pbytesTraced = (uint32_t)(tracepos - (char *) tracebuf); return error; } + +static boolean_t +kdp_readioport(kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port + ) +{ + kdp_readioport_req_t *rq = &pkt->readioport_req; + kdp_readioport_reply_t *rp = &pkt->readioport_reply; + size_t plen = *len; + + if (plen < sizeof (*rq)) + return (FALSE); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { +#if KDP_TEST_HARNESS + uint16_t addr = rq->address; +#endif + uint16_t size = rq->nbytes; + dprintf(("kdp_readioport addr %x size %d\n", addr, size)); + + rp->error = kdp_machine_ioport_read(rq, rp->data, rq->lcpu); + if (rp->error == KDPERR_NO_ERROR) + rp->hdr.len += size; + } + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + +static boolean_t +kdp_writeioport( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port + ) +{ + kdp_writeioport_req_t *rq = &pkt->writeioport_req; + kdp_writeioport_reply_t *rp = &pkt->writeioport_reply; + size_t plen = *len; + + if (plen < sizeof (*rq)) + return (FALSE); + + if (rq->nbytes > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; + else { + dprintf(("kdp_writeioport addr %x size %d\n", rq->address, + rq->nbytes)); + + rp->error = kdp_machine_ioport_write(rq, rq->data, 
rq->lcpu); + } + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + +static boolean_t +kdp_readmsr64(kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port + ) +{ + kdp_readmsr64_req_t *rq = &pkt->readmsr64_req; + kdp_readmsr64_reply_t *rp = &pkt->readmsr64_reply; + size_t plen = *len; + + if (plen < sizeof (*rq)) + return (FALSE); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + dprintf(("kdp_readmsr64 lcpu %x addr %x\n", rq->lcpu, rq->address)); + rp->error = kdp_machine_msr64_read(rq, rp->data, rq->lcpu); + if (rp->error == KDPERR_NO_ERROR) + rp->hdr.len += sizeof(uint64_t); + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} + +static boolean_t +kdp_writemsr64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port + ) +{ + kdp_writemsr64_req_t *rq = &pkt->writemsr64_req; + kdp_writemsr64_reply_t *rp = &pkt->writemsr64_reply; + size_t plen = *len; + + if (plen < sizeof (*rq)) + return (FALSE); + + dprintf(("kdp_writemsr64 lcpu %x addr %x\n", rq->lcpu, rq->address)); + rp->error = kdp_machine_msr64_write(rq, rq->data, rq->lcpu); + + rp->hdr.is_reply = 1; + rp->hdr.len = sizeof (*rp); + + *reply_port = kdp.reply_port; + *len = rp->hdr.len; + + return (TRUE); +} diff --git a/osfmk/kdp/kdp_core.h b/osfmk/kdp/kdp_core.h index aa54f350d..d2b59ebfb 100644 --- a/osfmk/kdp/kdp_core.h +++ b/osfmk/kdp/kdp_core.h @@ -44,6 +44,12 @@ #define KDP_ERROR 5 /* error code */ #define KDP_SEEK 6 /* Seek to specified offset */ #define KDP_EOF 7 /* signal end of file */ + +#if defined(__LP64__) +#define KDP_FEATURE_MASK_STRING "features" +enum {KDP_FEATURE_LARGE_CRASHDUMPS = 1}; +extern uint32_t kdp_crashdump_feature_mask; +#endif struct corehdr { short th_opcode; /* packet type */ union { @@ -80,7 +86,7 @@ void abort_panic_transfer (void); struct corehdr *create_panic_header(unsigned int request, const char *corename, unsigned length, unsigned block); int 
kdp_send_crashdump_pkt(unsigned int request, char *corename, - unsigned int length, void *panic_data); + uint64_t length, void *panic_data); int kdp_send_crashdump_data(unsigned int request, char *corename, - unsigned int length, caddr_t txstart); + uint64_t length, caddr_t txstart); diff --git a/osfmk/kdp/kdp_internal.h b/osfmk/kdp/kdp_internal.h index 8452a2009..d619f6d22 100644 --- a/osfmk/kdp/kdp_internal.h +++ b/osfmk/kdp/kdp_internal.h @@ -32,6 +32,8 @@ #include #include +#include +#include typedef struct { unsigned short reply_port; @@ -58,6 +60,7 @@ extern volatile int kdp_flag; #define PANIC_CORE_ON_NMI 0x20 #define DBG_POST_CORE 0x40 #define PANIC_LOG_DUMP 0x80 +#define REBOOT_POST_CORE 0x100 typedef boolean_t (*kdp_dispatch_t) ( kdp_pkt_t *, @@ -103,7 +106,7 @@ kdp_panic( extern void -kdp_reboot( +kdp_machine_reboot( void ); @@ -155,12 +158,46 @@ kdp_sync_cache( void ); -unsigned int -kdp_ml_get_breakinsn( - void +/* Return a byte array that can be byte-copied to a memory address + * to trap into the debugger. 
Must be 4 bytes or less in the current + * implementation + */ +#define MAX_BREAKINSN_BYTES 4 + +void +kdp_machine_get_breakinsn( + uint8_t *bytes, + uint32_t *size ); extern void kdp_ml_enter_debugger( void ); + +mach_vm_size_t +kdp_machine_vm_read( mach_vm_address_t, caddr_t, mach_vm_size_t); + +mach_vm_size_t +kdp_machine_vm_write( caddr_t, mach_vm_address_t, mach_vm_size_t); + +mach_vm_size_t +kdp_machine_phys_read(kdp_readphysmem64_req_t *rq, caddr_t /* data */, + uint16_t /* lcpu */); + +mach_vm_size_t +kdp_machine_phys_write(kdp_writephysmem64_req_t *rq, caddr_t /* data */, + uint16_t /* lcpu */); + +int +kdp_machine_ioport_read(kdp_readioport_req_t *, caddr_t /* data */, uint16_t /* lcpu */); + +int +kdp_machine_ioport_write(kdp_writeioport_req_t *, caddr_t /* data */, uint16_t /* lcpu */); + +int +kdp_machine_msr64_read(kdp_readmsr64_req_t *, caddr_t /* data */, uint16_t /* lcpu */); + +int +kdp_machine_msr64_write(kdp_writemsr64_req_t *, caddr_t /* data */, uint16_t /* lcpu */); + diff --git a/osfmk/kdp/kdp_private.h b/osfmk/kdp/kdp_private.h index c4ef6bf8a..ac7229676 100644 --- a/osfmk/kdp/kdp_private.h +++ b/osfmk/kdp/kdp_private.h @@ -93,6 +93,13 @@ kdp_version( unsigned short * ); +static boolean_t +kdp_kernelversion( + kdp_pkt_t *, + int *, + unsigned short * +); + static boolean_t kdp_regions( kdp_pkt_t *, @@ -114,6 +121,20 @@ kdp_readmem( unsigned short * ); +static boolean_t +kdp_readmem64( + kdp_pkt_t *, + int *, + unsigned short * +); + +static boolean_t +kdp_readphysmem64( + kdp_pkt_t *, + int *, + unsigned short * +); + static boolean_t kdp_writemem( kdp_pkt_t *, @@ -121,6 +142,20 @@ kdp_writemem( unsigned short * ); +static boolean_t +kdp_writemem64( + kdp_pkt_t *, + int *, + unsigned short * +); + +static boolean_t +kdp_writephysmem64( + kdp_pkt_t *, + int *, + unsigned short * +); + static boolean_t kdp_resumecpus( kdp_pkt_t *, @@ -135,6 +170,14 @@ kdp_breakpoint_set( unsigned short *t ); +static boolean_t +kdp_breakpoint64_set( + 
kdp_pkt_t *, + int *, + unsigned short *t +); + + static boolean_t kdp_breakpoint_remove( kdp_pkt_t *, @@ -142,3 +185,29 @@ kdp_breakpoint_remove( unsigned short * ); +static boolean_t +kdp_breakpoint64_remove( + kdp_pkt_t *, + int *, + unsigned short * +); + + +static boolean_t +kdp_reboot( + kdp_pkt_t *, + int *, + unsigned short * +); + +static boolean_t +kdp_readioport(kdp_pkt_t *, int *, unsigned short *); + +static boolean_t +kdp_writeioport(kdp_pkt_t *, int *, unsigned short *); + +static boolean_t +kdp_readmsr64(kdp_pkt_t *, int *, unsigned short *); + +static boolean_t +kdp_writemsr64(kdp_pkt_t *, int *, unsigned short *); diff --git a/osfmk/kdp/kdp_protocol.h b/osfmk/kdp/kdp_protocol.h index 1eb174cec..723382ca5 100644 --- a/osfmk/kdp/kdp_protocol.h +++ b/osfmk/kdp/kdp_protocol.h @@ -26,11 +26,15 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#ifndef _KDP_PROTOCOL_H_ +#define _KDP_PROTOCOL_H_ + /* * Definition of remote debugger protocol. */ #include +#include /* * Retransmit parameters @@ -42,6 +46,8 @@ #endif /* DDEBUG_DEBUG || DEBUG_DEBUG */ #define KDP_REXMIT_TRIES 8 /* xmit 8 times, then give up */ +#define KDP_PACKED __attribute__((packed)) + /* * (NMI) Attention Max Wait Time * Remote will resume unless KDP requests is received within this @@ -98,19 +104,40 @@ typedef enum { KDP_REATTACH, /* remote reboot request */ - KDP_HOSTREBOOT + KDP_HOSTREBOOT, + + /* memory access (64-bit wide addresses). Version 11 protocol */ + KDP_READMEM64, KDP_WRITEMEM64, + + /* breakpoint control (64-bit wide addresses). Version 11 protocol */ + KDP_BREAKPOINT64_SET, KDP_BREAKPOINT64_REMOVE, + + /* kernel version string, like "xnu-1234.5~6". Version 11 protocol */ + KDP_KERNELVERSION, + + /* physical memory access (64-bit wide addresses). 
Version 12 protocol */ + KDP_READPHYSMEM64, KDP_WRITEPHYSMEM64, + + /* ioport access (8-, 16-, and 32-bit) */ + KDP_READIOPORT, KDP_WRITEIOPORT, + + /* msr access (64-bit) */ + KDP_READMSR64, KDP_WRITEMSR64, + + /* keep this last */ + KDP_INVALID_REQUEST } kdp_req_t; /* * Common KDP packet header */ typedef struct { - kdp_req_t request:7; /* request type */ + kdp_req_t request:7; /* kdp_req_t, request type */ unsigned is_reply:1; /* 0 => request, 1 => reply */ unsigned seq:8; /* sequence number within session */ unsigned len:16; /* length of entire pkt including hdr */ unsigned key; /* session key */ -} kdp_hdr_t; +} KDP_PACKED kdp_hdr_t; /* * KDP errors @@ -119,7 +146,11 @@ typedef enum { KDPERR_NO_ERROR = 0, KDPERR_ALREADY_CONNECTED, KDPERR_BAD_NBYTES, - KDPERR_BADFLAVOR /* bad flavor in w/r regs */ + KDPERR_BADFLAVOR, /* bad flavor in w/r regs */ + KDPERR_MAX_BREAKPOINTS = 100, + KDPERR_BREAKPOINT_NOT_FOUND = 101, + KDPERR_BREAKPOINT_ALREADY_SET = 102 + } kdp_error_t; /* @@ -131,163 +162,289 @@ typedef enum { */ typedef struct { /* KDP_CONNECT request */ kdp_hdr_t hdr; - unsigned short req_reply_port; /* udp port which to send replies */ - unsigned short exc_note_port; /* udp port which to send exc notes */ - char greeting[0]; /* "greetings", null-terminated */ -} kdp_connect_req_t; + uint16_t req_reply_port; /* udp port which to send replies */ + uint16_t exc_note_port; /* udp port which to send exc notes */ + char greeting[0]; /* "greetings", nul-terminated */ +} KDP_PACKED kdp_connect_req_t; typedef struct { /* KDP_CONNECT reply */ kdp_hdr_t hdr; kdp_error_t error; -} kdp_connect_reply_t; +} KDP_PACKED kdp_connect_reply_t; /* * KDP_DISCONNECT */ typedef struct { /* KDP_DISCONNECT request */ kdp_hdr_t hdr; -} kdp_disconnect_req_t; +} KDP_PACKED kdp_disconnect_req_t; typedef struct { /* KDP_DISCONNECT reply */ kdp_hdr_t hdr; -} kdp_disconnect_reply_t; +} KDP_PACKED kdp_disconnect_reply_t; /* * KDP_REATTACH */ typedef struct { kdp_hdr_t hdr; - unsigned short 
req_reply_port; /* udp port which to send replies */ -} kdp_reattach_req_t; + uint16_t req_reply_port; /* udp port which to send replies */ +} KDP_PACKED kdp_reattach_req_t; /* * KDP_HOSTINFO */ typedef struct { /* KDP_HOSTINFO request */ kdp_hdr_t hdr; -} kdp_hostinfo_req_t; +} KDP_PACKED kdp_hostinfo_req_t; typedef struct { - unsigned cpus_mask; /* bit is 1 if cpu present */ - int cpu_type; - int cpu_subtype; -} kdp_hostinfo_t; + uint32_t cpus_mask; /* bit is 1 if cpu present */ + uint32_t cpu_type; + uint32_t cpu_subtype; +} KDP_PACKED kdp_hostinfo_t; typedef struct { /* KDP_HOSTINFO reply */ kdp_hdr_t hdr; kdp_hostinfo_t hostinfo; -} kdp_hostinfo_reply_t; +} KDP_PACKED kdp_hostinfo_reply_t; /* * KDP_VERSION */ typedef struct { /* KDP_VERSION request */ kdp_hdr_t hdr; -} kdp_version_req_t; +} KDP_PACKED kdp_version_req_t; #define KDP_FEATURE_BP 0x1 /* local breakpoint support */ -typedef struct { /* KDP_REGIONS reply */ +typedef struct { /* KDP_VERSION reply */ kdp_hdr_t hdr; - unsigned version; - unsigned feature; - unsigned pad0; - unsigned pad1; -} kdp_version_reply_t; + uint32_t version; + uint32_t feature; + uint32_t pad0; + uint32_t pad1; +} KDP_PACKED kdp_version_reply_t; + +#define VM_PROT_VOLATILE ((vm_prot_t) 0x08) /* not cacheable */ +#define VM_PROT_SPARSE ((vm_prot_t) 0x10) /* sparse addr space */ /* * KDP_REGIONS */ typedef struct { /* KDP_REGIONS request */ kdp_hdr_t hdr; -} kdp_regions_req_t; - -#define VM_PROT_VOLATILE ((vm_prot_t) 0x08) /* not cacheable */ -#define VM_PROT_SPARSE ((vm_prot_t) 0x10) /* sparse addr space */ +} KDP_PACKED kdp_regions_req_t; typedef struct { - void *address; - unsigned nbytes; - vm_prot_t protection; -} kdp_region_t; + uint32_t address; + uint32_t nbytes; + uint32_t protection; /* vm_prot_t */ +} KDP_PACKED kdp_region_t; typedef struct { /* KDP_REGIONS reply */ kdp_hdr_t hdr; - unsigned nregions; + uint32_t nregions; kdp_region_t regions[0]; -} kdp_regions_reply_t; +} KDP_PACKED kdp_regions_reply_t; /* * 
KDP_MAXBYTES */ typedef struct { /* KDP_MAXBYTES request */ kdp_hdr_t hdr; -} kdp_maxbytes_req_t; +} KDP_PACKED kdp_maxbytes_req_t; typedef struct { /* KDP_MAXBYTES reply */ kdp_hdr_t hdr; - unsigned max_bytes; -} kdp_maxbytes_reply_t; + uint32_t max_bytes; +} KDP_PACKED kdp_maxbytes_reply_t; /* * KDP_READMEM */ typedef struct { /* KDP_READMEM request */ kdp_hdr_t hdr; - void *address; - unsigned nbytes; -} kdp_readmem_req_t; + uint32_t address; + uint32_t nbytes; +} KDP_PACKED kdp_readmem_req_t; typedef struct { /* KDP_READMEM reply */ kdp_hdr_t hdr; kdp_error_t error; char data[0]; -} kdp_readmem_reply_t; +} KDP_PACKED kdp_readmem_reply_t; + +/* + * KDP_READMEM64 + */ +typedef struct { /* KDP_READMEM64 request */ + kdp_hdr_t hdr; + uint64_t address; + uint32_t nbytes; +} KDP_PACKED kdp_readmem64_req_t; + +typedef struct { /* KDP_READMEM64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; + char data[0]; +} KDP_PACKED kdp_readmem64_reply_t; + +/* + * KDP_READPHYSMEM64 + */ +typedef struct { /* KDP_READPHYSMEM64 request */ + kdp_hdr_t hdr; + uint64_t address; + uint32_t nbytes; + uint16_t lcpu; +} KDP_PACKED kdp_readphysmem64_req_t; + +typedef struct { /* KDP_READPHYSMEM64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; + char data[0]; +} KDP_PACKED kdp_readphysmem64_reply_t; /* * KDP_WRITEMEM */ typedef struct { /* KDP_WRITEMEM request */ kdp_hdr_t hdr; - void *address; - unsigned nbytes; + uint32_t address; + uint32_t nbytes; char data[0]; -} kdp_writemem_req_t; +} KDP_PACKED kdp_writemem_req_t; typedef struct { /* KDP_WRITEMEM reply */ kdp_hdr_t hdr; kdp_error_t error; -} kdp_writemem_reply_t; +} KDP_PACKED kdp_writemem_reply_t; + +/* + * KDP_WRITEMEM64 + */ +typedef struct { /* KDP_WRITEMEM64 request */ + kdp_hdr_t hdr; + uint64_t address; + uint32_t nbytes; + char data[0]; +} KDP_PACKED kdp_writemem64_req_t; + +typedef struct { /* KDP_WRITEMEM64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; +} KDP_PACKED kdp_writemem64_reply_t; + +/* + * KDP_WRITEPHYSMEM64 + */ 
+typedef struct { /* KDP_WRITEPHYSMEM64 request */ + kdp_hdr_t hdr; + uint64_t address; + uint32_t nbytes; + uint16_t lcpu; + char data[0]; +} KDP_PACKED kdp_writephysmem64_req_t; + +typedef struct { /* KDP_WRITEPHYSMEM64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; +} KDP_PACKED kdp_writephysmem64_reply_t; + +/* + * KDP_WRITEIOPORT + */ +typedef struct { /* KDP_WRITEIOPORT request */ + kdp_hdr_t hdr; + uint16_t lcpu; + uint16_t address; + uint16_t nbytes; + char data[0]; +} KDP_PACKED kdp_writeioport_req_t; + +typedef struct { /* KDP_WRITEIOPORT reply */ + kdp_hdr_t hdr; + kdp_error_t error; +} KDP_PACKED kdp_writeioport_reply_t; + +/* + * KDP_READIOPORT + */ +typedef struct { /* KDP_READIOPORT request */ + kdp_hdr_t hdr; + uint16_t lcpu; + uint16_t address; + uint16_t nbytes; +} KDP_PACKED kdp_readioport_req_t; + +typedef struct { /* KDP_READIOPORT reply */ + kdp_hdr_t hdr; + kdp_error_t error; + char data[0]; +} KDP_PACKED kdp_readioport_reply_t; + + +/* + * KDP_WRITEMSR64 + */ +typedef struct { /* KDP_WRITEMSR64 request */ + kdp_hdr_t hdr; + uint32_t address; + uint16_t lcpu; + char data[0]; +} KDP_PACKED kdp_writemsr64_req_t; + +typedef struct { /* KDP_WRITEMSR64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; +} KDP_PACKED kdp_writemsr64_reply_t; + +/* + * KDP_READMSR64 + */ +typedef struct { /* KDP_READMSR64 request */ + kdp_hdr_t hdr; + uint32_t address; + uint16_t lcpu; +} KDP_PACKED kdp_readmsr64_req_t; + +typedef struct { /* KDP_READMSR64 reply */ + kdp_hdr_t hdr; + kdp_error_t error; + char data[0]; +} KDP_PACKED kdp_readmsr64_reply_t; + /* * KDP_READREGS */ typedef struct { /* KDP_READREGS request */ kdp_hdr_t hdr; - unsigned cpu; - unsigned flavor; -} kdp_readregs_req_t; + uint32_t cpu; + uint32_t flavor; +} KDP_PACKED kdp_readregs_req_t; typedef struct { /* KDP_READREGS reply */ kdp_hdr_t hdr; kdp_error_t error; /* could be KDPERR_BADFLAVOR */ char data[0]; -} kdp_readregs_reply_t; +} KDP_PACKED kdp_readregs_reply_t; /* * KDP_WRITEREGS */ typedef 
struct { /* KDP_WRITEREGS request */ kdp_hdr_t hdr; - unsigned cpu; - unsigned flavor; + uint32_t cpu; + uint32_t flavor; char data[0]; -} kdp_writeregs_req_t; +} KDP_PACKED kdp_writeregs_req_t; typedef struct { /* KDP_WRITEREGS reply */ kdp_hdr_t hdr; kdp_error_t error; -} kdp_writeregs_reply_t; +} KDP_PACKED kdp_writeregs_reply_t; /* * KDP_LOAD @@ -295,57 +452,75 @@ typedef struct { /* KDP_WRITEREGS reply */ typedef struct { /* KDP_LOAD request */ kdp_hdr_t hdr; char file_args[0]; -} kdp_load_req_t; +} KDP_PACKED kdp_load_req_t; typedef struct { /* KDP_LOAD reply */ kdp_hdr_t hdr; kdp_error_t error; -} kdp_load_reply_t; +} KDP_PACKED kdp_load_reply_t; /* * KDP_IMAGEPATH */ typedef struct { /* KDP_IMAGEPATH request */ kdp_hdr_t hdr; -} kdp_imagepath_req_t; +} KDP_PACKED kdp_imagepath_req_t; typedef struct { /* KDP_IMAGEPATH reply */ kdp_hdr_t hdr; char path[0]; -} kdp_imagepath_reply_t; +} KDP_PACKED kdp_imagepath_reply_t; /* * KDP_SUSPEND */ typedef struct { /* KDP_SUSPEND request */ kdp_hdr_t hdr; -} kdp_suspend_req_t; +} KDP_PACKED kdp_suspend_req_t; typedef struct { /* KDP_SUSPEND reply */ kdp_hdr_t hdr; -} kdp_suspend_reply_t; +} KDP_PACKED kdp_suspend_reply_t; /* * KDP_RESUMECPUS */ typedef struct { /* KDP_RESUMECPUS request */ kdp_hdr_t hdr; - unsigned cpu_mask; -} kdp_resumecpus_req_t; + uint32_t cpu_mask; +} KDP_PACKED kdp_resumecpus_req_t; typedef struct { /* KDP_RESUMECPUS reply */ kdp_hdr_t hdr; -} kdp_resumecpus_reply_t; +} KDP_PACKED kdp_resumecpus_reply_t; + +/* + * KDP_BREAKPOINT_SET and KDP_BREAKPOINT_REMOVE + */ typedef struct { kdp_hdr_t hdr; - unsigned long address; -} kdp_breakpoint_req_t; + uint32_t address; +} KDP_PACKED kdp_breakpoint_req_t; typedef struct { kdp_hdr_t hdr; kdp_error_t error; -} kdp_breakpoint_reply_t; +} KDP_PACKED kdp_breakpoint_reply_t; + +/* + * KDP_BREAKPOINT64_SET and KDP_BREAKPOINT64_REMOVE + */ + +typedef struct { + kdp_hdr_t hdr; + uint64_t address; +} KDP_PACKED kdp_breakpoint64_req_t; + +typedef struct { + 
kdp_hdr_t hdr; + kdp_error_t error; +} KDP_PACKED kdp_breakpoint64_reply_t; /* * Exception notifications @@ -353,25 +528,38 @@ typedef struct { * the remote debugger to the gdb agent KDB.) */ typedef struct { /* exc. info for one cpu */ - unsigned cpu; + uint32_t cpu; /* * Following info is defined as * per */ - unsigned exception; - unsigned code; - unsigned subcode; -} kdp_exc_info_t; + uint32_t exception; + uint32_t code; + uint32_t subcode; +} KDP_PACKED kdp_exc_info_t; typedef struct { /* KDP_EXCEPTION notification */ kdp_hdr_t hdr; - unsigned n_exc_info; + uint32_t n_exc_info; kdp_exc_info_t exc_info[0]; -} kdp_exception_t; +} KDP_PACKED kdp_exception_t; typedef struct { /* KDP_EXCEPTION acknowledgement */ kdp_hdr_t hdr; -} kdp_exception_ack_t; +} KDP_PACKED kdp_exception_ack_t; + +/* + * KDP_KERNELVERSION + */ +typedef struct { /* KDP_KERNELVERSION request */ + kdp_hdr_t hdr; +} KDP_PACKED kdp_kernelversion_req_t; + +typedef struct { /* KDP_KERNELVERSION reply */ + kdp_hdr_t hdr; + char version[0]; +} KDP_PACKED kdp_kernelversion_reply_t; + /* * Child termination messages @@ -386,13 +574,13 @@ typedef enum { typedef struct { /* KDP_TERMINATION notification */ kdp_hdr_t hdr; - kdp_termination_code_t term_code; - unsigned exit_code; -} kdp_termination_t; + uint32_t term_code; /* kdp_termination_code_t */ + uint32_t exit_code; +} KDP_PACKED kdp_termination_t; typedef struct { kdp_hdr_t hdr; -} kdp_termination_ack_t; +} KDP_PACKED kdp_termination_ack_t; typedef union { kdp_hdr_t hdr; @@ -408,8 +596,16 @@ typedef union { kdp_maxbytes_reply_t maxbytes_reply; kdp_readmem_req_t readmem_req; kdp_readmem_reply_t readmem_reply; + kdp_readmem64_req_t readmem64_req; + kdp_readmem64_reply_t readmem64_reply; + kdp_readphysmem64_req_t readphysmem64_req; + kdp_readphysmem64_reply_t readphysmem64_reply; kdp_writemem_req_t writemem_req; kdp_writemem_reply_t writemem_reply; + kdp_writemem64_req_t writemem64_req; + kdp_writemem64_reply_t writemem64_reply; + 
kdp_writephysmem64_req_t writephysmem64_req; + kdp_writephysmem64_reply_t writephysmem64_reply; kdp_readregs_req_t readregs_req; kdp_readregs_reply_t readregs_reply; kdp_writeregs_req_t writeregs_req; @@ -428,10 +624,24 @@ typedef union { kdp_termination_ack_t termination_ack; kdp_breakpoint_req_t breakpoint_req; kdp_breakpoint_reply_t breakpoint_reply; + kdp_breakpoint64_req_t breakpoint64_req; + kdp_breakpoint64_reply_t breakpoint64_reply; kdp_reattach_req_t reattach_req; kdp_regions_req_t regions_req; kdp_regions_reply_t regions_reply; + kdp_kernelversion_req_t kernelversion_req; + kdp_kernelversion_reply_t kernelversion_reply; + kdp_readioport_req_t readioport_req; + kdp_readioport_reply_t readioport_reply; + kdp_writeioport_req_t writeioport_req; + kdp_writeioport_reply_t writeioport_reply; + kdp_readmsr64_req_t readmsr64_req; + kdp_readmsr64_reply_t readmsr64_reply; + kdp_writemsr64_req_t writemsr64_req; + kdp_writemsr64_reply_t writemsr64_reply; } kdp_pkt_t; #define MAX_KDP_PKT_SIZE 1200 /* max packet size */ #define MAX_KDP_DATA_SIZE 1024 /* max r/w data per packet */ + +#endif // _KDP_PROTOCOL_H_ diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index 1575afcca..c43049684 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -87,6 +87,17 @@ static struct { boolean_t input; } pkt, saved_reply; +/* + * Support relatively small request/responses here. 
+ * If kgmacros needs to make a larger request, increase + * this buffer size + */ +static struct { + unsigned char data[128]; + unsigned int len; + boolean_t input; +} manual_pkt; + struct { struct { struct in_addr in; @@ -115,7 +126,7 @@ static kdp_send_t kdp_en_send_pkt; static kdp_receive_t kdp_en_recv_pkt; -static u_long kdp_current_ip_address = 0; +static uint32_t kdp_current_ip_address = 0; static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; static void *kdp_current_ifp; @@ -147,7 +158,6 @@ static unsigned int last_panic_port = CORE_REMOTE_PORT; unsigned int SEGSIZE = 512; -__unused static unsigned int PANIC_PKTSIZE = 518; static char panicd_ip_str[20]; static char router_ip_str[20]; @@ -159,7 +169,6 @@ extern unsigned int not_in_kdp; extern unsigned int disableConsoleOutput; -extern int kdp_vm_read( caddr_t, caddr_t, unsigned int); extern void kdp_call(void); extern boolean_t kdp_call_kdb(void); extern int kern_dump(void); @@ -175,6 +184,10 @@ static void kdp_process_arp_reply(struct ether_arp *); static boolean_t kdp_arp_resolve(uint32_t, struct ether_addr *); static volatile unsigned kdp_reentry_deadline; +#if defined(__LP64__) +uint32_t kdp_crashdump_feature_mask = KDP_FEATURE_LARGE_CRASHDUMPS; +static uint32_t kdp_feature_large_crashdumps; +#endif static boolean_t gKDPDebug = FALSE; #define KDP_DEBUG(...) 
if (gKDPDebug) printf(__VA_ARGS__); @@ -233,7 +246,9 @@ kdp_register_send_receive( kdp_timer_callout_init(); PE_parse_boot_argn("debug", &debug, sizeof (debug)); - +#if defined(__LP64__) + kdp_crashdump_feature_mask = htonl(kdp_crashdump_feature_mask); +#endif if (!debug) return; @@ -262,6 +277,9 @@ kdp_register_send_receive( if (PE_parse_boot_argn("_panicd_ip", panicd_ip_str, sizeof (panicd_ip_str))) panicd_specified = TRUE; + if ((debug & DB_REBOOT_POST_CORE) && (panicd_specified == TRUE)) + kdp_flag |= REBOOT_POST_CORE; + if (PE_parse_boot_argn("_router_ip", router_ip_str, sizeof (router_ip_str))) router_specified = TRUE; @@ -381,14 +399,14 @@ kdp_reply( if (!pkt.input) kdp_panic("kdp_reply"); - pkt.off -= sizeof (struct udpiphdr); + pkt.off -= (unsigned int)sizeof (struct udpiphdr); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else ui = (struct udpiphdr *)&pkt.data[pkt.off]; #endif - ui->ui_next = ui->ui_prev = NULL; + ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); @@ -416,9 +434,9 @@ kdp_reply( bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct udpiphdr); - pkt.off -= sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct ether_header); eh = (struct ether_header *)&pkt.data[pkt.off]; enaddr_copy(eh->ether_shost, &tmp_enaddr); @@ -426,7 +444,7 @@ kdp_reply( enaddr_copy(&tmp_enaddr, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += sizeof (struct ether_header); + pkt.len += (unsigned int)sizeof (struct ether_header); // save reply for possible retransmission bcopy((char *)&pkt, (char *)&saved_reply, sizeof(pkt)); @@ -449,14 +467,14 @@ kdp_send( if (pkt.input) kdp_panic("kdp_send"); - pkt.off -= sizeof (struct udpiphdr); + pkt.off -= (unsigned int)sizeof (struct udpiphdr); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], 
(char *)ui, sizeof(*ui)); #else ui = (struct udpiphdr *)&pkt.data[pkt.off]; #endif - ui->ui_next = ui->ui_prev = NULL; + ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); @@ -483,16 +501,16 @@ kdp_send( bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct udpiphdr); - pkt.off -= sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct ether_header); eh = (struct ether_header *)&pkt.data[pkt.off]; enaddr_copy(&adr.loc.ea, eh->ether_shost); enaddr_copy(&adr.rmt.ea, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += sizeof (struct ether_header); + pkt.len += (unsigned int)sizeof (struct ether_header); (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); } @@ -539,7 +557,7 @@ kdp_get_mac_addr(void) unsigned int kdp_get_ip_address(void) { - return kdp_current_ip_address; + return (unsigned int)kdp_current_ip_address; } void @@ -554,7 +572,7 @@ kdp_arp_dispatch(void) struct ether_arp aligned_ea, *ea = &aligned_ea; unsigned arp_header_offset; - arp_header_offset = sizeof(struct ether_header) + pkt.off; + arp_header_offset = (unsigned)sizeof(struct ether_header) + pkt.off; memcpy((void *)ea, (void *)&pkt.data[arp_header_offset], sizeof(*ea)); switch(ntohs(ea->arp_op)) { @@ -599,7 +617,7 @@ kdp_arp_reply(struct ether_arp *ea) struct ether_addr my_enaddr; eh = (struct ether_header *)&pkt.data[pkt.off]; - pkt.off += sizeof(struct ether_header); + pkt.off += (unsigned int)sizeof(struct ether_header); if(ntohs(ea->arp_op) != ARPOP_REQUEST) return; @@ -633,7 +651,7 @@ kdp_arp_reply(struct ether_arp *ea) (void)memcpy(eh->ether_shost, &my_enaddr, sizeof(eh->ether_shost)); eh->ether_type = htons(ETHERTYPE_ARP); (void)memcpy(&pkt.data[pkt.off], ea, sizeof(*ea)); - pkt.off -= sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct ether_header); /* pkt.len is 
still the length we want, ether_header+ether_arp */ (*kdp_en_send_pkt)(&pkt.data[pkt.off], pkt.len); } @@ -681,7 +699,7 @@ kdp_poll(void) if (pkt.len < (sizeof (struct ether_header) + sizeof (struct udpiphdr))) return; - pkt.off += sizeof (struct ether_header); + pkt.off += (unsigned int)sizeof (struct ether_header); if (ntohs(eh->ether_type) != ETHERTYPE_IP) { return; } @@ -694,7 +712,7 @@ kdp_poll(void) ip = (struct ip *)&pkt.data[pkt.off]; #endif - pkt.off += sizeof (struct udpiphdr); + pkt.off += (unsigned int)sizeof (struct udpiphdr); if (ui->ui_pr != IPPROTO_UDP) { return; } @@ -733,7 +751,7 @@ kdp_poll(void) /* * Calculate kdp packet length. */ - pkt.len = ntohs((u_short)ui->ui_ulen) - sizeof (struct udphdr); + pkt.len = ntohs((u_short)ui->ui_ulen) - (unsigned int)sizeof (struct udphdr); pkt.input = TRUE; } @@ -863,6 +881,23 @@ kdp_handler( goto again; } + /* This is a manual side-channel to the main KDP protocol. + * A client like GDB/kgmacros can manually construct + * a request, set the input flag, issue a dummy KDP request, + * and then manually collect the result + */ + if (manual_pkt.input) { + kdp_hdr_t *manual_hdr = (kdp_hdr_t *)&manual_pkt.data; + unsigned short manual_port_unused = 0; + if (!manual_hdr->is_reply) { + /* process */ + kdp_packet((unsigned char *)&manual_pkt.data, + (int *)&manual_pkt.len, + &manual_port_unused); + } + manual_pkt.input = 0; + } + if (kdp_packet((unsigned char*)&pkt.data[pkt.off], (int *)&pkt.len, (unsigned short *)&reply_port)) { @@ -945,7 +980,7 @@ kdp_connection_wait(void) return; case 'r': printf("Rebooting...\n"); - kdp_reboot(); + kdp_machine_reboot(); break; #if MACH_KDB case 'k': @@ -968,7 +1003,7 @@ kdp_connection_wait(void) hdr = (kdp_hdr_t *)&pkt.data[pkt.off]; #endif if (hdr->request == KDP_HOSTREBOOT) { - kdp_reboot(); + kdp_machine_reboot(); /* should not return! 
*/ } if (((hdr->request == KDP_CONNECT) || (hdr->request == KDP_REATTACH)) && @@ -1091,8 +1126,9 @@ kdp_raise_exception( if (((kdp_flag & KDP_PANIC_DUMP_ENABLED) || (kdp_flag & PANIC_LOG_DUMP)) && (panicstr != (char *) 0)) { - kdp_panic_dump(); + if (kdp_flag & REBOOT_POST_CORE) + kdp_machine_reboot(); } else if ((kdp_flag & PANIC_CORE_ON_NMI) && (panicstr == (char *) 0) && @@ -1144,7 +1180,7 @@ kdp_raise_exception( * available, it should work automatically. */ if (1 == flag_kdp_trigger_reboot) { - kdp_reboot(); + kdp_machine_reboot(); /* If we're still around, reset the flag */ flag_kdp_trigger_reboot = 0; } @@ -1182,21 +1218,25 @@ create_panic_header(unsigned int request, const char *corename, struct corehdr *coreh; const char *mode = "octet"; char modelen = strlen(mode); - +#if defined(__LP64__) + size_t fmask_size = sizeof(KDP_FEATURE_MASK_STRING) + sizeof(kdp_crashdump_feature_mask); +#else + size_t fmask_size = 0; +#endif pkt.off = sizeof (struct ether_header); - pkt.len = length + ((request == KDP_WRQ) ? modelen : 0) + - (corename ? strlen(corename): 0) + sizeof(struct corehdr); + pkt.len = (unsigned int)(length + ((request == KDP_WRQ) ? modelen + fmask_size : 0) + + (corename ? 
strlen(corename): 0) + sizeof(struct corehdr)); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else ui = (struct udpiphdr *)&pkt.data[pkt.off]; #endif - ui->ui_next = ui->ui_prev = NULL; + ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); - ui->ui_src.s_addr = kdp_current_ip_address; + ui->ui_src.s_addr = (uint32_t)kdp_current_ip_address; /* Already in network byte order via inet_aton() */ ui->ui_dst.s_addr = panic_server_ip; ui->ui_sport = htons(panicd_port); @@ -1220,9 +1260,9 @@ create_panic_header(unsigned int request, const char *corename, bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct udpiphdr); - pkt.off += sizeof (struct udpiphdr); + pkt.off += (unsigned int)sizeof (struct udpiphdr); coreh = (struct corehdr *) &pkt.data[pkt.off]; coreh->th_opcode = htons((u_short)request); @@ -1236,26 +1276,31 @@ create_panic_header(unsigned int request, const char *corename, *cp++ = '\0'; cp += strlcpy (cp, mode, KDP_MAXPACKET - strlen(corename)); *cp++ = '\0'; +#if defined(__LP64__) + cp += strlcpy(cp, KDP_FEATURE_MASK_STRING, sizeof(KDP_FEATURE_MASK_STRING)); + *cp++ = '\0'; /* Redundant */ + bcopy(&kdp_crashdump_feature_mask, cp, sizeof(kdp_crashdump_feature_mask)); +#endif } else { coreh->th_block = htonl((unsigned int) block); } - pkt.off -= sizeof (struct udpiphdr); - pkt.off -= sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct udpiphdr); + pkt.off -= (unsigned int)sizeof (struct ether_header); eh = (struct ether_header *)&pkt.data[pkt.off]; enaddr_copy(&kdp_current_mac_address, eh->ether_shost); enaddr_copy(&destination_mac, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += sizeof (struct ether_header); + pkt.len += (unsigned int)sizeof (struct ether_header); return coreh; } int kdp_send_crashdump_data(unsigned int 
request, char *corename, - unsigned int length, caddr_t txstart) + uint64_t length, caddr_t txstart) { caddr_t txend = txstart + length; int panic_error = 0; @@ -1275,10 +1320,10 @@ int kdp_send_crashdump_data(unsigned int request, char *corename, } txstart += SEGSIZE; if (!(panic_block % 2000)) - printf("."); + kdb_printf_unbuffered("."); } if (txstart < txend) { - kdp_send_crashdump_pkt(request, corename, (txend - txstart), txstart); + kdp_send_crashdump_pkt(request, corename, (unsigned int)(txend - txstart), txstart); } } return 0; @@ -1286,7 +1331,7 @@ int kdp_send_crashdump_data(unsigned int request, char *corename, int kdp_send_crashdump_pkt(unsigned int request, char *corename, - unsigned int length, void *panic_data) + uint64_t length, void *panic_data) { struct corehdr *th = NULL; int poll_count = 2500; @@ -1314,14 +1359,19 @@ kdp_send_crashdump_pkt(unsigned int request, char *corename, if (tretries > 2) printf("TX retry #%d ", tretries ); - th = create_panic_header(request, corename, length, panic_block); + th = create_panic_header(request, corename, (unsigned)length, panic_block); if (request == KDP_DATA) { - if (!kdp_vm_read((caddr_t) panic_data, (caddr_t) th->th_data, length)) { - memset ((caddr_t) th->th_data, 'X', length); + if (!kdp_machine_vm_read((mach_vm_address_t)(intptr_t)panic_data, (caddr_t) th->th_data, length)) { + memset ((caddr_t) th->th_data, 'X', (size_t)length); } } else if (request == KDP_SEEK) { +#if defined(__LP64__) + if (kdp_feature_large_crashdumps) + *(uint64_t *) th->th_data = OSSwapHostToBigInt64((*(uint64_t *) panic_data)); + else +#endif *(unsigned int *) th->th_data = htonl(*(unsigned int *) panic_data); } @@ -1339,7 +1389,17 @@ kdp_send_crashdump_pkt(unsigned int request, char *corename, pkt.input = FALSE; th = (struct corehdr *) &pkt.data[pkt.off]; - +#if defined(__LP64__) + if (request == KDP_WRQ) { + uint16_t opcode64 = ntohs(th->th_opcode); + uint16_t features64 = (opcode64 & 0xFF00)>>8; + if ((opcode64 & 0xFF) == 
KDP_ACK) { + kdp_feature_large_crashdumps = features64 & KDP_FEATURE_LARGE_CRASHDUMPS; + printf("Protocol features: 0x%x\n", (uint32_t) features64); + th->th_opcode = htons(KDP_ACK); + } + } +#endif if (ntohs(th->th_opcode) == KDP_ACK && ntohl(th->th_block) == panic_block) { } else @@ -1457,7 +1517,7 @@ kdp_panic_dump(void) int panic_error; uint64_t abstime; - uint32_t current_ip = ntohl(kdp_current_ip_address); + uint32_t current_ip = ntohl((uint32_t)kdp_current_ip_address); if (flag_panic_dump_in_progress) { printf("System dump aborted.\n"); @@ -1543,7 +1603,7 @@ kdp_panic_dump(void) /* Just the panic log requested */ if ((panicstr != (char *) 0) && (kdp_flag & PANIC_LOG_DUMP)) { printf("Transmitting panic log, please wait: "); - kdp_send_crashdump_data(KDP_DATA, corename, (debug_buf_ptr - debug_buf), debug_buf); + kdp_send_crashdump_data(KDP_DATA, corename, (unsigned int)(debug_buf_ptr - debug_buf), debug_buf); kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); printf("Please file a bug report on this panic, if possible.\n"); goto panic_dump_exit; @@ -1648,6 +1708,7 @@ kdp_init(void) struct in_addr ipaddr; struct ether_addr macaddr; + #if CONFIG_EMBEDDED //serial will be the debugger, unless match name is explicitly provided, and it's not "serial" if(PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) && strncmp(kdpname, "serial", sizeof(kdpname)) != 0) diff --git a/osfmk/kdp/kdp_udp.h b/osfmk/kdp/kdp_udp.h index debbc571f..c057dc4b1 100644 --- a/osfmk/kdp/kdp_udp.h +++ b/osfmk/kdp/kdp_udp.h @@ -32,6 +32,7 @@ #include #include /* OSSwap functions */ +#include #define ETHERMTU 1500 #define ETHERHDRSIZE 14 @@ -39,7 +40,7 @@ #define KDP_MAXPACKET (ETHERHDRSIZE + ETHERMTU + ETHERCRC) struct in_addr { - u_long s_addr; + uint32_t s_addr; }; struct ether_addr { @@ -52,7 +53,7 @@ extern struct ether_addr kdp_get_mac_addr(void); unsigned int kdp_get_ip_address(void); struct ipovly { - caddr_t ih_next, ih_prev; /* for protocol sequence q's */ + uint32_t 
ih_next, ih_prev; /* for protocol sequence q's */ u_char ih_x1; /* (unused) */ u_char ih_pr; /* protocol */ short ih_len; /* protocol length */ @@ -85,7 +86,7 @@ struct udpiphdr { struct ip { union { - u_long ip_w; + uint32_t ip_w; struct { unsigned int #ifdef __LITTLE_ENDIAN__ diff --git a/osfmk/kdp/ml/i386/kdp_machdep.c b/osfmk/kdp/ml/i386/kdp_machdep.c index 35441c45c..8beb2959e 100644 --- a/osfmk/kdp/ml/i386/kdp_machdep.c +++ b/osfmk/kdp/ml/i386/kdp_machdep.c @@ -39,6 +39,7 @@ #include #include /* for PE_halt_restart */ #include /* for halt_all_cpus */ +#include #include #include @@ -73,8 +74,6 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf unsigned machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len); -extern unsigned kdp_vm_read(caddr_t src, caddr_t dst, unsigned len); - static void kdp_callouts(kdp_event_t event); void @@ -279,11 +278,7 @@ kdp_machine_hostinfo( void kdp_panic( -#if CONFIG_NO_KPRINTF_STRINGS - __unused const char *msg -#else const char *msg -#endif ) { kprintf("kdp panic: %s\n", msg); @@ -292,7 +287,7 @@ kdp_panic( void -kdp_reboot(void) +kdp_machine_reboot(void) { printf("Attempting system restart..."); /* Call the platform specific restart*/ @@ -489,14 +484,17 @@ kdp_call_kdb( return(FALSE); } -unsigned int -kdp_ml_get_breakinsn(void) +void +kdp_machine_get_breakinsn( + uint8_t *bytes, + uint32_t *size +) { - return 0xcc; + bytes[0] = 0xcc; + *size = 1; } extern pmap_t kdp_pmap; -extern uint32_t kdp_src_high32; #define RETURN_OFFSET 4 int @@ -543,25 +541,27 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra if (!stackptr || (stackptr == fence)) { break; } - /* Stack grows downward */ - if (stackptr < prevsp) { - break; - } + /* Unaligned frame */ if (stackptr & 0x0000003) { break; } + if (stackptr > stacklimit) { break; } + + if (stackptr <= prevsp) { + break; + } - if (kdp_vm_read((caddr_t) (stackptr + RETURN_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != 
sizeof(caddr_t)) { + if (kdp_machine_vm_read((mach_vm_address_t)(stackptr + RETURN_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { break; } tracebuf++; prevsp = stackptr; - if (kdp_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { + if (kdp_machine_vm_read((mach_vm_address_t)stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { *tracebuf++ = 0; break; } @@ -577,14 +577,7 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra unsigned machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len) { - uint32_t kdp_vm_read_low32; - unsigned retval; - - kdp_src_high32 = srcaddr >> 32; - kdp_vm_read_low32 = srcaddr & 0x00000000FFFFFFFFUL; - retval = kdp_vm_read((caddr_t)kdp_vm_read_low32, dstaddr, len); - kdp_src_high32 = 0; - return retval; + return (unsigned)kdp_machine_vm_read(srcaddr, dstaddr, len); } int @@ -607,11 +600,6 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf stacklimit = 0xffffffffffffffffULL; kdp_pmap = thread->task->map->pmap; } - else { - /* DRK: This would need to adapt for a 64-bit kernel, if any */ - stackptr = STACK_IKS(thread->kernel_stack)->k_ebp; - init_rip = STACK_IKS(thread->kernel_stack)->k_eip; - } *tracebuf++ = init_rip; @@ -627,9 +615,7 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf if (!stackptr || (stackptr == fence)){ break; } - if (stackptr < prevsp) { - break; - } + if (stackptr & 0x0000003) { break; } @@ -637,6 +623,10 @@ machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nf break; } + if (stackptr <= prevsp) { + break; + } + if (machine_read64(stackptr + RETURN_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) { break; } @@ -683,9 +673,7 @@ kdp_register_callout( do { list_head = kdp_callout_list; kcp->callout_next = list_head; - } while(!atomic_cmpxchg((uint32_t *) &kdp_callout_list, - (uint32_t) list_head, - 
(uint32_t) kcp)); + } while (!OSCompareAndSwapPtr(list_head, kcp, (void * volatile *)&kdp_callout_list)); } /* diff --git a/osfmk/kdp/ml/i386/kdp_vm.c b/osfmk/kdp/ml/i386/kdp_vm.c index 9b3b85ad9..752db7b2b 100644 --- a/osfmk/kdp/ml/i386/kdp_vm.c +++ b/osfmk/kdp/ml/i386/kdp_vm.c @@ -44,23 +44,8 @@ #include #include -unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); -unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); - -boolean_t kdp_read_io; -boolean_t kdp_trans_off; -uint32_t kdp_src_high32; -extern pmap_paddr_t avail_start, avail_end; - -extern void bcopy_phys(addr64_t from, addr64_t to, int size); -static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); - -pmap_t kdp_pmap = 0; - -unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in kdp */ - extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; -extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; +extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; int kern_dump(void); int kdp_dump_trap(int type, x86_saved_state32_t *regs); @@ -86,128 +71,6 @@ typedef struct { char command_buffer[512]; -static addr64_t -kdp_vtophys( - pmap_t pmap, - addr64_t va) -{ - addr64_t pa; - ppnum_t pp; - - pp = pmap_find_phys(pmap, va); - if(!pp) return 0; - - pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL); - return(pa); -} - -/* - * - */ -unsigned kdp_vm_read( - caddr_t src, - caddr_t dst, - unsigned len) -{ - addr64_t cur_virt_src = (addr64_t)((unsigned int)src | (((uint64_t)kdp_src_high32) << 32)); - addr64_t cur_virt_dst = (addr64_t)((unsigned int)dst); - addr64_t cur_phys_dst, cur_phys_src; - unsigned resid = len; - unsigned cnt = 0; - pmap_t src_pmap = kernel_pmap; - -/* If a different pmap has been specified with kdp_pmap, use it to translate the - * source (cur_virt_src); otherwise, the source is translated using the - * kernel_pmap. 
- */ - if (kdp_pmap) - src_pmap = kdp_pmap; - - while (resid != 0) { -/* Translate, unless kdp_trans_off is set */ - if (!kdp_trans_off) { - if (!(cur_phys_src = kdp_vtophys(src_pmap, - cur_virt_src))) - goto exit; - } - else - cur_phys_src = cur_virt_src; - -/* Always translate the destination buffer using the kernel_pmap */ - if(!(cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst))) - goto exit; - - /* Validate physical page numbers unless kdp_read_io is set */ - if (kdp_read_io == FALSE) - if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) - goto exit; - -/* Get length left on page */ - cnt = PAGE_SIZE - (cur_phys_src & PAGE_MASK); - if (cnt > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) - cnt = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); - if (cnt > resid) - cnt = resid; - -/* Do a physical copy */ - bcopy_phys(cur_phys_src, cur_phys_dst, cnt); - - cur_virt_src += cnt; - cur_virt_dst += cnt; - resid -= cnt; - } -exit: - return (len - resid); -} - -/* - * - */ -unsigned kdp_vm_write( - caddr_t src, - caddr_t dst, - unsigned len) -{ - addr64_t cur_virt_src, cur_virt_dst; - addr64_t cur_phys_src, cur_phys_dst; - unsigned resid, cnt, cnt_src, cnt_dst; - -#ifdef KDP_VM_WRITE_DEBUG - printf("kdp_vm_write: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); -#endif - - cur_virt_src = (addr64_t)((unsigned int)src); - cur_virt_dst = (addr64_t)((unsigned int)dst); - - resid = len; - - while (resid != 0) { - if ((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) - goto exit; - - if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) - goto exit; - - cnt_src = ((cur_phys_src + PAGE_SIZE) & (PAGE_MASK)) - cur_phys_src; - cnt_dst = ((cur_phys_dst + PAGE_SIZE) & (PAGE_MASK)) - cur_phys_dst; - - if (cnt_src > cnt_dst) - cnt = cnt_dst; - else - cnt = cnt_src; - if (cnt > resid) - cnt = resid; - - bcopy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ - - 
cur_virt_src +=cnt; - cur_virt_dst +=cnt; - resid -= cnt; - } -exit: - return (len - resid); -} - static void kern_collectth_state(thread_t thread, tir_t *t) { @@ -246,7 +109,7 @@ kern_collectth_state(thread_t thread, tir_t *t) vm_offset_t kstack; bzero(tstate, x86_THREAD_STATE32_COUNT * sizeof(int)); if ((kstack = thread->kernel_stack) != 0){ - struct x86_kernel_state32 *iks = STACK_IKS(kstack); + struct x86_kernel_state *iks = STACK_IKS(kstack); tstate->ebx = iks->k_ebx; tstate->esp = iks->k_esp; tstate->ebp = iks->k_ebp; @@ -319,8 +182,6 @@ kern_dump(void) map = kernel_map; - not_in_kdp = 0; /* Signal vm functions not to acquire locks */ - thread_count = 1; segment_count = get_vmmap_entries(map); diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.c b/osfmk/kdp/ml/i386/kdp_x86_common.c new file mode 100644 index 000000000..8f08df116 --- /dev/null +++ b/osfmk/kdp/ml/i386/kdp_x86_common.c @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +// #define KDP_VM_READ_DEBUG 1 +// #define KDP_VM_WRITE_DEBUG 1 + +boolean_t kdp_read_io; +boolean_t kdp_trans_off; + +static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); + +pmap_t kdp_pmap = 0; + +static addr64_t +kdp_vtophys( + pmap_t pmap, + addr64_t va) +{ + addr64_t pa; + ppnum_t pp; + + pp = pmap_find_phys(pmap, va); + if(!pp) return 0; + + pa = ((addr64_t)pp << 12) | (va & 0x0000000000000FFFULL); + + return(pa); +} + +mach_vm_size_t +kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len) +{ + addr64_t cur_virt_src = (addr64_t)src; + addr64_t cur_virt_dst = (addr64_t)(intptr_t)dst; + addr64_t cur_phys_dst, cur_phys_src; + mach_vm_size_t resid = len; + mach_vm_size_t cnt = 0, cnt_src, cnt_dst; + pmap_t src_pmap = kernel_pmap; + +#ifdef KDP_VM_READ_DEBUG + printf("kdp_vm_read: src %llx dst %p len %llx\n", src, (void *)dst, len); +#endif + + if (kdp_trans_off) { + kdp_readphysmem64_req_t rq; + mach_vm_size_t ret; + + rq.address = src; + rq.nbytes = (uint32_t)len; + ret = kdp_machine_phys_read(&rq, dst, KDP_CURRENT_LCPU); + return ret; + } + +/* If a different pmap has been specified with kdp_pmap, use it to translate the + * source (cur_virt_src); otherwise, the source is translated using the + * kernel_pmap. 
+ */ + if (kdp_pmap) + src_pmap = kdp_pmap; + + while (resid != 0) { + if (!(cur_phys_src = kdp_vtophys(src_pmap, + cur_virt_src))) + goto exit; + +/* Always translate the destination buffer using the kernel_pmap */ + if(!(cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst))) + goto exit; + + /* Validate physical page numbers unless kdp_read_io is set */ + if (kdp_read_io == FALSE) + if (!pmap_valid_page(i386_btop(cur_phys_dst)) || !pmap_valid_page(i386_btop(cur_phys_src))) + goto exit; + +/* Get length left on page */ + cnt_src = PAGE_SIZE - (cur_phys_src & PAGE_MASK); + cnt_dst = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); + if (cnt_src > cnt_dst) + cnt = cnt_dst; + else + cnt = cnt_src; + if (cnt > resid) + cnt = resid; + +/* Do a physical copy */ + ml_copy_phys(cur_phys_src, cur_phys_dst, (vm_size_t)cnt); + + cur_virt_src += cnt; + cur_virt_dst += cnt; + resid -= cnt; + } +exit: + return (len - resid); +} + +mach_vm_size_t +kdp_machine_phys_read(kdp_readphysmem64_req_t *rq, caddr_t dst, + uint16_t lcpu) +{ + mach_vm_address_t src = rq->address; + mach_vm_size_t len = rq->nbytes; + + addr64_t cur_virt_dst; + addr64_t cur_phys_dst, cur_phys_src; + mach_vm_size_t resid = len; + mach_vm_size_t cnt = 0, cnt_src, cnt_dst; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (mach_vm_size_t) + kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_phys_read, rq, dst); + } + +#ifdef KDP_VM_READ_DEBUG + printf("kdp_phys_read: src %llx dst %p len %llx\n", src, (void *)dst, len); +#endif + + cur_virt_dst = (addr64_t)(intptr_t)dst; + cur_phys_src = (addr64_t)src; + + while (resid != 0) { + + if(!(cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst))) + goto exit; + +/* Get length left on page */ + cnt_src = PAGE_SIZE - (cur_phys_src & PAGE_MASK); + cnt_dst = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); + if (cnt_src > cnt_dst) + cnt = cnt_dst; + else + cnt = cnt_src; + if (cnt > resid) + cnt = resid; + + /* Do a physical copy; use ml_copy_phys() in the 
event this is + * a short read with potential side effects. + */ + ml_copy_phys(cur_phys_src, cur_phys_dst, (vm_size_t)cnt); + cur_phys_src += cnt; + cur_virt_dst += cnt; + resid -= cnt; + } +exit: + return (len - resid); +} + +/* + * + */ +mach_vm_size_t +kdp_machine_vm_write( caddr_t src, mach_vm_address_t dst, mach_vm_size_t len) +{ + addr64_t cur_virt_src, cur_virt_dst; + addr64_t cur_phys_src, cur_phys_dst; + unsigned resid, cnt, cnt_src, cnt_dst; + +#ifdef KDP_VM_WRITE_DEBUG + printf("kdp_vm_write: src %p dst %llx len %llx - %08X %08X\n", (void *)src, dst, len, ((unsigned int *)src)[0], ((unsigned int *)src)[1]); +#endif + + cur_virt_src = (addr64_t)(intptr_t)src; + cur_virt_dst = (addr64_t)dst; + + resid = (unsigned)len; + + while (resid != 0) { + if ((cur_phys_dst = kdp_vtophys(kernel_pmap, cur_virt_dst)) == 0) + goto exit; + + if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) + goto exit; + + /* Copy as many bytes as possible without crossing a page */ + cnt_src = (unsigned)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); + cnt_dst = (unsigned)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK)); + + if (cnt_src > cnt_dst) + cnt = cnt_dst; + else + cnt = cnt_src; + if (cnt > resid) + cnt = resid; + + ml_copy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ + + cur_virt_src +=cnt; + cur_virt_dst +=cnt; + resid -= cnt; + } +exit: + return (len - resid); +} + +/* + * + */ +mach_vm_size_t +kdp_machine_phys_write(kdp_writephysmem64_req_t *rq, caddr_t src, + uint16_t lcpu) +{ + mach_vm_address_t dst = rq->address; + mach_vm_size_t len = rq->nbytes; + addr64_t cur_virt_src; + addr64_t cur_phys_src, cur_phys_dst; + unsigned resid, cnt, cnt_src, cnt_dst; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (mach_vm_size_t) + kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_phys_write, rq, src); + } + +#ifdef KDP_VM_WRITE_DEBUG + printf("kdp_phys_write: src %p dst %llx len %llx - %08X %08X\n", (void *)src, dst, len, ((unsigned 
int *)src)[0], ((unsigned int *)src)[1]); +#endif + + cur_virt_src = (addr64_t)(intptr_t)src; + cur_phys_dst = (addr64_t)dst; + + resid = (unsigned)len; + + while (resid != 0) { + if ((cur_phys_src = kdp_vtophys(kernel_pmap, cur_virt_src)) == 0) + goto exit; + + /* Copy as many bytes as possible without crossing a page */ + cnt_src = (unsigned)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); + cnt_dst = (unsigned)(PAGE_SIZE - (cur_phys_dst & PAGE_MASK)); + + if (cnt_src > cnt_dst) + cnt = cnt_dst; + else + cnt = cnt_src; + if (cnt > resid) + cnt = resid; + + ml_copy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ + + cur_virt_src +=cnt; + cur_phys_dst +=cnt; + resid -= cnt; + } + +exit: + return (len - resid); +} + +int +kdp_machine_ioport_read(kdp_readioport_req_t *rq, caddr_t data, uint16_t lcpu) +{ + uint16_t addr = rq->address; + uint16_t size = rq->nbytes; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (int) kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_ioport_read, rq, data); + } + + switch (size) + { + case 1: + *((uint8_t *) data) = inb(addr); + break; + case 2: + *((uint16_t *) data) = inw(addr); + break; + case 4: + *((uint32_t *) data) = inl(addr); + break; + default: + return KDPERR_BADFLAVOR; + break; + } + + return KDPERR_NO_ERROR; +} + +int +kdp_machine_ioport_write(kdp_writeioport_req_t *rq, caddr_t data, uint16_t lcpu) +{ + uint16_t addr = rq->address; + uint16_t size = rq->nbytes; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (int) kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_ioport_write, rq, data); + } + + switch (size) + { + case 1: + outb(addr, *((uint8_t *) data)); + break; + case 2: + outw(addr, *((uint16_t *) data)); + break; + case 4: + outl(addr, *((uint32_t *) data)); + break; + default: + return KDPERR_BADFLAVOR; + break; + } + + return KDPERR_NO_ERROR; +} + +int +kdp_machine_msr64_read(kdp_readmsr64_req_t *rq, caddr_t data, uint16_t lcpu) +{ + 
uint64_t *value = (uint64_t *) data; + uint32_t msr = rq->address; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (int) kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_msr64_read, rq, data); + } + + *value = rdmsr64(msr); + return KDPERR_NO_ERROR; +} + +int +kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, caddr_t data, uint16_t lcpu) +{ + uint64_t *value = (uint64_t *) data; + uint32_t msr = rq->address; + + if ((lcpu != KDP_CURRENT_LCPU) && (lcpu != cpu_number())) { + return (int) kdp_x86_xcpu_invoke(lcpu, (kdp_x86_xcpu_func_t)kdp_machine_msr64_write, rq, data); + } + + wrmsr64(msr, *value); + return KDPERR_NO_ERROR; +} diff --git a/osfmk/kdp/ml/ppc/kdp_machdep.c b/osfmk/kdp/ml/ppc/kdp_machdep.c index 14a88fcb0..e1e89331d 100644 --- a/osfmk/kdp/ml/ppc/kdp_machdep.c +++ b/osfmk/kdp/ml/ppc/kdp_machdep.c @@ -54,10 +54,6 @@ int kdp_getc(void); boolean_t kdp_call_kdb(void); extern pmap_t kdp_pmap; -extern uint32_t kdp_src_high32; - - -extern unsigned kdp_vm_read(caddr_t src, caddr_t dst, unsigned len); int machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); @@ -439,7 +435,7 @@ kdp_panic( extern void halt_all_cpus(boolean_t); void -kdp_reboot(void) +kdp_machine_reboot(void) { printf("Attempting system restart..."); /* Call the platform specific restart*/ @@ -624,9 +620,7 @@ static void kdp_print_registers(struct savearea *state) printf("lr = 0x%08llx\t\t",state->save_lr); printf("ctr = 0x%08llx\n",state->save_ctr); printf("srr0(iar) = 0x%08llx\t\t",state->save_srr0); - printf("srr1(msr) = 0x%08B\n",state->save_srr1, - "\x10\x11""EE\x12PR\x13""FP\x14ME\x15""FE0\x16SE\x18" - "FE1\x19""AL\x1a""EP\x1bIT\x1c""DT"); + printf("srr1(msr) = 0x%08llx\n",state->save_srr1); printf("\n"); } @@ -647,10 +641,16 @@ kdp_print_backtrace( while(1); } -unsigned int kdp_ml_get_breakinsn(void) +void +kdp_machine_get_breakinsn( + uint8_t *bytes, + uint32_t *size +) { - return 0x7fe00008; + *(uint32_t 
*)bytes = 0x7fe00008; + *size = sizeof(uint32_t); } + #define LR_OFFSET 8 #define LR_OFFSET64 16 @@ -705,14 +705,14 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra break; } /* Assume there's a saved link register, and read it */ - if (kdp_vm_read((caddr_t) (stackptr + LR_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { + if (kdp_machine_vm_read((caddr_t) (stackptr + LR_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { break; } tracebuf++; prevsp = stackptr; /* Next frame */ - if (kdp_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { + if (kdp_machine_vm_read((caddr_t) stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { *tracebuf++ = 0; break; } @@ -726,13 +726,9 @@ machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nfra unsigned machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len) { - uint32_t kdp_vm_read_low32; unsigned retval; - kdp_src_high32 = srcaddr >> 32; - kdp_vm_read_low32 = srcaddr & 0x00000000FFFFFFFFUL; - retval = kdp_vm_read((caddr_t)kdp_vm_read_low32, dstaddr, len); - kdp_src_high32 = 0; + retval = kdp_machine_vm_read(srcaddr, dstaddr, len); return retval; } @@ -805,3 +801,27 @@ kdp_ml_enter_debugger(void) { __asm__ __volatile__("tw 4,r3,r3"); } + +int +kdp_machine_ioport_read(kdp_readioport_req_t *rq, caddr_t data, uint16_t lcpu) +{ + return 0; +} + +int +kdp_machine_ioport_write(kdp_writeioport_req_t *rq, caddr_t data, uint16_t lcpu) +{ + return 0; +} + +int +kdp_machine_msr64_read(kdp_readmsr64_req_t *rq, caddr_t data, uint16_t lcpu) +{ + return 0; +} + +int +kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, __unused caddr_t data, uint16_t lcpu) +{ + return 0; +} diff --git a/osfmk/kdp/ml/ppc/kdp_vm.c b/osfmk/kdp/ml/ppc/kdp_vm.c index 2c0120e69..737fd862d 100644 --- a/osfmk/kdp/ml/ppc/kdp_vm.c +++ b/osfmk/kdp/ml/ppc/kdp_vm.c @@ -58,13 +58,9 @@ pmap_t kdp_pmap; boolean_t kdp_trans_off; 
boolean_t kdp_read_io; -uint32_t kdp_src_high32; - -unsigned kdp_vm_read( caddr_t, caddr_t, unsigned); -unsigned kdp_vm_write( caddr_t, caddr_t, unsigned); extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; -extern int sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; +extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; static addr64_t kdp_vtophys(pmap_t pmap, addr64_t va); int kern_dump(void); @@ -88,8 +84,6 @@ typedef struct { int tstate_size; } tir_t; -unsigned int not_in_kdp = 1; /* Cleared when we begin to access vm functions in kdp */ - char command_buffer[512]; /* @@ -115,10 +109,8 @@ kdp_vtophys( * when translating src. */ -unsigned kdp_vm_read( - caddr_t src, - caddr_t dst, - unsigned len) +mach_vm_size_t +kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len) { addr64_t cur_virt_src, cur_virt_dst; addr64_t cur_phys_src, cur_phys_dst; @@ -127,11 +119,11 @@ unsigned kdp_vm_read( pmap_t pmap; #ifdef KDP_VM_READ_DEBUG - kprintf("kdp_vm_read1: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); + kprintf("kdp_machine_vm_read1: src %llx dst %llx len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); #endif - cur_virt_src = (addr64_t)((unsigned int)src | (((uint64_t)kdp_src_high32) << 32)); - cur_virt_dst = (addr64_t)((unsigned int)dst); + cur_virt_src = (addr64_t)src; + cur_virt_dst = (addr64_t)(intptr_t)dst; if (kdp_trans_off) { resid = len; /* Get the length to copy */ @@ -182,7 +174,7 @@ unsigned kdp_vm_read( if (cnt > resid) cnt = resid; #ifdef KDP_VM_READ_DEBUG - kprintf("kdp_vm_read2: pmap %08X, virt %016LLX, phys %016LLX\n", + kprintf("kdp_machine_vm_read2: pmap %08X, virt %016LLX, phys %016LLX\n", pmap, cur_virt_src, cur_phys_src); #endif @@ -195,19 +187,23 @@ unsigned kdp_vm_read( } exit: #ifdef KDP_VM_READ_DEBUG - kprintf("kdp_vm_read: ret %08X\n", len-resid); + 
kprintf("kdp_machine_vm_read: ret %08X\n", len-resid); #endif return (len - resid); } +mach_vm_size_t +kdp_machine_phys_read(kdp_readphysmem64_req_t *rq __unused, caddr_t dst __unused, uint16_t lcpu __unused) +{ + return 0; /* unimplemented */ +} + /* * */ -unsigned kdp_vm_write( - caddr_t src, - caddr_t dst, - unsigned len) -{ +mach_vm_size_t +kdp_machine_vm_write( caddr_t src, mach_vm_address_t dst, mach_vm_size_t len) +{ addr64_t cur_virt_src, cur_virt_dst; addr64_t cur_phys_src, cur_phys_dst; unsigned resid, cnt, cnt_src, cnt_dst; @@ -216,8 +212,8 @@ unsigned kdp_vm_write( printf("kdp_vm_write: src %x dst %x len %x - %08X %08X\n", src, dst, len, ((unsigned long *)src)[0], ((unsigned long *)src)[1]); #endif - cur_virt_src = (addr64_t)((unsigned int)src); - cur_virt_dst = (addr64_t)((unsigned int)dst); + cur_virt_src = (addr64_t)(intptr_t)src; + cur_virt_dst = (addr64_t)dst; resid = len; @@ -249,6 +245,12 @@ unsigned kdp_vm_write( return (len - resid); } +mach_vm_size_t +kdp_machine_phys_write(kdp_writephysmem64_req_t *rq __unused, caddr_t src __unused, + uint16_t lcpu __unused) +{ + return 0; /* unimplemented */ +} static void kern_collectth_state(thread_t thread, tir_t *t) @@ -342,7 +344,6 @@ kern_dump(void) unsigned int num_sects_txed = 0; map = kernel_map; - not_in_kdp = 0; /* Tell vm functions not to acquire locks */ thread_count = 1; segment_count = get_vmmap_entries(map); diff --git a/osfmk/kdp/ml/x86_64/kdp_machdep.c b/osfmk/kdp/ml/x86_64/kdp_machdep.c new file mode 100644 index 000000000..1da2a0133 --- /dev/null +++ b/osfmk/kdp/ml/x86_64/kdp_machdep.c @@ -0,0 +1,698 @@ +/* + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for PE_halt_restart */ +#include /* for halt_all_cpus */ +#include + +#include +#include +#include +#include +#include + +#define KDP_TEST_HARNESS 0 +#if KDP_TEST_HARNESS +#define dprintf(x) printf x +#else +#define dprintf(x) +#endif + +extern cpu_type_t cpuid_cputype(void); +extern cpu_subtype_t cpuid_cpusubtype(void); + +void print_saved_state(void *); +void kdp_call(void); +int kdp_getc(void); +boolean_t kdp_call_kdb(void); +void kdp_getstate(x86_thread_state64_t *); +void kdp_setstate(x86_thread_state64_t *); +void kdp_print_phys(int); + +int +machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); + +int +machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); + +unsigned +machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len); + +static void kdp_callouts(kdp_event_t 
event); + +void +kdp_exception( + unsigned char *pkt, + int *len, + unsigned short *remote_port, + unsigned int exception, + unsigned int code, + unsigned int subcode +) +{ + kdp_exception_t *rq = (kdp_exception_t *)pkt; + + rq->hdr.request = KDP_EXCEPTION; + rq->hdr.is_reply = 0; + rq->hdr.seq = kdp.exception_seq; + rq->hdr.key = 0; + rq->hdr.len = sizeof (*rq); + + rq->n_exc_info = 1; + rq->exc_info[0].cpu = 0; + rq->exc_info[0].exception = exception; + rq->exc_info[0].code = code; + rq->exc_info[0].subcode = subcode; + + rq->hdr.len += rq->n_exc_info * sizeof (kdp_exc_info_t); + + bcopy((char *)rq, (char *)pkt, rq->hdr.len); + + kdp.exception_ack_needed = TRUE; + + *remote_port = kdp.exception_port; + *len = rq->hdr.len; +} + +boolean_t +kdp_exception_ack( + unsigned char *pkt, + int len +) +{ + kdp_exception_ack_t *rq = (kdp_exception_ack_t *)pkt; + + if (((unsigned int) len) < sizeof (*rq)) + return(FALSE); + + if (!rq->hdr.is_reply || rq->hdr.request != KDP_EXCEPTION) + return(FALSE); + + dprintf(("kdp_exception_ack seq %x %x\n", rq->hdr.seq, kdp.exception_seq)); + + if (rq->hdr.seq == kdp.exception_seq) { + kdp.exception_ack_needed = FALSE; + kdp.exception_seq++; + } + return(TRUE); +} + +void +kdp_getstate( + x86_thread_state64_t *state +) +{ + x86_saved_state64_t *saved_state; + + saved_state = (x86_saved_state64_t *)kdp.saved_state; + + state->rax = saved_state->rax; + state->rbx = saved_state->rbx; + state->rcx = saved_state->rcx; + state->rdx = saved_state->rdx; + state->rdi = saved_state->rdi; + state->rsi = saved_state->rsi; + state->rbp = saved_state->rbp; + + state->r8 = saved_state->r8; + state->r9 = saved_state->r9; + state->r10 = saved_state->r10; + state->r11 = saved_state->r11; + state->r12 = saved_state->r12; + state->r13 = saved_state->r13; + state->r14 = saved_state->r14; + state->r15 = saved_state->r15; + + state->rsp = saved_state->isf.rsp; + state->rflags = saved_state->isf.rflags; + state->rip = saved_state->isf.rip; + + state->cs = 
saved_state->isf.cs; + state->fs = saved_state->fs; + state->gs = saved_state->gs; +} + + +void +kdp_setstate( + x86_thread_state64_t *state +) +{ + x86_saved_state64_t *saved_state; + + saved_state = (x86_saved_state64_t *)kdp.saved_state; + saved_state->rax = state->rax; + saved_state->rbx = state->rbx; + saved_state->rcx = state->rcx; + saved_state->rdx = state->rdx; + saved_state->rdi = state->rdi; + saved_state->rsi = state->rsi; + saved_state->rbp = state->rbp; + saved_state->r8 = state->r8; + saved_state->r9 = state->r9; + saved_state->r10 = state->r10; + saved_state->r11 = state->r11; + saved_state->r12 = state->r12; + saved_state->r13 = state->r13; + saved_state->r14 = state->r14; + saved_state->r15 = state->r15; + + saved_state->isf.rflags = state->rflags; + saved_state->isf.rsp = state->rsp; + saved_state->isf.rip = state->rip; + + saved_state->fs = (uint32_t)state->fs; + saved_state->gs = (uint32_t)state->gs; +} + + +kdp_error_t +kdp_machine_read_regs( + __unused unsigned int cpu, + unsigned int flavor, + char *data, + int *size +) +{ + static x86_float_state64_t null_fpstate; + + switch (flavor) { + + case x86_THREAD_STATE64: + dprintf(("kdp_readregs THREAD_STATE64\n")); + kdp_getstate((x86_thread_state64_t *)data); + *size = sizeof (x86_thread_state64_t); + return KDPERR_NO_ERROR; + + case x86_FLOAT_STATE64: + dprintf(("kdp_readregs THREAD_FPSTATE64\n")); + *(x86_float_state64_t *)data = null_fpstate; + *size = sizeof (x86_float_state64_t); + return KDPERR_NO_ERROR; + + default: + dprintf(("kdp_readregs bad flavor %d\n", flavor)); + *size = 0; + return KDPERR_BADFLAVOR; + } +} + +kdp_error_t +kdp_machine_write_regs( + __unused unsigned int cpu, + unsigned int flavor, + char *data, + __unused int *size +) +{ + switch (flavor) { + + case x86_THREAD_STATE64: + dprintf(("kdp_writeregs THREAD_STATE64\n")); + kdp_setstate((x86_thread_state64_t *)data); + return KDPERR_NO_ERROR; + + case x86_FLOAT_STATE64: + dprintf(("kdp_writeregs THREAD_FPSTATE64\n")); + 
return KDPERR_NO_ERROR; + + default: + dprintf(("kdp_writeregs bad flavor %d\n", flavor)); + return KDPERR_BADFLAVOR; + } +} + + + +void +kdp_machine_hostinfo( + kdp_hostinfo_t *hostinfo +) +{ + int i; + + hostinfo->cpus_mask = 0; + + for (i = 0; i < machine_info.max_cpus; i++) { + if (cpu_data_ptr[i] == NULL) + continue; + + hostinfo->cpus_mask |= (1 << i); + } + + hostinfo->cpu_type = cpuid_cputype() | CPU_ARCH_ABI64; + hostinfo->cpu_subtype = cpuid_cpusubtype(); +} + +void +kdp_panic( + const char *msg +) +{ + kprintf("kdp panic: %s\n", msg); + __asm__ volatile("hlt"); +} + + +void +kdp_machine_reboot(void) +{ + printf("Attempting system restart..."); + /* Call the platform specific restart*/ + if (PE_halt_restart) + (*PE_halt_restart)(kPERestartCPU); + /* If we do reach this, give up */ + halt_all_cpus(TRUE); +} + +int +kdp_intr_disbl(void) +{ + return splhigh(); +} + +void +kdp_intr_enbl(int s) +{ + splx(s); +} + +int +kdp_getc(void) +{ + return cnmaygetc(); +} + +void +kdp_us_spin(int usec) +{ + delay(usec/100); +} + +void print_saved_state(void *state) +{ + x86_saved_state64_t *saved_state; + + saved_state = state; + + kprintf("pc = 0x%llx\n", saved_state->isf.rip); + kprintf("cr2= 0x%llx\n", saved_state->cr2); + kprintf("rp = TODO FIXME\n"); + kprintf("sp = %p\n", saved_state); + +} + +void +kdp_sync_cache(void) +{ + return; /* No op here. 
*/ +} + +void +kdp_call(void) +{ + __asm__ volatile ("int $3"); /* Let the processor do the work */ +} + + +typedef struct _cframe_t { + struct _cframe_t *prev; + unsigned caller; + unsigned args[0]; +} cframe_t; + +extern pt_entry_t *DMAP2; +extern caddr_t DADDR2; + +void +kdp_print_phys(int src) +{ + unsigned int *iptr; + int i; + + *(int *) DMAP2 = 0x63 | (src & 0xfffff000); + invlpg((uintptr_t) DADDR2); + iptr = (unsigned int *) DADDR2; + for (i = 0; i < 100; i++) { + kprintf("0x%x ", *iptr++); + if ((i % 8) == 0) + kprintf("\n"); + } + kprintf("\n"); + *(int *) DMAP2 = 0; + +} + +boolean_t +kdp_i386_trap( + unsigned int trapno, + x86_saved_state64_t *saved_state, + kern_return_t result, + vm_offset_t va +) +{ + unsigned int exception, subcode = 0, code; + + if (trapno != T_INT3 && trapno != T_DEBUG) { + kprintf("Debugger: Unexpected kernel trap number: " + "0x%x, RIP: 0x%llx, CR2: 0x%llx\n", + trapno, saved_state->isf.rip, saved_state->cr2); + if (!kdp.is_conn) + return FALSE; + } + + mp_kdp_enter(); + kdp_callouts(KDP_EVENT_ENTER); + + if (saved_state->isf.rflags & EFL_TF) { + enable_preemption_no_check(); + } + + switch (trapno) { + + case T_DIVIDE_ERROR: + exception = EXC_ARITHMETIC; + code = EXC_I386_DIVERR; + break; + + case T_OVERFLOW: + exception = EXC_SOFTWARE; + code = EXC_I386_INTOFLT; + break; + + case T_OUT_OF_BOUNDS: + exception = EXC_ARITHMETIC; + code = EXC_I386_BOUNDFLT; + break; + + case T_INVALID_OPCODE: + exception = EXC_BAD_INSTRUCTION; + code = EXC_I386_INVOPFLT; + break; + + case T_SEGMENT_NOT_PRESENT: + exception = EXC_BAD_INSTRUCTION; + code = EXC_I386_SEGNPFLT; + subcode = (unsigned int)saved_state->isf.err; + break; + + case T_STACK_FAULT: + exception = EXC_BAD_INSTRUCTION; + code = EXC_I386_STKFLT; + subcode = (unsigned int)saved_state->isf.err; + break; + + case T_GENERAL_PROTECTION: + exception = EXC_BAD_INSTRUCTION; + code = EXC_I386_GPFLT; + subcode = (unsigned int)saved_state->isf.err; + break; + + case T_PAGE_FAULT: + exception 
= EXC_BAD_ACCESS; + code = result; + subcode = (unsigned int)va; + break; + + case T_WATCHPOINT: + exception = EXC_SOFTWARE; + code = EXC_I386_ALIGNFLT; + break; + + case T_DEBUG: + case T_INT3: + exception = EXC_BREAKPOINT; + code = EXC_I386_BPTFLT; + break; + + default: + exception = EXC_BAD_INSTRUCTION; + code = trapno; + break; + } + + kdp_raise_exception(exception, code, subcode, saved_state); + /* If the instruction single step bit is set, disable kernel preemption + */ + if (saved_state->isf.rflags & EFL_TF) { + disable_preemption(); + } + + kdp_callouts(KDP_EVENT_EXIT); + mp_kdp_exit(); + + return TRUE; +} + +boolean_t +kdp_call_kdb( + void) +{ + return(FALSE); +} + +void +kdp_machine_get_breakinsn( + uint8_t *bytes, + uint32_t *size +) +{ + bytes[0] = 0xcc; + *size = 1; +} + +extern pmap_t kdp_pmap; + +#define RETURN_OFFSET 4 + +int +machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p) +{ + uint32_t *tracebuf = (uint32_t *)tracepos; + uint32_t fence = 0; + uint32_t stackptr = 0; + uint32_t stacklimit = 0xfc000000; + int framecount = 0; + uint32_t init_eip = 0; + uint32_t prevsp = 0; + uint32_t framesize = 2 * sizeof(vm_offset_t); + + if (user_p) { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + init_eip = iss32->eip; + stackptr = iss32->ebp; + + stacklimit = 0xffffffff; + kdp_pmap = thread->task->map->pmap; + } + else + panic("32-bit trace attempted on 64-bit kernel"); + + *tracebuf++ = init_eip; + + for (framecount = 0; framecount < nframes; framecount++) { + + if ((tracebound - ((char *)tracebuf)) < (4 * framesize)) { + tracebuf--; + break; + } + + *tracebuf++ = stackptr; +/* Invalid frame, or hit fence */ + if (!stackptr || (stackptr == fence)) { + break; + } + + /* Unaligned frame */ + if (stackptr & 0x0000003) { + break; + } + + if (stackptr <= prevsp) { + break; + } + + if (stackptr > stacklimit) { + break; + } + + if (kdp_machine_vm_read((mach_vm_address_t)(stackptr + RETURN_OFFSET), 
(caddr_t) tracebuf, sizeof(*tracebuf)) != sizeof(*tracebuf)) { + break; + } + tracebuf++; + + prevsp = stackptr; + if (kdp_machine_vm_read((mach_vm_address_t)stackptr, (caddr_t) &stackptr, sizeof(stackptr)) != sizeof(stackptr)) { + *tracebuf++ = 0; + break; + } + } + + kdp_pmap = 0; + + return (uint32_t) (((char *) tracebuf) - tracepos); +} + + +#define RETURN_OFFSET64 8 +/* Routine to encapsulate the 64-bit address read hack*/ +unsigned +machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len) +{ + return (unsigned)kdp_machine_vm_read(srcaddr, dstaddr, len); +} + +int +machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p) +{ + uint64_t *tracebuf = (uint64_t *)tracepos; + uint32_t fence = 0; + addr64_t stackptr = 0; + int framecount = 0; + addr64_t init_rip = 0; + addr64_t prevsp = 0; + unsigned framesize = 2 * sizeof(addr64_t); + + if (user_p) { + x86_saved_state64_t *iss64; + iss64 = USER_REGS64(thread); + init_rip = iss64->isf.rip; + stackptr = iss64->rbp; + kdp_pmap = thread->task->map->pmap; + } + else { + stackptr = STACK_IKS(thread->kernel_stack)->k_rbp; + init_rip = STACK_IKS(thread->kernel_stack)->k_rip; + kdp_pmap = 0; + } + + *tracebuf++ = init_rip; + + for (framecount = 0; framecount < nframes; framecount++) { + + if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) { + tracebuf--; + break; + } + + *tracebuf++ = stackptr; + + if (!stackptr || (stackptr == fence)){ + break; + } + + if (stackptr & 0x0000003) { + break; + } + + if (stackptr <= prevsp) { + break; + } + + if (machine_read64(stackptr + RETURN_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) { + break; + } + tracebuf++; + + prevsp = stackptr; + if (machine_read64(stackptr, (caddr_t) &stackptr, sizeof(addr64_t)) != sizeof(addr64_t)) { + *tracebuf++ = 0; + break; + } + } + + kdp_pmap = NULL; + + return (uint32_t) (((char *) tracebuf) - tracepos); +} + +static struct kdp_callout { + struct kdp_callout 
*callout_next; + kdp_callout_fn_t callout_fn; + void *callout_arg; +} *kdp_callout_list = NULL; + + +/* + * Called from kernel context to register a kdp event callout. + */ +void +kdp_register_callout( + kdp_callout_fn_t fn, + void *arg) +{ + struct kdp_callout *kcp; + struct kdp_callout *list_head; + + kcp = kalloc(sizeof(*kcp)); + if (kcp == NULL) + panic("kdp_register_callout() kalloc failed"); + + kcp->callout_fn = fn; + kcp->callout_arg = arg; + + /* Lock-less list insertion using compare and exchange. */ + do { + list_head = kdp_callout_list; + kcp->callout_next = list_head; + } while (!OSCompareAndSwapPtr(list_head, kcp, (void * volatile *)&kdp_callout_list)); +} + +/* + * Called at exception/panic time when extering or exiting kdp. + * We are single-threaded at this time and so we don't use locks. + */ +static void +kdp_callouts(kdp_event_t event) +{ + struct kdp_callout *kcp = kdp_callout_list; + + while (kcp) { + kcp->callout_fn(kcp->callout_arg, event); + kcp = kcp->callout_next; + } +} + +void +kdp_ml_enter_debugger(void) +{ + __asm__ __volatile__("int3"); +} diff --git a/osfmk/kdp/ml/x86_64/kdp_vm.c b/osfmk/kdp/ml/x86_64/kdp_vm.c new file mode 100644 index 000000000..8a80e7a3a --- /dev/null +++ b/osfmk/kdp/ml/x86_64/kdp_vm.c @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern vm_offset_t sectTEXTB, sectDATAB, sectLINKB, sectPRELINKB; +extern unsigned long sectSizeTEXT, sectSizeDATA, sectSizeLINK, sectSizePRELINK; + +int kern_dump(void); +int kdp_dump_trap(int type, x86_saved_state64_t *regs); + +typedef struct { + int flavor; /* the number for this flavor */ + mach_msg_type_number_t count; /* count of ints in this flavor */ +} mythread_state_flavor_t; + +static mythread_state_flavor_t thread_flavor_array [] = { + {x86_THREAD_STATE64, x86_THREAD_STATE64_COUNT} +}; + +static int kdp_mynum_flavors = 1; +static int MAX_TSTATE_FLAVORS = 1; + +typedef struct { + vm_offset_t header; + int hoffset; + mythread_state_flavor_t *flavors; + int tstate_size; +} tir_t; + +char command_buffer[512]; + +static void +kern_collectth_state(thread_t thread, tir_t *t) +{ + vm_offset_t header; + int hoffset, i ; + mythread_state_flavor_t *flavors; + struct thread_command *tc; + /* + * Fill in thread command structure. 
+ */ + header = t->header; + hoffset = t->hoffset; + flavors = t->flavors; + + tc = (struct thread_command *) (header + hoffset); + tc->cmd = LC_THREAD; + tc->cmdsize = (uint32_t)sizeof(struct thread_command) + t->tstate_size; + hoffset += (uint32_t)sizeof(struct thread_command); + /* + * Follow with a struct thread_state_flavor and + * the appropriate thread state struct for each + * thread state flavor. + */ + for (i = 0; i < kdp_mynum_flavors; i++) { + *(mythread_state_flavor_t *)(header+hoffset) = + flavors[i]; + hoffset += (uint32_t)sizeof(mythread_state_flavor_t); + /* Locate and obtain the non-volatile register context + * for this kernel thread. This should ideally be + * encapsulated in machine_thread_get_kern_state() + * but that routine appears to have been co-opted + * by CHUD to obtain pre-interrupt state. + */ + if (flavors[i].flavor == x86_THREAD_STATE64) { + x86_thread_state64_t *tstate = (x86_thread_state64_t *) (header + hoffset); + vm_offset_t kstack; + bzero(tstate, x86_THREAD_STATE64_COUNT * sizeof(int)); + if ((kstack = thread->kernel_stack) != 0){ + struct x86_kernel_state *iks = STACK_IKS(kstack); + tstate->rbx = iks->k_rbx; + tstate->rsp = iks->k_rsp; + tstate->rbp = iks->k_rbp; + tstate->r12 = iks->k_r12; + tstate->r13 = iks->k_r13; + tstate->r14 = iks->k_r14; + tstate->r15 = iks->k_r15; + tstate->rip = iks->k_rip; + } + } + else if (machine_thread_get_kern_state(thread, + flavors[i].flavor, (thread_state_t) (header+hoffset), + &flavors[i].count) != KERN_SUCCESS) + printf ("Failure in machine_thread_get_kern_state()\n"); + hoffset += (uint32_t)(flavors[i].count*sizeof(int)); + } + + t->hoffset = hoffset; +} + +/* Intended to be called from the kernel trap handler if an unrecoverable fault + * occurs during a crashdump (which shouldn't happen since we validate mappings + * and so on). This should be reworked to attempt some form of recovery. 
+ */
+int
+kdp_dump_trap(
+	int type,
+	__unused x86_saved_state64_t	*saved_state)
+{
+	printf ("An unexpected trap (type %d) occurred during the system dump, terminating.\n", type);
+	/* Send the KDP_EOF packet (kern_dump uses the same call as its "last packet"). */
+	kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0));
+	abort_panic_transfer();
+	/* Clear the dump-related bits in kdp_flag. */
+	kdp_flag &= ~KDP_PANIC_DUMP_ENABLED;
+	kdp_flag &= ~PANIC_CORE_ON_NMI;
+	kdp_flag &= ~PANIC_LOG_DUMP;
+
+	kdp_reset();
+
+	/* Note: the saved state passed on is kdp.saved_state, not the (unused) argument. */
+	kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state);
+	return( 0 );
+}
+
+/*
+ * kern_dump
+ *
+ * Build a Mach-O MH_CORE image of kernel_map and stream it out through
+ * kdp_send_crashdump_pkt()/kdp_send_crashdump_data(): first the
+ * mach_header_64, then one LC_SEGMENT_64 command per VM region (followed
+ * by the region contents unless the region is tagged VM_MEMORY_IOKIT),
+ * then the thread command data produced by kern_collectth_state() for
+ * current_thread(), and finally a KDP_EOF packet.
+ *
+ * Returns 0 on success, otherwise the negative value returned by the
+ * first send that failed.
+ */
+int
+kern_dump(void)
+{
+	vm_map_t	map;
+	unsigned int	thread_count, segment_count;
+	unsigned int	command_size = 0, header_size = 0, tstate_size = 0;
+	uint64_t	hoffset = 0, foffset = 0, nfoffset = 0;
+	unsigned int	max_header_size = 0;
+	vm_offset_t	header, txstart;
+	vm_map_offset_t vmoffset;
+	struct mach_header_64		*mh64;
+	struct segment_command_64	*sc64;
+	mach_vm_size_t	size = 0;
+	vm_prot_t	prot = 0;
+	vm_prot_t	maxprot = 0;
+	vm_inherit_t	inherit = 0;
+	mythread_state_flavor_t flavors[MAX_TSTATE_FLAVORS];
+	vm_size_t	nflavors;
+	vm_size_t	i;
+	uint32_t	nesting_depth = 0;
+	kern_return_t	kret = 0;
+	struct vm_region_submap_info_64 vbr;
+	mach_msg_type_number_t vbrcount = 0;
+	tir_t tir1;
+
+	int error = 0;
+	int panic_error = 0;
+	unsigned int mach_section_count = 0;
+
+	map = kernel_map;
+
+	thread_count = 1;
+	segment_count = get_vmmap_entries(map);
+
+	printf("Kernel map has %d entries\n", segment_count);
+
+	nflavors = kdp_mynum_flavors;
+	bcopy((char *)thread_flavor_array,(char *) flavors,sizeof(thread_flavor_array));
+
+	/* Total size of the per-thread state: one flavor header plus its words, per flavor. */
+	for (i = 0; i < nflavors; i++)
+		tstate_size += (uint32_t)(sizeof(mythread_state_flavor_t) +
+		    (flavors[i].count * sizeof(int)));
+
+	command_size = (uint32_t)((segment_count + mach_section_count) *
+	    sizeof(struct segment_command_64) +
+	    thread_count * sizeof(struct thread_command) +
+	    tstate_size * thread_count);
+
+	header_size = command_size + (uint32_t)sizeof(struct mach_header_64);
+	header = (vm_offset_t) command_buffer;
+
+	/*
+	 * Set up Mach-O header for the currently executing kernel
+	 * (64-bit Mach-O structures are used throughout).
+	 */
+	printf ("Generated Mach-O header size was %d\n", header_size);
+
+	mh64 = (struct mach_header_64 *) header;
+	mh64->magic = MH_MAGIC_64;
+	mh64->cputype = cpu_type();
+	mh64->cpusubtype = cpu_subtype();
+	mh64->filetype = MH_CORE;
+	mh64->ncmds = segment_count + thread_count + mach_section_count;
+	mh64->sizeofcmds = command_size;
+	mh64->flags = 0;
+	mh64->reserved = 0;
+
+	hoffset = sizeof(struct mach_header_64);	/* offset into header */
+	foffset = (uint32_t)round_page(header_size);	/* offset into file */
+	/* Padding */
+	if ((foffset - header_size) < (4*sizeof(struct segment_command_64))) {
+		foffset += (uint32_t)((4*sizeof(struct segment_command_64)) - (foffset-header_size));
+	}
+
+	/* Load commands may not be written at or beyond the first data offset. */
+	max_header_size = (unsigned int)foffset;
+
+	vmoffset = vm_map_min(map);
+
+	/* Transmit the Mach-O MH_CORE header, and seek forward past the
+	 * area reserved for the segment and thread commands
+	 * to begin data transmission
+	 */
+	/* nfoffset is 0: seek to the start of the file before sending the header. */
+	if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(nfoffset) , &nfoffset)) < 0) {
+		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+
+	/*
+	 * The closing parenthesis of the assignment must come before "< 0"
+	 * so that panic_error receives the call's return value rather than
+	 * the result of the comparison.
+	 */
+	if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct mach_header_64), (caddr_t) mh64)) < 0) {
+		printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+	if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
+		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+	printf ("Transmitting kernel state, please wait: ");
+
+	while ((segment_count > 0) || (kret == KERN_SUCCESS)){
+
+		while (1) {
+
+			/*
+			 *	Get region information for next region.
+			 */
+
+			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
+			if((kret = mach_vm_region_recurse(map,
+				    &vmoffset, &size, &nesting_depth,
+				    (vm_region_recurse_info_t)&vbr,
+				    &vbrcount)) != KERN_SUCCESS) {
+				break;
+			}
+
+			/* Descend into submaps until a non-submap region is found. */
+			if(vbr.is_submap) {
+				nesting_depth++;
+				continue;
+			} else {
+				break;
+			}
+		}
+
+		if(kret != KERN_SUCCESS)
+			break;
+
+		prot = vbr.protection;
+		maxprot = vbr.max_protection;
+		inherit = vbr.inheritance;
+
+		/*
+		 *	Fill in segment command structure.
+		 */
+
+		if (hoffset > max_header_size)
+			break;
+		/*
+		 * The command is built at the start of the command buffer
+		 * (the same storage the Mach-O header was sent from) and
+		 * then written to file offset hoffset.
+		 */
+		sc64 = (struct segment_command_64 *) (header);
+		sc64->cmd = LC_SEGMENT_64;
+		sc64->cmdsize = sizeof(struct segment_command_64);
+		sc64->segname[0] = 0;
+		sc64->vmaddr = vmoffset;
+		sc64->vmsize = size;
+		sc64->fileoff = foffset;
+		sc64->filesize = size;
+		sc64->maxprot = maxprot;
+		sc64->initprot = prot;
+		sc64->nsects = 0;
+
+		if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) {
+			printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+			error = panic_error;
+			goto out;
+		}
+
+		if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, sizeof(struct segment_command_64) , (caddr_t) sc64)) < 0) {
+			printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
+			error = panic_error;
+			goto out;
+		}
+
+		/* Do not transmit memory tagged VM_MEMORY_IOKIT - instead,
+		 * seek past that region on the server - this creates a
+		 * hole in the file.
+		 */
+
+		if ((vbr.user_tag != VM_MEMORY_IOKIT)) {
+
+			if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(foffset) , &foffset)) < 0) {
+				printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+				error = panic_error;
+				goto out;
+			}
+
+			txstart = vmoffset;
+
+			if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, (unsigned int)size, (caddr_t) txstart)) < 0) {
+				printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
+				error = panic_error;
+				goto out;
+			}
+		}
+
+		/* Advance to the next load-command slot, file offset and VM address. */
+		hoffset += (unsigned int)sizeof(struct segment_command_64);
+		foffset += (unsigned int)size;
+		vmoffset += size;
+		segment_count--;
+	}
+	tir1.header = header;
+	tir1.hoffset = 0;
+	tir1.flavors = flavors;
+	tir1.tstate_size = tstate_size;
+
+	/* Now send out the LC_THREAD load command, with the thread information
+	 * for the current activation.
+	 * Note that the corefile can contain LC_SEGMENT commands with file
+	 * offsets that point past the edge of the corefile, in the event that
+	 * the last N VM regions were all I/O mapped or otherwise
+	 * non-transferable memory, not followed by a normal VM region;
+	 * i.e. there will be no hole that reaches to the end of the core file.
+	 */
+	kern_collectth_state (current_thread(), &tir1);
+
+	if ((panic_error = kdp_send_crashdump_pkt (KDP_SEEK, NULL, sizeof(hoffset) , &hoffset)) < 0) {
+		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+
+	/* tir1.hoffset bytes are sent from the command buffer (presumably the length written by kern_collectth_state - confirm). */
+	if ((panic_error = kdp_send_crashdump_data (KDP_DATA, NULL, tir1.hoffset , (caddr_t) header)) < 0) {
+		printf ("kdp_send_crashdump_data failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+
+	/* last packet */
+	if ((panic_error = kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0))) < 0)
+	{
+		printf ("kdp_send_crashdump_pkt failed with error %d\n", panic_error);
+		error = panic_error;
+		goto out;
+	}
+out:
+	return (error);
+}
diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile index c07d5f29b..0ff09ef28 100644 --- a/osfmk/kern/Makefile +++ b/osfmk/kern/Makefile @@ -12,6 +12,7 @@ DATAFILES = EXPORT_ONLY_FILES = \ affinity.h \ assert.h \ + audit_sessionport.h \ clock.h \ cpu_number.h \ cpu_data.h \ @@ -19,6 +20,7 @@ EXPORT_ONLY_FILES = \ etimer.h \ ipc_mig.h \ kalloc.h \ + kext_alloc.h \ kern_types.h \ lock.h \ locks.h \ diff --git a/osfmk/kern/affinity.c b/osfmk/kern/affinity.c index 1b319c753..eb5095459 100644 --- a/osfmk/kern/affinity.c +++ b/osfmk/kern/affinity.c @@ -59,8 +59,8 @@ #endif struct affinity_space { - mutex_t aspc_lock; - uint32_t aspc_task_count; + lck_mtx_t aspc_lock; + uint32_t aspc_task_count; queue_head_t aspc_affinities; }; typedef struct affinity_space *affinity_space_t; @@ -149,7 +149,7 @@ thread_affinity_set(thread_t thread, uint32_t tag) return KERN_TERMINATED; } - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); aset = thread->affinity_set; if (aset != NULL) { /* @@ -179,7 +179,7 @@ thread_affinity_set(thread_t thread, uint32_t tag) } else { aset = affinity_set_alloc(); if (aset == NULL) { - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); thread_mtx_unlock(thread); return KERN_RESOURCE_SHORTAGE; } @@ -192,7
+192,7 @@ thread_affinity_set(thread_t thread, uint32_t tag) affinity_set_add(aset, thread); } - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); thread_mtx_unlock(thread); /* @@ -225,10 +225,10 @@ task_affinity_create(task_t parent_task, task_t child_task) * Bump the task reference count on the shared namespace and * give it to the child. */ - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); aspc->aspc_task_count++; child_task->affinity_space = aspc; - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); } /* @@ -243,19 +243,21 @@ task_affinity_deallocate(task_t task) DBG("task_affinity_deallocate(%p) aspc %p task_count %d\n", task, aspc, aspc->aspc_task_count); - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); if (--(aspc->aspc_task_count) == 0) { assert(queue_empty(&aspc->aspc_affinities)); - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); affinity_space_free(aspc); } else { - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); } } /* * task_affinity_info() * Return affinity tag info (number, min, max) for the task. + * + * Conditions: task is locked. 
*/ kern_return_t task_affinity_info( @@ -274,10 +276,9 @@ task_affinity_info( info->min = THREAD_AFFINITY_TAG_NULL; info->max = THREAD_AFFINITY_TAG_NULL; - task_lock(task); aspc = task->affinity_space; if (aspc) { - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); queue_iterate(&aspc->aspc_affinities, aset, affinity_set_t, aset_affinities) { info->set_count++; @@ -289,9 +290,8 @@ task_affinity_info( info->max = aset->aset_tag; } info->task_count = aspc->aspc_task_count; - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); } - task_unlock(task); return KERN_SUCCESS; } @@ -318,9 +318,9 @@ thread_affinity_dup(thread_t parent, thread_t child) assert(aspc == parent->task->affinity_space); assert(aspc == child->task->affinity_space); - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); affinity_set_add(aset, child); - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); thread_mtx_unlock(parent); } @@ -339,11 +339,23 @@ thread_affinity_terminate(thread_t thread) DBG("thread_affinity_terminate(%p)\n", thread); aspc = aset->aset_space; - mutex_lock(&aspc->aspc_lock); + lck_mtx_lock(&aspc->aspc_lock); if (affinity_set_remove(aset, thread)) { affinity_set_free(aset); } - mutex_unlock(&aspc->aspc_lock); + lck_mtx_unlock(&aspc->aspc_lock); +} + +/* + * thread_affinity_exec() + * Called from execve() to cancel any current affinity - a new image implies + * the calling thread terminates any expressed or inherited affinity. 
+ */ +void +thread_affinity_exec(thread_t thread) +{ + if (thread->affinity_set != AFFINITY_SET_NULL) + thread_affinity_terminate(thread); } /* @@ -358,7 +370,7 @@ affinity_space_alloc(void) if (aspc == NULL) return NULL; - mutex_init(&aspc->aspc_lock, 0); + lck_mtx_init(&aspc->aspc_lock, &task_lck_grp, &task_lck_attr); queue_init(&aspc->aspc_affinities); aspc->aspc_task_count = 1; @@ -529,7 +541,7 @@ affinity_set_place(affinity_space_t aspc, affinity_set_t new_aset) if (affinity_sets_mapping == 0) i_least_occupied = 0; else - i_least_occupied = ((unsigned int)aspc % 127) % num_cpu_asets; + i_least_occupied = (unsigned int)(((uintptr_t)aspc % 127) % num_cpu_asets); for (i = 0; i < num_cpu_asets; i++) { unsigned int j = (i_least_occupied + i) % num_cpu_asets; if (set_occupancy[j] == 0) { diff --git a/osfmk/kern/affinity.h b/osfmk/kern/affinity.h index e634e7122..167bfd2d8 100644 --- a/osfmk/kern/affinity.h +++ b/osfmk/kern/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,13 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _KERN_CPU_AFFINITY_H_ -#define _KERN_CPU_AFFINITY_H_ + +#ifdef XNU_KERNEL_PRIVATE + +#ifndef _KERN_AFFINITY_H_ +#define _KERN_AFFINITY_H_ + +#ifdef MACH_KERNEL_PRIVATE #include #include @@ -50,8 +55,6 @@ struct affinity_set { }; extern boolean_t thread_affinity_is_supported(void); -extern kern_return_t thread_affinity_set(thread_t thread, uint32_t tag); -extern uint32_t thread_affinity_get(thread_t thread); extern void thread_affinity_dup(thread_t parent, thread_t child); extern void thread_affinity_terminate(thread_t thread); extern void task_affinity_create( @@ -64,4 +67,12 @@ extern kern_return_t task_affinity_info( task_info_t, mach_msg_type_number_t *); -#endif /* _KERN_CPU_AFFINITY_H_ */ +#endif /* MACH_KERNEL_PRIVATE */ + +extern kern_return_t thread_affinity_set(thread_t thread, uint32_t tag); +extern uint32_t thread_affinity_get(thread_t thread); +extern void thread_affinity_exec(thread_t thread); + +#endif /* _KERN_AFFINITY_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/kern/assert.h b/osfmk/kern/assert.h index 1b528a4b3..c704dca5f 100644 --- a/osfmk/kern/assert.h +++ b/osfmk/kern/assert.h @@ -74,6 +74,11 @@ extern void Assert( const char *file, int line, const char *expression); + +#if CONFIG_NO_PANIC_STRINGS +#define Assert(file, line, ex) (Assert)("", line, "") +#endif + __END_DECLS #if MACH_ASSERT diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c index 9a0e95ec6..b6540f92d 100644 --- a/osfmk/kern/ast.c +++ b/osfmk/kern/ast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,7 @@ #include #include // for CHUD AST hook + void ast_init(void) { @@ -98,21 +99,21 @@ ast_taken( boolean_t preempt_trap = (reasons == AST_PREEMPTION); ast_t *myast = ast_pending(); thread_t thread = current_thread(); + perfCallback perf_hook = perfASTHook; /* * CHUD hook - all threads including idle processor threads */ - if(perfASTHook) { - if(*myast & AST_CHUD_ALL) { - perfASTHook(0, NULL, 0, 0); + if (perf_hook) { + if (*myast & AST_CHUD_ALL) { + (*perf_hook)(0, NULL, 0, 0); - if(*myast == AST_NONE) { - return; // nothing left to do - } + if (*myast == AST_NONE) + return; } - } else { - *myast &= ~AST_CHUD_ALL; } + else + *myast &= ~AST_CHUD_ALL; reasons &= *myast; *myast &= ~reasons; diff --git a/osfmk/kern/ast.h b/osfmk/kern/ast.h index 93567aca7..b6f42e4ec 100644 --- a/osfmk/kern/ast.h +++ b/osfmk/kern/ast.h @@ -182,11 +182,11 @@ MACRO_END * be followed by ast_propagate(). */ #define thread_ast_set(act, reason) \ - ((void)hw_atomic_or(&(act)->ast, (reason))) + (hw_atomic_or_noret(&(act)->ast, (reason))) #define thread_ast_clear(act, reason) \ - ((void)hw_atomic_and(&(act)->ast, ~(reason))) + (hw_atomic_and_noret(&(act)->ast, ~(reason))) #define thread_ast_clear_all(act) \ - ((void)hw_atomic_and(&(act)->ast, AST_NONE)) + (hw_atomic_and_noret(&(act)->ast, AST_NONE)) #ifdef MACH_BSD diff --git a/osfmk/kern/audit_sessionport.c b/osfmk/kern/audit_sessionport.c new file mode 100644 index 000000000..f42000464 --- /dev/null +++ b/osfmk/kern/audit_sessionport.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include
+#include
+#include
+#include
+#include
+
+#if CONFIG_AUDIT
+/*
+ * audit_session_mksend
+ *
+ * Description: Obtain a send right for given audit session information.
+ *
+ * Parameters:	*aia_p		Audit session information to associate with
+ * 				the new port.
+ * 		*sessionport	Pointer to the current session port.  This may
+ * 				actually be set to IPC_PORT_NULL.
+ *
+ * Returns:	!NULL		Resulting send right.
+ * 		NULL		Failed to allocate port (due to lack of memory
+ * 				resources).
+ *
+ * 		*sessionport	The session port that may have been allocated.
+ *
+ * Notes: On return, sendport will be set to the new send right on success,
+ * 	  or null/dead on error.
+ */
+ipc_port_t
+audit_session_mksend(struct auditinfo_addr *aia_p, ipc_port_t *sessionport)
+{
+	ipc_port_t notifyport;
+	ipc_port_t sendport = IPC_PORT_NULL;
+
+	/*
+	 * If we have an existing, active session port then use it.
+	 */
+	sendport = ipc_port_make_send(*sessionport);
+	if (IP_VALID(sendport)) {
+		ip_lock(sendport);
+		if (ip_active(sendport) &&
+		    IKOT_AU_SESSIONPORT == ip_kotype(sendport)) {
+			ip_unlock(sendport);
+			return (sendport);
+		}
+		/* Not usable as a session port: drop the send right just made. */
+		ip_unlock(sendport);
+		ipc_port_release_send(sendport);
+	}
+
+	/*
+	 * Otherwise, create a new one for this session.
+	 */
+	*sessionport = ipc_port_alloc_kernel();
+	if (IP_VALID(*sessionport)) {
+		ipc_kobject_set(*sessionport, (ipc_kobject_t)aia_p,
+		    IKOT_AU_SESSIONPORT);
+
+		/* Request a no-senders notification. */
+		notifyport = ipc_port_make_sonce(*sessionport);
+		ip_lock(*sessionport);
+		/* unlocked by ipc_port_nsrequest */
+		ipc_port_nsrequest(*sessionport, 1, notifyport, &notifyport);
+	}
+	sendport = ipc_port_make_send(*sessionport);
+
+	return (sendport);
+}
+
+
+/*
+ * audit_session_porttoaia
+ *
+ * Description: Obtain the audit session info associated with the given port.
+ *
+ * Parameters: port		A Mach port.
+ *
+ * Returns:    NULL		The given Mach port did not reference audit
+ * 				session info.
+ * 	       !NULL		The audit session info that is associated with
+ * 				the Mach port.
+ *
+ * Notes: The caller must have a reference on the sessionport.
+ */
+struct auditinfo_addr *
+audit_session_porttoaia(ipc_port_t port)
+{
+	struct auditinfo_addr *aia_p = NULL;
+
+	if (IP_VALID(port)) {
+		ip_lock(port);
+		if (ip_active(port) && IKOT_AU_SESSIONPORT == ip_kotype(port))
+			aia_p = (struct auditinfo_addr *)port->ip_kobject;
+		ip_unlock(port);
+	}
+
+	return (aia_p);
+}
+
+
+/*
+ * audit_session_nosenders
+ *
+ * Description: Handle a no-senders notification for a sessionport.
+ *
+ * Parameters: msg		A Mach no-senders notification message.
+ *
+ * Notes: It is possible that new send rights are created after a
+ * 	  no-senders notification has been sent (i.e. via audit_session_mksend).
+ * 	  We check the port's mscount against the notification's not_count
+ * 	  to detect when this happens, and re-arm the notification in that
+ * 	  case.
+ *
+ * 	  In the normal case (no new senders), we first mark the port
+ * 	  as dying by setting its object type to IKOT_NONE so that
+ * 	  audit_session_mksend will no longer use it to create
+ * 	  additional send rights.  We can then safely call
+ * 	  audit_session_portaiadestroy with no locks.
+ */
+void
+audit_session_nosenders(mach_msg_header_t *msg)
+{
+	mach_no_senders_notification_t *notification = (void *)msg;
+	ipc_port_t port = notification->not_header.msgh_remote_port;
+	ipc_port_t notifyport;
+	struct auditinfo_addr *port_aia_p = NULL;
+
+	if (!IP_VALID(port))
+		return;
+	ip_lock(port);
+	if (ip_active(port) && IKOT_AU_SESSIONPORT == ip_kotype(port)) {
+		port_aia_p = (struct auditinfo_addr *)port->ip_kobject;
+		assert(NULL != port_aia_p);
+		if (port->ip_mscount <= notification->not_count)
+			ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE);
+		else {
+			/* re-arm the notification */
+			ip_unlock(port);
+			notifyport = ipc_port_make_sonce(port);
+			ip_lock(port);
+			/* unlocked by ipc_port_nsrequest */
+			ipc_port_nsrequest(port, port->ip_mscount, notifyport,
+			    &notifyport);
+			return;
+		}
+	}
+	ip_unlock(port);
+	if (NULL != port_aia_p)
+		audit_session_portaiadestroy(port_aia_p);
+	ipc_port_dealloc_kernel(port);
+}
+#endif /* CONFIG_AUDIT */
diff --git a/libsa/mach.c b/osfmk/kern/audit_sessionport.h similarity index 71% rename from libsa/mach.c rename to osfmk/kern/audit_sessionport.h index 1507b6a5b..5a26f3451 100644 --- a/libsa/mach.c +++ b/osfmk/kern/audit_sessionport.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License').
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,26 +22,20 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include -#include -#include -#include +#ifdef KERNEL_PRIVATE +#ifndef _KERN_AUDIT_SESSIONPORT_H_ +#define _KERN_AUDIT_SESSIONPORT_H_ +struct auditinfo_addr; -__private_extern__ -vm_map_t mach_task_self(void) { - return kernel_map; -} +ipc_port_t audit_session_mksend(struct auditinfo_addr *aia_p, + ipc_port_t *sessionport); +struct auditinfo_addr *audit_session_porttoaia(ipc_port_t); +void audit_session_portaiadestroy(struct auditinfo_addr *); +void audit_session_nosenders(mach_msg_header_t *); -__private_extern__ -char * mach_error_string(int errnum) { - char * string = (char *)malloc(80); - if (string) { - sprintf(string, "mach error # %d", errnum); - } - return string; -} +#endif /* _KERN_AUDIT_SESSIONPORT_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c index c13e40826..822c07ce9 100644 --- a/osfmk/kern/bsd_kern.c +++ b/osfmk/kern/bsd_kern.c @@ -26,6 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include +#include #include #include @@ -277,12 +278,12 @@ int is_64signalregset(void) } /* - * The old map reference is returned. 
+ * Swap in a new map for the task/thread pair; the old map reference is + * returned. */ vm_map_t -swap_task_map(task_t task,vm_map_t map) +swap_task_map(task_t task, thread_t thread, vm_map_t map) { - thread_t thread = current_thread(); vm_map_t old_map; if (task != thread->task) @@ -293,7 +294,9 @@ swap_task_map(task_t task,vm_map_t map) thread->map = task->map = map; task_unlock(task); +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 inval_copy_windows(thread); +#endif return old_map; } @@ -306,6 +309,17 @@ pmap_t get_task_pmap(task_t t) return(t->map->pmap); } +/* + * + */ +uint64_t get_task_resident_size(task_t task) +{ + vm_map_t map; + + map = (task == kernel_task) ? kernel_map: task->map; + return((uint64_t)pmap_resident_count(map->pmap) * PAGE_SIZE_64); +} + /* * */ @@ -586,7 +600,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_interna !queue_end(&task->threads, (queue_entry_t)thact); ) { #if defined(__ppc__) || defined(__arm__) if (thact->machine.cthread_self == thaddr) -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) if (thact->machine.pcb->cthread_self == thaddr) #else #error architecture not supported @@ -617,6 +631,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, struct proc_threadinfo_interna if ((vpp != NULL) && (thact->uthread != NULL)) bsd_threadcdir(thact->uthread, vpp, vidp); + bsd_getthreadname(thact->uthread,ptinfo->pth_name); err = 0; goto out; } @@ -645,7 +660,7 @@ fill_taskthreadlist(task_t task, void * buffer, int thcount) !queue_end(&task->threads, (queue_entry_t)thact); ) { #if defined(__ppc__) || defined(__arm__) thaddr = thact->machine.cthread_self; -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) thaddr = thact->machine.pcb->cthread_self; #else #error architecture not supported @@ -659,7 +674,7 @@ fill_taskthreadlist(task_t task, void * buffer, int thcount) out: task_unlock(task); - return(numthr * sizeof(uint64_t)); + return 
(int)(numthr * sizeof(uint64_t)); } diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index 4a762bec5..fd2e29797 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -49,12 +49,19 @@ uint32_t hz_tick_interval = 1; -#if CONFIG_DTRACE -static void clock_track_calend_nowait(void); -#endif decl_simple_lock_data(static,clock_lock) +#define clock_lock() \ + simple_lock(&clock_lock) + +#define clock_unlock() \ + simple_unlock(&clock_lock) + +#define clock_lock_init() \ + simple_lock_init(&clock_lock, 0) + + /* * Time of day (calendar) variables. * @@ -68,17 +75,14 @@ static struct clock_calend { uint64_t epoch; uint64_t offset; - int64_t adjtotal; /* Nanosecond remaining total adjustment */ - uint64_t adjdeadline; /* Absolute time value for next adjustment period */ - uint32_t adjinterval; /* Absolute time interval of adjustment period */ + int32_t adjdelta; /* Nanosecond time delta for this adjustment period */ uint64_t adjstart; /* Absolute time value for start of this adjustment period */ uint32_t adjoffset; /* Absolute time offset for this adjustment period as absolute value */ - uint32_t adjactive; - timer_call_data_t adjcall; } clock_calend; -#if CONFIG_DTRACE +#if CONFIG_DTRACE + /* * Unlocked calendar flipflop; this is used to track a clock_calend such * that we can safely access a snapshot of a valid clock_calend structure @@ -93,6 +97,9 @@ static struct unlocked_clock_calend { struct clock_calend calend; /* copy of calendar */ uint32_t gen; /* generation count */ } flipflop[ 2]; + +static void clock_track_calend_nowait(void); + #endif /* @@ -102,9 +109,16 @@ static struct unlocked_clock_calend { #define calend_adjskew (40 * NSEC_PER_USEC) /* "standard" skew, ns / period */ #define calend_adjbig (NSEC_PER_SEC) /* use 10x skew above adjbig ns */ +static int64_t 
calend_adjtotal; /* Nanosecond remaining total adjustment */ +static uint64_t calend_adjdeadline; /* Absolute time value for next adjustment period */ +static uint32_t calend_adjinterval; /* Absolute time interval of adjustment period */ + +static timer_call_data_t calend_adjcall; +static uint32_t calend_adjactive; + static uint32_t calend_set_adjustment( - int32_t *secs, - int32_t *microsecs); + long *secs, + int *microsecs); static void calend_adjust_call(void); static uint32_t calend_adjust(void); @@ -126,7 +140,7 @@ MACRO_END #define TIME_SUB(rsecs, secs, rfrac, frac, unit) \ MACRO_BEGIN \ - if ((int32_t)((rfrac) -= (frac)) < 0) { \ + if ((int)((rfrac) -= (frac)) < 0) { \ (rfrac) += (unit); \ (rsecs) -= 1; \ } \ @@ -141,9 +155,9 @@ MACRO_END void clock_config(void) { - simple_lock_init(&clock_lock, 0); + clock_lock_init(); - timer_call_setup(&clock_calend.adjcall, (timer_call_func_t)calend_adjust_call, NULL); + timer_call_setup(&calend_adjcall, (timer_call_func_t)calend_adjust_call, NULL); thread_call_setup(&calend_wakecall, (thread_call_func_t)IOKitResetTime, NULL); clock_oldconfig(); @@ -179,10 +193,10 @@ clock_timebase_init(void) uint64_t abstime; nanoseconds_to_absolutetime(calend_adjperiod, &abstime); - clock_calend.adjinterval = abstime; + calend_adjinterval = (uint32_t)abstime; nanoseconds_to_absolutetime(NSEC_PER_SEC / 100, &abstime); - hz_tick_interval = abstime; + hz_tick_interval = (uint32_t)abstime; sched_timebase_init(); } @@ -218,14 +232,14 @@ mach_timebase_info_trap( */ void clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) { uint64_t now; spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); now = mach_absolute_time(); @@ -233,7 +247,7 @@ clock_get_calendar_microtime( uint32_t t32; if (now > clock_calend.adjstart) { - t32 = now - clock_calend.adjstart; + t32 = (uint32_t)(now - clock_calend.adjstart); if (t32 > clock_calend.adjoffset) now -= 
clock_calend.adjoffset; @@ -246,9 +260,9 @@ clock_get_calendar_microtime( absolutetime_to_microtime(now, secs, microsecs); - *secs += clock_calend.epoch; + *secs += (clock_sec_t)clock_calend.epoch; - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -264,14 +278,14 @@ clock_get_calendar_microtime( */ void clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { uint64_t now; spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); now = mach_absolute_time(); @@ -279,7 +293,7 @@ clock_get_calendar_nanotime( uint32_t t32; if (now > clock_calend.adjstart) { - t32 = now - clock_calend.adjstart; + t32 = (uint32_t)(now - clock_calend.adjstart); if (t32 > clock_calend.adjoffset) now -= clock_calend.adjoffset; @@ -293,9 +307,9 @@ clock_get_calendar_nanotime( absolutetime_to_microtime(now, secs, nanosecs); *nanosecs *= NSEC_PER_USEC; - *secs += clock_calend.epoch; + *secs += (clock_sec_t)clock_calend.epoch; - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -312,14 +326,14 @@ clock_get_calendar_nanotime( */ void clock_gettimeofday( - uint32_t *secs, - uint32_t *microsecs) + clock_sec_t *secs, + clock_usec_t *microsecs) { uint64_t now; spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); now = mach_absolute_time(); @@ -330,7 +344,7 @@ clock_gettimeofday( uint32_t t32; if (now > clock_calend.adjstart) { - t32 = now - clock_calend.adjstart; + t32 = (uint32_t)(now - clock_calend.adjstart); if (t32 > clock_calend.adjoffset) now -= clock_calend.adjoffset; @@ -342,10 +356,10 @@ clock_gettimeofday( absolutetime_to_microtime(now, secs, microsecs); - *secs += clock_calend.epoch; + *secs += (clock_sec_t)clock_calend.epoch; } - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -363,18 +377,18 @@ clock_gettimeofday( */ void clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs) + clock_sec_t secs, + clock_usec_t microsecs) { - uint32_t sys, microsys; - 
uint32_t newsecs; - spl_t s; + clock_sec_t sys; + clock_usec_t microsys; + clock_sec_t newsecs; + spl_t s; - newsecs = (microsecs < 500*USEC_PER_SEC)? - secs: secs + 1; + newsecs = (microsecs < 500*USEC_PER_SEC)? secs: secs + 1; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); commpage_disable_timestamp(); @@ -399,9 +413,9 @@ clock_set_calendar_microtime( /* * Cancel any adjustment in progress. */ - clock_calend.adjdelta = clock_calend.adjtotal = 0; + calend_adjtotal = clock_calend.adjdelta = 0; - simple_unlock(&clock_lock); + clock_unlock(); /* * Set the new value for the platform clock. @@ -432,16 +446,16 @@ clock_set_calendar_microtime( void clock_initialize_calendar(void) { - uint32_t sys, microsys; - uint32_t microsecs = 0, secs = PEGetGMTTimeOfDay(); - spl_t s; + clock_sec_t sys, secs = PEGetGMTTimeOfDay(); + clock_usec_t microsys, microsecs = 0; + spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); commpage_disable_timestamp(); - if ((int32_t)secs >= (int32_t)clock_boottime) { + if ((long)secs >= (long)clock_boottime) { /* * Initialize the boot time based on the platform clock. */ @@ -464,10 +478,10 @@ clock_initialize_calendar(void) /* * Cancel any adjustment in progress. 
*/ - clock_calend.adjdelta = clock_calend.adjtotal = 0; + calend_adjtotal = clock_calend.adjdelta = 0; } - simple_unlock(&clock_lock); + clock_unlock(); splx(s); /* @@ -487,11 +501,19 @@ clock_initialize_calendar(void) */ void clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { - *secs = clock_boottime; + spl_t s; + + s = splclock(); + clock_lock(); + + *secs = (clock_sec_t)clock_boottime; *nanosecs = 0; + + clock_unlock(); + splx(s); } /* @@ -504,33 +526,33 @@ clock_get_boottime_nanotime( */ void clock_adjtime( - int32_t *secs, - int32_t *microsecs) + long *secs, + int *microsecs) { uint32_t interval; spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); interval = calend_set_adjustment(secs, microsecs); if (interval != 0) { - clock_calend.adjdeadline = mach_absolute_time() + interval; - if (!timer_call_enter(&clock_calend.adjcall, clock_calend.adjdeadline)) - clock_calend.adjactive++; + calend_adjdeadline = mach_absolute_time() + interval; + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) + calend_adjactive++; } else - if (timer_call_cancel(&clock_calend.adjcall)) - clock_calend.adjactive--; + if (timer_call_cancel(&calend_adjcall)) + calend_adjactive--; - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } static uint32_t calend_set_adjustment( - int32_t *secs, - int32_t *microsecs) + long *secs, + int *microsecs) { uint64_t now, t64; int64_t total, ototal; @@ -542,7 +564,7 @@ calend_set_adjustment( now = mach_absolute_time(); - ototal = clock_calend.adjtotal; + ototal = calend_adjtotal; if (total != 0) { int32_t delta = calend_adjskew; @@ -551,35 +573,35 @@ calend_set_adjustment( if (total > calend_adjbig) delta *= 10; if (delta > total) - delta = total; + delta = (int32_t)total; nanoseconds_to_absolutetime((uint64_t)delta, &t64); - clock_calend.adjoffset = t64; + clock_calend.adjoffset = (uint32_t)t64; } else { if (total < -calend_adjbig) delta *= 10; delta = 
-delta; if (delta < total) - delta = total; + delta = (int32_t)total; clock_calend.adjstart = now; nanoseconds_to_absolutetime((uint64_t)-delta, &t64); - clock_calend.adjoffset = t64; + clock_calend.adjoffset = (uint32_t)t64; } - clock_calend.adjtotal = total; + calend_adjtotal = total; clock_calend.adjdelta = delta; - interval = clock_calend.adjinterval; + interval = calend_adjinterval; } else - clock_calend.adjdelta = clock_calend.adjtotal = 0; + calend_adjtotal = clock_calend.adjdelta = 0; if (ototal != 0) { - *secs = ototal / NSEC_PER_SEC; - *microsecs = (ototal % NSEC_PER_SEC) / NSEC_PER_USEC; + *secs = (long)(ototal / NSEC_PER_SEC); + *microsecs = (int)((ototal % NSEC_PER_SEC) / NSEC_PER_USEC); } else *secs = *microsecs = 0; @@ -598,20 +620,19 @@ calend_adjust_call(void) spl_t s; s = splclock(); - simple_lock(&clock_lock); + clock_lock(); - if (--clock_calend.adjactive == 0) { + if (--calend_adjactive == 0) { interval = calend_adjust(); if (interval != 0) { - clock_deadline_for_periodic_event(interval, mach_absolute_time(), - &clock_calend.adjdeadline); + clock_deadline_for_periodic_event(interval, mach_absolute_time(), &calend_adjdeadline); - if (!timer_call_enter(&clock_calend.adjcall, clock_calend.adjdeadline)) - clock_calend.adjactive++; + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline)) + calend_adjactive++; } } - simple_unlock(&clock_lock); + clock_unlock(); splx(s); } @@ -631,24 +652,24 @@ calend_adjust(void) if (delta > 0) { clock_calend.offset += clock_calend.adjoffset; - clock_calend.adjtotal -= delta; - if (delta > clock_calend.adjtotal) { - clock_calend.adjdelta = delta = clock_calend.adjtotal; + calend_adjtotal -= delta; + if (delta > calend_adjtotal) { + clock_calend.adjdelta = delta = (int32_t)calend_adjtotal; nanoseconds_to_absolutetime((uint64_t)delta, &t64); - clock_calend.adjoffset = t64; + clock_calend.adjoffset = (uint32_t)t64; } } else if (delta < 0) { clock_calend.offset -= clock_calend.adjoffset; - clock_calend.adjtotal -= 
delta; - if (delta < clock_calend.adjtotal) { - clock_calend.adjdelta = delta = clock_calend.adjtotal; + calend_adjtotal -= delta; + if (delta < calend_adjtotal) { + clock_calend.adjdelta = delta = (int32_t)calend_adjtotal; nanoseconds_to_absolutetime((uint64_t)-delta, &t64); - clock_calend.adjoffset = t64; + clock_calend.adjoffset = (uint32_t)t64; } if (clock_calend.adjdelta != 0) @@ -656,7 +677,7 @@ calend_adjust(void) } if (clock_calend.adjdelta != 0) - interval = clock_calend.adjinterval; + interval = calend_adjinterval; #if CONFIG_DTRACE clock_track_calend_nowait(); @@ -793,7 +814,7 @@ clock_deadline_for_periodic_event( } } -#if CONFIG_DTRACE +#if CONFIG_DTRACE /* * clock_get_calendar_nanotime_nowait @@ -809,8 +830,8 @@ clock_deadline_for_periodic_event( */ void clock_get_calendar_nanotime_nowait( - uint32_t *secs, - uint32_t *nanosecs) + clock_sec_t *secs, + clock_nsec_t *nanosecs) { int i = 0; uint64_t now; @@ -845,7 +866,7 @@ clock_get_calendar_nanotime_nowait( uint32_t t32; if (now > stable.calend.adjstart) { - t32 = now - stable.calend.adjstart; + t32 = (uint32_t)(now - stable.calend.adjstart); if (t32 > stable.calend.adjoffset) now -= stable.calend.adjoffset; @@ -859,7 +880,7 @@ clock_get_calendar_nanotime_nowait( absolutetime_to_microtime(now, secs, nanosecs); *nanosecs *= NSEC_PER_USEC; - *secs += stable.calend.epoch; + *secs += (clock_sec_t)stable.calend.epoch; } static void @@ -889,4 +910,5 @@ clock_track_calend_nowait(void) (void)hw_atomic_add(&flipflop[i].gen, 1); } } -#endif /* CONFIG_DTRACE */ + +#endif /* CONFIG_DTRACE */ diff --git a/osfmk/kern/clock.h b/osfmk/kern/clock.h index 5ca49ea74..d456198db 100644 --- a/osfmk/kern/clock.h +++ b/osfmk/kern/clock.h @@ -44,6 +44,18 @@ #include +#ifdef __LP64__ + +typedef unsigned long clock_sec_t; +typedef unsigned int clock_usec_t, clock_nsec_t; + +#else /* __LP64__ */ + +typedef uint32_t clock_sec_t; +typedef uint32_t clock_usec_t, clock_nsec_t; + +#endif /* __LP64__ */ + #ifdef MACH_KERNEL_PRIVATE 
#include @@ -102,8 +114,8 @@ extern void clock_gettimeofday_set_commpage( uint64_t abstime, uint64_t epoch, uint64_t offset, - uint32_t *secs, - uint32_t *microsecs); + clock_sec_t *secs, + clock_usec_t *microsecs); extern void machine_delay_until( uint64_t deadline); @@ -123,12 +135,12 @@ extern uint32_t hz_tick_interval; extern void absolutetime_to_nanotime( uint64_t abstime, - uint32_t *secs, - uint32_t *nanosecs); + clock_sec_t *secs, + clock_nsec_t *nanosecs); extern void nanotime_to_absolutetime( - uint32_t secs, - uint32_t nanosecs, + clock_sec_t secs, + clock_nsec_t nanosecs, uint64_t *result); #endif /* MACH_KERNEL_PRIVATE */ @@ -138,63 +150,60 @@ __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE extern void clock_adjtime( - int32_t *secs, - int32_t *microsecs); + long *secs, + int *microsecs); extern void clock_initialize_calendar(void); extern void clock_wakeup_calendar(void); extern void clock_gettimeofday( - uint32_t *secs, - uint32_t *microsecs); + clock_sec_t *secs, + clock_usec_t *microsecs); extern void clock_set_calendar_microtime( - uint32_t secs, - uint32_t microsecs); + clock_sec_t secs, + clock_usec_t microsecs); extern void clock_get_boottime_nanotime( - uint32_t *secs, - uint32_t *nanosecs); + clock_sec_t *secs, + clock_nsec_t *nanosecs); extern void absolutetime_to_microtime( - uint64_t abstime, - uint32_t *secs, - uint32_t *microsecs); + uint64_t abstime, + clock_sec_t *secs, + clock_usec_t *microsecs); extern void clock_deadline_for_periodic_event( uint64_t interval, uint64_t abstime, uint64_t *deadline); +#if CONFIG_DTRACE + +extern void clock_get_calendar_nanotime_nowait( + clock_sec_t *secs, + clock_nsec_t *nanosecs); + +#endif /* CONFIG_DTRACE */ + #endif /* XNU_KERNEL_PRIVATE */ extern void clock_get_calendar_microtime( - uint32_t *secs, - uint32_t *microsecs); + clock_sec_t *secs, + clock_usec_t *microsecs); extern void clock_get_calendar_nanotime( - uint32_t *secs, - uint32_t *nanosecs); - -/* - * Gah! This file is included everywhere. 
The other domains do not correctly - * include config_dtrace headers, so this isn't being defined. The last test - * I ran stopped with a build failure in pexpert/i386/kd.c - */ -#if CONFIG_DTRACE -extern void clock_get_calendar_nanotime_nowait( - uint32_t *secs, - uint32_t *nanosecs); -#endif /* CONFIG_DTRACE */ + clock_sec_t *secs, + clock_nsec_t *nanosecs); extern void clock_get_system_microtime( - uint32_t *secs, - uint32_t *microsecs); + clock_sec_t *secs, + clock_usec_t *microsecs); extern void clock_get_system_nanotime( - uint32_t *secs, - uint32_t *nanosecs); + clock_sec_t *secs, + clock_nsec_t *nanosecs); extern void clock_timebase_info( mach_timebase_info_t info); @@ -233,6 +242,8 @@ extern void nanoseconds_to_absolutetime( * Obsolete interfaces. */ +#ifndef __LP64__ + #define MACH_TIMESPEC_SEC_MAX (0 - 1) #define MACH_TIMESPEC_NSEC_MAX (NSEC_PER_SEC - 1) @@ -261,11 +272,24 @@ extern mach_timespec_t clock_get_system_value(void); extern mach_timespec_t clock_get_calendar_value(void); +#else /* __LP64__ */ + +#ifdef XNU_KERNEL_PRIVATE + +#define MACH_TIMESPEC_ZERO ((mach_timespec_t) { 0, 0 } ) + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* __LP64__ */ + extern void delay_for_interval( uint32_t interval, uint32_t scale_factor); + #ifndef MACH_KERNEL_PRIVATE +#ifndef __LP64__ + #ifndef ABSOLUTETIME_SCALAR_TYPE #define clock_get_uptime(a) \ @@ -294,7 +318,9 @@ extern void delay_for_interval( #endif /* ABSOLUTETIME_SCALAR_TYPE */ -#endif /* !MACH_KERNEL_PRIVATE */ +#endif /* __LP64__ */ + +#endif /* MACH_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/clock_oldops.c b/osfmk/kern/clock_oldops.c index b69e91938..c268382d9 100644 --- a/osfmk/kern/clock_oldops.c +++ b/osfmk/kern/clock_oldops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -298,7 +298,12 @@ kern_return_t rtclock_gettime( mach_timespec_t *time) /* OUT */ { - clock_get_system_nanotime(&time->tv_sec, (uint32_t *)&time->tv_nsec); + clock_sec_t secs; + clock_nsec_t nsecs; + + clock_get_system_nanotime(&secs, &nsecs); + time->tv_sec = (unsigned int)secs; + time->tv_nsec = nsecs; return (KERN_SUCCESS); } @@ -307,7 +312,12 @@ kern_return_t calend_gettime( mach_timespec_t *time) /* OUT */ { - clock_get_calendar_nanotime(&time->tv_sec, (uint32_t *)&time->tv_nsec); + clock_sec_t secs; + clock_nsec_t nsecs; + + clock_get_calendar_nanotime(&secs, &nsecs); + time->tv_sec = (unsigned int)secs; + time->tv_nsec = nsecs; return (KERN_SUCCESS); } @@ -785,6 +795,8 @@ check_time( return ((result >= 0)? result: 0); } +#ifndef __LP64__ + mach_timespec_t clock_get_system_value(void) { @@ -806,3 +818,5 @@ clock_get_calendar_value(void) return value; } + +#endif /* __LP64__ */ diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index 599d1670e..acec72979 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -60,6 +60,7 @@ #include #include +#include #include #include #include @@ -79,13 +80,16 @@ #include #endif -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) #include #include #endif #include +#include +#include + unsigned int halt_in_debugger = 0; unsigned int switch_debugger = 0; unsigned int current_debugger = 0; @@ -132,15 +136,16 @@ struct pasc { typedef struct pasc pasc_t; +/* Prevent CPP from breaking the definition below */ +#if CONFIG_NO_PANIC_STRINGS +#undef Assert +#endif + void Assert( const char *file, int line, -#if CONFIG_NO_PANIC_STRINGS - __unused const char *expression -#else const char *expression -#endif ) { int saved_return_on_panic; @@ -193,7 +198,7 @@ debug_log_init(void) debug_buf_size = sizeof(debug_buf); } -#if __i386__ +#if defined(__i386__) || defined(__x86_64__) #define panic_stop() pmCPUHalt(PM_HALT_PANIC) #define panic_safe() pmSafeMode(x86_lcpu(), 
PM_SAFE_FL_SAFE) #define panic_normal() pmSafeMode(x86_lcpu(), PM_SAFE_FL_NORMAL) @@ -203,7 +208,17 @@ debug_log_init(void) #define panic_normal() #endif -#undef panic(...) +/* + * Prevent CPP from breaking the definition below, + * since all clients get a #define to prepend line numbers + */ +#undef panic + +void _consume_panic_args(int a __unused, ...) +{ + panic(NULL); +} + void panic(const char *str, ...) { @@ -212,6 +227,10 @@ panic(const char *str, ...) thread_t thread; wait_queue_t wq; + + if (kdebug_enable) + kdbg_dump_trace_to_file("/var/tmp/panic.trace"); + s = splhigh(); disable_preemption(); @@ -260,7 +279,7 @@ panic(const char *str, ...) panicwait = 1; PANIC_UNLOCK(); - kdb_printf("panic(cpu %d caller 0x%08lX): ", (unsigned) paniccpu, panic_caller); + kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller); if (str) { va_start(listp, str); _doprnt(str, &listp, consdebug_putc, 0); @@ -422,18 +441,19 @@ __private_extern__ void panic_display_system_configuration(void) { panic_display_model_name(); panic_display_uptime(); config_displayed = TRUE; + panic_display_zprint(); + kext_dump_panic_lists(&kdb_log); } } extern zone_t first_zone; extern unsigned int num_zones, stack_total; -#if defined(__i386__) +#if defined(__i386__) || defined (__x86_64__) extern unsigned int inuse_ptepages_count; #endif extern boolean_t panic_include_zprint; -extern vm_size_t kalloc_large_total; __private_extern__ void panic_display_zprint() { @@ -460,17 +480,18 @@ __private_extern__ void panic_display_zprint() } } - kdb_printf("Kernel Stacks:%lu\n",(uintptr_t)(KERNEL_STACK_SIZE * stack_total)); -#if defined(__i386__) + kdb_printf("Kernel Stacks:%lu\n",(uintptr_t)(kernel_stack_size * stack_total)); + +#if defined(__i386__) || defined (__x86_64__) kdb_printf("PageTables:%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count)); #endif + kdb_printf("Kalloc.Large:%lu\n",(uintptr_t)kalloc_large_total); } } #if !MACH_KDP static struct ether_addr 
kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; -unsigned int not_in_kdp = 1; /* XXX ugly forward declares to stop warnings */ void *kdp_get_interface(void); diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index cdf94989f..d4ad172b9 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -32,6 +32,40 @@ #include #include +#ifdef __APPLE_API_PRIVATE +#ifdef __APPLE_API_UNSTABLE + +struct thread_snapshot { + uint32_t snapshot_magic; + uint32_t nkern_frames; + uint32_t nuser_frames; + int32_t pid; + uint64_t wait_event; + uint64_t continuation; + uint64_t thread_id; + int32_t state; + char ss_flags; + /* We restrict ourselves to a statically defined + * (current as of 2009) length for the + * p_comm string, due to scoping issues (osfmk/bsd and user/kernel + * binary compatibility). + */ + char p_comm[17]; +} __attribute__ ((packed)); + +enum { + kUser64_p = 0x1, + kKernel64_p = 0x2, + kHasDispatchSerial = 0x4 +}; + +enum {STACKSHOT_GET_DQ = 1}; +#define STACKSHOT_DISPATCH_OFFSET_MASK 0xffff0000 +#define STACKSHOT_DISPATCH_OFFSET_SHIFT 16 + +#endif /* __APPLE_API_UNSTABLE */ +#endif /* __APPLE_API_PRIVATE */ + #ifdef KERNEL_PRIVATE extern unsigned int systemLogDiags; @@ -99,20 +133,78 @@ void panic_display_zprint(void); #define DB_LOG_PI_SCRN 0x100 #define DB_KDP_GETC_ENA 0x200 -#define DB_KERN_DUMP_ON_PANIC 0x400 /* Trigger core dump on panic*/ -#define DB_KERN_DUMP_ON_NMI 0x800 /* Trigger core dump on NMI */ -#define DB_DBG_POST_CORE 0x1000 /*Wait in debugger after NMI core */ -#define DB_PANICLOG_DUMP 0x2000 /* Send paniclog on panic,not core*/ +#define DB_KERN_DUMP_ON_PANIC 0x400 /* Trigger core dump on panic*/ +#define DB_KERN_DUMP_ON_NMI 0x800 /* Trigger core dump on NMI */ +#define DB_DBG_POST_CORE 0x1000 /*Wait in debugger after NMI core */ +#define DB_PANICLOG_DUMP 0x2000 /* Send paniclog on panic,not core*/ +#define DB_REBOOT_POST_CORE 0x4000 /* Attempt to reboot after + * post-panic crashdump/paniclog + * dump. 
+ */ +#if DEBUG +/* + * For the DEBUG kernel, support the following: + * sysctl -w debug.kprint_syscall= + * sysctl -w debug.kprint_syscall_process= + * should be an OR of the masks below + * for UNIX, MACH, MDEP, or IPC. This debugging aid + * assumes the task/process is locked/wired and will + * not go away during evaluation. If no process is + * specified, all processes will be traced + */ +extern int debug_kprint_syscall; +extern int debug_kprint_current_process(const char **namep); +#define DEBUG_KPRINT_SYSCALL_PREDICATE_INTERNAL(mask, namep) \ + ( (debug_kprint_syscall & (mask)) && debug_kprint_current_process(namep) ) +#define DEBUG_KPRINT_SYSCALL_MASK(mask, fmt, args...) do { \ + const char *dks_name = NULL; \ + if (DEBUG_KPRINT_SYSCALL_PREDICATE_INTERNAL(mask, &dks_name)) { \ + kprintf("[%s%s%p]" fmt, dks_name ? dks_name : "", \ + dks_name ? "@" : "", current_thread(), args); \ + } \ + } while (0) +#else /* !DEBUG */ +#define DEBUG_KPRINT_SYSCALL_PREDICATE_INTERNAL(mask, namep) (0) +#define DEBUG_KPRINT_SYSCALL_MASK(mask, fmt, args...) do { } while(0) +#endif /* !DEBUG */ + +enum { + DEBUG_KPRINT_SYSCALL_UNIX_MASK = 1 << 0, + DEBUG_KPRINT_SYSCALL_MACH_MASK = 1 << 1, + DEBUG_KPRINT_SYSCALL_MDEP_MASK = 1 << 2, + DEBUG_KPRINT_SYSCALL_IPC_MASK = 1 << 3 +}; + +#define DEBUG_KPRINT_SYSCALL_PREDICATE(mask) \ + DEBUG_KPRINT_SYSCALL_PREDICATE_INTERNAL(mask, NULL) +#define DEBUG_KPRINT_SYSCALL_UNIX(fmt, args...) \ + DEBUG_KPRINT_SYSCALL_MASK(DEBUG_KPRINT_SYSCALL_UNIX_MASK,fmt,args) +#define DEBUG_KPRINT_SYSCALL_MACH(fmt, args...) \ + DEBUG_KPRINT_SYSCALL_MASK(DEBUG_KPRINT_SYSCALL_MACH_MASK,fmt,args) +#define DEBUG_KPRINT_SYSCALL_MDEP(fmt, args...) \ + DEBUG_KPRINT_SYSCALL_MASK(DEBUG_KPRINT_SYSCALL_MDEP_MASK,fmt,args) +#define DEBUG_KPRINT_SYSCALL_IPC(fmt, args...) \ + DEBUG_KPRINT_SYSCALL_MASK(DEBUG_KPRINT_SYSCALL_IPC_MASK,fmt,args) #endif /* KERNEL_PRIVATE */ __BEGIN_DECLS extern void panic(const char *string, ...) 
__printflike(1,2); + +#if KERNEL_PRIVATE +void _consume_panic_args(int, ...); +#endif + #if CONFIG_NO_PANIC_STRINGS +#if KERNEL_PRIVATE +#define panic_plain(x, ...) _consume_panic_args( 0, ## __VA_ARGS__ ) +#define panic(x, ...) _consume_panic_args( 0, ## __VA_ARGS__ ) +#else #define panic_plain(...) (panic)((char *)0) #define panic(...) (panic)((char *)0) +#endif #else /* CONFIGS_NO_PANIC_STRINGS */ #define panic_plain(ex, ...) \ (panic)(ex, ## __VA_ARGS__) diff --git a/osfmk/kern/etimer.h b/osfmk/kern/etimer.h index 29eebc4a8..48ec75e52 100644 --- a/osfmk/kern/etimer.h +++ b/osfmk/kern/etimer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,12 +37,13 @@ * real-time clock. */ -#ifndef _ETIMER_H_ -#define _ETIMER_H_ +#ifdef KERNEL_PRIVATE + +#ifndef _KERN_ETIMER_H_ +#define _KERN_ETIMER_H_ #define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL -/* extern void rtclock_intr(int inuser, uint64_t iaddr); - this is currently MD */ typedef void (*etimer_intr_t)(int, uint64_t); extern int setTimerReq(void); @@ -53,18 +54,6 @@ extern int setPop(uint64_t time); extern void etimer_resync_deadlines(void); -#if 0 /* this is currently still MD */ -#pragma pack(push,4) -struct rtclock_timer_t { - uint64_t deadline; - uint32_t - /*boolean_t*/ is_set:1, - has_expired:1, - :0; -}; -#pragma pack(pop) -typedef struct rtclock_timer_t rtclock_timer_t; -#endif /* MD */ - +#endif /* _KERN_ETIMER_H_ */ -#endif /* _ETIMER_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/exception.c b/osfmk/kern/exception.c index 0122c44b1..a3578e1a0 100644 --- a/osfmk/kern/exception.c +++ b/osfmk/kern/exception.c @@ -117,7 +117,7 @@ kern_return_t exception_deliver( mach_exception_data_t code, mach_msg_type_number_t codeCnt, struct exception_action *excp, - mutex_t *mutex); + lck_mtx_t *mutex); #ifdef MACH_BSD kern_return_t bsd_exception( @@ -145,7 +145,7 @@ 
exception_deliver( mach_exception_data_t code, mach_msg_type_number_t codeCnt, struct exception_action *excp, - mutex_t *mutex) + lck_mtx_t *mutex) { ipc_port_t exc_port; exception_data_type_t small_code[EXCEPTION_CODE_MAX]; @@ -168,16 +168,16 @@ exception_deliver( * the port from disappearing between now and when * ipc_object_copyin_from_kernel is finally called. */ - mutex_lock(mutex); + lck_mtx_lock(mutex); exc_port = excp->port; if (!IP_VALID(exc_port)) { - mutex_unlock(mutex); + lck_mtx_unlock(mutex); return KERN_FAILURE; } ip_lock(exc_port); if (!ip_active(exc_port)) { ip_unlock(exc_port); - mutex_unlock(mutex); + lck_mtx_unlock(mutex); return KERN_FAILURE; } ip_reference(exc_port); @@ -186,14 +186,14 @@ exception_deliver( flavor = excp->flavor; behavior = excp->behavior; - mutex_unlock(mutex); + lck_mtx_unlock(mutex); code64 = (behavior & MACH_EXCEPTION_CODES); behavior &= ~MACH_EXCEPTION_CODES; if (!code64) { - small_code[0] = CAST_DOWN(exception_data_type_t, code[0]); - small_code[1] = CAST_DOWN(exception_data_type_t, code[1]); + small_code[0] = CAST_DOWN_EXPLICIT(exception_data_type_t, code[0]); + small_code[1] = CAST_DOWN_EXPLICIT(exception_data_type_t, code[1]); } @@ -323,7 +323,7 @@ exception_triage( task_t task; host_priv_t host_priv; struct exception_action *excp; - mutex_t *mutex; + lck_mtx_t *mutex; kern_return_t kr; assert(exception != EXC_RPC_ALERT); @@ -335,7 +335,7 @@ exception_triage( * Try to raise the exception at the activation level. */ thread = current_thread(); - mutex = mutex_addr(thread->mutex); + mutex = &thread->mutex; excp = &thread->exc_actions[exception]; kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) @@ -345,7 +345,7 @@ exception_triage( * Maybe the task level will handle it. 
*/ task = current_task(); - mutex = mutex_addr(task->lock); + mutex = &task->lock; excp = &task->exc_actions[exception]; kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) @@ -355,7 +355,7 @@ exception_triage( * How about at the host level? */ host_priv = host_priv_self(); - mutex = mutex_addr(host_priv->lock); + mutex = &host_priv->lock; excp = &host_priv->exc_actions[exception]; kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) @@ -393,7 +393,7 @@ bsd_exception( { task_t task; struct exception_action *excp; - mutex_t *mutex; + lck_mtx_t *mutex; thread_t self = current_thread(); kern_return_t kr; @@ -401,7 +401,7 @@ bsd_exception( * Maybe the task level will handle it. */ task = current_task(); - mutex = mutex_addr(task->lock); + mutex = &task->lock; excp = &task->exc_actions[exception]; kr = exception_deliver(self, exception, code, codeCnt, excp, mutex); @@ -438,6 +438,7 @@ kern_return_t abnormal_exit_notify(mach_exception_data_type_t exccode, */ kern_return_t sys_perf_notify(thread_t thread, int pid) { + host_priv_t hostp; struct exception_action *excp; ipc_port_t xport; diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c index 27a089239..3bf72594e 100644 --- a/osfmk/kern/hibernate.c +++ b/osfmk/kern/hibernate.c @@ -42,411 +42,6 @@ #include #include -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -static vm_page_t hibernate_gobble_queue; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -static void -hibernate_page_list_zero(hibernate_page_list_t *list) -{ - uint32_t bank; - hibernate_bitmap_t * bitmap; - - bitmap = &list->bank_bitmap[0]; - for (bank = 0; bank < list->bank_count; bank++) - { - uint32_t last_bit; - - bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2); - // set out-of-bound bits at end of bitmap. 
- last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31); - if (last_bit) - bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit); - - bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; - } -} - - -static boolean_t -consider_discard(vm_page_t m) -{ - vm_object_t object = NULL; - int refmod_state; - boolean_t discard = FALSE; - - do - { - if(m->private) - panic("consider_discard: private"); - - if (!vm_object_lock_try(m->object)) - break; - - object = m->object; - - if (m->wire_count != 0) - break; - if (m->precious) - break; - - if (m->busy || !object->alive) - /* - * Somebody is playing with this page. - */ - break; - - if (m->absent || m->unusual || m->error) - /* - * If it's unusual in anyway, ignore it - */ - break; - - if (m->cleaning) - break; - - if (m->laundry || m->list_req_pending) - break; - - if (!m->dirty) - { - refmod_state = pmap_get_refmod(m->phys_page); - - if (refmod_state & VM_MEM_REFERENCED) - m->reference = TRUE; - if (refmod_state & VM_MEM_MODIFIED) - m->dirty = TRUE; - } - - /* - * If it's clean or purgeable we can discard the page on wakeup. - * JMM - consider purgeable (volatile or empty) objects here as well. 
- */ - discard = (!m->dirty) - || (VM_PURGABLE_VOLATILE == object->purgable) - || (VM_PURGABLE_EMPTY == m->object->purgable); - } - while (FALSE); - - if (object) - vm_object_unlock(object); - - return (discard); -} - - -static void -discard_page(vm_page_t m) -{ - if (m->absent || m->unusual || m->error) - /* - * If it's unusual in anyway, ignore - */ - return; - - if (m->pmapped == TRUE) - { - __unused int refmod_state = pmap_disconnect(m->phys_page); - } - - if (m->laundry) - panic("discard_page(%p) laundry", m); - if (m->private) - panic("discard_page(%p) private", m); - if (m->fictitious) - panic("discard_page(%p) fictitious", m); - - if (VM_PURGABLE_VOLATILE == m->object->purgable) - { - assert(m->object->objq.next != NULL && m->object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t old_queue=vm_purgeable_object_remove(m->object); - assert(old_queue); - /* No need to lock page queue for token delete, hibernate_vm_unlock() - makes sure these locks are uncontended before sleep */ - vm_purgeable_token_delete_first(old_queue); - m->object->purgable = VM_PURGABLE_EMPTY; - } - - if (m->tabled) - vm_page_remove(m); - - vm_page_free(m); -} - -/* - Bits zero in the bitmaps => needs to be saved. All pages default to be saved, - pages known to VM to not need saving are subtracted. - Wired pages to be saved are present in page_list_wired, pageable in page_list. 
-*/ - -void -hibernate_page_list_setall(hibernate_page_list_t * page_list, - hibernate_page_list_t * page_list_wired, - uint32_t * pagesOut) -{ - uint64_t start, end, nsec; - vm_page_t m; - uint32_t pages = page_list->page_count; - uint32_t count_zf = 0, count_throttled = 0; - uint32_t count_inactive = 0, count_active = 0, count_speculative = 0; - uint32_t count_wire = pages; - uint32_t count_discard_active = 0; - uint32_t count_discard_inactive = 0; - uint32_t count_discard_purgeable = 0; - uint32_t count_discard_speculative = 0; - uint32_t i; - uint32_t bank; - hibernate_bitmap_t * bitmap; - hibernate_bitmap_t * bitmap_wired; - - - HIBLOG("hibernate_page_list_setall start\n"); - - clock_get_uptime(&start); - - hibernate_page_list_zero(page_list); - hibernate_page_list_zero(page_list_wired); - - m = (vm_page_t) hibernate_gobble_queue; - while(m) - { - pages--; - count_wire--; - hibernate_page_bitset(page_list, TRUE, m->phys_page); - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - m = (vm_page_t) m->pageq.next; - } - - for( i = 0; i < vm_colors; i++ ) - { - queue_iterate(&vm_page_queue_free[i], - m, - vm_page_t, - pageq) - { - pages--; - count_wire--; - hibernate_page_bitset(page_list, TRUE, m->phys_page); - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - } - - queue_iterate(&vm_lopage_queue_free, - m, - vm_page_t, - pageq) - { - pages--; - count_wire--; - hibernate_page_bitset(page_list, TRUE, m->phys_page); - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - - queue_iterate( &vm_page_queue_throttled, - m, - vm_page_t, - pageq ) - { - if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) - && consider_discard(m)) - { - hibernate_page_bitset(page_list, TRUE, m->phys_page); - count_discard_inactive++; - } - else - count_throttled++; - count_wire--; - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - - queue_iterate( &vm_page_queue_zf, - m, - vm_page_t, - pageq ) - { - if 
((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) - && consider_discard(m)) - { - hibernate_page_bitset(page_list, TRUE, m->phys_page); - if (m->dirty) - count_discard_purgeable++; - else - count_discard_inactive++; - } - else - count_zf++; - count_wire--; - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - - queue_iterate( &vm_page_queue_inactive, - m, - vm_page_t, - pageq ) - { - if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) - && consider_discard(m)) - { - hibernate_page_bitset(page_list, TRUE, m->phys_page); - if (m->dirty) - count_discard_purgeable++; - else - count_discard_inactive++; - } - else - count_inactive++; - count_wire--; - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - - for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) - { - queue_iterate(&vm_page_queue_speculative[i].age_q, - m, - vm_page_t, - pageq) - { - if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) - && consider_discard(m)) - { - hibernate_page_bitset(page_list, TRUE, m->phys_page); - count_discard_speculative++; - } - else - count_speculative++; - count_wire--; - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - } - - queue_iterate( &vm_page_queue_active, - m, - vm_page_t, - pageq ) - { - if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) - && consider_discard(m)) - { - hibernate_page_bitset(page_list, TRUE, m->phys_page); - if (m->dirty) - count_discard_purgeable++; - else - count_discard_active++; - } - else - count_active++; - count_wire--; - hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } - - // pull wired from hibernate_bitmap - - bitmap = &page_list->bank_bitmap[0]; - bitmap_wired = &page_list_wired->bank_bitmap[0]; - for (bank = 0; bank < page_list->bank_count; bank++) - { - for (i = 0; i < bitmap->bitmapwords; i++) - bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i]; - bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords]; - bitmap_wired 
= (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords]; - } - - // machine dependent adjustments - hibernate_page_list_setall_machine(page_list, page_list_wired, &pages); - - clock_get_uptime(&end); - absolutetime_to_nanoseconds(end - start, &nsec); - HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); - - HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n", - pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled, - count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative); - - *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative; -} - -void -hibernate_page_list_discard(hibernate_page_list_t * page_list) -{ - uint64_t start, end, nsec; - vm_page_t m; - vm_page_t next; - uint32_t i; - uint32_t count_discard_active = 0; - uint32_t count_discard_inactive = 0; - uint32_t count_discard_purgeable = 0; - uint32_t count_discard_speculative = 0; - - clock_get_uptime(&start); - - m = (vm_page_t) queue_first(&vm_page_queue_zf); - while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m)) - { - next = (vm_page_t) m->pageq.next; - if (hibernate_page_bittst(page_list, m->phys_page)) - { - if (m->dirty) - count_discard_purgeable++; - else - count_discard_inactive++; - discard_page(m); - } - m = next; - } - - for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) - { - m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q); - while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m)) - { - next = (vm_page_t) m->pageq.next; - if (hibernate_page_bittst(page_list, m->phys_page)) - { - count_discard_speculative++; - discard_page(m); - } - m = next; - } - } - - m = (vm_page_t) queue_first(&vm_page_queue_inactive); - while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) - { - next = 
(vm_page_t) m->pageq.next; - if (hibernate_page_bittst(page_list, m->phys_page)) - { - if (m->dirty) - count_discard_purgeable++; - else - count_discard_inactive++; - discard_page(m); - } - m = next; - } - - m = (vm_page_t) queue_first(&vm_page_queue_active); - while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) - { - next = (vm_page_t) m->pageq.next; - if (hibernate_page_bittst(page_list, m->phys_page)) - { - if (m->dirty) - count_discard_purgeable++; - else - count_discard_active++; - discard_page(m); - } - m = next; - } - - clock_get_uptime(&end); - absolutetime_to_nanoseconds(end - start, &nsec); - HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n", - nsec / 1000000ULL, - count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative); -} /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -460,8 +55,7 @@ hibernate_setup(IOHibernateImageHeader * header, { hibernate_page_list_t * page_list = NULL; hibernate_page_list_t * page_list_wired = NULL; - vm_page_t m; - uint32_t i, gobble_count; + uint32_t gobble_count; *page_list_ret = NULL; *page_list_wired_ret = NULL; @@ -480,7 +74,7 @@ hibernate_setup(IOHibernateImageHeader * header, *encryptedswap = dp_encryption; // pages we could force out to reduce hibernate image size - gobble_count = (((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100; + gobble_count = (uint32_t)((((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100); // no failures hereafter @@ -490,33 +84,7 @@ hibernate_setup(IOHibernateImageHeader * header, header->processorFlags, gobble_count); if (gobble_count) - { - uint64_t start, end, timeout, nsec; - clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout); - clock_get_uptime(&start); - - for (i = 0; i < gobble_count; i++) - { - while (VM_PAGE_NULL == (m = vm_page_grab())) - { - clock_get_uptime(&end); - if (end 
>= timeout) - break; - VM_PAGE_WAIT(); - } - if (!m) - break; - m->busy = FALSE; - vm_page_gobble(m); - - m->pageq.next = (queue_entry_t) hibernate_gobble_queue; - hibernate_gobble_queue = m; - } - - clock_get_uptime(&end); - absolutetime_to_nanoseconds(end - start, &nsec); - HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL); - } + hibernate_gobble_pages(gobble_count, free_page_time); *page_list_ret = page_list; *page_list_wired_ret = page_list_wired; @@ -528,21 +96,7 @@ kern_return_t hibernate_teardown(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired) { - vm_page_t m, next; - uint32_t count = 0; - - m = (vm_page_t) hibernate_gobble_queue; - while(m) - { - next = (vm_page_t) m->pageq.next; - vm_page_free(m); - count++; - m = next; - } - hibernate_gobble_queue = VM_PAGE_NULL; - - if (count) - HIBLOG("Freed %d pages\n", count); + hibernate_free_gobble_pages(); if (page_list) kfree(page_list, page_list->list_size); diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index 0f8de2841..7a3fbf595 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -153,7 +153,7 @@ host_info( case HOST_BASIC_INFO: { register host_basic_info_t basic_info; - register int master_num; + register int master_id; /* * Basic information about this host. 
@@ -166,12 +166,12 @@ host_info( basic_info->memory_size = machine_info.memory_size; basic_info->max_cpus = machine_info.max_cpus; basic_info->avail_cpus = processor_avail_count; - master_num = master_processor->cpu_num; - basic_info->cpu_type = slot_type(master_num); - basic_info->cpu_subtype = slot_subtype(master_num); + master_id = master_processor->cpu_id; + basic_info->cpu_type = slot_type(master_id); + basic_info->cpu_subtype = slot_subtype(master_id); if (*count >= HOST_BASIC_INFO_COUNT) { - basic_info->cpu_threadtype = slot_threadtype(master_num); + basic_info->cpu_threadtype = slot_threadtype(master_id); basic_info->physical_cpu = machine_info.physical_cpu; basic_info->physical_cpu_max = machine_info.physical_cpu_max; basic_info->logical_cpu = machine_info.logical_cpu; @@ -263,6 +263,7 @@ host_statistics( host_info_t info, mach_msg_type_number_t *count) { + uint32_t i; if (host == HOST_NULL) return (KERN_INVALID_HOST); @@ -290,8 +291,9 @@ host_statistics( case HOST_VM_INFO: { register processor_t processor; - register vm_statistics_t stat; - vm_statistics_data_t host_vm_stat; + register vm_statistics64_t stat; + vm_statistics64_data_t host_vm_stat; + vm_statistics_t stat32; mach_msg_type_number_t original_count; if (*count < HOST_VM_INFO_REV0_COUNT) @@ -320,20 +322,34 @@ host_statistics( simple_unlock(&processor_list_lock); } - stat = (vm_statistics_t) info; - - stat->free_count = vm_page_free_count + vm_page_speculative_count; - stat->active_count = vm_page_active_count; - stat->inactive_count = vm_page_inactive_count; - stat->wire_count = vm_page_wire_count; - stat->zero_fill_count = host_vm_stat.zero_fill_count; - stat->reactivations = host_vm_stat.reactivations; - stat->pageins = host_vm_stat.pageins; - stat->pageouts = host_vm_stat.pageouts; - stat->faults = host_vm_stat.faults; - stat->cow_faults = host_vm_stat.cow_faults; - stat->lookups = host_vm_stat.lookups; - stat->hits = host_vm_stat.hits; + stat32 = (vm_statistics_t) info; + + 
stat32->free_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_free_count + vm_page_speculative_count); + stat32->active_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_active_count); + + if (vm_page_local_q) { + for (i = 0; i < vm_page_local_q_count; i++) { + struct vpl *lq; + + lq = &vm_page_local_q[i].vpl_un.vpl; + + stat32->active_count += VM_STATISTICS_TRUNCATE_TO_32_BIT(lq->vpl_count); + } + } + stat32->inactive_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_inactive_count); +#if CONFIG_EMBEDDED + stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count); +#else + stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count + vm_page_throttled_count); +#endif + stat32->zero_fill_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.zero_fill_count); + stat32->reactivations = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.reactivations); + stat32->pageins = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.pageins); + stat32->pageouts = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.pageouts); + stat32->faults = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.faults); + stat32->cow_faults = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.cow_faults); + stat32->lookups = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.lookups); + stat32->hits = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.hits); /* * Fill in extra info added in later revisions of the @@ -344,16 +360,19 @@ host_statistics( *count = HOST_VM_INFO_REV0_COUNT; /* rev0 already filled in */ if (original_count >= HOST_VM_INFO_REV1_COUNT) { /* rev1 added "purgeable" info */ - stat->purgeable_count = vm_page_purgeable_count; - stat->purges = vm_page_purged_count; + stat32->purgeable_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_purgeable_count); + stat32->purges = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_purged_count); *count = HOST_VM_INFO_REV1_COUNT; } + if (original_count >= HOST_VM_INFO_REV2_COUNT) { /* rev2 added "speculative" info */ - stat->speculative_count = 
vm_page_speculative_count; + stat32->speculative_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_speculative_count); *count = HOST_VM_INFO_REV2_COUNT; } + /* rev3 changed some of the fields to be 64-bit*/ + return (KERN_SUCCESS); } @@ -365,10 +384,11 @@ host_statistics( if (*count < HOST_CPU_LOAD_INFO_COUNT) return (KERN_FAILURE); -#define GET_TICKS_VALUE(processor, state, timer) \ -MACRO_BEGIN \ - cpu_load_info->cpu_ticks[(state)] += \ - timer_grab(&PROCESSOR_DATA(processor, timer)) / hz_tick_interval; \ +#define GET_TICKS_VALUE(processor, state, timer) \ +MACRO_BEGIN \ + cpu_load_info->cpu_ticks[(state)] += \ + (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, timer)) \ + / hz_tick_interval); \ MACRO_END cpu_load_info = (host_cpu_load_info_t)info; @@ -404,6 +424,100 @@ MACRO_END } } + +kern_return_t +host_statistics64( + host_t host, + host_flavor_t flavor, + host_info64_t info, + mach_msg_type_number_t *count) +{ + uint32_t i; + + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + switch(flavor) { + + case HOST_VM_INFO64: /* We were asked to get vm_statistics64 */ + { + register processor_t processor; + register vm_statistics64_t stat; + vm_statistics64_data_t host_vm_stat; + + if (*count < HOST_VM_INFO64_COUNT) + return (KERN_FAILURE); + + processor = processor_list; + stat = &PROCESSOR_DATA(processor, vm_stat); + host_vm_stat = *stat; + + if (processor_count > 1) { + simple_lock(&processor_list_lock); + + while ((processor = processor->processor_list) != NULL) { + stat = &PROCESSOR_DATA(processor, vm_stat); + + host_vm_stat.zero_fill_count += stat->zero_fill_count; + host_vm_stat.reactivations += stat->reactivations; + host_vm_stat.pageins += stat->pageins; + host_vm_stat.pageouts += stat->pageouts; + host_vm_stat.faults += stat->faults; + host_vm_stat.cow_faults += stat->cow_faults; + host_vm_stat.lookups += stat->lookups; + host_vm_stat.hits += stat->hits; + } + + simple_unlock(&processor_list_lock); + } + + stat = (vm_statistics64_t) info; + + 
stat->free_count = vm_page_free_count + vm_page_speculative_count; + stat->active_count = vm_page_active_count; + + if (vm_page_local_q) { + for (i = 0; i < vm_page_local_q_count; i++) { + struct vpl *lq; + + lq = &vm_page_local_q[i].vpl_un.vpl; + + stat->active_count += lq->vpl_count; + } + } + stat->inactive_count = vm_page_inactive_count; +#if CONFIG_EMBEDDED + stat->wire_count = vm_page_wire_count; +#else + stat->wire_count = vm_page_wire_count + vm_page_throttled_count; +#endif + stat->zero_fill_count = host_vm_stat.zero_fill_count; + stat->reactivations = host_vm_stat.reactivations; + stat->pageins = host_vm_stat.pageins; + stat->pageouts = host_vm_stat.pageouts; + stat->faults = host_vm_stat.faults; + stat->cow_faults = host_vm_stat.cow_faults; + stat->lookups = host_vm_stat.lookups; + stat->hits = host_vm_stat.hits; + + /* rev1 added "purgable" info */ + stat->purgeable_count = vm_page_purgeable_count; + stat->purges = vm_page_purged_count; + + /* rev2 added "speculative" info */ + stat->speculative_count = vm_page_speculative_count; + + *count = HOST_VM_INFO64_COUNT; + + return(KERN_SUCCESS); + } + + default: /* If we didn't recognize the flavor, send to host_statistics */ + return(host_statistics(host, flavor, (host_info_t) info, count)); + } +} + + /* * Get host statistics that require privilege. * None for now, just call the un-privileged version. 
@@ -656,7 +770,7 @@ host_get_special_port( ipc_port_t port; if (host_priv == HOST_PRIV_NULL || - id == HOST_SECURITY_PORT || id > HOST_MAX_SPECIAL_PORT ) + id == HOST_SECURITY_PORT || id > HOST_MAX_SPECIAL_PORT || id < 0) return KERN_INVALID_ARGUMENT; host_lock(host_priv); diff --git a/osfmk/kern/host.h b/osfmk/kern/host.h index 8381a868b..3c64c3b08 100644 --- a/osfmk/kern/host.h +++ b/osfmk/kern/host.h @@ -69,7 +69,7 @@ #ifdef MACH_KERNEL_PRIVATE -#include +#include #include #include #include @@ -77,7 +77,7 @@ struct host { - decl_mutex_data(,lock) /* lock to protect exceptions */ + decl_lck_mtx_data(,lock) /* lock to protect exceptions */ ipc_port_t special[HOST_MAX_SPECIAL_PORT + 1]; struct exception_action exc_actions[EXC_TYPES_COUNT]; }; @@ -86,8 +86,8 @@ typedef struct host host_data_t; extern host_data_t realhost; -#define host_lock(host) mutex_lock(&(host)->lock) -#define host_unlock(host) mutex_unlock(&(host)->lock) +#define host_lock(host) lck_mtx_lock(&(host)->lock) +#define host_unlock(host) lck_mtx_unlock(&(host)->lock) #endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/host_notify.c b/osfmk/kern/host_notify.c index 064aab94e..769d1cc2e 100644 --- a/osfmk/kern/host_notify.c +++ b/osfmk/kern/host_notify.c @@ -45,8 +45,13 @@ #include "mach/host_notify_reply.h" +decl_lck_mtx_data(,host_notify_lock) + +lck_mtx_ext_t host_notify_lock_ext; +lck_grp_t host_notify_lock_grp; +lck_attr_t host_notify_lock_attr; +static lck_grp_attr_t host_notify_lock_grp_attr; static zone_t host_notify_zone; -decl_mutex_data(static,host_notify_lock) static queue_head_t host_notify_queue[HOST_NOTIFY_TYPE_MAX+1]; @@ -68,7 +73,11 @@ host_notify_init(void) for (i = 0; i <= HOST_NOTIFY_TYPE_MAX; i++) queue_init(&host_notify_queue[i]); - mutex_init(&host_notify_lock, 0); + lck_grp_attr_setdefault(&host_notify_lock_grp_attr); + lck_grp_init(&host_notify_lock_grp, "host_notify", &host_notify_lock_grp_attr); + lck_attr_setdefault(&host_notify_lock_attr); + + 
lck_mtx_init_ext(&host_notify_lock, &host_notify_lock_ext, &host_notify_lock_grp, &host_notify_lock_attr); i = sizeof (struct host_notify_entry); host_notify_zone = @@ -96,13 +105,13 @@ host_request_notification( if (entry == NULL) return (KERN_RESOURCE_SHORTAGE); - mutex_lock(&host_notify_lock); + lck_mtx_lock(&host_notify_lock); ip_lock(port); if (!ip_active(port) || ip_kotype(port) != IKOT_NONE) { ip_unlock(port); - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); zfree(host_notify_zone, entry); return (KERN_FAILURE); @@ -113,7 +122,7 @@ host_request_notification( ip_unlock(port); enqueue_tail(&host_notify_queue[notify_type], (queue_entry_t)entry); - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); return (KERN_SUCCESS); } @@ -124,7 +133,7 @@ host_notify_port_destroy( { host_notify_t entry; - mutex_lock(&host_notify_lock); + lck_mtx_lock(&host_notify_lock); ip_lock(port); if (ip_kotype(port) == IKOT_HOST_NOTIFY) { @@ -135,7 +144,7 @@ host_notify_port_destroy( assert(entry->port == port); remqueue(NULL, (queue_entry_t)entry); - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); zfree(host_notify_zone, entry); ipc_port_release_sonce(port); @@ -143,7 +152,7 @@ host_notify_port_destroy( } ip_unlock(port); - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); } static void @@ -154,7 +163,7 @@ host_notify_all( { queue_t notify_queue = &host_notify_queue[notify_type]; - mutex_lock(&host_notify_lock); + lck_mtx_lock(&host_notify_lock); if (!queue_empty(notify_queue)) { queue_head_t send_queue; @@ -183,18 +192,18 @@ host_notify_all( ipc_kobject_set_atomically(port, IKO_NULL, IKOT_NONE); ip_unlock(port); - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); zfree(host_notify_zone, entry); msg->msgh_remote_port = port; - (void) mach_msg_send_from_kernel(msg, msg_size); + (void) mach_msg_send_from_kernel_proper(msg, msg_size); - mutex_lock(&host_notify_lock); + 
lck_mtx_lock(&host_notify_lock); } } - mutex_unlock(&host_notify_lock); + lck_mtx_unlock(&host_notify_lock); } void diff --git a/osfmk/kern/host_statistics.h b/osfmk/kern/host_statistics.h index 582105edc..a1471c6f4 100644 --- a/osfmk/kern/host_statistics.h +++ b/osfmk/kern/host_statistics.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,9 +43,16 @@ #include +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ #define VM_STAT_INCR(event) \ MACRO_BEGIN \ - OSAddAtomic(1, (SInt32 *)(&(PROCESSOR_DATA(current_processor(), vm_stat).event))); \ + OSAddAtomic(1, (SInt32 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event))); \ MACRO_END +#else /* !(defined(__ppc__)) */ +#define VM_STAT_INCR(event) \ +MACRO_BEGIN \ + OSAddAtomic64(1, (SInt64 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event))); \ +MACRO_END +#endif /* !(defined(__ppc__)) */ #endif /* _KERN_HOST_STATISTICS_H_ */ diff --git a/osfmk/kern/ipc_host.c b/osfmk/kern/ipc_host.c index 69d620e8c..65f1035fe 100644 --- a/osfmk/kern/ipc_host.c +++ b/osfmk/kern/ipc_host.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,14 +81,6 @@ * Forward declarations */ -void -ipc_processor_terminate( - processor_t processor); - -void -ipc_processor_disable( - processor_t processor); - boolean_t ref_pset_port_locked( ipc_port_t port, boolean_t matchn, processor_set_t *ppset); @@ -97,12 +89,15 @@ ref_pset_port_locked( * ipc_host_init: set up various things. 
*/ +extern lck_grp_t host_notify_lock_grp; +extern lck_attr_t host_notify_lock_attr; + void ipc_host_init(void) { ipc_port_t port; int i; - mutex_init(&realhost.lock, 0); + lck_mtx_init(&realhost.lock, &host_notify_lock_grp, &host_notify_lock_attr); /* * Allocate and set up the two host ports. @@ -205,51 +200,6 @@ ipc_processor_enable( myport = processor->processor_self; ipc_kobject_set(myport, (ipc_kobject_t) processor, IKOT_PROCESSOR); } - -/* - * ipc_processor_disable: - * - * Disable ipc control of processor by clearing port object. - */ -void -ipc_processor_disable( - processor_t processor) -{ - ipc_port_t myport; - - myport = processor->processor_self; - if (myport == IP_NULL) - return; - ipc_kobject_set(myport, IKO_NULL, IKOT_NONE); -} - -/* - * ipc_processor_terminate: - * - * Processor is off-line. Destroy ipc control port. - */ -void -ipc_processor_terminate( - processor_t processor) -{ - ipc_port_t myport; - spl_t s; - - s = splsched(); - processor_lock(processor); - myport = processor->processor_self; - if (myport == IP_NULL) { - processor_unlock(processor); - splx(s); - return; - } - - processor->processor_self = IP_NULL; - processor_unlock(processor); - splx(s); - - ipc_port_dealloc_kernel(myport); -} /* * ipc_pset_init: @@ -480,6 +430,7 @@ convert_host_to_port( * Purpose: * Convert from a processor to a port. * Produces a naked send right which may be invalid. + * Processors are not reference counted, so nothing to release. * Conditions: * Nothing locked. 
*/ @@ -488,20 +439,10 @@ ipc_port_t convert_processor_to_port( processor_t processor) { - ipc_port_t port; - spl_t s; - - s = splsched(); - processor_lock(processor); - - if (processor->processor_self != IP_NULL) - port = ipc_port_make_send(processor->processor_self); - else - port = IP_NULL; - - processor_unlock(processor); - splx(s); + ipc_port_t port = processor->processor_self; + if (port != IP_NULL) + port = ipc_port_make_send(port); return port; } @@ -509,8 +450,8 @@ convert_processor_to_port( * Routine: convert_pset_to_port * Purpose: * Convert from a pset to a port. - * Produces a naked send right - * which may be invalid. + * Produces a naked send right which may be invalid. + * Processor sets are not reference counted, so nothing to release. * Conditions: * Nothing locked. */ @@ -531,8 +472,8 @@ convert_pset_to_port( * Routine: convert_pset_name_to_port * Purpose: * Convert from a pset to a port. - * Produces a naked send right - * which may be invalid. + * Produces a naked send right which may be invalid. + * Processor sets are not reference counted, so nothing to release. * Conditions: * Nothing locked. */ @@ -608,7 +549,7 @@ host_set_exception_ports( assert(host_priv == &realhost); - if (exception_mask & ~EXC_MASK_ALL) { + if (exception_mask & ~EXC_MASK_VALID) { return KERN_INVALID_ARGUMENT; } @@ -685,7 +626,7 @@ host_get_exception_ports( if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_ARGUMENT; - if (exception_mask & ~EXC_MASK_ALL) { + if (exception_mask & ~EXC_MASK_VALID) { return KERN_INVALID_ARGUMENT; } @@ -750,7 +691,7 @@ host_swap_exception_ports( if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_ARGUMENT; - if (exception_mask & ~EXC_MASK_ALL) { + if (exception_mask & ~EXC_MASK_VALID) { return KERN_INVALID_ARGUMENT; } diff --git a/osfmk/kern/ipc_kobject.c b/osfmk/kern/ipc_kobject.c index 646788daa..59af4594b 100644 --- a/osfmk/kern/ipc_kobject.c +++ b/osfmk/kern/ipc_kobject.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. 
All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -101,9 +102,10 @@ #include #include #include -#include #include -#include +#if VM32_SUPPORT +#include +#endif #include #include @@ -111,6 +113,10 @@ #include +#if CONFIG_AUDIT +#include +#endif + #if MACH_MACHINE_ROUTINES #include #endif /* MACH_MACHINE_ROUTINES */ @@ -183,10 +189,11 @@ const struct mig_subsystem *mig_e[] = { (const struct mig_subsystem *)&memory_object_name_subsystem, (const struct mig_subsystem *)&lock_set_subsystem, (const struct mig_subsystem *)&ledger_subsystem, - (const struct mig_subsystem *)&semaphore_subsystem, (const struct mig_subsystem *)&task_subsystem, (const struct mig_subsystem *)&thread_act_subsystem, - (const struct mig_subsystem *)&vm_map_subsystem, +#if VM32_SUPPORT + (const struct mig_subsystem *)&vm32_map_subsystem, +#endif (const struct mig_subsystem *)&UNDReply_subsystem, (const struct mig_subsystem *)&default_pager_object_subsystem, @@ -546,7 +553,13 @@ ipc_kobject_notify( reply_header->msgh_remote_port = MACH_PORT_NULL; return TRUE; } - +#if CONFIG_AUDIT + if (ip_kotype(port) == IKOT_AU_SESSIONPORT) { + audit_session_nosenders(request_header); + return TRUE; + } +#endif + break; case MACH_NOTIFY_PORT_DELETED: diff --git a/osfmk/kern/ipc_kobject.h b/osfmk/kern/ipc_kobject.h index b2e7d1340..695b55e52 100644 --- a/osfmk/kern/ipc_kobject.h +++ b/osfmk/kern/ipc_kobject.h @@ -120,12 +120,13 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_IOKIT_OBJECT 30 #define IKOT_UPL 31 #define IKOT_MEM_OBJ_CONTROL 32 -#define IKOT_LABELH 33 +#define IKOT_AU_SESSIONPORT 33 +#define IKOT_LABELH 34 /* * Add new entries here and adjust IKOT_UNKNOWN. * Please keep ipc/ipc_object.c:ikot_print_array up to date. 
*/ -#define IKOT_UNKNOWN 34 /* magic catchall */ +#define IKOT_UNKNOWN 35 /* magic catchall */ #define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */ diff --git a/osfmk/kern/ipc_mig.c b/osfmk/kern/ipc_mig.c index 0dbe02f08..b437edd9b 100644 --- a/osfmk/kern/ipc_mig.c +++ b/osfmk/kern/ipc_mig.c @@ -81,6 +81,8 @@ #include #include +#include + /* * Routine: mach_msg_send_from_kernel * Purpose: @@ -98,6 +100,13 @@ * or destination is above kernel limit */ +#if IKM_SUPPORT_LEGACY + +#undef mach_msg_send_from_kernel +mach_msg_return_t mach_msg_send_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size); + mach_msg_return_t mach_msg_send_from_kernel( mach_msg_header_t *msg, @@ -106,7 +115,34 @@ mach_msg_send_from_kernel( ipc_kmsg_t kmsg; mach_msg_return_t mr; - if (!MACH_PORT_VALID((mach_port_name_t)msg->msgh_remote_port)) + if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port))) + return MACH_SEND_INVALID_DEST; + + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + return mr; + + ipc_kmsg_copyin_from_kernel_legacy(kmsg); + + mr = ipc_kmsg_send_always(kmsg); + if (mr != MACH_MSG_SUCCESS) { + ipc_kmsg_destroy(kmsg); + } + + return mr; +} + +#endif /* IKM_SUPPORT_LEGACY */ + +mach_msg_return_t +mach_msg_send_from_kernel_proper( + mach_msg_header_t *msg, + mach_msg_size_t send_size) +{ + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port))) return MACH_SEND_INVALID_DEST; mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); @@ -123,6 +159,8 @@ mach_msg_send_from_kernel( return mr; } +#if IKM_SUPPORT_LEGACY + mach_msg_return_t mach_msg_send_from_kernel_with_options( mach_msg_header_t *msg, @@ -133,14 +171,14 @@ mach_msg_send_from_kernel_with_options( ipc_kmsg_t kmsg; mach_msg_return_t mr; - if (!MACH_PORT_VALID((mach_port_name_t)msg->msgh_remote_port)) + if (!MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port))) return 
MACH_SEND_INVALID_DEST; mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); if (mr != MACH_MSG_SUCCESS) return mr; - ipc_kmsg_copyin_from_kernel(kmsg); + ipc_kmsg_copyin_from_kernel_legacy(kmsg); mr = ipc_kmsg_send(kmsg, option, timeout_val); if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_destroy(kmsg); @@ -149,6 +187,8 @@ mach_msg_send_from_kernel_with_options( return mr; } +#endif /* IKM_SUPPORT_LEGACY */ + /* * Routine: mach_msg_rpc_from_kernel * Purpose: @@ -164,11 +204,47 @@ mach_msg_send_from_kernel_with_options( * MACH_RCV_PORT_DIED The reply port was deallocated. */ +mach_msg_return_t mach_msg_rpc_from_kernel_body(mach_msg_header_t *msg, + mach_msg_size_t send_size, mach_msg_size_t rcv_size, boolean_t legacy); + +#if IKM_SUPPORT_LEGACY + +#undef mach_msg_rpc_from_kernel mach_msg_return_t mach_msg_rpc_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size); + +mach_msg_return_t +mach_msg_rpc_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size) +{ + return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, TRUE); +} + +#endif /* IKM_SUPPORT_LEGACY */ + +mach_msg_return_t +mach_msg_rpc_from_kernel_proper( mach_msg_header_t *msg, mach_msg_size_t send_size, mach_msg_size_t rcv_size) +{ + return mach_msg_rpc_from_kernel_body(msg, send_size, rcv_size, FALSE); +} + +mach_msg_return_t +mach_msg_rpc_from_kernel_body( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size, +#if !IKM_SUPPORT_LEGACY + __unused +#endif + boolean_t legacy) { thread_t self = current_thread(); ipc_port_t reply; @@ -176,7 +252,7 @@ mach_msg_rpc_from_kernel( mach_port_seqno_t seqno; mach_msg_return_t mr; - assert(MACH_PORT_VALID((mach_port_name_t)msg->msgh_remote_port)); + assert(MACH_PORT_VALID(CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port))); assert(msg->msgh_local_port == MACH_PORT_NULL); mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); @@ -199,7 +275,14 @@ 
mach_msg_rpc_from_kernel( ipc_port_reference(reply); - ipc_kmsg_copyin_from_kernel(kmsg); +#if IKM_SUPPORT_LEGACY + if(legacy) + ipc_kmsg_copyin_from_kernel_legacy(kmsg); + else + ipc_kmsg_copyin_from_kernel(kmsg); +#else + ipc_kmsg_copyin_from_kernel(kmsg); +#endif mr = ipc_kmsg_send_always(kmsg); if (mr != MACH_MSG_SUCCESS) { @@ -283,7 +366,14 @@ mach_msg_rpc_from_kernel( * We don't have to put them anywhere; just leave them * as they are. */ - ipc_kmsg_copyout_to_kernel(kmsg, ipc_space_reply); +#if IKM_SUPPORT_LEGACY + if(legacy) + ipc_kmsg_copyout_to_kernel_legacy(kmsg, ipc_space_reply); + else + ipc_kmsg_copyout_to_kernel(kmsg, ipc_space_reply); +#else + ipc_kmsg_copyout_to_kernel(kmsg, ipc_space_reply); +#endif ipc_kmsg_put_to_kernel(msg, kmsg, rcv_size); return mr; } @@ -322,7 +412,7 @@ mach_msg_overwrite( ipc_kmsg_t kmsg; mach_port_seqno_t seqno; mach_msg_return_t mr; - mach_msg_format_0_trailer_t *trailer; + mach_msg_max_trailer_t *trailer; if (option & MACH_SEND_MSG) { mach_msg_size_t msg_and_trailer_size; @@ -398,10 +488,12 @@ mach_msg_overwrite( if (mr != MACH_MSG_SUCCESS) return mr; - trailer = (mach_msg_format_0_trailer_t *) + trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + kmsg->ikm_header->msgh_size); if (option & MACH_RCV_TRAILER_MASK) { trailer->msgh_seqno = seqno; + trailer->msgh_context = + kmsg->ikm_header->msgh_remote_port->ip_context; trailer->msgh_trailer_size = REQUESTED_TRAILER_SIZE(option); } @@ -457,7 +549,6 @@ void mig_dealloc_reply_port( __unused mach_port_t reply_port) { - panic("mig_dealloc_reply_port"); } /* @@ -642,8 +733,8 @@ convert_mig_object_to_port( assert(previous == IP_NULL); - if (hw_compare_and_store((uint32_t)IP_NULL, (uint32_t)port, - (uint32_t *)&mig_object->port)) { + if (OSCompareAndSwapPtr((void *)IP_NULL, (void *)port, + (void * volatile *)&mig_object->port)) { deallocate = FALSE; } else { ipc_port_dealloc_kernel(port); diff --git a/osfmk/kern/ipc_mig.h b/osfmk/kern/ipc_mig.h index 
3ddfc945d..06d3ae97e 100644 --- a/osfmk/kern/ipc_mig.h +++ b/osfmk/kern/ipc_mig.h @@ -131,16 +131,21 @@ __BEGIN_DECLS /* Send a message from the kernel */ -extern mach_msg_return_t mach_msg_send_from_kernel( + +extern mach_msg_return_t mach_msg_send_from_kernel_proper( mach_msg_header_t *msg, mach_msg_size_t send_size); +#define mach_msg_send_from_kernel mach_msg_send_from_kernel_proper -extern mach_msg_return_t mach_msg_rpc_from_kernel( +extern mach_msg_return_t +mach_msg_rpc_from_kernel_proper( mach_msg_header_t *msg, mach_msg_size_t send_size, mach_msg_size_t rcv_size); +#define mach_msg_rpc_from_kernel mach_msg_rpc_from_kernel_proper + extern mach_msg_return_t mach_msg_send_from_kernel_with_options( mach_msg_header_t *msg, mach_msg_size_t send_size, diff --git a/osfmk/kern/ipc_sync.c b/osfmk/kern/ipc_sync.c index b2774b520..ab24fe06f 100644 --- a/osfmk/kern/ipc_sync.c +++ b/osfmk/kern/ipc_sync.c @@ -104,11 +104,12 @@ convert_semaphore_to_port (semaphore_t semaphore) { ipc_port_t port; - if (semaphore != SEMAPHORE_NULL) - port = ipc_port_make_send(semaphore->port); - else - port = IP_NULL; + if (semaphore == SEMAPHORE_NULL) + return (IP_NULL); + /* caller is donating a reference */ + port = ipc_port_make_send(semaphore->port); + semaphore_dereference(semaphore); return (port); } @@ -134,11 +135,12 @@ convert_lock_set_to_port (lock_set_t lock_set) { ipc_port_t port; - if (lock_set != LOCK_SET_NULL) - port = ipc_port_make_send(lock_set->port); - else - port = IP_NULL; + if (lock_set == LOCK_SET_NULL) + return IP_NULL; + /* caller is donating a reference */ + port = ipc_port_make_send(lock_set->port); + lock_set_dereference(lock_set); return (port); } diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index e14a6fe85..019dacd6b 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -168,7 +168,6 @@ ipc_task_init( task->itk_bootstrap = IP_NULL; task->itk_seatbelt = IP_NULL; task->itk_gssd = IP_NULL; - task->itk_automountd = IP_NULL; task->itk_task_access 
= IP_NULL; for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) @@ -207,9 +206,6 @@ ipc_task_init( task->itk_gssd = ipc_port_copy_send(parent->itk_gssd); - task->itk_automountd = - ipc_port_copy_send(parent->itk_automountd); - task->itk_task_access = ipc_port_copy_send(parent->itk_task_access); @@ -323,9 +319,6 @@ ipc_task_terminate( if (IP_VALID(task->itk_gssd)) ipc_port_release_send(task->itk_gssd); - if (IP_VALID(task->itk_automountd)) - ipc_port_release_send(task->itk_automountd); - if (IP_VALID(task->itk_task_access)) ipc_port_release_send(task->itk_task_access); @@ -339,6 +332,8 @@ ipc_task_terminate( /* destroy the kernel ports */ ipc_port_dealloc_kernel(kport); ipc_port_dealloc_kernel(nport); + + itk_lock_destroy(task); } /* @@ -883,10 +878,6 @@ task_get_special_port( port = ipc_port_copy_send(task->itk_task_access); break; - case TASK_AUTOMOUNTD_PORT: - port = ipc_port_copy_send(task->itk_automountd); - break; - default: itk_unlock(task); return KERN_INVALID_ARGUMENT; @@ -958,10 +949,6 @@ task_set_special_port( whichp = &task->itk_task_access; break; - case TASK_AUTOMOUNTD_PORT: - whichp = &task->itk_automountd; - break; - default: return KERN_INVALID_ARGUMENT; }/* switch */ @@ -1524,7 +1511,7 @@ thread_set_exception_ports( if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { @@ -1595,7 +1582,7 @@ task_set_exception_ports( if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { @@ -1691,7 +1678,7 @@ thread_swap_exception_ports( if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { @@ -1787,7 +1774,7 @@ task_swap_exception_ports( if (task == TASK_NULL) 
return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); if (IP_VALID(new_port)) { @@ -1896,7 +1883,7 @@ thread_get_exception_ports( if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); thread_mtx_lock(thread); @@ -1958,7 +1945,7 @@ task_get_exception_ports( if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); - if (exception_mask & ~EXC_MASK_ALL) + if (exception_mask & ~EXC_MASK_VALID) return (KERN_INVALID_ARGUMENT); itk_lock(task); diff --git a/osfmk/kern/kalloc.c b/osfmk/kern/kalloc.c index 3d9d42ca8..03c55052d 100644 --- a/osfmk/kern/kalloc.c +++ b/osfmk/kern/kalloc.c @@ -92,7 +92,10 @@ vm_size_t kalloc_kernmap_size; /* size of kallocs that can come from kernel map unsigned int kalloc_large_inuse; vm_size_t kalloc_large_total; vm_size_t kalloc_large_max; -vm_size_t kalloc_largest_allocated = 0; +volatile vm_size_t kalloc_largest_allocated = 0; + +vm_offset_t kalloc_map_min; +vm_offset_t kalloc_map_max; /* * All allocations of size less than kalloc_max are rounded to the @@ -185,20 +188,26 @@ kalloc_init( /* * Scale the kalloc_map_size to physical memory size: stay below - * 1/8th the total zone map size, or 128 MB. + * 1/8th the total zone map size, or 128 MB (for a 32-bit kernel). 
*/ - kalloc_map_size = sane_size >> 5; + kalloc_map_size = (vm_size_t)(sane_size >> 5); +#if !__LP64__ if (kalloc_map_size > KALLOC_MAP_SIZE_MAX) kalloc_map_size = KALLOC_MAP_SIZE_MAX; +#endif /* !__LP64__ */ if (kalloc_map_size < KALLOC_MAP_SIZE_MIN) kalloc_map_size = KALLOC_MAP_SIZE_MIN; retval = kmem_suballoc(kernel_map, &min, kalloc_map_size, - FALSE, VM_FLAGS_ANYWHERE, &kalloc_map); + FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT, + &kalloc_map); if (retval != KERN_SUCCESS) panic("kalloc_init: kmem_suballoc failed"); + kalloc_map_min = min; + kalloc_map_max = min + kalloc_map_size - 1; + /* * Ensure that zones up to size 8192 bytes exist. * This is desirable because messages are allocated @@ -212,6 +221,7 @@ kalloc_init( kalloc_max_prerounded = kalloc_max / 2 + 1; /* size it to be more than 16 times kalloc_max (256k) for allocations from kernel map */ kalloc_kernmap_size = (kalloc_max * 16) + 1; + kalloc_largest_allocated = kalloc_kernmap_size; /* * Allocate a zone for each size we are going to handle. @@ -242,7 +252,7 @@ kalloc_canblock( /* * If size is too large for a zone, then use kmem_alloc. - * (We use kmem_alloc instead of kmem_alloc_wired so that + * (We use kmem_alloc instead of kmem_alloc_kobject so that * krealloc can use kmem_realloc.) 
*/ @@ -255,17 +265,27 @@ kalloc_canblock( } if (size >= kalloc_kernmap_size) { + volatile vm_offset_t prev_largest; alloc_map = kernel_map; - - if (size > kalloc_largest_allocated) - kalloc_largest_allocated = size; + /* Thread-safe version of the workaround for 4740071 + * (a double FREE()) + */ + do { + prev_largest = kalloc_largest_allocated; + } while ((size > prev_largest) && !OSCompareAndSwap((UInt32)prev_largest, (UInt32)size, (volatile UInt32 *) &kalloc_largest_allocated)); } else alloc_map = kalloc_map; - if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) - addr = NULL; + if (kmem_alloc(alloc_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) { + if (alloc_map != kernel_map) { + if (kmem_alloc(kernel_map, (vm_offset_t *)&addr, size) != KERN_SUCCESS) + addr = NULL; + } + else + addr = NULL; + } - if (addr) { + if (addr != NULL) { kalloc_large_inuse++; kalloc_large_total += size; @@ -453,6 +473,8 @@ kget( return(zget(k_zone[zindex])); } +volatile SInt32 kfree_nop_count = 0; + void kfree( void *data, @@ -460,15 +482,14 @@ kfree( { register int zindex; register vm_size_t freesize; - vm_map_t alloc_map = VM_MAP_NULL; + vm_map_t alloc_map = kernel_map; /* if size was too large for a zone, then use kmem_free */ if (size >= kalloc_max_prerounded) { - if (size >= kalloc_kernmap_size) { - alloc_map = kernel_map; - - if (size > kalloc_largest_allocated) + if ((((vm_offset_t) data) >= kalloc_map_min) && (((vm_offset_t) data) <= kalloc_map_max)) + alloc_map = kalloc_map; + if (size > kalloc_largest_allocated) { /* * work around double FREEs of small MALLOCs * this use to end up being a nop @@ -488,9 +509,10 @@ kfree( * to the above scenario, but it would still be wrong and * cause serious damage. 
*/ + + OSAddAtomic(1, &kfree_nop_count); return; - } else - alloc_map = kalloc_map; + } kmem_free(alloc_map, (vm_offset_t)data, size); kalloc_large_total -= size; diff --git a/osfmk/kern/kalloc.h b/osfmk/kern/kalloc.h index a260e4944..7966959f2 100644 --- a/osfmk/kern/kalloc.h +++ b/osfmk/kern/kalloc.h @@ -98,6 +98,7 @@ extern void kalloc_fake_zone_info( int *exhaustable); extern vm_size_t kalloc_max_prerounded; +extern vm_size_t kalloc_large_total; #endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/kern_types.h b/osfmk/kern/kern_types.h index f75adc507..7c3e93616 100644 --- a/osfmk/kern/kern_types.h +++ b/osfmk/kern/kern_types.h @@ -42,7 +42,11 @@ struct zone ; +#ifndef __LP64__ struct wait_queue { unsigned int opaque[2]; uintptr_t opaquep[2]; } ; +#else +struct wait_queue { unsigned char opaque[32]; }; +#endif #endif /* MACH_KERNEL_PRIVATE */ @@ -74,6 +78,7 @@ typedef int wait_result_t; #define THREAD_TIMED_OUT 1 /* timeout expired */ #define THREAD_INTERRUPTED 2 /* aborted/interrupted */ #define THREAD_RESTART 3 /* restart operation entirely */ +#define THREAD_NOT_WAITING 10 /* thread didn't need to wait */ typedef void (*thread_continue_t)(void *, wait_result_t); #define THREAD_CONTINUE_NULL ((thread_continue_t) 0) diff --git a/osfmk/kern/kext_alloc.c b/osfmk/kern/kext_alloc.c new file mode 100644 index 000000000..407efcf16 --- /dev/null +++ b/osfmk/kern/kext_alloc.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define KEXT_ALLOC_MAX_OFFSET (2 * 1024 * 1024 * 1024UL) + +vm_map_t g_kext_map = 0; +static mach_vm_offset_t kext_alloc_base = 0; +static mach_vm_offset_t kext_alloc_max = 0; + +/* + * On x86_64 systems, kernel extension text must remain within 2GB of the + * kernel's text segment. To ensure this happens, we snag 2GB of kernel VM + * as early as possible for kext allocations. + */ +void +kext_alloc_init(void) +{ +#if __x86_64__ + kern_return_t rval = 0; + kernel_segment_command_t *text = NULL; + mach_vm_offset_t text_end, text_start; + mach_vm_size_t text_size; + mach_vm_size_t kext_alloc_size; + + /* Determine the start of the kernel's __TEXT segment and determine the + * lower bound of the allocated submap for kext allocations. 
+ */ + + text = getsegbyname(SEG_TEXT); + text_start = vm_map_trunc_page(text->vmaddr); + text_start &= ~((512ULL * 1024 * 1024 * 1024) - 1); + text_end = vm_map_round_page(text->vmaddr + text->vmsize); + text_size = text_end - text_start; + + kext_alloc_base = text_end - KEXT_ALLOC_MAX_OFFSET; + kext_alloc_size = KEXT_ALLOC_MAX_OFFSET - text_size; + kext_alloc_max = kext_alloc_base + kext_alloc_size; + + /* Allocate the subblock of the kernel map */ + + rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, + kext_alloc_size, /* pageable */ TRUE, + VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE|VM_FLAGS_BELOW_MIN, + &g_kext_map); + if (rval != KERN_SUCCESS) { + panic("kext_alloc_init: kmem_suballoc failed 0x%x\n", rval); + } + + if ((kext_alloc_base + kext_alloc_size) > kext_alloc_max) { + panic("kext_alloc_init: failed to get first 2GB\n"); + } + + if (kernel_map->min_offset > kext_alloc_base) { + kernel_map->min_offset = kext_alloc_base; + } + + printf("kext submap [0x%llx - 0x%llx], kernel text [0x%llx - 0x%llx]\n", + kext_alloc_base, kext_alloc_max, text->vmaddr, + text->vmaddr + text->vmsize); +#else + g_kext_map = kernel_map; + kext_alloc_base = VM_MIN_KERNEL_ADDRESS; + kext_alloc_max = VM_MAX_KERNEL_ADDRESS; +#endif /* __x86_64__ */ +} + +kern_return_t +kext_alloc(vm_offset_t *_addr, vm_size_t size, boolean_t fixed) +{ + kern_return_t rval = 0; + mach_vm_offset_t addr = (fixed) ? *_addr : kext_alloc_base; + int flags = (fixed) ? 
VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE; + + /* Allocate the kext virtual memory */ + rval = mach_vm_allocate(g_kext_map, &addr, size, flags); + if (rval != KERN_SUCCESS) { + printf("vm_allocate failed - %d\n", rval); + goto finish; + } + + /* Check that the memory is reachable by kernel text */ + if ((addr + size) > kext_alloc_max) { + kext_free((vm_offset_t)addr, size); + goto finish; + } + + *_addr = (vm_offset_t)addr; + rval = KERN_SUCCESS; + +finish: + return rval; +} + +void +kext_free(vm_offset_t addr, vm_size_t size) +{ + kern_return_t rval; + + rval = mach_vm_deallocate(g_kext_map, addr, size); + assert(rval == KERN_SUCCESS); +} + diff --git a/osfmk/kern/kext_alloc.h b/osfmk/kern/kext_alloc.h new file mode 100644 index 000000000..0b4c67d15 --- /dev/null +++ b/osfmk/kern/kext_alloc.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KEXT_ALLOC_H_ +#define _KEXT_ALLOC_H_ + +#include +#include + +__BEGIN_DECLS + +void kext_alloc_init(void); + +kern_return_t kext_alloc(vm_offset_t *addr, vm_size_t size, boolean_t fixed); + +void kext_free(vm_offset_t addr, vm_size_t size); + +__END_DECLS + +#endif /* _KEXT_ALLOC_H_ */ + diff --git a/osfmk/kern/kmod.c b/osfmk/kern/kmod.c index 1feb3688e..121967342 100644 --- a/osfmk/kern/kmod.c +++ b/osfmk/kern/kmod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -32,7 +32,7 @@ * Version 2.0. */ /* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999 Apple Inc. All rights reserved. * * HISTORY * @@ -45,1876 +45,117 @@ #include #include -#include -#include #include #include #include -#include -#include -#include - -#include - -/* - * XXX headers for which prototypes should be in a common include file; - * XXX see libsa/kext.cpp for why. 
- */ -kern_return_t kmod_create_internal(kmod_info_t *info, kmod_t *id); -kern_return_t kmod_destroy_internal(kmod_t id); -kern_return_t kmod_start_or_stop(kmod_t id, int start, kmod_args_t *data, - mach_msg_type_number_t *dataCount); -kern_return_t kmod_retain(kmod_t id); -kern_return_t kmod_release(kmod_t id); -kern_return_t kmod_queue_cmd(vm_address_t data, vm_size_t size); -kern_return_t kmod_get_info(host_t host, kmod_info_array_t *kmods, - mach_msg_type_number_t *kmodCount); - -static kern_return_t kmod_get_symbol_data(kmod_args_t * data, - mach_msg_type_number_t * dataCount); -static kern_return_t kmod_free_linkedit_data(void); -static kern_return_t kmod_get_kext_uuid( - const char * kext_id, - kmod_args_t * data, - mach_msg_type_number_t * dataCount); - -extern int IODTGetLoaderInfo(const char * key, void ** infoAddr, vm_size_t * infoSize); -extern void IODTFreeLoaderInfo(const char * key, void * infoAddr, vm_size_t infoSize); -/* operates on 32 bit segments */ -extern void OSRuntimeUnloadCPPForSegment(struct segment_command * segment); - -#define WRITE_PROTECT_MODULE_TEXT (0) - -kmod_info_t *kmod; -static int kmod_index = 1; -static int kmod_load_disabled = 0; - -mutex_t * kmod_lock = 0; -static mutex_t * kmod_queue_lock = 0; - -typedef struct cmd_queue_entry { - queue_chain_t links; - vm_address_t data; - vm_size_t size; -} cmd_queue_entry_t; - -queue_head_t kmod_cmd_queue; - -/******************************************************************************* -*******************************************************************************/ -#define KMOD_PANICLIST_SIZE (2 * PAGE_SIZE) - -char * unloaded_kext_paniclist = NULL; -uint32_t unloaded_kext_paniclist_size = 0; -uint32_t unloaded_kext_paniclist_length = 0; -uint64_t last_loaded_timestamp = 0; - -char * loaded_kext_paniclist = NULL; -uint32_t loaded_kext_paniclist_size = 0; -uint32_t loaded_kext_paniclist_length = 0; -uint64_t last_unloaded_timestamp = 0; - -int substitute( - const char * scan_string, 
- char * string_out, - uint32_t * to_index, - uint32_t * from_index, - const char * substring, - char marker, - char substitution); - -/* identifier_out must be at least KMOD_MAX_NAME bytes. - */ -int substitute( - const char * scan_string, - char * string_out, - uint32_t * to_index, - uint32_t * from_index, - const char * substring, - char marker, - char substitution) -{ - uint32_t substring_length = strnlen(substring, KMOD_MAX_NAME - 1); - - if (!strncmp(scan_string, substring, substring_length)) { - if (marker) { - string_out[(*to_index)++] = marker; - } - string_out[(*to_index)++] = substitution; - (*from_index) += substring_length; - return 1; - } - return 0; -} - -void compactIdentifier( - const char * identifier, - char * identifier_out, - char ** identifier_out_end); - -void compactIdentifier( - const char * identifier, - char * identifier_out, - char ** identifier_out_end) -{ - uint32_t from_index, to_index; - uint32_t scan_from_index = 0; - uint32_t scan_to_index = 0; - subs_entry_t * subs_entry = NULL; - int did_sub = 0; - - from_index = to_index = 0; - identifier_out[0] = '\0'; - - /* Replace certain identifier prefixes with shorter @+character sequences. - */ - for (subs_entry = &kext_identifier_prefix_subs[0]; - subs_entry->substring && !did_sub; - subs_entry++) { - - did_sub = substitute(identifier, identifier_out, - &scan_to_index, &scan_from_index, - subs_entry->substring, /* marker */ '\0', subs_entry->substitute); - } - did_sub = 0; - - /* Now scan through the identifier looking for the common substrings - * and replacing them with shorter !+character sequences. 
- */ - for (/* see above */; - scan_from_index < KMOD_MAX_NAME - 1 && identifier[scan_from_index]; - /* see loop */) { - - const char * scan_string = &identifier[scan_from_index]; - - did_sub = 0; - - if (scan_from_index) { - for (subs_entry = &kext_identifier_substring_subs[0]; - subs_entry->substring && !did_sub; - subs_entry++) { - - did_sub = substitute(scan_string, identifier_out, - &scan_to_index, &scan_from_index, - subs_entry->substring, '!', subs_entry->substitute); - } - } - - if (!did_sub) { - identifier_out[scan_to_index++] = identifier[scan_from_index++]; - } - } - - identifier_out[scan_to_index] = '\0'; - if (identifier_out_end) { - *identifier_out_end = &identifier_out[scan_to_index]; - } - - return; -} - -/* identPlusVers must be at least 2*KMOD_MAX_NAME in length. - */ -int assemble_identifier_and_version( - kmod_info_t * kmod_info, - char * identPlusVers); -int assemble_identifier_and_version( - kmod_info_t * kmod_info, - char * identPlusVers) -{ - int result = 0; - - compactIdentifier(kmod_info->name, identPlusVers, NULL); - result = strnlen(identPlusVers, KMOD_MAX_NAME - 1); - identPlusVers[result++] = '\t'; // increment for real char - identPlusVers[result] = '\0'; // don't increment for nul char - result = strlcat(identPlusVers, kmod_info->version, KMOD_MAX_NAME); - - return result; -} - -#define LAST_LOADED " - last loaded " -#define LAST_LOADED_TS_WIDTH (16) - -uint32_t save_loaded_kext_paniclist_typed( - const char * prefix, - int invertFlag, - int libsFlag, - char * paniclist, - uint32_t list_size, - uint32_t * list_length_ptr, - int (*printf_func)(const char *fmt, ...)); -uint32_t save_loaded_kext_paniclist_typed( - const char * prefix, - int invertFlag, - int libsFlag, - char * paniclist, - uint32_t list_size, - uint32_t * list_length_ptr, - int (*printf_func)(const char *fmt, ...)) -{ - uint32_t result = 0; - int error = 0; - kmod_info_t * kmod_info; - - for (kmod_info = kmod; - kmod_info && (*list_length_ptr + 1 < list_size); - 
kmod_info = kmod_info->next) { - - int match; - char identPlusVers[2*KMOD_MAX_NAME]; - uint32_t identPlusVersLength; - char timestampBuffer[17]; // enough for a uint64_t - - if (!pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)kmod_info))) { - (*printf_func)("kmod scan stopped due to missing kmod page: %p\n", - kmod_info); - error = 1; - goto finish; - } - - /* Skip all built-in/fake entries. - */ - if (!kmod_info->address) { - continue; - } - - /* Filter for kmod name (bundle identifier). - */ - match = !strncmp(kmod_info->name, prefix, strnlen(prefix, KMOD_MAX_NAME)); - if ((match && invertFlag) || (!match && !invertFlag)) { - continue; - } - - /* Filter for libraries. This isn't a strictly correct check, - * but any kext that does have references to it has to be a library. - * A kext w/o references may or may not be a library. - */ - if ((libsFlag == 0 && kmod_info->reference_count) || - (libsFlag == 1 && !kmod_info->reference_count)) { - - continue; - } - - identPlusVersLength = assemble_identifier_and_version(kmod_info, - identPlusVers); - if (!identPlusVersLength) { - printf_func("error saving loaded kext info\n"); - goto finish; - } - - /* We're going to note the last-loaded kext in the list. - */ - if (kmod_info == kmod) { - snprintf(timestampBuffer, sizeof(timestampBuffer), "%llu", - last_loaded_timestamp); - identPlusVersLength += sizeof(LAST_LOADED) - 1 + - strnlen(timestampBuffer, sizeof(timestampBuffer)); - } - - /* Adding 1 for the newline. 
- */ - if (*list_length_ptr + identPlusVersLength + 1 >= list_size) { - goto finish; - } - - *list_length_ptr = strlcat(paniclist, identPlusVers, list_size); - if (kmod_info == kmod) { - *list_length_ptr = strlcat(paniclist, LAST_LOADED, list_size); - *list_length_ptr = strlcat(paniclist, timestampBuffer, list_size); - } - *list_length_ptr = strlcat(paniclist, "\n", list_size); - } - -finish: - if (!error) { - if (*list_length_ptr + 1 <= list_size) { - result = list_size - (*list_length_ptr + 1); - } - } - - return result; -} - -void save_loaded_kext_paniclist( - int (*printf_func)(const char *fmt, ...)); - -void save_loaded_kext_paniclist( - int (*printf_func)(const char *fmt, ...)) -{ - char * newlist = NULL; - uint32_t newlist_size = 0; - uint32_t newlist_length = 0; - - newlist_length = 0; - newlist_size = KMOD_PANICLIST_SIZE; - newlist = (char *)kalloc(newlist_size); - - if (!newlist) { - printf_func("couldn't allocate kext panic log buffer\n"); - goto finish; - } - - newlist[0] = '\0'; - - // non-"com.apple." kexts - if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 1, - /* libs? */ -1, newlist, newlist_size, &newlist_length, - printf_func)) { - - goto finish; - } - // "com.apple." nonlibrary kexts - if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 0, - /* libs? */ 0, newlist, newlist_size, &newlist_length, - printf_func)) { - - goto finish; - } - // "com.apple." library kexts - if (!save_loaded_kext_paniclist_typed("com.apple.", /* invert? */ 0, - /* libs? 
*/ 1, newlist, newlist_size, &newlist_length, - printf_func)) { - - goto finish; - } - - if (loaded_kext_paniclist) { - kfree(loaded_kext_paniclist, loaded_kext_paniclist_size); - } - loaded_kext_paniclist = newlist; - loaded_kext_paniclist_size = newlist_size; - loaded_kext_paniclist_length = newlist_length; - -finish: - return; -} - -void save_unloaded_kext_paniclist( - kmod_info_t * kmod_info, - int (*printf_func)(const char *fmt, ...)); -void save_unloaded_kext_paniclist( - kmod_info_t * kmod_info, - int (*printf_func)(const char *fmt, ...)) -{ - char * newlist = NULL; - uint32_t newlist_size = 0; - uint32_t newlist_length = 0; - char identPlusVers[2*KMOD_MAX_NAME]; - uint32_t identPlusVersLength; - - identPlusVersLength = assemble_identifier_and_version(kmod_info, - identPlusVers); - if (!identPlusVersLength) { - printf_func("error saving unloaded kext info\n"); - goto finish; - } - - newlist_length = identPlusVersLength; - newlist_size = newlist_length + 1; - newlist = (char *)kalloc(newlist_size); - - if (!newlist) { - printf_func("couldn't allocate kext panic log buffer\n"); - goto finish; - } - - newlist[0] = '\0'; - - strlcpy(newlist, identPlusVers, newlist_size); - - if (unloaded_kext_paniclist) { - kfree(unloaded_kext_paniclist, unloaded_kext_paniclist_size); - } - unloaded_kext_paniclist = newlist; - unloaded_kext_paniclist_size = newlist_size; - unloaded_kext_paniclist_length = newlist_length; - -finish: - return; -} - -// proto is in header -void record_kext_unload(kmod_t kmod_id) -{ - kmod_info_t * kmod_info = NULL; - - mutex_lock(kmod_lock); - - kmod_info = kmod_lookupbyid(kmod_id); - if (kmod_info) { - clock_get_uptime(&last_unloaded_timestamp); - save_unloaded_kext_paniclist(kmod_info, &printf); - } - mutex_unlock(kmod_lock); - return; -} - -void dump_kext_info(int (*printf_func)(const char *fmt, ...)) -{ - printf_func("unloaded kexts:\n"); - if (unloaded_kext_paniclist && (pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) 
unloaded_kext_paniclist))) { - printf_func("%.*s - last unloaded %llu\n", - unloaded_kext_paniclist_length, unloaded_kext_paniclist, - last_unloaded_timestamp); - } else { - printf_func("(none)\n"); - } - printf_func("loaded kexts:\n"); - if (loaded_kext_paniclist && (pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) loaded_kext_paniclist)) && loaded_kext_paniclist[0]) { - printf_func("%.*s", loaded_kext_paniclist_length, loaded_kext_paniclist); - } else { - printf_func("(none)\n"); - } - return; -} - -/******************************************************************************* -*******************************************************************************/ -void -kmod_init(void) -{ - kmod_lock = mutex_alloc(0); - kmod_queue_lock = mutex_alloc(0); - queue_init(&kmod_cmd_queue); -} - -kmod_info_t * -kmod_lookupbyid(kmod_t id) -{ - kmod_info_t *k = NULL; - - k = kmod; - while (k) { - if (k->id == id) break; - k = k->next; - } - - return k; -} - -kmod_info_t * -kmod_lookupbyname(const char * name) -{ - kmod_info_t *k = NULL; - - k = kmod; - while (k) { - if (!strncmp(k->name, name, sizeof(k->name))) - break; - k = k->next; - } - - return k; -} - -// get the id of a kext in a given range, if the address is not in a kext -// -1 is returned -int kmod_lookupidbyaddress_locked(vm_address_t addr) -{ - kmod_info_t *k = 0; - - mutex_lock(kmod_queue_lock); - k = kmod; - if(NULL != k) { - while (k) { - if ((k->address <= addr) && ((k->address + k->size) > addr)) { - break; - } - k = k->next; - } - mutex_unlock(kmod_queue_lock); - } else { - mutex_unlock(kmod_queue_lock); - return -1; - } - - if(NULL == k) { - return -1; - } else { - return k->id; - } -} - -kmod_info_t * -kmod_lookupbyaddress(vm_address_t addr) -{ - kmod_info_t *k = 0; - - k = kmod; - while (k) { - if ((k->address <= addr) && ((k->address + k->size) > addr)) break; - k = k->next; - } - - return k; -} - -kmod_info_t * -kmod_lookupbyid_locked(kmod_t id) -{ - kmod_info_t *k = NULL; - kmod_info_t *kc = NULL; - 
- kc = (kmod_info_t *)kalloc(sizeof(kmod_info_t)); - if (!kc) return kc; - - mutex_lock(kmod_lock); - k = kmod_lookupbyid(id); - if (k) { - bcopy((char*)k, (char *)kc, sizeof(kmod_info_t)); - } - - mutex_unlock(kmod_lock); - - if (k == 0) { - kfree(kc, sizeof(kmod_info_t)); - kc = NULL; - } - return kc; -} - -kmod_info_t * -kmod_lookupbyname_locked(const char * name) -{ - kmod_info_t *k = NULL; - kmod_info_t *kc = NULL; - - kc = (kmod_info_t *)kalloc(sizeof(kmod_info_t)); - if (!kc) return kc; - - mutex_lock(kmod_lock); - k = kmod_lookupbyname(name); - if (k) { - bcopy((char *)k, (char *)kc, sizeof(kmod_info_t)); - } - - mutex_unlock(kmod_lock); - - if (k == 0) { - kfree(kc, sizeof(kmod_info_t)); - kc = NULL; - } - return kc; -} - -// XXX add a nocopy flag?? - -kern_return_t -kmod_queue_cmd(vm_address_t data, vm_size_t size) -{ - kern_return_t rc; - cmd_queue_entry_t *e = (cmd_queue_entry_t *)kalloc(sizeof(struct cmd_queue_entry)); - if (!e) return KERN_RESOURCE_SHORTAGE; - - rc = kmem_alloc(kernel_map, &e->data, size); - if (rc != KERN_SUCCESS) { - kfree(e, sizeof(struct cmd_queue_entry)); - return rc; - } - e->size = size; - bcopy((void *)data, (void *)e->data, size); - - mutex_lock(kmod_queue_lock); - enqueue_tail(&kmod_cmd_queue, (queue_entry_t)e); - mutex_unlock(kmod_queue_lock); - - thread_wakeup_one((event_t)&kmod_cmd_queue); - - return KERN_SUCCESS; -} - -kern_return_t -kmod_load_extension(char *name) -{ - kmod_load_extension_cmd_t data; - - if (kmod_load_disabled) { - return KERN_NO_ACCESS; - } - - data.type = KMOD_LOAD_EXTENSION_PACKET; - strncpy(data.name, name, sizeof(data.name)); - - return kmod_queue_cmd((vm_address_t)&data, sizeof(data)); -} - -kern_return_t -kmod_load_extension_with_dependencies(char *name, char **dependencies) -{ - kern_return_t result; - kmod_load_with_dependencies_cmd_t * data; - vm_size_t size; - char **c; - int i, count = 0; - - if (kmod_load_disabled) { - return KERN_NO_ACCESS; - } - - c = dependencies; - if (c) { - while (*c) 
{ - count++; c++; - } - } - size = sizeof(int) + KMOD_MAX_NAME * (count + 1) + 1; - data = (kmod_load_with_dependencies_cmd_t *)kalloc(size); - if (!data) return KERN_RESOURCE_SHORTAGE; - - data->type = KMOD_LOAD_WITH_DEPENDENCIES_PACKET; - strncpy(data->name, name, KMOD_MAX_NAME); - - c = dependencies; - for (i=0; i < count; i++) { - strncpy(data->dependencies[i], *c, KMOD_MAX_NAME); - c++; - } - data->dependencies[count][0] = 0; - - result = kmod_queue_cmd((vm_address_t)data, size); - kfree(data, size); - return result; -} -kern_return_t -kmod_send_generic(int type, void *generic_data, int size) -{ - kern_return_t result; - kmod_generic_cmd_t * data; - vm_size_t cmd_size; - - // add sizeof(int) for the type field - cmd_size = size + sizeof(int); - data = (kmod_generic_cmd_t *)kalloc(cmd_size); - if (!data) return KERN_RESOURCE_SHORTAGE; - - data->type = type; - bcopy(data->data, generic_data, size); - - result = kmod_queue_cmd((vm_address_t)data, cmd_size); - kfree(data, cmd_size); - return result; -} - -extern vm_offset_t sectPRELINKB; -extern int sectSizePRELINK; -extern int kth_started; - -/* - * Operates only on 32 bit mach keaders on behalf of kernel module loader - * if WRITE_PROTECT_MODULE_TEXT is defined. 
- */ -kern_return_t -kmod_create_internal(kmod_info_t *info, kmod_t *id) -{ - kern_return_t rc; - boolean_t isPrelink; - - if (!info) return KERN_INVALID_ADDRESS; - - // double check for page alignment - if ((info->address | info->hdr_size) & (PAGE_SIZE - 1)) { - return KERN_INVALID_ADDRESS; - } - - isPrelink = ((info->address >= sectPRELINKB) && (info->address < (sectPRELINKB + sectSizePRELINK))); - if (!isPrelink && kth_started) { - rc = vm_map_wire(kernel_map, info->address + info->hdr_size, - info->address + info->size, VM_PROT_DEFAULT, FALSE); - if (rc != KERN_SUCCESS) { - return rc; - } - } -#if WRITE_PROTECT_MODULE_TEXT - { - struct section * sect = getsectbynamefromheader( - (struct mach_header*) info->address, "__TEXT", "__text"); - - if(sect) { - (void) vm_map_protect(kernel_map, round_page(sect->addr), - trunc_page(sect->addr + sect->size), - VM_PROT_READ|VM_PROT_EXECUTE, TRUE); - } - } -#endif /* WRITE_PROTECT_MODULE_TEXT */ - - mutex_lock(kmod_lock); - - // check to see if already loaded - if (kmod_lookupbyname(info->name)) { - mutex_unlock(kmod_lock); - if (!isPrelink) { - rc = vm_map_unwire(kernel_map, info->address + info->hdr_size, - info->address + info->size, FALSE); - assert(rc == KERN_SUCCESS); - } - return KERN_INVALID_ARGUMENT; - } - - info->id = kmod_index++; - info->reference_count = 0; - - info->next = kmod; - kmod = info; - - *id = info->id; - - clock_get_uptime(&last_loaded_timestamp); - save_loaded_kext_paniclist(&printf); - - mutex_unlock(kmod_lock); - -#if DEBUG - printf("kmod_create: %s (id %d), %d pages loaded at 0x%x, header size 0x%x\n", - info->name, info->id, info->size / PAGE_SIZE, info->address, info->hdr_size); -#endif /* DEBUG */ - - return KERN_SUCCESS; -} - - -kern_return_t -kmod_create(host_priv_t host_priv, - vm_address_t addr, - kmod_t *id) -{ -#ifdef SECURE_KERNEL - return KERN_NOT_SUPPORTED; -#else - kmod_info_t *info; - - if (kmod_load_disabled) { - return KERN_NO_ACCESS; - } - - info = (kmod_info_t *)addr; - - if 
(host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; - return kmod_create_internal(info, id); -#endif -} - -kern_return_t -kmod_create_fake_with_address(const char *name, const char *version, - vm_address_t address, vm_size_t size, - int * return_id) -{ - kmod_info_t *info; - - if (!name || ! version || - (1 + strlen(name) > KMOD_MAX_NAME) || - (1 + strlen(version) > KMOD_MAX_NAME)) { - - return KERN_INVALID_ARGUMENT; - } - - info = (kmod_info_t *)kalloc(sizeof(kmod_info_t)); - if (!info) { - return KERN_RESOURCE_SHORTAGE; - } - - // make de fake - info->info_version = KMOD_INFO_VERSION; - bcopy(name, info->name, 1 + strlen(name)); - bcopy(version, info->version, 1 + strlen(version)); //NIK fixed this part - info->reference_count = 1; // keep it from unloading, starting, stopping - info->reference_list = NULL; - info->address = address; - info->size = size; - info->hdr_size = 0; - info->start = info->stop = NULL; - - mutex_lock(kmod_lock); - - // check to see if already "loaded" - if (kmod_lookupbyname(info->name)) { - mutex_unlock(kmod_lock); - kfree(info, sizeof(kmod_info_t)); - return KERN_INVALID_ARGUMENT; - } - - info->id = kmod_index++; - if (return_id) - *return_id = info->id; - - info->next = kmod; - kmod = info; - - mutex_unlock(kmod_lock); - - return KERN_SUCCESS; -} - -kern_return_t -kmod_create_fake(const char *name, const char *version) -{ - return kmod_create_fake_with_address(name, version, 0, 0, NULL); -} - - -static kern_return_t -_kmod_destroy_internal(kmod_t id, boolean_t fake) -{ - kern_return_t rc; - kmod_info_t *k; - kmod_info_t *p; - - mutex_lock(kmod_lock); - - k = p = kmod; - while (k) { - if (k->id == id) { - kmod_reference_t *r, *t; - - if (!fake && (k->reference_count != 0)) { - mutex_unlock(kmod_lock); - return KERN_INVALID_ARGUMENT; - } - - if (k == p) { // first element - kmod = k->next; - } else { - p->next = k->next; - } - mutex_unlock(kmod_lock); - - r = k->reference_list; - while (r) { - r->info->reference_count--; - t = r; - r 
= r->next; - kfree(t, sizeof(struct kmod_reference)); - } - - if (!fake) - { -#if DEBUG - printf("kmod_destroy: %s (id %d), deallocating %d pages starting at 0x%x\n", - k->name, k->id, k->size / PAGE_SIZE, k->address); -#endif /* DEBUG */ - - if( (k->address >= sectPRELINKB) && (k->address < (sectPRELINKB + sectSizePRELINK))) - { - vm_offset_t - virt = ml_static_ptovirt(k->address); - if( virt) { - ml_static_mfree( virt, k->size); - } - } - else - { - rc = vm_map_unwire(kernel_map, k->address + k->hdr_size, - k->address + k->size, FALSE); - assert(rc == KERN_SUCCESS); - - rc = vm_deallocate(kernel_map, k->address, k->size); - assert(rc == KERN_SUCCESS); - } - } - return KERN_SUCCESS; - } - p = k; - k = k->next; - } - - if (!fake) { - save_loaded_kext_paniclist(&printf); - } - - mutex_unlock(kmod_lock); - - return KERN_INVALID_ARGUMENT; -} - -kern_return_t -kmod_destroy_internal(kmod_t id) -{ - return _kmod_destroy_internal(id, FALSE); -} - -kern_return_t -kmod_destroy(host_priv_t host_priv, - kmod_t id) -{ - if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; - return _kmod_destroy_internal(id, FALSE); -} - -kern_return_t -kmod_destroy_fake(kmod_t id) -{ - return _kmod_destroy_internal(id, TRUE); -} - -kern_return_t -kmod_start_or_stop( - kmod_t id, - int start, - kmod_args_t *data, - mach_msg_type_number_t *dataCount) -{ - kern_return_t rc = KERN_SUCCESS; - void * user_data = NULL; - kern_return_t (*func)(kmod_info_t *, void *); - kmod_info_t *k; - - if (start && kmod_load_disabled) { - return KERN_NO_ACCESS; - } - - mutex_lock(kmod_lock); - - k = kmod_lookupbyid(id); - if (!k || k->reference_count) { - mutex_unlock(kmod_lock); - rc = KERN_INVALID_ARGUMENT; - goto finish; - } - - if (start) { - func = (void *)k->start; - } else { - func = (void *)k->stop; - } - - mutex_unlock(kmod_lock); - - // - // call kmod entry point - // - if (data && dataCount && *data && *dataCount) { - vm_map_offset_t map_addr; - vm_map_copyout(kernel_map, &map_addr, 
(vm_map_copy_t)*data); - user_data = CAST_DOWN(void *, map_addr); - } - - rc = (*func)(k, user_data); - -finish: - - if (user_data) { - (void) vm_deallocate(kernel_map, (vm_offset_t)user_data, *dataCount); - } - if (data) *data = NULL; - if (dataCount) *dataCount = 0; - - return rc; -} - - -/* - * The retain and release calls take no user data, but the caller - * may have sent some in error (the MIG definition allows it). - * If this is the case, they will just return that same data - * right back to the caller (since they never touch the *data and - * *dataCount fields). - */ -kern_return_t -kmod_retain(kmod_t id) -{ - kern_return_t rc = KERN_SUCCESS; - - kmod_info_t *t; // reference to - kmod_info_t *f; // reference from - kmod_reference_t *r = NULL; - - r = (kmod_reference_t *)kalloc(sizeof(struct kmod_reference)); - if (!r) { - rc = KERN_RESOURCE_SHORTAGE; - goto finish; - } - - mutex_lock(kmod_lock); - - t = kmod_lookupbyid(KMOD_UNPACK_TO_ID(id)); - f = kmod_lookupbyid(KMOD_UNPACK_FROM_ID(id)); - if (!t || !f) { - mutex_unlock(kmod_lock); - if (r) kfree(r, sizeof(struct kmod_reference)); - rc = KERN_INVALID_ARGUMENT; - goto finish; - } - - r->next = f->reference_list; - r->info = t; - f->reference_list = r; - t->reference_count++; - - mutex_unlock(kmod_lock); - -finish: - - return rc; -} - - -kern_return_t -kmod_release(kmod_t id) -{ - kern_return_t rc = KERN_INVALID_ARGUMENT; - - kmod_info_t *t; // reference to - kmod_info_t *f; // reference from - kmod_reference_t *r = NULL; - kmod_reference_t * p; - - mutex_lock(kmod_lock); - - t = kmod_lookupbyid(KMOD_UNPACK_TO_ID(id)); - f = kmod_lookupbyid(KMOD_UNPACK_FROM_ID(id)); - if (!t || !f) { - rc = KERN_INVALID_ARGUMENT; - goto finish; - } - - p = r = f->reference_list; - while (r) { - if (r->info == t) { - if (p == r) { // first element - f->reference_list = r->next; - } else { - p->next = r->next; - } - r->info->reference_count--; - - mutex_unlock(kmod_lock); - kfree(r, sizeof(struct kmod_reference)); - rc = 
KERN_SUCCESS; - goto finish; - } - p = r; - r = r->next; - } - - mutex_unlock(kmod_lock); - -finish: - - return rc; -} - - -kern_return_t -kmod_control(host_priv_t host_priv, - kmod_t id, - kmod_control_flavor_t flavor, - kmod_args_t *data, - mach_msg_type_number_t *dataCount) -{ - kern_return_t rc = KERN_SUCCESS; - - /* Only allow non-root access to retrieve kernel symbols or UUID. - */ - if (flavor != KMOD_CNTL_GET_KERNEL_SYMBOLS && - flavor != KMOD_CNTL_GET_UUID) { - - if (host_priv == HOST_PRIV_NULL) return KERN_INVALID_HOST; - } - - switch (flavor) { - - case KMOD_CNTL_START: - case KMOD_CNTL_STOP: - { - rc = kmod_start_or_stop(id, (flavor == KMOD_CNTL_START), - data, dataCount); - break; - } - - case KMOD_CNTL_RETAIN: - { - rc = kmod_retain(id); - break; - } - - case KMOD_CNTL_RELEASE: - { - rc = kmod_release(id); - break; - } - - case KMOD_CNTL_GET_CMD: - { - - cmd_queue_entry_t *e; - - /* Throw away any data the user may have sent in error. - * We must do this, because we are likely to return to - * some data for these commands (thus causing a leak of - * whatever data the user sent us in error). - */ - if (*data && *dataCount) { - vm_map_copy_discard(*data); - *data = NULL; - *dataCount = 0; - } - - mutex_lock(kmod_queue_lock); - - if (queue_empty(&kmod_cmd_queue)) { - wait_result_t res; - - res = thread_sleep_mutex((event_t)&kmod_cmd_queue, - kmod_queue_lock, - THREAD_ABORTSAFE); - if (queue_empty(&kmod_cmd_queue)) { - // we must have been interrupted! 
- mutex_unlock(kmod_queue_lock); - assert(res == THREAD_INTERRUPTED); - return KERN_ABORTED; - } - } - e = (cmd_queue_entry_t *)dequeue_head(&kmod_cmd_queue); - - mutex_unlock(kmod_queue_lock); - - rc = vm_map_copyin(kernel_map, (vm_map_address_t)e->data, - (vm_map_size_t)e->size, TRUE, (vm_map_copy_t *)data); - if (rc) { - mutex_lock(kmod_queue_lock); - enqueue_head(&kmod_cmd_queue, (queue_entry_t)e); - mutex_unlock(kmod_queue_lock); - *data = NULL; - *dataCount = 0; - return rc; - } - *dataCount = e->size; - - kfree(e, sizeof(struct cmd_queue_entry)); - - break; - } - - case KMOD_CNTL_GET_KERNEL_SYMBOLS: - { - /* Throw away any data the user may have sent in error. - * We must do this, because we are likely to return to - * some data for these commands (thus causing a leak of - * whatever data the user sent us in error). - */ - if (*data && *dataCount) { - vm_map_copy_discard(*data); - *data = NULL; - *dataCount = 0; - } - - return kmod_get_symbol_data(data, dataCount); - break; - } - - case KMOD_CNTL_FREE_LINKEDIT_DATA: - { - return kmod_free_linkedit_data(); - break; - } - - case KMOD_CNTL_GET_UUID: - { - uint32_t id_length = *dataCount; - char * kext_id = NULL; - vm_map_offset_t map_addr; - void * user_data; - kern_return_t result; - - /* Get the bundle id, if provided, and discard the buffer sent down. 
- */ - if (*data && *dataCount) { - kmem_alloc(kernel_map, (vm_offset_t *)&kext_id, id_length); - if (!kext_id) { - return KERN_FAILURE; - } - - vm_map_copyout(kernel_map, &map_addr, (vm_map_copy_t)*data); - user_data = CAST_DOWN(void *, map_addr); - - memcpy(kext_id, user_data, id_length); - kext_id[id_length-1] = '\0'; - if (user_data) { - (void)vm_deallocate(kernel_map, (vm_offset_t)user_data, *dataCount); - } - *data = NULL; - *dataCount = 0; - } - - result = kmod_get_kext_uuid(kext_id, data, dataCount); - if (kext_id) { - kmem_free(kernel_map, (vm_offset_t)kext_id, id_length); - } - return result; - break; - } - - case KMOD_CNTL_DISABLE_LOAD: - { - kmod_load_disabled = 1; - rc = KERN_SUCCESS; - break; - } - - default: - rc = KERN_INVALID_ARGUMENT; - } - - return rc; -}; - -/******************************************************************************* -* This function creates a dummy symbol file for the running kernel based on data -* in the run-time image. This allows us to correctly link other executables -* (drivers, etc) against the kernel when the kernel image on the root filesystem -* does not match the live kernel, as c can occur during net-booting where the -* actual kernel image is obtained from the network via tftp rather than the root -* device. -* -* If a symbol table is available, then a link-suitable Mach-O file image is -* created containing a Mach Header and an LC_SYMTAB load command followed by the -* the symbol table data for mach_kernel. A UUID load command is also present for -* identification, so we don't link against the wrong kernel. 
+#include + +/********************************************************************* +********************************************************************** +*** KMOD INTERFACE DEPRECATED AS OF SNOWLEOPARD *** +********************************************************************** +********************************************************************** +* Except for kmod_get_info(), which continues to work for K32 with +* 32-bit clients, all remaining functions in this module remain +* for symbol linkage or MIG support only, +* and return KERN_NOT_SUPPORTED. * -* NOTE: This file supports only 32 bit kernels; adding support for 64 bit -* kernels is possible, but is not necessary yet. -*******************************************************************************/ -extern struct mach_header _mh_execute_header; -static int _linkedit_segment_freed = 0; - -static kern_return_t -kmod_get_symbol_data( - kmod_args_t * symbol_data, - mach_msg_type_number_t * data_size) -{ - kern_return_t result = KERN_FAILURE; - - struct load_command * load_cmd; - struct mach_header * orig_header = &_mh_execute_header; - struct segment_command * orig_text = NULL; - struct segment_command * orig_data = NULL; - struct segment_command * orig_linkedit = NULL; - struct uuid_command * orig_uuid = NULL; - struct symtab_command * orig_symtab = NULL; - struct section * sect; - struct section * const_text = NULL; - - vm_size_t header_size = 0; - vm_offset_t symtab_size; - vm_offset_t total_size; // copied out to 'data_size' - char * buffer = 0; // copied out to 'symbol_data' - - struct mach_header * header; - struct segment_command * seg_cmd = NULL; - struct symtab_command * symtab; - - unsigned int i; - caddr_t addr; - vm_offset_t offset; - - // only want to do these 1st call - static int syms_marked = 0; - - mutex_lock(kmod_lock); - - /***** - * Check for empty out parameter pointers, and zero them if ok. 
- */ - if (!symbol_data || !data_size) { - result = KERN_INVALID_ARGUMENT; - goto finish; - } - - *symbol_data = NULL; - *data_size = 0; - - if (_linkedit_segment_freed) { - result = KERN_MEMORY_FAILURE; - goto finish; - } - - /***** - * Scan the in-memory kernel's mach header for the parts we need to copy: - * TEXT (for basic file info + const section), DATA (for basic file info), - * LINKEDIT (for the symbol table entries), SYMTAB (for the symbol table - * overall). - */ - load_cmd = (struct load_command *)&orig_header[1]; - for (i = 0; i < orig_header->ncmds; i++) { - if (load_cmd->cmd == LC_SEGMENT) { - struct segment_command * orig_seg_cmd = - (struct segment_command *)load_cmd; - - if (!strncmp(SEG_TEXT, orig_seg_cmd->segname, strlen(SEG_TEXT))) { - orig_text = orig_seg_cmd; - } else if (!strncmp(SEG_DATA, orig_seg_cmd->segname, - strlen(SEG_DATA))) { - - orig_data = orig_seg_cmd; - } else if (!strncmp(SEG_LINKEDIT, orig_seg_cmd->segname, - strlen(SEG_LINKEDIT))) { - - orig_linkedit = orig_seg_cmd; - } - } else if (load_cmd->cmd == LC_UUID) { - orig_uuid = (struct uuid_command *)load_cmd; - } else if (load_cmd->cmd == LC_SYMTAB) { - orig_symtab = (struct symtab_command *)load_cmd; - } - - load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize); - } - - /* Bail if any wasn't found. - */ - if (!orig_text || !orig_data || !orig_linkedit || !orig_uuid || !orig_symtab) { - goto finish; - } - - /* Now seek out the const section of the TEXT segment, bailing if not found. - */ - sect = (struct section *)&orig_text[1]; - for (i = 0; i < orig_text->nsects; i++, sect++) { - if (!strncmp("__const", sect->sectname, sizeof("__const"))) { - const_text = sect; - break; - } - } - if (!const_text) { - goto finish; - } - - /***** - * Calculate the total size needed and allocate the buffer. In summing the - * total size, every size before the last must be rounded to a - * page-size increment. 
- */ - header_size = sizeof(struct mach_header) + - orig_text->cmdsize + orig_data->cmdsize + - orig_uuid->cmdsize + orig_symtab->cmdsize; - symtab_size = (orig_symtab->nsyms * sizeof(struct nlist)) + - orig_symtab->strsize; - total_size = round_page(header_size) + round_page(const_text->size) + - symtab_size; - - (void)kmem_alloc(kernel_map, (vm_offset_t *)&buffer, total_size); - if (!buffer) { - goto finish; - } - bzero((void *)buffer, total_size); - - /***** - * Set up the Mach-O header in the buffer. - */ - header = (struct mach_header *)buffer; - header->magic = orig_header->magic; - header->cputype = orig_header->cputype; - header->cpusubtype = orig_header->cpusubtype; - header->filetype = orig_header->filetype; - header->ncmds = 4; // TEXT, DATA, UUID, SYMTAB - header->sizeofcmds = header_size - sizeof(struct mach_header); - header->flags = orig_header->flags; - - /***** - * Initialize the current file offset and addr; updated as we go through, - * but only for fields that need proper info. - */ - offset = round_page(header_size); - addr = (caddr_t)const_text->addr; - - /***** - * Construct a TEXT segment load command. The only content of the TEXT - * segment that we actually copy is the __TEXT,__const, which contains the - * kernel vtables. The other sections are just filled with unincremented - * addr/offset and zero size and number fields. 
- */ - seg_cmd = (struct segment_command *)&header[1]; // just past mach header - memcpy(seg_cmd, orig_text, orig_text->cmdsize); - seg_cmd->vmaddr = (unsigned long)addr; - seg_cmd->vmsize = const_text->size; - seg_cmd->fileoff = 0; - seg_cmd->filesize = const_text->size + round_page(header_size); - seg_cmd->maxprot = 0; - seg_cmd->initprot = 0; - seg_cmd->flags = 0; - sect = (struct section *)(seg_cmd + 1); - for (i = 0; i < seg_cmd->nsects; i++, sect++) { - sect->addr = (unsigned long)addr; // only valid for __TEXT,__const - sect->size = 0; - sect->offset = offset; - sect->nreloc = 0; - if (0 == strncmp("__const", sect->sectname, sizeof("__const"))) { - sect->size = const_text->size; - addr += const_text->size; - offset += const_text->size; - const_text = sect; // retarget to constructed section - } - } - offset = round_page(offset); - - /***** - * Now copy the __DATA segment load command, but none of its content. - */ - seg_cmd = (struct segment_command *)((int)seg_cmd + seg_cmd->cmdsize); - memcpy(seg_cmd, orig_data, orig_data->cmdsize); - - seg_cmd->vmaddr = (unsigned long)addr; - seg_cmd->vmsize = 0x1000; // Why not just zero? DATA seg is empty. 
- seg_cmd->fileoff = offset; - seg_cmd->filesize = 0; - seg_cmd->maxprot = 0; - seg_cmd->initprot = 0; - seg_cmd->flags = 0; - sect = (struct section *)(seg_cmd+1); - for (i = 0; i < seg_cmd->nsects; i++, sect++) { - sect->addr = (unsigned long)addr; - sect->size = 0; - sect->offset = offset; - sect->nreloc = 0; - } - offset = round_page(offset); - - /* Set up LC_UUID command - */ - seg_cmd = (struct segment_command *)((int)seg_cmd + seg_cmd->cmdsize); - memcpy(seg_cmd, orig_uuid, orig_uuid->cmdsize); - - /* Set up LC_SYMTAB command - */ - symtab = (struct symtab_command *)((int)seg_cmd + seg_cmd->cmdsize); - symtab->cmd = LC_SYMTAB; - symtab->cmdsize = sizeof(struct symtab_command); - symtab->symoff = offset; - symtab->nsyms = orig_symtab->nsyms; - symtab->strsize = orig_symtab->strsize; - symtab->stroff = offset + symtab->nsyms * sizeof(struct nlist); - - /* Convert the symbol table in place (yes, in the running kernel) - * from section references to absolute references. - */ - if (!syms_marked) { - struct nlist * sym = (struct nlist *) orig_linkedit->vmaddr; - for (i = 0; i < orig_symtab->nsyms; i++, sym++) { - if ((sym->n_type & N_TYPE) == N_SECT) { - sym->n_sect = NO_SECT; - sym->n_type = (sym->n_type & ~N_TYPE) | N_ABS; - } - } - syms_marked = 1; - } - - /***** - * Copy the contents of the __TEXT,__const section and the linkedit symbol - * data into the constructed object file buffer. The header has already been - * filled in. 
- */ - memcpy(buffer + const_text->offset, (void *)const_text->addr, const_text->size); - memcpy(buffer + symtab->symoff, (void *)orig_linkedit->vmaddr, symtab_size); - - result = vm_map_copyin(kernel_map, - (vm_offset_t)buffer, - (vm_map_size_t)total_size, - /* src_destroy */ TRUE, - (vm_map_copy_t *)symbol_data); - if (result != KERN_SUCCESS) { - kmem_free(kernel_map, (vm_offset_t)buffer, total_size); - *symbol_data = NULL; - *data_size = 0; - goto finish; - } else { - *data_size = total_size; - } - -finish: - mutex_unlock(kmod_lock); - return result; -} - -/******************************************************************************* -* Drop the LINKEDIT segment from the running kernel to recover wired memory. -* This is invoked by kextd after it has successfully determined a file is -* available in the root filesystem to link against (either a symbol file it -* wrote, or /mach_kernel). -*******************************************************************************/ -// in IOCatalogue.cpp -extern int kernelLinkerPresent; - -static kern_return_t -kmod_free_linkedit_data(void) -{ - kern_return_t result = KERN_FAILURE; - - const char * dt_kernel_header_name = "Kernel-__HEADER"; - const char * dt_kernel_symtab_name = "Kernel-__SYMTAB"; - struct mach_header_t * dt_mach_header = NULL; - vm_size_t dt_mach_header_size = 0; - struct symtab_command *dt_symtab = NULL; - vm_size_t dt_symtab_size = 0; - int dt_result; - - struct segment_command * segmentLE; - boolean_t keepsyms = FALSE; - const char * segment_name = "__LINKEDIT"; -#if __ppc__ || __arm__ - const char * devtree_segment_name = "Kernel-__LINKEDIT"; - void * segment_paddress; - vm_size_t segment_size; -#endif - - mutex_lock(kmod_lock); - - /* The semantic is "make sure the linkedit segment is freed", so if we - * previously did it, it's a success. 
- */ - if (_linkedit_segment_freed) { - result = KERN_SUCCESS; - goto finish; - } else if (kernelLinkerPresent) { - // The in-kernel linker requires the linkedit segment to function. - // Refuse to dump if it's still around. - // XXX: We need a dedicated error return code for this. - printf("can't remove kernel __LINKEDIT segment - in-kernel linker needs it\n"); - result = KERN_MEMORY_FAILURE; - goto finish; - } - - /* Dispose of unnecessary stuff that the booter didn't need to load. - */ - dt_result = IODTGetLoaderInfo(dt_kernel_header_name, - (void **)&dt_mach_header, &dt_mach_header_size); - if (dt_result == 0 && dt_mach_header) { - IODTFreeLoaderInfo(dt_kernel_header_name, (void *)dt_mach_header, - round_page_32(dt_mach_header_size)); - } - dt_result = IODTGetLoaderInfo(dt_kernel_symtab_name, - (void **)&dt_symtab, &dt_symtab_size); - if (dt_result == 0 && dt_symtab) { - IODTFreeLoaderInfo(dt_kernel_symtab_name, (void *)dt_symtab, - round_page_32(dt_symtab_size)); - } - - PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms)); - - segmentLE = getsegbyname(segment_name); - if (!segmentLE) { - printf("error removing kernel __LINKEDIT segment\n"); - goto finish; - } - OSRuntimeUnloadCPPForSegment(segmentLE); -#if __ppc__ || __arm__ - if (!keepsyms && 0 == IODTGetLoaderInfo(devtree_segment_name, - &segment_paddress, &segment_size)) { - - IODTFreeLoaderInfo(devtree_segment_name, (void *)segment_paddress, - (int)segment_size); - } -#elif __i386__ - if (!keepsyms && segmentLE->vmaddr && segmentLE->vmsize) { - ml_static_mfree(segmentLE->vmaddr, segmentLE->vmsize); - } +* Some kernel-internal portions have been moved to +* libkern/OSKextLib.cpp and libkern/c++/OSKext.cpp. 
+**********************************************************************/ + +// bsd/sys/proc.h +extern void proc_selfname(char * buf, int size); + +#define NOT_SUPPORTED_USER64() \ + do { \ + char procname[64] = "unknown"; \ + proc_selfname(procname, sizeof(procname)); \ + printf("%s is not supported for 64-bit clients (called from %s)\n", \ + __FUNCTION__, procname); \ + } while (0) + +#define NOT_SUPPORTED_KERNEL() \ + do { \ + char procname[64] = "unknown"; \ + proc_selfname(procname, sizeof(procname)); \ + printf("%s is not supported on this kernel architecture (called from %s)\n", \ + __FUNCTION__, procname); \ + } while (0) + +#if __ppc__ || __i386__ +// in libkern/OSKextLib.cpp +extern kern_return_t kext_get_kmod_info( + kmod_info_array_t * kmod_list, + mach_msg_type_number_t * kmodCount); +#define KMOD_MIG_UNUSED #else -#error arch -#endif - result = KERN_SUCCESS; - -finish: - if (!keepsyms && result == KERN_SUCCESS) { - _linkedit_segment_freed = 1; - } - mutex_unlock(kmod_lock); - return result; -} - -/******************************************************************************* -* Retrieve the UUID load command payload from the running kernel. -*******************************************************************************/ -static kern_return_t -kmod_get_kext_uuid( - const char * kext_id, - kmod_args_t * data, - mach_msg_type_number_t * dataCount) -{ - kern_return_t result = KERN_FAILURE; - kmod_info_t * kmod_info = NULL; - unsigned int i; - char * uuid_data = 0; - struct mach_header * header = &_mh_execute_header; - struct load_command * load_cmd = (struct load_command *)&header[1]; - struct uuid_command * uuid_cmd; +#define KMOD_MIG_UNUSED __unused +#endif /* __ppc__ || __i386__ */ - /* If given no kext ID, retrieve the kernel UUID. 
- */ - if (!kext_id) { - header = &_mh_execute_header; - } else { - kmod_info = kmod_lookupbyname_locked(kext_id); - if (!kmod_info) { - result = KERN_INVALID_ARGUMENT; - goto finish; - } - - /* If the kmod is build-in, it's part of the kernel, so retrieve the - * kernel UUID. - */ - if (!kmod_info->address) { - header = &_mh_execute_header; - } else { - header = (struct mach_header *)kmod_info->address; - } - } - - load_cmd = (struct load_command *)&header[1]; - - for (i = 0; i < header->ncmds; i++) { - if (load_cmd->cmd == LC_UUID) { - uuid_cmd = (struct uuid_command *)load_cmd; - - /* kmem_alloc() a local buffer that's on a boundary known to work - * with vm_map_copyin(). - */ - result = kmem_alloc(kernel_map, (vm_offset_t *)&uuid_data, - sizeof(uuid_cmd->uuid)); - if (result != KERN_SUCCESS) { - result = KERN_RESOURCE_SHORTAGE; - goto finish; - } - - memcpy(uuid_data, uuid_cmd->uuid, sizeof(uuid_cmd->uuid)); - - result = vm_map_copyin(kernel_map, (vm_offset_t)uuid_data, - sizeof(uuid_cmd->uuid), /* src_destroy */ TRUE, - (vm_map_copy_t *)data); - if (result == KERN_SUCCESS) { - *dataCount = sizeof(uuid_cmd->uuid); - } else { - result = KERN_RESOURCE_SHORTAGE; - kmem_free(kernel_map, (vm_offset_t)uuid_data, - sizeof(uuid_cmd->uuid)); - } - goto finish; - } - - load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize); - } - -finish: - return result; -} +/********************************************************************* +* Old MIG routines that are no longer supported. +********************************************************************** +* We have to keep these around for ppc, i386, and x86_64. A 32-bit +* user-space client might call into the 64-bit kernel. Only +* kmod_get_info() retains a functional implementation (ppc/i386). 
+**********************************************************************/ kern_return_t -kmod_get_info(__unused host_t host, - kmod_info_array_t *kmods, - mach_msg_type_number_t *kmodCount) +kmod_create( + host_priv_t host_priv __unused, + vm_address_t addr __unused, + kmod_t * id __unused) { - vm_offset_t data; - kmod_info_t *k, *p1; - kmod_reference_t *r, *p2; - int ref_count; - unsigned size = 0; - kern_return_t rc = KERN_SUCCESS; - - *kmods = (void *)0; - *kmodCount = 0; - -retry: - mutex_lock(kmod_lock); - size = 0; - k = kmod; - while (k) { - size += sizeof(kmod_info_t); - r = k->reference_list; - while (r) { - size +=sizeof(kmod_reference_t); - r = r->next; - } - k = k->next; - } - mutex_unlock(kmod_lock); - if (!size) return KERN_SUCCESS; - - rc = kmem_alloc(kernel_map, &data, size); - if (rc) return rc; - - // copy kmod into data, retry if kmod's size has changed (grown) - // the copied out data is tweeked to figure what's what at user level - // change the copied out k->next pointers to point to themselves - // change the k->reference into a count, tack the references on - // the end of the data packet in the order they are found - - mutex_lock(kmod_lock); - k = kmod; p1 = (kmod_info_t *)data; - while (k) { - if ((p1 + 1) > (kmod_info_t *)(data + size)) { - mutex_unlock(kmod_lock); - kmem_free(kernel_map, data, size); - goto retry; - } - - *p1 = *k; - if (k->next) p1->next = k; - p1++; k = k->next; - } - - p2 = (kmod_reference_t *)p1; - k = kmod; p1 = (kmod_info_t *)data; - while (k) { - r = k->reference_list; ref_count = 0; - while (r) { - if ((p2 + 1) > (kmod_reference_t *)(data + size)) { - mutex_unlock(kmod_lock); - kmem_free(kernel_map, data, size); - goto retry; - } - // note the last 'k' in the chain has its next == 0 - // since there can only be one like that, - // this case is handled by the caller - *p2 = *r; - p2++; r = r->next; ref_count++; - } - p1->reference_list = (kmod_reference_t *)ref_count; - p1++; k = k->next; - } - 
mutex_unlock(kmod_lock); - - rc = vm_map_copyin(kernel_map, data, size, TRUE, (vm_map_copy_t *)kmods); - if (rc) { - kmem_free(kernel_map, data, size); - *kmods = NULL; - *kmodCount = 0; - return rc; - } - *kmodCount = size; - - return KERN_SUCCESS; -} - -/* - * Operates only on 32 bit mach keaders on behalf of kernel module loader - */ -static kern_return_t -kmod_call_funcs_in_section(struct mach_header *header, const char *sectName) -{ - typedef void (*Routine)(void); - Routine * routines; - int size, i; - - if (header->magic != MH_MAGIC) { - return KERN_INVALID_ARGUMENT; - } - - routines = (Routine *) getsectdatafromheader(header, SEG_TEXT, /*(char *)*/ sectName, &size); - if (!routines) return KERN_SUCCESS; - - size /= sizeof(Routine); - for (i = 0; i < size; i++) { - (*routines[i])(); - } - - return KERN_SUCCESS; + NOT_SUPPORTED_KERNEL(); + return KERN_NOT_SUPPORTED; } -/* - * Operates only on 32 bit mach keaders on behalf of kernel module loader - */ +/********************************************************************/ kern_return_t -kmod_initialize_cpp(kmod_info_t *info) +kmod_destroy( + host_priv_t host_priv __unused, + kmod_t id __unused) { - return kmod_call_funcs_in_section((struct mach_header *)info->address, "__constructor"); + NOT_SUPPORTED_KERNEL(); + return KERN_NOT_SUPPORTED; } -/* - * Operates only on 32 bit mach keaders on behalf of kernel module loader - */ +/********************************************************************/ kern_return_t -kmod_finalize_cpp(kmod_info_t *info) +kmod_control( + host_priv_t host_priv __unused, + kmod_t id __unused, + kmod_control_flavor_t flavor __unused, + kmod_args_t * data __unused, + mach_msg_type_number_t * dataCount __unused) { - return kmod_call_funcs_in_section((struct mach_header *)info->address, "__destructor"); -} + NOT_SUPPORTED_KERNEL(); + return KERN_NOT_SUPPORTED; +}; +/********************************************************************/ kern_return_t -kmod_default_start(__unused struct 
kmod_info *ki, __unused void *data) -{ - return KMOD_RETURN_SUCCESS; -} - +kmod_get_info( + host_t host __unused, + kmod_info_array_t * kmod_list KMOD_MIG_UNUSED, + mach_msg_type_number_t * kmodCount KMOD_MIG_UNUSED); kern_return_t -kmod_default_stop(__unused struct kmod_info *ki, __unused void *data) -{ - return KMOD_RETURN_SUCCESS; -} - -static void -kmod_dump_to(vm_offset_t *addr, unsigned int cnt, - int (*printf_func)(const char *fmt, ...)) +kmod_get_info( + host_t host __unused, + kmod_info_array_t * kmod_list KMOD_MIG_UNUSED, + mach_msg_type_number_t * kmodCount KMOD_MIG_UNUSED) { - vm_offset_t * kscan_addr = NULL; - kmod_info_t * k; - kmod_reference_t * r; - unsigned int i; - int found_kmod = 0; - kmod_info_t * stop_kmod = NULL; - - for (k = kmod; k; k = k->next) { - if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)k)) == 0) { - (*printf_func)(" kmod scan stopped due to missing " - "kmod page: %08x\n", stop_kmod); - break; - } - if (!k->address) { - continue; // skip fake entries for built-in kernel components - } - for (i = 0, kscan_addr = addr; i < cnt; i++, kscan_addr++) { - if ((*kscan_addr >= k->address) && - (*kscan_addr < (k->address + k->size))) { - - if (!found_kmod) { - (*printf_func)(" Kernel loadable modules in backtrace " - "(with dependencies):\n"); - } - found_kmod = 1; - (*printf_func)(" %s(%s)@0x%x->0x%x\n", - k->name, k->version, k->address, k->address + k->size - 1); - - for (r = k->reference_list; r; r = r->next) { - kmod_info_t * rinfo; - - if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)r)) == 0) { - (*printf_func)(" kmod dependency scan stopped " - "due to missing dependency page: %08x\n", r); - break; - } - - rinfo = r->info; - - if (pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)rinfo)) == 0) { - (*printf_func)(" kmod dependency scan stopped " - "due to missing kmod page: %08x\n", rinfo); - break; - } - - if (!rinfo->address) { - continue; // skip fake entries for built-ins - } - - (*printf_func)(" dependency: 
%s(%s)@0x%x\n", - rinfo->name, rinfo->version, rinfo->address); - } - - break; // only report this kmod for one backtrace address - } - } +#if __ppc__ || __i386__ + if (current_task() != kernel_task && task_has_64BitAddr(current_task())) { + NOT_SUPPORTED_USER64(); + return KERN_NOT_SUPPORTED; } - - return; -} - -void -kmod_dump(vm_offset_t *addr, unsigned int cnt) -{ - kmod_dump_to(addr, cnt, &kdb_printf); -} - -void kmod_dump_log(vm_offset_t *, unsigned); /* gcc 4 warn fix */ - -void -kmod_dump_log(vm_offset_t *addr, unsigned int cnt) -{ - kmod_dump_to(addr, cnt, &printf); + return kext_get_kmod_info(kmod_list, kmodCount); +#else + NOT_SUPPORTED_KERNEL(); + return KERN_NOT_SUPPORTED; +#endif /* __ppc__ || __i386__ */ } diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index 30e11cabd..c97771d04 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -65,6 +65,9 @@ ledger_enter( ledger_t ledger, ledger_item_t amount) { + if (ledger == LEDGER_NULL) + return KERN_SUCCESS; + /* Need to lock the ledger */ ledger_lock(ledger); @@ -211,7 +214,7 @@ kern_return_t ledger_create( (*new_ledger)->ledger_limit = transfer; /* Charge the ledger against the ledger_ledger */ - ledger_ledger->ledger_balance += sizeof(ledger_data_t); + ledger_ledger->ledger_balance += (ledger_item_t)sizeof(ledger_data_t); ledger_unlock(parent_ledger); ledger_unlock(ledger_ledger); @@ -256,7 +259,7 @@ kern_return_t ledger_terminate( (void) ledger_enter(ledger->ledger_parent, ledger->ledger_balance); /* adjust the balance of the creation ledger */ - (void) ledger_enter(ledger->ledger_ledger, -sizeof(*ledger)); + (void) ledger_enter(ledger->ledger_ledger, (ledger_item_t)-sizeof(*ledger)); /* delete the ledger */ ledger_deallocate(ledger); @@ -397,8 +400,10 @@ convert_ledger_to_port( { ipc_port_t port; - port = ipc_port_make_send(ledger->ledger_self); + if (ledger == LEDGER_NULL) + return IP_NULL; + port = ipc_port_make_send(ledger->ledger_self); return port; } @@ -409,7 +414,8 @@ ipc_port_t 
ledger_copy( ledger_t ledger) { - /* XXX reference counting */ - assert(ledger); + if (ledger == LEDGER_NULL) + return IP_NULL; + return(ipc_port_copy_send(ledger->ledger_self)); } diff --git a/osfmk/kern/lock.h b/osfmk/kern/lock.h index eb5ed024e..8366e26a6 100644 --- a/osfmk/kern/lock.h +++ b/osfmk/kern/lock.h @@ -74,63 +74,6 @@ __BEGIN_DECLS #ifndef MACH_KERNEL_PRIVATE -typedef struct __mutex__ mutex_t; - -#else /* MACH_KERNEL_PRIVATE */ - -#define decl_mutex_data(class,name) class mutex_t name; -#define mutex_addr(m) (&(m)) - -extern void mutex_init( - mutex_t *mutex, - unsigned short tag); - -#ifdef i386 -extern void mutex_try_spin( - mutex_t *mutex); - -extern void mutex_lock_spin( - mutex_t *mutex); - -extern void mutex_convert_spin( - mutex_t *mutex); -#else -#define mutex_try_spin(l) mutex_try(l) -#define mutex_lock_spin(l) mutex_lock(l) -#define mutex_convert_spin(l) do {} while (0) -#endif - -#endif /* MACH_KERNEL_PRIVATE */ - -extern mutex_t *mutex_alloc( - unsigned short tag); - -extern void mutex_free( - mutex_t *mutex); - -extern void mutex_lock( - mutex_t *mutex); - -extern void mutex_unlock( - mutex_t *mutex); - -extern boolean_t mutex_try( - mutex_t *mutex); - -extern void mutex_pause(uint32_t); -extern void mutex_yield(mutex_t *); - -#define MA_OWNED 0x01 -#define MA_NOTOWNED 0x02 - -void _mutex_assert ( - mutex_t *mutex, - unsigned int what); - -#define mutex_assert(a, b) _mutex_assert(a, b) - -#ifndef MACH_KERNEL_PRIVATE - typedef struct __lock__ lock_t; #else /* MACH_KERNEL_PRIVATE */ @@ -176,19 +119,6 @@ extern wait_result_t thread_sleep_usimple_lock( usimple_lock_t lock, wait_interrupt_t interruptible); -/* Sleep, unlocking and then relocking a mutex in the process */ -extern wait_result_t thread_sleep_mutex( - event_t event, - mutex_t *mutex, - wait_interrupt_t interruptible); - -/* Sleep with a deadline, unlocking and then relocking a mutex in the process */ -extern wait_result_t thread_sleep_mutex_deadline( - event_t event, - mutex_t 
*mutex, - uint64_t deadline, - wait_interrupt_t interruptible); - /* Sleep, unlocking and then relocking a write lock in the process */ extern wait_result_t thread_sleep_lock_write( event_t event, diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c index 5718455f9..e31e970c6 100644 --- a/osfmk/kern/locks.c +++ b/osfmk/kern/locks.c @@ -92,11 +92,12 @@ static queue_head_t lck_grp_queue; static unsigned int lck_grp_cnt; -decl_mutex_data(static,lck_grp_lock) +decl_lck_mtx_data(static,lck_grp_lock) +static lck_mtx_ext_t lck_grp_lock_ext; lck_grp_attr_t LockDefaultGroupAttr; -lck_grp_t LockCompatGroup; -lck_attr_t LockDefaultLckAttr; +lck_grp_t LockCompatGroup; +lck_attr_t LockDefaultLckAttr; /* * Routine: lck_mod_init @@ -107,11 +108,30 @@ lck_mod_init( void) { queue_init(&lck_grp_queue); - mutex_init(&lck_grp_lock, 0); - lck_grp_cnt = 0; - lck_grp_attr_setdefault( &LockDefaultGroupAttr); - lck_grp_init( &LockCompatGroup, "Compatibility APIs", LCK_GRP_ATTR_NULL); + + /* + * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids + * grabbing the lck_grp_lock before it is initialized. 
+ */ + + bzero(&LockCompatGroup, sizeof(lck_grp_t)); + (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME); + + if (LcksOpts & enaLkStat) + LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT; + else + LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE; + + LockCompatGroup.lck_grp_refcnt = 1; + + enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup); + lck_grp_cnt = 1; + + lck_grp_attr_setdefault(&LockDefaultGroupAttr); lck_attr_setdefault(&LockDefaultLckAttr); + + lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr); + } /* @@ -211,10 +231,10 @@ lck_grp_init( grp->lck_grp_refcnt = 1; - mutex_lock(&lck_grp_lock); + lck_mtx_lock(&lck_grp_lock); enqueue_tail(&lck_grp_queue, (queue_entry_t)grp); lck_grp_cnt++; - mutex_unlock(&lck_grp_lock); + lck_mtx_unlock(&lck_grp_lock); } @@ -227,10 +247,10 @@ void lck_grp_free( lck_grp_t *grp) { - mutex_lock(&lck_grp_lock); + lck_mtx_lock(&lck_grp_lock); lck_grp_cnt--; (void)remque((queue_entry_t)grp); - mutex_unlock(&lck_grp_lock); + lck_mtx_unlock(&lck_grp_lock); lck_grp_deallocate(grp); } @@ -477,8 +497,12 @@ lck_mtx_sleep( if (res == THREAD_WAITING) { lck_mtx_unlock(lck); res = thread_block(THREAD_CONTINUE_NULL); - if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) - lck_mtx_lock(lck); + if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) { + if ((lck_sleep_action & LCK_SLEEP_SPIN)) + lck_mtx_lock_spin(lck); + else + lck_mtx_lock(lck); + } } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) @@ -571,7 +595,7 @@ lck_mtx_lock_wait ( holder->sched_pri < priority ) { KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, - holder->sched_pri, priority, (int)holder, (int)lck, 0); + holder->sched_pri, priority, holder, lck, 0); set_sched_pri(holder, priority); } @@ -652,7 +676,7 @@ lck_mtx_lock_acquire( if (thread->sched_pri < priority) { KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, - thread->sched_pri, priority, 0, 
(int)lck, 0); + thread->sched_pri, priority, 0, lck, 0); set_sched_pri(thread, priority); } @@ -701,7 +725,7 @@ lck_mtx_unlock_wakeup ( if (thread->sched_mode & TH_MODE_ISDEPRESSED) { KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, - thread->sched_pri, DEPRESSPRI, 0, (int)lck, 0); + thread->sched_pri, DEPRESSPRI, 0, lck, 0); set_sched_pri(thread, DEPRESSPRI); } @@ -711,7 +735,7 @@ lck_mtx_unlock_wakeup ( MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE, thread->sched_pri, thread->priority, - 0, (int)lck, 0); + 0, lck, 0); } compute_priority(thread, FALSE); @@ -785,26 +809,27 @@ unsigned int mutex_yield_wait = 0; unsigned int mutex_yield_no_wait = 0; void -mutex_yield( - mutex_t *mutex) +lck_mtx_yield( + lck_mtx_t *lck) { - lck_mtx_t *lck; - + int waiters; + #if DEBUG - _mutex_assert(mutex, MA_OWNED); + lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED); #endif /* DEBUG */ - - lck = (lck_mtx_t *) mutex; + if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT) - lck = &lck->lck_mtx_ptr->lck_mtx; + waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters; + else + waiters = lck->lck_mtx_waiters; - if (! 
lck->lck_mtx_waiters) { + if ( !waiters) { mutex_yield_no_wait++; } else { mutex_yield_wait++; - mutex_unlock(mutex); + lck_mtx_unlock(lck); mutex_pause(0); - mutex_lock(mutex); + lck_mtx_lock(lck); } } @@ -902,13 +927,13 @@ host_lockgroup_info( if (host == HOST_NULL) return KERN_INVALID_HOST; - mutex_lock(&lck_grp_lock); + lck_mtx_lock(&lck_grp_lock); lockgroup_info_size = round_page(lck_grp_cnt * sizeof *lockgroup_info); kr = kmem_alloc_pageable(ipc_kernel_map, &lockgroup_info_addr, lockgroup_info_size); if (kr != KERN_SUCCESS) { - mutex_unlock(&lck_grp_lock); + lck_mtx_unlock(&lck_grp_lock); return(kr); } @@ -952,7 +977,7 @@ host_lockgroup_info( } *lockgroup_infoCntp = lck_grp_cnt; - mutex_unlock(&lck_grp_lock); + lck_mtx_unlock(&lck_grp_lock); used = (*lockgroup_infoCntp) * sizeof *lockgroup_info; @@ -983,23 +1008,19 @@ extern void lock_write_to_read_EXT(lck_rw_t *lock); extern wait_result_t thread_sleep_lock_write_EXT( event_t event, lck_rw_t *lock, wait_interrupt_t interruptible); -extern lck_mtx_t *mutex_alloc_EXT(unsigned short tag); -extern void mutex_free_EXT(lck_mtx_t *mutex); -extern void mutex_init_EXT(lck_mtx_t *mutex, unsigned short tag); -extern void mutex_lock_EXT(lck_mtx_t *mutex); -extern boolean_t mutex_try_EXT(lck_mtx_t *mutex); -extern void mutex_unlock_EXT(lck_mtx_t *mutex); -extern wait_result_t thread_sleep_mutex_EXT( - event_t event, lck_mtx_t *mutex, wait_interrupt_t interruptible); -extern wait_result_t thread_sleep_mutex_deadline_EXT( - event_t event, lck_mtx_t *mutex, uint64_t deadline, wait_interrupt_t interruptible); - extern void usimple_lock_EXT(lck_spin_t *lock); extern void usimple_lock_init_EXT(lck_spin_t *lock, unsigned short tag); extern unsigned int usimple_lock_try_EXT(lck_spin_t *lock); extern void usimple_unlock_EXT(lck_spin_t *lock); extern wait_result_t thread_sleep_usimple_lock_EXT(event_t event, lck_spin_t *lock, wait_interrupt_t interruptible); + +lck_mtx_t* mutex_alloc_EXT(__unused unsigned short tag); +void 
mutex_free_EXT(lck_mtx_t *mutex); +void mutex_init_EXT(lck_mtx_t *mutex, __unused unsigned short tag); +wait_result_t thread_sleep_mutex_EXT(event_t event, lck_mtx_t *mutex, wait_interrupt_t interruptible); +wait_result_t thread_sleep_mutex_deadline_EXT(event_t event, lck_mtx_t *mutex, uint64_t deadline, wait_interrupt_t interruptible); + lck_rw_t * lock_alloc_EXT( __unused boolean_t can_sleep, @@ -1070,68 +1091,6 @@ thread_sleep_lock_write_EXT( return( lck_rw_sleep(lock, LCK_SLEEP_EXCLUSIVE, event, interruptible)); } -lck_mtx_t * -mutex_alloc_EXT( - __unused unsigned short tag) -{ - return(lck_mtx_alloc_init(&LockCompatGroup, LCK_ATTR_NULL)); -} - -void -mutex_free_EXT( - lck_mtx_t *mutex) -{ - lck_mtx_free(mutex, &LockCompatGroup); -} - -void -mutex_init_EXT( - lck_mtx_t *mutex, - __unused unsigned short tag) -{ - lck_mtx_init(mutex, &LockCompatGroup, LCK_ATTR_NULL); -} - -void -mutex_lock_EXT( - lck_mtx_t *mutex) -{ - lck_mtx_lock(mutex); -} - -boolean_t -mutex_try_EXT( - lck_mtx_t *mutex) -{ - return(lck_mtx_try_lock(mutex)); -} - -void -mutex_unlock_EXT( - lck_mtx_t *mutex) -{ - lck_mtx_unlock(mutex); -} - -wait_result_t -thread_sleep_mutex_EXT( - event_t event, - lck_mtx_t *mutex, - wait_interrupt_t interruptible) -{ - return( lck_mtx_sleep(mutex, LCK_SLEEP_DEFAULT, event, interruptible)); -} - -wait_result_t -thread_sleep_mutex_deadline_EXT( - event_t event, - lck_mtx_t *mutex, - uint64_t deadline, - wait_interrupt_t interruptible) -{ - return( lck_mtx_sleep_deadline(mutex, LCK_SLEEP_DEFAULT, event, interruptible, deadline)); -} - void usimple_lock_EXT( lck_spin_t *lock) @@ -1169,3 +1128,43 @@ thread_sleep_usimple_lock_EXT( { return( lck_spin_sleep(lock, LCK_SLEEP_DEFAULT, event, interruptible)); } +lck_mtx_t * +mutex_alloc_EXT( + __unused unsigned short tag) +{ + return(lck_mtx_alloc_init(&LockCompatGroup, LCK_ATTR_NULL)); +} + +void +mutex_free_EXT( + lck_mtx_t *mutex) +{ + lck_mtx_free(mutex, &LockCompatGroup); +} + +void +mutex_init_EXT( + lck_mtx_t 
*mutex, + __unused unsigned short tag) +{ + lck_mtx_init(mutex, &LockCompatGroup, LCK_ATTR_NULL); +} + +wait_result_t +thread_sleep_mutex_EXT( + event_t event, + lck_mtx_t *mutex, + wait_interrupt_t interruptible) +{ + return( lck_mtx_sleep(mutex, LCK_SLEEP_DEFAULT, event, interruptible)); +} + +wait_result_t +thread_sleep_mutex_deadline_EXT( + event_t event, + lck_mtx_t *mutex, + uint64_t deadline, + wait_interrupt_t interruptible) +{ + return( lck_mtx_sleep_deadline(mutex, LCK_SLEEP_DEFAULT, event, interruptible, deadline)); +} diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h index b936f226a..d23fbc36e 100644 --- a/osfmk/kern/locks.h +++ b/osfmk/kern/locks.h @@ -57,8 +57,9 @@ typedef unsigned int lck_sleep_action_t; #define LCK_SLEEP_UNLOCK 0x01 /* Release the lock and return unheld */ #define LCK_SLEEP_SHARED 0x02 /* Reclaim the lock in shared mode (RW only) */ #define LCK_SLEEP_EXCLUSIVE 0x04 /* Reclaim the lock in exclusive mode (RW only) */ +#define LCK_SLEEP_SPIN 0x08 /* Reclaim the lock in spin mode (mutex only) */ -#define LCK_SLEEP_MASK 0x07 /* Valid actions */ +#define LCK_SLEEP_MASK 0x0f /* Valid actions */ #ifdef MACH_KERNEL_PRIVATE @@ -104,11 +105,11 @@ typedef struct _lck_grp_stat_ { typedef struct _lck_grp_ { queue_chain_t lck_grp_link; - unsigned int lck_grp_refcnt; - unsigned int lck_grp_spincnt; - unsigned int lck_grp_mtxcnt; - unsigned int lck_grp_rwcnt; - unsigned int lck_grp_attr; + uint32_t lck_grp_refcnt; + uint32_t lck_grp_spincnt; + uint32_t lck_grp_mtxcnt; + uint32_t lck_grp_rwcnt; + uint32_t lck_grp_attr; char lck_grp_name[LCK_GRP_MAX_NAME]; lck_grp_stat_t lck_grp_stat; } lck_grp_t; @@ -121,7 +122,7 @@ typedef struct __lck_grp__ lck_grp_t; #ifdef MACH_KERNEL_PRIVATE typedef struct _lck_grp_attr_ { - unsigned int grp_attr_val; + uint32_t grp_attr_val; } lck_grp_attr_t; extern lck_grp_attr_t LockDefaultGroupAttr; @@ -265,6 +266,10 @@ extern wait_result_t lck_spin_sleep_deadline( extern boolean_t lck_spin_try_lock( lck_spin_t *lck); 
+struct _lck_mtx_ext_; +extern void lck_mtx_init_ext(lck_mtx_t *lck, struct _lck_mtx_ext_ *lck_ext, + lck_grp_t *grp, lck_attr_t *attr); + #endif @@ -278,13 +283,15 @@ extern void lck_mtx_init( lck_mtx_t *lck, lck_grp_t *grp, lck_attr_t *attr); - extern void lck_mtx_lock( lck_mtx_t *lck); +#if defined(__i386__) +extern void lck_mtx_unlock(lck_mtx_t *lck) __DARWIN10_ALIAS(lck_mtx_unlock); +#else extern void lck_mtx_unlock( lck_mtx_t *lck); - +#endif /* __i386__ */ extern void lck_mtx_destroy( lck_mtx_t *lck, lck_grp_t *grp); @@ -311,7 +318,12 @@ extern wait_result_t lck_mtx_sleep_deadline( extern boolean_t lck_mtx_try_lock( lck_mtx_t *lck); -#ifdef i386 +extern void mutex_pause(uint32_t); + +extern void lck_mtx_yield ( + lck_mtx_t *lck); + +#if defined(i386) || defined(x86_64) extern boolean_t lck_mtx_try_lock_spin( lck_mtx_t *lck); @@ -354,9 +366,6 @@ extern void lck_mtx_unlockspin_wakeup( extern boolean_t lck_mtx_ilk_unlock( lck_mtx_t *lck); -struct _lck_mtx_ext_; -extern void lck_mtx_init_ext(lck_mtx_t *lck, struct _lck_mtx_ext_ *lck_ext, - lck_grp_t *grp, lck_attr_t *attr); #endif #define decl_lck_rw_data(class,name) class lck_rw_t name; diff --git a/osfmk/kern/mach_param.h b/osfmk/kern/mach_param.h index 6294d5884..1afd09bf5 100644 --- a/osfmk/kern/mach_param.h +++ b/osfmk/kern/mach_param.h @@ -69,23 +69,23 @@ #ifndef _KERN_MACH_PARAM_H_ #define _KERN_MACH_PARAM_H_ -#define THREAD_MAX CONFIG_THREAD_MAX /* Max number of threads */ -#define TASK_MAX CONFIG_TASK_MAX /* Max number of tasks */ +extern int thread_max, task_threadmax, task_max; + #define THREAD_CHUNK 64 /* Allocation chunk */ #define TASK_CHUNK 64 /* Allocation chunk */ -#define PORT_MAX ((TASK_MAX * 3 + THREAD_MAX) /* kernel */ \ - + (THREAD_MAX * 2) /* user */ \ +#define PORT_MAX ((task_max * 3 + thread_max) /* kernel */ \ + + (thread_max * 2) /* user */ \ + 40000) /* slop for objects */ /* Number of ports, system-wide */ -#define SET_MAX (TASK_MAX + THREAD_MAX + 200) +#define SET_MAX (task_max + 
thread_max + 200) /* Max number of port sets */ #define ITE_MAX (1 << 16) /* Max number of splay tree entries */ -#define SPACE_MAX (TASK_MAX + 5) /* Max number of IPC spaces */ +#define SPACE_MAX (task_max + 5) /* Max number of IPC spaces */ #define SEMAPHORE_MAX (PORT_MAX >> 1) /* Maximum number of semaphores */ diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c index 898dd3bfa..72d91647e 100644 --- a/osfmk/kern/machine.c +++ b/osfmk/kern/machine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -226,7 +226,7 @@ processor_shutdown( processor_doshutdown(processor); splx(s); - cpu_exit_wait(processor->cpu_num); + cpu_exit_wait(processor->cpu_id); return (KERN_SUCCESS); } @@ -293,7 +293,7 @@ processor_offline( thread_dispatch(old_thread, new_thread); - PMAP_DEACTIVATE_KERNEL(processor->cpu_num); + PMAP_DEACTIVATE_KERNEL(processor->cpu_id); pset = processor->processor_set; pset_lock(pset); diff --git a/osfmk/kern/misc_protos.h b/osfmk/kern/misc_protos.h index 3590d3c45..0b7d5a0cc 100644 --- a/osfmk/kern/misc_protos.h +++ b/osfmk/kern/misc_protos.h @@ -118,12 +118,26 @@ extern integer_t sprintf(char *buf, const char *fmt, ...) __deprecated; extern int printf(const char *format, ...) __printflike(1,2); +#if KERNEL_PRIVATE +int _consume_printf_args(int, ...); +#endif + +#if CONFIG_NO_PRINTF_STRINGS +#if KERNEL_PRIVATE +#define printf(x, ...) _consume_printf_args( 0, ## __VA_ARGS__ ) +#else +#define printf(x, ...) do {} while (0) +#endif +#endif + extern void dbugprintf(const char *format, ...) __printflike(1,2); extern int kdb_printf(const char *format, ...) __printflike(1,2); extern int kdb_log(const char *format, ...) __printflike(1,2); +extern int kdb_printf_unbuffered(const char *format, ...) __printflike(1,2); + extern void printf_init(void); extern int snprintf(char *, size_t, const char *, ...) 
__printflike(3,4); @@ -139,7 +153,7 @@ _doprnt( int __doprnt( register const char *fmt, - va_list *argp, + va_list argp, void (*putc)(int, void *), void *arg, int radix); @@ -156,8 +170,12 @@ extern void consdebug_putc(char); extern void consdebug_log(char); +extern void consdebug_putc_unbuffered(char); + extern void cnputc(char); +extern void cnputc_unbuffered(char); + extern int cngetc(void); extern int cnmaygetc(void); @@ -178,7 +196,6 @@ extern void delay( int n); -extern void norma_bootstrap(void); #if DIPC extern boolean_t no_bootstrap_task(void); @@ -195,14 +212,4 @@ user_addr_t get_useraddr(void); /* symbol lookup */ struct kmod_info_t; -extern int syms_formataddr( - vm_offset_t addr, - char *out, - vm_offset_t outsize); - -extern const char *syms_nameforaddr( - vm_offset_t addr, - vm_offset_t *ofs, - kmod_info_t **kmod); - #endif /* _MISC_PROTOS_H_ */ diff --git a/osfmk/kern/mk_timer.c b/osfmk/kern/mk_timer.c index 2fac290ec..2469de4f5 100644 --- a/osfmk/kern/mk_timer.c +++ b/osfmk/kern/mk_timer.c @@ -172,7 +172,7 @@ mk_timer_expire( msg.unused[0] = msg.unused[1] = msg.unused[2] = 0; - (void) mach_msg_send_from_kernel(&msg.header, sizeof (msg)); + (void) mach_msg_send_from_kernel_proper(&msg.header, sizeof (msg)); simple_lock(&timer->lock); } diff --git a/osfmk/kern/page_decrypt.c b/osfmk/kern/page_decrypt.c index f31c69908..9914fe9d0 100644 --- a/osfmk/kern/page_decrypt.c +++ b/osfmk/kern/page_decrypt.c @@ -26,50 +26,36 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include #include #include #include -/*#include */ -extern int hz; /* system clock's frequency */ - -/* #include */ -extern int tsleep(void *chan, int pri, const char *wmesg, int timo); - -/* #include */ -#define PZERO 22 /* No longer magic, shouldn't be here. 
XXX */ - -static int _dsmos_wait_for_callback(const void*,void*); - -static dsmos_page_transform_hook_t dsmos_hook = _dsmos_wait_for_callback; - -int -_dsmos_wait_for_callback(const void* from, void *to) -{ -/* printf("%s\n", __FUNCTION__); */ - while (dsmos_hook == NULL || dsmos_hook == _dsmos_wait_for_callback) - tsleep(&dsmos_hook, PZERO, "dsmos", hz / 10); - - return (*dsmos_hook) (from, to); -} +static dsmos_page_transform_hook_t dsmos_hook = NULL; void dsmos_page_transform_hook(dsmos_page_transform_hook_t hook) { -/* printf("%s\n", __FUNCTION__); */ + printf("DSMOS has arrived\n"); /* set the hook now - new callers will run with it */ dsmos_hook = hook; } int -dsmos_page_transform(const void* from, void *to, __unused unsigned long long src_offset, __unused void *ops) +dsmos_page_transform(const void* from, void *to, unsigned long long src_offset, void *ops) { -/* printf("%s\n", __FUNCTION__); */ - if (dsmos_hook == NULL) - return KERN_FAILURE; - return (*dsmos_hook) (from, to); + static boolean_t first_wait = TRUE; + + if (dsmos_hook == NULL) { + if (first_wait) { + first_wait = FALSE; + printf("Waiting for DSMOS...\n"); + } + return KERN_ABORTED; + } + return (*dsmos_hook) (from, to, src_offset, ops); } @@ -77,4 +63,4 @@ text_crypter_create_hook_t text_crypter_create=NULL; void text_crypter_create_hook_set(text_crypter_create_hook_t hook) { text_crypter_create=hook; -}; +} diff --git a/osfmk/kern/page_decrypt.h b/osfmk/kern/page_decrypt.h index f00202dff..ad81beef3 100644 --- a/osfmk/kern/page_decrypt.h +++ b/osfmk/kern/page_decrypt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,13 +26,15 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#ifdef KERNEL_PRIVATE + #ifndef _KERN_PAGE_DECRYPT_H #define _KERN_PAGE_DECRYPT_H /* * Interface for DSMOS */ -typedef int (*dsmos_page_transform_hook_t) (const void *,void*); +typedef int (*dsmos_page_transform_hook_t) (const void *,void*, unsigned long long, void *); extern void dsmos_page_transform_hook(dsmos_page_transform_hook_t hook); /* exported */ extern int dsmos_page_transform(const void *,void*, unsigned long long, void*); @@ -55,11 +57,8 @@ typedef struct pager_crypt_info pager_crypt_info_t; typedef int (*text_crypter_create_hook_t)(struct pager_crypt_info *crypt_info, const char *id, void *crypt_data); extern void text_crypter_create_hook_set(text_crypter_create_hook_t hook); -//extern kern_return_t text_crypter_create(pager_crypt_info_t *crypt_info, const char *id, -// void *crypt_data); extern text_crypter_create_hook_t text_crypter_create; #endif /* _KERN_PAGE_DECRYPT_H */ - - +#endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c index f8376b419..fd04f883a 100644 --- a/osfmk/kern/printf.c +++ b/osfmk/kern/printf.c @@ -94,9 +94,7 @@ * %0m.n zero-padding * %*.* width and precision taken from arguments * - * This version does not implement %f, %e, or %g. It accepts, but - * ignores, an `l' as in %ld, %lo, %lx, and %lu, and therefore will not - * work correctly on machines for which sizeof(long) != sizeof(int). + * This version does not implement %f, %e, or %g. * * As mentioned, this version does not return any reasonable value. * @@ -186,9 +184,18 @@ static char digs[] = "0123456789abcdef"; #if CONFIG_NO_PRINTF_STRINGS -#undef printf(x, ...) +/* Prevent CPP from breaking the definition below */ +#undef printf #endif +int _consume_printf_args(int a __unused, ...) +{ + return 0; +} +void _consume_kprintf_args(int a __unused, ...) 
+{ +} + static int printnum( unsigned long long int u, /* number to print */ @@ -218,7 +225,7 @@ boolean_t _doprnt_truncates = FALSE; int __doprnt( const char *fmt, - va_list *argp, + va_list argp, /* character output routine */ void (*putc)(int, void *arg), void *arg, @@ -290,7 +297,7 @@ __doprnt( } } else if (c == '*') { - length = va_arg(*argp, int); + length = va_arg(argp, int); c = *++fmt; if (length < 0) { ladjust = !ladjust; @@ -308,13 +315,15 @@ __doprnt( } } else if (c == '*') { - prec = va_arg(*argp, int); + prec = va_arg(argp, int); c = *++fmt; } } if (c == 'l') { c = *++fmt; /* need it if sizeof(int) < sizeof(long) */ + if (sizeof(int)= 0) { u = n; @@ -547,9 +559,9 @@ __doprnt( print_unsigned: if (long_long) { - u = va_arg(*argp, unsigned long long); + u = va_arg(argp, unsigned long long); } else { - u = va_arg(*argp, unsigned long); + u = va_arg(argp, unsigned int); } goto print_num; @@ -575,11 +587,11 @@ __doprnt( u /= base; } while (u != 0); - length -= (&buf[MAXBUF-1] - p); + length -= (int)(&buf[MAXBUF-1] - p); if (sign_char) length--; if (prefix) - length -= strlen(prefix); + length -= (int)strlen(prefix); if (padc == ' ' && !ladjust) { /* blank padding goes before prefix */ @@ -649,7 +661,7 @@ _doprnt( void (*putc)(char), int radix) /* default radix - for '%r' */ { - __doprnt(fmt, argp, dummy_putc, putc, radix); + __doprnt(fmt, *argp, dummy_putc, putc, radix); } #if MP_PRINTF @@ -796,6 +808,18 @@ consdebug_putc(char c) PE_kputc(c); } +void +consdebug_putc_unbuffered(char c) +{ + if ((debug_mode && !disable_debug_output) || !disableConsoleOutput) + cnputc_unbuffered(c); + + debug_putc(c); + + if (!console_is_serial()) + if (!disable_serial_output) + PE_kputc(c); +} void consdebug_log(char c) @@ -825,6 +849,17 @@ kdb_log(const char *fmt, ...) return 0; } +int +kdb_printf_unbuffered(const char *fmt, ...) 
+{ + va_list listp; + + va_start(listp, fmt); + _doprnt(fmt, &listp, consdebug_putc_unbuffered, 16); + va_end(listp); + return 0; +} + static void copybyte(int c, void *arg) { @@ -851,8 +886,8 @@ sprintf(char *buf, const char *fmt, ...) va_start(listp, fmt); copybyte_str = buf; - __doprnt(fmt, &listp, copybyte, &copybyte_str, 16); + __doprnt(fmt, listp, copybyte, &copybyte_str, 16); va_end(listp); *copybyte_str = '\0'; - return strlen(buf); + return (int)strlen(buf); } diff --git a/osfmk/kern/priority.c b/osfmk/kern/priority.c index bc0e89a5c..13a3d2e81 100644 --- a/osfmk/kern/priority.c +++ b/osfmk/kern/priority.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c index 9436505b3..341069724 100644 --- a/osfmk/kern/processor.c +++ b/osfmk/kern/processor.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -92,7 +92,7 @@ queue_head_t tasks; int tasks_count; queue_head_t threads; int threads_count; -decl_mutex_data(,tasks_threads_lock) +decl_lck_mtx_data(,tasks_threads_lock) processor_t processor_list; unsigned int processor_count; @@ -101,8 +101,8 @@ decl_simple_lock_data(,processor_list_lock) uint32_t processor_avail_count; -processor_t master_processor; -int master_cpu = 0; +processor_t master_processor; +int master_cpu = 0; /* Forwards */ kern_return_t processor_set_things( @@ -119,7 +119,6 @@ processor_bootstrap(void) simple_lock_init(&pset_node_lock, 0); - mutex_init(&tasks_threads_lock, 0); queue_init(&tasks); queue_init(&threads); @@ -132,13 +131,13 @@ processor_bootstrap(void) /* * Initialize the given processor for the cpu - * indicated by cpu_num, and assign to the + * indicated by cpu_id, and assign to the * specified processor set. 
*/ void processor_init( processor_t processor, - int cpu_num, + int cpu_id, processor_set_t pset) { run_queue_init(&processor->runq); @@ -147,12 +146,12 @@ processor_init( processor->active_thread = processor->next_thread = processor->idle_thread = THREAD_NULL; processor->processor_set = pset; processor->current_pri = MINPRI; - processor->cpu_num = cpu_num; + processor->cpu_id = cpu_id; timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor); processor->deadline = UINT64_MAX; processor->timeslice = 0; + processor->processor_meta = PROCESSOR_META_NULL; processor->processor_self = IP_NULL; - simple_lock_init(&processor->lock, 0); processor_data_init(processor); processor->processor_list = NULL; @@ -166,6 +165,24 @@ processor_init( simple_unlock(&processor_list_lock); } +void +processor_meta_init( + processor_t processor, + processor_t primary) +{ + processor_meta_t pmeta = primary->processor_meta; + + if (pmeta == PROCESSOR_META_NULL) { + pmeta = kalloc(sizeof (*pmeta)); + + queue_init(&pmeta->idle_queue); + + pmeta->primary = primary; + } + + processor->processor_meta = pmeta; +} + processor_set_t processor_pset( processor_t processor) @@ -252,13 +269,13 @@ processor_info( processor_info_t info, mach_msg_type_number_t *count) { - register int cpu_num, state; + register int cpu_id, state; kern_return_t result; if (processor == PROCESSOR_NULL) return (KERN_INVALID_ARGUMENT); - cpu_num = processor->cpu_num; + cpu_id = processor->cpu_id; switch (flavor) { @@ -270,14 +287,14 @@ processor_info( return (KERN_FAILURE); basic_info = (processor_basic_info_t) info; - basic_info->cpu_type = slot_type(cpu_num); - basic_info->cpu_subtype = slot_subtype(cpu_num); + basic_info->cpu_type = slot_type(cpu_id); + basic_info->cpu_subtype = slot_subtype(cpu_id); state = processor->state; if (state == PROCESSOR_OFF_LINE) basic_info->running = FALSE; else basic_info->running = TRUE; - basic_info->slot_num = cpu_num; + basic_info->slot_num = cpu_id; if (processor == 
master_processor) basic_info->is_master = TRUE; else @@ -298,11 +315,11 @@ processor_info( cpu_load_info = (processor_cpu_load_info_t) info; cpu_load_info->cpu_ticks[CPU_STATE_USER] = - timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval; + (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval); cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = - timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval; + (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval); cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = - timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval; + (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval); cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0; *count = PROCESSOR_CPU_LOAD_INFO_COUNT; @@ -312,7 +329,7 @@ processor_info( } default: - result = cpu_info(flavor, cpu_num, info, count); + result = cpu_info(flavor, cpu_id, info, count); if (result == KERN_SUCCESS) *host = &realhost; @@ -338,7 +355,7 @@ processor_start( prev = thread_bind(processor); thread_block(THREAD_CONTINUE_NULL); - result = cpu_start(processor->cpu_num); + result = cpu_start(processor->cpu_id); thread_bind(prev); @@ -407,7 +424,7 @@ processor_start( if (processor->processor_self == IP_NULL) ipc_processor_init(processor); - result = cpu_start(processor->cpu_num); + result = cpu_start(processor->cpu_id); if (result != KERN_SUCCESS) { s = splsched(); pset_lock(pset); @@ -442,7 +459,7 @@ processor_control( if (processor == PROCESSOR_NULL) return(KERN_INVALID_ARGUMENT); - return(cpu_control(processor->cpu_num, info, count)); + return(cpu_control(processor->cpu_id, info, count)); } kern_return_t @@ -710,7 +727,7 @@ processor_set_things( addr = NULL; for (;;) { - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); if (type == THING_TASK) maxthings = tasks_count; @@ -724,7 +741,7 @@ processor_set_things( break; /* unlock and allocate more memory */ - 
mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (size != 0) kfree(addr, size); @@ -776,7 +793,7 @@ processor_set_things( } - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (actual < maxthings) size_needed = actual * sizeof (mach_port_t); diff --git a/osfmk/kern/processor.h b/osfmk/kern/processor.h index 24603cc11..fcf61d044 100644 --- a/osfmk/kern/processor.h +++ b/osfmk/kern/processor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,7 +113,15 @@ extern struct pset_node pset_node0; extern queue_head_t tasks, threads; extern int tasks_count, threads_count; -decl_mutex_data(extern,tasks_threads_lock) +decl_lck_mtx_data(extern,tasks_threads_lock) + +struct processor_meta { + queue_head_t idle_queue; + processor_t primary; +}; + +typedef struct processor_meta *processor_meta_t; +#define PROCESSOR_META_NULL ((processor_meta_t) 0) struct processor { queue_chain_t processor_queue;/* idle/active queue link, @@ -127,7 +135,7 @@ struct processor { processor_set_t processor_set; /* assigned set */ int current_pri; /* priority of current thread */ - int cpu_num; /* platform numeric id */ + int cpu_id; /* platform numeric id */ timer_call_data_t quantum_timer; /* timer for quantum expiration */ uint64_t quantum_end; /* time when current quantum ends */ @@ -137,9 +145,9 @@ struct processor { int timeslice; /* quanta before timeslice ends */ struct run_queue runq; /* runq for this processor */ + processor_meta_t processor_meta; struct ipc_port * processor_self; /* port for operations */ - decl_simple_lock_data(,lock) processor_t processor_list; /* all existing processors */ processor_data_t processor_data; /* per-processor data */ @@ -176,10 +184,6 @@ extern processor_t cpu_to_processor( #define pset_unlock(p) simple_unlock(&(p)->sched_lock) #define pset_lock_init(p) 
simple_lock_init(&(p)->sched_lock, 0) -#define processor_lock(p) simple_lock(&(p)->lock) -#define processor_unlock(p) simple_unlock(&(p)->lock) -#define processor_lock_init(p) simple_lock_init(&(p)->lock, 0) - /* Update hints */ #define pset_pri_hint(ps, p, pri) \ @@ -208,9 +212,13 @@ extern void processor_bootstrap(void) __attribute__((section("__TEXT, initcode" extern void processor_init( processor_t processor, - int cpu_num, + int cpu_id, processor_set_t processor_set) __attribute__((section("__TEXT, initcode"))); +extern void processor_meta_init( + processor_t processor, + processor_t primary); + extern kern_return_t processor_shutdown( processor_t processor); @@ -236,11 +244,11 @@ extern kern_return_t processor_info_count( #define pset_deallocate(x) #define pset_reference(x) -extern void machine_run_count( - uint32_t count); +extern void machine_run_count( + uint32_t count); extern boolean_t machine_cpu_is_inactive( - int num); + int cpu_id); #else /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/processor_data.h b/osfmk/kern/processor_data.h index 0e3f64705..200ec35f3 100644 --- a/osfmk/kern/processor_data.h +++ b/osfmk/kern/processor_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Inc. All rights reserved. + * Copyright (c) 2003-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,7 +60,7 @@ struct processor_data { } stack_cache; /* VM event counters */ - vm_statistics_data_t vm_stat; + vm_statistics64_data_t vm_stat; /* IPC free message cache */ struct ikm_cache { diff --git a/osfmk/kern/queue.c b/osfmk/kern/queue.c index 0de366e14..06eba9ebc 100644 --- a/osfmk/kern/queue.c +++ b/osfmk/kern/queue.c @@ -219,13 +219,12 @@ insque( pred->next = entry; } -int +void remque( register queue_entry_t elt) { (elt->next)->prev = elt->prev; (elt->prev)->next = elt->next; - return((int)elt); } #endif diff --git a/osfmk/kern/queue.h b/osfmk/kern/queue.h index 158150719..d0bab0c61 100644 --- a/osfmk/kern/queue.h +++ b/osfmk/kern/queue.h @@ -146,7 +146,7 @@ extern void insque( queue_entry_t pred); /* Dequeue element */ -extern int remque( +extern void remque( queue_entry_t elt); __END_DECLS @@ -225,14 +225,12 @@ insque( pred->next = entry; } -static __inline__ integer_t +static __inline__ void remque( register queue_entry_t elt) { (elt->next)->prev = elt->prev; (elt->prev)->next = elt->next; - - return((integer_t)elt); } #endif /* !__GNUC__ */ diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h index 088e84c3d..e1e5ae4c0 100644 --- a/osfmk/kern/sched.h +++ b/osfmk/kern/sched.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -227,6 +227,9 @@ extern void compute_averunnable( extern void compute_stack_target( void *arg); +extern void compute_memory_pressure( + void *arg); + /* * Conversion factor from usage * to priority. 
@@ -271,9 +274,9 @@ MACRO_END */ #define thread_timer_delta(thread, delta) \ MACRO_BEGIN \ - (delta) = timer_delta(&(thread)->system_timer, \ + (delta) = (typeof(delta))timer_delta(&(thread)->system_timer, \ &(thread)->system_timer_save); \ - (delta) += timer_delta(&(thread)->user_timer, \ + (delta) += (typeof(delta))timer_delta(&(thread)->user_timer, \ &(thread)->user_timer_save); \ MACRO_END diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c index e1c02d7a0..e20ddff73 100644 --- a/osfmk/kern/sched_average.c +++ b/osfmk/kern/sched_average.c @@ -102,6 +102,7 @@ static struct sched_average { } sched_average[] = { { compute_averunnable, &sched_nrun, SCHED_AVG_SECS(5), 0 }, { compute_stack_target, NULL, SCHED_AVG_SECS(5), 1 }, + { compute_memory_pressure, NULL, SCHED_AVG_SECS(1), 0 }, { NULL, NULL, 0, 0 } }; diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 4e281607d..ca4a1354f 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -101,6 +101,8 @@ #include #include +#include + #include #include @@ -145,8 +147,6 @@ uint32_t sched_run_count, sched_share_count; uint32_t sched_load_average, sched_mach_factor; /* Forwards */ -void wait_queues_init(void) __attribute__((section("__TEXT, initcode"))); - static void load_shift_init(void) __attribute__((section("__TEXT, initcode"))); static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode"))); @@ -154,6 +154,10 @@ static thread_t run_queue_dequeue( run_queue_t runq, integer_t options); +static thread_t choose_thread( + processor_t processor, + int priority); + static thread_t thread_select_idle( thread_t thread, processor_t processor); @@ -210,41 +214,8 @@ boolean_t thread_runnable( * */ -/* - * Waiting protocols and implementation: - * - * Each thread may be waiting for exactly one event; this event - * is set using assert_wait(). 
That thread may be awakened either - * by performing a thread_wakeup_prim() on its event, - * or by directly waking that thread up with clear_wait(). - * - * The implementation of wait events uses a hash table. Each - * bucket is queue of threads having the same hash function - * value; the chain for the queue (linked list) is the run queue - * field. [It is not possible to be waiting and runnable at the - * same time.] - * - * Locks on both the thread and on the hash buckets govern the - * wait event field and the queue chain field. Because wakeup - * operations only have the event as an argument, the event hash - * bucket must be locked before any thread. - * - * Scheduling operations may also occur at interrupt level; therefore, - * interrupts below splsched() must be prevented when holding - * thread or hash bucket locks. - * - * The wait event hash table declarations are as follows: - */ - -#define NUMQUEUES 59 - -struct wait_queue wait_queues[NUMQUEUES]; - -#define wait_hash(event) \ - ((((int)(event) < 0)? 
~(int)(event): (int)(event)) % NUMQUEUES) - int8_t sched_load_shifts[NRQS]; -int sched_preempt_pri[NRQBM]; +int sched_preempt_pri[NRQBM]; void sched_init(void) @@ -262,7 +233,6 @@ sched_init(void) sched_safe_duration = (2 * max_unsafe_quanta / default_preemption_rate) * (1 << SCHED_TICK_SHIFT); - wait_queues_init(); load_shift_init(); preempt_pri_init(); simple_lock_init(&rt_lock, 0); @@ -281,29 +251,29 @@ sched_timebase_init(void) clock_interval_to_absolutetime_interval( std_quantum_us, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - std_quantum = abstime; + std_quantum = (uint32_t)abstime; /* smallest remaining quantum (250 us) */ clock_interval_to_absolutetime_interval(250, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - min_std_quantum = abstime; + min_std_quantum = (uint32_t)abstime; /* smallest rt computaton (50 us) */ clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - min_rt_quantum = abstime; + min_rt_quantum = (uint32_t)abstime; /* maximum rt computation (50 ms) */ clock_interval_to_absolutetime_interval( 50, 1000*NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - max_rt_quantum = abstime; + max_rt_quantum = (uint32_t)abstime; /* scheduler tick interval */ clock_interval_to_absolutetime_interval(USEC_PER_SEC >> SCHED_TICK_SHIFT, NSEC_PER_USEC, &abstime); assert((abstime >> 32) == 0 && (uint32_t)abstime != 0); - sched_tick_interval = abstime; + sched_tick_interval = (uint32_t)abstime; /* * Compute conversion factor from usage to @@ -318,16 +288,6 @@ sched_timebase_init(void) max_poll_computation = max_poll_quanta * std_quantum; } -void -wait_queues_init(void) -{ - register int i; - - for (i = 0; i < NUMQUEUES; i++) { - wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO); - } -} - /* * Set up values for timeshare * loading factors. 
@@ -381,6 +341,8 @@ thread_timer_expire( splx(s); } +#ifndef __LP64__ + /* * thread_set_timer: * @@ -444,6 +406,8 @@ thread_cancel_timer(void) splx(s); } +#endif /* __LP64__ */ + /* * thread_unblock: * @@ -523,7 +487,9 @@ thread_unblock( KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_MAKE_RUNNABLE) | DBG_FUNC_NONE, - (int)thread, (int)thread->sched_pri, 0, 0, 0); + (uintptr_t)thread_tid(thread), thread->sched_pri, 0, 0, 0); + + DTRACE_SCHED2(wakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); return (result); } @@ -575,6 +541,8 @@ thread_mark_wait_locked( { boolean_t at_safe_point; + assert(thread == current_thread()); + /* * The thread may have certain types of interrupts/aborts masked * off. Even if the wait location says these types of interrupts @@ -590,6 +558,9 @@ thread_mark_wait_locked( !(thread->sched_mode & TH_MODE_ABORT) || (!at_safe_point && (thread->sched_mode & TH_MODE_ABORTSAFELY))) { + + DTRACE_SCHED(sleep); + thread->state |= (interruptible) ? TH_WAIT : (TH_WAIT | TH_UNINT); thread->at_safe_point = at_safe_point; return (thread->wait_result = THREAD_WAITING); @@ -691,7 +662,7 @@ assert_wait_timeout( thread_lock(thread); clock_interval_to_deadline(interval, scale_factor, &deadline); - wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event, + wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), interruptible, deadline, thread); thread_unlock(thread); @@ -719,7 +690,7 @@ assert_wait_deadline( wait_queue_lock(wqueue); thread_lock(thread); - wresult = wait_queue_assert_wait64_locked(wqueue, (uint32_t)event, + wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), interruptible, deadline, thread); thread_unlock(thread); @@ -784,57 +755,6 @@ thread_sleep_usimple_lock( return res; } -/* - * thread_sleep_mutex: - * - * Cause the current thread to wait until the specified event - * occurs. The specified mutex is unlocked before releasing - * the cpu. 
The mutex will be re-acquired before returning. - * - * JMM - Add hint to make sure mutex is available before rousting - */ -wait_result_t -thread_sleep_mutex( - event_t event, - mutex_t *mutex, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait(event, interruptible); - if (res == THREAD_WAITING) { - mutex_unlock(mutex); - res = thread_block(THREAD_CONTINUE_NULL); - mutex_lock(mutex); - } - return res; -} - -/* - * thread_sleep_mutex_deadline: - * - * Cause the current thread to wait until the specified event - * (or deadline) occurs. The specified mutex is unlocked before - * releasing the cpu. The mutex will be re-acquired before returning. - */ -wait_result_t -thread_sleep_mutex_deadline( - event_t event, - mutex_t *mutex, - uint64_t deadline, - wait_interrupt_t interruptible) -{ - wait_result_t res; - - res = assert_wait_deadline(event, interruptible, deadline); - if (res == THREAD_WAITING) { - mutex_unlock(mutex); - res = thread_block(THREAD_CONTINUE_NULL); - mutex_lock(mutex); - } - return res; -} - /* * thread_sleep_lock_write: * @@ -1171,7 +1091,7 @@ thread_select( { processor_set_t pset = processor->processor_set; thread_t new_thread = THREAD_NULL; - boolean_t other_runnable, inactive_state; + boolean_t inactive_state; do { /* @@ -1184,22 +1104,20 @@ thread_select( pset_lock(pset); - inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_num); + inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_cpu_is_inactive(processor->cpu_id); simple_lock(&rt_lock); - /* - * Check for other runnable threads. - */ - other_runnable = processor->runq.count > 0 || rt_runq.count > 0; - /* * Test to see if the current thread should continue * to run on this processor. Must be runnable, and not * bound to a different processor, nor be in the wrong * processor set. 
*/ - if ( thread->state == TH_RUN && + if ( thread->state == TH_RUN && + (thread->sched_pri >= BASEPRI_RTQUEUES || + processor->processor_meta == PROCESSOR_META_NULL || + processor->processor_meta->primary == processor) && (thread->bound_processor == PROCESSOR_NULL || thread->bound_processor == processor) && (thread->affinity_set == AFFINITY_SET_NULL || @@ -1236,10 +1154,8 @@ thread_select( return (thread); } - if (!inactive_state && - (!other_runnable || - (processor->runq.highq < thread->sched_pri && - rt_runq.highq < thread->sched_pri)) ) { + if (!inactive_state && rt_runq.highq < thread->sched_pri && + (new_thread = choose_thread(processor, thread->sched_pri)) == THREAD_NULL) { simple_unlock(&rt_lock); @@ -1257,14 +1173,13 @@ thread_select( } } - if (other_runnable) { - if (processor->runq.count > 0 && processor->runq.highq >= rt_runq.highq) { + if (new_thread != THREAD_NULL || + (processor->runq.highq >= rt_runq.highq && + (new_thread = choose_thread(processor, MINPRI)) != THREAD_NULL)) { simple_unlock(&rt_lock); - thread = run_queue_dequeue(&processor->runq, SCHED_HEADQ); - if (!inactive_state) { - pset_pri_hint(pset, processor, thread->sched_pri); + pset_pri_hint(pset, processor, new_thread->sched_pri); pset_count_hint(pset, processor, processor->runq.count); } @@ -1272,9 +1187,10 @@ thread_select( processor->deadline = UINT64_MAX; pset_unlock(pset); - return (thread); - } + return (new_thread); + } + if (rt_runq.count > 0) { thread = run_queue_dequeue(&rt_runq, SCHED_HEADQ); simple_unlock(&rt_lock); @@ -1288,6 +1204,10 @@ thread_select( processor->deadline = UINT64_MAX; + /* + * Set processor inactive based on + * indication from the platform code. 
+ */ if (inactive_state) { if (processor->state == PROCESSOR_RUNNING) remqueue(&pset->active_queue, (queue_entry_t)processor); @@ -1327,8 +1247,19 @@ thread_select( remqueue(&pset->active_queue, (queue_entry_t)processor); processor->state = PROCESSOR_IDLE; - enqueue_head(&pset->idle_queue, (queue_entry_t)processor); - pset->low_pri = pset->low_count = processor; + if (processor->processor_meta == PROCESSOR_META_NULL || processor->processor_meta->primary == processor) { + enqueue_head(&pset->idle_queue, (queue_entry_t)processor); + pset->low_pri = pset->low_count = processor; + } + else { + enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor); + + if (thread->sched_pri < BASEPRI_RTQUEUES) { + pset_unlock(pset); + + return (processor->idle_thread); + } + } } pset_unlock(pset); @@ -1429,6 +1360,57 @@ thread_select_idle( return (new_thread); } +/* + * choose_thread: + * + * Locate a thread to execute from the processor run queue + * and return it. Only choose a thread with greater or equal + * priority. + * + * Associated pset must be locked. Returns THREAD_NULL + * on failure. 
+ */ +static thread_t +choose_thread( + processor_t processor, + int priority) +{ + run_queue_t rq = &processor->runq; + queue_t queue = rq->queues + rq->highq; + int pri = rq->highq, count = rq->count; + thread_t thread; + + while (count > 0 && pri >= priority) { + thread = (thread_t)queue_first(queue); + while (!queue_end(queue, (queue_entry_t)thread)) { + if (thread->bound_processor == PROCESSOR_NULL || + thread->bound_processor == processor) { + remqueue(queue, (queue_entry_t)thread); + + thread->runq = PROCESSOR_NULL; + rq->count--; + if (testbit(pri, sched_preempt_pri)) { + rq->urgency--; assert(rq->urgency >= 0); + } + if (queue_empty(queue)) { + if (pri != IDLEPRI) + clrbit(MAXPRI - pri, rq->bitmap); + rq->highq = MAXPRI - ffsbit(rq->bitmap); + } + + return (thread); + } + count--; + + thread = (thread_t)queue_next((queue_entry_t)thread); + } + + queue--; pri--; + } + + return (THREAD_NULL); +} + /* * Perform a context switch and start executing the new thread. * @@ -1473,9 +1455,12 @@ thread_invoke( void *parameter = self->parameter; processor_t processor; - if (get_preemption_level() != 0) - panic("thread_invoke: preemption_level %d\n", - get_preemption_level()); + if (get_preemption_level() != 0) { + int pl = get_preemption_level(); + panic("thread_invoke: preemption_level %d, possible cause: %s", + pl, (pl < 0 ? 
"unlocking an unlocked mutex or spinlock" : + "blocking while holding a spinlock, or within interrupt context")); + } assert(self == current_thread()); @@ -1532,11 +1517,15 @@ thread_invoke( PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE, - self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0); + self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); -TLOG(1, "thread_invoke: calling machine_stack_handoff\n"); + DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); + + TLOG(1, "thread_invoke: calling machine_stack_handoff\n"); machine_stack_handoff(self, thread); + DTRACE_SCHED(on__cpu); + thread_dispatch(self, thread); thread->continuation = thread->parameter = NULL; @@ -1612,7 +1601,9 @@ TLOG(1, "thread_invoke: calling machine_stack_handoff\n"); PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, - (int)self->reason, (int)thread, self->sched_pri, thread->sched_pri, 0); + self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); + + DTRACE_SCHED2(off__cpu, struct thread *, thread, struct proc *, thread->task->bsd_info); /* * This is where we actually switch register context, @@ -1620,7 +1611,9 @@ TLOG(1, "thread_invoke: calling machine_stack_handoff\n"); * as a result of a subsequent context switch. */ thread = machine_switch_context(self, continuation, thread); -TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread); + TLOG(1,"thread_invoke: returning machine_switch_context: self %p continuation %p thread %p\n", self, continuation, thread); + + DTRACE_SCHED(on__cpu); /* * We have been resumed and are set to run. 
@@ -1674,7 +1667,7 @@ thread_dispatch( */ if ( first_timeslice(processor) && processor->quantum_end > processor->last_dispatch ) - thread->current_quantum = (processor->quantum_end - processor->last_dispatch); + thread->current_quantum = (uint32_t)(processor->quantum_end - processor->last_dispatch); else thread->current_quantum = 0; @@ -1710,9 +1703,7 @@ thread_dispatch( thread->current_quantum = 0; } - thread->last_switch = processor->last_dispatch; - - thread->computation_metered += (thread->last_switch - thread->computation_epoch); + thread->computation_metered += (processor->last_dispatch - thread->computation_epoch); if (!(thread->state & TH_WAIT)) { /* @@ -1775,9 +1766,7 @@ thread_dispatch( processor->timeslice = 1; - self->last_switch = processor->last_dispatch; - - self->computation_epoch = self->last_switch; + self->computation_epoch = processor->last_dispatch; } else { timer_call_cancel(&processor->quantum_timer); @@ -1785,6 +1774,11 @@ thread_dispatch( } } +#include + +uint32_t kdebug_thread_block = 0; + + /* * thread_block_reason: * @@ -1828,6 +1822,15 @@ thread_block_reason( self->continuation = continuation; self->parameter = parameter; + if (kdebug_thread_block && kdebug_enable && self->state != TH_RUN) { + uint32_t bt[8]; + + OSBacktrace((void **)&bt[0], 8); + + KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_START, bt[0], bt[1], bt[2], bt[3], 0); + KERNEL_DEBUG_CONSTANT(0x140004c | DBG_FUNC_END, bt[4], bt[5], bt[6], bt[7], 0); + } + do { thread_lock(self); new_thread = thread_select(self, processor); @@ -1911,7 +1914,9 @@ thread_continue( register thread_t self = current_thread(); register thread_continue_t continuation; register void *parameter; - + + DTRACE_SCHED(on__cpu); + continuation = self->continuation; parameter = self->parameter; @@ -2241,16 +2246,22 @@ choose_processor( { processor_set_t nset, cset = pset; processor_t processor = thread->last_processor; + processor_meta_t pmeta = PROCESSOR_META_NULL; /* * Prefer the last processor, when 
appropriate. */ if (processor != PROCESSOR_NULL) { + if (thread->sched_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL && + processor->processor_meta->primary->state == PROCESSOR_IDLE) + processor = processor->processor_meta->primary; + if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE || processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE) processor = PROCESSOR_NULL; else - if (processor->state == PROCESSOR_IDLE || ( thread->sched_pri > BASEPRI_DEFAULT && processor->current_pri < thread->sched_pri)) + if (processor->state == PROCESSOR_IDLE || + (thread->sched_pri > BASEPRI_DEFAULT && processor->current_pri < thread->sched_pri)) return (processor); } @@ -2275,9 +2286,18 @@ choose_processor( thread->realtime.deadline < processor->deadline) return (processor); + if (pmeta == PROCESSOR_META_NULL) { + if (processor->processor_meta != PROCESSOR_META_NULL && + !queue_empty(&processor->processor_meta->idle_queue)) + pmeta = processor->processor_meta; + } + processor = (processor_t)queue_next((queue_entry_t)processor); } + if (pmeta != PROCESSOR_META_NULL) + return ((processor_t)queue_first(&pmeta->idle_queue)); + processor = PROCESSOR_NULL; } else { @@ -2293,8 +2313,8 @@ choose_processor( else if (cset->low_count != PROCESSOR_NULL && cset->low_count->state != PROCESSOR_INACTIVE && cset->low_count->state != PROCESSOR_SHUTDOWN && cset->low_count->state != PROCESSOR_OFF_LINE && - (processor == PROCESSOR_NULL || - ( thread->sched_pri <= BASEPRI_DEFAULT && cset->low_count->runq.count < processor->runq.count))) { + (processor == PROCESSOR_NULL || (thread->sched_pri <= BASEPRI_DEFAULT && + cset->low_count->runq.count < processor->runq.count))) { processor = cset->low_count; } @@ -2306,6 +2326,12 @@ choose_processor( if (processor != PROCESSOR_NULL) enqueue_tail(&cset->active_queue, (queue_entry_t)processor); } + + if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) { + if 
(processor->processor_meta != PROCESSOR_META_NULL && + !queue_empty(&processor->processor_meta->idle_queue)) + pmeta = processor->processor_meta; + } } /* @@ -2326,6 +2352,20 @@ choose_processor( * and that the correct processor set is locked. */ do { + if (pmeta != PROCESSOR_META_NULL) { + if (cset != pmeta->primary->processor_set) { + pset_unlock(cset); + + cset = pmeta->primary->processor_set; + pset_lock(cset); + } + + if (!queue_empty(&pmeta->idle_queue)) + return ((processor_t)queue_first(&pmeta->idle_queue)); + + pmeta = PROCESSOR_META_NULL; + } + /* * If we haven't been able to choose a processor, * pick the boot processor and return it. @@ -2475,6 +2515,18 @@ thread_setrun( processor_setrun(processor, thread, options); } +processor_set_t +task_choose_pset( + task_t task) +{ + processor_set_t pset = task->pset_hint; + + if (pset != PROCESSOR_SET_NULL) + pset = choose_next_pset(pset); + + return (pset); +} + /* * processor_queue_shutdown: * @@ -2502,7 +2554,7 @@ processor_queue_shutdown( while (!queue_end(queue, (queue_entry_t)thread)) { next = (thread_t)queue_next((queue_entry_t)thread); - if (thread->bound_processor != processor) { + if (thread->bound_processor == PROCESSOR_NULL) { remqueue(queue, (queue_entry_t)thread); thread->runq = PROCESSOR_NULL; @@ -2591,7 +2643,11 @@ csw_check( if (result != AST_NONE) return (result); - if (machine_cpu_is_inactive(processor->cpu_num)) + if (processor->current_pri < BASEPRI_RTQUEUES && processor->processor_meta != PROCESSOR_META_NULL && + processor->processor_meta->primary != processor) + return (AST_PREEMPT); + + if (machine_cpu_is_inactive(processor->cpu_id)) return (AST_PREEMPT); if (processor->active_thread->state & TH_SUSP) @@ -2762,7 +2818,7 @@ steal_processor_thread( while (count > 0) { thread = (thread_t)queue_first(queue); while (!queue_end(queue, (queue_entry_t)thread)) { - if (thread->bound_processor != processor) { + if (thread->bound_processor == PROCESSOR_NULL) { remqueue(queue, (queue_entry_t)thread); 
thread->runq = PROCESSOR_NULL; @@ -2858,12 +2914,8 @@ processor_idle( (void)splsched(); -#ifdef __ppc__ - pmsDown(); /* Step power down */ -#endif - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (int)thread, 0, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_START, (uintptr_t)thread_tid(thread), 0, 0, 0, 0); timer_switch(&PROCESSOR_DATA(processor, system_state), mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state)); @@ -2875,7 +2927,7 @@ processor_idle( (void)splsched(); - if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_num)) + if (processor->state == PROCESSOR_INACTIVE && !machine_cpu_is_inactive(processor->cpu_id)) break; } @@ -2885,10 +2937,6 @@ processor_idle( pset_lock(pset); -#ifdef __ppc__ - pmsStep(0); /* Step up out of idle power */ -#endif - state = processor->state; if (state == PROCESSOR_DISPATCHING) { /* @@ -2909,7 +2957,7 @@ processor_idle( thread_unlock(new_thread); KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0); return (THREAD_NULL); } @@ -2917,7 +2965,7 @@ processor_idle( pset_unlock(pset); KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, (int)new_thread, 0, 0); + MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, (uintptr_t)thread_tid(new_thread), 0, 0); return (new_thread); } @@ -2950,7 +2998,7 @@ processor_idle( thread_unlock(new_thread); KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0); return (THREAD_NULL); } @@ -2959,7 +3007,7 @@ processor_idle( pset_unlock(pset); KERNEL_DEBUG_CONSTANT( - 
MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (int)thread, (int)state, 0, 0, 0); + MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_END, (uintptr_t)thread_tid(thread), state, 0, 0, 0); return (THREAD_NULL); } @@ -3097,7 +3145,7 @@ time_cswitch(void) abstime = mach_absolute_time(); thread_block(THREAD_CONTINUE_NULL); - new = mach_absolute_time() - abstime; + new = (uint32_t)(mach_absolute_time() - abstime); if (i == 0) accum = hi = low = new; diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h index 6cdde8933..d47b67c52 100644 --- a/osfmk/kern/sched_prim.h +++ b/osfmk/kern/sched_prim.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -181,6 +181,9 @@ extern void thread_setrun( #define SCHED_HEADQ 2 #define SCHED_PREEMPT 4 +extern processor_set_t task_choose_pset( + task_t task); + /* Bind the current thread to a particular processor */ extern processor_t thread_bind( processor_t processor); @@ -269,6 +272,8 @@ extern boolean_t preemption_enabled(void); #ifdef KERNEL_PRIVATE +#ifndef __LP64__ + /* * Obsolete interfaces. */ @@ -293,6 +298,8 @@ extern void thread_cancel_timer(void); #endif /* MACH_KERNEL_PRIVATE */ +#endif /* __LP64__ */ + #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c index fe792f997..a59122569 100644 --- a/osfmk/kern/stack.c +++ b/osfmk/kern/stack.c @@ -71,22 +71,52 @@ static unsigned int stack_new_count; /* total new stack allocations */ static vm_offset_t stack_addr_mask; +unsigned int kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE; +vm_offset_t kernel_stack_size = KERNEL_STACK_SIZE; +vm_offset_t kernel_stack_mask = -KERNEL_STACK_SIZE; +vm_offset_t kernel_stack_depth_max = 0; + /* * The next field is at the base of the stack, * so the low end is left unsullied. 
*/ #define stack_next(stack) \ - (*((vm_offset_t *)((stack) + KERNEL_STACK_SIZE) - 1)) + (*((vm_offset_t *)((stack) + kernel_stack_size) - 1)) + +static inline int +log2(vm_offset_t size) +{ + int result; + for (result = 0; size > 0; result++) + size >>= 1; + return result; +} + +static inline vm_offset_t +roundup_pow2(vm_offset_t size) +{ + return 1UL << (log2(size - 1) + 1); +} void stack_init(void) { simple_lock_init(&stack_lock_data, 0); - if (KERNEL_STACK_SIZE < round_page(KERNEL_STACK_SIZE)) - panic("stack_init: stack size %d not a multiple of page size %d\n", KERNEL_STACK_SIZE, PAGE_SIZE); + if (PE_parse_boot_argn("kernel_stack_pages", + &kernel_stack_pages, + sizeof (kernel_stack_pages))) { + kernel_stack_size = kernel_stack_pages * PAGE_SIZE; + printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n", + kernel_stack_pages, (void *) kernel_stack_size); + } + + if (kernel_stack_size < round_page(kernel_stack_size)) + panic("stack_init: stack size %p not a multiple of page size %d\n", + (void *) kernel_stack_size, PAGE_SIZE); - stack_addr_mask = KERNEL_STACK_SIZE - 1; + stack_addr_mask = roundup_pow2(kernel_stack_size) - 1; + kernel_stack_mask = ~stack_addr_mask; } /* @@ -131,7 +161,7 @@ stack_alloc( guard_flags = KMA_GUARD_FIRST | KMA_GUARD_LAST; if (kernel_memory_allocate(kernel_map, &stack, - KERNEL_STACK_SIZE + (2*PAGE_SIZE), + kernel_stack_size + (2*PAGE_SIZE), stack_addr_mask, KMA_KOBJECT | guard_flags) != KERN_SUCCESS) @@ -271,12 +301,12 @@ stack_collect(void) * back in stack_alloc(). 
*/ - stack = vm_map_trunc_page(stack); + stack = (vm_offset_t)vm_map_trunc_page(stack); stack -= PAGE_SIZE; if (vm_map_remove( kernel_map, stack, - stack + KERNEL_STACK_SIZE+(2*PAGE_SIZE), + stack + kernel_stack_size+(2*PAGE_SIZE), VM_MAP_REMOVE_KUNWIRE) != KERN_SUCCESS) panic("stack_collect: vm_map_remove"); @@ -345,10 +375,10 @@ stack_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_si splx(s); *count = total - free; - *cur_size = KERNEL_STACK_SIZE * total; - *max_size = KERNEL_STACK_SIZE * hiwat; - *elem_size = KERNEL_STACK_SIZE; - *alloc_size = KERNEL_STACK_SIZE; + *cur_size = kernel_stack_size * total; + *max_size = kernel_stack_size * hiwat; + *elem_size = kernel_stack_size; + *alloc_size = kernel_stack_size; *collectable = 1; *exhaustable = 0; } @@ -399,7 +429,7 @@ processor_set_stack_usage( addr = NULL; for (;;) { - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); actual = threads_count; @@ -409,7 +439,7 @@ processor_set_stack_usage( if (size_needed <= size) break; - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (size != 0) kfree(addr, size); @@ -432,7 +462,7 @@ processor_set_stack_usage( } assert(i <= actual); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); /* calculate maxusage and free thread references */ @@ -452,7 +482,7 @@ processor_set_stack_usage( kfree(addr, size); *totalp = total; - *residentp = *spacep = total * round_page(KERNEL_STACK_SIZE); + *residentp = *spacep = total * round_page(kernel_stack_size); *maxusagep = maxusage; *maxstackp = maxstack; return KERN_SUCCESS; @@ -462,10 +492,10 @@ processor_set_stack_usage( vm_offset_t min_valid_stack_address(void) { - return vm_map_min(kernel_map); + return (vm_offset_t)vm_map_min(kernel_map); } vm_offset_t max_valid_stack_address(void) { - return vm_map_max(kernel_map); + return (vm_offset_t)vm_map_max(kernel_map); } diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 
a4f1eebd1..fb673da76 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -89,6 +90,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +114,10 @@ #include #endif +#if CONFIG_COUNTERS +#include +#endif + #ifdef __ppc__ #include #include @@ -121,19 +127,25 @@ static void kernel_bootstrap_thread(void); static void load_context( thread_t thread); -#ifdef i386 +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 extern void cpu_userwindow_init(int); extern void cpu_physwindow_init(int); #endif -#ifdef CONFIG_JETTISON_KERNEL_LINKER -extern void jettison_kernel_linker(void); -#endif +// libkern/OSKextLib.cpp +extern void OSKextRemoveKextBootstrap(void); + +void srv_setup(void); +extern void bsd_srv_setup(int); +extern unsigned int semaphore_max; + /* * Running in virtual memory, on the interrupt stack. */ +extern int srv; + void kernel_bootstrap(void) { @@ -144,23 +156,36 @@ kernel_bootstrap(void) #define kernel_bootstrap_kprintf(x...) 
/* kprintf("kernel_bootstrap: " x) */ + /* i386_vm_init already checks for this ; do it aagin anyway */ + if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) { + srv = 1; + } + + srv_setup(); + kernel_bootstrap_kprintf("calling lck_mod_init\n"); lck_mod_init(); + kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n"); + vm_mem_bootstrap(); + + kernel_bootstrap_kprintf("calling vm_mem_init\n"); + vm_mem_init(); + + machine_info.memory_size = (uint32_t)mem_size; + machine_info.max_mem = max_mem; + machine_info.major_version = version_major; + machine_info.minor_version = version_minor; + kernel_bootstrap_kprintf("calling sched_init\n"); sched_init(); - kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n"); - vm_mem_bootstrap(); + kernel_bootstrap_kprintf("calling wait_queue_bootstrap\n"); + wait_queue_bootstrap(); kernel_bootstrap_kprintf("calling ipc_bootstrap\n"); ipc_bootstrap(); - kernel_bootstrap_kprintf("calling vm_mem_init\n"); - vm_mem_init(); - - kernel_bootstrap_kprintf("calling kmod_init\n"); - kmod_init(); #if CONFIG_MACF mac_policy_init(); #endif @@ -183,10 +208,6 @@ kernel_bootstrap(void) kernel_bootstrap_kprintf("calling clock_init\n"); clock_init(); - machine_info.memory_size = mem_size; - machine_info.max_mem = max_mem; - machine_info.major_version = version_major; - machine_info.minor_version = version_minor; /* * Initialize the IPC, task, and thread subsystems. @@ -274,7 +295,7 @@ kernel_bootstrap_thread(void) kdp_init(); #endif -#ifdef i386 +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 /* * Create and initialize the physical copy window for processor 0 * This is required before starting kicking off IOKit. 
@@ -282,13 +303,17 @@ kernel_bootstrap_thread(void) cpu_physwindow_init(0); #endif +#if CONFIG_COUNTERS + pmc_bootstrap(); +#endif + #ifdef IOKIT PE_init_iokit(); #endif (void) spllo(); /* Allow interruptions */ -#ifdef i386 +#if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 /* * Create and initialize the copy window for processor 0 * This also allocates window space for all other processors. @@ -316,13 +341,16 @@ kernel_bootstrap_thread(void) bsd_init(); #endif -#ifdef CONFIG_JETTISON_KERNEL_LINKER - /* We do not run kextd, so get rid of the kernel linker now */ - jettison_kernel_linker(); -#endif + /* + * Get rid of segments used to bootstrap kext loading. This removes + * the KLD, PRELINK symtab, LINKEDIT, and symtab segments/load commands. + */ + OSKextRemoveKextBootstrap(); serial_keyboard_init(); /* Start serial keyboard if wanted */ + vm_page_init_local_q(); + thread_bind(PROCESSOR_NULL); /* @@ -407,7 +435,7 @@ load_context( load_context_kprintf("calling processor_up\n"); processor_up(processor); - PMAP_ACTIVATE_KERNEL(processor->cpu_num); + PMAP_ACTIVATE_KERNEL(processor->cpu_id); /* * Acquire a stack if none attached. 
The panic @@ -441,9 +469,36 @@ load_context( timer_start(&PROCESSOR_DATA(processor, system_state), processor->last_dispatch); PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, system_state); - PMAP_ACTIVATE_USER(thread, processor->cpu_num); + PMAP_ACTIVATE_USER(thread, processor->cpu_id); load_context_kprintf("calling machine_load_context\n"); machine_load_context(thread); /*NOTREACHED*/ } + +void +srv_setup() +{ + int scale = 0; +#if defined(__LP64__) + /* if memory is more than 16G, then apply rules for processes */ + if ((srv != 0) && ((uint64_t)sane_size >= (uint64_t)(16 * 1024 * 1024 *1024ULL))) { + scale = (int)((uint64_t)sane_size / (uint64_t)(8 * 1024 * 1024 *1024ULL)); + /* limit to 128 G */ + if (scale > 16) + scale = 16; + task_max = 2500 * scale; + task_threadmax = task_max; + thread_max = task_max * 5; + } else + scale = 0; +#endif + bsd_srv_setup(scale); + + ipc_space_max = SPACE_MAX; + ipc_tree_entry_max = ITE_MAX; + ipc_port_max = PORT_MAX; + ipc_pset_max = SET_MAX; + semaphore_max = SEMAPHORE_MAX; +} + diff --git a/osfmk/kern/symbols.c b/osfmk/kern/symbols.c deleted file mode 100644 index 3196fb4c2..000000000 --- a/osfmk/kern/symbols.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/*- - * Copyright (c) 2004 Networks Associates Technology, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -static const struct nlist * -syms_find(const struct nlist *syms, int nsyms, vm_offset_t addr, - vm_offset_t *ofs) -{ - const struct nlist *best = 0; - int i; - - for (i = 0; i < nsyms; i++) { - int st = syms[i].n_type & N_TYPE; - - if (st == N_SECT || st == N_ABS) { - if (syms[i].n_value == addr) { - *ofs = 0; - return (syms+i); - } - else if (syms[i].n_value < addr && - (best == 0 || - (syms[i].n_value > best->n_value))) { - *ofs = addr - syms[i].n_value; - best = syms+i; - } - } - } - - return (best); -} - -static const char * -syms_getname(const struct symtab_command *sc, const char *ss, - const struct nlist *sp) -{ - if (sp->n_un.n_strx == 0) - return (""); - else if ((unsigned)sp->n_un.n_strx > sc->strsize) - return ("*bad string*"); - else - return (ss + sp->n_un.n_strx); -} - -/* Search for a symbol in the given object file, which must either - * have a LINKEDIT segment or have been read directly into memory - * and isload passed as 1. - - * If mh has a LINKEDIT segment, an object file loaded in the normal - * way will have the symbol table available at the load address. 
- */ - -static const char * -syms_nameforaddr1(const struct mach_header *mh, int isload, - vm_offset_t addr, vm_offset_t *ofs) -{ - const struct symtab_command *sc = NULL; - const struct segment_command *le = NULL; - const struct segment_command *p; - const struct segment_command *sym = NULL; - const struct nlist *syms; - const char *strings; - unsigned int i; - - p = (const struct segment_command *) (&mh[1]); - - for (i = 0; i < mh->ncmds; i++) { - if (p->cmd == LC_SYMTAB) - sc = (const struct symtab_command *) p; - else if (p->cmd == LC_SEGMENT && - !strncmp(p->segname, "__LINKEDIT", sizeof(p->segname))) - le = p; - - /* only try to find a name for an address that came from - * a text section. - */ - if (p->cmd == LC_SEGMENT && - addr >= p->vmaddr && addr < p->vmaddr + p->vmsize) { - unsigned int j; - - const struct section *sp = (const struct section *) - (((const char *) p) + sizeof(struct segment_command)); - - for (j = 0; j < p->nsects; j++) { - if (addr >= sp[j].addr && - addr < sp[j].addr + sp[j].size && - !strncmp (sp[j].sectname, "__text", - sizeof(sp[j].sectname))) { - sym = p; - break; - } - } - } - p = (const struct segment_command *) - (((const char *) p) + p->cmdsize); - } - - if (sc == 0 || sym == NULL) - return (NULL); - - if (!isload) { - syms = (const struct nlist *) (((const char *) mh) + sc->symoff); - strings = ((const char *) mh) + sc->stroff; - } - else if (le) { - syms = (const struct nlist *) le->vmaddr; - strings = (const char *) - (le->vmaddr + sc->nsyms * sizeof(struct nlist)); - } else - return (NULL); - - const struct nlist *sp = syms_find(syms, sc->nsyms, addr, ofs); - if (sp) - return syms_getname(sc, strings, sp); - - return (NULL); -} - -extern struct mach_header _mh_execute_header; -extern kmod_info_t *kmod; - -/* Search for a symbol and return the name, offset, and module in which the - * address was found. A null module means the kernel itself. 
- */ - -const char * -syms_nameforaddr(vm_offset_t addr, vm_offset_t *ofs, kmod_info_t **km) -{ - const char *name = NULL; - - name = syms_nameforaddr1(&_mh_execute_header, 1, addr, ofs); - if (name) { - *km = NULL; - return (name); - } - - return (NULL); -} - - -/* Format the results of calling syms_nameforaddr into a single string. - * The buffer must be at least 13 bytes long; 80 is recommended. - */ - -int -syms_formataddr(vm_offset_t addr, char *out, vm_offset_t outsize) -{ - vm_offset_t ofs; - kmod_info_t *k = NULL; - const char *name; - - name = syms_nameforaddr(addr, &ofs, &k); - - if (ofs > 0x100000) - name = NULL; - - if (name != NULL) { - if (k != NULL) - snprintf(out, outsize, "0x%08X <%s:%s + %d>", addr, - k->name, name, ofs); - else - snprintf(out, outsize, "0x%08X <%s + %d>", addr, name, - ofs); - - return (1); - } - else { - snprintf(out, outsize, "0x%08X", addr); - return (0); - } -} diff --git a/osfmk/kern/sync_lock.c b/osfmk/kern/sync_lock.c index fd9adc146..174381f5f 100644 --- a/osfmk/kern/sync_lock.c +++ b/osfmk/kern/sync_lock.c @@ -68,9 +68,9 @@ #define ulock_ownership_clear(ul) \ MACRO_BEGIN \ - thread_t th; \ + thread_t th; \ th = (ul)->holder; \ - if (th->active) { \ + if ((th)->active) { \ thread_mtx_lock(th); \ remqueue(&th->held_ulocks, \ (queue_entry_t) (ul)); \ @@ -109,17 +109,24 @@ unsigned int lock_set_event; unsigned int lock_set_handoff; #define LOCK_SET_HANDOFF CAST_EVENT64_T(&lock_set_handoff) + +lck_attr_t lock_set_attr; +lck_grp_t lock_set_grp; +static lck_grp_attr_t lock_set_grp_attr; + + + /* * ROUTINE: lock_set_init [private] * * Initialize the lock_set subsystem. - * - * For now, we don't have anything to do here. 
*/ void lock_set_init(void) { - return; + lck_grp_attr_setdefault(&lock_set_grp_attr); + lck_grp_init(&lock_set_grp, "lock_set", &lock_set_grp_attr); + lck_attr_setdefault(&lock_set_attr); } @@ -158,15 +165,14 @@ lock_set_create ( lock_set_lock_init(lock_set); lock_set->n_ulocks = n_ulocks; - lock_set->ref_count = 1; + lock_set->ref_count = (task == kernel_task) ? 1 : 2; /* one for kernel, one for port */ /* * Create and initialize the lock set port */ lock_set->port = ipc_port_alloc_kernel(); if (lock_set->port == IP_NULL) { - /* This will deallocate the lock set */ - lock_set_dereference(lock_set); + kfree(lock_set, size); return KERN_RESOURCE_SHORTAGE; } @@ -186,6 +192,7 @@ lock_set_create ( ulock->blocked = FALSE; ulock->unstable = FALSE; ulock->ho_wait = FALSE; + ulock->accept_wait = FALSE; wait_queue_init(&ulock->wait_queue, policy); } @@ -277,13 +284,10 @@ lock_set_destroy (task_t task, lock_set_t lock_set) lock_set_ownership_clear(lock_set, task); /* - * Deallocate - * - * Drop the lock set reference, which inturn destroys the - * lock set structure if the reference count goes to zero. + * Drop the lock set reference given to the containing task, + * which inturn destroys the lock set structure if the reference + * count goes to zero. */ - - ipc_port_dealloc_kernel(lock_set->port); lock_set_dereference(lock_set); return KERN_SUCCESS; @@ -552,16 +556,6 @@ ulock_release_internal (ulock_t ulock, thread_t thread) /* wait_queue now unlocked, thread locked */ if (wqthread != THREAD_NULL) { - /* - * JMM - These ownership transfer macros have a - * locking/race problem. To keep the thread from - * changing states on us (nullifying the ownership - * assignment) we need to keep the thread locked - * during the assignment. But we can't because the - * macros take an activation lock, which is a mutex. - * Since this code was already broken before I got - * here, I will leave it for now. 
- */ thread_unlock(wqthread); splx(s); @@ -646,16 +640,11 @@ lock_handoff (lock_set_t lock_set, int lock_id) * Transfer lock ownership */ if (thread != THREAD_NULL) { - /* - * JMM - These ownership transfer macros have a - * locking/race problem. To keep the thread from - * changing states on us (nullifying the ownership - * assignment) we need to keep the thread locked - * during the assignment. But we can't because the - * macros take a thread mutex lock. - * - * Since this code was already broken before I got - * here, I will leave it for now. + /* + * The thread we are transferring to will try + * to take the lock on the ulock, and therefore + * will wait for us complete the handoff even + * through we set the thread running. */ thread_unlock(thread); splx(s); @@ -699,7 +688,15 @@ lock_handoff (lock_set_t lock_set, int lock_id) */ switch (wait_result) { + case THREAD_AWAKENED: + /* + * we take the ulock lock to syncronize with the + * thread that is accepting ownership. + */ + ulock_lock(ulock); + assert(ulock->holder != current_thread()); + ulock_unlock(ulock); return KERN_SUCCESS; case THREAD_INTERRUPTED: @@ -809,6 +806,15 @@ lock_handoff_accept (lock_set_t lock_set, int lock_id) switch (wait_result) { case THREAD_AWAKENED: + /* + * Take the lock to synchronize with the thread handing + * off the lock to us. We don't want to continue until + * they complete the handoff. 
+ */ + ulock_lock(ulock); + assert(ulock->accept_wait == FALSE); + assert(ulock->holder == current_thread()); + ulock_unlock(ulock); return KERN_SUCCESS; case THREAD_INTERRUPTED: @@ -856,8 +862,9 @@ lock_set_dereference(lock_set_t lock_set) lock_set_unlock(lock_set); if (ref_count == 0) { - size = sizeof(struct lock_set) + - (sizeof(struct ulock) * (lock_set->n_ulocks - 1)); + ipc_port_dealloc_kernel(lock_set->port); + size = (int)(sizeof(struct lock_set) + + (sizeof(struct ulock) * (lock_set->n_ulocks - 1))); kfree(lock_set, size); } } diff --git a/osfmk/kern/sync_lock.h b/osfmk/kern/sync_lock.h index f4f499f1e..589fdfcb8 100644 --- a/osfmk/kern/sync_lock.h +++ b/osfmk/kern/sync_lock.h @@ -46,35 +46,35 @@ #include #include #include -#include +#include typedef struct ulock { - queue_chain_t thread_link; /* ulocks owned by a thread */ + queue_chain_t thread_link; /* ulocks owned by thread MUST BE FIRST */ queue_chain_t held_link; /* ulocks held in the lock set */ queue_chain_t handoff_link; /* ulocks w/ active handoffs */ + struct lock_set *lock_set; /* the retaining lock set */ + thread_t holder; /* thread that holds the lock */ + + struct wait_queue wait_queue; /* queue of blocked threads */ - decl_mutex_data(,lock) /* ulock lock */ + decl_lck_mtx_data(,lock) /* ulock lock */ - struct lock_set *lock_set; /* the retaining lock set */ - thread_t holder; /* thread that holds the lock */ unsigned int /* flags */ /* boolean_t */ blocked:1, /* did threads block waiting? */ /* boolean_t */ unstable:1, /* unstable? (holder died) */ /* boolean_t */ ho_wait:1, /* handoff thread waiting? */ /* boolean_t */ accept_wait:1, /* accepting thread waiting? 
*/ :0; /* force to long boundary */ - - struct wait_queue wait_queue; /* queue of blocked threads */ } Ulock; typedef struct ulock *ulock_t; typedef struct lock_set { - queue_chain_t task_link; /* chain of lock sets owned by a task */ - decl_mutex_data(,lock) /* lock set lock */ + queue_chain_t task_link; /* lock sets owned by a task MUST BE FIRST */ task_t owner; /* task that owns the lock set */ ipc_port_t port; /* lock set port */ - int ref_count; /* reference count */ + decl_lck_mtx_data(,lock) /* lock set lock */ + uint32_t ref_count; /* reference count */ boolean_t active; /* active status */ int n_ulocks; /* number of ulocks in the lock set */ @@ -87,19 +87,22 @@ typedef struct lock_set { #define ULOCK_FREE 0 #define ULOCK_HELD 1 +extern lck_grp_t lock_set_grp; +extern lck_attr_t lock_set_attr; + /* * Data structure internal lock macros */ -#define lock_set_lock_init(ls) mutex_init(&(ls)->lock, 0) -#define lock_set_lock(ls) mutex_lock(&(ls)->lock) -#define lock_set_unlock(ls) mutex_unlock(&(ls)->lock) +#define lock_set_lock_init(ls) lck_mtx_init(&(ls)->lock, &lock_set_grp, &lock_set_attr) +#define lock_set_lock(ls) lck_mtx_lock(&(ls)->lock) +#define lock_set_unlock(ls) lck_mtx_unlock(&(ls)->lock) -#define ulock_lock_init(ul) mutex_init(&(ul)->lock, 0) -#define ulock_lock(ul) mutex_lock(&(ul)->lock) -#define ulock_unlock(ul) mutex_unlock(&(ul)->lock) +#define ulock_lock_init(ul) lck_mtx_init(&(ul)->lock, &lock_set_grp, &lock_set_attr) +#define ulock_lock(ul) lck_mtx_lock(&(ul)->lock) +#define ulock_unlock(ul) lck_mtx_unlock(&(ul)->lock) -extern void lock_set_init(void); +extern void lock_set_init(void) __attribute__((section("__TEXT, initcode"))); extern kern_return_t ulock_release_internal( ulock_t ulock, diff --git a/osfmk/kern/sync_sema.c b/osfmk/kern/sync_sema.c index ec1218383..eef5c13ad 100644 --- a/osfmk/kern/sync_sema.c +++ b/osfmk/kern/sync_sema.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,7 +62,7 @@ static unsigned int semaphore_event; #define SEMAPHORE_EVENT CAST_EVENT64_T(&semaphore_event) zone_t semaphore_zone; -unsigned int semaphore_max = SEMAPHORE_MAX; +unsigned int semaphore_max; /* Forward declarations */ @@ -109,13 +109,27 @@ semaphore_convert_wait_result( void semaphore_wait_continue(void); -kern_return_t +static kern_return_t semaphore_wait_internal( semaphore_t wait_semaphore, semaphore_t signal_semaphore, - mach_timespec_t *wait_timep, + uint64_t deadline, + int option, void (*caller_cont)(kern_return_t)); +static __inline__ uint64_t +semaphore_deadline( + unsigned int sec, + clock_res_t nsec) +{ + uint64_t abstime; + + nanoseconds_to_absolutetime((uint64_t)sec * NSEC_PER_SEC + nsec, &abstime); + clock_absolutetime_interval_to_deadline(abstime, &abstime); + + return (abstime); +} + /* * ROUTINE: semaphore_init [private] * @@ -145,33 +159,33 @@ semaphore_create( int value) { semaphore_t s = SEMAPHORE_NULL; + kern_return_t kret; - - if (task == TASK_NULL || value < 0 || policy > SYNC_POLICY_MAX) { - *new_semaphore = SEMAPHORE_NULL; + *new_semaphore = SEMAPHORE_NULL; + if (task == TASK_NULL || value < 0 || policy > SYNC_POLICY_MAX) return KERN_INVALID_ARGUMENT; - } s = (semaphore_t) zalloc (semaphore_zone); - if (s == SEMAPHORE_NULL) { - *new_semaphore = SEMAPHORE_NULL; + if (s == SEMAPHORE_NULL) return KERN_RESOURCE_SHORTAGE; + + kret = wait_queue_init(&s->wait_queue, policy); /* also inits lock */ + if (kret != KERN_SUCCESS) { + zfree(semaphore_zone, s); + return kret; } - wait_queue_init(&s->wait_queue, policy); /* also inits lock */ s->count = value; - s->ref_count = 1; + s->ref_count = (task == kernel_task) ? 
1 : 2; /* * Create and initialize the semaphore port */ s->port = ipc_port_alloc_kernel(); if (s->port == IP_NULL) { - /* This will deallocate the semaphore */ - semaphore_dereference(s); - *new_semaphore = SEMAPHORE_NULL; + zfree(semaphore_zone, s); return KERN_RESOURCE_SHORTAGE; } @@ -259,10 +273,9 @@ semaphore_destroy( /* * Deallocate * - * Drop the semaphore reference, which in turn deallocates the - * semaphore structure if the reference count goes to zero. + * Drop the task's semaphore reference, which in turn deallocates + * the semaphore structure if the reference count goes to zero. */ - ipc_port_dealloc_kernel(semaphore->port); semaphore_dereference(semaphore); return KERN_SUCCESS; } @@ -586,14 +599,14 @@ semaphore_wait_continue(void) * The reference * A reference is held on the signal semaphore. */ -kern_return_t +static kern_return_t semaphore_wait_internal( semaphore_t wait_semaphore, semaphore_t signal_semaphore, - mach_timespec_t *wait_timep, + uint64_t deadline, + int option, void (*caller_cont)(kern_return_t)) { - boolean_t nonblocking; int wait_result; spl_t spl_level; kern_return_t kr = KERN_ALREADY_WAITING; @@ -601,42 +614,22 @@ semaphore_wait_internal( spl_level = splsched(); semaphore_lock(wait_semaphore); - /* - * Decide if we really have to wait. - */ - nonblocking = (wait_timep != (mach_timespec_t *)0) ? - (wait_timep->tv_sec == 0 && wait_timep->tv_nsec == 0) : - FALSE; - if (!wait_semaphore->active) { kr = KERN_TERMINATED; } else if (wait_semaphore->count > 0) { wait_semaphore->count--; kr = KERN_SUCCESS; - } else if (nonblocking) { + } else if (option & SEMAPHORE_TIMEOUT_NOBLOCK) { kr = KERN_OPERATION_TIMED_OUT; } else { - uint64_t abstime; thread_t self = current_thread(); wait_semaphore->count = -1; /* we don't keep an actual count */ thread_lock(self); - - /* - * If it is a timed wait, calculate the wake up deadline. 
- */ - if (wait_timep != (mach_timespec_t *)0) { - nanoseconds_to_absolutetime((uint64_t)wait_timep->tv_sec * - NSEC_PER_SEC + wait_timep->tv_nsec, &abstime); - clock_absolutetime_interval_to_deadline(abstime, &abstime); - } - else - abstime = 0; - (void)wait_queue_assert_wait64_locked( &wait_semaphore->wait_queue, SEMAPHORE_EVENT, - THREAD_ABORTSAFE, abstime, + THREAD_ABORTSAFE, deadline, self); thread_unlock(self); } @@ -729,8 +722,37 @@ semaphore_wait( return KERN_INVALID_ARGUMENT; return(semaphore_wait_internal(semaphore, - SEMAPHORE_NULL, - (mach_timespec_t *)0, + SEMAPHORE_NULL, + 0ULL, SEMAPHORE_OPTION_NONE, + (void (*)(kern_return_t))0)); +} + +kern_return_t +semaphore_wait_noblock( + semaphore_t semaphore) +{ + + if (semaphore == SEMAPHORE_NULL) + return KERN_INVALID_ARGUMENT; + + return(semaphore_wait_internal(semaphore, + SEMAPHORE_NULL, + 0ULL, SEMAPHORE_TIMEOUT_NOBLOCK, + (void (*)(kern_return_t))0)); +} + +kern_return_t +semaphore_wait_deadline( + semaphore_t semaphore, + uint64_t deadline) +{ + + if (semaphore == SEMAPHORE_NULL) + return KERN_INVALID_ARGUMENT; + + return(semaphore_wait_internal(semaphore, + SEMAPHORE_NULL, + deadline, SEMAPHORE_OPTION_NONE, (void (*)(kern_return_t))0)); } @@ -762,7 +784,7 @@ semaphore_wait_trap_internal( if (kr == KERN_SUCCESS) { kr = semaphore_wait_internal(semaphore, SEMAPHORE_NULL, - (mach_timespec_t *)0, + 0ULL, SEMAPHORE_OPTION_NONE, caller_cont); semaphore_dereference(semaphore); } @@ -781,16 +803,24 @@ kern_return_t semaphore_timedwait( semaphore_t semaphore, mach_timespec_t wait_time) -{ +{ + int option = SEMAPHORE_OPTION_NONE; + uint64_t deadline = 0; + if (semaphore == SEMAPHORE_NULL) return KERN_INVALID_ARGUMENT; if(BAD_MACH_TIMESPEC(&wait_time)) return KERN_INVALID_VALUE; + + if (wait_time.tv_sec == 0 && wait_time.tv_nsec == 0) + option = SEMAPHORE_TIMEOUT_NOBLOCK; + else + deadline = semaphore_deadline(wait_time.tv_sec, wait_time.tv_nsec); return (semaphore_wait_internal(semaphore, SEMAPHORE_NULL, - 
&wait_time, + deadline, option, (void(*)(kern_return_t))0)); } @@ -822,7 +852,6 @@ semaphore_timedwait_trap_internal( clock_res_t nsec, void (*caller_cont)(kern_return_t)) { - semaphore_t semaphore; mach_timespec_t wait_time; kern_return_t kr; @@ -834,9 +863,17 @@ semaphore_timedwait_trap_internal( kr = port_name_to_semaphore(name, &semaphore); if (kr == KERN_SUCCESS) { + int option = SEMAPHORE_OPTION_NONE; + uint64_t deadline = 0; + + if (sec == 0 && nsec == 0) + option = SEMAPHORE_TIMEOUT_NOBLOCK; + else + deadline = semaphore_deadline(sec, nsec); + kr = semaphore_wait_internal(semaphore, SEMAPHORE_NULL, - &wait_time, + deadline, option, caller_cont); semaphore_dereference(semaphore); } @@ -861,7 +898,7 @@ semaphore_wait_signal( return(semaphore_wait_internal(wait_semaphore, signal_semaphore, - (mach_timespec_t *)0, + 0ULL, SEMAPHORE_OPTION_NONE, (void(*)(kern_return_t))0)); } @@ -894,7 +931,7 @@ semaphore_wait_signal_trap_internal( if (kr == KERN_SUCCESS) { kr = semaphore_wait_internal(wait_semaphore, signal_semaphore, - (mach_timespec_t *)0, + 0ULL, SEMAPHORE_OPTION_NONE, caller_cont); semaphore_dereference(wait_semaphore); } @@ -919,15 +956,23 @@ semaphore_timedwait_signal( semaphore_t signal_semaphore, mach_timespec_t wait_time) { + int option = SEMAPHORE_OPTION_NONE; + uint64_t deadline = 0; + if (wait_semaphore == SEMAPHORE_NULL) return KERN_INVALID_ARGUMENT; if(BAD_MACH_TIMESPEC(&wait_time)) return KERN_INVALID_VALUE; + + if (wait_time.tv_sec == 0 && wait_time.tv_nsec == 0) + option = SEMAPHORE_TIMEOUT_NOBLOCK; + else + deadline = semaphore_deadline(wait_time.tv_sec, wait_time.tv_nsec); return(semaphore_wait_internal(wait_semaphore, signal_semaphore, - &wait_time, + deadline, option, (void(*)(kern_return_t))0)); } @@ -966,9 +1011,17 @@ semaphore_timedwait_signal_trap_internal( if (kr == KERN_SUCCESS) { kr = port_name_to_semaphore(wait_name, &wait_semaphore); if (kr == KERN_SUCCESS) { + int option = SEMAPHORE_OPTION_NONE; + uint64_t deadline = 0; + + if 
(sec == 0 && nsec == 0) + option = SEMAPHORE_TIMEOUT_NOBLOCK; + else + deadline = semaphore_deadline(sec, nsec); + kr = semaphore_wait_internal(wait_semaphore, signal_semaphore, - &wait_time, + deadline, option, caller_cont); semaphore_dereference(wait_semaphore); } @@ -988,15 +1041,7 @@ void semaphore_reference( semaphore_t semaphore) { - spl_t spl_level; - - spl_level = splsched(); - semaphore_lock(semaphore); - - semaphore->ref_count++; - - semaphore_unlock(semaphore); - splx(spl_level); + (void)hw_atomic_add(&semaphore->ref_count, 1); } /* @@ -1010,20 +1055,14 @@ semaphore_dereference( semaphore_t semaphore) { int ref_count; - spl_t spl_level; if (semaphore != NULL) { - spl_level = splsched(); - semaphore_lock(semaphore); - - ref_count = --(semaphore->ref_count); + ref_count = hw_atomic_sub(&semaphore->ref_count, 1); - semaphore_unlock(semaphore); - splx(spl_level); - - if (ref_count == 0) { + if (ref_count == 0) { assert(wait_queue_empty(&semaphore->wait_queue)); + ipc_port_dealloc_kernel(semaphore->port); zfree(semaphore_zone, semaphore); - } + } } } diff --git a/osfmk/kern/sync_sema.h b/osfmk/kern/sync_sema.h index 43bedc091..1da09b0f3 100644 --- a/osfmk/kern/sync_sema.h +++ b/osfmk/kern/sync_sema.h @@ -53,7 +53,7 @@ typedef struct semaphore { struct wait_queue wait_queue; /* queue of blocked threads & lock */ task_t owner; /* task that owns semaphore */ ipc_port_t port; /* semaphore port */ - int ref_count; /* reference count */ + uint32_t ref_count; /* reference count */ int count; /* current count value */ boolean_t active; /* active status */ } Semaphore; diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c index 15af1fa7e..3daf1ec38 100644 --- a/osfmk/kern/syscall_subr.c +++ b/osfmk/kern/syscall_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,6 +74,25 @@ #include #include + +#ifdef MACH_BSD +extern void workqueue_thread_yielded(void); +#endif /* MACH_BSD */ + + +/* Called from commpage to take a delayed preemption when exiting + * the "Preemption Free Zone" (PFZ). + */ +kern_return_t +pfz_exit( +__unused struct pfz_exit_args *args) +{ + /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */ + + return (KERN_SUCCESS); +} + + /* * swtch and swtch_pri both attempt to context switch (logic in * thread_block no-ops the context switch if nothing would happen). @@ -221,6 +240,8 @@ thread_switch( return (KERN_INVALID_ARGUMENT); } + workqueue_thread_yielded(); + /* * Translate the port name if supplied. */ diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c index 4a8dd5e30..59a402aa3 100644 --- a/osfmk/kern/syscall_sw.c +++ b/osfmk/kern/syscall_sw.c @@ -133,15 +133,15 @@ mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 38 */ MACH_TRAP(semaphore_timedwait_trap, 3, munge_www, munge_ddd), /* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, munge_wwww, munge_dddd), /* 40 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 41 */ MACH_TRAP(init_process, 0, NULL, NULL), +/* 41 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd), /* 44 */ MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd), /* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd), /* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd), /* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 48 */ MACH_TRAP(macx_swapon, 4, munge_wwww, munge_dddd), -/* 49 */ MACH_TRAP(macx_swapoff, 2, munge_ww, munge_dd), +/* 48 */ MACH_TRAP(macx_swapon, 5, munge_lwww, munge_dddd), +/* 49 */ MACH_TRAP(macx_swapoff, 3, munge_lw, munge_dd), /* 50 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 51 */ MACH_TRAP(macx_triggers, 4, munge_wwww, munge_dddd), /* 52 */ 
MACH_TRAP(macx_backing_store_suspend, 1, munge_w, munge_d), @@ -150,7 +150,7 @@ mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 55 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 56 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), /* 57 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 58 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 58 */ MACH_TRAP(pfz_exit, 0, NULL, NULL), /* 59 */ MACH_TRAP(swtch_pri, 0, NULL, NULL), /* 60 */ MACH_TRAP(swtch, 0, NULL, NULL), /* 61 */ MACH_TRAP(thread_switch, 3, munge_www, munge_ddd), @@ -226,6 +226,142 @@ mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { /* 127 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), }; +const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { +/* 0 */ "kern_invalid", +/* 1 */ "kern_invalid", +/* 2 */ "kern_invalid", +/* 3 */ "kern_invalid", +/* 4 */ "kern_invalid", +/* 5 */ "kern_invalid", +/* 6 */ "kern_invalid", +/* 7 */ "kern_invalid", +/* 8 */ "kern_invalid", +/* 9 */ "kern_invalid", +/* 10 */ "kern_invalid", +/* 11 */ "kern_invalid", +/* 12 */ "kern_invalid", +/* 13 */ "kern_invalid", +/* 14 */ "kern_invalid", +/* 15 */ "kern_invalid", +/* 16 */ "kern_invalid", +/* 17 */ "kern_invalid", +/* 18 */ "kern_invalid", +/* 19 */ "kern_invalid", +/* 20 */ "kern_invalid", +/* 21 */ "kern_invalid", +/* 22 */ "kern_invalid", +/* 23 */ "kern_invalid", +/* 24 */ "kern_invalid", +/* 25 */ "kern_invalid", +/* 26 */ "mach_reply_port", +/* 27 */ "thread_self_trap", +/* 28 */ "task_self_trap", +/* 29 */ "host_self_trap", +/* 30 */ "kern_invalid", +/* 31 */ "mach_msg_trap", +/* 32 */ "mach_msg_overwrite_trap", +/* 33 */ "semaphore_signal_trap", +/* 34 */ "semaphore_signal_all_trap", +/* 35 */ "semaphore_signal_thread_trap", +/* 36 */ "semaphore_wait_trap", +/* 37 */ "semaphore_wait_signal_trap", +/* 38 */ "semaphore_timedwait_trap", +/* 39 */ "semaphore_timedwait_signal_trap", +/* 40 */ "kern_invalid", +/* 41 */ "kern_invalid", +/* 42 */ "kern_invalid", +/* 43 */ "map_fd", +/* 44 */ "task_name_for_pid", +/* 45 
*/ "task_for_pid", +/* 46 */ "pid_for_task", +/* 47 */ "kern_invalid", +/* 48 */ "macx_swapon", +/* 49 */ "macx_swapoff", +/* 50 */ "kern_invalid", +/* 51 */ "macx_triggers", +/* 52 */ "macx_backing_store_suspend", +/* 53 */ "macx_backing_store_recovery", +/* 54 */ "kern_invalid", +/* 55 */ "kern_invalid", +/* 56 */ "kern_invalid", +/* 57 */ "kern_invalid", +/* 58 */ "pfz_exit", +/* 59 */ "swtch_pri", +/* 60 */ "swtch", +/* 61 */ "thread_switch", +/* 62 */ "clock_sleep_trap", +/* 63 */ "kern_invalid", +/* traps 64 - 95 reserved (debo) */ +/* 64 */ "kern_invalid", +/* 65 */ "kern_invalid", +/* 66 */ "kern_invalid", +/* 67 */ "kern_invalid", +/* 68 */ "kern_invalid", +/* 69 */ "kern_invalid", +/* 70 */ "kern_invalid", +/* 71 */ "kern_invalid", +/* 72 */ "kern_invalid", +/* 73 */ "kern_invalid", +/* 74 */ "kern_invalid", +/* 75 */ "kern_invalid", +/* 76 */ "kern_invalid", +/* 77 */ "kern_invalid", +/* 78 */ "kern_invalid", +/* 79 */ "kern_invalid", +/* 80 */ "kern_invalid", +/* 81 */ "kern_invalid", +/* 82 */ "kern_invalid", +/* 83 */ "kern_invalid", +/* 84 */ "kern_invalid", +/* 85 */ "kern_invalid", +/* 86 */ "kern_invalid", +/* 87 */ "kern_invalid", +/* 88 */ "kern_invalid", +/* 89 */ "mach_timebase_info_trap", +/* 90 */ "mach_wait_until_trap", +/* 91 */ "mk_timer_create_trap", +/* 92 */ "mk_timer_destroy_trap", +/* 93 */ "mk_timer_arm_trap", +/* 94 */ "mk_timer_cancel_trap", +/* 95 */ "kern_invalid", +/* traps 64 - 95 reserved (debo) */ +/* 96 */ "kern_invalid", +/* 97 */ "kern_invalid", +/* 98 */ "kern_invalid", +/* 99 */ "kern_invalid", +/* traps 100-107 reserved for iokit (esb) */ +/* 100 */ "kern_invalid", +/* 100 */ //"iokit_user_client_trap", +/* 101 */ "kern_invalid", +/* 102 */ "kern_invalid", +/* 103 */ "kern_invalid", +/* 104 */ "kern_invalid", +/* 105 */ "kern_invalid", +/* 106 */ "kern_invalid", +/* 107 */ "kern_invalid", +/* traps 108-127 unused */ +/* 108 */ "kern_invalid", +/* 109 */ "kern_invalid", +/* 110 */ "kern_invalid", +/* 111 */ 
"kern_invalid", +/* 112 */ "kern_invalid", +/* 113 */ "kern_invalid", +/* 114 */ "kern_invalid", +/* 115 */ "kern_invalid", +/* 116 */ "kern_invalid", +/* 117 */ "kern_invalid", +/* 118 */ "kern_invalid", +/* 119 */ "kern_invalid", +/* 120 */ "kern_invalid", +/* 121 */ "kern_invalid", +/* 122 */ "kern_invalid", +/* 123 */ "kern_invalid", +/* 124 */ "kern_invalid", +/* 125 */ "kern_invalid", +/* 126 */ "kern_invalid", +/* 127 */ "kern_invalid", +}; + int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0])); kern_return_t diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index b7dd90765..aedd993a1 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -136,6 +136,7 @@ #include #endif + /* * Exported interfaces */ @@ -152,8 +153,17 @@ #include #endif -task_t kernel_task; -zone_t task_zone; +#if CONFIG_COUNTERS +#include +#endif /* CONFIG_COUNTERS */ + +task_t kernel_task; +zone_t task_zone; +lck_attr_t task_lck_attr; +lck_grp_t task_lck_grp; +lck_grp_attr_t task_lck_grp_attr; + +int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ /* Forwards */ @@ -173,6 +183,9 @@ kern_return_t task_set_ledger( ledger_t wired, ledger_t paged); +int check_for_tasksuspend( + task_t task); + void task_backing_store_privileged( task_t task) @@ -189,7 +202,7 @@ task_set_64bit( task_t task, boolean_t is64bit) { -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) thread_t thread; #endif /* __i386__ */ int vm_flags = 0; @@ -214,7 +227,6 @@ task_set_64bit( MACH_VM_MAX_ADDRESS, 0); #ifdef __ppc__ - /* LP64todo - make this clean */ /* * PPC51: ppc64 is limited to 51-bit addresses. 
* Memory mapped above that limit is handled specially @@ -238,26 +250,50 @@ task_set_64bit( * certain routines may observe the thread as being in an inconsistent * state with respect to its task's 64-bitness. */ -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) + task_lock(task); queue_iterate(&task->threads, thread, thread_t, task_threads) { + thread_mtx_lock(thread); machine_thread_switch_addrmode(thread); + thread_mtx_unlock(thread); } + task_unlock(task); #endif /* __i386__ */ } + +void +task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size) +{ + task_lock(task); + task->all_image_info_addr = addr; + task->all_image_info_size = size; + task_unlock(task); +} + void task_init(void) { + + lck_grp_attr_setdefault(&task_lck_grp_attr); + lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr); + lck_attr_setdefault(&task_lck_attr); + lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr); + task_zone = zinit( sizeof(struct task), - TASK_MAX * sizeof(struct task), + task_max * sizeof(struct task), TASK_CHUNK * sizeof(struct task), "tasks"); /* * Create the kernel task as the first task. 
*/ +#ifdef __LP64__ + if (task_create_internal(TASK_NULL, FALSE, TRUE, &kernel_task) != KERN_SUCCESS) +#else if (task_create_internal(TASK_NULL, FALSE, FALSE, &kernel_task) != KERN_SUCCESS) +#endif panic("task_init\n"); vm_map_deallocate(kernel_task->map); @@ -338,6 +374,7 @@ task_create_internal( /* one ref for just being alive; one for our caller */ new_task->ref_count = 2; + /* if inherit_memory is true, parent_task MUST not be NULL */ if (inherit_memory) new_task->map = vm_map_fork(parent_task->map); else @@ -347,17 +384,17 @@ task_create_internal( /* Inherit memlock limit from parent */ if (parent_task) - vm_map_set_user_wire_limit(new_task->map, parent_task->map->user_wire_limit); + vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit); - mutex_init(&new_task->lock, 0); + lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr); queue_init(&new_task->threads); new_task->suspend_count = 0; new_task->thread_count = 0; new_task->active_thread_count = 0; new_task->user_stop_count = 0; - new_task->pset_hint = PROCESSOR_SET_NULL; new_task->role = TASK_UNSPECIFIED; new_task->active = TRUE; + new_task->halting = FALSE; new_task->user_data = NULL; new_task->faults = 0; new_task->cow_faults = 0; @@ -375,8 +412,10 @@ task_create_internal( new_task->bsd_info = NULL; #endif /* MACH_BSD */ -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) new_task->i386_ldt = 0; + new_task->task_debug = NULL; + #endif #ifdef __ppc__ @@ -389,7 +428,6 @@ task_create_internal( new_task->lock_sets_owned = 0; #if CONFIG_MACF_MACH - /*mutex_init(&new_task->labellock, ETAP_NO_TRACE);*/ new_task->label = labelh_new(1); mac_task_label_init (&new_task->maclabel); #endif @@ -405,6 +443,10 @@ task_create_internal( new_task->affinity_space = NULL; +#if CONFIG_COUNTERS + new_task->t_chud = 0U; +#endif + if (parent_task != TASK_NULL) { new_task->sec_token = parent_task->sec_token; new_task->audit_token = parent_task->audit_token; @@ -419,19 +461,29 @@ 
task_create_internal( convert_port_to_ledger(parent_task->paged_ledger_port)); if(task_has_64BitAddr(parent_task)) task_set_64BitAddr(new_task); + new_task->all_image_info_addr = parent_task->all_image_info_addr; + new_task->all_image_info_size = parent_task->all_image_info_size; -#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) if (inherit_memory && parent_task->i386_ldt) new_task->i386_ldt = user_ldt_copy(parent_task->i386_ldt); #endif if (inherit_memory && parent_task->affinity_space) task_affinity_create(parent_task, new_task); + + new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); } else { new_task->sec_token = KERNEL_SECURITY_TOKEN; new_task->audit_token = KERNEL_AUDIT_TOKEN; new_task->wired_ledger_port = ledger_copy(root_wired_ledger); new_task->paged_ledger_port = ledger_copy(root_paged_ledger); +#ifdef __LP64__ + if(is_64bit) + task_set_64BitAddr(new_task); +#endif + + new_task->pset_hint = PROCESSOR_SET_NULL; } if (kernel_task == TASK_NULL) { @@ -443,10 +495,10 @@ task_create_internal( new_task->max_priority = MAXPRI_USER; } - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); queue_enter(&tasks, new_task, task_t, tasks); tasks_count++; - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); if (vm_backing_store_low && parent_task != NULL) new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); @@ -480,6 +532,8 @@ task_deallocate( vm_map_deallocate(task->map); is_release(task->itk_space); + lck_mtx_destroy(&task->lock, &task_lck_grp); + #if CONFIG_MACF_MACH labelh_release(task->label); #endif @@ -611,7 +665,6 @@ task_terminate_internal( ipc_space_destroy(task->itk_space); #ifdef __ppc__ - /* LP64todo - make this clean */ /* * PPC51: ppc64 is limited to 51-bit addresses. 
*/ @@ -637,10 +690,10 @@ task_terminate_internal( /* release our shared region */ vm_shared_region_set(task, NULL); - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); queue_remove(&tasks, task, task_t, tasks); tasks_count--; - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); /* * We no longer need to guard against being aborted, so restore @@ -661,16 +714,15 @@ task_terminate_internal( } /* - * task_halt: + * task_start_halt: * * Shut the current task down (except for the current thread) in * preparation for dramatic changes to the task (probably exec). - * We hold the task, terminate all other threads in the task and - * wait for them to terminate, clean up the portspace, and when - * all done, let the current thread go. + * We hold the task and mark all other threads in the task for + * termination. */ kern_return_t -task_halt( +task_start_halt( task_t task) { thread_t thread, self; @@ -684,7 +736,7 @@ task_halt( task_lock(task); - if (!task->active || !self->active) { + if (task->halting || !task->active || !self->active) { /* * Task or current thread is already being terminated. * Hurry up and return out of the current kernel context @@ -696,7 +748,10 @@ task_halt( return (KERN_FAILURE); } + task->halting = TRUE; + if (task->thread_count > 1) { + /* * Mark all the threads to keep them from starting any more * user-level execution. The thread_terminate_internal code @@ -715,15 +770,47 @@ task_halt( task_release_locked(task); } + task_unlock(task); + return KERN_SUCCESS; +} + + +/* + * task_complete_halt: + * + * Complete task halt by waiting for threads to terminate, then clean + * up task resources (VM, port namespace, etc...) and then let the + * current thread go in the (practically empty) task context. 
+ */ +void +task_complete_halt(task_t task) +{ + task_lock(task); + assert(task->halting); + assert(task == current_task()); /* * Give the machine dependent code a chance - * to perform cleanup before ripping apart - * the task. + * to perform cleanup of task-level resources + * associated with the current thread before + * ripping apart the task. + * + * This must be done with the task locked. */ machine_thread_terminate_self(); - task_unlock(task); + /* + * Wait for the other threads to get shut down. + * When the last other thread is reaped, we'll be + * woken up. + */ + if (task->thread_count > 1) { + assert_wait((event_t)&task->halting, THREAD_UNINT); + task_unlock(task); + thread_block(THREAD_CONTINUE_NULL); + } else { + task_unlock(task); + } /* * Destroy all synchronizers owned by the task. @@ -743,7 +830,7 @@ task_halt( vm_map_remove(task->map, task->map->min_offset, task->map->max_offset, VM_MAP_NO_FLAGS); - return (KERN_SUCCESS); + task->halting = FALSE; } /* @@ -1184,24 +1271,37 @@ task_info( task_info_t task_info_out, mach_msg_type_number_t *task_info_count) { + kern_return_t error = KERN_SUCCESS; + if (task == TASK_NULL) return (KERN_INVALID_ARGUMENT); + task_lock(task); + + if ((task != current_task()) && (!task->active)) { + task_unlock(task); + return (KERN_INVALID_ARGUMENT); + } + switch (flavor) { case TASK_BASIC_INFO_32: case TASK_BASIC2_INFO_32: { task_basic_info_32_t basic_info; - vm_map_t map; + vm_map_t map; + clock_sec_t secs; + clock_usec_t usecs; - if (*task_info_count < TASK_BASIC_INFO_32_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_BASIC_INFO_32_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } basic_info = (task_basic_info_32_t)task_info_out; map = (task == kernel_task)?
kernel_map: task->map; - basic_info->virtual_size = CAST_DOWN(vm_offset_t,map->size); + basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size; if (flavor == TASK_BASIC2_INFO_32) { /* * The "BASIC2" flavor gets the maximum resident @@ -1213,18 +1313,19 @@ task_info( } basic_info->resident_size *= PAGE_SIZE; - task_lock(task); basic_info->policy = ((task != kernel_task)? POLICY_TIMESHARE: POLICY_RR); basic_info->suspend_count = task->user_stop_count; - absolutetime_to_microtime(task->total_user_time, - (unsigned *)&basic_info->user_time.seconds, - (unsigned *)&basic_info->user_time.microseconds); - absolutetime_to_microtime(task->total_system_time, - (unsigned *)&basic_info->system_time.seconds, - (unsigned *)&basic_info->system_time.microseconds); - task_unlock(task); + absolutetime_to_microtime(task->total_user_time, &secs, &usecs); + basic_info->user_time.seconds = + (typeof(basic_info->user_time.seconds))secs; + basic_info->user_time.microseconds = usecs; + + absolutetime_to_microtime(task->total_system_time, &secs, &usecs); + basic_info->system_time.seconds = + (typeof(basic_info->system_time.seconds))secs; + basic_info->system_time.microseconds = usecs; *task_info_count = TASK_BASIC_INFO_32_COUNT; break; @@ -1233,10 +1334,14 @@ task_info( case TASK_BASIC_INFO_64: { task_basic_info_64_t basic_info; - vm_map_t map; + vm_map_t map; + clock_sec_t secs; + clock_usec_t usecs; - if (*task_info_count < TASK_BASIC_INFO_64_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_BASIC_INFO_64_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } basic_info = (task_basic_info_64_t)task_info_out; @@ -1246,18 +1351,19 @@ task_info( (mach_vm_size_t)(pmap_resident_count(map->pmap)) * PAGE_SIZE_64; - task_lock(task); basic_info->policy = ((task != kernel_task)? 
POLICY_TIMESHARE: POLICY_RR); basic_info->suspend_count = task->user_stop_count; - absolutetime_to_microtime(task->total_user_time, - (unsigned *)&basic_info->user_time.seconds, - (unsigned *)&basic_info->user_time.microseconds); - absolutetime_to_microtime(task->total_system_time, - (unsigned *)&basic_info->system_time.seconds, - (unsigned *)&basic_info->system_time.microseconds); - task_unlock(task); + absolutetime_to_microtime(task->total_user_time, &secs, &usecs); + basic_info->user_time.seconds = + (typeof(basic_info->user_time.seconds))secs; + basic_info->user_time.microseconds = usecs; + + absolutetime_to_microtime(task->total_system_time, &secs, &usecs); + basic_info->system_time.seconds = + (typeof(basic_info->system_time.seconds))secs; + basic_info->system_time.microseconds = usecs; *task_info_count = TASK_BASIC_INFO_64_COUNT; break; @@ -1268,8 +1374,10 @@ task_info( register task_thread_times_info_t times_info; register thread_t thread; - if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } times_info = (task_thread_times_info_t) task_info_out; times_info->user_time.seconds = 0; @@ -1277,7 +1385,6 @@ task_info( times_info->system_time.seconds = 0; times_info->system_time.microseconds = 0; - task_lock(task); queue_iterate(&task->threads, thread, thread_t, task_threads) { time_value_t user_time, system_time; @@ -1288,7 +1395,6 @@ task_info( time_value_add(×_info->system_time, &system_time); } - task_unlock(task); *task_info_count = TASK_THREAD_TIMES_INFO_COUNT; break; @@ -1299,13 +1405,14 @@ task_info( task_absolutetime_info_t info; register thread_t thread; - if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } info = (task_absolutetime_info_t)task_info_out; info->threads_user = 
info->threads_system = 0; - task_lock(task); info->total_user = task->total_user_time; info->total_system = task->total_system_time; @@ -1322,20 +1429,37 @@ task_info( info->total_system += tval; } - task_unlock(task); *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT; break; } + case TASK_DYLD_INFO: + { + task_dyld_info_t info; + + if (*task_info_count < TASK_DYLD_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + info = (task_dyld_info_t)task_info_out; + info->all_image_info_addr = task->all_image_info_addr; + info->all_image_info_size = task->all_image_info_size; + *task_info_count = TASK_DYLD_INFO_COUNT; + break; + } + /* OBSOLETE */ case TASK_SCHED_FIFO_INFO: { - if (*task_info_count < POLICY_FIFO_BASE_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < POLICY_FIFO_BASE_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } - return (KERN_INVALID_POLICY); + error = KERN_INVALID_POLICY; + break; /* obsolete flavor: do not fall through into TASK_SCHED_RR_INFO */ } /* OBSOLETE */ @@ -1343,19 +1466,19 @@ task_info( { register policy_rr_base_t rr_base; - if (*task_info_count < POLICY_RR_BASE_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < POLICY_RR_BASE_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } rr_base = (policy_rr_base_t) task_info_out; - task_lock(task); if (task != kernel_task) { - task_unlock(task); - return (KERN_INVALID_POLICY); + error = KERN_INVALID_POLICY; + break; } rr_base->base_priority = task->priority; - task_unlock(task); rr_base->quantum = std_quantum_us / 1000; @@ -1368,19 +1491,19 @@ task_info( { register policy_timeshare_base_t ts_base; - if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } ts_base = (policy_timeshare_base_t) task_info_out; - task_lock(task); if (task == kernel_task) { - task_unlock(task); - return (KERN_INVALID_POLICY); + error = KERN_INVALID_POLICY; + break; } ts_base->base_priority = task->priority; -
task_unlock(task); *task_info_count = POLICY_TIMESHARE_BASE_COUNT; break; @@ -1390,14 +1513,14 @@ task_info( { register security_token_t *sec_token_p; - if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } sec_token_p = (security_token_t *) task_info_out; - task_lock(task); *sec_token_p = task->sec_token; - task_unlock(task); *task_info_count = TASK_SECURITY_TOKEN_COUNT; break; @@ -1407,33 +1530,35 @@ task_info( { register audit_token_t *audit_token_p; - if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } audit_token_p = (audit_token_t *) task_info_out; - task_lock(task); *audit_token_p = task->audit_token; - task_unlock(task); *task_info_count = TASK_AUDIT_TOKEN_COUNT; break; } case TASK_SCHED_INFO: - return (KERN_INVALID_ARGUMENT); + error = KERN_INVALID_ARGUMENT; + break; /* unsupported flavor: do not fall through into TASK_EVENTS_INFO */ case TASK_EVENTS_INFO: { register task_events_info_t events_info; register thread_t thread; - if (*task_info_count < TASK_EVENTS_INFO_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_EVENTS_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } events_info = (task_events_info_t) task_info_out; - task_lock(task); events_info->faults = task->faults; events_info->pageins = task->pageins; @@ -1449,24 +1573,27 @@ task_info( events_info->csw += thread->c_switch; } - task_unlock(task); *task_info_count = TASK_EVENTS_INFO_COUNT; break; } case TASK_AFFINITY_TAG_INFO: { - if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) - return (KERN_INVALID_ARGUMENT); + if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } - return task_affinity_info(task, task_info_out, task_info_count); + error = task_affinity_info(task, task_info_out, task_info_count); + break; /* keep task_affinity_info()'s result; the default case would overwrite it */ } default: - return (KERN_INVALID_ARGUMENT);
+ error = KERN_INVALID_ARGUMENT; } - return (KERN_SUCCESS); + task_unlock(task); + return (error); } void @@ -1530,38 +1656,43 @@ __unused uint32_t *microsecs) { thread_t thread = current_thread(); - uint32_t tdelt, secs; + uint32_t tdelt; + clock_sec_t secs; uint64_t tsum; assert(task == current_task()); assert(task->vtimers & which); - tdelt = secs = 0; + secs = tdelt = 0; switch (which) { case TASK_VTIMER_USER: - tdelt = timer_delta(&thread->user_timer, + tdelt = (uint32_t)timer_delta(&thread->user_timer, &thread->vtimer_user_save); + absolutetime_to_microtime(tdelt, &secs, microsecs); break; case TASK_VTIMER_PROF: tsum = timer_grab(&thread->user_timer); tsum += timer_grab(&thread->system_timer); - tdelt = tsum - thread->vtimer_prof_save; - thread->vtimer_prof_save = tsum; + tdelt = (uint32_t)(tsum - thread->vtimer_prof_save); + absolutetime_to_microtime(tdelt, &secs, microsecs); + /* if the time delta is smaller than a usec, ignore */ + if (*microsecs != 0) + thread->vtimer_prof_save = tsum; break; case TASK_VTIMER_RLIM: tsum = timer_grab(&thread->user_timer); tsum += timer_grab(&thread->system_timer); - tdelt = tsum - thread->vtimer_rlim_save; + tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save); thread->vtimer_rlim_save = tsum; + absolutetime_to_microtime(tdelt, &secs, microsecs); break; } - absolutetime_to_microtime(tdelt, &secs, microsecs); } /* @@ -1706,6 +1837,70 @@ task_synchronizer_destroy_all(task_t task) } } +/* + * Install default (machine-dependent) initial thread state + * on the task. Subsequent thread creation will have this initial + * state set on the thread by machine_thread_inherit_taskwide(). 
+ * Flavors and structures are exactly the same as those to thread_set_state() + */ +kern_return_t +task_set_state( + task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t state_count) +{ + kern_return_t ret; + + if (task == TASK_NULL) { + return (KERN_INVALID_ARGUMENT); + } + + task_lock(task); + + if (!task->active) { + task_unlock(task); + return (KERN_FAILURE); + } + + ret = machine_task_set_state(task, flavor, state, state_count); + + task_unlock(task); + return ret; +} + +/* + * Examine the default (machine-dependent) initial thread state + * on the task, as set by task_set_state(). Flavors and structures + * are exactly the same as those passed to thread_get_state(). + */ +kern_return_t +task_get_state( + task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t *state_count) +{ + kern_return_t ret; + + if (task == TASK_NULL) { + return (KERN_INVALID_ARGUMENT); + } + + task_lock(task); + + if (!task->active) { + task_unlock(task); + return (KERN_FAILURE); + } + + ret = machine_task_get_state(task, flavor, state, state_count); + + task_unlock(task); + return ret; +} + + /* * We need to export some functions to other components that * are currently implemented in macros within the osfmk @@ -1719,6 +1914,16 @@ boolean_t is_kerneltask(task_t t) return (FALSE); } +int +check_for_tasksuspend(task_t task) +{ + + if (task == TASK_NULL) + return (0); + + return (task->suspend_count > 0); +} + #undef current_task task_t current_task(void); task_t current_task(void) diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index d0cdc4aaa..0e7ea86e2 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -116,22 +116,25 @@ struct task { /* Synchronization/destruction information */ - decl_mutex_data(,lock) /* Task's lock */ + decl_lck_mtx_data(,lock) /* Task's lock */ uint32_t ref_count; /* Number of references to me */ boolean_t active; /* Task has not been terminated */ + boolean_t halting; /* Task is being halted */ /* Miscellaneous */ vm_map_t map; /* Address space description */ queue_chain_t tasks; /* global list of tasks */ void *user_data; /* Arbitrary data settable via IPC */ - int suspend_count; /* Internal scheduling only */ /* Threads in this task */ queue_head_t threads; + + processor_set_t pset_hint; + struct affinity_space *affinity_space; + int thread_count; uint32_t active_thread_count; - processor_set_t pset_hint; - struct affinity_space *affinity_space; + int suspend_count; /* Internal scheduling only */ /* User-visible scheduling information */ integer_t user_stop_count; /* outstanding stops */ @@ -153,7 +156,7 @@ struct task { uint32_t vtimers; /* IPC structures */ - decl_mutex_data(,itk_lock_data) + decl_lck_mtx_data(,itk_lock_data) struct ipc_port *itk_self; /* not a right, doesn't hold ref */ struct ipc_port *itk_nself; /* not a right, doesn't hold ref */ struct ipc_port *itk_sself; /* a send right */ @@ -164,7 +167,6 @@ struct task { struct ipc_port *itk_seatbelt; /* a send right */ struct ipc_port *itk_gssd; /* yet another send right */ struct ipc_port *itk_task_access; /* and another send right */ - struct ipc_port *itk_automountd;/* a send right */ struct ipc_port *itk_registered[TASK_PORT_REGISTER_MAX]; /* all send rights */ @@ -208,15 +210,22 @@ struct task { #define task_clear_64BitAddr(task) \ ((task)->taskFeatures[0] &= ~tf64BitAddr) + mach_vm_address_t all_image_info_addr; /* dyld __all_image_info */ + mach_vm_size_t all_image_info_size; /* section location and size */ #if CONFIG_MACF_MACH ipc_labelh_t label; #endif +#if CONFIG_COUNTERS +#define TASK_PMC_FLAG 0x1 /* Bit in "t_chud" 
signifying PMC interest */ + uint32_t t_chud; /* CHUD flags, used for Shark */ +#endif + }; -#define task_lock(task) mutex_lock(&(task)->lock) -#define task_lock_try(task) mutex_try(&(task)->lock) -#define task_unlock(task) mutex_unlock(&(task)->lock) +#define task_lock(task) lck_mtx_lock(&(task)->lock) +#define task_lock_try(task) lck_mtx_try_lock(&(task)->lock) +#define task_unlock(task) lck_mtx_unlock(&(task)->lock) #if CONFIG_MACF_MACH #define maclabel label->lh_label @@ -228,9 +237,10 @@ extern void tasklabel_lock2(task_t a, task_t b); extern void tasklabel_unlock2(task_t a, task_t b); #endif /* MAC_MACH */ -#define itk_lock_init(task) mutex_init(&(task)->itk_lock_data, 0) -#define itk_lock(task) mutex_lock(&(task)->itk_lock_data) -#define itk_unlock(task) mutex_unlock(&(task)->itk_lock_data) +#define itk_lock_init(task) lck_mtx_init(&(task)->itk_lock_data, &ipc_lck_grp, &ipc_lck_attr) +#define itk_lock_destroy(task) lck_mtx_destroy(&(task)->itk_lock_data, &ipc_lck_grp) +#define itk_lock(task) lck_mtx_lock(&(task)->itk_lock_data) +#define itk_unlock(task) lck_mtx_unlock(&(task)->itk_lock_data) #define task_reference_internal(task) \ (void)hw_atomic_add(&(task)->ref_count, 1) @@ -256,6 +266,9 @@ extern void task_init(void) __attribute__((section("__TEXT, initcode"))); #define current_task_fast() (current_thread()->task) #define current_task() current_task_fast() +extern lck_attr_t task_lck_attr; +extern lck_grp_t task_lck_grp; + #else /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -281,7 +294,11 @@ extern kern_return_t task_release( task_t task); /* Halt all other threads in the current task */ -extern kern_return_t task_halt( +extern kern_return_t task_start_halt( + task_t task); + +/* Wait for other threads to halt and free halting task resources */ +extern void task_complete_halt( task_t task); extern kern_return_t task_terminate_internal( @@ -321,23 +338,41 @@ extern void task_set_64bit( extern void task_backing_store_privileged( task_t task); -extern int 
get_task_numactivethreads( - task_t task); +extern void task_set_dyld_info( + task_t task, + mach_vm_address_t addr, + mach_vm_size_t size); + /* Get number of activations in a task */ extern int get_task_numacts( task_t task); +extern int get_task_numactivethreads(task_t task); /* JMM - should just be temporary (implementation in bsd_kern still) */ extern void set_bsdtask_info(task_t,void *); extern vm_map_t get_task_map_reference(task_t); -extern vm_map_t swap_task_map(task_t, vm_map_t); +extern vm_map_t swap_task_map(task_t, thread_t, vm_map_t); extern pmap_t get_task_pmap(task_t); +extern uint64_t get_task_resident_size(task_t); extern boolean_t is_kerneltask(task_t task); extern kern_return_t check_actforsig(task_t task, thread_t thread, int setast); +extern kern_return_t machine_task_get_state( + task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t *state_count); + +extern kern_return_t machine_task_set_state( + task_t task, + int flavor, + thread_state_t state, + mach_msg_type_number_t state_count); + + #endif /* XNU_KERNEL_PRIVATE */ #ifdef KERNEL_PRIVATE diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 9e4360a73..b33a7d2be 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,6 +132,9 @@ #include static struct zone *thread_zone; +static lck_grp_attr_t thread_lck_grp_attr; +lck_attr_t thread_lck_attr; +lck_grp_t thread_lck_grp; decl_simple_lock_data(static,thread_stack_lock) static queue_head_t thread_stack_queue; @@ -144,10 +147,17 @@ static struct thread thread_template, init_thread; static void sched_call_null( int type, thread_t thread); + #ifdef MACH_BSD extern void proc_exit(void *); +extern uint64_t get_dispatchqueue_offset_from_proc(void *); #endif /* MACH_BSD */ + extern int debug_task; +int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ +int task_threadmax = CONFIG_THREAD_MAX; + +static uint64_t thread_unique_id = 0; void thread_bootstrap(void) @@ -200,7 +210,6 @@ thread_bootstrap(void) thread_template.bound_processor = PROCESSOR_NULL; thread_template.last_processor = PROCESSOR_NULL; - thread_template.last_switch = 0; thread_template.sched_call = sched_call_null; @@ -231,8 +240,10 @@ thread_bootstrap(void) thread_template.t_dtrace_vtime = 0; thread_template.t_dtrace_tracing = 0; #endif /* CONFIG_DTRACE */ - + thread_template.t_chud = 0; + thread_template.t_page_creation_count = 0; + thread_template.t_page_creation_time = 0; thread_template.affinity_set = NULL; @@ -245,10 +256,14 @@ thread_init(void) { thread_zone = zinit( sizeof(struct thread), - THREAD_MAX * sizeof(struct thread), + thread_max * sizeof(struct thread), THREAD_CHUNK * sizeof(struct thread), "threads"); - + + lck_grp_attr_setdefault(&thread_lck_grp_attr); + lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr); + lck_attr_setdefault(&thread_lck_attr); + stack_init(); /* @@ -274,7 +289,9 @@ thread_terminate_self(void) thread_t thread = current_thread(); task_t task; spl_t s; - int lastthread = 0; + int threadcnt; + + DTRACE_PROC(lwp__exit); thread_mtx_lock(thread); @@ -308,23 +325,22 @@ thread_terminate_self(void) thread_lock(thread); } + thread_sched_call(thread, NULL); + 
thread_unlock(thread); splx(s); thread_policy_reset(thread); + task = thread->task; + uthread_cleanup(task, thread->uthread, task->bsd_info); + threadcnt = hw_atomic_sub(&task->active_thread_count, 1); + /* * If we are the last thread to terminate and the task is * associated with a BSD process, perform BSD process exit. */ - task = thread->task; - uthread_cleanup(task, thread->uthread, task->bsd_info); - if (hw_atomic_sub(&task->active_thread_count, 1) == 0 && - task->bsd_info != NULL) { - lastthread = 1; - } - - if (lastthread != 0) + if (threadcnt == 0 && task->bsd_info != NULL) proc_exit(task->bsd_info); uthread_cred_free(thread->uthread); @@ -405,6 +421,7 @@ thread_deallocate( if (thread->kernel_stack != 0) stack_free(thread); + lck_mtx_destroy(&thread->mutex, &thread_lck_grp); machine_thread_destroy(thread); zfree(thread_zone, thread); @@ -440,12 +457,20 @@ thread_terminate_daemon(void) queue_remove(&task->threads, thread, thread_t, task_threads); task->thread_count--; + + /* + * If the task is being halted, and there is only one thread + * left in the task after this one, then wakeup that thread. 
+ */ + if (task->thread_count == 1 && task->halting) + thread_wakeup((event_t)&task->halting); + task_unlock(task); - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); queue_remove(&threads, thread, thread_t, threads); threads_count--; - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); thread_deallocate(thread); @@ -565,6 +590,10 @@ thread_create_internal( task_t parent_task, integer_t priority, thread_continue_t continuation, + int options, +#define TH_OPTION_NONE 0x00 +#define TH_OPTION_NOCRED 0x01 +#define TH_OPTION_NOSUSP 0x02 thread_t *out_thread) { thread_t new_thread; @@ -573,38 +602,35 @@ thread_create_internal( /* * Allocate a thread and initialize static fields */ - if (first_thread == NULL) + if (first_thread == THREAD_NULL) new_thread = first_thread = current_thread(); else new_thread = (thread_t)zalloc(thread_zone); - if (new_thread == NULL) + if (new_thread == THREAD_NULL) return (KERN_RESOURCE_SHORTAGE); if (new_thread != first_thread) *new_thread = thread_template; #ifdef MACH_BSD - { - new_thread->uthread = uthread_alloc(parent_task, new_thread); - if (new_thread->uthread == NULL) { - zfree(thread_zone, new_thread); - return (KERN_RESOURCE_SHORTAGE); - } + new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0); + if (new_thread->uthread == NULL) { + zfree(thread_zone, new_thread); + return (KERN_RESOURCE_SHORTAGE); } #endif /* MACH_BSD */ if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) { #ifdef MACH_BSD - { - void *ut = new_thread->uthread; + void *ut = new_thread->uthread; - new_thread->uthread = NULL; - /* cred free may not be necessary */ - uthread_cleanup(parent_task, ut, parent_task->bsd_info); - uthread_cred_free(ut); - uthread_zone_free(ut); - } + new_thread->uthread = NULL; + /* cred free may not be necessary */ + uthread_cleanup(parent_task, ut, parent_task->bsd_info); + uthread_cred_free(ut); + uthread_zone_free(ut); #endif /* 
MACH_BSD */ + zfree(thread_zone, new_thread); return (KERN_FAILURE); } @@ -614,21 +640,23 @@ thread_create_internal( thread_lock_init(new_thread); wake_lock_init(new_thread); - mutex_init(&new_thread->mutex, 0); + lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr); ipc_thread_init(new_thread); queue_init(&new_thread->held_ulocks); new_thread->continuation = continuation; - mutex_lock(&tasks_threads_lock); + lck_mtx_lock(&tasks_threads_lock); task_lock(parent_task); - if ( !parent_task->active || - (parent_task->thread_count >= THREAD_MAX && - parent_task != kernel_task)) { + if ( !parent_task->active || parent_task->halting || + ((options & TH_OPTION_NOSUSP) != 0 && + parent_task->suspend_count > 0) || + (parent_task->thread_count >= task_threadmax && + parent_task != kernel_task) ) { task_unlock(parent_task); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); #ifdef MACH_BSD { @@ -643,11 +671,15 @@ thread_create_internal( #endif /* MACH_BSD */ ipc_thread_disable(new_thread); ipc_thread_terminate(new_thread); + lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp); machine_thread_destroy(new_thread); zfree(thread_zone, new_thread); return (KERN_FAILURE); } + /* New threads inherit any default state on the task */ + machine_thread_inherit_taskwide(new_thread, parent_task); + task_reference_internal(parent_task); /* Cache the task's map */ @@ -660,12 +692,24 @@ thread_create_internal( /* So terminating threads don't need to take the task lock to decrement */ hw_atomic_add(&parent_task->active_thread_count, 1); + /* Protected by the tasks_threads_lock */ + new_thread->thread_id = ++thread_unique_id; + queue_enter(&threads, new_thread, thread_t, threads); threads_count++; timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread); timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread); +#if CONFIG_COUNTERS + /* + * If parent task has any reservations, they need to be propagated to this + 
* thread. + */ + new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ? + THREAD_PMC_FLAG : 0U; +#endif + /* Set the thread's scheduling parameters */ if (parent_task != kernel_task) new_thread->sched_mode |= TH_MODE_TIMESHARE; @@ -691,7 +735,7 @@ thread_create_internal( KERNEL_DEBUG_CONSTANT( TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE, - (vm_address_t)new_thread, dbg_arg2, 0, 0, 0); + (vm_address_t)(uintptr_t)thread_tid(new_thread), dbg_arg2, 0, 0, 0); kdbg_trace_string(parent_task->bsd_info, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4); @@ -717,7 +761,7 @@ thread_create( if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); - result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, &thread); + result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread); if (result != KERN_SUCCESS) return (result); @@ -727,7 +771,7 @@ thread_create( thread_hold(thread); task_unlock(task); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); *new_thread = thread; @@ -748,7 +792,7 @@ thread_create_running( if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); - result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, &thread); + result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, TH_OPTION_NONE, &thread); if (result != KERN_SUCCESS) return (result); @@ -756,7 +800,7 @@ thread_create_running( thread, flavor, new_state, new_state_count); if (result != KERN_SUCCESS) { task_unlock(task); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); thread_terminate(thread); thread_deallocate(thread); @@ -768,13 +812,42 @@ thread_create_running( thread_mtx_unlock(thread); task_unlock(task); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); *new_thread = thread; return (result); } +kern_return_t 
+thread_create_workq( + task_t task, + thread_t *new_thread) +{ + kern_return_t result; + thread_t thread; + + if (task == TASK_NULL || task == kernel_task) + return (KERN_INVALID_ARGUMENT); + + result = thread_create_internal(task, -1, (thread_continue_t)thread_bootstrap_return, + TH_OPTION_NOCRED | TH_OPTION_NOSUSP, &thread); + if (result != KERN_SUCCESS) + return (result); + + thread->user_stop_count = 1; + thread_hold(thread); + if (task->suspend_count > 0) + thread_hold(thread); + + task_unlock(task); + lck_mtx_unlock(&tasks_threads_lock); + + *new_thread = thread; + + return (KERN_SUCCESS); +} + /* * kernel_thread_create: * @@ -792,12 +865,12 @@ kernel_thread_create( thread_t thread; task_t task = kernel_task; - result = thread_create_internal(task, priority, continuation, &thread); + result = thread_create_internal(task, priority, continuation, TH_OPTION_NONE, &thread); if (result != KERN_SUCCESS) return (result); task_unlock(task); - mutex_unlock(&tasks_threads_lock); + lck_mtx_unlock(&tasks_threads_lock); stack_alloc(thread); assert(thread->kernel_stack != 0); @@ -829,12 +902,12 @@ kernel_thread_start_priority( if (result != KERN_SUCCESS) return (result); + *new_thread = thread; + thread_mtx_lock(thread); thread_start_internal(thread); thread_mtx_unlock(thread); - *new_thread = thread; - return (result); } @@ -847,6 +920,8 @@ kernel_thread_start( return kernel_thread_start_priority(continuation, parameter, -1, new_thread); } +#ifndef __LP64__ + thread_t kernel_thread( task_t task, @@ -867,6 +942,8 @@ kernel_thread( return (thread); } +#endif /* __LP64__ */ + kern_return_t thread_info_internal( register thread_t thread, @@ -909,8 +986,8 @@ thread_info_internal( * then for 5/8 ageing. The correction factor [3/5] is * (1/(5/8) - 1). 
*/ - basic_info->cpu_usage = ((uint64_t)thread->cpu_usage - * TH_USAGE_SCALE) / sched_tick_interval; + basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage + * TH_USAGE_SCALE) / sched_tick_interval); basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5; if (basic_info->cpu_usage > TH_USAGE_SCALE) @@ -955,6 +1032,36 @@ thread_info_internal( return (KERN_SUCCESS); } else + if (flavor == THREAD_IDENTIFIER_INFO) { + register thread_identifier_info_t identifier_info; + + if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT) + return (KERN_INVALID_ARGUMENT); + + identifier_info = (thread_identifier_info_t) thread_info_out; + + s = splsched(); + thread_lock(thread); + + identifier_info->thread_id = thread->thread_id; +#if defined(__ppc__) || defined(__arm__) + identifier_info->thread_handle = thread->machine.cthread_self; +#else + identifier_info->thread_handle = thread->machine.pcb->cthread_self; +#endif + if(thread->task->bsd_info) { + identifier_info->dispatch_qaddr = identifier_info->thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info); + } else { + thread_unlock(thread); + splx(s); + return KERN_INVALID_ARGUMENT; + } + + thread_unlock(thread); + splx(s); + return KERN_SUCCESS; + } + else if (flavor == THREAD_SCHED_TIMESHARE_INFO) { policy_timeshare_info_t ts_info; @@ -1049,13 +1156,16 @@ thread_read_times( time_value_t *user_time, time_value_t *system_time) { - absolutetime_to_microtime(timer_grab(&thread->user_timer), - (unsigned *)&user_time->seconds, - (unsigned *)&user_time->microseconds); + clock_sec_t secs; + clock_usec_t usecs; + + absolutetime_to_microtime(timer_grab(&thread->user_timer), &secs, &usecs); + user_time->seconds = (typeof(user_time->seconds))secs; + user_time->microseconds = usecs; - absolutetime_to_microtime(timer_grab(&thread->system_timer), - (unsigned *)&system_time->seconds, - (unsigned *)&system_time->microseconds); + absolutetime_to_microtime(timer_grab(&thread->system_timer), &secs, &usecs); + 
system_time->seconds = (typeof(system_time->seconds))secs; + system_time->microseconds = usecs; } kern_return_t @@ -1290,6 +1400,34 @@ thread_static_param( thread_mtx_unlock(thread); } +uint64_t +thread_tid( + thread_t thread) +{ + return (thread != THREAD_NULL? thread->thread_id: 0); +} + +uint64_t +thread_dispatchqaddr( + thread_t thread) +{ + uint64_t dispatchqueue_addr = 0; + uint64_t thread_handle = 0; + + if (thread != THREAD_NULL) { +#if defined(__ppc__) || defined(__arm__) + thread_handle = thread->machine.cthread_self; +#else + thread_handle = thread->machine.pcb->cthread_self; +#endif + + if (thread->task->bsd_info) + dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info); + } + + return (dispatchqueue_addr); +} + /* * Export routines to other components for things that are done as macros * within the osfmk component. @@ -1416,4 +1554,13 @@ vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover) return prev; } +void dtrace_thread_bootstrap(void) +{ + task_t task = current_task(); + if(task->thread_count == 1) { + DTRACE_PROC(start); + } + DTRACE_PROC(lwp__start); + +} #endif /* CONFIG_DTRACE */ diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index 4cca24656..61217f52e 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -221,7 +221,6 @@ struct thread { timer_data_t system_timer; /* system mode timer */ processor_t bound_processor; /* bound to a processor? 
*/ processor_t last_processor; /* processor last dispatched on */ - uint64_t last_switch; /* time of last context switch */ /* Fail-safe computation since last unblock or qualifying yield */ uint64_t computation_metered; @@ -277,6 +276,7 @@ struct thread { mach_msg_size_t msize; /* max size for recvd msg */ mach_msg_option_t option; /* options for receive */ mach_msg_size_t slist_size; /* scatter list size */ + mach_port_name_t receiver_name; /* the receive port name */ struct ipc_kmsg *kmsg; /* received message */ mach_port_seqno_t seqno; /* seqno of recvd message */ mach_msg_continue_t continuation; @@ -299,10 +299,10 @@ struct thread { mach_port_t ith_rpc_reply; /* reply port for kernel RPCs */ /* Ast/Halt data structures */ - vm_offset_t recover; /* page fault recover(copyin/out) */ + vm_offset_t recover; /* page fault recover(copyin/out) */ uint32_t ref_count; /* number of references to me */ - queue_chain_t threads; /* global list of all threads */ + queue_chain_t threads; /* global list of all threads */ /* Activation */ queue_chain_t task_threads; @@ -314,7 +314,7 @@ struct thread { struct task *task; vm_map_t map; - decl_mutex_data(,mutex) + decl_lck_mtx_data(,mutex) /* Kernel holds on this thread */ int suspend_count; @@ -357,7 +357,15 @@ struct thread { int64_t t_dtrace_tracing; /* Thread time under dtrace_probe() */ int64_t t_dtrace_vtime; #endif + +#define T_CHUD_MARKED 0x1 /* this thread is marked by CHUD */ +#define T_IN_CHUD 0x2 /* this thread is already in a CHUD handler */ +#define THREAD_PMC_FLAG 0x4 /* Bit in "t_chud" signifying PMC interest */ + uint32_t t_page_creation_count; + clock_sec_t t_page_creation_time; + uint32_t t_chud; /* CHUD flags, used for Shark */ + uint64_t thread_id; /*system wide unique thread-id*/ }; #define ith_state saved.receive.state @@ -366,6 +374,7 @@ struct thread { #define ith_msize saved.receive.msize #define ith_option saved.receive.option #define ith_scatter_list_size saved.receive.slist_size +#define 
ith_receiver_name saved.receive.receiver_name #define ith_continuation saved.receive.continuation #define ith_kmsg saved.receive.kmsg #define ith_seqno saved.receive.seqno @@ -417,6 +426,7 @@ extern void thread_hold( extern void thread_release( thread_t thread); + #define thread_lock_init(th) simple_lock_init(&(th)->sched_lock, 0) #define thread_lock(th) simple_lock(&(th)->sched_lock) #define thread_unlock(th) simple_unlock(&(th)->sched_lock) @@ -544,6 +554,9 @@ extern kern_return_t machine_thread_get_kern_state( thread_state_t tstate, mach_msg_type_number_t *count); +extern kern_return_t machine_thread_inherit_taskwide( + thread_t thread, + task_t parent_task); /* * XXX Funnel locks XXX @@ -559,9 +572,9 @@ struct funnel_lock { typedef struct ReturnHandler ReturnHandler; -#define thread_mtx_lock(thread) mutex_lock(&(thread)->mutex) -#define thread_mtx_try(thread) mutex_try(&(thread)->mutex) -#define thread_mtx_unlock(thread) mutex_unlock(&(thread)->mutex) +#define thread_mtx_lock(thread) lck_mtx_lock(&(thread)->mutex) +#define thread_mtx_try(thread) lck_mtx_try_lock(&(thread)->mutex) +#define thread_mtx_unlock(thread) lck_mtx_unlock(&(thread)->mutex) extern void act_execute_returnhandlers(void); @@ -574,6 +587,15 @@ extern void special_handler( void act_machine_sv_free(thread_t, int); +vm_offset_t min_valid_stack_address(void); +vm_offset_t max_valid_stack_address(void); + +extern void funnel_lock( + struct funnel_lock *lock); + +extern void funnel_unlock( + struct funnel_lock *lock); + #else /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -592,33 +614,22 @@ __END_DECLS #ifdef KERNEL_PRIVATE -typedef struct funnel_lock funnel_t; - -#ifdef MACH_KERNEL_PRIVATE - -extern void funnel_lock( - funnel_t *lock); - -extern void funnel_unlock( - funnel_t *lock); - -vm_offset_t min_valid_stack_address(void); -vm_offset_t max_valid_stack_address(void); - -#endif /* MACH_KERNEL_PRIVATE */ - __BEGIN_DECLS -extern funnel_t *thread_funnel_get(void); - -extern boolean_t 
thread_funnel_set( - funnel_t *lock, - boolean_t funneled); +#ifndef __LP64__ extern thread_t kernel_thread( task_t task, void (*start)(void)); +#endif /* __LP64__ */ + +extern uint64_t thread_tid( + thread_t thread); + +extern uint64_t thread_dispatchqaddr( + thread_t thread); + __END_DECLS #endif /* KERNEL_PRIVATE */ @@ -627,12 +638,14 @@ __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE +extern kern_return_t thread_create_workq( + task_t task, + thread_t *new_thread); + extern void thread_yield_internal( mach_msg_timeout_t interval); -/* - * XXX Funnel locks XXX - */ +typedef struct funnel_lock funnel_t; #define THR_FUNNEL_NULL (funnel_t *)0 @@ -642,6 +655,12 @@ extern funnel_t *funnel_alloc( extern void funnel_free( funnel_t *lock); +extern funnel_t *thread_funnel_get(void); + +extern boolean_t thread_funnel_set( + funnel_t *lock, + boolean_t funneled); + extern void thread_read_times( thread_t thread, time_value_t *user_time, @@ -686,6 +705,13 @@ extern void thread_static_param( thread_t thread, boolean_t state); +extern kern_return_t thread_policy_set_internal( + thread_t thread, + thread_policy_flavor_t flavor, + thread_policy_t policy_info, + mach_msg_type_number_t count); + + extern task_t get_threadtask(thread_t); #define thread_is_64bit(thd) \ task_has_64BitAddr(get_threadtask(thd)) @@ -693,7 +719,7 @@ extern task_t get_threadtask(thread_t); extern void *get_bsdthread_info(thread_t); extern void set_bsdthread_info(thread_t, void *); -extern void *uthread_alloc(task_t, thread_t); +extern void *uthread_alloc(task_t, thread_t, int); extern void uthread_cleanup(task_t, void *, void *); extern void uthread_zone_free(void *); extern void uthread_cred_free(void *); @@ -715,6 +741,7 @@ extern void dtrace_set_thread_vtime(thread_t, int64_t); extern void dtrace_set_thread_tracing(thread_t, int64_t); extern void dtrace_set_thread_reentering(thread_t, boolean_t); extern vm_offset_t dtrace_set_thread_recover(thread_t, vm_offset_t); +extern void dtrace_thread_bootstrap(void); 
extern int64_t dtrace_calc_thread_recent_vtime(thread_t); @@ -727,8 +754,21 @@ extern void thread_set_wq_state64( thread_t thread, thread_state_t tstate); +extern vm_offset_t kernel_stack_mask; +extern vm_offset_t kernel_stack_size; +extern vm_offset_t kernel_stack_depth_max; + #endif /* XNU_KERNEL_PRIVATE */ +/*! @function kernel_thread_start + @abstract Create a kernel thread. + @discussion This function takes three input parameters, namely reference to the function that the thread should execute, caller specified data and a reference which is used to return the newly created kernel thread. The function returns KERN_SUCCESS on success or an appropriate kernel code type indicating the error. It may be noted that the caller is responsible for explicitly releasing the reference to the created thread when no longer needed. This should be done by calling thread_deallocate(new_thread). + @param continuation A C-function pointer where the thread will begin execution. + @param parameter Caller specified data to be passed to the new thread. + @param new_thread Reference to the new thread is returned in this parameter. + @result Returns KERN_SUCCESS on success or an appropriate kernel code type. +*/ + extern kern_return_t kernel_thread_start( thread_continue_t continuation, void *parameter, diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c index 4fcb5f957..8c18ffc30 100644 --- a/osfmk/kern/thread_act.c +++ b/osfmk/kern/thread_act.c @@ -77,8 +77,6 @@ #include -#include - void act_abort(thread_t); void install_special_handler_locked(thread_t); void special_handler_continue(void); @@ -87,9 +85,12 @@ void special_handler_continue(void); * Internal routine to mark a thread as started. * Always called with the thread locked. 
* - * Note: function intentionall declared with the noinline attribute to + * Note: function intentionally declared with the noinline attribute to * prevent multiple declaration of probe symbols in this file; we would * prefer "#pragma noinline", but gcc does not support it. + * PR-6385749 -- the lwp-start probe should fire from within the context + * of the newly created thread. Commented out for now, in case we + * turn it into a dead code probe. */ void thread_start_internal( @@ -97,7 +98,7 @@ thread_start_internal( { clear_wait(thread, THREAD_AWAKENED); thread->started = TRUE; - DTRACE_PROC1(lwp__start, thread_t, thread); + // DTRACE_PROC1(lwp__start, thread_t, thread); } /* @@ -110,8 +111,6 @@ thread_terminate_internal( { kern_return_t result = KERN_SUCCESS; - DTRACE_PROC(lwp__exit); - thread_mtx_lock(thread); if (thread->active) { diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 7ae31523c..ab9bab486 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -400,6 +400,8 @@ _remove_from_delayed_queue( return (call_removed); } +#ifndef __LP64__ + /* * thread_call_func: * @@ -448,6 +450,8 @@ thread_call_func( splx(s); } +#endif /* __LP64__ */ + /* * thread_call_func_delayed: * @@ -712,6 +716,8 @@ thread_call_cancel( return (result); } +#ifndef __LP64__ + /* * thread_call_is_delayed: * @@ -744,6 +750,8 @@ thread_call_is_delayed( return (result); } +#endif /* __LP64__ */ + /* * thread_call_wake: * @@ -832,7 +840,7 @@ thread_call_thread( KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE, - (int)func, (int)param0, (int)param1, 0, 0); + func, param0, param1, 0, 0); (*func)(param0, param1); diff --git a/osfmk/kern/thread_call.h b/osfmk/kern/thread_call.h index fbfa0fb2d..aa38f0dda 100644 --- a/osfmk/kern/thread_call.h +++ b/osfmk/kern/thread_call.h @@ -96,6 +96,8 @@ __BEGIN_DECLS * Obsolete interfaces. 
*/ +#ifndef __LP64__ + extern boolean_t thread_call_is_delayed( thread_call_t call, uint64_t *deadline); @@ -115,8 +117,28 @@ extern boolean_t thread_call_func_cancel( thread_call_param_t param, boolean_t cancel_all); +#else /* __LP64__ */ + +#ifdef XNU_KERNEL_PRIVATE + +extern void thread_call_func_delayed( + thread_call_func_t func, + thread_call_param_t param, + uint64_t deadline); + +extern boolean_t thread_call_func_cancel( + thread_call_func_t func, + thread_call_param_t param, + boolean_t cancel_all); + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* __LP64__ */ + #ifndef MACH_KERNEL_PRIVATE +#ifndef __LP64__ + #ifndef ABSOLUTETIME_SCALAR_TYPE #define thread_call_enter_delayed(a, b) \ @@ -133,6 +155,8 @@ extern boolean_t thread_call_func_cancel( #endif /* ABSOLUTETIME_SCALAR_TYPE */ +#endif /* __LP64__ */ + #endif /* MACH_KERNEL_PRIVATE */ __END_DECLS diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c index 45cd6ef24..58028df2d 100644 --- a/osfmk/kern/thread_policy.c +++ b/osfmk/kern/thread_policy.c @@ -38,6 +38,8 @@ static void thread_recompute_priority( thread_t thread); + + kern_return_t thread_policy_set( thread_t thread, @@ -45,25 +47,32 @@ thread_policy_set( thread_policy_t policy_info, mach_msg_type_number_t count) { - kern_return_t result = KERN_SUCCESS; - spl_t s; if (thread == THREAD_NULL) return (KERN_INVALID_ARGUMENT); + if (thread->static_param) + return (KERN_SUCCESS); + + return (thread_policy_set_internal(thread, flavor, policy_info, count)); +} + +kern_return_t +thread_policy_set_internal( + thread_t thread, + thread_policy_flavor_t flavor, + thread_policy_t policy_info, + mach_msg_type_number_t count) +{ + kern_return_t result = KERN_SUCCESS; + spl_t s; + thread_mtx_lock(thread); if (!thread->active) { thread_mtx_unlock(thread); return (KERN_TERMINATED); } - - if (thread->static_param) { - thread_mtx_unlock(thread); - - return (KERN_SUCCESS); - } - switch (flavor) { case THREAD_EXTENDED_POLICY: @@ -170,7 +179,6 @@ 
thread_policy_set( result = KERN_INVALID_ARGUMENT; break; } - info = (thread_precedence_policy_t)policy_info; s = splsched(); @@ -215,7 +223,6 @@ thread_policy_set( } thread_mtx_unlock(thread); - return (result); } diff --git a/osfmk/kern/timer.c b/osfmk/kern/timer.c index b53419857..7cce6afe3 100644 --- a/osfmk/kern/timer.c +++ b/osfmk/kern/timer.c @@ -77,9 +77,13 @@ timer_init( #if !STAT_TIME timer->tstamp = 0; #endif /* STAT_TIME */ +#if defined(__LP64__) + timer->all_bits = 0; +#else timer->low_bits = 0; timer->high_bits = 0; timer->high_bits_check = 0; +#endif /* defined(__LP64__) */ } /* @@ -103,13 +107,17 @@ timer_advance( timer_t timer, uint64_t delta) { +#if defined(__LP64__) + timer->all_bits += delta; +#else uint64_t low; low = delta + timer->low_bits; if (low >> 32) - timer_update(timer, timer->high_bits + (low >> 32), low); + timer_update(timer, (uint32_t)(timer->high_bits + (low >> 32)), (uint32_t)low); else - timer->low_bits = low; + timer->low_bits = (uint32_t)low; +#endif /* defined(__LP64__) */ } #if !STAT_TIME diff --git a/osfmk/kern/timer.h b/osfmk/kern/timer.h index 9c0fae288..abbfcb5e3 100644 --- a/osfmk/kern/timer.h +++ b/osfmk/kern/timer.h @@ -72,9 +72,13 @@ struct timer { #if !STAT_TIME uint64_t tstamp; #endif /* STAT_TIME */ +#if defined(__LP64__) + uint64_t all_bits; +#else uint32_t low_bits; uint32_t high_bits; uint32_t high_bits_check; +#endif }; typedef struct timer timer_data_t, *timer_t; @@ -151,6 +155,13 @@ extern void timer_advance( */ /* Read timer value */ +#if defined(__LP64__) +static inline uint64_t timer_grab( + timer_t timer) +{ + return timer->all_bits; +} +#else extern uint64_t timer_grab( timer_t timer); @@ -159,5 +170,6 @@ extern void timer_update( timer_t timer, uint32_t new_high, uint32_t new_low); +#endif /* defined(__LP64__) */ #endif /* _KERN_TIMER_H_ */ diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index e091f6707..74c4534a2 100644 --- a/osfmk/kern/timer_call.c +++ b/osfmk/kern/timer_call.c @@ 
-273,9 +273,9 @@ timer_queue_expire( KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) | DBG_FUNC_START, - (unsigned int)func, - (unsigned int)param0, - (unsigned int)param1, 0, 0); + func, + param0, + param1, 0, 0); #if CONFIG_DTRACE && (DEVELOPMENT || DEBUG ) DTRACE_TMR3(callout__start, timer_call_func_t, func, @@ -294,9 +294,9 @@ timer_queue_expire( KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) | DBG_FUNC_END, - (unsigned int)func, - (unsigned int)param0, - (unsigned int)param1, 0, 0); + func, + param0, + param1, 0, 0); simple_lock(&timer_call_lock); } diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index 187ac8b13..a7a19a024 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -67,25 +67,117 @@ #include #include -#include +#include #include #include #include +#include #include #include +#include /* forward declarations */ static boolean_t wait_queue_member_locked( wait_queue_t wq, wait_queue_set_t wq_set); -void wait_queue_unlink_one( - wait_queue_t wq, - wait_queue_set_t *wq_setp); +static void wait_queues_init(void) __attribute__((section("__TEXT, initcode"))); + + +#define WAIT_QUEUE_MAX thread_max +#define WAIT_QUEUE_SET_MAX task_max * 3 +#define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64 + +static zone_t _wait_queue_link_zone; +static zone_t _wait_queue_set_zone; +static zone_t _wait_queue_zone; + +/* see rdar://6737748&5561610; we need an unshadowed + * definition of a WaitQueueLink for debugging, + * but it needs to be used somewhere to wind up in + * the dSYM file. */ +volatile WaitQueueLink *unused_except_for_debugging; -kern_return_t wait_queue_set_unlink_all_nofree( - wait_queue_set_t wq_set); + +/* + * Waiting protocols and implementation: + * + * Each thread may be waiting for exactly one event; this event + * is set using assert_wait(). 
That thread may be awakened either + * by performing a thread_wakeup_prim() on its event, + * or by directly waking that thread up with clear_wait(). + * + * The implementation of wait events uses a hash table. Each + * bucket is queue of threads having the same hash function + * value; the chain for the queue (linked list) is the run queue + * field. [It is not possible to be waiting and runnable at the + * same time.] + * + * Locks on both the thread and on the hash buckets govern the + * wait event field and the queue chain field. Because wakeup + * operations only have the event as an argument, the event hash + * bucket must be locked before any thread. + * + * Scheduling operations may also occur at interrupt level; therefore, + * interrupts below splsched() must be prevented when holding + * thread or hash bucket locks. + * + * The wait event hash table declarations are as follows: + */ + +struct wait_queue boot_wait_queue[1]; +__private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0]; + +__private_extern__ uint32_t num_wait_queues = 1; + +static uint32_t +compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) { + uint32_t hsize = (uint32_t)round_page_64((thread_max / 11) * sizeof(struct wait_queue)); + uint32_t bhsize; + + if (PE_parse_boot_argn("wqsize", &bhsize, sizeof(bhsize))) + hsize = bhsize; + + return hsize; +} + +static void +wait_queues_init(void) +{ + uint32_t i, whsize; + kern_return_t kret; + + whsize = compute_wait_hash_size(processor_avail_count, machine_info.max_mem); + num_wait_queues = (whsize / ((uint32_t)sizeof(struct wait_queue))) - 1; + + kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT); + + if (kret != KERN_SUCCESS || wait_queues == NULL) + panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize); + + for (i = 0; i < num_wait_queues; i++) { + wait_queue_init(&wait_queues[i], 
SYNC_POLICY_FIFO); + } +} + +void +wait_queue_bootstrap(void) +{ + wait_queues_init(); + _wait_queue_zone = zinit(sizeof(struct wait_queue), + WAIT_QUEUE_MAX * sizeof(struct wait_queue), + sizeof(struct wait_queue), + "wait queues"); + _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set), + WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set), + sizeof(struct wait_queue_set), + "wait queue sets"); + _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link), + WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link), + sizeof(struct _wait_queue_link), + "wait queue links"); +} /* * Routine: wait_queue_init @@ -100,10 +192,11 @@ wait_queue_init( wait_queue_t wq, int policy) { - if (!((policy & SYNC_POLICY_ORDER_MASK) == SYNC_POLICY_FIFO)) + /* only FIFO and LIFO for now */ + if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0) return KERN_INVALID_ARGUMENT; - wq->wq_fifo = TRUE; + wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); wq->wq_type = _WAIT_QUEUE_inited; queue_init(&wq->wq_queue); hw_lock_init(&wq->wq_interlock); @@ -128,11 +221,11 @@ wait_queue_alloc( wait_queue_t wq; kern_return_t ret; - wq = (wait_queue_t) kalloc(sizeof(struct wait_queue)); + wq = (wait_queue_t) zalloc(_wait_queue_zone); if (wq != WAIT_QUEUE_NULL) { ret = wait_queue_init(wq, policy); if (ret != KERN_SUCCESS) { - kfree(wq, sizeof(struct wait_queue)); + zfree(_wait_queue_zone, wq); wq = WAIT_QUEUE_NULL; } } @@ -154,7 +247,7 @@ wait_queue_free( return KERN_INVALID_ARGUMENT; if (!queue_empty(&wq->wq_queue)) return KERN_FAILURE; - kfree(wq, sizeof(struct wait_queue)); + zfree(_wait_queue_zone, wq); return KERN_SUCCESS; } @@ -179,11 +272,11 @@ wait_queue_set_init( wqset->wqs_wait_queue.wq_type = _WAIT_QUEUE_SET_inited; if (policy & SYNC_POLICY_PREPOST) - wqset->wqs_wait_queue.wq_isprepost = TRUE; + wqset->wqs_wait_queue.wq_prepost = TRUE; else - wqset->wqs_wait_queue.wq_isprepost = FALSE; + wqset->wqs_wait_queue.wq_prepost = FALSE; queue_init(&wqset->wqs_setlinks); - wqset->wqs_refcount = 0; 
+ queue_init(&wqset->wqs_preposts); return KERN_SUCCESS; } @@ -200,12 +293,22 @@ kern_return_t wait_queue_sub_clearrefs( wait_queue_set_t wq_set) { + wait_queue_link_t wql; + queue_t q; + spl_t s; + if (!wait_queue_is_set(wq_set)) return KERN_INVALID_ARGUMENT; + s = splsched(); wqs_lock(wq_set); - wq_set->wqs_refcount = 0; + q = &wq_set->wqs_preposts; + while (!queue_empty(q)) { + queue_remove_first(q, wql, wait_queue_link_t, wql_preposts); + assert(!wql_is_preposted(wql)); + } wqs_unlock(wq_set); + splx(s); return KERN_SUCCESS; } @@ -226,13 +329,13 @@ wait_queue_set_alloc( { wait_queue_set_t wq_set; - wq_set = (wait_queue_set_t) kalloc(sizeof(struct wait_queue_set)); + wq_set = (wait_queue_set_t) zalloc(_wait_queue_set_zone); if (wq_set != WAIT_QUEUE_SET_NULL) { kern_return_t ret; ret = wait_queue_set_init(wq_set, policy); if (ret != KERN_SUCCESS) { - kfree(wq_set, sizeof(struct wait_queue_set)); + zfree(_wait_queue_set_zone, wq_set); wq_set = WAIT_QUEUE_SET_NULL; } } @@ -256,7 +359,7 @@ wait_queue_set_free( if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue)) return KERN_FAILURE; - kfree(wq_set, sizeof(struct wait_queue_set)); + zfree(_wait_queue_set_zone, wq_set); return KERN_SUCCESS; } @@ -273,9 +376,11 @@ unsigned int wait_queue_link_size(void) { return sizeof(WaitQueueLink); } /* declare a unique type for wait queue link structures */ static unsigned int _wait_queue_link; +static unsigned int _wait_queue_link_noalloc; static unsigned int _wait_queue_unlinked; #define WAIT_QUEUE_LINK ((void *)&_wait_queue_link) +#define WAIT_QUEUE_LINK_NOALLOC ((void *)&_wait_queue_link_noalloc) #define WAIT_QUEUE_UNLINKED ((void *)&_wait_queue_unlinked) #define WAIT_QUEUE_ELEMENT_CHECK(wq, wqe) \ @@ -293,12 +398,14 @@ static unsigned int _wait_queue_unlinked; (queue_t)(wql) : &(wql)->wql_setlinks))) #define WAIT_QUEUE_SET_LINK_CHECK(wqs, wql) \ - WQASSERT((((wql)->wql_type == WAIT_QUEUE_LINK) && \ + WQASSERT(((((wql)->wql_type == WAIT_QUEUE_LINK) || \ + ((wql)->wql_type == 
WAIT_QUEUE_LINK_NOALLOC)) && \ ((wql)->wql_setqueue == (wqs)) && \ - ((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) && \ + (((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) || \ + ((wql)->wql_queue->wq_type == _WAIT_QUEUE_SET_inited)) && \ (WQSNEXT((wqs), WQSPREV((wqs),(wql))) == (wql))), \ "wait queue set links corruption: wqs=%#x, wql=%#x", \ - (wqs), (wql)) + (wqs), (wql)) #if defined(_WAIT_QUEUE_DEBUG_) @@ -357,7 +464,8 @@ wait_queue_member_locked( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if ((wq_element->wqe_type == WAIT_QUEUE_LINK)) { + if ((wq_element->wqe_type == WAIT_QUEUE_LINK) || + (wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC)) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; if (wql->wql_setqueue == wq_set) @@ -399,16 +507,18 @@ wait_queue_member( /* - * Routine: wait_queue_link_noalloc + * Routine: wait_queue_link_internal * Purpose: * Insert a set wait queue into a wait queue. This * requires us to link the two together using a wait_queue_link - * structure that we allocate. + * structure that was provided. * Conditions: * The wait queue being inserted must be inited as a set queue + * The wait_queue_link structure must already be properly typed */ +static kern_return_t -wait_queue_link_noalloc( +wait_queue_link_internal( wait_queue_t wq, wait_queue_set_t wq_set, wait_queue_link_t wql) @@ -417,13 +527,13 @@ wait_queue_link_noalloc( queue_t q; spl_t s; - if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set)) + if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) return KERN_INVALID_ARGUMENT; /* - * There are probably less threads and sets associated with - * the wait queue, then there are wait queues associated with - * the set. So lets validate it that way. + * There are probably fewer threads and sets associated with + * the wait queue than there are wait queues associated with + * the set. 
So let's validate it that way. */ s = splsched(); wait_queue_lock(wq); @@ -431,7 +541,8 @@ wait_queue_link_noalloc( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK && + if ((wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) && ((wait_queue_link_t)wq_element)->wql_setqueue == wq_set) { wait_queue_unlock(wq); splx(s); @@ -448,11 +559,14 @@ wait_queue_link_noalloc( WAIT_QUEUE_SET_CHECK(wq_set); + assert(wql->wql_type == WAIT_QUEUE_LINK || + wql->wql_type == WAIT_QUEUE_LINK_NOALLOC); + wql->wql_queue = wq; + wql_clear_prepost(wql); queue_enter(&wq->wq_queue, wql, wait_queue_link_t, wql_links); wql->wql_setqueue = wq_set; queue_enter(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); - wql->wql_type = WAIT_QUEUE_LINK; wqs_unlock(wq_set); wait_queue_unlock(wq); @@ -461,6 +575,25 @@ wait_queue_link_noalloc( return KERN_SUCCESS; } +/* + * Routine: wait_queue_link_noalloc + * Purpose: + * Insert a set wait queue into a wait queue. This + * requires us to link the two together using a wait_queue_link + * structure that we allocate. 
+ * Conditions: + * The wait queue being inserted must be inited as a set queue + */ +kern_return_t +wait_queue_link_noalloc( + wait_queue_t wq, + wait_queue_set_t wq_set, + wait_queue_link_t wql) +{ + wql->wql_type = WAIT_QUEUE_LINK_NOALLOC; + return wait_queue_link_internal(wq, wq_set, wql); +} + /* * Routine: wait_queue_link * Purpose: @@ -478,20 +611,21 @@ wait_queue_link( wait_queue_link_t wql; kern_return_t ret; - wql = (wait_queue_link_t) kalloc(sizeof(struct _wait_queue_link)); + wql = (wait_queue_link_t) zalloc(_wait_queue_link_zone); if (wql == WAIT_QUEUE_LINK_NULL) return KERN_RESOURCE_SHORTAGE; - ret = wait_queue_link_noalloc(wq, wq_set, wql); + wql->wql_type = WAIT_QUEUE_LINK; + ret = wait_queue_link_internal(wq, wq_set, wql); if (ret != KERN_SUCCESS) - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); return ret; } /* - * Routine: wait_queue_unlink_nofree + * Routine: wait_queue_unlink_locked * Purpose: * Undo the linkage between a wait queue and a set. 
*/ @@ -508,6 +642,10 @@ wait_queue_unlink_locked( queue_remove(&wq->wq_queue, wql, wait_queue_link_t, wql_links); wql->wql_setqueue = WAIT_QUEUE_SET_NULL; queue_remove(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); + if (wql_is_preposted(wql)) { + queue_t ppq = &wq_set->wqs_preposts; + queue_remove(ppq, wql, wait_queue_link_t, wql_preposts); + } wql->wql_type = WAIT_QUEUE_UNLINKED; WAIT_QUEUE_CHECK(wq); @@ -532,7 +670,7 @@ wait_queue_unlink( queue_t q; spl_t s; - if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set)) { + if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) { return KERN_INVALID_ARGUMENT; } s = splsched(); @@ -542,16 +680,22 @@ wait_queue_unlink( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { + wql = (wait_queue_link_t)wq_element; if (wql->wql_setqueue == wq_set) { + boolean_t alloced; + + alloced = (wql->wql_type == WAIT_QUEUE_LINK); wqs_lock(wq_set); wait_queue_unlink_locked(wq, wq_set, wql); wqs_unlock(wq_set); wait_queue_unlock(wq); splx(s); - kfree(wql, sizeof(struct _wait_queue_link)); + if (alloced) + zfree(_wait_queue_link_zone, wql); return KERN_SUCCESS; } } @@ -563,65 +707,12 @@ wait_queue_unlink( return KERN_NOT_IN_SET; } - -/* - * Routine: wait_queue_unlinkall_nofree - * Purpose: - * Remove the linkage between a wait queue and all its - * sets. The caller is responsible for freeing - * the wait queue link structures. 
- */ - -kern_return_t -wait_queue_unlinkall_nofree( - wait_queue_t wq) -{ - wait_queue_element_t wq_element; - wait_queue_element_t wq_next_element; - wait_queue_set_t wq_set; - wait_queue_link_t wql; - queue_head_t links_queue_head; - queue_t links = &links_queue_head; - queue_t q; - spl_t s; - - if (!wait_queue_is_queue(wq)) { - return KERN_INVALID_ARGUMENT; - } - - queue_init(links); - - s = splsched(); - wait_queue_lock(wq); - - q = &wq->wq_queue; - - wq_element = (wait_queue_element_t) queue_first(q); - while (!queue_end(q, (queue_entry_t)wq_element)) { - WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - wq_next_element = (wait_queue_element_t) - queue_next((queue_t) wq_element); - - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { - wql = (wait_queue_link_t)wq_element; - wq_set = wql->wql_setqueue; - wqs_lock(wq_set); - wait_queue_unlink_locked(wq, wq_set, wql); - wqs_unlock(wq_set); - } - wq_element = wq_next_element; - } - wait_queue_unlock(wq); - splx(s); - return(KERN_SUCCESS); -} - - /* * Routine: wait_queue_unlink_all * Purpose: - * Remove the linkage between a wait queue and all its sets. - * All the linkage structures are freed. + * Remove the linkage between a wait queue and all its sets. + * All the linkage structures that were allocated internally + * are freed. The others are the caller's responsibility. * Conditions: * Nothing of interest locked. 
*/ @@ -639,7 +730,7 @@ wait_queue_unlink_all( queue_t q; spl_t s; - if (!wait_queue_is_queue(wq)) { + if (!wait_queue_is_valid(wq)) { return KERN_INVALID_ARGUMENT; } @@ -652,17 +743,21 @@ wait_queue_unlink_all( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { + boolean_t alloced; + WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); wq_next_element = (wait_queue_element_t) queue_next((queue_t) wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + alloced = (wq_element->wqe_type == WAIT_QUEUE_LINK); + if (alloced || wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wql = (wait_queue_link_t)wq_element; wq_set = wql->wql_setqueue; wqs_lock(wq_set); wait_queue_unlink_locked(wq, wq_set, wql); wqs_unlock(wq_set); - enqueue(links, &wql->wql_links); + if (alloced) + enqueue(links, &wql->wql_links); } wq_element = wq_next_element; } @@ -671,68 +766,18 @@ wait_queue_unlink_all( while(!queue_empty(links)) { wql = (wait_queue_link_t) dequeue(links); - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); } return(KERN_SUCCESS); } -/* - * Routine: wait_queue_set_unlink_all_nofree - * Purpose: - * Remove the linkage between a set wait queue and all its - * member wait queues. The link structures are not freed, nor - * returned. It is the caller's responsibility to track and free - * them. 
- * Conditions: - * The wait queue being must be a member set queue - */ -kern_return_t -wait_queue_set_unlink_all_nofree( - wait_queue_set_t wq_set) -{ - wait_queue_link_t wql; - wait_queue_t wq; - queue_t q; - spl_t s; - - if (!wait_queue_is_set(wq_set)) { - return KERN_INVALID_ARGUMENT; - } - -retry: - s = splsched(); - wqs_lock(wq_set); - - q = &wq_set->wqs_setlinks; - - wql = (wait_queue_link_t)queue_first(q); - while (!queue_end(q, (queue_entry_t)wql)) { - WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); - wq = wql->wql_queue; - if (wait_queue_lock_try(wq)) { - wait_queue_unlink_locked(wq, wq_set, wql); - wait_queue_unlock(wq); - wql = (wait_queue_link_t)queue_first(q); - } else { - wqs_unlock(wq_set); - splx(s); - delay(1); - goto retry; - } - } - wqs_unlock(wq_set); - splx(s); - - return(KERN_SUCCESS); -} - /* legacy interface naming */ kern_return_t wait_subqueue_unlink_all( wait_queue_set_t wq_set) { - return wait_queue_set_unlink_all_nofree(wq_set); + return wait_queue_set_unlink_all(wq_set); } @@ -740,7 +785,8 @@ wait_subqueue_unlink_all( * Routine: wait_queue_set_unlink_all * Purpose: * Remove the linkage between a set wait queue and all its - * member wait queues. The link structures are freed. + * member wait queues. The link structures are freed for those + * links which were dynamically allocated. 
* Conditions: * The wait queue must be a set */ @@ -772,9 +818,13 @@ wait_queue_set_unlink_all( WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); wq = wql->wql_queue; if (wait_queue_lock_try(wq)) { + boolean_t alloced; + + alloced = (wql->wql_type == WAIT_QUEUE_LINK); wait_queue_unlink_locked(wq, wq_set, wql); wait_queue_unlock(wq); - enqueue(links, &wql->wql_links); + if (alloced) + enqueue(links, &wql->wql_links); wql = (wait_queue_link_t)queue_first(q); } else { wqs_unlock(wq_set); @@ -788,59 +838,11 @@ wait_queue_set_unlink_all( while (!queue_empty (links)) { wql = (wait_queue_link_t) dequeue(links); - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); } return(KERN_SUCCESS); } - -/* - * Routine: wait_queue_unlink_one - * Purpose: - * Find and unlink one set wait queue - * Conditions: - * Nothing of interest locked. - */ -void -wait_queue_unlink_one( - wait_queue_t wq, - wait_queue_set_t *wq_setp) -{ - wait_queue_element_t wq_element; - queue_t q; - spl_t s; - - s = splsched(); - wait_queue_lock(wq); - - q = &wq->wq_queue; - - wq_element = (wait_queue_element_t) queue_first(q); - while (!queue_end(q, (queue_entry_t)wq_element)) { - - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { - wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_set_t wq_set = wql->wql_setqueue; - - wqs_lock(wq_set); - wait_queue_unlink_locked(wq, wq_set, wql); - wqs_unlock(wq_set); - wait_queue_unlock(wq); - splx(s); - kfree(wql,sizeof(struct _wait_queue_link)); - *wq_setp = wq_set; - return; - } - - wq_element = (wait_queue_element_t) - queue_next((queue_t) wq_element); - } - wait_queue_unlock(wq); - splx(s); - *wq_setp = WAIT_QUEUE_SET_NULL; -} - - /* * Routine: wait_queue_assert_wait64_locked * Purpose: @@ -868,7 +870,7 @@ wait_queue_assert_wait64_locked( if (wq->wq_type == _WAIT_QUEUE_SET_inited) { wait_queue_set_t wqs = (wait_queue_set_t)wq; - if (wqs->wqs_isprepost && wqs->wqs_refcount > 0) + if (event == NO_EVENT64 && wqs_is_preposted(wqs)) 
return(THREAD_AWAKENED); } @@ -880,7 +882,7 @@ wait_queue_assert_wait64_locked( */ wait_result = thread_mark_wait_locked(thread, interruptible); if (wait_result == THREAD_WAITING) { - if (thread->options & TH_OPT_VMPRIV) + if (!wq->wq_fifo || thread->options & TH_OPT_VMPRIV) enqueue_head(&wq->wq_queue, (queue_entry_t) thread); else enqueue_tail(&wq->wq_queue, (queue_entry_t) thread); @@ -924,7 +926,7 @@ wait_queue_assert_wait( s = splsched(); wait_queue_lock(wq); thread_lock(thread); - ret = wait_queue_assert_wait64_locked(wq, (event64_t)((uint32_t)event), + ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), interruptible, deadline, thread); thread_unlock(thread); wait_queue_unlock(wq); @@ -999,27 +1001,24 @@ _wait_queue_select64_all( /* * We may have to recurse if this is a compound wait queue. */ - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; /* - * We have to check the set wait queue. + * We have to check the set wait queue. If it is marked + * as pre-post, and it is the "generic event" then mark + * it pre-posted now (if not already). */ - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (set_queue->wq_isprepost) { - wait_queue_set_t wqs = (wait_queue_set_t)set_queue; - - /* - * Preposting is only for sets and wait queue - * is the first element of set - */ - wqs->wqs_refcount++; + wqs_lock(set_queue); + if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { + queue_t ppq = &set_queue->wqs_preposts; + queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); } - if (! wait_queue_empty(set_queue)) - _wait_queue_select64_all(set_queue, event, wake_queue); - wait_queue_unlock(set_queue); + if (! 
wait_queue_empty(&set_queue->wqs_wait_queue)) + _wait_queue_select64_all(&set_queue->wqs_wait_queue, event, wake_queue); + wqs_unlock(set_queue); } else { /* @@ -1125,7 +1124,7 @@ wait_queue_wakeup_all( // panic("wait_queue_wakeup_all: we did not get the lock on %p\n", wq); /* (BRINGUP) */ // } ret = wait_queue_wakeup64_all_locked( - wq, (event64_t)((uint32_t)event), + wq, CAST_DOWN(event64_t,event), result, TRUE); /* lock released */ splx(s); @@ -1177,7 +1176,7 @@ wait_queue_wakeup64_all( * a locked thread - if one found * Note: * This is where the sync policy of the wait queue comes - * into effect. For now, we just assume FIFO. + * into effect. For now, we just assume FIFO/LIFO. */ static thread_t _wait_queue_select64_one( @@ -1189,8 +1188,6 @@ _wait_queue_select64_one( thread_t t = THREAD_NULL; queue_t q; - assert(wq->wq_fifo); - q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); @@ -1202,21 +1199,34 @@ _wait_queue_select64_one( /* * We may have to recurse if this is a compound wait queue. */ - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; /* - * We have to check the set wait queue. + * We have to check the set wait queue. If the set + * supports pre-posting, it isn't already preposted, + * and we didn't find a thread in the set, then mark it. + * + * If we later find a thread, there may be a spurious + * pre-post here on this set. The wait side has to check + * for that either pre- or post-wait. */ - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (! wait_queue_empty(set_queue)) { - t = _wait_queue_select64_one(set_queue, event); + wqs_lock(set_queue); + if (! 
wait_queue_empty(&set_queue->wqs_wait_queue)) { + t = _wait_queue_select64_one(&set_queue->wqs_wait_queue, event); } - wait_queue_unlock(set_queue); - if (t != THREAD_NULL) + if (t != THREAD_NULL) { + wqs_unlock(set_queue); return t; + } + if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { + queue_t ppq = &set_queue->wqs_preposts; + queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); + } + wqs_unlock(set_queue); + } else { /* @@ -1319,18 +1329,18 @@ _wait_queue_select64_thread( wqe_next = (wait_queue_element_t) queue_next((queue_t) wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (! wait_queue_empty(set_queue)) { - res = _wait_queue_select64_thread(set_queue, + wqs_lock(set_queue); + if (! 
wait_queue_empty(&set_queue->wqs_wait_queue)) { + res = _wait_queue_select64_thread(&set_queue->wqs_wait_queue, event, thread); } - wait_queue_unlock(set_queue); + wqs_unlock(set_queue); if (res == KERN_SUCCESS) return KERN_SUCCESS; } @@ -1444,7 +1454,7 @@ wait_queue_wakeup_one( s = splsched(); wait_queue_lock(wq); - thread = _wait_queue_select64_one(wq, (event64_t)((uint32_t)event)); + thread = _wait_queue_select64_one(wq, CAST_DOWN(event64_t,event)); wait_queue_unlock(wq); if (thread) { @@ -1587,7 +1597,7 @@ wait_queue_wakeup_thread( s = splsched(); wait_queue_lock(wq); - res = _wait_queue_select64_thread(wq, (event64_t)((uint32_t)event), thread); + res = _wait_queue_select64_thread(wq, CAST_DOWN(event64_t,event), thread); wait_queue_unlock(wq); if (res == KERN_SUCCESS) { diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h index 5a1be3f35..030e82d28 100644 --- a/osfmk/kern/wait_queue.h +++ b/osfmk/kern/wait_queue.h @@ -64,7 +64,7 @@ typedef struct wait_queue { unsigned int /* flags */ /* boolean_t */ wq_type:16, /* only public field */ wq_fifo:1, /* fifo wakeup policy? */ - wq_isprepost:1, /* is waitq preposted? set only */ + wq_prepost:1, /* waitq supports prepost? 
set only */ :0; /* force to long boundary */ hw_lock_data_t wq_interlock; /* interlock */ queue_head_t wq_queue; /* queue of elements */ @@ -80,12 +80,12 @@ typedef struct wait_queue { typedef struct wait_queue_set { WaitQueue wqs_wait_queue; /* our wait queue */ queue_head_t wqs_setlinks; /* links from set perspective */ - unsigned int wqs_refcount; /* refcount for preposting */ + queue_head_t wqs_preposts; /* preposted links */ } WaitQueueSet; #define wqs_type wqs_wait_queue.wq_type #define wqs_fifo wqs_wait_queue.wq_fifo -#define wqs_isprepost wqs_wait_queue.wq_isprepost +#define wqs_prepost wqs_wait_queue.wq_prepost #define wqs_queue wqs_wait_queue.wq_queue /* @@ -126,6 +126,7 @@ typedef WaitQueueElement *wait_queue_element_t; typedef struct _wait_queue_link { WaitQueueElement wql_element; /* element on master */ queue_chain_t wql_setlinks; /* element on set */ + queue_chain_t wql_preposts; /* element on set prepost list */ wait_queue_set_t wql_setqueue; /* set queue */ } WaitQueueLink; @@ -171,24 +172,23 @@ static inline void wait_queue_lock(wait_queue_t wq) { static inline void wait_queue_unlock(wait_queue_t wq) { assert(wait_queue_held(wq)); -#if defined(__i386__) - /* DRK: On certain x86 systems, this spinlock is susceptible to - * lock starvation. Hence use an unlock variant which performs - * a cacheline flush to minimize cache affinity on acquisition. 
- */ - i386_lock_unlock_with_flush(&(wq)->wq_interlock); -#else hw_lock_unlock(&(wq)->wq_interlock); -#endif } #define wqs_lock(wqs) wait_queue_lock(&(wqs)->wqs_wait_queue) #define wqs_unlock(wqs) wait_queue_unlock(&(wqs)->wqs_wait_queue) #define wqs_lock_try(wqs) wait_queue__try_lock(&(wqs)->wqs_wait_queue) +#define wqs_is_preposted(wqs) ((wqs)->wqs_prepost && !queue_empty(&(wqs)->wqs_preposts)) + +#define wql_is_preposted(wql) ((wql)->wql_preposts.next != NULL) +#define wql_clear_prepost(wql) ((wql)->wql_preposts.next = (wql)->wql_preposts.prev = NULL) #define wait_queue_assert_possible(thread) \ ((thread)->wait_queue == WAIT_QUEUE_NULL) +/* bootstrap interface - can allocate/link wait_queues and sets after calling this */ +__private_extern__ void wait_queue_bootstrap(void); + /******** Decomposed interfaces (to build higher level constructs) ***********/ /* assert intent to wait on a locked wait queue */ @@ -234,6 +234,34 @@ __private_extern__ kern_return_t wait_queue_wakeup64_thread_locked( wait_result_t result, boolean_t unlock); +__private_extern__ uint32_t num_wait_queues; +__private_extern__ struct wait_queue *wait_queues; +/* The Jenkins "one at a time" hash. + * TBD: There may be some value to unrolling here, + * depending on the architecture. 
+ */ +static inline uint32_t wq_hash(char *key) +{ + uint32_t hash = 0; + size_t i, length = sizeof(char *); + + for (i = 0; i < length; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash; +} + +/* TBD: It should be possible to eliminate the divide here */ +#define wait_hash(event) \ + (wq_hash((char *)&event) % (num_wait_queues)) + #endif /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -283,9 +311,6 @@ extern kern_return_t wait_queue_unlink( extern kern_return_t wait_queue_unlink_all( wait_queue_t wait_queue); -extern kern_return_t wait_queue_unlinkall_nofree( - wait_queue_t wait_queue); - extern kern_return_t wait_queue_set_unlink_all( wait_queue_set_t set_queue); diff --git a/osfmk/kern/xpr.c b/osfmk/kern/xpr.c index 3a0a6a08d..1b4d16707 100644 --- a/osfmk/kern/xpr.c +++ b/osfmk/kern/xpr.c @@ -139,7 +139,7 @@ xprbootstrap(void) /* leave room at the end for a saved copy of xprptr */ size = nxprbufs * sizeof(struct xprbuf) + sizeof xprptr; - kr = kmem_alloc_wired(kernel_map, &addr, size); + kr = kmem_alloc_kobject(kernel_map, &addr, size); if (kr != KERN_SUCCESS) panic("xprbootstrap"); diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index f8ac4c12f..d1d08bf96 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -79,7 +79,7 @@ #include #include #include -#include +#include #include #include #include @@ -217,6 +217,7 @@ void zalloc_async( thread_call_param_t p0, thread_call_param_t p1); +void zone_display_zprint( void ); #if ZONE_DEBUG && MACH_KDB int zone_count( @@ -238,7 +239,7 @@ vm_size_t zdata_size; #define lock_zone(zone) \ MACRO_BEGIN \ - lck_mtx_lock(&(zone)->lock); \ + lck_mtx_lock_spin(&(zone)->lock); \ MACRO_END #define unlock_zone(zone) \ @@ -248,7 +249,7 @@ MACRO_END #define zone_wakeup(zone) thread_wakeup((event_t)(zone)) #define zone_sleep(zone) \ - (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT); + (void) 
lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT); #define lock_zone_init(zone) \ @@ -262,7 +263,7 @@ MACRO_BEGIN \ &(zone)->lock_grp, &(zone)->lock_attr); \ MACRO_END -#define lock_try_zone(zone) lck_mtx_try_lock(&zone->lock) +#define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) kern_return_t zget_space( vm_offset_t size, @@ -284,7 +285,13 @@ unsigned int zone_pages; /* * Exclude more than one concurrent garbage collection */ -decl_mutex_data(, zone_gc_lock) +decl_lck_mtx_data(, zone_gc_lock) + +lck_attr_t zone_lck_attr; +lck_grp_t zone_lck_grp; +lck_grp_attr_t zone_lck_grp_attr; +lck_mtx_ext_t zone_lck_ext; + #if !ZONE_ALIAS_ADDR #define from_zone_map(addr, size) \ @@ -724,7 +731,7 @@ zone_steal_memory(void) /* * Fill a zone with enough memory to contain at least nelem elements. - * Memory is obtained with kmem_alloc_wired from the kernel_map. + * Memory is obtained with kmem_alloc_kobject from the kernel_map. * Return the number of elements actually put into the zone, which may * be more than the caller asked for since the memory allocation is * rounded up to a full page. 
@@ -744,13 +751,13 @@ zfill( return 0; size = nelem * zone->elem_size; size = round_page(size); - kr = kmem_alloc_wired(kernel_map, &memory, size); + kr = kmem_alloc_kobject(kernel_map, &memory, size); if (kr != KERN_SUCCESS) return 0; zone_change(zone, Z_FOREIGN, TRUE); zcram(zone, (void *)memory, size); - nalloc = size / zone->elem_size; + nalloc = (int)(size / zone->elem_size); assert(nalloc >= nelem); return nalloc; @@ -835,7 +842,8 @@ zone_init( vm_size_t zone_table_size; retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, - FALSE, VM_FLAGS_ANYWHERE, &zone_map); + FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT, + &zone_map); if (retval != KERN_SUCCESS) panic("zone_init: kmem_suballoc failed"); @@ -843,19 +851,25 @@ zone_init( /* * Setup garbage collection information: */ - zone_table_size = atop_32(zone_max - zone_min) * + zone_table_size = atop_kernel(zone_max - zone_min) * sizeof(struct zone_page_table_entry); - if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table, + if (kmem_alloc_kobject(zone_map, (vm_offset_t *) &zone_page_table, zone_table_size) != KERN_SUCCESS) panic("zone_init"); zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); - zone_pages = atop_32(zone_max - zone_min); + zone_pages = (unsigned int)atop_kernel(zone_max - zone_min); zone_map_min_address = zone_min; zone_map_max_address = zone_max; - mutex_init(&zone_gc_lock, 0); + + lck_grp_attr_setdefault(&zone_lck_grp_attr); + lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr); + lck_attr_setdefault(&zone_lck_attr); + lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr); + zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); } +extern volatile SInt32 kfree_nop_count; /* * zalloc returns an element from the specified zone. 
@@ -930,7 +944,7 @@ zalloc_canblock( if (zone->collectable) { vm_offset_t space; - vm_size_t alloc_size; + vm_size_t alloc_size; int retry = 0; for (;;) { @@ -960,10 +974,11 @@ zalloc_canblock( if (retry == 2) { zone_gc(); printf("zalloc did gc\n"); + zone_display_zprint(); } if (retry == 3) { - panic_include_zprint = TRUE; - panic("zalloc: \"%s\" (%d elements) retry fail %d", zone->zone_name, zone->count, retval); + panic_include_zprint = TRUE; + panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); } } else { break; @@ -1211,7 +1226,7 @@ zfree( panic("zfree: freeing to zone_zone breaks zone_gc!"); #endif - TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (int)addr); + TRACE_MACHLEAKS(ZFREE_CODE, ZFREE_CODE_2, zone->elem_size, (uintptr_t)addr); if (zone->collectable && !zone->allows_foreign && !from_zone_map(elem, zone->elem_size)) { @@ -1310,6 +1325,10 @@ zfree( panic("zfree"); } ADD_TO_ZONE(zone, elem); +#if MACH_ASSERT + if (zone->count < 0) + panic("zfree: count < 0!"); +#endif /* * If elements have one or more pages, and memory is low, @@ -1370,7 +1389,7 @@ zone_free_count(zone_t zone) integer_t free_count; lock_zone(zone); - free_count = zone->cur_size/zone->elem_size - zone->count; + free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count); unlock_zone(zone); assert(free_count >= 0); @@ -1390,7 +1409,7 @@ zprealloc( vm_offset_t addr; if (size != 0) { - if (kmem_alloc_wired(zone_map, &addr, size) != KERN_SUCCESS) + if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS) panic("zprealloc"); zone_page_init(addr, size, ZONE_PAGE_USED); zcram(zone, (void *)addr, size); @@ -1417,8 +1436,8 @@ zone_page_collectable( panic("zone_page_collectable"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - 
zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) if (zp->collect_count == zp->alloc_count) @@ -1443,8 +1462,8 @@ zone_page_keep( panic("zone_page_keep"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) zp->collect_count = 0; @@ -1466,8 +1485,8 @@ zone_page_collect( panic("zone_page_collect"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) ++zp->collect_count; @@ -1490,8 +1509,8 @@ zone_page_init( panic("zone_page_init"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) { zp->alloc_count = value; @@ -1515,8 +1534,8 @@ zone_page_alloc( panic("zone_page_alloc"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) { /* @@ -1547,8 +1566,8 @@ zone_page_free_element( panic("zone_page_free_element"); #endif - i = atop_32(addr-zone_map_min_address); - j = atop_32((addr+size-1) - zone_map_min_address); + i = (natural_t)atop_kernel(addr-zone_map_min_address); + j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); for (zp = zone_page_table + i; i <= j; zp++, i++) { if (zp->collect_count > 0) @@ -1623,7 +1642,7 @@ zone_gc(void) unsigned int i; 
struct zone_page_table_entry *zp, *zone_free_pages; - mutex_lock(&zone_gc_lock); + lck_mtx_lock(&zone_gc_lock); simple_lock(&all_zones_lock); max_zones = num_zones; @@ -1860,7 +1879,7 @@ zone_gc(void) ++zgc_stats.pgs_freed; } - mutex_unlock(&zone_gc_lock); + lck_mtx_unlock(&zone_gc_lock); } /* @@ -1870,7 +1889,7 @@ zone_gc(void) */ void -consider_zone_gc(void) +consider_zone_gc(boolean_t force) { /* * By default, don't attempt zone GC more frequently @@ -1882,7 +1901,8 @@ consider_zone_gc(void) if (zone_gc_allowed && ((sched_tick > (zone_gc_last_tick + zone_gc_max_rate)) || - zone_gc_forced)) { + zone_gc_forced || + force)) { zone_gc_forced = FALSE; zone_gc_last_tick = sched_tick; zone_gc(); @@ -1910,7 +1930,7 @@ static struct fake_zone_info fake_zones[] = { .func = mapping_fake_zone_info, }, #endif /* ppc */ -#ifdef i386 +#if defined(__i386__) || defined (__x86_64__) { .name = "page_tables", .func = pt_fake_zone_info, @@ -1943,9 +1963,18 @@ host_zone_info( kern_return_t kr; size_t num_fake_zones; + if (host == HOST_NULL) return KERN_INVALID_HOST; +#if defined(__LP64__) + if (!thread_is_64bit(current_thread())) + return KERN_NOT_SUPPORTED; +#else + if (thread_is_64bit(current_thread())) + return KERN_NOT_SUPPORTED; +#endif + num_fake_zones = sizeof fake_zones / sizeof fake_zones[0]; /* @@ -1954,7 +1983,7 @@ host_zone_info( */ simple_lock(&all_zones_lock); - max_zones = num_zones + num_fake_zones; + max_zones = (unsigned int)(num_zones + num_fake_zones); z = first_zone; simple_unlock(&all_zones_lock); @@ -2073,6 +2102,43 @@ host_zone_info( return KERN_SUCCESS; } +extern unsigned int stack_total; + +#if defined(__i386__) || defined (__x86_64__) +extern unsigned int inuse_ptepages_count; +#endif + +void zone_display_zprint() +{ + unsigned int i; + zone_t the_zone; + + if(first_zone!=NULL) { + the_zone = first_zone; + for (i = 0; i < num_zones; i++) { + if(the_zone->cur_size > (1024*1024)) { + printf("%.20s:\t%lu\n",the_zone->zone_name,(uintptr_t)the_zone->cur_size); + 
} + + if(the_zone->next_zone == NULL) { + break; + } + + the_zone = the_zone->next_zone; + } + } + + printf("Kernel Stacks:\t%lu\n",(uintptr_t)(kernel_stack_size * stack_total)); + +#if defined(__i386__) || defined (__x86_64__) + printf("PageTables:\t%lu\n",(uintptr_t)(PAGE_SIZE * inuse_ptepages_count)); +#endif + + printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total); +} + + + #if MACH_KDB #include #include @@ -2364,4 +2430,6 @@ zone_debug_disable( z->elem_size -= ZONE_DEBUG_OFFSET; z->active_zones.next = z->active_zones.prev = NULL; } + + #endif /* ZONE_DEBUG */ diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index e8a1ee29d..b21f71253 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -118,7 +118,7 @@ struct zone { }; extern void zone_gc(void); -extern void consider_zone_gc(void); +extern void consider_zone_gc(boolean_t); /* Steal memory for zone module */ extern void zone_steal_memory(void); diff --git a/osfmk/kextd/Makefile b/osfmk/kextd/Makefile new file mode 100644 index 000000000..d3a065420 --- /dev/null +++ b/osfmk/kextd/Makefile @@ -0,0 +1,65 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = + +INSTINC_SUBDIRS_PPC = + +INSTINC_SUBDIRS_I386 = + +EXPINC_SUBDIRS = + +EXPINC_SUBDIRS_PPC = + +EXPINC_SUBDIRS_I386 = + +MIG_DEFS = kextd_mach.defs + +DATAFILES = ${MIG_DEFS} + +INSTALL_MI_LIST = + +INSTALL_MI_LCL_LIST = ${DATAFILES} + +INSTALL_MI_GEN_LIST = + +INSTALL_MI_DIR = kextd + +EXPORT_MI_LIST = \ + ${DATAFILES} + +EXPORT_MI_GEN_LIST = kextd_mach.h + +EXPORT_MI_DIR = kextd + +# +# Build path +# +INCFLAGS_MAKEFILE= -I.. 
+ +MIGKUFLAGS = -DMACH_KERNEL_PRIVATE -DKERNEL_USER=1 -maxonstack 1024 + +MIG_KUHDRS = kextd_mach.h + +MIG_KUSRC = kextd_mach.c kextd_mach.h + +COMP_FILES = ${MIG_KUSRC} + +${COMP_FILES} : kextd_mach.defs + +${MIG_KUSRC} : kextd_mach.defs + @echo MIG $@ + $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ + -user $*.c \ + -header $*.h \ + -server /dev/null \ + -sheader /dev/null \ + $< + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/bsd/kern/sysctl_init.c b/osfmk/kextd/kextd_mach.defs similarity index 79% rename from bsd/kern/sysctl_init.c rename to osfmk/kextd/kextd_mach.defs index 1319ea15b..9597f4a24 100644 --- a/bsd/kern/sysctl_init.c +++ b/osfmk/kextd/kextd_mach.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2006 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,9 +25,20 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ + + +#include +#include + +subsystem +#ifdef KERNEL_USER +KernelUser +#endif +kextd_kernel_request 77000; + +serverprefix svc_; + +/* Ping kextd, launchd starts it if it's not running. */ +simpleroutine kextd_ping( + server : mach_port_t +); diff --git a/osfmk/libsa/errno.h b/osfmk/libsa/errno.h deleted file mode 100644 index e40d14efb..000000000 --- a/osfmk/libsa/errno.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.5.1 1997/01/31 15:46:31 emcmanus - * Merged with nmk22b1_shared. - * [1997/01/30 08:42:08 emcmanus] - * - * Revision 1.1.2.5 1996/11/29 13:04:57 emcmanus - * Added EIO for libsa_mach's getclock(). - * [1996/11/29 09:59:19 emcmanus] - * - * Revision 1.1.2.4 1996/11/08 12:02:15 emcmanus - * Replaced errno variable by a macro that calls a function defined - * either in libsa_mach or in a threads library. - * [1996/11/08 11:48:47 emcmanus] - * - * Revision 1.1.2.3 1996/10/14 13:31:46 emcmanus - * Added ETIMEDOUT. - * [1996/10/14 13:29:55 emcmanus] - * - * Revision 1.1.2.2 1996/10/03 17:53:40 emcmanus - * Added new error codes needed by libpthread.a. - * [1996/10/03 16:17:42 emcmanus] - * - * Revision 1.1.2.1 1996/09/30 10:14:32 bruel - * First revision. 
- * [96/09/30 bruel] - * - * $EndLog$ - */ - -/* - * ANSI C defines EDOM and ERANGE. POSIX defines the remaining values. - * We may at some stage want to surround the extra values with - * #ifdef _POSIX_SOURCE. - * By an extraordinary coincidence, nearly all the values defined here - * correspond exactly to those in OSF/1 and in Linux. Imagine that. - * The exception is ETIMEDOUT, which has different values in the two - * systems. We use the OSF/1 value here. - */ - -extern int *__mach_errno_addr(void); -#define errno (*__mach_errno_addr()) - -#define ESUCCESS 0 /* Success */ -#define EPERM 1 /* Not owner */ -#define ESRCH 3 /* No such process */ -#define EIO 5 /* I/O error */ -#define ENOMEM 12 /* Not enough core */ -#define EBUSY 16 /* Mount device busy */ -#define EINVAL 22 /* Invalid argument */ -#define EDOM 33 /* Argument too large */ -#define ERANGE 34 /* Result too large */ -#define ETIMEDOUT 60 /* Connection timed out */ diff --git a/osfmk/libsa/i386/float.h b/osfmk/libsa/i386/float.h deleted file mode 100644 index 084245798..000000000 --- a/osfmk/libsa/i386/float.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.2 1998/09/30 21:21:00 wsanchez - * Merged in IntelMerge1 (mburg: Intel support) - * - * Revision 1.1.2.1 1998/09/30 18:19:49 mburg - * Changes for Intel port - * - * Revision 1.1.1.1 1998/03/07 02:25:36 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.1 1996/10/10 13:56:09 yp - * Created. - * [96/10/10 yp] - * - * $EndLog$ - */ - -#ifndef _MACHINE_FLOAT_H_ -#define _MACHINE_FLOAT_H_ -# include -#endif /* _MACHINE_FLOAT_H_ */ diff --git a/osfmk/libsa/i386/types.h b/osfmk/libsa/i386/types.h index b7cbda01b..3cbf43855 100644 --- a/osfmk/libsa/i386/types.h +++ b/osfmk/libsa/i386/types.h @@ -56,7 +56,7 @@ #ifndef _MACH_MACHINE_TYPES_H_ #define _MACH_MACHINE_TYPES_H_ 1 -typedef long dev_t; /* device number (major+minor) */ +typedef int dev_t; /* device number (major+minor) */ typedef signed char bit8_t; /* signed 8-bit quantity */ typedef unsigned char u_bit8_t; /* unsigned 8-bit quantity */ diff --git a/osfmk/libsa/ieeefloat.h b/osfmk/libsa/ieeefloat.h deleted file mode 100644 index 2fcda29a7..000000000 --- a/osfmk/libsa/ieeefloat.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). 
You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.1 1996/10/10 13:56:15 yp - * Created. - * [96/10/10 yp] - * - * $EndLog$ - */ - -#ifndef _IEEEFLOAT_H_ -#define _IEEEFLOAT_H_ - -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)float.h 8.1 (Berkeley) 6/10/93 - */ - -#define FLT_RADIX 2 /* b */ -#define FLT_ROUNDS 1 /* FP addition rounds to nearest */ - -#define FLT_MANT_DIG 24 /* p */ -#define FLT_EPSILON 1.19209290E-07F /* b**(1-p) */ -#define FLT_DIG 6 /* floor((p-1)*log10(b))+(b == 10) */ -#define FLT_MIN_EXP -125 /* emin */ -#define FLT_MIN 1.17549435E-38F /* b**(emin-1) */ -#define FLT_MIN_10_EXP -37 /* ceil(log10(b**(emin-1))) */ -#define FLT_MAX_EXP 128 /* emax */ -#define FLT_MAX 3.40282347E+38F /* (1-b**(-p))*b**emax */ -#define FLT_MAX_10_EXP 38 /* floor(log10((1-b**(-p))*b**emax)) */ - -#define DBL_MANT_DIG 53 -#define DBL_EPSILON 2.2204460492503131E-16 -#define DBL_DIG 15 -#define DBL_MIN_EXP -1021 -#define DBL_MIN 2.225073858507201E-308 -#define DBL_MIN_10_EXP -307 -#define DBL_MAX_EXP 1024 -#define DBL_MAX 1.797693134862316E+308 -#define DBL_MAX_10_EXP 308 - -#define LDBL_MANT_DIG DBL_MANT_DIG -#define LDBL_EPSILON DBL_EPSILON -#define LDBL_DIG DBL_DIG -#define LDBL_MIN_EXP DBL_MIN_EXP -#define LDBL_MIN DBL_MIN -#define LDBL_MIN_10_EXP DBL_MIN_10_EXP -#define LDBL_MAX_EXP DBL_MAX_EXP -#define LDBL_MAX DBL_MAX -#define LDBL_MAX_10_EXP DBL_MAX_10_EXP - -#endif /* _IEEEFLOAT_H_ */ diff --git a/osfmk/libsa/machine/types.h b/osfmk/libsa/machine/types.h index abe727554..f79adbe87 100644 --- a/osfmk/libsa/machine/types.h +++ b/osfmk/libsa/machine/types.h @@ -29,9 +29,9 @@ #define _MACH_MACHINE_TYPES_H #if defined (__ppc__) -#include "ppc/types.h" -#elif defined (__i386__) -#include "i386/types.h" +#include "libsa/ppc/types.h" +#elif defined (__i386__) || defined (__x86_64__) +#include "libsa/i386/types.h" #else #error architecture not supported #endif diff --git a/osfmk/libsa/math.h b/osfmk/libsa/math.h deleted file mode 100644 index 024862d76..000000000 --- a/osfmk/libsa/math.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.6.1 1997/01/31 15:46:32 emcmanus - * Merged with nmk22b1_shared. - * [1997/01/30 16:57:28 emcmanus] - * - * Revision 1.1.2.4 1997/01/03 10:11:22 yp - * isnan() prototype for JDK. - * [97/01/03 yp] - * - * Revision 1.1.2.3 1996/11/29 14:33:24 yp - * Added more prototypes. - * [96/11/29 yp] - * - * Revision 1.1.2.2 1996/10/10 13:56:16 yp - * Submitted again (ODE problems). - * [96/10/10 yp] - * - * Revision 1.1.2.1 1996/10/10 09:16:46 yp - * Created. 
- * [96/10/10 yp] - * - * $EndLog$ - */ - -#ifndef _MATH_H_ -#define _MATH_H_ 1 - -double acos (double); -double acosh (double); -double asin (double); -double asinh (double); -double atan (double); -double atanh (double); -double atan2 (double, double); -double cbrt (double); -double ceil (double); -double copysign (double, double); -double cos (double); -double cosh (double); -double drem (double); -double exp (double); -double expm1 (double); -double fabs (double); -int finite (double); -double floor (double); -double fmod (double, double); -double frexp (double, int *); -int ilogb (double); -int isnan(double); -double ldexp (double, int); -double log (double); -double log10 (double); -double log1p (double); -double logb (double); -double modf (double, double *); -double nextafter (double, double); -double pow (double, double); -double remainder (double, double); -double rint (double); -double scalb (double, double); -double sin (double); -double sinh (double); -double sqrt (double); -double tan (double); -double tanh (double); - -#include - -#endif /* _MATH_H_ */ diff --git a/osfmk/libsa/ppc/float.h b/osfmk/libsa/ppc/float.h deleted file mode 100644 index ba074cdbb..000000000 --- a/osfmk/libsa/ppc/float.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.1 1996/12/09 16:59:00 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * [1996/12/09 11:18:39 stephen] - * - * Revision 1.1.2.1 1996/10/10 13:56:09 yp - * Created. - * [96/10/10 yp] - * - * $EndLog$ - */ - -#ifndef _MACHINE_FLOAT_H_ -#define _MACHINE_FLOAT_H_ -# include -#endif /* _MACHINE_FLOAT_H_ */ diff --git a/osfmk/libsa/ppc/math.h b/osfmk/libsa/ppc/math.h deleted file mode 100644 index a942324b0..000000000 --- a/osfmk/libsa/ppc/math.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.1 1996/12/09 16:59:02 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * [1996/12/09 11:18:44 stephen] - * - * Revision 1.1.2.2 1996/10/10 13:56:07 yp - * Submitted again (ODE problems). - * [96/10/10 yp] - * - * Revision 1.1.2.1 1996/10/10 09:16:43 yp - * Created. - * [96/10/10 yp] - * - * $EndLog$ - */ - -#ifndef _MACHINE_MATH_H_ -#define _MACHINE_MATH_H_ 1 - -#define HUGE_VAL (1.701411733192644270e38) - -#endif /* _MACHINE_MATH_H_ */ diff --git a/osfmk/libsa/ppc/stdarg_apple.h b/osfmk/libsa/ppc/stdarg_apple.h deleted file mode 100644 index 770e7d2f1..000000000 --- a/osfmk/libsa/ppc/stdarg_apple.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* stdarg.h for GNU. - Note that the type used in va_arg is supposed to match the - actual type **after default promotions**. - Thus, va_arg (..., short) is not valid. 
*/ - -#ifndef _STDARG_H -#ifndef _ANSI_STDARG_H_ -#ifndef __need___va_list -#define _STDARG_H -#define _ANSI_STDARG_H_ -#endif /* not __need___va_list */ -#undef __need___va_list - -#ifdef __clipper__ -#include -#else -#ifdef __m88k__ -#include -#else -#ifdef __i860__ -#include -#else -#ifdef __hppa__ -#include -#else -#ifdef __mips__ -#include -#else -#ifdef __sparc__ -#include -#else -#ifdef __i960__ -#include -#else -#ifdef __alpha__ -#include -#else -#if defined (__H8300__) || defined (__H8300H__) -#include -#else -#if defined (__PPC__) && defined (_CALL_SYSV) -#include -#else - -/* Define __gnuc_va_list. */ - -#ifndef __GNUC_VA_LIST -#define __GNUC_VA_LIST -#if defined(__svr4__) || defined(_AIX) || defined(_M_UNIX) || defined(__NetBSD__) -typedef char *__gnuc_va_list; -#else -typedef void *__gnuc_va_list; -#endif -#endif - -/* Define the standard macros for the user, - if this invocation was from the user program. */ -#ifdef _STDARG_H - -/* Amount of space required in an argument list for an arg of type TYPE. - TYPE may alternatively be an expression whose type is used. */ - -#if defined(sysV68) -#define __va_rounded_size(TYPE) \ - (((sizeof (TYPE) + sizeof (short) - 1) / sizeof (short)) * sizeof (short)) -#else -#define __va_rounded_size(TYPE) \ - (((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int)) -#endif - -#define va_start(AP, LASTARG) \ - (AP = ((__gnuc_va_list) __builtin_next_arg (LASTARG))) - -#undef va_end -void va_end (__gnuc_va_list); /* Defined in libgcc.a */ -#define va_end(AP) ((void)0) - -/* We cast to void * and then to TYPE * because this avoids - a warning about increasing the alignment requirement. */ - -#if defined (__arm__) || defined (__i386__) || defined (__i860__) || defined (__ns32000__) || defined (__vax__) -/* This is for little-endian machines; small args are padded upward. 
*/ -#define va_arg(AP, TYPE) \ - (AP = (__gnuc_va_list) ((char *) (AP) + __va_rounded_size (TYPE)), \ - *((TYPE *) (void *) ((char *) (AP) - __va_rounded_size (TYPE)))) -#else /* big-endian */ -/* This is for big-endian machines; small args are padded downward. */ -#define va_arg(AP, TYPE) \ - (AP = (__gnuc_va_list) ((char *) (AP) + __va_rounded_size (TYPE)), \ - *((TYPE *) (void *) ((char *) (AP) \ - - ((sizeof (TYPE) < __va_rounded_size (char) \ - ? sizeof (TYPE) : __va_rounded_size (TYPE)))))) -#endif /* big-endian */ -#endif /* _STDARG_H */ - -#endif /* not powerpc with V.4 calling sequence */ -#endif /* not h8300 */ -#endif /* not alpha */ -#endif /* not i960 */ -#endif /* not sparc */ -#endif /* not mips */ -#endif /* not hppa */ -#endif /* not i860 */ -#endif /* not m88k */ -#endif /* not clipper */ - -#ifdef _STDARG_H -/* Define va_list, if desired, from __gnuc_va_list. */ -/* We deliberately do not define va_list when called from - stdio.h, because ANSI C says that stdio.h is not supposed to define - va_list. stdio.h needs to have access to that data type, - but must not use that name. It should use the name __gnuc_va_list, - which is safe because it is reserved for the implementation. */ - -#ifdef _HIDDEN_VA_LIST /* On OSF1, this means varargs.h is "half-loaded". */ -#undef _VA_LIST -#endif - -#ifdef _BSD_VA_LIST -#undef _BSD_VA_LIST -#endif - -#ifdef __svr4__ -/* SVR4.2 uses _VA_LIST for an internal alias for va_list, - so we must avoid testing it and setting it here. - SVR4 uses _VA_LIST as a flag in stdarg.h, but we should - have no conflict with that. */ -#ifndef _VA_LIST_ -#define _VA_LIST_ -#ifdef __i860__ -#ifndef _VA_LIST -#define _VA_LIST va_list -#endif -#endif /* __i860__ */ -typedef __gnuc_va_list va_list; -#endif /* _VA_LIST_ */ -#else /* not __svr4__ */ - -/* The macro _VA_LIST_ is the same thing used by this file in Ultrix. - But on BSD NET2 we must not test or define or undef it. 
- (Note that the comments in NET 2's ansi.h - are incorrect for _VA_LIST_--see stdio.h!) */ -#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT) -/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5 */ -#ifndef _VA_LIST_DEFINED -/* The macro _VA_LIST is used in SCO Unix 3.2. */ -#ifndef _VA_LIST -/* The macro _VA_LIST_T_H is used in the Bull dpx2 */ -#ifndef _VA_LIST_T_H -typedef __gnuc_va_list va_list; -#endif /* not _VA_LIST_T_H */ -#endif /* not _VA_LIST */ -#endif /* not _VA_LIST_DEFINED */ -#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__)) -#define _VA_LIST_ -#endif -#ifndef _VA_LIST -#define _VA_LIST -#endif -#ifndef _VA_LIST_DEFINED -#define _VA_LIST_DEFINED -#endif -#ifndef _VA_LIST_T_H -#define _VA_LIST_T_H -#endif - -#endif /* not _VA_LIST_, except on certain systems */ - -#endif /* not __svr4__ */ - -#endif /* _STDARG_H */ - -#endif /* not _ANSI_STDARG_H_ */ -#endif /* not _STDARG_H */ diff --git a/osfmk/libsa/stdio.h b/osfmk/libsa/stdio.h deleted file mode 100644 index 4deea51e9..000000000 --- a/osfmk/libsa/stdio.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.3 1996/10/04 11:36:05 emcmanus - * Added fprintf_stderr() prototype, for use by Mach libraries and like - * that might end up being linked with either libc or libsa_mach. - * [1996/10/04 11:31:53 emcmanus] - * - * Revision 1.1.2.2 1996/10/03 17:53:45 emcmanus - * Define NULL. This is currently also (questionably) defined in stdlib.h, - * string.h, and types.h. - * [1996/10/03 16:17:55 emcmanus] - * - * Revision 1.1.2.1 1996/09/17 16:56:18 bruel - * created from standalone mach servers. - * [96/09/17 bruel] - * - * $EndLog$ - */ - -#ifndef _MACH_STDIO_H_ -#define _MACH_STDIO_H_ - -#include - -#ifndef NULL -#define NULL ((void *) 0) -#endif - -/* sprintf() is being deprecated. Please use snprintf() instead. */ -extern int sprintf(char *, const char *, ...) __deprecated; -extern int printf(const char *, ...); -extern int vprintf(const char *, va_list ); - -/* vsprintf() is being deprecated. Please use vsnprintf() instead. 
*/ -extern int vsprintf(char *, const char *, va_list ) __deprecated; - -extern int getchar(void); - -extern int fprintf_stderr(const char *, ...); - -#endif /* _MACH_STDIO_H_ */ diff --git a/osfmk/libsa/types.h b/osfmk/libsa/types.h index e9f8ad7bf..ca12b7efb 100644 --- a/osfmk/libsa/types.h +++ b/osfmk/libsa/types.h @@ -47,7 +47,7 @@ #ifndef _MACH_TYPES_H_ #define _MACH_TYPES_H_ -#include "machine/types.h" +#include "libsa/machine/types.h" #ifndef _SIZE_T #define _SIZE_T @@ -95,4 +95,14 @@ typedef volatile unsigned char vuchar_t; typedef volatile unsigned short vushort_t; typedef volatile unsigned int vuint_t; typedef volatile unsigned long vulong_t; + +/* + * Deprecation macro + */ +#if __GNUC__ >= 3 +#define __deprecated __attribute__((deprecated)) +#else +#define __deprecated /* nothing */ +#endif + #endif /* _MACH_TYPES_H_ */ diff --git a/osfmk/mach-o/loader.h b/osfmk/mach-o/loader.h deleted file mode 100644 index dfbf7ee8b..000000000 --- a/osfmk/mach-o/loader.h +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _MACHO_LOADER_H_ -#define _MACHO_LOADER_H_ - -/* - * This file describes the format of mach object files. - * - * NOTE: This header is used for manipulationg 32 bit mach object - * withing a 32 bit mach_kernel for the purpose of dealing - * with linking loadable kernel modules. - */ - -/* - * is needed here for the cpu_type_t and cpu_subtype_t types - * and contains the constants for the possible values of these types. - */ -#include - -/* - * is needed here for the vm_prot_t type and contains the - * constants that are or'ed together for the possible values of this type. - */ -#include - -/* - * is expected to define the flavors of the thread - * states and the structures of those flavors for each machine. - */ -#include - -/* - * The mach header appears at the very beginning of the object file. - */ -struct mach_header { - unsigned long magic; /* mach magic number identifier */ - cpu_type_t cputype; /* cpu specifier */ - cpu_subtype_t cpusubtype; /* machine specifier */ - unsigned long filetype; /* type of file */ - unsigned long ncmds; /* number of load commands */ - unsigned long sizeofcmds; /* the size of all the load commands */ - unsigned long flags; /* flags */ -}; - -/* Constant for the magic field of the mach_header */ -#define MH_MAGIC 0xfeedface /* the mach magic number */ -#define MH_CIGAM 0xcefaedfe - -/* - * The layout of the file depends on the filetype. 
For all but the MH_OBJECT - * file type the segments are padded out and aligned on a segment alignment - * boundary for efficient demand pageing. The MH_EXECUTE, MH_FVMLIB, MH_DYLIB, - * MH_DYLINKER and MH_BUNDLE file types also have the headers included as part - * of their first segment. - * - * The file type MH_OBJECT is a compact format intended as output of the - * assembler and input (and possibly output) of the link editor (the .o - * format). All sections are in one unnamed segment with no segment padding. - * This format is used as an executable format when the file is so small the - * segment padding greatly increases it's size. - * - * The file type MH_PRELOAD is an executable format intended for things that - * not executed under the kernel (proms, stand alones, kernels, etc). The - * format can be executed under the kernel but may demand paged it and not - * preload it before execution. - * - * A core file is in MH_CORE format and can be any in an arbritray legal - * Mach-O file. 
- * - * Constants for the filetype field of the mach_header - */ -#define MH_OBJECT 0x1 /* relocatable object file */ -#define MH_EXECUTE 0x2 /* demand paged executable file */ -#define MH_FVMLIB 0x3 /* fixed VM shared library file */ -#define MH_CORE 0x4 /* core file */ -#define MH_PRELOAD 0x5 /* preloaded executable file */ -#define MH_DYLIB 0x6 /* dynamicly bound shared library file*/ -#define MH_DYLINKER 0x7 /* dynamic link editor */ -#define MH_BUNDLE 0x8 /* dynamicly bound bundle file */ - -/* Constants for the flags field of the mach_header */ -#define MH_NOUNDEFS 0x1 /* the object file has no undefined - references, can be executed */ -#define MH_INCRLINK 0x2 /* the object file is the output of an - incremental link against a base file - and can't be link edited again */ -#define MH_DYLDLINK 0x4 /* the object file is input for the - dynamic linker and can't be staticly - link edited again */ -#define MH_BINDATLOAD 0x8 /* the object file's undefined - references are bound by the dynamic - linker when loaded. */ -#define MH_PREBOUND 0x10 /* the file has it's dynamic undefined - references prebound. */ - -/* - * The load commands directly follow the mach_header. The total size of all - * of the commands is given by the sizeofcmds field in the mach_header. All - * load commands must have as their first two fields cmd and cmdsize. The cmd - * field is filled in with a constant for that command type. Each command type - * has a structure specifically for it. The cmdsize field is the size in bytes - * of the particular load command structure plus anything that follows it that - * is a part of the load command (i.e. section structures, strings, etc.). To - * advance to the next load command the cmdsize can be added to the offset or - * pointer of the current load command. The cmdsize MUST be a multiple of - * sizeof(long) (this is forever the maximum alignment of any load commands). - * The padded bytes must be zero. 
All tables in the object file must also - * follow these rules so the file can be memory mapped. Otherwise the pointers - * to these tables will not work well or at all on some machines. With all - * padding zeroed like objects will compare byte for byte. - */ -struct load_command { - unsigned long cmd; /* type of load command */ - unsigned long cmdsize; /* total size of command in bytes */ -}; - -/* Constants for the cmd field of all load commands, the type */ -#define LC_SEGMENT 0x1 /* segment of this file to be mapped */ -#define LC_SYMTAB 0x2 /* link-edit stab symbol table info */ -#define LC_SYMSEG 0x3 /* link-edit gdb symbol table info (obsolete) */ -#define LC_THREAD 0x4 /* thread */ -#define LC_UNIXTHREAD 0x5 /* unix thread (includes a stack) */ -#define LC_LOADFVMLIB 0x6 /* load a specified fixed VM shared library */ -#define LC_IDFVMLIB 0x7 /* fixed VM shared library identification */ -#define LC_IDENT 0x8 /* object identification info (obsolete) */ -#define LC_FVMFILE 0x9 /* fixed VM file inclusion (internal use) */ -#define LC_PREPAGE 0xa /* prepage command (internal use) */ -#define LC_DYSYMTAB 0xb /* dynamic link-edit symbol table info */ -#define LC_LOAD_DYLIB 0xc /* load a dynamicly linked shared library */ -#define LC_ID_DYLIB 0xd /* dynamicly linked shared lib identification */ -#define LC_LOAD_DYLINKER 0xe /* load a dynamic linker */ -#define LC_ID_DYLINKER 0xf /* dynamic linker identification */ -#define LC_PREBOUND_DYLIB 0x10 /* modules prebound for a dynamicly */ - /* linked shared library */ - -#define LC_UUID 0x1b /* the uuid */ - -/* - * A variable length string in a load command is represented by an lc_str - * union. The strings are stored just after the load command structure and - * the offset is from the start of the load command structure. The size - * of the string is reflected in the cmdsize field of the load command. - * Once again any padded bytes to bring the cmdsize field to a multiple - * of sizeof(long) must be zero. 
- */ -union lc_str { - unsigned long offset; /* offset to the string */ - char *ptr; /* pointer to the string */ -}; - -/* - * The segment load command indicates that a part of this file is to be - * mapped into the task's address space. The size of this segment in memory, - * vmsize, maybe equal to or larger than the amount to map from this file, - * filesize. The file is mapped starting at fileoff to the beginning of - * the segment in memory, vmaddr. The rest of the memory of the segment, - * if any, is allocated zero fill on demand. The segment's maximum virtual - * memory protection and initial virtual memory protection are specified - * by the maxprot and initprot fields. If the segment has sections then the - * section structures directly follow the segment command and their size is - * reflected in cmdsize. - */ -struct segment_command { - unsigned long cmd; /* LC_SEGMENT */ - unsigned long cmdsize; /* includes sizeof section structs */ - char segname[16]; /* segment name */ - unsigned long vmaddr; /* memory address of this segment */ - unsigned long vmsize; /* memory size of this segment */ - unsigned long fileoff; /* file offset of this segment */ - unsigned long filesize; /* amount to map from the file */ - vm_prot_t maxprot; /* maximum VM protection */ - vm_prot_t initprot; /* initial VM protection */ - unsigned long nsects; /* number of sections in segment */ - unsigned long flags; /* flags */ -}; - -/* Constants for the flags field of the segment_command */ -#define SG_HIGHVM 0x1 /* the file contents for this segment is for - the high part of the VM space, the low part - is zero filled (for stacks in core files) */ -#define SG_FVMLIB 0x2 /* this segment is the VM that is allocated by - a fixed VM library, for overlap checking in - the link editor */ -#define SG_NORELOC 0x4 /* this segment has nothing that was relocated - in it and nothing relocated to it, that is - it maybe safely replaced without relocation*/ - -/* - * A segment is made up of zero or 
more sections. Non-MH_OBJECT files have - * all of their segments with the proper sections in each, and padded to the - * specified segment alignment when produced by the link editor. The first - * segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header - * and load commands of the object file before it's first section. The zero - * fill sections are always last in their segment (in all formats). This - * allows the zeroed segment padding to be mapped into memory where zero fill - * sections might be. - * - * The MH_OBJECT format has all of it's sections in one segment for - * compactness. There is no padding to a specified segment boundary and the - * mach_header and load commands are not part of the segment. - * - * Sections with the same section name, sectname, going into the same segment, - * segname, are combined by the link editor. The resulting section is aligned - * to the maximum alignment of the combined sections and is the new section's - * alignment. The combined sections are aligned to their original alignment in - * the combined section. Any padded bytes to get the specified alignment are - * zeroed. - * - * The format of the relocation entries referenced by the reloff and nreloc - * fields of the section structure for mach object files is described in the - * header file . 
- */ -struct section { - char sectname[16]; /* name of this section */ - char segname[16]; /* segment this section goes in */ - unsigned long addr; /* memory address of this section */ - unsigned long size; /* size in bytes of this section */ - unsigned long offset; /* file offset of this section */ - unsigned long align; /* section alignment (power of 2) */ - unsigned long reloff; /* file offset of relocation entries */ - unsigned long nreloc; /* number of relocation entries */ - unsigned long flags; /* flags (section type and attributes)*/ - unsigned long reserved1; /* reserved */ - unsigned long reserved2; /* reserved */ -}; - -/* - * The flags field of a section structure is separated into two parts a section - * type and section attributes. The section types are mutually exclusive (it - * can only have one type) but the section attributes are not (it may have more - * than one attribute). - */ -#define SECTION_TYPE 0x000000ff /* 256 section types */ -#define SECTION_ATTRIBUTES 0xffffff00 /* 24 section attributes */ - -/* Constants for the type of a section */ -#define S_REGULAR 0x0 /* regular section */ -#define S_ZEROFILL 0x1 /* zero fill on demand section */ -#define S_CSTRING_LITERALS 0x2 /* section with only literal C strings*/ -#define S_4BYTE_LITERALS 0x3 /* section with only 4 byte literals */ -#define S_8BYTE_LITERALS 0x4 /* section with only 8 byte literals */ -#define S_LITERAL_POINTERS 0x5 /* section with only pointers to */ - /* literals */ -/* - * For the two types of symbol pointers sections and the symbol stubs section - * they have indirect symbol table entries. For each of the entries in the - * section the indirect symbol table entries, in corresponding order in the - * indirect symbol table, start at the index stored in the reserved1 field - * of the section structure. 
Since the indirect symbol table entries - * correspond to the entries in the section the number of indirect symbol table - * entries is inferred from the size of the section divided by the size of the - * entries in the section. For symbol pointers sections the size of the entries - * in the section is 4 bytes and for symbol stubs sections the byte size of the - * stubs is stored in the reserved2 field of the section structure. - */ -#define S_NON_LAZY_SYMBOL_POINTERS 0x6 /* section with only non-lazy - symbol pointers */ -#define S_LAZY_SYMBOL_POINTERS 0x7 /* section with only lazy symbol - pointers */ -#define S_SYMBOL_STUBS 0x8 /* section with only symbol - stubs, byte size of stub in - the reserved2 field */ -#define S_MOD_INIT_FUNC_POINTERS 0x9 /* section with only function - pointers for initialization*/ -/* - * Constants for the section attributes part of the flags field of a section - * structure. - */ -#define SECTION_ATTRIBUTES_USR 0xff000000 /* User setable attributes */ -#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section contains only true - machine instructions */ -#define SECTION_ATTRIBUTES_SYS 0x00ffff00 /* system setable attributes */ -#define S_ATTR_SOME_INSTRUCTIONS 0x00000400 /* section contains some - machine instructions */ -#define S_ATTR_EXT_RELOC 0x00000200 /* section has external - relocation entries */ -#define S_ATTR_LOC_RELOC 0x00000100 /* section has local - relocation entries */ - - -/* - * The names of segments and sections in them are mostly meaningless to the - * link-editor. But there are few things to support traditional UNIX - * executables that require the link-editor and assembler to use some names - * agreed upon by convention. - * - * The initial protection of the "__TEXT" segment has write protection turned - * off (not writeable). - * - * The link-editor will allocate common symbols at the end of the "__common" - * section in the "__DATA" segment. It will create the section and segment - * if needed. 
- */ - -/* The currently known segment names and the section names in those segments */ - -#define SEG_PAGEZERO "__PAGEZERO" /* the pagezero segment which has no */ - /* protections and catches NULL */ - /* references for MH_EXECUTE files */ - - -#define SEG_TEXT "__TEXT" /* the tradition UNIX text segment */ -#define SECT_TEXT "__text" /* the real text part of the text */ - /* section no headers, and no padding */ -#define SECT_FVMLIB_INIT0 "__fvmlib_init0" /* the fvmlib initialization */ - /* section */ -#define SECT_FVMLIB_INIT1 "__fvmlib_init1" /* the section following the */ - /* fvmlib initialization */ - /* section */ - -#define SEG_DATA "__DATA" /* the tradition UNIX data segment */ -#define SECT_DATA "__data" /* the real initialized data section */ - /* no padding, no bss overlap */ -#define SECT_BSS "__bss" /* the real uninitialized data section*/ - /* no padding */ -#define SECT_COMMON "__common" /* the section common symbols are */ - /* allocated in by the link editor */ - -#define SEG_OBJC "__OBJC" /* objective-C runtime segment */ -#define SECT_OBJC_SYMBOLS "__symbol_table" /* symbol table */ -#define SECT_OBJC_MODULES "__module_info" /* module information */ -#define SECT_OBJC_STRINGS "__selector_strs" /* string table */ -#define SECT_OBJC_REFS "__selector_refs" /* string table */ - -#define SEG_ICON "__ICON" /* the NeXT icon segment */ -#define SECT_ICON_HEADER "__header" /* the icon headers */ -#define SECT_ICON_TIFF "__tiff" /* the icons in tiff format */ - -#define SEG_LINKEDIT "__LINKEDIT" /* the segment containing all structs */ - /* created and maintained by the link */ - /* editor. Created with -seglinkedit */ - /* option to ld(1) for MH_EXECUTE and */ - /* FVMLIB file types only */ - -#define SEG_UNIXSTACK "__UNIXSTACK" /* the unix stack segment */ - -/* - * Fixed virtual memory shared libraries are identified by two things. The - * target pathname (the name of the library as found for execution), and the - * minor version number. 
The address of where the headers are loaded is in - * header_addr. - */ -struct fvmlib { - union lc_str name; /* library's target pathname */ - unsigned long minor_version; /* library's minor version number */ - unsigned long header_addr; /* library's header address */ -}; - -/* - * A fixed virtual shared library (filetype == MH_FVMLIB in the mach header) - * contains a fvmlib_command (cmd == LC_IDFVMLIB) to identify the library. - * An object that uses a fixed virtual shared library also contains a - * fvmlib_command (cmd == LC_LOADFVMLIB) for each library it uses. - */ -struct fvmlib_command { - unsigned long cmd; /* LC_IDFVMLIB or LC_LOADFVMLIB */ - unsigned long cmdsize; /* includes pathname string */ - struct fvmlib fvmlib; /* the library identification */ -}; - -/* - * Dynamicly linked shared libraries are identified by two things. The - * pathname (the name of the library as found for execution), and the - * compatibility version number. The pathname must match and the compatibility - * number in the user of the library must be greater than or equal to the - * library being used. The time stamp is used to record the time a library was - * built and copied into user so it can be use to determined if the library used - * at runtime is exactly the same as used to built the program. - */ -struct dylib { - union lc_str name; /* library's path name */ - unsigned long timestamp; /* library's build time stamp */ - unsigned long current_version; /* library's current version number */ - unsigned long compatibility_version;/* library's compatibility vers number*/ -}; - -/* - * A dynamicly linked shared library (filetype == MH_DYLIB in the mach header) - * contains a dylib_command (cmd == LC_ID_DYLIB) to identify the library. - * An object that uses a dynamicly linked shared library also contains a - * dylib_command (cmd == LC_LOAD_DYLIB) for each library it uses. 
- */ -struct dylib_command { - unsigned long cmd; /* LC_ID_DYLIB or LC_LOAD_DYLIB */ - unsigned long cmdsize; /* includes pathname string */ - struct dylib dylib; /* the library identification */ -}; - -/* - * A program (filetype == MH_EXECUTE) or bundle (filetype == MH_BUNDLE) that is - * prebound to it's dynamic libraries has one of these for each library that - * the static linker used in prebinding. It contains a bit vector for the - * modules in the library. The bits indicate which modules are bound (1) and - * which are not (0) from the library. The bit for module 0 is the low bit - * of the first byte. So the bit for the Nth module is: - * (linked_modules[N/8] >> N%8) & 1 - */ -struct prebound_dylib_command { - unsigned long cmd; /* LC_PREBOUND_DYLIB */ - unsigned long cmdsize; /* includes strings */ - union lc_str name; /* library's path name */ - unsigned long nmodules; /* number of modules in library */ - union lc_str linked_modules; /* bit vector of linked modules */ -}; - -/* - * A program that uses a dynamic linker contains a dylinker_command to identify - * the name of the dynamic linker (LC_LOAD_DYLINKER). And a dynamic linker - * contains a dylinker_command to identify the dynamic linker (LC_ID_DYLINKER). - * A file can have at most one of these. - */ -struct dylinker_command { - unsigned long cmd; /* LC_ID_DYLINKER or LC_LOAD_DYLINKER */ - unsigned long cmdsize; /* includes pathname string */ - union lc_str name; /* dynamic linker's path name */ -}; - -/* - * Thread commands contain machine-specific data structures suitable for - * use in the thread state primitives. The machine specific data structures - * follow the struct thread_command as follows. - * Each flavor of machine specific data structure is preceded by an unsigned - * long constant for the flavor of that data structure, an unsigned long - * that is the count of longs of the size of the state data structure and then - * the state data structure follows. 
This triple may be repeated for many - * flavors. The constants for the flavors, counts and state data structure - * definitions are expected to be in the header file . - * These machine specific data structures sizes must be multiples of - * sizeof(long). The cmdsize reflects the total size of the thread_command - * and all of the sizes of the constants for the flavors, counts and state - * data structures. - * - * For executable objects that are unix processes there will be one - * thread_command (cmd == LC_UNIXTHREAD) created for it by the link-editor. - * This is the same as a LC_THREAD, except that a stack is automatically - * created (based on the shell's limit for the stack size). Command arguments - * and environment variables are copied onto that stack. - */ -struct thread_command { - unsigned long cmd; /* LC_THREAD or LC_UNIXTHREAD */ - unsigned long cmdsize; /* total size of this command */ - /* unsigned long flavor flavor of thread state */ - /* unsigned long count count of longs in thread state */ - /* struct XXX_thread_state state thread state for this flavor */ - /* ... */ -}; - -/* - * The symtab_command contains the offsets and sizes of the link-edit 4.3BSD - * "stab" style symbol table information as described in the header files - * and . - */ -struct symtab_command { - unsigned long cmd; /* LC_SYMTAB */ - unsigned long cmdsize; /* sizeof(struct symtab_command) */ - unsigned long symoff; /* symbol table offset */ - unsigned long nsyms; /* number of symbol table entries */ - unsigned long stroff; /* string table offset */ - unsigned long strsize; /* string table size in bytes */ -}; - -/* - * This is the second set of the symbolic information which is used to support - * the data structures for the dynamicly link editor. - * - * The original set of symbolic information in the symtab_command which contains - * the symbol and string tables must also be present when this load command is - * present. 
When this load command is present the symbol table is organized - * into three groups of symbols: - * local symbols (static and debugging symbols) - grouped by module - * defined external symbols - grouped by module (sorted by name if not lib) - * undefined external symbols (sorted by name) - * In this load command there are offsets and counts to each of the three groups - * of symbols. - * - * This load command contains a the offsets and sizes of the following new - * symbolic information tables: - * table of contents - * module table - * reference symbol table - * indirect symbol table - * The first three tables above (the table of contents, module table and - * reference symbol table) are only present if the file is a dynamicly linked - * shared library. For executable and object modules, which are files - * containing only one module, the information that would be in these three - * tables is determined as follows: - * table of contents - the defined external symbols are sorted by name - * module table - the file contains only one module so everything in the - * file is part of the module. - * reference symbol table - is the defined and undefined external symbols - * - * For dynamicly linked shared library files this load command also contains - * offsets and sizes to the pool of relocation entries for all sections - * separated into two groups: - * external relocation entries - * local relocation entries - * For executable and object modules the relocation entries continue to hang - * off the section structures. 
- */ -struct dysymtab_command { - unsigned long cmd; /* LC_DYSYMTAB */ - unsigned long cmdsize; /* sizeof(struct dysymtab_command) */ - - /* - * The symbols indicated by symoff and nsyms of the LC_SYMTAB load command - * are grouped into the following three groups: - * local symbols (further grouped by the module they are from) - * defined external symbols (further grouped by the module they are from) - * undefined symbols - * - * The local symbols are used only for debugging. The dynamic binding - * process may have to use them to indicate to the debugger the local - * symbols for a module that is being bound. - * - * The last two groups are used by the dynamic binding process to do the - * binding (indirectly through the module table and the reference symbol - * table when this is a dynamicly linked shared library file). - */ - unsigned long ilocalsym; /* index to local symbols */ - unsigned long nlocalsym; /* number of local symbols */ - - unsigned long iextdefsym; /* index to externally defined symbols */ - unsigned long nextdefsym; /* number of externally defined symbols */ - - unsigned long iundefsym; /* index to undefined symbols */ - unsigned long nundefsym; /* number of undefined symbols */ - - /* - * For the for the dynamic binding process to find which module a symbol - * is defined in the table of contents is used (analogous to the ranlib - * structure in an archive) which maps defined external symbols to modules - * they are defined in. This exists only in a dynamicly linked shared - * library file. For executable and object modules the defined external - * symbols are sorted by name and is use as the table of contents. - */ - unsigned long tocoff; /* file offset to table of contents */ - unsigned long ntoc; /* number of entries in table of contents */ - - /* - * To support dynamic binding of "modules" (whole object files) the symbol - * table must reflect the modules that the file was created from. 
This is - * done by having a module table that has indexes and counts into the merged - * tables for each module. The module structure that these two entries - * refer to is described below. This exists only in a dynamicly linked - * shared library file. For executable and object modules the file only - * contains one module so everything in the file belongs to the module. - */ - unsigned long modtaboff; /* file offset to module table */ - unsigned long nmodtab; /* number of module table entries */ - - /* - * To support dynamic module binding the module structure for each module - * indicates the external references (defined and undefined) each module - * makes. For each module there is an offset and a count into the - * reference symbol table for the symbols that the module references. - * This exists only in a dynamicly linked shared library file. For - * executable and object modules the defined external symbols and the - * undefined external symbols indicates the external references. - */ - unsigned long extrefsymoff; /* offset to referenced symbol table */ - unsigned long nextrefsyms; /* number of referenced symbol table entries */ - - /* - * The sections that contain "symbol pointers" and "routine stubs" have - * indexes and (implied counts based on the size of the section and fixed - * size of the entry) into the "indirect symbol" table for each pointer - * and stub. For every section of these two types the index into the - * indirect symbol table is stored in the section header in the field - * reserved1. An indirect symbol table entry is simply a 32bit index into - * the symbol table to the symbol that the pointer or stub is referring to. - * The indirect symbol table is ordered to match the entries in the section. 
- */ - unsigned long indirectsymoff; /* file offset to the indirect symbol table */ - unsigned long nindirectsyms; /* number of indirect symbol table entries */ - - /* - * To support relocating an individual module in a library file quickly the - * external relocation entries for each module in the library need to be - * accessed efficiently. Since the relocation entries can't be accessed - * through the section headers for a library file they are separated into - * groups of local and external entries further grouped by module. In this - * case the presents of this load command who's extreloff, nextrel, - * locreloff and nlocrel fields are non-zero indicates that the relocation - * entries of non-merged sections are not referenced through the section - * structures (and the reloff and nreloc fields in the section headers are - * set to zero). - * - * Since the relocation entries are not accessed through the section headers - * this requires the r_address field to be something other than a section - * offset to identify the item to be relocated. In this case r_address is - * set to the offset from the vmaddr of the first LC_SEGMENT command. - * - * The relocation entries are grouped by module and the module table - * entries have indexes and counts into them for the group of external - * relocation entries for that the module. - * - * For sections that are merged across modules there must not be any - * remaining external relocation entries for them (for merged sections - * remaining relocation entries must be local). - */ - unsigned long extreloff; /* offset to external relocation entries */ - unsigned long nextrel; /* number of external relocation entries */ - - /* - * All the local relocation entries are grouped together (they are not - * grouped by their module since they are only used if the object is moved - * from it staticly link edited address). 
- */ - unsigned long locreloff; /* offset to local relocation entries */ - unsigned long nlocrel; /* number of local relocation entries */ - -}; - -/* - * An indirect symbol table entry is simply a 32bit index into the symbol table - * to the symbol that the pointer or stub is refering to. Unless it is for a - * non-lazy symbol pointer section for a defined symbol which strip(1) as - * removed. In which case it has the value INDIRECT_SYMBOL_LOCAL. If the - * symbol was also absolute INDIRECT_SYMBOL_ABS is or'ed with that. - */ -#define INDIRECT_SYMBOL_LOCAL 0x80000000 -#define INDIRECT_SYMBOL_ABS 0x40000000 - - -/* a table of contents entry */ -struct dylib_table_of_contents { - unsigned long symbol_index; /* the defined external symbol - (index into the symbol table) */ - unsigned long module_index; /* index into the module table this symbol - is defined in */ -}; - -/* a module table entry */ -struct dylib_module { - unsigned long module_name; /* the module name (index into string table) */ - - unsigned long iextdefsym; /* index into externally defined symbols */ - unsigned long nextdefsym; /* number of externally defined symbols */ - unsigned long irefsym; /* index into reference symbol table */ - unsigned long nrefsym; /* number of reference symbol table entries */ - unsigned long ilocalsym; /* index into symbols for local symbols */ - unsigned long nlocalsym; /* number of local symbols */ - - unsigned long iextrel; /* index into external relocation entries */ - unsigned long nextrel; /* number of external relocation entries */ - - unsigned long iinit; /* index into the init section */ - unsigned long ninit; /* number of init section entries */ - - unsigned long /* for this module address of the start of */ - objc_module_info_addr; /* the (__OBJC,__module_info) section */ - unsigned long /* for this module size of */ - objc_module_info_size; /* the (__OBJC,__module_info) section */ -}; - -/* - * The entries in the reference symbol table are used when loading 
the module - * (both by the static and dynamic link editors) and if the module is unloaded - * or replaced. Therefore all external symbols (defined and undefined) are - * listed in the module's reference table. The flags describe the type of - * reference that is being made. The constants for the flags are defined in - * as they are also used for symbol table entries. - */ -struct dylib_reference { - unsigned long isym:24, /* index into the symbol table */ - flags:8; /* flags to indicate the type of reference */ -}; - -/* - * The uuid load command contains a single 128-bit unique random number that - * identifies an object produced by the static link editor. - */ -struct uuid_command { - unsigned long cmd; /* LC_UUID */ - unsigned long cmdsize; /* sizeof(struct uuid_command) */ - unsigned char uuid[16]; /* the 128-bit uuid */ -}; - -/* - * The symseg_command contains the offset and size of the GNU style - * symbol table information as described in the header file . - * The symbol roots of the symbol segments must also be aligned properly - * in the file. So the requirement of keeping the offsets aligned to a - * multiple of a sizeof(long) translates to the length field of the symbol - * roots also being a multiple of a long. Also the padding must again be - * zeroed. (THIS IS OBSOLETE and no longer supported). - */ -struct symseg_command { - unsigned long cmd; /* LC_SYMSEG */ - unsigned long cmdsize; /* sizeof(struct symseg_command) */ - unsigned long offset; /* symbol segment offset */ - unsigned long size; /* symbol segment size in bytes */ -}; - -/* - * The ident_command contains a free format string table following the - * ident_command structure. The strings are null terminated and the size of - * the command is padded out with zero bytes to a multiple of sizeof(long). - * (THIS IS OBSOLETE and no longer supported). 
- */ -struct ident_command { - unsigned long cmd; /* LC_IDENT */ - unsigned long cmdsize; /* strings that follow this command */ -}; - -/* - * The fvmfile_command contains a reference to a file to be loaded at the - * specified virtual address. (Presently, this command is reserved for NeXT - * internal use. The kernel ignores this command when loading a program into - * memory). - */ -struct fvmfile_command { - unsigned long cmd; /* LC_FVMFILE */ - unsigned long cmdsize; /* includes pathname string */ - union lc_str name; /* files pathname */ - unsigned long header_addr; /* files virtual address */ -}; - -#endif /*_MACHO_LOADER_H_*/ diff --git a/osfmk/mach-o/mach_header.c b/osfmk/mach-o/mach_header.c deleted file mode 100644 index fdf75d1aa..000000000 --- a/osfmk/mach-o/mach_header.c +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.c - * - * Functions for accessing mach-o headers. - * - * NOTE: This file supports only 32 bit mach headers at the present - * time; it's primary use is by kld, and all externally - * referenced routines at the present time operate against - * the 32 bit mach header _mh_execute_header, which is the - * header for the currently executing kernel. Adding support - * for 64 bit kernels is possible, but is not necessary at the - * present time. - * - * HISTORY - * 27-MAR-97 Umesh Vaishampayan (umeshv@NeXT.com) - * Added getsegdatafromheader(); - * - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.c. - * Ifdef'ed out most of this since I couldn't find any references. 
- */ - -#include -#include -#include -#include // from libsa - -#ifdef __MACHO__ - -extern struct mach_header _mh_execute_header; - -/* - * return the last address (first avail) - * - * This routine operates against the currently executing kernel only - */ -#ifdef MACH_BSD -__private_extern__ -#endif -vm_offset_t -getlastaddr(void) -{ - struct segment_command *sgp; - vm_offset_t last_addr = 0; - struct mach_header *header = &_mh_execute_header; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if ( sgp->cmd == LC_SEGMENT) { - if (sgp->vmaddr + sgp->vmsize > last_addr) - last_addr = sgp->vmaddr + sgp->vmsize; - } - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return last_addr; -} - -#ifdef XXX_MACH_BSD -__private_extern__ -#endif -/* - * This routine operates against the currently executing kernel only - */ -struct mach_header ** -getmachheaders(void) -{ - struct mach_header **tl; - - if (kmem_alloc(kernel_map, (vm_offset_t *) &tl, 2*sizeof(struct mach_header *)) != KERN_SUCCESS) - return NULL; - - tl[0] = &_mh_execute_header; - tl[1] = (struct mach_header *)0; - return tl; -} - -/* - * This routine returns the a pointer to the data for the named section in the - * named segment if it exist in the mach header passed to it. Also it returns - * the size of the section data indirectly through the pointer size. Otherwise - * it returns zero for the pointer and the size. - * - * This routine can operate against any 32 bit mach header. 
- */ -#ifdef MACH_BSD -__private_extern__ -#endif -void * -getsectdatafromheader( - struct mach_header *mhp, - const char *segname, - const char *sectname, - int *size) -{ - const struct section *sp; - void *result; - - sp = getsectbynamefromheader(mhp, segname, sectname); - if(sp == (struct section *)0){ - *size = 0; - return((char *)0); - } - *size = sp->size; - result = (void *)sp->addr; - return result; -} - -/* - * This routine returns the a pointer to the data for the named segment - * if it exist in the mach header passed to it. Also it returns - * the size of the segment data indirectly through the pointer size. - * Otherwise it returns zero for the pointer and the size. - */ -#ifdef MACH_BSD -__private_extern__ -#endif -void * -getsegdatafromheader( - struct mach_header *mhp, - const char *segname, - int *size) -{ - const struct segment_command *sc; - void *result; - - sc = getsegbynamefromheader(mhp, segname); - if(sc == (struct segment_command *)0){ - *size = 0; - return((char *)0); - } - *size = sc->vmsize; - result = (void *)sc->vmaddr; - return result; -} - -/* - * This routine returns the section structure for the named section in the - * named segment for the mach_header pointer passed to it if it exist. - * Otherwise it returns zero. - * - * This routine can operate against any 32 bit mach header. 
- */ -#ifdef MACH_BSD -__private_extern__ -#endif -struct section * -getsectbynamefromheader( - struct mach_header *mhp, - const char *segname, - const char *sectname) -{ - struct segment_command *sgp; - struct section *sp; - unsigned long i, j; - - sgp = (struct segment_command *) - ((char *)mhp + sizeof(struct mach_header)); - for(i = 0; i < mhp->ncmds; i++){ - if(sgp->cmd == LC_SEGMENT) - if(strncmp(sgp->segname, segname, sizeof(sgp->segname)) == 0 || - mhp->filetype == MH_OBJECT){ - sp = (struct section *)((char *)sgp + - sizeof(struct segment_command)); - for(j = 0; j < sgp->nsects; j++){ - if(strncmp(sp->sectname, sectname, - sizeof(sp->sectname)) == 0 && - strncmp(sp->segname, segname, - sizeof(sp->segname)) == 0) - return(sp); - sp = (struct section *)((char *)sp + - sizeof(struct section)); - } - } - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return((struct section *)0); -} - -#ifdef MACH_BSD -__private_extern__ -#endif -/* - * This routine can operate against any 32 bit mach header. - */ -struct segment_command * -getsegbynamefromheader( - struct mach_header *header, - const char *seg_name) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if ( sgp->cmd == LC_SEGMENT - && !strncmp(sgp->segname, seg_name, sizeof(sgp->segname))) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return (struct segment_command *)0; -} - - -/* - * For now at least, all the rest of this seems unused. - * NOTE: The constant in here for segment alignment is machine-dependent, - * so if you include this, define a machine dependent constant for it's - * value. 
- */ -static struct { - struct segment_command seg; - struct section sect; -} fvm_data = { - { - LC_SEGMENT, // cmd - sizeof(fvm_data), // cmdsize - "__USER", // segname - 0, // vmaddr - 0, // vmsize - 0, // fileoff - 0, // filesize - VM_PROT_READ, // maxprot - VM_PROT_READ, // initprot, - 1, // nsects - 0 // flags - }, - { - "", // sectname - "__USER", // segname - 0, // addr - 0, // size - 0, // offset - 4, // align - 0, // reloff - 0, // nreloc - 0, // flags - 0, // reserved1 - 0 // reserved2 - } -}; - -#ifdef MACH_BSD -static -#endif -struct segment_command *fvm_seg; - -static struct fvmfile_command *fvmfilefromheader(struct mach_header *header); -static vm_offset_t getsizeofmacho(struct mach_header *header); - -/* - * Return the first segment_command in the header. - */ -#ifdef MACH_BSD -__private_extern__ -#endif -struct segment_command * -firstseg(void) -{ - return firstsegfromheader(&_mh_execute_header); -} - -#ifdef MACH_BSD -__private_extern__ -#endif -struct segment_command * -firstsegfromheader(struct mach_header *header) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if (sgp->cmd == LC_SEGMENT) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return (struct segment_command *)0; -} - -#ifdef MACH_BSD -__private_extern__ -#endif -/* - * This routine operates against a 32 bit mach segment_command structure - * pointer from the currently executing kernel only, to obtain the - * sequentially next segment_command structure in the currently executing - * kernel - */ -struct segment_command * -nextseg(struct segment_command *sgp) -{ - struct segment_command *this; - - this = nextsegfromheader(&_mh_execute_header, sgp); - - /* - * For the kernel's header add on the faked segment for the - * USER boot code identified by a FVMFILE_COMMAND in the mach header. 
- */ - if (!this && sgp != fvm_seg) - this = fvm_seg; - - return this; -} - -#ifdef MACH_BSD -__private_extern__ -#endif -/* - * This routine operates against any 32 bit mach segment_command structure - * pointer and the provided 32 bit header, to obtain the sequentially next - * segment_command structure in that header. - */ -struct segment_command * -nextsegfromheader( - struct mach_header *header, - struct segment_command *seg) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++) { - if (sgp == seg) - break; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - - if (i == header->ncmds) - return (struct segment_command *)0; - - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - for (; i < header->ncmds; i++) { - if (sgp->cmd == LC_SEGMENT) - return sgp; - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - - return (struct segment_command *)0; -} - - -/* - * Return the address of the named Mach-O segment from the currently - * executing 32 bit kernel, or NULL. - */ -#ifdef MACH_BSD -__private_extern__ -#endif -struct segment_command * -getsegbyname(const char *seg_name) -{ - struct segment_command *this; - - this = getsegbynamefromheader(&_mh_execute_header, seg_name); - - /* - * For the kernel's header add on the faked segment for the - * USER boot code identified by a FVMFILE_COMMAND in the mach header. - */ - if (!this && strncmp(seg_name, fvm_seg->segname, - sizeof(fvm_seg->segname)) == 0) - this = fvm_seg; - - return this; -} - -/* - * This routine returns the a pointer the section structure of the named - * section in the named segment if it exists in the currently executing - * kernel, which it is presumed to be linked into. Otherwise it returns NULL. 
- */ -#ifdef MACH_BSD -__private_extern__ -#endif -struct section * -getsectbyname( - const char *segname, - const char *sectname) -{ - return(getsectbynamefromheader( - (struct mach_header *)&_mh_execute_header, segname, sectname)); -} - -#ifdef MACH_BSD -__private_extern__ -#endif -/* - * This routine can operate against any 32 bit segment_command structure to - * return the first 32 bit section immediately following that structure. If - * there are no sections associated with the segment_command structure, it - * returns NULL. - */ -struct section * -firstsect(struct segment_command *sgp) -{ - if (!sgp || sgp->nsects == 0) - return (struct section *)0; - - return (struct section *)(sgp+1); -} - -#ifdef MACH_BSD -__private_extern__ -#endif -/* - * This routine can operate against any 32 bit segment_command structure and - * 32 bit section to return the next consecutive 32 bit section immediately - * following the 32 bit section provided. If there are no sections following - * the provided section, it returns NULL. - */ -struct section * -nextsect(struct segment_command *sgp, struct section *sp) -{ - struct section *fsp = firstsect(sgp); - - if (((unsigned long)(sp - fsp) + 1) >= sgp->nsects) - return (struct section *)0; - - return sp+1; -} - -/* - * This routine can operate against any 32 bit mach header to return the - * first occurring 32 bit fvmfile_command section. If one is not present, - * it returns NULL. - */ -static struct fvmfile_command * -fvmfilefromheader(struct mach_header *header) -{ - struct fvmfile_command *fvp; - unsigned long i; - - fvp = (struct fvmfile_command *) - ((char *)header + sizeof(struct mach_header)); - for (i = 0; i < header->ncmds; i++){ - if (fvp->cmd == LC_FVMFILE) - return fvp; - fvp = (struct fvmfile_command *)((char *)fvp + fvp->cmdsize); - } - return (struct fvmfile_command *)0; -} - -/* - * Create a fake USER seg if a fvmfile_command is present. 
- * - * This routine operates against the currently executing kernel only - */ -#ifdef MACH_BSD -__private_extern__ -#endif -struct segment_command * -getfakefvmseg(void) -{ - struct segment_command *sgp = getsegbyname("__USER"); - struct fvmfile_command *fvp = fvmfilefromheader(&_mh_execute_header); - struct section *sp; - - if (sgp) - return sgp; - - if (!fvp) - return (struct segment_command *)0; - - fvm_seg = &fvm_data.seg; - sgp = fvm_seg; - sp = &fvm_data.sect; - - sgp->vmaddr = fvp->header_addr; - sgp->vmsize = getsizeofmacho((struct mach_header *)(sgp->vmaddr)); - - strlcpy(sp->sectname, fvp->name.ptr, sizeof(sp->sectname) /* 16 */); - sp->addr = sgp->vmaddr; - sp->size = sgp->vmsize; - -#if DEBUG - printf("fake fvm seg __USER/\"%s\" at 0x%lx, size 0x%lx\n", - sp->sectname, sp->addr, sp->size); -#endif /*DEBUG*/ - return sgp; -} - -/* - * Figure out the size the size of the data associated with a - * loaded mach_header. - * - * This routine operates against the currently executing kernel only - */ -static vm_offset_t -getsizeofmacho(struct mach_header *header) -{ - struct segment_command *sgp; - vm_offset_t last_addr; - - last_addr = 0; - for ( sgp = firstsegfromheader(header) - ; sgp - ; sgp = nextsegfromheader(header, sgp)) - { - if (sgp->fileoff + sgp->filesize > last_addr) - last_addr = sgp->fileoff + sgp->filesize; - } - - return last_addr; -} - -#ifdef MACH_KDB -/* - * This routine returns the section command for the symbol table in the - * named segment for the mach_header pointer passed to it if it exist. - * Otherwise it returns zero. 
- */ -struct symtab_command * -getsectcmdsymtabfromheader( - struct mach_header *mhp) -{ - struct segment_command *sgp; - unsigned long i; - - sgp = (struct segment_command *) - ((char *)mhp + sizeof(struct mach_header)); - for(i = 0; i < mhp->ncmds; i++){ - if(sgp->cmd == LC_SYMTAB) - return((struct symtab_command *)sgp); - sgp = (struct segment_command *)((char *)sgp + sgp->cmdsize); - } - return(NULL); -} - -boolean_t getsymtab(struct mach_header *header, - vm_offset_t *symtab, - int *nsyms, - vm_offset_t *strtab, - vm_size_t *strtabsize) -{ - struct segment_command *seglink_cmd; - struct symtab_command *symtab_cmd; - - seglink_cmd = NULL; - - if(header->magic != MH_MAGIC) { /* Check if this is a valid header format */ - printf("Attempt to use invalid header (magic = %08lX) to find symbol table\n", - header->magic); /* Tell them what's wrong */ - return (FALSE); /* Bye y'all... */ - } - - seglink_cmd = getsegbynamefromheader(header,"__LINKEDIT"); - if (seglink_cmd == NULL) { - return(FALSE); - } - - symtab_cmd = NULL; - symtab_cmd = getsectcmdsymtabfromheader(header); - if (symtab_cmd == NULL) - return(FALSE); - - *nsyms = symtab_cmd->nsyms; - if(symtab_cmd->nsyms == 0) return (FALSE); /* No symbols */ - - *strtabsize = symtab_cmd->strsize; - if(symtab_cmd->strsize == 0) return (FALSE); /* Symbol length is 0 */ - - *symtab = seglink_cmd->vmaddr + symtab_cmd->symoff - - seglink_cmd->fileoff; - - *strtab = seglink_cmd->vmaddr + symtab_cmd->stroff - - seglink_cmd->fileoff; - - return(TRUE); -} -#endif - -#else - -void * getsegdatafromheader( struct mach_header *mhp, char *segname, int *size) -{ - return 0; -} - -#endif diff --git a/osfmk/mach-o/mach_header.h b/osfmk/mach-o/mach_header.h deleted file mode 100644 index eda433213..000000000 --- a/osfmk/mach-o/mach_header.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * File: kern/mach_header.h - * - * Definitions for accessing mach-o headers. This header wraps the - * routines defined in osfmk/mach-o/mach_header.c; this is made clear - * by the existance of the getsectcmdsymtabfromheader() prototype. - * - * NOTE: The functions prototyped by this header only operate againt - * 32 bit mach headers. Many of these functions imply the - * currently running kernel, and cannot be used against mach - * headers other than that of the currently running kernel. - * - * HISTORY - * 29-Jan-92 Mike DeMoney (mike@next.com) - * Made into machine independent form from machdep/m68k/mach_header.h. - * Ifdef'ed out most of this since I couldn't find any references. 
- */ - -#ifndef _KERN_MACH_HEADER_ -#define _KERN_MACH_HEADER_ - -#include -#include - -#if MACH_KERNEL -struct mach_header **getmachheaders(void); -vm_offset_t getlastaddr(void); - -struct segment_command *firstseg(void); -struct segment_command *firstsegfromheader(struct mach_header *header); -struct segment_command *nextseg(struct segment_command *sgp); -struct segment_command *nextsegfromheader( - struct mach_header *header, - struct segment_command *seg); -struct segment_command *getsegbyname(const char *seg_name); -struct segment_command *getsegbynamefromheader( - struct mach_header *header, - const char *seg_name); -void *getsegdatafromheader(struct mach_header *, const char *, int *); -struct section *getsectbyname(const char *seg_name, const char *sect_name); -struct section *getsectbynamefromheader( - struct mach_header *header, - const char *seg_name, - const char *sect_name); -void *getsectdatafromheader(struct mach_header *, const char *, const char *, int *); -struct section *firstsect(struct segment_command *sgp); -struct section *nextsect(struct segment_command *sgp, struct section *sp); -struct fvmlib_command *fvmlib(void); -struct fvmlib_command *fvmlibfromheader(struct mach_header *header); -struct segment_command *getfakefvmseg(void); -#ifdef MACH_KDB -struct symtab_command *getsectcmdsymtabfromheader(struct mach_header *); -boolean_t getsymtab(struct mach_header *, vm_offset_t *, int *, - vm_offset_t *, vm_size_t *); -#endif - -#endif /* KERNEL */ - -#endif /* _KERN_MACH_HEADER_ */ diff --git a/osfmk/mach/Makefile b/osfmk/mach/Makefile index 946bc45e6..eaadb00bd 100644 --- a/osfmk/mach/Makefile +++ b/osfmk/mach/Makefile @@ -19,6 +19,9 @@ INSTINC_SUBDIRS_ARM = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + i386 + EXPINC_SUBDIRS = \ machine @@ -28,6 +31,9 @@ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ i386 +EXPINC_SUBDIRS_X86_64 = \ + i386 + EXPINC_SUBDIRS_ARM = \ arm @@ -67,7 +73,8 @@ MACH_PRIVATE_DEFS = \ 
memory_object_control.defs \ memory_object_default.defs \ memory_object_name.defs \ - upl.defs + upl.defs \ + vm32_map.defs # # MIG-generated headers that are traditionally used by user @@ -176,10 +183,6 @@ INSTALL_KF_MI_LCL_LIST = \ mach_interface.h \ $(filter-out mach_traps.h mach_syscalls.h thread_switch.h, ${DATAFILES}) -INSTALL_MI_LCL_LIST = kext_panic_report.h \ - bootstrap.h \ - ${DATAFILES} - INSTALL_MI_GEN_LIST = INSTALL_MI_DIR = mach @@ -282,11 +285,11 @@ MIG_KSHDRS = \ processor_server.h \ processor_set_server.h \ security_server.h \ - semaphore_server.h \ task_server.h \ thread_act_server.h \ upl_server.h \ - vm_map_server.h + vm_map_server.h \ + vm32_map_server.h MIG_KSSRC = \ clock_server.c \ @@ -308,11 +311,11 @@ MIG_KSSRC = \ processor_server.c \ processor_set_server.c \ security_server.c \ - semaphore_server.c \ task_server.c \ thread_act_server.c \ upl_server.c \ - vm_map_server.c + vm_map_server.c \ + vm32_map_server.c # # JMM - diff --git a/osfmk/mach/audit_triggers.defs b/osfmk/mach/audit_triggers.defs index c7e0fcc64..1d6e279b6 100644 --- a/osfmk/mach/audit_triggers.defs +++ b/osfmk/mach/audit_triggers.defs @@ -1,28 +1,34 @@ /* - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * Copyright (c) 2004-2008, Apple Inc. All rights reserved. * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
+ * @APPLE_BSD_LICENSE_HEADER_START@ * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Apple Inc. ("Apple") nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @APPLE_BSD_LICENSE_HEADER_END@ +*/ /* * Interface definition for the audit logging facility. @@ -41,4 +47,3 @@ simpleroutine audit_triggers( audit_port : mach_port_t; in flags : int); -/* vim: set ft=c : */ diff --git a/osfmk/mach/exception_types.h b/osfmk/mach/exception_types.h index 3f3b487d7..b0631f16b 100644 --- a/osfmk/mach/exception_types.h +++ b/osfmk/mach/exception_types.h @@ -141,9 +141,11 @@ EXC_MASK_SYSCALL | \ EXC_MASK_MACH_SYSCALL | \ EXC_MASK_RPC_ALERT | \ - EXC_MASK_CRASH | \ EXC_MASK_MACHINE) +#ifdef KERNEL_PRIVATE +#define EXC_MASK_VALID (EXC_MASK_ALL | EXC_MASK_CRASH) +#endif /* KERNEL_PRIVATE */ #define FIRST_EXCEPTION 1 /* ZERO is illegal */ diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index 003f75101..3bd96bf53 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,6 +77,7 @@ * Generic information structure to allow for expansion. */ typedef integer_t *host_info_t; /* varying array of int. */ +typedef integer_t *host_info64_t; /* varying array of int. 
*/ #define HOST_INFO_MAX (1024) /* max array size */ typedef integer_t host_info_data_t[HOST_INFO_MAX]; @@ -181,6 +182,10 @@ typedef struct host_priority_info *host_priority_info_t; #define HOST_VM_INFO 2 /* Virtual memory stats */ #define HOST_CPU_LOAD_INFO 3 /* CPU load stats */ +/* host_statistics64() */ +#define HOST_VM_INFO64 4 /* 64-bit virtual memory stats */ + + struct host_load_info { integer_t avenrun[3]; /* scaled by LOAD_SCALE */ integer_t mach_factor[3]; /* scaled by LOAD_SCALE */ @@ -192,12 +197,21 @@ typedef struct host_load_info *host_load_info_t; (sizeof(host_load_info_data_t)/sizeof(integer_t))) /* in */ +/* vm_statistics64 */ +#define HOST_VM_INFO64_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_statistics64_data_t)/sizeof(integer_t))) + +/* size of the latest version of the structure */ +#define HOST_VM_INFO64_LATEST_COUNT HOST_VM_INFO64_COUNT + + +/* vm_statistics */ #define HOST_VM_INFO_COUNT ((mach_msg_type_number_t) \ (sizeof(vm_statistics_data_t)/sizeof(integer_t))) /* size of the latest version of the structure */ #define HOST_VM_INFO_LATEST_COUNT HOST_VM_INFO_COUNT -#define HOST_VM_INFO_REV2_COUNT HOST_VM_INFO_LATEST_COUNT +#define HOST_VM_INFO_REV2_COUNT HOST_VM_INFO_LATEST_COUNT /* previous versions: adjust the size according to what was added each time */ #define HOST_VM_INFO_REV1_COUNT /* added "speculative_count" (1 int) */ \ ((mach_msg_type_number_t) \ diff --git a/osfmk/mach/host_priv.defs b/osfmk/mach/host_priv.defs index 6250870a8..3be39868b 100644 --- a/osfmk/mach/host_priv.defs +++ b/osfmk/mach/host_priv.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -113,7 +113,7 @@ routine host_default_memory_manager( host_priv : host_priv_t; inout default_manager : memory_object_default_t = MACH_MSG_TYPE_MAKE_SEND; - cluster_size : vm_size_t); + cluster_size : memory_object_cluster_size_t); /* @@ -162,7 +162,7 @@ routine vm_allocate_cpm( task : vm_map_t; inout address : vm_address_t; size : vm_size_t; - anywhere : boolean_t); + flags : int); /* * Get list of processors on this host. @@ -183,10 +183,10 @@ routine host_get_clock_control( /* - * kernel module interface - * - * see mach/kmod.h: + * kernel module interface (obsolete as of SnowLeopard) + * see mach/kmod.h */ +/* kmod_ MIG calls now return KERN_NOT_SUPPORTED on PPC/i386/x86_64. */ routine kmod_create( host_priv : host_priv_t; info : vm_address_t; @@ -359,4 +359,22 @@ routine host_get_UNDServer( host : host_priv_t; out server : UNDServerRef); +/* + * Perform an operation with a kernel extension, on the kext loading system, + * or request information about loaded kexts or the state of the kext loading + * system. + * Active operations (load, unload, disable/enable) require host_priv/root access. + * Info retrieval does not. + * + * WARNING: THIS ROUTINE IS PRIVATE TO THE KEXT-MANAGEMENT STACK AND IS + * SUBJECT TO CHANGE AT ANY TIME. 
+ */ +routine kext_request( + host_priv : host_priv_t; + in user_log_flags : uint32_t; + in request_data : pointer_t; + out response_data : pointer_t; + out log_data : pointer_t; + out op_result : kern_return_t); + /* vim: set ft=c : */ diff --git a/osfmk/mach/host_special_ports.h b/osfmk/mach/host_special_ports.h index 9aa0a75c7..755327c5c 100644 --- a/osfmk/mach/host_special_ports.h +++ b/osfmk/mach/host_special_ports.h @@ -83,9 +83,11 @@ #define HOST_DYNAMIC_PAGER_PORT (1 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_AUDIT_CONTROL_PORT (2 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_USER_NOTIFICATION_PORT (3 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_AUTOMOUNTD_PORT (4 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_LOCKD_PORT (5 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_SEATBELT_PORT (7 + HOST_MAX_SPECIAL_KERNEL_PORT) - +#define HOST_KEXTD_PORT (8 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_CHUD_PORT (9 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_UNFREED_PORT (10 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_AMFID_PORT (11 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_MAX_SPECIAL_PORT (12 + HOST_MAX_SPECIAL_KERNEL_PORT) @@ -139,12 +141,30 @@ #define host_set_user_notification_port(host, port) \ (host_set_special_port((host), HOST_USER_NOTIFICATION_PORT, (port))) +#define host_get_automountd_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_AUTOMOUNTD_PORT, (port))) +#define host_set_automountd_port(host, port) \ + (host_set_special_port((host), HOST_AUTOMOUNTD_PORT, (port))) + #define host_get_lockd_port(host, port) \ (host_get_special_port((host), \ HOST_LOCAL_NODE, HOST_LOCKD_PORT, (port))) #define host_set_lockd_port(host, port) \ (host_set_special_port((host), HOST_LOCKD_PORT, (port))) +#define host_get_kextd_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_KEXTD_PORT, (port))) +#define host_set_kextd_port(host, port) \ + (host_set_special_port((host), HOST_KEXTD_PORT, (port))) + +#define 
host_get_chud_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_CHUD_PORT, (port))) +#define host_set_chud_port(host, port) \ + (host_set_special_port((host), HOST_CHUD_PORT, (port))) + #define host_get_unfreed_port(host, port) \ (host_get_special_port((host), \ HOST_LOCAL_NODE, HOST_UNFREED_PORT, (port))) diff --git a/osfmk/mach/i386/boolean.h b/osfmk/mach/i386/boolean.h index cfb425c54..100f7e7b5 100644 --- a/osfmk/mach/i386/boolean.h +++ b/osfmk/mach/i386/boolean.h @@ -65,7 +65,7 @@ #ifndef _MACH_I386_BOOLEAN_H_ #define _MACH_I386_BOOLEAN_H_ -#if defined(__x86_64__) +#if defined(__x86_64__) && !defined(KERNEL) typedef unsigned int boolean_t; #else typedef int boolean_t; diff --git a/osfmk/mach/i386/machine_types.defs b/osfmk/mach/i386/machine_types.defs index f490f14c2..6a356154e 100644 --- a/osfmk/mach/i386/machine_types.defs +++ b/osfmk/mach/i386/machine_types.defs @@ -112,6 +112,16 @@ type mach_vm_size_t = uint64_t; #endif /* MACH_IPC_COMPAT */ +/* + * These are types used internal to Mach to implement the + * legacy 32-bit VM APIs published by the kernel. 
+ */ +#define VM32_SUPPORT 1 + +type vm32_address_t = uint32_t; +type vm32_offset_t = uint32_t; +type vm32_size_t = uint32_t; + #endif /* _MACHINE_VM_TYPES_DEFS_ */ /* vim: set ft=c : */ diff --git a/osfmk/mach/i386/sdt_isa.h b/osfmk/mach/i386/sdt_isa.h index 03947431e..c32239162 100644 --- a/osfmk/mach/i386/sdt_isa.h +++ b/osfmk/mach/i386/sdt_isa.h @@ -37,8 +37,19 @@ #define DTRACE_TOSTRING(s) DTRACE_STRINGIFY(s) #if defined(KERNEL) /* - * For the kernel, set an explicit global label do the symbol can be located + * For the kernel, set an explicit global label so the symbol can be located */ +#ifdef __x86_64__ +#define DTRACE_LAB(p, n) \ + "__dtrace_probeDOLLAR" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) + +#define DTRACE_LABEL(p, n) \ + ".section __DATA, __data\n\t" \ + ".globl " DTRACE_LAB(p, n) "\n\t" \ + DTRACE_LAB(p, n) ":" ".quad 1f""\n\t" \ + ".text" "\n\t" \ + "1:" +#else #define DTRACE_LAB(p, n) \ "__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) @@ -48,6 +59,7 @@ DTRACE_LAB(p, n) ":" ".long 1f""\n\t" \ ".text" "\n\t" \ "1:" +#endif #else /* !KERNEL */ #define DTRACE_LABEL(p, n) \ "__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) ":" "\n\t" @@ -71,7 +83,8 @@ #define DTRACE_NOPS \ "nop" "\n\t" \ - "leal 0(%%esi), %%esi" "\n\t" + "nop" "\n\t" \ + "nop" "\n\t" #define DTRACE_CALL_INSN(p,n) \ "call _dtracetest" DTRACE_STRINGIFY(_##p##_##n) "\n\t" @@ -97,7 +110,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi" \ + : "memory", "rdi" \ ); #define DTRACE_CALL2ARGS(provider, name) \ @@ -106,7 +119,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi" \ + : "memory", "rdi", "rsi" \ ); #define DTRACE_CALL3ARGS(provider, name) \ @@ -116,7 +129,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx" \ + : "memory", "rdi", "rsi", "rdx" \ ); #define DTRACE_CALL4ARGS(provider, name) \ @@ -127,7 
+140,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx" \ + : "memory", "rdi", "rsi", "rdx", "rcx" \ ); #define DTRACE_CALL5ARGS(provider, name) \ @@ -139,7 +152,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx", "r8" \ + : "memory", "rdi", "rsi", "rdx", "rcx", "r8" \ ); #define DTRACE_CALL6ARGS(provider, name) \ @@ -152,7 +165,7 @@ DTRACE_CALL(provider, name) \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx", "r8", "r9" \ + : "memory", "rdi", "rsi", "rdx", "rcx", "r8", "r9" \ ); #define DTRACE_CALL7ARGS(provider, name) \ @@ -169,46 +182,7 @@ "addq\t$0x8,%%rsp" "\n\t" \ : \ : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "rax" \ - ); - -#define DTRACE_CALL8ARGS(provider, name) \ - asm volatile ("subq\t$0x10,%%rsp" "\n\t" \ - "movq\t0x0(%0),%%rdi" "\n\t" \ - "movq\t0x8(%0),%%rsi" "\n\t" \ - "movq\t0x10(%0),%%rdx" "\n\t" \ - "movq\t0x18(%0),%%rcx" "\n\t" \ - "movq\t0x20(%0),%%r8" "\n\t" \ - "movq\t0x28(%0),%%r9" "\n\t" \ - "movdqa\t0x30(%0),%%xmm1" "\n\t" \ - "movdqa\t%%xmm1,0x0(%%rsp)" "\n\t" \ - DTRACE_CALL(provider, name) \ - "addq\t$0x10,%%rsp" "\n\t" \ - : \ - : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "xmm1" \ - ); - -#define DTRACE_CALL9ARGS(provider, name) \ - DTRACE_CALL10ARGS(provider, name) - -#define DTRACE_CALL10ARGS(provider, name) \ - asm volatile ("subq\t$0x20,%%rsp" "\n\t" \ - "movq\t0x0(%0),%%rdi" "\n\t" \ - "movq\t0x8(%0),%%rsi" "\n\t" \ - "movq\t0x10(%0),%%rdx" "\n\t" \ - "movq\t0x18(%0),%%rcx" "\n\t" \ - "movq\t0x20(%0),%%r8" "\n\t" \ - "movq\t0x28(%0),%%r9" "\n\t" \ - "movdqa\t0x30(%0),%%xmm1" "\n\t" \ - "movdqa\t0x40(%0),%%xmm2" "\n\t" \ - "movdqa\t%%xmm1,0x0(%%rsp)" "\n\t" \ - "movdqa\t%%xmm2,0x10(%%rsp)" "\n\t" \ - DTRACE_CALL(provider, name) \ - "addq\t$0x20,%%rsp" "\n\t" \ - : \ - : "r" (__dtrace_args) \ - : "memory", "rdi", "rsi", "rdx", "rcx", "r8", 
"r9", "xmm1", "xmm2" \ + : "memory", "rdi", "rsi", "rdx", "rcx", "r8", "r9", "rax" \ ); #endif // __x86_64__ @@ -268,7 +242,7 @@ "addl\t$0x10,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax" \ + : "memory", "eax" \ ); #define DTRACE_CALL2ARGS(provider, name) \ @@ -281,7 +255,7 @@ "addl\t$0x10,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL3ARGS(provider, name) \ @@ -296,7 +270,7 @@ "addl\t$0x10,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL4ARGS(provider, name) \ @@ -313,7 +287,7 @@ "addl\t$0x10,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL5ARGS(provider, name) \ @@ -332,7 +306,7 @@ "addl\t$0x20,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL6ARGS(provider, name) \ @@ -353,7 +327,7 @@ "addl\t$0x20,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL7ARGS(provider, name) \ @@ -376,7 +350,7 @@ "addl\t$0x20,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL8ARGS(provider, name) \ @@ -401,7 +375,7 @@ "addl\t$0x20,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL9ARGS(provider, name) \ @@ -428,7 +402,7 @@ "addl\t$0x30,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #define DTRACE_CALL10ARGS(provider, name) \ @@ -457,7 +431,7 @@ "addl\t$0x30,%%esp" \ : \ : "r" (__dtrace_args) \ - : "memory", "eax", "edx" \ + : "memory", "eax", "edx" \ ); #endif // __i386__ diff --git a/osfmk/mach/i386/syscall_sw.h b/osfmk/mach/i386/syscall_sw.h index 5b34b8bb3..abef968a7 100644 --- a/osfmk/mach/i386/syscall_sw.h +++ b/osfmk/mach/i386/syscall_sw.h @@ -60,7 
+60,6 @@ #include -#if defined(__i386__) /* * Software interrupt codes for 32-bit system call entry: */ @@ -69,6 +68,8 @@ #define MACHDEP_INT 0x82 #define DIAG_INT 0x83 +#if defined(__i386__) + #ifndef KERNEL /* * Syscall entry macros for use in libc: @@ -88,7 +89,8 @@ LEAF(_##trap_name,0) ;\ call __sysenter_trap ;\ END(_##trap_name) -#endif +#endif /* !KERNEL */ + #endif /* defined(__i386__) */ #if defined(__x86_64__) @@ -114,7 +116,8 @@ LEAF(_##trap_name,0) ;\ syscall ;\ END(_##trap_name) -#endif +#endif /* !KERNEL */ + #endif /* defined(__x86_64__) */ /* @@ -146,6 +149,7 @@ END(_##trap_name) #define SYSCALL_CLASS_UNIX 2 /* Unix/BSD */ #define SYSCALL_CLASS_MDEP 3 /* Machine-dependent */ #define SYSCALL_CLASS_DIAG 4 /* Diagnostics */ +#define SYSCALL_CLASS_IPC 5 /* Mach IPC */ /* Macros to simpllfy constructing syscall numbers. */ #define SYSCALL_CONSTRUCT_MACH(syscall_number) \ diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index 173e79a8b..558d1c071 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -354,6 +354,7 @@ typedef struct x86_saved_state32 x86_saved_state32_t; #define x86_SAVED_STATE32_COUNT ((mach_msg_type_number_t) \ (sizeof (x86_saved_state32_t)/sizeof(unsigned int))) +#pragma pack(4) struct x86_saved_state32_tagged { uint32_t tag; struct x86_saved_state32 state; @@ -381,7 +382,12 @@ typedef struct x86_sframe32 x86_sframe32_t; */ struct x86_64_intr_stack_frame { uint32_t trapno; +#if defined(__LP64__) && defined(KERNEL) + uint32_t _pad; + uint64_t trapfn; +#else uint32_t trapfn; +#endif uint64_t err; uint64_t rip; uint64_t cs; @@ -398,14 +404,20 @@ typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t; */ struct x86_saved_state_compat32 { struct x86_saved_state32_tagged iss32; +#if defined(__LP64__) && defined(KERNEL) +#else uint32_t pad_for_16byte_alignment[2]; +#endif struct x86_64_intr_stack_frame isf64; }; typedef struct x86_saved_state_compat32 
x86_saved_state_compat32_t; struct x86_sframe_compat32 { struct x86_64_intr_stack_frame slf; - uint32_t pad_for_16byte_alignment[2]; +#if defined(__LP64__) && defined(KERNEL) +#else + uint32_t pad_for_16byte_alignment[2]; +#endif struct x86_saved_state_compat32 ssf; uint32_t empty[4]; }; @@ -454,6 +466,9 @@ struct x86_saved_state64 { uint32_t gs; uint32_t fs; +#ifdef __x86_64__ + uint32_t _pad_for_alignment[3]; +#endif struct x86_64_intr_stack_frame isf; }; typedef struct x86_saved_state64 x86_saved_state64_t; @@ -468,7 +483,9 @@ typedef struct x86_saved_state64_tagged x86_saved_state64_tagged_t; struct x86_sframe64 { struct x86_64_intr_stack_frame slf; - uint32_t pad_for_16byte_alignment[3]; +#ifdef __i386__ + uint32_t _pad_for_alignment[3]; +#endif struct x86_saved_state64_tagged ssf; }; typedef struct x86_sframe64 x86_sframe64_t; @@ -487,6 +504,7 @@ typedef struct { } x86_saved_state_t; #define ss_32 uss.ss_32 #define ss_64 uss.ss_64 +#pragma pack() static inline boolean_t is_saved_state64(x86_saved_state_t *iss) diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h index f16790f1b..eae58369a 100644 --- a/osfmk/mach/i386/vm_param.h +++ b/osfmk/mach/i386/vm_param.h @@ -92,19 +92,23 @@ #define BYTE_SIZE 8 /* byte size in bits */ -#define I386_PGBYTES 4096 /* bytes per 80386 page */ -#define I386_PGSHIFT 12 /* number of bits to shift for pages */ +#define I386_PGBYTES 4096 /* bytes per 80386 page */ +#define I386_PGSHIFT 12 /* bitshift for pages */ #define PAGE_SIZE I386_PGBYTES #define PAGE_SHIFT I386_PGSHIFT #define PAGE_MASK (PAGE_SIZE - 1) +#define I386_LPGBYTES 2*1024*1024 /* bytes per large page */ +#define I386_LPGSHIFT 21 /* bitshift for large pages */ +#define I386_LPGMASK (I386_LPGBYTES-1) + /* * Convert bytes to pages and convert pages to bytes. * No rounding is used. 
*/ -#define i386_btop(x) (((pmap_paddr_t)(x)) >> I386_PGSHIFT) +#define i386_btop(x) ((ppnum_t)((x) >> I386_PGSHIFT)) #define machine_btop(x) i386_btop(x) #define i386_ptob(x) (((pmap_paddr_t)(x)) << I386_PGSHIFT) @@ -152,7 +156,22 @@ #ifdef KERNEL_PRIVATE /* Kernel-wide values */ -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000U) + +#define KB (1024ULL) +#define MB (1024*KB) +#define GB (1024*MB) + +/* + * Maximum physical memory supported. + */ +#define K32_MAXMEM (32*GB) +#define K64_MAXMEM (96*GB) +#if defined(__i386__) +#define KERNEL_MAXMEM K32_MAXMEM +#else +#define KERNEL_MAXMEM K64_MAXMEM +#endif + /* * XXX * The kernel max VM address is limited to 0xFF3FFFFF for now because @@ -161,9 +180,26 @@ * We can't let VM allocate memory from there. */ +#if defined(__i386__) + +#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000U) +#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS + #define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFE7FFFFFU) #define KERNEL_STACK_SIZE (I386_PGBYTES*4) +#elif defined(__x86_64__) + +#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0xFFFFFF8000000000UL) +#define VM_MIN_KERNEL_AND_KEXT_ADDRESS (VM_MIN_KERNEL_ADDRESS - 0x80000000ULL) + +#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFFFFFFFFFFFFEFFFUL) +#define KERNEL_STACK_SIZE (I386_PGBYTES*4) + +#else +#error unsupported architecture +#endif + #define VM_MAP_MIN_ADDRESS MACH_VM_MIN_ADDRESS #define VM_MAP_MAX_ADDRESS MACH_VM_MAX_ADDRESS @@ -173,17 +209,32 @@ #ifdef MACH_KERNEL_PRIVATE /* For implementing legacy 32-bit interfaces */ -#define VM32_SUPPORT +#define VM32_SUPPORT 1 #define VM32_MIN_ADDRESS ((vm32_offset_t) 0) #define VM32_MAX_ADDRESS ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) +#if defined(__i386__) + #define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0x00000000) #define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x00000000U) -#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x1fffffffU) +#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 
0x1FFFFFFFU) #define NCOPY_WINDOWS 4 +#elif defined(__x86_64__) + +#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0x00000000) + +#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0xFFFFFF8000000000UL) +#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0xFFFFFF801FFFFFFFUL) + +#define NCOPY_WINDOWS 0 + +#else +#error unsupported architecture +#endif + /* * Conversion between 80386 pages and VM pages */ diff --git a/osfmk/mach/i386/vm_types.h b/osfmk/mach/i386/vm_types.h index 49c23c831..ef737ac4d 100644 --- a/osfmk/mach/i386/vm_types.h +++ b/osfmk/mach/i386/vm_types.h @@ -70,6 +70,7 @@ #ifndef ASSEMBLER #include +#include #include /* @@ -130,7 +131,7 @@ typedef uint64_t vm_map_size_t; #ifdef MACH_KERNEL_PRIVATE -#ifdef VM32_SUPPORT +#if VM32_SUPPORT /* * These are types used internal to Mach to implement the diff --git a/osfmk/mach/kern_return.h b/osfmk/mach/kern_return.h index 5e7502be9..da73236e3 100644 --- a/osfmk/mach/kern_return.h +++ b/osfmk/mach/kern_return.h @@ -310,6 +310,11 @@ /* Some thread-oriented operation (semaphore_wait) timed out */ +#define KERN_CODESIGN_ERROR 50 + /* During a page fault, indicates that the page was rejected + * as a result of a signature check. + */ + #define KERN_RETURN_MAX 0x100 /* Maximum return value allowable */ diff --git a/osfmk/mach/kmod.h b/osfmk/mach/kmod.h index beaa45af3..a5c5587ff 100644 --- a/osfmk/mach/kmod.h +++ b/osfmk/mach/kmod.h @@ -32,169 +32,155 @@ * Version 2.0. */ -#ifndef _MACH_KMOD_H_ -#define _MACH_KMOD_H_ +#ifndef _MACH_KMOD_H_ +#define _MACH_KMOD_H_ #include +#include #include +__BEGIN_DECLS + +#if PRAGMA_MARK +#pragma mark Basic macros & typedefs +#endif /*********************************************************************** -* kmod_control() commands. 1-5 are long-established. 6-8 are new in -* Leopard and used to reliably get and verify symbol information needed -* to link kexts against the running kernel, or to disable kmod loading -* if such symbol information cannot be found. 
+* Basic macros & typedefs ***********************************************************************/ -#define KMOD_CNTL_START 1 // call kmod's start routine -#define KMOD_CNTL_STOP 2 // call kmod's stop routine -#define KMOD_CNTL_RETAIN 3 // increase a kmod's reference count -#define KMOD_CNTL_RELEASE 4 // decrease a kmod's reference count -#define KMOD_CNTL_GET_CMD 5 // get kmod load cmd from kernel - -#define KMOD_CNTL_GET_KERNEL_SYMBOLS 6 // get symfile as data buffer -#define KMOD_CNTL_FREE_LINKEDIT_DATA 7 // refuse to create new kmods -#define KMOD_CNTL_GET_KERNEL_UUID 8 // LC_UUID load command payload -#define KMOD_CNTL_GET_UUID 8 // LC_UUID load command payload -#define KMOD_CNTL_DISABLE_LOAD 9 // refuse to create new kmods - -#define KMOD_PACK_IDS(from, to) (((unsigned long)from << 16) | (unsigned long)to) -#define KMOD_UNPACK_FROM_ID(i) ((unsigned long)i >> 16) -#define KMOD_UNPACK_TO_ID(i) ((unsigned long)i & 0xffff) +#define KMOD_MAX_NAME 64 + +#define KMOD_RETURN_SUCCESS KERN_SUCCESS +#define KMOD_RETURN_FAILURE KERN_FAILURE typedef int kmod_t; -typedef int kmod_control_flavor_t; -typedef void* kmod_args_t; -#define KMOD_MAX_NAME 64 +struct kmod_info; +typedef kern_return_t kmod_start_func_t(struct kmod_info * ki, void * data); +typedef kern_return_t kmod_stop_func_t(struct kmod_info * ki, void * data); +#if PRAGMA_MARK +#pragma mark Structure definitions +#endif +/*********************************************************************** +* Structure definitions +* +* All structures must be #pragma pack(4). 
+***********************************************************************/ #pragma pack(4) -/* LP64todo - not 64-bit safe */ +/* Run-time struct only; never saved to a file */ typedef struct kmod_reference { - struct kmod_reference *next; - struct kmod_info *info; + struct kmod_reference * next; + struct kmod_info * info; } kmod_reference_t; -#pragma pack() - -/**************************************************************************************/ -/* warning any changes to this structure affect the following macros. */ -/**************************************************************************************/ - -#define KMOD_RETURN_SUCCESS KERN_SUCCESS -#define KMOD_RETURN_FAILURE KERN_FAILURE - -typedef kern_return_t kmod_start_func_t(struct kmod_info *ki, void *data); -typedef kern_return_t kmod_stop_func_t(struct kmod_info *ki, void *data); - -#pragma pack(4) - -/* LP64todo - not 64-bit safe */ +/*********************************************************************** +* Warning: Any changes to the kmod_info structure affect the +* KMOD_..._DECL macros below. +***********************************************************************/ +/* The kmod_info_t structure is only safe to use inside the running + * kernel. If you need to work with a kmod_info_t structure outside + * the kernel, please use the compatibility definitions below. 
+ */ typedef struct kmod_info { - struct kmod_info *next; - int info_version; // version of this structure - int id; - char name[KMOD_MAX_NAME]; - char version[KMOD_MAX_NAME]; - int reference_count; // # refs to this - kmod_reference_t *reference_list; // who this refs - vm_address_t address; // starting address - vm_size_t size; // total size - vm_size_t hdr_size; // unwired hdr size - kmod_start_func_t *start; - kmod_stop_func_t *stop; + struct kmod_info * next; + int32_t info_version; // version of this structure + uint32_t id; + char name[KMOD_MAX_NAME]; + char version[KMOD_MAX_NAME]; + int32_t reference_count; // # linkage refs to this + kmod_reference_t * reference_list; // who this refs (links on) + vm_address_t address; // starting address + vm_size_t size; // total size + vm_size_t hdr_size; // unwired hdr size + kmod_start_func_t * start; + kmod_stop_func_t * stop; } kmod_info_t; -#pragma pack() - -typedef kmod_info_t *kmod_info_array_t; - -#define KMOD_INFO_NAME kmod_info -#define KMOD_INFO_VERSION 1 - -#define KMOD_DECL(name, version) \ - static kmod_start_func_t name ## _module_start; \ - static kmod_stop_func_t name ## _module_stop; \ - kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ - { #name }, { version }, -1, 0, 0, 0, 0, \ - name ## _module_start, \ - name ## _module_stop }; - -#define KMOD_EXPLICIT_DECL(name, version, start, stop) \ - kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ - { #name }, { version }, -1, 0, 0, 0, 0, \ - start, stop }; - -// the following is useful for libaries that don't need their own start and stop functions -#define KMOD_LIB_DECL(name, version) \ - kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ - { #name }, { version }, -1, 0, 0, 0, 0, \ - kmod_default_start, \ - kmod_default_stop }; - - -// ************************************************************************************* -// kmod kernel to user commands -// 
************************************************************************************* - -#define KMOD_LOAD_EXTENSION_PACKET 1 -#define KMOD_LOAD_WITH_DEPENDENCIES_PACKET 2 - -// for generic packets -#define KMOD_IOKIT_START_RANGE_PACKET 0x1000 -#define KMOD_IOKIT_END_RANGE_PACKET 0x1fff - -typedef struct kmod_load_extension_cmd { - int type; - char name[KMOD_MAX_NAME]; -} kmod_load_extension_cmd_t; - -typedef struct kmod_load_with_dependencies_cmd { - int type; - char name[KMOD_MAX_NAME]; - char dependencies[1][KMOD_MAX_NAME]; -} kmod_load_with_dependencies_cmd_t; - -typedef struct kmod_generic_cmd { - int type; - char data[1]; -} kmod_generic_cmd_t; - -#ifdef KERNEL_PRIVATE - -extern kmod_info_t *kmod_lookupbyname(const char * name); -extern kmod_info_t *kmod_lookupbyid(kmod_t id); -extern kmod_info_t *kmod_lookupbyaddress(vm_address_t address); -extern int kmod_lookupidbyaddress_locked(vm_address_t address); +/* A compatibility definition of kmod_info_t for 32-bit kexts. + */ +typedef struct kmod_info_32_v1 { + uint32_t next_addr; + int32_t info_version; + uint32_t id; + uint8_t name[KMOD_MAX_NAME]; + uint8_t version[KMOD_MAX_NAME]; + int32_t reference_count; + uint32_t reference_list_addr; + uint32_t address; + uint32_t size; + uint32_t hdr_size; + uint32_t start_addr; + uint32_t stop_addr; +} kmod_info_32_v1_t; + +/* A compatibility definition of kmod_info_t for 64-bit kexts. 
+ */ +typedef struct kmod_info_64_v1 { + uint64_t next_addr; + int32_t info_version; + uint32_t id; + uint8_t name[KMOD_MAX_NAME]; + uint8_t version[KMOD_MAX_NAME]; + int32_t reference_count; + uint64_t reference_list_addr; + uint64_t address; + uint64_t size; + uint64_t hdr_size; + uint64_t start_addr; + uint64_t stop_addr; +} kmod_info_64_v1_t; -extern kmod_info_t *kmod_lookupbyname_locked(const char * name); -extern kmod_info_t *kmod_lookupbyid_locked(kmod_t id); -extern kmod_start_func_t kmod_default_start; -extern kmod_stop_func_t kmod_default_stop; +#pragma pack() -__BEGIN_DECLS -extern void kmod_init(void) __attribute__((section("__TEXT, initcode"))); +#if PRAGMA_MARK +#pragma mark Kmod structure declaration macros +#endif +/*********************************************************************** +* Kmod structure declaration macros +***********************************************************************/ +#define KMOD_INFO_NAME kmod_info +#define KMOD_INFO_VERSION 1 + +#define KMOD_DECL(name, version) \ + static kmod_start_func_t name ## _module_start; \ + static kmod_stop_func_t name ## _module_stop; \ + kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1U, \ + { #name }, { version }, -1, 0, 0, 0, 0, \ + name ## _module_start, \ + name ## _module_stop }; + +#define KMOD_EXPLICIT_DECL(name, version, start, stop) \ + kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1U, \ + { #name }, { version }, -1, 0, 0, 0, 0, \ + start, stop }; + +#if PRAGMA_MARK +#pragma mark Kernel private declarations +#endif +/*********************************************************************** +* Kernel private declarations. 
+***********************************************************************/ +#ifdef KERNEL_PRIVATE -extern kern_return_t kmod_create_fake(const char *name, const char *version); -extern kern_return_t kmod_create_fake_with_address(const char *name, const char *version, - vm_address_t address, vm_size_t size, - int * return_id); -extern kern_return_t kmod_destroy_fake(kmod_t id); +/* Implementation now in libkern/OSKextLib.cpp. */ +extern void kmod_panic_dump(vm_offset_t * addr, unsigned int dump_cnt); -extern kern_return_t kmod_load_extension(char *name); -extern kern_return_t kmod_load_extension_with_dependencies(char *name, char **dependencies); -extern kern_return_t kmod_send_generic(int type, void *data, int size); +#endif /* KERNEL_PRIVATE */ -extern kern_return_t kmod_initialize_cpp(kmod_info_t *info); -extern kern_return_t kmod_finalize_cpp(kmod_info_t *info); -void record_kext_unload(kmod_t kmod_id); -void dump_kext_info(int (*printf_func)(const char *fmt, ...)); +#if PRAGMA_MARK +#pragma mark Obsolete kmod stuff +#endif +/*********************************************************************** +* These 3 should be dropped but they're referenced by MIG declarations. +***********************************************************************/ +typedef void * kmod_args_t; +typedef int kmod_control_flavor_t; +typedef kmod_info_t * kmod_info_array_t; -extern void kmod_dump(vm_offset_t *addr, unsigned int dump_cnt); __END_DECLS -#endif /* KERNEL_PRIVATE */ - -#endif /* _MACH_KMOD_H_ */ +#endif /* _MACH_KMOD_H_ */ diff --git a/osfmk/mach/mach_host.defs b/osfmk/mach/mach_host.defs index 6d3e15b45..df309d936 100644 --- a/osfmk/mach/mach_host.defs +++ b/osfmk/mach/mach_host.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -156,7 +156,11 @@ routine host_get_clock_service( clock_id : clock_id_t; out clock_serv : clock_serv_t); - +/* + * kernel module interface (obsolete as of SnowLeopard) + * see mach/kmod.h + */ +/* kmod_ MIG calls now return KERN_NOT_SUPPORTED on PPC/i386/x86_64. */ routine kmod_get_info( host : host_t; out modules : kmod_args_t); @@ -192,18 +196,8 @@ routine host_ipc_hash_info( out info : hash_info_bucket_array_t, Dealloc); -/* - * JMM - These routines should be on the host_priv port. We need - * to verify the move before putting them there. - */ -routine enable_bluebox( - host : host_t; - in taskID : unsigned; - in TWI_TableStart : unsigned; - in Desc_TableStart : unsigned); - -routine disable_bluebox( - host : host_t); +skip; /* was enable_bluebox */ +skip; /* was disable_bluebox */ /* * JMM - Keep processor_set related items at the end for easy @@ -255,4 +249,13 @@ routine host_lockgroup_info( out lockgroup_info : lockgroup_info_array_t, Dealloc); +/* + * Return 64-bit statistics from this host. + */ +routine host_statistics64( + host_priv : host_t; + flavor : host_flavor_t; + out host_info64_out : host_info64_t, CountInOut); + + /* vim: set ft=c : */ diff --git a/osfmk/mach/mach_norma.defs b/osfmk/mach/mach_norma.defs index 816599fdd..0bc1c6945 100644 --- a/osfmk/mach/mach_norma.defs +++ b/osfmk/mach/mach_norma.defs @@ -141,14 +141,14 @@ subsystem #if KERNEL_USER KernelUser -#endif KERNEL_USER +#endif /* KERNEL_USER */ #if KERNEL_SERVER KernelServer -#endif KERNEL_SERVER +#endif /* KERNEL_SERVER */ mach_norma 555000; #ifdef KERNEL_USER userprefix r_; -#endif KERNEL_USER +#endif /* KERNEL_USER */ #include #include diff --git a/osfmk/mach/mach_port.defs b/osfmk/mach/mach_port.defs index 88156a847..5801ee42c 100644 --- a/osfmk/mach/mach_port.defs +++ b/osfmk/mach/mach_port.defs @@ -406,12 +406,15 @@ routine mach_port_dnrequest_info( * that the given send/receive right represents. 
* This call is only valid on MACH_IPC_DEBUG kernels. * Otherwise, KERN_FAILURE is returned. + * + * This interface is DEPRECATED in favor of the new + * mach_port_kernel_object64() call (see below). */ routine mach_port_kernel_object( task : ipc_space_t; name : mach_port_name_t; out object_type : unsigned; - out object_addr : vm_offset_t); + out object_addr : unsigned); /* @@ -453,4 +456,37 @@ routine mach_port_extract_member( name : mach_port_name_t; pset : mach_port_name_t); +/* + * Only valid for receive rights. + * Gets the context pointer for the port. + */ + +routine mach_port_get_context( + task : ipc_space_t; + name : mach_port_name_t; + out context : mach_vm_address_t); + +/* + * Only valid for receive rights. + * Sets the context pointer for the port. + */ + +routine mach_port_set_context( + task : ipc_space_t; + name : mach_port_name_t; + context : mach_vm_address_t); + +/* + * Return the type and address of the kernel object + * that the given send/receive right represents. + * This call is only valid on MACH_IPC_DEBUG kernels. + * Otherwise, KERN_FAILURE is returned. 
+ */ +routine mach_port_kobject( + task : ipc_space_t; + name : mach_port_name_t; + out object_type : natural_t; + out object_addr : mach_vm_address_t); + + /* vim: set ft=c : */ diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h index 87c4df1f6..38298e27e 100644 --- a/osfmk/mach/mach_traps.h +++ b/osfmk/mach/mach_traps.h @@ -139,24 +139,23 @@ extern kern_return_t semaphore_timedwait_signal_trap( unsigned int sec, clock_res_t nsec); -#if !defined(__LP64__) -/* these should go away altogether - so no 64 legacy please */ - -extern kern_return_t init_process(void); - -#endif /* !defined(__LP64__) */ +extern kern_return_t clock_sleep_trap( + mach_port_name_t clock_name, + sleep_type_t sleep_type, + int sleep_sec, + int sleep_nsec, + mach_timespec_t *wakeup_time); -#if !defined(__LP64__) +#endif /* PRIVATE */ -/* more that should go away so no 64-bit legacy please */ extern kern_return_t macx_swapon( - char *filename, + uint64_t filename, int flags, int size, int priority); extern kern_return_t macx_swapoff( - char *filename, + uint64_t filename, int flags); extern kern_return_t macx_triggers( @@ -171,17 +170,6 @@ extern kern_return_t macx_backing_store_suspend( extern kern_return_t macx_backing_store_recovery( int pid); -#endif /* !defined(__LP64__) */ - -extern kern_return_t clock_sleep_trap( - mach_port_name_t clock_name, - sleep_type_t sleep_type, - int sleep_sec, - int sleep_nsec, - mach_timespec_t *wakeup_time); - -#endif /* PRIVATE */ - extern boolean_t swtch_pri(int pri); extern boolean_t swtch(void); @@ -263,6 +251,8 @@ void munge_dddddd(const void *, void *); void munge_ddddddd(const void *, void *); void munge_dddddddd(const void *, void *); void munge_l(const void *, void *); +void munge_lw(const void *, void *); +void munge_lwww(const void *, void *); void munge_wl(const void *, void *); void munge_wlw(const void *, void *); void munge_wwwl(const void *, void *); @@ -286,6 +276,8 @@ void munge_wwwwwl(const void *, void *); #define munge_ddddddd 
NULL #define munge_dddddddd NULL #define munge_l NULL +#define munge_lw NULL +#define munge_lwww NULL #define munge_wl NULL #define munge_wlw NULL #define munge_wwwl NULL @@ -295,31 +287,31 @@ void munge_wwwwwl(const void *, void *); #endif /* !__MUNGE_ONCE */ struct kern_invalid_args { - register_t dummy; + int32_t dummy; }; extern kern_return_t kern_invalid( struct kern_invalid_args *args); struct mach_reply_port_args { - register_t dummy; + int32_t dummy; }; extern mach_port_name_t mach_reply_port( struct mach_reply_port_args *args); struct thread_self_trap_args { - register_t dummy; + int32_t dummy; }; extern mach_port_name_t thread_self_trap( struct thread_self_trap_args *args); struct task_self_trap_args { - register_t dummy; + int32_t dummy; }; extern mach_port_name_t task_self_trap( struct task_self_trap_args *args); struct host_self_trap_args { - register_t dummy; + int32_t dummy; }; extern mach_port_name_t host_self_trap( struct host_self_trap_args *args); @@ -389,13 +381,6 @@ struct semaphore_timedwait_signal_trap_args { extern kern_return_t semaphore_timedwait_signal_trap( struct semaphore_timedwait_signal_trap_args *args); -/* not published to LP64 clients */ -struct init_process_args { - register_t dummy; -}; -extern kern_return_t init_process( - struct init_process_args *args); - struct map_fd_args { PAD_ARG_(int, fd); PAD_ARG_(vm_offset_t, offset); @@ -429,9 +414,8 @@ struct pid_for_task_args { extern kern_return_t pid_for_task( struct pid_for_task_args *args); -/* not published to LP64 clients*/ struct macx_swapon_args { - PAD_ARG_(char *, filename); + PAD_ARG_(uint64_t, filename); PAD_ARG_(int, flags); PAD_ARG_(int, size); PAD_ARG_(int, priority); @@ -440,7 +424,7 @@ extern kern_return_t macx_swapon( struct macx_swapon_args *args); struct macx_swapoff_args { - PAD_ARG_(char *, filename); + PAD_ARG_(uint64_t, filename); PAD_ARG_(int, flags); }; extern kern_return_t macx_swapoff( @@ -473,8 +457,14 @@ struct swtch_pri_args { extern boolean_t 
swtch_pri( struct swtch_pri_args *args); +struct pfz_exit_args { + int32_t dummy; +}; +extern kern_return_t pfz_exit( + struct pfz_exit_args *args); + struct swtch_args { - register_t dummy; + int32_t dummy; }; extern boolean_t swtch( struct swtch_args *args); @@ -510,7 +500,7 @@ extern kern_return_t mach_wait_until_trap( struct mach_wait_until_trap_args *args); struct mk_timer_create_trap_args { - register_t dummy; + int32_t dummy; }; extern mach_port_name_t mk_timer_create_trap( struct mk_timer_create_trap_args *args); diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs index 7f7dfc470..c4479bd51 100644 --- a/osfmk/mach/mach_types.defs +++ b/osfmk/mach/mach_types.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -290,19 +290,30 @@ type host_security_t = mach_port_t #endif /* KERNEL_SERVER */ ; - /* host_info_t: variable-sized inline array that can contain: - * host_basic_info_old_t (5 ints) - * host_basic_info_t (12 ints) - * host_sched_info_t (2 ints) - * kernel_resource_sizes_t (5 ints) - * host_load_info_t (6 ints) - * vm_statistics_t (15 ints) + /* + * host_info_t: variable-sized inline array that can contain: + * + * host_basic_info_old_t (5 ints) + * host_basic_info_t (12 ints) + * host_sched_info_t (2 ints) + * kernel_resource_sizes_t (5 ints) + * host_load_info_t (6 ints) + * vm_statistics32_t (15 ints) + * * If other host_info flavors are added, this definition may * need to be changed. 
(See mach/{host_info,vm_statistics}.h) */ type host_flavor_t = int; type host_info_t = array[*:15] of integer_t; + + /* + * host_info64_t: variable-sized inline array that can contain: + * + * vm_statistics_t (6 ints and 9 longs) + */ +type host_info64_t = array[*:256] of integer_t; + type processor_t = mach_port_t #if KERNEL_SERVER intran: processor_t convert_port_to_processor(mach_port_t) @@ -416,8 +427,14 @@ type vm_region_flavor_t = int; type vm_region_info_t = array[*:10] of int; type vm_region_recurse_info_t = array[*:19] of int; +type vm_page_info_flavor_t = int; +type vm_page_info_t = array[*:32] of int; + type mach_vm_read_entry_t = array[512] of mach_vm_offset_t; type vm_read_entry_t = array[512] of vm_offset_t; +#if VM32_SUPPORT +type vm32_read_entry_t = array[512] of vm32_offset_t; +#endif type exception_mask_t = int; type exception_behavior_t = int; diff --git a/osfmk/mach/mach_vm.defs b/osfmk/mach/mach_vm.defs index dda18fc4c..8dbb71f66 100644 --- a/osfmk/mach/mach_vm.defs +++ b/osfmk/mach/mach_vm.defs @@ -401,7 +401,7 @@ routine vm_region_64( * THIS INTERFACE IS STILL EVOLVING. */ #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) -#if !defined(__LP64__) +#if !defined(__LP64__) || KERNEL_SERVER || XNU_KERNEL_PRIVATE || LIBSYSCALL_INTERFACE routine _mach_make_memory_entry( #else routine mach_make_memory_entry( @@ -434,6 +434,16 @@ routine vm_purgable_control( inout state : int); +#if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) +routine mach_vm_page_info( + target_task : vm_map_t; + address : mach_vm_address_t; + flavor : vm_page_info_flavor_t; + out info : vm_page_info_t, CountInOut); +#else +skip; +#endif + /****************************** Legacy section ***************************/ /* The following definitions are exist to provide compatibility with */ /* the legacy APIs. They are no different. 
We just need to produce */ diff --git a/osfmk/mach/machine/sdt.h b/osfmk/mach/machine/sdt.h index 6041fc3f5..af2b59b1e 100644 --- a/osfmk/mach/machine/sdt.h +++ b/osfmk/mach/machine/sdt.h @@ -32,6 +32,10 @@ #include +#ifdef KERNEL + +#if CONFIG_DTRACE + /* * The following macros are used to create static probes. The argument types * should be no greater than uintptr_t in size each. The behavior of larger @@ -148,9 +152,6 @@ } -#ifdef KERNEL - -#if CONFIG_DTRACE #define DTRACE_SCHED(name) \ DTRACE_PROBE(__sched_, name); diff --git a/osfmk/mach/memory_object.defs b/osfmk/mach/memory_object.defs index 9bc85153a..436e9b290 100644 --- a/osfmk/mach/memory_object.defs +++ b/osfmk/mach/memory_object.defs @@ -171,7 +171,7 @@ routine memory_object_data_initialize( routine memory_object_data_unlock( memory_object : memory_object_t; offset : memory_object_offset_t; - size : memory_object_cluster_size_t; + size : memory_object_size_t; desired_access : vm_prot_t); @@ -186,7 +186,7 @@ routine memory_object_data_unlock( routine memory_object_synchronize( memory_object : memory_object_t; offset : memory_object_offset_t; - size : memory_object_cluster_size_t; + size : memory_object_size_t; sync_flags : vm_sync_t ); /* diff --git a/osfmk/mach/memory_object_control.defs b/osfmk/mach/memory_object_control.defs index fef553a3b..0b2a76ad8 100644 --- a/osfmk/mach/memory_object_control.defs +++ b/osfmk/mach/memory_object_control.defs @@ -97,7 +97,7 @@ routine memory_object_change_attributes( routine memory_object_synchronize_completed ( memory_control : memory_object_control_t; offset : memory_object_offset_t; - length : vm_offset_t); + length : memory_object_size_t); /* * Control use of the data associated with the given @@ -148,7 +148,7 @@ routine memory_object_destroy( routine memory_object_upl_request( memory_control : memory_object_control_t; in offset : memory_object_offset_t; - in size : vm_size_t; + in size : upl_size_t; out upl : upl_t; out page_list : upl_page_info_array_t, 
CountInOut; in cntrl_flags : integer_t); @@ -156,8 +156,8 @@ routine memory_object_upl_request( routine memory_object_super_upl_request( memory_control : memory_object_control_t; in offset : memory_object_offset_t; - in size : vm_size_t; - in super_size : vm_size_t; + in size : upl_size_t; + in super_size : upl_size_t; out upl : upl_t; out page_list : upl_page_info_array_t, CountInOut; in cntrl_flags : integer_t); @@ -167,6 +167,7 @@ routine memory_object_cluster_size( control : memory_object_control_t; out start : memory_object_offset_t; out length : vm_size_t; + out io_streaming : uint32_t; in fault_info : memory_object_fault_info_t); /* diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 739f0374d..8f3044430 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -86,6 +86,8 @@ typedef unsigned long long memory_object_size_t; typedef natural_t memory_object_cluster_size_t; typedef natural_t * memory_object_fault_info_t; +typedef unsigned long long vm_object_id_t; + /* * Temporary until real EMMI version gets re-implemented @@ -96,12 +98,19 @@ typedef natural_t * memory_object_fault_info_t; struct memory_object_pager_ops; /* forward declaration */ typedef struct memory_object { + unsigned int _pad1; /* struct ipc_object_header */ +#ifdef __LP64__ + unsigned int _pad2; /* pad to natural boundary */ +#endif const struct memory_object_pager_ops *mo_pager_ops; } *memory_object_t; typedef struct memory_object_control { + unsigned int moc_ikot; /* struct ipc_object_header */ +#ifdef __LP64__ + unsigned int _pad; /* pad to natural boundary */ +#endif struct vm_object *moc_object; - unsigned int moc_ikot; /* XXX fake ip_kotype */ } *memory_object_control_t; typedef const struct memory_object_pager_ops { @@ -124,7 +133,7 @@ typedef const struct memory_object_pager_ops { kern_return_t (*memory_object_data_return)( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, + 
memory_object_cluster_size_t size, memory_object_offset_t *resid_offset, int *io_error, boolean_t dirty, @@ -133,16 +142,16 @@ typedef const struct memory_object_pager_ops { kern_return_t (*memory_object_data_initialize)( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size); + memory_object_cluster_size_t size); kern_return_t (*memory_object_data_unlock)( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, + memory_object_size_t size, vm_prot_t desired_access); kern_return_t (*memory_object_synchronize)( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, + memory_object_size_t size, vm_sync_t sync_flags); kern_return_t (*memory_object_map)( memory_object_t mem_obj, @@ -236,6 +245,7 @@ typedef int memory_object_return_t; #define MEMORY_OBJECT_COPY_SYNC 0x8 #define MEMORY_OBJECT_DATA_SYNC 0x10 #define MEMORY_OBJECT_IO_SYNC 0x20 +#define MEMORY_OBJECT_DATA_FLUSH_ALL 0x40 /* * Types for the memory object flavor interfaces @@ -383,7 +393,7 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; */ #ifdef PRIVATE #define MAX_UPL_TRANSFER 256 -#define MAX_UPL_SIZE 4096 +#define MAX_UPL_SIZE 8192 struct upl_page_info { ppnum_t phys_addr; /* physical page index number */ @@ -450,9 +460,12 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_WILL_MODIFY 0x00800000 /* caller will modify the pages */ #define UPL_NEED_32BIT_ADDR 0x01000000 +#define UPL_UBC_MSYNC 0x02000000 +#define UPL_UBC_PAGEOUT 0x04000000 +#define UPL_UBC_PAGEIN 0x08000000 /* UPL flags known by this kernel */ -#define UPL_VALID_FLAGS 0x01FFFFFF +#define UPL_VALID_FLAGS 0x0FFFFFFF /* upl abort error flags */ @@ -462,7 +475,7 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_ABORT_FREE_ON_EMPTY 0x8 /* only implemented in wrappers */ #define UPL_ABORT_DUMP_PAGES 0x10 #define UPL_ABORT_NOTIFY_EMPTY 0x20 -#define UPL_ABORT_ALLOW_ACCESS 0x40 +/* deprecated: #define UPL_ABORT_ALLOW_ACCESS 
0x40 */ #define UPL_ABORT_REFERENCE 0x80 /* upl pages check flags */ @@ -525,9 +538,17 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ * pageout will reenter the FS for the same file currently * being handled in this context. */ - #define UPL_NESTED_PAGEOUT 0x80 +/* + * we've detected a sequential access pattern and + * we are speculatively and aggressively pulling + * pages in... do not count these as real PAGEINs + * w/r to our hard throttle maintenance + */ +#define UPL_IOSTREAMING 0x100 + + /* upl commit flags */ @@ -536,8 +557,10 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_COMMIT_SET_DIRTY 0x4 #define UPL_COMMIT_INACTIVATE 0x8 #define UPL_COMMIT_NOTIFY_EMPTY 0x10 -#define UPL_COMMIT_ALLOW_ACCESS 0x20 +/* deprecated: #define UPL_COMMIT_ALLOW_ACCESS 0x20 */ #define UPL_COMMIT_CS_VALIDATED 0x40 +#define UPL_COMMIT_CLEAR_PRECIOUS 0x80 +#define UPL_COMMIT_SPECULATE 0x100 #define UPL_COMMIT_KERNEL_ONLY_FLAGS (UPL_COMMIT_CS_VALIDATED) @@ -630,14 +653,27 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ extern vm_size_t upl_offset_to_pagelist; extern vm_size_t upl_get_internal_pagelist_offset(void); +extern void* upl_get_internal_vectorupl(upl_t); +extern upl_page_info_t* upl_get_internal_vectorupl_pagelist(upl_t); + +/*Use this variant to get the UPL's page list iff:*/ +/*- the upl being passed in is already part of a vector UPL*/ +/*- the page list you want is that of this "sub-upl" and not that of the entire vector-upl*/ + +#define UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl) \ + ((upl_page_info_t *)((upl_offset_to_pagelist == 0) ? \ + (uintptr_t)upl + (unsigned int)(upl_offset_to_pagelist = upl_get_internal_pagelist_offset()): \ + (uintptr_t)upl + (unsigned int)upl_offset_to_pagelist)) /* UPL_GET_INTERNAL_PAGE_LIST is only valid on internal objects where the */ /* list request was made with the UPL_INTERNAL flag */ + #define UPL_GET_INTERNAL_PAGE_LIST(upl) \ + ((upl_get_internal_vectorupl(upl) != NULL ) ? 
(upl_get_internal_vectorupl_pagelist(upl)) : \ ((upl_page_info_t *)((upl_offset_to_pagelist == 0) ? \ - (unsigned int)upl + (unsigned int)(upl_offset_to_pagelist = upl_get_internal_pagelist_offset()): \ - (unsigned int)upl + (unsigned int)upl_offset_to_pagelist)) + (uintptr_t)upl + (unsigned int)(upl_offset_to_pagelist = upl_get_internal_pagelist_offset()): \ + (uintptr_t)upl + (unsigned int)upl_offset_to_pagelist))) __BEGIN_DECLS diff --git a/osfmk/mach/message.h b/osfmk/mach/message.h index 37c9aa3be..9be5f5e90 100644 --- a/osfmk/mach/message.h +++ b/osfmk/mach/message.h @@ -96,7 +96,7 @@ typedef natural_t mach_msg_timeout_t; #define MACH_MSG_TIMEOUT_NONE ((mach_msg_timeout_t) 0) /* - * The kernel uses MACH_MSGH_BITS_COMPLEX as a hint. It it isn't on, it + * The kernel uses MACH_MSGH_BITS_COMPLEX as a hint. If it isn't on, it * assumes the body of the message doesn't contain port rights or OOL * data. The field is set in received messages. A user task must * use caution in interpreting the body of a message if the bit isn't @@ -145,11 +145,11 @@ typedef natural_t mach_msg_timeout_t; /* * Every message starts with a message header. - * Following the message header are zero or more pairs of - * type descriptors (mach_msg_type_t/mach_msg_type_long_t) and - * data values. The size of the message must be specified in bytes, - * and includes the message header, type descriptors, inline - * data, and inline pointer for out-of-line data. + * Following the message header, if the message is complex, are a count + * of type descriptors and the type descriptors themselves + * (mach_msg_descriptor_t). The size of the message must be specified in + * bytes, and includes the message header, descriptor count, descriptors, + * and inline data. * * The msgh_remote_port field specifies the destination of the message. * It must specify a valid send or send-once right for a port. 
@@ -196,6 +196,18 @@ typedef unsigned int mach_msg_copy_options_t; #define MACH_MSG_KALLOC_COPY_T 4 #endif /* MACH_KERNEL */ +/* + * In a complex mach message, the mach_msg_header_t is followed by + * a descriptor count, then an array of that number of descriptors + * (mach_msg_*_descriptor_t). The type field of mach_msg_type_descriptor_t + * (which any descriptor can be cast to) indicates the flavor of the + * descriptor. + * + * Note that in LP64, the various types of descriptors are no longer all + * the same size as mach_msg_descriptor_t, so the array cannot be indexed + * as expected. + */ + typedef unsigned int mach_msg_descriptor_type_t; #define MACH_MSG_PORT_DESCRIPTOR 0 @@ -216,10 +228,16 @@ typedef struct typedef struct { mach_port_t name; +#if !(defined(KERNEL) && defined(__LP64__)) +// Pad to 8 bytes everywhere except the K64 kernel where mach_port_t is 8 bytes mach_msg_size_t pad1; +#endif unsigned int pad2 : 16; mach_msg_type_name_t disposition : 8; mach_msg_descriptor_type_t type : 8; +#if defined(KERNEL) + uint32_t pad_end; +#endif } mach_msg_port_descriptor_t; typedef struct @@ -255,6 +273,9 @@ typedef struct #if defined(__LP64__) mach_msg_size_t size; #endif +#if defined(KERNEL) && !defined(__LP64__) + uint32_t pad_end; +#endif } mach_msg_ool_descriptor_t; typedef struct @@ -290,6 +311,9 @@ typedef struct #if defined(__LP64__) mach_msg_size_t count; #endif +#if defined(KERNEL) && !defined(__LP64__) + uint32_t pad_end; +#endif } mach_msg_ool_ports_descriptor_t; /* @@ -297,6 +321,15 @@ typedef struct * appropriate in LP64 mode because not all descriptors * are of the same size in that environment. 
*/ +#if defined(__LP64__) && defined(KERNEL) +typedef union +{ + mach_msg_port_descriptor_t port; + mach_msg_ool_descriptor32_t out_of_line; + mach_msg_ool_ports_descriptor32_t ool_ports; + mach_msg_type_descriptor_t type; +} mach_msg_descriptor_t; +#else typedef union { mach_msg_port_descriptor_t port; @@ -304,6 +337,7 @@ typedef union mach_msg_ool_ports_descriptor_t ool_ports; mach_msg_type_descriptor_t type; } mach_msg_descriptor_t; +#endif typedef struct { @@ -386,6 +420,17 @@ typedef struct audit_token_t msgh_audit; } mach_msg_audit_trailer_t; +typedef struct +{ + mach_msg_trailer_type_t msgh_trailer_type; + mach_msg_trailer_size_t msgh_trailer_size; + mach_port_seqno_t msgh_seqno; + security_token_t msgh_sender; + audit_token_t msgh_audit; + mach_vm_address_t msgh_context; +} mach_msg_context_trailer_t; + + typedef struct { mach_port_name_t sender; @@ -402,9 +447,10 @@ typedef struct mach_msg_trailer_size_t msgh_trailer_size; mach_port_seqno_t msgh_seqno; security_token_t msgh_sender; - audit_token_t msgh_audit; - msg_labels_t msgh_labels; + audit_token_t msgh_audit; + mach_vm_address_t msgh_context; int msgh_ad; + msg_labels_t msgh_labels; } mach_msg_mac_trailer_t; #define MACH_MSG_TRAILER_MINIMUM_SIZE sizeof(mach_msg_trailer_t) @@ -419,7 +465,7 @@ typedef struct * MAX_TRAILER_SIZE. */ typedef mach_msg_mac_trailer_t mach_msg_max_trailer_t; -#define MAX_TRAILER_SIZE sizeof(mach_msg_max_trailer_t) +#define MAX_TRAILER_SIZE ((mach_msg_size_t)sizeof(mach_msg_max_trailer_t)) /* * Legacy requirements keep us from ever updating these defines (even @@ -481,31 +527,6 @@ typedef union #define msgh_kind msgh_seqno #define mach_msg_kind_t mach_port_seqno_t -/* - * The msgt_number field specifies the number of data elements. - * The msgt_size field specifies the size of each data element, in bits. - * The msgt_name field specifies the type of each data element. - * If msgt_inline is TRUE, the data follows the type descriptor - * in the body of the message. 
If msgt_inline is FALSE, then a pointer - * to the data should follow the type descriptor, and the data is - * sent out-of-line. In this case, if msgt_deallocate is TRUE, - * then the out-of-line data is moved (instead of copied) into the message. - * If msgt_longform is TRUE, then the type descriptor is actually - * a mach_msg_type_long_t. - * - * The actual amount of inline data following the descriptor must - * a multiple of the word size. For out-of-line data, this is a - * pointer. For inline data, the supplied data size (calculated - * from msgt_number/msgt_size) is rounded up. This guarantees - * that type descriptors always fall on word boundaries. - * - * For port rights, msgt_size must be 8*sizeof(mach_port_t). - * If the data is inline, msgt_deallocate should be FALSE. - * The msgt_unused bit should be zero. - * The msgt_name, msgt_size, msgt_number fields in - * a mach_msg_type_long_t should be zero. - */ - typedef natural_t mach_msg_type_size_t; typedef natural_t mach_msg_type_number_t; @@ -575,16 +596,16 @@ typedef integer_t mach_msg_option_t; * which is equivalent to a mach_msg_trailer_t. * * XXXMAC: unlike the rest of the MACH_RCV_* flags, MACH_RCV_TRAILER_LABELS - * and MACH_RCV_TRAILER_AV need their own private bit since we only calculate - * their fields when absolutely required. This will cause us problems if - * Apple adds new trailers. + * needs its own private bit since we only calculate its fields when absolutely + * required. 
*/ #define MACH_RCV_TRAILER_NULL 0 #define MACH_RCV_TRAILER_SEQNO 1 #define MACH_RCV_TRAILER_SENDER 2 #define MACH_RCV_TRAILER_AUDIT 3 -#define MACH_RCV_TRAILER_LABELS 4 -#define MACH_RCV_TRAILER_AV 8 +#define MACH_RCV_TRAILER_CTX 4 +#define MACH_RCV_TRAILER_AV 7 +#define MACH_RCV_TRAILER_LABELS 8 #define MACH_RCV_TRAILER_TYPE(x) (((x) & 0xf) << 28) #define MACH_RCV_TRAILER_ELEMENTS(x) (((x) & 0xf) << 24) @@ -593,12 +614,12 @@ typedef integer_t mach_msg_option_t; #define GET_RCV_ELEMENTS(y) (((y) >> 24) & 0xf) /* - * XXXMAC: note that in the case of MACH_RCV_TRAILER_AV and - * MACH_RCV_TRAILER_LABELS, we just fall through to mach_msg_max_trailer_t. + * XXXMAC: note that in the case of MACH_RCV_TRAILER_LABELS, + * we just fall through to mach_msg_max_trailer_t. * This is correct behavior since mach_msg_max_trailer_t is defined as - * mac_msg_mac_trailer_t which is used for the LABELS and AV trailers. - * It also makes things work properly if MACH_RCV_TRAILER_AV or - * MACH_RCV_TRAILER_LABELS are ORed with one of the other options. + * mach_msg_mac_trailer_t which is used for the LABELS trailer. + * It also makes things work properly if MACH_RCV_TRAILER_LABELS is ORed + * with one of the other options. */ #define REQUESTED_TRAILER_SIZE(y) \ ((mach_msg_trailer_size_t) \ @@ -610,7 +631,11 @@ typedef integer_t mach_msg_option_t; sizeof(mach_msg_security_trailer_t) : \ ((GET_RCV_ELEMENTS(y) == MACH_RCV_TRAILER_AUDIT) ? \ sizeof(mach_msg_audit_trailer_t) : \ - sizeof(mach_msg_max_trailer_t)))))) + ((GET_RCV_ELEMENTS(y) == MACH_RCV_TRAILER_CTX) ? \ + sizeof(mach_msg_context_trailer_t) : \ + ((GET_RCV_ELEMENTS(y) == MACH_RCV_TRAILER_AV) ? \ + sizeof(mach_msg_mac_trailer_t) : \ + sizeof(mach_msg_max_trailer_t)))))))) /* * Much code assumes that mach_msg_return_t == kern_return_t. 
@@ -755,6 +780,10 @@ extern mach_msg_return_t mach_msg( mach_msg_timeout_t timeout, mach_port_name_t notify); +#elif defined(MACH_KERNEL_PRIVATE) + +extern mach_msg_return_t mach_msg_receive_results(void); + #endif /* KERNEL */ __END_DECLS diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h index 5e1186540..9db876f17 100644 --- a/osfmk/mach/port.h +++ b/osfmk/mach/port.h @@ -140,13 +140,21 @@ struct ipc_port ; typedef struct ipc_port *ipc_port_t; -#define IPC_PORT_NULL ((ipc_port_t) 0) -#define IPC_PORT_DEAD ((ipc_port_t)~0) +#define IPC_PORT_NULL ((ipc_port_t) 0UL) +#define IPC_PORT_DEAD ((ipc_port_t)~0UL) #define IPC_PORT_VALID(port) \ ((port) != IPC_PORT_NULL && (port) != IPC_PORT_DEAD) typedef ipc_port_t mach_port_t; +/* + * Since the 32-bit and 64-bit representations of ~0 are different, + * explicitly handle MACH_PORT_DEAD + */ + +#define CAST_MACH_PORT_TO_NAME(x) ((mach_port_name_t)(uintptr_t)(x)) +#define CAST_MACH_NAME_TO_PORT(x) ((x) == MACH_PORT_DEAD ? (mach_port_t)IPC_PORT_DEAD : (mach_port_t)(uintptr_t)(x)) + #else /* KERNEL */ /* diff --git a/osfmk/mach/ppc/machine_types.defs b/osfmk/mach/ppc/machine_types.defs index 018f7809e..f0d5c41f3 100644 --- a/osfmk/mach/ppc/machine_types.defs +++ b/osfmk/mach/ppc/machine_types.defs @@ -111,6 +111,16 @@ type mach_vm_size_t = uint64_t; #endif /* MACH_IPC_COMPAT */ +/* + * These are types used internal to Mach to implement the + * legacy 32-bit VM APIs published by the kernel. 
+ */ +#define VM32_SUPPORT 1 + +type vm32_address_t = uint32_t; +type vm32_offset_t = uint32_t; +type vm32_size_t = uint32_t; + #endif /* _PPC_VM_TYPES_DEFS_ */ /* vim: set ft=c : */ diff --git a/osfmk/mach/ppc/sdt_isa.h b/osfmk/mach/ppc/sdt_isa.h index c10e3e604..558a12406 100644 --- a/osfmk/mach/ppc/sdt_isa.h +++ b/osfmk/mach/ppc/sdt_isa.h @@ -40,7 +40,7 @@ #if defined(KERNEL) /* - * For the kernel, set an explicit global label do the symbol can be located + * For the kernel, set an explicit global label so the symbol can be located */ #define DTRACE_LAB(p, n) \ "__dtrace_probe$" DTRACE_TOSTRING(__LINE__) DTRACE_STRINGIFY(_##p##___##n) diff --git a/osfmk/mach/ppc/vm_param.h b/osfmk/mach/ppc/vm_param.h index ec18cb693..ae7238f01 100644 --- a/osfmk/mach/ppc/vm_param.h +++ b/osfmk/mach/ppc/vm_param.h @@ -78,6 +78,7 @@ /* Kernel-wide values */ #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000U) +#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS #define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xDFFFFFFFU) #define KERNEL_STACK_SIZE (4 * PPC_PGBYTES) #define INTSTACK_SIZE (5 * PPC_PGBYTES) @@ -88,7 +89,7 @@ #ifdef MACH_KERNEL_PRIVATE /* For implementing legacy 32-bit interfaces */ -#define VM32_SUPPORT +#define VM32_SUPPORT 1 #define VM32_MIN_ADDRESS ((vm32_offset_t) 0) #define VM32_MAX_ADDRESS ((vm32_offset_t) (VM_MAX_PAGE_ADDRESS & 0xFFFFFFFF)) diff --git a/osfmk/mach/ppc/vm_types.h b/osfmk/mach/ppc/vm_types.h index 8a3137bd2..0b3d39485 100644 --- a/osfmk/mach/ppc/vm_types.h +++ b/osfmk/mach/ppc/vm_types.h @@ -70,7 +70,7 @@ #ifndef ASSEMBLER #include -#include +#include #include /* @@ -133,7 +133,7 @@ typedef uint64_t vm_map_size_t; #ifdef MACH_KERNEL_PRIVATE -#ifdef VM32_SUPPORT +#if VM32_SUPPORT /* * These are types used internal to Mach to implement the diff --git a/osfmk/mach/semaphore.h b/osfmk/mach/semaphore.h index f90bbcd24..36ba3d00f 100644 --- a/osfmk/mach/semaphore.h +++ b/osfmk/mach/semaphore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 
Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,19 +54,62 @@ __BEGIN_DECLS extern kern_return_t semaphore_signal (semaphore_t semaphore); extern kern_return_t semaphore_signal_all (semaphore_t semaphore); + +extern kern_return_t semaphore_wait (semaphore_t semaphore); + +#ifdef KERNEL + +#ifdef __LP64__ + +#ifdef KERNEL_PRIVATE + +extern kern_return_t semaphore_timedwait (semaphore_t semaphore, + mach_timespec_t wait_time); + +#endif /* KERNEL_PRIVATE */ + +#else /* __LP64__ */ + +extern kern_return_t semaphore_timedwait (semaphore_t semaphore, + mach_timespec_t wait_time); + +#endif /* __LP64__ */ + +extern kern_return_t semaphore_wait_deadline (semaphore_t semaphore, + uint64_t deadline); +extern kern_return_t semaphore_wait_noblock (semaphore_t semaphore); + +#ifdef XNU_KERNEL_PRIVATE + +extern kern_return_t semaphore_wait_signal (semaphore_t wait_semaphore, + semaphore_t signal_semaphore); + +extern kern_return_t semaphore_timedwait_signal(semaphore_t wait_semaphore, + semaphore_t signal_semaphore, + mach_timespec_t wait_time); + extern kern_return_t semaphore_signal_thread (semaphore_t semaphore, thread_t thread); -extern kern_return_t semaphore_wait (semaphore_t semaphore); +#endif /* XNU_KERNEL_PRIVATE */ + +#else /* KERNEL */ + extern kern_return_t semaphore_timedwait (semaphore_t semaphore, - mach_timespec_t wait_time); + mach_timespec_t wait_time); + +extern kern_return_t semaphore_timedwait_signal(semaphore_t wait_semaphore, + semaphore_t signal_semaphore, + mach_timespec_t wait_time); extern kern_return_t semaphore_wait_signal (semaphore_t wait_semaphore, semaphore_t signal_semaphore); -extern kern_return_t semaphore_timedwait_signal(semaphore_t wait_semaphore, - semaphore_t signal_semaphore, - mach_timespec_t wait_time); +extern kern_return_t semaphore_signal_thread (semaphore_t semaphore, + thread_t thread); + +#endif /* KERNEL */ + __END_DECLS 
#ifdef PRIVATE diff --git a/osfmk/mach/shared_memory_server.h b/osfmk/mach/shared_memory_server.h index d2ba99b45..ea7b9127f 100644 --- a/osfmk/mach/shared_memory_server.h +++ b/osfmk/mach/shared_memory_server.h @@ -45,6 +45,8 @@ #ifndef _MACH_SHARED_MEMORY_SERVER_H_ #define _MACH_SHARED_MEMORY_SERVER_H_ +#warning " is deprecated. Please use instead." + #include #include #include diff --git a/osfmk/mach/shared_region.h b/osfmk/mach/shared_region.h index ab5a722e1..1e2143e1a 100644 --- a/osfmk/mach/shared_region.h +++ b/osfmk/mach/shared_region.h @@ -39,15 +39,15 @@ #define SHARED_REGION_SIZE_I386 0x20000000ULL #define SHARED_REGION_NESTING_BASE_I386 0x90000000ULL #define SHARED_REGION_NESTING_SIZE_I386 0x20000000ULL -#define SHARED_REGION_NESTING_MIN_I386 0x00020000ULL -#define SHARED_REGION_NESTING_MAX_I386 0xFFFE0000ULL +#define SHARED_REGION_NESTING_MIN_I386 0x00200000ULL +#define SHARED_REGION_NESTING_MAX_I386 0xFFE00000ULL -#define SHARED_REGION_BASE_X86_64 0x00007FFF60000000ULL -#define SHARED_REGION_SIZE_X86_64 0x000000009FE00000ULL -#define SHARED_REGION_NESTING_BASE_X86_64 0x00007FFF60000000ULL -#define SHARED_REGION_NESTING_SIZE_X86_64 0x000000009FE00000ULL -#define SHARED_REGION_NESTING_MIN_X86_64 0x0000000000020000ULL -#define SHARED_REGION_NESTING_MAX_X86_64 0xFFFFFFFFFFFE0000ULL +#define SHARED_REGION_BASE_X86_64 0x00007FFF70000000ULL +#define SHARED_REGION_SIZE_X86_64 0x000000008FE00000ULL +#define SHARED_REGION_NESTING_BASE_X86_64 0x00007FFF80000000ULL +#define SHARED_REGION_NESTING_SIZE_X86_64 0x0000000040000000ULL +#define SHARED_REGION_NESTING_MIN_X86_64 0x0000000000200000ULL +#define SHARED_REGION_NESTING_MAX_X86_64 0xFFFFFFFFFFE00000ULL #define SHARED_REGION_BASE_PPC 0x90000000ULL #define SHARED_REGION_SIZE_PPC 0x20000000ULL @@ -70,6 +70,48 @@ #define SHARED_REGION_NESTING_MIN_ARM ? #define SHARED_REGION_NESTING_MAX_ARM ? 
+#if defined(__i386__) +#define SHARED_REGION_BASE SHARED_REGION_BASE_I386 +#define SHARED_REGION_SIZE SHARED_REGION_SIZE_I386 +#define SHARED_REGION_NESTING_BASE SHARED_REGION_NESTING_BASE_I386 +#define SHARED_REGION_NESTING_SIZE SHARED_REGION_NESTING_SIZE_I386 +#define SHARED_REGION_NESTING_MIN SHARED_REGION_NESTING_MIN_I386 +#define SHARED_REGION_NESTING_MAX SHARED_REGION_NESTING_MAX_I386 +#elif defined(__x86_64__) +#define SHARED_REGION_BASE SHARED_REGION_BASE_X86_64 +#define SHARED_REGION_SIZE SHARED_REGION_SIZE_X86_64 +#define SHARED_REGION_NESTING_BASE SHARED_REGION_NESTING_BASE_X86_64 +#define SHARED_REGION_NESTING_SIZE SHARED_REGION_NESTING_SIZE_X86_64 +#define SHARED_REGION_NESTING_MIN SHARED_REGION_NESTING_MIN_X86_64 +#define SHARED_REGION_NESTING_MAX SHARED_REGION_NESTING_MAX_X86_64 +#elif defined(__ppc__) +#define SHARED_REGION_BASE SHARED_REGION_BASE_PPC +#define SHARED_REGION_SIZE SHARED_REGION_SIZE_PPC +#define SHARED_REGION_NESTING_BASE SHARED_REGION_NESTING_BASE_PPC +#define SHARED_REGION_NESTING_SIZE SHARED_REGION_NESTING_SIZE_PPC +#define SHARED_REGION_NESTING_MIN SHARED_REGION_NESTING_MIN_PPC +#define SHARED_REGION_NESTING_MAX SHARED_REGION_NESTING_MAX_PPC +#elif defined(__ppc64__) +#define SHARED_REGION_BASE SHARED_REGION_BASE_PPC64 +#define SHARED_REGION_SIZE SHARED_REGION_SIZE_PPC64 +#define SHARED_REGION_NESTING_BASE SHARED_REGION_NESTING_BASE_PPC64 +#define SHARED_REGION_NESTING_SIZE SHARED_REGION_NESTING_SIZE_PPC64 +#define SHARED_REGION_NESTING_MIN SHARED_REGION_NESTING_MIN_PPC64 +#define SHARED_REGION_NESTING_MAX SHARED_REGION_NESTING_MAX_PPC64 +#endif + +#ifdef KERNEL_PRIVATE + +/* + * This routine sets the current source of power. + * Arguments: + * 0 if it is an external source (connected to power) + * 1 if it is an internal power source, i.e. battery + */ + +void post_sys_powersource(int); + +#endif /* KERNEL_PRIVATE */ /* * All shared_region_* declarations are a private interface * between dyld and the kernel. 
diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h index aa7508d76..11e9211f8 100644 --- a/osfmk/mach/syscall_sw.h +++ b/osfmk/mach/syscall_sw.h @@ -92,7 +92,6 @@ kernel_trap(semaphore_timedwait_trap,-38,3) kernel_trap(semaphore_timedwait_signal_trap,-39,4) #if !defined(__LP64__) -kernel_trap(init_process,-41,0) kernel_trap(map_fd,-43,5) #endif /* __LP64__ */ @@ -100,13 +99,16 @@ kernel_trap(task_name_for_pid,-44,3) kernel_trap(task_for_pid,-45,3) kernel_trap(pid_for_task,-46,2) -#if !defined(__LP64__) +#if defined(__LP64__) kernel_trap(macx_swapon,-48, 4) kernel_trap(macx_swapoff,-49, 2) +#else /* __LP64__ */ +kernel_trap(macx_swapon,-48, 5) +kernel_trap(macx_swapoff,-49, 3) +#endif /* __LP64__ */ kernel_trap(macx_triggers,-51, 4) kernel_trap(macx_backing_store_suspend,-52, 1) kernel_trap(macx_backing_store_recovery,-53, 1) -#endif /* __LP64__ */ /* These are currently used by pthreads even on LP64 */ /* But as soon as that is fixed - they will go away there */ diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs index d8f69f47c..ceebc9529 100644 --- a/osfmk/mach/task.defs +++ b/osfmk/mach/task.defs @@ -371,4 +371,23 @@ routine task_set_policy( limit : policy_limit_t; change : boolean_t); +/* + * Read the selected state which is to be installed on new + * threads in the task as they are created. + */ +routine task_get_state( + task : task_t; + flavor : thread_state_flavor_t; + out old_state : thread_state_t, CountInOut); + +/* + * Set the selected state information to be installed on + * all subsequently created threads in the task. 
+ */ +routine task_set_state( + task : task_t; + flavor : thread_state_flavor_t; + new_state : thread_state_t); + + /* vim: set ft=c : */ diff --git a/osfmk/mach/task_access.defs b/osfmk/mach/task_access.defs index 7035dbece..1696fd3cf 100644 --- a/osfmk/mach/task_access.defs +++ b/osfmk/mach/task_access.defs @@ -49,4 +49,11 @@ routine check_task_access( target_pid : int32_t; ServerAuditToken caller_cred : audit_token_t); +/* + * Search for a code signature for unsigned executables + */ +routine find_code_signature( + task_access_port : mach_port_t; + new_pid : int32_t); + /* vim: set ft=c : */ diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index 1a1b1ec95..cab9c1757 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -217,6 +217,17 @@ typedef struct task_affinity_tag_info *task_affinity_tag_info_t; #define TASK_AFFINITY_TAG_INFO_COUNT \ (sizeof(task_affinity_tag_info_data_t) / sizeof(natural_t)) +#define TASK_DYLD_INFO 17 /* This is experimental. */ + +struct task_dyld_info { + mach_vm_address_t all_image_info_addr; + mach_vm_size_t all_image_info_size; +}; +typedef struct task_dyld_info task_dyld_info_data_t; +typedef struct task_dyld_info *task_dyld_info_t; +#define TASK_DYLD_INFO_COUNT \ + (sizeof(task_dyld_info_data_t) / sizeof(natural_t)) + #pragma pack() diff --git a/osfmk/mach/task_special_ports.h b/osfmk/mach/task_special_ports.h index 99cd63450..ec980cfe2 100644 --- a/osfmk/mach/task_special_ports.h +++ b/osfmk/mach/task_special_ports.h @@ -92,8 +92,6 @@ typedef int task_special_port_t; #define TASK_ACCESS_PORT 9 /* Permission check for task_for_pid. 
*/ -#define TASK_AUTOMOUNTD_PORT 10 /* Port to talk to the automounter */ - #define task_get_wired_ledger_port(task, port) \ (task_get_special_port((task), TASK_WIRED_LEDGER_PORT, (port))) @@ -140,10 +138,4 @@ typedef int task_special_port_t; #define task_set_task_access_port(task, port) \ (task_set_special_port((task), TASK_ACCESS_PORT, (port))) -#define task_get_automountd_port(task, port) \ - (task_get_special_port((task), TASK_AUTOMOUNTD_PORT, (port))) - -#define task_set_automountd_port(task, port) \ - (task_set_special_port((task), TASK_AUTOMOUNTD_PORT, (port))) - #endif /* _MACH_TASK_SPECIAL_PORTS_H_ */ diff --git a/osfmk/mach/thread_info.h b/osfmk/mach/thread_info.h index e59b3d71a..5f51aeade 100644 --- a/osfmk/mach/thread_info.h +++ b/osfmk/mach/thread_info.h @@ -106,6 +106,19 @@ typedef struct thread_basic_info *thread_basic_info_t; #define THREAD_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ (sizeof(thread_basic_info_data_t) / sizeof(natural_t))) +#define THREAD_IDENTIFIER_INFO 4 /* thread id and other information */ + +struct thread_identifier_info { + uint64_t thread_id; /* system-wide unique 64-bit thread id */ + uint64_t thread_handle; /* handle to be used by libproc */ + uint64_t dispatch_qaddr; /* libdispatch queue address */ +}; + +typedef struct thread_identifier_info thread_identifier_info_data_t; +typedef struct thread_identifier_info *thread_identifier_info_t; +#define THREAD_IDENTIFIER_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof(thread_identifier_info_data_t) / sizeof(natural_t))) + /* * Scale factor for usage field. */ diff --git a/osfmk/mach/vm32_map.defs b/osfmk/mach/vm32_map.defs new file mode 100644 index 000000000..d28be8de8 --- /dev/null +++ b/osfmk/mach/vm32_map.defs @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_FREE_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: mach/vm_map.defs + * + * Exported kernel VM calls for 32-bit client tasks. + */ + +subsystem +#if KERNEL_SERVER + KernelServer +#endif /* KERNEL_SERVER */ + map 3800; + +serverprefix vm32_; + +#include +#include +#include + +#if VM32_SUPPORT + +/* See vm_map.defs for more information */ + +routine region( + target_task : vm_map_t; + inout address : vm32_address_t; + out size : vm32_size_t; + flavor : vm_region_flavor_t; + out info : vm_region_info_t, CountInOut; + out object_name : memory_object_name_t = + MACH_MSG_TYPE_MOVE_SEND + ctype: mach_port_t); + +routine allocate( + target_task : vm_task_entry_t; + inout address : vm32_address_t; + size : vm32_size_t; + flags : int); + +routine deallocate( + target_task : vm_task_entry_t; + address : vm32_address_t; + size : vm32_size_t); + +routine protect( + target_task : vm_task_entry_t; + address : vm32_address_t; + size : vm32_size_t; + set_maximum : boolean_t; + new_protection : vm_prot_t); + +routine inherit( + target_task : vm_task_entry_t; + address : vm32_address_t; + size : vm32_size_t; + new_inheritance : vm_inherit_t); + +routine read( + target_task : vm_map_t; + address : vm32_address_t; + size : vm32_size_t; + out data : pointer_t); + +routine read_list( + target_task : vm_map_t; + inout data_list : vm32_read_entry_t; + count : natural_t); + +routine write( + target_task : vm_map_t; + address : vm32_address_t; + data : pointer_t); + +routine copy( + target_task : vm_map_t; + source_address : 
vm32_address_t; + size : vm32_size_t; + dest_address : vm32_address_t); + +routine read_overwrite( + target_task : vm_map_t; + address : vm32_address_t; + size : vm32_size_t; + data : vm32_address_t; + out outsize : vm32_size_t); + + +routine msync( + target_task : vm_map_t; + address : vm32_address_t; + size : vm32_size_t; + sync_flags : vm_sync_t ); + +routine behavior_set( + target_task : vm_map_t; + address : vm32_address_t; + size : vm32_size_t; + new_behavior : vm_behavior_t); + +routine map( + target_task : vm_task_entry_t; + inout address : vm32_address_t; + size : vm32_size_t; + mask : vm32_address_t; + flags : int; + object : mem_entry_name_port_t; + offset : vm32_offset_t; + copy : boolean_t; + cur_protection : vm_prot_t; + max_protection : vm_prot_t; + inheritance : vm_inherit_t); + +routine machine_attribute( + target_task : vm_map_t; + address : vm32_address_t; + size : vm32_size_t; + attribute : vm_machine_attribute_t; + inout value : vm_machine_attribute_val_t); + +routine remap( + target_task : vm_map_t; + inout target_address : vm32_address_t; + size : vm32_size_t; + mask : vm32_address_t; + anywhere : boolean_t; + src_task : vm_map_t; + src_address : vm32_address_t; + copy : boolean_t; + out cur_protection : vm_prot_t; + out max_protection : vm_prot_t; + inheritance : vm_inherit_t); + +routine _task_wire( + target_task : vm_map_t; + must_wire : boolean_t); + +routine make_memory_entry( + target_task :vm_map_t; + inout size :vm32_size_t; + offset :vm32_offset_t; + permission :vm_prot_t; + out object_handle :mem_entry_name_port_move_send_t; + parent_entry :mem_entry_name_port_t); + +routine map_page_query( + target_map :vm_map_t; + offset :vm32_offset_t; + out disposition :integer_t; + out ref_count :integer_t); + +routine region_info( + task : vm_map_t; + address : vm32_address_t; + out region : vm_info_region_t; + out objects : vm_info_object_array_t); + +routine mapped_pages_info( + task : vm_map_t; + out pages : page_address_array_t); + +skip; 
/* was vm_region_object_create */ + +routine region_recurse( + target_task : vm_map_t; + inout address : vm32_address_t; + out size : vm32_size_t; + inout nesting_depth : natural_t; + out info : vm_region_recurse_info_t,CountInOut); + +routine region_recurse_64( + target_task : vm_map_t; + inout address : vm32_address_t; + out size : vm32_size_t; + inout nesting_depth : natural_t; + out info : vm_region_recurse_info_t,CountInOut); + +routine region_info_64( + task : vm_map_t; + address : vm32_address_t; + out region : vm_info_region_64_t; + out objects : vm_info_object_array_t); + +routine region_64( + target_task : vm_map_t; + inout address : vm32_address_t; + out size : vm32_size_t; + flavor : vm_region_flavor_t; + out info : vm_region_info_t, CountInOut; + out object_name : memory_object_name_t = + MACH_MSG_TYPE_MOVE_SEND + ctype: mach_port_t); + +routine make_memory_entry_64( + target_task :vm_map_t; + inout size :memory_object_size_t; + offset :memory_object_offset_t; + permission :vm_prot_t; + out object_handle :mach_port_move_send_t; + parent_entry :mem_entry_name_port_t); + + + +routine map_64( + target_task : vm_task_entry_t; + inout address : vm32_address_t; + size : vm32_size_t; + mask : vm32_address_t; + flags : int; + object : mem_entry_name_port_t; + offset : memory_object_offset_t; + copy : boolean_t; + cur_protection : vm_prot_t; + max_protection : vm_prot_t; + inheritance : vm_inherit_t); + +skip; /* was vm_map_get_upl */ +skip; /* was vm_upl_map */ +skip; /* was vm_upl_unmap */ + +routine purgable_control( + target_task : vm_map_t; + address : vm32_address_t; + control : vm_purgable_t; + inout state : int); + +#endif /* VM32_SUPPORT */ + +/* vim: set ft=c : */ diff --git a/osfmk/mach/vm_behavior.h b/osfmk/mach/vm_behavior.h index 0d12ceef6..ec0ee729c 100644 --- a/osfmk/mach/vm_behavior.h +++ b/osfmk/mach/vm_behavior.h @@ -54,11 +54,25 @@ typedef int vm_behavior_t; */ +/* + * The following behaviors affect the memory region's future behavior + * 
and are stored in the VM map entry data structure. + */ #define VM_BEHAVIOR_DEFAULT ((vm_behavior_t) 0) /* default */ #define VM_BEHAVIOR_RANDOM ((vm_behavior_t) 1) /* random */ #define VM_BEHAVIOR_SEQUENTIAL ((vm_behavior_t) 2) /* forward sequential */ #define VM_BEHAVIOR_RSEQNTL ((vm_behavior_t) 3) /* reverse sequential */ + +/* + * The following "behaviors" affect the memory region only at the time of the + * call and are not stored in the VM map entry. + */ #define VM_BEHAVIOR_WILLNEED ((vm_behavior_t) 4) /* will need in near future */ #define VM_BEHAVIOR_DONTNEED ((vm_behavior_t) 5) /* dont need in near future */ +#define VM_BEHAVIOR_FREE ((vm_behavior_t) 6) /* free memory without write-back */ +#define VM_BEHAVIOR_ZERO_WIRED_PAGES ((vm_behavior_t) 7) /* zero out the wired pages of an entry if it is being deleted without unwiring them first */ +#define VM_BEHAVIOR_REUSABLE ((vm_behavior_t) 8) +#define VM_BEHAVIOR_REUSE ((vm_behavior_t) 9) +#define VM_BEHAVIOR_CAN_REUSE ((vm_behavior_t) 10) #endif /*_MACH_VM_BEHAVIOR_H_*/ diff --git a/osfmk/mach/vm_param.h b/osfmk/mach/vm_param.h index 13078d58e..468920caa 100644 --- a/osfmk/mach/vm_param.h +++ b/osfmk/mach/vm_param.h @@ -96,6 +96,9 @@ #define atop_64(x) ((uint64_t)(x) >> PAGE_SHIFT) #define ptoa_64(x) ((uint64_t)(x) << PAGE_SHIFT) +#define atop_kernel(x) ((vm_address_t)(x) >> PAGE_SHIFT) +#define ptoa_kernel(x) ((vm_address_t)(x) << PAGE_SHIFT) + /* * While the following block is enabled, the legacy atop and ptoa * macros will behave correctly. If not, they will generate @@ -124,8 +127,8 @@ * address space size) VM types. */ -#define round_page(x) (((vm_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) -#define trunc_page(x) ((vm_offset_t)(x) & ~((signed)PAGE_MASK)) +#define round_page(x) (((vm_offset_t)(x) + PAGE_MASK) & ~((vm_offset_t)PAGE_MASK)) +#define trunc_page(x) ((vm_offset_t)(x) & ~((vm_offset_t)PAGE_MASK)) /* * Round off or truncate to the nearest page. 
These will work @@ -139,11 +142,10 @@ * associated with the specific VM type should be used. */ -#define round_page_32(x) (((uint32_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) -#define trunc_page_32(x) ((uint32_t)(x) & ~((signed)PAGE_MASK)) -#define round_page_64(x) (((uint64_t)(x) + PAGE_MASK_64) & ~((signed)PAGE_MASK_64)) -#define trunc_page_64(x) ((uint64_t)(x) & ~((signed)PAGE_MASK_64)) - +#define round_page_32(x) (((uint32_t)(x) + PAGE_MASK) & ~((uint32_t)PAGE_MASK)) +#define trunc_page_32(x) ((uint32_t)(x) & ~((uint32_t)PAGE_MASK)) +#define round_page_64(x) (((uint64_t)(x) + PAGE_MASK_64) & ~((uint64_t)PAGE_MASK_64)) +#define trunc_page_64(x) ((uint64_t)(x) & ~((uint64_t)PAGE_MASK_64)) /* * Enable the following block to find uses of xxx_32 macros that should @@ -209,11 +211,26 @@ * an exact page multiple. */ -#define page_aligned(x) ((((vm_object_offset_t) (x)) & PAGE_MASK) == 0) +#define page_aligned(x) (((x) & PAGE_MASK) == 0) extern vm_size_t mem_size; /* 32-bit size of memory - limited by maxmem - deprecated */ extern uint64_t max_mem; /* 64-bit size of memory - limited by maxmem */ +/* + * The default pager does not handle 64-bit offsets inside its objects, + * so this limits the size of anonymous memory objects to 4GB minus 1 page. + * When we need to allocate a chunk of anonymous memory over that size, + * we have to allocate more than one chunk. + */ +#define ANON_MAX_SIZE 0xFFFFF000ULL +/* + * Work-around for + * Break large anonymous memory areas into 128MB chunks to alleviate + * the cost of copying when copy-on-write is not possible because a small + * portion of it being wired. + */ +#define ANON_CHUNK_SIZE (128ULL * 1024 * 1024) /* 128MB */ + #ifdef XNU_KERNEL_PRIVATE extern uint64_t mem_actual; /* 64-bit size of memory - not limited by maxmem */ @@ -230,16 +247,19 @@ extern vm_size_t page_mask; extern int page_shift; /* We need a way to get rid of compiler warnings when we cast from */ -/* a 64 bit value to an address that is 32 bits. 
*/ -/* We know at this point the cast is harmless but sometime in */ -/* the future it may not be. */ -/* When size of an int is no longer equal to size of uintptr_t then */ -/* the compile will fail and we know we need to fix our cast. */ +/* a 64 bit value to an address (which may be 32 bits or 64 bits). */ +/* A uintptr_t is used to convert the value to the right precision, and */ +/* then to an address. This macro is also used to convert addresses */ +/* to 32-bit integers, which is a hard failure for a 64-bit kernel */ #include #ifndef __CAST_DOWN_CHECK #define __CAST_DOWN_CHECK -typedef char __NEED_TO_CHANGE_CAST_DOWN[ sizeof(uintptr_t) == sizeof(int) ? 0 : -1 ]; -#define CAST_DOWN( type, addr ) ( ((type)((uintptr_t) (addr))) ) + +#define CAST_DOWN( type, addr ) \ + ( ((type)((uintptr_t) (addr)/(sizeof(type) < sizeof(uintptr_t) ? 0 : 1))) ) + +#define CAST_DOWN_EXPLICIT( type, addr ) ( ((type)((uintptr_t) (addr))) ) + #endif /* __CAST_DOWN_CHECK */ #endif /* ASSEMBLER */ diff --git a/osfmk/mach/vm_purgable.h b/osfmk/mach/vm_purgable.h index 32463755b..2d8ca3038 100644 --- a/osfmk/mach/vm_purgable.h +++ b/osfmk/mach/vm_purgable.h @@ -57,6 +57,12 @@ typedef int vm_purgable_t; */ #define VM_PURGABLE_SET_STATE ((vm_purgable_t) 0) /* set state of purgeable object */ #define VM_PURGABLE_GET_STATE ((vm_purgable_t) 1) /* get state of purgeable object */ +#define VM_PURGABLE_PURGE_ALL ((vm_purgable_t) 2) /* purge all volatile objects now */ + +#define VM_PURGABLE_DEBUG_SHIFT 12 +#define VM_PURGABLE_DEBUG_MASK (0x3 << VM_PURGABLE_DEBUG_SHIFT) +#define VM_PURGABLE_DEBUG_EMPTY (0x1 << VM_PURGABLE_DEBUG_SHIFT) +#define VM_PURGABLE_DEBUG_FAULT (0x2 << VM_PURGABLE_DEBUG_SHIFT) /* * Volatile memory ordering groups (group zero objects are purged before group 1, etc... 
@@ -119,4 +125,10 @@ typedef int vm_purgable_t; #define VM_PURGABLE_EMPTY 2 /* purgeable object is volatile and empty */ #define VM_PURGABLE_DENY 3 /* (mark) object not purgeable */ +#define VM_PURGABLE_ALL_MASKS (VM_PURGABLE_STATE_MASK | \ + VM_VOLATILE_ORDER_MASK | \ + VM_PURGABLE_ORDERING_MASK | \ + VM_PURGABLE_BEHAVIOR_MASK | \ + VM_VOLATILE_GROUP_MASK | \ + VM_PURGABLE_DEBUG_MASK) #endif /* _MACH_VM_PURGABLE_H_ */ diff --git a/osfmk/mach/vm_region.h b/osfmk/mach/vm_region.h index f8b8ec650..ceb42b7b5 100644 --- a/osfmk/mach/vm_region.h +++ b/osfmk/mach/vm_region.h @@ -52,6 +52,11 @@ #pragma pack(4) +// LP64todo: all the current tools are 32bit, obviously never worked for 64b +// so probably should be a real 32b ID vs. ptr. +// Current users just check for equality +typedef uint32_t vm32_object_id_t; + /* * Types defined: * @@ -204,7 +209,7 @@ struct vm_region_submap_info { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm_offset_t object_id; /* obj/map name, not a handle */ + vm32_object_id_t object_id; /* obj/map name, not a handle */ unsigned short user_wired_count; }; @@ -230,7 +235,7 @@ struct vm_region_submap_info_64 { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm_offset_t object_id; /* obj/map name, not a handle */ + vm32_object_id_t object_id; /* obj/map name, not a handle */ unsigned short user_wired_count; }; @@ -252,7 +257,7 @@ struct vm_region_submap_short_info_64 { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm_offset_t object_id; /* obj/map name, not a handle */ + vm32_object_id_t object_id; /* obj/map name, not a handle */ unsigned short user_wired_count; }; @@ -273,11 +278,43 @@ struct vm_read_entry { vm_size_t size; }; +#if VM32_SUPPORT +struct 
vm32_read_entry { + vm32_address_t address; + vm32_size_t size; +}; +#endif + + #define VM_MAP_ENTRY_MAX (256) typedef struct mach_vm_read_entry mach_vm_read_entry_t[VM_MAP_ENTRY_MAX]; typedef struct vm_read_entry vm_read_entry_t[VM_MAP_ENTRY_MAX]; +#if VM32_SUPPORT +typedef struct vm32_read_entry vm32_read_entry_t[VM_MAP_ENTRY_MAX]; +#endif #pragma pack() + +#define VM_PAGE_INFO_MAX +typedef int *vm_page_info_t; +typedef int vm_page_info_data_t[VM_PAGE_INFO_MAX]; +typedef int vm_page_info_flavor_t; + +#define VM_PAGE_INFO_BASIC 1 +struct vm_page_info_basic { + int disposition; + int ref_count; + vm_object_id_t object_id; + memory_object_offset_t offset; + int depth; +}; +typedef struct vm_page_info_basic *vm_page_info_basic_t; +typedef struct vm_page_info_basic vm_page_info_basic_data_t; + +#define VM_PAGE_INFO_BASIC_COUNT ((mach_msg_type_number_t) \ + (sizeof(vm_page_info_basic_data_t)/sizeof(int))) + + #endif /*_MACH_VM_REGION_H_*/ diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h index f0bdd1a47..36a79fdd1 100644 --- a/osfmk/mach/vm_statistics.h +++ b/osfmk/mach/vm_statistics.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,6 +68,19 @@ #include + +/* + * vm_statistics + * + * History: + * rev0 - original structure. + * rev1 - added purgable info (purgable_count and purges). + * rev2 - added speculative_count. + * + * Note: you cannot add any new fields to this structure. Add them below in + * vm_statistics64. 
+ */ + struct vm_statistics { natural_t free_count; /* # of pages free */ natural_t active_count; /* # of pages active */ @@ -96,9 +109,76 @@ struct vm_statistics { natural_t speculative_count; /* # of pages speculative */ }; +/* Used by all architectures */ typedef struct vm_statistics *vm_statistics_t; typedef struct vm_statistics vm_statistics_data_t; +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ + +typedef struct vm_statistics *vm_statistics64_t; +typedef struct vm_statistics vm_statistics64_data_t; + +#define VM_STATISTICS_TRUNCATE_TO_32_BIT(value) value + +#else /* !(defined(__ppc__)) */ + +/* + * vm_statistics64 + * + * History: + * rev0 - original structure. + * rev1 - added purgable info (purgable_count and purges). + * rev2 - added speculative_count. + * ---- + * rev3 - changed name to vm_statistics64. + * changed some fields in structure to 64-bit on + * arm, i386 and x86_64 architectures. + * + */ + +struct vm_statistics64 { + natural_t free_count; /* # of pages free */ + natural_t active_count; /* # of pages active */ + natural_t inactive_count; /* # of pages inactive */ + natural_t wire_count; /* # of pages wired down */ + uint64_t zero_fill_count; /* # of zero fill pages */ + uint64_t reactivations; /* # of pages reactivated */ + uint64_t pageins; /* # of pageins */ + uint64_t pageouts; /* # of pageouts */ + uint64_t faults; /* # of faults */ + uint64_t cow_faults; /* # of copy-on-writes */ + uint64_t lookups; /* object cache lookups */ + uint64_t hits; /* object cache hits */ + + /* added for rev1 */ + uint64_t purges; /* # of pages purged */ + natural_t purgeable_count; /* # of pages purgeable */ + + /* added for rev2 */ + /* + * NB: speculative pages are already accounted for in "free_count", + * so "speculative_count" is the number of "free" pages that are + * used to hold data that was read speculatively from disk but + * haven't actually been used by anyone so far. 
+ */ + natural_t speculative_count; /* # of pages speculative */ + +} +; + +typedef struct vm_statistics64 *vm_statistics64_t; +typedef struct vm_statistics64 vm_statistics64_data_t; + +/* + * VM_STATISTICS_TRUNCATE_TO_32_BIT + * + * This is used by host_statistics() to truncate and peg the 64-bit in-kernel values from + * vm_statistics64 to the 32-bit values of the older structure above (vm_statistics). + */ +#define VM_STATISTICS_TRUNCATE_TO_32_BIT(value) ((uint32_t)(((value) > UINT32_MAX ) ? UINT32_MAX : (value))) + +#endif /* !(defined(__ppc__)) */ + /* included for the vm_map_page_query call */ @@ -109,6 +189,7 @@ typedef struct vm_statistics vm_statistics_data_t; #define VM_PAGE_QUERY_PAGE_PAGED_OUT 0x10 #define VM_PAGE_QUERY_PAGE_COPIED 0x20 #define VM_PAGE_QUERY_PAGE_SPECULATIVE 0x40 +#define VM_PAGE_QUERY_PAGE_EXTERNAL 0x80 #define VM_PAGE_QUERY_PAGE_CS_VALIDATED 0x100 #define VM_PAGE_QUERY_PAGE_CS_TAINTED 0x200 @@ -163,16 +244,32 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_FLAGS_ANYWHERE 0x0001 #define VM_FLAGS_PURGABLE 0x0002 #define VM_FLAGS_NO_CACHE 0x0010 - #ifdef KERNEL_PRIVATE -#define VM_FLAGS_NO_PMAP_CHECK 0x8000 /* do not check that pmap is empty */ -#define VM_FLAGS_OVERWRITE 0x4000 /* delete any existing mappings first */ -#define VM_FLAGS_BEYOND_MAX 0x2000 /* map beyond the map's limits */ -#define VM_FLAGS_ALREADY 0x1000 /* OK if same mapping already exists */ +#define VM_FLAGS_BELOW_MIN 0x0080 /* map below the map's min offset */ +#define VM_FLAGS_PERMANENT 0x0100 /* mapping can NEVER be unmapped */ +#define VM_FLAGS_GUARD_AFTER 0x0200 /* guard page after the mapping */ +#define VM_FLAGS_GUARD_BEFORE 0x0400 /* guard page before the mapping */ #define VM_FLAGS_SUBMAP 0x0800 /* mapping a VM submap */ +#define VM_FLAGS_ALREADY 0x1000 /* OK if same mapping already exists */ +#define VM_FLAGS_BEYOND_MAX 0x2000 /* map beyond the map's max offset */ +#define VM_FLAGS_OVERWRITE 0x4000 /* delete any existing mappings first */ 
+#define VM_FLAGS_NO_PMAP_CHECK 0x8000 /* do not check that pmap is empty */ #endif /* KERNEL_PRIVATE */ -#define VM_FLAGS_GUARD_BEFORE 0x0010 -#define VM_FLAGS_GUARD_AFTER 0x0020 + +/* + * VM_FLAGS_SUPERPAGE_MASK + * 3 bits that specify whether large pages should be used instead of + * base pages (!=0), as well as the requested page size. + */ +#define VM_FLAGS_SUPERPAGE_MASK 0x70000 /* bits 0x10000, 0x20000, 0x40000 */ +#define VM_FLAGS_SUPERPAGE_SHIFT 16 + +#define SUPERPAGE_NONE 0 /* no superpages, if all bits are 0 */ +#define VM_FLAGS_SUPERPAGE_NONE (SUPERPAGE_NONE< +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +#if defined(__ppc__) +#include +#include +#endif + +#if CONFIG_COUNTERS + +/* various debug logging enable */ +#undef DEBUG_COUNTERS + +typedef uint8_t pmc_state_event_t; + +#define PMC_STATE_EVENT_START 0 +#define PMC_STATE_EVENT_STOP 1 +#define PMC_STATE_EVENT_FREE 2 +#define PMC_STATE_EVENT_INTERRUPT 3 +#define PMC_STATE_EVENT_END_OF_INTERRUPT 4 +#define PMC_STATE_EVENT_CONTEXT_IN 5 +#define PMC_STATE_EVENT_CONTEXT_OUT 6 +#define PMC_STATE_EVENT_LOAD_FINISHED 7 +#define PMC_STATE_EVENT_STORE_FINISHED 8 + +/* PMC spin timeouts */ +#define PMC_SPIN_THRESHOLD 10 /* Number of spins to allow before checking mach_absolute_time() */ +#define PMC_SPIN_TIMEOUT_US 10 /* Time in microseconds before the spin causes an assert */ + +uint64_t pmc_spin_timeout_count = 0; /* Number of times where a PMC spin loop causes a timeout */ + +#ifdef DEBUG_COUNTERS +# include +# define COUNTER_DEBUG(...) \ + do { \ + kprintf("[%s:%s][%u] ", __FILE__, __PRETTY_FUNCTION__, cpu_number()); \ + kprintf(__VA_ARGS__); \ + } while(0) + +# define PRINT_PERF_MON(x) \ + do { \ + kprintf("perfmon: %p (obj: %p refCt: %u switchable: %u)\n", \ + x, x->object, x->useCount, \ + x->methods.supports_context_switching ? 
\
+			x->methods.supports_context_switching(x->object) : 0);	\
+	} while(0)
+
+/*
+ * pmc_state_state_name returns a printable name for the state portion of
+ * @state (as extracted by PMC_STATE_STATE), or "UNKNOWN" if the value is not
+ * one of the PMC_STATE_STATE_* constants handled below.
+ *
+ * The returned pointer refers to a string literal and must not be freed or
+ * modified.
+ */
+static const char * pmc_state_state_name(pmc_state_t state) {
+	switch (PMC_STATE_STATE(state)) {
+		case PMC_STATE_STATE_INVALID:
+			return "INVALID";
+		case PMC_STATE_STATE_STOP:
+			return "STOP";
+		case PMC_STATE_STATE_CAN_RUN:
+			return "CAN_RUN";
+		case PMC_STATE_STATE_LOAD:
+			return "LOAD";
+		case PMC_STATE_STATE_RUN:
+			return "RUN";
+		case PMC_STATE_STATE_STORE:
+			return "STORE";
+		case PMC_STATE_STATE_INTERRUPT:
+			return "INTERRUPT";
+		case PMC_STATE_STATE_DEALLOC:
+			return "DEALLOC";
+		default:
+			return "UNKNOWN";
+	}
+}
+
+/*
+ * pmc_state_event_name returns a printable name for the given
+ * PMC_STATE_EVENT_* value, or "UNKNOWN" for any other value.
+ *
+ * The returned pointer refers to a string literal and must not be freed or
+ * modified.
+ */
+static const char * pmc_state_event_name(pmc_state_event_t event) {
+	switch (event) {
+		case PMC_STATE_EVENT_START:
+			return "START";
+		case PMC_STATE_EVENT_STOP:
+			return "STOP";
+		case PMC_STATE_EVENT_FREE:
+			return "FREE";
+		case PMC_STATE_EVENT_INTERRUPT:
+			return "INTERRUPT";
+		case PMC_STATE_EVENT_END_OF_INTERRUPT:
+			return "END OF INTERRUPT";
+		case PMC_STATE_EVENT_CONTEXT_IN:
+			return "CONTEXT IN";
+		case PMC_STATE_EVENT_CONTEXT_OUT:
+			return "CONTEXT OUT";
+		case PMC_STATE_EVENT_LOAD_FINISHED:
+			return "LOAD_FINISHED";
+		case PMC_STATE_EVENT_STORE_FINISHED:
+			return "STORE_FINISHED";
+		default:
+			return "UNKNOWN";
+	}
+}
+
+# define PMC_STATE_FORMAT "<%s, %u, %s%s%s>"
+# define PMC_STATE_ARGS(x) pmc_state_state_name(x), PMC_STATE_CONTEXT_COUNT(x), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_INTERRUPTING) ? "I" : ""), \
+	((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_STOPPING) ? "S" : ""), ((PMC_STATE_FLAGS(x) & PMC_STATE_FLAGS_DEALLOCING) ? "D" : "")
+#else
+# define COUNTER_DEBUG(...)
+# define PRINT_PERF_MON(x)
+# define PMC_STATE_FORMAT
+# define PMC_STATE_ARGS(x)
+#endif
+
+/*!struct
+ * pmc_config is the data behind a pmc_config_t.
+ * @member object A pointer to an instance of IOPerformanceCounterConfiguration
+ * @member method A pointer to a method to call to handle PMI.
+ * @member interrupt_after_value Cause a PMI after the counter counts this many + * events. + * @member refCon Passed to the @method method as the refCon argument. + */ +struct pmc_config { + pmc_config_object_t object; + volatile pmc_interrupt_method_t method; + uint64_t interrupt_after_value; + void *refCon; +}; + +/* + * Allocation Zones + * + * Two allocation zones - Perf zone small and Perf zone big. + * Each zone has associated maximums, defined below. + * The small zone is the max of the smallest allocation objects (all sizes on + * K64): + * perf_monitor_t - 48 bytes + * perf_monitor_methods_t - 28 bytes + * pmc_reservation_t - 48 bytes + * pmc_config_t - 32 bytes + * perf_small_zone unit size is (on K64) 48 bytes + * perf_small_zone max count must be max number of perf monitors, plus (max + * number of reservations * 2). The "*2" is because each reservation has a + * pmc_config_t within. + * + * Big zone is max of the larger allocation units + * pmc_t - 144 bytes + * pmc_methods_t - 116 bytes + * perf_big_zone unit size is (on K64) 144 bytes + * perf_big_zone max count is the max number of PMCs we support. 
+ */ + +static zone_t perf_small_zone = NULL; +#define MAX_PERF_SMALLS (256 + 8196 + 8196) +#define PERF_SMALL_UNIT_SZ (MAX(MAX(sizeof(struct perf_monitor), \ + sizeof(struct pmc_reservation)), sizeof(struct pmc_config))) + +static zone_t perf_big_zone = NULL; +#define MAX_PERF_BIGS (1024) +#define PERF_BIG_UNIT_SZ (sizeof(struct pmc)) + +/* + * Locks and Lock groups + */ +static lck_grp_t *pmc_lock_grp = LCK_GRP_NULL; +static lck_grp_attr_t *pmc_lock_grp_attr; +static lck_attr_t *pmc_lock_attr; + +/* PMC tracking queue locks */ +static lck_spin_t perf_monitor_queue_spin; /* protects adding and removing from queue */ +static lck_spin_t perf_counters_queue_spin; /* protects adding and removing from queue */ + +/* Reservation tracking queues lock */ +static lck_spin_t reservations_spin; + +/* + * Tracking queues + * + * Keeps track of registered perf monitors and perf counters + */ +static queue_t perf_monitors_queue = NULL; +static volatile uint32_t perf_monitors_count = 0U; + +static queue_t perf_counters_queue = NULL; +static volatile uint32_t perf_counters_count = 0U; + +/* + * Reservation queues + * + * Keeps track of all system, task, and thread-level reservations (both active and + * inactive). + * + * We track them all here (rather than in their respective task or thread only) + * so that we can inspect our tracking data directly (rather than peeking at + * every task and thread) to determine if/when a new reservation would + * constitute a conflict. 
+ */ +static queue_t system_reservations = NULL; +static volatile uint32_t system_reservation_count __attribute__((aligned(4))) = 0U; + +static queue_t task_reservations = NULL; +static volatile uint32_t task_reservation_count __attribute__((aligned(4))) = 0U; + +static queue_t thread_reservations = NULL; +static volatile uint32_t thread_reservation_count __attribute__((aligned(4))) = 0U; + + +#if XNU_KERNEL_PRIVATE + +/* + * init_pmc_locks creates and initializes all the locks and lock groups and lock + * attributes required for the pmc sub-system. + */ +static void init_pmc_locks(void) { + pmc_lock_attr = lck_attr_alloc_init(); + assert(pmc_lock_attr); + + pmc_lock_grp_attr = lck_grp_attr_alloc_init(); + assert(pmc_lock_grp_attr); + + pmc_lock_grp = lck_grp_alloc_init("pmc", pmc_lock_grp_attr); + assert(pmc_lock_grp); + + lck_spin_init(&perf_monitor_queue_spin, pmc_lock_grp, pmc_lock_attr); + lck_spin_init(&perf_counters_queue_spin, pmc_lock_grp, pmc_lock_attr); + + lck_spin_init(&reservations_spin, pmc_lock_grp, pmc_lock_attr); +} + +/* + * init_pmc_zones initializes the allocation zones used by the pmc subsystem + */ +static void init_pmc_zones(void) { + perf_small_zone = zinit(PERF_SMALL_UNIT_SZ, + MAX_PERF_SMALLS * PERF_SMALL_UNIT_SZ, MAX_PERF_SMALLS, + "pmc.small zone"); + + assert(perf_small_zone); + + perf_big_zone = zinit(PERF_BIG_UNIT_SZ, + MAX_PERF_BIGS * PERF_BIG_UNIT_SZ, MAX_PERF_BIGS, + "pmc.big zone"); + + assert(perf_big_zone); +} + +/* + * init_pmc_queues allocates and initializes the tracking queues for + * registering and reserving individual pmcs and perf monitors. 
+ */
+static void init_pmc_queues(void) {
+	/*
+	 * Each tracking queue is a heap-allocated queue head. queue_t is a
+	 * pointer type, so the allocation must be sized from the pointed-to
+	 * object (sizeof(*ptr)); sizeof(queue_t) would only reserve room for a
+	 * single pointer, while queue_init() writes both links of the head.
+	 */
+	perf_monitors_queue = (queue_t)kalloc(sizeof(*perf_monitors_queue));
+	assert(perf_monitors_queue);
+
+	queue_init(perf_monitors_queue);
+
+	perf_counters_queue = (queue_t)kalloc(sizeof(*perf_counters_queue));
+	assert(perf_counters_queue);
+
+	queue_init(perf_counters_queue);
+
+	system_reservations = (queue_t)kalloc(sizeof(*system_reservations));
+	assert(system_reservations);
+
+	queue_init(system_reservations);
+
+	task_reservations = (queue_t)kalloc(sizeof(*task_reservations));
+	assert(task_reservations);
+
+	queue_init(task_reservations);
+
+	thread_reservations = (queue_t)kalloc(sizeof(*thread_reservations));
+	assert(thread_reservations);
+
+	queue_init(thread_reservations);
+}
+
+/*
+ * pmc_bootstrap brings up all the necessary infrastructure required to use the
+ * pmc sub-system.
+ */
+__private_extern__
+void pmc_bootstrap(void) {
+	/* build our alloc zones */
+	init_pmc_zones();
+
+	/* build the locks */
+	init_pmc_locks();
+
+	/* build our tracking queues */
+	init_pmc_queues();
+}
+
+#endif /* XNU_KERNEL_PRIVATE */
+
+/*
+ * Perf Monitor Internals
+ */
+
+static perf_monitor_t perf_monitor_alloc(void) {
+	/* perf monitors come from the perf small zone */
+	return (perf_monitor_t)zalloc(perf_small_zone);
+}
+
+static void perf_monitor_free(void *pm) {
+	zfree(perf_small_zone, pm);
+}
+
+static void perf_monitor_init(perf_monitor_t pm) {
+	assert(pm);
+
+	pm->object = NULL;
+
+	bzero(&(pm->methods), sizeof(perf_monitor_methods_t));
+
+	pm->useCount = 1; /* initial retain count of 1, for caller */
+
+	pm->link.next = pm->link.prev = (queue_entry_t)NULL;
+}
+
+/*
+ * perf_monitor_dequeue removes the given perf_monitor_t from the
+ * perf_monitor_queue, thereby unregistering it with the system.
+ */ +static void perf_monitor_dequeue(perf_monitor_t pm) { + lck_spin_lock(&perf_monitor_queue_spin); + + /* + * remove the @pm object from the @perf_monitor_queue queue (it is of type + * and has a field called @link that is the queue_link_t + */ + queue_remove(perf_monitors_queue, pm, perf_monitor_t, link); + + perf_monitors_count--; + + lck_spin_unlock(&perf_monitor_queue_spin); +} + +/* + * perf_monitor_enqueue adds the given perf_monitor_t to the perf_monitor_queue, + * thereby registering it for use with the system. + */ +static void perf_monitor_enqueue(perf_monitor_t pm) { + lck_spin_lock(&perf_monitor_queue_spin); + + queue_enter(perf_monitors_queue, pm, perf_monitor_t, link); + + perf_monitors_count++; + + lck_spin_unlock(&perf_monitor_queue_spin); +} + +/* + * perf_monitor_reference increments the reference count for the given + * perf_monitor_t. + */ +static void perf_monitor_reference(perf_monitor_t pm) { + assert(pm); + + OSIncrementAtomic(&(pm->useCount)); +} + +/* + * perf_monitor_deallocate decrements the reference count for the given + * perf_monitor_t. If the reference count hits 0, the object is released back + * to the perf_small_zone via a call to perf_monitor_free(). + */ +static void perf_monitor_deallocate(perf_monitor_t pm) { + assert(pm); + + /* If we just removed the last reference count */ + if(1 == OSDecrementAtomic(&(pm->useCount))) { + /* Free the object */ + perf_monitor_free(pm); + } +} + +/* + * perf_monitor_find attempts to find a perf_monitor_t that corresponds to the + * given C++ object pointer that was used when registering with the subsystem. + * + * If found, the method returns the perf_monitor_t with an extra reference + * placed on the object (or NULL if not + * found). + * + * NOTE: Caller must use perf_monitor_deallocate to remove the extra reference after + * calling perf_monitor_find. 
+ */ +static perf_monitor_t perf_monitor_find(perf_monitor_object_t monitor) { + assert(monitor); + perf_monitor_t element = NULL; + perf_monitor_t found = NULL; + + lck_spin_lock(&perf_monitor_queue_spin); + + queue_iterate(perf_monitors_queue, element, perf_monitor_t, link) { + if(element && element->object == monitor) { + /* We found it - reference the object. */ + perf_monitor_reference(element); + found = element; + break; + } + } + + lck_spin_unlock(&perf_monitor_queue_spin); + + return found; +} + +/* + * perf_monitor_add_pmc adds a newly registered PMC to the perf monitor it is + * aassociated with. + */ +static void perf_monitor_add_pmc(perf_monitor_t pm, pmc_t pmc __unused) { + assert(pm); + assert(pmc); + + /* Today, we merely add a reference count now that a new pmc is attached */ + perf_monitor_reference(pm); +} + +/* + * perf_monitor_remove_pmc removes a newly *un*registered PMC from the perf + * monitor it is associated with. + */ +static void perf_monitor_remove_pmc(perf_monitor_t pm, pmc_t pmc __unused) { + assert(pm); + assert(pmc); + + /* Today, we merely remove a reference count now that the pmc is detached */ + perf_monitor_deallocate(pm); +} + +/* + * Perf Counter internals + */ + +static pmc_t pmc_alloc(void) { + return (pmc_t)zalloc(perf_big_zone); +} + +static void pmc_free(void *pmc) { + zfree(perf_big_zone, pmc); +} + +/* + * pmc_init initializes a newly allocated pmc_t + */ +static void pmc_init(pmc_t pmc) { + assert(pmc); + + pmc->object = NULL; + pmc->monitor = NULL; + + bzero(&pmc->methods, sizeof(pmc_methods_t)); + + /* One reference for the caller */ + pmc->useCount = 1; +} + +/* + * pmc_reference increments the reference count of the given pmc_t + */ +static void pmc_reference(pmc_t pmc) { + assert(pmc); + + OSIncrementAtomic(&(pmc->useCount)); +} + +/* + * pmc_deallocate decrements the reference count of the given pmc_t. 
If the + * reference count hits zero, the given pmc_t is deallocated and released back + * to the allocation zone. + */ +static void pmc_deallocate(pmc_t pmc) { + assert(pmc); + + /* If we just removed the last reference count */ + if(1 == OSDecrementAtomic(&(pmc->useCount))) { + /* Free the pmc */ + pmc_free(pmc); + } +} + +/* + * pmc_dequeue removes the given, newly *un*registered pmc from the + * perf_counters_queue. + */ +static void pmc_dequeue(pmc_t pmc) { + lck_spin_lock(&perf_counters_queue_spin); + + queue_remove(perf_counters_queue, pmc, pmc_t, link); + + perf_counters_count--; + + lck_spin_unlock(&perf_counters_queue_spin); +} + +/* + * pmc_enqueue adds the given, newly registered pmc to the perf_counters_queue + */ +static void pmc_enqueue(pmc_t pmc) { + lck_spin_lock(&perf_counters_queue_spin); + + queue_enter(perf_counters_queue, pmc, pmc_t, link); + + perf_counters_count++; + + lck_spin_unlock(&perf_counters_queue_spin); +} + +/* + * pmc_find attempts to locate a pmc_t that was registered with the given + * pmc_object_t pointer. If found, it returns the pmc_t with an extra reference + * which must be dropped by the caller by calling pmc_deallocate(). 
+ */ +static pmc_t pmc_find(pmc_object_t object) { + assert(object); + + lck_spin_lock(&perf_counters_queue_spin); + + pmc_t element = NULL; + pmc_t found = NULL; + + queue_iterate(perf_counters_queue, element, pmc_t, link) { + if(element && element->object == object) { + pmc_reference(element); + + found = element; + break; + } + } + + lck_spin_unlock(&perf_counters_queue_spin); + + return found; +} + +/* + * Config internals + */ + +/* Allocate a pmc_config_t */ +static pmc_config_t pmc_config_alloc(pmc_t pmc __unused) { + return (pmc_config_t)zalloc(perf_small_zone); +} + +/* Free a pmc_config_t, and underlying pmc_config_object_t (if needed) */ +static void pmc_config_free(pmc_t pmc, pmc_config_t config) { + assert(pmc); + assert(config); + + if(config->object) { + pmc->methods.free_config(pmc->object, config->object); + config->object = NULL; + } + + zfree(perf_small_zone, config); +} + +static kern_return_t pmc_open(pmc_t pmc) { + assert(pmc); + assert(pmc->object); + assert(pmc->open_object); + + return pmc->methods.open(pmc->object, pmc->open_object); +} + +static kern_return_t pmc_close(pmc_t pmc) { + assert(pmc); + assert(pmc->object); + assert(pmc->open_object); + + return pmc->methods.close(pmc->object, pmc->open_object); +} + +/* + * Reservation Internals + */ + +static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc); +static void pmc_internal_reservation_store(pmc_reservation_t reservation); +static void pmc_internal_reservation_load(pmc_reservation_t reservation); + +static pmc_reservation_t reservation_alloc(void) { + /* pmc reservations come from the perf small zone */ + return (pmc_reservation_t)zalloc(perf_small_zone); +} + +/* + * reservation_free deallocates and releases all resources associated with the + * given pmc_reservation_t. 
This includes freeing the config used to create the + * reservation, decrementing the reference count for the pmc used to create the + * reservation, and deallocating the reservation's memory. + */ +static void reservation_free(pmc_reservation_t resv) { + /* Free config */ + if(resv->config) { + assert(resv->pmc); + + pmc_free_config(resv->pmc, resv->config); + + resv->config = NULL; + } + + /* release PMC */ + (void)pmc_internal_reservation_set_pmc(resv, NULL); + + /* Free reservation */ + zfree(perf_small_zone, resv); +} + +/* + * reservation_init initializes a newly created reservation. + */ +static void reservation_init(pmc_reservation_t resv) { + assert(resv); + + resv->pmc = NULL; + resv->config = NULL; + resv->value = 0ULL; + + resv->flags = 0U; + resv->state = PMC_STATE(PMC_STATE_STATE_STOP, 0, 0); + resv->active_last_context_in = 0U; + + /* + * Since this member is a union, we only need to set either the task + * or thread to NULL. + */ + resv->task = TASK_NULL; +} + +/* + * pmc_internal_reservation_set_pmc sets the pmc associated with the reservation object. If + * there was one set already, it is deallocated (reference is dropped) before + * the new one is set. This methods increases the reference count of the given + * pmc_t. + * + * NOTE: It is okay to pass NULL as the pmc_t - this will have the effect of + * dropping the reference on any previously set pmc, and setting the reservation + * to having no pmc set. 
+ */ +static kern_return_t pmc_internal_reservation_set_pmc(pmc_reservation_t resv, pmc_t pmc) { + assert(resv); + + if(resv->pmc) { + (void)pmc_close(resv->pmc); + pmc_deallocate(resv->pmc); + resv->pmc = NULL; + } + + resv->pmc = pmc; + + if(resv->pmc) { + pmc_reference(resv->pmc); + if(KERN_SUCCESS != pmc_open(resv->pmc)) { + pmc_deallocate(resv->pmc); + resv->pmc = NULL; + + return KERN_FAILURE; + } + } + + return KERN_SUCCESS; +} + +/* + * Used to place reservation into one of the system, task, and thread queues + * Assumes the queue's spin lock is already held. + */ +static void pmc_internal_reservation_enqueue(queue_t queue, pmc_reservation_t resv) { + assert(queue); + assert(resv); + + queue_enter(queue, resv, pmc_reservation_t, link); +} + +static void pmc_internal_reservation_dequeue(queue_t queue, pmc_reservation_t resv) { + assert(queue); + assert(resv); + + queue_remove(queue, resv, pmc_reservation_t, link); +} + +/* Returns TRUE if the reservation applies to the current execution context */ +static boolean_t pmc_internal_reservation_matches_context(pmc_reservation_t resv) { + boolean_t ret = FALSE; + assert(resv); + + if(PMC_FLAG_IS_SYSTEM_SCOPE(resv->flags)) { + ret = TRUE; + } else if(PMC_FLAG_IS_TASK_SCOPE(resv->flags)) { + if(current_task() == resv->task) { + ret = TRUE; + } + } else if(PMC_FLAG_IS_THREAD_SCOPE(resv->flags)) { + if(current_thread() == resv->thread) { + ret = TRUE; + } + } + + return ret; +} + +/* + * pmc_accessible_core_count returns the number of logical cores that can access + * a given @pmc. 0 means every core in the system. 
+ */ +static uint32_t pmc_accessible_core_count(pmc_t pmc) { + assert(pmc); + + uint32_t *cores = NULL; + size_t coreCt = 0UL; + + if(KERN_SUCCESS != pmc->methods.accessible_cores(pmc->object, + &cores, &coreCt)) { + coreCt = 0U; + } + + return (uint32_t)coreCt; +} + +/* spin lock for the queue must already be held */ +/* + * This method will inspect the task/thread of the reservation to see if it + * matches the new incoming one (for thread/task reservations only). Will only + * return TRUE if the task/thread matches. + */ +static boolean_t pmc_internal_reservation_queue_contains_pmc(queue_t queue, pmc_reservation_t +resv) { + assert(queue); + assert(resv); + + boolean_t ret = FALSE; + pmc_reservation_t tmp = NULL; + + queue_iterate(queue, tmp, pmc_reservation_t, link) { + if(tmp) { + if(tmp->pmc == resv->pmc) { + /* PMC matches - make sure scope matches first */ + switch(PMC_FLAG_SCOPE(tmp->flags)) { + case PMC_FLAG_SCOPE_SYSTEM: + /* + * Found a reservation in system queue with same pmc - always a + * conflict. + */ + ret = TRUE; + break; + case PMC_FLAG_SCOPE_THREAD: + /* + * Found one in thread queue with the same PMC as the + * argument. Only a conflict if argument scope isn't + * thread or system, or the threads match. + */ + ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_THREAD) || + (tmp->thread == resv->thread); + + if(!ret) { + /* + * so far, no conflict - check that the pmc that is + * being reserved isn't accessible from more than + * one core, if it is, we need to say it's already + * taken. + */ + if(1 != pmc_accessible_core_count(tmp->pmc)) { + ret = TRUE; + } + } + break; + case PMC_FLAG_SCOPE_TASK: + /* + * Follow similar semantics for task scope. + */ + + ret = (PMC_FLAG_SCOPE(resv->flags) != PMC_FLAG_SCOPE_TASK) || + (tmp->task == resv->task); + if(!ret) { + /* + * so far, no conflict - check that the pmc that is + * being reserved isn't accessible from more than + * one core, if it is, we need to say it's already + * taken. 
+ */ + if(1 != pmc_accessible_core_count(tmp->pmc)) { + ret = TRUE; + } + } + + break; + } + + if(ret) break; + } + } + } + + return ret; +} + +/* + * pmc_internal_reservation_validate_for_pmc returns TRUE if the given reservation can be + * added to its target queue without createing conflicts (target queue is + * determined by the reservation's scope flags). Further, this method returns + * FALSE if any level contains a reservation for a PMC that can be accessed from + * more than just 1 core, and the given reservation also wants the same PMC. + */ +static boolean_t pmc_internal_reservation_validate_for_pmc(pmc_reservation_t resv) { + assert(resv); + boolean_t ret = TRUE; + + if(pmc_internal_reservation_queue_contains_pmc(system_reservations, resv) || + pmc_internal_reservation_queue_contains_pmc(task_reservations, resv) || + pmc_internal_reservation_queue_contains_pmc(thread_reservations, resv)) { + ret = FALSE; + } + + return ret; +} + +static void pmc_internal_update_thread_flag(thread_t thread, boolean_t newFlag) { + assert(thread); + + /* See if this thread needs it's PMC flag set */ + pmc_reservation_t tmp = NULL; + + if(!newFlag) { + /* + * If the parent task just dropped its reservation, iterate the thread + * reservations to see if we need to keep the pmc flag set for the given + * thread or not. + */ + lck_spin_lock(&reservations_spin); + + queue_iterate(thread_reservations, tmp, pmc_reservation_t, link) { + if(tmp->thread == thread) { + newFlag = TRUE; + break; + } + } + + lck_spin_unlock(&reservations_spin); + } + + if(newFlag) { + OSBitOrAtomic(THREAD_PMC_FLAG, &thread->t_chud); + } else { + OSBitAndAtomic(~(THREAD_PMC_FLAG), &thread->t_chud); + } +} + +/* + * This operation is (worst case) O(N*M) where N is number of threads in the + * given task, and M is the number of thread reservations in our system. 
+ */
+static void pmc_internal_update_task_flag(task_t task, boolean_t newFlag) {
+	assert(task);
+	thread_t thread = NULL;
+
+	if(newFlag) {
+		OSBitOrAtomic(TASK_PMC_FLAG, &task->t_chud);
+	} else {
+		OSBitAndAtomic(~(TASK_PMC_FLAG), &task->t_chud);
+	}
+
+	task_lock(task);
+
+	queue_iterate(&task->threads, thread, thread_t, task_threads) {
+		/* propagate the task's mask down to each thread */
+		pmc_internal_update_thread_flag(thread, newFlag);
+	}
+
+	task_unlock(task);
+}
+
+/*
+ * pmc_internal_reservation_add adds a reservation to the global tracking queues after
+ * ensuring there are no reservation conflicts. To do this, it holds
+ * reservations_spin across both the conflict check and the enqueue (to ensure
+ * no other core goes and adds a reservation for the same pmc to a queue that
+ * has already been checked).
+ *
+ * Returns TRUE if the reservation was enqueued, FALSE if it conflicts with an
+ * existing reservation or its scope flags name no known scope. The lock is
+ * always released before returning, and before any task/thread flag update.
+ */
+static boolean_t pmc_internal_reservation_add(pmc_reservation_t resv) {
+	assert(resv);
+
+	boolean_t ret = FALSE;
+
+	/* a single spin lock covers the system, task and thread queues */
+	lck_spin_lock(&reservations_spin);
+
+	/* Check if the reservation can be added without conflicts */
+	if(pmc_internal_reservation_validate_for_pmc(resv)) {
+		ret = TRUE;
+	}
+
+	if(ret) {
+		/* add reservation to appropriate scope */
+		switch(PMC_FLAG_SCOPE(resv->flags)) {
+
+		/* System-wide counter */
+		case PMC_FLAG_SCOPE_SYSTEM:
+			/* Simply add it to the system queue */
+			pmc_internal_reservation_enqueue(system_reservations, resv);
+
+			lck_spin_unlock(&reservations_spin);
+
+			break;
+
+		/* Task-switched counter */
+		case PMC_FLAG_SCOPE_TASK:
+			assert(resv->task);
+
+			/* Not only do we enqueue it in our local queue for tracking */
+			pmc_internal_reservation_enqueue(task_reservations, resv);
+
+			lck_spin_unlock(&reservations_spin);
+
+			/* update the task mask, and propagate it to existing threads */
+			pmc_internal_update_task_flag(resv->task, TRUE);
+			break;
+
+		/* Thread-switched counter */
+		case PMC_FLAG_SCOPE_THREAD:
+			assert(resv->thread);
+
+			/*
+			 * Works the same as a task-switched counter, only at
+			 * thread-scope
+			 */
+
+			pmc_internal_reservation_enqueue(thread_reservations, resv);
+
+			lck_spin_unlock(&reservations_spin);
+
+			pmc_internal_update_thread_flag(resv->thread, TRUE);
+			break;
+
+		/* Unrecognized scope */
+		default:
+			/*
+			 * Nothing was enqueued, so release the lock (as every
+			 * other arm does) and report failure rather than
+			 * returning TRUE with reservations_spin still held.
+			 */
+			lck_spin_unlock(&reservations_spin);
+
+			ret = FALSE;
+			break;
+		}
+	} else {
+		lck_spin_unlock(&reservations_spin);
+	}
+
+	return ret;
+}
+
+static void pmc_internal_reservation_broadcast(pmc_reservation_t reservation, void (*action_func)(void *)) {
+	uint32_t * cores;
+	size_t core_cnt;
+
+	/* Get the list of accessible cores */
+	if (KERN_SUCCESS == pmc_get_accessible_core_list(reservation->pmc, &cores, &core_cnt)) {
+		boolean_t intrs_enabled = ml_set_interrupts_enabled(FALSE);
+
+		/* Fast case: the PMC is only accessible from one core and we happen to be on it */
+		if (core_cnt == 1 && cores[0] == (uint32_t)cpu_number()) {
+			action_func(reservation);
+		} else {
+			/* Call action_func on every accessible core */
+#if defined(__i386__) || defined(__x86_64__)
+			size_t ii;
+			cpumask_t mask = 0;
+
+			/* Build a mask for the accessible cores */
+			if (core_cnt > 0) {
+				for (ii = 0; ii < core_cnt; ii++) {
+					mask |= cpu_to_cpumask(cores[ii]);
+				}
+			} else {
+				/* core_cnt = 0 really means all cpus */
+				mask = CPUMASK_ALL;
+			}
+
+			/* Have each core run pmc_internal_reservation_stop_cpu asynchronously. 
*/ + mp_cpus_call(mask, ASYNC, action_func, reservation); +#elif defined(__ppc__) + size_t ii; + + if (core_cnt > 0) { + for (ii = 0; ii < core_cnt; ii++) { + if (cores[ii] == (uint32_t)cpu_number()) { + action_func(reservation); + } else { + cpu_signal(cores[ii], SIGPcall, (uint32_t)action_func, (uint32_t)reservation); + } + } + } else { + uint32_t sync; + cpu_broadcast(&sync, (void (*)(uint32_t))action_func, (uint32_t)reservation); + action_func(reservation); + } +#else +#error pmc_reservation_interrupt needs an inter-processor method invocation mechanism for this architecture +#endif + } + + ml_set_interrupts_enabled(intrs_enabled); + } + +} + +/* + * pmc_internal_reservation_remove removes the given reservation from the appropriate + * reservation queue according to its scope. + * + * NOTE: The scope flag must have been set for this method to function. + */ +static void pmc_internal_reservation_remove(pmc_reservation_t resv) { + assert(resv); + + /* + * Due to the way the macros are written, we can't just blindly queue-remove + * the reservation without knowing which queue it's in. We figure this out + * using the reservation's scope flags. 
+ */ + + switch(PMC_FLAG_SCOPE(resv->flags)) { + + case PMC_FLAG_SCOPE_SYSTEM: + lck_spin_lock(&reservations_spin); + pmc_internal_reservation_dequeue(system_reservations, resv); + lck_spin_unlock(&reservations_spin); + break; + + case PMC_FLAG_SCOPE_TASK: + + /* Lock the global spin lock */ + lck_spin_lock(&reservations_spin); + + /* remove from the global queue */ + pmc_internal_reservation_dequeue(task_reservations, resv); + + /* unlock the global */ + lck_spin_unlock(&reservations_spin); + + /* Recalculate task's counter mask */ + pmc_internal_update_task_flag(resv->task, FALSE); + break; + + case PMC_FLAG_SCOPE_THREAD: + lck_spin_lock(&reservations_spin); + + pmc_internal_reservation_dequeue(thread_reservations, resv); + + lck_spin_unlock(&reservations_spin); + + /* recalculate the thread's counter mask */ + pmc_internal_update_thread_flag(resv->thread, FALSE); + + break; + } +} + +/* Reservation State Machine + * + * The PMC subsystem uses a 3-tuple of state information packed into a 32-bit quantity and a + * set of 9 events to provide MP-safe bookkeeping and control flow. The 3-tuple is comprised + * of a state, a count of active contexts, and a set of modifier flags. A state machine defines + * the possible transitions at each event point given the current 3-tuple. Atomicity is handled + * by reading the current 3-tuple, applying the transformations indicated by the state machine + * and then attempting to OSCompareAndSwap the transformed value. If the OSCompareAndSwap fails, + * the process is repeated until either the OSCompareAndSwap succeeds or not valid transitions are + * available. + * + * The state machine is described using tuple notation for the current state and a related notation + * for describing the transformations. 
For concisness, the flag and state names are abbreviated as + * follows: + * + * states: + * S = STOP + * CR = CAN_RUN + * L = LOAD + * R = RUN + * ST = STORE + * I = INTERRUPT + * D = DEALLOC + * + * flags: + * + * S = STOPPING + * D = DEALLOCING + * I = INTERRUPTING + * + * The tuple notation is formed from the following pattern: + * + * tuple = < state, active-context-count, flags > + * state = S | CR | L | R | ST | I | D + * active-context-count = 0 | >0 | 1 | >1 + * flags = flags flag | blank + * flag = S | D | I + * + * The transform notation is similar, but only describes the modifications made to the current state. + * The notation is formed from the following pattern: + * + * transform = < state, active-context-count, flags > + * state = S | CR | L | R | ST | I | D + * active-context-count = + | - | blank + * flags = flags flag | flags !flag | blank + * flag = S | D | I + * + * And now for the state machine: + * State Start Stop Free Interrupt End Interrupt Context In Context Out Load Finished Store Finishedstatic uint32_t pmc_internal_reservation_next_state(uint32_t current_state, pmc_state_event_t event) { + uint32_t new_state = PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0); + + switch (event) { + case PMC_STATE_EVENT_START: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): + new_state = PMC_STATE_MODIFY(current_state, 0, 0, PMC_STATE_FLAGS_STOPPING); + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); + } + break; + } + break; + case PMC_STATE_EVENT_STOP: + switch 
(current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); + break; + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { + new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_STOPPING, 0); + } + break; + } + break; + case PMC_STATE_EVENT_FREE: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); + break; + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): + new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); + break; + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_DEALLOCING, 0); + break; + case 
PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, PMC_STATE_FLAGS_STOPPING); + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING, 0); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); + } + break; + } + break; + case PMC_STATE_EVENT_INTERRUPT: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + new_state = PMC_STATE_MODIFY(current_state, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING, 0); + break; + } + break; + case PMC_STATE_EVENT_END_OF_INTERRUPT: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_DEALLOCING): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); + break; + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, PMC_STATE_FLAGS_STOPPING): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); + break; + case PMC_STATE(PMC_STATE_STATE_INTERRUPT, 0, 0): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); + break; + } + break; + case PMC_STATE_EVENT_CONTEXT_IN: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_CAN_RUN, 0, 0): + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); + break; + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + new_state = PMC_STATE_MODIFY(current_state, 1, 0, 0); + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): + if 
(PMC_STATE_CONTEXT_COUNT(current_state) > 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 1, 0, 0); + } + break; + } + break; + case PMC_STATE_EVENT_CONTEXT_OUT: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_DEALLOC, 0, PMC_STATE_FLAGS_DEALLOCING): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_DEALLOCING); + } else { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_RUN, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); + } else { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, -1, 0, 
PMC_STATE_FLAGS_INTERRUPTING); + } else { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, PMC_STATE_FLAGS_STOPPING): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, PMC_STATE_FLAGS_STOPPING); + } else { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STOP, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { + if (PMC_STATE_CONTEXT_COUNT(current_state) == 1) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, -1, 0, 0); + } else { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + } + break; + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 0) { + new_state = PMC_STATE_MODIFY(current_state, -1, 0, 0); + } + break; + } + break; + case PMC_STATE_EVENT_LOAD_FINISHED: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, PMC_STATE_FLAGS_STOPPING): + if (PMC_STATE_CONTEXT_COUNT(current_state) > 1) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_RUN, -1, 0, 0); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STORE, -1, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_LOAD, 0, 0): + new_state = PMC_STATE_MOVE(current_state, 
PMC_STATE_STATE_RUN, 0, 0, 0); + break; + } + break; + case PMC_STATE_EVENT_STORE_FINISHED: + switch (current_state & ~(PMC_STATE_CONTEXT_COUNT_MASK)) { + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_DEALLOCING): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, PMC_STATE_FLAGS_DEALLOCING); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_DEALLOC, 0, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_DEALLOCING): + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_INTERRUPTING | PMC_STATE_FLAGS_STOPPING): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_INTERRUPT, 0, 0, PMC_STATE_FLAGS_INTERRUPTING); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STORE, 0, PMC_STATE_FLAGS_STOPPING): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, PMC_STATE_FLAGS_STOPPING); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_STOP, 0, 0, 0); + } + break; + case PMC_STATE(PMC_STATE_STATE_STORE, 0, 0): + if (PMC_STATE_CONTEXT_COUNT(current_state) == 0) { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_CAN_RUN, 0, 0, 0); + } else { + new_state = PMC_STATE_MOVE(current_state, PMC_STATE_STATE_LOAD, 0, 0, 0); + } + break; + } + break; + } + + return new_state; +} + +static uint32_t pmc_internal_reservation_move_for_event(pmc_reservation_t reservation, pmc_state_event_t event, pmc_state_t *old_state_out) { + pmc_state_t oldState; + pmc_state_t newState; + + assert(reservation); + + /* Determine what state change, if any, we need to do. Keep trying until either we succeed doing a transition + * or the there is no valid move. 
+ */ + do { + oldState = reservation->state; + newState = pmc_internal_reservation_next_state(oldState, event); + } while (newState != PMC_STATE_INVALID && !OSCompareAndSwap(oldState, newState, &(reservation->state))); + + if (newState != PMC_STATE_INVALID) { + COUNTER_DEBUG("Moved reservation %p from state "PMC_STATE_FORMAT" to state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), PMC_STATE_ARGS(newState), pmc_state_event_name(event)); + } else { + COUNTER_DEBUG("No valid moves for reservation %p in state "PMC_STATE_FORMAT" for event %s\n", reservation, PMC_STATE_ARGS(oldState), pmc_state_event_name(event)); + } + + if (old_state_out != NULL) { + *old_state_out = oldState; + } + + return newState; +} + +static void pmc_internal_reservation_context_out(pmc_reservation_t reservation) { + assert(reservation); + pmc_state_t newState; + pmc_state_t oldState; + + /* Clear that the this reservation was active when this cpu did its last context in */ + OSBitAndAtomic(~(1U << cpu_number()), &(reservation->active_last_context_in)); + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_OUT, &oldState))) { + return; + } + + /* Do any actions required based on the state change */ + if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_STORE) { + /* Just moved into STORE, so store the reservation. 
*/ + pmc_internal_reservation_store(reservation); + } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { + /* Wakeup any thread blocking for this reservation to hit */ + thread_wakeup((event_t)reservation); + } + +} + +static void pmc_internal_reservation_context_in(pmc_reservation_t reservation) { + assert(reservation); + pmc_state_t oldState; + pmc_state_t newState; + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_CONTEXT_IN, &oldState))) { + return; + } + + /* Mark that the reservation was active when this cpu did its last context in */ + OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); + + /* Do any actions required based on the state change */ + if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD && PMC_STATE_STATE(oldState) != PMC_STATE_STATE_LOAD) { + /* Just moved into LOAD, so load the reservation. */ + pmc_internal_reservation_load(reservation); + } + +} + +static void pmc_internal_reservation_store(pmc_reservation_t reservation) { + assert(reservation); + assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_STORE); + + assert(reservation->pmc); + assert(reservation->config); + + pmc_state_t newState; + kern_return_t ret = KERN_SUCCESS; + + pmc_t store_pmc = reservation->pmc; + pmc_object_t store_pmc_obj = store_pmc->object; + perf_monitor_t store_pm = store_pmc->monitor; + + /* + * Instruct the Perf Monitor that contains this counter to turn + * off the global disable for this counter. 
+ */ + ret = store_pm->methods.disable_counters(store_pm->object, &store_pmc_obj, 1); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] disable_counters: 0x%x\n", ret); + return; + } + + /* Instruct the counter to disable itself */ + ret = store_pmc->methods.disable(store_pmc_obj); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] disable: 0x%x\n", ret); + } + + /* + * At this point, we're off the hardware, so we don't have to + * set_on_hardare(TRUE) if anything fails from here on. + */ + + /* store the counter value into the reservation's stored count */ + ret = store_pmc->methods.get_count(store_pmc_obj, &reservation->value); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] get_count: 0x%x\n", ret); + return; + } + + /* Advance the state machine now that the STORE is finished */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STORE_FINISHED, NULL))) { + return; + } + + /* Do any actions required based on the state change */ + if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_LOAD) { + /* Just moved into LOAD, so load the reservation. 
*/ + pmc_internal_reservation_load(reservation); + } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { + /* Wakeup any thread blocking for this reservation to hit */ + thread_wakeup((event_t)reservation); + } + +} + +static void pmc_internal_reservation_load(pmc_reservation_t reservation) { + assert(reservation); + assert(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_LOAD); + + pmc_state_t newState; + kern_return_t ret = KERN_SUCCESS; + + assert(reservation->pmc); + assert(reservation->config); + + pmc_t load_pmc = reservation->pmc; + pmc_object_t load_pmc_obj = load_pmc->object; + perf_monitor_t load_pm = load_pmc->monitor; + + /* Set the control register up with the stored configuration */ + ret = load_pmc->methods.set_config(load_pmc_obj, reservation->config->object); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] set_config: 0x%x\n", ret); + return; + } + + /* load the counter value */ + ret = load_pmc->methods.set_count(load_pmc_obj, reservation->value); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] set_count: 0x%x\n", ret); + return; + } + + /* Locally enable the counter */ + ret = load_pmc->methods.enable(load_pmc_obj); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] enable: 0x%x\n", ret); + return; + } + + /* + * Instruct the Perf Monitor containing the pmc to enable the + * counter. + */ + ret = load_pm->methods.enable_counters(load_pm->object, &load_pmc_obj, 1); + if(KERN_SUCCESS != ret) { + COUNTER_DEBUG(" [error] enable_counters: 0x%x\n", ret); + /* not on the hardware. 
*/ + return; + } + + /* Advance the state machine now that the STORE is finished */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_LOAD_FINISHED, NULL))) { + return; + } + + /* Do any actions required based on the state change */ + if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_STORE) { + /* Just moved into STORE, so store the reservation. */ + pmc_internal_reservation_store(reservation); + } + +} + +static void pmc_internal_reservation_start_cpu(void * arg) { + pmc_reservation_t reservation = (pmc_reservation_t)arg; + + assert(reservation); + + if (pmc_internal_reservation_matches_context(reservation)) { + /* We are in context, but the reservation may have already had the context_in method run. Attempt + * to set this cpu's bit in the active_last_context_in mask. If we set it, call context_in. + */ + uint32_t oldMask = OSBitOrAtomic(1U << cpu_number(), &(reservation->active_last_context_in)); + + if ((oldMask & (1U << cpu_number())) == 0) { + COUNTER_DEBUG("Starting already in-context reservation %p for cpu %d\n", reservation, cpu_number()); + + pmc_internal_reservation_context_in(reservation); + } + } +} + +static void pmc_internal_reservation_stop_cpu(void * arg) { + pmc_reservation_t reservation = (pmc_reservation_t)arg; + + assert(reservation); + + if (pmc_internal_reservation_matches_context(reservation)) { + COUNTER_DEBUG("Stopping in-context reservation %p for cpu %d\n", reservation, cpu_number()); + + pmc_internal_reservation_context_out(reservation); + } +} + +/*!fn + * pmc_reservation_interrupt is called when a PMC reservation which was setup + * with an interrupt threshold counts the requested number of events. When the + * underlying counter hits the threshold, an interrupt is generated, and this + * method is called. 
This method marks the reservation as stopped, and passes + * control off to the user-registered callback method, along with the + * reservation (so that the user can, for example, write a 0 to the counter, and + * restart the reservation). + * This method assumes the reservation has a valid pmc_config_t within. + * + * Returns early, without calling the callback, if the state machine has no + * valid move for the INTERRUPT event. + * + * @param target The pmc_reservation_t that caused the interrupt. + * @param refCon User specified reference constant. + */ +static void pmc_reservation_interrupt(void *target, void *refCon) { + pmc_reservation_t reservation = (pmc_reservation_t)target; + pmc_state_t newState; + uint64_t timeout; + uint32_t spins; + + assert(reservation); + + /* Move the state machine */ + if (PMC_STATE_INVALID == pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_INTERRUPT, NULL)) { + return; + } + + /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching + * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu + * on every cpu that can access the PMC. 
+ */ + pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); + + /* Spin waiting for the state to turn to INTERRUPT */ + nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); + timeout += mach_absolute_time(); + spins = 0; + while (PMC_STATE_STATE(reservation->state) != PMC_STATE_STATE_INTERRUPT) { + /* + * Assert if this takes longer than PMC_SPIN_TIMEOUT_US. The clock is only + * consulted once more than PMC_SPIN_THRESHOLD iterations have gone by. + */ + if (++spins > PMC_SPIN_THRESHOLD) { + if (mach_absolute_time() > timeout) { + pmc_spin_timeout_count++; + assert(0); + } + } + + cpu_pause(); + } + + assert(reservation->config); + assert(reservation->config->method); + + /* Call the registered callback handler */ +#if DEBUG_COUNTERS + uint64_t start = mach_absolute_time(); +#endif /* DEBUG_COUNTERS */ + + (void)reservation->config->method(reservation, refCon); + +#if DEBUG_COUNTERS + /* report callbacks whose start-to-end delta exceeds 5000 */ + uint64_t end = mach_absolute_time(); + if((end - start) > 5000ULL) { + kprintf("%s - user method %p took %llu ns\n", __FUNCTION__, + reservation->config->method, (end - start)); + } +#endif + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_END_OF_INTERRUPT, NULL))) { + return; + } + + /* Do any post-move actions necessary */ + if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_CAN_RUN) { + /* runnable again: let every accessible cpu re-run its context-in check */ + pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); + } else if (PMC_STATE_STATE(newState) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(newState) == 0 && PMC_STATE_FLAGS(newState) == 0) { + /* Wakeup any thread blocking for this reservation to hit DEALLOC with no contexts and no flags */ + thread_wakeup((event_t)reservation); + } +} + +/* + * Apple-private KPI for Apple kext's (IOProfileFamily) only + */ + +#if 0 +#pragma mark - +#pragma mark IOProfileFamily private KPI +#endif + +/* + * perf_monitor_register registers a new Performance Monitor, and its associated + * callback methods. 
The given perf_monitor_object_t is the first argument to + * each callback when they are called. + * + * Returns KERN_INVALID_ARGUMENT for a NULL argument, a methods-version mismatch or a + * missing callback; KERN_FAILURE if @monitor is already registered; + * KERN_RESOURCE_SHORTAGE if allocation fails; KERN_SUCCESS otherwise. + */ +kern_return_t perf_monitor_register(perf_monitor_object_t monitor, + perf_monitor_methods_t *methods) { + + COUNTER_DEBUG("registering perf monitor %p\n", monitor); + + if(!monitor || !methods) { + return KERN_INVALID_ARGUMENT; + } + + /* Protect against out-of-date driver kexts */ + if(MACH_PERFMON_METHODS_VERSION != methods->perf_monitor_methods_version) { + return KERN_INVALID_ARGUMENT; + } + + /* All methods are required */ + if(!methods->supports_context_switching || !methods->enable_counters || + !methods->disable_counters) { + return KERN_INVALID_ARGUMENT; + } + + /* prevent dupes. */ + perf_monitor_t dupe = perf_monitor_find(monitor); + if(dupe) { + COUNTER_DEBUG("Duplicate registration for %p\n", monitor); + /* drop the reference taken by perf_monitor_find() */ + perf_monitor_deallocate(dupe); + return KERN_FAILURE; + } + + perf_monitor_t pm = perf_monitor_alloc(); + if(!pm) { + return KERN_RESOURCE_SHORTAGE; + } + + /* initialize the object */ + perf_monitor_init(pm); + + /* copy in the registration info */ + pm->object = monitor; + memcpy(&(pm->methods), methods, sizeof(perf_monitor_methods_t)); + + /* place it in the tracking queue */ + perf_monitor_enqueue(pm); + + /* debug it */ + PRINT_PERF_MON(pm); + + return KERN_SUCCESS; +} + +/* + * perf_monitor_unregister unregisters a previously registered Perf Monitor, + * looking it up by reference pointer (the same that was used in + * perf_monitor_register()). + * + * Returns KERN_INVALID_ARGUMENT if @monitor is NULL, KERN_FAILURE if no matching + * monitor is found, KERN_SUCCESS otherwise. + */ +kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor) { + kern_return_t ret = KERN_FAILURE; + + COUNTER_DEBUG("unregistering perf monitor %p\n", monitor); + + if(!monitor) { + return KERN_INVALID_ARGUMENT; + } + + perf_monitor_t pm = perf_monitor_find(monitor); + if(pm) { + /* Remove it from the queue. 
*/ + perf_monitor_dequeue(pm); + + /* drop extra retain from find */ + perf_monitor_deallocate(pm); + + /* and release the object */ + perf_monitor_deallocate(pm); + + ret = KERN_SUCCESS; + } else { + COUNTER_DEBUG("could not find a registered pm that matches!\n"); + } + + return ret; +} + +/* + * pmc_register registers a new PMC for use with the pmc subsystem. Each PMC is + * associated with a Perf Monitor. Perf Monitors are looked up by the reference + * pointer that was used to previously register them. + * + * PMCs are registered with a reference pointer (@pmc_object), and a set of + * callback methods. When the given callback methods are called from xnu, the + * first argument will always be the reference pointer used to register the PMC. + * + * @object is stored in the new pmc's open_object field. + * + * NOTE: @monitor must have been successfully registered via + * perf_monitor_register before this method will succeed. + */ +kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc_object, + pmc_methods_t *methods, void *object) { + + COUNTER_DEBUG("%p %p\n", monitor, pmc_object); + + if(!monitor || !pmc_object || !methods || !object) { + return KERN_INVALID_ARGUMENT; + } + + /* Prevent version mismatches */ + if(MACH_PMC_METHODS_VERSION != methods->pmc_methods_version) { + COUNTER_DEBUG("version mismatch\n"); + return KERN_INVALID_ARGUMENT; + } + + /* All methods are required. 
*/ + if(!methods->create_config || + !methods->free_config || + !methods->config_set_value || + !methods->config_set_threshold || + !methods->config_set_handler || + !methods->set_config || + !methods->get_monitor || + !methods->get_name || + !methods->accessible_from_core || + !methods->accessible_cores || + !methods->get_count || + !methods->set_count || + !methods->disable || + !methods->enable || + !methods->open || + !methods->close) { + return KERN_INVALID_ARGUMENT; + } + + /* make sure this perf monitor object is already registered */ + /* + * NOTE: this adds a reference to the parent, so we'll have to drop it in + * any failure code paths from here on out. + */ + perf_monitor_t pm = perf_monitor_find(monitor); + if(!pm) { + COUNTER_DEBUG("Could not find perf monitor for %p\n", monitor); + return KERN_INVALID_ARGUMENT; + } + + /* make a new pmc */ + pmc_t pmc = pmc_alloc(); + if(!pmc) { + /* drop the extra reference from perf_monitor_find() */ + perf_monitor_deallocate(pm); + return KERN_RESOURCE_SHORTAGE; + } + + /* init it */ + pmc_init(pmc); + + pmc->object = pmc_object; + pmc->open_object = object; + + /* copy the callbacks in */ + memcpy(&(pmc->methods), methods, sizeof(pmc_methods_t)); + + pmc->monitor = pm; + + /* attach the new pmc to its parent perf monitor */ + perf_monitor_add_pmc(pmc->monitor, pmc); + + /* enqueue it in our tracking queue */ + pmc_enqueue(pmc); + + /* drop extra reference from perf_monitor_find() */ + perf_monitor_deallocate(pm); + + return KERN_SUCCESS; +} + +/* + * pmc_unregister unregisters a previously registered PMC, looking it up by + * reference point to *both* the Perf Monitor it was created with, and the PMC's + * reference pointer itself. 
+ */ +kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc_object) { + COUNTER_DEBUG("%p %p\n", monitor, pmc_object); + + if(!monitor || !pmc_object) { + return KERN_INVALID_ARGUMENT; + } + + /* NOTE(review): @monitor is only NULL-checked; the lookup below uses @pmc_object alone */ + pmc_t pmc = pmc_find(pmc_object); + if(!pmc) { + COUNTER_DEBUG("Could not find a matching pmc.\n"); + return KERN_FAILURE; + } + + /* remove it from the global queue */ + pmc_dequeue(pmc); + + /* detach it from the perf monitor it was registered under */ + perf_monitor_remove_pmc(pmc->monitor, pmc); + + /* remove extra reference count from pmc_find() */ + pmc_deallocate(pmc); + + /* dealloc the pmc */ + pmc_deallocate(pmc); + + return KERN_SUCCESS; +} + +#if 0 +#pragma mark - +#pragma mark KPI +#endif + +/* + * Begin in-kernel and in-kext KPI methods + */ + +/* + * pmc_create_config creates a new configuration area from a given @pmc. + * + * On success the new config is returned through @config. Returns + * KERN_INVALID_ARGUMENT if either argument is NULL, and KERN_RESOURCE_SHORTAGE + * if the config or its driver-side object could not be created (in which case + * @config is left untouched). + * + * NOTE: This method is not interrupt safe. + */ +kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config) { + pmc_config_t tmp = NULL; + + if(!pmc || !config) { + return KERN_INVALID_ARGUMENT; + } + + /* hold a reference on the pmc while its callbacks are used */ + pmc_reference(pmc); + + tmp = pmc_config_alloc(pmc); + if(tmp) { + tmp->object = pmc->methods.create_config(pmc->object); + + if(!tmp->object) { + /* the driver could not create its side of the config */ + pmc_config_free(pmc, tmp); + tmp = NULL; + } else { + /* start with no interrupt threshold and no handler */ + tmp->interrupt_after_value = 0ULL; + tmp->method = NULL; + tmp->refCon = NULL; + } + } + + pmc_deallocate(pmc); + + if(!tmp) { + return KERN_RESOURCE_SHORTAGE; + } + + *config = tmp; + + return KERN_SUCCESS; +} + +/* + * pmc_free_config frees a configuration area created from a given @pmc + * + * NOTE: This method is not interrupt safe. + */ +void pmc_free_config(pmc_t pmc, pmc_config_t config) { + assert(pmc); + assert(config); + + /* hold a reference on the pmc for the duration of the free */ + pmc_reference(pmc); + + pmc_config_free(pmc, config); + + pmc_deallocate(pmc); +} + +/* + * pmc_config_set_value sets up configuration area key-value pairs. These pairs + * are to be either pre-known, or looked up via CoreProfile.framework. + * + * NOTE: This method is not interrupt safe. 
+ */ +kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, + uint8_t id, uint64_t value) { + + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if(!pmc || !config) { + return ret; + } + + pmc_reference(pmc); + + ret = pmc->methods.config_set_value(config->object, id, value); + + pmc_deallocate(pmc); + + return ret; +} + +/* + * pmc_config_set_interrupt_threshold modifies a config object, instructing + * the pmc that it should generate a call to the given pmc_interrupt_method_t + * after the counter counts @threshold events. + * + * PMC Threshold handler methods will have the pmc_reservation_t that generated the interrupt + * as the first argument when the interrupt handler is invoked, and the given + * @refCon (which may be NULL) as the second. + * + * See pmc_interrupt_method_t. + * + * NOTE: This method is not interrupt safe. + */ +kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, + uint64_t threshold, pmc_interrupt_method_t method, void *refCon) { + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if(!config || !pmc) { + return ret; + } + + assert(config); + assert(pmc); + + pmc_reference(pmc); + + do { + /* + * We have a minor annoyance to side-step here. The driver layer expects + * the config to never change once a reservation has been taken out with + * it. However, in order to have the PMI method have the reservation as + * the first argument (in order to allow the user-method to, for + * example, write a 0 to it, and restart it), we need to create the + * pmc_reservation_t before setting it up in the config object. + * We overcome this by caching the method in the pmc_config_t stand-in, + * and mutating the pmc_config_object_t just before returning a + * reservation (in pmc_reserve() and friends, below). + */ + + /* might as well stash this away too. 
*/ + config->interrupt_after_value = threshold; + config->method = method; + config->refCon = refCon; + + ret = KERN_SUCCESS; + + }while(0); + + pmc_deallocate(pmc); + + return ret; +} + +/* + * pmc_get_pmc_list returns an allocated list of pmc_t's, as well as the number + * of pmc_t's returned. Callers should free this list with a call to + * pmc_free_pmc_list(). + * + * NOTE: This method is not interrupt safe. + */ +kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount) { + pmc_t *array = NULL; + pmc_t pmc = NULL; + size_t count = 0UL; + + do { + /* Copy down (to the stack) the count of perf counters */ + vm_size_t size = perf_counters_count; + + /* Allocate that sized chunk */ + array = (pmc_t *)kalloc(sizeof(pmc_t) * size); + if(!array) { + return KERN_RESOURCE_SHORTAGE; + } + + /* Take the spin lock */ + lck_spin_lock(&perf_counters_queue_spin); + + /* verify the size didn't change while we were allocating */ + if(size != perf_counters_count) { + /* + * queue size has changed between alloc and now - go back and + * make another pass. + */ + + /* drop the lock */ + lck_spin_unlock(&perf_counters_queue_spin); + + /* free the block */ + kfree(array, sizeof(pmc_t) * size); + array = NULL; + } + + /* if we get here, and array is NULL, we try again. */ + }while(!array); + + /* copy the bits out */ + queue_iterate(perf_counters_queue, pmc, pmc_t, link) { + if(pmc) { + /* copy out the pointer */ + array[count++] = pmc; + } + } + + lck_spin_unlock(&perf_counters_queue_spin); + + /* return the list and the size */ + *pmcs = array; + *pmcCount = count; + + return KERN_SUCCESS; +} + +/* + * pmc_free_pmc_list frees an array of pmc_t that has been returned from + * pmc_get_pmc_list. + * + * NOTE: This method is not interrupt safe. 
+ */ +void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount) { + if(pmcs && pmcCount) { + COUNTER_DEBUG("pmcs: %p pmcCount: %lu\n", pmcs, pmcCount); + + kfree(pmcs, pmcCount * sizeof(pmc_t)); + } +} + +kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount) { + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if(!name || !pmcs || !pmcCount) { + return ret; + } + + pmc_t *list = NULL; + size_t count = 0UL; + + if(KERN_SUCCESS == (ret = pmc_get_pmc_list(&list, &count))) { + size_t matchCount = 0UL, ii = 0UL, swapPtr = 0UL; + size_t len = strlen(name); + + for(ii = 0UL; ii < count; ii++) { + const char *pmcName = pmc_get_name(list[ii]); + + if(strlen(pmcName) < len) { + /* + * If the pmc name is shorter than the requested match, it's no + * match, as we're looking for the most specific match(es). + */ + continue; + } + + if(0 == strncmp(name, pmcName, len)) { + pmc_t temp = list[ii]; + + // move matches to the head of the array. + list[ii] = list[swapPtr]; + list[swapPtr] = temp; + swapPtr++; + + // keep a count of the matches + matchCount++; + } + } + + if(matchCount) { + /* + * If we have matches, they are all at the head of the array, so + * just allocate enough space for @matchCount pmc_t's, and copy the + * head of the array to the new allocation. Then free the old + * allocation. + */ + + pmc_t *result = (pmc_t *)kalloc(sizeof(pmc_t) * matchCount); + if(result) { + // copy the matches + memcpy(result, list, sizeof(pmc_t) * matchCount); + + ret = KERN_SUCCESS; + } + + pmc_free_pmc_list(list, count); + + if(!result) { + *pmcs = NULL; + *pmcCount = 0UL; + return KERN_RESOURCE_SHORTAGE; + } + + *pmcs = result; + *pmcCount = matchCount; + } else { + *pmcs = NULL; + *pmcCount = 0UL; + } + } + + return ret; +} + +/* + * pmc_get_name returns a pointer (not copied) to the human-readable name of the + * given pmc. + * + * NOTE: Driver authors must take care to not allocate during this method, as + * this method *IS* interrupt safe. 
+ */ +const char *pmc_get_name(pmc_t pmc) { + assert(pmc); + + const char *name = pmc->methods.get_name(pmc->object); + + return name; +} + +/* + * pmc_get_accessible_core_list returns a pointer to an array of logical core + * numbers (as well as the size of that array) that represent the local cores + * (hardware threads) from which the given @pmc can be accessed directly. + * + * NOTE: This method is interrupt safe. + */ +kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, + size_t *logicalCoreCt) { + + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if(!pmc || !logicalCores || !logicalCoreCt) { + return ret; + } + + ret = pmc->methods.accessible_cores(pmc->object, logicalCores, logicalCoreCt); + + return ret; +} + +/* + * pmc_accessible_from_core will return TRUE if the given @pmc is directly + * (e.g., hardware) readable from the given logical core. + * + * NOTE: This method is interrupt safe. + */ +boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore) { + boolean_t ret = FALSE; + + assert(pmc); + + ret = pmc->methods.accessible_from_core(pmc->object, logicalCore); + + return ret; +} + +static boolean_t pmc_reservation_setup_pmi(pmc_reservation_t resv, pmc_config_t config) { + assert(resv); + assert(resv->pmc); + assert(config); + assert(config->object); + + /* If there's no PMI to setup, return success */ + if(config->interrupt_after_value && config->method) { + + /* set the threshold */ + kern_return_t ret = resv->pmc->methods.config_set_threshold(config->object, + config->interrupt_after_value); + + if(KERN_SUCCESS != ret) { + /* + * This is the most useful error message here, as this only happens + * as a result of pmc_reserve*() + */ + COUNTER_DEBUG("Failed to set threshold for pmc %p\n", resv->pmc); + return FALSE; + } + + if(KERN_SUCCESS != resv->pmc->methods.config_set_handler(config->object, + (void *)resv, &pmc_reservation_interrupt, config->refCon)) { + + COUNTER_DEBUG("Failed to set handler for pmc %p\n", 
resv->pmc); + return FALSE; + } + } + + return TRUE; +} + +/* + * pmc_reserve will attempt to reserve the given @pmc, with a given + * configuration object, for counting system-wide. This method will fail with + * KERN_FAILURE if the given pmc is already reserved at any scope. + * + * This method consumes the given configuration object if it returns + * KERN_SUCCESS. Any other return value indicates the caller + * must free the config object via pmc_free_config(). + * + * NOTE: This method is NOT interrupt safe. + */ +kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, + pmc_reservation_t *reservation) { + + if(!pmc || !config || !reservation) { + return KERN_INVALID_ARGUMENT; + } + + pmc_reservation_t resv = reservation_alloc(); + if(!resv) { + return KERN_RESOURCE_SHORTAGE; + } + + reservation_init(resv); + + resv->flags |= PMC_FLAG_SCOPE_SYSTEM; + resv->config = config; + + if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { + resv->config = NULL; + return KERN_FAILURE; + } + + /* enqueue reservation in proper place */ + if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { + /* Prevent free of config object */ + resv->config = NULL; + + reservation_free(resv); + return KERN_FAILURE; + } + + /* Here's where we setup the PMI method (if needed) */ + + *reservation = resv; + + return KERN_SUCCESS; +} + +/* + * pmc_reserve_task will attempt to reserve the given @pmc with a given + * configuration object, for counting when the given @task is running on any + * logical core that can directly access the given @pmc. This method will fail + * with KERN_FAILURE if the given pmc is already reserved at either system or + * thread scope. + * + * This method consumes the given configuration object if it returns + * KERN_SUCCESS. Any other return value indicates the caller + * must free the config object via pmc_free_config(). + * + * NOTE: You can reserve the same pmc for N different tasks concurrently. 
+ * NOTE: This method is NOT interrupt safe. + */ +kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, + task_t task, pmc_reservation_t *reservation) { + + if(!pmc || !config || !reservation || !task) { + return KERN_INVALID_ARGUMENT; + } + + if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) { + COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); + return KERN_INVALID_ARGUMENT; + } + + pmc_reservation_t resv = reservation_alloc(); + if(!resv) { + return KERN_RESOURCE_SHORTAGE; + } + + reservation_init(resv); + + resv->flags |= PMC_FLAG_SCOPE_TASK; + resv->task = task; + + resv->config = config; + + if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { + resv->config = NULL; + return KERN_FAILURE; + } + + /* enqueue reservation in proper place */ + if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { + /* Prevent free of config object */ + resv->config = NULL; + + reservation_free(resv); + return KERN_FAILURE; + } + + *reservation = resv; + + return KERN_SUCCESS; +} + +/* + * pmc_reserve_thread will attempt to reserve the given @pmc with a given + * configuration object, for counting when the given @thread is running on any + * logical core that can directly access the given @pmc. This method will fail + * with KERN_FAILURE if the given pmc is already reserved at either system or + * task scope. + * + * This method consumes the given configuration object if it returns + * KERN_SUCCESS. Any other return value indicates the caller + * must free the config object via pmc_free_config(). + * + * NOTE: You can reserve the same pmc for N different threads concurrently. + * NOTE: This method is NOT interrupt safe. 
+ */ +kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, + thread_t thread, pmc_reservation_t *reservation) { + if(!pmc || !config || !reservation || !thread) { + return KERN_INVALID_ARGUMENT; + } + + if(!pmc->monitor->methods.supports_context_switching(pmc->monitor->object)) { + COUNTER_DEBUG("pmc %p cannot be context switched!\n", pmc); + return KERN_INVALID_ARGUMENT; + } + + pmc_reservation_t resv = reservation_alloc(); + if(!resv) { + return KERN_RESOURCE_SHORTAGE; + } + + reservation_init(resv); + + resv->flags |= PMC_FLAG_SCOPE_THREAD; + resv->thread = thread; + + resv->config = config; + + if(KERN_SUCCESS != pmc_internal_reservation_set_pmc(resv, pmc)) { + resv->config = NULL; + return KERN_FAILURE; + } + + /* enqueue reservation in proper place */ + if(!pmc_internal_reservation_add(resv) || !pmc_reservation_setup_pmi(resv, config)) { + /* Prevent free of config object */ + resv->config = NULL; + + reservation_free(resv); + return KERN_FAILURE; + } + + *reservation = resv; + + return KERN_SUCCESS; +} + +/* + * pmc_reservation_start instructs the given reservation to start counting as + * soon as possible. + * + * NOTE: This method is interrupt safe. + */ +kern_return_t pmc_reservation_start(pmc_reservation_t reservation) { + pmc_state_t newState; + + if(!reservation) { + return KERN_INVALID_ARGUMENT; + } + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_START, NULL))) { + return KERN_FAILURE; + } + + /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will + * broadcast right before it leaves + */ + if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT) { + /* A valid state move has been made, but won't be picked up until a context switch occurs. 
To cause matching + * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_start_cpu + * on every cpu that can access the PMC. + */ + pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_start_cpu); + } + + return KERN_SUCCESS; +} + +/* + * pmc_reservation_stop instructs the given reservation to stop counting as + * soon as possible. When this method returns, the pmc will be marked as stopping + * and subsequent calls to pmc_reservation_start will succeed. This does not mean + * that the pmc hardware has _actually_ stopped running. Assuming no other changes + * to the reservation state, the pmc hardware _will_ stop shortly. + * + */ +kern_return_t pmc_reservation_stop(pmc_reservation_t reservation) { + pmc_state_t newState; + + if(!reservation) { + return KERN_INVALID_ARGUMENT; + } + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_STOP, NULL))) { + return KERN_FAILURE; + } + + /* If we are currently in an interrupt, don't bother to broadcast since it won't do anything now and the interrupt will + * broadcast right before it leaves. Similarly, if we just moved directly to STOP, don't bother broadcasting. + */ + if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_INTERRUPT && PMC_STATE_STATE(newState) != PMC_STATE_STATE_STOP) { + /* A valid state move has been made, but won't be picked up until a context switch occurs. To cause matching + * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu + * on every cpu that can access the PMC. + */ + + pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); + } + + return KERN_SUCCESS; +} + +/* + * pmc_reservation_read will read the event count associated with a reservation. 
+ * If the caller is current executing in a context that both a) matches the + * reservation's context, and b) can access the reservation's pmc directly, the + * value will be read from hardware. Otherwise, this returns the reservation's + * stored value. + * + * NOTE: This method is interrupt safe. + * NOTE: When not on the interrupt stack, this method may block. + */ +kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value) { + kern_return_t ret = KERN_FAILURE; + uint64_t timeout; + uint32_t spins; + + if(!reservation || !value) { + return KERN_INVALID_ARGUMENT; + } + + nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); + timeout += mach_absolute_time(); + spins = 0; + do { + uint32_t state = reservation->state; + + if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { + /* Attempt read from hardware via drivers. */ + + assert(reservation->pmc); + + ret = reservation->pmc->methods.get_count(reservation->pmc->object, value); + + break; + } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || + (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { + /* Spin */ + /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ + if (++spins > PMC_SPIN_THRESHOLD) { + if (mach_absolute_time() > timeout) { + pmc_spin_timeout_count++; + assert(0); + } + } + + cpu_pause(); + } else { + break; + } + } while (1); + + /* If the direct hardware read failed (for whatever reason) */ + if(KERN_SUCCESS != ret) { + /* Read stored value */ + *value = reservation->value; + } + + return KERN_SUCCESS; +} + +/* + * pmc_reservation_write will write the event count associated with a reservation. + * If the caller is current executing in a context that both a) matches the + * reservation's context, and b) can access the reservation's pmc directly, the + * value will be written to hardware. Otherwise, this writes the reservation's + * stored value. + * + * NOTE: This method is interrupt safe. 
+ * NOTE: When not on the interrupt stack, this method may block. + */ +kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value) { + kern_return_t ret = KERN_FAILURE; + uint64_t timeout; + uint32_t spins; + + if(!reservation) { + return KERN_INVALID_ARGUMENT; + } + + nanoseconds_to_absolutetime(PMC_SPIN_TIMEOUT_US * 1000, &timeout); + timeout += mach_absolute_time(); + spins = 0; + do { + uint32_t state = reservation->state; + + if((PMC_STATE_STATE(state) == PMC_STATE_STATE_RUN)) { + /* Write to hardware via drivers. */ + assert(reservation->pmc); + + ret = reservation->pmc->methods.set_count(reservation->pmc->object, value); + break; + } else if ((PMC_STATE_STATE(state) == PMC_STATE_STATE_STORE) || + (PMC_STATE_STATE(state) == PMC_STATE_STATE_LOAD)) { + /* Spin */ + /* Assert if this takes longer than PMC_SPIN_TIMEOUT_US */ + if (++spins > PMC_SPIN_THRESHOLD) { + if (mach_absolute_time() > timeout) { + pmc_spin_timeout_count++; + assert(0); + } + } + + cpu_pause(); + } else { + break; + } + } while (1); + + if(KERN_SUCCESS != ret) { + /* Write stored value */ + reservation->value = value; + } + + return KERN_SUCCESS; +} + +/* + * pmc_reservation_free releases a reservation and all associated resources. + * + * NOTE: This method is NOT interrupt safe. + */ +kern_return_t pmc_reservation_free(pmc_reservation_t reservation) { + pmc_state_t newState; + + if(!reservation) { + return KERN_INVALID_ARGUMENT; + } + + /* Move the state machine */ + if (PMC_STATE_INVALID == (newState = pmc_internal_reservation_move_for_event(reservation, PMC_STATE_EVENT_FREE, NULL))) { + return KERN_FAILURE; + } + + /* If we didn't move directly to DEALLOC, help things along */ + if (PMC_STATE_STATE(newState) != PMC_STATE_STATE_DEALLOC) { + /* A valid state move has been made, but won't be picked up until a context switch occurs. 
To cause matching + * contexts that are currently running to update, we do an inter-processor message to run pmc_internal_reservation_stop_cpu + * on every cpu that can access the PMC. + */ + pmc_internal_reservation_broadcast(reservation, pmc_internal_reservation_stop_cpu); + } + + /* Block until the reservation hits the state */ + while (!(PMC_STATE_STATE(reservation->state) == PMC_STATE_STATE_DEALLOC && PMC_STATE_CONTEXT_COUNT(reservation->state) == 0 && PMC_STATE_FLAGS(reservation->state) == 0)) { + assert_wait((event_t)reservation, THREAD_UNINT); + thread_block(THREAD_CONTINUE_NULL); + } + + /* remove from queues */ + pmc_internal_reservation_remove(reservation); + + /* free reservation */ + reservation_free(reservation); + + return KERN_SUCCESS; +} + +/* + * pmc_context_switch performs all context switching necessary to save all pmc + * state associated with @oldThread (and the task to which @oldThread belongs), + * as well as to restore all pmc state associated with @newThread (and the task + * to which @newThread belongs). + * + * NOTE: This method IS interrupt safe. + */ +boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread) { + pmc_reservation_t resv = NULL; + uint32_t cpuNum = cpu_number(); + + /* Out going thread: save pmc state */ + lck_spin_lock(&reservations_spin); + + /* interate over any reservations */ + queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { + if(resv && oldThread == resv->thread) { + + /* check if we can read the associated pmc from this core. */ + if(pmc_accessible_from_core(resv->pmc, cpuNum)) { + /* save the state At this point, if it fails, it fails. 
*/ + (void)pmc_internal_reservation_context_out(resv); + } + } + } + + queue_iterate(task_reservations, resv, pmc_reservation_t, link) { + if(resv && resv->task == oldThread->task) { + if(pmc_accessible_from_core(resv->pmc, cpuNum)) { + (void)pmc_internal_reservation_context_out(resv); + } + } + } + + /* Incoming task: restore */ + + queue_iterate(thread_reservations, resv, pmc_reservation_t, link) { + if(resv && resv->thread == newThread) { + if(pmc_accessible_from_core(resv->pmc, cpuNum)) { + (void)pmc_internal_reservation_context_in(resv); + } + } + } + + + queue_iterate(task_reservations, resv, pmc_reservation_t, link) { + if(resv && resv->task == newThread->task) { + if(pmc_accessible_from_core(resv->pmc, cpuNum)) { + (void)pmc_internal_reservation_context_in(resv); + } + } + } + + lck_spin_unlock(&reservations_spin); + + return TRUE; +} + +#else /* !CONFIG_COUNTERS */ + +#if 0 +#pragma mark - +#pragma mark Dummy functions +#endif + +/* + * In the case that someone has chosen not to include the PMC KPI in some + * configuration, we still have exports for kexts, so we'll need to define stub + * methods that return failures. 
+ */ +kern_return_t perf_monitor_register(perf_monitor_object_t monitor __unused, + perf_monitor_methods_t *methods __unused) { + return KERN_FAILURE; +} + +kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_register(perf_monitor_object_t monitor __unused, + pmc_object_t pmc __unused, pmc_methods_t *methods __unused, void *object __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_unregister(perf_monitor_object_t monitor __unused, + pmc_object_t pmc __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_create_config(pmc_t pmc __unused, + pmc_config_t *config __unused) { + return KERN_FAILURE; +} + +void pmc_free_config(pmc_t pmc __unused, pmc_config_t config __unused) { +} + +kern_return_t pmc_config_set_value(pmc_t pmc __unused, + pmc_config_t config __unused, uint8_t id __unused, + uint64_t value __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc __unused, + pmc_config_t config __unused, uint64_t threshold __unused, + pmc_interrupt_method_t method __unused, void *refCon __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_get_pmc_list(pmc_t **pmcs __unused, size_t *pmcCount __unused) { + return KERN_FAILURE; +} + +void pmc_free_pmc_list(pmc_t *pmcs __unused, size_t pmcCount __unused) { +} + +kern_return_t pmc_find_by_name(const char *name __unused, pmc_t **pmcs __unused, + size_t *pmcCount __unused) { + return KERN_FAILURE; +} + +const char *pmc_get_name(pmc_t pmc __unused) { + return ""; +} + +kern_return_t pmc_get_accessible_core_list(pmc_t pmc __unused, + uint32_t **logicalCores __unused, size_t *logicalCoreCt __unused) { + return KERN_FAILURE; +} + +boolean_t pmc_accessible_from_core(pmc_t pmc __unused, + uint32_t logicalCore __unused) { + return FALSE; +} + +kern_return_t pmc_reserve(pmc_t pmc __unused, + pmc_config_t config __unused, pmc_reservation_t *reservation __unused) { + return KERN_FAILURE; +} + 
+kern_return_t pmc_reserve_task(pmc_t pmc __unused, + pmc_config_t config __unused, task_t task __unused, + pmc_reservation_t *reservation __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reserve_thread(pmc_t pmc __unused, + pmc_config_t config __unused, thread_t thread __unused, + pmc_reservation_t *reservation __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reservation_start(pmc_reservation_t reservation __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reservation_stop(pmc_reservation_t reservation __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reservation_read(pmc_reservation_t reservation __unused, + uint64_t *value __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reservation_write(pmc_reservation_t reservation __unused, + uint64_t value __unused) { + return KERN_FAILURE; +} + +kern_return_t pmc_reservation_free(pmc_reservation_t reservation __unused) { + return KERN_FAILURE; +} + + +#endif /* !CONFIG_COUNTERS */ diff --git a/osfmk/pmc/pmc.h b/osfmk/pmc/pmc.h new file mode 100644 index 000000000..ab396a9c6 --- /dev/null +++ b/osfmk/pmc/pmc.h @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2009 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _MACH_PMC_H_ +#define _MACH_PMC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +/**************************************************************************** + * The four main object types + * + * 1. Performance monitors (perf_monitor_t) - represent the hardware that + * encapsulates a set of performance counters + * 2. Performance Counters (pmc_t) - represents each individual counter + * 3. Performance Counter Configs (pmc_config_t) - represents the settings + * applied to a performance counter (e.g., what to count) + * 4. Performance Counter Reservations (pmc_reservation_t) - represents a config along + * with its saved counter value, and the context under which it will count. + * + ****************************************************************************/ + +/* + * The following objects are in-kernel stand-ins for objects that will be implemented + * in the driver kexts. They are generally instances of C++ objects. We make opaque + * handles for each distinct type for a little bit of type safety when used from the + * kernel layer. These objects are not to be introspected by the kernel at any time, + * only used as arguments in the registered driver methods.
+ */ + +// IOPerformanceMonitor instances +typedef void * perf_monitor_object_t; + +// IOPerformanceCounter instances +typedef void * pmc_object_t; + +// IOPerformanceCounterConfig instances +typedef void * pmc_config_object_t; + +// END Kext-implemented objects + +// Forward declarations +struct pmc_reservation; +typedef struct pmc_reservation *pmc_reservation_t; + +struct pmc_config; +typedef struct pmc_config *pmc_config_t; + +/**************************************************************************** + * Method types for performance monitor driver registration + * + * Driver registration happens with no intervention from the driver writers - + * it is handled automatically by the IOProfileFamily kext. Registration + * happens whenever any IOPerformanceMonitor subclass attaches to the registry. + * Failure to successfully register with the kernel will prevent successful attachment + * to the IORegistry (this way only usable PMCs and Perf Monitors will be shown.) + ****************************************************************************/ + +/*!typedef + * @abstract A pointer to a method that returns whether or not the given performance monitor driver supports context switched counters + * @param pm A registered performance monitor driver object (see perf_monitor_register). + * @result TRUE if the driver supports context switching, FALSE otherwise. + */ +typedef boolean_t (*perfmon_supports_context_switch_method_t)(perf_monitor_object_t pm); + +/*!typedef + * @abstract A pointer to a method that enables a set of counters. + * @discussion Implementations of this method type must be safe to call at interrupt context. + * @param pmcs An array of pmc_object_t instances (non-NULL). + * @param pmcCount The number of elements in the @pmcs array. + * @result KERN_SUCCESS upon successful global enable of the given counters (may return IOKit error codes).
+ */ +typedef kern_return_t (*perfmon_enable_counters_method_t)(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount); + +/*!typedef + * @abstract A pointer to a method that disables a set of counters. + * @discussion Implementations of this method type must be safe to call at interrupt context. + * See perfmon_enable_counters_method_t + * @result See perfmon_enable_counters_method_t + */ +typedef kern_return_t (*perfmon_disable_counters_method_t)(perf_monitor_object_t pm, pmc_object_t *pmcs, uint32_t pmcCount); + +#define MACH_PERFMON_METHODS_VERSION 0 + +/*!struct perf_monitor_methods + * @abstract A set of method pointers to be used when interacting with a performance monitor object + * @discussion This structure is the set of driver-implemented callback methods to be used when + * interacting with a new performance monitor from the kernel. + */ +typedef struct perf_monitor_methods { + uint32_t perf_monitor_methods_version; // Always set to MACH_PERFMON_METHODS_VERSION when writing driver kexts + + // All methods are required. + perfmon_supports_context_switch_method_t supports_context_switching; + perfmon_enable_counters_method_t enable_counters; + perfmon_disable_counters_method_t disable_counters; +}perf_monitor_methods_t; + + +/**************************************************************************** + * Method types for performance counter registration + * + * Registration of individual Performance Counters happens after the + * encapsulating Performance Monitor has been registered. This, too, happens + * with no intervention of driver-writers. It happens automatically whenever + * any IOPerformanceCounter subclass attaches to IORegistry. Failure to register + * with the kernel will prevent the IOPerformanceCounter instance from attaching + * to IORegistry. 
+ ****************************************************************************/ + +/*!typedef + * @abstract A pointer to a method that creates a configuration object for a counter + * @discussion Configuration objects create and hold the hardware representation for a set of driver-defined key-value pairs. + * Corresponds to IOPerformanceCounter::createConfiguration() method. + * @param pmc A valid pmc object + * @result NULL on failure, or a pmc_config_object_t on success. + */ +typedef pmc_config_object_t (*pmc_create_config_method_t)(pmc_object_t pmc); + +/*!typedef + * @abstract A pointer to a method to free a configuration object for a pmc + * @discussion Method should free a pmc config object created with a pmc_create_config_method_t above + * @param pmc The pmc object used to create the config + * @param config The config object to release + */ +typedef void (*pmc_free_config_method_t)(pmc_object_t pmc, pmc_config_object_t config); + +/*!typedef + * @abstract A pointer to a method to set a key-value pair on a config object. + * @discussion Configuration objects take key-value pairs for setting various bits in the pmc configs + * Corresponds to IOPerformanceCounterConfiguration::setValueForId() method. + * @param config Pointer to config object. + * @param id 8-bit integer ID (determined by the driver). + * @param value 64-bit integer value (interpreted by the driver). + * @result KERN_SUCCESS on success, KERN_FAILURE on bad value, KERN_INVALID_ARGUMENT on bad id + */ +typedef kern_return_t (*pmc_config_set_value_method_t)(pmc_config_object_t config, uint8_t id, uint64_t value); + +/*!typedef + * @abstract A pointer to a method that will be called when a Performance Counter causes a PMI interrupt + * @discussion Implementations of this method type must be safe to call at interrupt context.
+ * @param target The pmc_reservation_t that caused the interrupt + * @param refCon Any value as defined by the end-user who called pmc_config_set_interrupt_threshold + */ +typedef void (*pmc_interrupt_method_t)(void *target, void *refCon); + +/*!typedef + * @abstract A pointer to a method that will set the counter PMI threshold. + * @param config A configuration object + * @param threshold The number of events after which to cause an interrupt + * callback. + */ +typedef kern_return_t (*pmc_config_set_interrupt_threshold_method_t)(pmc_config_object_t config, uint64_t threshold); + +/*!typedef + * @abstract A pointer to a method that will set the method to be called when the counter threshold is reached. + * @param config A configuration object. + * @param target A reference pointer used as the first argument to the callback method. + * @param method A pointer to the method to be called. + * @param refCon A reference pointer to be used as the second argument to the callback method (may be NULL). + */ +typedef kern_return_t (*pmc_config_set_interrupt_threshold_handler_method_t)(pmc_config_object_t config, void *target, pmc_interrupt_method_t method, void *refCon); + +/*!typedef + * @abstract A pointer to a method that will configure a pmc's control registers according to the given configuration object. + * @discussion Implementations of this method type must be safe to call at interrupt context. + * @param pmc The pmc reference object. + * @param config A configuration object. + */ +typedef kern_return_t (*pmc_set_config_method_t)(pmc_object_t pmc, pmc_config_object_t config); + +/*!typedef + * @abstract A pointer to a method that returns the Performance Monitor Object for a counter + * @discussion A pointer to a method that returns the Performance Monitor Object for a counter. + * Implementations of this method type must be safe to call at interrupt context. + * Corresponds to IOPerformanceCounter::getMonitor() method. 
+ * @param pmc A valid pmc object + * @result NULL on failure, or a perf_monitor_object_t on success. + */ +typedef perf_monitor_object_t (*pmc_get_monitor_method_t)(pmc_object_t pmc); + +/*!typedef + * @abstract A pointer to a method that returns the registered name of the PMC. + * @discussion A pointer to a method that returns the registered name of the PMC. + * Corresponds to IOPerformanceCounter::getRegisteredName() method. + * + * NOTE: Driver authors must not allocate or copy the string during this method: + * it may be called from interrupt context or with spin locks held. + * + * @param pmc A valid pmc object. + * @result NULL on failure, or a pointer to the registered name of the pmc. + */ +typedef const char *(*pmc_get_name_method_t)(pmc_object_t pmc); + +/*!typedef + * @abstract A pointer to a method that returns if a pmc is accessible from a given logical core. + * @discussion A pointer to a method that returns if a pmc is accessible from a given logical core. + * Implementations of this method type must be safe to call at interrupt context. + * @param pmc A valid pmc object. + * @param core The logical core number. + * @result TRUE if the pmc can be read in the execution context of the given logical core, FALSE otherwise. + */ +typedef boolean_t (*pmc_is_accessible_from_logical_core_method_t)(pmc_object_t pmc, uint32_t core); + +/*!typedef + * @abstract A pointer to a method that returns an array of the logical cores from which a PMC can be accessed. + * @discussion A pointer to a method that returns an array of the logical cores from which a PMC can be accessed. Resulting array of cores should not be released by xnu. + * Implementations of this method type must be safe to call at interrupt context. + * @param pmc A valid pmc object + * @param cores A value-returned array of logical cores that can access the given PMC. + * @param coreCt A value-return count of the number of entries in the @cores array. 
+ * @result KERN_SUCCESS on success, KERN_FAILURE otherwise. + */ +typedef kern_return_t (*pmc_get_accessible_cores_method_t)(pmc_object_t pmc, uint32_t **cores, size_t *coreCt); + +/*!typedef + * @abstract A pointer to a method that attempts to read the count from the given counter hardware. + * @discussion Implementations of this method type must be safe to call from interrupt context. * @param pmc The counter from which to read + * @param value Storage for the counter's hardware value. + */ +typedef kern_return_t (*pmc_get_count_method_t)(pmc_object_t pmc, uint64_t *value); + +/*!typedef + * @abstract A pointer to a method that attempts to write the count to the given counter hardware. + * @discussion Implementations of this method type must be safe to call from interrupt context. + * @param pmc The counter to which to write. + * @param value The value to write to the hardware. + */ +typedef kern_return_t (*pmc_set_count_method_t)(pmc_object_t pmc, uint64_t value); + + +/*!typedef + * @abstract A pointer to a method that disables the counter hardware for a given PMC. + * @discussion A pointer to a method that disables the counter hardware for + * a given PMC. + * Implementations of this method type must be safe to call at interrupt context. + * @param pmc A valid pmc object. + * @result KERN_SUCCESS on successful disable + */ +typedef kern_return_t (*pmc_disable_method_t)(pmc_object_t pmc); + +/*!typedef + * @abstract A pointer to a method that enables the counter hardware for a given PMC. + * @discussion A pointer to a method that enables the counter hardware for a given PMC. + * Implementations of this method type must be safe to call at interrupt context. + * @param pmc A valid pmc object. 
+ * @result KERN_SUCCESS on successful enable + */ +typedef kern_return_t (*pmc_enable_method_t)(pmc_object_t pmc); + +typedef kern_return_t (*pmc_open_method_t)(pmc_object_t pmc, void *object); +typedef kern_return_t (*pmc_close_method_t)(pmc_object_t pmc, void *object); + +#define MACH_PMC_METHODS_VERSION 0 + +/*! + * @struct pmc_methods + * @abstract Performance Counter Registration methods. + * @discussion This structure represents a set of driver-implemented methods to be used by the kernel + * when interacting with the associated performance counter. Since a Performance Monitor may + * implement any number of distinct types of Performance Counters, each counter registers with + * its own set of callback methods. + */ +typedef struct pmc_methods { + uint32_t pmc_methods_version; // Always set to MACH_PMC_METHODS_VERSION in your driver. + + // All methods are required. + pmc_create_config_method_t create_config; + pmc_free_config_method_t free_config; + pmc_config_set_value_method_t config_set_value; + pmc_config_set_interrupt_threshold_method_t config_set_threshold; + pmc_config_set_interrupt_threshold_handler_method_t config_set_handler; + pmc_set_config_method_t set_config; + + pmc_get_monitor_method_t get_monitor; + pmc_get_name_method_t get_name; + pmc_is_accessible_from_logical_core_method_t accessible_from_core; + pmc_get_accessible_cores_method_t accessible_cores; + pmc_get_count_method_t get_count; + pmc_set_count_method_t set_count; + pmc_disable_method_t disable; + pmc_enable_method_t enable; + pmc_open_method_t open; + pmc_close_method_t close; +}pmc_methods_t; + +/* + * Kext interface Methods + * + * These methods would be exported to apple-internal kexts, but not to 3rd-party kexts, and + * definitely not to user space. + * + * All Performance Monitor and Performance Counter registration (accomplished via the following methods) + * is handled automatically via IOProfileFamily's base classes. 
However, we'd need to export these + * methods to apple-private KPI so that IOProfileFamily can call these methods when new objects attach + * to the IORegistry. + * + */ + +/*!fn + * @abstract Registers a new performance monitor driver and its associated pointers. + * @discussion Kexts that implement performance monitor drivers will call this method with a + * filled-in perf_monitor_methods_t structure (with version set to MACH_PERFMON_METHODS_VERSION). + * The PMC interface will then register the new driver internally. + * @param monitor A handle to the performance monitor driver instance you are registering. Must not be NULL. + * @param methods A filled-in perf_monitor_methods_t structure with version set to MACH_PERFMON_METHODS_VERSION. + * @result KERN_SUCCESS if the new driver was successfully registered, KERN_INVALID_VALUE if the + * version of the passed-in perf_monitor_methods_t structure does not match that which is expected, + * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance monitor + * driver, KERN_INVALID_ARGUMENT if one or both of the arguments is null + */ +kern_return_t perf_monitor_register(perf_monitor_object_t monitor, perf_monitor_methods_t *methods); + +/*!fn + * @abstract Unregisters a performance monitor driver and frees space associated with its pointers. + * @discussion Kexts that implement performance monitor drivers will call this method just before they unload + * to cause the performance monitor they implement to be removed from the kernel's PMC system. + * @param monitor A handle to a performance monitor driver instance that was previously registered with perf_monitor_register + * @result KERN_SUCCESS if the new driver was successfully unregistered, KERN_INVALID_VALUE if the + * passed-in perf_monitor_object_t does not match any registered performance monitor, KERN_INVALID_ARGUMENT if + * the argument is null, KERN_FAILURE if the performance monitor is currently in use. 
+ */ +kern_return_t perf_monitor_unregister(perf_monitor_object_t monitor); + +/*!fn + * @abstract Register a new Performance Counter, and attach it to the given Performance Monitor + * @discussion This method takes a Performance Monitor driver instance that was previously registered + * with perf_monitor_register, and attaches an instance of a Performance Counter + * that will be accessed with the given set of pmc methods. + * @param monitor A handle to a Performance Monitor that was previously registered. + * @param pmc A handle to the Performance Counter instance to be attached to the monitor object + * @param methods A filled-in pmc_methods_t structure with version set to MACH_PMC_METHODS_VERSION + * @param object an Object to be used during the open() and close() methods. Must be a subclass of IOService, cannot be NULL. + * @result KERN_SUCCESS if the new counter was successfully registered and attached, KERN_INVALID_VALUE if the + * version of the passed-in pmc_methods_t structure does not match that which is expected, + * KERN_RESOURCE_SHORTAGE if the kernel lacks the resources to register another performance counter + * instance, KERN_INVALID_ARGUMENT if any of the arguments is null + */ +kern_return_t pmc_register(perf_monitor_object_t monitor, pmc_object_t pmc, + pmc_methods_t *methods, void *object); + +/*!fn + * @abstract Unregisters a Performance Counter + * @discussion Does the reverse of pmc_register. + * @param monitor The registered Performance Monitor from which to remove a pmc. + * @param pmc The Performance Counter to unregister. + * @result KERN_SUCCESS if the counter was successfully unregistered, KERN_INVALID_VALUE if the + * passed-in pmc_object_t does not match any registered performance counter, KERN_INVALID_ARGUMENT if + * any argument is null, KERN_FAILURE if the performance counter is currently in use. 
+ */ +kern_return_t pmc_unregister(perf_monitor_object_t monitor, pmc_object_t pmc); + +/* + * Here begins the interface in-kernel and in-kext users will use to interact with PMCs and + * Performance Monitors. + * + * Basic usage is as follows: find your target counter, create a config for it, setup the config, + * reserve the counter using that config in a given execution context (system, or 1 task, or 1 thread), + * start the counter via the reservation object, stop the counter, and read the counter value similarly from the + * reservation object. When done, release the reservation object. + */ + +/*!struct perf_monitor + * @abstract In-kernel object to track a driver-implemented performance monitor. + */ +typedef struct perf_monitor { + /* + * A reference-pointer used as the first argument to all callback methods + * (to seamlessly work with C++ objects). This is the same value that was + * used in the perf_monitor_register() method. + */ + perf_monitor_object_t object; + + // Copy of the pointers used to interact with the above instance + perf_monitor_methods_t methods; + + // reference counted + uint32_t useCount; + + // link to other perf monitors + queue_chain_t link; +}*perf_monitor_t; + +/*!struct pmc + * @abstract In-kernel object to track an individual driver-implemented performance counter + */ +typedef struct pmc { + /* + * A reference-pointer used as the first argument to all callback methods + * (to seamlessly work with C++ objects). This is the same value that was + * used in the pmc_register() method. 
+ */ + pmc_object_t object; + + /* Copy of the pointers used to interact with the above instance */ + pmc_methods_t methods; + + /* Object to be used during open/close methods */ + void *open_object; + + /* reference counted */ + uint32_t useCount; + + /* link to parent */ + perf_monitor_t monitor; + + /* link to other PMCs */ + queue_chain_t link; +}*pmc_t; + +// Scope flags (highest order bits) +#define PMC_FLAG_SCOPE_SYSTEM 0x80000000U +#define PMC_FLAG_SCOPE_TASK 0x40000000U +#define PMC_FLAG_SCOPE_THREAD 0x20000000U +#define PMC_SCOPE_MASK 0xE0000000U + +#define PMC_FLAG_IS_SYSTEM_SCOPE(x) \ + ((x & PMC_FLAG_SCOPE_SYSTEM) == PMC_FLAG_SCOPE_SYSTEM) + +#define PMC_FLAG_IS_TASK_SCOPE(x) \ + ((x & PMC_FLAG_SCOPE_TASK) == PMC_FLAG_SCOPE_TASK) + +#define PMC_FLAG_IS_THREAD_SCOPE(x) \ + ((x & PMC_FLAG_SCOPE_THREAD) == PMC_FLAG_SCOPE_THREAD) + +#define PMC_FLAG_SCOPE(x) (x & PMC_SCOPE_MASK) + +/* + * Reservation state + * + * The state of a reservation is actually a 3-tuple of the current state, an active context count, + * and a set of modifier flags. To avoid using locks, these are combined into a single uint32_t + * that can be modified with OSCompareAndSwap. 
+ * + */ + +typedef uint32_t pmc_state_t; + +#define PMC_STATE_STATE_INVALID 0x00000000U +#define PMC_STATE_STATE_STOP 0x10000000U +#define PMC_STATE_STATE_CAN_RUN 0x20000000U +#define PMC_STATE_STATE_LOAD 0x30000000U +#define PMC_STATE_STATE_RUN 0x40000000U +#define PMC_STATE_STATE_STORE 0x50000000U +#define PMC_STATE_STATE_INTERRUPT 0x60000000U +#define PMC_STATE_STATE_DEALLOC 0x70000000U + +#define PMC_STATE_STATE_MASK 0xF0000000U + +#define PMC_STATE_STATE(x) ((x) & PMC_STATE_STATE_MASK) +#define PMC_STATE_STATE_SET(x, state) (((x) & ~(PMC_STATE_STATE_MASK)) | state) + +#define PMC_STATE_FLAGS_STOPPING 0x08000000U +#define PMC_STATE_FLAGS_DEALLOCING 0x04000000U +#define PMC_STATE_FLAGS_INTERRUPTING 0x02000000U + +#define PMC_STATE_FLAGS_MASK 0x0F000000U + +#define PMC_STATE_FLAGS(x) ((x) & PMC_STATE_FLAGS_MASK) +#define PMC_STATE_FLAGS_MODIFY(x, set, clear) (((x) & ~(clear)) | set) + +#define PMC_STATE_CONTEXT_COUNT_MASK 0x0000FFFFU + +#define PMC_STATE_CONTEXT_COUNT(x) ((x) & PMC_STATE_CONTEXT_COUNT_MASK) +#define PMC_STATE_CONTEXT_COUNT_MODIFY(x, mod) (((PMC_STATE_CONTEXT_COUNT(x) + (mod)) < PMC_STATE_CONTEXT_COUNT_MASK) ? 
(x) + (mod) : PMC_STATE_CONTEXT_COUNT_MASK) + +#define PMC_STATE(state, context_count, flags) (PMC_STATE_STATE(state) | PMC_STATE_FLAGS(flags) | PMC_STATE_CONTEXT_COUNT(context_count)) +#define PMC_STATE_MODIFY(x, context_count_mod, flags_set, flags_clear) (PMC_STATE_FLAGS_MODIFY(PMC_STATE_CONTEXT_COUNT_MODIFY(x, context_count_mod), flags_set, flags_clear)) +#define PMC_STATE_MOVE(x, state, context_count_mod, flags_set, flags_clear) (PMC_STATE_STATE_SET(PMC_STATE_MODIFY(x, context_count_mod, flags_set, flags_clear), state)) + +#define PMC_STATE_INVALID PMC_STATE(PMC_STATE_STATE_INVALID, 0, 0) + +/*!struct pmc_reservation + * @abstract In-kernel object to track an individual reservation + */ +struct pmc_reservation { + pmc_t pmc; // Pointer to in-kernel pmc which is reserved + pmc_config_t config; // counter configuration + + // stored counter value + uint64_t value; + + // TODO: Add mach-port (user-export object?) + + volatile uint32_t flags __attribute__((aligned(4))); + volatile pmc_state_t state __attribute__((aligned(4))); + volatile uint32_t active_last_context_in __attribute__((aligned(4))); + + union { + task_t task; // not retained + thread_t thread; // not retained + }; + + queue_chain_t link; +}; + +// END Kernel-objects + + +// Methods exported to kernel (and kext) consumers + +/*!fn + * @abstract Creates a new configuration object for the given pmc. + * @discussion This method is not interrupt safe. + * @param pmc The Perf Counter for which to create a configuration. + * @param config A value-return configuration object. + */ +kern_return_t pmc_create_config(pmc_t pmc, pmc_config_t *config); + +/*!fn + * @abstract Releases a configuration object for the given pmc. + * @discussion This method is not interrupt safe. + * @param pmc The Perf Counter for which to release a configuration. + * @param config A configuration object to be released. 
+ */ +void pmc_free_config(pmc_t pmc, pmc_config_t config); + +/*!fn + * @abstract Setup the configuration + * @discussion Configurations for counter are architecture-neutral key-value pairs (8bit key, 64bit value). Meanings of the keys and values are defined by the driver-writer and are listed in XML form available for interrogation via the CoreProfile framework. This method is not interrupt safe. + * @result KERN_SUCCESS on success. + */ +kern_return_t pmc_config_set_value(pmc_t pmc, pmc_config_t config, uint8_t id, uint64_t value); + +/*!fn + * @abstract Interrupt Threshold Setup + * @discussion In order to configure a PMC to use PMI (cause an interrupt after so-many events occur), use this method, and provide a function to be called after the interrupt occurs, along with a reference context. PMC Threshold handler methods will have the pmc that generated the interrupt as the first argument when the interrupt handler is invoked, and the given @refCon (which may be NULL) as the second. This method is not interrupt safe. + */ +kern_return_t pmc_config_set_interrupt_threshold(pmc_t pmc, pmc_config_t config, uint64_t threshold, pmc_interrupt_method_t method, void *refCon); + +/*!fn + * @abstract Returns an allocated list of all pmc_t's known to the kernel. + * @discussion Callers should free the resultant list via pmc_free_pmc_list. This method is not interrupt safe. + * @param pmcs Storage for the resultant pmc_t array pointer. + * @param pmcCount Storage for the resultant count of pmc_t's. + */ +kern_return_t pmc_get_pmc_list(pmc_t **pmcs, size_t *pmcCount); + +/*!fn + * @abstract Free a previously allocated list of pmcs. + * @discussion This method is not interrupt safe. + * @param pmcs PMC list to free. + * @param pmcCount Number of pmc_t's in list. + */ +void pmc_free_pmc_list(pmc_t *pmcs, size_t pmcCount); + +/*!fn + * @abstract Finds pmcs by partial string matching. 
+ * @discussion This method returns a list of pmcs (similar to pmc_get_pmc_list) whose names match the given string up to its length. For example, searching for "ia32" would return pmcs "ia32gp0" and "ia32gp1". Results should be released by the caller using pmc_free_pmc_list. + * @param name Partial string to search for. + * @param pmcs Storage for the resultant pmc_t array pointer. + * @param pmcCount Storage for the resultant count of pmc_t's. + */ +kern_return_t pmc_find_by_name(const char *name, pmc_t **pmcs, size_t *pmcCount); + +/*!fn + * @abstract Returns a pointer to the human-readable name of the given pmc. + * @discussion The returned pointer is not a copy, and does not need to be freed. This method is interrupt safe. + * @param pmc The PMC whose name should be returned. + */ +const char *pmc_get_name(pmc_t pmc); + +/*!fn + * @abstract Returns a list of logical cores from which the given pmc can be read from or written to. + * @discussion This method can return a NULL list with count of 0 -- this indicates any core can read the given pmc. This method does not allocate the list, therefore callers should take care not to mutate or free the resultant list. This method is interrupt safe. + * @param pmc The PMC for which to return the cores that can read/write it. + * @param logicalCores Storage for the pointer to the list. + * @param logicalCoreCt Value-return number of elements in the returned list. 0 indicates all cores can read/write the given pmc. + */ +kern_return_t pmc_get_accessible_core_list(pmc_t pmc, uint32_t **logicalCores, size_t *logicalCoreCt); + +/*!fn + * @abstract Returns TRUE if the given logical core can read/write the given PMC. + * @discussion This method is interrupt safe. + * @param pmc The PMC to test + * @param logicalCore The core from which to test.
+ */ +boolean_t pmc_accessible_from_core(pmc_t pmc, uint32_t logicalCore); + +/* + * BEGIN PMC Reservations + * + * These are how you reserve a PMC, start and stop it counting, and read and write + * its value. + */ + +/*!fn + * @abstract Reserve a PMC for System-wide counting. + * @discussion This method will attempt to reserve the given pmc at system-scope. It will configure the given pmc to count the event indicated by the given configuration object. This method consumes the given configuration object if the return value is KERN_SUCCESS - any other return value indicates the caller should free the configuration object via pmc_free_config. This method is not interrupt safe. + * @param pmc The PMC to reserve. + * @param config The configuration object to use with the given pmc. + * @param reservation A value-return reservation object to be used in pmc_reservation_* methods. + * @result This method will return one of the following values: + * KERN_SUCCESS: The given pmc was successfully reserved in system-scope; the given config object has been consumed and should not be freed by the caller, + * KERN_FAILURE: The given pmc is already reserved in a conflicting scope, + * KERN_INVALID_ARGUMENT: All three arguments are required to be non-NULL, but at least one is NULL, + * KERN_RESOURCE_SHORTAGE: Could not allocate a new reservation object. + */ +kern_return_t pmc_reserve(pmc_t pmc, pmc_config_t config, pmc_reservation_t *reservation); + + +/*!fn + * @abstract Reserve a PMC for task-wide counting. + * @discussion This method will attempt to reserve the given pmc for task-wide counting. The resulting reservation will only count when the task is running on one of the logical cores that can read the given pmc. The semantics of this method are the same as pmc_reserve in all other respects. + * @param pmc The PMC to reserve + * @param config The configuration object to use. + * @param task The task for which to enable the counter. 
+ * @param reservation A value-return reservation object. + * @result See pmc_reserve + */ +kern_return_t pmc_reserve_task(pmc_t pmc, pmc_config_t config, task_t task, pmc_reservation_t *reservation); + +/*!fn + * @abstract Reserve a PMC for thread-wide counting. + * @discussion This method will attempt to reserve the given pmc for thread-wide counting. The resulting reservation will only count when the thread is running on one of the logical cores that can read the given pmc. The semantics of this method are the same as pmc_reserve_task in all other respects. + * @param pmc The PMC to reserve + * @param config The configuration object to use. + * @param thread The thread for which to enable the counter. + * @param reservation A value-return reservation object. + * @result See pmc_reserve + */ +kern_return_t pmc_reserve_thread(pmc_t pmc, pmc_config_t config, thread_t thread, pmc_reservation_t *reservation); + +/*!fn + * @abstract Start counting + * @discussion This method instructs the given reservation to start counting as soon as possible. If the reservation is for a thread (or task) other than the current thread, or for a pmc that is not accessible from the current logical core, the reservation will start counting the next time the thread (or task) runs on a logical core that can access the pmc. This method is interrupt safe. If this method is called from outside of interrupt context, it may block. + * @param reservation The reservation to start counting + */ +kern_return_t pmc_reservation_start(pmc_reservation_t reservation); + +/*!fn + * @abstract Stop counting + * @discussion This method instructs the given reservation to stop counting as soon as possible. If the reservation is for a thread (or task) other than the current thread, or for a pmc that is not accessible from the current logical core, the reservation will stop counting the next time the thread (or task) ceases to run on a logical core that can access the pmc. This method is interrupt safe.
If called from outside of interrupt context, this method may block. + * @param reservation The reservation to stop counting + */ +kern_return_t pmc_reservation_stop(pmc_reservation_t reservation); + +/*!fn + * @abstract Read the counter value + * @discussion This method will read the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be read directly from the hardware. Otherwise, the value stored in the reservation is returned. This method is interrupt safe. If the caller is calling from outside of interrupt context, this method may block. + * @param reservation The reservation whose value to read. + * @param value Value-return event count + */ +kern_return_t pmc_reservation_read(pmc_reservation_t reservation, uint64_t *value); + +/*!fn + * @abstract Write the counter value + * @discussion This method will write the event count associated with the given reservation. If the pmc is currently on hardware, and the caller is currently executing in a context that both a) matches the reservation's context, and b) can access the reservation's pmc directly, the value will be written directly to the hardware. Otherwise, the value stored in the reservation is overwritten. This method is interrupt safe. If the caller is calling from outside of interrupt context, this method may block. + * @param reservation The reservation to write. + * @param value The event count to write + */ +kern_return_t pmc_reservation_write(pmc_reservation_t reservation, uint64_t value); + +/*!fn + * @abstract Free a reservation and all associated resources. + * @discussion This method will free the resources associated with the given reservation and release the associated PMC back to general availability. If the reservation is currently counting, it will be stopped prior to release.
This method is not interrupt safe. + * @param reservation The reservation to free + */ +kern_return_t pmc_reservation_free(pmc_reservation_t reservation); + +#if XNU_KERNEL_PRIVATE + +/*!fn + * @abstract Brings up all the necessary infrastructure required to use the pmc sub-system. + * @discussion For xnu-internal startup routines only. + */ +void pmc_bootstrap(void); + +/*!fn + * @abstract Performs a pmc context switch. + * @discussion This method will save all PMCs reserved for oldThread (and the task associated with oldThread), as well as restore all PMCs reserved for newThread (and the task associated with newThread). This method is for xnu-internal context switching routines only. + */ +boolean_t pmc_context_switch(thread_t oldThread, thread_t newThread); + +#endif // XNU_KERNEL_PRIVATE + +#ifdef __cplusplus +}; +#endif + +#endif // _MACH_PMC_H_ + diff --git a/osfmk/ppc/Diagnostics.c b/osfmk/ppc/Diagnostics.c index df6f7e01d..d6aa269c8 100644 --- a/osfmk/ppc/Diagnostics.c +++ b/osfmk/ppc/Diagnostics.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -468,7 +468,7 @@ int diagCall(struct savearea *save) { prssr = (processor_t)port->ip_kobject; /* Extract the processor */ is_write_unlock(current_space()); /* All done with the space now, unlock it */ - save->save_r3 = (uint64_t)(uint32_t)PerProcTable[prssr->cpu_num].ppe_vaddr; /* Pass back ther per proc */ + save->save_r3 = (uint64_t)(uint32_t)PerProcTable[prssr->cpu_id].ppe_vaddr; /* Pass back ther per proc */ return -1; /* Return and check asts */ /* @@ -485,7 +485,7 @@ int diagCall(struct savearea *save) { addrs = 0; /* Clear just in case */ ret = kmem_alloc_contig(kernel_map, &addrs, (vm_size_t)save->save_r4, - PAGE_MASK, 0, 0); /* That which does not make us stronger, kills us... */ + PAGE_MASK, 0, 0, FALSE); /* That which does not make us stronger, kills us... 
*/ if(ret != KERN_SUCCESS) addrs = 0; /* Pass 0 if error */ save->save_r3 = (uint64_t)addrs; /* Pass back whatever */ diff --git a/osfmk/ppc/PseudoKernel.c b/osfmk/ppc/PseudoKernel.c index 66dd94e2e..fc2a10ecc 100644 --- a/osfmk/ppc/PseudoKernel.c +++ b/osfmk/ppc/PseudoKernel.c @@ -217,6 +217,11 @@ void bbSetRupt(ReturnHandler *rh, thread_t act) { } +kern_return_t +enable_bluebox(host_t host, unsigned _taskID, unsigned _TWI_TableStart, + unsigned _Desc_TableStart); +kern_return_t disable_bluebox( host_t host ); + /* * This function is used to enable the firmware assist code for bluebox traps, system calls * and interrupts. diff --git a/osfmk/ppc/bcopytest.c b/osfmk/ppc/bcopytest.c index 5903c43c4..bcc86bfb4 100644 --- a/osfmk/ppc/bcopytest.c +++ b/osfmk/ppc/bcopytest.c @@ -56,12 +56,12 @@ void bcopytest(void) { db_printf("bcopy test\n"); - retr = kmem_alloc_wired(kernel_map, (vm_offset_t *)&sink, (1024*1024)+4096); /* Get sink area */ + retr = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&sink, (1024*1024)+4096); /* Get sink area */ if(retr != KERN_SUCCESS) { /* Did we find any memory at all? */ panic("bcopytest: Whoops... no memory for sink\n"); } - retr = kmem_alloc_wired(kernel_map, (vm_offset_t *)&source, (1024*1024)+4096); /* Get source area */ + retr = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&source, (1024*1024)+4096); /* Get source area */ if(retr != KERN_SUCCESS) { /* Did we find any memory at all? */ panic("bcopytest: Whoops... no memory for source\n"); } diff --git a/osfmk/ppc/commpage/commpage.h b/osfmk/ppc/commpage/commpage.h index ec176ec4e..64a139faf 100644 --- a/osfmk/ppc/commpage/commpage.h +++ b/osfmk/ppc/commpage/commpage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -83,6 +83,7 @@ extern char *commPagePtr64; // virt address of 64-bit commpage in kernel map extern void commpage_set_timestamp(uint64_t tbr, uint64_t secs, uint32_t ticks_per_sec); #define commpage_disable_timestamp() commpage_set_timestamp( 0, 0, 0 ) +#define commpage_set_memory_pressure( pressure ) extern int commpage_time_dcba( void ); diff --git a/osfmk/ppc/cpu.c b/osfmk/ppc/cpu.c index aa6727c90..774b94bbd 100644 --- a/osfmk/ppc/cpu.c +++ b/osfmk/ppc/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,7 +59,7 @@ #include #include -decl_mutex_data(static,ppt_lock); +#include unsigned int real_ncpus = 1; unsigned int max_ncpus = MAX_CPUS; @@ -104,7 +104,6 @@ cpu_bootstrap( { simple_lock_init(&rht_lock,0); simple_lock_init(&SignalReadyLock,0); - mutex_init(&ppt_lock,0); } @@ -195,7 +194,7 @@ cpu_machine_init( proc_info->cpu_flags |= BootDone|SignalReady; if (proc_info != mproc_info) { if (proc_info->ppXFlags & SignalReadyWait) { - (void)hw_atomic_and(&proc_info->ppXFlags, ~SignalReadyWait); + hw_atomic_and_noret(&proc_info->ppXFlags, ~SignalReadyWait); thread_wakeup(&proc_info->cpu_flags); } simple_unlock(&SignalReadyLock); @@ -223,7 +222,7 @@ cpu_per_proc_alloc( return (struct per_proc_info *)NULL; } - if ((debugger_stack = kalloc(KERNEL_STACK_SIZE)) == 0) { + if ((debugger_stack = kalloc(kernel_stack_size)) == 0) { kfree(proc_info, sizeof(struct per_proc_info)); kfree(interrupt_stack, INTSTACK_SIZE); return (struct per_proc_info *)NULL; @@ -239,7 +238,7 @@ cpu_per_proc_alloc( proc_info->pf = BootProcInfo.pf; proc_info->istackptr = (vm_offset_t)interrupt_stack + INTSTACK_SIZE - FM_SIZE; proc_info->intstack_top_ss = proc_info->istackptr; - proc_info->debstackptr = (vm_offset_t)debugger_stack + KERNEL_STACK_SIZE - FM_SIZE; + proc_info->debstackptr = 
(vm_offset_t)debugger_stack + kernel_stack_size - FM_SIZE; proc_info->debstack_top_ss = proc_info->debstackptr; queue_init(&proc_info->rtclock_timer.queue); @@ -262,7 +261,7 @@ cpu_per_proc_free( if (proc_info->cpu_number == master_cpu) return; kfree((void *)(proc_info->intstack_top_ss - INTSTACK_SIZE + FM_SIZE), INTSTACK_SIZE); - kfree((void *)(proc_info->debstack_top_ss - KERNEL_STACK_SIZE + FM_SIZE), KERNEL_STACK_SIZE); + kfree((void *)(proc_info->debstack_top_ss - kernel_stack_size + FM_SIZE), kernel_stack_size); kfree((void *)proc_info, sizeof(struct per_proc_info)); /* Release the per_proc */ } @@ -276,20 +275,18 @@ cpu_per_proc_register( struct per_proc_info *proc_info ) { - int cpu; - - mutex_lock(&ppt_lock); - if (real_ncpus >= max_ncpus) { - mutex_unlock(&ppt_lock); + int cpu; + + cpu = OSIncrementAtomic(&real_ncpus); + + if (real_ncpus > max_ncpus) { return KERN_FAILURE; } - cpu = real_ncpus; + proc_info->cpu_number = cpu; PerProcTable[cpu].ppe_vaddr = proc_info; PerProcTable[cpu].ppe_paddr = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)(unsigned int)proc_info) << PAGE_SHIFT; eieio(); - real_ncpus++; - mutex_unlock(&ppt_lock); return KERN_SUCCESS; } @@ -373,7 +370,7 @@ cpu_start( } else { simple_lock(&SignalReadyLock); if (!((*(volatile short *)&proc_info->cpu_flags) & SignalReady)) { - (void)hw_atomic_or(&proc_info->ppXFlags, SignalReadyWait); + hw_atomic_or_noret(&proc_info->ppXFlags, SignalReadyWait); thread_sleep_simple_lock((event_t)&proc_info->cpu_flags, &SignalReadyLock, THREAD_UNINT); } @@ -431,10 +428,8 @@ cpu_sleep( proc_info->running = FALSE; - if (proc_info->cpu_number != master_cpu) { - timer_queue_shutdown(&proc_info->rtclock_timer.queue); - proc_info->rtclock_timer.deadline = EndOfAllTime; - } + timer_queue_shutdown(&proc_info->rtclock_timer.queue); + proc_info->rtclock_timer.deadline = EndOfAllTime; fowner = proc_info->FPU_owner; /* Cache this */ if(fowner) /* If anyone owns FPU, save it */ diff --git a/osfmk/ppc/db_interface.c 
b/osfmk/ppc/db_interface.c index f6f5bca5b..3109d1b5e 100644 --- a/osfmk/ppc/db_interface.c +++ b/osfmk/ppc/db_interface.c @@ -77,6 +77,7 @@ struct savearea *ppc_last_saved_statep; struct savearea ppc_nested_saved_state; unsigned ppc_last_kdb_sp; +db_regs_t ddb_regs; /* register state */ extern int debugger_cpu; /* Current cpu running debugger */ diff --git a/osfmk/ppc/db_machdep.h b/osfmk/ppc/db_machdep.h index 20a2f1169..cb9162c4e 100644 --- a/osfmk/ppc/db_machdep.h +++ b/osfmk/ppc/db_machdep.h @@ -75,7 +75,7 @@ typedef addr64_t db_addr_t; /* address - unsigned */ typedef uint64_t db_expr_t; /* expression - signed??? try unsigned */ typedef struct savearea db_regs_t; -db_regs_t ddb_regs; /* register state */ +extern db_regs_t ddb_regs; /* register state */ #define DDB_REGS (&ddb_regs) extern int db_active; /* ddb is active */ diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c index 1adc102f7..c807d26a9 100644 --- a/osfmk/ppc/hibernate_ppc.c +++ b/osfmk/ppc/hibernate_ppc.c @@ -141,7 +141,7 @@ hibernate_vm_lock(void) if (getPerProc()->hibernate) { vm_page_lock_queues(); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock(&vm_page_queue_free_lock); } } @@ -150,7 +150,7 @@ hibernate_vm_unlock(void) { if (getPerProc()->hibernate) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); } } diff --git a/osfmk/ppc/hw_counters.h b/osfmk/ppc/hw_counters.h deleted file mode 100644 index 99b984a50..000000000 --- a/osfmk/ppc/hw_counters.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * Hardware specific performance counters - */ -#ifndef _HW_COUNTERS_H_ -#define _HW_COUNTERS_H_ - -#ifndef __ppc__ -#error This file is only useful on PowerPC. 
-#endif - -#pragma pack(4) /* Make sure the structure stays as we defined it */ - -typedef struct hw_counters { - - unsigned int hw_InVains; /* In vain */ - unsigned int hw_Resets; /* Reset */ - unsigned int hw_MachineChecks; /* Machine check */ - unsigned int hw_DSIs; /* DSIs */ - unsigned int hw_ISIs; /* ISIs */ - unsigned int hw_Externals; /* Externals */ - unsigned int hw_Alignments; /* Alignment */ - unsigned int hw_Programs; /* Program */ - unsigned int hw_FloatPointUnavailable; /* Floating point */ - unsigned int hw_Decrementers; /* Decrementer */ - unsigned int hw_IOErrors; /* I/O error */ - unsigned int hw_rsvd0; /* Reserved */ - unsigned int hw_SystemCalls; /* System call */ - unsigned int hw_Traces; /* Trace */ - unsigned int hw_FloatingPointAssists; /* Floating point assist */ - unsigned int hw_PerformanceMonitors; /* Performance monitor */ - unsigned int hw_Altivecs; /* VMX */ - unsigned int hw_rsvd1; /* Reserved */ - unsigned int hw_rsvd2; /* Reserved */ - unsigned int hw_rsvd3; /* Reserved */ - unsigned int hw_InstBreakpoints; /* Instruction breakpoint */ - unsigned int hw_SystemManagements; /* System management */ - unsigned int hw_AltivecAssists; /* Altivec Assist */ - unsigned int hw_Thermal; /* Thermals */ - unsigned int hw_rsvd5; /* Reserved */ - unsigned int hw_rsvd6; /* Reserved */ - unsigned int hw_rsvd7; /* Reserved */ - unsigned int hw_rsvd8; /* Reserved */ - unsigned int hw_rsvd9; /* Reserved */ - unsigned int hw_rsvd10; /* Reserved */ - unsigned int hw_rsvd11; /* Reserved */ - unsigned int hw_rsvd12; /* Reserved */ - unsigned int hw_rsvd13; /* Reserved */ - unsigned int hw_Trace601; /* Trace */ - unsigned int hw_SIGPs; /* SIGP */ - unsigned int hw_Preemptions; /* Preemption */ - unsigned int hw_ContextSwitchs; /* Context switch */ - unsigned int hw_Shutdowns; /* Shutdowns */ - unsigned int hw_Chokes; /* System ABENDs */ - unsigned int hw_DataSegments; /* Data Segment Interruptions */ - unsigned int hw_InstructionSegments; /* Instruction 
Segment Interruptions */ - unsigned int hw_SoftPatches; /* Soft Patch interruptions */ - unsigned int hw_Maintenances; /* Maintenance interruptions */ - unsigned int hw_Instrumentations; /* Instrumentation interruptions */ - unsigned int hw_rsvd14; /* Reswerved */ - unsigned int hw_hdec; /* Hypervisor decrementer */ - - unsigned int hw_spare[18]; /* Pad to 256 bytes */ - -} hw_counters; -#pragma pack() - -extern hw_counters hw_counts(NCPUS); - -#endif /* _HW_COUNTERS_H_ */ diff --git a/osfmk/ppc/hw_lock.s b/osfmk/ppc/hw_lock.s index 4fae9fe6e..880bbf6ef 100644 --- a/osfmk/ppc/hw_lock.s +++ b/osfmk/ppc/hw_lock.s @@ -543,8 +543,12 @@ LEXT(hw_lock_held) */ .align 5 .globl EXT(hw_compare_and_store) + .globl EXT(OSCompareAndSwap) + .globl EXT(OSCompareAndSwapPtr) LEXT(hw_compare_and_store) +LEXT(OSCompareAndSwap) +LEXT(OSCompareAndSwapPtr) mr r6,r3 ; Save the old value @@ -656,13 +660,13 @@ andtry: lwarx r3,0,r6 ; Grab the area value * anchor is the pointer to the first element * element is the pointer to the element to insert * disp is the displacement into the element to the chain pointer - * - * NOTE: OSEnqueueAtomic() is aliased to this, see xnu/libkern/Makefile */ .align 5 .globl EXT(hw_queue_atomic) + .globl EXT(OSEnqueueAtomic) LEXT(hw_queue_atomic) +LEXT(OSEnqueueAtomic) mr r7,r4 ; Make end point the same as start mr r8,r5 ; Copy the displacement also @@ -701,13 +705,13 @@ hw_queue_comm: * anchor is the pointer to the first element * disp is the displacement into the element to the chain pointer * Returns element if found, 0 if empty. 
- * - * NOTE: OSDequeueAtomic() is aliased to this, see xnu/libkern/Makefile */ .align 5 .globl EXT(hw_dequeue_atomic) + .globl EXT(OSDequeueAtomic) LEXT(hw_dequeue_atomic) +LEXT(OSDequeueAtomic) mr r5,r3 ; Save the anchor @@ -838,51 +842,7 @@ mylock_attempt: bne 2b __ASMNL__ \ 3: -/* - * void mutex_init(mutex_t* l, etap_event_t etap) - * - */ .align 5 - .globl EXT(mutex_init) -LEXT(mutex_init) - - PROLOG(0) - li r10,0 - stw r10,MUTEX_DATA(r3) ; clear lock word - sth r10,MUTEX_WAITERS(r3) ; init waiter count - sth r10,MUTEX_PROMOTED_PRI(r3) -#if MACH_LDEBUG - li r11,MUTEX_ATTR_DEBUG - stw r10,MUTEX_STACK(r3) ; init caller pc - stw r10,MUTEX_THREAD(r3) ; and owning thread - li r9, MUTEX_TAG - stw r9, MUTEX_TYPE(r3) ; set lock type - stw r11,MUTEX_ATTR(r3) - addi r8,r3,MUTEX_STACK-4 - li r9,MUTEX_FRAMES -mlistck: - stwu r10,4(r8) ; init stack - subi r9,r9,1 - cmpi cr0,r9,0 - bne mlistck -#endif /* MACH_LDEBUG */ - EPILOG - blr - -/* - * void lck_mtx_lock_ext(lck_mtx_ext_t*) - * - */ - .align 5 - .globl EXT(lck_mtx_lock_ext) -LEXT(lck_mtx_lock_ext) -#if MACH_LDEBUG - .globl EXT(mutex_lock) -LEXT(mutex_lock) - - .globl EXT(_mutex_lock) -LEXT(_mutex_lock) -#endif mr r11,r3 ; Save lock addr mlckeEnter: lwz r0,MUTEX_ATTR(r3) @@ -890,28 +850,28 @@ mlckeEnter: CHECK_SETUP(r12) CHECK_MUTEX_TYPE() - bf MUTEX_ATTR_DEBUGb,L_mutex_lock_assert_wait_2 + bf MUTEX_ATTR_DEBUGb,L_mtx_lock_assert_wait_2 PROLOG(0) bl EXT(assert_wait_possible) mr. 
r3,r3 - bne L_mutex_lock_assert_wait_1 - lis r3,hi16(L_mutex_lock_assert_wait_panic_str) - ori r3,r3,lo16(L_mutex_lock_assert_wait_panic_str) + bne L_mtx_lock_assert_wait_1 + lis r3,hi16(L_mtx_lock_assert_wait_panic_str) + ori r3,r3,lo16(L_mtx_lock_assert_wait_panic_str) bl EXT(panic) BREAKPOINT_TRAP ; We die here anyway .data -L_mutex_lock_assert_wait_panic_str: +L_mtx_lock_assert_wait_panic_str: STRINGD "mutex lock attempt with assert_wait_possible false\n\000" .text -L_mutex_lock_assert_wait_1: +L_mtx_lock_assert_wait_1: lwz r3,FM_ARG0(r1) lwz r11,FM_ARG0+0x04(r1) lwz r2,(FM_ALIGN(0)+FM_SIZE+FM_CR_SAVE)(r1) mtcr r2 EPILOG -L_mutex_lock_assert_wait_2: +L_mtx_lock_assert_wait_2: mfsprg r6,1 ; load the current thread bf MUTEX_ATTR_STATb,mlckestatskip ; Branch if no stat @@ -981,14 +941,6 @@ mlckespin01: .globl EXT(lck_mtx_lock) LEXT(lck_mtx_lock) -#if !MACH_LDEBUG - .globl EXT(mutex_lock) -LEXT(mutex_lock) - - .globl EXT(_mutex_lock) -LEXT(_mutex_lock) -#endif - mfsprg r6,1 ; load the current thread lwz r5,MUTEX_DATA(r3) ; Get the lock quickly mr r11,r3 ; Save lock addr @@ -1217,12 +1169,6 @@ mlStatSkip2: .align 5 .globl EXT(lck_mtx_try_lock_ext) LEXT(lck_mtx_try_lock_ext) -#if MACH_LDEBUG - .globl EXT(mutex_try) -LEXT(mutex_try) - .globl EXT(_mutex_try) -LEXT(_mutex_try) -#endif mr r11,r3 ; Save lock addr mlteEnter: lwz r0,MUTEX_ATTR(r3) @@ -1247,7 +1193,7 @@ mlteStatSkip: mfsprg r6,1 ; load the current thread lwz r5,MUTEX_DATA(r3) ; Get the lock value mr. r5,r5 ; Quick check - bne-- L_mutex_try_slow ; Can not get it now... + bne-- L_mtx_try_slow ; Can not get it now... mfmsr r9 ; Get the MSR value lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable @@ -1281,7 +1227,7 @@ mlteSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Kill reservation mtmsr r9 ; Say, any interrupts pending? 
- b L_mutex_try_slow + b L_mtx_try_slow /* @@ -1291,12 +1237,6 @@ mlteSlowX: .align 5 .globl EXT(lck_mtx_try_lock) LEXT(lck_mtx_try_lock) -#if !MACH_LDEBUG - .globl EXT(mutex_try) -LEXT(mutex_try) - .globl EXT(_mutex_try) -LEXT(_mutex_try) -#endif mfsprg r6,1 ; load the current thread lwz r5,MUTEX_DATA(r3) ; Get the lock value @@ -1329,7 +1269,7 @@ mltSlow02: li r0,0 mtcrf 1,r0 ; Set cr7 to zero -L_mutex_try_slow: +L_mtx_try_slow: PROLOG(0) lwz r6,MUTEX_DATA(r3) ; Quick check @@ -1411,21 +1351,6 @@ mtFail: li r3,0 ; Set failure code blr ; Return... -/* - * void mutex_unlock(mutex_t* l) - * - */ - .align 5 - .globl EXT(mutex_unlock) -LEXT(mutex_unlock) - - sync - mr r11,r3 ; Save lock addr -#if MACH_LDEBUG - b mlueEnter1 -#else - b mluEnter1 -#endif /* * void lck_mtx_ext_unlock(lck_mtx_ext_t* l) @@ -1434,10 +1359,6 @@ LEXT(mutex_unlock) .align 5 .globl EXT(lck_mtx_ext_unlock) LEXT(lck_mtx_ext_unlock) -#if MACH_LDEBUG - .globl EXT(mutex_unlock_rwcmb) -LEXT(mutex_unlock_rwcmb) -#endif mlueEnter: .globl EXT(mulckePatch_isync) LEXT(mulckePatch_isync) @@ -1455,7 +1376,7 @@ mlueEnter1: lwz r5,MUTEX_DATA(r3) ; Get the lock rlwinm. r4,r5,0,30,31 ; Quick check - bne-- L_mutex_unlock_slow ; Can not get it now... + bne-- L_mtx_unlock_slow ; Can not get it now... mfmsr r9 ; Get the MSR value lis r0,hi16(MASK(MSR_VEC)) ; Get vector enable ori r0,r0,lo16(MASK(MSR_FP)) ; Get FP enable @@ -1479,7 +1400,7 @@ mlueSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Dump reservation mtmsr r9 ; Say, any interrupts pending? - b L_mutex_unlock_slow ; Join slow path... + b L_mtx_unlock_slow ; Join slow path... 
/* * void lck_mtx_unlock(lck_mtx_t* l) @@ -1488,10 +1409,6 @@ mlueSlowX: .align 5 .globl EXT(lck_mtx_unlock) LEXT(lck_mtx_unlock) -#if !MACH_LDEBUG - .globl EXT(mutex_unlock_rwcmb) -LEXT(mutex_unlock_rwcmb) -#endif mluEnter: .globl EXT(mulckPatch_isync) LEXT(mulckPatch_isync) @@ -1524,14 +1441,14 @@ mluLoop: mluSlow0: cmpli cr0,r5,MUTEX_IND ; Is it a mutex indirect - bne-- L_mutex_unlock_slow ; No, go handle contention + bne-- L_mtx_unlock_slow ; No, go handle contention lwz r3,MUTEX_PTR(r3) ; load mutex ext pointer b mlueEnter1 mluSlowX: li r5,lgKillResv ; Killing field stwcx. r5,0,r5 ; Dump reservation -L_mutex_unlock_slow: +L_mtx_unlock_slow: PROLOG(0) @@ -1578,8 +1495,6 @@ muUnlock: .align 5 .globl EXT(lck_mtx_assert) LEXT(lck_mtx_assert) - .globl EXT(_mutex_assert) -LEXT(_mutex_assert) mr r11,r3 maEnter: lwz r5,MUTEX_DATA(r3) diff --git a/osfmk/ppc/interrupt.c b/osfmk/ppc/interrupt.c index 5ae0bc8f7..e1be2769d 100644 --- a/osfmk/ppc/interrupt.c +++ b/osfmk/ppc/interrupt.c @@ -48,7 +48,7 @@ #include #include -perfCallback perfIntHook; /* Pointer to CHUD trap hook routine */ +volatile perfCallback perfIntHook; /* Pointer to CHUD trap hook routine */ #if CONFIG_DTRACE #if (DEVELOPMENT || DEBUG ) @@ -87,8 +87,9 @@ struct savearea * interrupt( disable_preemption(); - if(perfIntHook) { /* Is there a hook? */ - if(perfIntHook(type, ssp, dsisr, dar) == KERN_SUCCESS) return ssp; /* If it succeeds, we are done... */ + perfCallback fn = perfIntHook; + if(fn) { /* Is there a hook? */ + if(fn(type, ssp, dsisr, dar) == KERN_SUCCESS) return ssp; /* If it succeeds, we are done... 
*/ } #if CONFIG_DTRACE diff --git a/osfmk/ppc/lock.h b/osfmk/ppc/lock.h index 376ff4991..0628f554f 100644 --- a/osfmk/ppc/lock.h +++ b/osfmk/ppc/lock.h @@ -71,12 +71,6 @@ #include #include -#if !MACH_LDEBUG -typedef lck_mtx_t mutex_t; -#else -typedef lck_mtx_ext_t mutex_t; -#endif /* !MACH_LDEBUG */ - #if !MACH_LDEBUG typedef lck_rw_t lock_t; #else @@ -85,8 +79,6 @@ typedef lck_rw_ext_t lock_t; extern unsigned int LockTimeOut; /* Number of hardware ticks of a lock timeout */ -#define mutex_unlock(l) mutex_unlock_rwcmb(l) - #endif /* MACH_KERNEL_PRIVATE */ #endif /* _PPC_LOCK_H_ */ diff --git a/osfmk/ppc/locks.h b/osfmk/ppc/locks.h index a13399a0b..639a820a8 100644 --- a/osfmk/ppc/locks.h +++ b/osfmk/ppc/locks.h @@ -131,8 +131,13 @@ typedef struct _lck_mtx_ext_ { typedef struct { unsigned int opaque[3]; } lck_mtx_t; + +typedef struct { + unsigned int opaque[16]; +} lck_mtx_ext_t; #else -typedef struct __lck_mtx_t__ lck_mtx_t; +typedef struct __lck_mtx_t__ lck_mtx_t; +typedef struct __lck_mtx_ext_t__ lck_mtx_ext_t; #endif #endif diff --git a/osfmk/ppc/locks_ppc.c b/osfmk/ppc/locks_ppc.c index c747215f2..c734043f5 100644 --- a/osfmk/ppc/locks_ppc.c +++ b/osfmk/ppc/locks_ppc.c @@ -1449,7 +1449,7 @@ lck_rw_ext_backtrace( while (frame < LCK_FRAMES_MAX) { stackptr_prev = stackptr; stackptr = ( unsigned int *)*stackptr; - if ( (((unsigned int)stackptr_prev) ^ ((unsigned int)stackptr)) > 8192) + if ( (((unsigned int)stackptr_prev) - ((unsigned int)stackptr)) > 8192) break; lck->lck_rw_deb.stack[frame] = *(stackptr+2); frame++; @@ -2167,36 +2167,6 @@ void lck_mtx_ext_init( lck_grp_t *grp, lck_attr_t *attr); -/* - * Routine: mutex_alloc - * Function: - * Allocate a mutex for external users who cannot - * hard-code the structure definition into their - * objects. - * For now just use kalloc, but a zone is probably - * warranted. 
- */ -mutex_t * -mutex_alloc( - unsigned short tag) -{ - mutex_t *m; - - if ((m = (mutex_t *)kalloc(sizeof(mutex_t))) != 0) - mutex_init(m, tag); - return(m); -} - -/* - * Routine: mutex_free - */ -void -mutex_free( - mutex_t *m) -{ - kfree((void *)m, sizeof(mutex_t)); -} - /* * Routine: lck_mtx_alloc_init */ @@ -2335,14 +2305,10 @@ lck_mtx_destroy( */ const char *simple_lock_labels = "ENTRY ILK THREAD DURATION CALLER"; -const char *mutex_labels = "ENTRY LOCKED WAITERS THREAD CALLER"; void db_print_simple_lock( simple_lock_t addr); -void db_print_mutex( - mutex_t * addr); - void db_show_one_simple_lock (db_expr_t addr, boolean_t have_addr, __unused db_expr_t count, @@ -2376,37 +2342,6 @@ db_print_simple_lock ( db_printf ("\n"); } -void -db_show_one_mutex (db_expr_t addr, boolean_t have_addr, - __unused db_expr_t count, - __unused char *modif) -{ - mutex_t * maddr = (mutex_t *)(unsigned long)addr; - - if (maddr == (mutex_t *)0 || !have_addr) - db_error ("No mutex\n"); -#if MACH_LDEBUG - else if (maddr->lck_mtx_deb.type != MUTEX_TAG) - db_error ("Not a mutex\n"); -#endif /* MACH_LDEBUG */ - - db_printf ("%s\n", mutex_labels); - db_print_mutex (maddr); -} - -void -db_print_mutex ( - mutex_t * addr) -{ - db_printf ("%08x %6d %7d", - addr, *addr, addr->lck_mtx.lck_mtx_waiters); -#if MACH_LDEBUG - db_printf (" %08x ", addr->lck_mtx_deb.thread); - db_printsym (addr->lck_mtx_deb.stack[0], DB_STGY_ANY); -#endif /* MACH_LDEBUG */ - db_printf ("\n"); -} - void db_show_one_lock( lock_t *lock) diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c index ad4add6f0..7edacae01 100644 --- a/osfmk/ppc/machine_routines.c +++ b/osfmk/ppc/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,9 +50,7 @@ unsigned int LockTimeOut = 1250000000; unsigned int MutexSpin = 0; -decl_mutex_data(static,mcpus_lock); -unsigned int mcpus_lock_initialized = 0; -unsigned int mcpus_state = 0; +static int max_cpus_initialized = 0; uint32_t warFlags = 0; #define warDisMBpoff 0x80000000 @@ -495,35 +493,30 @@ ml_enable_nap(int target_cpu, boolean_t nap_enabled) * Function: */ void -ml_init_max_cpus(unsigned int mcpus) +ml_init_max_cpus(unsigned int max_cpus) { + boolean_t current_state; - if (hw_compare_and_store(0,1,&mcpus_lock_initialized)) - mutex_init(&mcpus_lock,0); - mutex_lock(&mcpus_lock); - if ((mcpus_state & MAX_CPUS_SET) - || (mcpus == 0) - || (mcpus > MAX_CPUS)) - panic("ml_init_max_cpus(): Invalid call, max_cpus: %d\n", mcpus); - - machine_info.max_cpus = mcpus; - machine_info.physical_cpu_max = mcpus; - machine_info.logical_cpu_max = mcpus; - mcpus_state |= MAX_CPUS_SET; - - if (mcpus_state & MAX_CPUS_WAIT) { - mcpus_state |= ~MAX_CPUS_WAIT; - thread_wakeup((event_t)&mcpus_state); + current_state = ml_set_interrupts_enabled(FALSE); + if (max_cpus_initialized != MAX_CPUS_SET) { + if (max_cpus > 0 && max_cpus <= MAX_CPUS) { + /* + * Note: max_ncpus is the maximum number + * that the kernel supports or that the "cpus=" + * boot-arg has set. Here we take int minimum. 
+ */ + machine_info.max_cpus = MIN(max_cpus, max_ncpus); + machine_info.physical_cpu_max = max_cpus; + machine_info.logical_cpu_max = max_cpus; + } + if (max_cpus_initialized == MAX_CPUS_WAIT) + wakeup((event_t)&max_cpus_initialized); + max_cpus_initialized = MAX_CPUS_SET; } - mutex_unlock(&mcpus_lock); - + if (machine_info.logical_cpu_max == 1) { - struct patch_up *patch_up_ptr; - boolean_t current_state; - - patch_up_ptr = &patch_up_table[0]; + struct patch_up *patch_up_ptr = &patch_up_table[0]; - current_state = ml_set_interrupts_enabled(FALSE); while (patch_up_ptr->addr != NULL) { /* * Patch for V=R kernel text section @@ -533,8 +526,9 @@ ml_init_max_cpus(unsigned int mcpus) sync_cache64((addr64_t)((unsigned int)(patch_up_ptr->addr)),4); patch_up_ptr++; } - (void) ml_set_interrupts_enabled(current_state); } + + (void) ml_set_interrupts_enabled(current_state); } /* @@ -544,15 +538,15 @@ ml_init_max_cpus(unsigned int mcpus) unsigned int ml_get_max_cpus(void) { - if (hw_compare_and_store(0,1,&mcpus_lock_initialized)) - mutex_init(&mcpus_lock,0); - mutex_lock(&mcpus_lock); - if (!(mcpus_state & MAX_CPUS_SET)) { - mcpus_state |= MAX_CPUS_WAIT; - thread_sleep_mutex((event_t)&mcpus_state, - &mcpus_lock, THREAD_UNINT); + boolean_t current_state; + + current_state = ml_set_interrupts_enabled(FALSE); + if (max_cpus_initialized != MAX_CPUS_SET) { + max_cpus_initialized = MAX_CPUS_WAIT; + assert_wait((event_t)&max_cpus_initialized, THREAD_UNINT); + (void)thread_block(THREAD_CONTINUE_NULL); } - mutex_unlock(&mcpus_lock); + (void) ml_set_interrupts_enabled(current_state); return(machine_info.max_cpus); } @@ -830,3 +824,14 @@ machine_cpu_is_inactive(__unused int num) { return(FALSE); } + +vm_offset_t ml_stack_remaining(void) +{ + uintptr_t local = (uintptr_t) &local; + + if (ml_at_interrupt_context()) { + return (local - (getPerProc()->intstack_top_ss - INTSTACK_SIZE)); + } else { + return (local - current_thread()->kernel_stack); + } +} diff --git 
a/osfmk/ppc/machine_routines.h b/osfmk/ppc/machine_routines.h index 08cb102e7..3fcaf77ef 100644 --- a/osfmk/ppc/machine_routines.h +++ b/osfmk/ppc/machine_routines.h @@ -37,6 +37,11 @@ #include #include +#include +#include + +__BEGIN_DECLS + /* Get Interrupts Enabled */ extern boolean_t ml_get_interrupts_enabled( void); @@ -216,6 +221,9 @@ extern void bzero_phys_nc( addr64_t phys_address, uint32_t length); +/* Bytes available on current stack */ +vm_offset_t ml_stack_remaining(void); + #endif /* KERNEL_PRIVATE */ #ifdef XNU_KERNEL_PRIVATE @@ -324,4 +332,6 @@ extern int boffSettingsInit; #endif /* KERNEL_PRIVATE */ +__END_DECLS + #endif /* _PPC_MACHINE_ROUTINES_H_ */ diff --git a/libsa/libsa/i386/setjmp.h b/osfmk/ppc/machine_task.c similarity index 78% rename from libsa/libsa/i386/setjmp.h rename to osfmk/ppc/machine_task.c index 33232dade..5decd0ce2 100644 --- a/libsa/libsa/i386/setjmp.h +++ b/osfmk/ppc/machine_task.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -53,15 +53,33 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ -/* - */ -/* - * Setjmp/longjmp buffer for i386. 
- */ -#ifndef _I386_SETJMP_H_ -#define _I386_SETJMP_H_ +#include +#include + +kern_return_t +machine_task_set_state( + __unused task_t task, + __unused int flavor, + __unused thread_state_t state, + __unused mach_msg_type_number_t state_count) +{ + return KERN_FAILURE; +} -typedef int jmp_buf[6]; /* ebx, esi, edi, ebp, esp, eip */ +kern_return_t +machine_task_get_state(__unused task_t task, + __unused int flavor, + __unused thread_state_t state, + __unused mach_msg_type_number_t *state_count) +{ + return KERN_FAILURE; +} -#endif /* _I386_SETJMP_H_ */ +kern_return_t +machine_thread_inherit_taskwide( + __unused thread_t thread, + __unused task_t parent_task) +{ + return KERN_FAILURE; +} diff --git a/osfmk/ppc/mappings.c b/osfmk/ppc/mappings.c index b6430b79b..5da3b85d7 100644 --- a/osfmk/ppc/mappings.c +++ b/osfmk/ppc/mappings.c @@ -768,7 +768,12 @@ mapping_phys_lookup(ppnum_t pp, unsigned int *pindex) } +boolean_t +pmap_valid_page(ppnum_t pn) { + unsigned int tmp; + return (mapping_phys_lookup(pn, &tmp) != 0); +} /* @@ -833,7 +838,7 @@ void mapping_adjust(void) { /* Adjust free mappings */ splx(s); /* Restore 'rupts */ for(; allocsize > 0; allocsize >>= 1) { /* Try allocating in descending halves */ - retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE * allocsize); /* Find a virtual address to use */ + retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE * allocsize); /* Find a virtual address to use */ if((retr != KERN_SUCCESS) && (allocsize == 1)) { /* Did we find any memory at all? 
*/ break; } @@ -1389,7 +1394,7 @@ void mapping_prealloc(unsigned int size) { /* Preallocates mapppings for lar splx(s); /* Restore 'rupts */ for(i = 0; i < nmapb; i++) { /* Allocate 'em all */ - retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */ + retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */ if(retr != KERN_SUCCESS) /* Did we get some memory? */ break; mapping_free_init((vm_offset_t)mbn, -1, 0); /* Initialize on to the release queue */ @@ -1458,7 +1463,7 @@ void mapping_free_prime(void) { /* Primes the mapping block release list #endif for(i = 0; i < nmapb; i++) { /* Allocate 'em all */ - retr = kmem_alloc_wired(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */ + retr = kmem_alloc_kobject(mapping_map, (vm_offset_t *)&mbn, PAGE_SIZE); /* Find a virtual address to use */ if(retr != KERN_SUCCESS) { /* Did we get some memory? */ panic("Whoops... 
Not a bit of wired memory left for anyone\n"); } diff --git a/osfmk/ppc/misc_protos.h b/osfmk/ppc/misc_protos.h index 27ad339bb..d3eddc42a 100644 --- a/osfmk/ppc/misc_protos.h +++ b/osfmk/ppc/misc_protos.h @@ -118,7 +118,7 @@ extern void draw_panic_dialog( extern void commit_paniclog( void); -#ifdef DEBUG +#if DEBUG #define DPRINTF(x) { printf("%s : ",__FUNCTION__);printf x; } #endif /* DEBUG */ diff --git a/osfmk/ppc/model_dep.c b/osfmk/ppc/model_dep.c index e6dc6435f..9eff5b0bb 100644 --- a/osfmk/ppc/model_dep.c +++ b/osfmk/ppc/model_dep.c @@ -74,6 +74,8 @@ #include #include +#include + #include #include #include @@ -450,10 +452,8 @@ print_backtrace(struct savearea *ssp) while(pbtcnt); /* Wait for completion */ pbt_exit: - panic_display_system_configuration(); - panic_display_zprint(); - dump_kext_info(&kdb_log); - return; + panic_display_system_configuration(); + return; } void @@ -527,7 +527,7 @@ void dump_backtrace(struct savearea *sv, unsigned int stackptr, unsigned int fen } kdb_printf("\n"); if(i >= DUMPFRAMES) kdb_printf(" backtrace continues...\n"); /* Say we terminated early */ - if(i) kmod_dump((vm_offset_t *)&bframes[0], i); /* Show what kmods are in trace */ + if(i) kmod_panic_dump((vm_offset_t *)&bframes[0], i); /* Show what kmods are in trace */ } diff --git a/osfmk/ppc/mp.h b/osfmk/ppc/mp.h index 4b187fb69..9b2dde5a2 100644 --- a/osfmk/ppc/mp.h +++ b/osfmk/ppc/mp.h @@ -35,10 +35,4 @@ #include #include -#if NCPUS > 1 -extern void interrupt_stack_alloc(void); - -extern unsigned int wncpu; -#endif /* NCPUS > 1 */ - #endif /* _PPC_MP_H_ */ diff --git a/osfmk/ppc/pcb.c b/osfmk/ppc/pcb.c index 38569fc94..a38687b14 100644 --- a/osfmk/ppc/pcb.c +++ b/osfmk/ppc/pcb.c @@ -499,7 +499,7 @@ machine_stack_detach( vm_offset_t stack; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_DETACH), - thread, thread->priority, + (uintptr_t)thread_tid(thread), thread->priority, thread->sched_pri, 0, 0); act_machine_sv_free(thread, 0); /* XXX flag == 0 OK? 
*/ @@ -530,7 +530,7 @@ machine_stack_attach( struct savearea *sv; KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH), - thread, thread->priority, + (uintptr_t)thread_tid(thread), thread->priority, thread->sched_pri, 0, 0); assert(stack); diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c index 72ccbeeaf..495124287 100644 --- a/osfmk/ppc/pmap.c +++ b/osfmk/ppc/pmap.c @@ -1733,7 +1733,7 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) { panic("pmap_unnest: Attempt to unnest something that is not at start of nest - va = %016llX\n", vaddr); } - (void)hw_atomic_and(&mp->mpFlags, ~mpPerm); /* Show that this mapping is now removable */ + hw_atomic_and_noret(&mp->mpFlags, ~mpPerm); /* Show that this mapping is now removable */ mapping_drop_busy(mp); /* Go ahead and release the mapping now */ @@ -1781,6 +1781,9 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) { return KERN_SUCCESS; /* Bye, bye, butterfly... */ } +boolean_t pmap_adjust_unnest_parameters(__unused pmap_t p, __unused vm_map_offset_t *s, __unused vm_map_offset_t *e) { + return FALSE; /* Not implemented on PowerPC */ +} /* * void MapUserMemoryWindowInit(void) diff --git a/osfmk/ppc/pmap.h b/osfmk/ppc/pmap.h index 3edad1d4b..24db51ea2 100644 --- a/osfmk/ppc/pmap.h +++ b/osfmk/ppc/pmap.h @@ -281,6 +281,9 @@ extern pmapTransTab *pmapTrans; /* Space to pmap translate table */ /* write combining mode, aka store gather */ #define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) +/* superpages */ +#define SUPERPAGE_NBASEPAGES 1 /* we don't support superpages on PowerPC */ + /* * prototypes. 
*/ @@ -323,6 +326,10 @@ extern int pmap_list_resident_pages( int space); extern void pmap_init_sharedpage(vm_offset_t cpg); extern void pmap_disable_NX(pmap_t pmap); + +extern boolean_t pmap_valid_page( + ppnum_t pn); + /* Not required for ppc: */ static inline void pmap_set_4GB_pagezero(__unused pmap_t pmap) {} static inline void pmap_clear_4GB_pagezero(__unused pmap_t pmap) {} diff --git a/osfmk/ppc/ppc_init.c b/osfmk/ppc/ppc_init.c index ccdbb8bb9..9be44aed8 100644 --- a/osfmk/ppc/ppc_init.c +++ b/osfmk/ppc/ppc_init.c @@ -157,7 +157,7 @@ ppc_init( BootProcInfo.cpu_flags = 0; BootProcInfo.istackptr = 0; /* we're on the interrupt stack */ BootProcInfo.intstack_top_ss = (vm_offset_t)&intstack + INTSTACK_SIZE - FM_SIZE; - BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + KERNEL_STACK_SIZE - FM_SIZE; + BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + kernel_stack_size - FM_SIZE; BootProcInfo.debstackptr = BootProcInfo.debstack_top_ss; BootProcInfo.interrupts_enabled = 0; BootProcInfo.pending_ast = AST_NONE; diff --git a/osfmk/ppc/ppc_vm_init.c b/osfmk/ppc/ppc_vm_init.c index 0ff41cf4d..e94b6b545 100644 --- a/osfmk/ppc/ppc_vm_init.c +++ b/osfmk/ppc/ppc_vm_init.c @@ -57,7 +57,7 @@ #include #include -#include +#include extern const char version[]; extern const char version_variant[]; @@ -94,19 +94,18 @@ vm_offset_t first_avail; vm_offset_t static_memory_end; addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel virtual address known to the VM system */ -extern struct mach_header _mh_execute_header; vm_offset_t sectTEXTB; -int sectSizeTEXT; +unsigned long sectSizeTEXT; vm_offset_t sectDATAB; -int sectSizeDATA; +unsigned long sectSizeDATA; vm_offset_t sectLINKB; -int sectSizeLINK; +unsigned long sectSizeLINK; vm_offset_t sectKLDB; -int sectSizeKLD; +unsigned long sectSizeKLD; vm_offset_t sectPRELINKB; -int sectSizePRELINK; +unsigned long sectSizePRELINK; vm_offset_t sectHIBB; -int sectSizeHIB; +unsigned long sectSizeHIB; vm_offset_t end, etext, 
edata; @@ -224,7 +223,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) sectHIBB = (vm_offset_t)(uint32_t *)getsegdatafromheader( &_mh_execute_header, "__HIB", §SizeHIB); sectPRELINKB = (vm_offset_t)(uint32_t *)getsegdatafromheader( - &_mh_execute_header, "__PRELINK", §SizePRELINK); + &_mh_execute_header, "__PRELINK_TEXT", §SizePRELINK); etext = (vm_offset_t) sectTEXTB + sectSizeTEXT; edata = (vm_offset_t) sectDATAB + sectSizeDATA; diff --git a/osfmk/ppc/savearea.c b/osfmk/ppc/savearea.c index 42f857162..0e95c5ff1 100644 --- a/osfmk/ppc/savearea.c +++ b/osfmk/ppc/savearea.c @@ -287,7 +287,7 @@ void save_adjust(void) { while(saveanchor.saveadjust > 0) { /* Keep going until we have enough */ - ret = kmem_alloc_wired(kernel_map, (vm_offset_t *)&freepage, PAGE_SIZE); /* Get a page for free pool */ + ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&freepage, PAGE_SIZE); /* Get a page for free pool */ if(ret != KERN_SUCCESS) { /* Did we get some memory? */ panic("Whoops... Not a bit of wired memory left for saveareas\n"); } diff --git a/osfmk/ppc/trap.c b/osfmk/ppc/trap.c index 0843dcd44..c30bf7381 100644 --- a/osfmk/ppc/trap.c +++ b/osfmk/ppc/trap.c @@ -62,8 +62,8 @@ #include -perfCallback perfTrapHook; /* Pointer to CHUD trap hook routine */ -perfCallback perfASTHook; /* Pointer to CHUD AST hook routine */ +volatile perfCallback perfTrapHook; /* Pointer to CHUD trap hook routine */ +volatile perfCallback perfASTHook; /* Pointer to CHUD AST hook routine */ #if CONFIG_DTRACE extern kern_return_t dtrace_user_probe(ppc_saved_state_t *sv); @@ -143,16 +143,18 @@ struct savearea *trap(int trapno, #endif /* MACH_BSD */ myast = ast_pending(); - if(perfASTHook) { + perfCallback fn = perfASTHook; + if(fn) { if(*myast & AST_CHUD_ALL) { - perfASTHook(trapno, ssp, dsisr, (unsigned int)dar); + fn(trapno, ssp, dsisr, (unsigned int)dar); } } else { *myast &= ~AST_CHUD_ALL; } - if(perfTrapHook) { /* Is there a hook? 
*/ - if(perfTrapHook(trapno, ssp, dsisr, (unsigned int)dar) == KERN_SUCCESS) return ssp; /* If it succeeds, we are done... */ + fn = perfTrapHook; + if(fn) { /* Is there a hook? */ + if(fn(trapno, ssp, dsisr, (unsigned int)dar) == KERN_SUCCESS) return ssp; /* If it succeeds, we are done... */ } #if CONFIG_DTRACE @@ -950,7 +952,7 @@ void unresolved_kernel_trap(int trapno, */ if( panicDebugging ) (void)Call_Debugger(trapno, ssp); - panic_plain(message); + panic_plain("%s", message); } const char *corr[2] = {"uncorrected", "corrected "}; diff --git a/osfmk/ppc/trap.h b/osfmk/ppc/trap.h index 372526236..2a4a33ca8 100644 --- a/osfmk/ppc/trap.h +++ b/osfmk/ppc/trap.h @@ -85,9 +85,9 @@ extern struct savearea* trap(int trapno, typedef kern_return_t (*perfCallback)(int trapno, struct savearea *ss, unsigned int dsisr, addr64_t dar); -extern perfCallback perfTrapHook; -extern perfCallback perfASTHook; -extern perfCallback perfIntHook; +extern volatile perfCallback perfTrapHook; +extern volatile perfCallback perfASTHook; +extern volatile perfCallback perfIntHook; extern struct savearea* interrupt(int intno, struct savearea *ss, diff --git a/osfmk/ppc/vmachmon.c b/osfmk/ppc/vmachmon.c index ec7ab941b..f8d7caac6 100644 --- a/osfmk/ppc/vmachmon.c +++ b/osfmk/ppc/vmachmon.c @@ -140,7 +140,7 @@ static pmap_vmm_ext *vmm_build_shadow_hash(pmap_t pmap) panic("vmm_build_shadow_hash: too little pmap_vmm_ext free space\n"); } - ret = kmem_alloc_wired(kernel_map, (vm_offset_t *)&ext, PAGE_SIZE); + ret = kmem_alloc_kobject(kernel_map, (vm_offset_t *)&ext, PAGE_SIZE); /* Allocate a page-sized extension block */ if (ret != KERN_SUCCESS) return (NULL); /* Return NULL for failed allocate */ bzero((char *)ext, PAGE_SIZE); /* Zero the entire extension block page */ @@ -169,7 +169,7 @@ static pmap_vmm_ext *vmm_build_shadow_hash(pmap_t pmap) for (idx = 0; idx < pages; idx++) { mapping_t *map; uint32_t mapIdx; - ret = kmem_alloc_wired(kernel_map, &ext->vmxHashPgList[idx], PAGE_SIZE); + ret = 
kmem_alloc_kobject(kernel_map, &ext->vmxHashPgList[idx], PAGE_SIZE); /* Allocate a hash-table page */ if (ret != KERN_SUCCESS) goto fail; /* Allocation failed, exit through cleanup */ bzero((char *)ext->vmxHashPgList[idx], PAGE_SIZE); /* Zero the page */ @@ -1939,7 +1939,7 @@ int vmm_stop_vm(struct savearea *save) for(cvi = 0; cvi < kVmmMaxContexts; cvi++) { /* Search slots */ if((0x80000000 & vmmask) && (CTable->vmmc[cvi].vmmFlags & vmmInUse)) { /* See if we need to stop and if it is in use */ - (void)hw_atomic_or(&CTable->vmmc[cvi].vmmFlags, vmmXStop); /* Set this one to stop */ + hw_atomic_or_noret(&CTable->vmmc[cvi].vmmFlags, vmmXStop); /* Set this one to stop */ } vmmask = vmmask << 1; /* Slide mask over */ } diff --git a/osfmk/profiling/Makefile b/osfmk/profiling/Makefile index 410bab236..e037d5041 100644 --- a/osfmk/profiling/Makefile +++ b/osfmk/profiling/Makefile @@ -16,6 +16,9 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + x86_64 + INSTINC_SUBDIRS_ARM = \ arm @@ -31,6 +34,9 @@ EXPINC_SUBDIRS_I386 = \ EXPINC_SUBDIRS_ARM = \ arm +EXPINC_SUBDIRS_X86_64 = \ + x86_64 + MIG_DEFS = \ MIG_HDRS = \ diff --git a/osfmk/profiling/i386/profile-md.c b/osfmk/profiling/i386/profile-md.c index c1390d7fd..ff5c91d1b 100644 --- a/osfmk/profiling/i386/profile-md.c +++ b/osfmk/profiling/i386/profile-md.c @@ -242,13 +242,7 @@ static void _profile_reset_alloc(struct profile_vars *, extern void _bogus_function(void); -#if NCPUS > 1 -struct profile_vars *_profile_vars_cpus[NCPUS] = { &_profile_vars }; -struct profile_vars _profile_vars_aux[NCPUS-1]; -#define PROFILE_VARS(cpu) (_profile_vars_cpus[(cpu)]) -#else #define PROFILE_VARS(cpu) (&_profile_vars) -#endif void * _profile_alloc_pages (size_t size) diff --git a/osfmk/profiling/i386/profile-md.h b/osfmk/profiling/i386/profile-md.h index bbc8d2df8..b30deae93 100644 --- a/osfmk/profiling/i386/profile-md.h +++ b/osfmk/profiling/i386/profile-md.h @@ -157,8 +157,13 @@ * Integer types used. 
*/ -typedef long prof_ptrint_t; /* hold either pointer or signed int */ -typedef unsigned long prof_uptrint_t; /* hold either pointer or unsigned int */ +/* + * These hold either a pointer or a signed/unsigned int. + * They are 32 bit on i386 and 64 bit on x86_64. + */ +typedef long prof_ptrint_t; +typedef unsigned long prof_uptrint_t; + typedef long prof_lock_t; /* lock word type */ typedef unsigned char prof_flag_t; /* type for boolean flags */ @@ -166,11 +171,17 @@ typedef unsigned char prof_flag_t; /* type for boolean flags */ * Double precision counter. */ +/* These are 64 bit on both i386 and x86_64 */ +#ifdef __i386__ typedef struct prof_cnt_t { prof_uptrint_t low; /* low 32 bits of counter */ prof_uptrint_t high; /* high 32 bits of counter */ } prof_cnt_t; +#else +typedef unsigned long prof_cnt_t; +#endif +#ifdef __i386__ #if defined(__GNUC__) && !defined(lint) #define PROF_CNT_INC(cnt) \ __asm__("addl $1,%0; adcl $0,%1" \ @@ -208,6 +219,14 @@ typedef struct prof_cnt_t { #define PROF_CNT_SUB(cnt,val) (((((cnt).low - (val)) > (cnt).low) ? 
((cnt).high--) : 0), ((cnt).low -= (val))) #define PROF_CNT_LSUB(cnt,val) (PROF_CNT_SUB(cnt,(val).low), (cnt).high -= (val).high) #endif +#else +/* x86_64 */ +#define PROF_CNT_INC(cnt) (cnt++) +#define PROF_CNT_ADD(cnt,val) (cnt+=val) +#define PROF_CNT_LADD(cnt,val) (cnt+=val) +#define PROF_CNT_SUB(cnt,val) (cnt-=val) +#define PROF_CNT_LSUB(cnt,val) (cnt-=val) +#endif #define PROF_ULONG_TO_CNT(cnt,val) (((cnt).high = 0), ((cnt).low = val)) #define PROF_CNT_OVERFLOW(cnt,high,low) (((high) = (cnt).high), ((low) = (cnt).low)) diff --git a/osfmk/profiling/machine/profile-md.h b/osfmk/profiling/machine/profile-md.h index b1c30115b..66f783531 100644 --- a/osfmk/profiling/machine/profile-md.h +++ b/osfmk/profiling/machine/profile-md.h @@ -30,7 +30,7 @@ #if defined (__ppc__) #include "profiling/ppc/profile-md.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined (__x86_64__) #include "profiling/i386/profile-md.h" #else #error architecture not supported diff --git a/osfmk/profiling/profile-mk.c b/osfmk/profiling/profile-mk.c index 6a912bb69..43e1376ea 100644 --- a/osfmk/profiling/profile-mk.c +++ b/osfmk/profiling/profile-mk.c @@ -47,11 +47,6 @@ extern char etext[], pstart[]; -#if NCPUS > 1 -struct profile_vars *_profile_vars_cpus[NCPUS] = { &_profile_vars }; -struct profile_vars _profile_vars_aux[NCPUS-1]; -#endif - void * _profile_alloc_pages (size_t size) { @@ -102,7 +97,6 @@ kmstartup(void) prof_uptrint_t monsize; prof_uptrint_t lowpc; prof_uptrint_t highpc; - int i; struct profile_vars *pv; /* @@ -115,48 +109,40 @@ kmstartup(void) textsize = highpc - lowpc; monsize = (textsize / HISTFRACTION) * sizeof(LHISTCOUNTER); - for (i = 0; i < NCPUS; i++) { - pv = PROFILE_VARS(i); - -#if NCPUS > 1 - if (!pv) { - _profile_vars_cpus[i] = pv = &_profile_vars_aux[i-i]; - } -#endif + pv = PROFILE_VARS(0); #ifdef DEBUG_PROFILE - pv->debug = 1; + pv->debug = 1; #endif - pv->page_size = PAGE_SIZE; - _profile_md_init(pv, PROFILE_GPROF, PROFILE_ALLOC_MEM_YES); - - /* Profil 
related variables */ - pv->profil_buf = _profile_alloc (pv, monsize, ACONTEXT_PROFIL); - pv->profil_info.highpc = highpc; - pv->profil_info.lowpc = lowpc; - pv->profil_info.text_len = textsize; - pv->profil_info.profil_len = monsize; - pv->profil_info.counter_size = sizeof(LHISTCOUNTER); - pv->profil_info.scale = 0x10000 / HISTFRACTION; - pv->stats.profil_buckets = monsize / sizeof(LHISTCOUNTER); - - /* Other gprof variables */ - pv->stats.my_cpu = i; - pv->stats.max_cpu = NCPUS; - pv->init = 1; - pv->active = 1; - pv->use_dci = 0; - pv->use_profil = 1; - pv->check_funcs = 1; /* for now */ - - if (pv->debug) { - printf("Profiling kernel, s_textsize=%ld, monsize=%ld [0x%lx..0x%lx], cpu = %d\n", - (long)textsize, - (long)monsize, - (long)lowpc, - (long)highpc, - i); - } + pv->page_size = PAGE_SIZE; + _profile_md_init(pv, PROFILE_GPROF, PROFILE_ALLOC_MEM_YES); + + /* Profil related variables */ + pv->profil_buf = _profile_alloc (pv, monsize, ACONTEXT_PROFIL); + pv->profil_info.highpc = highpc; + pv->profil_info.lowpc = lowpc; + pv->profil_info.text_len = textsize; + pv->profil_info.profil_len = monsize; + pv->profil_info.counter_size = sizeof(LHISTCOUNTER); + pv->profil_info.scale = 0x10000 / HISTFRACTION; + pv->stats.profil_buckets = monsize / sizeof(LHISTCOUNTER); + + /* Other gprof variables */ + pv->stats.my_cpu = 0; + pv->stats.max_cpu = 1; /* initial number of cpus */ + pv->init = 1; + pv->active = 1; + pv->use_dci = 0; + pv->use_profil = 1; + pv->check_funcs = 1; /* for now */ + + if (pv->debug) { + printf("Profiling kernel, s_textsize=%ld, monsize=%ld [0x%lx..0x%lx], cpu = %d\n", + (long)textsize, + (long)monsize, + (long)lowpc, + (long)highpc, + 0); } _profile_md_start(); @@ -207,7 +193,7 @@ gprofstrategy(io_req_t ior) long count = _profile_kgmon(!(ior->io_op & IO_READ), ior->io_count, ior->io_recnum, - NCPUS, + 1, &sys_ptr, (void (*)(kgmon_control_t))0); diff --git a/osfmk/profiling/profile-mk.h b/osfmk/profiling/profile-mk.h index 700e030e5..f2da965f8 
100644 --- a/osfmk/profiling/profile-mk.h +++ b/osfmk/profiling/profile-mk.h @@ -55,12 +55,6 @@ extern int gprofwrite(dev_t, io_req_t); * Macros to access the nth cpu's profile variable structures. */ -#if NCPUS <= 1 #define PROFILE_VARS(cpu) (&_profile_vars) -#else -extern struct profile_vars *_profile_vars_cpus[NCPUS]; -#define PROFILE_VARS(cpu) (_profile_vars_cpus[(cpu)]) -#endif - diff --git a/osfmk/sys/scsi.h b/osfmk/sys/scsi.h deleted file mode 100644 index 545b08c78..000000000 --- a/osfmk/sys/scsi.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.4.1 1996/04/17 17:48:51 davidp - * Created for use with SVR4 drivers. - * [1996/04/11 13:18:06 davidp] - * - * Revision 1.1.1.2 1996/03/04 17:50:08 calvert - * Created for use with SVR4 drivers. - * - * $EndLog$ - */ diff --git a/osfmk/sys/sdi.h b/osfmk/sys/sdi.h deleted file mode 100644 index 6a92a8eb1..000000000 --- a/osfmk/sys/sdi.h +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.7.1 1996/09/17 16:34:56 bruel - * removed size_t, already defined in types.h. - * [96/09/17 bruel] - * - * Revision 1.1.4.1 1996/04/17 17:48:54 davidp - * Created for use with SVR4 drivers. - * [1996/04/11 13:19:26 davidp] - * - * Revision 1.1.1.2 1996/03/04 17:53:46 calvert - * Created for use with SVR4 drivers. - * - * $EndLog$ - */ -/* In vi use ":set ts=4" to edit/view this file - */ -#ifndef _SYS_SDI_H_ -#define _SYS_SDI_H_ 1 - -#include "scsi/scsi_endian.h" - -typedef u_long paddr_t; -typedef u_int rm_key_t; - -typedef long hba_clock_t; - -typedef u_long major_t; -typedef u_long minor_t; - -typedef u_long hba_buf_t; /* just to satisfy declaration */ -typedef u_long hba_uio_t; /* just to satisfy declaration */ - - -struct ver_no { - uchar_t sv_release; - uchar_t sv_machine; - short sv_modes; -}; - -struct hba_idata_v4 { - int version_num; - char *name; - uchar_t ha_id; - ulong_t ioaddr1; - int dmachan1; - int iov; - int cntlr; - int active; - ulong_t idata_memaddr; - uchar_t idata_ctlorder; - uchar_t idata_nbus; - ushort_t idata_ntargets; - ushort_t idata_nluns; - rm_key_t idata_rmkey; - void *idata_intrcookie; - int idata_cpubind; -}; - -struct hba_idata { - int version_num; - char *name; - uchar_t ha_id; - ulong_t ioaddr1; - int dmachan1; - int iov; - int cntlr; - int active; -}; - -#define HBA_SVR4_2 1 -#define HBA_SVR4_2_2 2 -#define HBA_SVR4_2MP 3 - -#define HBA_VMASK 0xffff - -#define HBA_IDATA_EXT 0x10000 -#define HBA_EXT_INFO 0x20000 -#define HBA_AUTOCONF 0x40000 - -#define VID_LEN 8 -#define PID_LEN 16 -#define REV_LEN 4 - -#define INQ_LEN VID_LEN+PID_LEN+1 -#define INQ_EXLEN INQ_LEN+REV_LEN - -struct ident { - BITFIELD_2( unsigned char, - id_type 
: 5, - id_pqual : 3); - BITFIELD_2(unsigned char, - id_qualif : 7, - id_rmb : 1); - uchar_t id_ver; - BITFIELD_2(unsigned char, - id_form : 4, - id_res1 : 4); - uchar_t id_len; - uchar_t id_vu [3]; - char id_vendor [VID_LEN]; - char id_prod [PID_LEN]; - char id_revnum [REV_LEN]; -}; - -#define SCSI_INQ_CON 0x0 -#define SCSI_INQ_TC 0x1 -#define SCSI_INQ_TNC 0x3 - -struct scsi_adr { - int scsi_ctl; - int scsi_target; - int scsi_lun; - int scsi_bus; -}; - -struct scsi_ad { - ulong_t sa_major; - ulong_t sa_minor; - uchar_t sa_lun; - BITFIELD_2(unsigned char, - sa_bus : 3, - sa_exta : 5); - short sa_ct; -}; - -/* sa_ct */ -#define SDI_SA_CT(c,t) (((c) << 3) | ((t) & 0x07)) -#define SDI_HAN(sa) (((sa)->sa_ct >> 3) & 0x07) -#define SDI_TCN(sa) ((sa)->sa_ct & 0x07) - -#define SDI_ETCN(sa) ((sa)->sa_exta) -#define SDI_EHAN(sa) (((sa)->sa_ct >> 3) & 0x1f) - -struct sdi_edt { - struct sdi_edt *hash_p; - short hba_no; - uchar_t scsi_id; - uchar_t lun; - struct owner *curdrv; - struct owner *owner_list; - ulong_t res1; - int pdtype; - uchar_t iotype; - char inquiry [INQ_EXLEN]; - struct scsi_adr scsi_adr; - ulong_t memaddr; - uchar_t ctlorder; - struct ident edt_ident; -}; - -/* iotype */ -#define F_DMA 0x001 -#define F_DMA_24 F_DMA -#define F_PIO 0x002 -#define F_SCGTH 0x004 -#define F_RMB 0x008 -#define F_DMA_32 0x010 -#define F_HDWREA 0x020 -#define F_RESID 0x040 - -struct mod_operations { - int (*modm_install)(void); - int (*modm_remove)(void); - int (*modm_info)(void); - int (*modm_bind)(void); -}; - -struct modlink { - struct mod_operations *ml_ops; - void *ml_type_data; -}; - -struct mod_type_data { - char *mtd_info; - void *mtd_pdata; -}; - -struct modwrapper { - int mw_rev; - int (*mw_load)(void); - int (*mw_unload)(void); - void (*mw_halt)(void); - void *mw_conf_data; - struct modlink *mw_modlink; -}; - -struct hbadata { - struct xsb *sb; -}; - -typedef struct physreq { - paddr_t phys_align; - paddr_t phys_boundary; - uchar_t phys_dmasize; - uchar_t phys_max_scgth; - 
uchar_t phys_flags; - void *phys_brkup_poolp; -} physreq_t; - - -typedef struct bcb { - uchar_t bcb_addrtypes; - uchar_t bcb_flags; - size_t bcb_max_xfer; - size_t bcb_granularity; - physreq_t *bcb_physreqp; -} bcb_t; - -struct hbagetinfo { - char *name; - char iotype; - bcb_t *bcbp; -}; - -struct hba_info { - int *hba_flag; - ulong_t max_xfer; - long (*hba_freeblk)(struct hbadata *hdp, int cntlr); - struct hbadata *(*hba_getblk)(int flag, int cntlr); - long (*hba_icmd)(struct hbadata *hdp, int flag); - void (*hba_getinfo)(struct scsi_ad *sap, - struct hbagetinfo *hgip); - long (*hba_send)(struct hbadata *hdp, int flag); - int (*hba_xlat)(struct hbadata *hdp, int bflag, void *procp, - int flag); - int (*hba_open)(void); - int (*hba_close)(void); - int (*hba_ioctl)(void); -}; - -/* hba_flag */ -#define HBA_MP 0x01 -#define HBA_HOT 0x02 -#define HBA_TIMEOUT 0x04 - -#define SC_EXHAN(minor) (((minor) >> 5) & 0x1f) -#define SC_EXTCN(minor) ((((minor) >> 2) & 0x07) | ((minor >> 7) & 0x18)) -#define SC_EXLUN(minor) (((minor) & 0x03) | ((minor>>10) & 0x1C)) -#define SC_BUS(minor) (((minor) >> 15) & 0x07) - -#define SC_MKMINOR(h,t,l,b) ( \ - (((h) & 0x1f) << 5) | \ - (((t) & 0x07) << 2) | (((t) & 0x18) << 7) | \ - ((l) & 0x03) | (((l) & 0x1c) << 10) | \ - (((b) & 0x07) << 15) \ - ) - -#define SDI_NAMESZ 49 - -#define SM_POOLSIZE 28 -#define LG_POOLSIZE (sizeof (struct xsb)) - -#define SCB_TYPE 1 -#define ISCB_TYPE 2 -#define SFB_TYPE 3 - -#define SCB_WRITE 0x00 -#define SCB_READ 0x01 -#define SCB_LINK 0x02 -#define SCB_HAAD 0x04 -#define SCB_PARTBLK 0x08 - -#define SDI_NOALLOC 0x00000000 -#define SDI_ASW 0x00000001 -#define SDI_LINKF0 0x00000002 -#define SDI_LINKF1 0x00000003 -#define SDI_QFLUSH 0xE0000004 -#define SDI_ABORT 0xF0000005 -#define SDI_RESET 0xF0000006 -#define SDI_CRESET 0xD0000007 -#define SDI_V2PERR 0xA0000008 -#define SDI_TIME 0xD0000009 -#define SDI_NOTEQ 0x8000000A -#define SDI_HAERR 0xE000000B -#define SDI_MEMERR 0xA000000C -#define SDI_SBUSER 0xA000000D 
-#define SDI_CKSTAT 0xD000000E -#define SDI_SCBERR 0x8000000F -#define SDI_OOS 0xA0000010 -#define SDI_NOSELE 0x90000011 -#define SDI_MISMAT 0x90000012 -#define SDI_PROGRES 0x00000013 -#define SDI_UNUSED 0x00000014 -#define SDI_ONEIC 0x80000017 -#define SDI_SFBERR 0x80000019 -#define SDI_TCERR 0x9000001A - -#define SDI_ERROR 0x80000000 -#define SDI_RETRY 0x40000000 -#define SDI_MESS 0x20000000 -#define SDI_SUSPEND 0x10000000 - -#define SFB_NOPF 0x00 -#define SFB_RESETM 0x01 -#define SFB_ABORTM 0x02 -#define SFB_FLUSHR 0x03 -#define SFB_RESUME 0x04 -#define SFB_SUSPEND 0x05 -#define SFB_ADD_DEV 0x06 -#define SFB_RM_DEV 0x07 -#define SFB_PAUSE 0x08 -#define SFB_CONTINUE 0x09 - -#define SDI_386_AT 0x06 -#define SDI_386_MCA 0x07 -#define SDI_386_EISA 0x08 - -#define SDI_RET_OK 0 -#define SDI_RET_ERR -1 -#define SDI_RET_RETRY 1 - -#define SDI_SEND 0x0081 -#define SDI_TRESET 0x0082 -#define SDI_BRESET 0x0084 -#define HA_VER 0x0083 -#define SDI_RESERVE 0x0085 -#define SDI_RELEASE 0x0086 -#define SDI_RESTAT 0x0087 -#define HA_GETPARMS 0x008a -#define IHA_GETPARMS 0x008b -#define HA_SETPARMS 0x008c -#define IHA_SETPARMS 0x008d -#define HA_GETPPARMS 0x008e - -struct sense { - uchar_t sd_pad0; - BITFIELD_2(unsigned char, - sd_errc : 7, - sd_valid : 1); - uchar_t sd_res1; - BITFIELD_5(unsigned char, - sd_key : 4, - sd_res2 : 1, - sd_ili : 1, - sd_eom : 1, - sd_fm : 1); - uint_t sd_ba; - uchar_t sd_len; - uchar_t sd_res3 [4]; - uchar_t sd_sencode; - uchar_t sd_qualifier; - uchar_t sd_fru; - BITFIELD_5(unsigned char, - sd_bitpt : 3, - sd_bpv : 1, - sd_res4 : 2, - sd_cd : 1, - sd_res5 : 1); - uchar_t sd_field [2]; - uchar_t sd_res6; - uchar_t sd_buffer; - uchar_t sd_res7 [2]; -}; - - -struct sb_extra { - struct sense sb_sense; -}; - -#define sc_priv sc_extra - -struct sb; - -struct scb { - ulong_t sc_comp_code; - void *sc_extra; - void (*sc_int)(struct sb *sbp); - caddr_t sc_cmdpt; - caddr_t sc_datapt; - long sc_wd; - time_t sc_time; - struct scsi_ad sc_dev; - ushort_t sc_mode; - 
uchar_t sc_status; - char sc_fill; - struct sb *sc_link; - long sc_cmdsz; - long sc_datasz; - long sc_resid; - hba_clock_t sc_start; -}; - -struct sfb { - ulong_t sf_comp_code; - char *sf_priv; - void (*sf_int)(struct sb *sbp); - struct scsi_ad sf_dev; - ulong_t sf_func; - int sf_wd; -}; - -struct sb { - ulong_t sb_type; - union { - struct scb b_scb; - struct sfb b_sfb; - } sb_b; -}; - -#define SCB sb_b.b_scb -#define SFB sb_b.b_sfb - -struct xsb { - struct sb sb; - struct hbadata *hbadata_p; - struct owner *owner_p; - struct sb_extra extra; -}; - -#define S_GOOD 0X00 -#define S_CKCON 0X02 -#define S_METGD 0X04 -#define S_BUSY 0X08 -#define S_INGD 0X10 -#define S_INMET 0X12 -#define S_RESER 0X18 -#define S_CTERM 0x22 -#define S_QFULL 0x28 - -#define SLEEP 0 -#define NOSLEEP 1 - -#define KM_SLEEP SLEEP -#define KM_NOSLEEP NOSLEEP -#define KM_DMA 2 -#define KM_REQ_DMA 4 -#define KM_PHYSCONTIG 8 - -struct mod_drvintr { - ushort_t di_magic; - ushort_t di_version; - char *di_modname; - int *di_devflagp; - void (*di_handler)(int vect); - void *di_hook; -}; - -#define MOD_INTR_MAGIC 0xEB13 -#define MOD_INTR_VER 1 - -struct o_mod_drvintr { - struct intr_info *drv_intrinfo; - void (*ihndler)(int vect); -}; - -#define MOD_INTRVER_MASK 0xff000000 -#define MOD_INTRVER_42 0x01000000 - -#define INTRVER(infop) ((unsigned int)((infop)->ivect_no & MOD_INTRVER_MASK)) -#define INTRNO(infop) ((infop)->ivect_no & ~MOD_INTRVER_MASK) - -struct intr_info0 { - int ivect_no; - int int_pri; - int itype; -}; - -struct intr_info { - int ivect_no; - int int_pri; - int itype; - int int_cpu; - int int_mp; -}; - -#endif /* _SYS_SDI_H_ */ diff --git a/osfmk/sys/sdi_edt.h b/osfmk/sys/sdi_edt.h deleted file mode 100644 index 1b73c9650..000000000 --- a/osfmk/sys/sdi_edt.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.4.1 1996/04/17 17:48:58 davidp - * Created for use with SVR4 drivers. - * [1996/04/11 13:20:36 davidp] - * - * Revision 1.1.1.2 1996/03/04 17:54:47 calvert - * Created for use with SVR4 drivers. - * - * $EndLog$ - */ diff --git a/osfmk/sys/time.h b/osfmk/sys/time.h deleted file mode 100644 index a48017b9e..000000000 --- a/osfmk/sys/time.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 03:13:17 ezf - * change marker to not FREE - * [1994/09/22 21:58:56 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:55:33 gm - * Added to OSF/1 R1.3 from NMK15.0. 
- * [1993/06/02 21:31:02 jeffc] - * - * Revision 1.2 1993/04/19 17:17:07 devrcs - * Fixes for ANSI C - * [1993/02/26 14:02:46 sp] - * - * Revision 1.1 1992/09/30 02:36:58 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.3 91/05/14 17:40:25 mrt - * Correcting copyright - * - * Revision 2.2 91/02/05 17:56:58 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:29 mrt] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. - */ -/* - */ -/* - * Time-keeper for kernel IO devices. - * - * May or may not have any relation to wall-clock time. - */ - -#ifndef _SYS_TIME_H_ -#define _SYS_TIME_H_ -#include - -extern time_value_t time; - -/* - * Definitions to keep old code happy. 
- */ -#define timeval_t time_value_t -#define timeval time_value -#define tv_sec seconds -#define tv_usec microseconds - -#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec) -#define timercmp(tvp, uvp, cmp) \ - ((tvp)->tv_sec cmp (uvp)->tv_sec || \ - (tvp)->tv_sec == (uvp)->tv_sec && (tvp)->tv_usec cmp (uvp)->tv_usec) -#define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0 -#endif /* _SYS_TIME_H_ */ diff --git a/osfmk/sys/tm.h b/osfmk/sys/tm.h deleted file mode 100644 index 357ebd283..000000000 --- a/osfmk/sys/tm.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:48 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 03:13:27 ezf - * change marker to not FREE - * [1994/09/22 21:59:00 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:55:37 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:31:05 jeffc] - * - * Revision 1.2 1993/04/19 17:17:19 devrcs - * Fixes for ANSI C - * [1993/02/26 14:02:53 sp] - * - * Revision 1.1 1992/09/30 02:37:00 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.3 91/05/14 17:40:33 mrt - * Correcting copyright - * - * Revision 2.2 91/02/05 17:57:03 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:35 mrt] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. - */ -/* - */ -/* - * Time, broken out. 
- */ -#ifndef _SYS_TM_H_ -#define _SYS_TM_H_ -struct tm { - int tm_sec; - int tm_min; - int tm_hour; - int tm_mday; - int tm_mon; - int tm_year; -}; -#endif /* _SYS_TM_H_ */ diff --git a/osfmk/sys/varargs.h b/osfmk/sys/varargs.h deleted file mode 100644 index 828d75e4b..000000000 --- a/osfmk/sys/varargs.h +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:59 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.28.1 1996/11/29 16:59:53 stephen - * nmklinux_1.0b3_shared into pmk1.1 - * Added powerpc special case - * [1996/11/29 16:34:18 stephen] - * - * Revision 1.2.15.2 1996/01/09 19:23:16 devrcs - * Added alpha varargs.h - * [1995/12/01 20:39:10 jfraser] - * - * Merged '64-bit safe' changes from DEC alpha port. - * [1995/11/21 18:10:39 jfraser] - * - * Revision 1.2.15.1 1994/09/23 03:13:46 ezf - * change marker to not FREE - * [1994/09/22 21:59:07 ezf] - * - * Revision 1.2.4.3 1993/08/03 18:30:40 gm - * CR9596: Change KERNEL to MACH_KERNEL. - * [1993/08/02 19:03:10 gm] - * - * Revision 1.2.4.2 1993/06/09 02:55:42 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:31:11 jeffc] - * - * Revision 1.2 1993/04/19 17:17:26 devrcs - * correct endif tags for ansi - * [1993/02/25 17:56:02 david] - * - * Revision 1.1 1992/09/30 02:37:05 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.10 91/12/10 16:32:53 jsb - * Fixes from Intel - * [91/12/10 15:52:01 jsb] - * - * Revision 2.9 91/09/12 16:54:22 debo - * Added mac2. - * [91/09/11 17:22:52 debo] - * - * Revision 2.8 91/07/09 23:23:50 danner - * Added luna88k support. - * [91/06/24 danner] - * - * Revision 2.7 91/06/18 20:53:02 jsb - * Moved i860 varargs code here from i860/i860_varargs.h, thanks to - * new copyright from Intel. - * [91/06/18 19:15:02 jsb] - * - * Revision 2.6 91/05/14 17:40:46 mrt - * Correcting copyright - * - * Revision 2.5 91/02/05 17:57:12 mrt - * Changed to new Mach copyright - * [91/02/01 17:49:51 mrt] - * - * Revision 2.4 90/11/25 17:48:50 jsb - * Added i860 support. - * [90/11/25 16:54:09 jsb] - * - * Revision 2.3 90/05/03 15:51:29 dbg - * Added i386. 
- * [90/02/08 dbg] - * - * Revision 2.2 89/11/29 14:16:44 af - * RCS-ed, added mips case. Mips also needs it in Mach standalone - * programs. - * [89/10/28 10:39:14 af] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon rights - * to redistribute these changes. - */ -/* - */ - -#ifndef _SYS_VARARGS_H_ -#define _SYS_VARARGS_H_ - -#if defined(vax) || defined(sun3) || defined(mips) || defined(i386) || defined(mac2) -#define va_dcl int va_alist; -typedef char * va_list; - -#define va_start(pvar) (pvar) = (va_list)&va_alist -#define va_end(pvar) -#ifdef mips -# define va_arg(pvar, type) ((type *)(pvar = \ - (va_list) (sizeof(type) > 4 ? 
((int)pvar + 2*8 - 1) & -8 \ - : ((int)pvar + 2*4 - 1) & -4)))[-1] -#else /* mips */ -#define va_arg(pvar,type) ( \ - (pvar) += ((sizeof(type)+3) & ~0x3), \ - *((type *)((pvar) - ((sizeof(type)+3) & ~0x3))) ) -#endif /* mips */ -#endif /* vax */ - -/* - * Try to make varargs work for the Multimax so that _doprnt can be - * declared as - * _doprnt(file, fmt, list) - * FILE *file; - * char *fmt; - * va_list *list; - * and use - * - * n = va_arg(*list, type) - * - * without needing to drag in extra declarations - * - * and printf becomes - * - * printf(fmt, va_alist) - * char *fmt; - * va_dcl - * { - * va_list listp; - * va_start(listp); - * _doprnt((FILE *)0, fmt, &listp); - * va_end(listp); - * } - */ - -#if defined(multimax) && defined(MACH_KERNEL) - -/* - * the vararglist pointer is an elaborate structure (ecch) - */ -typedef struct va_list { - char *va_item; /* current item */ - int *va_ptr1, /* arglist pointers for 1, 2, n */ - *va_ptr2, - *va_ptrn; - int va_ct; /* current argument number */ -} va_list; - -#define va_alist va_arg1, va_arg2, va_argn -#define va_dcl int va_arg1, va_arg2, va_argn; - -#define va_start(pvar) ( \ - (pvar).va_ptr1 = &va_arg1, \ - (pvar).va_ptr2 = &va_arg2, \ - (pvar).va_ptrn = &va_argn, \ - (pvar).va_ct = 0 ) - -#define va_end(pvar) - -#define va_arg(pvar, type) ( \ - (pvar).va_ct++, \ - (pvar).va_item = (char *) \ - ( ((pvar).va_ct == 1) \ - ? (pvar).va_ptr1 \ - : ((pvar).va_ct == 2) \ - ? (pvar).va_ptr2 \ - : (pvar).va_ptrn++ ) , \ - *((type *)((pvar).va_item)) ) - -/* what a mess! */ -#endif /* defined(multimax) && defined(MACH_KERNEL) */ - -#if i860 -#include /* PGI vs. 
Greenhills */ -#endif - -#ifdef luna88k -#include /* How nice */ -#endif - -#if defined (__PPC__) && defined (_CALL_SYSV) -#include /* care of gcc compiler - TEMPORARY 2.7.1 TODO NMGS*/ -#endif - -#if defined(__alpha) -# include -#endif /* defined(__alpha) */ - -#endif /* _SYS_VARARGS_H_ */ diff --git a/osfmk/vm/Makefile b/osfmk/vm/Makefile index e7e6726e0..7181839ca 100644 --- a/osfmk/vm/Makefile +++ b/osfmk/vm/Makefile @@ -14,6 +14,7 @@ EXPORT_ONLY_FILES = \ vm_fault.h \ vm_kern.h \ vm_map.h \ + vm_options.h \ vm_pageout.h \ vm_protos.h \ vm_shared_region.h diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index fd383fea3..347a76883 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -132,13 +132,14 @@ const struct memory_object_pager_ops vnode_pager_ops = { }; typedef struct vnode_pager { + struct ipc_object_header pager_header; /* fake ip_kotype() */ memory_object_pager_ops_t pager_ops; /* == &vnode_pager_ops */ - unsigned int pager_ikot; /* JMM: fake ip_kotype() */ unsigned int ref_count; /* reference count */ memory_object_control_t control_handle; /* mem object control handle */ struct vnode *vnode_handle; /* vnode handle */ } *vnode_pager_t; +#define pager_ikot pager_header.io_bits ipc_port_t trigger_name_to_port( /* forward */ @@ -147,7 +148,9 @@ trigger_name_to_port( /* forward */ kern_return_t vnode_pager_cluster_read( /* forward */ vnode_pager_t, - vm_object_offset_t, + vm_object_offset_t, + vm_object_offset_t, + uint32_t, vm_size_t); void @@ -192,14 +195,20 @@ int pagerdebug=0; #define PAGER_DEBUG(LEVEL, A) #endif +extern int proc_resetpcontrol(int); + +#if DEVELOPMENT || DEBUG +extern unsigned long vm_cs_validated_resets; +#endif + /* - * Routine: macx_triggers + * Routine: mach_macx_triggers * Function: * Syscall interface to set the call backs for low and * high water marks. 
*/ int -macx_triggers( +mach_macx_triggers( struct macx_triggers_args *args) { int hi_water = args->hi_water; @@ -217,8 +226,8 @@ macx_triggers( return EINVAL; } - if ((flags & SWAP_ENCRYPT_ON) && - (flags & SWAP_ENCRYPT_OFF)) { + if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) || + ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) { /* can't have it both ways */ return EINVAL; } @@ -242,6 +251,33 @@ macx_triggers( IP_NULL); } + if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) { + /* + * Time to switch to the emergency segment. + */ + return default_pager_triggers(default_pager, + 0, 0, + USE_EMERGENCY_SWAP_FILE_FIRST, + IP_NULL); + } + + if (flags & SWAP_FILE_CREATION_ERROR) { + /* + * For some reason, the dynamic pager failed to create a swap file. + */ + trigger_port = trigger_name_to_port(trigger_name); + if(trigger_port == NULL) { + return EINVAL; + } + /* trigger_port is locked and active */ + ipc_port_make_send_locked(trigger_port); + /* now unlocked */ + default_pager_triggers(default_pager, + 0, 0, + SWAP_FILE_CREATION_ERROR, + trigger_port); + } + if (flags & HI_WAT_ALERT) { trigger_port = trigger_name_to_port(trigger_name); if(trigger_port == NULL) { @@ -268,6 +304,18 @@ macx_triggers( LO_WAT_ALERT, trigger_port); } + + if (flags & PROC_RESUME) { + + /* + * For this call, hi_water is used to pass in the pid of the process we want to resume + * or unthrottle. This is of course restricted to the superuser (checked inside of + * proc_resetpcontrol). 
+ */ + + return proc_resetpcontrol(hi_water); + } + /* * Set thread scheduling priority and policy for the current thread * it is assumed for the time being that the thread setting the alert @@ -275,7 +323,7 @@ macx_triggers( * * XXX This does not belong in the kernel XXX */ - { + if (flags & HI_WAT_ALERT) { thread_precedence_policy_data_t pre; thread_extended_policy_data_t ext; @@ -291,9 +339,13 @@ macx_triggers( THREAD_PRECEDENCE_POLICY, (thread_policy_t)&pre, THREAD_PRECEDENCE_POLICY_COUNT); + + current_thread()->options |= TH_OPT_VMPRIV; } - current_thread()->options |= TH_OPT_VMPRIV; + if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) { + return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)); + } return 0; } @@ -312,7 +364,7 @@ trigger_name_to_port( return (NULL); space = current_space(); - if(ipc_port_translate_receive(space, (mach_port_name_t)trigger_name, + if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name), &trigger_port) != KERN_SUCCESS) return (NULL); return trigger_port; @@ -322,8 +374,6 @@ trigger_name_to_port( extern int uiomove64(addr64_t, int, void *); #define MAX_RUN 32 -unsigned long vm_cs_tainted_forces = 0; - int memory_object_control_uiomove( memory_object_control_t control, @@ -342,7 +392,6 @@ memory_object_control_uiomove( int cur_needed; int i; int orig_offset; - boolean_t make_lru = FALSE; vm_page_t page_run[MAX_RUN]; object = memory_object_control_to_vm_object(control); @@ -376,22 +425,74 @@ memory_object_control_uiomove( if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL) break; + /* - * Sync up on getting the busy bit + * if we're in this routine, we are inside a filesystem's + * locking model, so we don't ever want to wait for pages that have + * list_req_pending == TRUE since it means that the + * page is a candidate for some type of I/O operation, + * but that it has not yet been gathered into a UPL... 
+ * this implies that it is still outside the domain + * of the filesystem and that whoever is responsible for + * grabbing it into a UPL may be stuck behind the filesystem + * lock this thread owns, or trying to take a lock exclusively + * and waiting for the readers to drain from a rw lock... + * if we block in those cases, we will deadlock */ - if ((dst_page->busy || dst_page->cleaning)) { - /* + if (dst_page->list_req_pending) { + + if (dst_page->absent) { + /* + * this is the list_req_pending | absent | busy case + * which originates from vm_fault_page... we want + * to fall out of the fast path and go back + * to the caller which will gather this page + * into a UPL and issue the I/O if no one + * else beats us to it + */ + break; + } + if (dst_page->pageout) { + /* + * this is the list_req_pending | pageout | busy case + * which can originate from both the pageout_scan and + * msync worlds... we need to reset the state of this page to indicate + * it should stay in the cache marked dirty... nothing else we + * can do at this point... we can't block on it, we can't busy + * it and we can't clean it from this routine. + */ + vm_page_lockspin_queues(); + + vm_pageout_queue_steal(dst_page, TRUE); + vm_page_deactivate(dst_page); + + vm_page_unlock_queues(); + } + /* + * this is the list_req_pending | cleaning case... + * we can go ahead and deal with this page since + * it's ok for us to mark this page busy... if a UPL + * tries to gather this page, it will block until the + * busy is cleared, thus allowing us safe use of the page + * when we're done with it, we will clear busy and wake + * up anyone waiting on it, thus allowing the UPL creation + * to finish + */ + + } else if (dst_page->busy || dst_page->cleaning) { + /* * someone else is playing with the page...
if we've * already collected pages into this run, go ahead * and process now, we can't block on this * page while holding other pages in the BUSY state * otherwise we will wait */ - if (cur_run) - break; - PAGE_SLEEP(object, dst_page, THREAD_UNINT); + if (cur_run) + break; + PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; } + /* * this routine is only called when copying * to/from real files... no need to consider @@ -401,14 +502,18 @@ memory_object_control_uiomove( if (mark_dirty) { dst_page->dirty = TRUE; - if (dst_page->cs_validated) { + if (dst_page->cs_validated && + !dst_page->cs_tainted) { /* * CODE SIGNING: * We're modifying a code-signed - * page: assume that it is now tainted. + * page: force revalidate */ - dst_page->cs_tainted = TRUE; - vm_cs_tainted_forces++; + dst_page->cs_validated = FALSE; +#if DEVELOPMENT || DEBUG + vm_cs_validated_resets++; +#endif + pmap_disconnect(dst_page->phys_page); } } dst_page->busy = TRUE; @@ -419,8 +524,9 @@ memory_object_control_uiomove( } if (cur_run == 0) /* - * we hit a 'hole' in the cache - * we bail at this point + * we hit a 'hole' in the cache or + * a page we don't want to try to handle, + * so bail at this point * we'll unlock the object below */ break; @@ -454,8 +560,13 @@ memory_object_control_uiomove( * to the same page (this way we only move it once) */ if (take_reference && (cur_run > 1 || orig_offset == 0)) { + vm_page_lockspin_queues(); - make_lru = TRUE; + + for (i = 0; i < cur_run; i++) + vm_page_lru(page_run[i]); + + vm_page_unlock_queues(); } for (i = 0; i < cur_run; i++) { dst_page = page_run[i]; @@ -467,15 +578,8 @@ memory_object_control_uiomove( */ VM_PAGE_CONSUME_CLUSTERED(dst_page); - if (make_lru == TRUE) - vm_page_lru(dst_page); - PAGE_WAKEUP_DONE(dst_page); } - if (make_lru == TRUE) { - vm_page_unlock_queues(); - make_lru = FALSE; - } orig_offset = 0; } vm_object_unlock(object); @@ -498,6 +602,7 @@ vnode_pager_bootstrap(void) #if CONFIG_CODE_DECRYPTION apple_protect_pager_bootstrap(); 
#endif /* CONFIG_CODE_DECRYPTION */ + swapfile_pager_bootstrap(); return; } @@ -526,14 +631,14 @@ vnode_pager_init(memory_object_t mem_obj, #if !DEBUG __unused #endif - vm_size_t pg_size) + memory_object_cluster_size_t pg_size) { vnode_pager_t vnode_object; kern_return_t kr; memory_object_attr_info_data_t attributes; - PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size)); + PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size)); if (control == MEMORY_OBJECT_CONTROL_NULL) return KERN_INVALID_ARGUMENT; @@ -568,7 +673,7 @@ kern_return_t vnode_pager_data_return( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t data_cnt, + memory_object_cluster_size_t data_cnt, memory_object_offset_t *resid_offset, int *io_error, __unused boolean_t dirty, @@ -588,7 +693,7 @@ kern_return_t vnode_pager_data_initialize( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t data_cnt) + __unused memory_object_cluster_size_t data_cnt) { panic("vnode_pager_data_initialize"); return KERN_FAILURE; @@ -598,12 +703,47 @@ kern_return_t vnode_pager_data_unlock( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t size, + __unused memory_object_size_t size, __unused vm_prot_t desired_access) { return KERN_FAILURE; } +kern_return_t +vnode_pager_get_isinuse( + memory_object_t mem_obj, + uint32_t *isinuse) +{ + vnode_pager_t vnode_object; + + if (mem_obj->mo_pager_ops != &vnode_pager_ops) { + *isinuse = 1; + return KERN_INVALID_ARGUMENT; + } + + vnode_object = vnode_pager_lookup(mem_obj); + + *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle); + return KERN_SUCCESS; +} + +kern_return_t +vnode_pager_check_hard_throttle( + memory_object_t mem_obj, + uint32_t *limit, + uint32_t hard_throttle) +{ + vnode_pager_t vnode_object; + + if (mem_obj->mo_pager_ops != &vnode_pager_ops) + return KERN_INVALID_ARGUMENT; + + 
vnode_object = vnode_pager_lookup(mem_obj); + + (void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle); + return KERN_SUCCESS; +} + kern_return_t vnode_pager_get_object_size( memory_object_t mem_obj, @@ -683,27 +823,27 @@ kern_return_t vnode_pager_data_request( memory_object_t mem_obj, memory_object_offset_t offset, - __unused vm_size_t length, + __unused memory_object_cluster_size_t length, __unused vm_prot_t desired_access, memory_object_fault_info_t fault_info) { - register vnode_pager_t vnode_object; + vnode_pager_t vnode_object; + memory_object_offset_t base_offset; vm_size_t size; -#if MACH_ASSERT - memory_object_offset_t original_offset = offset; -#endif /* MACH_ASSERT */ + uint32_t io_streaming = 0; vnode_object = vnode_pager_lookup(mem_obj); size = MAX_UPL_TRANSFER * PAGE_SIZE; + base_offset = offset; - if (memory_object_cluster_size(vnode_object->control_handle, &offset, &size, fault_info) != KERN_SUCCESS) + if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS) size = PAGE_SIZE; - assert(original_offset >= offset && - original_offset < offset + size); + assert(offset >= base_offset && + offset < base_offset + size); - return vnode_pager_cluster_read(vnode_object, offset, size); + return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size); } /* @@ -765,7 +905,7 @@ kern_return_t vnode_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_size_t length, __unused vm_sync_t sync_flags) { register vnode_pager_t vnode_object; @@ -821,6 +961,7 @@ vnode_pager_last_unmap( } + /* * */ @@ -833,9 +974,7 @@ vnode_pager_cluster_write( int * io_error, int upl_flags) { - vm_size_t size; - upl_t upl = NULL; - int request_flags; + vm_size_t size; int errno; if (upl_flags & UPL_MSYNC) { @@ -846,20 +985,11 @@ vnode_pager_cluster_write( upl_flags |= UPL_KEEPCACHED; while (cnt) { - 
kern_return_t kr; - size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */ - request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE | - UPL_SET_INTERNAL | UPL_SET_LITE; - - kr = memory_object_upl_request(vnode_object->control_handle, - offset, size, &upl, NULL, NULL, request_flags); - if (kr != KERN_SUCCESS) - panic("vnode_pager_cluster_write: upl request failed\n"); - + assert((upl_size_t) size == size); vnode_pageout(vnode_object->vnode_handle, - upl, (vm_offset_t)0, offset, size, upl_flags, &errno); + NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno); if ( (upl_flags & UPL_KEEPCACHED) ) { if ( (*io_error = errno) ) @@ -874,7 +1004,6 @@ vnode_pager_cluster_write( } else { vm_object_offset_t vnode_size; vm_object_offset_t base_offset; - vm_object_t object; /* * this is the pageout path @@ -892,7 +1021,7 @@ vnode_pager_cluster_write( base_offset = offset & ~((signed)(size - 1)); if ((base_offset + size) > vnode_size) - size = round_page_32(((vm_size_t)(vnode_size - base_offset))); + size = round_page(((vm_size_t)(vnode_size - base_offset))); } else { /* * we've been requested to page out a page beyond the current @@ -904,22 +1033,9 @@ vnode_pager_cluster_write( base_offset = offset; size = PAGE_SIZE; } - object = memory_object_control_to_vm_object(vnode_object->control_handle); - - if (object == VM_OBJECT_NULL) - panic("vnode_pager_cluster_write: NULL vm_object in control handle\n"); - - request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE | - UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | - UPL_SET_INTERNAL | UPL_SET_LITE; - - vm_object_upl_request(object, base_offset, size, - &upl, NULL, NULL, request_flags); - if (upl == NULL) - panic("vnode_pager_cluster_write: upl request failed\n"); - + assert((upl_size_t) size == size); vnode_pageout(vnode_object->vnode_handle, - upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL); + NULL, (upl_offset_t)(offset - 
base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL); } } @@ -930,20 +1046,27 @@ vnode_pager_cluster_write( kern_return_t vnode_pager_cluster_read( vnode_pager_t vnode_object, + vm_object_offset_t base_offset, vm_object_offset_t offset, + uint32_t io_streaming, vm_size_t cnt) { int local_error = 0; int kret; + int flags = 0; assert(! (cnt & PAGE_MASK)); + if (io_streaming) + flags |= UPL_IOSTREAMING; + + assert((upl_size_t) cnt == cnt); kret = vnode_pagein(vnode_object->vnode_handle, (upl_t) NULL, - (vm_offset_t) NULL, - offset, - cnt, - 0, + (upl_offset_t) (offset - base_offset), + base_offset, + (upl_size_t) cnt, + flags, &local_error); /* if(kret == PAGER_ABSENT) { @@ -961,8 +1084,9 @@ vnode_pager_cluster_read( UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL); count = 0; + assert((upl_size_t) cnt == cnt); kr = memory_object_upl_request(vnode_object->control_handle, - offset, cnt, + base_offset, (upl_size_t) cnt, &upl, NULL, &count, uplflags); if (kr == KERN_SUCCESS) { upl_abort(upl, 0); @@ -1013,7 +1137,7 @@ vnode_object_create( * The vm_map call takes both named entry ports and raw memory * objects in the same parameter. We need to make sure that * vm_map does not see this object as a named entry port. So, - * we reserve the second word in the object for a fake ip_kotype + * we reserve the first word in the object for a fake ip_kotype * setting - that will tell vm_map to use it as a memory object. 
*/ vnode_object->pager_ops = &vnode_pager_ops; @@ -1044,11 +1168,11 @@ vnode_pager_lookup( #include -static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid); +static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid); int -fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t *vnodeaddr, uint32_t *vid) +fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid) { vm_map_t map; @@ -1136,7 +1260,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal * pinfo->pri_depth = 0; if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) { - *vnodeaddr = (uint32_t)0; + *vnodeaddr = (uintptr_t)0; if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) { vm_map_unlock_read(map); @@ -1153,7 +1277,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal * static int fill_vnodeinfoforaddr( vm_map_entry_t entry, - uint32_t * vnodeaddr, + uintptr_t * vnodeaddr, uint32_t * vid) { vm_object_t top_object, object; @@ -1218,14 +1342,14 @@ fill_vnodeinfoforaddr( kern_return_t vnode_pager_get_object_vnode ( memory_object_t mem_obj, - uint32_t * vnodeaddr, + uintptr_t * vnodeaddr, uint32_t * vid) { vnode_pager_t vnode_object; vnode_object = vnode_pager_lookup(mem_obj); if (vnode_object->vnode_handle) { - *vnodeaddr = (uint32_t)vnode_object->vnode_handle; + *vnodeaddr = (uintptr_t)vnode_object->vnode_handle; *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle); return(KERN_SUCCESS); @@ -1234,3 +1358,57 @@ vnode_pager_get_object_vnode ( return(KERN_FAILURE); } + +/* + * Find the underlying vnode object for the given vm_map_entry. If found, return with the + * object locked, otherwise return NULL with nothing locked. 
+ */ + +vm_object_t +find_vnode_object( + vm_map_entry_t entry +) +{ + vm_object_t top_object, object; + memory_object_t memory_object; + memory_object_pager_ops_t pager_ops; + + if (!entry->is_sub_map) { + + /* + * The last object in the shadow chain has the + * relevant pager information. + */ + + top_object = entry->object.vm_object; + + if (top_object) { + vm_object_lock(top_object); + + for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) { + vm_object_lock(object->shadow); + vm_object_unlock(object); + } + + if (object && !object->internal && object->pager_ready && !object->terminating && + object->alive) { + memory_object = object->pager; + pager_ops = memory_object->mo_pager_ops; + + /* + * If this object points to the vnode_pager_ops, then we found what we're + * looking for. Otherwise, this vm_map_entry doesn't have an underlying + * vnode and so we fall through to the bottom and return NULL. + */ + + if (pager_ops == &vnode_pager_ops) + return object; /* we return with the object locked */ + } + + vm_object_unlock(object); + } + + } + + return(VM_OBJECT_NULL); +} diff --git a/osfmk/vm/cpm.h b/osfmk/vm/cpm.h index 08794ab7b..9233e644f 100644 --- a/osfmk/vm/cpm.h +++ b/osfmk/vm/cpm.h @@ -54,7 +54,7 @@ * These pages are all in "gobbled" state when . */ extern kern_return_t -cpm_allocate(vm_size_t size, vm_page_t *list, ppnum_t max_pnum, boolean_t wire); +cpm_allocate(vm_size_t size, vm_page_t *list, ppnum_t max_pnum, ppnum_t pnum_mask, boolean_t wire, int flags); /* * CPM-specific event counters. diff --git a/osfmk/vm/device_vm.c b/osfmk/vm/device_vm.c index 4f32ac723..575351078 100644 --- a/osfmk/vm/device_vm.c +++ b/osfmk/vm/device_vm.c @@ -78,14 +78,14 @@ const struct memory_object_pager_ops device_pager_ops = { "device pager" }; -typedef int device_port_t; +typedef uintptr_t device_port_t; /* * The start of "struct device_pager" MUST match a "struct memory_object". 
*/ typedef struct device_pager { + struct ipc_object_header pager_header; /* fake ip_kotype() */ memory_object_pager_ops_t pager_ops; /* == &device_pager_ops */ - unsigned int pager_ikot; /* fake ip_kotype() */ unsigned int ref_count; /* reference count */ memory_object_control_t control_handle; /* mem object's cntrl handle */ device_port_t device_handle; /* device_handle */ @@ -93,7 +93,7 @@ typedef struct device_pager { int flags; } *device_pager_t; - +#define pager_ikot pager_header.io_bits device_pager_t @@ -136,7 +136,7 @@ device_pager_bootstrap(void) memory_object_t device_pager_setup( __unused memory_object_t device, - int device_handle, + uintptr_t device_handle, vm_size_t size, int flags) { @@ -184,10 +184,12 @@ device_pager_populate_object( if(!vm_object->phys_contiguous) { unsigned int null_size = 0; + assert((upl_size_t) size == size); kr = vm_object_upl_request(vm_object, - (vm_object_offset_t)offset, size, &upl, NULL, - &null_size, (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE)); - + (vm_object_offset_t)offset, + (upl_size_t) size, &upl, NULL, + &null_size, + (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE)); if(kr != KERN_SUCCESS) panic("device_pager_populate_object: list_req failed"); @@ -220,7 +222,7 @@ kern_return_t device_pager_init( memory_object_t mem_obj, memory_object_control_t control, - __unused vm_size_t pg_size) + __unused memory_object_cluster_size_t pg_size) { device_pager_t device_object; kern_return_t kr; @@ -278,7 +280,7 @@ kern_return_t device_pager_data_return( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t data_cnt, + memory_object_cluster_size_t data_cnt, __unused memory_object_offset_t *resid_offset, __unused int *io_error, __unused boolean_t dirty, @@ -304,7 +306,7 @@ kern_return_t device_pager_data_request( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_cluster_size_t length, __unused vm_prot_t protection_required, __unused memory_object_fault_info_t fault_info) { @@ -376,7 +378,7 @@ 
kern_return_t device_pager_data_initialize( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t data_cnt) + __unused memory_object_cluster_size_t data_cnt) { panic("device_pager_data_initialize"); return KERN_FAILURE; @@ -386,7 +388,7 @@ kern_return_t device_pager_data_unlock( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t size, + __unused memory_object_size_t size, __unused vm_prot_t desired_access) { return KERN_FAILURE; @@ -408,7 +410,7 @@ kern_return_t device_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_offset_t length, + memory_object_size_t length, __unused vm_sync_t sync_flags) { device_pager_t device_object; diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index a89aa0ef0..d54ddb42b 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -110,7 +110,7 @@ memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; -decl_mutex_data(, memory_manager_default_lock) +decl_lck_mtx_data(, memory_manager_default_lock) /* @@ -172,7 +172,7 @@ memory_object_lock_page( { XPR(XPR_MEMORY_OBJECT, "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n", - (integer_t)m, should_return, should_flush, prot, 0); + m, should_return, should_flush, prot, 0); /* * If we cannot change access to the page, @@ -181,8 +181,36 @@ memory_object_lock_page( * wired, then give up. */ - if (m->busy || m->cleaning) - return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK); + if (m->busy || m->cleaning) { + if (m->list_req_pending && m->pageout && + should_return == MEMORY_OBJECT_RETURN_NONE && + should_flush == TRUE) { + /* + * page was earmarked by vm_pageout_scan + * to be cleaned and stolen... 
we're going + * to take it back since we are being asked to + * flush the page w/o cleaning it (i.e. we don't + * care that it's dirty, we want it gone from + * the cache) and we don't want to stall + * waiting for it to be cleaned for 2 reasons... + * 1 - no use paging it out since we're probably + * shrinking the file at this point or we no + * longer care about the data in the page + * 2 - if we stall, we may cause a deadlock in + * the FS trying to acquire its locks + * on the VNOP_PAGEOUT path presuming that + * those locks are already held on the truncate + * path before calling through to this function + * + * so undo all of the state that vm_pageout_scan + * hung on this page + */ + m->busy = FALSE; + + vm_pageout_queue_steal(m, FALSE); + } else + return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK); + } /* * Don't worry about pages for which the kernel @@ -194,7 +222,7 @@ memory_object_lock_page( /* dump the page, pager wants us to */ /* clean it up and there is no */ /* relevant data to return */ - if(m->wire_count == 0) { + if ( !VM_PAGE_WIRED(m)) { VM_PAGE_FREE(m); return(MEMORY_OBJECT_LOCK_RESULT_DONE); } @@ -210,7 +238,7 @@ memory_object_lock_page( * Wired pages don't get flushed or disconnected from the pmap.
*/ - if (m->wire_count != 0) { + if (VM_PAGE_WIRED(m)) { if (memory_object_should_return_page(m, should_return)) { if (m->dirty) return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN); @@ -286,7 +314,7 @@ memory_object_lock_page( if (vm_page_deactivate_hint && (should_return != MEMORY_OBJECT_RETURN_NONE)) { - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_deactivate(m); vm_page_unlock_queues(); } @@ -312,7 +340,7 @@ MACRO_BEGIN \ \ (void) memory_object_data_return(pager, \ po, \ - data_cnt, \ + (memory_object_cluster_size_t)data_cnt, \ ro, \ ioerr, \ (action) == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN,\ @@ -361,16 +389,8 @@ memory_object_lock_request( vm_prot_t prot) { vm_object_t object; - __unused boolean_t should_flush; - - should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; - - XPR(XPR_MEMORY_OBJECT, - "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n", - (integer_t)control, offset, size, - (((should_return&1)<<1)|should_flush), prot); - /* + /* * Check for bogus arguments. 
*/ object = memory_object_control_to_vm_object(control); @@ -388,10 +408,20 @@ memory_object_lock_request( */ vm_object_lock(object); vm_object_paging_begin(object); + + if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) { + if ((should_return != MEMORY_OBJECT_RETURN_NONE) || offset || object->copy) { + flags &= ~MEMORY_OBJECT_DATA_FLUSH_ALL; + flags |= MEMORY_OBJECT_DATA_FLUSH; + } + } offset -= object->paging_offset; - (void)vm_object_update(object, - offset, size, resid_offset, io_errno, should_return, flags, prot); + if (flags & MEMORY_OBJECT_DATA_FLUSH_ALL) + vm_object_reap_pages(object, REAP_DATA_FLUSH); + else + (void)vm_object_update(object, offset, size, resid_offset, + io_errno, should_return, flags, prot); vm_object_paging_end(object); vm_object_unlock(object); @@ -491,7 +521,7 @@ vm_object_sync( XPR(XPR_VM_OBJECT, "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n", - (integer_t)object, offset, size, should_flush, should_return); + object, offset, size, should_flush, should_return); /* * Lock the object, and acquire a paging reference to @@ -539,9 +569,9 @@ vm_object_update_extent( { vm_page_t m; int retval = 0; - vm_size_t data_cnt = 0; + memory_object_cluster_size_t data_cnt = 0; vm_object_offset_t paging_offset = 0; - vm_object_offset_t last_offset = offset; + vm_object_offset_t next_offset = offset; memory_object_lock_result_t page_lock_result; memory_object_lock_result_t pageout_action; @@ -552,12 +582,15 @@ vm_object_update_extent( offset += PAGE_SIZE_64) { /* - * Limit the number of pages to be cleaned at once. 
+ * Limit the number of pages to be cleaned at once to a contiguous + * run, or at most MAX_UPL_TRANSFER size */ - if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) { - LIST_REQ_PAGEOUT_PAGES(object, data_cnt, - pageout_action, paging_offset, offset_resid, io_errno, should_iosync); - data_cnt = 0; + if (data_cnt) { + if ((data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) || (next_offset != offset)) { + LIST_REQ_PAGEOUT_PAGES(object, data_cnt, + pageout_action, paging_offset, offset_resid, io_errno, should_iosync); + data_cnt = 0; + } } while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { @@ -565,7 +598,7 @@ vm_object_update_extent( XPR(XPR_MEMORY_OBJECT, "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n", - (integer_t)object, offset, page_lock_result, 0, 0); + object, offset, page_lock_result, 0, 0); switch (page_lock_result) { @@ -604,20 +637,14 @@ vm_object_update_extent( * * if this would form a discontiguous block, * clean the old pages and start anew. - * - * Mark the page busy since we will unlock the - * object if we issue the LIST_REQ_PAGEOUT */ - m->busy = TRUE; - if (data_cnt && - ((last_offset != offset) || (pageout_action != page_lock_result))) { + if (data_cnt && pageout_action != page_lock_result) { LIST_REQ_PAGEOUT_PAGES(object, data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync); data_cnt = 0; + continue; } - m->busy = FALSE; - if (m->cleaning) { PAGE_SLEEP(object, m, THREAD_UNINT); continue; @@ -627,9 +654,8 @@ vm_object_update_extent( paging_offset = offset; } data_cnt += PAGE_SIZE; - last_offset = offset + PAGE_SIZE_64; + next_offset = offset + PAGE_SIZE_64; - vm_page_lockspin_queues(); /* * Clean */ @@ -637,17 +663,19 @@ vm_object_update_extent( m->cleaning = TRUE; if (should_flush && - /* let's no flush a wired page... */ - !m->wire_count) { + /* let's not flush a wired page... 
*/ + !VM_PAGE_WIRED(m)) { /* * and add additional state * for the flush */ m->busy = TRUE; m->pageout = TRUE; + + vm_page_lockspin_queues(); vm_page_wire(m); + vm_page_unlock_queues(); } - vm_page_unlock_queues(); retval = 1; break; @@ -691,6 +719,7 @@ vm_object_update( boolean_t update_cow; boolean_t should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE; boolean_t should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE; + vm_fault_return_t result; int num_of_extents; int n; #define MAX_EXTENTS 8 @@ -809,25 +838,28 @@ vm_object_update( fault_info.lo_offset = copy_offset; fault_info.hi_offset = copy_size; fault_info.no_cache = FALSE; + fault_info.stealth = TRUE; vm_object_paging_begin(copy_object); for (i = copy_offset; i < copy_size; i += PAGE_SIZE) { RETRY_COW_OF_LOCK_REQUEST: - fault_info.cluster_size = copy_size - i; + fault_info.cluster_size = (vm_size_t) (copy_size - i); + assert(fault_info.cluster_size == copy_size - i); prot = VM_PROT_WRITE|VM_PROT_READ; - switch (vm_fault_page(copy_object, i, - VM_PROT_WRITE|VM_PROT_READ, - FALSE, - &prot, - &page, - &top_page, - (int *)0, - &error, - FALSE, - FALSE, &fault_info)) { - + result = vm_fault_page(copy_object, i, + VM_PROT_WRITE|VM_PROT_READ, + FALSE, + &prot, + &page, + &top_page, + (int *)0, + &error, + FALSE, + FALSE, &fault_info); + + switch (result) { case VM_FAULT_SUCCESS: if (top_page) { vm_fault_cleanup( @@ -835,11 +867,16 @@ vm_object_update( vm_object_lock(copy_object); vm_object_paging_begin(copy_object); } - vm_page_lock_queues(); - if (!page->active && !page->inactive) - vm_page_deactivate(page); - vm_page_unlock_queues(); - + if (!page->active && + !page->inactive && + !page->throttled) { + vm_page_lockspin_queues(); + if (!page->active && + !page->inactive && + !page->throttled) + vm_page_deactivate(page); + vm_page_unlock_queues(); + } PAGE_WAKEUP_DONE(page); break; case VM_FAULT_RETRY: @@ -864,11 +901,19 @@ vm_object_update( vm_object_lock(copy_object); 
vm_object_paging_begin(copy_object); goto RETRY_COW_OF_LOCK_REQUEST; + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(copy_object); + vm_object_unlock(copy_object); + /*FALLTHROUGH*/ case VM_FAULT_MEMORY_ERROR: if (object != copy_object) vm_object_deallocate(copy_object); vm_object_lock(object); goto BYPASS_COW_COPYIN; + default: + panic("vm_object_update: unexpected error 0x%x" + " from vm_fault_page()\n", result); } } @@ -1040,7 +1085,7 @@ kern_return_t memory_object_synchronize_completed( memory_object_control_t control, memory_object_offset_t offset, - vm_offset_t length) + memory_object_size_t length) { vm_object_t object; msync_req_t msr; @@ -1049,7 +1094,7 @@ memory_object_synchronize_completed( XPR(XPR_MEMORY_OBJECT, "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n", - (integer_t)object, offset, length, 0, 0); + object, offset, length, 0, 0); /* * Look for bogus arguments @@ -1097,7 +1142,7 @@ vm_object_set_attributes_common( XPR(XPR_MEMORY_OBJECT, "m_o_set_attr_com, object 0x%X flg %x strat %d\n", - (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0); + object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0); if (object == VM_OBJECT_NULL) return(KERN_INVALID_ARGUMENT); @@ -1466,7 +1511,9 @@ memory_object_iopl_request( if(*upl_size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); - *upl_size = named_entry->size - offset; + *upl_size = (upl_size_t)(named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; } if(caller_flags & UPL_COPYOUT_FROM) { if((named_entry->protection & VM_PROT_READ) @@ -1592,7 +1639,7 @@ memory_object_upl_request( object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) - return (KERN_INVALID_ARGUMENT); + return (KERN_TERMINATED); return vm_object_upl_request(object, offset, @@ -1643,7 +1690,7 @@ memory_object_super_upl_request( kern_return_t 
memory_object_cluster_size(memory_object_control_t control, memory_object_offset_t *start, - vm_size_t *length, memory_object_fault_info_t fault_info) + vm_size_t *length, uint32_t *io_streaming, memory_object_fault_info_t fault_info) { vm_object_t object; @@ -1654,7 +1701,7 @@ memory_object_cluster_size(memory_object_control_t control, memory_object_offset *start -= object->paging_offset; - vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info); + vm_object_cluster_size(object, (vm_object_offset_t *)start, length, (vm_object_fault_info_t)fault_info, io_streaming); *start += object->paging_offset; @@ -1693,7 +1740,7 @@ host_default_memory_manager( assert(host_priv == &realhost); new_manager = *default_manager; - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); current_manager = memory_manager_default; returned_manager = MEMORY_OBJECT_DEFAULT_NULL; @@ -1732,9 +1779,18 @@ host_default_memory_manager( */ thread_wakeup((event_t) &memory_manager_default); + + /* + * Now that we have a default pager for anonymous memory, + * reactivate all the throttled pages (i.e. dirty pages with + * no pager). 
+ */ + if (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { + vm_page_reactivate_all_throttled(); + } } out: - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); *default_manager = returned_manager; return(result); @@ -1753,19 +1809,20 @@ memory_manager_default_reference(void) { memory_object_default_t current_manager; - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); current_manager = memory_manager_default; while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) { wait_result_t res; - res = thread_sleep_mutex((event_t) &memory_manager_default, - &memory_manager_default_lock, - THREAD_UNINT); + res = lck_mtx_sleep(&memory_manager_default_lock, + LCK_SLEEP_DEFAULT, + (event_t) &memory_manager_default, + THREAD_UNINT); assert(res == THREAD_AWAKENED); current_manager = memory_manager_default; } memory_object_default_reference(current_manager); - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); return current_manager; } @@ -1787,18 +1844,18 @@ memory_manager_default_check(void) { memory_object_default_t current; - mutex_lock(&memory_manager_default_lock); + lck_mtx_lock(&memory_manager_default_lock); current = memory_manager_default; if (current == MEMORY_OBJECT_DEFAULT_NULL) { static boolean_t logged; /* initialized to 0 */ boolean_t complain = !logged; logged = TRUE; - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); if (complain) printf("Warning: No default memory manager\n"); return(KERN_FAILURE); } else { - mutex_unlock(&memory_manager_default_lock); + lck_mtx_unlock(&memory_manager_default_lock); return(KERN_SUCCESS); } } @@ -1807,7 +1864,7 @@ __private_extern__ void memory_manager_default_init(void) { memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL; - mutex_init(&memory_manager_default_lock, 0); + lck_mtx_init(&memory_manager_default_lock, &vm_object_lck_grp, &vm_object_lck_attr); } @@ -1860,7 
+1917,7 @@ memory_object_range_op( offset_beg, offset_end, ops, - range); + (uint32_t *) range); } @@ -2084,7 +2141,7 @@ kern_return_t memory_object_data_return ( memory_object_t memory_object, memory_object_offset_t offset, - vm_size_t size, + memory_object_cluster_size_t size, memory_object_offset_t *resid_offset, int *io_error, boolean_t dirty, @@ -2108,7 +2165,7 @@ kern_return_t memory_object_data_initialize ( memory_object_t memory_object, memory_object_offset_t offset, - vm_size_t size + memory_object_cluster_size_t size ) { return (memory_object->mo_pager_ops->memory_object_data_initialize)( @@ -2122,7 +2179,7 @@ kern_return_t memory_object_data_unlock ( memory_object_t memory_object, memory_object_offset_t offset, - vm_size_t size, + memory_object_size_t size, vm_prot_t desired_access ) { @@ -2138,7 +2195,7 @@ kern_return_t memory_object_synchronize ( memory_object_t memory_object, memory_object_offset_t offset, - vm_size_t size, + memory_object_size_t size, vm_sync_t sync_flags ) { diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h index e9b974174..a0b6690c1 100644 --- a/osfmk/vm/memory_object.h +++ b/osfmk/vm/memory_object.h @@ -126,7 +126,7 @@ extern kern_return_t memory_object_free_from_cache( extern kern_return_t memory_object_iopl_request( ipc_port_t port, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index d30299778..6fd7c00f2 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -169,6 +169,9 @@ extern void mapping_free_prime(void); /* Primes the mapping block release list */ extern boolean_t pmap_next_page(ppnum_t *pnum); +#if defined(__LP64__) +extern boolean_t pmap_next_page_k64(ppnum_t *pnum); +#endif /* During VM initialization, * return the next unused * physical page. 
@@ -416,9 +419,10 @@ extern void (pmap_pageable)( vm_map_offset_t end, boolean_t pageable); -#ifndef NO_NESTED_PMAP + extern uint64_t pmap_nesting_size_min; extern uint64_t pmap_nesting_size_max; + extern kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, @@ -427,8 +431,7 @@ extern kern_return_t pmap_nest(pmap_t grand, extern kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size); -#endif /* NO_NESTED_PMAP */ - +extern boolean_t pmap_adjust_unnest_parameters(pmap_t, vm_map_offset_t *, vm_map_offset_t *); #endif /* MACH_KERNEL_PRIVATE */ /* @@ -450,9 +453,11 @@ extern pmap_t kernel_pmap; /* The kernel's map */ #define VM_WIMG_MASK 0xFF #define VM_WIMG_USE_DEFAULT 0x80000000 +#define VM_MEM_SUPERPAGE 0x100 /* map a superpage instead of a base page */ +#if !defined(__LP64__) extern vm_offset_t pmap_extract(pmap_t pmap, vm_map_offset_t va); - +#endif extern void pmap_change_wiring( /* Specify pageability */ pmap_t pmap, vm_map_offset_t va, @@ -469,6 +474,10 @@ extern void fillPage(ppnum_t pa, unsigned int fill); extern void pmap_map_sharedpage(task_t task, pmap_t pmap); extern void pmap_unmap_sharedpage(pmap_t pmap); +#if defined(__LP64__) +void pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr); +#endif + #endif /* KERNEL_PRIVATE */ #endif /* _VM_PMAP_H_ */ diff --git a/osfmk/vm/vm32_user.c b/osfmk/vm/vm32_user.c new file mode 100644 index 000000000..f37fd3659 --- /dev/null +++ b/osfmk/vm/vm32_user.c @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: vm/vm32_user.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * + * User-exported virtual memory functions. + */ + +#include + +#include +#include +#include /* to get vm_address_t */ +#include +#include /* to get pointer_t */ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if VM32_SUPPORT + +/* + * See vm_user.c for the real implementation of all of these functions. + * We call through to the mach_ "wide" versions of the routines, and trust + * that the VM system verifies the arguments and only returns address that + * are appropriate for the task's address space size. + * + * New VM call implementations should not be added here, because they would + * be available only to 32-bit userspace clients. Add them to vm_user.c + * and the corresponding prototype to mach_vm.defs (subsystem 4800). 
+ */ + +kern_return_t +vm32_allocate( + vm_map_t map, + vm32_offset_t *addr, + vm32_size_t size, + int flags) +{ + mach_vm_offset_t maddr; + kern_return_t result; + + maddr = *addr; + result = mach_vm_allocate(map, &maddr, size, flags); + *addr = CAST_DOWN_EXPLICIT(vm32_offset_t, maddr); + + return result; +} + +kern_return_t +vm32_deallocate( + vm_map_t map, + vm32_offset_t start, + vm32_size_t size) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + return mach_vm_deallocate(map, start, size); +} + +kern_return_t +vm32_inherit( + vm_map_t map, + vm32_offset_t start, + vm32_size_t size, + vm_inherit_t new_inheritance) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + return mach_vm_inherit(map, start, size, new_inheritance); +} + +kern_return_t +vm32_protect( + vm_map_t map, + vm32_offset_t start, + vm32_size_t size, + boolean_t set_maximum, + vm_prot_t new_protection) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + return mach_vm_protect(map, start, size, set_maximum, new_protection); +} + +kern_return_t +vm32_machine_attribute( + vm_map_t map, + vm32_address_t addr, + vm32_size_t size, + vm_machine_attribute_t attribute, + vm_machine_attribute_val_t* value) /* IN/OUT */ +{ + if ((map == VM_MAP_NULL) || (addr + size < addr)) + return(KERN_INVALID_ARGUMENT); + + return mach_vm_machine_attribute(map, addr, size, attribute, value); +} + +kern_return_t +vm32_read( + vm_map_t map, + vm32_address_t addr, + vm32_size_t size, + pointer_t *data, + mach_msg_type_number_t *data_size) +{ + return mach_vm_read(map, addr, size, data, data_size); +} + +kern_return_t +vm32_read_list( + vm_map_t map, + vm32_read_entry_t data_list, + natural_t count) +{ + mach_vm_read_entry_t mdata_list; + mach_msg_type_number_t i; + kern_return_t result; + + for (i=0; i < VM_MAP_ENTRY_MAX; i++) { + mdata_list[i].address = data_list[i].address; + mdata_list[i].size = 
data_list[i].size; + } + + result = mach_vm_read_list(map, mdata_list, count); + + for (i=0; i < VM_MAP_ENTRY_MAX; i++) { + data_list[i].address = CAST_DOWN_EXPLICIT(vm32_address_t, mdata_list[i].address); + data_list[i].size = CAST_DOWN_EXPLICIT(vm32_size_t, mdata_list[i].size); + } + + return result; +} + +kern_return_t +vm32_read_overwrite( + vm_map_t map, + vm32_address_t address, + vm32_size_t size, + vm32_address_t data, + vm32_size_t *data_size) +{ + kern_return_t result; + mach_vm_size_t mdata_size; + + mdata_size = *data_size; + result = mach_vm_read_overwrite(map, address, size, data, &mdata_size); + *data_size = CAST_DOWN_EXPLICIT(vm32_size_t, mdata_size); + + return result; +} + +kern_return_t +vm32_write( + vm_map_t map, + vm32_address_t address, + pointer_t data, + mach_msg_type_number_t size) +{ + return mach_vm_write(map, address, data, size); +} + +kern_return_t +vm32_copy( + vm_map_t map, + vm32_address_t source_address, + vm32_size_t size, + vm32_address_t dest_address) +{ + return mach_vm_copy(map, source_address, size, dest_address); +} + +kern_return_t +vm32_map_64( + vm_map_t target_map, + vm32_offset_t *address, + vm32_size_t size, + vm32_offset_t mask, + int flags, + ipc_port_t port, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + mach_vm_offset_t maddress; + kern_return_t result; + + maddress = *address; + result = mach_vm_map(target_map, &maddress, size, mask, + flags, port, offset, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN_EXPLICIT(vm32_offset_t, maddress); + + return result; +} + +kern_return_t +vm32_map( + vm_map_t target_map, + vm32_offset_t *address, + vm32_size_t size, + vm32_offset_t mask, + int flags, + ipc_port_t port, + vm32_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + return vm32_map_64(target_map, address, size, mask, + flags, 
port, offset, copy, + cur_protection, max_protection, inheritance); +} + +kern_return_t +vm32_remap( + vm_map_t target_map, + vm32_offset_t *address, + vm32_size_t size, + vm32_offset_t mask, + boolean_t anywhere, + vm_map_t src_map, + vm32_offset_t memory_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + mach_vm_offset_t maddress; + kern_return_t result; + + maddress = *address; + result = mach_vm_remap(target_map, &maddress, size, mask, + anywhere, src_map, memory_address, copy, + cur_protection, max_protection, inheritance); + *address = CAST_DOWN_EXPLICIT(vm32_offset_t, maddress); + + return result; +} + +kern_return_t +vm32_msync( + vm_map_t map, + vm32_address_t address, + vm32_size_t size, + vm_sync_t sync_flags) +{ + return mach_vm_msync(map, address, size, sync_flags); +} + +kern_return_t +vm32_behavior_set( + vm_map_t map, + vm32_offset_t start, + vm32_size_t size, + vm_behavior_t new_behavior) +{ + if ((map == VM_MAP_NULL) || (start + size < start)) + return(KERN_INVALID_ARGUMENT); + + return mach_vm_behavior_set(map, start, size, new_behavior); +} + +kern_return_t +vm32_region_64( + vm_map_t map, + vm32_offset_t *address, /* IN/OUT */ + vm32_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + mach_port_t *object_name) /* OUT */ +{ + mach_vm_offset_t maddress; + mach_vm_size_t msize; + kern_return_t result; + + maddress = *address; + msize = *size; + result = mach_vm_region(map, &maddress, &msize, flavor, info, count, object_name); + *size = CAST_DOWN_EXPLICIT(vm32_size_t, msize); + *address = CAST_DOWN_EXPLICIT(vm32_offset_t, maddress); + + return result; +} + +kern_return_t +vm32_region( + vm_map_t map, + vm32_address_t *address, /* IN/OUT */ + vm32_size_t *size, /* OUT */ + vm_region_flavor_t flavor, /* IN */ + vm_region_info_t info, /* OUT */ + mach_msg_type_number_t *count, /* IN/OUT */ + 
mach_port_t *object_name) /* OUT */ +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + + kr = vm_map_region(map, + &map_addr, &map_size, + flavor, info, count, + object_name); + + *address = CAST_DOWN_EXPLICIT(vm32_address_t, map_addr); + *size = CAST_DOWN_EXPLICIT(vm32_size_t, map_size); + + if (KERN_SUCCESS == kr && map_addr + map_size > VM32_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +vm32_region_recurse_64( + vm_map_t map, + vm32_address_t *address, + vm32_size_t *size, + uint32_t *depth, + vm_region_recurse_info_64_t info, + mach_msg_type_number_t *infoCnt) +{ + mach_vm_address_t maddress; + mach_vm_size_t msize; + kern_return_t result; + + maddress = *address; + msize = *size; + result = mach_vm_region_recurse(map, &maddress, &msize, depth, info, infoCnt); + *address = CAST_DOWN_EXPLICIT(vm32_address_t, maddress); + *size = CAST_DOWN_EXPLICIT(vm32_size_t, msize); + + return result; +} + +kern_return_t +vm32_region_recurse( + vm_map_t map, + vm32_offset_t *address, /* IN/OUT */ + vm32_size_t *size, /* OUT */ + natural_t *depth, /* IN/OUT */ + vm_region_recurse_info_t info32, /* IN/OUT */ + mach_msg_type_number_t *infoCnt) /* IN/OUT */ +{ + vm_region_submap_info_data_64_t info64; + vm_region_submap_info_t info; + vm_map_address_t map_addr; + vm_map_size_t map_size; + kern_return_t kr; + + if (VM_MAP_NULL == map || *infoCnt < VM_REGION_SUBMAP_INFO_COUNT) + return KERN_INVALID_ARGUMENT; + + + map_addr = (vm_map_address_t)*address; + map_size = (vm_map_size_t)*size; + info = (vm_region_submap_info_t)info32; + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT_64; + + kr = vm_map_region_recurse_64(map, &map_addr,&map_size, + depth, &info64, infoCnt); + + info->protection = info64.protection; + info->max_protection = info64.max_protection; + info->inheritance = info64.inheritance; 
+ info->offset = (uint32_t)info64.offset; /* trouble-maker */ + info->user_tag = info64.user_tag; + info->pages_resident = info64.pages_resident; + info->pages_shared_now_private = info64.pages_shared_now_private; + info->pages_swapped_out = info64.pages_swapped_out; + info->pages_dirtied = info64.pages_dirtied; + info->ref_count = info64.ref_count; + info->shadow_depth = info64.shadow_depth; + info->external_pager = info64.external_pager; + info->share_mode = info64.share_mode; + info->is_submap = info64.is_submap; + info->behavior = info64.behavior; + info->object_id = info64.object_id; + info->user_wired_count = info64.user_wired_count; + + *address = CAST_DOWN_EXPLICIT(vm32_address_t, map_addr); + *size = CAST_DOWN_EXPLICIT(vm32_size_t, map_size); + *infoCnt = VM_REGION_SUBMAP_INFO_COUNT; + + if (KERN_SUCCESS == kr && map_addr + map_size > VM32_MAX_ADDRESS) + return KERN_INVALID_ADDRESS; + return kr; +} + +kern_return_t +vm32_purgable_control( + vm_map_t map, + vm32_offset_t address, + vm_purgable_t control, + int *state) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_purgable_control(map, + vm_map_trunc_page(address), + control, + state); +} + +kern_return_t +vm32_map_page_query( + vm_map_t map, + vm32_offset_t offset, + int *disposition, + int *ref_count) +{ + if (VM_MAP_NULL == map) + return KERN_INVALID_ARGUMENT; + + return vm_map_page_query_internal(map, + vm_map_trunc_page(offset), + disposition, ref_count); +} + +kern_return_t +vm32_make_memory_entry_64( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_handle) +{ + // use the existing entrypoint + return _mach_make_memory_entry(target_map, size, offset, permission, object_handle, parent_handle); +} + +kern_return_t +vm32_make_memory_entry( + vm_map_t target_map, + vm32_size_t *size, + vm32_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + 
ipc_port_t parent_entry) +{ + memory_object_size_t mo_size; + kern_return_t kr; + + mo_size = (memory_object_size_t)*size; + kr = _mach_make_memory_entry(target_map, &mo_size, + (memory_object_offset_t)offset, permission, object_handle, + parent_entry); + *size = CAST_DOWN_EXPLICIT(vm32_size_t, mo_size); + return kr; +} + +kern_return_t +vm32__task_wire( + vm_map_t map, + boolean_t must_wire) +{ + if (map == VM_MAP_NULL) + return(KERN_INVALID_ARGUMENT); + + if (must_wire) + map->wiring_required = TRUE; + else + map->wiring_required = FALSE; + + return(KERN_SUCCESS); +} + +#endif /* VM32_SUPPORT */ diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c index da167635f..d57cfc71a 100644 --- a/osfmk/vm/vm_apple_protect.c +++ b/osfmk/vm/vm_apple_protect.c @@ -84,16 +84,16 @@ void apple_protect_pager_reference(memory_object_t mem_obj); void apple_protect_pager_deallocate(memory_object_t mem_obj); kern_return_t apple_protect_pager_init(memory_object_t mem_obj, memory_object_control_t control, - vm_size_t pg_size); + memory_object_cluster_size_t pg_size); kern_return_t apple_protect_pager_terminate(memory_object_t mem_obj); kern_return_t apple_protect_pager_data_request(memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_cluster_size_t length, vm_prot_t protection_required, memory_object_fault_info_t fault_info); kern_return_t apple_protect_pager_data_return(memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t data_cnt, + memory_object_cluster_size_t data_cnt, memory_object_offset_t *resid_offset, int *io_error, boolean_t dirty, @@ -101,14 +101,14 @@ kern_return_t apple_protect_pager_data_return(memory_object_t mem_obj, int upl_flags); kern_return_t apple_protect_pager_data_initialize(memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t data_cnt); + memory_object_cluster_size_t data_cnt); kern_return_t apple_protect_pager_data_unlock(memory_object_t mem_obj, memory_object_offset_t 
offset, - vm_size_t size, + memory_object_size_t size, vm_prot_t desired_access); kern_return_t apple_protect_pager_synchronize(memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_size_t length, vm_sync_t sync_flags); kern_return_t apple_protect_pager_map(memory_object_t mem_obj, vm_prot_t prot); @@ -138,8 +138,8 @@ const struct memory_object_pager_ops apple_protect_pager_ops = { * the "apple protect" EMM. */ typedef struct apple_protect_pager { + struct ipc_object_header pager_header; /* fake ip_kotype() */ memory_object_pager_ops_t pager_ops; /* == &apple_protect_pager_ops */ - unsigned int pager_ikot; /* JMM: fake ip_kotype() */ queue_chain_t pager_queue; /* next & prev pagers */ unsigned int ref_count; /* reference count */ boolean_t is_ready; /* is this pager ready ? */ @@ -149,6 +149,7 @@ typedef struct apple_protect_pager { struct pager_crypt_info crypt; } *apple_protect_pager_t; #define APPLE_PROTECT_PAGER_NULL ((apple_protect_pager_t) NULL) +#define pager_ikot pager_header.io_bits /* * List of memory objects managed by this EMM. @@ -157,7 +158,7 @@ typedef struct apple_protect_pager { int apple_protect_pager_count = 0; /* number of pagers */ int apple_protect_pager_count_mapped = 0; /* number of unmapped pagers */ queue_head_t apple_protect_pager_queue; -decl_mutex_data(,apple_protect_pager_lock) +decl_lck_mtx_data(,apple_protect_pager_lock) /* * Maximum number of unmapped pagers we're willing to keep around. 
@@ -172,6 +173,12 @@ int apple_protect_pager_count_unmapped_max = 0; int apple_protect_pager_num_trim_max = 0; int apple_protect_pager_num_trim_total = 0; + +lck_grp_t apple_protect_pager_lck_grp; +lck_grp_attr_t apple_protect_pager_lck_grp_attr; +lck_attr_t apple_protect_pager_lck_attr; + + /* internal prototypes */ apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object, struct pager_crypt_info *crypt_info); @@ -203,7 +210,10 @@ int apple_protect_pagerdebug = 0; void apple_protect_pager_bootstrap(void) { - mutex_init(&apple_protect_pager_lock, 0); + lck_grp_attr_setdefault(&apple_protect_pager_lck_grp_attr); + lck_grp_init(&apple_protect_pager_lck_grp, "apple_protect", &apple_protect_pager_lck_grp_attr); + lck_attr_setdefault(&apple_protect_pager_lck_attr); + lck_mtx_init(&apple_protect_pager_lock, &apple_protect_pager_lck_grp, &apple_protect_pager_lck_attr); queue_init(&apple_protect_pager_queue); } @@ -219,7 +229,7 @@ apple_protect_pager_init( #if !DEBUG __unused #endif - vm_size_t pg_size) + memory_object_cluster_size_t pg_size) { apple_protect_pager_t pager; kern_return_t kr; @@ -268,7 +278,7 @@ kern_return_t apple_protect_pager_data_return( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t data_cnt, + __unused memory_object_cluster_size_t data_cnt, __unused memory_object_offset_t *resid_offset, __unused int *io_error, __unused boolean_t dirty, @@ -283,7 +293,7 @@ kern_return_t apple_protect_pager_data_initialize( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t data_cnt) + __unused memory_object_cluster_size_t data_cnt) { panic("apple_protect_pager_data_initialize: should never get called"); return KERN_FAILURE; @@ -293,7 +303,7 @@ kern_return_t apple_protect_pager_data_unlock( __unused memory_object_t mem_obj, __unused memory_object_offset_t offset, - __unused vm_size_t size, + __unused memory_object_size_t size, __unused vm_prot_t desired_access) 
{ return KERN_FAILURE; @@ -308,7 +318,7 @@ kern_return_t apple_protect_pager_data_request( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_cluster_size_t length, #if !DEBUG __unused #endif @@ -320,7 +330,7 @@ apple_protect_pager_data_request( upl_t upl; int upl_flags; upl_size_t upl_size; - upl_page_info_t *upl_pl = NULL; + upl_page_info_t *upl_pl; unsigned int pl_count; vm_object_t src_object, dst_object; kern_return_t kr, retval; @@ -332,16 +342,19 @@ apple_protect_pager_data_request( vm_prot_t prot; vm_page_t src_page, top_page; int interruptible; - vm_object_fault_info_t fault_info; + struct vm_object_fault_info fault_info; + int ret; PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required)); + retval = KERN_SUCCESS; src_object = VM_OBJECT_NULL; kernel_mapping = 0; upl = NULL; upl_pl = NULL; - fault_info = (vm_object_fault_info_t) mo_fault_info; - interruptible = fault_info->interruptible; + fault_info = *((struct vm_object_fault_info *) mo_fault_info); + fault_info.stealth = TRUE; + interruptible = fault_info.interruptible; pager = apple_protect_pager_lookup(mem_obj); assert(pager->is_ready); @@ -391,7 +404,7 @@ apple_protect_pager_data_request( goto done; } map_entry->object.vm_object = kernel_object; - map_entry->offset = kernel_mapping - VM_MIN_KERNEL_ADDRESS; + map_entry->offset = kernel_mapping; vm_map_unlock(kernel_map); src_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping); dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping + PAGE_SIZE_64); @@ -410,11 +423,12 @@ apple_protect_pager_data_request( */ upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl); pl_count = length / PAGE_SIZE; - for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) { + for (cur_offset = 0; + retval == KERN_SUCCESS && cur_offset < length; + cur_offset += PAGE_SIZE) { ppnum_t dst_pnum; - int type_of_fault; - if (!upl_page_present(upl_pl, cur_offset / PAGE_SIZE)) { + if 
(!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) { /* this page is not in the UPL: skip it */ continue; } @@ -436,11 +450,11 @@ apple_protect_pager_data_request( &prot, &src_page, &top_page, - &type_of_fault, + NULL, &error_code, FALSE, FALSE, - fault_info); + &fault_info); switch (kr) { case VM_FAULT_SUCCESS: break; @@ -454,6 +468,11 @@ apple_protect_pager_data_request( case VM_FAULT_INTERRUPTED: retval = MACH_SEND_INTERRUPTED; goto done; + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(src_object); + vm_object_unlock(src_object); + /*FALLTHROUGH*/ case VM_FAULT_MEMORY_ERROR: /* the page is not there ! */ if (error_code) { @@ -463,11 +482,24 @@ apple_protect_pager_data_request( } goto done; default: - retval = KERN_FAILURE; - goto done; + panic("apple_protect_pager_data_request: " + "vm_fault_page() unexpected error 0x%x\n", + kr); } assert(src_page != VM_PAGE_NULL); assert(src_page->busy); + + if (!src_page->active && + !src_page->inactive && + !src_page->throttled) { + vm_page_lockspin_queues(); + if (!src_page->active && + !src_page->inactive && + !src_page->throttled) { + vm_page_deactivate(src_page); + } + vm_page_unlock_queues(); + } /* * Establish an explicit mapping of the source @@ -485,8 +517,8 @@ apple_protect_pager_data_request( * We can't do a regular VM mapping because the VM page * is "busy". */ - dst_pnum = (addr64_t) - upl_phys_page(upl_pl, cur_offset / PAGE_SIZE); + dst_pnum = (ppnum_t) + upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE)); assert(dst_pnum != 0); pmap_enter(kernel_pmap, kernel_mapping + PAGE_SIZE_64, @@ -495,28 +527,36 @@ apple_protect_pager_data_request( dst_object->wimg_bits & VM_WIMG_MASK, TRUE); - /* - * Validate the original page... - */ - if (src_page->object->code_signed) { - vm_page_validate_cs_mapped(src_page, - (const void *) src_vaddr); - } - /* - * ... and transfer the results to the destination page. 
- */ - UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE, - src_page->cs_validated); - UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE, - src_page->cs_tainted); - /* * Decrypt the encrypted contents of the source page * into the destination page. */ - pager->crypt.page_decrypt((const void *) src_vaddr, - (void *) dst_vaddr, offset+cur_offset, - pager->crypt.crypt_ops); + ret = pager->crypt.page_decrypt((const void *) src_vaddr, + (void *) dst_vaddr, + offset+cur_offset, + pager->crypt.crypt_ops); + if (ret) { + /* + * Decryption failed. Abort the fault. + */ + retval = KERN_ABORTED; + } else { + /* + * Validate the original page... + */ + if (src_page->object->code_signed) { + vm_page_validate_cs_mapped( + src_page, + (const void *) src_vaddr); + } + /* + * ... and transfer the results to the destination page. + */ + UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE, + src_page->cs_validated); + UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE, + src_page->cs_tainted); + } /* * Remove the pmap mapping of the source and destination pages @@ -543,7 +583,6 @@ apple_protect_pager_data_request( } } - retval = KERN_SUCCESS; done: if (upl != NULL) { /* clean up the UPL */ @@ -560,6 +599,33 @@ apple_protect_pager_data_request( /* abort or commit the UPL */ if (retval != KERN_SUCCESS) { upl_abort(upl, 0); + if (retval == KERN_ABORTED) { + wait_result_t wait_result; + + /* + * We aborted the fault and did not provide + * any contents for the requested pages but + * the pages themselves are not invalid, so + * let's return success and let the caller + * retry the fault, in case it might succeed + * later (when the decryption code is up and + * running in the kernel, for example). + */ + retval = KERN_SUCCESS; + /* + * Wait a little bit first to avoid using + * too much CPU time retrying and failing + * the same fault over and over again. 
+ */ + wait_result = assert_wait_timeout( + (event_t) apple_protect_pager_data_request, + THREAD_UNINT, + 10000, /* 10ms */ + NSEC_PER_USEC); + assert(wait_result == THREAD_WAITING); + wait_result = thread_block(THREAD_CONTINUE_NULL); + assert(wait_result == THREAD_TIMED_OUT); + } } else { boolean_t empty; upl_commit_range(upl, 0, upl->size, @@ -604,10 +670,10 @@ apple_protect_pager_reference( pager = apple_protect_pager_lookup(mem_obj); - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); assert(pager->ref_count > 0); pager->ref_count++; - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); } @@ -684,7 +750,7 @@ apple_protect_pager_deallocate_internal( int count_unmapped; if (! locked) { - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); } count_unmapped = (apple_protect_pager_count - @@ -707,7 +773,7 @@ apple_protect_pager_deallocate_internal( */ apple_protect_pager_dequeue(pager); /* the pager is all ours: no need for the lock now */ - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); apple_protect_pager_terminate_internal(pager); } else if (pager->ref_count == 0) { /* @@ -715,7 +781,7 @@ apple_protect_pager_deallocate_internal( * been terminated. Do some final cleanup and release the * pager structure. */ - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); if (pager->pager_control != MEMORY_OBJECT_CONTROL_NULL) { memory_object_control_deallocate(pager->pager_control); pager->pager_control = MEMORY_OBJECT_CONTROL_NULL; @@ -724,7 +790,7 @@ apple_protect_pager_deallocate_internal( pager = APPLE_PROTECT_PAGER_NULL; } else { /* there are still plenty of references: keep going... 
*/ - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); } if (needs_trimming) { @@ -772,7 +838,7 @@ kern_return_t apple_protect_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_size_t length, __unused vm_sync_t sync_flags) { apple_protect_pager_t pager; @@ -791,8 +857,8 @@ apple_protect_pager_synchronize( * apple_protect_pager_map() * * This allows VM to let us, the EMM, know that this memory object - * is currently mapped one or more times. This is called by VM only the first - * time the memory object gets mapped and we take one extra reference on the + * is currently mapped one or more times. This is called by VM each time + * the memory object gets mapped and we take one extra reference on the * memory object to account for all its mappings. */ kern_return_t @@ -806,7 +872,7 @@ apple_protect_pager_map( pager = apple_protect_pager_lookup(mem_obj); - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); assert(pager->is_ready); assert(pager->ref_count > 0); /* pager is alive */ if (pager->is_mapped == FALSE) { @@ -819,7 +885,7 @@ apple_protect_pager_map( pager->ref_count++; apple_protect_pager_count_mapped++; } - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); return KERN_SUCCESS; } @@ -841,7 +907,7 @@ apple_protect_pager_last_unmap( pager = apple_protect_pager_lookup(mem_obj); - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); if (pager->is_mapped) { /* * All the mappings are gone, so let go of the one extra @@ -857,7 +923,7 @@ apple_protect_pager_last_unmap( apple_protect_pager_deallocate_internal(pager, TRUE); /* caution: deallocate_internal() released the lock ! 
*/ } else { - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); } return KERN_SUCCESS; @@ -897,7 +963,7 @@ apple_protect_pager_create( * The vm_map call takes both named entry ports and raw memory * objects in the same parameter. We need to make sure that * vm_map does not see this object as a named entry port. So, - * we reserve the second word in the object for a fake ip_kotype + * we reserve the first word in the object for a fake ip_kotype * setting - that will tell vm_map to use it as a memory object. */ pager->pager_ops = &apple_protect_pager_ops; @@ -911,7 +977,7 @@ apple_protect_pager_create( vm_object_reference(backing_object); - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); /* see if anyone raced us to create a pager for the same object */ queue_iterate(&apple_protect_pager_queue, pager2, @@ -926,7 +992,7 @@ apple_protect_pager_create( /* while we hold the lock, transfer our setup ref to winner */ pager2->ref_count++; /* we lost the race, down with the loser... 
*/ - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); vm_object_deallocate(pager->backing_object); pager->backing_object = VM_OBJECT_NULL; kfree(pager, sizeof (*pager)); @@ -945,17 +1011,17 @@ apple_protect_pager_create( if (apple_protect_pager_count > apple_protect_pager_count_max) { apple_protect_pager_count_max = apple_protect_pager_count; } - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); kr = memory_object_create_named((memory_object_t) pager, 0, &control); assert(kr == KERN_SUCCESS); - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); /* the new pager is now ready to be used */ pager->is_ready = TRUE; - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); /* wakeup anyone waiting for this pager to be ready */ thread_wakeup(&pager->is_ready); @@ -977,7 +1043,7 @@ apple_protect_pager_setup( { apple_protect_pager_t pager; - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); queue_iterate(&apple_protect_pager_queue, pager, @@ -987,7 +1053,7 @@ apple_protect_pager_setup( /* For the same object we must always use the same protection options */ if (!((pager->crypt.page_decrypt == crypt_info->page_decrypt) && (pager->crypt.crypt_ops == crypt_info->crypt_ops) )) { - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); return MEMORY_OBJECT_NULL; } break; @@ -1002,7 +1068,7 @@ apple_protect_pager_setup( pager->ref_count++; } - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); if (pager == APPLE_PROTECT_PAGER_NULL) { pager = apple_protect_pager_create(backing_object, crypt_info); @@ -1011,13 +1077,14 @@ apple_protect_pager_setup( } } - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); while (!pager->is_ready) { - thread_sleep_mutex(&pager->is_ready, - &apple_protect_pager_lock, - THREAD_UNINT); + 
lck_mtx_sleep(&apple_protect_pager_lock, + LCK_SLEEP_DEFAULT, + &pager->is_ready, + THREAD_UNINT); } - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); return (memory_object_t) pager; } @@ -1030,7 +1097,7 @@ apple_protect_pager_trim(void) int num_trim; int count_unmapped; - mutex_lock(&apple_protect_pager_lock); + lck_mtx_lock(&apple_protect_pager_lock); /* * We have too many pagers, try and trim some unused ones, @@ -1074,7 +1141,7 @@ apple_protect_pager_trim(void) } apple_protect_pager_num_trim_total += num_trim; - mutex_unlock(&apple_protect_pager_lock); + lck_mtx_unlock(&apple_protect_pager_lock); /* terminate the trimmed pagers */ while (!queue_empty(&trim_queue)) { diff --git a/osfmk/vm/vm_debug.c b/osfmk/vm/vm_debug.c index 702ce6a42..a0712c54d 100644 --- a/osfmk/vm/vm_debug.c +++ b/osfmk/vm/vm_debug.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include @@ -92,6 +91,11 @@ #define __DEBUG_ONLY #endif /* !MACH_VM_DEBUG */ +#if VM32_SUPPORT + +#include +#include + /* * Routine: mach_vm_region_info [kernel call] * Purpose: @@ -107,9 +111,9 @@ */ kern_return_t -mach_vm_region_info( +vm32_region_info( __DEBUG_ONLY vm_map_t map, - __DEBUG_ONLY vm_offset_t address, + __DEBUG_ONLY vm32_offset_t address, __DEBUG_ONLY vm_info_region_t *regionp, __DEBUG_ONLY vm_info_object_array_t *objectsp, __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) @@ -169,10 +173,10 @@ mach_vm_region_info( /* cmap is read-locked; we have a real entry */ object = entry->object.vm_object; - region.vir_start = entry->vme_start; - region.vir_end = entry->vme_end; - region.vir_object = (vm_offset_t) object; - region.vir_offset = entry->offset; + region.vir_start = (natural_t) entry->vme_start; + region.vir_end = (natural_t) entry->vme_end; + region.vir_object = (natural_t)(uintptr_t) object; + region.vir_offset = (natural_t) entry->offset; region.vir_needs_copy = entry->needs_copy; region.vir_protection = entry->protection; 
region.vir_max_protection = entry->max_protection; @@ -181,7 +185,7 @@ mach_vm_region_info( region.vir_user_wired_count = entry->user_wired_count; used = 0; - room = size / sizeof(vm_info_object_t); + room = (unsigned int) (size / sizeof(vm_info_object_t)); if (object == VM_OBJECT_NULL) { vm_map_unlock_read(cmap); @@ -200,27 +204,28 @@ mach_vm_region_info( &((vm_info_object_t *) addr)[used]; vio->vio_object = - (vm_offset_t) cobject; + (natural_t)(uintptr_t) cobject; vio->vio_size = - cobject->size; + (natural_t) cobject->size; vio->vio_ref_count = cobject->ref_count; vio->vio_resident_page_count = cobject->resident_page_count; vio->vio_copy = - (vm_offset_t) cobject->copy; + (natural_t)(uintptr_t) cobject->copy; vio->vio_shadow = - (vm_offset_t) cobject->shadow; + (natural_t)(uintptr_t) cobject->shadow; vio->vio_shadow_offset = - cobject->shadow_offset; + (natural_t) cobject->shadow_offset; vio->vio_paging_offset = - cobject->paging_offset; + (natural_t) cobject->paging_offset; vio->vio_copy_strategy = cobject->copy_strategy; vio->vio_last_alloc = - cobject->last_alloc; + (vm_offset_t) cobject->last_alloc; vio->vio_paging_in_progress = - cobject->paging_in_progress; + cobject->paging_in_progress + + cobject->activity_in_progress; vio->vio_pager_created = cobject->pager_created; vio->vio_pager_initialized = @@ -262,7 +267,7 @@ mach_vm_region_info( if (size != 0) kmem_free(ipc_kernel_map, addr, size); - size = round_page_32(2 * used * sizeof(vm_info_object_t)); + size = round_page(2 * used * sizeof(vm_info_object_t)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) @@ -283,7 +288,7 @@ mach_vm_region_info( kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = - round_page_32(used * sizeof(vm_info_object_t)); + round_page(used * sizeof(vm_info_object_t)); kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), vm_map_round_page(addr + size_used), FALSE); @@ -310,9 +315,9 @@ mach_vm_region_info( */ 
kern_return_t -mach_vm_region_info_64( +vm32_region_info_64( __DEBUG_ONLY vm_map_t map, - __DEBUG_ONLY vm_offset_t address, + __DEBUG_ONLY vm32_offset_t address, __DEBUG_ONLY vm_info_region_64_t *regionp, __DEBUG_ONLY vm_info_object_array_t *objectsp, __DEBUG_ONLY mach_msg_type_number_t *objectsCntp) @@ -370,9 +375,9 @@ mach_vm_region_info_64( /* cmap is read-locked; we have a real entry */ object = entry->object.vm_object; - region.vir_start = entry->vme_start; - region.vir_end = entry->vme_end; - region.vir_object = (vm_offset_t) object; + region.vir_start = (natural_t) entry->vme_start; + region.vir_end = (natural_t) entry->vme_end; + region.vir_object = (natural_t)(uintptr_t) object; region.vir_offset = entry->offset; region.vir_needs_copy = entry->needs_copy; region.vir_protection = entry->protection; @@ -382,7 +387,7 @@ mach_vm_region_info_64( region.vir_user_wired_count = entry->user_wired_count; used = 0; - room = size / sizeof(vm_info_object_t); + room = (unsigned int) (size / sizeof(vm_info_object_t)); if (object == VM_OBJECT_NULL) { vm_map_unlock_read(cmap); @@ -401,27 +406,28 @@ mach_vm_region_info_64( &((vm_info_object_t *) addr)[used]; vio->vio_object = - (vm_offset_t) cobject; + (natural_t)(uintptr_t) cobject; vio->vio_size = - cobject->size; + (natural_t) cobject->size; vio->vio_ref_count = cobject->ref_count; vio->vio_resident_page_count = cobject->resident_page_count; vio->vio_copy = - (vm_offset_t) cobject->copy; + (natural_t)(uintptr_t) cobject->copy; vio->vio_shadow = - (vm_offset_t) cobject->shadow; + (natural_t)(uintptr_t) cobject->shadow; vio->vio_shadow_offset = - cobject->shadow_offset; + (natural_t) cobject->shadow_offset; vio->vio_paging_offset = - cobject->paging_offset; + (natural_t) cobject->paging_offset; vio->vio_copy_strategy = cobject->copy_strategy; vio->vio_last_alloc = - cobject->last_alloc; + (vm_offset_t) cobject->last_alloc; vio->vio_paging_in_progress = - cobject->paging_in_progress; + cobject->paging_in_progress + + 
cobject->activity_in_progress; vio->vio_pager_created = cobject->pager_created; vio->vio_pager_initialized = @@ -463,7 +469,7 @@ mach_vm_region_info_64( if (size != 0) kmem_free(ipc_kernel_map, addr, size); - size = round_page_32(2 * used * sizeof(vm_info_object_t)); + size = round_page(2 * used * sizeof(vm_info_object_t)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) @@ -484,7 +490,7 @@ mach_vm_region_info_64( kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = - round_page_32(used * sizeof(vm_info_object_t)); + round_page(used * sizeof(vm_info_object_t)); kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), vm_map_round_page(addr + size_used), FALSE); @@ -509,7 +515,7 @@ mach_vm_region_info_64( * Return an array of virtual pages that are mapped to a task. */ kern_return_t -vm_mapped_pages_info( +vm32_mapped_pages_info( __DEBUG_ONLY vm_map_t map, __DEBUG_ONLY page_address_array_t *pages, __DEBUG_ONLY mach_msg_type_number_t *pages_count) @@ -528,7 +534,7 @@ vm_mapped_pages_info( pmap = map->pmap; size = pmap_resident_count(pmap) * sizeof(vm_offset_t); - size = round_page_32(size); + size = round_page(size); for (;;) { (void) vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); @@ -536,7 +542,7 @@ vm_mapped_pages_info( vm_map_round_page(addr + size), FALSE); list = (page_address_array_t) addr; - space = size / sizeof(vm_offset_t); + space = (unsigned int) (size / sizeof(vm_offset_t)); actual = pmap_list_resident_pages(pmap, list, @@ -552,7 +558,7 @@ vm_mapped_pages_info( /* * Try again, doubling the size */ - size = round_page_32(actual * sizeof(vm_offset_t)); + size = round_page(actual * sizeof(vm_offset_t)); } if (actual == 0) { *pages = 0; @@ -561,7 +567,7 @@ vm_mapped_pages_info( } else { *pages_count = actual; - size_used = round_page_32(actual * sizeof(vm_offset_t)); + size_used = round_page(actual * sizeof(vm_offset_t)); (void) vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), 
vm_map_round_page(addr + size), VM_PROT_READ|VM_PROT_WRITE, FALSE); @@ -581,6 +587,8 @@ vm_mapped_pages_info( #endif /* MACH_VM_DEBUG */ } +#endif /* VM32_SUPPORT */ + /* * Routine: host_virtual_physical_table_info * Purpose: @@ -626,13 +634,13 @@ host_virtual_physical_table_info( if (info != *infop) kmem_free(ipc_kernel_map, addr, size); - size = round_page_32(actual * sizeof *info); + size = round_page(actual * sizeof *info); kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; info = (hash_info_bucket_t *) addr; - potential = size/sizeof *info; + potential = (unsigned int) (size/sizeof (*info)); } if (info == *infop) { @@ -647,7 +655,7 @@ host_virtual_physical_table_info( vm_map_copy_t copy; vm_size_t used; - used = round_page_32(actual * sizeof *info); + used = round_page(actual * sizeof *info); if (used != size) kmem_free(ipc_kernel_map, addr + used, size - used); diff --git a/osfmk/vm/vm_external.c b/osfmk/vm/vm_external.c index 85712c9a6..db0c32d09 100644 --- a/osfmk/vm/vm_external.c +++ b/osfmk/vm/vm_external.c @@ -106,12 +106,12 @@ #define SMALL_SIZE KALLOC_MINSIZE #define LARGE_SIZE PAGE_SIZE -static vm_size_t power_of_2(vm_size_t size); +static vm_object_size_t power_of_2(vm_object_size_t size); -static vm_size_t -power_of_2(vm_size_t size) +static vm_object_size_t +power_of_2(vm_object_size_t size) { - vm_size_t power; + vm_object_size_t power; power = 2 * SMALL_SIZE; while (power < size) { @@ -122,21 +122,25 @@ power_of_2(vm_size_t size) vm_external_map_t vm_external_create( - vm_offset_t size) + vm_object_offset_t size) { - vm_size_t bytes; + vm_object_size_t bytes; vm_external_map_t result = VM_EXTERNAL_NULL; bytes = stob(size); if (bytes <= SMALL_SIZE) { - if ((result = (vm_external_map_t)kalloc(SMALL_SIZE)) != NULL) { + result = (vm_external_map_t)kalloc(SMALL_SIZE); + if (result != NULL) { memset(result, 0, SMALL_SIZE); } } else if (bytes <= LARGE_SIZE) { bytes = power_of_2(bytes); - if 
((result = (vm_external_map_t)kalloc(bytes)) != NULL) { - memset(result, 0, bytes); + assert((vm_size_t) bytes == bytes); + result = (vm_external_map_t)kalloc((vm_size_t)bytes); + if (result != NULL) { + assert((size_t) bytes == bytes); + memset(result, 0, (size_t) bytes); } } return(result); @@ -145,9 +149,9 @@ vm_external_create( void vm_external_destroy( vm_external_map_t map, - vm_size_t size) + vm_object_size_t size) { - vm_size_t bytes; + vm_object_size_t bytes; if (map == VM_EXTERNAL_NULL) return; @@ -158,7 +162,8 @@ vm_external_destroy( } else { bytes = power_of_2(bytes); } - kfree(map, bytes); + assert((vm_size_t) bytes == bytes); + kfree(map, (vm_size_t) bytes); } /* @@ -166,11 +171,11 @@ vm_external_destroy( * size of the object to be mapped, i.e. the size of the map that was * created by vm_external_create. */ -vm_size_t +vm_object_size_t vm_external_map_size( - vm_offset_t size) + vm_object_size_t size) { - vm_size_t bytes; + vm_object_size_t bytes; bytes = stob(size); if (bytes != 0) { @@ -186,9 +191,11 @@ vm_external_map_size( void vm_external_copy( vm_external_map_t old_map, - vm_size_t old_size, + vm_object_size_t old_size, vm_external_map_t new_map) { + vm_object_size_t bytes; + /* * Cannot copy non-existent maps */ @@ -198,16 +205,18 @@ vm_external_copy( /* * Copy old map to new */ - memcpy(new_map, old_map, stob(old_size)); + bytes = stob(old_size); + assert((size_t) bytes == bytes); + memcpy(new_map, old_map, (size_t) bytes); } boolean_t vm_external_within( - vm_size_t new_size, - vm_size_t old_size) + vm_object_size_t new_size, + vm_object_size_t old_size) { - vm_size_t new_bytes; - vm_size_t old_bytes; + vm_object_size_t new_bytes; + vm_object_size_t old_bytes; assert(new_size >= old_size); @@ -232,14 +241,13 @@ vm_external_within( vm_external_state_t _vm_external_state_get( vm_external_map_t map, - vm_offset_t offset) + vm_object_offset_t offset) { - unsigned - int bit, byte; + uint64_t bit, byte; assert (map != VM_EXTERNAL_NULL); - bit = 
atop_32(offset); + bit = atop_64(offset); byte = bit >> 3; if (map[byte] & (1 << (bit & 07))) { return VM_EXTERNAL_STATE_EXISTS; @@ -251,15 +259,14 @@ _vm_external_state_get( void vm_external_state_set( vm_external_map_t map, - vm_offset_t offset) + vm_object_offset_t offset) { - unsigned - int bit, byte; + uint64_t bit, byte; if (map == VM_EXTERNAL_NULL) return; - bit = atop_32(offset); + bit = atop_64(offset); byte = bit >> 3; map[byte] |= (1 << (bit & 07)); } @@ -267,15 +274,14 @@ vm_external_state_set( void vm_external_state_clr( vm_external_map_t map, - vm_offset_t offset) + vm_object_offset_t offset) { - unsigned - int bit, byte; + uint64_t bit, byte; if (map == VM_EXTERNAL_NULL) return; - bit = atop_32(offset); + bit = atop_64(offset); byte = bit >> 3; map[byte] &= ~(1 << (bit & 07)); } diff --git a/osfmk/vm/vm_external.h b/osfmk/vm/vm_external.h index 7f46331ae..e0bdbf5e8 100644 --- a/osfmk/vm/vm_external.h +++ b/osfmk/vm/vm_external.h @@ -60,6 +60,7 @@ #define VM_VM_EXTERNAL_H_ #include +#include #include /* @@ -85,7 +86,7 @@ typedef int vm_external_state_t; /* * Useful macros */ -#define stob(s) ((atop_32((s)) + 07) >> 3) +#define stob(s) ((atop_64((s)) + 07) >> 3) /* * Routines exported by this module. 
@@ -96,34 +97,34 @@ extern void vm_external_module_initialize(void); extern vm_external_map_t vm_external_create( /* Create a vm_external_map_t */ - vm_offset_t size); + vm_object_size_t size); extern void vm_external_destroy( /* Destroy one */ vm_external_map_t map, - vm_size_t size); + vm_object_size_t size); -extern vm_size_t vm_external_map_size( +extern vm_object_size_t vm_external_map_size( /* Return size of map in bytes */ - vm_offset_t size); + vm_object_size_t size); extern void vm_external_copy( /* Copy one into another */ vm_external_map_t old_map, - vm_size_t old_size, + vm_object_size_t old_size, vm_external_map_t new_map); extern void vm_external_state_set( /* Set state of a page to * VM_EXTERNAL_STATE_EXISTS */ vm_external_map_t map, - vm_offset_t offset); + vm_object_offset_t offset); extern void vm_external_state_clr( /* clear page state */ vm_external_map_t map, - vm_offset_t offset); + vm_object_offset_t offset); #define vm_external_state_get(map, offset) \ (((map) != VM_EXTERNAL_NULL) ? \ @@ -135,11 +136,11 @@ extern void vm_external_state_clr( extern vm_external_state_t _vm_external_state_get( /* HIDDEN routine */ vm_external_map_t map, - vm_offset_t offset); + vm_object_offset_t offset); boolean_t vm_external_within( /* Check if new object size * fits in current map */ - vm_size_t new_size, - vm_size_t old_size); + vm_object_size_t new_size, + vm_object_size_t old_size); #endif /* VM_VM_EXTERNAL_H_ */ diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index 53ba64bee..cc652d4f8 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -107,22 +107,47 @@ #define VM_FAULT_CLASSIFY 0 -/* Zero-filled pages are marked "m->zero_fill" and put on the - * special zero-fill inactive queue only if they belong to - * an object at least this big. 
- */ -#define VM_ZF_OBJECT_SIZE_THRESHOLD (0x200000) - #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ int vm_object_pagein_throttle = 16; +/* + * We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control which + * kicks in when swap space runs out. 64-bit programs have massive address spaces and can leak enormous amounts + * of memory if they're buggy and can run the system completely out of swap space. If this happens, we + * impose a hard throttle on them to prevent them from taking the last bit of memory left. This helps + * keep the UI active so that the user has a chance to kill the offending task before the system + * completely hangs. + * + * The hard throttle is only applied when the system is nearly completely out of swap space and is only applied + * to tasks that appear to be bloated. When swap runs out, any task using more than vm_hard_throttle_threshold + * will be throttled. The throttling is done by giving the thread that's trying to demand zero a page a + * delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again. + */ + +boolean_t thread_is_io_throttled(void); + +uint64_t vm_hard_throttle_threshold; + +extern unsigned int dp_pages_free, dp_pages_reserve; + +#define NEED_TO_HARD_THROTTLE_THIS_TASK() (((dp_pages_free + dp_pages_reserve < 2000) && \ + (get_task_resident_size(current_task()) > vm_hard_throttle_threshold) && \ + (current_task() != kernel_task) && IP_VALID(memory_manager_default)) || \ + (vm_page_free_count < vm_page_throttle_limit && thread_is_io_throttled() && \ + (get_task_resident_size(current_task()) > vm_hard_throttle_threshold))) + + +#define HARD_THROTTLE_DELAY 10000 /* 10000 us == 10 ms */ + + extern int cs_debug; #if MACH_KDB extern struct db_watchpoint *db_watchpoint_list; #endif /* MACH_KDB */ +boolean_t current_thread_aborted(void); /* Forward declarations of internal routines. 
*/ extern kern_return_t vm_fault_wire_fast( @@ -156,11 +181,9 @@ unsigned long vm_cs_query_modified = 0; unsigned long vm_cs_validated_dirtied = 0; #if CONFIG_ENFORCE_SIGNED_CODE -#if SECURE_KERNEL -const int cs_enforcement_disable=0; +int cs_enforcement_disable=0; #else -int cs_enforcement_disable=1; -#endif +static const int cs_enforcement_disable=1; #endif /* @@ -173,10 +196,22 @@ vm_fault_init(void) { #if !SECURE_KERNEL #if CONFIG_ENFORCE_SIGNED_CODE - PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, sizeof (cs_enforcement_disable)); + PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, + sizeof (cs_enforcement_disable)); #endif PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug)); #endif + + /* + * Choose a value for the hard throttle threshold based on the amount of ram. The threshold is + * computed as a percentage of available memory, and the percentage used is scaled inversely with + * the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems + * and reduce the value down to 10% for very large memory configurations. This helps give us a + * definition of a memory hog that makes more sense relative to the amount of ram in the machine. + * The formula here simply uses the number of gigabytes of ram to adjust the percentage. + */ + + vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024*1024*1024)), 25)) / 100; } /* @@ -236,8 +271,11 @@ boolean_t vm_page_deactivate_behind = TRUE; /* * default sizes given VM_BEHAVIOR_DEFAULT reference behavior */ -int vm_default_ahead = 0; -int vm_default_behind = MAX_UPL_TRANSFER; +#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128 +#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... 
*/ + /* we use it to size an array on the stack */ + +int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW; #define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024) @@ -354,6 +392,8 @@ vm_fault_is_sequential( } +int vm_page_deactivate_behind_count = 0; + /* * vm_page_deactivate_behind * @@ -373,10 +413,17 @@ vm_fault_deactivate_behind( vm_object_offset_t offset, vm_behavior_t behavior) { - vm_page_t m = NULL; + int n; + int pages_in_run = 0; + int max_pages_in_run = 0; int sequential_run; int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; + vm_object_offset_t run_offset = 0; + vm_object_offset_t pg_offset = 0; + vm_page_t m; + vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER]; + pages_in_run = 0; #if TRACEFAULTPAGE dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */ #endif @@ -401,12 +448,16 @@ vm_fault_deactivate_behind( case VM_BEHAVIOR_RANDOM: break; case VM_BEHAVIOR_SEQUENTIAL: - if (sequential_run >= (int)PAGE_SIZE) - m = vm_page_lookup(object, offset - PAGE_SIZE_64); + if (sequential_run >= (int)PAGE_SIZE) { + run_offset = 0 - PAGE_SIZE_64; + max_pages_in_run = 1; + } break; case VM_BEHAVIOR_RSEQNTL: - if (sequential_run >= (int)PAGE_SIZE) - m = vm_page_lookup(object, offset + PAGE_SIZE_64); + if (sequential_run >= (int)PAGE_SIZE) { + run_offset = PAGE_SIZE_64; + max_pages_in_run = 1; + } break; case VM_BEHAVIOR_DEFAULT: default: @@ -417,32 +468,109 @@ vm_fault_deactivate_behind( * long enough on an object with default access behavior * to consider it for deactivation */ - if ((uint64_t)sequential_run >= behind) { + if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) { + /* + * the comparisons between offset and behind are done + * in this kind of odd fashion in order to prevent wrap around + * at the end points + */ if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) { - if (offset >= behind) - m = vm_page_lookup(object, offset - 
behind); + if (offset >= behind) { + run_offset = 0 - behind; + pg_offset = PAGE_SIZE_64; + max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; + } } else { - if (offset < -behind) - m = vm_page_lookup(object, offset + behind); + if (offset < -behind) { + run_offset = behind; + pg_offset = 0 - PAGE_SIZE_64; + max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; + } } } break; } } - if (m) { - if (!m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) { - pmap_clear_reference(m->phys_page); - m->deactivated = TRUE; + for (n = 0; n < max_pages_in_run; n++) { + m = vm_page_lookup(object, offset + run_offset + (n * pg_offset)); + + if (m && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) { + page_run[pages_in_run++] = m; + pmap_clear_reference(m->phys_page); + } + } + if (pages_in_run) { + vm_page_lockspin_queues(); + + for (n = 0; n < pages_in_run; n++) { + + m = page_run[n]; + + vm_page_deactivate_internal(m, FALSE); + + vm_page_deactivate_behind_count++; #if TRACEFAULTPAGE dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ #endif - return TRUE; } + vm_page_unlock_queues(); + + return TRUE; } return FALSE; } +static boolean_t +vm_page_throttled(void) +{ + clock_sec_t elapsed_sec; + clock_sec_t tv_sec; + clock_usec_t tv_usec; + + thread_t thread = current_thread(); + + if (thread->options & TH_OPT_VMPRIV) + return (FALSE); + + thread->t_page_creation_count++; + + if (NEED_TO_HARD_THROTTLE_THIS_TASK()) + return (TRUE); + + if (vm_page_free_count < vm_page_throttle_limit && + thread->t_page_creation_count > vm_page_creation_throttle) { + + clock_get_system_microtime(&tv_sec, &tv_usec); + + elapsed_sec = tv_sec - thread->t_page_creation_time; + + if (elapsed_sec <= 6 || (thread->t_page_creation_count / elapsed_sec) >= (vm_page_creation_throttle / 6)) { + + if (elapsed_sec >= 60) { + /* + * we'll reset our stats to give a well behaved app + * that was unlucky enough to accumulate a 
bunch of pages + * over a long period of time a chance to get out of + * the throttled state... we reset the counter and timestamp + * so that if it stays under the rate limit for the next second + * it will be back in our good graces... if it exceeds it, it + * will remain in the throttled state + */ + thread->t_page_creation_time = tv_sec; + thread->t_page_creation_count = (vm_page_creation_throttle / 6) * 5; + } + ++vm_page_throttle_count; + + return (TRUE); + } + thread->t_page_creation_time = tv_sec; + thread->t_page_creation_count = 0; + } + return (FALSE); +} + + /* * check for various conditions that would * prevent us from creating a ZF page... @@ -454,10 +582,14 @@ vm_fault_deactivate_behind( static vm_fault_return_t vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state) { - if (object->shadow_severed) { + if (object->shadow_severed || + VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) { /* - * the shadow chain was severed - * just have to return an error at this point + * Either: + * 1. the shadow chain was severed, + * 2. the purgeable object is volatile or empty and is marked + * to fault on access while volatile. + * Just have to return an error at this point */ if (m != VM_PAGE_NULL) VM_PAGE_FREE(m); @@ -487,7 +619,7 @@ vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t int return (VM_FAULT_RETRY); } } - if (VM_PAGE_ZFILL_THROTTLED()) { + if (vm_page_throttled()) { /* * we're throttling zero-fills... 
* treat this as if we couldn't grab a page @@ -496,6 +628,15 @@ vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t int VM_PAGE_FREE(m); vm_fault_cleanup(object, first_m); + if (NEED_TO_HARD_THROTTLE_THIS_TASK()) { + delay(HARD_THROTTLE_DELAY); + + if (current_thread_aborted()) { + thread_interrupt_level(interruptible_state); + return VM_FAULT_INTERRUPTED; + } + } + thread_interrupt_level(interruptible_state); return (VM_FAULT_MEMORY_SHORTAGE); @@ -552,7 +693,7 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) (m->object->purgable == VM_PURGABLE_DENY || m->object->purgable == VM_PURGABLE_NONVOLATILE || m->object->purgable == VM_PURGABLE_VOLATILE )) { - vm_page_lock_queues(); + vm_page_lockspin_queues(); queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq); m->throttled = TRUE; @@ -560,9 +701,9 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) vm_page_unlock_queues(); } else { - if (m->object->size > VM_ZF_OBJECT_SIZE_THRESHOLD) { + if (current_thread()->t_page_creation_count > vm_page_creation_throttle) { m->zero_fill = TRUE; - OSAddAtomic(1, (SInt32 *)&vm_zf_count); + VM_ZF_COUNT_INCR(); } } return (my_fault); @@ -610,7 +751,14 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) * be destroyed when this guarantee is no longer required. * The "result_page" is also left busy. It is not removed * from the pageout queues. + * Special Case: + * A return value of VM_FAULT_SUCCESS_NO_VM_PAGE means that the + * fault succeeded but there's no VM page (i.e. the VM object + * does not actually hold VM pages, but device memory or + * large pages). The object is still locked and we still hold a + * paging_in_progress reference. */ +unsigned int vm_fault_page_blocked_access = 0; vm_fault_return_t vm_fault_page( @@ -657,6 +805,7 @@ vm_fault_page( uint32_t try_failed_count; int interruptible; /* how may fault be interrupted? 
*/ memory_object_t pager; + vm_fault_return_t retval; /* * MACH page map - an optional optimization where a bit map is maintained @@ -697,23 +846,15 @@ vm_fault_page( /* * Recovery actions */ -#define PREPARE_RELEASE_PAGE(m) \ - MACRO_BEGIN \ - vm_page_lock_queues(); \ - MACRO_END - -#define DO_RELEASE_PAGE(m) \ - MACRO_BEGIN \ - PAGE_WAKEUP_DONE(m); \ - if (!m->active && !m->inactive && !m->throttled)\ - vm_page_activate(m); \ - vm_page_unlock_queues(); \ - MACRO_END - #define RELEASE_PAGE(m) \ MACRO_BEGIN \ - PREPARE_RELEASE_PAGE(m); \ - DO_RELEASE_PAGE(m); \ + PAGE_WAKEUP_DONE(m); \ + if (!m->active && !m->inactive && !m->throttled) { \ + vm_page_lockspin_queues(); \ + if (!m->active && !m->inactive && !m->throttled) \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ + } \ MACRO_END #if TRACEFAULTPAGE @@ -776,7 +917,7 @@ vm_fault_page( XPR(XPR_VM_FAULT, "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n", - (integer_t)object, offset, fault_type, *protection, 0); + object, offset, fault_type, *protection, 0); /* * default type of fault @@ -798,6 +939,35 @@ vm_fault_page( return (VM_FAULT_MEMORY_ERROR); } + if (!object->pager_created && object->phys_contiguous) { + /* + * A physically-contiguous object without a pager: + * must be a "large page" object. We do not deal + * with VM pages for this object. + */ + m = VM_PAGE_NULL; + goto phys_contig_object; + } + + if (object->blocked_access) { + /* + * Access to this VM object has been blocked. + * Replace our "paging_in_progress" reference with + * a "activity_in_progress" reference and wait for + * access to be unblocked. 
+ */ + vm_object_activity_begin(object); + vm_object_paging_end(object); + while (object->blocked_access) { + vm_object_sleep(object, + VM_OBJECT_EVENT_UNBLOCKED, + THREAD_UNINT); + } + vm_fault_page_blocked_access++; + vm_object_paging_begin(object); + vm_object_activity_end(object); + } + /* * See whether the page at 'offset' is resident */ @@ -823,8 +993,8 @@ vm_fault_page( wait_result = PAGE_SLEEP(object, m, interruptible); XPR(XPR_VM_FAULT, "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n", - (integer_t)object, offset, - (integer_t)m, 0, 0); + object, offset, + m, 0, 0); counter(c_vm_fault_page_block_busy_kernel++); if (wait_result != THREAD_AWAKENED) { @@ -938,9 +1108,9 @@ vm_fault_page( XPR(XPR_VM_FAULT, "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n", - (integer_t)object, offset, - (integer_t)m, - (integer_t)first_object, 0); + object, offset, + m, + first_object, 0); if (object != first_object) { /* @@ -998,8 +1168,8 @@ vm_fault_page( } XPR(XPR_VM_FAULT, "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n", - (integer_t)object, offset, - (integer_t)next_object, + object, offset, + next_object, offset+object->shadow_offset,0); offset += object->shadow_offset; @@ -1038,8 +1208,8 @@ vm_fault_page( #endif XPR(XPR_VM_FAULT, "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n", - (integer_t)object, offset, - (integer_t)m, 0, 0); + object, offset, + m, 0, 0); /* * take an extra ref so that object won't die */ @@ -1070,7 +1240,8 @@ vm_fault_page( return (VM_FAULT_RETRY); } } - if (type_of_fault == NULL && m->speculative) { + if (type_of_fault == NULL && m->speculative && + !(fault_info != NULL && fault_info->stealth)) { /* * If we were passed a non-NULL pointer for * "type_of_fault", than we came from @@ -1081,6 +1252,10 @@ vm_fault_page( * take it off the speculative queue, we'll * let the caller of vm_fault_page deal * with getting it onto the correct queue + * + * If the caller specified in fault_info that + * it 
wants a "stealth" fault, we also leave + * the page in the speculative queue. */ vm_page_lockspin_queues(); VM_PAGE_QUEUES_REMOVE(m); @@ -1135,7 +1310,7 @@ vm_fault_page( #endif XPR(XPR_VM_FAULT, "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n", - (integer_t)object, offset, (integer_t)m, 0, 0); + object, offset, m, 0, 0); assert(!m->busy); assert(!m->absent); @@ -1189,7 +1364,7 @@ vm_fault_page( XPR(XPR_VM_FAULT, "vm_f_page: ready wait obj 0x%X, offset 0x%X\n", - (integer_t)object, offset, 0, 0, 0); + object, offset, 0, 0, 0); /* * take an extra ref so object won't die @@ -1309,7 +1484,7 @@ vm_fault_page( XPR(XPR_VM_FAULT, "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n", - (integer_t)object, offset, (integer_t)m, + object, offset, m, access_required | wants_copy_flag, 0); /* @@ -1335,6 +1510,13 @@ vm_fault_page( return ((rc == MACH_SEND_INTERRUPTED) ? VM_FAULT_INTERRUPTED : VM_FAULT_MEMORY_ERROR); + } else { + clock_sec_t tv_sec; + clock_usec_t tv_usec; + + clock_get_system_microtime(&tv_sec, &tv_usec); + current_thread()->t_page_creation_time = tv_sec; + current_thread()->t_page_creation_count = 0; } if ((interruptible != THREAD_UNINT) && (current_thread()->sched_mode & TH_MODE_ABORT)) { @@ -1357,7 +1539,8 @@ vm_fault_page( * page fault against the object's new backing * store (different memory object). 
*/ - break; + phys_contig_object: + goto done; } /* * potentially a pagein fault @@ -1391,8 +1574,8 @@ vm_fault_page( XPR(XPR_VM_FAULT, "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n", - (integer_t)object, offset, (integer_t)m, - (integer_t)object->shadow, 0); + object, offset, m, + object->shadow, 0); next_object = object->shadow; @@ -1485,12 +1668,10 @@ vm_fault_page( dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ #endif #if EXTRA_ASSERTIONS - if (m != VM_PAGE_NULL) { - assert(m->busy && !m->absent); - assert((first_m == VM_PAGE_NULL) || - (first_m->busy && !first_m->absent && - !first_m->active && !first_m->inactive)); - } + assert(m->busy && !m->absent); + assert((first_m == VM_PAGE_NULL) || + (first_m->busy && !first_m->absent && + !first_m->active && !first_m->inactive)); #endif /* EXTRA_ASSERTIONS */ /* @@ -1498,14 +1679,12 @@ vm_fault_page( * If we found a page, we must have decrypted it before we * get here... */ - if (m != VM_PAGE_NULL) { - ASSERT_PAGE_DECRYPTED(m); - } + ASSERT_PAGE_DECRYPTED(m); XPR(XPR_VM_FAULT, "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n", - (integer_t)object, offset, (integer_t)m, - (integer_t)first_object, (integer_t)first_m); + object, offset, m, + first_object, first_m); /* * If the page is being written, but isn't @@ -1513,7 +1692,7 @@ vm_fault_page( * we have to copy it into a new page owned * by the top-level object. 
*/ - if ((object != first_object) && (m != VM_PAGE_NULL)) { + if (object != first_object) { #if TRACEFAULTPAGE dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */ @@ -1577,8 +1756,8 @@ vm_fault_page( } XPR(XPR_VM_FAULT, "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n", - (integer_t)object, offset, - (integer_t)m, (integer_t)copy_m, 0); + object, offset, + m, copy_m, 0); vm_page_copy(m, copy_m); @@ -1653,7 +1832,7 @@ vm_fault_page( */ try_failed_count = 0; - while ((copy_object = first_object->copy) != VM_OBJECT_NULL && (m != VM_PAGE_NULL)) { + while ((copy_object = first_object->copy) != VM_OBJECT_NULL) { vm_object_offset_t copy_offset; vm_page_t copy_m; @@ -1918,14 +2097,17 @@ vm_fault_page( break; } + +done: *result_page = m; *top_page = first_m; XPR(XPR_VM_FAULT, "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n", - (integer_t)object, offset, (integer_t)m, (integer_t)first_m, 0); + object, offset, m, first_m, 0); if (m != VM_PAGE_NULL) { + retval = VM_FAULT_SUCCESS; if (my_fault == DBG_PAGEIN_FAULT) { VM_STAT_INCR(pageins); @@ -1935,8 +2117,10 @@ vm_fault_page( if (m->object->internal) { DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); + my_fault = DBG_PAGEIND_FAULT; } else { DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); + my_fault = DBG_PAGEINV_FAULT; } /* @@ -1950,15 +2134,18 @@ vm_fault_page( } if (type_of_fault) *type_of_fault = my_fault; - } else - vm_object_unlock(object); + } else { + retval = VM_FAULT_SUCCESS_NO_VM_PAGE; + assert(first_m == VM_PAGE_NULL); + assert(object == first_object); + } thread_interrupt_level(interruptible_state); #if TRACEFAULTPAGE dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */ #endif - return (VM_FAULT_SUCCESS); + return retval; backoff: thread_interrupt_level(interruptible_state); @@ -2011,10 +2198,12 @@ vm_fault_enter(vm_page_t m, unsigned int cache_attr; kern_return_t kr; boolean_t previously_pmapped = m->pmapped; - + 
boolean_t must_disconnect = 0; + boolean_t map_is_switched, map_is_switch_protected; + vm_object_lock_assert_held(m->object); #if DEBUG - mutex_assert(&vm_page_queue_lock, MA_NOTOWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED); #endif /* DEBUG */ if (m->phys_page == vm_page_guard_addr) { @@ -2048,13 +2237,13 @@ vm_fault_enter(vm_page_t m, if (m->object->internal) { DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); + *type_of_fault = DBG_PAGEIND_FAULT; } else { DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); + *type_of_fault = DBG_PAGEINV_FAULT; } current_task()->pageins++; - - *type_of_fault = DBG_PAGEIN_FAULT; } VM_PAGE_CONSUME_CLUSTERED(m); @@ -2069,6 +2258,7 @@ vm_fault_enter(vm_page_t m, } } + /* Validate code signature if necessary. */ if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) { vm_object_lock_assert_exclusive(m->object); @@ -2076,52 +2266,112 @@ vm_fault_enter(vm_page_t m, vm_cs_revalidates++; } - /* VM map is locked, so 1 ref will remain on VM object */ + /* VM map is locked, so 1 ref will remain on VM object - + * so no harm if vm_page_validate_cs drops the object lock */ vm_page_validate_cs(m); } - if (m->cs_tainted /* always invalidate a tainted page */ -#if CONFIG_ENFORCE_SIGNED_CODE - /* - * Code Signing enforcement invalidates an executable page that - * has no code directory, and thus could not be validated. - */ - || ((prot & VM_PROT_EXECUTE) && !m->cs_validated ) -#endif - ) { - /* - * CODE SIGNING: - * This page has been tainted and can not be trusted. - * Let's notify the current process and let it take any - * necessary precautions before we enter the tainted page - * into its address space. 
- */ - kr = KERN_SUCCESS; -#if CONFIG_ENFORCE_SIGNED_CODE - if (!cs_enforcement_disable) { -#endif - if (cs_invalid_page((addr64_t) vaddr)) { - /* reject the tainted page: abort the page fault */ - kr = KERN_MEMORY_ERROR; - cs_enter_tainted_rejected++; - } else { - /* proceed with the tainted page */ - kr = KERN_SUCCESS; - cs_enter_tainted_accepted++; - } -#if CONFIG_ENFORCE_SIGNED_CODE +#define page_immutable(m,prot) ((m)->cs_validated /*&& ((prot) & VM_PROT_EXECUTE)*/) + + map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) && + (pmap == vm_map_pmap(current_thread()->map))); + map_is_switch_protected = current_thread()->map->switch_protect; + + /* If the map is switched, and is switch-protected, we must protect + * some pages from being write-faulted: immutable pages because by + * definition they may not be written, and executable pages because that + * would provide a way to inject unsigned code. + * If the page is immutable, we can simply return. However, we can't + * immediately determine whether a page is executable anywhere. But, + * we can disconnect it everywhere and remove the executable protection + * from the current map. We do that below right before we do the + * PMAP_ENTER. + */ + if(!cs_enforcement_disable && map_is_switched && + map_is_switch_protected && page_immutable(m, prot) && + (prot & VM_PROT_WRITE)) + { + return KERN_CODESIGN_ERROR; + } + + /* A page could be tainted, or pose a risk of being tainted later. + * Check whether the receiving process wants it, and make it feel + * the consequences (that happens in cs_invalid_page()). 
+ * For CS Enforcement, two other conditions will + * cause that page to be tainted as well: + * - pmapping an unsigned page executable - this means unsigned code; + * - writeable mapping of a validated page - the content of that page + * can be changed without the kernel noticing, therefore unsigned + * code can be created + */ + if (m->cs_tainted || + ( !cs_enforcement_disable && + (/* The page is unsigned and wants to be executable */ + (!m->cs_validated && (prot & VM_PROT_EXECUTE)) || + /* The page should be immutable, but is in danger of being modified + * This is the case where we want policy from the code directory - + * is the page immutable or not? For now we have to assume that + * code pages will be immutable, data pages not. + * We'll assume a page is a code page if it has a code directory + * and we fault for execution. + * That is good enough since if we faulted the code page for + * writing in another map before, it is wpmapped; if we fault + * it for writing in this map later it will also be faulted for executing + * at the same time; and if we fault for writing in another map + * later, we will disconnect it from this pmap so we'll notice + * the change. + */ + (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped)) + )) + ) + { + /* We will have a tainted page. Have to handle the special case + * of a switched map now. If the map is not switched, standard + * procedure applies - call cs_invalid_page(). + * If the map is switched, the real owner is invalid already. + * There is no point in invalidating the switching process since + * it will not be executing from the map. So we don't call + * cs_invalid_page() in that case. 
*/ + boolean_t reject_page; + if(map_is_switched) { + assert(pmap==vm_map_pmap(current_thread()->map)); + assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE)); + reject_page = FALSE; + } else { + reject_page = cs_invalid_page((addr64_t) vaddr); + } + + if (reject_page) { + /* reject the tainted page: abort the page fault */ + kr = KERN_CODESIGN_ERROR; + cs_enter_tainted_rejected++; + } else { + /* proceed with the tainted page */ + kr = KERN_SUCCESS; + /* Page might have been tainted before or not; now it + * definitively is. If the page wasn't tainted, we must + * disconnect it from all pmaps later. */ + must_disconnect = ~m->cs_tainted; + m->cs_tainted = TRUE; + cs_enter_tainted_accepted++; } -#endif if (cs_debug || kr != KERN_SUCCESS) { printf("CODESIGNING: vm_fault_enter(0x%llx): " "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", (long long)vaddr, m, m->object, m->offset); } + } else { /* proceed with the valid page */ kr = KERN_SUCCESS; } + /* If we have a KERN_SUCCESS from the previous checks, we either have + * a good page, or a tainted page that has been accepted by the process. + * In both cases the page will be entered into the pmap. + * If the page is writeable, we need to disconnect it from other pmaps + * now so those processes can take note. + */ if (kr == KERN_SUCCESS) { /* * NOTE: we may only hold the vm_object lock SHARED @@ -2136,8 +2386,21 @@ vm_fault_enter(vm_page_t m, if (prot & VM_PROT_WRITE) { vm_object_lock_assert_exclusive(m->object); m->wpmapped = TRUE; + if(must_disconnect) { + /* We can only get here + * because of the CSE logic */ + assert(cs_enforcement_disable == FALSE); + pmap_disconnect(m->phys_page); + /* If we are faulting for a write, we can clear + * the execute bit - that will ensure the page is + * checked again before being executable, which + * protects against a map switch. 
+ * This only happens the first time the page + * gets tainted, so we won't get stuck here + * to make an already writeable page executable. */ + prot &= ~VM_PROT_EXECUTE; + } } - PMAP_ENTER(pmap, vaddr, m, prot, cache_attr, wired); } @@ -2160,16 +2423,61 @@ vm_fault_enter(vm_page_t m, } else { if (kr != KERN_SUCCESS) { - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_deactivate(m); vm_page_unlock_queues(); } else { - if (((!m->active && !m->inactive) || no_cache) && !m->wire_count && !m->throttled) { + if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) { + + if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) { + struct vpl *lq; + uint32_t lid; + + /* + * we got a local queue to stuff this new page on... + * it's safe to manipulate local and local_id at this point + * since we're behind an exclusive object lock and the + * page is not on any global queue. + * + * we'll use the current cpu number to select the queue + * note that we don't need to disable preemption... we're + * going to be behind the local queue's lock to do the real + * work + */ + lid = cpu_number(); + + lq = &vm_page_local_q[lid].vpl_un.vpl; + + VPL_LOCK(&lq->vpl_lock); + + queue_enter(&lq->vpl_queue, m, vm_page_t, pageq); + m->local = TRUE; + m->local_id = lid; + lq->vpl_count++; + + VPL_UNLOCK(&lq->vpl_lock); + + if (lq->vpl_count > vm_page_local_q_soft_limit) { + /* + * we're beyond the soft limit for the local queue + * vm_page_reactivate_local will 'try' to take + * the global page queue lock... if it can't that's + * ok... we'll let the queue continue to grow up + * to the hard limit... at that point we'll wait + * for the lock... 
once we've got the lock, we'll + * transfer all of the pages from the local queue + * to the global active queue + */ + vm_page_reactivate_local(lid, FALSE, FALSE); + } + return kr; + } + vm_page_lockspin_queues(); /* * test again now that we hold the page queue lock */ - if (((!m->active && !m->inactive) || no_cache) && !m->wire_count) { + if (((!m->active && !m->inactive) || no_cache) && !VM_PAGE_WIRED(m)) { /* * If this is a no_cache mapping and the page has never been @@ -2276,6 +2584,7 @@ vm_fault( return (KERN_FAILURE); } + interruptible_state = thread_interrupt_level(interruptible); VM_STAT_INCR(faults); @@ -2317,6 +2626,7 @@ vm_fault( } pmap = real_map->pmap; fault_info.interruptible = interruptible; + fault_info.stealth = FALSE; /* * If the page is wired, we must fault for the current protection @@ -2394,6 +2704,18 @@ vm_fault( cur_offset = offset; while (TRUE) { + if (!cur_object->pager_created && + cur_object->phys_contiguous) /* superpage */ + break; + + if (cur_object->blocked_access) { + /* + * Access to this VM object has been blocked. + * Let the slow path handle it. 
+ */ + break; + } + m = vm_page_lookup(cur_object, cur_offset); if (m != VM_PAGE_NULL) { @@ -2476,6 +2798,17 @@ vm_fault( */ break; } + if (VM_OBJECT_PURGEABLE_FAULT_ERROR(m->object)) { + if (object != cur_object) + vm_object_unlock(object); + vm_map_unlock_read(map); + if (real_map != map) + vm_map_unlock(real_map); + vm_object_unlock(cur_object); + kr = KERN_MEMORY_ERROR; + goto done; + } + if (m->encrypted) { /* * ENCRYPTED SWAP: @@ -2696,7 +3029,7 @@ vm_fault( if (need_collapse == TRUE) vm_object_collapse(object, offset, TRUE); - if (type_of_fault == DBG_PAGEIN_FAULT) { + if (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT) { /* * evaluate access pattern and update state * vm_fault_deactivate_behind depends on the @@ -2722,7 +3055,34 @@ vm_fault( } /* * COPY ON WRITE FAULT - * + */ + assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE); + + if (vm_page_throttled()) { + /* + * drop all of our locks... + * wait until the free queue is + * pumped back up and then + * redrive the fault + */ + if (object != cur_object) + vm_object_unlock(cur_object); + vm_object_unlock(object); + vm_map_unlock_read(map); + if (real_map != map) + vm_map_unlock(real_map); + + if (NEED_TO_HARD_THROTTLE_THIS_TASK()) + delay(HARD_THROTTLE_DELAY); + + if (!current_thread_aborted() && vm_page_wait((change_wiring) ? + THREAD_UNINT : + THREAD_ABORTSAFE)) + goto RetryFault; + kr = KERN_ABORTED; + goto done; + } + /* * If objects match, then * object->copy must not be NULL (else control * would be in previous code block), and we @@ -2736,8 +3096,6 @@ vm_fault( */ break; } - assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE); - /* * This is now a shadow based copy on write * fault -- it requires a copy up the shadow @@ -2842,8 +3200,9 @@ vm_fault( * Zero fill fault. Page gets * inserted into the original object. 
*/ - if (cur_object->shadow_severed) { - + if (cur_object->shadow_severed || + VM_OBJECT_PURGEABLE_FAULT_ERROR(cur_object)) + { if (object != cur_object) vm_object_unlock(cur_object); vm_object_unlock(object); @@ -2855,7 +3214,7 @@ vm_fault( kr = KERN_MEMORY_ERROR; goto done; } - if (VM_PAGE_ZFILL_THROTTLED()) { + if (vm_page_throttled()) { /* * drop all of our locks... * wait until the free queue is @@ -2869,11 +3228,13 @@ vm_fault( if (real_map != map) vm_map_unlock(real_map); - if (vm_page_wait((change_wiring) ? + if (NEED_TO_HARD_THROTTLE_THIS_TASK()) + delay(HARD_THROTTLE_DELAY); + + if (!current_thread_aborted() && vm_page_wait((change_wiring) ? THREAD_UNINT : THREAD_ABORTSAFE)) goto RetryFault; - kr = KERN_ABORTED; goto done; } @@ -3014,14 +3375,14 @@ vm_fault( * if kr == VM_FAULT_SUCCESS, then the paging reference * is still held along with the ref_count on the original object * - * if m != NULL, then the object it belongs to - * is returned locked with a paging reference + * the object is returned locked with a paging reference * * if top_page != NULL, then it's BUSY and the * object it belongs to has a paging reference * but is returned unlocked */ - if (kr != VM_FAULT_SUCCESS) { + if (kr != VM_FAULT_SUCCESS && + kr != VM_FAULT_SUCCESS_NO_VM_PAGE) { /* * we didn't succeed, lose the object reference immediately. 
*/ @@ -3050,6 +3411,9 @@ vm_fault( else kr = KERN_MEMORY_ERROR; goto done; + default: + panic("vm_fault: unexpected error 0x%x from " + "vm_fault_page()\n", kr); } } m = result_page; @@ -3067,10 +3431,12 @@ vm_fault( #define RELEASE_PAGE(m) \ MACRO_BEGIN \ PAGE_WAKEUP_DONE(m); \ - vm_page_lockspin_queues(); \ - if (!m->active && !m->inactive && !m->throttled)\ - vm_page_activate(m); \ - vm_page_unlock_queues(); \ + if (!m->active && !m->inactive && !m->throttled) { \ + vm_page_lockspin_queues(); \ + if (!m->active && !m->inactive && !m->throttled) \ + vm_page_activate(m); \ + vm_page_unlock_queues(); \ + } \ MACRO_END /* @@ -3080,8 +3446,10 @@ vm_fault( if (m != VM_PAGE_NULL) { old_copy_object = m->object->copy; vm_object_unlock(m->object); - } else + } else { old_copy_object = VM_OBJECT_NULL; + vm_object_unlock(object); + } /* * no object locks are held at this point @@ -3327,26 +3695,29 @@ vm_fault( (entry->object.vm_object != NULL) && (entry->object.vm_object == object)) { + int superpage = (!object->pager_created && object->phys_contiguous)? 
VM_MEM_SUPERPAGE : 0; if (caller_pmap) { /* * Set up a block mapped area */ + assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); pmap_map_block(caller_pmap, (addr64_t)(caller_pmap_addr - ldelta), - (((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) + - entry->offset + (laddr - entry->vme_start) - ldelta) >> 12, - ((ldelta + hdelta) >> 12), prot, - (VM_WIMG_MASK & (int)object->wimg_bits), 0); + (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->shadow_offset)) + + entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), + (uint32_t)((ldelta + hdelta) >> 12), prot, + (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); } else { /* * Set up a block mapped area */ + assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); pmap_map_block(real_map->pmap, (addr64_t)(vaddr - ldelta), - (((vm_map_offset_t)(entry->object.vm_object->shadow_offset)) + - entry->offset + (laddr - entry->vme_start) - ldelta) >> 12, - ((ldelta + hdelta) >> 12), prot, - (VM_WIMG_MASK & (int)object->wimg_bits), 0); + (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->shadow_offset)) + + entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), + (uint32_t)((ldelta + hdelta) >> 12), prot, + (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); } } } @@ -3484,6 +3855,7 @@ vm_fault_unwire( fault_info.lo_offset = entry->offset; fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; fault_info.no_cache = entry->no_cache; + fault_info.stealth = TRUE; /* * Since the pages are wired down, we must be able to @@ -3506,7 +3878,13 @@ vm_fault_unwire( vm_object_t result_object; vm_fault_return_t result; - fault_info.cluster_size = end_addr - va; + if (end_addr - va > (vm_size_t) -1) { + /* 32-bit overflow */ + fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); + } else { + fault_info.cluster_size = (vm_size_t) (end_addr - va); + assert(fault_info.cluster_size == end_addr - va); + } do { prot = VM_PROT_NONE; @@ 
-3554,9 +3932,16 @@ vm_fault_unwire( pmap_disconnect(result_page->phys_page); VM_PAGE_FREE(result_page); } else { - vm_page_lockspin_queues(); - vm_page_unwire(result_page); - vm_page_unlock_queues(); + if (VM_PAGE_WIRED(result_page)) { + vm_page_lockspin_queues(); + vm_page_unwire(result_page); + vm_page_unlock_queues(); + } + if(entry->zero_wired_pages) { + pmap_zero_page(result_page->phys_page); + entry->zero_wired_pages = FALSE; + } + PAGE_WAKEUP_DONE(result_page); } vm_fault_cleanup(result_object, top_page); @@ -3774,10 +4159,12 @@ vm_fault_copy_cleanup( vm_object_lock(object); PAGE_WAKEUP_DONE(page); - vm_page_lockspin_queues(); - if (!page->active && !page->inactive && !page->throttled) - vm_page_activate(page); - vm_page_unlock_queues(); + if (!page->active && !page->inactive && !page->throttled) { + vm_page_lockspin_queues(); + if (!page->active && !page->inactive && !page->throttled) + vm_page_activate(page); + vm_page_unlock_queues(); + } vm_fault_cleanup(object, top_page); } @@ -3849,6 +4236,7 @@ vm_fault_copy( vm_map_size_t amount_left; vm_object_t old_copy_object; kern_return_t error = 0; + vm_fault_return_t result; vm_map_size_t part_size; struct vm_object_fault_info fault_info_src; @@ -3873,6 +4261,7 @@ vm_fault_copy( fault_info_src.lo_offset = vm_object_trunc_page(src_offset); fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left; fault_info_src.no_cache = FALSE; + fault_info_src.stealth = TRUE; fault_info_dst.interruptible = interruptible; fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL; @@ -3880,6 +4269,7 @@ vm_fault_copy( fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset); fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left; fault_info_dst.no_cache = FALSE; + fault_info_dst.stealth = TRUE; do { /* while (amount_left > 0) */ /* @@ -3896,18 +4286,25 @@ vm_fault_copy( vm_object_lock(dst_object); vm_object_paging_begin(dst_object); - fault_info_dst.cluster_size = amount_left; + if (amount_left > (vm_size_t) -1) { 
+ /* 32-bit overflow */ + fault_info_dst.cluster_size = (vm_size_t) (0 - PAGE_SIZE); + } else { + fault_info_dst.cluster_size = (vm_size_t) amount_left; + assert(fault_info_dst.cluster_size == amount_left); + } XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0); - switch (vm_fault_page(dst_object, - vm_object_trunc_page(dst_offset), - VM_PROT_WRITE|VM_PROT_READ, - FALSE, - &dst_prot, &dst_page, &dst_top_page, - (int *)0, - &error, - dst_map->no_zero_fill, - FALSE, &fault_info_dst)) { + result = vm_fault_page(dst_object, + vm_object_trunc_page(dst_offset), + VM_PROT_WRITE|VM_PROT_READ, + FALSE, + &dst_prot, &dst_page, &dst_top_page, + (int *)0, + &error, + dst_map->no_zero_fill, + FALSE, &fault_info_dst); + switch (result) { case VM_FAULT_SUCCESS: break; case VM_FAULT_RETRY: @@ -3918,11 +4315,19 @@ vm_fault_copy( /* fall thru */ case VM_FAULT_INTERRUPTED: RETURN(MACH_SEND_INTERRUPTED); + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail the copy */ + vm_object_paging_end(dst_object); + vm_object_unlock(dst_object); + /*FALLTHROUGH*/ case VM_FAULT_MEMORY_ERROR: if (error) return (error); else return(KERN_MEMORY_ERROR); + default: + panic("vm_fault_copy: unexpected error 0x%x from " + "vm_fault_page()\n", result); } assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE); @@ -3973,20 +4378,27 @@ vm_fault_copy( src_prot = VM_PROT_READ; vm_object_paging_begin(src_object); - fault_info_src.cluster_size = amount_left; + if (amount_left > (vm_size_t) -1) { + /* 32-bit overflow */ + fault_info_src.cluster_size = (vm_size_t) (0 - PAGE_SIZE); + } else { + fault_info_src.cluster_size = (vm_size_t) amount_left; + assert(fault_info_src.cluster_size == amount_left); + } XPR(XPR_VM_FAULT, "vm_fault_copy(2) -> vm_fault_page\n", 0,0,0,0,0); - switch (vm_fault_page( - src_object, - vm_object_trunc_page(src_offset), - VM_PROT_READ, FALSE, - &src_prot, - &result_page, &src_top_page, - (int *)0, &error, FALSE, - FALSE, &fault_info_src)) { - + result = 
vm_fault_page( + src_object, + vm_object_trunc_page(src_offset), + VM_PROT_READ, FALSE, + &src_prot, + &result_page, &src_top_page, + (int *)0, &error, FALSE, + FALSE, &fault_info_src); + + switch (result) { case VM_FAULT_SUCCESS: break; case VM_FAULT_RETRY: @@ -3998,12 +4410,21 @@ vm_fault_copy( case VM_FAULT_INTERRUPTED: vm_fault_copy_dst_cleanup(dst_page); RETURN(MACH_SEND_INTERRUPTED); + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(src_object); + vm_object_unlock(src_object); + /*FALLTHROUGH*/ case VM_FAULT_MEMORY_ERROR: vm_fault_copy_dst_cleanup(dst_page); if (error) return (error); else return(KERN_MEMORY_ERROR); + default: + panic("vm_fault_copy(2): unexpected " + "error 0x%x from " + "vm_fault_page()\n", result); } @@ -4058,11 +4479,20 @@ vm_fault_copy( } if (result_page == VM_PAGE_NULL) { + assert((vm_offset_t) dst_po == dst_po); + assert((vm_size_t) part_size == part_size); vm_page_part_zero_fill(dst_page, - dst_po, part_size); + (vm_offset_t) dst_po, + (vm_size_t) part_size); } else { - vm_page_part_copy(result_page, src_po, - dst_page, dst_po, part_size); + assert((vm_offset_t) src_po == src_po); + assert((vm_offset_t) dst_po == dst_po); + assert((vm_size_t) part_size == part_size); + vm_page_part_copy(result_page, + (vm_offset_t) src_po, + dst_page, + (vm_offset_t) dst_po, + (vm_size_t)part_size); if(!dst_page->dirty){ vm_object_lock(dst_object); dst_page->dirty = TRUE; @@ -4260,7 +4690,7 @@ vm_page_validate_cs_mapped( assert(object->pager_ready); pager = object->pager; - + assert(object->paging_in_progress); kr = vnode_pager_get_object_cs_blobs(pager, &blobs); if (kr != KERN_SUCCESS) { blobs = NULL; diff --git a/osfmk/vm/vm_fault.h b/osfmk/vm/vm_fault.h index 439af2540..855100338 100644 --- a/osfmk/vm/vm_fault.h +++ b/osfmk/vm/vm_fault.h @@ -81,6 +81,7 @@ typedef kern_return_t vm_fault_return_t; #define VM_FAULT_MEMORY_SHORTAGE 3 #define VM_FAULT_FICTITIOUS_SHORTAGE 4 #define VM_FAULT_MEMORY_ERROR 5 
+#define VM_FAULT_SUCCESS_NO_VM_PAGE 6 /* success but no VM page */ /* * Page fault handling based on vm_map (or entries therein) diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index f5e05931e..8180254b2 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,7 @@ /* Maximum zone size is 1.5G */ #define ZONE_MAP_MAX (1024 * 1024 * 1536) -const vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_ADDRESS; +const vm_offset_t vm_min_kernel_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS; const vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS; boolean_t vm_kernel_ready = FALSE; @@ -140,11 +141,21 @@ vm_mem_bootstrap(void) zsize = sane_size >> 2; /* Get target zone size as 1/4 of physical memory */ } - if(zsize < ZONE_MAP_MIN) zsize = ZONE_MAP_MIN; /* Clamp to min */ - if(zsize > ZONE_MAP_MAX) zsize = ZONE_MAP_MAX; /* Clamp to max */ + if (zsize < ZONE_MAP_MIN) + zsize = ZONE_MAP_MIN; /* Clamp to min */ + if (zsize > sane_size >> 1) + zsize = sane_size >> 1; /* Clamp to half of RAM max */ +#if !__LP64__ + if (zsize > ZONE_MAP_MAX) + zsize = ZONE_MAP_MAX; /* Clamp to 1.5GB max for K32 */ +#endif /* !__LP64__ */ + + vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kext_alloc_init\n")); + kext_alloc_init(); vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling zone_init\n")); - zone_init(zsize); /* Allocate address space for zones */ + assert((vm_size_t) zsize == zsize); + zone_init((vm_size_t) zsize); /* Allocate address space for zones */ vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling kalloc_init\n")); kalloc_init(); diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index f8b306855..ee3c02b65 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -112,6 +112,7 @@ kmem_alloc_contig( vm_size_t size, vm_offset_t mask, ppnum_t max_pnum, + ppnum_t pnum_mask, int flags) { vm_object_t object; @@ -123,7 +124,7 @@ kmem_alloc_contig( vm_page_t m, pages; 
kern_return_t kr; - if (map == VM_MAP_NULL || (flags && (flags ^ KMA_KOBJECT))) + if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) return KERN_INVALID_ARGUMENT; if (size == 0) { @@ -154,13 +155,13 @@ kmem_alloc_contig( entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? - map_addr - VM_MIN_KERNEL_ADDRESS : 0; + map_addr : 0; /* Take an extra object ref in case the map entry gets deleted */ vm_object_reference(object); vm_map_unlock(map); - kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, FALSE); + kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags); if (kr != KERN_SUCCESS) { vm_map_remove(map, vm_map_trunc_page(map_addr), @@ -198,7 +199,8 @@ kmem_alloc_contig( if (object == kernel_object) vm_map_simplify(map, map_addr); - *addrp = map_addr; + *addrp = (vm_offset_t) map_addr; + assert((vm_map_offset_t) *addrp == map_addr); return KERN_SUCCESS; } @@ -229,13 +231,18 @@ kernel_memory_allocate( { vm_object_t object; vm_object_offset_t offset; + vm_object_offset_t pg_offset; vm_map_entry_t entry; vm_map_offset_t map_addr, fill_start; vm_map_offset_t map_mask; vm_map_size_t map_size, fill_size; - vm_map_size_t i; kern_return_t kr; vm_page_t mem; + vm_page_t guard_page_list = NULL; + vm_page_t wired_page_list = NULL; + int guard_page_count = 0; + int wired_page_count = 0; + int i; int vm_alloc_flags; if (! 
vm_kernel_ready) { @@ -257,6 +264,16 @@ kernel_memory_allocate( map_mask = (vm_map_offset_t) mask; vm_alloc_flags = 0; + + /* + * limit the size of a single extent of wired memory + * to try and limit the damage to the system if + * too many pages get wired down + */ + if (map_size > (1 << 30)) { + return KERN_RESOURCE_SHORTAGE; + } + /* * Guard pages: * @@ -274,6 +291,7 @@ kernel_memory_allocate( fill_start = 0; fill_size = map_size; + if (flags & KMA_GUARD_FIRST) { vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE; fill_start += PAGE_SIZE_64; @@ -283,6 +301,7 @@ kernel_memory_allocate( *addrp = 0; return KERN_INVALID_ARGUMENT; } + guard_page_count++; } if (flags & KMA_GUARD_LAST) { vm_alloc_flags |= VM_FLAGS_GUARD_AFTER; @@ -292,6 +311,53 @@ kernel_memory_allocate( *addrp = 0; return KERN_INVALID_ARGUMENT; } + guard_page_count++; + } + wired_page_count = (int) (fill_size / PAGE_SIZE_64); + assert(wired_page_count * PAGE_SIZE_64 == fill_size); + + for (i = 0; i < guard_page_count; i++) { + for (;;) { + mem = vm_page_grab_guard(); + + if (mem != VM_PAGE_NULL) + break; + if (flags & KMA_NOPAGEWAIT) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + vm_page_more_fictitious(); + } + mem->pageq.next = (queue_entry_t)guard_page_list; + guard_page_list = mem; + } + + for (i = 0; i < wired_page_count; i++) { + uint64_t unavailable; + + for (;;) { + if (flags & KMA_LOMEM) + mem = vm_page_grablo(); + else + mem = vm_page_grab(); + + if (mem != VM_PAGE_NULL) + break; + + if (flags & KMA_NOPAGEWAIT) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE; + + if (unavailable > max_mem || map_size > (max_mem - unavailable)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + VM_PAGE_WAIT(); + } + mem->pageq.next = (queue_entry_t)wired_page_list; + wired_page_list = mem; } /* @@ -310,100 +376,85 @@ kernel_memory_allocate( vm_alloc_flags, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); - return kr; + goto out; 
} entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? - map_addr - VM_MIN_KERNEL_ADDRESS : 0; + map_addr : 0; - vm_object_reference(object); - vm_map_unlock(map); + entry->wired_count++; + + if (flags & KMA_PERMANENT) + entry->permanent = TRUE; + + if (object != kernel_object) + vm_object_reference(object); vm_object_lock(object); + vm_map_unlock(map); - /* - * Allocate the lower guard page if one was requested. The guard - * page extends up to fill_start which is where the real memory - * begins. - */ + pg_offset = 0; + + if (fill_start) { + if (guard_page_list == NULL) + panic("kernel_memory_allocate: guard_page_list == NULL"); + + mem = guard_page_list; + guard_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); - for (i = 0; i < fill_start; i += PAGE_SIZE) { - for (;;) { - mem = vm_page_alloc_guard(object, offset + i); - if (mem != VM_PAGE_NULL) - break; - if (flags & KMA_NOPAGEWAIT) { - kr = KERN_RESOURCE_SHORTAGE; - goto nopage; - } - vm_object_unlock(object); - vm_page_more_fictitious(); - vm_object_lock(object); - } mem->busy = FALSE; + pg_offset += PAGE_SIZE_64; } + for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) { + if (wired_page_list == NULL) + panic("kernel_memory_allocate: wired_page_list == NULL"); - /* - * Allocate the real memory here. This extends from offset fill_start - * for fill_size bytes. 
- */ + mem = wired_page_list; + wired_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + mem->wire_count++; - for (i = fill_start; i < fill_start + fill_size; i += PAGE_SIZE) { - for (;;) { - if (flags & KMA_LOMEM) - mem = vm_page_alloclo(object, offset + i); - else - mem = vm_page_alloc(object, offset + i); - - if (mem != VM_PAGE_NULL) - break; + vm_page_insert(mem, object, offset + pg_offset); - if (flags & KMA_NOPAGEWAIT) { - kr = KERN_RESOURCE_SHORTAGE; - goto nopage; - } - vm_object_unlock(object); - VM_PAGE_WAIT(); - vm_object_lock(object); - } mem->busy = FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + + PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE); } + if ((fill_start + fill_size) < map_size) { + if (guard_page_list == NULL) + panic("kernel_memory_allocate: guard_page_list == NULL"); - /* - * Lastly, allocate the ending guard page if requested. This starts at the ending - * address from the loop above up to the map_size that was originaly - * requested. 
- */ + mem = guard_page_list; + guard_page_list = (vm_page_t)mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); - for (i = fill_start + fill_size; i < map_size; i += PAGE_SIZE) { - for (;;) { - mem = vm_page_alloc_guard(object, offset + i); - if (mem != VM_PAGE_NULL) - break; - if (flags & KMA_NOPAGEWAIT) { - kr = KERN_RESOURCE_SHORTAGE; - goto nopage; - } - vm_object_unlock(object); - vm_page_more_fictitious(); - vm_object_lock(object); - } mem->busy = FALSE; } - vm_object_unlock(object); + if (guard_page_list || wired_page_list) + panic("kernel_memory_allocate: non empty list\n"); - kr = vm_map_wire(map, map_addr, map_addr + map_size, - VM_PROT_DEFAULT, FALSE); - if (kr != KERN_SUCCESS) { - vm_object_lock(object); - goto nopage; - } + vm_page_lockspin_queues(); + vm_page_wire_count += wired_page_count; + vm_page_unlock_queues(); - /* now that the page is wired, we no longer have to fear coalesce */ - vm_object_deallocate(object); + vm_object_unlock(object); + + /* + * now that the pages are wired, we no longer have to fear coalesce + */ if (object == kernel_object) vm_map_simplify(map, map_addr); + else + vm_object_deallocate(object); /* * Return the memory, not zeroed. 
@@ -411,13 +462,14 @@ kernel_memory_allocate( *addrp = CAST_DOWN(vm_offset_t, map_addr); return KERN_SUCCESS; -nopage: - if (object == kernel_object) - vm_object_page_remove(object, offset, offset + i); - vm_object_unlock(object); - vm_map_remove(map, map_addr, map_addr + map_size, 0); - vm_object_deallocate(object); - return KERN_RESOURCE_SHORTAGE; +out: + if (guard_page_list) + vm_page_free_list(guard_page_list, FALSE); + + if (wired_page_list) + vm_page_free_list(wired_page_list, FALSE); + + return kr; } /* @@ -516,9 +568,7 @@ kmem_realloc( for(offset = oldmapsize; offset < newmapsize; offset += PAGE_SIZE) { if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - vm_page_lock_queues(); - vm_page_free(mem); - vm_page_unlock_queues(); + VM_PAGE_FREE(mem); } } object->size = oldmapsize; @@ -542,9 +592,7 @@ kmem_realloc( vm_object_lock(object); for(offset = oldsize; offset < newmapsize; offset += PAGE_SIZE) { if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - vm_page_lock_queues(); - vm_page_free(mem); - vm_page_unlock_queues(); + VM_PAGE_FREE(mem); } } object->size = oldmapsize; @@ -559,7 +607,7 @@ kmem_realloc( } /* - * kmem_alloc_wired: + * kmem_alloc_kobject: * * Allocate wired-down memory in the kernel's address map * or a submap. The memory is not zero-filled. @@ -570,7 +618,7 @@ kmem_realloc( */ kern_return_t -kmem_alloc_wired( +kmem_alloc_kobject( vm_map_t map, vm_offset_t *addrp, vm_size_t size) @@ -581,7 +629,7 @@ kmem_alloc_wired( /* * kmem_alloc_aligned: * - * Like kmem_alloc_wired, except that the memory is aligned. + * Like kmem_alloc_kobject, except that the memory is aligned. * The size should be a power-of-2. */ @@ -635,7 +683,7 @@ kmem_alloc_pageable( * kmem_free: * * Release a region of kernel virtual memory allocated - * with kmem_alloc, kmem_alloc_wired, or kmem_alloc_pageable, + * with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable, * and return the physical pages associated with that region. 
*/ @@ -647,8 +695,17 @@ kmem_free( { kern_return_t kr; + assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS); + TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr); + if(size == 0) { +#if MACH_ASSERT + printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr); +#endif + return; + } + kr = vm_map_remove(map, vm_map_trunc_page(addr), vm_map_round_page(addr + size), VM_MAP_REMOVE_KUNWIRE); @@ -748,6 +805,10 @@ kmem_remap_pages( * Enter it in the kernel pmap. The page isn't busy, * but this shouldn't be a problem because it is wired. */ + + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + PMAP_ENTER(kernel_pmap, map_start, mem, protection, ((unsigned int)(mem->object->wimg_bits)) & VM_WIMG_MASK, @@ -871,26 +932,24 @@ kmem_init( * This may include inaccessible "holes" as determined by what * the machine-dependent init code includes in max_mem. */ - vm_page_wire_count = (atop_64(max_mem) - (vm_page_free_count - + vm_page_active_count - + vm_page_inactive_count)); + assert(atop_64(max_mem) == (unsigned int) atop_64(max_mem)); + vm_page_wire_count = ((unsigned int) atop_64(max_mem) - + (vm_page_free_count + + vm_page_active_count + + vm_page_inactive_count)); /* * Set the default global user wire limit which limits the amount of - * memory that can be locked via mlock(). We set this to the total number of - * pages that are potentially usable by a user app (max_mem) minus - * 1000 pages. This keeps 4MB in reserve for the kernel which will hopefully be - * enough to avoid memory deadlocks. If for some reason the system has less than - * 2000 pages of memory at this point, then we'll allow users to lock up to 80% - * of that. This can be overridden via a sysctl. + * memory that can be locked via mlock(). We set this to the total + * amount of memory that are potentially usable by a user app (max_mem) + * minus a certain amount. This can be overridden via a sysctl. 
*/ - - if (max_mem > 2000) - vm_global_user_wire_limit = max_mem - 1000; - else - vm_global_user_wire_limit = max_mem * 100 / 80; + vm_global_no_user_wire_amount = MIN(max_mem*20/100, + VM_NOT_USER_WIREABLE); + vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount; - vm_user_wire_limit = vm_global_user_wire_limit; /* the default per user limit is the same as the global limit */ + /* the default per user limit is the same as the global limit */ + vm_user_wire_limit = vm_global_user_wire_limit; } diff --git a/osfmk/vm/vm_kern.h b/osfmk/vm/vm_kern.h index 3e22b0633..b1b21a8f4 100644 --- a/osfmk/vm/vm_kern.h +++ b/osfmk/vm/vm_kern.h @@ -86,6 +86,7 @@ extern kern_return_t kernel_memory_allocate( #define KMA_LOMEM 0x08 #define KMA_GUARD_FIRST 0x10 #define KMA_GUARD_LAST 0x20 +#define KMA_PERMANENT 0x40 extern kern_return_t kmem_alloc_contig( vm_map_t map, @@ -93,6 +94,7 @@ extern kern_return_t kmem_alloc_contig( vm_size_t size, vm_offset_t mask, ppnum_t max_pnum, + ppnum_t pnum_mask, int flags); extern kern_return_t kmem_alloc( @@ -127,16 +129,14 @@ extern kern_return_t kmem_suballoc( vm_offset_t *addr, vm_size_t size, boolean_t pageable, - boolean_t anywhere, + int flags, vm_map_t *new_map); -#ifdef XNU_KERNEL_PRIVATE -extern kern_return_t kmem_alloc_wired( +extern kern_return_t kmem_alloc_kobject( vm_map_t map, vm_offset_t *addrp, vm_size_t size); -#endif #ifdef MACH_KERNEL_PRIVATE diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index b4a2e5cf1..d48a044fa 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -89,17 +89,18 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include #include #include +#include #ifdef ppc #include @@ -268,6 +269,27 @@ static int vm_map_region_count_obj_refs( vm_map_entry_t entry, vm_object_t object); + +static kern_return_t vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reuse_pages( + vm_map_t map, + 
vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + +static kern_return_t vm_map_can_reuse( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end); + /* * Macros to copy a vm_map_entry. We must be careful to correctly * manage the wired page count. vm_map_entry_copy() creates a new @@ -285,6 +307,7 @@ MACRO_BEGIN \ (NEW)->in_transition = FALSE; \ (NEW)->wired_count = 0; \ (NEW)->user_wired_count = 0; \ + (NEW)->permanent = FALSE; \ MACRO_END #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) @@ -400,16 +423,16 @@ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ vm_object_t vm_submap_object; static void *map_data; -static vm_map_size_t map_data_size; +static vm_size_t map_data_size; static void *kentry_data; -static vm_map_size_t kentry_data_size; +static vm_size_t kentry_data_size; static int kentry_count = 2048; /* to init kentry_data_size */ -#define NO_COALESCE_LIMIT (1024 * 128) +#define NO_COALESCE_LIMIT ((1024 * 128) - 1) /* Skip acquiring locks if we're in the midst of a kernel core dump */ -extern unsigned int not_in_kdp; +unsigned int not_in_kdp = 1; #if CONFIG_CODE_DECRYPTION /* @@ -454,6 +477,12 @@ vm_map_apple_protected( goto done; } + /* make sure protected object stays alive while map is unlocked */ + vm_object_reference(protected_object); + + vm_map_unlock_read(map); + map_locked = FALSE; + /* * Lookup (and create if necessary) the protected memory object * matching that VM object. @@ -461,16 +490,16 @@ vm_map_apple_protected( * to guarantee that it doesn't go away before we get a chance to map * it. 
*/ - protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info); + + /* release extra ref on protected object */ + vm_object_deallocate(protected_object); + if (protected_mem_obj == NULL) { kr = KERN_FAILURE; goto done; } - vm_map_unlock_read(map); - map_locked = FALSE; - /* map this memory object in place of the current one */ map_addr = start; kr = vm_map_enter_mem_object(map, @@ -502,6 +531,11 @@ vm_map_apple_protected( #endif /* CONFIG_CODE_DECRYPTION */ +lck_grp_t vm_map_lck_grp; +lck_grp_attr_t vm_map_lck_grp_attr; +lck_attr_t vm_map_lck_attr; + + /* * vm_map_init: * @@ -548,15 +582,20 @@ vm_map_init( zone_change(vm_map_zone, Z_COLLECT, FALSE); zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); + zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE); zcram(vm_map_zone, map_data, map_data_size); zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + + lck_grp_attr_setdefault(&vm_map_lck_grp_attr); + lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); + lck_attr_setdefault(&vm_map_lck_attr); } void vm_map_steal_memory( void) { - map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map)); + map_data_size = round_page(10 * sizeof(struct _vm_map)); map_data = pmap_steal_memory(map_data_size); #if 0 @@ -572,7 +611,7 @@ vm_map_steal_memory( kentry_data_size = - vm_map_round_page(kentry_count * sizeof(struct vm_map_entry)); + round_page(kentry_count * sizeof(struct vm_map_entry)); kentry_data = pmap_steal_memory(kentry_data_size); } @@ -616,18 +655,14 @@ vm_map_create( result->wiring_required = FALSE; result->no_zero_fill = FALSE; result->mapped = FALSE; -#if CONFIG_EMBEDDED - result->prot_copy_allow = FALSE; -#else - result->prot_copy_allow = TRUE; -#endif result->wait_for_space = FALSE; + result->switch_protect = FALSE; result->first_free = vm_map_to_entry(result); result->hint = vm_map_to_entry(result); result->color_rr = (color_seed++) & vm_color_mask; vm_map_lock_init(result); - 
mutex_init(&result->s_lock, 0); - + lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr); + return(result); } @@ -826,10 +861,10 @@ void vm_map_res_reference(register vm_map_t map) assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); if (map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapin(map); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); ++map->res_count; vm_map_unlock(map); } else @@ -847,12 +882,12 @@ void vm_map_res_reference(register vm_map_t map) void vm_map_reference_swap(register vm_map_t map) { assert(map != VM_MAP_NULL); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); assert(map->res_count >= 0); assert(map->ref_count >= map->res_count); map->ref_count++; vm_map_res_reference(map); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); } /* @@ -869,11 +904,11 @@ void vm_map_res_deallocate(register vm_map_t map) { assert(map->res_count > 0); if (--map->res_count == 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); vm_map_lock(map); vm_map_swapout(map); vm_map_unlock(map); - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); } assert(map->ref_count >= map->res_count); } @@ -1019,9 +1054,9 @@ void vm_map_swapin (vm_map_t map) if (entry->object.vm_object != VM_OBJECT_NULL) { if (entry->is_sub_map) { vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + lck_mtx_lock(&lmap->s_lock); vm_map_res_reference(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { vm_object_t object = entry->object.vm_object; vm_object_lock(object); @@ -1049,12 +1084,12 @@ void vm_map_swapout(vm_map_t map) * If we raced with a swapin and lost, the residence count * will have been incremented to 1, and we simply return. 
*/ - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); if (map->res_count != 0) { - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); return; } - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); /* * There are no intermediate states of a map going out or @@ -1080,9 +1115,9 @@ void vm_map_swapout(vm_map_t map) if (entry->object.vm_object != VM_OBJECT_NULL) { if (entry->is_sub_map) { vm_map_t lmap = entry->object.sub_map; - mutex_lock(&lmap->s_lock); + lck_mtx_lock(&lmap->s_lock); vm_map_res_deallocate(lmap); - mutex_unlock(&lmap->s_lock); + lck_mtx_unlock(&lmap->s_lock); } else { vm_object_t object = entry->object.vm_object; vm_object_lock(object); @@ -1116,7 +1151,7 @@ void vm_map_swapout(vm_map_t map) */ #define SAVE_HINT_MAP_READ(map,value) \ MACRO_BEGIN \ - OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \ + OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \ MACRO_END @@ -1363,8 +1398,11 @@ vm_map_find_space( new_entry->in_transition = FALSE; new_entry->needs_wakeup = FALSE; new_entry->no_cache = FALSE; + new_entry->permanent = FALSE; + new_entry->superpage_size = 0; new_entry->alias = 0; + new_entry->zero_wired_pages = FALSE; VM_GET_FLAGS_ALIAS(flags, new_entry->alias); @@ -1442,7 +1480,7 @@ vm_map_pmap_enter( } type_of_fault = DBG_CACHE_HIT_FAULT; kr = vm_fault_enter(m, map->pmap, addr, protection, - m->wire_count != 0, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, &type_of_fault); vm_object_unlock(object); @@ -1517,6 +1555,8 @@ vm_map_enter( vm_map_entry_t entry, new_entry; vm_map_offset_t start, tmp_start, tmp_offset; vm_map_offset_t end, tmp_end; + vm_map_offset_t tmp2_start, tmp2_end; + vm_map_offset_t step; kern_return_t result = KERN_SUCCESS; vm_map_t zap_old_map = VM_MAP_NULL; vm_map_t zap_new_map = VM_MAP_NULL; @@ -1528,10 +1568,34 @@ vm_map_enter( boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); boolean_t is_submap = 
((flags & VM_FLAGS_SUBMAP) != 0); + boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); + unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); char alias; vm_map_offset_t effective_min_offset, effective_max_offset; kern_return_t kr; + if (superpage_size) { + switch (superpage_size) { + /* + * Note that the current implementation only supports + * a single size for superpages, SUPERPAGE_SIZE, per + * architecture. As soon as more sizes are supposed + * to be supported, SUPERPAGE_SIZE has to be replaced + * with a lookup of the size depending on superpage_size. + */ +#ifdef __x86_64__ + case SUPERPAGE_SIZE_2MB: + break; +#endif + default: + return KERN_INVALID_ARGUMENT; + } + mask = SUPERPAGE_SIZE-1; + if (size & (SUPERPAGE_SIZE-1)) + return KERN_INVALID_ARGUMENT; + inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */ + } + #if CONFIG_EMBEDDED if (cur_protection & VM_PROT_WRITE) { if (cur_protection & VM_PROT_EXECUTE) { @@ -1539,22 +1603,6 @@ vm_map_enter( cur_protection &= ~VM_PROT_EXECUTE; } } - if (max_protection & VM_PROT_WRITE) { - if (max_protection & VM_PROT_EXECUTE) { - /* Right now all kinds of data segments are RWX. No point in logging that. */ - /* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */ - - /* Try to take a hint from curprot. If curprot is not writable, - * make maxprot not writable. Otherwise make it not executable. - */ - if((cur_protection & VM_PROT_WRITE) == 0) { - max_protection &= ~VM_PROT_WRITE; - } else { - max_protection &= ~VM_PROT_EXECUTE; - } - } - } - assert ((cur_protection | max_protection) == max_protection); #endif /* CONFIG_EMBEDDED */ if (is_submap) { @@ -1581,10 +1629,18 @@ vm_map_enter( } } - effective_min_offset = map->min_offset; + if (flags & VM_FLAGS_BELOW_MIN) { + /* + * Allow an insertion below the map's min offset. 
+ */ + effective_min_offset = 0ULL; + } else { + effective_min_offset = map->min_offset; + } + if (flags & VM_FLAGS_BEYOND_MAX) { /* - * Allow an insertion beyond the map's official top boundary. + * Allow an insertion beyond the map's max offset. */ if (vm_map_is_64bit(map)) effective_max_offset = 0xFFFFFFFFFFFFF000ULL; @@ -1616,7 +1672,7 @@ vm_map_enter( (object != VM_OBJECT_NULL && (object->size != size || object->purgable == VM_PURGABLE_DENY)) - || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */ + || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */ return KERN_INVALID_ARGUMENT; if (!anywhere && overwrite) { @@ -1632,7 +1688,7 @@ vm_map_enter( zap_old_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); } StartAgain: ; @@ -1890,8 +1946,10 @@ StartAgain: ; (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->in_transition == 0) && (entry->no_cache == no_cache) && - ((alias == VM_MEMORY_REALLOC) || - ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) && + ((entry->vme_end - entry->vme_start) + size <= + (alias == VM_MEMORY_REALLOC ? + ANON_CHUNK_SIZE : + NO_COALESCE_LIMIT)) && (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ if (vm_object_coalesce(entry->object.vm_object, VM_OBJECT_NULL, @@ -1912,79 +1970,118 @@ StartAgain: ; } } - /* - * Create a new entry - * LP64todo - for now, we can only allocate 4GB internal objects - * because the default pager can't page bigger ones. Remove this - * when it can. - * - * XXX FBDP - * The reserved "page zero" in each process's address space can - * be arbitrarily large. Splitting it into separate 4GB objects and - * therefore different VM map entries serves no purpose and just - * slows down operations on the VM map, so let's not split the - * allocation into 4GB chunks if the max protection is NONE. That - * memory should never be accessible, so it will never get to the - * default pager. 
- */ - tmp_start = start; - if (object == VM_OBJECT_NULL && - size > (vm_map_size_t)VM_MAX_ADDRESS && - max_protection != VM_PROT_NONE) - tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS; - else - tmp_end = end; - do { - new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, - object, offset, needs_copy, - FALSE, FALSE, - cur_protection, max_protection, - VM_BEHAVIOR_DEFAULT, - inheritance, 0, no_cache); - new_entry->alias = alias; - if (is_submap) { - vm_map_t submap; - boolean_t submap_is_64bit; - boolean_t use_pmap; - - new_entry->is_sub_map = TRUE; - submap = (vm_map_t) object; - submap_is_64bit = vm_map_is_64bit(submap); - use_pmap = (alias == VM_MEMORY_SHARED_PMAP); -#ifndef NO_NESTED_PMAP - if (use_pmap && submap->pmap == NULL) { - /* we need a sub pmap to nest... */ - submap->pmap = pmap_create(0, submap_is_64bit); - if (submap->pmap == NULL) { - /* let's proceed without nesting... */ + step = superpage_size ? SUPERPAGE_SIZE : (end - start); + new_entry = NULL; + + for (tmp2_start = start; tmp2_start<end; tmp2_start += step) { + tmp2_end = tmp2_start + step; + /* + * Create a new entry + * LP64todo - for now, we can only allocate 4GB internal objects + * because the default pager can't page bigger ones. Remove this + * when it can. + * + * XXX FBDP + * The reserved "page zero" in each process's address space can + * be arbitrarily large. Splitting it into separate 4GB objects and + * therefore different VM map entries serves no purpose and just + * slows down operations on the VM map, so let's not split the + * allocation into 4GB chunks if the max protection is NONE. That + * memory should never be accessible, so it will never get to the + * default pager. + */ + tmp_start = tmp2_start; + if (object == VM_OBJECT_NULL && + size > (vm_map_size_t)ANON_CHUNK_SIZE && + max_protection != VM_PROT_NONE && + superpage_size == 0) + tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE; + else + tmp_end = tmp2_end; + do { + new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, + object, offset, needs_copy, + FALSE, FALSE, + cur_protection, max_protection, + VM_BEHAVIOR_DEFAULT, + inheritance, 0, no_cache, + permanent, superpage_size); + new_entry->alias = alias; + if (is_submap) { + vm_map_t submap; + boolean_t submap_is_64bit; + boolean_t use_pmap; + + new_entry->is_sub_map = TRUE; + submap = (vm_map_t) object; + submap_is_64bit = vm_map_is_64bit(submap); + use_pmap = (alias == VM_MEMORY_SHARED_PMAP); + #ifndef NO_NESTED_PMAP + if (use_pmap && submap->pmap == NULL) { + /* we need a sub pmap to nest... */ + submap->pmap = pmap_create(0, submap_is_64bit); + if (submap->pmap == NULL) { + /* let's proceed without nesting...
*/ + } } + if (use_pmap && submap->pmap != NULL) { + kr = pmap_nest(map->pmap, + submap->pmap, + tmp_start, + tmp_start, + tmp_end - tmp_start); + if (kr != KERN_SUCCESS) { + printf("vm_map_enter: " + "pmap_nest(0x%llx,0x%llx) " + "error 0x%x\n", + (long long)tmp_start, + (long long)tmp_end, + kr); + } else { + /* we're now nested ! */ + new_entry->use_pmap = TRUE; + pmap_empty = FALSE; + } + } + #endif /* NO_NESTED_PMAP */ } - if (use_pmap && submap->pmap != NULL) { - kr = pmap_nest(map->pmap, - submap->pmap, - tmp_start, - tmp_start, - tmp_end - tmp_start); + entry = new_entry; + + if (superpage_size) { + vm_page_t pages, m; + vm_object_t sp_object; + + entry->offset = 0; + + /* allocate one superpage */ + kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0); if (kr != KERN_SUCCESS) { - printf("vm_map_enter: " - "pmap_nest(0x%llx,0x%llx) " - "error 0x%x\n", - (long long)tmp_start, - (long long)tmp_end, - kr); - } else { - /* we're now nested ! */ - new_entry->use_pmap = TRUE; - pmap_empty = FALSE; + new_mapping_established = TRUE; /* will cause deallocation of whole range */ + RETURN(kr); + } + + /* create one vm_object per superpage */ + sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start)); + sp_object->phys_contiguous = TRUE; + sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; + entry->object.vm_object = sp_object; + + /* enter the base pages into the object */ + vm_object_lock(sp_object); + for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) { + m = pages; + pmap_zero_page(m->phys_page); + pages = NEXT_PAGE(m); + *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; + vm_page_insert(m, sp_object, offset); } + vm_object_unlock(sp_object); } -#endif /* NO_NESTED_PMAP */ - } - entry = new_entry; - } while (tmp_end != end && - (tmp_start = tmp_end) && - (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? 
- tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); + } while (tmp_end != tmp2_end && + (tmp_start = tmp_end) && + (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ? + tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); + } vm_map_unlock(map); map_locked = FALSE; @@ -1994,7 +2091,7 @@ StartAgain: ; /* Wire down the new entry if the user * requested all new map entries be wired. */ - if (map->wiring_required) { + if ((map->wiring_required)||(superpage_size)) { pmap_empty = FALSE; /* pmap won't be empty */ result = vm_map_wire(map, start, end, new_entry->protection, TRUE); @@ -2076,7 +2173,7 @@ BailOut: ; zap_new_map = vm_map_create(PMAP_NULL, *address, *address + size, - TRUE); + map->hdr.entries_pageable); if (!map_locked) { vm_map_lock(map); map_locked = TRUE; @@ -2381,14 +2478,17 @@ vm_map_enter_mem_object( " by a non-private kernel entity\n"); return KERN_INVALID_OBJECT; } - vm_object_lock(object); - while (!object->pager_ready) { - vm_object_wait(object, - VM_OBJECT_EVENT_PAGER_READY, - THREAD_UNINT); + if (!object->pager_ready) { vm_object_lock(object); + + while (!object->pager_ready) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGER_READY, + THREAD_UNINT); + vm_object_lock(object); + } + vm_object_unlock(object); } - vm_object_unlock(object); } } else { return KERN_INVALID_OBJECT; @@ -2501,6 +2601,156 @@ vm_map_enter_mem_object( return result; } + + + +kern_return_t +vm_map_enter_mem_object_control( + vm_map_t target_map, + vm_map_offset_t *address, + vm_map_size_t initial_size, + vm_map_offset_t mask, + int flags, + memory_object_control_t control, + vm_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_address_t map_addr; + vm_map_size_t map_size; + vm_object_t object; + vm_object_size_t size; + kern_return_t result; + memory_object_t pager; + vm_prot_t pager_prot; + kern_return_t kr; + + /* + * Check arguments for validity + */ + if ((target_map == 
VM_MAP_NULL) || + (cur_protection & ~VM_PROT_ALL) || + (max_protection & ~VM_PROT_ALL) || + (inheritance > VM_INHERIT_LAST_VALID) || + initial_size == 0) + return KERN_INVALID_ARGUMENT; + + map_addr = vm_map_trunc_page(*address); + map_size = vm_map_round_page(initial_size); + size = vm_object_round_page(initial_size); + + object = memory_object_control_to_vm_object(control); + + if (object == VM_OBJECT_NULL) + return KERN_INVALID_OBJECT; + + if (object == kernel_object) { + printf("Warning: Attempt to map kernel object" + " by a non-private kernel entity\n"); + return KERN_INVALID_OBJECT; + } + + vm_object_lock(object); + object->ref_count++; + vm_object_res_reference(object); + + /* + * For "named" VM objects, let the pager know that the + * memory object is being mapped. Some pagers need to keep + * track of this, to know when they can reclaim the memory + * object, for example. + * VM calls memory_object_map() for each mapping (specifying + * the protection of each mapping) and calls + * memory_object_last_unmap() when all the mappings are gone. + */ + pager_prot = max_protection; + if (copy) { + pager_prot &= ~VM_PROT_WRITE; + } + pager = object->pager; + if (object->named && + pager != MEMORY_OBJECT_NULL && + object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { + assert(object->pager_ready); + vm_object_mapping_wait(object, THREAD_UNINT); + vm_object_mapping_begin(object); + vm_object_unlock(object); + + kr = memory_object_map(pager, pager_prot); + assert(kr == KERN_SUCCESS); + + vm_object_lock(object); + vm_object_mapping_end(object); + } + vm_object_unlock(object); + + /* + * Perform the copy if requested + */ + + if (copy) { + vm_object_t new_object; + vm_object_offset_t new_offset; + + result = vm_object_copy_strategically(object, offset, size, + &new_object, &new_offset, + &copy); + + + if (result == KERN_MEMORY_RESTART_COPY) { + boolean_t success; + boolean_t src_needs_copy; + + /* + * XXX + * We currently ignore src_needs_copy.
+ * This really is the issue of how to make + * MEMORY_OBJECT_COPY_SYMMETRIC safe for + * non-kernel users to use. Solution forthcoming. + * In the meantime, since we don't allow non-kernel + * memory managers to specify symmetric copy, + * we won't run into problems here. + */ + new_object = object; + new_offset = offset; + success = vm_object_copy_quickly(&new_object, + new_offset, size, + &src_needs_copy, + &copy); + assert(success); + result = KERN_SUCCESS; + } + /* + * Throw away the reference to the + * original object, as it won't be mapped. + */ + + vm_object_deallocate(object); + + if (result != KERN_SUCCESS) + return result; + + object = new_object; + offset = new_offset; + } + + result = vm_map_enter(target_map, + &map_addr, map_size, + (vm_map_offset_t)mask, + flags, + object, offset, + copy, + cur_protection, max_protection, inheritance); + if (result != KERN_SUCCESS) + vm_object_deallocate(object); + *address = map_addr; + + return result; +} + + #if VM_CPM #ifdef MACH_ASSERT @@ -2556,7 +2806,7 @@ vm_map_enter_cpm( if (size > VM_MAX_ADDRESS) return KERN_RESOURCE_SHORTAGE; if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), - &pages, 0, TRUE)) != KERN_SUCCESS) + &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) return kr; cpm_obj = vm_object_allocate((vm_object_size_t)size); @@ -2582,7 +2832,7 @@ vm_map_enter_cpm( assert(!m->wanted); assert(!m->pageout); assert(!m->tabled); - assert(m->wire_count); + assert(VM_PAGE_WIRED(m)); /* * ENCRYPTED SWAP: * "m" is not supposed to be pageable, so it @@ -2672,7 +2922,7 @@ vm_map_enter_cpm( type_of_fault = DBG_ZERO_FILL_FAULT; vm_fault_enter(m, pmap, va, VM_PROT_ALL, - m->wire_count != 0, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, &type_of_fault); vm_object_unlock(cpm_obj); @@ -2738,9 +2988,13 @@ vm_map_enter_cpm( } #endif /* VM_CPM */ +/* Not used without nested pmaps */ +#ifndef NO_NESTED_PMAP /* * Clip and unnest a portion of a nested submap mapping.
*/ + + static void vm_map_clip_unnest( vm_map_t map, @@ -2748,9 +3002,24 @@ vm_map_clip_unnest( vm_map_offset_t start_unnest, vm_map_offset_t end_unnest) { + vm_map_offset_t old_start_unnest = start_unnest; + vm_map_offset_t old_end_unnest = end_unnest; + assert(entry->is_sub_map); assert(entry->object.sub_map != NULL); + /* + * Query the platform for the optimal unnest range. + * DRK: There's some duplication of effort here, since + * callers may have adjusted the range to some extent. This + * routine was introduced to support 1GiB subtree nesting + * for x86 platforms, which can also nest on 2MiB boundaries + * depending on size/alignment. + */ + if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) { + log_unnest_badness(map, old_start_unnest, old_end_unnest); + } + if (entry->vme_start > start_unnest || entry->vme_end < end_unnest) { panic("vm_map_clip_unnest(0x%llx,0x%llx): " @@ -2758,6 +3027,7 @@ vm_map_clip_unnest( (long long)start_unnest, (long long)end_unnest, (long long)entry->vme_start, (long long)entry->vme_end); } + if (start_unnest > entry->vme_start) { _vm_map_clip_start(&map->hdr, entry, @@ -2784,6 +3054,7 @@ vm_map_clip_unnest( } entry->use_pmap = FALSE; } +#endif /* NO_NESTED_PMAP */ /* * vm_map_clip_start: [ internal use only ] @@ -2807,6 +3078,8 @@ vm_map_clip_start( * Make sure "startaddr" is no longer in a nested range * before we clip. Unnest only the minimum range the platform * can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. */ start_unnest = startaddr & ~(pmap_nesting_size_min - 1); end_unnest = start_unnest + pmap_nesting_size_min; @@ -2897,6 +3170,8 @@ vm_map_clip_end( * Make sure the range between the start of this entry and * the new "endaddr" is no longer nested before we clip. * Unnest only the minimum range the platform can handle. + * vm_map_clip_unnest may perform additional adjustments to + * the unnest range. 
*/ start_unnest = entry->vme_start; end_unnest = @@ -3151,15 +3426,10 @@ vm_map_protect( XPR(XPR_VM_MAP, "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", - (integer_t)map, start, end, new_prot, set_max); + map, start, end, new_prot, set_max); vm_map_lock(map); - if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) { - vm_map_unlock(map); - return(KERN_PROTECTION_FAILURE); - } - /* LP64todo - remove this check when vm_map_commpage64() * no longer has to stuff in a map_entry for the commpage * above the map's max_offset. @@ -3169,14 +3439,24 @@ vm_map_protect( return(KERN_INVALID_ADDRESS); } - /* - * Lookup the entry. If it doesn't start in a valid - * entry, return an error. - */ - if (! vm_map_lookup_entry(map, start, &entry)) { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); - } + while(1) { + /* + * Lookup the entry. If it doesn't start in a valid + * entry, return an error. + */ + if (! vm_map_lookup_entry(map, start, &entry)) { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */ + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + break; + } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); /* * Make a first pass to check for protection and address @@ -3409,7 +3689,8 @@ add_wire_counts( */ if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || - size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit) + size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit || + size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount) return KERN_RESOURCE_SHORTAGE; /* @@ -3994,7 +4275,8 @@ vm_map_wire( * existing mappings */ VM_MAP_RANGE_CHECK(map, start, end); - mapping_prealloc(end - start); + assert((unsigned int) (end - start) == (end - start)); + mapping_prealloc((unsigned int) (end - start)); #endif kret = vm_map_wire_nested(map, start, end, access_type, user_wire, 
(pmap_t)NULL, 0); @@ -4063,6 +4345,12 @@ vm_map_unwire_nested( return(KERN_INVALID_ADDRESS); } + if (entry->superpage_size) { + /* superpages are always wired */ + vm_map_unlock(map); + return KERN_INVALID_ADDRESS; + } + need_wakeup = FALSE; while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { if (entry->in_transition) { @@ -4268,6 +4556,10 @@ vm_map_unwire_nested( continue; } + if(entry->zero_wired_pages) { + entry->zero_wired_pages = FALSE; + } + entry->in_transition = TRUE; tmp_entry = *entry; /* see comment in vm_map_wire() */ @@ -4370,6 +4662,7 @@ vm_map_entry_delete( assert(page_aligned(e)); assert(entry->wired_count == 0); assert(entry->user_wired_count == 0); + assert(!entry->permanent); if (entry->is_sub_map) { object = NULL; @@ -4529,28 +4822,37 @@ vm_map_delete( */ flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; - /* - * Find the start of the region, and clip it - */ - if (vm_map_lookup_entry(map, start, &first_entry)) { - entry = first_entry; - if (start == entry->vme_start) { + while(1) { + /* + * Find the start of the region, and clip it + */ + if (vm_map_lookup_entry(map, start, &first_entry)) { + entry = first_entry; + if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start); + start = SUPERPAGE_ROUND_DOWN(start); + continue; + } + if (start == entry->vme_start) { + /* + * No need to clip. We don't want to cause + * any unnecessary unnesting in this case... + */ + } else { + vm_map_clip_start(map, entry, start); + } + /* - * No need to clip. We don't want to cause - * any unnecessary unnesting in this case... + * Fix the lookup hint now, rather than each + * time through the loop. */ + SAVE_HINT_MAP_WRITE(map, entry->vme_prev); } else { - vm_map_clip_start(map, entry, start); + entry = first_entry->vme_next; } - - /* - * Fix the lookup hint now, rather than each - * time through the loop. 
- */ - SAVE_HINT_MAP_WRITE(map, entry->vme_prev); - } else { - entry = first_entry->vme_next; + break; } + if (entry->superpage_size) + end = SUPERPAGE_ROUND_UP(end); need_wakeup = FALSE; /* @@ -4591,6 +4893,14 @@ vm_map_delete( } else { vm_map_clip_end(map, entry, end); } + + if (entry->permanent) { + panic("attempt to remove permanent VM map entry " + "%p [0x%llx:0x%llx]\n", + entry, (uint64_t) s, (uint64_t) end); + } + + if (entry->in_transition) { wait_result_t wait_result; @@ -4648,15 +4958,19 @@ vm_map_delete( user_wire = entry->user_wired_count > 0; /* - * Remove a kernel wiring if requested or if - * there are user wirings. + * Remove a kernel wiring if requested */ - if ((flags & VM_MAP_REMOVE_KUNWIRE) || - (entry->user_wired_count > 0)) + if (flags & VM_MAP_REMOVE_KUNWIRE) { entry->wired_count--; - - /* remove all user wire references */ - entry->user_wired_count = 0; + } + + /* + * Remove all user wirings for proper accounting + */ + if (entry->user_wired_count > 0) { + while (entry->user_wired_count) + subtract_wire_counts(map, entry, user_wire); + } if (entry->wired_count != 0) { assert(map != kernel_map); @@ -4963,10 +5277,6 @@ void vm_map_copy_discard( vm_map_copy_t copy) { - TR_DECL("vm_map_copy_discard"); - -/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/ - if (copy == VM_MAP_COPY_NULL) return; @@ -5475,7 +5785,7 @@ vm_map_copy_overwrite_nested( vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; int nentries; int remaining_entries = 0; - int new_offset = 0; + vm_map_offset_t new_offset = 0; for (entry = tmp_entry; copy_size == 0;) { vm_map_entry_t next; @@ -6366,7 +6676,14 @@ vm_map_copyin_kernel_buffer( { kern_return_t kr; vm_map_copy_t copy; - vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len; + vm_size_t kalloc_size; + + if ((vm_size_t) len != len) { + /* "len" is too big and doesn't fit in a "vm_size_t" */ + return KERN_RESOURCE_SHORTAGE; + } + kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len); + assert((vm_map_size_t) 
kalloc_size == sizeof (struct vm_map_copy) + len); copy = (vm_map_copy_t) kalloc(kalloc_size); if (copy == VM_MAP_COPY_NULL) { @@ -6378,7 +6695,7 @@ vm_map_copyin_kernel_buffer( copy->cpy_kdata = (void *) (copy + 1); copy->cpy_kalloc_size = kalloc_size; - kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); + kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len); if (kr != KERN_SUCCESS) { kfree(copy, kalloc_size); return kr; @@ -6447,7 +6764,8 @@ vm_map_copyout_kernel_buffer( * If the target map is the current map, just do * the copy. */ - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { kr = KERN_INVALID_ADDRESS; } } @@ -6462,7 +6780,8 @@ vm_map_copyout_kernel_buffer( vm_map_reference(map); oldmap = vm_map_switch(map); - if (copyout(copy->cpy_kdata, *addr, copy->size)) { + assert((vm_size_t) copy->size == copy->size); + if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { vm_map_copyout_kernel_buffer_failures++; kr = KERN_INVALID_ADDRESS; } @@ -6739,7 +7058,7 @@ StartAgain: ; vm_object_lock(object); m = vm_page_lookup(object, offset); - if (m == VM_PAGE_NULL || m->wire_count == 0 || + if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) || m->absent) panic("vm_map_copyout: wiring %p", m); @@ -6761,7 +7080,7 @@ StartAgain: ; type_of_fault = DBG_CACHE_HIT_FAULT; vm_fault_enter(m, dst_map->pmap, va, prot, - m->wire_count != 0, FALSE, FALSE, + VM_PAGE_WIRED(m), FALSE, FALSE, &type_of_fault); vm_object_unlock(object); @@ -6914,7 +7233,7 @@ vm_map_copyin_common( src_start = vm_map_trunc_page(src_addr); src_end = vm_map_round_page(src_end); - XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0); + XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); /* * Allocate a header element for the list. 
@@ -7764,13 +8083,16 @@ vm_map_fork( boolean_t src_needs_copy; boolean_t new_entry_needs_copy; -#ifdef __i386__ new_pmap = pmap_create((vm_map_size_t) 0, - old_map->pmap->pm_task_map != TASK_MAP_32BIT); +#if defined(__i386__) || defined(__x86_64__) + old_map->pmap->pm_task_map != TASK_MAP_32BIT +#else + 0 +#endif + ); +#if defined(__i386__) if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) pmap_set_4GB_pagezero(new_pmap); -#else - new_pmap = pmap_create((vm_map_size_t) 0, 0); #endif vm_map_reference_swap(old_map); @@ -8385,6 +8707,7 @@ RetryLookup: ; fault_info->lo_offset = entry->offset; fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; fault_info->no_cache = entry->no_cache; + fault_info->stealth = FALSE; } /* @@ -8627,7 +8950,8 @@ vm_map_region_recurse_64( /* keep "next_map" locked in case we need it */ } else { /* release this map */ - vm_map_unlock_read(curr_map); + if (not_in_kdp) + vm_map_unlock_read(curr_map); } /* @@ -8689,6 +9013,11 @@ vm_map_region_recurse_64( *size = curr_entry->vme_end - curr_entry->vme_start; *address = curr_entry->vme_start + curr_offset; +// LP64todo: all the current tools are 32bit, obviously never worked for 64b +// so probably should be a real 32b ID vs. ptr. 
+// Current users just check for equality +#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p) + if (look_for_pages) { submap_info->user_tag = curr_entry->alias; submap_info->offset = curr_entry->offset; @@ -8698,7 +9027,7 @@ vm_map_region_recurse_64( submap_info->behavior = curr_entry->behavior; submap_info->user_wired_count = curr_entry->user_wired_count; submap_info->is_submap = curr_entry->is_sub_map; - submap_info->object_id = (uint32_t) curr_entry->object.vm_object; + submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); } else { short_info->user_tag = curr_entry->alias; short_info->offset = curr_entry->offset; @@ -8708,7 +9037,7 @@ vm_map_region_recurse_64( short_info->behavior = curr_entry->behavior; short_info->user_wired_count = curr_entry->user_wired_count; short_info->is_submap = curr_entry->is_sub_map; - short_info->object_id = (uint32_t) curr_entry->object.vm_object; + short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); } extended.pages_resident = 0; @@ -8976,7 +9305,11 @@ vm_map_region( } } -#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define OBJ_RESIDENT_COUNT(obj, entry_size) \ + MIN((entry_size), \ + ((obj)->all_reusable ? 
\ + (obj)->wired_page_count : \ + (obj)->resident_page_count - (obj)->reusable_page_count)) void vm_map_region_top_walk( @@ -8996,7 +9329,7 @@ vm_map_region_top_walk( int ref_count; uint32_t entry_size; - entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE; + entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64); obj = entry->object.vm_object; @@ -9005,11 +9338,14 @@ vm_map_region_top_walk( if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) ref_count--; + assert(obj->reusable_page_count <= obj->resident_page_count); if (obj->shadow) { if (ref_count == 1) - top->private_pages_resident = min(obj->resident_page_count, entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); else - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); top->ref_count = ref_count; top->share_mode = SM_COW; @@ -9021,26 +9357,34 @@ vm_map_region_top_walk( if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) ref_count--; - top->shared_pages_resident += min(obj->resident_page_count, entry_size); + assert(obj->reusable_page_count <= obj->resident_page_count); + top->shared_pages_resident += + OBJ_RESIDENT_COUNT(obj, entry_size); top->ref_count += ref_count - 1; } } else { if (entry->needs_copy) { top->share_mode = SM_COW; - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, entry_size); } else { if (ref_count == 1 || (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { top->share_mode = SM_PRIVATE; - top->private_pages_resident = min(obj->resident_page_count, entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } else { top->share_mode = SM_SHARED; - top->shared_pages_resident = min(obj->resident_page_count, entry_size); + top->shared_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } } 
top->ref_count = ref_count; } - top->obj_id = (int)obj; + /* XXX K64: obj_id will be truncated */ + top->obj_id = (unsigned int) (uintptr_t)obj; vm_object_unlock(obj); } @@ -9085,26 +9429,34 @@ vm_map_region_walk( vm_map_region_look_for_page(map, va, obj, offset, ref_count, 0, extended); - } - - shadow_object = obj->shadow; - shadow_depth = 0; - if (shadow_object != VM_OBJECT_NULL) { - vm_object_lock(shadow_object); - for (; - shadow_object != VM_OBJECT_NULL; - shadow_depth++) { - vm_object_t next_shadow; - - next_shadow = shadow_object->shadow; - if (next_shadow) { - vm_object_lock(next_shadow); + } else { + shadow_object = obj->shadow; + shadow_depth = 0; + + if ( !(obj->pager_trusted) && !(obj->internal)) + extended->external_pager = 1; + + if (shadow_object != VM_OBJECT_NULL) { + vm_object_lock(shadow_object); + for (; + shadow_object != VM_OBJECT_NULL; + shadow_depth++) { + vm_object_t next_shadow; + + if ( !(shadow_object->pager_trusted) && + !(shadow_object->internal)) + extended->external_pager = 1; + + next_shadow = shadow_object->shadow; + if (next_shadow) { + vm_object_lock(next_shadow); + } + vm_object_unlock(shadow_object); + shadow_object = next_shadow; } - vm_object_unlock(shadow_object); - shadow_object = next_shadow; } + extended->shadow_depth = shadow_depth; } - extended->shadow_depth = shadow_depth; if (extended->shadow_depth || entry->needs_copy) extended->share_mode = SM_COW; @@ -9345,11 +9697,13 @@ vm_map_simplify_entry( (prev_entry->max_protection == this_entry->max_protection) && (prev_entry->behavior == this_entry->behavior) && (prev_entry->alias == this_entry->alias) && + (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && (prev_entry->no_cache == this_entry->no_cache) && (prev_entry->wired_count == this_entry->wired_count) && (prev_entry->user_wired_count == this_entry->user_wired_count) && (prev_entry->needs_copy == this_entry->needs_copy) && + (prev_entry->permanent == this_entry->permanent) && (prev_entry->use_pmap == 
FALSE) && (this_entry->use_pmap == FALSE) && @@ -9582,54 +9936,510 @@ vm_map_behavior_set( XPR(XPR_VM_MAP, "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", - (integer_t)map, start, end, new_behavior, 0); + map, start, end, new_behavior, 0); switch (new_behavior) { + + /* + * This first block of behaviors all set a persistent state on the specified + * memory range. All we have to do here is to record the desired behavior + * in the vm_map_entry_t's. + */ + case VM_BEHAVIOR_DEFAULT: case VM_BEHAVIOR_RANDOM: case VM_BEHAVIOR_SEQUENTIAL: case VM_BEHAVIOR_RSEQNTL: + case VM_BEHAVIOR_ZERO_WIRED_PAGES: + vm_map_lock(map); + + /* + * The entire address range must be valid for the map. + * Note that vm_map_range_check() does a + * vm_map_lookup_entry() internally and returns the + * entry containing the start of the address range if + * the entire range is valid. + */ + if (vm_map_range_check(map, start, end, &temp_entry)) { + entry = temp_entry; + vm_map_clip_start(map, entry, start); + } + else { + vm_map_unlock(map); + return(KERN_INVALID_ADDRESS); + } + + while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { + vm_map_clip_end(map, entry, end); + assert(!entry->use_pmap); + + if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { + entry->zero_wired_pages = TRUE; + } else { + entry->behavior = new_behavior; + } + entry = entry->vme_next; + } + + vm_map_unlock(map); break; + + /* + * The rest of these are different from the above in that they cause + * an immediate action to take place as opposed to setting a behavior that + * affects future actions. 
+ */ + case VM_BEHAVIOR_WILLNEED: + return vm_map_willneed(map, start, end); + case VM_BEHAVIOR_DONTNEED: - new_behavior = VM_BEHAVIOR_DEFAULT; - break; + return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS); + + case VM_BEHAVIOR_FREE: + return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS); + + case VM_BEHAVIOR_REUSABLE: + return vm_map_reusable_pages(map, start, end); + + case VM_BEHAVIOR_REUSE: + return vm_map_reuse_pages(map, start, end); + + case VM_BEHAVIOR_CAN_REUSE: + return vm_map_can_reuse(map, start, end); + default: return(KERN_INVALID_ARGUMENT); } - vm_map_lock(map); + return(KERN_SUCCESS); +} + + +/* + * Internals for madvise(MADV_WILLNEED) system call. + * + * The present implementation is to do a read-ahead if the mapping corresponds + * to a mapped regular file. If it's an anonymous mapping, then we do nothing + * and basically ignore the "advice" (which we are always free to do). + */ + + +static kern_return_t +vm_map_willneed( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end +) +{ + vm_map_entry_t entry; + vm_object_t object; + memory_object_t pager; + struct vm_object_fault_info fault_info; + kern_return_t kr; + vm_object_size_t len; + vm_object_offset_t offset; /* - * The entire address range must be valid for the map. - * Note that vm_map_range_check() does a - * vm_map_lookup_entry() internally and returns the - * entry containing the start of the address range if - * the entire range is valid. + * Fill in static values in fault_info. Several fields get ignored by the code + * we call, but we'll fill them in anyway since uninitialized fields are bad + * when it comes to future backwards compatibility. 
*/ - if (vm_map_range_check(map, start, end, &temp_entry)) { - entry = temp_entry; - vm_map_clip_start(map, entry, start); + + fault_info.interruptible = THREAD_UNINT; /* ignored value */ + fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; + fault_info.no_cache = FALSE; /* ignored value */ + fault_info.stealth = TRUE; + + /* + * The MADV_WILLNEED operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (vm_map_range_check(map, start, end, &entry)) { + + /* + * Examine each vm_map_entry_t in the range. + */ + + for (; entry->vme_start < end; start += len, entry = entry->vme_next) { + + /* + * The first time through, the start address could be anywhere within the + * vm_map_entry we found. So adjust the offset to correspond. After that, + * the offset will always be zero to correspond to the beginning of the current + * vm_map_entry. + */ + + offset = (start - entry->vme_start) + entry->offset; + + /* + * Set the length so we don't go beyond the end of the map_entry or beyond the + * end of the range we were given. This range could span also multiple map + * entries all of which map different files, so make sure we only do the right + * amount of I/O for each object. Note that it's possible for there to be + * multiple map entries all referring to the same object but with different + * page permissions, but it's not worth trying to optimize that case. + */ + + len = MIN(entry->vme_end - start, end - start); + + if ((vm_size_t) len != len) { + /* 32-bit overflow */ + len = (vm_size_t) (0 - PAGE_SIZE); + } + fault_info.cluster_size = (vm_size_t) len; + fault_info.lo_offset = offset; + fault_info.hi_offset = offset + len; + fault_info.user_tag = entry->alias; + + /* + * If there's no read permission to this mapping, then just skip it. 
+ */ + + if ((entry->protection & VM_PROT_READ) == 0) { + continue; + } + + /* + * Find the file object backing this map entry. If there is none, + * then we simply ignore the "will need" advice for this entry and + * go on to the next one. + */ + + if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) { + continue; + } + + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); + + /* + * Get the data from the object asynchronously. + * + * Note that memory_object_data_request() places limits on the amount + * of I/O it will do. Regardless of the len we specified, it won't do + * more than MAX_UPL_TRANSFER and it silently truncates the len to that + * size. This isn't necessarily bad since madvise shouldn't really be + * used to page in unlimited amounts of data. Other Unix variants limit + * the willneed case as well. If this turns out to be an issue for + * developers, then we can always adjust the policy here and still be + * backwards compatible since this is all just "advice". + */ + + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* ignored */ + VM_PROT_READ, + (memory_object_fault_info_t)&fault_info); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); + + /* + * If we couldn't do the I/O for some reason, just give up on the + * madvise. We still return success to the user since madvise isn't + * supposed to fail when the advice can't be taken. 
+ */ + + if (kr != KERN_SUCCESS) { + break; + } + } + + kr = KERN_SUCCESS; + } else + kr = KERN_INVALID_ADDRESS; + + vm_map_unlock_read(map); + return kr; +} + +static boolean_t +vm_map_entry_is_reusable( + vm_map_entry_t entry) +{ + vm_object_t object; + + if (entry->is_shared || + entry->is_sub_map || + entry->in_transition || + entry->protection != VM_PROT_DEFAULT || + entry->max_protection != VM_PROT_ALL || + entry->inheritance != VM_INHERIT_DEFAULT || + entry->no_cache || + entry->permanent || + entry->superpage_size != 0 || + entry->zero_wired_pages || + entry->wired_count != 0 || + entry->user_wired_count != 0) { + return FALSE; } - else { - vm_map_unlock(map); - return(KERN_INVALID_ADDRESS); + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + return TRUE; + } + if (object->ref_count == 1 && + object->wired_page_count == 0 && + object->copy == VM_OBJECT_NULL && + object->shadow == VM_OBJECT_NULL && + object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && + object->internal && + !object->true_share && + object->wimg_bits == VM_WIMG_DEFAULT && + !object->code_signed) { + return TRUE; } + return FALSE; + + +} - while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { - vm_map_clip_end(map, entry, end); - assert(!entry->use_pmap); +static kern_return_t +vm_map_reuse_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; + + /* + * The MADV_REUSE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ - entry->behavior = new_behavior; + vm_map_lock_read(map); - entry = entry->vme_next; + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. 
+ */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; } - vm_map_unlock(map); - return(KERN_SUCCESS); + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + } else { + start_offset = 0; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += entry->offset; + end_offset += entry->offset; + + object = entry->object.vm_object; + if (object != VM_OBJECT_NULL) { + vm_object_lock(object); + vm_object_reuse_pages(object, start_offset, end_offset, + TRUE); + vm_object_unlock(object); + } + + if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". + */ + entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.reuse_pages_success++; + return KERN_SUCCESS; } +static kern_return_t +vm_map_reusable_pages( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + vm_object_t object; + vm_object_offset_t start_offset, end_offset; + + /* + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. 
+ */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + int kill_pages = 0; + + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * The first time through, the start address could be anywhere + * within the vm_map_entry we found. So adjust the offset to + * correspond. + */ + if (entry->vme_start < start) { + start_offset = start - entry->vme_start; + } else { + start_offset = 0; + } + end_offset = MIN(end, entry->vme_end) - entry->vme_start; + start_offset += entry->offset; + end_offset += entry->offset; + + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) + continue; + + + vm_object_lock(object); + if (object->ref_count == 1 && !object->shadow) + kill_pages = 1; + else + kill_pages = -1; + if (kill_pages != -1) { + vm_object_deactivate_pages(object, + start_offset, + end_offset - start_offset, + kill_pages, + TRUE /*reusable_pages*/); + } else { + vm_page_stats_reusable.reusable_pages_shared++; + } + vm_object_unlock(object); + + if (entry->alias == VM_MEMORY_MALLOC_LARGE || + entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) { + /* + * XXX + * We do not hold the VM map exclusively here. + * The "alias" field is not that critical, so it's + * safe to update it here, as long as it is the only + * one that can be modified while holding the VM map + * "shared". 
+ */ + entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.reusable_pages_success++; + return KERN_SUCCESS; +} + + +static kern_return_t +vm_map_can_reuse( + vm_map_t map, + vm_map_offset_t start, + vm_map_offset_t end) +{ + vm_map_entry_t entry; + + /* + * The MADV_REUSABLE operation doesn't require any changes to the + * vm_map_entry_t's, so the read lock is sufficient. + */ + + vm_map_lock_read(map); + + /* + * The madvise semantics require that the address range be fully + * allocated with no holes. Otherwise, we're required to return + * an error. + */ + + if (!vm_map_range_check(map, start, end, &entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; + } + + /* + * Examine each vm_map_entry_t in the range. + */ + for (; entry != vm_map_to_entry(map) && entry->vme_start < end; + entry = entry->vme_next) { + /* + * Sanity check on the VM map entry. + */ + if (! vm_map_entry_is_reusable(entry)) { + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_failure++; + return KERN_INVALID_ADDRESS; + } + } + + vm_map_unlock_read(map); + vm_page_stats_reusable.can_reuse_success++; + return KERN_SUCCESS; +} + + + #include #if MACH_KDB #include @@ -9944,7 +10754,9 @@ vm_map_entry_insert( vm_behavior_t behavior, vm_inherit_t inheritance, unsigned wired_count, - boolean_t no_cache) + boolean_t no_cache, + boolean_t permanent, + unsigned int superpage_size) { vm_map_entry_t new_entry; @@ -9972,7 +10784,10 @@ vm_map_entry_insert( new_entry->user_wired_count = 0; new_entry->use_pmap = FALSE; new_entry->alias = 0; + new_entry->zero_wired_pages = FALSE; new_entry->no_cache = no_cache; + new_entry->permanent = permanent; + new_entry->superpage_size = superpage_size; /* * Insert the new entry into the list. 
@@ -10070,11 +10885,6 @@ vm_map_remap_extract( break; } - if(src_entry->is_sub_map) { - result = KERN_INVALID_ADDRESS; - break; - } - tmp_size = size - mapped_size; if (src_end > src_entry->vme_end) tmp_size -= (src_end - src_entry->vme_end); @@ -10774,22 +11584,28 @@ vm_map_purgable_control( return(KERN_INVALID_ARGUMENT); if (control != VM_PURGABLE_SET_STATE && - control != VM_PURGABLE_GET_STATE) + control != VM_PURGABLE_GET_STATE && + control != VM_PURGABLE_PURGE_ALL) return(KERN_INVALID_ARGUMENT); + if (control == VM_PURGABLE_PURGE_ALL) { + vm_purgeable_object_purge_all(); + return KERN_SUCCESS; + } + if (control == VM_PURGABLE_SET_STATE && - (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) || + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) return(KERN_INVALID_ARGUMENT); - vm_map_lock(map); + vm_map_lock_read(map); if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { /* * Must pass a valid non-submap address. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_INVALID_ADDRESS); } @@ -10797,7 +11613,7 @@ vm_map_purgable_control( /* * Can't apply purgable controls to something you can't write. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_PROTECTION_FAILURE); } @@ -10806,7 +11622,7 @@ vm_map_purgable_control( /* * Object must already be present or it can't be purgable. */ - vm_map_unlock(map); + vm_map_unlock_read(map); return KERN_INVALID_ARGUMENT; } @@ -10818,12 +11634,12 @@ vm_map_purgable_control( * Can only apply purgable controls to the whole (existing) * object at once. 
*/ - vm_map_unlock(map); + vm_map_unlock_read(map); vm_object_unlock(object); return KERN_INVALID_ARGUMENT; } - vm_map_unlock(map); + vm_map_unlock_read(map); kr = vm_object_purgable_control(object, control, state); @@ -10833,100 +11649,173 @@ vm_map_purgable_control( } kern_return_t -vm_map_page_info( +vm_map_page_query_internal( vm_map_t target_map, vm_map_offset_t offset, int *disposition, int *ref_count) { - vm_map_entry_t map_entry; - vm_object_t object; - vm_page_t m; - kern_return_t kr; - kern_return_t retval = KERN_SUCCESS; - boolean_t top_object = TRUE; - - *disposition = 0; - *ref_count = 0; + kern_return_t kr; + vm_page_info_basic_data_t info; + mach_msg_type_number_t count; + + count = VM_PAGE_INFO_BASIC_COUNT; + kr = vm_map_page_info(target_map, + offset, + VM_PAGE_INFO_BASIC, + (vm_page_info_t) &info, + &count); + if (kr == KERN_SUCCESS) { + *disposition = info.disposition; + *ref_count = info.ref_count; + } else { + *disposition = 0; + *ref_count = 0; + } - vm_map_lock_read(target_map); + return kr; +} + +kern_return_t +vm_map_page_info( + vm_map_t map, + vm_map_offset_t offset, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + vm_map_entry_t map_entry; + vm_object_t object; + vm_page_t m; + kern_return_t kr; + kern_return_t retval = KERN_SUCCESS; + boolean_t top_object; + int disposition; + int ref_count; + vm_object_id_t object_id; + vm_page_info_basic_t basic_info; + int depth; -restart_page_query: - if (!vm_map_lookup_entry(target_map, offset, &map_entry)) { - vm_map_unlock_read(target_map); - return KERN_FAILURE; + switch (flavor) { + case VM_PAGE_INFO_BASIC: + if (*count != VM_PAGE_INFO_BASIC_COUNT) { + return KERN_INVALID_ARGUMENT; + } + break; + default: + return KERN_INVALID_ARGUMENT; } - offset -= map_entry->vme_start; /* adjust to offset within entry */ - offset += map_entry->offset; /* adjust to target object offset */ - if (map_entry->object.vm_object != VM_OBJECT_NULL) { - if 
(!map_entry->is_sub_map) { - object = map_entry->object.vm_object; - } else { - vm_map_t sub_map; + disposition = 0; + ref_count = 0; + object_id = 0; + top_object = TRUE; + depth = 0; + + retval = KERN_SUCCESS; + offset = vm_map_trunc_page(offset); + + vm_map_lock_read(map); + + /* + * First, find the map entry covering "offset", going down + * submaps if necessary. + */ + for (;;) { + if (!vm_map_lookup_entry(map, offset, &map_entry)) { + vm_map_unlock_read(map); + return KERN_INVALID_ADDRESS; + } + /* compute offset from this map entry's start */ + offset -= map_entry->vme_start; + /* compute offset into this map entry's object (or submap) */ + offset += map_entry->offset; + + if (map_entry->is_sub_map) { + vm_map_t sub_map; sub_map = map_entry->object.sub_map; vm_map_lock_read(sub_map); - vm_map_unlock_read(target_map); + vm_map_unlock_read(map); - target_map = sub_map; - goto restart_page_query; + map = sub_map; + + ref_count = MAX(ref_count, map->ref_count); + continue; } - } else { - vm_map_unlock_read(target_map); - return KERN_SUCCESS; + break; } + + object = map_entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + /* no object -> no page */ + vm_map_unlock_read(map); + goto done; + } + vm_object_lock(object); - vm_map_unlock_read(target_map); + vm_map_unlock_read(map); + + /* + * Go down the VM object shadow chain until we find the page + * we're looking for. 
+ */ + for (;;) { + ref_count = MAX(ref_count, object->ref_count); - while (TRUE) { m = vm_page_lookup(object, offset); if (m != VM_PAGE_NULL) { - *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; + disposition |= VM_PAGE_QUERY_PAGE_PRESENT; break; } else { #if MACH_PAGEMAP if (object->existence_map) { - if (vm_external_state_get(object->existence_map, offset) - == VM_EXTERNAL_STATE_EXISTS) { + if (vm_external_state_get(object->existence_map, + offset) == + VM_EXTERNAL_STATE_EXISTS) { /* * this page has been paged out */ - *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; break; } } else #endif + { if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - - memory_object_t pager; - - vm_object_paging_begin(object); - pager = object->pager; - vm_object_unlock(object); + object->alive && + !object->terminating && + object->pager_ready) { - kr = memory_object_data_request( - pager, - offset + object->paging_offset, - 0, /* just poke the pager */ - VM_PROT_READ, - NULL); + memory_object_t pager; - vm_object_lock(object); - vm_object_paging_end(object); + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); - if (kr == KERN_SUCCESS) { /* - * the pager has this page + * Ask the default pager if + * it has this page. 
*/ - *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; - break; + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the default pager has it */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } } } + if (object->shadow != VM_OBJECT_NULL) { vm_object_t shadow; @@ -10938,12 +11827,13 @@ vm_map_page_info( object = shadow; top_object = FALSE; + depth++; } else { - if (!object->internal) - break; - - retval = KERN_FAILURE; - goto page_query_done; +// if (!object->internal) +// break; +// retval = KERN_FAILURE; +// goto done_with_object; + break; } } } @@ -10957,34 +11847,47 @@ vm_map_page_info( /* but this would under count as only faulted-in mappings would */ /* show up. */ - *ref_count = object->ref_count; - if (top_object == TRUE && object->shadow) - *disposition |= VM_PAGE_QUERY_PAGE_COPIED; + disposition |= VM_PAGE_QUERY_PAGE_COPIED; + + if (! 
object->internal) + disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL; if (m == VM_PAGE_NULL) - goto page_query_done; + goto done_with_object; if (m->fictitious) { - *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; - goto page_query_done; + disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; + goto done_with_object; } if (m->dirty || pmap_is_modified(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; + disposition |= VM_PAGE_QUERY_PAGE_DIRTY; if (m->reference || pmap_is_referenced(m->phys_page)) - *disposition |= VM_PAGE_QUERY_PAGE_REF; + disposition |= VM_PAGE_QUERY_PAGE_REF; if (m->speculative) - *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; + disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; if (m->cs_validated) - *disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; + disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; if (m->cs_tainted) - *disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; + disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; -page_query_done: +done_with_object: vm_object_unlock(object); +done: + + switch (flavor) { + case VM_PAGE_INFO_BASIC: + basic_info = (vm_page_info_basic_t) info; + basic_info->disposition = disposition; + basic_info->ref_count = ref_count; + basic_info->object_id = (vm_object_id_t) (uintptr_t) object; + basic_info->offset = (memory_object_offset_t) offset; + basic_info->depth = depth; + break; + } return retval; } @@ -11041,7 +11944,6 @@ vm_map_msync( vm_map_size_t amount_left; vm_object_offset_t offset; boolean_t do_sync_req; - boolean_t modifiable; boolean_t had_hole = FALSE; memory_object_t pager; @@ -11151,23 +12053,22 @@ vm_map_msync( continue; } offset += entry->offset; - modifiable = (entry->protection & VM_PROT_WRITE) - != VM_PROT_NONE; vm_object_lock(object); if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { - boolean_t kill_pages = 0; + int kill_pages = 0; + boolean_t reusable_pages = FALSE; if (sync_flags & VM_SYNC_KILLPAGES) { - if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) + if (object->ref_count == 1 
&& !object->shadow) kill_pages = 1; else kill_pages = -1; } if (kill_pages != -1) vm_object_deactivate_pages(object, offset, - (vm_object_size_t)flush_size, kill_pages); + (vm_object_size_t)flush_size, kill_pages, reusable_pages); vm_object_unlock(object); vm_map_unlock(map); continue; @@ -11195,15 +12096,14 @@ vm_map_msync( offset, flush_size, sync_flags & VM_SYNC_INVALIDATE, - (modifiable && - (sync_flags & VM_SYNC_SYNCHRONOUS || - sync_flags & VM_SYNC_ASYNCHRONOUS)), + ((sync_flags & VM_SYNC_SYNCHRONOUS) || + (sync_flags & VM_SYNC_ASYNCHRONOUS)), sync_flags & VM_SYNC_SYNCHRONOUS); /* * only send a m_o_s if we returned pages or if the entry * is writable (ie dirty pages may have already been sent back) */ - if (!do_sync_req && !modifiable) { + if (!do_sync_req) { if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { /* * clear out the clustering and read-ahead hints @@ -11338,7 +12238,7 @@ convert_port_entry_to_map( == IKOT_NAMED_ENTRY)) { named_entry = (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); try_failed_count++; @@ -11346,7 +12246,7 @@ convert_port_entry_to_map( continue; } named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); ip_unlock(port); if ((named_entry->is_sub_map) && (named_entry->protection @@ -11396,7 +12296,7 @@ convert_port_entry_to_object( == IKOT_NAMED_ENTRY)) { named_entry = (vm_named_entry_t)port->ip_kobject; - if (!(mutex_try(&(named_entry)->Lock))) { + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); try_failed_count++; @@ -11404,7 +12304,7 @@ convert_port_entry_to_object( continue; } named_entry->ref_count++; - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); ip_unlock(port); if ((!named_entry->is_sub_map) && (!named_entry->is_pager) && @@ -11455,14 +12355,14 @@ vm_map_reference( if (map == VM_MAP_NULL) return; - 
mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); #if TASK_SWAPPER assert(map->res_count > 0); assert(map->ref_count >= map->res_count); map->res_count++; #endif map->ref_count++; - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); } /* @@ -11481,15 +12381,15 @@ vm_map_deallocate( if (map == VM_MAP_NULL) return; - mutex_lock(&map->s_lock); + lck_mtx_lock(&map->s_lock); ref = --map->ref_count; if (ref > 0) { vm_map_res_deallocate(map); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); return; } assert(map->ref_count == 0); - mutex_unlock(&map->s_lock); + lck_mtx_unlock(&map->s_lock); #if TASK_SWAPPER /* @@ -11564,13 +12464,22 @@ vm_map_has_4GB_pagezero( void vm_map_set_4GB_pagezero(vm_map_t map) { +#ifdef __i386__ pmap_set_4GB_pagezero(map->pmap); +#else +#pragma unused(map) +#endif + } void vm_map_clear_4GB_pagezero(vm_map_t map) { +#ifdef __i386__ pmap_clear_4GB_pagezero(map->pmap); +#else +#pragma unused(map) +#endif } /* @@ -11630,10 +12539,11 @@ vm_map_set_user_wire_limit(vm_map_t map, map->user_wire_limit = limit; } -void vm_map_set_prot_copy_allow(vm_map_t map, - boolean_t allow) + +void vm_map_switch_protect(vm_map_t map, + boolean_t val) { vm_map_lock(map); - map->prot_copy_allow = allow; + map->switch_protect=val; vm_map_unlock(map); -}; +} diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index 423930b97..f520087ed 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -139,9 +139,9 @@ typedef union vm_map_object { vm_map_t sub_map; /* belongs to another map */ } vm_map_object_t; -#define named_entry_lock_init(object) mutex_init(&(object)->Lock, 0) -#define named_entry_lock(object) mutex_lock(&(object)->Lock) -#define named_entry_unlock(object) mutex_unlock(&(object)->Lock) +#define named_entry_lock_init(object) lck_mtx_init(&(object)->Lock, &vm_object_lck_grp, &vm_object_lck_attr) +#define named_entry_lock(object) lck_mtx_lock(&(object)->Lock) +#define named_entry_unlock(object) lck_mtx_unlock(&(object)->Lock) /* * Type: 
vm_named_entry_t [internal use only] @@ -162,7 +162,7 @@ typedef union vm_map_object { */ struct vm_named_entry { - decl_mutex_data(, Lock) /* Synchronization */ + decl_lck_mtx_data(, Lock) /* Synchronization */ union { vm_object_t object; /* object I point to */ memory_object_t pager; /* amo pager port */ @@ -219,13 +219,31 @@ struct vm_map_entry { /* vm_prot_t */ max_protection:3,/* maximum protection */ /* vm_inherit_t */ inheritance:2, /* inheritance */ /* boolean_t */ use_pmap:1, /* nested pmaps */ + /* + * IMPORTANT: + * The "alias" field can be updated while holding the VM map lock + * "shared". It's OK as along as it's the only field that can be + * updated without the VM map "exclusive" lock. + */ /* unsigned char */ alias:8, /* user alias */ /* boolean_t */ no_cache:1, /* should new pages be cached? */ - /* unsigned char */ pad:7; /* available bits */ + /* boolean_t */ permanent:1, /* mapping can not be removed */ + /* boolean_t */ superpage_size:3,/* use superpages of a certain size */ + /* boolean_t */ zero_wired_pages:1, /* zero out the wired pages of this entry it is being deleted without unwiring them */ + /* unsigned char */ pad:2; /* available bits */ unsigned short wired_count; /* can be paged if = 0 */ unsigned short user_wired_count; /* for vm_wire */ }; +/* + * Convenience macros for dealing with superpages + * SUPERPAGE_NBASEPAGES is architecture dependent and defined in pmap.h + */ +#define SUPERPAGE_SIZE (PAGE_SIZE*SUPERPAGE_NBASEPAGES) +#define SUPERPAGE_MASK (-SUPERPAGE_SIZE) +#define SUPERPAGE_ROUND_DOWN(a) (a & SUPERPAGE_MASK) +#define SUPERPAGE_ROUND_UP(a) ((a + SUPERPAGE_SIZE-1) & SUPERPAGE_MASK) + /* * wired_counts are unsigned short. This value is used to safeguard * against any mishaps due to runaway user programs. 
@@ -276,7 +294,8 @@ struct _vm_map { int res_count; /* Residence count (swap) */ int sw_state; /* Swap state */ #endif /* TASK_SWAPPER */ - decl_mutex_data(, s_lock) /* Lock ref, res fields */ + decl_lck_mtx_data(, s_lock) /* Lock ref, res fields */ + lck_mtx_ext_t s_lock_ext; vm_map_entry_t hint; /* hint for quick lookups */ vm_map_entry_t first_free; /* First free space hint */ boolean_t wait_for_space; /* Should callers wait @@ -284,7 +303,7 @@ struct _vm_map { boolean_t wiring_required;/* All memory wired? */ boolean_t no_zero_fill; /* No zero fill absent pages */ boolean_t mapped; /* has this map been mapped */ - boolean_t prot_copy_allow;/* is VM_PROT_COPY allowed on this map */ + boolean_t switch_protect; /* Protect map from write faults while switched */ unsigned int timestamp; /* Version number */ unsigned int color_rr; /* next color (not protected by a lock) */ } ; @@ -469,7 +488,9 @@ extern vm_map_entry_t vm_map_entry_insert( vm_behavior_t behavior, vm_inherit_t inheritance, unsigned wired_count, - boolean_t no_cache); + boolean_t no_cache, + boolean_t permanent, + unsigned int superpage_size); /* @@ -516,10 +537,10 @@ extern void vm_map_reference_swap( MACRO_BEGIN \ vm_map_t Map = (map); \ if (Map) { \ - mutex_lock(&Map->s_lock); \ + lck_mtx_lock(&Map->s_lock); \ Map->res_count++; \ Map->ref_count++; \ - mutex_unlock(&Map->s_lock); \ + lck_mtx_unlock(&Map->s_lock); \ } \ MACRO_END @@ -527,10 +548,10 @@ MACRO_END MACRO_BEGIN \ vm_map_t Lmap = (map); \ if (Lmap->res_count == 0) { \ - mutex_unlock(&Lmap->s_lock);\ + lck_mtx_unlock(&Lmap->s_lock);\ vm_map_lock(Lmap); \ vm_map_swapin(Lmap); \ - mutex_lock(&Lmap->s_lock); \ + lck_mtx_lock(&Lmap->s_lock); \ ++Lmap->res_count; \ vm_map_unlock(Lmap); \ } else \ @@ -541,21 +562,21 @@ MACRO_END MACRO_BEGIN \ vm_map_t Map = (map); \ if (--Map->res_count == 0) { \ - mutex_unlock(&Map->s_lock); \ + lck_mtx_unlock(&Map->s_lock); \ vm_map_lock(Map); \ vm_map_swapout(Map); \ vm_map_unlock(Map); \ - 
mutex_lock(&Map->s_lock); \ + lck_mtx_lock(&Map->s_lock); \ } \ MACRO_END #define vm_map_reference_swap(map) \ MACRO_BEGIN \ vm_map_t Map = (map); \ - mutex_lock(&Map->s_lock); \ + lck_mtx_lock(&Map->s_lock); \ ++Map->ref_count; \ vm_map_res_reference(Map); \ - mutex_unlock(&Map->s_lock); \ + lck_mtx_unlock(&Map->s_lock); \ MACRO_END #endif /* MACH_ASSERT */ @@ -571,9 +592,9 @@ extern void vm_map_swapout( MACRO_BEGIN \ vm_map_t Map = (map); \ if (Map) { \ - mutex_lock(&Map->s_lock); \ + lck_mtx_lock(&Map->s_lock); \ Map->ref_count++; \ - mutex_unlock(&Map->s_lock); \ + lck_mtx_unlock(&Map->s_lock); \ } \ MACRO_END @@ -604,21 +625,21 @@ extern vm_object_t vm_submap_object; #define vm_map_ref_fast(map) \ MACRO_BEGIN \ - mutex_lock(&map->s_lock); \ + lck_mtx_lock(&map->s_lock); \ map->ref_count++; \ vm_map_res_reference(map); \ - mutex_unlock(&map->s_lock); \ + lck_mtx_unlock(&map->s_lock); \ MACRO_END #define vm_map_dealloc_fast(map) \ MACRO_BEGIN \ register int c; \ \ - mutex_lock(&map->s_lock); \ + lck_mtx_lock(&map->s_lock); \ c = --map->ref_count; \ if (c > 0) \ vm_map_res_deallocate(map); \ - mutex_unlock(&map->s_lock); \ + lck_mtx_unlock(&map->s_lock); \ if (c == 0) \ vm_map_destroy(map); \ MACRO_END @@ -748,12 +769,13 @@ extern kern_return_t vm_map_region_recurse_64( vm_region_submap_info_64_t info, mach_msg_type_number_t *count); -extern kern_return_t vm_map_page_info( +extern kern_return_t vm_map_page_query_internal( vm_map_t map, vm_map_offset_t offset, int *disposition, int *ref_count); + extern kern_return_t vm_map_submap( vm_map_t map, vm_map_offset_t start, @@ -852,6 +874,20 @@ extern kern_return_t vm_map_enter_mem_object( vm_prot_t max_protection, vm_inherit_t inheritance); +/* Enter a mapping of a memory object */ +extern kern_return_t vm_map_enter_mem_object_control( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + int flags, + memory_object_control_t control, + vm_object_offset_t offset, + boolean_t 
needs_copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance); + /* Deallocate a region */ extern kern_return_t vm_map_remove( vm_map_t map, @@ -868,7 +904,7 @@ extern kern_return_t vm_map_copy_overwrite( vm_map_t dst_map, vm_map_address_t dst_addr, vm_map_copy_t copy, - int interruptible); + boolean_t interruptible); /* Place a copy into a map */ extern kern_return_t vm_map_copyout( @@ -924,9 +960,19 @@ extern void vm_map_set_user_wire_limit( vm_map_t map, vm_size_t limit); -extern void vm_map_set_prot_copy_allow( - vm_map_t map, - boolean_t allow); +extern void vm_map_switch_protect( + vm_map_t map, + boolean_t val); + +#ifdef XNU_KERNEL_PRIVATE +extern kern_return_t vm_map_page_info( + vm_map_t map, + vm_map_offset_t offset, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count); +#endif /* XNU_KERNEL_PRIVATE */ + #ifdef MACH_KERNEL_PRIVATE @@ -971,12 +1017,12 @@ extern void vm_map_set_prot_copy_allow( extern kern_return_t vm_map_get_upl( vm_map_t target_map, vm_map_offset_t map_offset, - vm_size_t *size, + upl_size_t *size, upl_t *upl, upl_page_info_array_t page_info, - mach_msg_type_number_t *page_infoCnt, - integer_t *flags, - integer_t force_data_sync); + unsigned int *page_infoCnt, + int *flags, + int force_data_sync); __END_DECLS diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index d290fa801..f49238416 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -95,6 +95,10 @@ #include #include +#if CONFIG_EMBEDDED +#include +#endif + /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. 
A given @@ -175,12 +179,6 @@ static kern_return_t vm_object_terminate( extern void vm_object_remove( vm_object_t object); -static vm_object_t vm_object_cache_trim( - boolean_t called_from_vm_object_deallocate); - -static void vm_object_deactivate_all_pages( - vm_object_t object); - static kern_return_t vm_object_copy_call( vm_object_t src_object, vm_object_offset_t src_offset, @@ -196,7 +194,8 @@ static void vm_object_do_bypass( vm_object_t backing_object); static void vm_object_release_pager( - memory_object_t pager); + memory_object_t pager, + boolean_t hashed); static zone_t vm_object_zone; /* vm backing store zone */ @@ -226,6 +225,11 @@ static struct vm_object vm_submap_object_store; */ static struct vm_object vm_object_template; +unsigned int vm_page_purged_wired = 0; +unsigned int vm_page_purged_busy = 0; +unsigned int vm_page_purged_others = 0; + +#if VM_OBJECT_CACHE /* * Virtual memory objects that are not referenced by * any address maps, but that are allowed to persist @@ -254,23 +258,41 @@ static struct vm_object vm_object_template; * from the reference mechanism, so that the lock need * not be held to make simple references. 
*/ +static vm_object_t vm_object_cache_trim( + boolean_t called_from_vm_object_deallocate); + static queue_head_t vm_object_cached_list; static int vm_object_cached_count=0; static int vm_object_cached_high; /* highest # cached objects */ static int vm_object_cached_max = 512; /* may be patched*/ -static decl_mutex_data(,vm_object_cached_lock_data) +static lck_mtx_t vm_object_cached_lock_data; +static lck_mtx_ext_t vm_object_cached_lock_data_ext; #define vm_object_cache_lock() \ - mutex_lock(&vm_object_cached_lock_data) -#define vm_object_cache_lock_try() \ - mutex_try(&vm_object_cached_lock_data) + lck_mtx_lock(&vm_object_cached_lock_data) +#define vm_object_cache_lock_try() \ + lck_mtx_try_lock(&vm_object_cached_lock_data) +#define vm_object_cache_lock_spin() \ + lck_mtx_lock_spin(&vm_object_cached_lock_data) #define vm_object_cache_unlock() \ - mutex_unlock(&vm_object_cached_lock_data) + lck_mtx_unlock(&vm_object_cached_lock_data) + +#endif /* VM_OBJECT_CACHE */ + + +static void vm_object_deactivate_all_pages( + vm_object_t object); + #define VM_OBJECT_HASH_COUNT 1024 +#define VM_OBJECT_HASH_LOCK_COUNT 512 + +static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_COUNT]; +static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_COUNT]; + static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; -static struct zone *vm_object_hash_zone; +static struct zone *vm_object_hash_zone; struct vm_object_hash_entry { queue_chain_t hash_link; /* hash chain link */ @@ -283,9 +305,12 @@ struct vm_object_hash_entry { typedef struct vm_object_hash_entry *vm_object_hash_entry_t; #define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0) -#define VM_OBJECT_HASH_SHIFT 8 +#define VM_OBJECT_HASH_SHIFT 5 #define vm_object_hash(pager) \ - ((((unsigned)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT) + ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)) + +#define vm_object_lock_hash(pager) \ + ((int)((((uintptr_t)pager) >> 
VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT)) void vm_object_hash_entry_free( vm_object_hash_entry_t entry); @@ -293,38 +318,68 @@ void vm_object_hash_entry_free( static void vm_object_reap(vm_object_t object); static void vm_object_reap_async(vm_object_t object); static void vm_object_reaper_thread(void); -static queue_head_t vm_object_reaper_queue; /* protected by vm_object_cache_lock() */ + +static lck_mtx_t vm_object_reaper_lock_data; +static lck_mtx_ext_t vm_object_reaper_lock_data_ext; + +static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */ unsigned int vm_object_reap_count = 0; unsigned int vm_object_reap_count_async = 0; +#define vm_object_reaper_lock() \ + lck_mtx_lock(&vm_object_reaper_lock_data) +#define vm_object_reaper_lock_spin() \ + lck_mtx_lock_spin(&vm_object_reaper_lock_data) +#define vm_object_reaper_unlock() \ + lck_mtx_unlock(&vm_object_reaper_lock_data) + + + +static lck_mtx_t * +vm_object_hash_lock_spin( + memory_object_t pager) +{ + int index; + + index = vm_object_lock_hash(pager); + + lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]); + + return (&vm_object_hashed_lock_data[index]); +} + +static void +vm_object_hash_unlock(lck_mtx_t *lck) +{ + lck_mtx_unlock(lck); +} + + /* * vm_object_hash_lookup looks up a pager in the hashtable * and returns the corresponding entry, with optional removal. 
*/ - static vm_object_hash_entry_t vm_object_hash_lookup( memory_object_t pager, boolean_t remove_entry) { - register queue_t bucket; - register vm_object_hash_entry_t entry; + queue_t bucket; + vm_object_hash_entry_t entry; bucket = &vm_object_hashtable[vm_object_hash(pager)]; entry = (vm_object_hash_entry_t)queue_first(bucket); while (!queue_end(bucket, (queue_entry_t)entry)) { - if (entry->pager == pager && !remove_entry) - return(entry); - else if (entry->pager == pager) { - queue_remove(bucket, entry, - vm_object_hash_entry_t, hash_link); + if (entry->pager == pager) { + if (remove_entry) { + queue_remove(bucket, entry, + vm_object_hash_entry_t, hash_link); + } return(entry); } - entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link); } - return(VM_OBJECT_HASH_ENTRY_NULL); } @@ -335,13 +390,17 @@ vm_object_hash_lookup( static void vm_object_hash_insert( - vm_object_hash_entry_t entry) + vm_object_hash_entry_t entry, + vm_object_t object) { - register queue_t bucket; + queue_t bucket; bucket = &vm_object_hashtable[vm_object_hash(entry->pager)]; queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link); + + entry->object = object; + object->hashed = TRUE; } static vm_object_hash_entry_t @@ -378,12 +437,12 @@ _vm_object_allocate( { XPR(XPR_VM_OBJECT, "vm_object_allocate, object 0x%X size 0x%X\n", - (integer_t)object, size, 0,0,0); + object, size, 0,0,0); *object = vm_object_template; queue_init(&object->memq); queue_init(&object->msr_q); -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_init(&object->uplq); #endif /* UPL_DEBUG */ vm_object_lock_init(object); @@ -407,10 +466,10 @@ vm_object_allocate( } -lck_grp_t vm_object_lck_grp; +lck_grp_t vm_object_lck_grp; lck_grp_attr_t vm_object_lck_grp_attr; -lck_attr_t vm_object_lck_attr; -lck_attr_t kernel_object_lck_attr; +lck_attr_t vm_object_lck_attr; +lck_attr_t kernel_object_lck_attr; /* * vm_object_bootstrap: @@ -423,24 +482,42 @@ vm_object_bootstrap(void) register int i; vm_object_zone = zinit((vm_size_t) 
sizeof(struct vm_object), - round_page_32(512*1024), - round_page_32(12*1024), + round_page(512*1024), + round_page(12*1024), "vm objects"); - queue_init(&vm_object_reaper_queue); + vm_object_init_lck_grp(); + +#if VM_OBJECT_CACHE queue_init(&vm_object_cached_list); - mutex_init(&vm_object_cached_lock_data, 0); + + lck_mtx_init_ext(&vm_object_cached_lock_data, + &vm_object_cached_lock_data_ext, + &vm_object_lck_grp, + &vm_object_lck_attr); +#endif + queue_init(&vm_object_reaper_queue); + + for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) { + lck_mtx_init_ext(&vm_object_hashed_lock_data[i], + &vm_object_hashed_lock_data_ext[i], + &vm_object_lck_grp, + &vm_object_lck_attr); + } + lck_mtx_init_ext(&vm_object_reaper_lock_data, + &vm_object_reaper_lock_data_ext, + &vm_object_lck_grp, + &vm_object_lck_attr); vm_object_hash_zone = zinit((vm_size_t) sizeof (struct vm_object_hash_entry), - round_page_32(512*1024), - round_page_32(12*1024), + round_page(512*1024), + round_page(12*1024), "vm object hash entries"); for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) queue_init(&vm_object_hashtable[i]); - vm_object_init_lck_grp(); /* * Fill in a template object, for quick initialization @@ -453,7 +530,7 @@ vm_object_bootstrap(void) /* * We can't call vm_object_lock_init() here because that will * allocate some memory and VM is not fully initialized yet. - * The lock will be initialized for each allocate object in + * The lock will be initialized for each allocated object in * _vm_object_allocate(), so we don't need to initialize it in * the vm_object_template. 
*/ @@ -466,6 +543,8 @@ vm_object_bootstrap(void) vm_object_template.res_count = 1; #endif /* TASK_SWAPPER */ vm_object_template.resident_page_count = 0; + vm_object_template.wired_page_count = 0; + vm_object_template.reusable_page_count = 0; vm_object_template.copy = VM_OBJECT_NULL; vm_object_template.shadow = VM_OBJECT_NULL; vm_object_template.shadow_offset = (vm_object_offset_t) 0; @@ -474,6 +553,7 @@ vm_object_bootstrap(void) vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; vm_object_template.paging_in_progress = 0; + vm_object_template.activity_in_progress = 0; /* Begin bitfields */ vm_object_template.all_wanted = 0; /* all bits FALSE */ @@ -520,9 +600,15 @@ vm_object_bootstrap(void) /* cache bitfields */ vm_object_template.wimg_bits = VM_WIMG_DEFAULT; vm_object_template.code_signed = FALSE; + vm_object_template.hashed = FALSE; + vm_object_template.transposed = FALSE; vm_object_template.mapping_in_progress = FALSE; - vm_object_template.not_in_use = 0; -#ifdef UPL_DEBUG + vm_object_template.volatile_empty = FALSE; + vm_object_template.volatile_fault = FALSE; + vm_object_template.all_reusable = FALSE; + vm_object_template.blocked_access = FALSE; + vm_object_template.__object2_unused_bits = 0; +#if UPL_DEBUG vm_object_template.uplq.prev = NULL; vm_object_template.uplq.next = NULL; #endif /* UPL_DEBUG */ @@ -547,11 +633,11 @@ vm_object_bootstrap(void) */ #ifdef ppc - _vm_object_allocate((vm_last_addr - VM_MIN_KERNEL_ADDRESS) + 1, - kernel_object); + _vm_object_allocate(vm_last_addr + 1, + kernel_object); #else - _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, - kernel_object); + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + kernel_object); #endif kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; @@ -562,11 +648,11 @@ vm_object_bootstrap(void) vm_submap_object = &vm_submap_object_store; #ifdef ppc - _vm_object_allocate((vm_last_addr - 
VM_MIN_KERNEL_ADDRESS) + 1, - vm_submap_object); + _vm_object_allocate(vm_last_addr + 1, + vm_submap_object); #else - _vm_object_allocate((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) + 1, - vm_submap_object); + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + vm_submap_object); #endif vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; @@ -611,21 +697,22 @@ vm_object_init(void) __private_extern__ void vm_object_init_lck_grp(void) { - /* + /* * initialze the vm_object lock world */ - lck_grp_attr_setdefault(&vm_object_lck_grp_attr); + lck_grp_attr_setdefault(&vm_object_lck_grp_attr); lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr); lck_attr_setdefault(&vm_object_lck_attr); lck_attr_setdefault(&kernel_object_lck_attr); lck_attr_cleardebug(&kernel_object_lck_attr); } - +#if VM_OBJECT_CACHE #define MIGHT_NOT_CACHE_SHADOWS 1 #if MIGHT_NOT_CACHE_SHADOWS static int cache_shadows = TRUE; #endif /* MIGHT_NOT_CACHE_SHADOWS */ +#endif /* * vm_object_deallocate: @@ -645,9 +732,11 @@ __private_extern__ void vm_object_deallocate( register vm_object_t object) { +#if VM_OBJECT_CACHE boolean_t retry_cache_trim = FALSE; - vm_object_t shadow = VM_OBJECT_NULL; uint32_t try_failed_count = 0; +#endif + vm_object_t shadow = VM_OBJECT_NULL; // if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ // else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ @@ -656,12 +745,14 @@ vm_object_deallocate( return; if (object == kernel_object) { - vm_object_lock(kernel_object); - kernel_object->ref_count--; - if (kernel_object->ref_count == 0) { + vm_object_lock_shared(object); + + OSAddAtomic(-1, &object->ref_count); + + if (object->ref_count == 0) { panic("vm_object_deallocate: losing kernel_object\n"); } - vm_object_unlock(kernel_object); + vm_object_unlock(object); return; } @@ -703,7 +794,9 @@ vm_object_deallocate( vm_object_unlock(object); if (atomic_swap) { - /* ref_count was updated atomically ! 
*/ + /* + * ref_count was updated atomically ! + */ vm_object_deallocate_shared_successes++; return; } @@ -718,28 +811,8 @@ vm_object_deallocate( while (object != VM_OBJECT_NULL) { - /* - * The cache holds a reference (uncounted) to - * the object; we must lock it before removing - * the object. - */ - for (;;) { - vm_object_cache_lock(); - - /* - * if we try to take a regular lock here - * we risk deadlocking against someone - * holding a lock on this object while - * trying to vm_object_deallocate a different - * object - */ - if (vm_object_lock_try(object)) - break; - vm_object_cache_unlock(); - try_failed_count++; + vm_object_lock(object); - mutex_pause(try_failed_count); /* wait a bit */ - } assert(object->ref_count > 0); /* @@ -757,32 +830,24 @@ vm_object_deallocate( vm_object_mapping_wait(object, THREAD_UNINT); vm_object_mapping_begin(object); vm_object_unlock(object); - vm_object_cache_unlock(); - - memory_object_last_unmap(pager); - - try_failed_count = 0; - for (;;) { - vm_object_cache_lock(); - - /* - * if we try to take a regular lock here - * we risk deadlocking against someone - * holding a lock on this object while - * trying to vm_object_deallocate a different - * object - */ - if (vm_object_lock_try(object)) - break; - vm_object_cache_unlock(); - try_failed_count++; - mutex_pause(try_failed_count); /* wait a bit */ - } - assert(object->ref_count > 0); + memory_object_last_unmap(pager); + vm_object_lock(object); vm_object_mapping_end(object); } + /* + * recheck the ref_count since we dropped the object lock + * to call 'memory_object_last_unmap'... 
it's possible + * additional references got taken and we only want + * to deactivate the pages if this 'named' object will only + * referenced by the backing pager once we drop our reference + * below + */ + if (!object->terminating && object->ref_count == 2) + vm_object_deactivate_all_pages(object); + + assert(object->ref_count > 0); } /* @@ -801,7 +866,6 @@ vm_object_deallocate( vm_object_lock_assert_exclusive(object); object->ref_count--; vm_object_res_deallocate(object); - vm_object_cache_unlock(); if (object->ref_count == 1 && object->shadow != VM_OBJECT_NULL) { @@ -823,13 +887,14 @@ vm_object_deallocate( */ vm_object_collapse(object, 0, FALSE); } - vm_object_unlock(object); +#if VM_OBJECT_CACHE if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { continue; } +#endif return; } @@ -844,11 +909,12 @@ vm_object_deallocate( VM_OBJECT_EVENT_INITIALIZED, THREAD_UNINT); vm_object_unlock(object); - vm_object_cache_unlock(); + thread_block(THREAD_CONTINUE_NULL); continue; } +#if VM_OBJECT_CACHE /* * If this object can persist, then enter it in * the cache. Otherwise, terminate it. @@ -864,11 +930,12 @@ vm_object_deallocate( * Now it is safe to decrement reference count, * and to return if reference count is > 0. 
*/ + vm_object_lock_assert_exclusive(object); if (--object->ref_count > 0) { vm_object_res_deallocate(object); vm_object_unlock(object); - vm_object_cache_unlock(); + if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { @@ -897,16 +964,38 @@ vm_object_deallocate( VM_OBJ_RES_DECR(object); XPR(XPR_VM_OBJECT, "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev,0,0); + object, + vm_object_cached_list.next, + vm_object_cached_list.prev,0,0); + + + vm_object_unlock(object); + + try_failed_count = 0; + for (;;) { + vm_object_cache_lock(); + + /* + * if we try to take a regular lock here + * we risk deadlocking against someone + * holding a lock on this object while + * trying to vm_object_deallocate a different + * object + */ + if (vm_object_lock_try(object)) + break; + vm_object_cache_unlock(); + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + } vm_object_cached_count++; if (vm_object_cached_count > vm_object_cached_high) vm_object_cached_high = vm_object_cached_count; queue_enter(&vm_object_cached_list, object, vm_object_t, cached_list); vm_object_cache_unlock(); + vm_object_deactivate_all_pages(object); vm_object_unlock(object); @@ -936,14 +1025,15 @@ vm_object_deallocate( return; } retry_cache_trim = TRUE; - - } else { + } else +#endif /* VM_OBJECT_CACHE */ + { /* * This object is not cachable; terminate it. */ XPR(XPR_VM_OBJECT, "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", - (integer_t)object, object->resident_page_count, + object, object->resident_page_count, object->paging_in_progress, (void *)current_thread(),object->ref_count); @@ -957,24 +1047,31 @@ vm_object_deallocate( * a normal reference. 
*/ shadow = object->pageout?VM_OBJECT_NULL:object->shadow; - if(vm_object_terminate(object) != KERN_SUCCESS) { + + if (vm_object_terminate(object) != KERN_SUCCESS) { return; } if (shadow != VM_OBJECT_NULL) { object = shadow; continue; } +#if VM_OBJECT_CACHE if (retry_cache_trim && ((object = vm_object_cache_trim(TRUE)) != VM_OBJECT_NULL)) { continue; } +#endif return; } } +#if VM_OBJECT_CACHE assert(! retry_cache_trim); +#endif } + +#if VM_OBJECT_CACHE /* * Check to see whether we really need to trim * down the cache. If so, remove an object from @@ -995,6 +1092,8 @@ vm_object_cache_trim( * If we no longer need to trim the cache, * then we are done. */ + if (vm_object_cached_count <= vm_object_cached_max) + return VM_OBJECT_NULL; vm_object_cache_lock(); if (vm_object_cached_count <= vm_object_cached_max) { @@ -1008,8 +1107,8 @@ vm_object_cache_trim( */ XPR(XPR_VM_OBJECT, "vm_object_cache_trim: removing from front of cache (%x, %x)\n", - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0, 0, 0); + vm_object_cached_list.next, + vm_object_cached_list.prev, 0, 0, 0); object = (vm_object_t) queue_first(&vm_object_cached_list); if(object == (vm_object_t) &vm_object_cached_list) { @@ -1027,6 +1126,7 @@ vm_object_cache_trim( cached_list); vm_object_cached_count--; + vm_object_cache_unlock(); /* * Since this object is in the cache, we know * that it is initialized and has no references. @@ -1047,8 +1147,10 @@ vm_object_cache_trim( * (We are careful here to limit recursion.) 
*/ shadow = object->pageout?VM_OBJECT_NULL:object->shadow; + if(vm_object_terminate(object) != KERN_SUCCESS) continue; + if (shadow != VM_OBJECT_NULL) { if (called_from_vm_object_deallocate) { return shadow; @@ -1058,16 +1160,8 @@ vm_object_cache_trim( } } } +#endif -#define VM_OBJ_TERM_STATS DEBUG -#if VM_OBJ_TERM_STATS -uint32_t vm_object_terminate_pages_freed = 0; -uint32_t vm_object_terminate_pages_removed = 0; -uint32_t vm_object_terminate_batches = 0; -uint32_t vm_object_terminate_biggest_batch = 0; -#endif /* VM_OBJ_TERM_STATS */ - -#define V_O_T_MAX_BATCH 256 /* * Routine: vm_object_terminate @@ -1088,76 +1182,15 @@ uint32_t vm_object_terminate_biggest_batch = 0; */ static kern_return_t vm_object_terminate( - register vm_object_t object) + vm_object_t object) { - register vm_page_t p; - vm_object_t shadow_object; - vm_page_t local_free_q; - int loop_count; -#if VM_OBJ_TERM_STATS - uint32_t local_free_count; - uint32_t pages_removed; -#endif /* VM_OBJ_TERM_STATS */ - -#if VM_OBJ_TERM_STATS -#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) \ - MACRO_BEGIN \ - if (_pages_removed) { \ - hw_atomic_add(&vm_object_terminate_batches, 1); \ - hw_atomic_add(&vm_object_terminate_pages_removed, \ - _pages_removed); \ - hw_atomic_add(&vm_object_terminate_pages_freed, \ - _local_free_count); \ - if (_local_free_count > \ - vm_object_terminate_biggest_batch) { \ - vm_object_terminate_biggest_batch = \ - _local_free_count; \ - } \ - _local_free_count = 0; \ - } \ - MACRO_END -#else /* VM_OBJ_TERM_STATS */ -#define VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count) -#endif /* VM_OBJ_TERM_STATS */ - -#define VM_OBJ_TERM_FREELIST(_pages_removed, _local_free_count, _local_free_q) \ - MACRO_BEGIN \ - VM_OBJ_TERM_FREELIST_DEBUG(_pages_removed, _local_free_count); \ - if (_local_free_q) { \ - vm_page_free_list(_local_free_q); \ - _local_free_q = VM_PAGE_NULL; \ - } \ - MACRO_END - - + vm_object_t shadow_object; XPR(XPR_VM_OBJECT, 
"vm_object_terminate, object 0x%X ref %d\n", - (integer_t)object, object->ref_count, 0, 0, 0); - - local_free_q = VM_PAGE_NULL; -#if VM_OBJ_TERM_STATS - local_free_count = 0; - pages_removed = 0; -#endif /* VM_OBJ_TERM_STATS */ - - if (!object->pageout && (!object->temporary || object->can_persist) - && (object->pager != NULL || object->shadow_severed)) { - vm_object_cache_unlock(); - loop_count = V_O_T_MAX_BATCH; - vm_page_lock_queues(); - while (!queue_empty(&object->memq)) { - if (--loop_count == 0) { - /* - * Free the pages we've reclaimed so far and - * take a little break to avoid hogging - * the page queues lock too long. - */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_T_MAX_BATCH; - } + object, object->ref_count, 0, 0, 0); + + if (!object->pageout && (!object->temporary || object->can_persist) && + (object->pager != NULL || object->shadow_severed)) { /* * Clear pager_trusted bit so that the pages get yanked * out of the object instead of cleaned in place. This @@ -1165,101 +1198,15 @@ vm_object_terminate( */ object->pager_trusted = FALSE; - p = (vm_page_t) queue_first(&object->memq); - - VM_PAGE_CHECK(p); - - if (p->busy || p->cleaning) { - if(p->cleaning || p->absent) { - /* free the pages reclaimed so far */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_object_paging_wait(object, THREAD_UNINT); - vm_page_lock_queues(); - continue; - } else { - panic("vm_object_terminate.3 %p %p", object, p); - } - } - - p->busy = TRUE; - VM_PAGE_QUEUES_REMOVE(p); -#if VM_OBJ_TERM_STATS - pages_removed++; -#endif /* VM_OBJ_TERM_STATS */ - - if (p->absent || p->private) { - - /* - * For private pages, VM_PAGE_FREE just - * leaves the page structure around for - * its owner to clean up. For absent - * pages, the structure is returned to - * the appropriate pool. 
- */ - - goto free_page; - } - - if (p->fictitious) { - if (p->phys_page == vm_page_guard_addr) { - goto free_page; - } - panic("vm_object_terminate.4 %p %p", object, p); - } - - if (!p->dirty && p->wpmapped) - p->dirty = pmap_is_modified(p->phys_page); - - if ((p->dirty || p->precious) && !p->error && object->alive) { - /* free the pages reclaimed so far */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_pageout_cluster(p); /* flush page */ - vm_object_paging_wait(object, THREAD_UNINT); - XPR(XPR_VM_OBJECT, - "vm_object_terminate restart, object 0x%X ref %d\n", - (integer_t)object, object->ref_count, 0, 0, 0); - vm_page_lock_queues(); - } else { - free_page: - /* - * Add this page to our list of reclaimed pages, - * to be freed later. - */ - vm_page_free_prepare(p); - p->pageq.next = (queue_entry_t) local_free_q; - local_free_q = p; -#if VM_OBJ_TERM_STATS - local_free_count++; -#endif /* VM_OBJ_TERM_STATS */ - } - } - - /* - * Free the remaining reclaimed pages. - */ - VM_OBJ_TERM_FREELIST(pages_removed, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - vm_object_unlock(object); - vm_object_cache_lock(); - vm_object_lock(object); + vm_object_reap_pages(object, REAP_TERMINATE); } - /* * Make sure the object isn't already being terminated */ - if(object->terminating) { + if (object->terminating) { vm_object_lock_assert_exclusive(object); object->ref_count--; assert(object->ref_count > 0); - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } @@ -1268,12 +1215,11 @@ vm_object_terminate( * Did somebody get a reference to the object while we were * cleaning it? 
*/ - if(object->ref_count != 1) { + if (object->ref_count != 1) { vm_object_lock_assert_exclusive(object); object->ref_count--; assert(object->ref_count > 0); vm_object_res_deallocate(object); - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } @@ -1284,8 +1230,14 @@ vm_object_terminate( object->terminating = TRUE; object->alive = FALSE; - vm_object_remove(object); + if (object->hashed) { + lck_mtx_t *lck; + + lck = vm_object_hash_lock_spin(object->pager); + vm_object_remove(object); + vm_object_hash_unlock(lck); + } /* * Detach the object from its shadow if we are the shadow's * copy. The reference we hold on the shadow must be dropped @@ -1299,7 +1251,8 @@ vm_object_terminate( vm_object_unlock(shadow_object); } - if (object->paging_in_progress != 0) { + if (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { /* * There are still some paging_in_progress references * on this object, meaning that there are some paging @@ -1322,7 +1275,6 @@ vm_object_terminate( * VM object is "terminating" and not "alive". */ vm_object_reap_async(object); - vm_object_cache_unlock(); vm_object_unlock(object); /* * Return KERN_FAILURE to let the caller know that we @@ -1333,13 +1285,15 @@ vm_object_terminate( */ return KERN_FAILURE; } - - /* complete the VM object termination */ + /* + * complete the VM object termination + */ vm_object_reap(object); object = VM_OBJECT_NULL; - /* cache lock and object lock were released by vm_object_reap() */ /* + * the object lock was released by vm_object_reap() + * * KERN_SUCCESS means that this object has been terminated * and no longer needs its shadow object but still holds a * reference on it. @@ -1350,48 +1304,34 @@ vm_object_terminate( return KERN_SUCCESS; } + /* * vm_object_reap(): * * Complete the termination of a VM object after it's been marked * as "terminating" and "!alive" by vm_object_terminate(). * - * The VM object cache and the VM object must be locked by caller. 
- * The locks will be released on return and the VM object is no longer valid. + * The VM object must be locked by caller. + * The lock will be released on return and the VM object is no longer valid. */ void vm_object_reap( vm_object_t object) { memory_object_t pager; - vm_page_t p; - vm_page_t local_free_q; - int loop_count; -#if VM_OBJ_TERM_STATS - uint32_t local_free_count; -#endif /* VM_OBJ_TERM_STATS */ -#if DEBUG - mutex_assert(&vm_object_cached_lock_data, MA_OWNED); -#endif /* DEBUG */ vm_object_lock_assert_exclusive(object); assert(object->paging_in_progress == 0); + assert(object->activity_in_progress == 0); vm_object_reap_count++; - local_free_q = VM_PAGE_NULL; -#if VM_OBJ_TERM_STATS - local_free_count = 0; -#endif /* VM_OBJ_TERM_STATS */ - pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (pager != MEMORY_OBJECT_NULL) memory_object_control_disable(object->pager_control); - vm_object_cache_unlock(); - vm_object_lock_assert_exclusive(object); object->ref_count--; #if TASK_SWAPPER assert(object->res_count == 0); @@ -1399,7 +1339,9 @@ vm_object_reap( assert (object->ref_count == 0); - /* remove from purgeable queue if it's on */ + /* + * remove from purgeable queue if it's on + */ if (object->objq.next || object->objq.prev) { purgeable_q_t queue = vm_purgeable_object_remove(object); assert(queue); @@ -1422,46 +1364,13 @@ vm_object_reap( vm_pageout_object_terminate(object); - } else if ((object->temporary && !object->can_persist) || - (pager == MEMORY_OBJECT_NULL)) { - loop_count = V_O_T_MAX_BATCH; - vm_page_lock_queues(); - while (!queue_empty(&object->memq)) { - if (--loop_count == 0) { - /* - * Free the pages we reclaimed so far - * and take a little break to avoid - * hogging the page queue lock too long - */ - VM_OBJ_TERM_FREELIST(local_free_count, - local_free_count, - local_free_q); - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_T_MAX_BATCH; - } - p = (vm_page_t) queue_first(&object->memq); - - vm_page_free_prepare(p); + } else if 
(((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) { - assert(p->pageq.next == NULL && p->pageq.prev == NULL); - p->pageq.next = (queue_entry_t) local_free_q; - local_free_q = p; -#if VM_OBJ_TERM_STATS - local_free_count++; -#endif /* VM_OBJ_TERM_STATS */ - } - /* - * Free the remaining reclaimed pages - */ - VM_OBJ_TERM_FREELIST(local_free_count, - local_free_count, - local_free_q); - vm_page_unlock_queues(); - } else if (!queue_empty(&object->memq)) { - panic("vm_object_reap: queue just emptied isn't"); + vm_object_reap_pages(object, REAP_REAP); } - + assert(queue_empty(&object->memq)); assert(object->paging_in_progress == 0); + assert(object->activity_in_progress == 0); assert(object->ref_count == 0); /* @@ -1471,7 +1380,7 @@ vm_object_reap( */ if (pager != MEMORY_OBJECT_NULL) { vm_object_unlock(object); - vm_object_release_pager(pager); + vm_object_release_pager(pager, object->hashed); vm_object_lock(object); } @@ -1495,59 +1404,316 @@ vm_object_reap( object = VM_OBJECT_NULL; } -void -vm_object_reap_async( - vm_object_t object) -{ -#if DEBUG - mutex_assert(&vm_object_cached_lock_data, MA_OWNED); -#endif /* DEBUG */ - vm_object_lock_assert_exclusive(object); - vm_object_reap_count_async++; - /* enqueue the VM object... */ - queue_enter(&vm_object_reaper_queue, object, - vm_object_t, cached_list); - /* ... 
and wake up the reaper thread */ - thread_wakeup((event_t) &vm_object_reaper_queue); -} +#define V_O_R_MAX_BATCH 128 + + +#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \ + MACRO_BEGIN \ + if (_local_free_q) { \ + if (do_disconnect) { \ + vm_page_t m; \ + for (m = _local_free_q; \ + m != VM_PAGE_NULL; \ + m = (vm_page_t) m->pageq.next) { \ + if (m->pmapped) { \ + pmap_disconnect(m->phys_page); \ + } \ + } \ + } \ + vm_page_free_list(_local_free_q, TRUE); \ + _local_free_q = VM_PAGE_NULL; \ + } \ + MACRO_END + void -vm_object_reaper_thread(void) +vm_object_reap_pages( + vm_object_t object, + int reap_type) { - vm_object_t object, shadow_object; - - vm_object_cache_lock(); + vm_page_t p; + vm_page_t next; + vm_page_t local_free_q = VM_PAGE_NULL; + int loop_count; + boolean_t disconnect_on_release; - while (!queue_empty(&vm_object_reaper_queue)) { - queue_remove_first(&vm_object_reaper_queue, - object, - vm_object_t, - cached_list); - vm_object_lock(object); - assert(object->terminating); - assert(!object->alive); - + if (reap_type == REAP_DATA_FLUSH) { /* - * The pageout daemon might be playing with our pages. - * Now that the object is dead, it won't touch any more - * pages, but some pages might already be on their way out. - * Hence, we wait until the active paging activities have - * ceased before we break the association with the pager - * itself. + * We need to disconnect pages from all pmaps before + * releasing them to the free list */ - while (object->paging_in_progress != 0) { - vm_object_cache_unlock(); - vm_object_wait(object, - VM_OBJECT_EVENT_PAGING_IN_PROGRESS, - THREAD_UNINT); - vm_object_cache_lock(); - vm_object_lock(object); - } - - shadow_object = - object->pageout ? VM_OBJECT_NULL : object->shadow; + disconnect_on_release = TRUE; + } else { + /* + * Either the caller has already disconnected the pages + * from all pmaps, or we disconnect them here as we add + * them to out local list of pages to be released. 
+ * No need to re-disconnect them when we release the pages + * to the free list. + */ + disconnect_on_release = FALSE; + } + +restart_after_sleep: + if (queue_empty(&object->memq)) + return; + loop_count = V_O_R_MAX_BATCH + 1; + + vm_page_lockspin_queues(); + + next = (vm_page_t)queue_first(&object->memq); + + while (!queue_end(&object->memq, (queue_entry_t)next)) { + + p = next; + next = (vm_page_t)queue_next(&next->listq); + + if (--loop_count == 0) { + + vm_page_unlock_queues(); + + if (local_free_q) { + /* + * Free the pages we reclaimed so far + * and take a little break to avoid + * hogging the page queue lock too long + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + } else + mutex_pause(0); + + loop_count = V_O_R_MAX_BATCH + 1; + + vm_page_lockspin_queues(); + } + if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) { + + if (reap_type == REAP_DATA_FLUSH && (p->pageout == TRUE && p->list_req_pending == TRUE)) { + p->list_req_pending = FALSE; + p->cleaning = FALSE; + p->pageout = FALSE; + /* + * need to drop the laundry count... + * we may also need to remove it + * from the I/O paging queue... + * vm_pageout_throttle_up handles both cases + * + * the laundry and pageout_queue flags are cleared... + */ + vm_pageout_throttle_up(p); + + /* + * toss the wire count we picked up + * when we intially set this page up + * to be cleaned... + */ + vm_page_unwire(p); + PAGE_WAKEUP(p); + + } else if (p->busy || p->cleaning) { + + vm_page_unlock_queues(); + /* + * free the pages reclaimed so far + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + + PAGE_SLEEP(object, p, THREAD_UNINT); + + goto restart_after_sleep; + } + } + switch (reap_type) { + + case REAP_DATA_FLUSH: + if (VM_PAGE_WIRED(p)) { + /* + * this is an odd case... 
perhaps we should + * zero-fill this page since we're conceptually + * tossing its data at this point, but leaving + * it on the object to honor the 'wire' contract + */ + continue; + } + break; + + case REAP_PURGEABLE: + if (VM_PAGE_WIRED(p)) { + /* can't purge a wired page */ + vm_page_purged_wired++; + continue; + } + + if (p->busy) { + /* + * We can't reclaim a busy page but we can + * make it pageable (it's not wired) to make + * sure that it gets considered by + * vm_pageout_scan() later. + */ + vm_page_deactivate(p); + vm_page_purged_busy++; + continue; + } + + if (p->cleaning || p->laundry || p->list_req_pending) { + /* + * page is being acted upon, + * so don't mess with it + */ + vm_page_purged_others++; + continue; + } + assert(p->object != kernel_object); + + /* + * we can discard this page... + */ + if (p->pmapped == TRUE) { + int refmod_state; + /* + * unmap the page + */ + refmod_state = pmap_disconnect(p->phys_page); + if (refmod_state & VM_MEM_MODIFIED) { + p->dirty = TRUE; + } + } + if (p->dirty || p->precious) { + /* + * we saved the cost of cleaning this page ! + */ + vm_page_purged_count++; + } + + break; + + case REAP_TERMINATE: + if (p->absent || p->private) { + /* + * For private pages, VM_PAGE_FREE just + * leaves the page structure around for + * its owner to clean up. For absent + * pages, the structure is returned to + * the appropriate pool. + */ + break; + } + if (p->fictitious) { + assert (p->phys_page == vm_page_guard_addr); + break; + } + if (!p->dirty && p->wpmapped) + p->dirty = pmap_is_modified(p->phys_page); + + if ((p->dirty || p->precious) && !p->error && object->alive) { + + p->busy = TRUE; + + VM_PAGE_QUEUES_REMOVE(p); + + vm_page_unlock_queues(); + /* + * free the pages reclaimed so far + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); + + /* + * flush page... 
page will be freed + * upon completion of I/O + */ + vm_pageout_cluster(p); + vm_object_paging_wait(object, THREAD_UNINT); + + goto restart_after_sleep; + } + break; + + case REAP_REAP: + break; + } + vm_page_free_prepare_queues(p); + assert(p->pageq.next == NULL && p->pageq.prev == NULL); + /* + * Add this page to our list of reclaimed pages, + * to be freed later. + */ + p->pageq.next = (queue_entry_t) local_free_q; + local_free_q = p; + } + vm_page_unlock_queues(); + + /* + * Free the remaining reclaimed pages + */ + VM_OBJ_REAP_FREELIST(local_free_q, + disconnect_on_release); +} + + +void +vm_object_reap_async( + vm_object_t object) +{ + vm_object_lock_assert_exclusive(object); + + vm_object_reaper_lock_spin(); + + vm_object_reap_count_async++; + + /* enqueue the VM object... */ + queue_enter(&vm_object_reaper_queue, object, + vm_object_t, cached_list); + + vm_object_reaper_unlock(); + + /* ... and wake up the reaper thread */ + thread_wakeup((event_t) &vm_object_reaper_queue); +} + + +void +vm_object_reaper_thread(void) +{ + vm_object_t object, shadow_object; + + vm_object_reaper_lock_spin(); + + while (!queue_empty(&vm_object_reaper_queue)) { + queue_remove_first(&vm_object_reaper_queue, + object, + vm_object_t, + cached_list); + + vm_object_reaper_unlock(); + vm_object_lock(object); + + assert(object->terminating); + assert(!object->alive); + + /* + * The pageout daemon might be playing with our pages. + * Now that the object is dead, it won't touch any more + * pages, but some pages might already be on their way out. + * Hence, we wait until the active paging activities have + * ceased before we break the association with the pager + * itself. + */ + while (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { + vm_object_wait(object, + VM_OBJECT_EVENT_PAGING_IN_PROGRESS, + THREAD_UNINT); + vm_object_lock(object); + } + + shadow_object = + object->pageout ? 
VM_OBJECT_NULL : object->shadow; vm_object_reap(object); /* cache is unlocked and object is no longer valid */ @@ -1561,13 +1727,14 @@ vm_object_reaper_thread(void) vm_object_deallocate(shadow_object); shadow_object = VM_OBJECT_NULL; } - - vm_object_cache_lock(); + vm_object_reaper_lock_spin(); } /* wait for more work... */ assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT); - vm_object_cache_unlock(); + + vm_object_reaper_unlock(); + thread_block((thread_continue_t) vm_object_reaper_thread); /*NOTREACHED*/ } @@ -1583,16 +1750,18 @@ vm_object_pager_wakeup( { vm_object_hash_entry_t entry; boolean_t waiting = FALSE; + lck_mtx_t *lck; /* * If anyone was waiting for the memory_object_terminate * to be queued, wake them up now. */ - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); entry = vm_object_hash_lookup(pager, TRUE); if (entry != VM_OBJECT_HASH_ENTRY_NULL) waiting = entry->waiting; - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); + if (entry != VM_OBJECT_HASH_ENTRY_NULL) { if (waiting) thread_wakeup((event_t) pager); @@ -1611,7 +1780,8 @@ vm_object_pager_wakeup( */ static void vm_object_release_pager( - memory_object_t pager) + memory_object_t pager, + boolean_t hashed) { /* @@ -1620,11 +1790,13 @@ vm_object_release_pager( (void) memory_object_terminate(pager); - /* - * Wakeup anyone waiting for this terminate - */ - vm_object_pager_wakeup(pager); - + if (hashed == TRUE) { + /* + * Wakeup anyone waiting for this terminate + * and remove the entry from the hash + */ + vm_object_pager_wakeup(pager); + } /* * Release reference to pager. */ @@ -1657,210 +1829,759 @@ vm_object_destroy( * the destroy call.] */ - vm_object_cache_lock(); vm_object_lock(object); object->can_persist = FALSE; object->named = FALSE; object->alive = FALSE; - /* - * Rip out the pager from the vm_object now... - */ - - vm_object_remove(object); + if (object->hashed) { + lck_mtx_t *lck; + /* + * Rip out the pager from the vm_object now... 
+ */ + lck = vm_object_hash_lock_spin(object->pager); + vm_object_remove(object); + vm_object_hash_unlock(lck); + } old_pager = object->pager; object->pager = MEMORY_OBJECT_NULL; if (old_pager != MEMORY_OBJECT_NULL) memory_object_control_disable(object->pager_control); - vm_object_cache_unlock(); /* - * Wait for the existing paging activity (that got - * through before we nulled out the pager) to subside. + * Wait for the existing paging activity (that got + * through before we nulled out the pager) to subside. + */ + + vm_object_paging_wait(object, THREAD_UNINT); + vm_object_unlock(object); + + /* + * Terminate the object now. + */ + if (old_pager != MEMORY_OBJECT_NULL) { + vm_object_release_pager(old_pager, object->hashed); + + /* + * JMM - Release the caller's reference. This assumes the + * caller had a reference to release, which is a big (but + * currently valid) assumption if this is driven from the + * vnode pager (it is holding a named reference when making + * this call).. + */ + vm_object_deallocate(object); + + } + return(KERN_SUCCESS); +} + + +#define VM_OBJ_DEACT_ALL_STATS DEBUG +#if VM_OBJ_DEACT_ALL_STATS +uint32_t vm_object_deactivate_all_pages_batches = 0; +uint32_t vm_object_deactivate_all_pages_pages = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ +/* + * vm_object_deactivate_all_pages + * + * Deactivate all pages in the specified object. (Keep its pages + * in memory even though it is no longer referenced.) + * + * The object must be locked. 
+ */ +static void +vm_object_deactivate_all_pages( + register vm_object_t object) +{ + register vm_page_t p; + int loop_count; +#if VM_OBJ_DEACT_ALL_STATS + int pages_count; +#endif /* VM_OBJ_DEACT_ALL_STATS */ +#define V_O_D_A_P_MAX_BATCH 256 + + loop_count = V_O_D_A_P_MAX_BATCH; +#if VM_OBJ_DEACT_ALL_STATS + pages_count = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ + vm_page_lock_queues(); + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (--loop_count == 0) { +#if VM_OBJ_DEACT_ALL_STATS + hw_atomic_add(&vm_object_deactivate_all_pages_batches, + 1); + hw_atomic_add(&vm_object_deactivate_all_pages_pages, + pages_count); + pages_count = 0; +#endif /* VM_OBJ_DEACT_ALL_STATS */ + lck_mtx_yield(&vm_page_queue_lock); + loop_count = V_O_D_A_P_MAX_BATCH; + } + if (!p->busy && !p->throttled) { +#if VM_OBJ_DEACT_ALL_STATS + pages_count++; +#endif /* VM_OBJ_DEACT_ALL_STATS */ + vm_page_deactivate(p); + } + } +#if VM_OBJ_DEACT_ALL_STATS + if (pages_count) { + hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1); + hw_atomic_add(&vm_object_deactivate_all_pages_pages, + pages_count); + pages_count = 0; + } +#endif /* VM_OBJ_DEACT_ALL_STATS */ + vm_page_unlock_queues(); +} + + + +/* + * when deallocating pages it is necessary to hold + * the vm_page_queue_lock (a hot global lock) for certain operations + * on the page... however, the majority of the work can be done + * while merely holding the object lock... to mitigate the time spent behind the + * global lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT + * while doing all of the work that doesn't require the vm_page_queue_lock... + * then call dw_do_work to acquire the vm_page_queue_lock and do the + * necessary work for each page... we will grab the busy bit on the page + * so that dw_do_work can drop the object lock if it can't immediately take the + * vm_page_queue_lock in order to compete for the locks in the same order that + * vm_pageout_scan takes them.
+ */ + +#define DELAYED_WORK_LIMIT 32 + +#define DW_clear_reference 0x01 +#define DW_move_page 0x02 +#define DW_clear_busy 0x04 +#define DW_PAGE_WAKEUP 0x08 + + +struct dw { + vm_page_t dw_m; + int dw_mask; +}; + +static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count); + + +static void +dw_do_work( + vm_object_t object, + struct dw *dwp, + int dw_count) +{ + vm_page_t m; + int j; + + /* + * pageout_scan takes the vm_page_lock_queues first + * then tries for the object lock... to avoid what + * is effectively a lock inversion, we'll go to the + * trouble of taking them in that same order... otherwise + * if this object contains the majority of the pages resident + * in the UBC (or a small set of large objects actively being + * worked on contain the majority of the pages), we could + * cause the pageout_scan thread to 'starve' in its attempt + * to find pages to move to the free queue, since it has to + * successfully acquire the object lock of any candidate page + * before it can steal/clean it. 
+ */ + if (!vm_page_trylockspin_queues()) { + vm_object_unlock(object); + + vm_page_lockspin_queues(); + + for (j = 0; ; j++) { + if (!vm_object_lock_avoid(object) && + _vm_object_lock_try(object)) + break; + vm_page_unlock_queues(); + mutex_pause(j); + vm_page_lockspin_queues(); + } + } + for (j = 0; j < dw_count; j++, dwp++) { + + m = dwp->dw_m; + + if (dwp->dw_mask & DW_clear_reference) + m->reference = FALSE; + + if (dwp->dw_mask & DW_move_page) { + VM_PAGE_QUEUES_REMOVE(m); + + assert(!m->laundry); + assert(m->object != kernel_object); + assert(m->pageq.next == NULL && + m->pageq.prev == NULL); + + if (m->zero_fill) { + queue_enter_first(&vm_page_queue_zf, m, vm_page_t, pageq); + vm_zf_queue_count++; + } else { + queue_enter_first(&vm_page_queue_inactive, m, vm_page_t, pageq); + } + m->inactive = TRUE; + + if (!m->fictitious) { + vm_page_inactive_count++; + token_new_pagecount++; + } else { + assert(m->phys_page == vm_page_fictitious_addr); + } + } + if (dwp->dw_mask & DW_clear_busy) + dwp->dw_m->busy = FALSE; + + if (dwp->dw_mask & DW_PAGE_WAKEUP) + PAGE_WAKEUP(dwp->dw_m); + } + vm_page_unlock_queues(); + +#if CONFIG_EMBEDDED + { + int percent_avail; + + /* + * Decide if we need to send a memory status notification. + */ + percent_avail = + (vm_page_active_count + vm_page_inactive_count + + vm_page_speculative_count + vm_page_free_count + + (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 / + atop_64(max_mem); + if (percent_avail >= (kern_memorystatus_level + 5) || + percent_avail <= (kern_memorystatus_level - 5)) { + kern_memorystatus_level = percent_avail; + thread_wakeup((event_t)&kern_memorystatus_wakeup); + } + } +#endif +} + + + +/* + * The "chunk" macros are used by routines below when looking for pages to deactivate. These + * exist because of the need to handle shadow chains. When deactivating pages, we only + * want to deactivate the ones at the top most level in the object chain.
In order to do + * this efficiently, the specified address range is divided up into "chunks" and we use + * a bit map to keep track of which pages have already been processed as we descend down + * the shadow chain. These chunk macros hide the details of the bit map implementation + * as much as we can. + * + * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is + * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest + * order bit represents page 0 in the current range and highest order bit represents + * page 63. + * + * For further convenience, we also use negative logic for the page state in the bit map. + * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has + * been processed. This way we can simply test the 64-bit long word to see if it's zero + * to easily tell if the whole range has been processed. Therefore, the bit map starts + * out with all the bits set. The macros below hide all these details from the caller. + */ + +#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */ + /* be the same as the number of bits in */ + /* the chunk_state_t type. We use 64 */ + /* just for convenience. */ + +#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */ + +typedef uint64_t chunk_state_t; + +/* + * The bit map uses negative logic, so we start out with all 64 bits set to indicate + * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE, + * then we mark pages beyond the len as having been "processed" so that we don't waste time + * looking at pages in that range. This can save us from unnecessarily chasing down the + * shadow chain. 
+ */ + +#define CHUNK_INIT(c, len) \ + MACRO_BEGIN \ + uint64_t p; \ + \ + (c) = 0xffffffffffffffffLL; \ + \ + for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \ + MARK_PAGE_HANDLED(c, p); \ + MACRO_END + +/* + * Return true if at least one page in the chunk has not yet been processed. + */ + +#define CHUNK_NOT_COMPLETE(c) ((c) != 0) + +/* + * Return true if the page at offset 'p' in the bit map has already been handled + * while processing a higher level object in the shadow chain. + */ + +#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0) + +/* + * Mark the page at offset 'p' in the bit map as having been processed. + */ + +#define MARK_PAGE_HANDLED(c, p) \ +MACRO_BEGIN \ + (c) = (c) & ~(1LL << (p)); \ +MACRO_END + + +/* + * Return true if the page at the given offset has been paged out. Object is + * locked upon entry and returned locked. + */ + +static boolean_t +page_is_paged_out( + vm_object_t object, + vm_object_offset_t offset) +{ + kern_return_t kr; + memory_object_t pager; + + /* + * Check the existence map for the page if we have one, otherwise + * ask the pager about this page. + */ + +#if MACH_PAGEMAP + if (object->existence_map) { + if (vm_external_state_get(object->existence_map, offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* + * We found the page + */ + + return TRUE; + } + } else +#endif + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + /* + * We're already holding a "paging in progress" reference + * so the object can't disappear when we release the lock.
+ */ + + assert(object->paging_in_progress); + pager = object->pager; + vm_object_unlock(object); + + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); + + vm_object_lock(object); + + if (kr == KERN_SUCCESS) { + + /* + * We found the page + */ + + return TRUE; + } + } + + return FALSE; +} + + +/* + * Deactivate the pages in the specified object and range. If kill_page is set, also discard any + * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify + * a size that is less than or equal to the CHUNK_SIZE. + */ + +static void +deactivate_pages_in_object( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + boolean_t kill_page, + boolean_t reusable_page, +#if !MACH_ASSERT + __unused +#endif + boolean_t all_reusable, + chunk_state_t *chunk_state) +{ + vm_page_t m; + int p; + struct dw dw_array[DELAYED_WORK_LIMIT]; + struct dw *dwp; + int dw_count; + unsigned int reusable = 0; + + + /* + * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the + * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may + * have pages marked as having been processed already. We stop the loop early if we find we've handled + * all the pages in the chunk. + */ + + dwp = &dw_array[0]; + dw_count = 0; + + for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) { + + /* + * If this offset has already been found and handled in a higher level object, then don't + * do anything with it in the current shadow object. + */ + + if (PAGE_ALREADY_HANDLED(*chunk_state, p)) + continue; + + /* + * See if the page at this offset is around. First check to see if the page is resident, + * then if not, check the existence map or with the pager. 
+ */ + + if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { + + /* + * We found a page we were looking for. Mark it as "handled" now in the chunk_state + * so that we won't bother looking for a page at this offset again if there are more + * shadow objects. Then deactivate the page. + */ + + MARK_PAGE_HANDLED(*chunk_state, p); + + if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy)) { + int clear_refmod; + + assert(!m->laundry); + + clear_refmod = VM_MEM_REFERENCED; + dwp->dw_mask = DW_clear_reference; + + if ((kill_page) && (object->internal)) { + m->precious = FALSE; + m->dirty = FALSE; + + clear_refmod |= VM_MEM_MODIFIED; +#if CONFIG_EMBEDDED + dwp->dw_mask |= DW_move_page; +#endif +#if MACH_PAGEMAP + vm_external_state_clr(object->existence_map, offset); +#endif /* MACH_PAGEMAP */ + + if (reusable_page && !m->reusable) { + assert(!all_reusable); + assert(!object->all_reusable); + m->reusable = TRUE; + object->reusable_page_count++; + assert(object->resident_page_count >= object->reusable_page_count); + reusable++; + } + } + pmap_clear_refmod(m->phys_page, clear_refmod); + + if (!m->throttled && !(reusable_page || all_reusable)) + dwp->dw_mask |= DW_move_page; + /* + * dw_do_work may need to drop the object lock; + * if it does, we need the pages it's looking at to + * be held stable via the busy bit. + */ + m->busy = TRUE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + + dwp->dw_m = m; + dwp++; + dw_count++; + + if (dw_count >= DELAYED_WORK_LIMIT) { + if (reusable) { + OSAddAtomic(reusable, + &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + reusable = 0; + } + dw_do_work(object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; + } + } + + } else { + + /* + * The page at this offset isn't memory resident, check to see if it's + * been paged out. If so, mark it as handled so we don't bother looking + * for it in the shadow chain.
+ */ + + if (page_is_paged_out(object, offset)) { + MARK_PAGE_HANDLED(*chunk_state, p); + + /* + * If we're killing a non-resident page, then clear the page in the existence + * map so we don't bother paging it back in if it's touched again in the future. + */ + + if ((kill_page) && (object->internal)) { +#if MACH_PAGEMAP + vm_external_state_clr(object->existence_map, offset); +#endif /* MACH_PAGEMAP */ + } + } + } + } + + if (reusable) { + OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + reusable = 0; + } + + if (dw_count) + dw_do_work(object, &dw_array[0], dw_count); +} + + +/* + * Deactivate a "chunk" of the given range of the object starting at offset. A "chunk" + * will always be less than or equal to the given size. The total range is divided up + * into chunks for efficiency and performance related to the locks and handling the shadow + * chain. This routine returns how much of the given "size" it actually processed. It's + * up to the caller to loop and keep calling this routine until the entire range they want + * to process has been done. + */ + +static vm_object_size_t +deactivate_a_chunk( + vm_object_t orig_object, + vm_object_offset_t offset, + vm_object_size_t size, + boolean_t kill_page, + boolean_t reusable_page, + boolean_t all_reusable) +{ + vm_object_t object; + vm_object_t tmp_object; + vm_object_size_t length; + chunk_state_t chunk_state; + + + /* + * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the + * remaining size the caller asked for. + */ + + length = MIN(size, CHUNK_SIZE); + + /* + * The chunk_state keeps track of which pages we've already processed if there's + * a shadow chain on this object. At this point, we haven't done anything with this + * range of pages yet, so initialize the state to indicate no pages processed yet.
*/ - vm_object_paging_wait(object, THREAD_UNINT); - vm_object_unlock(object); + CHUNK_INIT(chunk_state, length); + object = orig_object; /* - * Terminate the object now. + * Start at the top level object and iterate around the loop once for each object + * in the shadow chain. We stop processing early if we've already found all the pages + * in the range. Otherwise we stop when we run out of shadow objects. */ - if (old_pager != MEMORY_OBJECT_NULL) { - vm_object_release_pager(old_pager); - /* - * JMM - Release the caller's reference. This assumes the - * caller had a reference to release, which is a big (but - * currently valid) assumption if this is driven from the - * vnode pager (it is holding a named reference when making - * this call).. + while (object && CHUNK_NOT_COMPLETE(chunk_state)) { + vm_object_paging_begin(object); + + deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state); + + vm_object_paging_end(object); + + /* + * We've finished with this object, see if there's a shadow object. If + * there is, update the offset and lock the new object. We also turn off + * kill_page at this point since we only kill pages in the top most object. */ - vm_object_deallocate(object); + tmp_object = object->shadow; + + if (tmp_object) { + kill_page = FALSE; + reusable_page = FALSE; + all_reusable = FALSE; + offset += object->shadow_offset; + vm_object_lock(tmp_object); + } + + if (object != orig_object) + vm_object_unlock(object); + + object = tmp_object; } - return(KERN_SUCCESS); + + if (object && object != orig_object) + vm_object_unlock(object); + + return length; } -#define VM_OBJ_DEACT_ALL_STATS DEBUG -#if VM_OBJ_DEACT_ALL_STATS -uint32_t vm_object_deactivate_all_pages_batches = 0; -uint32_t vm_object_deactivate_all_pages_pages = 0; -#endif /* VM_OBJ_DEACT_ALL_STATS */ + + /* - * vm_object_deactivate_pages - * - * Deactivate all pages in the specified object. 
(Keep its pages - * in memory even though it is no longer referenced.) - * - * The object must be locked. + * Move any resident pages in the specified range to the inactive queue. If kill_page is set, + * we also clear the modified status of the page and "forget" any changes that have been made + * to the page. */ -static void -vm_object_deactivate_all_pages( - register vm_object_t object) -{ - register vm_page_t p; - int loop_count; -#if VM_OBJ_DEACT_ALL_STATS - int pages_count; -#endif /* VM_OBJ_DEACT_ALL_STATS */ -#define V_O_D_A_P_MAX_BATCH 256 - - loop_count = V_O_D_A_P_MAX_BATCH; -#if VM_OBJ_DEACT_ALL_STATS - pages_count = 0; -#endif /* VM_OBJ_DEACT_ALL_STATS */ - vm_page_lock_queues(); - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (--loop_count == 0) { -#if VM_OBJ_DEACT_ALL_STATS - hw_atomic_add(&vm_object_deactivate_all_pages_batches, - 1); - hw_atomic_add(&vm_object_deactivate_all_pages_pages, - pages_count); - pages_count = 0; -#endif /* VM_OBJ_DEACT_ALL_STATS */ - mutex_yield(&vm_page_queue_lock); - loop_count = V_O_D_A_P_MAX_BATCH; - } - if (!p->busy && !p->throttled) { -#if VM_OBJ_DEACT_ALL_STATS - pages_count++; -#endif /* VM_OBJ_DEACT_ALL_STATS */ - vm_page_deactivate(p); - } - } -#if VM_OBJ_DEACT_ALL_STATS - if (pages_count) { - hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1); - hw_atomic_add(&vm_object_deactivate_all_pages_pages, - pages_count); - pages_count = 0; - } -#endif /* VM_OBJ_DEACT_ALL_STATS */ - vm_page_unlock_queues(); -} __private_extern__ void vm_object_deactivate_pages( vm_object_t object, vm_object_offset_t offset, vm_object_size_t size, - boolean_t kill_page) + boolean_t kill_page, + boolean_t reusable_page) { - vm_object_t orig_object; - int pages_moved = 0; - int pages_found = 0; + vm_object_size_t length; + boolean_t all_reusable; /* - * entered with object lock held, acquire a paging reference to - * prevent the memory_object and control ports from - * being destroyed. 
+ * We break the range up into chunks and do one chunk at a time. This is for + * efficiency and performance while handling the shadow chains and the locks. + * The deactivate_a_chunk() function returns how much of the range it processed. + * We keep calling this routine until the given size is exhausted. */ - orig_object = object; - - for (;;) { - register vm_page_t m; - vm_object_offset_t toffset; - vm_object_size_t tsize; - - vm_object_paging_begin(object); - vm_page_lock_queues(); - - for (tsize = size, toffset = offset; tsize; tsize -= PAGE_SIZE, toffset += PAGE_SIZE) { - if ((m = vm_page_lookup(object, toffset)) != VM_PAGE_NULL) { - pages_found++; + all_reusable = FALSE; + if (reusable_page && + object->size != 0 && + object->size == size && + object->reusable_page_count == 0) { + all_reusable = TRUE; + reusable_page = FALSE; + } - if ((m->wire_count == 0) && (!m->private) && (!m->gobbled) && (!m->busy)) { + while (size) { + length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable); - assert(!m->laundry); + size -= length; + offset += length; + } - m->reference = FALSE; - pmap_clear_reference(m->phys_page); + if (all_reusable) { + if (!object->all_reusable) { + unsigned int reusable; + + object->all_reusable = TRUE; + assert(object->reusable_page_count == 0); + /* update global stats */ + reusable = object->resident_page_count; + OSAddAtomic(reusable, + &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reusable += reusable; + vm_page_stats_reusable.all_reusable_calls++; + } + } else if (reusable_page) { + vm_page_stats_reusable.partial_reusable_calls++; + } +} - if ((kill_page) && (object->internal)) { - m->precious = FALSE; - m->dirty = FALSE; - pmap_clear_modify(m->phys_page); -#if MACH_PAGEMAP - vm_external_state_clr(object->existence_map, offset); -#endif /* MACH_PAGEMAP */ - } +void +vm_object_reuse_pages( + vm_object_t object, + vm_object_offset_t start_offset, + vm_object_offset_t end_offset, + boolean_t 
allow_partial_reuse) +{ + vm_object_offset_t cur_offset; + vm_page_t m; + unsigned int reused, reusable; - if (!m->throttled) { - VM_PAGE_QUEUES_REMOVE(m); +#define VM_OBJECT_REUSE_PAGE(object, m, reused) \ + MACRO_BEGIN \ + if ((m) != VM_PAGE_NULL && \ + (m)->reusable) { \ + assert((object)->reusable_page_count <= \ + (object)->resident_page_count); \ + assert((object)->reusable_page_count > 0); \ + (object)->reusable_page_count--; \ + (m)->reusable = FALSE; \ + (reused)++; \ + } \ + MACRO_END - assert(!m->laundry); - assert(m->object != kernel_object); - assert(m->pageq.next == NULL && - m->pageq.prev == NULL); - - if(m->zero_fill) { - queue_enter_first( - &vm_page_queue_zf, - m, vm_page_t, pageq); - vm_zf_queue_count++; - } else { - queue_enter_first( - &vm_page_queue_inactive, - m, vm_page_t, pageq); - } + reused = 0; + reusable = 0; - m->inactive = TRUE; - if (!m->fictitious) { - vm_page_inactive_count++; - token_new_pagecount++; - } else { - assert(m->phys_page == vm_page_fictitious_addr); - } + vm_object_lock_assert_exclusive(object); - pages_moved++; - } + if (object->all_reusable) { + assert(object->reusable_page_count == 0); + object->all_reusable = FALSE; + if (end_offset - start_offset == object->size || + !allow_partial_reuse) { + vm_page_stats_reusable.all_reuse_calls++; + reused = object->resident_page_count; + } else { + vm_page_stats_reusable.partial_reuse_calls++; + queue_iterate(&object->memq, m, vm_page_t, listq) { + if (m->offset < start_offset || + m->offset >= end_offset) { + m->reusable = TRUE; + object->reusable_page_count++; + assert(object->resident_page_count >= object->reusable_page_count); + continue; + } else { + assert(!m->reusable); + reused++; } } } - vm_page_unlock_queues(); - vm_object_paging_end(object); - - if (object->shadow) { - vm_object_t tmp_object; - - kill_page = 0; - - offset += object->shadow_offset; - - tmp_object = object->shadow; - vm_object_lock(tmp_object); - - if (object != orig_object) - 
vm_object_unlock(object); - object = tmp_object; - } else - break; + } else if (object->resident_page_count > + ((end_offset - start_offset) >> PAGE_SHIFT)) { + vm_page_stats_reusable.partial_reuse_calls++; + for (cur_offset = start_offset; + cur_offset < end_offset; + cur_offset += PAGE_SIZE_64) { + if (object->reusable_page_count == 0) { + break; + } + m = vm_page_lookup(object, cur_offset); + VM_OBJECT_REUSE_PAGE(object, m, reused); + } + } else { + vm_page_stats_reusable.partial_reuse_calls++; + queue_iterate(&object->memq, m, vm_page_t, listq) { + if (object->reusable_page_count == 0) { + break; + } + if (m->offset < start_offset || + m->offset >= end_offset) { + continue; + } + VM_OBJECT_REUSE_PAGE(object, m, reused); + } } - if (object != orig_object) - vm_object_unlock(object); + + /* update global stats */ + OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reused += reused; + vm_page_stats_reusable.reusable += reusable; } /* @@ -1919,7 +2640,7 @@ vm_object_pmap_protect( for (phys_addr = phys_start; phys_addr < phys_end; phys_addr += PAGE_SIZE_64) { - pmap_page_protect(phys_addr >> PAGE_SHIFT, prot); + pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot); } } return; @@ -1975,11 +2696,11 @@ vm_object_pmap_protect( target_off += PAGE_SIZE) { p = vm_page_lookup(object, target_off); if (p != VM_PAGE_NULL) { - vm_offset_t start; + vm_object_offset_t start; start = pmap_start + - (vm_offset_t)(p->offset - offset); + (p->offset - offset); pmap_protect(pmap, start, - start + PAGE_SIZE, prot); + start + PAGE_SIZE, prot); } } } else { @@ -2105,6 +2826,7 @@ vm_object_copy_slowly( fault_info.lo_offset = src_offset; fault_info.hi_offset = src_offset + size; fault_info.no_cache = FALSE; + fault_info.stealth = TRUE; for ( ; size != 0 ; @@ -2142,7 +2864,13 @@ vm_object_copy_slowly( vm_object_lock(src_object); vm_object_paging_begin(src_object); - fault_info.cluster_size = size; + if (size > (vm_size_t) -1) { + /* 
32-bit overflow */ + fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); + } else { + fault_info.cluster_size = (vm_size_t) size; + assert(fault_info.cluster_size == size); + } XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); result = vm_fault_page(src_object, src_offset, @@ -2152,100 +2880,105 @@ vm_object_copy_slowly( &error_code, FALSE, FALSE, &fault_info); switch(result) { - case VM_FAULT_SUCCESS: - result_page = _result_page; + case VM_FAULT_SUCCESS: + result_page = _result_page; - /* - * We don't need to hold the object - * lock -- the busy page will be enough. - * [We don't care about picking up any - * new modifications.] - * - * Copy the page to the new object. - * - * POLICY DECISION: - * If result_page is clean, - * we could steal it instead - * of copying. - */ + /* + * We don't need to hold the object + * lock -- the busy page will be enough. + * [We don't care about picking up any + * new modifications.] + * + * Copy the page to the new object. + * + * POLICY DECISION: + * If result_page is clean, + * we could steal it instead + * of copying. + */ - vm_object_unlock(result_page->object); - vm_page_copy(result_page, new_page); + vm_object_unlock(result_page->object); + vm_page_copy(result_page, new_page); - /* - * Let go of both pages (make them - * not busy, perform wakeup, activate). - */ - vm_object_lock(new_object); - new_page->dirty = TRUE; - PAGE_WAKEUP_DONE(new_page); - vm_object_unlock(new_object); - - vm_object_lock(result_page->object); - PAGE_WAKEUP_DONE(result_page); - - vm_page_lockspin_queues(); - if (!result_page->active && - !result_page->inactive && - !result_page->throttled) - vm_page_activate(result_page); - vm_page_activate(new_page); - vm_page_unlock_queues(); + /* + * Let go of both pages (make them + * not busy, perform wakeup, activate). 
+ */ + vm_object_lock(new_object); + new_page->dirty = TRUE; + PAGE_WAKEUP_DONE(new_page); + vm_object_unlock(new_object); - /* - * Release paging references and - * top-level placeholder page, if any. - */ + vm_object_lock(result_page->object); + PAGE_WAKEUP_DONE(result_page); - vm_fault_cleanup(result_page->object, - top_page); + vm_page_lockspin_queues(); + if (!result_page->active && + !result_page->inactive && + !result_page->throttled) + vm_page_activate(result_page); + vm_page_activate(new_page); + vm_page_unlock_queues(); - break; + /* + * Release paging references and + * top-level placeholder page, if any. + */ + + vm_fault_cleanup(result_page->object, + top_page); + + break; - case VM_FAULT_RETRY: - break; + case VM_FAULT_RETRY: + break; + + case VM_FAULT_FICTITIOUS_SHORTAGE: + vm_page_more_fictitious(); + break; - case VM_FAULT_FICTITIOUS_SHORTAGE: - vm_page_more_fictitious(); + case VM_FAULT_MEMORY_SHORTAGE: + if (vm_page_wait(interruptible)) break; + /* fall thru */ - case VM_FAULT_MEMORY_SHORTAGE: - if (vm_page_wait(interruptible)) - break; - /* fall thru */ + case VM_FAULT_INTERRUPTED: + vm_object_lock(new_object); + VM_PAGE_FREE(new_page); + vm_object_unlock(new_object); + + vm_object_deallocate(new_object); + vm_object_deallocate(src_object); + *_result_object = VM_OBJECT_NULL; + return(MACH_SEND_INTERRUPTED); - case VM_FAULT_INTERRUPTED: - vm_object_lock(new_object); - vm_page_lock_queues(); - vm_page_free(new_page); - vm_page_unlock_queues(); - vm_object_unlock(new_object); + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no VM page: fail */ + vm_object_paging_end(src_object); + vm_object_unlock(src_object); + /*FALLTHROUGH*/ + case VM_FAULT_MEMORY_ERROR: + /* + * A policy choice: + * (a) ignore pages that we can't + * copy + * (b) return the null object if + * any page fails [chosen] + */ - vm_object_deallocate(new_object); - vm_object_deallocate(src_object); - *_result_object = VM_OBJECT_NULL; - return(MACH_SEND_INTERRUPTED); + 
vm_object_lock(new_object); + VM_PAGE_FREE(new_page); + vm_object_unlock(new_object); - case VM_FAULT_MEMORY_ERROR: - /* - * A policy choice: - * (a) ignore pages that we can't - * copy - * (b) return the null object if - * any page fails [chosen] - */ + vm_object_deallocate(new_object); + vm_object_deallocate(src_object); + *_result_object = VM_OBJECT_NULL; + return(error_code ? error_code: + KERN_MEMORY_ERROR); - vm_object_lock(new_object); - vm_page_lock_queues(); - vm_page_free(new_page); - vm_page_unlock_queues(); - vm_object_unlock(new_object); - - vm_object_deallocate(new_object); - vm_object_deallocate(src_object); - *_result_object = VM_OBJECT_NULL; - return(error_code ? error_code: - KERN_MEMORY_ERROR); + default: + panic("vm_object_copy_slowly: unexpected error" + " 0x%x from vm_fault_page()\n", result); } } while (result != VM_FAULT_SUCCESS); } @@ -2535,12 +3268,14 @@ vm_object_copy_delayed( /* * Wait for paging in progress. */ - if (!src_object->true_share && src_object->paging_in_progress) { + if (!src_object->true_share && + (src_object->paging_in_progress != 0 || + src_object->activity_in_progress != 0)) { if (src_object_shared == TRUE) { vm_object_unlock(src_object); - vm_object_lock(src_object); src_object_shared = FALSE; + goto Retry; } vm_object_paging_wait(src_object, THREAD_UNINT); } @@ -2620,7 +3355,7 @@ vm_object_copy_delayed( if (!p->fictitious && p->offset >= old_copy->size && p->offset < copy_size) { - if (p->wire_count > 0) { + if (VM_PAGE_WIRED(p)) { vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2709,7 +3444,7 @@ vm_object_copy_delayed( queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset < copy_size) { - if (p->wire_count > 0) { + if (VM_PAGE_WIRED(p)) { if (old_copy) vm_object_unlock(old_copy); vm_object_unlock(src_object); @@ -2765,7 +3500,7 @@ vm_object_copy_delayed( XPR(XPR_VM_OBJECT, "vm_object_copy_delayed: used copy object %X for source %X\n", - (integer_t)new_copy, 
(integer_t)src_object, 0, 0, 0); + new_copy, src_object, 0, 0, 0); return new_copy; } @@ -2864,7 +3599,7 @@ vm_object_copy_strategically( break; case MEMORY_OBJECT_COPY_SYMMETRIC: - XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n",(natural_t)src_object, src_offset, size, 0, 0); + XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); vm_object_unlock(src_object); result = KERN_MEMORY_RESTART_COPY; break; @@ -3018,9 +3753,9 @@ vm_object_shadow( * [Furthermore, each routine must cope with the simultaneous * or previous operations of the others.] * - * In addition to the lock on the object, the vm_object_cache_lock + * In addition to the lock on the object, the vm_object_hash_lock * governs the associations. References gained through the - * association require use of the cache lock. + * association require use of the hash lock. * * Because the pager field may be cleared spontaneously, it * cannot be used to determine whether a memory object has @@ -3059,6 +3794,7 @@ vm_object_enter( boolean_t must_init; vm_object_hash_entry_t entry, new_entry; uint32_t try_failed_count = 0; + lck_mtx_t *lck; if (pager == MEMORY_OBJECT_NULL) return(vm_object_allocate(size)); @@ -3071,7 +3807,7 @@ vm_object_enter( * Look for an object associated with this port. */ Retry: - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); do { entry = vm_object_hash_lookup(pager, FALSE); @@ -3081,19 +3817,18 @@ vm_object_enter( * We must unlock to create a new object; * if we do so, we must try the lookup again. */ - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); new_entry = vm_object_hash_entry_alloc(pager); new_object = vm_object_allocate(size); - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); } else { /* * Lookup failed twice, and we have something * to insert; set the object. 
*/ - vm_object_hash_insert(new_entry); + vm_object_hash_insert(new_entry, new_object); entry = new_entry; - entry->object = new_object; new_entry = VM_OBJECT_HASH_ENTRY_NULL; new_object = VM_OBJECT_NULL; must_init = TRUE; @@ -3107,9 +3842,10 @@ vm_object_enter( entry->waiting = TRUE; entry = VM_OBJECT_HASH_ENTRY_NULL; assert_wait((event_t) pager, THREAD_UNINT); - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); + thread_block(THREAD_CONTINUE_NULL); - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); } } while (entry == VM_OBJECT_HASH_ENTRY_NULL); @@ -3117,46 +3853,60 @@ vm_object_enter( assert(object != VM_OBJECT_NULL); if (!must_init) { - if (!vm_object_lock_try(object)) { + if ( !vm_object_lock_try(object)) { - vm_object_cache_unlock(); + vm_object_hash_unlock(lck); try_failed_count++; mutex_pause(try_failed_count); /* wait a bit */ - goto Retry; } assert(!internal || object->internal); - if (named) { - assert(!object->named); - object->named = TRUE; - } +#if VM_OBJECT_CACHE if (object->ref_count == 0) { + if ( !vm_object_cache_lock_try()) { + + vm_object_hash_unlock(lck); + vm_object_unlock(object); + + try_failed_count++; + mutex_pause(try_failed_count); /* wait a bit */ + goto Retry; + } XPR(XPR_VM_OBJECT_CACHE, - "vm_object_enter: removing %x from cache, head (%x, %x)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0,0); + "vm_object_enter: removing %x from cache, head (%x, %x)\n", + object, + vm_object_cached_list.next, + vm_object_cached_list.prev, 0,0); queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list); vm_object_cached_count--; + + vm_object_cache_unlock(); + } +#endif + if (named) { + assert(!object->named); + object->named = TRUE; } vm_object_lock_assert_exclusive(object); object->ref_count++; vm_object_res_reference(object); + + vm_object_hash_unlock(lck); vm_object_unlock(object); VM_STAT_INCR(hits); - } + } else + vm_object_hash_unlock(lck); + 
assert(object->ref_count > 0); VM_STAT_INCR(lookups); - vm_object_cache_unlock(); - XPR(XPR_VM_OBJECT, "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n", - (integer_t)pager, (integer_t)object, must_init, 0, 0); + pager, object, must_init, 0, 0); /* * If we raced to create a vm_object but lost, let's @@ -3240,7 +3990,7 @@ vm_object_enter( XPR(XPR_VM_OBJECT, "vm_object_enter: vm_object %x, memory_object %x, internal %d\n", - (integer_t)object, (integer_t)object->pager, internal, 0,0); + object, object->pager, internal, 0,0); return(object); } @@ -3264,13 +4014,14 @@ vm_object_pager_create( { memory_object_t pager; vm_object_hash_entry_t entry; + lck_mtx_t *lck; #if MACH_PAGEMAP vm_object_size_t size; vm_external_map_t map; #endif /* MACH_PAGEMAP */ XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", - (integer_t)object, 0,0,0,0); + object, 0,0,0,0); assert(object != kernel_object); @@ -3317,6 +4068,11 @@ vm_object_pager_create( vm_object_unlock(object); #endif /* MACH_PAGEMAP */ + if ((uint32_t) object->size != object->size) { + panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n", + (uint64_t) object->size); + } + /* * Create the [internal] pager, and associate it with this object. 
* @@ -3333,18 +4089,18 @@ vm_object_pager_create( assert(object->temporary); /* create our new memory object */ - (void) memory_object_create(dmm, object->size, &pager); + assert((vm_size_t) object->size == object->size); + (void) memory_object_create(dmm, (vm_size_t) object->size, + &pager); memory_object_default_deallocate(dmm); } entry = vm_object_hash_entry_alloc(pager); - vm_object_cache_lock(); - vm_object_hash_insert(entry); - - entry->object = object; - vm_object_cache_unlock(); + lck = vm_object_hash_lock_spin(pager); + vm_object_hash_insert(entry, object); + vm_object_hash_unlock(lck); /* * A reference was returned by @@ -3431,6 +4187,9 @@ vm_object_do_collapse( vm_object_offset_t new_offset, backing_offset; vm_object_size_t size; + vm_object_lock_assert_exclusive(object); + vm_object_lock_assert_exclusive(backing_object); + backing_offset = object->shadow_offset; size = object->size; @@ -3532,10 +4291,20 @@ vm_object_do_collapse( */ assert(!object->paging_in_progress); + assert(!object->activity_in_progress); object->pager = backing_object->pager; - entry = vm_object_hash_lookup(object->pager, FALSE); - assert(entry != VM_OBJECT_HASH_ENTRY_NULL); - entry->object = object; + + if (backing_object->hashed) { + lck_mtx_t *lck; + + lck = vm_object_hash_lock_spin(backing_object->pager); + entry = vm_object_hash_lookup(object->pager, FALSE); + assert(entry != VM_OBJECT_HASH_ENTRY_NULL); + entry->object = object; + vm_object_hash_unlock(lck); + + object->hashed = TRUE; + } object->pager_created = backing_object->pager_created; object->pager_control = backing_object->pager_control; object->pager_ready = backing_object->pager_ready; @@ -3548,8 +4317,6 @@ vm_object_do_collapse( } } - vm_object_cache_unlock(); - #if MACH_PAGEMAP /* * If the shadow offset is 0, the use the existence map from @@ -3604,13 +4371,14 @@ vm_object_do_collapse( assert((backing_object->ref_count == 1) && (backing_object->resident_page_count == 0) && - (backing_object->paging_in_progress == 
0)); + (backing_object->paging_in_progress == 0) && + (backing_object->activity_in_progress == 0)); backing_object->alive = FALSE; vm_object_unlock(backing_object); XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", - (integer_t)backing_object, 0,0,0,0); + backing_object, 0,0,0,0); vm_object_lock_destroy(backing_object); @@ -3629,6 +4397,7 @@ vm_object_do_bypass( * in the chain. */ + vm_object_lock_assert_exclusive(object); vm_object_lock_assert_exclusive(backing_object); #if TASK_SWAPPER @@ -3759,6 +4528,8 @@ vm_object_collapse( register unsigned int rcount; register unsigned int size; vm_object_t original_object; + int object_lock_type; + int backing_object_lock_type; vm_object_collapse_calls++; @@ -3768,13 +4539,26 @@ vm_object_collapse( } XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", - (integer_t)object, 0,0,0,0); + object, 0,0,0,0); if (object == VM_OBJECT_NULL) return; original_object = object; + /* + * The top object was locked "exclusive" by the caller. + * In the first pass, to determine if we can collapse the shadow chain, + * take a "shared" lock on the shadow objects. If we can collapse, + * we'll have to go down the chain again with exclusive locks. 
+ */ + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_SHARED; + +retry: + object = original_object; + vm_object_lock_assert_exclusive(object); + while (TRUE) { vm_object_collapse_objects++; /* @@ -3793,23 +4577,27 @@ vm_object_collapse( } return; } - + if (backing_object_lock_type == OBJECT_LOCK_SHARED) { + vm_object_lock_shared(backing_object); + } else { + vm_object_lock(backing_object); + } + /* * No pages in the object are currently * being paged out, and */ - if (object->paging_in_progress != 0) { + if (object->paging_in_progress != 0 || + object->activity_in_progress != 0) { /* try and collapse the rest of the shadow chain */ - vm_object_lock(backing_object); if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } - vm_object_lock(backing_object); - /* * ... * The backing object is not read_only, @@ -3820,12 +4608,14 @@ vm_object_collapse( */ if (!backing_object->internal || - backing_object->paging_in_progress != 0) { + backing_object->paging_in_progress != 0 || + backing_object->activity_in_progress != 0) { /* try and collapse the rest of the shadow chain */ if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3846,6 +4636,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3876,25 +4667,37 @@ vm_object_collapse( #endif /*!MACH_PAGEMAP */ ) && vm_object_collapse_allowed) { - XPR(XPR_VM_OBJECT, - "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", - (integer_t)backing_object, (integer_t)object, - (integer_t)backing_object->pager, - (integer_t)backing_object->pager_control, 0); - /* - * We need the cache lock for collapsing, - * but we must not deadlock. + * We need the exclusive lock on the VM objects. */ - - if (! 
vm_object_cache_lock_try()) { - if (object != original_object) { - vm_object_unlock(object); - } + if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { + /* + * We have an object and its shadow locked + * "shared". We can't just upgrade the locks + * to "exclusive", as some other thread might + * also have these objects locked "shared" and + * attempt to upgrade one or the other to + * "exclusive". The upgrades would block + * forever waiting for the other "shared" locks + * to get released. + * So we have to release the locks and go + * down the shadow chain again (since it could + * have changed) with "exclusive" locking. + */ vm_object_unlock(backing_object); - return; + if (object != original_object) + vm_object_unlock(object); + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; + goto retry; } + XPR(XPR_VM_OBJECT, + "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", + backing_object, object, + backing_object->pager, + backing_object->pager_control, 0); + /* * Collapse the object with its backing * object, and try again with the object's @@ -3917,6 +4720,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3941,7 +4745,7 @@ vm_object_collapse( */ if (backing_object->pager_created #if MACH_PAGEMAP - && (backing_object->existence_map == VM_EXTERNAL_NULL) + && (backing_object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { /* try and collapse the rest of the shadow chain */ @@ -3949,6 +4753,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -3959,7 +4764,7 @@ vm_object_collapse( */ if (object->pager_created #if MACH_PAGEMAP - && (object->existence_map == VM_EXTERNAL_NULL) + && (object->existence_map == VM_EXTERNAL_NULL) #endif /* MACH_PAGEMAP */ ) { /* try and collapse the rest of the shadow chain */ @@ -3967,6 
+4772,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -4010,12 +4816,13 @@ vm_object_collapse( backing_offset, backing_rcount) && !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { /* dependency right at the hint */ - object->cow_hint = (vm_offset_t)hint_offset; + object->cow_hint = (vm_offset_t) hint_offset; /* atomic */ /* try and collapse the rest of the shadow chain */ if (object != original_object) { vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } @@ -4059,7 +4866,8 @@ vm_object_collapse( offset != hint_offset && !EXISTS_IN_OBJECT(object, offset, rc)) { /* found a dependency */ - object->cow_hint = (vm_offset_t)offset; + object->cow_hint = (vm_offset_t) offset; /* atomic */ + break; } p = (vm_page_t) queue_next(&p->listq); @@ -4071,6 +4879,7 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } } @@ -4079,12 +4888,11 @@ vm_object_collapse( * Walk through the offsets looking for pages in the * backing object that show through to the object. */ -#if MACH_PAGEMAP - if (backing_rcount || backing_object->existence_map) -#else - if (backing_rcount) + if (backing_rcount +#if MACH_PAGEMAP + || backing_object->existence_map #endif /* MACH_PAGEMAP */ - { + ) { offset = hint_offset; while((offset = @@ -4102,7 +4910,7 @@ vm_object_collapse( backing_offset, backing_rcount) && !EXISTS_IN_OBJECT(object, offset, rcount)) { /* found a dependency */ - object->cow_hint = (vm_offset_t)offset; + object->cow_hint = (vm_offset_t) offset; /* atomic */ break; } } @@ -4112,11 +4920,24 @@ vm_object_collapse( vm_object_unlock(object); } object = backing_object; + object_lock_type = backing_object_lock_type; continue; } } } + /* + * We need "exclusive" locks on the 2 VM objects. 
+ */ + if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { + vm_object_unlock(backing_object); + if (object != original_object) + vm_object_unlock(object); + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; + goto retry; + } + /* reset the offset hint for any objects deeper in the chain */ object->cow_hint = (vm_offset_t)0; @@ -4249,7 +5070,7 @@ vm_object_coalesce( XPR(XPR_VM_OBJECT, "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n", - (integer_t)prev_object, prev_offset, prev_size, next_size, 0); + prev_object, prev_offset, prev_size, next_size, 0); vm_object_lock(prev_object); @@ -4275,7 +5096,8 @@ vm_object_coalesce( (prev_object->copy != VM_OBJECT_NULL) || (prev_object->true_share != FALSE) || (prev_object->purgable != VM_PURGABLE_DENY) || - (prev_object->paging_in_progress != 0)) { + (prev_object->paging_in_progress != 0) || + (prev_object->activity_in_progress != 0)) { vm_object_unlock(prev_object); return(FALSE); } @@ -4330,7 +5152,7 @@ vm_object_page_map( vm_object_offset_t offset), void *map_fn_data) /* private to map_fn */ { - int num_pages; + int64_t num_pages; int i; vm_page_t m; vm_page_t old_page; @@ -4349,14 +5171,15 @@ vm_object_page_map( if ((old_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { - vm_page_lock_queues(); - vm_page_free(old_page); - vm_page_unlock_queues(); + VM_PAGE_FREE(old_page); } - vm_page_init(m, addr); - /* private normally requires lock_queues but since we */ - /* are initializing the page, its not necessary here */ + assert((ppnum_t) addr == addr); + vm_page_init(m, (ppnum_t) addr); + /* + * private normally requires lock_queues but since we + * are initializing the page, its not necessary here + */ m->private = TRUE; /* don`t free page */ m->wire_count = 1; vm_page_insert(m, object, offset); @@ -4399,8 +5222,9 @@ print_bitstring( boolean_t vm_object_cached( - register vm_object_t object) + __unused register vm_object_t object) { +#if 
VM_OBJECT_CACHE register vm_object_t o; queue_iterate(&vm_object_cached_list, o, vm_object_t, cached_list) { @@ -4408,6 +5232,7 @@ vm_object_cached( return TRUE; } } +#endif return FALSE; } @@ -4418,13 +5243,13 @@ vm_object_cached( void vm_external_print( vm_external_map_t emap, - vm_size_t size) + vm_object_size_t size) { if (emap == VM_EXTERNAL_NULL) { printf("0 "); } else { - vm_size_t existence_size = stob(size); - printf("{ size=%d, map=[", existence_size); + vm_object_size_t existence_size = stob(size); + printf("{ size=%lld, map=[", (uint64_t) existence_size); if (existence_size > 0) { print_bitstring(emap[0]); } @@ -4573,6 +5398,7 @@ vm_object_print(db_expr_t db_addr, __unused boolean_t have_addr, } printf(">"); printf(", paging_in_progress=%d\n", object->paging_in_progress); + printf(", activity_in_progress=%d\n", object->activity_in_progress); iprintf("%screated, %sinit, %sready, %spersist, %strusted, %spageout, %s, %s\n", (object->pager_created ? "" : "!"), @@ -4717,11 +5543,13 @@ vm_object_populate_with_private( m = vm_page_lookup(object, base_offset); if(m != VM_PAGE_NULL) { if(m->fictitious) { - if (m->phys_page != - vm_page_guard_addr) { + if (m->phys_page != vm_page_guard_addr) { + vm_page_lockspin_queues(); - m->fictitious = FALSE; m->private = TRUE; + vm_page_unlock_queues(); + + m->fictitious = FALSE; m->phys_page = base_page; if(!m->busy) { m->busy = TRUE; @@ -4730,7 +5558,6 @@ vm_object_populate_with_private( m->absent = TRUE; } m->list_req_pending = TRUE; - vm_page_unlock_queues(); } } else if (m->phys_page != base_page) { if (m->pmapped) { @@ -4754,17 +5581,20 @@ vm_object_populate_with_private( m->encrypted = FALSE; } else { - while ((m = vm_page_grab_fictitious()) - == VM_PAGE_NULL) + while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) vm_page_more_fictitious(); - vm_page_lockspin_queues(); - m->fictitious = FALSE; + + /* + * private normally requires lock_queues but since we + * are initializing the page, its not necessary here + */ 
m->private = TRUE; + m->fictitious = FALSE; m->phys_page = base_page; m->list_req_pending = TRUE; m->absent = TRUE; m->unusual = TRUE; - vm_page_unlock_queues(); + vm_page_insert(m, object, base_offset); } base_page++; /* Go to the next physical page */ @@ -4802,10 +5632,10 @@ vm_object_populate_with_private( __private_extern__ kern_return_t memory_object_free_from_cache( __unused host_t host, - memory_object_pager_ops_t pager_ops, + __unused memory_object_pager_ops_t pager_ops, int *count) { - +#if VM_OBJECT_CACHE int object_released = 0; register vm_object_t object = VM_OBJECT_NULL; @@ -4828,6 +5658,7 @@ memory_object_free_from_cache( vm_object_t, cached_list); vm_object_cached_count--; + vm_object_cache_unlock(); /* * Since this object is in the cache, we know * that it is initialized and has only a pager's @@ -4850,6 +5681,7 @@ memory_object_free_from_cache( * (We are careful here to limit recursion.) */ shadow = object->pageout?VM_OBJECT_NULL:object->shadow; + if ((vm_object_terminate(object) == KERN_SUCCESS) && (shadow != VM_OBJECT_NULL)) { vm_object_deallocate(shadow); @@ -4862,6 +5694,9 @@ memory_object_free_from_cache( } vm_object_cache_unlock(); *count = object_released; +#else + *count = 0; +#endif return KERN_SUCCESS; } @@ -4875,21 +5710,22 @@ memory_object_create_named( { vm_object_t object; vm_object_hash_entry_t entry; + lck_mtx_t *lck; *control = MEMORY_OBJECT_CONTROL_NULL; if (pager == MEMORY_OBJECT_NULL) return KERN_INVALID_ARGUMENT; - vm_object_cache_lock(); + lck = vm_object_hash_lock_spin(pager); entry = vm_object_hash_lookup(pager, FALSE); + if ((entry != VM_OBJECT_HASH_ENTRY_NULL) && (entry->object != VM_OBJECT_NULL)) { if (entry->object->named == TRUE) panic("memory_object_create_named: caller already holds the right"); } + vm_object_hash_unlock(lck); - vm_object_cache_unlock(); - if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) - == VM_OBJECT_NULL) { + if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == 
VM_OBJECT_NULL) { return(KERN_INVALID_OBJECT); } @@ -4928,50 +5764,47 @@ memory_object_recover_named( { vm_object_t object; - vm_object_cache_lock(); object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) { - vm_object_cache_unlock(); return (KERN_INVALID_ARGUMENT); } - restart: vm_object_lock(object); if (object->terminating && wait_on_terminating) { - vm_object_cache_unlock(); vm_object_wait(object, VM_OBJECT_EVENT_PAGING_IN_PROGRESS, THREAD_UNINT); - vm_object_cache_lock(); goto restart; } if (!object->alive) { - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_FAILURE; } if (object->named == TRUE) { - vm_object_cache_unlock(); vm_object_unlock(object); return KERN_SUCCESS; } - - if((object->ref_count == 0) && (!object->terminating)){ +#if VM_OBJECT_CACHE + if ((object->ref_count == 0) && (!object->terminating)) { + if (!vm_object_cache_lock_try()) { + vm_object_unlock(object); + goto restart; + } queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list); - vm_object_cached_count--; - XPR(XPR_VM_OBJECT_CACHE, - "memory_object_recover_named: removing %X, head (%X, %X)\n", - (integer_t)object, - (integer_t)vm_object_cached_list.next, - (integer_t)vm_object_cached_list.prev, 0,0); + vm_object_cached_count--; + XPR(XPR_VM_OBJECT_CACHE, + "memory_object_recover_named: removing %X, head (%X, %X)\n", + object, + vm_object_cached_list.next, + vm_object_cached_list.prev, 0,0); + + vm_object_cache_unlock(); } - - vm_object_cache_unlock(); - +#endif object->named = TRUE; vm_object_lock_assert_exclusive(object); object->ref_count++; @@ -5013,17 +5846,10 @@ vm_object_release_name( while (object != VM_OBJECT_NULL) { - /* - * The cache holds a reference (uncounted) to - * the object. We must locke it before removing - * the object. 
- * - */ - - vm_object_cache_lock(); vm_object_lock(object); + assert(object->alive); - if(original_object) + if (original_object) assert(object->named); assert(object->ref_count > 0); @@ -5038,7 +5864,6 @@ vm_object_release_name( VM_OBJECT_EVENT_INITIALIZED, THREAD_UNINT); vm_object_unlock(object); - vm_object_cache_unlock(); thread_block(THREAD_CONTINUE_NULL); continue; } @@ -5047,22 +5872,19 @@ vm_object_release_name( && (flags & MEMORY_OBJECT_TERMINATE_IDLE)) || (object->terminating)) { vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_FAILURE; } else { if (flags & MEMORY_OBJECT_RELEASE_NO_OP) { vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_SUCCESS; } } if ((flags & MEMORY_OBJECT_RESPECT_CACHE) && (object->ref_count == 1)) { - if(original_object) + if (original_object) object->named = FALSE; vm_object_unlock(object); - vm_object_cache_unlock(); /* let vm_object_deallocate push this thing into */ /* the cache, if that it is where it is bound */ vm_object_deallocate(object); @@ -5070,9 +5892,10 @@ vm_object_release_name( } VM_OBJ_RES_DECR(object); shadow = object->pageout?VM_OBJECT_NULL:object->shadow; - if(object->ref_count == 1) { - if(vm_object_terminate(object) != KERN_SUCCESS) { - if(original_object) { + + if (object->ref_count == 1) { + if (vm_object_terminate(object) != KERN_SUCCESS) { + if (original_object) { return KERN_FAILURE; } else { return KERN_SUCCESS; @@ -5091,7 +5914,6 @@ vm_object_release_name( if(original_object) object->named = FALSE; vm_object_unlock(object); - vm_object_cache_unlock(); return KERN_SUCCESS; } } @@ -5116,7 +5938,7 @@ vm_object_lock_request( XPR(XPR_MEMORY_OBJECT, "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", - (integer_t)object, offset, size, + object, offset, size, (((should_return&1)<<1)|should_flush), prot); /* @@ -5146,9 +5968,6 @@ vm_object_lock_request( return (KERN_SUCCESS); } -unsigned int vm_page_purged_wired = 0; -unsigned int vm_page_purged_busy = 0; 
-unsigned int vm_page_purged_others = 0; /* * Empty a purgeable object by grabbing the physical pages assigned to it and * putting them on the free queue without writing them to backing store, etc. @@ -5158,151 +5977,44 @@ unsigned int vm_page_purged_others = 0; * than happy to grab these since this is a purgeable object. We mark the * object as "empty" after reaping its pages. * - * On entry the object and page queues are locked, the object must be a - * purgeable object with no delayed copies pending. + * On entry the object must be locked and it must be + * purgeable with no delayed copies pending. */ -unsigned int +void vm_object_purge(vm_object_t object) { - vm_page_t p, next; - unsigned int num_purged_pages; - vm_page_t local_freeq; - unsigned long local_freed; - int purge_loop_quota; -/* free pages as soon as we gather PURGE_BATCH_FREE_LIMIT pages to free */ -#define PURGE_BATCH_FREE_LIMIT 50 -/* release page queues lock every PURGE_LOOP_QUOTA iterations */ -#define PURGE_LOOP_QUOTA 100 - - num_purged_pages = 0; - if (object->purgable == VM_PURGABLE_DENY) - return num_purged_pages; + vm_object_lock_assert_exclusive(object); - assert(object->purgable != VM_PURGABLE_NONVOLATILE); - object->purgable = VM_PURGABLE_EMPTY; + if (object->purgable == VM_PURGABLE_DENY) + return; assert(object->copy == VM_OBJECT_NULL); assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); - purge_loop_quota = PURGE_LOOP_QUOTA; - - local_freeq = VM_PAGE_NULL; - local_freed = 0; - - /* - * Go through the object's resident pages and try and discard them. - */ - next = (vm_page_t)queue_first(&object->memq); - while (!queue_end(&object->memq, (queue_entry_t)next)) { - p = next; - next = (vm_page_t)queue_next(&next->listq); - - if (purge_loop_quota-- == 0) { - /* - * Avoid holding the page queues lock for too long. - * Let someone else take it for a while if needed. 
- * Keep holding the object's lock to guarantee that - * the object's page list doesn't change under us - * while we yield. - */ - if (local_freeq != VM_PAGE_NULL) { - /* - * Flush our queue of pages to free. - */ - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; - } - mutex_yield(&vm_page_queue_lock); - - /* resume with the current page and a new quota */ - purge_loop_quota = PURGE_LOOP_QUOTA; - } - - if (p->wire_count) { - /* don't discard a wired page */ - vm_page_purged_wired++; - - skip_page: - /* - * This page is no longer "purgeable", - * for accounting purposes. - */ - assert(vm_page_purgeable_count > 0); - vm_page_purgeable_count--; - continue; - } - - if (p->busy) { - /* - * We can't reclaim a busy page but we can deactivate - * it (if it's not wired) to make sure it gets - * considered by vm_pageout_scan() later. - */ - vm_page_deactivate(p); - vm_page_purged_busy++; - goto skip_page; - } - - if (p->cleaning || p->laundry || p->list_req_pending) { - /* page is being acted upon, so don't mess with it */ - vm_page_purged_others++; - goto skip_page; - } - assert(!p->laundry); - assert(p->object != kernel_object); - - /* we can discard this page */ - - /* advertize that this page is in a transition state */ - p->busy = TRUE; - - if (p->pmapped == TRUE) { - /* unmap the page */ - int refmod_state; - - refmod_state = pmap_disconnect(p->phys_page); - if (refmod_state & VM_MEM_MODIFIED) { - p->dirty = TRUE; - } - } - - if (p->dirty || p->precious) { - /* we saved the cost of cleaning this page ! 
*/ - num_purged_pages++; - vm_page_purged_count++; + if(object->purgable == VM_PURGABLE_VOLATILE) { + unsigned int delta; + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + if (delta != 0) { + assert(vm_page_purgeable_count >= + delta); + OSAddAtomic(-delta, + (SInt32 *)&vm_page_purgeable_count); } - - vm_page_free_prepare(p); - /* - * vm_page_purgeable_count is not updated when freeing - * a page from an "empty" object, so do it explicitly here. - */ - assert(vm_page_purgeable_count > 0); - vm_page_purgeable_count--; - - /* ... and put it on our queue of pages to free */ - assert(p->pageq.next == NULL && - p->pageq.prev == NULL); - p->pageq.next = (queue_entry_t) local_freeq; - local_freeq = p; - if (++local_freed >= PURGE_BATCH_FREE_LIMIT) { - /* flush our queue of pages to free */ - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; + if (object->wired_page_count != 0) { + assert(vm_page_purgeable_wired_count >= + object->wired_page_count); + OSAddAtomic(-object->wired_page_count, + (SInt32 *)&vm_page_purgeable_wired_count); } } - - /* flush our local queue of pages to free one last time */ - if (local_freeq != VM_PAGE_NULL) { - vm_page_free_list(local_freeq); - local_freeq = VM_PAGE_NULL; - local_freed = 0; - } - - return num_purged_pages; + object->purgable = VM_PURGABLE_EMPTY; + + vm_object_reap_pages(object, REAP_PURGEABLE); } + /* * vm_object_purgeable_control() allows the caller to control and investigate the @@ -5414,35 +6126,81 @@ vm_object_purgable_control( return KERN_SUCCESS; } + if ((*state) & VM_PURGABLE_DEBUG_EMPTY) { + object->volatile_empty = TRUE; + } + if ((*state) & VM_PURGABLE_DEBUG_FAULT) { + object->volatile_fault = TRUE; + } + new_state = *state & VM_PURGABLE_STATE_MASK; + if (new_state == VM_PURGABLE_VOLATILE && + object->volatile_empty) { + new_state = VM_PURGABLE_EMPTY; + } + switch (new_state) { case VM_PURGABLE_DENY: 
case VM_PURGABLE_NONVOLATILE: object->purgable = new_state; - if (old_state != VM_PURGABLE_NONVOLATILE) { + if (old_state == VM_PURGABLE_VOLATILE) { + unsigned int delta; + + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + + assert(vm_page_purgeable_count >= delta); + + if (delta != 0) { + OSAddAtomic(-delta, + (SInt32 *)&vm_page_purgeable_count); + } + if (object->wired_page_count != 0) { + assert(vm_page_purgeable_wired_count >= + object->wired_page_count); + OSAddAtomic(-object->wired_page_count, + (SInt32 *)&vm_page_purgeable_wired_count); + } + vm_page_lock_queues(); - if (old_state==VM_PURGABLE_VOLATILE) { - assert(vm_page_purgeable_count >= - object->resident_page_count); - vm_page_purgeable_count -= object->resident_page_count; - - assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t queue = vm_purgeable_object_remove(object); - assert(queue); - - vm_purgeable_token_delete_first(queue); - assert(queue->debug_count_objects>=0); - }; + + assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ + purgeable_q_t queue = vm_purgeable_object_remove(object); + assert(queue); + + vm_purgeable_token_delete_first(queue); + assert(queue->debug_count_objects>=0); + vm_page_unlock_queues(); } break; case VM_PURGABLE_VOLATILE: - + if (object->volatile_fault) { + vm_page_t p; + int refmod; + + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (p->busy || + VM_PAGE_WIRED(p) || + p->fictitious) { + continue; + } + refmod = pmap_disconnect(p->phys_page); + if ((refmod & VM_MEM_MODIFIED) && + !p->dirty) { + p->dirty = TRUE; + } + } + } + if (old_state == VM_PURGABLE_EMPTY && object->resident_page_count == 0) break; + purgeable_q_t queue; /* find the correct queue */ @@ -5457,6 +6215,8 @@ vm_object_purgable_control( if (old_state == VM_PURGABLE_NONVOLATILE || old_state == 
VM_PURGABLE_EMPTY) { + unsigned int delta; + /* try to add token... this can fail */ vm_page_lock_queues(); @@ -5465,10 +6225,22 @@ vm_object_purgable_control( vm_page_unlock_queues(); return result; } - vm_page_purgeable_count += object->resident_page_count; - vm_page_unlock_queues(); + assert(object->resident_page_count >= + object->wired_page_count); + delta = (object->resident_page_count - + object->wired_page_count); + + if (delta != 0) { + OSAddAtomic(delta, + &vm_page_purgeable_count); + } + if (object->wired_page_count != 0) { + OSAddAtomic(object->wired_page_count, + &vm_page_purgeable_wired_count); + } + object->purgable = new_state; /* object should not be on a queue */ @@ -5508,25 +6280,40 @@ vm_object_purgable_control( case VM_PURGABLE_EMPTY: - if (old_state != new_state) - { - assert(old_state==VM_PURGABLE_NONVOLATILE || old_state==VM_PURGABLE_VOLATILE); - if(old_state==VM_PURGABLE_VOLATILE) { - assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t old_queue=vm_purgeable_object_remove(object); - assert(old_queue); - vm_page_lock_queues(); - vm_purgeable_token_delete_first(old_queue); + if (object->volatile_fault) { + vm_page_t p; + int refmod; + + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (p->busy || + VM_PAGE_WIRED(p) || + p->fictitious) { + continue; + } + refmod = pmap_disconnect(p->phys_page); + if ((refmod & VM_MEM_MODIFIED) && + !p->dirty) { + p->dirty = TRUE; + } } + } + + if (old_state != new_state) { + assert(old_state == VM_PURGABLE_NONVOLATILE || + old_state == VM_PURGABLE_VOLATILE); + if (old_state == VM_PURGABLE_VOLATILE) { + purgeable_q_t old_queue; - if (old_state==VM_PURGABLE_NONVOLATILE || - old_state == VM_PURGABLE_EMPTY) { + /* object should be on a queue */ + assert(object->objq.next != NULL && + object->objq.prev != NULL); + old_queue = vm_purgeable_object_remove(object); + assert(old_queue); vm_page_lock_queues(); - vm_page_purgeable_count += 
object->resident_page_count; + vm_purgeable_token_delete_first(old_queue); + vm_page_unlock_queues(); } - object->purgable = VM_PURGABLE_VOLATILE; (void) vm_object_purge(object); - vm_page_unlock_queues(); } break; @@ -5653,10 +6440,12 @@ vm_object_reference( kern_return_t adjust_vm_object_cache( __unused vm_size_t oval, - vm_size_t nval) + __unused vm_size_t nval) { +#if VM_OBJECT_CACHE vm_object_cached_max = nval; vm_object_cache_trim(FALSE); +#endif return (KERN_SUCCESS); } #endif /* MACH_BSD */ @@ -5672,6 +6461,7 @@ adjust_vm_object_cache( * * The VM objects must not be locked by caller. */ +unsigned int vm_object_transpose_count = 0; kern_return_t vm_object_transpose( vm_object_t object1, @@ -5681,13 +6471,13 @@ vm_object_transpose( vm_object_t tmp_object; kern_return_t retval; boolean_t object1_locked, object2_locked; - boolean_t object1_paging, object2_paging; vm_page_t page; vm_object_offset_t page_offset; + lck_mtx_t *hash_lck; + vm_object_hash_entry_t hash_entry; tmp_object = VM_OBJECT_NULL; object1_locked = FALSE; object2_locked = FALSE; - object1_paging = FALSE; object2_paging = FALSE; if (object1 == object2 || object1 == VM_OBJECT_NULL || @@ -5700,6 +6490,29 @@ vm_object_transpose( goto done; } + /* + * Since we need to lock both objects at the same time, + * make sure we always lock them in the same order to + * avoid deadlocks. + */ + if (object1 > object2) { + tmp_object = object1; + object1 = object2; + object2 = tmp_object; + } + + /* + * Allocate a temporary VM object to hold object1's contents + * while we copy object2 to object1. + */ + tmp_object = vm_object_allocate(transpose_size); + vm_object_lock(tmp_object); + tmp_object->can_persist = FALSE; + + + /* + * Grab control of the 1st VM object. 
+ */ vm_object_lock(object1); object1_locked = TRUE; if (!object1->alive || object1->terminating || @@ -5712,17 +6525,19 @@ vm_object_transpose( goto done; } /* - * Since we're about to mess with the object's backing store, - * mark it as "paging_in_progress". Note that this is not enough + * We're about to mess with the object's backing store and + * taking a "paging_in_progress" reference wouldn't be enough * to prevent any paging activity on this object, so the caller should * have "quiesced" the objects beforehand, via a UPL operation with * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) * and UPL_BLOCK_ACCESS (to mark the pages "busy"). + * + * Wait for any paging operation to complete (but only paging, not + * other kind of activities not linked to the pager). After we're + * satisfied that there's no more paging in progress, we keep the + * object locked, to guarantee that no one tries to access its pager. */ - vm_object_paging_begin(object1); - object1_paging = TRUE; - vm_object_unlock(object1); - object1_locked = FALSE; + vm_object_paging_only_wait(object1, THREAD_UNINT); /* * Same as above for the 2nd object... @@ -5735,34 +6550,8 @@ vm_object_transpose( retval = KERN_INVALID_VALUE; goto done; } - vm_object_paging_begin(object2); - object2_paging = TRUE; - vm_object_unlock(object2); - object2_locked = FALSE; - - /* - * Allocate a temporary VM object to hold object1's contents - * while we copy object2 to object1. - */ - tmp_object = vm_object_allocate(transpose_size); - vm_object_lock(tmp_object); - vm_object_paging_begin(tmp_object); - tmp_object->can_persist = FALSE; + vm_object_paging_only_wait(object2, THREAD_UNINT); - /* - * Since we need to lock both objects at the same time, - * make sure we always lock them in the same order to - * avoid deadlocks.
- */ - if (object1 < object2) { - vm_object_lock(object1); - vm_object_lock(object2); - } else { - vm_object_lock(object2); - vm_object_lock(object1); - } - object1_locked = TRUE; - object2_locked = TRUE; if (object1->size != object2->size || object1->size != transpose_size) { @@ -5807,15 +6596,13 @@ vm_object_transpose( assert(queue_empty(&object1->memq)); } else { /* transfer object1's pages to tmp_object */ - vm_page_lock_queues(); while (!queue_empty(&object1->memq)) { page = (vm_page_t) queue_first(&object1->memq); page_offset = page->offset; - vm_page_remove(page); + vm_page_remove(page, TRUE); page->offset = page_offset; queue_enter(&tmp_object->memq, page, vm_page_t, listq); } - vm_page_unlock_queues(); assert(queue_empty(&object1->memq)); /* transfer object2's pages to object1 */ while (!queue_empty(&object2->memq)) { @@ -5840,14 +6627,17 @@ MACRO_BEGIN \ object2->field = tmp_object->field; \ MACRO_END + /* "Lock" refers to the object not its contents */ /* "size" should be identical */ assert(object1->size == object2->size); - /* "Lock" refers to the object not its contents */ + /* "memq_hint" was updated above when transposing pages */ /* "ref_count" refers to the object not its contents */ #if TASK_SWAPPER /* "res_count" refers to the object not its contents */ #endif /* "resident_page_count" was updated above when transposing pages */ + /* "wired_page_count" was updated above when transposing pages */ + /* "reusable_page_count" was updated above when transposing pages */ /* there should be no "copy" */ assert(!object1->copy); assert(!object2->copy); @@ -5869,8 +6659,10 @@ MACRO_END } __TRANSPOSE_FIELD(copy_strategy); /* "paging_in_progress" refers to the object not its contents */ - assert(object1->paging_in_progress); - assert(object2->paging_in_progress); + assert(!object1->paging_in_progress); + assert(!object2->paging_in_progress); + assert(object1->activity_in_progress); + assert(object2->activity_in_progress); /* "all_wanted" refers to the object 
not its contents */ __TRANSPOSE_FIELD(pager_created); __TRANSPOSE_FIELD(pager_initialized); @@ -5898,11 +6690,12 @@ MACRO_END /* "shadow_severed" refers to the object not its contents */ __TRANSPOSE_FIELD(phys_contiguous); __TRANSPOSE_FIELD(nophyscache); - /* "cached_list" should be NULL */ + /* "cached_list.next" points to transposed object */ + object1->cached_list.next = (queue_entry_t) object2; + object2->cached_list.next = (queue_entry_t) object1; + /* "cached_list.prev" should be NULL */ assert(object1->cached_list.prev == NULL); - assert(object1->cached_list.next == NULL); assert(object2->cached_list.prev == NULL); - assert(object2->cached_list.next == NULL); /* "msr_q" is linked to the object not its contents */ assert(queue_empty(&object1->msr_q)); assert(queue_empty(&object2->msr_q)); @@ -5919,10 +6712,38 @@ MACRO_END #endif __TRANSPOSE_FIELD(wimg_bits); __TRANSPOSE_FIELD(code_signed); - __TRANSPOSE_FIELD(not_in_use); -#ifdef UPL_DEBUG + if (object1->hashed) { + hash_lck = vm_object_hash_lock_spin(object2->pager); + hash_entry = vm_object_hash_lookup(object2->pager, FALSE); + assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); + hash_entry->object = object2; + vm_object_hash_unlock(hash_lck); + } + if (object2->hashed) { + hash_lck = vm_object_hash_lock_spin(object1->pager); + hash_entry = vm_object_hash_lookup(object1->pager, FALSE); + assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); + hash_entry->object = object1; + vm_object_hash_unlock(hash_lck); + } + __TRANSPOSE_FIELD(hashed); + object1->transposed = TRUE; + object2->transposed = TRUE; + __TRANSPOSE_FIELD(mapping_in_progress); + __TRANSPOSE_FIELD(volatile_empty); + __TRANSPOSE_FIELD(volatile_fault); + __TRANSPOSE_FIELD(all_reusable); + assert(object1->blocked_access); + assert(object2->blocked_access); + assert(object1->__object2_unused_bits == 0); + assert(object2->__object2_unused_bits == 0); +#if UPL_DEBUG /* "uplq" refers to the object not its contents (see upl_transpose()) */ #endif + 
assert(object1->objq.next == NULL); + assert(object1->objq.prev == NULL); + assert(object2->objq.next == NULL); + assert(object2->objq.prev == NULL); #undef __TRANSPOSE_FIELD @@ -5933,7 +6754,6 @@ MACRO_END * Cleanup. */ if (tmp_object != VM_OBJECT_NULL) { - vm_object_paging_end(tmp_object); vm_object_unlock(tmp_object); /* * Re-initialize the temporary object to avoid @@ -5952,25 +6772,15 @@ MACRO_END vm_object_unlock(object2); object2_locked = FALSE; } - if (object1_paging) { - vm_object_lock(object1); - vm_object_paging_end(object1); - vm_object_unlock(object1); - object1_paging = FALSE; - } - if (object2_paging) { - vm_object_lock(object2); - vm_object_paging_end(object2); - vm_object_unlock(object2); - object2_paging = FALSE; - } + + vm_object_transpose_count++; return retval; } /* - * vm_object_build_cluster + * vm_object_cluster_size * * Determine how big a cluster we should issue an I/O for... * @@ -5983,15 +6793,23 @@ MACRO_END * */ extern int speculative_reads_disabled; +#if CONFIG_EMBEDDED +unsigned int preheat_pages_max = MAX_UPL_TRANSFER; +unsigned int preheat_pages_min = 8; +unsigned int preheat_pages_mult = 4; +#else +unsigned int preheat_pages_max = MAX_UPL_TRANSFER; +unsigned int preheat_pages_min = 8; +unsigned int preheat_pages_mult = 4; +#endif -uint32_t pre_heat_scaling[MAX_UPL_TRANSFER]; -uint32_t pre_heat_cluster[MAX_UPL_TRANSFER]; +uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; +uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; -#define PRE_HEAT_MULTIPLIER 4 __private_extern__ void vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, - vm_size_t *length, vm_object_fault_info_t fault_info) + vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming) { vm_size_t pre_heat_size; vm_size_t tail_size; @@ -6005,20 +6823,33 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, vm_behavior_t behavior; boolean_t look_behind = TRUE; boolean_t look_ahead = TRUE; + uint32_t throttle_limit; int 
sequential_run; int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; + unsigned int max_ph_size; + unsigned int min_ph_size; + unsigned int ph_mult; assert( !(*length & PAGE_MASK)); assert( !(*start & PAGE_MASK_64)); - if ( (max_length = *length) > (MAX_UPL_TRANSFER * PAGE_SIZE) ) - max_length = (MAX_UPL_TRANSFER * PAGE_SIZE); + if ( (ph_mult = preheat_pages_mult) < 1 ) + ph_mult = 1; + if ( (min_ph_size = preheat_pages_min) < 1 ) + min_ph_size = 1; + if ( (max_ph_size = preheat_pages_max) > MAX_UPL_TRANSFER ) + max_ph_size = MAX_UPL_TRANSFER; + + if ( (max_length = *length) > (max_ph_size * PAGE_SIZE) ) + max_length = (max_ph_size * PAGE_SIZE); + /* * we'll always return a cluster size of at least * 1 page, since the original fault must always * be processed */ *length = PAGE_SIZE; + *io_streaming = 0; if (speculative_reads_disabled || fault_info == NULL || max_length == 0) { /* @@ -6028,7 +6859,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } orig_start = *start; target_start = orig_start; - cluster_size = round_page_32(fault_info->cluster_size); + cluster_size = round_page(fault_info->cluster_size); behavior = fault_info->behavior; vm_object_lock(object); @@ -6064,6 +6895,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } else { sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; } + } switch(behavior) { @@ -6074,34 +6906,36 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) goto out; - if (sequential_run >= (3 * PAGE_SIZE)) { + if (sequential_run >= (3 * PAGE_SIZE)) { pre_heat_size = sequential_run + PAGE_SIZE; - if ((behavior = sequential_behavior) == VM_BEHAVIOR_SEQUENTIAL) + if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) look_behind = FALSE; else look_ahead = FALSE; + + *io_streaming = 1; } else { - uint32_t pages_unused; - if (object->pages_created < 32 * PRE_HEAT_MULTIPLIER) { + if (object->pages_created < 32 * ph_mult) { 
/* * prime the pump */ - pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; + pre_heat_size = PAGE_SIZE * 8 * ph_mult; break; } - pages_unused = object->pages_created - object->pages_used; - - if (pages_unused < (object->pages_created / 8)) { - pre_heat_size = PAGE_SIZE * 32 * PRE_HEAT_MULTIPLIER; - } else if (pages_unused < (object->pages_created / 4)) { - pre_heat_size = PAGE_SIZE * 16 * PRE_HEAT_MULTIPLIER; - } else if (pages_unused < (object->pages_created / 2)) { - pre_heat_size = PAGE_SIZE * 8 * PRE_HEAT_MULTIPLIER; - } else { - pre_heat_size = PAGE_SIZE * 4 * PRE_HEAT_MULTIPLIER; - } + /* + * Linear growth in PH size: The maximum size is max_length... + * this calculation will result in a size that is neither a + * power of 2 nor a multiple of PAGE_SIZE... so round + * it up to the nearest PAGE_SIZE boundary + */ + pre_heat_size = (ph_mult * (max_length * object->pages_used) / object->pages_created); + + if (pre_heat_size < PAGE_SIZE * min_ph_size) + pre_heat_size = PAGE_SIZE * min_ph_size; + else + pre_heat_size = round_page(pre_heat_size); } break; @@ -6114,6 +6948,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if ((pre_heat_size = cluster_size) == 0) pre_heat_size = sequential_run + PAGE_SIZE; look_behind = FALSE; + *io_streaming = 1; break; @@ -6121,29 +6956,72 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if ((pre_heat_size = cluster_size) == 0) pre_heat_size = sequential_run + PAGE_SIZE; look_ahead = FALSE; + *io_streaming = 1; break; } + throttle_limit = (uint32_t) max_length; + assert(throttle_limit == max_length); + + if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) { + if (max_length > throttle_limit) + max_length = throttle_limit; + } if (pre_heat_size > max_length) pre_heat_size = max_length; - if (behavior == VM_BEHAVIOR_DEFAULT && vm_page_free_count < vm_page_free_target) - pre_heat_size /= 2; + if (behavior == VM_BEHAVIOR_DEFAULT) { + if
(vm_page_free_count < vm_page_throttle_limit) + pre_heat_size = trunc_page(pre_heat_size / 8); + else if (vm_page_free_count < vm_page_free_target) + pre_heat_size = trunc_page(pre_heat_size / 2); + if (pre_heat_size <= PAGE_SIZE) + goto out; + } if (look_ahead == TRUE) { - if (look_behind == TRUE) - target_start &= ~(pre_heat_size - 1); + if (look_behind == TRUE) { + /* + * if we get here it's due to a random access... + * so we want to center the original fault address + * within the cluster we will issue... make sure + * to calculate 'head_size' as a multiple of PAGE_SIZE... + * 'pre_heat_size' is a multiple of PAGE_SIZE but not + * necessarily an even number of pages so we need to truncate + * the result to a PAGE_SIZE boundary + */ + head_size = trunc_page(pre_heat_size / 2); - if ((target_start + pre_heat_size) > object_size) - pre_heat_size = (vm_size_t)(trunc_page_64(object_size - target_start)); + if (target_start > head_size) + target_start -= head_size; + else + target_start = 0; - tail_size = pre_heat_size - (orig_start - target_start) - PAGE_SIZE; + /* + * 'target_start' at this point represents the beginning offset + * of the cluster we are considering... 'orig_start' will be in + * the center of this cluster if we didn't have to clip the start + * due to running into the start of the file + */ + } + if ((target_start + pre_heat_size) > object_size) + pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start)); + /* + * at this point calculate the number of pages beyond the original fault + * address that we want to consider... this is guaranteed not to extend beyond + * the current EOF... + */ + assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start)); + tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE; } else { if (pre_heat_size > target_start) - pre_heat_size = target_start; + pre_heat_size = (vm_size_t) target_start; /* XXX: 32-bit vs 64-bit ? Joe ?
*/ tail_size = 0; } + assert( !(target_start & PAGE_MASK_64)); + assert( !(pre_heat_size & PAGE_MASK)); + pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; if (pre_heat_size <= PAGE_SIZE) @@ -6152,7 +7030,9 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, if (look_behind == TRUE) { /* * take a look at the pages before the original - * faulting offset + * faulting offset... recalculate this in case + * we had to clip 'pre_heat_size' above to keep + * from running past the EOF. */ head_size = pre_heat_size - tail_size - PAGE_SIZE; @@ -6192,6 +7072,8 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ if (offset >= fault_info->hi_offset) break; + assert(offset < object_size); + /* * for external objects and internal objects w/o an existence map * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN @@ -6215,6 +7097,9 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, } } out: + if (*length > max_length) + *length = max_length; + pre_heat_cluster[*length / PAGE_SIZE]++; vm_object_unlock(object); @@ -6276,10 +7161,7 @@ vm_object_page_op( if (dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_lock_queues(); - vm_page_free(dst_page); - vm_page_unlock_queues(); - + VM_PAGE_FREE(dst_page); break; } @@ -6393,17 +7275,23 @@ vm_object_range_op( vm_object_offset_t offset_beg, vm_object_offset_t offset_end, int ops, - int *range) + uint32_t *range) { vm_object_offset_t offset; vm_page_t dst_page; + if (offset_end - offset_beg > (uint32_t) -1) { + /* range is too big and would overflow "*range" */ + return KERN_INVALID_ARGUMENT; + } if (object->resident_page_count == 0) { if (range) { - if (ops & UPL_ROP_PRESENT) + if (ops & UPL_ROP_PRESENT) { *range = 0; - else - *range = offset_end - offset_beg; + } else { + *range = (uint32_t) (offset_end - offset_beg); + assert(*range == (offset_end - offset_beg)); + } } return KERN_SUCCESS; } @@ -6437,11 +7325,9 @@ vm_object_range_op( if 
(dst_page->pmapped == TRUE) pmap_disconnect(dst_page->phys_page); - vm_page_lock_queues(); - vm_page_free(dst_page); - vm_page_unlock_queues(); + VM_PAGE_FREE(dst_page); - } else if (ops & UPL_ROP_ABSENT) + } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent) break; } else if (ops & UPL_ROP_PRESENT) break; @@ -6453,9 +7339,12 @@ vm_object_range_op( if (range) { if (offset > offset_end) offset = offset_end; - if(offset > offset_beg) - *range = offset - offset_beg; - else *range=0; + if(offset > offset_beg) { + *range = (uint32_t) (offset - offset_beg); + assert(*range == (offset - offset_beg)); + } else { + *range = 0; + } } return KERN_SUCCESS; } @@ -6474,20 +7363,33 @@ vm_object_lock(vm_object_t object) } boolean_t -vm_object_lock_try(vm_object_t object) +vm_object_lock_avoid(vm_object_t object) { if (object == vm_pageout_scan_wants_object) { scan_object_collision++; - mutex_pause(2); + return TRUE; } + return FALSE; +} + +boolean_t +_vm_object_lock_try(vm_object_t object) +{ return (lck_rw_try_lock_exclusive(&object->Lock)); } +boolean_t +vm_object_lock_try(vm_object_t object) +{ + if (vm_object_lock_avoid(object)) { + mutex_pause(2); + } + return _vm_object_lock_try(object); +} void vm_object_lock_shared(vm_object_t object) { - if (object == vm_pageout_scan_wants_object) { - scan_object_collision++; + if (vm_object_lock_avoid(object)) { mutex_pause(2); } lck_rw_lock_shared(&object->Lock); @@ -6496,8 +7398,7 @@ vm_object_lock_shared(vm_object_t object) boolean_t vm_object_lock_try_shared(vm_object_t object) { - if (object == vm_pageout_scan_wants_object) { - scan_object_collision++; + if (vm_object_lock_avoid(object)) { mutex_pause(2); } return (lck_rw_try_lock_shared(&object->Lock)); diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index 68f60ef83..8ad7db64e 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -89,6 +89,8 @@ #include #endif /* MACH_PAGEMAP */ +#include + struct vm_page; /* @@ -106,6 +108,7 @@ struct vm_object_fault_info { 
vm_map_offset_t lo_offset; vm_map_offset_t hi_offset; boolean_t no_cache; + boolean_t stealth; }; @@ -124,6 +127,8 @@ struct vm_object { #endif /* TASK_SWAPPER */ unsigned int resident_page_count; /* number of resident pages */ + unsigned int wired_page_count; /* number of wired pages */ + unsigned int reusable_page_count; struct vm_object *copy; /* Object that should receive * a copy of my changed pages, @@ -142,7 +147,7 @@ struct vm_object { memory_object_copy_strategy_t copy_strategy; /* How to handle data copy */ - int paging_in_progress; + short paging_in_progress; /* The memory object ports are * being used (e.g., for pagein * or pageout) -- don't change @@ -150,6 +155,8 @@ struct vm_object { * don't collapse, destroy or * terminate) */ + short activity_in_progress; + unsigned int /* boolean_t array */ all_wanted:11, /* Bit array of "want to be * awakened" notations. See @@ -288,10 +295,16 @@ struct vm_object { code_signed:1, /* pages are signed and should be validated; the signatures are stored with the pager */ + hashed:1, /* object/pager entered in hash */ + transposed:1, /* object was transposed with another */ mapping_in_progress:1, /* pager being mapped/unmapped */ - not_in_use:22; /* for expansion */ + volatile_empty:1, + volatile_fault:1, + all_reusable:1, + blocked_access:1, + __object2_unused_bits:16; /* for expansion */ -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_head_t uplq; /* List of outstanding upls */ #endif /* UPL_DEBUG */ @@ -310,6 +323,11 @@ struct vm_object { queue_chain_t objq; /* object queue - currently used for purgable queues */ }; +#define VM_OBJECT_PURGEABLE_FAULT_ERROR(object) \ + ((object)->volatile_fault && \ + ((object)->purgable == VM_PURGABLE_VOLATILE || \ + (object)->purgable == VM_PURGABLE_EMPTY)) + #define VM_PAGE_REMOVE(page) \ MACRO_BEGIN \ vm_page_t __page = (page); \ @@ -356,27 +374,31 @@ struct msync_req { vm_object_offset_t offset; vm_object_size_t length; vm_object_t object; /* back pointer */ - decl_mutex_data(, 
msync_req_lock) /* Lock for this structure */ + decl_lck_mtx_data(, msync_req_lock) /* Lock for this structure */ }; typedef struct msync_req *msync_req_t; #define MSYNC_REQ_NULL ((msync_req_t) 0) + +extern lck_grp_t vm_map_lck_grp; +extern lck_attr_t vm_map_lck_attr; + /* * Macros to allocate and free msync_reqs */ #define msync_req_alloc(msr) \ - MACRO_BEGIN \ + MACRO_BEGIN \ (msr) = (msync_req_t)kalloc(sizeof(struct msync_req)); \ - mutex_init(&(msr)->msync_req_lock, 0); \ - msr->flag = VM_MSYNC_INITIALIZED; \ - MACRO_END + lck_mtx_init(&(msr)->msync_req_lock, &vm_map_lck_grp, &vm_map_lck_attr); \ + msr->flag = VM_MSYNC_INITIALIZED; \ + MACRO_END #define msync_req_free(msr) \ (kfree((msr), sizeof(struct msync_req))) -#define msr_lock(msr) mutex_lock(&(msr)->msync_req_lock) -#define msr_unlock(msr) mutex_unlock(&(msr)->msync_req_lock) +#define msr_lock(msr) lck_mtx_lock(&(msr)->msync_req_lock) +#define msr_unlock(msr) lck_mtx_unlock(&(msr)->msync_req_lock) /* * Declare procedures that operate on VM objects. @@ -430,7 +452,7 @@ __private_extern__ void vm_object_res_deallocate( vm_object_t RLObject = (object); \ vm_object_lock_assert_shared(object); \ assert((RLObject)->ref_count > 0); \ - OSAddAtomic(1, (SInt32 *)&(RLObject)->ref_count); \ + OSAddAtomic(1, &(RLObject)->ref_count); \ assert((RLObject)->ref_count > 1); \ /* XXX we would need an atomic version of the following ... 
*/ \ vm_object_res_reference(RLObject); \ @@ -446,8 +468,8 @@ __private_extern__ void vm_object_reference( MACRO_BEGIN \ vm_object_t RObject = (object); \ if (RObject) { \ - vm_object_lock(RObject); \ - vm_object_reference_locked(RObject); \ + vm_object_lock_shared(RObject); \ + vm_object_reference_shared(RObject); \ vm_object_unlock(RObject); \ } \ MACRO_END @@ -478,9 +500,16 @@ __private_extern__ void vm_object_deactivate_pages( vm_object_t object, vm_object_offset_t offset, vm_object_size_t size, - boolean_t kill_page); + boolean_t kill_page, + boolean_t reusable_page); + +__private_extern__ void vm_object_reuse_pages( + vm_object_t object, + vm_object_offset_t start_offset, + vm_object_offset_t end_offset, + boolean_t allow_partial_reuse); -__private_extern__ unsigned int vm_object_purge( +__private_extern__ void vm_object_purge( vm_object_t object); __private_extern__ kern_return_t vm_object_purgable_control( @@ -525,7 +554,7 @@ __private_extern__ kern_return_t vm_object_copy_slowly( vm_object_t src_object, vm_object_offset_t src_offset, vm_object_size_t size, - int interruptible, + boolean_t interruptible, vm_object_t *_result_object); __private_extern__ vm_object_t vm_object_copy_delayed( @@ -605,7 +634,8 @@ __private_extern__ void vm_object_cluster_size( vm_object_t object, vm_object_offset_t *start, vm_size_t *length, - vm_object_fault_info_t fault_info); + vm_object_fault_info_t fault_info, + uint32_t *io_streaming); __private_extern__ kern_return_t vm_object_populate_with_private( vm_object_t object, @@ -629,7 +659,17 @@ extern kern_return_t vm_object_range_op( vm_object_offset_t offset_beg, vm_object_offset_t offset_end, int ops, - int *range); + uint32_t *range); + + +__private_extern__ void vm_object_reap_pages( + vm_object_t object, + int reap_type); +#define REAP_REAP 0 +#define REAP_TERMINATE 1 +#define REAP_PURGEABLE 2 +#define REAP_DATA_FLUSH 3 + /* * Event waiting handling @@ -643,6 +683,8 @@ extern kern_return_t vm_object_range_op( #define 
VM_OBJECT_EVENT_UNCACHING 5 #define VM_OBJECT_EVENT_COPY_CALL 6 #define VM_OBJECT_EVENT_CACHING 7 +#define VM_OBJECT_EVENT_UNBLOCKED 8 +#define VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS 9 #define vm_object_assert_wait(object, event, interruptible) \ (((object)->all_wanted |= 1 << (event)), \ @@ -683,8 +725,9 @@ extern kern_return_t vm_object_range_op( #include #define VM_PIP_DEBUG_BEGIN(object) \ MACRO_BEGIN \ - if ((object)->paging_in_progress < VM_PIP_DEBUG_MAX_REFS) { \ - int pip = (object)->paging_in_progress; \ + int pip = ((object)->paging_in_progress + \ + (object)->activity_in_progress); \ + if (pip < VM_PIP_DEBUG_MAX_REFS) { \ (void) OSBacktrace(&(object)->pip_holders[pip].pip_retaddr[0], \ VM_PIP_DEBUG_STACK_FRAMES); \ } \ @@ -693,7 +736,26 @@ extern kern_return_t vm_object_range_op( #define VM_PIP_DEBUG_BEGIN(object) #endif /* VM_PIP_DEBUG */ -#define vm_object_paging_begin(object) \ +#define vm_object_activity_begin(object) \ + MACRO_BEGIN \ + vm_object_lock_assert_exclusive((object)); \ + assert((object)->paging_in_progress >= 0); \ + VM_PIP_DEBUG_BEGIN((object)); \ + (object)->activity_in_progress++; \ + MACRO_END + +#define vm_object_activity_end(object) \ + MACRO_BEGIN \ + vm_object_lock_assert_exclusive((object)); \ + assert((object)->activity_in_progress > 0); \ + (object)->activity_in_progress--; \ + if ((object)->paging_in_progress == 0 && \ + (object)->activity_in_progress == 0) \ + vm_object_wakeup((object), \ + VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \ + MACRO_END + +#define vm_object_paging_begin(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ assert((object)->paging_in_progress >= 0); \ @@ -701,20 +763,25 @@ extern kern_return_t vm_object_range_op( (object)->paging_in_progress++; \ MACRO_END -#define vm_object_paging_end(object) \ +#define vm_object_paging_end(object) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ assert((object)->paging_in_progress > 0); \ - if (--(object)->paging_in_progress == 0) { \ - 
vm_object_wakeup(object, \ - VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \ + (object)->paging_in_progress--; \ + if ((object)->paging_in_progress == 0) { \ + vm_object_wakeup((object), \ + VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS); \ + if ((object)->activity_in_progress == 0) \ + vm_object_wakeup((object), \ + VM_OBJECT_EVENT_PAGING_IN_PROGRESS); \ } \ MACRO_END #define vm_object_paging_wait(object, interruptible) \ MACRO_BEGIN \ vm_object_lock_assert_exclusive((object)); \ - while ((object)->paging_in_progress != 0) { \ + while ((object)->paging_in_progress != 0 || \ + (object)->activity_in_progress != 0) { \ wait_result_t _wr; \ \ _wr = vm_object_sleep((object), \ @@ -726,6 +793,21 @@ extern kern_return_t vm_object_range_op( } \ MACRO_END +#define vm_object_paging_only_wait(object, interruptible) \ + MACRO_BEGIN \ + vm_object_lock_assert_exclusive((object)); \ + while ((object)->paging_in_progress != 0) { \ + wait_result_t _wr; \ + \ + _wr = vm_object_sleep((object), \ + VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS,\ + (interruptible)); \ + \ + /*XXX if ((interruptible) && (_wr != THREAD_AWAKENED))*/\ + /*XXX break; */ \ + } \ + MACRO_END + #define vm_object_mapping_begin(object) \ MACRO_BEGIN \ @@ -772,6 +854,8 @@ extern vm_object_t vm_pageout_scan_wants_object; extern void vm_object_lock(vm_object_t); extern boolean_t vm_object_lock_try(vm_object_t); +extern boolean_t _vm_object_lock_try(vm_object_t); +extern boolean_t vm_object_lock_avoid(vm_object_t); extern void vm_object_lock_shared(vm_object_t); extern boolean_t vm_object_lock_try_shared(vm_object_t); @@ -789,7 +873,7 @@ extern boolean_t vm_object_lock_try_shared(vm_object_t); #define vm_object_unlock(object) lck_rw_done(&(object)->Lock) #define vm_object_lock_upgrade(object) lck_rw_lock_shared_to_exclusive(&(object)->Lock) -#define vm_object_lock_try_scan(object) lck_rw_try_lock_exclusive(&(object)->Lock) +#define vm_object_lock_try_scan(object) _vm_object_lock_try(object) /* * CAUTION: the following 
vm_object_lock_assert_held*() macros merely diff --git a/EXTERNAL_HEADERS/ppc/_limits.h b/osfmk/vm/vm_options.h similarity index 86% rename from EXTERNAL_HEADERS/ppc/_limits.h rename to osfmk/vm/vm_options.h index 53f939493..9128a12ac 100644 --- a/EXTERNAL_HEADERS/ppc/_limits.h +++ b/osfmk/vm/vm_options.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2009 Apple, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,9 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _PPC__LIMITS_H_ -#define _PPC__LIMITS_H_ -#define __DARWIN_CLK_TCK 100 /* ticks per second */ +#ifndef __VM_VM_OPTIONS_H__ +#define __VM_VM_OPTIONS_H__ -#endif /* _PPC__LIMITS_H_ */ +#define UPL_DEBUG DEBUG + +#endif /* __VM_VM_OPTIONS_H__ */ diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index 89310abe8..15eac5b2d 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -158,8 +158,9 @@ extern int speculative_age_index; */ struct vm_page { - queue_chain_t pageq; /* queue info for FIFO - * queue or free list (P) */ + queue_chain_t pageq; /* queue info for FIFO */ + /* queue or free list (P) */ + queue_chain_t listq; /* all pages in same object (O) */ struct vm_page *next; /* VP bucket link (O) */ @@ -169,7 +170,13 @@ struct vm_page { /* * The following word of flags is protected * by the "page queues" lock. + * + * we use the 'wire_count' field to store the local + * queue id if local queues are enabled... + * see the comments at 'VM_PAGE_QUEUES_REMOVE' as to + * why this is safe to do */ +#define local_id wire_count unsigned int wire_count:16, /* how many wired down maps use me? 
(O&P) */ /* boolean_t */ inactive:1, /* page is in inactive list (P) */ active:1, /* page is in active list (P) */ @@ -178,13 +185,16 @@ struct vm_page { laundry:1, /* page is being cleaned now (P)*/ free:1, /* page is on free list (P) */ reference:1, /* page has been used (P) */ - pageout:1, /* page wired & busy for pageout (P) */ gobbled:1, /* page used internally (P) */ private:1, /* Page should not be returned to * the free list (P) */ throttled:1, /* pager is not responding (P) */ + local:1, __unused_pageq_bits:5; /* 5 bits available here */ + ppnum_t phys_page; /* Physical address of page, passed + * to pmap_enter (read-only) */ + /* * The following word of flags is protected * by the "VM object" lock. @@ -194,10 +204,17 @@ struct vm_page { wanted:1, /* someone is waiting for page (O) */ tabled:1, /* page is in VP table (O) */ fictitious:1, /* Physical page doesn't exist (O) */ + /* + * IMPORTANT: the "pmapped" bit can be turned on while holding the + * VM object "shared" lock. See vm_fault_enter(). + * This is OK as long as it's the only bit in this bit field that + * can be updated without holding the VM object "exclusive" lock. 
+ */ pmapped:1, /* page has been entered at some - * point into a pmap (O) */ + * point into a pmap (O **shared**) */ wpmapped:1, /* page has been entered at some * point into a pmap for write (O) */ + pageout:1, /* page wired & busy for pageout (O) */ absent:1, /* Data has been requested, but is * not yet available (O) */ error:1, /* Data manager was unable to provide @@ -230,12 +247,14 @@ struct vm_page { no_cache:1, /* page is not to be cached and */ /* should be reused ahead of */ /* other pages */ - deactivated:1, zero_fill:1, - __unused_object_bits:8; /* 8 bits available here */ + reusable:1, + __unused_object_bits:7; /* 7 bits available here */ - ppnum_t phys_page; /* Physical address of page, passed - * to pmap_enter (read-only) */ +#if __LP64__ + unsigned int __unused_padding; /* Pad structure explicitly + * to 8-byte multiple for LP64 */ +#endif }; #define DEBUG_ENCRYPTED_SWAP 1 @@ -253,6 +272,16 @@ struct vm_page { typedef struct vm_page *vm_page_t; + +typedef struct vm_locks_array { + char pad __attribute__ ((aligned (64))); + lck_mtx_t vm_page_queue_lock2 __attribute__ ((aligned (64))); + lck_mtx_t vm_page_queue_free_lock2 __attribute__ ((aligned (64))); + char pad2 __attribute__ ((aligned (64))); +} vm_locks_array_t; + + +#define VM_PAGE_WIRED(m) ((!(m)->local && (m)->wire_count)) #define VM_PAGE_NULL ((vm_page_t) 0) #define NEXT_PAGE(m) ((vm_page_t) (m)->pageq.next) #define NEXT_PAGE_PTR(m) ((vm_page_t *) &(m)->pageq.next) @@ -267,7 +296,10 @@ typedef struct vm_page *vm_page_t; * some useful check on a page structure. 
*/ -#define VM_PAGE_CHECK(mem) do {} while (0) +#define VM_PAGE_CHECK(mem) \ + MACRO_BEGIN \ + VM_PAGE_QUEUES_ASSERT(mem, 1); \ + MACRO_END /* Page coloring: * @@ -289,6 +321,27 @@ unsigned int vm_color_mask; /* must be (vm_colors-1) */ extern unsigned int vm_cache_geometry_colors; /* optimal #colors based on cache geometry */ +/* + * Wired memory is a very limited resource and we can't let users exhaust it + * and deadlock the entire system. We enforce the following limits: + * + * vm_user_wire_limit (default: all memory minus vm_global_no_user_wire_amount) + * how much memory can be user-wired in one user task + * + * vm_global_user_wire_limit (default: same as vm_user_wire_limit) + * how much memory can be user-wired in all user tasks + * + * vm_global_no_user_wire_amount (default: VM_NOT_USER_WIREABLE) + * how much memory must remain user-unwired at any time + */ +#define VM_NOT_USER_WIREABLE (64*1024*1024) /* 64MB */ +extern +vm_map_size_t vm_user_wire_limit; +extern +vm_map_size_t vm_global_user_wire_limit; +extern +vm_map_size_t vm_global_no_user_wire_amount; + /* * Each pageable resident page falls into one of three lists: * @@ -310,6 +363,37 @@ unsigned int vm_cache_geometry_colors; /* optimal #colors based on cache geometr * ordered, in LRU-like fashion. 
*/ + +#define VPL_LOCK_SPIN 1 + +struct vpl { + unsigned int vpl_count; + queue_head_t vpl_queue; +#ifdef VPL_LOCK_SPIN + lck_spin_t vpl_lock; +#else + lck_mtx_t vpl_lock; + lck_mtx_ext_t vpl_lock_ext; +#endif +}; + +struct vplq { + union { + char cache_line_pad[128]; + struct vpl vpl; + } vpl_un; +}; +extern +unsigned int vm_page_local_q_count; +extern +struct vplq *vm_page_local_q; +extern +unsigned int vm_page_local_q_soft_limit; +extern +unsigned int vm_page_local_q_hard_limit; +extern +vm_locks_array_t vm_page_locks; + extern queue_head_t vm_page_queue_free[MAX_COLORS]; /* memory free queue */ extern @@ -322,6 +406,7 @@ extern queue_head_t vm_page_queue_inactive; /* inactive memory queue for normal pages */ extern queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */ +extern queue_head_t vm_page_queue_throttled; /* memory queue for throttled pageout pages */ extern @@ -344,48 +429,46 @@ unsigned int vm_page_speculative_count; /* How many speculative pages are unclai extern unsigned int vm_page_wire_count; /* How many pages are wired? */ extern -vm_map_size_t vm_user_wire_limit; /* How much memory can be locked by a user? */ -extern -vm_map_size_t vm_global_user_wire_limit; /* How much memory can be locked system wide by users? */ -extern unsigned int vm_page_free_target; /* How many do we want free? */ extern unsigned int vm_page_free_min; /* When to wakeup pageout */ extern +unsigned int vm_page_throttle_limit; /* When to throttle new page creation */ +extern +uint32_t vm_page_creation_throttle; /* When to throttle new page creation */ +extern unsigned int vm_page_inactive_target;/* How many do we want inactive? 
*/ extern unsigned int vm_page_inactive_min; /* When do wakeup pageout */ extern unsigned int vm_page_free_reserved; /* How many pages reserved to do pageout */ extern -unsigned int vm_page_zfill_throttle_count;/* Count of zero-fill allocations throttled */ +unsigned int vm_page_throttle_count; /* Count of page allocations throttled */ extern unsigned int vm_page_gobble_count; -extern -unsigned int vm_page_speculative_unused; +#if DEVELOPMENT || DEBUG extern unsigned int vm_page_speculative_used; +#endif + extern unsigned int vm_page_purgeable_count;/* How many pages are purgeable now ? */ extern +unsigned int vm_page_purgeable_wired_count;/* How many purgeable pages are wired now ? */ +extern uint64_t vm_page_purged_count; /* How many pages got purged so far ? */ -decl_mutex_data(,vm_page_queue_lock) - /* lock on active and inactive page queues */ -decl_mutex_data(,vm_page_queue_free_lock) - /* lock on free page queue array (ie, all colors) */ - extern unsigned int vm_page_free_wanted; /* how many threads are waiting for memory */ extern unsigned int vm_page_free_wanted_privileged; /* how many VM privileged threads are waiting for memory */ -extern vm_offset_t vm_page_fictitious_addr; +extern ppnum_t vm_page_fictitious_addr; /* (fake) phys_addr of fictitious pages */ -extern vm_offset_t vm_page_guard_addr; +extern ppnum_t vm_page_guard_addr; /* (fake) phys_addr of guard pages */ @@ -408,6 +491,8 @@ extern void vm_page_bootstrap( extern void vm_page_module_init(void) __attribute__((section("__TEXT, initcode"))); +extern void vm_page_init_local_q(void); + extern void vm_page_create( ppnum_t start, ppnum_t end); @@ -454,10 +539,11 @@ extern void vm_page_init( ppnum_t phys_page); extern void vm_page_free( - vm_page_t page); + vm_page_t page); -extern void vm_page_free_prepare( - vm_page_t page); +extern void vm_page_free_unlocked( + vm_page_t page, + boolean_t remove_from_hash); extern void vm_page_activate( vm_page_t page); @@ -465,6 +551,10 @@ extern void 
vm_page_activate( extern void vm_page_deactivate( vm_page_t page); +extern void vm_page_deactivate_internal( + vm_page_t page, + boolean_t clear_hw_reference); + extern void vm_page_lru( vm_page_t page); @@ -475,6 +565,10 @@ extern void vm_page_speculate( extern void vm_page_speculate_ageit( struct vm_speculative_age_q *aq); +extern void vm_page_reactivate_all_throttled(void); + +extern void vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks); + extern void vm_page_rename( vm_page_t page, vm_object_t new_object, @@ -487,10 +581,11 @@ extern void vm_page_insert( vm_object_offset_t offset); extern void vm_page_insert_internal( - vm_page_t page, + vm_page_t page, vm_object_t object, vm_object_offset_t offset, - boolean_t queues_lock_held); + boolean_t queues_lock_held, + boolean_t insert_in_hash); extern void vm_page_replace( vm_page_t mem, @@ -498,7 +593,8 @@ extern void vm_page_replace( vm_object_offset_t offset); extern void vm_page_remove( - vm_page_t page); + vm_page_t page, + boolean_t remove_from_hash); extern void vm_page_zero_fill( vm_page_t page); @@ -535,6 +631,13 @@ extern void vm_page_validate_cs_mapped( vm_page_t page, const void *kaddr); +extern void vm_page_free_prepare_queues( + vm_page_t page); + +extern void vm_page_free_prepare_object( + vm_page_t page, + boolean_t remove_from_hash); + /* * Functions implemented as macros. m->wanted and m->busy are * protected by the object lock. 
@@ -567,9 +670,7 @@ extern void vm_page_validate_cs_mapped( #define VM_PAGE_FREE(p) \ MACRO_BEGIN \ - vm_page_lock_queues(); \ - vm_page_free(p); \ - vm_page_unlock_queues(); \ + vm_page_free_unlocked(p, TRUE); \ MACRO_END #define VM_PAGE_GRAB_FICTITIOUS(M) \ @@ -578,21 +679,70 @@ extern void vm_page_validate_cs_mapped( vm_page_more_fictitious(); \ MACRO_END -#define VM_PAGE_ZFILL_THROTTLED() \ - (vm_page_free_count < vm_page_free_min && \ - !(current_thread()->options & TH_OPT_VMPRIV) && \ - ++vm_page_zfill_throttle_count) - #define VM_PAGE_WAIT() ((void)vm_page_wait(THREAD_UNINT)) -#define vm_page_lock_queues() mutex_lock(&vm_page_queue_lock) -#define vm_page_unlock_queues() mutex_unlock(&vm_page_queue_lock) +#define vm_page_queue_lock (vm_page_locks.vm_page_queue_lock2) +#define vm_page_queue_free_lock (vm_page_locks.vm_page_queue_free_lock2) + +#define vm_page_lock_queues() lck_mtx_lock(&vm_page_queue_lock) +#define vm_page_unlock_queues() lck_mtx_unlock(&vm_page_queue_lock) + +#define vm_page_lockspin_queues() lck_mtx_lock_spin(&vm_page_queue_lock) +#define vm_page_trylockspin_queues() lck_mtx_try_lock_spin(&vm_page_queue_lock) +#define vm_page_lockconvert_queues() lck_mtx_convert_spin(&vm_page_queue_lock) + +#ifdef VPL_LOCK_SPIN +#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_spin_init(&vlq->vpl_lock, vpl_grp, vpl_attr) +#define VPL_LOCK(vpl) lck_spin_lock(vpl) +#define VPL_UNLOCK(vpl) lck_spin_unlock(vpl) +#else +#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_mtx_init_ext(&vlq->vpl_lock, &vlq->vpl_lock_ext, vpl_grp, vpl_attr) +#define VPL_LOCK(vpl) lck_mtx_lock_spin(vpl) +#define VPL_UNLOCK(vpl) lck_mtx_unlock(vpl) +#endif -#define vm_page_lockspin_queues() mutex_lock_spin(&vm_page_queue_lock) +#if MACH_ASSERT +extern void vm_page_queues_assert(vm_page_t mem, int val); +#define VM_PAGE_QUEUES_ASSERT(mem, val) vm_page_queues_assert((mem), (val)) +#else +#define VM_PAGE_QUEUES_ASSERT(mem, val) +#endif + +/* + * 'vm_fault_enter' will place newly created 
pages (zero-fill and COW) onto the + * local queues if they exist... it's the only spot in the system where we add pages + * to those queues... once on those queues, those pages can only move to one of the + * global page queues or the free queues... they NEVER move from local q to local q. + * the 'local' state is stable when VM_PAGE_QUEUES_REMOVE is called since we're behind + * the global vm_page_queue_lock at this point... we still need to take the local lock + * in case this operation is being run on a different CPU than the local queue's identity, + * but we don't have to worry about the page moving to a global queue or becoming wired + * while we're grabbing the local lock since those operations would require the global + * vm_page_queue_lock to be held, and we already own it. + * + * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id... + * 'wired' and local are ALWAYS mutually exclusive conditions. + */ #define VM_PAGE_QUEUES_REMOVE(mem) \ MACRO_BEGIN \ + VM_PAGE_QUEUES_ASSERT(mem, 1); \ assert(!mem->laundry); \ + assert(!mem->pageout_queue); \ + if (mem->local) { \ + struct vpl *lq; \ + assert(mem->object != kernel_object); \ + assert(!mem->inactive && !mem->speculative); \ + assert(!mem->active && !mem->throttled); \ + lq = &vm_page_local_q[mem->local_id].vpl_un.vpl; \ + VPL_LOCK(&lq->vpl_lock); \ + queue_remove(&lq->vpl_queue, \ + mem, vm_page_t, pageq); \ + mem->local = FALSE; \ + mem->local_id = 0; \ + lq->vpl_count--; \ + VPL_UNLOCK(&lq->vpl_lock); \ + } \ if (mem->active) { \ assert(mem->object != kernel_object); \ assert(!mem->inactive && !mem->speculative); \ @@ -650,7 +800,18 @@ extern void vm_page_validate_cs_mapped( } \ mem->pageq.next = NULL; \ mem->pageq.prev = NULL; \ + VM_PAGE_QUEUES_ASSERT(mem, 0); \ + MACRO_END + + +#if DEVELOPMENT || DEBUG +#define VM_PAGE_SPECULATIVE_USED_ADD() \ + MACRO_BEGIN \ + OSAddAtomic(1, &vm_page_speculative_used); \ MACRO_END +#else +#define VM_PAGE_SPECULATIVE_USED_ADD()
+#endif #define VM_PAGE_CONSUME_CLUSTERED(mem) \ @@ -659,7 +820,7 @@ extern void vm_page_validate_cs_mapped( assert(mem->object); \ mem->object->pages_used++; \ mem->clustered = FALSE; \ - OSAddAtomic(1, (SInt32 *)&vm_page_speculative_used); \ + VM_PAGE_SPECULATIVE_USED_ADD(); \ } \ MACRO_END diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 9502c60ae..8906e1aba 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,11 +87,13 @@ #include #include #include +#include #include #include #include #include +#include #if CONFIG_EMBEDDED #include @@ -111,7 +113,11 @@ * ENCRYPTED SWAP: */ #include <../bsd/crypto/aes/aes.h> +extern u_int32_t random(void); /* from */ +#if UPL_DEBUG +#include +#endif #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */ #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100 @@ -218,7 +224,7 @@ #ifndef VM_PAGE_FREE_RESERVED #define VM_PAGE_FREE_RESERVED(n) \ - ((6 * VM_PAGE_LAUNDRY_MAX) + (n)) + ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n)) #endif /* VM_PAGE_FREE_RESERVED */ /* @@ -276,7 +282,6 @@ static void vm_pageout_garbage_collect(int); static void vm_pageout_iothread_continue(struct vm_pageout_queue *); static void vm_pageout_iothread_external(void); static void vm_pageout_iothread_internal(void); -static void vm_pageout_queue_steal(vm_page_t); extern void vm_pageout_continue(void); extern void vm_pageout_scan(void); @@ -302,9 +307,14 @@ unsigned int vm_pageout_burst_inactive_throttle = 0; */ unsigned int vm_accellerate_zf_pageout_trigger = 400; unsigned int zf_queue_min_count = 100; -unsigned int vm_zf_count = 0; unsigned int vm_zf_queue_count = 0; +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ +unsigned int vm_zf_count = 0; +#else +uint64_t vm_zf_count 
__attribute__((aligned(8))) = 0; +#endif + /* * These variables record the pageout daemon's actions: * how many pages it looks at and what happens to those pages. @@ -322,6 +332,8 @@ unsigned int vm_pageout_inactive_absent = 0; /* debugging */ unsigned int vm_pageout_inactive_used = 0; /* debugging */ unsigned int vm_pageout_inactive_clean = 0; /* debugging */ unsigned int vm_pageout_inactive_dirty = 0; /* debugging */ +unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */ +unsigned int vm_pageout_inactive_zf = 0; /* debugging */ unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ unsigned int vm_pageout_purged_objects = 0; /* debugging */ unsigned int vm_stat_discard = 0; /* debugging */ @@ -335,11 +347,16 @@ unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */ unsigned int vm_pageout_scan_active_throttled = 0; unsigned int vm_pageout_scan_inactive_throttled = 0; unsigned int vm_pageout_scan_throttle = 0; /* debugging */ +unsigned int vm_pageout_scan_throttle_aborted = 0; /* debugging */ unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ + +unsigned int vm_page_speculative_count_drifts = 0; +unsigned int vm_page_speculative_count_drift_max = 0; + /* * Backing store throttle when BS is exhausted */ @@ -348,6 +365,8 @@ unsigned int vm_backing_store_low = 0; unsigned int vm_pageout_out_of_line = 0; unsigned int vm_pageout_in_place = 0; +unsigned int vm_page_steal_pageout_page = 0; + /* * ENCRYPTED SWAP: * counters and statistics... 
@@ -366,7 +385,11 @@ unsigned int vm_page_speculative_target = 0; vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; +static boolean_t (* volatile consider_buffer_cache_collect)(void) = NULL; + +#if DEVELOPMENT || DEBUG unsigned long vm_cs_validated_resets = 0; +#endif /* * Routine: vm_backing_store_disable @@ -545,7 +568,7 @@ vm_pageout_object_terminate( /* alternate request page list, write to page_list */ /* case. Occurs when the original page was wired */ /* at the time of the list request */ - assert(m->wire_count != 0); + assert(VM_PAGE_WIRED(m)); vm_page_unwire(m);/* reactivates */ m->overwriting = FALSE; } else { @@ -580,11 +603,12 @@ vm_pageout_object_terminate( /* * Account for the paging reference taken in vm_paging_object_allocate. */ - vm_object_paging_end(shadow_object); + vm_object_activity_end(shadow_object); vm_object_unlock(shadow_object); assert(object->ref_count == 0); assert(object->paging_in_progress == 0); + assert(object->activity_in_progress == 0); assert(object->resident_page_count == 0); return; } @@ -596,9 +620,9 @@ vm_pageout_object_terminate( * necessarily flushed from the VM page cache. * This is accomplished by cleaning in place. * - * The page must not be busy, and the object and page - * queues must be locked. - * + * The page must not be busy, and new_object + * must be locked. 
+ * */ void vm_pageclean_setup( @@ -614,8 +638,8 @@ vm_pageclean_setup( XPR(XPR_VM_PAGEOUT, "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n", - (integer_t)m->object, m->offset, (integer_t)m, - (integer_t)new_m, new_offset); + m->object, m->offset, m, + new_m, new_offset); pmap_clear_modify(m->phys_page); @@ -636,7 +660,10 @@ vm_pageclean_setup( new_m->private = TRUE; new_m->pageout = TRUE; new_m->phys_page = m->phys_page; + + vm_page_lockspin_queues(); vm_page_wire(new_m); + vm_page_unlock_queues(); vm_page_insert(new_m, new_object, new_offset); assert(!new_m->wanted); @@ -672,7 +699,7 @@ vm_pageout_initialize_page( XPR(XPR_VM_PAGEOUT, "vm_pageout_initialize_page, page 0x%X\n", - (integer_t)m, 0, 0, 0, 0); + m, 0, 0, 0, 0); assert(m->busy); /* @@ -713,15 +740,18 @@ vm_pageout_initialize_page( /* set the page for future call to vm_fault_list_request */ vm_object_paging_begin(object); holding_page = NULL; - vm_page_lock_queues(); + pmap_clear_modify(m->phys_page); m->dirty = TRUE; m->busy = TRUE; m->list_req_pending = TRUE; m->cleaning = TRUE; m->pageout = TRUE; + + vm_page_lockspin_queues(); vm_page_wire(m); vm_page_unlock_queues(); + vm_object_unlock(object); /* @@ -772,12 +802,14 @@ vm_pageout_cluster(vm_page_t m) XPR(XPR_VM_PAGEOUT, "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", - (integer_t)object, m->offset, (integer_t)m, 0, 0); + object, m->offset, m, 0, 0); + + VM_PAGE_CHECK(m); /* * Only a certain kind of page is appreciated here. */ - assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0)); + assert(m->busy && (m->dirty || m->precious) && (!VM_PAGE_WIRED(m))); assert(!m->cleaning && !m->pageout && !m->inactive && !m->active); assert(!m->throttled); @@ -810,13 +842,16 @@ vm_pageout_cluster(vm_page_t m) q->pgo_idle = FALSE; thread_wakeup((event_t) &q->pgo_pending); } + + VM_PAGE_CHECK(m); } unsigned long vm_pageout_throttle_up_count = 0; /* - * A page is back from laundry. 
See if there are some pages waiting to + * A page is back from laundry or we are stealing it back from + * the laundering state. See if there are some pages waiting to * go to laundry and if we can let some of them go now. * * Object and page queues must be locked. @@ -827,17 +862,26 @@ vm_pageout_throttle_up( { struct vm_pageout_queue *q; - vm_pageout_throttle_up_count++; - assert(m->laundry); assert(m->object != VM_OBJECT_NULL); assert(m->object != kernel_object); + vm_pageout_throttle_up_count++; + if (m->object->internal == TRUE) q = &vm_pageout_queue_internal; else q = &vm_pageout_queue_external; + if (m->pageout_queue == TRUE) { + m->pageout_queue = FALSE; + + queue_remove(&q->pgo_pending, m, vm_page_t, pageq); + m->pageq.next = NULL; + m->pageq.prev = NULL; + + vm_object_paging_end(m->object); + } m->laundry = FALSE; q->pgo_laundry--; @@ -865,6 +909,171 @@ struct flow_control { mach_timespec_t ts; }; + +/* + * VM memory pressure monitoring. + * + * vm_pageout_scan() keeps track of the number of pages it considers and + * reclaims, in the currently active vm_pageout_stats[vm_pageout_stat_now]. + * + * compute_memory_pressure() is called every second from compute_averages() + * and moves "vm_pageout_stat_now" forward, to start accumulating the number + * of reclaimed pages in a new vm_pageout_stats[] bucket. + * + * mach_vm_pressure_monitor() collects past statistics about memory pressure. + * The caller provides the number of seconds ("nsecs") worth of statistics + * it wants, up to 30 seconds. + * It computes the number of pages reclaimed in the past "nsecs" seconds and + * also returns the number of pages the system still needs to reclaim at this + * moment in time.
+ */ +#define VM_PAGEOUT_STAT_SIZE 31 +struct vm_pageout_stat { + unsigned int considered; + unsigned int reclaimed; +} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, }; +unsigned int vm_pageout_stat_now = 0; +unsigned int vm_memory_pressure = 0; + +#define VM_PAGEOUT_STAT_BEFORE(i) \ + (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1) +#define VM_PAGEOUT_STAT_AFTER(i) \ + (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1) + +/* + * Called from compute_averages(). + */ +void +compute_memory_pressure( + __unused void *arg) +{ + unsigned int vm_pageout_next; + + vm_memory_pressure = + vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed; + + commpage_set_memory_pressure( vm_memory_pressure ); + + /* move "now" forward */ + vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now); + vm_pageout_stats[vm_pageout_next].considered = 0; + vm_pageout_stats[vm_pageout_next].reclaimed = 0; + vm_pageout_stat_now = vm_pageout_next; +} + +unsigned int +mach_vm_ctl_page_free_wanted(void) +{ + unsigned int page_free_target, page_free_count, page_free_wanted; + + page_free_target = vm_page_free_target; + page_free_count = vm_page_free_count; + if (page_free_target > page_free_count) { + page_free_wanted = page_free_target - page_free_count; + } else { + page_free_wanted = 0; + } + + return page_free_wanted; +} + +kern_return_t +mach_vm_pressure_monitor( + boolean_t wait_for_pressure, + unsigned int nsecs_monitored, + unsigned int *pages_reclaimed_p, + unsigned int *pages_wanted_p) +{ + wait_result_t wr; + unsigned int vm_pageout_then, vm_pageout_now; + unsigned int pages_reclaimed; + + /* + * We don't take the vm_page_queue_lock here because we don't want + * vm_pressure_monitor() to get in the way of the vm_pageout_scan() + * thread when it's trying to reclaim memory. We don't need fully + * accurate monitoring anyway... 
+ */ + + if (wait_for_pressure) { + /* wait until there's memory pressure */ + while (vm_page_free_count >= vm_page_free_target) { + wr = assert_wait((event_t) &vm_page_free_wanted, + THREAD_INTERRUPTIBLE); + if (wr == THREAD_WAITING) { + wr = thread_block(THREAD_CONTINUE_NULL); + } + if (wr == THREAD_INTERRUPTED) { + return KERN_ABORTED; + } + if (wr == THREAD_AWAKENED) { + /* + * The memory pressure might have already + * been relieved but let's not block again + * and let's report that there was memory + * pressure at some point. + */ + break; + } + } + } + + /* provide the number of pages the system wants to reclaim */ + if (pages_wanted_p != NULL) { + *pages_wanted_p = mach_vm_ctl_page_free_wanted(); + } + + if (pages_reclaimed_p == NULL) { + return KERN_SUCCESS; + } + + /* provide number of pages reclaimed in the last "nsecs_monitored" */ + do { + vm_pageout_now = vm_pageout_stat_now; + pages_reclaimed = 0; + for (vm_pageout_then = + VM_PAGEOUT_STAT_BEFORE(vm_pageout_now); + vm_pageout_then != vm_pageout_now && + nsecs_monitored-- != 0; + vm_pageout_then = + VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) { + pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed; + } + } while (vm_pageout_now != vm_pageout_stat_now); + *pages_reclaimed_p = pages_reclaimed; + + return KERN_SUCCESS; +} + +/* Page States: Used below to maintain the page state + before it's removed from its queue. This saved state + helps us do the right accounting in certain cases +*/ + +#define PAGE_STATE_SPECULATIVE 1 +#define PAGE_STATE_THROTTLED 2 +#define PAGE_STATE_ZEROFILL 3 +#define PAGE_STATE_INACTIVE 4 + +#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \ + MACRO_BEGIN \ + /* \ + * If a "reusable" page somehow made it back into \ + * the active queue, it's been re-used and is not \ + * quite re-usable. \ + * If the VM object was "all_reusable", consider it \ + * as "all re-used" instead of converting it to \ + * "partially re-used", which could be expensive.
\ + */ \ + if ((m)->reusable || \ + (m)->object->all_reusable) { \ + vm_object_reuse_pages((m)->object, \ + (m)->offset, \ + (m)->offset + PAGE_SIZE_64, \ + FALSE); \ + } \ + MACRO_END + void vm_pageout_scan(void) { @@ -876,24 +1085,29 @@ vm_pageout_scan(void) vm_page_t local_freeq = NULL; int local_freed = 0; int delayed_unlock; - int need_internal_inactive = 0; int refmod_state = 0; int vm_pageout_deadlock_target = 0; struct vm_pageout_queue *iq; struct vm_pageout_queue *eq; struct vm_speculative_age_q *sq; - struct flow_control flow_control; + struct flow_control flow_control = { 0, { 0, 0 } }; boolean_t inactive_throttled = FALSE; boolean_t try_failed; mach_timespec_t ts; unsigned int msecs = 0; vm_object_t object; vm_object_t last_object_tried; - int zf_ratio; - int zf_run_count; +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ + unsigned int zf_ratio; + unsigned int zf_run_count; +#else + uint64_t zf_ratio; + uint64_t zf_run_count; +#endif uint32_t catch_up_count = 0; uint32_t inactive_reclaim_run; boolean_t forced_reclaim; + int page_prev_state = 0; flow_control.state = FCS_IDLE; iq = &vm_pageout_queue_internal; @@ -958,9 +1172,14 @@ vm_pageout_scan(void) * but at the moment mach vm cannot do this. 
*/ { - uint32_t total = vm_page_active_count + vm_page_inactive_count; - uint32_t normal = total - vm_zf_count; - +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ + uint32_t total = vm_page_active_count + vm_page_inactive_count; + uint32_t normal = total - vm_zf_count; +#else + uint64_t total = vm_page_active_count + vm_page_inactive_count; + uint64_t normal = total - vm_zf_count; +#endif + /* zf_ratio is the number of zf pages we victimize per normal page */ if (vm_zf_count < vm_accellerate_zf_pageout_trigger) @@ -1010,16 +1229,16 @@ vm_pageout_scan(void) * Don't sweep through active queue more than the throttle * which should be kept relatively low */ - active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); + active_burst_count = MIN(vm_pageout_burst_active_throttle, + vm_page_active_count); /* * Move pages from active to inactive. */ - if (need_internal_inactive == 0 && (vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) + if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) goto done_moving_active_pages; - while (!queue_empty(&vm_page_queue_active) && - (need_internal_inactive || active_burst_count)) { + while (!queue_empty(&vm_page_queue_active) && active_burst_count) { if (active_burst_count) active_burst_count--; @@ -1065,7 +1284,7 @@ vm_pageout_scan(void) m = (vm_page_t) queue_first(&vm_page_queue_active); /* * this is the next object we're going to be interested in - * try to make sure its available after the mutex_yield + * try to make sure it's available after the mutex_yield * returns control */ vm_pageout_scan_wants_object = m->object; @@ -1096,6 +1315,9 @@ vm_pageout_scan(void) goto done_with_activepage; } + /* deal with a rogue "reusable" page */ + VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); + /* * Deactivate the page while holding the object * locked, so we know the page is still not busy. 
@@ -1106,25 +1328,23 @@ vm_pageout_scan(void) */ vm_page_deactivate(m); - if (need_internal_inactive) { - vm_pageout_scan_active_throttle_success++; - need_internal_inactive--; - } done_with_activepage: if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) { if (object != NULL) { + vm_pageout_scan_wants_object = VM_OBJECT_NULL; vm_object_unlock(object); object = NULL; - vm_pageout_scan_wants_object = VM_OBJECT_NULL; } if (local_freeq) { - vm_page_free_list(local_freeq); + vm_page_unlock_queues(); + vm_page_free_list(local_freeq, TRUE); local_freeq = NULL; local_freed = 0; - } - mutex_yield(&vm_page_queue_lock); + vm_page_lock_queues(); + } else + lck_mtx_yield(&vm_page_queue_lock); delayed_unlock = 1; @@ -1158,10 +1378,12 @@ vm_pageout_scan(void) vm_pageout_scan_wants_object = VM_OBJECT_NULL; if (local_freeq) { - vm_page_free_list(local_freeq); + vm_page_unlock_queues(); + vm_page_free_list(local_freeq, TRUE); local_freeq = NULL; local_freed = 0; + vm_page_lock_queues(); } /* * inactive target still not met... keep going @@ -1185,7 +1407,7 @@ vm_pageout_scan(void) continue; #endif - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock(&vm_page_queue_free_lock); if ((vm_page_free_count >= vm_page_free_target) && (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { @@ -1198,11 +1420,15 @@ vm_pageout_scan(void) return; } - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); } + /* - * Before anything, we check if we have any ripe volatile objects around. - * If so, purge the first and see what it gives us. + * Before anything, we check if we have any ripe volatile + * objects around. If so, try to purge the first object. + * If the purge fails, fall through to reclaim a page instead. + * If the purge succeeds, go back to the top and reevaluate + * the new memory situation.
*/ assert (available_for_purge>=0); if (available_for_purge) @@ -1211,8 +1437,9 @@ vm_pageout_scan(void) vm_object_unlock(object); object = NULL; } - vm_purgeable_object_purge_one(); - continue; + if(TRUE == vm_purgeable_object_purge_one()) { + continue; + } } if (queue_empty(&sq->age_q) && vm_page_speculative_count) { @@ -1224,10 +1451,13 @@ vm_pageout_scan(void) struct vm_speculative_age_q *aq; mach_timespec_t ts_fully_aged; boolean_t can_steal = FALSE; + int num_scanned_queues; aq = &vm_page_queue_speculative[speculative_steal_index]; - while (queue_empty(&aq->age_q)) { + num_scanned_queues = 0; + while (queue_empty(&aq->age_q) && + num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) { speculative_steal_index++; @@ -1236,6 +1466,33 @@ vm_pageout_scan(void) aq = &vm_page_queue_speculative[speculative_steal_index]; } + + if (num_scanned_queues == + VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) { + /* + * XXX We've scanned all the speculative + * queues but still haven't found one + * that is not empty, even though + * vm_page_speculative_count is not 0. + */ + /* report the anomaly... */ + printf("vm_pageout_scan: " + "all speculative queues empty " + "but count=%d. Re-adjusting.\n", + vm_page_speculative_count); + if (vm_page_speculative_count > + vm_page_speculative_count_drift_max) + vm_page_speculative_count_drift_max = vm_page_speculative_count; + vm_page_speculative_count_drifts++; +#if 6553678 + Debugger("vm_pageout_scan: no speculative pages"); +#endif + /* readjust... */ + vm_page_speculative_count = 0; + /* ... 
and continue */ + continue; + } + if (vm_page_speculative_count > vm_page_speculative_target) can_steal = TRUE; else { @@ -1245,7 +1502,11 @@ vm_pageout_scan(void) ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); - clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec); + clock_sec_t sec; + clock_nsec_t nsec; + clock_get_system_nanotime(&sec, &nsec); + ts.tv_sec = (unsigned int) sec; + ts.tv_nsec = nsec; if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) can_steal = TRUE; @@ -1267,7 +1528,7 @@ vm_pageout_scan(void) msecs = vm_pageout_empty_wait; goto vm_pageout_scan_delay; - } else if (inactive_burst_count >= + } else if (inactive_burst_count >= MIN(vm_pageout_burst_inactive_throttle, (vm_page_inactive_count + vm_page_speculative_count))) { @@ -1276,6 +1537,8 @@ vm_pageout_scan(void) goto vm_pageout_scan_delay; } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) { + clock_sec_t sec; + clock_nsec_t nsec; switch (flow_control.state) { @@ -1283,8 +1546,9 @@ vm_pageout_scan(void) reset_deadlock_timer: ts.tv_sec = vm_pageout_deadlock_wait / 1000; ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; - clock_get_system_nanotime(&flow_control.ts.tv_sec, - (unsigned *)&flow_control.ts.tv_nsec); + clock_get_system_nanotime(&sec, &nsec); + flow_control.ts.tv_sec = (unsigned int) sec; + flow_control.ts.tv_nsec = nsec; ADD_MACH_TIMESPEC(&flow_control.ts, &ts); flow_control.state = FCS_DELAYED; @@ -1293,8 +1557,9 @@ vm_pageout_scan(void) break; case FCS_DELAYED: - clock_get_system_nanotime(&ts.tv_sec, - (unsigned *)&ts.tv_nsec); + clock_get_system_nanotime(&sec, &nsec); + ts.tv_sec = (unsigned int) sec; + ts.tv_nsec = nsec; if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { /* @@ -1345,10 +1610,19 @@ vm_pageout_scan(void) vm_pageout_scan_wants_object = VM_OBJECT_NULL; if (local_freeq) { - vm_page_free_list(local_freeq); + vm_page_unlock_queues(); + vm_page_free_list(local_freeq, TRUE); local_freeq = NULL; local_freed = 0; + 
vm_page_lock_queues(); + + if (flow_control.state == FCS_DELAYED && + !VM_PAGE_Q_THROTTLED(iq)) { + flow_control.state = FCS_IDLE; + vm_pageout_scan_throttle_aborted++; + goto consider_inactive; + } } #if CONFIG_EMBEDDED { @@ -1370,13 +1644,12 @@ vm_pageout_scan(void) } #endif assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); - counter(c_vm_pageout_scan_block++); vm_page_unlock_queues(); assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); - + thread_block(THREAD_CONTINUE_NULL); vm_page_lock_queues(); @@ -1404,18 +1677,13 @@ vm_pageout_scan(void) while (1) { m = NULL; - /* - * the most eligible pages are ones that were throttled because the - * pager wasn't ready at the time. If a pager is ready now, - * see if one of these is useful. - */ - if (!VM_PAGE_Q_THROTTLED(iq) && !queue_empty(&vm_page_queue_throttled)) { - m = (vm_page_t) queue_first(&vm_page_queue_throttled); - break; + if (IP_VALID(memory_manager_default)) { + assert(vm_page_throttled_count == 0); + assert(queue_empty(&vm_page_queue_throttled)); } /* - * The second most eligible pages are ones we paged in speculatively, + * The most eligible pages are ones we paged in speculatively, * but which have not yet been touched. */ if ( !queue_empty(&sq->age_q) ) { @@ -1450,6 +1718,10 @@ vm_pageout_scan(void) assert(m->object != kernel_object); assert(m->phys_page != vm_page_guard_addr); + if (!m->speculative) { + vm_pageout_stats[vm_pageout_stat_now].considered++; + } + DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); /* @@ -1479,11 +1751,18 @@ vm_pageout_scan(void) * object are fairly typical on the inactive and active queues */ if (!vm_object_lock_try_scan(m->object)) { + vm_pageout_inactive_nolock++; + + requeue_page: /* * Move page to end and continue. 
* Don't re-issue ticket */ if (m->zero_fill) { + if (m->speculative) { + panic("vm_pageout_scan(): page %p speculative and zero-fill !?\n", m); + } + assert(!m->speculative); queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); queue_enter(&vm_page_queue_zf, m, @@ -1494,15 +1773,15 @@ vm_pageout_scan(void) vm_page_speculative_count--; /* - * move to the tail of the inactive queue + * move to the head of the inactive queue * to get it out of the way... the speculative * queue is generally too small to depend * on there being enough pages from other * objects to make cycling it back on the * same queue a winning proposition */ - queue_enter(&vm_page_queue_inactive, m, - vm_page_t, pageq); + queue_enter_first(&vm_page_queue_inactive, m, + vm_page_t, pageq); m->inactive = TRUE; vm_page_inactive_count++; token_new_pagecount++; @@ -1539,8 +1818,6 @@ vm_pageout_scan(void) pmap_clear_reference(m->phys_page); m->reference = FALSE; - vm_pageout_inactive_nolock++; - if ( !queue_empty(&sq->age_q) ) m = (vm_page_t) queue_first(&sq->age_q); else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) || @@ -1591,86 +1868,38 @@ vm_pageout_scan(void) * pulled from the queue and paged out whenever * one of its logically adjacent fellows is * targeted. - * - * Pages found on the speculative list can never be - * in this state... they always have a pager associated - * with them. 
*/ - assert(!m->speculative); - - if (m->zero_fill) { - queue_remove(&vm_page_queue_zf, m, - vm_page_t, pageq); - queue_enter(&vm_page_queue_zf, m, - vm_page_t, pageq); - } else { - queue_remove(&vm_page_queue_inactive, m, - vm_page_t, pageq); -#if MACH_ASSERT - vm_page_inactive_count--; /* balance for purgeable queue asserts */ -#endif - vm_purgeable_q_advance_all(); - - queue_enter(&vm_page_queue_inactive, m, - vm_page_t, pageq); -#if MACH_ASSERT - vm_page_inactive_count++; /* balance for purgeable queue asserts */ -#endif - token_new_pagecount++; - } vm_pageout_inactive_avoid++; - - goto done_with_inactivepage; + goto requeue_page; } /* * Remove the page from its list. */ if (m->speculative) { remque(&m->pageq); + page_prev_state = PAGE_STATE_SPECULATIVE; m->speculative = FALSE; vm_page_speculative_count--; } else if (m->throttled) { queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq); + page_prev_state = PAGE_STATE_THROTTLED; m->throttled = FALSE; vm_page_throttled_count--; } else { if (m->zero_fill) { queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); + page_prev_state = PAGE_STATE_ZEROFILL; vm_zf_queue_count--; } else { + page_prev_state = PAGE_STATE_INACTIVE; queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); } m->inactive = FALSE; if (!m->fictitious) vm_page_inactive_count--; - vm_purgeable_q_advance_all(); + vm_purgeable_q_advance_all(); } - /* If the object is empty, the page must be reclaimed even if dirty or used. */ - /* If the page belongs to a volatile object, we stick it back on. */ - if (object->copy == VM_OBJECT_NULL) { - if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) { - m->busy = TRUE; - if (m->pmapped == TRUE) { - /* unmap the page */ - refmod_state = pmap_disconnect(m->phys_page); - if (refmod_state & VM_MEM_MODIFIED) { - m->dirty = TRUE; - } - } - if (m->dirty || m->precious) { - /* we saved the cost of cleaning this page ! 
*/ - vm_page_purged_count++; - } - goto reclaim_page; - } - if (object->purgable == VM_PURGABLE_VOLATILE) { - /* if it's wired, we can't put it on our queue */ - assert(m->wire_count == 0); - /* just stick it back on! */ - goto reactivate_page; - } - } m->pageq.next = NULL; m->pageq.prev = NULL; @@ -1712,13 +1941,21 @@ vm_pageout_scan(void) DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); - if (m->object->internal) { + if (object->internal) { DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); } else { DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); } + vm_page_free_prepare_queues(m); - vm_page_free_prepare(m); + /* + * remove page from object here since we're already + * behind the object lock... defer the rest of the work + * we'd normally do in vm_page_free_prepare_object + * until 'vm_page_free_list' is called + */ + if (m->tabled) + vm_page_remove(m, TRUE); assert(m->pageq.next == NULL && m->pageq.prev == NULL); @@ -1728,6 +1965,11 @@ vm_pageout_scan(void) inactive_burst_count = 0; + if(page_prev_state != PAGE_STATE_SPECULATIVE) { + vm_pageout_stats[vm_pageout_stat_now].reclaimed++; + page_prev_state = 0; + } + goto done_with_inactivepage; } @@ -1754,6 +1996,36 @@ vm_pageout_scan(void) goto done_with_inactivepage; } + /* + * If the object is empty, the page must be reclaimed even + * if dirty or used. + * If the page belongs to a volatile object, we stick it back + * on. + */ + if (object->copy == VM_OBJECT_NULL) { + if (object->purgable == VM_PURGABLE_EMPTY) { + m->busy = TRUE; + if (m->pmapped == TRUE) { + /* unmap the page */ + refmod_state = pmap_disconnect(m->phys_page); + if (refmod_state & VM_MEM_MODIFIED) { + m->dirty = TRUE; + } + } + if (m->dirty || m->precious) { + /* we saved the cost of cleaning this page ! */ + vm_page_purged_count++; + } + goto reclaim_page; + } + if (object->purgable == VM_PURGABLE_VOLATILE) { + /* if it's wired, we can't put it on our queue */ + assert(!VM_PAGE_WIRED(m)); + /* just stick it back on! 
*/ + goto reactivate_page; + } + } + /* * If it's being used, reactivate. * (Fictitious pages are either busy or absent.) @@ -1770,6 +2042,12 @@ vm_pageout_scan(void) if (refmod_state & VM_MEM_MODIFIED) m->dirty = TRUE; } + + if (m->reference || m->dirty) { + /* deal with a rogue "reusable" page */ + VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); + } + if (m->reference && !m->no_cache) { /* * The page we pulled off the inactive list has @@ -1786,13 +2064,23 @@ vm_pageout_scan(void) } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { vm_pageout_inactive_force_reclaim++; } else { - /* - * The page was being used, so put back on active list. - */ + uint32_t isinuse; reactivate_page: - vm_page_activate(m); - VM_STAT_INCR(reactivations); - + if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && + vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { + /* + * no explicit mappings of this object exist + * and it's not open via the filesystem + */ + vm_page_deactivate(m); + vm_pageout_inactive_deactivated++; + } else { + /* + * The page was/is being used, so put back on active list. + */ + vm_page_activate(m); + VM_STAT_INCR(reactivations); + } vm_pageout_inactive_used++; inactive_burst_count = 0; @@ -1815,7 +2103,7 @@ vm_pageout_scan(void) XPR(XPR_VM_PAGEOUT, "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", - (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0); + object, m->offset, m, 0,0); /* * we've got a candidate page to steal... @@ -1833,6 +2121,7 @@ vm_pageout_scan(void) * it if is, we need to skip over it by moving it back * to the end of the inactive queue */ + inactive_throttled = FALSE; if (m->dirty || m->precious) { @@ -1931,7 +2220,10 @@ vm_pageout_scan(void) * If it's clean and not precious, we can free the page.
*/ if (!m->dirty && !m->precious) { + if (m->zero_fill) + vm_pageout_inactive_zf++; vm_pageout_inactive_clean++; + goto reclaim_page; } @@ -1956,8 +2248,12 @@ vm_pageout_scan(void) } } + vm_pageout_stats[vm_pageout_stat_now].reclaimed++; + vm_pageout_cluster(m); + if (m->zero_fill) + vm_pageout_inactive_zf++; vm_pageout_inactive_dirty++; inactive_burst_count = 0; @@ -1966,17 +2262,19 @@ vm_pageout_scan(void) if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) { if (object != NULL) { + vm_pageout_scan_wants_object = VM_OBJECT_NULL; vm_object_unlock(object); object = NULL; - vm_pageout_scan_wants_object = VM_OBJECT_NULL; } if (local_freeq) { - vm_page_free_list(local_freeq); + vm_page_unlock_queues(); + vm_page_free_list(local_freeq, TRUE); local_freeq = NULL; local_freed = 0; - } - mutex_yield(&vm_page_queue_lock); + vm_page_lock_queues(); + } else + lck_mtx_yield(&vm_page_queue_lock); delayed_unlock = 1; } @@ -2014,6 +2312,8 @@ vm_page_free_reserve( if (vm_page_free_target < vm_page_free_min + 5) vm_page_free_target = vm_page_free_min + 5; + vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3); + vm_page_creation_throttle = vm_page_free_target / 2; } /* @@ -2030,7 +2330,7 @@ vm_pageout_continue(void) assert(vm_page_free_wanted == 0); assert(vm_page_free_wanted_privileged == 0); assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); counter(c_vm_pageout_block++); thread_block((thread_continue_t)vm_pageout_continue); @@ -2038,33 +2338,6 @@ vm_pageout_continue(void) } -/* - * must be called with the - * queues and object locks held - */ -static void -vm_pageout_queue_steal(vm_page_t m) -{ - struct vm_pageout_queue *q; - - if (m->object->internal == TRUE) - q = &vm_pageout_queue_internal; - else - q = &vm_pageout_queue_external; - - m->laundry = FALSE; - m->pageout_queue = FALSE; - queue_remove(&q->pgo_pending, m, vm_page_t, pageq); - 
- m->pageq.next = NULL; - m->pageq.prev = NULL; - - vm_object_paging_end(m->object); - - q->pgo_laundry--; -} - - #ifdef FAKE_DEADLOCK #define FAKE_COUNT 5000 @@ -2094,11 +2367,12 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) q->pgo_busy = TRUE; queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); + VM_PAGE_CHECK(m); m->pageout_queue = FALSE; - vm_page_unlock_queues(); - m->pageq.next = NULL; m->pageq.prev = NULL; + vm_page_unlock_queues(); + #ifdef FAKE_DEADLOCK if (q == &vm_pageout_queue_internal) { vm_offset_t addr; @@ -2143,15 +2417,12 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) * Should only happen if there is no * default pager. */ - m->list_req_pending = FALSE; - m->cleaning = FALSE; - m->pageout = FALSE; - vm_page_lockspin_queues(); - vm_page_unwire(m); - vm_pageout_throttle_up(m); + + vm_pageout_queue_steal(m, TRUE); vm_pageout_dirty_no_pager++; vm_page_activate(m); + vm_page_unlock_queues(); /* @@ -2185,6 +2456,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) vm_page_lockspin_queues(); continue; } + VM_PAGE_CHECK(m); vm_object_unlock(object); /* * we expect the paging_in_progress reference to have @@ -2257,10 +2529,21 @@ vm_pageout_iothread_internal(void) /*NOTREACHED*/ } +kern_return_t +vm_set_buffer_cleanup_callout(boolean_t (*func)(void)) +{ + if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { + return KERN_SUCCESS; + } else { + return KERN_FAILURE; /* Already set */ + } +} + static void vm_pageout_garbage_collect(int collect) { if (collect) { + boolean_t buf_large_zfree = FALSE; stack_collect(); /* @@ -2268,7 +2551,10 @@ vm_pageout_garbage_collect(int collect) * might return memory to zones. 
*/ consider_machine_collect(); - consider_zone_gc(); + if (consider_buffer_cache_collect != NULL) { + buf_large_zfree = (*consider_buffer_cache_collect)(); + } + consider_zone_gc(buf_large_zfree); consider_machine_adjust(); } @@ -2429,24 +2715,70 @@ vm_pageout_internal_start(void) return result; } -#define UPL_DELAYED_UNLOCK_LIMIT (MAX_UPL_TRANSFER / 2) -static upl_t -upl_create(int type, int flags, upl_size_t size) +/* + * when marshalling pages into a UPL and subsequently committing + * or aborting them, it is necessary to hold + * the vm_page_queue_lock (a hot global lock) for certain operations + * on the page... however, the majority of the work can be done + * while merely holding the object lock... in fact there are certain + * collections of pages that don't require any work brokered by the + * vm_page_queue_lock... to mitigate the time spent behind the global + * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT + * while doing all of the work that doesn't require the vm_page_queue_lock... + * then call dw_do_work to acquire the vm_page_queue_lock and do the + * necessary work for each page... we will grab the busy bit on the page + * if it's not already held so that dw_do_work can drop the object lock + * if it can't immediately take the vm_page_queue_lock in order to compete + * for the locks in the same order that vm_pageout_scan takes them. 
+ * the operation names are modeled after the names of the routines that + * need to be called in order to make the changes very obvious in the + * original loop + */ + +#define DELAYED_WORK_LIMIT 32 + +#define DW_vm_page_unwire 0x01 +#define DW_vm_page_wire 0x02 +#define DW_vm_page_free 0x04 +#define DW_vm_page_activate 0x08 +#define DW_vm_page_deactivate_internal 0x10 +#define DW_vm_page_speculate 0x20 +#define DW_vm_page_lru 0x40 +#define DW_vm_pageout_throttle_up 0x80 +#define DW_PAGE_WAKEUP 0x100 +#define DW_clear_busy 0x200 +#define DW_clear_reference 0x400 +#define DW_set_reference 0x800 + +struct dw { + vm_page_t dw_m; + int dw_mask; +}; + + +static void dw_do_work(vm_object_t object, struct dw *dwp, int dw_count); + + + +static upl_t +upl_create(int type, int flags, upl_size_t size) { upl_t upl; int page_field_size = 0; int upl_flags = 0; int upl_size = sizeof(struct upl); + size = round_page_32(size); + if (type & UPL_CREATE_LITE) { - page_field_size = ((size/PAGE_SIZE) + 7) >> 3; + page_field_size = (atop(size) + 7) >> 3; page_field_size = (page_field_size + 3) & 0xFFFFFFFC; upl_flags |= UPL_LITE; } if (type & UPL_CREATE_INTERNAL) { - upl_size += sizeof(struct upl_page_info) * (size/PAGE_SIZE); + upl_size += (int) sizeof(struct upl_page_info) * atop(size); upl_flags |= UPL_INTERNAL; } @@ -2463,10 +2795,19 @@ upl_create(int type, int flags, upl_size_t size) upl->ref_count = 1; upl->highest_page = 0; upl_lock_init(upl); -#ifdef UPL_DEBUG + upl->vector_upl = NULL; +#if UPL_DEBUG upl->ubc_alias1 = 0; upl->ubc_alias2 = 0; + + upl->upl_creator = current_thread(); + upl->upl_state = 0; + upl->upl_commit_index = 0; + bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); + + (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); #endif /* UPL_DEBUG */ + return(upl); } @@ -2476,7 +2817,7 @@ upl_destroy(upl_t upl) int page_field_size; /* bit field in word size buf */ int size; -#ifdef UPL_DEBUG +#if UPL_DEBUG { vm_object_t object; 
@@ -2507,6 +2848,8 @@ upl_destroy(upl_t upl) page_field_size = ((size/PAGE_SIZE) + 7) >> 3; page_field_size = (page_field_size + 3) & 0xFFFFFFFC; } + upl_lock_destroy(upl); + upl->vector_upl = (vector_upl_t) 0xfeedbeef; if (upl->flags & UPL_INTERNAL) { kfree(upl, sizeof(struct upl) + @@ -2528,19 +2871,25 @@ uc_upl_dealloc(upl_t upl) void upl_deallocate(upl_t upl) { - if (--upl->ref_count == 0) + if (--upl->ref_count == 0) { + if(vector_upl_is_valid(upl)) + vector_upl_deallocate(upl); upl_destroy(upl); + } } -/* +#if DEVELOPMENT || DEBUG +/* * Statistics about UPL enforcement of copy-on-write obligations. */ unsigned long upl_cow = 0; unsigned long upl_cow_again = 0; -unsigned long upl_cow_contiguous = 0; unsigned long upl_cow_pages = 0; unsigned long upl_cow_again_pages = 0; -unsigned long upl_cow_contiguous_pages = 0; + +unsigned long iopl_cow = 0; +unsigned long iopl_cow_pages = 0; +#endif /* * Routine: vm_object_upl_request @@ -2610,8 +2959,9 @@ vm_object_upl_request( int refmod_state = 0; wpl_array_t lite_list = NULL; vm_object_t last_copy_object; - int delayed_unlock = 0; - int j; + struct dw dw_array[DELAYED_WORK_LIMIT]; + struct dw *dwp; + int dw_count; if (cntrl_flags & ~UPL_VALID_FLAGS) { /* @@ -2641,10 +2991,17 @@ vm_object_upl_request( lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); + if (size == 0) { + user_page_list = NULL; + lite_list = NULL; + } } else { upl = upl_create(UPL_CREATE_INTERNAL, 0, size); user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); + if (size == 0) { + user_page_list = NULL; + } } } else { if (cntrl_flags & UPL_SET_LITE) { @@ -2652,6 +3009,9 @@ vm_object_upl_request( upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size); lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); + if (size == 0) { + lite_list = NULL; + } } else { upl = upl_create(UPL_CREATE_EXTERNAL, 0, size); } @@ -2694,7 +3054,7 @@ vm_object_upl_request(
upl->flags |= UPL_PAGEOUT; vm_object_lock(object); - vm_object_paging_begin(object); + vm_object_activity_begin(object); /* * we can lock in the paging_offset once paging_in_progress is set @@ -2702,7 +3062,7 @@ vm_object_upl_request( upl->size = size; upl->offset = offset + object->paging_offset; -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_enter(&object->uplq, upl, upl_t, uplq); #endif /* UPL_DEBUG */ @@ -2724,8 +3084,10 @@ vm_object_upl_request( FALSE, /* should_return */ MEMORY_OBJECT_COPY_SYNC, VM_PROT_NO_CHANGE); +#if DEVELOPMENT || DEBUG upl_cow++; upl_cow_pages += size >> PAGE_SHIFT; +#endif } /* * remember which copy object we synchronized with @@ -2736,42 +3098,17 @@ vm_object_upl_request( xfer_size = size; dst_offset = offset; + dwp = &dw_array[0]; + dw_count = 0; + while (xfer_size) { + dwp->dw_mask = 0; + if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { - if (delayed_unlock) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } vm_object_unlock(object); VM_PAGE_GRAB_FICTITIOUS(alias_page); - goto relock; - } - if (delayed_unlock == 0) { - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. 
- */ - vm_object_unlock(object); -relock: - for (j = 0; ; j++) { - vm_page_lock_queues(); - - if (vm_object_lock_try(object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); - } - delayed_unlock = 1; + vm_object_lock(object); } if (cntrl_flags & UPL_COPYOUT_FROM) { upl->flags |= UPL_PAGE_SYNC_DONE; @@ -2780,12 +3117,12 @@ vm_object_upl_request( dst_page->fictitious || dst_page->absent || dst_page->error || - (dst_page->wire_count && !dst_page->pageout && !dst_page->list_req_pending)) { + (VM_PAGE_WIRED(dst_page) && !dst_page->pageout && !dst_page->list_req_pending)) { if (user_page_list) user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } /* * grab this up front... @@ -2806,8 +3143,7 @@ vm_object_upl_request( * way of vm_pageout_scan which would have to * reactivate it upon tripping over it */ - vm_page_activate(dst_page); - VM_STAT_INCR(reactivations); + dwp->dw_mask |= DW_vm_page_activate; } if (cntrl_flags & UPL_RET_ONLY_DIRTY) { /* @@ -2846,7 +3182,7 @@ vm_object_upl_request( if (user_page_list) user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } check_busy: if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) { @@ -2854,15 +3190,12 @@ vm_object_upl_request( if (user_page_list) user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } /* * someone else is playing with the * page. We will have to wait. */ - delayed_unlock = 0; - vm_page_unlock_queues(); - PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; @@ -2870,11 +3203,11 @@ vm_object_upl_request( /* * Someone else already cleaning the page? 
*/ - if ((dst_page->cleaning || dst_page->absent || dst_page->wire_count != 0) && !dst_page->list_req_pending) { + if ((dst_page->cleaning || dst_page->absent || VM_PAGE_WIRED(dst_page)) && !dst_page->list_req_pending) { if (user_page_list) user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } /* * ENCRYPTED SWAP: @@ -2886,8 +3219,6 @@ vm_object_upl_request( if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { int was_busy; - delayed_unlock = 0; - vm_page_unlock_queues(); /* * save the current state of busy * mark page as busy while decrypt @@ -2903,19 +3234,23 @@ vm_object_upl_request( * restore to original busy state */ dst_page->busy = was_busy; + } + if (dst_page->pageout_queue == TRUE) { - vm_page_lock_queues(); - delayed_unlock = 1; + vm_page_lockspin_queues(); + + if (dst_page->pageout_queue == TRUE) { + /* + * we've buddied up a page for a clustered pageout + * that has already been moved to the pageout + * queue by pageout_scan... we need to remove + * it from the queue and drop the laundry count + * on that queue + */ + vm_pageout_throttle_up(dst_page); + } + vm_page_unlock_queues(); } - if (dst_page->pageout_queue == TRUE) - /* - * we've buddied up a page for a clustered pageout - * that has already been moved to the pageout - * queue by pageout_scan... we need to remove - * it from the queue and drop the laundry count - * on that queue - */ - vm_pageout_queue_steal(dst_page); #if MACH_CLUSTER_STATS /* * pageout statistics gathering. 
count @@ -2947,9 +3282,10 @@ vm_object_upl_request( upl->highest_page = dst_page->phys_page; if (cntrl_flags & UPL_SET_LITE) { - int pg_num; + unsigned int pg_num; - pg_num = (dst_offset-offset)/PAGE_SIZE; + pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); + assert(pg_num == (dst_offset-offset)/PAGE_SIZE); lite_list[pg_num>>5] |= 1 << (pg_num & 31); if (hw_dirty) @@ -3012,10 +3348,11 @@ vm_object_upl_request( * deny access to the target page * while it is being worked on */ - if ((!dst_page->pageout) && (dst_page->wire_count == 0)) { + if ((!dst_page->pageout) && ( !VM_PAGE_WIRED(dst_page))) { dst_page->busy = TRUE; dst_page->pageout = TRUE; - vm_page_wire(dst_page); + + dwp->dw_mask |= DW_vm_page_wire; } } } else { @@ -3042,9 +3379,6 @@ vm_object_upl_request( * to see both the *before* and *after* pages. */ if (object->copy != VM_OBJECT_NULL) { - delayed_unlock = 0; - vm_page_unlock_queues(); - vm_object_update( object, dst_offset,/* current offset */ @@ -3055,11 +3389,10 @@ vm_object_upl_request( MEMORY_OBJECT_COPY_SYNC, VM_PROT_NO_CHANGE); +#if DEVELOPMENT || DEBUG upl_cow_again++; upl_cow_again_pages += xfer_size >> PAGE_SHIFT; - - vm_page_lock_queues(); - delayed_unlock = 1; +#endif } /* * remember the copy object we synced with @@ -3069,23 +3402,25 @@ vm_object_upl_request( dst_page = vm_page_lookup(object, dst_offset); if (dst_page != VM_PAGE_NULL) { - if ( !(dst_page->list_req_pending) ) { - if ((cntrl_flags & UPL_RET_ONLY_ABSENT) && !dst_page->absent) { - /* + + if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { + + if ( !(dst_page->absent && dst_page->list_req_pending) ) { + /* * skip over pages already present in the cache */ - if (user_page_list) - user_page_list[entry].phys_addr = 0; + if (user_page_list) + user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } + } + if ( !(dst_page->list_req_pending) ) { + if (dst_page->cleaning) { /* * someone else is writing to the page... wait... 
*/ - delayed_unlock = 0; - vm_page_unlock_queues(); - PAGE_SLEEP(object, dst_page, THREAD_UNINT); continue; @@ -3099,15 +3434,37 @@ vm_object_upl_request( */ dst_page->list_req_pending = FALSE; - vm_page_free(dst_page); + VM_PAGE_FREE(dst_page); dst_page = NULL; + } else if (dst_page->absent) { /* * the default_pager case */ dst_page->list_req_pending = FALSE; dst_page->busy = FALSE; + + } else if (dst_page->pageout) { + /* + * page was earmarked by vm_pageout_scan + * to be cleaned and stolen... we're going + * to take it back since we are not attempting + * to read that page and we don't want to stall + * waiting for it to be cleaned for 2 reasons... + * 1 - no use paging it out and back in + * 2 - if we stall, we may cause a deadlock in + * the FS trying to acquire its locks + * on the VNOP_PAGEOUT path presuming that + * those locks are already held on the read + * path before trying to create this UPL + * + * so undo all of the state that vm_pageout_scan + * hung on this page + */ + dst_page->busy = FALSE; + + vm_pageout_queue_steal(dst_page, FALSE); } } } @@ -3125,7 +3482,7 @@ vm_object_upl_request( if (user_page_list) user_page_list[entry].phys_addr = 0; - goto delay_unlock_queues; + goto try_next_page; } /* * need to allocate a page @@ -3149,38 +3506,13 @@ vm_object_upl_request( * then try again for the same * offset... */ - delayed_unlock = 0; - vm_page_unlock_queues(); - vm_object_unlock(object); VM_PAGE_WAIT(); - - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order...
otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. - */ - for (j = 0; ; j++) { - vm_page_lock_queues(); - - if (vm_object_lock_try(object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); - } - delayed_unlock = 1; + vm_object_lock(object); continue; } - vm_page_insert_internal(dst_page, object, dst_offset, TRUE); + vm_page_insert(dst_page, object, dst_offset); dst_page->absent = TRUE; dst_page->busy = FALSE; @@ -3197,6 +3529,18 @@ vm_object_upl_request( dst_page->clustered = TRUE; } } + if (dst_page->fictitious) { + panic("need corner case for fictitious page"); + } + if (dst_page->busy) { + /* + * someone else is playing with the + * page. We will have to wait. + */ + PAGE_SLEEP(object, dst_page, THREAD_UNINT); + + continue; + } /* * ENCRYPTED SWAP: */ @@ -3215,21 +3559,6 @@ vm_object_upl_request( } dst_page->overwriting = TRUE; - if (dst_page->fictitious) { - panic("need corner case for fictitious page"); - } - if (dst_page->busy) { - /* - * someone else is playing with the - * page. We will have to wait. - */ - delayed_unlock = 0; - vm_page_unlock_queues(); - - PAGE_SLEEP(object, dst_page, THREAD_UNINT); - - continue; - } if (dst_page->pmapped) { if ( !(cntrl_flags & UPL_FILE_IO)) /* @@ -3246,9 +3575,10 @@ vm_object_upl_request( dirty = hw_dirty ? 
TRUE : dst_page->dirty; if (cntrl_flags & UPL_SET_LITE) { - int pg_num; + unsigned int pg_num; - pg_num = (dst_offset-offset)/PAGE_SIZE; + pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); + assert(pg_num == (dst_offset-offset)/PAGE_SIZE); lite_list[pg_num>>5] |= 1 << (pg_num & 31); if (hw_dirty) @@ -3290,29 +3620,25 @@ vm_object_upl_request( if (!dirty) dst_page->precious = TRUE; - if (dst_page->wire_count == 0) { + if ( !VM_PAGE_WIRED(dst_page)) { /* * deny access to the target page while * it is being worked on */ dst_page->busy = TRUE; } else - vm_page_wire(dst_page); + dwp->dw_mask |= DW_vm_page_wire; - if (dst_page->clustered) { - /* - * expect the page not to be used - * since it's coming in as part - * of a speculative cluster... - * pages that are 'consumed' will - * get a hardware reference - */ - dst_page->reference = FALSE; - } else { + /* + * We might be about to satisfy a fault which has been + * requested. So no need for the "restart" bit. + */ + dst_page->restart = FALSE; + if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { /* * expect the page to be used */ - dst_page->reference = TRUE; + dwp->dw_mask |= DW_set_reference; } dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE; } @@ -3347,47 +3673,41 @@ vm_object_upl_request( */ VM_PAGE_CONSUME_CLUSTERED(dst_page); } -delay_unlock_queues: - if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... 
otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. - */ - vm_object_unlock(object); - mutex_yield(&vm_page_queue_lock); +try_next_page: + if (dwp->dw_mask) { + if (dwp->dw_mask & DW_vm_page_activate) + VM_STAT_INCR(reactivations); - for (j = 0; ; j++) { - if (vm_object_lock_try(object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); - vm_page_lock_queues(); + if (dst_page->busy == FALSE) { + /* + * dw_do_work may need to drop the object lock + * if it does, we need the pages it's looking at to + * be held stable via the busy bit. + */ + dst_page->busy = TRUE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + } + dwp->dw_m = dst_page; + dwp++; + dw_count++; + + if (dw_count >= DELAYED_WORK_LIMIT) { + dw_do_work(object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; } - delayed_unlock = 1; } -try_next_page: entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; } + if (dw_count) + dw_do_work(object, &dw_array[0], dw_count); + if (alias_page != NULL) { - if (delayed_unlock == 0) { - vm_page_lock_queues(); - delayed_unlock = 1; - } - vm_page_free(alias_page); + VM_PAGE_FREE(alias_page); } - if (delayed_unlock) - vm_page_unlock_queues(); if (page_list_count != NULL) { if (upl->flags & UPL_INTERNAL) @@ -3395,6 +3715,9 @@ vm_object_upl_request( else if (*page_list_count > entry) *page_list_count = entry; } +#if UPL_DEBUG + upl->upl_state = 1; +#endif vm_object_unlock(object); return KERN_SUCCESS; @@ -3424,6 +3747,9 @@ vm_fault_list_request( upl_page_info_t *user_page_list; kern_return_t kr; + if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR) + return KERN_INVALID_ARGUMENT; + if 
(user_page_list_ptr != NULL) { local_list_count = page_list_count; user_page_list = *user_page_list_ptr; @@ -3473,7 +3799,7 @@ vm_object_super_upl_request( unsigned int *page_list_count, int cntrl_flags) { - if (object->paging_offset > offset) + if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) return KERN_FAILURE; assert(object->paging_in_progress); @@ -3483,10 +3809,13 @@ vm_object_super_upl_request( vm_object_offset_t base_offset; upl_size_t super_size; + vm_object_size_t super_size_64; base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; - super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size; + super_size_64 = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size; + super_size = (upl_size_t) super_size_64; + assert(super_size == super_size_64); if (offset > (base_offset + super_size)) { panic("vm_object_super_upl_request: Missed target pageout" @@ -3499,8 +3828,11 @@ vm_object_super_upl_request( * page to be written out who's offset is beyond the * object size */ - if ((offset + size) > (base_offset + super_size)) - super_size = (offset + size) - base_offset; + if ((offset + size) > (base_offset + super_size)) { + super_size_64 = (offset + size) - base_offset; + super_size = (upl_size_t) super_size_64; + assert(super_size == super_size_64); + } offset = base_offset; size = super_size; @@ -3508,7 +3840,7 @@ vm_object_super_upl_request( return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); } - + kern_return_t vm_map_create_upl( vm_map_t map, @@ -3544,44 +3876,56 @@ vm_map_create_upl( return KERN_INVALID_ARGUMENT; REDISCOVER_ENTRY: - vm_map_lock(map); + vm_map_lock_read(map); if (vm_map_lookup_entry(map, offset, &entry)) { - if ((entry->vme_end - offset) < *upl_size) - *upl_size = entry->vme_end 
- offset; + if ((entry->vme_end - offset) < *upl_size) { + *upl_size = (upl_size_t) (entry->vme_end - offset); + assert(*upl_size == entry->vme_end - offset); + } if (caller_flags & UPL_QUERY_OBJECT_TYPE) { *flags = 0; - if (entry->object.vm_object != VM_OBJECT_NULL) { + if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) { if (entry->object.vm_object->private) *flags = UPL_DEV_MEMORY; if (entry->object.vm_object->phys_contiguous) *flags |= UPL_PHYS_CONTIG; } - vm_map_unlock(map); + vm_map_unlock_read(map); return KERN_SUCCESS; } if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { - if ((*upl_size/page_size) > MAX_UPL_SIZE) - *upl_size = MAX_UPL_SIZE * page_size; + if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE) + *upl_size = MAX_UPL_SIZE * PAGE_SIZE; } /* * Create an object if necessary. */ if (entry->object.vm_object == VM_OBJECT_NULL) { + + if (vm_map_lock_read_to_write(map)) + goto REDISCOVER_ENTRY; + entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); entry->offset = 0; + + vm_map_lock_write_to_read(map); } if (!(caller_flags & UPL_COPYOUT_FROM)) { if (!(entry->protection & VM_PROT_WRITE)) { - vm_map_unlock(map); + vm_map_unlock_read(map); return KERN_PROTECTION_FAILURE; } if (entry->needs_copy) { + /* + * Honor copy-on-write for COPY_SYMMETRIC + * strategy. 
+ */ vm_map_t local_map; vm_object_t object; vm_object_offset_t new_offset; @@ -3591,7 +3935,6 @@ vm_map_create_upl( vm_map_t real_map; local_map = map; - vm_map_lock_write_to_read(map); if (vm_map_lookup_locked(&local_map, offset, VM_PROT_WRITE, @@ -3599,14 +3942,15 @@ vm_map_create_upl( &version, &object, &new_offset, &prot, &wired, NULL, - &real_map)) { - vm_map_unlock(local_map); + &real_map) != KERN_SUCCESS) { + vm_map_unlock_read(local_map); return KERN_FAILURE; } if (real_map != map) vm_map_unlock(real_map); + vm_map_unlock_read(local_map); + vm_object_unlock(object); - vm_map_unlock(local_map); goto REDISCOVER_ENTRY; } @@ -3619,7 +3963,7 @@ vm_map_create_upl( local_offset = entry->offset; vm_map_reference(submap); - vm_map_unlock(map); + vm_map_unlock_read(map); ret = vm_map_create_upl(submap, local_offset + (offset - local_start), @@ -3635,9 +3979,9 @@ vm_map_create_upl( local_offset = entry->offset; vm_object_reference(local_object); - vm_map_unlock(map); + vm_map_unlock_read(map); - if (entry->object.vm_object->shadow && entry->object.vm_object->copy) { + if (local_object->shadow && local_object->copy) { vm_object_lock_request( local_object->shadow, (vm_object_offset_t) @@ -3660,7 +4004,7 @@ vm_map_create_upl( local_offset = entry->offset; vm_object_reference(local_object); - vm_map_unlock(map); + vm_map_unlock_read(map); vm_object_lock_request( local_object, @@ -3688,7 +4032,7 @@ vm_map_create_upl( local_start = entry->vme_start; vm_object_reference(local_object); - vm_map_unlock(map); + vm_map_unlock_read(map); ret = vm_object_iopl_request(local_object, (vm_object_offset_t) ((offset - local_start) + local_offset), @@ -3701,7 +4045,7 @@ vm_map_create_upl( return(ret); } - vm_map_unlock(map); + vm_map_unlock_read(map); return(KERN_FAILURE); } @@ -3716,18 +4060,69 @@ kern_return_t vm_map_enter_upl( vm_map_t map, upl_t upl, - vm_map_offset_t *dst_addr) + vm_map_offset_t *dst_addr) { vm_map_size_t size; vm_object_offset_t offset; vm_map_offset_t addr; 
vm_page_t m; kern_return_t kr; + int isVectorUPL = 0, curr_upl=0; + upl_t vector_upl = NULL; + vm_offset_t vector_upl_dst_addr = 0; + vm_map_t vector_upl_submap = NULL; + upl_offset_t subupl_offset = 0; + upl_size_t subupl_size = 0; if (upl == UPL_NULL) return KERN_INVALID_ARGUMENT; - upl_lock(upl); + if((isVectorUPL = vector_upl_is_valid(upl))) { + int mapped=0,valid_upls=0; + vector_upl = upl; + + upl_lock(vector_upl); + for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { + upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); + if(upl == NULL) + continue; + valid_upls++; + if (UPL_PAGE_LIST_MAPPED & upl->flags) + mapped++; + } + + if(mapped) { + if(mapped != valid_upls) + panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls); + else { + upl_unlock(vector_upl); + return KERN_FAILURE; + } + } + + kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); + if( kr != KERN_SUCCESS ) + panic("Vector UPL submap allocation failed\n"); + map = vector_upl_submap; + vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); + curr_upl=0; + } + else + upl_lock(upl); + +process_upl_to_enter: + if(isVectorUPL){ + if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { + *dst_addr = vector_upl_dst_addr; + upl_unlock(vector_upl); + return KERN_SUCCESS; + } + upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); + if(upl == NULL) + goto process_upl_to_enter; + vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); + *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); + } /* * check to see if already mapped @@ -3742,7 +4137,7 @@ vm_map_enter_upl( vm_object_t object; vm_page_t alias_page; vm_object_offset_t new_offset; - int pg_num; + unsigned int pg_num; wpl_array_t lite_list; if (upl->flags & UPL_INTERNAL) { @@ -3770,7 +4165,8 @@ vm_map_enter_upl( upl->flags |= UPL_SHADOWED; while (size) { - pg_num = 
(new_offset)/PAGE_SIZE; + pg_num = (unsigned int) (new_offset / PAGE_SIZE); + assert(pg_num == new_offset / PAGE_SIZE); if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { @@ -3841,13 +4237,22 @@ vm_map_enter_upl( vm_object_reference(upl->map_object); - *dst_addr = 0; - /* - * NEED A UPL_MAP ALIAS - */ - kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, - VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, - VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + if(!isVectorUPL) { + *dst_addr = 0; + /* + * NEED A UPL_MAP ALIAS + */ + kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, + VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + } + else { + kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, + VM_FLAGS_FIXED, upl->map_object, offset, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + if(kr) + panic("vm_map_enter failed for a Vector UPL\n"); + } if (kr != KERN_SUCCESS) { upl_unlock(upl); @@ -3863,7 +4268,12 @@ vm_map_enter_upl( cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK; m->pmapped = TRUE; - m->wpmapped = TRUE; + + /* CODE SIGNING ENFORCEMENT: page has been wpmapped, + * but only in kernel space. If this was on a user map, + * we'd have to set the wpmapped bit. 
*/ + /* m->wpmapped = TRUE; */ + assert(map==kernel_map); PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE); } @@ -3876,8 +4286,13 @@ vm_map_enter_upl( */ upl->ref_count++; upl->flags |= UPL_PAGE_LIST_MAPPED; - upl->kaddr = *dst_addr; - upl_unlock(upl); + upl->kaddr = (vm_offset_t) *dst_addr; + assert(upl->kaddr == *dst_addr); + + if(!isVectorUPL) + upl_unlock(upl); + else + goto process_upl_to_enter; return KERN_SUCCESS; } @@ -3899,11 +4314,55 @@ vm_map_remove_upl( { vm_address_t addr; upl_size_t size; + int isVectorUPL = 0, curr_upl = 0; + upl_t vector_upl = NULL; if (upl == UPL_NULL) return KERN_INVALID_ARGUMENT; - upl_lock(upl); + if((isVectorUPL = vector_upl_is_valid(upl))) { + int unmapped=0, valid_upls=0; + vector_upl = upl; + upl_lock(vector_upl); + for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { + upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); + if(upl == NULL) + continue; + valid_upls++; + if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) + unmapped++; + } + + if(unmapped) { + if(unmapped != valid_upls) + panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls); + else { + upl_unlock(vector_upl); + return KERN_FAILURE; + } + } + curr_upl=0; + } + else + upl_lock(upl); + +process_upl_to_remove: + if(isVectorUPL) { + if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { + vm_map_t v_upl_submap; + vm_offset_t v_upl_submap_dst_addr; + vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); + + vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); + vm_map_deallocate(v_upl_submap); + upl_unlock(vector_upl); + return KERN_SUCCESS; + } + + upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); + if(upl == NULL) + goto process_upl_to_remove; + } if (upl->flags & UPL_PAGE_LIST_MAPPED) { addr = upl->kaddr; @@ -3914,20 +4373,110 @@ vm_map_remove_upl( upl->flags &= ~UPL_PAGE_LIST_MAPPED; upl->kaddr = (vm_offset_t) 0; - upl_unlock(upl); - - 
vm_map_remove(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr + size), - VM_MAP_NO_FLAGS); - - return KERN_SUCCESS; + + if(!isVectorUPL) { + upl_unlock(upl); + + vm_map_remove(map, + vm_map_trunc_page(addr), + vm_map_round_page(addr + size), + VM_MAP_NO_FLAGS); + + return KERN_SUCCESS; + } + else { + /* + * If it's a Vectored UPL, we'll be removing the entire + * submap anyways, so no need to remove individual UPL + * element mappings from within the submap + */ + goto process_upl_to_remove; + } } upl_unlock(upl); return KERN_FAILURE; } +static void +dw_do_work( + vm_object_t object, + struct dw *dwp, + int dw_count) +{ + int j; + boolean_t held_as_spin = TRUE; + + /* + * pageout_scan takes the vm_page_lock_queues first + * then tries for the object lock... to avoid what + * is effectively a lock inversion, we'll go to the + * trouble of taking them in that same order... otherwise + * if this object contains the majority of the pages resident + * in the UBC (or a small set of large objects actively being + * worked on contain the majority of the pages), we could + * cause the pageout_scan thread to 'starve' in its attempt + * to find pages to move to the free queue, since it has to + * successfully acquire the object lock of any candidate page + * before it can steal/clean it. 
+ */ + if (!vm_page_trylockspin_queues()) { + vm_object_unlock(object); + + vm_page_lockspin_queues(); + + for (j = 0; ; j++) { + if (!vm_object_lock_avoid(object) && + _vm_object_lock_try(object)) + break; + vm_page_unlock_queues(); + mutex_pause(j); + vm_page_lockspin_queues(); + } + } + for (j = 0; j < dw_count; j++, dwp++) { + + if (dwp->dw_mask & DW_vm_pageout_throttle_up) + vm_pageout_throttle_up(dwp->dw_m); + + if (dwp->dw_mask & DW_vm_page_wire) + vm_page_wire(dwp->dw_m); + else if (dwp->dw_mask & DW_vm_page_unwire) + vm_page_unwire(dwp->dw_m); + + if (dwp->dw_mask & DW_vm_page_free) { + if (held_as_spin == TRUE) { + vm_page_lockconvert_queues(); + held_as_spin = FALSE; + } + vm_page_free(dwp->dw_m); + } else { + if (dwp->dw_mask & DW_vm_page_deactivate_internal) + vm_page_deactivate_internal(dwp->dw_m, FALSE); + else if (dwp->dw_mask & DW_vm_page_activate) + vm_page_activate(dwp->dw_m); + else if (dwp->dw_mask & DW_vm_page_speculate) + vm_page_speculate(dwp->dw_m, TRUE); + else if (dwp->dw_mask & DW_vm_page_lru) + vm_page_lru(dwp->dw_m); + + if (dwp->dw_mask & DW_set_reference) + dwp->dw_m->reference = TRUE; + else if (dwp->dw_mask & DW_clear_reference) + dwp->dw_m->reference = FALSE; + + if (dwp->dw_mask & DW_clear_busy) + dwp->dw_m->busy = FALSE; + + if (dwp->dw_mask & DW_PAGE_WAKEUP) + PAGE_WAKEUP(dwp->dw_m); + } + } + vm_page_unlock_queues(); +} + + + kern_return_t upl_commit_range( upl_t upl, @@ -3938,17 +4487,20 @@ upl_commit_range( mach_msg_type_number_t count, boolean_t *empty) { - upl_size_t xfer_size; + upl_size_t xfer_size, subupl_size = size; vm_object_t shadow_object; vm_object_t object; vm_object_offset_t target_offset; + upl_offset_t subupl_offset = offset; int entry; wpl_array_t lite_list; int occupied; - int delayed_unlock = 0; int clear_refmod = 0; int pgpgout_count = 0; - int j; + struct dw dw_array[DELAYED_WORK_LIMIT]; + struct dw *dwp; + int dw_count, isVectorUPL = 0; + upl_t vector_upl = NULL; *empty = FALSE; @@ -3958,22 +4510,53 @@ 
upl_commit_range( if (count == 0) page_list = NULL; + if((isVectorUPL = vector_upl_is_valid(upl))) { + vector_upl = upl; + upl_lock(vector_upl); + } + else + upl_lock(upl); + +process_upl_to_commit: + + if(isVectorUPL) { + size = subupl_size; + offset = subupl_offset; + if(size == 0) { + upl_unlock(vector_upl); + return KERN_SUCCESS; + } + upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); + if(upl == NULL) { + upl_unlock(vector_upl); + return KERN_FAILURE; + } + page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); + subupl_size -= size; + subupl_offset += size; + } + +#if UPL_DEBUG + if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { + (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); + + upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; + upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); + + upl->upl_commit_index++; + } +#endif if (upl->flags & UPL_DEVICE_MEMORY) xfer_size = 0; else if ((offset + size) <= upl->size) xfer_size = size; - else + else { + if(!isVectorUPL) + upl_unlock(upl); + else { + upl_unlock(vector_upl); + } return KERN_FAILURE; - - upl_lock(upl); - - if (upl->flags & UPL_ACCESS_BLOCKED) { - /* - * We used this UPL to block access to the pages by marking - * them "busy". Now we need to clear the "busy" bit to allow - * access to these pages again. - */ - flags |= UPL_COMMIT_ALLOW_ACCESS; } if (upl->flags & UPL_CLEAR_DIRTY) flags |= UPL_COMMIT_CLEAR_DIRTY; @@ -3995,28 +4578,16 @@ upl_commit_range( entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... 
otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. - */ - for (j = 0; ; j++) { - vm_page_lock_queues(); + if (upl->flags & UPL_KERNEL_OBJECT) + vm_object_lock_shared(shadow_object); + else + vm_object_lock(shadow_object); - if (vm_object_lock_try(shadow_object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); + if (upl->flags & UPL_ACCESS_BLOCKED) { + assert(shadow_object->blocked_access); + shadow_object->blocked_access = FALSE; + vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); } - delayed_unlock = 1; if (shadow_object->code_signed) { /* @@ -4035,20 +4606,28 @@ upl_commit_range( flags &= ~UPL_COMMIT_CS_VALIDATED; } + dwp = &dw_array[0]; + dw_count = 0; + while (xfer_size) { vm_page_t t, m; + dwp->dw_mask = 0; + clear_refmod = 0; + m = VM_PAGE_NULL; if (upl->flags & UPL_LITE) { - int pg_num; + unsigned int pg_num; - pg_num = target_offset/PAGE_SIZE; + pg_num = (unsigned int) (target_offset/PAGE_SIZE); + assert(pg_num == target_offset/PAGE_SIZE); if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); - m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); + if (!(upl->flags & UPL_KERNEL_OBJECT)) + m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); } } if (upl->flags & UPL_SHADOWED) { @@ -4056,17 +4635,14 @@ upl_commit_range( t->pageout = FALSE; - vm_page_free(t); + VM_PAGE_FREE(t); if (m == VM_PAGE_NULL) m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset); } } - if (m == VM_PAGE_NULL) { + if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL) 
goto commit_next_page; - } - - clear_refmod = 0; if (flags & UPL_COMMIT_CS_VALIDATED) { /* @@ -4079,7 +4655,7 @@ upl_commit_range( } if (upl->flags & UPL_IO_WIRE) { - vm_page_unwire(m); + dwp->dw_mask |= DW_vm_page_unwire; if (page_list) page_list[entry].phys_addr = 0; @@ -4088,6 +4664,7 @@ upl_commit_range( m->dirty = TRUE; else if (flags & UPL_COMMIT_CLEAR_DIRTY) { m->dirty = FALSE; + if (! (flags & UPL_COMMIT_CS_VALIDATED) && m->cs_validated && !m->cs_tainted) { /* @@ -4098,24 +4675,24 @@ upl_commit_range( * re-validated. */ m->cs_validated = FALSE; +#if DEVELOPMENT || DEBUG vm_cs_validated_resets++; +#endif + pmap_disconnect(m->phys_page); } clear_refmod |= VM_MEM_MODIFIED; } - - if (flags & UPL_COMMIT_INACTIVATE) - vm_page_deactivate(m); - - if (clear_refmod) - pmap_clear_refmod(m->phys_page, clear_refmod); - - if (flags & UPL_COMMIT_ALLOW_ACCESS) { + if (flags & UPL_COMMIT_INACTIVATE) { + dwp->dw_mask |= DW_vm_page_deactivate_internal; + clear_refmod |= VM_MEM_REFERENCED; + } + if (upl->flags & UPL_ACCESS_BLOCKED) { /* * We blocked access to the pages in this UPL. * Clear the "busy" bit and wake up any waiter * for this page. 
*/ - PAGE_WAKEUP_DONE(m); + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); } goto commit_next_page; } @@ -4142,37 +4719,36 @@ upl_commit_range( #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; #endif + pmap_disconnect(m->phys_page); } clear_refmod |= VM_MEM_MODIFIED; } - if (clear_refmod) - pmap_clear_refmod(m->phys_page, clear_refmod); - if (page_list) { upl_page_info_t *p; p = &(page_list[entry]); - + if (p->phys_addr && p->pageout && !m->pageout) { m->busy = TRUE; m->pageout = TRUE; - vm_page_wire(m); + + dwp->dw_mask |= DW_vm_page_wire; + } else if (p->phys_addr && !p->pageout && m->pageout && !m->dump_cleaning) { m->pageout = FALSE; m->absent = FALSE; m->overwriting = FALSE; - vm_page_unwire(m); - - PAGE_WAKEUP_DONE(m); + + dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP); } page_list[entry].phys_addr = 0; } m->dump_cleaning = FALSE; if (m->laundry) - vm_pageout_throttle_up(m); + dwp->dw_mask |= DW_vm_pageout_throttle_up; if (m->pageout) { m->cleaning = FALSE; @@ -4182,7 +4758,7 @@ upl_commit_range( if (m->wanted) vm_pageout_target_collisions++; #endif m->dirty = FALSE; - + if (! 
(flags & UPL_COMMIT_CS_VALIDATED) && m->cs_validated && !m->cs_tainted) { /* @@ -4196,11 +4772,13 @@ upl_commit_range( #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; #endif + pmap_disconnect(m->phys_page); } - - if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)) + + if ((flags & UPL_COMMIT_SET_DIRTY) || + (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) m->dirty = TRUE; - + if (m->dirty) { /* * page was re-dirtied after we started @@ -4208,31 +4786,29 @@ upl_commit_range( * we don't know whether the on-disk * copy matches what is now in memory */ - vm_page_unwire(m); - + dwp->dw_mask |= (DW_vm_page_unwire | DW_clear_busy | DW_PAGE_WAKEUP); + if (upl->flags & UPL_PAGEOUT) { CLUSTER_STAT(vm_pageout_target_page_dirtied++;) VM_STAT_INCR(reactivations); DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); } - PAGE_WAKEUP_DONE(m); } else { /* * page has been successfully cleaned * go ahead and free it for other use */ - + if (m->object->internal) { DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); } else { DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); } - - vm_page_free(m); - + dwp->dw_mask |= DW_vm_page_free; + if (upl->flags & UPL_PAGEOUT) { CLUSTER_STAT(vm_pageout_target_page_freed++;) - + if (page_list[entry].dirty) { VM_STAT_INCR(pageouts); DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); @@ -4265,6 +4841,7 @@ upl_commit_range( #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; #endif + pmap_disconnect(m->phys_page); } if ((m->busy) && (m->cleaning)) { @@ -4273,7 +4850,9 @@ upl_commit_range( */ m->absent = FALSE; m->overwriting = FALSE; - m->busy = FALSE; + + dwp->dw_mask |= DW_clear_busy; + } else if (m->overwriting) { /* * alternate request page list, write to @@ -4281,13 +4860,14 @@ upl_commit_range( * page was wired at the time of the list * request */ - assert(m->wire_count != 0); - vm_page_unwire(m);/* reactivates */ + assert(VM_PAGE_WIRED(m)); m->overwriting = FALSE; + + dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ 
} m->cleaning = FALSE; m->encrypted_cleaning = FALSE; - + /* * It is a part of the semantic of COPYOUT_FROM * UPLs that a commit implies cache sync @@ -4295,70 +4875,80 @@ upl_commit_range( * this can be used to strip the precious bit * as well as clean */ - if (upl->flags & UPL_PAGE_SYNC_DONE) + if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) m->precious = FALSE; - + if (flags & UPL_COMMIT_SET_DIRTY) m->dirty = TRUE; - + if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { - vm_page_deactivate(m); + dwp->dw_mask |= DW_vm_page_deactivate_internal; + clear_refmod |= VM_MEM_REFERENCED; + } else if (!m->active && !m->inactive && !m->speculative) { - - if (m->clustered) - vm_page_speculate(m, TRUE); + + if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) + dwp->dw_mask |= DW_vm_page_speculate; else if (m->reference) - vm_page_activate(m); - else - vm_page_deactivate(m); + dwp->dw_mask |= DW_vm_page_activate; + else { + dwp->dw_mask |= DW_vm_page_deactivate_internal; + clear_refmod |= VM_MEM_REFERENCED; + } } - if (flags & UPL_COMMIT_ALLOW_ACCESS) { + if (upl->flags & UPL_ACCESS_BLOCKED) { /* * We blocked access to the pages in this URL. * Clear the "busy" bit on this page before we * wake up any waiter. */ - m->busy = FALSE; + dwp->dw_mask |= DW_clear_busy; } /* * Wakeup any thread waiting for the page to be un-cleaning. */ - PAGE_WAKEUP(m); + dwp->dw_mask |= DW_PAGE_WAKEUP; commit_next_page: + if (clear_refmod) + pmap_clear_refmod(m->phys_page, clear_refmod); + target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; - if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... 
otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. - */ - vm_object_unlock(shadow_object); - mutex_yield(&vm_page_queue_lock); + if (dwp->dw_mask) { + if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { + if (m->busy == FALSE) { + /* + * dw_do_work may need to drop the object lock + * if it does, we need the pages it's looking at to + * be held stable via the busy bit. + */ + m->busy = TRUE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + } + dwp->dw_m = m; + dwp++; + dw_count++; - for (j = 0; ; j++) { - if (vm_object_lock_try(shadow_object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); - vm_page_lock_queues(); + if (dw_count >= DELAYED_WORK_LIMIT) { + dw_do_work(shadow_object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; + } + } else { + if (dwp->dw_mask & DW_clear_busy) + m->busy = FALSE; + + if (dwp->dw_mask & DW_PAGE_WAKEUP) + PAGE_WAKEUP(m); } - delayed_unlock = 1; } } - if (delayed_unlock) - vm_page_unlock_queues(); + if (dw_count) + dw_do_work(shadow_object, &dw_array[0], dw_count); occupied = 1; @@ -4383,17 +4973,25 @@ upl_commit_range( occupied = 0; } if (occupied == 0) { - if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY) + /* + * If this UPL element belongs to a Vector UPL and is + * empty, then this is the right function to deallocate + * it. So go ahead set the *empty variable. The flag + * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view + * should be considered relevant for the Vector UPL and not + * the internal UPLs. 
+ */ + if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) *empty = TRUE; - if (object == shadow_object) { + if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { /* * this is not a paging object * so we need to drop the paging reference * that was taken when we created the UPL * against this object */ - vm_object_paging_end(shadow_object); + vm_object_activity_end(shadow_object); } else { /* * we dontated the paging reference to @@ -4405,7 +5003,25 @@ upl_commit_range( vm_object_unlock(shadow_object); if (object != shadow_object) vm_object_unlock(object); - upl_unlock(upl); + + if(!isVectorUPL) + upl_unlock(upl); + else { + /* + * If we completed our operations on an UPL that is + * part of a Vectored UPL and if empty is TRUE, then + * we should go ahead and deallocate this UPL element. + * Then we check if this was the last of the UPL elements + * within that Vectored UPL. If so, set empty to TRUE + * so that in ubc_upl_commit_range or ubc_upl_commit, we + * can go ahead and deallocate the Vector UPL too. 
+ */ + if(*empty==TRUE) { + *empty = vector_upl_set_subupl(vector_upl, upl, 0); + upl_deallocate(upl); + } + goto process_upl_to_commit; + } if (pgpgout_count) { DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); @@ -4422,15 +5038,18 @@ upl_abort_range( int error, boolean_t *empty) { - upl_size_t xfer_size; + upl_size_t xfer_size, subupl_size = size; vm_object_t shadow_object; vm_object_t object; vm_object_offset_t target_offset; + upl_offset_t subupl_offset = offset; int entry; wpl_array_t lite_list; int occupied; - int delayed_unlock = 0; - int j; + struct dw dw_array[DELAYED_WORK_LIMIT]; + struct dw *dwp; + int dw_count, isVectorUPL = 0; + upl_t vector_upl = NULL; *empty = FALSE; @@ -4440,15 +5059,56 @@ upl_abort_range( if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) return upl_commit_range(upl, offset, size, 0, NULL, 0, empty); + if((isVectorUPL = vector_upl_is_valid(upl))) { + vector_upl = upl; + upl_lock(vector_upl); + } + else + upl_lock(upl); + +process_upl_to_abort: + if(isVectorUPL) { + size = subupl_size; + offset = subupl_offset; + if(size == 0) { + upl_unlock(vector_upl); + return KERN_SUCCESS; + } + upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); + if(upl == NULL) { + upl_unlock(vector_upl); + return KERN_FAILURE; + } + subupl_size -= size; + subupl_offset += size; + } + + *empty = FALSE; + +#if UPL_DEBUG + if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { + (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); + + upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; + upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); + upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; + + upl->upl_commit_index++; + } +#endif if (upl->flags & UPL_DEVICE_MEMORY) xfer_size = 0; else if ((offset + size) <= upl->size) xfer_size = size; - else - return KERN_FAILURE; - - upl_lock(upl); + else { + if(!isVectorUPL) + 
upl_unlock(upl); + else { + upl_unlock(vector_upl); + } + return KERN_FAILURE; + } if (upl->flags & UPL_INTERNAL) { lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) @@ -4468,55 +5128,58 @@ upl_abort_range( entry = offset/PAGE_SIZE; target_offset = (vm_object_offset_t)offset; - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. - */ - for (j = 0; ; j++) { - vm_page_lock_queues(); + if (upl->flags & UPL_KERNEL_OBJECT) + vm_object_lock_shared(shadow_object); + else + vm_object_lock(shadow_object); - if (vm_object_lock_try(shadow_object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); + if (upl->flags & UPL_ACCESS_BLOCKED) { + assert(shadow_object->blocked_access); + shadow_object->blocked_access = FALSE; + vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); } - delayed_unlock = 1; + + dwp = &dw_array[0]; + dw_count = 0; + + if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) + panic("upl_abort_range: kernel_object being DUMPED"); while (xfer_size) { vm_page_t t, m; + dwp->dw_mask = 0; + m = VM_PAGE_NULL; if (upl->flags & UPL_LITE) { - int pg_num; - pg_num = target_offset/PAGE_SIZE; + unsigned int pg_num; + + pg_num = (unsigned int) (target_offset/PAGE_SIZE); + assert(pg_num == target_offset/PAGE_SIZE); + if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); - m = vm_page_lookup(shadow_object, target_offset + - 
(upl->offset - shadow_object->paging_offset)); + if ( !(upl->flags & UPL_KERNEL_OBJECT)) + m = vm_page_lookup(shadow_object, target_offset + + (upl->offset - shadow_object->paging_offset)); } } if (upl->flags & UPL_SHADOWED) { if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { t->pageout = FALSE; - vm_page_free(t); + VM_PAGE_FREE(t); if (m == VM_PAGE_NULL) m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset); } } + if ((upl->flags & UPL_KERNEL_OBJECT)) + goto abort_next_page; + if (m != VM_PAGE_NULL) { if (m->absent) { @@ -4531,7 +5194,6 @@ upl_abort_range( if (error & UPL_ABORT_RESTART) { m->restart = TRUE; m->absent = FALSE; - m->error = TRUE; m->unusual = TRUE; must_free = FALSE; } else if (error & UPL_ABORT_UNAVAILABLE) { @@ -4558,24 +5220,26 @@ upl_abort_range( m->cleaning = FALSE; m->encrypted_cleaning = FALSE; m->overwriting = FALSE; - PAGE_WAKEUP_DONE(m); + + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); if (must_free == TRUE) - vm_page_free(m); + dwp->dw_mask |= DW_vm_page_free; else - vm_page_activate(m); + dwp->dw_mask |= DW_vm_page_activate; } else { /* * Handle the trusted pager throttle. 
*/ if (m->laundry) - vm_pageout_throttle_up(m); + dwp->dw_mask |= DW_vm_pageout_throttle_up; if (m->pageout) { assert(m->busy); assert(m->wire_count == 1); m->pageout = FALSE; - vm_page_unwire(m); + + dwp->dw_mask |= DW_vm_page_unwire; } m->dump_cleaning = FALSE; m->cleaning = FALSE; @@ -4586,7 +5250,8 @@ upl_abort_range( #endif /* MACH_PAGEMAP */ if (error & UPL_ABORT_DUMP_PAGES) { pmap_disconnect(m->phys_page); - vm_page_free(m); + + dwp->dw_mask |= DW_vm_page_free; } else { if (error & UPL_ABORT_REFERENCE) { /* @@ -4595,44 +5260,49 @@ upl_abort_range( * file I/O, this is done by * implementing an LRU on the inactive q */ - vm_page_lru(m); + dwp->dw_mask |= DW_vm_page_lru; } - PAGE_WAKEUP_DONE(m); + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); } } } - if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { - /* - * pageout_scan takes the vm_page_lock_queues first - * then tries for the object lock... to avoid what - * is effectively a lock inversion, we'll go to the - * trouble of taking them in that same order... otherwise - * if this object contains the majority of the pages resident - * in the UBC (or a small set of large objects actively being - * worked on contain the majority of the pages), we could - * cause the pageout_scan thread to 'starve' in its attempt - * to find pages to move to the free queue, since it has to - * successfully acquire the object lock of any candidate page - * before it can steal/clean it. 
- */ - vm_object_unlock(shadow_object); - mutex_yield(&vm_page_queue_lock); - - for (j = 0; ; j++) { - if (vm_object_lock_try(shadow_object)) - break; - vm_page_unlock_queues(); - mutex_pause(j); - vm_page_lock_queues(); - } - delayed_unlock = 1; - } +abort_next_page: target_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; entry++; + + if (dwp->dw_mask) { + if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { + if (m->busy == FALSE) { + /* + * dw_do_work may need to drop the object lock + * if it does, we need the pages it's looking at to + * be held stable via the busy bit. + */ + m->busy = TRUE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + } + dwp->dw_m = m; + dwp++; + dw_count++; + + if (dw_count >= DELAYED_WORK_LIMIT) { + dw_do_work(shadow_object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; + } + } else { + if (dwp->dw_mask & DW_clear_busy) + m->busy = FALSE; + + if (dwp->dw_mask & DW_PAGE_WAKEUP) + PAGE_WAKEUP(m); + } + } } - if (delayed_unlock) - vm_page_unlock_queues(); + if (dw_count) + dw_do_work(shadow_object, &dw_array[0], dw_count); occupied = 1; @@ -4657,17 +5327,25 @@ upl_abort_range( occupied = 0; } if (occupied == 0) { - if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY) + /* + * If this UPL element belongs to a Vector UPL and is + * empty, then this is the right function to deallocate + * it. So go ahead set the *empty variable. The flag + * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view + * should be considered relevant for the Vector UPL and + * not the internal UPLs. 
+ */ + if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) *empty = TRUE; - if (object == shadow_object) { + if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { /* * this is not a paging object * so we need to drop the paging reference * that was taken when we created the UPL * against this object */ - vm_object_paging_end(shadow_object); + vm_object_activity_end(shadow_object); } else { /* * we dontated the paging reference to @@ -4679,7 +5357,25 @@ upl_abort_range( vm_object_unlock(shadow_object); if (object != shadow_object) vm_object_unlock(object); - upl_unlock(upl); + + if(!isVectorUPL) + upl_unlock(upl); + else { + /* + * If we completed our operations on an UPL that is + * part of a Vectored UPL and if empty is TRUE, then + * we should go ahead and deallocate this UPL element. + * Then we check if this was the last of the UPL elements + * within that Vectored UPL. If so, set empty to TRUE + * so that in ubc_upl_abort_range or ubc_upl_abort, we + * can go ahead and deallocate the Vector UPL too. 
+ */ + if(*empty == TRUE) { + *empty = vector_upl_set_subupl(vector_upl, upl,0); + upl_deallocate(upl); + } + goto process_upl_to_abort; + } return KERN_SUCCESS; } @@ -4709,6 +5405,8 @@ upl_commit( } +unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; + kern_return_t vm_object_iopl_request( vm_object_t object, @@ -4725,13 +5423,15 @@ vm_object_iopl_request( upl_t upl = NULL; unsigned int entry; wpl_array_t lite_list = NULL; - int delayed_unlock = 0; int no_zero_fill = FALSE; u_int32_t psize; kern_return_t ret; vm_prot_t prot; struct vm_object_fault_info fault_info; - + struct dw dw_array[DELAYED_WORK_LIMIT]; + struct dw *dwp; + int dw_count; + int dw_index; if (cntrl_flags & ~UPL_VALID_FLAGS) { /* @@ -4773,8 +5473,8 @@ vm_object_iopl_request( else prot = VM_PROT_READ | VM_PROT_WRITE; - if (((size/page_size) > MAX_UPL_SIZE) && !object->phys_contiguous) - size = MAX_UPL_SIZE * page_size; + if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous) + size = MAX_UPL_SIZE * PAGE_SIZE; if (cntrl_flags & UPL_SET_INTERNAL) { if (page_list_count != NULL) @@ -4799,10 +5499,17 @@ vm_object_iopl_request( user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); + if (size == 0) { + user_page_list = NULL; + lite_list = NULL; + } } else { upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize); lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); + if (size == 0) { + lite_list = NULL; + } } if (user_page_list) user_page_list[0].device = FALSE; @@ -4811,18 +5518,41 @@ vm_object_iopl_request( upl->map_object = object; upl->size = size; - vm_object_lock(object); - vm_object_paging_begin(object); + if (object == kernel_object && + !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { + upl->flags |= UPL_KERNEL_OBJECT; +#if UPL_DEBUG + vm_object_lock(object); +#else + vm_object_lock_shared(object); +#endif + } else { + 
vm_object_lock(object); + vm_object_activity_begin(object); + } /* * paging in progress also protects the paging_offset */ upl->offset = offset + object->paging_offset; + if (cntrl_flags & UPL_BLOCK_ACCESS) { + /* + * The user requested that access to the pages in this URL + * be blocked until the UPL is commited or aborted. + */ + upl->flags |= UPL_ACCESS_BLOCKED; + } + if (object->phys_contiguous) { -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_enter(&object->uplq, upl, upl_t, uplq); #endif /* UPL_DEBUG */ + if (upl->flags & UPL_ACCESS_BLOCKED) { + assert(!object->blocked_access); + object->blocked_access = TRUE; + } + vm_object_unlock(object); /* @@ -4831,10 +5561,10 @@ vm_object_iopl_request( */ upl->flags |= UPL_DEVICE_MEMORY; - upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT; + upl->highest_page = (ppnum_t) ((offset + object->shadow_offset + size - 1)>>PAGE_SHIFT); if (user_page_list) { - user_page_list[0].phys_addr = (offset + object->shadow_offset)>>PAGE_SHIFT; + user_page_list[0].phys_addr = (ppnum_t) ((offset + object->shadow_offset)>>PAGE_SHIFT); user_page_list[0].device = TRUE; } if (page_list_count != NULL) { @@ -4845,25 +5575,54 @@ vm_object_iopl_request( } return KERN_SUCCESS; } - /* - * Protect user space from future COW operations - */ - object->true_share = TRUE; + if (object != kernel_object) { + /* + * Protect user space from future COW operations + */ + object->true_share = TRUE; - if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) - object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) + object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; + } -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_enter(&object->uplq, upl, upl_t, uplq); #endif /* UPL_DEBUG */ - if (cntrl_flags & UPL_BLOCK_ACCESS) { + if (!(cntrl_flags & UPL_COPYOUT_FROM) && + object->copy != VM_OBJECT_NULL) { /* - * The user requested that access to the pages in this URL - * be blocked until the UPL is commited 
or aborted. + * Honor copy-on-write obligations + * + * The caller is gathering these pages and + * might modify their contents. We need to + * make sure that the copy object has its own + * private copies of these pages before we let + * the caller modify them. + * + * NOTE: someone else could map the original object + * after we've done this copy-on-write here, and they + * could then see an inconsistent picture of the memory + * while it's being modified via the UPL. To prevent this, + * we would have to block access to these pages until the + * UPL is released. We could use the UPL_BLOCK_ACCESS + * code path for that... */ - upl->flags |= UPL_ACCESS_BLOCKED; + vm_object_update(object, + offset, + size, + NULL, + NULL, + FALSE, /* should_return */ + MEMORY_OBJECT_COPY_SYNC, + VM_PROT_NO_CHANGE); +#if DEVELOPMENT || DEBUG + iopl_cow++; + iopl_cow_pages += size >> PAGE_SHIFT; +#endif } + + entry = 0; xfer_size = size; @@ -4874,10 +5633,16 @@ vm_object_iopl_request( fault_info.lo_offset = offset; fault_info.hi_offset = offset + xfer_size; fault_info.no_cache = FALSE; + fault_info.stealth = FALSE; + + dwp = &dw_array[0]; + dw_count = 0; while (xfer_size) { vm_fault_return_t result; - int pg_num; + unsigned int pg_num; + + dwp->dw_mask = 0; dst_page = vm_page_lookup(object, dst_offset); @@ -4886,22 +5651,22 @@ vm_object_iopl_request( * If the page is encrypted, we need to decrypt it, * so force a soft page fault. 
*/ - if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) || - (dst_page->encrypted) || - (dst_page->unusual && (dst_page->error || - dst_page->restart || - dst_page->absent || - dst_page->fictitious))) { + if (dst_page == VM_PAGE_NULL || + dst_page->busy || + dst_page->encrypted || + dst_page->error || + dst_page->restart || + dst_page->absent || + dst_page->fictitious) { + + if (object == kernel_object) + panic("vm_object_iopl_request: missing/bad page in kernel object\n"); do { vm_page_t top_page; kern_return_t error_code; int interruptible; - if (delayed_unlock) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } if (cntrl_flags & UPL_SET_INTERRUPTIBLE) interruptible = THREAD_ABORTSAFE; else @@ -4910,6 +5675,8 @@ vm_object_iopl_request( fault_info.interruptible = interruptible; fault_info.cluster_size = xfer_size; + vm_object_paging_begin(object); + result = vm_fault_page(object, dst_offset, prot | VM_PROT_WRITE, FALSE, &prot, &dst_page, &top_page, @@ -4941,24 +5708,22 @@ vm_object_iopl_request( vm_object_paging_end(local_object); } } + vm_object_paging_end(object); break; case VM_FAULT_RETRY: vm_object_lock(object); - vm_object_paging_begin(object); break; case VM_FAULT_FICTITIOUS_SHORTAGE: vm_page_more_fictitious(); vm_object_lock(object); - vm_object_paging_begin(object); break; case VM_FAULT_MEMORY_SHORTAGE: if (vm_page_wait(interruptible)) { vm_object_lock(object); - vm_object_paging_begin(object); break; } /* fall thru */ @@ -4966,15 +5731,44 @@ vm_object_iopl_request( case VM_FAULT_INTERRUPTED: error_code = MACH_SEND_INTERRUPTED; case VM_FAULT_MEMORY_ERROR: + memory_error: ret = (error_code ? 
error_code: KERN_MEMORY_ERROR); vm_object_lock(object); - vm_object_paging_begin(object); goto return_err; + + case VM_FAULT_SUCCESS_NO_VM_PAGE: + /* success but no page: fail */ + vm_object_paging_end(object); + vm_object_unlock(object); + goto memory_error; + + default: + panic("vm_object_iopl_request: unexpected error" + " 0x%x from vm_fault_page()\n", result); } } while (result != VM_FAULT_SUCCESS); + } + if (upl->flags & UPL_KERNEL_OBJECT) + goto record_phys_addr; + + if (dst_page->cleaning) { + /* + * Someone else is cleaning this page in place.as + * In theory, we should be able to proceed and use this + * page but they'll probably end up clearing the "busy" + * bit on it in upl_commit_range() but they didn't set + * it, so they would clear our "busy" bit and open + * us to race conditions. + * We'd better wait for the cleaning to complete and + * then try again. + */ + vm_object_iopl_request_sleep_for_cleaning++; + PAGE_SLEEP(object, dst_page, THREAD_UNINT); + continue; + } if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { vm_page_t low_page; @@ -4988,14 +5782,10 @@ vm_object_iopl_request( * we don't know whether that physical address has been * handed out to some other 64 bit capable DMA device to use */ - if (dst_page->wire_count) { + if (VM_PAGE_WIRED(dst_page)) { ret = KERN_PROTECTION_FAILURE; goto return_err; } - if (delayed_unlock) { - delayed_unlock = 0; - vm_page_unlock_queues(); - } low_page = vm_page_grablo(); if (low_page == VM_PAGE_NULL) { @@ -5023,13 +5813,7 @@ vm_object_iopl_request( if (refmod & VM_MEM_MODIFIED) low_page->dirty = TRUE; - vm_page_lock_queues(); vm_page_replace(low_page, object, dst_offset); - /* - * keep the queue lock since we're going to - * need it immediately - */ - delayed_unlock = 1; dst_page = low_page; /* @@ -5039,10 +5823,7 @@ vm_object_iopl_request( */ dst_page->busy = FALSE; } - if (delayed_unlock == 0) - vm_page_lock_queues(); - - vm_page_wire(dst_page); + 
dwp->dw_mask |= DW_vm_page_wire; if (cntrl_flags & UPL_BLOCK_ACCESS) { /* @@ -5053,17 +5834,18 @@ vm_object_iopl_request( assert(!dst_page->fictitious); dst_page->busy = TRUE; } - pg_num = (dst_offset-offset)/PAGE_SIZE; - lite_list[pg_num>>5] |= 1 << (pg_num & 31); - /* * expect the page to be used * page queues lock must be held to set 'reference' */ - dst_page->reference = TRUE; + dwp->dw_mask |= DW_set_reference; if (!(cntrl_flags & UPL_COPYOUT_FROM)) dst_page->dirty = TRUE; +record_phys_addr: + pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); + assert(pg_num == (dst_offset-offset)/PAGE_SIZE); + lite_list[pg_num>>5] |= 1 << (pg_num & 31); if (dst_page->phys_page > upl->highest_page) upl->highest_page = dst_page->phys_page; @@ -5082,23 +5864,42 @@ vm_object_iopl_request( user_page_list[entry].cs_validated = dst_page->cs_validated; user_page_list[entry].cs_tainted = dst_page->cs_tainted; } - /* - * someone is explicitly grabbing this page... - * update clustered and speculative state - * - */ - VM_PAGE_CONSUME_CLUSTERED(dst_page); - - if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { - mutex_yield(&vm_page_queue_lock); - delayed_unlock = 1; + if (object != kernel_object) { + /* + * someone is explicitly grabbing this page... + * update clustered and speculative state + * + */ + VM_PAGE_CONSUME_CLUSTERED(dst_page); } entry++; dst_offset += PAGE_SIZE_64; xfer_size -= PAGE_SIZE; + + if (dwp->dw_mask) { + if (dst_page->busy == FALSE) { + /* + * dw_do_work may need to drop the object lock + * if it does, we need the pages it's looking at to + * be held stable via the busy bit. 
+ */ + dst_page->busy = TRUE; + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + } + dwp->dw_m = dst_page; + dwp++; + dw_count++; + + if (dw_count >= DELAYED_WORK_LIMIT) { + dw_do_work(object, &dw_array[0], dw_count); + + dwp = &dw_array[0]; + dw_count = 0; + } + } } - if (delayed_unlock) - vm_page_unlock_queues(); + if (dw_count) + dw_do_work(object, &dw_array[0], dw_count); if (page_list_count != NULL) { if (upl->flags & UPL_INTERNAL) @@ -5117,12 +5918,13 @@ vm_object_iopl_request( */ vm_object_pmap_protect(object, offset, (vm_object_size_t)size, PMAP_NULL, 0, VM_PROT_NONE); + assert(!object->blocked_access); + object->blocked_access = TRUE; } return KERN_SUCCESS; return_err: - if (delayed_unlock) - vm_page_unlock_queues(); + dw_index = 0; for (; offset < dst_offset; offset += PAGE_SIZE) { dst_page = vm_page_lookup(object, offset); @@ -5130,13 +5932,25 @@ vm_object_iopl_request( if (dst_page == VM_PAGE_NULL) panic("vm_object_iopl_request: Wired pages missing. \n"); + if (dw_count) { + if (dw_array[dw_index].dw_m == dst_page) { + dw_index++; + dw_count--; + continue; + } + } vm_page_lockspin_queues(); vm_page_unwire(dst_page); vm_page_unlock_queues(); VM_STAT_INCR(reactivations); } - vm_object_paging_end(object); +#if UPL_DEBUG + upl->upl_state = 2; +#endif + if (! (upl->flags & UPL_KERNEL_OBJECT)) { + vm_object_activity_end(object); + } vm_object_unlock(object); upl_destroy(upl); @@ -5152,7 +5966,7 @@ upl_transpose( boolean_t upls_locked; vm_object_t object1, object2; - if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) { + if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { return KERN_INVALID_ARGUMENT; } @@ -5197,13 +6011,13 @@ upl_transpose( * Make each UPL point to the correct VM object, i.e. the * object holding the pages that the UPL refers to... 
*/ -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_remove(&object1->uplq, upl1, upl_t, uplq); queue_remove(&object2->uplq, upl2, upl_t, uplq); #endif upl1->map_object = object2; upl2->map_object = object1; -#ifdef UPL_DEBUG +#if UPL_DEBUG queue_enter(&object1->uplq, upl2, upl_t, uplq); queue_enter(&object2->uplq, upl1, upl_t, uplq); #endif @@ -5302,8 +6116,7 @@ vm_paging_map_init(void) panic("vm_paging_map_init: kernel_map full\n"); } map_entry->object.vm_object = kernel_object; - map_entry->offset = - page_map_offset - VM_MIN_KERNEL_ADDRESS; + map_entry->offset = page_map_offset; vm_object_reference(kernel_object); vm_map_unlock(kernel_map); @@ -5517,7 +6330,7 @@ vm_paging_map_object( } vm_paging_objects_mapped_slow++; - vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64; + vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); return KERN_SUCCESS; } @@ -5564,7 +6377,8 @@ vm_paging_unmap_object( * for next time. */ assert(end - start == PAGE_SIZE); - i = (start - vm_paging_base_address) >> PAGE_SHIFT; + i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); + assert(i >= 0 && i < VM_PAGING_NUM_PAGES); /* undo the pmap mapping */ pmap_remove(kernel_pmap, start, end); @@ -5598,8 +6412,6 @@ unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); #endif /* DEBUG */ -extern u_long random(void); - /* * Initialize the encryption context: key and key size. */ @@ -5933,19 +6745,8 @@ vm_page_decrypt( * and the decryption doesn't count. */ page->dirty = FALSE; - if (page->cs_validated && !page->cs_tainted) { - /* - * CODE SIGNING: - * This page is no longer dirty - * but could have been modified, - * so it will need to be - * re-validated. 
- */ - page->cs_validated = FALSE; - vm_cs_validated_resets++; - } + assert (page->cs_validated == FALSE); pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); - page->encrypted = FALSE; /* @@ -5969,8 +6770,10 @@ vm_page_decrypt( vm_object_paging_end(page->object); } +#if DEVELOPMENT || DEBUG unsigned long upl_encrypt_upls = 0; unsigned long upl_encrypt_pages = 0; +#endif /* * ENCRYPTED SWAP: @@ -5985,18 +6788,36 @@ upl_encrypt( upl_offset_t crypt_offset, upl_size_t crypt_size) { - upl_size_t upl_size; - upl_offset_t upl_offset; + upl_size_t upl_size, subupl_size=crypt_size; + upl_offset_t offset_in_upl, subupl_offset=crypt_offset; vm_object_t upl_object; + vm_object_offset_t upl_offset; vm_page_t page; vm_object_t shadow_object; vm_object_offset_t shadow_offset; vm_object_offset_t paging_offset; vm_object_offset_t base_offset; + int isVectorUPL = 0; + upl_t vector_upl = NULL; + + if((isVectorUPL = vector_upl_is_valid(upl))) + vector_upl = upl; + +process_upl_to_encrypt: + if(isVectorUPL) { + crypt_size = subupl_size; + crypt_offset = subupl_offset; + upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); + if(upl == NULL) + panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); + subupl_size -= crypt_size; + subupl_offset += crypt_size; + } +#if DEVELOPMENT || DEBUG upl_encrypt_upls++; upl_encrypt_pages += crypt_size / PAGE_SIZE; - +#endif upl_object = upl->map_object; upl_offset = upl->offset; upl_size = upl->size; @@ -6035,17 +6856,17 @@ upl_encrypt( assert(crypt_offset + crypt_size <= upl_size); - for (upl_offset = 0; - upl_offset < crypt_size; - upl_offset += PAGE_SIZE) { + for (offset_in_upl = 0; + offset_in_upl < crypt_size; + offset_in_upl += PAGE_SIZE) { page = vm_page_lookup(shadow_object, - base_offset + upl_offset); + base_offset + offset_in_upl); if (page == VM_PAGE_NULL) { panic("upl_encrypt: " "no page for (obj=%p,off=%lld+%d)!\n", shadow_object, base_offset, - upl_offset); + offset_in_upl); } /* * 
Disconnect the page from all pmaps, so that nobody can @@ -6058,7 +6879,7 @@ upl_encrypt( pmap_disconnect(page->phys_page); vm_page_encrypt(page, 0); - if (shadow_object == vm_pageout_scan_wants_object) { + if (vm_object_lock_avoid(shadow_object)) { /* * Give vm_pageout_scan() a chance to convert more * pages from "clean-in-place" to "clean-and-free", @@ -6066,12 +6887,16 @@ upl_encrypt( * in this cluster. */ vm_object_unlock(shadow_object); + mutex_pause(2); vm_object_lock(shadow_object); } } vm_object_paging_end(shadow_object); vm_object_unlock(shadow_object); + + if(isVectorUPL && subupl_size) + goto process_upl_to_encrypt; } #else /* CRYPTO */ @@ -6099,6 +6924,352 @@ vm_page_decrypt( #endif /* CRYPTO */ +void +vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked) +{ + page->list_req_pending = FALSE; + page->cleaning = FALSE; + page->pageout = FALSE; + + if (!queues_locked) { + vm_page_lockspin_queues(); + } + + /* + * need to drop the laundry count... + * we may also need to remove it + * from the I/O paging queue... + * vm_pageout_throttle_up handles both cases + * + * the laundry and pageout_queue flags are cleared... + */ + vm_pageout_throttle_up(page); + + /* + * toss the wire count we picked up + * when we intially set this page up + * to be cleaned... 
+ */ + vm_page_unwire(page); + + vm_page_steal_pageout_page++; + + if (!queues_locked) { + vm_page_unlock_queues(); + } +} + +upl_t +vector_upl_create(vm_offset_t upl_offset) +{ + int vector_upl_size = sizeof(struct _vector_upl); + int i=0; + upl_t upl; + vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); + + upl = upl_create(0,UPL_VECTOR,0); + upl->vector_upl = vector_upl; + upl->offset = upl_offset; + vector_upl->size = 0; + vector_upl->offset = upl_offset; + vector_upl->invalid_upls=0; + vector_upl->num_upls=0; + vector_upl->pagelist = NULL; + + for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { + vector_upl->upl_iostates[i].size = 0; + vector_upl->upl_iostates[i].offset = 0; + + } + return upl; +} + +void +vector_upl_deallocate(upl_t upl) +{ + if(upl) { + vector_upl_t vector_upl = upl->vector_upl; + if(vector_upl) { + if(vector_upl->invalid_upls != vector_upl->num_upls) + panic("Deallocating non-empty Vectored UPL\n"); + kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); + vector_upl->invalid_upls=0; + vector_upl->num_upls = 0; + vector_upl->pagelist = NULL; + vector_upl->size = 0; + vector_upl->offset = 0; + kfree(vector_upl, sizeof(struct _vector_upl)); + vector_upl = (vector_upl_t)0xdeadbeef; + } + else + panic("vector_upl_deallocate was passed a non-vectored upl\n"); + } + else + panic("vector_upl_deallocate was passed a NULL upl\n"); +} + +boolean_t +vector_upl_is_valid(upl_t upl) +{ + if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { + vector_upl_t vector_upl = upl->vector_upl; + if(vector_upl == NULL || vector_upl == (vector_upl_t)0xdeadbeef || vector_upl == (vector_upl_t)0xfeedbeef) + return FALSE; + else + return TRUE; + } + return FALSE; +} + +boolean_t +vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size) +{ + if(vector_upl_is_valid(upl)) { + vector_upl_t vector_upl = upl->vector_upl; + + if(vector_upl) { + if(subupl) { + if(io_size) { + if(io_size < PAGE_SIZE) + io_size = PAGE_SIZE; + 
subupl->vector_upl = (void*)vector_upl; + vector_upl->upl_elems[vector_upl->num_upls++] = subupl; + vector_upl->size += io_size; + upl->size += io_size; + } + else { + uint32_t i=0,invalid_upls=0; + for(i = 0; i < vector_upl->num_upls; i++) { + if(vector_upl->upl_elems[i] == subupl) + break; + } + if(i == vector_upl->num_upls) + panic("Trying to remove sub-upl when none exists"); + + vector_upl->upl_elems[i] = NULL; + invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); + if(invalid_upls == vector_upl->num_upls) + return TRUE; + else + return FALSE; + } + } + else + panic("vector_upl_set_subupl was passed a NULL upl element\n"); + } + else + panic("vector_upl_set_subupl was passed a non-vectored upl\n"); + } + else + panic("vector_upl_set_subupl was passed a NULL upl\n"); + + return FALSE; +} + +void +vector_upl_set_pagelist(upl_t upl) +{ + if(vector_upl_is_valid(upl)) { + uint32_t i=0; + vector_upl_t vector_upl = upl->vector_upl; + + if(vector_upl) { + vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; + + vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); + + for(i=0; i < vector_upl->num_upls; i++) { + cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE; + bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size); + pagelist_size += cur_upl_pagelist_size; + if(vector_upl->upl_elems[i]->highest_page > upl->highest_page) + upl->highest_page = vector_upl->upl_elems[i]->highest_page; + } + assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) ); + } + else + panic("vector_upl_set_pagelist was passed a non-vectored upl\n"); + } + else + panic("vector_upl_set_pagelist was passed a NULL upl\n"); + +} + +upl_t +vector_upl_subupl_byindex(upl_t upl, uint32_t index) +{ + if(vector_upl_is_valid(upl)) { + vector_upl_t vector_upl = upl->vector_upl; + 
if(vector_upl) { + if(index < vector_upl->num_upls) + return vector_upl->upl_elems[index]; + } + else + panic("vector_upl_subupl_byindex was passed a non-vectored upl\n"); + } + return NULL; +} + +upl_t +vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) +{ + if(vector_upl_is_valid(upl)) { + uint32_t i=0; + vector_upl_t vector_upl = upl->vector_upl; + + if(vector_upl) { + upl_t subupl = NULL; + vector_upl_iostates_t subupl_state; + + for(i=0; i < vector_upl->num_upls; i++) { + subupl = vector_upl->upl_elems[i]; + subupl_state = vector_upl->upl_iostates[i]; + if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { + /* We could have been passed an offset/size pair that belongs + * to an UPL element that has already been committed/aborted. + * If so, return NULL. + */ + if(subupl == NULL) + return NULL; + if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { + *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; + if(*upl_size > subupl_state.size) + *upl_size = subupl_state.size; + } + if(*upl_offset >= subupl_state.offset) + *upl_offset -= subupl_state.offset; + else if(i) + panic("Vector UPL offset miscalculation\n"); + return subupl; + } + } + } + else + panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n"); + } + return NULL; +} + +void +vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) +{ + *v_upl_submap = NULL; + + if(vector_upl_is_valid(upl)) { + vector_upl_t vector_upl = upl->vector_upl; + if(vector_upl) { + *v_upl_submap = vector_upl->submap; + *submap_dst_addr = vector_upl->submap_dst_addr; + } + else + panic("vector_upl_get_submap was passed a non-vectored UPL\n"); + } + else + panic("vector_upl_get_submap was passed a null UPL\n"); +} + +void +vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) +{ + if(vector_upl_is_valid(upl)) { + vector_upl_t vector_upl = upl->vector_upl; + if(vector_upl) { + 
vector_upl->submap = submap; + vector_upl->submap_dst_addr = submap_dst_addr; + } + else + panic("vector_upl_get_submap was passed a non-vectored UPL\n"); + } + else + panic("vector_upl_get_submap was passed a NULL UPL\n"); +} + +void +vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) +{ + if(vector_upl_is_valid(upl)) { + uint32_t i = 0; + vector_upl_t vector_upl = upl->vector_upl; + + if(vector_upl) { + for(i = 0; i < vector_upl->num_upls; i++) { + if(vector_upl->upl_elems[i] == subupl) + break; + } + + if(i == vector_upl->num_upls) + panic("setting sub-upl iostate when none exists"); + + vector_upl->upl_iostates[i].offset = offset; + if(size < PAGE_SIZE) + size = PAGE_SIZE; + vector_upl->upl_iostates[i].size = size; + } + else + panic("vector_upl_set_iostate was passed a non-vectored UPL\n"); + } + else + panic("vector_upl_set_iostate was passed a NULL UPL\n"); +} + +void +vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size) +{ + if(vector_upl_is_valid(upl)) { + uint32_t i = 0; + vector_upl_t vector_upl = upl->vector_upl; + + if(vector_upl) { + for(i = 0; i < vector_upl->num_upls; i++) { + if(vector_upl->upl_elems[i] == subupl) + break; + } + + if(i == vector_upl->num_upls) + panic("getting sub-upl iostate when none exists"); + + *offset = vector_upl->upl_iostates[i].offset; + *size = vector_upl->upl_iostates[i].size; + } + else + panic("vector_upl_get_iostate was passed a non-vectored UPL\n"); + } + else + panic("vector_upl_get_iostate was passed a NULL UPL\n"); +} + +void +vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size) +{ + if(vector_upl_is_valid(upl)) { + vector_upl_t vector_upl = upl->vector_upl; + if(vector_upl) { + if(index < vector_upl->num_upls) { + *offset = vector_upl->upl_iostates[index].offset; + *size = vector_upl->upl_iostates[index].size; + } + else + *offset = *size = 0; + } + else + panic("vector_upl_get_iostate_byindex was 
passed a non-vectored UPL\n"); + } + else + panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n"); +} + +upl_page_info_t * +upl_get_internal_vectorupl_pagelist(upl_t upl) +{ + return ((vector_upl_t)(upl->vector_upl))->pagelist; +} + +void * +upl_get_internal_vectorupl(upl_t upl) +{ + return upl->vector_upl; +} + vm_size_t upl_get_internal_pagelist_offset(void) { @@ -6239,14 +7410,20 @@ ppnum_t upl_get_highest_page( return upl->highest_page; } -#ifdef UPL_DEBUG -kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2) +upl_size_t upl_get_size( + upl_t upl) +{ + return upl->size; +} + +#if UPL_DEBUG +kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2) { upl->ubc_alias1 = alias1; upl->ubc_alias2 = alias2; return KERN_SUCCESS; } -int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2) +int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2) { if(al) *al = upl->ubc_alias1; diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index d5adb8b0f..b76023182 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,6 +76,11 @@ #include #include +#include + + +#include + extern kern_return_t vm_map_create_upl( vm_map_t map, vm_map_address_t offset, @@ -88,14 +93,47 @@ extern kern_return_t vm_map_create_upl( extern ppnum_t upl_get_highest_page( upl_t upl); +extern upl_size_t upl_get_size( + upl_t upl); + #ifdef MACH_KERNEL_PRIVATE #include extern unsigned int vm_pageout_scan_event_counter; -extern unsigned int vm_zf_count; extern unsigned int vm_zf_queue_count; + +#if defined(__ppc__) /* On ppc, vm statistics are still 32-bit */ + +extern unsigned int vm_zf_count; + +#define VM_ZF_COUNT_INCR() \ + MACRO_BEGIN \ + OSAddAtomic(1, (SInt32 *) &vm_zf_count); \ + MACRO_END \ + +#define VM_ZF_COUNT_DECR() \ + MACRO_BEGIN \ + OSAddAtomic(-1, (SInt32 *) &vm_zf_count); \ + MACRO_END \ + +#else /* !(defined(__ppc__)) */ + +extern uint64_t vm_zf_count; + +#define VM_ZF_COUNT_INCR() \ + MACRO_BEGIN \ + OSAddAtomic64(1, (SInt64 *) &vm_zf_count); \ + MACRO_END \ + +#define VM_ZF_COUNT_DECR() \ + MACRO_BEGIN \ + OSAddAtomic64(-1, (SInt64 *) &vm_zf_count); \ + MACRO_END \ + +#endif /* !(defined(__ppc__)) */ + /* * Routines exported to Mach. 
*/ @@ -120,15 +158,52 @@ extern void vm_pageclean_setup( /* UPL exported routines and structures */ -#define upl_lock_init(object) mutex_init(&(object)->Lock, 0) -#define upl_lock(object) mutex_lock(&(object)->Lock) -#define upl_unlock(object) mutex_unlock(&(object)->Lock) +#define upl_lock_init(object) lck_mtx_init(&(object)->Lock, &vm_object_lck_grp, &vm_object_lck_attr) +#define upl_lock_destroy(object) lck_mtx_destroy(&(object)->Lock, &vm_object_lck_grp) +#define upl_lock(object) lck_mtx_lock(&(object)->Lock) +#define upl_unlock(object) lck_mtx_unlock(&(object)->Lock) + +#define MAX_VECTOR_UPL_ELEMENTS 8 +struct _vector_upl_iostates{ + upl_offset_t offset; + upl_size_t size; +}; + +typedef struct _vector_upl_iostates vector_upl_iostates_t; + +struct _vector_upl { + upl_size_t size; + uint32_t num_upls; + uint32_t invalid_upls; + uint32_t _reserved; + vm_map_t submap; + vm_offset_t submap_dst_addr; + vm_object_offset_t offset; + upl_t upl_elems[MAX_VECTOR_UPL_ELEMENTS]; + upl_page_info_array_t pagelist; + vector_upl_iostates_t upl_iostates[MAX_VECTOR_UPL_ELEMENTS]; +}; + +typedef struct _vector_upl* vector_upl_t; /* universal page list structure */ +#if UPL_DEBUG +#define UPL_DEBUG_STACK_FRAMES 16 +#define UPL_DEBUG_COMMIT_RECORDS 4 + +struct ucd { + upl_offset_t c_beg; + upl_offset_t c_end; + int c_aborted; + void * c_retaddr[UPL_DEBUG_STACK_FRAMES]; +}; +#endif + + struct upl { - decl_mutex_data(, Lock) /* Synchronization */ + decl_lck_mtx_data(, Lock) /* Synchronization */ int ref_count; int flags; vm_object_t src_object; /* object derived from */ @@ -137,10 +212,18 @@ struct upl { vm_offset_t kaddr; /* secondary mapping in kernel */ vm_object_t map_object; ppnum_t highest_page; -#ifdef UPL_DEBUG - unsigned int ubc_alias1; - unsigned int ubc_alias2; + void* vector_upl; +#if UPL_DEBUG + uintptr_t ubc_alias1; + uintptr_t ubc_alias2; queue_chain_t uplq; /* List of outstanding upls on an obj */ + + thread_t upl_creator; + uint32_t upl_state; + uint32_t 
upl_commit_index; + void *upl_create_retaddr[UPL_DEBUG_STACK_FRAMES]; + + struct ucd upl_commit_records[UPL_DEBUG_COMMIT_RECORDS]; #endif /* UPL_DEBUG */ }; @@ -158,12 +241,27 @@ struct upl { #define UPL_ACCESS_BLOCKED 0x400 #define UPL_ENCRYPTED 0x800 #define UPL_SHADOWED 0x1000 +#define UPL_KERNEL_OBJECT 0x2000 +#define UPL_VECTOR 0x4000 /* flags for upl_create flags parameter */ #define UPL_CREATE_EXTERNAL 0 #define UPL_CREATE_INTERNAL 0x1 #define UPL_CREATE_LITE 0x2 +extern upl_t vector_upl_create(vm_offset_t); +extern void vector_upl_deallocate(upl_t); +extern boolean_t vector_upl_is_valid(upl_t); +extern boolean_t vector_upl_set_subupl(upl_t, upl_t, u_int32_t); +extern void vector_upl_set_pagelist(upl_t); +extern void vector_upl_set_submap(upl_t, vm_map_t, vm_offset_t); +extern void vector_upl_get_submap(upl_t, vm_map_t*, vm_offset_t*); +extern void vector_upl_set_iostate(upl_t, upl_t, upl_offset_t, upl_size_t); +extern void vector_upl_get_iostate(upl_t, upl_t, upl_offset_t*, upl_size_t*); +extern void vector_upl_get_iostate_byindex(upl_t, uint32_t, upl_offset_t*, upl_size_t*); +extern upl_t vector_upl_subupl_byindex(upl_t , uint32_t); +extern upl_t vector_upl_subupl_byoffset(upl_t , upl_offset_t*, upl_size_t*); + extern kern_return_t vm_object_iopl_request( vm_object_t object, vm_object_offset_t offset, @@ -194,22 +292,12 @@ extern kern_return_t vm_map_remove_upl( vm_map_t map, upl_t upl); -#ifdef UPL_DEBUG -extern kern_return_t upl_ubc_alias_set( - upl_t upl, - unsigned int alias1, - unsigned int alias2); -extern int upl_ubc_alias_get( - upl_t upl, - unsigned int * al, - unsigned int * al2); -#endif /* UPL_DEBUG */ - /* wired page list structure */ -typedef unsigned long *wpl_array_t; +typedef uint32_t *wpl_array_t; extern void vm_page_free_list( - register vm_page_t mem); + vm_page_t mem, + boolean_t prepare_object); extern void vm_page_free_reserve(int pages); @@ -249,8 +337,23 @@ decl_simple_lock_data(extern, vm_paging_lock) */ extern unsigned int 
vm_backing_store_low; +extern void vm_pageout_queue_steal( + vm_page_t page, + boolean_t queues_locked); + #endif /* MACH_KERNEL_PRIVATE */ +#if UPL_DEBUG +extern kern_return_t upl_ubc_alias_set( + upl_t upl, + uintptr_t alias1, + uintptr_t alias2); +extern int upl_ubc_alias_get( + upl_t upl, + uintptr_t * al, + uintptr_t * al2); +#endif /* UPL_DEBUG */ + extern void vm_countdirtypages(void); extern void vm_backing_store_disable( @@ -260,6 +363,36 @@ extern kern_return_t upl_transpose( upl_t upl1, upl_t upl2); +extern kern_return_t mach_vm_pressure_monitor( + boolean_t wait_for_pressure, + unsigned int nsecs_monitored, + unsigned int *pages_reclaimed_p, + unsigned int *pages_wanted_p); + +extern kern_return_t +vm_set_buffer_cleanup_callout( + boolean_t (*func)(void)); + +struct vm_page_stats_reusable { + SInt32 reusable_count; + uint64_t reusable; + uint64_t reused; + uint64_t reused_wire; + uint64_t reused_remove; + uint64_t all_reusable_calls; + uint64_t partial_reusable_calls; + uint64_t all_reuse_calls; + uint64_t partial_reuse_calls; + uint64_t reusable_pages_success; + uint64_t reusable_pages_failure; + uint64_t reusable_pages_shared; + uint64_t reuse_pages_success; + uint64_t reuse_pages_failure; + uint64_t can_reuse_success; + uint64_t can_reuse_failure; +}; +extern struct vm_page_stats_reusable vm_page_stats_reusable; + #endif /* KERNEL_PRIVATE */ #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/osfmk/vm/vm_print.h b/osfmk/vm/vm_print.h index b31deda03..6decd44f6 100644 --- a/osfmk/vm/vm_print.h +++ b/osfmk/vm/vm_print.h @@ -58,7 +58,7 @@ extern void vm_page_print( #include extern void vm_external_print( vm_external_map_t map, - vm_size_t size); + vm_object_size_t size); #endif /* MACH_PAGEMAP */ extern void db_vm(void); diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index e9fdc6ef3..a4562ce8a 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -49,14 +49,14 @@ * iokit */ extern kern_return_t device_data_action( - int device_handle, + 
uintptr_t device_handle, ipc_port_t device_pager, vm_prot_t protection, vm_object_offset_t offset, vm_size_t size); extern kern_return_t device_close( - int device_handle); + uintptr_t device_handle); /* * default_pager @@ -156,6 +156,11 @@ extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object, struct pager_crypt_info *crypt_info); #endif /* CONFIG_CODE_DECRYPTION */ +struct vnode; +extern void swapfile_pager_bootstrap(void); +extern memory_object_t swapfile_pager_setup(struct vnode *vp); +extern memory_object_control_t swapfile_pager_control(memory_object_t mem_obj); + /* * bsd @@ -164,20 +169,26 @@ struct vnode; extern void vnode_pager_shutdown(void); extern void *upl_get_internal_page_list( upl_t upl); -#ifndef _VNODE_PAGER_ + typedef int pager_return_t; extern pager_return_t vnode_pagein( struct vnode *, upl_t, - vm_offset_t, vm_object_offset_t, - vm_size_t, int, int *); + upl_offset_t, vm_object_offset_t, + upl_size_t, int, int *); extern pager_return_t vnode_pageout( struct vnode *, upl_t, - vm_offset_t, vm_object_offset_t, - vm_size_t, int, int *); + upl_offset_t, vm_object_offset_t, + upl_size_t, int, int *); extern memory_object_t vnode_pager_setup( struct vnode *, memory_object_t); extern vm_object_offset_t vnode_pager_get_filesize( struct vnode *); +extern uint32_t vnode_pager_isinuse( + struct vnode *); +extern uint32_t vnode_pager_return_hard_throttle_limit( + struct vnode *, + uint32_t *, + uint32_t); extern kern_return_t vnode_pager_get_pathname( struct vnode *vp, char *pathname, @@ -189,21 +200,28 @@ extern kern_return_t vnode_pager_get_cs_blobs( struct vnode *vp, void **blobs); -#endif /* _VNODE_PAGER_ */ + extern void vnode_pager_bootstrap(void) __attribute__((section("__TEXT, initcode"))); extern kern_return_t vnode_pager_data_unlock( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t size, + memory_object_size_t size, vm_prot_t desired_access); extern kern_return_t vnode_pager_init( memory_object_t, 
memory_object_control_t, - vm_size_t); + memory_object_cluster_size_t); extern kern_return_t vnode_pager_get_object_size( memory_object_t, memory_object_offset_t *); +extern kern_return_t vnode_pager_get_isinuse( + memory_object_t, + uint32_t *); +extern kern_return_t vnode_pager_check_hard_throttle( + memory_object_t, + uint32_t *, + uint32_t); extern kern_return_t vnode_pager_get_object_pathname( memory_object_t mem_obj, char *pathname, @@ -217,13 +235,13 @@ extern kern_return_t vnode_pager_get_object_cs_blobs( extern kern_return_t vnode_pager_data_request( memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_cluster_size_t, vm_prot_t, memory_object_fault_info_t); extern kern_return_t vnode_pager_data_return( memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_cluster_size_t, memory_object_offset_t *, int *, boolean_t, @@ -232,13 +250,13 @@ extern kern_return_t vnode_pager_data_return( extern kern_return_t vnode_pager_data_initialize( memory_object_t, memory_object_offset_t, - vm_size_t); + memory_object_cluster_size_t); extern void vnode_pager_reference( memory_object_t mem_obj); extern kern_return_t vnode_pager_synchronize( memory_object_t mem_obj, memory_object_offset_t offset, - vm_size_t length, + memory_object_size_t length, vm_sync_t sync_flags); extern kern_return_t vnode_pager_map( memory_object_t mem_obj, @@ -259,36 +277,39 @@ extern int ubc_map( extern void ubc_unmap( struct vnode *vp); +struct vm_map_entry; +extern struct vm_object *find_vnode_object(struct vm_map_entry *entry); + extern void dp_memory_object_reference(memory_object_t); extern void dp_memory_object_deallocate(memory_object_t); #ifndef _memory_object_server_ extern kern_return_t dp_memory_object_init(memory_object_t, memory_object_control_t, - vm_size_t); + memory_object_cluster_size_t); extern kern_return_t dp_memory_object_terminate(memory_object_t); extern kern_return_t dp_memory_object_data_request(memory_object_t, memory_object_offset_t, - 
vm_size_t, + memory_object_cluster_size_t, vm_prot_t, memory_object_fault_info_t); extern kern_return_t dp_memory_object_data_return(memory_object_t, memory_object_offset_t, - vm_size_t, - vm_size_t *, + memory_object_cluster_size_t, + memory_object_offset_t *, int *, boolean_t, boolean_t, int); extern kern_return_t dp_memory_object_data_initialize(memory_object_t, memory_object_offset_t, - vm_size_t); + memory_object_cluster_size_t); extern kern_return_t dp_memory_object_data_unlock(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_size_t, vm_prot_t); extern kern_return_t dp_memory_object_synchronize(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_size_t, vm_sync_t); extern kern_return_t dp_memory_object_map(memory_object_t, vm_prot_t); @@ -305,16 +326,16 @@ extern void device_pager_reference(memory_object_t); extern void device_pager_deallocate(memory_object_t); extern kern_return_t device_pager_init(memory_object_t, memory_object_control_t, - vm_size_t); + memory_object_cluster_size_t); extern kern_return_t device_pager_terminate(memory_object_t); extern kern_return_t device_pager_data_request(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_cluster_size_t, vm_prot_t, memory_object_fault_info_t); extern kern_return_t device_pager_data_return(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_cluster_size_t, memory_object_offset_t *, int *, boolean_t, @@ -322,14 +343,14 @@ extern kern_return_t device_pager_data_return(memory_object_t, int); extern kern_return_t device_pager_data_initialize(memory_object_t, memory_object_offset_t, - vm_size_t); + memory_object_cluster_size_t); extern kern_return_t device_pager_data_unlock(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_size_t, vm_prot_t); extern kern_return_t device_pager_synchronize(memory_object_t, memory_object_offset_t, - vm_size_t, + memory_object_size_t, vm_sync_t); extern kern_return_t 
device_pager_map(memory_object_t, vm_prot_t); extern kern_return_t device_pager_last_unmap(memory_object_t); @@ -340,7 +361,7 @@ extern kern_return_t device_pager_populate_object( vm_size_t size); extern memory_object_t device_pager_setup( memory_object_t, - int, + uintptr_t, vm_size_t, int); extern void device_pager_bootstrap(void) __attribute__((section("__TEXT, initcode"))); @@ -350,6 +371,9 @@ extern kern_return_t memory_object_create_named( memory_object_offset_t size, memory_object_control_t *control); +struct macx_triggers_args; +extern int mach_macx_triggers( + struct macx_triggers_args *args); extern int macx_swapinfo( memory_object_size_t *total_p, @@ -358,6 +382,9 @@ extern int macx_swapinfo( boolean_t *encrypted_p); extern void log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot); +extern void log_unnest_badness(vm_map_t, vm_map_offset_t, vm_map_offset_t); + +extern int cs_allow_invalid(struct proc *p); extern int cs_invalid_page(addr64_t vaddr); extern boolean_t cs_validate_page(void *blobs, memory_object_offset_t offset, @@ -391,6 +418,10 @@ extern kern_return_t mach_memory_entry_allocate( extern void vm_paging_map_init(void); +extern int macx_backing_store_compaction(int flags); +extern unsigned int mach_vm_ctl_page_free_wanted(void); + +extern void no_paging_space_action(void); #endif /* _VM_VM_PROTOS_H_ */ #endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c index bf80947ab..f9f18a161 100644 --- a/osfmk/vm/vm_purgeable.c +++ b/osfmk/vm/vm_purgeable.c @@ -23,7 +23,7 @@ #include #include -#include /* kmem_alloc */ +#include /* kmem_alloc */ #include #include #include @@ -44,19 +44,24 @@ int32_t token_new_pagecount = 0; /* count of pages that will int available_for_purge = 0; /* increase when ripe token * added, decrease when ripe - * token removed protect with - * page_queue_lock */ + * token removed. 
+ * protected by page_queue_lock + */ -static int token_q_allocating = 0; /* flag to singlethread allocator */ +static int token_q_allocating = 0; /* flag for singlethreading + * allocator */ struct purgeable_q purgeable_queues[PURGEABLE_Q_TYPE_MAX]; -#define TOKEN_ADD 0x40/* 0x100 */ -#define TOKEN_DELETE 0x41/* 0x104 */ -#define TOKEN_QUEUE_ADVANCE 0x42/* 0x108 actually means "token ripened" */ -#define TOKEN_OBJECT_PURGED 0x43/* 0x10c */ -#define OBJECT_ADDED 0x50/* 0x140 */ -#define OBJECT_REMOVED 0x51/* 0x144 */ +decl_lck_mtx_data(,vm_purgeable_queue_lock) + +#define TOKEN_ADD 0x40 /* 0x100 */ +#define TOKEN_DELETE 0x41 /* 0x104 */ +#define TOKEN_RIPEN 0x42 /* 0x108 */ +#define OBJECT_ADD 0x48 /* 0x120 */ +#define OBJECT_REMOVE 0x49 /* 0x124 */ +#define OBJECT_PURGE 0x4a /* 0x128 */ +#define OBJECT_PURGE_ALL 0x4b /* 0x12c */ static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue); @@ -99,9 +104,17 @@ vm_purgeable_token_check_queue(purgeable_q_t queue) } #endif +/* + * Add a token. Allocate token queue memory if necessary. + * Call with page queue locked. 
+ */ kern_return_t vm_purgeable_token_add(purgeable_q_t queue) { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + /* new token */ token_idx_t token; enum purgeable_q_type i; @@ -117,9 +130,10 @@ vm_purgeable_token_add(purgeable_q_t queue) } else { /* allocate more memory */ /* Wait if another thread is inside the memory alloc section */ while(token_q_allocating) { - wait_result_t res = thread_sleep_mutex((event_t)&token_q_allocating, - &vm_page_queue_lock, - THREAD_UNINT); + wait_result_t res = lck_mtx_sleep(&vm_page_queue_lock, + LCK_SLEEP_DEFAULT, + (event_t)&token_q_allocating, + THREAD_UNINT); if(res != THREAD_AWAKENED) return KERN_ABORTED; }; @@ -137,11 +151,20 @@ vm_purgeable_token_add(purgeable_q_t queue) vm_size_t alloc_size = token_q_cur_size + PAGE_SIZE; kern_return_t result; - if (token_q_cur_size) { - result=kmem_realloc(kernel_map, (vm_offset_t)tokens, token_q_cur_size, - (vm_offset_t*)&new_loc, alloc_size); + if (alloc_size / sizeof (struct token) > TOKEN_COUNT_MAX) { + result = KERN_RESOURCE_SHORTAGE; } else { - result=kmem_alloc(kernel_map, (vm_offset_t*)&new_loc, alloc_size); + if (token_q_cur_size) { + result = kmem_realloc(kernel_map, + (vm_offset_t) tokens, + token_q_cur_size, + (vm_offset_t *) &new_loc, + alloc_size); + } else { + result = kmem_alloc(kernel_map, + (vm_offset_t *) &new_loc, + alloc_size); + } } vm_page_lock_queues(); @@ -159,7 +182,8 @@ vm_purgeable_token_add(purgeable_q_t queue) tokens=new_loc; vm_size_t old_token_q_cur_size=token_q_cur_size; token_q_cur_size=alloc_size; - token_q_max_cnt = token_q_cur_size / sizeof(struct token); + token_q_max_cnt = (token_idx_t) (token_q_cur_size / + sizeof(struct token)); assert (token_init_idx < token_q_max_cnt); /* We must have a free token now */ if (old_token_q_cur_size) { /* clean up old mapping */ @@ -186,7 +210,8 @@ vm_purgeable_token_add(purgeable_q_t queue) int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount; assert(pages >= 
0); assert(pages <= TOKEN_COUNT_MAX); - purgeable_queues[i].new_pages=pages; + purgeable_queues[i].new_pages = (int32_t) pages; + assert(purgeable_queues[i].new_pages == pages); } token_new_pagecount = 0; @@ -237,10 +262,15 @@ vm_purgeable_token_add(purgeable_q_t queue) /* * Remove first token from queue and return its index. Add its count to the * count of the next token. + * Call with page queue locked. */ static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue) { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + token_idx_t token; token = queue->token_q_head; @@ -291,10 +321,16 @@ vm_purgeable_token_remove_first(purgeable_q_t queue) return token; } -/* Delete first token from queue. Return token to token queue. */ +/* + * Delete first token from queue. Return token to token queue. + * Call with page queue locked. + */ void vm_purgeable_token_delete_first(purgeable_q_t queue) { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif token_idx_t token = vm_purgeable_token_remove_first(queue); if (token) { @@ -305,9 +341,14 @@ vm_purgeable_token_delete_first(purgeable_q_t queue) } +/* Call with page queue locked. */ void vm_purgeable_q_advance_all() { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + /* check queue counters - if they get really large, scale them back. 
* They tend to get that large when there is no purgeable queue action */ int i; @@ -317,7 +358,8 @@ vm_purgeable_q_advance_all() int64_t pages = purgeable_queues[i].new_pages += token_new_pagecount; assert(pages >= 0); assert(pages <= TOKEN_COUNT_MAX); - purgeable_queues[i].new_pages=pages; + purgeable_queues[i].new_pages = (int32_t) pages; + assert(purgeable_queues[i].new_pages == pages); } token_new_pagecount = 0; } @@ -348,7 +390,7 @@ vm_purgeable_q_advance_all() if (tokens[queue->token_q_unripe].count == 0) { queue->token_q_unripe = tokens[queue->token_q_unripe].next; available_for_purge++; - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_QUEUE_ADVANCE)), + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_RIPEN)), queue->type, tokens[queue->token_q_head].count, /* num pages on new * first token */ @@ -390,10 +432,14 @@ vm_purgeable_q_advance_all() * Yes - purge it. Remove token. If there is no ripe token, remove ripe * token from other queue and migrate unripe token from this * queue to other queue. + * Call with page queue locked. */ static void vm_purgeable_token_remove_ripe(purgeable_q_t queue) { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif assert(queue->token_q_head && tokens[queue->token_q_head].count == 0); /* return token to free list. advance token list. */ token_idx_t new_head = tokens[queue->token_q_head].next; @@ -416,10 +462,14 @@ vm_purgeable_token_remove_ripe(purgeable_q_t queue) * Delete a ripe token from the given queue. If there are no ripe tokens on * that queue, delete a ripe token from queue2, and migrate an unripe token * from queue to queue2 + * Call with page queue locked. 
*/ static void vm_purgeable_token_choose_and_delete_ripe(purgeable_q_t queue, purgeable_q_t queue2) { +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif assert(queue->token_q_head); if (tokens[queue->token_q_head].count == 0) { @@ -485,9 +535,11 @@ vm_purgeable_token_choose_and_delete_ripe(purgeable_q_t queue, purgeable_q_t que } /* Find an object that can be locked. Returns locked object. */ +/* Call with purgeable queue locked. */ static vm_object_t vm_purgeable_object_find_and_lock(purgeable_q_t queue, int group) { + lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED); /* * Usually we would pick the first element from a queue. However, we * might not be able to get a lock on it, in which case we try the @@ -514,7 +566,70 @@ vm_purgeable_object_find_and_lock(purgeable_q_t queue, int group) return 0; } +/* Can be called without holding locks */ void +vm_purgeable_object_purge_all(void) +{ + enum purgeable_q_type i; + int group; + vm_object_t object; + unsigned int purged_count; + uint32_t collisions; + + purged_count = 0; + collisions = 0; + +restart: + lck_mtx_lock(&vm_purgeable_queue_lock); + /* Cycle through all queues */ + for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) { + purgeable_q_t queue; + + queue = &purgeable_queues[i]; + + /* + * Look through all groups, starting from the lowest. If + * we find an object in that group, try to lock it (this can + * fail). If locking is successful, we can drop the queue + * lock, remove a token and then purge the object. 
+ */ + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { + while (!queue_empty(&queue->objq[group])) { + object = vm_purgeable_object_find_and_lock(queue, group); + if (object == VM_OBJECT_NULL) { + lck_mtx_unlock(&vm_purgeable_queue_lock); + mutex_pause(collisions++); + goto restart; + } + + lck_mtx_unlock(&vm_purgeable_queue_lock); + + /* Lock the page queue here so we don't hold it + * over the whole, lengthy operation */ + vm_page_lock_queues(); + vm_purgeable_token_remove_first(queue); + vm_page_unlock_queues(); + + assert(object->purgable == VM_PURGABLE_VOLATILE); + (void) vm_object_purge(object); + vm_object_unlock(object); + purged_count++; + goto restart; + } + assert(queue->debug_count_objects >= 0); + } + } + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE_ALL)), + purged_count, /* # of purged objects */ + 0, + available_for_purge, + 0, + 0); + lck_mtx_unlock(&vm_purgeable_queue_lock); + return; +} + +boolean_t vm_purgeable_object_purge_one(void) { enum purgeable_q_type i; @@ -522,7 +637,12 @@ vm_purgeable_object_purge_one(void) vm_object_t object = 0; purgeable_q_t queue, queue2; - mutex_lock(&vm_purgeable_queue_lock); + /* Need the page queue lock since we'll be changing the token queue. 
*/ +#if MACH_ASSERT + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + lck_mtx_lock(&vm_purgeable_queue_lock); + /* Cycle through all queues */ for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) { queue = &purgeable_queues[i]; @@ -544,7 +664,7 @@ vm_purgeable_object_purge_one(void) for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { if (!queue_empty(&queue->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue, group))) { - mutex_unlock(&vm_purgeable_queue_lock); + lck_mtx_unlock(&vm_purgeable_queue_lock); vm_purgeable_token_choose_and_delete_ripe(queue, 0); goto purge_now; } @@ -557,7 +677,7 @@ vm_purgeable_object_purge_one(void) if (!queue_empty(&queue2->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue2, group))) { - mutex_unlock(&vm_purgeable_queue_lock); + lck_mtx_unlock(&vm_purgeable_queue_lock); vm_purgeable_token_choose_and_delete_ripe(queue2, queue); goto purge_now; } @@ -570,27 +690,34 @@ vm_purgeable_object_purge_one(void) * we could end up with no object to purge at this time, even though * we have objects in a purgeable state */ - mutex_unlock(&vm_purgeable_queue_lock); - return; + lck_mtx_unlock(&vm_purgeable_queue_lock); + return FALSE; purge_now: assert(object); + assert(object->purgable == VM_PURGABLE_VOLATILE); + vm_page_unlock_queues(); /* Unlock for call to vm_object_purge() */ (void) vm_object_purge(object); vm_object_unlock(object); + vm_page_lock_queues(); - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, TOKEN_OBJECT_PURGED)), - (unsigned int) object, /* purged object */ + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_PURGE)), + object, /* purged object */ 0, available_for_purge, 0, 0); + + return TRUE; } +/* Called with object lock held */ void vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) { - mutex_lock(&vm_purgeable_queue_lock); + vm_object_lock_assert_exclusive(object); + lck_mtx_lock(&vm_purgeable_queue_lock); if (queue->type == 
PURGEABLE_Q_TYPE_OBSOLETE) group = 0; @@ -602,7 +729,7 @@ vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) #if MACH_ASSERT queue->debug_count_objects++; - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_ADDED)), + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_ADD)), 0, tokens[queue->token_q_head].count, queue->type, @@ -610,17 +737,20 @@ vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) 0); #endif - mutex_unlock(&vm_purgeable_queue_lock); + lck_mtx_unlock(&vm_purgeable_queue_lock); } /* Look for object. If found, remove from purgeable queue. */ +/* Called with object lock held */ purgeable_q_t vm_purgeable_object_remove(vm_object_t object) { enum purgeable_q_type i; int group; - mutex_lock(&vm_purgeable_queue_lock); + vm_object_lock_assert_exclusive(object); + lck_mtx_lock(&vm_purgeable_queue_lock); + for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) { purgeable_q_t queue = &purgeable_queues[i]; for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { @@ -633,14 +763,14 @@ vm_purgeable_object_remove(vm_object_t object) vm_object_t, objq); #if MACH_ASSERT queue->debug_count_objects--; - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_REMOVED)), + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_REMOVE)), 0, tokens[queue->token_q_head].count, queue->type, group, 0); #endif - mutex_unlock(&vm_purgeable_queue_lock); + lck_mtx_unlock(&vm_purgeable_queue_lock); object->objq.next = 0; object->objq.prev = 0; return &purgeable_queues[i]; @@ -648,6 +778,6 @@ vm_purgeable_object_remove(vm_object_t object) } } } - mutex_unlock(&vm_purgeable_queue_lock); + lck_mtx_unlock(&vm_purgeable_queue_lock); return 0; } diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h index 95f90db5d..5e6d4e4af 100644 --- a/osfmk/vm/vm_purgeable_internal.h +++ b/osfmk/vm/vm_purgeable_internal.h @@ -86,7 +86,7 @@ extern int available_for_purge; * mostly used on a user 
context and we don't want any contention with the * pageout daemon. */ -decl_mutex_data(,vm_purgeable_queue_lock) +decl_lck_mtx_data(extern,vm_purgeable_queue_lock) /* add a new token to queue. called by vm_object_purgeable_control */ /* enter with page queue locked */ @@ -102,7 +102,11 @@ void vm_purgeable_token_delete_first(purgeable_q_t queue); void vm_purgeable_q_advance_all(void); /* the object purger. purges the next eligible object from memory. */ -void vm_purgeable_object_purge_one(void); +/* returns TRUE if an object was purged, otherwise FALSE. */ +boolean_t vm_purgeable_object_purge_one(void); + +/* purge all volatile objects now */ +void vm_purgeable_object_purge_all(void); /* insert purgeable object into queue */ void vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group); diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 7e7520ce1..3a380d4d2 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -91,16 +92,32 @@ #include #include +#include + + #if CONFIG_EMBEDDED #include #endif -int speculative_age_index = 0; -int speculative_steal_index = 0; +#include + +boolean_t vm_page_free_verify = TRUE; + +int speculative_age_index = 0; +int speculative_steal_index = 0; +lck_mtx_ext_t vm_page_queue_lock_ext; +lck_mtx_ext_t vm_page_queue_free_lock_ext; +lck_mtx_ext_t vm_purgeable_queue_lock_ext; struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1]; +__private_extern__ void vm_page_init_lck_grp(void); + +static void vm_page_free_prepare(vm_page_t page); + + + /* * Associated with page of user-allocatable memory is a * page structure. 
@@ -132,12 +149,17 @@ typedef struct { #endif /* MACH_PAGE_HASH_STATS */ } vm_page_bucket_t; + +#define BUCKETS_PER_LOCK 16 + vm_page_bucket_t *vm_page_buckets; /* Array of buckets */ unsigned int vm_page_bucket_count = 0; /* How big is array? */ unsigned int vm_page_hash_mask; /* Mask for hash function */ unsigned int vm_page_hash_shift; /* Shift for hash function */ uint32_t vm_page_bucket_hash; /* Basic bucket hash */ -decl_simple_lock_data(,vm_page_bucket_lock) +unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */ + +lck_spin_t *vm_page_bucket_locks; #if MACH_PAGE_HASH_STATS @@ -230,16 +252,22 @@ unsigned int vm_page_free_count_minimum; /* debugging */ * most other kernel structures are. */ zone_t vm_page_zone; -decl_mutex_data(,vm_page_alloc_lock) +vm_locks_array_t vm_page_locks; +decl_lck_mtx_data(,vm_page_alloc_lock) unsigned int io_throttle_zero_fill; +unsigned int vm_page_local_q_count = 0; +unsigned int vm_page_local_q_soft_limit = 250; +unsigned int vm_page_local_q_hard_limit = 500; +struct vplq *vm_page_local_q = NULL; + /* * Fictitious pages don't have a physical address, * but we must initialize phys_page to something. * For debugging, this should be a strange value * that the pmap module can recognize in assertions. */ -vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1; +ppnum_t vm_page_fictitious_addr = (ppnum_t) -1; /* * Guard pages are not accessible so they don't @@ -249,7 +277,7 @@ vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1; * we don't use a real physical page with that * physical address. 
*/ -vm_offset_t vm_page_guard_addr = (vm_offset_t) -2; +ppnum_t vm_page_guard_addr = (ppnum_t) -2; /* * Resident page structures are also chained on @@ -264,6 +292,7 @@ vm_offset_t vm_page_guard_addr = (vm_offset_t) -2; queue_head_t vm_page_queue_active; queue_head_t vm_page_queue_inactive; queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */ +queue_head_t vm_page_queue_throttled; unsigned int vm_page_active_count; unsigned int vm_page_inactive_count; @@ -275,11 +304,14 @@ unsigned int vm_page_wire_count_warning = 0; unsigned int vm_page_gobble_count_warning = 0; unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ +unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */ uint64_t vm_page_purged_count = 0; /* total count of purged pages */ +#if DEVELOPMENT || DEBUG unsigned int vm_page_speculative_recreated = 0; unsigned int vm_page_speculative_created = 0; unsigned int vm_page_speculative_used = 0; +#endif ppnum_t vm_lopage_poolstart = 0; ppnum_t vm_lopage_poolend = 0; @@ -295,10 +327,12 @@ uint64_t max_valid_dma_address = 0xffffffffffffffffULL; */ unsigned int vm_page_free_target = 0; unsigned int vm_page_free_min = 0; +unsigned int vm_page_throttle_limit = 0; +uint32_t vm_page_creation_throttle = 0; unsigned int vm_page_inactive_target = 0; unsigned int vm_page_inactive_min = 0; unsigned int vm_page_free_reserved = 0; -unsigned int vm_page_zfill_throttle_count = 0; +unsigned int vm_page_throttle_count = 0; /* * The VM system has a couple of heuristics for deciding @@ -310,6 +344,8 @@ unsigned int vm_page_zfill_throttle_count = 0; boolean_t vm_page_deactivate_hint = TRUE; +struct vm_page_stats_reusable vm_page_stats_reusable; + /* * vm_set_page_size: * @@ -352,7 +388,7 @@ vm_page_set_colors( void ) n = MAX_COLORS; /* the count must be a power of 2 */ - if ( ( n & (n - 1)) !=0 ) + if ( ( n & (n - 1)) != 0 ) panic("vm_page_set_colors"); vm_colors = n; @@ -360,6 +396,62 @@ 
vm_page_set_colors( void ) } +lck_grp_t vm_page_lck_grp_free; +lck_grp_t vm_page_lck_grp_queue; +lck_grp_t vm_page_lck_grp_local; +lck_grp_t vm_page_lck_grp_purge; +lck_grp_t vm_page_lck_grp_alloc; +lck_grp_t vm_page_lck_grp_bucket; +lck_grp_attr_t vm_page_lck_grp_attr; +lck_attr_t vm_page_lck_attr; + + +__private_extern__ void +vm_page_init_lck_grp(void) +{ + /* + * initialize the vm_page lock world + */ + lck_grp_attr_setdefault(&vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr); + lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr); + lck_attr_setdefault(&vm_page_lck_attr); +} + +void +vm_page_init_local_q() +{ + unsigned int num_cpus; + unsigned int i; + struct vplq *t_local_q; + + num_cpus = ml_get_max_cpus(); + + /* + * no point in this for a uni-processor system + */ + if (num_cpus >= 2) { + t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq)); + + for (i = 0; i < num_cpus; i++) { + struct vpl *lq; + + lq = &t_local_q[i].vpl_un.vpl; + VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr); + queue_init(&lq->vpl_queue); + lq->vpl_count = 0; + } + vm_page_local_q_count = num_cpus; + + vm_page_local_q = (struct vplq *)t_local_q; + } +} + + /* * vm_page_bootstrap: * @@ -387,57 +479,69 @@ vm_page_bootstrap( */ m = &vm_page_template; - m->object = VM_OBJECT_NULL; /* reset later */ - m->offset = (vm_object_offset_t) -1; /* reset later */ - m->wire_count = 0; + bzero(m, sizeof (*m)); m->pageq.next = NULL; m->pageq.prev = NULL; m->listq.next = NULL; m->listq.prev = NULL; + m->next = VM_PAGE_NULL; - m->speculative = FALSE; - m->throttled = FALSE; + 
m->object = VM_OBJECT_NULL; /* reset later */ + m->offset = (vm_object_offset_t) -1; /* reset later */ + + m->wire_count = 0; + m->local = FALSE; m->inactive = FALSE; m->active = FALSE; - m->no_cache = FALSE; + m->pageout_queue = FALSE; + m->speculative = FALSE; m->laundry = FALSE; m->free = FALSE; - m->pmapped = FALSE; - m->wpmapped = FALSE; m->reference = FALSE; - m->pageout = FALSE; - m->dump_cleaning = FALSE; - m->list_req_pending = FALSE; + m->gobbled = FALSE; + m->private = FALSE; + m->throttled = FALSE; + m->__unused_pageq_bits = 0; + + m->phys_page = 0; /* reset later */ m->busy = TRUE; m->wanted = FALSE; m->tabled = FALSE; m->fictitious = FALSE; - m->private = FALSE; + m->pmapped = FALSE; + m->wpmapped = FALSE; + m->pageout = FALSE; m->absent = FALSE; m->error = FALSE; m->dirty = FALSE; m->cleaning = FALSE; m->precious = FALSE; m->clustered = FALSE; - m->unusual = FALSE; + m->overwriting = FALSE; m->restart = FALSE; - m->zero_fill = FALSE; + m->unusual = FALSE; m->encrypted = FALSE; m->encrypted_cleaning = FALSE; - m->deactivated = FALSE; + m->list_req_pending = FALSE; + m->dump_cleaning = FALSE; + m->cs_validated = FALSE; + m->cs_tainted = FALSE; + m->no_cache = FALSE; + m->zero_fill = FALSE; + m->reusable = FALSE; + m->__unused_object_bits = 0; - m->phys_page = 0; /* reset later */ /* * Initialize the page queues. */ - - mutex_init(&vm_page_queue_free_lock, 0); - mutex_init(&vm_page_queue_lock, 0); - - mutex_init(&vm_purgeable_queue_lock, 0); + vm_page_init_lck_grp(); + + lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr); + lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr); + lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr); for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) { int group; @@ -493,8 +597,6 @@ vm_page_bootstrap( * than the number of physical pages in the system. 
*/ - simple_lock_init(&vm_page_bucket_lock, 0); - if (vm_page_bucket_count == 0) { unsigned int npages = pmap_free_pages(); @@ -502,6 +604,7 @@ vm_page_bootstrap( while (vm_page_bucket_count < npages) vm_page_bucket_count <<= 1; } + vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK; vm_page_hash_mask = vm_page_bucket_count - 1; @@ -531,6 +634,10 @@ vm_page_bootstrap( pmap_steal_memory(vm_page_bucket_count * sizeof(vm_page_bucket_t)); + vm_page_bucket_locks = (lck_spin_t *) + pmap_steal_memory(vm_page_bucket_lock_count * + sizeof(lck_spin_t)); + for (i = 0; i < vm_page_bucket_count; i++) { register vm_page_bucket_t *bucket = &vm_page_buckets[i]; @@ -541,6 +648,9 @@ vm_page_bootstrap( #endif /* MACH_PAGE_HASH_STATS */ } + for (i = 0; i < vm_page_bucket_lock_count; i++) + lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr); + /* * Machine-dependent code allocates the resident page table. * It uses vm_page_init to initialize the page frames. @@ -563,7 +673,8 @@ vm_page_bootstrap( * wired, they nonetheless can't be moved. At this moment, * all VM managed pages are "free", courtesy of pmap_startup. */ - vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */ + assert((unsigned int) atop_64(max_mem) == atop_64(max_mem)); + vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count; /* initial value */ vm_page_free_count_minimum = vm_page_free_count; printf("vm_page_bootstrap: %d free pages and %d wired pages\n", @@ -615,7 +726,7 @@ pmap_steal_memory( addr = virtual_space_start; virtual_space_start += size; - kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */ + kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */ /* * Allocate and map physical pages to back new virtual pages. 
@@ -624,13 +735,21 @@ pmap_steal_memory( for (vaddr = round_page(addr); vaddr < addr + size; vaddr += PAGE_SIZE) { +#if defined(__LP64__) + if (!pmap_next_page_k64(&phys_page)) +#else if (!pmap_next_page(&phys_page)) +#endif + panic("pmap_steal_memory"); /* * XXX Logically, these mappings should be wired, * but some pmap modules barf if they are. */ +#if defined(__LP64__) + pmap_pre_expand(kernel_pmap, vaddr); +#endif pmap_enter(kernel_pmap, vaddr, phys_page, VM_PROT_READ|VM_PROT_WRITE, @@ -662,7 +781,7 @@ pmap_startup( */ tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */ - tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */ + tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */ npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */ vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages); @@ -791,7 +910,7 @@ pmap_startup( * because pmap_steal_memory has been using it. 
*/ - virtual_space_start = round_page_32(virtual_space_start); + virtual_space_start = round_page(virtual_space_start); *startp = virtual_space_start; *endp = virtual_space_end; @@ -825,7 +944,7 @@ vm_page_module_init(void) vm_page_zone->count += vm_page_pages; vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size; - mutex_init(&vm_page_alloc_lock, 0); + lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr); } /* @@ -866,7 +985,7 @@ vm_page_create( * NOTE: The bucket count must be a power of 2 */ #define vm_page_hash(object, offset) (\ - ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\ + ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\ & vm_page_hash_mask) @@ -884,22 +1003,24 @@ vm_page_insert( vm_object_t object, vm_object_offset_t offset) { - vm_page_insert_internal(mem, object, offset, FALSE); + vm_page_insert_internal(mem, object, offset, FALSE, TRUE); } - void vm_page_insert_internal( vm_page_t mem, vm_object_t object, vm_object_offset_t offset, - boolean_t queues_lock_held) + boolean_t queues_lock_held, + boolean_t insert_in_hash) { - register vm_page_bucket_t *bucket; + vm_page_bucket_t *bucket; + lck_spin_t *bucket_lock; + int hash_id; XPR(XPR_VM_PAGE, "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n", - (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0); + object, offset, mem, 0,0); VM_PAGE_CHECK(mem); @@ -910,40 +1031,51 @@ vm_page_insert_internal( vm_object_lock_assert_exclusive(object); #if DEBUG - if (mem->tabled || mem->object != VM_OBJECT_NULL) - panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) " - "already in (obj=%p,off=0x%llx)", - mem, object, offset, mem->object, mem->offset); + lck_mtx_assert(&vm_page_queue_lock, + queues_lock_held ? 
LCK_MTX_ASSERT_OWNED + : LCK_MTX_ASSERT_NOTOWNED); +#endif /* DEBUG */ + + if (insert_in_hash == TRUE) { +#if DEBUG + if (mem->tabled || mem->object != VM_OBJECT_NULL) + panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) " + "already in (obj=%p,off=0x%llx)", + mem, object, offset, mem->object, mem->offset); #endif - assert(!object->internal || offset < object->size); - - /* only insert "pageout" pages into "pageout" objects, - * and normal pages into normal objects */ - assert(object->pageout == mem->pageout); + assert(!object->internal || offset < object->size); - assert(vm_page_lookup(object, offset) == VM_PAGE_NULL); + /* only insert "pageout" pages into "pageout" objects, + * and normal pages into normal objects */ + assert(object->pageout == mem->pageout); - /* - * Record the object/offset pair in this page - */ + assert(vm_page_lookup(object, offset) == VM_PAGE_NULL); + + /* + * Record the object/offset pair in this page + */ - mem->object = object; - mem->offset = offset; + mem->object = object; + mem->offset = offset; - /* - * Insert it into the object_object/offset hash table - */ + /* + * Insert it into the object_object/offset hash table + */ + hash_id = vm_page_hash(object, offset); + bucket = &vm_page_buckets[hash_id]; + bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK]; + + lck_spin_lock(bucket_lock); - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - simple_lock(&vm_page_bucket_lock); - mem->next = bucket->pages; - bucket->pages = mem; + mem->next = bucket->pages; + bucket->pages = mem; #if MACH_PAGE_HASH_STATS - if (++bucket->cur_count > bucket->hi_count) - bucket->hi_count = bucket->cur_count; + if (++bucket->cur_count > bucket->hi_count) + bucket->hi_count = bucket->cur_count; #endif /* MACH_PAGE_HASH_STATS */ - simple_unlock(&vm_page_bucket_lock); + lck_spin_unlock(bucket_lock); + } /* * Now link into the object's list of backed pages. 
*/ @@ -956,19 +1088,31 @@ vm_page_insert_internal( */ object->resident_page_count++; + if (VM_PAGE_WIRED(mem)) { + object->wired_page_count++; + } + assert(object->resident_page_count >= object->wired_page_count); - if (object->purgable == VM_PURGABLE_VOLATILE) { - if (queues_lock_held == FALSE) - vm_page_lockspin_queues(); - - vm_page_purgeable_count++; + assert(!mem->reusable); - if (queues_lock_held == FALSE) - vm_page_unlock_queues(); + if (object->purgable == VM_PURGABLE_VOLATILE) { + if (VM_PAGE_WIRED(mem)) { + OSAddAtomic(1, &vm_page_purgeable_wired_count); + } else { + OSAddAtomic(1, &vm_page_purgeable_count); + } } else if (object->purgable == VM_PURGABLE_EMPTY && mem->throttled) { + /* + * This page belongs to a purged VM object but hasn't + * been purged (because it was "busy"). + * It's in the "throttled" queue and hence not + * visible to vm_pageout_scan(). Move it to a pageable + * queue, so that it can eventually be reclaimed, instead + * of lingering in the "empty" object. + */ if (queues_lock_held == FALSE) - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_deactivate(mem); if (queues_lock_held == FALSE) vm_page_unlock_queues(); @@ -981,9 +1125,8 @@ vm_page_insert_internal( * Exactly like vm_page_insert, except that we first * remove any existing page at the given offset in object. * - * The object and page queues must be locked. + * The object must be locked. 
*/ - void vm_page_replace( register vm_page_t mem, @@ -992,16 +1135,17 @@ vm_page_replace( { vm_page_bucket_t *bucket; vm_page_t found_m = VM_PAGE_NULL; + lck_spin_t *bucket_lock; + int hash_id; VM_PAGE_CHECK(mem); vm_object_lock_assert_exclusive(object); #if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); - if (mem->tabled || mem->object != VM_OBJECT_NULL) panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) " "already in (obj=%p,off=0x%llx)", mem, object, offset, mem->object, mem->offset); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED); #endif /* * Record the object/offset pair in this page @@ -1015,12 +1159,15 @@ vm_page_replace( * replacing any page that might have been there. */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; - simple_lock(&vm_page_bucket_lock); + hash_id = vm_page_hash(object, offset); + bucket = &vm_page_buckets[hash_id]; + bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK]; + + lck_spin_lock(bucket_lock); if (bucket->pages) { vm_page_t *mp = &bucket->pages; - register vm_page_t m = *mp; + vm_page_t m = *mp; do { if (m->object == object && m->offset == offset) { @@ -1044,7 +1191,7 @@ vm_page_replace( */ bucket->pages = mem; - simple_unlock(&vm_page_bucket_lock); + lck_spin_unlock(bucket_lock); if (found_m) { /* @@ -1052,45 +1199,9 @@ vm_page_replace( * offset for this object... remove it from * the object and free it back to the free list */ - VM_PAGE_REMOVE(found_m); - found_m->tabled = FALSE; - - found_m->object = VM_OBJECT_NULL; - found_m->offset = (vm_object_offset_t) -1; - object->resident_page_count--; - - if (object->purgable == VM_PURGABLE_VOLATILE) { - assert(vm_page_purgeable_count > 0); - vm_page_purgeable_count--; - } - - /* - * Return page to the free list. - * Note the page is not tabled now - */ - vm_page_free(found_m); - } - /* - * Now link into the object's list of backed pages. 
- */ - - VM_PAGE_INSERT(mem, object); - mem->tabled = TRUE; - - /* - * And show that the object has one more resident - * page. - */ - - object->resident_page_count++; - - if (object->purgable == VM_PURGABLE_VOLATILE) { - vm_page_purgeable_count++; - } else if (object->purgable == VM_PURGABLE_EMPTY) { - if (mem->throttled) { - vm_page_deactivate(mem); - } + vm_page_free_unlocked(found_m, FALSE); } + vm_page_insert_internal(mem, object, offset, FALSE, FALSE); } /* @@ -1099,53 +1210,58 @@ vm_page_replace( * Removes the given mem entry from the object/offset-page * table and the object page list. * - * The object and page queues must be locked. + * The object must be locked. */ void vm_page_remove( - register vm_page_t mem) + vm_page_t mem, + boolean_t remove_from_hash) { - register vm_page_bucket_t *bucket; - register vm_page_t this; + vm_page_bucket_t *bucket; + vm_page_t this; + lck_spin_t *bucket_lock; + int hash_id; XPR(XPR_VM_PAGE, "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n", - (integer_t)mem->object, (integer_t)mem->offset, - (integer_t)mem, 0,0); -#if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); -#endif + mem->object, mem->offset, + mem, 0,0); + vm_object_lock_assert_exclusive(mem->object); assert(mem->tabled); assert(!mem->cleaning); VM_PAGE_CHECK(mem); + if (remove_from_hash == TRUE) { + /* + * Remove from the object_object/offset hash table + */ + hash_id = vm_page_hash(mem->object, mem->offset); + bucket = &vm_page_buckets[hash_id]; + bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK]; - /* - * Remove from the object_object/offset hash table - */ + lck_spin_lock(bucket_lock); - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; - simple_lock(&vm_page_bucket_lock); - if ((this = bucket->pages) == mem) { - /* optimize for common case */ + if ((this = bucket->pages) == mem) { + /* optimize for common case */ - bucket->pages = mem->next; - } else { - register vm_page_t *prev; + bucket->pages = mem->next; + } else 
{ + vm_page_t *prev; - for (prev = &this->next; - (this = *prev) != mem; - prev = &this->next) - continue; - *prev = this->next; - } + for (prev = &this->next; + (this = *prev) != mem; + prev = &this->next) + continue; + *prev = this->next; + } #if MACH_PAGE_HASH_STATS - bucket->cur_count--; + bucket->cur_count--; #endif /* MACH_PAGE_HASH_STATS */ - simple_unlock(&vm_page_bucket_lock); + lck_spin_unlock(bucket_lock); + } /* * Now remove from the object's list of backed pages. */ @@ -1157,17 +1273,42 @@ vm_page_remove( * page. */ + assert(mem->object->resident_page_count > 0); mem->object->resident_page_count--; + if (VM_PAGE_WIRED(mem)) { + assert(mem->object->wired_page_count > 0); + mem->object->wired_page_count--; + } + assert(mem->object->resident_page_count >= + mem->object->wired_page_count); + if (mem->reusable) { + assert(mem->object->reusable_page_count > 0); + mem->object->reusable_page_count--; + assert(mem->object->reusable_page_count <= + mem->object->resident_page_count); + mem->reusable = FALSE; + OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reused_remove++; + } else if (mem->object->all_reusable) { + OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count); + vm_page_stats_reusable.reused_remove++; + } if (mem->object->purgable == VM_PURGABLE_VOLATILE) { - assert(vm_page_purgeable_count > 0); - vm_page_purgeable_count--; + if (VM_PAGE_WIRED(mem)) { + assert(vm_page_purgeable_wired_count > 0); + OSAddAtomic(-1, &vm_page_purgeable_wired_count); + } else { + assert(vm_page_purgeable_count > 0); + OSAddAtomic(-1, &vm_page_purgeable_count); + } } mem->tabled = FALSE; mem->object = VM_OBJECT_NULL; mem->offset = (vm_object_offset_t) -1; } + /* * vm_page_lookup: * @@ -1187,12 +1328,14 @@ unsigned long vm_page_lookup_miss = 0; vm_page_t vm_page_lookup( - register vm_object_t object, - register vm_object_offset_t offset) + vm_object_t object, + vm_object_offset_t offset) { - register vm_page_t mem; - register 
vm_page_bucket_t *bucket; - queue_entry_t qe; + vm_page_t mem; + vm_page_bucket_t *bucket; + queue_entry_t qe; + lck_spin_t *bucket_lock; + int hash_id; vm_object_lock_assert_held(object); mem = object->memq_hint; @@ -1236,7 +1379,8 @@ vm_page_lookup( /* * Search the hash table for this object/offset pair */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; + hash_id = vm_page_hash(object, offset); + bucket = &vm_page_buckets[hash_id]; /* * since we hold the object lock, we are guaranteed that no @@ -1251,14 +1395,16 @@ vm_page_lookup( return (VM_PAGE_NULL); } - simple_lock(&vm_page_bucket_lock); + bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK]; + + lck_spin_lock(bucket_lock); for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) { VM_PAGE_CHECK(mem); if ((mem->object == object) && (mem->offset == offset)) break; } - simple_unlock(&vm_page_bucket_lock); + lck_spin_unlock(bucket_lock); if (mem != VM_PAGE_NULL) { if (object->memq_hint != VM_PAGE_NULL) { @@ -1307,19 +1453,20 @@ vm_page_rename( panic("vm_page_rename: page %p is encrypted\n", mem); } + XPR(XPR_VM_PAGE, + "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n", + new_object, new_offset, + mem, 0,0); + /* * Changes to mem->object require the page lock because * the pageout daemon uses that lock to get the object. 
*/ + vm_page_lockspin_queues(); - XPR(XPR_VM_PAGE, - "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n", - (integer_t)new_object, (integer_t)new_offset, - (integer_t)mem, 0,0); + vm_page_remove(mem, TRUE); + vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE); - vm_page_lockspin_queues(); - vm_page_remove(mem); - vm_page_insert(mem, new_object, new_offset); vm_page_unlock_queues(); } @@ -1350,11 +1497,11 @@ int c_vm_page_grab_fictitious = 0; int c_vm_page_release_fictitious = 0; int c_vm_page_more_fictitious = 0; -extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr); +extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr); vm_page_t vm_page_grab_fictitious_common( - vm_offset_t phys_addr) + ppnum_t phys_addr) { register vm_page_t m; @@ -1451,7 +1598,7 @@ void vm_page_more_fictitious(void) * If winner is not vm-privileged, then the page allocation will fail, * and it will temporarily block here in the vm_page_wait(). */ - mutex_lock(&vm_page_alloc_lock); + lck_mtx_lock(&vm_page_alloc_lock); /* * If another thread allocated space, just bail out now. */ @@ -1468,7 +1615,7 @@ void vm_page_more_fictitious(void) * of fictitious pages required in this manner is 2. 5 is * simply a somewhat larger number. */ - mutex_unlock(&vm_page_alloc_lock); + lck_mtx_unlock(&vm_page_alloc_lock); return; } @@ -1481,7 +1628,7 @@ void vm_page_more_fictitious(void) * lock to give another thread a chance at it, and * wait for the pageout daemon to make progress. */ - mutex_unlock(&vm_page_alloc_lock); + lck_mtx_unlock(&vm_page_alloc_lock); vm_page_wait(THREAD_UNINT); return; } @@ -1497,7 +1644,7 @@ void vm_page_more_fictitious(void) m++; } zcram(vm_page_zone, (void *) addr, PAGE_SIZE); - mutex_unlock(&vm_page_alloc_lock); + lck_mtx_unlock(&vm_page_alloc_lock); } @@ -1512,7 +1659,7 @@ int vm_pool_low(void) { /* No locking, at worst we will fib. 
*/ - return( vm_page_free_count < vm_page_free_reserved ); + return( vm_page_free_count <= vm_page_free_reserved ); } @@ -1542,7 +1689,7 @@ vm_page_grablo(void) if (vm_lopage_poolsize == 0) return (vm_page_grab()); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock_spin(&vm_page_queue_free_lock); if (! queue_empty(&vm_lopage_queue_free)) { queue_remove_first(&vm_lopage_queue_free, @@ -1565,7 +1712,7 @@ vm_page_grablo(void) } else { mem = VM_PAGE_NULL; } - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); return (mem); } @@ -1627,8 +1774,6 @@ vm_page_grab( void ) enable_preemption(); - mutex_lock(&vm_page_queue_free_lock); - /* * Optionally produce warnings if the wire or gobble * counts exceed some threshold. @@ -1646,13 +1791,15 @@ vm_page_grab( void ) assert(vm_page_gobble_count < vm_page_gobble_count_warning); } + lck_mtx_lock_spin(&vm_page_queue_free_lock); + /* * Only let privileged threads (involved in pageout) * dip into the reserved pool. */ if ((vm_page_free_count < vm_page_free_reserved) && !(current_thread()->options & TH_OPT_VMPRIV)) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); mem = VM_PAGE_NULL; } else { @@ -1663,7 +1810,7 @@ vm_page_grab( void ) while ( vm_page_free_count == 0 ) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); /* * must be a privileged thread to be * in this state since a non-privileged @@ -1671,13 +1818,13 @@ vm_page_grab( void ) * under the vm_page_free_reserved mark */ VM_PAGE_WAIT(); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock_spin(&vm_page_queue_free_lock); } disable_preemption(); if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); /* * we got preempted and moved to another processor @@ -1743,7 +1890,7 @@ vm_page_grab( void ) mem = head; mem->pageq.next = NULL; - mutex_unlock(&vm_page_queue_free_lock); + 
lck_mtx_unlock(&vm_page_queue_free_lock); enable_preemption(); } @@ -1797,6 +1944,8 @@ vm_page_release( register vm_page_t mem) { unsigned int color; + int need_wakeup = 0; + int need_priv_wakeup = 0; #if 0 unsigned int pindex; phys_entry *physent; @@ -1808,10 +1957,13 @@ vm_page_release( physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */ #endif assert(!mem->private && !mem->fictitious); - + if (vm_page_free_verify) { + assert(pmap_verify_free(mem->phys_page)); + } // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */ - mutex_lock(&vm_page_queue_free_lock); + + lck_mtx_lock_spin(&vm_page_queue_free_lock); #if DEBUG if (mem->free) panic("vm_page_release"); @@ -1864,16 +2016,22 @@ vm_page_release( * as long as vm_page_free_wanted is non-zero. */ - if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) { + assert(vm_page_free_count > 0); + if (vm_page_free_wanted_privileged > 0) { vm_page_free_wanted_privileged--; - thread_wakeup_one((event_t) &vm_page_free_wanted_privileged); - } else if ((vm_page_free_wanted > 0) && - (vm_page_free_count >= vm_page_free_reserved)) { + need_priv_wakeup = 1; + } else if (vm_page_free_wanted > 0 && + vm_page_free_count > vm_page_free_reserved) { vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_count); + need_wakeup = 1; } } - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); + + if (need_priv_wakeup) + thread_wakeup_one((event_t) &vm_page_free_wanted_privileged); + else if (need_wakeup) + thread_wakeup_one((event_t) &vm_page_free_count); #if CONFIG_EMBEDDED { @@ -1922,10 +2080,10 @@ vm_page_wait( int need_wakeup = 0; int is_privileged = current_thread()->options & TH_OPT_VMPRIV; - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock_spin(&vm_page_queue_free_lock); if (is_privileged && vm_page_free_count) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); return TRUE; } if (vm_page_free_count < 
vm_page_free_target) { @@ -1939,7 +2097,7 @@ vm_page_wait( need_wakeup = 1; wait_result = assert_wait((event_t)&vm_page_free_count, interruptible); } - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); counter(c_vm_page_wait_block++); if (need_wakeup) @@ -1950,7 +2108,7 @@ vm_page_wait( return(wait_result == THREAD_AWAKENED); } else { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); return TRUE; } } @@ -2002,7 +2160,7 @@ vm_page_alloclo( /* * vm_page_alloc_guard: * - * Allocate a ficticious page which will be used + * Allocate a fictitious page which will be used * as a guard page. The page will be inserted into * the object and returned to the caller. */ @@ -2027,7 +2185,6 @@ vm_page_alloc_guard( counter(unsigned int c_laundry_pages_freed = 0;) -boolean_t vm_page_free_verify = TRUE; /* * vm_page_free: * @@ -2036,26 +2193,31 @@ boolean_t vm_page_free_verify = TRUE; * * Object and page queues must be locked prior to entry. */ -void +static void vm_page_free_prepare( register vm_page_t mem) +{ + vm_page_free_prepare_queues(mem); + vm_page_free_prepare_object(mem, TRUE); +} + + +void +vm_page_free_prepare_queues( + vm_page_t mem) { VM_PAGE_CHECK(mem); assert(!mem->free); assert(!mem->cleaning); assert(!mem->pageout); - #if DEBUG - if (vm_page_free_verify && !mem->fictitious && !mem->private) { - assert(pmap_verify_free(mem->phys_page)); - } - if (mem->object) - vm_object_lock_assert_exclusive(mem->object); - _mutex_assert(&vm_page_queue_lock, MA_OWNED); - + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); if (mem->free) - panic("vm_page_free: freeing page on free list\n"); + panic("vm_page_free: freeing page on free list\n"); #endif + if (mem->object) { + vm_object_lock_assert_exclusive(mem->object); + } if (mem->laundry) { /* @@ -2063,18 +2225,20 @@ vm_page_free_prepare( * if we lost its pager (due to a forced unmount, for example). 
* We need to call vm_pageout_throttle_up() before removing * the page from its VM object, so that we can find out on - * which pageout queue the page is. + * which pageout queue the page is on. */ vm_pageout_throttle_up(mem); counter(++c_laundry_pages_freed); } - - if (mem->tabled) - vm_page_remove(mem); /* clears tabled, object, offset */ - - VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */ - - if (mem->wire_count) { + VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */ + + if (VM_PAGE_WIRED(mem)) { + if (mem->object) { + assert(mem->object->wired_page_count > 0); + mem->object->wired_page_count--; + assert(mem->object->resident_page_count >= + mem->object->wired_page_count); + } if (!mem->private && !mem->fictitious) vm_page_wire_count--; mem->wire_count = 0; @@ -2084,42 +2248,74 @@ vm_page_free_prepare( vm_page_wire_count--; vm_page_gobble_count--; } - mem->gobbled = FALSE; +} + + +void +vm_page_free_prepare_object( + vm_page_t mem, + boolean_t remove_from_hash) +{ + if (mem->object) { + vm_object_lock_assert_exclusive(mem->object); + } - PAGE_WAKEUP(mem); /* clears wanted */ + if (mem->tabled) + vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */ - /* Some of these may be unnecessary */ - mem->busy = TRUE; - mem->absent = FALSE; - mem->error = FALSE; - mem->dirty = FALSE; - mem->precious = FALSE; - mem->reference = FALSE; - mem->encrypted = FALSE; - mem->encrypted_cleaning = FALSE; - mem->deactivated = FALSE; - mem->pmapped = FALSE; - mem->wpmapped = FALSE; + PAGE_WAKEUP(mem); /* clears wanted */ if (mem->private) { mem->private = FALSE; mem->fictitious = TRUE; mem->phys_page = vm_page_fictitious_addr; } - if (!mem->fictitious) { - if (mem->zero_fill == TRUE) { - mem->zero_fill = FALSE; - OSAddAtomic(-1, (SInt32 *)&vm_zf_count); - } + if (mem->fictitious) { + /* Some of these may be unnecessary */ + mem->gobbled = FALSE; + mem->busy = TRUE; + mem->absent = FALSE; + mem->error 
= FALSE; + mem->dirty = FALSE; + mem->precious = FALSE; + mem->reference = FALSE; + mem->encrypted = FALSE; + mem->encrypted_cleaning = FALSE; + mem->pmapped = FALSE; + mem->wpmapped = FALSE; + mem->reusable = FALSE; + } else { + if (mem->zero_fill == TRUE) + VM_ZF_COUNT_DECR(); vm_page_init(mem, mem->phys_page); } } + void vm_page_free( vm_page_t mem) { - vm_page_free_prepare(mem); + vm_page_free_prepare(mem); + if (mem->fictitious) { + vm_page_release_fictitious(mem); + } else { + vm_page_release(mem); + } +} + + +void +vm_page_free_unlocked( + vm_page_t mem, + boolean_t remove_from_hash) +{ + vm_page_lockspin_queues(); + vm_page_free_prepare_queues(mem); + vm_page_unlock_queues(); + + vm_page_free_prepare_object(mem, remove_from_hash); + if (mem->fictitious) { vm_page_release_fictitious(mem); } else { @@ -2130,15 +2326,14 @@ vm_page_free( /* * Free a list of pages. The list can be up to several hundred pages, * as blocked up by vm_pageout_scan(). - * The big win is not having to take the page q and free list locks once + * The big win is not having to take the free list lock once * per page. We sort the incoming pages into n lists, one for * each color. - * - * The page queues must be locked, and are kept locked. 
*/ void vm_page_free_list( - vm_page_t mem) + vm_page_t mem, + boolean_t prepare_object) { vm_page_t nxt; int pg_count = 0; @@ -2152,31 +2347,39 @@ vm_page_free_list( queue_init(&free_list[color]); } -#if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); -#endif while (mem) { -#if DEBUG - if (mem->tabled || mem->object) - panic("vm_page_free_list: freeing tabled page\n"); - if (mem->inactive || mem->active || mem->throttled || mem->free) - panic("vm_page_free_list: freeing page on list\n"); + assert(!mem->inactive); + assert(!mem->active); + assert(!mem->throttled); + assert(!mem->free); + assert(!mem->speculative); + assert(mem->pageq.prev == NULL); + + nxt = (vm_page_t)(mem->pageq.next); + + if (prepare_object == TRUE) + vm_page_free_prepare_object(mem, TRUE); + if (vm_page_free_verify && !mem->fictitious && !mem->private) { assert(pmap_verify_free(mem->phys_page)); } -#endif - assert(mem->pageq.prev == NULL); assert(mem->busy); - assert(!mem->free); - nxt = (vm_page_t)(mem->pageq.next); if (!mem->fictitious) { if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { mem->pageq.next = NULL; vm_page_release(mem); } else { - mem->free = TRUE; + /* + * IMPORTANT: we can't set the page "free" here + * because that would make the page eligible for + * a physically-contiguous allocation (see + * vm_page_find_contiguous()) right away (we don't + * hold the vm_page_queue_free lock). That would + * cause trouble because the page is not actually + * in the free queue yet... 
+ */ color = mem->phys_page & vm_color_mask; if (queue_empty(&free_list[color])) { inuse[color] = inuse_list_head; @@ -2197,8 +2400,10 @@ vm_page_free_list( } if (pg_count) { unsigned int avail_free_count; + unsigned int need_wakeup = 0; + unsigned int need_priv_wakeup = 0; - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock_spin(&vm_page_queue_free_lock); color = inuse_list_head; @@ -2206,10 +2411,27 @@ vm_page_free_list( vm_page_t first, last; vm_page_t first_free; + /* + * Now that we hold the vm_page_queue_free lock, + * it's safe to mark all pages in our local queue + * as "free"... + */ + queue_iterate(&free_list[color], + mem, + vm_page_t, + pageq) { + assert(!mem->free); + assert(mem->busy); + mem->free = TRUE; + } + + /* + * ... and insert our local queue at the head of + * the global free queue. + */ first = (vm_page_t) queue_first(&free_list[color]); last = (vm_page_t) queue_last(&free_list[color]); first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]); - if (queue_empty(&vm_page_queue_free[color])) { queue_last(&vm_page_queue_free[color]) = (queue_entry_t) last; @@ -2223,41 +2445,67 @@ vm_page_free_list( (queue_entry_t) &vm_page_queue_free[color]; queue_next(&last->pageq) = (queue_entry_t) first_free; + + /* next color */ color = inuse[color]; } vm_page_free_count += pg_count; avail_free_count = vm_page_free_count; - while ((vm_page_free_wanted_privileged > 0) && avail_free_count) { - vm_page_free_wanted_privileged--; - avail_free_count--; - - thread_wakeup_one((event_t) &vm_page_free_wanted_privileged); + if (vm_page_free_wanted_privileged > 0 && + avail_free_count > 0) { + if (avail_free_count < vm_page_free_wanted_privileged) { + need_priv_wakeup = avail_free_count; + vm_page_free_wanted_privileged -= + avail_free_count; + avail_free_count = 0; + } else { + need_priv_wakeup = vm_page_free_wanted_privileged; + vm_page_free_wanted_privileged = 0; + avail_free_count -= + need_priv_wakeup; /* the wanted counter was zeroed just above, so use the saved count */ + } } - if ((vm_page_free_wanted > 0) 
&& - (avail_free_count >= vm_page_free_reserved)) { + if (vm_page_free_wanted > 0 && + avail_free_count > vm_page_free_reserved) { unsigned int available_pages; - if (avail_free_count >= vm_page_free_reserved) { - available_pages = (avail_free_count - vm_page_free_reserved); - } else { - available_pages = 0; - } + available_pages = (avail_free_count - + vm_page_free_reserved); if (available_pages >= vm_page_free_wanted) { + need_wakeup = vm_page_free_wanted; vm_page_free_wanted = 0; - thread_wakeup((event_t) &vm_page_free_count); } else { - while (available_pages--) { - vm_page_free_wanted--; - thread_wakeup_one((event_t) &vm_page_free_count); - } + need_wakeup = available_pages; + vm_page_free_wanted -= available_pages; } } - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); + if (need_priv_wakeup != 0) { + /* + * There shouldn't be that many VM-privileged threads, + * so let's wake them all up, even if we don't quite + * have enough pages to satisfy them all. + */ + thread_wakeup((event_t)&vm_page_free_wanted_privileged); + } + if (need_wakeup != 0 && vm_page_free_wanted == 0) { + /* + * We don't expect to have any more waiters + * after this, so let's wake them all up at + * once. + */ + thread_wakeup((event_t) &vm_page_free_count); + } else for (; need_wakeup != 0; need_wakeup--) { + /* + * Wake up one waiter per page we just released. + */ + thread_wakeup_one((event_t) &vm_page_free_count); + } #if CONFIG_EMBEDDED { int percent_avail; @@ -2297,13 +2545,54 @@ vm_page_wire( // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); + if (mem->object) { + vm_object_lock_assert_exclusive(mem->object); + } else { + /* + * In theory, the page should be in an object before it + * gets wired, since we need to hold the object lock + * to update some fields in the page structure. + * However, some code (i386 pmap, for example) might want + * to wire a page before it gets inserted into an object. 
+ * That's somewhat OK, as long as nobody else can get to + * that page and update it at the same time. + */ + } #if DEBUG - if (mem->object) - vm_object_lock_assert_exclusive(mem->object); - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif - if (mem->wire_count == 0) { + if ( !VM_PAGE_WIRED(mem)) { VM_PAGE_QUEUES_REMOVE(mem); + + if (mem->object) { + mem->object->wired_page_count++; + assert(mem->object->resident_page_count >= + mem->object->wired_page_count); + if (mem->object->purgable == VM_PURGABLE_VOLATILE) { + assert(vm_page_purgeable_count > 0); + OSAddAtomic(-1, &vm_page_purgeable_count); + OSAddAtomic(1, &vm_page_purgeable_wired_count); + } + if (mem->object->all_reusable) { + /* + * Wired pages are not counted as "re-usable" + * in "all_reusable" VM objects, so nothing + * to do here. + */ + } else if (mem->reusable) { + /* + * This page is not "re-usable" when it's + * wired, so adjust its state and the + * accounting. 
+ */ + vm_object_reuse_pages(mem->object, + mem->offset, + mem->offset+PAGE_SIZE_64, + FALSE); + } + } + assert(!mem->reusable); + if (!mem->private && !mem->fictitious && !mem->gobbled) vm_page_wire_count++; if (mem->gobbled) @@ -2311,7 +2600,7 @@ vm_page_wire( mem->gobbled = FALSE; if (mem->zero_fill == TRUE) { mem->zero_fill = FALSE; - OSAddAtomic(-1, (SInt32 *)&vm_zf_count); + VM_ZF_COUNT_DECR(); } #if CONFIG_EMBEDDED { @@ -2343,6 +2632,7 @@ vm_page_wire( } assert(!mem->gobbled); mem->wire_count++; + VM_PAGE_CHECK(mem); } /* @@ -2360,9 +2650,9 @@ vm_page_gobble( VM_PAGE_CHECK(mem); assert(!mem->gobbled); - assert(mem->wire_count == 0); + assert( !VM_PAGE_WIRED(mem)); - if (!mem->gobbled && mem->wire_count == 0) { + if (!mem->gobbled && !VM_PAGE_WIRED(mem)) { if (!mem->private && !mem->fictitious) vm_page_wire_count++; } @@ -2387,15 +2677,24 @@ vm_page_unwire( // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ VM_PAGE_CHECK(mem); - assert(mem->wire_count > 0); + assert(VM_PAGE_WIRED(mem)); + assert(mem->object != VM_OBJECT_NULL); #if DEBUG - if (mem->object) - vm_object_lock_assert_exclusive(mem->object); - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + vm_object_lock_assert_exclusive(mem->object); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif if (--mem->wire_count == 0) { assert(!mem->private && !mem->fictitious); vm_page_wire_count--; + assert(mem->object->wired_page_count > 0); + mem->object->wired_page_count--; + assert(mem->object->resident_page_count >= + mem->object->wired_page_count); + if (mem->object->purgable == VM_PURGABLE_VOLATILE) { + OSAddAtomic(+1, &vm_page_purgeable_count); + assert(vm_page_purgeable_wired_count > 0); + OSAddAtomic(-1, &vm_page_purgeable_wired_count); + } assert(!mem->laundry); assert(mem->object != kernel_object); assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); @@ -2423,9 +2722,9 @@ vm_page_unwire( } #endif } + VM_PAGE_CHECK(mem); } - /* * vm_page_deactivate: * @@ 
-2437,9 +2736,17 @@ vm_page_unwire( */ void vm_page_deactivate( - register vm_page_t m) + vm_page_t m) +{ + vm_page_deactivate_internal(m, TRUE); +} + + +void +vm_page_deactivate_internal( + vm_page_t m, + boolean_t clear_hw_reference) { - boolean_t rapid_age = FALSE; VM_PAGE_CHECK(m); assert(m->object != kernel_object); @@ -2447,7 +2754,7 @@ vm_page_deactivate( // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */ #if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif /* * This page is no longer very interesting. If it was @@ -2457,25 +2764,20 @@ vm_page_deactivate( * their reference bit cleared. */ if (m->gobbled) { /* can this happen? */ - assert(m->wire_count == 0); + assert( !VM_PAGE_WIRED(m)); if (!m->private && !m->fictitious) vm_page_wire_count--; vm_page_gobble_count--; m->gobbled = FALSE; } - if (m->private || (m->wire_count != 0)) + if (m->private || (VM_PAGE_WIRED(m))) return; - if (m->active && m->deactivated == TRUE) { - if (!pmap_is_referenced(m->phys_page)) - rapid_age = TRUE; - } - if (rapid_age == FALSE && !m->fictitious && !m->absent) + if (!m->fictitious && !m->absent && clear_hw_reference == TRUE) pmap_clear_reference(m->phys_page); m->reference = FALSE; - m->deactivated = FALSE; m->no_cache = FALSE; if (!m->inactive) { @@ -2493,10 +2795,11 @@ vm_page_deactivate( m->throttled = TRUE; vm_page_throttled_count++; } else { - if (rapid_age == TRUE || - (!m->fictitious && m->object->named && m->object->ref_count == 1)) { + if (!m->fictitious && m->object->named && m->object->ref_count == 1) { vm_page_speculate(m, FALSE); +#if DEVELOPMENT || DEBUG vm_page_speculative_recreated++; +#endif return; } else { if (m->zero_fill) { @@ -2533,10 +2836,10 @@ vm_page_activate( #endif assert(m->phys_page != vm_page_guard_addr); #if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif if 
(m->gobbled) { - assert(m->wire_count == 0); + assert( !VM_PAGE_WIRED(m)); if (!m->private && !m->fictitious) vm_page_wire_count--; vm_page_gobble_count--; @@ -2557,7 +2860,7 @@ vm_page_activate( VM_PAGE_QUEUES_REMOVE(m); - if (m->wire_count == 0) { + if ( !VM_PAGE_WIRED(m)) { assert(!m->laundry); assert(m->pageq.next == NULL && m->pageq.prev == NULL); if (!IP_VALID(memory_manager_default) && @@ -2577,6 +2880,7 @@ vm_page_activate( m->reference = TRUE; m->no_cache = FALSE; } + VM_PAGE_CHECK(m); } @@ -2596,16 +2900,21 @@ vm_page_speculate( VM_PAGE_CHECK(m); assert(m->object != kernel_object); - assert(!m->speculative && !m->active && !m->inactive && !m->throttled); assert(m->phys_page != vm_page_guard_addr); - assert(m->pageq.next == NULL && m->pageq.prev == NULL); #if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif - if (m->wire_count == 0) { + + VM_PAGE_QUEUES_REMOVE(m); + + if ( !VM_PAGE_WIRED(m)) { mach_timespec_t ts; + clock_sec_t sec; + clock_nsec_t nsec; - clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec); + clock_get_system_nanotime(&sec, &nsec); + ts.tv_sec = (unsigned int) sec; + ts.tv_nsec = nsec; if (vm_page_speculative_count == 0) { @@ -2653,9 +2962,12 @@ vm_page_speculate( if (new == TRUE) { m->object->pages_created++; +#if DEVELOPMENT || DEBUG vm_page_speculative_created++; +#endif } } + VM_PAGE_CHECK(m); } @@ -2707,12 +3019,12 @@ vm_page_lru( assert(m->phys_page != vm_page_guard_addr); #if DEBUG - _mutex_assert(&vm_page_queue_lock, MA_OWNED); + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif if (m->active || m->reference) return; - if (m->private || (m->wire_count != 0)) + if (m->private || (VM_PAGE_WIRED(m))) return; m->no_cache = FALSE; @@ -2730,6 +3042,159 @@ vm_page_lru( } +void +vm_page_reactivate_all_throttled(void) +{ + vm_page_t first_throttled, last_throttled; + vm_page_t first_active; + vm_page_t m; + int extra_active_count; + + 
extra_active_count = 0; + vm_page_lock_queues(); + if (! queue_empty(&vm_page_queue_throttled)) { + /* + * Switch "throttled" pages to "active". + */ + queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) { + VM_PAGE_CHECK(m); + assert(m->throttled); + assert(!m->active); + assert(!m->inactive); + assert(!m->speculative); + assert(!VM_PAGE_WIRED(m)); + if (!m->fictitious) { + extra_active_count++; + } + m->throttled = FALSE; + m->active = TRUE; + VM_PAGE_CHECK(m); + } + + /* + * Transfer the entire throttled queue to a regular LRU page queues. + * We insert it at the head of the active queue, so that these pages + * get re-evaluated by the LRU algorithm first, since they've been + * completely out of it until now. + */ + first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled); + last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled); + first_active = (vm_page_t) queue_first(&vm_page_queue_active); + if (queue_empty(&vm_page_queue_active)) { + queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled; + } else { + queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled; + } + queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled; + queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active; + queue_next(&last_throttled->pageq) = (queue_entry_t) first_active; + +#if DEBUG + printf("reactivated %d throttled pages\n", vm_page_throttled_count); +#endif + queue_init(&vm_page_queue_throttled); + /* + * Adjust the global page counts. 
+ */ + vm_page_active_count += extra_active_count; + vm_page_throttled_count = 0; + } + assert(vm_page_throttled_count == 0); + assert(queue_empty(&vm_page_queue_throttled)); + vm_page_unlock_queues(); +} + + +/* + * move pages from the indicated local queue to the global active queue + * its ok to fail if we're below the hard limit and force == FALSE + * the nolocks == TRUE case is to allow this function to be run on + * the hibernate path + */ + +void +vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks) +{ + struct vpl *lq; + vm_page_t first_local, last_local; + vm_page_t first_active; + vm_page_t m; + uint32_t count = 0; + + if (vm_page_local_q == NULL) + return; + + lq = &vm_page_local_q[lid].vpl_un.vpl; + + if (nolocks == FALSE) { + if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) { + if ( !vm_page_trylockspin_queues()) + return; + } else + vm_page_lockspin_queues(); + + VPL_LOCK(&lq->vpl_lock); + } + if (lq->vpl_count) { + /* + * Switch "local" pages to "active". + */ + assert(!queue_empty(&lq->vpl_queue)); + + queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) { + VM_PAGE_CHECK(m); + assert(m->local); + assert(!m->active); + assert(!m->inactive); + assert(!m->speculative); + assert(!VM_PAGE_WIRED(m)); + assert(!m->throttled); + assert(!m->fictitious); + + if (m->local_id != lid) + panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m); + + m->local_id = 0; + m->local = FALSE; + m->active = TRUE; + VM_PAGE_CHECK(m); + + count++; + } + if (count != lq->vpl_count) + panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count); + + /* + * Transfer the entire local queue to a regular LRU page queues. 
+ */ + first_local = (vm_page_t) queue_first(&lq->vpl_queue); + last_local = (vm_page_t) queue_last(&lq->vpl_queue); + first_active = (vm_page_t) queue_first(&vm_page_queue_active); + + if (queue_empty(&vm_page_queue_active)) { + queue_last(&vm_page_queue_active) = (queue_entry_t) last_local; + } else { + queue_prev(&first_active->pageq) = (queue_entry_t) last_local; + } + queue_first(&vm_page_queue_active) = (queue_entry_t) first_local; + queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active; + queue_next(&last_local->pageq) = (queue_entry_t) first_active; + + queue_init(&lq->vpl_queue); + /* + * Adjust the global page counts. + */ + vm_page_active_count += lq->vpl_count; + lq->vpl_count = 0; + } + assert(queue_empty(&lq->vpl_queue)); + + if (nolocks == FALSE) { + VPL_UNLOCK(&lq->vpl_lock); + vm_page_unlock_queues(); + } +} + /* * vm_page_part_zero_fill: * @@ -2764,9 +3229,7 @@ vm_page_part_zero_fill( m_pa + len, PAGE_SIZE - (m_pa + len)); } vm_page_copy(tmp,m); - vm_page_lock_queues(); - vm_page_free(tmp); - vm_page_unlock_queues(); + VM_PAGE_FREE(tmp); #endif } @@ -2782,7 +3245,7 @@ vm_page_zero_fill( { XPR(XPR_VM_PAGE, "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n", - (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0); + m->object, m->offset, m, 0,0); VM_PAGE_CHECK(m); @@ -2831,8 +3294,8 @@ vm_page_copy( { XPR(XPR_VM_PAGE, "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n", - (integer_t)src_m->object, src_m->offset, - (integer_t)dest_m->object, dest_m->offset, + src_m->object, src_m->offset, + dest_m->object, dest_m->offset, 0); VM_PAGE_CHECK(src_m); @@ -2861,12 +3324,11 @@ vm_page_copy( vm_page_validate_cs(src_m); } /* - * Propagate the code-signing bits to the copy page. + * Propagate the cs_tainted bit to the copy page. Do not propagate + * the cs_validated bit. 
*/ - dest_m->cs_validated = src_m->cs_validated; dest_m->cs_tainted = src_m->cs_tainted; if (dest_m->cs_tainted) { - assert(dest_m->cs_validated); vm_page_copy_cs_tainted++; } @@ -2874,6 +3336,63 @@ vm_page_copy( } #if MACH_ASSERT +static void +_vm_page_print( + vm_page_t p) +{ + printf("vm_page %p: \n", p); + printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev); + printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev); + printf(" next=%p\n", p->next); + printf(" object=%p offset=0x%llx\n", p->object, p->offset); + printf(" wire_count=%u\n", p->wire_count); + + printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n", + (p->local ? "" : "!"), + (p->inactive ? "" : "!"), + (p->active ? "" : "!"), + (p->pageout_queue ? "" : "!"), + (p->speculative ? "" : "!"), + (p->laundry ? "" : "!")); + printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n", + (p->free ? "" : "!"), + (p->reference ? "" : "!"), + (p->gobbled ? "" : "!"), + (p->private ? "" : "!"), + (p->throttled ? "" : "!")); + printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n", + (p->busy ? "" : "!"), + (p->wanted ? "" : "!"), + (p->tabled ? "" : "!"), + (p->fictitious ? "" : "!"), + (p->pmapped ? "" : "!"), + (p->wpmapped ? "" : "!")); + printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n", + (p->pageout ? "" : "!"), + (p->absent ? "" : "!"), + (p->error ? "" : "!"), + (p->dirty ? "" : "!"), + (p->cleaning ? "" : "!"), + (p->precious ? "" : "!"), + (p->clustered ? "" : "!")); + printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n", + (p->overwriting ? "" : "!"), + (p->restart ? "" : "!"), + (p->unusual ? "" : "!"), + (p->encrypted ? "" : "!"), + (p->encrypted_cleaning ? "" : "!")); + printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n", + (p->list_req_pending ? "" : "!"), + (p->dump_cleaning ? "" : "!"), + (p->cs_validated ? 
"" : "!"), + (p->cs_tainted ? "" : "!"), + (p->no_cache ? "" : "!")); + printf(" %szero_fill\n", + (p->zero_fill ? "" : "!")); + + printf("phys_page=0x%x\n", p->phys_page); +} + /* * Check that the list of pages is ordered by * ascending physical address and has no holes. @@ -2891,8 +3410,8 @@ vm_page_verify_contiguous( page_count = 1; for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) { if (m->phys_page != prev_addr + 1) { - printf("m %p prev_addr 0x%x, current addr 0x%x\n", - m, prev_addr, m->phys_page); + printf("m %p prev_addr 0x%lx, current addr 0x%x\n", + m, (long)prev_addr, m->phys_page); printf("pages %p page_count %d\n", pages, page_count); panic("vm_page_verify_contiguous: not contiguous!"); } @@ -2906,61 +3425,116 @@ vm_page_verify_contiguous( } return 1; } -#endif /* MACH_ASSERT */ -#if MACH_ASSERT /* * Check the free lists for proper length etc. */ +static unsigned int +vm_page_verify_free_list( + unsigned int color, + vm_page_t look_for_page, + boolean_t expect_page) +{ + unsigned int npages; + vm_page_t m; + vm_page_t prev_m; + boolean_t found_page; + + found_page = FALSE; + npages = 0; + prev_m = (vm_page_t) &vm_page_queue_free[color]; + queue_iterate(&vm_page_queue_free[color], + m, + vm_page_t, + pageq) { + if (m == look_for_page) { + found_page = TRUE; + } + if ((vm_page_t) m->pageq.prev != prev_m) + panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n", + color, npages, m, m->pageq.prev, prev_m); + if ( ! m->free ) + panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n", + color, npages, m); + if ( ! 
m->busy ) + panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n", + color, npages, m); + if ( (m->phys_page & vm_color_mask) != color) + panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n", + color, npages, m, m->phys_page & vm_color_mask, color); + ++npages; + prev_m = m; + } + if (look_for_page != VM_PAGE_NULL) { + unsigned int other_color; + + if (expect_page && !found_page) { + printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n", + color, npages, look_for_page, look_for_page->phys_page); + _vm_page_print(look_for_page); + for (other_color = 0; + other_color < vm_colors; + other_color++) { + if (other_color == color) + continue; + vm_page_verify_free_list(other_color, look_for_page, FALSE); + } + panic("vm_page_verify_free_list(color=%u)\n", color); + } + if (!expect_page && found_page) { + printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n", + color, npages, look_for_page, look_for_page->phys_page); + } + } + return npages; +} + +static boolean_t vm_page_verify_free_lists_enabled = FALSE; static void vm_page_verify_free_lists( void ) { unsigned int color, npages; - vm_page_t m; - vm_page_t prev_m; - + + if (! vm_page_verify_free_lists_enabled) + return; + npages = 0; - - mutex_lock(&vm_page_queue_free_lock); + + lck_mtx_lock(&vm_page_queue_free_lock); for( color = 0; color < vm_colors; color++ ) { - prev_m = (vm_page_t) &vm_page_queue_free[color]; - queue_iterate(&vm_page_queue_free[color], - m, - vm_page_t, - pageq) { - if ((vm_page_t) m->pageq.prev != prev_m) - panic("vm_page_verify_free_lists: corrupted prev ptr"); - if ( ! m->free ) - panic("vm_page_verify_free_lists: not free"); - if ( ! 
m->busy ) - panic("vm_page_verify_free_lists: not busy"); - if ( (m->phys_page & vm_color_mask) != color) - panic("vm_page_verify_free_lists: wrong color"); - ++npages; - prev_m = m; - } + npages += vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE); } if (npages != vm_page_free_count) panic("vm_page_verify_free_lists: npages %u free_count %d", npages, vm_page_free_count); - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); } -#endif /* MACH_ASSERT */ +void +vm_page_queues_assert( + vm_page_t mem, + int val) +{ + if (mem->free + mem->active + mem->inactive + mem->speculative + + mem->throttled + mem->pageout_queue > (val)) { + _vm_page_print(mem); + panic("vm_page_queues_assert(%p, %d)\n", mem, val); + } + if (VM_PAGE_WIRED(mem)) { + assert(!mem->active); + assert(!mem->inactive); + assert(!mem->speculative); + assert(!mem->throttled); + } +} +#endif /* MACH_ASSERT */ /* * CONTIGUOUS PAGE ALLOCATION - * Additional levels of effort: - * + consider pages that are currently 'pmapped' - * this could be expensive since we'd have - * to ask the pmap layer about there state - * + consider dirty pages - * either clean them or - * copy them to other locations... * * Find a region large enough to contain at least n pages * of contiguous physical memory. @@ -3008,26 +3582,43 @@ vm_page_verify_free_lists( void ) #define RESET_STATE_OF_RUN() \ MACRO_BEGIN \ prevcontaddr = -2; \ + start_pnum = -1; \ free_considered = 0; \ substitute_needed = 0; \ npages = 0; \ MACRO_END +/* + * Can we steal in-use (i.e. not free) pages when searching for + * physically-contiguous pages ? 
+ */ +#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1 + +static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0; +#if DEBUG +int vm_page_find_contig_debug = 0; +#endif static vm_page_t vm_page_find_contiguous( unsigned int contig_pages, ppnum_t max_pnum, - boolean_t wire) + ppnum_t pnum_mask, + boolean_t wire, + int flags) { vm_page_t m = NULL; ppnum_t prevcontaddr; - unsigned int npages, considered; - unsigned int page_idx, start_idx; + ppnum_t start_pnum; + unsigned int npages, considered, scanned; + unsigned int page_idx, start_idx, last_idx, orig_last_idx; + unsigned int idx_last_contig_page_found = 0; int free_considered, free_available; int substitute_needed; + boolean_t wrapped; #if DEBUG - uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec; + clock_sec_t tv_start_sec, tv_end_sec; + clock_usec_t tv_start_usec, tv_end_usec; #endif #if MACH_ASSERT int yielded = 0; @@ -3045,24 +3636,49 @@ vm_page_find_contiguous( clock_get_system_microtime(&tv_start_sec, &tv_start_usec); #endif vm_page_lock_queues(); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock(&vm_page_queue_free_lock); RESET_STATE_OF_RUN(); + scanned = 0; considered = 0; free_available = vm_page_free_count - vm_page_free_reserved; - for (page_idx = 0, start_idx = 0; + wrapped = FALSE; + + if(flags & KMA_LOMEM) + idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx; + else + idx_last_contig_page_found = vm_page_find_contiguous_last_idx; + + orig_last_idx = idx_last_contig_page_found; + last_idx = orig_last_idx; + + for (page_idx = last_idx, start_idx = last_idx; npages < contig_pages && page_idx < vm_pages_count; page_idx++) { -retry: +retry: + if (wrapped && + npages == 0 && + page_idx >= orig_last_idx) { + /* + * We're back where we started and we haven't + * found any suitable contiguous range. Let's + * give up. 
+ */ + break; + } + scanned++; m = &vm_pages[page_idx]; + assert(!m->fictitious); + assert(!m->private); + if (max_pnum && m->phys_page > max_pnum) { /* no more low pages... */ break; } - if (m->phys_page <= vm_lopage_poolend && + if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend && m->phys_page >= vm_lopage_poolstart) { /* * don't want to take pages from our @@ -3072,10 +3688,17 @@ vm_page_find_contiguous( */ RESET_STATE_OF_RUN(); - } else if (m->wire_count || m->gobbled || + } else if (!npages && ((m->phys_page & pnum_mask) != 0)) { + /* + * a run may only start on a page aligned to pnum_mask + */ + RESET_STATE_OF_RUN(); + + } else if (VM_PAGE_WIRED(m) || m->gobbled || m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted || m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious || - m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) { + m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending || + m->pageout) { /* * page is in a transient state * or a state we don't want to deal @@ -3110,19 +3733,41 @@ vm_page_find_contiguous( } else { if (m->phys_page != prevcontaddr + 1) { - npages = 1; - start_idx = page_idx; + if ((m->phys_page & pnum_mask) != 0) { + RESET_STATE_OF_RUN(); + goto did_consider; + } else { + npages = 1; + start_idx = page_idx; + start_pnum = m->phys_page; + } } else { npages++; } prevcontaddr = m->phys_page; - - if (m->pmapped || m->dirty) - substitute_needed++; - + + VM_PAGE_CHECK(m); if (m->free) { free_considered++; + } else { + /* + * This page is not free. + * If we can't steal used pages, + * we have to give up this run + * and keep looking. + * Otherwise, we might need to + * move the contents of this page + * into a substitute page. 
+ */ +#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL + if (m->pmapped || m->dirty) { + substitute_needed++; + } +#else + RESET_STATE_OF_RUN(); +#endif } + if ((free_considered + substitute_needed) > free_available) { /* * if we let this run continue @@ -3148,15 +3793,16 @@ vm_page_find_contiguous( */ } } +did_consider: if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) { - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); mutex_pause(0); vm_page_lock_queues(); - mutex_lock(&vm_page_queue_free_lock); + lck_mtx_lock(&vm_page_queue_free_lock); RESET_STATE_OF_RUN(); /* @@ -3174,9 +3820,25 @@ vm_page_find_contiguous( } m = VM_PAGE_NULL; - if (npages != contig_pages) - mutex_unlock(&vm_page_queue_free_lock); - else { + if (npages != contig_pages) { + if (!wrapped) { + /* + * We didn't find a contiguous range but we didn't + * start from the very first page. + * Start again from the very first page. + */ + RESET_STATE_OF_RUN(); + if( flags & KMA_LOMEM) + idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0; + else + idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0; + last_idx = 0; + page_idx = last_idx; + wrapped = TRUE; + goto retry; + } + lck_mtx_unlock(&vm_page_queue_free_lock); + } else { vm_page_t m1; vm_page_t m2; unsigned int cur_idx; @@ -3184,6 +3846,8 @@ vm_page_find_contiguous( vm_object_t locked_object = VM_OBJECT_NULL; boolean_t abort_run = FALSE; + assert(page_idx - start_idx == contig_pages); + tmp_start_idx = start_idx; /* @@ -3199,14 +3863,33 @@ vm_page_find_contiguous( m1 = &vm_pages[start_idx++]; +#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL + assert(m1->free); +#endif + if (m1->free) { unsigned int color; color = m1->phys_page & vm_color_mask; +#if MACH_ASSERT + vm_page_verify_free_list(color, m1, TRUE); +#endif queue_remove(&vm_page_queue_free[color], m1, vm_page_t, pageq); + m1->pageq.next = NULL; + m1->pageq.prev = NULL; +#if MACH_ASSERT + 
vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE); +#endif + /* + * Clear the "free" bit so that this page + * does not get considered for another + * concurrent physically-contiguous allocation. + */ + m1->free = FALSE; + assert(m1->busy); vm_page_free_count--; } @@ -3217,13 +3900,18 @@ vm_page_find_contiguous( if (vm_page_free_count < vm_page_free_count_minimum) vm_page_free_count_minimum = vm_page_free_count; + if( flags & KMA_LOMEM) + vm_page_lomem_find_contiguous_last_idx = page_idx; + else + vm_page_find_contiguous_last_idx = page_idx; + /* * we can drop the free queue lock at this point since * we've pulled any 'free' candidates off of the list * we need it dropped so that we can do a vm_page_grab * when substituing for pmapped/dirty pages */ - mutex_unlock(&vm_page_queue_free_lock); + lck_mtx_unlock(&vm_page_queue_free_lock); start_idx = tmp_start_idx; cur_idx = page_idx - 1; @@ -3236,17 +3924,16 @@ vm_page_find_contiguous( */ m1 = &vm_pages[cur_idx--]; - if (m1->free) { + assert(!m1->free); + if (m1->object == VM_OBJECT_NULL) { /* - * pages have already been removed from + * page has already been removed from * the free list in the 1st pass */ - assert(m1->free); + assert(m1->offset == (vm_object_offset_t) -1); assert(m1->busy); assert(!m1->wanted); assert(!m1->laundry); - m1->free = FALSE; - } else { vm_object_t object; @@ -3264,7 +3951,7 @@ vm_page_find_contiguous( locked_object = object; } if (locked_object == VM_OBJECT_NULL || - (m1->wire_count || m1->gobbled || + (VM_PAGE_WIRED(m1) || m1->gobbled || m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted || m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious || m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) { @@ -3326,7 +4013,7 @@ vm_page_find_contiguous( /* * now put the substitute page on the object */ - vm_page_insert_internal(m2, locked_object, offset, TRUE); + 
vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE); if (m2->reference) vm_page_activate(m2); @@ -3360,7 +4047,7 @@ vm_page_find_contiguous( if (abort_run == TRUE) { if (m != VM_PAGE_NULL) { - vm_page_free_list(m); + vm_page_free_list(m, FALSE); } #if MACH_ASSERT dumped_run++; @@ -3373,20 +4060,34 @@ vm_page_find_contiguous( * and 1 more to bump back over this page */ page_idx = tmp_start_idx + 2; - - if (page_idx >= vm_pages_count) - goto done_scanning; - - mutex_lock(&vm_page_queue_free_lock); - - RESET_STATE_OF_RUN(); - + if (page_idx >= vm_pages_count) { + if (wrapped) + goto done_scanning; + page_idx = last_idx = 0; + wrapped = TRUE; + } + abort_run = FALSE; + /* - * reset our free page limit since we - * dropped the lock protecting the vm_page_free_queue + * We didn't find a contiguous range but we didn't + * start from the very first page. + * Start again from the very first page. */ - free_available = vm_page_free_count - vm_page_free_reserved; + RESET_STATE_OF_RUN(); + + if( flags & KMA_LOMEM) + idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx; + else + idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx; + + last_idx = page_idx; + lck_mtx_lock(&vm_page_queue_free_lock); + /* + * reset our free page limit since we + * dropped the lock protecting the vm_page_free_queue + */ + free_available = vm_page_free_count - vm_page_free_reserved; goto retry; } @@ -3423,8 +4124,12 @@ vm_page_find_contiguous( tv_end_sec++; tv_end_sec -= 1000000; } - printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n", - contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages); + if (vm_page_find_contig_debug) { + printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... 
stole %d pages\n", + __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT, + (long)tv_end_sec, tv_end_usec, orig_last_idx, + scanned, yielded, dumped_run, stolen_pages); + } #endif #if MACH_ASSERT @@ -3441,7 +4146,9 @@ cpm_allocate( vm_size_t size, vm_page_t *list, ppnum_t max_pnum, - boolean_t wire) + ppnum_t pnum_mask, + boolean_t wire, + int flags) { vm_page_t pages; unsigned int npages; @@ -3449,14 +4156,18 @@ cpm_allocate( if (size % page_size != 0) return KERN_INVALID_ARGUMENT; - npages = size / page_size; + npages = (unsigned int) (size / PAGE_SIZE); + if (npages != size / PAGE_SIZE) { + /* 32-bit overflow */ + return KERN_INVALID_ARGUMENT; + } /* * Obtain a pointer to a subset of the free * list large enough to satisfy the request; * the region will be physically contiguous. */ - pages = vm_page_find_contiguous(npages, max_pnum, wire); + pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags); if (pages == VM_PAGE_NULL) return KERN_NO_SPACE; @@ -3496,6 +4207,464 @@ cpm_allocate( return KERN_SUCCESS; } +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static vm_page_t hibernate_gobble_queue; + +static void +hibernate_page_list_zero(hibernate_page_list_t *list) +{ + uint32_t bank; + hibernate_bitmap_t * bitmap; + + bitmap = &list->bank_bitmap[0]; + for (bank = 0; bank < list->bank_count; bank++) + { + uint32_t last_bit; + + bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2); + // set out-of-bound bits at end of bitmap. 
+ last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31); + if (last_bit) + bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit); + + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } +} + +void +hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time) +{ + uint32_t i; + vm_page_t m; + uint64_t start, end, timeout, nsec; + clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout); + clock_get_uptime(&start); + + for (i = 0; i < gobble_count; i++) + { + while (VM_PAGE_NULL == (m = vm_page_grab())) + { + clock_get_uptime(&end); + if (end >= timeout) + break; + VM_PAGE_WAIT(); + } + if (!m) + break; + m->busy = FALSE; + vm_page_gobble(m); + + m->pageq.next = (queue_entry_t) hibernate_gobble_queue; + hibernate_gobble_queue = m; + } + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL); +} + +void +hibernate_free_gobble_pages(void) +{ + vm_page_t m, next; + uint32_t count = 0; + + m = (vm_page_t) hibernate_gobble_queue; + while(m) + { + next = (vm_page_t) m->pageq.next; + vm_page_free(m); + count++; + m = next; + } + hibernate_gobble_queue = VM_PAGE_NULL; + + if (count) + HIBLOG("Freed %d pages\n", count); +} + +static boolean_t +hibernate_consider_discard(vm_page_t m) +{ + vm_object_t object = NULL; + int refmod_state; + boolean_t discard = FALSE; + + do + { + if(m->private) + panic("hibernate_consider_discard: private"); + + if (!vm_object_lock_try(m->object)) + break; + + object = m->object; + + if (VM_PAGE_WIRED(m)) + break; + if (m->precious) + break; + + if (m->busy || !object->alive) + /* + * Somebody is playing with this page. 
+ */ + break; + + if (m->absent || m->unusual || m->error) + /* + * If it's unusual in anyway, ignore it + */ + break; + + if (m->cleaning) + break; + + if (m->laundry || m->list_req_pending) + break; + + if (!m->dirty) + { + refmod_state = pmap_get_refmod(m->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + m->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + } + + /* + * If it's clean or purgeable we can discard the page on wakeup. + */ + discard = (!m->dirty) + || (VM_PURGABLE_VOLATILE == object->purgable) + || (VM_PURGABLE_EMPTY == m->object->purgable); + } + while (FALSE); + + if (object) + vm_object_unlock(object); + + return (discard); +} + + +static void +hibernate_discard_page(vm_page_t m) +{ + if (m->absent || m->unusual || m->error) + /* + * If it's unusual in anyway, ignore + */ + return; + + if (m->pmapped == TRUE) + { + __unused int refmod_state = pmap_disconnect(m->phys_page); + } + + if (m->laundry) + panic("hibernate_discard_page(%p) laundry", m); + if (m->private) + panic("hibernate_discard_page(%p) private", m); + if (m->fictitious) + panic("hibernate_discard_page(%p) fictitious", m); + + if (VM_PURGABLE_VOLATILE == m->object->purgable) + { + /* object should be on a queue */ + assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL)); + purgeable_q_t old_queue = vm_purgeable_object_remove(m->object); + assert(old_queue); + /* No need to lock page queue for token delete, hibernate_vm_unlock() + makes sure these locks are uncontended before sleep */ + vm_purgeable_token_delete_first(old_queue); + m->object->purgable = VM_PURGABLE_EMPTY; + } + + vm_page_free(m); +} + +/* + Bits zero in the bitmaps => page needs to be saved. All pages default to be saved, + pages known to VM to not need saving are subtracted. + Wired pages to be saved are present in page_list_wired, pageable in page_list. 
+*/ + +void +hibernate_page_list_setall(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut) +{ + uint64_t start, end, nsec; + vm_page_t m; + uint32_t pages = page_list->page_count; + uint32_t count_zf = 0, count_throttled = 0; + uint32_t count_inactive = 0, count_active = 0, count_speculative = 0; + uint32_t count_wire = pages; + uint32_t count_discard_active = 0; + uint32_t count_discard_inactive = 0; + uint32_t count_discard_purgeable = 0; + uint32_t count_discard_speculative = 0; + uint32_t i; + uint32_t bank; + hibernate_bitmap_t * bitmap; + hibernate_bitmap_t * bitmap_wired; + + + HIBLOG("hibernate_page_list_setall start\n"); + + clock_get_uptime(&start); + + hibernate_page_list_zero(page_list); + hibernate_page_list_zero(page_list_wired); + + if (vm_page_local_q) { + for (i = 0; i < vm_page_local_q_count; i++) + vm_page_reactivate_local(i, TRUE, TRUE); + } + + m = (vm_page_t) hibernate_gobble_queue; + while(m) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + m = (vm_page_t) m->pageq.next; + } + + for( i = 0; i < vm_colors; i++ ) + { + queue_iterate(&vm_page_queue_free[i], + m, + vm_page_t, + pageq) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + } + + queue_iterate(&vm_lopage_queue_free, + m, + vm_page_t, + pageq) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + queue_iterate( &vm_page_queue_throttled, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && hibernate_consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_inactive++; + } + else + count_throttled++; + count_wire--; + 
hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + queue_iterate( &vm_page_queue_zf, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && hibernate_consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + if (m->dirty) + count_discard_purgeable++; + else + count_discard_inactive++; + } + else + count_zf++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + queue_iterate( &vm_page_queue_inactive, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && hibernate_consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + if (m->dirty) + count_discard_purgeable++; + else + count_discard_inactive++; + } + else + count_inactive++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) + { + queue_iterate(&vm_page_queue_speculative[i].age_q, + m, + vm_page_t, + pageq) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && hibernate_consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_speculative++; + } + else + count_speculative++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + } + + queue_iterate( &vm_page_queue_active, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) + && hibernate_consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + if (m->dirty) + count_discard_purgeable++; + else + count_discard_active++; + } + else + count_active++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + // pull wired from hibernate_bitmap + + bitmap = &page_list->bank_bitmap[0]; + bitmap_wired = &page_list_wired->bank_bitmap[0]; + for (bank = 0; bank < page_list->bank_count; bank++) + { + for (i = 0; i < 
bitmap->bitmapwords; i++) + bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i]; + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords]; + bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords]; + } + + // machine dependent adjustments + hibernate_page_list_setall_machine(page_list, page_list_wired, &pages); + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); + + HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n", + pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled, + count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative); + + *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative; +} + +void +hibernate_page_list_discard(hibernate_page_list_t * page_list) +{ + uint64_t start, end, nsec; + vm_page_t m; + vm_page_t next; + uint32_t i; + uint32_t count_discard_active = 0; + uint32_t count_discard_inactive = 0; + uint32_t count_discard_purgeable = 0; + uint32_t count_discard_speculative = 0; + + clock_get_uptime(&start); + + m = (vm_page_t) queue_first(&vm_page_queue_zf); + while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + if (m->dirty) + count_discard_purgeable++; + else + count_discard_inactive++; + hibernate_discard_page(m); + } + m = next; + } + + for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) + { + m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q); + while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + 
count_discard_speculative++; + hibernate_discard_page(m); + } + m = next; + } + } + + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + if (m->dirty) + count_discard_purgeable++; + else + count_discard_inactive++; + hibernate_discard_page(m); + } + m = next; + } + + m = (vm_page_t) queue_first(&vm_page_queue_active); + while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + if (m->dirty) + count_discard_purgeable++; + else + count_discard_active++; + hibernate_discard_page(m); + } + m = next; + } + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n", + nsec / 1000000ULL, + count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include #if MACH_VM_DEBUG @@ -3520,6 +4689,7 @@ vm_page_info( unsigned int count) { unsigned int i; + lck_spin_t *bucket_lock; if (vm_page_bucket_count < count) count = vm_page_bucket_count; @@ -3529,10 +4699,13 @@ vm_page_info( unsigned int bucket_count = 0; vm_page_t m; - simple_lock(&vm_page_bucket_lock); + bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; + lck_spin_lock(bucket_lock); + for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next) bucket_count++; - simple_unlock(&vm_page_bucket_lock); + + lck_spin_unlock(bucket_lock); /* don't touch pageable memory while holding locks */ info[i].hib_count = bucket_count; @@ -3568,7 +4741,8 @@ vm_page_print( printf(", offset=0x%x", p->offset); printf(", wire_count=%d", p->wire_count); - iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, 
%slaundry, %sfree, %sref, %sencrypted\n", + iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n", + (p->local ? "" : "!"), (p->inactive ? "" : "!"), (p->active ? "" : "!"), (p->throttled ? "" : "!"), diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c index f6975e1c1..05fa65614 100644 --- a/osfmk/vm/vm_shared_region.c +++ b/osfmk/vm/vm_shared_region.c @@ -100,6 +100,7 @@ #include #include +#include #include @@ -114,12 +115,15 @@ /* "dyld" uses this to figure out what the kernel supports */ int shared_region_version = 3; -/* should local (non-chroot) shared regions persist when no task uses them ? */ -int shared_region_persistence = 1; /* yes by default */ - /* trace level, output is sent to the system log file */ int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL; +/* should local (non-chroot) shared regions persist when no task uses them ? */ +int shared_region_persistence = 0; /* no by default */ + +/* delay before reclaiming an unused shared region */ +int shared_region_destroy_delay = 120; /* in seconds */ + /* this lock protects all the shared region data structures */ lck_grp_t *vm_shared_region_lck_grp; lck_mtx_t vm_shared_region_lock; @@ -142,6 +146,16 @@ static vm_shared_region_t vm_shared_region_create( boolean_t is_64bit); static void vm_shared_region_destroy(vm_shared_region_t shared_region); +static void vm_shared_region_timeout(thread_call_param_t param0, + thread_call_param_t param1); + +static int __commpage_setup = 0; +#if defined(__i386__) || defined(__x86_64__) +static int __system_power_source = 1; /* init to extrnal power source */ +static void post_sys_powersource_internal(int i, int internal); +#endif /* __i386__ || __x86_64__ */ + + /* * Initialize the module... 
*/ @@ -400,6 +414,22 @@ vm_shared_region_reference_locked( shared_region)); assert(shared_region->sr_ref_count > 0); shared_region->sr_ref_count++; + + if (shared_region->sr_timer_call != NULL) { + boolean_t cancelled; + + /* cancel and free any pending timeout */ + cancelled = thread_call_cancel(shared_region->sr_timer_call); + if (cancelled) { + thread_call_free(shared_region->sr_timer_call); + shared_region->sr_timer_call = NULL; + /* release the reference held by the cancelled timer */ + shared_region->sr_ref_count--; + } else { + /* the timer will drop the reference and free itself */ + } + } + SHARED_REGION_TRACE_DEBUG( ("shared_region: reference_locked(%p) <- %d\n", shared_region, shared_region->sr_ref_count)); @@ -449,16 +479,46 @@ vm_shared_region_deallocate( shared_region, shared_region->sr_ref_count)); if (shared_region->sr_ref_count == 0) { - assert(! shared_region->sr_mapping_in_progress); - /* remove it from the queue first, so no one can find it... */ - queue_remove(&vm_shared_region_queue, - shared_region, - vm_shared_region_t, - sr_q); - vm_shared_region_unlock(); - /* ... and destroy it */ - vm_shared_region_destroy(shared_region); - shared_region = NULL; + uint64_t deadline; + + if (shared_region->sr_timer_call == NULL) { + /* hold one reference for the timer */ + assert(! 
shared_region->sr_mapping_in_progress); + shared_region->sr_ref_count++; + + /* set up the timer */ + shared_region->sr_timer_call = thread_call_allocate( + (thread_call_func_t) vm_shared_region_timeout, + (thread_call_param_t) shared_region); + + /* schedule the timer */ + clock_interval_to_deadline(shared_region_destroy_delay, + 1000 * 1000 * 1000, + &deadline); + thread_call_enter_delayed(shared_region->sr_timer_call, + deadline); + + SHARED_REGION_TRACE_DEBUG( + ("shared_region: deallocate(%p): armed timer\n", + shared_region)); + + vm_shared_region_unlock(); + } else { + /* timer expired: let go of this shared region */ + + /* + * Remove it from the queue first, so no one can find + * it... + */ + queue_remove(&vm_shared_region_queue, + shared_region, + vm_shared_region_t, + sr_q); + vm_shared_region_unlock(); + /* ... and destroy it */ + vm_shared_region_destroy(shared_region); + shared_region = NULL; + } } else { vm_shared_region_unlock(); } @@ -468,6 +528,18 @@ vm_shared_region_deallocate( shared_region)); } +void +vm_shared_region_timeout( + thread_call_param_t param0, + __unused thread_call_param_t param1) +{ + vm_shared_region_t shared_region; + + shared_region = (vm_shared_region_t) param0; + + vm_shared_region_deallocate(shared_region); +} + /* * Create a new (empty) shared region for a new environment. */ @@ -606,6 +678,7 @@ vm_shared_region_create( queue_init(&shared_region->sr_q); shared_region->sr_mapping_in_progress = FALSE; shared_region->sr_persists = FALSE; + shared_region->sr_timer_call = NULL; shared_region->sr_first_mapping = (mach_vm_offset_t) -1; /* grab a reference for the caller */ @@ -683,6 +756,10 @@ vm_shared_region_destroy( mem_entry = NULL; shared_region->sr_mem_entry = IPC_PORT_NULL; + if (shared_region->sr_timer_call) { + thread_call_free(shared_region->sr_timer_call); + } + /* release the shared region structure... 
*/ kfree(shared_region, sizeof (*shared_region)); SHARED_REGION_TRACE_DEBUG( @@ -1255,6 +1332,12 @@ vm_commpage_init(void) /* populate them according to this specific platform */ commpage_populate(); + __commpage_setup = 1; +#if defined(__i386__) || defined(__x86_64__) + if (__system_power_source == 0) { + post_sys_powersource_internal(0, 1); + } +#endif /* __i386__ || __x86_64__ */ SHARED_REGION_TRACE_DEBUG( ("commpage: init() <-\n")); @@ -1366,3 +1449,38 @@ vm_commpage_enter( map, task, kr)); return kr; } + + +/* + * This is called from power management code to let the kernel know the current source of power: + * 0 if it is an external source (connected to power) + * 1 if it is the internal power source, i.e. the battery + */ +void +#if defined(__i386__) || defined(__x86_64__) +post_sys_powersource(int i) +#else +post_sys_powersource(__unused int i) +#endif +{ +#if defined(__i386__) || defined(__x86_64__) + post_sys_powersource_internal(i, 0); +#endif /* __i386__ || __x86_64__ */ +} + + +#if defined(__i386__) || defined(__x86_64__) +static void +post_sys_powersource_internal(int i, int internal) +{ + if (internal == 0) + __system_power_source = i; + + if (__commpage_setup != 0) { + if (__system_power_source != 0) + commpage_set_spin_count(0); + else + commpage_set_spin_count(MP_SPIN_TRIES); + } +} +#endif /* __i386__ || __x86_64__ */ diff --git a/osfmk/vm/vm_shared_region.h b/osfmk/vm/vm_shared_region.h index 17067253a..9cc8ba5af 100644 --- a/osfmk/vm/vm_shared_region.h +++ b/osfmk/vm/vm_shared_region.h @@ -95,6 +95,7 @@ typedef struct vm_shared_region *vm_shared_region_t; /* address space shared region descriptor */ struct vm_shared_region { uint32_t sr_ref_count; + queue_chain_t sr_q; void *sr_root_dir; cpu_type_t sr_cpu_type; boolean_t sr_64bit; @@ -106,7 +107,7 @@ struct vm_shared_region { mach_vm_size_t sr_size; mach_vm_offset_t sr_pmap_nesting_start; mach_vm_size_t sr_pmap_nesting_size; - queue_chain_t sr_q; + thread_call_t sr_timer_call; }; #else /* !MACH_KERNEL_PRIVATE */
diff --git a/osfmk/vm/vm_swapfile_pager.c b/osfmk/vm/vm_swapfile_pager.c new file mode 100644 index 000000000..db8943367 --- /dev/null +++ b/osfmk/vm/vm_swapfile_pager.c @@ -0,0 +1,876 @@ +/* + * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +/* + * APPLE SWAPFILE MEMORY PAGER + * + * This external memory manager (EMM) handles mappings of the swap files. + * Swap files are not regular files and are used solely to store contents of + * anonymous memory mappings while not resident in memory. + * There's no valid reason to map a swap file. 
This just puts extra burden + * on the system, is potentially a security issue and is not reliable since + * the contents can change at any time with pageout operations. + * Here are some of the issues with mapping a swap file. + * * PERFORMANCE: + * Each page in the swap file belongs to an anonymous memory object. Mapping + * the swap file makes those pages also accessible via a vnode memory + * object and each page can now be resident twice. + * * SECURITY: + * Mapping a swap file allows access to other processes' memory. Swap files + * are only accessible by the "root" super-user, who can already access any + * process's memory, so this is not a real issue but if permissions on the + * swap file got changed, it could become one. + * Swap files are not "zero-filled" on creation, so until their contents are + * overwritten with pageout operations, they still contain whatever was on + * the disk blocks they were allocated. The "super-user" could see the + * contents of free blocks anyway, so this is not a new security issue but + * it may be perceived as one. + * * ENCRYPTED SWAP: + * When swap is encrypted, one does not expect to find any clear contents + * in the swap files. Since unused blocks are not scrubbed, they could still + * contain clear contents. If these contents are visible through a mapping + * of the swap file, it makes it look like swap is not really encrypted. + * + * We can't legitimately prevent a user process with appropriate privileges + * from mapping a swap file, but we can prevent it from accessing its actual + * contents. + * This pager mostly handles page-in requests (from memory_object_data_request()) + * for swap file mappings and just returns bogus data. + * Pageouts are not handled, so mmap() has to make sure it does not allow + * writable (i.e. MAP_SHARED and PROT_WRITE) mappings of swap files.
+ */ + +/* forward declarations */ +void swapfile_pager_reference(memory_object_t mem_obj); +void swapfile_pager_deallocate(memory_object_t mem_obj); +kern_return_t swapfile_pager_init(memory_object_t mem_obj, + memory_object_control_t control, + memory_object_cluster_size_t pg_size); +kern_return_t swapfile_pager_terminate(memory_object_t mem_obj); +kern_return_t swapfile_pager_data_request(memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t length, + vm_prot_t protection_required, + memory_object_fault_info_t fault_info); +kern_return_t swapfile_pager_data_return(memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t data_cnt, + memory_object_offset_t *resid_offset, + int *io_error, + boolean_t dirty, + boolean_t kernel_copy, + int upl_flags); +kern_return_t swapfile_pager_data_initialize(memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t data_cnt); +kern_return_t swapfile_pager_data_unlock(memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_size_t size, + vm_prot_t desired_access); +kern_return_t swapfile_pager_synchronize(memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_size_t length, + vm_sync_t sync_flags); +kern_return_t swapfile_pager_map(memory_object_t mem_obj, + vm_prot_t prot); +kern_return_t swapfile_pager_last_unmap(memory_object_t mem_obj); + +/* + * Vector of VM operations for this EMM. + * These routines are invoked by VM via the memory_object_*() interfaces. 
+ */ +const struct memory_object_pager_ops swapfile_pager_ops = { + swapfile_pager_reference, + swapfile_pager_deallocate, + swapfile_pager_init, + swapfile_pager_terminate, + swapfile_pager_data_request, + swapfile_pager_data_return, + swapfile_pager_data_initialize, + swapfile_pager_data_unlock, + swapfile_pager_synchronize, + swapfile_pager_map, + swapfile_pager_last_unmap, + "swapfile pager" +}; + +/* + * The "swapfile_pager" describes a memory object backed by + * the "swapfile" EMM. + */ +typedef struct swapfile_pager { + struct ipc_object_header pager_header; /* fake ip_kotype() */ + memory_object_pager_ops_t pager_ops; /* == &swapfile_pager_ops */ + queue_chain_t pager_queue; /* next & prev pagers */ + unsigned int ref_count; /* reference count */ + boolean_t is_ready; /* is this pager ready ? */ + boolean_t is_mapped; /* is this pager mapped ? */ + memory_object_control_t pager_control; /* mem object control handle */ + struct vnode *swapfile_vnode;/* the swapfile's vnode */ +} *swapfile_pager_t; +#define SWAPFILE_PAGER_NULL ((swapfile_pager_t) NULL) +#define pager_ikot pager_header.io_bits + +/* + * List of memory objects managed by this EMM. + * The list is protected by the "swapfile_pager_lock" lock. + */ +int swapfile_pager_count = 0; /* number of pagers */ +queue_head_t swapfile_pager_queue; +decl_lck_mtx_data(,swapfile_pager_lock) + +/* + * Statistics & counters. 
+ */ +int swapfile_pager_count_max = 0; + + +lck_grp_t swapfile_pager_lck_grp; +lck_grp_attr_t swapfile_pager_lck_grp_attr; +lck_attr_t swapfile_pager_lck_attr; + + +/* internal prototypes */ +swapfile_pager_t swapfile_pager_create(struct vnode *vp); +swapfile_pager_t swapfile_pager_lookup(memory_object_t mem_obj); +void swapfile_pager_dequeue(swapfile_pager_t pager); +void swapfile_pager_deallocate_internal(swapfile_pager_t pager, + boolean_t locked); +void swapfile_pager_terminate_internal(swapfile_pager_t pager); + + +#if DEBUG +int swapfile_pagerdebug = 0; +#define PAGER_ALL 0xffffffff +#define PAGER_INIT 0x00000001 +#define PAGER_PAGEIN 0x00000002 + +#define PAGER_DEBUG(LEVEL, A) \ + MACRO_BEGIN \ + if ((swapfile_pagerdebug & LEVEL)==LEVEL) { \ + printf A; \ + } \ + MACRO_END +#else +#define PAGER_DEBUG(LEVEL, A) +#endif + + +void +swapfile_pager_bootstrap(void) +{ + lck_grp_attr_setdefault(&swapfile_pager_lck_grp_attr); + lck_grp_init(&swapfile_pager_lck_grp, "swapfile pager", &swapfile_pager_lck_grp_attr); + lck_attr_setdefault(&swapfile_pager_lck_attr); + lck_mtx_init(&swapfile_pager_lock, &swapfile_pager_lck_grp, &swapfile_pager_lck_attr); + queue_init(&swapfile_pager_queue); +} + +/* + * swapfile_pager_init() + * + * Initialize the memory object and makes it ready to be used and mapped. 
+ */ +kern_return_t +swapfile_pager_init( + memory_object_t mem_obj, + memory_object_control_t control, +#if !DEBUG + __unused +#endif + memory_object_cluster_size_t pg_size) +{ + swapfile_pager_t pager; + kern_return_t kr; + memory_object_attr_info_data_t attributes; + + PAGER_DEBUG(PAGER_ALL, + ("swapfile_pager_init: %p, %p, %x\n", + mem_obj, control, pg_size)); + + if (control == MEMORY_OBJECT_CONTROL_NULL) + return KERN_INVALID_ARGUMENT; + + pager = swapfile_pager_lookup(mem_obj); + + memory_object_control_reference(control); + + pager->pager_control = control; + + attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY; + attributes.cluster_size = (1 << (PAGE_SHIFT)); + attributes.may_cache_object = FALSE; + attributes.temporary = TRUE; + + kr = memory_object_change_attributes( + control, + MEMORY_OBJECT_ATTRIBUTE_INFO, + (memory_object_info_t) &attributes, + MEMORY_OBJECT_ATTR_INFO_COUNT); + if (kr != KERN_SUCCESS) + panic("swapfile_pager_init: " + "memory_object_change_attributes() failed"); + + return KERN_SUCCESS; +} + +/* + * swapfile_pager_data_return() + * + * Handles page-out requests from VM. This should never happen since + * the pages provided by this EMM are not supposed to be dirty or dirtied + * and VM should simply discard the contents and reclaim the pages if it + * needs to. This function panics if it is ever called.
+ */ +kern_return_t +swapfile_pager_data_return( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_cluster_size_t data_cnt, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags) +{ + panic("swapfile_pager_data_return: should never get called"); + return KERN_FAILURE; +} + +kern_return_t +swapfile_pager_data_initialize( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_cluster_size_t data_cnt) +{ + panic("swapfile_pager_data_initialize: should never get called"); + return KERN_FAILURE; +} + +kern_return_t +swapfile_pager_data_unlock( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_size_t size, + __unused vm_prot_t desired_access) +{ + return KERN_FAILURE; +} + +/* + * swapfile_pager_data_request() + * + * Handles page-in requests from VM.
+ */ +kern_return_t +swapfile_pager_data_request( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t length, +#if !DEBUG + __unused +#endif + vm_prot_t protection_required, /* only referenced by PAGER_DEBUG() below */ + __unused memory_object_fault_info_t mo_fault_info) +{ + swapfile_pager_t pager; + memory_object_control_t mo_control; + upl_t upl; + int upl_flags; + upl_size_t upl_size; + upl_page_info_t *upl_pl = NULL; + unsigned int pl_count; + vm_object_t dst_object; + kern_return_t kr, retval; + vm_map_offset_t kernel_mapping; + vm_offset_t dst_vaddr; + char *dst_ptr; + vm_offset_t cur_offset; + vm_map_entry_t map_entry; + + PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_data_request: %p, %llx, %x, %x\n", mem_obj, offset, length, protection_required)); + + kernel_mapping = 0; /* 0 and NULL tell the "done:" cleanup that nothing was set up yet */ + upl = NULL; + upl_pl = NULL; + + pager = swapfile_pager_lookup(mem_obj); + assert(pager->is_ready); + assert(pager->ref_count > 1); /* pager is alive and mapped */ + + PAGER_DEBUG(PAGER_PAGEIN, ("swapfile_pager_data_request: %p, %llx, %x, %x, pager %p\n", mem_obj, offset, length, protection_required, pager)); + + /* + * Gather in a UPL all the VM pages requested by VM. + */ + mo_control = pager->pager_control; + + upl_size = length; + upl_flags = + UPL_RET_ONLY_ABSENT | + UPL_SET_LITE | + UPL_NO_SYNC | + UPL_CLEAN_IN_PLACE | /* triggers UPL_CLEAR_DIRTY */ + UPL_SET_INTERNAL; + pl_count = 0; + kr = memory_object_upl_request(mo_control, + offset, upl_size, + &upl, NULL, NULL, upl_flags); + if (kr != KERN_SUCCESS) { + retval = kr; + goto done; + } + dst_object = mo_control->moc_object; + assert(dst_object != VM_OBJECT_NULL); + + + /* + * Reserve a virtual page in the kernel address space to map each + * destination physical page when it's its turn to be processed. + */ + vm_object_reference(kernel_object); /* ref.
for mapping */ + kr = vm_map_find_space(kernel_map, + &kernel_mapping, + PAGE_SIZE_64, + 0, + 0, + &map_entry); + if (kr != KERN_SUCCESS) { + vm_object_deallocate(kernel_object); /* give back the reference taken just above */ + retval = kr; + goto done; + } + map_entry->object.vm_object = kernel_object; + map_entry->offset = kernel_mapping - VM_MIN_KERNEL_ADDRESS; + vm_map_unlock(kernel_map); + dst_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping); + dst_ptr = (char *) dst_vaddr; + + /* + * Fill in the contents of the pages requested by VM. + */ + upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl); + pl_count = length / PAGE_SIZE; + for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) { + ppnum_t dst_pnum; + + if (!upl_page_present(upl_pl, (int)(cur_offset / PAGE_SIZE))) { + /* this page is not in the UPL: skip it */ + continue; + } + + /* + * Establish an explicit pmap mapping of the destination + * physical page. + * We can't do a regular VM mapping because the VM page + * is "busy". + */ + dst_pnum = (ppnum_t) + upl_phys_page(upl_pl, (int)(cur_offset / PAGE_SIZE)); + assert(dst_pnum != 0); + pmap_enter(kernel_pmap, + kernel_mapping, + dst_pnum, + VM_PROT_READ | VM_PROT_WRITE, + dst_object->wimg_bits & VM_WIMG_MASK, + TRUE); + + memset(dst_ptr, '\0', PAGE_SIZE); /* zero-fill the page through the temporary kernel mapping */ + /* add an end-of-line to keep line counters happy */ + dst_ptr[PAGE_SIZE-1] = '\n'; + + /* + * Remove the pmap mapping of the destination page + * in the kernel. + */ + pmap_remove(kernel_pmap, + (addr64_t) kernel_mapping, + (addr64_t) (kernel_mapping + PAGE_SIZE_64)); + + } + + retval = KERN_SUCCESS; +done: + if (upl != NULL) { + /* clean up the UPL */ + + /* + * The pages are currently dirty because we've just been + * writing on them, but as far as we're concerned, they're + * clean since they contain their "original" contents as + * provided by us, the pager. + * Tell the UPL to mark them "clean".
+ */ + upl_clear_dirty(upl, TRUE); + + /* abort or commit the UPL */ + if (retval != KERN_SUCCESS) { + upl_abort(upl, 0); + } else { + boolean_t empty; + upl_commit_range(upl, 0, upl->size, + UPL_COMMIT_CS_VALIDATED, + upl_pl, pl_count, &empty); + } + + /* and deallocate the UPL */ + upl_deallocate(upl); + upl = NULL; + } + if (kernel_mapping != 0) { + /* release the kernel virtual page that was reserved to map the destination pages */ + kr = vm_map_remove(kernel_map, + kernel_mapping, + kernel_mapping + PAGE_SIZE_64, + VM_MAP_NO_FLAGS); + assert(kr == KERN_SUCCESS); + kernel_mapping = 0; + dst_vaddr = 0; + } + + return retval; +} + +/* + * swapfile_pager_reference() + * + * Get a reference on this memory object. + * For external usage only. Assumes that the initial reference count is not 0, + * i.e. one should not "revive" a dead pager this way. + * The reference count is updated while holding "swapfile_pager_lock". + */ +void +swapfile_pager_reference( + memory_object_t mem_obj) +{ + swapfile_pager_t pager; + + pager = swapfile_pager_lookup(mem_obj); + + lck_mtx_lock(&swapfile_pager_lock); + assert(pager->ref_count > 0); + pager->ref_count++; + lck_mtx_unlock(&swapfile_pager_lock); +} + + +/* + * swapfile_pager_dequeue: + * + * Removes a pager from the list of pagers. + * + * The caller must hold "swapfile_pager_lock". + */ +void +swapfile_pager_dequeue( + swapfile_pager_t pager) +{ + assert(!pager->is_mapped); + + queue_remove(&swapfile_pager_queue, + pager, + swapfile_pager_t, + pager_queue); + pager->pager_queue.next = NULL; /* clear the stale queue links of the removed pager */ + pager->pager_queue.prev = NULL; + + swapfile_pager_count--; +} + +/* + * swapfile_pager_terminate_internal: + * + * Trigger the asynchronous termination of the memory object associated + * with this pager. + * When the memory object is terminated, there will be one more call + * to memory_object_deallocate() (i.e. swapfile_pager_deallocate()) + * to finish the clean up. + * + * "swapfile_pager_lock" should not be held by the caller.
+ * We don't need the lock because the pager has already been removed from + * the pagers' list and is now ours exclusively. + */ +void +swapfile_pager_terminate_internal( + swapfile_pager_t pager) +{ + assert(pager->is_ready); + assert(!pager->is_mapped); + + if (pager->swapfile_vnode != NULL) { + pager->swapfile_vnode = NULL; /* just forget the vnode pointer; nothing is released here */ + } + + /* trigger the destruction of the memory object */ + memory_object_destroy(pager->pager_control, 0); +} + +/* + * swapfile_pager_deallocate_internal() + * + * Release a reference on this pager and free it when the last + * reference goes away. + * Can be called with swapfile_pager_lock held or not but always returns + * with it unlocked. + * "locked" tells whether the caller already holds swapfile_pager_lock; + * if it is FALSE, the lock is taken here first. + */ +void +swapfile_pager_deallocate_internal( + swapfile_pager_t pager, + boolean_t locked) +{ + if (! locked) { + lck_mtx_lock(&swapfile_pager_lock); + } + + /* drop a reference on this pager */ + pager->ref_count--; + + if (pager->ref_count == 1) { + /* + * Only the "named" reference is left, which means that + * no one is really holding on to this pager anymore. + * Terminate it. + */ + swapfile_pager_dequeue(pager); + /* the pager is all ours: no need for the lock now */ + lck_mtx_unlock(&swapfile_pager_lock); + swapfile_pager_terminate_internal(pager); + } else if (pager->ref_count == 0) { + /* + * Dropped the existence reference; the memory object has + * been terminated. Do some final cleanup and release the + * pager structure. + */ + lck_mtx_unlock(&swapfile_pager_lock); + if (pager->pager_control != MEMORY_OBJECT_CONTROL_NULL) { + memory_object_control_deallocate(pager->pager_control); + pager->pager_control = MEMORY_OBJECT_CONTROL_NULL; + } + kfree(pager, sizeof (*pager)); + pager = SWAPFILE_PAGER_NULL; + } else { + /* there are still plenty of references: keep going... */ + lck_mtx_unlock(&swapfile_pager_lock); + } + + /* caution: lock is not held on return...
*/ +} + +/* + * swapfile_pager_deallocate() + * + * Release a reference on this pager and free it when the last + * reference goes away. + * Thin wrapper: calls swapfile_pager_deallocate_internal() with + * "locked" set to FALSE. + */ +void +swapfile_pager_deallocate( + memory_object_t mem_obj) +{ + swapfile_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_deallocate: %p\n", mem_obj)); + pager = swapfile_pager_lookup(mem_obj); + swapfile_pager_deallocate_internal(pager, FALSE); +} + +/* + * swapfile_pager_terminate() + * + * Nothing to do here: logs the call through PAGER_DEBUG() and returns + * KERN_SUCCESS. + */ +kern_return_t +swapfile_pager_terminate( +#if !DEBUG + __unused +#endif + memory_object_t mem_obj) +{ + PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_terminate: %p\n", mem_obj)); + + return KERN_SUCCESS; +} + +/* + * swapfile_pager_synchronize() + * + * Does no I/O of its own: immediately reports the requested range + * ("offset", "length") as done through + * memory_object_synchronize_completed() and returns KERN_SUCCESS. + */ +kern_return_t +swapfile_pager_synchronize( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_size_t length, + __unused vm_sync_t sync_flags) +{ + swapfile_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_synchronize: %p\n", mem_obj)); + + pager = swapfile_pager_lookup(mem_obj); + + memory_object_synchronize_completed(pager->pager_control, + offset, length); + + return KERN_SUCCESS; +} + +/* + * swapfile_pager_map() + * + * This allows VM to let us, the EMM, know that this memory object + * is currently mapped one or more times. This is called by VM each time + * the memory object gets mapped and we take one extra reference on the + * memory object to account for all its mappings. + */ +kern_return_t +swapfile_pager_map( + memory_object_t mem_obj, + __unused vm_prot_t prot) +{ + swapfile_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, ("swapfile_pager_map: %p\n", mem_obj)); + + pager = swapfile_pager_lookup(mem_obj); + + lck_mtx_lock(&swapfile_pager_lock); + assert(pager->is_ready); + assert(pager->ref_count > 0); /* pager is alive */ + if (pager->is_mapped == FALSE) { + /* + * First mapping of this pager: take an extra reference + * that will remain until all the mappings of this pager + * are removed.
+ */ + pager->is_mapped = TRUE; + pager->ref_count++; + } + lck_mtx_unlock(&swapfile_pager_lock); + + return KERN_SUCCESS; +} + +/* + * swapfile_pager_last_unmap() + * + * This is called by VM when this memory object is no longer mapped anywhere. + * If the pager was marked as mapped, the extra "mapped" reference is + * dropped through swapfile_pager_deallocate_internal(). + */ +kern_return_t +swapfile_pager_last_unmap( + memory_object_t mem_obj) +{ + swapfile_pager_t pager; + + PAGER_DEBUG(PAGER_ALL, + ("swapfile_pager_last_unmap: %p\n", mem_obj)); + + pager = swapfile_pager_lookup(mem_obj); + + lck_mtx_lock(&swapfile_pager_lock); + if (pager->is_mapped) { + /* + * All the mappings are gone, so let go of the one extra + * reference that represents all the mappings of this pager. + */ + pager->is_mapped = FALSE; + swapfile_pager_deallocate_internal(pager, TRUE); + /* caution: deallocate_internal() released the lock ! */ + } else { + lck_mtx_unlock(&swapfile_pager_lock); + } + + return KERN_SUCCESS; +} + + +/* + * swapfile_pager_lookup() + * + * Convert a memory object into its swapfile pager. This is a plain + * cast; the pager ops pointer and the reference count are only checked + * with assert(). + */ +swapfile_pager_t +swapfile_pager_lookup( + memory_object_t mem_obj) +{ + swapfile_pager_t pager; + + pager = (swapfile_pager_t) mem_obj; + assert(pager->pager_ops == &swapfile_pager_ops); + assert(pager->ref_count > 0); + return pager; +} +/* + * swapfile_pager_create() + * + * Allocate a new pager for the vnode "vp" and add it to the list of + * pagers, unless a pager for the same vnode is already on that list, + * in which case the new pager is freed and the existing one is returned + * with an extra reference instead. + * Returns SWAPFILE_PAGER_NULL if the allocation fails. + */ +swapfile_pager_t +swapfile_pager_create( + struct vnode *vp) +{ + swapfile_pager_t pager, pager2; + memory_object_control_t control; + kern_return_t kr; + + pager = (swapfile_pager_t) kalloc(sizeof (*pager)); + if (pager == SWAPFILE_PAGER_NULL) { + return SWAPFILE_PAGER_NULL; + } + + /* + * The vm_map call takes both named entry ports and raw memory + * objects in the same parameter. We need to make sure that + * vm_map does not see this object as a named entry port. So, + * we reserve the second word in the object for a fake ip_kotype + * setting - that will tell vm_map to use it as a memory object.
+ */ + pager->pager_ops = &swapfile_pager_ops; + pager->pager_ikot = IKOT_MEMORY_OBJECT; + pager->is_ready = FALSE;/* not ready until it has a "name" */ + pager->ref_count = 1; /* setup reference */ + pager->is_mapped = FALSE; + pager->pager_control = MEMORY_OBJECT_CONTROL_NULL; + pager->swapfile_vnode = vp; + + lck_mtx_lock(&swapfile_pager_lock); + /* see if anyone raced us to create a pager for the same vnode */ + queue_iterate(&swapfile_pager_queue, + pager2, + swapfile_pager_t, + pager_queue) { + if (pager2->swapfile_vnode == vp) { + break; + } + } + if (! queue_end(&swapfile_pager_queue, + (queue_entry_t) pager2)) { + /* while we hold the lock, transfer our setup ref to winner */ + pager2->ref_count++; + /* we lost the race, down with the loser... */ + lck_mtx_unlock(&swapfile_pager_lock); + pager->swapfile_vnode = NULL; + kfree(pager, sizeof (*pager)); + /* ... and go with the winner */ + pager = pager2; + /* let the winner make sure the pager gets ready */ + return pager; + } + + /* enter new pager at the head of our list of pagers */ + queue_enter_first(&swapfile_pager_queue, + pager, + swapfile_pager_t, + pager_queue); + swapfile_pager_count++; + if (swapfile_pager_count > swapfile_pager_count_max) { + swapfile_pager_count_max = swapfile_pager_count; /* keep track of the highest pager count seen */ + } + lck_mtx_unlock(&swapfile_pager_lock); + + kr = memory_object_create_named((memory_object_t) pager, + 0, + &control); + assert(kr == KERN_SUCCESS); /* NOTE(review): "kr" is only checked by this assert() and "control" is not used afterwards in this function */ + + lck_mtx_lock(&swapfile_pager_lock); + /* the new pager is now ready to be used */ + pager->is_ready = TRUE; + lck_mtx_unlock(&swapfile_pager_lock); + + /* wakeup anyone waiting for this pager to be ready */ + thread_wakeup(&pager->is_ready); + + return pager; +} + +/* + * swapfile_pager_setup() + * + * Provide the caller with a memory object backed by the provided + * swapfile vnode "vp". If such a memory object already exists, + * re-use it, otherwise create a new memory object. + * Waits until the pager is marked ready before returning it. + * Returns MEMORY_OBJECT_NULL if a new pager could not be created.
+ */ +memory_object_t +swapfile_pager_setup( + struct vnode *vp) +{ + swapfile_pager_t pager; + + lck_mtx_lock(&swapfile_pager_lock); + + queue_iterate(&swapfile_pager_queue, + pager, + swapfile_pager_t, + pager_queue) { + if (pager->swapfile_vnode == vp) { + break; + } + } + if (queue_end(&swapfile_pager_queue, + (queue_entry_t) pager)) { + /* no existing pager for this vnode */ + pager = SWAPFILE_PAGER_NULL; + } else { + /* make sure pager doesn't disappear */ + pager->ref_count++; + } + + lck_mtx_unlock(&swapfile_pager_lock); + + if (pager == SWAPFILE_PAGER_NULL) { + pager = swapfile_pager_create(vp); + if (pager == SWAPFILE_PAGER_NULL) { + return MEMORY_OBJECT_NULL; + } + } + + lck_mtx_lock(&swapfile_pager_lock); + while (!pager->is_ready) { /* sleep on &pager->is_ready until the pager has been marked ready */ + lck_mtx_sleep(&swapfile_pager_lock, + LCK_SLEEP_DEFAULT, + &pager->is_ready, + THREAD_UNINT); + } + lck_mtx_unlock(&swapfile_pager_lock); + + return (memory_object_t) pager; +} +/* swapfile_pager_control(): return the memory object control stored in this pager (no reference is taken here). */ +memory_object_control_t +swapfile_pager_control( + memory_object_t mem_obj) +{ + swapfile_pager_t pager; + + pager = swapfile_pager_lookup(mem_obj); + + return pager->pager_control; +} diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index e551e62bd..7e68d60ca 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -62,6 +62,29 @@ * User-exported virtual memory functions. */ +/* + * There are three implementations of the "XXX_allocate" functionality in + * the kernel: mach_vm_allocate (for any task on the platform), vm_allocate + * (for a task with the same address space size, especially the current task), + * and vm32_vm_allocate (for the specific case of a 32-bit task). vm_allocate + * in the kernel should only be used on the kernel_task. vm32_vm_allocate only + * makes sense on platforms where a user task can either be 32 or 64, or the kernel + * task can be 32 or 64. mach_vm_allocate makes sense everywhere, and is preferred + * for new code. + * + * The entrypoints into the kernel are more complex.
All platforms support a + * mach_vm_allocate-style API (subsystem 4800) which operates with the largest + * size types for the platform. On platforms that only support U32/K32, + * subsystem 4800 is all you need. On platforms that support both U32 and U64, + * subsystem 3800 is used disambiguate the size of parameters, and they will + * always be 32-bit and call into the vm32_vm_allocate APIs. On non-U32/K32 platforms, + * the MIG glue should never call into vm_allocate directly, because the calling + * task and kernel_task are unlikely to use the same size parameters + * + * New VM call implementations should be added here and to mach_vm.defs + * (subsystem 4800), and use mach_vm_* "wide" types. + */ + #include #include @@ -463,6 +486,8 @@ mach_vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if ((mach_msg_type_number_t) size != size) + return KERN_INVALID_ARGUMENT; error = vm_map_copyin(map, (vm_map_address_t)addr, @@ -472,7 +497,8 @@ mach_vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -501,6 +527,16 @@ vm_read( if (map == VM_MAP_NULL) return(KERN_INVALID_ARGUMENT); + if (size > (unsigned)(mach_msg_type_number_t) -1) { + /* + * The kernel could handle a 64-bit "size" value, but + * it could not return the size of the data in "*data_size" + * without overflowing. + * Let's reject this "size" as invalid. 
+ */ + return KERN_INVALID_ARGUMENT; + } + error = vm_map_copyin(map, (vm_map_address_t)addr, (vm_map_size_t)size, @@ -509,7 +545,8 @@ vm_read( if (KERN_SUCCESS == error) { *data = (pointer_t) ipc_address; - *data_size = size; + *data_size = (mach_msg_type_number_t) size; + assert(*data_size == size); } return(error); } @@ -889,7 +926,7 @@ vm_map_64( kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, port, offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } @@ -922,7 +959,7 @@ vm_map( kr = mach_vm_map(target_map, &map_addr, map_size, map_mask, flags, port, obj_offset, copy, cur_protection, max_protection, inheritance); - *address = CAST_DOWN(vm_address_t, map_addr); + *address = CAST_DOWN(vm_offset_t, map_addr); return kr; } @@ -1050,7 +1087,7 @@ mach_vm_wire( if (map == VM_MAP_NULL) return KERN_INVALID_TASK; - if (access & ~VM_PROT_ALL) + if (access & ~VM_PROT_ALL || (start + size < start)) return KERN_INVALID_ARGUMENT; if (access != VM_PROT_NONE) { @@ -1611,9 +1648,9 @@ mach_vm_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal(map, + vm_map_trunc_page(offset), + disposition, ref_count); } kern_return_t @@ -1626,9 +1663,27 @@ vm_map_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_info(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal(map, + vm_map_trunc_page(offset), + disposition, ref_count); +} + +kern_return_t +mach_vm_page_info( + vm_map_t map, + mach_vm_address_t address, + vm_page_info_flavor_t flavor, + vm_page_info_t info, + mach_msg_type_number_t *count) +{ + kern_return_t kr; + + if (map == VM_MAP_NULL) { + return KERN_INVALID_ARGUMENT; + } + + kr = vm_map_page_info(map, address, flavor, info, count); + 
return kr; } /* map a (whole) upl into an address space */ @@ -1636,7 +1691,7 @@ kern_return_t vm_upl_map( vm_map_t map, upl_t upl, - vm_offset_t *dst_addr) + vm_address_t *dst_addr) { vm_map_offset_t map_addr; kern_return_t kr; @@ -1645,7 +1700,7 @@ vm_upl_map( return KERN_INVALID_ARGUMENT; kr = vm_map_enter_upl(map, upl, &map_addr); - *dst_addr = CAST_DOWN(vm_offset_t, map_addr); + *dst_addr = CAST_DOWN(vm_address_t, map_addr); return kr; } @@ -1840,9 +1895,9 @@ mach_make_memory_entry_64( /* * Force the creation of the VM object now. */ - if (map_size > (vm_map_size_t) VM_MAX_ADDRESS) { + if (map_size > (vm_map_size_t) ANON_MAX_SIZE) { /* - * LP64todo - for now, we can only allocate 4GB + * LP64todo - for now, we can only allocate 4GB-4096 * internal objects because the default pager can't * page bigger ones. Remove this when it can. */ @@ -2567,7 +2622,7 @@ mach_memory_entry_purgable_control( return(KERN_INVALID_ARGUMENT); if (control == VM_PURGABLE_SET_STATE && - (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) || + (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) return(KERN_INVALID_ARGUMENT); @@ -2640,7 +2695,7 @@ mach_destroy_memory_entry( assert(ip_kotype(port) == IKOT_NAMED_ENTRY); #endif /* MACH_ASSERT */ named_entry = (vm_named_entry_t)port->ip_kobject; - mutex_lock(&(named_entry)->Lock); + lck_mtx_lock(&(named_entry)->Lock); named_entry->ref_count -= 1; if(named_entry->ref_count == 0) { if (named_entry->is_sub_map) { @@ -2650,12 +2705,12 @@ mach_destroy_memory_entry( vm_object_deallocate(named_entry->backing.object); } /* else JMM - need to drop reference on pager in that case */ - mutex_unlock(&(named_entry)->Lock); + lck_mtx_unlock(&(named_entry)->Lock); kfree((void *) port->ip_kobject, sizeof (struct vm_named_entry)); } else - mutex_unlock(&(named_entry)->Lock); + 
lck_mtx_unlock(&(named_entry)->Lock); } /* Allow manipulation of individual page state. This is actually part of */ @@ -2752,7 +2807,7 @@ mach_memory_entry_range_op( offset_beg, offset_end, ops, - range); + (uint32_t *) range); vm_object_deallocate(object); @@ -3044,7 +3099,7 @@ vm_map_get_phys_page( kern_return_t kernel_object_iopl_request( /* forward */ vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3054,7 +3109,7 @@ kern_return_t kernel_object_iopl_request( vm_named_entry_t named_entry, memory_object_offset_t offset, - vm_size_t *upl_size, + upl_size_t *upl_size, upl_t *upl_ptr, upl_page_info_array_t user_page_list, unsigned int *page_list_count, @@ -3079,7 +3134,9 @@ kernel_object_iopl_request( if(*upl_size == 0) { if(offset >= named_entry->size) return(KERN_INVALID_RIGHT); - *upl_size = named_entry->size - offset; + *upl_size = (upl_size_t) (named_entry->size - offset); + if (*upl_size != named_entry->size - offset) + return KERN_INVALID_ARGUMENT; } if(caller_flags & UPL_COPYOUT_FROM) { if((named_entry->protection & VM_PROT_READ) diff --git a/EXTERNAL_HEADERS/ppc/Makefile b/osfmk/x86_64/Makefile similarity index 70% rename from EXTERNAL_HEADERS/ppc/Makefile rename to osfmk/x86_64/Makefile index 843510dfd..354a2f3db 100644 --- a/EXTERNAL_HEADERS/ppc/Makefile +++ b/osfmk/x86_64/Makefile @@ -7,19 +7,17 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS_PPC = +EXPORT_ONLY_FILES = -EXPORT_FILES = \ - _limits.h \ - limits.h +INSTALL_MD_DIR = x86_64 INSTALL_MD_LIST = -INSTALL_MD_DIR = +INSTALL_MD_LCL_LIST = -EXPORT_MD_LIST = ${EXPORT_FILES} +EXPORT_MD_LIST = ${EXPORT_ONLY_FILES} -EXPORT_MD_DIR = ppc +EXPORT_MD_DIR = x86_64 include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/i386/gcc.s b/osfmk/x86_64/bcopy.s similarity index 65% rename from 
osfmk/i386/gcc.s rename to osfmk/x86_64/bcopy.s index 06a6fb13c..249e621fd 100644 --- a/osfmk/i386/gcc.s +++ b/osfmk/x86_64/bcopy.s @@ -28,9 +28,9 @@ /* * @OSF_COPYRIGHT@ */ -/* +/* * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its @@ -58,15 +58,69 @@ #include -ENTRY(__divsi3) - movl 4(%esp), %eax - cdq - idivl 8(%esp), %eax +/* void *memcpy((void *) to, (const void *) from, (size_t) bcount) */ +/* rdi, rsi, rdx */ +/* + * Note: memcpy does not support overlapping copies + */ +ENTRY(memcpy) + movq %rdx,%rcx + shrq $3,%rcx /* copy by 64-bit words */ + cld /* copy forwards */ + rep + movsq + movq %rdx,%rcx + andq $7,%rcx /* any bytes left? */ + rep + movsb ret -ENTRY(__udivsi3) - movl 4(%esp), %eax - xorl %edx, %edx - divl 8(%esp), %eax +/* void bcopy((const char *) from, (char *) to, (unsigned int) count) */ +/* rdi, rsi, rdx */ + +ENTRY(bcopy_no_overwrite) + xchgq %rsi,%rdi + jmp EXT(memcpy) + +/* + * bcopy(src, dst, cnt) + * rdi, rsi, rdx + * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 + */ +ENTRY(bcopy) + xchgq %rsi,%rdi + movq %rdx,%rcx + + movq %rdi,%rax + subq %rsi,%rax + cmpq %rcx,%rax /* overlapping && src < dst? */ + jb 1f + + shrq $3,%rcx /* copy by 64-bit words */ + cld /* nope, copy forwards */ + rep + movsq + movq %rdx,%rcx + andq $7,%rcx /* any bytes left? */ + rep + movsb ret + /* ALIGN_TEXT */ +1: + addq %rcx,%rdi /* copy backwards */ + addq %rcx,%rsi + decq %rdi + decq %rsi + andq $7,%rcx /* any fractional bytes? 
+ */ + std + rep + movsb + movq %rdx,%rcx /* copy remainder by 64-bit words */ + shrq $3,%rcx + subq $7,%rsi + subq $7,%rdi + rep + movsq + cld + ret diff --git a/osfmk/i386/ast_check.c b/osfmk/x86_64/bzero.s similarity index 77% rename from osfmk/i386/ast_check.c rename to osfmk/x86_64/bzero.s index 9272e2246..cb2426300 100644 --- a/osfmk/i386/ast_check.c +++ b/osfmk/x86_64/bzero.s @@ -25,7 +25,7 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* + /* * @OSF_COPYRIGHT@ */ /* @@ -53,8 +53,50 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ +/* + */ + +#include /* + * void *memset(void * addr, int pattern, size_t length) */ -#include +ENTRY(memset) + movq %rdi, %r8 /* keep addr (arg1) for the return value */ + movq %rsi, %rax /* move pattern (arg2) to rax */ + movb %al,%ah /* fill out pattern */ + movw %ax,%cx + shll $16,%eax + movw %cx,%ax + mov %eax, %ecx + shlq $32,%rax + orq %rcx, %rax /* pattern byte now replicated in all 8 bytes of rax */ + cld /* reset direction flag */ + movq %rdx, %rcx /* mov quads first */ + shrq $3, %rcx + rep + stosq + movq %rdx,%rcx /* mov bytes */ + andq $7,%rcx + rep + stosb + movq %r8 ,%rax /* returns its first argument */ + ret + +/* + * void bzero(char * addr, size_t length) + */ +Entry(blkclr) +ENTRY(bzero) + movq %rsi,%rcx /* length (arg2) */ + xorq %rax,%rax /* value to store: zero */ + shrq $3,%rcx /* clear by 64-bit words first */ + cld + rep + stosq + movq %rsi,%rcx + andq $7,%rcx /* then the remaining (length & 7) bytes */ + rep + stosb + ret diff --git a/osfmk/x86_64/cswitch.s b/osfmk/x86_64/cswitch.s new file mode 100644 index 000000000..6abb9a22c --- /dev/null +++ b/osfmk/x86_64/cswitch.s @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License.
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ + +#include + +#include +#include +#include + +#ifdef SYMMETRY +#include +#endif + +#if AT386 +#include +#endif /* AT386 */ + +Entry(Load_context) + movq TH_KERNEL_STACK(%rdi),%rcx /* get kernel stack */ + leaq -IKS_SIZE-IEL_SIZE(%rcx),%rdx + addq EXT(kernel_stack_size)(%rip),%rdx /* point to stack top */ + movq %rcx,%gs:CPU_ACTIVE_STACK /* store stack address */ + movq %rdx,%gs:CPU_KERNEL_STACK /* store stack top */ + + movq %rdx,%rsp + movq %rdx,%rbp + + xorq %rdi,%rdi /* return zero (no old thread) */ + call EXT(thread_continue) + + +/* + * thread_t Switch_context( + * thread_t old, // %rdi + * thread_continue_t continuation, // %rsi + * thread_t new) // %rdx + */ +Entry(Switch_context) + popq %rax /* pop return PC */ + + /* Test for a continuation and skip all state saving if so...
+ */ + cmpq $0, %rsi + jne 5f /* continuation supplied: skip the register save below */ + movq %gs:CPU_KERNEL_STACK,%rcx /* get old kernel stack top */ + movq %rbx,KSS_RBX(%rcx) /* save registers */ + movq %rbp,KSS_RBP(%rcx) + movq %r12,KSS_R12(%rcx) + movq %r13,KSS_R13(%rcx) + movq %r14,KSS_R14(%rcx) + movq %r15,KSS_R15(%rcx) + movq %rax,KSS_RIP(%rcx) /* save return PC */ + movq %rsp,KSS_RSP(%rcx) /* save SP */ +5: + movq %rdi,%rax /* return old thread */ + /* new thread in %rdx */ + movq %rdx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ + movq TH_KERNEL_STACK(%rdx),%rdx /* get its kernel stack */ + lea -IKS_SIZE-IEL_SIZE(%rdx),%rcx + add EXT(kernel_stack_size)(%rip),%rcx /* point to stack top */ + + movq %rdx,%gs:CPU_ACTIVE_STACK /* set current stack */ + movq %rcx,%gs:CPU_KERNEL_STACK /* set stack top */ + + movq KSS_RSP(%rcx),%rsp /* switch stacks */ + movq KSS_RBX(%rcx),%rbx /* restore registers */ + movq KSS_RBP(%rcx),%rbp + movq KSS_R12(%rcx),%r12 + movq KSS_R13(%rcx),%r13 + movq KSS_R14(%rcx),%r14 + movq KSS_R15(%rcx),%r15 + jmp *KSS_RIP(%rcx) /* return old thread */ + + /* Thread_continue: calls the routine whose address is in %rbx, passing the value in %rax as its first argument. */ +Entry(Thread_continue) + movq %rax, %rdi /* load thread argument */ + xorq %rbp,%rbp /* zero frame pointer */ + call *%rbx /* call real continuation */ + + +/* + * thread_t Shutdown_context( + * thread_t thread, // %rdi + * void (*routine)(processor_t), // %rsi + * processor_t processor) // %rdx + * + * saves the kernel context of the thread, + * switches to the interrupt stack, + * continues the thread (with thread_continue), + * then runs routine on the interrupt stack.
+ * + * NOTE(review): no call to thread_continue is visible in the code below, + * which ends with "call *%rsi" followed by "hlt" -- confirm the third + * step of the description above. + */ +Entry(Shutdown_context) + movq %gs:CPU_KERNEL_STACK,%rcx /* get old kernel stack top */ + movq %rbx,KSS_RBX(%rcx) /* save registers */ + movq %rbp,KSS_RBP(%rcx) + movq %r12,KSS_R12(%rcx) + movq %r13,KSS_R13(%rcx) + movq %r14,KSS_R14(%rcx) + movq %r15,KSS_R15(%rcx) + popq KSS_RIP(%rcx) /* save return PC */ + movq %rsp,KSS_RSP(%rcx) /* save SP */ + + movq %gs:CPU_ACTIVE_STACK,%rcx /* get old kernel stack */ + movq %rdi,%rax /* get old thread */ + movq %rcx,TH_KERNEL_STACK(%rax) /* save old stack */ + + movq %gs:CPU_INT_STACK_TOP,%rsp /* switch to interrupt stack */ + + movq %rdx,%rdi /* processor arg to routine */ + call *%rsi /* call routine to run */ + hlt /* (should never return) */ + diff --git a/osfmk/i386/setjmp.s b/osfmk/x86_64/genassym.c similarity index 93% rename from osfmk/i386/setjmp.s rename to osfmk/x86_64/genassym.c index f6581f3ac..2fc719cff 100644 --- a/osfmk/i386/setjmp.s +++ b/osfmk/x86_64/genassym.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,3 +25,5 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + +#include "../i386/genassym.c" diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s new file mode 100644 index 000000000..0f96dcd72 --- /dev/null +++ b/osfmk/x86_64/idt64.s @@ -0,0 +1,1461 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License.
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ +#include +#include +#include +#include + +#if DEBUG +#define DEBUG_IDT64 1 +#endif + +/* + * This is the low-level trap and interrupt handling code associated with + * the IDT. It also includes system call handlers for sysenter/syscall. + * The IDT itself is defined in mp_desc.c. + * + * Code here is structured as follows: + * + * stubs Code called directly from an IDT vector. + * All entry points have the "idt64_" prefix and they are built + * using macros expanded by the inclusion of idt_table.h. + * This code performs vector-dependent identification and jumps + * into the dispatch code. + * + * dispatch The dispatch code is responsible for saving the thread state + * (which is either 64-bit or 32-bit) and then jumping to the + * class handler identified by the stub. + * + * returns Code to restore state and return to the previous context. 
+ * + * handlers There are several classes of handlers: + * interrupt - asynchronous events typically from external devices + * trap - synchronous events due to thread execution + * syscall - synchronous system call request + * fatal - fatal traps + */ + +/* + * Handlers: + */ +#define HNDL_ALLINTRS EXT(hndl_allintrs) +#define HNDL_ALLTRAPS EXT(hndl_alltraps) +#define HNDL_SYSENTER EXT(hndl_sysenter) +#define HNDL_SYSCALL EXT(hndl_syscall) +#define HNDL_UNIX_SCALL EXT(hndl_unix_scall) +#define HNDL_MACH_SCALL EXT(hndl_mach_scall) +#define HNDL_MDEP_SCALL EXT(hndl_mdep_scall) +#define HNDL_DIAG_SCALL EXT(hndl_diag_scall) +#define HNDL_DOUBLE_FAULT EXT(hndl_double_fault) +#define HNDL_MACHINE_CHECK EXT(hndl_machine_check) + +/* + * Nanosecond timing. + */ + +/* + * Nanotime returned in %rax. + * Computed from tsc based on the scale factor and an implicit 32 bit shift. + * This code must match what _rtc_nanotime_read does in + * machine_routines_asm.s. Failure to do so can + * result in "weird" timing results. + * + * Uses: %rsi, %rdi, %rdx, %rcx + */ +#define NANOTIME \ + movq %gs:CPU_NANOTIME,%rdi ; \ + RTC_NANOTIME_READ_FAST() + +/* + * Add 64-bit delta in register reg to timer pointed to by register treg. + */ +#define TIMER_UPDATE(treg,reg,offset) \ + addq reg,(offset)+TIMER_ALL(treg) /* add timer */ + +/* + * Add time delta to old timer and start new. 
+ * Uses: %rsi, %rdi, %rdx, %rcx, %rax + */ +#define TIMER_EVENT(old,new) \ + NANOTIME /* %rax := nanosecs */ ; \ + movq %rax,%rsi /* save timestamp */ ; \ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get thread */ ; \ + subq (old##_TIMER)+TIMER_TSTAMP(%rcx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rcx,%rax,old##_TIMER) /* update timer */ ; \ + leaq (new##_TIMER)(%rcx),%rcx /* point to new timer */; \ + movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ ; \ + movq %gs:CPU_PROCESSOR,%rdx /* get processor */ ; \ + movq %rcx,THREAD_TIMER(%rdx) /* set current timer */ ; \ + movq %rsi,%rax /* restore timestamp */ ; \ + subq (old##_STATE)+TIMER_TSTAMP(%rdx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rdx,%rax,old##_STATE) /* update timer */ ; \ + leaq (new##_STATE)(%rdx),%rcx /* point to new state */; \ + movq %rcx,CURRENT_STATE(%rdx) /* set current state */ ; \ + movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ + +/* + * Update time on user trap entry. + * Uses: %rsi, %rdi, %rdx, %rcx, %rax + */ +#define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) + +/* + * update time on user trap exit. + * Uses: %rsi, %rdi, %rdx, %rcx, %rax + */ +#define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) + +/* + * update time on interrupt entry. + * Uses: %rsi, %rdi, %rdx, %rcx, %rax + * Saves processor state info on stack. 
+ */ +#define TIME_INT_ENTRY \ + NANOTIME /* %rax := nanosecs */ ; \ + movq %rax,%gs:CPU_INT_EVENT_TIME /* save in cpu data */ ; \ + movq %rax,%rsi /* save timestamp */ ; \ + movq %gs:CPU_PROCESSOR,%rdx /* get processor */ ; \ + movq THREAD_TIMER(%rdx),%rcx /* get current timer */ ; \ + subq TIMER_TSTAMP(%rcx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rcx,%rax,0) /* update timer */ ; \ + movq KERNEL_TIMER(%rdx),%rcx /* get kernel timer */ ; \ + movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ ; \ + movq %rsi,%rax /* restore timestamp */ ; \ + movq CURRENT_STATE(%rdx),%rcx /* get current state */ ; \ + pushq %rcx /* save state */ ; \ + subq TIMER_TSTAMP(%rcx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rcx,%rax,0) /* update timer */ ; \ + leaq IDLE_STATE(%rdx),%rax /* get idle state */ ; \ + cmpq %rax,%rcx /* compare current */ ; \ + je 0f /* skip if equal */ ; \ + leaq SYSTEM_STATE(%rdx),%rcx /* get system state */ ; \ + movq %rcx,CURRENT_STATE(%rdx) /* set current state */ ; \ +0: movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ + +/* + * update time on interrupt exit. + * Uses: %rsi, %rdi, %rdx, %rcx, %rax + * Restores processor state info from stack. 
+ */ +#define TIME_INT_EXIT \ + NANOTIME /* %rax := nanosecs */ ; \ + movq %rax,%gs:CPU_INT_EVENT_TIME /* save in cpu data */ ; \ + movq %rax,%rsi /* save timestamp */ ; \ + movq %gs:CPU_PROCESSOR,%rdx /* get processor */ ; \ + movq KERNEL_TIMER(%rdx),%rcx /* get kernel timer */ ; \ + subq TIMER_TSTAMP(%rcx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rcx,%rax,0) /* update timer */ ; \ + movq THREAD_TIMER(%rdx),%rcx /* interrupted timer */ ; \ + movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ ; \ + movq %rsi,%rax /* restore timestamp */ ; \ + movq CURRENT_STATE(%rdx),%rcx /* get current state */ ; \ + subq TIMER_TSTAMP(%rcx),%rax /* compute elapsed */ ; \ + TIMER_UPDATE(%rcx,%rax,0) /* update timer */ ; \ + popq %rcx /* restore state */ ; \ + movq %rcx,CURRENT_STATE(%rdx) /* set current state */ ; \ + movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ + +/* + * Check for vtimers for task. + * task_reg is register pointing to current task + * thread_reg is register pointing to current thread + */ +#define TASK_VTIMER_CHECK(task_reg,thread_reg) \ + cmpl $0,TASK_VTIMERS(task_reg) ; \ + jz 1f ; \ + orl $(AST_BSD),%gs:CPU_PENDING_AST /* Set pending AST */ ; \ + lock ; \ + orl $(AST_BSD),ACT_AST(thread_reg) /* Set thread AST */ ; \ +1: ; \ + + +/* + * Macros for calling into C functions. + * The stack is 16-byte aligned by masking. 
+ */ +#define CCALL(fn) \ + mov %rsp, %r12 /* remember caller's %rsp (clobbers %r12) */ ;\ + and $0xFFFFFFFFFFFFFFF0, %rsp /* round %rsp down to 16 bytes */ ;\ + call EXT(fn) ;\ + mov %r12, %rsp /* restore caller's %rsp */ + +#define CCALL1(fn, arg1) \ + mov arg1, %rdi ;\ + CCALL(fn) + +#define CCALL2(fn, arg1, arg2) \ + mov arg1, %rdi ; mov arg2, %rsi /* 1st and 2nd C arguments */ ;\ + CCALL(fn) + +#define CCALL3(fn, arg1, arg2, arg3) \ + mov arg1, %rdi ;\ + mov arg2, %rsi ;\ + mov arg3, %rdx ;\ + CCALL(fn) + +#if 1 +#define PUSH_FUNCTION(func) \ + sub $8, %rsp ;\ + push %rax ;\ + leaq func(%rip), %rax ;\ + movq %rax, 8(%rsp) ;\ + pop %rax +#else +#define PUSH_FUNCTION(func) pushq func +#endif + +/* The wrapper for all non-special traps/interrupts */ +/* Everything up to PUSH_FUNCTION is just to output + * the interrupt number out to the postcode display + */ +#if DEBUG_IDT64 +#define IDT_ENTRY_WRAPPER(n, f) \ + push %rax ;\ + POSTCODE2(0x6400+n) ;\ + pop %rax ;\ + PUSH_FUNCTION(f) ;\ + pushq $(n) ;\ + jmp L_dispatch +#else +#define IDT_ENTRY_WRAPPER(n, f) \ + PUSH_FUNCTION(f) ;\ + pushq $(n) ;\ + jmp L_dispatch +#endif + +/* A trap that comes with an error code already on the stack */ +#define TRAP_ERR(n, f) \ + Entry(f) ;\ + IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS) + +/* A normal trap */ +#define TRAP(n, f) \ + Entry(f) ;\ + pushq $0 ;\ + IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS) + +#define USER_TRAP TRAP + +/* An interrupt */ +#define INTERRUPT(n) \ + Entry(_intr_ ## n) ;\ + pushq $0 ;\ + IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS) + +/* A trap with a special-case handler, hence we don't need to define anything */ +#define TRAP_SPC(n, f) +#define TRAP_IST(n, f) +#define USER_TRAP_SPC(n, f) + +/* Generate all the stubs */ +#include "idt_table.h" + +/* + * Common dispatch point. + * Determine what mode has been interrupted and save state accordingly.
+ */ +L_dispatch: + cmpq $(KERNEL64_CS), ISF64_CS(%rsp) + je L_64bit_dispatch + + swapgs + + cmpl $(USER_CS), ISF64_CS(%rsp) + je L_32bit_dispatch /* 32-bit user task */ + /* fall through to 64bit user dispatch */ + +/* + * Here for 64-bit user task or kernel + */ +L_64bit_dispatch: + subq $(ISS64_OFFSET), %rsp + movl $(SS_64), SS_FLAVOR(%rsp) + + /* + * Save segment regs - for completeness since they're not used. + */ + mov %fs, R64_FS(%rsp) + mov %gs, R64_GS(%rsp) + + /* Save general-purpose registers */ + mov %rax, R64_RAX(%rsp) + mov %rcx, R64_RCX(%rsp) + mov %rbx, R64_RBX(%rsp) + mov %rbp, R64_RBP(%rsp) + mov %r11, R64_R11(%rsp) + mov %r12, R64_R12(%rsp) + mov %r13, R64_R13(%rsp) + mov %r14, R64_R14(%rsp) + mov %r15, R64_R15(%rsp) + + /* cr2 is significant only for page-faults */ + mov %cr2, %rax + mov %rax, R64_CR2(%rsp) + + /* Other registers (which may contain syscall args) */ + mov %rdi, R64_RDI(%rsp) /* arg0 .. */ + mov %rsi, R64_RSI(%rsp) + mov %rdx, R64_RDX(%rsp) + mov %r10, R64_R10(%rsp) + mov %r8, R64_R8(%rsp) + mov %r9, R64_R9(%rsp) /* .. arg5 */ + + mov R64_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ + mov R64_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */ + mov R64_CS(%rsp), %esi /* %esi := cs for later */ + + jmp L_common_dispatch + +L_64bit_entry_reject: + /* + * Here for a 64-bit user attempting an invalid kernel entry. + */ + pushq %rax + leaq HNDL_ALLTRAPS(%rip), %rax + movq %rax, ISF64_TRAPFN+8(%rsp) + popq %rax + movq $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp) + jmp L_64bit_dispatch + +L_32bit_entry_check: + /* + * Check we're not a confused 64-bit user.
+ */ + cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP + jne L_64bit_entry_reject + /* fall through to 32-bit handler: */ + +L_32bit_dispatch: /* 32-bit user task */ + subq $(ISC32_OFFSET), %rsp + movl $(SS_32), SS_FLAVOR(%rsp) + + /* + * Save segment regs + */ + mov %ds, R32_DS(%rsp) + mov %es, R32_ES(%rsp) + mov %fs, R32_FS(%rsp) + mov %gs, R32_GS(%rsp) + + /* + * Save general 32-bit registers + */ + mov %eax, R32_EAX(%rsp) + mov %ebx, R32_EBX(%rsp) + mov %ecx, R32_ECX(%rsp) + mov %edx, R32_EDX(%rsp) + mov %ebp, R32_EBP(%rsp) + mov %esi, R32_ESI(%rsp) + mov %edi, R32_EDI(%rsp) + + /* Unconditionally save cr2; only meaningful on page faults */ + mov %cr2, %rax + mov %eax, R32_CR2(%rsp) + + /* + * Copy registers already saved in the machine state + * (in the interrupt stack frame) into the compat save area. + */ + mov ISC32_RIP(%rsp), %eax + mov %eax, R32_EIP(%rsp) + mov ISC32_RFLAGS(%rsp), %eax + mov %eax, R32_EFLAGS(%rsp) + mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */ + + mov %esi, R32_CS(%rsp) + mov ISC32_RSP(%rsp), %eax + mov %eax, R32_UESP(%rsp) + mov ISC32_SS(%rsp), %eax + mov %eax, R32_SS(%rsp) +L_32bit_dispatch_after_fault: + mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ + mov %ebx, R32_TRAPNO(%rsp) + mov ISC32_ERR(%rsp), %eax + mov %eax, R32_ERR(%rsp) + mov ISC32_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */ + +L_common_dispatch: + /* + * On entering the kernel, we don't need to switch cr3 + * because the kernel shares the user's address space. + * But we mark the kernel's cr3 as "active". + * If, however, the invalid cr3 flag is set, we have to flush tlbs + * since the kernel's mapping was changed while we were in userspace. + * + * But: if global no_shared_cr3 is TRUE we do switch to the kernel's cr3 + * so that illicit accesses to userspace can be trapped. + */ + mov %gs:CPU_KERNEL_CR3, %rcx + mov %rcx, %gs:CPU_ACTIVE_CR3 + test $3, %esi /* user/kernel? 
*/ + jz 1f /* skip cr3 reload from kernel */ + xor %rbp, %rbp + cmpl $0, EXT(no_shared_cr3)(%rip) + je 1f + mov %rcx, %cr3 /* load kernel cr3 */ + jmp 2f /* and skip tlb flush test */ +1: + cmpl $0, %gs:CPU_TLB_INVALID /* flush needed? */ + je 2f /* - no */ + movl $0, %gs:CPU_TLB_INVALID + mov %cr3, %rcx + mov %rcx, %cr3 +2: + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Get the active thread */ + cmpq $0, ACT_PCB_IDS(%rcx) /* Is there a debug register state? */ + je 3f + mov $0, %rcx /* If so, reset DR7 (the control) */ + mov %rcx, %dr7 +3: + addl $1,%gs:hwIntCnt(,%ebx,4) // Bump the trap/intr count + /* Dispatch the designated handler */ + mov %rsp, %rdi /* rsp points to saved state */ + jmp *%rdx + +/* + * Control is passed here to return to user. + */ +Entry(return_to_user) + TIME_TRAP_UEXIT + +Entry(ret_to_user) +// XXX 'Be nice to tidy up this debug register restore sequence... + mov %gs:CPU_ACTIVE_THREAD, %rdx + movq ACT_PCB_IDS(%rdx),%rax /* Obtain this thread's debug state */ + + cmpq $0,%rax /* Is there a debug register context? */ + je 2f /* branch if not */ + cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */ + jne 1f + movl DS_DR0(%rax), %ecx /* If so, load the 32 bit DRs */ + movq %rcx, %dr0 + movl DS_DR1(%rax), %ecx + movq %rcx, %dr1 + movl DS_DR2(%rax), %ecx + movq %rcx, %dr2 + movl DS_DR3(%rax), %ecx + movq %rcx, %dr3 + movl DS_DR7(%rax), %ecx + movq %rcx, %gs:CPU_DR7 + jmp 2f +1: + mov DS64_DR0(%rax), %rcx /* Load the full width DRs*/ + mov %rcx, %dr0 + mov DS64_DR1(%rax), %rcx + mov %rcx, %dr1 + mov DS64_DR2(%rax), %rcx + mov %rcx, %dr2 + mov DS64_DR3(%rax), %rcx + mov %rcx, %dr3 + mov DS64_DR7(%rax), %rcx + mov %rcx, %gs:CPU_DR7 +2: + /* + * On exiting the kernel there's no need to switch cr3 since we're + * already running in the user's address space which includes the + * kernel. Nevertheless, we now mark the task's cr3 as active. + * However, there may be a deferred tlb flush to deal with.
+ * This is a case where another cpu modified this task's address + * space while this thread was in the kernel. + * But, if no_shared_cr3 is set, we do need to switch cr3 at this point. + */ + mov %gs:CPU_TASK_CR3, %rcx + mov %rcx, %gs:CPU_ACTIVE_CR3 + movl %gs:CPU_TLB_INVALID, %eax + orl EXT(no_shared_cr3)(%rip), %eax + test %eax, %eax /* -no_shared_cr3 or flush required? */ + jz 3f + movl $0, %gs:CPU_TLB_INVALID + mov %rcx, %cr3 +3: + + mov %gs:CPU_DR7, %rax /* Is there a debug control register?*/ + cmp $0, %rax + je 4f + mov %rax, %dr7 /* Set DR7 */ + movq $0, %gs:CPU_DR7 +4: + cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */ + je L_64bit_return + +L_32bit_return: +#if DEBUG_IDT64 + cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */ + je 1f + cli + POSTCODE2(0x6432) + CCALL1(panic_idt64, %rsp) +1: +#endif /* DEBUG_IDT64 */ + + /* + * Restore registers into the machine state for iret. + */ + movl R32_EIP(%rsp), %eax + movl %eax, ISC32_RIP(%rsp) + movl R32_EFLAGS(%rsp), %eax + movl %eax, ISC32_RFLAGS(%rsp) + movl R32_CS(%rsp), %eax + movl %eax, ISC32_CS(%rsp) + movl R32_UESP(%rsp), %eax + movl %eax, ISC32_RSP(%rsp) + movl R32_SS(%rsp), %eax + movl %eax, ISC32_SS(%rsp) + + /* + * Restore general 32-bit registers + */ + movl R32_EAX(%rsp), %eax + movl R32_EBX(%rsp), %ebx + movl R32_ECX(%rsp), %ecx + movl R32_EDX(%rsp), %edx + movl R32_EBP(%rsp), %ebp + movl R32_ESI(%rsp), %esi + movl R32_EDI(%rsp), %edi + + /* + * Restore segment registers.
We may take an exception here but + * we've got enough space left in the save frame area to absorb + * a hardware frame plus the trapfn and trapno + */ + swapgs +EXT(ret32_set_ds): + movw R32_DS(%rsp), %ds +EXT(ret32_set_es): + movw R32_ES(%rsp), %es +EXT(ret32_set_fs): + movw R32_FS(%rsp), %fs +EXT(ret32_set_gs): + movw R32_GS(%rsp), %gs + + /* pop compat frame + trapno, trapfn and error */ + add $(ISC32_OFFSET)+8+8+8, %rsp + cmp $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp) + /* test for fast entry/exit */ + je L_fast_exit +EXT(ret32_iret): + iretq /* return from interrupt */ + +L_fast_exit: + pop %rdx /* user return eip */ + pop %rcx /* pop and toss cs */ + andl $(~EFL_IF), (%rsp) /* clear interrupts enable, sti below */ + popf /* flags - carry denotes failure */ + pop %rcx /* user return esp */ + sti /* interrupts enabled after sysexit */ + sysexit /* 32-bit sysexit */ + +ret_to_kernel: +#if DEBUG_IDT64 + cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */ + je 1f + cli + POSTCODE2(0x6464) + CCALL1(panic_idt64, %rsp) + hlt +1: + cmpq $(KERNEL64_CS), R64_CS(%rsp) + je 2f + CCALL1(panic_idt64, %rsp) + hlt +2: +#endif + +L_64bit_return: + testb $3, R64_CS(%rsp) /* returning to user-space? */ + jz 1f + swapgs +1: + + /* + * Restore general 64-bit registers + */ + mov R64_R15(%rsp), %r15 + mov R64_R14(%rsp), %r14 + mov R64_R13(%rsp), %r13 + mov R64_R12(%rsp), %r12 + mov R64_R11(%rsp), %r11 + mov R64_R10(%rsp), %r10 + mov R64_R9(%rsp), %r9 + mov R64_R8(%rsp), %r8 + mov R64_RSI(%rsp), %rsi + mov R64_RDI(%rsp), %rdi + mov R64_RBP(%rsp), %rbp + mov R64_RDX(%rsp), %rdx + mov R64_RBX(%rsp), %rbx + mov R64_RCX(%rsp), %rcx + mov R64_RAX(%rsp), %rax + + add $(ISS64_OFFSET)+24, %rsp /* pop saved state frame + + trapno + trapfn and error */ + cmpl $(SYSCALL_CS),ISF64_CS-24(%rsp) + /* test for fast entry/exit */ + je L_sysret +.globl _dump_iretq +EXT(ret64_iret): + iretq /* return from interrupt */ + +L_sysret: + /* + * Here to load rcx/r11/rsp and perform the sysret back to user-space.
+ * rcx user rip + * r11 user rflags + * rsp user stack pointer + */ + mov ISF64_RIP-24(%rsp), %rcx + mov ISF64_RFLAGS-24(%rsp), %r11 + mov ISF64_RSP-24(%rsp), %rsp + sysretq /* return from system call */ + + + +/* + * System call handlers. + * These are entered via a syscall interrupt. The system call number in %rax + * is saved to the error code slot in the stack frame. We then branch to the + * common state saving code. + */ + +#ifndef UNIX_INT +#error NO UNIX INT!!! +#endif +Entry(idt64_unix_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_unix_scall_continue: + pushq %rax /* save system call number */ + PUSH_FUNCTION(HNDL_UNIX_SCALL) + pushq $(UNIX_INT) + jmp L_32bit_entry_check + + +Entry(idt64_mach_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_mach_scall_continue: + pushq %rax /* save system call number */ + PUSH_FUNCTION(HNDL_MACH_SCALL) + pushq $(MACH_INT) + jmp L_32bit_entry_check + + +Entry(idt64_mdep_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_mdep_scall_continue: + pushq %rax /* save system call number */ + PUSH_FUNCTION(HNDL_MDEP_SCALL) + pushq $(MACHDEP_INT) + jmp L_32bit_entry_check + + +Entry(idt64_diag_scall) + swapgs /* switch to kernel gs (cpu_data) */ +L_diag_scall_continue: + push %rax /* save system call number */ + PUSH_FUNCTION(HNDL_DIAG_SCALL) + pushq $(DIAG_INT) + jmp L_32bit_entry_check + +Entry(hi64_syscall) +Entry(idt64_syscall) + swapgs /* Kapow! get per-cpu data area */ +L_syscall_continue: + mov %rsp, %gs:CPU_UBER_TMP /* save user stack */ + mov %gs:CPU_UBER_ISF, %rsp /* switch stack to pcb */ + + /* + * Save values in the ISF frame in the PCB + * to cons up the saved machine state.
+ */ + movl $(USER_DS), ISF64_SS(%rsp) + movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */ + mov %r11, ISF64_RFLAGS(%rsp) /* rflags */ + mov %rcx, ISF64_RIP(%rsp) /* rip */ + mov %gs:CPU_UBER_TMP, %rcx + mov %rcx, ISF64_RSP(%rsp) /* user stack */ + mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */ + movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */ + leaq HNDL_SYSCALL(%rip), %r11; + movq %r11, ISF64_TRAPFN(%rsp) + jmp L_64bit_dispatch /* this can only be a 64-bit task */ + +/* + * sysenter entry point + * Requires user code to set up: + * edx: user instruction pointer (return address) + * ecx: user stack pointer + * on which is pushed stub ret addr and saved ebx + * Return to user-space is made using sysexit. + * Note: sysenter/sysexit cannot be used for calls returning a value in edx, + * or requiring ecx to be preserved. + */ +Entry(hi64_sysenter) +Entry(idt64_sysenter) + movq (%rsp), %rsp + /* + * Push values on to the PCB stack + * to cons up the saved machine state. + */ + push $(USER_DS) /* ss */ + push %rcx /* uesp */ + pushf /* flags */ + push $(SYSENTER_CS) /* cs */ + swapgs /* switch to kernel gs (cpu_data) */ +L_sysenter_continue: + push %rdx /* eip */ + push %rax /* err/eax - syscall code */ + PUSH_FUNCTION(HNDL_SYSENTER) + pushq $(T_SYSENTER) + orl $(EFL_IF), ISF64_RFLAGS(%rsp) + jmp L_32bit_entry_check + + +Entry(idt64_page_fault) + PUSH_FUNCTION(HNDL_ALLTRAPS) + push %rax /* save %rax temporarily in trap slot */ + leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + jne 1f + add $(ISF64_SIZE), %rsp /* remove entire intr stack frame */ + jmp L_copy_args_continue /* continue system call entry */ +1: + mov (%rsp), %rax /* restore %rax from trap slot */ + movq $(T_PAGE_FAULT), (%rsp) /* set trap code */ + jmp L_dispatch /* unconditional: must not fall through into idt64_debug */ + + +/* + * Debug trap. Check for single-stepping across system call into + * kernel.
If this is the case, taking the debug trap has turned + * off single-stepping - save the flags register with the trace + * bit set. + */ +Entry(idt64_debug) + push $0 /* error code */ + PUSH_FUNCTION(HNDL_ALLTRAPS) + pushq $(T_DEBUG) + + testb $3, ISF64_CS(%rsp) + jnz L_dispatch + + /* + * trap came from kernel mode + */ + + push %rax /* save %rax temporarily */ + + leaq EXT(idt64_mach_scall)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + jne 1f + pop %rax + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_mach_scall_continue /* continue system call entry */ +1: + leaq EXT(idt64_mdep_scall)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + jne 2f + pop %rax + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_mdep_scall_continue /* continue system call entry */ +2: + leaq EXT(idt64_unix_scall)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + jne 3f + pop %rax + add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ + jmp L_unix_scall_continue /* continue system call entry */ +3: + lea EXT(idt64_sysenter)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + je 4f + pop %rax + jmp L_dispatch +4: + pop %rax + /* + * Interrupt stack frame has been pushed on the temporary stack. + * We have to switch to pcb stack and copy eflags. 
+ */ + add $40,%rsp /* remove trapno/trapfn/err/rip/cs */ + push %rcx /* save %rcx - user stack pointer */ + mov 40(%rsp),%rcx /* top of intr stack -> pcb stack */ + xchg %rcx,%rsp /* switch to pcb stack */ + push $(USER_DS) /* ss */ + push (%rcx) /* saved %rcx into rsp slot */ + push 8(%rcx) /* rflags */ + mov (%rcx),%rcx /* restore %rcx */ + push $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */ + jmp L_sysenter_continue /* continue sysenter entry */ + + + +Entry(idt64_double_fault) + PUSH_FUNCTION(HNDL_DOUBLE_FAULT) + pushq $(T_DOUBLE_FAULT) + + push %rax + leaq EXT(idt64_syscall)(%rip), %rax + cmp %rax, ISF64_RIP(%rsp) + pop %rax + jne L_dispatch + + mov ISF64_RSP(%rsp), %rsp + jmp L_syscall_continue + + +/* + * General protection or segment-not-present fault. + * Check for a GP/NP fault in the kernel_return + * sequence; if there, report it as a GP/NP fault on the user's instruction. + * + * rsp-> 0: trap function + * 8: trap code (NP or GP) + * 16: segment number in error (error code) + * 24: rip + * 32: cs + * 40: rflags + * 48: rsp + * 56: ss + * 64: old registers (trap is from kernel) + */ +Entry(idt64_gen_prot) + PUSH_FUNCTION(HNDL_ALLTRAPS) + pushq $(T_GENERAL_PROTECTION) + jmp trap_check_kernel_exit /* check for kernel exit sequence */ + +Entry(idt64_stack_fault) + PUSH_FUNCTION(HNDL_ALLTRAPS) + pushq $(T_STACK_FAULT) + jmp trap_check_kernel_exit /* check for kernel exit sequence */ + +Entry(idt64_segnp) + PUSH_FUNCTION(HNDL_ALLTRAPS) + pushq $(T_SEGMENT_NOT_PRESENT) + /* indicate fault type */ +trap_check_kernel_exit: + testb $3,32(%rsp) + jnz L_dispatch + /* + * trap was from kernel mode, + * so check for the kernel exit sequence + */ + push %rax + + leaq EXT(ret32_iret)(%rip), %rax + cmp %rax, 24+8(%rsp) + je L_fault_iret + leaq EXT(ret64_iret)(%rip), %rax + cmp %rax, 24+8(%rsp) + je L_fault_iret + leaq EXT(ret32_set_ds)(%rip), %rax + cmp %rax, 24+8(%rsp) + je L_32bit_fault_set_seg + leaq EXT(ret32_set_es)(%rip), %rax + cmp %rax, 24+8(%rsp) 
+ je L_32bit_fault_set_seg + leaq EXT(ret32_set_fs)(%rip), %rax + cmp %rax, 24+8(%rsp) + je L_32bit_fault_set_seg + leaq EXT(ret32_set_gs)(%rip), %rax + cmp %rax, 24+8(%rsp) + je L_32bit_fault_set_seg + + leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax + cmp %rax, 24+8(%rsp) + cmove 48+8(%rsp), %rsp /* fault in arg copy: unwind to the interrupted rsp; cmov leaves ZF intact */ + je L_copy_args_continue + + pop %rax + jmp L_dispatch + + +/* + * GP/NP fault on IRET: CS or SS is in error. + * Note that the user ss is originally 16-byte aligned, we'd popped the + * stack back to contain just the rip/cs/rflags/rsp/ss before issuing the iret. + * On taking the GP/NP fault on the iret instruction, the stack is 16-byte + * aligned before the interrupt frame is pushed. Hence, an 8-byte padding exists. + * + * on SP is + * (- rax saved above, which is immediately popped) + * 0 function + * 8 trap number + * 16 errcode + * 24 rip + * 32 cs + * 40 rflags + * 48 rsp --> new trapfn + * 56 ss --> new trapno + * 64 pad --> new errcode + * 72 user rip + * 80 user cs + * 88 user rflags + * 96 user rsp + * 104 user ss (16-byte aligned) + */ +L_fault_iret: + pop %rax /* recover saved %rax */ + mov %rax, 24(%rsp) /* save rax (we don't need saved rip) */ + mov 0(%rsp), %rax /* get trap func */ + mov %rax, 48(%rsp) /* put in user trap func */ + mov 8(%rsp), %rax /* get trap number */ + mov %rax, 56(%rsp) /* put in user trap number */ + mov 16(%rsp), %rax /* get error code */ + mov %rax, 64(%rsp) /* put in user errcode */ + mov 24(%rsp), %rax /* restore rax */ + add $48,%rsp /* reset to new trapfn */ + /* now treat as fault from user */ + jmp L_dispatch + +/* + * Fault restoring a segment register. All of the saved state is still + * on the stack untouched since we haven't yet moved the stack pointer.
+ */ +L_32bit_fault_set_seg: + pop %rax /* recover %rax from stack */ + mov 0(%rsp), %rax /* get trap function */ + mov 8(%rsp), %rcx /* get trap number */ + mov 16(%rsp), %rdx /* get error code */ + mov 48(%rsp), %rsp /* reset stack to saved state */ + mov %rax,ISC32_TRAPFN(%rsp) + mov %rcx,ISC32_TRAPNO(%rsp) + mov %rdx,ISC32_ERR(%rsp) + /* now treat as fault from user */ + /* except that all the state is */ + /* already saved - we just have to */ + /* move the trapno and error into */ + /* the compatibility frame */ + jmp L_32bit_dispatch_after_fault + + +/* + * Fatal exception handlers: + */ +Entry(idt64_db_task_dbl_fault) + PUSH_FUNCTION(HNDL_DOUBLE_FAULT) + pushq $(T_DOUBLE_FAULT) + jmp L_dispatch + +Entry(idt64_db_task_stk_fault) + PUSH_FUNCTION(HNDL_DOUBLE_FAULT) + pushq $(T_STACK_FAULT) + jmp L_dispatch + +Entry(idt64_mc) + push $(0) /* Error */ + PUSH_FUNCTION(HNDL_MACHINE_CHECK) + pushq $(T_MACHINE_CHECK) + jmp L_dispatch + + +/* All 'exceptions' enter hndl_alltraps: + * rsp -> x86_saved_state_t + * esi cs at trap + * + * The rest of the state is set up as: + * interrupts disabled + * direction flag cleared + */ +Entry(hndl_alltraps) + mov %esi, %eax + testb $3, %al + jz trap_from_kernel + + TIME_TRAP_UENTRY + + movq %gs:CPU_ACTIVE_THREAD,%rdi + movq %rsp, ACT_PCB_ISS(%rdi) /* stash the PCB stack */ + movq %rsp, %rdi /* also pass it as arg0 */ + movq %gs:CPU_KERNEL_STACK,%rsp /* switch to kernel stack */ + sti + + CCALL(user_trap) /* call user trap routine */ + cli /* hold off intrs - critical section */ + movq %gs:CPU_ACTIVE_THREAD,%rsp + movq ACT_PCB_ISS(%rsp), %rsp /* switch back to PCB stack */ + xorl %ecx, %ecx /* don't check if we're in the PFZ */ + +#define CLI cli +#define STI sti + +Entry(return_from_trap) + movl %gs:CPU_PENDING_AST,%eax + testl %eax,%eax + je EXT(return_to_user) /* branch if no AST */ + +L_return_from_trap_with_ast: + movq %rsp, %r13 + movq %gs:CPU_KERNEL_STACK, %rsp + + testl %ecx, %ecx /* see if we need to check for an EIP in 
the PFZ */ + je 2f /* no, go handle the AST */ + cmpl $(SS_64), SS_FLAVOR(%r13) /* are we a 64-bit task? */ + je 1f + /* no... 32-bit user mode */ + movl R32_EIP(%r13), %edi + CCALL(commpage_is_in_pfz32) + testl %eax, %eax + je 2f /* not in the PFZ... go service AST */ + movl %eax, R32_EBX(%r13) /* let the PFZ know we've pended an AST */ + movq %r13, %rsp /* switch back to PCB stack */ + jmp EXT(return_to_user) +1: + movq R64_RIP(%r13), %rdi + CCALL(commpage_is_in_pfz64) + testl %eax, %eax + je 2f /* not in the PFZ... go service AST */ + movl %eax, R64_RBX(%r13) /* let the PFZ know we've pended an AST */ + movq %r13, %rsp /* switch back to PCB stack */ + jmp EXT(return_to_user) +2: + STI /* interrupts always enabled on return to user mode */ + + xor %edi, %edi /* zero %rdi */ + CCALL(i386_astintr) /* take the AST */ + + CLI + movq %r13, %rsp /* switch back to PCB stack */ + + xorl %ecx, %ecx /* don't check if we're in the PFZ */ + jmp EXT(return_from_trap) /* and check again (rare) */ + +/* + * Trap from kernel mode. No need to switch stacks. + * Interrupts must be off here - we will set them to state at time of trap + * as soon as it's safe for us to do so and not recurse doing preemption + */ +hndl_kerntrap: +trap_from_kernel: + + movq %rsp, %rdi /* saved state addr */ + pushq R64_RIP(%rsp) /* Simulate a CALL from fault point */ + pushq %rbp /* Extend framepointer chain */ + movq %rsp, %rbp + CCALL(kernel_trap) /* to kernel trap routine */ + popq %rbp + addq $8, %rsp + cli + + movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ + testl $(AST_URGENT),%eax /* any urgent preemption? */ + je ret_to_kernel /* no, nothing to do */ + cmpl $(T_PREEMPT),R64_TRAPNO(%rsp) + je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ + testl $(EFL_IF),R64_RFLAGS(%rsp) /* interrupts disabled? */ + je ret_to_kernel + cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? 
*/ + jne ret_to_kernel + movq %gs:CPU_KERNEL_STACK,%rax + movq %rsp,%rcx + xorq %rax,%rcx + andq EXT(kernel_stack_mask)(%rip),%rcx + testq %rcx,%rcx /* are we on the kernel stack? */ + jne ret_to_kernel /* no, skip it */ + + CCALL1(i386_astintr, $1) /* take the AST */ + jmp ret_to_kernel + + +/* + * All interrupts on all tasks enter here with: + * rsp-> x86_saved_state_t + * esi cs at trap + * + * interrupts disabled + * direction flag cleared + */ +Entry(hndl_allintrs) + /* + * test whether already on interrupt stack + */ + movq %gs:CPU_INT_STACK_TOP,%rcx + cmpq %rsp,%rcx + jb 1f + leaq -INTSTACK_SIZE(%rcx),%rdx + cmpq %rsp,%rdx + jb int_from_intstack +1: + xchgq %rcx,%rsp /* switch to interrupt stack */ + + mov %cr0,%rax /* get cr0 */ + orl $(CR0_TS),%eax /* or in TS bit */ + mov %rax,%cr0 /* set cr0 */ + + subq $8, %rsp /* for 16-byte stack alignment */ + pushq %rcx /* save pointer to old stack */ + movq %rcx,%gs:CPU_INT_STATE /* save intr state */ + + TIME_INT_ENTRY /* do timing */ + + incl %gs:CPU_PREEMPTION_LEVEL + incl %gs:CPU_INTERRUPT_LEVEL + + movq %gs:CPU_INT_STATE, %rdi + + CCALL(interrupt) /* call generic interrupt routine */ + + cli /* just in case we returned with intrs enabled */ + xor %rax,%rax + movq %rax,%gs:CPU_INT_STATE /* clear intr state pointer */ + + .globl EXT(return_to_iret) +LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ + + decl %gs:CPU_INTERRUPT_LEVEL + decl %gs:CPU_PREEMPTION_LEVEL + + TIME_INT_EXIT /* do timing */ + + movq %gs:CPU_ACTIVE_THREAD,%rax + movq ACT_PCB(%rax),%rax /* get act`s PCB */ + movq PCB_FPS(%rax),%rax /* get pcb's ims.ifps */ + cmpq $0,%rax /* Is there a context */ + je 1f /* Branch if not */ + movl FP_VALID(%rax),%eax /* Load fp_valid */ + cmpl $0,%eax /* Check if valid */ + jne 1f /* Branch if valid */ + clts /* Clear TS */ + jmp 2f +1: + mov %cr0,%rax /* get cr0 */ + orl $(CR0_TS),%eax /* or in TS bit */ + mov %rax,%cr0 /* set cr0 */ +2: + popq %rsp /* switch back to old stack */ + + /* Load 
interrupted code segment into %eax */ + movl R32_CS(%rsp),%eax /* assume 32-bit state */ + cmpl $(SS_64),SS_FLAVOR(%rsp)/* 64-bit? */ +#if DEBUG_IDT64 + jne 4f + movl R64_CS(%rsp),%eax /* 64-bit user mode */ + jmp 3f +4: + cmpl $(SS_32),SS_FLAVOR(%rsp) + je 3f + POSTCODE2(0x6431) + CCALL1(panic_idt64, %rsp) + hlt +#else + jne 3f + movl R64_CS(%rsp),%eax /* 64-bit user mode */ +#endif +3: + testb $3,%al /* user mode, */ + jnz ast_from_interrupt_user /* go handle potential ASTs */ + /* + * we only want to handle preemption requests if + * the interrupt fell in the kernel context + * and preemption isn't disabled + */ + movl %gs:CPU_PENDING_AST,%eax + testl $(AST_URGENT),%eax /* any urgent requests? */ + je ret_to_kernel /* no, nothing to do */ + + cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ + jne ret_to_kernel /* yes, skip it */ + + movq %gs:CPU_KERNEL_STACK,%rax + movq %rsp,%rcx + xorq %rax,%rcx + andq EXT(kernel_stack_mask)(%rip),%rcx + testq %rcx,%rcx /* are we on the kernel stack? */ + jne ret_to_kernel /* no, skip it */ + + /* + * Take an AST from kernel space. We don't need (and don't want) + * to do as much as the case where the interrupt came from user + * space. + */ + CCALL1(i386_astintr, $1) + + jmp ret_to_kernel + + +/* + * nested int - simple path, can't preempt etc on way out + */ +int_from_intstack: + incl %gs:CPU_PREEMPTION_LEVEL + incl %gs:CPU_INTERRUPT_LEVEL + + mov %rsp, %rdi /* x86_saved_state */ + CCALL(interrupt) + + decl %gs:CPU_INTERRUPT_LEVEL + decl %gs:CPU_PREEMPTION_LEVEL + +#if DEBUG_IDT64 + CCALL1(panic_idt64, %rsp) + POSTCODE2(0x6411) + hlt +#endif + jmp ret_to_kernel + +/* + * Take an AST from an interrupted user + */ +ast_from_interrupt_user: + movl %gs:CPU_PENDING_AST,%eax + testl %eax,%eax /* pending ASTs? */ + je EXT(ret_to_user) /* no, nothing to do */ + + TIME_TRAP_UENTRY + + movl $1, %ecx /* check if we're in the PFZ */ + jmp L_return_from_trap_with_ast /* return */ + + +/* Syscall dispatch routines! 
*/ + +/* + * + * 32bit Tasks + * System call entries via INTR_GATE or sysenter: + * + * rsp -> x86_saved_state32_t + * interrupts disabled + * direction flag cleared + */ + +Entry(hndl_sysenter) + /* + * We can be here either for a mach syscall or a unix syscall, + * as indicated by the sign of the code: + */ + movl R32_EAX(%rsp),%eax + testl %eax,%eax + js EXT(hndl_mach_scall) /* < 0 => mach */ + /* > 0 => unix */ + +Entry(hndl_unix_scall) +/* If the caller (typically LibSystem) has recorded the cumulative size of + * the arguments in EAX, copy them over from the user stack directly. + * We recover from exceptions inline--if the copy loop doesn't complete + * due to an exception, we fall back to copyin from compatibility mode. + * We can potentially extend this mechanism to mach traps as well (DRK). + */ + testl $(I386_SYSCALL_ARG_BYTES_MASK), %eax + jz L_copy_args_continue + movl %eax, %ecx + mov %gs:CPU_UBER_ARG_STORE_VALID, %rbx + shrl $(I386_SYSCALL_ARG_DWORDS_SHIFT), %ecx + andl $(I386_SYSCALL_ARG_DWORDS_MASK), %ecx + mov %gs:CPU_UBER_ARG_STORE, %rdi + mov ISC32_RSP(%rsp), %rsi + add $4, %rsi + movl $0, (%rbx) + +EXT(idt64_unix_scall_copy_args): + rep movsl + movl $1, (%rbx) +L_copy_args_continue: + + TIME_TRAP_UENTRY + + movq %gs:CPU_KERNEL_STACK,%rdi + xchgq %rdi,%rsp /* switch to kernel stack */ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movq %rdi,ACT_PCB_ISS(%rcx) + movq ACT_TASK(%rcx),%rbx /* point to current task */ + addl $1,TASK_SYSCALLS_UNIX(%rbx) /* increment call count */ + + /* Check for active vtimers in the current task */ + TASK_VTIMER_CHECK(%rbx,%rcx) + + sti + + CCALL(unix_syscall) + /* + * always returns through thread_exception_return + */ + + +Entry(hndl_mach_scall) + TIME_TRAP_UENTRY + + movq %gs:CPU_KERNEL_STACK,%rdi + xchgq %rdi,%rsp /* switch to kernel stack */ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movq %rdi,ACT_PCB_ISS(%rcx) + movq ACT_TASK(%rcx),%rbx /* point to current task */ + addl 
$1,TASK_SYSCALLS_MACH(%rbx) /* increment call count */ + + /* Check for active vtimers in the current task */ + TASK_VTIMER_CHECK(%rbx,%rcx) + + sti + + CCALL(mach_call_munger) + /* + * always returns through thread_exception_return + */ + + +Entry(hndl_mdep_scall) + TIME_TRAP_UENTRY + + movq %gs:CPU_KERNEL_STACK,%rdi + xchgq %rdi,%rsp /* switch to kernel stack */ + + /* Check for active vtimers in the current task */ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movq ACT_TASK(%rcx),%rbx /* point to current task */ + TASK_VTIMER_CHECK(%rbx,%rcx) + + sti + + CCALL(machdep_syscall) + /* + * always returns through thread_exception_return + */ + + +Entry(hndl_diag_scall) + TIME_TRAP_UENTRY + + movq %gs:CPU_KERNEL_STACK,%rdi + xchgq %rdi,%rsp /* switch to kernel stack */ + + /* Check for active vtimers in the current task */ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movq ACT_TASK(%rcx),%rbx /* point to current task */ + TASK_VTIMER_CHECK(%rbx,%rcx) + + pushq %rdi /* push pcb stack so we can pop it later */ + + CCALL(diagCall) // Call diagnostics + cli // Disable interruptions just in case they were enabled + popq %rsp // Get back the original stack + + cmpl $0,%eax // What kind of return is this? + jne EXT(return_to_user) // Normal return, do not check asts... 
+ + CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1) + // pass what would be the diag syscall + // error return - cause an exception + /* no return */ + + + +/* + * 64bit Tasks + * System call entries via syscall only: + * + * rsp -> x86_saved_state64_t + * interrupts disabled + * direction flag cleared + */ + +Entry(hndl_syscall) + TIME_TRAP_UENTRY + + movq %gs:CPU_KERNEL_STACK,%rdi + xchgq %rdi,%rsp /* switch to kernel stack */ + movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ + movq %rdi, ACT_PCB_ISS(%rcx) + movq ACT_TASK(%rcx),%rbx /* point to current task */ + + /* Check for active vtimers in the current task */ + TASK_VTIMER_CHECK(%rbx,%rcx) + + /* + * We can be here either for a mach, unix machdep or diag syscall, + * as indicated by the syscall class: + */ + movl R64_RAX(%rdi), %eax /* syscall number/class */ + movl %eax, %edx + andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */ + cmpl $(SYSCALL_CLASS_MACH< +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ +#include + +#include + +/* + * Fault recovery. + */ + +#ifdef __MACHO__ +#define RECOVERY_SECTION .section __VECTORS, __recover +#else +#define RECOVERY_SECTION .text +#define RECOVERY_SECTION .text +#endif + +#define RECOVER_TABLE_START \ + .align 3 ; \ + .globl EXT(recover_table) ;\ +LEXT(recover_table) ;\ + .text + +#define RECOVER(addr) \ + .align 3; \ + .quad 9f ;\ + .quad addr ;\ + .text ;\ +9: + +#define RECOVER_TABLE_END \ + .align 3 ;\ + .globl EXT(recover_table_end) ;\ +LEXT(recover_table_end) ;\ + .text + +/* + * Allocate recovery and table. 
+ */ + RECOVERY_SECTION + RECOVER_TABLE_START + +Entry(call_continuation) + movq %rdi,%rcx /* get continuation */ + movq %rsi,%rdi /* continuation param */ + movq %rdx,%rsi /* wait result */ + movq %gs:CPU_KERNEL_STACK,%rsp /* set the stack */ + xorq %rbp,%rbp /* zero frame pointer */ + call *%rcx /* call continuation */ + movq %gs:CPU_ACTIVE_THREAD,%rdi + call EXT(thread_terminate) + +/* + * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi) + */ +ENTRY(rdmsr_carefully) + movl %edi, %ecx + movq %rdx, %rdi + RECOVERY_SECTION + RECOVER(rdmsr_fail) + rdmsr + movl %eax, (%rsi) + movl %edx, (%rdi) + xorl %eax, %eax + ret + +rdmsr_fail: + movq $1, %rax + ret + +.globl EXT(thread_exception_return) +.globl EXT(thread_bootstrap_return) +LEXT(thread_bootstrap_return) +#if CONFIG_DTRACE + call EXT(dtrace_thread_bootstrap) +#endif + +LEXT(thread_exception_return) + cli + movq %gs:CPU_ACTIVE_THREAD,%rsp + movq ACT_PCB_ISS(%rsp), %rsp + xorl %ecx, %ecx /* don't check if we're in the PFZ */ + jmp EXT(return_from_trap) + +/* + * Copyin/out from user/kernel address space. + * rdi: source address + * rsi: destination address + * rdx: byte count + */ +Entry(_bcopy) +// TODO not pop regs; movq; think about 32 bit or 64 bit byte count + xchgq %rdi, %rsi /* source %rsi, dest %rdi */ + + cld /* count up */ + movl %edx,%ecx /* move by longwords first */ + shrl $3,%ecx + RECOVERY_SECTION + RECOVER(_bcopy_fail) + rep + movsq /* move longwords */ + + movl %edx,%ecx /* now move remaining bytes */ + andl $7,%ecx + RECOVERY_SECTION + RECOVER(_bcopy_fail) + rep + movsb + + xorl %eax,%eax /* return 0 for success */ + ret /* and return */ + +_bcopy_fail: + movl $(EFAULT),%eax /* return error for failure */ + ret + + + +/* + * Copyin string from user/kern address space. 
+ * rdi: source address + * rsi: destination address + * rdx: max byte count + * rcx: pointer to actual byte count (OUT), may be NULL + */ +Entry(_bcopystr) + pushq %rdi /* save source start for the length calculation */ + xchgq %rdi, %rsi /* source %rsi, dest %rdi */ + + xorl %eax,%eax /* set to 0 here so that high 24 bits */ + /* are 0 for the testl against 0 */ +2: + RECOVERY_SECTION + RECOVER(_bcopystr_fail) /* copy bytes... */ + movb (%rsi),%al + incq %rsi + testq %rdi,%rdi /* if destination is NULL ... */ + jz 3f /* ... skip the store, just scan */ + movb %al,(%rdi) /* copy the byte */ + incq %rdi +3: + testl %eax,%eax /* did we just stuff the 0-byte? */ + jz 4f /* yes, return 0 already in %eax */ + decq %rdx /* decrement #bytes left in buffer */ + jnz 2b /* buffer not full, copy another byte */ + movl $(ENAMETOOLONG),%eax /* buffer full, no \0: ENAMETOOLONG */ +4: + cmpq $0,%rcx /* get OUT len ptr */ + jz _bcopystr_ret /* if null, skip the length store */ + subq (%rsp),%rsi /* bytes consumed = current - saved source */ + movq %rsi,(%rcx) /* else set OUT arg to xfer len */ +_bcopystr_ret: + popq %rdi /* balance the entry push on every exit */ + ret /* and return */ + +_bcopystr_fail: + popq %rdi /* restore registers */ + movl $(EFAULT),%eax /* return error for failure */ + ret + + +/* + * Done with recovery table. + */ + RECOVERY_SECTION + RECOVER_TABLE_END + diff --git a/osfmk/x86_64/loose_ends.c b/osfmk/x86_64/loose_ends.c new file mode 100644 index 000000000..5ad70b323 --- /dev/null +++ b/osfmk/x86_64/loose_ends.c @@ -0,0 +1,932 @@ +/* + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License.
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if 0 + +#undef KERNEL_DEBUG +#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT +#define KDEBUG 1 + +#endif + +/* XXX - should be gone from here */ +extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys); +extern void flush_dcache64(addr64_t addr, unsigned count, int phys); +extern boolean_t phys_page_exists(ppnum_t); +extern void bcopy_no_overwrite(const char *from, char *to,vm_size_t bytes); +extern void pmap_set_reference(ppnum_t pn); +extern void mapping_set_mod(ppnum_t pa); +extern void mapping_set_ref(ppnum_t pn); + +extern void ovbcopy(const char *from, + char *to, + vm_size_t nbytes); +void machine_callstack(natural_t *buf, vm_size_t callstack_max); + + +#define value_64bit(value) ((value) & 0xFFFFFFFF00000000ULL) +#define low32(x) ((unsigned int)((x) & 0x00000000FFFFFFFFULL)) + +#define INT_SIZE (BYTE_SIZE * sizeof (int)) + +/* + * Set indicated bit in bit string. + */ +void +setbit(int bitno, int *s) +{ + s[bitno / INT_SIZE] |= 1 << (bitno % INT_SIZE); +} + +/* + * Clear indicated bit in bit string. + */ +void +clrbit(int bitno, int *s) +{ + s[bitno / INT_SIZE] &= ~(1 << (bitno % INT_SIZE)); +} + +/* + * Test if indicated bit is set in bit string. + */ +int +testbit(int bitno, int *s) +{ + return s[bitno / INT_SIZE] & (1 << (bitno % INT_SIZE)); +} + +/* + * Find first bit set in bit string. 
+ */ +int +ffsbit(int *s) +{ + int offset; + + for (offset = 0; !*s; offset += (int)INT_SIZE, ++s); + return offset + __builtin_ctz(*s); +} + +int +ffs(unsigned int mask) +{ + if (mask == 0) + return 0; + + /* + * NOTE: cannot use __builtin_ffs because it generates a call to + * 'ffs' + */ + return 1 + __builtin_ctz(mask); +} + +void +bzero_phys_nc( + addr64_t src64, + uint32_t bytes) +{ + bzero_phys(src64,bytes); +} + +void +bzero_phys( + addr64_t src64, + uint32_t bytes) +{ + bzero(PHYSMAP_PTOV(src64), bytes); +} + + +/* + * bcopy_phys - like bcopy but copies from/to physical addresses. + */ + +void +bcopy_phys( + addr64_t src64, + addr64_t dst64, + vm_size_t bytes) +{ + /* Not necessary for K64 - but ensure we stay within a page */ + if (((((uint32_t)src64 & (NBPG-1)) + bytes) > NBPG) || + ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) ) { + panic("bcopy_phys alignment"); + } + bcopy(PHYSMAP_PTOV(src64), PHYSMAP_PTOV(dst64), bytes); +} + +/* + * ovbcopy - like bcopy, but recognizes overlapping ranges and handles + * them correctly. + */ + +void +ovbcopy( + const char *from, + char *to, + vm_size_t bytes) /* num bytes to copy */ +{ + /* Assume that bcopy copies left-to-right (low addr first). */ + if (from + bytes <= to || to + bytes <= from || to == from) + bcopy_no_overwrite(from, to, bytes); /* non-overlapping or no-op*/ + else if (from > to) + bcopy_no_overwrite(from, to, bytes); /* overlapping but OK */ + else { + /* to > from: overlapping, and must copy right-to-left. */ + from += bytes - 1; + to += bytes - 1; + while (bytes-- > 0) + *to-- = *from--; + } +} + + +/* + * Read data from a physical address. Memory should not be cache inhibited. 
+ */ + + +static unsigned int +ml_phys_read_data(pmap_paddr_t paddr, int size) +{ + unsigned int result; + + switch (size) { + unsigned char s1; + unsigned short s2; + case 1: + s1 = *(unsigned char *)PHYSMAP_PTOV(paddr); + result = s1; + break; + case 2: + s2 = *(unsigned short *)PHYSMAP_PTOV(paddr); + result = s2; + break; + case 4: + default: + result = *(unsigned int *)PHYSMAP_PTOV(paddr); + break; + } + + return result; +} + +static unsigned long long +ml_phys_read_long_long(pmap_paddr_t paddr ) +{ + return *(unsigned long long *)PHYSMAP_PTOV(paddr); +} + + + +unsigned int ml_phys_read( vm_offset_t paddr) +{ + return ml_phys_read_data((pmap_paddr_t)paddr, 4); +} + +unsigned int ml_phys_read_word(vm_offset_t paddr) { + + return ml_phys_read_data((pmap_paddr_t)paddr, 4); +} + +unsigned int ml_phys_read_64(addr64_t paddr64) +{ + return ml_phys_read_data((pmap_paddr_t)paddr64, 4); +} + +unsigned int ml_phys_read_word_64(addr64_t paddr64) +{ + return ml_phys_read_data((pmap_paddr_t)paddr64, 4); +} + +unsigned int ml_phys_read_half(vm_offset_t paddr) +{ + return ml_phys_read_data((pmap_paddr_t)paddr, 2); +} + +unsigned int ml_phys_read_half_64(addr64_t paddr64) +{ + return ml_phys_read_data((pmap_paddr_t)paddr64, 2); +} + +unsigned int ml_phys_read_byte(vm_offset_t paddr) +{ + return ml_phys_read_data((pmap_paddr_t)paddr, 1); +} + +unsigned int ml_phys_read_byte_64(addr64_t paddr64) +{ + return ml_phys_read_data((pmap_paddr_t)paddr64, 1); +} + +unsigned long long ml_phys_read_double(vm_offset_t paddr) +{ + return ml_phys_read_long_long((pmap_paddr_t)paddr); +} + +unsigned long long ml_phys_read_double_64(addr64_t paddr64) +{ + return ml_phys_read_long_long((pmap_paddr_t)paddr64); +} + + + +/* + * Write data to a physical address. Memory should not be cache inhibited. 
+ */ + +static void +ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size) +{ + switch (size) { + case 1: + *(unsigned char *)PHYSMAP_PTOV(paddr) = (unsigned char)data; + break; + case 2: + *(unsigned short *)PHYSMAP_PTOV(paddr) = (unsigned short)data; + break; + case 4: + default: + *(unsigned int *)PHYSMAP_PTOV(paddr) = (unsigned int)data; + break; + } +} + +static void +ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data) +{ + *(unsigned long long *)PHYSMAP_PTOV(paddr) = data; +} + + + +void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr, data, 1); +} + +void ml_phys_write_byte_64(addr64_t paddr64, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr64, data, 1); +} + +void ml_phys_write_half(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr, data, 2); +} + +void ml_phys_write_half_64(addr64_t paddr64, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr64, data, 2); +} + +void ml_phys_write(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr, data, 4); +} + +void ml_phys_write_64(addr64_t paddr64, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); +} + +void ml_phys_write_word(vm_offset_t paddr, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr, data, 4); +} + +void ml_phys_write_word_64(addr64_t paddr64, unsigned int data) +{ + ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); +} + +void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) +{ + ml_phys_write_long_long((pmap_paddr_t)paddr, data); +} + +void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data) +{ + ml_phys_write_long_long((pmap_paddr_t)paddr64, data); +} + + +/* PCI config cycle probing + * + * + * Read the memory location at physical address paddr. + * This is a part of a device probe, so there is a good chance we will + * have a machine check here. 
So we have to be able to handle that. + * We assume that machine checks are enabled both in MSR and HIDs + */ + +boolean_t +ml_probe_read(vm_offset_t paddr, unsigned int *val) +{ + if ((PAGE_SIZE - (paddr & PAGE_MASK)) < 4) /* fewer than 4 bytes left in this page */ + return FALSE; + + *val = ml_phys_read((pmap_paddr_t)paddr); + + return TRUE; +} + +/* + * Read the memory location at physical address paddr. + * This is a part of a device probe, so there is a good chance we will + * have a machine check here. So we have to be able to handle that. + * We assume that machine checks are enabled both in MSR and HIDs + */ +boolean_t +ml_probe_read_64(addr64_t paddr64, unsigned int *val) +{ + if ((PAGE_SIZE - (paddr64 & PAGE_MASK)) < 4) /* fewer than 4 bytes left in this page */ + return FALSE; + + *val = ml_phys_read_64((pmap_paddr_t)paddr64); + return TRUE; +} + + +int bcmp( /* 0 if the first len bytes match, non-zero otherwise */ + const void *pa, + const void *pb, + size_t len) +{ + const char *a = (const char *)pa; + const char *b = (const char *)pb; + + if (len == 0) + return 0; + + do + if (*a++ != *b++) + break; + while (--len); /* on a mismatch, len keeps the count from the differing byte onward */ + + return ((int)len != 0) ? (int)len : (len != 0); /* a non-zero count that truncates to 0 as int must not read as a match */ +} + +int +memcmp(const void *s1, const void *s2, size_t n) +{ + if (n != 0) { + const unsigned char *p1 = s1, *p2 = s2; + + do { + if (*p1++ != *p2++) + return (*--p1 - *--p2); /* step back to the differing bytes */ + } while (--n != 0); + } + return (0); +} + +/* + * Abstract: + * strlen returns the number of characters in "string" preceding + * the terminating null character. + */ + +size_t +strlen( + register const char *string) +{ + register const char *ret = string; + + while (*string++ != '\0') + continue; + return string - 1 - ret; +} + +uint32_t +hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest) +{ + return OSCompareAndSwap((UInt32)oldval, + (UInt32)newval, + (volatile UInt32 *)dest); +} + +#if MACH_ASSERT + +/* + * Machine-dependent routine to fill in an array with up to callstack_max + * levels of return pc information.
+ */ +void machine_callstack( + __unused natural_t *buf, + __unused vm_size_t callstack_max) +{ +} + +#endif /* MACH_ASSERT */ + +void fillPage(ppnum_t pa, unsigned int fill) +{ + pmap_paddr_t src; + int i; + int cnt = PAGE_SIZE / sizeof(unsigned int); + unsigned int *addr; + + src = i386_ptob(pa); + for (i = 0, addr = (unsigned int *)PHYSMAP_PTOV(src); i < cnt; i++) + *addr++ = fill; +} + +static inline void __sfence(void) +{ + __asm__ volatile("sfence"); +} +static inline void __mfence(void) +{ + __asm__ volatile("mfence"); +} +static inline void __wbinvd(void) +{ + __asm__ volatile("wbinvd"); +} +static inline void __clflush(void *ptr) +{ + __asm__ volatile("clflush (%0)" : : "r" (ptr)); +} + +void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) +{ + uint32_t linesize = cpuid_info()->cache_linesize; + addr64_t addr; + boolean_t istate; + + __mfence(); + + istate = ml_set_interrupts_enabled(FALSE); + + for (addr = pa; addr < pa + count; addr += linesize) + __clflush(PHYSMAP_PTOV(addr)); + + (void) ml_set_interrupts_enabled(istate); + + __mfence(); +} + +void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count) +{ + return(dcache_incoherent_io_store64(pa,count)); +} + +void +flush_dcache64(__unused addr64_t addr, + __unused unsigned count, + __unused int phys) +{ +} + +void +invalidate_icache64(__unused addr64_t addr, + __unused unsigned count, + __unused int phys) +{ +} + + +addr64_t vm_last_addr; + +void +mapping_set_mod(ppnum_t pn) +{ + pmap_set_modify(pn); +} + +void +mapping_set_ref(ppnum_t pn) +{ + pmap_set_reference(pn); +} + +void +cache_flush_page_phys(ppnum_t pa) +{ + boolean_t istate; + unsigned char *cacheline_addr; + int cacheline_size = cpuid_info()->cache_linesize; + int cachelines_to_flush = PAGE_SIZE/cacheline_size; + + __mfence(); + + istate = ml_set_interrupts_enabled(FALSE); + + for (cacheline_addr = (unsigned char *)PHYSMAP_PTOV(i386_ptob(pa)); + cachelines_to_flush > 0; + cachelines_to_flush--, cacheline_addr += 
cacheline_size) { + __clflush((void *) cacheline_addr); + } + + (void) ml_set_interrupts_enabled(istate); + + __mfence(); +} + + +static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int); +static int copyio_phys(addr64_t, addr64_t, vm_size_t, int); + +/* + * The copy engine has the following characteristics + * - copyio() handles copies to/from user or kernel space + * - copypv() deals with physical or virtual addresses + * + * Readers familiar with the 32-bit kernel will expect Joe's thesis at this + * point describing the full glory of the copy window implementation. In K64, + * however, there is no need for windowing. Thanks to the vast shared address + * space, the kernel has direct access to userspace and to physical memory. + * + * User virtual addresses are accessible provided the user's cr3 is loaded. + * Physical addresses are accessible via the direct map and the PHYSMAP_PTOV() + * translation. + * + * Copyin/out variants all boil down to just these 2 routines in locore.s which + * provide fault-recoverable copying: + */ +extern int _bcopy(const void *, void *, vm_size_t); +extern int _bcopystr(const void *, void *, vm_size_t, vm_size_t *); + + +/* + * Types of copies: + */ +#define COPYIN 0 /* from user virtual to kernel virtual */ +#define COPYOUT 1 /* from kernel virtual to user virtual */ +#define COPYINSTR 2 /* string variant of copyin */ +#define COPYINPHYS 3 /* from user virtual to kernel physical */ +#define COPYOUTPHYS 4 /* from kernel physical to user virtual */ + + +static int +copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, + vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map) +{ + thread_t thread; + pmap_t pmap; + vm_size_t bytes_copied; + int error = 0; + boolean_t istate = FALSE; + boolean_t recursive_CopyIOActive; +#if KDEBUG + int debug_type = 0xeff70010; + debug_type += (copy_type << 2); +#endif + + thread = current_thread(); + + KERNEL_DEBUG(debug_type | DBG_FUNC_START, + (unsigned)(user_addr >>
32), (unsigned)user_addr, + nbytes, thread->machine.copyio_state, 0); + + if (nbytes == 0) + goto out; + + pmap = thread->map->pmap; + + /* Sanity and security check for addresses to/from a user */ + if ((copy_type == COPYIN || + copy_type == COPYINSTR || + copy_type == COPYOUT) && + (pmap != kernel_pmap) && + ((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS || + !IS_USERADDR64_CANONICAL(user_addr))) { + error = EACCES; + goto out; + } + + /* + * If the no_shared_cr3 boot-arg is set (true), the kernel runs on + * its own pmap and cr3 rather than the user's -- so that wild accesses + * from kernel or kexts can be trapped. So, during copyin and copyout, + * we need to switch back to the user's map/cr3. The thread is flagged + * "CopyIOActive" at this time so that if the thread is pre-empted, + * we will later restore the correct cr3. + */ + recursive_CopyIOActive = thread->machine.specFlags & CopyIOActive; + thread->machine.specFlags |= CopyIOActive; + if (no_shared_cr3) { + istate = ml_set_interrupts_enabled(FALSE); + if (get_cr3() != pmap->pm_cr3) + set_cr3(pmap->pm_cr3); + } + + /* + * Ensure that we're running on the target thread's cr3. 
+ */ + if ((pmap != kernel_pmap) && !use_kernel_map && + (get_cr3() != pmap->pm_cr3)) { + panic("copyio(%d,%p,%p,%ld,%p,%d) cr3 is %p expects %p", + copy_type, (void *)user_addr, kernel_addr, nbytes, lencopied, use_kernel_map, + (void *) get_cr3(), (void *) pmap->pm_cr3); + } + if (no_shared_cr3) + (void) ml_set_interrupts_enabled(istate); + + KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_addr, + (unsigned)kernel_addr, nbytes, 0, 0); + + switch (copy_type) { + + case COPYIN: + error = _bcopy((const void *) user_addr, + kernel_addr, + nbytes); + break; + + case COPYOUT: + error = _bcopy(kernel_addr, + (void *) user_addr, + nbytes); + break; + + case COPYINPHYS: + error = _bcopy((const void *) user_addr, + PHYSMAP_PTOV(kernel_addr), + nbytes); + break; + + case COPYOUTPHYS: + error = _bcopy((const void *) PHYSMAP_PTOV(kernel_addr), + (void *) user_addr, + nbytes); + break; + + case COPYINSTR: + error = _bcopystr((const void *) user_addr, + kernel_addr, + (int) nbytes, + &bytes_copied); + + /* + * lencopied should be updated on success + * or ENAMETOOLONG... but not EFAULT + */ + if (error != EFAULT) + *lencopied = bytes_copied; + + if (error) { +#if KDEBUG + nbytes = *lencopied; +#endif + break; + } + if (*(kernel_addr + bytes_copied - 1) == 0) { + /* + * we found a NULL terminator... 
we're done + */ +#if KDEBUG + nbytes = *lencopied; +#endif + break; + } else { + /* + * no more room in the buffer and we haven't + * yet come across a NULL terminator + */ +#if KDEBUG + nbytes = *lencopied; +#endif + error = ENAMETOOLONG; + break; + } + break; + } + + if (!recursive_CopyIOActive) + thread->machine.specFlags &= ~CopyIOActive; + if (no_shared_cr3) { + istate = ml_set_interrupts_enabled(FALSE); + if (get_cr3() != kernel_pmap->pm_cr3) + set_cr3(kernel_pmap->pm_cr3); + (void) ml_set_interrupts_enabled(istate); + } + +out: + KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr, + (unsigned)kernel_addr, (unsigned)nbytes, error, 0); + + return (error); +} + + +static int +copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which) +{ + char *paddr; + user_addr_t vaddr; + int ctype; + + if (which & cppvPsnk) { + paddr = (char *)sink; + vaddr = (user_addr_t)source; + ctype = COPYINPHYS; + } else { + paddr = (char *)source; + vaddr = (user_addr_t)sink; + ctype = COPYOUTPHYS; + } + return copyio(ctype, vaddr, paddr, csize, NULL, which & cppvKmap); +} + +int +copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes) +{ + return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0); +} + +int +copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes) +{ + return copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0); +} + +int +copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied) +{ + *lencopied = 0; + + return copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0); +} + +int +copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes) +{ + return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0); +} + +int +copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) +{ + return copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0); +} + + +kern_return_t 
+copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which) +{ + unsigned int lop, csize; + int bothphys = 0; + + KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64, + (unsigned)snk64, size, which, 0); + + if ((which & (cppvPsrc | cppvPsnk)) == 0 ) /* Make sure that only one is virtual */ + panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */ + + if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk)) + bothphys = 1; /* both are physical */ + + while (size) { + + if (bothphys) { + lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); /* Assume sink smallest */ + + if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)))) + lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); /* No, source is smaller */ + } else { + /* + * only need to compute the resid for the physical page + * address... we don't care about where we start/finish in + * the virtual since we just call the normal copyin/copyout + */ + if (which & cppvPsrc) + lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); + else + lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); + } + csize = size; /* Assume we can copy it all */ + if (lop < size) + csize = lop; /* Nope, we can't do it all */ +#if 0 + /* + * flush_dcache64 is currently a nop on the i386... + * it's used when copying to non-system memory such + * as video capture cards... on PPC there was a need + * to flush due to how we mapped this memory... not + * sure if it's needed on i386. 
+ */ + if (which & cppvFsrc) + flush_dcache64(src64, csize, 1); /* If requested, flush source before move */ + if (which & cppvFsnk) + flush_dcache64(snk64, csize, 1); /* If requested, flush sink before move */ +#endif + if (bothphys) + bcopy_phys(src64, snk64, csize); /* Do a physical copy, virtually */ + else { + if (copyio_phys(src64, snk64, csize, which)) + return (KERN_FAILURE); + } +#if 0 + if (which & cppvFsrc) + flush_dcache64(src64, csize, 1); /* If requested, flush source after move */ + if (which & cppvFsnk) + flush_dcache64(snk64, csize, 1); /* If requested, flush sink after move */ +#endif + size -= csize; /* Calculate what is left */ + snk64 += csize; /* Bump sink to next physical address */ + src64 += csize; /* Bump source to next physical address */ + } + KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64, + (unsigned)snk64, size, which, 0); + + return KERN_SUCCESS; +} + +#if !MACH_KDP +void +kdp_register_callout(void) +{ +} +#endif + +#if !CONFIG_VMX +int host_vmxon(boolean_t exclusive __unused) +{ + return VMX_UNSUPPORTED; +} + +void host_vmxoff(void) +{ + return; +} +#endif diff --git a/osfmk/x86_64/lowglobals.h b/osfmk/x86_64/lowglobals.h new file mode 100644 index 000000000..aef2f638f --- /dev/null +++ b/osfmk/x86_64/lowglobals.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Header files for the Low Memory Globals (lg) + */ +#ifndef _LOW_MEMORY_GLOBALS_H_ +#define _LOW_MEMORY_GLOBALS_H_ + +#include +#include +#include +#include + +#ifndef __x86_64__ +#error Wrong architecture - this file is meant for x86_64 +#endif + +/* + * Don't change these structures unless you change the corresponding assembly code + * which is in lowmem_vectors.s + */ + +/* + * This is where we put constants, pointers, and data areas that must be accessed + * quickly through assembler. They are designed to be accessed directly with + * absolute addresses, not via a base register. This is a global area, and not + * per processor. 
+ */ + +#pragma pack(8) /* Make sure the structure stays as we defined it */ +typedef struct lowglo { + + unsigned char lgVerCode[8]; /* 0xffffff8000002000 System verification code */ + uint64_t lgZero[2]; /* 0xffffff8000002008 Double constant 0 */ + uint64_t lgRsv010; /* 0xffffff8000002018 Reserved */ + uint64_t lgCHUDXNUfnStart; /* 0xffffff8000002020 CHUD XNU function glue table */ + uint64_t lgRsv018; /* 0xffffff8000002028 Reserved */ + uint64_t lgVersion; /* 0xffffff8000002030 Pointer to kernel version string */ + uint64_t lgRsv020[280]; /* 0xffffff8000002038 Reserved */ + uint64_t lgKmodptr; /* 0xffffff80000028f8 Pointer to kmod, debugging aid */ + uint64_t lgTransOff; /* 0xffffff8000002900 Pointer to kdp_trans_off, debugging aid */ + uint64_t lgReadIO; /* 0xffffff8000002908 Pointer to kdp_read_io, debugging aid */ + uint64_t lgDevSlot1; /* 0xffffff8000002910 For developer use */ + uint64_t lgDevSlot2; /* 0xffffff8000002918 For developer use */ + uint64_t lgOSVersion; /* 0xffffff8000002920 Pointer to OS version string */ + uint64_t lgRebootFlag; /* 0xffffff8000002928 Pointer to debugger reboot trigger */ + uint64_t lgRsv49C[218]; /* 0xffffff8000002930 Reserved - push to 1 page */ +} lowglo; +#pragma pack() +extern lowglo lowGlo; +#endif /* _LOW_MEMORY_GLOBALS_H_ */ diff --git a/osfmk/i386/mp_slave_boot.h b/osfmk/x86_64/lowmem_vectors.s similarity index 60% rename from osfmk/i386/mp_slave_boot.h rename to osfmk/x86_64/lowmem_vectors.s index d011444e7..aa5c57e4e 100644 --- a/osfmk/i386/mp_slave_boot.h +++ b/osfmk/x86_64/lowmem_vectors.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,10 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + /* * @OSF_COPYRIGHT@ */ - /* * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University @@ -55,57 +55,48 @@ * the rights to redistribute these changes. */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:39 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:40 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.2.6.1 1994/09/23 01:45:53 ezf - * change marker to not FREE - * [1994/09/22 21:19:54 ezf] - * - * Revision 1.2.2.2 1993/06/09 02:27:00 gm - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:02:53 jeffc] - * - * Revision 1.2 1993/04/19 16:12:08 devrcs - * Fixed Copyrights - * [92/12/16 bernadat] - * - * Changed MP_GDT from 1200 to 1100 to save unused space. - * [92/12/08 bernadat] - * - * Revision 1.1 1992/09/30 02:27:14 robert - * Initial revision - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.1.3.1 92/04/30 11:57:14 bernadat - * Moved from cbus to here, applies to both Corollary - * and SystemPro - * [92/04/08 bernadat] - * - * Revision 2.1.9.1 92/02/18 18:34:14 jeffreyh - * Created - * [91/06/27 05:00:05 bernadat] - * - */ -/* CMU_ENDHIST */ +#include +#include +#include -/* - * Define where to store boot code for slaves +#include +#include +#include +#include + + +/* + * on x86_64 the low mem vectors live here and get mapped to 0xffffff8000200000 at + * system startup time */ -#define MP_BOOT 0x1000 /* address where slave boots load */ -#define MP_BOOTSEG 0x100 -#define MP_BOOTGDT 0x1100 /* temporary gdt address for boot */ -#define MP_BOOTSTACK 0x800 /* stack for boot */ -#define MP_MACH_START MP_BOOTSTACK /* contains address where to jump - after boot */ -#define MP_FIRST_ADDR 0x3000 /* 2 extra pages reserved */ + .text + .align 12 + .globl EXT(lowGlo) +EXT(lowGlo): + + .ascii "Catfish " /* +0x000 System verification code */ + .quad 0 /* +0x008 
Double constant 0 */ + .quad 0 + .quad 0 /* +0x018 Reserved */ + .quad 0 /* +0x020 Reserved */ + .quad 0 /* +0x028 Reserved */ + .quad EXT(version) /* +0x030 Pointer to kernel version string */ + .fill 560, 4, 0 /* +0x038 Reserved - rdar://problem/5783217 */ + .quad EXT(kmod) /* +0x8f8 Pointer to kmod, debugging aid */ +#if MACH_KDP + .quad EXT(kdp_trans_off) /* +0x900 Pointer to kdp_trans_off, debugging aid */ + .quad EXT(kdp_read_io) /* +0x908 Pointer to kdp_read_io, debugging aid */ +#else + .quad 0 /* +0x900 Reserved */ + .quad 0 /* +0x908 Reserved */ +#endif + .quad 0 /* +0x910 Reserved for developer use */ + .quad 0 /* +0x918 Reserved for developer use */ + .quad EXT(osversion) /* +0x920 Pointer to osversion string */ +#if MACH_KDP + .quad EXT(flag_kdp_trigger_reboot) /* +0x928 Pointer to debugger reboot trigger */ +#else + .quad 0 /* +0x928 Reserved */ +#endif + .fill 436, 4, 0 /* pad to 0x1000 (page size) - rdar://problem/5783217 */ diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s new file mode 100644 index 000000000..641cd9cdc --- /dev/null +++ b/osfmk/x86_64/machine_routines_asm.s @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +#include +#include +#include + +/* +** ml_get_timebase() +** +** Entry - %rdi contains pointer to 64 bit structure. +** +** Exit - 64 bit structure filled in. +** +*/ +ENTRY(ml_get_timebase) + + lfence + rdtsc + lfence + shlq $32,%rdx + orq %rdx,%rax + movq %rax, (%rdi) + + ret + +/* + * Convert between various timer units + * + * This code converts 64-bit time units to other units. + * For example, the TSC is converted to HPET units. + * + * Time is a 64-bit integer that is some number of ticks. + * Conversion is 64-bit fixed point number which is composed + * of a 32 bit integer and a 32 bit fraction. + * + * The time ticks are multiplied by the conversion factor. The + * calculations are done as a 128-bit value but both the high + * and low words are dropped. The high word is overflow and the + * low word is the fraction part of the result. + * + * We return a 64-bit value. + * + * Note that we can use this function to multiply 2 conversion factors. + * We do this in order to calculate the multiplier used to convert + * directly between any two units. 
+ * + * uint64_t tmrCvt(uint64_t time, // %rdi + * uint64_t conversion) // %rsi + * + */ +ENTRY(tmrCvt) + movq %rdi,%rax + mulq %rsi /* result is %rdx:%rax */ + shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */ + ret + + +/* + * void _rtc_nanotime_store( + * uint64_t tsc, // %rdi + * uint64_t nsec, // %rsi + * uint32_t scale, // %rdx + * uint32_t shift, // %rcx + * rtc_nanotime_t *dst); // %r8 + */ +ENTRY(_rtc_nanotime_store) + movl RNT_GENERATION(%r8),%eax /* get current generation */ + movl $0,RNT_GENERATION(%r8) /* flag data as being updated */ + movq %rdi,RNT_TSC_BASE(%r8) + movq %rsi,RNT_NS_BASE(%r8) + movl %edx,RNT_SCALE(%r8) + movl %ecx,RNT_SHIFT(%r8) + + incl %eax /* next generation */ + jnz 1f + incl %eax /* skip 0, which is a flag */ +1: movl %eax,RNT_GENERATION(%r8) /* update generation */ + + ret + +/* + * uint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow); + * + * This is the same as the commpage nanotime routine, except that it uses the + * kernel internal "rtc_nanotime_info" data instead of the commpage data. + * These two copies of data are kept in sync by rtc_clock_napped(). + * + * Warning! There is another copy of this code in osfmk/x86_64/idt64.s. + * These are kept in sync by both using the RTC_NANOTIME_READ() macro. + * + * There are two versions of this algorithm, for "slow" and "fast" processors. + * The more common "fast" algorithm is: + * + * ns = (((rdtsc - rnt_tsc_base)*rnt_tsc_scale) / 2**32) + rnt_ns_base; + * + * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant + * computed during initialization: + * + * rnt_tsc_scale = (10**9 * 2**32) / tscFreq; + * + * The "slow" algorithm uses long division: + * + * ns = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base; + * + * Since this routine is not synchronized and can be called in any context, + * we use a generation count to guard against seeing partially updated data.
+ * In addition, the _rtc_nanotime_store() routine zeroes the generation before + * updating the data, and stores the nonzero generation only after all fields + * have been stored. Because IA32 guarantees that stores by one processor + * must be seen in order by another, we can avoid using a lock. We spin while + * the generation is zero. + * + * uint64_t _rtc_nanotime_read( + * rtc_nanotime_t *rntp, // %rdi + * int slow); // %rsi + * + */ +ENTRY(_rtc_nanotime_read) + test %rsi,%rsi + jnz Lslow + + /* + * Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD + */ + RTC_NANOTIME_READ_FAST() + + ret + + /* + * Processor whose TSC frequency is not faster than SLOW_TSC_THRESHOLD + * But K64 doesn't support this, so we panic. + */ +Lslow: + lea 1f(%rip),%rdi + xorb %al,%al + call EXT(panic) + hlt + .data +1: String "_rtc_nanotime_read() - slow algorithm not supported" + diff --git a/osfmk/x86_64/mcount.s b/osfmk/x86_64/mcount.s new file mode 100644 index 000000000..c19945452 --- /dev/null +++ b/osfmk/x86_64/mcount.s @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#define __NO_UNDERSCORES__ +#include +#include + +Entry(mcount) + pushq %rbp // setup mcount's frame + movq %rsp,%rbp + pushq %rax // save %rax + pushf // save interrupt state + cli // disable interrupts + + // + // Check that this cpu is ready. + // This delays the start of mcounting until a cpu is really prepared. + // + mov %gs,%ax + test %ax,%ax + jz 1f + + movl %gs:CPU_RUNNING,%eax + testl %eax,%eax + jz 1f + + // + // Test for recursion as indicated by a per-cpu flag. + // Skip if nested, otherwise set the flag and call the C mcount(). + // + movl %gs:CPU_MCOUNT_OFF,%eax + testl %eax,%eax // test for recursion + jnz 1f + + incl %gs:CPU_MCOUNT_OFF // set recursion flag + + movq (%rbp),%rax // frame pointer of mcount's caller + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + pushq %r9 + movq 8(%rax),%rdi // mcount's caller's return address + movq 8(%rbp),%rsi // selfpc parameter for mcount() + + call _mcount // call the C mcount + + popq %r9 + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + + decl %gs:CPU_MCOUNT_OFF // turn off recursion flag +1: + popf // restore interrupt state + popq %rax + movq %rbp,%rsp // tear down mcount's frame + popq %rbp + ret diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c new file mode 100644 index 000000000..13c439a96 --- /dev/null +++ b/osfmk/x86_64/pmap.c @@ -0,0 +1,3642 @@ + +/* + * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ + +/* + * File: pmap.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * (These guys wrote the Vax version) + * + * Physical Map management code for Intel i386, i486, and i860. + * + * Manages physical address maps. + * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. 
+ */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include /* prototyping */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if MACH_KDB +#include +#include +#include +#include +#endif /* MACH_KDB */ + +#include + +#include +#include + + +/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */ +#ifdef DEBUGINTERRUPTS +#define pmap_intr_assert() { \ + if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \ + panic("pmap interrupt assert %s, %d",__FILE__, __LINE__); \ +} +#else +#define pmap_intr_assert() +#endif + +#ifdef IWANTTODEBUG +#undef DEBUG +#define DEBUG 1 +#define POSTCODE_DELAY 1 +#include +#endif /* IWANTTODEBUG */ + +boolean_t pmap_trace = FALSE; + +#if PMAP_DBG +#define DBG(x...) kprintf("DBG: " x) +#else +#define DBG(x...) +#endif + +boolean_t no_shared_cr3 = DEBUG; /* TRUE for DEBUG by default */ + +/* + * Forward declarations for internal functions. + */ + +void pmap_remove_range( + pmap_t pmap, + vm_map_offset_t va, + pt_entry_t *spte, + pt_entry_t *epte); + +void phys_attribute_clear( + ppnum_t phys, + int bits); + +int phys_attribute_test( + ppnum_t phys, + int bits); + +void phys_attribute_set( + ppnum_t phys, + int bits); + +void pmap_set_reference( + ppnum_t pn); + +boolean_t phys_page_exists( + ppnum_t pn); + + +int nx_enabled = 1; /* enable no-execute protection */ +int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */ +int allow_stack_exec = 0; /* No apps may execute from the stack by default */ + +const boolean_t cpu_64bit = TRUE; /* Mais oui! */ + +/* + * when spinning through pmap_remove + * ensure that we don't spend too much + * time with preemption disabled. 
+ * I'm setting the current threshold + * to 20us + */ +#define MAX_PREEMPTION_LATENCY_NS 20000 + +uint64_t max_preemption_latency_tsc = 0; + + +/* + * Private data structures. + */ + +/* + * For each vm_page_t, there is a list of all currently + * valid virtual mappings of that page. An entry is + * a pv_rooted_entry_t; the list is the pv_table. + * + * N.B. with the new combo rooted/hashed scheme it is + * only possible to remove individual non-rooted entries + * if they are found via the hashed chains as there is no + * way to unlink the singly linked hashed entries if navigated to + * via the queue list off the rooted entries. Think of it as + * hash/walk/pull, keeping track of the prev pointer while walking + * the singly linked hash list. All of this is to save memory and + * keep both types of pv_entries as small as possible. + */ + +/* + +PV HASHING Changes - JK 1/2007 + +Pve's establish physical to virtual mappings. These are used for aliasing of a +physical page to (potentially many) virtual addresses within pmaps. In the +previous implementation the structure of the pv_entries (each 16 bytes in size) was + +typedef struct pv_entry { + struct pv_entry *next; + pmap_t pmap; + vm_map_offset_t va; +} *pv_entry_t; + +An initial array of these is created at boot time, one per physical page of +memory, indexed by the physical page number. Additionally, a pool of entries +is created from a pv_zone to be used as needed by pmap_enter() when it is +creating new mappings. Originally, we kept this pool around because the code +in pmap_enter() was unable to block if it needed an entry and none were +available - we'd panic. Some time ago I restructured the pmap_enter() code +so that for user pmaps it can block while zalloc'ing a pv structure and restart, +removing a panic from the code (in the case of the kernel pmap we cannot block +and still panic, so, we keep a separate hot pool for use only on kernel pmaps).
+The pool has not been removed since there is a large performance gain keeping +freed pv's around for reuse and not suffering the overhead of zalloc for every +new pv we need. + +As pmap_enter() created new mappings it linked the new pve's for them off the +fixed pv array for that ppn (off the next pointer). These pve's are accessed +for several operations, one of them being address space teardown. In that case, +we basically do this + + for (every page/pte in the space) { + calc pve_ptr from the ppn in the pte + for (every pv in the list for the ppn) { + if (this pv is for this pmap/vaddr) { + do housekeeping + unlink/free the pv + } + } + } + +The problem arose when we were running, say 8000 (or even 2000) apache or +other processes and one or all terminate. The list hanging off each pv array +entry could have thousands of entries. We were continuously linearly searching +each of these lists as we stepped through the address space we were tearing +down. Because of the locks we hold, likely taking a cache miss for each node, +and interrupt disabling for MP issues the system became completely unresponsive +for many seconds while we did this. + +Realizing that pve's are accessed in two distinct ways (linearly running the +list by ppn for operations like pmap_page_protect and finding and +modifying/removing a single pve as part of pmap_enter processing) has led to +modifying the pve structures and databases. + +There are now two types of pve structures. A "rooted" structure which is +basically the original structure accessed in an array by ppn, and a ''hashed'' +structure accessed on a hash list via a hash of [pmap, vaddr]. These have been +designed with the two goals of minimizing wired memory and making the lookup of +a ppn faster. Since a vast majority of pages in the system are not aliased +and hence represented by a single pv entry I've kept the rooted entry size as +small as possible because there is one of these dedicated for every physical +page of memory. 
The hashed pve's are larger due to the addition of the hash +link and the ppn entry needed for matching while running the hash list to find +the entry we are looking for. This way, only systems that have lots of +aliasing (like 2000+ httpd procs) will pay the extra memory price. Both +structures have the same first three fields allowing some simplification in +the code. + +They have these shapes + +typedef struct pv_rooted_entry { + queue_head_t qlink; + vm_map_offset_t va; + pmap_t pmap; +} *pv_rooted_entry_t; + + +typedef struct pv_hashed_entry { + queue_head_t qlink; + vm_map_offset_t va; + pmap_t pmap; + ppnum_t ppn; + struct pv_hashed_entry *nexth; +} *pv_hashed_entry_t; + +The main flow difference is that the code is now aware of the rooted entry and +the hashed entries. Code that runs the pv list still starts with the rooted +entry and then continues down the qlink onto the hashed entries. Code that is +looking up a specific pv entry first checks the rooted entry and then hashes +and runs the hash list for the match. The hash list lengths are much smaller +than the original pv lists that contained all aliases for the specific ppn. 
+ +*/ + +typedef struct pv_rooted_entry { + /* first three entries must match pv_hashed_entry_t */ + queue_head_t qlink; + vm_map_offset_t va; /* virtual address for mapping */ + pmap_t pmap; /* pmap where mapping lies */ +} *pv_rooted_entry_t; + +#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0) + +pv_rooted_entry_t pv_head_table; /* array of entries, one per page */ + +typedef struct pv_hashed_entry { + /* first three entries must match pv_rooted_entry_t */ + queue_head_t qlink; + vm_map_offset_t va; + pmap_t pmap; + ppnum_t ppn; + struct pv_hashed_entry *nexth; +} *pv_hashed_entry_t; + +#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0) + +#define NPVHASH 4095 /* MUST BE 2^N - 1 */ +pv_hashed_entry_t *pv_hash_table; /* hash lists */ + +uint32_t npvhash = 0; + +//#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */ +#ifdef PV_DEBUG +#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized"); +#else +#define CHK_NPVHASH(x) +#endif + +pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL; +pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL; +decl_simple_lock_data(,pv_hashed_free_list_lock) +decl_simple_lock_data(,pv_hashed_kern_free_list_lock) +decl_simple_lock_data(,pv_hash_table_lock) + +int pv_hashed_free_count = 0; +int pv_hashed_kern_free_count = 0; +#define PV_HASHED_LOW_WATER_MARK 5000 +#define PV_HASHED_KERN_LOW_WATER_MARK 100 +#define PV_HASHED_ALLOC_CHUNK 2000 +#define PV_HASHED_KERN_ALLOC_CHUNK 50 +thread_call_t mapping_adjust_call; +static thread_call_data_t mapping_adjust_call_data; +uint32_t mappingrecurse = 0; + +#define PV_HASHED_ALLOC(pvh_e) { \ + simple_lock(&pv_hashed_free_list_lock); \ + if ((pvh_e = pv_hashed_free_list) != 0) { \ + pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ + pv_hashed_free_count--; \ + if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \ + if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ + thread_call_enter(mapping_adjust_call); \ + } \ + 
simple_unlock(&pv_hashed_free_list_lock); \ +} + +#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ + simple_lock(&pv_hashed_free_list_lock); \ + pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list; \ + pv_hashed_free_list = pvh_eh; \ + pv_hashed_free_count += pv_cnt; \ + simple_unlock(&pv_hashed_free_list_lock); \ +} + +#define PV_HASHED_KERN_ALLOC(pvh_e) { \ + simple_lock(&pv_hashed_kern_free_list_lock); \ + if ((pvh_e = pv_hashed_kern_free_list) != 0) { \ + pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ + pv_hashed_kern_free_count--; \ + if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\ + if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ + thread_call_enter(mapping_adjust_call); \ + } \ + simple_unlock(&pv_hashed_kern_free_list_lock); \ +} + +#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ + simple_lock(&pv_hashed_kern_free_list_lock); \ + pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list; \ + pv_hashed_kern_free_list = pvh_eh; \ + pv_hashed_kern_free_count += pv_cnt; \ + simple_unlock(&pv_hashed_kern_free_list_lock); \ +} + +zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ + +static zone_t pdpt_zone; + +/* + * Each entry in the pv_head_table is locked by a bit in the + * pv_lock_table. The lock bits are accessed by the physical + * address of the page they lock. + */ + +char *pv_lock_table; /* pointer to array of bits */ +#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE) + +char *pv_hash_lock_table; +#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE) + +/* + * First and last physical addresses that we maintain any information + * for. Initialized to zero so that pmap operations done before + * pmap_init won't touch any non-existent structures. + */ +boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? 
*/ + +static struct vm_object kptobj_object_store; +static struct vm_object kpml4obj_object_store; +static struct vm_object kpdptobj_object_store; + +/* + * Index into pv_head table, its lock bits, and the modify/reference and managed bits + */ + +#define pa_index(pa) (i386_btop(pa)) +#define ppn_to_pai(ppn) ((int)ppn) + +#define pai_to_pvh(pai) (&pv_head_table[pai]) +#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table) +#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table) + +static inline uint32_t +pvhashidx(pmap_t pmap, vm_offset_t va) +{ /* Bucket index: low 32 bits of the pmap pointer XOR the va's page number, masked with npvhash. */ + return ((uint32_t)(uint64_t)pmap ^ + ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & + npvhash; +} +#define pvhash(idx) (&pv_hash_table[idx]) + +#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table) +#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table) + +/* + * Array of physical page attributes for managed pages. + * One byte per physical page. + */ +char *pmap_phys_attributes; +unsigned int last_managed_page = 0; +#define IS_MANAGED_PAGE(x) \ + ((unsigned int)(x) <= last_managed_page && \ + (pmap_phys_attributes[x] & PHYS_MANAGED)) + +/* + * Physical page attributes. Copy bits from PTE definition. + */ +#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */ +#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */ +#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */ + +/* + * Amount of virtual memory mapped by one + * page-directory entry. + */ +#define PDE_MAPPED_SIZE (pdetova(1)) +uint64_t pde_mapped_size = PDE_MAPPED_SIZE; + +/* + * Locking and TLB invalidation + */ + +/* + * Locking Protocols: (changed 2/2007 JK) + * + * There are two structures in the pmap module that need locking: + * the pmaps themselves, and the per-page pv_lists (which are locked + * by locking the pv_lock_table entry that corresponds to the pv_head + * for the list in question.)
Most routines want to lock a pmap and + * then do operations in it that require pv_list locking -- however + * pmap_remove_all and pmap_copy_on_write operate on a physical page + * basis and want to do the locking in the reverse order, i.e. lock + * a pv_list and then go through all the pmaps referenced by that list. + * + * The system wide pmap lock has been removed. Now, paths take a lock + * on the pmap before changing its 'shape' and the reverse order lockers + * (coming in by phys ppn) take a lock on the corresponding pv and then + * retest to be sure nothing changed during the window before they locked + * and can then run up/down the pv lists holding the list lock. This also + * lets the pmap layer run (nearly completely) interrupt enabled, unlike + * previously. + */ + +/* + * PV locking + */ + +#define LOCK_PVH(index) { \ + mp_disable_preemption(); \ + lock_pvh_pai(index); \ +} + +#define UNLOCK_PVH(index) { \ + unlock_pvh_pai(index); \ + mp_enable_preemption(); \ +} +/* + * PV hash locking + */ + +#define LOCK_PV_HASH(hash) lock_hash_hash(hash) +#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash) + +unsigned pmap_memory_region_count; +unsigned pmap_memory_region_current; + +pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; + +/* + * Other useful macros. + */ +#define current_pmap() (vm_map_pmap(current_thread()->map)) + +struct pmap kernel_pmap_store; +pmap_t kernel_pmap; + +pd_entry_t high_shared_pde; +pd_entry_t commpage64_pde; + +struct zone *pmap_zone; /* zone of pmap structures */ + +int pmap_debug = 0; /* flag for debugging prints */ + +unsigned int inuse_ptepages_count = 0; + +addr64_t kernel64_cr3; + +/* + * Pmap cache. Cache is threaded through ref_count field of pmap. + * Max will eventually be constant -- variable for experimentation. 
+ */ +int pmap_cache_max = 32; +int pmap_alloc_chunk = 8; +pmap_t pmap_cache_list; +int pmap_cache_count; +decl_simple_lock_data(,pmap_cache_lock) + +extern char end; + +static int nkpt; + +pt_entry_t *DMAP1, *DMAP2; +caddr_t DADDR1; +caddr_t DADDR2; + +/* + * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain. + * properly deals with the anchor. + * must be called with the hash locked, does not unlock it + */ + +static inline void +pmap_pvh_unlink(pv_hashed_entry_t pvh) +{ + pv_hashed_entry_t curh; + pv_hashed_entry_t *pprevh; + int pvhash_idx; + + CHK_NPVHASH(); + pvhash_idx = pvhashidx(pvh->pmap, pvh->va); + + pprevh = pvhash(pvhash_idx); + +#if PV_DEBUG + if (NULL == *pprevh) + panic("pvh_unlink null anchor"); /* JK DEBUG */ +#endif + curh = *pprevh; + + while (PV_HASHED_ENTRY_NULL != curh) { + if (pvh == curh) + break; + pprevh = &curh->nexth; + curh = curh->nexth; + } + if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh"); + *pprevh = pvh->nexth; + return; +} + +static inline void +pv_hash_add(pv_hashed_entry_t pvh_e, + pv_rooted_entry_t pv_h) +{ + pv_hashed_entry_t *hashp; + int pvhash_idx; + + CHK_NPVHASH(); + pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va); + LOCK_PV_HASH(pvhash_idx); + insque(&pvh_e->qlink, &pv_h->qlink); + hashp = pvhash(pvhash_idx); +#if PV_DEBUG + if (NULL==hashp) + panic("pv_hash_add(%p) null hash bucket", pvh_e); +#endif + pvh_e->nexth = *hashp; + *hashp = pvh_e; + UNLOCK_PV_HASH(pvhash_idx); +} + +static inline void +pv_hash_remove(pv_hashed_entry_t pvh_e) +{ + int pvhash_idx; + + CHK_NPVHASH(); + pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va); + LOCK_PV_HASH(pvhash_idx); + remque(&pvh_e->qlink); + pmap_pvh_unlink(pvh_e); + UNLOCK_PV_HASH(pvhash_idx); +} + +/* + * Remove pv list entry. + * Called with pv_head_table entry locked. + * Returns pv entry to be freed (or NULL). 
+ */ +static inline pv_hashed_entry_t +pmap_pv_remove(pmap_t pmap, + vm_map_offset_t vaddr, + ppnum_t ppn) +{ + pv_hashed_entry_t pvh_e; + pv_rooted_entry_t pv_h; + pv_hashed_entry_t *pprevh; + int pvhash_idx; + uint32_t pv_cnt; + + pvh_e = PV_HASHED_ENTRY_NULL; + pv_h = pai_to_pvh(ppn_to_pai(ppn)); + if (pv_h->pmap == PMAP_NULL) + panic("pmap_pv_remove(%p,%llu,%u): null pv_list!", + pmap, vaddr, ppn); + + if (pv_h->va == vaddr && pv_h->pmap == pmap) { + /* + * Header is the pv_rooted_entry. + * We can't free that. If there is a queued + * entry after this one we remove that + * from the ppn queue, we remove it from the hash chain + * and copy it to the rooted entry. Then free it instead. + */ + pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink); + if (pv_h != (pv_rooted_entry_t) pvh_e) { + /* + * Entry queued to root, remove this from hash + * and install as nem root. + */ + CHK_NPVHASH(); + pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va); + LOCK_PV_HASH(pvhash_idx); + remque(&pvh_e->qlink); + pprevh = pvhash(pvhash_idx); + if (PV_HASHED_ENTRY_NULL == *pprevh) { + panic("pmap_pv_remove(%p,%llu,%u): " + "empty hash, removing rooted", + pmap, vaddr, ppn); + } + pmap_pvh_unlink(pvh_e); + UNLOCK_PV_HASH(pvhash_idx); + pv_h->pmap = pvh_e->pmap; + pv_h->va = pvh_e->va; /* dispose of pvh_e */ + } else { + /* none queued after rooted */ + pv_h->pmap = PMAP_NULL; + pvh_e = PV_HASHED_ENTRY_NULL; + } + } else { + /* + * not removing rooted pv. 
find it on hash chain, remove from + * ppn queue and hash chain and free it + */ + CHK_NPVHASH(); + pvhash_idx = pvhashidx(pmap, vaddr); + LOCK_PV_HASH(pvhash_idx); + pprevh = pvhash(pvhash_idx); + if (PV_HASHED_ENTRY_NULL == *pprevh) { + panic("pmap_pv_remove(%p,%llu,%u): empty hash", + pmap, vaddr, ppn); + } + pvh_e = *pprevh; + pmap_pv_hashlist_walks++; + pv_cnt = 0; + while (PV_HASHED_ENTRY_NULL != pvh_e) { + pv_cnt++; + if (pvh_e->pmap == pmap && + pvh_e->va == vaddr && + pvh_e->ppn == ppn) + break; + pprevh = &pvh_e->nexth; + pvh_e = pvh_e->nexth; + } + if (PV_HASHED_ENTRY_NULL == pvh_e) + panic("pmap_pv_remove(%p,%llu,%u): pv not on hash", + pmap, vaddr, ppn); + pmap_pv_hashlist_cnts += pv_cnt; + if (pmap_pv_hashlist_max < pv_cnt) + pmap_pv_hashlist_max = pv_cnt; + *pprevh = pvh_e->nexth; + remque(&pvh_e->qlink); + UNLOCK_PV_HASH(pvhash_idx); + } + + return pvh_e; +} + +/* + * for legacy, returns the address of the pde entry. + * for 64 bit, causes the pdpt page containing the pde entry to be mapped, + * then returns the mapped address of the pde entry in that page + */ +pd_entry_t * +pmap_pde(pmap_t m, vm_map_offset_t v) +{ + pd_entry_t *pde; + + assert(m); +#if 0 + if (m == kernel_pmap) + pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])); + else +#endif + pde = pmap64_pde(m, v); + + return pde; +} + +/* + * the single pml4 page per pmap is allocated at pmap create time and exists + * for the duration of the pmap. we allocate this page in kernel vm. + * this returns the address of the requested pml4 entry in the top level page. 
+ */ +static inline +pml4_entry_t * +pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr) +{ + return &pmap->pm_pml4[(vaddr >> PML4SHIFT) & (NPML4PG-1)]; +} + +/* + * maps in the pml4 page, if any, containing the pdpt entry requested + * and returns the address of the pdpt entry in that mapped page + */ +pdpt_entry_t * +pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr) +{ + pml4_entry_t newpf; + pml4_entry_t *pml4; + + assert(pmap); + if ((vaddr > 0x00007FFFFFFFFFFFULL) && + (vaddr < 0xFFFF800000000000ULL)) { + return (0); + } + + pml4 = pmap64_pml4(pmap, vaddr); + if (pml4 && ((*pml4 & INTEL_PTE_VALID))) { + newpf = *pml4 & PG_FRAME; + return &((pdpt_entry_t *) PHYSMAP_PTOV(newpf)) + [(vaddr >> PDPTSHIFT) & (NPDPTPG-1)]; + } + return (NULL); +} +/* + * maps in the pdpt page, if any, containing the pde entry requested + * and returns the address of the pde entry in that mapped page + */ +pd_entry_t * +pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr) +{ + pdpt_entry_t newpf; + pdpt_entry_t *pdpt; + + assert(pmap); + if ((vaddr > 0x00007FFFFFFFFFFFULL) && + (vaddr < 0xFFFF800000000000ULL)) { + return (0); + } + + pdpt = pmap64_pdpt(pmap, vaddr); + + if (pdpt && ((*pdpt & INTEL_PTE_VALID))) { + newpf = *pdpt & PG_FRAME; + return &((pd_entry_t *) PHYSMAP_PTOV(newpf)) + [(vaddr >> PDSHIFT) & (NPDPG-1)]; + } + return (NULL); +} + +/* + * return address of mapped pte for vaddr va in pmap pmap. + * + * physically maps the pde page, if any, containing the pte in and returns + * the address of the pte in that mapped page + * + * In case the pde maps a superpage, return the pde, which, in this case + * is the actual page table entry. 
+ */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) +{ + pd_entry_t *pde; + pd_entry_t newpf; + + assert(pmap); + pde = pmap_pde(pmap, vaddr); + + if (pde && ((*pde & INTEL_PTE_VALID))) { + if (*pde & INTEL_PTE_PS) + return pde; + newpf = *pde & PG_FRAME; + return &((pt_entry_t *)PHYSMAP_PTOV(newpf)) + [i386_btop(vaddr) & (ppnum_t)(NPTEPG-1)]; + } + return (NULL); +} + +/* + * Map memory at initialization. The physical addresses being + * mapped are not managed and are never unmapped. + * + * For now, VM is already on, we only need to map the + * specified memory. + */ +vm_offset_t +pmap_map( + vm_offset_t virt, + vm_map_offset_t start_addr, + vm_map_offset_t end_addr, + vm_prot_t prot, + unsigned int flags) +{ + int ps; + + ps = PAGE_SIZE; + while (start_addr < end_addr) { + pmap_enter(kernel_pmap, (vm_map_offset_t)virt, + (ppnum_t) i386_btop(start_addr), prot, flags, FALSE); + virt += ps; + start_addr += ps; + } + return(virt); +} + +/* + * Back-door routine for mapping kernel VM at initialization. + * Useful for mapping memory outside the range + * Sets no-cache, A, D. + * Otherwise like pmap_map. 
+ */ +vm_offset_t +pmap_map_bd( + vm_offset_t virt, + vm_map_offset_t start_addr, + vm_map_offset_t end_addr, + vm_prot_t prot, + unsigned int flags) +{ + pt_entry_t template; + pt_entry_t *pte; + spl_t spl; + + template = pa_to_pte(start_addr) + | INTEL_PTE_REF + | INTEL_PTE_MOD + | INTEL_PTE_WIRED + | INTEL_PTE_VALID; + + if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) { + template |= INTEL_PTE_NCACHE; + if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT))) + template |= INTEL_PTE_PTA; + } + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + + + while (start_addr < end_addr) { + spl = splhigh(); + pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); + if (pte == PT_ENTRY_NULL) { + panic("pmap_map_bd: Invalid kernel address\n"); + } + pmap_store_pte(pte, template); + splx(spl); + pte_increment_pa(template); + virt += PAGE_SIZE; + start_addr += PAGE_SIZE; + } + + + flush_tlb(); + return(virt); +} + +extern char *first_avail; +extern vm_offset_t virtual_avail, virtual_end; +extern pmap_paddr_t avail_start, avail_end; +extern vm_offset_t sHIB; +extern vm_offset_t eHIB; +extern vm_offset_t stext; +extern vm_offset_t etext; +extern vm_offset_t sdata; + +void +pmap_cpu_init(void) +{ + /* + * Here early in the life of a processor (from cpu_mode_init()). + * Ensure global page feature is disabled. + */ + set_cr4(get_cr4() &~ CR4_PGE); + + /* + * Initialize the per-cpu, TLB-related fields. + */ + current_cpu_datap()->cpu_kernel_cr3 = kernel_pmap->pm_cr3; + current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3; + current_cpu_datap()->cpu_tlb_invalid = FALSE; +} + + + +/* + * Bootstrap the system enough to run with virtual memory. + * Map the kernel's code and data, and allocate the system page table. + * Called with mapping OFF. Page_size must already be set. 
+ */ + +void +pmap_bootstrap( + __unused vm_offset_t load_start, + __unused boolean_t IA32e) +{ +#if NCOPY_WINDOWS > 0 + vm_offset_t va; + int i; +#endif + + assert(IA32e); + + vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address + * known to VM */ + /* + * The kernel's pmap is statically allocated so we don't + * have to use pmap_create, which is unlikely to work + * correctly at this part of the boot sequence. + */ + + kernel_pmap = &kernel_pmap_store; + kernel_pmap->ref_count = 1; + kernel_pmap->nx_enabled = FALSE; + kernel_pmap->pm_task_map = TASK_MAP_64BIT; + kernel_pmap->pm_obj = (vm_object_t) NULL; + kernel_pmap->dirbase = (pd_entry_t *)((uintptr_t)IdlePTD); + kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT); + kernel_pmap->pm_pml4 = IdlePML4; + kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4); + + + current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3; + + nkpt = NKPT; + OSAddAtomic(NKPT, &inuse_ptepages_count); + + virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail; + virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); + +#if NCOPY_WINDOWS > 0 + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. 
+ */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*INTEL_PGBYTES); + + va = virtual_avail; + + for (i=0; icpu_pmap); + kprintf("mapwindow %p\n", current_cpu_datap()->cpu_pmap->mapwindow); + kprintf("two stuff %p %p\n", + (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP), + (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR)); +#endif + SYSMAP(caddr_t, + (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP), + (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR), + 1); + current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = + &(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP_store); + *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; + } + + /* DMAP user for debugger */ + SYSMAP(caddr_t, DMAP1, DADDR1, 1); + SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */ + + virtual_avail = va; +#endif + + if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) { + if (0 != ((npvhash + 1) & npvhash)) { + kprintf("invalid hash %d, must be ((2^N)-1), " + "using default %d\n", npvhash, NPVHASH); + npvhash = NPVHASH; + } + } else { + npvhash = NPVHASH; + } + + printf("npvhash=%d\n", npvhash); + + simple_lock_init(&kernel_pmap->lock, 0); + simple_lock_init(&pv_hashed_free_list_lock, 0); + simple_lock_init(&pv_hashed_kern_free_list_lock, 0); + simple_lock_init(&pv_hash_table_lock,0); + + pmap_cpu_init(); + + kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n", + (long)KERNEL_BASE, (long)virtual_end); + kprintf("Available physical space from 0x%llx to 0x%llx\n", + avail_start, avail_end); + + /* + * The -no_shared_cr3 boot-arg is a debugging feature (set by default + * in the DEBUG kernel) to force the kernel to switch to its own map + * (and cr3) when control is in kernelspace. The kernel's map does not + * include (i.e. share) userspace so wild references will cause + * a panic. Only copyin and copyout are exempt from this. 
+ */ + (void) PE_parse_boot_argn("-no_shared_cr3", + &no_shared_cr3, sizeof (no_shared_cr3)); + if (no_shared_cr3) + kprintf("Kernel not sharing user map\n"); + +#ifdef PMAP_TRACES + if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) { + kprintf("Kernel traces for pmap operations enabled\n"); + } +#endif /* PMAP_TRACES */ +} + +void +pmap_virtual_space( + vm_offset_t *startp, + vm_offset_t *endp) +{ + *startp = virtual_avail; + *endp = virtual_end; +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + */ +void +pmap_init(void) +{ + long npages; + vm_offset_t addr; + vm_size_t s; + vm_map_offset_t vaddr; + ppnum_t ppn; + + + kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store; + _vm_object_allocate((vm_object_size_t)NPML4PGS, &kpml4obj_object_store); + + kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store; + _vm_object_allocate((vm_object_size_t)NPDPTPGS, &kpdptobj_object_store); + + kernel_pmap->pm_obj = &kptobj_object_store; + _vm_object_allocate((vm_object_size_t)NPDEPGS, &kptobj_object_store); + + /* + * Allocate memory for the pv_head_table and its lock bits, + * the modify bit array, and the pte_page table. + */ + + /* + * zero bias all these arrays now instead of off avail_start + * so we cover all memory + */ + + npages = i386_btop(avail_end); + s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages + + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) + + pv_lock_table_size(npages) + + pv_hash_lock_table_size((npvhash+1)) + + npages); + + s = round_page(s); + if (kernel_memory_allocate(kernel_map, &addr, s, 0, + KMA_KOBJECT | KMA_PERMANENT) + != KERN_SUCCESS) + panic("pmap_init"); + + memset((char *)addr, 0, s); + +#if PV_DEBUG + if (0 == npvhash) panic("npvhash not initialized"); +#endif + + /* + * Allocate the structures first to preserve word-alignment. 
+ */ + pv_head_table = (pv_rooted_entry_t) addr; + addr = (vm_offset_t) (pv_head_table + npages); + + pv_hash_table = (pv_hashed_entry_t *)addr; + addr = (vm_offset_t) (pv_hash_table + (npvhash + 1)); + + pv_lock_table = (char *) addr; + addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); + + pv_hash_lock_table = (char *) addr; + addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1))); + + pmap_phys_attributes = (char *) addr; + + ppnum_t last_pn = i386_btop(avail_end); + unsigned int i; + pmap_memory_region_t *pmptr = pmap_memory_regions; + for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { + if (pmptr->type != kEfiConventionalMemory) + continue; + unsigned int pn; + for (pn = pmptr->base; pn <= pmptr->end; pn++) { + if (pn < last_pn) { + pmap_phys_attributes[pn] |= PHYS_MANAGED; + if (pn > last_managed_page) + last_managed_page = pn; + } + } + } + + /* + * Create the zone of physical maps, + * and of the physical-to-virtual entries. + */ + s = (vm_size_t) sizeof(struct pmap); + pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ + s = (vm_size_t) sizeof(struct pv_hashed_entry); + pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */ + s = 63; + pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ + + + /* create pv entries for kernel pages mapped by low level + startup code. these have to exist so we can pmap_remove() + e.g. kext pages from the middle of our addr space */ + + vaddr = (vm_map_offset_t) VM_MIN_KERNEL_ADDRESS; + for (ppn = 0; ppn < i386_btop(avail_start); ppn++) { + pv_rooted_entry_t pv_e; + + pv_e = pai_to_pvh(ppn); + pv_e->va = vaddr; + vaddr += PAGE_SIZE; + pv_e->pmap = kernel_pmap; + queue_init(&pv_e->qlink); + } + pmap_initialized = TRUE; + + /* + * Initialize pmap cache. 
+ */ + pmap_cache_list = PMAP_NULL; + pmap_cache_count = 0; + simple_lock_init(&pmap_cache_lock, 0); + + max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t); + + /* + * Ensure the kernel's PML4 entry exists for the basement + * before this is shared with any user. + */ + pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT); +} + + +/* + * this function is only used for debugging fron the vm layer + */ +boolean_t +pmap_verify_free( + ppnum_t pn) +{ + pv_rooted_entry_t pv_h; + int pai; + boolean_t result; + + assert(pn != vm_page_fictitious_addr); + + if (!pmap_initialized) + return(TRUE); + + if (pn == vm_page_guard_addr) + return TRUE; + + pai = ppn_to_pai(pn); + if (!IS_MANAGED_PAGE(pai)) + return(FALSE); + pv_h = pai_to_pvh(pn); + result = (pv_h->pmap == PMAP_NULL); + return(result); +} + +boolean_t +pmap_is_empty( + pmap_t pmap, + vm_map_offset_t va_start, + vm_map_offset_t va_end) +{ + vm_map_offset_t offset; + ppnum_t phys_page; + + if (pmap == PMAP_NULL) { + return TRUE; + } + + /* + * Check the resident page count + * - if it's zero, the pmap is completely empty. + * This short-circuit test prevents a virtual address scan which is + * painfully slow for 64-bit spaces. + * This assumes the count is correct + * .. the debug kernel ought to be checking perhaps by page table walk. + */ + if (pmap->stats.resident_count == 0) + return TRUE; + + for (offset = va_start; + offset < va_end; + offset += PAGE_SIZE_64) { + phys_page = pmap_find_phys(pmap, offset); + if (phys_page) { + kprintf("pmap_is_empty(%p,0x%llx,0x%llx): " + "page %d at 0x%llx\n", + pmap, va_start, va_end, phys_page, offset); + return FALSE; + } + } + + return TRUE; +} + + +/* + * Create and return a physical map. + * + * If the size specified for the map + * is zero, the map is an actual physical + * map, and may be referenced by the + * hardware. + * + * If the size specified is non-zero, + * the map will be used in software only, and + * is bounded by that size. 
+ */ +pmap_t +pmap_create( + vm_map_size_t sz, + boolean_t is_64bit) +{ + pmap_t p; + vm_size_t size; + pml4_entry_t *pml4; + pml4_entry_t *kpml4; + + PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, + (uint32_t) (sz>>32), (uint32_t) sz, is_64bit, 0, 0); + + size = (vm_size_t) sz; + + /* + * A software use-only map doesn't even need a map. + */ + + if (size != 0) { + return(PMAP_NULL); + } + + p = (pmap_t) zalloc(pmap_zone); + if (PMAP_NULL == p) + panic("pmap_create zalloc"); + + /* init counts now since we'll be bumping some */ + simple_lock_init(&p->lock, 0); + p->stats.resident_count = 0; + p->stats.resident_max = 0; + p->stats.wired_count = 0; + p->ref_count = 1; + p->nx_enabled = 1; + p->pm_shared = FALSE; + + p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;; + + /* alloc the pml4 page in kernel vm */ + if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_pml4), PAGE_SIZE)) + panic("pmap_create kmem_alloc_kobject pml4"); + + memset((char *)p->pm_pml4, 0, PAGE_SIZE); + p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4); + + OSAddAtomic(1, &inuse_ptepages_count); + + /* allocate the vm_objs to hold the pdpt, pde and pte pages */ + + p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS)); + if (NULL == p->pm_obj_pml4) + panic("pmap_create pdpt obj"); + + p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS)); + if (NULL == p->pm_obj_pdpt) + panic("pmap_create pdpt obj"); + + p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS)); + if (NULL == p->pm_obj) + panic("pmap_create pte obj"); + + /* All pmaps share the kennel's pml4 */ + pml4 = pmap64_pml4(p, 0ULL); + kpml4 = kernel_pmap->pm_pml4; + pml4[KERNEL_PML4_INDEX] = kpml4[KERNEL_PML4_INDEX]; + pml4[KERNEL_KEXTS_INDEX] = kpml4[KERNEL_KEXTS_INDEX]; + pml4[KERNEL_PHYSMAP_INDEX] = kpml4[KERNEL_PHYSMAP_INDEX]; + + PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, + p, is_64bit, 0, 0, 0); + + return(p); +} + +/* + * Retire the given physical 
map from service. + * Should only be called if the map contains + * no valid mappings. + */ + +void +pmap_destroy( + register pmap_t p) +{ + register int c; + + if (p == PMAP_NULL) + return; + + PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, + p, 0, 0, 0, 0); + + PMAP_LOCK(p); + + c = --p->ref_count; + + if (c == 0) { + /* + * If some cpu is not using the physical pmap pointer that it + * is supposed to be (see set_dirbase), we might be using the + * pmap that is being destroyed! Make sure we are + * physically on the right pmap: + */ + PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL); + } + + PMAP_UNLOCK(p); + + if (c != 0) { + PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END, + p, 1, 0, 0, 0); + return; /* still in use */ + } + + /* + * Free the memory maps, then the + * pmap structure. + */ + int inuse_ptepages = 0; + + inuse_ptepages++; + kmem_free(kernel_map, (vm_offset_t)p->pm_pml4, PAGE_SIZE); + + inuse_ptepages += p->pm_obj_pml4->resident_page_count; + vm_object_deallocate(p->pm_obj_pml4); + + inuse_ptepages += p->pm_obj_pdpt->resident_page_count; + vm_object_deallocate(p->pm_obj_pdpt); + + inuse_ptepages += p->pm_obj->resident_page_count; + vm_object_deallocate(p->pm_obj); + + OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count); + + zfree(pmap_zone, p); + + PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END, + 0, 0, 0, 0, 0); +} + +/* + * Add a reference to the specified pmap. + */ + +void +pmap_reference(pmap_t p) +{ + if (p != PMAP_NULL) { + PMAP_LOCK(p); + p->ref_count++; + PMAP_UNLOCK(p);; + } +} + +/* + * Remove a range of hardware page-table entries. + * The entries given are the first (inclusive) + * and last (exclusive) entries for the VM pages. + * The virtual address is the va for the first pte. + * + * The pmap must be locked. + * If the pmap is not the kernel pmap, the range must lie + * entirely within one pte-page. This is NOT checked. + * Assumes that the pte-page exists. 
+ */ + +void +pmap_remove_range( + pmap_t pmap, + vm_map_offset_t start_vaddr, + pt_entry_t *spte, + pt_entry_t *epte) +{ + pt_entry_t *cpte; + pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; + pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; + pv_hashed_entry_t pvh_e; + int pvh_cnt = 0; + int num_removed, num_unwired, num_found; + int pai; + pmap_paddr_t pa; + vm_map_offset_t vaddr; + + num_removed = 0; + num_unwired = 0; + num_found = 0; + + /* invalidate the PTEs first to "freeze" them */ + for (cpte = spte, vaddr = start_vaddr; + cpte < epte; + cpte++, vaddr += PAGE_SIZE_64) { + + pa = pte_to_pa(*cpte); + if (pa == 0) + continue; + num_found++; + + if (iswired(*cpte)) + num_unwired++; + + pai = pa_index(pa); + + if (!IS_MANAGED_PAGE(pai)) { + /* + * Outside range of managed physical memory. + * Just remove the mappings. + */ + pmap_store_pte(cpte, 0); + continue; + } + + /* invalidate the PTE */ + pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID)); + } + + if (num_found == 0) { + /* nothing was changed: we're done */ + goto update_counts; + } + + /* propagate the invalidates to other CPUs */ + + PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr); + + for (cpte = spte, vaddr = start_vaddr; + cpte < epte; + cpte++, vaddr += PAGE_SIZE_64) { + + pa = pte_to_pa(*cpte); + if (pa == 0) + continue; + + pai = pa_index(pa); + + LOCK_PVH(pai); + + pa = pte_to_pa(*cpte); + if (pa == 0) { + UNLOCK_PVH(pai); + continue; + } + num_removed++; + + /* + * Get the modify and reference bits, then + * nuke the entry in the page table + */ + /* remember reference and change */ + pmap_phys_attributes[pai] |= + (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED)); + /* completely invalidate the PTE */ + pmap_store_pte(cpte, 0); + + /* + * Remove the mapping from the pvlist for this physical page. 
+ */ + pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai); + + UNLOCK_PVH(pai); + + if (pvh_e != PV_HASHED_ENTRY_NULL) { + pvh_e->qlink.next = (queue_entry_t) pvh_eh; + pvh_eh = pvh_e; + + if (pvh_et == PV_HASHED_ENTRY_NULL) { + pvh_et = pvh_e; + } + pvh_cnt++; + } + } /* for loop */ + + if (pvh_eh != PV_HASHED_ENTRY_NULL) { + PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); + } +update_counts: + /* + * Update the counts + */ +#if TESTING + if (pmap->stats.resident_count < num_removed) + panic("pmap_remove_range: resident_count"); +#endif + assert(pmap->stats.resident_count >= num_removed); + OSAddAtomic(-num_removed, &pmap->stats.resident_count); + +#if TESTING + if (pmap->stats.wired_count < num_unwired) + panic("pmap_remove_range: wired_count"); +#endif + assert(pmap->stats.wired_count >= num_unwired); + OSAddAtomic(-num_unwired, &pmap->stats.wired_count); + + return; +} + +/* + * Remove phys addr if mapped in specified map + * + */ +void +pmap_remove_some_phys( + __unused pmap_t map, + __unused ppnum_t pn) +{ + +/* Implement to support working set code */ + +} + +/* + * Remove the given range of addresses + * from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the hardware page size. + */ +void +pmap_remove( + pmap_t map, + addr64_t s64, + addr64_t e64) +{ + pt_entry_t *pde; + pt_entry_t *spte, *epte; + addr64_t l64; + uint64_t deadline; + + pmap_intr_assert(); + + if (map == PMAP_NULL || s64 == e64) + return; + + PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START, + map, + (uint32_t) (s64 >> 32), s64, + (uint32_t) (e64 >> 32), e64); + + + PMAP_LOCK(map); + +#if 0 + /* + * Check that address range in the kernel does not overlap the stacks. + * We initialize local static min/max variables once to avoid making + * 2 function calls for every remove. Note also that these functions + * both return 0 before kernel stacks have been initialized, and hence + * the panic is not triggered in this case. 
+ */ + if (map == kernel_pmap) { + static vm_offset_t kernel_stack_min = 0; + static vm_offset_t kernel_stack_max = 0; + + if (kernel_stack_min == 0) { + kernel_stack_min = min_valid_stack_address(); + kernel_stack_max = max_valid_stack_address(); + } + if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) || + (kernel_stack_min < e64 && e64 <= kernel_stack_max)) + panic("pmap_remove() attempted in kernel stack"); + } +#else + + /* + * The values of kernel_stack_min and kernel_stack_max are no longer + * relevant now that we allocate kernel stacks in the kernel map, + * so the old code above no longer applies. If we wanted to check that + * we weren't removing a mapping of a page in a kernel stack we'd + * mark the PTE with an unused bit and check that here. + */ + +#endif + + deadline = rdtsc64() + max_preemption_latency_tsc; + + while (s64 < e64) { + l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1); + if (l64 > e64) + l64 = e64; + pde = pmap_pde(map, s64); + + if (pde && (*pde & INTEL_PTE_VALID)) { + if (*pde & INTEL_PTE_PS) { + /* + * If we're removing a superpage, pmap_remove_range() + * must work on level 2 instead of level 1; and we're + * only passing a single level 2 entry instead of a + * level 1 range. + */ + spte = pde; + epte = spte+1; /* excluded */ + } else { + spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1))); + spte = &spte[ptenum(s64)]; + epte = &spte[intel_btop(l64 - s64)]; + } + pmap_remove_range(map, s64, spte, epte); + } + s64 = l64; + pde++; + + if (s64 < e64 && rdtsc64() >= deadline) { + PMAP_UNLOCK(map) + PMAP_LOCK(map) + deadline = rdtsc64() + max_preemption_latency_tsc; + } + } + + PMAP_UNLOCK(map); + + PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END, + map, 0, 0, 0, 0); + +} + +/* + * Routine: pmap_page_protect + * + * Function: + * Lower the permission for all mappings to a given + * page. 
+ */ +void +pmap_page_protect( + ppnum_t pn, + vm_prot_t prot) +{ + pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; + pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; + pv_hashed_entry_t nexth; + int pvh_cnt = 0; + pv_rooted_entry_t pv_h; + pv_rooted_entry_t pv_e; + pv_hashed_entry_t pvh_e; + pt_entry_t *pte; + int pai; + pmap_t pmap; + boolean_t remove; + + pmap_intr_assert(); + assert(pn != vm_page_fictitious_addr); + if (pn == vm_page_guard_addr) + return; + + pai = ppn_to_pai(pn); + + if (!IS_MANAGED_PAGE(pai)) { + /* + * Not a managed page. + */ + return; + } + PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, + pn, prot, 0, 0, 0); + + /* + * Determine the new protection. + */ + switch (prot) { + case VM_PROT_READ: + case VM_PROT_READ | VM_PROT_EXECUTE: + remove = FALSE; + break; + case VM_PROT_ALL: + return; /* nothing to do */ + default: + remove = TRUE; + break; + } + + pv_h = pai_to_pvh(pai); + + LOCK_PVH(pai); + + + /* + * Walk down PV list, if any, changing or removing all mappings. + */ + if (pv_h->pmap == PMAP_NULL) + goto done; + + pv_e = pv_h; + pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */ + + do { + vm_map_offset_t vaddr; + + pmap = pv_e->pmap; + vaddr = pv_e->va; + pte = pmap_pte(pmap, vaddr); + if (0 == pte) { + panic("pmap_page_protect() " + "pmap=%p pn=0x%x vaddr=0x%llx\n", + pmap, pn, vaddr); + } + nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink); + + /* + * Remove the mapping if new protection is NONE + * or if write-protecting a kernel mapping. + */ + if (remove || pmap == kernel_pmap) { + /* + * Remove the mapping, collecting dirty bits. 
+ */ + pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID); + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); + pmap_phys_attributes[pai] |= + *pte & (PHYS_MODIFIED|PHYS_REFERENCED); + pmap_store_pte(pte, 0); + +#if TESTING + if (pmap->stats.resident_count < 1) + panic("pmap_page_protect: resident_count"); +#endif + assert(pmap->stats.resident_count >= 1); + OSAddAtomic(-1, &pmap->stats.resident_count); + + /* + * Deal with the pv_rooted_entry. + */ + + if (pv_e == pv_h) { + /* + * Fix up head later. + */ + pv_h->pmap = PMAP_NULL; + } else { + /* + * Delete this entry. + */ + pv_hash_remove(pvh_e); + pvh_e->qlink.next = (queue_entry_t) pvh_eh; + pvh_eh = pvh_e; + + if (pvh_et == PV_HASHED_ENTRY_NULL) + pvh_et = pvh_e; + pvh_cnt++; + } + } else { + /* + * Write-protect. + */ + pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE); + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); + } + pvh_e = nexth; + } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h); + + + /* + * If pv_head mapping was removed, fix it up. + */ + if (pv_h->pmap == PMAP_NULL) { + pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink); + + if (pvh_e != (pv_hashed_entry_t) pv_h) { + pv_hash_remove(pvh_e); + pv_h->pmap = pvh_e->pmap; + pv_h->va = pvh_e->va; + pvh_e->qlink.next = (queue_entry_t) pvh_eh; + pvh_eh = pvh_e; + + if (pvh_et == PV_HASHED_ENTRY_NULL) + pvh_et = pvh_e; + pvh_cnt++; + } + } + if (pvh_eh != PV_HASHED_ENTRY_NULL) { + PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); + } +done: + UNLOCK_PVH(pai); + + PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END, + 0, 0, 0, 0, 0); +} + + +/* + * Routine: + * pmap_disconnect + * + * Function: + * Disconnect all mappings for this page and return reference and change status + * in generic format. 
+ * + */ +unsigned int pmap_disconnect( + ppnum_t pa) +{ + pmap_page_protect(pa, 0); /* disconnect the page */ + return (pmap_get_refmod(pa)); /* return ref/chg status */ +} + +/* + * Set the physical protection on the + * specified range of this map as requested. + * Will not increase permissions. + */ +void +pmap_protect( + pmap_t map, + vm_map_offset_t sva, + vm_map_offset_t eva, + vm_prot_t prot) +{ + pt_entry_t *pde; + pt_entry_t *spte, *epte; + vm_map_offset_t lva; + vm_map_offset_t orig_sva; + boolean_t set_NX; + int num_found = 0; + + pmap_intr_assert(); + + if (map == PMAP_NULL) + return; + + if (prot == VM_PROT_NONE) { + pmap_remove(map, sva, eva); + return; + } + PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START, + map, + (uint32_t) (sva >> 32), (uint32_t) sva, + (uint32_t) (eva >> 32), (uint32_t) eva); + + if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled) + set_NX = FALSE; + else + set_NX = TRUE; + + PMAP_LOCK(map); + + orig_sva = sva; + while (sva < eva) { + lva = (sva + pde_mapped_size) & ~(pde_mapped_size - 1); + if (lva > eva) + lva = eva; + pde = pmap_pde(map, sva); + if (pde && (*pde & INTEL_PTE_VALID)) { + if (*pde & INTEL_PTE_PS) { + /* superpage */ + spte = pde; + epte = spte+1; /* excluded */ + } else { + spte = pmap_pte(map, (sva & ~(pde_mapped_size - 1))); + spte = &spte[ptenum(sva)]; + epte = &spte[intel_btop(lva - sva)]; + } + + for (; spte < epte; spte++) { + if (!(*spte & INTEL_PTE_VALID)) + continue; + + if (prot & VM_PROT_WRITE) + pmap_update_pte(spte, *spte, + *spte | INTEL_PTE_WRITE); + else + pmap_update_pte(spte, *spte, + *spte & ~INTEL_PTE_WRITE); + + if (set_NX) + pmap_update_pte(spte, *spte, + *spte | INTEL_PTE_NX); + else + pmap_update_pte(spte, *spte, + *spte & ~INTEL_PTE_NX); + + num_found++; + } + } + sva = lva; + } + if (num_found) + PMAP_UPDATE_TLBS(map, orig_sva, eva); + + PMAP_UNLOCK(map); + + PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END, + 0, 0, 0, 0, 0); + +} + +/* Map a (possibly) autogenned 
block */ +void +pmap_map_block( + pmap_t pmap, + addr64_t va, + ppnum_t pa, + uint32_t size, + vm_prot_t prot, + int attr, + __unused unsigned int flags) +{ + uint32_t page; + int cur_page_size; + + if (attr & VM_MEM_SUPERPAGE) + cur_page_size = SUPERPAGE_SIZE; + else + cur_page_size = PAGE_SIZE; + + for (page = 0; page < size; page+=cur_page_size/PAGE_SIZE) { + pmap_enter(pmap, va, pa, prot, attr, TRUE); + va += cur_page_size; + pa+=cur_page_size/PAGE_SIZE; + } +} + + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte cannot be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +void +pmap_enter( + register pmap_t pmap, + vm_map_offset_t vaddr, + ppnum_t pn, + vm_prot_t prot, + unsigned int flags, + boolean_t wired) +{ + pt_entry_t *pte; + pv_rooted_entry_t pv_h; + int pai; + pv_hashed_entry_t pvh_e; + pv_hashed_entry_t pvh_new; + pt_entry_t template; + pmap_paddr_t old_pa; + pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn); + boolean_t need_tlbflush = FALSE; + boolean_t set_NX; + char oattr; + boolean_t old_pa_locked; + boolean_t superpage = flags & VM_MEM_SUPERPAGE; + vm_object_t delpage_pm_obj = NULL; + int delpage_pde_index = 0; + + + pmap_intr_assert(); + assert(pn != vm_page_fictitious_addr); + if (pmap_debug) + kprintf("pmap_enter(%p,%llu,%u)\n", pmap, vaddr, pn); + if (pmap == PMAP_NULL) + return; + if (pn == vm_page_guard_addr) + return; + + PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START, + pmap, + (uint32_t) (vaddr >> 32), (uint32_t) vaddr, + pn, prot); + + if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled) + set_NX = FALSE; + else + set_NX = TRUE; + + /* + * Must allocate a new pvlist entry while we're unlocked; + * zalloc may 
cause pageout (which will lock the pmap system). + * If we determine we need a pvlist entry, we will unlock + * and allocate one. Then we will retry, throughing away + * the allocated entry later (if we no longer need it). + */ + + pvh_new = PV_HASHED_ENTRY_NULL; +Retry: + pvh_e = PV_HASHED_ENTRY_NULL; + + PMAP_LOCK(pmap); + + /* + * Expand pmap to include this pte. Assume that + * pmap is always expanded to include enough hardware + * pages to map one VM page. + */ + if(superpage) { + while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) { + /* need room for another pde entry */ + PMAP_UNLOCK(pmap); + pmap_expand_pdpt(pmap, vaddr); + PMAP_LOCK(pmap); + } + } else { + while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) { + /* + * Must unlock to expand the pmap + * going to grow pde level page(s) + */ + PMAP_UNLOCK(pmap); + pmap_expand(pmap, vaddr); + PMAP_LOCK(pmap); + } + } + + if (superpage && *pte && !(*pte & INTEL_PTE_PS)) { + /* + * There is still an empty page table mapped that + * was used for a previous base page mapping. + * Remember the PDE and the PDE index, so that we + * can free the page at the end of this function. + */ + delpage_pde_index = (int)pdeidx(pmap, vaddr); + delpage_pm_obj = pmap->pm_obj; + *pte = 0; + } + + old_pa = pte_to_pa(*pte); + pai = pa_index(old_pa); + old_pa_locked = FALSE; + + /* + * if we have a previous managed page, lock the pv entry now. after + * we lock it, check to see if someone beat us to the lock and if so + * drop the lock + */ + if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) { + LOCK_PVH(pai); + old_pa_locked = TRUE; + old_pa = pte_to_pa(*pte); + if (0 == old_pa) { + UNLOCK_PVH(pai); /* another path beat us to it */ + old_pa_locked = FALSE; + } + } + + /* + * Special case if the incoming physical page is already mapped + * at this address. 
+ */ + if (old_pa == pa) { + + /* + * May be changing its wired attribute or protection + */ + + template = pa_to_pte(pa) | INTEL_PTE_VALID; + + if (VM_MEM_NOT_CACHEABLE == + (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) { + if (!(flags & VM_MEM_GUARDED)) + template |= INTEL_PTE_PTA; + template |= INTEL_PTE_NCACHE; + } + if (pmap != kernel_pmap) + template |= INTEL_PTE_USER; + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + + if (set_NX) + template |= INTEL_PTE_NX; + + if (wired) { + template |= INTEL_PTE_WIRED; + if (!iswired(*pte)) + OSAddAtomic(+1, + &pmap->stats.wired_count); + } else { + if (iswired(*pte)) { + assert(pmap->stats.wired_count >= 1); + OSAddAtomic(-1, + &pmap->stats.wired_count); + } + } + if (superpage) /* this path can not be used */ + template |= INTEL_PTE_PS; /* to change the page size! */ + + /* store modified PTE and preserve RC bits */ + pmap_update_pte(pte, *pte, + template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD))); + if (old_pa_locked) { + UNLOCK_PVH(pai); + old_pa_locked = FALSE; + } + need_tlbflush = TRUE; + goto Done; + } + + /* + * Outline of code from here: + * 1) If va was mapped, update TLBs, remove the mapping + * and remove old pvlist entry. + * 2) Add pvlist entry for new mapping + * 3) Enter new mapping. + * + * If the old physical page is not managed step 1) is skipped + * (except for updating the TLBs), and the mapping is + * overwritten at step 3). If the new physical page is not + * managed, step 2) is skipped. + */ + + if (old_pa != (pmap_paddr_t) 0) { + + /* + * Don't do anything to pages outside valid memory here. + * Instead convince the code that enters a new mapping + * to overwrite the old one. 
+ */ + + /* invalidate the PTE */ + pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID)); + /* propagate invalidate everywhere */ + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + /* remember reference and change */ + oattr = (char) (*pte & (PHYS_MODIFIED | PHYS_REFERENCED)); + /* completely invalidate the PTE */ + pmap_store_pte(pte, 0); + + if (IS_MANAGED_PAGE(pai)) { +#if TESTING + if (pmap->stats.resident_count < 1) + panic("pmap_enter: resident_count"); +#endif + assert(pmap->stats.resident_count >= 1); + OSAddAtomic(-1, + &pmap->stats.resident_count); + + if (iswired(*pte)) { +#if TESTING + if (pmap->stats.wired_count < 1) + panic("pmap_enter: wired_count"); +#endif + assert(pmap->stats.wired_count >= 1); + OSAddAtomic(-1, + &pmap->stats.wired_count); + } + pmap_phys_attributes[pai] |= oattr; + + /* + * Remove the mapping from the pvlist for + * this physical page. + * We'll end up with either a rooted pv or a + * hashed pv + */ + pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai); + + } else { + + /* + * old_pa is not managed. + * Do removal part of accounting. + */ + + if (iswired(*pte)) { + assert(pmap->stats.wired_count >= 1); + OSAddAtomic(-1, + &pmap->stats.wired_count); + } + } + } + + /* + * if we had a previously managed paged locked, unlock it now + */ + if (old_pa_locked) { + UNLOCK_PVH(pai); + old_pa_locked = FALSE; + } + + pai = pa_index(pa); /* now working with new incoming phys page */ + if (IS_MANAGED_PAGE(pai)) { + + /* + * Step 2) Enter the mapping in the PV list for this + * physical page. + */ + pv_h = pai_to_pvh(pai); + + LOCK_PVH(pai); + + if (pv_h->pmap == PMAP_NULL) { + /* + * No mappings yet, use rooted pv + */ + pv_h->va = vaddr; + pv_h->pmap = pmap; + queue_init(&pv_h->qlink); + } else { + /* + * Add new pv_hashed_entry after header. 
+ */ + if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) { + pvh_e = pvh_new; + pvh_new = PV_HASHED_ENTRY_NULL; + } else if (PV_HASHED_ENTRY_NULL == pvh_e) { + PV_HASHED_ALLOC(pvh_e); + if (PV_HASHED_ENTRY_NULL == pvh_e) { + /* + * the pv list is empty. if we are on + * the kernel pmap we'll use one of + * the special private kernel pv_e's, + * else, we need to unlock + * everything, zalloc a pv_e, and + * restart bringing in the pv_e with + * us. + */ + if (kernel_pmap == pmap) { + PV_HASHED_KERN_ALLOC(pvh_e); + } else { + UNLOCK_PVH(pai); + PMAP_UNLOCK(pmap); + pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); + goto Retry; + } + } + } + if (PV_HASHED_ENTRY_NULL == pvh_e) + panic("pvh_e exhaustion"); + + pvh_e->va = vaddr; + pvh_e->pmap = pmap; + pvh_e->ppn = pn; + pv_hash_add(pvh_e, pv_h); + + /* + * Remember that we used the pvlist entry. + */ + pvh_e = PV_HASHED_ENTRY_NULL; + } + + /* + * only count the mapping + * for 'managed memory' + */ + OSAddAtomic(+1, & pmap->stats.resident_count); + if (pmap->stats.resident_count > pmap->stats.resident_max) { + pmap->stats.resident_max = pmap->stats.resident_count; + } + } + /* + * Step 3) Enter the mapping. + * + * Build a template to speed up entering - + * only the pfn changes. 
+ */ + template = pa_to_pte(pa) | INTEL_PTE_VALID; + + if (flags & VM_MEM_NOT_CACHEABLE) { + if (!(flags & VM_MEM_GUARDED)) + template |= INTEL_PTE_PTA; + template |= INTEL_PTE_NCACHE; + } + if (pmap != kernel_pmap) + template |= INTEL_PTE_USER; + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + if (set_NX) + template |= INTEL_PTE_NX; + if (wired) { + template |= INTEL_PTE_WIRED; + OSAddAtomic(+1, & pmap->stats.wired_count); + } + if (superpage) + template |= INTEL_PTE_PS; + pmap_store_pte(pte, template); + + /* + * if this was a managed page we delayed unlocking the pv until here + * to prevent pmap_page_protect et al from finding it until the pte + * has been stored + */ + if (IS_MANAGED_PAGE(pai)) { + UNLOCK_PVH(pai); + } +Done: + if (need_tlbflush == TRUE) + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + + if (pvh_e != PV_HASHED_ENTRY_NULL) { + PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1); + } + if (pvh_new != PV_HASHED_ENTRY_NULL) { + PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1); + } + PMAP_UNLOCK(pmap); + + if (delpage_pm_obj) { + vm_page_t m; + + vm_object_lock(delpage_pm_obj); + m = vm_page_lookup(delpage_pm_obj, delpage_pde_index); + if (m == VM_PAGE_NULL) + panic("pmap_enter: pte page not in object"); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); + vm_object_unlock(delpage_pm_obj); + } + + PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. 
+ */
+void
+pmap_change_wiring(
+	pmap_t		map,
+	vm_map_offset_t	vaddr,
+	boolean_t	wired)
+{
+	pt_entry_t	*pte;
+
+	PMAP_LOCK(map);
+
+	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
+		panic("pmap_change_wiring: pte missing");
+
+	if (wired && !iswired(*pte)) {
+		/*
+		 * wiring down mapping: count it, then set the wired bit
+		 */
+		OSAddAtomic(+1,  &map->stats.wired_count);
+		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
+	}
+	else if (!wired && iswired(*pte)) {
+		/*
+		 * unwiring mapping: uncount it, then clear the wired bit
+		 */
+		assert(map->stats.wired_count >= 1);
+		OSAddAtomic(-1,  &map->stats.wired_count);
+		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
+	}
+
+	PMAP_UNLOCK(map);
+}
+/*
+ *	Routine:	pmap_expand_pml4
+ *
+ *	Adds one page at the level reached through the PML4 entry for
+ *	vaddr.  A VM page is grabbed (waiting until one is available),
+ *	zeroed, wired and counted in inuse_ptepages_count.  With the
+ *	map->pm_obj_pml4 object lock and then the pmap lock held, the page
+ *	is freed again if pmap64_pdpt() already succeeds for vaddr;
+ *	otherwise it is inserted into map->pm_obj_pml4 at index
+ *	pml4idx(map, vaddr) and the PML4 entry is pointed at it with
+ *	VALID | USER | WRITE.
+ *
+ *	Takes and releases the pmap lock itself.
+ */
+void
+pmap_expand_pml4(
+	pmap_t		map,
+	vm_map_offset_t	vaddr)
+{
+	vm_page_t	m;
+	pmap_paddr_t	pa;
+	uint64_t	i;
+	ppnum_t		pn;
+	pml4_entry_t	*pml4p;
+
+	DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);
+
+	/*
+	 *	Allocate a VM page for the pml4 page
+	 */
+	while ((m = vm_page_grab()) == VM_PAGE_NULL)
+		VM_PAGE_WAIT();
+
+	/*
+	 *	put the page into the pmap's obj list so it
+	 *	can be found later.
+	 */
+	pn = m->phys_page;
+	pa = i386_ptob(pn);
+	i = pml4idx(map, vaddr);
+
+	/*
+	 *	Zero the page.
+	 */
+	pmap_zero_page(pn);
+
+	vm_page_lockspin_queues();
+	vm_page_wire(m);
+	vm_page_unlock_queues();
+
+	OSAddAtomic(1,  &inuse_ptepages_count);
+
+	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
+	vm_object_lock(map->pm_obj_pml4);
+
+	PMAP_LOCK(map);
+	/*
+	 *	See if someone else expanded us first; if so, undo the
+	 *	allocation and its accounting
+	 */
+	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
+		PMAP_UNLOCK(map);
+		vm_object_unlock(map->pm_obj_pml4);
+
+		VM_PAGE_FREE(m);
+
+		OSAddAtomic(-1,  &inuse_ptepages_count);
+		return;
+	}
+
+#if 0 /* DEBUG */
+       if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
+	       panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
+		     map, map->pm_obj_pml4, vaddr, i);
+       }
+#endif
+	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
+	vm_object_unlock(map->pm_obj_pml4);
+
+	/*
+	 *	Set the page directory entry for this page table.
+	 */
+	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
+
+	pmap_store_pte(pml4p, pa_to_pte(pa)
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+
+	PMAP_UNLOCK(map);
+
+	return;
+}
+/*
+ *	Routine:	pmap_expand_pdpt
+ *
+ *	Adds one page at the level reached through the PDPT entry for
+ *	vaddr.  First calls pmap_expand_pml4() until pmap64_pdpt()
+ *	succeeds for vaddr, then follows the same sequence as
+ *	pmap_expand_pml4(): grab, zero, wire and count a page; with the
+ *	map->pm_obj_pdpt object lock and the pmap lock held, either free
+ *	the page because pmap64_pde() already succeeds, or insert it at
+ *	index pdptidx(map, vaddr) and store the PDPT entry with
+ *	VALID | USER | WRITE.
+ */
+void
+pmap_expand_pdpt(
+	pmap_t		map,
+	vm_map_offset_t	vaddr)
+{
+	vm_page_t	m;
+	pmap_paddr_t	pa;
+	uint64_t	i;
+	ppnum_t		pn;
+	pdpt_entry_t	*pdptp;
+
+	DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);
+
+	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
+		pmap_expand_pml4(map, vaddr);
+	}
+
+	/*
+	 *	Allocate a VM page for the pdpt page
+	 */
+	while ((m = vm_page_grab()) == VM_PAGE_NULL)
+		VM_PAGE_WAIT();
+
+	/*
+	 *	put the page into the pmap's obj list so it
+	 *	can be found later.
+	 */
+	pn = m->phys_page;
+	pa = i386_ptob(pn);
+	i = pdptidx(map, vaddr);
+
+	/*
+	 *	Zero the page.
+	 */
+	pmap_zero_page(pn);
+
+	vm_page_lockspin_queues();
+	vm_page_wire(m);
+	vm_page_unlock_queues();
+
+	OSAddAtomic(1,  &inuse_ptepages_count);
+
+	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
+	vm_object_lock(map->pm_obj_pdpt);
+
+	PMAP_LOCK(map);
+	/*
+	 *	See if someone else expanded us first; if so, undo the
+	 *	allocation and its accounting
+	 */
+	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
+		PMAP_UNLOCK(map);
+		vm_object_unlock(map->pm_obj_pdpt);
+
+		VM_PAGE_FREE(m);
+
+		OSAddAtomic(-1,  &inuse_ptepages_count);
+		return;
+	}
+
+#if 0 /* DEBUG */
+       if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
+	       panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
+		     map, map->pm_obj_pdpt, vaddr, i);
+       }
+#endif
+	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
+	vm_object_unlock(map->pm_obj_pdpt);
+
+	/*
+	 *	Set the page directory entry for this page table.
+	 */
+	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
+
+	pmap_store_pte(pdptp, pa_to_pte(pa)
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+
+	PMAP_UNLOCK(map);
+
+	return;
+
+}
+
+
+
+/*
+ *	Routine:	pmap_expand
+ *
+ *	Expands a pmap to be able to map the specified virtual address.
+ *
+ *	Allocates new virtual memory for the P0 or P1 portion of the
+ *	pmap, then re-maps the physical pages that were in the old
+ *	pmap to be in the new pmap.
+ *
+ *	Must be called with the pmap system and the pmap unlocked,
+ *	since these must be unlocked to use vm_allocate or vm_deallocate.
+ *	Thus it must be called in a loop that checks whether the map
+ *	has been expanded enough.
+ *	(We won't loop forever, since page tables aren't shrunk.)
+ *
+ *	NOTE(review): the "P0 or P1 portion" / "re-maps" paragraph above
+ *	does not match the code below, which grabs a single VM page,
+ *	zeroes and wires it, inserts it into map->pm_obj at index
+ *	pdeidx(map, vaddr) and stores a page directory entry pointing at
+ *	it -- presumably inherited text; confirm before relying on it.
+ *
+ *	Panics if map is kernel_pmap and vaddr lies outside
+ *	[KERNEL_BASEMENT, VM_MAX_KERNEL_ADDRESS].
+ */
+void
+pmap_expand(
+	pmap_t		map,
+	vm_map_offset_t	vaddr)
+{
+	pt_entry_t		*pdp;
+	register vm_page_t	m;
+	register pmap_paddr_t	pa;
+	uint64_t		i;
+	ppnum_t                 pn;
+
+
+	/*
+ 	 * For the kernel, the virtual address must be in or above the basement
+	 * which is for kexts and is in the 512GB immediately below the kernel..
+	 * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
+	 */
+	if (map == kernel_pmap && 
+	    !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))
+		panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);
+
+
+	while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
+		/* need room for another pde entry */
+		pmap_expand_pdpt(map, vaddr);
+	}
+
+	/*
+	 *	Allocate a VM page for the pde entries.
+	 */
+	while ((m = vm_page_grab()) == VM_PAGE_NULL)
+		VM_PAGE_WAIT();
+
+	/*
+	 *	put the page into the pmap's obj list so it
+	 *	can be found later.
+	 */
+	pn = m->phys_page;
+	pa = i386_ptob(pn);
+	i = pdeidx(map, vaddr);
+
+	/*
+	 *	Zero the page.
+	 */
+	pmap_zero_page(pn);
+
+	vm_page_lockspin_queues();
+	vm_page_wire(m);
+	vm_page_unlock_queues();
+
+	OSAddAtomic(1,  &inuse_ptepages_count);
+
+	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
+	vm_object_lock(map->pm_obj);
+
+	PMAP_LOCK(map);
+
+	/*
+	 *	See if someone else expanded us first; if so, undo the
+	 *	allocation and its accounting
+	 */
+	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
+		PMAP_UNLOCK(map);
+		vm_object_unlock(map->pm_obj);
+
+		VM_PAGE_FREE(m);
+
+		OSAddAtomic(-1,  &inuse_ptepages_count);
+		return;
+	}
+
+#if 0 /* DEBUG */
+       if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
+	       panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
+		     map, map->pm_obj, vaddr, i);
+       }
+#endif
+	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
+	vm_object_unlock(map->pm_obj);
+
+	/*
+	 *	Set the page directory entry for this page table.
+	 */
+	pdp = pmap_pde(map, vaddr);
+	pmap_store_pte(pdp, pa_to_pte(pa)
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+
+	PMAP_UNLOCK(map);
+
+	return;
+}
+
+/* On K64 machines with more than 32GB of memory, pmap_steal_memory
+ * will allocate past the 1GB of pre-expanded virtual kernel area.
This
+ * function allocates all the page tables using memory from the same pool
+ * that pmap_steal_memory uses, rather than calling vm_page_grab (which
+ * isn't available yet).
+ *
+ * With the pmap locked, each of the three lookups (pmap64_pdpt,
+ * pmap64_pde, pmap_pte) is tried in turn for vaddr; for each one that
+ * fails, a page is taken with pmap_next_page_k64() (panicking if none is
+ * left), zeroed, and installed in the entry one level up with
+ * VALID | USER | WRITE. */
+void
+pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) {
+	ppnum_t pn;
+	pt_entry_t		*pte;
+
+	PMAP_LOCK(pmap);
+
+	if(pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
+		if (!pmap_next_page_k64(&pn))
+			panic("pmap_pre_expand");
+
+		pmap_zero_page(pn);
+
+		pte = pmap64_pml4(pmap, vaddr);
+
+		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+	}
+
+	if(pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
+		if (!pmap_next_page_k64(&pn))
+			panic("pmap_pre_expand");
+
+		pmap_zero_page(pn);
+
+		pte = pmap64_pdpt(pmap, vaddr);
+
+		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+	}
+
+	if(pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
+		if (!pmap_next_page_k64(&pn))
+			panic("pmap_pre_expand");
+
+		pmap_zero_page(pn);
+
+		pte = pmap64_pde(pmap, vaddr);
+
+		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
+				| INTEL_PTE_VALID
+				| INTEL_PTE_USER
+				| INTEL_PTE_WRITE);
+	}
+
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ * pmap_sync_page_data_phys(ppnum_t pa)
+ * 
+ * Invalidates all of the instruction cache on a physical page and
+ * pushes any dirty data from the data cache for the same physical page
+ * Not required in i386, so this implementation does nothing.
+ */
+void
+pmap_sync_page_data_phys(__unused ppnum_t pa)
+{
+	return;
+}
+
+/*
+ * pmap_sync_page_attributes_phys(ppnum_t pa)
+ * 
+ * Write back and invalidate all cachelines on a physical page.
+ * Implemented by handing the page number to cache_flush_page_phys().
+ */
+void
+pmap_sync_page_attributes_phys(ppnum_t pa)
+{
+	cache_flush_page_phys(pa);
+}
+
+
+
+#ifdef CURRENTLY_UNUSED_AND_UNTESTED
+
+int	collect_ref;	/* directory entries whose reference bit was found set (and cleared) */
+int	collect_unref;	/* directory entries found without the reference bit */
+
+/*
+ *	Routine:	pmap_collect
+ *	Function:
+ *		Garbage collects the physical map system for
+ *		pages which are no longer used.
+ *		Success need not be guaranteed -- that is, there
+ *		may well be pages which are not referenced, but
+ *		others may be collected.
+ *	Usage:
+ *		Called by the pageout daemon when pages are scarce.
+ *
+ *	Compiled only under CURRENTLY_UNUSED_AND_UNTESTED.  Does nothing
+ *	for PMAP_NULL or kernel_pmap.  A valid directory entry that has
+ *	its reference bit set only has that bit cleared; one without it
+ *	has its pte page scanned, and if no entry in that page is wired
+ *	the range is removed, the directory entry zeroed and the pte page
+ *	freed (with the pmap lock dropped around the free).
+ */
+void
+pmap_collect(
+	pmap_t 		p)
+{
+	register pt_entry_t	*pdp, *ptp;
+	pt_entry_t		*eptp;
+	int			wired;
+
+	if (p == PMAP_NULL)
+		return;
+
+	if (p == kernel_pmap)
+		return;
+
+	/*
+	 *	Garbage collect map.
+	 */
+	PMAP_LOCK(p);
+
+	for (pdp = (pt_entry_t *)p->dirbase;
+	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
+	     pdp++)
+	{
+	   if (*pdp & INTEL_PTE_VALID) {
+	      if(*pdp & INTEL_PTE_REF) {
+		pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
+		collect_ref++;
+	      } else {
+		collect_unref++;
+		ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
+		eptp = ptp + NPTEPG;
+
+		/*
+		 * If the pte page has any wired mappings, we cannot
+		 * free it.
+		 */
+		wired = 0;
+		{
+		    register pt_entry_t *ptep;
+		    for (ptep = ptp; ptep < eptp; ptep++) {
+			if (iswired(*ptep)) {
+			    wired = 1;
+			    break;
+			}
+		    }
+		}
+		if (!wired) {
+		    /*
+		     * Remove the virtual addresses mapped by this pte page.
+		     */
+		    pmap_remove_range(p,
+				pdetova(pdp - (pt_entry_t *)p->dirbase),
+				ptp,
+				eptp);
+
+		    /*
+		     * Invalidate the page directory pointer.
+		     */
+		    pmap_store_pte(pdp, 0x0);
+
+		    PMAP_UNLOCK(p);
+
+		    /*
+		     * And free the pte page itself.
+		     */
+		    {
+			register vm_page_t m;
+
+			vm_object_lock(p->pm_obj);
+
+			m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
+			if (m == VM_PAGE_NULL)
+			    panic("pmap_collect: pte page not in object");
+
+			VM_PAGE_FREE(m);
+
+			OSAddAtomic(-1,  &inuse_ptepages_count);
+
+			vm_object_unlock(p->pm_obj);
+		    }
+
+		    PMAP_LOCK(p);
+		}
+	      }
+	   }
+	}
+
+	PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
+	PMAP_UNLOCK(p);
+	return;
+
+}
+#endif
+
+/*
+ *	pmap_copy_page copies PAGE_SIZE bytes from physical page src to
+ *	physical page dst with bcopy_phys().
+ */
+void
+pmap_copy_page(ppnum_t src, ppnum_t dst)
+{
+	bcopy_phys((addr64_t)i386_ptob(src),
+		   (addr64_t)i386_ptob(dst),
+		   PAGE_SIZE);
+}
+
+
+/*
+ *	Routine:	pmap_pageable
+ *	Function:
+ *		Make the specified pages (by pmap, offset)
+ *		pageable (or not) as requested.
+ *
+ *		A page which is not pageable may not take
+ *		a fault; therefore, its page table entry
+ *		must remain valid for the duration.
+ *
+ *		This routine is merely advisory; pmap_enter
+ *		will specify that these pages are to be wired
+ *		down (or not) as appropriate.
+ *
+ *		This implementation ignores all of its arguments.
+ */
+void
+pmap_pageable(
+	__unused pmap_t			pmap,
+	__unused vm_map_offset_t	start_addr,
+	__unused vm_map_offset_t	end_addr,
+	__unused boolean_t		pageable)
+{
+#ifdef	lint
+	pmap++; start_addr++; end_addr++; pageable++;
+#endif	/* lint */
+}
+
+/*
+ *	Clear specified attribute bits.
+ *
+ *	pn	physical page number; vm_page_guard_addr and unmanaged
+ *		pages are ignored.
+ *	bits	mask cleared both from every PTE on the page's PV list
+ *		and from pmap_phys_attributes[] for the page.
+ */
+void
+phys_attribute_clear(
+	ppnum_t		pn,
+	int		bits)
+{
+	pv_rooted_entry_t	pv_h;
+	pv_hashed_entry_t	pv_e;
+	pt_entry_t		*pte;
+	int			pai;
+	pmap_t			pmap;
+
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return;
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai)) {
+		/*
+		 *	Not a managed page.
+		 */
+		return;
+	}
+
+
+	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
+		   pn, bits, 0, 0, 0);
+
+	pv_h = pai_to_pvh(pai);
+
+	LOCK_PVH(pai);
+
+	/*
+	 * Walk down PV list, clearing all modify or reference bits.
+	 * We do not have to lock the pv_list because we have
+	 * the entire pmap system locked.
+	 * NOTE(review): the only lock visibly taken here is
+	 * LOCK_PVH(pai) -- confirm what "entire pmap system" refers to.
+	 */
+	if (pv_h->pmap != PMAP_NULL) {
+		/*
+		 * There are some mappings.
+		 */
+
+		pv_e = (pv_hashed_entry_t)pv_h;
+
+		do {
+			vm_map_offset_t	va;
+
+			pmap = pv_e->pmap;
+			va = pv_e->va;
+
+			 /*
+			  * Clear modify and/or reference bits.
+			  */
+			pte = pmap_pte(pmap, va);
+			pmap_update_pte(pte, *pte, (*pte & ~bits));
+			/* Ensure all processors using this translation
+			 * invalidate this TLB entry. The invalidation *must*
+			 * follow the PTE update, to ensure that the TLB
+			 * shadow of the 'D' bit (in particular) is
+			 * synchronized with the updated PTE.
+			 */
+			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+
+			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
+
+		} while (pv_e != (pv_hashed_entry_t)pv_h);
+	}
+	pmap_phys_attributes[pai] &= ~bits;
+
+	UNLOCK_PVH(pai);
+
+	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
+		   0, 0, 0, 0, 0);
+}
+
+/*
+ *	Check specified attribute bits.
+ *
+ *	Returns the subset of bits found set, combining the cached
+ *	pmap_phys_attributes[] value with the bits read from each PTE on
+ *	the page's PV list.  Returns 0 for vm_page_guard_addr and for
+ *	unmanaged pages.  The PV walk stops as soon as every requested
+ *	bit has been seen.
+ */
+int
+phys_attribute_test(
+	ppnum_t		pn,
+	int		bits)
+{
+	pv_rooted_entry_t	pv_h;
+	pv_hashed_entry_t	pv_e;
+	pt_entry_t		*pte;
+	int			pai;
+	pmap_t			pmap;
+	int			attributes = 0;
+
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return 0;
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai)) {
+		/*
+		 *	Not a managed page.
+		 */
+		return 0;
+	}
+
+	/*
+	 * super fast check...  if bits already collected
+	 * no need to take any locks...
+	 * if not set, we need to recheck after taking
+	 * the lock in case they got pulled in while
+	 * we were waiting for the lock
+	 */
+	if ((pmap_phys_attributes[pai] & bits) == bits)
+		return bits;
+
+	pv_h = pai_to_pvh(pai);
+
+	LOCK_PVH(pai);
+
+	attributes = pmap_phys_attributes[pai] & bits;
+
+
+	/*
+	 * Walk down PV list, checking the mappings until we
+	 * reach the end or we've found the attributes we've asked for
+	 * We do not have to lock the pv_list because we have
+	 * the entire pmap system locked.
+	 * NOTE(review): the only lock visibly taken here is
+	 * LOCK_PVH(pai) -- confirm what "entire pmap system" refers to.
+	 */
+	if (attributes != bits &&
+	    pv_h->pmap != PMAP_NULL) {
+		/*
+		 * There are some mappings.
+		 */
+		pv_e = (pv_hashed_entry_t)pv_h;
+		do {
+			vm_map_offset_t va;
+
+			pmap = pv_e->pmap;
+			va = pv_e->va;
+			/*
+			 * first make sure any processor actively
+			 * using this pmap, flushes its TLB state
+			 */
+			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+
+			/*
+			 * pick up modify and/or reference bits from mapping
+			 */
+
+			pte = pmap_pte(pmap, va);
+			attributes |= (int)(*pte & bits);
+
+			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
+
+		} while ((attributes != bits) &&
+			 (pv_e != (pv_hashed_entry_t)pv_h));
+	}
+
+	UNLOCK_PVH(pai);
+	return (attributes);
+}
+
+/*
+ *	Set specified attribute bits.
+ *
+ *	Only the cached pmap_phys_attributes[] entry is updated (under
+ *	LOCK_PVH); no PTE is touched.  vm_page_guard_addr and unmanaged
+ *	pages are ignored.
+ */
+void
+phys_attribute_set(
+	ppnum_t		pn,
+	int		bits)
+{
+	int		pai;
+
+	pmap_intr_assert();
+	assert(pn != vm_page_fictitious_addr);
+	if (pn == vm_page_guard_addr)
+		return;
+
+	pai = ppn_to_pai(pn);
+
+	if (!IS_MANAGED_PAGE(pai)) {
+		/* Not a managed page.  */
+		return;
+	}
+
+	LOCK_PVH(pai);
+	pmap_phys_attributes[pai] |= bits;
+	UNLOCK_PVH(pai);
+}
+
+/*
+ *	Set the modify bit on the specified physical page.
+ */
+
+void
+pmap_set_modify(ppnum_t pn)
+{
+	phys_attribute_set(pn, PHYS_MODIFIED);
+}
+
+/*
+ *	Clear the modify bits on the specified physical page.
+ */
+
+void
+pmap_clear_modify(ppnum_t pn)
+{
+	phys_attribute_clear(pn, PHYS_MODIFIED);
+}
+
+/*
+ *	pmap_is_modified:
+ *
+ *	Return whether or not the specified physical page is modified
+ *	by any physical maps.
+ */
+
+boolean_t
+pmap_is_modified(ppnum_t pn)
+{
+	if (phys_attribute_test(pn, PHYS_MODIFIED))
+		return TRUE;
+	return FALSE;
+}
+
+/*
+ *	pmap_clear_reference:
+ *
+ *	Clear the reference bit on the specified physical page.
+ */
+
+void
+pmap_clear_reference(ppnum_t pn)
+{
+	phys_attribute_clear(pn, PHYS_REFERENCED);
+}
+
+void
+pmap_set_reference(ppnum_t pn)
+{
+	phys_attribute_set(pn, PHYS_REFERENCED);	/* sets the cached reference bit only */
+}
+
+/*
+ *	pmap_is_referenced:
+ *
+ *	Return whether or not the specified physical page is referenced
+ *	by any physical maps.
+ */
+
+boolean_t
+pmap_is_referenced(ppnum_t pn)
+{
+        if (phys_attribute_test(pn, PHYS_REFERENCED))
+		return TRUE;
+	return FALSE;
+}
+
+/*
+ * pmap_get_refmod(phys)
+ *  returns the referenced and modified bits of the specified
+ *  physical page.
+ *
+ *  The PHYS_MODIFIED / PHYS_REFERENCED bits reported by
+ *  phys_attribute_test() are translated to VM_MEM_MODIFIED /
+ *  VM_MEM_REFERENCED in the return value.
+ */
+unsigned int
+pmap_get_refmod(ppnum_t pn)
+{
+        int		refmod;
+	unsigned int	retval = 0;
+
+	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);
+
+	if (refmod & PHYS_MODIFIED)
+	        retval |= VM_MEM_MODIFIED;
+	if (refmod & PHYS_REFERENCED)
+	        retval |= VM_MEM_REFERENCED;
+
+	return (retval);
+}
+
+/*
+ * pmap_clear_refmod(phys, mask)
+ *  clears the referenced and modified bits as specified by the mask
+ *  of the specified physical page.
+ *
+ *  mask is expressed in VM_MEM_MODIFIED / VM_MEM_REFERENCED terms and is
+ *  translated to PHYS_MODIFIED / PHYS_REFERENCED before being passed to
+ *  phys_attribute_clear().
+ */
+void
+pmap_clear_refmod(ppnum_t pn, unsigned int mask)
+{
+	unsigned int  x86Mask;
+
+	x86Mask = (   ((mask &   VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
+	            | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
+	phys_attribute_clear(pn, x86Mask);
+}
+
+void 
+invalidate_icache(__unused vm_offset_t	addr,
+		  __unused unsigned	cnt,
+		  __unused int		phys)
+{
+	return;		/* intentionally empty: all arguments are unused */
+}
+
+void 
+flush_dcache(__unused vm_offset_t	addr,
+	     __unused unsigned		count,
+	     __unused int		phys)
+{
+	return;		/* intentionally empty: all arguments are unused */
+}
+
+#if CONFIG_DTRACE
+/*
+ * Constrain DTrace copyin/copyout actions
+ *
+ * dtrace_copyio_preflight() returns KERN_FAILURE when the current map is
+ * the kernel map, when the live CR3 differs from the current thread's
+ * pmap pm_cr3, or when the thread's CopyIOActive flag is set; otherwise
+ * KERN_SUCCESS.  dtrace_copyio_postflight() always returns KERN_SUCCESS.
+ * Both ignore their address argument.
+ */
+extern kern_return_t dtrace_copyio_preflight(addr64_t);
+extern kern_return_t dtrace_copyio_postflight(addr64_t);
+
+kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
+{
+	thread_t thread = current_thread();
+
+	if (current_map() == kernel_map)
+		return KERN_FAILURE;
+	else if (get_cr3() != thread->map->pmap->pm_cr3)
+		return KERN_FAILURE;
+	else if (thread->machine.specFlags & CopyIOActive)
+		return KERN_FAILURE;
+	else
+		return KERN_SUCCESS;
+}
+ 
+kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
+{
+	return KERN_SUCCESS;
+}
+#endif /* CONFIG_DTRACE */
+
+#include
+#if	MACH_VM_DEBUG
+#include
+
+int
+pmap_list_resident_pages(
+	__unused pmap_t		pmap,
+	__unused vm_offset_t	*listp,
+	__unused int		space)
+{
+	return 0;	/* stub: always reports 0 */
+}
+#endif	/* MACH_VM_DEBUG */
+
+
+
+/* temporary workaround: the PTE inspection is compiled out, so this always answers TRUE */
+boolean_t
+coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
+{
+#if 0
+	pt_entry_t     *ptep;
+
+	ptep = pmap_pte(map->pmap, va);
+	if (0 == ptep)
+		return FALSE;
+	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
+#else
+	return TRUE;
+#endif
+}
+
+/*
+ *	phys_page_exists returns TRUE before pmap_initialized is set;
+ *	afterwards it returns FALSE for vm_page_guard_addr and for pages
+ *	that fail IS_MANAGED_PAGE(), and TRUE otherwise.
+ */
+boolean_t
+phys_page_exists(ppnum_t pn)
+{
+	assert(pn != vm_page_fictitious_addr);
+
+	if (!pmap_initialized)
+		return TRUE;
+
+	if (pn == vm_page_guard_addr)
+		return FALSE;
+
+	if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
+		return FALSE;
+
+	return TRUE;
+}
+/*
+ *	mapping_free_prime allocates 5 * PV_HASHED_ALLOC_CHUNK entries from
+ *	pv_hashed_list_zone and hands them to PV_HASHED_FREE_LIST, then
+ *	allocates PV_HASHED_KERN_ALLOC_CHUNK entries and hands them to
+ *	PV_HASHED_KERN_FREE_LIST.  Each batch is chained through
+ *	qlink.next, newest entry first, with pvh_et tracking the tail.
+ */
+void
+mapping_free_prime(void)
+{
+	int			i;
+	pv_hashed_entry_t	pvh_e;
+	pv_hashed_entry_t	pvh_eh;
+	pv_hashed_entry_t	pvh_et;
+	int			pv_cnt;
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
+		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+		pvh_eh = pvh_e;
+
+		if (pvh_et == PV_HASHED_ENTRY_NULL)
+		        pvh_et = pvh_e;
+		pv_cnt++;
+	}
+	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+		pvh_eh = pvh_e;
+
+		if (pvh_et == PV_HASHED_ENTRY_NULL)
+		        pvh_et = pvh_e;
+		pv_cnt++;
+	}
+	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+}
+/*
+ *	mapping_adjust refills the two pv entry free lists when their
+ *	counts are below the respective low-water marks
+ *	(PV_HASHED_KERN_LOW_WATER_MARK, PV_HASHED_LOW_WATER_MARK), one
+ *	allocation chunk each.  On first use it also initialises
+ *	mapping_adjust_call as a thread call to itself.  mappingrecurse is
+ *	cleared on the way out.
+ */
+void
+mapping_adjust(void)
+{
+	pv_hashed_entry_t	pvh_e;
+	pv_hashed_entry_t	pvh_eh;
+	pv_hashed_entry_t	pvh_et;
+	int			pv_cnt;
+	int             	i;
+
+	if (mapping_adjust_call == NULL) {
+		thread_call_setup(&mapping_adjust_call_data,
+				  (thread_call_func_t) mapping_adjust,
+				  (thread_call_param_t) NULL);
+		mapping_adjust_call = &mapping_adjust_call_data;
+	}
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+	if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
+		for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
+			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+			pvh_eh = pvh_e;
+
+			if (pvh_et == PV_HASHED_ENTRY_NULL)
+			        pvh_et = pvh_e;
+			pv_cnt++;
+		}
+		PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+	}
+
+	pv_cnt = 0;
+	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
+	if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
+		for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
+			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+
+			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
+			pvh_eh = pvh_e;
+
+			if (pvh_et == PV_HASHED_ENTRY_NULL)
+			        pvh_et = pvh_e;
+			pv_cnt++;
+		}
+		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+	}
+	mappingrecurse = 0;
+}
+
+/*
+ *	pmap_switch loads tpmap for the current thread via set_dirbase(),
+ *	bracketed by splhigh()/splx().
+ */
+void
+pmap_switch(pmap_t tpmap)
+{
+        spl_t	s;
+
+	s = splhigh();		/* Make sure interruptions are disabled */
+	set_dirbase(tpmap, current_thread());
+	splx(s);
+}
+
+
+/*
+ * disable no-execute capability on
+ * the specified pmap
+ */
+void
+pmap_disable_NX(pmap_t pmap)
+{
+        pmap->nx_enabled = 0;
+}
+/*
+ *	pt_fake_zone_info reports page-table page usage through
+ *	zone-statistics style out parameters: count and cur_size come from
+ *	inuse_ptepages_count, max_size adds the inactive, active and free
+ *	page counts, element and allocation size are PAGE_SIZE,
+ *	collectable is 1 and exhaustable is 0.
+ */
+void
+pt_fake_zone_info(
+	int		*count,
+	vm_size_t	*cur_size,
+	vm_size_t	*max_size,
+	vm_size_t	*elem_size,
+	vm_size_t	*alloc_size,
+	int		*collectable,
+	int		*exhaustable)
+{
+        *count      = inuse_ptepages_count;
+	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
+	*max_size   = PAGE_SIZE * (inuse_ptepages_count +
+				   vm_page_inactive_count +
+				   vm_page_active_count +
+				   vm_page_free_count);
+	*elem_size  = PAGE_SIZE;
+	*alloc_size = PAGE_SIZE;
+
+	*collectable = 1;
+	*exhaustable = 0;
+}
+/*
+ *	pmap_cpuset_NMIPI calls cpu_NMI_interrupt() for every cpu whose bit
+ *	is set in cpu_mask, then spins with cpu_pause() until LockTimeOut
+ *	has elapsed on mach_absolute_time().
+ */
+static inline void
+pmap_cpuset_NMIPI(cpu_set cpu_mask) {
+	unsigned int cpu, cpu_bit;
+	uint64_t deadline;
+
+	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+		if (cpu_mask & cpu_bit)
+			cpu_NMI_interrupt(cpu);
+	}
+	deadline = mach_absolute_time() + (LockTimeOut);
+	while (mach_absolute_time() < deadline)
+		cpu_pause();
+}
+
+/*
+ * Called with pmap locked, we:
+ *  - scan through per-cpu data to see which other cpus need to flush
+ *  - send an IPI to
each non-idle cpu to be flushed
+ *  - wait for all to signal back that they are inactive or we see that
+ *    they are at a safe point (idle).
+ *  - flush the local tlb if active for this pmap
+ *  - return ... the caller will unlock the pmap
+ *
+ * If a signalled cpu has not responded once LockTimeOut has elapsed
+ * (and there has been no recent debugger activity), the remaining cpus
+ * are sent NMIs via pmap_cpuset_NMIPI() unless a panic is already
+ * active, and the system panics.
+ */
+void
+pmap_flush_tlbs(pmap_t	pmap)
+{
+	unsigned int	cpu;
+	unsigned int	cpu_bit;
+	cpu_set		cpus_to_signal;
+	unsigned int	my_cpu = cpu_number();
+	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
+	boolean_t	flush_self = FALSE;
+	uint64_t	deadline;
+
+	assert((processor_avail_count < 2) ||
+	       (ml_get_interrupts_enabled() && get_preemption_level() != 0));
+
+	/*
+	 * Scan other cpus for matching active or task CR3.
+	 * For idle cpus (with no active map) we mark them invalid but
+	 * don't signal -- they'll check as they go busy.
+	 */
+	cpus_to_signal = 0;
+	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+		if (!cpu_datap(cpu)->cpu_running)
+			continue;
+		uint64_t	cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
+		uint64_t	cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);
+
+		if ((pmap_cr3 == cpu_task_cr3) ||
+		    (pmap_cr3 == cpu_active_cr3) ||
+		    (pmap->pm_shared) ||
+		    (pmap == kernel_pmap)) {
+			if (cpu == my_cpu) {
+				flush_self = TRUE;	/* handled locally below, no IPI to self */
+				continue;
+			}
+			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
+			__asm__ volatile("mfence");
+
+			/*
+			 * We don't need to signal processors which will flush
+			 * lazily at the idle state or kernel boundary.
+			 * For example, if we're invalidating the kernel pmap,
+			 * processors currently in userspace don't need to flush
+			 * their TLBs until the next time they enter the kernel.
+			 * Alterations to the address space of a task active
+			 * on a remote processor result in a signal, to
+			 * account for copy operations. (There may be room
+			 * for optimization in such cases).
+			 * The order of the loads below with respect
+			 * to the store to the "cpu_tlb_invalid" field above
+			 * is important--hence the barrier.
+			 */
+			if (CPU_CR3_IS_ACTIVE(cpu) &&
+			    (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
+			    pmap->pm_shared ||
+			    (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
+				cpus_to_signal |= cpu_bit;
+				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
+			}
+		}
+	}
+
+	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
+		   pmap, cpus_to_signal, flush_self, 0, 0);
+
+	/*
+	 * Flush local tlb if required.
+	 * Do this now to overlap with other processors responding.
+	 */
+	if (flush_self)
+		flush_tlb();
+
+	if (cpus_to_signal) {
+		cpu_set	cpus_to_respond = cpus_to_signal;
+
+		deadline = mach_absolute_time() + LockTimeOut;
+		/*
+		 * Wait for those other cpus to acknowledge
+		 */
+		while (cpus_to_respond != 0) {
+			if (mach_absolute_time() > deadline) {
+				if (mp_recent_debugger_activity())
+					continue;
+				if (!panic_active()) {
+					pmap_tlb_flush_timeout = TRUE;
+					pmap_cpuset_NMIPI(cpus_to_respond);
+				}
+				panic("pmap_flush_tlbs() timeout: "
+				    "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
+				    pmap, cpus_to_respond);
+			}
+
+			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+				if ((cpus_to_respond & cpu_bit) != 0) {
+					/* a cpu counts as done when it stopped running, cleared its invalid flag, or no longer has an active CR3 */
+					if (!cpu_datap(cpu)->cpu_running ||
+					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
+					    !CPU_CR3_IS_ACTIVE(cpu)) {
+						cpus_to_respond &= ~cpu_bit;
+					}
+					cpu_pause();
+				}
+				if (cpus_to_respond == 0)
+					break;
+			}
+		}
+	}
+
+	PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
+		   pmap, cpus_to_signal, flush_self, 0, 0);
+}
+/*
+ *	process_pmap_updates flushes the local TLB and then clears this
+ *	cpu's cpu_tlb_invalid flag, followed by an mfence.  Asserts that
+ *	interrupts are disabled or preemption is disabled.
+ */
+void
+process_pmap_updates(void)
+{
+	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+
+	flush_tlb();
+
+	current_cpu_datap()->cpu_tlb_invalid = FALSE;
+	__asm__ volatile("mfence");
+}
+/*
+ *	pmap_update_interrupt wraps process_pmap_updates() in
+ *	PMAP__UPDATE_INTERRUPT start/end trace points.
+ */
+void
+pmap_update_interrupt(void)
+{
+        PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
+		   0, 0, 0, 0, 0);
+
+	process_pmap_updates();
+
+        PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
+		   0, 0, 0, 0, 0);
+}
+
+
+unsigned int
+pmap_cache_attributes(ppnum_t pn)
+{
+	return IS_MANAGED_PAGE(ppn_to_pai(pn)) ?
VM_WIMG_COPYBACK + : VM_WIMG_IO; +} + + diff --git a/osfmk/x86_64/start.s b/osfmk/x86_64/start.s new file mode 100644 index 000000000..ccba4f64b --- /dev/null +++ b/osfmk/x86_64/start.s @@ -0,0 +1,664 @@ +/* + * Copyright (c) 2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +.code32 + + +/* + * Interrupt and bootup stack for initial processor. + */ + + /* in the __HIB section since the hibernate restore code uses this stack. */ + .section __HIB, __data + .align 12 + + .globl EXT(low_intstack) +EXT(low_intstack): + .globl EXT(gIOHibernateRestoreStack) +EXT(gIOHibernateRestoreStack): + + .set ., .+INTSTACK_SIZE + + .globl EXT(low_eintstack) +EXT(low_eintstack:) + .globl EXT(gIOHibernateRestoreStackEnd) +EXT(gIOHibernateRestoreStackEnd): + + /* back to the regular __DATA section. */ + + .section __DATA, __data + +/* + * Stack for last-gasp double-fault handler. + */ + .align 12 + .globl EXT(df_task_stack) +EXT(df_task_stack): + .set ., .+INTSTACK_SIZE + .globl EXT(df_task_stack_end) +EXT(df_task_stack_end): + + +/* + * Stack for machine-check handler. 
+ */ + .align 12 + .globl EXT(mc_task_stack) +EXT(mc_task_stack): + .set ., .+INTSTACK_SIZE + .globl EXT(mc_task_stack_end) +EXT(mc_task_stack_end): + + +#if MACH_KDB +/* + * Kernel debugger stack for each processor. + */ + .align 12 + .globl EXT(db_stack_store) +EXT(db_stack_store): + .set ., .+(INTSTACK_SIZE*MAX_CPUS) + +/* + * Stack for last-ditch debugger task for each processor. + */ + .align 12 + .globl EXT(db_task_stack_store) +EXT(db_task_stack_store): + .set ., .+(INTSTACK_SIZE*MAX_CPUS) + +/* + * per-processor kernel debugger stacks + */ + .align ALIGN + .globl EXT(kgdb_stack_store) +EXT(kgdb_stack_store): + .set ., .+(INTSTACK_SIZE*MAX_CPUS) +#endif /* MACH_KDB */ + +/* + * BSP CPU start here. + * eax points to kernbootstruct + * + * Environment: + * protected mode, no paging, flat 32-bit address space. + * (Code/data/stack segments have base == 0, limit == 4G) + */ + +#define SWITCH_TO_64BIT_MODE \ + movl $(CR4_PAE),%eax /* enable PAE */ ;\ + movl %eax,%cr4 ;\ + movl $MSR_IA32_EFER,%ecx ;\ + rdmsr ;\ + orl $MSR_IA32_EFER_LME,%eax /* enable long mode */ ;\ + wrmsr ;\ + movl $INITPT_SEG_BASE,%eax ;\ + movl %eax,%cr3 ;\ + movl %cr0,%eax ;\ + orl $(CR0_PG|CR0_WP),%eax /* enable paging */ ;\ + movl %eax,%cr0 ;\ + /* "The Aussie Maneuver" ("Myria" variant) */ ;\ + pushl $(0xcb<<24)|KERNEL64_CS /* reload CS with 0x08 */ ;\ + call .-1 ;\ + .code64 + +/* + * [ We used to have a reason for the following statement; ] + * [ but the issue has been fixed. The line is true ] + * [ nevertheless, therefore it should remain there. ] + * This proves that Little Endian is superior to Big Endian. 
+ */ + + + .text + .align ALIGN + .globl EXT(_start) + .globl EXT(_pstart) +LEXT(_start) +LEXT(_pstart) + + .code32 + +#if 0 + mov $0x3f8, %dx + mov $0x4D, %al; out %al, %dx + mov $0x49, %al; out %al, %dx + mov $0x53, %al; out %al, %dx + mov $0x54, %al; out %al, %dx + mov $0x0D, %al; out %al, %dx + mov $0x0A, %al; out %al, %dx +#endif + +/* + * Here we do the minimal setup to switch from 32 bit mode to 64 bit long mode. + * + * Initial memory layout: + * + * ------------------------- + * | | + * | Kernel text/data | + * | | + * ------------------------- Kernel start addr + * | | + * | | + * ------------------------- 0 + * + */ + mov %eax, %edi /* save kernbootstruct */ + + /* Use low 32-bits of address as 32-bit stack */ + movl $EXT(low_eintstack), %esp + + /* + * Set up segmentation + */ + movl $EXT(protected_mode_gdtr), %eax + lgdtl (%eax) + + mov $(KERNEL_DS), %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %ss + xor %eax, %eax + mov %ax, %fs + mov %ax, %gs + +/* the following code is shared by the master CPU and all slave CPUs */ +L_pstart_common: + /* + * switch to 64 bit mode + */ + SWITCH_TO_64BIT_MODE + + /* %edi = boot_args_start */ + + leaq _vstart(%rip), %rcx + movq $0xffffff8000000000, %rax /* adjust the pointer to be up high */ + or %rax, %rsp /* and stack pointer up there too */ + or %rcx, %rax + andq $0xfffffffffffffff0, %rsp /* align stack */ + xorq %rbp, %rbp /* zero frame pointer */ + callq *%rax + +/* + * AP (slave) CPUs enter here. + * + * Environment: + * protected mode, no paging, flat 32-bit address space. 
+ * (Code/data/stack segments have base == 0, limit == 4G) + */ + .align ALIGN + .globl EXT(slave_pstart) +LEXT(slave_pstart) + .code32 + cli /* disable interrupts, so we don`t */ + /* need IDT for a while */ + POSTCODE(SLAVE_PSTART_ENTRY) + + movl $EXT(mp_slave_stack) + PAGE_SIZE, %esp + + /* set up identity mapping of page tables */ + movl $INITPT_SEG_BASE,%eax + movl (KERNEL_PML4_INDEX*8)(%eax), %esi + movl %esi, (0)(%eax) + movl (KERNEL_PML4_INDEX*8+4)(%eax), %esi + movl %esi, (0+4)(%eax) + + movl $0, %edi /* "no kernbootstruct" */ + + jmp L_pstart_common /* hop a ride to vstart() */ + + +/* BEGIN HIBERNATE CODE */ + +.section __HIB, __text +/* +This code is linked into the kernel but part of the "__HIB" section, which means +its used by code running in the special context of restoring the kernel text and data +from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything +it calls or references (ie. hibernate_restore_phys_page()) +needs to be careful to only touch memory also in the "__HIB" section. 
+*/ + + + .align ALIGN + .globl EXT(hibernate_machine_entrypoint) +.code32 +LEXT(hibernate_machine_entrypoint) + movl %eax, %edi /* regparm(1) calling convention */ + + /* restore gdt */ + mov $(SLEEP_SEG_BASE)+20, %eax // load saved_gdt, this may break + lgdtl (%eax) + + /* setup the protected mode segment registers */ + mov $KERNEL_DS, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + xor %eax,%eax + movw %ax, %fs + movw %ax, %gs + + /* set up the page tables to use BootstrapPTD + * as done in idle_pt.c, but this must be done programatically */ + mov $(INITPT_SEG_BASE + PAGE_SIZE), %eax + mov $(INITPT_SEG_BASE + 2*PAGE_SIZE | INTEL_PTE_WRITE | INTEL_PTE_VALID), %ecx + mov $0x0, %edx + mov %ecx, (0*8+0)(%eax) + mov %edx, (0*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (1*8+0)(%eax) + mov %edx, (1*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (2*8+0)(%eax) + mov %edx, (2*8+4)(%eax) + add $(PAGE_SIZE), %ecx + mov %ecx, (3*8+0)(%eax) + mov %edx, (3*8+4)(%eax) + + /* Temporary stack */ + mov $(REAL_MODE_BOOTSTRAP_OFFSET + PROT_MODE_START), %esp + + SWITCH_TO_64BIT_MODE + + leaq EXT(hibernate_kernel_entrypoint)(%rip),%rcx + leaq EXT(gIOHibernateRestoreStackEnd)(%rip),%rsp /* switch to the bootup stack */ + movq $0xffffff8000000000, %rax /* adjust the pointer to be up high */ + orq %rax, %rsp /* and stack pointer up there too :D */ + orq %rcx, %rax /* put entrypoint in %rax */ + /* %edi is already filled with header pointer */ + xorl %esi, %esi /* zero 2nd arg */ + xorl %edx, %edx /* zero 3rd arg */ + xorl %ecx, %ecx /* zero 4th arg */ + andq $0xfffffffffffffff0, %rsp /* align stack */ + /* (future-proofing, stack should already be aligned) */ + xorq %rbp, %rbp /* zero frame pointer */ + call *%rax /* call instead of jmp to keep the required stack alignment */ + /* NOTREACHED */ + hlt + +/* END HIBERNATE CODE */ + +#if CONFIG_SLEEP +/* BEGIN ACPI WAKEUP CODE */ + +#include + + + + +#define PA(addr) (addr) + +/* + * acpi_wake_start + * + * The code from 
acpi_wake_start to acpi_wake_end is copied to + * memory below 1MB. The firmware waking vector is updated to + * point at acpi_wake_start in low memory before sleeping. + */ + +.section __TEXT,__text +.text +.align 12 /* Page align for single bcopy_phys() */ +.code32 +.globl EXT(acpi_wake_prot) +EXT(acpi_wake_prot): + /* protected mode, paging disabled */ + + /* jump to acpi_temp_reloc (address stored in saved_eip) */ + mov $(SLEEP_SEG_BASE)+16, %eax + mov (%eax), %ecx // Load acpi_temp_reloc from saved_eip + jmp *%ecx +acpi_temp_reloc: + mov $(SLEEP_SEG_BASE)+16, %esp /* setup stack for 64bit */ + + SWITCH_TO_64BIT_MODE + + lea Lwake_64(%rip), %rax + movq $0xffffff8000000000, %rdx + orq %rdx, %rax + jmp *%rax +.code32 + +.code64 + +/* + * acpi_sleep_cpu(acpi_sleep_callback func, void * refcon) + * + * Save CPU state before platform sleep. Restore CPU state + * following wake up. + */ + +ENTRY(acpi_sleep_cpu) + push %rbp + mov %rsp, %rbp + + /* save flags */ + pushf + + /* save general purpose registers */ + push %rax + push %rbx + push %rcx + push %rdx + push %rbp + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + push %r12 + push %r13 + push %r14 + push %r15 + + mov %rsp, saved_rsp(%rip) + + /* make sure tlb is flushed */ + mov %cr3,%rax + mov %rax,%cr3 + + /* save control registers */ + mov %cr0, %rax + mov %rax, saved_cr0(%rip) + mov %cr2, %rax + mov %rax, saved_cr2(%rip) + mov %cr4, %rax + mov %rax, saved_cr4(%rip) + + /* save segment registers */ + movw %es, saved_es(%rip) + movw %fs, saved_fs(%rip) + movw %gs, saved_gs(%rip) + movw %ss, saved_ss(%rip) + + /* save the 64bit kernel gs base */ + mov $MSR_IA32_KERNEL_GS_BASE, %rcx + swapgs + rdmsr + movl %eax, saved_kgs_base(%rip) + movl %edx, saved_kgs_base+4(%rip) + swapgs + + /* save descriptor table registers */ + sgdt saved_gdt(%rip) + sldt saved_ldt(%rip) + sidt saved_idt(%rip) + str saved_tr(%rip) + + /* + * When system wakes up, the real mode wake handler will revert to + * protected mode, 
then jump to the address stored at saved_eip. + */ + leaq acpi_temp_reloc(%rip), %rax + mov %eax, saved_eip(%rip) + + /* + * Call ACPI function provided by the caller to sleep the platform. + * This call will not return on success. + */ + + xchgq %rdi, %rsi + call *%rsi + + /* sleep failed, no cpu context lost */ + jmp wake_restore + +.globl EXT(acpi_wake_prot_entry) +EXT(acpi_wake_prot_entry): + POSTCODE(ACPI_WAKE_PROT_ENTRY) + /* Entry from the hibernate code in iokit/Kernel/IOHibernateRestoreKernel.c + * + * Reset the first 4 PDE's to point to entries in IdlePTD, as done in + * Idle_PTs_init() during startup */ + leaq _IdlePDPT(%rip), %rax + movq _IdlePTD(%rip), %rcx + mov %ecx, %ecx /* zero top 32bits of %rcx */ + orq $(INTEL_PTE_WRITE|INTEL_PTE_VALID), %rcx + movq %rcx, 0x0(%rax) + add $0x1000, %rcx + movq %rcx, 0x8(%rax) + add $0x1000, %rcx + movq %rcx, 0x10(%rax) + add $0x1000, %rcx + movq %rcx, 0x18(%rax) + mov %cr3, %rax + mov %rax, %cr3 + +Lwake_64: + /* + * restore cr4, PAE and NXE states in an orderly fashion + */ + mov saved_cr4(%rip), %rcx + mov %rcx, %cr4 + + mov $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ + rdmsr /* MSR value returned in edx:eax */ + or $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ + wrmsr /* Update Extended Feature Enable reg */ + + /* restore kernel GDT */ + lgdt EXT(protected_mode_gdtr)(%rip) + + movq saved_cr2(%rip), %rax + mov %rax, %cr2 + + /* restore CR0, paging enabled */ + mov saved_cr0(%rip), %rax + mov %rax, %cr0 + + /* protected mode, paging enabled */ + POSTCODE(ACPI_WAKE_PAGED_ENTRY) + + /* switch to kernel data segment */ + movw $(KERNEL_DS), %ax + movw %ax, %ds + + /* restore local and interrupt descriptor tables */ + lldt saved_ldt(%rip) + lidt saved_idt(%rip) + + /* restore segment registers */ + movw saved_es(%rip), %es + movw saved_fs(%rip), %fs + movw saved_gs(%rip), %gs + movw saved_ss(%rip), %ss + + /* restore the 64bit kernel gs base */ + mov $MSR_IA32_KERNEL_GS_BASE, %rcx + movl 
saved_kgs_base(%rip), %eax + movl saved_kgs_base+4(%rip), %edx + wrmsr + swapgs + + //K64todo verify this TSS stuff + /* + * Restore task register. Before doing this, clear the busy flag + * in the TSS descriptor set by the CPU. + */ + lea saved_gdt(%rip), %rax + movq 2(%rax), %rdx /* GDT base, skip limit word */ + movl $(KERNEL_TSS), %eax /* TSS segment selector */ + movb $(K_TSS), 5(%rdx, %rax) /* clear busy flag */ + + ltr saved_tr(%rip) /* restore TR */ + +wake_restore: + mov saved_rsp(%rip), %rsp + + /* restore general purpose registers */ + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rbp + pop %rdx + pop %rcx + pop %rbx + pop %rax + + /* restore flags */ + popf + + leave + ret + +/* END ACPI WAKEUP CODE */ +#endif /* CONFIG_SLEEP */ + +/* Code to get from real mode to protected mode */ + +#define operand_size_prefix .byte 0x66 +#define address_size_prefix .byte 0x67 +#define cs_base_prefix .byte 0x2e + +#define LJMP(segment,address) \ + operand_size_prefix ;\ + .byte 0xea ;\ + .long address-EXT(real_mode_bootstrap_base) ;\ + .word segment + +#define LGDT(address) \ + cs_base_prefix ;\ + address_size_prefix ;\ + operand_size_prefix ;\ + .word 0x010f ;\ + .byte 0x15 ;\ + .long address-EXT(real_mode_bootstrap_base) + +.section __TEXT,__text +.align 12 /* Page align for single bcopy_phys() */ +.code32 +Entry(real_mode_bootstrap_base) + cli + + LGDT(EXT(protected_mode_gdtr)) + + /* set the PE bit of CR0 */ + mov %cr0, %eax + inc %eax + mov %eax, %cr0 + + /* reload CS register */ + LJMP(KERNEL32_CS, 1f + REAL_MODE_BOOTSTRAP_OFFSET) +1: + + /* we are in protected mode now */ + /* set up the segment registers */ + mov $KERNEL_DS, %eax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + xor %eax,%eax + movw %ax, %fs + movw %ax, %gs + + POSTCODE(SLAVE_STARTPROG_ENTRY); + + mov PROT_MODE_START+REAL_MODE_BOOTSTRAP_OFFSET, %ecx + jmp *%ecx + +Entry(protected_mode_gdtr) + .short 160 /* limit (8*6 segs) */ + 
.quad EXT(master_gdt) + +Entry(real_mode_bootstrap_end) + +/* Save area used across sleep/wake */ +.section __SLEEP, __data +.align 2 + +temp_stack: .quad 0 + .quad 0 +saved_eip: .long 0 +saved_gdt: .word 0 + .quad 0 +saved_rsp: .quad 0 +saved_es: .word 0 +saved_fs: .word 0 +saved_gs: .word 0 +saved_ss: .word 0 +saved_cr0: .quad 0 +saved_cr2: .quad 0 +saved_cr4: .quad 0 +saved_idt: .word 0 + .quad 0 +saved_ldt: .word 0 +saved_tr: .word 0 +saved_kgs_base: .quad 0 + diff --git a/pexpert/Makefile b/pexpert/Makefile index 59300a74d..abccc00b0 100644 --- a/pexpert/Makefile +++ b/pexpert/Makefile @@ -14,6 +14,10 @@ INSTINC_SUBDIRS_PPC = pexpert INSTINC_SUBDIRS_I386 = pexpert + +INSTINC_SUBDIRS_X86_64 = pexpert + + INSTINC_SUBDIRS_ARM = pexpert @@ -25,6 +29,10 @@ EXPINC_SUBDIRS_PPC = pexpert EXPINC_SUBDIRS_I386 = pexpert + +EXPINC_SUBDIRS_X86_64 = pexpert + + EXPINC_SUBDIRS_ARM = pexpert diff --git a/pexpert/conf/MASTER b/pexpert/conf/MASTER index 1588c80c5..f622474d6 100644 --- a/pexpert/conf/MASTER +++ b/pexpert/conf/MASTER @@ -84,8 +84,8 @@ ident PEXPERT options MACH_PE # Objective-C support # options MACH_KERNEL -options DEBUG -options CONFIG_DTRACE # dtrace support # +options DEBUG # general debugging code # +options CONFIG_DTRACE # dtrace support # options PANIC_INFO # want kernel panic info # @@ -97,3 +97,4 @@ options CONFIG_NO_KPRINTF_STRINGS # # embedded device # options CONFIG_EMBEDDED # + diff --git a/pexpert/conf/MASTER.i386 b/pexpert/conf/MASTER.i386 index 3bde93bca..f4e41a8e6 100644 --- a/pexpert/conf/MASTER.i386 +++ b/pexpert/conf/MASTER.i386 @@ -3,14 +3,17 @@ # Standard Apple Mac OS Configurations: # -------- ----- ------ --------------- # -# RELEASE = [ i386 mach mach_pe panic_info config_dtrace ] +# RELEASE = [ intel mach mach_pe panic_info config_dtrace ] # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug ] # -# EMBEDDED = [ i386 mach mach_pe panic_info ] +# +# EMBEDDED = [ intel mach mach_pe panic_info ] # DEVELOPMENT = [ EMBEDDED config_dtrace 
] # ###################################################################### -machine "i386" # -cpu "i386" # +machine "i386" # +cpu "i386" # + +options NO_NESTED_PMAP # diff --git a/pexpert/conf/MASTER.x86_64 b/pexpert/conf/MASTER.x86_64 new file mode 100644 index 000000000..536c4eb59 --- /dev/null +++ b/pexpert/conf/MASTER.x86_64 @@ -0,0 +1,19 @@ +###################################################################### +# +# Standard Apple Mac OS Configurations: +# -------- ----- ------ --------------- +# +# RELEASE = [ intel mach mach_pe panic_info config_dtrace ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ RELEASE debug ] +# +# +# EMBEDDED = [ intel mach mach_pe panic_info ] +# DEVELOPMENT = [ EMBEDDED ] +# +###################################################################### + +machine "x86_64" # +cpu "x86_64" # + +options NO_NESTED_PMAP # diff --git a/pexpert/conf/Makefile b/pexpert/conf/Makefile index 054208e35..93eb84150 100644 --- a/pexpert/conf/Makefile +++ b/pexpert/conf/Makefile @@ -35,7 +35,6 @@ $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/pexpert/conf/Makefile.i386 b/pexpert/conf/Makefile.i386 index 895c8b5fe..d3cca3ad4 100644 --- a/pexpert/conf/Makefile.i386 +++ b/pexpert/conf/Makefile.i386 @@ -2,21 +2,6 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### -# Enable -Werror for i386 builds -CFLAGS+= $(WERROR) -CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) - -# Objects that don't compile cleanly: -OBJS_NO_WERROR= \ - ioconf.o \ - bootargs.o \ - pe_init.o \ - device_tree.o - -OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) - -$(OBJS_WERROR): WERROR=-Werror - 
###################################################################### #END Machine dependent Makefile fragment for i386 ###################################################################### diff --git a/pexpert/conf/Makefile.ppc b/pexpert/conf/Makefile.ppc index 0d463a9a2..4ef7445f0 100644 --- a/pexpert/conf/Makefile.ppc +++ b/pexpert/conf/Makefile.ppc @@ -1,6 +1,7 @@ ###################################################################### #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### + ###################################################################### #END Machine dependent Makefile fragment for ppc ###################################################################### diff --git a/pexpert/conf/Makefile.template b/pexpert/conf/Makefile.template index 526b89513..1207442f0 100644 --- a/pexpert/conf/Makefile.template +++ b/pexpert/conf/Makefile.template @@ -11,7 +11,7 @@ export IDENT # -# XXX: INCFLAGS +# INCFLAGS # INCFLAGS_MAKEFILE= $(INCFLAGS_POSIX) @@ -24,15 +24,11 @@ include $(MakeInc_cmd) include $(MakeInc_def) # -# XXX: CFLAGS +# CFLAGS # -# -D_KERNEL_BUILD -DKERNEL_BUILD -DARCH_PRIVATE -DBSD_BUILD -DMACH_KERNEL # -CFLAGS+= -imacros meta_features.h -DPEXPERT_KERNEL_PRIVATE -DKERNEL -DDRIVER_PRIVATE \ - -Wall -Wno-four-char-constants -fno-common \ - -DRelease3CompatibilityBuild $(CFLAGS_INLINE_CONFIG) - -SFLAGS+= -DKERNEL +CFLAGS+= -imacros meta_features.h -DPEXPERT_KERNEL_PRIVATE \ + -Werror $(CFLAGS_INLINE_CONFIG) # # Directories for mig generated files @@ -79,8 +75,10 @@ ${OBJS}: ${OBJSDEPS} LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS}; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_depend: do_all $(_v)${MD} -u Makedep -f -d `ls *.d`; diff --git a/pexpert/conf/Makefile.x86_64 
b/pexpert/conf/Makefile.x86_64 new file mode 100644 index 000000000..25f7be596 --- /dev/null +++ b/pexpert/conf/Makefile.x86_64 @@ -0,0 +1,8 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### + diff --git a/pexpert/conf/files.i386 b/pexpert/conf/files.i386 index 9808c74ec..0ba9ffc18 100644 --- a/pexpert/conf/files.i386 +++ b/pexpert/conf/files.i386 @@ -6,7 +6,3 @@ pexpert/i386/pe_identify_machine.c standard pexpert/i386/pe_kprintf.c standard pexpert/i386/pe_interrupt.c standard pexpert/i386/pe_serial.c standard - - -# Polled-mode keyboard driver. -pexpert/i386/kd.c standard diff --git a/pexpert/conf/files.x86_64 b/pexpert/conf/files.x86_64 new file mode 100644 index 000000000..0ba9ffc18 --- /dev/null +++ b/pexpert/conf/files.x86_64 @@ -0,0 +1,8 @@ +OPTIONS/gprof optional gprof + +pexpert/i386/pe_init.c standard +pexpert/i386/pe_bootargs.c standard +pexpert/i386/pe_identify_machine.c standard +pexpert/i386/pe_kprintf.c standard +pexpert/i386/pe_interrupt.c standard +pexpert/i386/pe_serial.c standard diff --git a/pexpert/gen/bootargs.c b/pexpert/gen/bootargs.c index 1d9fd94bd..4c5b5f07d 100644 --- a/pexpert/gen/bootargs.c +++ b/pexpert/gen/bootargs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,6 +36,7 @@ static int argstrcpy2(char *from,char *to, unsigned maxlen); #define NUM 0 #define STR 1 +#if !defined(__LP64__) && !defined(__arm__) boolean_t PE_parse_boot_arg( const char *arg_string, @@ -50,6 +51,7 @@ PE_parse_boot_arg( return PE_parse_boot_argn(arg_string, arg_ptr, max_len); } +#endif boolean_t PE_parse_boot_argn( @@ -69,7 +71,7 @@ PE_parse_boot_argn( arg_found = FALSE; - while(isargsep(*args)) args++; + while(*args && isargsep(*args)) args++; while (*args) { @@ -95,7 +97,7 @@ PE_parse_boot_argn( arg_found = TRUE; break; } else { - while (isargsep (*cp)) + while (*cp && isargsep (*cp)) cp++; if (*cp == '=' && c != '=') { args = cp+1; diff --git a/pexpert/gen/device_tree.c b/pexpert/gen/device_tree.c index 0ea525699..dc3ea9ddc 100644 --- a/pexpert/gen/device_tree.c +++ b/pexpert/gen/device_tree.c @@ -44,8 +44,8 @@ #define NULL ((void *) 0) #endif -#define round_long(x) (((x) + 3) & -4) -#define next_prop(x) ((DeviceTreeNodeProperty *) (((int)x) + sizeof(DeviceTreeNodeProperty) + round_long(x->length))) +#define round_long(x) (((x) + 3UL) & ~(3UL)) +#define next_prop(x) ((DeviceTreeNodeProperty *) (((uintptr_t)x) + sizeof(DeviceTreeNodeProperty) + round_long(x->length))) /* Entry*/ typedef DeviceTreeNode *RealDTEntry; @@ -200,7 +200,7 @@ int DTFindEntry(const char *propName, const char *propValue, DTEntry *entryH) int find_entry(const char *propName, const char *propValue, DTEntry *entryH) { - DeviceTreeNode *nodeP = (DeviceTreeNode *) startingP; + DeviceTreeNode *nodeP = (DeviceTreeNode *) (void *) startingP; unsigned int k; if (nodeP->nProperties == 0) return(kError); // End of the list of nodes @@ -208,7 +208,7 @@ int find_entry(const char *propName, const char *propValue, DTEntry *entryH) // Search current entry for (k = 0; k < nodeP->nProperties; ++k) { - DeviceTreeNodeProperty *propP = (DeviceTreeNodeProperty *) startingP; + DeviceTreeNodeProperty *propP = (DeviceTreeNodeProperty *) (void *) 
startingP; startingP += sizeof (*propP) + ((propP->length + 3) & -4); @@ -410,7 +410,7 @@ DTGetProperty(const DTEntry entry, const char *propertyName, void **propertyValu prop = (DeviceTreeNodeProperty *) (entry + 1); for (k = 0; k < entry->nProperties; k++) { if (strcmp(prop->name, propertyName) == 0) { - *propertyValue = (void *) (((int)prop) + *propertyValue = (void *) (((uintptr_t)prop) + sizeof(DeviceTreeNodeProperty)); *propertySize = prop->length; return kSuccess; diff --git a/pexpert/i386/kd.c b/pexpert/i386/kd.c deleted file mode 100644 index 7424ed2e4..000000000 --- a/pexpert/i386/kd.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - */ - -/* - * Olivetti Mach Console driver v0.0 - * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989 - * All rights reserved. - * - */ -/* - * Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc., - * Cupertino, California. - * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies and that both the copyright notice and this permission notice - * appear in supporting documentation, and that the name of Olivetti - * not be used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. - * - * OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, - * IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, - * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION - * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * - * Copyright 1988, 1989 by Intel Corporation, Santa Clara, California. - * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies and that both the copyright notice and this permission notice - * appear in supporting documentation, and that the name of Intel - * not be used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. 
- * - * INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, - * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, - * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION - * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* $ Header: $ */ - -#include - -extern void cpu_shutdown(void); - -void kdreboot(void); - -/* - * Common I/O ports. - */ -#define K_RDWR 0x60 /* keyboard data & cmds (read/write) */ -#define K_STATUS 0x64 /* keybd status (read-only) */ -#define K_CMD 0x64 /* keybd ctlr command (write-only) */ - -/* - * Bit definitions for K_STATUS port. - */ -#define K_OBUF_FUL 0x01 /* output (from keybd) buffer full */ -#define K_IBUF_FUL 0x02 /* input (to keybd) buffer full */ -#define K_SYSFLAG 0x04 /* "System Flag" */ -#define K_CMD_DATA 0x08 /* 1 = input buf has cmd, 0 = data */ -#define K_KBD_INHBT 0x10 /* 0 if keyboard inhibited */ -#define K_XMT_TIMEOUT 0x20 /* Transmit time out */ -#define K_RCV_TIMEOUT 0x40 /* Receive time out */ - -/* - * Keyboard controller commands (sent to K_CMD port). - */ -#define K_CMD_READ 0x20 /* read controller command byte */ -#define K_CMD_WRITE 0x60 /* write controller command byte */ -#define K_CMD_TEST 0xab /* test interface */ -#define K_CMD_DUMP 0xac /* diagnostic dump */ -#define K_CMD_DISBLE 0xad /* disable keyboard */ -#define K_CMD_ENBLE 0xae /* enable keyboard */ -#define K_CMD_RDKBD 0xc4 /* read keyboard ID */ -#define K_CMD_ECHO 0xee /* used for diagnostic testing */ -#define K_CMD_RESET 0xfe /* issue a system reset */ - -/* - * kd_sendcmd - * - * This function sends a command byte to the keyboard command - * port, but first waits until the input/output data buffer is - * clear before sending the data. 
- * - */ - -static void -kd_sendcmd(unsigned char ch) -{ - while (inb(K_STATUS) & K_IBUF_FUL); - outb(K_CMD, ch); -} - -/* - * kdreboot - * - * Send a command to the motherboard keyboard controller to - * issue a hardware reset. - */ - -void -kdreboot(void) -{ - kd_sendcmd( K_CMD_RESET ); - - /* - * DRAT. We're still here. Let's try a "CPU shutdown", which consists - * of clearing the IDTR and causing an exception. It's in locore.s - */ - cpu_shutdown(); - /*NOTREACHED*/ -} diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index feb0a8843..614382096 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -31,6 +31,7 @@ */ #include #include +#include #include #include #include @@ -74,6 +75,7 @@ int PE_initialize_console( PE_Video * info, int op ) case kPEEnableScreen: initialize_screen(info, op); + if (info) PE_state.video = *info; kprintf("kPEEnableScreen %d\n", last_console); if( last_console != -1) switch_to_old_console( last_console); @@ -101,7 +103,7 @@ void PE_init_iokit(void) boolean_t norootInitialized = FALSE; DTEntry entry; unsigned int size; - void ** map; + uint32_t *map; boot_progress_element *bootPict; PE_init_kprintf(TRUE); @@ -115,12 +117,14 @@ void PE_init_iokit(void) if( kSuccess == DTLookupEntry(NULL, "/chosen/memory-map", &entry)) { if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) { - bcopy( map[0], appleClut8, sizeof(appleClut8) ); - bootClutInitialized = TRUE; - } + if (sizeof(appleClut8) <= map[1]) { + bcopy( (void *)ml_static_ptovirt(map[0]), appleClut8, sizeof(appleClut8) ); + bootClutInitialized = TRUE; + } + } if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) { - bootPict = (boot_progress_element *) map[0]; + bootPict = (boot_progress_element *) ml_static_ptovirt(map[0]); default_noroot.width = bootPict->width; default_noroot.height = bootPict->height; default_noroot.dx = 0; @@ -165,8 +169,8 @@ void PE_init_platform(boolean_t vm_initialized, void * _args) // New 
EFI-style PE_state.bootArgs = _args; - PE_state.deviceTreeHead = (void *) args->deviceTreeP; - PE_state.video.v_baseAddr = args->Video.v_baseAddr; + PE_state.deviceTreeHead = (void *) ml_static_ptovirt(args->deviceTreeP); + PE_state.video.v_baseAddr = args->Video.v_baseAddr; // remains physical address PE_state.video.v_rowBytes = args->Video.v_rowBytes; PE_state.video.v_width = args->Video.v_width; PE_state.video.v_height = args->Video.v_height; diff --git a/pexpert/i386/pe_interrupt.c b/pexpert/i386/pe_interrupt.c index 316bf9780..19b0e003f 100644 --- a/pexpert/i386/pe_interrupt.c +++ b/pexpert/i386/pe_interrupt.c @@ -28,15 +28,12 @@ #include #include #include -#include -#include - #if CONFIG_DTRACE && DEVELOPMENT #include #endif -void PE_incoming_interrupt(x86_saved_state_t *); +void PE_incoming_interrupt(int); struct i386_interrupt_handler { @@ -53,55 +50,25 @@ i386_interrupt_handler_t PE_interrupt_handler; void -PE_incoming_interrupt(x86_saved_state_t *state) +PE_incoming_interrupt(int interrupt) { i386_interrupt_handler_t *vector; - uint64_t rip; - int interrupt; - boolean_t user_mode = FALSE; - - if (is_saved_state64(state) == TRUE) { - x86_saved_state64_t *state64; - - state64 = saved_state64(state); - rip = state64->isf.rip; - interrupt = state64->isf.trapno; - user_mode = TRUE; - } else { - x86_saved_state32_t *state32; - - state32 = saved_state32(state); - if (state32->cs & 0x03) - user_mode = TRUE; - rip = state32->eip; - interrupt = state32->trapno; - } - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START, - interrupt, (unsigned int)rip, user_mode, 0, 0); vector = &PE_interrupt_handler; #if CONFIG_DTRACE && DEVELOPMENT - DTRACE_INT5(interrupt_start, void *, vector->nub, int, 0, - void *, vector->target, IOInterruptHandler, vector->handler, - void *, vector->refCon); + DTRACE_INT5(interrupt_start, void *, vector->nub, int, 0, + void *, vector->target, IOInterruptHandler, vector->handler, + void *, vector->refCon); #endif - if 
(!lapic_interrupt(interrupt, state)) { - vector->handler(vector->target, NULL, vector->nub, interrupt); - } + vector->handler(vector->target, NULL, vector->nub, interrupt); #if CONFIG_DTRACE && DEVELOPMENT - DTRACE_INT5(interrupt_complete, void *, vector->nub, int, 0, - void *, vector->target, IOInterruptHandler, vector->handler, - void *, vector->refCon); + DTRACE_INT5(interrupt_complete, void *, vector->nub, int, 0, + void *, vector->target, IOInterruptHandler, vector->handler, + void *, vector->refCon); #endif - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, - 0, 0, 0, 0, 0); } void PE_install_interrupt_handler(void *nub, diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c index 84855f63e..b25bbcf28 100644 --- a/pexpert/i386/pe_kprintf.c +++ b/pexpert/i386/pe_kprintf.c @@ -39,7 +39,14 @@ /* Globals */ void (*PE_kputc)(char c); +#if DEBUG +/* DEBUG kernel starts with true serial, but + * may later disable or switch to video + * console */ +unsigned int disable_serial_output = FALSE; +#else unsigned int disable_serial_output = TRUE; +#endif decl_simple_lock_data(static, kprintf_lock) @@ -51,19 +58,30 @@ void PE_init_kprintf(boolean_t vm_initialized) panic("Platform Expert not initialized"); if (!vm_initialized) { + unsigned int new_disable_serial_output = TRUE; + simple_lock_init(&kprintf_lock, 0); if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) if (boot_arg & DB_KPRT) - disable_serial_output = FALSE; + new_disable_serial_output = FALSE; - if (!disable_serial_output && serial_init()) + /* If we are newly enabling serial, make sure we only call serial_init() + * if our previous state was not enabled */ + if (!new_disable_serial_output && (!disable_serial_output || serial_init())) PE_kputc = serial_putc; else PE_kputc = cnputc; + + disable_serial_output = new_disable_serial_output; } } +#if CONFIG_NO_KPRINTF_STRINGS +/* Prevent CPP from breaking the definition below */ +#undef kprintf +#endif + #ifdef 
MP_DEBUG static void _kprintf(const char *format, ...) { @@ -86,6 +104,15 @@ void kprintf(const char *fmt, ...) if (!disable_serial_output) { + /* If PE_kputc has not yet been initialized, don't + * take any locks, just dump to serial */ + if (!PE_kputc) { + va_start(listp, fmt); + _doprnt(fmt, &listp, serial_putc, 16); + va_end(listp); + return; + } + /* * Spin to get kprintf lock but re-enable interrupts while * failing. diff --git a/pexpert/i386/pe_serial.c b/pexpert/i386/pe_serial.c index ddb48b162..cba4e1b59 100644 --- a/pexpert/i386/pe_serial.c +++ b/pexpert/i386/pe_serial.c @@ -159,7 +159,7 @@ int serial_init( void ) { unsigned serial_baud_rate = 0; - if ( /*uart_initted ||*/ uart_probe() == 0 ) return 0; + if ( uart_probe() == 0 ) return 0; /* Disable hardware interrupts */ @@ -198,6 +198,7 @@ int serial_init( void ) return 1; } + void serial_putc( char c ) { uart_putc(c); diff --git a/pexpert/i386/pe_spl.c b/pexpert/i386/pe_spl.c deleted file mode 100644 index b72ae8bd9..000000000 --- a/pexpert/i386/pe_spl.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include - - -typedef unsigned long spl_t; - -spl_t PE_set_spl(spl_t x); - -spl_t splhi() { return PE_set_spl(8); } -spl_t splhigh() { return PE_set_spl(8); } -spl_t splclock() { return PE_set_spl(8); } -spl_t splvm() { return PE_set_spl(8); } -spl_t splsched() { return PE_set_spl(8); } -spl_t splimp() { return PE_set_spl(6); } -void splx(spl_t x) { (void) PE_set_spl(x); } -spl_t splnet() { return PE_set_spl(6); } -void spllo() { (void) PE_set_spl(0); } -spl_t spl1() { return PE_set_spl(1); } -spl_t spl2() { return PE_set_spl(2); } -spl_t spl3() { return PE_set_spl(3); } -spl_t spl4() { return PE_set_spl(4); } -spl_t spl5() { return PE_set_spl(5); } -spl_t spl6() { return PE_set_spl(6); } -spl_t splbio() { return PE_set_spl(5); } -spl_t spltty() { return PE_set_spl(6); } - -spl_t sploff() { return PE_set_spl(8); } -void splon(spl_t x) { (void) PE_set_spl(x); } - -spl_t PE_set_spl(spl_t lvl) -{ - spl_t old_level; - int mycpu; - - - __asm__ volatile("cli"); - - mycpu = cpu_number(); - old_level = cpu_data[mycpu].spl_level; - cpu_data[mycpu].spl_level = lvl ; - - if (!lvl) __asm__ volatile("sti"); - - return old_level; -} - -void PE_set_spl_no_interrupt(spl_t lvl) -{ - int mycpu; - - __asm__ volatile("cli"); - - mycpu = cpu_number(); - cpu_data[mycpu].spl_level = lvl ; - - return; -} - diff --git a/pexpert/pexpert/Makefile b/pexpert/pexpert/Makefile index 3c1284231..82be8207d 100644 --- a/pexpert/pexpert/Makefile +++ b/pexpert/pexpert/Makefile @@ 
-16,6 +16,9 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ i386 +INSTINC_SUBDIRS_X86_64 = \ + i386 + INSTINC_SUBDIRS_ARM = \ arm @@ -25,6 +28,8 @@ EXPINC_SUBDIRS_PPC = ${INSTINC_SUBDIRS_PPC} EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} +EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} + EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} DATAFILES = \ diff --git a/pexpert/pexpert/device_tree.h b/pexpert/pexpert/device_tree.h index cb5dda3b6..e68a85588 100644 --- a/pexpert/pexpert/device_tree.h +++ b/pexpert/pexpert/device_tree.h @@ -90,14 +90,14 @@ Structures for a Flattened Device Tree typedef struct DeviceTreeNodeProperty { char name[kPropNameLength]; // NUL terminated property name - unsigned long length; // Length (bytes) of folloing prop value + uint32_t length; // Length (bytes) of following prop value // unsigned long value[1]; // Variable length value of property // Padded to a multiple of a longword? } DeviceTreeNodeProperty; typedef struct OpaqueDTEntry { - unsigned long nProperties; // Number of props[] elements (0 => end) - unsigned long nChildren; // Number of children[] elements + uint32_t nProperties; // Number of props[] elements (0 => end) + uint32_t nChildren; // Number of children[] elements // DeviceTreeNodeProperty props[];// array size == nProperties // DeviceTreeNode children[]; // array size == nChildren } DeviceTreeNode; diff --git a/pexpert/pexpert/i386/Makefile b/pexpert/pexpert/i386/Makefile index 181965c90..aba89e1ca 100644 --- a/pexpert/pexpert/i386/Makefile +++ b/pexpert/pexpert/i386/Makefile @@ -10,7 +10,6 @@ include $(MakeInc_def) DATAFILES = \ boot.h \ efi.h \ - fb_entries.h \ protos.h INSTALL_MD_LIST = ${DATAFILES} diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h index 73254f6e4..673063156 100644 --- a/pexpert/pexpert/i386/boot.h +++ b/pexpert/pexpert/i386/boot.h @@ -43,27 +43,6 @@ enum { kBootDriverTypeMKEXT = 2 }; -/* - * Video information. 
- */ -struct boot_video { - uint32_t v_baseAddr; // Base address of video memory - uint32_t v_display; // Display Code - uint32_t v_rowBytes; // Number of bytes per pixel row - uint32_t v_width; // Width - uint32_t v_height; // Height - uint32_t v_depth; // Pixel Depth -}; - -typedef struct boot_video boot_video; - -/* Values for v_display */ - -#define VGA_TEXT_MODE 0 -#define GRAPHICS_MODE 1 -#define FB_TEXT_MODE 2 - - enum { kEfiReservedMemoryType = 0, kEfiLoaderCode = 1, @@ -87,7 +66,7 @@ enum { */ typedef struct EfiMemoryRange { uint32_t Type; - uint32_t pad; + uint32_t Pad; uint64_t PhysicalStart; uint64_t VirtualStart; uint64_t NumberOfPages; @@ -108,47 +87,59 @@ struct Boot_Video { uint32_t v_width; /* Width */ uint32_t v_height; /* Height */ uint32_t v_depth; /* Pixel Depth */ -} __attribute__((aligned(4))); +}; typedef struct Boot_Video Boot_Video; +/* Values for v_display */ + +#define GRAPHICS_MODE 1 +#define FB_TEXT_MODE 2 /* Boot argument structure - passed into Mach kernel at boot time. 
+ * "Revision" can be incremented for compatible changes */ -#define kBootArgsRevision 4 +#define kBootArgsRevision 5 #define kBootArgsVersion 1 +/* Snapshot constants of previous revisions that are supported */ +#define kBootArgsVersion1 1 +#define kBootArgsRevision1_4 4 +#define kBootArgsRevision1_5 5 + #define kBootArgsEfiMode32 32 #define kBootArgsEfiMode64 64 typedef struct boot_args { - uint16_t Revision; /* Revision of boot_args structure */ - uint16_t Version; /* Version of boot_args structure */ + uint16_t Revision; /* Revision of boot_args structure */ + uint16_t Version; /* Version of boot_args structure */ - char CommandLine[BOOT_LINE_LENGTH]; /* Passed in command line */ + char CommandLine[BOOT_LINE_LENGTH]; /* Passed in command line */ - uint32_t MemoryMap; + uint32_t MemoryMap; /* Physical address of memory map */ uint32_t MemoryMapSize; uint32_t MemoryMapDescriptorSize; uint32_t MemoryMapDescriptorVersion; Boot_Video Video; /* Video Information */ - uint32_t deviceTreeP; /* Base of flattened device tree */ - uint32_t deviceTreeLength;/* Length of flattened tree */ + uint32_t deviceTreeP; /* Physical address of flattened device tree */ + uint32_t deviceTreeLength; /* Length of flattened tree */ - uint32_t kaddr; - uint32_t ksize; + uint32_t kaddr; /* Physical address of beginning of kernel text */ + uint32_t ksize; /* Size of combined kernel text+data+efi */ - uint32_t efiRuntimeServicesPageStart; + uint32_t efiRuntimeServicesPageStart; /* physical address of defragmented runtime pages */ uint32_t efiRuntimeServicesPageCount; - uint32_t efiSystemTable; + uint32_t efiSystemTable; /* physical address of system table in runtime area */ uint8_t efiMode; /* 32 = 32-bit, 64 = 64-bit */ uint8_t __reserved1[3]; - uint32_t __reserved2[7]; + uint32_t __reserved2[3]; + uint64_t efiRuntimeServicesVirtualPageStart; /* virtual address of defragmented runtime pages */ + uint32_t __reserved3[2]; -} __attribute__((aligned(4))) boot_args; +} boot_args; #endif /* 
_PEXPERT_I386_BOOT_H */ diff --git a/pexpert/pexpert/i386/efi.h b/pexpert/pexpert/i386/efi.h index c345f761b..5ef7a5bf6 100644 --- a/pexpert/pexpert/i386/efi.h +++ b/pexpert/pexpert/i386/efi.h @@ -29,12 +29,14 @@ #ifndef _PEXPERT_I386_EFI_H #define _PEXPERT_I386_EFI_H +#include + typedef uint8_t EFI_UINT8; typedef uint16_t EFI_UINT16; typedef uint32_t EFI_UINT32; typedef uint64_t EFI_UINT64; -typedef uint32_t EFI_UINTN; +typedef uint32_t EFI_UINTN; /* natural size for firmware, not kernel */ typedef int8_t EFI_INT8; typedef int16_t EFI_INT16; @@ -47,9 +49,11 @@ typedef int32_t EFI_CHAR32; typedef int64_t EFI_CHAR64; typedef uint32_t EFI_STATUS; -typedef boolean_t EFI_BOOLEAN; +typedef uint8_t EFI_BOOLEAN; typedef void VOID; -typedef VOID * EFI_HANDLE; + +typedef uint32_t EFI_PTR32; +typedef uint32_t EFI_HANDLE32; typedef uint64_t EFI_PTR64; typedef uint64_t EFI_HANDLE64; @@ -141,7 +145,7 @@ typedef struct { typedef union { EFI_GUID Guid; - EFI_UINT8 Raw[16]; + EFI_UINT8 Raw[16]; } EFI_GUID_UNION; // @@ -405,29 +409,29 @@ typedef struct { // // Time services // - EFI_GET_TIME GetTime; - EFI_SET_TIME SetTime; - EFI_GET_WAKEUP_TIME GetWakeupTime; - EFI_SET_WAKEUP_TIME SetWakeupTime; + EFI_PTR32 GetTime; + EFI_PTR32 SetTime; + EFI_PTR32 GetWakeupTime; + EFI_PTR32 SetWakeupTime; // // Virtual memory services // - EFI_SET_VIRTUAL_ADDRESS_MAP SetVirtualAddressMap; - EFI_CONVERT_POINTER ConvertPointer; + EFI_PTR32 SetVirtualAddressMap; + EFI_PTR32 ConvertPointer; // // Variable services // - EFI_GET_VARIABLE GetVariable; - EFI_GET_NEXT_VARIABLE_NAME GetNextVariableName; - EFI_SET_VARIABLE SetVariable; + EFI_PTR32 GetVariable; + EFI_PTR32 GetNextVariableName; + EFI_PTR32 SetVariable; // // Misc // - EFI_GET_NEXT_HIGH_MONO_COUNT GetNextHighMonotonicCount; - EFI_RESET_SYSTEM ResetSystem; + EFI_PTR32 GetNextHighMonotonicCount; + EFI_PTR32 ResetSystem; #ifdef TIANO_EXTENSION_FLAG // @@ -435,10 +439,10 @@ typedef struct { // Extended EFI Services 
////////////////////////////////////////////////////// // - EFI_REPORT_STATUS_CODE ReportStatusCode; + EFI_PTR32 ReportStatusCode; #endif -} __attribute__((aligned(8))) EFI_RUNTIME_SERVICES; +} __attribute__((aligned(8))) EFI_RUNTIME_SERVICES_32; typedef struct { EFI_TABLE_HEADER Hdr; @@ -446,29 +450,29 @@ typedef struct { // // Time services // - EFI_PTR64 GetTime; - EFI_PTR64 SetTime; - EFI_PTR64 GetWakeupTime; - EFI_PTR64 SetWakeupTime; + EFI_PTR64 GetTime; + EFI_PTR64 SetTime; + EFI_PTR64 GetWakeupTime; + EFI_PTR64 SetWakeupTime; // // Virtual memory services // - EFI_PTR64 SetVirtualAddressMap; - EFI_PTR64 ConvertPointer; + EFI_PTR64 SetVirtualAddressMap; + EFI_PTR64 ConvertPointer; // // Variable services // - EFI_PTR64 GetVariable; - EFI_PTR64 GetNextVariableName; - EFI_PTR64 SetVariable; + EFI_PTR64 GetVariable; + EFI_PTR64 GetNextVariableName; + EFI_PTR64 SetVariable; // // Misc // - EFI_PTR64 GetNextHighMonotonicCount; - EFI_PTR64 ResetSystem; + EFI_PTR64 GetNextHighMonotonicCount; + EFI_PTR64 ResetSystem; #ifdef TIANO_EXTENSION_FLAG // @@ -476,7 +480,7 @@ typedef struct { // Extended EFI Services ////////////////////////////////////////////////////// // - EFI_PTR64 ReportStatusCode; + EFI_PTR64 ReportStatusCode; #endif } __attribute__((aligned(8))) EFI_RUNTIME_SERVICES_64; @@ -486,8 +490,8 @@ typedef struct { // typedef struct { EFI_GUID VendorGuid; - VOID *VendorTable; -} EFI_CONFIGURATION_TABLE; + EFI_PTR32 VendorTable; +} EFI_CONFIGURATION_TABLE_32; typedef struct { EFI_GUID VendorGuid; @@ -503,28 +507,28 @@ typedef struct { #define EFI_1_02_SYSTEM_TABLE_REVISION ((1 << 16) | 02) #define EFI_1_10_SYSTEM_TABLE_REVISION ((1 << 16) | 10) -typedef struct EFI_SYSTEM_TABLE { +typedef struct EFI_SYSTEM_TABLE_32 { EFI_TABLE_HEADER Hdr; - EFI_CHAR16 *FirmwareVendor; + EFI_PTR32 FirmwareVendor; EFI_UINT32 FirmwareRevision; - EFI_HANDLE ConsoleInHandle; - VOID *ConIn; + EFI_HANDLE32 ConsoleInHandle; + EFI_PTR32 ConIn; - EFI_HANDLE ConsoleOutHandle; - VOID 
*ConOut; + EFI_HANDLE32 ConsoleOutHandle; + EFI_PTR32 ConOut; - EFI_HANDLE StandardErrorHandle; - VOID *StdErr; + EFI_HANDLE32 StandardErrorHandle; + EFI_PTR32 StdErr; - EFI_RUNTIME_SERVICES *RuntimeServices; - VOID *BootServices; + EFI_PTR32 RuntimeServices; + EFI_PTR32 BootServices; - EFI_UINTN NumberOfTableEntries; - EFI_CONFIGURATION_TABLE *ConfigurationTable; + EFI_UINT32 NumberOfTableEntries; + EFI_PTR32 ConfigurationTable; -} __attribute__((aligned(8))) EFI_SYSTEM_TABLE; +} __attribute__((aligned(8))) EFI_SYSTEM_TABLE_32; typedef struct EFI_SYSTEM_TABLE_64 { EFI_TABLE_HEADER Hdr; @@ -535,16 +539,16 @@ typedef struct EFI_SYSTEM_TABLE_64 { EFI_UINT32 __pad; EFI_HANDLE64 ConsoleInHandle; - EFI_PTR64 ConIn; + EFI_PTR64 ConIn; EFI_HANDLE64 ConsoleOutHandle; - EFI_PTR64 ConOut; + EFI_PTR64 ConOut; EFI_HANDLE64 StandardErrorHandle; - EFI_PTR64 StdErr; + EFI_PTR64 StdErr; EFI_PTR64 RuntimeServices; - EFI_PTR64 BootServices; + EFI_PTR64 BootServices; EFI_UINT64 NumberOfTableEntries; EFI_PTR64 ConfigurationTable; diff --git a/pexpert/pexpert/i386/fb_entries.h b/pexpert/pexpert/i386/fb_entries.h deleted file mode 100644 index d9c14341a..000000000 --- a/pexpert/pexpert/i386/fb_entries.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _I386_FD_ENTRIES_H_ -#define _I386_FD_ENTRIES_H_ - -#include - -/* test for presence of linear framebuffer */ -extern boolean_t fb_present(void); -/* initialize framebuffer */ -extern void fb_init(void); -/* prepare for reboot */ -extern void fb_reset(void); - -#endif /* _I386_FD_ENTRIES_H_ */ diff --git a/pexpert/pexpert/i386/kd_entries.h b/pexpert/pexpert/i386/kd_entries.h deleted file mode 100644 index ce5d6ee84..000000000 --- a/pexpert/pexpert/i386/kd_entries.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PEXPERT_I386_KD_ENTRIES_H_ -#define _PEXPERT_I386_KD_ENTRIES_H_ -/* - * @OSF_COPYRIGHT@ - */ -typedef int io_return_t; -typedef unsigned int dev_mode_t; -typedef unsigned int dev_flavor_t; -typedef int *dev_status_t; - -extern io_return_t kdopen( - dev_t dev, - dev_mode_t flag, - io_req_t ior); -extern void kdclose( - dev_t dev); -extern io_return_t kdread( - dev_t dev, - io_req_t ior); -extern io_return_t kdwrite( - dev_t dev, - io_req_t ior); -extern vm_offset_t kdmmap( - dev_t dev, - vm_offset_t off, - vm_prot_t prot); -extern boolean_t kdportdeath( - dev_t dev, - ipc_port_t port); -extern io_return_t kdgetstat( - dev_t dev, - dev_flavor_t flavor, - dev_status_t data, - natural_t *count); -extern io_return_t kdsetstat( - dev_t dev, - dev_flavor_t flavor, - dev_status_t data, - natural_t count); -extern void kd_cmdreg_write( - u_char val); -extern int kd_mouse_write( - u_char val); -extern void kd_mouse_read( - int no, - char * bufp); -extern void kd_mouse_drain(void); -extern void kdreboot(void); -extern void bmpput( - csrpos_t pos, - char ch, - char chattr); -extern void bmpmvup( - csrpos_t from, - csrpos_t to, - int count); -extern void bmpmvdown( - csrpos_t from, - csrpos_t to, - int count); -extern void bmpclear( - csrpos_t to, - int count, - char chattr); -extern void bmpsetsetcursor( - csrpos_t pos); -extern void kd_slmscu( - u_char * from, - u_char * to, - int count); -extern void kd_slmscd( - u_char * from, - u_char * to, - 
int count); -extern void kd_slmwd( - u_char * pos, - int count, - u_short val); -extern void kd_sendcmd( - u_char c); - -#endif /* _PEXPERT_POWERMAC_PDM_H_ */ diff --git a/pexpert/pexpert/i386/kdsoft.h b/pexpert/pexpert/i386/kdsoft.h deleted file mode 100644 index 399fa9e6e..000000000 --- a/pexpert/pexpert/i386/kdsoft.h +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _PEXPERT_I386_KDSOFT_H_ -#define _PEXPERT_I386_KDSOFT_H_ - -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * File: kdsoft.h - * Description: Software structures for keyboard/display driver, shared with - * drivers for specific graphics cards. - * - * $ Header: $ - * - * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989. - * All rights reserved. - * - * Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc., - * Cupertino, California. - * - * All Rights Reserved - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies and that both the copyright notice and this permission notice - * appear in supporting documentation, and that the name of Olivetti - * not be used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. 
- * - * OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, - * IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM - * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, - * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION - * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * This driver handles two types of graphics cards. The first type - * (e.g., EGA, CGA), treats the screen as a page of characters and - * has a hardware cursor. The second type (e.g., the Blit) treats the - * screen as a bitmap. A hardware cursor may be present, but it is - * ignored in favor of a software cursor. - * - * - * Most of the driver uses the following abstraction for the display: - * - * The cursor position is simply an index into a (logical) linear char - * array that wraps around at the end of each line. Each character - * takes up ONE_SPACE bytes. Values in [0..ONE_PAGE) are positions in - * the displayed page. Values < 0 and >= ONE_PAGE are off the page - * and require some scrolling to put the cursor back on the page. - * - * The kd_dxxx routines handle the conversion from this abstraction to - * what the hardware requires. - * - * (*kd_dput)(pos, ch, chattr) - * csrpos_t pos; - * char ch, chattr; - * Displays a character at "pos", where "ch" = the character to - * be displayed and "chattr" is its attribute byte. - * - * (*kd_dmvup)(from, to, count) - * csrpos_t from, to; - * int count; - * Does a (relatively) fast block transfer of characters upward. - * "count" is the number of character positions (not bytes) to move. - * "from" is the character position to start moving from (at the start - * of the block to be moved). "to" is the character position to start - * moving to. 
- * - * (*kd_dmvdown)(from, to, count) - * csrpos_t from, to; - * int count; - * "count" is the number of character positions (not bytes) to move. - * "from" is the character position to start moving from (at the end - * of the block to be moved). "to" is the character position to - * start moving to. - * - * (*kd_dclear)(to, count, chattr) - * csrpos_t, to; - * int count; - * char chattr; - * Erases "count" character positions, starting with "to". - * - * (*kd_dsetcursor)(pos) - * Sets kd_curpos and moves the displayed cursor to track it. "pos" - * should be in the range [0..ONE_PAGE). - * - * (*kd_dreset)() - * In some cases, the boot program expects the display to be in a - * particular state, and doing a soft reset (i.e., - * software-controlled reboot) doesn't put it into that state. For - * these cases, the machine-specific driver should provide a "reset" - * procedure, which will be called just before the kd code causes the - * system to reboot. - */ - -//ERICHACK#include - -/* - * Globals used for both character-based controllers and bitmap-based - * controllers. - */ - -typedef short csrpos_t; /* cursor position, ONE_SPACE bytes per char */ -extern u_char *vid_start; /* VM start of video RAM or frame buffer */ -extern csrpos_t kd_curpos; /* should be set only by kd_setpos */ -extern short kd_lines; /* num lines in tty display */ -extern short kd_cols; -extern char kd_attr; /* current character attribute */ - - -/* - * Globals used only for bitmap-based controllers. - * XXX - probably needs reworking for color. - */ - -/* - * The following font layout is assumed: - * - * The top scan line of all the characters comes first. Then the - * second scan line, then the third, etc. - * - * ------ ... ---------|-----N--------|-------------- ... ----------- - * ------ ... ---------|-----N--------|-------------- ... ----------- - * . - * . - * . - * ------ ... ---------|-----N--------|-------------- ... 
----------- - * - * In the picture, each line is a scan line from the font. Each scan - * line is stored in memory immediately after the previous one. The - * bits between the vertical lines are the bits for a single character - * (e.g., the letter "N"). - * There are "char_height" scan lines. Each character is "char_width" - * bits wide. We make the simplifying assumption that characters are - * on byte boundaries. (We also assume that a byte is 8 bits.) - */ - -extern u_char *font_start; /* starting addr of font */ - -extern short fb_width; /* bits in frame buffer scan line */ -extern short fb_height; /* scan lines in frame buffer*/ -extern short char_width; /* bit width of 1 char */ -extern short char_height; /* bit height of 1 char */ -extern short chars_in_font; -extern short cursor_height; /* bit height of cursor */ - /* char_height + cursor_height = line_height */ - -extern u_char char_black; /* 8 black (off) bits */ -extern u_char char_white; /* 8 white (on) bits */ - - -/* - * The tty emulation does not usually require the entire frame buffer. - * (xstart, ystart) is the bit address for the upper left corner of the - * tty "screen". - */ - -extern short xstart, ystart; - - -/* - * Accelerators for bitmap displays. 
- */ - -extern short char_byte_width; /* char_width/8 */ -extern short fb_byte_width; /* fb_width/8 */ -extern short font_byte_width; /* num bytes in 1 scan line of font */ - -extern void bmpput( - csrpos_t pos, - char ch, - char chattr); -extern void bmpmvup( - csrpos_t from, - csrpos_t to, - int count); -extern void bmpmvdown( - csrpos_t from, - csrpos_t to, - int count); -extern void bmpclear( - csrpos_t to, - int count, - char chattr); -extern void bmpsetcursor( - csrpos_t pos); - -extern void (*kd_dput)( /* put attributed char */ - csrpos_t pos, - char ch, - char chattr); -extern void (*kd_dmvup)( /* block move up */ - csrpos_t from, - csrpos_t to, - int count); -extern void (*kd_dmvdown)( /* block move down */ - csrpos_t from, - csrpos_t to, - int count); -extern void (*kd_dclear)( /* block clear */ - csrpos_t to, - int count, - char chattr); -extern void (*kd_dsetcursor)( - /* set cursor position on displayed page */ - csrpos_t pos); -extern void (*kd_dreset)(void); /* prepare for reboot */ - - -#include - -extern void kdintr( - int vec, - int regs); - -#endif /* _PEXPERT_I386_KDSOFT_H_ */ diff --git a/pexpert/pexpert/machine/boot.h b/pexpert/pexpert/machine/boot.h index e78f39c34..542ee10db 100644 --- a/pexpert/pexpert/machine/boot.h +++ b/pexpert/pexpert/machine/boot.h @@ -30,7 +30,7 @@ #if defined (__ppc__) #include "pexpert/ppc/boot.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "pexpert/i386/boot.h" #else #error architecture not supported diff --git a/pexpert/pexpert/machine/protos.h b/pexpert/pexpert/machine/protos.h index 3735c930b..3dd9cbacb 100644 --- a/pexpert/pexpert/machine/protos.h +++ b/pexpert/pexpert/machine/protos.h @@ -30,7 +30,7 @@ #if defined (__ppc__) #include "pexpert/ppc/protos.h" -#elif defined (__i386__) +#elif defined (__i386__) || defined(__x86_64__) #include "pexpert/i386/protos.h" #else #error architecture not supported diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h 
index 42e96d977..3dd73dad7 100644 --- a/pexpert/pexpert/pexpert.h +++ b/pexpert/pexpert/pexpert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,7 +60,6 @@ void PE_init_platform( - void PE_init_kprintf( boolean_t vm_initialized); @@ -130,9 +129,17 @@ void PE_install_interrupt_handler( void kprintf(const char *fmt, ...) __printflike(1,2); #endif +#if KERNEL_PRIVATE +void _consume_kprintf_args(int, ...); +#endif + #if CONFIG_NO_KPRINTF_STRINGS +#if KERNEL_PRIVATE +#define kprintf(x, ...) _consume_kprintf_args( 0, ## __VA_ARGS__ ) +#else #define kprintf(x, ...) do {} while (0) #endif +#endif void init_display_putc(unsigned char *baseaddr, int rowbytes, int height); void display_putc(char c); @@ -203,14 +210,22 @@ extern int PE_initialize_console( extern void PE_display_icon( unsigned int flags, const char * name ); +#if !CONFIG_EMBEDDED + +extern void +vc_enable_progressmeter(int new_value); +extern void +vc_set_progressmeter(int new_value); +extern int vc_progress_meter_enable; +extern int vc_progress_meter_value; + +#endif /* !CONFIG_EMBEDDED */ + typedef struct PE_state { boolean_t initialized; PE_Video video; void *deviceTreeHead; void *bootArgs; -#if defined(i386) || defined(arm) - void *fakePPCBootArgs; -#endif } PE_state_t; extern PE_state_t PE_state; @@ -218,9 +233,11 @@ extern PE_state_t PE_state; extern char * PE_boot_args( void); +#if !defined(__LP64__) && !defined(__arm__) extern boolean_t PE_parse_boot_arg( const char *arg_string, - void *arg_ptr); + void *arg_ptr) __deprecated; +#endif extern boolean_t PE_parse_boot_argn( const char *arg_string, diff --git a/pexpert/ppc/pe_init.c b/pexpert/ppc/pe_init.c index 11a7a2a1a..6bcf93210 100644 --- a/pexpert/ppc/pe_init.c +++ b/pexpert/ppc/pe_init.c @@ -101,6 +101,7 @@ int PE_initialize_console( PE_Video * info, int op ) case kPEEnableScreen: 
initialize_screen(info, op); + if (info) PE_state.video = *info; kprintf("kPEEnableScreen %d\n",last_console); if( last_console != -1) switch_to_old_console( last_console); diff --git a/pexpert/ppc/pe_misc.s b/pexpert/ppc/pe_misc.s deleted file mode 100644 index 55c2d4cc1..000000000 --- a/pexpert/ppc/pe_misc.s +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include - -/* -** PE_get_timebase() -** -** Entry - R3 contains pointer to 64 bit structure. -** -** Exit - 64 bit structure filled in. 
-** -*/ -ENTRY(PE_get_timebase, TAG_NO_FRAME_USED) -loop: - mftbu r4 - mftb r5 - mftbu r6 - cmpw r6, r4 - bne loop - - stw r4, 0(r3) - stw r5, 4(r3) - - blr - - - diff --git a/security/Makefile b/security/Makefile index 34ab8930c..90a42a992 100644 --- a/security/Makefile +++ b/security/Makefile @@ -12,12 +12,16 @@ INSTINC_SUBDIRS_PPC = \ INSTINC_SUBDIRS_I386 = \ +INSTINC_SUBDIRS_X86_64 = \ + EXPINC_SUBDIRS = \ EXPINC_SUBDIRS_PPC = \ EXPINC_SUBDIRS_I386 = \ +EXPINC_SUBDIRS_X86_64 = \ + DATAFILES = \ mac.h \ mac_policy.h diff --git a/security/_label.h b/security/_label.h index 93b57b4f2..9afcf5a0f 100644 --- a/security/_label.h +++ b/security/_label.h @@ -69,7 +69,7 @@ * and various other messes. */ -#define MAC_MAX_SLOTS 8 +#define MAC_MAX_SLOTS 7 #define MAC_FLAG_INITIALIZED 0x0000001 /* Is initialized for use. */ diff --git a/security/conf/MASTER b/security/conf/MASTER index d692d5ae5..18a6f7f1f 100644 --- a/security/conf/MASTER +++ b/security/conf/MASTER @@ -63,4 +63,5 @@ options CONFIG_DTRACE # dtrace support # options CONFIG_NO_PANIC_STRINGS # options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # +options CONFIG_FSE # file system events # diff --git a/security/conf/MASTER.i386 b/security/conf/MASTER.i386 index 1bd463765..524008c7b 100644 --- a/security/conf/MASTER.i386 +++ b/security/conf/MASTER.i386 @@ -4,22 +4,27 @@ # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug ] # +# # EMBEDDED = [ intel mach libkerncpp audit ] # DEVELOPMENT = [ EMBEDDED config_dtrace ] # ###################################################################### +machine "i386" # +cpu "i386" # + # -# Note: MAC options must be set in all the bsd/conf, osfmk/conf, and +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and # security/conf MASTER files. 
# options CONFIG_MACF # Mandatory Access Control Framework options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support +options CONFIG_FSE #options CONFIG_MACF_SOCKET #options CONFIG_MACF_NET #options CONFIG_MACF_ALWAYS_LABEL_MBUF #options CONFIG_MACF_DEBUG #options CONFIG_MACF_MACH +options CONFIG_AUDIT # kernel auditing -machine "i386" # -cpu "i386" # +options NO_NESTED_PMAP # diff --git a/security/conf/MASTER.ppc b/security/conf/MASTER.ppc index 534e8d2fc..8b946ff2a 100644 --- a/security/conf/MASTER.ppc +++ b/security/conf/MASTER.ppc @@ -19,11 +19,13 @@ # options CONFIG_MACF # Mandatory Access Control Framework options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support +options CONFIG_FSE #options CONFIG_MACF_SOCKET #options CONFIG_MACF_NET #options CONFIG_MACF_ALWAYS_LABEL_MBUF #options CONFIG_MACF_DEBUG #options CONFIG_MACF_MACH +options CONFIG_AUDIT # Kernel auditing machine "ppc" # cpu "ppc" # diff --git a/security/conf/MASTER.x86_64 b/security/conf/MASTER.x86_64 new file mode 100644 index 000000000..86b65c412 --- /dev/null +++ b/security/conf/MASTER.x86_64 @@ -0,0 +1,30 @@ +###################################################################### +# +# RELEASE = [ intel mach libkerncpp config_dtrace audit ] +# PROFILE = [ RELEASE profile ] +# DEBUG = [ RELEASE debug ] +# +# +# EMBEDDED = [ intel mach libkerncpp audit ] +# DEVELOPMENT = [ EMBEDDED ] +# +###################################################################### + +# +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and +# security/conf MASTER files. 
+# +options CONFIG_MACF # Mandatory Access Control Framework +options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support +options CONFIG_FSE +#options CONFIG_MACF_SOCKET +#options CONFIG_MACF_NET +#options CONFIG_MACF_ALWAYS_LABEL_MBUF +#options CONFIG_MACF_DEBUG +#options CONFIG_MACF_MACH +options CONFIG_AUDIT # Kernel auditing + +machine "x86_64" # +cpu "x86_64" # + +options NO_NESTED_PMAP # diff --git a/security/conf/Makefile b/security/conf/Makefile index f8934f813..f32722158 100644 --- a/security/conf/Makefile +++ b/security/conf/Makefile @@ -35,7 +35,6 @@ $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ $(SOURCE)/files.$(ARCH_CONFIG_LC) \ $(COMPOBJROOT)/doconf $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - echo $${doconf_target};\ $(MKDIR) $${doconf_target}; \ cd $${doconf_target}; \ rm -f $(notdir $?); \ diff --git a/security/conf/Makefile.i386 b/security/conf/Makefile.i386 index b89fdd145..7da8f08d6 100644 --- a/security/conf/Makefile.i386 +++ b/security/conf/Makefile.i386 @@ -2,6 +2,16 @@ #BEGIN Machine dependent Makefile fragment for i386 ###################################################################### +# Enable -Werror for i386 builds +CFLAGS+=$(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +#OBJS_NO_WERROR = \ + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror ###################################################################### #END Machine dependent Makefile fragment for i386 diff --git a/security/conf/Makefile.ppc b/security/conf/Makefile.ppc index 2b438f2fa..d71f1d77b 100644 --- a/security/conf/Makefile.ppc +++ b/security/conf/Makefile.ppc @@ -2,6 +2,16 @@ #BEGIN Machine dependent Makefile fragment for ppc ###################################################################### +# Enable -Werror for ppc builds +CFLAGS+=$(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# 
Objects that don't compile cleanly: +#OBJS_NO_WERROR = \ + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror ###################################################################### #END Machine dependent Makefile fragment for ppc diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template index f697e624e..ebe12b2d7 100644 --- a/security/conf/Makefile.template +++ b/security/conf/Makefile.template @@ -27,7 +27,7 @@ include $(MakeInc_def) # XXX: CFLAGS # CFLAGS+= -I. -imacros meta_features.h -DKERNEL -DBSD_KERNEL_PRIVATE \ - -Wall -Wno-four-char-constants -fno-common + -Wall -fno-common # # Directories for mig generated files @@ -86,8 +86,10 @@ $(COMPONENT).o: $(LDOBJS) $(_v)$(COMPOBJROOT)/newvers \ `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} $(_v)${KCC} $(CFLAGS) $(INCLUDES) -c vers.c - @echo LD $(COMPONENT) - $(_v)$(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} vers.o + @echo LDFILELIST $(COMPONENT) + $(_v)( for obj in ${LDOBJS} vers.o; do \ + echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + done; ) > $(COMPONENT).o do_all: $(COMPONENT).o diff --git a/security/conf/Makefile.x86_64 b/security/conf/Makefile.x86_64 new file mode 100644 index 000000000..64c2b46d5 --- /dev/null +++ b/security/conf/Makefile.x86_64 @@ -0,0 +1,18 @@ +###################################################################### +#BEGIN Machine dependent Makefile fragment for x86_64 +###################################################################### + +# Enable -Werror for x86_64 builds +CFLAGS+=$(WERROR) +CWARNFLAGS= $(filter-out -Wbad-function-cast, $(CWARNFLAGS_STD)) + +# Objects that don't compile cleanly: +#OBJS_NO_WERROR = \ + +OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) + +$(OBJS_WERROR): WERROR=-Werror + +###################################################################### +#END Machine dependent Makefile fragment for x86_64 +###################################################################### diff 
--git a/security/conf/files b/security/conf/files index bea378a45..295d886c2 100644 --- a/security/conf/files +++ b/security/conf/files @@ -6,6 +6,7 @@ OPTIONS/config_macf optional config_macf OPTIONS/config_macf_socket_subset optional config_macf_socket_subset OPTIONS/config_macf_socket optional config_macf_socket OPTIONS/config_macf_net optional config_macf_net +OPTIONS/config_fse optional config_fse # security diff --git a/security/conf/files.x86_64 b/security/conf/files.x86_64 new file mode 100644 index 000000000..e69de29bb diff --git a/security/mac_audit.c b/security/mac_audit.c index 286b6ad5a..504c55ae8 100644 --- a/security/mac_audit.c +++ b/security/mac_audit.c @@ -68,13 +68,17 @@ #include #include #include -#include +#include #include #include #include #include -#if AUDIT + +int mac_audit(__unused int len, __unused u_char *data); + + +#if CONFIG_AUDIT /* The zone allocator is initialized in mac_base.c. */ zone_t mac_audit_data_zone; @@ -162,7 +166,7 @@ mac_proc_check_getaudit(struct proc *curp) } int -mac_proc_check_setaudit(struct proc *curp, struct auditinfo *ai) +mac_proc_check_setaudit(struct proc *curp, struct auditinfo_addr *ai) { kauth_cred_t cred; int error; @@ -318,28 +322,28 @@ mac_audit_check_postselect(struct ucred *cred, unsigned short syscode, return (ret); } -#else /* AUDIT */ +#else /* !CONFIG_AUDIT */ /* * Function stubs for when AUDIT isn't defined. 
*/ int -mac_system_check_audit(struct ucred *cred, void *record, int length) +mac_system_check_audit(__unused struct ucred *cred, __unused void *record, __unused int length) { return (0); } int -mac_system_check_auditon(struct ucred *cred, int cmd) +mac_system_check_auditon(__unused struct ucred *cred, __unused int cmd) { return (0); } int -mac_system_check_auditctl(struct ucred *cred, struct vnode *vp) +mac_system_check_auditctl(__unused struct ucred *cred, __unused struct vnode *vp) { return (0); @@ -367,7 +371,8 @@ mac_proc_check_getaudit(__unused struct proc *curp) } int -mac_proc_check_setaudit(__unused struct proc *curp, struct auditinfo *ai) +mac_proc_check_setaudit(__unused struct proc *curp, + __unused struct auditinfo_addr *ai) { return (0); @@ -390,7 +395,7 @@ mac_audit_check_postselect(__unused struct ucred *cred, __unused unsigned short } int -mac_audit(int len, u_char *data) +mac_audit(__unused int len, __unused u_char *data) { return (0); @@ -401,4 +406,4 @@ mac_audit_text(__unused char *text, __unused mac_policy_handle_t handle) { return (0); } -#endif /* !AUDIT */ +#endif /* !CONFIG_AUDIT */ diff --git a/security/mac_base.c b/security/mac_base.c index b65948131..8b2eabff2 100644 --- a/security/mac_base.c +++ b/security/mac_base.c @@ -79,7 +79,7 @@ #include #include #include -#include +#include #include #include #include @@ -95,7 +95,6 @@ #include #include #include -#include #if CONFIG_MACF #include @@ -105,6 +104,7 @@ #include #endif + /* * define MB_DEBUG to display run-time debugging information * #define MB_DEBUG 1 @@ -161,10 +161,32 @@ int mac_late = 0; * already has to deal with uninitialized labels, this probably won't * be a problem. Note: currently no locking. Will this be a problem? 
*/ +#if CONFIG_MACF_NET +unsigned int mac_label_mbufs = 1; +SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW, + &mac_label_mbufs, 0, "Label all MBUFs"); +#endif + #if !defined(CONFIG_MACF_ALWAYS_LABEL_MBUF) && 0 static int mac_labelmbufs = 0; #endif +/* + * Flag to indicate whether or not we should allocate label storage for + * new vnodes. Since most dynamic policies we currently work with don't + * rely on vnode labeling, try to avoid paying the cost of mtag allocation + * unless specifically notified of interest. One result of this is + * that if a dynamically loaded policy requests vnode labels, it must + * be able to deal with a NULL label being returned on any vnodes that + * were already in flight when the policy was loaded. Since the policy + * already has to deal with uninitialized labels, this probably won't + * be a problem. + */ +unsigned int mac_label_vnodes = 0; +SYSCTL_UINT(_security_mac, OID_AUTO, labelvnodes, CTLFLAG_RW, + &mac_label_vnodes, 0, "Label all vnodes"); + + unsigned int mac_mmap_revocation = 0; SYSCTL_UINT(_security_mac, OID_AUTO, mmap_revocation, CTLFLAG_RW, &mac_mmap_revocation, 0, "Revoke mmap access to files on subject " @@ -242,13 +264,7 @@ SYSCTL_UINT(_security_mac, OID_AUTO, task_enforce, CTLFLAG_RW, &mac_task_enforce, 0, "Enforce MAC policy on Mach task operations"); #endif -#if CONFIG_MACF_NET -unsigned int mac_label_mbufs = 1; -SYSCTL_UINT(_security_mac, OID_AUTO, label_mbufs, CTLFLAG_RW, - &mac_label_mbufs, 0, "Label all MBUFs"); -#endif - -#if AUDIT +#if CONFIG_AUDIT /* * mac_audit_data_zone is the zone used for data pushed into the audit * record by policies. Using a zone simplifies memory management of this @@ -512,6 +528,13 @@ mac_policy_init(void) mac_labelzone_init(); } +/* Function pointer set up for loading security extensions. + * It is set to an actual function after OSlibkernInit() + * has been called, and is set back to 0 by OSKextRemoveKextBootstrap() + * after bsd_init(). 
+ */ +void (*load_security_extensions_function)(void) = 0; + /* * Init after early Mach startup, but before BSD */ @@ -526,7 +549,9 @@ mac_policy_initmach(void) * kernel startup. */ - load_security_extensions(); + if (load_security_extensions_function) { + load_security_extensions_function(); + } mac_late = 1; #if CONFIG_MACF_MACH mac_label_journal_replay(); @@ -542,7 +567,7 @@ mac_policy_initbsd(void) struct mac_policy_conf *mpc; u_int i; -#if AUDIT +#if CONFIG_AUDIT mac_audit_data_zone = zinit(MAC_AUDIT_DATA_LIMIT, AQ_HIWATER * MAC_AUDIT_DATA_LIMIT, 8192, "mac_audit_data_zone"); @@ -1181,17 +1206,43 @@ mac_label_externalize(size_t mpo_externalize_off, struct label *label, /* * Get the external forms of labels from all policies, for all label * namespaces contained in a list. + * + * XXX This may be leaking an sbuf. */ int mac_externalize(size_t mpo_externalize_off, struct label *label, const char *elementlist, char *outbuf, size_t outbuflen) { char *element; + char *scratch_base; + char *scratch; struct sbuf sb; int error = 0, len; - sbuf_new(&sb, outbuf, outbuflen, SBUF_FIXEDLEN); - while ((element = strsep(&elementlist, ",")) != NULL) { + /* allocate a scratch buffer the size of the string */ + MALLOC(scratch_base, char *, strlen(elementlist)+1, M_MACTEMP, M_WAITOK); + if (scratch_base == NULL) { + error = ENOMEM; + goto out; + } + + /* copy the elementlist to the scratch buffer */ + strlcpy(scratch_base, elementlist, strlen(elementlist)+1); + + /* + * set up a temporary pointer that can be used to iterate the + * scratch buffer without losing the allocation address + */ + scratch = scratch_base; + + /* get an sbuf */ + if (sbuf_new(&sb, outbuf, outbuflen, SBUF_FIXEDLEN) == NULL) { + /* could not allocate interior buffer */ + error = ENOMEM; + goto out; + } + /* iterate the scratch buffer; NOTE: buffer contents modified! 
*/ + while ((element = strsep(&scratch, ",")) != NULL) { error = mac_label_externalize(mpo_externalize_off, label, element, &sb); if (error) @@ -1200,6 +1251,11 @@ mac_externalize(size_t mpo_externalize_off, struct label *label, if ((len = sbuf_len(&sb)) > 0) sbuf_setpos(&sb, len - 1); /* trim trailing comma */ sbuf_finish(&sb); + +out: + if (scratch_base != NULL) + FREE(scratch_base, M_MACTEMP); + return (error); } @@ -1279,7 +1335,7 @@ mac_internalize(size_t mpo_internalize_off, struct label *label, /* system calls */ int -__mac_get_pid(struct proc *p, struct __mac_get_pid_args *uap, register_t *ret __unused) +__mac_get_pid(struct proc *p, struct __mac_get_pid_args *uap, int *ret __unused) { char *elements, *buffer; struct user_mac mac; @@ -1332,7 +1388,7 @@ __mac_get_pid(struct proc *p, struct __mac_get_pid_args *uap, register_t *ret __ } int -__mac_get_proc(proc_t p, struct __mac_get_proc_args *uap, register_t *ret __unused) +__mac_get_proc(proc_t p, struct __mac_get_proc_args *uap, int *ret __unused) { char *elements, *buffer; struct user_mac mac; @@ -1377,14 +1433,10 @@ __mac_get_proc(proc_t p, struct __mac_get_proc_args *uap, register_t *ret __unus return (error); } -/* - * MPSAFE - */ - int -__mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, register_t *ret __unused) +__mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, int *ret __unused) { - kauth_cred_t newcred, oldcred; + kauth_cred_t newcred; struct label *intlabel; struct user_mac mac; char *buffer; @@ -1447,8 +1499,24 @@ __mac_set_proc(proc_t p, struct __mac_set_proc_args *uap, register_t *ret __unus } #if CONFIG_LCTX +/* + * __mac_get_lcid: + * Get login context ID. A login context associates a BSD process + * with an instance of a user. For more information see getlcid(2) man page. 
+ * + * Parameters: p Process requesting the get + * uap User argument descriptor (see below) + * ret (ignored) + * + * Indirect: uap->lcid login context ID to search + * uap->mac_p.m_buflen MAC info buffer size + * uap->mac_p.m_string MAC info user address + * + * Returns: 0 Success + * !0 Not success + */ int -__mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, register_t *ret __unused) +__mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, int *ret __unused) { char *elements, *buffer; struct user_mac mac; @@ -1456,7 +1524,7 @@ __mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, register_t *ret __unus int error; size_t ulen; - AUDIT_ARG(value, uap->lcid); + AUDIT_ARG(value32, uap->lcid); if (IS_64BIT_PROCESS(p)) { error = copyin(uap->mac_p, &mac, sizeof(mac)); } else { @@ -1497,8 +1565,24 @@ __mac_get_lcid(proc_t p, struct __mac_get_lcid_args *uap, register_t *ret __unus return (error); } +/* + * __mac_get_lctx: + * Get login context label. A login context associates a BSD process + * associated with an instance of a user. 
+ * + * Parameters: p Process requesting the get + * uap User argument descriptor (see below) + * ret (ignored) + * + * Indirect: uap->lcid login context ID to search + * uap->mac_p MAC info + * + * Returns: 0 Success + * !0 Not success + * + */ int -__mac_get_lctx(proc_t p, struct __mac_get_lctx_args *uap, register_t *ret __unused) +__mac_get_lctx(proc_t p, struct __mac_get_lctx_args *uap, int *ret __unused) { char *elements, *buffer; struct user_mac mac; @@ -1550,7 +1634,7 @@ __mac_get_lctx(proc_t p, struct __mac_get_lctx_args *uap, register_t *ret __unus } int -__mac_set_lctx(proc_t p, struct __mac_set_lctx_args *uap, register_t *ret __unused) +__mac_set_lctx(proc_t p, struct __mac_set_lctx_args *uap, int *ret __unused) { struct user_mac mac; struct label *intlabel; @@ -1609,21 +1693,21 @@ __mac_set_lctx(proc_t p, struct __mac_set_lctx_args *uap, register_t *ret __unus #else /* LCTX */ int -__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, register_t *ret __unused) +__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, register_t *ret __unused) +__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, register_t *ret __unused) +__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, int *ret __unused) { return (ENOSYS); @@ -1631,7 +1715,7 @@ __mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, regi #endif /* !LCTX */ int -__mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, register_t *ret __unused) +__mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, int *ret __unused) { struct fileproc *fp; struct vnode *vp; @@ -1691,10 +1775,12 @@ __mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, register_t *ret __unused) 
switch (fp->f_fglob->fg_type) { case DTYPE_VNODE: - intlabel = mac_vnode_label_alloc(); + if (intlabel == NULL) { + error = ENOMEM; + break; + } vp = (struct vnode *)fp->f_fglob->fg_data; - error = vnode_getwithref(vp); if (error == 0) { mac_vnode_label_copy(vp->v_label, intlabel); @@ -1735,9 +1821,6 @@ __mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, register_t *ret __unused) return (error); } -/* - * MPSAFE - */ static int mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow) { @@ -1767,8 +1850,11 @@ mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow) return (error); MALLOC(elements, char *, mac.m_buflen, M_MACTEMP, M_WAITOK); + MALLOC(buffer, char *, mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO); + error = copyinstr(mac.m_string, elements, mac.m_buflen, &ulen); if (error) { + FREE(buffer, M_MACTEMP); FREE(elements, M_MACTEMP); return (error); } @@ -1781,6 +1867,7 @@ mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow) UIO_USERSPACE, path_p, ctx); error = namei(&nd); if (error) { + FREE(buffer, M_MACTEMP); FREE(elements, M_MACTEMP); return (error); } @@ -1790,25 +1877,23 @@ mac_get_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow) intlabel = mac_vnode_label_alloc(); mac_vnode_label_copy(vp->v_label, intlabel); - - MALLOC(buffer, char *, mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO); error = mac_vnode_label_externalize(intlabel, elements, buffer, - mac.m_buflen, M_WAITOK); - FREE(elements, M_MACTEMP); - + mac.m_buflen, M_WAITOK); + mac_vnode_label_free(intlabel); if (error == 0) error = copyout(buffer, mac.m_string, strlen(buffer) + 1); - FREE(buffer, M_MACTEMP); vnode_put(vp); - mac_vnode_label_free(intlabel); + + FREE(buffer, M_MACTEMP); + FREE(elements, M_MACTEMP); return (error); } int __mac_get_file(proc_t p, struct __mac_get_file_args *uap, - register_t *ret __unused) + int *ret __unused) { return (mac_get_filelink(p, uap->mac_p, uap->path_p, 1)); @@ -1816,14 
+1901,14 @@ __mac_get_file(proc_t p, struct __mac_get_file_args *uap, int __mac_get_link(proc_t p, struct __mac_get_link_args *uap, - register_t *ret __unused) + int *ret __unused) { return (mac_get_filelink(p, uap->mac_p, uap->path_p, 0)); } int -__mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, register_t *ret __unused) +__mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, int *ret __unused) { struct fileproc *fp; @@ -1880,6 +1965,11 @@ __mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, register_t *ret __unused) switch (fp->f_fglob->fg_type) { case DTYPE_VNODE: + if (mac_label_vnodes == 0) { + error = ENOSYS; + break; + } + intlabel = mac_vnode_label_alloc(); error = mac_vnode_label_internalize(intlabel, buffer); @@ -1927,9 +2017,6 @@ __mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, register_t *ret __unused) return (error); } -/* - * MPSAFE - */ static int mac_set_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int follow) @@ -1943,6 +2030,9 @@ mac_set_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int error; size_t ulen; + if (mac_label_vnodes == 0) + return ENOSYS; + if (IS_64BIT_PROCESS(p)) { error = copyin(mac_p, &mac, sizeof(mac)); } else { @@ -1997,7 +2087,7 @@ mac_set_filelink(proc_t p, user_addr_t mac_p, user_addr_t path_p, int __mac_set_file(proc_t p, struct __mac_set_file_args *uap, - register_t *ret __unused) + int *ret __unused) { return (mac_set_filelink(p, uap->mac_p, uap->path_p, 1)); @@ -2005,18 +2095,29 @@ __mac_set_file(proc_t p, struct __mac_set_file_args *uap, int __mac_set_link(proc_t p, struct __mac_set_link_args *uap, - register_t *ret __unused) + int *ret __unused) { return (mac_set_filelink(p, uap->mac_p, uap->path_p, 0)); } /* - * MPSAFE + * __mac_syscall: Perform a MAC policy system call + * + * Parameters: p Process calling this routine + * uap User argument descriptor (see below) + * retv (Unused) + * + * Indirect: uap->policy Name of target MAC policy + * uap->call MAC policy-specific system call to 
perform + * uap->arg MAC policy-specific system call arguments + * + * Returns: 0 Success + * !0 Not success + * */ - int -__mac_syscall(proc_t p, struct __mac_syscall_args *uap, register_t *retv __unused) +__mac_syscall(proc_t p, struct __mac_syscall_args *uap, int *retv __unused) { struct mac_policy_conf *mpc; char target[MAC_MAX_POLICY_NAME]; @@ -2027,7 +2128,7 @@ __mac_syscall(proc_t p, struct __mac_syscall_args *uap, register_t *retv __unuse error = copyinstr(uap->policy, target, sizeof(target), &ulen); if (error) return (error); - AUDIT_ARG(value, uap->call); + AUDIT_ARG(value32, uap->call); AUDIT_ARG(mac_string, target); error = ENOPOLICY; @@ -2109,9 +2210,22 @@ mac_mount_label_get(struct mount *mp, user_addr_t mac_p) return (error); } +/* + * __mac_get_mount: Get mount point label information for a given pathname + * + * Parameters: p (ignored) + * uap User argument descriptor (see below) + * ret (ignored) + * + * Indirect: uap->path Pathname + * uap->mac_p MAC info + * + * Returns: 0 Success + * !0 Not success + */ int __mac_get_mount(proc_t p __unused, struct __mac_get_mount_args *uap, - register_t *ret __unused) + int *ret __unused) { struct nameidata nd; struct vfs_context *ctx = vfs_context_current(); @@ -2183,91 +2297,91 @@ mac_vnop_removexattr(struct vnode *vp __unused, const char *name __unused) } int -__mac_get_pid(proc_t p __unused, struct __mac_get_pid_args *uap __unused, register_t *ret __unused) +__mac_get_pid(proc_t p __unused, struct __mac_get_pid_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_proc(proc_t p __unused, struct __mac_get_proc_args *uap __unused, register_t *ret __unused) +__mac_get_proc(proc_t p __unused, struct __mac_get_proc_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_set_proc(proc_t p __unused, struct __mac_set_proc_args *uap __unused, register_t *ret __unused) +__mac_set_proc(proc_t p __unused, struct __mac_set_proc_args *uap __unused, int *ret __unused) { return 
(ENOSYS); } int -__mac_get_file(proc_t p __unused, struct __mac_get_file_args *uap __unused, register_t *ret __unused) +__mac_get_file(proc_t p __unused, struct __mac_get_file_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_link(proc_t p __unused, struct __mac_get_link_args *uap __unused, register_t *ret __unused) +__mac_get_link(proc_t p __unused, struct __mac_get_link_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_set_file(proc_t p __unused, struct __mac_set_file_args *uap __unused, register_t *ret __unused) +__mac_set_file(proc_t p __unused, struct __mac_set_file_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_set_link(proc_t p __unused, struct __mac_set_link_args *uap __unused, register_t *ret __unused) +__mac_set_link(proc_t p __unused, struct __mac_set_link_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_fd(proc_t p __unused, struct __mac_get_fd_args *uap __unused, register_t *ret __unused) +__mac_get_fd(proc_t p __unused, struct __mac_get_fd_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_set_fd(proc_t p __unused, struct __mac_set_fd_args *uap __unused, register_t *ret __unused) +__mac_set_fd(proc_t p __unused, struct __mac_set_fd_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_syscall(proc_t p __unused, struct __mac_syscall_args *uap __unused, register_t *ret __unused) +__mac_syscall(proc_t p __unused, struct __mac_syscall_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, register_t *ret __unused) +__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, int *ret __unused) { return (ENOSYS); } int -__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, register_t *ret __unused) +__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, int *ret __unused) { return 
(ENOSYS); } int -__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, register_t *ret __unused) +__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, int *ret __unused) { return (ENOSYS); @@ -2275,7 +2389,7 @@ __mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, regi int __mac_get_mount(proc_t p __unused, - struct __mac_get_mount_args *uap __unused, register_t *ret __unused) + struct __mac_get_mount_args *uap __unused, int *ret __unused) { return (ENOSYS); diff --git a/security/mac_data.h b/security/mac_data.h index f1b78dd54..6a5d14025 100644 --- a/security/mac_data.h +++ b/security/mac_data.h @@ -133,11 +133,11 @@ mmd_fixup_ele(struct mac_module_data *oldbase, struct mac_module_data *newbase, struct mac_module_data_element *ele) { if (ele->key != NULL) { /* Array elements have no keys. */ - ele->key -= (unsigned int)oldbase; - ele->key += (unsigned int)newbase; + ele->key -= (uintptr_t)oldbase; + ele->key += (uintptr_t)newbase; } - ele->value -= (unsigned int)oldbase; - ele->value += (unsigned int)newbase; + ele->value -= (uintptr_t)oldbase; + ele->value += (uintptr_t)newbase; } #endif diff --git a/security/mac_framework.h b/security/mac_framework.h index c3ea61435..01f5e8557 100644 --- a/security/mac_framework.h +++ b/security/mac_framework.h @@ -124,7 +124,6 @@ struct vop_setlabel_args; /*@ macros */ #define VNODE_LABEL_CREATE 1 -#define VNODE_LABEL_NEEDREF 2 #if CONFIG_MACF_MACH #define mac_task_label_update_cred(cred, task) \ @@ -318,11 +317,11 @@ int mac_proc_check_getaudit(proc_t proc); int mac_proc_check_getauid(proc_t proc); int mac_proc_check_getlcid(proc_t proc1, proc_t proc2, pid_t pid); -int mac_proc_check_map_prot_copy_allow(proc_t proc); int mac_proc_check_mprotect(proc_t proc, user_addr_t addr, user_size_t size, int prot); +int mac_proc_check_run_cs_invalid(proc_t proc); int mac_proc_check_sched(proc_t proc, proc_t proc2); -int mac_proc_check_setaudit(proc_t proc, struct auditinfo 
*ai); +int mac_proc_check_setaudit(proc_t proc, struct auditinfo_addr *ai); int mac_proc_check_setauid(proc_t proc, uid_t auid); int mac_proc_check_setlcid(proc_t proc1, proc_t proc2, pid_t pid1, pid_t pid2); @@ -486,6 +485,9 @@ int mac_vnode_check_stat(vfs_context_t ctx, kauth_cred_t file_cred, struct vnode *vp); int mac_vnode_check_truncate(vfs_context_t ctx, kauth_cred_t file_cred, struct vnode *vp); +int mac_vnode_check_uipc_bind(vfs_context_t ctx, struct vnode *dvp, + struct componentname *cnp, struct vnode_attr *vap); +int mac_vnode_check_uipc_connect(vfs_context_t ctx, struct vnode *vp); int mac_vnode_check_unlink(vfs_context_t ctx, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); int mac_vnode_check_write(vfs_context_t ctx, @@ -505,6 +507,7 @@ void mac_vnode_label_destroy(struct vnode *vp); int mac_vnode_label_externalize_audit(struct vnode *vp, struct mac *mac); void mac_vnode_label_free(struct label *label); void mac_vnode_label_init(struct vnode *vp); +int mac_vnode_label_init_needed(struct vnode *vp); void mac_vnode_label_recycle(struct vnode *vp); void mac_vnode_label_update(vfs_context_t ctx, struct vnode *vp, struct label *newlabel); diff --git a/security/mac_internal.h b/security/mac_internal.h index e41c429af..283fef64a 100644 --- a/security/mac_internal.h +++ b/security/mac_internal.h @@ -113,11 +113,6 @@ struct mac_policy_list { typedef struct mac_policy_list mac_policy_list_t; -/* - * Darwin functions not properly exported - */ -extern void kmod_load_early(void); /* defined in libsa/kext.cpp */ - /* * Policy that has registered with the framework for a specific * label namespace name. 
@@ -212,6 +207,8 @@ extern unsigned int mac_task_enforce; extern unsigned int mac_label_mbufs; #endif +extern unsigned int mac_label_vnodes; + static int mac_proc_check_enforce(proc_t p, int enforce_flag); static __inline__ int mac_proc_check_enforce(proc_t p, int enforce_flags) diff --git a/security/mac_iokit.c b/security/mac_iokit.c index fd78646ec..6212d59d3 100644 --- a/security/mac_iokit.c +++ b/security/mac_iokit.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/mac_label.c b/security/mac_label.c index ef2c09e74..0f4e21524 100644 --- a/security/mac_label.c +++ b/security/mac_label.c @@ -79,3 +79,23 @@ mac_labelzone_free(struct label *l) bzero(l, sizeof(struct label)); zfree(zone_label, l); } + +/* + * Functions used by policy modules to get and set label values. + */ +intptr_t +mac_label_get(struct label *l, int slot) +{ + KASSERT(l != NULL, ("mac_label_get: NULL label")); + + return ((intptr_t) (l->l_perpolicy[slot].l_ptr)); +} + +void +mac_label_set(struct label *l, int slot, intptr_t v) +{ + KASSERT(l != NULL, ("mac_label_set: NULL label")); + + l->l_perpolicy[slot].l_ptr = (void *) v; +} + diff --git a/security/mac_net.c b/security/mac_net.c index cb847acb0..cd452be5e 100644 --- a/security/mac_net.c +++ b/security/mac_net.c @@ -73,7 +73,7 @@ #include #include -#include +#include #include diff --git a/security/mac_policy.h b/security/mac_policy.h index 544565552..7305a8356 100644 --- a/security/mac_policy.h +++ b/security/mac_policy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2008 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -3059,7 +3059,7 @@ typedef int mpo_proc_check_sched_t( */ typedef int mpo_proc_check_setaudit_t( kauth_cred_t cred, - struct auditinfo *ai + struct auditinfo_addr *ai ); /** @brief Access control check for setting audit user ID @@ -4412,21 +4412,17 @@ typedef int mpo_proc_check_get_task_t( kauth_cred_t cred, struct proc *p ); - - /** - @brief Access control check for manipulating a proc's vm_map - @param cred Subject credential + @brief Privilege check for a process to run invalid @param proc Object process - Determine whether the vm_map map belonging to process proc with - credential cred allows the VM_PROT_COPY operation. + Determine whether the process may execute even though the system determined + that it is untrusted (eg unidentified / modified code). @return Return 0 if access is granted, otherwise an appropriate value for errno should be returned. */ -typedef int mpo_proc_check_map_prot_copy_allow_t( - kauth_cred_t cred, +typedef int mac_proc_check_run_cs_invalid_t( struct proc *p ); @@ -5279,6 +5275,47 @@ typedef int mpo_vnode_check_truncate_t( struct vnode *vp, struct label *label ); +/** + @brief Access control check for binding UNIX domain socket + @param cred Subject credential + @param dvp Directory vnode + @param dlabel Policy label for dvp + @param cnp Component name for dvp + @param vap vnode attributes for vap + + Determine whether the subject identified by the credential can perform a + bind operation on a UNIX domain socket with the passed parent directory, + passed name information, and passed attribute information. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. Suggested failure: EACCES for label mismatch or + EPERM for lack of privilege. 
+*/ +typedef int mpo_vnode_check_uipc_bind_t( + kauth_cred_t cred, + struct vnode *dvp, + struct label *dlabel, + struct componentname *cnp, + struct vnode_attr *vap +); +/** + @brief Access control check for connecting UNIX domain socket + @param cred Subject credential + @param vp Object vnode + @param label Policy label associated with vp + + Determine whether the subject identified by the credential can perform a + connect operation on the passed UNIX domain socket vnode. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. Suggested failure: EACCES for label mismatch or + EPERM for lack of privilege. +*/ +typedef int mpo_vnode_check_uipc_connect_t( + kauth_cred_t cred, + struct vnode *vp, + struct label *label +); /** @brief Access control check for deleting vnode @param cred Subject credential @@ -6035,9 +6072,9 @@ struct mac_policy_ops { mpo_vnode_label_update_t *mpo_vnode_label_update; mpo_vnode_notify_create_t *mpo_vnode_notify_create; mpo_vnode_check_signature_t *mpo_vnode_check_signature; - mpo_proc_check_map_prot_copy_allow_t *mpo_proc_check_map_prot_copy_allow; - mpo_reserved_hook_t *mpo_reserved2; - mpo_reserved_hook_t *mpo_reserved3; + mpo_vnode_check_uipc_bind_t *mpo_vnode_check_uipc_bind; + mpo_vnode_check_uipc_connect_t *mpo_vnode_check_uipc_connect; + mac_proc_check_run_cs_invalid_t *mpo_proc_check_run_cs_invalid; mpo_reserved_hook_t *mpo_reserved4; mpo_reserved_hook_t *mpo_reserved5; mpo_reserved_hook_t *mpo_reserved6; @@ -6248,6 +6285,14 @@ int mac_vnop_removexattr(struct vnode *, const char *); #define LABEL_TO_SLOT(l, s) (l)->l_perpolicy[s] +/* + * Policy interface to map a struct label pointer to per-policy data. + * Typically, policies wrap this in their own accessor macro that casts an + * intptr_t to a policy-specific data type. 
+ */ +intptr_t mac_label_get(struct label *l, int slot); +void mac_label_set(struct label *l, int slot, intptr_t v); + #define mac_get_mpc(h) (mac_policy_list.entries[h].mpc) /** diff --git a/security/mac_process.c b/security/mac_process.c index 4ed4d53b7..c0f312049 100644 --- a/security/mac_process.c +++ b/security/mac_process.c @@ -75,7 +75,7 @@ #include -#include +#include struct label * mac_cred_label_alloc(void) @@ -367,16 +367,13 @@ mac_proc_check_mprotect(proc_t proc, } int -mac_proc_check_map_prot_copy_allow(proc_t proc) +mac_proc_check_run_cs_invalid(proc_t proc) { - kauth_cred_t cred; int error; if (!mac_vm_enforce) return (0); - cred = kauth_cred_proc_ref(proc); - MAC_CHECK(proc_check_map_prot_copy_allow, cred, proc); - kauth_cred_unref(&cred); + MAC_CHECK(proc_check_run_cs_invalid, proc); return (error); } diff --git a/security/mac_socket.c b/security/mac_socket.c index bd35170ee..45c7daef6 100644 --- a/security/mac_socket.c +++ b/security/mac_socket.c @@ -596,9 +596,9 @@ mac_socket_check_received(kauth_cred_t cred, struct socket *so, struct sockaddr if (!mac_socket_enforce) return 0; - + MAC_CHECK(socket_check_received, cred, - (socket_t)so, so->so_label, saddr); + so, so->so_label, saddr); return (error); } diff --git a/security/mac_stub.c b/security/mac_stub.c index 3a59c4374..0dc5276e6 100644 --- a/security/mac_stub.c +++ b/security/mac_stub.c @@ -26,7 +26,9 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include #include +#include #if !CONFIG_MACF /* @@ -592,10 +594,6 @@ int vop_stdsetlabel_ea(void) { return 0; } -int kau_will_audit(void) -{ - return 0; -} int mac_kalloc(void) { return 0; @@ -677,3 +675,10 @@ int mac_set_enforce_proc(void) return 0; } #endif /* CONFIG_MACF */ + +#if !CONFIG_AUDIT +int kau_will_audit(void) +{ + return 0; +} +#endif diff --git a/security/mac_vfs.c b/security/mac_vfs.c index 8910d6d72..0a136aa4c 100644 --- a/security/mac_vfs.c +++ b/security/mac_vfs.c @@ -79,7 +79,7 @@ #include #include -#include +#include #include 
@@ -139,10 +139,15 @@ mac_vnode_label_alloc(void) void mac_vnode_label_init(vnode_t vp) { - vp->v_label = mac_vnode_label_alloc(); } +int +mac_vnode_label_init_needed(vnode_t vp) +{ + return (mac_label_vnodes != 0 && vp->v_label == NULL); +} + /* * vnode labels are allocated at the same time as vnodes, but vnodes are never * freed. Instead, we want to remove any sensitive information before putting @@ -182,8 +187,6 @@ mac_mount_label_free(struct label *label) void mac_mount_label_destroy(struct mount *mp) { - - if (mp->mnt_mntlabel != NULL) { mac_mount_label_free(mp->mnt_mntlabel); mp->mnt_mntlabel = NULL; @@ -193,7 +196,6 @@ mac_mount_label_destroy(struct mount *mp) void mac_vnode_label_free(struct label *label) { - MAC_PERFORM(vnode_label_destroy, label); mac_labelzone_free(label); } @@ -202,17 +204,21 @@ mac_vnode_label_free(struct label *label) void mac_vnode_label_destroy(struct vnode *vp) { - - mac_vnode_label_free(vp->v_label); - vp->v_label = NULL; + if (vp->v_label != NULL) { + mac_vnode_label_free(vp->v_label); + vp->v_label = NULL; + } } #endif void mac_vnode_label_copy(struct label *src, struct label *dest) { - - MAC_PERFORM(vnode_label_copy, src, dest); + if (src == NULL) { + MAC_PERFORM(vnode_label_init, dest); + } else { + MAC_PERFORM(vnode_label_copy, src, dest); + } } int @@ -346,7 +352,7 @@ void mac_vnode_label_associate_singlelabel(struct mount *mp, struct vnode *vp) { - if (!mac_vnode_enforce) + if (!mac_vnode_enforce || !mac_label_vnodes) return; MAC_PERFORM(vnode_label_associate_singlelabel, mp, @@ -382,7 +388,7 @@ mac_vnode_label_update_extattr(struct mount *mp, struct vnode *vp, { int error = 0; - if (!mac_vnode_enforce) + if (!mac_vnode_enforce || !mac_label_vnodes) return; MAC_PERFORM(vnode_label_update_extattr, mp, mp->mnt_mntlabel, vp, @@ -403,7 +409,7 @@ mac_vnode_label_store(vfs_context_t ctx, struct vnode *vp, kauth_cred_t cred; int error; - if (!mac_vnode_enforce && + if (!mac_vnode_enforce || !mac_label_vnodes || 
!mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) return 0; @@ -1074,14 +1080,59 @@ mac_vnode_check_write(vfs_context_t ctx, struct ucred *file_cred, return (error); } +int +mac_vnode_check_uipc_bind(vfs_context_t ctx, struct vnode *dvp, + struct componentname *cnp, struct vnode_attr *vap) +{ + kauth_cred_t cred; + int error; + + if (!mac_vnode_enforce || + !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) + return (0); + + cred = vfs_context_ucred(ctx); + MAC_CHECK(vnode_check_uipc_bind, cred, dvp, dvp->v_label, cnp, vap); + return (error); +} + +int +mac_vnode_check_uipc_connect(vfs_context_t ctx, struct vnode *vp) +{ + kauth_cred_t cred; + int error; + + if (!mac_vnode_enforce || + !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) + return (0); + + cred = vfs_context_ucred(ctx); + MAC_CHECK(vnode_check_uipc_connect, cred, vp, vp->v_label); + return (error); +} + void mac_vnode_label_update(vfs_context_t ctx, struct vnode *vp, struct label *newlabel) { kauth_cred_t cred = vfs_context_ucred(ctx); + struct label *tmpl = NULL; + + if (vp->v_label == NULL) + tmpl = mac_vnode_label_alloc(); vnode_lock(vp); + + /* recheck after lock */ + if (vp->v_label == NULL) { + vp->v_label = tmpl; + tmpl = NULL; + } + MAC_PERFORM(vnode_label_update, cred, vp, vp->v_label, newlabel); vnode_unlock(vp); + + if (tmpl != NULL) + mac_vnode_label_free(tmpl); } void @@ -1289,7 +1340,7 @@ vn_setlabel(struct vnode *vp, struct label *intlabel, vfs_context_t context) { int error; - if (!mac_vnode_enforce) + if (!mac_vnode_enforce || !mac_label_vnodes) return (0); if (vp->v_mount == NULL) { diff --git a/security/mac_vfs_subr.c b/security/mac_vfs_subr.c index e418342b0..f8b2263b9 100644 --- a/security/mac_vfs_subr.c +++ b/security/mac_vfs_subr.c @@ -34,34 +34,44 @@ #include #include #include +#include "../bsd/sys/fsevents.h" #include /* - * Caller holds reference or sets VNODE_LABEL_NEEDREF to non-zero. - * - * Function will drop lock and reference on return. 
+ * Caller holds I/O reference on vnode */ int vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int flags, vfs_context_t ctx) { - int error; + int error = 0; - error = 0; - vnode_lock(vp); + /* fast path checks... */ - if (vp->v_lflag & VL_LABELED) { - if (!(flags & VNODE_LABEL_NEEDREF)) - vnode_put_locked(vp); - vnode_unlock(vp); - return (0); + /* are we labeling vnodes? If not still notify of create */ + if (mac_label_vnodes == 0) { + if (flags & VNODE_LABEL_CREATE) + error = mac_vnode_notify_create(ctx, + mp, dvp, vp, cnp); + return 0; } - if ((flags & VNODE_LABEL_NEEDREF) && vnode_get_locked(vp)) { + /* if already VL_LABELED */ + if (vp->v_lflag & VL_LABELED) + return (0); + + vnode_lock_spin(vp); + + /* + * must revalidate state once we hold the lock + * since we could have blocked and someone else + * has since labeled this vnode + */ + if (vp->v_lflag & VL_LABELED) { vnode_unlock(vp); - return (ENOENT); + return (0); } if ((vp->v_lflag & VL_LABEL) == 0) { @@ -69,12 +79,17 @@ vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, /* Could sleep on disk I/O, drop lock. 
*/ vnode_unlock(vp); + + if (vp->v_label == NULL) + vp->v_label = mac_vnode_label_alloc(); + if (flags & VNODE_LABEL_CREATE) error = mac_vnode_notify_create(ctx, mp, dvp, vp, cnp); else error = mac_vnode_label_associate(mp, vp, ctx); - vnode_lock(vp); + + vnode_lock_spin(vp); if ((error == 0) && (vp->v_flag & VNCACHEABLE)) vp->v_lflag |= VL_LABELED; @@ -82,10 +97,8 @@ vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, if (vp->v_lflag & VL_LABELWAIT) { vp->v_lflag &= ~VL_LABELWAIT; - wakeup(vp->v_label); + wakeup(&vp->v_label); } - vnode_put_locked(vp); - vnode_unlock(vp); } else { struct timespec ts; @@ -94,18 +107,19 @@ vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, while (vp->v_lflag & VL_LABEL) { vp->v_lflag |= VL_LABELWAIT; - error = msleep(vp->v_label, &vp->v_lock, PVFS|PDROP, - "vnode_label", &ts); - vnode_lock(vp); + + error = msleep(&vp->v_label, &vp->v_lock, PVFS|PDROP, + "vnode_label", &ts); + vnode_lock_spin(vp); + if (error == EWOULDBLOCK) { vprint("vnode label timeout", vp); break; } } /* XXX: what should be done if labeling failed (above)? */ - vnode_put_locked(vp); - vnode_unlock(vp); } + vnode_unlock(vp); return (error); } @@ -126,7 +140,7 @@ vnode_relabel(struct vnode *vp) /* Wait for any other labeling to complete. 
*/ while (vp->v_lflag & VL_LABEL) { vp->v_lflag |= VL_LABELWAIT; - (void)msleep(vp->v_label, &vp->v_lock, PVFS, "vnode_relabel", 0); + (void)msleep(&vp->v_label, &vp->v_lock, PVFS, "vnode_relabel", 0); } /* Clear labeled flag */ @@ -157,6 +171,13 @@ mac_vnop_setxattr (struct vnode *vp, const char *name, char *buf, size_t len) uio_addiov(auio, CAST_USER_ADDR_T(buf), len); error = vn_setxattr(vp, name, auio, options, ctx); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_MODIFIED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif return (error); } @@ -192,6 +213,13 @@ mac_vnop_removexattr (struct vnode *vp, const char *name) return (EROFS); error = vn_removexattr(vp, name, options, ctx); +#if CONFIG_FSE + if (error == 0) { + add_fsevent(FSE_XATTR_REMOVED, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif return (error); } diff --git a/tools/cred_dump_creds.c b/tools/cred_dump_creds.c index 560040679..e5fe91f7e 100644 --- a/tools/cred_dump_creds.c +++ b/tools/cred_dump_creds.c @@ -29,7 +29,7 @@ struct debug_ucred { gid_t cr_rgid; /* real group id */ gid_t cr_svgid; /* saved group id */ uid_t cr_gmuid; /* UID for group membership purposes */ - struct auditinfo cr_au; /* user auditing data */ + struct auditinfo_addr cr_audit; /* user auditing data */ uint32_t cr_label; /* MACF label */ int cr_flags; /* flags on credential */ }; @@ -103,12 +103,12 @@ void dump_cred( debug_ucred * credp ) printf("] %d ", credp->cr_rgid); printf("%d ", credp->cr_svgid); printf("%d ", credp->cr_gmuid); - printf("a[%d ", credp->cr_au.ai_auid); - printf("%d ", credp->cr_au.ai_mask.am_success); - printf("%d ", credp->cr_au.ai_mask.am_failure); - printf("%d ", credp->cr_au.ai_termid.port); - printf("%d ", credp->cr_au.ai_termid.machine); - printf("%d ", credp->cr_au.ai_asid); + printf("a[%d ", credp->cr_audit.ai_auid); + printf("%d ", credp->cr_audit.ai_mask.am_success); + printf("%d ", credp->cr_audit.ai_mask.am_failure); + printf("%d ", credp->cr_audit.ai_termid.at_port); 
+ printf("%d ", credp->cr_audit.ai_termid.at_addr[0]); + printf("%d ", credp->cr_audit.ai_asid); printf("] "); printf("%p ", credp->cr_label); printf("0x%08x \n", credp->cr_flags); diff --git a/tools/tests/MMTest/MMtest.c b/tools/tests/MMTest/MMtest.c deleted file mode 100644 index b11b16fe8..000000000 --- a/tools/tests/MMTest/MMtest.c +++ /dev/null @@ -1,516 +0,0 @@ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#define MAX(A, B) ((A) < (B) ? (B) : (A)) - -static __inline__ unsigned long long ReadTSR() { - union { - unsigned long long time64; - unsigned long word[2]; - } now; -#if defined(__i386__) - /* Read from Pentium and Pentium Pro 64-bit timestamp counter. - * The counter is set to 0 at processor reset and increments on - * every clock cycle. */ - __asm__ volatile("rdtsc" : : : "eax", "edx"); - __asm__ volatile("movl %%eax,%0" : "=m" (now.word[0]) : : "eax"); - __asm__ volatile("movl %%edx,%0" : "=m" (now.word[1]) : : "edx"); -#elif defined(__ppc__) - /* Read from PowerPC 64-bit time base register. The increment - * rate of the time base is implementation-dependent, but is - * 1/4th the bus clock cycle on 603/604 processors. 
*/ - unsigned long t3; - do { - __asm__ volatile("mftbu %0" : "=r" (now.word[0])); - __asm__ volatile("mftb %0" : "=r" (now.word[1])); - __asm__ volatile("mftbu %0" : "=r" (t3)); - } while (now.word[0] != t3); -#else -#warning Do not know how to read a time stamp register on this architecture - now.time64 = 0ULL; -#endif - return now.time64; -} - -typedef struct { - unsigned int msgt_name : 8, - msgt_size : 8, - msgt_number : 12, - msgt_inline : 1, - msgt_longform : 1, - msgt_deallocate : 1, - msgt_unused : 1; -} mach_msg_type_t; - -typedef struct { - mach_msg_type_t msgtl_header; - unsigned short msgtl_name; - unsigned short msgtl_size; - natural_t msgtl_number; -} mach_msg_type_long_t; -#define MACH_MSG_TYPE_INTEGER_32 0 - - -typedef struct { - mach_msg_header_t header; - mach_msg_trailer_t trailer; // subtract this when sending -} ipc_trivial_message; - -typedef struct { - mach_msg_header_t header; - mach_msg_type_t type; - u_int32_t numbers[0]; - mach_msg_trailer_t trailer; // subtract this when sending -} ipc_inline_message; - -typedef struct { - mach_msg_header_t header; - mach_msg_body_t body; - mach_msg_ool_descriptor_t descriptor; - mach_msg_trailer_t trailer; // subtract this when sending -} ipc_complex_message; - -enum { - msg_type_trivial = 0, - msg_type_inline = 1, - msg_type_complex = 2 -}; - -struct port_args { - int req_size; - mach_msg_header_t *req_msg; - int reply_size; - mach_msg_header_t *reply_msg; - mach_port_t port; -}; - -/* Global options */ -static int verbose; -int oneway; -int msg_type; -int num_ints; -int num_msgs; -int num_clients; -int client_delay; -char *server_port_name; - -void signal_handler(int sig) { -} - -void usage(const char *progname) { - fprintf(stderr, "usage: %s [options]\n", progname); - fprintf(stderr, "where options are:\n"); - fprintf(stderr, " -verbose\t\tbe verbose\n"); - fprintf(stderr, " -oneway\t\tdo not request return reply\n"); - fprintf(stderr, " -count num\t\tnumber of messages to send\n"); - 
fprintf(stderr, " -type trivial|inline|complex\ttype of messages to send\n"); - fprintf(stderr, " -numints num\tnumber of 32-bit ints to send in messages\n"); - fprintf(stderr, " -clients num\tnumber of client threads to run\n"); - fprintf(stderr, " -delay num\t\tmicroseconds to sleep clients between messages\n"); - fprintf(stderr, " -name portname\tname of port on which to communicate\n"); - fprintf(stderr, "default values are:\n"); - fprintf(stderr, " . not verbose\n"); - fprintf(stderr, " . not oneway\n"); - fprintf(stderr, " . client sends 10000 messages\n"); - fprintf(stderr, " . inline message type\n"); - fprintf(stderr, " . 64 32-bit integers in inline/complex messages\n"); - fprintf(stderr, " . avail_cpus - 1 clients\n"); - fprintf(stderr, " . no delay\n"); - fprintf(stderr, " . port name 'TEST'\n"); - exit(1); -} - -void parse_args(int argc, char *argv[]) { - host_basic_info_data_t info; - mach_msg_type_number_t count; - kern_return_t result; - - /* Initialize defaults */ - verbose = 0; - oneway = 0; - msg_type = msg_type_trivial; - num_ints = 64; - num_msgs = 10000; - client_delay = 0; - server_port_name = "TEST"; - - count = HOST_BASIC_INFO_COUNT; - result = host_info(mach_host_self(), HOST_BASIC_INFO, - (host_info_t)&info, &count); - if (result == KERN_SUCCESS) - num_clients = MAX(1, info.avail_cpus - 1); - else - num_clients = 1; - - const char *progname = argv[0]; - argc--; argv++; - while (0 < argc) { - if (0 == strcmp("-verbose", argv[0])) { - verbose = 1; - argc--; argv++; - } else if (0 == strcmp("-oneway", argv[0])) { - oneway = 1; - argc--; argv++; - } else if (0 == strcmp("-type", argv[0])) { - if (argc < 2) - usage(progname); - if (0 == strcmp("trivial", argv[1])) { - msg_type = msg_type_trivial; - } else if (0 == strcmp("inline", argv[1])) { - msg_type = msg_type_inline; - } else if (0 == strcmp("complex", argv[1])) { - msg_type = msg_type_complex; - } else - usage(progname); - argc -= 2; argv += 2; - } else if (0 == strcmp("-name", argv[0])) 
{ - if (argc < 2) - usage(progname); - server_port_name = argv[1]; - argc -= 2; argv += 2; - } else if (0 == strcmp("-numints", argv[0])) { - if (argc < 2) - usage(progname); - num_ints = strtoul(argv[1], NULL, 0); - argc -= 2; argv += 2; - } else if (0 == strcmp("-count", argv[0])) { - if (argc < 2) - usage(progname); - num_msgs = strtoul(argv[1], NULL, 0); - argc -= 2; argv += 2; - } else if (0 == strcmp("-clients", argv[0])) { - if (argc < 2) - usage(progname); - num_clients = strtoul(argv[1], NULL, 0); - argc -= 2; argv += 2; - } else if (0 == strcmp("-delay", argv[0])) { - if (argc < 2) - usage(progname); - client_delay = strtoul(argv[1], NULL, 0); - argc -= 2; argv += 2; - } else - usage(progname); - } -} - -void setup_server_ports(struct port_args *ports) -{ - kern_return_t ret = 0; - mach_port_t bsport; - - ports->req_size = MAX(sizeof(ipc_inline_message) + - sizeof(u_int32_t) * num_ints, - sizeof(ipc_complex_message)); - ports->reply_size = sizeof(ipc_trivial_message) - - sizeof(mach_msg_trailer_t); - ports->req_msg = malloc(ports->req_size); - ports->reply_msg = malloc(ports->reply_size); - - ret = mach_port_allocate(mach_task_self(), - MACH_PORT_RIGHT_RECEIVE, - &(ports->port)); - if (KERN_SUCCESS != ret) { - mach_error("mach_port_allocate(): ", ret); - exit(1); - } - - ret = mach_port_insert_right(mach_task_self(), - ports->port, - ports->port, - MACH_MSG_TYPE_MAKE_SEND); - if (KERN_SUCCESS != ret) { - mach_error("mach_port_insert_right(): ", ret); - exit(1); - } - - ret = task_get_bootstrap_port(mach_task_self(), &bsport); - if (KERN_SUCCESS != ret) { - mach_error("task_get_bootstrap_port(): ", ret); - exit(1); - } - - ret = bootstrap_register(bsport, server_port_name, ports->port); - if (KERN_SUCCESS != ret) { - mach_error("bootstrap_register(): ", ret); - exit(1); - } - if (verbose) { - printf("server waiting for IPC messages from client on port '%s'.\n", - server_port_name); - } -} - -void setup_client_ports(struct port_args *ports) -{ - 
kern_return_t ret = 0; - switch(msg_type) { - case msg_type_trivial: - ports->req_size = sizeof(ipc_trivial_message); - break; - case msg_type_inline: - ports->req_size = sizeof(ipc_inline_message) + - sizeof(u_int32_t) * num_ints; - break; - case msg_type_complex: - ports->req_size = sizeof(ipc_complex_message); - break; - } - ports->req_size -= sizeof(mach_msg_trailer_t); - ports->reply_size = sizeof(ipc_trivial_message); - ports->req_msg = malloc(ports->req_size); - ports->reply_msg = malloc(ports->reply_size); - - ret = mach_port_allocate(mach_task_self(), - MACH_PORT_RIGHT_RECEIVE, - &(ports->port)); - if (KERN_SUCCESS != ret) { - mach_error("mach_port_allocate(): ", ret); - exit(1); - } - if (verbose) { - printf("Client sending %d %s IPC messages to port '%s' in %s mode.\n", - num_msgs, (msg_type == msg_type_inline) ? - "inline" : ((msg_type == msg_type_complex) ? - "complex" : "trivial"), - server_port_name, (oneway ? "oneway" : "rpc")); - } - -} - -void server(struct port_args *args) -{ - int idx; - kern_return_t ret; - int totalmsg = num_msgs * num_clients; - - unsigned long long starttsc, endtsc, deltatsc; - struct timeval starttv, endtv, deltatv; - - /* Call gettimeofday() once and throw away result; some implementations - * (like Mach's) cache some time zone info on first call. Then, call - * ReadTSR in case that helps warm things up, again discarding the - * results. 
- */ - gettimeofday(&starttv, NULL); - ReadTSR(); - - gettimeofday(&starttv, NULL); - starttsc = ReadTSR(); - - for (idx = 0; idx < totalmsg; idx++) { - if (verbose) - printf("server awaiting message %d\n", idx); - args->req_msg->msgh_bits = 0; - args->req_msg->msgh_size = args->req_size; - args->req_msg->msgh_local_port = args->port; - ret = mach_msg(args->req_msg, - MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE, - 0, - args->req_size, - args->port, - MACH_MSG_TIMEOUT_NONE, - MACH_PORT_NULL); - if (MACH_MSG_SUCCESS != ret) { - mach_error("mach_msg (receive): ", ret); - exit(1); - } - if (verbose) - printf("server received message %d\n", idx); - if (args->req_msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) { - ret = vm_deallocate(mach_task_self(), - (vm_address_t)((ipc_complex_message *)args->req_msg)->descriptor.address, - ((ipc_complex_message *)args->req_msg)->descriptor.size); - } - - if (1 == args->req_msg->msgh_id) { - if (verbose) - printf("server sending reply %d\n", idx); - args->reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, - MACH_MSG_TYPE_MAKE_SEND); - args->reply_msg->msgh_size = args->reply_size; - args->reply_msg->msgh_remote_port = args->req_msg->msgh_remote_port; - args->reply_msg->msgh_local_port = args->req_msg->msgh_local_port; - args->reply_msg->msgh_id = 2; - ret = mach_msg(args->reply_msg, - MACH_SEND_MSG, - args->reply_size, - 0, - MACH_PORT_NULL, - MACH_MSG_TIMEOUT_NONE, - MACH_PORT_NULL); - if (MACH_MSG_SUCCESS != ret) { - mach_error("mach_msg (send): ", ret); - exit(1); - } - } - } - - endtsc = ReadTSR(); - gettimeofday(&endtv, NULL); - - /* report results */ - deltatsc = endtsc - starttsc; - deltatv.tv_sec = endtv.tv_sec - starttv.tv_sec; - deltatv.tv_usec = endtv.tv_usec - starttv.tv_usec; - if (endtv.tv_usec < starttv.tv_usec) { - deltatv.tv_sec--; - deltatv.tv_usec += 1000000; - } - - double dsecs = (double) deltatv.tv_sec + - 1.0E-6 * (double) deltatv.tv_usec; - - printf("\n%u messages during %qd time stamp ticks\n", - 
totalmsg, deltatsc); - printf("%g time stamp ticks per message\n", - (double) deltatsc / (double) totalmsg); - printf("\n%u messages during %u.%06u seconds\n", - totalmsg, deltatv.tv_sec, deltatv.tv_usec); - printf("%g messages per second\n", (double)totalmsg / dsecs); - printf("%g microseconds per message\n\n", - dsecs * 1.0E6 / (double) totalmsg); -} - -void *client(void *threadarg) -{ - struct port_args args; - int idx; - mach_msg_header_t *req, *reply; - mach_port_t bsport, servport; - kern_return_t ret; - void *ints = malloc(sizeof(u_int32_t) * num_ints); - - /* find server port */ - ret = task_get_bootstrap_port(mach_task_self(), &bsport); - if (KERN_SUCCESS != ret) { - mach_error("task_get_bootstrap_port(): ", ret); - exit(1); - } - ret = bootstrap_look_up(bsport, server_port_name, &servport); - if (KERN_SUCCESS != ret) { - mach_error("bootstrap_look_up(): ", ret); - exit(1); - } - - setup_client_ports(&args); - - /* start message loop */ - for (idx = 0; idx < num_msgs; idx++) { - req = args.req_msg; - reply = args.reply_msg; - - req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, - MACH_MSG_TYPE_MAKE_SEND); - req->msgh_size = args.req_size; - req->msgh_remote_port = servport; - req->msgh_local_port = args.port; - req->msgh_id = oneway ? 
0 : 1; - switch (msg_type) { - case msg_type_trivial: - break; - case msg_type_inline: - ((ipc_inline_message *)req)->type.msgt_name = MACH_MSG_TYPE_INTEGER_32; - ((ipc_inline_message *)req)->type.msgt_size = 32; - ((ipc_inline_message *)req)->type.msgt_number = num_ints; - ((ipc_inline_message *)req)->type.msgt_inline = TRUE; - ((ipc_inline_message *)req)->type.msgt_longform = FALSE; - ((ipc_inline_message *)req)->type.msgt_deallocate = FALSE; - ((ipc_inline_message *)req)->type.msgt_unused = 0; - break; - case msg_type_complex: - (req)->msgh_bits |= MACH_MSGH_BITS_COMPLEX; - ((ipc_complex_message *)req)->body.msgh_descriptor_count = 1; - ((ipc_complex_message *)req)->descriptor.address = ints; - ((ipc_complex_message *)req)->descriptor.size = - num_ints * sizeof(u_int32_t); - ((ipc_complex_message *)req)->descriptor.deallocate = FALSE; - ((ipc_complex_message *)req)->descriptor.copy = MACH_MSG_VIRTUAL_COPY; - ((ipc_complex_message *)req)->descriptor.type = MACH_MSG_OOL_DESCRIPTOR; - break; - } - if (verbose) - printf("client sending message %d\n", idx); - ret = mach_msg(req, - MACH_SEND_MSG, - args.req_size, - 0, - MACH_PORT_NULL, - MACH_MSG_TIMEOUT_NONE, - MACH_PORT_NULL); - if (MACH_MSG_SUCCESS != ret) { - mach_error("mach_msg (send): ", ret); - fprintf(stderr, "bailing after %u iterations\n", idx); - exit(1); - break; - } - if (!oneway) { - if (verbose) - printf("client awaiting reply %d\n", idx); - reply->msgh_bits = 0; - reply->msgh_size = args.reply_size; - reply->msgh_local_port = args.port; - ret = mach_msg(args.reply_msg, - MACH_RCV_MSG|MACH_RCV_INTERRUPT, - 0, - args.reply_size, - args.port, - MACH_MSG_TIMEOUT_NONE, - MACH_PORT_NULL); - if (MACH_MSG_SUCCESS != ret) { - mach_error("mach_msg (receive): ", ret); - fprintf(stderr, "bailing after %u iterations\n", - idx); - exit(1); - } - if (verbose) - printf("client received reply %d\n", idx); - } - - if (client_delay) { - usleep(client_delay); - } - } - - free(ints); - return; -} - - -int main(int argc, 
char *argv[]) -{ - struct port_args portargs; - int i; - - signal(SIGINT, signal_handler); - parse_args(argc, argv); - - setup_server_ports(&portargs); - - if (fork() != 0) { - server(&portargs); - exit(0); - } - - if (num_clients > 1) { - for (i = 1; i < num_clients; i++) { - if (fork() == 0) { - client(NULL); - exit(0); - } - } - } - - client(NULL); - - return (0); -} diff --git a/tools/tests/MMTest/Makefile b/tools/tests/MMTest/Makefile deleted file mode 100644 index c09cdc9e8..000000000 --- a/tools/tests/MMTest/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -CFLAGS=-g -O2 -arch ppc -arch i386 -CFLAGS64=-g -O2 -arch ppc64 -arch x86_64 - -TARGETS = MMtest MMtest_64 MPMMtest MPMMtest_64 - -all: $(TARGETS) - -MMtest: MMtest.c - ${CC} ${CFLAGS} -o $@ $? - -MMtest_64: MMtest.c - ${CC} ${CFLAGS64} -o $@ $? - -MPMMtest: MPMMtest.c - ${CC} ${CFLAGS} -o $@ $? - -MPMMtest_64: MPMMtest.c - ${CC} ${CFLAGS64} -o $@ $? - -clean: - rm -rf $(TARGETS) diff --git a/tools/tests/MPMMTest/KQMPMMtest.c b/tools/tests/MPMMTest/KQMPMMtest.c new file mode 100644 index 000000000..b16c5f847 --- /dev/null +++ b/tools/tests/MPMMTest/KQMPMMtest.c @@ -0,0 +1,809 @@ +#include +#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX(A, B) ((A) < (B) ? 
(B) : (A)) + + +typedef struct { + mach_msg_header_t header; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_trivial_message; + +typedef struct { + mach_msg_header_t header; + u_int32_t numbers[0]; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_inline_message; + +typedef struct { + mach_msg_header_t header; + mach_msg_body_t body; + mach_msg_ool_descriptor_t descriptor; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_complex_message; + +enum { + msg_type_trivial = 0, + msg_type_inline = 1, + msg_type_complex = 2 +}; + +struct port_args { + int server_num; + int req_size; + mach_msg_header_t *req_msg; + int reply_size; + mach_msg_header_t *reply_msg; + mach_port_t port; + mach_port_t pset; +}; + +typedef union { + pid_t pid; + pthread_t tid; +} thread_id_t; + +/* Global options */ +static boolean_t verbose = FALSE; +static boolean_t affinity = FALSE; +static boolean_t timeshare = FALSE; +static boolean_t threaded = FALSE; +static boolean_t oneway = FALSE; +int msg_type; +int num_ints; +int num_msgs; +int num_clients; +int num_servers; +int client_delay; +int client_spin; +int client_pages; +char **server_port_name; + +void signal_handler(int sig) { +} + +void usage(const char *progname) { + fprintf(stderr, "usage: %s [options]\n", progname); + fprintf(stderr, "where options are:\n"); + fprintf(stderr, " -affinity\t\tthreads use affinity\n"); + fprintf(stderr, " -timeshare\t\tthreads use timeshare\n"); + fprintf(stderr, " -threaded\t\tuse (p)threads\n"); + fprintf(stderr, " -verbose\t\tbe verbose\n"); + fprintf(stderr, " -oneway\t\tdo not request return reply\n"); + fprintf(stderr, " -count num\t\tnumber of messages to send\n"); + fprintf(stderr, " -type trivial|inline|complex\ttype of messages to send\n"); + fprintf(stderr, " -numints num\tnumber of 32-bit ints to send in messages\n"); + fprintf(stderr, " -servers num\tnumber of servers threads to run\n"); + fprintf(stderr, " -clients num\tnumber of clients 
per server\n"); + fprintf(stderr, " -delay num\t\tmicroseconds to sleep clients between messages\n"); + fprintf(stderr, " -work num\t\tmicroseconds of client work\n"); + fprintf(stderr, " -pages num\t\tpages of memory touched by client work\n"); + fprintf(stderr, "default values are:\n"); + fprintf(stderr, " . no affinity\n"); + fprintf(stderr, " . not timeshare\n"); + fprintf(stderr, " . not verbose\n"); + fprintf(stderr, " . not oneway\n"); + fprintf(stderr, " . client sends 100000 messages\n"); + fprintf(stderr, " . inline message type\n"); + fprintf(stderr, " . 64 32-bit integers in inline/complex messages\n"); + fprintf(stderr, " . (num_available_processors+1)%%2 servers\n"); + fprintf(stderr, " . 4 clients per server\n"); + fprintf(stderr, " . no delay\n"); + exit(1); +} + +void parse_args(int argc, char *argv[]) { + host_basic_info_data_t info; + mach_msg_type_number_t count; + kern_return_t result; + + /* Initialize defaults */ + msg_type = msg_type_trivial; + num_ints = 64; + num_msgs = 100000; + client_delay = 0; + num_clients = 4; + + count = HOST_BASIC_INFO_COUNT; + result = host_info(mach_host_self(), HOST_BASIC_INFO, + (host_info_t)&info, &count); + if (result == KERN_SUCCESS && info.avail_cpus > 1) + num_servers = info.avail_cpus / 2; + else + num_servers = 1; + + const char *progname = argv[0]; + argc--; argv++; + while (0 < argc) { + if (0 == strcmp("-verbose", argv[0])) { + verbose = TRUE; + argc--; argv++; + } else if (0 == strcmp("-affinity", argv[0])) { + affinity = TRUE; + argc--; argv++; + } else if (0 == strcmp("-timeshare", argv[0])) { + timeshare = TRUE; + argc--; argv++; + } else if (0 == strcmp("-threaded", argv[0])) { + threaded = TRUE; + argc--; argv++; + } else if (0 == strcmp("-oneway", argv[0])) { + oneway = TRUE; + argc--; argv++; + } else if (0 == strcmp("-type", argv[0])) { + if (argc < 2) + usage(progname); + if (0 == strcmp("trivial", argv[1])) { + msg_type = msg_type_trivial; + } else if (0 == strcmp("inline", argv[1])) { + 
msg_type = msg_type_inline; + } else if (0 == strcmp("complex", argv[1])) { + msg_type = msg_type_complex; + } else + usage(progname); + argc -= 2; argv += 2; + } else if (0 == strcmp("-numints", argv[0])) { + if (argc < 2) + usage(progname); + num_ints = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-count", argv[0])) { + if (argc < 2) + usage(progname); + num_msgs = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-clients", argv[0])) { + if (argc < 2) + usage(progname); + num_clients = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-servers", argv[0])) { + if (argc < 2) + usage(progname); + num_servers = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-delay", argv[0])) { + if (argc < 2) + usage(progname); + client_delay = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-spin", argv[0])) { + if (argc < 2) + usage(progname); + client_spin = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else if (0 == strcmp("-pages", argv[0])) { + if (argc < 2) + usage(progname); + client_pages = strtoul(argv[1], NULL, 0); + argc -= 2; argv += 2; + } else + usage(progname); + } +} + +void setup_server_ports(struct port_args *ports) +{ + kern_return_t ret = 0; + mach_port_t bsport; + + ports->req_size = MAX(sizeof(ipc_inline_message) + + sizeof(u_int32_t) * num_ints, + sizeof(ipc_complex_message)); + ports->reply_size = sizeof(ipc_trivial_message) - + sizeof(mach_msg_trailer_t); + ports->req_msg = malloc(ports->req_size); + ports->reply_msg = malloc(ports->reply_size); + + ret = mach_port_allocate(mach_task_self(), + MACH_PORT_RIGHT_RECEIVE, + &(ports->port)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(): ", ret); + exit(1); + } + + ret = mach_port_allocate(mach_task_self(), + MACH_PORT_RIGHT_PORT_SET, + &(ports->pset)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(): ", ret); + exit(1); + } + 
+ ret = mach_port_insert_member(mach_task_self(), + ports->port, + ports->pset); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_insert_member(): ", ret); + exit(1); + } + + ret = mach_port_insert_right(mach_task_self(), + ports->port, + ports->port, + MACH_MSG_TYPE_MAKE_SEND); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_insert_right(): ", ret); + exit(1); + } + + ret = task_get_bootstrap_port(mach_task_self(), &bsport); + if (KERN_SUCCESS != ret) { + mach_error("task_get_bootstrap_port(): ", ret); + exit(1); + } + + if (verbose) { + printf("server waiting for IPC messages from client on port '%s'.\n", + server_port_name[ports->server_num]); + } + ret = bootstrap_register(bsport, + server_port_name[ports->server_num], + ports->port); + if (KERN_SUCCESS != ret) { + mach_error("bootstrap_register(): ", ret); + exit(1); + } +} + +void setup_client_ports(struct port_args *ports) +{ + kern_return_t ret = 0; + switch(msg_type) { + case msg_type_trivial: + ports->req_size = sizeof(ipc_trivial_message); + break; + case msg_type_inline: + ports->req_size = sizeof(ipc_inline_message) + + sizeof(u_int32_t) * num_ints; + break; + case msg_type_complex: + ports->req_size = sizeof(ipc_complex_message); + break; + } + ports->req_size -= sizeof(mach_msg_trailer_t); + ports->reply_size = sizeof(ipc_trivial_message); + ports->req_msg = malloc(ports->req_size); + ports->reply_msg = malloc(ports->reply_size); + + ret = mach_port_allocate(mach_task_self(), + MACH_PORT_RIGHT_RECEIVE, + &(ports->port)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(): ", ret); + exit(1); + } + if (verbose) { + printf("Client sending %d %s IPC messages to port '%s' in %s mode.\n", + num_msgs, (msg_type == msg_type_inline) ? + "inline" : ((msg_type == msg_type_complex) ? + "complex" : "trivial"), + server_port_name[ports->server_num], + (oneway ? 
"oneway" : "rpc")); + } + +} + + +static void +thread_setup(int tag) { +#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER + kern_return_t ret; + thread_extended_policy_data_t epolicy; + thread_affinity_policy_data_t policy; + + if (!timeshare) { + epolicy.timeshare = FALSE; + ret = thread_policy_set( + mach_thread_self(), THREAD_EXTENDED_POLICY, + (thread_policy_t) &epolicy, + THREAD_EXTENDED_POLICY_COUNT); + if (ret != KERN_SUCCESS) + printf("thread_policy_set(THREAD_EXTENDED_POLICY) returned %d\n", ret); + } + + if (affinity) { + policy.affinity_tag = tag; + ret = thread_policy_set( + mach_thread_self(), THREAD_AFFINITY_POLICY, + (thread_policy_t) &policy, + THREAD_AFFINITY_POLICY_COUNT); + if (ret != KERN_SUCCESS) + printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret); + } +#endif +} + +void * +server(void *serverarg) +{ + int kq; + struct kevent64_s kev[1]; + int err; + struct port_args args; + int idx; + kern_return_t ret; + int totalmsg = num_msgs * num_clients; + + args.server_num = (int) (long) serverarg; + setup_server_ports(&args); + + thread_setup(args.server_num + 1); + + kq = kqueue(); + if (kq == -1) { + perror("kqueue"); + exit(1); + } + EV_SET64(&kev[0], args.pset, EVFILT_MACHPORT, (EV_ADD | EV_CLEAR | EV_DISPATCH), +#if DIRECT_MSG_RCV + MACH_RCV_MSG|MACH_RCV_LARGE, 0, 0, (mach_vm_address_t)args.req_msg, args.req_size); +#else + 0, 0, 0, 0, 0); +#endif + err = kevent64(kq, kev, 1, NULL, 0, 0, NULL); + if (err == -1) { + perror("kevent"); + exit(1); + } + for (idx = 0; idx < totalmsg; idx++) { + + if (verbose) + printf("server awaiting message %d\n", idx); + retry: + EV_SET64(&kev[0], args.pset, EVFILT_MACHPORT, EV_ENABLE, +#if DIRECT_MSG_RCV + MACH_RCV_MSG|MACH_RCV_LARGE, 0, 0, (mach_vm_address_t)args.req_msg, args.req_size); +#else + 0, 0, 0, 0, 0); +#endif + err = kevent64(kq, kev, 1, kev, 1, 0, NULL); + if (err == -1) { + perror("kevent64"); + exit(1); + } + if (err == 0) { + // printf("kevent64: returned zero\n"); + goto retry; 
+ } + +#if DIRECT_MSG_RCV + ret = kev[0].fflags; + if (MACH_MSG_SUCCESS != ret) { + if (verbose) + printf("kevent64() mach_msg_return=%d", ret); + mach_error("kevent64 (msg receive): ", ret); + exit(1); + } +#else + if (kev[0].data != args.port) + printf("kevent64(MACH_PORT_NULL) port name (0x%x) != expected (0x%x)\n", kev[0].data, args.port); + + args.req_msg->msgh_bits = 0; + args.req_msg->msgh_size = args.req_size; + args.req_msg->msgh_local_port = args.port; + ret = mach_msg(args.req_msg, + MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE, + 0, + args.req_size, + args.pset, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (MACH_RCV_INTERRUPTED == ret) + break; + if (MACH_MSG_SUCCESS != ret) { + if (verbose) + printf("mach_msg() ret=%d", ret); + mach_error("mach_msg (receive): ", ret); + exit(1); + } +#endif + if (verbose) + printf("server received message %d\n", idx); + if (args.req_msg->msgh_bits & MACH_MSGH_BITS_COMPLEX) { + ret = vm_deallocate(mach_task_self(), + (vm_address_t)((ipc_complex_message *)args.req_msg)->descriptor.address, + ((ipc_complex_message *)args.req_msg)->descriptor.size); + } + + if (1 == args.req_msg->msgh_id) { + if (verbose) + printf("server sending reply %d\n", idx); + args.reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, + MACH_MSG_TYPE_MAKE_SEND); + args.reply_msg->msgh_size = args.reply_size; + args.reply_msg->msgh_remote_port = args.req_msg->msgh_remote_port; + args.reply_msg->msgh_local_port = args.req_msg->msgh_local_port; + args.reply_msg->msgh_id = 2; + ret = mach_msg(args.reply_msg, + MACH_SEND_MSG, + args.reply_size, + 0, + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (MACH_MSG_SUCCESS != ret) { + mach_error("mach_msg (send): ", ret); + exit(1); + } + } + } +} + +static inline void +client_spin_loop(unsigned count, void (fn)(void)) +{ + while (count--) + fn(); +} + +static long dummy_memory; +static long *client_memory = &dummy_memory; +static void +client_work_atom(void) +{ + static int i; + 
+ if (++i > client_pages * PAGE_SIZE / sizeof(long)) + i = 0; + client_memory[i] = 0; +} + +static int calibration_count = 10000; +static int calibration_usec; +static void * +calibrate_client_work(void) +{ + long dummy; + struct timeval nowtv; + struct timeval warmuptv = { 0, 100 * 1000 }; /* 100ms */ + struct timeval starttv; + struct timeval endtv; + + if (client_spin) { + /* Warm-up the stepper first... */ + gettimeofday(&nowtv, NULL); + timeradd(&nowtv, &warmuptv, &endtv); + do { + client_spin_loop(calibration_count, client_work_atom); + gettimeofday(&nowtv, NULL); + } while (timercmp(&nowtv, &endtv, < )); + + /* Now do the calibration */ + while (TRUE) { + gettimeofday(&starttv, NULL); + client_spin_loop(calibration_count, client_work_atom); + gettimeofday(&endtv, NULL); + if (endtv.tv_sec - starttv.tv_sec > 1) { + calibration_count /= 10; + continue; + } + calibration_usec = endtv.tv_usec - starttv.tv_usec; + if (endtv.tv_usec < starttv.tv_usec) { + calibration_usec += 1000000; + } + if (calibration_usec < 1000) { + calibration_count *= 10; + continue; + } + calibration_count /= calibration_usec; + break; + } + if (verbose) + printf("calibration_count=%d calibration_usec=%d\n", + calibration_count, calibration_usec); + } +} + +static void * +client_work(void) +{ + + if (client_spin) { + client_spin_loop(calibration_count*client_spin, + client_work_atom); + } + + if (client_delay) { + usleep(client_delay); + } +} + +void *client(void *threadarg) +{ + struct port_args args; + int idx; + mach_msg_header_t *req, *reply; + mach_port_t bsport, servport; + kern_return_t ret; + long server_num = (long) threadarg; + void *ints = malloc(sizeof(u_int32_t) * num_ints); + + if (verbose) + printf("client(%d) started, server port name %s\n", + server_num, server_port_name[server_num]); + + args.server_num = server_num; + thread_setup(server_num + 1); + + /* find server port */ + ret = task_get_bootstrap_port(mach_task_self(), &bsport); + if (KERN_SUCCESS != ret) { + 
mach_error("task_get_bootstrap_port(): ", ret); + exit(1); + } + ret = bootstrap_look_up(bsport, + server_port_name[server_num], + &servport); + if (KERN_SUCCESS != ret) { + mach_error("bootstrap_look_up(): ", ret); + exit(1); + } + + setup_client_ports(&args); + + /* Allocate and touch memory */ + if (client_pages) { + unsigned i; + client_memory = (long *) malloc(client_pages * PAGE_SIZE); + for (i = 0; i < client_pages; i++) + client_memory[i * PAGE_SIZE / sizeof(long)] = 0; + } + + /* start message loop */ + for (idx = 0; idx < num_msgs; idx++) { + req = args.req_msg; + reply = args.reply_msg; + + req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, + MACH_MSG_TYPE_MAKE_SEND); + req->msgh_size = args.req_size; + req->msgh_remote_port = servport; + req->msgh_local_port = args.port; + req->msgh_id = oneway ? 0 : 1; + if (msg_type == msg_type_complex) { + (req)->msgh_bits |= MACH_MSGH_BITS_COMPLEX; + ((ipc_complex_message *)req)->body.msgh_descriptor_count = 1; + ((ipc_complex_message *)req)->descriptor.address = ints; + ((ipc_complex_message *)req)->descriptor.size = + num_ints * sizeof(u_int32_t); + ((ipc_complex_message *)req)->descriptor.deallocate = FALSE; + ((ipc_complex_message *)req)->descriptor.copy = MACH_MSG_VIRTUAL_COPY; + ((ipc_complex_message *)req)->descriptor.type = MACH_MSG_OOL_DESCRIPTOR; + } + if (verbose) + printf("client sending message %d\n", idx); + ret = mach_msg(req, + MACH_SEND_MSG, + args.req_size, + 0, + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (MACH_MSG_SUCCESS != ret) { + mach_error("mach_msg (send): ", ret); + fprintf(stderr, "bailing after %u iterations\n", idx); + exit(1); + break; + } + if (!oneway) { + if (verbose) + printf("client awaiting reply %d\n", idx); + reply->msgh_bits = 0; + reply->msgh_size = args.reply_size; + reply->msgh_local_port = args.port; + ret = mach_msg(args.reply_msg, + MACH_RCV_MSG|MACH_RCV_INTERRUPT, + 0, + args.reply_size, + args.port, + MACH_MSG_TIMEOUT_NONE, + 
MACH_PORT_NULL); + if (MACH_MSG_SUCCESS != ret) { + mach_error("mach_msg (receive): ", ret); + fprintf(stderr, "bailing after %u iterations\n", + idx); + exit(1); + } + if (verbose) + printf("client received reply %d\n", idx); + } + + client_work(); + } + + free(ints); + return; +} + +static void +thread_spawn(thread_id_t *thread, void *(fn)(void *), void *arg) { + if (threaded) { + kern_return_t ret; + ret = pthread_create( + &thread->tid, + NULL, + fn, + arg); + if (ret != 0) + err(1, "pthread_create()"); + if (verbose) + printf("created pthread 0x%x\n", thread->tid); + } else { + thread->pid = fork(); + if (thread->pid == 0) { + if (verbose) + printf("calling 0x%x(0x%x)\n", fn, arg); + fn(arg); + exit(0); + } + if (verbose) + printf("forked pid %d\n", thread->pid); + } +} + +static void +thread_join(thread_id_t *thread) { + if (threaded) { + kern_return_t ret; + if (verbose) + printf("joining thread 0x%x\n", thread->tid); + ret = pthread_join(thread->tid, NULL); + if (ret != KERN_SUCCESS) + err(1, "pthread_join(0x%x)", thread->tid); + } else { + int stat; + if (verbose) + printf("waiting for pid %d\n", thread->pid); + waitpid(thread->pid, &stat, 0); + } +} + +static void +wait_for_servers(void) +{ + int i; + int retry_count = 10; + mach_port_t bsport, servport; + kern_return_t ret; + + /* find server port */ + ret = task_get_bootstrap_port(mach_task_self(), &bsport); + if (KERN_SUCCESS != ret) { + mach_error("task_get_bootstrap_port(): ", ret); + exit(1); + } + + while (retry_count-- > 0) { + for (i = 0; i < num_servers; i++) { + ret = bootstrap_look_up(bsport, + server_port_name[i], + &servport); + if (ret != KERN_SUCCESS) { + break; + } + } + if (ret == KERN_SUCCESS) + return; + usleep(100 * 1000); /* 100ms */ + } + fprintf(stderr, "Server(s) failed to register\n"); + exit(1); +} + +int main(int argc, char *argv[]) +{ + int i; + int j; + thread_id_t *client_id; + thread_id_t *server_id; + + signal(SIGINT, signal_handler); + parse_args(argc, argv); + + 
calibrate_client_work(); + + /* + * If we're using affinity create an empty namespace now + * so this is shared by all our offspring. + */ + if (affinity) + thread_setup(0); + + server_id = (thread_id_t *) malloc(num_servers * sizeof(thread_id_t)); + server_port_name = (char **) malloc(num_servers * sizeof(char *)); + if (verbose) + printf("creating %d servers\n", num_servers); + for (i = 0; i < num_servers; i++) { + server_port_name[i] = (char *) malloc(sizeof("PORT.pppppp.xx")); + /* PORT names include pid of main process for disambiguation */ + sprintf(server_port_name[i], "PORT.%06d.%02d", getpid(), i); + thread_spawn(&server_id[i], server, (void *) (long) i); + } + + int totalclients = num_servers * num_clients; + int totalmsg = num_msgs * totalclients; + struct timeval starttv, endtv, deltatv; + + /* + * Wait for all servers to have registered all ports before starting + * the clients and the clock. + */ + wait_for_servers(); + + printf("%d server%s, %d client%s per server (%d total) %u messages...", + num_servers, (num_servers > 1)? "s" : "", + num_clients, (num_clients > 1)? "s" : "", + totalclients, + totalmsg); + fflush(stdout); + + /* Call gettimeofday() once and throw away result; some implementations + * (like Mach's) cache some time zone info on first call. 
+ */ + gettimeofday(&starttv, NULL); + gettimeofday(&starttv, NULL); + + client_id = (thread_id_t *) malloc(totalclients * sizeof(thread_id_t)); + if (verbose) + printf("creating %d clients\n", totalclients); + for (i = 0; i < num_servers; i++) { + for (j = 0; j < num_clients; j++) { + thread_spawn( + &client_id[(i*num_clients) + j], + client, + (void *) (long) i); + } + } + + /* Wait for servers to complete */ + for (i = 0; i < num_servers; i++) { + thread_join(&server_id[i]); + } + + gettimeofday(&endtv, NULL); + + for (i = 0; i < totalclients; i++) { + thread_join(&client_id[i]); + } + + /* report results */ + deltatv.tv_sec = endtv.tv_sec - starttv.tv_sec; + deltatv.tv_usec = endtv.tv_usec - starttv.tv_usec; + if (endtv.tv_usec < starttv.tv_usec) { + deltatv.tv_sec--; + deltatv.tv_usec += 1000000; + } + + double dsecs = (double) deltatv.tv_sec + + 1.0E-6 * (double) deltatv.tv_usec; + + printf(" in %u.%03u seconds\n", + deltatv.tv_sec, deltatv.tv_usec/1000); + printf(" throughput in messages/sec: %g\n", + (double)totalmsg / dsecs); + printf(" average message latency (usec): %2.3g\n", + dsecs * 1.0E6 / (double) totalmsg); + + return (0); + +} diff --git a/tools/tests/MMTest/MPMMtest.c b/tools/tests/MPMMTest/MPMMtest.c similarity index 90% rename from tools/tests/MMTest/MPMMtest.c rename to tools/tests/MPMMTest/MPMMtest.c index ac555fa05..44389b35d 100644 --- a/tools/tests/MMTest/MPMMtest.c +++ b/tools/tests/MPMMTest/MPMMtest.c @@ -20,24 +20,6 @@ #define MAX(A, B) ((A) < (B) ? 
(B) : (A)) -typedef struct { - unsigned int msgt_name : 8, - msgt_size : 8, - msgt_number : 12, - msgt_inline : 1, - msgt_longform : 1, - msgt_deallocate : 1, - msgt_unused : 1; -} mach_msg_type_t; - -typedef struct { - mach_msg_type_t msgtl_header; - unsigned short msgtl_name; - unsigned short msgtl_size; - natural_t msgtl_number; -} mach_msg_type_long_t; -#define MACH_MSG_TYPE_INTEGER_32 0 - typedef struct { mach_msg_header_t header; @@ -46,7 +28,6 @@ typedef struct { typedef struct { mach_msg_header_t header; - mach_msg_type_t type; u_int32_t numbers[0]; mach_msg_trailer_t trailer; // subtract this when sending } ipc_inline_message; @@ -71,6 +52,7 @@ struct port_args { int reply_size; mach_msg_header_t *reply_msg; mach_port_t port; + mach_port_t set; }; typedef union { @@ -84,6 +66,7 @@ static boolean_t affinity = FALSE; static boolean_t timeshare = FALSE; static boolean_t threaded = FALSE; static boolean_t oneway = FALSE; +static boolean_t useset = FALSE; int msg_type; int num_ints; int num_msgs; @@ -92,6 +75,7 @@ int num_servers; int client_delay; int client_spin; int client_pages; +int portcount = 1; char **server_port_name; void signal_handler(int sig) { @@ -113,6 +97,7 @@ void usage(const char *progname) { fprintf(stderr, " -delay num\t\tmicroseconds to sleep clients between messages\n"); fprintf(stderr, " -work num\t\tmicroseconds of client work\n"); fprintf(stderr, " -pages num\t\tpages of memory touched by client work\n"); + fprintf(stderr, " -set num\t\tuse a portset stuffed with num ports in server\n"); fprintf(stderr, "default values are:\n"); fprintf(stderr, " . no affinity\n"); fprintf(stderr, " . 
not timeshare\n"); @@ -212,6 +197,13 @@ void parse_args(int argc, char *argv[]) { usage(progname); client_pages = strtoul(argv[1], NULL, 0); argc -= 2; argv += 2; + } else if (0 == strcmp("-set", argv[0])) { + if (argc < 2) + usage(progname); + portcount = strtoul(argv[1], NULL, 0); + useset = TRUE; + argc -= 2; argv += 2; + argc--; argv++; } else usage(progname); } @@ -221,6 +213,8 @@ void setup_server_ports(struct port_args *ports) { kern_return_t ret = 0; mach_port_t bsport; + mach_port_t port; + int i; ports->req_size = MAX(sizeof(ipc_inline_message) + sizeof(u_int32_t) * num_ints, @@ -230,14 +224,40 @@ void setup_server_ports(struct port_args *ports) ports->req_msg = malloc(ports->req_size); ports->reply_msg = malloc(ports->reply_size); - ret = mach_port_allocate(mach_task_self(), - MACH_PORT_RIGHT_RECEIVE, - &(ports->port)); - if (KERN_SUCCESS != ret) { - mach_error("mach_port_allocate(): ", ret); - exit(1); + if (useset) { + ret = mach_port_allocate(mach_task_self(), + MACH_PORT_RIGHT_PORT_SET, + &(ports->set)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(SET): ", ret); + exit(1); + } } + /* stuff the portset with ports */ + for (i=0; i < portcount; i++) { + ret = mach_port_allocate(mach_task_self(), + MACH_PORT_RIGHT_RECEIVE, + &port); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(PORT): ", ret); + exit(1); + } + + if (useset) { + ret = mach_port_move_member(mach_task_self(), + port, + ports->set); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_move_member(): ", ret); + exit(1); + } + } + } + + /* use the last one as the real port */ + ports->port = port; + ret = mach_port_insert_right(mach_task_self(), ports->port, ports->port, @@ -341,23 +361,23 @@ server(void *serverarg) int idx; kern_return_t ret; int totalmsg = num_msgs * num_clients; + mach_port_t recv_port; args.server_num = (int) (long) serverarg; setup_server_ports(&args); thread_setup(args.server_num + 1); + recv_port = (useset) ? 
args.set : args.port; + for (idx = 0; idx < totalmsg; idx++) { if (verbose) printf("server awaiting message %d\n", idx); - args.req_msg->msgh_bits = 0; - args.req_msg->msgh_size = args.req_size; - args.req_msg->msgh_local_port = args.port; ret = mach_msg(args.req_msg, MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE, 0, args.req_size, - args.port, + recv_port, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); if (MACH_RCV_INTERRUPTED == ret) @@ -379,11 +399,10 @@ server(void *serverarg) if (1 == args.req_msg->msgh_id) { if (verbose) printf("server sending reply %d\n", idx); - args.reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, - MACH_MSG_TYPE_MAKE_SEND); + args.reply_msg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0); args.reply_msg->msgh_size = args.reply_size; args.reply_msg->msgh_remote_port = args.req_msg->msgh_remote_port; - args.reply_msg->msgh_local_port = args.req_msg->msgh_local_port; + args.reply_msg->msgh_local_port = MACH_PORT_NULL; args.reply_msg->msgh_id = 2; ret = mach_msg(args.reply_msg, MACH_SEND_MSG, @@ -526,33 +545,20 @@ void *client(void *threadarg) reply = args.reply_msg; req->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, - MACH_MSG_TYPE_MAKE_SEND); + MACH_MSG_TYPE_MAKE_SEND_ONCE); req->msgh_size = args.req_size; req->msgh_remote_port = servport; req->msgh_local_port = args.port; req->msgh_id = oneway ? 
0 : 1; - switch (msg_type) { - case msg_type_trivial: - break; - case msg_type_inline: - ((ipc_inline_message *)req)->type.msgt_name = MACH_MSG_TYPE_INTEGER_32; - ((ipc_inline_message *)req)->type.msgt_size = 32; - ((ipc_inline_message *)req)->type.msgt_number = num_ints; - ((ipc_inline_message *)req)->type.msgt_inline = TRUE; - ((ipc_inline_message *)req)->type.msgt_longform = FALSE; - ((ipc_inline_message *)req)->type.msgt_deallocate = FALSE; - ((ipc_inline_message *)req)->type.msgt_unused = 0; - break; - case msg_type_complex: - (req)->msgh_bits |= MACH_MSGH_BITS_COMPLEX; - ((ipc_complex_message *)req)->body.msgh_descriptor_count = 1; - ((ipc_complex_message *)req)->descriptor.address = ints; - ((ipc_complex_message *)req)->descriptor.size = - num_ints * sizeof(u_int32_t); - ((ipc_complex_message *)req)->descriptor.deallocate = FALSE; - ((ipc_complex_message *)req)->descriptor.copy = MACH_MSG_VIRTUAL_COPY; - ((ipc_complex_message *)req)->descriptor.type = MACH_MSG_OOL_DESCRIPTOR; - break; + if (msg_type == msg_type_complex) { + (req)->msgh_bits |= MACH_MSGH_BITS_COMPLEX; + ((ipc_complex_message *)req)->body.msgh_descriptor_count = 1; + ((ipc_complex_message *)req)->descriptor.address = ints; + ((ipc_complex_message *)req)->descriptor.size = + num_ints * sizeof(u_int32_t); + ((ipc_complex_message *)req)->descriptor.deallocate = FALSE; + ((ipc_complex_message *)req)->descriptor.copy = MACH_MSG_VIRTUAL_COPY; + ((ipc_complex_message *)req)->descriptor.type = MACH_MSG_OOL_DESCRIPTOR; } if (verbose) printf("client sending message %d\n", idx); diff --git a/tools/tests/MPMMTest/Makefile b/tools/tests/MPMMTest/Makefile new file mode 100644 index 000000000..7762791a0 --- /dev/null +++ b/tools/tests/MPMMTest/Makefile @@ -0,0 +1,27 @@ +CFLAGS=-g -O2 -arch ppc -arch i386 +CFLAGS64=-g -O2 -arch x86_64 + +TARGETS = MPMMtest MPMMtest_64 KQMPMMtest KQMPMMtest_64 KQMPMMtestD KQMPMMtest_64D + +all: $(TARGETS) + +MPMMtest: MPMMtest.c + ${CC} ${CFLAGS} -o $@ $? 
+ +MPMMtest_64: MPMMtest.c + ${CC} ${CFLAGS64} -o $@ $? + +KQMPMMtest: KQMPMMtest.c + ${CC} ${CFLAGS} -o $@ $? + +KQMPMMtest_64: KQMPMMtest.c + ${CC} ${CFLAGS64} -o $@ $? + +KQMPMMtestD: KQMPMMtest.c + ${CC} ${CFLAGS} -DDIRECT_MSG_RCV=1 -o $@ $? + +KQMPMMtest_64D: KQMPMMtest.c + ${CC} ${CFLAGS64} -DDIRECT_MSG_RCV=1 -o $@ $? + +clean: + rm -rf $(TARGETS) *.dSYM diff --git a/tools/tests/MPMMTest/README b/tools/tests/MPMMTest/README new file mode 100644 index 000000000..6c7530f51 --- /dev/null +++ b/tools/tests/MPMMTest/README @@ -0,0 +1,15 @@ +MPMMTest / KQMPMMTest + +These tests measure the speed of IPC with mach messaging and kqueues. To build, +simply run make. A 32- and 64-bit version of each test will be generated by +default. Each test can be run without options: + +$ ./MPMMtest +1 server, 4 clients per server (4 total) 400000 messages... in 4.820 seconds + throughput in messages/sec: 82978.7 + average message latency (usec): 12.1 + +and will report the latency and throughput that the server achieved. The user +can change the number of servers and clients, the flavor of message, and other +variables with command line options--run './MPMMtest -h' for details. 
+ diff --git a/tools/tests/kqueue_tests/Makefile b/tools/tests/kqueue_tests/Makefile new file mode 100644 index 000000000..9db391fe4 --- /dev/null +++ b/tools/tests/kqueue_tests/Makefile @@ -0,0 +1,7 @@ +all: readwrite timer + +readwrite: + gcc -o readwrite_tests kqueue_readwrite_tests.c -arch ppc -arch i386 + +timer: + gcc -o timer_tests kqueue_timer_tests.c -arch ppc -arch i386 -arch x86_64 diff --git a/tools/tests/kqueue_tests/kqueue_readwrite_tests.c b/tools/tests/kqueue_tests/kqueue_readwrite_tests.c new file mode 100644 index 000000000..e4ad5b5e4 --- /dev/null +++ b/tools/tests/kqueue_tests/kqueue_readwrite_tests.c @@ -0,0 +1,1614 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DIR1 "dir1" +#define DOTDOT ".." +#define DIR2 "dir2" +#define FILE1 "file1" +#define FILE2 "file2" + +#define KEY "somekey" +#define VAL "someval" + +#define NOSLEEP 0 +#define SLEEP 1 +#define NO_EVENT 0 +#define YES_EVENT 1 + + +#define OUTPUT_LEVEL 2 +#define RESULT_LEVEL 3 + +#define TEST_STRING "Some text!!! Yes indeed, some of that very structure which has passed on man's knowledge for generations." +#define HELLO_WORLD "Hello, World!" +#define SLEEP_TIME 2 +#define WAIT_TIME (4l) +#define LENGTHEN_SIZE 500 +#define FIFO_SPACE 8192 /* FIFOS have 8K of buffer space */ + +/* + * Types of actions for setup, cleanup, and execution of tests + */ +typedef enum {CREAT, MKDIR, READ, WRITE, WRITEFD, FILLFD, UNLINK, LSKEE, RMDIR, MKFIFO, LENGTHEN, TRUNC, + SYMLINK, CHMOD, CHOWN, EXCHANGEDATA, RENAME, LSEEK, OPEN, MMAP, NOTHING, + SETXATTR, UTIMES, STAT, HARDLINK, REVOKE} action_id_t; + +/* + * Directs an action as mentioned above + */ +typedef struct _action { + int act_dosleep; + action_id_t act_id; + void *act_args[5]; + int act_fd; +} action_t; + +/* + * A test case. 
Specifies setup, an event to look for, an action to take to + * cause (or not cause) that event, and cleanup. + */ +typedef struct _test { + char *t_testname; + + /* Test kevent() or poll() */ + int t_is_poll_test; + + /* Actions for setting up test */ + int t_n_prep_actions; + action_t t_prep_actions[5]; + + /* Actions for cleaning up test */ + int t_n_cleanup_actions; + action_t t_cleanup_actions[5]; + + /* Action for thred to take while we wait */ + action_t t_helpthreadact; + + /* File to look for event on */ + char *t_watchfile; /* set event ident IN TEST (can't know fd beforehand)*/ + int t_file_is_fifo;/* FIFOs are handled in a special manner */ + + /* Different parameters for poll() vs kevent() */ + union { + struct kevent tu_kev; + short tu_pollevents; + } t_union; + + /* Do we expect results? */ + int t_want_event; + + /* Not always used--how much data should we find (EVFILT_{READ,WRITE}) */ + int t_nbytes; + + /* Hacks for FILT_READ and pipes */ + int t_read_to_end_first; /* Consume all data in file before waiting for event */ + int t_write_some_data; /* Write some data to file before waiting for event (FIFO hack) */ + int t_extra_sleep_hack; /* Sleep before waiting, to let a fifo fill up with data */ +} test_t; + +/* + * Extra logging infrastructure so we can filter some out + */ +void LOG(int level, FILE *f, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + if (level >= OUTPUT_LEVEL) { + /* Indent for ease of reading */ + if (level < RESULT_LEVEL) { + fprintf(f, "\t"); + } + vfprintf(f, fmt, ap); + } + + va_end(ap); +} + +/* + * Initialize an action struct. Whether to sleep, what action to take, + * and arguments for that action. + */ + void +init_action(action_t *act, int sleep, action_id_t call, int nargs, ...) 
/*
 * Opening a fifo is complicated: need to open both sides at once.
 *
 * Thread entry point that opens the read side of the fifo named by "arg".
 * The descriptor (or -1) is widened through long before being returned as
 * the thread result, so it round-trips intact where pointers are wider
 * than int.
 */
void*
open_fifo_readside(void *arg)
{
	return (void*)(long)open((const char*)arg, O_RDONLY);
}

/*
 * Open a fifo, setting read and write descriptors.  Return 0 for success, -1 for failure.
 * Only set FD args upon success; they will be unmodified on failure, and any
 * descriptor that was opened along the way is closed again.
 */
int
open_fifo(const char *path, int *readfd, int *writefd)
{
	pthread_t thread;
	void *thread_result = (void*)(long)-1;
	int tmpreadfd = -1;
	int tmpwritefd;
	int waitres;

	/* pthread_create() reports failure as a positive errno; normalize to -1 */
	if (pthread_create(&thread, NULL, open_fifo_readside, (void*)path) != 0) {
		return -1;
	}

	/* Each open() blocks until the other side shows up */
	tmpwritefd = open(path, O_WRONLY);

	/*
	 * Collect the result in a real void* -- joining straight into an int
	 * would write a pointer-sized value over a 4-byte object.
	 */
	waitres = pthread_join(thread, &thread_result);
	if (waitres == 0) {
		tmpreadfd = (int)(long)thread_result;
	}

	if ((waitres != 0) || (tmpwritefd < 0) || (tmpreadfd < 0)) {
		if (tmpwritefd >= 0) {
			close(tmpwritefd);
		}
		if (tmpreadfd >= 0) {
			close(tmpreadfd);
		}
		return -1;
	}

	/* Writers must not block: FILLFD relies on write() failing when full */
	fcntl(tmpwritefd, F_SETFL, O_WRONLY | O_NONBLOCK);

	*readfd = tmpreadfd;
	*writefd = tmpwritefd;
	return 0;
}

/*
 * Just concatenate a directory and a filename, sticking a "/" betwixt them.
 * "buf" must have room for strlen(dir) + strlen(file) + 2 bytes; no bounds
 * checking is (or can be) done here since the buffer size is not passed in.
 */
void
makepath(char *buf, const char *dir, const char *file)
{
	strcpy(buf, dir);
	strcat(buf, "/");
	strcat(buf, file);
}
+ */ +void* +execute_action(void *actionptr) +{ + action_t *act = (action_t*)actionptr; + void **args = act->act_args; + char c; + int res = -1, tmpfd, tmpfd2; + static int lastfd; + void *addr; + struct timeval tv; + struct stat sstat; + + LOG(1, stderr, "Beginning action of type %d\n", act->act_id); + + /* Let other thread get into kevent() sleep */ + if(SLEEP == act->act_dosleep) { + sleep(SLEEP_TIME); + } + switch(act->act_id) { + case NOTHING: + res = 0; + break; + case CREAT: + tmpfd = creat((char*)args[0], 0755); + ftruncate(tmpfd, 1); /* So that mmap() doesn't fool us */ + if (tmpfd >= 0) { + close(tmpfd); + res = 0; + } + break; + case MKDIR: + res = mkdir((char*)args[0], 0755); + break; + case READ: + tmpfd = open((char*)args[0], O_RDONLY); + if (tmpfd >= 0) { + res = read(tmpfd, &c, 1); + res = (res == 1 ? 0 : -1); + } + close(tmpfd); + break; + case WRITE: + tmpfd = open((char*)args[0], O_RDWR); + if (tmpfd >= 0) { + res = write(tmpfd, TEST_STRING, strlen(TEST_STRING)); + if (res == strlen(TEST_STRING)) { + res = 0; + } else { + res = -1; + } + + close(tmpfd); + } + break; + case WRITEFD: + res = write((int)act->act_fd, TEST_STRING, strlen(TEST_STRING)); + if (res == strlen(TEST_STRING)) { + res = 0; + } else { + res = -1; + } + break; + case FILLFD: + while (write((int)act->act_fd, "a", 1) > 0); + res = 0; + break; + case UNLINK: + res = unlink((char*)args[0]); + break; + case LSEEK: + res = lseek((int)act->act_fd, (int)args[0], SEEK_SET); + res = (res == (int)args[0] ? 
0 : -1); + break; + case RMDIR: + res = rmdir((char*)args[0]); + break; + case MKFIFO: + res = mkfifo((char*)args[0], 0755); + break; + case LENGTHEN: + res = truncate((char*)args[0], LENGTHEN_SIZE); + break; + case TRUNC: + res = truncate((char*)args[0], 0); + break; + case SYMLINK: + res = symlink((char*)args[0], (char*)args[1]); + break; + case CHMOD: + res = chmod((char*)args[0], (int)args[1]); + break; + case CHOWN: + /* path, uid, gid */ + res = chown((char*)args[0], (int) args[1], (int) args[2]); + break; + case EXCHANGEDATA: + res = exchangedata((char*)args[0], (char*)args[1], 0); + break; + case RENAME: + res = rename((char*)args[0], (char*)args[1]); + break; + case OPEN: + tmpfd = open((char*)args[0], O_RDONLY | O_CREAT); + res = close(tmpfd); + break; + case MMAP: + /* It had best already exist with nonzero size */ + tmpfd = open((char*)args[0], O_RDWR); + addr = mmap(0, 20, PROT_WRITE | PROT_READ, MAP_FILE | MAP_SHARED, tmpfd, 0); + if (addr != ((void*)-1)) { + res = 0; + if ((int)args[1]) { + strcpy((char*)addr, HELLO_WORLD); + msync(addr, 20, MS_SYNC); + } + } + close(tmpfd); + munmap(addr, 20); + break; + case SETXATTR: + res = setxattr((char*)args[0], KEY, (void*)VAL, strlen(VAL), + 0, 0); + break; + case UTIMES: + tv.tv_sec = time(NULL); + tv.tv_usec = 0; + res = utimes((char*)args[0], &tv); + break; + case STAT: + res = lstat((char*)args[0], &sstat); + break; + case HARDLINK: + res = link((char*)args[0], (char*)args[1]); + break; + case REVOKE: + tmpfd = open((char*)args[0], O_RDONLY); + res = revoke((char*)args[0]); + close(tmpfd); + break; + default: + res = -1; + break; + } + + return (void*)res; + +} + +/* + * Read until the end of a file, for EVFILT_READ purposes (considers file position) + */ +void +read_to_end(int fd) +{ + char buf[50]; + while (read(fd, buf, sizeof(buf)) > 0); +} + +/* + * Helper for setup and cleanup; just execute every action in an array + * of actions. "failout" parameter indicates whether to stop if one fails. 
+ */ +int +execute_action_list(action_t *actions, int nactions, int failout) +{ + int i, res; + for (i = 0, res = 0; (0 == res || (!failout)) && (i < nactions); i++) { + LOG(1, stderr, "Starting prep action %d\n", i); + res = (int) execute_action(&(actions[i])); + if(res != 0) { + LOG(2, stderr, "Action list failed on step %d.\n", i); + } else { + LOG(1, stderr, "Action list work succeeded on step %d.\n", i); + } + } + + return res; +} + +/* + * Execute a full test, return success value. + */ +int +execute_test(test_t *test) +{ + int i, kqfd, filefd = -1, res2, res, cnt, status, writefd = -1; + int retval = -1; + pthread_t thr; + struct kevent evlist; + struct timespec ts = {WAIT_TIME, 0l}; + + memset(&evlist, 0, sizeof(evlist)); + + LOG(1, stderr, "Test %s starting.\n", test->t_testname); + LOG(1, stderr, test->t_want_event ? "Expecting an event.\n" : "Not expecting events.\n"); + + res = execute_action_list(test->t_prep_actions, test->t_n_prep_actions, 1); + + /* If prep succeeded */ + if (0 == res) { + /* Create kqueue for kqueue tests*/ + if (!test->t_is_poll_test) { + kqfd = kqueue(); + } + + if ((test->t_is_poll_test) || kqfd >= 0) { + LOG(1, stderr, "Opened kqueue.\n"); + + /* Open the file we're to monitor. Fifos get special handling */ + if (test->t_file_is_fifo) { + filefd = -1; + open_fifo(test->t_watchfile, &filefd, &writefd); + } else { + filefd = open(test->t_watchfile, O_RDONLY | O_SYMLINK); + } + + if (filefd >= 0) { + LOG(1, stderr, "Opened file to monitor.\n"); + + /* + * Fill in the fd to monitor once you know it + * If it's a fifo test, then the helper is definitely going to want the write end. + */ + test->t_helpthreadact.act_fd = (writefd >= 0 ? 
writefd : filefd); + + if (test->t_read_to_end_first) { + read_to_end(filefd); + } else if (test->t_write_some_data) { + action_t dowr; + init_action(&dowr, NOSLEEP, WRITEFD, 0); + dowr.act_fd = writefd; + execute_action(&dowr); + } + + /* Helper modifies the file that we're listening on (sleeps first, in general) */ + res = pthread_create(&thr, NULL, execute_action, (void*) &test->t_helpthreadact); + if (0 == res) { + LOG(1, stderr, "Created helper thread.\n"); + + /* This is ugly business to hack on filling up a FIFO */ + if (test->t_extra_sleep_hack) { + sleep(5); + } + + if (test->t_is_poll_test) { + struct pollfd pl; + pl.fd = filefd; + pl.events = test->t_union.tu_pollevents; + cnt = poll(&pl, 1, WAIT_TIME); + LOG(1, stderr, "Finished poll() call.\n"); + + if ((cnt < 0)) { + LOG(2, stderr, "error is in errno, %s\n", strerror(errno)); + res = cnt; + } + } else { + test->t_union.tu_kev.ident = filefd; + cnt = kevent(kqfd, &test->t_union.tu_kev, 1, &evlist, 1, &ts); + LOG(1, stderr, "Finished kevent() call.\n"); + + if ((cnt < 0) || (evlist.flags & EV_ERROR)) { + LOG(2, stderr, "kevent() call failed.\n"); + if (cnt < 0) { + LOG(2, stderr, "error is in errno, %s\n", strerror(errno)); + } else { + LOG(2, stderr, "error is in data, %s\n", strerror(evlist.data)); + } + res = cnt; + } + } + + /* Success only if you've succeeded to this point AND joined AND other thread is happy*/ + status = 0; + res2 = pthread_join(thr, (void**)&status); + if (res2 < 0) { + LOG(2, stderr, "Couldn't join helper thread.\n"); + } else if (status) { + LOG(2, stderr, "Helper action had result %d\n", (int)status); + } + res = ((res == 0) && (res2 == 0) && (status == 0)) ? 
0 : -1; + } else { + LOG(2, stderr, "Couldn't start thread.\n"); + } + + close(filefd); + if (test->t_file_is_fifo) { + close(writefd); + } + } else { + LOG(2, stderr, "Couldn't open test file %s to monitor.\n", test->t_watchfile); + res = -1; + } + close(kqfd); + } else { + LOG(2, stderr, "Couldn't open kqueue.\n"); + res = -1; + } + } + + /* Cleanup work */ + execute_action_list(test->t_cleanup_actions, test->t_n_cleanup_actions, 0); + + /* Success if nothing failed and we either received or did not receive event, + * as expected + */ + if (0 == res) { + LOG(1, stderr, cnt > 0 ? "Got an event.\n" : "Did not get an event.\n"); + if (((cnt > 0) && (test->t_want_event)) || ((cnt == 0) && (!test->t_want_event))) { + if ((!test->t_is_poll_test) && (test->t_union.tu_kev.filter == EVFILT_READ || test->t_union.tu_kev.filter == EVFILT_WRITE) + && (test->t_nbytes) && (test->t_nbytes != evlist.data)) { + LOG(2, stderr, "Read wrong number of bytes available. Wanted %d, got %d\n", test->t_nbytes, evlist.data); + retval = -1; + } else { + retval = 0; + } + + } else { + LOG(2, stderr, "Got unexpected event or lack thereof.\n"); + retval = -1; + } + } else { + LOG(2, stderr, "Failed to execute test.\n"); + retval = -1; + } + + LOG(3, stdout, "Test %s done with result %d.\n", test->t_testname, retval); +} + +void +init_test_common(test_t *tst, char *testname, char *watchfile, int nprep, int nclean, int event, int want, int ispoll) +{ + memset(tst, 0, sizeof(test_t)); + tst->t_testname = testname; + tst->t_watchfile = watchfile; + tst->t_n_prep_actions = nprep; + tst->t_n_cleanup_actions = nclean; + tst->t_want_event = (want > 0); + + if (ispoll) { + tst->t_is_poll_test = 1; + tst->t_union.tu_pollevents = (short)event; + } else { + /* Can do this because filter is negative, notes are positive */ + if (event == EVFILT_READ || event == EVFILT_WRITE) { + EV_SET(&tst->t_union.tu_kev, 0, event, EV_ADD | EV_ENABLE, 0, 0, NULL); + tst->t_nbytes = want; + } else { + 
EV_SET(&tst->t_union.tu_kev, 0, EVFILT_VNODE, EV_ADD | EV_ENABLE, event, 0, NULL); + } + } +} + +/* + * Initialize a test case, not including its actions. Meaning: a name for it, what filename to watch, + * counts of prep and cleanup actions, what event to watch for, and whether you want an event/how many bytes read. + * + * "want" does double duty as whether you want an event and how many bytes you might want to read + * "event" is either an event flag (e.g. NOTE_WRITE) or EVFILT_READ + */ +void +init_test(test_t *tst, char *testname, char *watchfile, int nprep, int nclean, int event, int want) +{ + init_test_common(tst, testname, watchfile, nprep, nclean, event, want, 0); +} + +/* + * Same as above, but for a poll() test + */ +void +init_poll_test(test_t *tst, char *testname, char *watchfile, int nprep, int nclean, int event, int want) +{ + init_test_common(tst, testname, watchfile, nprep, nclean, event, want, 1); +} + +void +run_note_delete_tests() +{ + test_t test; + + init_test(&test, "1.1.2: unlink a file", FILE1, 1, 0, NOTE_DELETE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "1.1.3: rmdir a dir", DIR1, 1, 0, NOTE_DELETE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + init_test(&test, "1.1.4: rename one file over another", FILE2, 2, 1, NOTE_DELETE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + 
init_test(&test, "1.1.5: rename one dir over another", DIR2, 2, 1, NOTE_DELETE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); + execute_test(&test); + + /* Do FIFO stuff here */ + init_test(&test, "1.1.6: make a fifo, unlink it", FILE1, 1, 0, NOTE_DELETE, YES_EVENT); + test.t_file_is_fifo = 1; + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1); + execute_test(&test); + + init_test(&test, "1.1.7: rename a file over a fifo", FILE1, 2, 1, NOTE_DELETE, YES_EVENT); + test.t_file_is_fifo = 1; + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE2, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "1.1.8: unlink a symlink to a file", FILE2, 2, 1, NOTE_DELETE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, SYMLINK, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE2, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + /* ================= */ + + init_test(&test, "1.2.1: Straight-up rename file", FILE1, 1, 1, NOTE_DELETE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, 
(void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL); + execute_test(&test); + + init_test(&test, "1.2.2: Straight-up rename dir", DIR1, 1, 1, NOTE_DELETE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, (void*)NULL); + execute_test(&test); + + init_test(&test, "1.2.3: Null action on file", FILE1, 1, 1, NOTE_DELETE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 2, NULL, NULL); /* The null action */ + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "1.2.4: Rename one file over another: watch the file that lives", FILE1, 2, 1, NOTE_DELETE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "1.2.5: Rename one dir over another, watch the dir that lives", DIR1, 2, 1, NOTE_DELETE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); +} + +void +run_note_write_tests() +{ + char pathbuf[50]; + char otherpathbuf[50]; + + test_t test; + + init_test(&test, "2.1.1: Straight-up write to a file", 
FILE1, 1, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.2: creat() file inside a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CREAT, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.3: open() file inside a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.3: unlink a file from a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + makepath(otherpathbuf, DIR1, FILE2); + init_test(&test, "2.1.5: rename a file in a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), 
NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)pathbuf, (void*)otherpathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.6: rename a file to outside of a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)pathbuf, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.7: rename a file into a dir", DIR1, 2, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.9: unlink a fifo from a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKFIFO, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, 
(void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.10: make symlink in a dir", DIR1, 1, 2, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)DOTDOT, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.1.12: write to a FIFO", FILE1, 1, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, WRITEFD, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "2.1.13: delete a symlink in a dir", DIR1, 2, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, SYMLINK, 2, (void*)DOTDOT, (void*)pathbuf); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + /* This actually should not generate an event, though it's in this section */ + makepath(pathbuf, DIR1, FILE1); + makepath(otherpathbuf, DIR1, FILE2); + init_test(&test, "2.1.14: exchangedata two files in a dir", DIR1, 3, 3, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&(test.t_prep_actions[2]), NOSLEEP, 
CREAT, 2, (void*)otherpathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, EXCHANGEDATA, 2, (void*)pathbuf, (void*)otherpathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[2], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + LOG(1, stderr, "MMAP test should fail on HFS.\n"); + init_test(&test, "2.1.15: Change a file with mmap()", FILE1, 1, 1, NOTE_WRITE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MMAP, 2, (void*)FILE1, (void*)1); /* 1 -> "modify it"*/ + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + /*================= no-event tests ==================*/ + init_test(&test, "2.2.1: just open and close existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.2: read from existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, READ, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.3: rename existing file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], 
NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.4: just open and close dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + /* There are no tests 2.2.5 or 2.2.6 */ + + init_test(&test, "2.2.7: rename a dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.8: rename a fifo", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + test.t_file_is_fifo = 1; + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.9: unlink a fifo", FILE1, 1, 0, NOTE_WRITE, NO_EVENT); + test.t_file_is_fifo = 1; + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK,1, (void*)FILE1); + execute_test(&test); + + init_test(&test, "2.2.10: chmod a file", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)FILE1, (void*)0700); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + struct passwd *pwd = getpwnam("local"); + int uid = pwd->pw_uid; + int gid = pwd->pw_gid; + + 
init_test(&test, "2.2.11: chown a file", FILE1, 2, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)FILE1, (void*)getuid(), (void*)getgid()); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + init_test(&test, "2.2.12: chmod a dir", DIR1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "2.2.13: chown a dir", DIR1, 2, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)DIR1, (void*)uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)DIR1, (void*)getuid(), (void*)getgid()); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + + + LOG(1, stderr, "MMAP will never give a notification on HFS.\n"); + init_test(&test, "2.1.14: mmap() a file but do not change it", FILE1, 1, 1, NOTE_WRITE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MMAP, 2, (void*)FILE1, (void*)0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); +} + +void +run_note_extend_tests() +{ + test_t test; + char pathbuf[50]; + + LOG(1, stderr, "THESE TESTS WILL FAIL ON HFS!\n"); + + init_test(&test, "3.1.1: write beyond the end of a file", FILE1, 1, 1, NOTE_EXTEND, YES_EVENT); + 
init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + /* + * We won't concern ourselves with lengthening directories: commenting these out + * + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "3.1.2: add a file to a directory with creat()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "3.1.3: add a file to a directory with open()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "3.1.4: add a file to a directory with rename()", DIR1, 2, 2, NOTE_EXTEND, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + 
execute_test(&test); + */ + + /* 3.1.5: a placeholder for a potential kernel test */ + /* + makepath(pathbuf, DIR1, DIR2); + init_test(&test, "3.1.6: add a file to a directory with mkdir()", DIR1, 1, 2, NOTE_EXTEND, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + */ + init_test(&test, "3.1.7: lengthen a file with truncate()", FILE1, 1, 1, NOTE_EXTEND, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, LENGTHEN, 2, FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + /** ========== NO EVENT SECTION ============== **/ + init_test(&test, "3.2.1: setxattr() a file", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "3.2.2: chmod a file", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)FILE1, (void*)0700); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + struct passwd *pwd = getpwnam("local"); + if (!pwd) { + LOG(2, stderr, "Couldn't getpwnam for local.\n"); + exit(1); + } + int uid = pwd->pw_uid; + int gid = pwd->pw_gid; + + init_test(&test, "3.2.3: chown a file", FILE1, 2, 1, 
NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)FILE1, (void*)getuid(), (void*)getgid()); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + init_test(&test, "3.2.4: chmod a dir", DIR1, 1, 1, NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "3.2.5: chown a dir", DIR1, 2, 1, NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_prep_actions[1], NOSLEEP, CHOWN, 3, (void*)DIR1, (void*)uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, (void*)DIR1, (void*)getuid(), (void*)getgid()); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "3.2.6: TRUNC a file with truncate()", FILE1, 1, 1, NOTE_EXTEND, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, TRUNC, 2, FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); +} + +void +run_note_attrib_tests() +{ + test_t test; + char pathbuf[50]; + + init_test(&test, "4.1.1: chmod a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CHMOD, 2, FILE1, (void*)0700); + init_action(&test.t_cleanup_actions[0], 
NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + struct passwd *pwd = getpwnam("local"); + int uid = pwd->pw_uid; + int gid = pwd->pw_gid; + + init_test(&test, "4.1.2: chown a file", FILE1, 2, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CHOWN, 3, (void*)FILE1, (void*)uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, FILE1, (void*)getuid(), (void*)gid); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.1.3: chmod a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_helpthreadact), SLEEP, CHMOD, 2, (void*)DIR1, (void*)0700); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.1.4: chown a dir", DIR1, 2, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CHOWN, 3, (void*)DIR1, (void*) uid, (void*)gid); + init_action(&test.t_helpthreadact, SLEEP, CHOWN, 3, DIR1, (void*)getuid(), (void*)getgid()); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.1.5: setxattr on a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.1.6: setxattr on a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, 
(void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SETXATTR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + + init_test(&test, "4.1.7: exchangedata", FILE1, 2, 2, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, EXCHANGEDATA, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL); + execute_test(&test); + + + init_test(&test, "4.1.8: utimes on a file", FILE1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UTIMES, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.1.9: utimes on a dir", DIR1, 1, 1, NOTE_ATTRIB, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UTIMES, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + + /* ====== NO EVENT TESTS ========== */ + + init_test(&test, "4.2.1: rename a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "4.2.2: open (do not change) a file", FILE1, 1, 1, 
NOTE_ATTRIB, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, OPEN, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "4.2.3: stat a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, STAT, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "4.2.4: unlink a file", FILE1, 1, 0, NOTE_ATTRIB, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "4.2.5: write to a file", FILE1, 1, 1, NOTE_ATTRIB, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, WRITE, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + LOG(1, stderr, "EXPECT SPURIOUS NOTE_ATTRIB EVENTS FROM DIRECTORY OPERATIONS on HFS.\n"); + init_test(&test, "4.2.6: add a file to a directory with creat()", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, FILE1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.2.7: mkdir in a dir", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, DIR2); + 
init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.2.8: add a symlink to a directory", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, FILE1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)DOTDOT, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.2.9: rename into a dir()", DIR1, 2, 2, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, FILE1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.2.10: unlink() file from dir", DIR1, 2, 1, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, FILE1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + init_test(&test, "4.2.11: mkfifo 
in a directory", DIR1, 1, 2, NOTE_ATTRIB, NO_EVENT); + makepath(pathbuf, DIR1, FILE1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MKFIFO, 1, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + +} + + +void +run_note_link_tests() +{ + test_t test; + char pathbuf[50]; + char otherpathbuf[50]; + + LOG(1, stderr, "HFS DOES NOT HANDLE UNLINK CORRECTLY...\n"); + init_test(&test, "5.1.1: unlink() a file", FILE1, 1, 0, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, (void*)NULL); + execute_test(&test); + + + init_test(&test, "5.1.1.5: link A to B, watch A, remove B", FILE1, 2, 1, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "5.1.2: link() to a file", FILE1, 1, 2, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, DIR2); + init_test(&test, "5.1.3: make one dir in another", DIR1, 1, 2, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, 
(void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, DIR2); + init_test(&test, "5.1.4: rmdir a dir from within another", DIR1, 2, 1, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, DIR2); + makepath(otherpathbuf, DIR1, DIR1); + init_test(&test, "5.1.5: rename dir A over dir B inside dir C", DIR1, 3, 2, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&(test.t_prep_actions[2]), NOSLEEP, MKDIR, 2, (void*)otherpathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)pathbuf, (void*)otherpathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)otherpathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + LOG(1, stderr, "HFS bypasses hfs_makenode to create in target, so misses knote.\n"); + makepath(pathbuf, DIR1, DIR2); + init_test(&test, "5.1.6: rename one dir into another", DIR1, 2, 2, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR2, (void*)pathbuf); 
+ init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + LOG(1, stderr, "HFS bypasses hfs_removedir to remove from source, so misses knote.\n"); + makepath(pathbuf, DIR1, DIR2); + init_test(&test, "5.1.7: rename one dir out of another", DIR1, 2, 2, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)pathbuf, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + init_test(&test, "5.1.8: rmdir a dir", DIR1, 1, 0, NOTE_LINK, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, (void*)NULL); + execute_test(&test); + + /* ============= NO EVENT SECTION ============== */ + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "5.2.1: make a file in a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "5.2.2: unlink a file in a dir", DIR1, 2, 1, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, 
UNLINK, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + makepath(otherpathbuf, DIR1, FILE2); + init_test(&test, "5.2.3: rename a file within a dir", DIR1, 2, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)pathbuf, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)pathbuf, (void*)otherpathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)otherpathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "5.2.4: rename a file into a dir", DIR1, 2, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + makepath(pathbuf, DIR1, FILE1); + init_test(&test, "5.2.5: make a symlink in a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)DOTDOT, (void*)pathbuf); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)pathbuf, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + init_test(&test, "5.2.6: make a symlink to a dir", DIR1, 1, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, 
(void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)DIR1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + init_test(&test, "5.2.7: make a symlink to a file", FILE1, 1, 2, NOTE_LINK, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, SYMLINK, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); +} + +void +run_note_rename_tests() +{ + test_t test; + + init_test(&test, "6.1.1: rename a file", FILE1, 1, 1, NOTE_RENAME, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "6.1.2: rename a dir", DIR1, 1, 1, NOTE_RENAME, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); + execute_test(&test); + + init_test(&test, "6.1.2: rename one file over another", FILE1, 2, 1, NOTE_RENAME, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "6.1.3: 
rename one dir over another", DIR1, 2, 1, NOTE_RENAME, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); + execute_test(&test); + + /* ========= NO EVENT SECTION =========== */ + + init_test(&test, "6.2.1: unlink a file", FILE1, 1, 0, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "6.2.2: rmdir a dir", DIR1, 1, 0, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); + + init_test(&test, "6.2.3: link() to a file", FILE1, 1, 2, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, HARDLINK, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + init_action(&test.t_cleanup_actions[1], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "6.2.4: rename one file over another: watch deceased", + FILE2, 2, 1, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, CREAT, 2, (void*)FILE2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "6.2.5: rename one dir over another: 
watch deceased", + DIR2, 2, 1, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, MKDIR, 2, (void*)DIR2, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR2, NULL); + execute_test(&test); + + init_test(&test, "6.2.6: rename a file to itself", FILE1, 1, 1, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "6.2.7: rename a dir to itself", DIR1, 1, 1, NOTE_RENAME, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKDIR, 2, (void*)DIR1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)DIR1, (void*)DIR1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, RMDIR, 2, (void*)DIR1, NULL); + execute_test(&test); +} + +void +run_note_revoke_tests() +{ + test_t test; + init_test(&test, "7.1.1: revoke file", FILE1, 1, 1, NOTE_REVOKE, YES_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, REVOKE, 1, (void*)FILE1); + init_action(&(test.t_cleanup_actions[0]), NOSLEEP, UNLINK, 1, (void*)FILE1); + execute_test(&test); + + init_test(&test, "7.2.1: delete file", FILE1, 1, 0, NOTE_REVOKE, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1); + execute_test(&test); +} + + +void +run_evfilt_read_tests() +{ + test_t test; + init_test(&test, "8.1.1: how much data in file of length LENGTHEN_SIZE?", FILE1, 2, 1, EVFILT_READ, LENGTHEN_SIZE); + init_action(&(test.t_prep_actions[0]), NOSLEEP, 
CREAT, 2, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, LENGTHEN, 2, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "8.1.2: block, then write to file", FILE1, 2, 1, EVFILT_READ, strlen(TEST_STRING)); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, WRITE, 1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "8.1.3: block, then extend", FILE1, 2, 1, EVFILT_READ, LENGTHEN_SIZE); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, LENGTHEN, 1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "8.1.4: block, then seek to beginning", FILE1, 2, 1, EVFILT_READ, strlen(TEST_STRING)); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, WRITE, 1, (void*)FILE1); + test.t_read_to_end_first = 1; /* hack means that we've gotten to EOF before we block */ + init_action(&test.t_helpthreadact, SLEEP, LSEEK, 1, (void*)0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + + init_test(&test, "8.1.5: block, then write to fifo", FILE1, 1, 1, EVFILT_READ, strlen(TEST_STRING)); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, WRITE, 1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 
2, (void*)FILE1, NULL); + execute_test(&test); + + /* No result section... */ + init_test(&test, "8.2.1: just rename", FILE1, 2, 1, EVFILT_READ, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, RENAME, 2, (void*)FILE1, (void*)FILE2); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE2, NULL); + execute_test(&test); + + init_test(&test, "8.2.2: delete file", FILE1, 2, 0, EVFILT_READ, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, TRUNC, 1, (void*)FILE1); + init_action(&test.t_helpthreadact, SLEEP, UNLINK, 1, (void*)FILE1); + execute_test(&test); + + init_test(&test, "8.2.3: write to beginning", FILE1, 2, 1, EVFILT_READ, NO_EVENT); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, WRITE, 1, (void*)FILE1); + test.t_read_to_end_first = 1; /* hack means that we've gotten to EOF before we block */ + init_action(&test.t_helpthreadact, SLEEP, WRITE, 1, (void*)FILE1); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 1, (void*)FILE1); + execute_test(&test); + + init_test(&test, "8.1.4: block, then seek to current location", FILE1, 2, 1, EVFILT_READ, 0); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1); + init_action(&(test.t_prep_actions[1]), NOSLEEP, WRITE, 1, (void*)FILE1); + test.t_read_to_end_first = 1; /* hack means that we've gotten to EOF before we block */ + init_action(&test.t_helpthreadact, SLEEP, LSEEK, 1, (void*)strlen(TEST_STRING)); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "8.2.5: trying to read from empty fifo", FILE1, 1, 1, EVFILT_READ, 0); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, 
(void*)FILE1); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 1, (void*)0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + +} + + +/* Helper taking a file descriptor smuggled through the void* argument: sleeps 2 seconds, then reads up to sizeof(buf) bytes and returns the read() result cast to a pointer. Presumably used as a thread start routine (the signature matches) - confirm against callers. */ +void* +read_from_fd(void *arg) +{ + char buf[50]; + int fd = (int) arg; + sleep(2); + return (void*) read(fd, buf, sizeof(buf)); +} +/* Counterpart of read_from_fd: sleeps 2 seconds, then writes sizeof(buf) bytes to the descriptor and returns the write() result cast to a pointer. NOTE(review): buf is never initialized, so the bytes written are indeterminate. */ +void* +write_to_fd(void *arg) +{ + char buf[50]; + int fd = (int) arg; + sleep(2); + return (void*) write(fd, buf, sizeof(buf)); +} + +/* + * EVFILT_WRITE tests (9.x), exercised on FIFOs only: we don't (in principle) support EVFILT_WRITE for vnodes; thusly, no vnode tests here + */ +void +run_evfilt_write_tests() +{ + + test_t test; + init_test(&test, "9.1.1: how much space in empty fifo?", FILE1, 1, 1, EVFILT_WRITE, FIFO_SPACE); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "9.1.2: how much space in slightly written fifo?", FILE1, 1, 1, EVFILT_WRITE, FIFO_SPACE - strlen(TEST_STRING)); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + test.t_write_some_data = 1; + init_action(&(test.t_helpthreadact), NOSLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_test(&test, "9.2.1: how much space in a full fifo?", FILE1, 1, 1, EVFILT_WRITE, 0); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + test.t_extra_sleep_hack = 1; + init_action(&(test.t_helpthreadact), NOSLEEP, FILLFD, 1, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); +} + +void +run_poll_tests() +{ + test_t test; + init_poll_test(&test, "10.1.1: does poll say I can 
write a regular file?", FILE1, 1, 1, POLLWRNORM, 1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_poll_test(&test, "10.1.2: does poll say I can write an empty FIFO?", FILE1, 1, 1, POLLWRNORM, 1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_poll_test(&test, "10.1.3: does poll say I can read a nonempty FIFO?", FILE1, 1, 1, POLLRDNORM, 1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + test.t_write_some_data = 1; + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_poll_test(&test, "10.1.4: does poll say I can read a nonempty regular file?", FILE1, 2, 1, POLLRDNORM, 1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1, (void*)NULL); + init_action(&(test.t_prep_actions[1]), NOSLEEP, LENGTHEN, 1, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_poll_test(&test, "10.1.5: does poll say I can read an empty file?", FILE1, 1, 1, POLLRDNORM, 1); + init_action(&(test.t_prep_actions[0]), NOSLEEP, CREAT, 1, (void*)FILE1, (void*)NULL); + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + + + + init_poll_test(&test, "10.2.2: does poll say I can read an empty 
FIFO?", FILE1, 1, 1, POLLRDNORM, 0); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + init_action(&test.t_helpthreadact, SLEEP, NOTHING, 0); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); + + init_poll_test(&test, "10.2.3: does poll say I can write a full FIFO?", FILE1, 1, 1, POLLWRNORM, 0); + init_action(&(test.t_prep_actions[0]), NOSLEEP, MKFIFO, 1, (void*)FILE1, (void*)NULL); + test.t_file_is_fifo = 1; + test.t_extra_sleep_hack = 1; + init_action(&(test.t_helpthreadact), NOSLEEP, FILLFD, 1, (void*)FILE1, (void*)NULL); + init_action(&test.t_cleanup_actions[0], NOSLEEP, UNLINK, 2, (void*)FILE1, NULL); + execute_test(&test); +} + + void +run_all_tests() +{ + run_note_delete_tests(); + run_note_write_tests(); + run_note_extend_tests(); + run_note_attrib_tests(); + run_note_link_tests(); + run_note_rename_tests(); +#if 0 + run_note_revoke_tests(); /* Can no longer revoke a regular file--need an unmount test */ +#endif /* 0 */ + run_evfilt_read_tests(); + run_evfilt_write_tests(); + run_poll_tests(); +} + + int +main(int argc, char **argv) +{ + char *which = NULL; + if (argc > 1) { + which = argv[1]; + } + + if ((!which) || (strcmp(which, "all") == 0)) + run_all_tests(); + else if (strcmp(which, "delete") == 0) + run_note_delete_tests(); + else if (strcmp(which, "write") == 0) + run_note_write_tests(); + else if (strcmp(which, "extend") == 0) + run_note_extend_tests(); + else if (strcmp(which, "attrib") == 0) + run_note_attrib_tests(); + else if (strcmp(which, "link") == 0) + run_note_link_tests(); + else if (strcmp(which, "rename") == 0) + run_note_rename_tests(); + else if (strcmp(which, "revoke") == 0) + run_note_revoke_tests(); + else if (strcmp(which, "evfiltread") == 0) + run_evfilt_read_tests(); + else if (strcmp(which, "evfiltwrite") == 0) + run_evfilt_write_tests(); + else if (strcmp(which, "poll") == 0) + run_poll_tests(); + else { 
+ fprintf(stderr, "Valid options are:\n\tdelete, write, extend," + "attrib, link, rename, revoke, evfiltread, fifo, all, evfiltwrite\n"); + exit(1); + } + return 0; +} + diff --git a/tools/tests/kqueue_tests/kqueue_timer_tests.c b/tools/tests/kqueue_tests/kqueue_timer_tests.c new file mode 100644 index 000000000..4111af382 --- /dev/null +++ b/tools/tests/kqueue_tests/kqueue_timer_tests.c @@ -0,0 +1,253 @@ +#include +#include +#include +#include +#include +#include +#include + +int kq, passed, failed; + +/* + * Wait for given kevent, which should return in 'expected' usecs. + */ +int +do_simple_kevent(struct kevent64_s *kev, uint64_t expected) +{ + int ret; + uint64_t elapsed_usecs, delta_usecs; + struct timespec timeout; + struct timeval before, after; + + /* time out after 1 sec extra delay */ + timeout.tv_sec = (expected / (1000 * 1000)) + 1; + timeout.tv_nsec = (expected % (1000 * 1000)) * 1000; + + /* measure time for the kevent */ + gettimeofday(&before, NULL); + ret = kevent64(kq, kev, 1, kev, 1, 0, &timeout); + gettimeofday(&after, NULL); + + if (ret < 1 || (kev->flags & EV_ERROR)) { + printf("\tfailure: kevent returned %d, error %d\n", ret, + (ret == -1 ? errno : (int) kev->data)); + return 0; + } + + /* did it work? 
*/ + elapsed_usecs = (after.tv_sec - before.tv_sec) * (1000 * 1000) + + (after.tv_usec - before.tv_usec); + delta_usecs = (elapsed_usecs > expected) ? (elapsed_usecs - expected) : (expected - elapsed_usecs); + + /* failure only if we're more than 30% off AND more than 50 usecs off */ + if (delta_usecs > (30 * expected / 100.0) && delta_usecs > 50) { + printf("\tfailure: expected %llu usec, measured %llu usec.\n", + (unsigned long long) expected, (unsigned long long) elapsed_usecs); + return 0; + } else { + printf("\tsuccess.\n"); + return 1; + } +} +/* Arm an absolute-deadline timer `time` units from now (unit chosen by scale; 0 means msec) and check via do_simple_kevent that it fires on schedule; bumps passed or failed. */ +void +test_absolute_kevent(int time, int scale) +{ + struct timeval tv; + struct kevent64_s kev; + uint64_t nowus, expected, deadline; + int ret; + int timescale = 0; + + gettimeofday(&tv, NULL); + nowus = tv.tv_sec * (1000 * 1000LL) + tv.tv_usec; + + switch (scale) { + case NOTE_SECONDS: + printf("Testing %d sec absolute timer...\n", time); + timescale = 1000 * 1000; + break; + case NOTE_USECONDS: + printf("Testing %d usec absolute timer...\n", time); + timescale = 1; + break; + case 0: + printf("Testing %d msec absolute timer...\n", time); + timescale = 1000; + break; + default: + printf("Failure: scale 0x%x not recognized.\n", scale); + return; + } + + expected = time * timescale; + deadline = nowus / timescale + time; + + /* deadlines in the past should fire immediately */ + if (time < 0) + expected = 0; + + EV_SET64(&kev, 1, EVFILT_TIMER, EV_ADD, + NOTE_ABSOLUTE | scale, deadline, 0,0,0); + ret = do_simple_kevent(&kev, expected); + + if (ret) + passed++; + else + failed++; +} +/* Arm a one-shot interval timer of `time` units (unit chosen by scale; 0 means msec) and check via do_simple_kevent that it fires after the equivalent number of usecs; bumps passed or failed. */ +void +test_oneshot_kevent(int time, int scale) +{ + int ret; + uint64_t expected = 0; + struct kevent64_s kev; + + switch (scale) { + case NOTE_SECONDS: + printf("Testing %d sec interval timer...\n", time); + expected = time * (1000 * 1000); + break; + case NOTE_USECONDS: + printf("Testing %d usec interval timer...\n", time); + expected = time; + break; + case NOTE_NSECONDS: + printf("Testing %d nsec interval timer...\n", time); + expected = time / 1000; + break; + case 0: + printf("Testing %d msec interval timer...\n", time); + expected = time * 1000; + 
break; + default: + printf("Failure: scale 0x%x not recognized.\n", scale); + return; + } + + /* deadlines in the past should fire immediately */ + if (time < 0) + expected = 0; + /* one-shot timer, ident 2: scale is passed as the filter flags, time as the timer value */ + EV_SET64(&kev, 2, EVFILT_TIMER, EV_ADD | EV_ONESHOT, scale, time, + 0, 0, 0); + ret = do_simple_kevent(&kev, expected); + + if (ret) + passed++; + else + failed++; + +} +/* Arm a repeating timer with a period of usec microseconds, sleep one second, then compare the fire count reported in kev.data with 1000000 / usec; bumps passed or failed. */ +void +test_repeating_kevent(int usec) +{ + struct kevent64_s kev; + int expected_pops, ret; + /* NOTE(review): usec == 0 would divide by zero here; no caller visible in this file passes 0 */ + expected_pops = 1000 * 1000 / usec; + printf("Testing repeating kevent for %d pops in a second...\n", + expected_pops); + /* register the repeating timer (ident 3); this call collects no events */ + EV_SET64(&kev, 3, EVFILT_TIMER, EV_ADD, NOTE_USECONDS, usec, 0, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret != 0) { + printf("\tfailure: kevent64 returned %d\n", ret); + failed++; + return; + } + + /* sleep 1 second */ + usleep(1000 * 1000); + ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); + if (ret != 1 || (kev.flags & EV_ERROR)) { + printf("\tfailure: kevent64 returned %d\n", ret); + failed++; + return; + } + + /* check how many times the timer fired: within 5%? 
*/ + if (kev.data > expected_pops + (expected_pops / 20) || + kev.data < expected_pops - (expected_pops / 20)) { + printf("\tfailure: saw %lld pops.\n", kev.data); + failed++; + } else { + printf("\tsuccess: saw %lld pops.\n", kev.data); + passed++; + } + /* delete the repeating timer (ident 3) again */ + EV_SET64(&kev, 3, EVFILT_TIMER, EV_DELETE, 0, 0, 0, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret != 0) { + printf("\tfailed to stop repeating timer: %d\n", ret); + } +} +/* Arm a one-shot timer for `first` msecs, re-submit the same ident with `second` msecs, then check via do_simple_kevent that it fires after `second` msecs (immediately when `second` is negative); bumps passed or failed. */ +void test_updated_kevent(int first, int second) +{ + struct kevent64_s kev; + int ret; + + printf("Testing update from %d to %d msecs...\n", first, second); + + EV_SET64(&kev, 4, EVFILT_TIMER, EV_ADD|EV_ONESHOT, 0, first, 0, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret != 0) { + printf("\tfailure: initial kevent returned %d\n", ret); + failed++; + return; + } + + EV_SET64(&kev, 4, EVFILT_TIMER, EV_ONESHOT, 0, second, 0, 0, 0); + if (second < 0) + second = 0; + ret = do_simple_kevent(&kev, second * 1000); + if (ret) + passed++; + else + failed++; +} + +int +main(void) +{ + struct timeval tv; + struct kevent64_s kev; + uint64_t nowms, deadline; + + kq = kqueue(); + assert(kq >= 0); + passed = 0; + failed = 0; + + test_absolute_kevent(100, 0); + test_absolute_kevent(200, 0); + test_absolute_kevent(300, 0); + test_absolute_kevent(1000, 0); + test_absolute_kevent(500, NOTE_USECONDS); + test_absolute_kevent(100, NOTE_USECONDS); + test_absolute_kevent(5, NOTE_SECONDS); + test_absolute_kevent(-1000, 0); + + test_oneshot_kevent(1, NOTE_SECONDS); + test_oneshot_kevent(10, 0); + test_oneshot_kevent(200, NOTE_USECONDS); + test_oneshot_kevent(300000, NOTE_NSECONDS); + test_oneshot_kevent(-1, NOTE_SECONDS); + + test_repeating_kevent(100 * 1000); + test_repeating_kevent(5 * 1000); + test_repeating_kevent(200); + test_repeating_kevent(50); + test_repeating_kevent(10); + + test_updated_kevent(1000, 2000); + test_updated_kevent(2000, 1000); + test_updated_kevent(1000, -1); + + printf("\nFinished: %d tests passed, %d 
failed.\n", passed, failed); + + exit(EXIT_SUCCESS); +} diff --git a/tools/tests/libMicro/AppleReadMe b/tools/tests/libMicro/AppleReadMe new file mode 100755 index 000000000..156b3e4b6 --- /dev/null +++ b/tools/tests/libMicro/AppleReadMe @@ -0,0 +1,131 @@ +Mac OS X specific notes + +*** Instructions before Starting libMicro *** + +# Disable Open Directory and LDAP using the Directory Utility app +# Turn off AirPort +# Turn off Spotlight. In Terminal, execute the following: + sudo service com.apple.metadata.mds stop +# Turn off Time Machine in System Preferences +# Wait at least 2 minutes after boot to desktop for boot cache to settle down + +*** Make and run quickstart *** + + make + ./bench >output.txt +gives you a text file named output.txt with the results of one run. + ./multiview output1.txt output2.txt >compare.html +gives you an HTML file comparing two runs. + +*** Makefile *** + +The Makefile invokes Makefile.Darwin which invokes Makefile.com.Darwin. +Just invoke make, with options if necessary, and everything should +build correctly. The binaries are placed in a directory called +bin-ARCH where ARCH is the default or specified when building via +the ARCH flag. + +options for invoking Makefile are: +ARCH defaults to i386 + if you just want to build for ppc, you can specify + make ARCH=ppc + this will put the results in bin-ppc + + to build fat/multi architecture, specify + make ARCH=fat + the makefile will automatically build with ARCH_FLAG="-arch ppc -arch i386 -arch x86_64" and put the results in bin-fat + + to build with only two of the architectures see below + +ARCH_FLAG defaults to -arch $(ARCH) + to build fat/multi architecture, specify + make ARCH_FLAG="-arch ppc -arch i386" ARCH=fat + this will put the results in bin-fat + +OPT_FLAG defaults to -g + to build optimized, specify make OPT_FLAG=-s + +SEMOP_FLAG defaults to -DUSE_SEMOP + to eliminate SEMOP usage, specify make SEMOP_FLAG= + this is needed on some lower-end systems (e.g. 
M63) + +These can be combined, e.g. + make ARCH=ppc SEMOP_FLAG= + +*** Before running benchmarks *** + +The shell script create_stuff should be run before any benchmarking + +this script takes care of raising the process limits which would +otherwise cause several of the tests to fail - if not you will see: + Running: pipe_pst1 + fork: Resource temporarily unavailable +in your stderr during the runs. After you run create_stuff, the +system then needs to be rebooted. + +*** running the benchmarks *** + +The shell script "bench" will run all the benchmarks, or you can +pass it a parameter to run a single benchmark, e.g. + + bench lmbench_bw_unix + +Watch for: + # WARNINGS + # Quantization error likely;increase batch size (-B option) 4X to avoid. +in the output +To see an example run the supplied testbench script + +Add or adjust the -B parameter for any benchmark that fails. The +Quantization error will refer to the benchmark preceding the error, +not the one following... + +A typical run: + $ make clean + $ make + $ ./create_stuff + $ ./bench > output1 + Running: getpid + for 0.13353 seconds + Running: getppid + for 3.65609 seconds + Running: getenv + for 0.20924 seconds + Running: getenvT2 + for 0.37437 seconds + Running: gettimeofday + for 0.58077 seconds + etc... + +Use the supplied multiview script to compare runs like: + +multiview output1 output2 > compare.html +open compare.html (safari launches) +will show output2 results as a percentage change from the output1 results + +*** Adding additional benchmark tests *** + +Look at the sample file trivial.c. This demonstrates how to do +argument passing, the flow of control of a benchmark, etc. for the +trivial case. The tests starting with "lmbench_" were ported from +the lmbench suite, so they might be good examples as well. 
+ +*** Things to do *** + +* port the rest of the lmbench benchmarks into this framework + +* create website that will allow easy ability to compare many builds +across many machines with historical repository of runs + +* document better how to write a benchmark for this framework +(started in trivial.c) + +* check this into xnu/test + +* create new benchmarks + +*** Leopard notes *** + +Due to rdar://4654956 and its original, rdar://2588252 you cannot +run these tests on Leopard without removing the cascade_lockf test. +There may be other tests which panic a Leopard system. diff --git a/tools/tests/libMicro/Makefile b/tools/tests/libMicro/Makefile new file mode 100644 index 000000000..e81cc6c1e --- /dev/null +++ b/tools/tests/libMicro/Makefile @@ -0,0 +1,103 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + + +include Makefile.benchmarks + +ARCH = i386 + +BINS= $(ALL:%=bin-$(ARCH)/%) bin-$(ARCH)/tattle + +TARBALL_CONTENTS = \ + Makefile.benchmarks \ + Makefile.SunOS \ + Makefile.Linux \ + Makefile.Aix \ + Makefile.com \ + Makefile \ + $(ALL:%=%.c) \ + elided.c \ + exec_bin.c \ + libmicro.c \ + libmicro_main.c \ + libmicro.h \ + recurse2.c \ + benchmark_finibatch.c \ + benchmark_initbatch.c \ + benchmark_optswitch.c \ + benchmark_fini.c \ + benchmark_init.c \ + benchmark_result.c \ + benchmark_finirun.c \ + benchmark_initrun.c \ + benchmark_initworker.c \ + benchmark_finiworker.c \ + bench \ + bench.sh \ + mk_tarball \ + multiview \ + multiview.sh \ + OPENSOLARIS.LICENSE \ + tattle.c \ + wrapper \ + wrapper.sh \ + README + +default $(ALL) run cstyle lint tattle: $(BINS) + @cp bench.sh bench + @cp multiview.sh multiview + @cp wrapper.sh wrapper + @cp create_stuff.sh create_stuff + @chmod +x bench create_stuff multiview wrapper + @mkdir -p bin-$(ARCH); cd bin-$(ARCH); MACH=$(ARCH) $(MAKE) -f ../Makefile.`uname -s` ARCH=$(ARCH) UNAME_RELEASE=`uname -r | sed 's/\./_/g'` $@ + +.PHONY: clean clean_subdirs clean_$(SUBDIRS) + +clean: clean_subdirs + rm -rf bin bin-* wrapper multiview create_stuff bench tattle + +clean_subdirs: + for dir in $(SUBDIRS); do $(MAKE) -C $$dir clean; done + +bin: + @mkdir -p bin-$(ARCH) + +$(BINS): bin + @cp wrapper.sh wrapper + @chmod +x wrapper + @ln -sf ../wrapper $@ + + +libMicro.tar: FORCE + @chmod +x ./mk_tarball wrapper + @./mk_tarball $(TARBALL_CONTENTS) + +FORCE: + diff --git a/tools/tests/libMicro/Makefile.Aix b/tools/tests/libMicro/Makefile.Aix new file mode 100644 index 000000000..9e4dc4224 --- /dev/null +++ b/tools/tests/libMicro/Makefile.Aix @@ -0,0 +1,41 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. 
+# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + + +CFLAGS= -O3 + +CPPFLAGS = -D_REENTRANT + +include ../Makefile.com + +NSLLIB= -lnsl +SOCKLIB= + +.KEEP_STATE: diff --git a/tools/tests/libMicro/Makefile.Darwin b/tools/tests/libMicro/Makefile.Darwin new file mode 100644 index 000000000..7eaa1aaf1 --- /dev/null +++ b/tools/tests/libMicro/Makefile.Darwin @@ -0,0 +1,63 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "@(#)Makefile.Darwin 1.5 05/08/04 SMI" +# + + +CC= gcc +#NOPIC= -mdynamic-no-pic +ARCH= i386 + +ifeq "$(strip $(ARCH))" "fat" +ARCH_FLAG= -arch i386 -arch ppc -arch x86_64 +else +ARCH_FLAG= -arch $(ARCH) +endif + +OPT_FLAG= -g +SEMOP_FLAG= -DUSE_SEMOP + +### +###CFLAGS= -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +###extra_CFLAGS= -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +### +CFLAGS= $(OPT_FLAG) $(SEMOP_FLAG) -DUSE_GETHRTIME -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +extra_CFLAGS= $(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +CPPFLAGS= $(SEMOP_FLAG) -D_REENTRANT -Wall +MATHLIB= -lm + +ELIDED_BENCHMARKS= \ + cachetocache \ + atomic \ + getcontext \ + setcontext \ + + +include ../Makefile.com.Darwin diff --git a/tools/tests/libMicro/Makefile.Linux b/tools/tests/libMicro/Makefile.Linux new file mode 100644 index 000000000..ca12d1561 --- /dev/null +++ b/tools/tests/libMicro/Makefile.Linux @@ -0,0 +1,43 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + + +CC= gcc + +#CFLAGS= -O -DUSE_SEMOP +CPPFLAGS= -DUSE_SEMOP -D_REENTRANT +MATHLIB= -lm + +ELIDED_BENCHMARKS= \ + cachetocache \ + atomic + + +include ../Makefile.com diff --git a/tools/tests/libMicro/Makefile.SunOS b/tools/tests/libMicro/Makefile.SunOS new file mode 100644 index 000000000..4fc726975 --- /dev/null +++ b/tools/tests/libMicro/Makefile.SunOS @@ -0,0 +1,61 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + + +CPPFLAGS= -DUSE_GETHRTIME -D_REENTRANT + +CFLAGS = -O $(extra_CFLAGS) + +# +# These defines allow libmicro to be compiled against older Solaris +# releases by turning off the tests which don't work there. +# +# This is a little contorted-- UNAME_RELEASE is set as an environment +# variable for us by the invoking make process (see Makefile)-- it is +# the output of uname -r | sed 's/\./_/g'. +# +# We couldn't find any other gmake/unix make portable way to make this +# work. 
+# +ELIDED_BENCHMARKS_5_8=atomic cachetocache +ELIDED_BENCHMARKS_5_9=atomic + +ELIDED_BENCHMARKS_CMN=cascade_flock + +ELIDED_BENCHMARKS=$(ELIDED_BENCHMARKS_CMN) $(ELIDED_BENCHMARKS_$(UNAME_RELEASE)) + +include ../Makefile.com + +NSLLIB= -lnsl +SOCKLIB= -lsocket +UCBLIB= -lc -L/usr/ucblib -lucb -R/usr/ucblib +MATHLIB= -lm + +.KEEP_STATE: diff --git a/tools/tests/libMicro/Makefile.benchmarks b/tools/tests/libMicro/Makefile.benchmarks new file mode 100644 index 000000000..96c105f76 --- /dev/null +++ b/tools/tests/libMicro/Makefile.benchmarks @@ -0,0 +1,137 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# + +SUBDIRS = apple + +ALL= \ + atomic \ + bind \ + cachetocache \ + cascade_mutex \ + cascade_cond \ + cascade_lockf \ + cascade_fcntl \ + cascade_flock \ + chdir \ + close \ + close_tcp \ + connection \ + dup \ + exec \ + exit \ + exp \ + fcntl \ + fcntl_ndelay \ + file_lock \ + fork \ + getcontext \ + getenv \ + gettimeofday \ + getpeername \ + getpid \ + getrusage \ + getsockname \ + isatty \ + listen \ + localtime_r \ + log \ + longjmp \ + lrand48 \ + lseek \ + malloc \ + memcpy \ + memmove \ + memrand \ + memset \ + mktime \ + mprotect \ + mmap \ + msync \ + munmap \ + mutex \ + nop \ + open \ + pipe \ + poll \ + pread \ + pthread_create \ + pwrite \ + read \ + realpath \ + recurse \ + select \ + semop \ + setcontext \ + setsockopt \ + sigaction \ + siglongjmp \ + signal \ + sigprocmask \ + socket \ + socketpair \ + stat \ + strcasecmp \ + strchr \ + strcmp \ + strcpy \ + strftime \ + strlen \ + strtol \ + system \ + time \ + times \ + write \ + writev + +ALL_APPLE = \ + create_file \ + getppid \ + lb_mmtest \ + lm_null_call \ + lmbench_bw_file_rd \ + lmbench_bw_mem \ + lmbench_bw_mmap_rd \ + lmbench_bw_unix \ + lmbench_fstat \ + lmbench_lat_sig_catch \ + lmbench_lat_sig_install \ + lmbench_lat_sig_prot \ + lmbench_lat_sig_send \ + lmbench_openclose \ + lmbench_read \ + lmbench_select_file \ + lmbench_select_tcp \ + lmbench_stat \ + lmbench_write \ + posix_spawn \ + trivial \ + vm_allocate + + diff --git a/tools/tests/libMicro/Makefile.com b/tools/tests/libMicro/Makefile.com new file mode 100644 index 000000000..5cb92a10e --- /dev/null +++ b/tools/tests/libMicro/Makefile.com @@ -0,0 +1,128 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +include ../Makefile.benchmarks + +EXTRA_CFILES= \ + exec_bin.c \ + elided.c \ + tattle.c + +# +# some definitions to make getting compiler versions possible - avoid quotes +# +COMPILER_VERSION_CMD_cc=cc -V 2>&1 | egrep Sun +COMPILER_VERSION_CMD_gcc=gcc -dumpversion +COMPILER_VERSION_CMD=$(COMPILER_VERSION_CMD_$(CC)) + +default: $(ALL) tattle + +cstyle: + for file in $(ALL:%=../%.c) $(EXTRA_CFILES:%=../%) ; \ + do cstyle -p $$file ;\ + done + + +lint: libmicro.ln $(ALL:%=%.lint) $(EXTRA_CFILES:%.c=%.lint) + + +$(EXTRA_CFILES:%.c=%.lint): + $(LINT) ../$(@:%.lint=%.c) -I. 
-mu -lc libmicro.ln -lm + +%.lint: ../%.c libmicro.ln + $(LINT) -mu $(CPPFLAGS) $< libmicro.ln -lpthread -lsocket -lnsl -lm + +%.o: ../%.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ + +libmicro.ln: ../libmicro.c ../libmicro_main.c ../libmicro.h ../benchmark_*.c + $(LINT) -muc $(CPPFLAGS) ../libmicro.c ../libmicro_main.c ../benchmark_*.c + +CPPFLAGS+= -D_REENTRANT + +bind_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +cascade_flock_EXTRA_LIBS=$(UCBLIB) +close_tcp_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +connection_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +fcntl_ndelay_EXTRA_LIBS=$(SOCKLIB) +getpeername_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +getsockname_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +listen_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +log_EXTRA_LIBS=$(MATHLIB) +pipe_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +poll_EXTRA_LIBS=$(SOCKLIB) +select_EXTRA_LIBS=$(SOCKLIB) +setsockopt_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +socket_EXTRA_LIBS=$(SOCKLIB) +socketpair_EXTRA_LIBS=$(SOCKLIB) + +BENCHMARK_FUNCS= \ + benchmark_init.o \ + benchmark_fini.o \ + benchmark_initrun.o \ + benchmark_finirun.o \ + benchmark_initbatch.o \ + benchmark_finibatch.o \ + benchmark_initworker.o \ + benchmark_finiworker.o \ + benchmark_optswitch.o \ + benchmark_result.o + +recurse_EXTRA_DEPS=recurse2.o + + +recurse: $(recurse_EXTRA_DEPS) + +libmicro.a: libmicro.o libmicro_main.o $(BENCHMARK_FUNCS) + $(AR) -cr libmicro.a libmicro.o libmicro_main.o $(BENCHMARK_FUNCS) + +tattle: ../tattle.c libmicro.a + echo "char * compiler_version = \""`$(COMPILER_VERSION_CMD)`"\";" > tattle.h + echo "char * CC = \""$(CC)"\";" >> tattle.h + echo "char * extra_compiler_flags = \""$(extra_CFLAGS)"\";" >> tattle.h + $(CC) -o tattle $(CFLAGS) -I. 
../tattle.c libmicro.a -lrt -lm + cp tattle ../tattle + +$(ELIDED_BENCHMARKS): ../elided.c + $(CC) -o $(@) ../elided.c + +%: libmicro.a %.o + $(CC) -o $(@) $(@).o $($(@)_EXTRA_DEPS) $(CFLAGS) libmicro.a $($(@)_EXTRA_LIBS) $(EXTRA_LIBS) -lpthread -lm + +exec: exec_bin + +exec_bin: exec_bin.o + $(CC) -o exec_bin $(CFLAGS) exec_bin.o + +FORCE: + + +._KEEP_STATE: + diff --git a/tools/tests/libMicro/Makefile.com.Darwin b/tools/tests/libMicro/Makefile.com.Darwin new file mode 100644 index 000000000..eb942dbe2 --- /dev/null +++ b/tools/tests/libMicro/Makefile.com.Darwin @@ -0,0 +1,142 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "@(#)Makefile.com.Darwin 1.10 05/08/04 SMI" +# + +include ../Makefile.benchmarks + +EXTRA_CFILES= \ + exec_bin.c \ + elided.c \ + tattle.c + +# +# some definitions to make getting compiler versions possible - avoid quotes +# +COMPILER_VERSION_CMD_cc=cc -V 2>&1 | egrep Sun +COMPILER_VERSION_CMD_gcc=gcc -dumpversion +COMPILER_VERSION_CMD=$(COMPILER_VERSION_CMD_$(CC)) + +default: $(ALL) subdirs tattle + +cstyle: + for file in $(ALL:%=../%.c) $(EXTRA_CFILES:%=../%) ; \ + do cstyle -p $$file ;\ + done + + +lint: libmicro.ln $(ALL:%=%.lint) $(EXTRA_CFILES:%.c=%.lint) + + +$(EXTRA_CFILES:%.c=%.lint): + $(LINT) ../$(@:%.lint=%.c) -I. -mu -lc libmicro.ln -lm + +%.lint: ../%.c libmicro.ln + $(LINT) -mu $(CPPFLAGS) $< libmicro.ln -lpthread -lsocket -lnsl -lm + +%.o: ../%.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ + +libmicro.ln: ../libmicro.c ../libmicro_main.c ../libmicro.h ../benchmark_*.c + $(LINT) -muc $(CPPFLAGS) ../libmicro.c ../libmicro_main.c ../benchmark_*.c + +CPPFLAGS+= -D_REENTRANT + +bind_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +cascade_flock_EXTRA_LIBS=$(UCBLIB) +close_tcp_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +connection_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +fcntl_ndelay_EXTRA_LIBS=$(SOCKLIB) +getpeername_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +getsockname_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +listen_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +log_EXTRA_LIBS=$(MATHLIB) +pipe_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +poll_EXTRA_LIBS=$(SOCKLIB) +select_EXTRA_LIBS=$(SOCKLIB) +setsockopt_EXTRA_LIBS=$(NSLLIB) $(SOCKLIB) +socket_EXTRA_LIBS=$(SOCKLIB) +socketpair_EXTRA_LIBS=$(SOCKLIB) + +BENCHMARK_FUNCS= \ + benchmark_init.o \ + benchmark_fini.o \ + benchmark_initrun.o \ + benchmark_finirun.o \ + benchmark_initbatch.o \ + benchmark_finibatch.o \ + benchmark_initworker.o \ + benchmark_finiworker.o \ + benchmark_optswitch.o \ + benchmark_result.o + +recurse_EXTRA_DEPS=recurse2.o + + +recurse: $(recurse_EXTRA_DEPS) + +libmicro.a: libmicro.o libmicro_main.o $(BENCHMARK_FUNCS) + $(AR) -cr libmicro.a libmicro.o 
libmicro_main.o $(BENCHMARK_FUNCS) + ranlib libmicro.a + +tattle: ../tattle.c libmicro.a + echo "char * compiler_version = \""`$(COMPILER_VERSION_CMD)`"\";" > tattle.h + echo "char * CC = \""$(CC)"\";" >> tattle.h + echo "char * extra_compiler_flags = \""$(extra_CFLAGS)"\";" >> tattle.h + $(CC) -o tattle $(CFLAGS) -I. ../tattle.c libmicro.a -lSystem -lm + cp tattle ../tattle + +$(ELIDED_BENCHMARKS): ../elided.c + $(CC) $(CFLAGS) -o $(@) ../elided.c + +%: libmicro.a %.o + $(CC) -o $(@) $(@).o $($(@)_EXTRA_DEPS) $(CFLAGS) libmicro.a $($(@)_EXTRA_LIBS) $(EXTRA_LIBS) -lpthread -lm + +exec: exec_bin + +exec_bin: exec_bin.o + $(CC) -o exec_bin $(CFLAGS) exec_bin.o + +# for apple added tests + +.PHONY: subdirs $(SUBDIRS) + +subdirs: $(SUBDIRS) + +$(SUBDIRS): + cd ..; $(MAKE) -C $@ ARCH=$(ARCH) + + + +FORCE: + + +._KEEP_STATE: + diff --git a/tools/tests/libMicro/OPENSOLARIS.LICENSE b/tools/tests/libMicro/OPENSOLARIS.LICENSE new file mode 100644 index 000000000..535dec222 --- /dev/null +++ b/tools/tests/libMicro/OPENSOLARIS.LICENSE @@ -0,0 +1,385 @@ +Unless otherwise noted, all files in this distribution are released +under the Common Development and Distribution License (CDDL), +Version 1.0 only. Exceptions are noted within the associated +source files. + +-------------------------------------------------------------------- + + +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates + or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Software, prior Modifications used by a Contributor (if any), + and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) + Modifications, or (c) the combination of files containing + Original Software with files containing Modifications, in + each case including portions thereof. + + 1.4. 
"Executable" means the Covered Software in any form other + than Source Code. + + 1.5. "Initial Developer" means the individual or entity that first + makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or + portions thereof with code not governed by the terms of this + License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed + herein. + + 1.9. "Modifications" means the Source Code and Executable form of + any of the following: + + A. Any file that results from an addition to, deletion from or + modification of the contents of a file containing Original + Software or previous Modifications; + + B. Any new file that contains any part of the Original + Software or previous Modifications; or + + C. Any new file that is contributed or otherwise made + available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable + form of computer software code that is originally released + under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, + process, and apparatus claims, in any patent Licensable by + grantor. + + 1.12. "Source Code" means (a) the common form of computer software + code in which modifications are made and (b) associated + documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms + of, this License. For legal entities, "You" includes any + entity which controls, is controlled by, or is under common + control with You. 
For purposes of this definition, + "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty + percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, the Initial + Developer hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer, to use, + reproduce, modify, display, perform, sublicense and + distribute the Original Software (or portions thereof), + with or without Modifications, and/or as part of a Larger + Work; and + + (b) under Patent Claims infringed by the making, using or + selling of Original Software, to make, have made, use, + practice, sell, and offer for sale, and/or otherwise + dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are + effective on the date Initial Developer first distributes + or otherwise makes the Original Software available to a + third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: (1) for code that You delete from the Original + Software, or (2) for infringements caused by: (i) the + modification of the Original Software, or (ii) the + combination of the Original Software with other software + or devices. + + 2.2. Contributor Grant. 
+ + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, each + Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor to use, reproduce, + modify, display, perform, sublicense and distribute the + Modifications created by such Contributor (or portions + thereof), either on an unmodified basis, with other + Modifications, as Covered Software and/or as part of a + Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either + alone and/or in combination with its Contributor Version + (or portions of such combination), to make, use, sell, + offer for sale, have made, and/or otherwise dispose of: + (1) Modifications made by that Contributor (or portions + thereof); and (2) the combination of Modifications made by + that Contributor with its Contributor Version (or portions + of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first distributes or + otherwise makes the Modifications available to a third + party. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: (1) for any code that Contributor has deleted + from the Contributor Version; (2) for infringements caused + by: (i) third party modifications of Contributor Version, + or (ii) the combination of Modifications made by that + Contributor with other software (except as part of the + Contributor Version) or other devices; or (3) under Patent + Claims infringed by Covered Software in the absence of + Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. 
+ + Any Covered Software that You distribute or otherwise make + available in Executable form must also be made available in Source + Code form and that Source Code form must be distributed only under + the terms of this License. You must include a copy of this + License with every copy of the Source Code form of the Covered + Software You distribute or otherwise make available. You must + inform recipients of any such Covered Software in Executable form + as to how they can obtain such Covered Software in Source Code + form in a reasonable manner on or through a medium customarily + used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are + governed by the terms of this License. You represent that You + believe Your Modifications are Your original creation(s) and/or + You have sufficient rights to grant the rights conveyed by this + License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that + identifies You as the Contributor of the Modification. You may + not remove or alter any copyright, patent or trademark notices + contained within the Covered Software, or any notices of licensing + or any descriptive text giving attribution to any Contributor or + the Initial Developer. + + 3.4. Application of Additional Terms. + + You may not offer or impose any terms on any Covered Software in + Source Code form that alters or restricts the applicable version + of this License or the recipients' rights hereunder. You may + choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of + Covered Software. However, you may do so only on Your own behalf, + and not on behalf of the Initial Developer or any Contributor. 
+ You must make it absolutely clear that any such warranty, support, + indemnity or liability obligation is offered by You alone, and You + hereby agree to indemnify the Initial Developer and every + Contributor for any liability incurred by the Initial Developer or + such Contributor as a result of warranty, support, indemnity or + liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software + under the terms of this License or under the terms of a license of + Your choice, which may contain terms different from this License, + provided that You are in compliance with the terms of this License + and that the license for the Executable form does not attempt to + limit or alter the recipient's rights in the Source Code form from + the rights set forth in this License. If You distribute the + Covered Software in Executable form under a different license, You + must make it absolutely clear that any terms which differ from + this License are offered by You alone, not by the Initial + Developer or Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred + by the Initial Developer or such Contributor as a result of any + such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with + other code not governed by the terms of this License and + distribute the Larger Work as a single product. In such a case, + You must make sure the requirements of this License are fulfilled + for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Sun Microsystems, Inc. is the initial license steward and may + publish revised and/or new versions of this License from time to + time. Each version will be given a distinguishing version number. + Except as provided in Section 4.3, no one other than the license + steward has the right to modify this License. + + 4.2. 
Effect of New Versions. + + You may always continue to use, distribute or otherwise make the + Covered Software available under the terms of the version of the + License under which You originally received the Covered Software. + If the Initial Developer includes a notice in the Original + Software prohibiting it from being distributed or otherwise made + available under any subsequent version of the License, You must + distribute and make the Covered Software available under the terms + of the version of the License under which You originally received + the Covered Software. Otherwise, You may also choose to use, + distribute or otherwise make the Covered Software available under + the terms of any subsequent version of the License published by + the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new + license for Your Original Software, You may create and use a + modified version of this License if You: (a) rename the license + and remove any references to the name of the license steward + (except to note that the license differs from this License); and + (b) otherwise make it clear that the license contains terms which + differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" + BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED + SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR + PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY + COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE + INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS + DISCLAIMER. + +6. 
TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to + cure such breach within 30 days of becoming aware of the breach. + Provisions which, by their nature, must remain in effect beyond + the termination of this License shall survive. + + 6.2. If You assert a patent infringement claim (excluding + declaratory judgment actions) against Initial Developer or a + Contributor (the Initial Developer or Contributor against whom You + assert such claim is referred to as "Participant") alleging that + the Participant Software (meaning the Contributor Version where + the Participant is a Contributor or the Original Software where + the Participant is the Initial Developer) directly or indirectly + infringes any patent, then any and all rights granted directly or + indirectly to You by such Participant, the Initial Developer (if + the Initial Developer is not the Participant) and all Contributors + under Sections 2.1 and/or 2.2 of this License shall, upon 60 days + notice from Participant terminate prospectively and automatically + at the expiration of such 60 day notice period, unless if within + such 60 day period You withdraw Your claim with respect to the + Participant Software against such Participant either unilaterally + or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, + all end user licenses that have been validly granted by You or any + distributor hereunder prior to termination (excluding licenses + granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. 
+ + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE + INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF + COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE + LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK + STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL + INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT + APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO + NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR + CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT + APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is + defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial + computer software" (as that term is defined at 48 + C.F.R. 252.227-7014(a)(1)) and "commercial computer software + documentation" as such terms are used in 48 C.F.R. 12.212 + (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 + C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all + U.S. Government End Users acquire Covered Software with only those + rights set forth herein. This U.S. Government Rights clause is in + lieu of, and supersedes, any other FAR, DFAR, or other clause or + provision that addresses Government rights in computer software + under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. 
This License shall be governed + by the law of the jurisdiction specified in a notice contained + within the Original Software (except to the extent applicable law, + if any, provides otherwise), excluding such jurisdiction's + conflict-of-law provisions. Any litigation relating to this + License shall be subject to the jurisdiction of the courts located + in the jurisdiction and venue specified in a notice contained + within the Original Software, with the losing party responsible + for costs, including, without limitation, court costs and + reasonable attorneys' fees and expenses. The application of the + United Nations Convention on Contracts for the International Sale + of Goods is expressly excluded. Any law or regulation which + provides that the language of a contract shall be construed + against the drafter shall not apply to this License. You agree + that You alone are responsible for compliance with the United + States export administration regulations (and the export control + laws and regulation of any other countries) when You use, + distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or + indirectly, out of its utilization of rights under this License + and You agree to work with Initial Developer and Contributors to + distribute such responsibility on an equitable basis. Nothing + herein is intended or shall be deemed to constitute any admission + of liability. + +-------------------------------------------------------------------- + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND +DISTRIBUTION LICENSE (CDDL) + +For Covered Software in this distribution, this License shall +be governed by the laws of the State of California (excluding +conflict-of-law provisions). 
+ +Any litigation relating to this License shall be subject to the +jurisdiction of the Federal Courts of the Northern District of +California and the state courts of the State of California, with +venue lying in Santa Clara County, California. diff --git a/tools/tests/libMicro/README b/tools/tests/libMicro/README new file mode 100644 index 000000000..a4374cb1a --- /dev/null +++ b/tools/tests/libMicro/README @@ -0,0 +1,114 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +Building the tarball +-------------------- +As long as cc is in your path, (gcc on Linux), + +% tar xf libMicro.tar +% make + +will build the benchmark suite. + +Running the benchmarks +----------------------- + +A set of generic scripts to invoke each micro benchmark +are created in the bin directory; these may be invoked +directly. Note that the actual binaries are created in +OS-specific directories; this allows one to build for +all variants (x86/sparc/Solaris/Linux) in one place. + +To collect a complete set of benchmarks, use the bench +script and redirect its output to a file. 
+ +% ./bench > output + +To compare the output of two or more runs, use multiview in the src +directory: + +% ./multiview reference compare1 compare2 compare2 > compare.html +% + +where the reference and compare files contain the output of different +libmicro runs. + +The compare.html file will allow quick comparisons to be drawn, +allowing a variety of experiments to be quickly analyzed. + +All benchmarks support the following options: + + [-1] (single process; overrides -P > 1) + [-A] (align with clock) + [-B batch-size (default 10)] + [-C minimum number of samples (default 0)] + [-D duration in msecs (default 10s)] + [-E (echo name to stderr)] + [-H] (suppress headers) + [-I] specify approx. time per op in nsecs + [-L] (print argument line) + [-M] (reports mean rather than median) + [-N test-name ] + [-P processes (default 1)] + [-S] (print detailed stats) + [-T threads (default 1)] + [-V] (print the libMicro version and exit) + [-W] (flag possible benchmark problems) + + +Apple-added Benchmarks +----------------------- + + create_file + geekbench_stdlib_write + getppid + lb_mmtest + lm_null_call + lmbench_bw_file_rd + lmbench_bw_mem + lmbench_bw_mmap_rd + lmbench_bw_unix + lmbench_fstat + lmbench_lat_ctx + lmbench_lat_sig_catch + lmbench_lat_sig_install + lmbench_lat_sig_prot + lmbench_lat_sig_send + lmbench_openclose + lmbench_read + lmbench_select_file + lmbench_select_tcp + lmbench_stat + lmbench_write + trivial + vm_allocate + +Also, please read AppleReadMe for further information. + diff --git a/tools/tests/libMicro/apple/Makefile b/tools/tests/libMicro/apple/Makefile new file mode 100644 index 000000000..fa9ce7009 --- /dev/null +++ b/tools/tests/libMicro/apple/Makefile @@ -0,0 +1,49 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. 
+# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + + +include Makefile.benchmarks + +ARCH= i386 + +BINS= $(ALL:%=../bin-$(ARCH)/%) + +default $(ALL): $(BINS) + @mkdir -p bin-$(ARCH); cd bin-$(ARCH); MACH=$(ARCH) $(MAKE) -f ../Makefile.`uname -s` ARCH=$(ARCH) UNAME_RELEASE=`uname -r | sed 's/\./_/g'` $@ + +clean: + rm -rf bin bin-* + +bin: + @mkdir -p ../bin-$(ARCH) + +$(BINS): bin + + diff --git a/tools/tests/libMicro/apple/Makefile.Darwin b/tools/tests/libMicro/apple/Makefile.Darwin new file mode 100644 index 000000000..3ca3607ae --- /dev/null +++ b/tools/tests/libMicro/apple/Makefile.Darwin @@ -0,0 +1,66 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. 
If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# ident "@(#)Makefile.Linux 1.5 05/08/04 SMI" +# + + +CC= gcc +#NOPIC= -mdynamic-no-pic +ARCH= i386 + +ifeq "$(strip $(ARCH))" "fat" +ARCH_FLAG= -arch i386 -arch ppc -arch x86_64 +else +ARCH_FLAG= -arch $(ARCH) +endif + +OPT_FLAG= -g +SEMOP_FLAG= -DUSE_SEMOP + +### +###CFLAGS= -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +###extra_CFLAGS= -Os -DUSE_SEMOP -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +### +CFLAGS= $(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +extra_CFLAGS= $(OPT_FLAG) $(SEMOP_FLAG) -fno-builtin $(NOPIC) $(ARCH_FLAG) -Wall +CPPFLAGS= $(SEMOP_FLAG) -D_REENTRANT -Wall +MATHLIB= -lm + +ELIDED_BENCHMARKS= \ + cachetocache \ + atomic \ + getcontext \ + setcontext \ + fork \ + exit \ + connection + + +include ../Makefile.com.Darwin diff --git a/tools/tests/libMicro/apple/Makefile.benchmarks b/tools/tests/libMicro/apple/Makefile.benchmarks new file mode 100644 index 000000000..0e3cfe2ac --- /dev/null +++ b/tools/tests/libMicro/apple/Makefile.benchmarks @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. 
If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + + +ALL = \ + create_file \ + geekbench_stdlib_write \ + getppid \ + lb_mmtest \ + lm_null_call \ + lmbench_bw_file_rd \ + lmbench_bw_mem \ + lmbench_bw_mmap_rd \ + lmbench_bw_unix \ + lmbench_fstat \ + lmbench_lat_ctx \ + lmbench_lat_sig_catch \ + lmbench_lat_sig_install \ + lmbench_lat_sig_prot \ + lmbench_lat_sig_send \ + lmbench_openclose \ + lmbench_read \ + lmbench_select_file \ + lmbench_select_tcp \ + lmbench_stat \ + lmbench_write \ + posix_spawn \ + trivial \ + vm_allocate + + diff --git a/tools/tests/libMicro/apple/Makefile.com.Darwin b/tools/tests/libMicro/apple/Makefile.com.Darwin new file mode 100644 index 000000000..121473735 --- /dev/null +++ b/tools/tests/libMicro/apple/Makefile.com.Darwin @@ -0,0 +1,58 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+# Use is subject to license terms. +# +# ident "@(#)Makefile.com.Darwin 1.10 05/08/04 SMI" +# + +include ../Makefile.benchmarks + +EXTRA_CFILES= \ + exec_bin.c \ + elided.c \ + tattle.c + +# +# some definitions to make getting compiler versions possible - avoid quotes +# +COMPILER_VERSION_CMD_cc=cc -V 2>&1 | egrep Sun +COMPILER_VERSION_CMD_gcc=gcc -dumpversion +COMPILER_VERSION_CMD=$(COMPILER_VERSION_CMD_$(CC)) + +default: $(ALL) + +%.o: ../%.c + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ + +%: %.o + $(CC) -o $(@) $(@).o $($(@)_EXTRA_DEPS) $(CFLAGS) ../../bin-$(ARCH)/libmicro.a $($(@)_EXTRA_LIBS) $(EXTRA_LIBS) -lpthread -lm; cp $@ ../../bin-$(ARCH)/ + +posix_spawn: posix_spawn_bin + +posix_spawn_bin: posix_spawn_bin.o + $(CC) -o posix_spawn_bin $(CFLAGS) posix_spawn_bin.o diff --git a/tools/tests/libMicro/apple/create_file.c b/tools/tests/libMicro/apple/create_file.c new file mode 100644 index 000000000..f4e5abf34 --- /dev/null +++ b/tools/tests/libMicro/apple/create_file.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+/*
+ * Order of Execution
+ *
+ * benchmark_init
+ *
+ * benchmark_optswitch
+ *
+ * benchmark_initrun
+ *
+ * benchmark_initworker
+ * benchmark_initbatch
+ * benchmark
+ * benchmark_finibatch
+ * benchmark_initbatch
+ * benchmark
+ * benchmark_finibatch, etc.
+ * benchmark_finiworker
+ *
+ * benchmark_result
+ *
+ * benchmark_finirun
+ *
+ * benchmark_fini
+ */
+
+
+
+/*
+ * NOTE(review): the ident string below names trivial.c although this file
+ * is create_file.c -- presumably left over from the template; confirm.
+ */
+#ifdef __sun
+#pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc."
+#endif
+
+
+
+/*
+ * NOTE(review): the header names on the #include lines below are missing
+ * in this copy. The code in this file uses fprintf/sprintf, malloc/free,
+ * strncpy, getpid and open/O_CREAT, so the matching system headers are
+ * needed -- restore them from the original source.
+ */
+#include
+#include
+#include
+#include
+#include
+// add additional headers needed here.
+#include
+
+#include "../libmicro.h"
+
+/*
+ * debug(): prints a trace message to stderr when DEBUG is non-zero and
+ * expands to nothing otherwise, so its arguments are not evaluated in
+ * non-debug builds. The macro appends a newline to the format itself.
+ */
+#if DEBUG
+# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args)
+#else
+# define debug(fmt, args...)
+#endif
+
+/* size in bytes of the buffer that holds the target file name (optf) */
+#define MAXPATHLEN 1024
+/*
+ * Your state variables should live in the tsd_t struct below
+ */
+typedef struct {
+	int ts_once;
+} tsd_t;
+
+/*
+ * You can have any lower-case option you want to define.
+ * options are specified in the lm_optstr as either a
+ * single lower-case letter, or a single lower case letter
+ * with a colon after it. This benchmark defines one option:
+ *     -f {path}   file to open with O_CREAT
+ *                 (default /tmp/create_file_<pid>)
+ */
+static char * optf; // allocated in benchmark_init, freed in benchmark_fini.
+
+
+/*
+ * benchmark_init: one-time setup, first hook in the order of execution.
+ *
+ * Registers the option string and usage text with the framework, sets
+ * the size of the per-thread state block, and allocates optf with the
+ * default target path /tmp/create_file_<pid>.
+ *
+ * Returns 0 on success, -1 if the path buffer cannot be allocated.
+ */
+int
+benchmark_init()
+{
+	debug("benchmark_init\n");
+	/*
+	 * the lm_optstr must be defined here or no options for you
+	 *
+	 * ...and the framework will throw an error
+	 *
+	 */
+	(void) sprintf(lm_optstr, "f:");
+	/*
+	 * tsd_t is the state info struct that we pass around
+	 *
+	 * lm_tsdsize will allocate the space we need for this
+	 * structure throughout the rest of the framework
+	 */
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_usage,
+	    " -f filename\n"
+	    "notes: measures file creation using open(2)\n");
+
+	optf = malloc(MAXPATHLEN);
+	if (optf == NULL) {
+		perror("malloc");
+		return (-1);
+	}
+	/* bounded write; pid_t is cast because %d expects an int */
+	(void) snprintf(optf, MAXPATHLEN, "/tmp/create_file_%d",
+	    (int)getpid());
+	return (0);
+}
+
+/*
+ * benchmark_optswitch: handles the lower-case options declared in
+ * lm_optstr by benchmark_init.
+ *
+ *	-f path		replaces the default target file name
+ *
+ * Returns 0 if the option was accepted, -1 for an unknown option, a
+ * missing path buffer, or a path that does not fit in MAXPATHLEN bytes.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	debug("benchmark_optswitch\n");
+
+	switch (opt) {
+	case 'f':
+		/*
+		 * Copy the whole path. The previous 20-byte strncpy cut
+		 * longer names short and left the tail of the default
+		 * path behind them.
+		 */
+		if (optf == NULL || strlen(optarg) >= MAXPATHLEN)
+			return (-1);
+		(void) snprintf(optf, MAXPATHLEN, "%s", optarg);
+		(void) fprintf(stderr, "optf = %s\n", optf);
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * benchmark_initrun: nothing to prepare for this benchmark; returns 0.
+ */
+int
+benchmark_initrun()
+{
+	debug("benchmark_initrun\n");
+	return (0);
+}
+
+/*
+ * benchmark_initworker: per-worker setup hook; there is no per-worker
+ * state to initialize, so it only returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark_initworker(void *tsd)
+{
+	/*
+	 * initialize your state variables here first
+	 */
+	return (0);
+}
+
+/*
+ * benchmark_initbatch: per-batch setup hook. tsd points at this
+ * worker's tsd_t; the value of ts_once is left unchanged. Returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark_initbatch(void *tsd)
+{
+	/*
+	 * initialize your state variables here second
+	 */
+	tsd_t *ts = (tsd_t *)tsd;
+	// useless code to show what you can do.
+	ts->ts_once++;
+	ts->ts_once--;
+	debug("benchmark_initbatch: ts_once = %i\n",ts->ts_once);
+	return (0);
+}
+
+/*
+ * benchmark: the timed region. Opens optf with O_CREAT lm_optB times.
+ *
+ * res->re_count is set to the number of iterations performed and
+ * res->re_errors is incremented once for every open(2) that fails.
+ * Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	/*
+	 * try not to initialize things here. This is the main
+	 * loop of things to get timed. Start a server in
+	 * benchmark_initbatch
+	 */
+	int i;
+	int fd;
+
+	/*
+	 * tsd is cast inside the debug() arguments so that a DEBUG build
+	 * compiles and a non-DEBUG build has no unused local.
+	 */
+	debug("in to benchmark - optB = %i : ts_once = %i\n", lm_optB,
+	    ((tsd_t *)tsd)->ts_once);
+	for (i = 0; i < lm_optB; i++) {
+		/*
+		 * open(2) needs the mode argument whenever O_CREAT is
+		 * given, and reports failure with -1 (0 is a valid
+		 * descriptor).
+		 */
+		fd = open(optf, O_RDONLY | O_CREAT, 0644);
+		if (fd < 0) {
+			res->re_errors++;
+			continue;
+		}
+		/*
+		 * Close right away so a batch cannot exhaust the
+		 * process's file descriptors. Note that this puts
+		 * close(2) inside the timed loop.
+		 */
+		(void) close(fd);
+	}
+	res->re_count = i;
+	debug("out of benchmark - optB = %i : ts_once = %i\n", lm_optB,
+	    ((tsd_t *)tsd)->ts_once);
+
+	return (0);
+}
+
+/*
+ * benchmark_finibatch: per-batch teardown hook; nothing to undo.
+ * Returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark_finibatch(void *tsd)
+{
+	return (0);
+}
+
+/*
+ * benchmark_finiworker: per-worker teardown hook. tsd points at this
+ * worker's tsd_t; the value of ts_once is left unchanged. Returns 0.
+ */
+int
+benchmark_finiworker(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	// useless code to show what you can do.
+	ts->ts_once++;
+	ts->ts_once--;
+	debug("benchmark_finiworker: ts_once = %i\n",ts->ts_once);
+	return (0);
+}
+
+/*
+ * benchmark_result: returns the extra text for the result line; this
+ * benchmark has none, so it returns a pointer to a static empty string.
+ */
+char *
+benchmark_result()
+{
+	static char result = '\0';
+	debug("benchmark_result\n");
+	return (&result);
+}
+
+/*
+ * benchmark_finirun: nothing to tear down after the run; returns 0.
+ */
+int
+benchmark_finirun()
+{
+	debug("benchmark_finirun\n");
+	return (0);
+}
+
+
+/*
+ * benchmark_fini: releases the path buffer allocated by benchmark_init.
+ * Returns 0.
+ */
+int
+benchmark_fini()
+{
+	debug("benchmark_fini\n");
+	free(optf);
+	optf = NULL;	/* guard against reuse after free */
+	return (0);
+}
+
diff --git a/tools/tests/libMicro/apple/geekbench_stdlib_write.c b/tools/tests/libMicro/apple/geekbench_stdlib_write.c
new file mode 100644
index 000000000..65bcee95e
--- /dev/null
+++ b/tools/tests/libMicro/apple/geekbench_stdlib_write.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2006 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)geekbench_stdlib_write.c 1.0 08/17/06 Apple Inc." 
+#endif + + + +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int ts_once; +} tsd_t; + +unsigned char * arena; +unsigned int arenaSize = 1048576; + +static int optt = 1; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + //tsd_t *ts = (tsd_t *)tsd; + //(void) fprintf(stderr, "benchmark_initbatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +int +benchmark_finirun() +{ + (void) fprintf(stderr, "benchmark_finirun\n"); + return (0); +} + +int +benchmark_init() +{ + (void) fprintf(stderr, "benchmark_init\n"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-t int (default 1)]\n" + "notes: measures nothing\n"); + return (0); +} + +int +benchmark_fini() +{ + (void) fprintf(stderr, "benchmark_fini\n"); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + /* + * more proof of state passing + */ + ts->ts_once = optt; + //(void) fprintf(stderr, "benchmark_finibatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + //tsd_t *ts = (tsd_t *)tsd; + //(void) fprintf(stderr, "benchmark_finiworker: ts_once = %i\n",ts->ts_once); + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + (void) fprintf(stderr, "benchmark_optswitch\n"); + + switch (opt) { + case 't': + optt = sizetoint(optarg); + 
break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + //tsd_t *ts = (tsd_t *)tsd; + //ts->ts_once = optt; + //(void) fprintf(stderr, "benchmark_initworker: ts_once = %i\n",ts->ts_once); + arena = ( unsigned char * )malloc( arenaSize); + return (0); +} + +int +benchmark_initrun() +{ + //(void) fprintf(stderr, "benchmark_initrun\n"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + //tsd_t *ts = (tsd_t *)tsd; + int i; + + //(void) fprintf(stderr, "in to benchmark - optB = %i : ts_once = %i\n", lm_optB, ts->ts_once); + for (i = 0; i < lm_optB; i++) { + /* + * just to show that ts really contains state + */ + //(void) fprintf(stderr, "i is %i\n",i); + memset( arena, 0, arenaSize ); + } + res->re_count = i; + //(void) fprintf(stderr, "out of benchmark - optB = %i : ts_once = %i\n", lm_optB, ts->ts_once); + + return (0); +} diff --git a/osfmk/libsa/float.h b/tools/tests/libMicro/apple/getppid.c similarity index 71% rename from osfmk/libsa/float.h rename to tools/tests/libMicro/apple/getppid.c index 1dab99631..ac775eb70 100644 --- a/osfmk/libsa/float.h +++ b/tools/tests/libMicro/apple/getppid.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006 Apple Inc. All Rights Reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). 
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,30 +22,46 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + + +#ifdef __sun +#pragma ident "@(#)getppid.c 1.0 06/20/06 Apple Inc." +#endif + /* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.1 1996/10/10 13:56:02 yp - * Created. - * [96/10/10 yp] - * - * $EndLog$ + * getpid */ -#ifndef _FLOAT_H_ -#define _FLOAT_H_ -# include -#endif /* _FLOAT_H_ */

/*
 * NOTE(review): the header names on the three #include lines here were
 * stripped before this text reached review.  The list below is inferred
 * from the calls made in this file -- confirm against the real source.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "../libmicro.h"

/*
 * Framework entry point: one-time setup.
 *
 * This benchmark keeps no per-worker state, so lm_tsdsize is 0; the only
 * other work is publishing the usage note.  Always returns 0.
 */
int
benchmark_init()
{
	lm_tsdsize = 0;

	(void) sprintf(lm_usage, "note: measures getppid()");

	return (0);
}

/*
 * Timed loop: call getppid() lm_optB times and discard the result.
 *
 * tsd is unused.  res->re_count receives the number of calls made.
 * Always returns 0.
 */
/*ARGSUSED*/
int
benchmark(void *tsd, result_t *res)
{
	int calls = 0;

	while (calls < lm_optB) {
		(void) getppid();
		calls++;
	}
	res->re_count = calls;

	return (0);
}
diff --git a/tools/tests/libMicro/apple/lb_mmtest.c b/tools/tests/libMicro/apple/lb_mmtest.c new file mode 100644 index 000000000..2629ebe34 --- /dev/null +++ b/tools/tests/libMicro/apple/lb_mmtest.c @@ -0,0 +1,601 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lb_mmtest.c 1.0 08/21/06 Apple Inc." 
+#endif + + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int server_mode; + boolean_t verbose; + boolean_t oneway; + int overwrite; + int msg_type; + int num_ints; + int num_msgs; + const char *server_port_name; + mach_port_t server_port; + mach_port_t reply_port; + int request_msg_size; + void *request_msg; + int reply_msg_size; + void *reply_msg; + void *ints; + long pid; +} tsd_t; + +static boolean_t opt_verbose; +static boolean_t opt_oneway; +static int opt_num_msgs; +static int opt_msg_type; +static int opt_num_ints; +static char * opt_server_port_name; + +#pragma mark *** definitions from MMTest.c +/* + * These variables were taken from MMtest.c + */ +typedef struct { + mach_msg_header_t header; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_trivial_message; + +typedef struct { + mach_msg_header_t header; + u_int32_t numbers[0]; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_inline_message; + +typedef struct { + mach_msg_header_t header; + mach_msg_body_t body; + mach_msg_ool_descriptor_t descriptor; + mach_msg_trailer_t trailer; // subtract this when sending +} ipc_complex_message; + +void signal_handler(int sig) { +} + +enum { + msg_type_trivial = 0, + msg_type_inline = 1, + msg_type_complex = 2 +}; + +void server(void *tsd); +void client(void *tsd); + +#pragma mark *** routines from MMTest.c +/* + * These routines were taken from MMtest.c + */ + +void server(void *tsd) { + mach_msg_header_t *request; + mach_msg_header_t *reply; + mach_msg_option_t option; + kern_return_t ret; + + tsd_t *ts = (tsd_t *)tsd; + + request = (mach_msg_header_t *)ts->request_msg; + + reply = (mach_msg_header_t *)ts->reply_msg; + +#ifndef OPTIMIZED_SERVER + for (;;) { +#endif /* !OPTIMIZED_SERVER */ + + if 
(ts->verbose) printf("Awaiting message\n"); + option = MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE; + ret = mach_msg(request, + option, + 0, + ts->request_msg_size, + ts->server_port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + +#ifdef OPTIMIZED_SERVER + for (;;) { + mach_msg_header_t *tmp; +#endif /* OPTIMIZED_SERVER */ + + if (MACH_MSG_SUCCESS != ret) + break; + if (ts->verbose) printf("Received message\n"); + if (request->msgh_bits & MACH_MSGH_BITS_COMPLEX) { + ipc_complex_message *complex_request; + + complex_request = (ipc_complex_message *)ts->request_msg; + ret = vm_deallocate(mach_task_self(), + (vm_address_t)complex_request->descriptor.address, + complex_request->descriptor.size); + } + if (1 == request->msgh_id) { + if (ts->verbose) printf("Sending reply\n"); + reply->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MOVE_SEND_ONCE, 0); + reply->msgh_size = ts->reply_msg_size; + reply->msgh_remote_port = request->msgh_remote_port; + reply->msgh_local_port = MACH_PORT_NULL; + reply->msgh_id = 2; + +#ifdef OPTIMIZED_SERVER + option = MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE; + } else { + option = MACH_RCV_MSG|MACH_RCV_INTERRUPT|MACH_RCV_LARGE; + } + + ret = mach_msg( reply, + option, + ts->reply_msg_size, + ts->request_msg_size, + ts->server_port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + tmp = reply; + reply = request; + request = tmp; +#else /* !OPTIMIZED_SERVER */ + ret = mach_msg(reply, + MACH_SEND_MSG, + ts->reply_msg_size, + 0, + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (ret != MACH_MSG_SUCCESS) + break; + } +#endif /* !OPTIMIZED_SERVER */ + } + + if (MACH_RCV_INTERRUPTED != ret) { + mach_error("mach_msg: ", ret); + exit(1); + } +} + +void client(void *tsd) { + mach_msg_header_t *request; + mach_msg_header_t *reply; + mach_msg_option_t option; + kern_return_t ret; + int idx; + + tsd_t *ts = (tsd_t *)tsd; + +#ifdef SWAP_BUFFERS + mach_msg_header_t *tmp; +#endif + + request = (mach_msg_header_t 
*)ts->request_msg; + reply = (mach_msg_header_t *)ts->reply_msg; + + for (idx = 0; idx < ts->num_msgs; idx++) { + request->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, + MACH_MSG_TYPE_MAKE_SEND_ONCE); + request->msgh_size = ts->request_msg_size; + request->msgh_remote_port = ts->server_port; + request->msgh_local_port = ts->reply_port; + + if (ts->msg_type == msg_type_complex) { + ipc_complex_message *complexmsg = (ipc_complex_message *)request; + + request->msgh_bits |= MACH_MSGH_BITS_COMPLEX; + complexmsg->body.msgh_descriptor_count = 1; + complexmsg->descriptor.address = ts->ints; + complexmsg->descriptor.size = ts->num_ints * sizeof(u_int32_t); + complexmsg->descriptor.deallocate = FALSE; + complexmsg->descriptor.copy = MACH_MSG_VIRTUAL_COPY; + complexmsg->descriptor.type = MACH_MSG_OOL_DESCRIPTOR; + } + + if (ts->oneway) { + request->msgh_id = 0; + option = MACH_SEND_MSG; + } else { + request->msgh_id = 1; + option = MACH_SEND_MSG|MACH_RCV_MSG; + } + + if (ts->verbose) printf("Sending request\n"); +#ifdef SWAP_BUFFERS + ret = mach_msg( request, + option, + ts->request_msg_size, + ts->reply_msg_size, + ts->reply_port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (MACH_MSG_SUCCESS != ret) { + mach_error("client: mach_msg: ", ret); + fprintf(stderr, "bailing after %u iterations\n", idx); + exit(1); + } + tmp = request; + request = reply; + reply = tmp; +#else + ret = mach_msg_overwrite(request, + option, + ts->request_msg_size, + ts->reply_msg_size, + ts->reply_port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL, + reply, + 0); + if (MACH_MSG_SUCCESS != ret) { + mach_error("client: mach_msg_overwrite: ", ret); + fprintf(stderr, "bailing after %u iterations\n", idx); + exit(1); + } +#endif + if (ts->verbose && !ts->oneway) printf("Received reply\n"); + } +} + + +#pragma mark *** Darbench routines + +/* + * These routines are required by darbench + */ + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second 
+ */ + long pid; + tsd_t *ts = (tsd_t *)tsd; + + ts->server_mode = -1; + ts->verbose = opt_verbose; + ts->oneway = opt_oneway; + ts->overwrite = 0; + ts->msg_type = opt_msg_type; + ts->num_ints = opt_num_ints; + ts->num_msgs = opt_num_msgs; + ts->server_port_name = opt_server_port_name; + ts->server_port = MACH_PORT_NULL; + ts->reply_port = MACH_PORT_NULL; + ts->request_msg = NULL; + ts->request_msg_size = 0; + ts->reply_msg = NULL; + ts->reply_msg_size = 0; + + switch (ts->msg_type) { + case msg_type_trivial: + ts->request_msg_size = sizeof(ipc_trivial_message); + break; + + case msg_type_inline: + ts->request_msg_size = sizeof(ipc_inline_message) + + sizeof(u_int32_t) * ts->num_ints; + break; + + case msg_type_complex: + ts->request_msg_size = sizeof(ipc_complex_message); + ts->ints = malloc(sizeof(u_int32_t) * ts->num_ints); + break; + } + + ts->request_msg = malloc(ts->request_msg_size); + ts->reply_msg = malloc(ts->reply_msg_size); + + if (ts->server_mode) { + kern_return_t ret = 0; + mach_port_t bsport; + + ts->reply_msg_size -= sizeof(mach_msg_trailer_t); + ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, + &(ts->server_port)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(): ", ret); + exit(1); + } + ret = mach_port_insert_right(mach_task_self(), ts->server_port, + ts->server_port, MACH_MSG_TYPE_MAKE_SEND); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_insert_right(): ", ret); + exit(1); + } + ret = task_get_bootstrap_port(mach_task_self(), &bsport); + if (KERN_SUCCESS != ret) { + mach_error("task_get_bootstrap_port(): ", ret); + exit(1); + } + ret = bootstrap_check_in(bsport, (char *)ts->server_port_name, + &ts->server_port); + if (KERN_SUCCESS != ret) { + mach_error("bootstrap_register(): ", ret); + exit(1); + } + } else { /* client mode */ + kern_return_t ret = 0; + mach_port_t bsport; + + ts->request_msg_size -= sizeof(mach_msg_trailer_t); + + ret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, + 
&(ts->reply_port)); + if (KERN_SUCCESS != ret) { + mach_error("mach_port_allocate(): ", ret); + exit(1); + } + + ret = task_get_bootstrap_port(mach_task_self(), &bsport); + if (KERN_SUCCESS != ret) { + mach_error("task_get_bootstrap_port(): ", ret); + exit(1); + } + ret = bootstrap_look_up(bsport, (char *)ts->server_port_name, + &(ts->server_port)); + if (KERN_SUCCESS != ret) { + mach_error("bootstrap_look_up(): ", ret); + exit(1); + } + } + + if (ts->verbose) { + if (ts->server_mode) { + printf("Server waiting for IPC messages from client on port '%s'.\n", + ts->server_port_name); + } else { + printf("Client sending %d %s IPC messages to port '%s' in %s mode.\n", + ts->num_msgs, (ts->msg_type == msg_type_inline) ? "inline" : + ((ts->msg_type == msg_type_complex) ? "complex" : "trivial"), + ts->server_port_name, (ts->oneway ? "oneway" : "rpc")); + } + } + + pid = fork(); + switch (pid) { + case 0: + server(tsd); + exit(0); + break; + case -1: + return (-1); + default: + ts->pid = pid; + break; + } + return (0); +} + +int +benchmark_finirun() +{ + (void) fprintf(stderr, "benchmark_finirun\n"); + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * ...and the framework will throw an error + * lm_optstr has two kinds of arguments, boolean (single + * lower case character) and with an argument (single lower + * case character plus a :, indicating the next option is + * the argument) + * + */ + (void) sprintf(lm_optstr, "voc:t:n:p:"); + /* + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " -v\t\tbe verbose\n" + " -o\t\tdo not request return reply (client)\n" + " -c num\t\tnumber of messages to send (client)\n" + " -t trivial|inline|complex\ttype of messages to send (client)\n" + " -n num\tnumber of 32-bit ints to 
send in messages\n" + "\t\t\t(client's value must be <= the server's)\n" + " -p portname\tname of port on which to communicate\n" + "\t\t\t(client and server must use the same value)\n"); + + opt_verbose = FALSE; + opt_oneway = FALSE; + opt_num_msgs = 10000; + opt_msg_type = msg_type_trivial; + opt_num_ints = 64; + opt_server_port_name = malloc(32); + strcpy(opt_server_port_name, "TEST"); + + return (0); +} + +int +benchmark_fini() +{ + free(opt_server_port_name); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + kill(ts->pid, SIGKILL); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ +// tsd_t *ts = (tsd_t *)tsd; + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + (void) fprintf(stderr, "benchmark_optswitch\n"); + + switch (opt) { + case 'v': + opt_verbose = TRUE; + break; + case 'o': + opt_oneway = TRUE; + break; + case 'c': + opt_num_msgs = sizetoint(optarg); + break; + case 't': + if ( 0 == strcmp("trivial", optarg) ) + opt_msg_type = msg_type_trivial; + else if ( 0 == strcmp("inline", optarg) ) + opt_msg_type = msg_type_inline; + else if ( 0 == strcmp("complex", optarg) ) + opt_msg_type = msg_type_complex; + else { + (void) fprintf(stderr, "incorrect argument for message type %s\n", optarg); + return (-1); + } + break; + case 'n': + opt_num_ints = sizetoint(optarg); + break; + case 'p': + strncpy(opt_server_port_name, optarg, 32); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ +// tsd_t *ts = (tsd_t *)tsd; + return (0); +} + +int +benchmark_initrun() +{ + (void) fprintf(stderr, "benchmark_initrun\n"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying 
for your initialization here + * and it is really a bad idea + */ +// tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + client(tsd); + } + + return (0); +} diff --git a/tools/tests/libMicro/apple/lm_null_call.c b/tools/tests/libMicro/apple/lm_null_call.c new file mode 100644 index 000000000..825c14c7d --- /dev/null +++ b/tools/tests/libMicro/apple/lm_null_call.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)socket.c 1.3 05/08/04 Apple Inc." +#endif + + + +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int fd; + char* file; +} tsd_t; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-t int (default 1)]\n" + "notes: measures nothing\n"); + return (0); +} + +int +benchmark_fini() +{ + (void) fprintf(stderr, "benchmark_fini\n"); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "null_call (getppid)\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize 
your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + int i; + + for (i = 0; i < lm_optB; i++) { + /* + * just to show that ts really contains state + */ + getppid(); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_bw_file_rd.c b/tools/tests/libMicro/apple/lmbench_bw_file_rd.c new file mode 100644 index 000000000..d5a4f9561 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_bw_file_rd.c @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_bw_file_rd.c 1.0 08/17/06 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include +// add additional headers needed here. +#include + +#include "../libmicro.h" + +#if DEBUG +# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) +#else +# define debug(fmt, args...) +#endif + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + char filename[256]; + int fd; + int clone; + bool open_read_close; +} tsd_t; + +/* + * You can have any lower-case option you want to define. + * options are specified in the lm_optstr as either a + * single lower-case letter, or a single lower case letter + * with a colon after it. In this example, you can optionally + * specify -c {str} -e or -t {number} + * -c takes a string (quote the string if blanks) + * -e is a boolean + * -t takes a numeric + * argument. + */ +static char optf[256]; +static bool opti = false; // io_only or read and i/o (default read and i/o) + +#define CHK(x) if ((int)(x) == -1) { perror(#x); exit(1); } +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + +#define TYPE int +#define MINSZ (sizeof(TYPE) * 128) + +void *buf; /* do the I/O here */ +size_t xfersize; /* do it in units of this */ +size_t count; /* bytes to move (can't be modified) */ + +/* analogous to bzero, bcopy, etc., except that it just reads + * data into the processor + */ +long +bread(void* buf, long nbytes) +{ + long sum = 0; + register long *p, *next; + register char *end; + + p = (long*)buf; + end = (char*)buf + nbytes; + for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) { + sum += + p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+ + p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+ + p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+ + p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+ + p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+ + p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+ + p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+ + p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+ + p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+ + p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+ + p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+ + p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+ + p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+ + p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+ + p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+ + p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+ + p[123]+p[124]+p[125]+p[126]+p[127]; + } + for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) { + sum += + p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]; + } + for (next = p + 1; (void*)next <= (void*)end; p = next, next++) { + sum += *p; + } + return sum; +} + +void doit(int fd) +{ + size_t size, chunk; + + size = count; + chunk = xfersize; + while (size >= 0) { + if (size < chunk) chunk = size; + if (read(fd, buf, MIN(size, chunk)) <= 0) { + break; + } + bread(buf, MIN(size, xfersize)); + size -= chunk; + } +} + + +int +benchmark_init() +{ + debug("benchmark_init"); + /* 
+ * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "f:i"); + /* + * tsd_t is the state info struct that we pass around + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-f filename]\n" + " [-i] io_only (no open/close)\n" + "notes: read and sum file via read(2) interface"); + sprintf(optf, "/tmp/%d", (int)getpid()); + return (0); +} + +/* + * This is where you parse your lower-case arguments. + * the format was defined in the lm_optstr assignment + * in benchmark_init + */ +int +benchmark_optswitch(int opt, char *optarg) +{ + debug("benchmark_optswitch"); + + switch (opt) { + case 'f': + strncpy(optf, optarg, 255); + debug("optf = %s\n", optf); + break; + case 'i': + opti = true; + debug("opti = %s\n", opte? "true": "false"); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + debug("benchmark_initrun"); + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t *ts = (tsd_t *)tsd; + strncpy(ts->filename, optf, 255); + ts->open_read_close = opti; + debug("benchmark_initworker: ts_once = %i\n",ts->ts_once); + return (0); +} + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + debug("benchmark_initbatch"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * try not to initialize things here. This is the main + * loop of things to get timed. 
Start a server in + * benchmark_initbatch + */ + tsd_t *ts = (tsd_t *)tsd; + int i; + int fd; + + debug("in to benchmark - optB = %i", lm_optB); + for (i = 0; i < lm_optB; i++) { + if (ts->open_read_close) { + fd = open(ts->filename, O_RDONLY); + doit(fd); + close(fd); + } else { + doit(fd); + } + } + res->re_count = i; + debug("out of benchmark - optB = %i", lm_optB); + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + debug("benchmark_finibatch"); + return (0); +} + +int +benchmark_finiworker(void *tsd) +{ + debug("benchmark_finiworker"); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + debug("benchmark_result"); + return (&result); +} + +int +benchmark_finirun() +{ + debug("benchmark_finirun"); + return (0); +} + + +int +benchmark_fini() +{ + debug("benchmark_fini"); + return (0); +} + diff --git a/tools/tests/libMicro/apple/lmbench_bw_mem.c b/tools/tests/libMicro/apple/lmbench_bw_mem.c new file mode 100644 index 000000000..4b5aa07c3 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_bw_mem.c @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + +/* + * To port to libmicro, I had to add options for + * some items which were just arguments before. + * -s is the size (more than 512) + * -x is the command to execute (rd wr rdwr cp fwr frd fcp bzero bcopy) + * see usage string for command options. + */ + +#ifdef __sun +#pragma ident "@(#)lmbench_bw_mem.c 1.0 20060814 Apple Inc." +#endif + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +#define TRIES 11 // value from bench.h in lmbench +#define TYPE int + +/* + * rd - 4 byte read, 32 byte stride + * wr - 4 byte write, 32 byte stride + * rdwr - 4 byte read followed by 4 byte write to same place, 32 byte stride + * cp - 4 byte read then 4 byte write to different place, 32 byte stride + * fwr - write every 4 byte word + * frd - read every 4 byte word + * fcp - copy every 4 byte word + * + * All tests do 512 byte chunks in a loop. + * + * XXX - do a 64bit version of this. 
+ */ +void rd(iter_t iterations, void *cookie); +void wr(iter_t iterations, void *cookie); +void rdwr(iter_t iterations, void *cookie); +void mcp(iter_t iterations, void *cookie); +void fwr(iter_t iterations, void *cookie); +void frd(iter_t iterations, void *cookie); +void fcp(iter_t iterations, void *cookie); +void loop_bzero(iter_t iterations, void *cookie); +void loop_bcopy(iter_t iterations, void *cookie); +void init_overhead(iter_t iterations, void *cookie); +void init_loop(iter_t iterations, void *cookie); +void cleanup(iter_t iterations, void *cookie); + +typedef struct _state { + double overhead; + size_t nbytes; + int need_buf2; + int aligned; + TYPE *buf; + TYPE *buf2; + TYPE *buf2_orig; + TYPE *lastone; + size_t N; +} state_t; + + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + double overhead; + size_t nbytes; + int need_buf2; + int aligned; + TYPE *buf; + TYPE *buf2; + TYPE *buf2_orig; + TYPE *lastone; + size_t N; + int parallel; + int warmup; + int repetitions; +} tsd_t; + + +static int optp = 1; +static int optw = 0; +static int optn = TRIES; +static int opt_size = 0; +static char *opt_what; // maximum "what" command string size + +void +init_overhead(iter_t iterations, void *cookie) +{ +} + +void +init_loop(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + + if (iterations) return; + + ts->buf = (TYPE *)valloc(ts->nbytes); + ts->buf2_orig = NULL; + ts->lastone = (TYPE*)ts->buf - 1; + ts->lastone = (TYPE*)((char *)ts->buf + ts->nbytes - 512); + ts->N = ts->nbytes; + + if (!ts->buf) { + perror("malloc"); + exit(1); + } + bzero((void*)ts->buf, ts->nbytes); + + if (ts->need_buf2 == 1) { + ts->buf2_orig = ts->buf2 = (TYPE *)valloc(ts->nbytes + 2048); + if (!ts->buf2) { + perror("malloc"); + exit(1); + } + + /* default is to have stuff unaligned wrt each other */ + /* XXX - this is not well tested or thought out */ + if (ts->aligned) { + char *tmp = (char *)ts->buf2; + + tmp += 2048 - 128; + 
ts->buf2 = (TYPE *)tmp; + } + } +} + +void +cleanup(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + + if (iterations) return; + + free(ts->buf); + if (ts->buf2_orig) free(ts->buf2_orig); +} + +void +rd(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + register int sum = 0; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + while (p <= lastone) { + sum += +#define DOIT(i) p[i]+ + DOIT(0) DOIT(4) DOIT(8) DOIT(12) DOIT(16) DOIT(20) DOIT(24) + DOIT(28) DOIT(32) DOIT(36) DOIT(40) DOIT(44) DOIT(48) DOIT(52) + DOIT(56) DOIT(60) DOIT(64) DOIT(68) DOIT(72) DOIT(76) + DOIT(80) DOIT(84) DOIT(88) DOIT(92) DOIT(96) DOIT(100) + DOIT(104) DOIT(108) DOIT(112) DOIT(116) DOIT(120) + p[124]; + p += 128; + } + } +} +#undef DOIT + +void +wr(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + while (p <= lastone) { +#define DOIT(i) p[i] = 1; + DOIT(0) DOIT(4) DOIT(8) DOIT(12) DOIT(16) DOIT(20) DOIT(24) + DOIT(28) DOIT(32) DOIT(36) DOIT(40) DOIT(44) DOIT(48) DOIT(52) + DOIT(56) DOIT(60) DOIT(64) DOIT(68) DOIT(72) DOIT(76) + DOIT(80) DOIT(84) DOIT(88) DOIT(92) DOIT(96) DOIT(100) + DOIT(104) DOIT(108) DOIT(112) DOIT(116) DOIT(120) DOIT(124); + p += 128; + } + } +} +#undef DOIT + +void +rdwr(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + register int sum = 0; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + while (p <= lastone) { +#define DOIT(i) sum += p[i]; p[i] = 1; + DOIT(0) DOIT(4) DOIT(8) DOIT(12) DOIT(16) DOIT(20) DOIT(24) + DOIT(28) DOIT(32) DOIT(36) DOIT(40) DOIT(44) DOIT(48) DOIT(52) + DOIT(56) DOIT(60) DOIT(64) DOIT(68) DOIT(72) DOIT(76) + DOIT(80) DOIT(84) DOIT(88) DOIT(92) DOIT(96) DOIT(100) + DOIT(104) DOIT(108) DOIT(112) DOIT(116) DOIT(120) DOIT(124); + p += 128; + } + } +} +#undef DOIT + +void 
+mcp(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + TYPE* p_save = NULL; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + register TYPE *dst = ts->buf2; + while (p <= lastone) { +#define DOIT(i) dst[i] = p[i]; + DOIT(0) DOIT(4) DOIT(8) DOIT(12) DOIT(16) DOIT(20) DOIT(24) + DOIT(28) DOIT(32) DOIT(36) DOIT(40) DOIT(44) DOIT(48) DOIT(52) + DOIT(56) DOIT(60) DOIT(64) DOIT(68) DOIT(72) DOIT(76) + DOIT(80) DOIT(84) DOIT(88) DOIT(92) DOIT(96) DOIT(100) + DOIT(104) DOIT(108) DOIT(112) DOIT(116) DOIT(120) DOIT(124); + p += 128; + dst += 128; + } + p_save = p; + } +} +#undef DOIT + +void +fwr(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + TYPE* p_save = NULL; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + while (p <= lastone) { +#define DOIT(i) p[i]= + DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) DOIT(5) DOIT(6) + DOIT(7) DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12) + DOIT(13) DOIT(14) DOIT(15) DOIT(16) DOIT(17) DOIT(18) + DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23) DOIT(24) + DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30) + DOIT(31) DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36) + DOIT(37) DOIT(38) DOIT(39) DOIT(40) DOIT(41) DOIT(42) + DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47) DOIT(48) + DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54) + DOIT(55) DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60) + DOIT(61) DOIT(62) DOIT(63) DOIT(64) DOIT(65) DOIT(66) + DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71) DOIT(72) + DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78) + DOIT(79) DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84) + DOIT(85) DOIT(86) DOIT(87) DOIT(88) DOIT(89) DOIT(90) + DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95) DOIT(96) + DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102) + DOIT(103) DOIT(104) DOIT(105) DOIT(106) DOIT(107) + DOIT(108) DOIT(109) DOIT(110) DOIT(111) DOIT(112) + DOIT(113) DOIT(114) DOIT(115) 
DOIT(116) DOIT(117) + DOIT(118) DOIT(119) DOIT(120) DOIT(121) DOIT(122) + DOIT(123) DOIT(124) DOIT(125) DOIT(126) DOIT(127) 1; + p += 128; + } + p_save = p; + } +} +#undef DOIT + +void +frd(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register int sum = 0; + register TYPE *lastone = ts->lastone; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + while (p <= lastone) { + sum += +#define DOIT(i) p[i]+ + DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) DOIT(5) DOIT(6) + DOIT(7) DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12) + DOIT(13) DOIT(14) DOIT(15) DOIT(16) DOIT(17) DOIT(18) + DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23) DOIT(24) + DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30) + DOIT(31) DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36) + DOIT(37) DOIT(38) DOIT(39) DOIT(40) DOIT(41) DOIT(42) + DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47) DOIT(48) + DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54) + DOIT(55) DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60) + DOIT(61) DOIT(62) DOIT(63) DOIT(64) DOIT(65) DOIT(66) + DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71) DOIT(72) + DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78) + DOIT(79) DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84) + DOIT(85) DOIT(86) DOIT(87) DOIT(88) DOIT(89) DOIT(90) + DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95) DOIT(96) + DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102) + DOIT(103) DOIT(104) DOIT(105) DOIT(106) DOIT(107) + DOIT(108) DOIT(109) DOIT(110) DOIT(111) DOIT(112) + DOIT(113) DOIT(114) DOIT(115) DOIT(116) DOIT(117) + DOIT(118) DOIT(119) DOIT(120) DOIT(121) DOIT(122) + DOIT(123) DOIT(124) DOIT(125) DOIT(126) p[127]; + p += 128; + } + } +} +#undef DOIT + +void +fcp(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *lastone = ts->lastone; + + while (iterations-- > 0) { + register TYPE *p = ts->buf; + register TYPE *dst = ts->buf2; + while (p <= lastone) { +#define DOIT(i) dst[i]=p[i]; + DOIT(0) DOIT(1) DOIT(2) DOIT(3) DOIT(4) 
DOIT(5) DOIT(6) + DOIT(7) DOIT(8) DOIT(9) DOIT(10) DOIT(11) DOIT(12) + DOIT(13) DOIT(14) DOIT(15) DOIT(16) DOIT(17) DOIT(18) + DOIT(19) DOIT(20) DOIT(21) DOIT(22) DOIT(23) DOIT(24) + DOIT(25) DOIT(26) DOIT(27) DOIT(28) DOIT(29) DOIT(30) + DOIT(31) DOIT(32) DOIT(33) DOIT(34) DOIT(35) DOIT(36) + DOIT(37) DOIT(38) DOIT(39) DOIT(40) DOIT(41) DOIT(42) + DOIT(43) DOIT(44) DOIT(45) DOIT(46) DOIT(47) DOIT(48) + DOIT(49) DOIT(50) DOIT(51) DOIT(52) DOIT(53) DOIT(54) + DOIT(55) DOIT(56) DOIT(57) DOIT(58) DOIT(59) DOIT(60) + DOIT(61) DOIT(62) DOIT(63) DOIT(64) DOIT(65) DOIT(66) + DOIT(67) DOIT(68) DOIT(69) DOIT(70) DOIT(71) DOIT(72) + DOIT(73) DOIT(74) DOIT(75) DOIT(76) DOIT(77) DOIT(78) + DOIT(79) DOIT(80) DOIT(81) DOIT(82) DOIT(83) DOIT(84) + DOIT(85) DOIT(86) DOIT(87) DOIT(88) DOIT(89) DOIT(90) + DOIT(91) DOIT(92) DOIT(93) DOIT(94) DOIT(95) DOIT(96) + DOIT(97) DOIT(98) DOIT(99) DOIT(100) DOIT(101) DOIT(102) + DOIT(103) DOIT(104) DOIT(105) DOIT(106) DOIT(107) + DOIT(108) DOIT(109) DOIT(110) DOIT(111) DOIT(112) + DOIT(113) DOIT(114) DOIT(115) DOIT(116) DOIT(117) + DOIT(118) DOIT(119) DOIT(120) DOIT(121) DOIT(122) + DOIT(123) DOIT(124) DOIT(125) DOIT(126) DOIT(127) + p += 128; + dst += 128; + } + } +} + +void +loop_bzero(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *p = ts->buf; + register size_t N = ts->N; + + while (iterations-- > 0) { + bzero(p, N); + } +} + +void +loop_bcopy(iter_t iterations, void *cookie) +{ + tsd_t *ts = (tsd_t *)cookie; + register TYPE *p = ts->buf; + register TYPE *dst = ts->buf2; + register size_t N = ts->N; + + while (iterations-- > 0) { + bcopy(p,dst,N); + } +} + +#pragma mark libmicro routines + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + tsd_t *ts = (tsd_t *)tsd; + ts->buf = (TYPE *)valloc(ts->nbytes); + ts->buf2_orig = NULL; + ts->lastone = (TYPE*)ts->buf - 1; + ts->lastone = (TYPE*)((char *)ts->buf + ts->nbytes - 512); + ts->N = 
ts->nbytes; + + if (!ts->buf) { + perror("malloc"); + exit(1); + } + bzero((void*)ts->buf, ts->nbytes); + + if (ts->need_buf2 == 1) { + ts->buf2_orig = ts->buf2 = (TYPE *)valloc(ts->nbytes + 2048); + if (!ts->buf2) { + perror("malloc"); + exit(1); + } + + /* default is to have stuff unaligned wrt each other */ + /* XXX - this is not well tested or thought out */ + if (ts->aligned) { + char *tmp = (char *)ts->buf2; + + tmp += 2048 - 128; + ts->buf2 = (TYPE *)tmp; + } + } + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n:s:x:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + opt_what = (char *)malloc(30); + + (void) sprintf(lm_usage, + " [-p ]\n" + " [-w ]\n" + " [-n ]\n" + " -s \n" + " must be larger than 512" + " -x what\n" + " what: rd wr rdwr cp fwr frd fcp bzero bcopy\n" + " [conflict] -- unknown option?\n" + ); + return (0); +} + +int +benchmark_fini() +{ + free(opt_what); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + free(ts->buf); + if (ts->buf2_orig) free(ts->buf2_orig); + return (0); +} + +/* return -1 to display usage (i.e. 
if can't parse arguments */
+
+/* size of the heap buffer benchmark_init() allocates for opt_what */
+#define	OPT_WHAT_SIZE	30
+
+/*
+ * One row per -x command: the kernel that implements it and whether that
+ * kernel writes into the second buffer (ts->buf2).
+ */
+static const struct bw_mem_cmd {
+	const char	*name;
+	void		(*kernel)(iter_t, void *);
+	int		need_buf2;
+} bw_mem_cmds[] = {
+	{ "rd",		rd,		0 },
+	{ "wr",		wr,		0 },
+	{ "rdwr",	rdwr,		0 },
+	{ "cp",		mcp,		1 },
+	{ "frd",	frd,		0 },
+	{ "fwr",	fwr,		0 },
+	{ "fcp",	fcp,		1 },
+	{ "bzero",	loop_bzero,	0 },
+	{ "bcopy",	loop_bcopy,	1 },
+};
+
+/*
+ * Look up a -x command by exact name.
+ * Returns the matching table row, or NULL when the name is unknown.
+ */
+static const struct bw_mem_cmd *
+bw_mem_find_cmd(const char *what)
+{
+	size_t k;
+
+	for (k = 0; k < sizeof (bw_mem_cmds) / sizeof (bw_mem_cmds[0]); k++) {
+		/* strcmp() returns 0 on a match, so it must be compared to 0 */
+		if (strcmp(what, bw_mem_cmds[k].name) == 0)
+			return (&bw_mem_cmds[k]);
+	}
+	return (NULL);
+}
+
+/*
+ * Parse one option (-p, -w, -n, -s, -x) into the file-scope opt* variables.
+ * Returns 0 on success and -1 for an unknown option, a non-positive -p
+ * value, or a -x argument that does not fit in opt_what.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+
+	switch (opt) {
+	case 'p':
+		optp = sizetoint(optarg);
+		if (optp <= 0)
+			return (-1);
+		break;
+	case 'w':
+		optw = sizetoint(optarg);
+		break;
+	case 'n':
+		optn = sizetoint(optarg);
+		break;
+	case 's':
+		opt_size = sizetoint(optarg);
+		break;
+	case 'x':
+		/* opt_what is a fixed 30-byte allocation; refuse to overrun it */
+		if (strlen(optarg) >= OPT_WHAT_SIZE)
+			return (-1);
+		strcpy(opt_what, optarg);
+		break;
+	default:
+		return(-1);
+	}
+//	(void) fprintf(stderr, "optp = %i optw = %i optn = %i opt_size = %i\n",
+//		optp, optw, optn, opt_size);
+//	(void) fprintf(stderr, "opt_what = %s\n", opt_what);
+	return (0);
+}
+
+/*
+ * Per-worker setup: copy every parsed option into the worker's tsd_t.
+ *
+ * nbytes, need_buf2 and aligned are set here rather than in benchmark()
+ * because benchmark_initbatch() sizes and allocates the buffers from them,
+ * and it runs before the first benchmark() call.
+ */
+int
+benchmark_initworker(void *tsd)
+{
+	/*
+	 * initialize your state variables here first
+	 */
+	tsd_t *ts = (tsd_t *)tsd;
+	const struct bw_mem_cmd *cmd = bw_mem_find_cmd(opt_what);
+
+	ts->parallel = optp;
+	ts->warmup = optw;
+	ts->repetitions = optn;
+
+	ts->overhead = 0;
+	ts->aligned = 0;
+	ts->nbytes = opt_size;
+	/* benchmark_initbatch() tests need_buf2 == 1, so store exactly 0 or 1 */
+	ts->need_buf2 = (cmd != NULL && cmd->need_buf2) ? 1 : 0;
+	return (0);
+}
+
+/* Per-run setup hook; nothing to do for this benchmark. */
+int
+benchmark_initrun()
+{
+	return (0);
+}
+
+/*
+ * Timed loop: runs the kernel selected with -x lm_optB times, each call
+ * doing ts->repetitions passes over the buffer, and reports the number of
+ * calls in res->re_count.
+ *
+ * Returns -1 when the buffer is smaller than the 512 bytes one kernel loop
+ * iteration touches, or when the -x command is not recognised.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	const struct bw_mem_cmd *cmd;
+	int i;
+
+	if (ts->nbytes < 512) { /* this is the number of bytes in the loop */
+		return(-1);
+	}
+
+	cmd = bw_mem_find_cmd(opt_what);
+	if (cmd == NULL) {
+		return(-1);
+	}
+
+	for (i = 0 ; i < lm_optB ; i++)
+	{
+		cmd->kernel( ts->repetitions, tsd );
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/apple/lmbench_bw_mmap_rd.c b/tools/tests/libMicro/apple/lmbench_bw_mmap_rd.c
new file mode 100644
index 000000000..656bbb114
--- /dev/null
+++ b/tools/tests/libMicro/apple/lmbench_bw_mmap_rd.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2006 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_bw_mmap_rd.c 1.0 08/17/06 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include +// add additional headers needed here. +#include +#include +#include + +#include "../libmicro.h" + +#if DEBUG +# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) +#else +# define debug(fmt, args...) +#endif + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + size_t nbytes; + char filename[_POSIX_PATH_MAX]; + int fd; + int clone; + void *buf; + bool open_read_close; +} tsd_t; + +/* + * You can have any lower-case option you want to define. + * options are specified in the lm_optstr as either a + * single lower-case letter, or a single lower case letter + * with a colon after it. In this example, you can optionally + * specify -c {str} -e or -t {number} + * -c takes a string (quote the string if blanks) + * -e is a boolean + * -t takes a numeric + * argument. + */ +static char optf[_POSIX_PATH_MAX]; +static int opts = 1024; +static bool opti = false; // io_only or read and i/o (default read and i/o) + +#ifdef MAP_FILE +# define MMAP_FLAGS MAP_FILE|MAP_SHARED +#else +# define MMAP_FLAGS MAP_SHARED +#endif + +#define CHK(x) if ((int)(x) == -1) { perror(#x); exit(1); } +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) +#endif + +#define TYPE int +#define MINSZ (sizeof(TYPE) * 128) + +void *buf; /* do the I/O here */ +size_t xfersize; /* do it in units of this */ +size_t count; /* bytes to move (can't be modified) */ + +/* analogous to bzero, bcopy, etc., except that it just reads + * data into the processor + */ +long +bread(void* buf, long nbytes) +{ + long sum = 0; + register long *p, *next; + register char *end; + + p = (long*)buf; + end = (char*)buf + nbytes; + for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) { + sum += + p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+ + p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+ + p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+ + p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+ + p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+ + p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+ + p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+ + p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+ + p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+ + p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+ + p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+ + p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+ + p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+ + p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+ + p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+ + p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+ + p[123]+p[124]+p[125]+p[126]+p[127]; + } + for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) { + sum += + p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]; + } + for (next = p + 1; (void*)next <= (void*)end; p = next, next++) { + sum += *p; + } + return sum; +} + +int +cp(char* src, char* dst, mode_t mode) +{ + int sfd, dfd; + char buf[8192]; + ssize_t size; + + if ((sfd = open(src, O_RDONLY)) < 0) { + return -1; + } + if ((dfd = open(dst, O_CREAT|O_TRUNC|O_RDWR, mode)) < 0) { + return -1; + } + while ((size = read(sfd, buf, 8192)) > 0) { + if (write(dfd, buf, size) < size) 
return -1; + } + fsync(dfd); + close(sfd); + close(dfd); + return 0; +} + + +int +benchmark_init() +{ + debug("benchmark_init"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "f:is:"); + /* + * tsd_t is the state info struct that we pass around + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " -f filename\n" + " -s size\n" + " [-i] io_only (no open/close)\n" + "notes: read and sum file via memory mapping mmap(2) interface"); + sprintf(optf, "/tmp/%d", (int)getpid()); + opts = 1024; + return (0); +} + +/* + * This is where you parse your lower-case arguments. + * the format was defined in the lm_optstr assignment + * in benchmark_init + */ +int +benchmark_optswitch(int opt, char *optarg) +{ + debug("benchmark_optswitch"); + + switch (opt) { + case 'f': + strncpy(optf, optarg, 255); + debug("optf = %s\n", optf); + break; + case 'i': + opti = true; + debug("opti = %s\n", opti? 
"true": "false"); + break; + case 's': + opts = sizetoint(optarg); + debug("opts = %d\n", opts); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + debug("benchmark_initrun"); + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t *state = (tsd_t *)tsd; + + strncpy(state->filename, optf, 255); + state->nbytes = opts; + state->open_read_close = opti; + + debug("benchmark_initworker\n"); + return (0); +} + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + tsd_t *state = (tsd_t *)tsd; + state->fd = -1; + state->buf = NULL; + + if (state->clone) { + char buf[8192]; + char* s; + + /* copy original file into a process-specific one */ + sprintf(buf, "/tmp/%d", (int)getpid()); + s = (char*)malloc(strlen(state->filename) + strlen(buf) + 1); + sprintf(s, "/tmp/%s%d", state->filename, (int)getpid()); + if (cp(state->filename, s, S_IREAD|S_IWRITE) < 0) { + perror("creating private tempfile"); + unlink(s); + exit(1); + } + strcpy(state->filename, s); + } + + CHK(state->fd = open(state->filename, 0)); + CHK(state->buf = mmap(0, state->nbytes, PROT_READ, + MMAP_FLAGS, state->fd, 0)); + debug("benchmark_initbatch"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * try not to initialize things here. This is the main + * loop of things to get timed. 
Start a server in + * benchmark_initbatch + */ + tsd_t *state = (tsd_t *)tsd; + int i; + int fd; + void *p; + + debug("in to benchmark - optB = %i", lm_optB); + for (i = 0; i < lm_optB; i++) { + if (state->open_read_close) { + CHK(fd = open(state->filename, 0)); + CHK(p = mmap(0, state->nbytes, PROT_READ, MMAP_FLAGS, fd, 0)); + bread(p, state->nbytes); + close(fd); + munmap(p, state->nbytes); + } else { + bread(state->buf, state->nbytes); + } + } + res->re_count = i; + debug("out of benchmark - optB = %i", lm_optB); + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *state = (tsd_t *)tsd; + if (state->buf) munmap(state->buf, state->nbytes); + if (state->fd >= 0) close(state->fd); + if (state->clone) unlink(state->filename); + debug("benchmark_finibatch"); + return (0); +} + +int +benchmark_finiworker(void *tsd) +{ + debug("benchmark_finiworker"); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + debug("benchmark_result"); + return (&result); +} + +int +benchmark_finirun() +{ + debug("benchmark_finirun"); + return (0); +} + + +int +benchmark_fini() +{ + debug("benchmark_fini"); + return (0); +} + diff --git a/tools/tests/libMicro/apple/lmbench_bw_unix.c b/tools/tests/libMicro/apple/lmbench_bw_unix.c new file mode 100644 index 000000000..9c86510e8 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_bw_unix.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include +// add additional headers needed here. +#include +#include + +#include "../libmicro.h" + +void writer(int controlfd, int writefd, char* buf, void* cookie); +void touch(char *buf, int nbytes); + +#if DEBUG +# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) +#else +# define debug(fmt, args...) 
+#endif + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int pid; + size_t xfer; /* bytes to read/write per "packet" */ + size_t bytes; /* bytes to read/write in one iteration */ + char *buf; /* buffer memory space */ + int pipes[2]; + int control[2]; + int initerr; + int parallel; + int warmup; + int repetitions; +} tsd_t; + +size_t XFER = 10*1024*1024; +#ifndef XFERSIZE +#define XFERSIZE (64*1024) /* all bandwidth I/O should use this */ +#endif + +/* + * You can have any lower-case option you want to define. + * options are specified in the lm_optstr as either a + * single lower-case letter, or a single lower case letter + * with a colon after it. In this example, you can optionally + * specify -c {str} -e or -t {number} + * -c takes a string (quote the string if blanks) + * -e is a boolean + * -t takes a numeric + * argument. + */ +static int optm = XFERSIZE; +static int opts = 10*1024*1024; +static int optw = 0; + +int +benchmark_init() +{ + debug("benchmark_init\n"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "m:s:w:"); + /* + * + * tsd_t is the state_information struct + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-m ]\n" + " [-s ]\n" + " [-w ]\n"); + + return (0); +} + +/* + * This is where you parse your lower-case arguments. 
+ * the format was defined in the lm_optstr assignment
+ * in benchmark_init
+ */
+/*
+ * Options: -m bytes per read/write call, -s bytes per iteration, -w warmup.
+ * Returns 0 on success and -1 for an unknown option, a non-positive -m
+ * (it becomes the read/write length) or a negative -s.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	debug("benchmark_optswitch\n");
+
+	switch (opt) {
+	case 'm':
+		optm = atoi(optarg);
+		if (optm <= 0)
+			return (-1);
+		break;
+	case 's':
+		opts = atoi(optarg);
+		if (opts < 0)
+			return (-1);
+		break;
+	case 'w':
+		optw = atoi(optarg);
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/* Per-run setup hook; nothing to do for this benchmark. */
+int
+benchmark_initrun()
+{
+	debug("benchmark_initrun\n");
+	return (0);
+}
+
+/* Per-worker setup: copy the parsed options into the worker's tsd_t. */
+int
+benchmark_initworker(void *tsd)
+{
+	/*
+	 * initialize your state variables here first
+	 */
+	tsd_t *state = (tsd_t *)tsd;
+	state->xfer = optm;
+	state->bytes = opts;
+	state->parallel = lm_optP;
+	state->warmup = optw;
+	state->repetitions = lm_optB;
+	debug("benchmark_initworker: repetitions = %i\n",state->repetitions);
+	return (0);
+}
+
+/*
+ * Per-batch setup: allocate the transfer buffer, create the data
+ * socketpair and the control pipe, and fork the writer child.
+ *
+ * Always returns 0; a failure is recorded in state->initerr
+ * (1 socketpair, 2 pipe, 3 fork, 4 buffer allocation) after releasing
+ * whatever descriptors had already been created.
+ */
+/*ARGSUSED*/
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t *state = (tsd_t *)tsd;
+	/*
+	 * read() and write() move state->xfer bytes per call, so the buffer
+	 * has to hold at least that much even when -m exceeds XFERSIZE.
+	 */
+	size_t bufsize = state->xfer > XFERSIZE ? state->xfer : XFERSIZE;
+
+	state->initerr = 0;
+	state->pid = 0;
+	state->buf = valloc(bufsize);
+	if (state->buf == NULL) {
+		perror("valloc");
+		state->initerr = 4;
+		return(0);
+	}
+	touch(state->buf, (int)bufsize);
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, state->pipes) == -1) {
+		perror("socketpair");
+		state->initerr = 1;
+		return(0);
+	}
+	if (pipe(state->control) == -1) {
+		perror("pipe");
+		close(state->pipes[0]);
+		close(state->pipes[1]);
+		state->initerr = 2;
+		return(0);
+	}
+//	handle_scheduler(benchmp_childid(), 0, 1);
+	switch (state->pid = fork()) {
+	case 0:
+//		handle_scheduler(benchmp_childid(), 1, 1);
+		close(state->control[1]);
+		close(state->pipes[0]);
+		writer(state->control[0], state->pipes[1], state->buf, state);
+		return (0);
+		/*NOTREACHED*/
+
+	case -1:
+		perror("fork");
+		close(state->control[0]);
+		close(state->control[1]);
+		close(state->pipes[0]);
+		close(state->pipes[1]);
+		state->initerr = 3;
+		return (0);
+		/*NOTREACHED*/
+
+	default:
+		break;
+	}
+	/* parent keeps the write end of control and the read end of the data socket */
+	close(state->control[0]);
+	close(state->pipes[1]);
+	return (0);
+}
+
+/*
+ * Timed loop: for each of lm_optB iterations, ask the writer child for
+ * state->bytes bytes over the control pipe and read them back from the
+ * socket in state->xfer sized calls.  res->re_count gets the iteration count.
+ *
+ * Returns -1 without doing any I/O when benchmark_initbatch() recorded a
+ * failure; exits the process on a read/write error or premature EOF.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	/*
+	 * try not to initialize things here. This is the main
+	 * loop of things to get timed. Start a server in
+	 * benchmark_initbatch
+	 */
+	tsd_t *state = (tsd_t *)tsd;
+	size_t done;
+	ssize_t n;	/* signed: read() reports failure as -1 */
+	size_t todo = state->bytes;
+	int i;
+
+	if (state->initerr != 0)
+		return (-1);
+
+	debug("in to benchmark - optB = %i : repetitions = %i\n", lm_optB, state->repetitions);
+	for (i = 0; i < lm_optB; i++) {
+		/* without the request the child never sends, and the read below would block forever */
+		if (write(state->control[1], &todo, sizeof(todo)) != (ssize_t)sizeof(todo)) {
+			perror("write");
+			exit(1);
+		}
+		for (done = 0; done < todo; done += (size_t)n) {
+			if ((n = read(state->pipes[0], state->buf, state->xfer)) <= 0) {
+				/* error! */
+				debug("error (n = %d) exiting now\n", (int)n);
+				exit(1);
+			}
+		}
+	}
+	res->re_count = i;
+	debug("out of benchmark - optB = %i : repetitions = %i\n", lm_optB, state->repetitions);
+
+	return (0);
+}
+
+/*
+ * Per-batch teardown: close the parent's descriptors, kill and reap the
+ * writer child, and free the buffer allocated by benchmark_initbatch().
+ * Descriptors are only touched when setup succeeded, since the failure
+ * paths of benchmark_initbatch() have already closed them.
+ */
+int
+benchmark_finibatch(void *tsd)
+{
+	tsd_t *state = (tsd_t *)tsd;
+
+	if (state->initerr == 0) {
+		close(state->control[1]);
+		close(state->pipes[0]);
+		if (state->pid > 0) {
+			kill(state->pid, SIGKILL);
+			waitpid(state->pid, NULL, 0);
+		}
+	}
+	state->pid = 0;
+	free(state->buf);
+	state->buf = NULL;
+	return (0);
+}
+
+/* Per-worker teardown hook; nothing to release. */
+int
+benchmark_finiworker(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	// useless code to show what you can do.
+	ts->repetitions++;
+	ts->repetitions--;
+	debug("benchmark_finiworker: repetitions = %i\n",ts->repetitions);
+	return (0);
+}
+
+/* Returns an empty, statically allocated result string. */
+char *
+benchmark_result()
+{
+	static char result = '\0';
+	debug("benchmark_result\n");
+	return (&result);
+}
+
+/* Per-run teardown hook; nothing to release. */
+int
+benchmark_finirun()
+{
+	debug("benchmark_finirun\n");
+	return (0);
+}
+
+
+/* Final teardown hook; nothing to release. */
+int
+benchmark_fini()
+{
+	debug("benchmark_fini\n");
+	return (0);
+}
+
+/*
+ * functions from bw_unix.c
+ */
+
+/*
+ * Child-side loop: read a byte count from controlfd, then write at least
+ * that many bytes to writefd in state->xfer sized calls, forever.
+ * Exits with status 0 when the control pipe reaches EOF and 1 on a
+ * read or write error; it never returns.
+ */
+void
+writer(int controlfd, int writefd, char* buf, void* cookie)
+{
+	size_t todo, done;
+	ssize_t n;	/* signed: read()/write() report failure as -1 */
+	tsd_t *state = (tsd_t *)cookie;
+
+	for ( ;; ) {
+		n = read(controlfd, &todo, sizeof(todo));
+		if (n == 0) {
+			/* parent closed the control pipe: no more requests */
+			exit(0);
+		}
+		if (n != (ssize_t)sizeof(todo)) {
+			exit(1);
+		}
+		for (done = 0; done < todo; done += (size_t)n) {
+#ifdef TOUCH
+			touch(buf, XFERSIZE);
+#endif
+			if ((n = write(writefd, buf, state->xfer)) < 0) {
+				/* error! */
+				exit(1);
+			}
+		}
+	}
+}
+
+/*
+ * Write one byte into every page of buf[0..nbytes) so the pages are
+ * resident before timing starts.  The page size is looked up once and cached.
+ */
+void
+touch(char *buf, int nbytes)
+{
+	static int psize;
+
+	if (!psize) {
+		psize = getpagesize();
+	}
+	while (nbytes > 0) {
+		*buf = 1;
+		buf += psize;
+		nbytes -= psize;
+	}
+}
+
diff --git a/tools/tests/libMicro/apple/lmbench_fstat.c b/tools/tests/libMicro/apple/lmbench_fstat.c
new file mode 100644
index 000000000..f417005e5
--- /dev/null
+++ b/tools/tests/libMicro/apple/lmbench_fstat.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2006 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+
+#ifdef __sun
+#pragma ident "@(#)lmbench_fstat.c 1.4 06/21/06 Apple Inc."
+#endif
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../libmicro.h"
+
+/* per-worker state: ts_fd is the descriptor that gets fstat()ed */
+typedef struct {
+	char *ts_buf;
+	int ts_fd;
+} tsd_t;
+
+
+#define DEFF "/dev/null"
+#define DEFS 1024
+
+static char *optf = DEFF;
+// static long long opts = DEFS;
+
+/*
+ * Register the option string, the per-worker state size and the usage text.
+ */
+int
+benchmark_init()
+{
+
+	(void) sprintf(lm_optstr, "f:");
+
+	/*
+	 * benchmark_initbatch() and benchmark() dereference the tsd as a
+	 * tsd_t, so its size has to be requested here (this was 0).
+	 */
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-stat (default %s)]\n"
+	    "notes: measures lmbench_fstat()\n",
+	    DEFF);
+
+	return (0);
+}
+
+/* -f selects the file to stat; any other option returns -1. */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'f':
+		optf = optarg;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * This initbatch stolen from lmbench_read.c
+ *
+ * Opens optf once per worker.  ts_buf doubles as the "already opened"
+ * marker so later batches reuse the descriptor instead of leaking a new
+ * one each time.
+ * NOTE(review): like the original NULL test, this presumably relies on the
+ * framework handing out zero-filled tsd memory -- confirm.
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+
+	if (ts->ts_buf == NULL) {
+		ts->ts_fd = open(optf, O_RDONLY);
+		if (ts->ts_fd == -1)
+			perror(optf);	/* benchmark() will count the fstat failures */
+		else
+			ts->ts_buf = optf;
+	}
+
+	return (0);
+}
+
+
+/*
+ * Timed loop: fstat() the worker's descriptor lm_optB times.  Failed calls
+ * are counted in res->re_errors; lm_optB is added to res->re_count.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	struct stat sbuf;
+	tsd_t *ts = (tsd_t *)tsd;
+
+	res->re_errors = 0;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (fstat(ts->ts_fd, &sbuf) == -1)
+			res->re_errors++;
+	}
+
+	res->re_count += lm_optB;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/apple/lmbench_lat_ctx.c b/tools/tests/libMicro/apple/lmbench_lat_ctx.c
new file mode 100644
index 000000000..2cbe790c2
--- /dev/null
+++ b/tools/tests/libMicro/apple/lmbench_lat_ctx.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2006 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License').
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc." +#endif + + + +#include +#include +#include + +#include +#include + +#include +#include "../libmicro.h" + +#if 1 +# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) +#else +# define debug(fmt, args...) +#endif + + +#define MAXPROC 2048 +#define CHUNK (4<<10) +#define TRIPS 5 +#ifndef max +#define max(a, b) ((a) > (b) ? 
(a) : (b)) +#endif + + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int process_size; + double overhead; + int procs; + pid_t* pids; + int **p; + void* data; +} tsd_t; + +static int opts = 1; + +void doit(int rd, int wr, int process_size); +int create_pipes(int **p, int procs); +int create_daemons(int **p, pid_t *pids, int procs, int process_size); +void initialize_overhead(void* tsd); +void cleanup_overhead(void* tsd); +void benchmark_overhead(void* tsd); +void initialize(void* tsd); +void cleanup(void* tsd); +long bread(void* buf, long nbytes); + + +#pragma mark *** lmbench routines + +/* + * lmbench routines, etc. brought over for this benchmark + */ + +void +morefds(void) +{ +#ifdef RLIMIT_NOFILE + struct rlimit r; + + getrlimit(RLIMIT_NOFILE, &r); + r.rlim_cur = r.rlim_max; + setrlimit(RLIMIT_NOFILE, &r); +#endif +} + +void +doit(int rd, int wr, int process_size) +{ + int msg; + void* data = NULL; + + if (process_size) { + data = malloc(process_size); + if (data) bzero(data, process_size); + } + for ( ;; ) { + if (read(rd, &msg, sizeof(msg)) != sizeof(msg)) { + debug("read/write on pipe"); + break; + } + bread(data, process_size); + if (write(wr, &msg, sizeof(msg)) != sizeof(msg)) { + debug("read/write on pipe"); + break; + } + } + exit(0); +} + +/* + * Return the number of processors in this host + */ +int +sched_ncpus() +{ +#ifdef MP_NPROCS + /* SGI IRIX interface */ + return sysmp(MP_NPROCS); +#elif defined(HAVE_MPCTL) + /* HP-UX interface */ + return mpctl(MPC_GETNUMSPUS_SYS, 0, 0); +#elif defined(_SC_NPROCESSORS_ONLN) + /* AIX, Solaris, and Linux interface */ + return sysconf(_SC_NPROCESSORS_ONLN); +#elif __APPLE__ + char *name="hw.activecpu"; + int cpus, retval; + size_t len = 4; + retval=sysctlbyname(name, &cpus, &len, NULL, 0); + /* Check retval here */ + debug("cpus = %d retval = %d", cpus, retval); + return cpus; +#endif + return 1; +} + +/* + * Use to get sequentially created processes "far" away from + 
* each other in an SMP. + * + * XXX: probably doesn't work for NCPUS not a power of two. + */ +int +reverse_bits(int cpu) +{ + int i; + int nbits; + int max = sched_ncpus() - 1; + int cpu_reverse = 0; + + for (i = max>>1, nbits = 1; i > 0; i >>= 1, nbits++) + ; + /* now reverse the bits */ + for (i = 0; i < nbits; i++) { + if (cpu & (1<pids = NULL; + pState->p = (int**)malloc(pState->procs * (sizeof(int*) + 2 * sizeof(int))); + p = (int*)&pState->p[pState->procs]; + for (i = 0; i < pState->procs; ++i) { + pState->p[i] = p; + p += 2; + } + + pState->data = (pState->process_size > 0) ? malloc(pState->process_size) : NULL; + if (pState->data) + bzero(pState->data, pState->process_size); + + procs = create_pipes(pState->p, pState->procs); + if (procs < pState->procs) { + debug("procs < pState->procs"); + cleanup_overhead(cookie); + exit(1); + } +} + +void +cleanup_overhead(void* tsd) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + for (i = 0; i < ts->procs; ++i) { + close(ts->p[i][0]); + close(ts->p[i][1]); + } + + free(ts->p); + if (ts->data) free(ts->data); +} + +void +cleanup(void* cookie) +{ + int i; + tsd_t *pState = (tsd_t *)cookie; + + + /* + * Close the pipes and kill the children. 
+ */ + cleanup_overhead(cookie); + for (i = 1; pState->pids && i < pState->procs; ++i) { + if (pState->pids[i] > 0) { + kill(pState->pids[i], SIGKILL); + waitpid(pState->pids[i], NULL, 0); + } + } + if (pState->pids) + free(pState->pids); + pState->pids = NULL; +} + +void +benchmark_overhead(void* tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i = 0; + int msg = 1; + + for (i = 0; i < lm_optB; i++) { + if (write(ts->p[i][1], &msg, sizeof(msg)) != sizeof(msg)) { + debug("read/write on pipe"); + exit(1); + } + if (read(ts->p[i][0], &msg, sizeof(msg)) != sizeof(msg)) { + debug("read/write on pipe"); + exit(1); + } + if (++i == ts->procs) { + i = 0; + } + bread(ts->data, ts->process_size); + } +} + +/* analogous to bzero, bcopy, etc., except that it just reads + * data into the processor + */ +long +bread(void* buf, long nbytes) +{ + long sum = 0; + register long *p, *next; + register char *end; + + p = (long*)buf; + end = (char*)buf + nbytes; + for (next = p + 128; (void*)next <= (void*)end; p = next, next += 128) { + sum += + p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]+p[16]+p[17]+p[18]+p[19]+p[20]+p[21]+ + p[22]+p[23]+p[24]+p[25]+p[26]+p[27]+p[28]+ + p[29]+p[30]+p[31]+p[32]+p[33]+p[34]+p[35]+ + p[36]+p[37]+p[38]+p[39]+p[40]+p[41]+p[42]+ + p[43]+p[44]+p[45]+p[46]+p[47]+p[48]+p[49]+ + p[50]+p[51]+p[52]+p[53]+p[54]+p[55]+p[56]+ + p[57]+p[58]+p[59]+p[60]+p[61]+p[62]+p[63]+ + p[64]+p[65]+p[66]+p[67]+p[68]+p[69]+p[70]+ + p[71]+p[72]+p[73]+p[74]+p[75]+p[76]+p[77]+ + p[78]+p[79]+p[80]+p[81]+p[82]+p[83]+p[84]+ + p[85]+p[86]+p[87]+p[88]+p[89]+p[90]+p[91]+ + p[92]+p[93]+p[94]+p[95]+p[96]+p[97]+p[98]+ + p[99]+p[100]+p[101]+p[102]+p[103]+p[104]+ + p[105]+p[106]+p[107]+p[108]+p[109]+p[110]+ + p[111]+p[112]+p[113]+p[114]+p[115]+p[116]+ + p[117]+p[118]+p[119]+p[120]+p[121]+p[122]+ + p[123]+p[124]+p[125]+p[126]+p[127]; + } + for (next = p + 16; (void*)next <= (void*)end; p = next, next += 16) { + sum += + 
p[0]+p[1]+p[2]+p[3]+p[4]+p[5]+p[6]+p[7]+ + p[8]+p[9]+p[10]+p[11]+p[12]+p[13]+p[14]+ + p[15]; + } + for (next = p + 1; (void*)next <= (void*)end; p = next, next++) { + sum += *p; + } + return sum; +} + +#pragma mark *** darbench routines + + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + tsd_t *ts = (tsd_t *)tsd; + int procs; + + initialize_overhead(tsd); + + ts->pids = (pid_t*)malloc(ts->procs * sizeof(pid_t)); + if (ts->pids == NULL) + exit(1); + bzero((void*)ts->pids, ts->procs * sizeof(pid_t)); + procs = create_daemons(ts->p, ts->pids, + ts->procs, ts->process_size); + if (procs < ts->procs) { + cleanup(tsd); + exit(1); + } + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "s:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-s kbytes]\n" + " processes [processes ...]\n"); + + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + /* + * Close the pipes and kill the children. 
+ */ + cleanup_overhead(tsd); + for (i = 1; ts->pids && i < ts->procs; ++i) { + if (ts->pids[i] > 0) { + kill(ts->pids[i], SIGKILL); + waitpid(ts->pids[i], NULL, 0); + } + } + if (ts->pids) + free(ts->pids); + ts->pids = NULL; + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + + switch (opt) { + case 's': + opts = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + ts->process_size = opts; + + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + tsd_t *ts = (tsd_t *)tsd; + int i; + int msg=1; + + for (i = 0; i < lm_optB; i++) { + if (write(ts->p[0][1], &msg, sizeof(msg)) != + sizeof(msg)) { + debug("read/write on pipe"); + exit(1); + } + if (read(ts->p[ts->procs-1][0], &msg, sizeof(msg)) != sizeof(msg)) { + debug("read/write on pipe"); + exit(1); + } + bread(ts->data, ts->process_size); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_lat_sig_catch.c b/tools/tests/libMicro/apple/lmbench_lat_sig_catch.c new file mode 100644 index 000000000..edca677ee --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_lat_sig_catch.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_lat_sig_catch.c 1.0 08/16/06 Apple Inc." 
+#endif + + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ + +static int optp = 1; +static int optw = 0; +static int optn = -1; + +u_int64_t caught, n; +double adj; +void handler(int s) { } +jmp_buf prot_env; + +typedef struct { + int pid; +} tsd_t; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p ]\n" + " [-w ]\n" + " [-n ]\n" + "notes: measures lmbench lat_sig install\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + /* + * more proof of state passing + */ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + + switch (opt) { + case 'w': + optw = sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t* ts = (tsd_t*)tsd; + struct sigaction sa, old; + + sa.sa_handler = handler; + (void) sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + (void) sigaction(SIGUSR1, 
&sa, &old); + + ts->pid = getpid(); + return (0); +} + +int +benchmark_initrun() +{ + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + int i; + tsd_t* ts = (tsd_t*)tsd; + + for (i = 0; i < lm_optB; i++) { + (void) kill(ts->pid, SIGUSR1); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_lat_sig_install.c b/tools/tests/libMicro/apple/lmbench_lat_sig_install.c new file mode 100644 index 000000000..ee1acd697 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_lat_sig_install.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_lat_sig_install.c 1.0 08/16/06 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ + +static int optp = 1; +static int optw = 0; +static int optn = -1; + +u_int64_t caught, n; +double adj; +void handler(int s) { } +jmp_buf prot_env; + +typedef struct { + int ts_once; +} tsd_t; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p ]\n" + " [-w ]\n" + " [-n ]\n" + "notes: measures lmbench lat_sig install\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + /* + * more proof of state passing + */ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, 
"benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + + switch (opt) { + case 'w': + optw = sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + + + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + struct sigaction sa, old; + int i; + + for (i = 0; i < lm_optB; i++) { + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(SIGUSR1, &sa, &old); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_lat_sig_prot.c b/tools/tests/libMicro/apple/lmbench_lat_sig_prot.c new file mode 100644 index 000000000..b2612e9a4 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_lat_sig_prot.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_lat_sig_prot.c 1.0 08/16/06 Apple Inc." 
+#endif + + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ + +static int optp = 1; +static int optw = 0; +static int optn = -1; +static char *optf = "/Volumes/data/darbench/bin-i386/lmbench_lat_sig_prot"; +static int *mappedfile; +jmp_buf jumper; + +u_int64_t caught, n; +double adj; +void handler(int s) { } +jmp_buf prot_env; + +typedef struct { + char* fname; + char* where; +} tsd_t; + + +void +prot(int s) +{ + _longjmp(jumper, s); + +} + + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + (void) fprintf(stderr, "benchmark_initbatch: entry\n"); + + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n:f"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p ]\n" + " [-w ]\n" + " [-n ]\n" + " [-f ]\n" + "notes: measures lmbench lat_sig prot\n"); + (void) fprintf(stderr, "benchmark_init: entry\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + /* + * more proof of state passing + */ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + (void) fprintf(stderr, "benchmark_optswitch: entry\n"); + + switch (opt) { + case 'w': + optw = 
sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + case 'f': + (void) fprintf(stderr, "benchmark_optswitch: FILENAME entry = %s\n",optf); + //strcpy(optf, optarg); + (void) fprintf(stderr, "benchmark_optswitch: FILENAME exit\n"); + break; + default: + return (-1); + } + (void) fprintf(stderr, "benchmark_optswitch: exit\n"); + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t* ts = (tsd_t*)tsd; + int fd; + struct sigaction sa; + (void) fprintf(stderr, "benchmark_initworker: entry = %s\n",optf); + + ts->fname = optf; + fd = open(ts->fname, 0); + (void) fprintf(stderr, "benchmark_initworker: open result is %i\n",fd); + (void) fprintf(stderr, "benchmark_initworker: errno result is %d - \"%s\"\n",errno, strerror(errno)); + + ts->where = mmap(0,4096, PROT_READ, MAP_SHARED, fd, 0); + (void) fprintf(stderr, "benchmark_initworker: mmap result is %i\n",ts->where); + *mappedfile = (int) ts->where; + (void) fprintf(stderr, "benchmark_initworker: mappedfile result is %i\n",*mappedfile); + + if ((long)ts->where == -1) { + perror("mmap"); + exit(1); + } + + sa.sa_handler = prot; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(SIGSEGV, &sa, 0); + sigaction(SIGBUS, &sa, 0); + + + caught = 0; + n = lm_optB; + return (0); +} + +int +benchmark_initrun() +{ + (void) fprintf(stderr, "benchmark_initrun: entry\n"); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + int i; + //tsd_t* ts = (tsd_t*)tsd; + (void) fprintf(stderr, "benchmark: lm_optB = %i\n",lm_optB); + for (i = 0; i < lm_optB; i++) { + if (_setjmp(jumper) == 0) { + + *mappedfile= 1; + } + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_lat_sig_send.c 
b/tools/tests/libMicro/apple/lmbench_lat_sig_send.c new file mode 100644 index 000000000..9797bc42f --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_lat_sig_send.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)lmbench_lat_sig_catch.c 1.0 08/16/06 Apple Inc." 
+#endif + + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ + +static int optp = 1; +static int optw = 0; +static int optn = -1; + +u_int64_t caught, n; +double adj; +void handler(int s) { } +jmp_buf prot_env; + +typedef struct { + int pid; +} tsd_t; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p ]\n" + " [-w ]\n" + " [-n ]\n" + "notes: measures lmbench lat_sig install\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + /* + * more proof of state passing + */ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + + switch (opt) { + case 'w': + optw = sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t* ts = (tsd_t*)tsd; + ts->pid = getpid(); + return (0); +} + +int +benchmark_initrun() +{ + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + 
* initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + int i; + tsd_t* ts = (tsd_t*)tsd; + + for (i = 0; i < lm_optB; i++) { + (void) kill(ts->pid, 0); + } + res->re_count = i; + + return (0); +} diff --git a/bsd/dev/i386/cons.h b/tools/tests/libMicro/apple/lmbench_openclose.c similarity index 54% rename from bsd/dev/i386/cons.h rename to tools/tests/libMicro/apple/lmbench_openclose.c index 3ea2d28d6..f25154cd6 100644 --- a/bsd/dev/i386/cons.h +++ b/tools/tests/libMicro/apple/lmbench_openclose.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code @@ -25,39 +25,82 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1987 NeXT, Inc. - */ + -struct consdev { - char *cn_name; /* name of device in dev_name_list */ - int (*cn_probe)(void); /* probe hardware, fill consdev info */ - int (*cn_init)(void); /* turn on as console */ - int (*cn_getc)(void); /* kernel getchar interface */ - int (*cn_putc)(void); /* kernel putchar interface */ - struct tty *cn_tp; /* tty structure for console device */ - dev_t cn_dev; /* major/minor of device */ - short cn_pri; /* pecking order; higher the better */ -}; - -/* values for cn_pri - reflect our policy for console selection */ -#define CN_DEAD 0 /* device doesn't exist */ -#define CN_NORMAL 1 /* device exists but is nothing special */ -#define CN_INTERNAL 2 /* "internal" bit-mapped display */ -#define CN_REMOTE 3 /* serial interface with remote bit set */ - -/* XXX */ -#define CONSMAJOR 0 - -#ifdef KERNEL +#ifdef __sun +#pragma ident "@(#)lmbench_openclose.c 1.4 06/21/06 Apple Inc." 
+#endif + +#include +#include +#include #include +#include +#include -extern struct consdev constab[]; -extern struct consdev *cn_tab; -extern struct tty *cn_tty; +#include "../libmicro.h" -extern struct tty cons; -extern struct tty *constty; /* current console device */ -#endif +#define DEFF "/dev/null" +static char *optf = DEFF; + +int +benchmark_init() +{ + + (void) sprintf(lm_optstr, "f:"); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-f file-to-stat (default %s)]\n" + "notes: measures stat()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + int fd; + + res->re_errors = 0; + +/* + * The libmicro test uses a for loop as below: + * for (i = 0; i < lm_optB; i++) { + * + * we can probably get away with using lm_optB + * in the while loop below + * + */ + i = 0; + + while (i++ < lm_optB) { + fd = open(optf, 0); + if (fd == -1) { + res->re_errors++; + } + close(fd); + } + + res->re_count += lm_optB; + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_read.c b/tools/tests/libMicro/apple/lmbench_read.c new file mode 100644 index 000000000..15225ddcd --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_read.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#ifdef __sun +#pragma ident "@(#)lmbench_read.c 1.5 05/08/04 Apple Inc." 
+#endif + + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "../libmicro.h" + +typedef struct { + char *ts_buf; + int ts_fd; +} tsd_t; + +#define DEFF "/dev/zero" +#define DEFS 1024 + +static char *optf = DEFF; +static long long opts = DEFS; +int optw = 0; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:s:w"); + + (void) sprintf(lm_usage, + " [-f file-to-read (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-w (store a byte to each page after read)]\n" + "notes: measures lmbench_read()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + lm_defB = 1; + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'w': + optw = getpagesize(); + break; + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (ts->ts_buf == NULL) { + ts->ts_buf = malloc(opts); + ts->ts_fd = open(optf, O_RDONLY); + } + + (void) lseek(ts->ts_fd, 0, SEEK_SET); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int j; +/* + * The libmicro test uses a for loop as below: + * for (i = 0; i < lm_optB; i++) { + * + * we can probably get away with using lm_optB + * in the while loop below + * + */ + i = 0; + + while (i++ < lm_optB) { + if (read(ts->ts_fd, ts->ts_buf, opts) != opts) { + res->re_errors++; + } + if (optw) + for (j = 0; j < opts; j += optw) + ts->ts_buf[j] = 0; + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/apple/lmbench_select_file.c b/tools/tests/libMicro/apple/lmbench_select_file.c new file mode 100644 index 
000000000..cdad43ec1 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_select_file.c @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)socket.c 1.3 05/08/04 Apple Inc." 
+#endif + + + +#include +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * lmbench routines, etc. brought over for this benchmark + */ +int open_file(void* tsd); +void server(void* tsd); + + +typedef int (*open_f)(void* tsd); +/* + * end of lmbench support routines + */ + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + char fname[L_tmpnam]; + open_f fid_f; + pid_t pid; + int sock; + int fid; + int num; + int max; + fd_set set; +} tsd_t; + +static int optt = 1; +static int optn = -1; +static int optp = 1; +static int optw = 0; + +/* + * lmbench routines, etc. brought over for this benchmark + */ + +void +morefds(void) +{ +#ifdef RLIMIT_NOFILE + struct rlimit r; + + getrlimit(RLIMIT_NOFILE, &r); + r.rlim_cur = r.rlim_max; + setrlimit(RLIMIT_NOFILE, &r); +#endif +} + +int +open_file(void* tsd) +{ + tsd_t* ts = (tsd_t*)tsd; + //(void) fprintf(stderr, "open_file: ts->fname = %s\n",ts->fname); + return (int) open(ts->fname, O_RDONLY); +} + +void +server(void* tsd) +{ + int pid; + tsd_t* ts = (tsd_t*)tsd; + + pid = getpid(); + ts->pid = 0; + //(void) fprintf(stderr, "server: state->fid_f = %i\n",ts->fid_f); + + if (ts->fid_f == open_file) { + /* Create a temporary file for clients to open */ + sprintf(ts->fname, "/tmp/lat_selectXXXXXX"); + //(void) fprintf(stderr, "server: ts->fname = %s\n",ts->fname); + ts->fid = mkstemp(ts->fname); + //(void) fprintf(stderr, "server: ts->fname = %s: ts->fid = %d\n",ts->fname, ts->fid); + + if (ts->fid <= 0) { + char buf[L_tmpnam+128]; + sprintf(buf, "lat_select: Could not create temp file %s", ts->fname); + perror(buf); + exit(1); + } + close(ts->fid); + return; + } +// +// this is all for the tcp version of this test only +// +// /* Create a socket for clients to connect to */ +// state->sock = tcp_server(TCP_SELECT, SOCKOPT_REUSE); +// if (state->sock <= 0) { +// perror("lat_select: Could not open tcp server socket"); +// exit(1); +// } + + /* Start a 
server process to accept client connections */ +// switch(state->pid = fork()) { +// case 0: +// /* child server process */ +// while (pid == getppid()) { +// int newsock = tcp_accept(state->sock, SOCKOPT_NONE); +// read(newsock, &state->fid, 1); +// close(newsock); +// } +// exit(0); +// case -1: +// /* error */ +// perror("lat_select::server(): fork() failed"); +// exit(1); +// default: +// break; +// } +} + + +/* + * end of lmbench support routines + */ + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + return (0); +} + +int +benchmark_finirun() +{ + //(void) fprintf(stderr, "benchmark_finirun\n"); + return (0); +} + +int +benchmark_init() +{ + //(void) fprintf(stderr, "benchmark_init\n"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n:t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p parallelism (default 1)]\n" + " [-w warmup (default 0)]\n" + " [-n number of descriptors (default 1)]\n" + " [-t int (default 1)]\n" + "notes: measures lmbench_select_file\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + //(void) fprintf(stderr, "benchmark_fini\n"); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + //(void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + // pulls in the lmbench cleanup code + //(void) fprintf(stderr, "benchmark_finiworker\n"); + for (i = 0; i <= ts->max; ++i) { + if (FD_ISSET(i, &(ts->set))) + close(i); + } + FD_ZERO(&(ts->set)); + 
unlink(ts->fname); + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + //(void) fprintf(stderr, "benchmark_optswitch\n"); + + switch (opt) { + case 't': + optt = sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + case 'w': + optw = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + // pulls in code from lmbench main and initialize + int n = 0; + /* + * initialize your state variables here first + */ + tsd_t *ts = (tsd_t *)tsd; + int N, fid, fd; + + /* + * default number of file descriptors + */ + //(void) fprintf(stderr, "benchmark_initworker\n"); + ts->num = 200; + if (optn > 0) { + ts->num = optn; + } + N = ts->num; + //(void) fprintf(stderr, "benchmark_initworker ts->num is %i\n",ts->num); + + /* + * grab more file descriptors + */ + + morefds(); + + ts->fid_f = open_file; + server(ts); + //(void) fprintf(stderr, "benchmark_initworker: returned from server call\n"); + /* + * Initialize function from lmbench + * for this test + */ + fid = (*ts->fid_f)(ts); + //(void) fprintf(stderr, "initworker: fid is %i\n",fid); + if (fid <= 0) { + perror("Could not open device"); + exit(1); + } + ts->max = 0; + FD_ZERO(&(ts->set)); + //(void) fprintf(stderr, "initworker FD_ZERO: ts->set result is %i\n",ts->set); + //(void) fprintf(stderr, "initworker: N is %i\n",N); + for (n = 0; n < N; n++) { + //(void) fprintf(stderr, "benchmark_initworker: in the loop - N is %i: n is %i\n",N, n); + fd = dup(fid); + //(void) fprintf(stderr, "benchmark_initworker: dup result is %i\n",fd); + //(void) fprintf(stderr, "benchmark_initworker: errno result is %d - \"%s\"\n",errno, strerror(errno)); + + if (fd == -1) break; + if (fd > ts->max) + ts->max = fd; + FD_SET(fd, &(ts->set)); + //(void) fprintf(stderr, "initworker FD_SET: ts->set result is %i\n",ts->set); + + } + //(void) fprintf(stderr, "benchmark_initworker: after second 
macro/loop\n"); + + ts->max++; + close(fid); + //(void) fprintf(stderr, "benchmark_initworker: N is %i: n is %i\n",N, n); + if (n != N) + exit(1); + /* end of initialize function */ + //(void) fprintf(stderr, "benchmark_initworker: about to exit\n"); + return (0); +} + +int +benchmark_initrun() +{ + //(void) fprintf(stderr, "benchmark_initrun\n"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + tsd_t *ts = (tsd_t *)tsd; + fd_set nosave; + static struct timeval tv; + + //(void) fprintf(stderr, "benchmark\n"); + + int i; + //int sel_res; + tv.tv_sec = 0; + tv.tv_usec = 0; + + + for (i = 0; i < lm_optB; i++) { + nosave = ts->set; + //(void) fprintf(stderr, "benchmark: nosave is %i\n", nosave); + + select(ts->num, 0, &nosave, 0, &tv); + //(void) fprintf(stderr, "benchmark: select result is %i\n",sel_res); + //(void) fprintf(stderr, "benchmark: errno result is %d - \"%s\"\n",errno, strerror(errno)); + + + } + res->re_count = i; + return (0); +} + diff --git a/tools/tests/libMicro/apple/lmbench_select_tcp.c b/tools/tests/libMicro/apple/lmbench_select_tcp.c new file mode 100644 index 000000000..aea77dae1 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_select_tcp.c @@ -0,0 +1,622 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)socket.c 1.3 05/08/04 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * lmbench routines, etc. 
brought over for this benchmark + */ +int open_file(void* tsd); +void server(void* tsd); +int tcp_accept(int sock, int rdwr); +void sock_optimize(int sock, int flags); +int sockport(int s); +int tcp_server(int prog, int rdwr); +int tcp_connect(char *host, int prog, int rdwr); +int open_socket(void *tsd); + + +typedef int (*open_f)(void* tsd); +/* + * end of lmbench support routines + */ + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + char fname[L_tmpnam]; + open_f fid_f; + pid_t pid; + int sock; + int fid; + int num; + int max; + fd_set set; +} tsd_t; + +static int optt = 1; +static int optn = -1; +static int optp = 1; +static int optw = 0; + +/* + * lmbench routines, etc. brought over for this benchmark + */ + +void +morefds(void) +{ +#ifdef RLIMIT_NOFILE + struct rlimit r; + + getrlimit(RLIMIT_NOFILE, &r); + r.rlim_cur = r.rlim_max; + setrlimit(RLIMIT_NOFILE, &r); +#endif +} + +int +open_file(void* tsd) +{ + tsd_t* ts = (tsd_t*)tsd; + return (int) open(ts->fname, O_RDONLY); +} + +int +open_socket(void* tsd) +{ + return tcp_connect("localhost", TCP_SELECT, SOCKOPT_NONE); +} + +void +server(void* tsd) +{ + int pid; + tsd_t *ts = (tsd_t *)tsd; + + pid = getpid(); + ts->pid = 0; + + if (ts->fid_f == open_file) { + /* Create a temporary file for clients to open */ + sprintf(ts->fname, "lat_selectXXXXXX"); + ts->fid = mkstemp(ts->fname); + if (ts->fid <= 0) { + char buf[L_tmpnam+128]; + sprintf(buf, "lat_select: Could not create temp file %s", ts->fname); + perror(buf); + exit(1); + } + close(ts->fid); + return; + } + + /* Create a socket for clients to connect to */ + ts->sock = tcp_server(TCP_SELECT, SOCKOPT_REUSE); + if (ts->sock <= 0) { + perror("lat_select: Could not open tcp server socket"); + exit(1); + } + + /* Start a server process to accept client connections */ + switch(ts->pid = fork()) { + case 0: + /* child server process */ + while (pid == getppid()) { + int newsock = tcp_accept(ts->sock, SOCKOPT_NONE); + 
read(newsock, &ts->fid, 1); + close(newsock); + } + exit(0); + case -1: + /* error */ + perror("lat_select::server(): fork() failed"); + exit(1); + default: + break; + } +} + + +/* + * Accept a connection and return it + */ +int +tcp_accept(int sock, int rdwr) +{ + struct sockaddr_in s; + int newsock; + socklen_t namelen; + + namelen = sizeof(s); + bzero((void*)&s, namelen); + +retry: + if ((newsock = accept(sock, (struct sockaddr*)&s, &namelen)) < 0) { + if (errno == EINTR) + goto retry; + perror("accept"); + exit(6); + } +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "Server newsock port %d\n", sockport(newsock)); +#endif + sock_optimize(newsock, rdwr); + return (newsock); +} + +void +sock_optimize(int sock, int flags) +{ + if (flags & SOCKOPT_READ) { + int sockbuf = SOCKBUF; + + while (setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &sockbuf, + sizeof(int))) { + sockbuf >>= 1; + } +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "sockopt %d: RCV: %dK\n", sock, sockbuf>>10); +#endif + } + if (flags & SOCKOPT_WRITE) { + int sockbuf = SOCKBUF; + + while (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &sockbuf, + sizeof(int))) { + sockbuf >>= 1; + } +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "sockopt %d: SND: %dK\n", sock, sockbuf>>10); +#endif + } + if (flags & SOCKOPT_REUSE) { + int val = 1; + if (setsockopt(sock, SOL_SOCKET, + SO_REUSEADDR, &val, sizeof(val)) == -1) { + perror("SO_REUSEADDR"); + } + } +} + +int +sockport(int s) +{ + socklen_t namelen; + struct sockaddr_in sin; + + namelen = sizeof(sin); + if (getsockname(s, (struct sockaddr *)&sin, &namelen) < 0) { + perror("getsockname"); + return(-1); + } + return ((int)ntohs(sin.sin_port)); +} + +/* + * Get a TCP socket, bind it, figure out the port, + * and advertise the port as program "prog". + * + * XXX - it would be nice if you could advertise ascii strings. 
+ */ +int +tcp_server(int prog, int rdwr) +{ + int sock; + struct sockaddr_in s; + +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "tcp_server(%u, %u)\n", prog, rdwr); +#endif + if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + perror("socket"); + exit(1); + } + sock_optimize(sock, rdwr); + bzero((void*)&s, sizeof(s)); + s.sin_family = AF_INET; + if (prog < 0) { + s.sin_port = htons(-prog); + } + if (bind(sock, (struct sockaddr*)&s, sizeof(s)) < 0) { + perror("bind"); + exit(2); + } + if (listen(sock, 100) < 0) { + perror("listen"); + exit(4); + } + if (prog > 0) { +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "Server port %d\n", sockport(sock)); +#endif + (void)pmap_unset((u_long)prog, (u_long)1); + if (!pmap_set((u_long)prog, (u_long)1, (u_long)IPPROTO_TCP, + (unsigned short)sockport(sock))) { + perror("pmap_set"); + exit(5); + } + } + return (sock); +} + + +/* + * Connect to the TCP socket advertised as "prog" on "host" and + * return the connected socket. + * + * Hacked Thu Oct 27 1994 to cache pmap_getport calls. This saves + * about 4000 usecs in loopback lat_connect calls. I suppose we + * should time gethostbyname() & pmap_getprot(), huh? 
+ */ +int +tcp_connect(char *host, int prog, int rdwr) +{ + static struct hostent *h; + static struct sockaddr_in s; + static u_short save_port; + static u_long save_prog; + static char *save_host; + int sock; + static int tries = 0; + + if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { + perror("socket"); + exit(1); + } + if (rdwr & SOCKOPT_PID) { + static unsigned short port; + struct sockaddr_in sin; + + if (!port) { + port = (unsigned short)(getpid() << 4); + if (port < 1024) { + port += 1024; + } + } + do { + port++; + bzero((void*)&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_port = htons(port); + } while (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) == -1); + } +#ifdef LIBTCP_VERBOSE + else { + struct sockaddr_in sin; + + bzero((void*)&sin, sizeof(sin)); + sin.sin_family = AF_INET; + if (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) < 0) { + perror("bind"); + exit(2); + } + } + fprintf(stderr, "Client port %d\n", sockport(sock)); +#endif + sock_optimize(sock, rdwr); + if (!h || host != save_host || prog != save_prog) { + save_host = host; /* XXX - counting on them not + * changing it - benchmark only. 
+ */ + save_prog = prog; + if (!(h = gethostbyname(host))) { + perror(host); + exit(2); + } + bzero((void *) &s, sizeof(s)); + s.sin_family = AF_INET; + bcopy((void*)h->h_addr, (void *)&s.sin_addr, h->h_length); + if (prog > 0) { + save_port = pmap_getport(&s, prog, + (u_long)1, IPPROTO_TCP); + if (!save_port) { + perror("lib TCP: No port found"); + exit(3); + } +#ifdef LIBTCP_VERBOSE + fprintf(stderr, "Server port %d\n", save_port); +#endif + s.sin_port = htons(save_port); + } else { + s.sin_port = htons(-prog); + } + } + if (connect(sock, (struct sockaddr*)&s, sizeof(s)) < 0) { + if (errno == ECONNRESET + || errno == ECONNREFUSED + || errno == EAGAIN) { + close(sock); + if (++tries > 10) return(-1); + return (tcp_connect(host, prog, rdwr)); + } + perror("connect"); + exit(4); + } + tries = 0; + return (sock); +} + + +/* + * end of lmbench support routines + */ + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_init() +{ + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "p:w:n:t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-p parallelism (default 1)]\n" + " [-w warmup (default 0)]\n" + " [-n number of descriptors (default 1)]\n" + " [-t int (default 1)]\n" + "notes: measures lmbench_select_file\n"); + lm_defB = 1; + return (0); +} + +int +benchmark_fini() +{ + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + tsd_t *ts = (tsd_t 
*)tsd; + int i; + // pulls in the lmbench cleanup code + for (i = 0; i <= ts->max; ++i) { + if (FD_ISSET(i, &(ts->set))) + close(i); + } + FD_ZERO(&(ts->set)); + unlink(ts->fname); + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + + switch (opt) { + case 't': + optt = sizetoint(optarg); + break; + case 'n': + optn = sizetoint(optarg); + break; + case 'p': + optp = sizetoint(optarg); + break; + case 'w': + optw = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + // pulls in code from lmbench main and initialize + int n = 0; + /* + * initialize your state variables here first + */ + tsd_t *ts = (tsd_t *)tsd; + int N, fid, fd; + + /* + * default number of file descriptors + */ + ts->num = 200; + if (optn > 0) { + ts->num = optn; + } + N = ts->num; + + /* + * grab more file descriptors + */ + + morefds(); + + ts->fid_f = open_socket; + server(ts); + /* + * Initialize function from lmbench + * for this test + */ + fid = (*ts->fid_f)(ts); + if (fid <= 0) { + perror("Could not open device"); + exit(1); + } + ts->max = 0; + FD_ZERO(&(ts->set)); + for (n = 0; n < N; n++) { + fd = dup(fid); + //(void) fprintf(stderr, "benchmark_initworker: errno result is %d - \"%s\"\n",errno, strerror(errno)); + + if (fd == -1) break; + if (fd > ts->max) + ts->max = fd; + FD_SET(fd, &(ts->set)); + //(void) fprintf(stderr, "initworker FD_SET: ts->set result is %i\n",ts->set); + + } + //(void) fprintf(stderr, "benchmark_initworker: after second macro/loop\n"); + + ts->max++; + close(fid); + if (n != N) + exit(1); + /* end of initialize function */ + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + tsd_t *ts = (tsd_t *)tsd; + fd_set nosave; + static struct timeval tv; + + //(void) 
fprintf(stderr, "benchmark\n"); + + int i; + //int sel_res; + tv.tv_sec = 0; + tv.tv_usec = 0; + + + for (i = 0; i < lm_optB; i++) { + nosave = ts->set; + //(void) fprintf(stderr, "benchmark: nosave is %i\n", nosave); + + select(ts->num, 0, &nosave, 0, &tv); + + } + res->re_count = i; + return (0); +} + diff --git a/osfmk/libsa/ctype.h b/tools/tests/libMicro/apple/lmbench_stat.c similarity index 53% rename from osfmk/libsa/ctype.h rename to tools/tests/libMicro/apple/lmbench_stat.c index 73d90e9ab..b20b0251e 100644 --- a/osfmk/libsa/ctype.h +++ b/tools/tests/libMicro/apple/lmbench_stat.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code @@ -25,47 +25,82 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + + /* - * @OSF_COPYRIGHT@ - * + * Portions Copyright (c) 2006, Apple Inc. */ + +#ifdef __sun +#pragma ident "@(#)lmbench_stat.c 1.4 06/21/06 Apple Inc." 
+#endif + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +#define DEFF "/dev/null" +static char *optf = DEFF; + +int +benchmark_init() +{ + + (void) sprintf(lm_optstr, "f:"); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-f file-to-stat (default %s)]\n" + "notes: measures stat()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + struct stat sbuf; + + res->re_errors = 0; + /* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:51 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:35 wsanchez - * Import of OSF Mach kernel (~mburg) + * The libmicro test uses a for loop as below: + * for (i = 0; i < lm_optB; i++) { * - * Revision 1.1.2.1 1996/09/17 16:56:20 bruel - * created from standalone mach servers. 
- * [96/09/17 bruel] + * we can probably get away with using lm_optB + * in the while loop below * - * $EndLog$ */ + i = 0; + + while (i++ < lm_optB) { + if (stat(optf, &sbuf) == -1) + res->re_errors++; + } + + res->re_count += lm_optB; -#ifndef _CTYPE_H_ -#define _CTYPE_H_ - -extern int isalpha(int); -extern int isalnum(int); -extern int iscntrl(int); -extern int isdigit(int); -extern int isgraph(int); -extern int islower(int); -extern int isprint(int); -extern int ispunct(int); -extern int isspace(int); -extern int isupper(int); -extern int isxdigit(int); -extern int toupper(int); -extern int tolower(int); - -extern int isascii(int); -extern int toascii(int); - -extern int (_toupper)(int); -extern int (_tolower)(int); - -#endif /* _CTYPE_H_ */ + return (0); +} diff --git a/tools/tests/libMicro/apple/lmbench_write.c b/tools/tests/libMicro/apple/lmbench_write.c new file mode 100644 index 000000000..3224d4d20 --- /dev/null +++ b/tools/tests/libMicro/apple/lmbench_write.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +#ifdef __sun +#pragma ident "@(#)write.c 1.3 05/08/04 Apple Inc." +#endif + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "../libmicro.h" + +typedef struct { + char *ts_buf; + int ts_fd; +} tsd_t; + +#define DEFF "/dev/null" +#define DEFS 1024 + +static int optc = 0; +static char *optf = DEFF; +static long long opts = DEFS; +static int optd; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "cdf:s:"); + + (void) sprintf(lm_usage, + " [-f file-to-write (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-c ] (make sure buffer is in cache)\n" +#ifdef __sun + " [-d ] use directio" +#endif + "notes: measures lmbench_write()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + lm_defB = 1; + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + + case 'd': + optd++; + break; + case 'c': + optc++; + break; + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + if (ts->ts_buf == NULL) { + ts->ts_buf = malloc(opts); + ts->ts_fd = open(optf, O_WRONLY); + +#ifdef __sun + if (optd) + (void) directio(ts->ts_fd, DIRECTIO_ON); +#endif + /* + * bring buf into cache if specified. 
+ */ + + if (optc) + for (i = 0; i < opts; i++) + ts->ts_buf[i] = 0; + } + + (void) lseek(ts->ts_fd, 0, SEEK_SET); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + +/* + * The libmicro test uses a for loop as below: + * for (i = 0; i < lm_optB; i++) { + * + * we can probably get away with using lm_optB + * in the while loop below + * + */ + i = 0; + + while (i++ < lm_optB) { + if (write(ts->ts_fd, ts->ts_buf, opts) != opts) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/osfmk/libsa/i386/math.h b/tools/tests/libMicro/apple/posix_spawn.c similarity index 51% rename from osfmk/libsa/i386/math.h rename to tools/tests/libMicro/apple/posix_spawn.c index 7e061390c..dc5167ef6 100644 --- a/osfmk/libsa/i386/math.h +++ b/tools/tests/libMicro/apple/posix_spawn.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All Rights Reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,39 +22,84 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + + +#ifdef __sun +#pragma ident "@(#)posix_spawn.c 1.0 08/21/08 Apple Inc." +#endif + /* - * @OSF_COPYRIGHT@ - * - */ -/* - * HISTORY - * - * Revision 1.2 1998/09/30 21:21:00 wsanchez - * Merged in IntelMerge1 (mburg: Intel support) - * - * Revision 1.1.2.1 1998/09/30 18:19:49 mburg - * Changes for Intel port - * - * Revision 1.1.1.1 1998/03/07 02:25:36 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.2.2 1996/10/10 13:56:11 yp - * Submitted again (ODE problems). - * [96/10/10 yp] - * - * Revision 1.1.2.1 1996/10/10 09:16:45 yp - * Created. 
- * [96/10/10 yp] - * - * $EndLog$ + * posix_spawn benchmark */ -#ifndef _MACHINE_MATH_H_ -#define _MACHINE_MATH_H_ 1 +#include +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +static char exec_path[1024]; +static char *argv[3]; + +int +benchmark_init() +{ + lm_defB = 128; + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + "notes: measures posix_spawn time of simple process()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + char buffer[80]; + + (void) strcpy(exec_path, lm_procpath); + (void) strcat(exec_path, "/posix_spawn_bin"); + + (void) sprintf(buffer, "%d", lm_optB); + argv[0] = exec_path; + argv[1] = strdup(buffer); + argv[2] = NULL; + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int c; + int pid; + int status; + + if (( c = posix_spawn(&pid, exec_path, NULL, NULL, argv, NULL) != 0)) + { + res->re_errors++; + } + + if (waitpid(pid, &status, 0) < 0) + { + res->re_errors++; + } -#define HUGE_VAL (1.7976931348623157e+308 * 2.0) + if (WIFEXITED(status) && WEXITSTATUS(status) != 0) + { + res->re_errors++; + } + + res->re_count = lm_optB; -#endif /* _MACHINE_MATH_H_ */ + return (0); +} diff --git a/bsd/dev/i386/table_inline.h b/tools/tests/libMicro/apple/posix_spawn_bin.c similarity index 66% rename from bsd/dev/i386/table_inline.h rename to tools/tests/libMicro/apple/posix_spawn_bin.c index b99eddd0b..a5173367d 100644 --- a/bsd/dev/i386/table_inline.h +++ b/tools/tests/libMicro/apple/posix_spawn_bin.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008 Apple Inc. All Rights Reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). 
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,46 +22,49 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + + +#ifdef __sun +#pragma ident "@(#)posix_spawn_bin.c 1.0 08/21/08 Apple Inc." +#endif + /* - * Copyright (c) 1992 NeXT Computer, Inc. - * - * Intel386 Family: Selector based access to descriptor tables. - * - * HISTORY - * - * 2 April 1992 ? at NeXT - * Created. 
+ * time program to recursively test posix_spawn time */ - -#include -#include -#include +#include +#include +#include +#include +#include -static inline -gdt_entry_t * -sel_to_gdt_entry(sel) -sel_t sel; +int +main(int argc, char *argv[]) { - return (&gdt[sel.index]); -} + int left; + int pid; -static inline -idt_entry_t * -sel_to_idt_entry(sel) -sel_t sel; -{ - return (&idt[sel.index]); -} + if (argc == 1) { + exit(1); + } -static inline -ldt_entry_t * -sel_to_ldt_entry(tbl, sel) -ldt_t * tbl; -sel_t sel; -{ - return (&tbl[sel.index]); + left = atoi(argv[1]); + + left--; + + if (left <= 0) { + exit(0); + } else { + char buffer[80]; + (void) sprintf(buffer, "%d", left); + argv[1] = buffer; + if (posix_spawn(&pid, argv[0], NULL, NULL, argv, NULL)) { + exit(2); + } + } + + return (0); } diff --git a/tools/tests/libMicro/apple/trivial.c b/tools/tests/libMicro/apple/trivial.c new file mode 100644 index 000000000..97a051b26 --- /dev/null +++ b/tools/tests/libMicro/apple/trivial.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)trivial.c 1.0 08/17/06 Apple Inc." +#endif + + + +#include +#include +#include +#include +#include +// add additional headers needed here. + +#include "../libmicro.h" + +#if DEBUG +# define debug(fmt, args...) (void) fprintf(stderr, fmt "\n" , ##args) +#else +# define debug(fmt, args...) +#endif + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int ts_once; +} tsd_t; + +/* + * You can have any lower-case option you want to define. + * options are specified in the lm_optstr as either a + * single lower-case letter, or a single lower case letter + * with a colon after it. In this example, you can optionally + * specify -c {str} -e or -t {number} + * -c takes a string (quote the string if blanks) + * -e is a boolean + * -t takes a numeric + * argument. + */ +static char * optc; // allocated in benchmark_init, freed in benchmark_fini. 
+static bool opte = false; +static int optt = 1; + + +int +benchmark_init() +{ + debug("benchmark_init\n"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "c:et:"); + /* + * tsd_t is the state info struct that we pass around + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-c string]\n" + " [-e] optional parameter\n" + " [-t int (default 1)]\n" + "notes: measures nothing\n"); + + optc = malloc(20); + return (0); +} + +/* + * This is where you parse your lower-case arguments. + * the format was defined in the lm_optstr assignment + * in benchmark_init + */ +int +benchmark_optswitch(int opt, char *optarg) +{ + debug("benchmark_optswitch\n"); + + switch (opt) { + case 'c': + strncpy(optc, optarg, 20); + debug("optc = %s\n", optc); + break; + case 'e': + opte = true; + debug("opte = %s\n", opte? "true": "false"); + break; + case 't': + optt = sizetoint(optarg); + debug("optt = %d\n", optt); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + debug("benchmark_initrun\n"); + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + tsd_t *ts = (tsd_t *)tsd; + ts->ts_once = optt; + debug("benchmark_initworker: ts_once = %i\n",ts->ts_once); + return (0); +} + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + tsd_t *ts = (tsd_t *)tsd; + // useless code to show what you can do. + ts->ts_once++; + ts->ts_once--; + debug("benchmark_initbatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * try not to initialize things here. This is the main + * loop of things to get timed. 
Start a server in + * benchmark_initbatch + */ + tsd_t *ts = (tsd_t *)tsd; + int i; + + debug("in to benchmark - optB = %i : ts_once = %i\n", lm_optB, ts->ts_once); + for (i = 0; i < lm_optB; i++) { + /* + * just to show that ts really contains state + */ + ts->ts_once++; + } + res->re_count = i; + debug("out of benchmark - optB = %i : ts_once = %i\n", lm_optB, ts->ts_once); + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + /* + * more proof of state passing + */ + ts->ts_once = optt; + debug("benchmark_finibatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +int +benchmark_finiworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + // useless code to show what you can do. + ts->ts_once++; + ts->ts_once--; + debug("benchmark_finiworker: ts_once = %i\n",ts->ts_once); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + debug("benchmark_result\n"); + return (&result); +} + +int +benchmark_finirun() +{ + debug("benchmark_finirun\n"); + return (0); +} + + +int +benchmark_fini() +{ + debug("benchmark_fini\n"); + free(optc); + return (0); +} + diff --git a/tools/tests/libMicro/apple/vm_allocate.c b/tools/tests/libMicro/apple/vm_allocate.c new file mode 100644 index 000000000..0450f7a44 --- /dev/null +++ b/tools/tests/libMicro/apple/vm_allocate.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2006 Apple Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* + * Order of Execution + * + * benchmark_init + * + * benchmark_optswitch + * + * benchmark_initrun + * + * benchmark_initworker + * benchmark_initbatch + * benchmark + * benchmark_finibatch + * benchmark_initbatch + * benchmark + * benchmark_finibatch, etc. + * benchmark_finiworker + * + * benchmark_result + * + * benchmark_finirun + * + * benchmark_fini + */ + + + +#ifdef __sun +#pragma ident "@(#)vm_allocate.c 1.0 09/17/06 Apple Inc." 
+#endif + + + +#include +#include +#include +#include +#include + +#include "../libmicro.h" + +/* + * Your state variables should live in the tsd_t struct below + */ +typedef struct { + int ts_once; +} tsd_t; + +unsigned char * arena; +unsigned int arenaSize = 1; + +static int optt = 0; + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + /* + * initialize your state variables here second + */ + //tsd_t *ts = (tsd_t *)tsd; + //(void) fprintf(stderr, "benchmark_initbatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +int +benchmark_finirun() +{ + (void) fprintf(stderr, "benchmark_finirun\n"); + return (0); +} + +int +benchmark_init() +{ + (void) fprintf(stderr, "benchmark_init\n"); + /* + * the lm_optstr must be defined here or no options for you + * + * ...and the framework will throw an error + * + */ + (void) sprintf(lm_optstr, "t:"); + /* + * working hypothesis: + * + * tsd_t is the struct that we can pass around our + * state info in + * + * lm_tsdsize will allocate the space we need for this + * structure throughout the rest of the framework + */ + lm_tsdsize = sizeof (tsd_t); + lm_defB = 1; + + + (void) sprintf(lm_usage, + " [-t int (default 1)]\n" + "notes: measures nothing\n"); + return (0); +} + +int +benchmark_fini() +{ + (void) fprintf(stderr, "benchmark_fini\n"); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + /* + * more proof of state passing + */ + ts->ts_once = optt; + //(void) fprintf(stderr, "benchmark_finibatch: ts_once = %i\n",ts->ts_once); + return (0); +} + +char * +benchmark_result() +{ + static char result = '\0'; + (void) fprintf(stderr, "benchmark_result\n"); + return (&result); +} + +int +benchmark_finiworker(void *tsd) +{ + //tsd_t *ts = (tsd_t *)tsd; + //(void) fprintf(stderr, "benchmark_finiworker: ts_once = %i\n",ts->ts_once); + //vm_deallocate( mach_task_self(), (vm_address_t) arena, arenaSize * vm_page_size); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + 
(void) fprintf(stderr, "benchmark_optswitch\n"); + + switch (opt) { + case 't': + optt = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + /* + * initialize your state variables here first + */ + //tsd_t *ts = (tsd_t *)tsd; + //ts->ts_once = optt; + //(void) fprintf(stderr, "benchmark_initworker: ts_once = %i\n",ts->ts_once); + if ( optt > 0 ) { + arenaSize = optt; + } + // warmup + vm_allocate( mach_task_self(), (vm_address_t *) &arena, arenaSize * vm_page_size, 1); + + vm_deallocate( mach_task_self(), (vm_address_t) arena, arenaSize * vm_page_size); + //arena = ( unsigned char * )malloc( arenaSize); + return (0); +} + +int +benchmark_initrun() +{ + //(void) fprintf(stderr, "benchmark_initrun\n"); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + /* + * initialize your state variables here last + * + * and realize that you are paying for your initialization here + * and it is really a bad idea + */ + //tsd_t *ts = (tsd_t *)tsd; + int i; + + //(void) fprintf(stderr, "in to benchmark - optB = %i\n", lm_optB); + for (i = 0; i < lm_optB; i++) { + /* + * just to show that ts really contains state + */ + //(void) fprintf(stderr, "i is %i\n",i); + if (vm_allocate( mach_task_self(), (vm_address_t *) &arena, arenaSize * vm_page_size, 1)) + abort(); + if (vm_deallocate( mach_task_self(), (vm_address_t) arena, arenaSize * vm_page_size)) + abort(); + + } + res->re_count = i; + //(void) fprintf(stderr, "out of benchmark - optB = %i : ts_once = %i\n", lm_optB, ts->ts_once); + + return (0); +} diff --git a/tools/tests/libMicro/atomic.c b/tools/tests/libMicro/atomic.c new file mode 100644 index 000000000..e005b4632 --- /dev/null +++ b/tools/tests/libMicro/atomic.c @@ -0,0 +1,74 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * benchmarks atomic add on Solaris - useful for platform comparisons. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "note: measures atomic_add_32_nv()"); + + lm_tsdsize = 0; + + return (0); +} + +static unsigned int value = 0; + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + unsigned int i; + for (i = 0; i < lm_optB; i += 10) { + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + (void) atomic_add_32_nv(&value, 1); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/bench.sh b/tools/tests/libMicro/bench.sh new file mode 100644 index 000000000..698557eb6 --- /dev/null +++ b/tools/tests/libMicro/bench.sh @@ -0,0 +1,761 @@ +#!/bin/sh +# +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License 
+# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing +# permissions and limitations under the License. +# +# When distributing Covered Code, include this CDDL +# HEADER in each file and include the License file at +# usr/src/OPENSOLARIS.LICENSE. If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +tattle="./tattle" + +bench_version=0.4.0 +libmicro_version=`$tattle -V` + +case $libmicro_version in +$bench_version) + ;; +*) + echo "ERROR: libMicro version doesn't match 'bench' script version" + exit 1 +esac + +TMPROOT=/private/tmp/libmicro.$$ +VARROOT=/private/var/tmp/libmicro.$$ +mkdir -p $TMPROOT +mkdir -p $VARROOT +trap "rm -rf $TMPROOT $VARROOT && exit" 0 2 + +TFILE=$TMPROOT/data +IFILE=$TMPROOT/ifile +TDIR1=$TMPROOT/0/1/2/3/4/5/6/7/8/9 +TDIR2=$TMPROOT/1/2/3/4/5/6/7/8/9/0 +VFILE=$VARROOT/data +VDIR1=$VARROOT/0/1/2/3/4/5/6/7/8/9 +VDIR2=$VARROOT/1/2/3/4/5/6/7/8/9/0 + + +OPTS="-E -C 200 -L -S -W" + +dd if=/dev/zero of=$TFILE bs=1024k count=10 2>/dev/null +dd if=/dev/zero of=$VFILE bs=1024k count=10 2>/dev/null +mkdir -p $TDIR1 $TDIR2 +mkdir -p $VDIR1 $VDIR2 + +touch $IFILE +/usr/bin/touch /private/var/tmp/lmbench + + +# produce benchmark header for easier comparisons + +hostname=`uname -n` + +if [ -f /usr/sbin/psrinfo ]; then + p_count=`psrinfo|wc -l` + p_mhz=`psrinfo -v | awk '/operates/{print $6 "MHz"; exit }'` + p_type=`psrinfo -vp 2>/dev/null | awk '{if (NR == 3) {print $0; exit}}'` + p_ipaddr=`getent hosts $hostname | awk '{print $1}'` +fi + +if [ -f /proc/cpuinfo ]; 
then + p_count=`egrep processor /proc/cpuinfo | wc -l` + p_mhz=`awk -F: '/cpu MHz/{printf("%5.0f00Mhz\n",$2/100); exit}' /proc/cpuinfo` + p_type=`awk -F: '/model name/{print $2; exit}' /proc/cpuinfo` + p_ipaddr=`getent hosts $hostname | awk '{print $1}'` +else +## Mac OS X specific stuff +# first, get ugly output, in case pretty output isn't available +# + p_count=`sysctl -n hw.physicalcpu` + p_mhz=`sysctl -n hw.cpufrequency` + p_type=`sysctl -n hw.model` + +if [ -x /usr/sbin/system_profiler ]; then + # requires this hunk of work-around + # grep the XML for the characteristic we need. The key appears twice, so grep for the useful key (with 'string') + # use sed to strip off the and the tabs in front of the string. So much work for so little result. + # + p_mhz=`system_profiler -xml -detailLevel mini SPHardwareDataType | \ + grep -A1 current_processor_speed | grep string | \ + sed -E 's/(.+)<\/string>/\1/' | sed 's- --g'` + p_type=`system_profiler -xml -detailLevel mini SPHardwareDataType | \ + grep -A1 cpu_type | grep string | \ + sed -E 's/(.+)<\/string>/\1/' | sed 's- --g'` +fi + +# look for en0 (usually ethernet) if that isn't there try en1 (usually wireless) else give up + p_ipaddr=`ipconfig getpacket en0 | grep yiaddr | tr "= " "\n" | grep [0-9]` + if [ ! $p_ipaddr ]; then + p_ipaddr=`ipconfig getpacket en1 | grep yiaddr | tr "= " "\n" | grep [0-9]` + elif [ ! 
$p_ipaddr ]; then + p_ipaddr="unknown" + fi +fi + +printf "\n\n!Libmicro_#: %30s\n" $libmicro_version +printf "!Options: %30s\n" "$OPTS" +printf "!Machine_name: %30s\n" "$hostname" +printf "!OS_name: %30s\n" `uname -s` +printf "!OS_release: %30s\n" `sw_vers -productVersion` +printf "!OS_build: %30.18s\n" "`sw_vers -buildVersion`" +printf "!Processor: %30s\n" `arch` +printf "!#CPUs: %30s\n" $p_count +printf "!CPU_MHz: %30s\n" "$p_mhz" +printf "!CPU_NAME: %30s\n" "$p_type" +printf "!IP_address: %30s\n" "$p_ipaddr" +printf "!Run_by: %30s\n" $LOGNAME +printf "!Date: %30s\n" "`date '+%D %R'`" +printf "!Compiler: %30s\n" `$tattle -c` +printf "!Compiler Ver.:%30s\n" "`$tattle -v`" +printf "!sizeof(long): %30s\n" `$tattle -s` +printf "!extra_CFLAGS: %30s\n" "`$tattle -f`" +printf "!TimerRes: %30s\n\n\n" "`$tattle -r`" + +bin_dir="$TMPROOT/bin" + +mkdir -p $bin_dir +cp bin-*/exec_bin $bin_dir/$A + +cp ./apple/bin-*/posix_spawn_bin $bin_dir/$A + +newline=0 + +# +# Everything below the while loop is input for the while loop +# if you have any tests which can't run in the while loop, put +# them above this comment +# +while read A B +do + # $A contains the command, $B contains the arguments + # we echo blank lines and comments + # we skip anything which fails to match *$1* (useful + # if we only want to test one case, but a nasty hack) + + case $A in + \#*) + echo "$A $B" + newline=1 + continue + ;; + + "") + if [ $newline -eq 1 ] + then + newline=0 + echo + echo + fi + + continue + ;; + + *$1*) + ;; + + *) + continue + ;; + esac + + if [ ! -f $bin_dir/$A ] + then + cp bin-*/$A $bin_dir/$A + fi + + echo + + (cd $TMPROOT && eval "bin/$A $B") + + echo + echo +done <<. 
+ +# +# Obligatory null system call: use very short time +# for default since SuSe implements this "syscall" in userland +# + +getpid $OPTS -N "getpid" -I 5 +getppid $OPTS -N "getppid" -I 5 + +getenv $OPTS -N "getenv" -s 100 -I 100 +getenv $OPTS -N "getenvT2" -s 100 -I 100 -T 2 + +gettimeofday $OPTS -N "gettimeofday" + +log $OPTS -N "log" -I 20 -B 300000 +exp $OPTS -N "exp" -I 20 -B 100000 +lrand48 $OPTS -N "lrand48" + +memset $OPTS -N "memset_10" -s 10 -I 10 +memset $OPTS -N "memset_256" -s 256 -I 20 +memset $OPTS -N "memset_256_u" -s 256 -a 1 -I 20 +memset $OPTS -N "memset_1k" -s 1k -I 100 -B 2000 +memset $OPTS -N "memset_4k" -s 4k -I 250 -B 500 +memset $OPTS -N "memset_4k_uc" -s 4k -u -I 400 + +memset $OPTS -N "memset_10k" -s 10k -I 600 -B 500 +memset $OPTS -N "memset_1m" -s 1m -I 200000 +memset $OPTS -N "memset_10m" -s 10m -I 2000000 +memset $OPTS -N "memsetP2_10m" -s 10m -P 2 -I 2000000 + +memrand $OPTS -N "memrand" -s 40m -B 10000 + +# This is an elided test and is not ported yet. 
+# Check Makefile.darwin for list of elided tests +# cachetocache $OPTS -N "cachetocache" -s 100k -T 2 -I 200 + +isatty $OPTS -N "isatty_yes" +isatty $OPTS -N "isatty_no" -f $IFILE + +malloc $OPTS -N "malloc_10" -s 10 -g 10 -I 50 +malloc $OPTS -N "malloc_100" -s 100 -g 10 -I 50 +malloc $OPTS -N "malloc_1k" -s 1k -g 10 -I 50 +malloc $OPTS -N "malloc_10k" -s 10k -g 10 -I 50 +malloc $OPTS -N "malloc_100k" -s 100k -g 10 -I 2000 + +malloc $OPTS -N "mallocT2_10" -s 10 -g 10 -T 2 -I 200 +malloc $OPTS -N "mallocT2_100" -s 100 -g 10 -T 2 -I 200 +malloc $OPTS -N "mallocT2_1k" -s 1k -g 10 -T 2 -I 200 +malloc $OPTS -N "mallocT2_10k" -s 10k -g 10 -T 2 -I 200 +malloc $OPTS -N "mallocT2_100k" -s 100k -g 10 -T 2 -I 10000 + +close $OPTS -N "close_bad" -B 96 -b +close $OPTS -N "close_tmp" -B 64 -f $TFILE +close $OPTS -N "close_usr" -B 64 -f $VFILE +close $OPTS -N "close_zero" -B 64 -f /dev/zero +close_tcp $OPTS -N "close_tcp" -B 32 + +memcpy $OPTS -N "memcpy_10" -s 10 -I 10 +memcpy $OPTS -N "memcpy_1k" -s 1k -I 50 +memcpy $OPTS -N "memcpy_10k" -s 10k -I 800 +memcpy $OPTS -N "memcpy_1m" -s 1m -I 500000 +memcpy $OPTS -N "memcpy_10m" -s 10m -I 5000000 + +strcpy $OPTS -N "strcpy_10" -s 10 -I 5 +strcpy $OPTS -N "strcpy_1k" -s 1k -I 100 + +strlen $OPTS -N "strlen_10" -s 10 -I 5 +strlen $OPTS -N "strlen_1k" -s 1k -I 100 + +strchr $OPTS -N "strchr_10" -s 10 -I 5 +strchr $OPTS -N "strchr_1k" -s 1k -I 200 +strcmp $OPTS -N "strcmp_10" -s 10 -I 10 +strcmp $OPTS -N "strcmp_1k" -s 1k -I 200 + +strcasecmp $OPTS -N "scasecmp_10" -s 10 -I 50 -B 2000 +strcasecmp $OPTS -N "scasecmp_1k" -s 1k -I 20000 -B 100 + +strtol $OPTS -N "strtol" -I 20 + +# This is an elided test and is not ported yet. +# Check Makefile.darwin for list of elided tests +# getcontext $OPTS -N "getcontext" -I 100 + +# This is an elided test and is not ported yet. 
+# Check Makefile.darwin for list of elided tests +# setcontext $OPTS -N "setcontext" -I 100 + +mutex $OPTS -N "mutex_st" -I 10 +mutex $OPTS -N "mutex_mt" -t -I 10 +mutex $OPTS -N "mutex_T2" -T 2 -I 100 + +longjmp $OPTS -N "longjmp" -I 10 +siglongjmp $OPTS -N "siglongjmp" -I 20 + +getrusage $OPTS -N "getrusage" -I 200 + +times $OPTS -N "times" -I 200 +time $OPTS -N "time" -I 50 +localtime_r $OPTS -N "localtime_r" -I 200 +strftime $OPTS -N "strftime" -I 10000 -B 100 + +mktime $OPTS -N "mktime" -I 500 +mktime $OPTS -N "mktimeT2" -T 2 -I 1000 + +cascade_mutex $OPTS -N "c_mutex_1" -I 50 +cascade_mutex $OPTS -N "c_mutex_10" -T 10 -I 5000 +cascade_mutex $OPTS -N "c_mutex_200" -T 200 -I 2000000 + +cascade_cond $OPTS -N "c_cond_1" -I 100 +cascade_cond $OPTS -N "c_cond_10" -T 10 -I 3000 +cascade_cond $OPTS -N "c_cond_200" -T 200 -I 2000000 + +cascade_lockf $OPTS -N "c_lockf_1" -I 1000 +cascade_lockf $OPTS -N "c_lockf_10" -P 10 -I 50000 +cascade_lockf $OPTS -N "c_lockf_200" -P 200 -I 5000000 + +cascade_flock $OPTS -N "c_flock" -I 1000 +cascade_flock $OPTS -N "c_flock_10" -P 10 -I 50000 +cascade_flock $OPTS -N "c_flock_200" -P 200 -I 5000000 + +cascade_fcntl $OPTS -N "c_fcntl_1" -I 2000 +cascade_fcntl $OPTS -N "c_fcntl_10" -P 10 -I 20000 +cascade_fcntl $OPTS -N "c_fcntl_200" -P 200 -I 5000000 + +file_lock $OPTS -N "file_lock" -I 1000 + +getsockname $OPTS -N "getsockname" -I 100 +getpeername $OPTS -N "getpeername" -I 100 + +chdir $OPTS -N "chdir_tmp" -I 2000 $TDIR1 $TDIR2 +chdir $OPTS -N "chdir_usr" -I 2000 $VDIR1 $VDIR2 + +chdir $OPTS -N "chgetwd_tmp" -I 3000 -g $TDIR1 $TDIR2 +chdir $OPTS -N "chgetwd_usr" -I 3000 -g $VDIR1 $VDIR2 + +realpath $OPTS -N "realpath_tmp" -I 3000 -f $TDIR1 +realpath $OPTS -N "realpath_usr" -I 3000 -f $VDIR1 + +stat $OPTS -N "stat_tmp" -I 1000 -f $TFILE +stat $OPTS -N "stat_usr" -I 1000 -f $VFILE + +lmbench_stat $OPTS -N "lmbench_stat_tmp" -I 1000 -f $TFILE +lmbench_stat $OPTS -N "lmbench_stat_usr" -I 10000 -B 100 -f /private/var/tmp/lmbench + +# +# 
lmbench uses a touched empty file in /private/var/tmp +# libMicro uses a 1M file in a directory off /private/var/tmp +# performance difference is ~ 0.2 usecs/call +# +# why? - walking the dir tree, empty file vs. non-empty file, non-empty dir +# in the case of libMicro, etc., etc. +# + +lmbench_stat $OPTS -N "lmbench_stat_usr - Default" -I 10000 -B 100 -f /private/var/tmp/lmbench + +lmbench_fstat $OPTS -N "lmbench_fstat_tmp" -I 1000 -f $TFILE +lmbench_fstat $OPTS -N "lmbench_fstat_usr" -I 10000 -B 100 -f /private/var/tmp/lmbench + +# see stat test to understand why we are using /private/var/tmp/lmbench + +lmbench_fstat $OPTS -N "lmbench_fstat_usr - Default" -I 10000 -B 100 -f /private/var/tmp/lmbench + +lmbench_openclose $OPTS -N "lmbench_openclose - Default" -I 10000 -B 100 -f /private/var/tmp/lmbench + +lmbench_select_file $OPTS -N "lmbench_select_file_10" -n 10 -B 100 +lmbench_select_file $OPTS -N "lmbench_select_file_100" -n 100 -B 100 +lmbench_select_file $OPTS -N "lmbench_select_file_250" -n 250 -B 100 +lmbench_select_file $OPTS -N "lmbench_select_file_500" -n 500 -B 100 + +lmbench_select_tcp $OPTS -N "lmbench_select_tcp_10" -n 10 -B 100 +lmbench_select_tcp $OPTS -N "lmbench_select_tcp_100" -n 100 -B 100 +lmbench_select_tcp $OPTS -N "lmbench_select_tcp_250" -n 250 -B 100 +lmbench_select_tcp $OPTS -N "lmbench_select_tcp_500" -n 500 -B 100 + +fcntl $OPTS -N "fcntl_tmp" -I 100 -f $TFILE +fcntl $OPTS -N "fcntl_usr" -I 100 -f $VFILE +fcntl_ndelay $OPTS -N "fcntl_ndelay" -I 100 + +lseek $OPTS -N "lseek_t8k" -s 8k -I 50 -f $TFILE +lseek $OPTS -N "lseek_u8k" -s 8k -I 50 -f $VFILE + +open $OPTS -N "open_tmp" -B 256 -f $TFILE +open $OPTS -N "open_usr" -B 256 -f $VFILE +open $OPTS -N "open_zero" -B 256 -f /dev/zero + +dup $OPTS -N "dup" -B 512 + +socket $OPTS -N "socket_u" -B 256 +socket $OPTS -N "socket_i" -B 256 -f PF_INET + +socketpair $OPTS -N "socketpair" -B 256 + +setsockopt $OPTS -N "setsockopt" -I 200 + +bind $OPTS -N "bind" -B 100 + +listen $OPTS -N "listen" -B 
100 + +#connection $OPTS -N "connection" -B 256 + +poll $OPTS -N "poll_10" -n 10 -I 500 +poll $OPTS -N "poll_100" -n 100 -I 1000 +poll $OPTS -N "poll_1000" -n 1000 -I 5000 + +poll $OPTS -N "poll_w10" -n 10 -I 500 -w 1 +poll $OPTS -N "poll_w100" -n 100 -I 2000 -w 10 +poll $OPTS -N "poll_w1000" -n 1000 -I 40000 -w 100 + +select $OPTS -N "select_10" -n 10 -I 500 +select $OPTS -N "select_100" -n 100 -I 1000 +select $OPTS -N "select_1000" -n 1000 -I 5000 + +select $OPTS -N "select_w10" -n 10 -I 500 -w 1 +select $OPTS -N "select_w100" -n 100 -I 2000 -w 10 +select $OPTS -N "select_w1000" -n 1000 -I 40000 -w 100 + +semop $OPTS -N "semop" -I 200 + +sigaction $OPTS -N "sigaction" -I 100 +signal $OPTS -N "signal" -I 1000 +sigprocmask $OPTS -N "sigprocmask" -I 200 + +lmbench_lat_sig_install $OPTS -N "lmbench_siginstall" +# sigcatch and sigsend need to be evaluated together +# lmbench framework will allow multiple measurements within the same +# benchmark test which allow them to factor out the cost of sending +# a signal from catching one +# +# for our purposes sigcatch results - sigsend results yield +# lmbench sig handler overhead measurements +lmbench_lat_sig_catch $OPTS -N "lmbench_sigcatch" +lmbench_lat_sig_send $OPTS -N "lmbench_sigsend" + + +pthread_create $OPTS -N "pthread_8" -B 8 +pthread_create $OPTS -N "pthread_32" -B 32 +pthread_create $OPTS -N "pthread_128" -B 128 +pthread_create $OPTS -N "pthread_512" -B 512 + +fork $OPTS -N "fork_10" -B 10 +fork $OPTS -N "fork_100" -B 100 -C 100 + +#fork $OPTS -N "fork_1000" -B 1000 -C 50 + +exit $OPTS -N "exit_10" -B 10 +exit $OPTS -N "exit_100" -B 100 + +#exit $OPTS -N "exit_1000" -B 1000 -C 50 + +exit $OPTS -N "exit_10_nolibc" -e -B 10 + +exec $OPTS -N "exec" -B 10 + +posix_spawn $OPTS -N "posix_spawn" -B 10 + +system $OPTS -N "system" -I 1000000 + +recurse $OPTS -N "recurse" -B 512 + +read $OPTS -N "read_t1k" -s 1k -B 50 -f $TFILE +read $OPTS -N "read_t10k" -s 10k -B 16 -f $TFILE +read $OPTS -N "read_t100k" -s 100k -B 4 -f 
$TFILE + +read $OPTS -N "read_u1k" -s 1k -B 50 -f $VFILE +read $OPTS -N "read_u10k" -s 10k -B 16 -f $VFILE +read $OPTS -N "read_u100k" -s 100k -B 4 -f $VFILE + +read $OPTS -N "read_z1k" -s 1k -B 100 -f /dev/zero +read $OPTS -N "read_z10k" -s 10k -B 30 -f /dev/zero +read $OPTS -N "read_z100k" -s 100k -B 4 -f /dev/zero +read $OPTS -N "read_zw100k" -s 100k -B 4 -w -f /dev/zero + +lmbench_read $OPTS -N "read_t1b" -s 1 -B 50 -f $TFILE +lmbench_read $OPTS -N "read_t1k" -s 1k -B 50 -f $TFILE +lmbench_read $OPTS -N "read_t10k" -s 10k -B 16 -f $TFILE +lmbench_read $OPTS -N "read_t100k" -s 100k -B 4 -f $TFILE + +lmbench_read $OPTS -N "read_u1b" -s 1 -B 50 -f $VFILE +lmbench_read $OPTS -N "read_u1k" -s 1k -B 50 -f $VFILE +lmbench_read $OPTS -N "read_u10k" -s 10k -B 16 -f $VFILE +lmbench_read $OPTS -N "read_u100k" -s 100k -B 4 -f $VFILE + +lmbench_read $OPTS -N "read_z1b - Default" -s 1 -B 100 -f /dev/zero +lmbench_read $OPTS -N "read_z1k" -s 1k -B 100 -f /dev/zero +lmbench_read $OPTS -N "read_z10k" -s 10k -B 30 -f /dev/zero +lmbench_read $OPTS -N "read_z100k" -s 100k -B 4 -f /dev/zero +lmbench_read $OPTS -N "read_zw100k" -s 100k -B 4 -w -f /dev/zero + +write $OPTS -N "write_t1k" -s 1k -B 50 -f $TFILE +write $OPTS -N "write_t10k" -s 10k -B 25 -f $TFILE +write $OPTS -N "write_t100k" -s 100k -B 4 -f $TFILE + +write $OPTS -N "write_u1k" -s 1k -B 50 -f $VFILE +write $OPTS -N "write_u10k" -s 10k -B 25 -f $VFILE +write $OPTS -N "write_u100k" -s 100k -B 4 -f $VFILE + +write $OPTS -N "write_n1k" -s 1k -I 100 -B 0 -f /dev/null +write $OPTS -N "write_n10k" -s 10k -I 100 -B 0 -f /dev/null +write $OPTS -N "write_n100k" -s 100k -I 100 -B 0 -f /dev/null + +lmbench_write $OPTS -N "lmbench_write_t1b" -s 1 -B 50 -f $TFILE +lmbench_write $OPTS -N "lmbench_write_t1k" -s 1k -B 50 -f $TFILE +lmbench_write $OPTS -N "lmbench_write_t10k" -s 10k -B 25 -f $TFILE +lmbench_write $OPTS -N "lmbench_write_t100k" -s 100k -B 4 -f $TFILE + +lmbench_write $OPTS -N "lmbench_write_u1b" -s 1 -B 50 -f $VFILE 
+lmbench_write $OPTS -N "lmbench_write_u1k" -s 1k -B 50 -f $VFILE +lmbench_write $OPTS -N "lmbench_write_u10k" -s 10k -B 25 -f $VFILE +lmbench_write $OPTS -N "lmbench_write_u100k" -s 100k -B 4 -f $VFILE + +lmbench_write $OPTS -N "lmbench_write_n1b - Default" -s 1 -I 100 -B 0 -f /dev/null +lmbench_write $OPTS -N "lmbench_write_n1k" -s 1k -I 100 -B 0 -f /dev/null +lmbench_write $OPTS -N "lmbench_write_n10k" -s 10k -I 100 -B 0 -f /dev/null +lmbench_write $OPTS -N "lmbench_write_n100k" -s 100k -I 100 -B 0 -f /dev/null + +writev $OPTS -N "writev_t1k" -s 1k -B 20 -f $TFILE +writev $OPTS -N "writev_t10k" -s 10k -B 4 -f $TFILE +writev $OPTS -N "writev_t100k" -s 100k -f $TFILE + +writev $OPTS -N "writev_u1k" -s 1k -B 20 -f $VFILE +writev $OPTS -N "writev_u10k" -s 10k -B 4 -f $VFILE +writev $OPTS -N "writev_u100k" -s 100k -f $VFILE + +writev $OPTS -N "writev_n1k" -s 1k -I 100 -B 0 -f /dev/null +writev $OPTS -N "writev_n10k" -s 10k -I 100 -B 0 -f /dev/null +writev $OPTS -N "writev_n100k" -s 100k -I 100 -B 0 -f /dev/null + +pread $OPTS -N "pread_t1k" -s 1k -I 300 -f $TFILE +pread $OPTS -N "pread_t10k" -s 10k -I 1000 -f $TFILE +pread $OPTS -N "pread_t100k" -s 100k -I 10000 -f $TFILE + +pread $OPTS -N "pread_u1k" -s 1k -I 300 -f $VFILE +pread $OPTS -N "pread_u10k" -s 10k -I 1000 -f $VFILE +pread $OPTS -N "pread_u100k" -s 100k -I 10000 -f $VFILE + +pread $OPTS -N "pread_z1k" -s 1k -I 300 -f /dev/zero +pread $OPTS -N "pread_z10k" -s 10k -I 1000 -f /dev/zero +pread $OPTS -N "pread_z100k" -s 100k -I 2000 -f /dev/zero +pread $OPTS -N "pread_zw100k" -s 100k -w -I 10000 -f /dev/zero + +pwrite $OPTS -N "pwrite_t1k" -s 1k -I 500 -f $TFILE +pwrite $OPTS -N "pwrite_t10k" -s 10k -I 1000 -f $TFILE +pwrite $OPTS -N "pwrite_t100k" -s 100k -I 10000 -f $TFILE + +pwrite $OPTS -N "pwrite_u1k" -s 1k -I 500 -f $VFILE +pwrite $OPTS -N "pwrite_u10k" -s 10k -I 1000 -f $VFILE +pwrite $OPTS -N "pwrite_u100k" -s 100k -I 20000 -f $VFILE + +pwrite $OPTS -N "pwrite_n1k" -s 1k -I 100 -f /dev/null +pwrite 
$OPTS -N "pwrite_n10k" -s 10k -I 100 -f /dev/null +pwrite $OPTS -N "pwrite_n100k" -s 100k -I 100 -f /dev/null + +mmap $OPTS -N "mmap_z8k" -l 8k -I 1000 -B 50 -f /dev/zero +mmap $OPTS -N "mmap_z128k" -l 128k -I 2000 -B 100 -f /dev/zero +mmap $OPTS -N "mmap_t8k" -l 8k -I 1000 -f $TFILE +mmap $OPTS -N "mmap_t128k" -l 128k -I 1000 -f $TFILE +mmap $OPTS -N "mmap_u8k" -l 8k -I 1000 -f $VFILE +mmap $OPTS -N "mmap_u128k" -l 128k -I 1000 -f $VFILE +mmap $OPTS -N "mmap_a8k" -l 8k -I 200 -f MAP_ANON +mmap $OPTS -N "mmap_a128k" -l 128k -I 200 -f MAP_ANON + + +mmap $OPTS -N "mmap_rz8k" -l 8k -I 2000 -r -f /dev/zero +mmap $OPTS -N "mmap_rz128k" -l 128k -I 2000 -r -f /dev/zero +mmap $OPTS -N "mmap_rt8k" -l 8k -I 2000 -r -f $TFILE +mmap $OPTS -N "mmap_rt128k" -l 128k -I 20000 -r -f $TFILE +mmap $OPTS -N "mmap_ru8k" -l 8k -I 2000 -r -f $VFILE +mmap $OPTS -N "mmap_ru128k" -l 128k -I 20000 -r -f $VFILE +mmap $OPTS -N "mmap_ra8k" -l 8k -I 2000 -r -f MAP_ANON +mmap $OPTS -N "mmap_ra128k" -l 128k -I 20000 -r -f MAP_ANON + +mmap $OPTS -N "mmap_wz8k" -l 8k -I 5000 -w -B 50 -f /dev/zero +mmap $OPTS -N "mmap_wz128k" -l 128k -I 50000 -w -B 50 -f /dev/zero +mmap $OPTS -N "mmap_wt8k" -l 8k -I 5000 -w -f $TFILE +mmap $OPTS -N "mmap_wt128k" -l 128k -I 50000 -w -f $TFILE +mmap $OPTS -N "mmap_wu8k" -l 8k -I 5000 -w -f $VFILE +mmap $OPTS -N "mmap_wu128k" -l 128k -I 500000 -w -f $VFILE +mmap $OPTS -N "mmap_wa8k" -l 8k -I 3000 -w -f MAP_ANON +mmap $OPTS -N "mmap_wa128k" -l 128k -I 50000 -w -f MAP_ANON + +munmap $OPTS -N "unmap_z8k" -l 8k -I 500 -f /dev/zero +munmap $OPTS -N "unmap_z128k" -l 128k -I 500 -B 100 -f /dev/zero +munmap $OPTS -N "unmap_t8k" -l 8k -I 500 -f $TFILE +munmap $OPTS -N "unmap_t128k" -l 128k -I 500 -f $TFILE +munmap $OPTS -N "unmap_u8k" -l 8k -I 500 -f $VFILE +munmap $OPTS -N "unmap_u128k" -l 128k -I 500 -f $VFILE +munmap $OPTS -N "unmap_a8k" -l 8k -I 500 -f MAP_ANON +munmap $OPTS -N "unmap_a128k" -l 128k -I 500 -f MAP_ANON + +munmap $OPTS -N "unmap_rz8k" -l 8k -I 1000 -r -f 
/dev/zero +munmap $OPTS -N "unmap_rz128k" -l 128k -I 2000 -r -B 100 -f /dev/zero +munmap $OPTS -N "unmap_rt8k" -l 8k -I 1000 -r -f $TFILE +munmap $OPTS -N "unmap_rt128k" -l 128k -I 3000 -r -f $TFILE +munmap $OPTS -N "unmap_ru8k" -l 8k -I 1000 -r -f $VFILE +munmap $OPTS -N "unmap_ru128k" -l 128k -I 3000 -r -f $VFILE +munmap $OPTS -N "unmap_ra8k" -l 8k -I 1000 -r -f MAP_ANON +munmap $OPTS -N "unmap_ra128k" -l 128k -I 2000 -r -f MAP_ANON + +connection $OPTS -N "conn_connect" -B 256 -c + +munmap $OPTS -N "unmap_wz8k" -l 8k -I 1000 -w -f /dev/zero +munmap $OPTS -N "unmap_wz128k" -l 128k -I 8000 -w -B 100 -f /dev/zero +munmap $OPTS -N "unmap_wt8k" -l 8k -I 1000 -w -f $TFILE +munmap $OPTS -N "unmap_wt128k" -l 128k -I 10000 -w -f $TFILE +munmap $OPTS -N "unmap_wu8k" -l 8k -I 1000 -w -f $VFILE +munmap $OPTS -N "unmap_wu128k" -l 128k -I 50000 -w -B 10 -f $VFILE +munmap $OPTS -N "unmap_wa8k" -l 8k -I 1000 -w -f MAP_ANON +munmap $OPTS -N "unmap_wa128k" -l 128k -I 10000 -w -f MAP_ANON + + +mprotect $OPTS -N "mprot_z8k" -l 8k -I 300 -f /dev/zero +mprotect $OPTS -N "mprot_z128k" -l 128k -I 500 -f /dev/zero +mprotect $OPTS -N "mprot_wz8k" -l 8k -I 500 -w -f /dev/zero +mprotect $OPTS -N "mprot_wz128k" -l 128k -I 1000 -w -f /dev/zero +mprotect $OPTS -N "mprot_twz8k" -l 8k -I 1000 -w -t -f /dev/zero +mprotect $OPTS -N "mprot_tw128k" -l 128k -I 2000 -w -t -f /dev/zero +mprotect $OPTS -N "mprot_tw4m" -l 4m -w -t -B 1 -f /dev/zero + +pipe $OPTS -N "pipe_pst1" -s 1 -I 1000 -x pipe -m st +pipe $OPTS -N "pipe_pmt1" -s 1 -I 8000 -x pipe -m mt +pipe $OPTS -N "pipe_pmp1" -s 1 -I 8000 -x pipe -m mp +pipe $OPTS -N "pipe_pst4k" -s 4k -I 1000 -x pipe -m st +pipe $OPTS -N "pipe_pmt4k" -s 4k -I 8000 -x pipe -m mt +pipe $OPTS -N "pipe_pmp4k" -s 4k -I 8000 -x pipe -m mp + +pipe $OPTS -N "pipe_sst1" -s 1 -I 1000 -x sock -m st +pipe $OPTS -N "pipe_smt1" -s 1 -I 8000 -x sock -m mt +pipe $OPTS -N "pipe_smp1" -s 1 -I 8000 -x sock -m mp +pipe $OPTS -N "pipe_sst4k" -s 4k -I 1000 -x sock -m st +pipe $OPTS -N 
"pipe_smt4k" -s 4k -I 8000 -x sock -m mt +pipe $OPTS -N "pipe_smp4k" -s 4k -I 8000 -x sock -m mp + +pipe $OPTS -N "pipe_tst1" -s 1 -I 1000 -x tcp -m st +pipe $OPTS -N "pipe_tmt1" -s 1 -I 8000 -x tcp -m mt +pipe $OPTS -N "pipe_tmp1" -s 1 -I 8000 -x tcp -m mp +pipe $OPTS -N "pipe_tst4k" -s 4k -I 1000 -x tcp -m st +pipe $OPTS -N "pipe_tmt4k" -s 4k -I 8000 -x tcp -m mt +pipe $OPTS -N "pipe_tmp4k" -s 4k -I 8000 -x tcp -m mp + +#connection $OPTS -N "conn_accept" -B 256 -a + +lmbench_bw_unix -B 11 -L -W + +lmbench_bw_mem $OPTS -N lmbench_bcopy_512 -s 512 -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_1k -s 1k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_2k -s 2k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_4k -s 4k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_8k -s 8k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_16k -s 16k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_32k -s 32k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_64k -s 64k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_128k -s 128k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_256k -s 256k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_512k -s 512k -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bcopy_1m -s 1m -x bcopy +lmbench_bw_mem $OPTS -N lmbench_bzero_512 -s 512 -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_1k -s 1k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_2k -s 2k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_4k -s 4k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_8k -s 8k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_16k -s 16k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_32k -s 32k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_64k -s 64k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_128k -s 128k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_256k -s 256k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_512k -s 512k -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_1m -s 1m -x bzero +lmbench_bw_mem $OPTS -N lmbench_bzero_512 -s 512 -x fcp +lmbench_bw_mem $OPTS -N 
lmbench_bzero_1k -s 1k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_2k -s 2k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_4k -s 4k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_8k -s 8k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_16k -s 16k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_32k -s 32k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_64k -s 64k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_128k -s 128k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_256k -s 256k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_512k -s 512k -x fcp +lmbench_bw_mem $OPTS -N lmbench_bzero_1m -s 1m -x fcp +lmbench_bw_mem $OPTS -N lmbench_cp_512 -s 512 -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_1k -s 1k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_2k -s 2k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_4k -s 4k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_8k -s 8k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_16k -s 16k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_32k -s 32k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_64k -s 64k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_128k -s 128k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_256k -s 256k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_512k -s 512k -x cp +lmbench_bw_mem $OPTS -N lmbench_cp_1m -s 1m -x cp +lmbench_bw_mem $OPTS -N lmbench_frd_512 -s 512 -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_1k -s 1k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_2k -s 2k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_4k -s 4k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_8k -s 8k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_16k -s 16k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_32k -s 32k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_64k -s 64k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_128k -s 128k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_256k -s 256k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_512k -s 512k -x frd +lmbench_bw_mem $OPTS -N lmbench_frd_1m -s 1m -x frd +lmbench_bw_mem $OPTS -N lmbench_rd_512 -s 512 -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_1k -s 1k -x rd 
+lmbench_bw_mem $OPTS -N lmbench_rd_2k -s 2k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_4k -s 4k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_8k -s 8k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_16k -s 16k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_32k -s 32k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_64k -s 64k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_128k -s 128k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_256k -s 256k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_512k -s 512k -x rd +lmbench_bw_mem $OPTS -N lmbench_rd_1m -s 1m -x rd +lmbench_bw_mem $OPTS -N lmbench_fwr_512 -s 512 -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_1k -s 1k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_2k -s 2k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_4k -s 4k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_8k -s 8k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_16k -s 16k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_32k -s 32k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_64k -s 64k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_128k -s 128k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_256k -s 256k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_512k -s 512k -x fwr +lmbench_bw_mem $OPTS -N lmbench_fwr_1m -s 1m -x fwr +lmbench_bw_mem $OPTS -N lmbench_wr_512 -s 512 -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_1k -s 1k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_2k -s 2k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_4k -s 4k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_8k -s 8k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_16k -s 16k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_32k -s 32k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_64k -s 64k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_128k -s 128k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_256k -s 256k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_512k -s 512k -x wr +lmbench_bw_mem $OPTS -N lmbench_wr_1m -s 1m -x wr +lmbench_bw_mem $OPTS -N lmbench_rdwr_512 -s 512 -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_1k -s 1k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_2k -s 2k -x rdwr +lmbench_bw_mem 
$OPTS -N lmbench_rdwr_4k -s 4k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_8k -s 8k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_16k -s 16k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_32k -s 32k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_64k -s 64k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_128k -s 128k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_256k -s 256k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_512k -s 512k -x rdwr +lmbench_bw_mem $OPTS -N lmbench_rdwr_1m -s 1m -x rdwr + +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512 -s 512 -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1k -s 1k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_2k -s 2k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_4k -s 4k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_8k -s 8k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_16k -s 16k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_32k -s 32k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_64k -s 64k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_128k -s 128k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_256k -s 256k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_512k -s 512k -f $TFILE +lmbench_bw_mmap_rd $OPTS -N bw_mmap_rd_1m -s 1m -f $TFILE + +. diff --git a/tools/tests/libMicro/benchmark_fini.c b/tools/tests/libMicro/benchmark_fini.c new file mode 100644 index 000000000..4952a9ac0 --- /dev/null +++ b/tools/tests/libMicro/benchmark_fini.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. 
/*
 * Default benchmark_fini: performs no work and always returns 0.
 */
int
benchmark_fini()
{
	const int status = 0;	/* nothing to do in the default hook */

	return (status);
}
/*
 * Default benchmark_finibatch: ignores its argument, performs no work
 * and always returns 0.
 */
/*ARGSUSED*/
int
benchmark_finibatch(void *tsd)
{
	(void) tsd;	/* the default hook never looks at its argument */

	return (0);
}

/*
 * Default benchmark_finirun: performs no work and always returns 0.
 */
int
benchmark_finirun()
{
	const int status = 0;

	return (status);
}
/*
 * Default benchmark_finiworker: ignores its argument, performs no work
 * and always returns 0.
 */
/*ARGSUSED*/
int
benchmark_finiworker(void *tsd)
{
	(void) tsd;	/* the default hook never looks at its argument */

	return (0);
}
/*
 * Default benchmark_init: performs no work and always returns 0.
 */
int
benchmark_init()
{
	const int status = 0;	/* nothing to set up in the default hook */

	return (status);
}
/*
 * Default benchmark_initbatch: ignores its argument, performs no work
 * and always returns 0.
 */
/*ARGSUSED*/
int
benchmark_initbatch(void *tsd)
{
	(void) tsd;	/* the default hook never looks at its argument */

	return (0);
}

/*
 * Default benchmark_initrun: performs no work and always returns 0.
 */
int
benchmark_initrun()
{
	const int status = 0;

	return (status);
}
/*
 * Default benchmark_initworker: ignores its argument, performs no work
 * and always returns 0.
 */
/*ARGSUSED*/
int
benchmark_initworker(void *tsd)
{
	(void) tsd;	/* the default hook never looks at its argument */

	return (0);
}
/*
 * Default benchmark_optswitch: ignores both the option character and its
 * argument, and always returns 0.
 */
/*ARGSUSED*/
int
benchmark_optswitch(int opt, char *optarg)
{
	/* neither parameter is consulted by the default hook */
	(void) opt;
	(void) optarg;

	return (0);
}
+ */ + +/* + * default implementation (nop) of benchmark_result + */ + +#include +#include +#include + +#include "libmicro.h" + +char * +benchmark_result() +{ + static char result = '\0'; + + return (&result); +} diff --git a/tools/tests/libMicro/bind.c b/tools/tests/libMicro/bind.c new file mode 100644 index 000000000..bd8e43b13 --- /dev/null +++ b/tools/tests/libMicro/bind.c @@ -0,0 +1,163 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * benchmark for bind... keep in mind tcp hash chain effects + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define FIRSTPORT 12345 + +typedef struct { + int *ts_lsns; + struct sockaddr_in *ts_adds; +} tsd_t; + +static int optz = -0; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + (void) sprintf(lm_optstr, "z"); + + (void) sprintf(lm_usage, + " [-z bind to port 0 rather than seq. 
number\n" + "notes: measures bind() on TCP"); + + return (0); +} + +int +benchmark_initrun() +{ + (void) setfdlimit(lm_optB * lm_optT + 10); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'z': + optz = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + int opt = 1; + struct hostent *host; + int errors = 0; + + ts->ts_lsns = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_lsns == NULL) + errors ++; + + ts->ts_adds = (struct sockaddr_in *)malloc(lm_optB * + sizeof (struct sockaddr_in)); + if (ts->ts_adds == NULL) + errors ++; + + j = FIRSTPORT; + for (i = 0; i < lm_optB; i++) { + if ((ts->ts_lsns[i] = socket(PF_INET, SOCK_STREAM, 0)) == -1) + errors ++; + + if (setsockopt(ts->ts_lsns[i], SOL_SOCKET, SO_REUSEADDR, + &opt, sizeof (int)) == -1) + errors ++; + + if ((host = gethostbyname("localhost")) == NULL) + errors ++; + + (void) memset(&ts->ts_adds[i], 0, + sizeof (struct sockaddr_in)); + ts->ts_adds[i].sin_family = AF_INET; + ts->ts_adds[i].sin_port = (optz ? 
0 : htons(j++)); + (void) memcpy(&ts->ts_adds[i].sin_addr.s_addr, + host->h_addr_list[0], sizeof (struct in_addr)); + } + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if ((bind(ts->ts_lsns[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)) != 0) && + (errno != EADDRINUSE)) + res->re_errors ++; + } + res->re_count = i; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) + (void) close(ts->ts_lsns[i]); + return (0); +} diff --git a/tools/tests/libMicro/cachetocache.c b/tools/tests/libMicro/cachetocache.c new file mode 100644 index 000000000..ffe9ddf7a --- /dev/null +++ b/tools/tests/libMicro/cachetocache.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * routine to benchmark cache-to-cache transfer times... uses + * solaris features to find and bind to cpus in the current + * processor set, so not likely to work elsewhere. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +static long opts = 1024*512; + +typedef struct { + long **ts_data; + long ts_result; + pthread_mutex_t ts_lock; +} tsd_t; + +static unsigned int ncpu = 1024; + +static tsd_t *thread_data[1024]; +static processorid_t cpus[1024]; + +int traverse_ptrchain(long **, int, int); + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:"); + + (void) sprintf(lm_usage, + " [-s size] size of access area in bytes" + " (default %ld)\n" + "notes: measures cache to cache transfer times on Solaris\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 's': + opts = sizetoint(optarg); + break; + default: + return (-1); + } + + return (0); +} + +int +benchmark_initrun() +{ + if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) { + perror("pset_info"); + return (1); + } + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + processorid_t cpu; + + ts->ts_data = malloc(opts); + + if (ts->ts_data == NULL) { + return (1); + } + + (void) pthread_mutex_init(&ts->ts_lock, NULL); + + + if (processor_bind(P_LWPID, P_MYID, + cpu = cpus[(pthread_self() - 1) % ncpu], + NULL) < 0) { + perror("processor_bind:"); + return (1); + } + + (void) printf("# thread %d using processor %d\n", pthread_self(), cpu); + + /* + * use lmbench style backwards stride + */ + + for (i = 0; i < opts / sizeof (long); i++) { + j = i - 128; + if (j < 0) + j = j + opts / sizeof (long); + ts->ts_data[i] = (long *)&(ts->ts_data[j]); + } + + thread_data[pthread_self() - 1] = ts; + + return (0); +} + +/* + * here we go in order for each thread, causing inherent serialization + * this is normally not a good idea, but in this case we're trying to + * measure cache-to-cache transfer times, and if we run threads in + * 
/*
 * Walk a chain of pointers starting at `ptr'.
 *
 * The loop counter advances by 10 per iteration and the body is
 * unrolled by hand: each iteration performs ten "update, then follow"
 * steps plus one trailing update.  Every step rewrites the current
 * link as `*ptr + value' (pointer arithmetic on a long *) before
 * following it, so the traversal both reads and writes each element it
 * visits.  The caller in this file passes value == 0, which leaves the
 * links unchanged.
 *
 * The returned value is the final link truncated to int; per the
 * original comment it carries no meaning.
 */
int
traverse_ptrchain(long **ptr, int count, int value)
{
	int i;

	for (i = 0; i < count; i += 10) {
		/* hops 1-10: store to the link, then follow it */
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		/* trailing store to the element reached by the tenth hop */
		*ptr = *ptr + value;
	}
	return ((int)*ptr); /* bogus return */
}
+ * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The "cascade" test case is a multiprocess/multithread batten-passing model + * using lock primitives alone for synchronisation. Threads are arranged in a + * ring. Each thread has two locks of its own on which it blocks, and is able + * to manipulate the two locks belonging to the thread which follows it in the + * ring. + * + * The number of threads (nthreads) is specified by the generic libMicro -P/-T + * options. With nthreads == 1 (the default) the uncontended case can be timed. + * + * The main logic is generic and allows any simple blocking API to be tested. + * The API-specific component is clearly indicated. 
+ */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int ts_id; + int ts_us0; /* our lock indices */ + int ts_us1; + int ts_them0; /* their lock indices */ + int ts_them1; +} tsd_t; + +static int nthreads; + +/* + * API-specific code BEGINS here + */ + +static int opto = 0; +static int opts = 0; +static int nlocks; +static pthread_mutex_t *mxs; +static pthread_cond_t *cvs; +static int *conds; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "os"); + + lm_defN = "cscd_cond"; + + (void) sprintf(lm_usage, + " [-o] (do signal outside mutex)\n" + " [-s] (force PTHREAD_PROCESS_SHARED)\n" + "notes: thread cascade using pthread_conds\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'o': + opto = 1; + break; + case 's': + opts = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int e = 0; + pthread_mutexattr_t ma; + pthread_condattr_t ca; + + nthreads = lm_optP * lm_optT; + nlocks = nthreads * 2; + /*LINTED*/ + mxs = (pthread_mutex_t *)mmap(NULL, + nlocks * sizeof (pthread_mutex_t), + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, 0L); + if (mxs == MAP_FAILED) { + return (1); + } + + /*LINTED*/ + cvs = (pthread_cond_t *)mmap(NULL, + nlocks * sizeof (pthread_cond_t), + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, 0L); + if (cvs == MAP_FAILED) { + return (1); + } + + /*LINTED*/ + conds = (int *)mmap(NULL, + nlocks * sizeof (pthread_cond_t), + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, 0L); + if (conds == MAP_FAILED) { + return (1); + } + + (void) pthread_mutexattr_init(&ma); + (void) pthread_condattr_init(&ca); + if (lm_optP > 1 || opts) { + (void) pthread_mutexattr_setpshared(&ma, + PTHREAD_PROCESS_SHARED); + (void) pthread_condattr_setpshared(&ca, + PTHREAD_PROCESS_SHARED); + } else { + (void) 
pthread_mutexattr_setpshared(&ma, + PTHREAD_PROCESS_PRIVATE); + (void) pthread_condattr_setpshared(&ca, + PTHREAD_PROCESS_PRIVATE); + } + + for (i = 0; i < nlocks; i++) { + (void) pthread_mutex_init(&mxs[i], &ma); + (void) pthread_cond_init(&cvs[i], &ca); + conds[i] = 0; + } + + return (e); +} + +int +block(int index) +{ + (void) pthread_mutex_lock(&mxs[index]); + while (conds[index] != 0) { + (void) pthread_cond_wait(&cvs[index], &mxs[index]); + } + conds[index] = 1; + (void) pthread_mutex_unlock(&mxs[index]); + + return (0); +} + +int +unblock(int index) +{ + (void) pthread_mutex_lock(&mxs[index]); + conds[index] = 0; + if (opto) { + (void) pthread_mutex_unlock(&mxs[index]); + (void) pthread_cond_signal(&cvs[index]); + } else { + (void) pthread_cond_signal(&cvs[index]); + (void) pthread_mutex_unlock(&mxs[index]); + } + return (0); +} + +/* + * API-specific code ENDS here + */ + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int e = 0; + + if (ts->ts_once == 0) { + int us, them; + +#if !defined(__APPLE__) + us = (getpindex() * lm_optT) + gettindex(); +#else + us = gettsdindex(tsd); +#endif /* __APPLE__ */ + + them = (us + 1) % (lm_optP * lm_optT); + + ts->ts_id = us; + + /* lock index asignment for us and them */ + ts->ts_us0 = (us * 2); + ts->ts_us1 = (us * 2) + 1; + if (us < nthreads - 1) { + /* straight-thru connection to them */ + ts->ts_them0 = (them * 2); + ts->ts_them1 = (them * 2) + 1; + } else { + /* cross-over connection to them */ + ts->ts_them0 = (them * 2) + 1; + ts->ts_them1 = (them * 2); + } + + ts->ts_once = 1; + } + + /* block their first move */ + e += block(ts->ts_them0); + + return (e); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int e = 0; + + /* wait to be unblocked (id == 0 will not block) */ + e += block(ts->ts_us0); + + for (i = 0; i < lm_optB; i += 2) { + /* allow them to block us again */ + e += unblock(ts->ts_us0); + + /* block their next + 1 move */ + e += 
block(ts->ts_them1); + + /* unblock their next move */ + e += unblock(ts->ts_them0); + + /* wait for them to unblock us */ + e += block(ts->ts_us1); + + /* repeat with locks reversed */ + e += unblock(ts->ts_us1); + e += block(ts->ts_them0); + e += unblock(ts->ts_them1); + e += block(ts->ts_us0); + } + + /* finish batch with nothing blocked */ + e += unblock(ts->ts_them0); + e += unblock(ts->ts_us0); + + res->re_count = i; + res->re_errors = e; + + return (0); +} diff --git a/tools/tests/libMicro/cascade_fcntl.c b/tools/tests/libMicro/cascade_fcntl.c new file mode 100644 index 000000000..b9bde5d14 --- /dev/null +++ b/tools/tests/libMicro/cascade_fcntl.c @@ -0,0 +1,237 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The "cascade" test case is a multiprocess/multithread batten-passing model + * using lock primitives alone for synchronisation. Threads are arranged in a + * ring. Each thread has two locks of its own on which it blocks, and is able + * to manipulate the two locks belonging to the thread which follows it in the + * ring. 
+ * + * The number of threads (nthreads) is specified by the generic libMicro -P/-T + * options. With nthreads == 1 (the default) the uncontended case can be timed. + * + * The main logic is generic and allows any simple blocking API to be tested. + * The API-specific component is clearly indicated. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int ts_id; + int ts_us0; /* our lock indices */ + int ts_us1; + int ts_them0; /* their lock indices */ + int ts_them1; +} tsd_t; + +static int nthreads; + +/* + * API-specific code BEGINS here + */ + +#define DEFD "/private/tmp" + +static char *optd = DEFD; +static int file; +static int nlocks; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "d:"); + + lm_defN = "cscd_fcntl"; + + (void) sprintf(lm_usage, + " [-d directory for temp file (default %s)]\n" + "notes: thread cascade using fcntl region locking\n", + DEFD); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'd': + optd = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int errors = 0; + char fname[1024]; + + nthreads = lm_optP * lm_optT; + nlocks = nthreads * 2; + + (void) sprintf(fname, "%s/cascade.%ld", optd, getpid()); + + file = open(fname, O_CREAT | O_TRUNC | O_RDWR, 0600); + if (file == -1) { + errors++; + } + + if (unlink(fname)) { + errors++; + } + + if (ftruncate(file, nlocks * 3) == -1) { + errors++; + } + + return (errors); +} + +int +block(int index) +{ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = index; + fl.l_len = 1; + return (fcntl(file, F_SETLKW, &fl) == -1); +} + +int +unblock(int index) +{ + struct flock fl; + + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = index; + fl.l_len = 1; + return (fcntl(file, F_SETLK, &fl) == -1); +} + +/* + * API-specific code ENDS here + */ + +int 
/*
 * Timed body of the cascade: passes control around the ring using only
 * block()/unblock(), which in this file take and release fcntl()
 * byte-range locks.
 *
 * The lock indices come from the thread-specific data set up by
 * benchmark_initbatch().  Each loop iteration performs two hand-offs
 * (one on each pair of locks), which is why the counter advances by 2.
 *
 * On return res->re_count holds the final loop counter and
 * res->re_errors the sum of the block()/unblock() return values.  The
 * function itself always returns 0.
 */
int
benchmark(void *tsd, result_t *res)
{
	tsd_t *ts = (tsd_t *)tsd;
	int i;
	int e = 0;

	/* wait to be unblocked (id == 0 will not block) */
	e += block(ts->ts_us0);

	for (i = 0; i < lm_optB; i += 2) {
		/* allow them to block us again */
		e += unblock(ts->ts_us0);

		/* block their next + 1 move */
		e += block(ts->ts_them1);

		/* unblock their next move */
		e += unblock(ts->ts_them0);

		/* wait for them to unblock us */
		e += block(ts->ts_us1);

		/* repeat with locks reversed */
		e += unblock(ts->ts_us1);
		e += block(ts->ts_them0);
		e += unblock(ts->ts_them1);
		e += block(ts->ts_us0);
	}

	/* finish batch with nothing blocked */
	e += unblock(ts->ts_them0);
	e += unblock(ts->ts_us0);

	res->re_count = i;
	res->re_errors = e;

	return (0);
}
+ * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The "cascade" test case is a multiprocess/multithread batten-passing model + * using lock primitives alone for synchronisation. Threads are arranged in a + * ring. Each thread has two locks of its own on which it blocks, and is able + * to manipulate the two locks belonging to the thread which follows it in the + * ring. + * + * The number of threads (nthreads) is specified by the generic libMicro -P/-T + * options. With nthreads == 1 (the default) the uncontended case can be timed. + * + * The main logic is generic and allows any simple blocking API to be tested. + * The API-specific component is clearly indicated. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#ifndef LOCK_EX +#include "/usr/ucbinclude/sys/file.h" +extern int flock(int fd, int operation); +#endif + +typedef struct { + int ts_once; + int ts_id; + int ts_us0; /* our lock indices */ + int ts_us1; + int ts_them0; /* their lock indices */ + int ts_them1; +} tsd_t; + +static int nthreads; + +/* + * API-specific code BEGINS here + */ + +#define DEFD "/private/tmp" + +static char *optd = DEFD; +static int nfiles; +static int *files; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "d:"); + + lm_defN = "cscd_flock"; + + (void) sprintf(lm_usage, + " [-d directory for temp files (default %s)]\n" + "notes: thread cascade using flock file locking\n", + DEFD); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'd': + optd = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int errors = 0; + char fname[1024]; + + nthreads = lm_optP * lm_optT; + nfiles = nthreads * 2; + (void) setfdlimit(nfiles + 10); + files = (int *)malloc(nfiles * sizeof (int)); + if (files == NULL) { + return (1); + } + + (void) sprintf(fname, "%s/cascade.%ld", optd, getpid()); + + for (i = 0; i < nfiles; i++) { + files[i] = open(fname, O_CREAT | O_TRUNC | O_RDWR, 0600); + if (files[i] == -1) { + errors++; + } + if (unlink(fname)) { + errors++; + } + } + + return (errors); +} + +int +block(int index) +{ + return (flock(files[index], LOCK_EX) == -1); +} + +int +unblock(int index) +{ + return (flock(files[index], LOCK_UN) == -1); +} + +/* + * API-specific code ENDS here + */ + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int e = 0; + + if (ts->ts_once == 0) { + int us, them; + +#if !defined(__APPLE__) + us = (getpindex() * lm_optT) + gettindex(); +#else + us = gettsdindex(tsd); +#endif /* __APPLE__ */ + + them = (us + 1) % (lm_optP * lm_optT); + + ts->ts_id = us; 
/*
 * Timed body of the cascade: passes control around the ring using only
 * block()/unblock(), which in this file take and release flock() locks
 * on the descriptors in files[].
 *
 * The lock indices come from the thread-specific data set up by
 * benchmark_initbatch().  Each loop iteration performs two hand-offs
 * (one on each pair of locks), which is why the counter advances by 2.
 *
 * On return res->re_count holds the final loop counter and
 * res->re_errors the sum of the block()/unblock() return values.  The
 * function itself always returns 0.
 */
int
benchmark(void *tsd, result_t *res)
{
	tsd_t *ts = (tsd_t *)tsd;
	int i;
	int e = 0;

	/* wait to be unblocked (id == 0 will not block) */
	e += block(ts->ts_us0);

	for (i = 0; i < lm_optB; i += 2) {
		/* allow them to block us again */
		e += unblock(ts->ts_us0);

		/* block their next + 1 move */
		e += block(ts->ts_them1);

		/* unblock their next move */
		e += unblock(ts->ts_them0);

		/* wait for them to unblock us */
		e += block(ts->ts_us1);

		/* repeat with locks reversed */
		e += unblock(ts->ts_us1);
		e += block(ts->ts_them0);
		e += unblock(ts->ts_them1);
		e += block(ts->ts_us0);
	}

	/* finish batch with nothing blocked */
	e += unblock(ts->ts_them0);
	e += unblock(ts->ts_us0);

	res->re_count = i;
	res->re_errors = e;

	return (0);
}
+ * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The "cascade" test case is a multiprocess/multithread batten-passing model + * using lock primitives alone for synchronisation. Threads are arranged in a + * ring. Each thread has two locks of its own on which it blocks, and is able + * to manipulate the two locks belonging to the thread which follows it in the + * ring. + * + * The number of threads (nthreads) is specified by the generic libMicro -P/-T + * options. With nthreads == 1 (the default) the uncontended case can be timed. + * + * The main logic is generic and allows any simple blocking API to be tested. + * The API-specific component is clearly indicated. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int ts_id; + int ts_us0; /* our lock indices */ + int ts_us1; + int ts_them0; /* their lock indices */ + int ts_them1; +} tsd_t; + +static int nthreads; + +/* + * API-specific code BEGINS here + */ + +#define DEFD "/private/tmp" + +static char *optd = DEFD; +static int nfiles; +static int *files; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "d:"); + + lm_defN = "cscd_lockf"; + + (void) sprintf(lm_usage, + " [-d directory for temp files (default %s)]\n" + "notes: thread cascade using lockf file locking\n", + DEFD); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'd': + optd = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int errors = 0; + char fname[1024]; + + nthreads = lm_optP * lm_optT; + nfiles = nthreads * 2; + (void) setfdlimit(nfiles + 10); + files = (int *)malloc(nfiles * sizeof (int)); + if (files == NULL) { + return (1); + } + + (void) sprintf(fname, "%s/cascade.%ld", optd, getpid()); + + for (i = 0; i < nfiles; i++) { + files[i] = open(fname, O_CREAT | O_TRUNC | O_RDWR, 0600); + if (files[i] == -1) { + errors++; + } + if (unlink(fname)) { + errors++; + } + } + + return (errors); +} + +int +block(int index) +{ + return (lockf(files[index], F_LOCK, 0) == -1); +} + +int +unblock(int index) +{ + return (lockf(files[index], F_ULOCK, 0) == -1); +} + +/* + * API-specific code ENDS here + */ + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int e = 0; + + if (ts->ts_once == 0) { + int us, them; + +#if !defined(__APPLE__) + us = (getpindex() * lm_optT) + gettindex(); +#else + us = gettsdindex(tsd); +#endif /* __APPLE__ */ + + them = (us + 1) % (lm_optP * lm_optT); + + ts->ts_id = us; + + /* lock index asignment for us and them */ + ts->ts_us0 = (us * 2); + ts->ts_us1 = (us * 2) + 1; + 
/*
 * Timed body of the cascade: passes control around the ring using only
 * block()/unblock(), which in this file take and release lockf() locks
 * on the descriptors in files[].
 *
 * The lock indices come from the thread-specific data set up by
 * benchmark_initbatch().  Each loop iteration performs two hand-offs
 * (one on each pair of locks), which is why the counter advances by 2.
 *
 * On return res->re_count holds the final loop counter and
 * res->re_errors the sum of the block()/unblock() return values.  The
 * function itself always returns 0.
 */
int
benchmark(void *tsd, result_t *res)
{
	tsd_t *ts = (tsd_t *)tsd;
	int i;
	int e = 0;

	/* wait to be unblocked (id == 0 will not block) */
	e += block(ts->ts_us0);

	for (i = 0; i < lm_optB; i += 2) {
		/* allow them to block us again */
		e += unblock(ts->ts_us0);

		/* block their next + 1 move */
		e += block(ts->ts_them1);

		/* unblock their next move */
		e += unblock(ts->ts_them0);

		/* wait for them to unblock us */
		e += block(ts->ts_us1);

		/* repeat with locks reversed */
		e += unblock(ts->ts_us1);
		e += block(ts->ts_them0);
		e += unblock(ts->ts_them1);
		e += block(ts->ts_us0);
	}

	/* finish batch with nothing blocked */
	e += unblock(ts->ts_them0);
	e += unblock(ts->ts_us0);

	res->re_count = i;
	res->re_errors = e;

	return (0);
}
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * The "cascade" test case is a multiprocess/multithread batten-passing model + * using lock primitives alone for synchronisation. Threads are arranged in a + * ring. Each thread has two locks of its own on which it blocks, and is able + * to manipulate the two locks belonging to the thread which follows it in the + * ring. + * + * The number of threads (nthreads) is specified by the generic libMicro -P/-T + * options. With nthreads == 1 (the default) the uncontended case can be timed. + * + * The main logic is generic and allows any simple blocking API to be tested. + * The API-specific component is clearly indicated. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int ts_id; + int ts_us0; /* our lock indices */ + int ts_us1; + int ts_them0; /* their lock indices */ + int ts_them1; +} tsd_t; + +static int nthreads; + +/* + * API-specific code BEGINS here + */ + +static int opts = 0; +static int nlocks; +static pthread_mutex_t *locks; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s"); + + lm_defN = "cscd_mutex"; + + (void) sprintf(lm_usage, + " [-s] (force PTHREAD_PROCESS_SHARED)\n" + "notes: thread cascade using pthread_mutexes\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 's': + opts = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int e = 0; + pthread_mutexattr_t ma; + + nthreads = lm_optP * lm_optT; + nlocks = nthreads * 2; + /*LINTED*/ + locks = (pthread_mutex_t 
*)mmap(NULL, + nlocks * sizeof (pthread_mutex_t), + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_SHARED, + -1, 0L); + if (locks == MAP_FAILED) { + return (1); + } + + (void) pthread_mutexattr_init(&ma); + if (lm_optP > 1 || opts) { + (void) pthread_mutexattr_setpshared(&ma, + PTHREAD_PROCESS_SHARED); + } else { + (void) pthread_mutexattr_setpshared(&ma, + PTHREAD_PROCESS_PRIVATE); + } + + for (i = 0; i < nlocks; i++) { + (void) pthread_mutex_init(&locks[i], &ma); + } + + return (e); +} + +int +block(int index) +{ + return (pthread_mutex_lock(&locks[index]) == -1); +} + +int +unblock(int index) +{ + return (pthread_mutex_unlock(&locks[index]) == -1); +} + +/* + * API-specific code ENDS here + */ + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int e = 0; + + if (ts->ts_once == 0) { + int us, them; + +#if !defined(__APPLE__) + us = (getpindex() * lm_optT) + gettindex(); +#else + us = gettsdindex(tsd); +#endif /* __APPLE__ */ + + them = (us + 1) % (lm_optP * lm_optT); + + ts->ts_id = us; + + /* lock index asignment for us and them */ + ts->ts_us0 = (us * 2); + ts->ts_us1 = (us * 2) + 1; + if (us < nthreads - 1) { + /* straight-thru connection to them */ + ts->ts_them0 = (them * 2); + ts->ts_them1 = (them * 2) + 1; + } else { + /* cross-over connection to them */ + ts->ts_them0 = (them * 2) + 1; + ts->ts_them1 = (them * 2); + } + + ts->ts_once = 1; + } + + /* block their first move */ + e += block(ts->ts_them0); + + return (e); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int e = 0; + + /* wait to be unblocked (id == 0 will not block) */ + e += block(ts->ts_us0); + + for (i = 0; i < lm_optB; i += 2) { + /* allow them to block us again */ + e += unblock(ts->ts_us0); + + /* block their next + 1 move */ + e += block(ts->ts_them1); + + /* unblock their next move */ + e += unblock(ts->ts_them0); + + /* wait for them to unblock us */ + e += block(ts->ts_us1); + + /* repeat with locks reversed */ + e += 
unblock(ts->ts_us1); + e += block(ts->ts_them0); + e += unblock(ts->ts_them1); + e += block(ts->ts_us0); + } + + /* finish batch with nothing blocked */ + e += unblock(ts->ts_them0); + e += unblock(ts->ts_us0); + + res->re_count = i; + res->re_errors = e; + + return (0); +} diff --git a/tools/tests/libMicro/chdir.c b/tools/tests/libMicro/chdir.c new file mode 100644 index 000000000..a9ff379ca --- /dev/null +++ b/tools/tests/libMicro/chdir.c @@ -0,0 +1,132 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * change directory benchmark + */ + +#include +#include +#include + +#include "libmicro.h" + +#define DEFAULTDIR "/" +#define MAXPATHLEN 1024 + +static int optg = 0; + +static int dircount; +static char ** dirlist; + +int +benchmark_init() +{ + (void) sprintf(lm_optstr, "g"); + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-g] (do getcwd() also)\n" + " directory ... 
(default = %s)\n" + "notes: measures chdir() and (optionally) getcwd()", + DEFAULTDIR); + + (void) sprintf(lm_header, "%5s %5s", "dirs", "gets"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'g': + optg = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + extern int optind; + int i; + + dircount = lm_argc - optind; + if (dircount <= 0) { + dirlist = (char **)malloc(sizeof (char *)); + dirlist[0] = DEFAULTDIR; + dircount = 1; + } else { + dirlist = (char **)malloc(dircount * sizeof (char *)); + for (i = 0; i < dircount; i++) { + dirlist[i] = lm_argv[optind++]; + } + } + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i, j; + char buf[MAXPATHLEN]; + + j = 0; + for (i = 0; i < lm_optB; i++) { + if (chdir(dirlist[j]) == -1) + res->re_errors++; + j++; + j %= dircount; + + if (optg && (getcwd(buf, MAXPATHLEN) == NULL)) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%5d %5s", dircount, optg ? "y" : "n"); + + return (result); +} diff --git a/tools/tests/libMicro/close.c b/tools/tests/libMicro/close.c new file mode 100644 index 000000000..6050bcd50 --- /dev/null +++ b/tools/tests/libMicro/close.c @@ -0,0 +1,141 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * benchmark for close + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/dev/null" +static char *optf = DEFF; +static int optb = 0; + +typedef struct { + int *ts_fds; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + + (void) sprintf(lm_optstr, "f:b"); + + (void) sprintf(lm_usage, + " [-f file-to-close (default %s)]\n" + " [-b] (try to close an unopened fd)\n" + "notes: measures close()", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + case 'b': + optb = 1; + break; + default: + return (-1); + } + + return (0); +} + +int +benchmark_initrun() +{ + (void) setfdlimit(lm_optB * lm_optT + 10); + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + ts->ts_fds = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_fds == NULL) { + return (1); + } + return (0); +} + +/* + * don't need a finiworker; we're exiting anyway + */ + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = ((optb == 0) ? 
+ open(optf, O_RDONLY) : i + 1024); + if (ts->ts_fds[i] == -1) { + errors++; + } + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if (close(ts->ts_fds[i]) == -1 && !optb) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/close_tcp.c b/tools/tests/libMicro/close_tcp.c new file mode 100644 index 000000000..6bce7a73d --- /dev/null +++ b/tools/tests/libMicro/close_tcp.c @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * benchmark to measure time to close a local tcp connection + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define FIRSTPORT 12345 + +typedef struct { + int *ts_lsns; + int *ts_accs; + int *ts_cons; + struct sockaddr_in *ts_adds; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + + (void) sprintf(lm_usage, + "notes: measures close() on local TCP connections"); + + return (0); +} + +int +benchmark_initrun() +{ + (void) setfdlimit(3 * lm_optB * lm_optT + 10); + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + int opt = 1; + struct hostent *host; + int errors = 0; + + ts->ts_lsns = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_lsns == NULL) { + errors ++; + } + ts->ts_accs = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_accs == NULL) { + errors ++; + } + ts->ts_cons = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_cons == NULL) { + errors ++; + } + ts->ts_adds = (struct sockaddr_in *)malloc(lm_optB * + sizeof (struct sockaddr_in)); + if (ts->ts_adds == NULL) { + errors ++; + } + + j = FIRSTPORT; + for (i = 0; i < lm_optB; i++) { + ts->ts_lsns[i] = socket(AF_INET, SOCK_STREAM, 0); + if (ts->ts_lsns[i] == -1) { + perror("socket"); + errors ++; + } + + if (setsockopt(ts->ts_lsns[i], SOL_SOCKET, SO_REUSEADDR, + &opt, sizeof (int)) == -1) { + perror("setsockopt"); + errors ++; + } + + if ((host = gethostbyname("localhost")) == NULL) { + errors ++; + } + + for (;;) { + (void) memset(&ts->ts_adds[i], 0, + sizeof (struct sockaddr_in)); + ts->ts_adds[i].sin_family = AF_INET; + ts->ts_adds[i].sin_port = htons(j++); + (void) memcpy(&ts->ts_adds[i].sin_addr.s_addr, + host->h_addr_list[0], sizeof (struct in_addr)); + + if (bind(ts->ts_lsns[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)) == 0) { + break; + } + + if (errno != 
EADDRINUSE) { + perror("bind"); + errors ++; + } + } + + if (listen(ts->ts_lsns[i], 5) == -1) { + perror("listen"); + errors ++; + } + + } + return (errors); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int result; + struct sockaddr_in addr; + socklen_t size; + int errors = 0; + + for (i = 0; i < lm_optB; i++) { + ts->ts_cons[i] = socket(AF_INET, SOCK_STREAM, 0); + if (ts->ts_cons[i] == -1) { + perror("socket"); + errors ++; + continue; + } + + if (fcntl(ts->ts_cons[i], F_SETFL, O_NDELAY) == -1) { + perror("fcnt"); + errors ++; + continue; + } + + result = connect(ts->ts_cons[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)); + + if ((result == -1) && (errno != EINPROGRESS)) { + perror("connect"); + errors ++; + continue; + } + + size = sizeof (struct sockaddr); + result = accept(ts->ts_lsns[i], (struct sockaddr *)&addr, + &size); + if (result == -1) { + perror("accept"); + errors ++; + continue; + } + ts->ts_accs[i] = result; + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if (close(ts->ts_accs[i]) == -1) { + res->re_errors ++; + } + } + res->re_count = i; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + (void) close(ts->ts_cons[i]); + } + + return (0); +} + +int +benchmark_finiworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + (void) close(ts->ts_lsns[i]); + } + return (0); +} diff --git a/tools/tests/libMicro/connection.c b/tools/tests/libMicro/connection.c new file mode 100644 index 000000000..67239f9dd --- /dev/null +++ b/tools/tests/libMicro/connection.c @@ -0,0 +1,305 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define FIRSTPORT 12345 + +typedef struct { + int ts_once; + int *ts_lsns; + int *ts_accs; + int *ts_cons; + struct sockaddr_in *ts_adds; +} tsd_t; + +static int opta = 0; +static int optc = 0; +static struct hostent *host; + +int +benchmark_init() +{ + lm_defB = 256; + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "ac"); + + (void) sprintf(lm_usage, + " [-a] (measure accept() only)\n" + " [-c] (measure connect() only)\n" + "notes: measures connect()/accept()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'a': + opta = 1; + break; + case 'c': + optc = 1; + break; + default: + return (-1); + } + + if (opta && optc) { + (void) printf("warning: -a overrides -c\n"); + optc = 0; + } + + return (0); +} + +int +benchmark_initrun() +{ + (void) setfdlimit(3 * lm_optB * lm_optT + 10); + + return (0); +} + +int +benchmark_initbatch_once(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + + int errors = 0; + + ts->ts_lsns = (int *)malloc(lm_optB * 
sizeof (int)); + if (ts->ts_lsns == NULL) { + errors ++; + } + ts->ts_accs = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_accs == NULL) { + errors ++; + } + ts->ts_cons = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_cons == NULL) { + errors ++; + } + ts->ts_adds = + (struct sockaddr_in *)malloc(lm_optB * + sizeof (struct sockaddr_in)); + if (ts->ts_accs == NULL) { + errors ++; + } + + j = FIRSTPORT; + for (i = 0; i < lm_optB; i++) { + ts->ts_lsns[i] = socket(AF_INET, SOCK_STREAM, 0); + if (ts->ts_lsns[i] == -1) { + perror("socket"); + errors ++; + } + + /* + * make accept socket non-blocking so in case of errors + * we don't hang + */ + + if (fcntl(ts->ts_lsns[i], F_SETFL, O_NDELAY) == -1) { + perror("fcntl"); + errors ++; + } + + + if ((host = gethostbyname("localhost")) == NULL) { + errors ++; + } + + for (;;) { + (void) memset(&ts->ts_adds[i], 0, + sizeof (struct sockaddr_in)); + ts->ts_adds[i].sin_family = AF_INET; + ts->ts_adds[i].sin_port = htons(j++); + (void) memcpy(&ts->ts_adds[i].sin_addr.s_addr, + host->h_addr_list[0], sizeof (struct in_addr)); + + if (bind(ts->ts_lsns[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)) == 0) { + break; + } + + if (errno != EADDRINUSE) { + errors ++; + } + } + + if (listen(ts->ts_lsns[i], 5) == -1) { + perror("listen"); + errors ++; + } + } + return (errors); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + int result; + + if (ts->ts_once++ == 0) { + if (errors += benchmark_initbatch_once(tsd) == -1) { + return (-1); + } + } + + + for (i = 0; i < lm_optB; i++) { + ts->ts_cons[i] = socket(AF_INET, SOCK_STREAM, 0); + if (ts->ts_cons[i] == -1) { + perror("init:socket"); + errors ++; + } + + if (fcntl(ts->ts_cons[i], F_SETFL, O_NDELAY) == -1) { + perror("init:fcntl"); + errors ++; + } + + if (opta) { + result = connect(ts->ts_cons[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)); + if ((result == -1) && (errno != 
EINPROGRESS)) { + perror("init:connect"); + errors ++; + } + } + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) + + + +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int result; + struct sockaddr_in addr; + socklen_t size; + + for (i = 0; i < lm_optB; i++) { + if (!opta) { + again: + result = connect(ts->ts_cons[i], + (struct sockaddr *)&ts->ts_adds[i], + sizeof (struct sockaddr_in)); + if (result != 0 && errno != EISCONN) { + if (errno == EINPROGRESS) { + struct pollfd pollfd; + if (optc) + continue; + pollfd.fd = ts->ts_cons[i]; + pollfd.events = POLLOUT; + if (poll(&pollfd, 1, -1) == 1) + goto again; + } + + res->re_errors ++; + perror("benchmark:connect"); + continue; + } + } + + if (!optc) { + size = sizeof (struct sockaddr); + for (;;) { + struct pollfd pollfd; + result = accept(ts->ts_lsns[i], + (struct sockaddr *)&addr, &size); + if (result > 0 || (result == -1 && + errno != EAGAIN)) + break; + pollfd.fd = ts->ts_lsns[i]; + pollfd.events = POLLIN; + if (poll(&pollfd, 1, -1) != 1) + break; + } + + ts->ts_accs[i] = result; + if (result == -1) { + res->re_errors ++; + perror("benchmark:accept"); + continue; + } + } + } + res->re_count = i; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + + if (!optc) { + (void) close(ts->ts_accs[i]); + } + (void) close(ts->ts_cons[i]); + } + + return (0); +} diff --git a/tools/tests/libMicro/create_stuff.sh b/tools/tests/libMicro/create_stuff.sh new file mode 100644 index 000000000..7b274cc4a --- /dev/null +++ b/tools/tests/libMicro/create_stuff.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +echo Raising process limits +echo limit maxproc 1000 2000 >> /etc/launchd.conf + +echo Done. 
diff --git a/tools/tests/libMicro/dup.c b/tools/tests/libMicro/dup.c new file mode 100644 index 000000000..cdce1f1c3 --- /dev/null +++ b/tools/tests/libMicro/dup.c @@ -0,0 +1,139 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * time dup + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/dev/null" +static char *optf = DEFF; + +static int fd; + +typedef struct { + int ts_once; + int *ts_fds; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + + (void) sprintf(lm_optstr, "f:"); + + (void) sprintf(lm_usage, + " [-f file-to-dup (default %s)]\n" + "notes: measures dup()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + (void) setfdlimit(lm_optB * lm_optT + 10); + fd = (open(optf, O_RDONLY)); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + if (ts->ts_once++ == 0) { + ts->ts_fds = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_fds == NULL) { + errors ++; + } + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = -1; + } + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = dup(fd); + if (ts->ts_fds[i] == -1) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + (void) close(ts->ts_fds[i]); + } + + return (0); +} diff --git a/tools/tests/libMicro/elided.c b/tools/tests/libMicro/elided.c new file mode 100644 index 000000000..ab26e6eab --- /dev/null +++ b/tools/tests/libMicro/elided.c @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. 
+ * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * empty benchmark program to substitute for benchmarks + * that don't work/exist on some platforms + */ + +#include +#include +#include +#include + +/*ARGSUSED*/ +int +main(int argc, char *argv[]) +{ + char *tmp = strrchr(argv[0], '/'); + + if (tmp == NULL) + tmp = argv[0]; + else + tmp++; + + (void) printf( + "#\n" + "# benchmark %s not compiled/supported on this platform\n" + "#\n", + tmp); + + return (0); +} diff --git a/tools/tests/libMicro/exec.c b/tools/tests/libMicro/exec.c new file mode 100644 index 000000000..3110a14c2 --- /dev/null +++ b/tools/tests/libMicro/exec.c @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * exec benchmark + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +static char exec_path[1024]; +static char *argv[3]; + +int +benchmark_init() +{ + lm_defB = 128; + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + "notes: measures execv time of simple process()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_initbatch(void *tsd) +{ + char buffer[80]; + + (void) strcpy(exec_path, lm_procpath); + (void) strcat(exec_path, "/exec_bin"); + + (void) sprintf(buffer, "%d", lm_optB); + argv[0] = exec_path; + argv[1] = strdup(buffer); + argv[2] = NULL; + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int c; + int status; + + switch (c = fork()) { + case -1: + res->re_errors++; + break; + default: + if (waitpid(c, &status, 0) < 0) + res->re_errors++; + + if (WIFEXITED(status) && WEXITSTATUS(status) != 0) + res->re_errors++; + break; + case 0: + if (execv(exec_path, argv) < 0) + res->re_errors++; + } + + res->re_count = lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/exec_bin.c b/tools/tests/libMicro/exec_bin.c new file mode 100644 index 000000000..4cddfd311 --- /dev/null +++ b/tools/tests/libMicro/exec_bin.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * time program to recursively test exec time + */ + +#include +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + int left; + + if (argc == 1) { + exit(1); + } + + left = atoi(argv[1]); + + left--; + + if (left <= 0) { + exit(0); + } else { + char buffer[80]; + (void) sprintf(buffer, "%d", left); + argv[1] = buffer; + if (execv(argv[0], argv)) { + exit(2); + } + } + + return (0); +} diff --git a/tools/tests/libMicro/exit.c b/tools/tests/libMicro/exit.c new file mode 100644 index 000000000..e2aa54ddb --- /dev/null +++ b/tools/tests/libMicro/exit.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * benchmark exit + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int *ts_pids; +} tsd_t; + +static int opte = 0; +static barrier_t *b; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + (void) sprintf(lm_optstr, "e"); + + (void) sprintf(lm_usage, + " [-e] (uses _exit() rather than exit())" + "notes: measures exit()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'e': + opte = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + b = barrier_create(lm_optP * lm_optT * (lm_optB + 1), 0); + + return (0); +} + +int +benchmark_finirun() +{ + (void) barrier_destroy(b); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + if (ts->ts_once++ == 0) { + ts->ts_pids = (int *)malloc(lm_optB * sizeof (pid_t)); + if (ts->ts_pids == NULL) { + errors ++; + } + } + + /* + * create processes to exit + */ + + for (i = 0; i < lm_optB; i++) { + ts->ts_pids[i] = fork(); + switch (ts->ts_pids[i]) { + case 0: + (void) barrier_queue(b, NULL); + if (opte) + _exit(0); + exit(0); + break; + case -1: + errors ++; + break; + default: + continue; + } + } + + return (errors); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + /* + * start them all exiting + */ + + (void) barrier_queue(b, NULL); + + /* + * wait for them all to exit + */ + + for (i = 0; i < lm_optB; i++) { + switch (waitpid((pid_t)-1, NULL, 0)) { + case 0: + continue; + case -1: + res->re_errors++; + } 
+ } + + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/exp.c b/tools/tests/libMicro/exp.c new file mode 100644 index 000000000..acc81c577 --- /dev/null +++ b/tools/tests/libMicro/exp.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+/*
+ * test exp performance (should add range check)
+ *
+ * benchmark_init() sets the usage text and lm_nsecs_per_op, and declares
+ * that no per-thread state is needed.
+ *
+ * benchmark() evaluates exp() ten times on the same argument in each pass
+ * of its loop and advances the loop index by ten to match, so the index it
+ * reports in res->re_count is the number of exp() calls made.
+ */
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "note: measures exp()");
+	lm_nsecs_per_op = 25;
+	lm_tsdsize = 0;	/* benchmark() never reads tsd */
+	return (0);
+}
+
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+
+	for (i = 0; i < lm_optB; i += 10) {
+		double value = 1.0 / (i + .01);	/* argument changes per pass */
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+		(void) exp(value);
+	}
+	res->re_count = i;	/* first multiple of 10 that is >= lm_optB */
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/fcntl.c b/tools/tests/libMicro/fcntl.c
new file mode 100644
index 000000000..95099c136
--- /dev/null
+++ b/tools/tests/libMicro/fcntl.c
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +/* + * benchmark fcntl getfl + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/dev/null" + +static char *optf = DEFF; +static int fd = -1; + +int +benchmark_init() +{ + (void) sprintf(lm_optstr, "f:"); + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-f file-to-fcntl (default %s)]\n" + "notes: measures fcntl()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + if ((fd = open(optf, O_RDONLY)) == -1) { + perror("open"); + exit(1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + int flags; + + for (i = 0; i < lm_optB; i++) { + if (fcntl(fd, F_GETFL, &flags) == -1) + res->re_errors++; + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/fcntl_ndelay.c b/tools/tests/libMicro/fcntl_ndelay.c new file mode 100644 index 000000000..3f55a181d --- /dev/null +++ b/tools/tests/libMicro/fcntl_ndelay.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * measures O_NDELAY on socket
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+static int fd = -1;	/* socket whose status flags are toggled */
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage,
+	    "notes: measures F_GETFL/F_SETFL O_NDELAY on socket\n");
+
+	lm_tsdsize = 0;
+
+	return (0);
+}
+
+/*
+ * Creates the TCP socket used by benchmark(); prints the error and exits
+ * with status 1 if socket() fails.  Returns 0 on success.
+ */
+int
+benchmark_initrun()
+{
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	return (0);
+}
+
+/*
+ * Timed body.  Each pass performs four fcntl() calls (get, set O_NDELAY,
+ * get, clear O_NDELAY) and advances i by 4; every failing call is counted
+ * in res->re_errors.  res->re_count receives the final i.  tsd is unused.
+ * Always returns 0.
+ *
+ * F_GETFL returns the flag word as its result and F_SETFL takes the flag
+ * word by value, so the flags are carried through the return value rather
+ * than through a pointer argument.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	int flags;
+
+	for (i = 0; i < lm_optB; i += 4) {
+		if ((flags = fcntl(fd, F_GETFL, 0)) < 0) {
+			res->re_errors++;
+			flags = 0;	/* do not feed -1 back to F_SETFL */
+		}
+		flags |= O_NDELAY;
+
+		if (fcntl(fd, F_SETFL, flags) < 0)
+			res->re_errors++;
+
+		if ((flags = fcntl(fd, F_GETFL, 0)) < 0) {
+			res->re_errors++;
+			flags = 0;
+		}
+		flags &= ~O_NDELAY;
+
+		if (fcntl(fd, F_SETFL, flags) < 0)
+			res->re_errors++;
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/file_lock.c b/tools/tests/libMicro/file_lock.c
new file mode 100644
index 000000000..fc1196420
--- /dev/null
+++ b/tools/tests/libMicro/file_lock.c
@@ -0,0 +1,106 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * test file locking
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+static int file;	/* descriptor of the scratch file being locked */
+
+/*
+ * Takes a one-byte write lock at offset index, waiting (F_SETLKW) until
+ * it is granted.  Returns 1 if fcntl() failed, 0 otherwise.
+ */
+int
+block(int index)
+{
+	struct flock fl;
+
+	fl.l_type = F_WRLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = index;
+	fl.l_len = 1;
+	return (fcntl(file, F_SETLKW, &fl) == -1);
+}
+
+/*
+ * Releases the one-byte lock at offset index.  Returns 1 if fcntl()
+ * failed, 0 otherwise.
+ */
+int
+unblock(int index)
+{
+	struct flock fl;
+
+	fl.l_type = F_UNLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = index;
+	fl.l_len = 1;
+	return (fcntl(file, F_SETLK, &fl) == -1);
+}
+/*
+ * One-time setup: creates a scratch file named after the process id,
+ * keeps it open in `file` and unlinks it straight away so it goes away
+ * with the last descriptor.  Returns the number of failed steps (0 on
+ * success).
+ */
+int
+benchmark_init()
+{
+	char fname[80];
+	int errors = 0;
+
+	/* pid_t is not guaranteed to be long; cast to match %ld */
+	(void) sprintf(fname, "/private/tmp/oneflock.%ld", (long)getpid());
+
+	file = open(fname, O_CREAT | O_TRUNC | O_RDWR, 0600);
+
+	if (file == -1) {
+		errors++;
+	}
+	if (unlink(fname)) {
+		errors++;
+	}
+
+	lm_tsdsize = 0;
+
+	return (errors);
+}
+
+/*
+ * Timed body: lm_optB lock/unlock pairs on byte 0.  res->re_count gets
+ * the number of pairs and res->re_errors the number of failed fcntl()
+ * calls.  tsd is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	int e = 0;
+
+	for (i = 0; i < lm_optB; i ++) {
+		e += block(0);
+		e += unblock(0);
+	}
+	res->re_count = i;
+	res->re_errors = e;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/fork.c b/tools/tests/libMicro/fork.c
new file mode 100644
index 000000000..d0336915c
--- /dev/null
+++ b/tools/tests/libMicro/fork.c
@@ -0,0 +1,131 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * benchmark fork
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* barrier on which every forking thread and every forked child queues */
+static barrier_t *b;
+
+/* per-thread state */
+typedef struct {
+	int ts_once;	/* non-zero once ts_pids has been allocated */
+	int *ts_pids;	/* fork() results of the current batch */
+} tsd_t;
+
+/*
+ * One-time setup: requests a tsd_t per thread and fills in the usage
+ * text.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+	(void) sprintf(lm_usage, "notes: measures fork()\n");
+
+	return (0);
+}
+
+/*
+ * Creates the barrier, sized for lm_optB children plus the parent for
+ * each of the lm_optP * lm_optT worker threads.  Always returns 0.
+ */
+int
+benchmark_initrun()
+{
+	b = barrier_create(lm_optP * lm_optT * (lm_optB + 1), 0);
+
+	return (0);
+}
+
+/*
+ * Destroys the barrier created by benchmark_initrun().  Always returns 0.
+ */
+int
+benchmark_finirun()
+{
+	(void) barrier_destroy(b);
+
+	return (0);
+}
+
+/*
+ * Per-batch setup: on the first batch of a thread, allocates the pid
+ * array (lm_optB entries); later batches reuse it.  Returns 1 if the
+ * allocation failed, 0 otherwise.
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	int errors = 0;
+
+	if (ts->ts_once++ == 0) {
+		/* size by the element type actually stored in ts_pids */
+		ts->ts_pids = (int *)malloc(lm_optB * sizeof (*ts->ts_pids));
+		if (ts->ts_pids == NULL) {
+			errors++;
+		}
+	}
+
+	return (errors);
+}
+
+/*
+ * Timed body: forks lm_optB children.  Each child queues on the barrier
+ * and exits; a failed fork() is counted in res->re_errors.  After the
+ * loop the parent queues on the barrier too.  res->re_count is set to
+ * lm_optB.  Always returns 0.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	int i;
+
+	for (i = 0; i < lm_optB; i++) {
+		ts->ts_pids[i] = fork();
+		switch (ts->ts_pids[i]) {
+		case 0:
+			(void) barrier_queue(b, NULL);
+			exit(0);
+			break;
+		case -1:
+			res->re_errors++;
+			break;
+		default:
+			continue;
+		}
+	}
+	res->re_count = lm_optB;
+
+	(void) barrier_queue(b, NULL);
+
+	return (0);
+}
+
+/*
+ * Per-batch teardown: reaps every child whose fork() succeeded in this
+ * batch.  Always returns 0.
+ */
+int
+benchmark_finibatch(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	int i;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (ts->ts_pids[i] > 0) {
+			(void) waitpid(ts->ts_pids[i], NULL, 0);
+		}
+	}
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getcontext.c b/tools/tests/libMicro/getcontext.c
new file mode 100644
index 000000000..524a0169c
--- /dev/null
+++ b/tools/tests/libMicro/getcontext.c
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * getcontext
+ */
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "notes: measures getcontext()\n");
+
+	lm_tsdsize = 0;
+
+	return (0);
+}
+
+/*
+ * Timed body.  Every pass makes ten getcontext() calls into the same
+ * local ucontext_t and advances i by 10, so res->re_count ends up as
+ * lm_optB rounded up to a multiple of 10.  Return values are ignored.
+ * tsd is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+
+	for (i = 0; i < lm_optB; i += 10) {
+		ucontext_t uc;
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+		(void) getcontext(&uc);
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getenv.c b/tools/tests/libMicro/getenv.c
new file mode 100644
index 000000000..f75bfd476
--- /dev/null
+++ b/tools/tests/libMicro/getenv.c
@@ -0,0 +1,126 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * test getenv
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* default environment size searched when -s is not given */
+#define DEFS 100
+
+static int opts = DEFS;	/* target environment size; replaced by -s */
+
+/*
+ * One-time setup: registers the extra "-s" option, requests no
+ * per-thread data, fills in the usage text and sets the estimated cost
+ * of one operation to 200.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_optstr, "s:");
+
+	lm_tsdsize = 0;
+
+	(void) sprintf(lm_usage,
+	    " [-s search-size (default = %d)]\n"
+	    "notes: measures time to search env for missing string\n",
+	    DEFS);
+
+	lm_nsecs_per_op = 200;
+
+	return (0);
+}
+
+/*
+ * Handles benchmark-specific options: 's' sets the target environment
+ * size.  Returns 0 when the option is recognised, -1 otherwise.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 's':
+		opts = atoi(optarg);
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Pads the environment with VAR_<n>=<n> entries until the counter
+ * reaches opts, so that every getenv() miss walks a list of known size.
+ * Returns 0 on success, or -1 (after printing the error) when an entry
+ * cannot be allocated or installed.
+ */
+int
+benchmark_initrun()
+{
+	extern char ** environ;
+	int i, j;
+
+	/* count environment strings */
+
+	for (i = 0; environ[i++]; )
+		;
+
+	/*
+	 * pad to desired count
+	 */
+
+	if (opts < i)
+		opts = i;
+
+	for (j = i; j < opts; j++) {
+		char buf[80];
+		char *entry;
+
+		(void) sprintf(buf, "VAR_%d=%d", j, j);
+
+		/*
+		 * putenv() keeps the string it is handed, so every entry
+		 * needs its own heap copy; never pass it a NULL.
+		 */
+		entry = strdup(buf);
+		if (entry == NULL || putenv(entry) != 0) {
+			perror("putenv");
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Timed body.  Every pass makes ten getenv() lookups of a name that is
+ * not expected to be set and advances i by 10, so res->re_count ends up
+ * as lm_optB rounded up to a multiple of 10.  tsd is unused.  Always
+ * returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	char *search = "RUMPLSTILTSKIN";
+
+	for (i = 0; i < lm_optB; i += 10) {
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+		(void) getenv(search);
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getpeername.c b/tools/tests/libMicro/getpeername.c
new file mode 100644
index 000000000..877215d13
--- /dev/null
+++ b/tools/tests/libMicro/getpeername.c
@@ -0,0 +1,163 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * getpeername test
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* first TCP port tried when binding the listening socket */
+#define FIRSTPORT 12345
+
+static int sock = -1;	/* accepted connection queried by benchmark() */
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "notes: measures getpeername()\n");
+	lm_tsdsize = 0;
+
+	return (0);
+}
+
+/*
+ * Builds a connected TCP pair on localhost: binds a listener to the
+ * first free port at or above FIRSTPORT, starts a non-blocking connect
+ * from a second socket and accepts it into `sock`.  Any failure prints
+ * the error and exits with status 1.  Returns 0 on success.
+ */
+int
+benchmark_initrun()
+{
+	int j;
+	int opt = 1;
+	int result;
+	socklen_t size;
+	struct hostent *host;
+	struct sockaddr_in adds;
+	int sock2, sock3;
+
+	sock2 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock2 == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	if (setsockopt(sock2, SOL_SOCKET, SO_REUSEADDR,
+	    &opt, sizeof (int)) == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	if ((host = gethostbyname("localhost")) == NULL) {
+		perror("gethostbyname");
+		exit(1);
+	}
+
+	/* walk upwards from FIRSTPORT until bind() finds a free port */
+	j = FIRSTPORT;
+	for (;;) {
+		(void) memset(&adds, 0, sizeof (struct sockaddr_in));
+		adds.sin_family = AF_INET;
+		adds.sin_port = htons(j++);
+		(void) memcpy(&adds.sin_addr.s_addr, host->h_addr_list[0],
+		    sizeof (struct in_addr));
+
+		if (bind(sock2, (struct sockaddr *)&adds,
+		    sizeof (struct sockaddr_in)) == 0) {
+			break;
+		}
+
+		if (errno != EADDRINUSE) {
+			perror("bind");
+			exit(1);
+		}
+	}
+
+	if (listen(sock2, 5) == -1) {
+		perror("listen");
+		exit(1);
+	}
+
+	sock3 = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock3 == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	/* non-blocking so connect() returns before accept() is called */
+	if (fcntl(sock3, F_SETFL, O_NDELAY) == -1) {
+		perror("fcntl");
+		exit(1);
+	}
+
+	result = connect(sock3, (struct sockaddr *)&adds,
+	    sizeof (struct sockaddr_in));
+	if ((result == -1) && (errno != EINPROGRESS)) {
+		perror("connect");
+		exit(1);
+	}
+
+	/* the length must describe the buffer actually handed to accept() */
+	size = sizeof (adds);
+	sock = accept(sock2, (struct sockaddr *)&adds, &size);
+	if (sock == -1) {
+		perror("accept");
+		exit(1);
+	}
+
+	return (0);
+}
+
+/*
+ * Timed body: lm_optB getpeername() calls on the accepted socket.  A
+ * failing call prints the error and exits with status 1, so no error
+ * count is recorded.  res->re_count receives the number of calls.  tsd
+ * is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	struct sockaddr_in adds;
+	socklen_t size;
+
+	for (i = 0; i < lm_optB; i++) {
+		size = sizeof (struct sockaddr_in);
+		if (getpeername(sock, (struct sockaddr *)&adds, &size) == -1) {
+			perror("getpeername");
+			exit(1);
+			/* NOTREACHED */
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getpid.c b/tools/tests/libMicro/getpid.c
new file mode 100644
index 000000000..4ac816543
--- /dev/null
+++ b/tools/tests/libMicro/getpid.c
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * getpid
+ */
+
+
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "note: measures getpid()");
+
+	lm_tsdsize = 0;
+
+	return (0);
+}
+
+/*
+ * Timed body: calls getpid() lm_optB times, discarding the result, and
+ * stores the number of calls in res->re_count.  tsd is unused.  Always
+ * returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+
+	for (i = 0; i < lm_optB; i ++) {
+		(void) getpid();
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getrusage.c b/tools/tests/libMicro/getrusage.c
new file mode 100644
index 000000000..2f02213db
--- /dev/null
+++ b/tools/tests/libMicro/getrusage.c
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * getrusage
+ */
+
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "notes: measures getrusage(RUSAGE_SELF)\n");
+	lm_tsdsize = 0;
+	return (0);
+}
+
+/*
+ * Timed body.  Every pass makes ten getrusage(RUSAGE_SELF) calls into
+ * the same local struct and advances i by 10, so res->re_count ends up
+ * as lm_optB rounded up to a multiple of 10.  Return values are ignored.
+ * tsd is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	struct rusage u;
+
+	for (i = 0; i < lm_optB; i += 10) {
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+		(void) getrusage(RUSAGE_SELF, &u);
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/getsockname.c b/tools/tests/libMicro/getsockname.c
new file mode 100644
index 000000000..e3f7769bc
--- /dev/null
+++ b/tools/tests/libMicro/getsockname.c
@@ -0,0 +1,125 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * getsockname
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* first TCP port tried when binding the socket */
+#define FIRSTPORT 12345
+
+static struct sockaddr_in adds;	/* address the socket was bound to */
+static int sock = -1;		/* bound socket queried by benchmark() */
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "notes: measures getsockname()\n");
+	lm_tsdsize = 0;
+	return (0);
+}
+
+/*
+ * Creates a TCP socket and binds it to the first free localhost port at
+ * or above FIRSTPORT.  Any failure prints the error and exits with
+ * status 1.  Returns 0 on success.
+ */
+int
+benchmark_initrun()
+{
+	int j;
+	int opt = 1;
+	struct hostent *host;
+
+	sock = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+	    &opt, sizeof (int)) == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	if ((host = gethostbyname("localhost")) == NULL) {
+		perror("gethostbyname");
+		exit(1);
+	}
+
+	/* walk upwards from FIRSTPORT until bind() finds a free port */
+	j = FIRSTPORT;
+	for (;;) {
+		(void) memset(&adds, 0, sizeof (struct sockaddr_in));
+		adds.sin_family = AF_INET;
+		adds.sin_port = htons(j++);
+		(void) memcpy(&adds.sin_addr.s_addr, host->h_addr_list[0],
+		    sizeof (struct in_addr));
+
+		if (bind(sock, (struct sockaddr *)&adds,
+		    sizeof (struct sockaddr_in)) == 0) {
+			break;
+		}
+
+		if (errno != EADDRINUSE) {
+			perror("bind");
+			exit(1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Timed body: lm_optB getsockname() calls on the bound socket, counting
+ * each failure in res->re_errors.  res->re_count receives the number of
+ * calls.  The local adds shadows the file-scope one and only serves as
+ * an output buffer.  tsd is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	struct sockaddr_in adds;
+	socklen_t size;
+
+	for (i = 0; i < lm_optB; i++) {
+		size = sizeof (struct sockaddr_in);
+		if (getsockname(sock, (struct sockaddr *)&adds, &size) == -1)
+			res->re_errors++;
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/gettimeofday.c b/tools/tests/libMicro/gettimeofday.c
new file mode 100644
index 000000000..a17bf7519
--- /dev/null
+++ b/tools/tests/libMicro/gettimeofday.c
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * gettimeofday test
+ */
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/*
+ * One-time setup: fills in the usage text and requests no per-thread
+ * data.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "note: measures gettimeofday()");
+	lm_tsdsize = 0;
+	return (0);
+}
+
+/*
+ * Timed body.  Every pass makes ten gettimeofday() calls into the same
+ * local timeval and advances i by 10, so res->re_count ends up as
+ * lm_optB rounded up to a multiple of 10.  Return values are ignored.
+ * tsd is unused.  Always returns 0.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int i;
+	struct timeval t;
+
+	for (i = 0; i < lm_optB; i += 10) {
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+		(void) gettimeofday(&t, NULL);
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/isatty.c b/tools/tests/libMicro/isatty.c
new file mode 100644
index 000000000..68aaf8505
--- /dev/null
+++ b/tools/tests/libMicro/isatty.c
@@ -0,0 +1,110 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * isatty test
+ */
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* file opened when no -f option is given */
+#define DEFF "/dev/tty"
+static char *optf = DEFF;	/* path to open; replaced by -f */
+static int optb = 0;		/* set by -b: probe an unopened descriptor */
+
+/* per-thread state */
+typedef struct {
+	int ts_fd;	/* descriptor handed to isatty() */
+} tsd_t;
+
+/*
+ * One-time setup: requests a tsd_t per thread, registers the extra "-f"
+ * and "-b" options and fills in the usage text.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "f:b");
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-isatty (default %s)]\n"
+	    " [-b] (try to isatty an unopened fd)\n"
+	    "notes: measures isatty()",
+	    DEFF);
+
+	return (0);
+}
+
+/*
+ * Handles benchmark-specific options: 'f' stores the path to open, 'b'
+ * selects the unopened-descriptor mode.  Returns 0 when the option is
+ * recognised, -1 otherwise.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'f':
+		optf = optarg;
+		break;
+	case 'b':
+		optb = 1;
+		break;
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Per-worker setup: opens optf read-only, or, with -b, uses descriptor
+ * number 1024 without opening anything.  Returns 1 if the open failed,
+ * 0 otherwise.
+ */
+int
+benchmark_initworker(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+
+	ts->ts_fd = ((optb == 0) ?
+	    open(optf, O_RDONLY) : 1024);
+	if (ts->ts_fd == -1) {
+		return (1);
+	}
+	return (0);
+}
+
+/*
+ * Timed body: lm_optB isatty() calls on the worker's descriptor.
+ * res->re_count receives the number of calls.  Always returns 0.
+ *
+ * NOTE(review): isatty() is specified to return 1 or 0, so the == -1
+ * test presumably never counts an error, even in -b mode - confirm
+ * whether that is intended.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	int i;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (isatty(ts->ts_fd) == -1) {
+			res->re_errors++;
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/libmicro.c b/tools/tests/libMicro/libmicro.c
new file mode 100644
index 000000000..a3239c63a
--- /dev/null
+++ b/tools/tests/libMicro/libmicro.c
@@ -0,0 +1,1608 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * benchmarking routines
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __sun
+#include
+#endif
+
+#include "libmicro.h"
+
+
+#if defined(__APPLE__)
+#include
+
+/*
+ * Returns the time in nanoseconds since the first call to this
+ * function: the first call records mach_absolute_time() as the origin,
+ * and every call scales the ticks elapsed since then by the Mach
+ * timebase (numer / denom).
+ *
+ * NOTE(review): the first-call initialisation of the static state is
+ * not guarded; presumably the first call happens before any worker
+ * thread exists - confirm.
+ */
+long long
+gethrtime(void)
+{
+	long long elapsed;
+	static long long start;
+	static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 };
+
+	// If this is the first time we've run, get the timebase.
+	// We can use denom == 0 to indicate that sTimebaseInfo is
+	// uninitialised because it makes no sense to have a zero
+	// denominator in a fraction.
+
+	if ( sTimebaseInfo.denom == 0 ) {
+		(void) mach_timebase_info(&sTimebaseInfo);
+		start = mach_absolute_time();
+	}
+
+	elapsed = mach_absolute_time() - start;
+
+	// Convert to nanoseconds.
+	// return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom;
+
+	// Provided the final result is representable in 64 bits the following maneuver will
+	// deliver that result without intermediate overflow.
+	if (sTimebaseInfo.denom == sTimebaseInfo.numer)
+		return elapsed;
+	else if (sTimebaseInfo.denom == 1)
+		return elapsed * (long long)sTimebaseInfo.numer;
+	else {
+		// Decompose elapsed = eta32 * 2^32 + eps32:
+		long long eta32 = elapsed >> 32;
+		long long eps32 = elapsed & 0x00000000ffffffffLL;
+
+		long long numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;
+
+		// Form product of elapsed64 (decomposed) and numer:
+		long long mu64 = numer * eta32;
+		long long lambda64 = numer * eps32;
+
+		// Divide the constituents by denom:
+		long long q32 = mu64/denom;
+		long long r32 = mu64 - (q32 * denom); // mu64 % denom
+
+		// Recombine: the high quotient shifted back up, plus the
+		// high remainder carried into the low half before dividing.
+		return (q32 << 32) + ((r32 << 32) + lambda64)/denom;
+	}
+}
+
+#endif
+
+/*
+ * user visible globals
+ */
+
+int lm_argc = 0;
+char ** lm_argv = NULL;
+
+/* current option values, one per command-line letter */
+int lm_opt1;
+int lm_optA;
+int lm_optB;
+int lm_optC = 100;
+int lm_optD;
+int lm_optE;
+int lm_optH;
+int lm_optI;
+int lm_optL = 0;
+int lm_optM = 0;
+char *lm_optN;
+int lm_optP;
+int lm_optS;
+int lm_optT;
+int lm_optW;
+
+/* defaults that are copied into the matching lm_opt* variables */
+int lm_def1 = 0;
+int lm_defB = 0; /* use lm_nsecs_per_op */
+int lm_defD = 10;
+int lm_defH = 0;
+char *lm_defN = NULL;
+int lm_defP = 1;
+
+int lm_defS = 0;
+int lm_defT = 1;
+
+/*
+ * default on fast platform, should be overridden by individual
+ * benchmarks if significantly wrong in either direction.
+ */
+
+int lm_nsecs_per_op = 5;
+
+char *lm_procpath;
+char lm_procname[STRSIZE];
+char lm_usage[STRSIZE];
+char lm_optstr[STRSIZE];
+char lm_header[STRSIZE];
+size_t lm_tsdsize = 0;
+
+
+/*
+ * Globals we do not export to the user
+ */
+
+static barrier_t *lm_barrier;
+static pid_t *pids = NULL;
+static pthread_t *tids = NULL;
+static int pindex = -1;
+static void *tsdseg = NULL;
+static size_t tsdsize = 0;
+
+#ifdef USE_RDTSC
+static long long lm_hz = 0;
+#endif
+
+
+/*
+ * Forward references
+ */
+
+static void worker_process();
+static void usage();
+static void print_stats(barrier_t *);
+static void print_histo(barrier_t *);
+static int remove_outliers(double *, int, stats_t *);
+static long long nsecs_overhead;
+static long long nsecs_resolution;
+static long long get_nsecs_overhead();
+static int crunch_stats(double *, int, stats_t *);
+static void compute_stats(barrier_t *);
+/*
+ * main routine; renamed in this file to allow linking with other
+ * files
+ *
+ * Sequence: benchmark_init(), timer calibration, defaults and option
+ * parsing, benchmark_initrun(), allocation of the pid/thread tables and
+ * the shared per-thread data segment, barrier creation, running the
+ * workers (in-process with -1, otherwise one forked process per -P),
+ * statistics and report, then teardown.  Returns 0; setup failures exit
+ * with status 1.
+ */
+
+int
+actual_main(int argc, char *argv[])
+{
+	int i;
+	int opt;
+	extern char *optarg;
+	char *tmp;
+	char optstr[256];
+	barrier_t *b;
+	long long startnsecs = getnsecs();
+
+#ifdef USE_RDTSC
+	if (getenv("LIBMICRO_HZ") == NULL) {
+		(void) printf("LIBMICRO_HZ needed but not set\n");
+		exit(1);
+	}
+	lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
+#endif
+
+	lm_argc = argc;
+	lm_argv = argv;
+
+	/* before we do anything */
+	(void) benchmark_init();
+
+
+	nsecs_overhead = get_nsecs_overhead();
+	nsecs_resolution = get_nsecs_resolution();
+
+	/*
+	 * Set defaults
+	 */
+
+	lm_opt1 = lm_def1;
+	lm_optB = lm_defB;
+	lm_optD = lm_defD;
+	lm_optH = lm_defH;
+	lm_optN = lm_defN;
+	lm_optP = lm_defP;
+
+	lm_optS = lm_defS;
+	lm_optT = lm_defT;
+
+	/*
+	 * squirrel away the path to the current
+	 * binary in a way that works on both
+	 * Linux and Solaris
+	 */
+
+	if (*argv[0] == '/') {
+		lm_procpath = strdup(argv[0]);
+		*strrchr(lm_procpath, '/') = 0;
+	} else {
+		char path[1024];
+		(void) getcwd(path, 1024);
+		(void) strcat(path, "/");
+		(void) strcat(path, argv[0]);
+		*strrchr(path, '/') = 0;
+		lm_procpath = strdup(path);
+	}
+
+	/*
+	 * name of binary
+	 */
+
+	if ((tmp = strrchr(argv[0], '/')) == NULL)
+		(void) strcpy(lm_procname, argv[0]);
+	else
+		(void) strcpy(lm_procname, tmp + 1);
+
+	if (lm_optN == NULL) {
+		lm_optN = lm_procname;
+	}
+
+	/*
+	 * Parse command line arguments
+	 */
+
+	(void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case '1':
+			lm_opt1 = 1;
+			break;
+		case 'A':
+			lm_optA = 1;
+			break;
+		case 'B':
+			lm_optB = sizetoint(optarg);
+			break;
+		case 'C':
+			lm_optC = sizetoint(optarg);
+			break;
+		case 'D':
+			lm_optD = sizetoint(optarg);
+			break;
+		case 'E':
+			lm_optE = 1;
+			break;
+		case 'H':
+			lm_optH = 1;
+			break;
+		case 'I':
+			lm_optI = sizetoint(optarg);
+			break;
+		case 'L':
+			lm_optL = 1;
+			break;
+		case 'M':
+			lm_optM = 1;
+			break;
+		case 'N':
+			lm_optN = optarg;
+			break;
+		case 'P':
+			lm_optP = sizetoint(optarg);
+			break;
+		case 'S':
+			lm_optS = 1;
+			break;
+		case 'T':
+			lm_optT = sizetoint(optarg);
+			break;
+		case 'V':
+			(void) printf("%s\n", LIBMICRO_VERSION);
+			exit(0);
+			break;
+		case 'W':
+			lm_optW = 1;
+			lm_optS = 1;
+			break;
+		case '?':
+			usage();
+			exit(0);
+			break;
+		default:
+			if (benchmark_optswitch(opt, optarg) == -1) {
+				usage();
+				exit(0);
+			}
+		}
+	}
+
+	/* deal with implicit and overriding options */
+	if (lm_opt1 && lm_optP > 1) {
+		lm_optP = 1;
+		(void) printf("warning: -1 overrides -P\n");
+	}
+
+	if (lm_optE) {
+		(void) fprintf(stderr, "Running:%20s", lm_optN);
+		(void) fflush(stderr);
+	}
+
+	if (lm_optB == 0) {
+		/*
+		 * neither benchmark or user has specified the number
+		 * of cnts/sample, so use computed value
+		 */
+		if (lm_optI)
+			lm_nsecs_per_op = lm_optI;
+#define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */
+		lm_optB = nsecs_resolution * BLOCK_TOCK_DURATION / lm_nsecs_per_op;
+		if (lm_optB == 0)
+			lm_optB = 1;
+	}
+
+	/*
+	 * now that the options are set
+	 */
+
+	if (benchmark_initrun() == -1) {
+		exit(1);
+	}
+
+	/* allocate dynamic data */
+	pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
+	if (pids == NULL) {
+		perror("malloc(pids)");
+		exit(1);
+	}
+	tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
+	if (tids == NULL) {
+		perror("malloc(tids)");
+		exit(1);
+	}
+
+	/* check that the case defines lm_tsdsize before proceeding */
+	if (lm_tsdsize == (size_t)-1) {
+		(void) fprintf(stderr, "error in benchmark_init: "
+		    "lm_tsdsize not set\n");
+		exit(1);
+	}
+
+	/* round up tsdsize to nearest 128 to eliminate false sharing */
+	tsdsize = ((lm_tsdsize + 127) / 128) * 128;
+
+	/* allocate sufficient TSD for each thread in each process */
+	tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
+	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
+	/* mmap() reports failure with MAP_FAILED, not with NULL */
+	if (tsdseg == MAP_FAILED) {
+		perror("mmap(tsd)");
+		exit(1);
+	}
+
+	/* initialise worker synchronisation */
+	b = barrier_create(lm_optT * lm_optP, DATASIZE);
+	if (b == NULL) {
+		perror("barrier_create()");
+		exit(1);
+	}
+	lm_barrier = b;
+	b->ba_flag = 1;
+
+	/* need this here so that parent and children can call exit() */
+	(void) fflush(stdout);
+	(void) fflush(stderr);
+
+	/* when we started and when to stop */
+
+	b->ba_starttime = getnsecs();
+	b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
+
+	/* do the work */
+	if (lm_opt1) {
+		/* single process, non-fork mode */
+		pindex = 0;
+		worker_process();
+	} else {
+		/* create worker processes */
+		for (i = 0; i < lm_optP; i++) {
+			pids[i] = fork();
+
+			switch (pids[i]) {
+			case 0:
+				pindex = i;
+				worker_process();
+				exit(0);
+				break;
+			case -1:
+				perror("fork");
+				exit(1);
+				break;
+			default:
+				continue;
+			}
+		}
+
+		/* wait for worker processes */
+		for (i = 0; i < lm_optP; i++) {
+			if (pids[i] > 0) {
+				(void) waitpid(pids[i], NULL, 0);
+			}
+		}
+	}
+
+	b->ba_endtime = getnsecs();
+
+	/* compute results */
+
+	compute_stats(b);
+
+	/* print arguments benchmark was invoked with ? */
+	if (lm_optL) {
+		int l;
+		(void) printf("# %s ", argv[0]);
+		for (l = 1; l < argc; l++) {
+			(void) printf("%s ", argv[l]);
+		}
+		(void) printf("\n");
+	}
+
+	/* print result header (unless suppressed) */
+	if (!lm_optH) {
+		(void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
+		    "", "prc", "thr",
+		    "usecs/call",
+		    "samples", "errors", "cnt/samp", lm_header);
+	}
+
+	/* print result */
+
+	(void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
+	    lm_optN, lm_optP, lm_optT,
+	    (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
+	    b->ba_batches, b->ba_errors, lm_optB,
+	    benchmark_result());
+
+	if (lm_optS) {
+		print_stats(b);
+	}
+
+	/* just incase something goes awry */
+	(void) fflush(stdout);
+	(void) fflush(stderr);
+
+	/* cleanup by stages */
+	(void) benchmark_finirun();
+	(void) barrier_destroy(b);
+	(void) benchmark_fini();
+
+	if (lm_optE) {
+		(void) fprintf(stderr, " for %12.5f seconds\n",
+		    (double)(getnsecs() - startnsecs) /
+		    1.e9);
+		(void) fflush(stderr);
+	}
+	return (0);
+}
+
+/*
+ * Body of one worker thread; arg is the thread's per-thread data block.
+ * Runs batches until the shared ba_flag is cleared: per batch it calls
+ * benchmark_initbatch(), waits on the barrier, times one benchmark()
+ * call, hands the result to the barrier and calls benchmark_finibatch().
+ * A thread clears ba_flag once its batch ended after the deadline and
+ * the minimum sample count (-C) has been exceeded.  Returns 0.
+ */
+void *
+worker_thread(void *arg)
+{
+	result_t r;
+	long long last_sleep = 0;
+	long long t;
+
+	r.re_errors = benchmark_initworker(arg);
+
+	while (lm_barrier->ba_flag) {
+		r.re_count = 0;
+		r.re_errors += benchmark_initbatch(arg);
+
+		/* sync to clock */
+
+		if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
+			(void) poll(0, 0, 10);
+			last_sleep = t;
+		}
+		/* wait for it ... */
+		(void) barrier_queue(lm_barrier, NULL);
+
+		/* time the test */
+		r.re_t0 = getnsecs();
+		(void) benchmark(arg, &r);
+		r.re_t1 = getnsecs();
+
+		/* time to stop? */
+		if (r.re_t1 > lm_barrier->ba_deadline &&
+		    (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
+			lm_barrier->ba_flag = 0;
+		}
+
+		/* record results and sync */
+		(void) barrier_queue(lm_barrier, &r);
+
+		(void) benchmark_finibatch(arg);
+
+		r.re_errors = 0;
+	}
+
+	(void) benchmark_finiworker(arg);
+
+	return (0);
+}
+
+/*
+ * Runs the lm_optT worker threads of the current process (pindex):
+ * threads 1..lm_optT-1 are created with pthread_create(), thread 0 runs
+ * on the calling thread, and the created threads are joined afterwards.
+ * Exits with status 1 if a thread cannot be created.
+ */
+void
+worker_process()
+{
+	int i;
+	void *tsd;
+
+	for (i = 1; i < lm_optT; i++) {
+		tsd = gettsd(pindex, i);
+		if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
+			perror("pthread_create");
+			exit(1);
+		}
+	}
+
+	tsd = gettsd(pindex, 0);
+	(void) worker_thread(tsd);
+
+	for (i = 1; i < lm_optT; i++) {
+		(void) pthread_join(tids[i], NULL);
+	}
+}
+
+/*
+ * Prints the common option summary followed by the benchmark-specific
+ * text in lm_usage.  The -C default is shown as 100 (the initial value
+ * of lm_optC) and the -D default is labelled in milliseconds, the unit
+ * in which the deadline is computed.
+ */
+void
+usage()
+{
+	(void) printf(
+	    "usage: %s\n"
+	    " [-1] (single process; overrides -P > 1)\n"
+	    " [-A] (align with clock)\n"
+	    " [-B batch-size (default %d)]\n"
+	    " [-C minimum number of samples (default 100)]\n"
+	    " [-D duration in msecs (default %dms)]\n"
+	    " [-E (echo name to stderr)]\n"
+	    " [-H] (suppress headers)\n"
+	    " [-I] nsecs per op (used to compute batch size)\n"
+	    " [-L] (print argument line)\n"
+	    " [-M] (reports mean rather than median)\n"
+	    " [-N test-name (default '%s')]\n"
+	    " [-P processes (default %d)]\n"
+	    " [-S] (print detailed stats)\n"
+	    " [-T threads (default %d)]\n"
+	    " [-V] (print the libMicro version and exit)\n"
+	    " [-W] (flag possible benchmark problems)\n"
+	    "%s\n",
+	    lm_procname,
+	    lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
+	    lm_usage);
+}
+
+void
+print_warnings(barrier_t *b)
+{
+	int head = 0;
+	int increase;
+
+	if (b->ba_quant) {
+		if (!head++) {
+			(void) printf("#\n# WARNINGS\n");
+		}
+		increase = (int)(floor((nsecs_resolution * 100.0) /
+		    ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
+		    1.0);
+		(void) printf("# Quantization error likely;"
+		    "increase batch size (-B option) %dX to avoid.\n",
+		    increase);
+	}
+
+	/*
+	 * XXX should warn on median != mean by a lot
+	 */
+
+	if (b->ba_errors) {
+		if (!head++) {
+			(void) printf("#\n# 
WARNINGS\n"); + } + (void) printf("# Errors occured during benchmark.\n"); + } +} + +void +print_stats(barrier_t *b) +{ + (void) printf("#\n"); + (void) printf("# STATISTICS %12s %12s\n", + "usecs/call (raw)", + "usecs/call (outliers removed)"); + + if (b->ba_count == 0) { + (void) printf("zero samples\n"); + return; + } + + (void) printf("# min %12.5f %12.5f\n", + b->ba_raw.st_min, + b->ba_corrected.st_min); + + (void) printf("# max %12.5f %12.5f\n", + b->ba_raw.st_max, + b->ba_corrected.st_max); + (void) printf("# mean %12.5f %12.5f\n", + b->ba_raw.st_mean, + b->ba_corrected.st_mean); + (void) printf("# median %12.5f %12.5f\n", + b->ba_raw.st_median, + b->ba_corrected.st_median); + (void) printf("# stddev %12.5f %12.5f\n", + b->ba_raw.st_stddev, + b->ba_corrected.st_stddev); + (void) printf("# standard error %12.5f %12.5f\n", + b->ba_raw.st_stderr, + b->ba_corrected.st_stderr); + (void) printf("# 99%% confidence level %12.5f %12.5f\n", + b->ba_raw.st_99confidence, + b->ba_corrected.st_99confidence); + (void) printf("# skew %12.5f %12.5f\n", + b->ba_raw.st_skew, + b->ba_corrected.st_skew); + (void) printf("# kurtosis %12.5f %12.5f\n", + b->ba_raw.st_kurtosis, + b->ba_corrected.st_kurtosis); + + (void) printf("# time correlation %12.5f %12.5f\n", + b->ba_raw.st_timecorr, + b->ba_corrected.st_timecorr); + (void) printf("#\n"); + + (void) printf("# elasped time %12.5f\n", (b->ba_endtime - + b->ba_starttime) / 1.0e9); + (void) printf("# number of samples %12d\n", b->ba_batches); + (void) printf("# number of outliers %12d\n", b->ba_outliers); + (void) printf("# getnsecs overhead %12d\n", (int)nsecs_overhead); + + (void) printf("#\n"); + (void) printf("# DISTRIBUTION\n"); + + print_histo(b); + + if (lm_optW) { + print_warnings(b); + } +} + +void +update_stats(barrier_t *b, result_t *r) +{ + double time; + double nsecs_per_call; + + if (b->ba_waiters == 0) { + /* first thread only */ + b->ba_t0 = r->re_t0; + b->ba_t1 = r->re_t1; + b->ba_count0 = 0; + b->ba_errors0 = 0; + 
} else { + /* all but first thread */ + if (r->re_t0 < b->ba_t0) { + b->ba_t0 = r->re_t0; + } + if (r->re_t1 > b->ba_t1) { + b->ba_t1 = r->re_t1; + } + } + + b->ba_count0 += r->re_count; + b->ba_errors0 += r->re_errors; + + if (b->ba_waiters == b->ba_hwm - 1) { + /* last thread only */ + + + time = (double)b->ba_t1 - (double)b->ba_t0 - + (double)nsecs_overhead; + + if (time < 100 * nsecs_resolution) + b->ba_quant++; + + /* + * normalize by procs * threads if not -U + */ + + nsecs_per_call = time / (double)b->ba_count0 * + (double)(lm_optT * lm_optP); + + b->ba_count += b->ba_count0; + b->ba_errors += b->ba_errors0; + + b->ba_data[b->ba_batches % b->ba_datasize] = + nsecs_per_call; + + b->ba_batches++; + } +} + +#ifdef USE_SEMOP +barrier_t * +barrier_create(int hwm, int datasize) +{ + struct sembuf s[1]; + barrier_t *b; + + /*LINTED*/ + b = (barrier_t *)mmap(NULL, + sizeof (barrier_t) + (datasize - 1) * sizeof (double), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, -1, 0L); + if (b == (barrier_t *)MAP_FAILED) { + return (NULL); + } + b->ba_datasize = datasize; + + b->ba_flag = 0; + b->ba_hwm = hwm; + b->ba_semid = semget(IPC_PRIVATE, 3, 0600); + if (b->ba_semid == -1) { + (void) munmap((void *)b, sizeof (barrier_t)); + return (NULL); + } + + /* [hwm - 1, 0, 0] */ + s[0].sem_num = 0; + s[0].sem_op = hwm - 1; + s[0].sem_flg = 0; + if (semop(b->ba_semid, s, 1) == -1) { + perror("semop(1)"); + (void) semctl(b->ba_semid, 0, IPC_RMID); + (void) munmap((void *)b, sizeof (barrier_t)); + return (NULL); + } + + b->ba_waiters = 0; + b->ba_phase = 0; + + b->ba_count = 0; + b->ba_errors = 0; + + return (b); +} + +int +barrier_destroy(barrier_t *b) +{ + (void) semctl(b->ba_semid, 0, IPC_RMID); + (void) munmap((void *)b, sizeof (barrier_t)); + + return (0); +} + +int +barrier_queue(barrier_t *b, result_t *r) +{ + struct sembuf s[2]; + + /* + * {s0(-(hwm-1))} + * if ! 
nowait {s1(-(hwm-1))} + * (all other threads) + * update shared stats + * {s0(hwm-1), s1(1)} + * {s0(1), s2(-1)} + * else + * (last thread) + * update shared stats + * {s2(hwm-1)} + */ + + s[0].sem_num = 0; + s[0].sem_op = -(b->ba_hwm - 1); + s[0].sem_flg = 0; + if (semop(b->ba_semid, s, 1) == -1) { + perror("semop(2)"); + return (-1); + } + + s[0].sem_num = 1; + s[0].sem_op = -(b->ba_hwm - 1); + s[0].sem_flg = IPC_NOWAIT; + if (semop(b->ba_semid, s, 1) == -1) { + if (errno != EAGAIN) { + perror("semop(3)"); + return (-1); + } + + /* all but the last thread */ + + if (r != NULL) { + update_stats(b, r); + } + + b->ba_waiters++; + + s[0].sem_num = 0; + s[0].sem_op = b->ba_hwm - 1; + s[0].sem_flg = 0; + s[1].sem_num = 1; + s[1].sem_op = 1; + s[1].sem_flg = 0; + if (semop(b->ba_semid, s, 2) == -1) { + perror("semop(4)"); + return (-1); + } + + s[0].sem_num = 0; + s[0].sem_op = 1; + s[0].sem_flg = 0; + s[1].sem_num = 2; + s[1].sem_op = -1; + s[1].sem_flg = 0; + if (semop(b->ba_semid, s, 2) == -1) { + perror("semop(5)"); + return (-1); + } + + } else { + /* the last thread */ + + if (r != NULL) { + update_stats(b, r); + } + + b->ba_waiters = 0; + b->ba_phase++; + + s[0].sem_num = 2; + s[0].sem_op = b->ba_hwm - 1; + s[0].sem_flg = 0; + if (semop(b->ba_semid, s, 1) == -1) { + perror("semop(6)"); + return (-1); + } + } + + return (0); +} + +#else /* USE_SEMOP */ + +barrier_t * +barrier_create(int hwm, int datasize) +{ + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + barrier_t *b; + + /*LINTED*/ + b = (barrier_t *)mmap(NULL, + sizeof (barrier_t) + (datasize - 1) * sizeof (double), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, -1, 0L); + if (b == (barrier_t *)MAP_FAILED) { + return (NULL); + } + b->ba_datasize = datasize; + + b->ba_hwm = hwm; + b->ba_flag = 0; + + (void) pthread_mutexattr_init(&attr); + (void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + + (void) pthread_condattr_init(&cattr); + (void) pthread_condattr_setpshared(&cattr, 
PTHREAD_PROCESS_SHARED); + + (void) pthread_mutex_init(&b->ba_lock, &attr); + (void) pthread_cond_init(&b->ba_cv, &cattr); + + b->ba_waiters = 0; + b->ba_phase = 0; + + b->ba_count = 0; + b->ba_errors = 0; + + return (b); +} + +int +barrier_destroy(barrier_t *b) +{ + (void) munmap((void *)b, sizeof (barrier_t)); + + return (0); +} + +int +barrier_queue(barrier_t *b, result_t *r) +{ + int phase; + + (void) pthread_mutex_lock(&b->ba_lock); + + if (r != NULL) { + update_stats(b, r); + } + + phase = b->ba_phase; + + b->ba_waiters++; + if (b->ba_hwm == b->ba_waiters) { + b->ba_waiters = 0; + b->ba_phase++; + (void) pthread_cond_broadcast(&b->ba_cv); + } + + while (b->ba_phase == phase) { + (void) pthread_cond_wait(&b->ba_cv, &b->ba_lock); + } + + (void) pthread_mutex_unlock(&b->ba_lock); + return (0); +} +#endif /* USE_SEMOP */ + +int +gettindex() +{ + int i; + + if (tids == NULL) { + return (-1); + } + + for (i = 1; i < lm_optT; i++) { + if (pthread_self() == tids[i]) { + return (i); + } + } + + return (0); +} + +int +getpindex() +{ + return (pindex); +} + +void * +gettsd(int p, int t) +{ + if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT)) + return (NULL); + + return ((void *)((unsigned long)tsdseg + + (((p * lm_optT) + t) * tsdsize))); +} + +#if defined(__APPLE__) +int +gettsdindex(void *arg){ + /* + * gettindex() can race with pthread_create() filling in tids[]. + * This is an alternative approach to finding the calling thread's tsd in t +sdseg + */ + return tsdsize ? 
((unsigned long)arg - (unsigned long)tsdseg)/tsdsize : 0; +} +#endif /* __APPLE__ */ + +#ifdef USE_GETHRTIME +long long +getnsecs() +{ + return (gethrtime()); +} + +long long +getusecs() +{ + return (gethrtime() / 1000); +} + +#elif USE_RDTSC /* USE_GETHRTIME */ + +__inline__ long long +rdtsc(void) +{ + unsigned long long x; + __asm__ volatile(".byte 0x0f, 0x31" : "=A" (x)); + return (x); +} + +long long +getusecs() +{ + return (rdtsc() * 1000000 / lm_hz); +} + +long long +getnsecs() +{ + return (rdtsc() * 1000000000 / lm_hz); +} + +#else /* USE_GETHRTIME */ + +long long +getusecs() +{ + struct timeval tv; + + (void) gettimeofday(&tv, NULL); + + return ((long long)tv.tv_sec * 1000000LL + (long long) tv.tv_usec); +} + +long long +getnsecs() +{ + struct timeval tv; + + (void) gettimeofday(&tv, NULL); + + return ((long long)tv.tv_sec * 1000000000LL + + (long long) tv.tv_usec * 1000LL); +} + +#endif /* USE_GETHRTIME */ + +int +setfdlimit(int limit) +{ + struct rlimit rlimit; + + if (getrlimit(RLIMIT_NOFILE, &rlimit) < 0) { + perror("getrlimit"); + exit(1); + } + + if (rlimit.rlim_cur > limit) + return (0); /* no worries */ + + rlimit.rlim_cur = limit; + + if (rlimit.rlim_max < limit) + rlimit.rlim_max = limit; + + if (setrlimit(RLIMIT_NOFILE, &rlimit) < 0) { + perror("setrlimit"); + exit(3); + } + + return (0); +} + + +#define KILOBYTE 1024 +#define MEGABYTE (KILOBYTE * KILOBYTE) +#define GIGABYTE (KILOBYTE * MEGABYTE) + +long long +sizetoll(const char *arg) +{ + int len = strlen(arg); + int i; + long long mult = 1; + + if (len && isalpha(arg[len - 1])) { + switch (arg[len - 1]) { + + case 'k': + case 'K': + mult = KILOBYTE; + break; + case 'm': + case 'M': + mult = MEGABYTE; + break; + case 'g': + case 'G': + mult = GIGABYTE; + break; + default: + return (-1); + } + + for (i = 0; i < len - 1; i++) + if (!isdigit(arg[i])) + return (-1); + } + + return (mult * strtoll(arg, NULL, 10)); +} + +int +sizetoint(const char *arg) +{ + int len = strlen(arg); + int i; + long long 
mult = 1; + + if (len && isalpha(arg[len - 1])) { + switch (arg[len - 1]) { + + case 'k': + case 'K': + mult = KILOBYTE; + break; + case 'm': + case 'M': + mult = MEGABYTE; + break; + case 'g': + case 'G': + mult = GIGABYTE; + break; + default: + return (-1); + } + + for (i = 0; i < len - 1; i++) + if (!isdigit(arg[i])) + return (-1); + } + + return (mult * atoi(arg)); +} + +static void +print_bar(long count, long total) +{ + int i; + + (void) putchar_unlocked(count ? '*' : ' '); + for (i = 1; i < (32 * count) / total; i++) + (void) putchar_unlocked('*'); + for (; i < 32; i++) + (void) putchar_unlocked(' '); +} + +static int +doublecmp(const void *p1, const void *p2) +{ + double a = *((double *)p1); + double b = *((double *)p2); + + if (a > b) + return (1); + if (a < b) + return (-1); + return (0); +} + +static void +print_histo(barrier_t *b) +{ + int n; + int i; + int j; + int last; + long long maxcount; + double sum; + long long min; + long long scale; + double x; + long long y; + long long count; + int i95; + double p95; + double r95; + double m95; + histo_t *histo; + + (void) printf("# %12s %12s %32s %12s\n", "counts", "usecs/call", + "", "means"); + + /* calculate how much data we've captured */ + n = b->ba_batches > b->ba_datasize ? 
b->ba_datasize : b->ba_batches; + + /* find the 95th percentile - index, value and range */ + qsort((void *)b->ba_data, n, sizeof (double), doublecmp); + min = b->ba_data[0] + 0.000001; + i95 = n * 95 / 100; + p95 = b->ba_data[i95]; + r95 = p95 - min + 1; + + /* find a suitable min and scale */ + i = 0; + x = r95 / (HISTOSIZE - 1); + while (x >= 10.0) { + x /= 10.0; + i++; + } + y = x + 0.9999999999; + while (i > 0) { + y *= 10; + i--; + } + min /= y; + min *= y; + scale = y * (HISTOSIZE - 1); + if (scale < (HISTOSIZE - 1)) { + scale = (HISTOSIZE - 1); + } + + /* create and initialise the histogram */ + histo = malloc(HISTOSIZE * sizeof (histo_t)); + for (i = 0; i < HISTOSIZE; i++) { + histo[i].sum = 0.0; + histo[i].count = 0; + } + + /* populate the histogram */ + last = 0; + sum = 0.0; + count = 0; + for (i = 0; i < i95; i++) { + j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale; + + if (j >= HISTOSIZE) { + (void) printf("panic!\n"); + j = HISTOSIZE - 1; + } + + histo[j].sum += b->ba_data[i]; + histo[j].count++; + + sum += b->ba_data[i]; + count++; + } + m95 = sum / count; + + /* find the larges bucket */ + maxcount = 0; + for (i = 0; i < HISTOSIZE; i++) + if (histo[i].count > 0) { + last = i; + if (histo[i].count > maxcount) + maxcount = histo[i].count; + } + + /* print the buckets */ + for (i = 0; i <= last; i++) { + (void) printf("# %12lld %12.5f |", histo[i].count, + (min + scale * (double)i / (HISTOSIZE - 1))); + + print_bar(histo[i].count, maxcount); + + if (histo[i].count > 0) + (void) printf("%12.5f\n", + histo[i].sum / histo[i].count); + else + (void) printf("%12s\n", "-"); + } + + /* find the mean of values beyond the 95th percentile */ + sum = 0.0; + count = 0; + for (i = i95; i < n; i++) { + sum += b->ba_data[i]; + count++; + } + + /* print the >95% bucket summary */ + (void) printf("#\n"); + (void) printf("# %12lld %12s |", count, "> 95%"); + print_bar(count, maxcount); + if (count > 0) + (void) printf("%12.5f\n", sum / count); + else + (void) 
printf("%12s\n", "-"); + (void) printf("#\n"); + (void) printf("# %12s %12.5f\n", "mean of 95%", m95); + (void) printf("# %12s %12.5f\n", "95th %ile", p95); + + /* quantify any buffer overflow */ + if (b->ba_batches > b->ba_datasize) + (void) printf("# %12s %12d\n", "data dropped", + b->ba_batches - b->ba_datasize); +} + +static void +compute_stats(barrier_t *b) +{ + int i; + + if (b->ba_batches > b->ba_datasize) + b->ba_batches = b->ba_datasize; + + /* + * convert to usecs/call + */ + + for (i = 0; i < b->ba_batches; i++) + b->ba_data[i] /= 1000.0; + + /* + * do raw stats + */ + + (void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw); + + /* + * recursively apply 3 sigma rule to remove outliers + */ + + b->ba_corrected = b->ba_raw; + b->ba_outliers = 0; + + if (b->ba_batches > 40) { /* remove outliers */ + int removed; + + do { + removed = remove_outliers(b->ba_data, b->ba_batches, + &b->ba_corrected); + b->ba_outliers += removed; + b->ba_batches -= removed; + (void) crunch_stats(b->ba_data, b->ba_batches, + &b->ba_corrected); + } while (removed != 0 && b->ba_batches > 40); + } + +} + +/* + * routine to compute various statistics on array of doubles. 
+ */ + +static int +crunch_stats(double *data, int count, stats_t *stats) +{ + double a; + double std; + double diff; + double sk; + double ku; + double mean; + int i; + int bytes; + double *dupdata; + + /* + * first we need the mean + */ + + mean = 0.0; + + for (i = 0; i < count; i++) { + mean += data[i]; + } + + mean /= count; + + stats->st_mean = mean; + + /* + * malloc and sort so we can do median + */ + + dupdata = malloc(bytes = sizeof (double) * count); + (void) memcpy(dupdata, data, bytes); + qsort((void *)dupdata, count, sizeof (double), doublecmp); + stats->st_median = dupdata[count/2]; + + /* + * reuse dupdata to compute time correlation of data to + * detect interesting time-based trends + */ + + for (i = 0; i < count; i++) + dupdata[i] = (double)i; + + (void) fit_line(dupdata, data, count, &a, &stats->st_timecorr); + free(dupdata); + + std = 0.0; + sk = 0.0; + ku = 0.0; + + stats->st_max = -1; + stats->st_min = 1.0e99; /* hard to find portable values */ + + for (i = 0; i < count; i++) { + if (data[i] > stats->st_max) + stats->st_max = data[i]; + if (data[i] < stats->st_min) + stats->st_min = data[i]; + + diff = data[i] - mean; + std += diff * diff; + sk += diff * diff * diff; + ku += diff * diff * diff * diff; + } + + stats->st_stddev = std = sqrt(std/(double)(count - 1)); + stats->st_stderr = std / sqrt(count); + stats->st_99confidence = stats->st_stderr * 2.326; + stats->st_skew = sk / (std * std * std) / (double)(count); + stats->st_kurtosis = ku / (std * std * std * std) / + (double)(count) - 3; + + return (0); +} + +/* + * does a least squares fit to the set of points x, y and + * fits a line y = a + bx. 
Returns a, b + */ + +int +fit_line(double *x, double *y, int count, double *a, double *b) +{ + double sumx, sumy, sumxy, sumx2; + double denom; + int i; + + sumx = sumy = sumxy = sumx2 = 0.0; + + for (i = 0; i < count; i++) { + sumx += x[i]; + sumx2 += x[i] * x[i]; + sumy += y[i]; + sumxy += x[i] * y[i]; + } + + denom = count * sumx2 - sumx * sumx; + + if (denom == 0.0) + return (-1); + + *a = (sumy * sumx2 - sumx * sumxy) / denom; + + *b = (count * sumxy - sumx * sumy) / denom; + + return (0); +} + +/* + * empty function for measurement purposes + */ + +int +nop() +{ + return (1); +} + +#define NSECITER 1000 + +static long long +get_nsecs_overhead() +{ + long long s; + + double data[NSECITER]; + stats_t stats; + + int i; + int count; + int outliers; + + (void) getnsecs(); /* warmup */ + (void) getnsecs(); /* warmup */ + (void) getnsecs(); /* warmup */ + + i = 0; + + count = NSECITER; + + for (i = 0; i < count; i++) { + s = getnsecs(); + data[i] = getnsecs() - s; + } + + (void) crunch_stats(data, count, &stats); + + while ((outliers = remove_outliers(data, count, &stats)) != 0) { + count -= outliers; + (void) crunch_stats(data, count, &stats); + } + + return ((long long)stats.st_mean); + +} + +long long +get_nsecs_resolution() +{ + long long y[1000]; + + int i, j, nops, res; + long long start, stop; + + /* + * first, figure out how many nops to use + * to get any delta between time measurements. + * use a minimum of one. 
+ */ + + /* + * warm cache + */ + + stop = start = getnsecs(); + + for (i = 1; i < 10000000; i++) { + start = getnsecs(); + for (j = i; j; j--) + ; + stop = getnsecs(); + if (stop > start) + break; + } + + nops = i; + + /* + * now collect data at linearly varying intervals + */ + + for (i = 0; i < 1000; i++) { + start = getnsecs(); + for (j = nops * i; j; j--) + ; + stop = getnsecs(); + y[i] = stop - start; + } + + /* + * find smallest positive difference between samples; + * this is the timer resolution + */ + + res = 1<<30; + + for (i = 1; i < 1000; i++) { + int diff = y[i] - y[i-1]; + + if (diff > 0 && res > diff) + res = diff; + + } + + return (res); +} + +/* + * remove any data points from the array more than 3 sigma out + */ + +static int +remove_outliers(double *data, int count, stats_t *stats) +{ + double outmin = stats->st_mean - 3 * stats->st_stddev; + double outmax = stats->st_mean + 3 * stats->st_stddev; + + int i, j, outliers; + + for (outliers = i = j = 0; i < count; i++) + if (data[i] > outmax || data[i] < outmin) + outliers++; + else + data[j++] = data[i]; + + return (outliers); +} diff --git a/tools/tests/libMicro/libmicro.h b/tools/tests/libMicro/libmicro.h new file mode 100644 index 000000000..54dcb8503 --- /dev/null +++ b/tools/tests/libMicro/libmicro.h @@ -0,0 +1,253 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef LIBMICRO_H +#define LIBMICRO_H + +#include + +#define LIBMICRO_VERSION "0.4.0" + +#define STRSIZE 1024 + +typedef struct { + long long re_count; + long long re_errors; + long long re_t0; + long long re_t1; +} result_t; + +typedef struct { + double sum; + long long count; +} histo_t; + +#define HISTOSIZE 32 +#define DATASIZE 100000 + +/* + * stats we compute on data sets + */ + +typedef struct stats { + double st_min; + double st_max; + double st_mean; + double st_median; + double st_stddev; + double st_stderr; + double st_99confidence; + double st_skew; + double st_kurtosis; + double st_timecorr; /* correlation with respect to time */ +} stats_t; + +/* + * Barrier stuff + */ + +typedef struct { + int ba_hwm; /* barrier setpoint */ + int ba_flag; /* benchmark while true */ + long long ba_deadline; /* when to stop */ + int ba_phase; /* number of time used */ + int ba_waiters; /* how many are waiting */ + +#ifdef USE_SEMOP + int ba_semid; +#else + pthread_mutex_t ba_lock; + pthread_cond_t ba_cv; +#endif + + long long ba_count; /* how many ops */ + long long ba_errors; /* how many errors */ + + int ba_quant; /* how many quant errors */ + int ba_batches; /* how many samples */ + + double ba_starttime; /* test time start */ + double ba_endtime; /* test time end */ + +#ifdef NEVER + double ba_tmin; /* min time taken */ + double ba_tmax; /* max time taken */ + double ba_ctmax; /* max after outliers */ + double ba_mean; /* average value */ + double ba_median; /* median value */ + double ba_rawmedian; /* raw median value */ + double ba_stddev; /* standard deviation */ + double ba_stderr; /* standard error */ + double 
ba_skew; /* skew */ + double ba_kurtosis; /* kurtosis */ +#endif + stats_t ba_raw; /* raw stats */ + stats_t ba_corrected; /* corrected stats */ + + int ba_outliers; /* outlier count */ + + long long ba_t0; /* first thread/proc */ + long long ba_t1; /* time of last thread */ + long long ba_count0; + long long ba_errors0; + + int ba_datasize; /* possible #items data */ + double ba_data[1]; /* start of data ararry */ +} barrier_t; + + +/* + * Barrier interfaces + */ + +barrier_t *barrier_create(int hwm, int datasize); +int barrier_destroy(barrier_t *bar); +int barrier_queue(barrier_t *bar, result_t *res); + + +/* + * Functions that can be provided by the user + */ + +int benchmark(void *tsd, result_t *res); +int benchmark_init(); +int benchmark_fini(); +int benchmark_initrun(); +int benchmark_finirun(); +int benchmark_initworker(); +int benchmark_finiworker(); +int benchmark_initbatch(void *tsd); +int benchmark_finibatch(void *tsd); +int benchmark_optswitch(int opt, char *optarg); +char *benchmark_result(); + + +/* + * Globals exported to the user + */ + +extern int lm_argc; +extern char **lm_argv; + +extern int lm_optB; +extern int lm_optD; +extern int lm_optH; +extern char *lm_optN; +extern int lm_optP; +extern int lm_optS; +extern int lm_optT; + +extern int lm_defB; +extern int lm_defD; +extern int lm_defH; +extern char *lm_defN; +extern int lm_defP; +extern int lm_defS; +extern int lm_defT; +extern int lm_nsecs_per_op; + +extern char *lm_procpath; +extern char lm_procname[STRSIZE]; +extern char lm_usage[STRSIZE]; +extern char lm_optstr[STRSIZE]; +extern char lm_header[STRSIZE]; +extern size_t lm_tsdsize; + + +/* + * Utility functions + */ + +int getpindex(); +int gettindex(); +void *gettsd(int p, int t); +#if defined(__APPLE__) +int gettsdindex(void *arg); +#endif /* __APPLE__ */ +long long getusecs(); +long long getnsecs(); +int setfdlimit(int limit); +long long sizetoll(); +int sizetoint(); +int fit_line(double *, double *, int, double *, double *); +long long 
get_nsecs_resolution(); + + +/* Apple Mods Here */ + + + +#ifdef NO_PORTMAPPER +#define TCP_SELECT -31233 +#define TCP_XACT -31234 +#define TCP_CONTROL -31235 +#define TCP_DATA -31236 +#define TCP_CONNECT -31237 +#define UDP_XACT -31238 +#define UDP_DATA -31239 +#else +#define TCP_SELECT (u_long)404038 /* XXX - unregistered */ +#define TCP_XACT (u_long)404039 /* XXX - unregistered */ +#define TCP_CONTROL (u_long)404040 /* XXX - unregistered */ +#define TCP_DATA (u_long)404041 /* XXX - unregistered */ +#define TCP_CONNECT (u_long)404042 /* XXX - unregistered */ +#define UDP_XACT (u_long)404032 /* XXX - unregistered */ +#define UDP_DATA (u_long)404033 /* XXX - unregistered */ +#define VERS (u_long)1 +#endif + +/* +* socket send/recv buffer optimizations +*/ +#define SOCKOPT_READ 0x0001 +#define SOCKOPT_WRITE 0x0002 +#define SOCKOPT_RDWR 0x0003 +#define SOCKOPT_PID 0x0004 +#define SOCKOPT_REUSE 0x0008 +#define SOCKOPT_NONE 0 + +#ifndef SOCKBUF +#define SOCKBUF (1024*1024) +#endif + +#ifndef XFERSIZE +#define XFERSIZE (64*1024) /* all bandwidth I/O should use this */ +#endif + +typedef unsigned long iter_t; + +int tcp_server(int prog, int rdwr); +int tcp_done(int prog); +int tcp_accept(int sock, int rdwr); +int tcp_connect(char *host, int prog, int rdwr); +void sock_optimize(int sock, int rdwr); +int sockport(int s); + +/* end Apple Mods */ + + +#endif /* LIBMICRO_H */ diff --git a/tools/tests/libMicro/libmicro_main.c b/tools/tests/libMicro/libmicro_main.c new file mode 100644 index 000000000..f7bcde105 --- /dev/null +++ b/tools/tests/libMicro/libmicro_main.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Separate file for main so we can link other programs + * with libmicro + */ + +#include + +extern int actual_main(int, char **); + +int +main(int argc, char *argv[]) +{ + return (actual_main(argc, argv)); +} diff --git a/tools/tests/libMicro/listen.c b/tools/tests/libMicro/listen.c new file mode 100644 index 000000000..f8730d7bf --- /dev/null +++ b/tools/tests/libMicro/listen.c @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * listen benchmark + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define FIRSTPORT 12345 + +static struct sockaddr_in adds; +static int sock = -1; + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "notes: measures listen()()\n"); + + lm_tsdsize = 0; + + return (0); +} + +int +benchmark_initrun() +{ + int j; + int opt = 1; + struct hostent *host; + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock == -1) { + perror("socket"); + exit(1); + } + + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + &opt, sizeof (int)) == -1) { + perror("setsockopt"); + exit(1); + } + + if ((host = gethostbyname("localhost")) == NULL) { + perror("gethostbyname"); + exit(1); + } + + j = FIRSTPORT; + for (;;) { + (void) memset(&adds, 0, sizeof (struct sockaddr_in)); + adds.sin_family = AF_INET; + adds.sin_port = htons(j++); + (void) memcpy(&adds.sin_addr.s_addr, host->h_addr_list[0], + sizeof (struct in_addr)); + + if (bind(sock, (struct sockaddr *)&adds, + sizeof (struct sockaddr_in)) == 0) { + break; + } + + if (errno != EADDRINUSE) { + perror("bind"); + exit(1); + } + } + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i += 2) { + if (listen(sock, 4) == -1) + res->re_errors++; + if (listen(sock, 5) == -1) + res->re_errors++; + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/localtime_r.c b/tools/tests/libMicro/localtime_r.c new file mode 100644 index 000000000..84308844f --- /dev/null +++ b/tools/tests/libMicro/localtime_r.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. 
+ * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * localtime benchmark + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "notes: measures localtime_r()\n"); + lm_tsdsize = 0; + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + struct tm tms; + static time_t clock1 = 0L; + static time_t clock2 = 1L; + + for (i = 0; i < lm_optB; i += 10) { + (void) localtime_r(&clock1, &tms); + (void) localtime_r(&clock2, &tms); + (void) localtime_r(&clock1, &tms); + (void) localtime_r(&clock2, &tms); + (void) localtime_r(&clock1, &tms); + (void) localtime_r(&clock2, &tms); + (void) localtime_r(&clock1, &tms); + (void) localtime_r(&clock2, &tms); + (void) localtime_r(&clock1, &tms); + (void) localtime_r(&clock2, &tms); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/log.c b/tools/tests/libMicro/log.c new file mode 100644 index 000000000..0b4605f3f --- /dev/null +++ b/tools/tests/libMicro/log.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. 
+ * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * log benchmark - should do wider range... + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "note: measures log()"); + lm_nsecs_per_op = 75; + lm_tsdsize = 0; + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i += 10) { + double value = i + .01; + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + (void) log(value); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/longjmp.c b/tools/tests/libMicro/longjmp.c new file mode 100644 index 000000000..50f4dbc93 --- /dev/null +++ b/tools/tests/libMicro/longjmp.c @@ -0,0 +1,65 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * benchmark longjmp + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "notes: measures longjmp()\n"); + lm_tsdsize = 0; + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i = 0; + jmp_buf env; + + (void) setjmp(env); + i++; + if (i < lm_optB) + longjmp(env, 0); + + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/lrand48.c b/tools/tests/libMicro/lrand48.c new file mode 100644 index 000000000..e82ddfdc1 --- /dev/null +++ b/tools/tests/libMicro/lrand48.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * lrand48 + */ + +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "note: measures lrand48()"); + lm_nsecs_per_op = 10; + lm_tsdsize = 0; + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i += 10) { + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + (void) lrand48(); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/lseek.c b/tools/tests/libMicro/lseek.c new file mode 100644 index 000000000..6ca4af4cd --- /dev/null +++ b/tools/tests/libMicro/lseek.c @@ -0,0 +1,130 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * lseek + */ + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/dev/zero" +#define DEFS 1024 + +static char *optf = DEFF; +static long long opts = DEFS; + +typedef struct { + int ts_once; + int ts_fd; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:s:"); + + (void) sprintf(lm_usage, + " [-f file-to-read (default %s)]\n" + " [-s buffer-size (default %d)]\n" + "notes: measures lseek()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (ts->ts_once++ == 0) { + ts->ts_fd = open(optf, O_RDONLY); + } + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i += 2) { + if (lseek(ts->ts_fd, 0L, SEEK_SET) != 0) { + res->re_errors++; + } + if (lseek(ts->ts_fd, opts, SEEK_SET) != opts) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/malloc.c b/tools/tests/libMicro/malloc.c new file mode 100644 index 000000000..1e2568024 --- /dev/null +++ b/tools/tests/libMicro/malloc.c @@ -0,0 +1,140 @@ +/* + * CDDL HEADER START + * + * 
The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * malloc benchmark (crude) + */ + + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +static int optg = 100; +static int opts[32] = {32}; +static int optscnt = 0; + +typedef struct { + void **ts_glob; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:g:"); + + (void) sprintf(lm_usage, + " [-g number of mallocs before free (default %d)]\n" + " [-s size to malloc (default %d)." 
+ " Up to 32 sizes accepted\n" + "notes: measures malloc()/free()", + optg, opts[0]); + + (void) sprintf(lm_header, "%6s %6s", "glob", "sizes"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + int tmp; + switch (opt) { + case 'g': + optg = sizetoint(optarg); + break; + case 's': + opts[optscnt] = sizetoint(optarg); + tmp = ((++optscnt) & (0x1F)); + optscnt = tmp; + break; + default: + return (-1); + } + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (optscnt == 0) + optscnt = 1; + + ts->ts_glob = malloc(sizeof (void *)* optg); + if (ts->ts_glob == NULL) { + return (1); + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j, k; + + for (i = 0; i < lm_optB; i++) { + for (k = j = 0; j < optg; j++) { + if ((ts->ts_glob[j] = malloc(opts[k++])) == NULL) + res->re_errors++; + if (k >= optscnt) + k = 0; + } + for (j = 0; j < optg; j++) { + free(ts->ts_glob[j]); + } + } + + res->re_count = i * j; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + int i; + + (void) sprintf(result, "%6d ", optg); + + for (i = 0; i < optscnt; i++) + (void) sprintf(result + strlen(result), "%d ", opts[i]); + return (result); +} diff --git a/tools/tests/libMicro/memcpy.c b/tools/tests/libMicro/memcpy.c new file mode 100644 index 000000000..9a9448c34 --- /dev/null +++ b/tools/tests/libMicro/memcpy.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * memcpy + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFS 8192 +#define DEFR 1 + +static long long opts = DEFS; +static int optf; +static int optt; +static int opta; + +typedef struct { + char *ts_src; + char *ts_dest; + int ts_srcsize; + int ts_destsize; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "a:s:ft"); + + (void) sprintf(lm_usage, + " [-s buffer-size (default %d)]\n" + " [-a relative alignment (default page aligned)]\n" + " [-f (rotate \"from\" buffer to keep it out of cache)]\n" + " [-t (rotate \"to\" buffer to keep it out of cache)]\n" + "notes: measures memcpy()\n", + DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf++; + break; + case 't': + optt++; + break; + case 's': + opts = sizetoll(optarg); + break; + case 'a': + opta = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (optf) + ts->ts_srcsize = 64 * 1024 * 1024; + else + ts->ts_srcsize = opts + opta; + + if (optt) + ts->ts_destsize = 64 * 1024 * 1024; + else + ts->ts_destsize = (int)opts; + + + ts->ts_src = opta + (char *)valloc(ts->ts_srcsize); + ts->ts_dest = valloc(ts->ts_destsize); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + char 
*src = ts->ts_src; + char *dest = ts->ts_dest; + + int bump = (int)opts; + + if (bump < 1024) + bump = 1024; /* avoid prefetched area */ + for (i = 0; i < lm_optB; i++) { + (void) memcpy(dest, src, opts); + if (optf) { + src += bump; + if (src + opts > ts->ts_src + ts->ts_srcsize) + src = ts->ts_src; + } + if (optt) { + dest += bump; + if (dest + opts > ts->ts_dest + ts->ts_destsize) + dest = ts->ts_dest; + } + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/memmove.c b/tools/tests/libMicro/memmove.c new file mode 100644 index 000000000..822c88597 --- /dev/null +++ b/tools/tests/libMicro/memmove.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * memmove + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFS 8192 +#define DEFR 1 + +static long long opts = DEFS; +static int optf; +static int optt; +static int opta; + +typedef struct { + char *ts_src; + char *ts_dest; + int ts_srcsize; + int ts_destsize; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "a:s:ft"); + + (void) sprintf(lm_usage, + " [-s buffer-size (default %d)]\n" + " [-a relative alignment (default page aligned)]\n" + " [-f (rotate \"from\" buffer to keep it out of cache)]\n" + " [-t (rotate \"to\" buffer to keep it out of cache)]\n" + "notes: measures memmove()\n", + DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf++; + break; + case 't': + optt++; + break; + case 's': + opts = sizetoll(optarg); + break; + case 'a': + opta = sizetoint(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (optf) + ts->ts_srcsize = 64 * 1024 * 1024; + else + ts->ts_srcsize = opts + opta; + + if (optt) + ts->ts_destsize = 64 * 1024 * 1024; + else + ts->ts_destsize = (int)opts; + + + ts->ts_src = opta + (char *)valloc(ts->ts_srcsize); + ts->ts_dest = valloc(ts->ts_destsize); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + char *src = ts->ts_src; + char *dest = ts->ts_dest; + + int bump = (int)opts; + + if (bump < 1024) + bump = 1024; /* avoid prefetched area */ + for (i = 0; i < lm_optB; i++) { + (void) memmove(dest, src, opts); + if (optf) { + src += bump; + if (src + opts > ts->ts_src + ts->ts_srcsize) + src = ts->ts_src; + } + if (optt) { + dest += bump; + if (dest + opts > ts->ts_dest + ts->ts_destsize) + dest = ts->ts_dest; + } + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + 
static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/memrand.c b/tools/tests/libMicro/memrand.c new file mode 100644 index 000000000..8a9ccf326 --- /dev/null +++ b/tools/tests/libMicro/memrand.c @@ -0,0 +1,146 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * memory access time check + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +static long opts = 1024*1024; + +typedef struct { + long **ts_data; + long ts_result; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:"); + + (void) sprintf(lm_usage, + " [-s size] number of bytes to " + " access (default %ld)\n" + "notes: measures \"random\" memory access times\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 's': + opts = sizetoint(optarg); + break; + default: + return (-1); + } + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + + ts->ts_data = malloc(opts); + + if (ts->ts_data == NULL) { + return (1); + } + + /* + * use lmbench style backwards stride + */ + + for (i = 0; i < opts / sizeof (long); i++) { + j = i - 128; + if (j < 0) + j = j + opts / sizeof (long); + ts->ts_data[i] = (long *)&(ts->ts_data[j]); + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + long **ptr = ts->ts_data; + + + + for (i = 0; i < lm_optB; i += 10) { + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + ptr = (long **)*ptr; + } + + ts->ts_result = (long)*ptr; + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8ld ", opts); + + + return (result); +} diff --git a/tools/tests/libMicro/memset.c b/tools/tests/libMicro/memset.c new file mode 100644 index 000000000..8a6345409 --- /dev/null +++ b/tools/tests/libMicro/memset.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development 
and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * memset + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFS 8192 + +static long long opts = DEFS; +static int opta = 0; +static int optu = 0; + +static char *optas = "4k"; + +typedef struct { + char *ts_buff; + int ts_size; + int ts_offset; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "a:us:"); + + (void) sprintf(lm_usage, + " [-s buffer-size (default %d)]\n" + " [-a alignment (force buffer alignment)]\n" + " [-u (try to always use uncached memory)]" + "notes: measures memset()\n", + DEFS); + + (void) sprintf(lm_header, "%8s%16s", "size", "alignment"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'u': + optu = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + case 'a': + opta = sizetoll(optarg); + if (opta > 4096) + opta = 0; + else + optas = optarg; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int errors = 0; + int i; + + if (optu) { + ts->ts_size = 1024 * 1024 * 64; + ts->ts_offset = opta; 
+ } else { + ts->ts_size = opta + opts; + ts->ts_offset = opta; + } + + if ((ts->ts_buff = (char *)valloc(ts->ts_size)) == NULL) + errors++; + + for (i = 0; i < ts->ts_size; i++) + ts->ts_buff[i] = 0; + return (errors); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + + if (optu) { + char *buf = ts->ts_buff + ts->ts_offset; + char *end = ts->ts_buff + ts->ts_size; + int offset = ts->ts_offset; + + unsigned long tmp; + + for (i = 0; i < lm_optB; i ++) { + (void) memset(buf, 0, opts); + tmp = (((unsigned long)buf + opts + 4095) & ~4095) + offset; + buf = (char *) tmp; + if (buf + opts > end) + buf = ts->ts_buff + offset; + } + } else { + char *buf = ts->ts_buff + ts->ts_offset; + + for (i = 0; i < lm_optB; i += 10) { + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + (void) memset(buf, 0, opts); + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld%12s", opts, optas); + + return (result); +} diff --git a/tools/tests/libMicro/mk_tarball b/tools/tests/libMicro/mk_tarball new file mode 100755 index 000000000..fcac23e85 --- /dev/null +++ b/tools/tests/libMicro/mk_tarball @@ -0,0 +1,28 @@ +#!/bin/sh -x +# +# Copyright 2003 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# +# script to make tarball... 
# args are contents to be inserted
#

# The version string comes from a built benchmark binary; refuse to
# continue if it does not look like a version number.
libmicro_version=`bin/getpid -V`
case $libmicro_version in
[0-9]*)
	;;
*)
	echo "ERROR: cannot determine libMicro version" >&2
	exit 1
	;;
esac
dirname="libMicro-$libmicro_version"

here=`pwd`
target="$here/libMicro.tar"
tmpdir=/private/tmp/libmicro.$$

# Stage the requested files under a versioned directory name. Every
# step is checked so that tar and "rm -rf" never run from, or on, an
# unexpected location.
mkdir -p "$tmpdir/$dirname" || exit 1
cp "$@" "$tmpdir/$dirname" || { rm -rf "$tmpdir"; exit 1; }
cd "$tmpdir" || { rm -rf "$tmpdir"; exit 1; }

tar cvf "$target" "$dirname"
status=$?

cd "$here" || exit 1
rm -rf "$tmpdir"

# Report whether the archive itself was created successfully.
exit $status
+ */ + +/* + * mktime + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + struct tm ts_tm1; + struct tm ts_tm2; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + "notes: measures mktime()\n"); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + time_t clock1; + time_t clock2; + + clock1 = time(NULL); + clock2 = clock1 + 1; + + (void) localtime_r(&clock1, &ts->ts_tm1); + (void) localtime_r(&clock2, &ts->ts_tm2); + + return (0); +} + + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + struct tm t1, t2; + + for (i = 0; i < lm_optB; i += 10) { + t1 = ts->ts_tm1; + t2 = ts->ts_tm2; + (void) mktime(&t1); + (void) mktime(&t2); + + t1 = ts->ts_tm1; + t2 = ts->ts_tm2; + (void) mktime(&t1); + (void) mktime(&t2); + + t1 = ts->ts_tm1; + t2 = ts->ts_tm2; + (void) mktime(&t1); + (void) mktime(&t2); + + t1 = ts->ts_tm1; + t2 = ts->ts_tm2; + (void) mktime(&t1); + (void) mktime(&t2); + + t1 = ts->ts_tm1; + t2 = ts->ts_tm2; + (void) mktime(&t1); + (void) mktime(&t2); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/mmap.c b/tools/tests/libMicro/mmap.c new file mode 100644 index 000000000..44b8b5883 --- /dev/null +++ b/tools/tests/libMicro/mmap.c @@ -0,0 +1,198 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef volatile char vchar_t; + +typedef struct { + int ts_once; + vchar_t ** ts_map; + vchar_t ts_foo; +} tsd_t; + +#define DEFF "/dev/zero" +#define DEFL 8192 + +static char *optf = DEFF; +static long long optl = DEFL; +static int optr = 0; +static int opts = 0; +static int optw = 0; +static int fd = -1; +static int anon = 0; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:l:rsw"); + + (void) sprintf(lm_usage, + " [-f file-to-map (default %s)]\n" + " [-l mapping-length (default %d)]\n" + " [-r] (read a byte from each page)\n" + " [-w] (write a byte on each page)\n" + " [-s] (use MAP_SHARED)\n" + "notes: measures mmap()\n", + DEFF, DEFL); + + (void) sprintf(lm_header, "%8s %5s", "length", "flags"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + anon = strcmp(optf, "MAP_ANON") == 0; + break; + case 'l': + optl = sizetoll(optarg); + break; + case 'r': + optr = 1; + break; + case 's': + opts = 1; + break; + case 'w': + optw = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + if (!anon) + fd = open(optf, O_RDWR); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int errors = 0; + + if (ts->ts_once++ == 0) { + ts->ts_map = (vchar_t **)malloc(lm_optB * sizeof (void *)); + if (ts->ts_map == NULL) { + errors++; + } + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i, j; + + for 
(i = 0; i < lm_optB; i++) { + if (anon) { + ts->ts_map[i] = (vchar_t *)mmap(NULL, optl, + PROT_READ | PROT_WRITE, + MAP_ANON | (opts ? MAP_SHARED : MAP_PRIVATE), + -1, 0L); + } else { + ts->ts_map[i] = (vchar_t *)mmap(NULL, optl, + PROT_READ | PROT_WRITE, + opts ? MAP_SHARED : MAP_PRIVATE, + fd, 0L); + } + + if (ts->ts_map[i] == MAP_FAILED) { + res->re_errors++; + continue; + } + + if (optr) { + for (j = 0; j < optl; j += 4096) { + ts->ts_foo += ts->ts_map[i][j]; + } + } + if (optw) { + for (j = 0; j < optl; j += 4096) { + ts->ts_map[i][j] = 1; + } + } + } + res->re_count = i; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + (void) munmap((void *)ts->ts_map[i], optl); + } + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + char flags[5]; + + flags[0] = anon ? 'a' : '-'; + flags[1] = optr ? 'r' : '-'; + flags[2] = optw ? 'w' : '-'; + flags[3] = opts ? 's' : '-'; + flags[4] = 0; + + (void) sprintf(result, "%8lld %5s", optl, flags); + + return (result); +} diff --git a/tools/tests/libMicro/mprotect.c b/tools/tests/libMicro/mprotect.c new file mode 100644 index 000000000..db6ecaae4 --- /dev/null +++ b/tools/tests/libMicro/mprotect.c @@ -0,0 +1,205 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "libmicro.h"
+
+typedef volatile char		vchar_t;
+
+/* Per-thread state. */
+typedef struct {
+	int			ts_batch;	/* batches run so far */
+	int			ts_res;		/* sink for the -t reads */
+} tsd_t;
+
+#define	DEFF			"/dev/zero"
+#define	DEFL			8192
+
+static char			*optf = DEFF;
+static long long		optl = DEFL;
+static int			optr = 0;
+static int			optw = 0;
+static int			opts = 0;
+static int			optt = 0;
+static int			fd = -1;
+static int			anon = 0;
+static int			foo = 0;	/* sink for the -r reads */
+static vchar_t			*seg;		/* lm_optB slices of optl bytes */
+static int			pagesize;
+
+/*
+ * Register the per-thread state size, the option string, the usage text
+ * and the result column header with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "f:l:rstw");
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-map (default %s)]\n"
+	    " [-l mapping-length (default %d)]\n"
+	    " [-r] (read a byte from each page)\n"
+	    " [-w] (write a byte on each page)\n"
+	    " [-s] (use MAP_SHARED)\n"
+	    " [-t] (touch each page after restoring permissions)\n"
+	    "notes: measures mprotect()\n",
+	    DEFF, DEFL);
+
+	(void) sprintf(lm_header, "%8s %5s", "size", "flags");
+
+	return (0);
+}
+
+/*
+ * Record one command line option.  The file name "MAP_ANON" selects an
+ * anonymous mapping instead of a file.  Returns -1 for an unknown option.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'f':
+		optf = optarg;
+		anon = strcmp(optf, "MAP_ANON") == 0;
+		break;
+	case 'l':
+		optl = sizetoll(optarg);
+		break;
+	case 'r':
+		optr = 1;
+		break;
+	case 's':
+		opts = 1;
+		break;
+	case 't':
+		optt = 1;
+		break;
+	case 'w':
+		optw = 1;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Map one segment of lm_optB * optl bytes and, when -r / -w were given,
+ * touch one byte every 4096 bytes of it.  Returns -1 when the file cannot
+ * be opened or the segment cannot be mapped.
+ */
+int
+benchmark_initrun()
+{
+	int			flags;
+	long long		total = lm_optB * optl;
+	long long		i;	/* int would overflow on large segments */
+
+	if (!anon) {
+		fd = open(optf, O_RDWR);
+		if (fd == -1) {
+			perror("open");
+			return (-1);
+		}
+	}
+
+	flags = opts ? MAP_SHARED : MAP_PRIVATE;
+	flags |= anon ? MAP_ANON : 0;
+
+	seg = (vchar_t *)mmap(NULL, total, PROT_READ | PROT_WRITE,
+	    flags, anon ? -1 : fd, 0L);
+
+	if (seg == MAP_FAILED) {
+		return (-1);
+	}
+
+	if (optr) {
+		for (i = 0; i < total; i += 4096) {
+			foo += seg[i];
+		}
+	}
+
+	if (optw) {
+		for (i = 0; i < total; i += 4096) {
+			seg[i] = 1;
+		}
+	}
+
+	pagesize = getpagesize();
+
+	return (0);
+}
+
+/*
+ * Timed section: call mprotect() once on each of the lm_optB slices.
+ * The parity of (thread index + batch number + slice number) selects
+ * PROT_NONE or PROT_READ | PROT_WRITE, so a given slice alternates
+ * between the two from one batch to the next.  With -t, a slice is read
+ * once per page before it is switched to PROT_NONE.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+	int			us;
+	int			prot = PROT_NONE;
+	int			k;
+	long long		j;	/* optl is long long */
+
+#if !defined(__APPLE__)
+	us = (getpindex() * lm_optT) + gettindex();
+#else
+	us = gettsdindex(tsd);
+#endif /* __APPLE__ */
+
+	for (i = 0; i < lm_optB; i++) {
+		switch ((us + ts->ts_batch + i) % 2) {
+		case 0:
+			prot = PROT_NONE;
+			if (optt) {
+				for (j = 0, k = 0; j < optl; j += pagesize)
+					k += seg[i * optl + j];
+				ts->ts_res += k;
+			}
+			break;
+		default:
+			prot = PROT_READ | PROT_WRITE;
+			break;
+		}
+
+		if (mprotect((void *)&seg[i * optl], optl, prot) == -1) {
+			res->re_errors++;
+		}
+	}
+	res->re_count += lm_optB;
+	ts->ts_batch++;
+
+	return (0);
+}
+
+/*
+ * Format the slice length and one flag character per option
+ * (a = anonymous, r, w, s = shared, t = touch) for the result line.
+ * Returns a pointer to a static buffer.
+ */
+char *
+benchmark_result()
+{
+	static char		result[256];
+	char			flags[6];
+
+	flags[0] = anon ? 'a' : '-';
+	flags[1] = optr ? 'r' : '-';
+	flags[2] = optw ? 'w' : '-';
+	flags[3] = opts ? 's' : '-';
+	flags[4] = optt ? 't' : '-';
+	flags[5] = 0;
+
+	(void) sprintf(result, "%8lld %5s", optl, flags);
+
+	return (result);
+}
diff --git a/tools/tests/libMicro/msync.c b/tools/tests/libMicro/msync.c
new file mode 100644
index 000000000..48b05109d
--- /dev/null
+++ b/tools/tests/libMicro/msync.c
@@ -0,0 +1,190 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "libmicro.h"
+
+/* Per-thread state. */
+typedef struct {
+	char			*ts_map;	/* this worker's mapping */
+	int			ts_foo;		/* defeat optimizers */
+} tsd_t;
+
+#define	DEFF			"/dev/zero"
+#define	DEFL			8192
+
+static char			*optf = DEFF;
+static long long		optl = DEFL;
+static int			optr = 0;
+static int			opts = 0;
+static int			optw = 0;
+static int			opta = MS_SYNC;
+static int			opti = 0;
+static int			anon = 0;	/* never set in this file */
+static int			pagesize;
+
+/*
+ * Register the per-thread state size, the option string, the usage text
+ * and the result column header with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "af:il:rsw");
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-map (default %s)]\n"
+	    " [-l mapping-length (default %d)]\n"
+	    " [-r] (read a byte from each page between msyncs)\n"
+	    " [-w] (write a byte to each page between msyncs)\n"
+	    " [-s] (use MAP_SHARED instead of MAP_PRIVATE)\n"
+	    " [-a] (specify MS_ASYNC rather than default MS_SYNC)\n"
+	    " [-i] (specify MS_INVALIDATE)\n"
+	    "notes: measures msync()\n",
+	    DEFF, DEFL);
+
+	(void) sprintf(lm_header, "%8s %6s", "length", "flags");
+
+	return (0);
+}
+
+/*
+ * Record one command line option and refresh the cached page size.
+ * Returns -1 for an unknown option.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'a':
+		opta = MS_ASYNC;
+		break;
+
+	case 'f':
+		optf = optarg;
+		break;
+
+	case 'i':
+		opti = MS_INVALIDATE;
+		break;
+
+	case 'l':
+		optl = sizetoll(optarg);
+		break;
+	case 'r':
+		optr = 1;
+		break;
+	case 's':
+		opts = 1;
+		break;
+	case 'w':
+		optw = 1;
+		break;
+	default:
+		return (-1);
+	}
+
+	/*
+	 * pagesize is only read by the -r / -w loops in benchmark(), and
+	 * either flag brings us through here first.
+	 */
+	pagesize = getpagesize();
+
+	return (0);
+}
+
+/*
+ * Open optf, size it to optl bytes and map it for this worker.
+ * Returns 0 on success and 1 when the open or the mapping fails.
+ */
+int
+benchmark_initworker(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			fd;
+
+	if ((fd = open(optf, O_RDWR)) < 0) {
+		perror("open:");
+		return (1);
+	}
+
+	/* result deliberately ignored, as before */
+	(void) ftruncate(fd, optl);
+
+	ts->ts_map = (char *)mmap(NULL, optl, PROT_READ | PROT_WRITE,
+	    opts ? MAP_SHARED : MAP_PRIVATE, fd, 0L);
+	if (ts->ts_map == MAP_FAILED) {
+		perror("mmap:");
+		(void) close(fd);
+		return (1);
+	}
+
+	/*
+	 * The mapping stays valid after the descriptor is closed, so do not
+	 * keep one descriptor open per worker for the rest of the run.
+	 */
+	(void) close(fd);
+
+	return (0);
+}
+
+/*
+ * Timed section: msync() the mapping lm_optB times; with -r / -w one
+ * byte per page is read / written after each call.  The batch stops at
+ * the first msync() failure, and re_count is the number of iterations
+ * completed before it.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+	long long		j;	/* optl is long long */
+
+	for (i = 0; i < lm_optB; i++) {
+
+		if (msync(ts->ts_map, optl, opta | opti) < 0) {
+			perror("msync:");
+			res->re_errors++;
+			break;
+		}
+
+		if (optr) {
+			for (j = 0; j < optl; j += pagesize) {
+				ts->ts_foo += ts->ts_map[j];
+			}
+		}
+
+		if (optw) {
+			for (j = 0; j < optl; j += pagesize) {
+				ts->ts_map[j] = 1;
+			}
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
+
+/*
+ * Format the mapping length and one flag character per option for the
+ * result line.  Returns a pointer to a static buffer.
+ */
+char *
+benchmark_result()
+{
+	static char		result[256];
+	char			flags[6];
+
+	flags[0] = anon ? 'a' : '-';
+	flags[1] = optr ? 'r' : '-';
+	flags[2] = optw ? 'w' : '-';
+	flags[3] = opts ? 's' : '-';
+	flags[4] = opti ? 'i' : '-';
+	flags[5] = 0;
+
+	(void) sprintf(result, "%8lld %6s", optl, flags);
+
+	return (result);
+}
diff --git a/tools/tests/libMicro/multiview.sh b/tools/tests/libMicro/multiview.sh
new file mode 100644
index 000000000..c1608f058
--- /dev/null
+++ b/tools/tests/libMicro/multiview.sh
@@ -0,0 +1,201 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms
+# of the Common Development and Distribution License
+# (the "License"). You may not use this file except
+# in compliance with the License.
+#
+# You can obtain a copy of the license at
+# src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL
+# HEADER in each file and include the License file at
+# usr/src/OPENSOLARIS.LICENSE.
If applicable, +# add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your +# own identifying information: Portions Copyright [yyyy] +# [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# output html comparison of several libmicro output data files +# usage: multiview file1 file2 file3 file4 ... +# +# relative ranking is calculated using first as reference +# color interpolation is done to indicate relative performance; +# the redder the color, the slower the result, the greener the +# faster + +awk ' BEGIN { + benchmark_count = 0; + header_count = 0; +} +/^#/ { + next; + } +/errors/ { + next; + } +/^\!/ { + split($0, A_header, ":"); + name = substr(A_header[1],2); + headers[name]=name; + header_data[name,FILENAME] = substr($0, length(name) + 3); + if (header_names[name] == 0) { + header_names[name] = ++header_count; + headers[header_count] = name; + } + next; +} + + { + if(NF >= 7) { + if (benchmark_names[$1] == 0) { + benchmark_names[$1] = ++benchmark_count; + benchmarks[benchmark_count] = $1; + } + if ($6 == 0) + benchmark_data[$1,FILENAME] = $4; + else + benchmark_data[$1,FILENAME] = -1; + } +} + +END { + printf("\n"); + printf("\n"); + printf("\n"); + printf("\n"); + printf("\n"); + printf("multiview comparison\n") + printf("\n"); + printf("\n"); + printf("\n"); + printf("\n"); + printf("\n"); + for(i = 1; i <= header_count; i++) { + hname = headers[i]; + printf("\n", hname); + + for (j = 1; j < ARGC; j++) { + sub("^[\t ]+", "", header_data[hname, ARGV[j]]); + printf("\n", header_data[hname, ARGV[j]]); + } + printf("\n"); + } + printf("\n"); + printf("\n"); + printf("\n"); + + for (i = 2; i < ARGC; i++) + printf("\n"); + + printf("\n"); + for(i = 1; i < benchmark_count; i++) { + for(j = 1; j < benchmark_count; j++) { + if (benchmarks[j] > benchmarks[j + 1]) { + tmp = benchmarks[j]; + benchmarks[j] = 
benchmarks[j+1]; + benchmarks[j+1] = tmp; + } + } + } + + for(i = 1; i <= benchmark_count; i++) { + name = benchmarks[i]; + a = benchmark_data[name, ARGV[1]]; + + printf("\n"); + printf("\n", name); + if (a > 0) + printf("\n", a); + else { + if (a < 0) + printf("\n", "ERRORS"); + else + printf("\n", "missing"); + + for (j = 2; j < ARGC; j++) + printf("\n", "not computed"); + continue; + } + + for (j = 2; j < ARGC; j++) { + b = benchmark_data[name, ARGV[j]]; + if (b > 0) { + factor = b/a; + bgcolor = colormap(factor); + if (factor > 1) + percentage = -(factor * 100 - 100); + if (factor <= 1) + percentage = 100/factor - 100; + + printf("\n", + bgcolor, b, percentage); + } + + else if (b < 0) + printf("\n", "ERRORS"); + else + printf("\n", "missing"); + + } + printf("\n"); + + } + printf("
    %s%s
    BENCHMARKUSECSUSECS [percentage]
    %s
    %f
    %s%s%s
    %11.5f[%#+7.1f%%]
    %s%25s
    \n"); + +} + +function colormap(value, bgcolor, r, g, b) +{ + if (value <= .2) + value = .2; + if (value > 5) + value = 5; + + if (value < .9) { + r = colorcalc(.2, value, .9, 0, 255); + g = colorcalc(.2, value, .9, 153, 255); + b = colorcalc(.2, value, .9, 0, 255); + bgcolor=sprintf("#%2.2x%2.2x%2.2x", r, g, b); + } + else if (value < 1.1) + bgcolor="#ffffff"; + else { + r = 255; + g = colorcalc(1.1, value, 5, 255, 0); + b = colorcalc(1.1, value, 5, 255, 0); + bgcolor=sprintf("#%2.2x%2.2x%2.2x", r, g, b); + } + + return (bgcolor); +} + +function colorcalc(min, value, max, mincolor, maxcolor) +{ + return((value - min)/(max-min) * (maxcolor-mincolor) + mincolor); +} + +' "$@" + + diff --git a/tools/tests/libMicro/munmap.c b/tools/tests/libMicro/munmap.c new file mode 100644 index 000000000..7979dbba2 --- /dev/null +++ b/tools/tests/libMicro/munmap.c @@ -0,0 +1,192 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "libmicro.h"
+
+typedef volatile char		vchar_t;
+
+/*
+ * Per-thread state.  ts_map holds the mappings that benchmark_initbatch()
+ * creates and that the timed section then unmaps.
+ */
+typedef struct {
+	int			ts_once;	/* set once ts_map is allocated */
+	vchar_t **		ts_map;		/* lm_optB mapping addresses */
+	vchar_t			ts_foo;		/* sink for the -r reads */
+} tsd_t;
+
+#define	DEFF			"/dev/zero"
+#define	DEFL			8192
+
+static char			*optf = DEFF;
+static long long		optl = DEFL;
+static int			optr = 0;
+static int			optw = 0;
+static int			opts = 0;
+static int			fd = -1;
+static int			anon = 0;
+
+/*
+ * Register the per-thread state size, the option string, the usage text
+ * and the result column header with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "f:l:rsw");
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-map (default %s)]\n"
+	    " [-l mapping-length (default %d)]\n"
+	    " [-r] (read a byte from each page)\n"
+	    " [-w] (write a byte on each page)\n"
+	    " [-s] (use MAP_SHARED)\n"
+	    "notes: measures munmap()\n",
+	    DEFF, DEFL);
+
+	(void) sprintf(lm_header, "%8s %5s", "size", "flags");
+
+	return (0);
+}
+
+/*
+ * Record one command line option.  The file name "MAP_ANON" selects an
+ * anonymous mapping instead of a file.  Returns -1 for an unknown option.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'f':
+		optf = optarg;
+		anon = strcmp(optf, "MAP_ANON") == 0;
+		break;
+	case 'l':
+		optl = sizetoll(optarg);
+		break;
+	case 'r':
+		optr = 1;
+		break;
+	case 's':
+		opts = 1;
+		break;
+	case 'w':
+		optw = 1;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Open the file to be mapped, unless the mapping is anonymous.
+ * Returns -1 when the file cannot be opened.
+ */
+int
+benchmark_initrun()
+{
+	if (!anon) {
+		fd = open(optf, O_RDWR);
+		if (fd == -1) {
+			perror("open");
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Untimed set-up for one batch: create the lm_optB mappings that the
+ * timed section will unmap and, with -r / -w, touch one byte every 4096
+ * bytes of each.  Returns the number of errors encountered.
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			flags;
+	int			i;
+	long long		j;	/* optl is long long */
+	int			errors = 0;
+
+	if (ts->ts_once == 0) {
+		ts->ts_map = (vchar_t **)malloc(lm_optB * sizeof (void *));
+		if (ts->ts_map == NULL) {
+			/* nothing to store the mappings in */
+			return (1);
+		}
+		ts->ts_once = 1;
+	}
+
+	flags = opts ? MAP_SHARED : MAP_PRIVATE;
+	if (anon) {
+		flags |= MAP_ANON;
+	}
+
+	for (i = 0; i < lm_optB; i++) {
+		ts->ts_map[i] = (vchar_t *)mmap(NULL, optl,
+		    PROT_READ | PROT_WRITE, flags, anon ? -1 : fd, 0L);
+
+		if (ts->ts_map[i] == MAP_FAILED) {
+			errors++;
+			continue;
+		}
+		if (optr) {
+			for (j = 0; j < optl; j += 4096) {
+				ts->ts_foo += ts->ts_map[i][j];
+			}
+		}
+		if (optw) {
+			for (j = 0; j < optl; j += 4096) {
+				ts->ts_map[i][j] = 1;
+			}
+		}
+	}
+
+	/* report the failures instead of discarding the count */
+	return (errors);
+}
+
+/*
+ * Timed section: unmap every mapping of the batch, counting failures
+ * in res->re_errors.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (munmap((void *)ts->ts_map[i], optl) == -1) {
+			res->re_errors++;
+		}
+	}
+	res->re_count += lm_optB;
+
+	return (0);
+}
+
+/*
+ * Format the mapping length and one flag character per option
+ * (a = anonymous, r, w, s = shared) for the result line.
+ * Returns a pointer to a static buffer.
+ */
+char *
+benchmark_result()
+{
+	static char		result[256];
+	char			flags[5];
+
+	flags[0] = anon ? 'a' : '-';
+	flags[1] = optr ? 'r' : '-';
+	flags[2] = optw ? 'w' : '-';
+	flags[3] = opts ? 's' : '-';
+	flags[4] = 0;
+
+	(void) sprintf(result, "%8lld %5s", optl, flags);
+
+	return (result);
+}
diff --git a/tools/tests/libMicro/mutex.c b/tools/tests/libMicro/mutex.c
new file mode 100644
index 000000000..3c056af2d
--- /dev/null
+++ b/tools/tests/libMicro/mutex.c
@@ -0,0 +1,194 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * mutex
+ */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+
+#include "libmicro.h"
+
+static int			optt = 0;
+static int			optp = 0;
+static int			opth = 0;
+static int			opto = 0;
+
+pthread_mutex_t			*lock;
+
+/* Per-thread state. */
+typedef struct {
+	int			ts_once;
+	pthread_mutex_t		*ts_lock;	/* the mutex this worker uses */
+} tsd_t;
+
+/*
+ * Register the per-thread state size, the usage text, the option string
+ * and the result column header with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_usage,
+	    " [-t] (create dummy thread so we are multithreaded)\n"
+	    " [-p] (use inter-process mutex (not support everywhere))\n"
+	    " [-h usecs] (specify mutex hold time (default 0))\n"
+	    "notes: measures uncontended pthread_mutex_[un,]lock\n");
+
+	(void) sprintf(lm_optstr, "tph:o:");
+
+	(void) sprintf(lm_header, "%8s", "holdtime");
+
+	return (0);
+}
+
+/*
+ * Record one command line option.  Returns -1 for an unknown option.
+ */
+/*ARGSUSED*/
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'p':
+		optp = 1;
+		break;
+
+	case 't':
+		optt = 1;
+		break;
+
+	case 'h':
+		opth = sizetoint(optarg);
+		break;
+
+	case 'o':
+		opto = sizetoint(optarg);
+		break;
+
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Body of the -t dummy thread: block in pause() and hand the argument
+ * back if it ever returns.
+ */
+void *
+dummy(void *arg)
+{
+	(void) pause();
+	return (arg);
+}
+
+/*
+ * Map one anonymous page (shared when -p is given), place the mutex
+ * opto mutex-sized slots into it and initialise it.  Returns the number
+ * of errors.
+ */
+int
+benchmark_initrun()
+{
+	pthread_mutexattr_t	attr;
+	void			*page;
+	size_t			pagesz = (size_t)getpagesize();
+	int			errors = 0;
+
+	/* the mutex must lie entirely inside the single mapped page */
+	if (opto < 0 ||
+	    ((size_t)opto + 1) * sizeof (pthread_mutex_t) > pagesz) {
+		return (1);
+	}
+
+	page = mmap(NULL, pagesz, PROT_READ | PROT_WRITE,
+	    MAP_ANON | (optp ? MAP_SHARED : MAP_PRIVATE), -1, 0L);
+
+	/*
+	 * Test the value mmap() returned; once the offset has been added
+	 * the pointer no longer compares equal to MAP_FAILED.
+	 */
+	if (page == MAP_FAILED) {
+		return (1);
+	}
+
+	/*LINTED*/
+	lock = (pthread_mutex_t *)page + opto;
+
+	(void) pthread_mutexattr_init(&attr);
+	if (optp)
+		(void) pthread_mutexattr_setpshared(&attr,
+		    PTHREAD_PROCESS_SHARED);
+
+	if (pthread_mutex_init(lock, &attr) != 0)
+		errors++;
+
+	(void) pthread_mutexattr_destroy(&attr);
+
+	return (errors);
+}
+
+/*
+ * Per-worker set-up: with -t start the dummy thread, then record the
+ * mutex to use.  Returns the number of errors.
+ */
+int
+benchmark_initworker(void *tsd)
+{
+	int			errors = 0;
+	tsd_t			*ts = (tsd_t *)tsd;
+
+	if (optt) {
+		pthread_t	tid;
+
+		if (pthread_create(&tid, NULL, dummy, NULL) != 0) {
+			errors++;
+		}
+	}
+
+	ts->ts_lock = lock;
+
+	return (errors);
+}
+
+/*
+ * Busy-wait until getusecs() has advanced by at least usecs.
+ */
+void
+spinme(int usecs)
+{
+	long long		s = getusecs();
+
+	while (getusecs() - s < usecs)
+		;
+}
+
+/*
+ * Timed section: lock and unlock the mutex lm_optB times, spinning for
+ * opth microseconds while it is held when -h was given.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+
+	for (i = 0; i < lm_optB; i++) {
+
+		(void) pthread_mutex_lock(ts->ts_lock);
+		if (opth)
+			spinme(opth);
+		(void) pthread_mutex_unlock(ts->ts_lock);
+
+	}
+
+	res->re_count = lm_optB;
+
+	return (0);
+}
+
+/*
+ * Format the hold time for the result line.  Returns a pointer to a
+ * static buffer.
+ */
+char *
+benchmark_result()
+{
+	static char		result[256];
+
+	(void) sprintf(result, "%8d", opth);
+
+	return (result);
+}
diff --git a/tools/tests/libMicro/nop.c b/tools/tests/libMicro/nop.c
new file mode 100644
index 000000000..815691f76
--- /dev/null
+++ b/tools/tests/libMicro/nop.c
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * silly nop benchmark to test infrastructure
+ */
+
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "libmicro.h"
+
+/* defined outside this file; called only for the cost of the call */
+extern int nop(void);
+
+/*
+ * Register the usage text with the framework.
+ */
+int
+benchmark_init()
+{
+	(void) sprintf(lm_usage, "notes: measures nothing()\n");
+
+	return (0);
+}
+
+/*
+ * Timed section: call nop() lm_optB times and report that count.
+ */
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	int			i;
+
+	for (i = 0; i < lm_optB; i++)
+		(void) nop();	/* do nothing but the call */
+
+	res->re_count = i;
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/open.c b/tools/tests/libMicro/open.c
new file mode 100644
index 000000000..7cdcd76f5
--- /dev/null
+++ b/tools/tests/libMicro/open.c
@@ -0,0 +1,138 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * benchmark open
+ */
+
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include "libmicro.h"
+
+/* Per-thread state. */
+typedef struct {
+	int			ts_once;	/* set once ts_fds is allocated */
+	int			*ts_fds;	/* lm_optB descriptors, -1 if unused */
+} tsd_t;
+
+#define	DEFF			"/dev/null"
+
+static char			*optf = DEFF;
+
+/*
+ * Register the per-thread state size, the default batch size, the usage
+ * text and the option string with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	lm_defB = 256;
+
+	(void) sprintf(lm_usage,
+	    " [-f file-to-open (default %s)]\n"
+	    "notes: measures open()\n",
+	    DEFF);
+
+	(void) sprintf(lm_optstr, "f:");
+
+	return (0);
+}
+
+/*
+ * Record one command line option.  Returns -1 for an unknown option.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+	case 'f':
+		optf = optarg;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Ask for a descriptor limit of one per open() of a batch in every
+ * thread, plus ten spare.
+ */
+int
+benchmark_initrun()
+{
+	(void) setfdlimit(lm_optB * lm_optT + 10);
+
+	return (0);
+}
+
+/*
+ * Allocate the per-thread descriptor table the first time through and
+ * mark every slot unused.  Returns the number of errors (0 or 1).
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+
+	if (ts->ts_once == 0) {
+		ts->ts_fds = (int *)malloc(lm_optB * sizeof (int));
+		if (ts->ts_fds == NULL) {
+			/* do not initialise through a null pointer */
+			return (1);
+		}
+		for (i = 0; i < lm_optB; i++) {
+			ts->ts_fds[i] = -1;
+		}
+		ts->ts_once = 1;
+	}
+
+	return (0);
+}
+
+/*
+ * Timed section: open optf read-only lm_optB times, keeping every
+ * descriptor for benchmark_finibatch() and counting failures in
+ * res->re_errors.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+
+	for (i = 0; i < lm_optB; i++) {
+		ts->ts_fds[i] = open(optf, O_RDONLY);
+		if (ts->ts_fds[i] < 0) {
+			res->re_errors++;
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
+
+/*
+ * Close the descriptors opened by the batch.  Slots whose open() failed
+ * are skipped, and every slot is reset so that no descriptor number is
+ * closed a second time.
+ */
+int
+benchmark_finibatch(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (ts->ts_fds[i] < 0) {
+			continue;
+		}
+		(void) close(ts->ts_fds[i]);
+		ts->ts_fds[i] = -1;
+	}
+
+	return (0);
+}
diff --git a/tools/tests/libMicro/pipe.c b/tools/tests/libMicro/pipe.c
new file mode 100644
index 000000000..f1b3eb413
--- /dev/null
+++ b/tools/tests/libMicro/pipe.c
@@ -0,0 +1,565 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "libmicro.h"
+
+/*
+ * Per-thread state.  ts_in / ts_out are the descriptors used by the
+ * timed code; ts_in2 / ts_out2 are the ones used by loopback().
+ */
+typedef struct {
+	int			ts_once;	/* set once the listener exists */
+	pid_t			ts_child;	/* loopback process (mp mode) */
+	pthread_t		ts_thread;	/* loopback thread (mt mode) */
+	int			ts_in;
+	int			ts_out;
+	int			ts_in2;
+	int			ts_out2;
+	int			ts_lsn;		/* listening TCP socket */
+	struct sockaddr_in	ts_add;		/* address ts_lsn is bound to */
+} tsd_t;
+
+#define	FIRSTPORT		12345
+
+static char			*modes[] = {"st", "mt", "mp", NULL};
+#define	MD_SINGLE		0
+#define	MD_MULTITHREAD		1
+#define	MD_MULTIPROCESS		2
+
+static char			*xports[] = {"pipe", "fifo", "sock", "tcp",
+				    NULL};
+#define	XP_PIPES		0
+#define	XP_FIFOS		1
+#define	XP_SOCKETPAIR		2
+#define	XP_LOCALTCP		3
+
+#define	DEFM			MD_SINGLE
+#define	DEFS			1024
+#define	DEFX			XP_PIPES
+
+static int			optm = DEFM;
+static size_t			opts = DEFS;
+static int			optx = DEFX;
+static void			*rbuf = NULL;
+static void			*wbuf = NULL;
+
+int readall(int s, void *buf, size_t len);
+void *loopback(void *arg);
+int prepare_pipes(tsd_t *tsd);
+int prepare_fifos(tsd_t *tsd);
+int cleanup_fifos(tsd_t *tsd);
+int prepare_socketpair(tsd_t *tsd);
+int prepare_localtcp(tsd_t *tsd);
+int prepare_localtcp_once(tsd_t *tsd);
+char *lookupa(int x, char *names[]);
+int lookup(char *x, char *names[]);
+
+/*
+ * Register the per-thread state size, the option string, the usage text
+ * and the result column header with the framework.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "m:s:x:");
+
+	(void) sprintf(lm_usage,
+	    " [-m mode (st|mt|mp, default %s)]\n"
+	    " [-s buffer-size (default %d)]\n"
+	    " [-x transport (pipe|fifo|sock|tcp, default %s)]\n"
+	    "notes: measures write()/read() across various transports\n",
+	    lookupa(DEFM, modes), DEFS, lookupa(DEFX, xports));
+
+	(void) sprintf(lm_header, "%2s %4s", "md", "xprt");
+
+	return (0);
+}
+
+/*
+ * Record one command line option.  Returns -1 for an unknown option or
+ * an unknown mode / transport name.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	int			x;
+
+	switch (opt) {
+	case 'm':
+		x = lookup(optarg, modes);
+		if (x == -1)
+			return (-1);
+		optm = x;
+		break;
+	case 's':
+		opts = sizetoll(optarg);
+		break;
+	case 'x':
+		x = lookup(optarg, xports);
+		if (x == -1)
+			return (-1);
+		optx = x;
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Per-run set-up: refuse the fifo transport unless running as root,
+ * raise the descriptor limit and allocate the shared read and write
+ * buffers.  Returns -1 when a buffer cannot be allocated.
+ */
+int
+benchmark_initrun()
+{
+	if (optx == XP_FIFOS) {
+		if (geteuid() != 0) {
+			(void) printf("sorry, must be root to create fifos\n");
+			exit(1);
+		}
+	}
+
+	(void) setfdlimit(4 * lm_optT + 10);
+
+	rbuf = malloc(opts);
+	wbuf = malloc(opts);
+
+	/* malloc(0) may legitimately return NULL */
+	if (opts != 0 && (rbuf == NULL || wbuf == NULL)) {
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Per-batch set-up: create the transport, start the loopback thread or
+ * process when the mode asks for one, and push one buffer through the
+ * loop to prime it.  Returns 0 on success and 1 on any failure.
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			result;
+	pid_t			pid;
+
+	switch (optx) {
+	case XP_SOCKETPAIR:
+		result = prepare_socketpair(ts);
+		break;
+	case XP_LOCALTCP:
+		result = prepare_localtcp(ts);
+		break;
+	case XP_FIFOS:
+		result = prepare_fifos(ts);
+		break;
+	case XP_PIPES:
+	default:
+		result = prepare_pipes(ts);
+		break;
+	}
+	if (result == -1) {
+		return (1);
+	}
+
+	switch (optm) {
+	case MD_MULTITHREAD:
+		/* pthread_create() returns an error number, never -1 */
+		result = pthread_create(&ts->ts_thread, NULL, loopback, tsd);
+		if (result != 0) {
+			return (1);
+		}
+		break;
+	case MD_MULTIPROCESS:
+		pid = fork();
+		switch (pid) {
+		case 0:
+			(void) loopback(tsd);
+			exit(0);
+			break;
+		case -1:
+			/* same failure value as every other path here */
+			return (1);
+		default:
+			ts->ts_child = pid;
+			break;
+		}
+		break;
+	case MD_SINGLE:
+	default:
+		break;
+	}
+
+	/* Prime the loopback */
+	if (write(ts->ts_out, wbuf, opts) != (ssize_t)opts) {
+		return (1);
+	}
+	if (readall(ts->ts_in, rbuf, opts) == -1) {
+		return (1);
+	}
+
+	return (0);
+}
+
+/*
+ * Timed section: lm_optB times, write one buffer of opts bytes and read
+ * the same amount back.  A short write or a failed read counts one
+ * error.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+	int			i;
+	int			n;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (write(ts->ts_out, wbuf, opts) != (ssize_t)opts) {
+			res->re_errors++;
+			continue;
+		}
+
+		n = readall(ts->ts_in, rbuf, opts);
+		if (n == -1) {
+			res->re_errors++;
+			continue;
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
+
+/*
+ * Per-batch tear-down: send the final round trip that lets loopback()
+ * finish, reap the helper thread or process, and close the descriptors.
+ */
+int
+benchmark_finibatch(void *tsd)
+{
+	tsd_t			*ts = (tsd_t *)tsd;
+
+	/* Terminate the loopback */
+	(void) write(ts->ts_out, wbuf, opts);
+	(void) readall(ts->ts_in, rbuf, opts);
+
+	switch (optm) {
+	case MD_MULTITHREAD:
+		(void) close(ts->ts_in2);
+		(void) close(ts->ts_out2);
+		(void) pthread_join(ts->ts_thread, NULL);
+		break;
+	case MD_MULTIPROCESS:
+		(void) close(ts->ts_in2);
+		(void) close(ts->ts_out2);
+		(void) waitpid(ts->ts_child, NULL, 0);
+		break;
+	case MD_SINGLE:
+	default:
+		break;
+	}
+
+	(void) close(ts->ts_in);
+	(void) close(ts->ts_out);
+
+	if (optx == XP_FIFOS) {
+		(void) cleanup_fifos(ts);
+	}
+
+	return (0);
+}
+
+/*
+ * Format the mode and transport names for the result line.  Returns a
+ * pointer to a static buffer.
+ */
+char *
+benchmark_result()
+{
+	static char		result[256];
+
+	(void) sprintf(result, "%2s %4s",
+	    lookupa(optm, modes), lookupa(optx, xports));
+
+	return (result);
+}
+
+/*
+ * Read exactly len bytes from s into buf, issuing as many read() calls
+ * as it takes.  Returns the number of bytes read, or -1 when a read()
+ * fails or returns 0 before len bytes have arrived.
+ */
+int
+readall(int s, void *buf, size_t len)
+{
+	char			*p = (char *)buf;
+	ssize_t			n;	/* signed, so a -1 return is visible */
+	size_t			total = 0;
+
+	for (;;) {
+		n = read(s, p + total, len - total);
+		if (n < 1) {
+			return (-1);
+		}
+		total += (size_t)n;
+		if (total >= len) {
+			return ((int)total);
+		}
+	}
+}
+
+/*
+ * Echo side of the mt / mp modes: for every buffer read on ts_in2,
+ * write one buffer to ts_out2.  Handles lm_optB + 2 round trips (the
+ * batch plus priming and termination) and stops early on any error.
+ */
+void *
+loopback(void *arg)
+{
+	tsd_t			*ts = (tsd_t *)arg;
+	int			i, n, m;
+
+	/* Include priming and termination */
+	m = lm_optB + 2;
+
+	for (i = 0; i < m; i++) {
+		n = readall(ts->ts_in2, rbuf, opts);
+		if (n == -1) {
+			break;
+		}
+		if (write(ts->ts_out2, wbuf, opts) != (ssize_t)opts) {
+			break;
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Create the listening socket for the tcp transport, bound to the first
+ * free port at or above FIRSTPORT on "localhost".  The address is left
+ * in ts->ts_add.  Returns 0 on success and -1 on failure, in which case
+ * the socket is closed again.
+ */
+int
+prepare_localtcp_once(tsd_t *ts)
+{
+	int			j;
+	int			opt = 1;
+	struct hostent		*host;
+
+	j = FIRSTPORT;
+
+	ts->ts_lsn = socket(AF_INET, SOCK_STREAM, 0);
+	if (ts->ts_lsn == -1) {
+		return (-1);
+	}
+
+	if (setsockopt(ts->ts_lsn, SOL_SOCKET, SO_REUSEADDR,
+	    &opt, sizeof (int)) == -1) {
+		goto fail;
+	}
+
+	if ((host = gethostbyname("localhost")) == NULL) {
+		goto fail;
+	}
+
+	for (;;) {
+		(void) memset(&ts->ts_add, 0,
+		    sizeof (struct sockaddr_in));
+		ts->ts_add.sin_family = AF_INET;
+		ts->ts_add.sin_port = htons(j++);
+		(void) memcpy(&ts->ts_add.sin_addr.s_addr,
+		    host->h_addr_list[0], sizeof (struct in_addr));
+
+		if (bind(ts->ts_lsn,
+		    (struct sockaddr *)&ts->ts_add,
+		    sizeof (struct sockaddr_in)) == 0) {
+			break;
+		}
+
+		/* only a port that is already taken is worth a retry */
+		if (errno != EADDRINUSE) {
+			goto fail;
+		}
+	}
+
+	if (listen(ts->ts_lsn, 5) == -1) {
+		goto fail;
+	}
+
+	return (0);
+
+fail:
+	(void) close(ts->ts_lsn);
+	ts->ts_lsn = -1;
+	return (-1);
+}
+
+/*
+ * Set up one TCP connection through the per-thread listener: ts_out is
+ * the connecting socket and ts_out2 the accepted one, both with
+ * TCP_NODELAY set.  Returns 0 on success and -1 on failure, in which
+ * case the sockets created here are closed again.
+ */
+int
+prepare_localtcp(tsd_t *ts)
+{
+	int			result;
+	struct sockaddr_in	addr;
+	int			opt = 1;
+	socklen_t		size;
+
+	if (ts->ts_once == 0) {
+		if (prepare_localtcp_once(ts) == -1) {
+			return (-1);
+		}
+		/* latch only once the listener really exists */
+		ts->ts_once = 1;
+	}
+
+	ts->ts_out2 = -1;
+	ts->ts_out = socket(AF_INET, SOCK_STREAM, 0);
+	if (ts->ts_out == -1) {
+		return (-1);
+	}
+
+	/*
+	 * Connect without blocking, because the accept() that completes
+	 * the connection is issued below by this same thread.
+	 */
+	if (fcntl(ts->ts_out, F_SETFL, O_NDELAY) == -1) {
+		goto fail;
+	}
+
+	result = connect(ts->ts_out, (struct sockaddr *)&ts->ts_add,
+	    sizeof (struct sockaddr_in));
+	if ((result == -1) && (errno != EINPROGRESS)) {
+		goto fail;
+	}
+
+	if (fcntl(ts->ts_out, F_SETFL, 0) == -1) {
+		goto fail;
+	}
+
+	size = sizeof (addr);
+	result = accept(ts->ts_lsn, (struct sockaddr *)&addr, &size);
+	if (result == -1) {
+		goto fail;
+	}
+	ts->ts_out2 = result;
+
+	if (setsockopt(ts->ts_out, IPPROTO_TCP, TCP_NODELAY,
+	    &opt, sizeof (int)) == -1) {
+		goto fail;
+	}
+
+	if (setsockopt(ts->ts_out2, IPPROTO_TCP, TCP_NODELAY,
+	    &opt, sizeof (int)) == -1) {
+		goto fail;
+	}
+
+	if (optm == MD_SINGLE) {
+		ts->ts_in = ts->ts_out2;
+	} else {
+		ts->ts_in = ts->ts_out;
+		ts->ts_in2 = ts->ts_out2;
+	}
+
+	return (0);
+
+fail:
+	if (ts->ts_out2 != -1) {
+		(void) close(ts->ts_out2);
+	}
+	(void) close(ts->ts_out);
+	return (-1);
+}
+
+/*
+ * Create a UNIX-domain stream socket pair.  In single-threaded mode the
+ * caller writes to one end and reads from the other; otherwise the
+ * caller uses s[0] in both directions and loopback() uses s[1].
+ * Returns 0 on success and -1 on failure.
+ */
+int
+prepare_socketpair(tsd_t *ts)
+{
+	int			s[2];
+
+	if (socketpair(PF_UNIX, SOCK_STREAM, 0, s) == -1) {
+		return (-1);
+	}
+
+	if (optm == MD_SINGLE) {
+		ts->ts_in = s[0];
+		ts->ts_out = s[1];
+	} else {
+		ts->ts_in = s[0];
+		ts->ts_out = s[0];
+		ts->ts_in2 = s[1];
+		ts->ts_out2 = s[1];
+	}
+
+	return (0);
+}
+
+int
+prepare_fifos(tsd_t *ts) +{ + char path[64]; + + (void) sprintf(path, "/private/tmp/pipe_%ld.%dA", + getpid(), pthread_self()); + if (mknod(path, 0600, S_IFIFO) == -1) { + return (-1); + } + + if (optm == MD_SINGLE) { + ts->ts_in = open(path, O_RDONLY); + ts->ts_out = open(path, O_WRONLY); + } else { + ts->ts_in = open(path, O_RDONLY); + ts->ts_out2 = open(path, O_WRONLY); + + (void) sprintf(path, "/private/tmp/pipe_%ld.%dB", + getpid(), pthread_self()); + if (mknod(path, 0600, S_IFIFO) == -1) { + return (-1); + } + + ts->ts_in2 = open(path, O_RDONLY); + ts->ts_out = open(path, O_WRONLY); + } + + return (0); +} + +/*ARGSUSED*/ +int +cleanup_fifos(tsd_t *ts) +{ + char path[64]; + + (void) sprintf(path, "/private/tmp/pipe_%ld.%dA", getpid(), pthread_self()); + (void) unlink(path); + (void) sprintf(path, "/private/tmp/pipe_%ld.%dB", getpid(), pthread_self()); + (void) unlink(path); + + return (0); +} + +int +prepare_pipes(tsd_t *ts) +{ + int p[2]; + + if (optm == MD_SINGLE) { + if (pipe(p) == -1) { + return (-1); + } + ts->ts_in = p[0]; + ts->ts_out = p[1]; + + } else { + if (pipe(p) == -1) { + return (-1); + } + ts->ts_in = p[0]; + ts->ts_out2 = p[1]; + + if (pipe(p) == -1) { + return (-1); + } + ts->ts_in2 = p[0]; + ts->ts_out = p[1]; + } + + return (0); +} + +char * +lookupa(int x, char *names[]) +{ + int i = 0; + + while (names[i] != NULL) { + if (x == i) { + return (names[i]); + } + i++; + } + return (NULL); +} + +int +lookup(char *x, char *names[]) +{ + int i = 0; + + while (names[i] != NULL) { + if (strcmp(names[i], x) == 0) { + return (i); + } + i++; + } + return (-1); +} diff --git a/tools/tests/libMicro/poll.c b/tools/tests/libMicro/poll.c new file mode 100644 index 000000000..5ef128738 --- /dev/null +++ b/tools/tests/libMicro/poll.c @@ -0,0 +1,218 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFN 256 + +static int optn = DEFN; +static int optr = 0; +static int optw = 0; +static int optx = 0; +static int *fds; +static int target = 0; + +typedef struct pollfd pfd_t; + +typedef struct { + int ts_once; + pfd_t *ts_pfds; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "n:r:w:x"); + + (void) sprintf(lm_usage, + " [-n fds-per-thread (default %d)]\n" + " [-r readable-fds (default 0)]\n" + " [-w writeable-fds (default 0)]\n" + " [-x] (start -r option with highest fd first; " + "default is lowest first)\n" + "notes: measures poll()\n", + DEFN); + + (void) sprintf(lm_header, "%8s %5s", "nfds", "flags"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + optn = atoi(optarg); + break; + case 'r': + optr = atoi(optarg); + break; + case 'w': + optw = atoi(optarg); + break; + case 'x': + optx = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int 
j; + int pair[2]; + + if (optn % 2 != 0) { + (void) printf("ERROR: -n value must be even\n"); + optn = optr = optw = 0; + return (-1); + } + + if (optn < 0 || optr < 0 || optw < 0) { + (void) printf("ERROR: -n, -r and -w values must be > 0\n"); + optn = optr = optw = 0; + return (-1); + } + + if (optr > optn || optw > optn) { + (void) printf("ERROR: -r and -w values must be <= maxfd\n"); + optn = optr = optw = 0; + return (-1); + } + + fds = (int *)malloc(optn * sizeof (int)); + if (fds == NULL) { + (void) printf("ERROR: malloc() failed\n"); + optn = optr = optw = 0; + return (-1); + } + + (void) setfdlimit(optn + 10); + + + for (i = 0; i < optn; i += 2) { + if (socketpair(PF_UNIX, SOCK_STREAM, 0, pair) == -1) { + (void) printf("ERROR: socketpair() failed\n"); + return (-1); + } + + fds[i] = MIN(pair[0], pair[1]); + fds[i+1] = MAX(pair[0], pair[1]); + } + + if (optx) { + target = MIN(optr + optw, optn); + for (i = 0, j = optn - 1; i < optr; i++, j--) { + (void) write(fds[j+1 - (2*(j%2))], "", 1); + } + } else { + target = MAX(optr, optw); + for (i = 0; i < optr; i++) { + (void) write(fds[i+1 - (2*(i%2))], "", 1); + } + } + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + if (ts->ts_once++ == 0) { + ts->ts_pfds = (pfd_t *)malloc(optn * sizeof (pfd_t)); + if (ts->ts_pfds == NULL) { + errors++; + } + + for (i = 0; i < optn; i++) { + ts->ts_pfds[i].fd = fds[i]; + ts->ts_pfds[i].events = POLLIN; + } + + for (i = 0; i < optw; i++) { + ts->ts_pfds[i].events |= POLLOUT; + } + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if (poll(ts->ts_pfds, optn, 0) != target) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + char flags[4]; + + flags[0] = optr ? 'r' : '-'; + flags[1] = optw ? 'w' : '-'; + flags[2] = optx ? 
'x' : '-'; + flags[3] = 0; + + (void) sprintf(result, "%8d %5s", optn, flags); + + return (result); +} diff --git a/tools/tests/libMicro/pread.c b/tools/tests/libMicro/pread.c new file mode 100644 index 000000000..51e15d962 --- /dev/null +++ b/tools/tests/libMicro/pread.c @@ -0,0 +1,142 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + char *ts_buf; +} tsd_t; + +#define DEFF "/dev/zero" +#define DEFS 1024 + +static char *optf = DEFF; +static long long opts = DEFS; +static int optw = 0; +static int fd = -1; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "wf:s:"); + + (void) sprintf(lm_usage, + " [-f file-to-read (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-w (write a byte to each page after read)]\n" + "notes: measures pread()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'w': + optw = getpagesize(); + break; + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + fd = open(optf, O_RDONLY); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int errors = 0; + + if (ts->ts_buf == NULL) { + ts->ts_buf = malloc(opts); + } + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int j; + + for (i = 0; i < lm_optB; i++) { + if (pread(fd, ts->ts_buf, opts, 0) != opts) { + res->re_errors++; + } + if (optw) { + for (j = 0; j < opts; j += optw) + ts->ts_buf[j] = 0; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/pthread_create.c b/tools/tests/libMicro/pthread_create.c new file mode 100644 index 000000000..340d35c05 --- /dev/null +++ b/tools/tests/libMicro/pthread_create.c @@ -0,0 +1,160 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the 
"License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + pthread_t *ts_threads; + pthread_attr_t *ts_attr; + pthread_mutex_t ts_lock; +} tsd_t; + +static int opts = 0; + +int +benchmark_init() +{ + lm_defN = "pthread"; + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, + " [-s stacksize] (specify stacksize)\n" + "notes: measures pthread_create\n"); + + (void) sprintf(lm_optstr, "s:"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + + return (0); +} + +int +benchmark_initworker(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int errors = 0; + + ts->ts_threads = calloc(lm_optB, sizeof (pthread_t)); + (void) pthread_mutex_init(&ts->ts_lock, NULL); + + if (opts) { + ts->ts_attr = malloc(sizeof (pthread_attr_t)); + (void) pthread_attr_init(ts->ts_attr); + if ((errors = pthread_attr_setstacksize(ts->ts_attr, opts)) + != 0) { + errno = errors; + perror("pthread_attr_setstacksize"); + } + } else + ts->ts_attr = NULL; + + return (errors?1:0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts 
= (tsd_t *)tsd; + + (void) pthread_mutex_lock(&ts->ts_lock); + + return (0); +} + + +void * +func(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + (void) pthread_mutex_lock(&ts->ts_lock); + (void) pthread_mutex_unlock(&ts->ts_lock); + + return (tsd); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + int error; + + for (i = 0; i < lm_optB; i++) { + if ((error = pthread_create(ts->ts_threads + i, + ts->ts_attr, func, tsd)) != 0) { + errno = error; + perror("pthread_create"); + ts->ts_threads[i] = 0; + res->re_errors++; + return (0); + } + } + res->re_count = lm_optB; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + (void) pthread_mutex_unlock(&ts->ts_lock); + + for (i = 0; i < lm_optB; i++) + if (ts->ts_threads[i] == 0 || + pthread_join(ts->ts_threads[i], NULL) < 0) { + errors++; + } + return (errors); +} diff --git a/tools/tests/libMicro/pwrite.c b/tools/tests/libMicro/pwrite.c new file mode 100644 index 000000000..ab1374c6b --- /dev/null +++ b/tools/tests/libMicro/pwrite.c @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + char *ts_buf; +} tsd_t; + +#define DEFF "/dev/null" +#define DEFS 1024 + +static int optc = 0; +static char *optf = DEFF; +static long long opts = DEFS; +static int fd = -1; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "cf:s:"); + + (void) sprintf(lm_usage, + " [-f file-to-write (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-c ] (make sure buffer is in cache)\n" + "notes: measures pwrite()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'c': + optc++; + break; + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + fd = open(optf, O_WRONLY); + if (fd == -1) { + return (-1); + } + + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + if (ts->ts_buf == NULL) { + ts->ts_buf = malloc(opts); + + /* + * bring buf into cache if specified. 
+ */ + + if (optc) + for (i = 0; i < opts; i++) + ts->ts_buf[i] = 0; + } + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if (pwrite(fd, ts->ts_buf, opts, 0) != opts) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/read.c b/tools/tests/libMicro/read.c new file mode 100644 index 000000000..32faf05ac --- /dev/null +++ b/tools/tests/libMicro/read.c @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#ifdef linux +#define _XOPEN_SOURCE 500 +#endif + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + char *ts_buf; + int ts_fd; +} tsd_t; + +#define DEFF "/dev/zero" +#define DEFS 1024 + +static char *optf = DEFF; +static long long opts = DEFS; +int optw = 0; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:s:w"); + + (void) sprintf(lm_usage, + " [-f file-to-read (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-w (store a byte to each page after read)]\n" + "notes: measures read()\n", + DEFF, DEFS); + + (void) sprintf(lm_header, "%8s", "size"); + + lm_defB = 1; + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'w': + optw = getpagesize(); + break; + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + if (ts->ts_buf == NULL) { + ts->ts_buf = malloc(opts); + ts->ts_fd = open(optf, O_RDONLY); + } + + (void) lseek(ts->ts_fd, 0, SEEK_SET); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int j; + for (i = 0; i < lm_optB; i++) { + if (read(ts->ts_fd, ts->ts_buf, opts) != opts) { + res->re_errors++; + } + if (optw) + for (j = 0; j < opts; j += optw) + ts->ts_buf[j] = 0; + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8lld", opts); + + return (result); +} diff --git a/tools/tests/libMicro/realpath.c b/tools/tests/libMicro/realpath.c new file mode 100644 index 000000000..540ebf77a --- /dev/null +++ b/tools/tests/libMicro/realpath.c @@ -0,0 +1,85 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the 
"License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/" +#define MAXPATHLEN 1024 + +static char *optf = DEFF; + +int +benchmark_init() +{ + (void) sprintf(lm_optstr, "f:"); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-f directory (default = %s)]\n" + "notes: measures realpath()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + char path[MAXPATHLEN]; + + for (i = 0; i < lm_optB; i++) { + if (realpath(optf, path) == NULL) + res->re_errors++; + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/recurse.c b/tools/tests/libMicro/recurse.c new file mode 100644 index 000000000..3237b465b --- /dev/null +++ b/tools/tests/libMicro/recurse.c @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. 
+ * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFD 100 + +static int optd = DEFD; + +int recurse2(int x, int y, char *s); + +/*ARGSUSED*/ +int +recurse1(int x, int y, char *s) +{ + char str[32]; + + if (x < y) { + return (recurse2(x + 1, y, str)); + } + + return (x); +} + +int +benchmark_init() +{ + lm_tsdsize = 0; + + (void) sprintf(lm_optstr, "d:"); + + (void) sprintf(lm_usage, + " [-d depth-limit (default = %d)]\n" + "notes: measures recursion performance\n", + DEFD); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'd': + optd = atoi(optarg); + break; + default: + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i++) { + (void) recurse1(0, optd, NULL); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/recurse2.c b/tools/tests/libMicro/recurse2.c new file mode 100644 index 000000000..9f228900d --- /dev/null +++ b/tools/tests/libMicro/recurse2.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include + +int recurse1(int x, int y, char *s); + +/*ARGSUSED*/ +int +recurse2(int x, int y, char *s) +{ + char str[32]; + + if (x < y) { + return (recurse1(x + 1, y, str)); + } + + return (x); +} diff --git a/tools/tests/libMicro/select.c b/tools/tests/libMicro/select.c new file mode 100644 index 000000000..5edc37edb --- /dev/null +++ b/tools/tests/libMicro/select.c @@ -0,0 +1,214 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#define MAX(x, y) ((x) > (y) ? (x) : (y)) +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFN 256 + +static int optn = DEFN; +static int optr = 0; +static int optw = 0; +static int optx = 0; +static int *fds; +static fd_set iset; +static fd_set oset; +static int maxfd = 0; +static int target = 0; + +int +benchmark_init() +{ + (void) sprintf(lm_optstr, "n:r:w:x"); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-n fds-per-thread (default %d)]\n" + " [-r readable-fds (default 0)]\n" + " [-w writeable-fds (default 0)]\n" + " [-x] (start -r option with highest fd first; " + "default is lowest first)\n" + "notes: measures select()\n", + DEFN); + + (void) sprintf(lm_header, "%8s %5s", "maxfd", "flags"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + optn = atoi(optarg); + break; + case 'r': + optr = atoi(optarg); + break; + case 'w': + optw = atoi(optarg); + break; + case 'x': + optx = 1; + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initrun() +{ + int i; + int j; + int pair[2]; + + if (optn % 2 != 0) { + (void) printf("ERROR: -n value must be even\n"); + optn = optr = optw = 0; + return (-1); + } + + if (optn < 0 || optr < 0 || optw < 0) { + (void) printf("ERROR: -n, -r and -w values must be > 0\n"); + optn = optr = optw = 0; + return (-1); + } + + if (optr > optn || optw > optn) { + (void) printf("ERROR: -r and -w values must be <= maxfd\n"); + optn = optr = optw = 0; + return (-1); + } + + fds = (int *)malloc(optn * sizeof (int)); + if 
(fds == NULL) { + (void) printf("ERROR: malloc() failed\n"); + optn = optr = optw = 0; + return (-1); + } + + (void) setfdlimit(optn + 10); + + target = optr + optw; + + FD_ZERO(&iset); + FD_ZERO(&oset); + + for (i = 0; i < optn; i += 2) { + if (socketpair(PF_UNIX, SOCK_STREAM, 0, pair) == -1) { + (void) printf("ERROR: socketpair() failed\n"); + return (-1); + } + + fds[i] = MIN(pair[0], pair[1]); + fds[i+1] = MAX(pair[0], pair[1]); + maxfd = fds[i+1] + 1; + + if (maxfd > FD_SETSIZE) { + (void) printf("WARNING: FD_SETSIZE is too small!\n"); + return (-1); + } + + FD_SET(fds[i], &iset); + FD_SET(fds[i+1], &iset); + } + + for (i = 0; i < optw; i++) { + FD_SET(fds[i], &oset); + } + if (optx) { + for (i = 0, j = optn - 1; i < optr; i++, j--) { + (void) write(fds[j+1 - (2*(j%2))], "", 1); + } + } else { + for (i = 0; i < optr; i++) { + (void) write(fds[i+1 - (2*(i%2))], "", 1); + } + } + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + fd_set set1; + fd_set set2; + fd_set *my_iset = &set1; + fd_set *my_oset = NULL; + struct timeval tv = {0, 0}; + + if (optw) { + my_oset = &set2; + } + + for (i = 0; i < lm_optB; i++) { + (void) memcpy(&set1, &iset, sizeof (fd_set)); + (void) memcpy(&set2, &oset, sizeof (fd_set)); + + if (select(maxfd, my_iset, my_oset, NULL, &tv) != target) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + char flags[4]; + + flags[0] = optr ? 'r' : '-'; + flags[1] = optw ? 'w' : '-'; + flags[2] = optx ? 
'x' : '-'; + flags[3] = 0; + + (void) sprintf(result, "%8d %5s", optn, flags); + + return (result); +} diff --git a/tools/tests/libMicro/semop.c b/tools/tests/libMicro/semop.c new file mode 100644 index 000000000..9953a799d --- /dev/null +++ b/tools/tests/libMicro/semop.c @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + + +typedef struct { + int ts_semid; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, "notes: measures semop()\n"); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + + tsd_t *ts = (tsd_t *)tsd; + + if ((ts->ts_semid = semget(IPC_PRIVATE, 2, 0600)) == -1) { + return (-1); + } + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + (void) semctl(ts->ts_semid, 0, IPC_RMID); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + struct sembuf s[1]; + + for (i = 0; i < lm_optB; i++) { + s[0].sem_num = 0; + s[0].sem_op = 1; + s[0].sem_flg = 0; + if (semop(ts->ts_semid, s, 1) == -1) { + res->re_errors++; + } + s[0].sem_num = 0; + s[0].sem_op = -1; + s[0].sem_flg = 0; + if (semop(ts->ts_semid, s, 1) == -1) { + res->re_errors++; + } + } + + res->re_count += lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/setcontext.c b/tools/tests/libMicro/setcontext.c new file mode 100644 index 000000000..dad284982 --- /dev/null +++ b/tools/tests/libMicro/setcontext.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + + (void) sprintf(lm_usage, "notes: measures setcontext()\n"); + + lm_tsdsize = 0; + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + volatile int i; + + ucontext_t uc; + + i = 0; + + (void) getcontext(&uc); + + if (i++ < lm_optB) + (void) setcontext(&uc); + + res->re_count += lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/setsockopt.c b/tools/tests/libMicro/setsockopt.c new file mode 100644 index 000000000..c48672efd --- /dev/null +++ b/tools/tests/libMicro/setsockopt.c @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_fd; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, "setsockopt(TCP_NODELAY)\n"); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + + tsd_t *ts = (tsd_t *)tsd; + + if ((ts->ts_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) + return (1); + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + (void) close(ts->ts_fd); + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + int opt; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i++) { + opt = 1 & i; + if (setsockopt(ts->ts_fd, IPPROTO_TCP, TCP_NODELAY, + &opt, sizeof (int)) == -1) { + res->re_errors ++; + } + } + res->re_count += lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/sigaction.c b/tools/tests/libMicro/sigaction.c new file mode 100644 index 000000000..b0053b3e6 --- /dev/null +++ b/tools/tests/libMicro/sigaction.c @@ -0,0 +1,95 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#ifdef __sun +static void +nop() +{ +} +#else +static void +nop(int sig) +{ +} +#endif + + +typedef struct { + struct sigaction ts_act; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_usage, "notes: measures sigaction()\n"); + + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + + tsd_t *ts = (tsd_t *)tsd; + ts->ts_act.sa_handler = nop; + ts->ts_act.sa_flags = 0; + (void) sigemptyset(&ts->ts_act.sa_mask); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + struct sigaction oact; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i++) { + if (sigaction(SIGUSR1, &ts->ts_act, &oact)) + res->re_errors++; + } + + res->re_count += lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/siglongjmp.c b/tools/tests/libMicro/siglongjmp.c new file mode 100644 index 000000000..b4dfd160e --- /dev/null +++ b/tools/tests/libMicro/siglongjmp.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + jmp_buf ts_env; +} tsd_t; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, "notes: measures siglongjmp()\n"); + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + + int i = 0; + + (void) sigsetjmp(ts->ts_env, 1); + + if (i++ < lm_optB) + siglongjmp(ts->ts_env, 0); + + res->re_count = lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/signal.c b/tools/tests/libMicro/signal.c new file mode 100644 index 000000000..623aa08e9 --- /dev/null +++ b/tools/tests/libMicro/signal.c @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + + +#ifdef __sun +static void +nop() +{ +} +#else +static void +nop(int sig) +{ +} +#endif + +int +benchmark_init() +{ + lm_tsdsize = 0; + + (void) sprintf(lm_usage, "notes: measures signal()\n"); + + return (0); +} + +int +benchmark_initrun() +{ + struct sigaction act; + + act.sa_handler = nop; + act.sa_flags = 0; + + (void) sigemptyset(&act.sa_mask); + (void) sigaction(SIGUSR1, &act, NULL); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + int pid; + + pid = getpid(); + + for (i = 0; i < lm_optB; i += 10) { + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + (void) kill(pid, SIGUSR1); + } + res->re_count += i; + + return (0); +} diff --git a/tools/tests/libMicro/sigprocmask.c b/tools/tests/libMicro/sigprocmask.c new file mode 100644 index 000000000..b94675e5f --- /dev/null +++ b/tools/tests/libMicro/sigprocmask.c @@ -0,0 +1,84 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + lm_tsdsize = 0; + + (void) sprintf(lm_usage, "notes: measures sigprocmask()\n"); + + return (0); +} + +int +benchmark_initrun() +{ + sigset_t iset; + + (void) sigemptyset(&iset); + (void) sigprocmask(SIG_SETMASK, &iset, NULL); + + return (0); +} +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + sigset_t set0, set1; + + (void) sigemptyset(&set0); + (void) sigaddset(&set0, SIGTERM); + + for (i = 0; i < lm_optB; i += 10) { + (void) sigprocmask(SIG_SETMASK, &set0, &set1); + (void) sigprocmask(SIG_SETMASK, &set1, &set0); + (void) sigprocmask(SIG_SETMASK, &set0, &set1); + (void) sigprocmask(SIG_SETMASK, &set1, &set0); + (void) sigprocmask(SIG_SETMASK, &set0, &set1); + (void) sigprocmask(SIG_SETMASK, &set1, &set0); + (void) sigprocmask(SIG_SETMASK, &set0, &set1); + (void) sigprocmask(SIG_SETMASK, &set1, &set0); + (void) sigprocmask(SIG_SETMASK, &set0, &set1); + (void) sigprocmask(SIG_SETMASK, &set1, &set0); + } + + res->re_count += i; + + return (0); +} diff --git a/tools/tests/libMicro/socket.c b/tools/tests/libMicro/socket.c new file mode 100644 index 000000000..496966dcd --- /dev/null +++ b/tools/tests/libMicro/socket.c @@ -0,0 +1,157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int *ts_fds; +} tsd_t; + +#define DEFF "PF_UNIX" + +static char *optf = DEFF; +static int family; + +int +lookup_family(char *name) +{ + if (strcmp("PF_UNIX", name) == 0) { + return (PF_UNIX); + } else if (strcmp("PF_INET", name) == 0) { + return (PF_INET); + } else if (strcmp("PF_INET6", name) == 0) { + return (PF_INET6); + } + + return (-1); +} + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + + (void) sprintf(lm_optstr, "f:n"); + + (void) sprintf(lm_usage, + " [-f socket-family (default %s)]\n" + "notes: measures socket\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + + return (0); +} + + +int +benchmark_initrun() +{ + (void) setfdlimit(lm_optB * lm_optT + 10); + family = lookup_family(optf); + + return (0); +} + +int +benchmark_finirun() +{ + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + if (ts->ts_once++ == 0) { + ts->ts_fds = (int *)malloc(lm_optB * sizeof 
(int)); + if (ts->ts_fds == NULL) { + return (1); + } + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = -1; + } + } + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = socket(family, SOCK_STREAM, 0); + if (ts->ts_fds[i] == -1) { + res->re_errors++; + } + } + res->re_count += lm_optB; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + for (i = 0; i < lm_optB; i++) { + (void) close(ts->ts_fds[i]); + } + + return (0); +} diff --git a/tools/tests/libMicro/socketpair.c b/tools/tests/libMicro/socketpair.c new file mode 100644 index 000000000..59e7edb7d --- /dev/null +++ b/tools/tests/libMicro/socketpair.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +typedef struct { + int ts_once; + int *ts_fds; +} tsd_t; + +#define DEFN 256 + + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + lm_defB = 256; + + (void) sprintf(lm_usage, + "notes: measures socketpair\n"); + + return (0); +} + + +int +benchmark_initrun() +{ + (void) setfdlimit(lm_optB * lm_optT + 10); + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + if (ts->ts_once++ == 0) { + ts->ts_fds = (int *)malloc(lm_optB * sizeof (int)); + if (ts->ts_fds == NULL) { + return (1); + } + for (i = 0; i < lm_optB; i++) { + ts->ts_fds[i] = -1; + } + } + + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + res->re_count = 0; + res->re_errors = 0; + + for (i = 0; i < lm_optB; i += 2) { + if (socketpair(PF_UNIX, SOCK_STREAM, 0, &ts->ts_fds[i]) + == -1) { + res->re_errors++; + } + } + res->re_count = i / 2; + + return (0); +} + +int +benchmark_finibatch(void *tsd) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + for (i = 0; i < lm_optB; i++) { + (void) close(ts->ts_fds[i]); + } + + return (0); +} diff --git a/tools/tests/libMicro/stat.c b/tools/tests/libMicro/stat.c new file mode 100644 index 000000000..cffcceed4 --- /dev/null +++ b/tools/tests/libMicro/stat.c @@ -0,0 +1,88 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "/dev/null" +static char *optf = DEFF; + +int +benchmark_init() +{ + + (void) sprintf(lm_optstr, "f:"); + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + " [-f file-to-stat (default %s)]\n" + "notes: measures stat()\n", + DEFF); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + struct stat sbuf; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i++) { + if (stat(optf, &sbuf) == -1) + res->re_errors++; + } + + res->re_count += lm_optB; + + return (0); +} diff --git a/tools/tests/libMicro/strcasecmp.c b/tools/tests/libMicro/strcasecmp.c new file mode 100644 index 000000000..c57137b76 --- /dev/null +++ b/tools/tests/libMicro/strcasecmp.c @@ -0,0 +1,150 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. 
If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + + +static int unaligned = 0; +static int opts = 100; + +typedef struct { + int ts_once; + char *ts_a; + char *ts_b; + int ts_fakegcc; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:n"); + + (void) sprintf(lm_usage, + " [-s string size (default %d)]\n" + " [-n causes unaligned cmp]\n" + "notes: measures strcasecmp()\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + unaligned = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + static char *demo = + "The quick brown fox jumps over the lazy dog."; + + if (ts->ts_once++ == 0) { + int l = strlen(demo); + int i; + + ts->ts_a = malloc(opts + 1); + ts->ts_b = malloc(opts + 1 + unaligned); + ts->ts_b += unaligned; + + for (i = 0; i < opts; i++) { + ts->ts_a[i] = ts->ts_b[i] = demo[i%l]; + } + ts->ts_a[opts] = 0; + ts->ts_b[opts] = 0; + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + char *src = ts->ts_a; + char *src2 = ts->ts_b; + int *sum = &ts->ts_fakegcc; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i += 10) { + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum 
+= strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + *sum += strcasecmp(src, src2); + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + if (unaligned == 0) + (void) sprintf(result, "%8d", opts); + else + (void) sprintf(result, "%8d ", opts); + + return (result); +} diff --git a/tools/tests/libMicro/strchr.c b/tools/tests/libMicro/strchr.c new file mode 100644 index 000000000..54c74778e --- /dev/null +++ b/tools/tests/libMicro/strchr.c @@ -0,0 +1,144 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +static int unaligned = 0; +static int opts = 100; + +typedef struct { + int ts_once; + char *ts_string; + char *ts_fakegcc; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:n"); + + (void) sprintf(lm_usage, + " [-s string size (default %d)]\n" + " [-n causes unaligned strchr]\n" + "notes: measures strchr()\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + unaligned = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + static char *demo = + "The quick brown fox jumps over the lazy dog."; + + if (ts->ts_once++ == 0) { + int l = strlen(demo); + int i; + + ts->ts_string = malloc(opts + 1 + unaligned); + ts->ts_string += unaligned; + + + for (i = 0; i < opts; i++) { + ts->ts_string[i] = demo[i%l]; + } + + ts->ts_string[opts] = 0; + + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + char *src = ts->ts_string; + + for (i = 0; i < lm_optB; i += 10) { + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + ts->ts_fakegcc = strchr(src, 'X'); + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + if (unaligned == 0) + (void) sprintf(result, "%8d", opts); + else + (void) sprintf(result, "%8d ", opts); + + return (result); +} diff --git a/tools/tests/libMicro/strcmp.c b/tools/tests/libMicro/strcmp.c new file mode 100644 index 
000000000..c3c0da018 --- /dev/null +++ b/tools/tests/libMicro/strcmp.c @@ -0,0 +1,147 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +static int unaligned = 0; +static int opts = 100; + +typedef struct { + int ts_once; + char *ts_a; + char *ts_b; + int ts_fakegcc; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:n"); + + (void) sprintf(lm_usage, + " [-s string size (default %d)]\n" + " [-n causes unaligned cmp]\n" + "notes: measures strcmp()\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + unaligned = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + static char *demo = + "The quick brown fox jumps over the lazy dog."; + + if (ts->ts_once++ == 0) { + int l = strlen(demo); + int i; + + ts->ts_a = malloc(opts + 1); + ts->ts_b = malloc(opts + 1 + unaligned); + ts->ts_b += unaligned; + + for (i = 0; i < opts; i++) { + ts->ts_a[i] = ts->ts_b[i] = demo[i%l]; + } + ts->ts_a[opts] = 0; + ts->ts_b[opts] = 0; + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + int *sum = &ts->ts_fakegcc; + char *src = ts->ts_a; + char *src2 = ts->ts_b; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i += 10) { + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + *sum += strcmp(src, src2); + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + if (unaligned == 0) + (void) sprintf(result, "%8d", opts); + else + (void) sprintf(result, "%8d ", opts); + + return (result); +} diff --git a/tools/tests/libMicro/strcpy.c 
b/tools/tests/libMicro/strcpy.c new file mode 100644 index 000000000..3a32353ed --- /dev/null +++ b/tools/tests/libMicro/strcpy.c @@ -0,0 +1,145 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +static int unaligned = 0; +static int opts = 100; + +typedef struct { + int ts_once; + char *ts_a; + char *ts_b; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:n"); + + (void) sprintf(lm_usage, + " [-s string size (default %d)]\n" + " [-n causes unaligned cmp]\n" + "notes: measures strcpy()\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + unaligned = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + static char *demo = + "The quick brown fox jumps over the lazy dog."; + + if (ts->ts_once++ == 0) { + int l = strlen(demo); + int i; + + ts->ts_a = malloc(opts + 1); + ts->ts_b = malloc(opts + 1 + unaligned); + ts->ts_b += unaligned; + + for (i = 0; i < opts; i++) { + ts->ts_b[i] = demo[i%l]; + } + ts->ts_b[opts] = 0; + } + return (0); +} + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + + char *src = ts->ts_a; + char *src2 = ts->ts_b; + + res->re_errors = 0; + + for (i = 0; i < lm_optB; i += 10) { + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + (void) strcpy(src, src2); + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + if (unaligned == 0) + (void) sprintf(result, "%8d", opts); + else + (void) sprintf(result, "%8d ", opts); + + return (result); +} diff --git a/tools/tests/libMicro/strftime.c b/tools/tests/libMicro/strftime.c new file mode 100644 index 000000000..b05646684 --- /dev/null +++ 
b/tools/tests/libMicro/strftime.c @@ -0,0 +1,129 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +#define DEFF "%c" +#define MAXSIZE 80 + +static char *optf = DEFF; + +typedef struct { + int ts_once; + struct tm ts_tm1; + struct tm ts_tm2; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:"); + + (void) sprintf(lm_usage, + " [-f format (default = \"%s\")]\n" + "notes: measures strftime()\n", + DEFF); + + (void) sprintf(lm_header, "%8s", "format"); + + return (0); +} +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + + case 'f': + optf = optarg; + break; + default: + return (-1); + } + return (0); +} + + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8s", optf); + + return (result); +} + + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + + static time_t clock1 = 0L; + static time_t clock2 = 1L; + + (void) localtime_r(&clock1, &ts->ts_tm1); + (void) localtime_r(&clock2, &ts->ts_tm2); + + return (0); +} + + +int +benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + char s[MAXSIZE]; + + for (i = 0; i < lm_optB; i += 10) { + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm1); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm2); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm1); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm2); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm1); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm2); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm1); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm2); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm1); + (void) strftime(s, MAXSIZE, optf, &ts->ts_tm2); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/strlen.c b/tools/tests/libMicro/strlen.c new file mode 100644 index 000000000..bfcce12d3 --- /dev/null +++ b/tools/tests/libMicro/strlen.c @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of 
the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include + +#include "libmicro.h" + +static int unaligned = 0; +static int opts = 100; + +typedef struct { + int ts_once; + char *ts_string; + int ts_fakegcc; +} tsd_t; + +int +benchmark_init() +{ + + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "s:n"); + + (void) sprintf(lm_usage, + " [-s string size (default %d)]\n" + " [-n causes unaligned strlen]\n" + "notes: measures strlen()\n", + opts); + + (void) sprintf(lm_header, "%8s", "size"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'n': + unaligned = 1; + break; + case 's': + opts = sizetoll(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + static char *demo = + "The quick brown fox jumps over the lazy dog."; + + if (ts->ts_once++ == 0) { + int l = strlen(demo); + int i; + + ts->ts_string = malloc(opts + 1 + unaligned); + ts->ts_string += unaligned; + + + for (i = 0; i < opts; i++) { + ts->ts_string[i] = demo[i%l]; + } + + ts->ts_string[opts] = 0; + + } + return (0); +} + +int 
+benchmark(void *tsd, result_t *res) +{ + int i; + tsd_t *ts = (tsd_t *)tsd; + char *src = ts->ts_string; + + for (i = 0; i < lm_optB; i += 10) { + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + ts->ts_fakegcc += strlen(src); + } + + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + if (unaligned == 0) + (void) sprintf(result, "%8d", opts); + else + (void) sprintf(result, "%8d ", opts); + + return (result); +} diff --git a/tools/tests/libMicro/strtol.c b/tools/tests/libMicro/strtol.c new file mode 100644 index 000000000..02f8abc75 --- /dev/null +++ b/tools/tests/libMicro/strtol.c @@ -0,0 +1,67 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + (void) sprintf(lm_usage, "note: measures strtol()"); + lm_tsdsize = 0; + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i += 10) { + (void) strtol("1", NULL, 10); + (void) strtol("11", NULL, 10); + (void) strtol("123", NULL, 10); + (void) strtol("1234", NULL, 10); + (void) strtol("12345", NULL, 10); + (void) strtol("123456", NULL, 10); + (void) strtol("1234567", NULL, 10); + (void) strtol("12345678", NULL, 10); + (void) strtol("123456789", NULL, 10); + (void) strtol("1234567890", NULL, 10); + } + res->re_count = i; + + return (0); +} diff --git a/tools/tests/libMicro/system.c b/tools/tests/libMicro/system.c new file mode 100644 index 000000000..d70db260f --- /dev/null +++ b/tools/tests/libMicro/system.c @@ -0,0 +1,98 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include + +#include "libmicro.h" + +#define DEFB 10 +#define DEFC "A=$$" + +static char *optc = DEFC; + +int +benchmark_init() +{ + lm_tsdsize = 0; + + (void) sprintf(lm_optstr, "c:"); + + (void) sprintf(lm_usage, + " [-c command (default %s)]\n" + "notes: measures system()\n", + DEFC); + + (void) sprintf(lm_header, "%8s", "command"); + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'c': + optc = optarg; + break; + default: + return (-1); + } + + return (0); +} + + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i++) { + if (system(optc) != 0) { + res->re_errors++; + } + } + res->re_count = lm_optB; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8s", optc); + + return (result); +} diff --git a/tools/tests/libMicro/tattle.c b/tools/tests/libMicro/tattle.c new file mode 100644 index 000000000..59520f8ce --- /dev/null +++ b/tools/tests/libMicro/tattle.c @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "libmicro.h"
+#include
+
+
+#ifdef USE_RDTSC
+#ifdef __GNUC__
+#define ENABLE_RDTSC 1
+#endif
+#endif
+
+/*
+ * dummy so we can link w/ libmicro
+ */
+
+/*ARGSUSED*/
+int
+benchmark(void *tsd, result_t *res)
+{
+	return (0);
+}
+
+/*
+ * Strip leading and trailing spaces (' ' only) from s, in place.
+ *
+ * s must be a writable, NUL-terminated string.  An empty string, or one
+ * made up only of spaces, ends up as the empty string.
+ */
+static void
+cleanup(char *s)
+{
+	char *o = s;
+	char *e;
+
+	while (*s == ' ')
+		s++;
+
+	/*
+	 * Source and destination overlap here, which strcpy() does not
+	 * allow; memmove() is defined for overlapping regions.
+	 */
+	if (o != s)
+		(void) memmove(o, s, strlen(s) + 1);
+
+	/*
+	 * Nothing left to trim.  Returning here also keeps 'e' from being
+	 * stepped to (and read at) the byte before the buffer.
+	 */
+	if (*o == 0)
+		return;
+
+	e = o + strlen(o) - 1;
+
+	while (*e == ' ' && e > o)
+		*e-- = 0;
+
+}
+
+
+/*
+ * Print build/runtime facts about this libMicro build, one per option:
+ *
+ *	-V	LIBMICRO_VERSION
+ *	-v	compiler_version (truncated to 30 characters, space-trimmed)
+ *	-c	CC
+ *	-f	extra_compiler_flags, or "[none]" when empty
+ *	-s	sizeof (long)
+ *	-r	get_nsecs_resolution(), in nsecs
+ *	-R	with ENABLE_RDTSC: rdtsc() ticks counted over a little more
+ *		than one second of gettimeofday() time, printed as
+ *		LIBMICRO_HZ; otherwise an empty line
+ *
+ * 'T' is accepted by getopt() but has no case below, so it is ignored.
+ * Always exits with status 0.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c;
+
+	if (strlen(compiler_version) > 30)
+		compiler_version[30] = 0;
+
+	cleanup(compiler_version);
+	cleanup(extra_compiler_flags);
+
+	while ((c = getopt(argc, argv, "vcfrsVTR")) != -1) {
+		switch (c) {
+		case 'V':
+			(void) printf("%s\n", LIBMICRO_VERSION);
+			break;
+		case 'v':
+			(void) printf("%s\n", compiler_version);
+			break;
+		case 'c':
+			(void) printf("%s\n", CC);
+			break;
+		case 'f':
+			if (strlen(extra_compiler_flags) == 0)
+				(void) printf("[none]\n");
+			else
+				(void) printf("%s\n", extra_compiler_flags);
+			break;
+
+		case 's':
+			/* sizeof yields size_t; convert to match %d */
+			(void) printf("%d\n", (int)sizeof (long));
+			break;
+
+		case 'r':
+
+			(void) printf("%lld nsecs\n", get_nsecs_resolution());
+			break;
+
+		case 'R':
+#ifdef ENABLE_RDTSC
+			{
+				struct timeval s;
+				struct timeval f;
+				long long start_nsecs;
+				long long end_nsecs;
+				long elapsed_usecs;
+
+				gettimeofday(&s, NULL);
+				start_nsecs = rdtsc();
+				for (;;) {
+					gettimeofday(&f, NULL);
+					elapsed_usecs = (f.tv_sec - s.tv_sec) *
+					    1000000 + (f.tv_usec - s.tv_usec);
+					if (elapsed_usecs > 1000000)
+						break;
+				}
+				end_nsecs = rdtsc();
+				(void) printf("LIBMICRO_HZ=%lld\n",
+				    (long long)elapsed_usecs *
+				    (end_nsecs - start_nsecs) / 1000000LL);
+			}
+#else
+			(void) printf("\n");
+#endif
+			break;
+		}
+	}
+
+	exit(0);
+	return (0);
+}
diff --git a/tools/tests/libMicro/time.c b/tools/tests/libMicro/time.c
new file mode 100644
index
000000000..aa46d499f --- /dev/null +++ b/tools/tests/libMicro/time.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + "notes: measures time()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + + for (i = 0; i < lm_optB; i += 10) { + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + (void) time(NULL); + } + res->re_count += i; + + return (0); +} diff --git a/tools/tests/libMicro/times.c b/tools/tests/libMicro/times.c new file mode 100644 index 000000000..670af9bf8 --- /dev/null +++ b/tools/tests/libMicro/times.c @@ -0,0 +1,73 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms + * of the Common Development and Distribution License + * (the "License"). 
You may not use this file except + * in compliance with the License. + * + * You can obtain a copy of the license at + * src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing + * permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL + * HEADER in each file and include the License file at + * usr/src/OPENSOLARIS.LICENSE. If applicable, + * add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your + * own identifying information: Portions Copyright [yyyy] + * [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include + +#include "libmicro.h" + +int +benchmark_init() +{ + + lm_tsdsize = 0; + + (void) sprintf(lm_usage, + "notes: measures times()\n"); + + return (0); +} + +/*ARGSUSED*/ +int +benchmark(void *tsd, result_t *res) +{ + int i; + struct tms buf; + + for (i = 0; i < lm_optB; i += 10) { + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + (void) times(&buf); + } + res->re_count += i; + + return (0); +} diff --git a/tools/tests/libMicro/wrapper.sh b/tools/tests/libMicro/wrapper.sh new file mode 100644 index 000000000..791643dbe --- /dev/null +++ b/tools/tests/libMicro/wrapper.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms +# of the Common Development and Distribution License +# (the "License"). You may not use this file except +# in compliance with the License. +# +# You can obtain a copy of the license at +# src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL
+# HEADER in each file and include the License file at
+# usr/src/OPENSOLARIS.LICENSE. If applicable,
+# add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your
+# own identifying information: Portions Copyright [yyyy]
+# [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+# Re-exec the same-named binary from ../bin-<arch>; quoted so odd paths survive.
+BASENAME=`basename "$0"`
+DIRNAME=`dirname "$0"`
+ARCH=`uname -m`
+
+exec "$DIRNAME/../bin-$ARCH/$BASENAME" "$@"
diff --git a/tools/tests/libMicro/write.c b/tools/tests/libMicro/write.c
new file mode 100644
index 000000000..078027435
--- /dev/null
+++ b/tools/tests/libMicro/write.c
@@ -0,0 +1,154 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifdef linux
+#define _XOPEN_SOURCE 500
+#endif
+
+#include
+#include
+#include
+#include
+
+#include "libmicro.h"
+
+/* Per-thread state: the buffer that is written and the descriptor it goes to. */
+typedef struct {
+	char *ts_buf;
+	int ts_fd;
+} tsd_t;
+
+#define DEFF "/dev/null"
+#define DEFS 1024
+
+static int optc = 0;
+static char *optf = DEFF;
+static long long opts = DEFS;
+static int optd;
+
+/*
+ * Register the per-thread state size, the option string, the usage text
+ * and the result header with libmicro.  Always returns 0.
+ */
+int
+benchmark_init()
+{
+	lm_tsdsize = sizeof (tsd_t);
+
+	(void) sprintf(lm_optstr, "cdf:s:");
+
+	/*
+	 * Every option line ends in "\n" so that the __sun-only -d line
+	 * does not run into the "notes:" line that follows it.
+	 */
+	(void) sprintf(lm_usage,
+	    " [-f file-to-write (default %s)]\n"
+	    " [-s buffer-size (default %d)]\n"
+	    " [-c ] (make sure buffer is in cache)\n"
+#ifdef __sun
+	    " [-d ] use directio\n"
+#endif
+	    "notes: measures write()\n",
+	    DEFF, DEFS);
+
+	(void) sprintf(lm_header, "%8s", "size");
+
+	lm_defB = 1;
+
+	return (0);
+}
+
+/*
+ * Handle one command-line option:
+ *	-d  request directio (only acted on under __sun)
+ *	-c  touch the buffer before the first batch
+ *	-f  file to write to
+ *	-s  number of bytes per write()
+ * Returns 0 on success, -1 for an option this benchmark does not know.
+ */
+int
+benchmark_optswitch(int opt, char *optarg)
+{
+	switch (opt) {
+
+	case 'd':
+		optd++;
+		break;
+	case 'c':
+		optc++;
+		break;
+	case 'f':
+		optf = optarg;
+		break;
+	case 's':
+		opts = sizetoll(optarg);
+		break;
+	default:
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Prepare one batch.  On the first call for a thread (ts_buf still NULL)
+ * the write buffer is allocated and the target file is opened; every call
+ * rewinds the file to offset 0.
+ *
+ * Returns the number of setup errors (0 on success), the same convention
+ * the writev benchmark's initbatch uses.  A failed allocation returns
+ * before anything else is set up, so ts_buf stays NULL and the next batch
+ * tries again.
+ */
+int
+benchmark_initbatch(void *tsd)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	long long i;	/* same width as opts, so large -s values cannot overflow it */
+	int errors = 0;
+
+	if (ts->ts_buf == NULL) {
+		ts->ts_buf = malloc(opts);
+		if (ts->ts_buf == NULL)
+			return (1);
+
+		ts->ts_fd = open(optf, O_WRONLY);
+		if (ts->ts_fd == -1)
+			errors++;
+
+#ifdef __sun
+		if (optd)
+			(void) directio(ts->ts_fd, DIRECTIO_ON);
+#endif
+		/*
+		 * bring buf into cache if specified.
+		 */
+
+		if (optc)
+			for (i = 0; i < opts; i++)
+				ts->ts_buf[i] = 0;
+	}
+
+	(void) lseek(ts->ts_fd, 0, SEEK_SET);
+
+	return (errors);
+}
+
+/*
+ * Issue lm_optB write() calls of opts bytes each.  A call that does not
+ * write exactly opts bytes is counted in res->re_errors; res->re_count is
+ * set to the number of calls made.  Always returns 0.
+ */
+int
+benchmark(void *tsd, result_t *res)
+{
+	tsd_t *ts = (tsd_t *)tsd;
+	int i;
+
+	for (i = 0; i < lm_optB; i++) {
+		if (write(ts->ts_fd, ts->ts_buf, opts) != opts) {
+			res->re_errors++;
+		}
+	}
+	res->re_count = i;
+
+	return (0);
+}
+
+/*
+ * Format the write size for the result line.  The returned pointer refers
+ * to a static buffer that is overwritten on every call.
+ */
+char *
+benchmark_result()
+{
+	static char result[256];
+
+	(void) sprintf(result, "%8lld", opts);
+
+	return (result);
+}
diff --git a/tools/tests/libMicro/writev.c b/tools/tests/libMicro/writev.c
new file mode 100644
index 000000000..ac1bf2ef3
--- /dev/null
+++ b/tools/tests/libMicro/writev.c
@@ -0,0 +1,149 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms
+ * of the Common Development and Distribution License
+ * (the "License"). You may not use this file except
+ * in compliance with the License.
+ *
+ * You can obtain a copy of the license at
+ * src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL
+ * HEADER in each file and include the License file at
+ * usr/src/OPENSOLARIS.LICENSE. If applicable,
+ * add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your
+ * own identifying information: Portions Copyright [yyyy]
+ * [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */ + +#include +#include +#include +#include +#include +#include + +#ifndef IOV_MAX +#define IOV_MAX UIO_MAXIOV +#endif + +#include "libmicro.h" + +typedef struct { + int ts_once; + struct iovec *ts_iov; + int ts_fd; +} tsd_t; + +#define DEFF "/dev/null" +#define DEFS 1024 +#define DEFV 10 + +static char *optf = DEFF; +static int opts = DEFS; +static int optv = DEFV; + +int +benchmark_init() +{ + lm_tsdsize = sizeof (tsd_t); + + (void) sprintf(lm_optstr, "f:s:v:"); + + (void) sprintf(lm_usage, + " [-f file-to-write (default %s)]\n" + " [-s buffer-size (default %d)]\n" + " [-v vector-size (default %d)]\n" + "notes: measures writev()\n" + " IOV_MAX is %d\n" + " SSIZE_MAX is %ld\n", + DEFF, DEFS, DEFV, IOV_MAX, SSIZE_MAX); + + (void) sprintf(lm_header, "%8s %4s", "size", "vec"); + + lm_defB = 1; + + return (0); +} + +int +benchmark_optswitch(int opt, char *optarg) +{ + switch (opt) { + case 'f': + optf = optarg; + break; + case 's': + opts = sizetoint(optarg); + break; + case 'v': + optv = atoi(optarg); + break; + default: + return (-1); + } + return (0); +} + +int +benchmark_initbatch(void *tsd) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + int errors = 0; + + if (ts->ts_once++ == 0) { + ts->ts_fd = open(optf, O_WRONLY); + if (ts->ts_fd == -1) { + errors++; + } + ts->ts_iov = (struct iovec *)malloc( + optv * sizeof (struct iovec)); + for (i = 0; i < optv; i++) { + ts->ts_iov[i].iov_base = malloc(opts); + ts->ts_iov[i].iov_len = opts; + } + } + + (void) lseek(ts->ts_fd, 0, SEEK_SET); + + return (errors); +} + +int +benchmark(void *tsd, result_t *res) +{ + tsd_t *ts = (tsd_t *)tsd; + int i; + + for (i = 0; i < lm_optB; i++) { + if (writev(ts->ts_fd, ts->ts_iov, optv) != opts * optv) { + res->re_errors++; + } + } + res->re_count = i; + + return (0); +} + +char * +benchmark_result() +{ + static char result[256]; + + (void) sprintf(result, "%8d %4d", opts, optv); + + return (result); +} diff --git a/tools/tests/superpages/measure_tlbs.c 
b/tools/tests/superpages/measure_tlbs.c
new file mode 100644
index 000000000..02097c588
--- /dev/null
+++ b/tools/tests/superpages/measure_tlbs.c
@@ -0,0 +1,129 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SUPERPAGE_SIZE (2*1024*1024)
+#define SUPERPAGE_MASK (-SUPERPAGE_SIZE)
+#define SUPERPAGE_ROUND_UP(a) ((a + SUPERPAGE_SIZE-1) & SUPERPAGE_MASK)
+
+#define RUNS0 100000
+#define STEP 4 /* KB */
+#define START STEP
+#define MAX (1024*1024) /* KB */
+
+#define RUNS1 RUNS0
+#define RUNS2 (RUNS0/20)
+
+clock_t
+testt(boolean_t superpages, int mode, int write, int kb) {
+	static int sum;
+	char *data;
+	unsigned int run, p, p2, i, res;
+	mach_vm_address_t addr = 0;
+	int pages = kb/4;
+	mach_vm_size_t size = SUPERPAGE_ROUND_UP(pages*PAGE_SIZE); /* allocate full superpages */
+	int kr;
+
+	kr = mach_vm_allocate(mach_task_self(), &addr, size, VM_FLAGS_ANYWHERE | (superpages? VM_FLAGS_SUPERPAGE_SIZE_2MB : VM_FLAGS_SUPERPAGE_NONE));
+
+	if (!addr)
+		return 0;
+
+	data = (char*)(long)addr;
+
+	/* touch every base page to make sure everything is mapped and zero-filled */
+	for (p = 0; p
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SUPERPAGE_SIZE (2*1024*1024)
+#define SUPERPAGE_MASK (-SUPERPAGE_SIZE)
+
+#define MAP_SUPERPAGE 0x2000
+
+#ifdef __LP64__
+#define FIXED_ADDRESS1 (0x100000000ULL+500*1024*1024) /* at 4 GB + 500 MB virtual */
+#define FIXED_ADDRESS2 (0x100000000ULL+502*1024*1024 + 4*1024) /* at 4 GB + 502 MB + 4 KB virtual */
+#else
+#define FIXED_ADDRESS1 (500*1024*1024) /* at 500 MB virtual */
+#define FIXED_ADDRESS2 (502*1024*1024 + 4*1024) /* at 502 MB + 4 KB virtual */
+#endif
+
+char error[100];
+
+jmp_buf resume;
+void test_signal_handler(int signo)
+{
+	longjmp(resume, signo);
+}
+
+char *signame[32] = {
+	[SIGBUS] "SIGBUS",
+	[SIGSEGV] "SIGSEGV"
+};
+
+typedef struct {
+	char *description;
+	boolean_t (*fn)();
+} test_t;
+
+/* Returns TRUE when kr is 0; otherwise records "<fn>() returned <kr>" in error[] and returns FALSE. */
+boolean_t
+check_kr(int kr, char *fn) {
+	if (kr) {
+		sprintf(error, "%s() returned %d", fn, kr);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/* Returns TRUE when addr is non-zero; otherwise records the failure of fn in error[] and returns FALSE. */
+boolean_t
+check_addr0(mach_vm_address_t addr, char *fn) {
+	if (!addr) {
+		sprintf(error, "%s() returned address 0", fn);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/* Returns TRUE when addr1 equals the expected addr2; otherwise records both in error[] and returns FALSE. */
+boolean_t
+check_addr(mach_vm_address_t addr1, mach_vm_address_t addr2, char *fn) {
+	if (addr1 != addr2) {
+		sprintf(error, "%s() returned address %llx instead of %llx", fn, addr1, addr2);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+/*
+ * Returns TRUE when addr is a multiple of SUPERPAGE_SIZE; otherwise
+ * records the address in error[] and returns FALSE.
+ *
+ * The offset bits are selected with (SUPERPAGE_SIZE - 1).  A logical
+ * "!SUPERPAGE_MASK" evaluates to 0, which makes the test pass for every
+ * address.
+ */
+boolean_t
+check_align(mach_vm_address_t addr) {
+	if (addr & (mach_vm_address_t)(SUPERPAGE_SIZE - 1)) {
+		sprintf(error, "address not aligned properly: 0x%llx", addr);
+		return FALSE;
+	}
+	return TRUE;
+}
+
+boolean_t
+check_r(mach_vm_address_t addr, mach_vm_size_t size, int *res) {
+	volatile char *data = (char*)(uintptr_t)addr;
+	int i, sig, test;
+
+	if ((sig = setjmp(resume)) != 0) {
+		sprintf(error, "%s when reading", signame[sig]);
+		return FALSE;
+	}
+	test = 0;
+	for (i=0; i1) {
+		if (!strcmp(argv[1], "-h")) {
+			printf("Usage: %s \n", argv[0]);
+			printf("\tmode = 0: test all cases\n");
+			printf("\tmode = -1: allocate/deallocate until failure\n");
+			printf("\tmode > 0: run test \n");
+			exit(0);
+		}
+		mode=atoi(argv[1]);
+	}
+
+	/* install SIGBUS handler */
+	struct sigaction my_sigaction;
+	my_sigaction.sa_handler = test_signal_handler;
+	my_sigaction.sa_flags = SA_RESTART;
+	my_sigaction.sa_mask = 0;
+	sigaction( SIGBUS, &my_sigaction, NULL );
+	sigaction( SIGSEGV, &my_sigaction, NULL );
+
+	if (mode>0) /* one specific test */
+		testit(mode-1);
+
+	if (mode==0) { /* test all cases */
+		printf("Running %d tests:\n", TESTS);
+		for (i=0; i
+
+
+
+	CFBundleDevelopmentRegion
+	English
+	CFBundleExecutable
+	${EXECUTABLE_NAME}
+	CFBundleIdentifier
+	com.yourcompany.driver.${PRODUCT_NAME:identifier}
+	CFBundleInfoDictionaryVersion
+	6.0
+	CFBundlePackageType
+	KEXT
+	CFBundleSignature
+	????
+ CFBundleVersion + 1.0.0d1 + IOKitPersonalities + + testvmx + + CFBundleIdentifier + com.yourcompany.driver.${PRODUCT_NAME:identifier} + IOClass + testvmx + IOMatchCategory + testvmx + IOProviderClass + IOResources + IOResourceMatch + IOKit + + + OSBundleLibraries + + com.apple.kpi.mach + 10.0.0d3 + com.apple.kpi.libkern + 10.0.0d3 + com.apple.kpi.iokit + 10.0.0d3 + + + diff --git a/tools/tests/testkext/testvmx.cpp b/tools/tests/testkext/testvmx.cpp new file mode 100644 index 000000000..eaa93d3ee --- /dev/null +++ b/tools/tests/testkext/testvmx.cpp @@ -0,0 +1,51 @@ +/* + * testvmx.cpp + * testkext + * + * Created by Shantonu Sen on 10/24/08. + * Copyright 2008 Apple Computer, Inc.. All rights reserved. + * + */ + +#include "testvmx.h" + +#if !(defined(__i386__) || defined(__x86_64__)) +#error VMX only supported on i386/x86_64 +#endif + +#include +#include + + +#define super IOService +OSDefineMetaClassAndStructors(testvmx, super); + +bool +testvmx::start( IOService * provider ) +{ + int ret; + + IOLog("%s\n", __PRETTY_FUNCTION__); + + if (!super::start(provider)) { + return false; + } + + IOLog("Attempting host_vmxon\n"); + ret = host_vmxon(FALSE); + IOLog("host_vmxon: %d\n", ret); + + return true; +} + +void +testvmx::stop( IOService * provider ) +{ + IOLog("%s\n", __PRETTY_FUNCTION__); + + super::stop(provider); + + IOLog("Attempting host_vmxoff\n"); + host_vmxoff(); + IOLog("host_vmxoff called\n"); +} diff --git a/tools/tests/testkext/testvmx.h b/tools/tests/testkext/testvmx.h new file mode 100644 index 000000000..9da7ca9d5 --- /dev/null +++ b/tools/tests/testkext/testvmx.h @@ -0,0 +1,20 @@ +/* + * testvmx.h + * testkext + * + * Created by Shantonu Sen on 10/24/08. + * Copyright 2008 Apple Computer, Inc.. All rights reserved. 
+ * + */ + +#include +#include + +class testvmx : public IOService { + OSDeclareDefaultStructors(testvmx); + + virtual bool start( IOService * provider ); + + virtual void stop( IOService * provider ); + +}; diff --git a/tools/tests/xnu_quick_test/32bit_inode_tests.c b/tools/tests/xnu_quick_test/32bit_inode_tests.c new file mode 100644 index 000000000..e5effea56 --- /dev/null +++ b/tools/tests/xnu_quick_test/32bit_inode_tests.c @@ -0,0 +1,294 @@ +/* + * 32bit_inode_tests.c + * xnu_quick_test + * + * Created by Ryan Branche on 2/17/08. + * Copyright 2008 Apple Inc. All rights reserved. + * + */ + +/* + * Explicitely turn off 64-bit inodes because we are testing the 32-bit inode + * versions of statfs functions and getdirentries doesn't support 64-bit inodes. + */ +#define _DARWIN_NO_64_BIT_INODE 1 + +#include "tests.h" +#include +#include + +extern char g_target_path[ PATH_MAX ]; +extern int g_skip_setuid_tests; +extern int g_is_under_rosetta; +extern int g_is_single_user; + +/* ************************************************************************************************************** + * Test getdirentries system call. 
+ * **************************************************************************************************************
+ */
+struct test_attr_buf {
+	uint32_t length;
+	fsobj_type_t obj_type;
+	fsobj_id_t obj_id;
+	struct timespec backup_time;
+};
+
+typedef struct test_attr_buf test_attr_buf;
+
+/*
+ * Create a regular file under g_target_path, then read that directory
+ * with getdirentries() until an entry is found whose name matches the new
+ * file, whose type is DT_REG and whose inode number is non-zero.
+ *
+ * the_argp is not used.
+ *
+ * Returns 0 when the file is found.  Returns -1 when any setup step fails
+ * (statfs, vm_allocate, create_random_name, open) or when the directory
+ * listing ends without the file showing up.  The test file and both
+ * buffers are released on every path.
+ */
+int getdirentries_test( void * the_argp )
+{
+	int my_err, done, found_it;
+	int my_fd = -1;
+	int is_ufs = 0;
+	char * my_pathp = NULL;
+	char * my_bufp = NULL;
+	char * my_file_namep;
+	long my_base;	/* getdirentries() takes a long * base pointer */
+	struct statfs my_statfs_buf;
+	kern_return_t my_kr;
+
+	/* need to know type of file system */
+	my_err = statfs( &g_target_path[0], &my_statfs_buf );
+	if ( my_err == -1 ) {
+		printf( "statfs call failed. got errno %d - %s. \n", errno, strerror( errno ) );
+		goto test_failed_exit;
+	}
+	if ( memcmp( &my_statfs_buf.f_fstypename[0], "ufs", 3 ) == 0 ) {
+		is_ufs = 1;
+	}
+
+	/* vm_allocate reports failure through its kern_return_t, not through errno */
+	my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (1024 * 5), VM_FLAGS_ANYWHERE);
+	if(my_kr != KERN_SUCCESS){
+		printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
+		goto test_failed_exit;
+	}
+
+	my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE);
+	if(my_kr != KERN_SUCCESS){
+		printf( "vm_allocate failed with kern_return_t %d \n", my_kr );
+		goto test_failed_exit;
+	}
+
+	*my_pathp = 0x00;
+	strcat( my_pathp, &g_target_path[0] );
+	strcat( my_pathp, "/" );
+
+	/* create a test file */
+	my_err = create_random_name( my_pathp, 1 );
+	if ( my_err != 0 ) {
+		goto test_failed_exit;
+	}
+
+	/* get pointer to just the file name */
+	my_file_namep = strrchr( my_pathp, '/' );
+	my_file_namep++;
+
+	/* check out the test directory */
+	my_fd = open( &g_target_path[0], (O_RDONLY), 0 );
+	if ( my_fd == -1 ) {
+		printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) );
+		goto test_failed_exit;
+	}
+
+	done = found_it = 0;
+	while ( done == 0 ) {
+		int my_result, i;
+		struct dirent * my_dirent_p;
+
+		/* This call requires that 64-bit inodes are disabled */
+		my_result = getdirentries( my_fd, my_bufp, (1024 * 5), &my_base );
+		if ( my_result <= 0 )
+			break;
+		for ( i = 0; i < my_result; ) {
+			my_dirent_p = (struct dirent *) (my_bufp + i);
+#if DEBUG
+			printf( "d_ino %d d_reclen %d d_type %d d_namlen %d \"%s\" \n",
+				my_dirent_p->d_ino, my_dirent_p->d_reclen, my_dirent_p->d_type,
+				my_dirent_p->d_namlen, &my_dirent_p->d_name[0] );
+#endif
+
+			/* a zero-length record would never advance i; give up on this buffer */
+			if ( my_dirent_p->d_reclen == 0 )
+				break;
+
+			i += my_dirent_p->d_reclen;
+			/* validate results by looking for our test file */
+			if ( my_dirent_p->d_type == DT_REG && my_dirent_p->d_ino != 0 &&
+				 strlen( my_file_namep ) == my_dirent_p->d_namlen &&
+				 memcmp( &my_dirent_p->d_name[0], my_file_namep, my_dirent_p->d_namlen ) == 0 ) {
+				done = found_it = 1;
+				break;
+			}
+		}
+	}
+	if ( found_it == 0 ) {
+		printf( "getdirentries failed to find test file. \n" );
+		goto test_failed_exit;
+	}
+
+	my_err = 0;
+	goto test_passed_exit;
+
+test_failed_exit:
+	/* several failure paths arrive here with my_err still 0, so force the failure code */
+	my_err = -1;
+
+test_passed_exit:
+	if ( my_fd != -1 )
+		close( my_fd );
+	if ( my_pathp != NULL ) {
+		remove( my_pathp );
+		vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX);
+	}
+	if ( my_bufp != NULL ) {
+		vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, (1024 * 5));
+	}
+
+	return( my_err );
+}
+
+
+/* **************************************************************************************************************
+ * Test 32-bit inode versions of statfs, fstatfs, and getfsstat system calls.
+ * ************************************************************************************************************** + */ + +#pragma pack(4) +struct vol_attr_buf { + u_int32_t length; + off_t volume_size; + u_int32_t io_blksize; +}; +#pragma pack() +typedef struct vol_attr_buf vol_attr_buf; + +int statfs_32bit_inode_tests( void * the_argp ) +{ + int my_err, my_count, i; + int my_buffer_size; + int my_fd = -1; + int is_ufs = 0; + void * my_bufferp = NULL; + struct statfs * my_statfsp; + long my_io_size; + fsid_t my_fsid; + struct attrlist my_attrlist; + vol_attr_buf my_attr_buf; + kern_return_t my_kr; + + my_buffer_size = (sizeof(struct statfs) * 10); + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufferp, my_buffer_size, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_statfsp = (struct statfs *) my_bufferp; + my_err = statfs( "/", my_statfsp ); + if ( my_err == -1 ) { + printf( "statfs call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + if ( memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 ) { + is_ufs = 1; + } + + my_count = getfsstat( (struct statfs *)my_bufferp, my_buffer_size, MNT_NOWAIT ); + if ( my_count == -1 ) { + printf( "getfsstat call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* validate results */ + my_statfsp = (struct statfs *) my_bufferp; + for ( i = 0; i < my_count; i++, my_statfsp++ ) { + if ( memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 || + memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0 || + memcmp( &my_statfsp->f_fstypename[0], "devfs", 5 ) == 0 || + memcmp( &my_statfsp->f_fstypename[0], "volfs", 5 ) == 0 ) { + /* found a valid entry */ + break; + } + } + if ( i >= my_count ) { + printf( "getfsstat call failed. could not find valid f_fstypename! 
\n" ); + goto test_failed_exit; + } + + /* set up to validate results via multiple sources. we use getattrlist to get volume + * related attributes to verify against results from fstatfs and statfs - but only if + * we are not targeting ufs volume since it doesn't support getattr calls + */ + if ( is_ufs == 0 ) { + memset( &my_attrlist, 0, sizeof(my_attrlist) ); + my_attrlist.bitmapcount = ATTR_BIT_MAP_COUNT; + my_attrlist.volattr = (ATTR_VOL_SIZE | ATTR_VOL_IOBLOCKSIZE); + my_err = getattrlist( "/", &my_attrlist, &my_attr_buf, sizeof(my_attr_buf), 0 ); + if ( my_err != 0 ) { + printf( "getattrlist call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + } + + /* open kernel to use as test file for fstatfs */ + my_fd = open( "/mach_kernel", O_RDONLY, 0 ); + if ( my_fd == -1 ) { + printf( "open call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* testing fstatfs */ + my_statfsp = (struct statfs *) my_bufferp; + my_err = fstatfs( my_fd, my_statfsp ); + if ( my_err == -1 ) { + printf( "fstatfs call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* validate results */ + if ( !(memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 || + memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0) ) { + printf( "fstatfs call failed. could not find valid f_fstypename! \n" ); + goto test_failed_exit; + } + my_io_size = my_statfsp->f_iosize; + my_fsid = my_statfsp->f_fsid; + if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) { + printf( "fstatfs and getattrlist results do not match for volume block size \n" ); + goto test_failed_exit; + } + + /* try again with statfs */ + my_err = statfs( "/mach_kernel", my_statfsp ); + if ( my_err == -1 ) { + printf( "statfs call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* validate resutls */ + if ( my_io_size != my_statfsp->f_iosize || my_fsid.val[0] != my_statfsp->f_fsid.val[0] || + my_fsid.val[1] != my_statfsp->f_fsid.val[1] ) { + printf( "statfs call failed. wrong f_iosize or f_fsid! \n" ); + goto test_failed_exit; + } + if ( is_ufs == 0 && my_statfsp->f_iosize != my_attr_buf.io_blksize ) { + printf( "statfs and getattrlist results do not match for volume block size \n" ); + goto test_failed_exit; + } + + /* We passed the test */ + my_err = 0; + +test_failed_exit: + if(my_err != 0) + my_err = -1; + +test_passed_exit: + if ( my_fd != -1 ) + close( my_fd ); + if ( my_bufferp != NULL ) { + vm_deallocate(mach_task_self(), (vm_address_t)my_bufferp, my_buffer_size); + } + + return( my_err ); +} + diff --git a/tools/tests/xnu_quick_test/README b/tools/tests/xnu_quick_test/README index 06f204bc7..3e15a5a91 100644 --- a/tools/tests/xnu_quick_test/README +++ b/tools/tests/xnu_quick_test/README @@ -3,7 +3,7 @@ honest most) system calls we support in xnu. WARNING - this is not meant to be a full regression test of all the system calls. The intent is to have a quick test of each system call that -can be run very easily and quickly when a new kerenl is built. +can be run very easily and quickly when a new kernel is built. This tool is meant to grow as we find xnu problems that could have be caught before we submit to a build train. So please add more tests and @@ -15,7 +15,7 @@ will stop running it. LP64 testing tip - when adding or modifying tests, keep in mind the variants in the LP64 world. If xnu gets passed a structure the varies in size between 32 and 64-bit processes, try to test that a field in the -sructure contains valid data. For example if we know foo structure +structure contains valid data. For example, if we know foo structure looks like: struct foo { int an_int; @@ -82,11 +82,11 @@ List of all tests this tool performs... 
8 chown, fchown, lchown, lstat, readlink, symlink 9 fstatfs, getattrlist, getfsstat, statfs 10 getpid, getppid, pipe - 11 getauid, gettid, getuid, geteuid, issetugid, setauid, seteuid, settid, settid_with_pid, setuid + 11 getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid 12 mkdir, rmdir, umask 13 mknod, sync 14 fsync, getsockopt, poll, select, setsockopt, socketpair - 15 accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto + 15 accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto, sendfile 16 chflags, fchflags 17 kill, vfork, execve 18 getegid, getgid, getgroups, setegid, setgid, setgroups diff --git a/tools/tests/xnu_quick_test/helpers/data_exec.c b/tools/tests/xnu_quick_test/helpers/data_exec.c new file mode 100644 index 000000000..f8b353c5e --- /dev/null +++ b/tools/tests/xnu_quick_test/helpers/data_exec.c @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include +#include + +int test_func(); +void catch_segv(int); +jmp_buf resume; + +#define func_len 256 + +#define ALT_STK_SIZE (MINSIGSTKSZ + pagesize) + +#if __i386__ || __ppc__ +typedef unsigned int psint_t; +#endif +#if __x86_64__ || __ppc64__ +typedef unsigned long long psint_t; +#endif + +int verbose = 0; + +#define msg(...) do { if (verbose) printf(__VA_ARGS__); } while (0); + +/* + * Test whether the architecture allows execution from the stack and heap data areas. What's + * allowed varies by architecture due to backwards compatibility. We also run a separate test + * where we turn on PROT_EXEC explicitly which should always allow execution to take place. + * + * The "expected" array tells us what the result of each test should be based on the architecture. + * The code assumes the test numbers in the macros below are consecutive starting from 0. 
+ */ + +#define HEAP_TEST 0 +#define HEAP_PROT_EXEC 1 +#define STACK_TEST 2 +#define STACK_PROT_EXEC 3 + +#define SUCCEED 1 +#define FAIL -1 /* can't use 0 since setjmp uses that */ + +int expected[4] = { +#if __i386__ + SUCCEED, /* execute from heap */ + SUCCEED, /* exeucte from heap with PROT_EXEC */ + FAIL, /* execute from stack */ + SUCCEED, /* exeucte from stack with PROT_EXEC */ +#endif +#if __x86_64__ + FAIL, /* execute from heap */ + SUCCEED, /* exeucte from heap with PROT_EXEC */ + FAIL, /* execute from stack */ + SUCCEED, /* exeucte from stack with PROT_EXEC */ +#endif +#if __ppc__ + SUCCEED, /* execute from heap */ + SUCCEED, /* exeucte from heap with PROT_EXEC */ + SUCCEED, /* execute from stack */ + SUCCEED, /* exeucte from stack with PROT_EXEC */ +#endif +#if __ppc64__ + FAIL, /* execute from heap */ + SUCCEED, /* exeucte from heap with PROT_EXEC */ + FAIL, /* execute from stack */ + SUCCEED, /* exeucte from stack with PROT_EXEC */ +#endif +}; + + +main(int argc, char *argv[]) +{ + int (*func)(); + int result, test; + char buf[func_len + 4]; + psint_t base; + unsigned int len; + psint_t pagesize; + size_t count; + stack_t sigstk; + struct sigaction sigact; + char *cmd_name; + int c; + + cmd_name = argv[0]; + + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) { + case 'v': + verbose = 1; + break; + + case '?': + default: + fprintf(stderr, "usage: data_exec [-v]\n"); + exit(1); + } + } + + pagesize = getpagesize(); + + sigstk.ss_sp = malloc(ALT_STK_SIZE); + sigstk.ss_size = ALT_STK_SIZE; + sigstk.ss_flags = 0; + + if (sigaltstack(&sigstk, NULL) < 0) { + perror("sigaltstack"); + exit(1); + } + + sigact.sa_handler = catch_segv; + sigact.sa_flags = SA_ONSTACK; + sigemptyset(&sigact.sa_mask); + + if (sigaction(SIGSEGV, &sigact, NULL) == -1) { + perror("sigaction SIGSEGV"); + exit(1); + } + + if (sigaction(SIGBUS, &sigact, NULL) == -1) { + perror("sigaction SIGBUS"); + exit(1); + } + + test = HEAP_TEST; + +restart: + + if ((result = setjmp(resume)) 
!= 0) { + if (result != expected[test]) { + printf("%s: test %d failed, expected %d, got %d\n", cmd_name, test, expected[test], result); + exit(2); + } + + test++; + goto restart; + } + + switch (test) { + case HEAP_TEST: + msg("attempting to execute from malloc'ed area..\n"); + + func = (void *)malloc(func_len); + + func = (void *)((char *)func + ((psint_t)test_func & 0x3)); + + bcopy(test_func, func, func_len); + + result = (*func)(); + msg("execution suceeded, result is %d\n\n", result); + longjmp(resume, SUCCEED); + + case HEAP_PROT_EXEC: + msg("attempting to execute from malloc'ed area with PROT_EXEC..\n"); + + func = (void *)malloc(func_len); + + func = (void *)((char *)func + ((psint_t)test_func & 0x3)); + bcopy(test_func, func, func_len); + + base = (psint_t)func & ~(pagesize - 1); + len = func_len + (psint_t)func - base; + + if(mprotect((void *)base, len, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) { + perror("mprotect of stack"); + exit(1); + } + + result = (*func)(); + msg("execution suceeded, result is %d\n\n", result); + longjmp(resume, SUCCEED); + + case STACK_TEST: + msg("attempting to execute from stack...\n"); + + func = (void *)(buf + ((psint_t)test_func & 0x3)); + bcopy(test_func, func, func_len); + + result = (*func)(); + msg("stack execution suceeded, result from stack exec is %d\n\n", result); + longjmp(resume, SUCCEED); + + case STACK_PROT_EXEC: + msg("attempting to execute from stack with PROT_EXEC...\n"); + + func = (void *)(buf + ((psint_t)test_func & 0x3)); + bcopy(test_func, func, func_len); + + base = (psint_t)func & ~(pagesize - 1); + len = func_len + (psint_t)func - base; + + if(mprotect((void *)base, len, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) { + perror("mprotect of stack"); + exit(1); + } + + result = (*func)(); + msg("stack execution suceeded, result from stack exec is %d\n", result); + longjmp(resume, SUCCEED); + } + + msg("All tests passed.\n"); + exit(0); +} + + +int +test_func() +{ + return 42; +} + + +void +catch_segv(int sig) +{ 
+ msg("got sig %d\n\n", sig); + longjmp(resume, FAIL); +} diff --git a/tools/tests/xnu_quick_test/helpers/launch.c b/tools/tests/xnu_quick_test/helpers/launch.c index 5d5772bb6..7e8638eff 100644 --- a/tools/tests/xnu_quick_test/helpers/launch.c +++ b/tools/tests/xnu_quick_test/helpers/launch.c @@ -108,6 +108,11 @@ int main(int argc, const char * argv[]) if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 1)) goto test_failed_exit; #endif + /* + * We are ourselves launched with do_execve_test, which wants a chance to + * send a SIGKILL + */ + sleep(4); return 0; test_failed_exit: diff --git a/tools/tests/xnu_quick_test/kqueue_tests.c b/tools/tests/xnu_quick_test/kqueue_tests.c new file mode 100644 index 000000000..0d872ccea --- /dev/null +++ b/tools/tests/xnu_quick_test/kqueue_tests.c @@ -0,0 +1,544 @@ +/* + * tests.c + * xnu_quick_test + * + * Created by Jerry Cottingham on 3/25/05. + * Copyright 2005 Apple Computer Inc. All rights reserved. + * + */ + +#include "tests.h" +#include +#include +#include /* for kqueue tests */ +#include /* for determining hw */ +#include +#include /* for determination of Mac OS X version (tiger, leopard, etc.) 
*/ +#include /* for OSSwap32() */ + +extern char g_target_path[ PATH_MAX ]; +extern int g_skip_setuid_tests; +extern int g_is_under_rosetta; + +int msg_count = 14; +int last_msg_seen = 0; +pthread_cond_t my_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER; + + +static kern_return_t +kmsg_send(mach_port_t remote_port, int index) +{ + int msgh_id = 1000 + index; + kern_return_t my_kr; + mach_msg_header_t * my_kmsg = NULL; + mach_msg_size_t size = sizeof(mach_msg_header_t) + sizeof(int)*index; + + my_kr = vm_allocate( mach_task_self(), + (vm_address_t *)&my_kmsg, + size, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG) | TRUE ); + if (my_kr != KERN_SUCCESS) + return my_kr; + my_kmsg->msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + my_kmsg->msgh_size = size; + my_kmsg->msgh_remote_port = remote_port; + my_kmsg->msgh_local_port = MACH_PORT_NULL; + my_kmsg->msgh_reserved = 0; + my_kmsg->msgh_id = msgh_id; + my_kr = mach_msg( my_kmsg, + MACH_SEND_MSG | MACH_MSG_OPTION_NONE, + size, + 0, /* receive size */ + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL ); + vm_deallocate( mach_task_self(), (vm_address_t)my_kmsg, size ); + return my_kr; +} + +static kern_return_t +kmsg_recv(mach_port_t portset, mach_port_t port, int * msgh_id_return) +{ + kern_return_t my_kr; + mach_msg_header_t * my_kmsg = NULL; + + my_kr = vm_allocate( mach_task_self(), + (vm_address_t *)&my_kmsg, + PAGE_SIZE, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG) | TRUE ); + if (my_kr != KERN_SUCCESS) + return my_kr; + my_kr = mach_msg( my_kmsg, + MACH_RCV_MSG | MACH_MSG_OPTION_NONE, + 0, /* send size */ + PAGE_SIZE, /* receive size */ + port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL ); + if ( my_kr == KERN_SUCCESS && + msgh_id_return != NULL ) + *msgh_id_return = my_kmsg->msgh_id; + vm_deallocate( mach_task_self(), (vm_address_t)my_kmsg, PAGE_SIZE ); + return my_kr; +} + +static void * +kmsg_consumer_thread(void * arg) +{ +#if !TARGET_OS_EMBEDDED + int my_kqueue = *(int *)arg; 
+ int my_err; + kern_return_t my_kr; + struct kevent my_keventv[3]; + int msgid; + + EV_SET( &my_keventv[0], 0, 0, 0, 0, 0, 0 ); + while ( !(my_keventv[0].filter == EVFILT_USER && + my_keventv[0].ident == 0)) { + /* keep getting events */ + my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL ); + if ( my_err == -1 ) { + printf( "kevent call from consumer thread failed with error %d - \"%s\" \n", errno, strerror( errno) ); + return (void *)-1; + } + if ( my_err == 0 ) { + printf( "kevent call from consumer thread did not return any events when it should have \n" ); + return (void *)-1; + } + if ( my_keventv[0].filter == EVFILT_MACHPORT ) { + if ( my_keventv[0].data == 0 ) { + printf( "kevent call to get machport event returned 0 msg_size \n" ); + return (void *)-1; + } + my_kr = kmsg_recv( my_keventv[0].ident, my_keventv[0].data, &msgid ); + if ( my_kr != KERN_SUCCESS ) { + printf( "kmsg_recv failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + return (void *)-1; + } + my_keventv[0].flags = EV_ENABLE; + my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to re-enable machport events failed with error %d - \"%s\" \n", errno, strerror( errno) ); + return (void *)-1; + } + if (msgid == 1000 + msg_count) { + pthread_mutex_lock(&my_mutex); + last_msg_seen = 1; + pthread_cond_signal(&my_cond); + pthread_mutex_unlock(&my_mutex); + } + } + } + return (void *)0; +#else + printf( "\t--> Not supported on EMBEDDED TARGET\n" ); + return (void *)0; +#endif +} + +/* ************************************************************************************************************** + * Test kevent, kqueue system calls. 
+ * ************************************************************************************************************** + */ +int kqueue_tests( void * the_argp ) +{ + int my_err, my_status; + int my_kqueue = -1; + int my_kqueue64 = -1; + int my_fd = -1; + char * my_pathp = NULL; + pid_t my_pid, my_wait_pid; + size_t my_count, my_index; + int my_sockets[ 2 ] = {-1, -1}; + struct kevent my_keventv[3]; +#if !TARGET_OS_EMBEDDED + struct kevent64_s my_kevent64; +#endif + struct timespec my_timeout; + char my_buffer[ 16 ]; + kern_return_t kr; + + kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + *my_pathp = 0x00; + strcat( my_pathp, &g_target_path[0] ); + strcat( my_pathp, "/" ); + + /* create a test file */ + my_err = create_random_name( my_pathp, 1 ); + if ( my_err != 0 ) { + goto test_failed_exit; + } + + my_fd = open( my_pathp, O_RDWR, 0 ); + if ( my_fd == -1 ) { + printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_err = socketpair( AF_UNIX, SOCK_STREAM, 0, &my_sockets[0] ); + if ( my_err == -1 ) { + printf( "socketpair failed with errno %d - %s \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* fork here and use pipe to communicate */ + my_pid = fork( ); + if ( my_pid == -1 ) { + printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + else if ( my_pid == 0 ) { + /* + * child process - tell parent we are ready to go. + */ + my_count = write( my_sockets[1], "r", 1 ); + if ( my_count == -1 ) { + printf( "write call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); + exit( -1 ); + } + + my_count = read( my_sockets[1], &my_buffer[0], 1 ); + if ( my_count == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + exit( -1 ); + } + if ( my_buffer[0] != 'g' ) { + printf( "read call on socket failed to get \"all done\" message \n" ); + exit( -1 ); + } + + /* now do some work that will trigger events our parent will track */ + my_count = write( my_fd, "11111111", 8 ); + if ( my_count == -1 ) { + printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + exit( -1 ); + } + + my_err = unlink( my_pathp ); + if ( my_err == -1 ) { + printf( "unlink failed with error %d - \"%s\" \n", errno, strerror( errno) ); + exit( -1 ); + } + + /* wait for parent to tell us to exit */ + my_count = read( my_sockets[1], &my_buffer[0], 1 ); + if ( my_count == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + exit( -1 ); + } + if ( my_buffer[0] != 'e' ) { + printf( "read call on socket failed to get \"all done\" message \n" ); + exit( -1 ); + } + exit(0); + } + + /* parent process - wait for child to spin up */ + my_count = read( my_sockets[0], &my_buffer[0], sizeof(my_buffer) ); + if ( my_count == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_buffer[0] != 'r' ) { + printf( "read call on socket failed to get \"ready to go message\" \n" ); + goto test_failed_exit; + } + + /* set up a kqueue and register for some events */ + my_kqueue = kqueue( ); + if ( my_kqueue == -1 ) { + printf( "kqueue call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + /* look for our test file to get unlinked or written to */ + EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, (EV_ADD | EV_CLEAR), (NOTE_DELETE | NOTE_WRITE), 0, 0 ); + /* also keep an eye on our child process while we're at it */ + EV_SET( &my_keventv[1], 
my_pid, EVFILT_PROC, (EV_ADD | EV_ONESHOT), NOTE_EXIT, 0, 0 ); + + my_timeout.tv_sec = 0; + my_timeout.tv_nsec = 0; + my_err = kevent( my_kqueue, my_keventv, 2, NULL, 0, &my_timeout); + if ( my_err == -1 ) { + printf( "kevent call to register events failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + +#if !TARGET_OS_EMBEDDED + if (!g_is_under_rosetta) { + /* use kevent64 to test EVFILT_PROC */ + EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); + my_err = kevent64( my_kqueue, &my_kevent64, 1, NULL, 0, 0, 0); + if ( my_err != -1 && errno != EINVAL ) { + printf( "kevent64 call should fail with kqueue used for kevent() - %d\n", my_err); + goto test_failed_exit; + } + + my_kqueue64 = kqueue(); + EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0, 0, 0 ); + my_err = kevent64( my_kqueue64, &my_kevent64, 1, NULL, 0, 0, 0); + if ( my_err == -1 ) { + printf( "kevent64 call to get proc exit failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + } +#endif + + /* tell child to get to work */ + my_count = write( my_sockets[0], "g", 1 ); + if ( my_count == -1 ) { + printf( "write call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* go get vnode events */ + EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, (EV_CLEAR), 0, 0, 0 ); + my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to get vnode events failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_err == 0 ) { + printf( "kevent call to get vnode events did not return any when it should have \n" ); + goto test_failed_exit; + } + if ( (my_keventv[0].fflags & (NOTE_DELETE | NOTE_WRITE)) == 0 ) { + printf( "kevent call to get vnode events did not return NOTE_DELETE or NOTE_WRITE \n" ); + printf( "fflags 0x%02X \n", my_keventv[0].fflags ); + goto test_failed_exit; + } + + /* tell child to exit */ + my_count = write( my_sockets[0], "e", 1 ); + if ( my_count == -1 ) { + printf( "write call failed. got errno %d - %s. \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* look for child exit notification after unregistering for vnode events */ + EV_SET( &my_keventv[0], my_fd, EVFILT_VNODE, EV_DELETE, 0, 0, 0 ); + my_err = kevent( my_kqueue, my_keventv, 1, my_keventv, 1, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to get proc exit event failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_err == 0 ) { + printf( "kevent call to get proc exit event did not return any when it should have \n" ); + goto test_failed_exit; + } + if ( my_keventv[0].filter != EVFILT_PROC ) { + printf( "kevent call to get proc exit event did not return EVFILT_PROC \n" ); + printf( "filter %i \n", my_keventv[0].filter ); + goto test_failed_exit; + } + if ( (my_keventv[0].fflags & NOTE_EXIT) == 0 ) { + printf( "kevent call to get proc exit event did not return NOTE_EXIT \n" ); + printf( "fflags 0x%02X \n", my_keventv[0].fflags ); + goto test_failed_exit; + } + +#if !TARGET_OS_EMBEDDED + if (!g_is_under_rosetta) { + /* look for child exit 
notification on the kevent64 kqueue */ + EV_SET64( &my_kevent64, my_pid, EVFILT_PROC, EV_CLEAR, NOTE_EXIT, 0, 0, 0, 0 ); + my_err = kevent64( my_kqueue64, NULL, 0, &my_kevent64, 1, 0, 0); + if ( my_err == -1 ) { + printf( "kevent64 call to get child exit failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_err == 0 ) { + printf( "kevent64 call to get proc exit event did not return any when it should have \n" ); + goto test_failed_exit; + } + if ( my_kevent64.filter != EVFILT_PROC ) { + printf( "kevent64 call to get proc exit event did not return EVFILT_PROC \n" ); + printf( "filter %i \n", my_kevent64.filter ); + goto test_failed_exit; + } + if ( (my_kevent64.fflags & NOTE_EXIT) == 0 ) { + printf( "kevent64 call to get proc exit event did not return NOTE_EXIT \n" ); + printf( "fflags 0x%02X \n", my_kevent64.fflags ); + goto test_failed_exit; + } + } + + my_wait_pid = wait4( my_pid, &my_status, 0, NULL ); + if ( my_wait_pid == -1 ) { + printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) ); + goto test_failed_exit; + } + + /* wait4 should return our child's pid when it exits */ + if ( my_wait_pid != my_pid ) { + printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid ); + goto test_failed_exit; + } + + if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) { + printf( "wait4 returned wrong exit status - 0x%02X \n", my_status ); + goto test_failed_exit; + } + + /* now try out EVFILT_MACHPORT and EVFILT_USER */ + mach_port_t my_pset = MACH_PORT_NULL; + mach_port_t my_port = MACH_PORT_NULL; + kern_return_t my_kr; + + my_kr = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_PORT_SET, &my_pset ); + if ( my_kr != KERN_SUCCESS ) { + printf( "mach_port_allocate failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + + my_kr = mach_port_allocate( mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &my_port ); + if ( my_kr != 
KERN_SUCCESS ) { + printf( "mach_port_allocate failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + + /* try to register for events on my_port directly -- this should fail */ + EV_SET( &my_keventv[0], my_port, EVFILT_MACHPORT, (EV_ADD | EV_DISPATCH), 0, 0, 0 ); + my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL ); + if ( my_err != -1 || errno != ENOTSUP ) { + printf( "kevent call to register my_port should have failed, but got %s \n", strerror(errno) ); + goto test_failed_exit; + } + + /* now register for events on my_pset and user 0 */ + EV_SET( &my_keventv[0], my_pset, EVFILT_MACHPORT, (EV_ADD | EV_CLEAR | EV_DISPATCH), 0, 0, 0 ); + EV_SET( &my_keventv[1], 0, EVFILT_USER, EV_ADD, 0, 0, 0 ); + my_err = kevent( my_kqueue, my_keventv, 2, NULL, 0, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to register my_pset and user 0 failed with error %d - %s \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + pthread_t my_threadv[3]; + + for (my_index = 0; + my_index < 3; + my_index++) { + my_err = pthread_create( &my_threadv[my_index], NULL, kmsg_consumer_thread, (void *)&my_kqueue ); + if ( my_err != 0 ) { + printf( "pthread_create failed with error %d - %s \n", my_err, strerror(my_err) ); + goto test_failed_exit; + } + } + + /* insert my_port into my_pset */ + my_kr = mach_port_insert_member( mach_task_self(), my_port, my_pset ); + if ( my_kr != KERN_SUCCESS ) { + printf( "mach_port_insert_member failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + + my_kr = mach_port_insert_right( mach_task_self(), my_port, my_port, MACH_MSG_TYPE_MAKE_SEND ); + if ( my_kr != KERN_SUCCESS ) { + printf( "mach_port_insert_right failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + + /* send some Mach messages */ + for (my_index = 1; + my_index <= msg_count; + my_index++) { + my_kr = kmsg_send( my_port, my_index ); + if ( my_kr != 
KERN_SUCCESS ) { + printf( "kmsg_send failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + } + + /* make sure the last message eventually gets processed */ + pthread_mutex_lock(&my_mutex); + while (last_msg_seen == 0) + pthread_cond_wait(&my_cond, &my_mutex); + pthread_mutex_unlock(&my_mutex); + + /* trigger the user 0 event, telling consumer threads to exit */ + EV_SET( &my_keventv[0], 0, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0 ); + my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to trigger user 0 failed with error %d - %s \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + for (my_index = 0; + my_index < 3; + my_index++) { + my_err = pthread_join( my_threadv[my_index], (void **)&my_status ); + if ( my_err != 0 ) { + printf( "pthread_join failed with error %d - %s \n", my_err, strerror(my_err) ); + goto test_failed_exit; + } + if ( my_status != 0 ) { + goto test_failed_exit; + } + } + + /* clear the user 0 event */ + EV_SET( &my_keventv[0], 0, EVFILT_USER, EV_CLEAR, 0, 0, 0 ); + my_err = kevent( my_kqueue, my_keventv, 1, NULL, 0, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to trigger user 0 failed with error %d - %s \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + /* delibrately destroy my_pset while it's still registered for events */ + my_kr = mach_port_mod_refs( mach_task_self(), my_pset, MACH_PORT_RIGHT_PORT_SET, -1 ); + if ( my_kr != KERN_SUCCESS ) { + printf( "mach_port_mod_refs failed with error %d - %s \n", my_kr, mach_error_string(my_kr) ); + goto test_failed_exit; + } + + /* look for the event to trigger with a zero msg_size */ + my_err = kevent( my_kqueue, NULL, 0, my_keventv, 1, NULL ); + if ( my_err == -1 ) { + printf( "kevent call to get machport event failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + if ( my_err == 0 ) { + printf( "kevent call to get machport event did 
not return any when it should have \n" ); + goto test_failed_exit; + } + if ( my_keventv[0].filter != EVFILT_MACHPORT ) { + printf( "kevent call to get machport event did not return EVFILT_MACHPORT \n" ); + printf( "filter %i \n", my_keventv[0].filter ); + goto test_failed_exit; + } + if ( my_keventv[0].data != 0 ) { + printf( "kevent call to get machport event did not return 0 msg_size \n" ); + printf( "data %ld \n", (long int) my_keventv[0].data ); + goto test_failed_exit; + } +#endif + + my_err = 0; + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + if ( my_sockets[0] != -1 ) + close( my_sockets[0] ); + if ( my_sockets[1] != -1 ) + close( my_sockets[1] ); + if ( my_kqueue != -1 ) + close( my_kqueue ); + if ( my_kqueue64 != -1 ) + close( my_kqueue ); + if ( my_fd != -1 ) + close( my_fd ); + if ( my_pathp != NULL ) { + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); + } + return( my_err ); +} diff --git a/tools/tests/xnu_quick_test/machvm_tests.c b/tools/tests/xnu_quick_test/machvm_tests.c new file mode 100644 index 000000000..d478807a7 --- /dev/null +++ b/tools/tests/xnu_quick_test/machvm_tests.c @@ -0,0 +1,266 @@ +/* + * machvm_tests.c + * xnu_quick_test + * + * Copyright 2008 Apple Inc. All rights reserved. 
+ * + */ + +#include "tests.h" +#include +#include +#include +#include +#include + +extern int g_is_under_rosetta; + +int machvm_tests( void * the_argp ) +{ + int pagesize = getpagesize(); + int regionsizes[] = { 1, 3, 7, 13, 77, 1223 }; /* sizes must be in increasing order */ + char *regionbuffers[] = { NULL, NULL, NULL, NULL, NULL, NULL }; + int i; + kern_return_t kret; + + /* Use vm_allocate to grab some memory */ + for (i=0; i < sizeof(regionsizes)/sizeof(regionsizes[0]); i++) { + vm_address_t addr = 0; + + kret = vm_allocate(mach_task_self(), &addr, regionsizes[i]*pagesize, VM_FLAGS_ANYWHERE); + if (kret != KERN_SUCCESS) { + warnx("vm_allocate of %d pages failed: %d", regionsizes[i], kret); + goto fail; + } + regionbuffers[i] = (char *)addr; + } + + /* deallocate one range without having touched it, scribble on another, then deallocate that one */ + kret = vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[4], regionsizes[4]*pagesize); + if (kret != KERN_SUCCESS) { + warnx("vm_deallocate of %d pages failed: %d", regionsizes[4], kret); + goto fail; + } + regionbuffers[4] = NULL; + + memset(regionbuffers[3], 0x4f, pagesize*MIN(3, regionsizes[3])); + + kret = vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[3], regionsizes[3]*pagesize); + if (kret != KERN_SUCCESS) { + warnx("vm_deallocate of %d pages failed: %d", regionsizes[3], kret); + goto fail; + } + regionbuffers[3] = NULL; + + // populate the largest buffer with a byte pattern that matches the page offset, then fix it to readonly + for (i=0; i < regionsizes[5]; i++) { + memset(regionbuffers[5] + i*pagesize, (unsigned char)i, pagesize); + } + kret = vm_protect(mach_task_self(), (vm_offset_t)regionbuffers[5], regionsizes[5]*pagesize, FALSE, VM_PROT_READ); + if (kret != KERN_SUCCESS) { + warnx("vm_protect of %d pages failed: %d", regionsizes[5], kret); + goto fail; + } + + // read the last few pagse of the largest buffer and verify its contents + { + vm_offset_t newdata; + 
mach_msg_type_number_t newcount; + + kret = vm_read(mach_task_self(), (vm_address_t)regionbuffers[5] + (regionsizes[5]-5)*pagesize, 5*pagesize, + &newdata, &newcount); + if (kret != KERN_SUCCESS) { + warnx("vm_read of %d pages failed: %d", 5, kret); + goto fail; + } + + if (0 != memcmp((char *)newdata, regionbuffers[5] + (regionsizes[5]-5)*pagesize, + 5*pagesize)) { + warnx("vm_read comparison of %d pages failed", 5); + kret = -1; + vm_deallocate(mach_task_self(), newdata, 5*pagesize); + goto fail; + } + + kret = vm_deallocate(mach_task_self(), newdata, 5*pagesize); + if (kret != KERN_SUCCESS) { + warnx("vm_deallocate of %d pages failed: %d", 5, kret); + goto fail; + } + } + + // do a list read to repopulate slots 3 and 4 + { + vm_read_entry_t readlist; + + readlist[0].address = (vm_offset_t)regionbuffers[5] + 10*pagesize; + readlist[0].size = regionsizes[3]*pagesize; + readlist[1].address = (vm_offset_t)regionbuffers[5] + 10*pagesize + regionsizes[3]*pagesize; + readlist[1].size = regionsizes[4]*pagesize; + + kret = vm_read_list(mach_task_self(), readlist, 2); + if (kret != KERN_SUCCESS) { + warnx("vm_read_list failed: %d", kret); + goto fail; + } + + if (0 != memcmp((char *)readlist[0].address, regionbuffers[5] + 10*pagesize, + regionsizes[3]*pagesize)) { + warnx("vm_read_list comparison of allocation 0 failed"); + kret = -1; + vm_deallocate(mach_task_self(), readlist[0].address, readlist[0].size); + vm_deallocate(mach_task_self(), readlist[1].address, readlist[1].size); + goto fail; + } + + if (0 != memcmp((char *)readlist[1].address, regionbuffers[5] + 10*pagesize + regionsizes[3]*pagesize, + regionsizes[4]*pagesize)) { + warnx("vm_read_list comparison of allocation 1 failed"); + kret = -1; + vm_deallocate(mach_task_self(), readlist[0].address, readlist[0].size); + vm_deallocate(mach_task_self(), readlist[1].address, readlist[1].size); + goto fail; + } + + regionbuffers[3] = (char *)readlist[0].address; + regionbuffers[4] = (char *)readlist[1].address; + } + + 
// do a read_overwrite and copy, which should be about the same + { + vm_size_t count; + + kret = vm_read_overwrite(mach_task_self(), (vm_offset_t)regionbuffers[3], + regionsizes[0]*pagesize, + (vm_offset_t)regionbuffers[0], + &count); + if (kret != KERN_SUCCESS) { + warnx("vm_read_overwrite of %d pages failed: %d", regionsizes[0], kret); + goto fail; + } + + kret = vm_copy(mach_task_self(), (vm_offset_t)regionbuffers[0], + regionsizes[0]*pagesize, + (vm_offset_t)regionbuffers[1]); + if (kret != KERN_SUCCESS) { + warnx("vm_copy of %d pages failed: %d", regionsizes[0], kret); + goto fail; + } + + if (0 != memcmp(regionbuffers[1], regionbuffers[3], + regionsizes[0]*pagesize)) { + warnx("vm_read_overwrite/vm_copy comparison failed"); + kret = -1; + goto fail; + } + } + + // do a vm_copy of our mach-o header and compare. Rosetta doesn't support this, though + if (!g_is_under_rosetta) { + + kret = vm_write(mach_task_self(), (vm_address_t)regionbuffers[2], + (vm_offset_t)&_mh_execute_header, pagesize); + if (kret != KERN_SUCCESS) { + warnx("vm_write of %d pages failed: %d", 1, kret); + goto fail; + } + + if (_mh_execute_header.magic != *(uint32_t *)regionbuffers[2]) { + warnx("vm_write comparison failed"); + kret = -1; + goto fail; + } + } + + // check that the vm_protects above worked + { + vm_address_t addr = (vm_address_t)regionbuffers[5]+7*pagesize; + vm_size_t size = pagesize; + int _basic[VM_REGION_BASIC_INFO_COUNT]; + vm_region_basic_info_t basic = (vm_region_basic_info_t)_basic; + int _basic64[VM_REGION_BASIC_INFO_COUNT_64]; + vm_region_basic_info_64_t basic64 = (vm_region_basic_info_64_t)_basic64; + mach_msg_type_number_t infocnt; + mach_port_t objname; + +#if !__LP64__ + infocnt = VM_REGION_BASIC_INFO_COUNT; + kret = vm_region(mach_task_self(), &addr, &size, VM_REGION_BASIC_INFO, + (vm_region_info_t)basic, &infocnt, &objname); + if (kret != KERN_SUCCESS) { + warnx("vm_region(VM_REGION_BASIC_INFO) failed: %d", kret); + goto fail; + } + if 
(VM_REGION_BASIC_INFO_COUNT != infocnt) { + warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad info count"); + kret = -1; + goto fail; + } + + // when we did the vm_read_list above, it should have split this region into + // a 10 page sub-region + if (addr != (vm_address_t)regionbuffers[5] || size != 10*pagesize) { + warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad region range"); + kret = -1; + goto fail; + } + + if (basic->protection != VM_PROT_READ) { + warnx("vm_region(VM_REGION_BASIC_INFO) returned a bad protection"); + kret = -1; + goto fail; + } +#endif + + infocnt = VM_REGION_BASIC_INFO_COUNT_64; + // intentionally use VM_REGION_BASIC_INFO and get up-converted + kret = vm_region_64(mach_task_self(), &addr, &size, VM_REGION_BASIC_INFO, + (vm_region_info_t)basic64, &infocnt, &objname); + if (kret != KERN_SUCCESS) { + warnx("vm_region_64(VM_REGION_BASIC_INFO) failed: %d", kret); + goto fail; + } + if (VM_REGION_BASIC_INFO_COUNT_64 != infocnt) { + warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad info count"); + kret = -1; + goto fail; + } + + // when we did the vm_read_list above, it should have split this region into + // a 10 page sub-region + if (addr != (vm_address_t)regionbuffers[5] || size != 10*pagesize) { + warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad region range"); + kret = -1; + goto fail; + } + + if (basic64->protection != VM_PROT_READ) { + warnx("vm_region_64(VM_REGION_BASIC_INFO) returned a bad protection"); + kret = -1; + goto fail; + } + +#if !__LP64__ + // try to compare some stuff. 
Particularly important for fields after offset + if (!g_is_under_rosetta) { + if (basic->offset != basic64->offset || + basic->behavior != basic64->behavior || + basic->user_wired_count != basic64->user_wired_count) { + warnx("vm_region and vm_region_64 did not agree"); + kret = -1; + goto fail; + } + } +#endif + } + +fail: + for (i=0; i < sizeof(regionsizes)/sizeof(regionsizes[0]); i++) { + if (regionbuffers[i]) { + vm_deallocate(mach_task_self(), (vm_address_t)regionbuffers[i], regionsizes[i]*pagesize); + } + } + + return kret; +} + diff --git a/tools/tests/xnu_quick_test/main.c b/tools/tests/xnu_quick_test/main.c index 3ef0137ae..0c116215c 100644 --- a/tools/tests/xnu_quick_test/main.c +++ b/tools/tests/xnu_quick_test/main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,9 @@ #include #include #include +#include +#include +#include #include "tests.h" #if !TARGET_OS_EMBEDDED @@ -69,18 +73,21 @@ struct test_entry g_tests[] = {1, &chdir_fchdir_test, NULL, "chdir, fchdir"}, {1, &access_chmod_fchmod_test, NULL, "access, chmod, fchmod"}, {1, &chown_fchown_lchown_lstat_symlink_test, NULL, "chown, fchown, lchown, lstat, readlink, symlink"}, - {1, &fs_stat_tests, NULL, "fstatfs, getattrlist, getfsstat, statfs, getfsstat64, statfs64, fstatfs64"}, + {1, &fs_stat_tests, NULL, "fstatfs, getfsstat, statfs, fstatfs64, getfsstat64, statfs64"}, +#if !TARGET_OS_EMBEDDED + {1, &statfs_32bit_inode_tests, NULL, "32-bit inode versions: fstatfs, getfsstat, statfs"}, +#endif {1, &getpid_getppid_pipe_test, NULL, "getpid, getppid, pipe"}, - {1, &uid_tests, NULL, "getauid, gettid, getuid, geteuid, issetugid, setauid, seteuid, settid, settid_with_pid, setuid"}, + {1, &uid_tests, NULL, "getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid"}, {1, &mkdir_rmdir_umask_test, NULL, "mkdir, rmdir, umask"}, {1, &mknod_sync_test, NULL, "mknod, sync"}, {1, &socket2_tests, NULL, "fsync, getsockopt, poll, 
select, setsockopt, socketpair"}, - {1, &socket_tests, NULL, "accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto"}, + {1, &socket_tests, NULL, "accept, bind, connect, getpeername, getsockname, listen, socket, recvmsg, sendmsg, sendto, sendfile"}, {1, &chflags_fchflags_test, NULL, "chflags, fchflags"}, {1, &execve_kill_vfork_test, NULL, "kill, vfork, execve, posix_spawn"}, {1, &groups_test, NULL, "getegid, getgid, getgroups, setegid, setgid, setgroups"}, {1, &dup_test, NULL, "dup, dup2, getdtablesize"}, - {1, &getrusage_profil_test, NULL, "getrusage, profil"}, + {1, &getrusage_test, NULL, "getrusage"}, {1, &signals_test, NULL, "getitimer, setitimer, sigaction, sigpending, sigprocmask, sigsuspend, sigwait"}, {1, &acct_test, NULL, "acct"}, {1, &ioctl_test, NULL, "ioctl"}, @@ -96,7 +103,10 @@ struct test_entry g_tests[] = {1, &mkfifo_test, NULL, "mkfifo, read, write"}, {1, "actl_test, NULL, "quotactl"}, {1, &limit_tests, NULL, "getrlimit, setrlimit"}, - {1, &directory_tests, NULL, "getattrlist, getdirentries, getdirentriesattr, setattrlist"}, + {1, &directory_tests, NULL, "getattrlist, getdirentriesattr, setattrlist"}, +#if !TARGET_OS_EMBEDDED + {1, &getdirentries_test, NULL, "getdirentries"}, +#endif {1, &exchangedata_test, NULL, "exchangedata"}, {1, &searchfs_test, NULL, "searchfs"}, {1, &sema2_tests, NULL, "sem_close, sem_open, sem_post, sem_trywait, sem_unlink, sem_wait"}, @@ -107,6 +117,8 @@ struct test_entry g_tests[] = {1, &aio_tests, NULL, "aio_cancel, aio_error, aio_read, aio_return, aio_suspend, aio_write, fcntl, lio_listio"}, {1, &kqueue_tests, NULL, "kevent, kqueue"}, {1, &message_queue_tests, NULL, "msgctl, msgget, msgrcv, msgsnd"}, + {1, &data_exec_tests, NULL, "data/stack execution"}, + {1, &machvm_tests, NULL, "Mach VM calls"}, {0, NULL, NULL, "last one"} }; @@ -115,6 +127,8 @@ static void list_all_tests( void ); static void mark_tests_to_run( long my_start, long my_end ); static int parse_tests_to_run( int argc, 
const char * argv[], int * indexp ); static void usage( void ); +static int setgroups_if_single_user(void); +static const char *current_arch( void ); /* globals */ long g_max_failures = 0; @@ -123,6 +137,7 @@ int g_xilog_active = 0; const char * g_cmd_namep; char g_target_path[ PATH_MAX ]; int g_is_under_rosetta = 0; +int g_is_single_user = 0; int main( int argc, const char * argv[] ) { @@ -253,6 +268,11 @@ int main( int argc, const char * argv[] ) g_is_under_rosetta = val ? 0 : 1; } #endif + + /* Populate groups list if we're in single user mode */ + if (setgroups_if_single_user()) { + return 1; + } if ( list_the_tests != 0 ) { list_all_tests( ); @@ -274,7 +294,7 @@ int main( int argc, const char * argv[] ) * files and directories. */ create_target_directory( my_targetp ); - printf( "Will allow %d failures before testing is aborted \n", g_max_failures ); + printf( "Will allow %ld failures before testing is aborted \n", g_max_failures ); if (g_is_under_rosetta) { printf("Running under Rosetta.\n"); @@ -282,7 +302,8 @@ int main( int argc, const char * argv[] ) my_start_time = time( NULL ); printf( "\nBegin testing - %s \n", ctime_r( &my_start_time, &my_buffer[0] ) ); - + printf( "Current architecture is %s\n", current_arch() ); + /* run each test that is marked to run in our table until we complete all of them or * hit the maximum number of failures. 
*/ @@ -382,7 +403,7 @@ static int parse_tests_to_run( int argc, const char * argv[], int * indexp ) } if ( strlen( my_ptr ) > (sizeof( my_buffer ) - 1) ) { - printf( "-run argument has too many test parameters (max of %d characters) \n", sizeof( my_buffer ) ); + printf( "-run argument has too many test parameters (max of %lu characters) \n", sizeof( my_buffer ) ); return -1; } /* get a local copy of the parameter string to work with - break range into two strings */ @@ -485,21 +506,6 @@ static void create_target_directory( const char * the_targetp ) } /* create_target_directory */ -static void list_all_tests( void ) -{ - int i, my_tests_count; - - my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] )); - printf( "\nList of all tests this tool performs... \n" ); - - for ( i = 0; i < (my_tests_count - 1); i++ ) { - printf( " %d \t %s \n", (i + 1), g_tests[ i ].test_infop ); - } - - return; -} /* list_all_tests */ - - static void mark_tests_to_run( long my_start, long my_end ) { int my_tests_count, i; @@ -543,3 +549,88 @@ static void usage( void ) } /* usage */ +/* This is a private API between Libinfo, Libc, and the DirectoryService daemon. 
+ * Since we are trying to determine if an external provider will back group + * lookups, we can use this, without relying on additional APIs or tools + * that might not work yet */ +extern int _ds_running(void); +/* setgroups_if_single_user: when _ds_running() reports no DirectoryService, skip two getgrent() entries and install the next NUM_GROUPS group ids with setgroups(). Returns 0 when not in single-user mode or when setgroups() succeeds, -1 when fewer than NUM_GROUPS groups were found or setgroups() fails. */ +#define NUM_GROUPS 6 +static int +setgroups_if_single_user(void) +{ + int i, retval = -1; + struct group *grp; + gid_t gids[NUM_GROUPS]; + + if (!_ds_running()) { + printf("In single-user mode.\n"); + g_is_single_user = 1; + + /* We skip 'nobody' and 'anyone' */ + getgrent(); + getgrent(); + for (i = 0; i < NUM_GROUPS; i++) { + grp = getgrent(); + if (!grp) { + break; + } + + gids[i] = grp->gr_gid; + } + + endgrent(); + + /* Only succeed if we find at least NUM_GROUPS */ + if (i == NUM_GROUPS) { + retval = setgroups(NUM_GROUPS, gids); + if (retval == 0) { + getgroups(NUM_GROUPS, gids); + printf("After single-user hack, groups are: "); + for (i = 0; i < NUM_GROUPS; i++) { + printf("%u, ", (unsigned int)gids[i]); /* gid_t is unsigned; print it as such */ + } + putchar('\n'); + } else { + printf("Setgroups failed.\n"); + } + } else { + printf("Couldn't get sufficient number of groups.\n"); + } + } else { + printf("Not in single user mode.\n"); + retval = 0; + } + + + return retval; +} +/* current_arch: look up the NXArchInfo for the cputype/cpusubtype recorded in this executable's Mach-O header and return its name, or the fallback literal when the lookup returns NULL. */ +static const char *current_arch( void ) +{ + cpu_type_t cputype = _mh_execute_header.cputype; + cpu_subtype_t cpusubtype = _mh_execute_header.cpusubtype; + + const NXArchInfo *arch = NXGetArchInfoFromCpuType(cputype, cpusubtype); + + if (arch) { + return arch->name; + } else { + return ""; + } +} + +#undef printf /* this makes the "-l" output easier to read */ +static void list_all_tests( void ) +{ + int i, my_tests_count; + + my_tests_count = (sizeof( g_tests ) / sizeof( g_tests[0] )); + printf( "\nList of all tests this tool performs... 
\n" ); + + for ( i = 0; i < (my_tests_count - 1); i++ ) { + printf( " %d \t %s \n", (i + 1), g_tests[ i ].test_infop ); + } + + return; +} /* list_all_tests */ diff --git a/tools/tests/xnu_quick_test/makefile b/tools/tests/xnu_quick_test/makefile index 6fc3a0ed2..65a770c3e 100644 --- a/tools/tests/xnu_quick_test/makefile +++ b/tools/tests/xnu_quick_test/makefile @@ -1,13 +1,18 @@ +SDKROOT ?= / Product=$(shell tconf --product) Embedded=$(shell tconf --test TARGET_OS_EMBEDDED) ifeq "$(Embedded)" "YES" XILogFLAG = +SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version | grep Path | cut -f 2 -d " ") +CFLAGS += -isysroot $(SDKPATH) +LIBFLAGS += -isysroot $(SDKPATH) else XILogFLAG = -framework XILog endif -CC=gcc $(SYSROOT) +HOSTCC = gcc +CC = xcrun -sdk $(SDKROOT) gcc ifdef RC_BUILDIT DOING_BUILDIT=yes @@ -33,9 +38,9 @@ else endif ifndef ARCH - ARCH=i386 x86_64 ppc ppc64 + ARCH=i386 x86_64 ppc # this hack should be removed once tconf gets - # + # ifeq "$(Product)" "iPhone" ARCH=armv6 endif @@ -51,27 +56,34 @@ else CFLAGS += $(MY_ARCH) endif -CFLAGS += -g -I /System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS) -LIBFLAGS = -I /System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -F/AppleInternal/Library/Frameworks/ $(XILogFLAG) - -#CFLAGS+= $(MY_ARCH) -g -D_POSIX_C_SOURCE=200112L +CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS) +LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -F/AppleInternal/Library/Frameworks/ $(XILogFLAG) MY_OBJECTS = $(OBJROOT)/main.o $(OBJROOT)/memory_tests.o $(OBJROOT)/misc.o \ $(OBJROOT)/sema_tests.o $(OBJROOT)/shared_memory_tests.o \ - $(OBJROOT)/socket_tests.o $(OBJROOT)/tests.o $(OBJROOT)/xattr_tests.o + $(OBJROOT)/socket_tests.o $(OBJROOT)/tests.o \ + $(OBJROOT)/xattr_tests.o $(OBJROOT)/kqueue_tests.o \ + 
$(OBJROOT)/machvm_tests.o +ifneq "$(Product)" "iPhone" +MY_OBJECTS += $(OBJROOT)/32bit_inode_tests.o +endif +# In networked home directories, the chown will fail; we notice and print a helpful message +CHOWN_COMMAND=sudo chown root $(DSTROOT)/xnu_quick_test +PERM_ADVICE="\tYou'll have to set the executable's permissions yourself: chown to root and chmod to 4755. You may need to move to a local volume to do that." xnu_quick_test : $(OBJROOT) $(DSTROOT) $(MY_OBJECTS) helpers sudo rm -rf $(DSTROOT)/xnu_quick_test $(CC) $(MY_ARCH) $(LIBFLAGS) -o $(DSTROOT)/xnu_quick_test $(MY_OBJECTS) - sudo chown root $(DSTROOT)/xnu_quick_test + @echo $(CHOWN_COMMAND) # Hack so we don't echo help-message echo + @$(CHOWN_COMMAND) || echo $(PERM_ADVICE) sudo chmod 4755 $(DSTROOT)/xnu_quick_test # The helper binaries are used to test exec()'ing between 64bit and 32bit. # Creates test binaries with page zero sizes = 4KB and 4GB. Also creates 32-bit # helper processes for the 64-bit version of xnu_quick_test to test the conversion # from a 32-bit process to a 64-bit process. 
-helpers : helpers/sleep.c helpers/launch.c helpers/arch.c helperdir $(OBJROOT)/misc.o +helpers : helpers/sleep.c helpers/launch.c helpers/arch.c helpers/data_exec.c helperdir $(OBJROOT)/misc.o ifneq "$(Product)" "iPhone" $(CC) -arch i386 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-i386 endif @@ -79,21 +91,20 @@ ifeq "$(Product)" "MacOSX" $(CC) -arch x86_64 -pagezero_size 0x100000000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4G $(CC) -arch x86_64 -pagezero_size 0x1000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4K $(CC) -arch ppc helpers/sleep.c -o $(DSTROOT)/helpers/sleep-ppc32 - $(CC) -arch ppc64 -pagezero_size 0x100000000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-ppc64-4G - $(CC) -arch ppc64 -pagezero_size 0x1000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-ppc64-4K endif ifneq "$(Product)" "iPhone" $(CC) $(LIBFLAGS) -arch i386 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-i386 endif -ifeq "$(Product)" "MacOS" +ifeq "$(Product)" "MacOSX" $(CC) $(LIBFLAGS) -arch x86_64 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-x86_64 $(CC) $(LIBFLAGS) -arch ppc $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-ppc - $(CC) $(LIBFLAGS) -arch ppc64 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-ppc64 $(CC) $(MY_ARCH) helpers/arch.c -o $(DSTROOT)/helpers/arch + $(CC) $(MY_ARCH) helpers/data_exec.c -o $(DSTROOT)/helpers/data_exec + endif ifeq "$(Product)" "iPhone" - $(CC) -arch armv6 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm - $(CC) $(LIBFLAGS) -arch armv6 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm + $(CC) -arch armv6 -isysroot $(SDKROOT) $(CFLAGS) helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm + $(CC) $(LIBFLAGS) -arch armv6 -isysroot $(SDKROOT) $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm endif @@ -106,20 +117,18 @@ $(OBJROOT) : $(DSTROOT) : mkdir -p $(DSTROOT); -INCLUDES = 
/Developer/SDKs/Purple/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ - $(OBJROOT)/main.o : main.c tests.h $(CC) $(CFLAGS) -c main.c -o $@ $(OBJROOT)/memory_tests.o : memory_tests.c tests.h $(CC) $(CFLAGS) -c memory_tests.c -o $@ -# misc.o has to be built 4-way for the helpers to link +# misc.o has to be built 3-way for the helpers to link $(OBJROOT)/misc.o : misc.c tests.h ifeq "$(Product)" "iPhone" $(CC) -arch armv6 $(CFLAGS) -c misc.c -o $@ else - $(CC) -arch i386 -arch x86_64 -arch ppc -arch ppc64 $(CFLAGS) -c misc.c -o $@ + $(CC) -arch i386 -arch x86_64 -arch ppc $(CFLAGS) -c misc.c -o $@ endif $(OBJROOT)/sema_tests.o : sema_tests.c tests.h @@ -137,11 +146,19 @@ $(OBJROOT)/tests.o : tests.c tests.h $(OBJROOT)/xattr_tests.o : xattr_tests.c tests.h $(CC) $(CFLAGS) -c xattr_tests.c -o $@ +$(OBJROOT)/machvm_tests.o : machvm_tests.c tests.h + $(CC) $(CFLAGS) -c machvm_tests.c -o $@ + +$(OBJROOT)/kqueue_tests.o : kqueue_tests.c tests.h + $(CC) $(CFLAGS) -c kqueue_tests.c -o $@ + +$(OBJROOT)/32bit_inode_tests.o : 32bit_inode_tests.c tests.h + $(CC) $(CFLAGS) -c 32bit_inode_tests.c -o $@ ifndef DOING_BUILDIT .PHONY : clean clean : - sudo rm -f $(DSTROOT)/xnu_quick_test - sudo rm -f $(DSTROOT)/helpers/* - rm -f $(OBJROOT)/*.o + sudo rm -Rf $(DSTROOT)/xnu_quick_test + sudo rm -Rf $(DSTROOT)/helpers/* + rm -Rf $(OBJROOT)/*.o endif diff --git a/tools/tests/xnu_quick_test/memory_tests.c b/tools/tests/xnu_quick_test/memory_tests.c index 952be5ab9..eb8817b9a 100644 --- a/tools/tests/xnu_quick_test/memory_tests.c +++ b/tools/tests/xnu_quick_test/memory_tests.c @@ -8,6 +8,7 @@ */ #include "tests.h" +#include extern char g_target_path[ PATH_MAX ]; @@ -27,12 +28,14 @@ int memory_tests( void * the_argp ) char * my_test_page_p = NULL; ssize_t my_result; pid_t my_pid, my_wait_pid; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto 
test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ /* the Mach status is returned in my_kr, so report that rather than errno */ + printf( "vm_allocate failed with error %d - \"%s\" \n", my_kr, mach_error_string( my_kr ) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -44,11 +47,12 @@ int memory_tests( void * the_argp ) } my_page_size = getpagesize( ); - my_test_page_p = (char *) malloc( my_page_size ); - if ( my_test_page_p == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_test_page_p, my_page_size, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", my_kr, mach_error_string( my_kr ) ); + goto test_failed_exit; + } + *my_test_page_p = 0x00; strcat( my_test_page_p, "parent data" ); @@ -76,9 +80,17 @@ int memory_tests( void * the_argp ) strcat( my_test_page_p, " child data" ); /* create a test file in page size chunks */ - my_bufp = (char *) malloc( (my_page_size * 10) ); - if ( my_bufp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (my_page_size * 10), VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", my_kr, mach_error_string( my_kr ) ); + my_err = -1; + goto exit_child; + } + + /* test madvise on anonymous memory */ + my_err = madvise(my_bufp, (my_page_size * 10), MADV_WILLNEED); + if ( my_err == -1 ) { + printf("madvise WILLNEED on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) ); my_err = -1; goto exit_child; } @@ -91,6 +103,14 @@ int memory_tests( void * the_argp ) goto exit_child; } + /* test madvise on anonymous memory */ + my_err = madvise(my_bufp, (my_page_size * 10), MADV_DONTNEED); + if ( my_err 
== -1 ) { + printf("madvise DONTNEED on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) ); + my_err = -1; + goto exit_child; + } + my_result = write( my_fd, my_bufp, (my_page_size * 10) ); if ( my_result == -1 ) { printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) ); @@ -116,7 +136,14 @@ int memory_tests( void * the_argp ) /* test madvise */ my_err = madvise( my_addr, (my_page_size * 2), MADV_WILLNEED ); if ( my_err == -1 ) { - printf( "madvise call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "madvise WILLNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_err = -1; + goto exit_child; + } + + my_err = madvise( my_addr, (my_page_size * 2), MADV_DONTNEED ); + if ( my_err == -1 ) { + printf( "madvise DONTNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) ); my_err = -1; goto exit_child; } @@ -129,6 +156,14 @@ int memory_tests( void * the_argp ) goto exit_child; } + /* mybufp is about to be reused, so test madvise on anonymous memory */ + my_err = madvise(my_bufp, (my_page_size * 10), MADV_FREE); + if ( my_err == -1 ) { + printf("madvise FREE on anon memory failed with error %d - \"%s\" \n", errno, strerror( errno ) ); + my_err = -1; + goto exit_child; + } + my_err = mincore( my_addr, 1, my_bufp ); if ( my_err == -1 ) { printf( "mincore call failed with error %d - \"%s\" \n", errno, strerror( errno) ); @@ -158,6 +193,22 @@ int memory_tests( void * the_argp ) goto exit_child; } + /* test madvise */ + my_err = madvise( my_addr, (my_page_size * 2), MADV_DONTNEED ); + if ( my_err == -1 ) { + printf( "madvise DONTNEED call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_err = -1; + goto exit_child; + } + + /* test madvise */ + my_err = madvise( my_addr, (my_page_size * 2), MADV_FREE ); + if ( my_err == -1 ) { + printf( "madvise FREE call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_err = -1; + goto exit_child; + 
} + /* verify that the file was updated */ lseek( my_fd, 0, SEEK_SET ); bzero( (void *)my_bufp, my_page_size ); @@ -245,10 +296,10 @@ int memory_tests( void * the_argp ) test_passed_exit: if ( my_pathp != NULL ) { remove( my_pathp ); - free( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_test_page_p != NULL ) { - free( my_test_page_p ); + vm_deallocate(mach_task_self(), (vm_address_t)my_test_page_p, my_page_size); } return( my_err ); } diff --git a/tools/tests/xnu_quick_test/misc.c b/tools/tests/xnu_quick_test/misc.c index fc962b22f..1fcc298f1 100644 --- a/tools/tests/xnu_quick_test/misc.c +++ b/tools/tests/xnu_quick_test/misc.c @@ -1,5 +1,6 @@ #include "tests.h" +#include /* * create_random_name - creates a file with a random / unique name in the given directory. @@ -83,13 +84,15 @@ int create_file_with_name( char *the_target_dirp, char *the_namep, int remove_ex int my_fd = -1; char * my_pathp = NULL; struct stat my_sb; - + kern_return_t my_kr; + create_test_file = 0; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto failure_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto failure_exit; + } + strcpy( my_pathp, the_target_dirp ); strcat( my_pathp, the_namep ); @@ -147,7 +150,7 @@ int create_file_with_name( char *the_target_dirp, char *the_namep, int remove_ex if ( my_result == -1 && create_test_file ) { remove( my_pathp ); } - free( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_result ); @@ -220,7 +223,12 @@ int do_execve_test(char * path, char * argv[], void * envp, int killwait) goto test_failed_exit; } - if ( WIFSIGNALED( my_status ) && WTERMSIG( my_status ) != SIGKILL ) { + if 
(!(WIFSIGNALED( my_status ))) { /* the child was expected to die from a signal; report the raw wait status */ + printf( "child process was not signaled and should have been - status 0x%02X \n", my_status ); + goto test_failed_exit; + } + + if (WTERMSIG( my_status ) != SIGKILL) { printf( "wait4 returned wrong signal status - 0x%02X \n", my_status ); goto test_failed_exit; } @@ -367,3 +375,43 @@ int get_bits() return rval; } +/* + * printf with a date and time stamp so that we can correlate printf's + * with the log files of a system in case of test failure. + * + * NB: MY_PRINTF_DATE_FMT chosen to look like syslog to aid "grep". + */ +#define MY_PRINTF_DATE_FMT "%b %e %T" +#undef printf /* was my_printf */ +int +my_printf(const char * __restrict fmt, ...) +{ + char *bufp; + char datebuf[256]; + struct tm *timeptr; + time_t result; + int rv; + va_list ap; + + /* Get the timestamp for this printf */ + result = time(NULL); + timeptr = localtime(&result); + strftime(datebuf, sizeof(datebuf), MY_PRINTF_DATE_FMT, timeptr); + + /* do the printf of the requested data to a local buffer */ + va_start(ap, fmt); + rv = vasprintf(&bufp, fmt, ap); + va_end(ap); + + /* + * if we successfully got a local buffer, then we want to + * print a timestamp plus what we would have printed before, + * then free the allocated memory. 
+ */ + if (rv != -1) { + rv = printf("%s %s", datebuf, bufp); + free(bufp); + } + + return(rv); +} diff --git a/tools/tests/xnu_quick_test/sema_tests.c b/tools/tests/xnu_quick_test/sema_tests.c index 279d42744..bbb84439d 100644 --- a/tools/tests/xnu_quick_test/sema_tests.c +++ b/tools/tests/xnu_quick_test/sema_tests.c @@ -21,6 +21,7 @@ int sema_tests( void * the_argp ) int my_err, i; int my_sem_id = -1; union semun my_sem_union; + struct sembuf my_sembuf; srand( (unsigned int)getpid() ); my_sem_id = semget( (key_t)1234, 1, (0666 | IPC_CREAT) ); @@ -29,13 +30,6 @@ int sema_tests( void * the_argp ) goto test_failed_exit; } -#if 1 // todo - remove this once 4149385 is fixed - /* workaround for bug in the xnu implementation of semctl */ - if ( sizeof( long ) == 8 ) { - my_sem_union.array = (void *)1; - } - else -#endif my_sem_union.val = 1; my_err = semctl( my_sem_id, 0, SETVAL, my_sem_union ); if ( my_sem_id == -1 ) { @@ -43,9 +37,15 @@ int sema_tests( void * the_argp ) goto test_failed_exit; } - for ( i = 0; i < 10000; i++ ) { - struct sembuf my_sembuf; + /* verify semop failure for bad nsop values */ + my_err = semop( my_sem_id, &my_sembuf, 10000); + if (my_err != -1 || errno != E2BIG) { + printf( "semop did not fail with E2BIG - instead %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + for ( i = 0; i < 10000; i++ ) { my_sembuf.sem_num = 0; my_sembuf.sem_op = -1; my_sembuf.sem_flg = SEM_UNDO; @@ -56,7 +56,7 @@ int sema_tests( void * the_argp ) goto test_failed_exit; } - my_err = semctl( my_sem_id, 0, GETVAL, 0 ); + my_err = semctl( my_sem_id, 0, GETVAL, 0 ); if ( my_err == -1 ) { printf( "semctl failed with error %d - \"%s\" \n", errno, strerror( errno) ); goto test_failed_exit; diff --git a/tools/tests/xnu_quick_test/shared_memory_tests.c b/tools/tests/xnu_quick_test/shared_memory_tests.c index b7c197800..ad0ddcd6f 100644 --- a/tools/tests/xnu_quick_test/shared_memory_tests.c +++ b/tools/tests/xnu_quick_test/shared_memory_tests.c @@ -62,6 
+62,13 @@ int shm_tests( void * the_argp ) printf( "shmdt failed with error %d - \"%s\" \n", errno, strerror( errno) ); goto test_failed_exit; } + + my_err = shmctl( my_shm_id, IPC_RMID, NULL ); + if ( my_err == -1 ) { + printf("shmctl failed to delete memory segment.\n"); + goto test_failed_exit; + } + my_shm_addr = NULL; my_err = 0; @@ -73,6 +80,7 @@ int shm_tests( void * the_argp ) test_passed_exit: if ( my_shm_addr != NULL ) { shmdt( my_shm_addr ); + shmctl( my_shm_id, IPC_RMID, NULL); } return( my_err ); #else diff --git a/tools/tests/xnu_quick_test/socket_tests.c b/tools/tests/xnu_quick_test/socket_tests.c index b76652b4d..c80172e19 100644 --- a/tools/tests/xnu_quick_test/socket_tests.c +++ b/tools/tests/xnu_quick_test/socket_tests.c @@ -9,8 +9,10 @@ #include "tests.h" #include +#include extern char g_target_path[ PATH_MAX ]; +extern int g_is_under_rosetta; /* ************************************************************************************************************** * Test accept, bind, connect, listen, socket, recvmsg, sendmsg, recvfrom, sendto, getpeername, getsockname @@ -33,17 +35,19 @@ int socket_tests( void * the_argp ) char my_parent_socket_name[sizeof(struct sockaddr) + 64]; char my_child_socket_name[sizeof(struct sockaddr) + 64]; char my_accept_buffer[sizeof(struct sockaddr) + 64]; + kern_return_t my_kr; /* generate 2 names for binding to the sockets (one socket in the parent and one in the child) */ - my_parent_pathp = (char *) malloc( 128 ); - if ( my_parent_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - my_child_pathp = (char *) malloc( 128 ); - if ( my_child_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_parent_pathp, 128, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - 
\"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_child_pathp, 128, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; } *my_parent_pathp = 0x00; @@ -121,7 +125,7 @@ int socket_tests( void * the_argp ) */ int my_child_fd = -1; struct msghdr my_msghdr; - struct iovec my_iov; + struct iovec my_iov[4]; char my_buffer[128]; my_child_fd = socket( AF_UNIX, SOCK_STREAM, 0 ); @@ -170,13 +174,13 @@ int socket_tests( void * the_argp ) } my_buffer[0] = 'j'; - my_iov.iov_base = &my_buffer[0]; - my_iov.iov_len = 1; + my_iov[0].iov_base = &my_buffer[0]; + my_iov[0].iov_len = 1; my_sockaddr = (struct sockaddr *) &my_parent_socket_name[0]; my_msghdr.msg_name = my_sockaddr; my_msghdr.msg_namelen = my_sockaddr->sa_len; - my_msghdr.msg_iov = &my_iov; + my_msghdr.msg_iov = &my_iov[0]; my_msghdr.msg_iovlen = 1; my_msghdr.msg_control = NULL; my_msghdr.msg_controllen = 0; @@ -207,6 +211,55 @@ int socket_tests( void * the_argp ) } #endif +#if 1 + /* sendfile test. 
Open libsystem, set up some headers, and send it */ + if (!g_is_under_rosetta) { + struct sf_hdtr my_sf_hdtr; + int my_libsys_fd; + off_t my_libsys_len; + + my_libsys_fd = open("/usr/lib/libSystem.dylib", O_RDONLY, 0644); + if (my_libsys_fd < 0) { + printf( "test failed - could not open /usr/lib/libSystem.dylib\n" ); + close ( my_child_fd ); + exit ( -1 ); + } + + my_libsys_len = 7+2; /* 2 bytes of header */ + my_buffer[0] = 's'; + my_iov[0].iov_base = &my_buffer[0]; + my_iov[0].iov_len = 1; + my_buffer[1] = 'e'; + my_iov[1].iov_base = &my_buffer[1]; + my_iov[1].iov_len = 1; + my_buffer[2] = 'n'; + my_iov[2].iov_base = &my_buffer[2]; + my_iov[2].iov_len = 1; + my_buffer[3] = 'd'; + my_iov[3].iov_base = &my_buffer[3]; + my_iov[3].iov_len = 1; + + my_sf_hdtr.headers = &my_iov[0]; + my_sf_hdtr.hdr_cnt = 2; + my_sf_hdtr.trailers = &my_iov[2]; + my_sf_hdtr.trl_cnt = 2; + + my_result = sendfile(my_libsys_fd, my_child_fd, 3, &my_libsys_len, &my_sf_hdtr, 0); + if (my_result < 0 || my_libsys_len != 11) { + printf( "sendfile failed with error %d - \"%s\" \n", errno, strerror( errno) ); + close( my_child_fd ); + exit( -1 ); + } + + my_result = close ( my_libsys_fd ); + if ( my_libsys_fd < 0 ) { + printf ( "close failed with error %d - \"%s\" \n", errno, strerror( errno) ); + close ( my_child_fd ); + exit ( -1 ); + } + } +#endif + /* tell parent we're done */ my_result = write( my_child_fd, "all done", 8 ); if ( my_result == -1 ) { @@ -278,7 +331,37 @@ int socket_tests( void * the_argp ) } #endif - /* see if child is done */ +#if 1 + if (!g_is_under_rosetta) { + size_t neededBytes = 11; + + /* Check for sendfile output */ + bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) ); + while (neededBytes > 0) { + my_result = read( my_accepted_socket, &my_parent_buffer[11-neededBytes], neededBytes ); + if ( my_result == -1 ) { + printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } else if (my_result == 0) { + break; + 
} + neededBytes -= my_result; + } + + if ( neededBytes > 0 ) { + printf( "read call returned %ld bytes instead of 11\n", 11 - neededBytes ); + goto test_failed_exit; + } + + if ( ! (my_parent_buffer[0] == 's' && my_parent_buffer[1] == 'e' && my_parent_buffer[9] == 'n' && my_parent_buffer[10] == 'd') ) { + printf( "read wrong sendfile message from child \n" ); + goto test_failed_exit; + } + } + +#endif + + /* see if child is done. bzero so that string is NUL terminated */ bzero( (void *)&my_parent_buffer[0], sizeof(my_parent_buffer) ); my_result = read( my_accepted_socket, &my_parent_buffer[0], sizeof(my_parent_buffer) ); if ( my_result == -1 ) { @@ -315,11 +398,11 @@ int socket_tests( void * the_argp ) close( my_accepted_socket ); if ( my_parent_pathp != NULL ) { remove( my_parent_pathp ); - free( my_parent_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_parent_pathp, 128); } if ( my_child_pathp != NULL ) { remove( my_child_pathp ); - free( my_child_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_child_pathp, 128); } return( my_err ); } diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c index 19d6af77f..a916b6a38 100644 --- a/tools/tests/xnu_quick_test/tests.c +++ b/tools/tests/xnu_quick_test/tests.c @@ -3,25 +3,27 @@ * xnu_quick_test * * Created by Jerry Cottingham on 3/25/05. - * Copyright 2005 Apple Computer Inc. All rights reserved. + * Copyright 2008 Apple Inc. All rights reserved. * */ #include "tests.h" -#include /* for kqueue tests */ #include /* for message queue tests */ #include /* for message queue tests */ #include /* for get / settid */ #include /* for determining hw */ #include /* for determination of Mac OS X version (tiger, leopard, etc.) 
*/ #include /* for OSSwap32() */ - +#include extern char g_target_path[ PATH_MAX ]; extern int g_skip_setuid_tests; extern int g_is_under_rosetta; +extern int g_is_single_user; + + +void print_acct_debug_strings( char * my_ac_comm ); - #if TEST_SYSTEM_CALLS /* system calls to do */ "reboot", /* 55 = reboot */ @@ -68,13 +70,14 @@ int syscall_test( void * the_argp ) { int my_err; int my_fd = -1; - char * my_pathp; + char * my_pathp; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcpy( my_pathp, &g_target_path[0] ); @@ -107,7 +110,7 @@ int syscall_test( void * the_argp ) close( my_fd ); if ( my_pathp != NULL ) { remove( my_pathp ); - free( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -179,26 +182,27 @@ int fork_wait4_exit_test( void * the_argp ) */ int read_write_test( void * the_argp ) { - int my_fd = -1; - int my_err; + int my_fd = -1; + int my_err; char * my_pathp = NULL; char * my_bufp = NULL; ssize_t my_result; off_t my_current_offset; - struct iovec my_iovs[2]; + struct iovec my_iovs[2]; struct stat my_sb; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_bufp = (char *) malloc( 
MY_BUFFER_SIZE ); - if ( my_bufp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, MY_BUFFER_SIZE, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -225,10 +229,10 @@ int read_write_test( void * the_argp ) } if ( my_result != 0 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "read call failed - should have read 0 bytes on empty file - read %lld \n", my_result ); + printf( "read call failed - should have read 0 bytes on empty file - read %ld \n", (long int) my_result ); } else { - printf( "read call failed - should have read 0 bytes on empty file - read %d \n", my_result ); + printf( "read call failed - should have read 0 bytes on empty file - read %d \n", (int) my_result ); } goto test_failed_exit; } @@ -238,10 +242,10 @@ int read_write_test( void * the_argp ) my_err = errno; if ( my_result != -1 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "write should have failed for read only fd - %lld \n", my_result ); + printf( "write should have failed for read only fd - %ld \n", (long int) my_result ); } else { - printf( "write should have failed for read only fd - %d \n", my_result ); + printf( "write should have failed for read only fd - %d \n", (int) my_result ); } goto test_failed_exit; } @@ -292,7 +296,7 @@ int read_write_test( void * the_argp ) goto test_failed_exit; } if ( my_result != 32 ) { - printf( "readv failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, my_result ); + printf( "readv failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, (int) my_result ); goto test_failed_exit; } if ( *my_bufp != 'j' || *(my_bufp + (MY_BUFFER_SIZE - 1)) != 'j' ) { @@ -330,7 +334,7 @@ int 
read_write_test( void * the_argp ) goto test_failed_exit; } if ( my_result != 16 ) { - printf( "writev failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, my_result ); + printf( "writev failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, (int) my_result ); goto test_failed_exit; } @@ -348,7 +352,7 @@ int read_write_test( void * the_argp ) goto test_failed_exit; } if ( my_result != 16 ) { - printf( "readv failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, my_result ); + printf( "readv failed to get all the data - asked for %d got back %d\n", MY_BUFFER_SIZE, (int) my_result ); goto test_failed_exit; } if ( *my_bufp != 'z' || *(my_bufp + (MY_BUFFER_SIZE - 1)) != 'z' ) { @@ -429,11 +433,11 @@ int read_write_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_bufp != NULL ) - free( my_bufp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, MY_BUFFER_SIZE); return( my_err ); } @@ -443,19 +447,20 @@ int read_write_test( void * the_argp ) */ int open_close_test( void * the_argp ) { - int my_err; - int my_fd = -1; + int my_err; + int my_fd = -1; char * my_pathp = NULL; ssize_t my_result; long my_pconf_result; struct stat my_sb; char my_buffer[32]; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -484,7 +489,7 @@ int open_close_test( void * the_argp ) // printf( 
"_PC_PATH_MAX %ld \n", my_pconf_result ); /* results look OK? */ if ( my_pconf_result < PATH_MAX ) { - printf( "pathconf - _PC_PATH_MAX - looks like wrong resutls \n" ); + printf( "pathconf - _PC_PATH_MAX - looks like wrong results \n" ); goto test_failed_exit; } @@ -496,7 +501,7 @@ int open_close_test( void * the_argp ) // printf( "_PC_NAME_MAX %ld \n", my_pconf_result ); /* results look OK? */ if ( my_pconf_result < 6 ) { - printf( "fpathconf - _PC_NAME_MAX - looks like wrong resutls \n" ); + printf( "fpathconf - _PC_NAME_MAX - looks like wrong results \n" ); goto test_failed_exit; } @@ -505,10 +510,10 @@ int open_close_test( void * the_argp ) my_err = errno; if ( my_result != 3 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "write failed. should have written 3 bytes actually wrote - %lld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); } else { - printf( "write failed. should have written 3 bytes actually wrote - %ld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); } goto test_failed_exit; } @@ -549,10 +554,10 @@ int open_close_test( void * the_argp ) my_err = errno; if ( my_result != 3 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "write failed. should have written 3 bytes actually wrote - %lld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); } else { - printf( "write failed. should have written 3 bytes actually wrote - %ld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); } goto test_failed_exit; } @@ -563,10 +568,10 @@ int open_close_test( void * the_argp ) my_err = errno; if ( my_result != 3 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "write failed. should have written 3 bytes actually wrote - %lld \n", my_result ); + printf( "write failed. 
should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); } else { - printf( "write failed. should have written 3 bytes actually wrote - %ld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); } goto test_failed_exit; } @@ -609,8 +614,8 @@ int open_close_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -621,24 +626,26 @@ int open_close_test( void * the_argp ) */ int link_stat_unlink_test( void * the_argp ) { - int my_err; - int my_fd = -1; + int my_err; + int my_fd = -1; char * my_pathp = NULL; char * my_path2p = NULL; nlink_t my_link_count; ssize_t my_result; struct stat my_sb; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - my_path2p = (char *) malloc( PATH_MAX ); - if ( my_path2p == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_path2p, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; *my_path2p = 0x00; @@ -681,10 +688,10 @@ int link_stat_unlink_test( void * the_argp ) my_err = errno; if ( my_result != 3 ) { if ( sizeof( ssize_t ) > sizeof( int ) ) { - printf( "write failed. 
should have written 3 bytes actually wrote - %lld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %ld \n", (long int) my_result ); } else { - printf( "write failed. should have written 3 bytes actually wrote - %ld \n", my_result ); + printf( "write failed. should have written 3 bytes actually wrote - %d \n", (int) my_result ); } goto test_failed_exit; } @@ -742,12 +749,12 @@ int link_stat_unlink_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_path2p != NULL ) { remove( my_path2p ); - free( my_path2p ); + vm_deallocate(mach_task_self(), (vm_address_t)my_path2p, PATH_MAX); } return( my_err ); } @@ -758,20 +765,22 @@ int link_stat_unlink_test( void * the_argp ) */ int chdir_fchdir_test( void * the_argp ) { - int my_err; - int my_fd = -1; + int my_err; + int my_fd = -1; char * my_pathp = NULL; char * my_file_namep; struct stat my_sb; struct stat my_sb2; + kern_return_t my_kr; char *cwd = getwd(NULL); /* Save current working directory so we can restore later */ - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -870,8 +879,8 @@ int chdir_fchdir_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( chdir(cwd) != 0) /* Changes back 
to original directory, don't screw up the env. */ my_err = -1; @@ -884,16 +893,17 @@ int chdir_fchdir_test( void * the_argp ) */ int access_chmod_fchmod_test( void * the_argp ) { - int my_err; - int my_fd = -1; - char * my_pathp = NULL; + int my_err; + int my_fd = -1; + char * my_pathp = NULL; struct stat my_sb; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -994,7 +1004,7 @@ int access_chmod_fchmod_test( void * the_argp ) close( my_fd ); if ( my_pathp != NULL ) { remove( my_pathp ); - free( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -1006,8 +1016,8 @@ int access_chmod_fchmod_test( void * the_argp ) int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) { #if !TARGET_OS_EMBEDDED - int my_err, my_group_count, i; - int my_fd = -1; + int my_err, my_group_count, i; + int my_fd = -1; char * my_pathp = NULL; char * my_link_pathp = NULL; uid_t my_orig_uid; @@ -1016,12 +1026,13 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) struct stat my_sb; gid_t my_groups[ NGROUPS_MAX ]; char my_buffer[ 64 ]; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto 
test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -1033,11 +1044,11 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) goto test_failed_exit; } - my_link_pathp = (char *) malloc( PATH_MAX ); - if ( my_link_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_link_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_link_pathp = 0x00; strcat( my_link_pathp, &g_target_path[0] ); @@ -1051,7 +1062,6 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) /* set up by getting a list of groups */ my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] ); - printf("my_group_count: %d\n", my_group_count); if ( my_group_count == -1 || my_group_count < 1 ) { printf( "getgroups call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); @@ -1068,8 +1078,6 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) my_orig_gid = my_sb.st_gid; my_orig_uid = my_sb.st_uid; - printf( "st_gid: %d, st_uid: %d, my_group_count: %d\n" ); - for ( i = 0; i < my_group_count; i++ ) { if ( my_orig_gid != my_groups[ i ] ) { if ( my_new_gid1 == 0 ) { @@ -1183,12 +1191,12 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_link_pathp != NULL ) { unlink( my_link_pathp ); - free( my_link_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_link_pathp, PATH_MAX); } return( my_err ); #else @@ -1213,33 +1221,39 @@ typedef struct vol_attr_buf vol_attr_buf; int fs_stat_tests( void * the_argp ) { - int my_err, my_count, i; - int my_buffer_size, my_buffer64_size; - int my_fd = -1; - int is_ufs = 0; + int my_err, my_count, i; + int my_buffer_size, my_buffer64_size; + int my_fd = -1; + int is_ufs = 0; + long my_io_size; + fsid_t my_fsid; + struct attrlist my_attrlist; + vol_attr_buf my_attr_buf; void * my_bufferp = NULL; - void * my_buffer64p = NULL; struct statfs * my_statfsp; + kern_return_t my_kr; + +#if !TARGET_OS_EMBEDDED + void * my_buffer64p = NULL; struct statfs64 * my_statfs64p; - long my_io_size; - fsid_t my_fsid; - struct attrlist my_attrlist; - vol_attr_buf my_attr_buf; - my_buffer_size = (sizeof(struct statfs) * 10); my_buffer64_size = (sizeof(struct statfs64) * 10); - my_bufferp = malloc( my_buffer_size ); - if ( my_bufferp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - my_buffer64p = malloc( my_buffer64_size ); - if ( my_buffer64p == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; + my_kr = vm_allocate((vm_map_t) 
mach_task_self(),(vm_address_t*) &my_buffer64p, my_buffer64_size, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; } +#endif + my_buffer_size = (sizeof(struct statfs) * 10); + + my_kr = vm_allocate((vm_map_t) mach_task_self(),(vm_address_t*) &my_bufferp, my_buffer_size, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + my_statfsp = (struct statfs *) my_bufferp; my_err = statfs( "/", my_statfsp ); if ( my_err == -1 ) { @@ -1272,6 +1286,7 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } +#if !TARGET_OS_EMBEDDED /* now try statfs64 */ my_statfs64p = (struct statfs64 *) my_buffer64p; my_err = statfs64( "/", my_statfs64p ); @@ -1306,6 +1321,7 @@ int fs_stat_tests( void * the_argp ) printf( "getfsstat64 call failed. could not find valid f_fstypename! \n" ); goto test_failed_exit; } +#endif /* set up to validate results via multiple sources. we use getattrlist to get volume * related attributes to verify against results from fstatfs and statfs - but only if @@ -1329,6 +1345,7 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } +#if !TARGET_OS_EMBEDDED /* testing fstatfs64 */ my_statfs64p = (struct statfs64 *) my_buffer64p; my_err = fstatfs64( my_fd, my_statfs64p ); @@ -1343,6 +1360,7 @@ int fs_stat_tests( void * the_argp ) printf( "fstatfs64 call failed. could not find valid f_fstypename! \n" ); goto test_failed_exit; } +#endif /* testing fstatfs */ my_statfsp = (struct statfs *) my_bufferp; @@ -1352,7 +1370,7 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } - /* validate resutls */ + /* validate results */ if ( !(memcmp( &my_statfsp->f_fstypename[0], "hfs", 3 ) == 0 || memcmp( &my_statfsp->f_fstypename[0], "ufs", 3 ) == 0) ) { printf( "fstatfs call failed. could not find valid f_fstypename! 
\n" ); @@ -1372,7 +1390,7 @@ int fs_stat_tests( void * the_argp ) goto test_failed_exit; } - /* validate resutls */ + /* validate results */ if ( my_io_size != my_statfsp->f_iosize || my_fsid.val[0] != my_statfsp->f_fsid.val[0] || my_fsid.val[1] != my_statfsp->f_fsid.val[1] ) { printf( "statfs call failed. wrong f_iosize or f_fsid! \n" ); @@ -1393,11 +1411,13 @@ int fs_stat_tests( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_bufferp != NULL ) { - free( my_bufferp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_bufferp, my_buffer_size); } +#if !TARGET_OS_EMBEDDED if ( my_buffer64p != NULL ) { - free( my_buffer64p ); + vm_deallocate(mach_task_self(), (vm_address_t)my_buffer64p, my_buffer64_size); } +#endif return( my_err ); } @@ -1509,7 +1529,7 @@ int getpid_getppid_pipe_test( void * the_argp ) /* ************************************************************************************************************** - * Test getauid, gettid, getuid, geteuid, issetugid, setauid, seteuid, settid, settid_with_pid, setuid system calls. + * Test getauid, gettid, getuid, geteuid, issetugid, setaudit_addr, seteuid, settid, settid_with_pid, setuid system calls. * ************************************************************************************************************** */ int uid_tests( void * the_argp ) @@ -1523,18 +1543,23 @@ int uid_tests( void * the_argp ) goto test_passed_exit; } - /* test issetugid - should return 1 when not root and 0 when root */ - my_err = issetugid( ); - if ( getuid( ) == 0 ) { - if ( my_err == 1 ) { - printf( "issetugid should return false \n" ); - goto test_failed_exit; + /* test issetugid - should return 1 when not root and 0 when root + * Figuring out setugid will not work in single-user mode; skip + * this test in that case. 
+ */ + if (!g_is_single_user) { + my_err = issetugid( ); + if ( getuid( ) == 0 ) { + if ( my_err == 1 ) { + printf( "issetugid should return false \n" ); + goto test_failed_exit; + } } - } - else { - if ( my_err == 0 ) { - printf( "issetugid should return true \n" ); - goto test_failed_exit; + else { + if ( my_err == 0 ) { + printf( "issetugid should return true \n" ); + goto test_failed_exit; + } } } @@ -1550,10 +1575,10 @@ int uid_tests( void * the_argp ) /* * child process */ - uid_t my_ruid, my_euid; - uid_t my_uid, my_temp_uid; - gid_t my_gid, my_temp_gid; - au_id_t my_au_id, my_temp_au_id; + uid_t my_ruid, my_euid; + uid_t my_uid, my_temp_uid; + gid_t my_gid, my_temp_gid; + auditinfo_addr_t my_aia; my_ruid = getuid( ); my_euid = geteuid( ); @@ -1561,7 +1586,7 @@ int uid_tests( void * the_argp ) exit( 0 ); } - /* Test getauid, gettid, setauid, settid, settid_with_pid */ + /* Test getauid, gettid, setaudit_addr, settid, settid_with_pid */ /* get our current uid and gid for comparison later */ my_uid = getuid( ); my_gid = getgid( ); @@ -1629,54 +1654,32 @@ int uid_tests( void * the_argp ) } /* - * test to make sure setauid doesn't cause audit info to get lost from + * test to make sure setaudit_addr doesn't cause audit info to get lost from * the credential. 
*/ - my_err = getauid( &my_au_id ); - if (my_err != 0) { - printf( "getauid - failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - //printf("current au_id is %d \n", my_au_id); - - my_temp_au_id = 442344; - my_err = setauid( &my_temp_au_id ); + bzero( &my_aia, sizeof(my_aia) ); + my_aia.ai_auid = 442344; + my_aia.ai_asid = AU_ASSIGN_ASID; + my_aia.ai_termid.at_type = AU_IPv4; + my_err = setaudit_addr( &my_aia, sizeof(my_aia) ); if (my_err != 0) { - printf( "setauid - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "setaudit_addr - failed with error %d - \"%s\" \n", errno, strerror( errno) ); exit( -1 ); } - my_temp_au_id = 0; - my_err = getauid( &my_temp_au_id ); + my_aia.ai_auid = 0; + my_err = getaudit_addr( &my_aia, sizeof(my_aia) ); if (my_err != 0) { - printf( "getauid - failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "getaudit_addr - failed with error %d - \"%s\" \n", errno, strerror( errno) ); exit( -1 ); } - //printf("new au_id is %d \n", my_temp_au_id); + //printf("new audit ID is %d \n", my_aia.ai_auid); - if (my_temp_au_id != 442344) { - printf("test failed - wrong au_id was set - %d \n", my_temp_au_id); + if (my_aia.ai_auid != 442344) { + printf("test failed - wrong audit ID was set - %d \n", my_aia.ai_auid); exit( -1 ); } - my_err = setauid( &my_au_id ); - if (my_err != 0) { - printf( "setauid - failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - - my_temp_au_id = 0; - my_err = getauid( &my_temp_au_id ); - if (my_err != 0) { - printf( "getauid - failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - - if (my_temp_au_id != my_au_id) { - printf("test failed - wrong au_id was set - %d \n", my_temp_au_id); - exit( -1 ); - } - /* change real uid and effective uid to current euid */ my_err = setuid( my_euid ); if ( my_err == -1 ) { @@ -1751,7 +1754,8 @@ int uid_tests( void * the_argp ) int mknod_sync_test( void * the_argp ) { 
int my_err; - char * my_pathp = NULL; + char * my_pathp = NULL; + kern_return_t my_kr; if ( g_skip_setuid_tests != 0 ) { printf("\t skipping this test \n"); @@ -1759,11 +1763,11 @@ int mknod_sync_test( void * the_argp ) goto test_passed_exit; } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, "/dev/" ); @@ -1791,8 +1795,8 @@ int mknod_sync_test( void * the_argp ) test_passed_exit: if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -1808,12 +1812,13 @@ int chflags_fchflags_test( void * the_argp ) u_int my_flags; char * my_pathp = NULL; struct stat my_sb; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -1887,8 +1892,8 @@ int chflags_fchflags_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -1937,7 +1942,13 @@ int execve_kill_vfork_test( void * the_argp ) } if (get_architecture() == 
INTEL) { - if (bits == 64 && sizeof(long) == 8) { + int ppc_fail_flag = 0; + struct stat sb; + + if (stat("/usr/libexec/oah/translate", &sb)) + ppc_fail_flag = 1; + + if (bits == 64 && sizeof(long) == 8) { /* * Running on x86_64 hardware and running in 64-bit mode. * Check cases 1, 2, 3 and fork a child to check 4, 5, 6. @@ -1959,17 +1970,19 @@ int execve_kill_vfork_test( void * the_argp ) argvs[0] = "launch-i386"; if (do_execve_test("helpers/launch-i386", argvs, NULL, 1) != 0) goto test_failed_exit; - /* Test posix_spawn for ppc64 (should fail), i386, x86_64, and ppc (should succeed) */ + /* Test posix_spawn for i386, x86_64, and ppc (should succeed) */ errmsg = NULL; - if (do_spawn_test(CPU_TYPE_POWERPC64, 1)) - goto test_failed_exit; if (do_spawn_test(CPU_TYPE_I386, 0)) goto test_failed_exit; if (do_spawn_test(CPU_TYPE_X86_64, 0)) goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC, 0)) - goto test_failed_exit; - + /* + * Note: rosetta is no go in single-user mode + */ + if (!g_is_single_user) { + if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag)) + goto test_failed_exit; + } } else if (bits == 64 && sizeof(long) == 4) { /* @@ -1993,16 +2006,19 @@ int execve_kill_vfork_test( void * the_argp ) argvs[0] = "launch-x86_64"; if (do_execve_test("helpers/launch-x86_64", argvs, NULL, 1) != 0) goto test_failed_exit; - /* Test posix_spawn for ppc64 (should fail), i386, x86_64, and ppc (should succeed) */ + /* Test posix_spawn for i386, x86_64, and ppc (should succeed) */ errmsg = NULL; - if (do_spawn_test(CPU_TYPE_POWERPC64, 1)) - goto test_failed_exit; if (do_spawn_test(CPU_TYPE_I386, 0)) goto test_failed_exit; if (do_spawn_test(CPU_TYPE_X86_64, 0)) goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC, 0)) - goto test_failed_exit; + /* + * Note: rosetta is no go in single-user mode + */ + if (!g_is_single_user) { + if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag)) + goto test_failed_exit; + } } else if (bits == 32) { /* Running on i386 hardware. 
Check cases 4. */ @@ -2016,70 +2032,36 @@ int execve_kill_vfork_test( void * the_argp ) goto test_failed_exit; if (do_spawn_test(CPU_TYPE_I386, 0)) goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC, 0)) - goto test_failed_exit; + /* + * Note: rosetta is no go in single-user mode + */ + if (!g_is_single_user) { + if (do_spawn_test(CPU_TYPE_POWERPC, ppc_fail_flag)) + goto test_failed_exit; + } } } else if (get_architecture() == POWERPC) { if (bits == 64 && sizeof(long) == 8) { /* * Running on PPC64 hardware and running in 64-bit mode. - * Check cases 1, 2, 3 and fork a child to check 4, 5, 6. + * No longer supported on SnowLeopard. */ - errmsg = "execve failed: from ppc64 forking and exec()ing 64-bit ppc process w/ 4G pagezero.\n"; - argvs[0] = "sleep-ppc64-4G"; - if (do_execve_test("helpers/sleep-ppc64-4G", argvs, NULL, 1)) goto test_failed_exit; - - errmsg = "execve failed: from ppc64 forking and exec()ing 64-bit ppc process w/ 4K pagezero.\n"; - argvs[0] = "sleep-ppc64-4K"; - if (do_execve_test("helpers/sleep-ppc64-4K", argvs, NULL, 1)) goto test_failed_exit; - - errmsg = "execve failed: from ppc64 forking and exec()ing 32 bit ppc process.\n"; - argvs[0] = "sleep-ppc32"; - if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 1)) goto test_failed_exit; - - /* Fork off a helper process and load a 32-bit program in it to test 32->64 bit exec(). 
*/ - errmsg = "execve failed to exec the helper process.\n"; - argvs[0] = "launch-ppc"; - if (do_execve_test("helpers/launch-ppc", argvs, NULL, 1) != 0) goto test_failed_exit; - - /* Test posix_spawn for i386 (should fail), ppc64, and ppc (should succeed) */ - errmsg = NULL; - if (do_spawn_test(CPU_TYPE_I386, 1)) - goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC64, 0)) - goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC, 0)) - goto test_failed_exit; - + errmsg = "runnning ppc64 on snowleopard"; + goto test_failed_exit; } else if (bits == 64 && sizeof(long) == 4) { /* - * Running on PPC64 hardware, but actually running in 32-bit mode. + * Running as PPC on PPC64 hardware or under Rosetta on x86_64 hardware. * Check cases 4, 5, 6 and fork a child to check 1, 2, 3. */ errmsg = "execve failed: from ppc forking and exec()ing ppc process.\n"; argvs[0] = "sleep-ppc32"; if (do_execve_test("helpers/sleep-ppc32", argvs, NULL, 0)) goto test_failed_exit; - errmsg = "execve failed: from ppc forking and exec()ing ppc64 process w/ 4G pagezero.\n"; - argvs[0] = "sleep-ppc64-4G"; - if (do_execve_test("helpers/sleep-ppc64-4G", argvs, NULL, 0)) goto test_failed_exit; - - errmsg = "execve failed: from ppc forking and exec()ing ppc64 process w/ 4K pagezero.\n"; - argvs[0] = "sleep-ppc64-4K"; - if (do_execve_test("helpers/sleep-ppc64-4K", argvs, NULL, 0)) goto test_failed_exit; - - /* Fork off a helper process and load a 64-bit program in it to test 64->32 bit exec(). */ - errmsg = "execve failed to exec the helper process.\n"; - argvs[0] = "launch-ppc"; - if (do_execve_test("helpers/launch-ppc64", argvs, NULL, 1) != 0) goto test_failed_exit; - - /* Test posix_spawn for i386 (should fail), ppc64, and ppc (should succeed) */ + /* Test posix_spawn for i386 and ppc */ errmsg = NULL; - if (do_spawn_test(CPU_TYPE_I386, 1)) - goto test_failed_exit; - if (do_spawn_test(CPU_TYPE_POWERPC64, 0)) + if (do_spawn_test(CPU_TYPE_I386, (g_is_under_rosetta ? 
0 : 1))) goto test_failed_exit; if (do_spawn_test(CPU_TYPE_POWERPC, 0)) goto test_failed_exit; @@ -2154,7 +2136,6 @@ int groups_test( void * the_argp ) /* start by getting list of groups the current user belongs to */ my_orig_group_count = getgroups( NGROUPS_MAX, &my_groups[0] ); - printf("my_orig_group_count: %d\n", my_orig_group_count); if ( my_orig_group_count == -1 || my_orig_group_count < 1 ) { printf( "getgroups call failed. got errno %d - %s. \n", errno, strerror( errno ) ); @@ -2188,7 +2169,6 @@ int groups_test( void * the_argp ) } my_group_count = getgroups( NGROUPS_MAX, &my_groups[0] ); - printf("my_group_count: %d\n", my_group_count); if ( my_group_count == -1 || my_group_count < 1 ) { printf( "getgroups call failed. got errno %d - %s. \n", errno, strerror( errno ) ); @@ -2286,12 +2266,13 @@ int dup_test( void * the_argp ) char * my_pathp = NULL; ssize_t my_count; char my_buffer[64]; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -2394,38 +2375,22 @@ int dup_test( void * the_argp ) if ( my_newfd != -1 ) close( my_newfd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } /* ************************************************************************************************************** - * Test profil, getrusage system calls. - * todo - how do we really test profil is working? + * Test getrusage system call. 
* ************************************************************************************************************** */ -int getrusage_profil_test( void * the_argp ) +int getrusage_test( void * the_argp ) { int my_err; - char * my_bufferp = NULL; struct rusage my_rusage; - my_bufferp = (char *) malloc( (1024 * 1000) ); - if ( my_bufferp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - bzero( (void *)my_bufferp, (1024 * 1000) ); - - /* turn on profiling */ - my_err = profil( my_bufferp, (1024 * 1000), 0, 32768 ); - if ( my_err == -1 ) { - printf( "profil failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - my_err = getrusage( RUSAGE_SELF, &my_rusage ); if ( my_err == -1 ) { printf( "getrusage failed with error %d - \"%s\" \n", errno, strerror( errno) ); @@ -2442,13 +2407,6 @@ int getrusage_profil_test( void * the_argp ) goto test_failed_exit; } - /* turn off profiling (scale value of 0 turns off profiling) */ - my_err = profil( my_bufferp, (1024 * 1000), 0, 0 ); - if ( my_err == -1 ) { - printf( "profil failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - my_err = 0; goto test_passed_exit; @@ -2456,9 +2414,6 @@ int getrusage_profil_test( void * the_argp ) my_err = -1; test_passed_exit: - if ( my_bufferp != NULL ) { - free( my_bufferp ); - } return( my_err ); } @@ -2493,12 +2448,13 @@ int signals_test( void * the_argp ) int my_fd = -1; char * my_pathp = NULL; pid_t my_pid, my_wait_pid; + kern_return_t my_kr; - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto 
test_failed_exit; + } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -2729,8 +2685,8 @@ int signals_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -2743,6 +2699,7 @@ int getlogin_setlogin_test( void * the_argp ) { int my_err, my_status; pid_t my_pid, my_wait_pid; + kern_return_t my_kr; if ( g_skip_setuid_tests != 0 ) { printf("\t skipping this test \n"); @@ -2762,32 +2719,36 @@ int getlogin_setlogin_test( void * the_argp ) /* * child process - do getlogin and setlogin testing. */ - char * my_namep; - int my_len; + char * my_namep = NULL; + int my_len; char * my_new_namep = NULL; - + my_namep = getlogin( ); if ( my_namep == NULL ) { printf( "getlogin returned NULL name pointer \n" ); my_err = -1; goto exit_child; } + my_len = strlen( my_namep ) + 4; - my_new_namep = (char *) malloc( my_len ); - if ( my_new_namep == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - my_err = -1; + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_namep, my_len, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_err = -1; goto exit_child; - } + } + bzero( (void *)my_new_namep, my_len ); + strcat( my_new_namep, my_namep ); strcat( my_new_namep, "2" ); + /* set new name */ my_err = setlogin( my_new_namep ); if ( my_err == -1 ) { - printf( "setlogin failed with error %d - \"%s\" \n", errno, strerror( errno) ); + printf( "When setting new login name, setlogin failed with error %d - \"%s\" \n", errno, strerror( errno) ); my_err = -1; goto exit_child; } @@ -2799,16 +2760,29 @@ int getlogin_setlogin_test( void * the_argp ) my_err = -1; goto exit_child; } + if ( memcmp( my_namep, my_new_namep, strlen( my_new_namep ) ) != 0 ) { printf( 
"setlogin failed to set the new name \n" ); my_err = -1; goto exit_child; } + + /* reset to original name */ + my_len = strlen ( my_namep ); + my_namep[ my_len - 1 ] = '\0'; + + my_err = setlogin( my_namep ); + if ( my_err == -1 ) { + printf( "When resetting login name, setlogin failed with error %d - \"%s\" \n", errno, strerror( errno) ); + my_err = -1; + goto exit_child; + } + my_err = 0; exit_child: if ( my_new_namep != NULL ) { - free( my_new_namep ); + vm_deallocate(mach_task_self(), (vm_address_t)my_new_namep, my_len); } exit( my_err ); } @@ -2841,13 +2815,14 @@ int getlogin_setlogin_test( void * the_argp ) */ int acct_test( void * the_argp ) { - int my_err, my_status; - int my_fd = -1; - char * my_pathp = NULL; + int my_err, my_status; + int my_fd = -1; + char * my_pathp = NULL; struct acct * my_acctp; - pid_t my_pid, my_wait_pid; - ssize_t my_count; - char my_buffer[ (sizeof(struct acct) + 32) ]; + pid_t my_pid, my_wait_pid; + ssize_t my_count; + char my_buffer[ (sizeof(struct acct) + 32) ]; + kern_return_t my_kr; if ( g_skip_setuid_tests != 0 ) { printf("\t skipping this test \n"); @@ -2855,11 +2830,12 @@ int acct_test( void * the_argp ) goto test_passed_exit; } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -2886,10 +2862,14 @@ int acct_test( void * the_argp ) goto test_failed_exit; } if ( my_pid == 0 ) { + char *argv[2]; /* supply valid argv array to execv() */ + argv[0] = "/usr/bin/true"; + argv[1] = 0; + /* * child process - do a little work then exit. 
*/ - my_err = execv( "/usr/bin/true", (char **) 0 ); + my_err = execv( argv[0], argv); exit( 0 ); } @@ -2903,10 +2883,11 @@ int acct_test( void * the_argp ) } if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) { + printf("unexpected child exit status for accounting test load: %d\n", WEXITSTATUS( my_status)); goto test_failed_exit; } - /* diable process accounting */ + /* disable process accounting */ my_err = acct( NULL ); if ( my_err == -1 ) { printf( "acct failed with error %d - \"%s\" \n", errno, strerror( errno) ); @@ -2929,6 +2910,7 @@ int acct_test( void * the_argp ) } my_acctp = (struct acct *) &my_buffer[0]; + /* first letters in ac_comm should match the name of the executable */ if ( getuid( ) != my_acctp->ac_uid || getgid( ) != my_acctp->ac_gid || my_acctp->ac_comm[0] != 't' || my_acctp->ac_comm[1] != 'r' ) { @@ -2939,7 +2921,14 @@ int acct_test( void * the_argp ) getgid( ) != OSSwapInt32(my_acctp->ac_gid) || my_acctp->ac_comm[0] != 't' || my_acctp->ac_comm[1] != 'r' ) { - printf( "accounting data does not look correct under Rosetta.\n" ); + printf( "accounting data does not look correct under Rosetta:\n" ); + printf( "------------------------\n" ); + printf( "my_acctp->ac_uid = %lu (should be: %lu)\n", + (unsigned long) OSSwapInt32( my_acctp->ac_uid ), (unsigned long) getuid() ); + printf( "my_acctp->ac_gid = %lu (should be: %lu)\n", + (unsigned long) OSSwapInt32( my_acctp->ac_gid ), (unsigned long) getgid() ); + + print_acct_debug_strings(my_acctp->ac_comm); } else { // is cool under Rosetta @@ -2948,8 +2937,14 @@ int acct_test( void * the_argp ) } } else { - printf( "accounting data does not look correct \n" ); + printf( "accounting data does not look correct:\n" ); + printf( "------------------------\n" ); + printf( "my_acctp->ac_uid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_uid, (unsigned long) getuid() ); + printf( "my_acctp->ac_gid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_gid, (unsigned long) getgid() ); + + 
print_acct_debug_strings(my_acctp->ac_comm); } + goto test_failed_exit; } my_err = 0; @@ -2962,12 +2957,33 @@ int acct_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } +void print_acct_debug_strings( char * my_ac_comm ) +{ + char my_cmd_str[11]; /* sizeof(acct_cmd) + 1 for '\0' if acct_cmd is bogus */ + char my_hex_str[128]; + int i; + + my_hex_str[0] = '\0'; + for(i = 0; i < 10; i++) + { + sprintf( my_hex_str, "%s \'0x%x\' ", my_hex_str, my_ac_comm[i]); + } + + memccpy(my_cmd_str, my_ac_comm, '\0', 10); + my_cmd_str[10] = '\0'; /* In case ac_comm was bogus */ + + + printf( "my_acctp->ac_comm = \"%s\" (should begin with: \"tr\")\n", my_cmd_str); + printf( "my_acctp->ac_comm = \"%s\"\n", my_hex_str); + printf( "------------------------\n" ); +} + /* ************************************************************************************************************** * Test ioctl system calls. 
@@ -3050,12 +3066,14 @@ int mkdir_rmdir_umask_test( void * the_argp ) char * my_pathp = NULL; mode_t my_orig_mask; struct stat my_sb; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3105,8 +3123,8 @@ int mkdir_rmdir_umask_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - rmdir( my_pathp ); - free( my_pathp ); + rmdir( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( did_umask != 0 ) { umask( my_orig_mask ); @@ -3124,6 +3142,7 @@ int chroot_test( void * the_argp ) int my_err, my_status; pid_t my_pid, my_wait_pid; char * my_pathp = NULL; + kern_return_t my_kr; if ( g_skip_setuid_tests != 0 ) { printf("\t skipping this test \n"); @@ -3131,11 +3150,12 @@ int chroot_test( void * the_argp ) goto test_passed_exit; } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3213,7 +3233,7 @@ int chroot_test( void * the_argp ) if ( my_err != 0 ) { printf( "rmdir failed with error %d - \"%s\" path %p\n", errno, strerror( errno), my_pathp ); } - free( my_pathp ); + 
vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -3301,12 +3321,14 @@ int fcntl_test( void * the_argp ) int my_err, my_result, my_tmep; int my_fd = -1; char * my_pathp = NULL; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3365,8 +3387,8 @@ int fcntl_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -3441,6 +3463,7 @@ int time_tests( void * the_argp ) struct timeval my_utimes[4]; struct timezone my_tz; struct stat my_sb; + kern_return_t my_kr; if ( g_skip_setuid_tests != 0 ) { printf( "\t skipping this test \n" ); @@ -3448,11 +3471,12 @@ int time_tests( void * the_argp ) goto test_passed_exit; } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3567,8 +3591,8 @@ int time_tests( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( 
my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -3584,12 +3608,14 @@ int rename_test( void * the_argp ) char * my_new_pathp = NULL; ino_t my_file_id; struct stat my_sb; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3600,11 +3626,12 @@ int rename_test( void * the_argp ) goto test_failed_exit; } - my_new_pathp = (char *) malloc( PATH_MAX ); - if ( my_new_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_new_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_new_pathp = 0x00; strcat( my_new_pathp, &g_target_path[0] ); strcat( my_new_pathp, "/" ); @@ -3656,12 +3683,12 @@ int rename_test( void * the_argp ) test_passed_exit: if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_new_pathp != NULL ) { - remove( my_new_pathp ); - free( my_new_pathp ); + remove( my_new_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_new_pathp, PATH_MAX); } return( my_err ); } @@ -3676,12 +3703,14 @@ int locking_test( void * the_argp ) pid_t my_pid, my_wait_pid; int my_fd = -1; char * my_pathp = NULL; + kern_return_t my_kr; + + 
my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3776,8 +3805,8 @@ int locking_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -3794,12 +3823,14 @@ int mkfifo_test( void * the_argp ) char * my_pathp = NULL; ssize_t my_result; off_t my_current_offset; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -3902,8 +3933,8 @@ int mkfifo_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -3998,6 +4029,7 @@ int limit_tests( void * the_argp ) printf( "soft limits - current %lld should be %lld \n", my_rlimit.rlim_cur, my_current_rlimit.rlim_cur ); goto test_failed_exit; } + #if CONFORMANCE_CHANGES_IN_XNU // can't do this check until conformance changes get into 
xnu printf( "hard limits - current %lld should be %lld \n", my_rlimit.rlim_max, my_current_rlimit.rlim_max ); if ( my_rlimit.rlim_max != my_current_rlimit.rlim_max ) { @@ -4005,6 +4037,30 @@ int limit_tests( void * the_argp ) goto test_failed_exit; } #endif + + /* + * A test for a limit that won't fit in a signed 32 bits, a la 5414697 + * Note: my_rlimit should still have a valid rlim_max. + */ + long long biglim = 2147483649ll; /* Just over 2^31 */ + my_rlimit.rlim_cur = biglim; + my_err = setrlimit(RLIMIT_CPU, &my_rlimit); + if (my_err == -1) { + printf("failed to set large limit.\n"); + goto test_failed_exit; + } + + bzero(&my_rlimit, sizeof(struct rlimit)); + my_err = getrlimit(RLIMIT_CPU, &my_rlimit); + if (my_err == -1) { + printf("after setting large value, failed to getrlimit().\n"); + goto test_failed_exit; + } + + if (my_rlimit.rlim_cur != biglim) { + printf("didn't retrieve large limit.\n"); + goto test_failed_exit; + } } my_err = 0; @@ -4018,7 +4074,7 @@ int limit_tests( void * the_argp ) } /* ************************************************************************************************************** - * Test getattrlist, getdirentries, getdirentriesattr, setattrlist system calls. + * Test getattrlist, getdirentriesattr, setattrlist system calls. 
* ************************************************************************************************************** */ struct test_attr_buf { @@ -4052,6 +4108,7 @@ int directory_tests( void * the_argp ) struct attrlist my_attrlist; test_attr_buf my_attr_buf[4]; struct statfs my_statfs_buf; + kern_return_t my_kr; /* need to know type of file system */ my_err = statfs( &g_target_path[0], &my_statfs_buf ); @@ -4063,17 +4120,18 @@ int directory_tests( void * the_argp ) is_ufs = 1; } - my_bufp = (char *) malloc( (1024 * 5) ); - if ( my_bufp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_bufp, (1024 * 5), VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -4094,37 +4152,6 @@ int directory_tests( void * the_argp ) printf( "open failed with error %d - \"%s\" \n", errno, strerror( errno) ); goto test_failed_exit; } - - done = found_it = 0; - while ( done == 0 ) { - int my_result, i; - struct dirent * my_dirent_p; - - my_result = getdirentries( my_fd, my_bufp, (1024 * 5), &my_base ); - if ( my_result <= 0 ) - break; - for ( i = 0; i < my_result; ) { - my_dirent_p = (struct dirent *) (my_bufp + i); -#if DEBUG - printf( "d_ino %d d_reclen %d d_type %d d_namlen %d \"%s\" \n", - my_dirent_p->d_ino, my_dirent_p->d_reclen, 
my_dirent_p->d_type, - my_dirent_p->d_namlen, &my_dirent_p->d_name[0] ); -#endif - - i += my_dirent_p->d_reclen; - /* validate results by looking for our test file */ - if ( my_dirent_p->d_type == DT_REG && my_dirent_p->d_ino != 0 && - strlen( my_file_namep ) == my_dirent_p->d_namlen && - memcmp( &my_dirent_p->d_name[0], my_file_namep, my_dirent_p->d_namlen ) == 0 ) { - done = found_it = 1; - break; - } - } - } - if ( found_it == 0 ) { - printf( "getdirentries failed to find test file. \n" ); - goto test_failed_exit; - } /* test get/setattrlist */ memset( &my_attrlist, 0, sizeof(my_attrlist) ); @@ -4206,11 +4233,11 @@ int directory_tests( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } if ( my_bufp != NULL ) { - free( my_bufp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_bufp, (1024 * 5)); } return( my_err ); } @@ -4229,6 +4256,7 @@ int exchangedata_test( void * the_argp ) ssize_t my_result; char my_buffer[16]; struct statfs my_statfs_buf; + kern_return_t my_kr; /* need to know type of file system */ my_err = statfs( &g_target_path[0], &my_statfs_buf ); @@ -4242,11 +4270,12 @@ int exchangedata_test( void * the_argp ) goto test_passed_exit; } - my_file1_pathp = (char *) malloc( PATH_MAX ); - if ( my_file1_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file1_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_file1_pathp = 0x00; strcat( my_file1_pathp, &g_target_path[0] ); strcat( my_file1_pathp, "/" ); @@ -4268,11 +4297,12 @@ int exchangedata_test( void * the_argp ) goto test_failed_exit; } - my_file2_pathp = (char 
*) malloc( PATH_MAX ); - if ( my_file2_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file2_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_file2_pathp = 0x00; strcat( my_file2_pathp, &g_target_path[0] ); strcat( my_file2_pathp, "/" ); @@ -4333,14 +4363,14 @@ int exchangedata_test( void * the_argp ) if ( my_fd1 != -1 ) close( my_fd1 ); if ( my_file1_pathp != NULL ) { - remove( my_file1_pathp ); - free( my_file1_pathp ); + remove( my_file1_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_file1_pathp, PATH_MAX); } if ( my_fd2 != -1 ) close( my_fd2 ); if ( my_file2_pathp != NULL ) { - remove( my_file2_pathp ); - free( my_file2_pathp ); + remove( my_file2_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_file2_pathp, PATH_MAX); } return( my_err ); } @@ -4373,10 +4403,11 @@ typedef struct packed_result packed_result; typedef struct packed_result * packed_result_p; #define MAX_MATCHES 10 +#define MAX_EBUSY_RETRIES 5 int searchfs_test( void * the_argp ) { - int my_err, my_items_found = 0; + int my_err, my_items_found = 0, my_ebusy_count; char * my_pathp = NULL; unsigned long my_matches; unsigned long my_search_options; @@ -4387,6 +4418,7 @@ int searchfs_test( void * the_argp ) struct packed_attr_ref my_info2; packed_result my_result_buffer[ MAX_MATCHES ]; struct statfs my_statfs_buf; + kern_return_t my_kr; /* need to know type of file system */ my_err = statfs( &g_target_path[0], &my_statfs_buf ); @@ -4400,11 +4432,12 @@ int searchfs_test( void * the_argp ) goto test_passed_exit; } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = 
vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -4433,9 +4466,14 @@ int searchfs_test( void * the_argp ) printf( "failed to create a test file name in \"%s\" \n", my_pathp ); goto test_failed_exit; } - + + /* EBUSY count updated below the catalogue_changed label */ + my_ebusy_count = 0; + +catalogue_changed: /* search target volume for all file system objects with "foo" in the name */ /* Set up the attributes we're searching on. */ + my_items_found = 0; /* Set this here in case we're completely restarting */ my_search_blk.searchattrs.bitmapcount = ATTR_BIT_MAP_COUNT; my_search_blk.searchattrs.reserved = 0; my_search_blk.searchattrs.commonattr = ATTR_CMN_NAME; @@ -4524,6 +4562,11 @@ int searchfs_test( void * the_argp ) break; } } + + /* EBUSY indicates catalogue change; retry a few times. 
*/ + if ((my_err == EBUSY) && (my_ebusy_count++ < MAX_EBUSY_RETRIES)) { + goto catalogue_changed; + } if ( !(my_err == 0 || my_err == EAGAIN) ) { printf( "searchfs failed with error %d - \"%s\" \n", my_err, strerror( my_err) ); } @@ -4554,8 +4597,8 @@ int searchfs_test( void * the_argp ) remove( my_pathp ); *my_ptr = 0x00; strcat( my_pathp, "xxxfoo" ); - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } @@ -4580,6 +4623,7 @@ int aio_tests( void * the_argp ) struct aiocb * my_aiocb_list[ AIO_TESTS_OUR_COUNT ]; struct aiocb my_aiocbs[ AIO_TESTS_OUR_COUNT ]; char * my_file_paths[ AIO_TESTS_OUR_COUNT ]; + kern_return_t my_kr; /* set up to have the ability to fire off up to AIO_TESTS_OUR_COUNT async IOs at once */ memset( &my_fd_list[0], 0xFF, sizeof( my_fd_list ) ); @@ -4587,17 +4631,18 @@ int aio_tests( void * the_argp ) memset( &my_aiocb_list[0], 0x00, sizeof( my_aiocb_list ) ); memset( &my_file_paths[0], 0x00, sizeof( my_file_paths ) ); for ( i = 0; i < AIO_TESTS_OUR_COUNT; i++ ) { - my_buffers[ i ] = malloc( AIO_TESTS_BUFFER_SIZE ); - if ( my_buffers[ i ] == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_buffers[ i ], AIO_TESTS_BUFFER_SIZE, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_file_paths[ i ], PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_file_paths[ i ] = malloc( PATH_MAX ); - if ( my_file_paths[ i ] == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto 
test_failed_exit; - } my_pathp = my_file_paths[ i ]; *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); @@ -4822,12 +4867,12 @@ int aio_tests( void * the_argp ) my_fd_list[ i ] = -1; } if ( my_file_paths[ i ] != NULL ) { - remove( my_file_paths[ i ] ); - free( my_file_paths[ i ] ); + remove( my_file_paths[ i ] ); + vm_deallocate(mach_task_self(), (vm_address_t)my_file_paths[ i ], PATH_MAX); my_file_paths[ i ] = NULL; } if ( my_buffers[ i ] != NULL ) { - free( my_buffers[ i ] ); + vm_deallocate(mach_task_self(), (vm_address_t)my_buffers[ i ], AIO_TESTS_BUFFER_SIZE); my_buffers[ i ] = NULL; } } @@ -4839,202 +4884,6 @@ int aio_tests( void * the_argp ) } -/* ************************************************************************************************************** - * Test kevent, kqueue system calls. - * ************************************************************************************************************** - */ -int kqueue_tests( void * the_argp ) -{ - int my_err, my_status; - int my_kqueue = -1; - int my_fd = -1; - char * my_pathp = NULL; - pid_t my_pid, my_wait_pid; - size_t my_count; - int my_sockets[ 2 ] = {-1, -1}; - struct kevent my_kevent; - struct timespec my_timeout; - char my_buffer[ 16 ]; - - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - *my_pathp = 0x00; - strcat( my_pathp, &g_target_path[0] ); - strcat( my_pathp, "/" ); - - /* create a test file */ - my_err = create_random_name( my_pathp, 1 ); - if ( my_err != 0 ) { - goto test_failed_exit; - } - - my_fd = open( my_pathp, O_RDWR, 0 ); - if ( my_fd == -1 ) { - printf( "open call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - - my_err = socketpair( AF_UNIX, SOCK_STREAM, 0, &my_sockets[0] ); - if ( my_err == -1 ) { - printf( "socketpair failed with errno %d - %s \n", errno, strerror( errno ) ); - goto 
test_failed_exit; - } - - /* fork here and use pipe to communicate */ - my_pid = fork( ); - if ( my_pid == -1 ) { - printf( "fork failed with errno %d - %s \n", errno, strerror( errno ) ); - goto test_failed_exit; - } - else if ( my_pid == 0 ) { - /* - * child process - tell parent we are ready to go. - */ - my_count = write( my_sockets[1], "r", 1 ); - if ( my_count == -1 ) { - printf( "write call failed. got errno %d - %s. \n", errno, strerror( errno ) ); - exit( -1 ); - } - - my_count = read( my_sockets[1], &my_buffer[0], 1 ); - if ( my_count == -1 ) { - printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - if ( my_buffer[0] != 'g' ) { - printf( "read call on socket failed to get \"all done\" message \n" ); - exit( -1 ); - } - - /* now do some work that will trigger events our parent will track */ - my_count = write( my_fd, "11111111", 8 ); - if ( my_count == -1 ) { - printf( "write call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - - my_err = unlink( my_pathp ); - if ( my_err == -1 ) { - printf( "unlink failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - - /* wait for parent to tell us to exit */ - my_count = read( my_sockets[1], &my_buffer[0], 1 ); - if ( my_count == -1 ) { - printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - exit( -1 ); - } - if ( my_buffer[0] != 'e' ) { - printf( "read call on socket failed to get \"all done\" message \n" ); - exit( -1 ); - } - exit(0); - } - - /* parent process - wait for child to spin up */ - my_count = read( my_sockets[0], &my_buffer[0], sizeof(my_buffer) ); - if ( my_count == -1 ) { - printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - if ( my_buffer[0] != 'r' ) { - printf( "read call on socket failed to get \"ready to go message\" \n" ); - goto test_failed_exit; - } - - /* set up a kqueue and register for some events */ 
- my_kqueue = kqueue( ); - if ( my_kqueue == -1 ) { - printf( "kqueue call failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - - /* look for our test file to get unlinked or written to */ - EV_SET( &my_kevent, my_fd, EVFILT_VNODE, (EV_ADD | EV_CLEAR), (NOTE_DELETE | NOTE_WRITE), 0, 0 ); - - my_timeout.tv_sec = 0; - my_timeout.tv_nsec = 0; - my_err = kevent( my_kqueue, &my_kevent, 1, NULL, 0, &my_timeout); - if ( my_err == -1 ) { - printf( "kevent call to register events failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - - /* tell child to get to work */ - my_count = write( my_sockets[0], "g", 1 ); - if ( my_count == -1 ) { - printf( "write call failed. got errno %d - %s. \n", errno, strerror( errno ) ); - goto test_failed_exit; - } - - /* go get vnode events */ - EV_SET( &my_kevent, my_fd, EVFILT_VNODE, (EV_CLEAR), 0, 0, 0 ); - my_err = kevent( my_kqueue, NULL, 0, &my_kevent, 1, NULL ); - if ( my_err == -1 ) { - printf( "kevent call to get vnode events failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } - if ( my_err == 0 ) { - printf( "kevent call to get vnode events did not return any when it should have \n" ); - goto test_failed_exit; - } - if ( (my_kevent.fflags & (NOTE_DELETE | NOTE_WRITE)) == 0 ) { - printf( "kevent call to get vnode events did not return NOTE_DELETE or NOTE_WRITE \n" ); - printf( "fflags 0x%02X \n", my_kevent.fflags ); - goto test_failed_exit; - } - - /* tell child to get to exit */ - my_count = write( my_sockets[0], "e", 1 ); - if ( my_count == -1 ) { - printf( "write call failed. got errno %d - %s. 
\n", errno, strerror( errno ) ); - goto test_failed_exit; - } - - my_wait_pid = wait4( my_pid, &my_status, 0, NULL ); - if ( my_wait_pid == -1 ) { - printf( "wait4 failed with errno %d - %s \n", errno, strerror( errno ) ); - goto test_failed_exit; - } - - /* wait4 should return our child's pid when it exits */ - if ( my_wait_pid != my_pid ) { - printf( "wait4 did not return child pid - returned %d should be %d \n", my_wait_pid, my_pid ); - goto test_failed_exit; - } - - if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) { - printf( "wait4 returned wrong exit status - 0x%02X \n", my_status ); - goto test_failed_exit; - } - - my_err = 0; - goto test_passed_exit; - -test_failed_exit: - my_err = -1; - -test_passed_exit: - if ( my_sockets[0] != -1 ) - close( my_sockets[0] ); - if ( my_sockets[1] != -1 ) - close( my_sockets[1] ); - if ( my_kqueue != -1 ) - close( my_kqueue ); - if ( my_fd != -1 ) - close( my_fd ); - if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); - } - return( my_err ); -} - - /* ************************************************************************************************************** * Test msgctl, msgget, msgrcv, msgsnd system calls. * ************************************************************************************************************** @@ -5144,6 +4993,60 @@ int message_queue_tests( void * the_argp ) } +/* ************************************************************************************************************** + * Test execution from data and stack areas. + * ************************************************************************************************************** + */ +int data_exec_tests( void * the_argp ) +{ + int my_err = 0; + int arch, bits; + + if ((arch = get_architecture()) == -1) { + printf("data_exec_test: couldn't determine architecture\n"); + goto test_failed_exit; + } + + bits = get_bits(); + + /* + * If the machine is 64-bit capable, run both the 32 and 64 bit versions of the test. 
+ * Otherwise, just run the 32-bit version. + */ + + if (arch == INTEL) { + if (bits == 64) { + if (system("arch -arch x86_64 helpers/data_exec") != 0) { + printf("data_exec-x86_64 failed\n"); + goto test_failed_exit; + } + } + + if (system("arch -arch i386 helpers/data_exec") != 0) { + printf("data_exec-i386 failed\n"); + goto test_failed_exit; + } + } + + if (arch == POWERPC) { + if (system("arch -arch ppc helpers/data_exec") != 0) { + printf("data_exec-ppc failed\n"); + goto test_failed_exit; + } + } + + /* Add new architectures here similar to the above. */ + + goto test_passed_exit; + +test_failed_exit: + my_err = -1; + +test_passed_exit: + return my_err; +} + + #if TEST_SYSTEM_CALLS /* ************************************************************************************************************** @@ -5155,12 +5058,14 @@ int sample_test( void * the_argp ) int my_err; int my_fd = -1; char * my_pathp = NULL; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char *) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -5184,8 +5089,8 @@ int sample_test( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); } diff --git a/tools/tests/xnu_quick_test/tests.h b/tools/tests/xnu_quick_test/tests.h index 268ab0e34..f93240152 100644 --- a/tools/tests/xnu_quick_test/tests.h +++ b/tools/tests/xnu_quick_test/tests.h @@ -1,9 +1,6 @@ #ifndef _TESTS_H_ #define _TESTS_H_ -#ifndef DEBUG 
-#define DEBUG 0 -#endif #ifndef CONFORMANCE_TESTS_IN_XNU #define CONFORMANCE_TESTS_IN_XNU 0 #endif @@ -19,6 +16,7 @@ #include #include #include +#include /* Used to support printf() in misc.c */ #include /* Used to determine host properties */ #include #include @@ -82,7 +80,7 @@ int get_bits(void); /* 64 or 32 */ int getlogin_setlogin_test( void * the_argp ); int getpid_getppid_pipe_test( void * the_argp ); int getpriority_setpriority_test( void * the_argp ); -int getrusage_profil_test( void * the_argp ); +int getrusage_test( void * the_argp ); int groups_test( void * the_argp ); int ioctl_test( void * the_argp ); int kqueue_tests( void * the_argp ); @@ -110,6 +108,10 @@ int syscall_test( void * the_argp ); int time_tests( void * the_argp ); int uid_tests( void * the_argp ); int xattr_tests( void * the_argp ); +int data_exec_tests( void * the_argp ); +int machvm_tests( void * the_argp ); +int getdirentries_test( void * the_argp ); +int statfs_32bit_inode_tests( void * the_argp ); struct test_entry { @@ -120,4 +122,8 @@ struct test_entry }; typedef struct test_entry * test_entryp; +/* Special replacement printf with date/time stamp */ +int my_printf(const char * __restrict fmt, ...); +#define printf my_printf + #endif /* !_TESTS_H_ */ diff --git a/tools/tests/xnu_quick_test/xattr_tests.c b/tools/tests/xnu_quick_test/xattr_tests.c index bcd83802b..b3248edc2 100644 --- a/tools/tests/xnu_quick_test/xattr_tests.c +++ b/tools/tests/xnu_quick_test/xattr_tests.c @@ -9,6 +9,7 @@ #include "tests.h" #include +#include extern char g_target_path[ PATH_MAX ]; @@ -26,12 +27,14 @@ int xattr_tests( void * the_argp ) ssize_t my_result; char my_buffer[ 64 ]; char my_xattr_data[ ] = "xattr_foo"; + kern_return_t my_kr; + + my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); + if(my_kr != KERN_SUCCESS){ + printf( "vm_allocate failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } - my_pathp = (char 
*) malloc( PATH_MAX ); - if ( my_pathp == NULL ) { - printf( "malloc failed with error %d - \"%s\" \n", errno, strerror( errno) ); - goto test_failed_exit; - } *my_pathp = 0x00; strcat( my_pathp, &g_target_path[0] ); strcat( my_pathp, "/" ); @@ -156,8 +159,8 @@ int xattr_tests( void * the_argp ) if ( my_fd != -1 ) close( my_fd ); if ( my_pathp != NULL ) { - remove( my_pathp ); - free( my_pathp ); + remove( my_pathp ); + vm_deallocate(mach_task_self(), (vm_address_t)my_pathp, PATH_MAX); } return( my_err ); }